Repository: FEX-Emu/FEX
Branch: main
Commit: 5c4c468d1326
Files: 3243
Total size: 18.8 MB

Directory structure:
gitextract_1dff4jwg/

├── .clang-format
├── .clang-format-ignore
├── .git-blame-ignore-revs
├── .github/
│   ├── ISSUE_TEMPLATE/
│   │   └── potential-game-bug.md
│   └── workflows/
│       ├── ccpp.yml
│       ├── glibc_fault.yml
│       ├── hostrunner.yml
│       ├── instcountci.yml
│       ├── mingw_build.yml
│       ├── pr-code-format.yml
│       ├── setup-env/
│       │   └── action.yml
│       ├── steamrt4.yml
│       ├── test/
│       │   └── action.yml
│       ├── vixl_simulator.yml
│       ├── wine_build/
│       │   └── action.yml
│       └── wine_dll_artifacts.yml
├── .gitignore
├── .gitlab-ci.yml
├── .gitmodules
├── CMakeLists.txt
├── CMakeSettings.json
├── CODE_OF_CONDUCT.md
├── CodeEmitter/
│   ├── CMakeLists.txt
│   └── CodeEmitter/
│       ├── ALUOps.inl
│       ├── ASIMDOps.inl
│       ├── BranchOps.inl
│       ├── Buffer.h
│       ├── Emitter.h
│       ├── LoadstoreOps.inl
│       ├── Registers.h
│       ├── SVEOps.inl
│       ├── ScalarOps.inl
│       ├── SystemOps.inl
│       └── VixlUtils.inl
├── Data/
│   ├── AppConfig/
│   │   ├── CMakeLists.txt
│   │   ├── client.json
│   │   └── steamwebhelper.json
│   ├── CI/
│   │   ├── FEXLinuxTestsThunks.json
│   │   ├── GLThunks.json
│   │   └── VulkanThunks.json
│   ├── CMake/
│   │   ├── FindZycore.cmake
│   │   ├── FindZydis.cmake
│   │   ├── Findxxhash.cmake
│   │   ├── LinkerGC.cmake
│   │   ├── cmake_uninstall.cmake.in
│   │   ├── toolchain_aarch64.cmake
│   │   ├── toolchain_mingw.cmake
│   │   ├── toolchain_x86_32.cmake
│   │   ├── toolchain_x86_64.cmake
│   │   └── version_to_variables.cmake
│   ├── Dockerfile
│   ├── ThunksDB.json
│   ├── binfmts/
│   │   ├── CMakeLists.txt
│   │   ├── FEX-x86.conf.in
│   │   ├── FEX-x86.in
│   │   ├── FEX-x86_64.conf.in
│   │   └── FEX-x86_64.in
│   └── nix/
│       ├── FEXLinuxTests/
│       │   └── shell.nix
│       ├── LibraryForwarding/
│       │   └── shell.nix
│       ├── WineOnArm/
│       │   └── shell.nix
│       ├── cmake_configure_woa32.sh
│       ├── cmake_configure_woa64.sh
│       ├── cmake_enable_flt.sh
│       └── cmake_enable_libfwd.sh
├── External/
│   ├── .clang-format
│   ├── SoftFloat-3e/
│   │   ├── CMakeLists.txt
│   │   ├── include/
│   │   │   └── SoftFloat-3e/
│   │   │       ├── opts-GCC.h
│   │   │       ├── platform.h
│   │   │       ├── primitiveTypes.h
│   │   │       ├── softfloat.h
│   │   │       └── softfloat_types.h
│   │   └── src/
│   │       ├── extF80_add.c
│   │       ├── extF80_div.c
│   │       ├── extF80_eq.c
│   │       ├── extF80_le.c
│   │       ├── extF80_lt.c
│   │       ├── extF80_mul.c
│   │       ├── extF80_rem.c
│   │       ├── extF80_roundToInt.c
│   │       ├── extF80_sqrt.c
│   │       ├── extF80_sub.c
│   │       ├── extF80_to_f128.c
│   │       ├── extF80_to_f32.c
│   │       ├── extF80_to_f64.c
│   │       ├── extF80_to_i32.c
│   │       ├── extF80_to_i64.c
│   │       ├── extF80_to_ui64.c
│   │       ├── f128_add.c
│   │       ├── f128_div.c
│   │       ├── f128_eq.c
│   │       ├── f128_eq_signaling.c
│   │       ├── f128_isSignalingNaN.c
│   │       ├── f128_le.c
│   │       ├── f128_le_quiet.c
│   │       ├── f128_lt.c
│   │       ├── f128_lt_quiet.c
│   │       ├── f128_mul.c
│   │       ├── f128_mulAdd.c
│   │       ├── f128_rem.c
│   │       ├── f128_sqrt.c
│   │       ├── f128_sub.c
│   │       ├── f128_to_extF80.c
│   │       ├── f128_to_f16.c
│   │       ├── f128_to_f32.c
│   │       ├── f128_to_f64.c
│   │       ├── f128_to_i32.c
│   │       ├── f128_to_i64.c
│   │       ├── f128_to_ui32.c
│   │       ├── f128_to_ui64.c
│   │       ├── f32_to_extF80.c
│   │       ├── f32_to_f128.c
│   │       ├── f64_to_extF80.c
│   │       ├── i32_to_extF80.c
│   │       ├── i32_to_f128.c
│   │       ├── internals.h
│   │       ├── primitives.h
│   │       ├── s_add128.c
│   │       ├── s_addMagsExtF80.c
│   │       ├── s_addMagsF128.c
│   │       ├── s_approxRecip32_1.c
│   │       ├── s_approxRecipSqrt32_1.c
│   │       ├── s_approxRecipSqrt_1Ks.c
│   │       ├── s_approxRecip_1Ks.c
│   │       ├── s_commonNaNToExtF80UI.c
│   │       ├── s_commonNaNToF128UI.c
│   │       ├── s_commonNaNToF32UI.c
│   │       ├── s_commonNaNToF64UI.c
│   │       ├── s_countLeadingZeros32.c
│   │       ├── s_countLeadingZeros64.c
│   │       ├── s_countLeadingZeros8.c
│   │       ├── s_extF80UIToCommonNaN.c
│   │       ├── s_f128UIToCommonNaN.c
│   │       ├── s_f32UIToCommonNaN.c
│   │       ├── s_f64UIToCommonNaN.c
│   │       ├── s_le128.c
│   │       ├── s_lt128.c
│   │       ├── s_mul64ByShifted32To128.c
│   │       ├── s_mul64To128.c
│   │       ├── s_normRoundPackToExtF80.c
│   │       ├── s_normRoundPackToF128.c
│   │       ├── s_normSubnormalExtF80Sig.c
│   │       ├── s_normSubnormalF128Sig.c
│   │       ├── s_normSubnormalF128SigM.c
│   │       ├── s_normSubnormalF32Sig.c
│   │       ├── s_normSubnormalF64Sig.c
│   │       ├── s_propagateNaNExtF80UI.c
│   │       ├── s_propagateNaNF128UI.c
│   │       ├── s_roundPackToExtF80.c
│   │       ├── s_roundPackToF128.c
│   │       ├── s_roundPackToF32.c
│   │       ├── s_roundPackToF64.c
│   │       ├── s_roundToI32.c
│   │       ├── s_roundToI64.c
│   │       ├── s_roundToUI64.c
│   │       ├── s_shiftRightJam128.c
│   │       ├── s_shiftRightJam128Extra.c
│   │       ├── s_shiftRightJam32.c
│   │       ├── s_shiftRightJam64.c
│   │       ├── s_shiftRightJam64Extra.c
│   │       ├── s_shortShiftLeft128.c
│   │       ├── s_shortShiftRight128.c
│   │       ├── s_shortShiftRightJam64.c
│   │       ├── s_shortShiftRightJam64Extra.c
│   │       ├── s_sub128.c
│   │       ├── s_subMagsExtF80.c
│   │       ├── s_subMagsF128.c
│   │       ├── softfloat_raiseFlags.c
│   │       ├── specialize.h
│   │       └── ui64_to_extF80.c
│   ├── cephes/
│   │   ├── CMakeLists.txt
│   │   ├── LICENSE
│   │   ├── include/
│   │   │   └── cephes_128bit.h
│   │   └── src/
│   │       └── 128bit/
│   │           ├── Impl.cpp
│   │           ├── atanll.c
│   │           ├── constll.c
│   │           ├── exp2ll.c
│   │           ├── floorll.c
│   │           ├── log2ll.c
│   │           ├── mconf.h
│   │           ├── mtherr.c
│   │           ├── polevll.c
│   │           ├── sinll.c
│   │           └── tanll.c
│   ├── code-format-helper/
│   │   ├── code-format-helper.py
│   │   ├── requirements_formatting.txt
│   │   └── requirements_formatting.txt.in
│   └── tiny-json/
│       ├── CMakeLists.txt
│       ├── LICENSE
│       ├── tiny-json.c
│       └── tiny-json.h
├── FEXCore/
│   ├── CMakeLists.txt
│   ├── LICENSE
│   ├── Readme.md
│   ├── Scripts/
│   │   ├── config_generator.py
│   │   ├── json_ir_doc_generator.py
│   │   └── json_ir_generator.py
│   ├── Source/
│   │   ├── CMakeLists.txt
│   │   ├── Common/
│   │   │   ├── BitSet.h
│   │   │   ├── JitSymbols.cpp
│   │   │   ├── JitSymbols.h
│   │   │   ├── SoftFloat.h
│   │   │   ├── StringConv.h
│   │   │   └── VectorRegType.h
│   │   ├── Interface/
│   │   │   ├── Config/
│   │   │   │   ├── Config.cpp
│   │   │   │   └── Config.json.in
│   │   │   ├── Context/
│   │   │   │   ├── Context.cpp
│   │   │   │   └── Context.h
│   │   │   ├── Core/
│   │   │   │   ├── Addressing.cpp
│   │   │   │   ├── Addressing.h
│   │   │   │   ├── ArchHelpers/
│   │   │   │   │   ├── Arm64Emitter.cpp
│   │   │   │   │   └── Arm64Emitter.h
│   │   │   │   ├── CPUBackend.cpp
│   │   │   │   ├── CPUBackend.h
│   │   │   │   ├── CPUID.cpp
│   │   │   │   ├── CPUID.h
│   │   │   │   ├── CodeCache.cpp
│   │   │   │   ├── Core.cpp
│   │   │   │   ├── Dispatcher/
│   │   │   │   │   ├── Dispatcher.cpp
│   │   │   │   │   └── Dispatcher.h
│   │   │   │   ├── Frontend.cpp
│   │   │   │   ├── Frontend.h
│   │   │   │   ├── Interpreter/
│   │   │   │   │   ├── Fallbacks/
│   │   │   │   │   │   ├── F80Fallbacks.h
│   │   │   │   │   │   ├── FallbackOpHandler.h
│   │   │   │   │   │   ├── InterpreterFallbacks.cpp
│   │   │   │   │   │   ├── StringCompareFallbacks.cpp
│   │   │   │   │   │   └── VectorFallbacks.h
│   │   │   │   │   └── InterpreterOps.h
│   │   │   │   ├── JIT/
│   │   │   │   │   ├── ALUOps.cpp
│   │   │   │   │   ├── Arm64Relocations.cpp
│   │   │   │   │   ├── AtomicOps.cpp
│   │   │   │   │   ├── BranchOps.cpp
│   │   │   │   │   ├── ConversionOps.cpp
│   │   │   │   │   ├── DebugData.h
│   │   │   │   │   ├── EncryptionOps.cpp
│   │   │   │   │   ├── JIT.cpp
│   │   │   │   │   ├── JITClass.h
│   │   │   │   │   ├── MemoryOps.cpp
│   │   │   │   │   ├── MiscOps.cpp
│   │   │   │   │   ├── MoveOps.cpp
│   │   │   │   │   ├── Relocations.h
│   │   │   │   │   └── VectorOps.cpp
│   │   │   │   ├── LookupCache.cpp
│   │   │   │   ├── LookupCache.h
│   │   │   │   ├── OpcodeDispatcher/
│   │   │   │   │   ├── AVX_128.cpp
│   │   │   │   │   ├── BaseTables.h
│   │   │   │   │   ├── Crypto.cpp
│   │   │   │   │   ├── DDDTables.h
│   │   │   │   │   ├── Flags.cpp
│   │   │   │   │   ├── H0F38Tables.h
│   │   │   │   │   ├── H0F3ATables.h
│   │   │   │   │   ├── PrimaryGroupTables.h
│   │   │   │   │   ├── SecondaryGroupTables.h
│   │   │   │   │   ├── SecondaryModRMTables.h
│   │   │   │   │   ├── SecondaryTables.h
│   │   │   │   │   ├── VEXTables.h
│   │   │   │   │   ├── Vector.cpp
│   │   │   │   │   ├── X87.cpp
│   │   │   │   │   └── X87F64.cpp
│   │   │   │   ├── OpcodeDispatcher.cpp
│   │   │   │   ├── OpcodeDispatcher.h
│   │   │   │   ├── VSyscall/
│   │   │   │   │   └── VSyscall.inc
│   │   │   │   └── X86Tables/
│   │   │   │       ├── BaseTables.cpp
│   │   │   │       ├── DDDTables.cpp
│   │   │   │       ├── H0F38Tables.cpp
│   │   │   │       ├── H0F3ATables.cpp
│   │   │   │       ├── PrimaryGroupTables.cpp
│   │   │   │       ├── SecondaryGroupTables.cpp
│   │   │   │       ├── SecondaryModRMTables.cpp
│   │   │   │       ├── SecondaryTables.cpp
│   │   │   │       ├── VEXTables.cpp
│   │   │   │       ├── X86Tables.h
│   │   │   │       └── X87Tables.cpp
│   │   │   ├── GDBJIT/
│   │   │   │   ├── GDBJIT.cpp
│   │   │   │   └── GDBJIT.h
│   │   │   └── IR/
│   │   │       ├── IR.h
│   │   │       ├── IR.json
│   │   │       ├── IRDumper.cpp
│   │   │       ├── IREmitter.cpp
│   │   │       ├── IREmitter.h
│   │   │       ├── IntrusiveIRList.h
│   │   │       ├── PassManager.cpp
│   │   │       ├── PassManager.h
│   │   │       ├── Passes/
│   │   │       │   ├── IRDumperPass.cpp
│   │   │       │   ├── IRValidation.cpp
│   │   │       │   ├── IRValidation.h
│   │   │       │   ├── RedundantFlagCalculationElimination.cpp
│   │   │       │   ├── RegisterAllocationPass.cpp
│   │   │       │   ├── RegisterAllocationPass.h
│   │   │       │   └── x87StackOptimizationPass.cpp
│   │   │       ├── Passes.h
│   │   │       └── RegisterAllocationData.h
│   │   └── Utils/
│   │       ├── Allocator/
│   │       │   ├── 64BitAllocator.cpp
│   │       │   ├── FlexBitSet.h
│   │       │   ├── HostAllocator.h
│   │       │   └── IntrusiveArenaAllocator.h
│   │       ├── Allocator.cpp
│   │       ├── Allocator.h
│   │       ├── AllocatorHooks.cpp
│   │       ├── AllocatorOverride.cpp
│   │       ├── ArchHelpers/
│   │       │   ├── Arm64.cpp
│   │       │   └── Arm64_stubs.cpp
│   │       ├── BucketList.h
│   │       ├── Config.h
│   │       ├── FileLoading.cpp
│   │       ├── ForcedAssert.cpp
│   │       ├── LogManager.cpp
│   │       ├── LongJump.cpp
│   │       ├── MemberFunctionToPointer.h
│   │       ├── Profiler.cpp
│   │       ├── SpinWaitLock.cpp
│   │       ├── SpinWaitLock.h
│   │       ├── Telemetry.cpp
│   │       ├── Threads.cpp
│   │       ├── WritePriorityMutex.h
│   │       └── variable_length_integer.h
│   ├── docs/
│   │   ├── CPUBackends.md
│   │   ├── CustomCPUBackend.md
│   │   ├── Frontend.md
│   │   ├── IR.md
│   │   ├── MemoryModelEmulation.md
│   │   └── OpDispatcher.md
│   ├── include/
│   │   ├── FEXCore/
│   │   │   ├── Config/
│   │   │   │   └── Config.h
│   │   │   ├── Core/
│   │   │   │   ├── CPUID.h
│   │   │   │   ├── CodeCache.h
│   │   │   │   ├── Context.h
│   │   │   │   ├── CoreState.h
│   │   │   │   ├── HostFeatures.h
│   │   │   │   ├── SignalDelegator.h
│   │   │   │   ├── Thunks.h
│   │   │   │   └── X86Enums.h
│   │   │   ├── Debug/
│   │   │   │   ├── GDBReaderInterface.h
│   │   │   │   └── InternalThreadState.h
│   │   │   ├── HLE/
│   │   │   │   ├── SourcecodeResolver.h
│   │   │   │   └── SyscallHandler.h
│   │   │   ├── IR/
│   │   │   │   └── IR.h
│   │   │   ├── Utils/
│   │   │   │   ├── Allocator.h
│   │   │   │   ├── AllocatorHooks.h
│   │   │   │   ├── ArchHelpers/
│   │   │   │   │   └── Arm64.h
│   │   │   │   ├── CompilerDefs.h
│   │   │   │   ├── EnumOperators.h
│   │   │   │   ├── EnumUtils.h
│   │   │   │   ├── Event.h
│   │   │   │   ├── FPState.h
│   │   │   │   ├── File.h
│   │   │   │   ├── FileLoading.h
│   │   │   │   ├── InterruptableConditionVariable.h
│   │   │   │   ├── IntervalList.h
│   │   │   │   ├── LogManager.h
│   │   │   │   ├── LongJump.h
│   │   │   │   ├── MathUtils.h
│   │   │   │   ├── PrctlUtils.h
│   │   │   │   ├── Profiler.h
│   │   │   │   ├── SHMStats.h
│   │   │   │   ├── SignalScopeGuards.h
│   │   │   │   ├── StringUtils.h
│   │   │   │   ├── Telemetry.h
│   │   │   │   ├── ThreadPoolAllocator.h
│   │   │   │   ├── Threads.h
│   │   │   │   └── TypeDefines.h
│   │   │   └── fextl/
│   │   │       ├── allocator.h
│   │   │       ├── deque.h
│   │   │       ├── fmt.h
│   │   │       ├── forward_list.h
│   │   │       ├── functional.h
│   │   │       ├── list.h
│   │   │       ├── map.h
│   │   │       ├── memory.h
│   │   │       ├── memory_resource.h
│   │   │       ├── queue.h
│   │   │       ├── robin_map.h
│   │   │       ├── robin_set.h
│   │   │       ├── set.h
│   │   │       ├── sstream.h
│   │   │       ├── stack.h
│   │   │       ├── string.h
│   │   │       ├── unordered_map.h
│   │   │       ├── unordered_set.h
│   │   │       └── vector.h
│   │   └── git_version.h.in
│   └── unittests/
│       ├── APITests/
│       │   ├── Allocator.cpp
│       │   ├── CMakeLists.txt
│       │   ├── FileLoading.cpp
│       │   ├── FlexBitSet.cpp
│       │   ├── FutexSpinTest.cpp
│       │   ├── ILog2.cpp
│       │   └── vl_integer.cpp
│       ├── CMakeLists.txt
│       └── Emitter/
│           ├── ALU_Tests.cpp
│           ├── ASIMD_Tests.cpp
│           ├── Branch_Tests.cpp
│           ├── CMakeLists.txt
│           ├── Loadstore_Tests.cpp
│           ├── SVE_Tests.cpp
│           ├── Scalar_Tests.cpp
│           ├── System_Tests.cpp
│           └── TestDisassembler.h
├── FEXHeaderUtils/
│   ├── CMakeLists.txt
│   └── FEXHeaderUtils/
│       ├── BitUtils.h
│       ├── Filesystem.h
│       ├── RingBuffer.h
│       ├── StringArgumentParser.h
│       ├── SymlinkChecks.h
│       └── Syscalls.h
├── LICENSE
├── Readme.md
├── Scripts/
│   ├── CI_FetchRootFS.py
│   ├── CheckBinfmtNotInstall.sh
│   ├── ClassifyCPU.py
│   ├── DefinitionExtract.py
│   ├── FEXUpdateAOTIRCache.sh
│   ├── GenerateSyscallNumbers.py
│   ├── InstallFEX.py
│   ├── InstructionCountParser.py
│   ├── NeedDisabledSVE.py
│   ├── StructPackVerifier.py
│   ├── Threaded_Lockstep_Runner.py
│   ├── UpdateInstructionCountJson.py
│   ├── aarch64_fit_native.py
│   ├── changelog_generator.py
│   ├── doc_outline_generator.py
│   ├── generate_changelog.sh
│   ├── generate_doc_outline.sh
│   ├── generate_release.sh
│   ├── guest_test_runner.py
│   ├── json_asm_config_parse.py
│   ├── json_config_parse.py
│   ├── json_ir_config_parse.py
│   ├── reformat.sh
│   ├── testharness_runner.py
│   └── update_instcountci.sh
├── Source/
│   ├── CMakeLists.txt
│   ├── Common/
│   │   ├── ArgumentLoader.cpp
│   │   ├── ArgumentLoader.h
│   │   ├── Async.h
│   │   ├── AsyncNet.h
│   │   ├── CMakeLists.txt
│   │   ├── CPUInfo.cpp
│   │   ├── CPUInfo.h
│   │   ├── Config.cpp
│   │   ├── Config.h
│   │   ├── FDUtils.h
│   │   ├── FEXServerClient.cpp
│   │   ├── FEXServerClient.h
│   │   ├── FileFormatCheck.cpp
│   │   ├── FileFormatCheck.h
│   │   ├── FileMappingBaseAddress.h
│   │   ├── HostFeatures.cpp
│   │   ├── HostFeatures.h
│   │   ├── JSONPool.cpp
│   │   ├── JSONPool.h
│   │   ├── Linux/
│   │   │   ├── SBRKAllocations.cpp
│   │   │   └── SBRKAllocations.h
│   │   ├── SHMStats.cpp
│   │   ├── SHMStats.h
│   │   ├── VolatileMetadata.cpp
│   │   ├── VolatileMetadata.h
│   │   └── X86Features.h
│   ├── Steam/
│   │   ├── CMakeLists.txt
│   │   ├── CompatTool.cpp
│   │   ├── ConfigTemplate.json
│   │   ├── ServerManager.cpp
│   │   ├── VERSIONS.txt.in
│   │   ├── emulator.json
│   │   └── toolmanifest.vdf
│   ├── Tools/
│   │   ├── CMakeLists.txt
│   │   ├── CodeSizeValidation/
│   │   │   ├── CMakeLists.txt
│   │   │   └── Main.cpp
│   │   ├── CommonTools/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── CodeLoader.h
│   │   │   ├── DummyHandlers.cpp
│   │   │   ├── DummyHandlers.h
│   │   │   ├── HarnessHelpers.h
│   │   │   ├── Linux/
│   │   │   │   └── Utils/
│   │   │   │       ├── ELFContainer.cpp
│   │   │   │       ├── ELFContainer.h
│   │   │   │       └── ELFParser.h
│   │   │   └── PortabilityInfo.h
│   │   ├── FEXBash/
│   │   │   ├── CMakeLists.txt
│   │   │   └── FEXBash.cpp
│   │   ├── FEXConfig/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── Main.cpp
│   │   │   ├── Main.h
│   │   │   ├── main.qml
│   │   │   ├── qml5.qrc
│   │   │   ├── qml6.qrc
│   │   │   ├── qt5/
│   │   │   │   ├── FileDialog.qml
│   │   │   │   ├── FolderDialog.qml
│   │   │   │   └── MessageDialog.qml
│   │   │   └── qt6/
│   │   │       ├── FileDialog.qml
│   │   │       ├── FolderDialog.qml
│   │   │       └── MessageDialog.qml
│   │   ├── FEXGDBReader/
│   │   │   ├── CMakeLists.txt
│   │   │   └── FEXGDBReader.cpp
│   │   ├── FEXGetConfig/
│   │   │   ├── CMakeLists.txt
│   │   │   └── Main.cpp
│   │   ├── FEXInterpreter/
│   │   │   ├── AOT/
│   │   │   │   ├── AOTGenerator.cpp
│   │   │   │   └── AOTGenerator.h
│   │   │   ├── CMakeLists.txt
│   │   │   ├── ELFCodeLoader.h
│   │   │   └── FEXInterpreter.cpp
│   │   ├── FEXOfflineCompiler/
│   │   │   ├── CMakeLists.txt
│   │   │   └── Main.cpp
│   │   ├── FEXRootFSFetcher/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── Main.cpp
│   │   │   ├── XXFileHash.cpp
│   │   │   └── XXFileHash.h
│   │   ├── FEXServer/
│   │   │   ├── ArgumentLoader.cpp
│   │   │   ├── ArgumentLoader.h
│   │   │   ├── CMakeLists.txt
│   │   │   ├── Logger.cpp
│   │   │   ├── Logger.h
│   │   │   ├── Main.cpp
│   │   │   ├── PipeScanner.cpp
│   │   │   ├── PipeScanner.h
│   │   │   ├── ProcessPipe.cpp
│   │   │   ├── ProcessPipe.h
│   │   │   ├── SquashFS.cpp
│   │   │   └── SquashFS.h
│   │   ├── LinuxEmulation/
│   │   │   ├── ArchHelpers/
│   │   │   │   ├── MContext.cpp
│   │   │   │   ├── MContext.h
│   │   │   │   ├── UContext.h
│   │   │   │   └── WinContext.h
│   │   │   ├── CMakeLists.txt
│   │   │   ├── GdbServer/
│   │   │   │   ├── Info.cpp
│   │   │   │   └── Info.h
│   │   │   ├── LinuxSyscalls/
│   │   │   │   ├── Arm64/
│   │   │   │   │   └── SyscallsEnum.h
│   │   │   │   ├── EmulatedFiles/
│   │   │   │   │   ├── EmulatedFiles.cpp
│   │   │   │   │   └── EmulatedFiles.h
│   │   │   │   ├── FaultSafeUserMemAccess.cpp
│   │   │   │   ├── FileManagement.cpp
│   │   │   │   ├── FileManagement.h
│   │   │   │   ├── GdbServer.cpp
│   │   │   │   ├── GdbServer.h
│   │   │   │   ├── LinuxAllocator.cpp
│   │   │   │   ├── LinuxAllocator.h
│   │   │   │   ├── Seccomp/
│   │   │   │   │   ├── BPFEmitter.cpp
│   │   │   │   │   ├── BPFEmitter.h
│   │   │   │   │   ├── Dumper.cpp
│   │   │   │   │   ├── SeccompEmulator.cpp
│   │   │   │   │   └── SeccompEmulator.h
│   │   │   │   ├── SignalDelegator/
│   │   │   │   │   └── GuestFramesManagement.cpp
│   │   │   │   ├── SignalDelegator.cpp
│   │   │   │   ├── SignalDelegator.h
│   │   │   │   ├── Syscalls/
│   │   │   │   │   ├── EPoll.cpp
│   │   │   │   │   ├── FD.cpp
│   │   │   │   │   ├── FS.cpp
│   │   │   │   │   ├── IO.cpp
│   │   │   │   │   ├── Info.cpp
│   │   │   │   │   ├── Memory.cpp
│   │   │   │   │   ├── NotImplemented.cpp
│   │   │   │   │   ├── Passthrough.cpp
│   │   │   │   │   ├── Signals.cpp
│   │   │   │   │   ├── Stubs.cpp
│   │   │   │   │   ├── Thread.cpp
│   │   │   │   │   ├── Thread.h
│   │   │   │   │   └── Timer.cpp
│   │   │   │   ├── Syscalls.cpp
│   │   │   │   ├── Syscalls.h
│   │   │   │   ├── SyscallsSMCTracking.cpp
│   │   │   │   ├── SyscallsVMATracking.cpp
│   │   │   │   ├── SyscallsVMATracking.h
│   │   │   │   ├── ThreadManager.cpp
│   │   │   │   ├── ThreadManager.h
│   │   │   │   ├── Types.h
│   │   │   │   ├── Utils/
│   │   │   │   │   ├── Threads.cpp
│   │   │   │   │   └── Threads.h
│   │   │   │   ├── x32/
│   │   │   │   │   ├── EPoll.cpp
│   │   │   │   │   ├── FD.cpp
│   │   │   │   │   ├── FS.cpp
│   │   │   │   │   ├── IO.cpp
│   │   │   │   │   ├── Info.cpp
│   │   │   │   │   ├── Ioctl/
│   │   │   │   │   │   ├── HelperDefines.h
│   │   │   │   │   │   ├── amdgpu_drm.inl
│   │   │   │   │   │   ├── asahi_drm.inl
│   │   │   │   │   │   ├── asound.h
│   │   │   │   │   │   ├── asound.inl
│   │   │   │   │   │   ├── drm.h
│   │   │   │   │   │   ├── drm.inl
│   │   │   │   │   │   ├── ext_fs.h
│   │   │   │   │   │   ├── ext_fs.inl
│   │   │   │   │   │   ├── f2fs.h
│   │   │   │   │   │   ├── f2fs.inl
│   │   │   │   │   │   ├── i915_drm.inl
│   │   │   │   │   │   ├── input.h
│   │   │   │   │   │   ├── input.inl
│   │   │   │   │   │   ├── joystick.h
│   │   │   │   │   │   ├── joystick.inl
│   │   │   │   │   │   ├── lima_drm.inl
│   │   │   │   │   │   ├── msdos_fs.h
│   │   │   │   │   │   ├── msdos_fs.inl
│   │   │   │   │   │   ├── msm_drm.inl
│   │   │   │   │   │   ├── nouveau_drm.inl
│   │   │   │   │   │   ├── nova_drm.inl
│   │   │   │   │   │   ├── panfrost_drm.inl
│   │   │   │   │   │   ├── panthor_drm.inl
│   │   │   │   │   │   ├── pvr_drm.inl
│   │   │   │   │   │   ├── radeon_drm.inl
│   │   │   │   │   │   ├── sockios.h
│   │   │   │   │   │   ├── sockios.inl
│   │   │   │   │   │   ├── streams.h
│   │   │   │   │   │   ├── streams.inl
│   │   │   │   │   │   ├── usbdev.h
│   │   │   │   │   │   ├── usbdev.inl
│   │   │   │   │   │   ├── v3d_drm.inl
│   │   │   │   │   │   ├── v4l2.h
│   │   │   │   │   │   ├── v4l2.inl
│   │   │   │   │   │   ├── vc4_drm.inl
│   │   │   │   │   │   ├── virtio_drm.inl
│   │   │   │   │   │   ├── wireless.h
│   │   │   │   │   │   ├── wireless.inl
│   │   │   │   │   │   └── xe_drm.inl
│   │   │   │   │   ├── IoctlEmulation.cpp
│   │   │   │   │   ├── IoctlEmulation.h
│   │   │   │   │   ├── Ioctls.inl
│   │   │   │   │   ├── Memory.cpp
│   │   │   │   │   ├── Msg.cpp
│   │   │   │   │   ├── NotImplemented.cpp
│   │   │   │   │   ├── Sched.cpp
│   │   │   │   │   ├── Semaphore.cpp
│   │   │   │   │   ├── Signals.cpp
│   │   │   │   │   ├── Socket.cpp
│   │   │   │   │   ├── Stubs.cpp
│   │   │   │   │   ├── Syscalls.cpp
│   │   │   │   │   ├── Syscalls.h
│   │   │   │   │   ├── SyscallsEnum.h
│   │   │   │   │   ├── SyscallsNames.inl
│   │   │   │   │   ├── Thread.cpp
│   │   │   │   │   ├── Thread.h
│   │   │   │   │   ├── Time.cpp
│   │   │   │   │   ├── Timer.cpp
│   │   │   │   │   └── Types.h
│   │   │   │   └── x64/
│   │   │   │       ├── EPoll.cpp
│   │   │   │       ├── FD.cpp
│   │   │   │       ├── Info.cpp
│   │   │   │       ├── Ioctl/
│   │   │   │       │   ├── HelperDefines.h
│   │   │   │       │   ├── amdgpu_drm.inl
│   │   │   │       │   ├── asound.h
│   │   │   │       │   ├── asound.inl
│   │   │   │       │   ├── drm.h
│   │   │   │       │   ├── drm.inl
│   │   │   │       │   ├── ext_fs.h
│   │   │   │       │   ├── ext_fs.inl
│   │   │   │       │   ├── f2fs.h
│   │   │   │       │   ├── f2fs.inl
│   │   │   │       │   ├── input.h
│   │   │   │       │   ├── input.inl
│   │   │   │       │   ├── joystick.h
│   │   │   │       │   ├── joystick.inl
│   │   │   │       │   ├── msdos_fs.h
│   │   │   │       │   ├── msdos_fs.inl
│   │   │   │       │   ├── msm_drm.inl
│   │   │   │       │   ├── sockios.h
│   │   │   │       │   ├── sockios.inl
│   │   │   │       │   ├── wireless.h
│   │   │   │       │   └── wireless.inl
│   │   │   │       ├── Memory.cpp
│   │   │   │       ├── NotImplemented.cpp
│   │   │   │       ├── Semaphore.cpp
│   │   │   │       ├── Signals.cpp
│   │   │   │       ├── Syscalls.cpp
│   │   │   │       ├── Syscalls.h
│   │   │   │       ├── SyscallsEnum.h
│   │   │   │       ├── SyscallsNames.inl
│   │   │   │       ├── Thread.cpp
│   │   │   │       ├── Thread.h
│   │   │   │       ├── Time.cpp
│   │   │   │       └── Types.h
│   │   │   ├── Thunks.cpp
│   │   │   ├── Thunks.h
│   │   │   ├── VDSO_Emulation.cpp
│   │   │   └── VDSO_Emulation.h
│   │   ├── TestHarnessRunner/
│   │   │   ├── CMakeLists.txt
│   │   │   ├── TestHarnessRunner/
│   │   │   │   ├── HostRunner.cpp
│   │   │   │   └── HostRunner.h
│   │   │   └── TestHarnessRunner.cpp
│   │   └── pidof/
│   │       ├── CMakeLists.txt
│   │       └── pidof.cpp
│   └── Windows/
│       ├── ARM64EC/
│       │   ├── BTInterface.h
│       │   ├── CMakeLists.txt
│       │   ├── Module.S
│       │   ├── Module.cpp
│       │   └── libarm64ecfex.def
│       ├── CMakeLists.txt
│       ├── Common/
│       │   ├── CMakeLists.txt
│       │   ├── CPUFeatures.cpp
│       │   ├── CPUFeatures.h
│       │   ├── CRT/
│       │   │   ├── Alloc.cpp
│       │   │   ├── CMakeLists.txt
│       │   │   ├── CRT.cpp
│       │   │   ├── CRT.h
│       │   │   ├── IO.cpp
│       │   │   ├── Math.cpp
│       │   │   ├── Misc.cpp
│       │   │   ├── Musl/
│       │   │   │   ├── CMakeLists.txt
│       │   │   │   ├── __math_divzero.c
│       │   │   │   ├── __math_invalid.c
│       │   │   │   ├── __math_oflow.c
│       │   │   │   ├── __math_uflow.c
│       │   │   │   ├── __math_xflow.c
│       │   │   │   ├── exp2.c
│       │   │   │   ├── exp_data.c
│       │   │   │   ├── exp_data.h
│       │   │   │   ├── fmod.c
│       │   │   │   ├── isnan.c
│       │   │   │   ├── libm.h
│       │   │   │   ├── log2.c
│       │   │   │   ├── log2_data.c
│       │   │   │   ├── log2_data.h
│       │   │   │   ├── remainder.c
│       │   │   │   ├── remquo.c
│       │   │   │   ├── strtoimax.c
│       │   │   │   ├── strtoll.c
│       │   │   │   ├── strtoull.c
│       │   │   │   └── strtoumax.c
│       │   │   └── String.cpp
│       │   ├── CallRetStack.h
│       │   ├── Exception.h
│       │   ├── Handle.h
│       │   ├── ImageTracker.cpp
│       │   ├── ImageTracker.h
│       │   ├── InvalidationTracker.cpp
│       │   ├── InvalidationTracker.h
│       │   ├── JITGuardPage.h
│       │   ├── LoadConfig.S
│       │   ├── Logging.cpp
│       │   ├── Logging.h
│       │   ├── Module.h
│       │   ├── OvercommitTracker.h
│       │   ├── PortabilityInfo.h
│       │   ├── Priv.h
│       │   ├── SHMStats.cpp
│       │   ├── SHMStats.h
│       │   ├── TSOHandlerConfig.h
│       │   └── WinAPI/
│       │       ├── Alloc.cpp
│       │       ├── CMakeLists.txt
│       │       ├── IO.cpp
│       │       ├── Misc.cpp
│       │       └── Sync.cpp
│       ├── Defs/
│       │   ├── ntdll.def
│       │   └── wow64.def
│       ├── WOW64/
│       │   ├── BTInterface.h
│       │   ├── CMakeLists.txt
│       │   ├── Module.cpp
│       │   └── libwow64fex.def
│       └── include/
│           ├── wine/
│           │   ├── debug.h
│           │   └── unixlib.h
│           ├── winnt.h
│           └── winternl.h
├── ThunkLibs/
│   ├── Generator/
│   │   ├── CMakeLists.txt
│   │   ├── analysis.cpp
│   │   ├── analysis.h
│   │   ├── data_layout.cpp
│   │   ├── data_layout.h
│   │   ├── diagnostics.h
│   │   ├── gen.cpp
│   │   ├── interface.h
│   │   └── main.cpp
│   ├── GuestLibs/
│   │   └── CMakeLists.txt
│   ├── HostLibs/
│   │   └── CMakeLists.txt
│   ├── README.md
│   ├── include/
│   │   └── common/
│   │       ├── GeneratorInterface.h
│   │       ├── Guest.h
│   │       ├── Host.h
│   │       ├── PackedArguments.h
│   │       └── X11Manager.h
│   ├── libEGL/
│   │   ├── libEGL_Guest.cpp
│   │   ├── libEGL_Host.cpp
│   │   └── libEGL_interface.cpp
│   ├── libGL/
│   │   ├── glcorearb.h
│   │   ├── libGL_Guest.cpp
│   │   ├── libGL_Host.cpp
│   │   └── libGL_interface.cpp
│   ├── libSDL2/
│   │   ├── libSDL2_Guest.cpp
│   │   └── libSDL2_Host.cpp
│   ├── libVDSO/
│   │   ├── Types.h
│   │   ├── libVDSO_Guest.cpp
│   │   ├── libVDSO_Guest.lds
│   │   ├── libVDSO_Guest_32.lds
│   │   └── libVDSO_interface.cpp
│   ├── libX11/
│   │   └── libX11_NativeGuest.cpp
│   ├── libasound/
│   │   ├── libasound_Guest.cpp
│   │   ├── libasound_Host.cpp
│   │   └── libasound_interface.cpp
│   ├── libdrm/
│   │   ├── Guest.cpp
│   │   ├── Host.cpp
│   │   └── libdrm_interface.cpp
│   ├── libfex_malloc/
│   │   ├── Guest.cpp
│   │   ├── Host.cpp
│   │   └── Types.h
│   ├── libfex_malloc_loader/
│   │   └── Guest.cpp
│   ├── libfex_malloc_symbols/
│   │   └── Host.cpp
│   ├── libfex_thunk_test/
│   │   ├── Guest.cpp
│   │   ├── Host.cpp
│   │   ├── api.h
│   │   ├── lib.cpp
│   │   └── libfex_thunk_test_interface.cpp
│   ├── libvulkan/
│   │   ├── Guest.cpp
│   │   ├── Host.cpp
│   │   └── libvulkan_interface.cpp
│   ├── libwayland-client/
│   │   ├── Guest.cpp
│   │   ├── Host.cpp
│   │   └── libwayland-client_interface.cpp
│   └── libxshmfence/
│       ├── Guest.cpp
│       ├── Host.cpp
│       └── libxshmfence_interface.cpp
├── docs/
│   ├── CPUID.md
│   ├── DeferredSignals.md
│   ├── ProgrammingConcerns.md
│   ├── Readme_CN.md
│   ├── ReleaseProcess.md
│   ├── SourceOutline.md
│   └── allocator_usage.md
└── unittests/
    ├── 32Bit_ASM/
    │   ├── CMakeLists.txt
    │   ├── Disabled_Tests
    │   ├── Disabled_Tests_Simulator
    │   ├── Disabled_Tests_host
    │   ├── FEX_bugs/
    │   │   ├── GOT_calculation.asm
    │   │   ├── IMUL_garbagedata.asm
    │   │   ├── InlineSyscall.asm
    │   │   ├── InvertedCarrySet.asm
    │   │   ├── LoopAddressSizeCheck.asm
    │   │   ├── SignExtendBug.asm
    │   │   ├── SubAddrBug.asm
    │   │   ├── TelemetryFlags.asm
    │   │   ├── VEXW_Bug.asm
    │   │   ├── adc.asm
    │   │   ├── rep_lods_bug.asm
    │   │   └── x87_unordered_cmp_fix_32.asm
    │   ├── Known_Failures
    │   ├── Primary/
    │   │   ├── Loops.asm
    │   │   ├── Pop_Segments.asm
    │   │   ├── Primary_00.asm
    │   │   ├── Primary_00_2.asm
    │   │   ├── Primary_00_3.asm
    │   │   ├── Primary_27.asm
    │   │   ├── Primary_2F.asm
    │   │   ├── Primary_37.asm
    │   │   ├── Primary_3F.asm
    │   │   ├── Primary_60.asm
    │   │   ├── Primary_60_2.asm
    │   │   ├── Primary_61.asm
    │   │   ├── Primary_61_2.asm
    │   │   ├── Primary_8C.asm
    │   │   ├── Primary_8C_2.asm
    │   │   ├── Primary_8D.asm
    │   │   ├── Primary_A0.asm
    │   │   ├── Primary_A2.asm
    │   │   ├── Primary_A6.asm
    │   │   ├── Primary_A6_REP.asm
    │   │   ├── Primary_A6_REPNE.asm
    │   │   ├── Primary_A6_REPNE_Equal.asm
    │   │   ├── Primary_A6_REP_Equal.asm
    │   │   ├── Primary_A6_REP_Smaller.asm
    │   │   ├── Primary_A6_REP_down.asm
    │   │   ├── Primary_A6_REP_down_Equal.asm
    │   │   ├── Primary_A6_down.asm
    │   │   ├── Primary_A7_dword.asm
    │   │   ├── Primary_A7_dword_down.asm
    │   │   ├── Primary_A7_word.asm
    │   │   ├── Primary_A7_word_down.asm
    │   │   ├── Primary_AE.asm
    │   │   ├── Primary_AE_REP.asm
    │   │   ├── Primary_AE_REPNE.asm
    │   │   ├── Primary_AE_REPNE_down.asm
    │   │   ├── Primary_AE_REP_down.asm
    │   │   ├── Primary_AF_REP_dword.asm
    │   │   ├── Primary_AF_REP_word.asm
    │   │   ├── Primary_C9.asm
    │   │   ├── Primary_CE.asm
    │   │   ├── Primary_CF.asm
    │   │   ├── Primary_D4.asm
    │   │   ├── Primary_D5.asm
    │   │   ├── Primary_D6.asm
    │   │   ├── Primary_E3.asm
    │   │   ├── Primary_E8.asm
    │   │   ├── Primary_E8_2.asm
    │   │   ├── Primary_E9.asm
    │   │   ├── Primary_E9_2.asm
    │   │   └── Push_Segments.asm
    │   ├── PrimaryGroup/
    │   │   ├── 3_F6_05.asm
    │   │   ├── 5_FF_02.asm
    │   │   ├── 5_FF_02_2.asm
    │   │   └── 5_FF_02_3.asm
    │   ├── Secondary/
    │   │   ├── 07_XX_00.asm
    │   │   ├── 07_XX_04.asm
    │   │   └── 15_XX_0.asm
    │   ├── SecondaryModRM/
    │   │   ├── Reg_7_1.asm
    │   │   └── Reg_7_4_2.asm
    │   ├── TwoByte/
    │   │   ├── 0F_82.asm
    │   │   └── 0F_82_2.asm
    │   ├── VEX/
    │   │   ├── vgather_qpd_128bit_1xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_2xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_4xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_8xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_1xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_2xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_4xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_8xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_1xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_2xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_4xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_8xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_1xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_2xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_4xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_4xdisp_overflow.asm
    │   │   └── vpgather_qq_256bit_8xdisp_overflow.asm
    │   ├── X87/
    │   │   ├── D8_00.asm
    │   │   ├── D8_01.asm
    │   │   ├── D8_04.asm
    │   │   ├── D8_05.asm
    │   │   ├── D8_06.asm
    │   │   ├── D8_07.asm
    │   │   ├── D8_C0.asm
    │   │   ├── D8_C8.asm
    │   │   ├── D8_E0.asm
    │   │   ├── D8_E8.asm
    │   │   ├── D8_F0.asm
    │   │   ├── D8_F0_2.asm
    │   │   ├── D8_F8.asm
    │   │   ├── D9_00.asm
    │   │   ├── D9_02.asm
    │   │   ├── D9_03.asm
    │   │   ├── D9_05.asm
    │   │   ├── D9_06.asm
    │   │   ├── D9_06_2.asm
    │   │   ├── D9_07.asm
    │   │   ├── D9_C0.asm
    │   │   ├── D9_C8.asm
    │   │   ├── D9_D0.asm
    │   │   ├── D9_E0.asm
    │   │   ├── D9_E1.asm
    │   │   ├── D9_E8.asm
    │   │   ├── D9_E9.asm
    │   │   ├── D9_EA.asm
    │   │   ├── D9_EB.asm
    │   │   ├── D9_EC.asm
    │   │   ├── D9_ED.asm
    │   │   ├── D9_EE.asm
    │   │   ├── D9_F0.asm
    │   │   ├── D9_F1.asm
    │   │   ├── D9_F2.asm
    │   │   ├── D9_F3.asm
    │   │   ├── D9_F4.asm
    │   │   ├── D9_F5.asm
    │   │   ├── D9_F6.asm
    │   │   ├── D9_F7.asm
    │   │   ├── D9_F8.asm
    │   │   ├── D9_F9.asm
    │   │   ├── D9_FA.asm
    │   │   ├── D9_FB.asm
    │   │   ├── D9_FC.asm
    │   │   ├── D9_FD.asm
    │   │   ├── D9_FE.asm
    │   │   ├── D9_FF.asm
    │   │   ├── DA_00.asm
    │   │   ├── DA_01.asm
    │   │   ├── DA_04.asm
    │   │   ├── DA_05.asm
    │   │   ├── DA_06.asm
    │   │   ├── DA_07.asm
    │   │   ├── DA_C0.asm
    │   │   ├── DA_C8.asm
    │   │   ├── DA_D0.asm
    │   │   ├── DA_D8.asm
    │   │   ├── DB_00.asm
    │   │   ├── DB_01.asm
    │   │   ├── DB_02.asm
    │   │   ├── DB_03.asm
    │   │   ├── DB_05.asm
    │   │   ├── DB_07.asm
    │   │   ├── DB_C0.asm
    │   │   ├── DB_C8.asm
    │   │   ├── DB_D0.asm
    │   │   ├── DB_D8.asm
    │   │   ├── DB_E2.asm
    │   │   ├── DB_E3.asm
    │   │   ├── DB_E3_2.asm
    │   │   ├── DC_00.asm
    │   │   ├── DC_01.asm
    │   │   ├── DC_04.asm
    │   │   ├── DC_05.asm
    │   │   ├── DC_06.asm
    │   │   ├── DC_07.asm
    │   │   ├── DC_C0.asm
    │   │   ├── DC_C8.asm
    │   │   ├── DC_E0.asm
    │   │   ├── DC_E8.asm
    │   │   ├── DC_F0.asm
    │   │   ├── DC_F8.asm
    │   │   ├── DD_00.asm
    │   │   ├── DD_01.asm
    │   │   ├── DD_02.asm
    │   │   ├── DD_03.asm
    │   │   ├── DD_04.asm
    │   │   ├── DD_04_2.asm
    │   │   ├── DD_07.asm
    │   │   ├── DD_C0.asm
    │   │   ├── DD_D0.asm
    │   │   ├── DD_D8.asm
    │   │   ├── DE_00.asm
    │   │   ├── DE_01.asm
    │   │   ├── DE_04.asm
    │   │   ├── DE_05.asm
    │   │   ├── DE_06.asm
    │   │   ├── DE_07.asm
    │   │   ├── DE_C0.asm
    │   │   ├── DE_C8.asm
    │   │   ├── DE_E0.asm
    │   │   ├── DE_E8.asm
    │   │   ├── DE_F0.asm
    │   │   ├── DE_F8.asm
    │   │   ├── DF_00.asm
    │   │   ├── DF_01.asm
    │   │   ├── DF_02.asm
    │   │   ├── DF_03.asm
    │   │   ├── DF_05.asm
    │   │   ├── DF_07.asm
    │   │   ├── DF_C0.asm
    │   │   ├── DF_E0.asm
    │   │   ├── FST_AddrModes.asm
    │   │   ├── RoundingNeg.asm
    │   │   ├── RoundingPos.asm
    │   │   ├── invalid_div_zero.asm
    │   │   ├── invalid_fcos_infinity.asm
    │   │   ├── invalid_fist_nan.asm
    │   │   ├── invalid_fist_overflow.asm
    │   │   ├── invalid_fist_overflow_16bit.asm
    │   │   ├── invalid_fist_overflow_32bit.asm
    │   │   ├── invalid_fist_overflow_64bit.asm
    │   │   ├── invalid_fprem_infinity.asm
    │   │   ├── invalid_fptan_infinity.asm
    │   │   ├── invalid_fsin_infinity.asm
    │   │   ├── invalid_fsincos_infinity.asm
    │   │   ├── invalid_infinity_fsub_memory.asm
    │   │   ├── invalid_infinity_fsubr_infinity.asm
    │   │   ├── invalid_infinity_mul_zero.asm
    │   │   ├── invalid_infinity_ops.asm
    │   │   ├── invalid_infinity_sub_infinity.asm
    │   │   ├── invalid_neg_infinity_sub_neg_infinity.asm
    │   │   ├── invalid_reduced_precision.asm
    │   │   ├── invalid_simple_test.asm
    │   │   ├── invalid_sqrt_negative.asm
    │   │   ├── valid_fist_16bit.asm
    │   │   └── valid_operation.asm
    │   ├── arpl.asm
    │   └── arpl_2.asm
    ├── APITests/
    │   ├── Allocator.cpp
    │   ├── ArgumentParser.cpp
    │   ├── CMakeLists.txt
    │   ├── ExtendedVolatileMetadata.cpp
    │   ├── FileMappingBaseAddress.cpp
    │   ├── Filesystem.cpp
    │   ├── InterruptableConditionVariable.cpp
    │   ├── StringUtils.cpp
    │   └── fextl_function.cpp
    ├── ASM/
    │   ├── 3DNow/
    │   │   ├── 0C.asm
    │   │   ├── 0D.asm
    │   │   ├── 0E.asm
    │   │   ├── 1C.asm
    │   │   ├── 1D.asm
    │   │   ├── 86.asm
    │   │   ├── 87.asm
    │   │   ├── 8A.asm
    │   │   ├── 8E.asm
    │   │   ├── 90.asm
    │   │   ├── 94.asm
    │   │   ├── 96.asm
    │   │   ├── 97.asm
    │   │   ├── 9A.asm
    │   │   ├── 9E.asm
    │   │   ├── A0.asm
    │   │   ├── A4.asm
    │   │   ├── A6.asm
    │   │   ├── A7.asm
    │   │   ├── AA.asm
    │   │   ├── AE.asm
    │   │   ├── B0.asm
    │   │   ├── B4.asm
    │   │   ├── B6.asm
    │   │   ├── B7.asm
    │   │   ├── BB.asm
    │   │   └── BF.asm
    │   ├── Atomics/
    │   │   ├── adc_atomic16.asm
    │   │   ├── adc_atomic32.asm
    │   │   ├── adc_atomic64.asm
    │   │   ├── neg_atomic16.asm
    │   │   ├── neg_atomic32.asm
    │   │   ├── neg_atomic64.asm
    │   │   ├── not_atomic16.asm
    │   │   ├── not_atomic32.asm
    │   │   ├── not_atomic64.asm
    │   │   ├── sbb_atomic16.asm
    │   │   ├── sbb_atomic32.asm
    │   │   └── sbb_atomic64.asm
    │   ├── CALL.asm
    │   ├── CMakeLists.txt
    │   ├── ConstProp/
    │   │   └── ConstPooling.asm
    │   ├── DAZTest.asm
    │   ├── Disabled_Tests
    │   ├── Disabled_Tests_ARMv8.0
    │   ├── Disabled_Tests_ARMv8.2
    │   ├── Disabled_Tests_ARMv8.4
    │   ├── Disabled_Tests_Simulator
    │   ├── Disabled_Tests_host
    │   ├── Disabled_Tests_x64
    │   ├── Displacement_Encoding.asm
    │   ├── FEX_bugs/
    │   │   ├── 32bit_syscall.asm
    │   │   ├── 3DNow_ModRMSIBDecode.asm
    │   │   ├── BEXTR_flags.asm
    │   │   ├── BLSI_flags.asm
    │   │   ├── BLSMSK_flags.asm
    │   │   ├── BLSR_flags.asm
    │   │   ├── BT_flags.asm
    │   │   ├── BZHI_Sign.asm
    │   │   ├── BitConditionCheck.asm
    │   │   ├── Blake3.asm
    │   │   ├── BranchConditionCheck.asm
    │   │   ├── CodeBufferOverflow.asm
    │   │   ├── Divide32.asm
    │   │   ├── H0F3AREXBug.asm
    │   │   ├── IMUL_garbagedata_negative.asm
    │   │   ├── InitialPFFlag.asm
    │   │   ├── LargeRotatesForSmallSizes.asm
    │   │   ├── LargeRotatesForSmallSizes_More.asm
    │   │   ├── LoadAtBoundary_LowerPrecision.asm
    │   │   ├── LongSignedDivide.asm
    │   │   ├── LoopAddressSizeCheck.asm
    │   │   ├── MinMaxNaN.asm
    │   │   ├── MoveMerging.asm
    │   │   ├── NegativeCallAddressSizeOverride.asm
    │   │   ├── OptSizeConfusion.asm
    │   │   ├── PSRLDQBuf.asm
    │   │   ├── Push.asm
    │   │   ├── REX/
    │   │   │   ├── 0F_38.asm
    │   │   │   ├── 0F_3A.asm
    │   │   │   ├── DDDNow.asm
    │   │   │   ├── Primary.asm
    │   │   │   ├── Primary_2.asm
    │   │   │   └── TwoByte.asm
    │   │   ├── RegCacheMMX.asm
    │   │   ├── SBCSmall.asm
    │   │   ├── SHRD_OF.asm
    │   │   ├── SIBScaleTranspose.asm
    │   │   ├── SegmentAddressOverride.asm
    │   │   ├── SelfPop.asm
    │   │   ├── ShiftConstantBug.asm
    │   │   ├── ShiftPF.asm
    │   │   ├── ShiftZeroFlagsUpdate.asm
    │   │   ├── SmallShiftFlags.asm
    │   │   ├── Test_CmpSelect_Merge.asm
    │   │   ├── Test_CmpSelect_Merge_Float.asm
    │   │   ├── Test_CmpSelect_Merge_Float_branch.asm
    │   │   ├── Test_CmpSelect_Merge_branch.asm
    │   │   ├── Test_JP.asm
    │   │   ├── Test_PF_Zero_Shift.asm
    │   │   ├── TrickyRA.asm
    │   │   ├── UnalignedLoadStoreSIGBUS.asm
    │   │   ├── VectorLoadCrash.asm
    │   │   ├── VectorShift_zero.asm
    │   │   ├── VectorShift_zero_256.asm
    │   │   ├── VectorShift_zero_avx_128.asm
    │   │   ├── X87MMXNZCV.asm
    │   │   ├── XeSS_quadratic.asm
    │   │   ├── adcx_size.asm
    │   │   ├── add_sub_carry.asm
    │   │   ├── add_sub_carry_2.asm
    │   │   ├── cmpxchg.asm
    │   │   ├── fnsave_fnrstor_size.asm
    │   │   ├── fxrstor_bug.asm
    │   │   ├── fxsave_bug.asm
    │   │   ├── issue5084_crossblock_const.asm
    │   │   ├── mmx_x87_register_conflating.asm
    │   │   ├── mov_address_size_override.asm
    │   │   ├── non_fatal_syscall.asm
    │   │   ├── nzcv_implicit_clobber.asm
    │   │   ├── nzcv_rmw.asm
    │   │   ├── nzcv_spill_enderlilies.asm
    │   │   ├── overlapping_memcpy_bug.asm
    │   │   ├── pcmpestri_garbage_rcx.asm
    │   │   ├── repeat_on_incdec.asm
    │   │   ├── repeat_stringops_crash.asm
    │   │   ├── rex_b_mmx.asm
    │   │   ├── rotate_zero_extend_with_zero.asm
    │   │   ├── sbbNZCVBug.asm
    │   │   ├── smallvectorload_regreg.asm
    │   │   ├── tls_vector_element.asm
    │   │   ├── vcvtdq2ps_incorrect_size.asm
    │   │   ├── vgather_xmm4.asm
    │   │   ├── vmov_size_test.asm
    │   │   ├── vroundscalar_sve256.asm
    │   │   ├── x87DecrementStackBug.asm
    │   │   ├── x87IncrementStackBug.asm
    │   │   ├── x87_fprem.asm
    │   │   ├── x87_integer_indefinite.asm
    │   │   ├── x87_unordered_cmp_fix.asm
    │   │   ├── xor_flags.asm
    │   │   └── zero-ah.asm
    │   ├── Flags/
    │   │   └── Shift.asm
    │   ├── GameTests/
    │   │   └── EnderLiliesFlash.asm
    │   ├── H0F38/
    │   │   ├── 0_F0.asm
    │   │   ├── 0_F1.asm
    │   │   ├── 66_00.asm
    │   │   ├── 66_00_2.asm
    │   │   ├── 66_01.asm
    │   │   ├── 66_02.asm
    │   │   ├── 66_03.asm
    │   │   ├── 66_04.asm
    │   │   ├── 66_05.asm
    │   │   ├── 66_06.asm
    │   │   ├── 66_07.asm
    │   │   ├── 66_08.asm
    │   │   ├── 66_09.asm
    │   │   ├── 66_0A.asm
    │   │   ├── 66_0B.asm
    │   │   ├── 66_10.asm
    │   │   ├── 66_14.asm
    │   │   ├── 66_15.asm
    │   │   ├── 66_17.asm
    │   │   ├── 66_17_2.asm
    │   │   ├── 66_1C.asm
    │   │   ├── 66_1D.asm
    │   │   ├── 66_1E.asm
    │   │   ├── 66_20.asm
    │   │   ├── 66_21.asm
    │   │   ├── 66_22.asm
    │   │   ├── 66_23.asm
    │   │   ├── 66_24.asm
    │   │   ├── 66_25.asm
    │   │   ├── 66_28.asm
    │   │   ├── 66_29.asm
    │   │   ├── 66_2A.asm
    │   │   ├── 66_2B.asm
    │   │   ├── 66_30.asm
    │   │   ├── 66_31.asm
    │   │   ├── 66_32.asm
    │   │   ├── 66_33.asm
    │   │   ├── 66_34.asm
    │   │   ├── 66_35.asm
    │   │   ├── 66_37.asm
    │   │   ├── 66_38.asm
    │   │   ├── 66_39.asm
    │   │   ├── 66_3A.asm
    │   │   ├── 66_3B.asm
    │   │   ├── 66_3C.asm
    │   │   ├── 66_3D.asm
    │   │   ├── 66_3E.asm
    │   │   ├── 66_3F.asm
    │   │   ├── 66_40.asm
    │   │   ├── 66_41.asm
    │   │   ├── 66_DB.asm
    │   │   ├── 66_DC.asm
    │   │   ├── 66_DD.asm
    │   │   ├── 66_DE.asm
    │   │   ├── 66_DF.asm
    │   │   ├── 66_F0.asm
    │   │   ├── 66_F0_2.asm
    │   │   ├── 66_F1.asm
    │   │   ├── 66_F1_2.asm
    │   │   ├── 66_F1_3.asm
    │   │   ├── F2_F0.asm
    │   │   ├── F2_F1.asm
    │   │   ├── XX_00.asm
    │   │   ├── XX_00_2.asm
    │   │   ├── XX_01.asm
    │   │   ├── XX_02.asm
    │   │   ├── XX_03.asm
    │   │   ├── XX_04.asm
    │   │   ├── XX_05.asm
    │   │   ├── XX_06.asm
    │   │   ├── XX_07.asm
    │   │   ├── XX_08.asm
    │   │   ├── XX_09.asm
    │   │   ├── XX_0A.asm
    │   │   ├── XX_0B.asm
    │   │   ├── XX_1C.asm
    │   │   ├── XX_1D.asm
    │   │   ├── XX_1E.asm
    │   │   ├── adcx.asm
    │   │   ├── adox.asm
    │   │   ├── sha1msg1.asm
    │   │   ├── sha1msg2.asm
    │   │   ├── sha1nexte.asm
    │   │   ├── sha256msg1.asm
    │   │   ├── sha256msg2.asm
    │   │   └── sha256rnds2.asm
    │   ├── H0F3A/
    │   │   ├── 0_66_0F.asm
    │   │   ├── 0_66_21.asm
    │   │   ├── 0_66_DF.asm
    │   │   ├── 0_XX_0F.asm
    │   │   ├── 66_08.asm
    │   │   ├── 66_09.asm
    │   │   ├── 66_0A.asm
    │   │   ├── 66_0B.asm
    │   │   ├── 66_0C.asm
    │   │   ├── 66_0D.asm
    │   │   ├── 66_0E.asm
    │   │   ├── 66_14.asm
    │   │   ├── 66_14_2.asm
    │   │   ├── 66_15.asm
    │   │   ├── 66_16.asm
    │   │   ├── 66_16_1.asm
    │   │   ├── 66_17.asm
    │   │   ├── 66_20.asm
    │   │   ├── 66_20_1.asm
    │   │   ├── 66_22.asm
    │   │   ├── 66_22_1.asm
    │   │   ├── 66_22_2.asm
    │   │   ├── 66_40.asm
    │   │   ├── 66_40_2.asm
    │   │   ├── 66_41.asm
    │   │   ├── 66_41_2.asm
    │   │   ├── 66_42.asm
    │   │   ├── pclmulqdq.asm
    │   │   ├── pcmpestri_equal_any.asm
    │   │   ├── pcmpestri_equal_each.asm
    │   │   ├── pcmpestri_equal_ordered.asm
    │   │   ├── pcmpestri_ranges.asm
    │   │   ├── pcmpestrm_equal_any.asm
    │   │   ├── pcmpestrm_equal_each.asm
    │   │   ├── pcmpestrm_equal_ordered.asm
    │   │   ├── pcmpestrm_ranges.asm
    │   │   ├── pcmpistri_equal_any.asm
    │   │   ├── pcmpistri_equal_each.asm
    │   │   ├── pcmpistri_equal_ordered.asm
    │   │   ├── pcmpistri_ranges.asm
    │   │   ├── pcmpistrm_equal_any.asm
    │   │   ├── pcmpistrm_equal_each.asm
    │   │   ├── pcmpistrm_equal_ordered.asm
    │   │   ├── pcmpistrm_ranges.asm
    │   │   └── sha1rnds4.asm
    │   ├── Includes/
    │   │   ├── checkprecision.mac
    │   │   ├── modrm_oob_macros.mac
    │   │   ├── x87cw.mac
    │   │   └── xsave_macros.mac
    │   ├── JMP.asm
    │   ├── Known_Failures
    │   ├── Known_Failures_host
    │   ├── Known_Failures_jit
    │   ├── MOVHPD.asm
    │   ├── MemoryData.asm
    │   ├── Multiblock/
    │   │   └── ReachableInvalidCode.asm
    │   ├── OpSize/
    │   │   ├── 15_BYTE.asm
    │   │   ├── 66_10.asm
    │   │   ├── 66_11.asm
    │   │   ├── 66_12.asm
    │   │   ├── 66_13.asm
    │   │   ├── 66_14.asm
    │   │   ├── 66_15.asm
    │   │   ├── 66_16.asm
    │   │   ├── 66_17.asm
    │   │   ├── 66_28.asm
    │   │   ├── 66_29.asm
    │   │   ├── 66_2A.asm
    │   │   ├── 66_2B.asm
    │   │   ├── 66_2C.asm
    │   │   ├── 66_2D.asm
    │   │   ├── 66_2E.asm
    │   │   ├── 66_2F.asm
    │   │   ├── 66_50.asm
    │   │   ├── 66_51.asm
    │   │   ├── 66_54.asm
    │   │   ├── 66_55.asm
    │   │   ├── 66_56.asm
    │   │   ├── 66_57.asm
    │   │   ├── 66_58.asm
    │   │   ├── 66_59.asm
    │   │   ├── 66_5A.asm
    │   │   ├── 66_5A_1.asm
    │   │   ├── 66_5B.asm
    │   │   ├── 66_5B_1.asm
    │   │   ├── 66_5C.asm
    │   │   ├── 66_5D.asm
    │   │   ├── 66_5E.asm
    │   │   ├── 66_5F.asm
    │   │   ├── 66_60.asm
    │   │   ├── 66_61.asm
    │   │   ├── 66_62.asm
    │   │   ├── 66_63.asm
    │   │   ├── 66_64.asm
    │   │   ├── 66_65.asm
    │   │   ├── 66_66.asm
    │   │   ├── 66_67.asm
    │   │   ├── 66_68.asm
    │   │   ├── 66_69.asm
    │   │   ├── 66_6A.asm
    │   │   ├── 66_6B.asm
    │   │   ├── 66_6C.asm
    │   │   ├── 66_6D.asm
    │   │   ├── 66_6E.asm
    │   │   ├── 66_6F.asm
    │   │   ├── 66_70.asm
    │   │   ├── 66_74.asm
    │   │   ├── 66_75.asm
    │   │   ├── 66_76.asm
    │   │   ├── 66_7C.asm
    │   │   ├── 66_7D.asm
    │   │   ├── 66_7E.asm
    │   │   ├── 66_7F.asm
    │   │   ├── 66_C2.asm
    │   │   ├── 66_C4.asm
    │   │   ├── 66_C4_2.asm
    │   │   ├── 66_C5.asm
    │   │   ├── 66_C5_2.asm
    │   │   ├── 66_C6.asm
    │   │   ├── 66_D0.asm
    │   │   ├── 66_D1.asm
    │   │   ├── 66_D2.asm
    │   │   ├── 66_D3.asm
    │   │   ├── 66_D4.asm
    │   │   ├── 66_D5.asm
    │   │   ├── 66_D6.asm
    │   │   ├── 66_D7.asm
    │   │   ├── 66_D8.asm
    │   │   ├── 66_D9.asm
    │   │   ├── 66_DA.asm
    │   │   ├── 66_DB.asm
    │   │   ├── 66_DC.asm
    │   │   ├── 66_DD.asm
    │   │   ├── 66_DE.asm
    │   │   ├── 66_DF.asm
    │   │   ├── 66_E0.asm
    │   │   ├── 66_E1.asm
    │   │   ├── 66_E2.asm
    │   │   ├── 66_E3.asm
    │   │   ├── 66_E4.asm
    │   │   ├── 66_E5.asm
    │   │   ├── 66_E6.asm
    │   │   ├── 66_E6_1.asm
    │   │   ├── 66_E7.asm
    │   │   ├── 66_E8.asm
    │   │   ├── 66_E9.asm
    │   │   ├── 66_EA.asm
    │   │   ├── 66_EB.asm
    │   │   ├── 66_EC.asm
    │   │   ├── 66_ED.asm
    │   │   ├── 66_EE.asm
    │   │   ├── 66_EF.asm
    │   │   ├── 66_F1.asm
    │   │   ├── 66_F2.asm
    │   │   ├── 66_F3.asm
    │   │   ├── 66_F4.asm
    │   │   ├── 66_F5.asm
    │   │   ├── 66_F6.asm
    │   │   ├── 66_F7.asm
    │   │   ├── 66_F8.asm
    │   │   ├── 66_F9.asm
    │   │   ├── 66_FA.asm
    │   │   ├── 66_FB.asm
    │   │   ├── 66_FC.asm
    │   │   ├── 66_FD.asm
    │   │   └── 66_FE.asm
    │   ├── Primary/
    │   │   ├── Pause.asm
    │   │   ├── Primary_00.asm
    │   │   ├── Primary_01_Atomic16.asm
    │   │   ├── Primary_01_Atomic32.asm
    │   │   ├── Primary_01_Atomic64.asm
    │   │   ├── Primary_08.asm
    │   │   ├── Primary_09_Atomic16.asm
    │   │   ├── Primary_09_Atomic32.asm
    │   │   ├── Primary_09_Atomic64.asm
    │   │   ├── Primary_10.asm
    │   │   ├── Primary_10_2.asm
    │   │   ├── Primary_10_3.asm
    │   │   ├── Primary_10_4.asm
    │   │   ├── Primary_18.asm
    │   │   ├── Primary_18_2.asm
    │   │   ├── Primary_18_3.asm
    │   │   ├── Primary_18_4.asm
    │   │   ├── Primary_20.asm
    │   │   ├── Primary_23_Atomic16.asm
    │   │   ├── Primary_23_Atomic32.asm
    │   │   ├── Primary_23_Atomic64.asm
    │   │   ├── Primary_28.asm
    │   │   ├── Primary_29_Atomic16.asm
    │   │   ├── Primary_29_Atomic32.asm
    │   │   ├── Primary_29_Atomic64.asm
    │   │   ├── Primary_30.asm
    │   │   ├── Primary_31_Atomic16.asm
    │   │   ├── Primary_31_Atomic32.asm
    │   │   ├── Primary_31_Atomic64.asm
    │   │   ├── Primary_38.asm
    │   │   ├── Primary_39.asm
    │   │   ├── Primary_3A.asm
    │   │   ├── Primary_3B.asm
    │   │   ├── Primary_3C.asm
    │   │   ├── Primary_3D.asm
    │   │   ├── Primary_50.asm
    │   │   ├── Primary_50_2.asm
    │   │   ├── Primary_63.asm
    │   │   ├── Primary_63_2.asm
    │   │   ├── Primary_68.asm
    │   │   ├── Primary_69.asm
    │   │   ├── Primary_6A.asm
    │   │   ├── Primary_6A_2.asm
    │   │   ├── Primary_6B.asm
    │   │   ├── Primary_84.asm
    │   │   ├── Primary_84_2.asm
    │   │   ├── Primary_85.asm
    │   │   ├── Primary_86.asm
    │   │   ├── Primary_87.asm
    │   │   ├── Primary_87_2.asm
    │   │   ├── Primary_87_3.asm
    │   │   ├── Primary_87_Atomic16.asm
    │   │   ├── Primary_87_Atomic32.asm
    │   │   ├── Primary_87_Atomic64.asm
    │   │   ├── Primary_8C.asm
    │   │   ├── Primary_8C_2.asm
    │   │   ├── Primary_8D.asm
    │   │   ├── Primary_8D_2.asm
    │   │   ├── Primary_90.asm
    │   │   ├── Primary_90_2.asm
    │   │   ├── Primary_90_3.asm
    │   │   ├── Primary_90_4.asm
    │   │   ├── Primary_98.asm
    │   │   ├── Primary_98_2.asm
    │   │   ├── Primary_99.asm
    │   │   ├── Primary_99_2.asm
    │   │   ├── Primary_9B.asm
    │   │   ├── Primary_9C.asm
    │   │   ├── Primary_9D.asm
    │   │   ├── Primary_9E.asm
    │   │   ├── Primary_A0.asm
    │   │   ├── Primary_A2.asm
    │   │   ├── Primary_A4.asm
    │   │   ├── Primary_A4_REP.asm
    │   │   ├── Primary_A4_REPNE.asm
    │   │   ├── Primary_A4_REPNE_Down.asm
    │   │   ├── Primary_A4_REPNE_many.asm
    │   │   ├── Primary_A4_REP_Down.asm
    │   │   ├── Primary_A4_REP_Down_Overlapping.asm
    │   │   ├── Primary_A4_REP_Overlapping.asm
    │   │   ├── Primary_A4_REP_many.asm
    │   │   ├── Primary_A5.asm
    │   │   ├── Primary_A5_REP.asm
    │   │   ├── Primary_A5_REPNE.asm
    │   │   ├── Primary_A5_REPNE_Down.asm
    │   │   ├── Primary_A5_REP_Down.asm
    │   │   ├── Primary_A5_dword.asm
    │   │   ├── Primary_A5_dword_REP.asm
    │   │   ├── Primary_A5_dword_REPNE.asm
    │   │   ├── Primary_A5_dword_REPNE_Down.asm
    │   │   ├── Primary_A5_dword_REP_Down.asm
    │   │   ├── Primary_A5_qword.asm
    │   │   ├── Primary_A5_qword_REP.asm
    │   │   ├── Primary_A5_qword_REPNE.asm
    │   │   ├── Primary_A5_qword_REPNE_Down.asm
    │   │   ├── Primary_A5_qword_REP_Down.asm
    │   │   ├── Primary_A6.asm
    │   │   ├── Primary_A6_REP.asm
    │   │   ├── Primary_A6_REPNE.asm
    │   │   ├── Primary_A6_REPNE_Equal.asm
    │   │   ├── Primary_A6_REP_Equal.asm
    │   │   ├── Primary_A6_REP_Smaller.asm
    │   │   ├── Primary_A6_REP_addrmod.asm
    │   │   ├── Primary_A6_REP_down.asm
    │   │   ├── Primary_A6_REP_down_Equal.asm
    │   │   ├── Primary_A6_addrmod.asm
    │   │   ├── Primary_A6_down.asm
    │   │   ├── Primary_A7_dword.asm
    │   │   ├── Primary_A7_dword_down.asm
    │   │   ├── Primary_A7_qword.asm
    │   │   ├── Primary_A7_qword_down.asm
    │   │   ├── Primary_A7_word.asm
    │   │   ├── Primary_A7_word_down.asm
    │   │   ├── Primary_A8.asm
    │   │   ├── Primary_A9.asm
    │   │   ├── Primary_AA.asm
    │   │   ├── Primary_AA_REP.asm
    │   │   ├── Primary_AA_REPNE.asm
    │   │   ├── Primary_AA_REPNE_down.asm
    │   │   ├── Primary_AA_REP_down.asm
    │   │   ├── Primary_AB_dword.asm
    │   │   ├── Primary_AB_dword_REP.asm
    │   │   ├── Primary_AB_dword_REPNE.asm
    │   │   ├── Primary_AB_dword_REPNE_down.asm
    │   │   ├── Primary_AB_dword_REP_down.asm
    │   │   ├── Primary_AB_qword.asm
    │   │   ├── Primary_AB_qword_REP.asm
    │   │   ├── Primary_AB_qword_REPNE.asm
    │   │   ├── Primary_AB_qword_REPNE_down.asm
    │   │   ├── Primary_AB_qword_REP_down.asm
    │   │   ├── Primary_AB_word.asm
    │   │   ├── Primary_AB_word_REP.asm
    │   │   ├── Primary_AB_word_REPNE.asm
    │   │   ├── Primary_AB_word_REPNE_down.asm
    │   │   ├── Primary_AB_word_REP_down.asm
    │   │   ├── Primary_AC.asm
    │   │   ├── Primary_AC_REP.asm
    │   │   ├── Primary_AC_REPNE.asm
    │   │   ├── Primary_AC_REPNE_down.asm
    │   │   ├── Primary_AC_REP_down.asm
    │   │   ├── Primary_AD_REPNE_dword.asm
    │   │   ├── Primary_AD_REPNE_dword_down.asm
    │   │   ├── Primary_AD_REPNE_qword.asm
    │   │   ├── Primary_AD_REPNE_qword_down.asm
    │   │   ├── Primary_AD_REPNE_word.asm
    │   │   ├── Primary_AD_REPNE_word_down.asm
    │   │   ├── Primary_AD_REP_dword.asm
    │   │   ├── Primary_AD_REP_dword_down.asm
    │   │   ├── Primary_AD_REP_qword.asm
    │   │   ├── Primary_AD_REP_qword_down.asm
    │   │   ├── Primary_AD_REP_word.asm
    │   │   ├── Primary_AD_REP_word_down.asm
    │   │   ├── Primary_AD_dword.asm
    │   │   ├── Primary_AD_qword.asm
    │   │   ├── Primary_AD_word.asm
    │   │   ├── Primary_AE.asm
    │   │   ├── Primary_AE_REP.asm
    │   │   ├── Primary_AE_REPNE.asm
    │   │   ├── Primary_AE_REPNE_down.asm
    │   │   ├── Primary_AE_REP_down.asm
    │   │   ├── Primary_AE_addrmod.asm
    │   │   ├── Primary_AF_REP_dword.asm
    │   │   ├── Primary_AF_REP_qword.asm
    │   │   ├── Primary_AF_REP_word.asm
    │   │   ├── Primary_B0.asm
    │   │   ├── Primary_B8.asm
    │   │   ├── Primary_B8_2.asm
    │   │   ├── Primary_B8_3.asm
    │   │   ├── Primary_C2.asm
    │   │   ├── Primary_C3.asm
    │   │   ├── Primary_C8.asm
    │   │   ├── Primary_C8_2.asm
    │   │   ├── Primary_C8_o16.asm
    │   │   ├── Primary_C9.asm
    │   │   ├── Primary_C9_o16.asm
    │   │   ├── Primary_CF.asm
    │   │   ├── Primary_D7.asm
    │   │   ├── Primary_E0.asm
    │   │   ├── Primary_E1.asm
    │   │   ├── Primary_E2.asm
    │   │   ├── Primary_E3.asm
    │   │   ├── Primary_E8.asm
    │   │   ├── Primary_E9.asm
    │   │   ├── Primary_EB.asm
    │   │   ├── Primary_F5.asm
    │   │   ├── Primary_F8.asm
    │   │   ├── Primary_F9.asm
    │   │   ├── Primary_FC.asm
    │   │   ├── Primary_FD.asm
    │   │   ├── Primary_FF_0_Atomic16.asm
    │   │   ├── Primary_FF_0_Atomic32.asm
    │   │   ├── Primary_FF_0_Atomic64.asm
    │   │   ├── Primary_FF_1_Atomic16.asm
    │   │   ├── Primary_FF_1_Atomic32.asm
    │   │   ├── Primary_FF_1_Atomic64.asm
    │   │   ├── ROL_Flags.asm
    │   │   ├── ROL_OF.asm
    │   │   ├── ROR_Flags.asm
    │   │   ├── ROR_OF.asm
    │   │   ├── SHL.asm
    │   │   └── SHR.asm
    │   ├── PrimaryGroup/
    │   │   ├── 1_80_00.asm
    │   │   ├── 1_80_01.asm
    │   │   ├── 1_80_02.asm
    │   │   ├── 1_80_02_2.asm
    │   │   ├── 1_80_03.asm
    │   │   ├── 1_80_03_2.asm
    │   │   ├── 1_80_04.asm
    │   │   ├── 1_80_05.asm
    │   │   ├── 1_80_06.asm
    │   │   ├── 1_80_07.asm
    │   │   ├── 1_81_00.asm
    │   │   ├── 1_81_01.asm
    │   │   ├── 1_81_02.asm
    │   │   ├── 1_81_02_2.asm
    │   │   ├── 1_81_03.asm
    │   │   ├── 1_81_03_2.asm
    │   │   ├── 1_81_04.asm
    │   │   ├── 1_81_05.asm
    │   │   ├── 1_81_06.asm
    │   │   ├── 1_81_07.asm
    │   │   ├── 1_83_00.asm
    │   │   ├── 1_83_01.asm
    │   │   ├── 1_83_02.asm
    │   │   ├── 1_83_02_2.asm
    │   │   ├── 1_83_03.asm
    │   │   ├── 1_83_03_2.asm
    │   │   ├── 1_83_04.asm
    │   │   ├── 1_83_05.asm
    │   │   ├── 1_83_06.asm
    │   │   ├── 1_83_07.asm
    │   │   ├── 2_C0_00.asm
    │   │   ├── 2_C0_01.asm
    │   │   ├── 2_C0_02.asm
    │   │   ├── 2_C0_02_2.asm
    │   │   ├── 2_C0_02_3.asm
    │   │   ├── 2_C0_02_4.asm
    │   │   ├── 2_C0_03.asm
    │   │   ├── 2_C0_03_2.asm
    │   │   ├── 2_C0_03_3.asm
    │   │   ├── 2_C0_03_4.asm
    │   │   ├── 2_C0_04.asm
    │   │   ├── 2_C0_05.asm
    │   │   ├── 2_C0_07.asm
    │   │   ├── 2_C0_07_2.asm
    │   │   ├── 2_C1_00.asm
    │   │   ├── 2_C1_01.asm
    │   │   ├── 2_C1_04.asm
    │   │   ├── 2_C1_05.asm
    │   │   ├── 2_C1_05_2.asm
    │   │   ├── 2_C1_07.asm
    │   │   ├── 2_D0_00.asm
    │   │   ├── 2_D0_01.asm
    │   │   ├── 2_D0_02.asm
    │   │   ├── 2_D0_02_2.asm
    │   │   ├── 2_D0_03.asm
    │   │   ├── 2_D0_03_2.asm
    │   │   ├── 2_D0_04.asm
    │   │   ├── 2_D0_05.asm
    │   │   ├── 2_D0_07.asm
    │   │   ├── 2_D1_00.asm
    │   │   ├── 2_D1_01.asm
    │   │   ├── 2_D1_02.asm
    │   │   ├── 2_D1_02_2.asm
    │   │   ├── 2_D1_02_3.asm
    │   │   ├── 2_D1_02_4.asm
    │   │   ├── 2_D1_02_5.asm
    │   │   ├── 2_D1_02_6.asm
    │   │   ├── 2_D1_03.asm
    │   │   ├── 2_D1_03_2.asm
    │   │   ├── 2_D1_03_3.asm
    │   │   ├── 2_D1_03_4.asm
    │   │   ├── 2_D1_03_5.asm
    │   │   ├── 2_D1_03_6.asm
    │   │   ├── 2_D1_04.asm
    │   │   ├── 2_D1_05.asm
    │   │   ├── 2_D1_07.asm
    │   │   ├── 2_D1_07_2.asm
    │   │   ├── 2_D2_02.asm
    │   │   ├── 2_D2_02_2.asm
    │   │   ├── 2_D2_02_3.asm
    │   │   ├── 2_D2_03.asm
    │   │   ├── 2_D2_03_2.asm
    │   │   ├── 2_D2_03_3.asm
    │   │   ├── 2_D3_00.asm
    │   │   ├── 2_D3_00_2.asm
    │   │   ├── 2_D3_00_3.asm
    │   │   ├── 2_D3_01.asm
    │   │   ├── 2_D3_01_2.asm
    │   │   ├── 2_D3_01_3.asm
    │   │   ├── 2_D3_02.asm
    │   │   ├── 2_D3_02_2.asm
    │   │   ├── 2_D3_02_3.asm
    │   │   ├── 2_D3_02_4.asm
    │   │   ├── 2_D3_02_5.asm
    │   │   ├── 2_D3_03.asm
    │   │   ├── 2_D3_03_2.asm
    │   │   ├── 2_D3_03_3.asm
    │   │   ├── 2_D3_03_4.asm
    │   │   ├── 2_D3_03_5.asm
    │   │   ├── 2_D3_03_6.asm
    │   │   ├── 2_D3_03_7.asm
    │   │   ├── 2_D3_03_8.asm
    │   │   ├── 2_D3_04.asm
    │   │   ├── 2_D3_05.asm
    │   │   ├── 2_D3_07.asm
    │   │   ├── 2_D3_07_2.asm
    │   │   ├── 3_F6_00.asm
    │   │   ├── 3_F6_02.asm
    │   │   ├── 3_F6_02_2.asm
    │   │   ├── 3_F6_03.asm
    │   │   ├── 3_F6_03_2.asm
    │   │   ├── 3_F6_04.asm
    │   │   ├── 3_F6_05.asm
    │   │   ├── 3_F6_05_2.asm
    │   │   ├── 3_F6_05_3.asm
    │   │   ├── 3_F6_05_4.asm
    │   │   ├── 3_F6_05_5.asm
    │   │   ├── 3_F6_06.asm
    │   │   ├── 3_F6_07.asm
    │   │   ├── 3_F6_07_2.asm
    │   │   ├── 3_F7_00.asm
    │   │   ├── 3_F7_00_2.asm
    │   │   ├── 3_F7_02.asm
    │   │   ├── 3_F7_02_2.asm
    │   │   ├── 3_F7_02_3.asm
    │   │   ├── 3_F7_03.asm
    │   │   ├── 3_F7_03_2.asm
    │   │   ├── 3_F7_04.asm
    │   │   ├── 3_F7_05.asm
    │   │   ├── 3_F7_05_2.asm
    │   │   ├── 3_F7_06.asm
    │   │   ├── 3_F7_06_2.asm
    │   │   ├── 3_F7_07.asm
    │   │   ├── 3_F7_07_2.asm
    │   │   ├── 4_FE_00.asm
    │   │   ├── 4_FE_01.asm
    │   │   ├── 5_FF_00.asm
    │   │   ├── 5_FF_00_2.asm
    │   │   ├── 5_FF_00_3.asm
    │   │   ├── 5_FF_01.asm
    │   │   ├── 5_FF_01_2.asm
    │   │   ├── 5_FF_01_3.asm
    │   │   ├── 5_FF_02.asm
    │   │   ├── 5_FF_04.asm
    │   │   ├── 5_FF_05.asm
    │   │   ├── 5_FF_05_03_o32.asm
    │   │   ├── 5_FF_05_03_o32_imm.asm
    │   │   ├── 5_FF_05_03_o64.asm
    │   │   ├── 5_FF_05_03_o64_imm.asm
    │   │   ├── 5_FF_06.asm
    │   │   ├── 6_C6_00.asm
    │   │   └── 6_C7_00.asm
    │   ├── REP/
    │   │   ├── F3_10.asm
    │   │   ├── F3_10_1.asm
    │   │   ├── F3_11.asm
    │   │   ├── F3_11_1.asm
    │   │   ├── F3_12.asm
    │   │   ├── F3_16.asm
    │   │   ├── F3_2A.asm
    │   │   ├── F3_2A_1.asm
    │   │   ├── F3_2A_2.asm
    │   │   ├── F3_2B.asm
    │   │   ├── F3_2C.asm
    │   │   ├── F3_2D.asm
    │   │   ├── F3_51.asm
    │   │   ├── F3_52.asm
    │   │   ├── F3_52_2.asm
    │   │   ├── F3_53.asm
    │   │   ├── F3_58.asm
    │   │   ├── F3_59.asm
    │   │   ├── F3_5A.asm
    │   │   ├── F3_5A_1.asm
    │   │   ├── F3_5B.asm
    │   │   ├── F3_5B_1.asm
    │   │   ├── F3_5C.asm
    │   │   ├── F3_5D.asm
    │   │   ├── F3_5E.asm
    │   │   ├── F3_5F.asm
    │   │   ├── F3_6F.asm
    │   │   ├── F3_70.asm
    │   │   ├── F3_7E.asm
    │   │   ├── F3_7F.asm
    │   │   ├── F3_B8.asm
    │   │   ├── F3_BC.asm
    │   │   ├── F3_BD.asm
    │   │   ├── F3_BD_2.asm
    │   │   ├── F3_BD_3.asm
    │   │   ├── F3_BD_4.asm
    │   │   ├── F3_C2.asm
    │   │   ├── F3_D6.asm
    │   │   ├── F3_E6.asm
    │   │   └── F3_E6_1.asm
    │   ├── REPNE/
    │   │   ├── F2_10.asm
    │   │   ├── F2_11.asm
    │   │   ├── F2_12.asm
    │   │   ├── F2_2A.asm
    │   │   ├── F2_2A_1.asm
    │   │   ├── F2_2B.asm
    │   │   ├── F2_2C.asm
    │   │   ├── F2_2D.asm
    │   │   ├── F2_2D_1.asm
    │   │   ├── F2_51.asm
    │   │   ├── F2_58.asm
    │   │   ├── F2_59.asm
    │   │   ├── F2_5A.asm
    │   │   ├── F2_5A_1.asm
    │   │   ├── F2_5C.asm
    │   │   ├── F2_5D.asm
    │   │   ├── F2_5E.asm
    │   │   ├── F2_5F.asm
    │   │   ├── F2_70.asm
    │   │   ├── F2_7C.asm
    │   │   ├── F2_7D.asm
    │   │   ├── F2_C2.asm
    │   │   ├── F2_D0.asm
    │   │   ├── F2_D6.asm
    │   │   ├── F2_E6.asm
    │   │   ├── F2_E6_1.asm
    │   │   └── F2_F0.asm
    │   ├── SSE4a/
    │   │   ├── extrq_imm.asm
    │   │   ├── extrq_variable.asm
    │   │   ├── insertq_imm.asm
    │   │   └── insertq_variable.asm
    │   ├── STOS.asm
    │   ├── STOSQ.asm
    │   ├── STOSQ2.asm
    │   ├── STOSQ2_REPNE.asm
    │   ├── STOSQ_REPNE.asm
    │   ├── STOS_REPNE.asm
    │   ├── Secondary/
    │   │   ├── 07_XX_00.asm
    │   │   ├── 07_XX_04.asm
    │   │   ├── 08_66_04.asm
    │   │   ├── 08_66_04_2.asm
    │   │   ├── 08_F2_04.asm
    │   │   ├── 08_F2_04_2.asm
    │   │   ├── 08_F2_07.asm
    │   │   ├── 08_F3_04.asm
    │   │   ├── 08_F3_04_2.asm
    │   │   ├── 08_XX_04.asm
    │   │   ├── 08_XX_04_2.asm
    │   │   ├── 08_XX_04_3.asm
    │   │   ├── 08_XX_05.asm
    │   │   ├── 08_XX_05_2.asm
    │   │   ├── 08_XX_05_3.asm
    │   │   ├── 08_XX_05_3_Atomic.asm
    │   │   ├── 08_XX_05_Atomic.asm
    │   │   ├── 08_XX_06.asm
    │   │   ├── 08_XX_06_2.asm
    │   │   ├── 08_XX_06_3.asm
    │   │   ├── 08_XX_06_3_Atomic.asm
    │   │   ├── 08_XX_06_Atomic.asm
    │   │   ├── 08_XX_07.asm
    │   │   ├── 08_XX_07_2.asm
    │   │   ├── 08_XX_07_3.asm
    │   │   ├── 08_XX_07_3_Atomic.asm
    │   │   ├── 08_XX_07_Atomic.asm
    │   │   ├── 09_F3_07.asm
    │   │   ├── 09_XX_01.asm
    │   │   ├── 09_XX_01_10.asm
    │   │   ├── 09_XX_01_11.asm
    │   │   ├── 09_XX_01_12.asm
    │   │   ├── 09_XX_01_13.asm
    │   │   ├── 09_XX_01_14.asm
    │   │   ├── 09_XX_01_15.asm
    │   │   ├── 09_XX_01_16.asm
    │   │   ├── 09_XX_01_17.asm
    │   │   ├── 09_XX_01_18.asm
    │   │   ├── 09_XX_01_19.asm
    │   │   ├── 09_XX_01_2.asm
    │   │   ├── 09_XX_01_3.asm
    │   │   ├── 09_XX_01_4.asm
    │   │   ├── 09_XX_01_5.asm
    │   │   ├── 09_XX_01_6.asm
    │   │   ├── 09_XX_01_7.asm
    │   │   ├── 09_XX_01_8.asm
    │   │   ├── 09_XX_01_9.asm
    │   │   ├── 09_XX_06.asm
    │   │   ├── 09_XX_07.asm
    │   │   ├── 12_66_02.asm
    │   │   ├── 12_66_04.asm
    │   │   ├── 12_66_06.asm
    │   │   ├── 13_66_02.asm
    │   │   ├── 13_66_04.asm
    │   │   ├── 13_66_06.asm
    │   │   ├── 14_66_02.asm
    │   │   ├── 14_66_06.asm
    │   │   ├── 14_66_07.asm
    │   │   ├── 14_XX_02.asm
    │   │   ├── 15_F3_00.asm
    │   │   ├── 15_F3_01.asm
    │   │   ├── 15_F3_02.asm
    │   │   ├── 15_F3_02_2.asm
    │   │   ├── 15_F3_03.asm
    │   │   ├── 15_F3_03_2.asm
    │   │   ├── 15_XX_0.asm
    │   │   ├── 15_XX_5.asm
    │   │   ├── 15_XX_6.asm
    │   │   ├── 15_XX_7.asm
    │   │   ├── 15_XX_7_2.asm
    │   │   ├── CLFLUSHOPT.asm
    │   │   ├── CLWB.asm
    │   │   ├── Prefetch.asm
    │   │   ├── shufps_optimization.asm
    │   │   ├── shufps_optimization_2.asm
    │   │   └── xsave/
    │   │       ├── xsave.asm
    │   │       ├── xsave_avx.asm
    │   │       ├── xsave_avx_x87.asm
    │   │       ├── xsave_sse.asm
    │   │       └── xsave_x87.asm
    │   ├── SecondaryModRM/
    │   │   ├── Reg_2_0.asm
    │   │   ├── Reg_7_1.asm
    │   │   ├── Reg_7_4.asm
    │   │   └── Reg_7_4_2.asm
    │   ├── SelfModifyingCode/
    │   │   ├── Delinking.asm
    │   │   ├── DifferentBlock.asm
    │   │   └── SameBlock.asm
    │   ├── TwoByte/
    │   │   ├── 0F_02.asm
    │   │   ├── 0F_0E.asm
    │   │   ├── 0F_10.asm
    │   │   ├── 0F_10_2.asm
    │   │   ├── 0F_11.asm
    │   │   ├── 0F_12.asm
    │   │   ├── 0F_13.asm
    │   │   ├── 0F_13_2.asm
    │   │   ├── 0F_14.asm
    │   │   ├── 0F_15.asm
    │   │   ├── 0F_16.asm
    │   │   ├── 0F_17.asm
    │   │   ├── 0F_19.asm
    │   │   ├── 0F_28.asm
    │   │   ├── 0F_29.asm
    │   │   ├── 0F_2A.asm
    │   │   ├── 0F_2B.asm
    │   │   ├── 0F_2C.asm
    │   │   ├── 0F_2D.asm
    │   │   ├── 0F_2E.asm
    │   │   ├── 0F_2F.asm
    │   │   ├── 0F_31.asm
    │   │   ├── 0F_40.asm
    │   │   ├── 0F_41.asm
    │   │   ├── 0F_42.asm
    │   │   ├── 0F_43.asm
    │   │   ├── 0F_44.asm
    │   │   ├── 0F_45.asm
    │   │   ├── 0F_46.asm
    │   │   ├── 0F_47.asm
    │   │   ├── 0F_48.asm
    │   │   ├── 0F_49.asm
    │   │   ├── 0F_4A.asm
    │   │   ├── 0F_4B.asm
    │   │   ├── 0F_4C.asm
    │   │   ├── 0F_4D.asm
    │   │   ├── 0F_4E.asm
    │   │   ├── 0F_4F.asm
    │   │   ├── 0F_50.asm
    │   │   ├── 0F_51.asm
    │   │   ├── 0F_52.asm
    │   │   ├── 0F_53.asm
    │   │   ├── 0F_54.asm
    │   │   ├── 0F_55.asm
    │   │   ├── 0F_56.asm
    │   │   ├── 0F_57.asm
    │   │   ├── 0F_58.asm
    │   │   ├── 0F_59.asm
    │   │   ├── 0F_5A.asm
    │   │   ├── 0F_5A_1.asm
    │   │   ├── 0F_5B.asm
    │   │   ├── 0F_5B_1.asm
    │   │   ├── 0F_5C.asm
    │   │   ├── 0F_5D.asm
    │   │   ├── 0F_5E.asm
    │   │   ├── 0F_5F.asm
    │   │   ├── 0F_60.asm
    │   │   ├── 0F_61.asm
    │   │   ├── 0F_62.asm
    │   │   ├── 0F_63.asm
    │   │   ├── 0F_64.asm
    │   │   ├── 0F_65.asm
    │   │   ├── 0F_66.asm
    │   │   ├── 0F_67.asm
    │   │   ├── 0F_68.asm
    │   │   ├── 0F_69.asm
    │   │   ├── 0F_6A.asm
    │   │   ├── 0F_6B.asm
    │   │   ├── 0F_6E.asm
    │   │   ├── 0F_6E_2.asm
    │   │   ├── 0F_6F.asm
    │   │   ├── 0F_70.asm
    │   │   ├── 0F_74.asm
    │   │   ├── 0F_75.asm
    │   │   ├── 0F_76.asm
    │   │   ├── 0F_77.asm
    │   │   ├── 0F_7E.asm
    │   │   ├── 0F_7F.asm
    │   │   ├── 0F_80.asm
    │   │   ├── 0F_81.asm
    │   │   ├── 0F_82.asm
    │   │   ├── 0F_83.asm
    │   │   ├── 0F_84.asm
    │   │   ├── 0F_85.asm
    │   │   ├── 0F_86.asm
    │   │   ├── 0F_87.asm
    │   │   ├── 0F_88.asm
    │   │   ├── 0F_89.asm
    │   │   ├── 0F_8A.asm
    │   │   ├── 0F_8B.asm
    │   │   ├── 0F_8B_16.asm
    │   │   ├── 0F_8B_32.asm
    │   │   ├── 0F_8B_64.asm
    │   │   ├── 0F_8C.asm
    │   │   ├── 0F_8D.asm
    │   │   ├── 0F_8E.asm
    │   │   ├── 0F_8F.asm
    │   │   ├── 0F_90.asm
    │   │   ├── 0F_91.asm
    │   │   ├── 0F_92.asm
    │   │   ├── 0F_93.asm
    │   │   ├── 0F_94.asm
    │   │   ├── 0F_95.asm
    │   │   ├── 0F_96.asm
    │   │   ├── 0F_97.asm
    │   │   ├── 0F_98.asm
    │   │   ├── 0F_99.asm
    │   │   ├── 0F_9A.asm
    │   │   ├── 0F_9B.asm
    │   │   ├── 0F_9C.asm
    │   │   ├── 0F_9D.asm
    │   │   ├── 0F_9E.asm
    │   │   ├── 0F_9F.asm
    │   │   ├── 0F_A2.asm
    │   │   ├── 0F_A3.asm
    │   │   ├── 0F_A3_2.asm
    │   │   ├── 0F_A4.asm
    │   │   ├── 0F_A4_2.asm
    │   │   ├── 0F_A5.asm
    │   │   ├── 0F_A5_2.asm
    │   │   ├── 0F_A5_3.asm
    │   │   ├── 0F_A5_4.asm
    │   │   ├── 0F_A5_5.asm
    │   │   ├── 0F_A5_6.asm
    │   │   ├── 0F_A5_7.asm
    │   │   ├── 0F_AB.asm
    │   │   ├── 0F_AB_2.asm
    │   │   ├── 0F_AB_2_Atomic.asm
    │   │   ├── 0F_AB_Atomic.asm
    │   │   ├── 0F_AC.asm
    │   │   ├── 0F_AC_2.asm
    │   │   ├── 0F_AD.asm
    │   │   ├── 0F_AD_2.asm
    │   │   ├── 0F_AD_3.asm
    │   │   ├── 0F_AD_4.asm
    │   │   ├── 0F_AD_5.asm
    │   │   ├── 0F_AD_6.asm
    │   │   ├── 0F_AD_7.asm
    │   │   ├── 0F_AF.asm
    │   │   ├── 0F_AF_2.asm
    │   │   ├── 0F_B0.asm
    │   │   ├── 0F_B0_10.asm
    │   │   ├── 0F_B0_11.asm
    │   │   ├── 0F_B0_2.asm
    │   │   ├── 0F_B0_3.asm
    │   │   ├── 0F_B0_4.asm
    │   │   ├── 0F_B0_5.asm
    │   │   ├── 0F_B0_6.asm
    │   │   ├── 0F_B0_7.asm
    │   │   ├── 0F_B0_8.asm
    │   │   ├── 0F_B0_9.asm
    │   │   ├── 0F_B3.asm
    │   │   ├── 0F_B3_2.asm
    │   │   ├── 0F_B3_2_Atomic.asm
    │   │   ├── 0F_B3_Atomic.asm
    │   │   ├── 0F_B6.asm
    │   │   ├── 0F_B7.asm
    │   │   ├── 0F_BB.asm
    │   │   ├── 0F_BB_2.asm
    │   │   ├── 0F_BB_2_Atomic.asm
    │   │   ├── 0F_BB_Atomic.asm
    │   │   ├── 0F_BC.asm
    │   │   ├── 0F_BD.asm
    │   │   ├── 0F_BE.asm
    │   │   ├── 0F_BF.asm
    │   │   ├── 0F_C0.asm
    │   │   ├── 0F_C0_2.asm
    │   │   ├── 0F_C0_Atomic16.asm
    │   │   ├── 0F_C0_Atomic32.asm
    │   │   ├── 0F_C0_Atomic64.asm
    │   │   ├── 0F_C2.asm
    │   │   ├── 0F_C3.asm
    │   │   ├── 0F_C4.asm
    │   │   ├── 0F_C4_2.asm
    │   │   ├── 0F_C5.asm
    │   │   ├── 0F_C5_2.asm
    │   │   ├── 0F_C6.asm
    │   │   ├── 0F_D1.asm
    │   │   ├── 0F_D2.asm
    │   │   ├── 0F_D3.asm
    │   │   ├── 0F_D4.asm
    │   │   ├── 0F_D5.asm
    │   │   ├── 0F_D7.asm
    │   │   ├── 0F_D8.asm
    │   │   ├── 0F_D9.asm
    │   │   ├── 0F_DA.asm
    │   │   ├── 0F_DB.asm
    │   │   ├── 0F_DC.asm
    │   │   ├── 0F_DD.asm
    │   │   ├── 0F_DE.asm
    │   │   ├── 0F_DF.asm
    │   │   ├── 0F_E0.asm
    │   │   ├── 0F_E1.asm
    │   │   ├── 0F_E2.asm
    │   │   ├── 0F_E3.asm
    │   │   ├── 0F_E4.asm
    │   │   ├── 0F_E5.asm
    │   │   ├── 0F_E7.asm
    │   │   ├── 0F_E8.asm
    │   │   ├── 0F_E9.asm
    │   │   ├── 0F_EA.asm
    │   │   ├── 0F_EB.asm
    │   │   ├── 0F_EC.asm
    │   │   ├── 0F_ED.asm
    │   │   ├── 0F_EE.asm
    │   │   ├── 0F_EF.asm
    │   │   ├── 0F_F1.asm
    │   │   ├── 0F_F2.asm
    │   │   ├── 0F_F3.asm
    │   │   ├── 0F_F4.asm
    │   │   ├── 0F_F5.asm
    │   │   ├── 0F_F6.asm
    │   │   ├── 0F_F6_2.asm
    │   │   ├── 0F_F7.asm
    │   │   ├── 0F_F8.asm
    │   │   ├── 0F_F9.asm
    │   │   ├── 0F_FA.asm
    │   │   ├── 0F_FB.asm
    │   │   ├── 0F_FC.asm
    │   │   ├── 0F_FD.asm
    │   │   └── 0F_FE.asm
    │   ├── VEX/
    │   │   ├── andn.asm
    │   │   ├── bextr.asm
    │   │   ├── blsi.asm
    │   │   ├── blsmsk.asm
    │   │   ├── blsr.asm
    │   │   ├── bzhi.asm
    │   │   ├── fma_fmadd_pd.asm
    │   │   ├── fma_fmadd_ps.asm
    │   │   ├── fma_fmadd_sd.asm
    │   │   ├── fma_fmadd_ss.asm
    │   │   ├── fma_fmaddsub_pd.asm
    │   │   ├── fma_fmaddsub_ps.asm
    │   │   ├── fma_fmsub_pd.asm
    │   │   ├── fma_fmsub_ps.asm
    │   │   ├── fma_fmsub_sd.asm
    │   │   ├── fma_fmsub_ss.asm
    │   │   ├── fma_fmsubadd_pd.asm
    │   │   ├── fma_fmsubadd_ps.asm
    │   │   ├── fma_fnmadd_pd.asm
    │   │   ├── fma_fnmadd_ps.asm
    │   │   ├── fma_fnmadd_sd.asm
    │   │   ├── fma_fnmadd_ss.asm
    │   │   ├── fma_fnmsub_pd.asm
    │   │   ├── fma_fnmsub_ps.asm
    │   │   ├── fma_fnmsub_sd.asm
    │   │   ├── fma_fnmsub_ss.asm
    │   │   ├── full_vpermq_imm.asm
    │   │   ├── mulx.asm
    │   │   ├── pdep.asm
    │   │   ├── pext.asm
    │   │   ├── rorx.asm
    │   │   ├── sarx.asm
    │   │   ├── shlx.asm
    │   │   ├── shrx.asm
    │   │   ├── vaddpd.asm
    │   │   ├── vaddps.asm
    │   │   ├── vaddsd.asm
    │   │   ├── vaddss.asm
    │   │   ├── vaddsubpd.asm
    │   │   ├── vaddsubps.asm
    │   │   ├── vaesdec.asm
    │   │   ├── vaesdec256.asm
    │   │   ├── vaesdeclast.asm
    │   │   ├── vaesdeclast256.asm
    │   │   ├── vaesenc.asm
    │   │   ├── vaesenc256.asm
    │   │   ├── vaesenclast.asm
    │   │   ├── vaesenclast256.asm
    │   │   ├── vaesimc.asm
    │   │   ├── vaeskeygenassist.asm
    │   │   ├── vandnpd.asm
    │   │   ├── vandnps.asm
    │   │   ├── vandpd.asm
    │   │   ├── vandps.asm
    │   │   ├── vblendpd.asm
    │   │   ├── vblendps.asm
    │   │   ├── vblendvpd.asm
    │   │   ├── vblendvps.asm
    │   │   ├── vbroadcastf128.asm
    │   │   ├── vbroadcasti128.asm
    │   │   ├── vbroadcastsd.asm
    │   │   ├── vbroadcastss.asm
    │   │   ├── vcmppd.asm
    │   │   ├── vcmppd_256.asm
    │   │   ├── vcmppd_full.asm
    │   │   ├── vcmpps.asm
    │   │   ├── vcmpps_256.asm
    │   │   ├── vcmpps_full.asm
    │   │   ├── vcmpsd.asm
    │   │   ├── vcmpsd_full.asm
    │   │   ├── vcmpss.asm
    │   │   ├── vcmpss_full.asm
    │   │   ├── vcomisd.asm
    │   │   ├── vcomiss.asm
    │   │   ├── vcvtdq2pd.asm
    │   │   ├── vcvtdq2ps.asm
    │   │   ├── vcvtpd2dq.asm
    │   │   ├── vcvtpd2dq_inexact.asm
    │   │   ├── vcvtpd2ps.asm
    │   │   ├── vcvtph2ps.asm
    │   │   ├── vcvtps2dq.asm
    │   │   ├── vcvtps2dq_inexact.asm
    │   │   ├── vcvtps2pd.asm
    │   │   ├── vcvtps2ph_rd.asm
    │   │   ├── vcvtps2ph_rd_mxcsr.asm
    │   │   ├── vcvtps2ph_rtne.asm
    │   │   ├── vcvtps2ph_rtne_mxcsr.asm
    │   │   ├── vcvtps2ph_ru.asm
    │   │   ├── vcvtps2ph_ru_mxcsr.asm
    │   │   ├── vcvtps2ph_trunc.asm
    │   │   ├── vcvtps2ph_trunc_mxcsr.asm
    │   │   ├── vcvtsd2si.asm
    │   │   ├── vcvtsd2ss.asm
    │   │   ├── vcvtsi2sd.asm
    │   │   ├── vcvtsi2ss.asm
    │   │   ├── vcvtss2sd.asm
    │   │   ├── vcvtss2si.asm
    │   │   ├── vcvttpd2dq.asm
    │   │   ├── vcvttps2dq.asm
    │   │   ├── vcvttsd2si.asm
    │   │   ├── vcvttss2si.asm
    │   │   ├── vdivpd.asm
    │   │   ├── vdivps.asm
    │   │   ├── vdivsd.asm
    │   │   ├── vdivss.asm
    │   │   ├── vdppd.asm
    │   │   ├── vdpps_128.asm
    │   │   ├── vdpps_256.asm
    │   │   ├── vextractf128.asm
    │   │   ├── vextracti128.asm
    │   │   ├── vextractps.asm
    │   │   ├── vgather_dpd_128bit_1xdisp.asm
    │   │   ├── vgather_dpd_128bit_2xdisp.asm
    │   │   ├── vgather_dpd_128bit_4xdisp.asm
    │   │   ├── vgather_dpd_128bit_8xdisp.asm
    │   │   ├── vgather_dpd_256bit_1xdisp.asm
    │   │   ├── vgather_dpd_256bit_2xdisp.asm
    │   │   ├── vgather_dpd_256bit_4xdisp.asm
    │   │   ├── vgather_dpd_256bit_8xdisp.asm
    │   │   ├── vgather_dps_128bit_1xdisp.asm
    │   │   ├── vgather_dps_128bit_2xdisp.asm
    │   │   ├── vgather_dps_128bit_4xdisp.asm
    │   │   ├── vgather_dps_128bit_8xdisp.asm
    │   │   ├── vgather_dps_256bit_1xdisp.asm
    │   │   ├── vgather_dps_256bit_2xdisp.asm
    │   │   ├── vgather_dps_256bit_4xdisp.asm
    │   │   ├── vgather_dps_256bit_8xdisp.asm
    │   │   ├── vgather_qpd_128bit_1xdisp.asm
    │   │   ├── vgather_qpd_128bit_1xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_2xdisp.asm
    │   │   ├── vgather_qpd_128bit_2xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_4xdisp.asm
    │   │   ├── vgather_qpd_128bit_4xdisp_overflow.asm
    │   │   ├── vgather_qpd_128bit_8xdisp.asm
    │   │   ├── vgather_qpd_128bit_8xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_1xdisp.asm
    │   │   ├── vgather_qpd_256bit_1xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_2xdisp.asm
    │   │   ├── vgather_qpd_256bit_2xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_4xdisp.asm
    │   │   ├── vgather_qpd_256bit_4xdisp_overflow.asm
    │   │   ├── vgather_qpd_256bit_8xdisp.asm
    │   │   ├── vgather_qpd_256bit_8xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_1xdisp.asm
    │   │   ├── vgather_qps_128bit_1xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_2xdisp.asm
    │   │   ├── vgather_qps_128bit_2xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_4xdisp.asm
    │   │   ├── vgather_qps_128bit_4xdisp_overflow.asm
    │   │   ├── vgather_qps_128bit_8xdisp.asm
    │   │   ├── vgather_qps_128bit_8xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_1xdisp.asm
    │   │   ├── vgather_qps_256bit_1xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_2xdisp.asm
    │   │   ├── vgather_qps_256bit_2xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_4xdisp.asm
    │   │   ├── vgather_qps_256bit_4xdisp_overflow.asm
    │   │   ├── vgather_qps_256bit_8xdisp.asm
    │   │   ├── vgather_qps_256bit_8xdisp_overflow.asm
    │   │   ├── vhaddpd.asm
    │   │   ├── vhaddps.asm
    │   │   ├── vhsubpd.asm
    │   │   ├── vhsubps.asm
    │   │   ├── vinsertf128.asm
    │   │   ├── vinserti128.asm
    │   │   ├── vinsertps.asm
    │   │   ├── vlddqu.asm
    │   │   ├── vldmxcsr.asm
    │   │   ├── vmaskmovdqu.asm
    │   │   ├── vmaskmovpd_load.asm
    │   │   ├── vmaskmovpd_store.asm
    │   │   ├── vmaskmovps_load.asm
    │   │   ├── vmaskmovps_store.asm
    │   │   ├── vmaxpd.asm
    │   │   ├── vmaxps.asm
    │   │   ├── vmaxsd.asm
    │   │   ├── vmaxss.asm
    │   │   ├── vminpd.asm
    │   │   ├── vminps.asm
    │   │   ├── vminsd.asm
    │   │   ├── vminss.asm
    │   │   ├── vmovapd.asm
    │   │   ├── vmovapd_mem.asm
    │   │   ├── vmovaps.asm
    │   │   ├── vmovaps_mem.asm
    │   │   ├── vmovddup.asm
    │   │   ├── vmovdqa.asm
    │   │   ├── vmovdqu.asm
    │   │   ├── vmovhlps.asm
    │   │   ├── vmovhpd.asm
    │   │   ├── vmovhps.asm
    │   │   ├── vmovlhps.asm
    │   │   ├── vmovlpd.asm
    │   │   ├── vmovlps.asm
    │   │   ├── vmovmskpd.asm
    │   │   ├── vmovmskps.asm
    │   │   ├── vmovntdq.asm
    │   │   ├── vmovntdqa.asm
    │   │   ├── vmovntpd.asm
    │   │   ├── vmovntps.asm
    │   │   ├── vmovq.asm
    │   │   ├── vmovq_vmovd_reg.asm
    │   │   ├── vmovsd_from_mem.asm
    │   │   ├── vmovsd_to_mem.asm
    │   │   ├── vmovsd_vectors.asm
    │   │   ├── vmovshdup.asm
    │   │   ├── vmovsldup.asm
    │   │   ├── vmovss_from_mem.asm
    │   │   ├── vmovss_to_mem.asm
    │   │   ├── vmovss_vectors.asm
    │   │   ├── vmovupd.asm
    │   │   ├── vmovupd_mem.asm
    │   │   ├── vmovups.asm
    │   │   ├── vmovups_mem.asm
    │   │   ├── vmpsadbw_128.asm
    │   │   ├── vmpsadbw_256.asm
    │   │   ├── vmulpd.asm
    │   │   ├── vmulps.asm
    │   │   ├── vmulsd.asm
    │   │   ├── vmulss.asm
    │   │   ├── vorpd.asm
    │   │   ├── vorps.asm
    │   │   ├── vpabsb.asm
    │   │   ├── vpabsd.asm
    │   │   ├── vpabsw.asm
    │   │   ├── vpackssdw.asm
    │   │   ├── vpacksswb.asm
    │   │   ├── vpackusdw.asm
    │   │   ├── vpackuswb.asm
    │   │   ├── vpaddb.asm
    │   │   ├── vpaddd.asm
    │   │   ├── vpaddq.asm
    │   │   ├── vpaddsb.asm
    │   │   ├── vpaddsw.asm
    │   │   ├── vpaddusb.asm
    │   │   ├── vpaddusw.asm
    │   │   ├── vpaddw.asm
    │   │   ├── vpalignr.asm
    │   │   ├── vpand.asm
    │   │   ├── vpandn.asm
    │   │   ├── vpavgb.asm
    │   │   ├── vpavgb_aliasing.asm
    │   │   ├── vpavgw.asm
    │   │   ├── vpavgw_aliasing.asm
    │   │   ├── vpblendd.asm
    │   │   ├── vpblendvb.asm
    │   │   ├── vpblendw.asm
    │   │   ├── vpbroadcastb.asm
    │   │   ├── vpbroadcastd.asm
    │   │   ├── vpbroadcastq.asm
    │   │   ├── vpbroadcastw.asm
    │   │   ├── vpclmulqdq.asm
    │   │   ├── vpclmulqdq_256.asm
    │   │   ├── vpcmpeqb.asm
    │   │   ├── vpcmpeqd.asm
    │   │   ├── vpcmpeqq.asm
    │   │   ├── vpcmpeqw.asm
    │   │   ├── vpcmpestri_equal_any.asm
    │   │   ├── vpcmpestri_equal_each.asm
    │   │   ├── vpcmpestri_equal_ordered.asm
    │   │   ├── vpcmpestri_ranges.asm
    │   │   ├── vpcmpestrm_equal_any.asm
    │   │   ├── vpcmpestrm_equal_each.asm
    │   │   ├── vpcmpestrm_equal_ordered.asm
    │   │   ├── vpcmpestrm_ranges.asm
    │   │   ├── vpcmpgtb.asm
    │   │   ├── vpcmpgtd.asm
    │   │   ├── vpcmpgtq.asm
    │   │   ├── vpcmpgtw.asm
    │   │   ├── vpcmpistri_equal_any.asm
    │   │   ├── vpcmpistri_equal_each.asm
    │   │   ├── vpcmpistri_equal_ordered.asm
    │   │   ├── vpcmpistri_ranges.asm
    │   │   ├── vpcmpistrm_equal_any.asm
    │   │   ├── vpcmpistrm_equal_each.asm
    │   │   ├── vpcmpistrm_equal_ordered.asm
    │   │   ├── vpcmpistrm_ranges.asm
    │   │   ├── vperm2f128.asm
    │   │   ├── vperm2i128.asm
    │   │   ├── vpermd.asm
    │   │   ├── vpermilpd_imm.asm
    │   │   ├── vpermilpd_reg.asm
    │   │   ├── vpermilps_imm.asm
    │   │   ├── vpermilps_reg.asm
    │   │   ├── vpermpd.asm
    │   │   ├── vpermps.asm
    │   │   ├── vpermq.asm
    │   │   ├── vpextrb.asm
    │   │   ├── vpextrd.asm
    │   │   ├── vpextrq.asm
    │   │   ├── vpextrw.asm
    │   │   ├── vpgather_dd_128bit_1xdisp.asm
    │   │   ├── vpgather_dd_128bit_2xdisp.asm
    │   │   ├── vpgather_dd_128bit_4xdisp.asm
    │   │   ├── vpgather_dd_128bit_8xdisp.asm
    │   │   ├── vpgather_dd_256bit_1xdisp.asm
    │   │   ├── vpgather_dd_256bit_2xdisp.asm
    │   │   ├── vpgather_dd_256bit_4xdisp.asm
    │   │   ├── vpgather_dd_256bit_8xdisp.asm
    │   │   ├── vpgather_dq_128bit_1xdisp.asm
    │   │   ├── vpgather_dq_128bit_2xdisp.asm
    │   │   ├── vpgather_dq_128bit_4xdisp.asm
    │   │   ├── vpgather_dq_128bit_8xdisp.asm
    │   │   ├── vpgather_dq_256bit_1xdisp.asm
    │   │   ├── vpgather_dq_256bit_2xdisp.asm
    │   │   ├── vpgather_dq_256bit_4xdisp.asm
    │   │   ├── vpgather_dq_256bit_8xdisp.asm
    │   │   ├── vpgather_qd_128bit_1xdisp.asm
    │   │   ├── vpgather_qd_128bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_2xdisp.asm
    │   │   ├── vpgather_qd_128bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_4xdisp.asm
    │   │   ├── vpgather_qd_128bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qd_128bit_8xdisp.asm
    │   │   ├── vpgather_qd_128bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_1xdisp.asm
    │   │   ├── vpgather_qd_256bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_2xdisp.asm
    │   │   ├── vpgather_qd_256bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_4xdisp.asm
    │   │   ├── vpgather_qd_256bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qd_256bit_8xdisp.asm
    │   │   ├── vpgather_qd_256bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_1xdisp.asm
    │   │   ├── vpgather_qq_128bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_2xdisp.asm
    │   │   ├── vpgather_qq_128bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_4xdisp.asm
    │   │   ├── vpgather_qq_128bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qq_128bit_8xdisp.asm
    │   │   ├── vpgather_qq_128bit_8xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_1xdisp.asm
    │   │   ├── vpgather_qq_256bit_1xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_2xdisp.asm
    │   │   ├── vpgather_qq_256bit_2xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_4xdisp.asm
    │   │   ├── vpgather_qq_256bit_4xdisp_overflow.asm
    │   │   ├── vpgather_qq_256bit_8xdisp.asm
    │   │   ├── vpgather_qq_256bit_8xdisp_overflow.asm
    │   │   ├── vphaddd.asm
    │   │   ├── vphaddsw.asm
    │   │   ├── vphaddsw_256.asm
    │   │   ├── vphaddw.asm
    │   │   ├── vphminposuw.asm
    │   │   ├── vphsubd.asm
    │   │   ├── vphsubsw.asm
    │   │   ├── vphsubsw_256.asm
    │   │   ├── vphsubw.asm
    │   │   ├── vpinsrb.asm
    │   │   ├── vpinsrd.asm
    │   │   ├── vpinsrq.asm
    │   │   ├── vpinsrw.asm
    │   │   ├── vpmaddubsw.asm
    │   │   ├── vpmaddubsw_256.asm
    │   │   ├── vpmaddwd.asm
    │   │   ├── vpmaskmovd_load.asm
    │   │   ├── vpmaskmovd_store.asm
    │   │   ├── vpmaskmovq_load.asm
    │   │   ├── vpmaskmovq_store.asm
    │   │   ├── vpmaxsb.asm
    │   │   ├── vpmaxsd.asm
    │   │   ├── vpmaxsw.asm
    │   │   ├── vpmaxub.asm
    │   │   ├── vpmaxud.asm
    │   │   ├── vpmaxuw.asm
    │   │   ├── vpminsb.asm
    │   │   ├── vpminsd.asm
    │   │   ├── vpminsw.asm
    │   │   ├── vpminub.asm
    │   │   ├── vpminud.asm
    │   │   ├── vpminuw.asm
    │   │   ├── vpmovmskb.asm
    │   │   ├── vpmovsxbd.asm
    │   │   ├── vpmovsxbq.asm
    │   │   ├── vpmovsxbw.asm
    │   │   ├── vpmovsxdq.asm
    │   │   ├── vpmovsxwd.asm
    │   │   ├── vpmovsxwq.asm
    │   │   ├── vpmovzxbd.asm
    │   │   ├── vpmovzxbq.asm
    │   │   ├── vpmovzxbw.asm
    │   │   ├── vpmovzxdq.asm
    │   │   ├── vpmovzxwd.asm
    │   │   ├── vpmovzxwq.asm
    │   │   ├── vpmuldq.asm
    │   │   ├── vpmuldq_256.asm
    │   │   ├── vpmulhrsw.asm
    │   │   ├── vpmulhuw.asm
    │   │   ├── vpmulhw.asm
    │   │   ├── vpmulld.asm
    │   │   ├── vpmullw.asm
    │   │   ├── vpmuludq.asm
    │   │   ├── vpor.asm
    │   │   ├── vpsadbw.asm
    │   │   ├── vpsadbw_256.asm
    │   │   ├── vpshufb.asm
    │   │   ├── vpshufd.asm
    │   │   ├── vpshufhw.asm
    │   │   ├── vpshuflw.asm
    │   │   ├── vpsignb.asm
    │   │   ├── vpsignd.asm
    │   │   ├── vpsignw.asm
    │   │   ├── vpslld.asm
    │   │   ├── vpslld_imm.asm
    │   │   ├── vpslldq.asm
    │   │   ├── vpsllq.asm
    │   │   ├── vpsllq_imm.asm
    │   │   ├── vpsllvd.asm
    │   │   ├── vpsllvq.asm
    │   │   ├── vpsllw.asm
    │   │   ├── vpsllw_imm.asm
    │   │   ├── vpsrad.asm
    │   │   ├── vpsrad_imm.asm
    │   │   ├── vpsravd.asm
    │   │   ├── vpsraw.asm
    │   │   ├── vpsraw_imm.asm
    │   │   ├── vpsrld.asm
    │   │   ├── vpsrld_imm.asm
    │   │   ├── vpsrldq.asm
    │   │   ├── vpsrlq.asm
    │   │   ├── vpsrlq_imm.asm
    │   │   ├── vpsrlvd.asm
    │   │   ├── vpsrlvq.asm
    │   │   ├── vpsrlw.asm
    │   │   ├── vpsrlw_imm.asm
    │   │   ├── vpsubb.asm
    │   │   ├── vpsubd.asm
    │   │   ├── vpsubq.asm
    │   │   ├── vpsubsb.asm
    │   │   ├── vpsubsw.asm
    │   │   ├── vpsubusb.asm
    │   │   ├── vpsubusw.asm
    │   │   ├── vpsubw.asm
    │   │   ├── vptest.asm
    │   │   ├── vpunpckhbw.asm
    │   │   ├── vpunpckhdq.asm
    │   │   ├── vpunpckhqdq.asm
    │   │   ├── vpunpckhwd.asm
    │   │   ├── vpunpcklbw.asm
    │   │   ├── vpunpckldq.asm
    │   │   ├── vpunpcklqdq.asm
    │   │   ├── vpunpcklwd.asm
    │   │   ├── vpxor.asm
    │   │   ├── vrcpps.asm
    │   │   ├── vrcpss.asm
    │   │   ├── vroundpd.asm
    │   │   ├── vroundps.asm
    │   │   ├── vroundsd.asm
    │   │   ├── vroundss.asm
    │   │   ├── vrsqrtps.asm
    │   │   ├── vrsqrtss.asm
    │   │   ├── vshufpd.asm
    │   │   ├── vshufps.asm
    │   │   ├── vsqrtpd.asm
    │   │   ├── vsqrtps.asm
    │   │   ├── vsqrtsd.asm
    │   │   ├── vsqrtss.asm
    │   │   ├── vsubpd.asm
    │   │   ├── vsubps.asm
    │   │   ├── vsubsd.asm
    │   │   ├── vsubss.asm
    │   │   ├── vtestpd.asm
    │   │   ├── vtestps.asm
    │   │   ├── vucomisd.asm
    │   │   ├── vucomiss.asm
    │   │   ├── vunpckhpd.asm
    │   │   ├── vunpckhps.asm
    │   │   ├── vunpcklpd.asm
    │   │   ├── vunpcklps.asm
    │   │   ├── vxorpd.asm
    │   │   ├── vxorps.asm
    │   │   ├── vzeroall.asm
    │   │   └── vzeroupper.asm
    │   ├── X87/
    │   │   ├── D8_00.asm
    │   │   ├── D8_01.asm
    │   │   ├── D8_04.asm
    │   │   ├── D8_05.asm
    │   │   ├── D8_06.asm
    │   │   ├── D8_07.asm
    │   │   ├── D8_C0.asm
    │   │   ├── D8_C8.asm
    │   │   ├── D8_D0.asm
    │   │   ├── D8_D9.asm
    │   │   ├── D8_E0.asm
    │   │   ├── D8_E8.asm
    │   │   ├── D8_F0.asm
    │   │   ├── D8_F0_2.asm
    │   │   ├── D8_F8.asm
    │   │   ├── D9_00.asm
    │   │   ├── D9_02.asm
    │   │   ├── D9_03.asm
    │   │   ├── D9_05.asm
    │   │   ├── D9_06.asm
    │   │   ├── D9_06_2.asm
    │   │   ├── D9_07.asm
    │   │   ├── D9_C0.asm
    │   │   ├── D9_C8.asm
    │   │   ├── D9_D0.asm
    │   │   ├── D9_E0.asm
    │   │   ├── D9_E1.asm
    │   │   ├── D9_E4.asm
    │   │   ├── D9_E8.asm
    │   │   ├── D9_E9.asm
    │   │   ├── D9_EA.asm
    │   │   ├── D9_EB.asm
    │   │   ├── D9_EC.asm
    │   │   ├── D9_ED.asm
    │   │   ├── D9_EE.asm
    │   │   ├── D9_F0.asm
    │   │   ├── D9_F1.asm
    │   │   ├── D9_F2.asm
    │   │   ├── D9_F3.asm
    │   │   ├── D9_F4.asm
    │   │   ├── D9_F4_02.asm
    │   │   ├── D9_F5.asm
    │   │   ├── D9_F5_2.asm
    │   │   ├── D9_F5_3.asm
    │   │   ├── D9_F6.asm
    │   │   ├── D9_F7.asm
    │   │   ├── D9_F8.asm
    │   │   ├── D9_F9.asm
    │   │   ├── D9_FA.asm
    │   │   ├── D9_FB.asm
    │   │   ├── D9_FC.asm
    │   │   ├── D9_FD.asm
    │   │   ├── D9_FD_2.asm
    │   │   ├── D9_FE.asm
    │   │   ├── D9_FF.asm
    │   │   ├── DA_00.asm
    │   │   ├── DA_01.asm
    │   │   ├── DA_02.asm
    │   │   ├── DA_04.asm
    │   │   ├── DA_05.asm
    │   │   ├── DA_06.asm
    │   │   ├── DA_07.asm
    │   │   ├── DA_C0.asm
    │   │   ├── DA_C8.asm
    │   │   ├── DA_D0.asm
    │   │   ├── DA_D8.asm
    │   │   ├── DA_D9.asm
    │   │   ├── DA_E9.asm
    │   │   ├── DB_00.asm
    │   │   ├── DB_01.asm
    │   │   ├── DB_02.asm
    │   │   ├── DB_03.asm
    │   │   ├── DB_05.asm
    │   │   ├── DB_07.asm
    │   │   ├── DB_07_2.asm
    │   │   ├── DB_C0.asm
    │   │   ├── DB_C8.asm
    │   │   ├── DB_D0.asm
    │   │   ├── DB_D8.asm
    │   │   ├── DB_E2.asm
    │   │   ├── DB_E3.asm
    │   │   ├── DB_E3_2.asm
    │   │   ├── DB_E8.asm
    │   │   ├── DB_F0.asm
    │   │   ├── DC_00.asm
    │   │   ├── DC_01.asm
    │   │   ├── DC_04.asm
    │   │   ├── DC_05.asm
    │   │   ├── DC_06.asm
    │   │   ├── DC_07.asm
    │   │   ├── DC_C0.asm
    │   │   ├── DC_C8.asm
    │   │   ├── DC_D0.asm
    │   │   ├── DC_D9.asm
    │   │   ├── DC_E0.asm
    │   │   ├── DC_E8.asm
    │   │   ├── DC_F0.asm
    │   │   ├── DC_F8.asm
    │   │   ├── DD_00.asm
    │   │   ├── DD_01.asm
    │   │   ├── DD_02.asm
    │   │   ├── DD_03.asm
    │   │   ├── DD_04.asm
    │   │   ├── DD_04_2.asm
    │   │   ├── DD_07.asm
    │   │   ├── DD_C0.asm
    │   │   ├── DD_C8.asm
    │   │   ├── DD_D0.asm
    │   │   ├── DD_D0_2.asm
    │   │   ├── DD_D8.asm
    │   │   ├── DD_E9.asm
    │   │   ├── DE_00.asm
    │   │   ├── DE_01.asm
    │   │   ├── DE_02.asm
    │   │   ├── DE_04.asm
    │   │   ├── DE_05.asm
    │   │   ├── DE_06.asm
    │   │   ├── DE_07.asm
    │   │   ├── DE_C0.asm
    │   │   ├── DE_C8.asm
    │   │   ├── DE_D0.asm
    │   │   ├── DE_E0.asm
    │   │   ├── DE_E8.asm
    │   │   ├── DE_F0.asm
    │   │   ├── DE_F8.asm
    │   │   ├── DF_00.asm
    │   │   ├── DF_01.asm
    │   │   ├── DF_02.asm
    │   │   ├── DF_03.asm
    │   │   ├── DF_04.asm
    │   │   ├── DF_05.asm
    │   │   ├── DF_07.asm
    │   │   ├── DF_C0.asm
    │   │   ├── DF_C8.asm
    │   │   ├── DF_D0.asm
    │   │   ├── DF_D8.asm
    │   │   ├── DF_E0.asm
    │   │   ├── DF_E8.asm
    │   │   ├── DF_F0.asm
    │   │   ├── FISTTP_16bit.asm
    │   │   ├── FISTTP_16bit_neg.asm
    │   │   ├── FISTTP_32bit.asm
    │   │   ├── FISTTP_32bit_neg.asm
    │   │   ├── FISTTP_64bit.asm
    │   │   ├── FISTTP_64bit_neg.asm
    │   │   ├── FPREM1_Flags.asm
    │   │   ├── FPREM_Flags.asm
    │   │   ├── FST_AddrModes.asm
    │   │   ├── FScale-Zero.asm
    │   │   ├── FScaleFXtract.asm
    │   │   ├── FXAM_Push.asm
    │   │   ├── FXAM_Push_2.asm
    │   │   ├── FXAM_Push_Simple.asm
    │   │   ├── FXAM_Push_Simple_2.asm
    │   │   ├── FXAM_Simple.asm
    │   │   ├── LoadAtBoundary.asm
    │   │   ├── Memcopy.asm
    │   │   ├── MemcopyWithCPUID.asm
    │   │   ├── Rounding.asm
    │   │   ├── StoreAtBoundary.asm
    │   │   ├── X87MMXInteraction.asm
    │   │   ├── invalid_div_zero.asm
    │   │   ├── invalid_fcos_infinity.asm
    │   │   ├── invalid_fist_nan.asm
    │   │   ├── invalid_fist_overflow.asm
    │   │   ├── invalid_fist_overflow_16bit.asm
    │   │   ├── invalid_fist_overflow_32bit.asm
    │   │   ├── invalid_fist_overflow_64bit.asm
    │   │   ├── invalid_fprem_infinity.asm
    │   │   ├── invalid_fptan_infinity.asm
    │   │   ├── invalid_fsin_infinity.asm
    │   │   ├── invalid_fsin_neg_infinity.asm
    │   │   ├── invalid_fsincos_infinity.asm
    │   │   ├── invalid_infinity_fsubr_infinity.asm
    │   │   ├── invalid_infinity_mul_zero.asm
    │   │   ├── invalid_infinity_ops.asm
    │   │   ├── invalid_infinity_sub_infinity.asm
    │   │   ├── invalid_neg_infinity_sub_neg_infinity.asm
    │   │   ├── invalid_reduced_precision.asm
    │   │   ├── invalid_simple_test.asm
    │   │   ├── invalid_sqrt_negative.asm
    │   │   ├── precision_test_fabs.asm
    │   │   ├── precision_test_fadd.asm
    │   │   ├── precision_test_fcos.asm
    │   │   ├── precision_test_fdiv.asm
    │   │   ├── precision_test_fdivr.asm
    │   │   ├── precision_test_fmul.asm
    │   │   ├── precision_test_fprem.asm
    │   │   ├── precision_test_fprem1.asm
    │   │   ├── precision_test_fscale.asm
    │   │   ├── precision_test_fsin.asm
    │   │   ├── precision_test_fsqrt.asm
    │   │   ├── precision_test_fsub.asm
    │   │   ├── precision_test_fsubr.asm
    │   │   ├── precision_test_ftan.asm
    │   │   ├── precision_test_fyl2x.asm
    │   │   ├── precision_test_fyl2xp1.asm
    │   │   ├── precision_test_neg_fabs.asm
    │   │   ├── precision_test_neg_fadd.asm
    │   │   ├── precision_test_neg_fcos.asm
    │   │   ├── precision_test_neg_fdiv.asm
    │   │   ├── precision_test_neg_fdivr.asm
    │   │   ├── precision_test_neg_fmul.asm
    │   │   ├── precision_test_neg_fprem.asm
    │   │   ├── precision_test_neg_fprem1.asm
    │   │   ├── precision_test_neg_fscale.asm
    │   │   ├── precision_test_neg_fsin.asm
    │   │   ├── precision_test_neg_fsub.asm
    │   │   ├── precision_test_neg_fsubr.asm
    │   │   ├── precision_test_neg_ftan.asm
    │   │   ├── precision_test_neg_fyl2x.asm
    │   │   ├── precision_test_neg_fyl2xp1.asm
    │   │   ├── valid_fist_16bit.asm
    │   │   └── valid_operation.asm
    │   ├── X87_F64/
    │   │   ├── D8_00_F64.asm
    │   │   ├── D8_01_F64.asm
    │   │   ├── D8_04_F64.asm
    │   │   ├── D8_05_F64.asm
    │   │   ├── D8_06_F64.asm
    │   │   ├── D8_07_F64.asm
    │   │   ├── D8_C0_F64.asm
    │   │   ├── D8_C8_F64.asm
    │   │   ├── D8_D9_F64.asm
    │   │   ├── D8_E0_F64.asm
    │   │   ├── D8_E8_F64.asm
    │   │   ├── D8_F0_2_F64.asm
    │   │   ├── D8_F0_F64.asm
    │   │   ├── D8_F8_F64.asm
    │   │   ├── D9_00_F64.asm
    │   │   ├── D9_02_F64.asm
    │   │   ├── D9_03_F64.asm
    │   │   ├── D9_05_F64.asm
    │   │   ├── D9_06_2_F64.asm
    │   │   ├── D9_06_F64.asm
    │   │   ├── D9_07_F64.asm
    │   │   ├── D9_C0_F64.asm
    │   │   ├── D9_C8_F64.asm
    │   │   ├── D9_D0_F64.asm
    │   │   ├── D9_E0_F64.asm
    │   │   ├── D9_E1_F64.asm
    │   │   ├── D9_E4_F64.asm
    │   │   ├── D9_E8_F64.asm
    │   │   ├── D9_E9_F64.asm
    │   │   ├── D9_EA_F64.asm
    │   │   ├── D9_EB_F64.asm
    │   │   ├── D9_EC_F64.asm
    │   │   ├── D9_ED_F64.asm
    │   │   ├── D9_EE_F64.asm
    │   │   ├── D9_F0_F64.asm
    │   │   ├── D9_F1_F64.asm
    │   │   ├── D9_F2_F64.asm
    │   │   ├── D9_F3_F64.asm
    │   │   ├── D9_F4_02_F64.asm
    │   │   ├── D9_F4_F64.asm
    │   │   ├── D9_F5_F64.asm
    │   │   ├── D9_F6_F64.asm
    │   │   ├── D9_F7_F64.asm
    │   │   ├── D9_F8_F64.asm
    │   │   ├── D9_F9_F64.asm
    │   │   ├── D9_FA_F64.asm
    │   │   ├── D9_FB_F64.asm
    │   │   ├── D9_FC_F64.asm
    │   │   ├── D9_FD_2_F64.asm
    │   │   ├── D9_FD_F64.asm
    │   │   ├── D9_FE_F64.asm
    │   │   ├── D9_FF_F64.asm
    │   │   ├── DA_01_F64.asm
    │   │   ├── DA_02_F64.asm
    │   │   ├── DA_04_F64.asm
    │   │   ├── DA_05_F64.asm
    │   │   ├── DA_06_F64.asm
    │   │   ├── DA_07_F64.asm
    │   │   ├── DA_C0_F64.asm
    │   │   ├── DA_C8_F64.asm
    │   │   ├── DA_D0_F64.asm
    │   │   ├── DA_D8_F64.asm
    │   │   ├── DA_D9_F64.asm
    │   │   ├── DA_E9_F64.asm
    │   │   ├── DB_00_F64.asm
    │   │   ├── DB_01_F64.asm
    │   │   ├── DB_02_F64.asm
    │   │   ├── DB_03_F64.asm
    │   │   ├── DB_05_F64.asm
    │   │   ├── DB_07_F64.asm
    │   │   ├── DB_C0_F64.asm
    │   │   ├── DB_C8_F64.asm
    │   │   ├── DB_D0_F64.asm
    │   │   ├── DB_D8_F64.asm
    │   │   ├── DB_E3.asm
    │   │   ├── DC_00_F64.asm
    │   │   ├── DC_01_F64.asm
    │   │   ├── DC_04_F64.asm
    │   │   ├── DC_05_F64.asm
    │   │   ├── DC_06_F64.asm
    │   │   ├── DC_07_F64.asm
    │   │   ├── DC_C0_F64.asm
    │   │   ├── DC_C8_F64.asm
    │   │   ├── DC_E0_F64.asm
    │   │   ├── DC_E8_F64.asm
    │   │   ├── DC_F0_F64.asm
    │   │   ├── DC_F8_F64.asm
    │   │   ├── DD_00_F64.asm
    │   │   ├── DD_01_F64.asm
    │   │   ├── DD_02_F64.asm
    │   │   ├── DD_03_F64.asm
    │   │   ├── DD_04_2_F64.asm
    │   │   ├── DD_04_F64.asm
    │   │   ├── DD_07_F64.asm
    │   │   ├── DD_C0_F64.asm
    │   │   ├── DD_D0_2_F64.asm
    │   │   ├── DD_D0_F64.asm
    │   │   ├── DD_D8_F64.asm
    │   │   ├── DD_E9_F64.asm
    │   │   ├── DE_00_F64.asm
    │   │   ├── DE_01_F64.asm
    │   │   ├── DE_02_F64.asm
    │   │   ├── DE_04_F64.asm
    │   │   ├── DE_05_F64.asm
    │   │   ├── DE_06_F64.asm
    │   │   ├── DE_07_F64.asm
    │   │   ├── DE_C0_F64.asm
    │   │   ├── DE_C8_F64.asm
    │   │   ├── DE_E0_F64.asm
    │   │   ├── DE_E8_F64.asm
    │   │   ├── DE_F0_F64.asm
    │   │   ├── DE_F8_F64.asm
    │   │   ├── DF_00_F64.asm
    │   │   ├── DF_01_F64.asm
    │   │   ├── DF_02_F64.asm
    │   │   ├── DF_03_F64.asm
    │   │   ├── DF_04_F64.asm
    │   │   ├── DF_05_F64.asm
    │   │   ├── DF_07_F64.asm
    │   │   ├── DF_E0_F64.asm
    │   │   ├── FCOM_F64.asm
    │   │   ├── FILD_NEG_F64.asm
    │   │   ├── FIST_F64.asm
    │   │   ├── FLDCW_F64.asm
    │   │   ├── FLD_F64.asm
    │   │   ├── FPREM1_Flags_F64.asm
    │   │   ├── FPREM_Flags_F64.asm
    │   │   ├── FScale-Zero_F64.asm
    │   │   ├── FScaleFXtract_F64.asm
    │   │   ├── FXAM_Push_2_F64.asm
    │   │   ├── FXAM_Push_F64.asm
    │   │   ├── Rounding_F64.asm
    │   │   ├── fptan_neg_zero_F64.asm
    │   │   ├── fptan_pos_zero_F64.asm
    │   │   ├── fsin_neg_zero_F64.asm
    │   │   ├── fsin_pos_zero_F64.asm
    │   │   └── fsincos_neg_zero_F64.asm
    │   ├── fadd.asm
    │   ├── fld.asm
    │   ├── full_pshufd_imm.asm
    │   ├── full_vpblendw_imm.asm
    │   ├── jump.asm
    │   ├── lea.asm
    │   ├── modrm_oob/
    │   │   ├── DDD.asm
    │   │   ├── H0F38.asm
    │   │   ├── H0F3A.asm
    │   │   ├── Primary.asm
    │   │   ├── PrimaryGroup.asm
    │   │   ├── Secondary.asm
    │   │   ├── SecondaryGroup.asm
    │   │   ├── SecondaryModRM.asm
    │   │   ├── SecondaryOpSize.asm
    │   │   ├── SecondaryREP.asm
    │   │   ├── SecondaryREPNE.asm
    │   │   ├── VEX.asm
    │   │   ├── VEXGroup.asm
    │   │   ├── X87.asm
    │   │   └── X87_Reduced.asm
    │   ├── mov.asm
    │   ├── movups.asm
    │   ├── movzx.asm
    │   ├── pslldq.asm
    │   └── x87_stack.asm
    ├── CMakeLists.txt
    ├── Example.asm
    ├── FEXLinuxTests/
    │   ├── CMakeLists.txt
    │   ├── Disabled_Tests
    │   ├── Disabled_Tests_Host
    │   ├── Expected_Output
    │   ├── Flake_Tests
    │   ├── Known_Failures
    │   └── tests/
    │       ├── CMakeLists.txt
    │       ├── cpu/
    │       │   └── cpu_count.cpp
    │       ├── fd/
    │       │   └── test_close_range.cpp
    │       ├── fs/
    │       │   └── self_symlink.cpp
    │       ├── include/
    │       │   ├── fpstate.h
    │       │   └── simple_x86.h
    │       ├── signal/
    │       │   ├── Syscall_state.32.cpp
    │       │   ├── Syscall_state.64.cpp
    │       │   ├── SystemInstructions.64.cpp
    │       │   ├── eflags_signal.cpp
    │       │   ├── into.32.cpp
    │       │   ├── invalid_hlt.cpp
    │       │   ├── invalid_int.cpp
    │       │   ├── invalid_int1.cpp
    │       │   ├── invalid_int3.cpp
    │       │   ├── invalid_ud2.cpp
    │       │   ├── invalid_util.h
    │       │   ├── invalid_vex.32.cpp
    │       │   ├── noexec_protect.64.cpp
    │       │   ├── pthread_cancel.cpp
    │       │   ├── sigill_flags.cpp
    │       │   ├── sigill_xstate_magic.cpp
    │       │   ├── signal_df_reset.64.cpp
    │       │   ├── signal_flags.cpp
    │       │   ├── signal_order.cpp
    │       │   ├── sigtest_defer.cpp
    │       │   ├── sigtest_no_defer.cpp
    │       │   ├── sigtest_samask.cpp
    │       │   ├── sigtest_siginfo.32.cpp
    │       │   ├── sigtest_siginfo.64.cpp
    │       │   ├── sigtest_sigmask.cpp
    │       │   ├── synchronous-signal-block.cpp
    │       │   ├── timer-sigev-thread.cpp
    │       │   ├── trap_flag.cpp
    │       │   └── x87_state.64.cpp
    │       ├── smc/
    │       │   ├── smc-1-dynamic.cpp
    │       │   ├── smc-2.cpp
    │       │   ├── smc-common.h
    │       │   ├── smc-exec-stack.cpp
    │       │   ├── smc-missing-gnustack.cpp
    │       │   ├── smc-mt-1.cpp
    │       │   ├── smc-mt-2.cpp
    │       │   ├── smc-shared-1.cpp
    │       │   ├── smc-shared-2.cpp
    │       │   └── smc-unexec-stack.cpp
    │       ├── syscalls/
    │       │   ├── execveat_memfd.cpp
    │       │   ├── futimesat.cpp
    │       │   ├── personality.cpp
    │       │   ├── syscall_exit.cpp
    │       │   ├── syscall_sigaltstack.cpp
    │       │   └── syscalls_efault.cpp
    │       ├── thunks/
    │       │   └── thunk_testlib.cpp
    │       └── vdso/
    │           └── vdso_test.cpp
    ├── InstructionCountCI/
    │   ├── AFP/
    │   │   ├── H0F3A.json
    │   │   ├── SVE256/
    │   │   │   ├── Secondary.json
    │   │   │   ├── Secondary_REP.json
    │   │   │   └── Secondary_REPNE.json
    │   │   ├── Secondary.json
    │   │   ├── Secondary_REP.json
    │   │   ├── Secondary_REPNE.json
    │   │   ├── VEX_map1.json
    │   │   └── VEX_map3.json
    │   ├── AVX128/
    │   │   ├── FMA4.json
    │   │   ├── VEX_map1.json
    │   │   ├── VEX_map1_FCMA.json
    │   │   ├── VEX_map1_SVE128.json
    │   │   ├── VEX_map1_flagm.json
    │   │   ├── VEX_map2.json
    │   │   ├── VEX_map2_AFP.json
    │   │   ├── VEX_map2_SVE128.json
    │   │   ├── VEX_map2_flagm.json
    │   │   ├── VEX_map3.json
    │   │   ├── VEX_map3_SVE128.json
    │   │   └── VEX_map_group.json
    │   ├── Atomics.json
    │   ├── CMakeLists.txt
    │   ├── Crypto/
    │   │   ├── H0F38.json
    │   │   └── H0F3A.json
    │   ├── DDD.json
    │   ├── FEXOpt/
    │   │   ├── AddressingLimitations.json
    │   │   ├── AddressingLimitations_32Bit.json
    │   │   ├── MultiInst.json
    │   │   ├── MultiInst_32bit.json
    │   │   ├── MultiInst_AFP.json
    │   │   ├── MultiInst_TSO.json
    │   │   ├── MultiInst_TSO_32bit.json
    │   │   └── libnss.json
    │   ├── FlagM/
    │   │   ├── Atomics.json
    │   │   ├── FlagOpts.json
    │   │   ├── H0F38.json
    │   │   ├── HotBlocks.json
    │   │   ├── HotBlocks_32Bit.json
    │   │   ├── HotBlocks_AFP.json
    │   │   ├── HotBlocks_TSO_32Bit.json
    │   │   ├── Primary.json
    │   │   ├── PrimaryGroup.json
    │   │   ├── Primary_32Bit.json
    │   │   ├── Secondary.json
    │   │   ├── SecondaryGroup.json
    │   │   ├── SecondaryModRM.json
    │   │   ├── Secondary_OpSize.json
    │   │   ├── Secondary_REP.json
    │   │   ├── Secondary_REP_CSSC.json
    │   │   ├── VEX_map1.json
    │   │   ├── VEX_map2.json
    │   │   ├── VEX_map_group.json
    │   │   ├── x87-Crysis2Max-fmodel.json
    │   │   ├── x87-HalfLife.json
    │   │   ├── x87-Oblivion.json
    │   │   ├── x87-Psychonauts.json
    │   │   ├── x87.json
    │   │   ├── x87_f64-Crysis2Max-fmodel.json
    │   │   ├── x87_f64-HalfLife.json
    │   │   ├── x87_f64-Oblivion.json
    │   │   ├── x87_f64-Psychonauts.json
    │   │   └── x87_f64.json
    │   ├── H0F38.json
    │   ├── H0F3A.json
    │   ├── H0F3A_SVE128.json
    │   ├── MOPS/
    │   │   └── Primary.json
    │   ├── Primary.json
    │   ├── PrimaryGroup.json
    │   ├── Primary_32Bit.json
    │   ├── RPRES/
    │   │   ├── DDD.json
    │   │   ├── Secondary.json
    │   │   ├── Secondary_REP_AFP.json
    │   │   └── VEX_map1_AFP.json
    │   ├── Repeat.json
    │   ├── SSE42_Strings.json
    │   ├── Secondary.json
    │   ├── SecondaryGroup.json
    │   ├── SecondaryModRM.json
    │   ├── Secondary_32Bit.json
    │   ├── Secondary_OpSize.json
    │   ├── Secondary_OpSize_FCMA.json
    │   ├── Secondary_OpSize_SVE128.json
    │   ├── Secondary_OpSize_SVE256.json
    │   ├── Secondary_REP.json
    │   ├── Secondary_REPNE.json
    │   ├── Secondary_REPNE_FCMA.json
    │   ├── Secondary_REPNE_SVE128.json
    │   ├── Secondary_REP_FRINTTS.json
    │   ├── Secondary_SVE128.json
    │   ├── VEX_map1.json
    │   ├── VEX_map1_FCMA.json
    │   ├── VEX_map1_FRINTTS.json
    │   ├── VEX_map2.json
    │   ├── VEX_map2_svebitperm.json
    │   ├── VEX_map3.json
    │   ├── VEX_map_group.json
    │   ├── X87ldst-SVE.json
    │   ├── x87.json
    │   ├── x87_32Bit.json
    │   ├── x87_f64.json
    │   └── x87_f64_32Bit.json
    ├── POSIX/
    │   ├── CMakeLists.txt
    │   ├── Disabled_Tests
    │   ├── Expected_Output
    │   ├── Flake_Tests
    │   └── Known_Failures
    ├── Readme.md
    ├── ThunkFunctionalTests/
    │   └── CMakeLists.txt
    ├── ThunkLibs/
    │   ├── CMakeLists.txt
    │   ├── abi.cpp
    │   ├── common.h
    │   └── generator.cpp
    ├── Utilities/
    │   ├── CMakeLists.txt
    │   └── DeleteOldSHMRegions.cpp
    ├── gcc-target-tests-32/
    │   ├── CMakeLists.txt
    │   ├── Disabled_Tests
    │   ├── Expected_Output
    │   └── Known_Failures
    ├── gcc-target-tests-64/
    │   ├── CMakeLists.txt
    │   ├── Disabled_Tests
    │   ├── Expected_Output
    │   └── Known_Failures
    └── gvisor-tests/
        ├── CMakeLists.txt
        ├── Disabled_Tests
        ├── Expected_Output
        ├── Flake_Tests
        └── Known_Failures

================================================
FILE CONTENTS
================================================

================================================
FILE: .clang-format
================================================
Language: Cpp
BasedOnStyle: WebKit
AccessModifierOffset: -2
AlignAfterOpenBracket: Align
AlignArrayOfStructures: None
AlignConsecutiveAssignments: None
AlignConsecutiveBitFields: Consecutive
AlignConsecutiveDeclarations: None
AlignConsecutiveMacros: None
AlignEscapedNewlines: Left
AlignOperands: Align
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortCaseLabelsOnASingleLine: true
AllowShortEnumsOnASingleLine: true
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: WithoutElse
AllowShortLambdasOnASingleLine: Inline
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
AttributeMacros:
  - JEMALLOC_NOTHROW
  - FEX_ALIGNED
  - FEX_ANNOTATE
  - FEX_DEFAULT_VISIBILITY
  - FEX_NAKED
  - FEX_PACKED
  - FEXCORE_PRESERVE_ALL_ATTR
  - GLIBC_ALIAS_FUNCTION
BinPackArguments: true
BinPackParameters: true
BitFieldColonSpacing: Both
BreakAfterAttributes: Leave
BreakBeforeBraces: Attach
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline # clang 16 required
BreakBeforeTernaryOperators: false
BreakConstructorInitializers: BeforeComma
BreakInheritanceList: BeforeColon
ColumnLimit: 140
CompactNamespaces: false
ConstructorInitializerIndentWidth: 2
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DerivePointerAlignment: false
EmptyLineAfterAccessModifier: Leave
EmptyLineBeforeAccessModifier: Leave
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Preserve
IndentAccessModifiers: false
IndentCaseBlocks: false
IndentCaseLabels: false
IndentExternBlock: AfterExternBlock
IndentGotoLabels: false
IndentPPDirectives: None
IndentRequires: false
IndentWidth: 2
InsertBraces: true
KeepEmptyLinesAtTheStartOfBlocks: true
LambdaBodyIndentation: Signature
LineEnding: LF # clang 16 required
MaxEmptyLinesToKeep: 2
NamespaceIndentation: Inner
QualifierAlignment: Left
PackConstructorInitializers: Never
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 2
PenaltyBreakOpenParenthesis: 2
PenaltyBreakString: 10
PenaltyBreakTemplateDeclaration: 8
PenaltyExcessCharacter: 2
PenaltyReturnTypeOnItsOwnLine: 16
PointerAlignment: Left
RemoveBracesLLVM: false
ReferenceAlignment: Left
ReflowComments: true
RequiresClausePosition: WithPreceding
SeparateDefinitionBlocks: Leave
SortIncludes: Never
SpaceAfterCStyleCast: false
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: false
SpaceAroundPointerQualifiers: Default
SpaceBeforeAssignmentOperators: true
SpaceBeforeCaseColon: false
SpaceBeforeCpp11BracedList: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: Custom
SpaceBeforeParensOptions:
  AfterControlStatements: true
  AfterFunctionDeclarationName: false
  AfterFunctionDefinitionName: false
  AfterOverloadedOperator: false
  AfterRequiresInClause: true
  BeforeNonEmptyParentheses: false
SpaceBeforeRangeBasedForLoopColon: true
SpaceBeforeSquareBrackets: false
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: Leave
SpacesInCStyleCastParentheses: false
SpacesInConditionalStatement: false
SpacesInParentheses: false
Standard: c++20
UseTab: Never


================================================
FILE: .clang-format-ignore
================================================
# This file is used to ignore files and directories from clang-format
Source/Common/cpp-optparse/*

# Files with human-indented tables for readability - don't mess with these
FEXCore/Source/Interface/Core/X86Tables/*.cpp

# Inline headers with list-like content that can't be processed individually
Source/Tools/LinuxEmulation/LinuxSyscalls/x*/SyscallsNames.inl
Source/Tools/LinuxEmulation/LinuxSyscalls/x*/Ioctl/*.inl

# Include files in unittests
unittests/*ASM/Includes/*.inc


================================================
FILE: .git-blame-ignore-revs
================================================
# Since version 2.23 (released in August 2019), git-blame has a feature
# to ignore or bypass certain commits.
#
# This file contains a list of commits that are not likely what you
# are looking for in a blame, such as mass reformatting or renaming.
# You can set this file as a default ignore file for blame by running
# the following command.
#
# $ git config blame.ignoreRevsFile .git-blame-ignore-revs

# Whole tree reformat PR#3571
2b4ec88daebd35fefb5bf5c73d7fc2b4155771ed

# Second reformat to find fixed point PR#3577
905aa935f5ce344a48ef4d5edab3c31efa8d793e

# Reformat of CodeEmitter inl files
8760c593ece92d7e9fa94c40da0368fd367c9cad

# Whole-tree reformat with clang-format-19
5267cde60e7642852d18f20ae8568643bb5293d5

# Minor reformat with clang-format-19
9fdd96af61c969cb5732471223f00eda64b7a069

# Reformat of X86Tables.h
ba2b0ef809f66f1a6d334f000798fa2ceafab26f


================================================
FILE: .github/ISSUE_TEMPLATE/potential-game-bug.md
================================================
---
name: Potential Game Bug
about: A bug in FEX-Emu that causes a problem in a game
title: "[Game]: [Short Problem Description]"
labels: Game related
assignees: ''

---

**What Game**
The game name.
A link to the storefront where the game can be obtained (GOG, Steam, Itch.io, etc.).

**Describe the bug**
A clear and concise description of what the bug is.

**To Reproduce**
Steps to reproduce the behavior:
1. Go to '...'
2. Click on '....'
3. Scroll down to '....'
4. See error

**Expected behavior**
A clear and concise description of what you expected to happen.

**Screenshots and Video**
If applicable, add screenshots and video to help explain your problem.

**System information:**
 - OS: [eg: Ubuntu 21.10]
 - CPU/SoC: [eg: Snapdragon 888, Intel Core i9-12900K]
 - Video driver version: [eg: OpenGL ES 3.2 Mesa 22.0.0-devel (git-9ff086052a)]
 - RootFS used: [eg: Ubuntu 21.10 Official Rootfs]
 - FEX version: (FEXGetConfig --version) [eg: FEX-2112-155-gc691d709]
 - Thunks Enabled: [Yes/No]

**Additional context**
 - Is this an x86 or x86-64 game: [x86/x86-64/Both]
 - Does this reproduce on AArch64 with Radeon/Intel/Nvidia: [Yes/No/Untested]
 - Is this a Vulkan game: [Yes/No/Unknown]
   - If Yes, What is your Vulkan driver:

Add any other context about the problem here.


================================================
FILE: .github/workflows/ccpp.yml
================================================
name: Build + Test

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++
  FEX_PORTABLE: 1

jobs:
  build_plus_test:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, ARMv8.0], [self-hosted, ARMv8.2], [self-hosted, ARMv8.4]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner info
      run: |
        echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV
        echo "runner_name=$(hostname)" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True \
          -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True -DBUILD_THUNKS=True \
          -DCMAKE_INSTALL_PREFIX="$PWD"/build/install

    # These steps make a lot of noise but rarely fail.
    # Put them in a separate step to make normal build logs easier to parse
    - name: Noisy Build Targets
      run: cmake --build build --target asm_files 32bit_asm_files JemallocLibs Catch2 vixl cephes_128bit

    - name: Build
      id: build
      run: cmake --build build

    - name: Install
      run: cmake --build build --target install

    # GCC tests
    - name: GCC64 Target Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: gcc_target_tests_64

    - name: GCC32 Target Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: gcc_target_tests_32

    # API tests
    - name: API Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: api_tests

    - name: FEXCore API Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: fexcore_apitests

    # ARM emission tests
    - name: ARM Emitter Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: emitter_tests

    # Linux tests
    - name: FEX Linux Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: fex_linux_tests_all
      env:
        FEX_PORTABLE: 0

    # Thunking
    - name: Thunkgen tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: thunkgen_tests

    - name: Test GL No-Thunks
      if: ${{ steps.build.outcome == 'success' && matrix.arch[1] == 'x64' }}
      uses: ./.github/workflows/test
      with:
        target: thunk_functional_tests_nothunks
      env:
        DISPLAY: ':0'

    - name: Test GL Thunks
      if: ${{ steps.build.outcome == 'success' && matrix.arch[1] == 'x64' }}
      uses: ./.github/workflows/test
      with:
        target: thunk_functional_tests_thunks
      env:
        DISPLAY: ':0'

    # ASM tests
    - name: ASM Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: asm_tests

    # POSIX tests
    - name: POSIX Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: posix_tests

    # GVisor tests
    - name: GVisor Tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: gvisor_tests

    # Struct verifier tests
    - name: Struct verifier tests
      if: steps.build.outcome == 'success'
      uses: ./.github/workflows/test
      with:
        target: struct_verifier

    - name: Remove old SHM regions
      if: ${{ always() }}
      run: cmake --build build --target remove_old_shm_regions

    - name: Upload results
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        name: Results-${{ env.runner_name }}-${{ env.runner_label }}
        path: results/*.log
        retention-days: 3


================================================
FILE: .github/workflows/glibc_fault.yml
================================================
name: GLIBC fault test
# This workflow file is the same as the `Build + Test` with some key differences
# - Runs on any ARM64 runner
# - Disables the glibc jemalloc compile option
# - Enables the glibc allocator fault option
# - Disables gvisor tests to reduce stress on CI machines (tmp/shm tests overwhelm them)
# - Disables thunk tests since they are incompatible with glibc fault allocator
# - Disables ARMEmitter tests (We don't want to fault test vixl's disassembler)

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++
  FEX_PORTABLE: 1

jobs:
  glibc_fault_test:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, ARM64]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner info
      run: |
        echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV
        echo "runner_name=$(hostname)" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False \
          -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_FEX_LINUX_TESTS=True \
          -DENABLE_GLIBC_ALLOCATOR_HOOK_FAULT=True -DENABLE_JEMALLOC_GLIBC_ALLOC=False \
          -DCMAKE_INSTALL_PREFIX="$PWD"/build/install

    # These steps make a lot of noise but rarely fail.
    # Put them in a separate step to make normal build logs easier to parse
    - name: Noisy Build Targets
      run: cmake --build build --target asm_files 32bit_asm_files JemallocLibs Catch2 vixl cephes_128bit

    - name: Build
      run: cmake --build build

    - name: Install
      run: cmake --build build --target install

    # GCC tests
    - name: GCC64 Target Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: gcc_target_tests_64

    - name: GCC32 Target Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: gcc_target_tests_32

    # API Tests
    - name: API Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: api_tests

    - name: FEXCore API Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: fexcore_apitests

    # Linux tests
    - name: FEX Linux Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: fex_linux_tests_all

    # ASM Tests
    - name: ASM Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: asm_tests

    # POSIX Tests
    - name: POSIX Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: posix_tests

    - name: Remove old SHM regions
      if: ${{ always() }}
      run: cmake --build build --target remove_old_shm_regions

    - name: Upload results
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        name: Results-${{ env.runner_name }}-${{ env.runner_label }}
        path: results/*.log
        retention-days: 3


================================================
FILE: .github/workflows/hostrunner.yml
================================================
name: Hostrunner tests

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++
  FEX_PORTABLE: 1

jobs:
  hostrunner_tests:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, x64]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner info
      run: |
        echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV
        echo "runner_name=$(hostname)" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_LTO=False \
          -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True

    # These steps make a lot of noise but rarely fail.
    # Put them in a separate step to make normal build logs easier to parse
    - name: Noisy Build Targets
      run: cmake --build build --target asm_files 32bit_asm_files JemallocLibs Catch2 vixl cephes_128bit

    - name: Build
      run: cmake --build build

    # ASM tests
    - name: ASM Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: asm_tests

    - name: Upload results
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        name: Results-${{ env.runner_name }}-${{ env.runner_label }}
        path: results/*.log
        retention-days: 3


================================================
FILE: .github/workflows/instcountci.yml
================================================
name: Instruction Count CI run

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++

jobs:
  instcountci_tests:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, x64], [self-hosted, ARM64]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner info
      run: |
        echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV
        echo "runner_name=$(hostname)" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Set VIXL_SIM_ENABLED
      run: |
        case '${{ matrix.arch[1] }}' in
          x64) _sim=True ;;
          ARM64) _sim=False ;;
        esac
        echo "VIXL_SIM_ENABLED=$_sim" >> $GITHUB_ENV

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_VIXL_SIMULATOR=$VIXL_SIM_ENABLED \
          -DENABLE_VIXL_DISASSEMBLER=True -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True

    - name: Build
      env:
        FEX_DISABLETELEMETRY: 1
      run: cmake --build build --target CodeSizeValidation instcountci_test_files

    - name: Instruction Count Tests
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: instcountci_tests

    - name: Update local repo instcount
      if: ${{ always() }}
      run: cmake --build build --target instcountci_update_tests

    - name: Check InstCountCI diff
      if: ${{ always() }}
      run: git --no-pager diff --exit-code HEAD

    - name: Upload results
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        name: Results-${{ env.runner_name }}-${{ env.runner_label }}
        path: results/*.log
        retention-days: 3


================================================
FILE: .github/workflows/mingw_build.yml
================================================
name: Mingw build

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  BUILD_TYPE: Debug

jobs:
  mingw_build:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, ARM64, mingw], [self-hosted, ARM64EC, mingw, ARM64]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner label
      run: echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV

    - name: Add MinGW to PATH
      run: echo "$HOME/llvm-mingw/build/bin/" >> $GITHUB_PATH

    - name: Set MINGW_TRIPLE
      run: |
        case '${{ matrix.arch[1] }}' in
          x64) _cpu=x86_64 ;;
          ARM64) _cpu=aarch64 ;;
          ARM64EC) _cpu=arm64ec ;;
        esac
        echo "MINGW_TRIPLE=${_cpu}-w64-mingw32" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/Data/CMake/toolchain_mingw.cmake \
        -DMINGW_TRIPLE=$MINGW_TRIPLE -G Ninja -DENABLE_LTO=False -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True -DBUILD_TESTING=False \
        -DCMAKE_INSTALL_PREFIX="$PWD"/build/install

    - name: Build
      run: cmake --build build


================================================
FILE: .github/workflows/pr-code-format.yml
================================================
# Inspired by LLVM's pr-code-format.yml at
# https://github.com/llvm/llvm-project/blob/main/.github/workflows/pr-code-format.yml
#
# Runs the clang-format helper over the files touched by a pull request.

name: Check code formatting
on:
  pull_request:
    branches:
      - main

jobs:
  code_formatter:
    runs-on: [self-hosted, X64]
    # Skip on forks.
    if: github.repository == 'FEX-Emu/FEX'

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      # Deepen history until the merge base of the PR head and its base branch is available.
      - name: Checkout through merge base
        uses: rmacklin/fetch-through-merge-base@v0
        timeout-minutes: 3
        with:
          base_ref: ${{ github.event.pull_request.base.ref }}
          head_ref: ${{ github.event.pull_request.head.sha }}
          deepen_length: 500

      # Exports CHANGED_FILES as a comma-separated list for the formatter step.
      - name: Get changed files
        run: |
          merge_base=$(git merge-base main HEAD)
          changed=$(git diff --name-only "$merge_base" | tr '\n' ',' | sed 's/,$//')
          echo "CHANGED_FILES=$changed" >> "$GITHUB_ENV"

          echo "Changed files:"
          echo "$changed"

      # Fails the job early when the formatter binary is not installed on the runner.
      - name: Check git-clang-format-19 exists
        run: which git-clang-format-19

      - name: Setup Python env
        uses: actions/setup-python@v4
        with:
          python-version: '3.11'
          cache: pip
          cache-dependency-path: ./External/code-format-helper/requirements_formatting.txt

      - name: Install python dependencies
        run: pip install -r ./External/code-format-helper/requirements_formatting.txt

      - name: Run code formatter
        env:
          CLANG_FORMAT_PATH: git-clang-format-19
          GITHUB_PR_NUMBER: ${{ github.event.pull_request.number }}
          START_REV: ${{ github.event.pull_request.base.sha }}
          END_REV: ${{ github.event.pull_request.head.sha }}
        run: |
          python ./External/code-format-helper/code-format-helper.py \
            --repo "FEX-Emu/FEX" \
            --issue-number "$GITHUB_PR_NUMBER" \
            --start-rev "$START_REV" \
            --end-rev "$END_REV" \
            --changed-files "$CHANGED_FILES"


================================================
FILE: .github/workflows/setup-env/action.yml
================================================
# Composite action shared by the CI workflows: optionally prepares the RootFS,
# initialises the git submodules and removes any previous `build` directory.
name: Setup Build Environment
description: Setup RootFS and build environment

inputs:
  setup-rootfs:
    description: 'Whether or not to set up the rootfs'
    # Kept as a string: the steps below compare against the string 'true'.
    default: 'true'

runs:
  using: composite
  steps:
    - name: Set rootfs paths
      if: ${{ inputs.setup-rootfs == 'true' }}
      shell: bash
      run: |
        echo "FEX_ROOTFS_MOUNT=/mnt/AutoNFS/rootfs/" >> "$GITHUB_ENV"
        echo "FEX_ROOTFS_PATH=$HOME/Rootfs/" >> "$GITHUB_ENV"
        echo "FEX_ROOTFS=$HOME/Rootfs/" >> "$GITHUB_ENV"

    - name: Update RootFS cache
      if: ${{ inputs.setup-rootfs == 'true' }}
      shell: bash
      run: python3 Scripts/CI_FetchRootFS.py

    - name: Checkout Submodules
      shell: bash
      run: |
        git submodule sync --recursive
        git submodule update --init --depth 1

    # Start every job from an empty build tree.
    - name: Clean Build Environment
      shell: bash
      run: rm -Rf build


================================================
FILE: .github/workflows/steamrt4.yml
================================================
# Builds and installs FEX inside a steamrt4 SDK distrobox container and
# uploads the install tree as an artifact.
name: steamrt4 build

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  DEBIAN_FRONTEND: noninteractive
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++

jobs:
  steamrt4_build:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, ARM64, distrobox]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Set runner label
      run: echo "runner_label=${{ matrix.arch[1] }}" >> "$GITHUB_ENV"

    # The rootfs is not set up for this job.
    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env
      with:
        setup-rootfs: false

    # Setup everything required.
    # `create` is allowed to fail (`|| true`), e.g. when the container already exists.
    - name: distrobox setup
      run: |
        distrobox create -Y -i registry.gitlab.steamos.cloud/steamrt/steamrt4/sdk/arm64:4.0.20251117.183306 steamrt4 || true
        distrobox upgrade steamrt4
        distrobox enter --name steamrt4 -- sudo apt-get install -y \
          git cmake ninja-build ccache \
          lld clang \
          libclang-dev llvm-dev \
          libstdc++-14-dev-i386-cross libgcc-14-dev-i386-cross \
          libstdc++-14-dev-amd64-cross libgcc-14-dev-amd64-cross

    - name: Configure CMake
      run: |
        distrobox enter --name steamrt4 -- cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
          -G Ninja -DBUILD_STEAM_SUPPORT=True -DENABLE_LTO=True -DENABLE_ASSERTIONS=False -DBUILD_THUNKS=True \
          -DBUILD_FEXCONFIG=False -DBUILD_TESTING=False -DENABLE_CLANG_THUNKS=True -DUSE_LINKER=lld \
          -DCMAKE_INSTALL_PREFIX=/usr

    - name: Build
      run: distrobox enter --name steamrt4 -- cmake --build build

    # Installs into ./install (via DESTDIR) so the tree can be uploaded below.
    - name: install
      run: DESTDIR="$PWD"/install distrobox enter --name steamrt4 -- cmake --build build -t install

    - name: Upload libraries
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        overwrite: true
        name: steamrt4_steampipe_depot
        path: ${{ github.workspace }}/install/*
        retention-days: 60
        compression-level: 9


================================================
FILE: .github/workflows/test/action.yml
================================================
# Composite action: builds one CMake test target and keeps its CTest log
# under results/<target>.log.
name: Run Test and Store Logs
description: Run a test and store the log.
inputs:
  target:
    description: 'The test target to run'
    required: true

runs:
  using: composite
  steps:
    - name: Run Tests
      shell: bash
      run: cmake --build build --target ${{ inputs.target }}

    # Runs even when the tests failed so the log is always collected.
    # The log is named after the target only, so invoking this action twice
    # with the same target replaces the earlier log.
    # Both commands are best-effort (`|| true`): a missing log must not fail the step.
    - name: Move and Truncate Results
      if: ${{ always() }}
      shell: bash
      run: |
        mkdir -p results
        mv build/Testing/Temporary/LastTest.log results/${{ inputs.target }}.log || true
        truncate --size="<20M" results/${{ inputs.target }}.log || true


================================================
FILE: .github/workflows/vixl_simulator.yml
================================================
# Builds FEX with the VIXL simulator enabled and runs the ASM tests in three
# configurations (default, forced 128-bit SVE, SVE disabled).
name: Vixl Simulator run

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

env:
  # Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
  BUILD_TYPE: Release
  CC: clang
  CXX: clang++
  FEX_PORTABLE: 1

jobs:
  vixl_simulator:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        # Only the x86-64 runner is fast enough to run this
        # NOTE(review): the matrix below also lists an ARMv8.4 runner, which
        # contradicts the sentence above — confirm which one is current.
        arch: [[self-hosted, x64], [self-hosted, ARMv8.4]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    # runner_label / runner_name are used to name the uploaded artifact.
    - name: Set runner info
      run: |
        echo "runner_label=${{ matrix.arch[1] }}" >> $GITHUB_ENV
        echo "runner_name=$(hostname)" >> $GITHUB_ENV

    - name: Setup Build Environment
      uses: ./.github/workflows/setup-env

    - name: Configure CMake
      run: |
        cmake -S . -B build -DCMAKE_BUILD_TYPE=$BUILD_TYPE -G Ninja -DENABLE_VIXL_SIMULATOR=True -DENABLE_LTO=False \
          -DENABLE_VIXL_DISASSEMBLER=True -DENABLE_ASSERTIONS=True -DENABLE_X86_HOST_DEBUG=True

    # These steps make a lot of noise but rarely fail.
    # Put them in a separate step to make normal build logs easier to parse
    - name: Noisy Build Targets
      run: cmake --build build --target asm_files 32bit_asm_files JemallocLibs Catch2 vixl cephes_128bit

    - name: Build
      run: cmake --build build

    # NOTE(review): the three test steps below all pass target `asm_tests`, and
    # the test action stores its log as results/<target>.log, so each run
    # replaces the previous run's log — only the last one reaches the upload.
    - name: ASM Tests - SVE256
      if: ${{ always() }}
      uses: ./.github/workflows/test
      with:
        target: asm_tests

    - name: ASM Tests - SVE128
      if: ${{ always() }}
      uses: ./.github/workflows/test
      env:
        FEX_FORCESVEWIDTH: "128"
      with:
        target: asm_tests

    - name: ASM Tests - ASIMD
      if: ${{ always() }}
      uses: ./.github/workflows/test
      env:
        FEX_HOSTFEATURES: "disablesve"
      with:
        target: asm_tests

    - name: Upload results
      if: ${{ always() }}
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        name: Results-${{ env.runner_name }}-${{ env.runner_label }}
        path: results/*.log
        retention-days: 3


================================================
FILE: .github/workflows/wine_build/action.yml
================================================
# Composite action: configures, builds and installs FEX as a Wine DLL for one
# target into ./install. Expects BUILD_TYPE in the environment.
name: Wine DLL Build
description: Build a wow64 or arm64ec Wine DLL

inputs:
  target:
    description: 'The target (arm64ec or wow64)'
    required: true

runs:
  using: composite
  steps:
    - name: Clean Build Environment
      shell: bash
      run: rm -Rf build_${{ inputs.target }}

    - name: Configure CMake
      shell: bash
      run: |
        # Map the action input to the CPU part of the MinGW triple.
        case "${{ inputs.target }}" in
          wow64) _cc=aarch64 ;;
          arm64ec) _cc=arm64ec ;;
          *)
            # Without this branch an unknown target configured the bogus triple "-w64-mingw32".
            echo "Unknown Wine DLL target '${{ inputs.target }}' (expected wow64 or arm64ec)" >&2
            exit 1
            ;;
        esac

        cmake -S . -B build_${{ inputs.target }} -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DCMAKE_TOOLCHAIN_FILE=Data/CMake/toolchain_mingw.cmake \
          -DMINGW_TRIPLE=${_cc}-w64-mingw32 -DCMAKE_INSTALL_LIBDIR=/usr/lib/wine/aarch64-windows -G Ninja \
          -DENABLE_LTO=False -DENABLE_ASSERTIONS=False -DENABLE_JEMALLOC_GLIBC_ALLOC=False \
          -DBUILD_TESTING=False -DCMAKE_INSTALL_PREFIX=/usr -DTUNE_ARCH=generic -DTUNE_CPU=none

    - name: Build
      shell: bash
      run: cmake --build build_${{ inputs.target }}

    # DESTDIR stages the install under ./install instead of the real /usr prefix.
    - name: Install
      shell: bash
      run: DESTDIR="$PWD"/install cmake --build build_${{ inputs.target }} -t install


================================================
FILE: .github/workflows/wine_dll_artifacts.yml
================================================
# On every push to main: builds the wow64 and arm64ec Wine DLLs into a shared
# ./install tree and uploads the resulting DLLs as one artifact.
name: Wine DLL artifacts

on:
  push:
    branches:
      - main

env:
  BUILD_TYPE: Release

jobs:
  wine_dll_artifacts:
    runs-on: ${{ matrix.arch }}
    strategy:
      matrix:
        arch: [[self-hosted, ARM64, mingw]]
      fail-fast: false

    steps:
    - uses: actions/checkout@v6
      with:
        fetch-depth: '0'
        fetch-tags: 'true'

    - name: Add MinGW to PATH
      run: echo "$HOME/llvm-mingw/build/bin/" >> "$GITHUB_PATH"

    - name: Checkout Submodules
      # Need to update submodules
      run: |
        git submodule sync --recursive
        git submodule update --init --depth 1

    # Both builds below install into ./install, so clear it once up front.
    - name: Clean install directory
      run: rm -Rf install

    - name: Build (wow64)
      uses: ./.github/workflows/wine_build
      with:
        target: wow64

    - name: Build (arm64ec)
      uses: ./.github/workflows/wine_build
      with:
        target: arm64ec

    - name: Upload libraries
      uses: actions/upload-artifact@v6
      timeout-minutes: 1
      with:
        overwrite: true
        name: wine_dll_artifacts
        path: ${{ github.workspace }}/install/usr/lib/wine/aarch64-windows/lib*.dll
        retention-days: 60
        compression-level: 9


================================================
FILE: .gitignore
================================================
# Local tooling and configuration files

compile_commands.json
vim_rc
Config.json

# Build output directories, editor/IDE state and caches
[Bb]uild*
[Bb]in/
out/
.vscode/
.vs/
*.pyc
.cache
.idea/
CMakeLists.txt.user


================================================
FILE: .gitlab-ci.yml
================================================
# Pipeline input: the branch a successful build is promoted to.
spec:
  inputs:
    PROMOTE_BRANCH:
      description: "Branch to promote the build to. Empty means no promotion."
      default: "bleeding-edge"

---

# Always run, exposing the pipeline input as a regular CI variable.
workflow:
  rules:
    - when: always
      variables:
        PROMOTE_BRANCH: $[[ inputs.PROMOTE_BRANCH ]]

variables:
    DEBIAN_FRONTEND: noninteractive
    GIT_SUBMODULE_STRATEGY: recursive
    GIT_DEPTH: 0
    CC: clang
    CXX: clang++

# Builds FEX in the steamrt4 SDK image and publishes the staged install tree.
build:
  stage: build
  image: registry.gitlab.steamos.cloud/steamrt/steamrt4/sdk/arm64:4.0.20251117.183306
  tags:
    - docker
    - linux
    - arm64
    - aarch64
  script:
    - apt-get -y update
    - apt-get install -y
        git cmake ninja-build ccache
        lld clang
        libclang-dev llvm-dev
        libstdc++-14-dev-i386-cross libgcc-14-dev-i386-cross
        libstdc++-14-dev-amd64-cross libgcc-14-dev-amd64-cross
    - cmake -E make_directory build/
    - cmake -DCMAKE_BUILD_TYPE=Release -G Ninja -DBUILD_STEAM_SUPPORT=True -DENABLE_LTO=True -DENABLE_ASSERTIONS=False -DBUILD_THUNKS=True -DBUILD_FEXCONFIG=False -DBUILD_TESTING=False -DENABLE_CLANG_THUNKS=True -DUSE_LINKER=lld -DCMAKE_INSTALL_PREFIX=/usr -DTUNE_ARCH=armv8.2-a -DTUNE_CPU=none . -B build/
    - cmake --build build/ --config Release
    - DESTDIR=$(pwd)/install/ cmake --build build/ --config Release -t install

  artifacts:
    name: "steamrt artifacts"
    untracked: false
    paths:
      - install/

# Triggers the fex-depot pipeline; only runs when PROMOTE_BRANCH is non-empty.
promote:
  stage: deploy
  variables:
    # No checkout needed: this job only issues an API request.
    GIT_STRATEGY: none
  image: registry.gitlab.steamos.cloud/steamrt/steamrt4/sdk/arm64:4.0.20251117.183306
  tags:
    - docker
    - linux
    - arm64
    - aarch64
  rules:
    - if: '$PROMOTE_BRANCH'
  before_script:
    - apt-get -y update
    - apt-get install -y tmux curl
  script:
    # Uncomment the two lines below to debug: SSH in via GCP, enter the container and attach to the session (with `tmux attach -t debug`).
    # The job then blocks until the tmux session is closed.
#    - tmux new-session -d -s debug
#    - while tmux has-session -t debug 2>/dev/null; do sleep 1; done

    # ref controls which fex-depot code runs the pipeline, while VERSION_PARAM controls which fex branch's artifacts that pipeline downloads.
    - >
      curl --fail --location --request POST --form token=${FEX_DEPOT_TRIGGER_TOKEN} --form ref=master --form "variables[PROMOTE_BRANCH]=${PROMOTE_BRANCH}" --form "variables[VERSION_PARAM]=${CI_COMMIT_REF_NAME}" "${CI_API_V4_URL}/projects/fex%2Ffex-depot/trigger/pipeline"


================================================
FILE: .gitmodules
================================================
# Submodule registry. Keys are tab-indented and listed as path, url, shallow.
[submodule "External/vixl"]
	path = External/vixl
	url = https://github.com/FEX-Emu/vixl.git
	shallow = true
# Note: this submodule's name differs from its checkout path.
[submodule "External/cpp-optparse"]
	path = Source/Common/cpp-optparse
	url = https://github.com/Sonicadvance1/cpp-optparse
[submodule "External/fex-posixtest-bins"]
	path = External/fex-posixtest-bins
	url = https://github.com/FEX-Emu/fex-posixtest-bins.git
	shallow = true
[submodule "External/fex-gvisor-tests-bins"]
	path = External/fex-gvisor-tests-bins
	url = https://github.com/FEX-Emu/fex-gvisor-tests-bins.git
	shallow = true
[submodule "External/fex-gcc-target-tests-bins"]
	path = External/fex-gcc-target-tests-bins
	url = https://github.com/FEX-Emu/fex-gcc-target-tests-bins.git
	shallow = true
[submodule "External/fmt"]
	path = External/fmt
	url = https://github.com/fmtlib/fmt.git
[submodule "External/drm-headers"]
	path = External/drm-headers
	url = https://github.com/FEX-Emu/drm-headers.git
[submodule "External/xxhash"]
	path = External/xxhash
	url = https://github.com/Cyan4973/xxHash.git
[submodule "External/Catch2"]
	path = External/Catch2
	url = https://github.com/catchorg/Catch2.git
[submodule "External/Vulkan-Headers"]
	path = External/Vulkan-Headers
	url = https://github.com/KhronosGroup/Vulkan-Headers.git
	shallow = true
[submodule "External/jemalloc_glibc"]
	path = External/jemalloc_glibc
	url = https://github.com/FEX-Emu/jemalloc.git
[submodule "External/tracy"]
	path = External/tracy
	url = https://github.com/wolfpld/tracy
[submodule "External/range-v3"]
	path = External/range-v3
	url = https://github.com/ericniebler/range-v3.git
[submodule "External/zydis"]
	path = External/zydis
	url = https://github.com/zyantific/zydis.git
	shallow = true
[submodule "External/unordered_dense"]
	path = External/unordered_dense
	url = https://github.com/martinus/unordered_dense.git
[submodule "External/rpmalloc"]
	path = External/rpmalloc
	url = https://github.com/FEX-Emu/rpmalloc.git


================================================
FILE: CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(FEX C CXX ASM)

# Probe for the GDB JIT reader header; the result is the default of ENABLE_GDB_SYMBOLS.
include(CheckIncludeFiles)
check_include_files("gdb/jit-reader.h" HAVE_GDB_JIT_READER_H)

## Build options ##
option(BUILD_FEX_LINUX_TESTS "Build FEXLinuxTests (requires x86 compiler)" FALSE)
option(BUILD_THUNKS "Build thunks" FALSE)
option(BUILD_FEXCONFIG "Build FEXConfig" TRUE)
option(ENABLE_CLANG_THUNKS "Build thunks with clang" TRUE)
option(ENABLE_IWYU "Enable the Include What You Use sanitizer" FALSE)
option(ENABLE_LTO "Enable LTO with compilation" TRUE)
option(ENABLE_XRAY "Enable building with LLVM X-Ray" FALSE)
set(USE_LINKER "" CACHE STRING "Path to a custom linker program")
option(ENABLE_UBSAN "Enable the Clang Undefined Behavior Sanitizer" FALSE)
option(ENABLE_ASAN "Enable the Clang Address Sanitizer" FALSE)
option(ENABLE_TSAN "Enable the Clang Thread Sanitizer" FALSE)
option(ENABLE_COVERAGE "Enable Code Coverage" FALSE)
option(ENABLE_ASSERTIONS "Enable debug assertions" FALSE)
option(ENABLE_GDB_SYMBOLS "Enable GDBSymbols integration support" ${HAVE_GDB_JIT_READER_H})
option(ENABLE_STRICT_WERROR "Enable stricter -Werror" FALSE)
option(ENABLE_WERROR "Enable -Werror" FALSE)
option(ENABLE_FEX_ALLOCATOR "Enable allocator for FEX" TRUE)
option(ENABLE_JEMALLOC_GLIBC_ALLOC "Enable jemalloc glibc allocator" TRUE)
option(ENABLE_OFFLINE_TELEMETRY "Enable FEX offline telemetry" TRUE)
option(ENABLE_COMPILE_TIME_TRACE "Enable time trace compile option" FALSE)
option(ENABLE_LIBCXX "Use LLVM's libc++ instead of the GNU libstdc++" FALSE)
option(ENABLE_CCACHE "Enable ccache for build caching" TRUE)
option(ENABLE_VIXL_SIMULATOR "Use the VIXL simulator for emulation (only useful for CI testing)" FALSE)
option(ENABLE_VIXL_DISASSEMBLER "Enable debug disassembler output with VIXL" FALSE)
option(ENABLE_ZYDIS "Enable x86/x86-64 guest disassembler output with Zydis" FALSE)
option(USE_LEGACY_BINFMTMISC "Use legacy method of setting up binfmt_misc" FALSE)
option(ENABLE_FEXCORE_PROFILER "Enable FEXCore's timeline profiling capabilities" FALSE)
set(FEXCORE_PROFILER_BACKEND "gpuvis" CACHE STRING "Set which backend to use for FEXCore's profiler")
set_property(CACHE FEXCORE_PROFILER_BACKEND PROPERTY STRINGS gpuvis tracy)
# Default spelled out like every other option (option() defaults to OFF when omitted).
option(ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT "Enables glibc memory allocation hooking with fault for CI testing" FALSE)
option(USE_PDB_DEBUGINFO "Build debug info in PDB format" FALSE)
option(BUILD_STEAM_SUPPORT "Enable Steam integration" FALSE)

## Cross-compilation and data directory settings ##
set(X86_32_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/Data/CMake/toolchain_x86_32.cmake" CACHE FILEPATH "Toolchain file for the (cross-)compiler targeting i686")
set(X86_64_TOOLCHAIN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/Data/CMake/toolchain_x86_64.cmake" CACHE FILEPATH "Toolchain file for the (cross-)compiler targeting x86_64")
# A sysroot is a directory, so the cache entry is typed PATH rather than FILEPATH.
set(X86_DEV_ROOTFS "/" CACHE PATH "Path to the sysroot used for cross-compiling for i686 and x86_64")
set(DATA_DIRECTORY "" CACHE PATH "Global data directory (override)")
set(HOSTLIBS_DATA_DIRECTORY "" CACHE PATH "Host libraries data directory (override)")

# Empty overrides fall back to locations derived from the install prefix.
if (NOT DATA_DIRECTORY)
  set(DATA_DIRECTORY "${CMAKE_INSTALL_PREFIX}/share/fex-emu")
endif()

# GNUInstallDirs provides CMAKE_INSTALL_FULL_LIBDIR used below.
include(GNUInstallDirs)
if (NOT HOSTLIBS_DATA_DIRECTORY)
  set(HOSTLIBS_DATA_DIRECTORY "${CMAKE_INSTALL_FULL_LIBDIR}/fex-emu")
endif()

## Platform Checks ##
# Only 64-bit Linux and Windows are supported

# NB: SIZEOF_VOID_P is in bytes, not bits
# On 32-bit systems this is set to 4
if (NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
  message(FATAL_ERROR "Unsupported pointer size ${CMAKE_SIZEOF_VOID_P}."
    " FEX only supports 64-bit (8-byte pointer) systems."
    " If you believe this is in error, file an issue.")
elseif (NOT (WIN32 OR CMAKE_SYSTEM_NAME STREQUAL "Linux"))
  message(FATAL_ERROR "Unsupported system type ${CMAKE_SYSTEM_NAME}."
    " FEX only supports Linux and Windows."
    " If you believe this is in error, file an issue.")
endif()

## Compiler Checks ##
# GCC and MSVC are unsupported
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  message(FATAL_ERROR "FEX doesn't support GCC! Use Clang instead.")
elseif (MSVC)
  message(FATAL_ERROR "FEX doesn't support MSVC! Use Clang on MinGW instead.")
elseif (MINGW)
  message(STATUS "Building for MinGW")
  # MinGW builds always use the FEX allocator and never the jemalloc glibc
  # allocator, regardless of what the user passed for these options.
  set(ENABLE_FEX_ALLOCATOR TRUE)
  set(ENABLE_JEMALLOC_GLIBC_ALLOC FALSE)
else ()
  # Remaining case: enforce the minimum compiler version.
  # The MinGW branch above skips this version check.
  message(STATUS "Clang version ${CMAKE_CXX_COMPILER_VERSION}")
  set(CLANG_MINIMUM_VERSION 13.0)
  if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
    message(FATAL_ERROR "Clang version too old for FEX. Need at least ${CLANG_MINIMUM_VERSION} but has ${CMAKE_CXX_COMPILER_VERSION}")
  endif()
endif()

## Architecture Handling ##
# Sets ARCHITECTURE_<arch> (CMake variable and compile definition) from the
# target processor, or aborts if the processor is not recognised.

# Quoted so that an empty CMAKE_SYSTEM_PROCESSOR (e.g. a toolchain file that
# forgot to set it) reaches the "Unsupported processor type" error below
# instead of failing inside string() with an argument-count error.
string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" processor)
if (processor MATCHES "x86|amd64")
  option(ENABLE_X86_HOST_DEBUG "Enables compiling on x86_64 host" FALSE)
  if (NOT ENABLE_X86_HOST_DEBUG)
    message(FATAL_ERROR
      " FEX doesn't support compiling for x86-64 hosts!"
      " This is /only/ a supported configuration for FEX CI and nothing else!")
  else()
    message(STATUS "x86_64 debug build")
  endif()

  set(ARCHITECTURE_x86_64 1)
  add_compile_definitions(ARCHITECTURE_x86_64=1)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcx16")
# Prefix match; equivalent to the previous "^aarch64|^arm64|^armv8\.*" pattern,
# whose trailing ".*" did not restrict anything.
elseif (processor MATCHES "^(aarch64|arm64|armv8)")
  set(ARCHITECTURE_arm64 1)
  add_compile_definitions(ARCHITECTURE_arm64=1)

  # arm64ec needs to define both arm64 and arm64ec
  if (processor MATCHES "^arm64ec")
    set(ARCHITECTURE_arm64ec 1)
    add_compile_definitions(ARCHITECTURE_arm64ec=1)
  endif()
endif()

if (NOT (ARCHITECTURE_arm64 OR ARCHITECTURE_arm64ec OR ARCHITECTURE_x86_64))
  message(FATAL_ERROR "Unsupported processor type ${processor}."
    " If you believe this is in error, file an issue.")
endif()

# Translate feature options into compile definitions.
if (BUILD_STEAM_SUPPORT)
  add_compile_definitions(FEX_STEAM_SUPPORT=1)
endif()

if (ENABLE_FEXCORE_PROFILER)
  add_compile_definitions(ENABLE_FEXCORE_PROFILER=1)
  # Upper-case the backend name so the comparisons below are case-insensitive.
  string(TOUPPER "${FEXCORE_PROFILER_BACKEND}" FEXCORE_PROFILER_BACKEND)

  # FEXCORE_PROFILER_BACKEND is exposed to the sources as 1 (gpuvis) or 2 (tracy).
  if (FEXCORE_PROFILER_BACKEND STREQUAL "GPUVIS")
    add_compile_definitions(FEXCORE_PROFILER_BACKEND=1)
  elseif (FEXCORE_PROFILER_BACKEND STREQUAL "TRACY")
    add_compile_definitions(FEXCORE_PROFILER_BACKEND=2)
    add_compile_definitions(TRACY_ENABLE=1)
    # Required so that Tracy will only start in the selected guest application
    add_compile_definitions(TRACY_MANUAL_LIFETIME=1)
    add_compile_definitions(TRACY_DELAYED_INIT=1)
    # This interferes with FEX's signal handling
    add_compile_definitions(TRACY_NO_CRASH_HANDLER=1)
    # Tracy can gather call stack samples in regular intervals, but this
    # isn't useful for us since it would usually sample opaque JIT code
    add_compile_definitions(TRACY_NO_SAMPLING=1)
    # This pulls in libbacktrace which allocates in global constructors (before FEX can set up its allocator hooks)
    add_compile_definitions(TRACY_NO_CALLSTACK=1)
    if (MINGW)
      message(FATAL_ERROR "Tracy profiler not supported on MinGW")
    endif()
  else()
    message(FATAL_ERROR "Unknown FEXCore profiler backend ${FEXCORE_PROFILER_BACKEND}")
  endif()
endif()

# The two glibc allocator hooks are mutually exclusive.
if (ENABLE_JEMALLOC_GLIBC_ALLOC AND ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
  message(FATAL_ERROR "Can't have both glibc fault allocator and jemalloc glibc allocator enabled at the same time")
endif()

if (ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
  add_compile_definitions(GLIBC_ALLOCATOR_FAULT=1)
endif()

# uninstall target
# Only defined when no enclosing project has already provided one.
if(NOT TARGET uninstall)
  # Instantiate the uninstall script template into the build tree (@VAR@ substitution only).
  configure_file(
    "${CMAKE_CURRENT_SOURCE_DIR}/Data/CMake/cmake_uninstall.cmake.in"
    "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cmake_uninstall.cmake"
    IMMEDIATE @ONLY)

  # `cmake --build <dir> --target uninstall` runs the generated script.
  add_custom_target(uninstall
    COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cmake_uninstall.cmake)
endif()

# These options are meant for package management
set(TUNE_CPU "native" CACHE STRING "Override the CPU the build is tuned for")
set(TUNE_ARCH "generic" CACHE STRING "Override the Arch the build is tuned for")
set(OVERRIDE_VERSION "detect" CACHE STRING "Override the FEX version")
set(OVERRIDE_HASH "detect" CACHE STRING "Override the FEX git hash")

# Single-config generators without an explicit build type default to Release.
get_property(IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if (NOT IS_MULTI_CONFIG AND NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Release
        CACHE STRING "Choose the type of build." FORCE)
    message(STATUS "No build type set, defaulting to a Release build")
endif()

# Upper-case the build type so the match below is case-insensitive.
# Any build type containing "DEBUG" forces assertions on.
string(TOUPPER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE)
if (CMAKE_BUILD_TYPE MATCHES "DEBUG")
  set(ENABLE_ASSERTIONS TRUE)
endif()

if (ENABLE_ASSERTIONS)
  message(STATUS "Assertions enabled")
  add_compile_definitions(ASSERTIONS_ENABLED=1)
endif()

if (ENABLE_GDB_SYMBOLS)
  message(STATUS "GDBSymbols support enabled")
  add_compile_definitions(GDB_SYMBOLS_ENABLED=1)
endif()

# Global language and output settings.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/Bin)
set(CMAKE_INCLUDE_CURRENT_DIR ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
cmake_policy(SET CMP0083 NEW) # Follow new PIE policy
include(CheckPIESupported)
check_pie_supported()

set(CMAKE_INTERPROCEDURAL_OPTIMIZATION ${ENABLE_LTO})

# Probe: does the compiler accept __attribute__((preserve_all))?
# -Werror=attributes turns an ignored/unknown attribute into a failed probe.
include(CheckCXXSourceCompiles)
set(CMAKE_REQUIRED_FLAGS "-std=c++11 -Wattributes -Werror=attributes")
check_cxx_source_compiles(
  "
  __attribute__((preserve_all))
  int Testy(int a, int b, int c, int d, int e, int f) {
  return a + b + c + d + e + f;
  }
  int main() {
  return Testy(0, 1, 2, 3, 4, 5);
  }"
  HAS_CLANG_PRESERVE_ALL)
unset(CMAKE_REQUIRED_FLAGS)
if (HAS_CLANG_PRESERVE_ALL)
  if (MINGW)
    # The probe succeeds on MinGW but the result is deliberately discarded.
    message(STATUS "Ignoring broken clang::preserve_all support")
    set(HAS_CLANG_PRESERVE_ALL FALSE)
  else()
    message(STATUS "Has clang::preserve_all")
  endif()
endif()

# FEX_PRESERVE_ALL_ATTR expands to the attribute only on arm64 with working support;
# FEX_HAS_PRESERVE_ALL_ATTR is always defined as 0 or 1.
if (ARCHITECTURE_arm64 AND HAS_CLANG_PRESERVE_ALL)
  add_compile_definitions("FEX_PRESERVE_ALL_ATTR=__attribute__((preserve_all))" "FEX_HAS_PRESERVE_ALL_ATTR=1")
else()
  add_compile_definitions("FEX_PRESERVE_ALL_ATTR=" "FEX_HAS_PRESERVE_ALL_ATTR=0")
endif()

# Probe: is glibc's program_invocation_name available?
check_cxx_source_compiles(
  "
  #define _GNU_SOURCE
  #include <errno.h>
  int main() {
  return program_invocation_name == nullptr;
  }"
  HAS_PROGRAM_INVOCATION_NAME)
# A failed probe leaves the result variable empty, which previously produced
# the empty macro "HAS_PROGRAM_INVOCATION_NAME=" (invalid inside `#if`).
# Always emit an explicit 0 or 1, matching FEX_HAS_PRESERVE_ALL_ATTR above.
if (HAS_PROGRAM_INVOCATION_NAME)
  add_compile_definitions(HAS_PROGRAM_INVOCATION_NAME=1)
else()
  add_compile_definitions(HAS_PROGRAM_INVOCATION_NAME=0)
endif()

if (ENABLE_VIXL_SIMULATOR)
  # We can run the simulator on both x86-64 or AArch64 hosts
  add_compile_definitions(VIXL_SIMULATOR=1 VIXL_INCLUDE_SIMULATOR_AARCH64=1)
endif()

# Route compiler invocations through ccache when it is installed; silently skipped otherwise.
if (ENABLE_CCACHE)
  find_program(CCACHE_PROGRAM ccache)
  if(CCACHE_PROGRAM)
    message(STATUS "CCache enabled")
    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
  endif()
endif()

# Instrumentation flags are added to both the compile and the link line.
if (ENABLE_XRAY)
  add_compile_options(-fxray-instrument)
  link_libraries(-fxray-instrument)
endif()

if (ENABLE_COMPILE_TIME_TRACE)
  add_compile_options(-ftime-trace)
  link_libraries(-ftime-trace)
endif()

set(PTHREAD_LIB pthread)

# USE_LINKER is handed to the compiler driver as -fuse-ld=<value>.
if (USE_LINKER)
  message(STATUS "Overriding linker to: ${USE_LINKER}")
  add_link_options("-fuse-ld=${USE_LINKER}")
endif()

if (ENABLE_LIBCXX)
  message(WARNING "This is an unsupported configuration and should only be used for testing")
  # NOTE(review): -std=c++11 is appended here although CMAKE_CXX_STANDARD is 20 — confirm this is intended.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -stdlib=libc++")
  set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
endif()

if (NOT ENABLE_OFFLINE_TELEMETRY)
  # Disable FEX offline telemetry entirely if asked
  add_compile_definitions(FEX_DISABLE_TELEMETRY=1)
endif()

if (ENABLE_UBSAN)
  # See https://github.com/FEX-Emu/FEX/pull/4494#issuecomment-2800608944
  # and related discussion for the use of -fno-sanitize=alignment -fno-sanitize=function
  # with UBSAN.
  # alignment: we don't follow a strict alignment policy, for example IR uses packed structs
  # that are regularly accessed unaligned.
  # function: syscalls cast function pointers to void (*)(unsigned long...), causing warnings
  # related to this access.
  add_compile_definitions(ENABLE_UBSAN=1)
  add_compile_options(-fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize=function -fno-sanitize-recover=undefined)
  link_libraries(-fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize=alignment -fno-sanitize=function -fno-sanitize-recover=undefined)
endif()

if (ENABLE_ASAN)
  add_compile_definitions(ENABLE_ASAN=1)
  add_compile_options(-fno-omit-frame-pointer -fsanitize=address -fsanitize-address-use-after-scope)
  link_libraries(-fno-omit-frame-pointer -fsanitize=address -fsanitize-address-use-after-scope)
endif()

# Unlike UBSAN/ASAN above, TSAN does not add an ENABLE_* compile definition.
if (ENABLE_TSAN)
  add_compile_options(-fno-omit-frame-pointer -fsanitize=thread)
  link_libraries(-fno-omit-frame-pointer -fsanitize=thread)
endif()

if (ENABLE_COVERAGE)
  add_compile_options(-fprofile-instr-generate -fcoverage-mapping)
  link_libraries(-fprofile-instr-generate -fcoverage-mapping)
endif()

if (ENABLE_JEMALLOC_GLIBC_ALLOC)
  # The glibc jemalloc subproject which hooks the glibc allocator.
  # Required for thunks to work.
  # All host native libraries will use this allocator, while *most* other FEX internal allocations will use the other jemalloc allocator.
  add_subdirectory(External/jemalloc_glibc/)
elseif (NOT MINGW)
  # MinGW always disables this allocator (see the compiler checks), so the warning is only shown elsewhere.
  message(STATUS
    " jemalloc glibc allocator disabled!\n"
    " This is not a recommended configuration!\n"
    " This will very explicitly break thunk execution!\n"
    " Use at your own risk!")
endif()

if (ENABLE_FEX_ALLOCATOR)
  # The rpmalloc subproject that all FEXCore fextl objects allocate through.
  add_subdirectory(External/rpmalloc/)
elseif (NOT MINGW)
  message (STATUS
    " FEX allocator is disabled!\n"
    " This is not a recommended configuration!\n"
    " This will very explicitly break 32-bit application execution!\n"
    " Use at your own risk!")
endif()

# Emit CodeView debug info and ask the linker to write a PDB.
if (USE_PDB_DEBUGINFO)
  add_compile_options(-g -gcodeview)
  add_link_options(-g -Wl,--pdb=)
endif()

# Keep frame pointers in RelWithDebInfo builds and omit them in Release builds.
# NOTE(review): CMAKE_LINKER_FLAGS_<CONFIG> is not a variable CMake itself consumes
# (the per-config linker variables are CMAKE_{EXE,SHARED,MODULE}_LINKER_FLAGS_<CONFIG>),
# so the two CMAKE_LINKER_FLAGS_* assignments below look like no-ops — confirm before relying on them.
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer")
set(CMAKE_LINKER_FLAGS_RELWITHDEBINFO "${CMAKE_LINKER_FLAGS_RELWITHDEBINFO} -fno-omit-frame-pointer")

set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -fomit-frame-pointer")
set(CMAKE_LINKER_FLAGS_RELEASE "${CMAKE_LINKER_FLAGS_RELEASE} -fomit-frame-pointer")

## Modules ##
# Make the project's own CMake modules (Find*.cmake, LinkerGC, ...) discoverable.
list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/Data/CMake/)

include(LinkerGC)

## Externals ##
# Pattern used throughout: prefer an installed package, fall back to the bundled copy under External/.

find_package(unordered_dense QUIET CONFIG)
if (NOT unordered_dense_FOUND)
  add_subdirectory(External/unordered_dense)
endif()

# CTest defines the BUILD_TESTING option checked below.
include(CTest)
# vixl is needed by the tests as well as by the disassembler and simulator features.
if (BUILD_TESTING OR ENABLE_VIXL_DISASSEMBLER OR ENABLE_VIXL_SIMULATOR)
  add_subdirectory(External/vixl/)
endif()

if (ENABLE_ZYDIS)
  find_package(Zycore 1.5 MODULE QUIET)
  find_package(Zydis 4.0 MODULE QUIET)

  # Both imported targets must exist for the system copy to be used.
  if (TARGET Zydis::Zydis AND TARGET Zycore::Zycore)
    message(STATUS "Using system Zydis")
  else()
    set(ZYDIS_BUILD_TOOLS OFF CACHE BOOL "" FORCE)
    set(ZYDIS_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)

    message(STATUS "Using bundled Zydis")
    add_subdirectory(External/zydis/)
  endif()
endif()

# The profiler backend name was upper-cased earlier, hence the "TRACY" comparison.
if (ENABLE_FEXCORE_PROFILER AND FEXCORE_PROFILER_BACKEND STREQUAL "TRACY")
  add_subdirectory(External/tracy)
endif()

find_package(Python 3.9 REQUIRED COMPONENTS Interpreter)

# Libraries are built statically unless a target says otherwise.
set(BUILD_SHARED_LIBS OFF)

# A system xxhash is only looked up for native builds; cross builds always use the bundled copy.
if (NOT CMAKE_CROSSCOMPILING)
  find_package(xxhash MODULE QUIET)
endif()

if (NOT TARGET xxHash::xxhash)
  set(XXHASH_BUNDLED_MODE TRUE)
  set(XXHASH_BUILD_XXHSUM FALSE)
  add_subdirectory(External/xxhash/cmake_unofficial/)
endif()

add_compile_options(-Wno-trigraphs)
# Bake the data directory (with trailing slash) into the binaries as a string literal.
add_compile_definitions(GLOBAL_DATA_DIRECTORY="${DATA_DIRECTORY}/")

if (BUILD_TESTING)
  find_package(Catch2 3 QUIET)
  if (NOT Catch2_FOUND)
    add_subdirectory(External/Catch2/)

    # Pull in catch_discover_tests definition
    # NOTE(review): presumably the bundled Catch2 still ships this directory — confirm the path exists.
    list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/External/Catch2/contrib/")
  endif()

  include(Catch)
else ()
  # Override any previously generated test list to avoid running stale test binaries
  file(GENERATE OUTPUT CTestTestfile.cmake CONTENT "# No tests since BUILD_TESTING is disabled")
endif()

find_package(fmt QUIET)
if (NOT fmt_FOUND)
  # Disable fmt install
  set(FMT_INSTALL OFF)
  add_subdirectory(External/fmt/)
endif()

find_package(range-v3 QUIET)
if (NOT range-v3_FOUND)
  add_subdirectory(External/range-v3/)
  # Only applied to the bundled copy; a system range-v3 keeps its own settings.
  target_compile_definitions(range-v3 INTERFACE RANGES_DISABLE_DEPRECATED_WARNINGS)
endif()

# tiny-json is always taken from the source tree (no system lookup).
add_subdirectory(External/tiny-json/)

# Project-wide include roots: the source tree and its generated counterpart in the build tree.
include_directories(Source/)
include_directories("${CMAKE_BINARY_DIR}/Source/")

include(CheckCXXCompilerFlag)

# Add in diagnostic colours if the option is available.
# Ninja code generator will kill colours if this isn't here
check_cxx_compiler_flag(-fdiagnostics-color=always GCC_COLOR)
check_cxx_compiler_flag(-fcolor-diagnostics CLANG_COLOR)
check_cxx_compiler_flag(-Wno-deprecated-enum-enum-conversion ENUM_ENUM_WARNING)

if (GCC_COLOR)
  add_compile_options(-fdiagnostics-color=always)
endif()
if (CLANG_COLOR)
  add_compile_options(-fcolor-diagnostics)
endif()

if(ENUM_ENUM_WARNING)
  add_compile_options(-Wno-deprecated-enum-enum-conversion)
endif()

# GCC enables -Wchanges-meaning by default and treats some cases as an error
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  add_compile_options(-Wno-error=changes-meaning)
endif()

if(ENABLE_WERROR OR ENABLE_STRICT_WERROR)
  add_compile_options(-Werror)
  if (NOT ENABLE_STRICT_WERROR)
    # Disable some Werror that can add frustration when developing
    add_compile_options(-Wno-error=unused-variable)
  endif()
endif()

# Collect -march/-mcpu tuning flags into FEX_TUNE_COMPILE_FLAGS.
# TUNE_ARCH: "generic" adds nothing; any other value must be accepted as -march=<value>.
# TUNE_CPU:  "native" auto-detects, "none" adds nothing; any other value must be
#            accepted as -mcpu=<value>.
set(FEX_TUNE_COMPILE_FLAGS)
if (NOT TUNE_ARCH STREQUAL "generic")
  check_cxx_compiler_flag("-march=${TUNE_ARCH}" COMPILER_SUPPORTS_ARCH_TYPE)
  if(COMPILER_SUPPORTS_ARCH_TYPE)
    list(APPEND FEX_TUNE_COMPILE_FLAGS "-march=${TUNE_ARCH}")
  else()
    message(FATAL_ERROR "Trying to compile arch type '${TUNE_ARCH}' but the compiler doesn't support this")
  endif()
endif()

if (TUNE_CPU STREQUAL "native")
  if(ARCHITECTURE_arm64)
    # The version threshold is deliberately unreachable, so this branch is currently disabled
    if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 999999.0)
      # Clang 12.0 fixed the -mcpu=native bug with mixed big.little implementers
      # Clang can not currently check for native Apple M1 type in hypervisor. Currently disabled
      check_cxx_compiler_flag("-mcpu=native" COMPILER_SUPPORTS_CPU_TYPE)
      if(COMPILER_SUPPORTS_CPU_TYPE)
        list(APPEND FEX_TUNE_COMPILE_FLAGS "-mcpu=native")
      endif()
    else()
      # Ask the helper script for a CPU name based on /proc/cpuinfo and the compiler version
      execute_process(COMMAND python3 "${PROJECT_SOURCE_DIR}/Scripts/aarch64_fit_native.py" "/proc/cpuinfo" "${CMAKE_CXX_COMPILER_VERSION}"
        OUTPUT_VARIABLE AARCH64_CPU)

      # Quote the expansion: if the script printed nothing (e.g. it failed to run),
      # an unquoted empty value would disappear and string(STRIP) would abort
      # configuration with a wrong-argument-count error.
      string(STRIP "${AARCH64_CPU}" AARCH64_CPU)

      # A non-zero exit status from the script selects the cortex-a78 fallback
      execute_process(COMMAND python3 "${PROJECT_SOURCE_DIR}/Scripts/NeedDisabledSVE.py"
        RESULT_VARIABLE NEEDS_SVE_DISABLED)
      if (NEEDS_SVE_DISABLED)
        message(STATUS "Platform has bugged SVE. Disabling")
        set(AARCH64_CPU "cortex-a78")
      endif()

      check_cxx_compiler_flag("-mcpu=${AARCH64_CPU}" COMPILER_SUPPORTS_CPU_TYPE)
      if(COMPILER_SUPPORTS_CPU_TYPE)
        list(APPEND FEX_TUNE_COMPILE_FLAGS "-mcpu=${AARCH64_CPU}")
      endif()
    endif()
  else()
    check_cxx_compiler_flag("-march=native" COMPILER_SUPPORTS_MARCH_NATIVE)
    if(COMPILER_SUPPORTS_MARCH_NATIVE)
      list(APPEND FEX_TUNE_COMPILE_FLAGS "-march=native")
    endif()
  endif()
elseif (NOT TUNE_CPU STREQUAL "none")
  check_cxx_compiler_flag("-mcpu=${TUNE_CPU}" COMPILER_SUPPORTS_CPU_TYPE)
  if(COMPILER_SUPPORTS_CPU_TYPE)
    list(APPEND FEX_TUNE_COMPILE_FLAGS "-mcpu=${TUNE_CPU}")
  else()
    message(FATAL_ERROR "Trying to compile cpu type '${TUNE_CPU}' but the compiler doesn't support this")
  endif()
endif()

# Version string reported by FEX.
# OVERRIDE_VERSION=detect asks git; any other value is used verbatim.
# "FEX-Unknown" is the fallback when detection is impossible or fails.
set(GIT_DESCRIBE_STRING "FEX-Unknown")

if (OVERRIDE_VERSION STREQUAL "detect")
  find_package(Git)

  if (GIT_FOUND)
    # Capture into scratch variables: execute_process always overwrites its
    # OUTPUT_VARIABLE, so writing straight into GIT_DESCRIBE_STRING would replace
    # the fallback with an empty string whenever `git describe` fails
    # (no tags, shallow clone, source tarball without .git, ...).
    execute_process(
      COMMAND ${GIT_EXECUTABLE} describe --abbrev=7
      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
      RESULT_VARIABLE GIT_DESCRIBE_RESULT
      OUTPUT_VARIABLE GIT_DESCRIBE_OUTPUT
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)

    if (GIT_DESCRIBE_RESULT STREQUAL "0" AND NOT GIT_DESCRIBE_OUTPUT STREQUAL "")
      set(GIT_DESCRIBE_STRING "${GIT_DESCRIBE_OUTPUT}")
    endif()
  endif()
else()
  set(GIT_DESCRIBE_STRING "${OVERRIDE_VERSION}")
endif()

# Commit hash reported by FEX.
# OVERRIDE_HASH=detect asks git for HEAD; any other value is used verbatim.
set(GIT_HASH "Unknown")

if (OVERRIDE_HASH STREQUAL "detect")
  find_package(Git)

  if (GIT_FOUND)
    # The command's stdout is stored in GIT_HASH whether or not the command succeeds
    execute_process(
      COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
      WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
      OUTPUT_VARIABLE GIT_HASH
      ERROR_QUIET
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()
else()
  set(GIT_HASH "${OVERRIDE_HASH}")
endif()

message(STATUS "FEX version: ${GIT_DESCRIBE_STRING}")
message(STATUS "FEX commit: ${GIT_HASH}")

# Prepends 0x to every two-character sequence in the hash,
# OR the final character of the hash, to plumb it for C++ usage. e.g.:
# -DOVERRIDE_HASH=123456aa => 0x12, 0x34, 0x56, 0xaa,
# -DOVERRIDE_HASH=12345678a => 0x12, 0x34, 0x56, 0x78, 0xa,
# NOTE(review): a non-hex GIT_HASH (such as the "Unknown" default) is transformed
# the same way - confirm the consumers of GIT_HASH_ARRAY tolerate that.
string(REGEX
  REPLACE "(..|.$)" "0x\\1, "
  GIT_HASH_ARRAY "${GIT_HASH}")

# include-what-you-use is only wired in when requested and when the tool is found
if (ENABLE_IWYU)
  find_program(IWYU_EXE
    NAMES iwyu include-what-you-use)
  if (IWYU_EXE)
    message(STATUS "IWYU enabled")
    set(CMAKE_CXX_INCLUDE_WHAT_YOU_USE "${IWYU_EXE}")
  endif()
endif()

add_compile_options(-Wall)

if (BUILD_TESTING)
  message(STATUS "Unit tests are enabled")

  # TEST_JOB_COUNT is a user-settable cache entry; an empty value means "not overridden"
  set(TEST_JOB_COUNT "" CACHE STRING "Override number of parallel jobs to use while running tests")
  if (TEST_JOB_COUNT)
    message(STATUS "Running tests with ${TEST_JOB_COUNT} jobs")
  elseif(CMAKE_VERSION VERSION_LESS "3.29")
    # Without an override, CMake older than 3.29 gets the count from `nproc`
    execute_process(COMMAND "nproc" OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TEST_JOB_COUNT)
  endif()
  # On CMake 3.29+ without an override this is a bare "-j"
  set(TEST_JOB_FLAG "-j${TEST_JOB_COUNT}")
endif()

# Bundled math libraries, followed by FEX's own header utilities, code emitter and core
add_subdirectory(External/SoftFloat-3e/)
add_subdirectory(External/cephes/)
add_subdirectory(FEXHeaderUtils/)
add_subdirectory(CodeEmitter/)
add_subdirectory(FEXCore/)

# binfmt_misc files are only set up for arm64, non-MinGW, non-Steam builds
if (ARCHITECTURE_arm64 AND NOT MINGW AND NOT BUILD_STEAM_SUPPORT)
  # Binfmt_misc files must be installed prior to Source/ installs
  add_subdirectory(Data/binfmts/)
endif()

add_subdirectory(Source/)

# Application configs are skipped for Steam builds
if (NOT BUILD_STEAM_SUPPORT)
  add_subdirectory(Data/AppConfig/)
endif()

# Install the ThunksDB file
# (the glob picks up every *.json placed directly in Data/)
file(GLOB CONFIG_SOURCES CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/Data/*.json)

# Any application configuration json file gets installed
foreach(CONFIG_SRC ${CONFIG_SOURCES})
  install(FILES ${CONFIG_SRC}
    DESTINATION ${DATA_DIRECTORY}/
    COMPONENT Runtime)
endforeach()

if (BUILD_TESTING)
  add_subdirectory(unittests/)
endif()

# Thunk libraries: the generator and host libraries are built in this project,
# while the guest libraries are built as two external projects (64-bit and 32-bit)
# that each receive their own toolchain file.
if (BUILD_THUNKS)
  set(FEX_PROJECT_SOURCE_DIR ${PROJECT_SOURCE_DIR})
  add_subdirectory(ThunkLibs/Generator)

  # Thunk targets for both host libraries and IDE integration
  add_subdirectory(ThunkLibs/HostLibs)

  # Thunk targets for IDE integration of guest code, only
  add_subdirectory(ThunkLibs/GuestLibs)

  # Thunk targets for guest libraries
  include(ExternalProject)
  # 64-bit guest libraries, configured in the "Guest" binary directory.
  # Installation is not done by ExternalProject (empty INSTALL_COMMAND); see install(CODE) below.
  ExternalProject_Add(guest-libs
    PREFIX guest-libs
    SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ThunkLibs/GuestLibs"
    BINARY_DIR "Guest"
    CMAKE_ARGS
      "-DBITNESS=64"
      "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
      "-DBUILD_FEX_LINUX_TESTS=${BUILD_FEX_LINUX_TESTS}"
      "-DENABLE_CLANG_THUNKS=${ENABLE_CLANG_THUNKS}"
      "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${X86_64_TOOLCHAIN_FILE}"
      "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
      "-DFEX_PROJECT_SOURCE_DIR=${FEX_PROJECT_SOURCE_DIR}"
      "-DGENERATOR_EXE=$<TARGET_FILE:thunkgen>"
      "-DX86_DEV_ROOTFS=${X86_DEV_ROOTFS}"
    INSTALL_COMMAND ""
    BUILD_ALWAYS ON
    DEPENDS thunkgen)

  # 32-bit guest libraries: same sources, BITNESS=32, the 32-bit toolchain file, "Guest_32" binary directory
  ExternalProject_Add(guest-libs-32
    PREFIX guest-libs-32
    SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/ThunkLibs/GuestLibs"
    BINARY_DIR "Guest_32"
    CMAKE_ARGS
      "-DBITNESS=32"
      "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
      "-DBUILD_FEX_LINUX_TESTS=${BUILD_FEX_LINUX_TESTS}"
      "-DENABLE_CLANG_THUNKS=${ENABLE_CLANG_THUNKS}"
      "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${X86_32_TOOLCHAIN_FILE}"
      "-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}"
      "-DFEX_PROJECT_SOURCE_DIR=${FEX_PROJECT_SOURCE_DIR}"
      "-DGENERATOR_EXE=$<TARGET_FILE:thunkgen>"
      "-DX86_DEV_ROOTFS=${X86_DEV_ROOTFS}"
    INSTALL_COMMAND ""
    BUILD_ALWAYS ON
    DEPENDS thunkgen)

  # At install time, run the "install" target of each guest build tree
  install(
    CODE "message(\"-- Installing: guest-libs\")"
    CODE "
      execute_process(COMMAND ${CMAKE_COMMAND} --build . --target install
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/Guest)"
    DEPENDS guest-libs
    COMPONENT Runtime)

  install(
    CODE "message(\"-- Installing: guest-libs-32\")"
    CODE "
      execute_process(COMMAND ${CMAKE_COMMAND} --build . --target install
        WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/Guest_32)"
    DEPENDS guest-libs-32
    COMPONENT Runtime)

  # Matching uninstall targets, attached to the existing "uninstall" target
  add_custom_target(uninstall_guest-libs
    COMMAND ${CMAKE_COMMAND} "--build" "." "--target" "uninstall"
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/Guest)

  add_custom_target(uninstall_guest-libs-32
    COMMAND ${CMAKE_COMMAND} "--build" "." "--target" "uninstall"
    WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/Guest_32)

  add_dependencies(uninstall uninstall_guest-libs)
  add_dependencies(uninstall uninstall_guest-libs-32)
endif()

# Steam integration sources are only added for non-MinGW builds with BUILD_STEAM_SUPPORT set
if (NOT MINGW AND BUILD_STEAM_SUPPORT)
  add_subdirectory(Source/Steam/)
endif()


================================================
FILE: CMakeSettings.json
================================================
﻿{
	"environments": [
		{
			"BuildPath": "${projectDir}\\out\\build\\${name}",
			"InstallPath": "${projectDir}\\out\\install\\${name}",
			"clangcl": "clang-cl.exe",
			"cc": "clang",
			"cxx": "clang++"
		}
	],
	"configurations": [
		{
			"name": "WSL-Clang-Debug",
			"generator": "Ninja",
			"configurationType": "Debug",
			"buildRoot": "${env.BuildPath}",
			"installRoot": "${env.InstallPath}",
			"cmakeExecutable": "/usr/bin/cmake",
			"cmakeCommandArgs": "",
			"buildCommandArgs": "-v",
			"ctestCommandArgs": "",
			"wslPath": "${defaultWSLPath}",
			"inheritEnvironments": [ "linux_clang_x64" ],
			"addressSanitizerRuntimeFlags": "detect_leaks=0",
			"variables": [
				{
					"name": "WSL",
					"value": "TRUE",
					"type": "BOOL"
				}
			]
		},
		{
			"name": "WSL-Clang-Release",
			"generator": "Ninja",
			"configurationType": "RelWithDebInfo",
			"buildRoot": "${env.BuildPath}",
			"installRoot": "${env.InstallPath}",
			"cmakeExecutable": "/usr/bin/cmake",
			"cmakeCommandArgs": "",
			"buildCommandArgs": "-v",
			"ctestCommandArgs": "",
			"wslPath": "${defaultWSLPath}",
			"inheritEnvironments": [ "linux_clang_x64" ],
			"addressSanitizerRuntimeFlags": "detect_leaks=0",
			"variables": [
				{
					"name": "WSL",
					"value": "TRUE",
					"type": "BOOL"
				}
			]
		},
		{
			"name": "x86-Clang-Cross-Debug",
			"generator": "Ninja",
			"configurationType": "Debug",
			"buildRoot": "${env.BuildPath}",
			"installRoot": "${env.InstallPath}",
			"cmakeCommandArgs": "",
			"buildCommandArgs": "-v",
			"ctestCommandArgs": "",
			"inheritEnvironments": [ "clang_cl_x86" ],
			"variables": [
				{
					"name": "CMAKE_C_COMPILER",
					"value": "${env.cc}",
					"type": "STRING"
				},
				{
					"name": "CMAKE_CXX_COMPILER",
					"value": "${env.cxx}",
					"type": "STRING"
				},
				{
					"name": "CMAKE_SYSROOT",
					"value": "${env.fexsysroot}",
					"type": "STRING"
				}
			]
		},
		{
			"name": "x64-Clang-Cross-Release",
			"generator": "Ninja",
			"configurationType": "RelWithDebInfo",
			"buildRoot": "${env.BuildPath}",
			"installRoot": "${env.InstallPath}",
			"cmakeCommandArgs": "",
			"buildCommandArgs": "-v",
			"ctestCommandArgs": "",
			"inheritEnvironments": [ "clang_cl_x86" ],
			"variables": [
				{
					"name": "CMAKE_C_COMPILER",
					"value": "${env.cc}",
					"type": "STRING"
				},
				{
					"name": "CMAKE_CXX_COMPILER",
					"value": "${env.cxx}",
					"type": "STRING"
				},
				{
					"name": "CMAKE_SYSROOT",
					"value": "${env.fexsysroot}",
					"type": "STRING"
				}
			]
		},
		{
			"name": "Linux-Clang-Remote-Debug",
			"generator": "Ninja",
			"configurationType": "Debug",
			"cmakeExecutable": "/usr/bin/cmake",
			"remoteCopySourcesExclusionList": [ ".vs", ".vscode", ".git", ".github", "build", "out", "bin" ],
			"cmakeCommandArgs": "",
			"buildCommandArgs": "-v",
			"ctestCommandArgs": "",
			"inheritEnvironments": [ "linux_clang_x64" ],
			"remoteMachineName": "${env.fexremote}",
			"remoteCMakeListsRoot": "$HOME/projects/.vs/${projectDirName}/src",
			"remoteBuildRoot": "$HOME/projects/.vs/${projectDirName}/build/${name}",
			"remoteInstallRoot": "$HOME/projects/.vs/${projectDirName}/install/${name}",
			"remoteCopySources": true,
			"rsyncCommandArgs": "-t --delete --delete-excluded",
			"remoteCopyBuildOutput": false,
			"remoteCopySourcesMethod": "rsync",
			"addressSanitizerRuntimeFlags": "detect_leaks=0",
			"variables": []
		}
	]
}


================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, sex characteristics, gender identity and expression,
level of experience, education, socio-economic status, nationality, personal
appearance, race, religion, or sexual identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
 advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
 address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
 professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at team@fex-emu.com. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html

[homepage]: https://www.contributor-covenant.org

For answers to common questions about this code of conduct, see
https://www.contributor-covenant.org/faq


================================================
FILE: CodeEmitter/CMakeLists.txt
================================================
# CodeEmitter is an INTERFACE library: it only provides this directory as an
# include path to the targets that link against it.
add_library(CodeEmitter INTERFACE)
target_include_directories(CodeEmitter INTERFACE .)


================================================
FILE: CodeEmitter/CodeEmitter/ALUOps.inl
================================================
// SPDX-License-Identifier: MIT
/* ALU instruction emitters.
 *
 * Almost all of these operations have `ARMEmitter::Size` as their first argument.
 * This allows both 32-bit and 64-bit selection of how that instruction is going to operate.
 *
 * Some emitter operations explicitly use `XRegister` or `WRegister`.
 * This is usually due to the instruction only supporting one operating size.
 * In some cases, though, it is merely a minor convenience without any performance implications.
 *
 * FEX-Emu ALU operations usually have a 32-bit or 64-bit operating size encoded in the IR operation.
 * This allows FEX to use a single helper function which decodes to both handlers.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

private:
  // True when Imm lies within [-1048576, 1048575], the byte-offset window checked before emitting ADR.
  static bool IsADRRange(int64_t Imm) {
    constexpr int64_t LowestOffset = -1048576;
    constexpr int64_t HighestOffset = 1048575;
    return LowestOffset <= Imm && Imm <= HighestOffset;
  }
  // True when Imm lies within [-4294967296, 4294963200], the byte-offset window checked before emitting ADRP.
  static bool IsADRPRange(int64_t Imm) {
    constexpr int64_t LowestOffset = -4294967296;
    constexpr int64_t HighestOffset = 4294963200;
    return LowestOffset <= Imm && Imm <= HighestOffset;
  }
  // True when the low 12 bits of Imm are all clear, i.e. Imm is a multiple of 4096.
  static bool IsADRPAligned(int64_t Imm) {
    constexpr int64_t PageOffsetMask = 0xFFF;
    const int64_t OffsetInPage = Imm & PageOffsetMask;
    return OffsetInPage == 0;
  }
public:
  // PC relative
  // ADR with a caller-supplied immediate, handed to the PC-relative encoder as-is.
  void adr(ARMEmitter::Register rd, uint32_t Imm) {
    constexpr uint32_t AdrOpcode = 0b0001'0000 << 24;
    DataProcessing_PCRel_Imm(AdrOpcode, rd, Imm);
  }

  // ADR targeting an already-bound label.
  // The immediate is the byte distance from the current cursor to the label.
  // Returns Failure, emitting nothing, when that distance is outside the ADR range.
  [[nodiscard]] BranchEncodeSucceeded adr(ARMEmitter::Register rd, const BackwardLabel* Label) {
    // Range-check the full 64-bit distance. Narrowing to 32 bits first could wrap a
    // label that is gigabytes away into the encodable window and emit a bogus ADR.
    const int64_t Imm = static_cast<int64_t>(Label->Location - GetCursorAddress<uint8_t*>());
    if (!IsADRRange(Imm)) {
      // Can't encode.
      return BranchEncodeSucceeded::Failure;
    }

    constexpr uint32_t Op = 0b0001'0000 << 24;
    // Safe to narrow now: the range check guarantees the value fits.
    DataProcessing_PCRel_Imm(Op, rd, static_cast<int32_t>(Imm));
    return BranchEncodeSucceeded::Success;
  }
  // ADR targeting a label that has not been bound yet.
  // The current cursor is recorded on the label as an ADR reference, then a
  // placeholder ADR with a zero immediate is emitted.
  [[nodiscard]] BranchEncodeSucceeded adr(ARMEmitter::Register rd, ForwardLabel* Label) {
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::ADR,
                              });

    constexpr uint32_t AdrOpcode = 0b0001'0000 << 24;
    DataProcessing_PCRel_Imm(AdrOpcode, rd, 0);

    // Forward label doesn't know if it can encode until Bind.
    return BranchEncodeSucceeded::Success;
  }

  // ADR on a bidirectional label: uses the backward half once it has a location,
  // otherwise registers a forward reference.
  [[nodiscard]] BranchEncodeSucceeded adr(ARMEmitter::Register rd, BiDirectionalLabel* Label) {
    return Label->Backward.Location ? adr(rd, &Label->Backward) : adr(rd, &Label->Forward);
  }

  // ADRP with a caller-supplied immediate, handed to the PC-relative encoder as-is.
  void adrp(ARMEmitter::Register rd, uint32_t Imm) {
    constexpr uint32_t AdrpOpcode = 0b1001'0000 << 24;
    DataProcessing_PCRel_Imm(AdrpOpcode, rd, Imm);
  }

  // ADRP targeting an already-bound label.
  // The distance is measured from the 4KiB page containing the cursor to the label.
  // Returns Failure, emitting nothing, unless that distance is in ADRP range and a
  // whole number of pages (which requires the label itself to be page aligned).
  [[nodiscard]] BranchEncodeSucceeded adrp(ARMEmitter::Register rd, const BackwardLabel* Label) {
    const int64_t Imm = reinterpret_cast<int64_t>(Label->Location) - (GetCursorAddress<int64_t>() & ~0xFFFLL);

    if (IsADRPRange(Imm) && IsADRPAligned(Imm)) {
      constexpr uint32_t Op = 0b1001'0000 << 24;
      // The ADRP immediate counts 4KiB pages, not bytes. Shift the byte distance down,
      // matching LongAddressGen, which passes `ADRPImm >> 12` to this same encoding.
      DataProcessing_PCRel_Imm(Op, rd, Imm >> 12);
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  // ADRP targeting a label that has not been bound yet.
  // The current cursor is recorded on the label as an ADRP reference, then a
  // placeholder ADRP with a zero immediate is emitted.
  [[nodiscard]] BranchEncodeSucceeded adrp(ARMEmitter::Register rd, ForwardLabel* Label) {
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::ADRP,
                              });

    constexpr uint32_t AdrpOpcode = 0b1001'0000 << 24;
    DataProcessing_PCRel_Imm(AdrpOpcode, rd, 0);

    // Forward label doesn't know if it can encode until Bind.
    return BranchEncodeSucceeded::Success;
  }

  // ADRP on a bidirectional label: uses the backward half once it has a location,
  // otherwise registers a forward reference.
  [[nodiscard]] BranchEncodeSucceeded adrp(ARMEmitter::Register rd, BiDirectionalLabel* Label) {
    return Label->Backward.Location ? adrp(rd, &Label->Backward) : adrp(rd, &Label->Forward);
  }

  // Materialises the address of an already-bound label into rd using the shortest
  // sequence that reaches it:
  //   1. ADR                when the byte distance is within ADR range,
  //   2. ADRP (+ ADD)       when it is within ADRP range,
  //   3. MOVZ + MOVK + MOVK otherwise, loading the absolute address.
  // Always reports Success except when the delegated ADR reports otherwise.
  [[nodiscard]] BranchEncodeSucceeded LongAddressGen(ARMEmitter::Register rd, const BackwardLabel* Label) {
    const auto SLocation = reinterpret_cast<int64_t>(Label->Location);
    const auto ULocation = std::bit_cast<uint64_t>(SLocation);

    const int64_t Imm = SLocation - (GetCursorAddress<int64_t>());

    if (IsADRRange(Imm)) {
      // If the range is in ADR range then we can just use ADR.
      return adr(rd, Label);
    }
    if (IsADRPRange(Imm)) {
      // Page-to-page distance between the cursor's page and the label's page.
      const int64_t ADRPImm = (SLocation & ~0xFFFLL) - (GetCursorAddress<int64_t>() & ~0xFFFLL);

      // If the range is in the ADRP range then we can use ADRP.
      const bool NeedsOffset = !IsADRPAligned(ULocation);
      const uint64_t AlignedOffset = ULocation & 0xFFFULL;

      // First emit ADRP
      adrp(rd, ADRPImm >> 12);

      if (NeedsOffset) {
        // Now emit an add for the offset within the page
        add(ARMEmitter::Size::i64Bit, rd, rd, AlignedOffset);
      }

      return BranchEncodeSucceeded::Success;
    }

    // Stinky path, we need to load the address as a sequence of movz+movk+movk.
    // This must be the absolute label address: nothing adds the PC afterwards, so
    // loading the PC-relative distance would leave rd pointing nowhere useful.
    // Only the low 48 bits are materialised.
    movz(ARMEmitter::Size::i64Bit, rd, (ULocation >> 32) & 0xFFFF, 32);
    movk(ARMEmitter::Size::i64Bit, rd, (ULocation >> 16) & 0xFFFF, 16);
    movk(ARMEmitter::Size::i64Bit, rd, ULocation & 0xFFFF);

    return BranchEncodeSucceeded::Success;
  }
  // Long address generation for a label that has not been bound yet.
  // Reserves three instruction slots and records the first one on the label as a
  // LONG_ADDRESS_GEN reference.
  [[nodiscard]] BranchEncodeSucceeded LongAddressGen(ARMEmitter::Register rd, ForwardLabel* Label) {
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::LONG_ADDRESS_GEN,
                              });

    // Slot 0 carries the destination register index, slots 1 and 2 are nops.
    // These will be backpatched.
    dc32(rd.Idx());
    nop();
    nop();

    // Forward label doesn't know if it can encode until Bind.
    return BranchEncodeSucceeded::Success;
  }

  // Long address generation on a bidirectional label: uses the backward half once
  // it has a location, otherwise registers a forward reference.
  [[nodiscard]] BranchEncodeSucceeded LongAddressGen(ARMEmitter::Register rd, BiDirectionalLabel* Label) {
    return Label->Backward.Location ? LongAddressGen(rd, &Label->Backward) : LongAddressGen(rd, &Label->Forward);
  }

  // Add/subtract immediate
  // ADD (immediate): rd = rn + Imm. LSL12 is forwarded to the add/sub immediate encoder.
  void add(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    constexpr uint32_t AddOpcode = 0b0001'0001'0 << 23;
    DataProcessing_AddSub_Imm(AddOpcode, s, rd, rn, Imm, LSL12);
  }

  // ADDS (immediate): flag-setting form of the immediate add.
  void adds(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    constexpr uint32_t AddsOpcode = 0b0011'0001'0 << 23;
    DataProcessing_AddSub_Imm(AddsOpcode, s, rd, rn, Imm, LSL12);
  }
  // CMN (immediate): ADDS with the zero register as destination.
  void cmn(ARMEmitter::Size s, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    const auto Discard = ARMEmitter::Reg::zr;
    adds(s, Discard, rn, Imm, LSL12);
  }
  // SUB (immediate): rd = rn - Imm. LSL12 is forwarded to the add/sub immediate encoder.
  void sub(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    constexpr uint32_t SubOpcode = 0b0101'0001'0 << 23;
    DataProcessing_AddSub_Imm(SubOpcode, s, rd, rn, Imm, LSL12);
  }

  // CMP (immediate): uses the same opcode as subs, with Reg::rsp passed as the destination.
  // NOTE(review): presumably Reg::rsp shares register index 31 with the zero register,
  // which is what a flag-setting encoding discards into - confirm against Registers.h.
  void cmp(ARMEmitter::Size s, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    constexpr uint32_t SubsOpcode = 0b0111'0001'0 << 23;
    DataProcessing_AddSub_Imm(SubsOpcode, s, ARMEmitter::Reg::rsp, rn, Imm, LSL12);
  }

  // SUBS (immediate): flag-setting form of the immediate subtract.
  void subs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm, bool LSL12 = false) {
    constexpr uint32_t SubsOpcode = 0b0111'0001'0 << 23;
    DataProcessing_AddSub_Imm(SubsOpcode, s, rd, rn, Imm, LSL12);
  }

  // Min/max immediate
  // Signed maximum against an immediate. Imm must be within [-128, 127].
  void smax(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, int64_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -128 && Imm <= 127, "{} Immediate too large", __func__);
    constexpr uint32_t Selector = 0b0000;
    MinMaxImmediate(Selector, s, rd, rn, Imm);
  }

  // Unsigned maximum against an immediate. Imm must be at most 255.
  void umax(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    LOGMAN_THROW_A_FMT(Imm <= 255, "{} Immediate too large", __func__);
    constexpr uint32_t Selector = 0b0001;
    MinMaxImmediate(Selector, s, rd, rn, Imm);
  }

  // Signed minimum against an immediate. Imm must be within [-128, 127].
  void smin(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, int64_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -128 && Imm <= 127, "{} Immediate too large", __func__);
    constexpr uint32_t Selector = 0b0010;
    MinMaxImmediate(Selector, s, rd, rn, Imm);
  }

  // Unsigned minimum against an immediate. Imm must be at most 255.
  void umin(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    LOGMAN_THROW_A_FMT(Imm <= 255, "{} Immediate too large", __func__);
    constexpr uint32_t Selector = 0b0011;
    MinMaxImmediate(Selector, s, rd, rn, Imm);
  }

  // Logical immediate
  // AND (immediate): Imm is converted to the n/immr/imms bitmask fields; asserts when
  // IsImmLogical reports that Imm cannot be encoded for the given register size.
  void and_(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    uint32_t FieldN, FieldImmR, FieldImmS;
    const auto Encodable = IsImmLogical(Imm, RegSizeInBits(s), &FieldN, &FieldImmS, &FieldImmR);
    LOGMAN_THROW_A_FMT(Encodable, "Couldn't encode immediate to logical op");
    and_(s, rd, rn, FieldN, FieldImmR, FieldImmS);
  }

  // BIC (immediate): AND with the bitwise complement of Imm.
  void bic(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    const uint64_t InvertedMask = ~Imm;
    and_(s, rd, rn, InvertedMask);
  }

  // ANDS (immediate): Imm is converted to the n/immr/imms bitmask fields; asserts when
  // IsImmLogical reports that Imm cannot be encoded for the given register size.
  void ands(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    uint32_t FieldN, FieldImmR, FieldImmS;
    const auto Encodable = IsImmLogical(Imm, RegSizeInBits(s), &FieldN, &FieldImmS, &FieldImmR);
    LOGMAN_THROW_A_FMT(Encodable, "Couldn't encode immediate to logical op");
    ands(s, rd, rn, FieldN, FieldImmR, FieldImmS);
  }

  // BICS (immediate): ANDS with the bitwise complement of Imm.
  void bics(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    const uint64_t InvertedMask = ~Imm;
    ands(s, rd, rn, InvertedMask);
  }

  // ORR (immediate): Imm is converted to the n/immr/imms bitmask fields; asserts when
  // IsImmLogical reports that Imm cannot be encoded for the given register size.
  void orr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    uint32_t FieldN, FieldImmR, FieldImmS;
    const auto Encodable = IsImmLogical(Imm, RegSizeInBits(s), &FieldN, &FieldImmS, &FieldImmR);
    LOGMAN_THROW_A_FMT(Encodable, "Couldn't encode immediate to logical op");
    orr(s, rd, rn, FieldN, FieldImmR, FieldImmS);
  }

  // EOR (immediate): Imm is converted to the n/immr/imms bitmask fields; asserts when
  // IsImmLogical reports that Imm cannot be encoded for the given register size.
  void eor(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    uint32_t FieldN, FieldImmR, FieldImmS;
    const auto Encodable = IsImmLogical(Imm, RegSizeInBits(s), &FieldN, &FieldImmS, &FieldImmR);
    LOGMAN_THROW_A_FMT(Encodable, "Couldn't encode immediate to logical op");
    eor(s, rd, rn, FieldN, FieldImmR, FieldImmS);
  }

  // TST (immediate): ANDS with the zero register as destination.
  void tst(ARMEmitter::Size s, Register rn, uint64_t imm) {
    const auto Discard = Reg::zr;
    ands(s, Discard, rn, imm);
  }

  // Move wide immediate
  // MOVN: Imm must fit in 16 bits and Offset (a bit position) must be a multiple of 16.
  // The encoder receives Offset as a 16-bit slot index.
  void movn(ARMEmitter::Size s, ARMEmitter::Register rd, uint32_t Imm, uint32_t Offset = 0) {
    LOGMAN_THROW_A_FMT((Imm & 0xFFFF0000U) == 0, "Upper bits of move wide not valid");
    LOGMAN_THROW_A_FMT((Offset % 16) == 0, "Offset must be 16bit aligned");

    constexpr uint32_t MovnOpcode = 0b001'0010'100 << 21;
    const uint32_t SlotIndex = Offset / 16;
    DataProcessing_MoveWide(MovnOpcode, s, rd, Imm, SlotIndex);
  }
  // MOV (immediate): MOVZ with a zero offset, operating size chosen by the caller.
  void mov(ARMEmitter::Size s, ARMEmitter::Register rd, uint32_t Imm) {
    constexpr uint32_t NoOffset = 0;
    movz(s, rd, Imm, NoOffset);
  }
  // MOV (immediate) into an X register: 64-bit MOVZ with a zero offset.
  void mov(ARMEmitter::XRegister rd, uint32_t Imm) {
    constexpr uint32_t NoOffset = 0;
    movz(ARMEmitter::Size::i64Bit, rd.R(), Imm, NoOffset);
  }
  // MOV (immediate) into a W register: 32-bit MOVZ with a zero offset.
  void mov(ARMEmitter::WRegister rd, uint32_t Imm) {
    constexpr uint32_t NoOffset = 0;
    movz(ARMEmitter::Size::i32Bit, rd.R(), Imm, NoOffset);
  }

  // MOVZ: Imm must fit in 16 bits and Offset (a bit position) must be a multiple of 16.
  // The encoder receives Offset as a 16-bit slot index.
  void movz(ARMEmitter::Size s, ARMEmitter::Register rd, uint32_t Imm, uint32_t Offset = 0) {
    LOGMAN_THROW_A_FMT((Imm & 0xFFFF0000U) == 0, "Upper bits of move wide not valid");
    LOGMAN_THROW_A_FMT((Offset % 16) == 0, "Offset must be 16bit aligned");

    constexpr uint32_t MovzOpcode = 0b101'0010'100 << 21;
    const uint32_t SlotIndex = Offset / 16;
    DataProcessing_MoveWide(MovzOpcode, s, rd, Imm, SlotIndex);
  }
  // MOVK: Imm must fit in 16 bits and Offset (a bit position) must be a multiple of 16.
  // The encoder receives Offset as a 16-bit slot index.
  void movk(ARMEmitter::Size s, ARMEmitter::Register rd, uint32_t Imm, uint32_t Offset = 0) {
    LOGMAN_THROW_A_FMT((Imm & 0xFFFF0000U) == 0, "Upper bits of move wide not valid");
    LOGMAN_THROW_A_FMT((Offset % 16) == 0, "Offset must be 16bit aligned");

    constexpr uint32_t MovkOpcode = 0b111'0010'100 << 21;
    const uint32_t SlotIndex = Offset / 16;
    DataProcessing_MoveWide(MovkOpcode, s, rd, Imm, SlotIndex);
  }

  // Typed-register conveniences: the operating size follows from the register type.
  // X register forms (64-bit).
  void movn(ARMEmitter::XRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    movn(OpSize, rd.R(), Imm, Offset);
  }
  void movz(ARMEmitter::XRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    movz(OpSize, rd.R(), Imm, Offset);
  }
  void movk(ARMEmitter::XRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    movk(OpSize, rd.R(), Imm, Offset);
  }
  // W register forms (32-bit).
  void movn(ARMEmitter::WRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    movn(OpSize, rd.R(), Imm, Offset);
  }
  void movz(ARMEmitter::WRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    movz(OpSize, rd.R(), Imm, Offset);
  }
  void movk(ARMEmitter::WRegister rd, uint32_t Imm, uint32_t Offset = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    movk(OpSize, rd.R(), Imm, Offset);
  }

  // Bitfield
  // Sign-extend the low byte of rn: SBFM over bits [7:0].
  void sxtb(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t TopSourceBit = 7;
    sbfm(s, rd, rn, 0, TopSourceBit);
  }
  // Sign-extend the low halfword of rn: SBFM over bits [15:0].
  void sxth(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t TopSourceBit = 15;
    sbfm(s, rd, rn, 0, TopSourceBit);
  }
  // Sign-extend a W register into an X register: 64-bit SBFM over bits [31:0].
  void sxtw(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn) {
    constexpr uint32_t TopSourceBit = 31;
    sbfm(ARMEmitter::Size::i64Bit, rd, rn.X(), 0, TopSourceBit);
  }
  // Signed bitfield extract of `width` bits starting at bit `lsb`.
  // width must be non-zero and the field must fit inside the register.
  void sbfx(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t lsb, uint32_t width) {
    LOGMAN_THROW_A_FMT(width > 0, "sbfx needs width > 0");
    LOGMAN_THROW_A_FMT((lsb + width) <= RegSizeInBits(s), "Tried to sbfx a region larger than the register");

    const uint32_t TopFieldBit = lsb + width - 1;
    sbfm(s, rd, rn, lsb, TopFieldBit);
  }
  // Signed bitfield insert in zero: forwards to the shared helper with its first argument set to true.
  void sbfiz(ARMEmitter::Size s, Register rd, Register rn, uint32_t lsb, uint32_t width) {
    constexpr bool SignedVariant = true;
    xbfiz_helper(SignedVariant, s, rd, rn, lsb, width);
  }
  // Arithmetic shift right by an immediate, expressed through SBFM.
  // The shift amount is reduced modulo the register width.
  void asr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t shift) {
    const auto TopBit = RegSizeInBits(s) - 1;
    const uint32_t Amount = shift & TopBit;
    sbfm(s, rd, rn, Amount, TopBit);
  }

  // Zero-extend the low byte of rn: UBFM over bits [7:0].
  void uxtb(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t TopSourceBit = 7;
    ubfm(s, rd, rn, 0, TopSourceBit);
  }
  // Zero-extend the low halfword of rn: UBFM over bits [15:0].
  void uxth(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t TopSourceBit = 15;
    ubfm(s, rd, rn, 0, TopSourceBit);
  }
  // Zero-extend the low word of rn: UBFM over bits [31:0].
  void uxtw(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t TopSourceBit = 31;
    ubfm(s, rd, rn, 0, TopSourceBit);
  }

  // UBFM: raw unsigned bitfield move. The fifth encoder argument is true exactly for 64-bit operation.
  void ubfm(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t immr, uint32_t imms) {
    constexpr uint32_t UbfmOpcode = 0b0101'0011'00 << 22;
    const bool Is64Bit = s == ARMEmitter::Size::i64Bit;
    DataProcessing_Logical_Imm(UbfmOpcode, s, rd, rn, Is64Bit, immr, imms);
  }

  // Unsigned bitfield insert in zero: forwards to the shared helper with its first argument set to false.
  void ubfiz(ARMEmitter::Size s, Register rd, Register rn, uint32_t lsb, uint32_t width) {
    constexpr bool SignedVariant = false;
    xbfiz_helper(SignedVariant, s, rd, rn, lsb, width);
  }

  // Logical shift left by an immediate, expressed through UBFM.
  // The shift amount is reduced modulo the register width.
  void lsl(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t shift) {
    const auto TopBit = RegSizeInBits(s) - 1;
    const uint32_t Amount = shift & TopBit;

    // immr = (width - amount) mod width, imms = (width - 1) - amount
    const uint32_t RotateField = (RegSizeInBits(s) - Amount) & TopBit;
    const uint32_t TopField = TopBit - Amount;
    ubfm(s, rd, rn, RotateField, TopField);
  }
  // Logical shift right by an immediate, expressed through UBFM.
  // The shift amount is reduced modulo the register width.
  void lsr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t shift) {
    const auto TopBit = RegSizeInBits(s) - 1;
    const uint32_t Amount = shift & TopBit;
    ubfm(s, rd, rn, Amount, TopBit);
  }
  // Unsigned bitfield extract of `width` bits starting at bit `lsb`.
  // width must be non-zero and the field must fit inside the register.
  void ubfx(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t lsb, uint32_t width) {
    LOGMAN_THROW_A_FMT(width > 0, "ubfx needs width > 0");
    LOGMAN_THROW_A_FMT((lsb + width) <= RegSizeInBits(s), "Tried to ubfx a region larger than the register");

    const uint32_t TopFieldBit = lsb + width - 1;
    ubfm(s, rd, rn, lsb, TopFieldBit);
  }

  // Bitfield insert: places the low `width` bits of rn into rd at bit `lsb`, via BFM.
  // width must be non-zero and the field must fit inside the register.
  void bfi(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t lsb, uint32_t width) {
    const auto BitsInReg = RegSizeInBits(s);
    LOGMAN_THROW_A_FMT(width > 0, "bfc/bfi needs width > 0");
    LOGMAN_THROW_A_FMT((lsb + width) <= BitsInReg, "Tried to bfc/bfi a region larger than the register");

    // immr = (register width - lsb) mod register width, imms = width - 1
    const uint32_t RotateField = (BitsInReg - lsb) & (BitsInReg - 1);
    const uint32_t TopField = width - 1;
    bfm(s, rd, rn, RotateField, TopField);
  }
  // Bitfield clear: a bitfield insert whose source is the zero register.
  void bfc(ARMEmitter::Size s, Register rd, uint32_t lsb, uint32_t width) {
    const auto ZeroSource = Reg::zr;
    bfi(s, rd, ZeroSource, lsb, width);
  }
  // Bitfield extract and insert low, via BFM with immr = lsb and imms = lsb + width - 1.
  // width must be at least 1 and lsb + width must not exceed the register size.
  void bfxil(ARMEmitter::Size s, Register rd, Register rn, uint32_t lsb, uint32_t width) {
    const auto BitsInReg = RegSizeInBits(s);
    const auto FieldEnd = lsb + width;

    LOGMAN_THROW_A_FMT(width >= 1, "bfxil needs width >= 1");
    LOGMAN_THROW_A_FMT(FieldEnd <= BitsInReg, "bfxil lsb + width ({}) must be <= {}. lsb={}, width={}", FieldEnd, BitsInReg, lsb, width);

    const auto TopFieldBit = FieldEnd - 1;
    bfm(s, rd, rn, lsb, TopFieldBit);
  }

  // Extract
  // EXTR: extract from the rn:rm register pair. Imm must be below the register width.
  void extr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, uint32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm < RegSizeInBits(s), "Tried to extr a region larger than the register");

    constexpr uint32_t ExtrOpcode = 0b001'0011'100 << 21;
    DataProcessing_Extract(ExtrOpcode, s, rd, rn, rm, Imm);
  }

  // Rotate right by an immediate: EXTR with rn supplied as both sources.
  // The rotation is reduced modulo the register width.
  void ror(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm) {
    const uint32_t Rotation = Imm & (RegSizeInBits(s) - 1);
    extr(s, rd, rn, rn, Rotation);
  }

  // Data processing - 2 source
  // UDIV: 2-source data-processing group, opcode field 0b0000'10.
  void udiv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0000'10U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // SDIV: 2-source data-processing group, opcode field 0b0000'11.
  void sdiv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0000'11U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }

  // LSLV (shift left by register): 2-source group, opcode field 0b0010'00.
  void lslv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0010'00U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // LSRV (logical shift right by register): 2-source group, opcode field 0b0010'01.
  void lsrv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0010'01U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // ASRV (arithmetic shift right by register): 2-source group, opcode field 0b0010'10.
  void asrv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0010'10U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // RORV (rotate right by register): 2-source group, opcode field 0b0010'11.
  void rorv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0010'11U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // CRC32B: 2-source group, opcode field 0b0100'00, always emitted with the 32-bit size.
  void crc32b(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0100'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // CRC32H: 2-source group, opcode field 0b0100'01, always emitted with the 32-bit size.
  void crc32h(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0100'01U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // CRC32W: 2-source group, opcode field 0b0100'10, always emitted with the 32-bit size.
  void crc32w(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0100'10U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // CRC32CB: 2-source group, opcode field 0b0101'00, always emitted with the 32-bit size.
  void crc32cb(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0101'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // CRC32CH: 2-source group, opcode field 0b0101'01, always emitted with the 32-bit size.
  void crc32ch(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0101'01U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // CRC32CW: 2-source group, opcode field 0b0101'10, always emitted with the 32-bit size.
  void crc32cw(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0101'10U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn, rm);
  }
  // SMAX (register): 2-source group, opcode field 0b0110'00.
  void smax(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0110'00U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // UMAX (register): 2-source group, opcode field 0b0110'01.
  void umax(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0110'01U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // SMIN (register): 2-source group, opcode field 0b0110'10.
  void smin(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0110'10U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // UMIN (register): 2-source group, opcode field 0b0110'11.
  void umin(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0110'11U << 10;
    DataProcessing_2Source(Group | Opcode, s, rd, rn, rm);
  }
  // SUBP: 2-source group, opcode field 0b0000'00, 64-bit only.
  void subp(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0000'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // IRG: 2-source group, opcode field 0b0001'00, 64-bit only.
  void irg(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0001'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // GMI: 2-source group, opcode field 0b0001'01, 64-bit only.
  void gmi(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0001'01U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // PACGA: 2-source group, opcode field 0b0011'00, 64-bit only.
  void pacga(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0011'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // CRC32X: 2-source group, opcode field 0b0100'11, emitted with the 64-bit size.
  void crc32x(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0100'11U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // CRC32CX: 2-source group, opcode field 0b0101'11, emitted with the 64-bit size.
  void crc32cx(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b001'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0101'11U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }
  // SUBPS: same opcode field as subp (0b0000'00) but with the upper group bits 0b011'1010'110, 64-bit only.
  void subps(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t Group = 0b011'1010'110U << 21;
    constexpr uint32_t Opcode = 0b0000'00U << 10;
    DataProcessing_2Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn, rm);
  }

  // Data processing - 1 source
  // RBIT: 1-source data-processing group, opcode field 0b0000'00.
  void rbit(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0000'00U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // REV16: 1-source group, opcode field 0b0000'01.
  void rev16(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0000'01U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // REV on a W register: 1-source group, opcode field 0b0000'10 with the 32-bit size.
  void rev(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0000'10U << 10;
    DataProcessing_1Source(Group | Opcode, ARMEmitter::Size::i32Bit, rd, rn);
  }
  // REV32: same opcode field as the W-register rev (0b0000'10) but emitted with the 64-bit size.
  void rev32(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0000'10U << 10;
    DataProcessing_1Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn);
  }
  // CLZ: 1-source group, opcode field 0b0001'00.
  void clz(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0001'00U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // CLS: 1-source group, opcode field 0b0001'01.
  void cls(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0001'01U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // REV on an X register: 1-source group, opcode field 0b0000'11 with the 64-bit size.
  void rev(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0000'11U << 10;
    DataProcessing_1Source(Group | Opcode, ARMEmitter::Size::i64Bit, rd, rn);
  }
  // Size-generic REV: opcode field 0b0000'10 for 32-bit, with bit 10 additionally set
  // (giving 0b0000'11) when the operation is 64-bit.
  void rev(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Base = (0b101'1010'110U << 21) | (0b0'0000U << 16) | (0b0000'10U << 10);
    const uint32_t WideBit = (s == ARMEmitter::Size::i64Bit) ? (1U << 10) : 0;
    uint32_t Op = Base | WideBit;
    DataProcessing_1Source(Op, s, rd, rn);
  }
  // CTZ: 1-source group, opcode field 0b0001'10.
  void ctz(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0001'10U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // CNT: 1-source group, opcode field 0b0001'11.
  void cnt(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0001'11U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }
  // ABS: 1-source group, opcode field 0b0010'00.
  void abs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t Group = (0b101'1010'110U << 21) | (0b0'0000U << 16);
    constexpr uint32_t Opcode = 0b0010'00U << 10;
    DataProcessing_1Source(Group | Opcode, s, rd, rn);
  }

  // TODO: PAUTH

  // Logical - shifted register
  // Register move: orr of the zero register with rn, using LSL #0.
  void mov(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn) {
    constexpr uint32_t NoShift = 0;
    orr(s, rd, ARMEmitter::Reg::zr, rn, ARMEmitter::ShiftType::LSL, NoShift);
  }
  // 64-bit register move: orr of the zero register with rn, using LSL #0.
  void mov(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn) {
    constexpr uint32_t NoShift = 0;
    orr(ARMEmitter::Size::i64Bit, rd.R(), ARMEmitter::Reg::zr, rn.R(), ARMEmitter::ShiftType::LSL, NoShift);
  }
  // 32-bit register move: orr of the zero register with rn, using LSL #0.
  void mov(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn) {
    constexpr uint32_t NoShift = 0;
    orr(ARMEmitter::Size::i32Bit, rd.R(), ARMEmitter::Reg::zr, rn.R(), ARMEmitter::ShiftType::LSL, NoShift);
  }

  // Bitwise NOT: orn with the zero register as the first operand; the optional shift is applied to rn.
  void mvn(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL,
           uint32_t amt = 0) {
    orn(s, rd,
        ARMEmitter::Reg::zr, // first operand is zero
        rn, Shift, amt);
  }

  // AND (shifted register): top bits 0b000'1010'000.
  void and_(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b000'1010'000U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // ANDS (shifted register): top bits 0b110'1010'000.
  void ands(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b110'1010'000U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // BIC (shifted register): top bits 0b000'1010'001.
  void bic(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b000'1010'001U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // BICS (shifted register): top bits 0b110'1010'001.
  void bics(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b110'1010'001U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // ORR (shifted register): top bits 0b010'1010'000.
  void orr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b010'1010'000U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // Test bits: ands with the result written to the zero register, so only the flags are kept.
  void tst(ARMEmitter::Size s, Register rn, Register rm, ShiftType shift = ShiftType::LSL, uint32_t amt = 0) {
    ands(s,
         Reg::zr, // discard the result
         rn, rm, shift, amt);
  }

  // ORN (shifted register): top bits 0b010'1010'001.
  void orn(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b010'1010'001U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // EOR (shifted register): top bits 0b100'1010'000.
  void eor(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b100'1010'000U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }
  // EON (shifted register): top bits 0b100'1010'001.
  void eon(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr uint32_t LogicalOp = 0b100'1010'001U << 21;
    DataProcessing_Shifted_Reg(LogicalOp, s, rd, rn, rm, Shift, amt);
  }

  // AddSub - shifted register
  // 64-bit typed convenience overload: forwards to the size-generic shifted-register add.
  void add(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    add(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit typed convenience overload: forwards to the size-generic shifted-register adds.
  void adds(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    adds(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit compare-negative: adds with the result discarded into the zero register.
  void cmn(ARMEmitter::XRegister rn, ARMEmitter::XRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    adds(OpSize, ARMEmitter::XReg::zr, rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit typed convenience overload: forwards to the size-generic shifted-register sub.
  void sub(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    sub(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit negate: sub with the zero register as the minuend.
  void neg(ARMEmitter::XRegister rd, ARMEmitter::XRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    sub(rd,
        ARMEmitter::XReg::zr, // 0 - rm
        rm, Shift, amt);
  }
  // 64-bit compare: subs with the result discarded.
  // The discarded destination is named as the zero register, matching cmn/tst and the size-generic
  // cmp in this file. The previous spelling (Reg::rsp) was misleading for a shifted-register subs;
  // NOTE(review): both names are expected to encode register index 31 - confirm in Registers.h.
  void cmp(ARMEmitter::XRegister rn, ARMEmitter::XRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    subs(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::zr, rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit typed convenience overload: forwards to the size-generic shifted-register subs.
  void subs(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i64Bit;
    subs(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 64-bit flag-setting negate: subs with the zero register as the minuend.
  void negs(ARMEmitter::XRegister rd, ARMEmitter::XRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    subs(rd,
         ARMEmitter::XReg::zr, // 0 - rm
         rm, Shift, amt);
  }

  // 32-bit typed convenience overload: forwards to the size-generic shifted-register add.
  void add(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    add(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit typed convenience overload: forwards to the size-generic shifted-register adds.
  void adds(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    adds(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit compare-negative: adds with the result discarded into the zero register.
  void cmn(ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    adds(OpSize, ARMEmitter::WReg::zr, rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit typed convenience overload: forwards to the size-generic shifted-register sub.
  void sub(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    sub(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit negate: sub with the zero register as the minuend.
  void neg(ARMEmitter::WRegister rd, ARMEmitter::WRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    sub(rd,
        ARMEmitter::WReg::zr, // 0 - rm
        rm, Shift, amt);
  }
  // 32-bit compare: subs with the result discarded.
  // The discarded destination is named as the zero register, matching cmn/tst and the size-generic
  // cmp in this file. The previous spelling (Reg::rsp) was misleading for a shifted-register subs;
  // NOTE(review): both names are expected to encode register index 31 - confirm in Registers.h.
  void cmp(ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    subs(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::zr, rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit typed convenience overload: forwards to the size-generic shifted-register subs.
  void subs(ARMEmitter::WRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    constexpr auto OpSize = ARMEmitter::Size::i32Bit;
    subs(OpSize, rd.R(), rn.R(), rm.R(), Shift, amt);
  }
  // 32-bit flag-setting negate: subs with the zero register as the minuend.
  void negs(ARMEmitter::WRegister rd, ARMEmitter::WRegister rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    subs(rd,
         ARMEmitter::WReg::zr, // 0 - rm
         rm, Shift, amt);
  }

  // ADD (shifted register): top bits 0b000'1011'000. ROR is rejected as a shift type.
  void add(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    LOGMAN_THROW_A_FMT(Shift != ARMEmitter::ShiftType::ROR, "Doesn't support ROR");
    constexpr uint32_t ArithOp = 0b000'1011'000U << 21;
    DataProcessing_Shifted_Reg(ArithOp, s, rd, rn, rm, Shift, amt);
  }
  // ADDS (shifted register): top bits 0b010'1011'000. ROR is rejected as a shift type.
  void adds(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    LOGMAN_THROW_A_FMT(Shift != ARMEmitter::ShiftType::ROR, "Doesn't support ROR");
    constexpr uint32_t ArithOp = 0b010'1011'000U << 21;
    DataProcessing_Shifted_Reg(ArithOp, s, rd, rn, rm, Shift, amt);
  }
  // Compare-negative (shifted register): adds with the result discarded into the zero register.
  void cmn(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL,
           uint32_t amt = 0) {
    adds(s,
         ARMEmitter::Reg::zr, // discard the result
         rn, rm, Shift, amt);
  }
  // SUB (shifted register): top bits 0b100'1011'000. ROR is rejected as a shift type.
  void sub(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
           ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    LOGMAN_THROW_A_FMT(Shift != ARMEmitter::ShiftType::ROR, "Doesn't support ROR");
    constexpr uint32_t ArithOp = 0b100'1011'000U << 21;
    DataProcessing_Shifted_Reg(ArithOp, s, rd, rn, rm, Shift, amt);
  }
  // Negate (shifted register): sub with the zero register as the minuend.
  void neg(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL,
           uint32_t amt = 0) {
    sub(s, rd,
        ARMEmitter::Reg::zr, // 0 - rm
        rm, Shift, amt);
  }
  // Compare (shifted register): subs with the result discarded into the zero register.
  void cmp(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL,
           uint32_t amt = 0) {
    subs(s,
         ARMEmitter::Reg::zr, // discard the result
         rn, rm, Shift, amt);
  }

  // SUBS (shifted register): top bits 0b110'1011'000. ROR is rejected as a shift type.
  void subs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
            ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL, uint32_t amt = 0) {
    LOGMAN_THROW_A_FMT(Shift != ARMEmitter::ShiftType::ROR, "Doesn't support ROR");
    constexpr uint32_t ArithOp = 0b110'1011'000U << 21;
    DataProcessing_Shifted_Reg(ArithOp, s, rd, rn, rm, Shift, amt);
  }
  // Flag-setting negate (shifted register): subs with the zero register as the minuend.
  void negs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rm, ARMEmitter::ShiftType Shift = ARMEmitter::ShiftType::LSL,
            uint32_t amt = 0) {
    subs(s, rd,
         ARMEmitter::Reg::zr, // 0 - rm
         rm, Shift, amt);
  }

  // AddSub - extended register
  // ADD (extended register): top bits 0b000'1011'001. The left shift applied after the
  // extension must be in the range 0-4.
  void add(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option,
           uint32_t Shift = 0) {
    LOGMAN_THROW_A_FMT(Shift <= 4, "Shift amount is too large");
    constexpr uint32_t ExtendedOp = 0b000'1011'001U << 21;
    DataProcessing_Extended_Reg(ExtendedOp, s, rd, rn, rm, Option, Shift);
  }
  // ADDS (extended register): top bits 0b010'1011'001.
  // Shift is the left shift applied after the extension; it is bounded to 0-4 here, the same
  // check the non-flag-setting extended add performs.
  void adds(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option,
            uint32_t Shift = 0) {
    LOGMAN_THROW_A_FMT(Shift <= 4, "Shift amount is too large");
    constexpr uint32_t Op = 0b010'1011'001U << 21;
    DataProcessing_Extended_Reg(Op, s, rd, rn, rm, Option, Shift);
  }
  // Compare-negative (extended register): adds with the result discarded into the zero register.
  void cmn(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift = 0) {
    adds(s,
         ARMEmitter::Reg::zr, // discard the result
         rn, rm, Option, Shift);
  }
  // SUB (extended register): top bits 0b100'1011'001.
  // Shift is the left shift applied after the extension; it is bounded to 0-4 here, the same
  // check the extended add performs.
  void sub(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option,
           uint32_t Shift = 0) {
    LOGMAN_THROW_A_FMT(Shift <= 4, "Shift amount is too large");
    constexpr uint32_t Op = 0b100'1011'001U << 21;
    DataProcessing_Extended_Reg(Op, s, rd, rn, rm, Option, Shift);
  }
  // SUBS (extended register): top bits 0b110'1011'001.
  // Shift is the left shift applied after the extension; it is bounded to 0-4 here, the same
  // check the extended add performs.
  void subs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option,
            uint32_t Shift = 0) {
    LOGMAN_THROW_A_FMT(Shift <= 4, "Shift amount is too large");
    constexpr uint32_t Op = 0b110'1011'001U << 21;
    DataProcessing_Extended_Reg(Op, s, rd, rn, rm, Option, Shift);
  }
  // Compare (extended register): the SUBS extended-register encoding (0b110'1011'001) with the
  // result discarded into the zero register.
  // Shift is bounded to 0-4 here, the same check the extended add performs.
  void cmp(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift = 0) {
    LOGMAN_THROW_A_FMT(Shift <= 4, "Shift amount is too large");
    constexpr uint32_t Op = 0b110'1011'001U << 21;
    DataProcessing_Extended_Reg(Op, s, ARMEmitter::Reg::zr, rn, rm, Option, Shift);
  }

  // AddSub - with carry
  // ADC: top bits 0b0001'1010'000. Reuses the extended-register encoder, passing UXTB and a
  // zero shift for the fields this instruction does not use (presumably both encode as zero
  // bits - confirm against DataProcessing_Extended_Reg).
  void adc(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t CarryOp = 0b0001'1010'000U << 21;
    DataProcessing_Extended_Reg(CarryOp, s, rd, rn, rm, ARMEmitter::ExtendedType::UXTB, 0);
  }
  // ADCS: top bits 0b0011'1010'000. Reuses the extended-register encoder, passing UXTB and a
  // zero shift for the fields this instruction does not use.
  void adcs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t CarryOp = 0b0011'1010'000U << 21;
    DataProcessing_Extended_Reg(CarryOp, s, rd, rn, rm, ARMEmitter::ExtendedType::UXTB, 0);
  }
  // SBC: top bits 0b0101'1010'000. Reuses the extended-register encoder, passing UXTB and a
  // zero shift for the fields this instruction does not use.
  void sbc(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t CarryOp = 0b0101'1010'000U << 21;
    DataProcessing_Extended_Reg(CarryOp, s, rd, rn, rm, ARMEmitter::ExtendedType::UXTB, 0);
  }
  // SBCS: top bits 0b0111'1010'000. Reuses the extended-register encoder, passing UXTB and a
  // zero shift for the fields this instruction does not use.
  void sbcs(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t CarryOp = 0b0111'1010'000U << 21;
    DataProcessing_Extended_Reg(CarryOp, s, rd, rn, rm, ARMEmitter::ExtendedType::UXTB, 0);
  }
  // Negate with carry: sbc with the zero register as the first source.
  void ngc(ARMEmitter::Size s, Register rd, Register rm) {
    sbc(s, rd,
        Reg::zr, // 0 - rm - borrow
        rm);
  }
  // Flag-setting negate with carry: sbcs with the zero register as the first source.
  void ngcs(ARMEmitter::Size s, Register rd, Register rm) {
    sbcs(s, rd,
         Reg::zr, // 0 - rm - borrow
         rm);
  }

  // Rotate right into flags
  // RMIF: packs rn at bit 5, the 6-bit shift at bit 15 and the 4-bit mask at bit 0 on top of the
  // fixed encoding bits, then emits the word.
  void rmif(XRegister rn, uint32_t shift, uint32_t mask) {
    LOGMAN_THROW_A_FMT(shift <= 63, "Shift must be within 0-63. Shift: {}", shift);
    LOGMAN_THROW_A_FMT(mask <= 15, "Mask must be within 0-15. Mask: {}", mask);

    constexpr uint32_t FixedBits = 0b1011'1010'0000'0000'0000'0100'0000'0000;

    uint32_t Instr = FixedBits;
    Instr |= mask;
    Instr |= shift << 15;
    Instr |= rn.Idx() << 5;

    dc32(Instr);
  }

  // Evaluate into flags
  // SETF8: shares its fixed bits with setf16; passes 0 as the second EvaluateIntoFlags argument
  // (presumably the size selector - confirm against the helper).
  void setf8(WRegister rn) {
    constexpr uint32_t FixedBits = 0b0011'1010'0000'0000'0000'1000'0000'1101;
    EvaluateIntoFlags(FixedBits, 0, rn);
  }
  // SETF16: shares its fixed bits with setf8; passes 1 as the second EvaluateIntoFlags argument
  // (presumably the size selector - confirm against the helper).
  void setf16(WRegister rn) {
    constexpr uint32_t FixedBits = 0b0011'1010'0000'0000'0000'1000'0000'1101;
    EvaluateIntoFlags(FixedBits, 1, rn);
  }

  // CFINV: emits the fixed instruction word 0xD500401F.
  void cfinv() {
    constexpr uint32_t Word = 0b1101'0101'0000'0000'0100'0000'0001'1111;
    dc32(Word);
  }

  // AXFLAG: emits the fixed instruction word 0xD500405F.
  void axflag() {
    constexpr uint32_t Word = 0b1101'0101'0000'0000'0100'0000'0101'1111;
    dc32(Word);
  }

  // XAFLAG: emits the fixed instruction word 0xD500403F.
  void xaflag() {
    constexpr uint32_t Word = 0b1101'0101'0000'0000'0100'0000'0011'1111;
    dc32(Word);
  }

  // Conditional compare - register
  // CCMN (register): conditional-compare group with the second/third helper arguments 0 and 0b00.
  void ccmn(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::StatusFlags flags, ARMEmitter::Condition Cond) {
    constexpr uint32_t CondCmpOp = 0b0011'1010'010 << 21;
    ConditionalCompare(CondCmpOp, 0, 0b00, 0, s, rn, rm, flags, Cond);
  }
  // CCMP (register): same group bits as ccmn, with 1 as the second helper argument.
  void ccmp(ARMEmitter::Size s, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::StatusFlags flags, ARMEmitter::Condition Cond) {
    constexpr uint32_t CondCmpOp = 0b0011'1010'010 << 21;
    ConditionalCompare(CondCmpOp, 1, 0b00, 0, s, rn, rm, flags, Cond);
  }

  // Conditional compare - immediate
  // CCMN (immediate): rm is a 5-bit unsigned immediate; 0b10 is passed as the third helper
  // argument in place of the register form's 0b00.
  void ccmn(ARMEmitter::Size s, ARMEmitter::Register rn, uint32_t rm, ARMEmitter::StatusFlags flags, ARMEmitter::Condition Cond) {
    LOGMAN_THROW_A_FMT((rm & ~0b1'1111) == 0, "Comparison imm too large");
    constexpr uint32_t CondCmpOp = 0b0011'1010'010 << 21;
    ConditionalCompare(CondCmpOp, 0, 0b10, 0, s, rn, rm, flags, Cond);
  }
  // CCMP (immediate): rm is a 5-bit unsigned immediate; 0b10 is passed as the third helper
  // argument in place of the register form's 0b00.
  void ccmp(ARMEmitter::Size s, ARMEmitter::Register rn, uint32_t rm, ARMEmitter::StatusFlags flags, ARMEmitter::Condition Cond) {
    LOGMAN_THROW_A_FMT((rm & ~0b1'1111) == 0, "Comparison imm too large");
    constexpr uint32_t CondCmpOp = 0b0011'1010'010 << 21;
    ConditionalCompare(CondCmpOp, 1, 0b10, 0, s, rn, rm, flags, Cond);
  }

  // Conditional select
  // CSEL: conditional-select group with helper arguments (0, 0b00).
  void csel(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Condition Cond) {
    constexpr uint32_t SelectOp = 0b0001'1010'100 << 21;
    ConditionalCompare(SelectOp, 0, 0b00, s, rd, rn, rm, Cond);
  }
  // CSET: emitted with the csinc helper arguments (0, 0b01), both sources set to the zero
  // register, and the condition's underlying value XORed with CC_NE's underlying value.
  void cset(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Condition Cond) {
    constexpr uint32_t SelectOp = 0b0001'1010'100 << 21;
    const auto EncodedCond =
      static_cast<ARMEmitter::Condition>(FEXCore::ToUnderlying(Cond) ^ FEXCore::ToUnderlying(ARMEmitter::Condition::CC_NE));
    ConditionalCompare(SelectOp, 0, 0b01, s, rd, ARMEmitter::Reg::zr, ARMEmitter::Reg::zr, EncodedCond);
  }
  // CSINC: conditional-select group with helper arguments (0, 0b01).
  void csinc(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Condition Cond) {
    constexpr uint32_t SelectOp = 0b0001'1010'100 << 21;
    ConditionalCompare(SelectOp, 0, 0b01, s, rd, rn, rm, Cond);
  }
  // CSINV: conditional-select group with helper arguments (1, 0b00).
  void csinv(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Condition Cond) {
    constexpr uint32_t SelectOp = 0b0001'1010'100 << 21;
    ConditionalCompare(SelectOp, 1, 0b00, s, rd, rn, rm, Cond);
  }
  // CSNEG: conditional-select group with helper arguments (1, 0b01).
  void csneg(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Condition Cond) {
    constexpr uint32_t SelectOp = 0b0001'1010'100 << 21;
    ConditionalCompare(SelectOp, 1, 0b01, s, rd, rn, rm, Cond);
  }
  // Conditional negate: csneg with rn as both sources and the condition inverted.
  void cneg(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Condition Cond) {
    const auto Inverted = InvertCondition(Cond);
    csneg(s, rd, rn, rn, Inverted);
  }
  // Conditional increment: csinc with rn as both sources and the condition inverted.
  void cinc(ARMEmitter::Size s, Register rd, Register rn, Condition cond) {
    const auto Inverted = InvertCondition(cond);
    csinc(s, rd, rn, rn, Inverted);
  }
  // Conditional invert: csinv with rn as both sources and the condition inverted.
  void cinv(ARMEmitter::Size s, Register rd, Register rn, Condition cond) {
    const auto Inverted = InvertCondition(cond);
    csinv(s, rd, rn, rn, Inverted);
  }
  // Conditional set mask: csinv with the zero register as both sources and the condition inverted.
  void csetm(ARMEmitter::Size s, Register rd, Condition cond) {
    const auto Inverted = InvertCondition(cond);
    csinv(s, rd, Reg::zr, Reg::zr, Inverted);
  }

  // Data processing - 3 source
  // MADD: 3-source group bits 0b001'1011'000 with 0 as the second helper argument.
  void madd(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Register ra) {
    constexpr uint32_t MulOp = 0b001'1011'000U << 21;
    DataProcessing_3Source(MulOp, 0, s, rd, rn, rm, ra);
  }
  // Multiply: madd with the zero register as the addend.
  void mul(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    madd(s, rd, rn, rm,
         XReg::zr); // addend = 0
  }
  // MSUB: same group bits as madd, with 1 as the second helper argument.
  void msub(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::Register ra) {
    constexpr uint32_t MulOp = 0b001'1011'000U << 21;
    DataProcessing_3Source(MulOp, 1, s, rd, rn, rm, ra);
  }
  // Multiply-negate: msub with the zero register as the minuend.
  void mneg(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    msub(s, rd, rn, rm,
         XReg::zr); // 0 - rn * rm
  }
  // SMADDL: 3-source group bits 0b001'1011'001, second helper argument 0, 64-bit size.
  void smaddl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::XRegister ra) {
    constexpr uint32_t MulOp = 0b001'1011'001U << 21;
    DataProcessing_3Source(MulOp, 0, ARMEmitter::Size::i64Bit, rd, rn, rm, ra);
  }
  // Signed widening multiply: smaddl with the zero register as the addend.
  void smull(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    smaddl(rd, rn, rm,
           XReg::zr); // addend = 0
  }
  // SMSUBL: same group bits as smaddl, with 1 as the second helper argument.
  void smsubl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::XRegister ra) {
    constexpr uint32_t MulOp = 0b001'1011'001U << 21;
    DataProcessing_3Source(MulOp, 1, ARMEmitter::Size::i64Bit, rd, rn, rm, ra);
  }
  // Signed widening multiply-negate: smsubl with the zero register as the minuend.
  void smnegl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    smsubl(rd, rn, rm,
           XReg::zr); // 0 - rn * rm
  }
  // SMULH: 3-source group bits 0b001'1011'010; the ra slot is filled with the zero register.
  void smulh(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t MulOp = 0b001'1011'010U << 21;
    DataProcessing_3Source(MulOp, 0, ARMEmitter::Size::i64Bit, rd, rn, rm, ARMEmitter::Reg::zr);
  }
  // UMADDL: 3-source group bits 0b001'1011'101, second helper argument 0, 64-bit size.
  void umaddl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::XRegister ra) {
    constexpr uint32_t MulOp = 0b001'1011'101U << 21;
    DataProcessing_3Source(MulOp, 0, ARMEmitter::Size::i64Bit, rd, rn, rm, ra);
  }
  // Unsigned widening multiply: umaddl with the zero register as the addend.
  void umull(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    umaddl(rd, rn, rm,
           XReg::zr); // addend = 0
  }
  // UMSUBL: same group bits as umaddl, with 1 as the second helper argument.
  void umsubl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm, ARMEmitter::XRegister ra) {
    constexpr uint32_t MulOp = 0b001'1011'101U << 21;
    DataProcessing_3Source(MulOp, 1, ARMEmitter::Size::i64Bit, rd, rn, rm, ra);
  }
  // Unsigned widening multiply-negate: umsubl with the zero register as the minuend.
  void umnegl(ARMEmitter::XRegister rd, ARMEmitter::WRegister rn, ARMEmitter::WRegister rm) {
    umsubl(rd, rn, rm,
           XReg::zr); // 0 - rn * rm
  }
  // UMULH: 3-source group bits 0b001'1011'110; the ra slot is filled with the zero register.
  void umulh(ARMEmitter::XRegister rd, ARMEmitter::XRegister rn, ARMEmitter::XRegister rm) {
    constexpr uint32_t MulOp = 0b001'1011'110U << 21;
    DataProcessing_3Source(MulOp, 0, ARMEmitter::Size::i64Bit, rd, rn, rm, ARMEmitter::Reg::zr);
  }

private:
  // AND (logical immediate) with a pre-encoded n/immr/imms bitmask triple.
  void and_(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t n, uint32_t immr, uint32_t imms) {
    constexpr uint32_t LogicalImmOp = 0b001'0010'00 << 22;
    DataProcessing_Logical_Imm(LogicalImmOp, s, rd, rn, n, immr, imms);
  }
  // ANDS (logical immediate) with a pre-encoded n/immr/imms bitmask triple.
  void ands(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t n, uint32_t immr, uint32_t imms) {
    constexpr uint32_t LogicalImmOp = 0b111'0010'00 << 22;
    DataProcessing_Logical_Imm(LogicalImmOp, s, rd, rn, n, immr, imms);
  }
  // ORR (logical immediate) with a pre-encoded n/immr/imms bitmask triple.
  void orr(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t n, uint32_t immr, uint32_t imms) {
    constexpr uint32_t LogicalImmOp = 0b011'0010'00 << 22;
    DataProcessing_Logical_Imm(LogicalImmOp, s, rd, rn, n, immr, imms);
  }
  // EOR (logical immediate) with a pre-encoded n/immr/imms bitmask triple.
  void eor(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t n, uint32_t immr, uint32_t imms) {
    constexpr uint32_t LogicalImmOp = 0b101'0010'00 << 22;
    DataProcessing_Logical_Imm(LogicalImmOp, s, rd, rn, n, immr, imms);
  }

  // SBFM: signed bitfield move. Reuses the logical-immediate encoder; the N argument is set
  // exactly when the operation is 64-bit.
  void sbfm(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t immr, uint32_t imms) {
    constexpr uint32_t BitfieldOp = 0b0001'0011'00 << 22;
    const bool Is64Bit = s == ARMEmitter::Size::i64Bit;
    DataProcessing_Logical_Imm(BitfieldOp, s, rd, rn, Is64Bit, immr, imms);
  }
  // BFM: bitfield move. Reuses the logical-immediate encoder; the N argument is set exactly
  // when the operation is 64-bit.
  void bfm(ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t immr, uint32_t imms) {
    constexpr uint32_t BitfieldOp = 0b0011'0011'00 << 22;
    const bool Is64Bit = s == ARMEmitter::Size::i64Bit;
    DataProcessing_Logical_Imm(BitfieldOp, s, rd, rn, Is64Bit, immr, imms);
  }
  // 4.1.64 - Data processing - Immediate
  void DataProcessing_PCRel_Imm(uint32_t Op, ARMEmitter::Register rd, uint32_t Imm) {
    // Ensure the immediate is masked.
    Imm &= 0b1'1111'1111'1111'1111'1111U;

    uint32_t Instr = Op;

    Instr |= (Imm & 0b11) << 29;
    Instr |= (Imm >> 2) << 5;
    Instr |= Encode_rd(rd);

    dc32(Instr);
  }

  // Encodes an add/subtract (immediate) instruction.
  //
  // Op    - fixed opcode bits for the instruction.
  // s     - operation size; selects the SF bit (bit 31).
  // rd/rn - destination and source registers.
  // Imm   - unsigned immediate. Either fits in 12 bits, or (when LSL12 is false) is a multiple of
  //         4096 whose upper part fits in 12 bits, in which case it is converted to the
  //         "LSL #12" form automatically.
  // LSL12 - request the shifted-by-12 form explicitly; Imm must then already fit in 12 bits.
  //
  // Asserts when the immediate cannot be represented.
  void DataProcessing_AddSub_Imm(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t Imm, bool LSL12) {
    constexpr uint32_t Imm12Mask = 0b1111'1111'1111U;

    bool TooLarge = (Imm & ~Imm12Mask) != 0;
    // The shifted form can only express values whose low 12 bits are zero. Without that check a
    // value such as 0x1001 would be silently emitted as (1 << 12), dropping the low bits.
    const bool LowBitsClear = (Imm & Imm12Mask) == 0;
    if (TooLarge && !LSL12 && LowBitsClear && ((Imm >> 12) & ~Imm12Mask) == 0) {
      // We can convert an immediate
      TooLarge = false;
      LSL12 = true;
      Imm >>= 12;
    }
    LOGMAN_THROW_A_FMT(TooLarge == false, "Imm amount too large: 0x{:x}", Imm);

    const uint32_t SF = s == ARMEmitter::Size::i64Bit ? (1U << 31) : 0;

    uint32_t Instr = Op;

    Instr |= SF;
    Instr |= LSL12 << 22;
    Instr |= Imm << 10;
    Instr |= Encode_rn(rn);
    Instr |= Encode_rd(rd);

    dc32(Instr);
  }

  // Min/max immediate
  void MinMaxImmediate(uint32_t opc, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint64_t Imm) {
    const uint32_t SF = s == ARMEmitter::Size::i64Bit ? (1U << 31) : 0;

    uint32_t Instr = 0b1'0001'11U << 22;

    Instr |= SF;
    Instr |= opc << 18;
    Instr |= (Imm & 0xFF) << 10;
    Instr |= Encode_rn(rn);
    Instr |= Encode_rd(rd);

    dc32(Instr);
  }

  // Move Wide
  // Encodes a move-wide instruction: Imm is placed at bit 5 and Offset at bit 21.
  // No range checking is done here; callers are expected to pass in-range values.
  void DataProcessing_MoveWide(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, uint32_t Imm, uint32_t Offset) {
    const uint32_t SizeBit = (s == ARMEmitter::Size::i64Bit) ? (1U << 31) : 0;
    const uint32_t ImmField = Imm << 5;
    const uint32_t ShiftField = Offset << 21;

    uint32_t Instr = Op;
    Instr |= SizeBit;
    Instr |= ImmField;
    Instr |= ShiftField;
    Instr |= Encode_rd(rd);

    dc32(Instr);
  }

  // Logical immediate
  // Encodes a logical-immediate style instruction: n at bit 22, immr at bit 16, imms at bit 10.
  // Also used by the bitfield-move emitters (sbfm/bfm) in this file.
  void DataProcessing_Logical_Imm(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, uint32_t n,
                                  uint32_t immr, uint32_t imms) {
    const uint32_t SizeBit = (s == ARMEmitter::Size::i64Bit) ? (1U << 31) : 0;
    const uint32_t NField = n << 22;
    const uint32_t ImmrField = immr << 16;
    const uint32_t ImmsField = imms << 10;

    uint32_t Instr = Op;
    Instr |= SizeBit;
    Instr |= NField;
    Instr |= ImmrField;
    Instr |= ImmsField;
    Instr |= Encode_rn(rn);
    Instr |= Encode_rd(rd);

    dc32(Instr);
  }

  // Shared implementation for the "bitfield insert in zero" forms. Validates the field and emits
  // sbfm (is_signed) or ubfm with immr = (RegSize - lsb) mod RegSize and imms = width - 1.
  void xbfiz_helper(bool is_signed, ARMEmitter::Size s, Register rd, Register rn, uint32_t lsb, uint32_t width) {
    const auto FieldEnd = lsb + width;
    const auto RegBits = RegSizeInBits(s);

    LOGMAN_THROW_A_FMT(FieldEnd <= RegBits, "lsb + width ({}) must be <= {}. lsb={}, width={}", FieldEnd, RegBits, lsb, width);
    LOGMAN_THROW_A_FMT(width >= 1, "xbfiz width must be >= 1");

    const auto Rotate = (RegBits - lsb) & (RegBits - 1);
    const auto TopBit = width - 1;

    if (!is_signed) {
      ubfm(s, rd, rn, Rotate, TopBit);
      return;
    }
    sbfm(s, rd, rn, Rotate, TopBit);
  }

  // Extract encoder.
  // `Op` carries the fixed opcode bits; `Imm` is placed at bit 10 alongside the rm/rn/rd fields.
  void DataProcessing_Extract(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm,
                              uint32_t Imm) {
    // The current ARMv8 spec hardcodes SF == N for this class of instructions;
    // anything else is undefined behaviour, so both bits follow the operand size.
    const bool Is64Bit = s == ARMEmitter::Size::i64Bit;
    const uint32_t SizeBits = Is64Bit ? ((1U << 31) | (1U << 22)) : 0U;

    dc32(Op | SizeBits | Encode_rm(rm) | (Imm << 10) | Encode_rn(rn) | Encode_rd(rd));
  }

  // Data-processing (2 source) encoder.
  // Combines the fixed opcode bits in `Op` with the size bit and the rm/rn/rd register fields.
  void DataProcessing_2Source(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    uint32_t Encoding = Op | Encode_rm(rm) | Encode_rn(rn) | Encode_rd(rd);
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }

    dc32(Encoding);
  }

  // Data-processing (1 source) encoder.
  // Templated on the register type so any type accepted by Encode_rn/Encode_rd can be used.
  template<typename T>
  void DataProcessing_1Source(uint32_t Op, ARMEmitter::Size s, T rd, T rn) {
    uint32_t Encoding = Op | Encode_rn(rn) | Encode_rd(rd);
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }

    dc32(Encoding);
  }

  // Shifted-register encoder.
  // The shift type goes at bit 22 and the shift amount at bit 10. The amount must fit in
  // six bits, and must additionally be below 32 for 32-bit operation.
  void DataProcessing_Shifted_Reg(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn,
                                  ARMEmitter::Register rm, ARMEmitter::ShiftType Shift, uint32_t amt) {
    LOGMAN_THROW_A_FMT((amt & ~0b11'1111U) == 0, "Shift amount too large");
    LOGMAN_THROW_A_FMT(s != ARMEmitter::Size::i32Bit || amt < 32, "Shift amount for 32-bit must be below 32");

    uint32_t Encoding = Op;
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= FEXCore::ToUnderlying(Shift) << 22;
    Encoding |= amt << 10;
    Encoding |= Encode_rm(rm) | Encode_rn(rn) | Encode_rd(rd);

    dc32(Encoding);
  }

  // Extended-register encoder.
  // The extend option goes at bit 13 and the left-shift amount at bit 10.
  void DataProcessing_Extended_Reg(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn,
                                   ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    uint32_t Encoding = Op;
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= FEXCore::ToUnderlying(Option) << 13;
    Encoding |= Shift << 10;
    Encoding |= Encode_rm(rm) | Encode_rn(rn) | Encode_rd(rd);

    dc32(Encoding);
  }
  // Conditional compare encoder (flags-producing form).
  // `rm` is templated so the same encoder serves whatever operand types Encode_rm accepts.
  // Field placement: o1 at bit 30, condition at bit 12, o2 at bit 10, o3 at bit 4, and the
  // status flags in the lowest bits.
  template<typename T>
  void ConditionalCompare(uint32_t Op, uint32_t o1, uint32_t o2, uint32_t o3, ARMEmitter::Size s, ARMEmitter::Register rn, T rm,
                          ARMEmitter::StatusFlags flags, ARMEmitter::Condition Cond) {
    uint32_t Encoding = Op;
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= (o1 << 30) | (o2 << 10) | (o3 << 4);
    Encoding |= FEXCore::ToUnderlying(Cond) << 12;
    Encoding |= FEXCore::ToUnderlying(flags);
    Encoding |= Encode_rm(rm) | Encode_rn(rn);

    dc32(Encoding);
  }

  // Conditional encoder variant that writes a destination register instead of flags.
  // Field placement: o1 at bit 30, condition at bit 12, o2 at bit 10, plus the rm/rn/rd fields.
  template<typename T>
  void ConditionalCompare(uint32_t Op, uint32_t o1, uint32_t o2, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn, T rm,
                          ARMEmitter::Condition Cond) {
    uint32_t Encoding = Op;
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= (o1 << 30) | (o2 << 10);
    Encoding |= FEXCore::ToUnderlying(Cond) << 12;
    Encoding |= Encode_rm(rm) | Encode_rn(rn) | Encode_rd(rd);

    dc32(Encoding);
  }

  // Data-processing (3 source) encoder.
  // `Op` carries the fixed opcode bits and `Op0` is placed at bit 15 next to the ra field.
  void DataProcessing_3Source(uint32_t Op, uint32_t Op0, ARMEmitter::Size s, ARMEmitter::Register rd, ARMEmitter::Register rn,
                              ARMEmitter::Register rm, ARMEmitter::Register ra) {
    const uint32_t SizeBit = (s == ARMEmitter::Size::i64Bit) ? (1U << 31) : 0U;
    const uint32_t Registers = Encode_rm(rm) | Encode_ra(ra) | Encode_rn(rn) | Encode_rd(rd);

    dc32(Op | SizeBit | (Op0 << 15) | Registers);
  }

  // Encoder for the "evaluate into flags" class: `size` goes at bit 14 and the
  // source register index at bit 5. There is no destination register field.
  void EvaluateIntoFlags(uint32_t op, uint32_t size, WRegister rn) {
    uint32_t Encoding = op;
    Encoding |= size << 14;
    Encoding |= rn.Idx() << 5;
    dc32(Encoding);
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct LoadstoreEmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/ASIMDOps.inl
================================================
// SPDX-License-Identifier: MIT
/* ASIMD instruction emitters.
 *
 * This contains emitters for vector operations explicitly.
 * Most instructions have a `SubRegSize` as their first argument to select element size while operating.
 * Additionally most emitters accept templated vector register arguments of both `QRegister` and `DRegister` types.
 * Based on the combination of those two arguments, it will emit an instruction operating on a 64-bit or 128-bit wide register
 * with the selected element size.
 *
 * Some vector operations are unsized and operate at a single fixed width; that width depends on the
 * instruction.
 * The arguments for these instructions are usually `VRegister` but might be one of the other sized types as well.
 *
 * Only two instructions support the `i128Bit` ElementSize.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // Data Processing -- Scalar Floating-Point and Advanced SIMD
  // Cryptographic AES
  // Emits AESE via the AES encoder (opcode 0b00100).
  void aese(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00100;
    CryptoAES(Opcode, rd, rn);
  }
  // Emits AESD via the AES encoder (opcode 0b00101).
  void aesd(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00101;
    CryptoAES(Opcode, rd, rn);
  }
  // Emits AESMC via the AES encoder (opcode 0b00110).
  void aesmc(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00110;
    CryptoAES(Opcode, rd, rn);
  }
  // Emits AESIMC via the AES encoder (opcode 0b00111).
  void aesimc(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00111;
    CryptoAES(Opcode, rd, rn);
  }

  // Cryptographic three-register SHA
  // Emits SHA1C (opcode 0b000). `rn` is taken as an S register and encoded through its vector view.
  void sha1c(VRegister rd, SRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b000;
    Crypto3RegSHA(Opcode, rd, rn.V(), rm);
  }
  // Emits SHA1P (opcode 0b001). `rn` is taken as an S register and encoded through its vector view.
  void sha1p(VRegister rd, SRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b001;
    Crypto3RegSHA(Opcode, rd, rn.V(), rm);
  }
  // Emits SHA1M (opcode 0b010). `rn` is taken as an S register and encoded through its vector view.
  void sha1m(VRegister rd, SRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b010;
    Crypto3RegSHA(Opcode, rd, rn.V(), rm);
  }
  // Emits SHA1SU0 via the three-register SHA encoder (opcode 0b011).
  void sha1su0(VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b011;
    Crypto3RegSHA(Opcode, rd, rn, rm);
  }
  // Emits SHA256H via the three-register SHA encoder (opcode 0b100).
  void sha256h(VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b100;
    Crypto3RegSHA(Opcode, rd, rn, rm);
  }
  // Emits SHA256H2 via the three-register SHA encoder (opcode 0b101).
  void sha256h2(VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b101;
    Crypto3RegSHA(Opcode, rd, rn, rm);
  }
  // Emits SHA256SU1 via the three-register SHA encoder (opcode 0b110).
  void sha256su1(VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b110;
    Crypto3RegSHA(Opcode, rd, rn, rm);
  }

  // Cryptographic two-register SHA
  // Emits SHA1H (opcode 0b00000). Both operands are S registers, encoded through their vector views.
  void sha1h(SRegister rd, SRegister rn) {
    constexpr uint32_t Opcode = 0b00000;
    Crypto2RegSHA(Opcode, rd.V(), rn.V());
  }
  // Emits SHA1SU1 via the two-register SHA encoder (opcode 0b00001).
  void sha1su1(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00001;
    Crypto2RegSHA(Opcode, rd, rn);
  }
  // Emits SHA256SU0 via the two-register SHA encoder (opcode 0b00010).
  void sha256su0(VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00010;
    Crypto2RegSHA(Opcode, rd, rn);
  }
  // Advanced SIMD table lookup
  // TBL with a single table register, 128-bit destination and index vector.
  void tbl(QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b00, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBL with a single table register, 64-bit destination and index vector.
  void tbl(DRegister rd, QRegister rn, DRegister rm) {
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b00, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a single table register, 128-bit destination and index vector.
  void tbx(QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b00, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a single table register, 64-bit destination and index vector.
  void tbx(DRegister rd, QRegister rn, DRegister rm) {
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b00, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }

  // TBL with a two-register table (128-bit form). Only `rn` is encoded; `rn2` is checked to be sequential with it.
  void tbl(QRegister rd, QRegister rn, QRegister rn2, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2), "rn and rn2 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b01, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBL with a two-register table (64-bit form). Only `rn` is encoded; `rn2` is checked to be sequential with it.
  void tbl(DRegister rd, QRegister rn, QRegister rn2, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2), "rn and rn2 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b01, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a two-register table (128-bit form). Only `rn` is encoded; `rn2` is checked to be sequential with it.
  void tbx(QRegister rd, QRegister rn, QRegister rn2, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2), "rn and rn2 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b01, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a two-register table (64-bit form). Only `rn` is encoded; `rn2` is checked to be sequential with it.
  void tbx(DRegister rd, QRegister rn, QRegister rn2, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2), "rn and rn2 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b01, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }

  // TBL with a three-register table (128-bit form). Only `rn` is encoded; rn2/rn3 are checked to be sequential.
  void tbl(QRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3), "rn, rn2, and rn3 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b10, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBL with a three-register table (64-bit form). Only `rn` is encoded; rn2/rn3 are checked to be sequential.
  void tbl(DRegister rd, QRegister rn, QRegister rn2, QRegister rn3, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3), "rn, rn2, and rn3 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b10, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a three-register table (128-bit form). Only `rn` is encoded; rn2/rn3 are checked to be sequential.
  void tbx(QRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3), "rn, rn2, and rn3 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b10, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a three-register table (64-bit form). Only `rn` is encoded; rn2/rn3 are checked to be sequential.
  void tbx(DRegister rd, QRegister rn, QRegister rn2, QRegister rn3, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3), "rn, rn2, and rn3 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b10, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }

  // TBL with a four-register table (128-bit form). Only `rn` is encoded; rn2..rn4 are checked to be sequential.
  void tbl(QRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rn4, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3, rn4), "rn, rn2, rn3, and rn4 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b11, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBL with a four-register table (64-bit form). Only `rn` is encoded; rn2..rn4 are checked to be sequential.
  void tbl(DRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rn4, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3, rn4), "rn, rn2, rn3, and rn4 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b11, Op = 0b0;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a four-register table (128-bit form). Only `rn` is encoded; rn2..rn4 are checked to be sequential.
  void tbx(QRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rn4, QRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3, rn4), "rn, rn2, rn3, and rn4 must be sequential");
    constexpr uint32_t Q = 1, Op2 = 0b00, Len = 0b11, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }
  // TBX with a four-register table (64-bit form). Only `rn` is encoded; rn2..rn4 are checked to be sequential.
  void tbx(DRegister rd, QRegister rn, QRegister rn2, QRegister rn3, QRegister rn4, DRegister rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rn, rn2, rn3, rn4), "rn, rn2, rn3, and rn4 must be sequential");
    constexpr uint32_t Q = 0, Op2 = 0b00, Len = 0b11, Op = 0b1;
    ASIMDTable(Q, Op2, Len, Op, rd.V(), rn.V(), rm.V());
  }

  // Advanced SIMD permute
  // UZP1 on 128-bit vectors (permute opcode 0b001).
  void uzp1(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b001;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // UZP1 on 64-bit vectors (permute opcode 0b001). 64-bit elements are rejected.
  void uzp1(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b001;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // TRN1 on 128-bit vectors (permute opcode 0b010).
  void trn1(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b010;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // TRN1 on 64-bit vectors (permute opcode 0b010). 64-bit elements are rejected.
  void trn1(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b010;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // ZIP1 on 128-bit vectors (permute opcode 0b011).
  void zip1(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b011;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // ZIP1 on 64-bit vectors (permute opcode 0b011). 64-bit elements are rejected.
  void zip1(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b011;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // UZP2 on 128-bit vectors (permute opcode 0b101).
  void uzp2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b101;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // UZP2 on 64-bit vectors (permute opcode 0b101). 64-bit elements are rejected.
  void uzp2(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b101;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // TRN2 on 128-bit vectors (permute opcode 0b110).
  void trn2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b110;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // TRN2 on 64-bit vectors (permute opcode 0b110). 64-bit elements are rejected.
  void trn2(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b110;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // ZIP2 on 128-bit vectors (permute opcode 0b111).
  void zip2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr uint32_t Q = 1, Opcode = 0b111;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }
  // ZIP2 on 64-bit vectors (permute opcode 0b111). 64-bit elements are rejected.
  void zip2(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid 64-bit size on 64-bit permute");
    constexpr uint32_t Q = 0, Opcode = 0b111;
    ASIMDPermute(Q, size, Opcode, rd.V(), rn.V(), rm.V());
  }

  // Advanced SIMD extract
  // EXT on 128-bit vectors. `Index` is the byte position and must be in [0, 15].
  void ext(QRegister rd, QRegister rn, QRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 16, "Index can't be more than 15");
    constexpr uint32_t Q = 1, Op2 = 0b00;
    ASIMDExtract(Q, Op2, Index, rd.V(), rn.V(), rm.V());
  }
  // EXT on 64-bit vectors. `Index` is the byte position and must be in [0, 7].
  void ext(DRegister rd, DRegister rn, DRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 8, "Index can't be more than 7");
    constexpr uint32_t Q = 0, Op2 = 0b00;
    ASIMDExtract(Q, Op2, Index, rd.V(), rn.V(), rm.V());
  }

  // Advanced SIMD copy
  // DUP (element): broadcasts element `Index` of `rn` into `rd`.
  // The register type selects the 64-bit or 128-bit form; 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void dup(SubRegSize size, T rd, T rn, uint32_t Index) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit dup");
    }

    constexpr uint32_t Q = std::is_same_v<QRegister, T> ? 1 : 0;

    const uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    const uint32_t ElemBytes = 1U << Log2Bytes;
    // The index is bounded by how many elements fit in a full 128-bit register.
    [[maybe_unused]] const uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);

    // imm5: one-hot size marker in the low bits with the element index placed directly above it.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(Q, 0, imm5, 0b0000, rd.V(), rn.V());
  }

  // DUP (general): broadcasts the general-purpose register `rn` into every element of `rd`.
  // The register type selects the 64-bit or 128-bit form; 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void dup(SubRegSize size, T rd, Register rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit dup");
    }

    constexpr uint32_t Q = std::is_same_v<QRegister, T> ? 1 : 0;

    // Only the one-hot size marker is set; the upper bits of imm5 are ignored for GPR dup.
    const uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    const uint32_t imm5 = 1U << Log2Bytes;

    ASIMDScalarCopy(Q, 0, imm5, 0b0001, rd, ToVReg(rn));
  }

  // SMOV into a 64-bit general-purpose register from element `Index` of `rn`.
  // Only 8-, 16- and 32-bit elements are accepted.
  template<SubRegSize size>
  requires (size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit)
  void smov(XRegister rd, VRegister rn, uint32_t Index) {
    static_assert(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit, "Unsupported smov size");

    constexpr uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    constexpr uint32_t ElemBytes = 1U << Log2Bytes;
    [[maybe_unused]] constexpr uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);

    // imm5: one-hot size marker in the low bits with the element index placed directly above it.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(1, 0, imm5, 0b0101, ToVReg(rd), rn);
  }
  // SMOV into a 32-bit general-purpose register from element `Index` of `rn`.
  // Only 8- and 16-bit elements are accepted.
  template<SubRegSize size>
  requires (size == SubRegSize::i8Bit || size == SubRegSize::i16Bit)
  void smov(WRegister rd, VRegister rn, uint32_t Index) {
    static_assert(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit, "Unsupported smov size");

    constexpr uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    constexpr uint32_t ElemBytes = 1U << Log2Bytes;
    [[maybe_unused]] constexpr uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);

    // imm5: one-hot size marker in the low bits with the element index placed directly above it.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(0, 0, imm5, 0b0101, ToVReg(rd), rn);
  }

  // UMOV: moves element `Index` of `rn` into the general-purpose register `rd`.
  // Q is set only for 64-bit elements.
  template<SubRegSize size>
  void umov(Register rd, VRegister rn, uint32_t Index) {
    static_assert(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                  "Unsupported umov size");

    constexpr uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    constexpr uint32_t ElemBytes = 1U << Log2Bytes;
    constexpr uint32_t Q = size == SubRegSize::i64Bit ? 1 : 0;
    [[maybe_unused]] constexpr uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);

    // imm5: one-hot size marker in the low bits with the element index placed directly above it.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(Q, 0, imm5, 0b0111, ToVReg(rd), rn);
  }

  // INS (general): writes the general-purpose register `rn` into element `Index` of `rd`.
  void ins(SubRegSize size, VRegister rd, uint32_t Index, Register rn) {
    const uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    const uint32_t ElemBytes = 1U << Log2Bytes;
    [[maybe_unused]] const uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);

    // imm5: one-hot size marker in the low bits with the element index placed directly above it.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(1, 0, imm5, 0b0011, rd, ToVReg(rn));
  }

  // INS (element): copies element `Index2` of `rn` into element `Index` of `rd`.
  void ins(SubRegSize size, VRegister rd, uint32_t Index, VRegister rn, uint32_t Index2) {
    const uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    const uint32_t ElemBytes = 1U << Log2Bytes;
    [[maybe_unused]] const uint32_t MaxIndex = 128U / (ElemBytes * 8);

    LOGMAN_THROW_A_FMT(Index < MaxIndex, "Index too large. Index={}, Max Index: {}", Index, MaxIndex);
    LOGMAN_THROW_A_FMT(Index2 < MaxIndex, "Index2 too large. Index2={}, Max Index: {}", Index2, MaxIndex);

    // imm5 holds the destination index above a one-hot size marker;
    // imm4 holds the source index scaled by the element size.
    const uint32_t imm5 = ElemBytes | (Index << (Log2Bytes + 1));
    const uint32_t imm4 = Index2 << Log2Bytes;

    ASIMDScalarCopy(1, 0b10, imm5, imm4, rd, rn);
  }

  // Advanced SIMD three-register extension
  // SDOT (vector).
  // `size` names the accumulator element size. The A64 encoding only allocates size == 0b10,
  // so anything other than i32Bit would emit an unallocated instruction. The previous check only
  // rejected 64-bit elements on D registers and let 8/16-bit (and 64-bit on Q registers) through.
  // This also matches usdot, which hard-codes i32Bit.
  template<IsQOrDRegister T>
  void sdot(ARMEmitter::SubRegSize size, T rd, T rn, T rm) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i32Bit, "Only 32-bit subregsize supported");
    ASIMDThreeRegisterExt(0, 0b0010, size, rm, rn, rd);
  }

  // USDOT (vector). The element size is fixed at 32-bit, so no size argument is taken.
  template<IsQOrDRegister T>
  void usdot(T rd, T rn, T rm) {
    constexpr uint32_t U = 0, Opcode = 0b0011;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  // SQRDMLAH (vector).
  // The A64 encoding treats size == 0b00 and size == 0b11 as UNDEFINED, so only 16-bit and 32-bit
  // elements are valid for both D and Q forms. The previous check only rejected 64-bit elements
  // on D registers, which allowed unallocated encodings to be emitted.
  template<IsQOrDRegister T>
  void sqrdmlah(ARMEmitter::SubRegSize size, T rd, T rn, T rm) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i16Bit || size == ARMEmitter::SubRegSize::i32Bit,
                       "Only 16-bit & 32-bit subregsize supported");
    ASIMDThreeRegisterExt(1, 0b0000, size, rm, rn, rd);
  }
  // SQRDMLSH (vector).
  // The A64 encoding treats size == 0b00 and size == 0b11 as UNDEFINED, so only 16-bit and 32-bit
  // elements are valid for both D and Q forms. The previous check only rejected 64-bit elements
  // on D registers, which allowed unallocated encodings to be emitted.
  template<IsQOrDRegister T>
  void sqrdmlsh(ARMEmitter::SubRegSize size, T rd, T rn, T rm) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i16Bit || size == ARMEmitter::SubRegSize::i32Bit,
                       "Only 16-bit & 32-bit subregsize supported");
    ASIMDThreeRegisterExt(1, 0b0001, size, rm, rn, rd);
  }
  // UDOT (vector).
  // `size` names the accumulator element size. The A64 encoding only allocates size == 0b10,
  // so anything other than i32Bit would emit an unallocated instruction. The previous check only
  // rejected 64-bit elements on D registers and let 8/16-bit (and 64-bit on Q registers) through.
  template<IsQOrDRegister T>
  void udot(ARMEmitter::SubRegSize size, T rd, T rn, T rm) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i32Bit, "Only 32-bit subregsize supported");
    ASIMDThreeRegisterExt(1, 0b0010, size, rm, rn, rd);
  }

  // FCMLA (vector). The rotation's underlying value is folded into the low bits of the opcode.
  // 8-bit elements are never valid; 64-bit elements are additionally rejected for D registers.
  template<IsQOrDRegister T>
  void fcmla(ARMEmitter::SubRegSize size, T rd, T rn, T rm, ARMEmitter::Rotation Rot) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::SubRegSize::i8Bit, "8-bit subregsize not supported");

    constexpr bool IsDReg = std::is_same_v<ARMEmitter::DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != ARMEmitter::SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    const auto Opcode = 0b1000 | FEXCore::ToUnderlying(Rot);
    ASIMDThreeRegisterExt(1, Opcode, size, rm, rn, rd);
  }
  // FCADD (vector). Only 90 and 270 degree rotations are accepted; they select opcode
  // 0b1100 and 0b1110 respectively.
  // 8-bit elements are never valid; 64-bit elements are additionally rejected for D registers.
  template<IsQOrDRegister T>
  void fcadd(ARMEmitter::SubRegSize size, T rd, T rn, T rm, ARMEmitter::Rotation Rot) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::SubRegSize::i8Bit, "8-bit subregsize not supported");

    constexpr bool IsDReg = std::is_same_v<ARMEmitter::DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != ARMEmitter::SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(Rot == ARMEmitter::Rotation::ROTATE_90 || Rot == ARMEmitter::Rotation::ROTATE_270, "Invalid rotation");

    const bool Is90 = Rot == ARMEmitter::Rotation::ROTATE_90;
    const uint32_t Opcode = Is90 ? 0b1100 : 0b1110;
    ASIMDThreeRegisterExt(1, Opcode, size, rm, rn, rd);
  }
  // BFDOT (vector). Always encoded with the 16-bit size value.
  template<IsQOrDRegister T>
  void bfdot(T rd, T rn, T rm) {
    constexpr uint32_t U = 1, Opcode = 0b1111;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i16Bit, rm, rn, rd);
  }
  // BFMLALB. Shares opcode 0b1111 and the 64-bit size value with bfmlalt, but passes the D-register views.
  void bfmlalb(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 1, Opcode = 0b1111;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i64Bit, rm.D(), rn.D(), rd.D());
  }
  // BFMLALT. Shares opcode 0b1111 and the 64-bit size value with bfmlalb, but passes the Q-register views.
  void bfmlalt(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 1, Opcode = 0b1111;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i64Bit, rm.Q(), rn.Q(), rd.Q());
  }
  // SMMLA. Always operates on the Q-register views with the 32-bit size value.
  void smmla(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 0, Opcode = 0b0100;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i32Bit, rm.Q(), rn.Q(), rd.Q());
  }
  // USMMLA. Always operates on the Q-register views with the 32-bit size value.
  void usmmla(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 0, Opcode = 0b0101;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i32Bit, rm.Q(), rn.Q(), rd.Q());
  }
  // BFMMLA. Always operates on the Q-register views with the 16-bit size value.
  void bfmmla(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 1, Opcode = 0b1101;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i16Bit, rm.Q(), rn.Q(), rd.Q());
  }
  // UMMLA. Always operates on the Q-register views with the 32-bit size value.
  void ummla(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm) {
    constexpr uint32_t U = 1, Opcode = 0b0100;
    ASIMDThreeRegisterExt(U, Opcode, ARMEmitter::SubRegSize::i32Bit, rm.Q(), rn.Q(), rd.Q());
  }

  // Advanced SIMD two-register miscellaneous
  // REV64. 64-bit elements are rejected for both register widths.
  template<IsQOrDRegister T>
  void rev64(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b00000;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }

  // REV16. Only 8-bit elements are accepted.
  template<IsQOrDRegister T>
  void rev16(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit, "Only 8-bit subregsize supported");
    constexpr uint32_t U = 0, Opcode = 0b00001;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }

  ///< SADDLP. `size` is the destination element size.
  ///< The encoded size field is one step smaller (size - 1), so an 8-bit destination is rejected.
  template<IsQOrDRegister T>
  void saddlp(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");

    const auto EncodedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};
    constexpr uint32_t U = 0, Opcode = 0b00010;
    ASIMD2RegMisc(U, EncodedSize, Opcode, rd, rn);
  }

  // SUQADD (vector). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void suqadd(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b00011;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }

  // CLS (vector). 64-bit elements are rejected for both register widths.
  template<IsQOrDRegister T>
  void cls(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b00100;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }
  // CNT (vector). Only 8-bit elements are accepted.
  template<IsQOrDRegister T>
  void cnt(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit, "Only 8-bit subregsize supported");
    constexpr uint32_t U = 0, Opcode = 0b00101;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }

  ///< SADALP. `size` is the destination element size.
  ///< The encoded size field is one step smaller (size - 1), so an 8-bit destination is rejected.
  template<IsQOrDRegister T>
  void sadalp(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");

    const auto EncodedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};
    constexpr uint32_t U = 0, Opcode = 0b00110;
    ASIMD2RegMisc(U, EncodedSize, Opcode, rd, rn);
  }

  // SQABS (vector). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void sqabs(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b00111;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }
  // CMGT against zero (single source operand). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void cmgt(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b01000;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }
  // CMEQ against zero (single source operand). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void cmeq(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b01001;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }
  // CMLT against zero (single source operand). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void cmlt(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b01010;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }
  // ABS (vector). 64-bit elements are rejected for D registers.
  template<IsQOrDRegister T>
  void abs(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    constexpr uint32_t U = 0, Opcode = 0b01011;
    ASIMD2RegMisc(U, size, Opcode, rd, rn);
  }

  ///< XTN. `size` is the destination element size; the source elements are the next size up.
  ///< Encoded through the D-register views.
  void xtn(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b10010;
    ASIMD2RegMisc(U, size, Opcode, rd.D(), rn.D());
  }
  ///< XTN2. `size` is the destination element size; the source elements are the next size up.
  ///< Encoded through the Q-register views.
  void xtn2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b10010;
    ASIMD2RegMisc(U, size, Opcode, rd.Q(), rn.Q());
  }

  ///< SQXTN. `size` is the destination element size; the source elements are the next size up.
  ///< Encoded through the D-register views.
  void sqxtn(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b10100;
    ASIMD2RegMisc(U, size, Opcode, rd.D(), rn.D());
  }
  ///< SQXTN2. `size` is the destination element size; the source elements are the next size up.
  ///< Encoded through the Q-register views.
  void sqxtn2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    constexpr uint32_t U = 0, Opcode = 0b10100;
    ASIMD2RegMisc(U, size, Opcode, rd.Q(), rn.Q());
  }

  ///< FCVTN. `size` is the destination element size (16-bit or 32-bit); the source is the next size up.
  ///< The size field carries i16Bit for a 32-bit destination and i8Bit for a 16-bit destination.
  void fcvtn(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i16Bit, "Only 16-bit & 32-bit subregsize supported");

    const bool ToSingle = size == SubRegSize::i32Bit;
    const auto EncodedSize = ToSingle ? SubRegSize::i16Bit : SubRegSize::i8Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b10110, rd.D(), rn.D());
  }
  ///< FCVTN2. `size` is the destination element size (16-bit or 32-bit); the source is the next size up.
  ///< The size field carries i16Bit for a 32-bit destination and i8Bit for a 16-bit destination.
  void fcvtn2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i16Bit, "Only 16-bit & 32-bit subregsize supported");

    const bool ToSingle = size == SubRegSize::i32Bit;
    const auto EncodedSize = ToSingle ? SubRegSize::i16Bit : SubRegSize::i8Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b10110, rd.Q(), rn.Q());
  }

  ///< FCVTL. `size` is the destination element size (32-bit or 64-bit); this is a widening
  ///< conversion, so the source elements are the next size down.
  ///< The size field carries i16Bit for a 64-bit destination and i8Bit for a 32-bit destination.
  void fcvtl(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");

    const bool ToDouble = size == SubRegSize::i64Bit;
    const auto EncodedSize = ToDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b10111, rd.D(), rn.D());
  }
  ///< FCVTL2. `size` is the destination element size (32-bit or 64-bit); this is a widening
  ///< conversion, so the source elements are the next size down.
  ///< The size field carries i16Bit for a 64-bit destination and i8Bit for a 32-bit destination.
  void fcvtl2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");

    const bool ToDouble = size == SubRegSize::i64Bit;
    const auto EncodedSize = ToDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b10111, rd.Q(), rn.Q());
  }

  // FRINTN (vector).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void frintn(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11000;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }
  // FRINTM (vector).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void frintm(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11001;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }

  // FCVTNS (vector).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void fcvtns(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11010;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }
  // FCVTMS (vector).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void fcvtms(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11011;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }
  // FCVTAS (vector).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void fcvtas(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11100;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }
  // SCVTF (vector, integer).
  // Half precision goes through the FP16 two-register-misc encoder. Single and double go through
  // the generic encoder with the size field set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void scvtf(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    constexpr uint32_t Opcode = 0b11101;
    if (size == SubRegSize::i16Bit) {
      ASIMDTwoRegMiscFP16(0, 0, Opcode, rn, rd);
      return;
    }

    const bool IsDouble = size == SubRegSize::i64Bit;
    ASIMD2RegMisc(0, IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit, Opcode, rd, rn);
  }
  // FRINT32Z (vector). Only 32-bit and 64-bit elements are accepted (64-bit not on D registers).
  // The size field is set to i8Bit (32-bit) or i16Bit (64-bit).
  template<IsQOrDRegister T>
  void frint32z(SubRegSize size, T rd, T rn) {
    constexpr bool IsDReg = std::is_same_v<DRegister, T>;
    if constexpr (IsDReg) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");

    const bool IsDouble = size == SubRegSize::i64Bit;
    const auto EncodedSize = IsDouble ? SubRegSize::i16Bit : SubRegSize::i8Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b11110, rd, rn);
  }
  // Vector frint64z emitter.
  // Only 32-bit and 64-bit element sizes are accepted; DRegister operands reject 64-bit.
  template<IsQOrDRegister T>
  void frint64z(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");

    // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
    const auto EncodedSize = size != SubRegSize::i64Bit ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD2RegMisc(0, EncodedSize, 0b11111, rd, rn);
  }

  // Two-register (single source) vector fcmgt emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcmgt(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b01100, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b01100, rn, rd);
  }

  // Two-register (single source) vector fcmeq emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcmeq(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b01101, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b01101, rn, rd);
  }

  // Two-register (single source) vector fcmlt emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcmlt(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b01110, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b01110, rn, rd);
  }

  // Vector fabs emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fabs(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b01111, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b01111, rn, rd);
  }

  // Vector frintp emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void frintp(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b11000, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b11000, rn, rd);
  }

  // Vector frintz emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void frintz(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b11001, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b11001, rn, rd);
  }

  // Vector fcvtps emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcvtps(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b11010, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b11010, rn, rd);
  }

  // Vector fcvtzs emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcvtzs(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b11011, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(0, 1, 0b11011, rn, rd);
  }

  // Vector urecpe emitter. Only 32-bit elements are accepted, and `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void urecpe(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit, "Only 32-bit subregsize supported");

    constexpr auto Opcode = 0b11100;
    ASIMD2RegMisc(0, size, Opcode, rd, rn);
  }

  // Vector frecpe emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the sibling floating-point two-register ops in this file (e.g. frsqrte): a 64-bit
  // element in a 64-bit vector is not an encodable arrangement.
  template<IsQOrDRegister T>
  void frecpe(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(0, 1, 0b11101, rn, rd);
    } else {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(0, size, 0b11101, rd, rn);
    }
  }

  // Vector rev32 emitter. Only 8-bit and 16-bit element sizes are accepted.
  template<IsQOrDRegister T>
  void rev32(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit, "Only 8-bit & 16-bit subregsize supported");

    constexpr auto Opcode = 0b00000;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }

  ///< size is the destination size.
  ///< source size is the next size down (the encoded size is `size - 1`).
  template<IsQOrDRegister T>
  void uaddlp(SubRegSize size, T rd, T rn) {
    // An 8-bit destination has no narrower source size to derive.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");
    const auto ConvertedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};

    ASIMD2RegMisc(1, ConvertedSize, 0b00010, rd, rn);
  }

  // Vector usqadd emitter. DRegister operands reject 64-bit elements; `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void usqadd(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    constexpr auto Opcode = 0b00011;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }

  // Vector clz emitter. 64-bit elements are rejected for every register width.
  template<IsQOrDRegister T>
  void clz(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");

    constexpr auto Opcode = 0b00100;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }

  ///< size is the destination size.
  ///< source size is the next size down (the encoded size is `size - 1`).
  template<IsQOrDRegister T>
  void uadalp(SubRegSize size, T rd, T rn) {
    // An 8-bit destination has no narrower source size to derive.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");
    const auto ConvertedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};

    ASIMD2RegMisc(1, ConvertedSize, 0b00110, rd, rn);
  }

  // Vector sqneg emitter. DRegister operands reject 64-bit elements; `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void sqneg(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    constexpr auto Opcode = 0b00111;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }

  // cmge, comparison-against-zero form (single source register).
  // DRegister operands reject 64-bit elements; `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void cmge(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    constexpr auto Opcode = 0b01000;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }
  // cmle, comparison-against-zero form (single source register).
  // DRegister operands reject 64-bit elements; `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void cmle(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    constexpr auto Opcode = 0b01001;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }
  // Vector neg emitter. DRegister operands reject 64-bit elements; `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void neg(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }

    constexpr auto Opcode = 0b01011;
    ASIMD2RegMisc(1, size, Opcode, rd, rn);
  }
  ///< `size` names the destination element size; the source elements are one size larger.
  ///< Both operands are emitted through their D-register views.
  void sqxtun(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");

    const auto Dst = rd.D();
    const auto Src = rn.D();
    ASIMD2RegMisc(1, size, 0b10010, Dst, Src);
  }
  ///< `size` names the destination element size; the source elements are one size larger.
  ///< Both operands are emitted through their Q-register views.
  void sqxtun2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");

    const auto Dst = rd.Q();
    const auto Src = rn.Q();
    ASIMD2RegMisc(1, size, 0b10010, Dst, Src);
  }

  ///< size is the destination size.
  ///< source size is the next size down (the encoded size is `size - 1`).
  void shll(SubRegSize size, DRegister rd, DRegister rn) {
    // An 8-bit destination has no narrower source size to derive.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");
    const auto ConvertedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};

    ASIMD2RegMisc(1, ConvertedSize, 0b10011, rd, rn);
  }
  ///< size is the destination size.
  ///< source size is the next size down (the encoded size is `size - 1`).
  void shll2(SubRegSize size, QRegister rd, QRegister rn) {
    // An 8-bit destination has no narrower source size to derive.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");
    const auto ConvertedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};

    ASIMD2RegMisc(1, ConvertedSize, 0b10011, rd, rn);
  }
  ///< size is the destination size.
  ///< source size is the next size up.
  void uqxtn(SubRegSize size, VRegister rd, VRegister rn) {
    // A 64-bit destination would need a 128-bit source element; reject it the same way
    // sqxtun does instead of emitting an unallocated size value.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    ASIMD2RegMisc(1, size, 0b10100, rd.D(), rn.D());
  }
  ///< size is the destination size.
  ///< source size is the next size up.
  void uqxtn2(SubRegSize size, VRegister rd, VRegister rn) {
    // A 64-bit destination would need a 128-bit source element; reject it the same way
    // sqxtun2 does instead of emitting an unallocated size value.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit destination subregsize not supported");
    ASIMD2RegMisc(1, size, 0b10100, rd.Q(), rn.Q());
  }
  ///< `size` names the destination element size (only 32-bit is accepted);
  ///< the source elements are one size larger. Operands use their D-register views.
  void fcvtxn(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit, "Only 32-bit subregsize supported");

    // A 32-bit destination is encoded with the i16Bit size value.
    const auto EncodedSize = size != SubRegSize::i32Bit ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD2RegMisc(1, EncodedSize, 0b10110, rd.D(), rn.D());
  }
  ///< `size` names the destination element size (only 32-bit is accepted);
  ///< the source elements are one size larger. Operands use their Q-register views.
  void fcvtxn2(SubRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit, "Only 32-bit subregsize supported");

    // A 32-bit destination is encoded with the i16Bit size value.
    const auto EncodedSize = size != SubRegSize::i32Bit ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD2RegMisc(1, EncodedSize, 0b10110, rd.Q(), rn.Q());
  }
  // Vector frinta emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the sibling conversions in this file (e.g. fcvtms): a 64-bit element in a 64-bit
  // vector is not an encodable arrangement.
  template<IsQOrDRegister T>
  void frinta(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11000, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11000, rd, rn);
    }
  }
  // Vector frintx emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the sibling conversions in this file (e.g. fcvtms): a 64-bit element in a 64-bit
  // vector is not an encodable arrangement.
  template<IsQOrDRegister T>
  void frintx(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11001, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11001, rd, rn);
    }
  }

  // Vector fcvtnu emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the sibling conversions in this file (e.g. fcvtms): a 64-bit element in a 64-bit
  // vector is not an encodable arrangement.
  template<IsQOrDRegister T>
  void fcvtnu(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11010, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11010, rd, rn);
    }
  }
  // Vector fcvtmu emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the signed counterpart fcvtms in this file: a 64-bit element in a 64-bit vector
  // is not an encodable arrangement.
  template<IsQOrDRegister T>
  void fcvtmu(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11011, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11011, rd, rn);
    }
  }
  // Vector fcvtau emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the signed counterpart fcvtas in this file: a 64-bit element in a 64-bit vector
  // is not an encodable arrangement.
  template<IsQOrDRegister T>
  void fcvtau(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11100, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11100, rd, rn);
    }
  }
  // Vector ucvtf emitter.
  // `size` must be a 16/32/64-bit float size. DRegister operands additionally reject 64-bit,
  // matching the signed counterpart scvtf in this file: a 64-bit element in a 64-bit vector
  // is not an encodable arrangement.
  template<IsQOrDRegister T>
  void ucvtf(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Half-precision elements use the FP16 helper.
      ASIMDTwoRegMiscFP16(1, 0, 0b11101, rn, rd);
    } else {
      // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
      const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;
      ASIMD2RegMisc(1, ConvertedSize, 0b11101, rd, rn);
    }
  }
  // Vector frint32x emitter.
  // Only 32-bit and 64-bit element sizes are accepted. DRegister operands additionally reject
  // 64-bit, matching frint32z in this file: a 64-bit element in a 64-bit vector is not an
  // encodable arrangement.
  template<IsQOrDRegister T>
  void frint32x(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");
    // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
    const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;

    ASIMD2RegMisc(1, ConvertedSize, 0b11110, rd, rn);
  }
  // Vector frint64x emitter.
  // Only 32-bit and 64-bit element sizes are accepted. DRegister operands additionally reject
  // 64-bit, matching frint64z in this file: a 64-bit element in a 64-bit vector is not an
  // encodable arrangement.
  template<IsQOrDRegister T>
  void frint64x(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Only 32-bit & 64-bit subregsize supported");
    // 64-bit elements map to the i16Bit size value, 32-bit elements to i8Bit.
    const auto ConvertedSize = size == SubRegSize::i64Bit ? SubRegSize::i16Bit : SubRegSize::i8Bit;

    ASIMD2RegMisc(1, ConvertedSize, 0b11111, rd, rn);
  }

  // Vector not emitter. Callers must pass 8-bit; the emitted size value is always i8Bit.
  template<IsQOrDRegister T>
  void not_(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit, "Only 8-bit subregsize supported");

    constexpr auto FixedSize = SubRegSize::i8Bit;
    ASIMD2RegMisc(1, FixedSize, 0b00101, rd, rn);
  }
  // Vector mvn emitter; emits exactly the same arguments as not_ in this file.
  // Callers must pass 8-bit; the emitted size value is always i8Bit.
  template<IsQOrDRegister T>
  void mvn(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit, "Only 8-bit subregsize supported");

    constexpr auto FixedSize = SubRegSize::i8Bit;
    ASIMD2RegMisc(1, FixedSize, 0b00101, rd, rn);
  }

  // Vector rbit emitter. Callers must pass 8-bit, but the emitted size value is fixed to
  // i16Bit; with the shared 0b00101 opcode this is what differs from not_/mvn.
  template<IsQOrDRegister T>
  void rbit(SubRegSize size, T rd, T rn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit, "Only 8-bit subregsize supported");

    constexpr auto FixedSize = SubRegSize::i16Bit;
    ASIMD2RegMisc(1, FixedSize, 0b00101, rd, rn);
  }

  // Two-register (single source) vector fcmge emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcmge(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b01100, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b01100, rn, rd);
  }
  // Two-register (single source) vector fcmle emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcmle(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b01101, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b01101, rn, rd);
  }

  // Vector fneg emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fneg(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b01111, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b01111, rn, rd);
  }

  // Vector frinti emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void frinti(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b11001, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b11001, rn, rd);
  }

  // Vector fcvtpu emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcvtpu(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b11010, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b11010, rn, rd);
  }
  // Vector fcvtzu emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fcvtzu(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b11011, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b11011, rn, rd);
  }

  // Vector ursqrte emitter. Only 32-bit elements are accepted, and `size` is forwarded as-is.
  template<IsQOrDRegister T>
  void ursqrte(SubRegSize size, T rd, T rn) {
    // The predicate admits 32-bit only, so the message must not mention 64-bit
    // (this now matches the wording used by urecpe).
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit, "Only 32-bit subregsize supported");
    ASIMD2RegMisc(1, size, 0b11100, rd, rn);
  }
  // Vector frsqrte emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void frsqrte(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b11101, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b11101, rn, rd);
  }

  // Vector fsqrt emitter.
  // `size` must be a 16/32/64-bit float size; DRegister operands additionally reject 64-bit.
  template<IsQOrDRegister T>
  void fsqrt(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      // 32/64-bit elements: `size` is forwarded unchanged.
      ASIMD2RegMisc(1, size, 0b11111, rd, rn);
      return;
    }
    // Half-precision elements use the FP16 helper.
    ASIMDTwoRegMiscFP16(1, 1, 0b11111, rn, rd);
  }

  // Advanced SIMD across lanes
  ///< size is the destination size.
  ///< source size is the next size down (the encoded size is `size - 1`).
  template<IsQOrDRegister T>
  void saddlv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    // An 8-bit destination has no narrower source size to derive.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");
    const auto ConvertedSize = SubRegSize {FEXCore::ToUnderlying(size) - 1};

    ASIMDAcrossLanes<T>(0, ConvertedSize, 0b00011, rd, rn);
  }

  // Across-lanes smaxv emitter.
  // 64-bit elements are always rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void smaxv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit && size != SubRegSize::i64Bit, "32/64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Destination 64-bit subregsize unsupported");

    constexpr auto Opcode = 0b01010;
    ASIMDAcrossLanes<T>(0, size, Opcode, rd, rn);
  }
  // Across-lanes sminv emitter.
  // 64-bit elements are always rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void sminv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit && size != SubRegSize::i64Bit, "32/64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Destination 64-bit subregsize unsupported");

    constexpr auto Opcode = 0b11010;
    ASIMDAcrossLanes<T>(0, size, Opcode, rd, rn);
  }
  // Across-lanes addv emitter.
  // 64-bit elements are always rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void addv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit && size != SubRegSize::i64Bit, "32/64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Destination 64-bit subregsize unsupported");

    constexpr auto Opcode = 0b11011;
    ASIMDAcrossLanes<T>(0, size, Opcode, rd, rn);
  }
  // Across-lanes uaddlv emitter.
  // `size` is treated as the destination size: 8-bit is rejected and the value one step
  // below `size` is what gets emitted. DRegister operands also reject 64-bit.
  template<IsQOrDRegister T>
  void uaddlv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Destination 8-bit subregsize unsupported");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMDAcrossLanes<T>(1, EncodedSize, 0b00011, rd, rn);
  }
  // Across-lanes umaxv emitter.
  // 64-bit elements are always rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void umaxv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit && size != SubRegSize::i64Bit, "32/64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Destination 64-bit subregsize unsupported");

    constexpr auto Opcode = 0b01010;
    ASIMDAcrossLanes<T>(1, size, Opcode, rd, rn);
  }
  // Across-lanes uminv emitter.
  // 64-bit elements are always rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void uminv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit && size != SubRegSize::i64Bit, "32/64-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Destination 64-bit subregsize unsupported");

    constexpr auto Opcode = 0b11010;
    ASIMDAcrossLanes<T>(1, size, Opcode, rd, rn);
  }
  // Across-lanes fmaxnmv emitter.
  // 8-bit and 64-bit sizes are rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void fmaxnmv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit, "32-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i64Bit, "Destination 8/64-bit subregsize unsupported");

    // The first helper argument is 0 for 16-bit elements and 1 otherwise.
    const auto U = size != SubRegSize::i16Bit ? 1 : 0;
    // 64-bit would map to i16Bit; every other size maps to i8Bit.
    const auto EncodedSize = size != SubRegSize::i64Bit ? SubRegSize::i8Bit : SubRegSize::i16Bit;

    ASIMDAcrossLanes<T>(U, EncodedSize, 0b01100, rd, rn);
  }
  // Across-lanes fmaxv emitter.
  // 8-bit and 64-bit sizes are rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void fmaxv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit, "32-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i64Bit, "Destination 8/64-bit subregsize unsupported");

    // The first helper argument is 0 for 16-bit elements and 1 otherwise.
    const auto U = size != SubRegSize::i16Bit ? 1 : 0;
    // 64-bit would map to i16Bit; every other size maps to i8Bit.
    const auto EncodedSize = size != SubRegSize::i64Bit ? SubRegSize::i8Bit : SubRegSize::i16Bit;

    ASIMDAcrossLanes<T>(U, EncodedSize, 0b01111, rd, rn);
  }
  // Across-lanes fminnmv emitter.
  // 8-bit and 64-bit sizes are rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void fminnmv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit, "32-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i64Bit, "Destination 8/64-bit subregsize unsupported");

    // The first helper argument is 0 for 16-bit elements and 1 otherwise.
    const auto U = size != SubRegSize::i16Bit ? 1 : 0;
    // 64-bit stays i64Bit; every other size is emitted as i32Bit.
    const auto EncodedSize = size != SubRegSize::i64Bit ? SubRegSize::i32Bit : SubRegSize::i64Bit;

    ASIMDAcrossLanes<T>(U, EncodedSize, 0b01100, rd, rn);
  }
  // Across-lanes fminv emitter.
  // 8-bit and 64-bit sizes are rejected; DRegister operands also reject 32-bit.
  template<IsQOrDRegister T>
  void fminv(SubRegSize size, T rd, T rn) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i32Bit, "32-bit subregsize not supported");
    }
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i64Bit, "Destination 8/64-bit subregsize unsupported");

    // The first helper argument is 0 for 16-bit elements and 1 otherwise.
    const auto U = size != SubRegSize::i16Bit ? 1 : 0;
    // 64-bit stays i64Bit; every other size is emitted as i32Bit.
    const auto EncodedSize = size != SubRegSize::i64Bit ? SubRegSize::i32Bit : SubRegSize::i64Bit;

    ASIMDAcrossLanes<T>(U, EncodedSize, 0b01111, rd, rn);
  }

  // Advanced SIMD three different
  // TODO: Double check narrowing op size limits.
  // TODO: Don't enforce DRegister/QRegister for Q check
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void saddl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0000, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void saddl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0000, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void saddw(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0001, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void saddw2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0001, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void ssubl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0010, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void ssubl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0010, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void ssubw(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0011, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void ssubw2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0011, EncodedSize, rd, rn, rm);
  }
  // addhn, D-register form. `size` is forwarded unchanged; 64-bit is rejected as a destination size.
  void addhn(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "No 64-bit dest support.");

    constexpr auto Opcode = 0b0100;
    ASIMD3Different(0, Opcode, size, rd, rn, rm);
  }
  // addhn2, Q-register form. `size` is forwarded unchanged; 64-bit is rejected as a destination size.
  void addhn2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "No 64-bit dest support.");

    constexpr auto Opcode = 0b0100;
    ASIMD3Different(0, Opcode, size, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void sabal(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0101, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void sabal2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0101, EncodedSize, rd, rn, rm);
  }
  // subhn, D-register form. `size` is forwarded unchanged; 64-bit is rejected as a destination size.
  void subhn(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "No 64-bit dest support.");

    constexpr auto Opcode = 0b0110;
    ASIMD3Different(0, Opcode, size, rd, rn, rm);
  }
  // subhn2, Q-register form. `size` is forwarded unchanged; 64-bit is rejected as a destination size.
  void subhn2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "No 64-bit dest support.");

    constexpr auto Opcode = 0b0110;
    ASIMD3Different(0, Opcode, size, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void sabdl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0111, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void sabdl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b0111, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smlal(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1000, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smlal2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1000, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmlal(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1001, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmlal2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1001, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smlsl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1010, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smlsl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1010, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmlsl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1011, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmlsl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1011, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smull(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1100, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit is rejected);
  ///< the value one step below it is what gets emitted.
  void smull2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1100, EncodedSize, rd, rn, rm);
  }
  ///< `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmull(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1101, EncodedSize, rd, rn, rm);
  }
  ///< Q-register form. `size` is the destination element size (8-bit and 16-bit are rejected);
  ///< the value one step below it is what gets emitted.
  void sqdmull2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "No 8/16-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1101, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void pmull(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    // Only the 16-bit and 128-bit destination sizes pass the assertion.
    LOGMAN_THROW_A_FMT(!(size != SubRegSize::i16Bit && size != SubRegSize::i128Bit), "Only 16-bit and 128-bit destination supported");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(0, 0b1110, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void pmull2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    // Only the 16-bit and 128-bit destination sizes pass the assertion.
    LOGMAN_THROW_A_FMT(!(size != SubRegSize::i16Bit && size != SubRegSize::i128Bit), "Only 16-bit and 128-bit destination supported");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(0, 0b1110, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uaddl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0000, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uaddl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0000, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uaddw(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0001, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uaddw2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0001, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void usubl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0010, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void usubl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0010, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void usubw(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0011, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void usubw2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0011, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size and is forwarded to the encoder unchanged.
  void raddhn(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    constexpr auto Opcode = 0b0100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "No 64-bit dest support.");
    ASIMD3Different(1, Opcode, size, rd, rn, rm);
  }
  ///< Size is dest size and is forwarded to the encoder unchanged.
  void raddhn2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr auto Opcode = 0b0100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "No 64-bit dest support.");
    ASIMD3Different(1, Opcode, size, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uabal(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0101, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uabal2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0101, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size and is forwarded to the encoder unchanged.
  void rsubhn(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    constexpr auto Opcode = 0b0110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "No 64-bit dest support.");
    ASIMD3Different(1, Opcode, size, rd, rn, rm);
  }
  ///< Size is dest size and is forwarded to the encoder unchanged.
  void rsubhn2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    constexpr auto Opcode = 0b0110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "No 64-bit dest support.");
    ASIMD3Different(1, Opcode, size, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uabdl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b0111, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void uabdl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b0111, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umlal(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b1000, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umlal2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b1000, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umlsl(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b1010, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umlsl2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b1010, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umull(SubRegSize size, DRegister rd, DRegister rn, DRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto EncodedSize = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) - 1);
    ASIMD3Different(1, 0b1100, EncodedSize, rd, rn, rm);
  }
  ///< Size is dest size; the encoder is handed the enumerator one step below it.
  void umull2(SubRegSize size, QRegister rd, QRegister rn, QRegister rm) {
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");

    const auto SizeIndex = FEXCore::ToUnderlying(size) - 1;
    ASIMD3Different(1, 0b1100, static_cast<SubRegSize>(SizeIndex), rd, rn, rm);
  }

  // Advanced SIMD three same
  // SHADD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void shadd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00000;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }

  // SQADD (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sqadd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00001;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit sqadd");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }

  // SRHADD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void srhadd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00010;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SHSUB (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void shsub(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SQSUB (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sqsub(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00101;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit sqsub");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // CMGT (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmgt(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00110;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit cmgt");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // CMGE (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmge(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00111;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit cmge");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SSHL (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sshl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01000;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit sshl");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SQSHL (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sqshl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01001;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit sqshl");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SRSHL (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void srshl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01010;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit srshl");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SQRSHL (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sqrshl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01011;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit sqrshl");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SMAX (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void smax(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SMIN (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void smin(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01101;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SABD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void sabd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SABA (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void saba(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01111;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // ADD (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void add(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10000;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit add");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // CMTST (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmtst(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10001;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit cmtst");
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // MLA (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void mla(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10010;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // MUL (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void mul(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10011;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SMAXP (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void smaxp(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SMINP (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void sminp(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10101;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // SQDMULH (vector): both the 8-bit and the 64-bit element sizes are asserted
  // against, each with its own message.
  template<IsQOrDRegister T>
  void sqdmulh(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i8Bit), "No 8-bit dest support.");
    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // ADDP (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void addp(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10111;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(0, size, Opcode, rd, rn, rm);
  }
  // FMAXNM (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmaxnm(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b000, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11000, rd, rn, rm);
  }
  // FMLA (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmla(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b001, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11001, rd, rn, rm);
  }
  // FADD (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fadd(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b010, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11010, rd, rn, rm);
  }
  // FMULX (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmulx(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b011, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11011, rd, rn, rm);
  }
  // FCMEQ (register, vector). 16-bit elements go through ASIMDThreeSameFP16;
  // 32/64-bit elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fcmeq(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b100, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11100, rd, rn, rm);
  }
  // FMAX (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmax(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b110, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11110, rd, rn, rm);
  }
  // FRECPS (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void frecps(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 0, 0b111, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeField, 0b11111, rd, rn, rm);
  }
  // AND (vector). Takes no element size; the size argument is fixed at i8Bit,
  // which is what separates it from bic/orr/orn on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void and_(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i8Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b00011, rd, rn, rm);
  }
  // FMLAL (vector). Takes no element size; the size argument is fixed at i8Bit.
  template<IsQOrDRegister T>
  void fmlal(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i8Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b11101, rd, rn, rm);
  }
  // FMLAL2 (vector). Takes no element size; the size argument is fixed at i8Bit
  // and the first ASIMD3Same argument is 1.
  template<IsQOrDRegister T>
  void fmlal2(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i8Bit;
    ASIMD3Same<T>(1, SizeSelector, 0b11001, rd, rn, rm);
  }
  // BIC (vector, register). Takes no element size; the size argument is fixed at
  // i16Bit, which is what separates it from and_/orr/orn on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void bic(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i16Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b00011, rd, rn, rm);
  }
  // FMINNM (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fminnm(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(0, size, 0b11000, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 1, 0b000, rm, rn, rd);
    }
  }
  // FMLS (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fmls(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(0, size, 0b11001, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 1, 0b001, rm, rn, rd);
    }
  }
  // FSUB (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fsub(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(0, size, 0b11010, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 1, 0b010, rm, rn, rd);
    }
  }
  // FMIN (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fmin(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(0, size, 0b11110, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 1, 0b110, rm, rn, rd);
    }
  }
  // FRSQRTS (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void frsqrts(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(0, size, 0b11111, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(0, 1, 0b111, rm, rn, rd);
    }
  }
  // ORR (vector, register). Takes no element size; the size argument is fixed at
  // i32Bit, which is what separates it from and_/bic/orn on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void orr(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i32Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b00011, rd, rn, rm);
  }
  // Vector register move, expressed as an ORR whose two source operands are both rn.
  template<IsQOrDRegister T>
  void mov(T rd, T rn) {
    const T Source = rn;
    orr<T>(rd, Source, Source);
  }
  // FMLSL (vector). Takes no element size; the size argument is fixed at i32Bit.
  template<IsQOrDRegister T>
  void fmlsl(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i32Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b11101, rd, rn, rm);
  }
  // FMLSL2 (vector). Takes no element size; the size argument is fixed at i32Bit
  // and the first ASIMD3Same argument is 1.
  template<IsQOrDRegister T>
  void fmlsl2(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i32Bit;
    ASIMD3Same<T>(1, SizeSelector, 0b11001, rd, rn, rm);
  }
  // ORN (vector). Takes no element size; the size argument is fixed at i64Bit,
  // which is what separates it from and_/bic/orr on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void orn(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i64Bit;
    ASIMD3Same<T>(0, SizeSelector, 0b00011, rd, rn, rm);
  }
  // UHADD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void uhadd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00000;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UQADD (vector). On a D register the 64-bit element size is a reserved
  // encoding, so it is asserted against here -- the same guard the signed
  // counterpart sqadd applies. Q registers accept the size that is passed.
  template<IsQOrDRegister T>
  void uqadd(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, 0b00001, rd, rn, rm);
  }
  // URHADD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void urhadd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00010;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UHSUB (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void uhsub(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UQSUB (vector). The instruction has a valid 64-bit element arrangement on Q
  // registers (2D); only the D-register form with 64-bit elements is reserved.
  // The assertion is therefore limited to D registers, as in the signed
  // counterpart sqsub.
  template<IsQOrDRegister T>
  void uqsub(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, 0b00101, rd, rn, rm);
  }
  // CMHI (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmhi(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00110;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // CMHS (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmhs(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b00111;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // USHL (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void ushl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01000;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UQSHL (register, vector). The instruction has a valid 64-bit element
  // arrangement on Q registers (2D); only the D-register form with 64-bit
  // elements is reserved. The assertion is therefore limited to D registers,
  // as in the signed counterpart sqshl.
  template<IsQOrDRegister T>
  void uqshl(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, 0b01001, rd, rn, rm);
  }
  // URSHL (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void urshl(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01010;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UQRSHL (vector). The instruction has a valid 64-bit element arrangement on Q
  // registers (2D); only the D-register form with 64-bit elements is reserved.
  // The assertion is therefore limited to D registers, as in the signed
  // counterpart sqrshl.
  template<IsQOrDRegister T>
  void uqrshl(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, 0b01011, rd, rn, rm);
  }
  // UMAX (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void umax(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UMIN (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void umin(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01101;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UABD (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void uabd(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UABA (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void uaba(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b01111;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // SUB (vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void sub(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10000;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // CMEQ (register, vector): the 64-bit element size is asserted against only for D registers.
  template<IsQOrDRegister T>
  void cmeq(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10001;

    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // MLS (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void mls(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10010;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // PMUL (vector). Takes no element size; the size argument is fixed at i8Bit.
  template<IsQOrDRegister T>
  void pmul(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i8Bit;
    ASIMD3Same<T>(1, SizeSelector, 0b10011, rd, rn, rm);
  }
  // UMAXP (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void umaxp(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10100;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // UMINP (vector): asserts against 64-bit elements, then forwards to ASIMD3Same.
  template<IsQOrDRegister T>
  void uminp(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10101;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // SQRDMULH (vector): a single assertion rejects both the 8-bit and the 64-bit element sizes.
  template<IsQOrDRegister T>
  void sqrdmulh(SubRegSize size, T rd, T rn, T rm) {
    constexpr auto Opcode = 0b10110;

    LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit || size == SubRegSize::i8Bit), "8/64-bit subregsize not supported");
    ASIMD3Same<T>(1, size, Opcode, rd, rn, rm);
  }
  // FMAXNMP (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmaxnmp(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b000, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11000, rd, rn, rm);
  }
  // FADDP (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void faddp(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b010, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11010, rd, rn, rm);
  }
  // FMUL (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmul(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b011, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11011, rd, rn, rm);
  }
  // FCMGE (register, vector). 16-bit elements go through ASIMDThreeSameFP16;
  // 32/64-bit elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fcmge(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b100, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11100, rd, rn, rm);
  }
  // FACGE (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void facge(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b101, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11101, rd, rn, rm);
  }
  // FMAXP (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fmaxp(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b110, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11110, rd, rn, rm);
  }
  // FDIV (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with a remapped size argument.
  template<IsQOrDRegister T>
  void fdiv(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size == SubRegSize::i16Bit) {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 0, 0b111, rm, rn, rd);
      return;
    }

    // 64-bit elements are passed as i16Bit, everything else as i8Bit.
    const auto SizeField = (size != SubRegSize::i64Bit) ? SubRegSize::i8Bit : SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeField, 0b11111, rd, rn, rm);
  }
  // EOR (vector). Takes no element size; the size argument is fixed at i8Bit,
  // which is what separates it from bsl/bit/bif on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void eor(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i8Bit;
    ASIMD3Same<T>(1, SizeSelector, 0b00011, rd, rn, rm);
  }
  // BSL. Takes no element size; the size argument is fixed at i16Bit, which is
  // what separates it from eor/bit/bif on the same 0b00011 opcode.
  template<IsQOrDRegister T>
  void bsl(T rd, T rn, T rm) {
    constexpr auto SizeSelector = SubRegSize::i16Bit;
    ASIMD3Same<T>(1, SizeSelector, 0b00011, rd, rn, rm);
  }
  // FMINNMP (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fminnmp(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(1, size, 0b11000, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 1, 0b000, rm, rn, rd);
    }
  }
  // FABD (vector). 16-bit elements go through ASIMDThreeSameFP16; 32/64-bit
  // elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fabd(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(1, size, 0b11010, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 1, 0b010, rm, rn, rd);
    }
  }
  // FCMGT (register, vector). 16-bit elements go through ASIMDThreeSameFP16;
  // 32/64-bit elements go through ASIMD3Same with `size` passed unchanged.
  template<IsQOrDRegister T>
  void fcmgt(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(!(size == SubRegSize::i64Bit), "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    if (size != SubRegSize::i16Bit) {
      ASIMD3Same<T>(1, size, 0b11100, rd, rn, rm);
    } else {
      // Note the register order expected by the FP16 helper: rm, rn, rd.
      ASIMDThreeSameFP16(1, 1, 0b100, rm, rn, rd);
    }
  }
  // Emits FACGT (vector) for 16/32/64-bit floating-point elements.
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void facgt(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    const bool IsHalfPrecision = size == SubRegSize::i16Bit;
    if (!IsHalfPrecision) {
      // Single/double precision: size is forwarded unchanged to the three-same encoder.
      ASIMD3Same<T>(1, size, 0b11101, rd, rn, rm);
      return;
    }

    // Half precision uses the dedicated FP16 helper, which takes registers in rm, rn, rd order.
    ASIMDThreeSameFP16(1, 1, 0b101, rm, rn, rd);
  }
  // Emits FMINP (vector) for 16/32/64-bit floating-point elements.
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void fminp(SubRegSize size, T rd, T rn, T rm) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Only 16/32/64-bit subregsize supported");

    const bool IsHalfPrecision = size == SubRegSize::i16Bit;
    if (!IsHalfPrecision) {
      // Single/double precision: size is forwarded unchanged to the three-same encoder.
      ASIMD3Same<T>(1, size, 0b11110, rd, rn, rm);
      return;
    }

    // Half precision uses the dedicated FP16 helper, which takes registers in rm, rn, rd order.
    ASIMDThreeSameFP16(1, 1, 0b110, rm, rn, rd);
  }
  // Emits BIT (bitwise insert if true).
  template<IsQOrDRegister T>
  void bit(T rd, T rn, T rm) {
    // EOR/BSL/BIT/BIF share this opcode and are distinguished by the size argument;
    // BIT uses the i32Bit slot.
    constexpr auto BitwiseOpcode = 0b00011;
    ASIMD3Same<T>(1, SubRegSize::i32Bit, BitwiseOpcode, rd, rn, rm);
  }
  // Emits BIF (bitwise insert if false).
  template<IsQOrDRegister T>
  void bif(T rd, T rn, T rm) {
    // EOR/BSL/BIT/BIF share this opcode and are distinguished by the size argument;
    // BIF uses the i64Bit slot.
    constexpr auto BitwiseOpcode = 0b00011;
    ASIMD3Same<T>(1, SubRegSize::i64Bit, BitwiseOpcode, rd, rn, rm);
  }

  // Advanced SIMD modified immediate
  // XXX: ORR - 32-bit/16-bit
  // XXX: MOVI - Shifting ones
  // Emits FMOV (vector, immediate): loads the 8-bit encodable float constant Value into rd.
  // Only 32-bit and 64-bit elements are implemented; 16-bit is flagged as unsupported.
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void fmov(SubRegSize size, T rd, float Value) {
    if constexpr (std::is_same_v<T, DRegister>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Unsupported fmov size");

    if (size == SubRegSize::i16Bit) {
      LOGMAN_MSG_A_FMT("Unsupported");
      FEX_UNREACHABLE;
    }

    const bool IsSinglePrecision = size == SubRegSize::i32Bit;
    if (!IsSinglePrecision && size != SubRegSize::i64Bit) {
      LOGMAN_MSG_A_FMT("Invalid subregsize");
      FEX_UNREACHABLE;
    }

    // cmode and o2 are the same for both supported widths; only op and the
    // immediate conversion differ.
    constexpr uint32_t cmode = 0b1111;
    constexpr uint32_t o2 = 0;

    uint32_t op;
    uint32_t Imm;
    if (IsSinglePrecision) {
      op = 0;
      Imm = FP32ToImm8(Value);
    } else {
      op = 1;
      Imm = FP64ToImm8(Value);
    }

    ASIMDModifiedImm(op, cmode, o2, Imm, rd);
  }
  // XXX: MVNI - Shifted immediate
  // XXX: BIC
  // void ASIMDModifiedImm(uint32_t Op, uint32_t op, uint32_t cmode, uint32_t o2, uint32_t imm, T rd) {

  // Emits MOVI (vector, immediate).
  //  - 8-bit:  Imm must fit in 8 bits, no shift.
  //  - 16-bit: Imm must fit in 8 bits, Shift is 0 or 8.
  //  - 32-bit: Imm must fit in 8 bits, Shift is 0, 8, 16 or 24.
  //  - 64-bit: Imm is the full 64-bit constant; every byte must be 0x00 or 0xFF, no shift.
  template<IsQOrDRegister T>
  void movi(SubRegSize size, T rd, uint64_t Imm, uint16_t Shift = 0) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Unsupported movi size");

    // op is only 1 for the 64-bit form.
    uint32_t op = 0;
    uint32_t cmode;
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Shift == 0, "8-bit can't have shift");
      LOGMAN_THROW_A_FMT((Imm & ~0xFF) == 0, "Larger than 8-bit Imm not supported");
      cmode = 0b1110;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 8, "Shift by invalid amount");
      LOGMAN_THROW_A_FMT((Imm & ~0xFF) == 0, "Larger than 8-bit Imm not supported");
      // Bit 1 of cmode selects the shift-by-8 variant.
      cmode = Shift ? 0b1010 : 0b1000;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 8 || Shift == 16 || Shift == 24, "Shift by invalid amount");
      LOGMAN_THROW_A_FMT((Imm & ~0xFF) == 0, "Larger than 8-bit Imm not supported");
      // The byte position of the shift (Shift / 8) lands in cmode bits [2:1].
      cmode = (Shift >> 3) << 1;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Shift == 0, "64-bit can't have shift");
      cmode = 0b1110;
      op = 1;

      // Unlike the narrower forms, each bit of the encoded 8-bit immediate expands to a
      // whole byte of the result, so collapse every 0xFF byte of Imm into a single bit.
      uint8_t ByteMask {};
      for (size_t ByteIndex = 0; ByteIndex < 8; ++ByteIndex) {
        const uint8_t ByteValue = (Imm >> (ByteIndex * 8)) & 0xFF;
        LOGMAN_THROW_A_FMT(ByteValue == 0 || ByteValue == 0xFF, "Invalid 64-bit constant encoding");
        if (ByteValue == 0xFF) {
          ByteMask |= (1 << ByteIndex);
        }
      }
      Imm = ByteMask;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Invalid subregsize");
      FEX_UNREACHABLE;
    }
    }

    ASIMDModifiedImm(op, cmode, 0, Imm, rd);
  }

  // Advanced SIMD shift by immediate
  // Emits SSHR (vector, immediate): signed shift right of each element of rn by Shift, into rd.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void sshr(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b00000, rn, rd);
  }
  // Emits SSRA (vector, immediate): signed shift right and accumulate.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void ssra(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b00010, rn, rd);
  }
  // Emits SRSHR (vector, immediate): signed rounding shift right.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void srshr(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b00100, rn, rd);
  }
  // Emits SRSRA (vector, immediate): signed rounding shift right and accumulate.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void srsra(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b00110, rn, rd);
  }
  // Emits SHL (vector, immediate): shift each element of rn left by Shift, into rd.
  // size is the element width; Shift must be in [0, element width in bits - 1].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void shl(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must be less than the element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b01010, rn, rd);
  }
  // Emits SQSHL (vector, immediate): signed saturating shift left.
  // size is the element width; Shift must be in [0, element width in bits - 1].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void sqshl(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must be less than the element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b01110, rn, rd);
  }
  ///< size is destination size
  // Emits SHRN (shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void shrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10000, rn, rd);
  }
  ///< size is destination size
  // Emits SHRN2 (shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void shrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10000, rn, rd);
  }
  ///< size is destination size
  // Emits RSHRN (rounding shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void rshrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10001, rn, rd);
  }
  ///< size is destination size
  // Emits RSHRN2 (rounding shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void rshrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10001, rn, rd);
  }
  ///< size is destination size
  // Emits SQSHRN (signed saturating shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqshrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10010, rn, rd);
  }
  ///< size is destination size
  // Emits SQSHRN2 (signed saturating shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqshrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10010, rn, rd);
  }
  ///< size is destination size
  // Emits SQRSHRN (signed saturating rounded shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqrshrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10011, rn, rd);
  }
  ///< size is destination size
  // Emits SQRSHRN2 (signed saturating rounded shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqrshrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b10011, rn, rd);
  }
  ///< size is destination size
  // Emits SSHLL (signed shift left long, immediate), D-register form.
  // size is the destination element width, so 8-bit is rejected; Shift must be smaller
  // than the source element width.
  void sshll(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");

    // Callers pass the destination width; the encoding is built from the source width,
    // which is one enum step below it.
    const auto SourceSize = SubRegSize(FEXCore::ToUnderlying(size) - 1);
    const size_t SourceBits = SubRegSizeInBits(SourceSize);
    LOGMAN_THROW_A_FMT(Shift < SourceBits, "Shift must not be larger than incoming element size");

    // Left shifts are stored biased: immh:immb = esize + shift, with immh doubling as the
    // element-size selector.
    const uint32_t EncodedShift = SourceBits + Shift;
    ASIMDShiftByImm(0, EncodedShift >> 3, EncodedShift & 0b111, 0b10100, rn, rd);
  }

  ///< size is destination size
  // Emits SSHLL2 (signed shift left long, immediate), Q-register form.
  // size is the destination element width, so 8-bit is rejected; Shift must be smaller
  // than the source element width.
  void sshll2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");

    // Callers pass the destination width; the encoding is built from the source width,
    // which is one enum step below it.
    const auto SourceSize = SubRegSize(FEXCore::ToUnderlying(size) - 1);
    const size_t SourceBits = SubRegSizeInBits(SourceSize);
    LOGMAN_THROW_A_FMT(Shift < SourceBits, "Shift must not be larger than incoming element size");

    // Left shifts are stored biased: immh:immb = esize + shift, with immh doubling as the
    // element-size selector.
    const uint32_t EncodedShift = SourceBits + Shift;
    ASIMDShiftByImm(0, EncodedShift >> 3, EncodedShift & 0b111, 0b10100, rn, rd);
  }
  ///< size is destination size
  void sxtl(SubRegSize size, VRegister rd, VRegister rn) {
    // SXTL is emitted as SSHLL with a zero shift, using the D views of both registers.
    constexpr uint32_t NoShift = 0;
    sshll(size, rd.D(), rn.D(), NoShift);
  }
  ///< size is destination size
  void sxtl2(SubRegSize size, VRegister rd, VRegister rn) {
    // SXTL2 is emitted as SSHLL2 with a zero shift, using the Q views of both registers.
    constexpr uint32_t NoShift = 0;
    sshll2(size, rd.Q(), rn.Q(), NoShift);
  }

  // Emits SCVTF (vector, fixed-point): signed fixed-point to floating-point convert.
  // size is the element width (8-bit is invalid; 64-bit is invalid on D registers).
  // FractionalBits must be in [1, element width in bits].
  template<IsQOrDRegister T>
  void scvtf(SubRegSize size, T rd, T rn, uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // fbits == esize is a valid encoding (immh:immb == esize). fbits == 0 is not: it would
    // produce immh:immb == esize * 2, which selects the next larger element size.
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= SubregSizeInBits,
                       "FractionalBits must be in the range 1 to incoming element size");

    // fbits is stored inverted: immh:immb = (esize * 2) - fbits, with immh doubling as the
    // element-size selector.
    const uint32_t InvertedFractionalBits = (SubregSizeInBits * 2) - FractionalBits;
    const uint32_t immh = InvertedFractionalBits >> 3;
    const uint32_t immb = InvertedFractionalBits & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b11100, rn, rd);
  }

  // Emits FCVTZS (vector, fixed-point): floating-point to signed fixed-point convert,
  // rounding toward zero.
  // size is the element width (8-bit is invalid; 64-bit is invalid on D registers).
  // FractionalBits must be in [1, element width in bits].
  template<IsQOrDRegister T>
  void fcvtzs(SubRegSize size, T rd, T rn, uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // fbits == esize is a valid encoding (immh:immb == esize). fbits == 0 is not: it would
    // produce immh:immb == esize * 2, which selects the next larger element size.
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= SubregSizeInBits,
                       "FractionalBits must be in the range 1 to incoming element size");

    // fbits is stored inverted: immh:immb = (esize * 2) - fbits, with immh doubling as the
    // element-size selector.
    const uint32_t InvertedFractionalBits = (SubregSizeInBits * 2) - FractionalBits;
    const uint32_t immh = InvertedFractionalBits >> 3;
    const uint32_t immb = InvertedFractionalBits & 0b111;

    ASIMDShiftByImm(0, immh, immb, 0b11111, rn, rd);
  }

  // Emits USHR (vector, immediate): unsigned shift right of each element of rn by Shift, into rd.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void ushr(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b00000, rn, rd);
  }
  // Emits USRA (vector, immediate): unsigned shift right and accumulate.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void usra(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b00010, rn, rd);
  }
  // Emits URSHR (vector, immediate): unsigned rounding shift right.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void urshr(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b00100, rn, rd);
  }
  // Emits URSRA (vector, immediate): unsigned rounding shift right and accumulate.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void ursra(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b00110, rn, rd);
  }
  // Emits SRI (vector, immediate): shift right and insert.
  // size is the element width; Shift must be in [1, element width in bits].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void sri(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b01000, rn, rd);
  }
  // Emits SLI (vector, immediate): shift left and insert.
  // size is the element width; Shift must be in [0, element width in bits - 1].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void sli(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must be less than the element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b01010, rn, rd);
  }
  // Emits SQSHLU (vector, immediate): signed saturating shift left unsigned.
  // size is the element width; Shift must be in [0, element width in bits - 1].
  // A 64-bit element size is rejected when operating on D registers.
  template<IsQOrDRegister T>
  void sqshlu(SubRegSize size, T rd, T rn, uint32_t Shift) {
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must be less than the element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b01100, rn, rd);
  }
  ///< size is the element size (source and destination elements have the same width)
  template<IsQOrDRegister T>
  void uqshl(SubRegSize size, T rd, T rn, uint32_t Shift) {
    // Emits UQSHL (vector, immediate): unsigned saturating shift left.
    // Shift must be in [0, element width in bits - 1]; a 64-bit element size is rejected
    // when operating on D registers.
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must be less than the element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b01110, rn, rd);
  }
  ///< size is destination size
  // Emits SQSHRUN (signed saturating shift right unsigned narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqshrun(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10000, rn, rd);
  }
  ///< size is destination size
  // Emits SQSHRUN2 (signed saturating shift right unsigned narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqshrun2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10000, rn, rd);
  }
  ///< size is destination size
  // Emits SQRSHRUN (signed saturating rounded shift right unsigned narrow, immediate),
  // D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqrshrun(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10001, rn, rd);
  }
  ///< size is destination size
  // Emits SQRSHRUN2 (signed saturating rounded shift right unsigned narrow, immediate),
  // Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void sqrshrun2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10001, rn, rd);
  }
  ///< size is destination size
  // Emits UQSHRN (unsigned saturating shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void uqshrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    // Same guard as the other narrowing shifts: a 64-bit destination would push the
    // inverted shift past the 4-bit immh field.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10010, rn, rd);
  }
  ///< size is destination size
  // Emits UQSHRN2 (unsigned saturating shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void uqshrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    // Same guard as the other narrowing shifts: a 64-bit destination would push the
    // inverted shift past the 4-bit immh field.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10010, rn, rd);
  }
  ///< size is destination size
  // Emits UQRSHRN (unsigned saturating rounded shift right narrow, immediate), D-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void uqrshrn(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    // Same guard as the other narrowing shifts: a 64-bit destination would push the
    // inverted shift past the 4-bit immh field.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10011, rn, rd);
  }
  ///< size is destination size
  // Emits UQRSHRN2 (unsigned saturating rounded shift right narrow, immediate), Q-register form.
  // size is the destination element width (64-bit is invalid); Shift must be in
  // [1, destination element width in bits].
  void uqrshrn2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    // Same guard as the other narrowing shifts: a 64-bit destination would push the
    // inverted shift past the 4-bit immh field.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so an out-of-range shift would silently encode a
    // different element size instead of failing.
    LOGMAN_THROW_A_FMT(Shift >= 1 && Shift <= SubregSizeInBits, "Shift must be in the range 1 to destination element size");

    // Right shifts are stored inverted: immh:immb = (esize * 2) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10011, rn, rd);
  }
  ///< size is destination size
  // Emits USHLL (unsigned shift left long, immediate), D-register form.
  // size is the destination element width, so 8-bit is rejected; Shift must be smaller
  // than the source element width.
  void ushll(SubRegSize size, DRegister rd, DRegister rn, uint32_t Shift) {
    // Guard before stepping down to the source size: an 8-bit destination has no narrower
    // source and the decrement below would underflow the enum.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    size = SubRegSize(FEXCore::ToUnderlying(size) - 1);

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= source esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must not be larger than incoming element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10100, rn, rd);
  }
  ///< size is destination size
  // Emits USHLL2 (unsigned shift left long, immediate), Q-register form.
  // size is the destination element width, so 8-bit is rejected; Shift must be smaller
  // than the source element width.
  void ushll2(SubRegSize size, QRegister rd, QRegister rn, uint32_t Shift) {
    // Guard before stepping down to the source size: an 8-bit destination has no narrower
    // source and the decrement below would underflow the enum.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    size = SubRegSize(FEXCore::ToUnderlying(size) - 1);

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // immh also selects the element size, so a shift >= source esize would carry into the
    // element-size bits and silently encode a different instruction form.
    LOGMAN_THROW_A_FMT(Shift < SubregSizeInBits, "Shift must not be larger than incoming element size");

    // Left shifts are stored biased: immh:immb = esize + shift.
    const uint32_t InvertedShift = SubregSizeInBits + Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b10100, rn, rd);
  }
  ///< size is destination size
  void uxtl(SubRegSize size, DRegister rd, DRegister rn) {
    // UXTL is emitted as USHLL with a zero shift.
    constexpr uint32_t NoShift = 0;
    ushll(size, rd, rn, NoShift);
  }
  ///< size is destination size
  void uxtl2(SubRegSize size, QRegister rd, QRegister rn) {
    // UXTL2 is emitted as USHLL2 with a zero shift.
    constexpr uint32_t NoShift = 0;
    ushll2(size, rd, rn, NoShift);
  }
  // Emits UCVTF (vector, fixed-point): unsigned fixed-point to floating-point convert.
  // size is the element width (8-bit is invalid; 64-bit is invalid on D registers).
  // FractionalBits must be in [1, element width in bits].
  template<IsQOrDRegister T>
  void ucvtf(SubRegSize size, T rd, T rn, uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // fbits == esize is a valid encoding (immh:immb == esize). fbits == 0 is not: it would
    // produce immh:immb == esize * 2, which selects the next larger element size.
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= SubregSizeInBits,
                       "FractionalBits must be in the range 1 to incoming element size");

    // fbits is stored inverted: immh:immb = (esize * 2) - fbits, with immh doubling as the
    // element-size selector.
    const uint32_t InvertedFractionalBits = (SubregSizeInBits * 2) - FractionalBits;
    const uint32_t immh = InvertedFractionalBits >> 3;
    const uint32_t immb = InvertedFractionalBits & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b11100, rn, rd);
  }

  // Emits FCVTZU (vector, fixed-point): floating-point to unsigned fixed-point convert,
  // rounding toward zero.
  // size is the element width (8-bit is invalid; 64-bit is invalid on D registers).
  // FractionalBits must be in [1, element width in bits].
  template<IsQOrDRegister T>
  void fcvtzu(SubRegSize size, T rd, T rn, uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    if constexpr (std::is_same_v<DRegister, T>) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit, "Invalid element size with 64-bit {}", __func__);
    }

    const size_t SubregSizeInBits = SubRegSizeInBits(size);
    // fbits == esize is a valid encoding (immh:immb == esize). fbits == 0 is not: it would
    // produce immh:immb == esize * 2, which selects the next larger element size.
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= SubregSizeInBits,
                       "FractionalBits must be in the range 1 to incoming element size");

    // fbits is stored inverted: immh:immb = (esize * 2) - fbits, with immh doubling as the
    // element-size selector.
    const uint32_t InvertedFractionalBits = (SubregSizeInBits * 2) - FractionalBits;
    const uint32_t immh = InvertedFractionalBits >> 3;
    const uint32_t immb = InvertedFractionalBits & 0b111;

    ASIMDShiftByImm(1, immh, immb, 0b11111, rn, rd);
  }

  // Advanced SIMD vector x indexed element
  ///< size is destination size
  // Emits SMLAL (by element), D-register form.
  // size is the destination element width (32-bit or 64-bit); the source elements are half
  // that width. Index selects the lane of rm: 0-7 for 16-bit sources, 0-3 for 32-bit sources.
  // With 16-bit sources rm must be v0-v15.
  void smlal(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i32Bit || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");

    if (size == ARMEmitter::SubRegSize::i32Bit) {
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    }
    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);
    // The index addresses a lane of the 128-bit rm, so the bound is the lane count
    // (128 / source width), not the source width in bits. Anything larger would be
    // silently truncated by the H:L:M extraction below.
    const size_t SourceLaneCount = 128 / SubRegSizeInBits(EncodedSubRegSize);
    LOGMAN_THROW_A_FMT(Index < SourceLaneCount, "Index must be less than the number of source elements");

    uint32_t H, L, M;
    if (size == ARMEmitter::SubRegSize::i32Bit) {
      // Index encoded in H:L:M
      H = (Index >> 2) & 1;
      L = (Index >> 1) & 1;
      M = (Index >> 0) & 1;
    } else {
      // Index encoded in H:L
      // M overlaps rm register.
      H = (Index >> 1) & 1;
      L = (Index >> 0) & 1;
      M = 0;
    }
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0010, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  ///< size is destination size
  // Emits SMLAL2 (by element), Q-register form.
  // size is the destination element width (32-bit or 64-bit); the source elements are half
  // that width. Index selects the lane of rm: 0-7 for 16-bit sources, 0-3 for 32-bit sources.
  // With 16-bit sources rm must be v0-v15.
  void smlal2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i32Bit || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");

    if (size == ARMEmitter::SubRegSize::i32Bit) {
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    }
    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);
    // The index addresses a lane of the 128-bit rm, so the bound is the lane count
    // (128 / source width), not the source width in bits. Anything larger would be
    // silently truncated by the H:L:M extraction below.
    const size_t SourceLaneCount = 128 / SubRegSizeInBits(EncodedSubRegSize);
    LOGMAN_THROW_A_FMT(Index < SourceLaneCount, "Index must be less than the number of source elements");

    uint32_t H, L, M;
    if (size == ARMEmitter::SubRegSize::i32Bit) {
      // Index encoded in H:L:M
      H = (Index >> 2) & 1;
      L = (Index >> 1) & 1;
      M = (Index >> 0) & 1;
    } else {
      // Index encoded in H:L
      // M overlaps rm register.
      H = (Index >> 1) & 1;
      L = (Index >> 0) & 1;
      M = 0;
    }
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0010, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  ///< size is destination size
  // Emits SQDMLAL (by element), D-register form.
  // size is the destination element width (32-bit or 64-bit); the source elements are half
  // that width. Index selects the lane of rm: 0-7 for 16-bit sources, 0-3 for 32-bit sources.
  // With 16-bit sources rm must be v0-v15.
  void sqdmlal(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(size == ARMEmitter::SubRegSize::i32Bit || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");

    if (size == ARMEmitter::SubRegSize::i32Bit) {
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    }
    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);
    // The index addresses a lane of the 128-bit rm, so the bound is the lane count
    // (128 / source width), not the source width in bits. Anything larger would be
    // silently truncated by the H:L:M extraction below.
    const size_t SourceLaneCount = 128 / SubRegSizeInBits(EncodedSubRegSize);
    LOGMAN_THROW_A_FMT(Index < SourceLaneCount, "Index must be less than the number of source elements");

    uint32_t H, L, M;
    if (size == ARMEmitter::SubRegSize::i32Bit) {
      // Index encoded in H:L:M
      H = (Index >> 2) & 1;
      L = (Index >> 1) & 1;
      M = (Index >> 0) & 1;
    } else {
      // Index encoded in H:L
      // M overlaps rm register.
      H = (Index >> 1) & 1;
      L = (Index >> 0) & 1;
      M = 0;
    }
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0011, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `sqdmlal2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void sqdmlal2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0011, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  /// By-element `smlsl`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void smlsl(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0110, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `smlsl2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void smlsl2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0110, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  /// By-element `sqdmlsl`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void sqdmlsl(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0111, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `sqdmlsl2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void sqdmlsl2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0111, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  /// By-element `mul`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void mul(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1000, H, size, rm, rn, rd);
  }
  /// By-element `smull`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void smull(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1010, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `smull2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void smull2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1010, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  /// By-element `sqdmull`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void sqdmull(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1011, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `sqdmull2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void sqdmull2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1011, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }
  /// By-element `sqdmulh`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void sqdmulh(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1100, H, size, rm, rn, rd);
  }
  /// By-element `sqrdmulh`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void sqrdmulh(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1101, H, size, rm, rn, rd);
  }

  /// By-element `sdot`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-3; its two bits are passed as H (upper) and L (lower), with M passed as zero.
  template<IsQOrDRegister T>
  void sdot(T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");

    const uint32_t UpperBit = (Index >> 1) & 1;
    const uint32_t LowerBit = Index & 1;
    // M is not part of the index here; it overlaps the rm register field.
    constexpr uint32_t ClearM = 0;
    ASIMDVectorXIndexedElement(0b0, LowerBit, ClearM, 0b1110, UpperBit, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `sudot`, emitted through ASIMDVectorXIndexedElement with an i8Bit size field.
  /// `Index` must be 0-3; its two bits are passed as H (upper) and L (lower), with M passed as zero.
  template<IsQOrDRegister T>
  void sudot(T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");

    const uint32_t UpperBit = (Index >> 1) & 1;
    const uint32_t LowerBit = Index & 1;
    // M is not part of the index here; it overlaps the rm register field.
    constexpr uint32_t ClearM = 0;
    ASIMDVectorXIndexedElement(0b0, LowerBit, ClearM, 0b1111, UpperBit, ARMEmitter::SubRegSize::i8Bit, rm, rn, rd);
  }

  /// By-element `bfdot`, emitted through ASIMDVectorXIndexedElement with an i16Bit size field.
  /// `Index` must be 0-3; its two bits are passed as H (upper) and L (lower), with M passed as zero.
  template<IsQOrDRegister T>
  void bfdot(T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");

    const uint32_t UpperBit = (Index >> 1) & 1;
    const uint32_t LowerBit = Index & 1;
    // M is not part of the index here; it overlaps the rm register field.
    constexpr uint32_t ClearM = 0;
    ASIMDVectorXIndexedElement(0b0, LowerBit, ClearM, 0b1111, UpperBit, ARMEmitter::SubRegSize::i16Bit, rm, rn, rd);
  }

  /// By-element `fmla`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the floating-point element size (must satisfy IsStandardFloatSize).
  /// `Index` selects a lane of rm: 0-7 for 16-bit, 0-3 for 32-bit, 0-1 for 64-bit elements.
  /// `rm` must be v0-v15 for 16-bit elements; 64-bit elements require the QRegister form.
  template<IsQOrDRegister T>
  void fmla(SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid destination size");

    auto EncodedSubRegSize = size;
    // Index aligned so that bit 2 is H, bit 1 is L and bit 0 is M.
    uint32_t PackedIndex;

    if (size == SubRegSize::i16Bit) {
      // Index encoded in H:L:M, so M is unavailable as the top bit of rm.
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with 16-bit elements");
      LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");
      PackedIndex = Index;
      // ARM in their infinite wisdom decided to encode 16-bit as an 8-bit operation even though 16-bit was unallocated.
      EncodedSubRegSize = SubRegSize::i8Bit;
    } else if (size == SubRegSize::i32Bit) {
      // Index encoded in H:L
      LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");
      PackedIndex = Index << 1;
    } else {
      LOGMAN_THROW_A_FMT((std::is_same_v<QRegister, T>), "Can't encode DRegister with i64Bit");
      // Index encoded in H only
      LOGMAN_THROW_A_FMT(Index < 2, "Index must be less than the source register size");
      PackedIndex = Index << 2;
    }

    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0001, H, EncodedSubRegSize, rm, rn, rd);
  }

  /// By-element `fmls`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the floating-point element size (must satisfy IsStandardFloatSize).
  /// `Index` selects a lane of rm: 0-7 for 16-bit, 0-3 for 32-bit, 0-1 for 64-bit elements.
  /// `rm` must be v0-v15 for 16-bit elements; 64-bit elements require the QRegister form.
  template<IsQOrDRegister T>
  void fmls(SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid destination size");

    auto EncodedSubRegSize = size;
    // Index aligned so that bit 2 is H, bit 1 is L and bit 0 is M.
    uint32_t PackedIndex;

    if (size == SubRegSize::i16Bit) {
      // Index encoded in H:L:M, so M is unavailable as the top bit of rm.
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with 16-bit elements");
      LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");
      PackedIndex = Index;
      // ARM in their infinite wisdom decided to encode 16-bit as an 8-bit operation even though 16-bit was unallocated.
      EncodedSubRegSize = SubRegSize::i8Bit;
    } else if (size == SubRegSize::i32Bit) {
      // Index encoded in H:L
      LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");
      PackedIndex = Index << 1;
    } else {
      LOGMAN_THROW_A_FMT((std::is_same_v<QRegister, T>), "Can't encode DRegister with i64Bit");
      // Index encoded in H only
      LOGMAN_THROW_A_FMT(Index < 2, "Index must be less than the source register size");
      PackedIndex = Index << 2;
    }

    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0101, H, EncodedSubRegSize, rm, rn, rd);
  }

  /// By-element `fmul`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the floating-point element size (must satisfy IsStandardFloatSize).
  /// `Index` selects a lane of rm: 0-7 for 16-bit, 0-3 for 32-bit, 0-1 for 64-bit elements.
  /// `rm` must be v0-v15 for 16-bit elements; 64-bit elements require the QRegister form.
  template<IsQOrDRegister T>
  void fmul(SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid destination size");

    auto EncodedSubRegSize = size;
    // Index aligned so that bit 2 is H, bit 1 is L and bit 0 is M.
    uint32_t PackedIndex;

    if (size == SubRegSize::i16Bit) {
      // Index encoded in H:L:M, so M is unavailable as the top bit of rm.
      LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with 16-bit elements");
      LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");
      PackedIndex = Index;
      // ARM in their infinite wisdom decided to encode 16-bit as an 8-bit operation even though 16-bit was unallocated.
      EncodedSubRegSize = SubRegSize::i8Bit;
    } else if (size == SubRegSize::i32Bit) {
      // Index encoded in H:L
      LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");
      PackedIndex = Index << 1;
    } else {
      LOGMAN_THROW_A_FMT((std::is_same_v<QRegister, T>), "Can't encode DRegister with i64Bit");
      // Index encoded in H only
      LOGMAN_THROW_A_FMT(Index < 2, "Index must be less than the source register size");
      PackedIndex = Index << 2;
    }

    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b1001, H, EncodedSubRegSize, rm, rn, rd);
  }

  /// By-element `fmlal`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-7 and is encoded in H:L:M. Because M carries an index bit, `rm` must be v0-v15.
  template<IsQOrDRegister T>
  void fmlal(T rd, T rn, T rm, uint32_t Index) {
    // M is an index bit here, so it cannot also serve as the top bit of the rm register number.
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // Index encoded in H:L:M
    const uint32_t H = (Index >> 2) & 1;
    const uint32_t L = (Index >> 1) & 1;
    const uint32_t M = (Index >> 0) & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0000, H, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `fmlal2`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-7 and is encoded in H:L:M. Because M carries an index bit, `rm` must be v0-v15.
  template<IsQOrDRegister T>
  void fmlal2(T rd, T rn, T rm, uint32_t Index) {
    // M is an index bit here, so it cannot also serve as the top bit of the rm register number.
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // Index encoded in H:L:M
    const uint32_t H = (Index >> 2) & 1;
    const uint32_t L = (Index >> 1) & 1;
    const uint32_t M = (Index >> 0) & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1000, H, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `fmlsl`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-7 and is encoded in H:L:M. Because M carries an index bit, `rm` must be v0-v15.
  template<IsQOrDRegister T>
  void fmlsl(T rd, T rn, T rm, uint32_t Index) {
    // M is an index bit here, so it cannot also serve as the top bit of the rm register number.
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // Index encoded in H:L:M
    const uint32_t H = (Index >> 2) & 1;
    const uint32_t L = (Index >> 1) & 1;
    const uint32_t M = (Index >> 0) & 1;
    ASIMDVectorXIndexedElement(0b0, L, M, 0b0100, H, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `fmlsl2`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-7 and is encoded in H:L:M. Because M carries an index bit, `rm` must be v0-v15.
  template<IsQOrDRegister T>
  void fmlsl2(T rd, T rn, T rm, uint32_t Index) {
    // M is an index bit here, so it cannot also serve as the top bit of the rm register number.
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // Index encoded in H:L:M
    const uint32_t H = (Index >> 2) & 1;
    const uint32_t L = (Index >> 1) & 1;
    const uint32_t M = (Index >> 0) & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1100, H, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `usdot`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-3; its two bits are passed as H (upper) and L (lower), with M passed as zero.
  template<IsQOrDRegister T>
  void usdot(T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");

    const uint32_t UpperBit = (Index >> 1) & 1;
    const uint32_t LowerBit = Index & 1;
    // M is not part of the index here; it overlaps the rm register field.
    constexpr uint32_t ClearM = 0;
    ASIMDVectorXIndexedElement(0b0, LowerBit, ClearM, 0b1111, UpperBit, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `bfmlalb`, emitted through ASIMDVectorXIndexedElement on the D views of the operands
  /// with an i64Bit size field. `rm` must be v0-v15 and `Index` must be 0-7; both are asserted.
  void bfmlalb(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // All three index bits are passed on, as H:L:M.
    const uint32_t HighBit = (Index >> 2) & 1;
    const uint32_t MidBit = (Index >> 1) & 1;
    const uint32_t LowBit = Index & 1;
    ASIMDVectorXIndexedElement(0b0, MidBit, LowBit, 0b1111, HighBit, ARMEmitter::SubRegSize::i64Bit, rm.D(), rn.D(), rd.D());
  }
  /// By-element `bfmlalt`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands
  /// with an i64Bit size field. `rm` must be v0-v15 and `Index` must be 0-7; both are asserted.
  void bfmlalt(ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    LOGMAN_THROW_A_FMT(Index < 8, "Index must be less than the source register size");

    // All three index bits are passed on, as H:L:M.
    const uint32_t HighBit = (Index >> 2) & 1;
    const uint32_t MidBit = (Index >> 1) & 1;
    const uint32_t LowBit = Index & 1;
    ASIMDVectorXIndexedElement(0b0, MidBit, LowBit, 0b1111, HighBit, ARMEmitter::SubRegSize::i64Bit, rm.Q(), rn.Q(), rd.Q());
  }

  /// By-element `mla`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void mla(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0000, H, size, rm, rn, rd);
  }

  /// By-element `umlal`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umlal(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0010, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `umlal2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umlal2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0010, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }

  /// By-element `mls`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void mls(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0100, H, size, rm, rn, rd);
  }

  /// By-element `umlsl`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umlsl(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0110, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `umlsl2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umlsl2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b0110, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }

  /// By-element `umull`, emitted through ASIMDVectorXIndexedElement on the D views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umull(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1010, H, EncodedSubRegSize, rm.D(), rn.D(), rd.D());
  }
  /// By-element `umull2`, emitted through ASIMDVectorXIndexedElement on the Q views of the operands.
  /// `size` is the destination element size (i32Bit or i64Bit); the encoded size is the source size, one step smaller.
  /// `rm` must be v0-v15 when the source lanes are 16-bit. `Index` selects a lane of rm: 0-7 for 16-bit
  /// source lanes, 0-3 for 32-bit source lanes.
  void umull2(ARMEmitter::SubRegSize size, ARMEmitter::VRegister rd, ARMEmitter::VRegister rn, ARMEmitter::VRegister rm, uint32_t Index) {
    const bool IsHalfSource = size == ARMEmitter::SubRegSize::i32Bit;
    LOGMAN_THROW_A_FMT(IsHalfSource || size == ARMEmitter::SubRegSize::i64Bit, "Invalid destination size");
    // With 16-bit source lanes the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalfSource || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalfSource ? 8U : 4U), "Index must be less than the source register size");

    const auto EncodedSubRegSize = ARMEmitter::SubRegSize(FEXCore::ToUnderlying(size) - 1);

    // Index is H:L:M for 16-bit sources and H:L (M clear) for 32-bit sources.
    const uint32_t PackedIndex = IsHalfSource ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1010, H, EncodedSubRegSize, rm.Q(), rn.Q(), rd.Q());
  }

  /// By-element `sqrdmlah`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void sqrdmlah(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1101, H, size, rm, rn, rd);
  }
  /// By-element `udot`, emitted through ASIMDVectorXIndexedElement with an i32Bit size field.
  /// `Index` must be 0-3; its two bits are passed as H (upper) and L (lower), with M passed as zero.
  template<IsQOrDRegister T>
  void udot(T rd, T rn, T rm, uint32_t Index) {
    LOGMAN_THROW_A_FMT(Index < 4, "Index must be less than the source register size");

    const uint32_t UpperBit = (Index >> 1) & 1;
    const uint32_t LowerBit = Index & 1;
    // M is not part of the index here; it overlaps the rm register field.
    constexpr uint32_t ClearM = 0;
    ASIMDVectorXIndexedElement(0b1, LowerBit, ClearM, 0b1110, UpperBit, ARMEmitter::SubRegSize::i32Bit, rm, rn, rd);
  }

  /// By-element `sqrdmlsh`, emitted through ASIMDVectorXIndexedElement.
  /// `size` is the element size (i16Bit or i32Bit). `rm` must be v0-v15 for 16-bit elements.
  /// `Index` selects a lane of rm: 0-7 for 16-bit elements, 0-3 for 32-bit elements.
  template<IsQOrDRegister T>
  void sqrdmlsh(ARMEmitter::SubRegSize size, T rd, T rn, T rm, uint32_t Index) {
    const bool IsHalf = size == ARMEmitter::SubRegSize::i16Bit;
    LOGMAN_THROW_A_FMT(IsHalf || size == ARMEmitter::SubRegSize::i32Bit, "Invalid destination size");
    // With 16-bit elements the M bit belongs to the index, so only four bits remain to name rm.
    LOGMAN_THROW_A_FMT(!IsHalf || rm.Idx() < 16, "Rm can't be v16-v31 with half source size");
    // The bound is a lane count of the 128-bit rm, not the lane width in bits.
    LOGMAN_THROW_A_FMT(Index < (IsHalf ? 8U : 4U), "Index must be less than the source register size");

    // Index is H:L:M for 16-bit elements and H:L (M clear) for 32-bit elements.
    const uint32_t PackedIndex = IsHalf ? Index : (Index << 1);
    const uint32_t H = (PackedIndex >> 2) & 1;
    const uint32_t L = (PackedIndex >> 1) & 1;
    const uint32_t M = PackedIndex & 1;
    ASIMDVectorXIndexedElement(0b1, L, M, 0b1111, H, size, rm, rn, rd);
  }

  // Cryptographic three-register, imm2
  void sm3tt1a(VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    // Opcode field 0b00 of the three-register, imm2 crypto group.
    constexpr uint32_t Opcode = 0b00;
    Crypto3RegImm(index, Opcode, rm, rn, rd);
  }
  void sm3tt1b(VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    // Opcode field 0b01 of the three-register, imm2 crypto group.
    constexpr uint32_t Opcode = 0b01;
    Crypto3RegImm(index, Opcode, rm, rn, rd);
  }
  void sm3tt2a(VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    // Opcode field 0b10 of the three-register, imm2 crypto group.
    constexpr uint32_t Opcode = 0b10;
    Crypto3RegImm(index, Opcode, rm, rn, rd);
  }
  void sm3tt2b(VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    // Opcode field 0b11 of the three-register, imm2 crypto group.
    constexpr uint32_t Opcode = 0b11;
    Crypto3RegImm(index, Opcode, rm, rn, rd);
  }

  // Cryptographic three-register SHA 512
  void sha512h(VRegister rd, VRegister rn, VRegister rm) {
    // O=0, opcode=0b00 within the three-register SHA 512 group.
    constexpr uint32_t O = 0, Opcode = 0b00;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void sha512h2(VRegister rd, VRegister rn, VRegister rm) {
    // O=0, opcode=0b01 within the three-register SHA 512 group.
    constexpr uint32_t O = 0, Opcode = 0b01;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void sha512su1(VRegister rd, VRegister rn, VRegister rm) {
    // O=0, opcode=0b10 within the three-register SHA 512 group.
    constexpr uint32_t O = 0, Opcode = 0b10;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void rax1(VRegister rd, VRegister rn, VRegister rm) {
    // O=0, opcode=0b11 within the three-register SHA 512 group.
    constexpr uint32_t O = 0, Opcode = 0b11;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void sm3partw1(VRegister rd, VRegister rn, VRegister rm) {
    // O=1, opcode=0b00 within the three-register SHA 512 group.
    constexpr uint32_t O = 1, Opcode = 0b00;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void sm3partw2(VRegister rd, VRegister rn, VRegister rm) {
    // O=1, opcode=0b01 within the three-register SHA 512 group.
    constexpr uint32_t O = 1, Opcode = 0b01;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }
  void sm4ekey(VRegister rd, VRegister rn, VRegister rm) {
    // O=1, opcode=0b10 within the three-register SHA 512 group.
    constexpr uint32_t O = 1, Opcode = 0b10;
    Crypto3RegSHA512(O, Opcode, rm, rn, rd);
  }

  // Cryptographic four-register
  void eor3(VRegister rd, VRegister rn, VRegister rm, VRegister ra) {
    // Opcode field 0b00 of the four-register crypto group.
    constexpr uint32_t Opcode = 0b00;
    Crypto4Register(Opcode, rm, ra, rn, rd);
  }
  void bcax(VRegister rd, VRegister rn, VRegister rm, VRegister ra) {
    // Opcode field 0b01 of the four-register crypto group.
    constexpr uint32_t Opcode = 0b01;
    Crypto4Register(Opcode, rm, ra, rn, rd);
  }
  void sm3ss1(VRegister rd, VRegister rn, VRegister rm, VRegister ra) {
    // Opcode field 0b10 of the four-register crypto group.
    constexpr uint32_t Opcode = 0b10;
    Crypto4Register(Opcode, rm, ra, rn, rd);
  }

  // Cryptographic two-register SHA 512
  void sha512su0(VRegister rd, VRegister rn) {
    // Opcode field 0b00 of the two-register SHA 512 group.
    constexpr uint32_t Opcode = 0b00;
    Crypto2RegSHA512(Opcode, rn, rd);
  }
  void sm4e(VRegister rd, VRegister rn) {
    // Opcode field 0b01 of the two-register SHA 512 group.
    constexpr uint32_t Opcode = 0b01;
    Crypto2RegSHA512(Opcode, rn, rd);
  }

  // Conversion between floating-point and fixed-point
  // scvtf, fixed-point form (rmode=0b00, opcode=0b010): GPR source `rn`, FP destination `rd`.
  // FractionalBits must be in [1, width of the GPR]; it is encoded as scale = 64 - FractionalBits.
  void scvtf(ARMEmitter::ScalarRegSize ScalarSize, ARMEmitter::VRegister rd, ARMEmitter::Size GPRSize, ARMEmitter::Register rn,
             uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= ARMEmitter::RegSizeInBits(GPRSize), "Fractional bits out of range");

    // Map the scalar size onto the type field; any size not listed encodes as 0.
    uint32_t TypeField = 0;
    if (ScalarSize == ARMEmitter::ScalarRegSize::i64Bit) {
      TypeField = 0b01;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i32Bit) {
      TypeField = 0b00;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i16Bit) {
      TypeField = 0b11;
    }

    const uint32_t ScaleField = 64 - FractionalBits;
    ScalarConvertBetweenFPAndFixed(0, 0b00, 0b010, ScaleField, GPRSize, TypeField, rn, rd);
  }

  // ucvtf, fixed-point form (rmode=0b00, opcode=0b011): GPR source `rn`, FP destination `rd`.
  // FractionalBits must be in [1, width of the GPR]; it is encoded as scale = 64 - FractionalBits.
  void ucvtf(ARMEmitter::ScalarRegSize ScalarSize, ARMEmitter::VRegister rd, ARMEmitter::Size GPRSize, ARMEmitter::Register rn,
             uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= ARMEmitter::RegSizeInBits(GPRSize), "Fractional bits out of range");

    // Map the scalar size onto the type field; any size not listed encodes as 0.
    uint32_t TypeField = 0;
    if (ScalarSize == ARMEmitter::ScalarRegSize::i64Bit) {
      TypeField = 0b01;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i32Bit) {
      TypeField = 0b00;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i16Bit) {
      TypeField = 0b11;
    }

    const uint32_t ScaleField = 64 - FractionalBits;
    ScalarConvertBetweenFPAndFixed(0, 0b00, 0b011, ScaleField, GPRSize, TypeField, rn, rd);
  }

  // fcvtzs, fixed-point form (rmode=0b11, opcode=0b000): FP source `rn`, GPR destination `rd`.
  // FractionalBits must be in [1, width of the GPR]; it is encoded as scale = 64 - FractionalBits.
  void fcvtzs(ARMEmitter::Size GPRSize, ARMEmitter::Register rd, ARMEmitter::ScalarRegSize ScalarSize, ARMEmitter::VRegister rn,
              uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= ARMEmitter::RegSizeInBits(GPRSize), "Fractional bits out of range");

    // Map the scalar size onto the type field; any size not listed encodes as 0.
    uint32_t TypeField = 0;
    if (ScalarSize == ARMEmitter::ScalarRegSize::i64Bit) {
      TypeField = 0b01;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i32Bit) {
      TypeField = 0b00;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i16Bit) {
      TypeField = 0b11;
    }

    const uint32_t ScaleField = 64 - FractionalBits;
    ScalarConvertBetweenFPAndFixed(0, 0b11, 0b000, ScaleField, GPRSize, TypeField, rn, rd);
  }

  // fcvtzu, fixed-point form (rmode=0b11, opcode=0b001): FP source `rn`, GPR destination `rd`.
  // FractionalBits must be in [1, width of the GPR]; it is encoded as scale = 64 - FractionalBits.
  void fcvtzu(ARMEmitter::Size GPRSize, ARMEmitter::Register rd, ARMEmitter::ScalarRegSize ScalarSize, ARMEmitter::VRegister rn,
              uint32_t FractionalBits) {
    LOGMAN_THROW_A_FMT(FractionalBits >= 1 && FractionalBits <= ARMEmitter::RegSizeInBits(GPRSize), "Fractional bits out of range");

    // Map the scalar size onto the type field; any size not listed encodes as 0.
    uint32_t TypeField = 0;
    if (ScalarSize == ARMEmitter::ScalarRegSize::i64Bit) {
      TypeField = 0b01;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i32Bit) {
      TypeField = 0b00;
    } else if (ScalarSize == ARMEmitter::ScalarRegSize::i16Bit) {
      TypeField = 0b11;
    }

    const uint32_t ScaleField = 64 - FractionalBits;
    ScalarConvertBetweenFPAndFixed(0, 0b11, 0b001, ScaleField, GPRSize, TypeField, rn, rd);
  }

  // Conversion between floating-point and integer
  // fcvtns (rmode=0b00, opcode=0b000). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtns(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtns(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtns(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtnu (rmode=0b00, opcode=0b001). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtnu(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtnu(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtnu(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // scvtf, integer form (rmode=0b00, opcode=0b010). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void scvtf(ARMEmitter::Size size, HRegister rd, Register rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b010;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void scvtf(ARMEmitter::Size size, SRegister rd, Register rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b010;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void scvtf(ARMEmitter::Size size, DRegister rd, Register rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b010;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  // ucvtf, integer form (rmode=0b00, opcode=0b011). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void ucvtf(ARMEmitter::Size size, HRegister rd, Register rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b011;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void ucvtf(ARMEmitter::Size size, SRegister rd, Register rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b011;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void ucvtf(ARMEmitter::Size size, DRegister rd, Register rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b011;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  // fcvtas (rmode=0b00, opcode=0b100). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtas(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b100;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtas(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b100;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtas(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b100;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtau (rmode=0b00, opcode=0b101). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtau(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b101;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtau(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b101;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtau(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b101;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fmov, FP/vector register -> general-purpose register (opcode=0b110).
  void fmov(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b110;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fmov(ARMEmitter::Size size, Register rd, SRegister rn) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::Size::i64Bit, "Can't move SReg to 64-bit");
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b110;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fmov(ARMEmitter::Size size, Register rd, DRegister rn) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::Size::i32Bit, "Can't move DReg to 32-bit");
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b110;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fmov(ARMEmitter::Size size, Register rd, VRegister rn, bool Upper) {
    // Without Upper this uses ptype=0b01/rmode=0b00; Upper switches to ptype=0b10/rmode=0b01
    // and is only accepted together with a 64-bit size.
    uint32_t PType = 0b01;
    uint32_t RMode = 0b00;
    if (Upper) {
      LOGMAN_THROW_A_FMT(size == ARMEmitter::Size::i64Bit, "Can only move upper with 64-bit elements");
      PType = 0b10;
      RMode = 0b01;
    }
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, 0b110, rd, ToReg(rn));
  }

  // fmov, general-purpose register -> FP/vector register (opcode=0b111).
  void fmov(ARMEmitter::Size size, HRegister rd, Register rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b00, Opcode = 0b111;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void fmov(ARMEmitter::Size size, SRegister rd, Register rn) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::Size::i64Bit, "Can't move SReg to 64-bit");
    constexpr uint32_t PType = 0b00, RMode = 0b00, Opcode = 0b111;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void fmov(ARMEmitter::Size size, DRegister rd, Register rn) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::Size::i32Bit, "Can't move DReg to 32-bit");
    constexpr uint32_t PType = 0b01, RMode = 0b00, Opcode = 0b111;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, ToReg(rd), rn);
  }
  void fmov(ARMEmitter::Size size, VRegister rd, Register rn, bool Upper) {
    // Same ptype/rmode selection as the register-to-GPR overload taking Upper.
    uint32_t PType = 0b01;
    uint32_t RMode = 0b00;
    if (Upper) {
      LOGMAN_THROW_A_FMT(size == ARMEmitter::Size::i64Bit, "Can only move upper with 64-bit elements");
      PType = 0b10;
      RMode = 0b01;
    }
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, 0b111, ToReg(rd), rn);
  }
  // fcvtps (rmode=0b01, opcode=0b000). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtps(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b01, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtps(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b01, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtps(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b01, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtpu (rmode=0b01, opcode=0b001). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtpu(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b01, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtpu(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b01, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtpu(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b01, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtms (rmode=0b10, opcode=0b000). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtms(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b10, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtms(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b10, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtms(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b10, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtmu (rmode=0b10, opcode=0b001). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtmu(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b10, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtmu(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b10, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtmu(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b10, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtzs, integer form (rmode=0b11, opcode=0b000). ptype: H=0b11, S=0b00, D=0b01.
  void fcvtzs(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b11, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtzs(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b11, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtzs(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b11, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // The VRegister overload emits the same ptype (0b01) as the DRegister overload.
  void fcvtzs(ARMEmitter::Size size, Register rd, VRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b11, Opcode = 0b000;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  // fcvtzu, integer form (rmode=0b11, opcode=0b001). The overloads differ only in ptype: H=0b11, S=0b00, D=0b01.
  void fcvtzu(ARMEmitter::Size size, Register rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, RMode = 0b11, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtzu(ARMEmitter::Size size, Register rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, RMode = 0b11, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }
  void fcvtzu(ARMEmitter::Size size, Register rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, RMode = 0b11, Opcode = 0b001;
    ASIMDFloatConvBetweenInt(size, 0, PType, RMode, Opcode, rd, ToReg(rn));
  }

private:
  // Advanced SIMD three same (FP16)
  // Encoder for the "Advanced SIMD three same (FP16)" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, a=23, rm=16, opcode=11, rn=5, rd=0.
  template<IsQOrDRegister T>
  void ASIMDThreeSameFP16(uint32_t U, uint32_t a, uint32_t opcode, T rm, T rn, T rd) {
    constexpr uint32_t Base = 0b0000'1110'0100'0000'0000'01 << 10;
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit;
    Encoding |= (U << 29) | (a << 23) | (opcode << 11);
    Encoding |= rm.Idx() << 16;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Advanced SIMD two-register miscellaneous (FP16)
  // Encoder for the "Advanced SIMD two-register miscellaneous (FP16)" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, a=23, opcode=12, rn=5, rd=0.
  template<IsQOrDRegister T>
  void ASIMDTwoRegMiscFP16(uint32_t U, uint32_t a, uint32_t opcode, T rn, T rd) {
    constexpr uint32_t Base = 0b0000'1110'0111'1000'0000'10 << 10;
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit;
    Encoding |= (U << 29) | (a << 23) | (opcode << 12);
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Advanced SIMD three-register extension
  // Encoder for the "Advanced SIMD three-register extension" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, rm=16, opcode=11, rn=5, rd=0.
  template<IsQOrDRegister T>
  void ASIMDThreeRegisterExt(uint32_t U, uint32_t opcode, ARMEmitter::SubRegSize size, T rm, T rn, T rd) {
    constexpr uint32_t Base = 0b0000'1110'0000'0000'1000'01 << 10;
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit;
    Encoding |= (U << 29) | (opcode << 11);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= rm.Idx() << 16;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Cryptographic AES
  // Encoder for the "Cryptographic AES" group: opcode is placed at bit 12, followed by the rn and rd fields.
  void CryptoAES(uint32_t opcode, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0100'1110'0010'1000'0000'10U << 10;

    uint32_t Encoding = Base | (opcode << 12);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Cryptographic three-register SHA
  // Encoder for the "Cryptographic three-register SHA" group: rm field, opcode at bit 12, rn and rd fields.
  void Crypto3RegSHA(uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0101'1110'0000'0000'0000'00U << 10;

    uint32_t Encoding = Base;
    Encoding |= Encode_rm(rm);
    Encoding |= opcode << 12;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Cryptographic two-register SHA
  // Encoder for the "Cryptographic two-register SHA" group: opcode at bit 12, followed by the rn and rd fields.
  void Crypto2RegSHA(uint32_t opcode, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0101'1110'0010'1000'0000'10U << 10;

    uint32_t Encoding = Base | (opcode << 12);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD table lookup
  // Encoder for the "Advanced SIMD table lookup" group.
  // Shift positions: Q=30, op2=22, len=13, op=12; rm/rn/rd go through the Encode_* helpers.
  void ASIMDTable(uint32_t Q, uint32_t op2, uint32_t len, uint32_t op, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0000'1110'000U << 21;

    uint32_t Encoding = Base | (Q << 30) | (op2 << 22);
    Encoding |= Encode_rm(rm);
    Encoding |= (len << 13) | (op << 12);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD permute
  // Encoder for the "Advanced SIMD permute" group.
  // Shift positions: Q=30, size=22, opcode=12; rm/rn/rd go through the Encode_* helpers.
  void ASIMDPermute(uint32_t Q, SubRegSize size, uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0000'1110'0000'0000'0000'10U << 10;

    uint32_t Encoding = Base | (Q << 30);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= Encode_rm(rm);
    Encoding |= opcode << 12;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD extract
  // Encoder for the "Advanced SIMD extract" group.
  // Shift positions: Q=30, op2=22, imm4=11; rm/rn/rd go through the Encode_* helpers.
  void ASIMDExtract(uint32_t Q, uint32_t op2, uint32_t imm4, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0010'1110'000U << 21;

    uint32_t Encoding = Base | (Q << 30) | (op2 << 22);
    Encoding |= Encode_rm(rm);
    Encoding |= imm4 << 11;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD two-register miscellaneous
  // Encoder for the "Advanced SIMD two-register miscellaneous" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, opcode=12.
  template<IsQOrDRegister T>
  void ASIMD2RegMisc(uint32_t U, SubRegSize size, uint32_t opcode, T rd, T rn) {
    constexpr uint32_t Base = 0b0000'1110'0010'0000'0000'10U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= opcode << 12;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD across lanes
  // Encoder for the "Advanced SIMD across lanes" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, opcode=12.
  template<IsQOrDRegister T>
  void ASIMDAcrossLanes(uint32_t U, SubRegSize size, uint32_t opcode, T rd, T rn) {
    constexpr uint32_t Base = 0b0000'1110'0011'0000'0000'10U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= opcode << 12;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD three different
  // Encoder for the "Advanced SIMD three different" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, opcode=12.
  template<IsQOrDRegister T>
  void ASIMD3Different(uint32_t U, uint32_t opcode, SubRegSize size, T rd, T rn, T rm) {
    constexpr uint32_t Base = 0b0000'1110'0010'0000'0000'00U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= Encode_rm(rm);
    Encoding |= opcode << 12;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD three same
  // Encoder for the "Advanced SIMD three same" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, opcode=11.
  template<IsQOrDRegister T>
  void ASIMD3Same(uint32_t U, SubRegSize size, uint32_t opcode, T rd, T rn, T rm) {
    constexpr uint32_t Base = 0b0000'1110'0010'0000'0000'01U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    Encoding |= Encode_rm(rm);
    Encoding |= opcode << 11;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD modified immediate
  // Encoder for the "Advanced SIMD modified immediate" group.
  // The low eight bits of `imm` are split in two: imm[7:5] lands in bits 18:16 and imm[4:0] in bits 9:5.
  // Remaining shift positions: Q=30 (set for QRegister operands), op=29, cmode=12, o2=11.
  template<IsQOrDRegister T>
  void ASIMDModifiedImm(uint32_t op, uint32_t cmode, uint32_t o2, uint32_t imm, T rd) {
    constexpr uint32_t Base = 0b0000'1111'0000'0000'0000'01U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    const uint32_t ImmHigh = (imm >> 5) & 0b111;
    const uint32_t ImmLow = imm & 0b1'1111;

    uint32_t Encoding = Base | QBit;
    Encoding |= op << 29;
    Encoding |= ImmHigh << 16;
    Encoding |= (cmode << 12) | (o2 << 11);
    Encoding |= ImmLow << 5;
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD shift by immediate
  // Encoder for the "Advanced SIMD shift by immediate" group. immh must be non-zero.
  // Shift positions: Q=30 (set for QRegister operands), U=29, immh=19, immb=16, opcode=11.
  template<IsQOrDRegister T>
  void ASIMDShiftByImm(uint32_t U, uint32_t immh, uint32_t immb, uint32_t opcode, T rn, T rd) {
    LOGMAN_THROW_A_FMT(immh != 0, "ImmH needs to not be zero");

    constexpr uint32_t Base = 0b0000'1111'0000'0000'0000'01U << 10;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= (immh << 19) | (immb << 16);
    Encoding |= opcode << 11;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Advanced SIMD vector x indexed element
  // Encoder for the "Advanced SIMD vector x indexed element" group.
  // Shift positions: Q=30 (set for QRegister operands), U=29, size=22, L=21, M=20, rm=16, opcode=12, H=11, rn=5, rd=0.
  template<IsQOrDRegister T>
  void ASIMDVectorXIndexedElement(uint32_t U, uint32_t L, uint32_t M, uint32_t opcode, uint32_t H, ARMEmitter::SubRegSize size, T rm, T rn, T rd) {
    constexpr uint32_t Base = 0b0000'1111'0000'0000'0000'00 << 10;
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Base | QBit | (U << 29);
    Encoding |= FEXCore::ToUnderlying(size) << 22;

    // M (bit 20) sits directly above the low four rm bits, so a five-bit rm index can collide with it.
    // Callers are responsible for making sure the two do not conflict.
    Encoding |= (L << 21) | (M << 20);
    Encoding |= rm.Idx() << 16;

    Encoding |= (opcode << 12) | (H << 11);
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Encoder for the "Cryptographic three-register, imm2" group. index must be in [0, 3].
  // Shift positions: rm=16, index=12, opcode=10, rn=5, rd=0.
  void Crypto3RegImm(uint32_t index, uint32_t opcode, VRegister rm, VRegister rn, VRegister rd) {
    LOGMAN_THROW_A_FMT(index <= 3, "index ({}) must be within [0-3]", index);

    constexpr uint32_t Base = 0b1100'1110'0100'0000'1000'0000'0000'0000;

    uint32_t Encoding = Base | (index << 12) | (opcode << 10);
    Encoding |= rm.Idx() << 16;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Encoder for the "Cryptographic three-register SHA 512" group.
  // Shift positions: rm=16, o=14, opcode=10, rn=5, rd=0.
  void Crypto3RegSHA512(uint32_t o, uint32_t opcode, VRegister rm, VRegister rn, VRegister rd) {
    constexpr uint32_t Base = 0b1100'1110'0110'0000'1000'0000'0000'0000;

    uint32_t Encoding = Base | (o << 14) | (opcode << 10);
    Encoding |= rm.Idx() << 16;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Encoder for the "Cryptographic four-register" group.
  // Shift positions: opcode=21, rm=16, ra=10, rn=5, rd=0.
  void Crypto4Register(uint32_t opcode, VRegister rm, VRegister ra, VRegister rn, VRegister rd) {
    constexpr uint32_t Base = 0b1100'1110'0000'0000'0000'0000'0000'0000;

    uint32_t Encoding = Base | (opcode << 21);
    Encoding |= rm.Idx() << 16;
    Encoding |= ra.Idx() << 10;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Encoder for the "Cryptographic two-register SHA 512" group.
  // Shift positions: opcode=10, rn=5, rd=0.
  void Crypto2RegSHA512(uint32_t opcode, VRegister rn, VRegister rd) {
    constexpr uint32_t Base = 0b1100'1110'1100'0000'1000'0000'0000'0000;

    uint32_t Encoding = Base | (opcode << 10);
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Conversion between floating-point and fixed-point
  // Encoder for the "Conversion between floating-point and fixed-point" group.
  // Bit 31 is set when GPRSize is 64-bit.
  // Other shift positions: S=29, ScalarSize=22, rmode=19, opcode=16, scale=10, rn=5, rd=0.
  template<typename T, typename T2>
  void ScalarConvertBetweenFPAndFixed(uint32_t S, uint32_t rmode, uint32_t opcode, uint32_t scale, ARMEmitter::Size GPRSize,
                                      uint32_t ScalarSize, T rn, T2 rd) {
    constexpr uint32_t Base = 0b0001'1110'000 << 21;

    uint32_t Encoding = Base;
    if (GPRSize == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= (S << 29) | (ScalarSize << 22);
    Encoding |= (rmode << 19) | (opcode << 16) | (scale << 10);
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();
    dc32(Encoding);
  }

  // Conversion between floating-point and integer
  // Encoder for the "Conversion between floating-point and integer" group.
  // Bit 31 is set when `s` is 64-bit. Other shift positions: S=29, ptype=22, rmode=19, opcode=16.
  void ASIMDFloatConvBetweenInt(ARMEmitter::Size s, uint32_t S, uint32_t ptype, uint32_t rmode, uint32_t opcode, Register rd, Register rn) {
    constexpr uint32_t Base = 0b0001'1110'001U << 21;

    uint32_t Encoding = Base;
    if (s == ARMEmitter::Size::i64Bit) {
      Encoding |= 1U << 31;
    }
    Encoding |= (S << 29) | (ptype << 22) | (rmode << 19) | (opcode << 16);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rd(rd);
    dc32(Encoding);
  }

  // Encoder for load/store multiple structures. `Op` is the base encoding and `opcode` is ORed in unshifted.
  // Bit 30 is set for QRegister operands, bit 22 is set for loads, and the element size goes in at bit 10.
  template<ARMEmitter::SubRegSize size, bool Load, IsQOrDRegister T>
  void ASIMDLoadStoreMultipleStructure(uint32_t Op, uint32_t opcode, T rt, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;

    uint32_t Encoding = Op | QBit;
    if constexpr (Load) {
      Encoding |= 1U << 22;
    }
    Encoding |= Encode_rm(rm);
    Encoding |= opcode;
    Encoding |= FEXCore::ToUnderlying(size) << 10;
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);
    dc32(Encoding);
  }
  // Single-structure load/store that addresses one element (lane) of `rt`.
  //
  // Count is the number of structure registers (as in ST1/2/3/4 or LD1/2/3/4). Index selects the lane and
  // must be below 16/8/4/2 for 8/16/32/64-bit elements respectively.
  template<ARMEmitter::SubRegSize size, bool Load, uint32_t Count>
  void ASIMDSTLD(uint32_t Op, uint32_t Opcode, ARMEmitter::VRegister rt, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && Index < 16) || (size == SubRegSize::i16Bit && Index < 8) ||
                         (size == SubRegSize::i32Bit && Index < 4) || (size == SubRegSize::i64Bit && Index < 2),
                       "Invalid Index selected");

    // selem = Count - 1 is split over two fields: its low bit becomes R, its high bit is merged into opcode<0>.
    constexpr uint32_t selem = Count - 1;
    constexpr uint32_t RBit = selem & 1;
    const uint32_t FullOpcode = Opcode | (selem >> 1);

    // The lane index is scattered over Q, S and the size field depending on the element width:
    //   8-bit:  Q:S:size
    //   16-bit: Q:S:size<1>
    //   32-bit: Q:S
    //   64-bit: Q          (size field is set to 1)
    uint32_t QBit = 0;
    uint32_t SBit = 0;
    uint32_t SizeField = 0;
    if constexpr (size == SubRegSize::i8Bit) {
      QBit = ((Index >> 3) & 1) << 30;
      SBit = (Index >> 2) & 1;
      SizeField = Index & 0b11;
    } else if constexpr (size == SubRegSize::i16Bit) {
      QBit = ((Index >> 2) & 1) << 30;
      SBit = (Index >> 1) & 1;
      SizeField = (Index & 1) << 1;
    } else if constexpr (size == SubRegSize::i32Bit) {
      QBit = ((Index >> 1) & 1) << 30;
      SBit = Index & 1;
    } else if constexpr (size == SubRegSize::i64Bit) {
      QBit = (Index & 1) << 30;
      SizeField = 1;
    }

    // Encoding notes:
    //   scale = opcode<2:1>
    //   selem = opcode<0>:R + 1
    //
    //   scale 0: Index = Q:S:size      - aka B[0-15]
    //   scale 1: Index = Q:S:size<1>   - aka H[0-7]
    //   scale 2: Index = Q:S for i32   - aka S[0-3]
    //            Index = Q   for i64   - aka D[0-1]
    //            undefined   for i128
    //   scale 3: Load+Replicate, scale = size

    ASIMDLoadStore(Op | QBit, Load, RBit, FullOpcode, SBit, SizeField, rt, rn, rm);
  }

  // Single-structure load/store without a lane index: S is always 0 and the element size is written to the
  // size field directly. Bit 30 is set when T is a QRegister.
  //
  // Count is the number of structure registers (as in ST1/2/3/4 or LD1/2/3/4).
  template<ARMEmitter::SubRegSize size, bool Load, uint32_t Count, typename T>
  void ASIMDSTLD(uint32_t Op, uint32_t Opcode, T rt, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    constexpr uint32_t QBit = std::is_same_v<ARMEmitter::QRegister, T> ? (1U << 30) : 0U;
    constexpr uint32_t SBit = 0;

    // selem = Count - 1 is split over two fields: its low bit becomes R, its high bit is merged into opcode<0>.
    constexpr uint32_t selem = Count - 1;
    constexpr uint32_t RBit = selem & 1;
    const uint32_t FullOpcode = Opcode | (selem >> 1);

    // Encoding notes:
    //   scale = opcode<2:1>
    //   selem = opcode<0>:R + 1
    //
    //   scale 0: Index = Q:S:size      - aka B[0-15]
    //   scale 1: Index = Q:S:size<1>   - aka H[0-7]
    //   scale 2: Index = Q:S for i32   - aka S[0-3]
    //            Index = Q   for i64   - aka D[0-1]
    //            undefined   for i128
    //   scale 3: Load+Replicate, scale = size

    ASIMDLoadStore(Op | QBit, Load, RBit, FullOpcode, SBit, FEXCore::ToUnderlying(size), rt, rn, rm);
  }
  // Final encoding step shared by the single-structure load/store helpers. `Op` is the base encoding.
  // Shift positions: L=22, R=21, opcode=13, S=12, size=10; rm/rn/rt go through the Encode_* helpers.
  void ASIMDLoadStore(uint32_t Op, uint32_t L, uint32_t R, uint32_t opcode, uint32_t S, uint32_t size, ARMEmitter::VRegister rt,
                      ARMEmitter::Register rn, ARMEmitter::Register rm) {
    uint32_t Encoding = Op | (L << 22) | (R << 21);
    Encoding |= Encode_rm(rm);
    Encoding |= (opcode << 13) | (S << 12) | (size << 10);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct LoadstoreEmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/BranchOps.inl
================================================
// SPDX-License-Identifier: MIT
/* Branch instruction emitters.
 *
 * Most of these instructions will use `BackwardLabel`, `ForwardLabel`, or `BiDirectionLabel` to determine where a branch targets.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // Branches, Exception Generating and System instructions
public:
  // Conditional branch immediate
  ///< Branch conditional
  void b(ARMEmitter::Condition Cond, uint32_t Imm) {
    // Raw form: Imm is masked into the 19-bit offset field as-is, no scaling happens here.
    constexpr uint32_t BCondBase = 0b0101'010 << 25;
    constexpr uint32_t Bit24 = 0;
    constexpr uint32_t Bit4 = 0; // 0 here, 1 for the `bc` variant
    Branch_Conditional(BCondBase, Bit24, Bit4, Cond, Imm);
  }
  // Emits B.cond targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-1048576, 1048575] (the signed 19-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded b(ARMEmitter::Condition Cond, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();
    if (Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0101'010 << 25;
      // The encoder masks the value down to 19 bits, so the wrapped two's-complement form is what it expects.
      Branch_Conditional(Op, 0, 0, Cond, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }
  [[nodiscard]] BranchEncodeSucceeded b(ARMEmitter::Condition Cond, ForwardLabel* Label) {
    constexpr uint32_t BCondBase = 0b0101'010 << 25;

    // Register this instruction's address with the label, then emit it with a zero offset placeholder.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::BC});
    Branch_Conditional(BCondBase, 0, 0, Cond, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded b(ARMEmitter::Condition Cond, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? b(Cond, &Label->Backward) : b(Cond, &Label->Forward);
  }

  ///< Branch consistent conditional
  void bc(ARMEmitter::Condition Cond, uint32_t Imm) {
    // Same base encoding as `b(Cond, Imm)`, but with bit 4 set. Imm goes into the offset field unscaled.
    constexpr uint32_t BCondBase = 0b0101'010 << 25;
    constexpr uint32_t Bit24 = 0;
    constexpr uint32_t Bit4 = 1;
    Branch_Conditional(BCondBase, Bit24, Bit4, Cond, Imm);
  }
  // Emits BC.cond targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-1048576, 1048575] (the signed 19-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded bc(ARMEmitter::Condition Cond, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();
    if (Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0101'010 << 25;
      // The encoder masks the value down to 19 bits, so the wrapped two's-complement form is what it expects.
      Branch_Conditional(Op, 0, 1, Cond, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  [[nodiscard]] BranchEncodeSucceeded bc(ARMEmitter::Condition Cond, ForwardLabel* Label) {
    constexpr uint32_t BCondBase = 0b0101'010 << 25;

    // Register this instruction's address with the label, then emit it with a zero offset placeholder.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::BC});
    Branch_Conditional(BCondBase, 0, 1, Cond, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded bc(ARMEmitter::Condition Cond, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? bc(Cond, &Label->Backward) : bc(Cond, &Label->Forward);
  }

  // Unconditional branch register
  void br(ARMEmitter::Register rn) {
    // Unconditional branch (register) with opc = 0b0000; the remaining fixed fields are named below.
    constexpr uint32_t Class = 0b1101011U << 25;
    constexpr uint32_t Opc = 0b0'000U << 21;
    constexpr uint32_t Op2 = 0b1'1111U << 16;
    constexpr uint32_t Op3 = 0b0000'00U << 10;
    constexpr uint32_t Op4 = 0b0'0000U;

    UnconditionalBranch(Class | Opc | Op2 | Op3 | Op4, rn);
  }
  void blr(ARMEmitter::Register rn) {
    // Unconditional branch (register) with opc = 0b0001; the remaining fixed fields are named below.
    constexpr uint32_t Class = 0b1101011U << 25;
    constexpr uint32_t Opc = 0b0'001U << 21;
    constexpr uint32_t Op2 = 0b1'1111U << 16;
    constexpr uint32_t Op3 = 0b0000'00U << 10;
    constexpr uint32_t Op4 = 0b0'0000U;

    UnconditionalBranch(Class | Opc | Op2 | Op3 | Op4, rn);
  }
  void ret(ARMEmitter::Register rn = ARMEmitter::Reg::r30) {
    // Unconditional branch (register) with opc = 0b0010; rn defaults to r30 when the caller gives none.
    constexpr uint32_t Class = 0b1101011U << 25;
    constexpr uint32_t Opc = 0b0'010U << 21;
    constexpr uint32_t Op2 = 0b1'1111U << 16;
    constexpr uint32_t Op3 = 0b0000'00U << 10;
    constexpr uint32_t Op4 = 0b0'0000U;

    UnconditionalBranch(Class | Opc | Op2 | Op3 | Op4, rn);
  }

  // Unconditional branch immediate
  void b(uint32_t Imm) {
    // Raw form: Imm is masked into the 26-bit offset field as-is, no scaling happens here.
    constexpr uint32_t BranchBase = 0b0001'01 << 26;
    UnconditionalBranch(BranchBase, Imm);
  }
  // Emits B targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-134217728, 134217724] (the signed 26-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded b(const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();
    if (Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0001'01 << 26;
      // The encoder masks the value down to 26 bits, so the wrapped two's-complement form is what it expects.
      UnconditionalBranch(Op, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }
  [[nodiscard]] BranchEncodeSucceeded b(ForwardLabel* Label) {
    constexpr uint32_t BranchBase = 0b0001'01 << 26;

    // Register this instruction's address with the label, then emit it with a zero offset placeholder.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::B});
    UnconditionalBranch(BranchBase, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded b(BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? b(&Label->Backward) : b(&Label->Forward);
  }

  void bl(uint32_t Imm) {
    // Raw form: Imm is masked into the 26-bit offset field as-is, no scaling happens here.
    constexpr uint32_t BranchLinkBase = 0b1001'01 << 26;
    UnconditionalBranch(BranchLinkBase, Imm);
  }

  // Emits BL targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-134217728, 134217724] (the signed 26-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded bl(const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();
    if (Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b1001'01 << 26;
      // The encoder masks the value down to 26 bits, so the wrapped two's-complement form is what it expects.
      UnconditionalBranch(Op, static_cast<uint32_t>(Imm >> 2));

      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }
  [[nodiscard]] BranchEncodeSucceeded bl(ForwardLabel* Label) {
    constexpr uint32_t BranchLinkBase = 0b1001'01 << 26;

    // Register this instruction's address with the label (same reference type as plain `b`),
    // then emit it with a zero offset placeholder.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::B});
    UnconditionalBranch(BranchLinkBase, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded bl(BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? bl(&Label->Backward) : bl(&Label->Forward);
  }

  // Compare and branch
  void cbz(ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
    // Raw form: Imm is masked into the 19-bit offset field as-is, no scaling happens here.
    constexpr uint32_t CbzBase = 0b0011'0100 << 24;
    CompareAndBranch(CbzBase, s, rt, Imm);
  }

  // Emits CBZ targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-1048576, 1048575] (the signed 19-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded cbz(ARMEmitter::Size s, ARMEmitter::Register rt, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();

    if (Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0011'0100 << 24;
      // The encoder masks the value down to 19 bits, so the wrapped two's-complement form is what it expects.
      CompareAndBranch(Op, s, rt, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  [[nodiscard]] BranchEncodeSucceeded cbz(ARMEmitter::Size s, ARMEmitter::Register rt, ForwardLabel* Label) {
    constexpr uint32_t CbzBase = 0b0011'0100 << 24;

    // Registered with the BC reference type, the same one the conditional branches use.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::BC});

    // Zero offset placeholder for now.
    CompareAndBranch(CbzBase, s, rt, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded cbz(ARMEmitter::Size s, ARMEmitter::Register rt, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? cbz(s, rt, &Label->Backward) : cbz(s, rt, &Label->Forward);
  }

  void cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
    // Raw form: Imm is masked into the 19-bit offset field as-is, no scaling happens here.
    constexpr uint32_t CbnzBase = 0b0011'0101 << 24;
    CompareAndBranch(CbnzBase, s, rt, Imm);
  }

  // Emits CBNZ targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-1048576, 1048575] (the signed 19-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();

    if (Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0011'0101 << 24;
      // The encoder masks the value down to 19 bits, so the wrapped two's-complement form is what it expects.
      CompareAndBranch(Op, s, rt, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  [[nodiscard]] BranchEncodeSucceeded cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, ForwardLabel* Label) {
    constexpr uint32_t CbnzBase = 0b0011'0101 << 24;

    // Registered with the BC reference type, the same one the conditional branches use.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::BC});

    // Zero offset placeholder for now.
    CompareAndBranch(CbnzBase, s, rt, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded cbnz(ARMEmitter::Size s, ARMEmitter::Register rt, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? cbnz(s, rt, &Label->Backward) : cbnz(s, rt, &Label->Forward);
  }

  // Test and branch immediate
  void tbz(ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
    // Raw form: Imm is masked into the 14-bit offset field as-is, no scaling happens here.
    constexpr uint32_t TbzBase = 0b0011'0110 << 24;
    TestAndBranch(TbzBase, rt, Bit, Imm);
  }
  // Emits TBZ targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-32768, 32764] (the signed 14-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded tbz(ARMEmitter::Register rt, uint32_t Bit, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();

    if (Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0011'0110 << 24;
      // The encoder masks the value down to 14 bits, so the wrapped two's-complement form is what it expects.
      TestAndBranch(Op, rt, Bit, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  [[nodiscard]] BranchEncodeSucceeded tbz(ARMEmitter::Register rt, uint32_t Bit, ForwardLabel* Label) {
    constexpr uint32_t TbzBase = 0b0011'0110 << 24;

    // Test-and-branch has its own reference type because its offset field differs from the other branches.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::TEST_BRANCH});

    // Zero offset placeholder for now.
    TestAndBranch(TbzBase, rt, Bit, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded tbz(ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? tbz(rt, Bit, &Label->Backward) : tbz(rt, Bit, &Label->Forward);
  }

  void tbnz(ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
    // Raw form: Imm is masked into the 14-bit offset field as-is, no scaling happens here.
    constexpr uint32_t TbnzBase = 0b0011'0111 << 24;
    TestAndBranch(TbnzBase, rt, Bit, Imm);
  }
  // Emits TBNZ targeting an already-placed (backward) label.
  // Returns Failure, emitting nothing, when the byte distance is not a multiple of 4 or lies outside
  // [-32768, 32764] (the signed 14-bit word offset); returns Success after emitting otherwise.
  [[nodiscard]] BranchEncodeSucceeded tbnz(ARMEmitter::Register rt, uint32_t Bit, const BackwardLabel* Label) {
    // Keep the distance at full width. Narrowing to 32 bits before the range check would let a distance
    // that is off by a multiple of 4 GiB alias into the encodable window and emit a wrong branch.
    const int64_t Imm = Label->Location - GetCursorAddress<uint8_t*>();

    if (Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0)) {
      constexpr uint32_t Op = 0b0011'0111 << 24;
      // The encoder masks the value down to 14 bits, so the wrapped two's-complement form is what it expects.
      TestAndBranch(Op, rt, Bit, static_cast<uint32_t>(Imm >> 2));
      return BranchEncodeSucceeded::Success;
    }

    // Can't encode.
    return BranchEncodeSucceeded::Failure;
  }

  [[nodiscard]] BranchEncodeSucceeded tbnz(ARMEmitter::Register rt, uint32_t Bit, ForwardLabel* Label) {
    constexpr uint32_t TbnzBase = 0b0011'0111 << 24;

    // Test-and-branch has its own reference type because its offset field differs from the other branches.
    uint8_t* const BranchAddress = GetCursorAddress<uint8_t*>();
    AddLocationToLabel(Label, ForwardLabel::Reference {.Location = BranchAddress, .Type = ForwardLabel::InstType::TEST_BRANCH});

    // Zero offset placeholder for now.
    TestAndBranch(TbnzBase, rt, Bit, 0);

    // Encodability is only known once the label is bound, so this path always reports success.
    return BranchEncodeSucceeded::Success;
  }

  [[nodiscard]] BranchEncodeSucceeded tbnz(ARMEmitter::Register rt, uint32_t Bit, BiDirectionalLabel* Label) {
    // A non-null backward location means the target is already known; otherwise record a forward reference.
    const bool TargetKnown = Label->Backward.Location != nullptr;
    return TargetKnown ? tbnz(rt, Bit, &Label->Backward) : tbnz(rt, Bit, &Label->Forward);
  }

private:
  // Conditional branch immediate
  void Branch_Conditional(uint32_t Op, uint32_t Op1, uint32_t Op0, ARMEmitter::Condition Cond, uint32_t Imm) {
    // Field layout: Op1 at bit 24, 19-bit immediate at bits [23:5], Op0 at bit 4, condition in the low bits.
    constexpr uint32_t Imm19Mask = 0x7'FFFF;

    const uint32_t OffsetField = (Imm & Imm19Mask) << 5;
    const uint32_t Word = Op | (Op1 << 24) | OffsetField | (Op0 << 4) | FEXCore::ToUnderlying(Cond);

    dc32(Word);
  }

  // Unconditional branch register
  void UnconditionalBranch(uint32_t Op, ARMEmitter::Register rn) {
    // Register form: the only variable field is Rn.
    dc32(Op | Encode_rn(rn));
  }

  // Unconditional branch - immediate
  void UnconditionalBranch(uint32_t Op, uint32_t Imm) {
    // Immediate form: the low 26 bits of the word carry the offset; anything above is dropped.
    constexpr uint32_t Imm26Mask = 0x3FF'FFFF;
    dc32(Op | (Imm & Imm26Mask));
  }

  // Compare and branch
  void CompareAndBranch(uint32_t Op, ARMEmitter::Size s, ARMEmitter::Register rt, uint32_t Imm) {
    // Bit 31 (SF) is set for 64-bit compares and left clear otherwise.
    uint32_t Word = Op;
    if (s == ARMEmitter::Size::i64Bit) {
      Word |= 1U << 31;
    }

    // 19-bit immediate at bits [23:5], Rt in the low bits.
    constexpr uint32_t Imm19Mask = 0x7'FFFF;
    Word |= (Imm & Imm19Mask) << 5;
    Word |= Encode_rt(rt);
    dc32(Word);
  }

  // Test and branch - immediate
  void TestAndBranch(uint32_t Op, ARMEmitter::Register rt, uint32_t Bit, uint32_t Imm) {
    // The tested bit number is split: bit 5 of it goes to instruction bit 31, the low five bits to [23:19].
    const uint32_t BitHigh = (Bit >> 5) << 31;
    const uint32_t BitLow = (Bit & 0b1'1111) << 19;

    // 14-bit immediate at bits [18:5], Rt in the low bits.
    const uint32_t OffsetField = (Imm & 0x3FFF) << 5;

    dc32(Op | BitHigh | BitLow | OffsetField | Encode_rt(rt));
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct EmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/Buffer.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

namespace ARMEmitter {
class Buffer {
public:
  Buffer() {
    SetBuffer(nullptr, 0);
  }

  Buffer(uint8_t* Base, uint64_t BaseSize) {
    SetBuffer(Base, BaseSize);
  }

  void SetBuffer(uint8_t* Base, uint64_t BaseSize) {
    BufferBase = Base;
    CurrentOffset = BufferBase;
    Size = BaseSize;
  }

  template<typename T>
  requires (std::is_trivially_copyable_v<T>)
  void dcn(const T& Data) {
    std::memcpy(CurrentOffset, &Data, sizeof(Data));
    CurrentOffset += sizeof(Data);
  }
  void dc8(uint8_t Data) {
    dcn(Data);
  }
  void dc16(uint16_t Data) {
    dcn(Data);
  }
  void dc32(uint32_t Data) {
    dcn(Data);
  }
  void dc64(uint64_t Data) {
    dcn(Data);
  }

  void EmitString(const char* String) {
    const auto StringLength = strlen(String);
    memcpy(CurrentOffset, String, StringLength);
    CurrentOffset += StringLength;
  }

  void Align(size_t Size = 4) {
    // Align the buffer to provided size.
    auto CurrentAlignment = reinterpret_cast<uint64_t>(CurrentOffset) & (Size - 1);
    if (!CurrentAlignment) {
      return;
    }
    std::memset(CurrentOffset, 0, Size - CurrentAlignment);
    CurrentOffset += Size - CurrentAlignment;
  }

  template<typename T>
  T GetCursorAddress() const {
    return reinterpret_cast<T>(CurrentOffset);
  }

  static void ClearICache(void* Begin, std::size_t Length) {
    __builtin___clear_cache(static_cast<char*>(Begin), static_cast<char*>(Begin) + Length);
  }

  size_t GetCursorOffset() const {
    return static_cast<size_t>(CurrentOffset - BufferBase);
  }

  uint8_t* GetBufferBase() const {
    return BufferBase;
  }

  void CursorIncrement(size_t Size) {
    CurrentOffset += Size;
  }

  void SetCursorOffset(size_t Offset) {
    CurrentOffset = BufferBase + Offset;
  }

  uint64_t GetBufferSize() const {
    return Size;
  }

  template<typename T>
  size_t GetCursorOffsetFromAddress(const T* Address) const {
    return static_cast<size_t>(reinterpret_cast<const uint8_t*>(Address) - BufferBase);
  }

protected:

  uint8_t* BufferBase;
  uint8_t* CurrentOffset;
  uint64_t Size;
};
} // namespace ARMEmitter


================================================
FILE: CodeEmitter/CodeEmitter/Emitter.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/vector.h>

#include <FEXHeaderUtils/BitUtils.h>
#include <CodeEmitter/Buffer.h>
#include <CodeEmitter/Registers.h>

#include <array>
#include <bit>
#include <cstdint>
#include <utility>
#include <type_traits>

/*
 * Welcome to FEX-Emu's custom AArch64 emitter.
 * This was written specifically to avoid the performance cost of the vixl emitter.
 *
 * There are some specific design constraints in this design to target a couple features:
 *   - High performance
 *   - Low CPU cache performance hit
 *   - Significantly reduced code footprint
 *   - Low number of branches
 *
 * These requirements are mostly achieved by removing a bunch of developer conveniences
 * that vixl provides. The developer needs to take a lot of care to not shoot themselves in the foot.
 *
 * Misc design decisions:
 * - Registers are encoded as basic uint32_t enums.
 *   - Converting between different registers is zero-cost.
 *   - Passing around as arguments are as cheap as registers
 *     - Contrast to vixl where every register requires living on the stack.
 *   - Registers can get encoded in to instructions with a simple `BFM` instruction.
 *
 * - Instructions are very simply emitted, allowing direct inlining most of the time.
 *   - These are simple enough that multiple back-to-back instructions get optimized to 128-bit load-store operations.
 *     - Contrast to vixl where pretty much no instruction emitter gets inlined.
 *
 * - Instruction emitters are /mostly/ unsized. Most instructions take a size argument first, which gets encoded
 *   directly in to the instruction.
 *   - Contrast to vixl where the register arguments are how the instructions determine operating size.
 *   - Size argument allows FEX to use `CSEL` to select a size at runtime, instead of branching.
 *   - Some instructions are explicitly sized based on register type. Read comments in the respective `inl` files to
 *     see why.
 *     Some scalar/vector operations are an example of this.
 *
 * - Almost zero helper functions.
 *   - Primary exception to this rule is load-store operations. These will use a helper to make
 *     it easier to select the correct load-store instruction. Mostly because these are a nightmare selecting
 *     the right instruction.
 */
namespace ARMEmitter {
/*
 * This `Size` enum is used for most ALU operations.
 * These follow the AArch64 encoding style in most cases.
 *
 * The underlying value doubles as a shift amount: `RegSizeInBits` computes `32 << value`.
 */
enum class Size : uint32_t {
  i32Bit = 0, // RegSizeInBits() == 32
  i64Bit,     // RegSizeInBits() == 64
};

// Width in bits of an operation of the given `Size`: 32 shifted left by the enum's underlying value.
[[nodiscard]]
constexpr size_t RegSizeInBits(Size size) {
  constexpr size_t BaseBits = 32;
  const auto Log2Scale = FEXCore::ToUnderlying(size);
  return BaseBits << Log2Scale;
}

/* This `SubRegSize` enum is used for most ASIMD operations.
 * These follow the AArch64 encoding style in most cases.
 *
 * The underlying value is log2(element bytes): `SubRegSizeInBits` computes `8 << value`.
 * Note that `i128Bit` needs three bits, so it does not fit a 2-bit size field.
 */
enum class SubRegSize : uint32_t {
  i8Bit = 0b00,
  i16Bit = 0b01,
  i32Bit = 0b10,
  i64Bit = 0b11,
  i128Bit = 0b100,
};

// Element width in bits for a `SubRegSize`: 8 shifted left by the enum's underlying value.
[[nodiscard]]
constexpr size_t SubRegSizeInBits(SubRegSize size) {
  constexpr size_t BaseBits = 8;
  const auto Log2Scale = FEXCore::ToUnderlying(size);
  return BaseBits << Log2Scale;
}

// True for exactly the three element sizes that floating point operations commonly accept:
// 16-bit (half), 32-bit (single) and 64-bit (double). Every other size yields false.
[[nodiscard]]
constexpr bool IsStandardFloatSize(SubRegSize size) {
  switch (size) {
  case SubRegSize::i16Bit:
  case SubRegSize::i32Bit:
  case SubRegSize::i64Bit: return true;
  default: return false;
  }
}

/* This `ScalarRegSize` enum is used for most scalar float
 * operations.
 *
 * This is specifically duplicated from `SubRegSize` to have strongly
 * typed functions.
 *
 * `ScalarRegSize` specifically doesn't have `i128Bit` because scalar operations
 * can't operate at 128-bit.
 *
 * The four enumerators carry the same underlying values as their `SubRegSize` namesakes.
 */
enum class ScalarRegSize : uint32_t {
  i8Bit = 0b00,
  i16Bit = 0b01,
  i32Bit = 0b10,
  i64Bit = 0b11,
};

// Width in bits for a `ScalarRegSize`: 8 shifted left by the enum's underlying value.
[[nodiscard]]
constexpr size_t ScalarRegSizeInBits(ScalarRegSize size) {
  constexpr size_t BaseBits = 8;
  const auto Log2Scale = FEXCore::ToUnderlying(size);
  return BaseBits << Log2Scale;
}

/* This `VectorRegSizePair` union allows us to have an overlapping type
 * to select a scalar operation or a vector depending on which operation
 * we pass in.
 * Useful in FEX's vector operations that behave as scalar or vector
 * depending on various factors. But since the operation will have the same
 * element size, we want to choose the operation more easily.
 */
union VectorRegSizePair {
  ScalarRegSize Scalar; // Scalar view of the element size
  SubRegSize Vector;    // Vector view of the element size
};

// Wraps a vector element size in a `VectorRegSizePair`, making `Vector` the initialized member.
[[nodiscard]]
constexpr VectorRegSizePair ToVectorSizePair(SubRegSize size) {
  VectorRegSizePair Pair {.Vector = size};
  return Pair;
}
// Wraps a scalar element size in a `VectorRegSizePair`, making `Scalar` the initialized member.
[[nodiscard]]
constexpr VectorRegSizePair ToVectorSizePair(ScalarRegSize size) {
  VectorRegSizePair Pair {.Scalar = size};
  return Pair;
}

// This `ShiftType` enum is used for ALU shift-register encoded instructions.
// Enumerators take the values 0 through 3 in declaration order.
enum class ShiftType : uint32_t {
  LSL = 0, // Logical shift left
  LSR,     // Logical shift right
  ASR,     // Arithmetic shift right
  ROR,     // Rotate right
};

// This `ExtendedType` enum is used for ALU extended-register encoded instructions.
// The value is a 3-bit field. `LSL_32` and `LSL_64` are not extra encodings: they are
// aliases sharing the values of `UXTW` and `UXTX` respectively.
enum class ExtendedType : uint32_t {
  UXTB = 0b000,
  UXTH = 0b001,
  UXTW = 0b010,
  UXTX = 0b011,
  SXTB = 0b100,
  SXTH = 0b101,
  SXTW = 0b110,
  SXTX = 0b111,
  LSL_32 = UXTW,
  LSL_64 = UXTX,
};

// This `Condition` enum is used for various conditional instructions.
// Enumerators count up from 0 (`CC_EQ`) to 15 (`CC_NV`) in declaration order.
// The conditional branch encoder ORs the underlying value directly into the instruction word.
enum class Condition : uint32_t {
  // Meaning:   Int                    - Float
  CC_EQ = 0, // Equal                  - Equal
  CC_NE,     // Not Eq                 - Not Eq or unordered
  CC_CS,     // Carry set              - Greater than, equal, or unordered
  CC_CC,     // Carry clear            - Less than
  CC_MI,     // Minus/Negative         - Less than
  CC_PL,     // Plus, positive or zero - GT, equal, or unordered
  CC_VS,     // Overflow               - Unordered
  CC_VC,     // No Overflow            - Ordered
  CC_HI,     // Unsigned higher        - GT, or unordered
  CC_LS,     // Unsigned lower or same - LT or EQ
  CC_GE,     // Signed GT or EQ        - GT or EQ
  CC_LT,     // Signed LT              - LT or Unordered
  CC_GT,     // Signed GT              - GT
  CC_LE,     // Signed LT or EQ        - LT, EQ, or Unordered
  CC_AL,     // Always                 - Always
  CC_NV,     // Always                 - Always

  // Aliases (same encodings as CC_CS and CC_CC)
  CC_HS = CC_CS,
  CC_LO = CC_CC,
};

/*
 * This `StatusFlags` enum is used for conditional compare encoded instructions.
 * These directly encode to the `nzcv` flags.
 *
 * Each flag is a single bit of a 4-bit value, with N as the most significant bit and V the least.
 */
enum class StatusFlags : uint32_t {
  None = 0,
  Flag_V = 0b0001,
  Flag_C = 0b0010,
  Flag_Z = 0b0100,
  Flag_N = 0b1000,

  // All four flags set (0b1111).
  Flag_NZCV = Flag_N | Flag_Z | Flag_C | Flag_V,
};


/*
 * This `IndexType` enum is used for load-store instructions.
 * Not all load-store instructions use this, so the user needs to be careful.
 *
 * No underlying type is fixed here and no values are assigned explicitly, so the
 * enumerators are 0 through 3 in declaration order.
 */
enum class IndexType {
  POST,
  OFFSET,
  PRE,

  UNPRIVILEGED,
};

// Used with adr and scalar + vector load/store variants to denote
// a modifier operation.
// Stored as a single byte; `MOD_NONE` is the default modifier of the scalar + vector `SVEMemOperand` constructor.
enum class SVEModType : uint8_t {
  MOD_UXTW,
  MOD_SXTW,
  MOD_LSL,
  MOD_NONE,
};

/* This `SVEMemOperand` class is used for the helper SVE load-store instructions.
 * Load-store instructions are quite expressive, so having a helper that handles these differences is worth it.
 *
 * It is a tagged union: `MemType` records which addressing form was constructed and therefore
 * which member of `MetaType` is the one that was initialized.
 */
class SVEMemOperand final {
public:
  // Addressing form held by this operand.
  enum class Type {
    ScalarPlusScalar,
    ScalarPlusImm,
    ScalarPlusVector,
    VectorPlusImm,
  };

  // Scalar base + scalar offset register.
  // NOTE(review): this overload and the immediate overload below both default their second parameter,
  // so a call passing only `rn` looks ambiguous between the two. Callers seem expected to pass both arguments.
  SVEMemOperand(XRegister rn, XRegister rm = XReg::zr)
    : rn {rn}
    , MemType {Type::ScalarPlusScalar}
    , MetaType {.ScalarScalarType {
        .rm = rm,
      }} {}
  // Scalar base + signed immediate.
  SVEMemOperand(XRegister rn, int32_t imm = 0)
    : rn {rn}
    , MemType {Type::ScalarPlusImm}
    , MetaType {.ScalarImmType {
        .Imm = imm,
      }} {}
  // Scalar base + vector of offsets, with an optional modifier and scale.
  SVEMemOperand(XRegister rn, ZRegister zm, SVEModType mod = SVEModType::MOD_NONE, uint8_t scale = 0)
    : rn {rn}
    , MemType {Type::ScalarPlusVector}
    , MetaType {.ScalarVectorType {
        .zm = zm,
        .mod = mod,
        .scale = scale,
      }} {}
  // Vector base + unsigned immediate. The Z register's index is stored in `rn`.
  SVEMemOperand(ZRegister zn, uint32_t imm)
    : rn {Register {zn.Idx()}}
    , MemType {Type::VectorPlusImm}
    , MetaType {.VectorImmType {
        .Imm = imm,
      }} {}

  // Predicates over `MemType`; exactly one of the four is true for any operand.
  [[nodiscard]]
  bool IsScalarPlusScalar() const {
    return MemType == Type::ScalarPlusScalar;
  }
  [[nodiscard]]
  bool IsScalarPlusImm() const {
    return MemType == Type::ScalarPlusImm;
  }
  [[nodiscard]]
  bool IsScalarPlusVector() const {
    return MemType == Type::ScalarPlusVector;
  }
  [[nodiscard]]
  bool IsVectorPlusImm() const {
    return MemType == Type::VectorPlusImm;
  }

  // Per-form payload; only the member matching `MemType` is initialized by the constructors.
  union Data {
    struct {
      Register rm;
    } ScalarScalarType;

    struct {
      int32_t Imm;
    } ScalarImmType;

    struct {
      ZRegister zm;
      SVEModType mod;
      uint8_t scale;
    } ScalarVectorType;

    struct {
      // rn will be a ZRegister
      uint32_t Imm;
    } VectorImmType;
  };

  Register rn;   // Base register (holds a Z register index for VectorPlusImm)
  Type MemType;  // Tag selecting the active member of MetaType
  Data MetaType; // Form-specific payload
};

/* This `ExtendedMemOperand` class is used for the helper load-store instructions.
 * Load-store instructions are quite expressive, so having a helper that handles these differences is worth it.
 *
 * Both payload structs in `MetaType` begin with the same `HeaderStruct`, so `MetaType.Header.MemType`
 * identifies which of the two forms was constructed.
 */
class ExtendedMemOperand final {
public:
  // Base register + offset register with an extend/shift option.
  // NOTE(review): this overload and the immediate overload below both default every parameter after `rn`,
  // so a call passing only `rn` looks ambiguous between the two. Callers seem expected to pass a second argument.
  ExtendedMemOperand(XRegister rn, XRegister rm = XReg::zr, ExtendedType Option = ExtendedType::LSL_64, uint32_t Shift = 0)
    : rn {rn}
    , MetaType {.Extended {
        .Header = {.MemType = TYPE_EXTENDED},
        .rm = rm,
        .Option = Option,
        .Shift = Shift,
      }} {}
  // Base register + signed immediate with an indexing mode.
  ExtendedMemOperand(XRegister rn, IndexType Index = IndexType::OFFSET, int32_t Imm = 0)
    : rn {rn}
    , MetaType {.ImmType {
        .Header = {.MemType = TYPE_IMM},
        .Index = Index,
        .Imm = Imm,
      }} {}

  Register rn; // Base register
  // Tag stored in the shared header.
  enum Type {
    TYPE_EXTENDED,
    TYPE_IMM,
  };
  struct HeaderStruct {
    Type MemType;
  };
  union {
    // Common first member of both payloads; read this to find out which form is active.
    HeaderStruct Header;
    struct {
      HeaderStruct Header;
      Register rm;
      ExtendedType Option;
      uint32_t Shift;
    } Extended;
    struct {
      HeaderStruct Header;
      IndexType Index;
      int32_t Imm;
    } ImmType;
  } MetaType;
};

// Packs the five system-register selector fields into their positions:
// op2 at bit 5, CRm at bit 8, CRn at bit 12, op1 at bit 16 and op0 at bit 19.
template<uint32_t op0, uint32_t op1, uint32_t CRn, uint32_t CRm, uint32_t op2>
inline constexpr uint32_t GenSystemReg = (op2 << 5) | (CRm << 8) | (CRn << 12) | (op1 << 16) | (op0 << 19);

// This `SystemRegister` enum is used for the mrs/msr instructions.
// Each value is built by `GenSystemReg<op0, op1, CRn, CRm, op2>`, so the fields are already
// shifted into the bit positions that template assigns them.
enum class SystemRegister : uint32_t {
  CTR_EL0 = GenSystemReg<0b11, 0b011, 0b0000, 0b0000, 0b001>,
  DCZID_EL0 = GenSystemReg<0b11, 0b011, 0b0000, 0b0000, 0b111>,
  TPIDR_EL0 = GenSystemReg<0b11, 0b011, 0b1101, 0b0000, 0b010>,
  RNDR = GenSystemReg<0b11, 0b011, 0b0010, 0b0100, 0b000>,
  RNDRRS = GenSystemReg<0b11, 0b011, 0b0010, 0b0100, 0b001>,
  NZCV = GenSystemReg<0b11, 0b011, 0b0100, 0b0010, 0b000>,
  FPCR = GenSystemReg<0b11, 0b011, 0b0100, 0b0100, 0b000>,
  TPIDRRO_EL0 = GenSystemReg<0b11, 0b011, 0b1101, 0b0000, 0b011>,
  CNTFRQ_EL0 = GenSystemReg<0b11, 0b011, 0b1110, 0b0000, 0b000>,
  CNTVCT_EL0 = GenSystemReg<0b11, 0b011, 0b1110, 0b0000, 0b010>,
  CNTVCTSS_EL0 = GenSystemReg<0b11, 0b011, 0b1110, 0b0000, 0b110>,
};

// Packs the three data-cache operation selector fields into their positions:
// op2 at bit 5, CRm at bit 8 and op1 at bit 16.
template<uint32_t op1, uint32_t CRm, uint32_t op2>
inline constexpr uint32_t GenDCReg = (op2 << 5) | (CRm << 8) | (op1 << 16);

// This `DataCacheOperation` enum is used for the dc instruction.
// Each value is built by `GenDCReg<op1, CRm, op2>`, so the fields are already shifted into place.
// The group comments below are the original author's feature labels for each set of operations.
enum class DataCacheOperation : uint32_t {
  IVAC = GenDCReg<0b000, 0b0110, 0b001>,
  ISW = GenDCReg<0b000, 0b0110, 0b010>,
  CSW = GenDCReg<0b000, 0b1010, 0b010>,
  CISW = GenDCReg<0b000, 0b1110, 0b010>,
  ZVA = GenDCReg<0b011, 0b0100, 0b001>,
  CVAC = GenDCReg<0b011, 0b1010, 0b001>,
  CVAU = GenDCReg<0b011, 0b1011, 0b001>,
  CIVAC = GenDCReg<0b011, 0b1110, 0b001>,

  // MTE2
  IGVAC = GenDCReg<0b000, 0b0110, 0b011>,
  IGSW = GenDCReg<0b000, 0b0110, 0b100>,
  IGDVAC = GenDCReg<0b000, 0b0110, 0b101>,
  IGDSW = GenDCReg<0b000, 0b0110, 0b110>,
  CGSW = GenDCReg<0b000, 0b1010, 0b100>,
  CGDSW = GenDCReg<0b000, 0b1010, 0b110>,
  CIGSW = GenDCReg<0b000, 0b1110, 0b100>,
  CIGDSW = GenDCReg<0b000, 0b1110, 0b110>,

  // MTE
  GVA = GenDCReg<0b011, 0b0100, 0b011>,
  GZVA = GenDCReg<0b011, 0b0100, 0b100>,
  CGVAC = GenDCReg<0b011, 0b1010, 0b011>,
  CGDVAC = GenDCReg<0b011, 0b1010, 0b101>,
  CGVAP = GenDCReg<0b011, 0b1100, 0b011>,
  CGDVAP = GenDCReg<0b011, 0b1100, 0b101>,
  CGVADP = GenDCReg<0b011, 0b1101, 0b011>,
  CGDVADP = GenDCReg<0b011, 0b1101, 0b101>,
  CIGVAC = GenDCReg<0b011, 0b1110, 0b011>,
  CIGDVAC = GenDCReg<0b011, 0b1110, 0b101>,

  // DPB
  CVAP = GenDCReg<0b011, 0b1100, 0b001>,

  // DPB2
  CVADP = GenDCReg<0b011, 0b1101, 0b001>,
};

// Packs the two hint/barrier selector fields into their positions: op2 at bit 5 and CRm at bit 8.
template<uint32_t CRm, uint32_t op2>
inline constexpr uint32_t GenHintBarrierReg = (op2 << 5) | (CRm << 8);

// This `HintRegister` enum is used for the hint instruction.
// Each value is built by `GenHintBarrierReg<CRm, op2>`, so the fields are already shifted into place.
enum class HintRegister : uint32_t {
  NOP = GenHintBarrierReg<0b0000, 0b000>,
  YIELD = GenHintBarrierReg<0b0000, 0b001>,
  WFE = GenHintBarrierReg<0b0000, 0b010>,
  WFI = GenHintBarrierReg<0b0000, 0b011>,
  SEV = GenHintBarrierReg<0b0000, 0b100>,
  SEVL = GenHintBarrierReg<0b0000, 0b101>,
  DGH = GenHintBarrierReg<0b0000, 0b110>,
  CSDB = GenHintBarrierReg<0b0010, 0b100>,
};

// This `BarrierRegister` enum is used for the various barrier instructions.
// Each value is built by `GenHintBarrierReg<CRm, op2>`; CRm is zero for every entry here,
// so the enumerators differ only in their op2 field.
enum class BarrierRegister : uint32_t {
  CLREX = GenHintBarrierReg<0b0000, 0b010>,
  TCOMMIT = GenHintBarrierReg<0b0000, 0b011>,
  DSB = GenHintBarrierReg<0b0000, 0b100>,
  DMB = GenHintBarrierReg<0b0000, 0b101>,
  ISB = GenHintBarrierReg<0b0000, 0b110>,
  SB = GenHintBarrierReg<0b0000, 0b111>,
};

// This `BarrierScope` enum is used for the dsb/dmb instructions.
// Values are 4 bits wide. Within each group the low two bits are 0b01 for the LD variant,
// 0b10 for the ST variant and 0b11 for the unsuffixed variant; the high two bits pick the group.
enum class BarrierScope : uint32_t {
  // Outer shareable
  OSHLD = 0b0001,
  OSHST = 0b0010,
  OSH = 0b0011,
  // Non shareable
  NSHLD = 0b0101,
  NSHST = 0b0110,
  NSH = 0b0111,
  // Inner shareable
  ISHLD = 0b1001,
  ISHST = 0b1010,
  ISH = 0b1011,
  // Full System visibility
  LD = 0b1101,
  ST = 0b1110,
  SY = 0b1111,
};

// This `Prefetch` enum is used for prefetch instructions.
// Values are 5 bits wide: bits [4:3] pick the group (PLD/PLI/PST), bits [2:1] step through
// L1/L2/L3, and bit 0 distinguishes KEEP (0) from STRM (1).
enum class Prefetch : uint32_t {
  // Prefetch for load
  PLDL1KEEP = 0b00000,
  PLDL1STRM = 0b00001,
  PLDL2KEEP = 0b00010,
  PLDL2STRM = 0b00011,
  PLDL3KEEP = 0b00100,
  PLDL3STRM = 0b00101,

  // Preload instructions
  PLIL1KEEP = 0b01000,
  PLIL1STRM = 0b01001,
  PLIL2KEEP = 0b01010,
  PLIL2STRM = 0b01011,
  PLIL3KEEP = 0b01100,
  PLIL3STRM = 0b01101,

  // Prefetch for store
  PSTL1KEEP = 0b10000,
  PSTL1STRM = 0b10001,
  PSTL2KEEP = 0b10010,
  PSTL2STRM = 0b10011,
  PSTL3KEEP = 0b10100,
  PSTL3STRM = 0b10101,
};

// This `PredicatePattern` enum is used for some SVE instructions.
// Values are 5 bits wide. The encodings between SVE_VL256 (0b01101) and SVE_MUL4 (0b11101)
// have no enumerator here.
enum class PredicatePattern : uint32_t {
  SVE_POW2 = 0b00000,
  SVE_VL1 = 0b00001,
  SVE_VL2 = 0b00010,
  SVE_VL3 = 0b00011,
  SVE_VL4 = 0b00100,
  SVE_VL5 = 0b00101,
  SVE_VL6 = 0b00110,
  SVE_VL7 = 0b00111,
  SVE_VL8 = 0b01000,
  SVE_VL16 = 0b01001,
  SVE_VL32 = 0b01010,
  SVE_VL64 = 0b01011,
  SVE_VL128 = 0b01100,
  SVE_VL256 = 0b01101,
  SVE_MUL4 = 0b11101,
  SVE_MUL3 = 0b11110,
  SVE_ALL = 0b11111,
};

// Used with SVE FP immediate arithmetic instructions
// Each enum has exactly two enumerators with underlying values 0 and 1.
// NOTE(review): the enumerator names presumably spell the floating point constant selected
// (e.g. `_0_5` for 0.5, `_2_0` for 2.0) - confirm against the instruction emitters.
enum class SVEFAddSubImm : uint32_t {
  _0_5,
  _1_0,
};
enum class SVEFMulImm : uint32_t {
  _0_5,
  _2_0,
};
enum class SVEFMaxMinImm : uint32_t {
  _0_0,
  _1_0,
};

/* This `BackwardLabel` struct is used for retaining a location for PC-Relative instructions.
 * This is specifically a label for a target that is logically `below` an instruction that uses it.
 * Which means that a branch would jump backwards.
 *
 * The branch emitters compute their offset as `Location - <current cursor address>`.
 */
struct BackwardLabel {
  // Address of the target. Null until set; the BiDirectionalLabel branch overloads treat
  // a non-null value as "target already known".
  uint8_t* Location {};
};

/* This `ForwardLabel` struct is used for retaining a location for PC-Relative instructions.
 * This is specifically a label for a target that is logically `above` an instruction that uses it.
 * Which means that a branch would jump forwards.
 *
 * Rather than a target address, it collects the addresses of the instructions that refer to it,
 * together with the kind of each instruction.
 */
struct ForwardLabel {
  // Kind of instruction that was emitted at a recorded location.
  enum class InstType {
    UNKNOWN,
    ADR,
    ADRP,
    B,           // Recorded by the `b` and `bl` forward-label emitters
    BC,          // Recorded by the conditional `b`/`bc` and the `cbz`/`cbnz` forward-label emitters
    TEST_BRANCH, // Recorded by the `tbz`/`tbnz` forward-label emitters
    RELATIVE_LOAD,
    LONG_ADDRESS_GEN,
  };

  // One instruction referring to this label.
  struct Reference {
    uint8_t* Location {};
    InstType Type = InstType::UNKNOWN;
  };

  // The first element is stored separately to avoid allocations for simple cases
  // A null `FirstInst.Location` means no reference has been recorded yet.
  Reference FirstInst;

  // Every reference after the first.
  fextl::vector<Reference> Insts;
};

/* This `BiDirectionalLabel` struct used for retaining a location for PC-Relative instructions.
 * This is specifically a label for a target that is in either direction of an instruction that uses it.
 * Which means a branch could jump backwards or forwards depending on situation.
 */
struct BiDirectionalLabel {
  // Bound location of the label. `Emitter::Bind(BiDirectionalLabel*)` sets it only while it is still null.
  BackwardLabel Backward;
  // References recorded against the label; `Emitter::Bind(BiDirectionalLabel*)` patches them when binding.
  ForwardLabel Forward;
};

// Records `Location` as a use of `Label`.
// The first use goes into the inline `FirstInst` slot; later uses are appended to `Insts`.
static inline void AddLocationToLabel(ForwardLabel* Label, ForwardLabel::Reference&& Location) {
  const bool InlineSlotTaken = Label->FirstInst.Location != nullptr;
  if (InlineSlotTaken) {
    Label->Insts.push_back(Location);
    return;
  }

  Label->FirstInst = Location;
}

// Some FCMA ASIMD instructions support a rotation argument.
enum class Rotation : uint32_t {
  // Values are written as 2-bit patterns; each name gives the rotation in degrees.
  // NOTE(review): presumably the value is placed directly into the instruction's rotation field — confirm at use sites.
  ROTATE_0 = 0b00,
  ROTATE_90 = 0b01,
  ROTATE_180 = 0b10,
  ROTATE_270 = 0b11,
};

// Concept for constraining some instructions to accept only an XRegister or WRegister.
// Particularly for operations whose encodings differ depending on which one is used.
// Satisfied only when T is exactly XRegister or WRegister; std::is_same_v strips no cv/ref qualifiers.
template<typename T>
concept IsXOrWRegister = std::is_same_v<T, XRegister> || std::is_same_v<T, WRegister>;

// Concept for constraining some instructions to accept only a QRegister or DRegister.
// Satisfied only when T is exactly QRegister or DRegister; std::is_same_v strips no cv/ref qualifiers.
template<typename T>
concept IsQOrDRegister = std::is_same_v<T, QRegister> || std::is_same_v<T, DRegister>;

// Satisfied only when T is exactly one of the four label types:
// ForwardLabel, BackwardLabel, BiDirectionalLabel or ForwardLabel::Reference.
template<typename T>
concept IsLabel = std::is_same_v<T, ARMEmitter::ForwardLabel> || std::is_same_v<T, ARMEmitter::BackwardLabel> ||
                  std::is_same_v<T, ARMEmitter::BiDirectionalLabel> || std::is_same_v<T, ARMEmitter::ForwardLabel::Reference>;

// Two-state result type; values are spelled out and match the implicit numbering.
enum class BranchEncodeSucceeded {
  Success = 0,
  Failure = 1,
};

// Whether or not a given set of vector registers are sequential
// in increasing order as far as the register file is concerned (modulo its size)
//
// For example, a set of registers like:
//
// v1,  v2, v3 and
// v31, v0, v1
//
// would both be considered sequential sequences, and some instructions in particular
// limit register lists to these kind of sequences.
//
template<typename T, typename... Args>
constexpr bool AreVectorsSequential(T first, const Args&... args) {
  // At least two registers are required so there is always a pair to compare.
  static_assert(sizeof...(args) >= 1, "Number of arguments must be greater than 1");

  // Index of the register most recently visited; starts at the head of the list.
  auto PreviousIdx = first.Idx();

  // True when `Next` directly follows the previously visited register, wrapping from 31 back to 0.
  const auto FollowsPrevious = [&PreviousIdx](const auto& Next) {
    const auto NextIdx = Next.Idx();
    const bool IsSuccessor = ((PreviousIdx + 1) % 32) == NextIdx;
    PreviousIdx = NextIdx;
    return IsSuccessor;
  };

  // Left-to-right fold; stops at the first register that breaks the sequence.
  return (FollowsPrevious(args) && ...);
}

// Returns if the immediate can fit in to add/sub immediate instruction encodings.
constexpr bool IsImmAddSub(uint64_t imm) {
  constexpr uint64_t Low12Mask = 0xFFF;
  constexpr uint64_t High12Mask = Low12Mask << 12;

  // Unshifted form: nothing may be set outside bits [11:0].
  if ((imm & ~Low12Mask) == 0) {
    return true;
  }

  // Shifted form: nothing may be set outside bits [23:12].
  return (imm & ~High12Mask) == 0;
}

// This is an emitter that is designed around the smallest code bloat as possible.
// Eschewing most developer convenience in order to keep code as small as possible.

// Choices:
// - Size of ops passed as an argument rather than template to let the compiler use csel instead of branching.
// - Registers are unsized so they can be passed in a GPR and not need conversion operations
class Emitter : public ARMEmitter::Buffer {
public:
  Emitter() = default;

  // Forwards the backing memory range (`Base`, `BaseSize`) to the `Buffer` base class.
  Emitter(uint8_t* Base, uint64_t BaseSize)
    : Buffer(Base, BaseSize) {}

  // Bind a backward label to an address.
  // Address that is bound is the current emitter location.
  // Binding a label whose location is already set trips the LOGMAN_THROW_A_FMT check.
  [[nodiscard]] bool Bind(BackwardLabel* Label) {
    LOGMAN_THROW_A_FMT(Label->Location == nullptr, "Trying to bind a label twice");
    Label->Location = GetCursorAddress<uint8_t*>();

    // Always binds because it is only storing a location.
    return true;
  }

  // Patch one recorded forward reference so that it targets the current cursor address.
  //
  // `Label->Location` is the address of the instruction (or instruction slots) to patch and `Label->Type`
  // selects how the PC-relative displacement is re-encoded.
  //
  // Returns false, leaving the instruction untouched, when the displacement does not satisfy the range or
  // alignment check of the ADR, ADRP, B, TEST_BRANCH, BC or RELATIVE_LOAD case.
  // LONG_ADDRESS_GEN always returns true. An unexpected type only reports through LOGMAN_MSG_A_FMT and
  // also returns true.
  [[nodiscard]] bool Bind(const ForwardLabel::Reference* Label) {
    uint8_t* CurrentAddress = GetCursorAddress<uint8_t*>();
    // Patch up the instructions
    switch (Label->Type) {
    case ForwardLabel::InstType::ADR: {
      uint32_t* Instruction = reinterpret_cast<uint32_t*>(Label->Location);
      int64_t Imm = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(Instruction);
      if (!IsADRRange(Imm)) {
        // Can't bind.
        return false;
      }
      // immlo lives in bits [30:29], immhi in bits [23:5]: 2 + 19 = 21 immediate bits in total.
      uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5;
      // Keep exactly those 21 bits. A wider mask would let the sign bits of a negative displacement
      // spill into bit 24 of the instruction, which is outside the immediate field.
      uint32_t Offset = static_cast<uint32_t>(Imm) & 0x1F'FFFF;
      uint32_t Inst = *Instruction & ~InstMask;
      Inst |= (Offset & 0b11) << 29;
      Inst |= (Offset >> 2) << 5;
      *Instruction = Inst;
      break;
    }
    case ForwardLabel::InstType::ADRP: {
      uint32_t* Instruction = reinterpret_cast<uint32_t*>(Label->Location);
      int64_t Imm = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(Instruction);

      if (!(IsADRPRange(Imm) && IsADRPAligned(Imm))) {
        // Can't bind.
        return false;
      }

      // The immediate is expressed in units of 4KiB.
      Imm >>= 12;
      // Same field layout as ADR: immlo in bits [30:29], immhi in bits [23:5].
      uint32_t InstMask = 0b11 << 29 | 0b1111'1111'1111'1111'111 << 5;
      // 21 immediate bits, see the ADR case for why the mask must not be wider.
      uint32_t Offset = static_cast<uint32_t>(Imm) & 0x1F'FFFF;
      uint32_t Inst = *Instruction & ~InstMask;
      Inst |= (Offset & 0b11) << 29;
      Inst |= (Offset >> 2) << 5;
      *Instruction = Inst;
      break;
    }
    case ForwardLabel::InstType::B: {
      uint32_t* Instruction = reinterpret_cast<uint32_t*>(Label->Location);
      int64_t Imm = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(Instruction);
      // +-128MiB, 4-byte aligned.
      if (!(Imm >= -134217728 && Imm <= 134217724 && ((Imm & 0b11) == 0))) {
        // Can't bind.
        return false;
      }
      Imm >>= 2;
      // 26-bit word displacement in bits [25:0].
      uint32_t InstMask = 0x3FF'FFFF;
      uint32_t Offset = static_cast<uint32_t>(Imm) & InstMask;
      uint32_t Inst = *Instruction & ~InstMask;
      Inst |= Offset;
      *Instruction = Inst;

      break;
    }
    case ForwardLabel::InstType::TEST_BRANCH: {
      uint32_t* Instruction = reinterpret_cast<uint32_t*>(Label->Location);
      int64_t Imm = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(Instruction);
      // +-32KiB, 4-byte aligned.
      if (!(Imm >= -32768 && Imm <= 32764 && ((Imm & 0b11) == 0))) {
        // Can't bind.
        return false;
      }
      Imm >>= 2;
      // 14-bit word displacement in bits [18:5].
      uint32_t InstMask = 0x3FFF;
      uint32_t Offset = static_cast<uint32_t>(Imm) & InstMask;
      uint32_t Inst = *Instruction & ~(InstMask << 5);
      Inst |= Offset << 5;
      *Instruction = Inst;

      break;
    }
    case ForwardLabel::InstType::BC:
    case ForwardLabel::InstType::RELATIVE_LOAD: {
      uint32_t* Instruction = reinterpret_cast<uint32_t*>(Label->Location);
      int64_t Imm = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(Instruction);
      // +-1MiB, 4-byte aligned.
      if (!(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0))) {
        // Can't bind.
        return false;
      }
      Imm >>= 2;
      // 19-bit word displacement in bits [23:5].
      uint32_t InstMask = 0x7'FFFF;
      uint32_t Offset = static_cast<uint32_t>(Imm) & InstMask;
      uint32_t Inst = *Instruction & ~(InstMask << 5);
      Inst |= Offset << 5;
      *Instruction = Inst;
      break;
    }
    case ForwardLabel::InstType::LONG_ADDRESS_GEN: {
      // Three instruction slots were reserved at `Label->Location`; compute the displacement from each slot.
      const auto* Instructions = reinterpret_cast<uint32_t*>(Label->Location);
      const auto ImmInstOne = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(&Instructions[0]);
      const auto ImmInstTwo = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(&Instructions[1]);
      const auto ImmInstThree = reinterpret_cast<int64_t>(CurrentAddress) - reinterpret_cast<int64_t>(&Instructions[2]);
      const auto OriginalOffset = GetCursorOffset();

      // Temporarily move the cursor back to the reserved slots so the regular emitters overwrite them.
      const auto InstOffset = GetCursorOffsetFromAddress(Instructions);
      SetCursorOffset(InstOffset);

      // We encoded the destination register in to the first instruction space.
      // Read it back.
      ARMEmitter::Register DestReg(Instructions[0]);

      // NOTE(review): the `& 0x7FFF` masks below keep only 15 bits of the immediate, which looks narrower
      // than the 21-bit ADR/ADRP immediate — confirm against what adr()/adrp() expect.
      if (IsADRRange(ImmInstThree)) {
        // If within ADR range from the third instruction, then we can emit NOP+NOP+ADR
        nop();
        nop();
        adr(DestReg, static_cast<uint32_t>(ImmInstThree) & 0x7FFF);
      } else if (IsADRPRange(ImmInstTwo)) {

        // The ADRP range is checked relative to the second instruction slot.
        // First check if the target itself is ADRP-aligned, in which case no ADD is required.
        if (IsADRPAligned(reinterpret_cast<uint64_t>(CurrentAddress))) {
          // We can emit nop + nop + adrp
          nop();
          nop();
          adrp(DestReg, static_cast<uint32_t>(ImmInstThree >> 12) & 0x7FFF);
        } else {
          // Not aligned, need nop + adrp + add
          nop();
          adrp(DestReg, static_cast<uint32_t>(ImmInstTwo >> 12) & 0x7FFF);
          add(ARMEmitter::Size::i64Bit, DestReg, DestReg, ImmInstTwo & 0xFFF);
        }
      } else {
        // Stinky path, we need to emit a movz+movk+movk sequence.
        // NOTE(review): this materializes the displacement `ImmInstOne` (bits [46:0]) rather than
        // `CurrentAddress` itself — confirm this is what callers expect.
        movz(ARMEmitter::Size::i64Bit, DestReg, uint32_t(ImmInstOne >> 32) & 0x7FFF, 32);
        movk(ARMEmitter::Size::i64Bit, DestReg, uint32_t(ImmInstOne >> 16) & 0xFFFF, 16);
        movk(ARMEmitter::Size::i64Bit, DestReg, uint32_t(ImmInstOne) & 0xFFFF);
      }

      // Restore the cursor to where binding was requested.
      SetCursorOffset(OriginalOffset);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unexpected inst type in label fixup");
    }

    return true;
  }

  // Bind a forward label to a location.
  // This walks the inline first reference and then all the instructions in the label's vector,
  // backpatching every instruction that has used the label.
  // Returns true only if every reference could be patched; patching continues after a failure.
  [[nodiscard]] bool Bind(ForwardLabel* Label) {
    bool Bound = true;
    if (Label->FirstInst.Location) {
      Bound &= Bind(&Label->FirstInst);
    }
    for (auto& Inst : Label->Insts) {
      Bound &= Bind(&Inst);
    }

    return Bound;
  }

  // Bind a bidirectional label to a location.
  // The backward location is only set if it has not been set yet; forward references are always patched.
  [[nodiscard]] bool Bind(BiDirectionalLabel* Label) {
    bool Bound = true;
    if (!Label->Backward.Location) {
      Bound &= Bind(&Label->Backward);
    }
    Bound &= Bind(&Label->Forward);

    return Bound;
  }

  // Returns the condition obtained by flipping the lowest bit of `cond`'s underlying value.
  static constexpr Condition InvertCondition(Condition cond) {
    // These behave as always, so it makes no sense to allow inverting these.
    LOGMAN_THROW_A_FMT(cond != Condition::CC_AL && cond != Condition::CC_NV, "Cannot invert CC_AL or CC_NV");
    return static_cast<Condition>(FEXCore::ToUnderlying(cond) ^ 1);
  }

#include <CodeEmitter/VixlUtils.inl>

public:

// This symbol is used to allow external tooling (IDEs, clang-format, ...) to process the included files individually:
// If defined, the files will inject member functions into this class.
// If not, the files will wrap the member functions in a class so that tooling will process them properly.
#define INCLUDED_BY_EMITTER

  // TODO: Implement SME when it matters.
#include <CodeEmitter/ALUOps.inl>
#include <CodeEmitter/BranchOps.inl>
#include <CodeEmitter/LoadstoreOps.inl>
#include <CodeEmitter/SystemOps.inl>
#include <CodeEmitter/ScalarOps.inl>
#include <CodeEmitter/ASIMDOps.inl>
#include <CodeEmitter/SVEOps.inl>

#undef INCLUDED_BY_EMITTER

protected:
  // Field encoders: each returns its operand shifted to a fixed bit position.
  // The template overloads take anything with an `Idx()` accessor; the uint32_t overloads take a raw index.

  // ra: shifted to bit 10.
  template<typename T>
  uint32_t Encode_ra(T Reg) const {
    return Reg.Idx() << 10;
  }
  uint32_t Encode_ra(uint32_t Reg) const {
    return Reg << 10;
  }
  // rt2: shifted to bit 10.
  template<typename T>
  uint32_t Encode_rt2(T Reg) const {
    return Reg.Idx() << 10;
  }
  uint32_t Encode_rt2(uint32_t Reg) const {
    return Reg << 10;
  }
  // rm: shifted to bit 16.
  template<typename T>
  uint32_t Encode_rm(T Reg) const {
    return Reg.Idx() << 16;
  }
  uint32_t Encode_rm(uint32_t Reg) const {
    return Reg << 16;
  }
  // rs: shifted to bit 16.
  template<typename T>
  uint32_t Encode_rs(T Reg) const {
    return Reg.Idx() << 16;
  }
  uint32_t Encode_rs(uint32_t Reg) const {
    return Reg << 16;
  }
  // rn: shifted to bit 5.
  template<typename T>
  uint32_t Encode_rn(T Reg) const {
    return Reg.Idx() << 5;
  }
  uint32_t Encode_rn(uint32_t Reg) const {
    return Reg << 5;
  }
  // rd: unshifted (bit 0).
  template<typename T>
  uint32_t Encode_rd(T Reg) const {
    return Reg.Idx();
  }
  uint32_t Encode_rd(uint32_t Reg) const {
    return Reg;
  }
  // rt: unshifted (bit 0). A `Prefetch` value is encoded through its underlying integer.
  template<typename T>
  uint32_t Encode_rt(T Reg) const {
    return Reg.Idx();
  }
  uint32_t Encode_rt(Prefetch Reg) const {
    return FEXCore::ToUnderlying(Reg);
  }
  uint32_t Encode_rt(uint32_t Reg) const {
    return Reg;
  }
  // pd: unshifted; takes the operand's underlying integer value.
  template<typename T>
  uint32_t Encode_pd(T Reg) const {
    return FEXCore::ToUnderlying(Reg);
  }
};
} // namespace ARMEmitter


================================================
FILE: CodeEmitter/CodeEmitter/LoadstoreOps.inl
================================================
// SPDX-License-Identifier: MIT
/* Load-store instruction emitters
 *
 * GPR load-stores that take a `Size` argument as their first argument can be 32-bit or 64-bit.
 * For GPR load-stores that don't take a `Size` argument, then their operating size is determined by the name of the instruction.
 *
 * For Vector load-stores, most take a `SubRegSize` to determine the size of the elements getting loaded or stored.
 * Depending on the instruction, it can be a single element or the full instruction.
 *
 * There are some load-store helper functions which take a `ExtendedMemOperand` argument.
 * This helper will select the viable load-store that can work with the provided encapsulated arguments.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // Compare and swap pair
  // CASP: compare and swap a pair of registers.
  // rs:rs2 and rt:rt2 are register pairs. Only rs and rt are forwarded to AtomicOp, so the second register
  // of each pair must be the one that directly follows the first. The architecture additionally requires the
  // first register of each pair to be even-numbered (an odd Rs or Rt is UNDEFINED), which is checked here too.
  // Passes 0/0 for AtomicOp's two flag arguments.
  void casp(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rs2, ARMEmitter::Register rt, ARMEmitter::Register rt2,
            ARMEmitter::Register rn) {
    LOGMAN_THROW_A_FMT((rs.Idx() & 1) == 0, "rs must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rt.Idx() & 1) == 0, "rt must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rs.Idx() + 1) == rs2.Idx(), "These must be sequential");
    LOGMAN_THROW_A_FMT((rt.Idx() + 1) == rt2.Idx(), "These must be sequential");
    constexpr uint32_t Op = 0b0000'1000'001 << 21;
    AtomicOp(Op, s, 0, 0, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // CASPA: compare and swap a pair of registers.
  // rs:rs2 and rt:rt2 are register pairs. Only rs and rt are forwarded to AtomicOp, so the second register
  // of each pair must be the one that directly follows the first. The architecture additionally requires the
  // first register of each pair to be even-numbered (an odd Rs or Rt is UNDEFINED), which is checked here too.
  // Passes 1/0 for AtomicOp's two flag arguments.
  void caspa(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rs2, ARMEmitter::Register rt, ARMEmitter::Register rt2,
             ARMEmitter::Register rn) {
    LOGMAN_THROW_A_FMT((rs.Idx() & 1) == 0, "rs must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rt.Idx() & 1) == 0, "rt must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rs.Idx() + 1) == rs2.Idx(), "These must be sequential");
    LOGMAN_THROW_A_FMT((rt.Idx() + 1) == rt2.Idx(), "These must be sequential");
    constexpr uint32_t Op = 0b0000'1000'001 << 21;
    AtomicOp(Op, s, 1, 0, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // CASPL: compare and swap a pair of registers.
  // rs:rs2 and rt:rt2 are register pairs. Only rs and rt are forwarded to AtomicOp, so the second register
  // of each pair must be the one that directly follows the first. The architecture additionally requires the
  // first register of each pair to be even-numbered (an odd Rs or Rt is UNDEFINED), which is checked here too.
  // Passes 0/1 for AtomicOp's two flag arguments.
  void caspl(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rs2, ARMEmitter::Register rt, ARMEmitter::Register rt2,
             ARMEmitter::Register rn) {
    LOGMAN_THROW_A_FMT((rs.Idx() & 1) == 0, "rs must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rt.Idx() & 1) == 0, "rt must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rs.Idx() + 1) == rs2.Idx(), "These must be sequential");
    LOGMAN_THROW_A_FMT((rt.Idx() + 1) == rt2.Idx(), "These must be sequential");
    constexpr uint32_t Op = 0b0000'1000'001 << 21;
    AtomicOp(Op, s, 0, 1, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // CASPAL: compare and swap a pair of registers.
  // rs:rs2 and rt:rt2 are register pairs. Only rs and rt are forwarded to AtomicOp, so the second register
  // of each pair must be the one that directly follows the first. The architecture additionally requires the
  // first register of each pair to be even-numbered (an odd Rs or Rt is UNDEFINED), which is checked here too.
  // Passes 1/1 for AtomicOp's two flag arguments.
  void caspal(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rs2, ARMEmitter::Register rt, ARMEmitter::Register rt2,
              ARMEmitter::Register rn) {
    LOGMAN_THROW_A_FMT((rs.Idx() & 1) == 0, "rs must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rt.Idx() & 1) == 0, "rt must be an even-numbered register");
    LOGMAN_THROW_A_FMT((rs.Idx() + 1) == rs2.Idx(), "These must be sequential");
    LOGMAN_THROW_A_FMT((rt.Idx() + 1) == rt2.Idx(), "These must be sequential");
    constexpr uint32_t Op = 0b0000'1000'001 << 21;
    AtomicOp(Op, s, 1, 1, rs, rt, ARMEmitter::Reg::r31, rn);
  }

  // Advanced SIMD load/store multiple structures
  // LD1 (multiple structures), one-register list, no post-index.
  // Reg::r0 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld1(T rt, Register rn) {
    constexpr uint32_t ListOpcode = 0b0111 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD1 (multiple structures), two-register list, no post-index.
  // Only rt is forwarded to the helper; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1010 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD1 (multiple structures), three-register list, no post-index.
  // Only rt is forwarded to the helper; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0110 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD1 (multiple structures), four-register list, no post-index.
  // Only rt is forwarded to the helper; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, T rt4, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0010 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST1 (multiple structures), one-register list, no post-index.
  // Reg::r0 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st1(T rt, Register rn) {
    constexpr uint32_t ListOpcode = 0b0111 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST1 (multiple structures), two-register list, no post-index.
  // Only rt is forwarded to the helper; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1010 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST1 (multiple structures), three-register list, no post-index.
  // Only rt is forwarded to the helper; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0110 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST1 (multiple structures), four-register list, no post-index.
  // Only rt is forwarded to the helper; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, T rt4, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0010 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD2 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void ld2(T rt, T rt2, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1000 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST2 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void st2(T rt, T rt2, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1000 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD3 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld3(T rt, T rt2, T rt3, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0100 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST3 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st3(T rt, T rt2, T rt3, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0100 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // LD4 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld4(T rt, T rt2, T rt3, T rt4, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0000 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, true>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // ST4 (multiple structures), no post-index.
  // Only rt is forwarded to the helper; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st4(T rt, T rt2, T rt3, T rt4, Register rn) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0000 << 12;
    constexpr uint32_t BaseOp = 0b0000'1100'000 << 21;
    ASIMDLoadStoreMultipleStructure<size, false>(BaseOp, ListOpcode, rt, rn, Reg::r0);
  }
  // Advanced SIMD load/store multiple structures (post-indexed)
  static constexpr uint32_t ASIMDLoadstoreMultiplePost_Op = 0b0000'1100'100 << 21;
  // LD1 (multiple structures), one-register list, register post-index: rm is forwarded to the helper.
  template<SubRegSize size, typename T>
  void ld1(T rt, Register rn, Register rm) {
    constexpr uint32_t ListOpcode = 0b0111 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD1 (multiple structures), one-register list, immediate post-index.
  // PostOffset is only validated (16 for QRegister, 8 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld1(T rt, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(((PostOffset == 16) && std::is_same_v<QRegister, T>) || ((PostOffset == 8) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0111 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // LD1 (multiple structures), two-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1010 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD1 (multiple structures), two-register list, immediate post-index.
  // PostOffset is only validated (32 for QRegister, 16 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 32) && std::is_same_v<QRegister, T>) || ((PostOffset == 16) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b1010 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // LD1 (multiple structures), three-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0110 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD1 (multiple structures), three-register list, immediate post-index.
  // PostOffset is only validated (48 for QRegister, 24 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 48) && std::is_same_v<QRegister, T>) || ((PostOffset == 24) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0110 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // LD1 (multiple structures), four-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, T rt4, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0010 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD1 (multiple structures), four-register list, immediate post-index.
  // PostOffset is only validated (64 for QRegister, 32 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld1(T rt, T rt2, T rt3, T rt4, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 64) && std::is_same_v<QRegister, T>) || ((PostOffset == 32) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0010 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }

  // ST1 (multiple structures), one-register list, register post-index: rm is forwarded to the helper.
  template<SubRegSize size, typename T>
  void st1(T rt, Register rn, Register rm) {
    constexpr uint32_t ListOpcode = 0b0111 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST1 (multiple structures), one-register list, immediate post-index.
  // PostOffset is only validated (16 for QRegister, 8 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st1(T rt, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(((PostOffset == 16) && std::is_same_v<QRegister, T>) || ((PostOffset == 8) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0111 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST1 (multiple structures), two-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1010 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST1 (multiple structures), two-register list, immediate post-index.
  // PostOffset is only validated (32 for QRegister, 16 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 32) && std::is_same_v<QRegister, T>) || ((PostOffset == 16) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b1010 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST1 (multiple structures), three-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0110 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST1 (multiple structures), three-register list, immediate post-index.
  // PostOffset is only validated (48 for QRegister, 24 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 48) && std::is_same_v<QRegister, T>) || ((PostOffset == 24) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0110 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST1 (multiple structures), four-register list, register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, T rt4, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0010 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST1 (multiple structures), four-register list, immediate post-index.
  // PostOffset is only validated (64 for QRegister, 32 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st1(T rt, T rt2, T rt3, T rt4, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 64) && std::is_same_v<QRegister, T>) || ((PostOffset == 32) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0010 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }

  // LD2 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void ld2(T rt, T rt2, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1000 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD2 (multiple structures), immediate post-index.
  // PostOffset is only validated (32 for QRegister, 16 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld2(T rt, T rt2, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 32) && std::is_same_v<QRegister, T>) || ((PostOffset == 16) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b1000 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST2 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 is checked to directly follow it.
  template<SubRegSize size, typename T>
  void st2(T rt, T rt2, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    constexpr uint32_t ListOpcode = 0b1000 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST2 (multiple structures), immediate post-index.
  // PostOffset is only validated (32 for QRegister, 16 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st2(T rt, T rt2, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 32) && std::is_same_v<QRegister, T>) || ((PostOffset == 16) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b1000 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // LD3 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld3(T rt, T rt2, T rt3, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0100 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD3 (multiple structures), immediate post-index.
  // PostOffset is only validated (48 for QRegister, 24 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld3(T rt, T rt2, T rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 48) && std::is_same_v<QRegister, T>) || ((PostOffset == 24) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0100 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST3 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2 and rt3 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st3(T rt, T rt2, T rt3, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    constexpr uint32_t ListOpcode = 0b0100 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST3 (multiple structures), immediate post-index.
  // PostOffset is only validated (48 for QRegister, 24 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void st3(T rt, T rt2, T rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 48) && std::is_same_v<QRegister, T>) || ((PostOffset == 24) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0100 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // LD4 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void ld4(T rt, T rt2, T rt3, T rt4, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0000 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // LD4 (multiple structures), immediate post-index.
  // PostOffset is only validated (64 for QRegister, 32 for DRegister); Reg::r31 is passed in the helper's rm position.
  template<SubRegSize size, typename T>
  void ld4(T rt, T rt2, T rt3, T rt4, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    LOGMAN_THROW_A_FMT(((PostOffset == 64) && std::is_same_v<QRegister, T>) || ((PostOffset == 32) && std::is_same_v<DRegister, T>),
                       "Post-index offset needs to match number of elements times their size");

    constexpr uint32_t ListOpcode = 0b0000 << 12;
    ASIMDLoadStoreMultipleStructure<size, true>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, Reg::r31);
  }
  // ST4 (multiple structures), register post-index: rm is forwarded to the helper.
  // Only rt is forwarded as the register list; rt2..rt4 are checked to follow it in sequence.
  template<SubRegSize size, typename T>
  void st4(T rt, T rt2, T rt3, T rt4, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    constexpr uint32_t ListOpcode = 0b0000 << 12;
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, ListOpcode, rt, rn, rm);
  }
  // ST4 (multiple structures), post-indexed by an immediate.
  // PostOffset must equal the total bytes covered by the four registers:
  // 64 when T is QRegister, 32 when T is DRegister.
  template<SubRegSize size, typename T>
  void st4(T rt, T rt2, T rt3, T rt4, Register rn, uint32_t PostOffset) {
    // Opcode field (placed at bit 12) used by the four-register form.
    constexpr uint32_t FourRegOpcode = 0b0000 << 12;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    LOGMAN_THROW_A_FMT((std::is_same_v<DRegister, T> && PostOffset == 32) || (std::is_same_v<QRegister, T> && PostOffset == 64),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDLoadStoreMultipleStructure<size, false>(ASIMDLoadstoreMultiplePost_Op, FourRegOpcode, rt, rn, Reg::r31);
  }

  // ASIMD loadstore single
  // ST1 (single structure, no offset): stores element `Index` of rt to [rn].
  template<SubRegSize size>
  void st1(VRegister rt, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, false, 1>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // ST2 (single structure, no offset): stores element `Index` of rt and rt2 to [rn].
  template<SubRegSize size>
  void st2(VRegister rt, VRegister rt2, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, false, 2>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // ST3 (single structure, no offset): stores element `Index` of rt, rt2 and rt3 to [rn].
  template<SubRegSize size>
  void st3(VRegister rt, VRegister rt2, VRegister rt3, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, false, 3>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // ST4 (single structure, no offset): stores element `Index` of rt..rt4 to [rn].
  template<SubRegSize size>
  void st4(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, false, 4>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // LD1 (single structure, no offset): loads element `Index` of rt from [rn].
  template<SubRegSize size>
  void ld1(VRegister rt, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 1>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // LD1R (no offset): loads one element from [rn] for replication into rt.
  template<SubRegSize size, IsQOrDRegister T>
  void ld1r(T rt, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 1>(ClassBits, ReplicateOpcode, rt, rn, Reg::r0);
  }
  // LD2 (single structure, no offset): loads element `Index` of rt and rt2 from [rn].
  template<SubRegSize size>
  void ld2(VRegister rt, VRegister rt2, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 2>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // LD2R (no offset): loads a two-element structure from [rn] for replication into rt and rt2.
  template<SubRegSize size, IsQOrDRegister T>
  void ld2r(T rt, T rt2, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 2>(ClassBits, ReplicateOpcode, rt, rn, Reg::r0);
  }
  // LD3 (single structure, no offset): loads element `Index` of rt, rt2 and rt3 from [rn].
  template<SubRegSize size>
  void ld3(VRegister rt, VRegister rt2, VRegister rt3, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 3>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // LD3R (no offset): loads a three-element structure from [rn] for replication into rt..rt3.
  template<SubRegSize size, IsQOrDRegister T>
  void ld3r(T rt, T rt2, T rt3, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 3>(ClassBits, ReplicateOpcode, rt, rn, Reg::r0);
  }
  // LD4 (single structure, no offset): loads element `Index` of rt..rt4 from [rn].
  template<SubRegSize size>
  void ld4(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, uint32_t Index, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 4>(ClassBits, ScaleBits, rt, Index, rn, Reg::r0);
  }
  // LD4R (no offset): loads a four-element structure from [rn] for replication into rt..rt4.
  template<SubRegSize size, IsQOrDRegister T>
  void ld4r(T rt, T rt2, T rt3, T rt4, Register rn) {
    // Instruction class bits for the no-offset single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'000 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    // Reg::r0 fills the rm operand, which this form does not use.
    ASIMDSTLD<size, true, 4>(ClassBits, ReplicateOpcode, rt, rn, Reg::r0);
  }

  // ASIMD loadstore single post-indexed
  // ST1 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void st1(VRegister rt, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    ASIMDSTLD<size, false, 1>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // ST1 (single structure), post-indexed by an immediate.
  // PostOffset must equal the element size in bytes (1, 2, 4 or 8).
  template<SubRegSize size>
  void st1(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 1) ||
                         (size == SubRegSize::i16Bit && PostOffset == 2) ||
                         (size == SubRegSize::i32Bit && PostOffset == 4) ||
                         (size == SubRegSize::i64Bit && PostOffset == 8),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, false, 1>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // ST2 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void st2(VRegister rt, VRegister rt2, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    ASIMDSTLD<size, false, 2>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // ST2 (single structure), post-indexed by an immediate.
  // Only the first register is taken; PostOffset must be twice the element size in bytes.
  template<SubRegSize size>
  void st2(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 2) ||
                         (size == SubRegSize::i16Bit && PostOffset == 4) ||
                         (size == SubRegSize::i32Bit && PostOffset == 8) ||
                         (size == SubRegSize::i64Bit && PostOffset == 16),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, false, 2>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // ST3 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void st3(VRegister rt, VRegister rt2, VRegister rt3, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    ASIMDSTLD<size, false, 3>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // ST3 (single structure), post-indexed by an immediate.
  // Only the first register is taken. PostOffset must be three times the element
  // size in bytes: 3, 6, 12 or 24 for 8/16/32/64-bit elements respectively.
  template<SubRegSize size>
  void st3(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Three 32-bit elements occupy 12 bytes (the previous check wrongly required 8).
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && (PostOffset == 3)) || (size == SubRegSize::i16Bit && (PostOffset == 6)) ||
                         (size == SubRegSize::i32Bit && (PostOffset == 12)) || (size == SubRegSize::i64Bit && (PostOffset == 24)),
                       "Post-index offset needs to match number of elements times their size");

    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t Op = 0b0000'1101'100 << 21;
    constexpr uint32_t Opcode =
        size == SubRegSize::i8Bit  ? 0b000 : // Scale = 0
        size == SubRegSize::i16Bit ? 0b010 : // Scale = 1
        size == SubRegSize::i32Bit ? 0b100 : // Scale = 2
        size == SubRegSize::i64Bit ? 0b100 : // Scale = 2 (Uses size to determine difference between 32-bit).
        0;
    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, false, 3>(Op, Opcode, rt, Index, rn, Reg::r31);
  }
  // ST4 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void st4(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    ASIMDSTLD<size, false, 4>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // ST4 (single structure), post-indexed by an immediate.
  // Only the first register is taken; PostOffset must be four times the element size in bytes.
  template<SubRegSize size>
  void st4(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 4) ||
                         (size == SubRegSize::i16Bit && PostOffset == 8) ||
                         (size == SubRegSize::i32Bit && PostOffset == 16) ||
                         (size == SubRegSize::i64Bit && PostOffset == 32),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, false, 4>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // LD1 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void ld1(VRegister rt, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    ASIMDSTLD<size, true, 1>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // LD1 (single structure), post-indexed by an immediate.
  // PostOffset must equal the element size in bytes (1, 2, 4 or 8).
  template<SubRegSize size>
  void ld1(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 1) ||
                         (size == SubRegSize::i16Bit && PostOffset == 2) ||
                         (size == SubRegSize::i32Bit && PostOffset == 4) ||
                         (size == SubRegSize::i64Bit && PostOffset == 8),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, true, 1>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // LD1R, post-indexed by the register rm.
  template<SubRegSize size>
  void ld1r(VRegister rt, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    // The replicate form has no lane index, so 0 is passed.
    ASIMDSTLD<size, true, 1>(ClassBits, ReplicateOpcode, rt, 0, rn, rm);
  }
  // LD1R, post-indexed by an immediate.
  // PostOffset must equal the element size in bytes (1, 2, 4 or 8).
  template<SubRegSize size>
  void ld1r(VRegister rt, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 1) ||
                         (size == SubRegSize::i16Bit && PostOffset == 2) ||
                         (size == SubRegSize::i32Bit && PostOffset == 4) ||
                         (size == SubRegSize::i64Bit && PostOffset == 8),
                       "Post-index offset needs to match number of elements times their size");

    // No lane index for the replicate form (0); Reg::r31 in the rm position for the immediate form.
    ASIMDSTLD<size, true, 1>(ClassBits, ReplicateOpcode, rt, 0, rn, Reg::r31);
  }
  // LD2 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void ld2(VRegister rt, VRegister rt2, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    ASIMDSTLD<size, true, 2>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // LD2 (single structure), post-indexed by an immediate.
  // Only the first register is taken; PostOffset must be twice the element size in bytes.
  template<SubRegSize size>
  void ld2(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 2) ||
                         (size == SubRegSize::i16Bit && PostOffset == 4) ||
                         (size == SubRegSize::i32Bit && PostOffset == 8) ||
                         (size == SubRegSize::i64Bit && PostOffset == 16),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, true, 2>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // LD2R, post-indexed by the register rm.
  template<SubRegSize size>
  void ld2r(VRegister rt, VRegister rt2, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    // The replicate form has no lane index, so 0 is passed.
    ASIMDSTLD<size, true, 2>(ClassBits, ReplicateOpcode, rt, 0, rn, rm);
  }
  // LD2R, post-indexed by an immediate.
  // PostOffset must be twice the element size in bytes (2, 4, 8 or 16).
  template<SubRegSize size>
  void ld2r(VRegister rt, VRegister rt2, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 2) ||
                         (size == SubRegSize::i16Bit && PostOffset == 4) ||
                         (size == SubRegSize::i32Bit && PostOffset == 8) ||
                         (size == SubRegSize::i64Bit && PostOffset == 16),
                       "Post-index offset needs to match number of elements times their size");

    // No lane index for the replicate form (0); Reg::r31 in the rm position for the immediate form.
    ASIMDSTLD<size, true, 2>(ClassBits, ReplicateOpcode, rt, 0, rn, Reg::r31);
  }
  // LD3 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void ld3(VRegister rt, VRegister rt2, VRegister rt3, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    ASIMDSTLD<size, true, 3>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // LD3 (single structure), post-indexed by an immediate.
  // Only the first register is taken. PostOffset must be three times the element
  // size in bytes: 3, 6, 12 or 24 for 8/16/32/64-bit elements respectively.
  template<SubRegSize size>
  void ld3(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Three 64-bit elements occupy 24 bytes (the previous check wrongly required 16).
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && (PostOffset == 3)) || (size == SubRegSize::i16Bit && (PostOffset == 6)) ||
                         (size == SubRegSize::i32Bit && (PostOffset == 12)) || (size == SubRegSize::i64Bit && (PostOffset == 24)),
                       "Post-index offset needs to match number of elements times their size");
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t Op = 0b0000'1101'100 << 21;
    constexpr uint32_t Opcode =
        size == SubRegSize::i8Bit  ? 0b000 : // Scale = 0
        size == SubRegSize::i16Bit ? 0b010 : // Scale = 1
        size == SubRegSize::i32Bit ? 0b100 : // Scale = 2
        size == SubRegSize::i64Bit ? 0b100 : // Scale = 2 (Uses size to determine difference between 32-bit).
        0;
    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, true, 3>(Op, Opcode, rt, Index, rn, Reg::r31);
  }
  // LD3R, post-indexed by the register rm.
  template<SubRegSize size>
  void ld3r(VRegister rt, VRegister rt2, VRegister rt3, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // The replicate form has no lane index, so 0 is passed.
    ASIMDSTLD<size, true, 3>(ClassBits, ReplicateOpcode, rt, 0, rn, rm);
  }
  // LD3R, post-indexed by an immediate.
  // PostOffset must be three times the element size in bytes:
  // 3, 6, 12 or 24 for 8/16/32/64-bit elements respectively.
  template<SubRegSize size>
  void ld3r(VRegister rt, VRegister rt2, VRegister rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // Three 64-bit elements occupy 24 bytes (the previous check wrongly required 16).
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && (PostOffset == 3)) || (size == SubRegSize::i16Bit && (PostOffset == 6)) ||
                         (size == SubRegSize::i32Bit && (PostOffset == 12)) || (size == SubRegSize::i64Bit && (PostOffset == 24)),
                       "Post-index offset needs to match number of elements times their size");
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t Op = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t Opcode = 0b110;
    // No lane index for the replicate form (0); Reg::r31 in the rm position for the immediate form.
    ASIMDSTLD<size, true, 3>(Op, Opcode, rt, 0, rn, Reg::r31);
  }
  // LD4 (single structure), post-indexed by the register rm.
  template<SubRegSize size>
  void ld4(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, uint32_t Index, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    ASIMDSTLD<size, true, 4>(ClassBits, ScaleBits, rt, Index, rn, rm);
  }
  // LD4 (single structure), post-indexed by an immediate.
  // Only the first register is taken; PostOffset must be four times the element size in bytes.
  template<SubRegSize size>
  void ld4(VRegister rt, uint32_t Index, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Scale encoding: 8-bit -> 0b000, 16-bit -> 0b010, 32-bit and 64-bit share 0b100
    // (the size template argument is what tells those two apart).
    constexpr uint32_t ScaleBits =
      (size == SubRegSize::i32Bit || size == SubRegSize::i64Bit) ? 0b100 : (size == SubRegSize::i16Bit) ? 0b010 : 0b000;

    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 4) ||
                         (size == SubRegSize::i16Bit && PostOffset == 8) ||
                         (size == SubRegSize::i32Bit && PostOffset == 16) ||
                         (size == SubRegSize::i64Bit && PostOffset == 32),
                       "Post-index offset needs to match number of elements times their size");

    // Reg::r31 is passed in the rm position for the immediate post-index form.
    ASIMDSTLD<size, true, 4>(ClassBits, ScaleBits, rt, Index, rn, Reg::r31);
  }
  // LD4R, post-indexed by the register rm.
  template<SubRegSize size>
  void ld4r(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, Register rn, Register rm) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    // The replicate form has no lane index, so 0 is passed.
    ASIMDSTLD<size, true, 4>(ClassBits, ReplicateOpcode, rt, 0, rn, rm);
  }
  // LD4R, post-indexed by an immediate.
  // PostOffset must be four times the element size in bytes (4, 8, 16 or 32).
  template<SubRegSize size>
  void ld4r(VRegister rt, VRegister rt2, VRegister rt3, VRegister rt4, Register rn, uint32_t PostOffset) {
    // Instruction class bits for the post-indexed single-structure form.
    constexpr uint32_t ClassBits = 0b0000'1101'100 << 21;
    // Opcode value shared by the replicate ("r") variants.
    constexpr uint32_t ReplicateOpcode = 0b110;

    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    LOGMAN_THROW_A_FMT((size == SubRegSize::i8Bit && PostOffset == 4) ||
                         (size == SubRegSize::i16Bit && PostOffset == 8) ||
                         (size == SubRegSize::i32Bit && PostOffset == 16) ||
                         (size == SubRegSize::i64Bit && PostOffset == 32),
                       "Post-index offset needs to match number of elements times their size");

    // No lane index for the replicate form (0); Reg::r31 in the rm position for the immediate form.
    ASIMDSTLD<size, true, 4>(ClassBits, ReplicateOpcode, rt, 0, rn, Reg::r31);
  }

  // Advanced SIMD load/store single structure (post-indexed)
  // ST1 (single structure), immediate post-index, with the element size chosen at runtime.
  // `Index` is the lane to store; it is split across the Q, S and size fields
  // depending on the element width. PostOffset must equal the element size in bytes.
  template<typename T>
  void st1(ARMEmitter::SubRegSize size, T rt, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == SubRegSizeInBits(size), "Post-Index size must match element size");

    constexpr uint32_t ClassBits = 0b0000'1101'1 << 23;
    uint32_t RBit = 0;
    uint32_t QBit;
    uint32_t SBit;
    uint32_t SizeBits;
    uint32_t OpcodeBits;

    switch (size) {
    case SubRegSize::i8Bit:
      // 16 lanes: index bits are Q:S:size.
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      SizeBits = Index & 0b11;
      OpcodeBits = 0b000;
      break;
    case SubRegSize::i16Bit:
      // 8 lanes: index bits are Q:S:size<1>.
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      SizeBits = (Index & 0b1) << 1;
      OpcodeBits = 0b010;
      break;
    case SubRegSize::i32Bit:
      // 4 lanes: index bits are Q:S.
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      SizeBits = 0b00;
      OpcodeBits = 0b100;
      break;
    case SubRegSize::i64Bit:
      // 2 lanes: index is Q alone; size = 0b01 marks the 64-bit form.
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      SizeBits = 0b01;
      OpcodeBits = 0b100;
      break;
    default:
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }

    // Third argument 0 = store; Reg::r31 in the rm position for the immediate form.
    ASIMDLoadStoreSinglePost(ClassBits, QBit, 0, RBit, OpcodeBits, SBit, SizeBits, ARMEmitter::Reg::r31, rn, rt.Q());
  }
  // LD1 (single structure), immediate post-index, with the element size chosen at runtime.
  // `Index` is the lane to load; it is split across the Q, S and size fields
  // depending on the element width. PostOffset must equal the element size in bytes.
  //
  // The opcode values must match the ST1 sibling (0b000 / 0b010 / 0b100 / 0b100):
  // with R = 0, the odd opcode values 0b001 / 0b011 / 0b101 encode LD3, not LD1.
  template<typename T>
  void ld1(ARMEmitter::SubRegSize size, T rt, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == SubRegSizeInBits(size), "Post-Index size must match element size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    uint32_t Q;
    uint32_t R = 0;
    uint32_t opcode;
    uint32_t S;
    uint32_t Size;
    if (size == SubRegSize::i8Bit) {
      // 16 lanes: index bits are Q:S:size.
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      Q = Index >> 3;
      S = (Index >> 2) & 1;
      opcode = 0b000;
      Size = Index & 0b11;
    } else if (size == SubRegSize::i16Bit) {
      // 8 lanes: index bits are Q:S:size<1>.
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      Q = Index >> 2;
      S = (Index >> 1) & 1;
      opcode = 0b010;
      Size = (Index & 0b1) << 1;
    } else if (size == SubRegSize::i32Bit) {
      // 4 lanes: index bits are Q:S.
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      Q = Index >> 1;
      S = Index & 1;
      opcode = 0b100;
      Size = 0b00;
    } else if (size == SubRegSize::i64Bit) {
      // 2 lanes: index is Q alone; size = 0b01 marks the 64-bit form.
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      Q = Index;
      S = 0;
      opcode = 0b100;
      Size = 0b01;
    } else {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }

    // Third argument 1 = load; Reg::r31 in the rm position for the immediate form.
    ASIMDLoadStoreSinglePost(Op, Q, 1, R, opcode, S, Size, ARMEmitter::Reg::r31, rn, rt.Q());
  }
  // LD1R, immediate post-index, with the element size chosen at runtime.
  // PostOffset must equal the element size in bytes. The instruction itself carries
  // no immediate (Reg::r31 is passed in the rm position), so a mismatched PostOffset
  // would otherwise go unnoticed.
  template<typename T>
  void ld1r(ARMEmitter::SubRegSize size, T rt, ARMEmitter::Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    // Same check as the other runtime-sized post-index emitters: one element.
    LOGMAN_THROW_A_FMT((PostOffset * 8) == SubRegSizeInBits(size), "Post-Index size must match element size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    // Q is set only when T is QRegister.
    constexpr uint32_t Q = std::is_same_v<ARMEmitter::QRegister, T> ? 1 : 0;
    uint32_t R = 0;
    uint32_t opcode = 0b110;
    uint32_t S = 0;
    // The size field is the enum's underlying value.
    uint32_t Size = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Op, Q, 1, R, opcode, S, Size, ARMEmitter::Reg::r31, rn, rt);
  }

  // LD2R, immediate post-index, with the element size chosen at runtime.
  // PostOffset must equal two elements' worth of bytes. The instruction itself carries
  // no immediate (Reg::r31 is passed in the rm position), so a mismatched PostOffset
  // would otherwise go unnoticed.
  template<typename T>
  void ld2r(SubRegSize size, T rt, T rt2, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");
    // Same check as the other runtime-sized post-index emitters: two elements.
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 2), "Post-Index size must match element size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    // Q is set only when T is QRegister.
    constexpr uint32_t Q = std::is_same_v<QRegister, T> ? 1 : 0;
    uint32_t R = 1;
    uint32_t opcode = 0b110;
    uint32_t S = 0;
    // The size field is the enum's underlying value.
    uint32_t Size = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Op, Q, 1, R, opcode, S, Size, Reg::r31, rn, rt);
  }

  // LD3R, immediate post-index, with the element size chosen at runtime.
  // PostOffset must equal three elements' worth of bytes. The instruction itself carries
  // no immediate (Reg::r31 is passed in the rm position), so a mismatched PostOffset
  // would otherwise go unnoticed.
  template<typename T>
  void ld3r(SubRegSize size, T rt, T rt2, T rt3, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");
    // Same check as the other runtime-sized post-index emitters: three elements.
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 3), "Post-Index size must match element size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    // Q is set only when T is QRegister.
    constexpr uint32_t Q = std::is_same_v<QRegister, T> ? 1 : 0;
    uint32_t R = 0;
    uint32_t opcode = 0b111;
    uint32_t S = 0;
    // The size field is the enum's underlying value.
    uint32_t Size = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Op, Q, 1, R, opcode, S, Size, Reg::r31, rn, rt);
  }
  // LD4R, immediate post-index, with the element size chosen at runtime.
  // PostOffset must equal four elements' worth of bytes. The instruction itself carries
  // no immediate (Reg::r31 is passed in the rm position), so a mismatched PostOffset
  // would otherwise go unnoticed.
  template<typename T>
  void ld4r(SubRegSize size, T rt, T rt2, T rt3, T rt4, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");
    // Same check as the other runtime-sized post-index emitters: four elements.
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 4), "Post-Index size must match element size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    // Q is set only when T is QRegister.
    constexpr uint32_t Q = std::is_same_v<QRegister, T> ? 1 : 0;
    uint32_t R = 1;
    uint32_t opcode = 0b111;
    uint32_t S = 0;
    // The size field is the enum's underlying value.
    uint32_t Size = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Op, Q, 1, R, opcode, S, Size, Reg::r31, rn, rt);
  }

  // ST2 (single structure), immediate post-index, with the element size chosen at runtime.
  // `Index` is the lane to store; it is split across the Q, S and size fields
  // depending on the element width. PostOffset must equal two elements' worth of bytes.
  template<typename T>
  void st2(SubRegSize size, T rt, T rt2, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 2), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");

    constexpr uint32_t ClassBits = 0b0000'1101'1 << 23;
    uint32_t RBit = 1;
    uint32_t QBit;
    uint32_t SBit;
    uint32_t SizeBits;
    uint32_t OpcodeBits;

    switch (size) {
    case SubRegSize::i8Bit:
      // 16 lanes: index bits are Q:S:size.
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      SizeBits = Index & 0b11;
      OpcodeBits = 0b000;
      break;
    case SubRegSize::i16Bit:
      // 8 lanes: index bits are Q:S:size<1>.
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      SizeBits = (Index & 0b1) << 1;
      OpcodeBits = 0b010;
      break;
    case SubRegSize::i32Bit:
      // 4 lanes: index bits are Q:S.
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      SizeBits = 0b00;
      OpcodeBits = 0b100;
      break;
    case SubRegSize::i64Bit:
      // 2 lanes: index is Q alone; size = 0b01 marks the 64-bit form.
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      SizeBits = 0b01;
      OpcodeBits = 0b100;
      break;
    default:
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }

    // Third argument 0 = store; Reg::r31 in the rm position for the immediate form.
    ASIMDLoadStoreSinglePost(ClassBits, QBit, 0, RBit, OpcodeBits, SBit, SizeBits, Reg::r31, rn, rt.Q());
  }
  // LD2 (single structure), immediate post-index, with the element size chosen at runtime.
  // `Index` is the lane to load; it is split across the Q, S and size fields
  // depending on the element width. PostOffset must equal two elements' worth of bytes.
  template<typename T>
  void ld2(SubRegSize size, T rt, T rt2, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 2), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");

    constexpr uint32_t ClassBits = 0b0000'1101'1 << 23;
    uint32_t RBit = 1;
    uint32_t QBit;
    uint32_t SBit;
    uint32_t SizeBits;
    uint32_t OpcodeBits;

    switch (size) {
    case SubRegSize::i8Bit:
      // 16 lanes: index bits are Q:S:size.
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      SizeBits = Index & 0b11;
      OpcodeBits = 0b000;
      break;
    case SubRegSize::i16Bit:
      // 8 lanes: index bits are Q:S:size<1>.
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      SizeBits = (Index & 0b1) << 1;
      OpcodeBits = 0b010;
      break;
    case SubRegSize::i32Bit:
      // 4 lanes: index bits are Q:S.
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      SizeBits = 0b00;
      OpcodeBits = 0b100;
      break;
    case SubRegSize::i64Bit:
      // 2 lanes: index is Q alone; size = 0b01 marks the 64-bit form.
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      SizeBits = 0b01;
      OpcodeBits = 0b100;
      break;
    default:
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }

    // Third argument 1 = load; Reg::r31 in the rm position for the immediate form.
    ASIMDLoadStoreSinglePost(ClassBits, QBit, 1, RBit, OpcodeBits, SBit, SizeBits, Reg::r31, rn, rt.Q());
  }
  // ST3 (single structure), post-index immediate form.
  //
  // Stores lane `Index` of the three consecutive registers rt/rt2/rt3 to [rn]. Rm is encoded as r31, so
  // PostOffset is only validated here (it must equal three elements in bytes) and is not encoded itself.
  template<typename T>
  void st3(SubRegSize size, T rt, T rt2, T rt3, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 3), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 0;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, Reg::r31, rn, rt.Q());
  }
  // LD3 (single structure), post-index immediate form.
  //
  // Loads lane `Index` of the three consecutive registers rt/rt2/rt3 from [rn]. Rm is encoded as r31, so
  // PostOffset is only validated here (it must equal three elements in bytes) and is not encoded itself.
  template<typename T>
  void ld3(SubRegSize size, T rt, T rt2, T rt3, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 3), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 0;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, Reg::r31, rn, rt.Q());
  }
  // ST4 (single structure), post-index immediate form.
  //
  // Stores lane `Index` of the four consecutive registers rt..rt4 to [rn]. Rm is encoded as r31, so
  // PostOffset is only validated here (it must equal four elements in bytes) and is not encoded itself.
  template<typename T>
  void st4(SubRegSize size, T rt, T rt2, T rt3, T rt4, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 4), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, Reg::r31, rn, rt.Q());
  }
  // LD4 (single structure), post-index immediate form.
  //
  // Loads lane `Index` of the four consecutive registers rt..rt4 from [rn]. Rm is encoded as r31, so
  // PostOffset is only validated here (it must equal four elements in bytes) and is not encoded itself.
  template<typename T>
  void ld4(SubRegSize size, T rt, T rt2, T rt3, T rt4, uint32_t Index, Register rn, uint32_t PostOffset) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT((PostOffset * 8) == (SubRegSizeInBits(size) * 4), "Post-Index size must match element size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, Reg::r31, rn, rt.Q());
  }

  // ST1 (single structure), post-index register form.
  //
  // Stores lane `Index` of rt to [rn], with rm encoded as the post-index register.
  template<typename T>
  void st1(ARMEmitter::SubRegSize size, T rt, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 0;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b000;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b010;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b100;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b100;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // LD1 (single structure), post-index register form.
  //
  // Loads one element from [rn] into lane `Index` of rt, with rm encoded as the post-index register.
  //
  // The lane index is distributed over the Q, S and size fields depending on the element width.
  // For the one-register form the opcode field must be 000 (8-bit), 010 (16-bit) or 100 (32/64-bit)
  // together with R=0. Setting opcode<0> while R=0 selects the three-register (LD3) encoding, so the
  // values here must mirror the ones used by the matching st1 overload.
  template<typename T>
  void ld1(ARMEmitter::SubRegSize size, T rt, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    constexpr uint32_t Op = 0b0000'1101'1 << 23;
    uint32_t Q;
    uint32_t R = 0;
    uint32_t opcode;
    uint32_t S;
    uint32_t Size;
    if (size == SubRegSize::i8Bit) {
      // 4-bit lane index -> Q:S:size
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      Q = Index >> 3;
      S = (Index >> 2) & 1;
      opcode = 0b000;
      Size = Index & 0b11;
    } else if (size == SubRegSize::i16Bit) {
      // 3-bit lane index -> Q:S:size<1>, size<0> is zero
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      Q = Index >> 2;
      S = (Index >> 1) & 1;
      opcode = 0b010;
      Size = (Index & 0b1) << 1;
    } else if (size == SubRegSize::i32Bit) {
      // 2-bit lane index -> Q:S, size is 00
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      Q = Index >> 1;
      S = Index & 1;
      opcode = 0b100;
      Size = 0b00;
    } else if (size == SubRegSize::i64Bit) {
      // 1-bit lane index -> Q, S is zero and size is 01
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      Q = Index;
      S = 0;
      opcode = 0b100;
      Size = 0b01;
    } else {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }

    ASIMDLoadStoreSinglePost(Op, Q, 1, R, opcode, S, Size, rm, rn, rt.Q());
  }
  // LD1R, post-index register form: load-and-replicate into rt from [rn], with rm as the post-index register.
  // The Q bit is set when T is QRegister; the size field is the raw SubRegSize value.
  template<typename T>
  void ld1r(SubRegSize size, T rt, Register rn, Register rm) {
    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? 1 : 0;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 0;
    constexpr uint32_t OpcodeField = 0b110;
    constexpr uint32_t SBit = 0;
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt);
  }

  // LD2R, post-index register form: load-and-replicate into the consecutive registers rt/rt2 from [rn],
  // with rm as the post-index register. The Q bit is set when T is QRegister.
  template<typename T>
  void ld2r(SubRegSize size, T rt, T rt2, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? 1 : 0;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 1;
    constexpr uint32_t OpcodeField = 0b110;
    constexpr uint32_t SBit = 0;
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt);
  }

  // LD3R, post-index register form: load-and-replicate into the consecutive registers rt/rt2/rt3 from [rn],
  // with rm as the post-index register. The Q bit is set when T is QRegister.
  template<typename T>
  void ld3r(SubRegSize size, T rt, T rt2, T rt3, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? 1 : 0;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 0;
    constexpr uint32_t OpcodeField = 0b111;
    constexpr uint32_t SBit = 0;
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt);
  }
  // LD4R, post-index register form: load-and-replicate into the consecutive registers rt..rt4 from [rn],
  // with rm as the post-index register. The Q bit is set when T is QRegister.
  template<typename T>
  void ld4r(SubRegSize size, T rt, T rt2, T rt3, T rt4, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t QBit = std::is_same_v<QRegister, T> ? 1 : 0;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 1;
    constexpr uint32_t OpcodeField = 0b111;
    constexpr uint32_t SBit = 0;
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    ASIMDLoadStoreSinglePost<T>(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt);
  }

  // ST2 (single structure), post-index register form.
  //
  // Stores lane `Index` of the two consecutive registers rt/rt2 to [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void st2(SubRegSize size, T rt, T rt2, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b000;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b010;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b100;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b100;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // LD2 (single structure), post-index register form.
  //
  // Loads lane `Index` of the two consecutive registers rt/rt2 from [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void ld2(SubRegSize size, T rt, T rt2, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2), "rt and rt2 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b000;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b010;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b100;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b100;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // ST3 (single structure), post-index register form.
  //
  // Stores lane `Index` of the three consecutive registers rt/rt2/rt3 to [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void st3(SubRegSize size, T rt, T rt2, T rt3, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 0;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // LD3 (single structure), post-index register form.
  //
  // Loads lane `Index` of the three consecutive registers rt/rt2/rt3 from [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void ld3(SubRegSize size, T rt, T rt2, T rt3, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3), "rt, rt2, and rt3 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 0;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // ST4 (single structure), post-index register form.
  //
  // Stores lane `Index` of the four consecutive registers rt..rt4 to [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void st4(SubRegSize size, T rt, T rt2, T rt3, T rt4, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 0;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }
  // LD4 (single structure), post-index register form.
  //
  // Loads lane `Index` of the four consecutive registers rt..rt4 from [rn], with rm encoded as the
  // post-index register.
  template<typename T>
  void ld4(SubRegSize size, T rt, T rt2, T rt3, T rt4, uint32_t Index, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "Incorrect size");
    LOGMAN_THROW_A_FMT(AreVectorsSequential(rt, rt2, rt3, rt4), "rt, rt2, rt3, and rt4 must be sequential");

    constexpr uint32_t Base = 0b0000'1101'1 << 23;
    constexpr uint32_t LBit = 1;
    constexpr uint32_t RBit = 1;
    uint32_t QBit, SBit, OpcodeField, SizeField;

    // The lane index is distributed over Q:S:size; the split depends on the element width.
    switch (size) {
    case SubRegSize::i8Bit: {
      LOGMAN_THROW_A_FMT(Index < 16, "Index too large");
      QBit = Index >> 3;
      SBit = (Index >> 2) & 1;
      OpcodeField = 0b001;
      SizeField = Index & 0b11;
      break;
    }
    case SubRegSize::i16Bit: {
      LOGMAN_THROW_A_FMT(Index < 8, "Index too large");
      QBit = Index >> 2;
      SBit = (Index >> 1) & 1;
      OpcodeField = 0b011;
      SizeField = (Index & 0b1) << 1;
      break;
    }
    case SubRegSize::i32Bit: {
      LOGMAN_THROW_A_FMT(Index < 4, "Index too large");
      QBit = Index >> 1;
      SBit = Index & 1;
      OpcodeField = 0b101;
      SizeField = 0b00;
      break;
    }
    case SubRegSize::i64Bit: {
      LOGMAN_THROW_A_FMT(Index < 2, "Index too large");
      QBit = Index;
      SBit = 0;
      OpcodeField = 0b101;
      SizeField = 0b01;
      break;
    }
    default: {
      LOGMAN_MSG_A_FMT("Unknown size");
      FEX_UNREACHABLE;
    }
    }

    ASIMDLoadStoreSinglePost(Base, QBit, LBit, RBit, OpcodeField, SBit, SizeField, rm, rn, rt.Q());
  }

  // Compile-time-size convenience overload: forwards to st1(size, rt, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void st1(T rt, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    st1(size, rt, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld1(size, rt, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld1(T rt, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld1(size, rt, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld1r(size, rt, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld1r(T rt, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld1r(size, rt, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld2r(size, rt, rt2, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld2r(T rt, T rt2, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld2r(size, rt, rt2, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld3r(size, rt, rt2, rt3, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld3r(T rt, T rt2, T rt3, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld3r(size, rt, rt2, rt3, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld4r(size, rt, rt2, rt3, rt4, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld4r(T rt, T rt2, T rt3, T rt4, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld4r(size, rt, rt2, rt3, rt4, rn, PostOffset);
  }

  // Compile-time-size convenience overload: forwards to st2(size, rt, rt2, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void st2(T rt, T rt2, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    st2(size, rt, rt2, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld2(size, rt, rt2, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld2(T rt, T rt2, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld2(size, rt, rt2, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to st3(size, rt, rt2, rt3, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void st3(T rt, T rt2, T rt3, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    st3(size, rt, rt2, rt3, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld3(size, rt, rt2, rt3, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld3(T rt, T rt2, T rt3, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld3(size, rt, rt2, rt3, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to st4(size, rt, rt2, rt3, rt4, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void st4(T rt, T rt2, T rt3, T rt4, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    st4(size, rt, rt2, rt3, rt4, Index, rn, PostOffset);
  }
  // Compile-time-size convenience overload: forwards to ld4(size, rt, rt2, rt3, rt4, Index, rn, PostOffset).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld4(T rt, T rt2, T rt3, T rt4, uint32_t Index, ARMEmitter::Register rn, uint32_t PostOffset) {
    ld4(size, rt, rt2, rt3, rt4, Index, rn, PostOffset);
  }

  // Compile-time-size convenience overload: forwards to st1(size, rt, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void st1(T rt, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    st1(size, rt, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld1(size, rt, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld1(T rt, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld1(size, rt, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld1r(size, rt, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld1r(T rt, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld1r(size, rt, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld2r(size, rt, rt2, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld2r(T rt, T rt2, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld2r(size, rt, rt2, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld3r(size, rt, rt2, rt3, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld3r(T rt, T rt2, T rt3, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld3r(size, rt, rt2, rt3, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld4r(size, rt, rt2, rt3, rt4, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld4r(T rt, T rt2, T rt3, T rt4, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld4r(size, rt, rt2, rt3, rt4, rn, rm);
  }

  // Compile-time-size convenience overload: forwards to st2(size, rt, rt2, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void st2(T rt, T rt2, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    st2(size, rt, rt2, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld2(size, rt, rt2, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld2(T rt, T rt2, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld2(size, rt, rt2, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to st3(size, rt, rt2, rt3, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void st3(T rt, T rt2, T rt3, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    st3(size, rt, rt2, rt3, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld3(size, rt, rt2, rt3, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld3(T rt, T rt2, T rt3, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld3(size, rt, rt2, rt3, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to st4(size, rt, rt2, rt3, rt4, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void st4(T rt, T rt2, T rt3, T rt4, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    st4(size, rt, rt2, rt3, rt4, Index, rn, rm);
  }
  // Compile-time-size convenience overload: forwards to ld4(size, rt, rt2, rt3, rt4, Index, rn, rm).
  template<ARMEmitter::SubRegSize size, typename T>
  void ld4(T rt, T rt2, T rt3, T rt4, uint32_t Index, ARMEmitter::Register rn, ARMEmitter::Register rm) {
    ld4(size, rt, rt2, rt3, rt4, Index, rn, rm);
  }

  // Assembles and emits one post-indexed "load/store single structure" instruction word.
  //
  // Field placement: Q -> bit 30, L -> bit 22, R -> bit 21, opcode -> bits 15:13, S -> bit 12,
  // size -> bits 11:10. Rm/Rn/Rt are placed by their Encode_* helpers. The caller supplies the
  // fixed base pattern in Op and is responsible for every field being in range.
  template<typename T>
  void ASIMDLoadStoreSinglePost(uint32_t Op, uint32_t Q, uint32_t L, uint32_t R, uint32_t opcode, uint32_t S, uint32_t size,
                                ARMEmitter::Register rm, ARMEmitter::Register rn, T rt) {
    LOGMAN_THROW_A_FMT((std::is_same_v<ARMEmitter::QRegister, T> || std::is_same_v<ARMEmitter::DRegister, T>), "Only supports 128-bit and "
                                                                                                               "64-bit vector registers.");
    const uint32_t Encoded = Op | (Q << 30) | (L << 22) | (R << 21) | Encode_rm(rm) | (opcode << 13) | (S << 12) | (size << 10) |
                             Encode_rn(rn) | Encode_rt(rt);
    dc32(Encoded);
  }
  // Loadstore exclusive pair
  // STXP: store-exclusive pair rt/rt2 to [rn]; rs is passed as the status register.
  // Flag arguments (0, 0) are presumably L=0 (store), o0=0 (no release) — confirm against AtomicOp.
  void stxp(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rt2, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b1000'1000'001 << 21;
    AtomicOp(Op, s, 0, 0, rs, rt, rt2, rn);
  }
  // STLXP: store-release-exclusive pair rt/rt2 to [rn]; rs is passed as the status register.
  // Flag arguments (0, 1) are presumably L=0 (store), o0=1 (release) — confirm against AtomicOp.
  void stlxp(ARMEmitter::Size s, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rt2, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b1000'1000'001 << 21;
    AtomicOp(Op, s, 0, 1, rs, rt, rt2, rn);
  }
  // LDXP: load-exclusive pair into rt/rt2 from [rn]; r31 fills the unused status-register slot.
  // Flag arguments (1, 0) are presumably L=1 (load), o0=0 (no acquire) — confirm against AtomicOp.
  void ldxp(ARMEmitter::Size s, ARMEmitter::Register rt, ARMEmitter::Register rt2, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b1000'1000'001 << 21;
    AtomicOp(Op, s, 1, 0, ARMEmitter::Reg::r31, rt, rt2, rn);
  }
  // LDAXP: load-acquire-exclusive pair into rt/rt2 from [rn]; r31 fills the unused status-register slot.
  // Flag arguments (1, 1) are presumably L=1 (load), o0=1 (acquire) — confirm against AtomicOp.
  void ldaxp(ARMEmitter::Size s, ARMEmitter::Register rt, ARMEmitter::Register rt2, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b1000'1000'001 << 21;
    AtomicOp(Op, s, 1, 1, ARMEmitter::Reg::r31, rt, rt2, rn);
  }
  // Loadstore exclusive register
  // STXRB: 8-bit store-exclusive of rt to [rn]; rs is passed as the status register, r31 as the unused rt2.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stxrb(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i8Bit, 0, 0, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // STLXRB: 8-bit store-release-exclusive of rt to [rn]; rs is passed as the status register.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlxrb(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i8Bit, 0, 1, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDXRB: 8-bit load-exclusive into rt from [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldxrb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i8Bit, 1, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDAXRB: 8-bit load-acquire-exclusive into rt from [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldaxrb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i8Bit, 1, 1, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // STXRH: 16-bit store-exclusive of rt to [rn]; rs is passed as the status register.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stxrh(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i16Bit, 0, 0, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // STLXRH: 16-bit store-release-exclusive of rt to [rn]; rs is passed as the status register.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlxrh(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i16Bit, 0, 1, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDXRH: 16-bit load-exclusive into rt from [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldxrh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i16Bit, 1, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDAXRH: 16-bit load-acquire-exclusive into rt from [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldaxrh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i16Bit, 1, 1, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // STXR (32-bit): store-exclusive of Wt to [rn]; rs is passed as the status register, w31 as the unused rt2.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stxr(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i32Bit, 0, 0, rs, rt, ARMEmitter::WReg::w31, rn);
  }
  // STLXR (32-bit): store-release-exclusive of Wt to [rn]; rs is passed as the status register.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlxr(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i32Bit, 0, 1, rs, rt, ARMEmitter::WReg::w31, rn);
  }
  // LDXR (32-bit): load-exclusive into Wt from [rn]; w31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldxr(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i32Bit, 1, 0, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // LDAXR (32-bit): load-acquire-exclusive into Wt from [rn]; w31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldaxr(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i32Bit, 1, 1, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // STXR (64-bit): store-exclusive of Xt to [rn]; rs is passed as the status register, x31 as the unused rt2.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  // NOTE(review): rs is typed XRegister here, whereas the 64-bit stlxr overload takes a WRegister status
  // register — confirm whether the difference is intentional.
  void stxr(ARMEmitter::XRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i64Bit, 0, 0, rs, rt, ARMEmitter::XReg::x31, rn);
  }
  // STLXR (64-bit): store-release-exclusive of Xt to [rn] with a W-typed status register rs.
  // Both registers are converted with .R() before being handed to SubAtomicOp.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlxr(ARMEmitter::WRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i64Bit, 0, 1, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // LDXR (64-bit): load-exclusive into Xt from [rn]; x31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldxr(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i64Bit, 1, 0, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // LDAXR (64-bit): load-acquire-exclusive into Xt from [rn]; x31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldaxr(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, ARMEmitter::SubRegSize::i64Bit, 1, 1, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // Store-exclusive with the access size chosen at runtime via `size`; rs is passed as the status register.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stxr(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, size, 0, 0, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // Store-release-exclusive with the access size chosen at runtime via `size`; rs is passed as the status register.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlxr(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, size, 0, 1, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // Load-exclusive with the access size chosen at runtime via `size`; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldxr(ARMEmitter::SubRegSize size, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, size, 1, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // Load-acquire-exclusive with the access size chosen at runtime via `size`; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldaxr(ARMEmitter::SubRegSize size, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t Op = 0b0000'1000'000 << 21;
    SubAtomicOp(Op, size, 1, 1, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }

  // Load/store ordered
  // Fixed base bit pattern shared by every load/store-ordered emitter below; passed as the first argument to SubAtomicOp.
  static constexpr uint32_t LoadStoreOrdered_Op = 0b0000'1000'100 << 21;
  // STLLRB: 8-bit store of rt to [rn] using the load/store-ordered encoding; r31 fills the unused rs and rt2 slots.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stllrb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i8Bit, 0, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // STLRB: 8-bit store-release of rt to [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (0, 1) are presumably L=0, o0=1 — confirm against SubAtomicOp.
  void stlrb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i8Bit, 0, 1, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDLARB: 8-bit load into rt from [rn] using the load/store-ordered encoding; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 0) are presumably L=1, o0=0 — confirm against SubAtomicOp.
  void ldlarb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i8Bit, 1, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDARB: 8-bit load-acquire into rt from [rn]; r31 fills the unused rs and rt2 slots.
  // Flag arguments (1, 1) are presumably L=1, o0=1 — confirm against SubAtomicOp.
  void ldarb(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i8Bit, 1, 1, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // STLLRH: 16-bit store of rt to [rn] using the load/store-ordered encoding; r31 fills the unused rs and rt2 slots.
  // Flag arguments (0, 0) are presumably L=0, o0=0 — confirm against SubAtomicOp.
  void stllrh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i16Bit, 0, 0, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // stlrh: 16-bit access, flag pair (0, 1); r31 fills the first and third register slots.
  void stlrh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // ldlarh: 16-bit access, flag pair (1, 0); r31 fills the first and third register slots.
  void ldlarh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // ldarh: 16-bit access, flag pair (1, 1); r31 fills the first and third register slots.
  void ldarh(ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, ARMEmitter::Reg::r31, rt, ARMEmitter::Reg::r31, rn);
  }
  // stllr (W form): 32-bit access, flag pair (0, 0); w31 fills the first and third register slots.
  void stllr(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // stlr (W form): 32-bit access, flag pair (0, 1); w31 fills the first and third register slots.
  void stlr(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // ldlar (W form): 32-bit access, flag pair (1, 0); w31 fills the first and third register slots.
  void ldlar(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // ldar (W form): 32-bit access, flag pair (1, 1); w31 fills the first and third register slots.
  void ldar(ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, ARMEmitter::WReg::w31, rt, ARMEmitter::WReg::w31, rn);
  }
  // stllr (X form): 64-bit access, flag pair (0, 0); x31 fills the first and third register slots.
  void stllr(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // stlr (X form): 64-bit access, flag pair (0, 1); x31 fills the first and third register slots.
  void stlr(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // ldlar (X form): 64-bit access, flag pair (1, 0); x31 fills the first and third register slots.
  void ldlar(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // ldar (X form): 64-bit access, flag pair (1, 1); x31 fills the first and third register slots.
  void ldar(ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(LoadStoreOrdered_Op, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, ARMEmitter::XReg::x31, rt, ARMEmitter::XReg::x31, rn);
  }
  // Compare and swap
  // casb: 8-bit compare-and-swap form, flag pair (0, 0); r31 fills the third register slot.
  void casb(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i8Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // caslb: 8-bit compare-and-swap form, flag pair (0, 1); r31 fills the third register slot.
  void caslb(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i8Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casab: 8-bit compare-and-swap form, flag pair (1, 0); r31 fills the third register slot.
  void casab(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i8Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casalb: 8-bit compare-and-swap form, flag pair (1, 1); r31 fills the third register slot.
  void casalb(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i8Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // cash: 16-bit compare-and-swap form, flag pair (0, 0); r31 fills the third register slot.
  void cash(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // caslh: 16-bit compare-and-swap form, flag pair (0, 1); r31 fills the third register slot.
  void caslh(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casah: 16-bit compare-and-swap form, flag pair (1, 0); r31 fills the third register slot.
  void casah(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casalh: 16-bit compare-and-swap form, flag pair (1, 1); r31 fills the third register slot.
  void casalh(ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i16Bit, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // cas (W form): 32-bit compare-and-swap, flag pair (0, 0). rs/rt are converted with R() before being forwarded.
  void cas(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casl (W form): 32-bit compare-and-swap, flag pair (0, 1). rs/rt are converted with R() before being forwarded.
  void casl(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casa (W form): 32-bit compare-and-swap, flag pair (1, 0). rs/rt are converted with R() before being forwarded.
  void casa(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casal (W form): 32-bit compare-and-swap, flag pair (1, 1). rs/rt are converted with R() before being forwarded.
  void casal(ARMEmitter::WRegister rs, ARMEmitter::WRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i32Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // cas (X form): 64-bit compare-and-swap, flag pair (0, 0). rs/rt are converted with R() before being forwarded.
  void cas(ARMEmitter::XRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casl (X form): 64-bit compare-and-swap, flag pair (0, 1). rs/rt are converted with R() before being forwarded.
  void casl(ARMEmitter::XRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casa (X form): 64-bit compare-and-swap, flag pair (1, 0). rs/rt are converted with R() before being forwarded.
  void casa(ARMEmitter::XRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }
  // casal (X form): 64-bit compare-and-swap, flag pair (1, 1). rs/rt are converted with R() before being forwarded.
  void casal(ARMEmitter::XRegister rs, ARMEmitter::XRegister rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(CasOp, ARMEmitter::SubRegSize::i64Bit, LBit, O0Bit, rs.R(), rt.R(), ARMEmitter::Reg::r31, rn);
  }

  // cas (size-generic form): caller-supplied element size, flag pair (0, 0); r31 fills the third register slot.
  void cas(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 0;
    SubAtomicOp(CasOp, size, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casl (size-generic form): caller-supplied element size, flag pair (0, 1); r31 fills the third register slot.
  void casl(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 0, O0Bit = 1;
    SubAtomicOp(CasOp, size, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casa (size-generic form): caller-supplied element size, flag pair (1, 0); r31 fills the third register slot.
  void casa(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 0;
    SubAtomicOp(CasOp, size, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // casal (size-generic form): caller-supplied element size, flag pair (1, 1); r31 fills the third register slot.
  void casal(ARMEmitter::SubRegSize size, ARMEmitter::Register rs, ARMEmitter::Register rt, ARMEmitter::Register rn) {
    constexpr uint32_t CasOp = 0b0000'1000'101 << 21;
    constexpr int LBit = 1, O0Bit = 1;
    SubAtomicOp(CasOp, size, LBit, O0Bit, rs, rt, ARMEmitter::Reg::r31, rn);
  }
  // LDAPR/STLR unscaled immediate
  // stlurb: asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (8-bit size, selector 0b00).
  void stlurb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i8Bit, 0b00, rt, rn, Imm9);
  }
  // ldapurb: asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (8-bit size, selector 0b01).
  void ldapurb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i8Bit, 0b01, rt, rn, Imm9);
  }
  // ldapursb (W destination): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (8-bit size, selector 0b11).
  void ldapursb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i8Bit, 0b11, rt, rn, Imm9);
  }
  // ldapursb (X destination): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (8-bit size, selector 0b10).
  void ldapursb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i8Bit, 0b10, rt, rn, Imm9);
  }
  // stlurh: asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (16-bit size, selector 0b00).
  void stlurh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i16Bit, 0b00, rt, rn, Imm9);
  }
  // ldapurh: asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (16-bit size, selector 0b01).
  void ldapurh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i16Bit, 0b01, rt, rn, Imm9);
  }
  // ldapursh (W destination): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (16-bit size, selector 0b11).
  void ldapursh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i16Bit, 0b11, rt, rn, Imm9);
  }
  // ldapursh (X destination): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (16-bit size, selector 0b10).
  void ldapursh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i16Bit, 0b10, rt, rn, Imm9);
  }
  // stlur (W form): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (32-bit size, selector 0b00).
  void stlur(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i32Bit, 0b00, rt, rn, Imm9);
  }
  // ldapur (W form): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (32-bit size, selector 0b01).
  void ldapur(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i32Bit, 0b01, rt, rn, Imm9);
  }
  // ldapursw: asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (32-bit size, selector 0b10).
  void ldapursw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i32Bit, 0b10, rt, rn, Imm9);
  }
  // stlur (X form): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (64-bit size, selector 0b00).
  void stlur(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i64Bit, 0b00, rt, rn, Imm9);
  }
  // ldapur (X form): asserts Imm is within [-256, 255], then forwards its low 9 bits to SubAtomicImm (64-bit size, selector 0b01).
  void ldapur(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");
    constexpr uint32_t UnscaledOp = 0b0001'1001'000 << 21;
    const uint32_t Imm9 = static_cast<uint32_t>(Imm) & 0x1'FF;
    SubAtomicImm(UnscaledOp, ARMEmitter::SubRegSize::i64Bit, 0b01, rt, rn, Imm9);
  }
  // Load register literal
  // ldr (W, literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits.
  void ldr(ARMEmitter::WRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0001'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (S, literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits.
  void ldr(ARMEmitter::SRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0001'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (X, literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits.
  void ldr(ARMEmitter::XRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0101'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (D, literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits.
  void ldr(ARMEmitter::DRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0101'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldrs (literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits, with opcode 0b1001'1000.
  // NOTE(review): the label-taking encoders that use this same opcode are named ldrsw and take an XRegister —
  // confirm whether this overload's name and register type are intentional.
  void ldrs(ARMEmitter::WRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1001'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (Q, literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits.
  void ldr(ARMEmitter::QRegister rt, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1001'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // prfm (literal offset): Imm is a byte offset that must be 4-byte aligned and within [-1048576, 1048575];
  // it is passed to LoadStoreLiteral divided by four and masked to 19 bits, with prfop in the register position.
  void prfm(ARMEmitter::Prefetch prfop, int32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1101'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, prfop, WordOffset);
  }
  // ldr (W, backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldr(ARMEmitter::WRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0001'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (S, backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldr(ARMEmitter::SRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0001'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (X, backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldr(ARMEmitter::XRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0101'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (D, backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldr(ARMEmitter::DRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b0101'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldrsw (backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldrsw(ARMEmitter::XRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1001'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // ldr (Q, backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void ldr(ARMEmitter::QRegister rt, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1001'1100 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, rt, WordOffset);
  }
  // prfm (backward label): takes the byte distance from the current cursor to Label->Location,
  // validates range and 4-byte alignment, then passes it to LoadStoreLiteral in 4-byte units.
  void prfm(ARMEmitter::Prefetch prfop, const BackwardLabel* Label) {
    const auto Distance = Label->Location - GetCursorAddress<uint8_t*>();
    const int32_t Imm = static_cast<int32_t>(Distance);
    LOGMAN_THROW_A_FMT(Imm >= -1048576 && Imm <= 1048575 && ((Imm & 0b11) == 0), "Unscaled offset too large");
    constexpr uint32_t LiteralOp = 0b1101'1000 << 24;
    const uint32_t WordOffset = static_cast<uint32_t>(Imm >> 2) & 0x7'FFFF;
    LoadStoreLiteral(LiteralOp, prfop, WordOffset);
  }

  // ldr (W, forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldr(ARMEmitter::WRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b0001'1000 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // ldr (S, forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldr(ARMEmitter::SRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b0001'1100 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // ldr (X, forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldr(ARMEmitter::XRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b0101'1000 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // ldr (D, forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldr(ARMEmitter::DRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b0101'1100 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // ldrsw (forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldrsw(ARMEmitter::XRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b1001'1000 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // ldr (Q, forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void ldr(ARMEmitter::QRegister rt, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b1001'1100 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, rt, 0);
  }

  // prfm (forward label): registers the current cursor on the label as a RELATIVE_LOAD reference,
  // then calls LoadStoreLiteral with a zero offset (presumably patched once the label is bound — confirm).
  void prfm(ARMEmitter::Prefetch prfop, ForwardLabel* Label) {
    constexpr uint32_t LiteralOp = 0b1101'1000 << 24;
    AddLocationToLabel(Label, ForwardLabel::Reference {
                                .Location = GetCursorAddress<uint8_t*>(),
                                .Type = ForwardLabel::InstType::RELATIVE_LOAD,
                              });
    LoadStoreLiteral(LiteralOp, prfop, 0);
  }

  // ldr (W, bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void ldr(ARMEmitter::WRegister rt, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      ldr(rt, &Label->Forward);
      return;
    }
    ldr(rt, &Label->Backward);
  }
  // ldr (S, bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void ldr(ARMEmitter::SRegister rt, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      ldr(rt, &Label->Forward);
      return;
    }
    ldr(rt, &Label->Backward);
  }
  // ldr (X, bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void ldr(ARMEmitter::XRegister rt, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      ldr(rt, &Label->Forward);
      return;
    }
    ldr(rt, &Label->Backward);
  }
  // ldr (D, bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void ldr(ARMEmitter::DRegister rt, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      ldr(rt, &Label->Forward);
      return;
    }
    ldr(rt, &Label->Backward);
  }
  // ldrs (bidirectional label): sign-extending load-literal against a label that may be bound or not.
  //
  // Previously this dispatched to the plain `ldr(WRegister, ...)` label overloads, which use opcode
  // 0b0001'1000 rather than the 0b1001'1000 opcode used by `ldrs(WRegister, int32_t)`. Both paths
  // now use the 0b1001'1000 opcode so the label form matches the immediate form.
  //
  // rt:    destination register, forwarded unchanged to the literal encoder.
  // Label: if Backward.Location is set the offset is computed now; otherwise the cursor is
  //        recorded on Label->Forward as a RELATIVE_LOAD reference and a zero offset is written.
  void ldrs(ARMEmitter::WRegister rt, BiDirectionalLabel* Label) {
    if (Label->Backward.Location) {
      // Label location is known: compute the byte offset from the cursor and reuse the
      // immediate overload, which performs the range/alignment check and the encoding.
      const int32_t Imm = static_cast<int32_t>(Label->Backward.Location - GetCursorAddress<uint8_t*>());
      ldrs(rt, Imm);
    } else {
      // Label location is not known yet: register this instruction on the forward label before writing it.
      AddLocationToLabel(&Label->Forward, ForwardLabel::Reference {.Location = GetCursorAddress<uint8_t*>(), .Type = ForwardLabel::InstType::RELATIVE_LOAD});
      constexpr uint32_t Op = 0b1001'1000 << 24;
      LoadStoreLiteral(Op, rt, 0);
    }
  }
  // ldr (Q, bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void ldr(ARMEmitter::QRegister rt, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      ldr(rt, &Label->Forward);
      return;
    }
    ldr(rt, &Label->Backward);
  }
  // prfm (bidirectional label): uses the backward-label overload when Backward.Location is set,
  // otherwise falls through to the forward-label overload.
  void prfm(ARMEmitter::Prefetch prfop, BiDirectionalLabel* Label) {
    if (!Label->Backward.Location) {
      prfm(prfop, &Label->Forward);
      return;
    }
    prfm(prfop, &Label->Backward);
  }

  // Memory copy/set
  // cpyfp: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0000).
  void cpyfp(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfm: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0000).
  void cpyfm(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfe: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0000).
  void cpyfe(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpwt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0001).
  void cpyfpwt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmwt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0001).
  void cpyfmwt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfewt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0001).
  void cpyfewt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfprt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0010).
  void cpyfprt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmrt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0010).
  void cpyfmrt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfert: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0010).
  void cpyfert(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0011).
  void cpyfpt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmt: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0011).
  void cpyfmt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfet: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0011).
  void cpyfet(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0100).
  void cpyfpwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0100).
  void cpyfmwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfewn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0100).
  void cpyfewn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpwtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0101).
  void cpyfpwtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmwtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0101).
  void cpyfmwtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfewtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0101).
  void cpyfewtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfprtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0110).
  void cpyfprtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmrtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0110).
  void cpyfmrtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfertwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0110).
  void cpyfertwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfptwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b0111).
  void cpyfptwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmtwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b0111).
  void cpyfmtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfetwn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b0111).
  void cpyfetwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfprn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1000).
  void cpyfprn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1000).
  void cpyfmrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfern: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1000).
  void cpyfern(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpwtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1001).
  void cpyfpwtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmwtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1001).
  void cpyfmwtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfewtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1001).
  void cpyfewtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfprtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1010).
  void cpyfprtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmrtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1010).
  void cpyfmrtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfertrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1010).
  void cpyfertrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfptrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1011).
  void cpyfptrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmtrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1011).
  void cpyfmtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfetrn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1011).
  void cpyfetrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1100).
  void cpyfpn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1100).
  void cpyfmn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfen: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1100).
  void cpyfen(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfpwtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1101).
  void cpyfpwtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmwtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1101).
  void cpyfmwtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfewtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1101).
  void cpyfewtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfprtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1110).
  void cpyfprtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmrtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1110).
  void cpyfmrtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfertn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1110).
  void cpyfertn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfptn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b00, 0b1111).
  void cpyfptn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfmtn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b01, 0b1111).
  void cpyfmtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // cpyfetn: MemoryCopyAndMemorySet with second argument 0 and selectors (0b10, 0b1111).
  void cpyfetn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }

  // setp: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0000).
  void setp(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setm: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0100).
  void setm(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // sete: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b1000).
  void sete(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setpt: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0001).
  void setpt(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setmt: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0101).
  void setmt(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setet: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b1001).
  void setet(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setpn: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0010).
  void setpn(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setmn: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0110).
  void setmn(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // seten: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b1010).
  void seten(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setptn: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0011).
  void setptn(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setmtn: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b0111).
  void setmtn(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }
  // setetn: takes (rd, rn, rs) and forwards them as (rs, rn, rd) with second argument 0 and selectors (0b11, 0b1011).
  void setetn(Register rd, Register rn, Register rs) {
    constexpr int Op1 = 0b11, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 0, Op1, Op2, rs, rn, rd);
  }

  // cpyp: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0000).
  void cpyp(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpym: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0000).
  void cpym(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpye: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0000).
  void cpye(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypwt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0001).
  void cpypwt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymwt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0001).
  void cpymwt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyewt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0001).
  void cpyewt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyprt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0010).
  void cpyprt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymrt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0010).
  void cpymrt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyert: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0010).
  void cpyert(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0011).
  void cpypt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymt: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0011).
  void cpymt(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyet: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0011).
  void cpyet(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0100).
  void cpypwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0100).
  void cpymwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyewn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0100).
  void cpyewn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypwtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0101).
  void cpypwtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymwtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0101).
  void cpymwtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyewtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0101).
  void cpyewtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyprtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0110).
  void cpyprtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymrtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0110).
  void cpymrtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyertwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0110).
  void cpyertwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0110;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyptwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b0111).
  void cpyptwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymtwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b0111).
  void cpymtwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyetwn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b0111).
  void cpyetwn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b0111;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyprn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1000).
  void cpyprn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1000).
  void cpymrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyern: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1000).
  void cpyern(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1000;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypwtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1001).
  void cpypwtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymwtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1001).
  void cpymwtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyewtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1001).
  void cpyewtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1001;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyprtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1010).
  void cpyprtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymrtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1010).
  void cpymrtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyertrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1010).
  void cpyertrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1010;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyptrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1011).
  void cpyptrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymtrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1011).
  void cpymtrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyetrn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1011).
  void cpyetrn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1011;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1100).
  void cpypn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1100).
  void cpymn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyen: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1100).
  void cpyen(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1100;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpypwtn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1101).
  void cpypwtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymwtn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1101).
  void cpymwtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyewtn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b10, 0b1101).
  void cpyewtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b10, Op2 = 0b1101;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpyprtn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b00, 0b1110).
  void cpyprtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b00, Op2 = 0b1110;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  // cpymrtn: MemoryCopyAndMemorySet with second argument 1 and selectors (0b01, 0b1110).
  void cpymrtn(Register rd, Register rs, Register rn) {
    constexpr int Op1 = 0b01, Op2 = 0b1110;
    MemoryCopyAndMemorySet(0, 1, Op1, Op2, rs, rn, rd);
  }
  void cpyertn(Register rd, Register rs, Register rn) {
    MemoryCopyAndMemorySet(0, 1, 0b10, 0b1110, rs, rn, rd);
  }
  // cpyptn / cpymtn / cpyetn: thin forwarders to MemoryCopyAndMemorySet.
  // The three differ only in the third argument (0b00, 0b01, 0b10); all pass 0b1111 as the fourth.
  // Operands are handed to the helper as (rs, rn, rd), not in the (rd, rs, rn) order of the signature.
  // NOTE(review): presumably the FEAT_MOPS CPYP/CPYM/CPYE forms with the TN option - confirm against the Arm ARM.
  void cpyptn(Register rd, Register rs, Register rn) {
    MemoryCopyAndMemorySet(0, 1, 0b00, 0b1111, rs, rn, rd);
  }
  void cpymtn(Register rd, Register rs, Register rn) {
    MemoryCopyAndMemorySet(0, 1, 0b01, 0b1111, rs, rn, rd);
  }
  void cpyetn(Register rd, Register rs, Register rn) {
    MemoryCopyAndMemorySet(0, 1, 0b10, 0b1111, rs, rn, rd);
  }

  // setgp / setgm / setge: thin forwarders to MemoryCopyAndMemorySet.
  // All pass 0b11 as the third argument; the fourth is 0b0000, 0b0100 and 0b1000 respectively.
  // The signature order is (rd, rn, rs) while the helper receives (rs, rn, rd).
  // NOTE(review): presumably the FEAT_MOPS tag-setting SETGP/SETGM/SETGE forms - confirm against the Arm ARM.
  void setgp(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0000, rs, rn, rd);
  }
  void setgm(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0100, rs, rn, rd);
  }
  void setge(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b1000, rs, rn, rd);
  }
  // setgpt / setgmt / setget: thin forwarders to MemoryCopyAndMemorySet.
  // All pass 0b11 as the third argument; the fourth is 0b0001, 0b0101 and 0b1001 respectively.
  // The signature order is (rd, rn, rs) while the helper receives (rs, rn, rd).
  // NOTE(review): presumably the "T" option of SETGP/SETGM/SETGE - confirm against the Arm ARM.
  void setgpt(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0001, rs, rn, rd);
  }
  void setgmt(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0101, rs, rn, rd);
  }
  void setget(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b1001, rs, rn, rd);
  }
  // setgpn / setgmn / setgen: thin forwarders to MemoryCopyAndMemorySet.
  // All pass 0b11 as the third argument; the fourth is 0b0010, 0b0110 and 0b1010 respectively.
  // The signature order is (rd, rn, rs) while the helper receives (rs, rn, rd).
  // NOTE(review): presumably the "N" option of SETGP/SETGM/SETGE - confirm against the Arm ARM.
  void setgpn(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0010, rs, rn, rd);
  }
  void setgmn(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0110, rs, rn, rd);
  }
  void setgen(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b1010, rs, rn, rd);
  }
  // setgptn / setgmtn / setgetn: thin forwarders to MemoryCopyAndMemorySet.
  // All pass 0b11 as the third argument; the fourth is 0b0011, 0b0111 and 0b1011 respectively.
  // The signature order is (rd, rn, rs) while the helper receives (rs, rn, rd).
  // NOTE(review): presumably the "TN" option of SETGP/SETGM/SETGE - confirm against the Arm ARM.
  void setgptn(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0011, rs, rn, rd);
  }
  void setgmtn(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b0111, rs, rn, rd);
  }
  void setgetn(Register rd, Register rn, Register rs) {
    MemoryCopyAndMemorySet(0, 1, 0b11, 0b1011, rs, rn, rd);
  }

  // Loadstore no-allocate pair
  // stnp, W-register pair: asserts that Imm is a multiple of 4 within [-256, 252], then passes
  // the opcode and the offset scaled down by 4 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void stnp(ARMEmitter::WRegister rt, ARMEmitter::WRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0010'1000'00 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 2) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // ldnp, W-register pair: asserts that Imm is a multiple of 4 within [-256, 252], then passes
  // the opcode and the offset scaled down by 4 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void ldnp(ARMEmitter::WRegister rt, ARMEmitter::WRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0010'1000'01 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 2) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // stnp, S-register pair: asserts that Imm is a multiple of 4 within [-256, 252], then passes
  // the opcode and the offset scaled down by 4 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void stnp(ARMEmitter::SRegister rt, ARMEmitter::SRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0010'1100'00 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 2) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // ldnp, S-register pair: asserts that Imm is a multiple of 4 within [-256, 252], then passes
  // the opcode and the offset scaled down by 4 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void ldnp(ARMEmitter::SRegister rt, ARMEmitter::SRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0010'1100'01 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 2) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // stnp, X-register pair: asserts that Imm is a multiple of 8 within [-512, 504], then passes
  // the opcode and the offset scaled down by 8 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void stnp(ARMEmitter::XRegister rt, ARMEmitter::XRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t Op = 0b1010'1000'00 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 3) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // ldnp, X-register pair: asserts that Imm is a multiple of 8 within [-512, 504], then passes
  // the opcode and the offset scaled down by 8 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void ldnp(ARMEmitter::XRegister rt, ARMEmitter::XRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t Op = 0b1010'1000'01 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 3) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // stnp, D-register pair: asserts that Imm is a multiple of 8 within [-512, 504], then passes
  // the opcode and the offset scaled down by 8 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void stnp(ARMEmitter::DRegister rt, ARMEmitter::DRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0110'1100'00 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 3) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // ldnp, D-register pair: asserts that Imm is a multiple of 8 within [-512, 504], then passes
  // the opcode and the offset scaled down by 8 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void ldnp(ARMEmitter::DRegister rt, ARMEmitter::DRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t Op = 0b0110'1100'01 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 3) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // stnp, Q-register pair: asserts that Imm is a multiple of 16 within [-1024, 1008], then passes
  // the opcode and the offset scaled down by 16 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void stnp(ARMEmitter::QRegister rt, ARMEmitter::QRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b1111) == 0) && Imm >= -1024 && Imm <= 1008, "Unscaled offset too large");

    constexpr uint32_t Op = 0b1010'1100'00 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 4) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // ldnp, Q-register pair: asserts that Imm is a multiple of 16 within [-1024, 1008], then passes
  // the opcode and the offset scaled down by 16 (masked to 7 bits) to LoadStoreNoAllocate.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  void ldnp(ARMEmitter::QRegister rt, ARMEmitter::QRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    LOGMAN_THROW_A_FMT(((Imm & 0b1111) == 0) && Imm >= -1024 && Imm <= 1008, "Unscaled offset too large");

    constexpr uint32_t Op = 0b1010'1100'01 << 22;
    const uint32_t Imm7 = static_cast<uint32_t>(Imm >> 4) & 0b111'1111;
    LoadStoreNoAllocate(Op, rt, rt2, rn, Imm7);
  }
  // Loadstore register pair post-indexed
  // Loadstore register pair offset
  // Loadstore register pair pre-indexed
  // stp, W-register pair. Index picks the addressing-mode bits that are OR'd into the opcode at
  // bit 23 (POST -> 0b01, PRE -> 0b11, OFFSET -> 0b10); any other IndexType yields -1.
  // Asserts that Imm is a multiple of 4 within [-256, 252] and forwards Imm / 4 masked to 7 bits.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  template<IndexType Index>
  void stp(ARMEmitter::WRegister rt, ARMEmitter::WRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t IndexBits = Index == IndexType::POST   ? (0b01 << 23) :
                                   Index == IndexType::PRE    ? (0b11 << 23) :
                                   Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                -1;
    constexpr uint32_t Op = (0b0010'1000'00 << 22) | IndexBits;

    const auto Imm7 = (Imm >> 2) & 0b111'1111;
    LoadStorePair(Op, rt, rt2, rn, Imm7);
  }
  // ldp, W-register pair. Index picks the addressing-mode bits that are OR'd into the opcode at
  // bit 23 (POST -> 0b01, PRE -> 0b11, OFFSET -> 0b10); any other IndexType yields -1.
  // Asserts that Imm is a multiple of 4 within [-256, 252] and forwards Imm / 4 masked to 7 bits.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  template<IndexType Index>
  void ldp(ARMEmitter::WRegister rt, ARMEmitter::WRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t IndexBits = Index == IndexType::POST   ? (0b01 << 23) :
                                   Index == IndexType::PRE    ? (0b11 << 23) :
                                   Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                -1;
    constexpr uint32_t Op = (0b0010'1000'01 << 22) | IndexBits;

    const auto Imm7 = (Imm >> 2) & 0b111'1111;
    LoadStorePair(Op, rt, rt2, rn, Imm7);
  }
  // ldpsw, X-register destinations. Index picks the addressing-mode bits that are OR'd into the
  // opcode at bit 23 (POST -> 0b01, PRE -> 0b11, OFFSET -> 0b10); any other IndexType yields -1.
  // Asserts that Imm is a multiple of 4 within [-256, 252] and forwards Imm / 4 masked to 7 bits.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  template<IndexType Index>
  void ldpsw(ARMEmitter::XRegister rt, ARMEmitter::XRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(((Imm & 0b11) == 0) && Imm >= -256 && Imm <= 252, "Unscaled offset too large");

    constexpr uint32_t IndexBits = Index == IndexType::POST   ? (0b01 << 23) :
                                   Index == IndexType::PRE    ? (0b11 << 23) :
                                   Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                -1;
    constexpr uint32_t Op = (0b0110'1000'01 << 22) | IndexBits;

    const auto Imm7 = (Imm >> 2) & 0b111'1111;
    LoadStorePair(Op, rt, rt2, rn, Imm7);
  }
  // stp, X-register pair. Index picks the addressing-mode bits that are OR'd into the opcode at
  // bit 23 (POST -> 0b01, PRE -> 0b11, OFFSET -> 0b10); any other IndexType yields -1.
  // Asserts that Imm is a multiple of 8 within [-512, 504] and forwards Imm / 8 masked to 7 bits.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  template<IndexType Index>
  void stp(ARMEmitter::XRegister rt, ARMEmitter::XRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t IndexBits = Index == IndexType::POST   ? (0b01 << 23) :
                                   Index == IndexType::PRE    ? (0b11 << 23) :
                                   Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                -1;
    constexpr uint32_t Op = (0b1010'1000'00 << 22) | IndexBits;

    const auto Imm7 = (Imm >> 3) & 0b111'1111;
    LoadStorePair(Op, rt, rt2, rn, Imm7);
  }
  // ldp, X-register pair. Index picks the addressing-mode bits that are OR'd into the opcode at
  // bit 23 (POST -> 0b01, PRE -> 0b11, OFFSET -> 0b10); any other IndexType yields -1.
  // Asserts that Imm is a multiple of 8 within [-512, 504] and forwards Imm / 8 masked to 7 bits.
  // NOTE(review): the assertion text says "Unscaled" although the offset is scaled and alignment is checked too.
  template<IndexType Index>
  void ldp(ARMEmitter::XRegister rt, ARMEmitter::XRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(((Imm & 0b111) == 0) && Imm >= -512 && Imm <= 504, "Unscaled offset too large");

    constexpr uint32_t IndexBits = Index == IndexType::POST   ? (0b01 << 23) :
                                   Index == IndexType::PRE    ? (0b11 << 23) :
                                   Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                -1;
    constexpr uint32_t Op = (0b1010'1000'01 << 22) | IndexBits;

    const auto Imm7 = (Imm >> 3) & 0b111'1111;
    LoadStorePair(Op, rt, rt2, rn, Imm7);
  }
  // stp / ldp for S registers: forward to the stp_w / ldp_w helpers with the same Index,
  // passing each register through .V(). Imm defaults to 0 and is forwarded unchanged.
  // The helpers are defined outside this view; any range checking of Imm happens there.
  template<IndexType Index>
  void stp(ARMEmitter::SRegister rt, ARMEmitter::SRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    stp_w<Index>(rt.V(), rt2.V(), rn, Imm);
  }
  template<IndexType Index>
  void ldp(ARMEmitter::SRegister rt, ARMEmitter::SRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldp_w<Index>(rt.V(), rt2.V(), rn, Imm);
  }
  // stp / ldp for D registers: forward to the stp_x / ldp_x helpers with the same Index,
  // passing each register through .V(). Imm defaults to 0 and is forwarded unchanged.
  // The helpers are defined outside this view; any range checking of Imm happens there.
  template<IndexType Index>
  void stp(ARMEmitter::DRegister rt, ARMEmitter::DRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    stp_x<Index>(rt.V(), rt2.V(), rn, Imm);
  }
  template<IndexType Index>
  void ldp(ARMEmitter::DRegister rt, ARMEmitter::DRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldp_x<Index>(rt.V(), rt2.V(), rn, Imm);
  }
  // stp / ldp for Q registers: forward to the stp_q / ldp_q helpers with the same Index,
  // passing each register through .V(). Imm defaults to 0 and is forwarded unchanged.
  // The helpers are defined outside this view; any range checking of Imm happens there.
  template<IndexType Index>
  void stp(ARMEmitter::QRegister rt, ARMEmitter::QRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    stp_q<Index>(rt.V(), rt2.V(), rn, Imm);
  }
  template<IndexType Index>
  void ldp(ARMEmitter::QRegister rt, ARMEmitter::QRegister rt2, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldp_q<Index>(rt.V(), rt2.V(), rn, Imm);
  }

  // Loadstore register unscaled immediate
  // Byte-sized unscaled forms. Each overload forwards (rt, rn, Imm) unchanged to the
  // stXrb / ldXrb / ldXrsb helper instantiated with IndexType::OFFSET. Imm defaults to 0.
  // The helpers are outside this view; any validation of Imm happens there.
  void sturb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrb<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldurb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrb<IndexType::OFFSET>(rt, rn, Imm);
  }
  // Vector-register overloads of the byte store/load.
  void sturb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrb<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldurb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrb<IndexType::OFFSET>(rt, rn, Imm);
  }
  // ldursb is overloaded on the destination register type (X or W).
  void ldursb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldursb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<IndexType::OFFSET>(rt, rn, Imm);
  }
  // Halfword-sized unscaled forms. Each overload forwards (rt, rn, Imm) unchanged to the
  // stXrh / ldXrh / ldXrsh helper instantiated with IndexType::OFFSET. Imm defaults to 0.
  // The helpers are outside this view; any validation of Imm happens there.
  void sturh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrh<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldurh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrh<IndexType::OFFSET>(rt, rn, Imm);
  }
  // Vector-register overloads of the halfword store/load.
  void sturh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrh<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldurh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrh<IndexType::OFFSET>(rt, rn, Imm);
  }
  // ldursh is overloaded on the destination register type (X or W).
  void ldursh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldursh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<IndexType::OFFSET>(rt, rn, Imm);
  }
  // stur / ldur / ldursw overloads. The register type of rt (W, S, X, D or Q) selects the overload;
  // each forwards (rt, rn, Imm) unchanged to stXr / ldXr / ldXrsw instantiated with IndexType::OFFSET.
  // Imm defaults to 0. The helpers are outside this view; any validation of Imm happens there.
  void stur(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldur(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void stur(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldur(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  // ldursw only exists with an X-register destination here.
  void ldursw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsw<IndexType::OFFSET>(rt, rn, Imm);
  }
  void stur(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldur(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void stur(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldur(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void stur(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  void ldur(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::OFFSET>(rt, rn, Imm);
  }
  // prfum: only instantiable with IndexType::OFFSET (enforced at compile time).
  // Asserts that Imm lies within [-256, 255] and forwards it, untouched, to LoadStoreImm
  // together with the opcode, an o2 value of 0b00, the prefetch operation and the base register.
  template<IndexType Index>
  void prfum(ARMEmitter::Prefetch prfop, ARMEmitter::Register rn, int32_t Imm = 0) {
    static_assert(Index == IndexType::OFFSET, "Doesn't support another index type");
    LOGMAN_THROW_A_FMT(Imm <= 255 && Imm >= -256, "Unscaled offset too large");

    constexpr uint32_t o2 = 0b00;
    constexpr uint32_t Op = 0b1111'1000'10 << 22;

    LoadStoreImm(Op, o2, prfop, rn, Imm);
  }

  // Loadstore register immediate post-indexed
  // Loadstore register immediate pre-indexed
  // Byte-sized pre/post-indexed forms. The requires-clause on every overload restricts Index to
  // IndexType::POST or IndexType::PRE; the body forwards (rt, rn, Imm) unchanged to the
  // stXrb / ldXrb / ldXrsb helper with that Index. Imm defaults to 0.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void strb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrb<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrb<Index>(rt, rn, Imm);
  }
  // Vector-register overloads.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void strb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrb<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrb<Index>(rt, rn, Imm);
  }
  // ldrsb is overloaded on the destination register type (X or W).
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrsb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrsb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<Index>(rt, rn, Imm);
  }
  // Halfword-sized pre/post-indexed forms. The requires-clause on every overload restricts Index
  // to IndexType::POST or IndexType::PRE; the body forwards (rt, rn, Imm) unchanged to the
  // stXrh / ldXrh / ldXrsh helper with that Index. Imm defaults to 0.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void strh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrh<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrh<Index>(rt, rn, Imm);
  }
  // Vector-register overloads.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void strh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrh<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrh<Index>(rt, rn, Imm);
  }
  // ldrsh is overloaded on the destination register type (X or W).
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrsh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrsh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<Index>(rt, rn, Imm);
  }
  // Pre/post-indexed str / ldr for W and S registers, plus ldrsw into an X register.
  // The requires-clause restricts Index to IndexType::POST or IndexType::PRE; the body forwards
  // (rt, rn, Imm) unchanged to stXr / ldXr / ldXrsw with that Index. Imm defaults to 0.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void str(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void str(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldr(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldrsw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsw<Index>(rt, rn, Imm);
  }
  // Pre/post-indexed str / ldr for X, D and Q registers.
  // The requires-clause restricts Index to IndexType::POST or IndexType::PRE; the body forwards
  // (rt, rn, Imm) unchanged to stXr / ldXr with that Index. Imm defaults to 0.
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void str(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void str(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldr(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void str(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<Index>(rt, rn, Imm);
  }
  template<IndexType Index>
  requires (Index == IndexType::POST || Index == IndexType::PRE)
  void ldr(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<Index>(rt, rn, Imm);
  }

  // Loadstore register unprivileged
  // Byte-sized unprivileged forms: forward (rt, rn, Imm) unchanged to stXrb / ldXrb / ldXrsb
  // instantiated with IndexType::UNPRIVILEGED. Imm defaults to 0.
  // ldtrsb is overloaded on the destination register type (X or W).
  void sttrb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrb<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrb<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrsb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrsb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsb<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  // Halfword-sized unprivileged forms: forward (rt, rn, Imm) unchanged to stXrh / ldXrh / ldXrsh
  // instantiated with IndexType::UNPRIVILEGED. Imm defaults to 0.
  // ldtrsh is overloaded on the destination register type (X or W).
  void sttrh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXrh<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrh<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrsh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrsh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsh<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  // Unprivileged sttr / ldtr for W and X registers, plus ldtrsw into an X register.
  // Each forwards (rt, rn, Imm) unchanged to stXr / ldXr / ldXrsw instantiated with
  // IndexType::UNPRIVILEGED. Imm defaults to 0.
  void sttr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtrsw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXrsw<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void sttr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    stXr<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  void ldtr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm = 0) {
    ldXr<IndexType::UNPRIVILEGED>(rt, rn, Imm);
  }
  // Atomic memory operations
  // stadd family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b000
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stadd(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b000, rs, Reg::zr, rn);
  }
  void staddl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b000, rs, Reg::zr, rn);
  }
  void stadda(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b000, rs, Reg::zr, rn);
  }
  void staddal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b000, rs, Reg::zr, rn);
  }
  // stclr family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b001
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stclr(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b001, rs, Reg::zr, rn);
  }
  void stclrl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b001, rs, Reg::zr, rn);
  }
  void stclra(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b001, rs, Reg::zr, rn);
  }
  void stclral(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b001, rs, Reg::zr, rn);
  }
  // stset family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b011
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stset(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b011, rs, Reg::zr, rn);
  }
  void stsetl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b011, rs, Reg::zr, rn);
  }
  void stseta(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b011, rs, Reg::zr, rn);
  }
  void stsetal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b011, rs, Reg::zr, rn);
  }
  // steor family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b010
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void steor(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b010, rs, Reg::zr, rn);
  }
  void steorl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b010, rs, Reg::zr, rn);
  }
  void steora(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b010, rs, Reg::zr, rn);
  }
  void steoral(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b010, rs, Reg::zr, rn);
  }
  // stsmax family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b100
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stsmax(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b100, rs, Reg::zr, rn);
  }
  void stsmaxl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b100, rs, Reg::zr, rn);
  }
  void stsmaxa(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b100, rs, Reg::zr, rn);
  }
  void stsmaxal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b100, rs, Reg::zr, rn);
  }
  // stsmin family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b101
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stsmin(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b101, rs, Reg::zr, rn);
  }
  void stsminl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b101, rs, Reg::zr, rn);
  }
  void stsmina(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b101, rs, Reg::zr, rn);
  }
  void stsminal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b101, rs, Reg::zr, rn);
  }
  // stumax family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b110
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stumax(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b110, rs, Reg::zr, rn);
  }
  void stumaxl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b110, rs, Reg::zr, rn);
  }
  void stumaxa(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b110, rs, Reg::zr, rn);
  }
  void stumaxal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b110, rs, Reg::zr, rn);
  }
  // stumin family: forwards to LoadStoreAtomicLSE with the fourth argument 0, operation selector 0b111
  // and Reg::zr in the slot between rs and rn (no result register is requested by the caller).
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void stumin(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b111, rs, Reg::zr, rn);
  }
  void stuminl(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b111, rs, Reg::zr, rn);
  }
  void stumina(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b111, rs, Reg::zr, rn);
  }
  void stuminal(SubRegSize size, Register rs, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b111, rs, Reg::zr, rn);
  }
  // ldswp family: the only group here that passes 1 as the fourth argument to LoadStoreAtomicLSE,
  // combined with operation selector 0b000. rs, rt and rn are forwarded in that order.
  // The plain / l / a / al suffixes map to the second and third arguments (0,0) / (0,1) / (1,0) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void ldswp(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 1, 0b000, rs, rt, rn);
  }
  void ldswpl(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 1, 0b000, rs, rt, rn);
  }
  void ldswpa(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 1, 0b000, rs, rt, rn);
  }
  void ldswpal(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 1, 0b000, rs, rt, rn);
  }

  // ldadd family: forwards to LoadStoreAtomicLSE with the fourth argument 0 and operation selector 0b000.
  // rs, rt and rn are forwarded in that order. Note the declaration order here is plain, a, l, al.
  // The plain / a / l / al suffixes map to the second and third arguments (0,0) / (1,0) / (0,1) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void ldadd(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b000, rs, rt, rn);
  }
  void ldadda(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b000, rs, rt, rn);
  }
  void ldaddl(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b000, rs, rt, rn);
  }
  void ldaddal(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b000, rs, rt, rn);
  }
  // ldclr family: forwards to LoadStoreAtomicLSE with the fourth argument 0 and operation selector 0b001.
  // rs, rt and rn are forwarded in that order. Note the declaration order here is plain, a, l, al.
  // The plain / a / l / al suffixes map to the second and third arguments (0,0) / (1,0) / (0,1) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void ldclr(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b001, rs, rt, rn);
  }
  void ldclra(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b001, rs, rt, rn);
  }
  void ldclrl(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b001, rs, rt, rn);
  }
  void ldclral(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b001, rs, rt, rn);
  }

  // ldset family: forwards to LoadStoreAtomicLSE with the fourth argument 0 and operation selector 0b011.
  // rs, rt and rn are forwarded in that order. Note the declaration order here is plain, a, l, al.
  // The plain / a / l / al suffixes map to the second and third arguments (0,0) / (1,0) / (0,1) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void ldset(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b011, rs, rt, rn);
  }
  void ldseta(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b011, rs, rt, rn);
  }
  void ldsetl(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b011, rs, rt, rn);
  }
  void ldsetal(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b011, rs, rt, rn);
  }
  // ldeor family: forwards to LoadStoreAtomicLSE with the fourth argument 0 and operation selector 0b010.
  // rs, rt and rn are forwarded in that order. Note the declaration order here is plain, a, l, al.
  // The plain / a / l / al suffixes map to the second and third arguments (0,0) / (1,0) / (0,1) / (1,1).
  // NOTE(review): presumably those two arguments are the acquire and release bits - confirm in LoadStoreAtomicLSE.
  void ldeor(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 0, 0, 0b010, rs, rt, rn);
  }
  void ldeora(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 0, 0, 0b010, rs, rt, rn);
  }
  void ldeorl(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 0, 1, 0, 0b010, rs, rt, rn);
  }
  void ldeoral(SubRegSize size, Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(size, 1, 1, 0, 0b010, rs, rt, rn);
  }


  // 8-bit
  // Byte atomics, plain ordering: size is fixed to SubRegSize::i8Bit and the second and third
  // arguments are both 0. The operation selector runs 0b000..0b111 for add, clr, eor, set, smax,
  // smin, umax and umin; ldswpb instead passes 1 as the fourth argument with selector 0b000.
  void ldaddb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b000, rs, rt, rn);
  }
  void ldclrb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b001, rs, rt, rn);
  }
  void ldeorb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b010, rs, rt, rn);
  }
  void ldsetb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b011, rs, rt, rn);
  }
  void ldsmaxb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b100, rs, rt, rn);
  }
  void ldsminb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b101, rs, rt, rn);
  }
  void ldumaxb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b110, rs, rt, rn);
  }
  void lduminb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 0, 0b111, rs, rt, rn);
  }
  void ldswpb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 0, 1, 0b000, rs, rt, rn);
  }
  // Byte atomics, "l" suffix: size is fixed to SubRegSize::i8Bit, second argument 0, third argument 1.
  // The operation selector runs 0b000..0b111 for add, clr, eor, set, smax, smin, umax and umin;
  // ldswplb instead passes 1 as the fourth argument with selector 0b000.
  // NOTE(review): presumably the third argument is the release bit - confirm in LoadStoreAtomicLSE.
  void ldaddlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b000, rs, rt, rn);
  }
  void ldclrlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b001, rs, rt, rn);
  }
  void ldeorlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b010, rs, rt, rn);
  }
  void ldsetlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b011, rs, rt, rn);
  }
  void ldsmaxlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b100, rs, rt, rn);
  }
  void ldsminlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b101, rs, rt, rn);
  }
  void ldumaxlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b110, rs, rt, rn);
  }
  void lduminlb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 0, 0b111, rs, rt, rn);
  }
  void ldswplb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 0, 1, 1, 0b000, rs, rt, rn);
  }
  // Byte atomics, "a" suffix: size is fixed to SubRegSize::i8Bit, second argument 1, third argument 0.
  // The operation selector runs 0b000..0b111 for add, clr, eor, set, smax, smin, umax and umin;
  // ldswpab instead passes 1 as the fourth argument with selector 0b000.
  // NOTE(review): presumably the second argument is the acquire bit - confirm in LoadStoreAtomicLSE.
  void ldaddab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b000, rs, rt, rn);
  }
  void ldclrab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b001, rs, rt, rn);
  }
  void ldeorab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b010, rs, rt, rn);
  }
  void ldsetab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b011, rs, rt, rn);
  }
  void ldsmaxab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b100, rs, rt, rn);
  }
  void ldsminab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b101, rs, rt, rn);
  }
  void ldumaxab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b110, rs, rt, rn);
  }
  void lduminab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 0, 0b111, rs, rt, rn);
  }
  void ldswpab(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 0, 1, 0b000, rs, rt, rn);
  }
  // Byte atomics, "al" suffix: size is fixed to SubRegSize::i8Bit, second and third arguments both 1.
  // The operation selector runs 0b000..0b111 for add, clr, eor, set, smax, smin, umax and umin;
  // ldswpalb instead passes 1 as the fourth argument with selector 0b000.
  // NOTE(review): presumably the second/third arguments are the acquire/release bits - confirm in LoadStoreAtomicLSE.
  void ldaddalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b000, rs, rt, rn);
  }
  void ldclralb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b001, rs, rt, rn);
  }
  void ldeoralb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b010, rs, rt, rn);
  }
  void ldsetalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b011, rs, rt, rn);
  }
  void ldsmaxalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b100, rs, rt, rn);
  }
  void ldsminalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b101, rs, rt, rn);
  }
  void ldumaxalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b110, rs, rt, rn);
  }
  void lduminalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 0, 0b111, rs, rt, rn);
  }
  void ldswpalb(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i8Bit, 1, 1, 1, 0b000, rs, rt, rn);
  }
  // 16-bit
  void ldaddh(Register rs, Register rt, Register rn) {
    LoadStoreAtomicLSE(SubRegSize::i16Bit, 0, 0, 0, 0b000, rs, rt, rn);
  }
  // LDCLRH (16-bit): passes A=0, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclrh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORH (16-bit): passes A=0, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeorh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETH (16-bit): passes A=0, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldseth(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXH (16-bit): passes A=0, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINH (16-bit): passes A=0, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXH (16-bit): passes A=0, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINH (16-bit): passes A=0, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPH (16-bit): passes A=0, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswph(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDLH (16-bit): passes A=0, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRLH (16-bit): passes A=0, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclrlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORLH (16-bit): passes A=0, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeorlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETLH (16-bit): passes A=0, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXLH (16-bit): passes A=0, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINLH (16-bit): passes A=0, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXLH (16-bit): passes A=0, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINLH (16-bit): passes A=0, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminlh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPLH (16-bit): passes A=0, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswplh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDAH (16-bit): passes A=1, R=0, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRAH (16-bit): passes A=1, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclrah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORAH (16-bit): passes A=1, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeorah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETAH (16-bit): passes A=1, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXAH (16-bit): passes A=1, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINAH (16-bit): passes A=1, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXAH (16-bit): passes A=1, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINAH (16-bit): passes A=1, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPAH (16-bit): passes A=1, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpah(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDALH (16-bit): passes A=1, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRALH (16-bit): passes A=1, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclralh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORALH (16-bit): passes A=1, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeoralh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETALH (16-bit): passes A=1, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXALH (16-bit): passes A=1, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINALH (16-bit): passes A=1, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXALH (16-bit): passes A=1, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINALH (16-bit): passes A=1, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPALH (16-bit): passes A=1, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpalh(Register rs, Register rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // 32-bit
  // LDADD (32-bit): passes A=0, R=0, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldadd(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLR (32-bit): passes A=0, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclr(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEOR (32-bit): passes A=0, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeor(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSET (32-bit): passes A=0, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldset(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAX (32-bit): passes A=0, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmax(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMIN (32-bit): passes A=0, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsmin(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAX (32-bit): passes A=0, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumax(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMIN (32-bit): passes A=0, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void ldumin(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWP (32-bit): passes A=0, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswp(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDL (32-bit): passes A=0, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRL (32-bit): passes A=0, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclrl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORL (32-bit): passes A=0, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeorl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETL (32-bit): passes A=0, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXL (32-bit): passes A=0, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINL (32-bit): passes A=0, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXL (32-bit): passes A=0, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINL (32-bit): passes A=0, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPL (32-bit): passes A=0, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpl(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDA (32-bit): passes A=1, R=0, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldadda(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRA (32-bit): passes A=1, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclra(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORA (32-bit): passes A=1, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeora(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETA (32-bit): passes A=1, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldseta(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXA (32-bit): passes A=1, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxa(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINA (32-bit): passes A=1, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsmina(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXA (32-bit): passes A=1, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxa(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINA (32-bit): passes A=1, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void ldumina(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPA (32-bit): passes A=1, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpa(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDAL (32-bit): passes A=1, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRAL (32-bit): passes A=1, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclral(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORAL (32-bit): passes A=1, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeoral(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETAL (32-bit): passes A=1, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXAL (32-bit): passes A=1, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINAL (32-bit): passes A=1, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXAL (32-bit): passes A=1, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINAL (32-bit): passes A=1, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPAL (32-bit): passes A=1, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpal(WRegister rs, WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // 64-bit
  // LDADD (64-bit): passes A=0, R=0, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldadd(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLR (64-bit): passes A=0, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclr(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEOR (64-bit): passes A=0, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeor(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSET (64-bit): passes A=0, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldset(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAX (64-bit): passes A=0, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmax(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMIN (64-bit): passes A=0, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsmin(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAX (64-bit): passes A=0, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumax(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMIN (64-bit): passes A=0, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void ldumin(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWP (64-bit): passes A=0, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswp(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDL (64-bit): passes A=0, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRL (64-bit): passes A=0, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclrl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORL (64-bit): passes A=0, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeorl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETL (64-bit): passes A=0, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXL (64-bit): passes A=0, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINL (64-bit): passes A=0, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXL (64-bit): passes A=0, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINL (64-bit): passes A=0, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPL (64-bit): passes A=0, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpl(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDA (64-bit): passes A=1, R=0, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldadda(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRA (64-bit): passes A=1, R=0, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclra(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORA (64-bit): passes A=1, R=0, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeora(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETA (64-bit): passes A=1, R=0, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldseta(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXA (64-bit): passes A=1, R=0, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxa(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINA (64-bit): passes A=1, R=0, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsmina(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXA (64-bit): passes A=1, R=0, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxa(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINA (64-bit): passes A=1, R=0, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void ldumina(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPA (64-bit): passes A=1, R=0, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpa(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDADDAL (64-bit): passes A=1, R=1, o3=0, opc=0b000 to LoadStoreAtomicLSE.
  void ldaddal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDCLRAL (64-bit): passes A=1, R=1, o3=0, opc=0b001 to LoadStoreAtomicLSE.
  void ldclral(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDEORAL (64-bit): passes A=1, R=1, o3=0, opc=0b010 to LoadStoreAtomicLSE.
  void ldeoral(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSETAL (64-bit): passes A=1, R=1, o3=0, opc=0b011 to LoadStoreAtomicLSE.
  void ldsetal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMAXAL (64-bit): passes A=1, R=1, o3=0, opc=0b100 to LoadStoreAtomicLSE.
  void ldsmaxal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSMINAL (64-bit): passes A=1, R=1, o3=0, opc=0b101 to LoadStoreAtomicLSE.
  void ldsminal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMAXAL (64-bit): passes A=1, R=1, o3=0, opc=0b110 to LoadStoreAtomicLSE.
  void ldumaxal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b110;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDUMINAL (64-bit): passes A=1, R=1, o3=0, opc=0b111 to LoadStoreAtomicLSE.
  void lduminal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 0, Opc = 0b111;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDSWPAL (64-bit): passes A=1, R=1, o3=1, opc=0b000 to LoadStoreAtomicLSE.
  void ldswpal(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 1, O3Bit = 1, Opc = 0b000;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // LDAPRB (8-bit): shares LoadStoreAtomicLSE with A=1, R=0, o3=1, opc=0b100;
  // the rs slot is always filled with WReg::w31.
  void ldaprb(WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i8Bit, AcqBit, RelBit, O3Bit, Opc, WReg::w31, rt, rn);
  }
  // LDAPRH (16-bit): shares LoadStoreAtomicLSE with A=1, R=0, o3=1, opc=0b100;
  // the rs slot is always filled with WReg::w31.
  void ldaprh(WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i16Bit, AcqBit, RelBit, O3Bit, Opc, WReg::w31, rt, rn);
  }
  // LDAPR (32-bit): shares LoadStoreAtomicLSE with A=1, R=0, o3=1, opc=0b100;
  // the rs slot is always filled with WReg::w31.
  void ldapr(WRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i32Bit, AcqBit, RelBit, O3Bit, Opc, WReg::w31, rt, rn);
  }
  // LDAPR (64-bit): shares LoadStoreAtomicLSE with A=1, R=0, o3=1, opc=0b100;
  // the rs slot is always filled with XReg::x31.
  void ldapr(XRegister rt, Register rn) {
    constexpr int AcqBit = 1, RelBit = 0, O3Bit = 1, Opc = 0b100;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, XReg::x31, rt, rn);
  }
  // ST64BV0: passes 64-bit size with A=0, R=0, o3=1, opc=0b010 to LoadStoreAtomicLSE.
  void st64bv0(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b010;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // ST64BV: passes 64-bit size with A=0, R=0, o3=1, opc=0b011 to LoadStoreAtomicLSE.
  void st64bv(XRegister rs, XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b011;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, rs, rt, rn);
  }
  // ST64B: passes 64-bit size with A=0, R=0, o3=1, opc=0b001 to LoadStoreAtomicLSE;
  // the rs slot is always filled with XReg::x31.
  void st64b(XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b001;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, XReg::x31, rt, rn);
  }
  // LD64B: passes 64-bit size with A=0, R=0, o3=1, opc=0b101 to LoadStoreAtomicLSE;
  // the rs slot is always filled with XReg::x31.
  void ld64b(XRegister rt, Register rn) {
    constexpr int AcqBit = 0, RelBit = 0, O3Bit = 1, Opc = 0b101;
    LoadStoreAtomicLSE(SubRegSize::i64Bit, AcqBit, RelBit, O3Bit, Opc, XReg::x31, rt, rn);
  }

  // Loadstore register-register offset
  // STRB, register-offset form. Asserts that bit 1 of the extend option is set, then
  // forwards the byte base encoding with opc=0b00; Shift is reduced to a 0/1 flag.
  void strb(ARMEmitter::Register rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, bool Shift = false) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRB, register-offset form. Asserts that bit 1 of the extend option is set, then
  // forwards the byte base encoding with opc=0b01; Shift is reduced to a 0/1 flag.
  void ldrb(ARMEmitter::Register rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, bool Shift = false) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRSB into an X register, register-offset form. Asserts that bit 1 of the extend
  // option is set, then forwards the byte base encoding with opc=0b10.
  void ldrsb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, bool Shift = false) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b10;
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRSB into a W register, register-offset form. Asserts that bit 1 of the extend
  // option is set, then forwards the byte base encoding with opc=0b11.
  void ldrsb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, bool Shift = false) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b11;
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STRH, register-offset form. Asserts the extend option and that Shift is 0 or 1,
  // then forwards the halfword base encoding with opc=0b00.
  void strh(ARMEmitter::Register rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRH, register-offset form. Asserts the extend option and that Shift is 0 or 1,
  // then forwards the halfword base encoding with opc=0b01.
  void ldrh(ARMEmitter::Register rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRSH into an X register, register-offset form. Asserts the extend option and
  // that Shift is 0 or 1, then forwards the halfword base encoding with opc=0b10.
  void ldrsh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b10;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRSH into a W register, register-offset form. Asserts the extend option and
  // that Shift is 0 or 1, then forwards the halfword base encoding with opc=0b11.
  void ldrsh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b11;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STR of a W register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 2, then forwards the 32-bit base encoding with opc=0b00.
  void str(ARMEmitter::WRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 2, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDR into a W register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 2, then forwards the 32-bit base encoding with opc=0b01.
  void ldr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 2, "Unsupported shift amount: {}", Shift);

    constexpr uint32_t BaseEncoding = (0b1011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRSW, register-offset form. Asserts the extend option and that Shift is 0 or 2,
  // then forwards the 32-bit base encoding with opc=0b10.
  void ldrsw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 2, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1011'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b10;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STR of an X register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 3, then forwards the 64-bit base encoding with opc=0b00.
  void str(ARMEmitter::XRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 3, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDR into an X register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 3, then forwards the 64-bit base encoding with opc=0b01.
  void ldr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 3, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // PRFM, register-offset form. Asserts the extend option and that Shift is 0 or 3,
  // then forwards the 64-bit base encoding with opc=0b10 and prfop in the rt slot.
  void prfm(ARMEmitter::Prefetch prfop, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 3, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1111'1000'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b10;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, prfop, rn, rm, Option, ScaleFlag);
  }
  // STRB of a vector register, register-offset form. Asserts the extend option, then
  // forwards the SIMD byte base encoding with opc=0b00 and a fixed shift flag of 0.
  void strb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    constexpr int ScaleFlag = 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRB into a vector register, register-offset form. Asserts the extend option, then
  // forwards the SIMD byte base encoding with opc=0b01 and a fixed shift flag of 0.
  void ldrb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");

    constexpr uint32_t BaseEncoding = (0b0011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    constexpr int ScaleFlag = 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STRH of a vector register, register-offset form. Asserts the extend option and
  // that Shift is 0 or 1, then forwards the SIMD halfword base encoding with opc=0b00.
  void strh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDRH into a vector register, register-offset form. Asserts the extend option and
  // that Shift is 0 or 1, then forwards the SIMD halfword base encoding with opc=0b01.
  void ldrh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 1, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0111'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STR of an S register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 2, then forwards the SIMD 32-bit base encoding with opc=0b00.
  void str(ARMEmitter::SRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 2, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDR into an S register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 2, then forwards the SIMD 32-bit base encoding with opc=0b01.
  void ldr(ARMEmitter::SRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 2, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STR of a D register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 3, then forwards the SIMD 64-bit base encoding with opc=0b00.
  void str(ARMEmitter::DRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 3, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1111'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b00;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDR into a D register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 3, then forwards the SIMD 64-bit base encoding with opc=0b01.
  void ldr(ARMEmitter::DRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 3, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b1111'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b01;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // STR of a Q register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 4. Uses the same base encoding as the SIMD byte forms, with opc=0b10.
  void str(ARMEmitter::QRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 4, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b10;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }
  // LDR into a Q register, register-offset form. Asserts the extend option and that
  // Shift is 0 or 4. Uses the same base encoding as the SIMD byte forms, with opc=0b11.
  void ldr(ARMEmitter::QRegister rt, ARMEmitter::Register rn, ARMEmitter::Register rm, ARMEmitter::ExtendedType Option, uint32_t Shift) {
    LOGMAN_THROW_A_FMT((FEXCore::ToUnderlying(Option) & 0b010) == 0b010, "Unsupported Extendtype");
    LOGMAN_THROW_A_FMT(Shift == 0 || Shift == 4, "Unsupported shift amount");

    constexpr uint32_t BaseEncoding = (0b0011'1100'001 << 21) | (0b10 << 10);
    constexpr int Opc = 0b11;
    // Any accepted non-zero shift collapses to a single flag.
    const int ScaleFlag = Shift ? 1 : 0;
    LoadStoreRegisterOffset(BaseEncoding, Opc, rt, rn, rm, Option, ScaleFlag);
  }

  // Byte store through an ExtendedMemOperand. Selects the register-offset, base-only,
  // or immediate (offset / post-index / pre-index) overload from MemSrc's metadata.
  void strb(ARMEmitter::Register rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    auto& Meta = MemSrc.MetaType;

    if (Meta.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      // An rm of r31 falls back to the base-register-only form.
      if (Meta.Extended.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
        strb(rt, MemSrc.rn, Meta.Extended.rm, Meta.Extended.Option, Meta.Extended.Shift);
      } else {
        strb(rt, MemSrc.rn);
      }
      return;
    }

    switch (Meta.ImmType.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Negative immediates go through sturb; everything else uses strb.
      if (Meta.ImmType.Imm < 0) {
        sturb(rt, MemSrc.rn, Meta.ImmType.Imm);
      } else {
        strb(rt, MemSrc.rn, Meta.ImmType.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST:
      strb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    case ARMEmitter::IndexType::PRE:
      strb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Byte load through an ExtendedMemOperand. Selects the register-offset, base-only,
  // or immediate (offset / post-index / pre-index) overload from MemSrc's metadata.
  void ldrb(ARMEmitter::Register rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    auto& Meta = MemSrc.MetaType;

    if (Meta.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      // An rm of r31 falls back to the base-register-only form.
      if (Meta.Extended.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
        ldrb(rt, MemSrc.rn, Meta.Extended.rm, Meta.Extended.Option, Meta.Extended.Shift);
      } else {
        ldrb(rt, MemSrc.rn);
      }
      return;
    }

    switch (Meta.ImmType.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Negative immediates go through ldurb; everything else uses ldrb.
      if (Meta.ImmType.Imm < 0) {
        ldurb(rt, MemSrc.rn, Meta.ImmType.Imm);
      } else {
        ldrb(rt, MemSrc.rn, Meta.ImmType.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST:
      ldrb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    case ARMEmitter::IndexType::PRE:
      ldrb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // LDRSB into an X register through an ExtendedMemOperand. Selects the register-offset,
  // base-only, or immediate (offset / post-index / pre-index) overload from MemSrc's metadata.
  void ldrsb(ARMEmitter::XRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    auto& Meta = MemSrc.MetaType;

    if (Meta.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      // An rm of r31 falls back to the base-register-only form.
      if (Meta.Extended.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
        ldrsb(rt, MemSrc.rn, Meta.Extended.rm, Meta.Extended.Option, Meta.Extended.Shift);
      } else {
        ldrsb(rt, MemSrc.rn);
      }
      return;
    }

    switch (Meta.ImmType.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Negative immediates go through ldursb; everything else uses ldrsb.
      if (Meta.ImmType.Imm < 0) {
        ldursb(rt, MemSrc.rn, Meta.ImmType.Imm);
      } else {
        ldrsb(rt, MemSrc.rn, Meta.ImmType.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST:
      ldrsb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    case ARMEmitter::IndexType::PRE:
      ldrsb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // LDRSB into a W register through an ExtendedMemOperand. Selects the register-offset,
  // base-only, or immediate (offset / post-index / pre-index) overload from MemSrc's metadata.
  void ldrsb(ARMEmitter::WRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    auto& Meta = MemSrc.MetaType;

    if (Meta.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      // An rm of r31 falls back to the base-register-only form.
      if (Meta.Extended.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
        ldrsb(rt, MemSrc.rn, Meta.Extended.rm, Meta.Extended.Option, Meta.Extended.Shift);
      } else {
        ldrsb(rt, MemSrc.rn);
      }
      return;
    }

    switch (Meta.ImmType.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Negative immediates go through ldursb; everything else uses ldrsb.
      if (Meta.ImmType.Imm < 0) {
        ldursb(rt, MemSrc.rn, Meta.ImmType.Imm);
      } else {
        ldrsb(rt, MemSrc.rn, Meta.ImmType.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST:
      ldrsb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    case ARMEmitter::IndexType::PRE:
      ldrsb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Halfword store through an ExtendedMemOperand. Selects the register-offset, base-only,
  // or immediate (offset / post-index / pre-index) overload from MemSrc's metadata.
  void strh(ARMEmitter::Register rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    auto& Meta = MemSrc.MetaType;

    if (Meta.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      // An rm of r31 falls back to the base-register-only form.
      if (Meta.Extended.rm.Idx() != ARMEmitter::Reg::r31.Idx()) {
        strh(rt, MemSrc.rn, Meta.Extended.rm, Meta.Extended.Option, Meta.Extended.Shift);
      } else {
        strh(rt, MemSrc.rn);
      }
      return;
    }

    switch (Meta.ImmType.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Odd or negative immediates go through sturh; everything else uses strh.
      if ((Meta.ImmType.Imm & 0b1) || Meta.ImmType.Imm < 0) {
        sturh(rt, MemSrc.rn, Meta.ImmType.Imm);
      } else {
        strh(rt, MemSrc.rn, Meta.ImmType.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST:
      strh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    case ARMEmitter::IndexType::PRE:
      strh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, Meta.ImmType.Imm);
      break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a halfword load into `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, odd or negative offsets use ldurh and everything else uses the scaled ldrh.
  void ldrh(ARMEmitter::Register rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrh(rt, MemSrc.rn);
      } else {
        ldrh(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 2-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1) != 0;
      if (UseUnscaled) {
        ldurh(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrh(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldrh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits ldrsh with an X-register destination for the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, odd or negative offsets use ldursh and everything else uses the scaled ldrsh.
  void ldrsh(ARMEmitter::XRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrsh(rt, MemSrc.rn);
      } else {
        ldrsh(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 2-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1) != 0;
      if (UseUnscaled) {
        ldursh(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrsh(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldrsh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrsh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits ldrsh with a W-register destination for the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, odd or negative offsets use ldursh and everything else uses the scaled ldrsh.
  void ldrsh(ARMEmitter::WRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrsh(rt, MemSrc.rn);
      } else {
        ldrsh(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 2-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1) != 0;
      if (UseUnscaled) {
        ldursh(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrsh(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldrsh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrsh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a store of W register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-4-byte-aligned offsets use stur and everything else uses the scaled str.
  void str(ARMEmitter::WRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        str(rt, MemSrc.rn);
      } else {
        str(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 4-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b11) != 0;
      if (UseUnscaled) {
        stur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        str(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: str<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: str<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a load into W register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-4-byte-aligned offsets use ldur and everything else uses the scaled ldr.
  void ldr(ARMEmitter::WRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldr(rt, MemSrc.rn);
      } else {
        ldr(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 4-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b11) != 0;
      if (UseUnscaled) {
        ldur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldr(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldr<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldr<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits ldrsw into X register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-4-byte-aligned offsets use ldursw and everything else uses the scaled ldrsw.
  void ldrsw(ARMEmitter::XRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrsw(rt, MemSrc.rn);
      } else {
        ldrsw(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 4-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b11) != 0;
      if (UseUnscaled) {
        ldursw(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrsw(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldrsw<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrsw<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a store of X register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-8-byte-aligned offsets use stur and everything else uses the scaled str.
  void str(ARMEmitter::XRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        str(rt, MemSrc.rn);
      } else {
        str(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 8-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b111) != 0;
      if (UseUnscaled) {
        stur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        str(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: str<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: str<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a load into X register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-8-byte-aligned offsets use ldur and everything else uses the scaled ldr.
  void ldr(ARMEmitter::XRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldr(rt, MemSrc.rn);
      } else {
        ldr(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 8-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b111) != 0;
      if (UseUnscaled) {
        ldur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldr(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldr<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldr<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a prefetch with operation `prfop` for the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands are only accepted with the OFFSET
  // index type: negative or non-8-byte-aligned offsets use prfum, everything else uses prfm.
  void prfm(ARMEmitter::Prefetch prfop, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        prfm(prfop, MemSrc.rn);
      } else {
        prfm(prfop, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    if (ImmOp.Index == ARMEmitter::IndexType::OFFSET) {
      // The scaled encoding only accepts non-negative, 8-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b111) != 0;
      if (UseUnscaled) {
        prfum<IndexType::OFFSET>(prfop, MemSrc.rn, ImmOp.Imm);
      } else {
        prfm(prfop, MemSrc.rn, ImmOp.Imm);
      }
    } else {
      // No pre-/post-indexed prefetch is dispatched here.
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }

  // Emits a byte store of vector register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form (asserting that no shift was
  // requested) unless rm is r31, in which case the base-register-only form is emitted.
  // Immediate operands dispatch on the index type; for OFFSET, negative offsets use sturb.
  void strb(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      if (MemSrc.MetaType.Extended.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        strb(rt, MemSrc.rn);
      } else {
        LOGMAN_THROW_A_FMT(MemSrc.MetaType.Extended.Shift == false, "Can't shift byte");
        strb(rt, MemSrc.rn, MemSrc.MetaType.Extended.rm, MemSrc.MetaType.Extended.Option);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Byte accesses have no alignment requirement; only the sign selects the encoding.
      if (ImmOp.Imm < 0) {
        sturb(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        strb(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST: strb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: strb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a byte load into vector register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form (asserting that no shift was
  // requested) unless rm is r31, in which case the base-register-only form is emitted.
  // Immediate operands dispatch on the index type; for OFFSET, negative offsets use ldurb.
  void ldrb(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      if (MemSrc.MetaType.Extended.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrb(rt, MemSrc.rn);
      } else {
        LOGMAN_THROW_A_FMT(MemSrc.MetaType.Extended.Shift == false, "Can't shift byte");
        ldrb(rt, MemSrc.rn, MemSrc.MetaType.Extended.rm, MemSrc.MetaType.Extended.Option);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET:
      // Byte accesses have no alignment requirement; only the sign selects the encoding.
      if (ImmOp.Imm < 0) {
        ldurb(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrb(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    case ARMEmitter::IndexType::POST: ldrb<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrb<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a halfword store of vector register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, odd or negative offsets use sturh and everything else uses the scaled strh.
  void strh(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        strh(rt, MemSrc.rn);
      } else {
        strh(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 2-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1) != 0;
      if (UseUnscaled) {
        sturh(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        strh(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: strh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: strh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a halfword load into vector register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, odd or negative offsets use ldurh and everything else uses the scaled ldrh.
  void ldrh(ARMEmitter::VRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldrh(rt, MemSrc.rn);
      } else {
        ldrh(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 2-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1) != 0;
      if (UseUnscaled) {
        ldurh(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldrh(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldrh<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldrh<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a store of S register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-4-byte-aligned offsets use stur and everything else uses the scaled str.
  void str(ARMEmitter::SRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        str(rt, MemSrc.rn);
      } else {
        str(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 4-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b11) != 0;
      if (UseUnscaled) {
        stur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        str(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: str<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: str<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a load into S register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-4-byte-aligned offsets use ldur and everything else uses the scaled ldr.
  void ldr(ARMEmitter::SRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldr(rt, MemSrc.rn);
      } else {
        ldr(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 4-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b11) != 0;
      if (UseUnscaled) {
        ldur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldr(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldr<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldr<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a store of D register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-8-byte-aligned offsets use stur and everything else uses the scaled str.
  void str(ARMEmitter::DRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        str(rt, MemSrc.rn);
      } else {
        str(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 8-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b111) != 0;
      if (UseUnscaled) {
        stur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        str(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: str<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: str<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a load into D register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-8-byte-aligned offsets use ldur and everything else uses the scaled ldr.
  void ldr(ARMEmitter::DRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldr(rt, MemSrc.rn);
      } else {
        ldr(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 8-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b111) != 0;
      if (UseUnscaled) {
        ldur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldr(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldr<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldr<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a store of Q register `rt` to the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-16-byte-aligned offsets use stur and everything else uses the scaled str.
  void str(ARMEmitter::QRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        str(rt, MemSrc.rn);
      } else {
        str(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 16-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1111) != 0;
      if (UseUnscaled) {
        stur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        str(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: str<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: str<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }
  // Emits a load into Q register `rt` from the location described by `MemSrc`.
  //
  // Register-offset operands use the register-offset form unless rm is r31, in which case the
  // base-register-only form is emitted. Immediate operands dispatch on the index type; for
  // OFFSET, negative or non-16-byte-aligned offsets use ldur and everything else uses the scaled ldr.
  void ldr(ARMEmitter::QRegister rt, ARMEmitter::ExtendedMemOperand MemSrc) {
    if (MemSrc.MetaType.Header.MemType == ARMEmitter::ExtendedMemOperand::Type::TYPE_EXTENDED) {
      const auto& RegOp = MemSrc.MetaType.Extended;
      if (RegOp.rm.Idx() == ARMEmitter::Reg::r31.Idx()) {
        ldr(rt, MemSrc.rn);
      } else {
        ldr(rt, MemSrc.rn, RegOp.rm, RegOp.Option, RegOp.Shift);
      }
      return;
    }

    const auto& ImmOp = MemSrc.MetaType.ImmType;
    switch (ImmOp.Index) {
    case ARMEmitter::IndexType::OFFSET: {
      // The scaled encoding only accepts non-negative, 16-byte aligned offsets.
      const bool UseUnscaled = ImmOp.Imm < 0 || (ImmOp.Imm & 0b1111) != 0;
      if (UseUnscaled) {
        ldur(rt, MemSrc.rn, ImmOp.Imm);
      } else {
        ldr(rt, MemSrc.rn, ImmOp.Imm);
      }
      break;
    }
    case ARMEmitter::IndexType::POST: ldr<ARMEmitter::IndexType::POST>(rt, MemSrc.rn, ImmOp.Imm); break;
    case ARMEmitter::IndexType::PRE: ldr<ARMEmitter::IndexType::PRE>(rt, MemSrc.rn, ImmOp.Imm); break;
    default:
      LOGMAN_MSG_A_FMT("Unexpected loadstore index type");
      FEX_UNREACHABLE;
    }
  }

  // Loadstore PAC
  // ldraa: PAC load encoded with size=0b11, VR=0, M=0.
  // `offset` must be a multiple of 8 within [-4096, 4088]; `type` must be OFFSET or PRE.
  void ldraa(XRegister rt, XRegister rn, IndexType type, int32_t offset = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VRField = 0;
    constexpr uint32_t MField = 0;
    LoadStorePAC(SizeField, VRField, MField, offset, type, rn, rt);
  }
  // ldrab: PAC load encoded with size=0b11, VR=0, M=1.
  // `offset` must be a multiple of 8 within [-4096, 4088]; `type` must be OFFSET or PRE.
  void ldrab(XRegister rt, XRegister rn, IndexType type, int32_t offset = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VRField = 0;
    constexpr uint32_t MField = 1;
    LoadStorePAC(SizeField, VRField, MField, offset, type, rn, rt);
  }

  // Loadstore unsigned immediate
  // Maximum values of unsigned immediate offsets for particular data sizes.
  // Each limit equals 4095 multiplied by 1, 2, 4, 8 and 16 respectively.
  static constexpr uint32_t LSByteMaxUnsignedOffset = 4095; // 4095 * 1
  static constexpr uint32_t LSHalfMaxUnsignedOffset = 8190; // 4095 * 2
  static constexpr uint32_t LSWordMaxUnsignedOffset = 16380; // 4095 * 4
  static constexpr uint32_t LSDWordMaxUnsignedOffset = 32760; // 4095 * 8
  static constexpr uint32_t LSQWordMaxUnsignedOffset = 65520; // 4095 * 16

  // strb, unsigned-immediate form: size=0b00, V=0, opc=0b00.
  void strb(Register rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrb, unsigned-immediate form: size=0b00, V=0, opc=0b01.
  void ldrb(Register rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrsb with an X-register destination, unsigned-immediate form: size=0b00, V=0, opc=0b10.
  void ldrsb(XRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b10;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrsb with a W-register destination, unsigned-immediate form: size=0b00, V=0, opc=0b11.
  void ldrsb(WRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b11;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // strb for a vector register, unsigned-immediate form: size=0b00, V=1, opc=0b00.
  void strb(VRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrb for a vector register, unsigned-immediate form: size=0b00, V=1, opc=0b01.
  void ldrb(VRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // strh, unsigned-immediate form: size=0b01, V=0, opc=0b00.
  void strh(Register rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrh, unsigned-immediate form: size=0b01, V=0, opc=0b01.
  void ldrh(Register rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrsh with an X-register destination, unsigned-immediate form: size=0b01, V=0, opc=0b10.
  void ldrsh(XRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b10;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrsh with a W-register destination, unsigned-immediate form: size=0b01, V=0, opc=0b11.
  void ldrsh(WRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b11;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // strh for a vector register, unsigned-immediate form: size=0b01, V=1, opc=0b00.
  void strh(VRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrh for a vector register, unsigned-immediate form: size=0b01, V=1, opc=0b01.
  void ldrh(VRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b01;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // str for a W register, unsigned-immediate form: size=0b10, V=0, opc=0b00.
  void str(WRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b10;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldr for a W register, unsigned-immediate form: size=0b10, V=0, opc=0b01.
  void ldr(WRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b10;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldrsw, unsigned-immediate form: size=0b10, V=0, opc=0b10.
  void ldrsw(XRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b10;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b10;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // str for an S register, unsigned-immediate form: size=0b10, V=1, opc=0b00.
  void str(SRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b10;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldr for an S register, unsigned-immediate form: size=0b10, V=1, opc=0b01.
  void ldr(SRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b10;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // str for an X register, unsigned-immediate form: size=0b11, V=0, opc=0b00.
  void str(XRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldr for an X register, unsigned-immediate form: size=0b11, V=0, opc=0b01.
  void ldr(XRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }

  // ldr, unsigned-immediate form, with the size field taken from `size` (V=0, opc=0b01).
  // Asserts that `size` is not SubRegSize::i128Bit.
  void ldr(SubRegSize size, Register rt, Register rn, uint32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // str, unsigned-immediate form, with the size field taken from `size` (V=0, opc=0b00).
  // Asserts that `size` is not SubRegSize::i128Bit.
  void str(SubRegSize size, Register rt, Register rn, uint32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    const uint32_t SizeField = FEXCore::ToUnderlying(size);
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }

  // prfm, unsigned-immediate form: size=0b11, V=0, opc=0b10, with `prfop` placed in the rt field.
  void prfm(Prefetch prfop, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VField = 0;
    constexpr uint32_t OpcField = 0b10;
    LoadStoreUnsigned(SizeField, VField, OpcField, prfop, rn, Imm);
  }
  // str for a D register, unsigned-immediate form: size=0b11, V=1, opc=0b00.
  void str(DRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b00;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldr for a D register, unsigned-immediate form: size=0b11, V=1, opc=0b01.
  void ldr(DRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b11;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b01;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // str for a Q register, unsigned-immediate form: size=0b00, V=1, opc=0b10.
  // LoadStoreUnsigned scales the offset by 16 for QRegister regardless of the size field.
  void str(QRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b10;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }
  // ldr for a Q register, unsigned-immediate form: size=0b00, V=1, opc=0b11.
  // LoadStoreUnsigned scales the offset by 16 for QRegister regardless of the size field.
  void ldr(QRegister rt, Register rn, uint32_t Imm = 0) {
    constexpr uint32_t SizeField = 0b00;
    constexpr uint32_t VField = 1;
    constexpr uint32_t OpcField = 0b11;
    LoadStoreUnsigned(SizeField, VField, OpcField, rt, rn, Imm);
  }

private:
  void AtomicOp(uint32_t Op, ARMEmitter::Size s, uint32_t L, uint32_t o0, ARMEmitter::Register rs, ARMEmitter::Register rt,
                ARMEmitter::Register rt2, ARMEmitter::Register rn) {
    const uint32_t sz = s == ARMEmitter::Size::i64Bit ? (1U << 30) : 0;
    uint32_t Instr = Op;

    Instr |= sz;
    Instr |= L << 22;
    Instr |= Encode_rs(rs);
    Instr |= o0 << 15;
    Instr |= Encode_rt2(rt2);
    Instr |= Encode_rn(rn);
    Instr |= Encode_rt(rt);

    dc32(Instr);
  }

  template<typename T>
  void SubAtomicOp(uint32_t Op, ARMEmitter::SubRegSize s, uint32_t L, uint32_t o0, T rs, T rt, T rt2, ARMEmitter::Register rn) {
    const uint32_t sz = FEXCore::ToUnderlying(s) << 30;
    uint32_t Instr = Op;

    Instr |= sz;
    Instr |= L << 22;
    Instr |= Encode_rs(rs);
    Instr |= o0 << 15;
    Instr |= Encode_rt2(rt2);
    Instr |= Encode_rn(rn);
    Instr |= Encode_rt(rt);

    dc32(Instr);
  }

  template<typename T>
  void SubAtomicImm(uint32_t Op, ARMEmitter::SubRegSize s, uint32_t opc, T rt, ARMEmitter::Register rn, uint32_t Imm) {
    const uint32_t sz = FEXCore::ToUnderlying(s) << 30;
    uint32_t Instr = Op;

    Instr |= sz;
    Instr |= opc << 22;
    Instr |= Imm << 12;
    Instr |= Encode_rn(rn);
    Instr |= Encode_rt(rt);

    dc32(Instr);
  }
  // Load register literal
  // Emits `Op` with `Imm` placed at bit 5 and `rt` placed through Encode_rt.
  // `Imm` is OR-ed in as given; no masking or range check happens here.
  template<typename T>
  void LoadStoreLiteral(uint32_t Op, T rt, uint32_t Imm) {
    uint32_t Encoding = Op;
    Encoding |= Imm << 5;
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

  // Emits one instruction word built on a fixed base pattern:
  // sz at bit 30, o0 at bit 26, op1 at bit 22, rs at bit 16, op2 at bit 12, rn at bit 5, rd at bit 0.
  void MemoryCopyAndMemorySet(uint32_t sz, uint32_t o0, uint32_t op1, uint32_t op2, Register rs, Register rn, Register rd) {
    constexpr uint32_t BaseOp = 0b0001'1001'0000'0000'0000'0100'0000'0000;
    uint32_t Encoding = BaseOp;

    // Selector fields.
    Encoding |= sz << 30;
    Encoding |= o0 << 26;
    Encoding |= op1 << 22;
    Encoding |= op2 << 12;

    // Register fields.
    Encoding |= rs.Idx() << 16;
    Encoding |= rn.Idx() << 5;
    Encoding |= rd.Idx();

    dc32(Encoding);
  }

  // Loadstore no-allocate pair
  // Emits `Op` with `Imm` placed at bit 15 and rt2/rn/rt placed through their Encode_* helpers.
  // `Imm` is OR-ed in as given; no masking or range check happens here.
  template<typename T>
  void LoadStoreNoAllocate(uint32_t Op, T rt, T rt2, ARMEmitter::Register rn, uint32_t Imm) {
    uint32_t Encoding = Op;
    Encoding |= Imm << 15;

    Encoding |= Encode_rt2(rt2);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }
  // Loadstore register pair post-indexed
  // Emits `Op` with `Imm` placed at bit 15 and rt2/rn/rt placed through their Encode_* helpers.
  // `Imm` is OR-ed in as given; no masking or range check happens here.
  template<typename T>
  void LoadStorePair(uint32_t Op, T rt, T rt2, ARMEmitter::Register rn, uint32_t Imm) {
    uint32_t Encoding = Op;
    Encoding |= Imm << 15;

    Encoding |= Encode_rt2(rt2);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

  // Loadstore register unscaled immediate
  // Loadstore register immediate post-indexed
  // Loadstore register unprivileged
  // Loadstore register immediate pre-indexed
  // Emits `Op` with `Imm` placed at bit 12, `o2` at bit 10, and rn/rt placed through
  // their Encode_* helpers. `Imm` is OR-ed in as given; no masking happens here.
  template<typename T>
  void LoadStoreImm(uint32_t Op, uint32_t o2, T rt, ARMEmitter::Register rn, uint32_t Imm) {
    uint32_t Encoding = Op;

    Encoding |= Imm << 12;
    Encoding |= o2 << 10;

    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

  // Atomic memory operations
  // Emits one instruction word built on a fixed base pattern:
  // size at bit 30, A at bit 23, R at bit 22, o3 at bit 15, opc at bit 12,
  // with rs/rn/rt placed through their Encode_* helpers.
  void LoadStoreAtomicLSE(SubRegSize s, uint32_t A, uint32_t R, uint32_t o3, uint32_t opc, Register rs, Register rt, Register rn) {
    constexpr uint32_t BaseOp = 0b0011'1000'0010'0000'0000'0000'0000'0000;
    uint32_t Encoding = BaseOp;

    // Selector fields.
    Encoding |= FEXCore::ToUnderlying(s) << 30;
    Encoding |= A << 23;
    Encoding |= R << 22;
    Encoding |= o3 << 15;
    Encoding |= opc << 12;

    // Register fields.
    Encoding |= Encode_rs(rs);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

  // Loadstore register-register offset
  // Emits `Op` with opc at bit 22, the extend option at bit 13, the shift flag at bit 12,
  // and rm/rn/rt placed through their Encode_* helpers.
  template<typename T>
  void LoadStoreRegisterOffset(uint32_t Op, uint32_t opc, T rt, ARMEmitter::Register rn, ARMEmitter::Register rm,
                               ARMEmitter::ExtendedType Option, uint32_t Shift) {
    uint32_t Encoding = Op;

    // Selector fields.
    Encoding |= opc << 22;
    Encoding |= FEXCore::ToUnderlying(Option) << 13;
    Encoding |= Shift << 12;

    // Register fields.
    Encoding |= Encode_rm(rm);
    Encoding |= Encode_rn(rn);
    Encoding |= Encode_rt(rt);

    dc32(Encoding);
  }

  // Emits a PAC load/store instruction word.
  //
  // `imm` must be a multiple of 8 within [-4096, 4088] and `type` must be OFFSET or PRE;
  // all three conditions are asserted. The immediate is divided by 8 and split into a
  // 9-bit field (bit 12) plus one extra bit, S (bit 22). Bit 11 (W) is set for PRE and
  // clear for OFFSET.
  void LoadStorePAC(uint32_t size, uint32_t VR, uint32_t M, int32_t imm, IndexType type, Register rn, Register rt) {
    LOGMAN_THROW_A_FMT((imm % 8) == 0, "Immediate ({}) must be divisible by 8", imm);
    LOGMAN_THROW_A_FMT(imm >= -4096 && imm <= 4088, "Immediate ({}) must be within [-4096, 4088]", imm);
    LOGMAN_THROW_A_FMT(type == IndexType::OFFSET || type == IndexType::PRE, "PAC may only use offset or pre-indexed values");

    // Scale the byte offset down to the 10-bit value that the encoding stores.
    const auto ScaledOffset = static_cast<uint32_t>(imm / 8);
    const auto LowNineBits = ScaledOffset & 0b1'1111'1111;
    const auto TopBit = (ScaledOffset >> 9) & 1;

    const auto WritebackBit = type == IndexType::OFFSET ? 0U : 1U;

    constexpr uint32_t BaseOp = 0b0011'1000'0010'0000'0000'0100'0000'0000;
    uint32_t Encoding = BaseOp;

    // Selector fields.
    Encoding |= size << 30;
    Encoding |= VR << 26;
    Encoding |= M << 23;

    // Immediate and index-mode fields.
    Encoding |= TopBit << 22;
    Encoding |= LowNineBits << 12;
    Encoding |= WritebackBit << 11;

    // Register fields.
    Encoding |= rn.Idx() << 5;
    Encoding |= rt.Idx();

    dc32(Encoding);
  }

  // Loadstore unsigned immediate
  // `Imm` is a byte offset. It must be a multiple of the access size and no larger
  // than LSByteMaxUnsignedOffset scaled by that size; it is stored divided by the size.
  template<typename T>
  void LoadStoreUnsigned(uint32_t size, uint32_t V, uint32_t opc, T rt, Register rn, uint32_t Imm) {
    // The 128-bit variant is encoded as size=0b00, V=1, opc=0b1x, so `size`
    // cannot serve as the scaling shift there. The register type is known at
    // compile time and selects the 16-byte scale instead.
    constexpr bool Is128Bit = std::is_same_v<T, QRegister>;
    const uint32_t ScaleShift = Is128Bit ? 4U : size;

    [[maybe_unused]] const uint32_t BytesPerElement = 1U << ScaleShift;
    [[maybe_unused]] const uint32_t LargestOffset = LSByteMaxUnsignedOffset << ScaleShift;

    LOGMAN_THROW_A_FMT(Imm <= LargestOffset, "{}: Offset not valid: Imm: 0x{:x} Max: 0x{:x}", __func__, Imm, LargestOffset);
    LOGMAN_THROW_A_FMT((Imm % BytesPerElement) == 0, "{}: Offset must be a multiple of {}. Offset: 0x{:x}", __func__, BytesPerElement, Imm);

    // The immediate field holds the offset in units of the access size.
    const uint32_t ScaledOffset = Imm >> ScaleShift;

    constexpr uint32_t Base = 0b0011'1001'0000'0000'0000'0000'0000'0000;
    uint32_t Encoded = Base;
    Encoded |= (size << 30) | (V << 26) | (opc << 22);
    Encoded |= ScaledOffset << 10;
    Encoded |= Encode_rn(rn);
    Encoded |= Encode_rt(rt);
    dc32(Encoded);
  }

  // Pair load into two vector registers using the 4-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 4 in a 7-bit two's-complement field.
  template<IndexType Index>
  void ldp_w(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 252 && ((Imm & 0b11) == 0), "Offset ({}) must be a multiple of 4 within [-256, 252]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b0010'1100'01 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 2) & 0b111'1111);
  }
  // Pair load into two vector registers using the 8-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 8 in a 7-bit two's-complement field.
  template<IndexType Index>
  void ldp_x(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -512 && Imm <= 504 && ((Imm & 0b111) == 0), "Offset ({}) must be a multiple of 8 within [-512, 504]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b0110'1100'01 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 3) & 0b111'1111);
  }
  // Pair store of two vector registers using the 4-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 4 in a 7-bit two's-complement field.
  template<IndexType Index>
  void stp_w(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 252 && ((Imm & 0b11) == 0), "Offset ({}) must be a multiple of 4 within [-256, 252]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b0010'1100'00 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 2) & 0b111'1111);
  }
  // Pair store of two vector registers using the 8-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 8 in a 7-bit two's-complement field.
  template<IndexType Index>
  void stp_x(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -512 && Imm <= 504 && ((Imm & 0b111) == 0), "Offset ({}) must be a multiple of 8 within [-512, 504]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b0110'1100'00 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 3) & 0b111'1111);
  }
  // Pair load into two vector registers using the 16-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 16 in a 7-bit two's-complement field.
  template<IndexType Index>
  void ldp_q(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -1024 && Imm <= 1008 && ((Imm & 0b1111) == 0), "Offset ({}) must be a multiple of 16 within [-1024, 1008]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b1010'1100'01 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 4) & 0b111'1111);
  }
  // Pair store of two vector registers using the 16-byte-scaled offset form.
  // Index selects post-indexed, pre-indexed or plain offset addressing via bits [24:23] of Op.
  // Imm is a byte offset; it is stored as Imm / 16 in a 7-bit two's-complement field.
  template<IndexType Index>
  void stp_q(ARMEmitter::VRegister rt, ARMEmitter::VRegister rt2, ARMEmitter::Register rn, int32_t Imm) {
    // The offset is scaled, so alignment matters as much as range. State the real
    // constraint and print the offending value.
    LOGMAN_THROW_A_FMT(Imm >= -1024 && Imm <= 1008 && ((Imm & 0b1111) == 0), "Offset ({}) must be a multiple of 16 within [-1024, 1008]", Imm);
    // Any Index other than POST/PRE/OFFSET collapses Op to all ones.
    constexpr uint32_t Op = (0b1010'1100'00 << 22) | (Index == IndexType::POST   ? (0b01 << 23) :
                                                      Index == IndexType::PRE    ? (0b11 << 23) :
                                                      Index == IndexType::OFFSET ? (0b10 << 23) :
                                                                                   -1);

    // Scale down and keep the low seven bits of the signed result.
    LoadStorePair(Op, rt, rt2, rn, (Imm >> 4) & 0b111'1111);
  }

  // stXrb, general-purpose register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXrb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1000'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrb, general-purpose register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrb(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1000'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXrb, vector register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXrb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1100'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrb, vector register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrb(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1100'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrsb with a 64-bit destination register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrsb(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1000'10 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrsb with a 32-bit destination register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrsb(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1000'11 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXrh, general-purpose register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXrh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1000'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrh, general-purpose register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrh(ARMEmitter::Register rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1000'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXrh, vector register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXrh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1100'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrh, vector register form.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrh(ARMEmitter::VRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1100'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrsh with a 64-bit destination register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrsh(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1000'10 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrsh with a 32-bit destination register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrsh(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0111'1000'11 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXr for a 32-bit general-purpose register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1011'1000'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXr for a 32-bit general-purpose register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXr(ARMEmitter::WRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1011'1000'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXr for a 32-bit scalar vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXr(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1011'1100'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXr for a 32-bit scalar vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXr(ARMEmitter::SRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1011'1100'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXrsw with a 64-bit destination register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXrsw(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1011'1000'10 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXr for a 64-bit general-purpose register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1111'1000'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXr for a 64-bit general-purpose register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXr(ARMEmitter::XRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1111'1000'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXr for a 64-bit vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXr(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1111'1100'00 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXr for a 64-bit vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXr(ARMEmitter::DRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b1111'1100'01 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // stXr for a 128-bit vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void stXr(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1100'10 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }
  // ldXr for a 128-bit vector register.
  // Index picks the addressing form (post, pre, offset, unprivileged); Imm is a
  // signed byte offset that must lie in [-256, 255].
  template<IndexType Index>
  void ldXr(ARMEmitter::QRegister rt, ARMEmitter::Register rn, int32_t Imm) {
    // Addressing-form selector, passed to LoadStoreImm as its o2 argument.
    constexpr uint32_t IndexBits = Index == IndexType::POST         ? 0b01 :
                                   Index == IndexType::PRE          ? 0b11 :
                                   Index == IndexType::OFFSET       ? 0b00 :
                                   Index == IndexType::UNPRIVILEGED ? 0b10 :
                                                                      -1;
    // Fixed opcode bits for this overload.
    constexpr uint32_t BaseOp = 0b0011'1100'11 << 22;

    LOGMAN_THROW_A_FMT(Imm >= -256 && Imm <= 255, "Unscaled offset too large");

    // Forward only the low nine bits of the signed offset.
    const uint32_t Imm9 = Imm & 0b1'1111'1111;
    LoadStoreImm(BaseOp, IndexBits, rt, rn, Imm9);
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct LoadstoreEmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/Registers.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/EnumUtils.h>

#include <compare>
#include <cstdint>
#include <type_traits>

namespace ARMEmitter {
class WRegister;
class XRegister;

/* General-purpose register handle without an operand size.
 * Only the register index is stored; W() and X() hand back the sized
 * wrappers for the same index.
 */
class Register {
public:
  // A register always names an index, so there is no default state.
  Register() = delete;

  constexpr explicit Register(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Sized views of this register; declared here, defined out of line.
  constexpr WRegister W() const;
  constexpr XRegister X() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const Register&, const Register&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<Register>);
static_assert(std::is_trivially_copyable_v<Register>);
static_assert(sizeof(Register) == sizeof(uint32_t));

/* General-purpose register handle tagged as 32-bit.
 * Passing this type tells the emitter to use the 32-bit register size.
 */
class WRegister {
public:
  // A register always names an index, so there is no default state.
  WRegister() = delete;

  constexpr explicit WRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator Register() const {
    return Register {Index};
  }

  // Explicit conversions; declared here, defined out of line.
  constexpr Register R() const;
  constexpr XRegister X() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const WRegister&, const WRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<WRegister>);
static_assert(std::is_trivially_copyable_v<WRegister>);
static_assert(sizeof(WRegister) == sizeof(uint32_t));

/* General-purpose register handle tagged as 64-bit.
 * Passing this type tells the emitter to use the 64-bit register size.
 */
class XRegister {
public:
  // A register always names an index, so there is no default state.
  XRegister() = delete;

  constexpr explicit XRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator Register() const {
    return Register {Index};
  }

  // Explicit conversions; declared here, defined out of line.
  constexpr Register R() const;
  constexpr WRegister W() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const XRegister&, const XRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<XRegister>);
static_assert(std::is_trivially_copyable_v<XRegister>);
static_assert(sizeof(XRegister) == sizeof(uint32_t));

// 32-bit view of the same register index.
inline constexpr WRegister Register::W() const {
  return WRegister(Index);
}

// 64-bit view of the same register index.
inline constexpr XRegister Register::X() const {
  return XRegister(Index);
}

// 64-bit view of the same register index.
inline constexpr XRegister WRegister::X() const {
  return XRegister(Index);
}

// Unsized view; spells out the conversion that `operator Register()` performs.
inline constexpr Register WRegister::R() const {
  return static_cast<Register>(*this);
}

// 32-bit view of the same register index.
inline constexpr WRegister XRegister::W() const {
  return WRegister(Index);
}

// Unsized view; spells out the conversion that `operator Register()` performs.
inline constexpr Register XRegister::R() const {
  return static_cast<Register>(*this);
}

// Unsized general-purpose register constants, r0 through r31, plus aliases.
namespace Reg {
  static constexpr Register r0 {0};
  static constexpr Register r1 {1};
  static constexpr Register r2 {2};
  static constexpr Register r3 {3};
  static constexpr Register r4 {4};
  static constexpr Register r5 {5};
  static constexpr Register r6 {6};
  static constexpr Register r7 {7};
  static constexpr Register r8 {8};
  static constexpr Register r9 {9};
  static constexpr Register r10 {10};
  static constexpr Register r11 {11};
  static constexpr Register r12 {12};
  static constexpr Register r13 {13};
  static constexpr Register r14 {14};
  static constexpr Register r15 {15};
  static constexpr Register r16 {16};
  static constexpr Register r17 {17};
  static constexpr Register r18 {18};
  static constexpr Register r19 {19};
  static constexpr Register r20 {20};
  static constexpr Register r21 {21};
  static constexpr Register r22 {22};
  static constexpr Register r23 {23};
  static constexpr Register r24 {24};
  static constexpr Register r25 {25};
  static constexpr Register r26 {26};
  static constexpr Register r27 {27};
  static constexpr Register r28 {28};
  static constexpr Register r29 {29};
  static constexpr Register r30 {30};
  static constexpr Register r31 {31};

  // Aliases by role. Note that rsp and zr share index 31.
  static constexpr Register ip0 {16};
  static constexpr Register ip1 {17};

  static constexpr Register fp {29};
  static constexpr Register lr {30};
  static constexpr Register rsp {31};
  static constexpr Register zr {31};
} // namespace Reg

// 64-bit general-purpose register constants, x0 through x31, plus aliases.
namespace XReg {
  static constexpr XRegister x0 {0};
  static constexpr XRegister x1 {1};
  static constexpr XRegister x2 {2};
  static constexpr XRegister x3 {3};
  static constexpr XRegister x4 {4};
  static constexpr XRegister x5 {5};
  static constexpr XRegister x6 {6};
  static constexpr XRegister x7 {7};
  static constexpr XRegister x8 {8};
  static constexpr XRegister x9 {9};
  static constexpr XRegister x10 {10};
  static constexpr XRegister x11 {11};
  static constexpr XRegister x12 {12};
  static constexpr XRegister x13 {13};
  static constexpr XRegister x14 {14};
  static constexpr XRegister x15 {15};
  static constexpr XRegister x16 {16};
  static constexpr XRegister x17 {17};
  static constexpr XRegister x18 {18};
  static constexpr XRegister x19 {19};
  static constexpr XRegister x20 {20};
  static constexpr XRegister x21 {21};
  static constexpr XRegister x22 {22};
  static constexpr XRegister x23 {23};
  static constexpr XRegister x24 {24};
  static constexpr XRegister x25 {25};
  static constexpr XRegister x26 {26};
  static constexpr XRegister x27 {27};
  static constexpr XRegister x28 {28};
  static constexpr XRegister x29 {29};
  static constexpr XRegister x30 {30};
  static constexpr XRegister x31 {31};

  // Aliases by role. Note that rsp and zr share index 31.
  static constexpr XRegister ip0 {16};
  static constexpr XRegister ip1 {17};

  static constexpr XRegister fp {29};
  static constexpr XRegister lr {30};
  static constexpr XRegister rsp {31};
  static constexpr XRegister zr {31};
} // namespace XReg

// 32-bit general-purpose register constants, w0 through w31, plus aliases.
namespace WReg {
  static constexpr WRegister w0 {0};
  static constexpr WRegister w1 {1};
  static constexpr WRegister w2 {2};
  static constexpr WRegister w3 {3};
  static constexpr WRegister w4 {4};
  static constexpr WRegister w5 {5};
  static constexpr WRegister w6 {6};
  static constexpr WRegister w7 {7};
  static constexpr WRegister w8 {8};
  static constexpr WRegister w9 {9};
  static constexpr WRegister w10 {10};
  static constexpr WRegister w11 {11};
  static constexpr WRegister w12 {12};
  static constexpr WRegister w13 {13};
  static constexpr WRegister w14 {14};
  static constexpr WRegister w15 {15};
  static constexpr WRegister w16 {16};
  static constexpr WRegister w17 {17};
  static constexpr WRegister w18 {18};
  static constexpr WRegister w19 {19};
  static constexpr WRegister w20 {20};
  static constexpr WRegister w21 {21};
  static constexpr WRegister w22 {22};
  static constexpr WRegister w23 {23};
  static constexpr WRegister w24 {24};
  static constexpr WRegister w25 {25};
  static constexpr WRegister w26 {26};
  static constexpr WRegister w27 {27};
  static constexpr WRegister w28 {28};
  static constexpr WRegister w29 {29};
  static constexpr WRegister w30 {30};
  static constexpr WRegister w31 {31};

  // Aliases by role. Note that rsp and zr share index 31.
  static constexpr WRegister ip0 {16};
  static constexpr WRegister ip1 {17};

  static constexpr WRegister fp {29};
  static constexpr WRegister lr {30};
  static constexpr WRegister rsp {31};
  static constexpr WRegister zr {31};
} // namespace WReg

class VRegister;
class BRegister;
class HRegister;
class SRegister;
class DRegister;
class QRegister;
class ZRegister;

/* ASIMD register handle with no size attached.
 * The type says nothing about width or about vector versus scalar use.
 * It does say the instruction is not using the register for SVE.
 */
class VRegister {
public:
  // A register always names an index, so there is no default state.
  VRegister() = delete;

  constexpr explicit VRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Conversions to the sized and SVE wrappers; declared here, defined out of line.
  constexpr BRegister B() const;
  constexpr HRegister H() const;
  constexpr SRegister S() const;
  constexpr DRegister D() const;
  constexpr QRegister Q() const;
  constexpr ZRegister Z() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const VRegister&, const VRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<VRegister>);
static_assert(std::is_trivially_copyable_v<VRegister>);
static_assert(sizeof(VRegister) == sizeof(uint32_t));

/* ASIMD register handle tagged as an 8-bit scalar.
 */
class BRegister {
public:
  // A register always names an index, so there is no default state.
  BRegister() = delete;

  constexpr explicit BRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator VRegister() const {
    return VRegister {Index};
  }

  // Conversions to the other wrappers; declared here, defined out of line.
  // NOTE(review): V() is declared to return BRegister rather than VRegister - confirm this is intended.
  constexpr BRegister V() const;
  constexpr HRegister H() const;
  constexpr SRegister S() const;
  constexpr DRegister D() const;
  constexpr QRegister Q() const;
  constexpr ZRegister Z() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const BRegister&, const BRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<BRegister>);
static_assert(std::is_trivially_copyable_v<BRegister>);
static_assert(sizeof(BRegister) == sizeof(uint32_t));

/* ASIMD register handle tagged as a 16-bit scalar.
 */
class HRegister {
public:
  // A register always names an index, so there is no default state.
  HRegister() = delete;

  constexpr explicit HRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator VRegister() const {
    return VRegister {Index};
  }

  // Conversions to the other wrappers; declared here, defined out of line.
  // NOTE(review): V() is declared to return HRegister rather than VRegister - confirm this is intended.
  constexpr HRegister V() const;
  constexpr BRegister B() const;
  constexpr SRegister S() const;
  constexpr DRegister D() const;
  constexpr QRegister Q() const;
  constexpr ZRegister Z() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const HRegister&, const HRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<HRegister>);
static_assert(std::is_trivially_copyable_v<HRegister>);
static_assert(sizeof(HRegister) == sizeof(uint32_t));

/* ASIMD register handle tagged as a 32-bit scalar.
 */
class SRegister {
public:
  // A register always names an index, so there is no default state.
  SRegister() = delete;

  constexpr explicit SRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator VRegister() const {
    return VRegister {Index};
  }

  // Conversions to the other wrappers; declared here, defined out of line.
  // NOTE(review): V() is declared to return SRegister rather than VRegister - confirm this is intended.
  constexpr SRegister V() const;
  constexpr BRegister B() const;
  constexpr HRegister H() const;
  constexpr DRegister D() const;
  constexpr QRegister Q() const;
  constexpr ZRegister Z() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const SRegister&, const SRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<SRegister>);
static_assert(std::is_trivially_copyable_v<SRegister>);
static_assert(sizeof(SRegister) == sizeof(uint32_t));

/* ASIMD register handle tagged as 64-bit.
 * The type does not distinguish vector from scalar use; it only marks the
 * instruction as operating at 64-bit.
 */
class DRegister {
public:
  // A register always names an index, so there is no default state.
  DRegister() = delete;

  constexpr explicit DRegister(uint32_t Idx)
    : Index(Idx) {}

  // Raw register number.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicitly drops the size tag, keeping the index.
  constexpr operator VRegister() const {
    return VRegister {Index};
  }

  // Conversions to the other wrappers; declared here, defined out of line.
  // NOTE(review): V() is declared to return DRegister rather than VRegister - confirm this is intended.
  constexpr DRegister V() const;
  constexpr BRegister B() const;
  constexpr HRegister H() const;
  constexpr SRegister S() const;
  constexpr QRegister Q() const;
  constexpr ZRegister Z() const;

  // Member-wise comparison on the stored index.
  friend constexpr auto operator<=>(const DRegister&, const DRegister&) = default;

private:
  uint32_t Index;
};
// The wrapper must stay a plain 32-bit value.
static_assert(std::is_standard_layout_v<DRegister>);
static_assert(std::is_trivially_copyable_v<DRegister>);
static_assert(sizeof(DRegister) == sizeof(uint32_t));

/* 128-bit ASIMD register class.
 * This type does not by itself imply vector or scalar use;
 * it is associated with operating the instruction at 128-bit.
 * The object stores nothing but the register index.
 */
class QRegister {
public:
  // A register always names an index, so default construction is disabled.
  QRegister() = delete;

  // Explicit: a bare integer never silently becomes a register.
  explicit constexpr QRegister(uint32_t Idx)
    : Index(Idx) {}

  // Defaulted comparison over the single stored index.
  friend constexpr auto operator<=>(const QRegister&, const QRegister&) = default;

  // Returns the register index.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Implicit conversion to the unsized ASIMD register with the same index.
  constexpr operator VRegister() const {
    return VRegister(Idx());
  }

  // Same-index views of this register as other register types.
  // Note that V() is declared to return QRegister, not VRegister.
  constexpr QRegister V() const;
  constexpr BRegister B() const;
  constexpr HRegister H() const;
  constexpr SRegister S() const;
  constexpr DRegister D() const;
  constexpr ZRegister Z() const;

private:
  uint32_t Index;
};
// Layout checks: same size as the index, trivially copyable, standard-layout.
static_assert(sizeof(uint32_t) == sizeof(QRegister));
static_assert(std::is_trivially_copyable_v<QRegister>);
static_assert(std::is_standard_layout_v<QRegister>);

/* Unsized SVE register class.
 * Using this type explicitly implies the instruction will operate using SVE.
 * The object stores nothing but the register index. Unlike the sized ASIMD
 * classes there is no implicit conversion to VRegister; use V() instead.
 */
class ZRegister {
public:
  // A register always names an index, so default construction is disabled.
  ZRegister() = delete;

  // Explicit: a bare integer never silently becomes a register.
  explicit constexpr ZRegister(uint32_t Idx)
    : Index(Idx) {}

  // Defaulted comparison over the single stored index.
  friend constexpr auto operator<=>(const ZRegister&, const ZRegister&) = default;

  // Returns the register index.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Same-index views of this register as the ASIMD register types.
  constexpr VRegister V() const;
  constexpr BRegister B() const;
  constexpr HRegister H() const;
  constexpr SRegister S() const;
  constexpr DRegister D() const;
  constexpr QRegister Q() const;

private:
  uint32_t Index;
};
// Layout checks: same size as the index, trivially copyable, standard-layout.
static_assert(sizeof(uint32_t) == sizeof(ZRegister));
static_assert(std::is_trivially_copyable_v<ZRegister>);
static_assert(std::is_standard_layout_v<ZRegister>);

// VRegister: each accessor returns the requested register type carrying the same index.
inline constexpr BRegister VRegister::B() const {
  return BRegister(Index);
}
inline constexpr HRegister VRegister::H() const {
  return HRegister(Index);
}
inline constexpr SRegister VRegister::S() const {
  return SRegister(Index);
}
inline constexpr DRegister VRegister::D() const {
  return DRegister(Index);
}
inline constexpr QRegister VRegister::Q() const {
  return QRegister(Index);
}
inline constexpr ZRegister VRegister::Z() const {
  return ZRegister(Index);
}

// BRegister: each accessor returns the requested register type carrying the same index.
// V() yields a BRegister (a copy of this register), not a VRegister.
inline constexpr BRegister BRegister::V() const {
  return BRegister(Index);
}
inline constexpr HRegister BRegister::H() const {
  return HRegister(Index);
}
inline constexpr SRegister BRegister::S() const {
  return SRegister(Index);
}
inline constexpr DRegister BRegister::D() const {
  return DRegister(Index);
}
inline constexpr QRegister BRegister::Q() const {
  return QRegister(Index);
}
inline constexpr ZRegister BRegister::Z() const {
  return ZRegister(Index);
}

// HRegister: each accessor returns the requested register type carrying the same index.
// V() yields an HRegister (a copy of this register), not a VRegister.
inline constexpr HRegister HRegister::V() const {
  return HRegister(Index);
}
inline constexpr BRegister HRegister::B() const {
  return BRegister(Index);
}
inline constexpr SRegister HRegister::S() const {
  return SRegister(Index);
}
inline constexpr DRegister HRegister::D() const {
  return DRegister(Index);
}
inline constexpr QRegister HRegister::Q() const {
  return QRegister(Index);
}
inline constexpr ZRegister HRegister::Z() const {
  return ZRegister(Index);
}

// SRegister: each accessor returns the requested register type carrying the same index.
// V() yields an SRegister (a copy of this register), not a VRegister.
inline constexpr SRegister SRegister::V() const {
  return SRegister(Index);
}
inline constexpr BRegister SRegister::B() const {
  return BRegister(Index);
}
inline constexpr HRegister SRegister::H() const {
  return HRegister(Index);
}
inline constexpr DRegister SRegister::D() const {
  return DRegister(Index);
}
inline constexpr QRegister SRegister::Q() const {
  return QRegister(Index);
}
inline constexpr ZRegister SRegister::Z() const {
  return ZRegister(Index);
}

// DRegister: each accessor returns the requested register type carrying the same index.
// V() yields a DRegister (a copy of this register), not a VRegister.
inline constexpr DRegister DRegister::V() const {
  return *this;
}
inline constexpr BRegister DRegister::B() const {
  return BRegister(Index);
}
inline constexpr HRegister DRegister::H() const {
  return HRegister(Index);
}
inline constexpr SRegister DRegister::S() const {
  return SRegister(Index);
}
inline constexpr QRegister DRegister::Q() const {
  return QRegister(Index);
}
inline constexpr ZRegister DRegister::Z() const {
  return ZRegister(Index);
}

// QRegister: each accessor returns the requested register type carrying the same index.
// V() yields a QRegister (a copy of this register), not a VRegister.
inline constexpr QRegister QRegister::V() const {
  return QRegister(Index);
}
inline constexpr BRegister QRegister::B() const {
  return BRegister(Index);
}
inline constexpr HRegister QRegister::H() const {
  return HRegister(Index);
}
inline constexpr SRegister QRegister::S() const {
  return SRegister(Index);
}
inline constexpr DRegister QRegister::D() const {
  return DRegister(Index);
}
inline constexpr ZRegister QRegister::Z() const {
  return ZRegister(Index);
}

// ZRegister: each accessor returns the requested ASIMD register type carrying the same index.
// Here V() really does produce a VRegister.
inline constexpr VRegister ZRegister::V() const {
  return VRegister(Idx());
}
inline constexpr BRegister ZRegister::B() const {
  return BRegister {Index};
}
inline constexpr HRegister ZRegister::H() const {
  return HRegister {Index};
}
inline constexpr SRegister ZRegister::S() const {
  return SRegister {Index};
}
inline constexpr DRegister ZRegister::D() const {
  return DRegister {Index};
}
inline constexpr QRegister ZRegister::Q() const {
  return QRegister {Index};
}

// Namespace containing all unsized ASIMD register objects.
namespace VReg {
  constexpr static VRegister v0(0);
  constexpr static VRegister v1(1);
  constexpr static VRegister v2(2);
  constexpr static VRegister v3(3);
  constexpr static VRegister v4(4);
  constexpr static VRegister v5(5);
  constexpr static VRegister v6(6);
  constexpr static VRegister v7(7);
  constexpr static VRegister v8(8);
  constexpr static VRegister v9(9);
  constexpr static VRegister v10(10);
  constexpr static VRegister v11(11);
  constexpr static VRegister v12(12);
  constexpr static VRegister v13(13);
  constexpr static VRegister v14(14);
  constexpr static VRegister v15(15);
  constexpr static VRegister v16(16);
  constexpr static VRegister v17(17);
  constexpr static VRegister v18(18);
  constexpr static VRegister v19(19);
  constexpr static VRegister v20(20);
  constexpr static VRegister v21(21);
  constexpr static VRegister v22(22);
  constexpr static VRegister v23(23);
  constexpr static VRegister v24(24);
  constexpr static VRegister v25(25);
  constexpr static VRegister v26(26);
  constexpr static VRegister v27(27);
  constexpr static VRegister v28(28);
  constexpr static VRegister v29(29);
  constexpr static VRegister v30(30);
  constexpr static VRegister v31(31);
} // namespace VReg

// Namespace containing all 8-bit ASIMD register objects.
namespace BReg {
  constexpr static BRegister b0(0);
  constexpr static BRegister b1(1);
  constexpr static BRegister b2(2);
  constexpr static BRegister b3(3);
  constexpr static BRegister b4(4);
  constexpr static BRegister b5(5);
  constexpr static BRegister b6(6);
  constexpr static BRegister b7(7);
  constexpr static BRegister b8(8);
  constexpr static BRegister b9(9);
  constexpr static BRegister b10(10);
  constexpr static BRegister b11(11);
  constexpr static BRegister b12(12);
  constexpr static BRegister b13(13);
  constexpr static BRegister b14(14);
  constexpr static BRegister b15(15);
  constexpr static BRegister b16(16);
  constexpr static BRegister b17(17);
  constexpr static BRegister b18(18);
  constexpr static BRegister b19(19);
  constexpr static BRegister b20(20);
  constexpr static BRegister b21(21);
  constexpr static BRegister b22(22);
  constexpr static BRegister b23(23);
  constexpr static BRegister b24(24);
  constexpr static BRegister b25(25);
  constexpr static BRegister b26(26);
  constexpr static BRegister b27(27);
  constexpr static BRegister b28(28);
  constexpr static BRegister b29(29);
  constexpr static BRegister b30(30);
  constexpr static BRegister b31(31);
} // namespace BReg

// Namespace containing all 16-bit ASIMD register objects.
namespace HReg {
  constexpr static HRegister h0(0);
  constexpr static HRegister h1(1);
  constexpr static HRegister h2(2);
  constexpr static HRegister h3(3);
  constexpr static HRegister h4(4);
  constexpr static HRegister h5(5);
  constexpr static HRegister h6(6);
  constexpr static HRegister h7(7);
  constexpr static HRegister h8(8);
  constexpr static HRegister h9(9);
  constexpr static HRegister h10(10);
  constexpr static HRegister h11(11);
  constexpr static HRegister h12(12);
  constexpr static HRegister h13(13);
  constexpr static HRegister h14(14);
  constexpr static HRegister h15(15);
  constexpr static HRegister h16(16);
  constexpr static HRegister h17(17);
  constexpr static HRegister h18(18);
  constexpr static HRegister h19(19);
  constexpr static HRegister h20(20);
  constexpr static HRegister h21(21);
  constexpr static HRegister h22(22);
  constexpr static HRegister h23(23);
  constexpr static HRegister h24(24);
  constexpr static HRegister h25(25);
  constexpr static HRegister h26(26);
  constexpr static HRegister h27(27);
  constexpr static HRegister h28(28);
  constexpr static HRegister h29(29);
  constexpr static HRegister h30(30);
  constexpr static HRegister h31(31);
} // namespace HReg

// Namespace containing all 32-bit ASIMD register objects.
namespace SReg {
  constexpr static SRegister s0(0);
  constexpr static SRegister s1(1);
  constexpr static SRegister s2(2);
  constexpr static SRegister s3(3);
  constexpr static SRegister s4(4);
  constexpr static SRegister s5(5);
  constexpr static SRegister s6(6);
  constexpr static SRegister s7(7);
  constexpr static SRegister s8(8);
  constexpr static SRegister s9(9);
  constexpr static SRegister s10(10);
  constexpr static SRegister s11(11);
  constexpr static SRegister s12(12);
  constexpr static SRegister s13(13);
  constexpr static SRegister s14(14);
  constexpr static SRegister s15(15);
  constexpr static SRegister s16(16);
  constexpr static SRegister s17(17);
  constexpr static SRegister s18(18);
  constexpr static SRegister s19(19);
  constexpr static SRegister s20(20);
  constexpr static SRegister s21(21);
  constexpr static SRegister s22(22);
  constexpr static SRegister s23(23);
  constexpr static SRegister s24(24);
  constexpr static SRegister s25(25);
  constexpr static SRegister s26(26);
  constexpr static SRegister s27(27);
  constexpr static SRegister s28(28);
  constexpr static SRegister s29(29);
  constexpr static SRegister s30(30);
  constexpr static SRegister s31(31);
} // namespace SReg

// Namespace containing all 64-bit ASIMD register objects.
namespace DReg {
  constexpr static DRegister d0(0);
  constexpr static DRegister d1(1);
  constexpr static DRegister d2(2);
  constexpr static DRegister d3(3);
  constexpr static DRegister d4(4);
  constexpr static DRegister d5(5);
  constexpr static DRegister d6(6);
  constexpr static DRegister d7(7);
  constexpr static DRegister d8(8);
  constexpr static DRegister d9(9);
  constexpr static DRegister d10(10);
  constexpr static DRegister d11(11);
  constexpr static DRegister d12(12);
  constexpr static DRegister d13(13);
  constexpr static DRegister d14(14);
  constexpr static DRegister d15(15);
  constexpr static DRegister d16(16);
  constexpr static DRegister d17(17);
  constexpr static DRegister d18(18);
  constexpr static DRegister d19(19);
  constexpr static DRegister d20(20);
  constexpr static DRegister d21(21);
  constexpr static DRegister d22(22);
  constexpr static DRegister d23(23);
  constexpr static DRegister d24(24);
  constexpr static DRegister d25(25);
  constexpr static DRegister d26(26);
  constexpr static DRegister d27(27);
  constexpr static DRegister d28(28);
  constexpr static DRegister d29(29);
  constexpr static DRegister d30(30);
  constexpr static DRegister d31(31);
} // namespace DReg

// Namespace containing all 128-bit ASIMD register objects.
namespace QReg {
  constexpr static QRegister q0(0);
  constexpr static QRegister q1(1);
  constexpr static QRegister q2(2);
  constexpr static QRegister q3(3);
  constexpr static QRegister q4(4);
  constexpr static QRegister q5(5);
  constexpr static QRegister q6(6);
  constexpr static QRegister q7(7);
  constexpr static QRegister q8(8);
  constexpr static QRegister q9(9);
  constexpr static QRegister q10(10);
  constexpr static QRegister q11(11);
  constexpr static QRegister q12(12);
  constexpr static QRegister q13(13);
  constexpr static QRegister q14(14);
  constexpr static QRegister q15(15);
  constexpr static QRegister q16(16);
  constexpr static QRegister q17(17);
  constexpr static QRegister q18(18);
  constexpr static QRegister q19(19);
  constexpr static QRegister q20(20);
  constexpr static QRegister q21(21);
  constexpr static QRegister q22(22);
  constexpr static QRegister q23(23);
  constexpr static QRegister q24(24);
  constexpr static QRegister q25(25);
  constexpr static QRegister q26(26);
  constexpr static QRegister q27(27);
  constexpr static QRegister q28(28);
  constexpr static QRegister q29(29);
  constexpr static QRegister q30(30);
  constexpr static QRegister q31(31);
} // namespace QReg

// Namespace containing all unsized SVE register objects.
namespace ZReg {
  constexpr static ZRegister z0(0);
  constexpr static ZRegister z1(1);
  constexpr static ZRegister z2(2);
  constexpr static ZRegister z3(3);
  constexpr static ZRegister z4(4);
  constexpr static ZRegister z5(5);
  constexpr static ZRegister z6(6);
  constexpr static ZRegister z7(7);
  constexpr static ZRegister z8(8);
  constexpr static ZRegister z9(9);
  constexpr static ZRegister z10(10);
  constexpr static ZRegister z11(11);
  constexpr static ZRegister z12(12);
  constexpr static ZRegister z13(13);
  constexpr static ZRegister z14(14);
  constexpr static ZRegister z15(15);
  constexpr static ZRegister z16(16);
  constexpr static ZRegister z17(17);
  constexpr static ZRegister z18(18);
  constexpr static ZRegister z19(19);
  constexpr static ZRegister z20(20);
  constexpr static ZRegister z21(21);
  constexpr static ZRegister z22(22);
  constexpr static ZRegister z23(23);
  constexpr static ZRegister z24(24);
  constexpr static ZRegister z25(25);
  constexpr static ZRegister z26(26);
  constexpr static ZRegister z27(27);
  constexpr static ZRegister z28(28);
  constexpr static ZRegister z29(29);
  constexpr static ZRegister z30(30);
  constexpr static ZRegister z31(31);
} // namespace ZReg

// Zero-cost FPR->GPR
// Each overload builds a Register that carries the same index as the given vector-side register.
inline constexpr Register ToReg(HRegister Reg) {
  const auto Index = Reg.Idx();
  return Register(Index);
}
inline constexpr Register ToReg(SRegister Reg) {
  const auto Index = Reg.Idx();
  return Register(Index);
}
inline constexpr Register ToReg(DRegister Reg) {
  const auto Index = Reg.Idx();
  return Register(Index);
}
inline constexpr Register ToReg(VRegister Reg) {
  const auto Index = Reg.Idx();
  return Register(Index);
}

// Zero-cost GPR->FPR
// Each overload builds a VRegister that carries the same index as the given general-purpose register.
inline constexpr VRegister ToVReg(Register Reg) {
  const auto Index = Reg.Idx();
  return VRegister(Index);
}
inline constexpr VRegister ToVReg(XRegister Reg) {
  const auto Index = Reg.Idx();
  return VRegister(Index);
}
inline constexpr VRegister ToVReg(WRegister Reg) {
  const auto Index = Reg.Idx();
  return VRegister(Index);
}

// Forward declarations so PRegister can name its zeroing/merging counterparts.
class PRegisterZero;
class PRegisterMerge;

/* Unsized predicate register for SVE.
 * This is unsized because of how SVE operates.
 * The object stores nothing but the predicate register index.
 */
class PRegister {
public:
  // A register always names an index, so default construction is disabled.
  PRegister() = delete;

  // Non-explicit: a plain index converts implicitly to a PRegister.
  constexpr PRegister(uint32_t Idx)
    : Index(Idx) {}

  // Defaulted comparison over the single stored index.
  friend constexpr auto operator<=>(const PRegister&, const PRegister&) = default;

  // Returns the predicate register index.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Same-index predicate tagged with zeroing or merging semantics.
  constexpr PRegisterZero Zeroing() const;
  constexpr PRegisterMerge Merging() const;

private:
  uint32_t Index;
};
// Layout checks: same size as the index, trivially copyable, standard-layout.
static_assert(sizeof(uint32_t) == sizeof(PRegister));
static_assert(std::is_trivially_copyable_v<PRegister>);
static_assert(std::is_standard_layout_v<PRegister>);

// Unsized predicate register for SVE with zeroing semantics.
// The object stores nothing but the predicate register index.
class PRegisterZero {
public:
  // A register always names an index, so default construction is disabled.
  PRegisterZero() = delete;

  // Non-explicit: a plain index converts implicitly to a PRegisterZero.
  constexpr PRegisterZero(uint32_t Idx)
    : Index(Idx) {}

  // Defaulted comparison over the single stored index.
  friend constexpr auto operator<=>(const PRegisterZero&, const PRegisterZero&) = default;

  // Returns the predicate register index.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Untagged predicate register with the same index.
  constexpr PRegister P() const {
    return PRegister {Index};
  }
  // Implicit conversion to the untagged predicate register; same result as P().
  constexpr operator PRegister() const {
    return P();
  }
  // Same-index predicate tagged with merging semantics instead.
  constexpr PRegisterMerge Merging() const;

private:
  uint32_t Index;
};
// Layout checks: same size as the index, trivially copyable, standard-layout.
static_assert(sizeof(uint32_t) == sizeof(PRegisterZero));
static_assert(std::is_trivially_copyable_v<PRegisterZero>);
static_assert(std::is_standard_layout_v<PRegisterZero>);

// Unsized predicate register for SVE with merging semantics.
// The object stores nothing but the predicate register index.
class PRegisterMerge {
public:
  // A register always names an index, so default construction is disabled.
  PRegisterMerge() = delete;

  // Non-explicit: a plain index converts implicitly to a PRegisterMerge.
  constexpr PRegisterMerge(uint32_t Idx)
    : Index(Idx) {}

  // Defaulted comparison over the single stored index.
  friend constexpr auto operator<=>(const PRegisterMerge&, const PRegisterMerge&) = default;

  // Returns the predicate register index.
  constexpr uint32_t Idx() const {
    return Index;
  }

  // Untagged predicate register with the same index.
  constexpr PRegister P() const {
    return PRegister {Index};
  }
  // Implicit conversion to the untagged predicate register; same result as P().
  constexpr operator PRegister() const {
    return P();
  }
  // Same-index predicate tagged with zeroing semantics instead.
  constexpr PRegisterZero Zeroing() const;

private:
  uint32_t Index;
};
// Layout checks: same size as the index, trivially copyable, standard-layout.
static_assert(sizeof(uint32_t) == sizeof(PRegisterMerge));
static_assert(std::is_trivially_copyable_v<PRegisterMerge>);
static_assert(std::is_standard_layout_v<PRegisterMerge>);

// PRegister: re-tag the same predicate index with zeroing or merging semantics.
inline constexpr PRegisterZero PRegister::Zeroing() const {
  return PRegisterZero {Index};
}
inline constexpr PRegisterMerge PRegister::Merging() const {
  return PRegisterMerge {Index};
}

// PRegisterZero: switch the same predicate index over to merging semantics.
inline constexpr PRegisterMerge PRegisterZero::Merging() const {
  return PRegisterMerge {Index};
}

// PRegisterMerge: switch the same predicate index over to zeroing semantics.
inline constexpr PRegisterZero PRegisterMerge::Zeroing() const {
  return PRegisterZero {Index};
}

// Namespace containing all unsized SVE predicate register objects.
namespace PReg {
  constexpr static PRegister p0(0);
  constexpr static PRegister p1(1);
  constexpr static PRegister p2(2);
  constexpr static PRegister p3(3);
  constexpr static PRegister p4(4);
  constexpr static PRegister p5(5);
  constexpr static PRegister p6(6);
  constexpr static PRegister p7(7);
  constexpr static PRegister p8(8);
  constexpr static PRegister p9(9);
  constexpr static PRegister p10(10);
  constexpr static PRegister p11(11);
  constexpr static PRegister p12(12);
  constexpr static PRegister p13(13);
  constexpr static PRegister p14(14);
  constexpr static PRegister p15(15);
} // namespace PReg

/* `OpType` selects between the two forms of an SVE instruction when the
 * instruction supports both a destructive and a constructive encoding.
 * Not all SVE instructions support this.
 */
enum class OpType : uint32_t {
  Destructive = 0,
  Constructive = 1,
};
} // namespace ARMEmitter


================================================
FILE: CodeEmitter/CodeEmitter/SVEOps.inl
================================================
// SPDX-License-Identifier: MIT
/* SVE instruction emitters
 * These contain instruction emitters for AArch64 SVE and SVE2 operations.
 *
 * All of these SVE emitters have a `SubRegSize` as their first argument to set the element size on the instruction.
 * Since nearly every SVE instruction is unsized they don't need more than `ZRegister` and `PRegister` arguments.
 *
 * Most predicated instructions take a `PRegister` argument, not explicitly stating if it is merging or zeroing behaviour.
 * This is because the instruction only supports one style.
 * For instructions that take an explicit `PRegisterMerge` or `PRegisterZero`, then this instruction likely
 * supports both so we support both implementations depending on predicate register type.
 *
 * Some instructions take a templated `OpType` to choose between a destructive or constructive version of the instruction.
 *
 * Some instructions support the `i128Bit` SubRegSize, mostly around data movement.
 *
 * There are some SVE load-store helper functions which take a `SVEMemOperand` argument.
 * This helper will select the viable SVE load-store that can work with the provided encapsulated arguments.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // SVE encodings
  // DUP (indexed) emitter: forwards to SVEDupIndexed.
  // Note the helper takes the source register (zn) before the destination (zd).
  void dup(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Index) {
    SVEDupIndexed(size, zn, zd, Index);
  }

  // Vector SEL emitter: forwards to SVESel.
  // The helper takes its register operands in reverse order (zm, pv, zn, zd).
  void sel(SubRegSize size, ZRegister zd, PRegister pv, ZRegister zn, ZRegister zm) {
    SVESel(size, zm, pv, zn, zd);
  }
  // Merging vector move, expressed as a SEL whose second source is the destination itself:
  // sel(size, zd, pv, zn, zd). The PRegisterMerge converts implicitly to the PRegister that sel takes.
  void mov(SubRegSize size, ZRegister zd, PRegisterMerge pv, ZRegister zn) {
    sel(size, zd, pv, zn, zd);
  }

  // HISTCNT emitter.
  // Asserts that the element size is 32-bit or 64-bit and that the governing predicate is p0-p7
  // (the predicate is placed at bit 10, directly below the fixed bits starting at bit 13).
  // The instruction word is the fixed opcode bits OR'd with each operand field, then handed to dc32.
  void histcnt(SubRegSize size, ZRegister zd, PRegisterZero pv, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "SubRegSize must be 32-bit or 64-bit");
    LOGMAN_THROW_A_FMT(pv <= PReg::p7.Zeroing(), "histcnt can only use p0 to p7");

    // Fixed opcode bits with every operand field left as zero.
    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0000'1100'0000'0000'0000;

    const uint32_t Instr = BaseEncoding | (FEXCore::ToUnderlying(size) << 22) // element size
                           | (zm.Idx() << 16)                                 // second source vector
                           | (pv.Idx() << 10)                                 // governing predicate
                           | (zn.Idx() << 5)                                  // first source vector
                           | zd.Idx();                                        // destination vector
    dc32(Instr);
  }

  // HISTSEG emitter.
  // Takes no element size or predicate; only the three vector register fields are
  // OR'd into the fixed opcode bits before the word is handed to dc32.
  void histseg(ZRegister zd, ZRegister zn, ZRegister zm) {
    // Fixed opcode bits with every operand field left as zero.
    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0000'1010'0000'0000'0000;

    const uint32_t Instr = BaseEncoding | (zm.Idx() << 16) // second source vector
                           | (zn.Idx() << 5)               // first source vector
                           | zd.Idx();                     // destination vector
    dc32(Instr);
  }

  // FCMLA (vectors, predicated) emitter.
  // Asserts a standard float element size and a governing predicate in p0-p7.
  // The raw Rotation enum value is placed at bit 13, directly above the predicate field at bit 10.
  void fcmla(SubRegSize size, ZRegister zda, PRegisterMerge pv, ZRegister zn, ZRegister zm, Rotation rot) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT(pv <= PReg::p7.Merging(), "fcmla can only use p0 to p7");

    // Fixed opcode bits with every operand field left as zero.
    constexpr uint32_t BaseEncoding = 0b0110'0100'0000'0000'0000'0000'0000'0000;

    const uint32_t Instr = BaseEncoding | (FEXCore::ToUnderlying(size) << 22) // element size
                           | (zm.Idx() << 16)                                 // second source vector
                           | (FEXCore::ToUnderlying(rot) << 13)               // rotation
                           | (pv.Idx() << 10)                                 // governing predicate
                           | (zn.Idx() << 5)                                  // first source vector
                           | zda.Idx();                                       // accumulator / destination
    dc32(Instr);
  }

  // FCADD (predicated) emitter.
  // This is a destructive form: zd and zn must name the same register, and only zd and zm are encoded.
  // Only rotations of 90 and 270 degrees are accepted; they are encoded as a single bit at bit 16.
  void fcadd(SubRegSize size, ZRegister zd, PRegisterMerge pv, ZRegister zn, ZRegister zm, Rotation rot) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT(pv <= PReg::p7.Merging(), "fcadd can only use p0 to p7");
    LOGMAN_THROW_A_FMT(rot == Rotation::ROTATE_90 || rot == Rotation::ROTATE_270, "fcadd rotation may only be 90 or 270 degrees");
    LOGMAN_THROW_A_FMT(zd == zn, "fcadd zd and zn must be the same register");

    // 90 degrees encodes as 0; anything else (270 degrees once the check above holds) encodes as 1.
    const uint32_t RotationBit = static_cast<uint32_t>(rot != Rotation::ROTATE_90);

    // Fixed opcode bits with every operand field left as zero.
    constexpr uint32_t BaseEncoding = 0b0110'0100'0000'0000'1000'0000'0000'0000;

    const uint32_t Instr = BaseEncoding | (FEXCore::ToUnderlying(size) << 22) // element size
                           | (RotationBit << 16)                              // rotation selector
                           | (pv.Idx() << 10)                                 // governing predicate
                           | (zm.Idx() << 5)                                  // second source vector
                           | zd.Idx();                                        // destination / first source
    dc32(Instr);
  }

  // SVE integer add/subtract vectors (unpredicated)
  // Every emitter in this family forwards to SVEIntegerAddSubUnpredicated with a 3-bit opcode
  // and passes the registers to the helper in reverse order (zm, zn, zd).
  // Opcode 0b000: ADD.
  void add(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b000, size, zm, zn, zd);
  }
  // Opcode 0b001: SUB.
  void sub(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b001, size, zm, zn, zd);
  }
  // Opcode 0b100: SQADD.
  void sqadd(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b100, size, zm, zn, zd);
  }
  // Opcode 0b101: UQADD.
  void uqadd(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b101, size, zm, zn, zd);
  }
  // Opcode 0b110: SQSUB.
  void sqsub(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b110, size, zm, zn, zd);
  }
  // Opcode 0b111: UQSUB.
  void uqsub(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEIntegerAddSubUnpredicated(0b111, size, zm, zn, zd);
  }

  // SVE address generation
  // ADR emitter: forwards all arguments unchanged to SVEAddressGeneration.
  // `mod` defaults to SVEModType::MOD_NONE and `scale` defaults to 0.
  void adr(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, SVEModType mod = SVEModType::MOD_NONE, uint32_t scale = 0) {
    SVEAddressGeneration(size, zd, zn, zm, mod, scale);
  }

  // SVE table lookup (three sources)
  // All three emitters forward to SVETableLookup with a 3-bit opcode and pass the
  // registers in (zm, zn, zd) order.
  // Single-table TBL: opcode 0b100.
  void tbl(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVETableLookup(0b100, size, zm, zn, zd);
  }
  // Two-table TBL: opcode 0b010. Only zn1 is passed to the helper; zn2 is used solely
  // by the assertion that zn1 and zn2 are sequential registers.
  void tbl(SubRegSize size, ZRegister zd, ZRegister zn1, ZRegister zn2, ZRegister zm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zn1, zn2), "TBL zn1 and zn2 must be sequential");
    SVETableLookup(0b010, size, zm, zn1, zd);
  }
  // TBX: opcode 0b011.
  void tbx(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVETableLookup(0b011, size, zm, zn, zd);
  }

  // SVE permute vector elements
  // Every permute emitter forwards to SVEPermute with a 3-bit opcode and passes the
  // registers in (zm, zn, zd) order.
  // Opcode 0b000: ZIP1.
  void zip1(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b000, size, zm, zn, zd);
  }
  // Opcode 0b001: ZIP2.
  void zip2(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b001, size, zm, zn, zd);
  }
  // Opcode 0b010: UZP1.
  void uzp1(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b010, size, zm, zn, zd);
  }
  // Opcode 0b011: UZP2.
  void uzp2(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b011, size, zm, zn, zd);
  }
  // Opcode 0b100: TRN1.
  void trn1(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b100, size, zm, zn, zd);
  }
  // Opcode 0b101: TRN2.
  void trn2(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    SVEPermute(0b101, size, zm, zn, zd);
  }

  // SVE integer compare with unsigned immediate
  // Each emitter forwards to SVEIntegerCompareImm; the two leading integers select the
  // comparison (presumably the encoding's `lt` and `ne` bits; confirm against the helper).
  // CMPHI: selector (0, 1).
  void cmphi(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, uint32_t imm) {
    SVEIntegerCompareImm(0, 1, imm, size, pg, zn, pd);
  }
  // CMPHS: selector (0, 0).
  void cmphs(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, uint32_t imm) {
    SVEIntegerCompareImm(0, 0, imm, size, pg, zn, pd);
  }
  // CMPLO: selector (1, 0).
  void cmplo(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, uint32_t imm) {
    SVEIntegerCompareImm(1, 0, imm, size, pg, zn, pd);
  }
  // CMPLS: selector (1, 1).
  void cmpls(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, uint32_t imm) {
    SVEIntegerCompareImm(1, 1, imm, size, pg, zn, pd);
  }

  // SVE integer compare with signed immediate
  // Each emitter forwards to SVEIntegerCompareSignedImm; the three leading integers select
  // the comparison (presumably the encoding's `op`, `o2` and `ne` bits; confirm against the helper).
  // CMPEQ: selector (1, 0, 0).
  void cmpeq(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(1, 0, 0, imm, size, pg, zn, pd);
  }
  // CMPGT: selector (0, 0, 1).
  void cmpgt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(0, 0, 1, imm, size, pg, zn, pd);
  }
  // CMPGE: selector (0, 0, 0).
  void cmpge(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(0, 0, 0, imm, size, pg, zn, pd);
  }
  // CMPLT: selector (0, 1, 0).
  void cmplt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(0, 1, 0, imm, size, pg, zn, pd);
  }
  // CMPLE: selector (0, 1, 1).
  void cmple(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(0, 1, 1, imm, size, pg, zn, pd);
  }
  // CMPNE: selector (1, 0, 1).
  void cmpne(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, int32_t imm) {
    SVEIntegerCompareSignedImm(1, 0, 1, imm, size, pg, zn, pd);
  }

  // SVE predicate logical operations
  // Every emitter here forwards to SVEPredicateLogical. The four leading integers select the
  // operation (presumably the encoding's op/S/o2/o3 bits; confirm against the helper), followed
  // by the registers in (pm, pg, pn, pd) order. Within this group, each `s`-suffixed variant
  // differs from its base form only in the second selector being 1.
  // AND: selector (0, 0, 0, 0).
  void and_(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 0, 0, 0, pm, pg, pn, pd);
  }
  // ANDS: selector (0, 1, 0, 0).
  void ands(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 1, 0, 0, pm, pg, pn, pd);
  }

  // Merging MOV: selector (0, 0, 1, 1) with pd passed in the pm slot.
  void mov(PRegister pd, PRegisterMerge pg, PRegister pn) {
    SVEPredicateLogical(0, 0, 1, 1, pd, pg, pn, pd);
  }
  // Zeroing MOV: the AND selector with pn passed in both source slots.
  void mov(PRegister pd, PRegisterZero pg, PRegister pn) {
    SVEPredicateLogical(0, 0, 0, 0, pn, pg, pn, pd);
  }

  // Zeroing MOVS: the ANDS selector with pn passed in both source slots.
  void movs(PRegister pd, PRegisterZero pg, PRegister pn) {
    SVEPredicateLogical(0, 1, 0, 0, pn, pg, pn, pd);
  }
  // BIC: selector (0, 0, 0, 1).
  void bic(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 0, 0, 1, pm, pg, pn, pd);
  }
  // BICS: selector (0, 1, 0, 1).
  void bics(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 1, 0, 1, pm, pg, pn, pd);
  }

  // These emitters forward to SVEPredicateLogical with a four-integer operation selector
  // followed by the registers in (pm, pg, pn, pd) order.
  // EOR: selector (0, 0, 1, 0).
  void eor(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 0, 1, 0, pm, pg, pn, pd);
  }
  // EORS: selector (0, 1, 1, 0).
  void eors(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 1, 1, 0, pm, pg, pn, pd);
  }

  // NOT: the EOR selector with the governing predicate pg also passed in the pm slot.
  void not_(PRegister pd, PRegisterZero pg, PRegister pn) {
    SVEPredicateLogical(0, 0, 1, 0, pg, pg, pn, pd);
  }
  // Predicate SEL: selector (0, 0, 1, 1). Takes a plain PRegister as the governing predicate.
  void sel(PRegister pd, PRegister pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(0, 0, 1, 1, pm, pg, pn, pd);
  }
  // These emitters forward to SVEPredicateLogical with a four-integer operation selector
  // followed by the registers in (pm, pg, pn, pd) order. All selectors in this group start
  // with 1; each `s`-suffixed variant differs from its base form only in the second selector being 1.
  // ORR: selector (1, 0, 0, 0).
  void orr(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 0, 0, 0, pm, pg, pn, pd);
  }
  // Unpredicated MOV: the ORR selector with pn passed as pm, pg and pn.
  void mov(PRegister pd, PRegister pn) {
    SVEPredicateLogical(1, 0, 0, 0, pn, pn, pn, pd);
  }
  // ORN: selector (1, 0, 0, 1).
  void orn(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 0, 0, 1, pm, pg, pn, pd);
  }
  // NOR: selector (1, 0, 1, 0).
  void nor(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 0, 1, 0, pm, pg, pn, pd);
  }
  // NAND: selector (1, 0, 1, 1).
  void nand(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 0, 1, 1, pm, pg, pn, pd);
  }
  // ORRS: selector (1, 1, 0, 0).
  void orrs(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 1, 0, 0, pm, pg, pn, pd);
  }
  // Unpredicated MOVS: the ORRS selector with pn passed as pm, pg and pn.
  void movs(PRegister pd, PRegister pn) {
    SVEPredicateLogical(1, 1, 0, 0, pn, pn, pn, pd);
  }
  // ORNS: selector (1, 1, 0, 1).
  void orns(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 1, 0, 1, pm, pg, pn, pd);
  }
  // NORS: selector (1, 1, 1, 0).
  void nors(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 1, 1, 0, pm, pg, pn, pd);
  }
  // NANDS: selector (1, 1, 1, 1).
  void nands(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    SVEPredicateLogical(1, 1, 1, 1, pm, pg, pn, pd);
  }

  // SVE broadcast predicate element
  // XXX:

  // SVE integer clamp
  // XXX:

  // SVE2 character match
  // MATCH: forwards to SVECharacterMatch with leading selector 0.
  // Unlike most helpers in this file, the registers are passed in declaration order (pd, pg, zn, zm).
  void match(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    SVECharacterMatch(0, size, pd, pg, zn, zm);
  }
  // NMATCH: same helper and operand order with leading selector 1.
  void nmatch(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    SVECharacterMatch(1, size, pd, pg, zn, zm);
  }

  // SVE floating-point convert precision odd elements
  // FCVTXNT emitter: takes no size argument and always passes the fixed selector pair
  // (0b00, 0b10) to SVEFloatConvertOdd, followed by (pg, zn, zd).
  void fcvtxnt(ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    SVEFloatConvertOdd(0b00, 0b10, pg, zn, zd);
  }
  /// FCVTNT emitter. `size` is the destination element size and must be 16-bit or 32-bit.
  /// The size picks the (source, destination) selector pair passed to SVEFloatConvertOdd:
  ///   16-bit destination -> (0b10, 0b00)
  ///   32-bit destination -> (0b11, 0b10)
  /// Any other size leaves both selectors at 0b00, which is only reachable when the assertion is compiled out.
  void fcvtnt(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i16Bit, "Unsupported size in {}", __func__);

    int SrcSelector = 0b00;
    int DestSelector = 0b00;
    if (size == SubRegSize::i16Bit) {
      SrcSelector = 0b10;
    } else if (size == SubRegSize::i32Bit) {
      SrcSelector = 0b11;
      DestSelector = 0b10;
    }

    SVEFloatConvertOdd(SrcSelector, DestSelector, pg, zn, zd);
  }

  /// FCVTLT emitter. `size` is the destination element size and must be 32-bit or 64-bit.
  /// The size picks the (source, destination) selector pair passed to SVEFloatConvertOdd:
  ///   32-bit destination -> (0b10, 0b01)
  ///   64-bit destination -> (0b11, 0b11)
  /// Any other size leaves both selectors at 0b00, which is only reachable when the assertion is compiled out.
  void fcvtlt(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Unsupported size in {}", __func__);

    int SrcSelector = 0b00;
    int DestSelector = 0b00;
    if (size == SubRegSize::i32Bit) {
      SrcSelector = 0b10;
      DestSelector = 0b01;
    } else if (size == SubRegSize::i64Bit) {
      SrcSelector = 0b11;
      DestSelector = 0b11;
    }

    SVEFloatConvertOdd(SrcSelector, DestSelector, pg, zn, zd);
  }

  // XXX: BFCVTNT

  // SVE2 floating-point pairwise operations
  // Every pairwise emitter forwards to SVEFloatPairwiseArithmetic with a 3-bit opcode,
  // followed by (size, pg, zd, zn, zm).
  // Opcode 0b000: FADDP.
  void faddp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVEFloatPairwiseArithmetic(0b000, size, pg, zd, zn, zm);
  }
  // Opcode 0b100: FMAXNMP.
  void fmaxnmp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVEFloatPairwiseArithmetic(0b100, size, pg, zd, zn, zm);
  }
  // Opcode 0b101: FMINNMP.
  void fminnmp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVEFloatPairwiseArithmetic(0b101, size, pg, zd, zn, zm);
  }
  // Opcode 0b110: FMAXP.
  void fmaxp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVEFloatPairwiseArithmetic(0b110, size, pg, zd, zn, zm);
  }
  // Opcode 0b111: FMINP.
  void fminp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVEFloatPairwiseArithmetic(0b111, size, pg, zd, zn, zm);
  }

  // SVE floating-point multiply-add (indexed)
  // Indexed FMLA/FMLS share SVEFPMultiplyAddIndexed; only the leading
  // selector (0 vs 1) differs.
  void fmla(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto Op = 0;
    SVEFPMultiplyAddIndexed(Op, size, zda, zn, zm, index);
  }
  void fmls(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto Op = 1;
    SVEFPMultiplyAddIndexed(Op, size, zda, zn, zm, index);
  }

  // SVE floating-point complex multiply-add (indexed)
  // Indexed FCMLA. Only 16-bit and 32-bit element sizes are accepted.
  void fcmla(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index, Rotation rot) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit, "SubRegSize must be 16-bit or 32-bit");

    // The rotation's underlying value is merged into the low bits of the selector.
    const auto RotBits = FEXCore::ToUnderlying(rot);

    // fcmla (indexed) is encoded as though each element were twice as wide
    // (16 -> 32, 32 -> 64), so bump the size enumerator by one step.
    const auto Widened = static_cast<SubRegSize>(FEXCore::ToUnderlying(size) + 1);

    SVEFPMultiplyAddIndexed(0b100 | RotBits, Widened, zda, zn, zm, index);
  }

  // SVE floating-point multiply (indexed)
  // Indexed FMUL reuses the multiply-add (indexed) helper with selector 0b1000.
  void fmul(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto Op = 0b1000;
    SVEFPMultiplyAddIndexed(Op, size, zd, zn, zm, index);
  }

  // SVE floating point matrix multiply accumulate
  // XXX: BFMMLA
  // FMMLA: forwards all operands unchanged to SVEFPMatrixMultiplyAccumulate.
  // zda is the accumulator/destination; zn and zm are the sources.
  void fmmla(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    SVEFPMatrixMultiplyAccumulate(size, zda, zn, zm);
  }

  // SVE floating-point compare vectors
  // Every compare passes a distinct triple of selector bits (A, B, C) to
  // SVEFloatCompareVector, followed by the operands.
  void fcmeq(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 0, B = 1, C = 0;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void fcmgt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 0, B = 0, C = 1;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void fcmge(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 0, B = 0, C = 0;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void fcmne(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 0, B = 1, C = 1;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void fcmuo(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 1, B = 0, C = 0;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void facge(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 1, B = 0, C = 1;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }
  void facgt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto A = 1, B = 1, C = 1;
    SVEFloatCompareVector(A, B, C, size, zm, pg, zn, pd);
  }

  // FACLE/FACLT are expressed through FACGE/FACGT. Note the parameter order
  // here is (zm, zn), the reverse of the methods they delegate to.
  void facle(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zm, ZRegister zn) {
    facge(size, pd, pg, zn, zm);
  }
  void faclt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zm, ZRegister zn) {
    facgt(size, pd, pg, zn, zm);
  }

  // SVE floating-point arithmetic (unpredicated)
  // Unpredicated FP arithmetic: one helper, one 3-bit selector per mnemonic.
  void fadd(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b000;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }
  void fsub(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b001;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }
  void fmul(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b010;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }
  void ftsmul(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b011;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }
  void frecps(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b110;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }
  void frsqrts(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b111;
    SVEFloatArithmeticUnpredicated(Opc, size, zm, zn, zd);
  }

  // SVE floating-point recursive reduction
  // FP reductions into a scalar vector register; the selector picks the operation.
  void faddv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc = 0b000;
    SVEFPRecursiveReduction(Opc, size, vd, pg, zn);
  }
  void fmaxnmv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc = 0b100;
    SVEFPRecursiveReduction(Opc, size, vd, pg, zn);
  }
  void fminnmv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc = 0b101;
    SVEFPRecursiveReduction(Opc, size, vd, pg, zn);
  }
  void fmaxv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc = 0b110;
    SVEFPRecursiveReduction(Opc, size, vd, pg, zn);
  }
  void fminv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc = 0b111;
    SVEFPRecursiveReduction(Opc, size, vd, pg, zn);
  }

  // SVE integer Multiply-Add - Predicated
  // SVE integer multiply-accumulate writing addend (predicated)
  // MLA/MLS: first selector is 0 for the "writing addend" forms; the second
  // selector distinguishes add (0) from subtract (1).
  void mla(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Form = 0b0;
    constexpr auto Sub = 0b0;
    SVEIntegerMultiplyAddSubPredicated(Form, Sub, size, zda, pg, zn, zm);
  }
  void mls(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Form = 0b0;
    constexpr auto Sub = 0b1;
    SVEIntegerMultiplyAddSubPredicated(Form, Sub, size, zda, pg, zn, zm);
  }

  // SVE integer multiply-add writing multiplicand (predicated)
  // MAD/MSB: first selector is 1 for the "writing multiplicand" forms; the
  // second selector distinguishes add (0) from subtract (1). Note that `za`
  // is handed to the helper ahead of `zm`.
  void mad(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Form = 0b1;
    constexpr auto Sub = 0b0;
    SVEIntegerMultiplyAddSubPredicated(Form, Sub, size, zdn, pg, za, zm);
  }
  void msb(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Form = 0b1;
    constexpr auto Sub = 0b1;
    SVEIntegerMultiplyAddSubPredicated(Form, Sub, size, zdn, pg, za, zm);
  }

  // SVE Integer Binary Arithmetic - Predicated
  // SVE integer add/subtract vectors (predicated)
  // Predicated ADD/SUB/SUBR share SVEAddSubVectorsPredicated.
  void add(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b000;
    SVEAddSubVectorsPredicated(Opc, size, zd, pg, zn, zm);
  }
  void sub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b001;
    SVEAddSubVectorsPredicated(Opc, size, zd, pg, zn, zm);
  }
  void subr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b011;
    SVEAddSubVectorsPredicated(Opc, size, zd, pg, zn, zm);
  }

  // SVE integer min/max/difference (predicated)
  // Min/max/absolute-difference: a 2-bit operation selector plus a flag that
  // is 0 for the signed mnemonic and 1 for the unsigned one.
  void smax(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    constexpr auto Unsigned = 0;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }
  void umax(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    constexpr auto Unsigned = 1;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }
  void smin(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b01;
    constexpr auto Unsigned = 0;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }
  void umin(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b01;
    constexpr auto Unsigned = 1;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }
  void sabd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    constexpr auto Unsigned = 0;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }
  void uabd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    constexpr auto Unsigned = 1;
    SVEIntegerMinMaxDifferencePredicated(Opc, Unsigned, size, pg, zdn, zm, zd);
  }

  // SVE integer multiply vectors (predicated)
  // Predicated multiplies: leading selector 0 (the divide forms use 1), then a
  // 2-bit variant selector.
  void mul(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b0;
    constexpr auto Variant = 0b00;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }
  void smulh(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b0;
    constexpr auto Variant = 0b10;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }
  void umulh(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b0;
    constexpr auto Variant = 0b11;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }

  // SVE integer divide vectors (predicated)
  // Predicated divides: leading selector 1 (the multiply forms use 0), then a
  // 2-bit variant selector.
  void sdiv(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b1;
    constexpr auto Variant = 0b00;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }
  void udiv(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b1;
    constexpr auto Variant = 0b01;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }
  void sdivr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b1;
    constexpr auto Variant = 0b10;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }
  void udivr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Group = 0b1;
    constexpr auto Variant = 0b11;
    SVEIntegerMulDivVectorsPredicated(Group, Variant, size, zd, pg, zn, zm);
  }

  // SVE bitwise logical operations (predicated)
  // Predicated bitwise logicals; the 3-bit selector picks ORR/EOR/AND/BIC.
  void orr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b000;
    SVEBitwiseLogicalPredicated(Opc, size, pg, zdn, zm, zd);
  }
  void eor(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b001;
    SVEBitwiseLogicalPredicated(Opc, size, pg, zdn, zm, zd);
  }
  void and_(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b010;
    SVEBitwiseLogicalPredicated(Opc, size, pg, zdn, zm, zd);
  }
  void bic(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, ZRegister zm) {
    constexpr auto Opc = 0b011;
    SVEBitwiseLogicalPredicated(Opc, size, pg, zdn, zm, zd);
  }

  // SVE Integer Reduction
  // SVE integer add reduction (predicated)
  // SADDV: signed add reduction into a D register. Only 8/16/32-bit element
  // sizes are accepted.
  void saddv(SubRegSize size, DRegister vd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit || size == SubRegSize::i32Bit,
                       "saddv may only use 8-bit, 16-bit, or 32-bit elements.");

    // Fixed bits of the encoding; the helper fills in the remaining fields.
    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'0000'0010'0000'0000'0000;
    constexpr auto Opc = 0b00;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  // UADDV: unsigned add reduction of zn (governed by pg) into the D register vd.
  //
  // Unlike SADDV, the unsigned form is defined for every standard element
  // size, including 64-bit, so only the 128-bit pseudo-size is rejected here.
  void uaddv(SubRegSize size, DRegister vd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "uaddv may not use 128-bit elements.");

    // Fixed bits of the encoding; the helper fills in the remaining fields.
    constexpr uint32_t Op = 0b0000'0100'0000'0000'0010'0000'0000'0000;
    SVEIntegerReductionOperation(Op, 0b01, size, vd, pg, zn);
  }

  // SVE integer min/max reduction (predicated)
  // Integer min/max reductions. All four share the same fixed encoding bits
  // (written out here in full 32-bit form) and differ in the 2-bit selector.
  void smaxv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b00;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  void umaxv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b01;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  void sminv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b10;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  void uminv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b11;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }

  // SVE constructive prefix (predicated)
  // Predicated MOVPRFX. The predicate's static type decides the M argument:
  // PRegisterMerge gives 1, PRegisterZero gives 0.
  template<typename T>
  requires (std::is_same_v<PRegisterZero, T> || std::is_same_v<PRegisterMerge, T>)
  void movprfx(SubRegSize size, ZRegister zd, T pg, ZRegister zn) {
    constexpr bool IsMerging = std::is_same_v<PRegisterMerge, T>;
    constexpr uint32_t M = IsMerging ? 1 : 0;
    constexpr auto Opc = 0b00;
    SVEConstructivePrefixPredicated(Opc, M, size, pg, zn, zd);
  }

  // SVE bitwise logical reduction (predicated)
  // Bitwise logical reductions. Same fixed encoding bits for all three; the
  // 2-bit selector picks ORV/EORV/ANDV.
  void orv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0001'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b00;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  void eorv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0001'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b01;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }
  void andv(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr uint32_t BaseEncoding = 0b0000'0100'0001'1000'0010'0000'0000'0000;
    constexpr auto Opc = 0b10;
    SVEIntegerReductionOperation(BaseEncoding, Opc, size, vd, pg, zn);
  }

  // SVE Bitwise Shift - Predicated
  // SVE bitwise shift by immediate (predicated)
  void asr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b00, 0, 0, pg, zd, zdn, Shift);
  }
  void lsr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b00, 0, 1, pg, zd, zdn, Shift);
  }
  void lsl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b00, 1, 1, pg, zd, zdn, Shift);
  }
  void asrd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b01, 0, 0, pg, zd, zdn, Shift);
  }
  void sqshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b01, 1, 0, pg, zd, zdn, Shift);
  }
  void uqshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b01, 1, 1, pg, zd, zdn, Shift);
  }
  void srshr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b11, 0, 0, pg, zd, zdn, Shift);
  }
  void urshr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b11, 0, 1, pg, zd, zdn, Shift);
  }
  void sqshlu(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zdn, uint32_t Shift) {
    SVEBitWiseShiftImmediatePred(size, 0b11, 1, 1, pg, zd, zdn, Shift);
  }

  // SVE bitwise shift by vector (predicated)
  // Predicated shifts by vector. Three single-bit selectors identify the
  // mnemonic; the first is set only for the reversed (…r) forms.
  void asr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 0, BitA = 0, BitB = 0;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }
  void lsr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 0, BitA = 0, BitB = 1;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }
  void lsl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 0, BitA = 1, BitB = 1;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }
  void asrr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 1, BitA = 0, BitB = 0;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }
  void lsrr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 1, BitA = 0, BitB = 1;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }
  void lslr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Rev = 1, BitA = 1, BitB = 1;
    SVEBitwiseShiftbyVector(Rev, BitA, BitB, size, pg, zd, zn, zm);
  }

  // SVE bitwise shift by wide elements (predicated)
  // Predicated shifts by wide elements; the 3-bit selector picks ASR/LSR/LSL.
  void asr_wide(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b000;
    SVEBitwiseShiftByWideElementPredicated(size, Opc, zd, pg, zn, zm);
  }
  void lsr_wide(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b001;
    SVEBitwiseShiftByWideElementPredicated(size, Opc, zd, pg, zn, zm);
  }
  void lsl_wide(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b011;
    SVEBitwiseShiftByWideElementPredicated(size, Opc, zd, pg, zn, zm);
  }

  // SVE Integer Unary Arithmetic - Predicated
  // SVE integer unary operations (predicated)
  // Integer unary ops: group selector 0b10 plus a 3-bit operation selector.
  // The extend forms assert that the element is wider than the extended part.
  void sxtb(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b000;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void uxtb(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b001;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void sxth(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b010;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void uxth(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b011;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void sxtw(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b100;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void uxtw(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit, "Invalid subregsize size");
    constexpr auto Group = 0b10, Opc = 0b101;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void abs(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b10, Opc = 0b110;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void neg(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b10, Opc = 0b111;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }

  // SVE bitwise unary operations (predicated)
  // Bitwise unary ops: group selector 0b11 plus a 3-bit operation selector.
  // FABS/FNEG additionally reject 8-bit elements.
  void cls(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b11, Opc = 0b000;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void clz(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b11, Opc = 0b001;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void cnt(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b11, Opc = 0b010;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void cnot(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b11, Opc = 0b011;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void fabs(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    constexpr auto Group = 0b11, Opc = 0b100;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void fneg(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Invalid size");
    constexpr auto Group = 0b11, Opc = 0b101;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }
  void not_(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Group = 0b11, Opc = 0b110;
    SVEIntegerUnaryPredicated(Group, Opc, size, pg, zn, zd);
  }

  // SVE Bitwise Logical - Unpredicated
  // SVE bitwise logical operations (unpredicated)
  // Unpredicated bitwise logicals; the 2-bit selector picks AND/ORR/EOR/BIC.
  void and_(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    SVEBitwiseLogicalUnpredicated(Opc, zm, zn, zd);
  }
  void orr(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b01;
    SVEBitwiseLogicalUnpredicated(Opc, zm, zn, zd);
  }
  // MOV is emitted as ORR with zn supplied for both source operands.
  void mov(ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b01;
    SVEBitwiseLogicalUnpredicated(Opc, zn, zn, zd);
  }
  void eor(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    SVEBitwiseLogicalUnpredicated(Opc, zm, zn, zd);
  }
  void bic(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b11;
    SVEBitwiseLogicalUnpredicated(Opc, zm, zn, zd);
  }

  // XAR: assembles the instruction word directly rather than going through a
  // shared helper. 128-bit elements are rejected.
  void xar(SubRegSize size, ZRegister zd, ZRegister zm, uint32_t rotate) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Element size cannot be 128-bit.");

    // Element size and rotate amount are packed together by the shared
    // shift-immediate encoder into a high part and a low part.
    const auto [SizeHigh, SizeLowImm] = EncodeSVEShiftImmediate(size, rotate);

    constexpr uint32_t BaseEncoding = 0b0000'0100'0010'0000'0011'0100'0000'0000;
    const uint32_t Encoded = BaseEncoding | (SizeHigh << 22) | (SizeLowImm << 16) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoded);
  }

  // SVE2 bitwise ternary operations
  // SVE2 bitwise ternary ops: a 2-bit selector and a 1-bit selector identify
  // the mnemonic.
  void eor3(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b00, Bit = 0;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }
  void bsl(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b00, Bit = 1;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }
  void bcax(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b01, Bit = 0;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }
  void bsl1n(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b01, Bit = 1;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }
  void bsl2n(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b10, Bit = 1;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }
  void nbsl(ZRegister zd, ZRegister zdn, ZRegister zm, ZRegister zk) {
    constexpr auto Opc = 0b11, Bit = 1;
    SVE2BitwiseTernary(Opc, Bit, zm, zk, zd, zdn);
  }

  // SVE Index Generation
  // INDEX overloads. The 2-bit selector records which operands are registers:
  // 0b00 imm/imm, 0b01 reg/imm, 0b10 imm/reg, 0b11 reg/reg. Immediate
  // operands must lie in [-16, 15]; register operands are passed to the
  // helper as their register index.
  void index(SubRegSize size, ZRegister zd, int32_t initial, int32_t increment) {
    LOGMAN_THROW_A_FMT(initial >= -16 && initial <= 15, "initial value must be within -16-15. initial: {}", initial);
    LOGMAN_THROW_A_FMT(increment >= -16 && increment <= 15, "increment value must be within -16-15. increment: {}", increment);
    constexpr auto Form = 0b00;
    SVEIndexGeneration(Form, size, zd, initial, increment);
  }
  void index(SubRegSize size, ZRegister zd, Register initial, int32_t increment) {
    LOGMAN_THROW_A_FMT(increment >= -16 && increment <= 15, "increment value must be within -16-15. increment: {}", increment);
    constexpr auto Form = 0b01;
    const auto InitialReg = static_cast<int32_t>(initial.Idx());
    SVEIndexGeneration(Form, size, zd, InitialReg, increment);
  }
  void index(SubRegSize size, ZRegister zd, int32_t initial, Register increment) {
    LOGMAN_THROW_A_FMT(initial >= -16 && initial <= 15, "initial value must be within -16-15. initial: {}", initial);
    constexpr auto Form = 0b10;
    const auto IncrementReg = static_cast<int32_t>(increment.Idx());
    SVEIndexGeneration(Form, size, zd, initial, IncrementReg);
  }
  void index(SubRegSize size, ZRegister zd, Register initial, Register increment) {
    constexpr auto Form = 0b11;
    const auto InitialReg = static_cast<int32_t>(initial.Idx());
    const auto IncrementReg = static_cast<int32_t>(increment.Idx());
    SVEIndexGeneration(Form, size, zd, InitialReg, IncrementReg);
  }

  // SVE Stack Allocation
  // SVE stack frame adjustment
  // ADDVL/ADDPL share SVEStackFrameOperation; the selector is 0b00 for ADDVL
  // and 0b01 for ADDPL.
  void addvl(XRegister rd, XRegister rn, int32_t imm) {
    constexpr auto Opc = 0b00;
    SVEStackFrameOperation(Opc, rd, rn, imm);
  }
  void addpl(XRegister rd, XRegister rn, int32_t imm) {
    constexpr auto Opc = 0b01;
    SVEStackFrameOperation(Opc, rd, rn, imm);
  }

  // Streaming SVE stack frame adjustment (SME)
  // XXX:

  // SVE stack frame size
  // RDVL has no source register operand. The position where Rn would go is
  // all ones in the encoding, which equals the SP register number, so SP is
  // passed to the shared helper to produce those bits.
  void rdvl(XRegister rd, int32_t imm) {
    constexpr auto Opc = 0b10;
    const auto AllOnesRn = XReg::rsp;
    SVEStackFrameOperation(Opc, rd, AllOnesRn, imm);
  }

  // Streaming SVE stack frame size (SME)
  // XXX:

  // SVE2 Integer Multiply - Unpredicated
  // SVE2 integer multiply vectors (unpredicated)
  // SVE2 unpredicated multiplies; the selector picks MUL/SMULH/UMULH/PMUL.
  void mul(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    SVE2IntegerMultiplyVectors(Opc, size, zm, zn, zd);
  }
  void smulh(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    SVE2IntegerMultiplyVectors(Opc, size, zm, zn, zd);
  }
  void umulh(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b11;
    SVE2IntegerMultiplyVectors(Opc, size, zm, zn, zd);
  }
  // PMUL takes no size argument; the 8-bit element size is always passed.
  void pmul(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b01;
    SVE2IntegerMultiplyVectors(Opc, SubRegSize::i8Bit, zm, zn, zd);
  }

  // SVE2 signed saturating doubling multiply high (unpredicated)
  // Saturating doubling multiply-high forms reuse the SVE2 multiply helper
  // with 3-bit selectors 0b100 (SQDMULH) and 0b101 (SQRDMULH).
  void sqdmulh(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b100;
    SVE2IntegerMultiplyVectors(Opc, size, zm, zn, zd);
  }
  void sqrdmulh(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b101;
    SVE2IntegerMultiplyVectors(Opc, size, zm, zn, zd);
  }

  // SVE Bitwise Shift - Unpredicated
  // SVE bitwise shift by wide elements (unpredicated)
  // Unpredicated shifts by wide elements; the 2-bit selector picks ASR/LSR/LSL.
  void asr_wide(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    SVEBitwiseShiftByWideElementsUnpredicated(size, Opc, zd, zn, zm);
  }
  void lsr_wide(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b01;
    SVEBitwiseShiftByWideElementsUnpredicated(size, Opc, zd, zn, zm);
  }
  void lsl_wide(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b11;
    SVEBitwiseShiftByWideElementsUnpredicated(size, Opc, zd, zn, zm);
  }

  // SVE bitwise shift by immediate (unpredicated)
  // Unpredicated shifts by immediate; the 2-bit selector picks ASR/LSR/LSL.
  void asr(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opc = 0b00;
    SVEBitWiseShiftImmediateUnpred(size, Opc, zd, zn, shift);
  }
  void lsr(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opc = 0b01;
    SVEBitWiseShiftImmediateUnpred(size, Opc, zd, zn, shift);
  }
  void lsl(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opc = 0b11;
    SVEBitWiseShiftImmediateUnpred(size, Opc, zd, zn, shift);
  }

  // SVE Integer Misc - Unpredicated
  // SVE floating-point trig select coefficient
  // FTSSEL: zm's register index and the size's underlying value are passed
  // as raw fields to the misc-unpredicated helper.
  void ftssel(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "ftssel may only use 16/32/64-bit element sizes");
    constexpr auto Opc = 0b00;
    const auto SizeBits = FEXCore::ToUnderlying(size);
    SVEIntegerMiscUnpredicated(Opc, zm.Idx(), SizeBits, zd, zn);
  }
  // SVE floating-point exponential accelerator
  // FEXPA: same helper as FTSSEL, with selector 0b10 and a zeroed second field.
  void fexpa(SubRegSize size, ZRegister zd, ZRegister zn) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "fexpa may only use 16/32/64-bit element sizes");
    constexpr auto Opc = 0b10;
    constexpr auto ZeroField = 0b00000;
    const auto SizeBits = FEXCore::ToUnderlying(size);
    SVEIntegerMiscUnpredicated(Opc, ZeroField, SizeBits, zd, zn);
  }
  // SVE constructive prefix (unpredicated)
  // Unpredicated MOVPRFX: selector 0b11 with the two following fields zeroed.
  void movprfx(ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b11;
    constexpr auto ZeroField = 0b00000;
    constexpr auto SizeBits = 0b00;
    SVEIntegerMiscUnpredicated(Opc, ZeroField, SizeBits, zd, zn);
  }

  // SVE Element Count
  // SVE saturating inc/dec vector by element count
  void sqinch(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0000, SubRegSize::i16Bit, zdn, pattern, imm4);
  }
  void uqinch(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0001, SubRegSize::i16Bit, zdn, pattern, imm4);
  }
  void sqdech(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0010, SubRegSize::i16Bit, zdn, pattern, imm4);
  }
  void uqdech(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0011, SubRegSize::i16Bit, zdn, pattern, imm4);
  }
  void sqincw(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0000, SubRegSize::i32Bit, zdn, pattern, imm4);
  }
  void uqincw(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0001, SubRegSize::i32Bit, zdn, pattern, imm4);
  }
  void sqdecw(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0010, SubRegSize::i32Bit, zdn, pattern, imm4);
  }
  void uqdecw(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0011, SubRegSize::i32Bit, zdn, pattern, imm4);
  }
  void sqincd(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0000, SubRegSize::i64Bit, zdn, pattern, imm4);
  }
  void uqincd(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0001, SubRegSize::i64Bit, zdn, pattern, imm4);
  }
  void sqdecd(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0010, SubRegSize::i64Bit, zdn, pattern, imm4);
  }
  void uqdecd(ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    SVEElementCount(0, 0b0011, SubRegSize::i64Bit, zdn, pattern, imm4);
  }

  // SVE element count
  // CNTB/CNTH/CNTW/CNTD. The helper takes a ZRegister for the destination
  // slot, so the X register's index is rewrapped as a ZRegister.
  void cntb(XRegister rd, PredicatePattern pattern, uint32_t imm) {
    const ZRegister DestField {rd.Idx()};
    SVEElementCount(0, 0b1000, SubRegSize::i8Bit, DestField, pattern, imm);
  }
  void cnth(XRegister rd, PredicatePattern pattern, uint32_t imm) {
    const ZRegister DestField {rd.Idx()};
    SVEElementCount(0, 0b1000, SubRegSize::i16Bit, DestField, pattern, imm);
  }
  void cntw(XRegister rd, PredicatePattern pattern, uint32_t imm) {
    const ZRegister DestField {rd.Idx()};
    SVEElementCount(0, 0b1000, SubRegSize::i32Bit, DestField, pattern, imm);
  }
  void cntd(XRegister rd, PredicatePattern pattern, uint32_t imm) {
    const ZRegister DestField {rd.Idx()};
    SVEElementCount(0, 0b1000, SubRegSize::i64Bit, DestField, pattern, imm);
  }

  // SVE inc/dec vector by element count
  // Non-saturating inc/dec of a vector by element count: leading selector 1,
  // then 0b0000 for increment or 0b0001 for decrement.
  void inch(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0000;
    SVEElementCount(1, Opc, SubRegSize::i16Bit, zdn, pattern, imm);
  }
  void dech(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0001;
    SVEElementCount(1, Opc, SubRegSize::i16Bit, zdn, pattern, imm);
  }
  void incw(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0000;
    SVEElementCount(1, Opc, SubRegSize::i32Bit, zdn, pattern, imm);
  }
  void decw(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0001;
    SVEElementCount(1, Opc, SubRegSize::i32Bit, zdn, pattern, imm);
  }
  void incd(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0000;
    SVEElementCount(1, Opc, SubRegSize::i64Bit, zdn, pattern, imm);
  }
  void decd(ZRegister zdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Opc = 0b0001;
    SVEElementCount(1, Opc, SubRegSize::i64Bit, zdn, pattern, imm);
  }

  // SVE inc/dec register by element count
  // Non-saturating inc/dec of an X register by element count: leading
  // selector 1, then 0b1000 for increment or 0b1001 for decrement. The X
  // register's index is rewrapped as a ZRegister for the helper.
  void incb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1000, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void decb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1001, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void inch(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1000, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void dech(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1001, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void incw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1000, SubRegSize::i32Bit, RegField, pattern, imm);
  }
  void decw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1001, SubRegSize::i32Bit, RegField, pattern, imm);
  }
  void incd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1000, SubRegSize::i64Bit, RegField, pattern, imm);
  }
  void decd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1001, SubRegSize::i64Bit, RegField, pattern, imm);
  }

  // SVE saturating inc/dec register by element count
  // Saturating inc/dec of a general register by byte count. The leading
  // selector is 1 for the X-register overload and 0 for the W-register one;
  // the 4-bit selector picks sqinc/uqinc/sqdec/uqdec.
  void sqincb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1100, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void sqincb(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1100, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void uqincb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1101, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void uqincb(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1101, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void sqdecb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1110, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void sqdecb(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1110, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void uqdecb(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1111, SubRegSize::i8Bit, RegField, pattern, imm);
  }
  void uqdecb(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1111, SubRegSize::i8Bit, RegField, pattern, imm);
  }

  // Saturating inc/dec of a general register by halfword count. The leading
  // selector is 1 for the X-register overload and 0 for the W-register one;
  // the 4-bit selector picks sqinc/uqinc/sqdec/uqdec.
  void sqinch(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1100, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void sqinch(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1100, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void uqinch(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1101, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void uqinch(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1101, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void sqdech(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1110, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void sqdech(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1110, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void uqdech(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(1, 0b1111, SubRegSize::i16Bit, RegField, pattern, imm);
  }
  void uqdech(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    const ZRegister RegField {rdn.Idx()};
    SVEElementCount(0, 0b1111, SubRegSize::i16Bit, RegField, pattern, imm);
  }

  // Saturating element-count inc/dec on general-purpose registers, word elements.
  // XRegister overloads pass 1 and WRegister overloads pass 0 as the leading flag.
  void sqincw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1100;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqincw(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1100;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqincw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1101;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqincw(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1101;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqdecw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1110;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqdecw(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1110;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqdecw(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1111;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqdecw(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1111;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i32Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }

  // Saturating element-count inc/dec on general-purpose registers, doubleword elements.
  // XRegister overloads pass 1 and WRegister overloads pass 0 as the leading flag.
  void sqincd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1100;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqincd(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1100;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqincd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1101;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqincd(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1101;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqdecd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1110;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void sqdecd(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1110;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqdecd(XRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 1, Opcode = 0b1111;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }
  void uqdecd(WRegister rdn, PredicatePattern pattern, uint32_t imm) {
    constexpr auto Is64Bit = 0, Opcode = 0b1111;
    SVEElementCount(Is64Bit, Opcode, SubRegSize::i64Bit, ZRegister {rdn.Idx()}, pattern, imm);
  }

  // SVE Bitwise Immediate
  // XXX: DUPM
  // SVE bitwise logical with immediate (unpredicated)
  // XXX:

  // SVE Integer Wide Immediate - Predicated
  // Merging-predicated floating-point immediate copy: forwards all arguments unchanged to
  // SVEBroadcastFloatImmPredicated.
  void fcpy(SubRegSize size, ZRegister zd, PRegisterMerge pg, float value) {
    SVEBroadcastFloatImmPredicated(size, zd, pg, value);
  }
  // Alias of fcpy: emits the identical helper call.
  void fmov(SubRegSize size, ZRegister zd, PRegisterMerge pg, float value) {
    SVEBroadcastFloatImmPredicated(size, zd, pg, value);
  }

  // SVE copy integer immediate (predicated)
  // Predicated integer immediate copy. The leading flag handed to
  // SVEBroadcastIntegerImmPredicated is 0 for a zeroing predicate and 1 for a merging one.
  void cpy(SubRegSize size, ZRegister zd, PRegisterZero pg, int32_t imm) {
    constexpr auto Merging = 0;
    SVEBroadcastIntegerImmPredicated(Merging, size, zd, pg, imm);
  }
  void cpy(SubRegSize size, ZRegister zd, PRegisterMerge pg, int32_t imm) {
    constexpr auto Merging = 1;
    SVEBroadcastIntegerImmPredicated(Merging, size, zd, pg, imm);
  }
  // mov_imm overloads are aliases of the matching cpy overloads and emit the same helper calls.
  void mov_imm(SubRegSize size, ZRegister zd, PRegisterZero pg, int32_t imm) {
    constexpr auto Merging = 0;
    SVEBroadcastIntegerImmPredicated(Merging, size, zd, pg, imm);
  }
  void mov_imm(SubRegSize size, ZRegister zd, PRegisterMerge pg, int32_t imm) {
    constexpr auto Merging = 1;
    SVEBroadcastIntegerImmPredicated(Merging, size, zd, pg, imm);
  }

  // SVE Permute Vector - Unpredicated
  // Unpredicated permutes. General-purpose source registers are passed to
  // SVEPermuteUnpredicated by wrapping their index in a ZRegister.
  void dup(SubRegSize size, ZRegister zd, Register rn) {
    constexpr auto Opcode = 0b00000;
    SVEPermuteUnpredicated(size, Opcode, zd, ZRegister {rn.Idx()});
  }
  // Alias of dup (general-purpose register source).
  void mov(SubRegSize size, ZRegister zd, Register rn) {
    constexpr auto Opcode = 0b00000;
    SVEPermuteUnpredicated(size, Opcode, zd, ZRegister {rn.Idx()});
  }
  void insr(SubRegSize size, ZRegister zdn, Register rm) {
    constexpr auto Opcode = 0b00100;
    SVEPermuteUnpredicated(size, Opcode, zdn, ZRegister {rm.Idx()});
  }
  void insr(SubRegSize size, ZRegister zdn, VRegister vm) {
    constexpr auto Opcode = 0b10100;
    SVEPermuteUnpredicated(size, Opcode, zdn, vm.Z());
  }
  void rev(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b11000;
    SVEPermuteUnpredicated(size, Opcode, zd, zn);
  }

  // SVE unpack vector elements
  // Vector unpack family. Each variant asserts that size is 16, 32 or 64 bits before
  // emitting through SVEPermuteUnpredicated with its own opcode.
  void sunpklo(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b10000;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    SVEPermuteUnpredicated(size, Opcode, zd, zn);
  }
  void sunpkhi(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b10001;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    SVEPermuteUnpredicated(size, Opcode, zd, zn);
  }
  void uunpklo(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b10010;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    SVEPermuteUnpredicated(size, Opcode, zd, zn);
  }
  void uunpkhi(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b10011;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid subregsize size");
    SVEPermuteUnpredicated(size, Opcode, zd, zn);
  }

  // SVE Permute Predicate
  // Predicate reverse: emitted through SVEPermutePredicate with fixed opcode fields.
  void rev(SubRegSize size, PRegister pd, PRegister pn) {
    constexpr auto Op1 = 0b10100, Op2 = 0b0000, Op3 = 0b0;
    SVEPermutePredicate(size, Op1, Op2, Op3, pd, pn);
  }

  // SVE unpack predicate elements
  // Predicate unpack low/high. Both always pass SubRegSize::i8Bit and differ only in the
  // first opcode field.
  void punpklo(PRegister pd, PRegister pn) {
    constexpr auto Op1 = 0b10000, Op2 = 0b0000, Op3 = 0b0;
    SVEPermutePredicate(SubRegSize::i8Bit, Op1, Op2, Op3, pd, pn);
  }
  void punpkhi(PRegister pd, PRegister pn) {
    constexpr auto Op1 = 0b10001, Op2 = 0b0000, Op3 = 0b0;
    SVEPermutePredicate(SubRegSize::i8Bit, Op1, Op2, Op3, pd, pn);
  }

  // SVE permute predicate elements
  // Two-source predicate permutes. The index of pm is passed in the first opcode slot of
  // SVEPermutePredicate; the second slot selects the operation.
  void zip1(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b0000, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }
  void zip2(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b0010, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }
  void uzp1(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b0100, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }
  void uzp2(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b0110, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }
  void trn1(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b1000, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }
  void trn2(SubRegSize size, PRegister pd, PRegister pn, PRegister pm) {
    constexpr auto Op2 = 0b1010, Op3 = 0b0;
    SVEPermutePredicate(size, pm.Idx(), Op2, Op3, pd, pn);
  }

  // SVE Permute Vector - Predicated - Base
  // CPY (SIMD&FP scalar)
  // Merging-predicated copy from a SIMD&FP scalar register; the source index is passed to
  // SVEPermuteVectorPredicated wrapped in a ZRegister.
  void cpy(SubRegSize size, ZRegister zd, PRegisterMerge pg, VRegister vn) {
    constexpr auto Opc1 = 0b00000, Opc2 = 0b0;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, ZRegister {vn.Idx()});
  }

  // COMPACT: asserts that size is 32 or 64 bits, then emits through
  // SVEPermuteVectorPredicated.
  void compact(SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00001, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit || size == SubRegSize::i32Bit, "Invalid element size");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zn);
  }

  // CPY (scalar)
  // Merging-predicated copy from a general-purpose register; the source index is passed to
  // SVEPermuteVectorPredicated wrapped in a ZRegister.
  void cpy(SubRegSize size, ZRegister zd, PRegisterMerge pg, Register rn) {
    constexpr auto Opc1 = 0b01000, Opc2 = 0b1;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, ZRegister {rn.Idx()});
  }

  // Constructive SPLICE. zn2 is only validated (it must be sequential with zn, per
  // AreVectorsSequential); just zn is forwarded to the helper.
  template<OpType optype>
  requires (optype == OpType::Constructive)
  void splice(SubRegSize size, ZRegister zd, PRegister pv, ZRegister zn, ZRegister zn2) {
    constexpr auto Opc1 = 0b01101, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zn, zn2), "zn and zn2 must be sequential registers");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pv, zn);
  }

  // Destructive SPLICE. zd must equal zn (asserted); zm is the operand forwarded to the helper.
  template<OpType optype>
  requires (optype == OpType::Destructive)
  void splice(SubRegSize size, ZRegister zd, PRegister pv, ZRegister zn, ZRegister zm) {
    constexpr auto Opc1 = 0b01100, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(zd == zn, "zd needs to equal zn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pv, zm);
  }

  // SVE Permute Vector - Predicated
  // SVE extract element to general register
  // LASTA/LASTB with a general-purpose destination; the destination index is passed wrapped
  // in a ZRegister.
  void lasta(SubRegSize size, Register rd, PRegister pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00000, Opc2 = 0b1;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {rd.Idx()}, pg, zn);
  }
  void lastb(SubRegSize size, Register rd, PRegister pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00001, Opc2 = 0b1;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {rd.Idx()}, pg, zn);
  }

  // SVE extract element to SIMD&FP scalar register
  // LASTA/LASTB with a SIMD&FP scalar destination; the destination index is passed wrapped
  // in a ZRegister.
  void lasta(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00010, Opc2 = 0b0;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {vd.Idx()}, pg, zn);
  }
  void lastb(SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00011, Opc2 = 0b0;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {vd.Idx()}, pg, zn);
  }

  // SVE reverse within elements
  // Reverse-within-element family. revb rejects 8-bit elements, revh rejects 8/16-bit
  // elements, revw requires 64-bit elements; rbit performs no size check here.
  void revb(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00100, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Can't use 8-bit element size");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zn);
  }
  void revh(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00101, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i16Bit, "Can't use 8/16-bit element sizes");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zn);
  }
  void revw(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00110, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit, "Can't use 8/16/32-bit element sizes");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zn);
  }
  void rbit(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opc1 = 0b00111, Opc2 = 0b0;
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zn);
  }

  // SVE conditionally broadcast element to vector
  // CLASTA/CLASTB, vector destination. Destructive: zd must equal zn (asserted); zm is the
  // operand forwarded to the helper.
  void clasta(SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc1 = 0b01000, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(zd == zn, "zd must be the same as zn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zm);
  }
  void clastb(SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc1 = 0b01001, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(zd == zn, "zd must be the same as zn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, zd, pg, zm);
  }

  // SVE conditionally extract element to SIMD&FP scalar
  // CLASTA/CLASTB, SIMD&FP scalar destination. Destructive: vd must equal vn (asserted);
  // the destination index is passed wrapped in a ZRegister.
  void clasta(SubRegSize size, VRegister vd, PRegister pg, VRegister vn, ZRegister zm) {
    constexpr auto Opc1 = 0b01010, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(vd == vn, "vd must be the same as vn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {vd.Idx()}, pg, zm);
  }
  void clastb(SubRegSize size, VRegister vd, PRegister pg, VRegister vn, ZRegister zm) {
    constexpr auto Opc1 = 0b01011, Opc2 = 0b0;
    LOGMAN_THROW_A_FMT(vd == vn, "vd must be the same as vn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {vd.Idx()}, pg, zm);
  }

  // SVE reverse doublewords (SME)
  // XXX:

  // SVE conditionally extract element to general register
  // CLASTA/CLASTB, general-purpose destination. Destructive: rd must equal rn (asserted);
  // the destination index is passed wrapped in a ZRegister.
  void clasta(SubRegSize size, Register rd, PRegister pg, Register rn, ZRegister zm) {
    constexpr auto Opc1 = 0b10000, Opc2 = 0b1;
    LOGMAN_THROW_A_FMT(rd == rn, "rd must be the same as rn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {rd.Idx()}, pg, zm);
  }
  void clastb(SubRegSize size, Register rd, PRegister pg, Register rn, ZRegister zm) {
    constexpr auto Opc1 = 0b10001, Opc2 = 0b1;
    LOGMAN_THROW_A_FMT(rd == rn, "rd must be the same as rn");
    SVEPermuteVectorPredicated(Opc1, Opc2, size, ZRegister {rd.Idx()}, pg, zm);
  }

  // SVE Permute Vector - Extract
  // Constructive
  // Constructive EXT. zn2 is only validated (it must be sequential with zn); the helper
  // receives 1 as its leading flag together with zd, zn and the immediate.
  template<OpType optype>
  requires (optype == OpType::Constructive)
  void ext(ZRegister zd, ZRegister zn, ZRegister zn2, uint8_t Imm) {
    constexpr auto IsConstructive = 1;
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zn, zn2), "zn and zn2 must be sequential registers");
    SVEPermuteVector(IsConstructive, zd, zn, Imm);
  }

  // Destructive
  // Destructive EXT. zd must equal zdn (asserted); the helper receives 0 as its leading
  // flag together with zd, zm and the immediate.
  template<OpType optype>
  requires (optype == OpType::Destructive)
  void ext(ZRegister zd, ZRegister zdn, ZRegister zm, uint8_t Imm) {
    constexpr auto IsConstructive = 0;
    LOGMAN_THROW_A_FMT(zd == zdn, "Dest needs to equal zdn");
    SVEPermuteVector(IsConstructive, zd, zm, Imm);
  }

  // SVE Permute Vector - Segments
  // SVE permute vector segments
  // XXX:

  // SVE Integer Compare - Vectors
  // SVE integer compare vectors
  // Vector-vs-vector integer compares. Each variant is selected by three single-bit
  // arguments; note the helper takes its register operands in the order zm, pg, zn, pd.
  void cmpeq(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, O2 = 1, NE = 0;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }
  void cmpge(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, O2 = 0, NE = 0;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }
  void cmpgt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, O2 = 0, NE = 1;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }
  void cmphi(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, O2 = 0, NE = 1;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }
  void cmphs(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, O2 = 0, NE = 0;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }
  void cmpne(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, O2 = 1, NE = 1;
    SVEIntegerCompareVector(Op, O2, NE, size, zm, pg, zn, pd);
  }

  // SVE integer compare with wide elements
  // Integer compares against wide elements. Each variant is selected by a leading bit, a
  // two-bit field and a trailing bit passed to SVEIntegerCompareVectorWide.
  void cmpeq_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b01, NE = 0;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmpgt_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b10, NE = 1;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmpge_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b10, NE = 0;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmphi_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, Op2 = 0b10, NE = 1;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmphs_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, Op2 = 0b10, NE = 0;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmplt_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b11, NE = 0;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmple_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b11, NE = 1;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmplo_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, Op2 = 0b11, NE = 0;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmpls_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1, Op2 = 0b11, NE = 1;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }
  void cmpne_wide(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0, Op2 = 0b01, NE = 1;
    SVEIntegerCompareVectorWide(Op, Op2, NE, size, pd, pg, zn, zm);
  }

  // SVE Propagate Break
  // SVE propagate break from previous partition
  // Propagate-break from previous partition. The 'a'/'b' variants differ in the third
  // argument; the 's'-suffixed variants differ in the first.
  void brkpa(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0000, Op2 = 0b11, Op3 = 0;
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, pm);
  }
  void brkpb(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0000, Op2 = 0b11, Op3 = 1;
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, pm);
  }
  void brkpas(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0100, Op2 = 0b11, Op3 = 0;
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, pm);
  }
  void brkpbs(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0100, Op2 = 0b11, Op3 = 1;
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, pm);
  }

  // SVE Partition Break
  // SVE propagate break to next partition
  // Propagate-break to next partition. Destructive: pd must equal pm (asserted). pm itself
  // is not forwarded; PReg::p8 is passed in the helper's final slot instead.
  // NOTE(review): p8 presumably supplies fixed encoding bits in that field -- confirm
  // against SVEPropagateBreak.
  void brkn(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0001, Op2 = 0b01, Op3 = 0;
    LOGMAN_THROW_A_FMT(pd == pm, "pd and pm need to be the same");
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, PReg::p8);
  }
  void brkns(PRegister pd, PRegisterZero pg, PRegister pn, PRegister pm) {
    constexpr auto Opc = 0b0101, Op2 = 0b01, Op3 = 0;
    LOGMAN_THROW_A_FMT(pd == pm, "pd and pm need to be the same");
    SVEPropagateBreak(Opc, Op2, Op3, pd, pg, pn, PReg::p8);
  }

  // SVE partition break condition
  // Partition-break family. Zeroing-predicate overloads pass 0 and merging-predicate
  // overloads pass 1 as the third argument; PReg::p0 always fills the final slot.
  void brka(PRegister pd, PRegisterZero pg, PRegister pn) {
    constexpr auto Opc = 0b0001, Op2 = 0b01, Merging = 0;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }
  void brka(PRegister pd, PRegisterMerge pg, PRegister pn) {
    constexpr auto Opc = 0b0001, Op2 = 0b01, Merging = 1;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }
  void brkas(PRegister pd, PRegisterZero pg, PRegister pn) {
    constexpr auto Opc = 0b0101, Op2 = 0b01, Merging = 0;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }
  void brkb(PRegister pd, PRegisterZero pg, PRegister pn) {
    constexpr auto Opc = 0b1001, Op2 = 0b01, Merging = 0;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }
  void brkb(PRegister pd, PRegisterMerge pg, PRegister pn) {
    constexpr auto Opc = 0b1001, Op2 = 0b01, Merging = 1;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }
  void brkbs(PRegister pd, PRegisterZero pg, PRegister pn) {
    constexpr auto Opc = 0b1101, Op2 = 0b01, Merging = 0;
    SVEPropagateBreak(Opc, Op2, Merging, pd, pg, pn, PReg::p0);
  }

  // SVE Predicate Misc
  // PNEXT. Destructive: pd must equal pn (asserted). The index of pv is passed in the
  // helper's third slot.
  void pnext(SubRegSize size, PRegister pd, PRegister pv, PRegister pn) {
    constexpr auto Op0 = 0b1001, Op2 = 0b00010;
    LOGMAN_THROW_A_FMT(pd == pn, "pd and pn need to be the same");
    SVEPredicateMisc(Op0, Op2, pv.Idx(), size, pd);
  }

  // SVE predicate test
  // PTEST. The index of pg, shifted left by one, occupies the helper's second slot and the
  // index of pn the third; SubRegSize::i16Bit and PReg::p0 are passed as fixed values.
  void ptest(PRegister pg, PRegister pn) {
    constexpr auto Op0 = 0b0000;
    const auto ShiftedPg = pg.Idx() << 1;
    SVEPredicateMisc(Op0, ShiftedPg, pn.Idx(), SubRegSize::i16Bit, PReg::p0);
  }

  // SVE predicate first active
  // PFIRST. Destructive: pd must equal pn (asserted). The index of pg is passed in the
  // helper's third slot and SubRegSize::i16Bit as a fixed size value.
  void pfirst(PRegister pd, PRegister pg, PRegister pn) {
    constexpr auto Op0 = 0b1000, Op2 = 0b00000;
    LOGMAN_THROW_A_FMT(pd == pn, "pd and pn need to be the same");
    SVEPredicateMisc(Op0, Op2, pg.Idx(), SubRegSize::i16Bit, pd);
  }

  // SVE predicate zero
  // PFALSE: only the destination predicate varies; all other fields are constants.
  void pfalse(PRegister pd) {
    constexpr auto Op0 = 0b1000, Op2 = 0b10010, Op3 = 0b0000;
    SVEPredicateMisc(Op0, Op2, Op3, SubRegSize::i8Bit, pd);
  }

  // SVE predicate read from FFR (predicated)
  // Predicated FFR read. rdffr and rdffrs differ only in the SubRegSize value passed to
  // the helper (i8Bit vs i16Bit).
  void rdffr(PRegister pd, PRegisterZero pg) {
    constexpr auto Op0 = 0b1000, Op2 = 0b11000;
    SVEPredicateMisc(Op0, Op2, pg.Idx(), SubRegSize::i8Bit, pd);
  }

  void rdffrs(PRegister pd, PRegisterZero pg) {
    constexpr auto Op0 = 0b1000, Op2 = 0b11000;
    SVEPredicateMisc(Op0, Op2, pg.Idx(), SubRegSize::i16Bit, pd);
  }

  // SVE predicate read from FFR (unpredicated)
  // Unpredicated FFR read: only the destination predicate varies.
  void rdffr(PRegister pd) {
    constexpr auto Op0 = 0b1001, Op2 = 0b11000, Op3 = 0b0000;
    SVEPredicateMisc(Op0, Op2, Op3, SubRegSize::i8Bit, pd);
  }

  // SVE predicate initialize
  // PTRUE/PTRUES. The pattern enum is converted with FEXCore::ToUnderlying and passed in
  // the helper's third slot; the two variants differ only in the first argument.
  void ptrue(SubRegSize size, PRegister pd, PredicatePattern pattern) {
    constexpr auto Op0 = 0b1000, Op2 = 0b10000;
    const auto RawPattern = FEXCore::ToUnderlying(pattern);
    SVEPredicateMisc(Op0, Op2, RawPattern, size, pd);
  }
  void ptrues(SubRegSize size, PRegister pd, PredicatePattern pattern) {
    constexpr auto Op0 = 0b1001, Op2 = 0b10000;
    const auto RawPattern = FEXCore::ToUnderlying(pattern);
    SVEPredicateMisc(Op0, Op2, RawPattern, size, pd);
  }

  // SVE Integer Compare - Scalars
  // SVE integer compare scalar count and limit
  template<IsXOrWRegister T>
  void whilege(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar(IsXRegister << 2, 0, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilegt(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar(IsXRegister << 2, 1, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilelt(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b001, 0, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilele(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b001, 1, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilehs(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b010, 0, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilehi(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b010, 1, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilelo(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b011, 0, pd.Idx(), size, rn, rm);
  }
  template<IsXOrWRegister T>
  void whilels(SubRegSize size, PRegister pd, T rn, T rm) {
    constexpr auto IsXRegister = static_cast<uint32_t>(std::is_same_v<T, XRegister>);
    SVEIntCompareScalar((IsXRegister << 2) | 0b011, 1, pd.Idx(), size, rn, rm);
  }

  // SVE conditionally terminate scalars
  // CTERMEQ/CTERMNE. The size argument is derived from the register type: i64Bit for
  // XRegister operands, i32Bit otherwise. The two variants differ in the second argument.
  template<IsXOrWRegister T>
  void ctermeq(T rn, T rm) {
    if constexpr (std::is_same_v<T, XRegister>) {
      SVEIntCompareScalar(0b1000, 0, 0b0000, SubRegSize::i64Bit, rn, rm);
    } else {
      SVEIntCompareScalar(0b1000, 0, 0b0000, SubRegSize::i32Bit, rn, rm);
    }
  }
  template<IsXOrWRegister T>
  void ctermne(T rn, T rm) {
    if constexpr (std::is_same_v<T, XRegister>) {
      SVEIntCompareScalar(0b1000, 1, 0b0000, SubRegSize::i64Bit, rn, rm);
    } else {
      SVEIntCompareScalar(0b1000, 1, 0b0000, SubRegSize::i32Bit, rn, rm);
    }
  }

  // SVE pointer conflict compare
  // Pointer-conflict compares; X registers only. The two variants differ in the second
  // argument passed to SVEIntCompareScalar.
  void whilewr(SubRegSize size, PRegister pd, XRegister rn, XRegister rm) {
    constexpr auto Op1 = 0b1100, Variant = 0;
    SVEIntCompareScalar(Op1, Variant, pd.Idx(), size, rn, rm);
  }
  void whilerw(SubRegSize size, PRegister pd, XRegister rn, XRegister rm) {
    constexpr auto Op1 = 0b1100, Variant = 1;
    SVEIntCompareScalar(Op1, Variant, pd.Idx(), size, rn, rm);
  }

  // SVE Integer Wide Immediate - Unpredicated
  // SVE integer add/subtract immediate (unpredicated)
  // Unpredicated add/subtract with immediate. Each variant supplies its own 3-bit opcode
  // to SVEAddSubImmediateUnpred; all other arguments are forwarded unchanged.
  void add(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b000;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void sub(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b001;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void subr(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b011;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void sqadd(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b100;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void uqadd(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b101;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void sqsub(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b110;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void uqsub(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    constexpr auto Opcode = 0b111;
    SVEAddSubImmediateUnpred(Opcode, size, zd, zn, imm);
  }

  // SVE integer min/max immediate (unpredicated)
  // Unpredicated min/max with immediate. Each variant supplies its own opcode to
  // SVEMinMaxImmediateUnpred; all other arguments are forwarded unchanged.
  void smax(SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    constexpr auto Opcode = 0b000;
    SVEMinMaxImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void umax(SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    constexpr auto Opcode = 0b001;
    SVEMinMaxImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void smin(SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    constexpr auto Opcode = 0b010;
    SVEMinMaxImmediateUnpred(Opcode, size, zd, zn, imm);
  }
  void umin(SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    constexpr auto Opcode = 0b011;
    SVEMinMaxImmediateUnpred(Opcode, size, zd, zn, imm);
  }

  // SVE integer multiply immediate (unpredicated)
  // Unpredicated multiply by immediate: forwards to SVEMultiplyImmediateUnpred with opcode 0.
  void mul(SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    constexpr auto Opcode = 0b000;
    SVEMultiplyImmediateUnpred(Opcode, size, zd, zn, imm);
  }

  // SVE broadcast integer immediate (unpredicated)
  // Unpredicated integer immediate broadcast; forwards to SVEBroadcastImm with opcode 0.
  void dup_imm(SubRegSize size, ZRegister zd, int32_t Value) {
    constexpr auto Opcode = 0b00;
    SVEBroadcastImm(Opcode, Value, size, zd);
  }
  // Alias of dup_imm: emits the identical helper call.
  void mov_imm(SubRegSize size, ZRegister zd, int32_t Value) {
    constexpr auto Opcode = 0b00;
    SVEBroadcastImm(Opcode, Value, size, zd);
  }

  // SVE broadcast floating-point immediate (unpredicated)
  // Unpredicated floating-point immediate broadcast.
  // The value is converted to an 8-bit immediate with FP32ToImm8 for 32-bit elements and
  // FP64ToImm8 for 64-bit elements. 16-bit elements are reported as unsupported and treated
  // as unreachable.
  void fdup(SubRegSize size, ZRegister zd, float Value) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Unsupported fmov size");

    // Half precision has no encoder here.
    if (size == SubRegSize::i16Bit) {
      LOGMAN_MSG_A_FMT("Unsupported");
      FEX_UNREACHABLE;
    }

    uint32_t Encoded {};
    if (size == SubRegSize::i32Bit) {
      Encoded = FP32ToImm8(Value);
    } else if (size == SubRegSize::i64Bit) {
      Encoded = FP64ToImm8(Value);
    }

    SVEBroadcastFloatImmUnpredicated(0b00, 0, Encoded, size, zd);
  }
  // Alias of fdup.
  void fmov(SubRegSize size, ZRegister zd, float Value) {
    fdup(size, zd, Value);
  }

  // SVE Predicate Count
  // SVE predicate count
  // CNTP: forwards to SVEPredicateCount with opcode 0; destination is an X register.
  void cntp(SubRegSize size, XRegister rd, PRegister pg, PRegister pn) {
    constexpr auto Opcode = 0b000;
    SVEPredicateCount(Opcode, size, rd, pg, pn);
  }

  // SVE Inc/Dec by Predicate Count
  // SVE saturating inc/dec vector by predicate count
  // Saturating inc/dec of a vector by predicate count. The variants differ only in the
  // fourth argument passed to SVEIncDecPredicateCountVector.
  void sqincp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 0, Opc = 0b00, Variant = 0b00;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }
  void uqincp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 0, Opc = 0b00, Variant = 0b01;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }
  void sqdecp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 0, Opc = 0b00, Variant = 0b10;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }
  void uqdecp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 0, Opc = 0b00, Variant = 0b11;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }

  // SVE saturating inc/dec register by predicate count
  // Saturating inc/dec of a general-purpose register by predicate count.
  // X-only forms pass 0b10 as the third argument; the forms involving a W register pass
  // 0b00. The signed X+W overloads assert that rdn and wn share the same index.
  void sqincp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b10, Variant = 0b00;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void sqincp(SubRegSize size, XRegister rdn, PRegister pm, WRegister wn) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b00, Variant = 0b00;
    LOGMAN_THROW_A_FMT(rdn.Idx() == wn.Idx(), "rdn and wn must be the same");
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void uqincp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b10, Variant = 0b01;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void uqincp(SubRegSize size, WRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b00, Variant = 0b01;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void sqdecp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b10, Variant = 0b10;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void sqdecp(SubRegSize size, XRegister rdn, PRegister pm, WRegister wn) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b00, Variant = 0b10;
    LOGMAN_THROW_A_FMT(rdn.Idx() == wn.Idx(), "rdn and wn must be the same");
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void uqdecp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b10, Variant = 0b11;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void uqdecp(SubRegSize size, WRegister rdn, PRegister pm) {
    constexpr auto Op0 = 0, Op1 = 1, Opc = 0b00, Variant = 0b11;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }

  // SVE inc/dec vector by predicate count
  // Non-saturating inc/dec of a vector by predicate count; the two differ in the fourth
  // argument passed to SVEIncDecPredicateCountVector.
  void incp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 1, Op1 = 0, Opc = 0b00, Variant = 0b00;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }
  void decp(SubRegSize size, ZRegister zdn, PRegister pm) {
    constexpr auto Op0 = 1, Op1 = 0, Opc = 0b00, Variant = 0b01;
    SVEIncDecPredicateCountVector(Op0, Op1, Opc, Variant, size, zdn, pm);
  }

  // SVE inc/dec register by predicate count
  // Non-saturating inc/dec of an X register by predicate count; the two differ in the
  // fourth argument passed to SVEIncDecPredicateCountScalar.
  void incp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 1, Op1 = 1, Opc = 0b00, Variant = 0b00;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }
  void decp(SubRegSize size, XRegister rdn, PRegister pm) {
    constexpr auto Op0 = 1, Op1 = 1, Opc = 0b00, Variant = 0b01;
    SVEIncDecPredicateCountScalar(Op0, Op1, Opc, Variant, size, rdn, pm);
  }

  // SVE Write FFR
  // SVE FFR write from predicate
  // WRFFR: the index of pn is passed in the helper's fourth slot; the first argument is 0.
  void wrffr(PRegister pn) {
    constexpr auto Op0 = 0, Op1 = 0b00, Opc = 0b000, Tail = 0b00000;
    SVEWriteFFR(Op0, Op1, Opc, pn.Idx(), Tail);
  }
  // SVE FFR initialise
  // SETFFR: same helper with the first argument set to 1 and no register operand.
  void setffr() {
    constexpr auto Op0 = 1, Op1 = 0b00, Opc = 0b000, NoReg = 0b0000, Tail = 0b00000;
    SVEWriteFFR(Op0, Op1, Opc, NoReg, Tail);
  }

  // SVE Integer Multiply-Add - Unpredicated
  // CDOT: forwards to SVEIntegerDotProduct with opcode 0b0001 and the caller's rotation.
  void cdot(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, Rotation rot) {
    constexpr auto Opcode = 0b0001;
    SVEIntegerDotProduct(Opcode, size, zda, zn, zm, rot);
  }

  // SVE integer dot product (unpredicated)
  // SDOT/UDOT share opcode 0b0000 and differ only in the Rotation value handed to
  // SVEIntegerDotProduct (ROTATE_0 vs ROTATE_90).
  // NOTE(review): the rotation slot appears to be reused here as the signed/unsigned
  // selector rather than as an actual rotation -- confirm against SVEIntegerDotProduct.
  void sdot(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0000;
    SVEIntegerDotProduct(Opcode, size, zda, zn, zm, Rotation::ROTATE_0);
  }
  void udot(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0000;
    SVEIntegerDotProduct(Opcode, size, zda, zn, zm, Rotation::ROTATE_90);
  }

  // SVE2 saturating multiply-add interleaved long
  // Saturating doubling multiply-add/subtract, interleaved long; the two variants differ
  // in the low bit of the opcode.
  void sqdmlalbt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000010;
    SVE2SaturatingMulAddInterleaved(Opcode, size, zda, zn, zm);
  }
  void sqdmlslbt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000011;
    SVE2SaturatingMulAddInterleaved(Opcode, size, zda, zn, zm);
  }

  // SVE2 complex integer multiply-add
  // Complex integer multiply-add with rotation; the two variants differ in the opcode
  // passed to SVEIntegerComplexMulAdd.
  void cmla(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, Rotation rot) {
    constexpr auto Opcode = 0b0010;
    SVEIntegerComplexMulAdd(Opcode, size, zda, zn, zm, rot);
  }
  void sqrdcmlah(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, Rotation rot) {
    constexpr auto Opcode = 0b0011;
    SVEIntegerComplexMulAdd(Opcode, size, zda, zn, zm, rot);
  }

  // SVE2 integer multiply-add long
  // Integer multiply-add/subtract long. All variants share the 0b010 prefix; the low three
  // opcode bits select the operation.
  void smlalb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'000;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void smlalt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'001;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void umlalb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'010;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void umlalt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'011;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void smlslb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'100;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void smlslt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'101;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void umlslb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'110;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void umlslt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010'111;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }

  // SVE2 saturating multiply-add long
  // Saturating doubling multiply-add/subtract long. All variants share the 0b0110 prefix;
  // the low two opcode bits select the operation.
  void sqdmlalb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0110'00;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void sqdmlalt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0110'01;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void sqdmlslb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0110'10;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }
  void sqdmlslt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0110'11;
    SVE2IntegerMulAddLong(Opcode, size, zda, zn, zm);
  }

  // SVE2 saturating multiply-add high
  // Saturating rounding doubling multiply-add/subtract high; the two variants differ in
  // the low bit of the opcode.
  void sqrdmlah(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011'100;
    SVEIntegerMultiplyAddUnpredicated(Opcode, size, zda, zn, zm);
  }
  void sqrdmlsh(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011'101;
    SVEIntegerMultiplyAddUnpredicated(Opcode, size, zda, zn, zm);
  }

  // SVE mixed sign dot product
  // USDOT: always emitted with SubRegSize::i32Bit, opcode 0b0111 and Rotation::ROTATE_180.
  // NOTE(review): the rotation value presumably supplies fixed encoding bits rather than
  // an actual rotation -- confirm against SVEIntegerDotProduct.
  void usdot(ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0111;
    SVEIntegerDotProduct(Opcode, SubRegSize::i32Bit, zda, zn, zm, Rotation::ROTATE_180);
  }

  // SVE2 Integer - Predicated
  // SVE2 integer pairwise add and accumulate long
  // Pairwise add and accumulate long, merging predicate. The leading flag is 0 for the
  // signed form and 1 for the unsigned form.
  void sadalp(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn) {
    constexpr auto IsUnsigned = 0;
    SVE2IntegerPairwiseAddAccumulateLong(IsUnsigned, size, zda, pg, zn);
  }
  void uadalp(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn) {
    constexpr auto IsUnsigned = 1;
    SVE2IntegerPairwiseAddAccumulateLong(IsUnsigned, size, zda, pg, zn);
  }

  // SVE2 integer unary operations (predicated)
  // Predicated SVE2 integer unary operations. urecpe and ursqrte always use
  // SubRegSize::i32Bit; sqabs and sqneg take the element size from the caller.
  void urecpe(ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b00000;
    SVE2IntegerUnaryOpsPredicated(Opcode, SubRegSize::i32Bit, zd, pg, zn);
  }
  void ursqrte(ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b00001;
    SVE2IntegerUnaryOpsPredicated(Opcode, SubRegSize::i32Bit, zd, pg, zn);
  }
  void sqabs(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b01000;
    SVE2IntegerUnaryOpsPredicated(Opcode, size, zd, pg, zn);
  }
  void sqneg(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b01001;
    SVE2IntegerUnaryOpsPredicated(Opcode, size, zd, pg, zn);
  }

  // SVE2 saturating/rounding bitwise shift left (predicated)
  void srshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVE2SaturatingRoundingBitwiseShiftLeft(0b00010, size, zd, pg, zn, zm);
  }
  void urshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVE2SaturatingRoundingBitwiseShiftLeft(0b00011, size, zd, pg, zn, zm);
  }
  void srshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVE2SaturatingRoundingBitwiseShiftLeft(0b00110, size, zd, pg, zn, zm);
  }
  void urshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    SVE2SaturatingRoundingBitwiseShiftLeft(0b00111, size, zd, pg, zn, zm);
  }
  // SQSHL (predicated): shift-left group opcode 0b01000.
  void sqshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01000;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // UQSHL (predicated): shift-left group opcode 0b01001.
  void uqshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01001;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // SQRSHL (predicated): shift-left group opcode 0b01010.
  void sqrshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01010;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // UQRSHL (predicated): shift-left group opcode 0b01011.
  void uqrshl(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01011;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // SQSHLR (predicated): shift-left group opcode 0b01100.
  void sqshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01100;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // UQSHLR (predicated): shift-left group opcode 0b01101.
  void uqshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01101;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // SQRSHLR (predicated): shift-left group opcode 0b01110.
  void sqrshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01110;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }
  // UQRSHLR (predicated): shift-left group opcode 0b01111.
  void uqrshlr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01111;
    SVE2SaturatingRoundingBitwiseShiftLeft(Opcode, size, zd, pg, zn, zm);
  }

  // SVE2 integer halving add/subtract (predicated)
  // SHADD (predicated): halving add/subtract group opcode 0b000.
  void shadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // UHADD (predicated): halving add/subtract group opcode 0b001.
  void uhadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // SHSUB (predicated): halving add/subtract group opcode 0b010.
  void shsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // UHSUB (predicated): halving add/subtract group opcode 0b011.
  void uhsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // SRHADD (predicated): halving add/subtract group opcode 0b100.
  void srhadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b100;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // URHADD (predicated): halving add/subtract group opcode 0b101.
  void urhadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b101;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // SHSUBR (predicated): halving add/subtract group opcode 0b110.
  void shsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b110;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // UHSUBR (predicated): halving add/subtract group opcode 0b111.
  void uhsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b111;
    SVE2IntegerHalvingPredicated(Opcode, size, pg, zd, zn, zm);
  }

  // SVE2 integer pairwise arithmetic
  // ADDP (predicated): pairwise arithmetic selectors (0b00, 1).
  void addp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b00;
    constexpr auto UBit = 1;
    SVEIntegerPairwiseArithmetic(Opc, UBit, size, pg, zd, zn, zm);
  }
  // SMAXP (predicated): pairwise arithmetic selectors (0b10, 0).
  void smaxp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    constexpr auto UBit = 0;
    SVEIntegerPairwiseArithmetic(Opc, UBit, size, pg, zd, zn, zm);
  }
  // UMAXP (predicated): pairwise arithmetic selectors (0b10, 1).
  void umaxp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b10;
    constexpr auto UBit = 1;
    SVEIntegerPairwiseArithmetic(Opc, UBit, size, pg, zd, zn, zm);
  }
  // SMINP (predicated): pairwise arithmetic selectors (0b11, 0).
  void sminp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b11;
    constexpr auto UBit = 0;
    SVEIntegerPairwiseArithmetic(Opc, UBit, size, pg, zd, zn, zm);
  }
  // UMINP (predicated): pairwise arithmetic selectors (0b11, 1).
  void uminp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opc = 0b11;
    constexpr auto UBit = 1;
    SVEIntegerPairwiseArithmetic(Opc, UBit, size, pg, zd, zn, zm);
  }

  // SVE2 saturating add/subtract
  // SQADD (predicated): saturating add/subtract group opcode 0b000.
  void sqadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // UQADD (predicated): saturating add/subtract group opcode 0b001.
  void uqadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // SQSUB (predicated): saturating add/subtract group opcode 0b010.
  void sqsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // UQSUB (predicated): saturating add/subtract group opcode 0b011.
  void uqsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // SUQADD (predicated): saturating add/subtract group opcode 0b100.
  void suqadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b100;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // USQADD (predicated): saturating add/subtract group opcode 0b101.
  void usqadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b101;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // SQSUBR (predicated): saturating add/subtract group opcode 0b110.
  void sqsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b110;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }
  // UQSUBR (predicated): saturating add/subtract group opcode 0b111.
  void uqsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b111;
    SVE2IntegerSaturatingAddSub(Opcode, size, zd, pg, zn, zm);
  }

  // SVE2 Widening Integer Arithmetic
  // SVE2 integer add/subtract long
  // SADDLB: add/subtract long selectors (0, 0b000).
  void saddlb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b000;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SADDLT: add/subtract long selectors (0, 0b001).
  void saddlt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b001;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // UADDLB: add/subtract long selectors (0, 0b010).
  void uaddlb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b010;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // UADDLT: add/subtract long selectors (0, 0b011).
  void uaddlt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b011;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SSUBLB: add/subtract long selectors (0, 0b100).
  void ssublb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b100;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SSUBLT: add/subtract long selectors (0, 0b101).
  void ssublt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b101;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // USUBLB: add/subtract long selectors (0, 0b110).
  void usublb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b110;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // USUBLT: add/subtract long selectors (0, 0b111).
  void usublt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 0;
    constexpr auto Variant = 0b111;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SABDLB: add/subtract long selectors (1, 0b100).
  void sabdlb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1;
    constexpr auto Variant = 0b100;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SABDLT: add/subtract long selectors (1, 0b101).
  void sabdlt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1;
    constexpr auto Variant = 0b101;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // UABDLB: add/subtract long selectors (1, 0b110).
  void uabdlb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1;
    constexpr auto Variant = 0b110;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // UABDLT: add/subtract long selectors (1, 0b111).
  void uabdlt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Op = 1;
    constexpr auto Variant = 0b111;
    SVE2IntegerAddSubLong(Op, Variant, size, zd, zn, zm);
  }

  // SVE2 integer add/subtract wide
  // SADDWB: add/subtract wide group opcode 0b000.
  void saddwb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // SADDWT: add/subtract wide group opcode 0b001.
  void saddwt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // UADDWB: add/subtract wide group opcode 0b010.
  void uaddwb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // UADDWT: add/subtract wide group opcode 0b011.
  void uaddwt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // SSUBWB: add/subtract wide group opcode 0b100.
  void ssubwb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b100;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // SSUBWT: add/subtract wide group opcode 0b101.
  void ssubwt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b101;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // USUBWB: add/subtract wide group opcode 0b110.
  void usubwb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b110;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }
  // USUBWT: add/subtract wide group opcode 0b111.
  void usubwt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b111;
    SVE2IntegerAddSubWide(Opcode, size, zd, zn, zm);
  }

  // SVE2 integer multiply long
  // SQDMULLB: multiply long group opcode 0b000.
  void sqdmullb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // SQDMULLT: multiply long group opcode 0b001.
  void sqdmullt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // PMULLB: multiply long group opcode 0b010.
  void pmullb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // PMULLT: multiply long group opcode 0b011.
  void pmullt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // SMULLB: multiply long group opcode 0b100.
  void smullb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b100;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // SMULLT: multiply long group opcode 0b101.
  void smullt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b101;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // UMULLB: multiply long group opcode 0b110.
  void umullb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b110;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }
  // UMULLT: multiply long group opcode 0b111.
  void umullt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b111;
    SVE2IntegerMultiplyLong(Opcode, size, zd, zn, zm);
  }

  //
  // SVE Misc
  // SVE2 bitwise shift left long
  // SSHLLB: shift left long by immediate, group opcode 0b00.
  void sshllb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b00;
    SVE2BitwiseShiftLeftLong(size, Opcode, zd, zn, shift);
  }
  // SSHLLT: shift left long by immediate, group opcode 0b01.
  void sshllt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b01;
    SVE2BitwiseShiftLeftLong(size, Opcode, zd, zn, shift);
  }
  // USHLLB: shift left long by immediate, group opcode 0b10.
  void ushllb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b10;
    SVE2BitwiseShiftLeftLong(size, Opcode, zd, zn, shift);
  }
  // USHLLT: shift left long by immediate, group opcode 0b11.
  void ushllt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b11;
    SVE2BitwiseShiftLeftLong(size, Opcode, zd, zn, shift);
  }

  // SVE2 integer add/subtract interleaved long
  // SADDLBT: interleaved add/subtract long group opcode 0b00.
  void saddlbt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b00;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zd, zn, zm);
  }
  // SSUBLBT: interleaved add/subtract long group opcode 0b10.
  void ssublbt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zd, zn, zm);
  }
  // SSUBLTB: interleaved add/subtract long group opcode 0b11.
  void ssubltb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b11;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zd, zn, zm);
  }

  // SVE2 bitwise exclusive-or interleaved
  // EORBT: interleaved exclusive-or with selector 0b0.
  void eorbt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b0;
    SVE2BitwiseXorInterleaved(size, Selector, zd, zn, zm);
  }
  // EORTB: interleaved exclusive-or with selector 0b1.
  void eortb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b1;
    SVE2BitwiseXorInterleaved(size, Selector, zd, zn, zm);
  }

  // SVE integer matrix multiply accumulate
  // SMMLA: integer matrix multiply-accumulate with selector 0b00.
  void smmla(ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b00;
    SVEIntegerMatrixMulAccumulate(Selector, zda, zn, zm);
  }
  // USMMLA: integer matrix multiply-accumulate with selector 0b10.
  void usmmla(ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b10;
    SVEIntegerMatrixMulAccumulate(Selector, zda, zn, zm);
  }
  // UMMLA: integer matrix multiply-accumulate with selector 0b11.
  void ummla(ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b11;
    SVEIntegerMatrixMulAccumulate(Selector, zda, zn, zm);
  }

  // SVE2 bitwise permute
  // BEXT: bitwise permute group opcode 0b00.
  void bext(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b00;
    SVE2BitwisePermute(size, Opcode, zd, zn, zm);
  }
  // BDEP: bitwise permute group opcode 0b01.
  void bdep(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b01;
    SVE2BitwisePermute(size, Opcode, zd, zn, zm);
  }
  // BGRP: bitwise permute group opcode 0b10.
  void bgrp(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10;
    SVE2BitwisePermute(size, Opcode, zd, zn, zm);
  }

  // SVE2 Accumulate
  // SVE2 complex integer add
  // CADD: complex integer add with selector 0b0 and the caller's rotation.
  void cadd(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, Rotation rot) {
    constexpr auto Selector = 0b0;
    SVE2ComplexIntAdd(size, Selector, rot, zd, zn, zm);
  }
  // SQCADD: complex integer add with selector 0b1 and the caller's rotation.
  void sqcadd(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, Rotation rot) {
    constexpr auto Selector = 0b1;
    SVE2ComplexIntAdd(size, Selector, rot, zd, zn, zm);
  }

  // SVE2 integer absolute difference and accumulate long
  // SABALB: reuses SVE2IntegerAddSubInterleavedLong with the 5-bit selector
  // 0b10000.
  void sabalb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10000;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zda, zn, zm);
  }
  // SABALT: reuses SVE2IntegerAddSubInterleavedLong with the 5-bit selector
  // 0b10001.
  void sabalt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10001;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zda, zn, zm);
  }
  // UABALB: reuses SVE2IntegerAddSubInterleavedLong with the 5-bit selector
  // 0b10010.
  void uabalb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10010;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zda, zn, zm);
  }
  // UABALT: reuses SVE2IntegerAddSubInterleavedLong with the 5-bit selector
  // 0b10011.
  void uabalt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b10011;
    SVE2IntegerAddSubInterleavedLong(size, Opcode, zda, zn, zm);
  }

  // SVE2 integer add/subtract long with carry
  // ADCLB: add/subtract long with carry, selectors (0, 0).
  void adclb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 0;
    constexpr auto TBit = 0;
    SVE2IntegerAddSubLongWithCarry(size, OpBit, TBit, zda, zn, zm);
  }
  // ADCLT: add/subtract long with carry, selectors (0, 1).
  void adclt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 0;
    constexpr auto TBit = 1;
    SVE2IntegerAddSubLongWithCarry(size, OpBit, TBit, zda, zn, zm);
  }
  // SBCLB: add/subtract long with carry, selectors (1, 0).
  void sbclb(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 1;
    constexpr auto TBit = 0;
    SVE2IntegerAddSubLongWithCarry(size, OpBit, TBit, zda, zn, zm);
  }
  // SBCLT: add/subtract long with carry, selectors (1, 1).
  void sbclt(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 1;
    constexpr auto TBit = 1;
    SVE2IntegerAddSubLongWithCarry(size, OpBit, TBit, zda, zn, zm);
  }

  // SVE2 bitwise shift right and accumulate
  // SSRA: shift right and accumulate, group opcode 0b00.
  void ssra(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b00;
    SVE2BitwiseShiftRightAndAccumulate(size, Opcode, zda, zn, shift);
  }
  // USRA: shift right and accumulate, group opcode 0b01.
  void usra(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b01;
    SVE2BitwiseShiftRightAndAccumulate(size, Opcode, zda, zn, shift);
  }
  // SRSRA: shift right and accumulate, group opcode 0b10.
  void srsra(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b10;
    SVE2BitwiseShiftRightAndAccumulate(size, Opcode, zda, zn, shift);
  }
  // URSRA: shift right and accumulate, group opcode 0b11.
  void ursra(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Opcode = 0b11;
    SVE2BitwiseShiftRightAndAccumulate(size, Opcode, zda, zn, shift);
  }

  // SVE2 bitwise shift and insert
  // SRI: shift and insert with selector 0b0.
  void sri(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Selector = 0b0;
    SVE2BitwiseShiftAndInsert(size, Selector, zda, zn, shift);
  }
  // SLI: shift and insert with selector 0b1.
  void sli(SubRegSize size, ZRegister zda, ZRegister zn, uint32_t shift) {
    constexpr auto Selector = 0b1;
    SVE2BitwiseShiftAndInsert(size, Selector, zda, zn, shift);
  }

  // SVE2 integer absolute difference and accumulate
  // SABA: absolute difference and accumulate with selector 0b0.
  void saba(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b0;
    SVE2IntegerAbsDiffAndAccumulate(size, Selector, zda, zn, zm);
  }
  // UABA: absolute difference and accumulate with selector 0b1.
  void uaba(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0b1;
    SVE2IntegerAbsDiffAndAccumulate(size, Selector, zda, zn, zm);
  }

  // SVE2 Narrowing
  // SVE2 saturating extract narrow
  // SQXTNB: saturating extract narrow, selectors (0b00, 0). The helper takes
  // zn before zd.
  void sqxtnb(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b00;
    constexpr auto TBit = 0;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }
  // SQXTNT: saturating extract narrow, selectors (0b00, 1). The helper takes
  // zn before zd.
  void sqxtnt(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b00;
    constexpr auto TBit = 1;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }
  // UQXTNB: saturating extract narrow, selectors (0b01, 0). The helper takes
  // zn before zd.
  void uqxtnb(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b01;
    constexpr auto TBit = 0;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }
  // UQXTNT: saturating extract narrow, selectors (0b01, 1). The helper takes
  // zn before zd.
  void uqxtnt(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b01;
    constexpr auto TBit = 1;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }
  // SQXTUNB: saturating extract narrow, selectors (0b10, 0). The helper takes
  // zn before zd.
  void sqxtunb(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b10;
    constexpr auto TBit = 0;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }
  // SQXTUNT: saturating extract narrow, selectors (0b10, 1). The helper takes
  // zn before zd.
  void sqxtunt(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opc = 0b10;
    constexpr auto TBit = 1;
    SVE2SaturatingExtractNarrow(size, Opc, TBit, zn, zd);
  }

  // SVE2 bitwise shift right narrow
  // SQSHRUNB: shift right narrow, selector flags (0, 0, 0, 0).
  void sqshrunb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 0;
    constexpr auto RBit = 0;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQSHRUNT: shift right narrow, selector flags (0, 0, 0, 1).
  void sqshrunt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 0;
    constexpr auto RBit = 0;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQRSHRUNB: shift right narrow, selector flags (0, 0, 1, 0).
  void sqrshrunb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 0;
    constexpr auto RBit = 1;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQRSHRUNT: shift right narrow, selector flags (0, 0, 1, 1).
  void sqrshrunt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 0;
    constexpr auto RBit = 1;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SHRNB: shift right narrow, selector flags (0, 1, 0, 0).
  void shrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 1;
    constexpr auto RBit = 0;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SHRNT: shift right narrow, selector flags (0, 1, 0, 1).
  void shrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 1;
    constexpr auto RBit = 0;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // RSHRNB: shift right narrow, selector flags (0, 1, 1, 0).
  void rshrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 1;
    constexpr auto RBit = 1;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // RSHRNT: shift right narrow, selector flags (0, 1, 1, 1).
  void rshrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 0;
    constexpr auto UBit = 1;
    constexpr auto RBit = 1;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQSHRNB: shift right narrow, selector flags (1, 0, 0, 0).
  void sqshrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 0;
    constexpr auto RBit = 0;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQSHRNT: shift right narrow, selector flags (1, 0, 0, 1).
  void sqshrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 0;
    constexpr auto RBit = 0;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQRSHRNB: shift right narrow, selector flags (1, 0, 1, 0).
  void sqrshrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 0;
    constexpr auto RBit = 1;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // SQRSHRNT: shift right narrow, selector flags (1, 0, 1, 1).
  void sqrshrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 0;
    constexpr auto RBit = 1;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // UQSHRNB: shift right narrow, selector flags (1, 1, 0, 0).
  void uqshrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 1;
    constexpr auto RBit = 0;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // UQSHRNT: shift right narrow, selector flags (1, 1, 0, 1).
  void uqshrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 1;
    constexpr auto RBit = 0;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // UQRSHRNB: shift right narrow, selector flags (1, 1, 1, 0).
  void uqrshrnb(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 1;
    constexpr auto RBit = 1;
    constexpr auto TBit = 0;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }
  // UQRSHRNT: shift right narrow, selector flags (1, 1, 1, 1).
  void uqrshrnt(SubRegSize size, ZRegister zd, ZRegister zn, uint32_t Shift) {
    constexpr auto OpBit = 1;
    constexpr auto UBit = 1;
    constexpr auto RBit = 1;
    constexpr auto TBit = 1;
    SVE2BitwiseShiftRightNarrow(size, Shift, OpBit, UBit, RBit, TBit, zn, zd);
  }

  // SVE2 integer add/subtract narrow high part
  // ADDHNB: add/subtract narrow high part, group opcode 0b000.
  void addhnb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // ADDHNT: add/subtract narrow high part, group opcode 0b001.
  void addhnt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // RADDHNB: add/subtract narrow high part, group opcode 0b010.
  void raddhnb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // RADDHNT: add/subtract narrow high part, group opcode 0b011.
  void raddhnt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // SUBHNB: add/subtract narrow high part, group opcode 0b100.
  void subhnb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b100;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // SUBHNT: add/subtract narrow high part, group opcode 0b101.
  void subhnt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b101;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // RSUBHNB: add/subtract narrow high part, group opcode 0b110.
  void rsubhnb(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b110;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }
  // RSUBHNT: add/subtract narrow high part, group opcode 0b111.
  void rsubhnt(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b111;
    SVE2IntegerAddSubNarrowHighPart(size, Opcode, zd, zn, zm);
  }

  // SVE2 Crypto Extensions
  // SVE2 crypto unary operations
  // AESIMC: crypto unary operation with selector 1.
  void aesimc(ZRegister zdn, ZRegister zn) {
    constexpr auto Selector = 1;
    SVE2CryptoUnaryOperation(Selector, zdn, zn);
  }
  // AESMC: crypto unary operation with selector 0.
  void aesmc(ZRegister zdn, ZRegister zn) {
    constexpr auto Selector = 0;
    SVE2CryptoUnaryOperation(Selector, zdn, zn);
  }

  // SVE2 crypto destructive binary operations
  // AESE: crypto destructive binary operation, selectors (0, 0).
  void aese(ZRegister zdn, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 0;
    constexpr auto O2Bit = 0;
    SVE2CryptoDestructiveBinaryOperation(OpBit, O2Bit, zdn, zn, zm);
  }
  // AESD: crypto destructive binary operation, selectors (0, 1).
  void aesd(ZRegister zdn, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 0;
    constexpr auto O2Bit = 1;
    SVE2CryptoDestructiveBinaryOperation(OpBit, O2Bit, zdn, zn, zm);
  }
  // SM4E: crypto destructive binary operation, selectors (1, 0).
  void sm4e(ZRegister zdn, ZRegister zn, ZRegister zm) {
    constexpr auto OpBit = 1;
    constexpr auto O2Bit = 0;
    SVE2CryptoDestructiveBinaryOperation(OpBit, O2Bit, zdn, zn, zm);
  }

  // SVE2 crypto constructive binary operations
  // SM4EKEY: crypto constructive binary operation with selector 0.
  void sm4ekey(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 0;
    SVE2CryptoConstructiveBinaryOperation(Selector, zd, zn, zm);
  }
  // RAX1: crypto constructive binary operation with selector 1.
  void rax1(ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr auto Selector = 1;
    SVE2CryptoConstructiveBinaryOperation(Selector, zd, zn, zm);
  }

  // SVE Floating Point Widening Multiply-Add - Indexed
  // SVE BFloat16 floating-point dot product (indexed)
  // XXX: Not implemented; no emitters for this group are defined here.

  // SVE floating-point multiply-add long (indexed)
  // FMLALB (indexed): multiply-add long, selector flags (0, 0, 0).
  void fmlalb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // FMLALT (indexed): multiply-add long, selector flags (0, 0, 1).
  void fmlalt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // FMLSLB (indexed): multiply-add long, selector flags (0, 1, 0).
  void fmlslb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // FMLSLT (indexed): multiply-add long, selector flags (0, 1, 1).
  void fmlslt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // BFMLALB (indexed): multiply-add long, selector flags (1, 0, 0).
  void bfmlalb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // BFMLALT (indexed): multiply-add long, selector flags (1, 0, 1).
  void bfmlalt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // BFMLSLB (indexed): multiply-add long, selector flags (1, 1, 0).
  void bfmlslb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }
  // BFMLSLT (indexed): multiply-add long, selector flags (1, 1, 1).
  void bfmlslt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLongIndexed(O2Bit, OpBit, TBit, dstsize, zda, zn, zm, index);
  }

  // SVE Floating Point Widening Multiply-Add
  // SVE BFloat16 floating-point dot product
  // XXX: Not implemented; no emitters for this group are defined here.

  // SVE floating-point multiply-add long
  // FMLALB (vectors): multiply-add long, selector flags (0, 0, 0).
  void fmlalb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // FMLALT (vectors): multiply-add long, selector flags (0, 0, 1).
  void fmlalt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // FMLSLB (vectors): multiply-add long, selector flags (0, 1, 0).
  void fmlslb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // FMLSLT (vectors): multiply-add long, selector flags (0, 1, 1).
  void fmlslt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 0;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // BFMLALB (vectors): multiply-add long, selector flags (1, 0, 0).
  void bfmlalb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // BFMLALT (vectors): multiply-add long, selector flags (1, 0, 1).
  void bfmlalt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 0;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // BFMLSLB (vectors): multiply-add long, selector flags (1, 1, 0).
  void bfmlslb(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 0;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }
  // BFMLSLT (vectors): multiply-add long, selector flags (1, 1, 1).
  void bfmlslt(SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr auto O2Bit = 1;
    constexpr auto OpBit = 1;
    constexpr auto TBit = 1;
    SVEFPMultiplyAddLong(O2Bit, OpBit, TBit, dstsize, zda, zn, zm);
  }

  // SVE Floating Point Arithmetic - Predicated
  // FTMAD: the 3-bit immediate (asserted to be 0-7) is OR-ed into 0b10000 to
  // form the opcode handed to SVEFloatArithmeticPredicated; p0 is passed in the
  // predicate slot.
  void ftmad(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, uint32_t imm) {
    LOGMAN_THROW_A_FMT(imm <= 7, "ftmad immediate must be within 0-7");
    const auto Opcode = 0b10000 | imm;
    SVEFloatArithmeticPredicated(Opcode, size, PReg::p0, zd, zn, zm);
  }
  // SVE floating-point arithmetic (predicated)
  // FADD (vectors, predicated): FP arithmetic group opcode 0b0000.
  void fadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0000;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FSUB (vectors, predicated): FP arithmetic group opcode 0b0001.
  void fsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0001;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMUL (vectors, predicated): FP arithmetic group opcode 0b0010.
  void fmul(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0010;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FSUBR (vectors, predicated): FP arithmetic group opcode 0b0011.
  void fsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0011;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMAXNM (vectors, predicated): FP arithmetic group opcode 0b0100.
  void fmaxnm(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0100;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMINNM (vectors, predicated): FP arithmetic group opcode 0b0101.
  void fminnm(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0101;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMAX (vectors, predicated): FP arithmetic group opcode 0b0110.
  void fmax(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0110;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMIN (vectors, predicated): FP arithmetic group opcode 0b0111.
  void fmin(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b0111;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FABD (predicated): FP arithmetic group opcode 0b1000.
  void fabd(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b1000;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FSCALE (predicated): FP arithmetic group opcode 0b1001.
  void fscale(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b1001;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FMULX (predicated): FP arithmetic group opcode 0b1010.
  void fmulx(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b1010;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FDIVR (predicated): FP arithmetic group opcode 0b1100.
  void fdivr(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b1100;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }
  // FDIV (predicated): FP arithmetic group opcode 0b1101.
  void fdiv(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b1101;
    SVEFloatArithmeticPredicated(Opcode, size, pg, zd, zn, zm);
  }

  // SVE floating-point arithmetic with immediate (predicated)
  // FADD (immediate, predicated): opcode 0b000; the enum immediate is passed
  // as its underlying value.
  void fadd(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFAddSubImm imm) {
    constexpr auto Opcode = 0b000;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FSUB (immediate, predicated): opcode 0b001; the enum immediate is passed
  // as its underlying value.
  void fsub(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFAddSubImm imm) {
    constexpr auto Opcode = 0b001;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FMUL (immediate, predicated): opcode 0b010; the enum immediate is passed
  // as its underlying value.
  void fmul(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFMulImm imm) {
    constexpr auto Opcode = 0b010;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FSUBR (immediate, predicated): opcode 0b011; the enum immediate is passed
  // as its underlying value.
  void fsubr(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFAddSubImm imm) {
    constexpr auto Opcode = 0b011;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FMAXNM (immediate, predicated): opcode 0b100; the enum immediate is passed
  // as its underlying value.
  void fmaxnm(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFMaxMinImm imm) {
    constexpr auto Opcode = 0b100;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FMINNM (immediate, predicated): opcode 0b101; the enum immediate is passed
  // as its underlying value.
  void fminnm(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFMaxMinImm imm) {
    constexpr auto Opcode = 0b101;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FMAX (immediate, predicated): opcode 0b110; the enum immediate is passed
  // as its underlying value.
  void fmax(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFMaxMinImm imm) {
    constexpr auto Opcode = 0b110;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }
  // FMIN (immediate, predicated): opcode 0b111; the enum immediate is passed
  // as its underlying value.
  void fmin(SubRegSize size, ZRegister zd, PRegisterMerge pg, SVEFMaxMinImm imm) {
    constexpr auto Opcode = 0b111;
    const auto ImmBits = FEXCore::ToUnderlying(imm);
    SVEFPArithWithImmediate(Opcode, size, zd, pg, ImmBits);
  }

  // SVE Floating Point Unary Operations - Predicated
  // SVE floating-point round to integral value
  // FRINTI (predicated): round-to-integral group opcode 0b111.
  void frinti(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b111;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTX (predicated): round-to-integral group opcode 0b110.
  void frintx(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b110;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTA (predicated): round-to-integral group opcode 0b100.
  void frinta(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b100;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTN (predicated): round-to-integral group opcode 0b000.
  void frintn(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b000;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTZ (predicated): round-to-integral group opcode 0b011.
  void frintz(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b011;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTM (predicated): round-to-integral group opcode 0b010.
  void frintm(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b010;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }
  // FRINTP (predicated): round-to-integral group opcode 0b001.
  void frintp(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b001;
    SVEFloatRoundIntegral(Opcode, size, zd, pg, zn);
  }

  // SVE floating-point convert precision
  // FCVT (predicated): hands the destination (`to`) and source (`from`) element
  // sizes straight to SVEFPConvertPrecision.
  void fcvt(SubRegSize to, SubRegSize from, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    const auto DstSize = to;
    const auto SrcSize = from;
    SVEFPConvertPrecision(DstSize, SrcSize, zd, pg, zn);
  }
  // FCVTX (predicated): assembles the fixed-size encoding directly. The
  // governing predicate goes at bit 10, zn at bit 5 and zd at bit 0.
  void fcvtx(ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0000'1010'1010'0000'0000'0000;
    const uint32_t Encoding = BaseEncoding | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE floating-point unary operations
  // FRECPX (predicated): FP unary group opcode 0b00. The helper takes zn
  // before zd.
  void frecpx(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b00;
    SVEFloatUnary(Opcode, size, pg, zn, zd);
  }
  // FSQRT (predicated): FP unary group opcode 0b01. The helper takes zn
  // before zd.
  void fsqrt(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr auto Opcode = 0b01;
    SVEFloatUnary(Opcode, size, pg, zn, zd);
  }

  // SVE integer convert to floating-point
  // SCVTF (predicated): picks the (opc1, opc2) pair for the requested source
  // and destination element sizes, then forwards to SVEIntegerConvertToFloat
  // with the selector argument after opc2 set to 0.
  void scvtf(ZRegister zd, SubRegSize dstsize, PRegisterMerge pg, ZRegister zn, SubRegSize srcsize) {
    const bool DstIs64 = dstsize == SubRegSize::i64Bit;
    const bool DstIs32 = dstsize == SubRegSize::i32Bit;
    const bool DstIs16 = dstsize == SubRegSize::i16Bit;

    uint32_t opc1, opc2;
    if (srcsize == SubRegSize::i16Bit) {
      // 16-bit source elements: only a 16-bit destination is accepted.
      LOGMAN_THROW_A_FMT(dstsize == SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      opc1 = 0b01;
      opc2 = 0b01;
    } else if (srcsize == SubRegSize::i32Bit) {
      // 32-bit source elements.
      if (DstIs64) {
        opc1 = 0b11;
        opc2 = 0b00;
      } else if (DstIs32) {
        opc1 = 0b10;
        opc2 = 0b10;
      } else if (DstIs16) {
        opc1 = 0b01;
        opc2 = 0b10;
      } else {
        opc1 = 0b00;
        opc2 = 0b00;
      }
    } else if (srcsize == SubRegSize::i64Bit) {
      // 64-bit source elements.
      if (DstIs64) {
        opc1 = 0b11;
        opc2 = 0b11;
      } else if (DstIs32) {
        opc1 = 0b11;
        opc2 = 0b10;
      } else if (DstIs16) {
        opc1 = 0b01;
        opc2 = 0b11;
      } else {
        opc1 = 0b00;
        opc2 = 0b00;
      }
    } else {
      FEX_UNREACHABLE;
    }
    SVEIntegerConvertToFloat(dstsize, srcsize, opc1, opc2, 0, pg, zn, zd);
  }
  // UCVTF (predicated): picks the (opc1, opc2) pair for the requested source
  // and destination element sizes, then forwards to SVEIntegerConvertToFloat
  // with the selector argument after opc2 set to 1.
  void ucvtf(ZRegister zd, SubRegSize dstsize, PRegisterMerge pg, ZRegister zn, SubRegSize srcsize) {
    const bool DstIs64 = dstsize == SubRegSize::i64Bit;
    const bool DstIs32 = dstsize == SubRegSize::i32Bit;
    const bool DstIs16 = dstsize == SubRegSize::i16Bit;

    uint32_t opc1, opc2;
    if (srcsize == SubRegSize::i16Bit) {
      // 16-bit source elements: only a 16-bit destination is accepted.
      LOGMAN_THROW_A_FMT(dstsize == SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      opc1 = 0b01;
      opc2 = 0b01;
    } else if (srcsize == SubRegSize::i32Bit) {
      // 32-bit source elements.
      if (DstIs64) {
        opc1 = 0b11;
        opc2 = 0b00;
      } else if (DstIs32) {
        opc1 = 0b10;
        opc2 = 0b10;
      } else if (DstIs16) {
        opc1 = 0b01;
        opc2 = 0b10;
      } else {
        opc1 = 0b00;
        opc2 = 0b00;
      }
    } else if (srcsize == SubRegSize::i64Bit) {
      // 64-bit source elements.
      if (DstIs64) {
        opc1 = 0b11;
        opc2 = 0b11;
      } else if (DstIs32) {
        opc1 = 0b11;
        opc2 = 0b10;
      } else if (DstIs16) {
        opc1 = 0b01;
        opc2 = 0b11;
      } else {
        opc1 = 0b00;
        opc2 = 0b00;
      }
    } else {
      FEX_UNREACHABLE;
    }
    SVEIntegerConvertToFloat(dstsize, srcsize, opc1, opc2, 1, pg, zn, zd);
  }

  // SVE floating-point convert to integer
  // FLOGB (predicated): maps the element size to a 2-bit field (64-bit -> 0b11,
  // 32-bit -> 0b10, 16-bit -> 0b01, anything else -> 0b00) and passes it to
  // SVEFloatConvertToInt; `size` is used for both size arguments.
  void flogb(SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    int SizeBits = 0b00;
    if (size == SubRegSize::i64Bit) {
      SizeBits = 0b11;
    } else if (size == SubRegSize::i32Bit) {
      SizeBits = 0b10;
    } else if (size == SubRegSize::i16Bit) {
      SizeBits = 0b01;
    }

    SVEFloatConvertToInt(size, size, 1, 0b00, SizeBits, 0, pg, zn, zd);
  }
  // FCVTZS (predicated): picks the (opc1, opc2) pair for the requested source
  // and destination element sizes, then forwards to SVEFloatConvertToInt with
  // the selector argument after opc2 set to 0.
  void fcvtzs(ZRegister zd, SubRegSize dstsize, PRegisterMerge pg, ZRegister zn, SubRegSize srcsize) {
    const bool DstIs64 = dstsize == SubRegSize::i64Bit;

    uint32_t opc1, opc2;
    if (srcsize == SubRegSize::i16Bit) {
      // 16-bit source: opc1 is fixed and opc2 follows the destination size.
      opc1 = 0b01;
      if (DstIs64) {
        opc2 = 0b11;
      } else if (dstsize == SubRegSize::i32Bit) {
        opc2 = 0b10;
      } else if (dstsize == SubRegSize::i16Bit) {
        opc2 = 0b01;
      } else {
        opc2 = 0b00;
      }
    } else if (srcsize == SubRegSize::i32Bit) {
      // 32-bit source: opc2 is fixed and opc1 follows the destination size.
      LOGMAN_THROW_A_FMT(dstsize != SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      if (DstIs64) {
        opc1 = 0b11;
      } else {
        opc1 = 0b10;
      }
      opc2 = 0b10;
    } else if (srcsize == SubRegSize::i64Bit) {
      // 64-bit source: opc1 is fixed and opc2 follows the destination size.
      LOGMAN_THROW_A_FMT(dstsize != SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      opc1 = 0b11;
      if (DstIs64) {
        opc2 = 0b11;
      } else {
        opc2 = 0b00;
      }
    } else {
      FEX_UNREACHABLE;
    }
    SVEFloatConvertToInt(dstsize, srcsize, 1, opc1, opc2, 0, pg, zn, zd);
  }
  // FCVTZU (predicated): picks the (opc1, opc2) pair for the requested source
  // and destination element sizes, then forwards to SVEFloatConvertToInt with
  // the selector argument after opc2 set to 1.
  void fcvtzu(ZRegister zd, SubRegSize dstsize, PRegisterMerge pg, ZRegister zn, SubRegSize srcsize) {
    const bool DstIs64 = dstsize == SubRegSize::i64Bit;

    uint32_t opc1, opc2;
    if (srcsize == SubRegSize::i16Bit) {
      // 16-bit source: opc1 is fixed and opc2 follows the destination size.
      opc1 = 0b01;
      if (DstIs64) {
        opc2 = 0b11;
      } else if (dstsize == SubRegSize::i32Bit) {
        opc2 = 0b10;
      } else if (dstsize == SubRegSize::i16Bit) {
        opc2 = 0b01;
      } else {
        opc2 = 0b00;
      }
    } else if (srcsize == SubRegSize::i32Bit) {
      // 32-bit source: opc2 is fixed and opc1 follows the destination size.
      LOGMAN_THROW_A_FMT(dstsize != SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      if (DstIs64) {
        opc1 = 0b11;
      } else {
        opc1 = 0b10;
      }
      opc2 = 0b10;
    } else if (srcsize == SubRegSize::i64Bit) {
      // 64-bit source: opc1 is fixed and opc2 follows the destination size.
      LOGMAN_THROW_A_FMT(dstsize != SubRegSize::i16Bit, "Unsupported size in {}", __func__);
      opc1 = 0b11;
      if (DstIs64) {
        opc2 = 0b11;
      } else {
        opc2 = 0b00;
      }
    } else {
      FEX_UNREACHABLE;
    }
    SVEFloatConvertToInt(dstsize, srcsize, 1, opc1, opc2, 1, pg, zn, zd);
  }

  // SVE Floating Point Unary Operations - Unpredicated
  // SVE floating-point reciprocal estimate (unpredicated)
  // FRECPE (unpredicated): passes opcode 0b110 to the shared unary-op encoder.
  void frecpe(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b110;
    SVEFPUnaryOpsUnpredicated(Opcode, size, zd, zn);
  }
  // FRSQRTE (unpredicated): passes opcode 0b111 to the shared unary-op encoder.
  void frsqrte(SubRegSize size, ZRegister zd, ZRegister zn) {
    constexpr auto Opcode = 0b111;
    SVEFPUnaryOpsUnpredicated(Opcode, size, zd, zn);
  }

  // SVE Floating Point Compare - with Zero
  // SVE floating-point compare with zero
  // FCMGE against zero: selector pair (0b00, 0) for SVEFPCompareWithZero.
  void fcmge(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b00;
    constexpr auto SelB = 0;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }
  // FCMGT against zero: selector pair (0b00, 1) for SVEFPCompareWithZero.
  void fcmgt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b00;
    constexpr auto SelB = 1;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }
  // FCMLT against zero: selector pair (0b01, 0) for SVEFPCompareWithZero.
  void fcmlt(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b01;
    constexpr auto SelB = 0;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }
  // FCMLE against zero: selector pair (0b01, 1) for SVEFPCompareWithZero.
  void fcmle(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b01;
    constexpr auto SelB = 1;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }
  // FCMEQ against zero: selector pair (0b10, 0) for SVEFPCompareWithZero.
  void fcmeq(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b10;
    constexpr auto SelB = 0;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }
  // FCMNE against zero: selector pair (0b11, 0) for SVEFPCompareWithZero.
  void fcmne(SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn) {
    constexpr auto SelA = 0b11;
    constexpr auto SelB = 0;
    SVEFPCompareWithZero(SelA, SelB, size, pd, pg, zn);
  }

  // SVE Floating Point Accumulating Reduction
  // SVE floating-point serial reduction (predicated)
  // FADDA: passes opcode 0b00 to the serial-reduction encoder.
  void fadda(SubRegSize size, VRegister vd, PRegister pg, VRegister vn, ZRegister zm) {
    constexpr auto Opcode = 0b00;
    SVEFPSerialReductionPredicated(Opcode, size, vd, pg, vn, zm);
  }

  // SVE Floating Point Multiply-Add
  // SVE floating-point multiply-accumulate writing addend
  // FMLA: multiply-add encoder with opcode 0b000.
  void fmla(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b000;
    SVEFPMultiplyAdd(Opcode, size, zda, pg, zn, zm);
  }
  // FMLS: multiply-add encoder with opcode 0b001.
  void fmls(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b001;
    SVEFPMultiplyAdd(Opcode, size, zda, pg, zn, zm);
  }
  // FNMLA: multiply-add encoder with opcode 0b010.
  void fnmla(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b010;
    SVEFPMultiplyAdd(Opcode, size, zda, pg, zn, zm);
  }
  // FNMLS: multiply-add encoder with opcode 0b011.
  void fnmls(SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    constexpr auto Opcode = 0b011;
    SVEFPMultiplyAdd(Opcode, size, zda, pg, zn, zm);
  }

  // SVE floating-point multiply-accumulate writing multiplicand
  // FMAD: multiply-add encoder with opcode 0b100.
  void fmad(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Opcode = 0b100;
    SVEFPMultiplyAdd(Opcode, size, zdn, pg, zm, za);
  }
  // FMSB: multiply-add encoder with opcode 0b101.
  void fmsb(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Opcode = 0b101;
    SVEFPMultiplyAdd(Opcode, size, zdn, pg, zm, za);
  }
  // FNMAD: multiply-add encoder with opcode 0b110.
  void fnmad(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Opcode = 0b110;
    SVEFPMultiplyAdd(Opcode, size, zdn, pg, zm, za);
  }
  // FNMSB: multiply-add encoder with opcode 0b111.
  void fnmsb(SubRegSize size, ZRegister zdn, PRegisterMerge pg, ZRegister zm, ZRegister za) {
    constexpr auto Opcode = 0b111;
    SVEFPMultiplyAdd(Opcode, size, zdn, pg, zm, za);
  }

  // SVE Memory - 32-bit Gather and Unsized Contiguous
  // LDR (predicate register): the predicate index is re-wrapped as a ZRegister so the
  // shared unsized load/store encoder can be used; this form passes 0b0 as the leading selector.
  void ldr(PRegister pt, XRegister rn, int32_t imm = 0) {
    const ZRegister Target {pt.Idx()};
    SVEUnsizedLoadStoreContiguous(0b0, imm, Target, rn, false);
  }
  // LDR (vector register): shared unsized load/store encoder with leading selector 0b1.
  void ldr(ZRegister zt, XRegister rn, int32_t imm = 0) {
    constexpr auto Selector = 0b1;
    SVEUnsizedLoadStoreContiguous(Selector, imm, zt, rn, false);
  }

  // SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
  // XXX:
  // SVE contiguous prefetch (scalar plus immediate)
  // XXX:
  // SVE2 32-bit gather non-temporal load (vector plus scalar)
  // XXX:
  // SVE contiguous prefetch (scalar plus scalar)
  // XXX:
  // SVE 32-bit gather prefetch (vector plus immediate)
  // XXX:

  // SVE load and broadcast element
  // LD1RB: load-and-broadcast with an 8-bit second size argument and the leading flag cleared.
  void ld1rb(SubRegSize esize, ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto LoadSize = SubRegSize::i8Bit;
    SVELoadAndBroadcastElement(false, esize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RSB: load-and-broadcast with an 8-bit second size argument and the leading flag set.
  void ld1rsb(SubRegSize esize, ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto LoadSize = SubRegSize::i8Bit;
    SVELoadAndBroadcastElement(true, esize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RH: load-and-broadcast with a 16-bit second size argument and the leading flag cleared.
  void ld1rh(SubRegSize esize, ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto LoadSize = SubRegSize::i16Bit;
    SVELoadAndBroadcastElement(false, esize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RSH: load-and-broadcast with a 16-bit second size argument and the leading flag set.
  void ld1rsh(SubRegSize esize, ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto LoadSize = SubRegSize::i16Bit;
    SVELoadAndBroadcastElement(true, esize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RW: load-and-broadcast with a 32-bit second size argument and the leading flag cleared.
  void ld1rw(SubRegSize esize, ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto LoadSize = SubRegSize::i32Bit;
    SVELoadAndBroadcastElement(false, esize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RSW: load-and-broadcast fixed to sizes (64-bit, 32-bit) with the leading flag set.
  void ld1rsw(ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto FirstSize = SubRegSize::i64Bit;
    constexpr auto LoadSize = SubRegSize::i32Bit;
    SVELoadAndBroadcastElement(true, FirstSize, LoadSize, zt, pg, rn, imm);
  }
  // LD1RD: load-and-broadcast fixed to sizes (64-bit, 64-bit) with the leading flag cleared.
  void ld1rd(ZRegister zt, PRegisterZero pg, Register rn, uint32_t imm = 0) {
    constexpr auto FirstSize = SubRegSize::i64Bit;
    constexpr auto LoadSize = SubRegSize::i64Bit;
    SVELoadAndBroadcastElement(false, FirstSize, LoadSize, zt, pg, rn, imm);
  }

  // SVE contiguous non-temporal load (scalar plus immediate)
  // LDNT1B (scalar plus immediate): non-temporal load encoder with size field 0b00.
  void ldnt1b(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    constexpr auto SizeField = 0b00;
    SVEContiguousNontemporalLoad(SizeField, zt, pg, rn, Imm);
  }
  // LDNT1H (scalar plus immediate): non-temporal load encoder with size field 0b01.
  void ldnt1h(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    constexpr auto SizeField = 0b01;
    SVEContiguousNontemporalLoad(SizeField, zt, pg, rn, Imm);
  }
  // LDNT1W (scalar plus immediate): non-temporal load encoder with size field 0b10.
  void ldnt1w(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    constexpr auto SizeField = 0b10;
    SVEContiguousNontemporalLoad(SizeField, zt, pg, rn, Imm);
  }
  // LDNT1D (scalar plus immediate): non-temporal load encoder with size field 0b11.
  void ldnt1d(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    constexpr auto SizeField = 0b11;
    SVEContiguousNontemporalLoad(SizeField, zt, pg, rn, Imm);
  }

  // SVE contiguous non-temporal load (scalar plus scalar)
  // XXX:
  // SVE load multiple structures (scalar plus immediate)
  // LD2B (scalar plus immediate): asserts zt1/zt2 are sequential, then encodes from zt1
  // with count 2 and size field 0b00.
  void ld2b(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto RegCount = 2;
    constexpr auto SizeField = 0b00;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD3B (scalar plus immediate): asserts zt1..zt3 are sequential, then encodes from zt1
  // with count 3 and size field 0b00.
  void ld3b(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto RegCount = 3;
    constexpr auto SizeField = 0b00;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD4B (scalar plus immediate): asserts zt1..zt4 are sequential, then encodes from zt1
  // with count 4 and size field 0b00.
  void ld4b(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto RegCount = 4;
    constexpr auto SizeField = 0b00;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD2H (scalar plus immediate): asserts zt1/zt2 are sequential, then encodes from zt1
  // with count 2 and size field 0b01.
  void ld2h(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto RegCount = 2;
    constexpr auto SizeField = 0b01;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD3H (scalar plus immediate): asserts zt1..zt3 are sequential, then encodes from zt1
  // with count 3 and size field 0b01.
  void ld3h(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto RegCount = 3;
    constexpr auto SizeField = 0b01;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD4H (scalar plus immediate): asserts zt1..zt4 are sequential, then encodes from zt1
  // with count 4 and size field 0b01.
  void ld4h(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto RegCount = 4;
    constexpr auto SizeField = 0b01;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD2W (scalar plus immediate): asserts zt1/zt2 are sequential, then encodes from zt1
  // with count 2 and size field 0b10.
  void ld2w(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto RegCount = 2;
    constexpr auto SizeField = 0b10;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD3W (scalar plus immediate): asserts zt1..zt3 are sequential, then encodes from zt1
  // with count 3 and size field 0b10.
  void ld3w(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto RegCount = 3;
    constexpr auto SizeField = 0b10;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD4W (scalar plus immediate): asserts zt1..zt4 are sequential, then encodes from zt1
  // with count 4 and size field 0b10.
  void ld4w(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto RegCount = 4;
    constexpr auto SizeField = 0b10;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD2D (scalar plus immediate): asserts zt1/zt2 are sequential, then encodes from zt1
  // with count 2 and size field 0b11.
  void ld2d(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto RegCount = 2;
    constexpr auto SizeField = 0b11;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD3D (scalar plus immediate): asserts zt1..zt3 are sequential, then encodes from zt1
  // with count 3 and size field 0b11.
  void ld3d(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto RegCount = 3;
    constexpr auto SizeField = 0b11;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }
  // LD4D (scalar plus immediate): asserts zt1..zt4 are sequential, then encodes from zt1
  // with count 4 and size field 0b11.
  void ld4d(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto RegCount = 4;
    constexpr auto SizeField = 0b11;
    SVEContiguousMultipleStructures(RegCount, false, SizeField, Imm, zt1, pg, rn);
  }

  // SVE helper implementations
  // LD1B dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the gather helpers.
  template<SubRegSize size>
  void ld1b(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1b<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1b<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i8Bit, zt, pg, Src, true, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i8Bit, zt, pg, Src, true, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1B dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  template<SubRegSize size>
  void ldff1b(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1b<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1b doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i8Bit, zt, pg, Src, true, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i8Bit, zt, pg, Src, true, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1SW dispatcher: picks the encoder from the addressing mode held in Src.
  // Gather forms are fixed to sizes (64-bit, 32-bit).
  void ld1sw(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1sw(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1sw(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(SubRegSize::i64Bit, SubRegSize::i32Bit, zt, pg, Src, false, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(SubRegSize::i64Bit, SubRegSize::i32Bit, zt, pg, Src, false, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1H dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the gather helpers.
  template<SubRegSize size>
  void ld1h(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1h<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1h<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i16Bit, zt, pg, Src, true, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i16Bit, zt, pg, Src, true, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1SH dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the gather helpers.
  template<SubRegSize size>
  void ld1sh(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1sh<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1sh<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i16Bit, zt, pg, Src, false, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i16Bit, zt, pg, Src, false, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1H dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  template<SubRegSize size>
  void ldff1h(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1h<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1h doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i16Bit, zt, pg, Src, true, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i16Bit, zt, pg, Src, true, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1SH dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  template<SubRegSize size>
  void ldff1sh(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1sh<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1sh doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i16Bit, zt, pg, Src, false, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i16Bit, zt, pg, Src, false, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1W dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the gather helpers.
  template<SubRegSize size>
  void ld1w(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1w<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1w<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i32Bit, zt, pg, Src, true, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i32Bit, zt, pg, Src, true, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1W dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  template<SubRegSize size>
  void ldff1w(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1w<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1w doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i32Bit, zt, pg, Src, true, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i32Bit, zt, pg, Src, true, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1SW dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  void ldff1sw(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1sw(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1sw doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(SubRegSize::i64Bit, SubRegSize::i32Bit, zt, pg, Src, false, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(SubRegSize::i64Bit, SubRegSize::i32Bit, zt, pg, Src, false, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1SB dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the gather helpers.
  template<SubRegSize size>
  void ld1sb(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1sb<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1sb<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i8Bit, zt, pg, Src, false, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i8Bit, zt, pg, Src, false, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1SB dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  template<SubRegSize size>
  void ldff1sb(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1sb<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1sb doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(size, SubRegSize::i8Bit, zt, pg, Src, false, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(size, SubRegSize::i8Bit, zt, pg, Src, false, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LD1D dispatcher: picks the encoder from the addressing mode held in Src.
  // Gather forms are fixed to sizes (64-bit, 64-bit).
  void ld1d(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ld1d(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      ld1d(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src, true, false);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src, true, false);
      return;
    }
    FEX_UNREACHABLE;
  }

  // LDFF1D dispatcher: picks the encoder from the addressing mode held in Src.
  // The scalar-plus-immediate mode is rejected with an assertion.
  void ldff1d(ZRegister zt, PRegisterZero pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      ldff1d(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      LOGMAN_THROW_A_FMT(false, "ldff1d doesn't have a scalar plus immediate variant");
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEGatherLoadScalarPlusVector(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src, true, true);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEGatherLoadVectorPlusImm(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src, true, true);
      return;
    }
    FEX_UNREACHABLE;
  }

  // ST1B dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the scatter helpers.
  template<SubRegSize size>
  void st1b(ZRegister zt, PRegister pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      st1b<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      st1b<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEScatterStoreScalarPlusVector(size, SubRegSize::i8Bit, zt, pg, Src);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEScatterStoreVectorPlusImm(size, SubRegSize::i8Bit, zt, pg, Src);
      return;
    }
    FEX_UNREACHABLE;
  }

  // ST1H dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the scatter helpers.
  template<SubRegSize size>
  void st1h(ZRegister zt, PRegister pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      st1h<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      st1h<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEScatterStoreScalarPlusVector(size, SubRegSize::i16Bit, zt, pg, Src);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEScatterStoreVectorPlusImm(size, SubRegSize::i16Bit, zt, pg, Src);
      return;
    }
    FEX_UNREACHABLE;
  }

  // ST1W dispatcher: picks the encoder from the addressing mode held in Src.
  // Scalar forms go to the contiguous overloads; vector forms go to the scatter helpers.
  template<SubRegSize size>
  void st1w(ZRegister zt, PRegister pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      st1w<size>(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      st1w<size>(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEScatterStoreScalarPlusVector(size, SubRegSize::i32Bit, zt, pg, Src);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEScatterStoreVectorPlusImm(size, SubRegSize::i32Bit, zt, pg, Src);
      return;
    }
    FEX_UNREACHABLE;
  }

  // ST1D dispatcher: picks the encoder from the addressing mode held in Src.
  // Scatter forms are fixed to sizes (64-bit, 64-bit).
  void st1d(ZRegister zt, PRegister pg, SVEMemOperand Src) {
    if (Src.IsScalarPlusScalar()) {
      st1d(zt, pg, Src.rn, Src.MetaType.ScalarScalarType.rm);
      return;
    }
    if (Src.IsScalarPlusImm()) {
      st1d(zt, pg, Src.rn, Src.MetaType.ScalarImmType.Imm);
      return;
    }
    if (Src.IsScalarPlusVector()) {
      SVEScatterStoreScalarPlusVector(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src);
      return;
    }
    if (Src.IsVectorPlusImm()) {
      SVEScatterStoreVectorPlusImm(SubRegSize::i64Bit, SubRegSize::i64Bit, zt, pg, Src);
      return;
    }
    FEX_UNREACHABLE;
  }

  // SVE load multiple structures (scalar plus scalar)
  // LD2B (scalar plus scalar): asserts zt1/zt2 are sequential, then encodes from zt1
  // with 8-bit size and opcode 0b01.
  void ld2b(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto Opc = 0b01;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
  }
  // LD3B (scalar plus scalar): asserts zt1..zt3 are sequential, then encodes from zt1
  // with 8-bit size and opcode 0b10.
  void ld3b(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto Opc = 0b10;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
  }
  // LD4B (scalar plus scalar): asserts zt1..zt4 are sequential, then encodes from zt1
  // with 8-bit size and opcode 0b11.
  void ld4b(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto Opc = 0b11;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i8Bit, Opc, zt1, pg, rn, rm);
  }
  // LD2H (scalar plus scalar): asserts zt1/zt2 are sequential, then encodes from zt1
  // with 16-bit size and opcode 0b01.
  void ld2h(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto Opc = 0b01;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
  }
  // LD3H (scalar plus scalar): asserts zt1..zt3 are sequential, then encodes from zt1
  // with 16-bit size and opcode 0b10.
  void ld3h(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto Opc = 0b10;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
  }
  // LD4H (scalar plus scalar): asserts zt1..zt4 are sequential, then encodes from zt1
  // with 16-bit size and opcode 0b11.
  void ld4h(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto Opc = 0b11;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i16Bit, Opc, zt1, pg, rn, rm);
  }
  // LD2W (scalar plus scalar): asserts zt1/zt2 are sequential, then encodes from zt1
  // with 32-bit size and opcode 0b01.
  void ld2w(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto Opc = 0b01;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
  }
  // LD3W (scalar plus scalar): asserts zt1..zt3 are sequential, then encodes from zt1
  // with 32-bit size and opcode 0b10.
  void ld3w(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto Opc = 0b10;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
  }
  // LD4W (scalar plus scalar): asserts zt1..zt4 are sequential, then encodes from zt1
  // with 32-bit size and opcode 0b11.
  void ld4w(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto Opc = 0b11;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i32Bit, Opc, zt1, pg, rn, rm);
  }
  // LD2D (scalar plus scalar): asserts zt1/zt2 are sequential, then encodes from zt1
  // with 64-bit size and opcode 0b01.
  void ld2d(ZRegister zt1, ZRegister zt2, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    constexpr auto Opc = 0b01;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
  }
  // LD3D (scalar plus scalar): asserts zt1..zt3 are sequential, then encodes from zt1
  // with 64-bit size and opcode 0b10.
  void ld3d(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    constexpr auto Opc = 0b10;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
  }
  // LD4D (scalar plus scalar): asserts zt1..zt4 are sequential, then encodes from zt1
  // with 64-bit size and opcode 0b11.
  void ld4d(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegisterZero pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    constexpr auto Opc = 0b11;
    SVEContiguousLoadStoreMultipleScalar(false, SubRegSize::i64Bit, Opc, zt1, pg, rn, rm);
  }

  // SVE load and broadcast quadword (scalar plus immediate)
  // LD1RQB (scalar plus immediate): selector pair (0b00, 0b00).
  void ld1rqb(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b00;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1ROB (scalar plus immediate): selector pair (0b00, 0b01).
  void ld1rob(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b00;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1RQH (scalar plus immediate): selector pair (0b01, 0b00).
  void ld1rqh(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b01;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1ROH (scalar plus immediate): selector pair (0b01, 0b01).
  void ld1roh(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b01;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1RQW (scalar plus immediate): selector pair (0b10, 0b00).
  void ld1rqw(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b10;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1ROW (scalar plus immediate): selector pair (0b10, 0b01).
  void ld1row(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b10;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1RQD (scalar plus immediate): selector pair (0b11, 0b00).
  void ld1rqd(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b11;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }
  // LD1ROD (scalar plus immediate): selector pair (0b11, 0b01).
  void ld1rod(ZRegister zt, PRegisterZero pg, Register rn, int imm = 0) {
    constexpr auto SizeField = 0b11;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusImm(SizeField, Variant, zt, pg, rn, imm);
  }

  // SVE contiguous load (scalar plus immediate)
  // LD1B (scalar plus immediate): type field is 0b0000 OR'd with the raw element-size value.
  template<SubRegSize size>
  void ld1b(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    const auto TypeField = 0b0000 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // LD1SW (scalar plus immediate): fixed type field 0b0100.
  void ld1sw(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    constexpr auto TypeField = 0b0100;
    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // LD1H (scalar plus immediate): type field is 0b0100 OR'd with the raw element-size value.
  // 8-bit elements are rejected at compile time.
  template<SubRegSize size>
  void ld1h(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    static_assert(size != SubRegSize::i8Bit, "Invalid size");
    const auto TypeField = 0b0100 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // LD1SH (scalar plus immediate): only 32-bit and 64-bit elements are accepted.
  // Type field is 0b1001 for 32-bit and 0b1000 for 64-bit.
  template<SubRegSize size>
  void ld1sh(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    static_assert(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");

    // The static_assert leaves only two cases, so a single test is enough.
    constexpr uint32_t TypeField = 0b1000 | (size == SubRegSize::i32Bit ? 1u : 0u);

    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // LD1W (scalar plus immediate): only 32-bit and 64-bit elements are accepted.
  // Type field is 0b1010 for 32-bit and 0b1011 for 64-bit.
  template<SubRegSize size>
  void ld1w(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    static_assert(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");

    // The static_assert leaves only two cases, so a single test is enough.
    constexpr uint32_t TypeField = 0b1010 | (size == SubRegSize::i64Bit ? 1u : 0u);

    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // LD1SB (scalar plus immediate): accepts 16/32/64-bit elements.
  // Low type-field bits are 0b10 / 0b01 / 0b00 respectively, OR'd into 0b1100.
  template<SubRegSize size>
  void ld1sb(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    static_assert(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");

    // After the static_assert, anything that is not 16-bit or 32-bit must be 64-bit.
    constexpr uint32_t LowBits = size == SubRegSize::i16Bit ? 0b10u : size == SubRegSize::i32Bit ? 0b01u : 0b00u;
    constexpr uint32_t TypeField = 0b1100 | LowBits;

    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }
  // LD1D (scalar plus immediate): fixed type field 0b1111.
  void ld1d(ZRegister zt, PRegisterZero pg, Register rn, int32_t Imm = 0) {
    constexpr auto TypeField = 0b1111;
    SVEContiguousLoadImm(false, TypeField, Imm, pg, rn, zt);
  }

  // SVE contiguous non-fault load (scalar plus immediate)
  // XXX:

  // SVE load and broadcast quadword (scalar plus scalar)
  // LD1RQB (scalar plus scalar): selector pair (0b00, 0b00).
  void ld1rqb(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b00;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1ROB (scalar plus scalar): selector pair (0b00, 0b01).
  void ld1rob(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b00;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1RQH (scalar plus scalar): selector pair (0b01, 0b00).
  void ld1rqh(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b01;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1ROH (scalar plus scalar): selector pair (0b01, 0b01).
  void ld1roh(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b01;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1RQW (scalar plus scalar): selector pair (0b10, 0b00).
  void ld1rqw(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b10;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1ROW (scalar plus scalar): selector pair (0b10, 0b01).
  void ld1row(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b10;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1RQD (scalar plus scalar): selector pair (0b11, 0b00).
  void ld1rqd(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b11;
    constexpr auto Variant = 0b00;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }
  // LD1ROD (scalar plus scalar): selector pair (0b11, 0b01).
  void ld1rod(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto SizeField = 0b11;
    constexpr auto Variant = 0b01;
    SVELoadBroadcastQuadScalarPlusScalar(SizeField, Variant, zt, pg, rn, rm);
  }

  // SVE contiguous load (scalar plus scalar)
  // LD1B (scalar plus scalar): type field is 0b0000 OR'd with the raw element-size value.
  template<SubRegSize size>
  void ld1b(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    const auto TypeField = 0b0000 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // LD1SW (scalar plus scalar): fixed type field 0b0100.
  void ld1sw(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto TypeField = 0b0100;
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // LD1H (scalar plus scalar): type field is 0b0100 OR'd with the raw element-size value.
  // 8-bit elements are rejected at compile time.
  template<SubRegSize size>
  void ld1h(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size != SubRegSize::i8Bit, "Invalid size");
    const auto TypeField = 0b0100 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // LD1SH (scalar plus scalar): only 32-bit and 64-bit elements are accepted.
  // Type field is 0b1001 for 32-bit and 0b1000 for 64-bit.
  template<SubRegSize size>
  void ld1sh(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");
    // The static_assert leaves only two cases, so a single test is enough.
    constexpr uint32_t TypeField = 0b1000 | (size == SubRegSize::i32Bit ? 1u : 0u);
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // LD1W (scalar plus scalar): only 32-bit and 64-bit elements are accepted.
  // Type field is 0b1010 for 32-bit and 0b1011 for 64-bit.
  template<SubRegSize size>
  void ld1w(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");
    // The static_assert leaves only two cases, so a single test is enough.
    constexpr uint32_t TypeField = 0b1010 | (size == SubRegSize::i64Bit ? 1u : 0u);
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }
  // LD1SB (scalar plus scalar): accepts 16/32/64-bit elements.
  // Low type-field bits are 0b10 / 0b01 / 0b00 respectively, OR'd into 0b1100.
  template<SubRegSize size>
  void ld1sb(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");
    // After the static_assert, anything that is not 16-bit or 32-bit must be 64-bit.
    constexpr uint32_t LowBits = size == SubRegSize::i16Bit ? 0b10u : size == SubRegSize::i32Bit ? 0b01u : 0b00u;
    constexpr uint32_t TypeField = 0b1100 | LowBits;
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // LD1D (scalar plus scalar): fixed type field 0b1111.
  void ld1d(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr auto TypeField = 0b1111;
    SVEContiguousLoadStore(0, 0, TypeField, rm, pg, rn, zt);
  }

  // SVE contiguous first-fault load (scalar plus scalar)
  // LDFF1B (scalar plus scalar): same type field as LD1B, with the second leading argument set to 1.
  template<SubRegSize size>
  void ldff1b(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    const auto TypeField = 0b0000 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadStore(0, 1, TypeField, rm, pg, rn, zt);
  }
  // LDFF1SB (scalar plus scalar): accepts 16/32/64-bit elements.
  // Low type-field bits are 0b10 / 0b01 / 0b00 respectively, OR'd into 0b1100.
  template<SubRegSize size>
  void ldff1sb(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");
    // After the static_assert, anything that is not 16-bit or 32-bit must be 64-bit.
    constexpr uint32_t LowBits = size == SubRegSize::i16Bit ? 0b10u : size == SubRegSize::i32Bit ? 0b01u : 0b00u;
    constexpr uint32_t TypeField = 0b1100 | LowBits;
    SVEContiguousLoadStore(0, 1, TypeField, rm, pg, rn, zt);
  }
  // LDFF1H (scalar plus scalar): type field is 0b0100 OR'd with the raw element-size value.
  // 8-bit elements are rejected at compile time.
  template<SubRegSize size>
  void ldff1h(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size != SubRegSize::i8Bit, "Invalid size");
    const auto TypeField = 0b0100 | FEXCore::ToUnderlying(size);
    SVEContiguousLoadStore(0, 1, TypeField, rm, pg, rn, zt);
  }
  // LDFF1SH (scalar plus scalar): only 32-bit and 64-bit elements are accepted.
  // Type field is 0b1001 for 32-bit and 0b1000 for 64-bit.
  template<SubRegSize size>
  void ldff1sh(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    static_assert(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Invalid size");
    // The static_assert leaves only two cases, so a single test is enough.
    constexpr uint32_t TypeField = 0b1000 | (size == SubRegSize::i32Bit ? 1u : 0u);
    SVEContiguousLoadStore(0, 1, TypeField, rm, pg, rn, zt);
  }
  // ldff1w: only 32-bit and 64-bit elements are accepted; the size picks the
  // lowest bit of the 0b101x selector (32-bit -> 0, 64-bit -> 1).
  template<SubRegSize size>
  void ldff1w(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    constexpr bool IsWord = size == SubRegSize::i32Bit;
    static_assert(IsWord || size == SubRegSize::i64Bit, "Invalid size");

    constexpr uint32_t SizeBit = IsWord ? 0U : 1U;
    SVEContiguousLoadStore(0, 1, 0b1010 | SizeBit, rm, pg, rn, zt);
  }
  // ldff1sw: forwards to SVEContiguousLoadStore with the fixed selector 0b0100.
  void ldff1sw(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    SVEContiguousLoadStore(0, 1, 0b0100, rm, pg, rn, zt);
  }
  // ldff1d: forwards to SVEContiguousLoadStore with the fixed selector 0b1111.
  void ldff1d(ZRegister zt, PRegisterZero pg, Register rn, Register rm) {
    SVEContiguousLoadStore(0, 1, 0b1111, rm, pg, rn, zt);
  }

  // SVE Memory - 64-bit Gather
  // SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
  // XXX:
  // SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
  // XXX:
  // SVE 64-bit gather prefetch (vector plus immediate)
  // XXX:
  // SVE2 64-bit gather non-temporal load (vector plus scalar)
  // XXX:

  // SVE Memory - Contiguous Store and Unsized Contiguous
  // str (predicate register): the predicate index is re-wrapped as a ZRegister
  // so the shared unsized encoder can be reused; the leading 0b0 distinguishes
  // this overload from the vector one. imm defaults to 0.
  void str(PRegister pt, XRegister rn, int32_t imm = 0) {
    SVEUnsizedLoadStoreContiguous(0b0, imm, ZRegister {pt.Idx()}, rn, true);
  }
  // str (vector register): forwards to the shared unsized encoder with a
  // leading 0b1, which distinguishes it from the predicate overload.
  // imm defaults to 0.
  void str(ZRegister zt, XRegister rn, int32_t imm = 0) {
    SVEUnsizedLoadStoreContiguous(0b1, imm, zt, rn, true);
  }

  // SVE contiguous store (scalar plus scalar)
  // st1b (scalar plus scalar): the raw SubRegSize value is OR'd into the
  // selector passed to SVEContiguousLoadStore. No size restriction is asserted.
  template<SubRegSize size>
  void st1b(ZRegister zt, PRegister pg, Register rn, Register rm) {
    SVEContiguousLoadStore(1, 0, 0b0000 | FEXCore::ToUnderlying(size), rm, pg, rn, zt);
  }

  // st1h (scalar plus scalar): rejects 8-bit elements at compile time, then
  // ORs the raw SubRegSize value into the 0b01xx selector.
  template<SubRegSize size>
  void st1h(ZRegister zt, PRegister pg, Register rn, Register rm) {
    static_assert(size != SubRegSize::i8Bit, "Invalid size");
    SVEContiguousLoadStore(1, 0, 0b0100 | FEXCore::ToUnderlying(size), rm, pg, rn, zt);
  }

  // st1w (scalar plus scalar): only 32-bit and 64-bit elements are accepted;
  // the size picks the lowest bit of the 0b101x selector (32-bit -> 0, 64-bit -> 1).
  template<SubRegSize size>
  void st1w(ZRegister zt, PRegister pg, Register rn, Register rm) {
    constexpr bool IsWord = size == SubRegSize::i32Bit;
    static_assert(IsWord || size == SubRegSize::i64Bit, "Invalid size");

    constexpr uint32_t SizeBit = IsWord ? 0U : 1U;
    SVEContiguousLoadStore(1, 0, 0b1010 | SizeBit, rm, pg, rn, zt);
  }
  // st1d (scalar plus scalar): forwards to SVEContiguousLoadStore with the
  // fixed selector 0b1111.
  void st1d(ZRegister zt, PRegister pg, Register rn, Register rm) {
    SVEContiguousLoadStore(1, 0, 0b1111, rm, pg, rn, zt);
  }

  // SVE Memory - Non-temporal and Multi-register Store
  // SVE2 64-bit scatter non-temporal store (vector plus scalar)
  // XXX:
  // SVE contiguous non-temporal store (scalar plus scalar)
  // XXX:
  // SVE2 32-bit scatter non-temporal store (vector plus scalar)
  // XXX:

  // SVE store multiple structures (scalar plus scalar)
  // st2b (scalar plus scalar): zt1/zt2 must be sequential registers; only zt1
  // is handed to the encoder. 0b01 selects the two-register form.
  void st2b(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i8Bit, 0b01, zt1, pg, rn, rm);
  }
  // st3b (scalar plus scalar): zt1..zt3 must be sequential registers; only zt1
  // is handed to the encoder. 0b10 selects the three-register form.
  void st3b(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i8Bit, 0b10, zt1, pg, rn, rm);
  }
  // st4b (scalar plus scalar): zt1..zt4 must be sequential registers; only zt1
  // is handed to the encoder. 0b11 selects the four-register form.
  void st4b(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i8Bit, 0b11, zt1, pg, rn, rm);
  }
  // st2h (scalar plus scalar): zt1/zt2 must be sequential registers; only zt1
  // is handed to the encoder. 0b01 selects the two-register form.
  void st2h(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i16Bit, 0b01, zt1, pg, rn, rm);
  }
  // st3h (scalar plus scalar): zt1..zt3 must be sequential registers; only zt1
  // is handed to the encoder. 0b10 selects the three-register form.
  void st3h(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i16Bit, 0b10, zt1, pg, rn, rm);
  }
  // st4h (scalar plus scalar): zt1..zt4 must be sequential registers; only zt1
  // is handed to the encoder. 0b11 selects the four-register form.
  void st4h(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i16Bit, 0b11, zt1, pg, rn, rm);
  }
  // st2w (scalar plus scalar): zt1/zt2 must be sequential registers; only zt1
  // is handed to the encoder. 0b01 selects the two-register form.
  void st2w(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i32Bit, 0b01, zt1, pg, rn, rm);
  }
  // st3w (scalar plus scalar): zt1..zt3 must be sequential registers; only zt1
  // is handed to the encoder. 0b10 selects the three-register form.
  void st3w(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i32Bit, 0b10, zt1, pg, rn, rm);
  }
  // st4w (scalar plus scalar): zt1..zt4 must be sequential registers; only zt1
  // is handed to the encoder. 0b11 selects the four-register form.
  void st4w(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i32Bit, 0b11, zt1, pg, rn, rm);
  }
  // st2d (scalar plus scalar): zt1/zt2 must be sequential registers; only zt1
  // is handed to the encoder. 0b01 selects the two-register form.
  void st2d(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i64Bit, 0b01, zt1, pg, rn, rm);
  }
  // st3d (scalar plus scalar): zt1..zt3 must be sequential registers; only zt1
  // is handed to the encoder. 0b10 selects the three-register form.
  void st3d(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i64Bit, 0b10, zt1, pg, rn, rm);
  }
  // st4d (scalar plus scalar): zt1..zt4 must be sequential registers; only zt1
  // is handed to the encoder. 0b11 selects the four-register form.
  void st4d(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousLoadStoreMultipleScalar(true, SubRegSize::i64Bit, 0b11, zt1, pg, rn, rm);
  }

  // SVE Memory - Contiguous Store with Immediate Offset
  // SVE contiguous non-temporal store (scalar plus immediate)
  // stnt1b: forwards to SVEContiguousNontemporalStore with size selector 0b00.
  // Imm defaults to 0.
  void stnt1b(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousNontemporalStore(0b00, zt, pg, rn, Imm);
  }
  // stnt1h: forwards to SVEContiguousNontemporalStore with size selector 0b01.
  // Imm defaults to 0.
  void stnt1h(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousNontemporalStore(0b01, zt, pg, rn, Imm);
  }
  // stnt1w: forwards to SVEContiguousNontemporalStore with size selector 0b10.
  // Imm defaults to 0.
  void stnt1w(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousNontemporalStore(0b10, zt, pg, rn, Imm);
  }
  // stnt1d: forwards to SVEContiguousNontemporalStore with size selector 0b11.
  // Imm defaults to 0.
  void stnt1d(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousNontemporalStore(0b11, zt, pg, rn, Imm);
  }

  // SVE store multiple structures (scalar plus immediate)
  // st2b (scalar plus immediate): zt1/zt2 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (2) and the
  // size selector 0b00. Imm defaults to 0.
  void st2b(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(2, true, 0b00, Imm, zt1, pg, rn);
  }
  // st3b (scalar plus immediate): zt1..zt3 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (3) and the
  // size selector 0b00. Imm defaults to 0.
  void st3b(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(3, true, 0b00, Imm, zt1, pg, rn);
  }
  // st4b (scalar plus immediate): zt1..zt4 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (4) and the
  // size selector 0b00. Imm defaults to 0.
  void st4b(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(4, true, 0b00, Imm, zt1, pg, rn);
  }
  // st2h (scalar plus immediate): zt1/zt2 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (2) and the
  // size selector 0b01. Imm defaults to 0.
  void st2h(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(2, true, 0b01, Imm, zt1, pg, rn);
  }
  // st3h (scalar plus immediate): zt1..zt3 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (3) and the
  // size selector 0b01. Imm defaults to 0.
  void st3h(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(3, true, 0b01, Imm, zt1, pg, rn);
  }
  // st4h (scalar plus immediate): zt1..zt4 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (4) and the
  // size selector 0b01. Imm defaults to 0.
  void st4h(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(4, true, 0b01, Imm, zt1, pg, rn);
  }
  // st2w (scalar plus immediate): zt1/zt2 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (2) and the
  // size selector 0b10. Imm defaults to 0.
  void st2w(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(2, true, 0b10, Imm, zt1, pg, rn);
  }
  // st3w (scalar plus immediate): zt1..zt3 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (3) and the
  // size selector 0b10. Imm defaults to 0.
  void st3w(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(3, true, 0b10, Imm, zt1, pg, rn);
  }
  // st4w (scalar plus immediate): zt1..zt4 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (4) and the
  // size selector 0b10. Imm defaults to 0.
  void st4w(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(4, true, 0b10, Imm, zt1, pg, rn);
  }
  // st2d (scalar plus immediate): zt1/zt2 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (2) and the
  // size selector 0b11. Imm defaults to 0.
  void st2d(ZRegister zt1, ZRegister zt2, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(2, true, 0b11, Imm, zt1, pg, rn);
  }
  // st3d (scalar plus immediate): zt1..zt3 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (3) and the
  // size selector 0b11. Imm defaults to 0.
  void st3d(ZRegister zt1, ZRegister zt2, ZRegister zt3, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(3, true, 0b11, Imm, zt1, pg, rn);
  }
  // st4d (scalar plus immediate): zt1..zt4 must be sequential registers; only
  // zt1 is handed to the encoder along with the register count (4) and the
  // size selector 0b11. Imm defaults to 0.
  void st4d(ZRegister zt1, ZRegister zt2, ZRegister zt3, ZRegister zt4, PRegister pg, Register rn, int32_t Imm = 0) {
    LOGMAN_THROW_A_FMT(AreVectorsSequential(zt1, zt2, zt3, zt4), "Registers need to be contiguous");
    SVEContiguousMultipleStructures(4, true, 0b11, Imm, zt1, pg, rn);
  }

  // SVE contiguous store (scalar plus immediate)
  // st1b (scalar plus immediate): the raw SubRegSize value is OR'd into the
  // selector passed to SVEContiguousLoadImm; the leading `true` marks this as a
  // store. Imm defaults to 0.
  template<SubRegSize size>
  void st1b(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousLoadImm(true, 0b0000 | FEXCore::ToUnderlying(size), Imm, pg, rn, zt);
  }

  // st1h (scalar plus immediate): rejects 8-bit elements at compile time, then
  // ORs the raw SubRegSize value into the 0b01xx selector. Imm defaults to 0.
  template<SubRegSize size>
  void st1h(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    static_assert(size != SubRegSize::i8Bit, "Invalid size");
    SVEContiguousLoadImm(true, 0b0100 | FEXCore::ToUnderlying(size), Imm, pg, rn, zt);
  }

  // st1w (scalar plus immediate): only 32-bit and 64-bit elements are accepted;
  // the size picks the lowest bit of the 0b101x selector (32-bit -> 0,
  // 64-bit -> 1). Imm defaults to 0.
  template<SubRegSize size>
  void st1w(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    constexpr bool IsWord = size == SubRegSize::i32Bit;
    static_assert(IsWord || size == SubRegSize::i64Bit, "Invalid size");

    constexpr uint32_t SizeBit = IsWord ? 0U : 1U;
    SVEContiguousLoadImm(true, 0b1010 | SizeBit, Imm, pg, rn, zt);
  }

  // st1d (scalar plus immediate): forwards to SVEContiguousLoadImm with the
  // fixed selector 0b1111. Imm defaults to 0.
  void st1d(ZRegister zt, PRegister pg, Register rn, int32_t Imm = 0) {
    SVEContiguousLoadImm(true, 0b1111, Imm, pg, rn, zt);
  }
private:
  // SVE encodings
  // Encodes an indexed dup: broadcasts element `Index` of zn into zd.
  // The index is validated against the number of elements of the given size
  // that fit in 64 bytes.
  void SVEDupIndexed(SubRegSize size, ZRegister zn, ZRegister zd, uint32_t Index) {
    const auto ElementBytes = 1U << FEXCore::ToUnderlying(size);
    const auto ElementShift = FEXCore::ilog2(ElementBytes);

    // We can index up to 512-bit registers with dup
    const auto HighestIndex = (64U >> ElementShift) - 1;
    LOGMAN_THROW_A_FMT(Index <= HighestIndex, "dup index ({}) too large. Must be within [0, {}].", Index, HighestIndex);

    // imm2:tsz form one 7-bit field. The lowest set bit marks the element
    // size and the index sits directly above it, so the whole field can be
    // built from the log2 of the element size and then split in two.
    const auto Combined = (Index << (ElementShift + 1)) | (1U << ElementShift);

    uint32_t Encoding = 0b0000'0101'0010'0000'0010'0000'0000'0000;
    Encoding |= (Combined >> 5) << 22;      // imm2
    Encoding |= (Combined & 0b11111) << 16; // tsz
    Encoding |= Encode_rn(zn);
    Encoding |= Encode_rd(zd);
    dc32(Encoding);
  }

  // Encodes the unpredicated add/sub-with-immediate group.
  // The immediate is either an 8-bit value, or a multiple of 256 below 65536
  // which is encoded as (imm / 256) with the shift flag set. The form is
  // destructive, so zd must equal zn.
  void SVEAddSubImmediateUnpred(uint32_t opc, SubRegSize size, ZRegister zd, ZRegister zn, uint32_t imm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd needs to equal zn");

    const bool FitsInByte = (imm >> 8) == 0;
    if (size == SubRegSize::i8Bit) {
      LOGMAN_THROW_A_FMT(FitsInByte, "Can't perform LSL #8 shift on 8-bit elements.");
    }

    uint32_t ShiftFlag = 0;
    if (!FitsInByte) {
      LOGMAN_THROW_A_FMT((imm >> 16) == 0, "Immediate ({}) must be a 16-bit value within [256, 65280]", imm);
      LOGMAN_THROW_A_FMT((imm % 256) == 0, "Immediate ({}) must be a multiple of 256", imm);

      // Unsigned, so this is the same as dividing by 256.
      imm >>= 8;
      ShiftFlag = 1;
    }

    constexpr uint32_t Base = 0b0010'0101'0010'0000'1100'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (ShiftFlag << 13) | (imm << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes the unpredicated integer min/max-with-immediate group.
  //
  // opc:   operation selector, placed at bit 16. An even value is treated as
  //        a signed operation, an odd value as an unsigned one.
  // size:  element size; 128-bit elements are rejected.
  // zd/zn: destination and source. The form is destructive, so they must match.
  // imm:   signed operations accept [-128, 127], unsigned ones [0, 255].
  //        Only the low 8 bits are encoded.
  void SVEMinMaxImmediateUnpred(uint32_t opc, SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd needs to equal zn");

    const bool is_signed = (opc & 1) == 0;
    if (is_signed) {
      // The message now states the range that is actually enforced.
      LOGMAN_THROW_A_FMT(imm >= -128 && imm <= 127, "Invalid immediate ({}). Must be within [-128, 127]", imm);
    } else {
      LOGMAN_THROW_A_FMT(imm >= 0 && imm <= 255, "Invalid immediate ({}). Must be within [0, 255]", imm);
    }

    // Truncate to the 8-bit two's-complement field.
    const auto imm8 = static_cast<uint32_t>(imm) & 0xFF;

    uint32_t Instr = 0b0010'0101'0010'1000'1100'0000'0000'0000;
    Instr |= FEXCore::ToUnderlying(size) << 22;
    Instr |= opc << 16;
    Instr |= imm8 << 5;
    Instr |= zd.Idx();
    dc32(Instr);
  }

  // Encodes the unpredicated integer multiply-with-immediate group.
  //
  // opc:   operation selector, placed at bit 16.
  // size:  element size; 128-bit elements are rejected.
  // zd/zn: destination and source. The form is destructive, so they must match.
  // imm:   signed 8-bit immediate in [-128, 127]; only the low 8 bits are encoded.
  void SVEMultiplyImmediateUnpred(uint32_t opc, SubRegSize size, ZRegister zd, ZRegister zn, int32_t imm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd needs to equal zn");
    // The message now states the range that is actually enforced.
    LOGMAN_THROW_A_FMT(imm >= -128 && imm <= 127, "Invalid immediate ({}). Must be within [-128, 127]", imm);

    // Truncate to the 8-bit two's-complement field.
    const auto imm8 = static_cast<uint32_t>(imm) & 0xFF;

    uint32_t Instr = 0b0010'0101'0011'0000'1100'0000'0000'0000;
    Instr |= FEXCore::ToUnderlying(size) << 22;
    Instr |= opc << 16;
    Instr |= imm8 << 5;
    Instr |= zd.Idx();
    dc32(Instr);
  }

  // Encodes an unpredicated broadcast of a signed immediate into zd.
  // HandleSVESImm8Shift supplies the immediate to encode and the shift flag.
  void SVEBroadcastImm(uint32_t opc, int32_t imm, SubRegSize size, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    const auto [Imm8, UsesShift] = HandleSVESImm8Shift(size, imm);
    const uint32_t ImmField = static_cast<uint32_t>(Imm8) & 0xFF;

    uint32_t Encoding = 0b0010'0101'0011'1000'1100'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= ImmField << 5;
    Encoding |= UsesShift << 13;
    Encoding |= opc << 17;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes a predicated broadcast of a floating-point immediate into zd.
  // The value is converted to an 8-bit immediate with FP32ToImm8 or
  // FP64ToImm8 depending on the element size; 16-bit elements are unsupported.
  void SVEBroadcastFloatImmPredicated(SubRegSize size, ZRegister zd, PRegister pg, float value) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Unsupported fcpy/fmov size");

    uint32_t EncodedImm {};
    if (size == SubRegSize::i16Bit) {
      LOGMAN_MSG_A_FMT("Unsupported");
      FEX_UNREACHABLE;
    } else if (size == SubRegSize::i32Bit) {
      EncodedImm = FP32ToImm8(value);
    } else if (size == SubRegSize::i64Bit) {
      EncodedImm = FP64ToImm8(value);
    }

    constexpr uint32_t Base = 0b0000'0101'0001'0000'1100'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (pg.Idx() << 16) | (EncodedImm << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes an unpredicated broadcast of an already-encoded floating-point
  // immediate into zd. No argument validation is performed here.
  void SVEBroadcastFloatImmUnpredicated(uint32_t opc, uint32_t o2, uint32_t imm, SubRegSize size, ZRegister zd) {
    constexpr uint32_t Base = 0b0010'0101'0011'1001'1100'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 17) | (o2 << 13) | (imm << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes a predicated broadcast of a signed integer immediate into zd.
  // `m` is placed at bit 14; HandleSVESImm8Shift supplies the immediate to
  // encode and the shift flag.
  void SVEBroadcastIntegerImmPredicated(uint32_t m, SubRegSize size, ZRegister zd, PRegister pg, int32_t imm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    const auto [Imm8, UsesShift] = HandleSVESImm8Shift(size, imm);
    const uint32_t ImmField = static_cast<uint32_t>(Imm8) & 0xFF;

    uint32_t Encoding = 0b0000'0101'0001'0000'0000'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= ImmField << 5;
    Encoding |= UsesShift << 13;
    Encoding |= m << 14;
    Encoding |= pg.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes ADR (vector address generation).
  // UXTW/SXTW select the unpacked forms, which require 64-bit elements and
  // are told apart by bit 22. NONE/LSL select the packed forms, which place
  // the element size at bit 22. The scale goes to bit 10 in every form.
  void SVEAddressGeneration(SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm, SVEModType mod, uint32_t scale) {
    LOGMAN_THROW_A_FMT(scale <= 3, "Scale ({}) must be within [0, 3]", scale);

    uint32_t Encoding = 0b0000'0100'0010'0000'1010'0000'0000'0000;

    const bool IsUnpacked = mod == SVEModType::MOD_UXTW || mod == SVEModType::MOD_SXTW;
    const bool IsPacked = mod == SVEModType::MOD_NONE || mod == SVEModType::MOD_LSL;

    if (IsUnpacked) {
      LOGMAN_THROW_A_FMT(size == SubRegSize::i64Bit, "Unpacked ADR must be using 64-bit elements");

      // Only the unsigned extension sets the bit; the signed one leaves it clear.
      if (mod == SVEModType::MOD_UXTW) {
        Encoding |= 1U << 22;
      }
    } else if (IsPacked) {
      if (mod == SVEModType::MOD_NONE) {
        LOGMAN_THROW_A_FMT(scale == 0, "Cannot scale packed ADR without a modifier");
      }
      LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Packed ADR must be using 32-bit or 64-bit elements");
      Encoding |= FEXCore::ToUnderlying(size) << 22;
    }

    Encoding |= (zm.Idx() << 16) | (scale << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes a predicate-driven select between zn and zm into zd.
  // Fields: size at bit 22, zm at bit 16, pv at bit 10; zn and zd go through
  // Encode_rn / Encode_rd.
  void SVESel(SubRegSize size, ZRegister zm, PRegister pv, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    constexpr uint32_t Base = 0b0000'0101'0010'0000'1100'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (pv.Idx() << 10) | Encode_rn(zn) | Encode_rd(zd);
    dc32(Encoding);
  }

  // Encodes the predicated shift-by-vector group. R, L and U are single-bit
  // selectors at bits 18, 17 and 16. The form is destructive: zd must equal
  // zn, and only zd and zm are encoded.
  void SVEBitwiseShiftbyVector(uint32_t R, uint32_t L, uint32_t U, SubRegSize size, PRegister pg, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(zd == zn, "Dest needs to equal zn");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0001'0000'1000'0000'0000'0000;
    const uint32_t Selectors = (R << 18) | (L << 17) | (U << 16);
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | Selectors | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE integer add/subtract vectors (unpredicated)
  // Encodes the unpredicated integer add/subtract group: size at bit 22,
  // zm at bit 16, opc at bit 10, zn at bit 5, zd at bit 0.
  void SVEIntegerAddSubUnpredicated(uint32_t opc, SubRegSize size, ZRegister zm, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t Base = 0b0000'0100'0010'0000'0000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE table lookup (three sources)
  // Encodes the table-lookup group: size at bit 22, zm at bit 16, op at
  // bit 10, zn at bit 5, zd at bit 0.
  void SVETableLookup(uint32_t op, SubRegSize size, ZRegister zm, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t Base = 0b0000'0101'0010'0000'0010'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (op << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE permute vector elements
  // Encodes the two-source vector permute group: size at bit 22, zm at
  // bit 16, opc at bit 10, zn at bit 5, zd at bit 0.
  void SVEPermute(uint32_t opc, SubRegSize size, ZRegister zm, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t Base = 0b0000'0101'0010'0000'0110'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE predicate logical operations
  // Encodes the predicate logical-operation group. op, S, o2 and o3 are
  // selectors at bits 23, 22, 9 and 4; the four predicate registers fill the
  // remaining fields. No argument validation is performed here.
  void SVEPredicateLogical(uint32_t op, uint32_t S, uint32_t o2, uint32_t o3, PRegister pm, PRegister pg, PRegister pn, PRegister pd) {
    constexpr uint32_t Base = 0b0010'0101'0000'0000'0100'0000'0000'0000;
    const uint32_t Selectors = (op << 23) | (S << 22) | (o2 << 9) | (o3 << 4);
    const uint32_t Registers = (pm.Idx() << 16) | (pg.Idx() << 10) | (pn.Idx() << 5) | pd.Idx();
    dc32(Base | Selectors | Registers);
  }

  // SVE floating-point convert precision odd elements
  // Encodes the "floating-point convert precision odd elements" group:
  // opc at bit 22, opc2 at bit 16, pg at bit 10, zn at bit 5, zd at bit 0.
  void SVEFloatConvertOdd(uint32_t opc, uint32_t opc2, PRegister pg, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0110'0100'0000'1000'1010'0000'0000'0000;
    const uint32_t Encoding = Base | (opc << 22) | (opc2 << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE2 floating-point pairwise operations
  // Encodes the floating-point pairwise arithmetic group. The form is
  // destructive: zd must equal zn, and only zd and zm are encoded.
  void SVEFloatPairwiseArithmetic(uint32_t opc, SubRegSize size, PRegister pg, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zd needs to equal zn");
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid float size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0110'0100'0001'0000'1000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE floating-point arithmetic (unpredicated)
  // Encodes the unpredicated floating-point arithmetic group: size at bit 22,
  // zm at bit 16, opc at bit 10, zn at bit 5, zd at bit 0.
  void SVEFloatArithmeticUnpredicated(uint32_t opc, SubRegSize size, ZRegister zm, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid float size");

    constexpr uint32_t Base = 0b0110'0101'0000'0000'0000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE bitwise logical operations (predicated)
  // Encodes the predicated bitwise logical group. The form is destructive:
  // zd must equal zdn, and only zd and zm are encoded.
  void SVEBitwiseLogicalPredicated(uint32_t opc, SubRegSize size, PRegister pg, ZRegister zdn, ZRegister zm, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != ARMEmitter::SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(zd == zdn, "zd needs to equal zdn");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0001'1000'0000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE constructive prefix (predicated)
  // Encodes the predicated constructive-prefix group: size at bit 22, opc at
  // bit 17, M at bit 16, pg at bit 10; zn and zd go through Encode_rn /
  // Encode_rd.
  void SVEConstructivePrefixPredicated(uint32_t opc, uint32_t M, SubRegSize size, PRegister pg, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0001'0000'0010'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 17) | (M << 16) | (pg.Idx() << 10) | Encode_rn(zn) | Encode_rd(zd);
    dc32(Encoding);
  }

  // SVE bitwise unary operations (predicated)
  // Encodes the predicated integer unary group: size at bit 22, op0 at
  // bit 19, opc at bit 16, pg at bit 10, zn at bit 5, zd at bit 0.
  void SVEIntegerUnaryPredicated(uint32_t op0, uint32_t opc, SubRegSize size, PRegister pg, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0000'0000'1010'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (op0 << 19) | (opc << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE bitwise logical operations (unpredicated)
  // Encodes the unpredicated bitwise logical group: opc at bit 22, zm at
  // bit 16, zn at bit 5, zd at bit 0. There is no element-size field.
  void SVEBitwiseLogicalUnpredicated(uint32_t opc, ZRegister zm, ZRegister zn, ZRegister zd) {
    constexpr uint32_t Base = 0b0000'0100'0010'0000'0011'0000'0000'0000;
    const uint32_t Encoding = Base | (opc << 22) | (zm.Idx() << 16) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE Permute Vector - Unpredicated
  // Encodes the unpredicated single-source permute group: size at bit 22,
  // opc at bit 16, zm at bit 5, zdn at bit 0.
  void SVEPermuteUnpredicated(SubRegSize size, uint32_t opc, ZRegister zdn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    constexpr uint32_t Base = 0b0000'0101'0010'0000'0011'1000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (zm.Idx() << 5) | zdn.Idx();
    dc32(Encoding);
  }

  // SVE Permute Predicate
  // Encodes the predicate permute group: size at bit 22, op1 at bit 16,
  // op2 at bit 9, pn at bit 5, op3 at bit 4, pd at bit 0.
  void SVEPermutePredicate(SubRegSize size, uint32_t op1, uint32_t op2, uint32_t op3, PRegister pd, PRegister pn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    constexpr uint32_t Base = 0b0000'0101'0010'0000'0100'0000'0000'0000;
    const uint32_t Selectors = (op1 << 16) | (op2 << 9) | (op3 << 4);
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | Selectors | (pn.Idx() << 5) | pd.Idx();
    dc32(Encoding);
  }

  // SVE Integer Misc - Unpredicated
  // Encodes the unpredicated integer misc group: opc2 at bit 22, opc at
  // bit 16, op0 at bit 10, zn at bit 5, zd at bit 0. No argument validation
  // is performed here.
  void SVEIntegerMiscUnpredicated(uint32_t op0, uint32_t opc, uint32_t opc2, ZRegister zd, ZRegister zn) {
    constexpr uint32_t Base = 0b0000'0100'0010'0000'1011'0000'0000'0000;
    const uint32_t Encoding = Base | (opc2 << 22) | (opc << 16) | (op0 << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // SVE floating-point arithmetic (predicated)
  // Encodes the predicated floating-point arithmetic group. The form is
  // destructive: zd must equal zn, and only zd and zm are encoded.
  void SVEFloatArithmeticPredicated(uint32_t opc, SubRegSize size, PRegister pg, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zn needs to equal zd");
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Invalid float size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0110'0101'0000'0000'1000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes match/nmatch: size at bit 22, zm at bit 16, pg at bit 10, zn at
  // bit 5, opc at bit 4, pd at bit 0. Only 8-bit and 16-bit elements are
  // accepted.
  void SVECharacterMatch(uint32_t opc, SubRegSize size, PRegister pd, PRegisterZero pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i8Bit || size == SubRegSize::i16Bit, "match/nmatch can only use 8-bit or 16-bit element sizes");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7.Zeroing(), "match/nmatch can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0100'0101'0010'0000'1000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | (opc << 4) | pd.Idx();
    dc32(Encoding);
  }

  // Encodes the floating-point recursive reduction group, which reduces zn
  // into the register vd: size at bit 22, opc at bit 16, pg at bit 10, zn at
  // bit 5, vd at bit 0.
  void SVEFPRecursiveReduction(uint32_t opc, SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "FP reduction operation can only use 16/32/64-bit element sizes");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "FP reduction operation can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0110'0101'0000'0000'0010'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | vd.Idx();
    dc32(Encoding);
  }

  // Encodes the predicated integer add/subtract vectors group.
  //
  // opc:   operation selector, placed at bit 16.
  // size:  element size, placed at bit 22; 128-bit elements are rejected.
  // zd/zn: destination and first source. The form is destructive, so they
  //        must match; only zd is encoded.
  // pg:    governing predicate, restricted to p0-p7.
  // zm:    second source, placed at bit 5.
  void SVEAddSubVectorsPredicated(uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    // Reject 128-bit elements up front, matching the sibling encoders; the
    // size is shifted to bit 22 unmasked, so an out-of-range value would
    // corrupt neighbouring bits.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd and zn must be the same register");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Add/Sub operation can only use p0-p7 as a governing predicate");

    uint32_t Instr = 0b0000'0100'0000'0000'0000'0000'0000'0000;
    Instr |= FEXCore::ToUnderlying(size) << 22;
    Instr |= opc << 16;
    Instr |= pg.Idx() << 10;
    Instr |= zm.Idx() << 5;
    Instr |= zd.Idx();
    dc32(Instr);
  }

  // Encodes the predicated integer multiply/divide vectors group.
  //
  // b18:   placed at bit 18; a non-zero value selects a division, which only
  //        accepts 32-bit or 64-bit elements.
  // opc:   operation selector, placed at bit 16.
  // size:  element size, placed at bit 22; 128-bit elements are rejected.
  // zd/zn: destination and first source. The form is destructive, so they
  //        must match; only zd is encoded.
  // pg:    governing predicate, restricted to p0-p7.
  // zm:    second source, placed at bit 5.
  void SVEIntegerMulDivVectorsPredicated(uint32_t b18, uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    // Reject 128-bit elements up front, matching the sibling encoders; the
    // multiply path previously had no size validation at all.
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd and zn must be the same register");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Mul/Div operation can only use p0-p7 as a governing predicate");

    // Division instruction
    if (b18 != 0) {
      LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Predicated divide only handles 32-bit or 64-bit "
                                                                                   "elements");
    }

    uint32_t Instr = 0b0000'0100'0001'0000'0000'0000'0000'0000;
    Instr |= FEXCore::ToUnderlying(size) << 22;
    Instr |= b18 << 18;
    Instr |= opc << 16;
    Instr |= pg.Idx() << 10;
    Instr |= zm.Idx() << 5;
    Instr |= zd.Idx();
    dc32(Instr);
  }

  // Encodes an integer reduction of zn into the register vd. Unlike its
  // siblings, the base opcode is supplied by the caller in `op`; size goes to
  // bit 22, opc to bit 16, pg to bit 10, zn to bit 5 and vd to bit 0.
  void SVEIntegerReductionOperation(uint32_t op, uint32_t opc, SubRegSize size, VRegister vd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size for reduction operation");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Integer reduction operation can only use p0-p7 as a governing predicate");

    const uint32_t Encoding = op | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | vd.Idx();
    dc32(Encoding);
  }

  // Encodes the predicated integer multiply-add/subtract group: size at
  // bit 22, zm at bit 16, op0 at bit 15, opc at bit 13, pg at bit 10, zn at
  // bit 5, zd at bit 0.
  void SVEIntegerMultiplyAddSubPredicated(uint32_t op0, uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0000'0000'0100'0000'0000'0000;
    const uint32_t Selectors = (op0 << 15) | (opc << 13);
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | Selectors | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes the stack-frame adjustment group: opc at bit 22, rn at bit 16,
  // the signed 6-bit immediate at bit 5 and rd at bit 0.
  void SVEStackFrameOperation(uint32_t opc, XRegister rd, XRegister rn, int32_t imm) {
    LOGMAN_THROW_A_FMT(imm >= -32 && imm <= 31, "Stack frame operation immediate must be within -32 to 31");

    // Keep only the 6-bit two's-complement representation.
    const uint32_t Imm6 = static_cast<uint32_t>(imm) & 0b111111;

    constexpr uint32_t Base = 0b0000'0100'0010'0000'0101'0000'0000'0000;
    const uint32_t Encoding = Base | (opc << 22) | (rn.Idx() << 16) | (Imm6 << 5) | rd.Idx();
    dc32(Encoding);
  }

  // Encodes the predicated shift-by-wide-elements group. 64-bit and 128-bit
  // element sizes are rejected. The form is destructive: zd must equal zn,
  // and only zd and zm are encoded.
  void SVEBitwiseShiftByWideElementPredicated(SubRegSize size, uint32_t opc, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit && size != SubRegSize::i128Bit, "Can't use 64-bit or 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd and zn must be the same register");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7.Merging(), "Wide shift can only use p0-p7 as a governing predicate");

    constexpr uint32_t Base = 0b0000'0100'0001'1000'1000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes the unpredicated shift-by-wide-elements group: size at bit 22,
  // zm at bit 16, opc at bit 10, zn at bit 5, zd at bit 0. 64-bit and 128-bit
  // element sizes are rejected.
  void SVEBitwiseShiftByWideElementsUnpredicated(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit && size != SubRegSize::i128Bit, "Can't use 64-bit or 128-bit element size");

    constexpr uint32_t Base = 0b0000'0100'0010'0000'1000'0000'0000'0000;
    const uint32_t Encoding = Base | (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(Encoding);
  }

  // Encodes an SVE predicated floating-point arithmetic operation with an immediate.
  // i1 is shifted to bit 5; zd is the only vector register encoded.
  void SVEFPArithWithImmediate(uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, uint32_t i1) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i128Bit, "Can't use 8-bit or 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0001'1000'1000'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (i1 << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE predicated floating-point precision conversion.
  // `from` and `to` must be different sizes, each 16-bit, 32-bit or 64-bit.
  void SVEFPConvertPrecision(SubRegSize to, SubRegSize from, ZRegister zd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(to != from, "to and from sizes cannot be the same.");
    LOGMAN_THROW_A_FMT(to != SubRegSize::i8Bit && to != SubRegSize::i128Bit && from != SubRegSize::i8Bit && from != SubRegSize::i128Bit,
                       "Can't use 8-bit or 128-bit element size");

    // The size pair does not map onto the encoding in a regular way, so each
    // valid (from, to) combination gets its own pre-shifted opcode bits.
    // UINT32_MAX marks a combination with no encoding.
    uint32_t op = UINT32_MAX;
    if (from == SubRegSize::i16Bit) {
      if (to == SubRegSize::i32Bit) {
        op = 0x00810000U;
      } else if (to == SubRegSize::i64Bit) {
        op = 0x00C10000U;
      }
    } else if (from == SubRegSize::i32Bit) {
      if (to == SubRegSize::i16Bit) {
        op = 0x00800000U;
      } else if (to == SubRegSize::i64Bit) {
        op = 0x00C30000U;
      }
    } else if (from == SubRegSize::i64Bit) {
      if (to == SubRegSize::i16Bit) {
        op = 0x00C00000U;
      } else if (to == SubRegSize::i32Bit) {
        op = 0x00C20000U;
      }
    }
    LOGMAN_THROW_A_FMT(op != UINT32_MAX, "Invalid conversion op value: {}", op);

    constexpr uint32_t BaseEncoding = 0b0110'0101'0000'1000'1010'0000'0000'0000;
    const uint32_t Operands = op | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 integer add/subtract narrow high part instruction.
  // The size field holds the passed-in size value plus one.
  void SVE2IntegerAddSubNarrowHighPart(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i64Bit && size != SubRegSize::i128Bit, "Can't use 64-bit or 128-bit element size");

    const uint32_t SizeField = FEXCore::ToUnderlying(size) + 1;

    uint32_t Encoding = 0b0100'0101'0010'0000'0110'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= zm.Idx() << 16;
    Encoding |= SizeField << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 crypto unary operation.
  // zdn and zn must be the same register, so only zdn is encoded.
  void SVE2CryptoUnaryOperation(uint32_t op, ZRegister zdn, ZRegister zn) {
    LOGMAN_THROW_A_FMT(zdn == zn, "zdn and zn must be the same register");

    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0000'1110'0000'0000'0000;
    dc32(BaseEncoding | (op << 10) | zdn.Idx());
  }

  // Encodes an SVE2 crypto destructive binary operation.
  // zdn and zn must be the same register; zdn and zm are the encoded operands.
  void SVE2CryptoDestructiveBinaryOperation(uint32_t op, uint32_t o2, ZRegister zdn, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zdn == zn, "zdn and zn must be the same register");

    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0010'1110'0000'0000'0000;
    const uint32_t Operands = (op << 16) | (o2 << 10) | (zm.Idx() << 5) | zdn.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 crypto constructive binary operation (separate zd, zn, zm).
  void SVE2CryptoConstructiveBinaryOperation(uint32_t op, ZRegister zd, ZRegister zn, ZRegister zm) {
    uint32_t Encoding = 0b0100'0101'0010'0000'1111'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= op << 10;
    Encoding |= zm.Idx() << 16;
    dc32(Encoding);
  }

  // Encodes an SVE2 bitwise permute instruction.
  void SVE2BitwisePermute(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1011'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 bitwise exclusive-or interleaved instruction.
  void SVE2BitwiseXorInterleaved(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    uint32_t Encoding = 0b0100'0101'0000'0000'1001'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE integer matrix multiply-accumulate instruction.
  // opc is shifted to bit 22; zda is the accumulator/destination register.
  void SVEIntegerMatrixMulAccumulate(uint32_t opc, ZRegister zda, ZRegister zn, ZRegister zm) {
    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1001'1000'0000'0000;
    const uint32_t Operands = (opc << 22) | (zm.Idx() << 16) | (zn.Idx() << 5) | zda.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 integer add/subtract interleaved long instruction.
  void SVE2IntegerAddSubInterleavedLong(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i128Bit, "Can't use 8-bit or 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1000'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 integer absolute difference and accumulate instruction.
  void SVE2IntegerAbsDiffAndAccumulate(SubRegSize size, uint32_t opc, ZRegister zda, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    uint32_t Encoding = 0b0100'0101'0000'0000'1111'1000'0000'0000;
    Encoding |= zda.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 integer add/subtract long with carry instruction.
  // Only 32-bit and 64-bit element sizes are accepted; the size is reduced to a
  // single bit at position 22 (0 for 32-bit, 1 otherwise).
  void SVE2IntegerAddSubLongWithCarry(SubRegSize size, uint32_t sizep1, uint32_t T, ZRegister zda, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "Element size must be 32-bit or 64-bit");

    const uint32_t SizeBit = static_cast<uint32_t>(size != SubRegSize::i32Bit);

    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1101'0000'0000'0000;
    const uint32_t Operands = (sizep1 << 23) | (SizeBit << 22) | (zm.Idx() << 16) | (T << 10) | (zn.Idx() << 5) | zda.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 bitwise shift right and accumulate instruction.
  // The element size and shift amount are folded into the tsz/imm3 fields by
  // EncodeSVEShiftImmediate.
  void SVE2BitwiseShiftRightAndAccumulate(SubRegSize size, uint32_t opc, ZRegister zda, ZRegister zn, uint32_t shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Element size cannot be 128-bit");

    const auto [tsz_hi, tsz_lo_imm3] = EncodeSVEShiftImmediate(size, shift);

    uint32_t Encoding = 0b0100'0101'0000'0000'1110'0000'0000'0000;
    Encoding |= zda.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= tsz_lo_imm3 << 16;
    Encoding |= tsz_hi << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 bitwise shift and insert instruction.
  // A non-zero opc selects the left-shift form of the immediate encoding.
  void SVE2BitwiseShiftAndInsert(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, uint32_t shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Element size cannot be 128-bit");

    const bool ShiftsLeft = opc != 0;
    const auto [tsz_hi, tsz_lo_imm3] = EncodeSVEShiftImmediate(size, shift, ShiftsLeft);

    uint32_t Encoding = 0b0100'0101'0000'0000'1111'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= tsz_lo_imm3 << 16;
    Encoding |= tsz_hi << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 bitwise shift left long instruction.
  void SVE2BitwiseShiftLeftLong(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, uint32_t shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i128Bit, "Can't use 8-bit or 128-bit element size");

    // `size` names the widened destination element (16-bit means widening from
    // 8-bit). The shift immediate is encoded against the source element, which
    // is one size step smaller.
    const auto source_size = SubRegSize {FEXCore::ToUnderlying(size) - 1};
    const auto [tsz_hi, tsz_lo_imm3] = EncodeSVEShiftImmediate(source_size, shift, true);

    uint32_t Encoding = 0b0100'0101'0000'0000'1010'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= tsz_lo_imm3 << 16;
    Encoding |= tsz_hi << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 complex integer add instruction.
  // zd and zn must be the same register. Only 90 and 270 degree rotations are
  // accepted; they are encoded as a single bit (0 for 90 degrees, 1 otherwise).
  void SVE2ComplexIntAdd(SubRegSize size, uint32_t opc, Rotation rot, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Complex add cannot use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zn, "zd and zn must be the same register");
    LOGMAN_THROW_A_FMT(rot == Rotation::ROTATE_90 || rot == Rotation::ROTATE_270, "Rotation must be 90 or 270 degrees");

    const uint32_t RotBit = static_cast<uint32_t>(rot != Rotation::ROTATE_90);

    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1101'1000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (RotBit << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE2 absolute difference and accumulate long instruction.
  void SVE2AbsDiffAccLong(SubRegSize size, uint32_t opc, ZRegister zda, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i128Bit, "Cannot use 8-bit or 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'1100'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (zm.Idx() << 16) | (opc << 10) | (zn.Idx() << 5) | zda.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE unpredicated vector permute that takes a SIMD/FP register
  // (vm) as its source and a Z register (zdn) as source/destination.
  void SVEPermuteVectorUnpredicated(SubRegSize size, uint32_t opc, ZRegister zdn, VRegister vm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    uint32_t Encoding = 0b0000'0101'0010'0000'0011'1000'0000'0000;
    Encoding |= zdn.Idx();
    Encoding |= vm.Idx() << 5;
    Encoding |= opc << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // SVE floating-point round to integral value
  //
  // opc selects the rounding mode:
  //   0b000 - N - Nearest, ties to even
  //   0b001 - P - Towards +inf
  //   0b010 - M - Towards -inf
  //   0b011 - Z - Towards zero
  //   0b100 - A - Nearest, away from zero
  //   0b101 - Unallocated
  //   0b110 - X - Current mode, signalling inexact
  //   0b111 - I - Current mode
  void SVEFloatRoundIntegral(uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Unsupported size in {}", __func__);

    constexpr uint32_t BaseEncoding = 0b0110'0101'0000'0000'1010'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (opc << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // SVE floating-point convert to integer
  // dstsize and srcsize are only validated here; the encoding itself is driven
  // by opc, b19, opc2 and U.
  void SVEFloatConvertToInt(SubRegSize dstsize, SubRegSize srcsize, uint32_t b19, uint32_t opc, uint32_t opc2, uint32_t U, PRegister pg,
                            ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(srcsize == SubRegSize::i16Bit || srcsize == SubRegSize::i32Bit || srcsize == SubRegSize::i64Bit,
                       "Unsupported src size in {}", __func__);
    LOGMAN_THROW_A_FMT(dstsize == SubRegSize::i16Bit || dstsize == SubRegSize::i32Bit || dstsize == SubRegSize::i64Bit,
                       "Unsupported dst size in {}", __func__);

    constexpr uint32_t BaseEncoding = 0b0110'0101'0001'0000'1010'0000'0000'0000;
    const uint32_t Operands = (opc << 22) | (b19 << 19) | (opc2 << 17) | (U << 16) | (pg.Idx() << 10) | (zn.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }
  // SVE integer convert to floating-point
  // Reuses the floating-point to integer encoder: the two encodings differ
  // only in bit 19, which is left clear here.
  void SVEIntegerConvertToFloat(SubRegSize dstsize, SubRegSize srcsize, uint32_t opc, uint32_t opc2, uint32_t U, PRegister pg, ZRegister zn,
                                ZRegister zd) {
    constexpr uint32_t Bit19 = 0;
    SVEFloatConvertToInt(dstsize, srcsize, Bit19, opc, opc2, U, pg, zn, zd);
  }

  // SVE Memory - 32-bit Gather and Unsized Contiguous
  // Note: This also handles 64-bit variants to keep overall handling code
  //       compact and in the same place.
  //
  // Encodes a gather load with scalar base plus vector offset addressing.
  // The offset modifier and scale come from mem_op's scalar-plus-vector data.
  void SVEGatherLoadScalarPlusVector(SubRegSize esize, SubRegSize msize, ZRegister zt, PRegisterZero pg, SVEMemOperand mem_op,
                                     bool is_unsigned, bool is_fault_first) {
    LOGMAN_THROW_A_FMT(esize == SubRegSize::i32Bit || esize == SubRegSize::i64Bit, "Gather load element size must be 32-bit or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const auto& vec_op = mem_op.MetaType.ScalarVectorType;
    const auto msize_value = FEXCore::ToUnderlying(msize);
    const bool has_scale = vec_op.scale != 0;

    LOGMAN_THROW_A_FMT(vec_op.scale == 0 || vec_op.scale == msize_value, "scale may only be 0 or {}", msize_value);

    uint32_t Encoding = 0b1000'0100'0000'0000'0000'0000'0000'0000;
    // Value that ends up in bit 22; defaults to the raw modifier value.
    uint32_t bit22_value = FEXCore::ToUnderlying(vec_op.mod);

    if (esize != SubRegSize::i64Bit) {
      LOGMAN_THROW_A_FMT(vec_op.mod == SVEModType::MOD_UXTW || vec_op.mod == SVEModType::MOD_SXTW,
                         "mod type for 32-bit lane size may only be UXTW or SXTW");
    } else {
      // 64-bit element forms set bit 30.
      Encoding |= 1U << 30;

      const bool uses_lsl = vec_op.mod == SVEModType::MOD_LSL;
      const bool uses_none = vec_op.mod == SVEModType::MOD_NONE;

      if (uses_lsl) {
        LOGMAN_THROW_A_FMT(vec_op.scale == msize_value, "mod type of LSL must have a scale of {}", msize_value);
      } else if (uses_none) {
        LOGMAN_THROW_A_FMT(vec_op.scale == 0, "mod type of none must have a scale of 0");
      }

      // LSL and unmodified offsets set bit 15 and force bit 22 to 1.
      if (uses_lsl || uses_none) {
        Encoding |= 1U << 15;
        bit22_value = 1;
      }
    }

    Encoding |= zt.Idx();
    Encoding |= mem_op.rn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= static_cast<uint32_t>(is_fault_first) << 13;
    Encoding |= static_cast<uint32_t>(is_unsigned) << 14;
    Encoding |= vec_op.zm.Idx() << 16;
    Encoding |= static_cast<uint32_t>(has_scale) << 21;
    Encoding |= static_cast<uint32_t>(bit22_value) << 22;
    Encoding |= FEXCore::ToUnderlying(msize) << 23;

    dc32(Encoding);
  }

  // Encodes a scatter store with scalar base plus vector offset addressing.
  //
  // esize:  element size of the vector lanes; must be 32-bit or 64-bit.
  // msize:  size of each element in memory.
  // zt:     vector register holding the data to store.
  // pg:     governing predicate (p0-p7).
  // mem_op: scalar-plus-vector memory operand supplying the base register, the
  //         offset vector, the offset modifier and the scale.
  void SVEScatterStoreScalarPlusVector(SubRegSize esize, SubRegSize msize, ZRegister zt, PRegister pg, SVEMemOperand mem_op) {
    LOGMAN_THROW_A_FMT(esize == SubRegSize::i32Bit || esize == SubRegSize::i64Bit, "Scatter store element size must be 32-bit or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const auto& op_data = mem_op.MetaType.ScalarVectorType;
    const bool is_scaled = op_data.scale != 0;

    const auto msize_value = FEXCore::ToUnderlying(msize);
    // Value placed in bit 14; defaults to the raw modifier value.
    uint32_t mod_value = FEXCore::ToUnderlying(op_data.mod);

    LOGMAN_THROW_A_FMT(op_data.scale == 0 || op_data.scale == msize_value, "scale may only be 0 or {}", msize_value);

    uint32_t Instr = 0b1110'0100'0000'0000'1000'0000'0000'0000;

    if (esize == SubRegSize::i64Bit) {
      const auto mod = op_data.mod;
      const bool is_lsl = mod == SVEModType::MOD_LSL;
      const bool is_none = mod == SVEModType::MOD_NONE;

      if (is_lsl || is_none) {
        if (is_lsl) {
          LOGMAN_THROW_A_FMT(op_data.scale == msize_value, "mod type of LSL must have a scale of {}", msize_value);
        } else {
          LOGMAN_THROW_A_FMT(op_data.scale == 0, "mod type of none must have a scale of 0");
        }
        if (is_lsl || is_scaled) {
          LOGMAN_THROW_A_FMT(msize != SubRegSize::i8Bit, "Cannot use 8-bit store elements with unpacked 32-bit scaled offset and "
                                                         "64-bit scaled offset variants. Instructions not allocated.");
        }

        // 64-bit scaled/unscaled scatters need to set bit 13
        // and leave the modifier bit (bit 14) clear.
        Instr |= 1U << 13;
        mod_value = 0;
      }
    } else {
      if (is_scaled) {
        LOGMAN_THROW_A_FMT(msize != SubRegSize::i8Bit && msize != SubRegSize::i64Bit, "Cannot use 8-bit or 64-bit store elements with "
                                                                                      "32-bit scaled offset variant. "
                                                                                      "Instructions not allocated");
      } else {
        LOGMAN_THROW_A_FMT(msize != SubRegSize::i64Bit, "Cannot use 64-bit store elements with 32-bit unscaled offset variant. "
                                                        "Instruction not allocated.");
      }

      LOGMAN_THROW_A_FMT(op_data.mod == SVEModType::MOD_UXTW || op_data.mod == SVEModType::MOD_SXTW, "mod type for 32-bit lane size may "
                                                                                                     "only be UXTW or SXTW");

      // 32-bit scatters need to set bit 22.
      Instr |= 1U << 22;
    }

    Instr |= msize_value << 23;
    Instr |= static_cast<uint32_t>(is_scaled) << 21;
    Instr |= op_data.zm.Idx() << 16;
    Instr |= static_cast<uint32_t>(mod_value) << 14;
    Instr |= pg.Idx() << 10;
    Instr |= mem_op.rn.Idx() << 5;
    Instr |= zt.Idx();

    dc32(Instr);
  }

  // Shared encoder for gather loads and scatter stores that use vector plus
  // immediate addressing. The immediate is a byte offset that must be a
  // multiple of the memory element size; it is encoded divided by that size.
  void SVEGatherScatterVectorPlusImm(SubRegSize esize, SubRegSize msize, ZRegister zt, PRegister pg, SVEMemOperand mem_op, bool is_store,
                                     bool is_unsigned, bool is_fault_first) {
    LOGMAN_THROW_A_FMT(esize == SubRegSize::i32Bit || esize == SubRegSize::i64Bit, "Gather load/store element size must be 32-bit or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const auto msize_value = FEXCore::ToUnderlying(msize);
    const auto bytes_per_elem = 1U << msize_value;

    const auto largest_imm = (32U << msize_value) - bytes_per_elem;
    const auto imm = mem_op.MetaType.VectorImmType.Imm;
    const auto scaled_imm = imm >> msize_value;

    LOGMAN_THROW_A_FMT(imm <= largest_imm, "Immediate must be within [0, {}]", largest_imm);
    LOGMAN_THROW_A_FMT(imm == 0 || (imm % bytes_per_elem) == 0, "Immediate must be cleanly divisible by {}", bytes_per_elem);

    uint32_t Encoding = 0b1000'0100'0000'0000'1000'0000'0000'0000;

    // Stores and loads start from different fixed bit patterns.
    Encoding |= is_store ? 0x60402000U : 0x00200000U;
    if (is_store && esize == SubRegSize::i32Bit) {
      Encoding |= 1U << 21;
    }
    if (!is_store && esize == SubRegSize::i64Bit) {
      Encoding |= 1U << 30;
    }

    Encoding |= zt.Idx();
    Encoding |= mem_op.rn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= static_cast<uint32_t>(is_fault_first) << 13;
    Encoding |= static_cast<uint32_t>(is_unsigned) << 14;
    Encoding |= scaled_imm << 16;
    Encoding |= msize_value << 23;

    dc32(Encoding);
  }

  // Gather load with vector plus immediate addressing; forwards to the shared
  // gather/scatter encoder with the load path selected.
  void SVEGatherLoadVectorPlusImm(SubRegSize esize, SubRegSize msize, ZRegister zt, PRegisterZero pg, SVEMemOperand mem_op,
                                  bool is_unsigned, bool is_fault_first) {
    constexpr bool IsStore = false;
    SVEGatherScatterVectorPlusImm(esize, msize, zt, pg, mem_op, IsStore, is_unsigned, is_fault_first);
  }

  // Scatter store with vector plus immediate addressing; forwards to the shared
  // gather/scatter encoder with the store path selected. The trailing flags
  // leave bit 14 clear and set bit 13 in the shared encoder.
  void SVEScatterStoreVectorPlusImm(SubRegSize esize, SubRegSize msize, ZRegister zt, PRegister pg, SVEMemOperand mem_op) {
    constexpr bool IsStore = true;
    constexpr bool IsUnsigned = false;
    constexpr bool IsFaultFirst = true;
    SVEGatherScatterVectorPlusImm(esize, msize, zt, pg, mem_op, IsStore, IsUnsigned, IsFaultFirst);
  }

  // Encodes an unsized contiguous load/store of a Z register.
  // The signed 9-bit immediate is split: its upper six bits are shifted to
  // bit 16 and its lower three bits to bit 10.
  void SVEUnsizedLoadStoreContiguous(uint32_t op2, int32_t imm, ZRegister zt, Register rn, bool is_store) {
    LOGMAN_THROW_A_FMT(imm >= -256 && imm <= 255, "Immediate offset ({}) too large. Must be within [-256, 255].", imm);

    const auto imm9 = static_cast<uint32_t>(imm) & 0b1'1111'1111;
    const uint32_t imm_hi = imm9 >> 3;
    const uint32_t imm_lo = imm9 & 0b111;

    constexpr uint32_t BaseEncoding = 0b1000'0101'1000'0000'0000'0000'0000'0000;
    const uint32_t StoreBits = is_store ? 0x60000000U : 0U;
    const uint32_t Operands = (imm_hi << 16) | (op2 << 14) | (imm_lo << 10) | (rn.Idx() << 5) | zt.Idx();

    dc32(BaseEncoding | StoreBits | Operands);
  }

  // SVE load/store multiple structures (scalar plus immediate)
  // imm must be a multiple of num_regs within [-8 * num_regs, 7 * num_regs];
  // it is encoded divided by num_regs as a 4-bit field.
  void SVEContiguousMultipleStructures(int32_t num_regs, bool is_store, uint32_t msz, int32_t imm, ZRegister zt, PRegister pg, Register rn) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT((imm % num_regs) == 0, "Offset must be a multiple of {}", num_regs);

    const auto min_offset = -8 * num_regs;
    const auto max_offset = 7 * num_regs;
    LOGMAN_THROW_A_FMT(imm >= min_offset && imm <= max_offset,
                       "Invalid load/store offset ({}). Offset must be a multiple of {} and be within [{}, {}]", imm, num_regs, min_offset,
                       max_offset);

    const auto scaled_imm4 = static_cast<uint32_t>(imm / num_regs) & 0xF;
    // The register count is encoded as (count - 1).
    const auto reg_count_field = static_cast<uint32_t>(num_regs - 1);

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'1110'0000'0000'0000;
    const uint32_t StoreBits = is_store ? 0x40100000U : 0U;

    uint32_t Encoding = BaseEncoding | StoreBits;
    Encoding |= zt.Idx();
    Encoding |= Encode_rn(rn);
    Encoding |= pg.Idx() << 10;
    Encoding |= scaled_imm4 << 16;
    Encoding |= reg_count_field << 21;
    Encoding |= msz << 23;
    dc32(Encoding);
  }

  // SVE contiguous non-temporal load (scalar plus immediate)
  // imm is a signed 4-bit offset, truncated to its low four bits for encoding.
  void SVEContiguousNontemporalLoad(uint32_t msz, ZRegister zt, PRegister pg, Register rn, int32_t imm) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(imm >= -8 && imm <= 7, "Invalid loadstore offset ({}). Must be between [-8, 7]", imm);

    const auto offset4 = static_cast<uint32_t>(imm) & 0xF;

    uint32_t Encoding = 0b1010'0100'0000'0000'1110'0000'0000'0000;
    Encoding |= zt.Idx();
    Encoding |= Encode_rn(rn);
    Encoding |= pg.Idx() << 10;
    Encoding |= offset4 << 16;
    Encoding |= msz << 23;
    dc32(Encoding);
  }

  // SVE contiguous non-temporal store (scalar plus immediate)
  // imm is a signed 4-bit offset, truncated to its low four bits for encoding.
  void SVEContiguousNontemporalStore(uint32_t msz, ZRegister zt, PRegister pg, Register rn, int32_t imm) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(imm >= -8 && imm <= 7, "Invalid loadstore offset ({}). Must be between [-8, 7]", imm);

    const auto offset4 = static_cast<uint32_t>(imm) & 0xF;

    uint32_t Encoding = 0b1110'0100'0001'0000'1110'0000'0000'0000;
    Encoding |= zt.Idx();
    Encoding |= Encode_rn(rn);
    Encoding |= pg.Idx() << 10;
    Encoding |= offset4 << 16;
    Encoding |= msz << 23;
    dc32(Encoding);
  }

  // Encodes an SVE contiguous load (or store, when is_store is set) with
  // scalar plus immediate addressing. imm is a signed 4-bit offset.
  void SVEContiguousLoadImm(bool is_store, uint32_t dtype, int32_t imm, PRegister pg, Register rn, ZRegister zt) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(imm >= -8 && imm <= 7, "Invalid loadstore offset ({}). Must be between [-8, 7]", imm);

    const auto offset4 = static_cast<uint32_t>(imm) & 0xF;

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'1010'0000'0000'0000;
    // Extra fixed bits that turn the load pattern into the store pattern.
    const uint32_t StoreBits = is_store ? 0x40004000U : 0U;

    uint32_t Encoding = BaseEncoding | StoreBits;
    Encoding |= zt.Idx();
    Encoding |= Encode_rn(rn);
    Encoding |= pg.Idx() << 10;
    Encoding |= offset4 << 16;
    Encoding |= dtype << 21;
    dc32(Encoding);
  }

  // zt.b, pg/z, xn, xm
  // Encodes an SVE contiguous load/store with scalar plus scalar addressing.
  // b30 and b13 are placed directly at bits 30 and 13 of the instruction.
  void SVEContiguousLoadStore(uint32_t b30, uint32_t b13, uint32_t dtype, Register rm, PRegister pg, Register rn, ZRegister zt) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'0100'0000'0000'0000;
    const uint32_t Operands = (b30 << 30) | (dtype << 21) | Encode_rm(rm) | (b13 << 13) | (pg.Idx() << 10) | Encode_rn(rn) | zt.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE contiguous load/store of multiple structures with scalar
  // plus scalar addressing. rm may not be the stack pointer.
  void SVEContiguousLoadStoreMultipleScalar(bool is_store, SubRegSize msz, uint32_t opc, ZRegister zt, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(rm != Reg::rsp, "rm cannot be the stack pointer");

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'0000'0000'0000'0000;
    // Loads and stores each contribute their own fixed bit pattern.
    const uint32_t DirectionBits = is_store ? 0x40006000U : 0x0000C000U;

    uint32_t Encoding = BaseEncoding | DirectionBits;
    Encoding |= zt.Idx();
    Encoding |= rn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= rm.Idx() << 16;
    Encoding |= opc << 21;
    Encoding |= FEXCore::ToUnderlying(msz) << 23;
    dc32(Encoding);
  }

  // Encodes an SVE load-and-broadcast of a 16 << ssz byte block with scalar
  // plus immediate addressing. imm is a byte offset that must be a multiple of
  // the block size; it is encoded divided by the block size as a 4-bit field.
  void SVELoadBroadcastQuadScalarPlusImm(uint32_t msz, uint32_t ssz, ZRegister zt, PRegister pg, Register rn, int imm) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const auto block_bytes = static_cast<int>(16 << ssz);
    const auto upper_bound = (block_bytes << 3) - block_bytes;
    const auto lower_bound = -(upper_bound + block_bytes);

    LOGMAN_THROW_A_FMT((imm % block_bytes) == 0, "imm ({}) must be a multiple of {}", imm, block_bytes);
    LOGMAN_THROW_A_FMT(imm >= lower_bound && imm <= upper_bound, "imm ({}) must be within [{}, {}]", imm, lower_bound, upper_bound);

    const auto imm4 = static_cast<uint32_t>(imm / block_bytes) & 0b1111;

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'0010'0000'0000'0000;
    const uint32_t Operands = (msz << 23) | (ssz << 21) | (imm4 << 16) | (pg.Idx() << 10) | (rn.Idx() << 5) | zt.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE load-and-broadcast with scalar plus scalar addressing.
  // rm may not be the stack pointer.
  void SVELoadBroadcastQuadScalarPlusScalar(uint32_t msz, uint32_t ssz, ZRegister zt, PRegister pg, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(rm != Reg::rsp, "rm may not be the stack pointer");

    constexpr uint32_t BaseEncoding = 0b1010'0100'0000'0000'0000'0000'0000'0000;
    const uint32_t Operands = (msz << 23) | (ssz << 21) | (rm.Idx() << 16) | (pg.Idx() << 10) | (rn.Idx() << 5) | zt.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE load of a single element that is broadcast to every lane.
  // imm is a byte offset: a multiple of the memory element size, at most 63
  // elements away. It is encoded divided by the memory element size.
  void SVELoadAndBroadcastElement(bool is_signed, SubRegSize esize, SubRegSize msize, ZRegister zt, PRegister pg, Register rn, uint32_t imm) {
    LOGMAN_THROW_A_FMT(esize != SubRegSize::i128Bit, "Cannot use 128-bit elements.");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    if (is_signed) {
      // Sign extension only makes sense when the lane is wider than the
      // value read from memory.
      LOGMAN_THROW_A_FMT(esize > msize, "Signed broadcast element size must be greater than memory size.");
    }

    const auto esize_value = FEXCore::ToUnderlying(esize);
    const auto msize_value = FEXCore::ToUnderlying(msize);

    const auto bytes_per_load = 1U << msize_value;
    const auto largest_imm = (64U << msize_value) - bytes_per_load;
    LOGMAN_THROW_A_FMT((imm % bytes_per_load) == 0 && imm <= largest_imm, "imm must be a multiple of {} and be within [0, {}]",
                       bytes_per_load, largest_imm);

    const auto imm_field = imm / bytes_per_load;

    // Signed forms are encoded by XORing both dtype halves of the would-be
    // unsigned encoding with 3, which leaves dtypeh larger than dtypel.
    const auto dtype_flip = is_signed ? 0b11 : 0b00;
    const auto dtypeh = msize_value ^ dtype_flip;
    const auto dtypel = esize_value ^ dtype_flip;

    // Rejects bogus element/memory size pairings: unsigned forms always have
    // dtypeh <= dtypel, and only signed forms have dtypeh > dtypel.
    LOGMAN_THROW_A_FMT(is_signed == (dtypeh > dtypel),
                       "Invalid element size used with load broadcast instruction (esize: {}, msize: {})", esize_value, msize_value);

    uint32_t Encoding = 0b1000'0100'0100'0000'1000'0000'0000'0000;
    Encoding |= zt.Idx();
    Encoding |= rn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= dtypel << 13;
    Encoding |= imm_field << 16;
    Encoding |= dtypeh << 23;
    dc32(Encoding);
  }

  // Encodes an SVE INDEX instruction.
  // Both immediates are truncated to five bits: imm5 is shifted to bit 5 and
  // imm5b to bit 16.
  void SVEIndexGeneration(uint32_t op, SubRegSize size, ZRegister zd, int32_t imm5, int32_t imm5b) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "INDEX cannot use 128-bit element sizes");

    const uint32_t low_field = static_cast<uint32_t>(imm5) & 0b11111;
    const uint32_t high_field = static_cast<uint32_t>(imm5b) & 0b11111;

    constexpr uint32_t BaseEncoding = 0b0000'0100'0010'0000'0100'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (high_field << 16) | (op << 10) | (low_field << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE integer compare against an unsigned immediate.
  //
  // lt, ne: single-bit opcode selectors placed at bits 13 and 4.
  // imm7:   unsigned immediate; must fit in seven bits (0 to 127).
  // size:   element size (128-bit is not allowed).
  // pg:     governing predicate (p0-p7).
  // zn:     source vector.
  // pd:     destination predicate.
  void SVEIntegerCompareImm(uint32_t lt, uint32_t ne, uint32_t imm7, SubRegSize size, PRegister pg, ZRegister zn, PRegister pd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    // The check excludes 128, so the reported upper bound is 127.
    LOGMAN_THROW_A_FMT(imm7 < 128, "Invalid imm ({}). Must be within [0, 127]", imm7);
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    uint32_t Instr = 0b0010'0100'0010'0000'0000'0000'0000'0000;
    Instr |= FEXCore::ToUnderlying(size) << 22;
    Instr |= imm7 << 14;
    Instr |= lt << 13;
    Instr |= pg.Idx() << 10;
    Instr |= zn.Idx() << 5;
    Instr |= ne << 4;
    Instr |= pd.Idx();
    dc32(Instr);
  }

  // Encodes an SVE integer compare against a signed 5-bit immediate.
  // imm5 is truncated to its low five bits for encoding.
  void SVEIntegerCompareSignedImm(uint32_t op, uint32_t o2, uint32_t ne, int32_t imm5, SubRegSize size, PRegister pg, ZRegister zn, PRegister pd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(imm5 >= -16 && imm5 <= 15, "Invalid imm ({}). Must be within [-16, 15].", imm5);
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const uint32_t imm_field = static_cast<uint32_t>(imm5) & 0b1'1111;

    constexpr uint32_t BaseEncoding = 0b0010'0101'0000'0000'0000'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (imm_field << 16) | (op << 15) | (o2 << 13) | (pg.Idx() << 10) |
                              (zn.Idx() << 5) | (ne << 4) | pd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE floating-point vector compare that writes predicate pd.
  void SVEFloatCompareVector(uint32_t op, uint32_t o2, uint32_t o3, SubRegSize size, ZRegister zm, PRegister pg, ZRegister zn, PRegister pd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Can't use 8-bit size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    uint32_t Encoding = 0b0110'0101'0000'0000'0100'0000'0000'0000;
    Encoding |= pd.Idx();
    Encoding |= o3 << 4;
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= o2 << 13;
    Encoding |= op << 15;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE predicated integer min/max/difference instruction.
  // zd must equal zdn, so only zd and zm are encoded as vector operands.
  void SVEIntegerMinMaxDifferencePredicated(uint32_t opc, uint32_t U, SubRegSize size, PRegister pg, ZRegister zdn, ZRegister zm, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(zd == zdn, "zd needs to equal zdn");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0000'0100'0000'1000'0000'0000'0000'0000;
    const uint32_t Operands = (FEXCore::ToUnderlying(size) << 22) | (opc << 17) | (U << 16) | (pg.Idx() << 10) | (zm.Idx() << 5) | zd.Idx();
    dc32(BaseEncoding | Operands);
  }

  // Encodes an SVE predicated bitwise shift by immediate.
  // zd must equal zdn. A non-zero L selects the left-shift form of the
  // immediate encoding.
  void SVEBitWiseShiftImmediatePred(SubRegSize size, uint32_t opc, uint32_t L, uint32_t U, PRegister pg, ZRegister zd, ZRegister zdn,
                                    uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(zd == zdn, "zd needs to equal zdn");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    const bool ShiftsLeft = L != 0;
    const auto [tsz_hi, tsz_lo_imm3] = EncodeSVEShiftImmediate(size, Shift, ShiftsLeft);

    uint32_t Encoding = 0b0000'0100'0000'0000'1000'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= tsz_lo_imm3 << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= U << 16;
    Encoding |= L << 17;
    Encoding |= opc << 18;
    Encoding |= tsz_hi << 22;
    dc32(Encoding);
  }

  // Encodes an SVE unpredicated bitwise shift by immediate.
  // An opc of 0b11 is treated as the left-shift form when encoding the immediate.
  void SVEBitWiseShiftImmediateUnpred(SubRegSize size, uint32_t opc, ZRegister zd, ZRegister zn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");

    const bool ShiftsLeft = opc == 0b11;
    const auto [tsz_hi, tsz_lo_imm3] = EncodeSVEShiftImmediate(size, Shift, ShiftsLeft);

    uint32_t Encoding = 0b0000'0100'0010'0000'1001'0000'0000'0000;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= tsz_lo_imm3 << 16;
    Encoding |= tsz_hi << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 bitwise ternary operation. The operation is destructive:
  // zd must alias zdn, and zdn is what gets written to the low register field.
  void SVE2BitwiseTernary(uint32_t opc, uint32_t o2, ZRegister zm, ZRegister zk, ZRegister zd, ZRegister zdn) {
    LOGMAN_THROW_A_FMT(zd == zdn, "zd needs to equal zdn");

    constexpr uint32_t BaseEncoding = 0b0000'0100'0010'0000'0011'1000'0000'0000;

    const uint32_t OpcField = opc << 22;
    const uint32_t ZmField = zm.Idx() << 16;
    const uint32_t O2Field = o2 << 10;
    const uint32_t ZkField = zk.Idx() << 5;
    const uint32_t ZdnField = zdn.Idx();

    dc32(BaseEncoding | OpcField | ZmField | O2Field | ZkField | ZdnField);
  }

  // Encodes an SVE vector permute that takes an immediate.
  // The immediate is split: everything above its low three bits goes to bit 16,
  // while the low three bits go to bit 10.
  void SVEPermuteVector(uint32_t op0, ARMEmitter::ZRegister zd, ARMEmitter::ZRegister zm, uint32_t Imm) {
    constexpr uint32_t BaseEncoding = 0b0000'0101'0010'0000'0000'0000'0000'0000;

    const uint32_t ImmHigh = Imm >> 3;
    const uint32_t ImmLow = Imm & 0b111;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zm.Idx() << 5;
    Encoding |= ImmLow << 10;
    Encoding |= ImmHigh << 16;
    Encoding |= op0 << 22;
    dc32(Encoding);
  }

  // Encodes an SVE integer vector compare that writes a predicate result.
  // `op` is placed at bit 15, `o2` at bit 13 and `ne` at bit 4.
  void SVEIntegerCompareVector(uint32_t op, uint32_t o2, uint32_t ne, SubRegSize size, ZRegister zm, PRegister pg, ZRegister zn, PRegister pd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit element size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0010'0100'0000'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= pd.Idx();
    Encoding |= ne << 4;
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= o2 << 13;
    Encoding |= op << 15;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Wide-element variant of the integer vector compare: rejects 64-bit elements,
  // then defers to SVEIntegerCompareVector (note the differing argument order).
  void SVEIntegerCompareVectorWide(uint32_t op, uint32_t o2, uint32_t ne, SubRegSize size, PRegister pd, PRegister pg, ZRegister zn, ZRegister zm) {
    const bool Is64BitElement = size == SubRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(!Is64BitElement, "Can't use 64-bit element size");

    SVEIntegerCompareVector(op, o2, ne, size, zm, pg, zn, pd);
  }

  // Encodes an SVE2 saturating extract-narrow operation.
  // Only 8-bit, 16-bit and 32-bit sizes pass the size check.
  void SVE2SaturatingExtractNarrow(SubRegSize size, uint32_t opc, uint32_t T, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit && size != SubRegSize::i64Bit, "Can't use 64/128-bit size");

    // No shift is involved here; the left-shift encoder is reused with a shift
    // of zero purely to obtain the tszh/tszl size bits.
    const auto SizeBits = EncodeSVEShiftImmediate(size, 0, true);

    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0000'0100'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= T << 10;
    Encoding |= opc << 11;
    Encoding |= SizeBits.tszl_imm3 << 16;
    Encoding |= SizeBits.tszh << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 bitwise shift-right-narrow operation.
  // The shift is encoded as a right shift (the helper's default mode).
  void SVE2BitwiseShiftRightNarrow(SubRegSize size, uint32_t shift, uint32_t opc, uint32_t U, uint32_t R, uint32_t T, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit && size != SubRegSize::i64Bit, "Can't use 64/128-bit element size");

    const auto EncodedShift = EncodeSVEShiftImmediate(size, shift);

    constexpr uint32_t BaseEncoding = 0b0100'0101'0010'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= T << 10;
    Encoding |= R << 11;
    Encoding |= U << 12;
    Encoding |= opc << 13;
    Encoding |= EncodedShift.tszl_imm3 << 16;
    Encoding |= EncodedShift.tszh << 22;
    dc32(Encoding);
  }

  // Encodes a predicated SVE floating-point unary operation.
  // `opc` is placed at bit 16 and the element size at bit 22.
  void SVEFloatUnary(uint32_t opc, SubRegSize size, PRegister pg, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "Unsupported size in {}", __func__);

    constexpr uint32_t BaseEncoding = 0b0110'0101'0000'1100'1010'0000'0000'0000;

    const uint32_t SizeField = FEXCore::ToUnderlying(size) << 22;
    const uint32_t OpcField = opc << 16;
    const uint32_t PgField = pg.Idx() << 10;
    const uint32_t ZnField = zn.Idx() << 5;
    const uint32_t ZdField = zd.Idx();

    dc32(BaseEncoding | SizeField | OpcField | PgField | ZnField | ZdField);
  }

  // Encodes an unpredicated SVE2 integer multiply between two vectors.
  // `opc` is placed at bit 10.
  void SVE2IntegerMultiplyVectors(uint32_t opc, SubRegSize size, ZRegister zm, ZRegister zn, ZRegister zd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t BaseEncoding = 0b0000'0100'0010'0000'0110'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 10;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes a predicated SVE vector permute.
  // `opc1` is placed at bit 16 and `opc2` at bit 13.
  void SVEPermuteVectorPredicated(uint32_t opc1, uint32_t opc2, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0000'0101'0010'0000'1000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= opc2 << 13;
    Encoding |= opc1 << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE propagate-break operation on predicate registers.
  // `opc` is placed at bit 20, `op2` at bit 14 and `op3` at bit 4.
  void SVEPropagateBreak(uint32_t opc, uint32_t op2, uint32_t op3, PRegister pd, PRegister pg, PRegister pn, PRegister pm) {
    constexpr uint32_t BaseEncoding = 0b0010'0101'0000'0000'0000'0000'0000'0000;

    // Opcode-style fields.
    const uint32_t OpFields = (opc << 20) | (op2 << 14) | (op3 << 4);
    // Predicate register fields.
    const uint32_t RegFields = (pm.Idx() << 16) | (pg.Idx() << 10) | (pn.Idx() << 5) | pd.Idx();

    dc32(BaseEncoding | OpFields | RegFields);
  }

  // Encodes a miscellaneous SVE predicate operation.
  // Note: `op2` carries op1 as well, combined like [op1:op2], since the two
  // fields sit next to each other in the encoding.
  void SVEPredicateMisc(uint32_t op0, uint32_t op2, uint32_t op3, SubRegSize size, PRegister pd) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t BaseEncoding = 0b0010'0101'0001'0000'1100'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= pd.Idx();
    Encoding |= op3 << 5;
    Encoding |= op2 << 9;
    Encoding |= op0 << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE integer compare of two scalar registers.
  // `op1` is placed at bit 10, `b4` at bit 4 and `op2` occupies the lowest bits.
  void SVEIntCompareScalar(uint32_t op1, uint32_t b4, uint32_t op2, SubRegSize size, Register rn, Register rm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Can't use 128-bit size");

    constexpr uint32_t BaseEncoding = 0b0010'0101'0010'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= op2;
    Encoding |= b4 << 4;
    Encoding |= rn.Idx() << 5;
    Encoding |= op1 << 10;
    Encoding |= rm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE FFR-write class instruction from its five raw opcode fields,
  // placed at bits 18, 16, 9, 5 and 0 respectively.
  void SVEWriteFFR(uint32_t op0, uint32_t op1, uint32_t op2, uint32_t op3, uint32_t op4) {
    constexpr uint32_t BaseEncoding = 0b0010'0101'0010'1000'1001'0000'0000'0000;

    const uint32_t Encoding = BaseEncoding | (op0 << 18) | (op1 << 16) | (op2 << 9) | (op3 << 5) | op4;
    dc32(Encoding);
  }

  // Encodes an unpredicated SVE floating-point unary operation.
  // `opc` is placed at bit 16.
  void SVEFPUnaryOpsUnpredicated(uint32_t opc, SubRegSize size, ZRegister zd, ZRegister zn) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0000'1000'0011'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= opc << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes a predicated SVE floating-point serial reduction.
  // The operation is destructive: vn must alias vd, and vd is written to the
  // low register field.
  void SVEFPSerialReductionPredicated(uint32_t opc, SubRegSize size, VRegister vd, PRegister pg, VRegister vn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");
    LOGMAN_THROW_A_FMT(vd == vn, "vn must be the same as vd");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0001'1000'0010'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= vd.Idx();
    Encoding |= zm.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= opc << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes a predicated SVE floating-point compare against zero.
  // `eqlt` is placed at bit 16 and `ne` at bit 4.
  void SVEFPCompareWithZero(uint32_t eqlt, uint32_t ne, SubRegSize size, PRegister pd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0001'0000'0010'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= pd.Idx();
    Encoding |= ne << 4;
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= eqlt << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes a predicated SVE floating-point multiply-add.
  // NOTE: `opc` also carries the op0 bit (bit 15), packed like op0:opc, because
  // the two fields are adjacent in the encoding.
  void SVEFPMultiplyAdd(uint32_t opc, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0110'0101'0010'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= opc << 13;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE floating-point multiply-add by indexed element.
  //
  // The size and index fields are laid out differently per element size:
  //   16-bit: bits [23:22] = 0:i3h, bits [20:19] = i3l, Zm in bits [18:16]
  //   32-bit: bits [23:22] = 0b10,  bits [20:19] = i2,  Zm in bits [18:16]
  //   64-bit: bits [23:22] = 0b11,  bit  [20]    = i1,  Zm in bits [19:16]
  //
  // For 16-bit elements bit 22 is therefore NOT a size bit: it holds the top bit
  // of the three-bit index, and bit 21 is a fixed one that must not receive index bits.
  //
  // op    - Raw opcode bits, placed at bit 10.
  // size  - Element size; must be 16-bit, 32-bit or 64-bit.
  // zda   - Accumulator/destination vector.
  // zn    - First source vector.
  // zm    - Indexed source vector (z0-z7 for 16/32-bit, z0-z15 for 64-bit).
  // index - Element index within each 128-bit segment.
  void SVEFPMultiplyAddIndexed(uint32_t op, SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, uint32_t index) {
    LOGMAN_THROW_A_FMT(IsStandardFloatSize(size), "SubRegSize must be 16-bit, 32-bit, or 64-bit");
    LOGMAN_THROW_A_FMT((size <= SubRegSize::i32Bit && zm <= ZReg::z7) || (size == SubRegSize::i64Bit && zm <= ZReg::z15),
                       "16-bit and 32-bit indexed variants may only use Zm between z0-z7\n"
                       "64-bit variants may only use Zm between z0-z15");

    const auto Underlying = FEXCore::ToUnderlying(size);
    const uint32_t IndexMax = (16 / (1U << Underlying)) - 1;
    LOGMAN_THROW_A_FMT(index <= IndexMax, "Index must be within 0-{}", IndexMax);

    uint32_t Instr = 0b0110'0100'0010'0000'0000'0000'0000'0000;
    if (size == SubRegSize::i16Bit) {
      // Half precision: size field stays 0b0x, where x (bit 22) is the index's top bit.
      // The remaining two index bits go to bits [20:19], skipping the fixed bit 21.
      Instr |= (index & 0b100) << 20;
      Instr |= (index & 0b011) << 19;
    } else {
      // Can be bit 20 or 19 depending on whether or not the element size is 64-bit.
      const auto IndexShift = 19 + static_cast<uint32_t>(size == SubRegSize::i64Bit);

      Instr |= Underlying << 22;
      // IndexMax is of the form 2^n - 1, so it doubles as the valid index mask.
      Instr |= (index & IndexMax) << IndexShift;
    }
    Instr |= zm.Idx() << 16;
    Instr |= op << 10;
    Instr |= zn.Idx() << 5;
    Instr |= zda.Idx();
    dc32(Instr);
  }

  // Encodes an SVE floating-point widening multiply-add by indexed element.
  // The three-bit index is split: its upper two bits go to bits [20:19] and its
  // lowest bit goes to bit 11.
  void SVEFPMultiplyAddLongIndexed(uint32_t o2, uint32_t op, uint32_t T, SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm,
                                   uint32_t index) {
    LOGMAN_THROW_A_FMT(dstsize == SubRegSize::i32Bit, "Destination size must be 32-bit.");
    LOGMAN_THROW_A_FMT(index <= 7, "Index ({}) must be within [0, 7]", index);
    LOGMAN_THROW_A_FMT(zm <= ZReg::z7, "zm (z{}) must be within [z0, z7]", zm.Idx());

    constexpr uint32_t BaseEncoding = 0b0110'0100'1010'0000'0100'0000'0000'0000;

    const uint32_t IndexHighField = (index & 0b110) << 18;
    const uint32_t IndexLowField = (index & 0b001) << 11;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zda.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= T << 10;
    Encoding |= IndexLowField;
    Encoding |= op << 13;
    Encoding |= zm.Idx() << 16;
    Encoding |= IndexHighField;
    Encoding |= o2 << 22;
    dc32(Encoding);
  }

  // Encodes an SVE floating-point widening multiply-add between vectors.
  // Only a 32-bit destination element size is accepted.
  void SVEFPMultiplyAddLong(uint32_t o2, uint32_t op, uint32_t T, SubRegSize dstsize, ZRegister zda, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(dstsize == SubRegSize::i32Bit, "Destination size must be 32-bit.");

    constexpr uint32_t BaseEncoding = 0b0110'0100'1010'0000'1000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zda.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= T << 10;
    Encoding |= op << 13;
    Encoding |= zm.Idx() << 16;
    Encoding |= o2 << 22;
    dc32(Encoding);
  }

  // Encodes an SVE floating-point matrix multiply-accumulate.
  // Only 32-bit and 64-bit element sizes are accepted.
  void SVEFPMatrixMultiplyAccumulate(SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i32Bit || size == SubRegSize::i64Bit, "SubRegSize must be 32-bit or 64-bit");

    constexpr uint32_t BaseEncoding = 0b0110'0100'0010'0000'1110'0100'0000'0000;

    const uint32_t SizeField = FEXCore::ToUnderlying(size) << 22;
    const uint32_t ZmField = zm.Idx() << 16;
    const uint32_t ZnField = zn.Idx() << 5;
    const uint32_t ZdaField = zda.Idx();

    dc32(BaseEncoding | SizeField | ZmField | ZnField | ZdaField);
  }

  // Encodes an SVE predicate count that writes a general-purpose register.
  // `opc` is placed at bit 16; pg at bit 10 and pn at bit 5.
  void SVEPredicateCount(uint32_t opc, SubRegSize size, XRegister rd, PRegister pg, PRegister pn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0010'0101'0010'0000'1000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= rd.Idx();
    Encoding |= pn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= opc << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE element-count operation on a vector, driven by a predicate pattern.
  // The multiplier is given as 1-16 and stored biased by one (imm4 - 1) at bit 16.
  void SVEElementCount(uint32_t b20, uint32_t op1, SubRegSize size, ZRegister zdn, PredicatePattern pattern, uint32_t imm4) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");
    LOGMAN_THROW_A_FMT(imm4 >= 1 && imm4 <= 16, "Immediate must be between 1-16 inclusive");

    constexpr uint32_t BaseEncoding = 0b0000'0100'0010'0000'1100'0000'0000'0000;

    const uint32_t BiasedImm = imm4 - 1;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zdn.Idx();
    Encoding |= FEXCore::ToUnderlying(pattern) << 5;
    Encoding |= op1 << 10;
    Encoding |= BiasedImm << 16;
    Encoding |= b20 << 20;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE increment/decrement-by-predicate-count on a scalar register.
  // Field placement: op0 at bit 18, b16 at bit 16, op1 at bit 11, opc at bit 9.
  void SVEIncDecPredicateCountScalar(uint32_t op0, uint32_t op1, uint32_t opc, uint32_t b16, SubRegSize size, Register rdn, PRegister pm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0010'0101'0010'1000'1000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= rdn.Idx();
    Encoding |= pm.Idx() << 5;
    Encoding |= opc << 9;
    Encoding |= op1 << 11;
    Encoding |= b16 << 16;
    Encoding |= op0 << 18;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }
  void SVEIncDecPredicateCountVector(uint32_t op0, uint32_t op1, uint32_t opc, uint32_t b16, SubRegSize size, ZRegister zdn, PRegister pm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit, "Cannot use 8-bit element size");
    SVEIncDecPredicateCountScalar(op0, op1, opc, b16, size, Register {zdn.Idx()}, pm);
  }

  // Shared encoder for the SVE2 predicated integer group.
  // `op0` is placed at bit 16 and `op1` at bit 13.
  void SVE2IntegerPredicated(uint32_t op0, uint32_t op1, SubRegSize size, ZRegister zd, PRegister pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit size");
    LOGMAN_THROW_A_FMT(pg <= PReg::p7, "Can only use p0-p7 as a governing predicate");

    constexpr uint32_t BaseEncoding = 0b0100'0100'0000'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= pg.Idx() << 10;
    Encoding |= op1 << 13;
    // Deliberately shifted by 16 rather than 17 so callers can pass the wider bit range directly.
    Encoding |= op0 << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 integer pairwise add-and-accumulate-long operation.
  // `U` forms the lowest bit of the op0 value handed to SVE2IntegerPredicated.
  void SVE2IntegerPairwiseAddAccumulateLong(uint32_t U, SubRegSize size, ZRegister zda, PRegisterMerge pg, ZRegister zn) {
    LOGMAN_THROW_A_FMT(size == SubRegSize::i16Bit || size == SubRegSize::i32Bit || size == SubRegSize::i64Bit,
                       "SubRegSize must be 16-bit, 32-bit, or 64-bit");

    const uint32_t op0 = (0b0010 << 1) | U;
    constexpr uint32_t op1 = 0b101;
    SVE2IntegerPredicated(op0, op1, size, zda, pg, zn);
  }

  // Encodes an SVE2 predicated integer unary operation by forwarding to
  // SVE2IntegerPredicated with a fixed op1 of 0b101.
  void SVE2IntegerUnaryOpsPredicated(uint32_t op0, SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn) {
    constexpr uint32_t op1 = 0b101;
    SVE2IntegerPredicated(op0, op1, size, zd, pg, zn);
  }

  // Encodes an SVE2 saturating/rounding bitwise shift left (predicated).
  // Destructive: zn must alias zd, so only zd and zm are passed on for encoding.
  void SVE2SaturatingRoundingBitwiseShiftLeft(uint32_t op0, SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zn needs to equal zd");

    constexpr uint32_t op1 = 0b100;
    SVE2IntegerPredicated(op0, op1, size, zd, pg, zm);
  }

  // Encodes an SVE2 predicated integer halving operation.
  // Destructive: zn must alias zd. `RSU` fills the low three bits of op0.
  void SVE2IntegerHalvingPredicated(uint32_t RSU, SubRegSize size, PRegister pg, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zn needs to equal zd");

    const uint32_t op0 = (0b10 << 3) | RSU;
    constexpr uint32_t op1 = 0b100;
    SVE2IntegerPredicated(op0, op1, size, zd, pg, zm);
  }

  // Encodes a predicated SVE integer pairwise arithmetic operation.
  // Destructive: zn must alias zd. op0 is built as 0b10:opc:U.
  void SVEIntegerPairwiseArithmetic(uint32_t opc, uint32_t U, SubRegSize size, PRegister pg, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zn needs to equal zd");

    const uint32_t op0 = (0b10 << 3) | (opc << 1) | U;
    constexpr uint32_t op1 = 0b101;
    SVE2IntegerPredicated(op0, op1, size, zd, pg, zm);
  }

  // Encodes an SVE2 predicated saturating integer add/subtract.
  // Destructive: zn must alias zd. `opc` fills the low three bits of op0.
  void SVE2IntegerSaturatingAddSub(uint32_t opc, SubRegSize size, ZRegister zd, PRegisterMerge pg, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(zd == zn, "zn needs to equal zd");

    const uint32_t op0 = (0b11 << 3) | opc;
    constexpr uint32_t op1 = 0b100;
    SVE2IntegerPredicated(op0, op1, size, zd, pg, zm);
  }

  // Shared encoder for the unpredicated SVE integer multiply-add group.
  // `op0` is placed at bit 10.
  void SVEIntegerMultiplyAddUnpredicated(uint32_t op0, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    LOGMAN_THROW_A_FMT(size != SubRegSize::i128Bit, "Cannot use 128-bit element size");

    constexpr uint32_t BaseEncoding = 0b0100'0100'0000'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= op0 << 10;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE integer dot product with rotation.
  // Restricts the element size to 32-bit or 64-bit, then shares the
  // complex multiply-add encoder.
  void SVEIntegerDotProduct(uint32_t op, SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, Rotation rot) {
    const bool IsSupportedSize = size == SubRegSize::i32Bit || size == SubRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsSupportedSize, "Dot product must only use 32-bit or 64-bit element sizes");

    SVEIntegerComplexMulAdd(op, size, zda, zn, zm, rot);
  }

  // Encodes an SVE integer complex multiply-add.
  // The rotation occupies the two lowest bits of op0, with `op` placed above it.
  void SVEIntegerComplexMulAdd(uint32_t op, SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm, Rotation rot) {
    const uint32_t RotationBits = FEXCore::ToUnderlying(rot);
    const uint32_t op0 = (op << 2) | RotationBits;

    SVEIntegerMultiplyAddUnpredicated(op0, size, zda, zn, zm);
  }

  // Encodes an SVE2 saturating multiply-add (interleaved) operation.
  // 8-bit elements are rejected here; the shared encoder rejects 128-bit.
  void SVE2SaturatingMulAddInterleaved(uint32_t op0, SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    const bool Is8BitElement = size == SubRegSize::i8Bit;
    LOGMAN_THROW_A_FMT(!Is8BitElement, "Element size may only be 16-bit, 32-bit, or 64-bit");

    SVEIntegerMultiplyAddUnpredicated(op0, size, zda, zn, zm);
  }

  // Encodes an SVE2 integer multiply-add long operation.
  // Uses exactly the same validation and encoding path as
  // SVE2SaturatingMulAddInterleaved, so it simply forwards to it.
  void SVE2IntegerMulAddLong(uint32_t op0, SubRegSize size, ZRegister zda, ZRegister zn, ZRegister zm) {
    return SVE2SaturatingMulAddInterleaved(op0, size, zda, zn, zm);
  }

  // Shared encoder for the SVE2 widening integer arithmetic group.
  // `op` is placed at bit 13 and `SUT` at bit 10. No size validation happens
  // here; the wrappers calling this perform their own checks.
  void SVE2WideningIntegerArithmetic(uint32_t op, uint32_t SUT, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    constexpr uint32_t BaseEncoding = 0b0100'0101'0000'0000'0000'0000'0000'0000;

    uint32_t Encoding = BaseEncoding;
    Encoding |= zd.Idx();
    Encoding |= zn.Idx() << 5;
    Encoding |= SUT << 10;
    Encoding |= op << 13;
    Encoding |= zm.Idx() << 16;
    Encoding |= FEXCore::ToUnderlying(size) << 22;
    dc32(Encoding);
  }

  // Encodes an SVE2 integer add/subtract long operation.
  // Neither 8-bit nor 128-bit element sizes are accepted.
  void SVE2IntegerAddSubLong(uint32_t op, uint32_t SUT, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    const bool IsSupportedSize = size != SubRegSize::i8Bit && size != SubRegSize::i128Bit;
    LOGMAN_THROW_A_FMT(IsSupportedSize, "Can't use 8-bit or 128-bit element size");

    SVE2WideningIntegerArithmetic(op, SUT, size, zd, zn, zm);
  }

  // Encodes an SVE2 integer add/subtract wide operation (op fixed to 0b10).
  // Neither 8-bit nor 128-bit element sizes are accepted.
  void SVE2IntegerAddSubWide(uint32_t SUT, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    const bool IsSupportedSize = size != SubRegSize::i8Bit && size != SubRegSize::i128Bit;
    LOGMAN_THROW_A_FMT(IsSupportedSize, "Can't use 8-bit or 128-bit element size");

    constexpr uint32_t op = 0b10;
    SVE2WideningIntegerArithmetic(op, SUT, size, zd, zn, zm);
  }

  void SVE2IntegerMultiplyLong(uint32_t SUT, SubRegSize size, ZRegister zd, ZRegister zn, ZRegister zm) {
    // PMULLB and PMULLT support the use of 128-bit element sizes (with the SVE2PMULL128 extension)
    if (SUT == 0b010 || SUT == 0b011) {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i32Bit, "Can't use 8-bit or 32-bit element size");

      // 128-bit variant is encoded as if it were 8-bit (0b00)
      if (size == SubRegSize::i128Bit) {
        size = SubRegSize::i8Bit;
      }
    } else {
      LOGMAN_THROW_A_FMT(size != SubRegSize::i8Bit && size != SubRegSize::i128Bit, "Can't use 8-bit or 128-bit element size");
    }

    SVE2WideningIntegerArithmetic(0b11, SUT, size, zd, zn, zm);
  }

  // Result of encoding an SVE shift immediate.
  //   tszh      - Everything above the low five bits of the encoded value.
  //   tszl_imm3 - The low five bits of the encoded value (tszl and imm3 combined).
  struct SVEEncodedImmShift {
    uint32_t tszh;
    uint32_t tszl_imm3;
  };
  // Helper for encoding shift immediates that use the tszh:tszl and imm3 fields.
  //
  // Left and right shifts both encode the element size as the highest set bit of
  // tszh:tszl (tsize), with larger elements moving that bit further to the left
  // to make room for larger shift values (e.g. B: tsize=0b0001, H: tsize=0b001x).
  //
  // Left shifts encode as element_size_in_bits + shift: the size takes the next
  // bit up in tsize, leaving the bits beneath it for the shift amount.
  //
  // Right shifts encode as a subtraction, (2 * element_size_in_bits) - shift.
  // For example a 32-bit shift of 1 gives 64 - 1 = 63, i.e. tsize=0b0111 and
  // imm3=0b111, so a smaller encoded value means a larger shift.
  static constexpr SVEEncodedImmShift EncodeSVEShiftImmediate(SubRegSize size, uint32_t shift, bool is_left_shift = false) {
    const uint32_t esize = SubRegSizeInBits(size);

    uint32_t encoded = 0;
    if (is_left_shift) {
      LOGMAN_THROW_A_FMT(shift < esize, "Invalid left shift value ({}). Must be within [0, {}]", shift, esize - 1);
      encoded = esize + shift;
    } else {
      LOGMAN_THROW_A_FMT(shift > 0 && shift <= esize, "Invalid right shift value ({}). Must be within [1, {}]", shift, esize);
      encoded = (2 * esize) - shift;
    }

    return SVEEncodedImmShift {
      .tszh = encoded >> 5,
      .tszl_imm3 = encoded & 0b11111,
    };
  }

  // Maps a floating-point type T (float or double only) to the unsigned integer
  // type of the same width: float -> uint32_t, double -> uint64_t.
  template<typename T>
  requires (std::is_same_v<T, float> || std::is_same_v<T, double>)
  using FloatToEquivalentUInt = std::conditional_t<std::is_same_v<T, double>, uint64_t, uint32_t>;

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Reports whether a floating-point value can be represented as an 8-bit
  // floating-point immediate. Derived from the VFPExpandImm pseudocode in the
  // ARM A-profile reference manual.
  //
  // Three conditions must all hold:
  //   1. The low mantissa bits not covered by the immediate are zero.
  //   2. The replicated exponent bits are either all set or all clear.
  //   3. The two bits directly below the sign bit differ from each other.
  template<typename T>
  requires (std::is_same_v<T, float> || std::is_same_v<T, double>)
  [[nodiscard]]
  static bool IsValidFPValueForImm8(T value) {
    struct FieldMasks {
      uint64_t mantissa;
      uint64_t exponent;
    };
    // Indexed by data size: half, single, double.
    static constexpr std::array<FieldMasks, 3> masks_by_size {{
      {0x00000000'0000003FULL, 0x00000000'00003000ULL}, // half   (mantissa [5:0],  exponent [13:12])
      {0x00000000'0007FFFFULL, 0x00000000'3E000000ULL}, // single (mantissa [18:0], exponent [29:25])
      {0x0000FFFF'FFFFFFFFULL, 0x3FC00000'00000000ULL}, // double (mantissa [47:0], exponent [61:54])
    }};

    const uint64_t raw = std::bit_cast<FloatToEquivalentUInt<T>>(value);
    const uint64_t size_idx = FEXCore::ilog2(sizeof(T)) - 1;
    const FieldMasks& fields = masks_by_size[size_idx];

    const bool mantissa_is_clear = (raw & fields.mantissa) == 0;

    const uint64_t exponent_bits = raw & fields.exponent;
    const bool exponent_is_uniform = exponent_bits == 0 || exponent_bits == fields.exponent;

    // XOR-ing the value with itself shifted left by one leaves, at the position
    // just below the sign bit, whether that bit differs from the one beneath it.
    const uint64_t below_sign_bit = 1ULL << ((8ULL * sizeof(T)) - 2);
    const bool top_exponent_bits_differ = ((raw ^ (raw << 1)) & below_sign_bit) != 0;

    return mantissa_is_clear && exponent_is_uniform && top_exponent_bits_differ;
  }
#endif

protected:
  // Packs a single-precision value into an 8-bit floating-point immediate.
  // Bit 31 of the value becomes bit 7, bit 29 becomes bit 6, and bits [24:19]
  // become bits [5:0]. With assertions enabled, unencodable values are rejected.
  static uint32_t FP32ToImm8(float value) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(IsValidFPValueForImm8(value), "Value ({}) cannot be encoded into an 8-bit immediate", value);
#endif

    const uint32_t raw = std::bit_cast<uint32_t>(value);

    uint32_t imm8 = (raw >> 19) & 0x3F; // bits [24:19] -> [5:0]
    imm8 |= (raw >> 23) & 0x40;         // bit 29 -> bit 6
    imm8 |= (raw >> 24) & 0x80;         // bit 31 -> bit 7
    return imm8;
  }

  // Packs a double-precision value into an 8-bit floating-point immediate.
  // Bit 63 of the value becomes bit 7, bit 61 becomes bit 6, and bits [53:48]
  // become bits [5:0]. With assertions enabled, unencodable values are rejected.
  static uint32_t FP64ToImm8(double value) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(IsValidFPValueForImm8(value), "Value ({}) cannot be encoded into an 8-bit immediate", value);
#endif

    const uint64_t raw = std::bit_cast<uint64_t>(value);

    uint64_t imm8 = (raw >> 48) & 0x3F; // bits [53:48] -> [5:0]
    imm8 |= (raw >> 55) & 0x40;         // bit 61 -> bit 6
    imm8 |= (raw >> 56) & 0x80;         // bit 63 -> bit 7
    return static_cast<uint32_t>(imm8);
  }

private:
  // Handling for signed 8-bit immediates with an optional shift (e.g. in cpy/dup).
  //   imm      - The immediate to encode (already divided by 256 when shifted).
  //   is_shift - 1 when the immediate was divided by 256, otherwise 0.
  struct HandledSImm8Shift {
    int32_t imm;
    uint32_t is_shift;
  };
  // Values in [-128, 127] are returned unchanged with is_shift = 0. Anything else
  // must be a 16-bit multiple of 256; it is divided by 256 and flagged as shifted.
  // The shifted form is not permitted for 8-bit elements.
  static constexpr HandledSImm8Shift HandleSVESImm8Shift(SubRegSize size, int32_t imm) {
    constexpr int32_t Int8Bound = 128;
    const bool FitsInt8 = imm >= -Int8Bound && imm < Int8Bound;

    if (size == SubRegSize::i8Bit) {
      LOGMAN_THROW_A_FMT(FitsInt8, "Can't perform LSL #8 shift on 8-bit elements.");
    }

    if (FitsInt8) {
      return HandledSImm8Shift {
        .imm = imm,
        .is_shift = 0,
      };
    }

    constexpr int32_t Int16Bound = 32768;
    const bool FitsInt16 = imm >= -Int16Bound && imm < Int16Bound;

    LOGMAN_THROW_A_FMT(FitsInt16, "Immediate ({}) must be a 16-bit value within [-32768, 32512]", imm);
    LOGMAN_THROW_A_FMT((imm % 256) == 0, "Immediate ({}) must be a multiple of 256", imm);

    return HandledSImm8Shift {
      .imm = imm / 256,
      .is_shift = 1,
    };
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct LoadstoreEmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/ScalarOps.inl
================================================
// SPDX-License-Identifier: MIT
/* Scalar instruction emitters.
 *
 * These contain instruction emitters for scalar ASIMD operations explicitly.
 * Some of these emitter arguments might seem a bit strange at first glance,
 * but that is because ARM's instruction encodings for these instructions are a hot mess.
 *
 * Specifically, FP16 was an afterthought for these scalar operations. Using a `ScalarRegSize` with
 * 16-bit wouldn't encode an FP16 instruction, because those belong to a different instruction class.
 *
 * Most FP16 operations instead have their own freestanding implementation using `HRegister` arguments.
 *
 * Meanwhile other FP32 and FP64 instructions will use `ScalarRegSize`, supporting both those sizes.
 *
 * For Scalar integer operations, these instructions will mostly support all `ScalarRegSize` operations.
 * Exceptions to this rule will have asserts in the emitter implementation when misused.
 *
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // Advanced SIMD scalar copy
  ///< Scalar `dup` (element form): selects element `Index` of rn, with `size` giving the element width.
  void dup(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Index) {
    const uint32_t Log2Bytes = FEXCore::ToUnderlying(size);
    const uint32_t ElementBytes = 1U << Log2Bytes;
    // Number of elements of this width that fit in a 128-bit register.
    const uint32_t ElementCount = 128U / (ElementBytes * 8);

    LOGMAN_THROW_A_FMT(Index < ElementCount, "Index too large. Index={}, Max Index: {}", Index, ElementCount);

    // imm5: the element-size marker bit sits at bit log2(bytes), the index directly above it.
    const uint32_t imm5 = ElementBytes | (Index << (Log2Bytes + 1));

    ASIMDScalarCopy(1, 1, imm5, 0b0000, rd, rn);
  }

  ///< Alias for the scalar element `dup`; every argument is forwarded unchanged.
  void mov(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Index) {
    this->dup(size, rd, rn, Index);
  }

  // Advanced SIMD scalar three same FP16
  ///< FP16 scalar `fmulx` (three-same FP16 class).
  void fmulx(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b011;
    ASIMDScalarThreeSameFP16(0, 0, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `fcmeq`, register-register form (three-same FP16 class).
  void fcmeq(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b100;
    ASIMDScalarThreeSameFP16(0, 0, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `frecps` (three-same FP16 class).
  void frecps(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b111;
    ASIMDScalarThreeSameFP16(0, 0, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `frsqrts`; shares opcode 0b111 with `frecps` but sets the second selector bit.
  void frsqrts(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b111;
    ASIMDScalarThreeSameFP16(0, 1, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `fcmge`, register-register form (three-same FP16 class).
  void fcmge(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b100;
    ASIMDScalarThreeSameFP16(1, 0, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `facge` (three-same FP16 class).
  void facge(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b101;
    ASIMDScalarThreeSameFP16(1, 0, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `fabd` (three-same FP16 class).
  void fabd(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b010;
    ASIMDScalarThreeSameFP16(1, 1, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `fcmgt`, register-register form (three-same FP16 class).
  void fcmgt(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b100;
    ASIMDScalarThreeSameFP16(1, 1, Opcode, rm, rn, rd);
  }
  ///< FP16 scalar `facgt` (three-same FP16 class).
  void facgt(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t Opcode = 0b101;
    ASIMDScalarThreeSameFP16(1, 1, Opcode, rm, rn, rd);
  }

  // Advanced SIMD scalar two-register miscellaneous FP16
  ///< FP16 scalar `fcvtns` (two-register misc FP16 class).
  void fcvtns(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalarTwoRegMiscFP16(0, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtms` (two-register misc FP16 class).
  void fcvtms(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalarTwoRegMiscFP16(0, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtas` (two-register misc FP16 class).
  void fcvtas(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11100;
    ASIMDScalarTwoRegMiscFP16(0, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `scvtf` (two-register misc FP16 class).
  void scvtf(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11101;
    ASIMDScalarTwoRegMiscFP16(0, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcmgt`, single-source form (two-register misc FP16 class).
  void fcmgt(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01100;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcmeq`, single-source form (two-register misc FP16 class).
  void fcmeq(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01101;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcmlt`, single-source form (two-register misc FP16 class).
  void fcmlt(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01110;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtps` (two-register misc FP16 class).
  void fcvtps(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtzs` (two-register misc FP16 class).
  void fcvtzs(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `frecpe` (two-register misc FP16 class).
  void frecpe(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11101;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `frecpx` (two-register misc FP16 class).
  void frecpx(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11111;
    ASIMDScalarTwoRegMiscFP16(0, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtnu` (two-register misc FP16 class).
  void fcvtnu(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalarTwoRegMiscFP16(1, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtmu` (two-register misc FP16 class).
  void fcvtmu(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalarTwoRegMiscFP16(1, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtau` (two-register misc FP16 class).
  void fcvtau(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11100;
    ASIMDScalarTwoRegMiscFP16(1, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `ucvtf` (two-register misc FP16 class).
  void ucvtf(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11101;
    ASIMDScalarTwoRegMiscFP16(1, 0, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcmge`, single-source form (two-register misc FP16 class).
  void fcmge(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01100;
    ASIMDScalarTwoRegMiscFP16(1, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcmle`, single-source form (two-register misc FP16 class).
  void fcmle(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01101;
    ASIMDScalarTwoRegMiscFP16(1, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtpu` (two-register misc FP16 class).
  void fcvtpu(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalarTwoRegMiscFP16(1, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `fcvtzu` (two-register misc FP16 class).
  void fcvtzu(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalarTwoRegMiscFP16(1, 1, Opcode, rn, rd);
  }
  ///< FP16 scalar `frsqrte` (two-register misc FP16 class).
  void frsqrte(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b11101;
    ASIMDScalarTwoRegMiscFP16(1, 1, Opcode, rn, rd);
  }

  // Advanced SIMD scalar three same extra
  ///< Scalar `sqrdmlah`; asserts that `size` is 16-bit or 32-bit.
  void sqrdmlah(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i16Bit, "Only supports 16/32-bit");
    constexpr uint32_t Opcode = 0b0000;
    ASIMDScalarThreeSameExtra(1, size, Opcode, rm, rn, rd);
  }
  ///< Scalar `sqrdmlsh`; asserts that `size` is 16-bit or 32-bit.
  void sqrdmlsh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i16Bit, "Only supports 16/32-bit");
    constexpr uint32_t Opcode = 0b0001;
    ASIMDScalarThreeSameExtra(1, size, Opcode, rm, rn, rd);
  }

  // Advanced SIMD scalar two-register miscellaneous
  ///< Scalar `suqadd`; no size restriction is asserted here.
  void suqadd(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00011;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar `sqabs`; no size restriction is asserted here.
  void sqabs(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00111;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }

  ///< Integer `cmgt` against zero (single-source form); 64-bit only.
  void cmgt(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01000;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Integer `cmeq` against zero (single-source form); 64-bit only.
  void cmeq(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01001;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }

  ///< Integer `cmlt` against zero (single-source form); 64-bit only.
  void cmlt(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01010;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar integer `abs`; 64-bit only.
  void abs(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01011;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar `sqxtn`. `size` names the destination element, so 64-bit is rejected.
  void sqxtn(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "64-bit destination not supported");
    constexpr uint32_t Opcode = 0b10100;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }

  ///< Scalar `fcvtns` for 32-bit or 64-bit floats.
  void fcvtns(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 0, EncodedSize, 0b11010, rd, rn);
  }
  ///< Scalar `fcvtms` for 32-bit or 64-bit floats.
  void fcvtms(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 0, EncodedSize, 0b11011, rd, rn);
  }
  ///< Scalar `fcvtas` for 32-bit or 64-bit floats.
  void fcvtas(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 0, EncodedSize, 0b11100, rd, rn);
  }
  ///< Scalar `scvtf` for 32-bit or 64-bit operands.
  void scvtf(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 0, EncodedSize, 0b11101, rd, rn);
  }

  ///< Floating-point `fcmgt` against 0.0 (single-source form); 32-bit or 64-bit.
  void fcmgt(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float compare");
    // `size` is forwarded to the encoder as-is for this form.
    constexpr uint32_t Opcode = 0b01100;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Floating-point `fcmeq` against 0.0 (single-source form); 32-bit or 64-bit.
  void fcmeq(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float compare");
    // `size` is forwarded to the encoder as-is for this form.
    constexpr uint32_t Opcode = 0b01101;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Floating-point `fcmlt` against 0.0 (single-source form); 32-bit or 64-bit.
  void fcmlt(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float compare");
    // `size` is forwarded to the encoder as-is for this form.
    constexpr uint32_t Opcode = 0b01110;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar `fcvtps` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fcvtps(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float convert");
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar `fcvtzs` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fcvtzs(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float convert");
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalar2RegMisc(0, 0, size, Opcode, rd, rn);
  }
  ///< Scalar `frecpe` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void frecpe(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction rather than the unrelated "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for frecpe");

    ASIMDScalar2RegMisc(0, 0, size, 0b11101, rd, rn);
  }
  ///< Scalar `frecpx` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void frecpx(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction rather than the unrelated "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for frecpx");

    ASIMDScalar2RegMisc(0, 0, size, 0b11111, rd, rn);
  }
  ///< Scalar `usqadd`; no size restriction is asserted here.
  void usqadd(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00011;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `sqneg`; no size restriction is asserted here.
  void sqneg(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b00111;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Integer `cmge` against zero (single-source form); 64-bit only.
  void cmge(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01000;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Integer `cmle` against zero (single-source form); 64-bit only.
  void cmle(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01001;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar integer `neg`; 64-bit only.
  void neg(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01011;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `sqxtun`. `size` names the destination element, so 64-bit is rejected.
  void sqxtun(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "64-bit destination not supported");
    constexpr uint32_t Opcode = 0b10010;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `uqxtn`. `size` names the destination element, so 64-bit is rejected.
  void uqxtn(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "64-bit destination not supported");
    constexpr uint32_t Opcode = 0b10100;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `fcvtxn`. `size` is the destination and must be 32-bit; it is only validated,
  ///< the encoder always receives the fixed i16Bit size value.
  void fcvtxn(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");
    constexpr uint32_t Opcode = 0b10110;
    ASIMDScalar2RegMisc(0, 1, ScalarRegSize::i16Bit, Opcode, rd, rn);
  }
  ///< Scalar `fcvtnu` for 32-bit or 64-bit floats.
  void fcvtnu(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 1, EncodedSize, 0b11010, rd, rn);
  }
  ///< Scalar `fcvtmu` for 32-bit or 64-bit floats.
  void fcvtmu(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 1, EncodedSize, 0b11011, rd, rn);
  }
  ///< Scalar `fcvtau` for 32-bit or 64-bit floats.
  void fcvtau(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 1, EncodedSize, 0b11100, rd, rn);
  }
  ///< Scalar `ucvtf` for 32-bit or 64-bit operands.
  void ucvtf(ScalarRegSize size, VRegister rd, VRegister rn) {
    const bool IsDouble = size == ScalarRegSize::i64Bit;
    LOGMAN_THROW_A_FMT(IsDouble || size == ScalarRegSize::i32Bit, "Invalid size selected for float convert");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize EncodedSize = IsDouble ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;
    ASIMDScalar2RegMisc(0, 1, EncodedSize, 0b11101, rd, rn);
  }
  ///< Floating-point `fcmge` against 0.0 (single-source form); 32-bit or 64-bit.
  void fcmge(ScalarRegSize size, VRegister rd, VRegister rn) {
    // Same wording as the sibling zero-compare emitters; this is a compare, not a convert.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");

    ASIMDScalar2RegMisc(0, 1, size, 0b01100, rd, rn);
  }
  ///< Floating-point `fcmle` against 0.0 (single-source form); 32-bit or 64-bit.
  void fcmle(ScalarRegSize size, VRegister rd, VRegister rn) {
    // Same wording as the sibling zero-compare emitters; this is a compare, not a convert.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");

    ASIMDScalar2RegMisc(0, 1, size, 0b01101, rd, rn);
  }
  ///< Scalar `fcvtpu` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fcvtpu(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float convert");
    constexpr uint32_t Opcode = 0b11010;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `fcvtzu` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fcvtzu(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i32Bit || size == ScalarRegSize::i64Bit, "Invalid size selected for float convert");
    constexpr uint32_t Opcode = 0b11011;
    ASIMDScalar2RegMisc(0, 1, size, Opcode, rd, rn);
  }
  ///< Scalar `frsqrte` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void frsqrte(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction rather than the unrelated "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for frsqrte");

    ASIMDScalar2RegMisc(0, 1, size, 0b11101, rd, rn);
  }
  // Advanced SIMD scalar pairwise
  ///< Scalar pairwise `addp`; 64-bit only.
  void addp(ScalarRegSize size, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Invalid size selected for addp");
    constexpr uint32_t Opcode = 0b11011;
    // The leading 1 selects the pairwise variant of the shared two-register encoder.
    ASIMDScalar2RegMisc(1, 0, size, Opcode, rd, rn);
  }

  ///< FP16 scalar pairwise `fmaxnmp`; the H registers are passed on as V registers.
  void fmaxnmp(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01100;
    ASIMDScalar2RegMisc(1, 0, ScalarRegSize::i8Bit, Opcode, rd.V(), rn.V());
  }
  ///< FP16 scalar pairwise `faddp`; the H registers are passed on as V registers.
  void faddp(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01101;
    ASIMDScalar2RegMisc(1, 0, ScalarRegSize::i8Bit, Opcode, rd.V(), rn.V());
  }
  ///< FP16 scalar pairwise `fmaxp`; the H registers are passed on as V registers.
  void fmaxp(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01111;
    ASIMDScalar2RegMisc(1, 0, ScalarRegSize::i8Bit, Opcode, rd.V(), rn.V());
  }
  ///< FP16 scalar pairwise `fminnmp`; same opcode as `fmaxnmp` but with the i32Bit size value.
  void fminnmp(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01100;
    ASIMDScalar2RegMisc(1, 0, ScalarRegSize::i32Bit, Opcode, rd.V(), rn.V());
  }
  ///< FP16 scalar pairwise `fminp`; same opcode as `fmaxp` but with the i32Bit size value.
  void fminp(HRegister rd, HRegister rn) {
    constexpr uint32_t Opcode = 0b01111;
    ASIMDScalar2RegMisc(1, 0, ScalarRegSize::i32Bit, Opcode, rd.V(), rn.V());
  }

  ///< Scalar pairwise `fmaxnmp` for 32-bit or 64-bit floats.
  void fmaxnmp(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fmaxnmp");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMDScalar2RegMisc(1, 1, ConvertedSize, 0b01100, rd, rn);
  }
  ///< Scalar pairwise `faddp` for 32-bit or 64-bit floats.
  void faddp(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for faddp");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMDScalar2RegMisc(1, 1, ConvertedSize, 0b01101, rd, rn);
  }
  ///< Scalar pairwise `fmaxp` for 32-bit or 64-bit floats.
  void fmaxp(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fmaxp");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMDScalar2RegMisc(1, 1, ConvertedSize, 0b01111, rd, rn);
  }
  ///< Scalar pairwise `fminnmp` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fminnmp(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fminnmp");
    ASIMDScalar2RegMisc(1, 1, size, 0b01100, rd, rn);
  }
  ///< Scalar pairwise `fminp` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fminp(ScalarRegSize size, VRegister rd, VRegister rn) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fminp");
    ASIMDScalar2RegMisc(1, 1, size, 0b01111, rd, rn);
  }
  // Advanced SIMD scalar three different
  ///< Scalar `sqdmlal`. `size` is the (widened) destination size: 32-bit or 64-bit.
  void sqdmlal(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // Integer instruction: the assertion text no longer claims a float conversion.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for sqdmlal");
    // The encoder receives the next size down from the destination (i32Bit for 64-bit, i16Bit for 32-bit).
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i32Bit : ScalarRegSize::i16Bit;
    ASIMD3RegDifferent(0, ConvertedSize, 0b1001, rd, rn, rm);
  }
  ///< Scalar `sqdmlsl`. `size` is the (widened) destination size: 32-bit or 64-bit.
  void sqdmlsl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // Integer instruction: the assertion text no longer claims a float conversion.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for sqdmlsl");
    // The encoder receives the next size down from the destination (i32Bit for 64-bit, i16Bit for 32-bit).
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i32Bit : ScalarRegSize::i16Bit;
    ASIMD3RegDifferent(0, ConvertedSize, 0b1011, rd, rn, rm);
  }

  ///< Scalar `sqdmull`. `size` is the (widened) destination size: 32-bit or 64-bit.
  void sqdmull(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // Integer instruction: the assertion text no longer claims a float conversion.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for sqdmull");
    // The encoder receives the next size down from the destination (i32Bit for 64-bit, i16Bit for 32-bit).
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i32Bit : ScalarRegSize::i16Bit;
    ASIMD3RegDifferent(0, ConvertedSize, 0b1101, rd, rn, rm);
  }
  // Advanced SIMD scalar three same
  ///< Scalar `sqadd`; no size restriction is asserted here.
  void sqadd(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b00001;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sqsub`; no size restriction is asserted here.
  void sqsub(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b00101;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar integer `cmgt`, register-register form; 64-bit only.
  void cmgt(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b00110;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar integer `cmge`, register-register form; 64-bit only.
  void cmge(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b00111;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sshl` (shift amount taken from a register); 64-bit only.
  void sshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01000;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sqshl`, register form; no size restriction is asserted here.
  void sqshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b01001;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `srshl` (register form); 64-bit only.
  void srshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01010;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sqrshl`, register form; no size restriction is asserted here.
  void sqrshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b01011;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar integer `add`; 64-bit only.
  void add(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b10000;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `cmtst`; 64-bit only.
  void cmtst(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b10001;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sqdmulh`; asserts that `size` is 16-bit or 32-bit.
  void sqdmulh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i16Bit || size == ScalarRegSize::i32Bit, "Invalid size");
    constexpr uint32_t Opcode = 0b10110;
    ASIMD3RegSame(0, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `fmulx` for 32-bit or 64-bit floats.
  void fmulx(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fmulx");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMD3RegSame(0, ConvertedSize, 0b11011, rd, rn, rm);
  }
  ///< Scalar `fcmeq`, register-register form, for 32-bit or 64-bit floats.
  void fcmeq(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // This is a compare, so the assertion text says so instead of "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMD3RegSame(0, ConvertedSize, 0b11100, rd, rn, rm);
  }
  ///< Scalar `frecps` for 32-bit or 64-bit floats.
  void frecps(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for frecps");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMD3RegSame(0, ConvertedSize, 0b11111, rd, rn, rm);
  }
  ///< Scalar `frsqrts` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void frsqrts(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for frsqrts");
    ASIMD3RegSame(0, size, 0b11111, rd, rn, rm);
  }
  ///< Scalar `uqadd`; no size restriction is asserted here.
  void uqadd(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b00001;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `uqsub`; no size restriction is asserted here.
  void uqsub(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b00101;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `cmhi`; 64-bit only.
  void cmhi(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b00110;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `cmhs`; 64-bit only.
  void cmhs(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b00111;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `ushl` (shift amount taken from a register); 64-bit only.
  void ushl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01000;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `uqshl`, register form; no size restriction is asserted here.
  void uqshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b01001;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `urshl` (register form); 64-bit only.
  void urshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b01010;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `uqrshl`, register form; no size restriction is asserted here.
  void uqrshl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b01011;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar integer `sub`; 64-bit only.
  void sub(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b10000;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar integer `cmeq`, register-register form; 64-bit only.
  void cmeq(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit, "Only supports 64-bit");
    constexpr uint32_t Opcode = 0b10001;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `sqrdmulh`; asserts that `size` is 16-bit or 32-bit.
  void sqrdmulh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i16Bit || size == ScalarRegSize::i32Bit, "Invalid size");
    constexpr uint32_t Opcode = 0b10110;
    ASIMD3RegSame(1, size, Opcode, rd, rn, rm);
  }
  ///< Scalar `fcmge`, register-register form, for 32-bit or 64-bit floats.
  void fcmge(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // This is a compare, so the assertion text says so instead of "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMD3RegSame(1, ConvertedSize, 0b11100, rd, rn, rm);
  }
  ///< Scalar `facge` for 32-bit or 64-bit floats.
  void facge(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // This is a compare, so the assertion text says so instead of "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");

    // The encoder is handed i16Bit for 64-bit operands and i8Bit for 32-bit operands.
    const ScalarRegSize ConvertedSize = size == ScalarRegSize::i64Bit ? ScalarRegSize::i16Bit : ScalarRegSize::i8Bit;

    ASIMD3RegSame(1, ConvertedSize, 0b11101, rd, rn, rm);
  }
  ///< Scalar `fabd` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fabd(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // The assertion text names this instruction; the previous "float convert" wording was a copy-paste.
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for fabd");
    ASIMD3RegSame(1, size, 0b11010, rd, rn, rm);
  }
  ///< Scalar `fcmgt`, register-register form, for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void fcmgt(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // This is a compare, so the assertion text says so instead of "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");
    ASIMD3RegSame(1, size, 0b11100, rd, rn, rm);
  }
  ///< Scalar `facgt` for 32-bit or 64-bit floats; `size` is forwarded unchanged.
  void facgt(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    // This is a compare, so the assertion text says so instead of "float convert".
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit, "Invalid size selected for float compare");
    ASIMD3RegSame(1, size, 0b11101, rd, rn, rm);
  }
  // Advanced SIMD scalar shift by immediate
  ///< Scalar `sshr` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void sshr(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for sshr");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sshr");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b00000, rd, rn);
  }
  ///< Scalar `ssra` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void ssra(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for ssra");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for ssra");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b00010, rd, rn);
  }
  ///< Scalar `srshr` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void srshr(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for srshr");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for srshr");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b00100, rd, rn);
  }
  ///< Scalar `srsra` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void srsra(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for srsra");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for srsra");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b00110, rd, rn);
  }
  ///< Scalar `shl` by immediate; 64-bit elements only.
  void shl(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // Assertion texts name this instruction (they previously said `sshr`).
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift < 64, "Invalid shift for shl");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for shl");
    // Left shifts store esize + shift in immh:immb; the element-size marker bit lives in immh
    // and the upper bits of the shift are OR'd in beneath it.
    const uint32_t immh = (1 << FEXCore::ToUnderlying(size)) | (Shift >> 3);
    const uint32_t immb = Shift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b01010, rd, rn);
  }
  ///< Scalar `sqshl` by immediate; any element size, `Shift` must be below the element width.
  void sqshl(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // Assertion text names this instruction (it previously said `sshr`).
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift < ScalarRegSizeInBits(size), "Invalid shift for sqshl");
    // Left shifts store esize + shift in immh:immb; the element-size marker bit lives in immh
    // and the upper bits of the shift are OR'd in beneath it.
    const uint32_t immh = (1 << FEXCore::ToUnderlying(size)) | (Shift >> 3);
    const uint32_t immb = Shift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b01110, rd, rn);
  }
  ///< Scalar `sqshrn` by immediate. `size` is the (narrow) destination size, so 64-bit is
  ///< rejected; `Shift` is in [1, destination width in bits].
  void sqshrn(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // Narrowing right shifts may shift by the full destination width, e.g. #8 for a byte result.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for sqshrn");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sqshrn");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * destination esize) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b10010, rd, rn);
  }
  ///< Scalar `sqrshrn` by immediate. `size` is the (narrow) destination size, so 64-bit is
  ///< rejected; `Shift` is in [1, destination width in bits].
  void sqrshrn(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // Narrowing right shifts may shift by the full destination width, e.g. #8 for a byte result.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for sqrshrn");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sqrshrn");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * destination esize) - shift.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(0, immh, immb, 0b10011, rd, rn);
  }
  // TODO: SCVTF, FCVTZS
  ///< Scalar `ushr` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void ushr(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for ushr");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for ushr");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b00000, rd, rn);
  }
  ///< Scalar `usra` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void usra(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for usra");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for usra");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b00010, rd, rn);
  }
  ///< Scalar `urshr` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void urshr(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for urshr");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for urshr");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b00100, rd, rn);
  }
  ///< Scalar `ursra` by immediate; 64-bit elements only, `Shift` in [1, 64].
  void ursra(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    // A right shift by the full element width (64) is encodable: 128 - 64 = 0b1000:000.
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for ursra");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for ursra");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // immh:immb holds (2 * esize) - shift, so larger shifts produce smaller field values.
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b00110, rd, rn);
  }
  // SRI (scalar, by immediate): emits the encoding with U=1, opcode=0b01000.
  //   size  - must be ScalarRegSize::i64Bit (asserted).
  //   Shift - right shift amount; the A64 encoding permits 1..64 (asserted).
  void sri(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= 64, "Invalid shift for sri");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sri");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // Right shifts are stored inverted in immh:immb:
    // shift = (esize * 2) - immh:immb
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b01000, rd, rn);
  }
  // SLI (scalar, by immediate): emits the encoding with U=1, opcode=0b01010.
  //   size  - must be ScalarRegSize::i64Bit (asserted).
  //   Shift - left shift amount; the A64 encoding permits 0..63 (asserted).
  void sli(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift < 64, "Invalid shift for sli");
    LOGMAN_THROW_A_FMT(size == ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sli");
    // Shift encoded a bit weirdly.
    // shift = immh:immb - elementsize but immh is /also/ used for element size.
    const uint32_t immh = 1 << FEXCore::ToUnderlying(size) | (Shift >> 3);
    const uint32_t immb = Shift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b01010, rd, rn);
  }
  // SQSHLU (scalar, by immediate): emits the encoding with U=1, opcode=0b01100.
  //   size  - element size; its log2 byte width selects the marker bit in immh.
  //   Shift - left shift amount; the A64 encoding permits 0..(element bits - 1) (asserted).
  void sqshlu(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift < ScalarRegSizeInBits(size), "Invalid shift for sqshlu");
    // Shift encoded a bit weirdly.
    // shift = immh:immb - elementsize but immh is /also/ used for element size.
    const uint32_t immh = 1 << FEXCore::ToUnderlying(size) | (Shift >> 3);
    const uint32_t immb = Shift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b01100, rd, rn);
  }
  // UQSHL (scalar, by immediate): emits the encoding with U=1, opcode=0b01110.
  //   size  - element size; its log2 byte width selects the marker bit in immh.
  //   Shift - left shift amount; the A64 encoding permits 0..(element bits - 1) (asserted).
  void uqshl(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift < ScalarRegSizeInBits(size), "Invalid shift for uqshl");
    // Shift encoded a bit weirdly.
    // shift = immh:immb - elementsize but immh is /also/ used for element size.
    const uint32_t immh = 1 << FEXCore::ToUnderlying(size) | (Shift >> 3);
    const uint32_t immb = Shift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b01110, rd, rn);
  }
  // SQSHRUN (scalar, by immediate): emits the encoding with U=1, opcode=0b10000.
  //   size  - the *destination* element size; 64-bit is rejected (asserted).
  //   Shift - right shift amount; the A64 encoding permits 1..(destination bits) (asserted).
  void sqshrun(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for sqshrun");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sqshrun");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // Right shifts are stored inverted in immh:immb:
    // shift = (esize * 2) - immh:immb
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b10000, rd, rn);
  }
  // SQRSHRUN (scalar, by immediate): emits the encoding with U=1, opcode=0b10001.
  //   size  - the *destination* element size; 64-bit is rejected (asserted).
  //   Shift - right shift amount; the A64 encoding permits 1..(destination bits) (asserted).
  void sqrshrun(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for sqrshrun");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for sqrshrun");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // Right shifts are stored inverted in immh:immb:
    // shift = (esize * 2) - immh:immb
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b10001, rd, rn);
  }
  // UQSHRN (scalar, by immediate): emits the encoding with U=1, opcode=0b10010.
  //   size  - the *destination* element size; 64-bit is rejected (asserted).
  //   Shift - right shift amount; the A64 encoding permits 1..(destination bits) (asserted).
  void uqshrn(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for uqshrn");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for uqshrn");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // Right shifts are stored inverted in immh:immb:
    // shift = (esize * 2) - immh:immb
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b10010, rd, rn);
  }
  // UQRSHRN (scalar, by immediate): emits the encoding with U=1, opcode=0b10011.
  //   size  - the *destination* element size; 64-bit is rejected (asserted).
  //   Shift - right shift amount; the A64 encoding permits 1..(destination bits) (asserted).
  void uqrshrn(ScalarRegSize size, VRegister rd, VRegister rn, uint32_t Shift) {
    LOGMAN_THROW_A_FMT(Shift > 0 && Shift <= ScalarRegSizeInBits(size), "Invalid shift for uqrshrn");
    LOGMAN_THROW_A_FMT(size != ARMEmitter::ScalarRegSize::i64Bit, "Invalid size selected for uqrshrn");
    const size_t SubregSizeInBits = ScalarRegSizeInBits(size);
    // Right shifts are stored inverted in immh:immb:
    // shift = (esize * 2) - immh:immb
    const uint32_t InvertedShift = (SubregSizeInBits * 2) - Shift;
    const uint32_t immh = InvertedShift >> 3;
    const uint32_t immb = InvertedShift & 0b111;
    ASIMDScalarShiftByImm(1, immh, immb, 0b10011, rd, rn);
  }

  // TODO: UCVTF, FCVTZU

  // Advanced SIMD scalar x indexed element
  // SQDMLAL (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqdmlal(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b0011;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQDMLSL (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqdmlsl(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b0111;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQDMULL (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqdmull(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b1011;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQDMULH (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqdmulh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b1100;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQRDMULH (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqrdmulh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b1101;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // FMLA (scalar, by element); size validation is left to the encoder.
  void fmla(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b0001;
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // FMLS (scalar, by element); size validation is left to the encoder.
  void fmls(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b0101;
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // FMUL (scalar, by element); size validation is left to the encoder.
  void fmul(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 0;
    constexpr uint32_t Opcode = 0b1001;
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQRDMLAH (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqrdmlah(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 1;
    constexpr uint32_t Opcode = 0b1101;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // SQRDMLSH (scalar, by element). 64-bit is rejected here; 8-bit is rejected by the encoder.
  void sqrdmlsh(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 1;
    constexpr uint32_t Opcode = 0b1111;
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i64Bit, "Scalar size must not be 64-bit");
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }
  // FMULX (scalar, by element); same opcode as FMUL but with U=1.
  void fmulx(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, uint32_t index) {
    constexpr uint32_t U = 1;
    constexpr uint32_t Opcode = 0b1001;
    ASIMDScalarXIndexedElement(U, size, Opcode, rm, rn, rd, index);
  }

  // Floating-point data-processing (1 source)
  // FMOV (register), operand width chosen by `size`.
  void fmov(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b000000;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FABS, operand width chosen by `size`.
  void fabs(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b000001;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FNEG, operand width chosen by `size`.
  void fneg(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b000010;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FSQRT, operand width chosen by `size`.
  void fsqrt(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b000011;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTN, operand width chosen by `size`.
  void frintn(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001000;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTP, operand width chosen by `size`.
  void frintp(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001001;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTM, operand width chosen by `size`.
  void frintm(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001010;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTZ, operand width chosen by `size`.
  void frintz(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001011;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTA, operand width chosen by `size`.
  void frinta(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001100;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTX, operand width chosen by `size`.
  void frintx(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001110;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINTI, operand width chosen by `size`.
  void frinti(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b001111;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINT32Z, operand width chosen by `size`.
  void frint32z(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b010000;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINT32X, operand width chosen by `size`.
  void frint32x(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b010001;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINT64Z, operand width chosen by `size`.
  void frint64z(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b010010;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }
  // FRINT64X, operand width chosen by `size`.
  void frint64x(ScalarRegSize size, VRegister rd, VRegister rn) {
    constexpr uint32_t Opcode = 0b010011;
    Float1Source(size, 0, 0, Opcode, rd, rn);
  }

  // FMOV Sd, Sn.
  void fmov(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FABS Sd, Sn.
  void fabs(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FNEG Sd, Sn.
  void fneg(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FSQRT Sd, Sn.
  void fsqrt(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Dd, Sn (single-precision source, double-precision destination).
  void fcvt(DRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000101;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Hd, Sn (single-precision source, half-precision destination).
  void fcvt(HRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b000111;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTN Sd, Sn.
  void frintn(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTP Sd, Sn.
  void frintp(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTM Sd, Sn.
  void frintm(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTZ Sd, Sn.
  void frintz(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTA Sd, Sn.
  void frinta(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001100;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTX Sd, Sn.
  void frintx(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001110;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTI Sd, Sn.
  void frinti(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b001111;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT32Z Sd, Sn.
  void frint32z(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b010000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT32X Sd, Sn.
  void frint32x(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b010001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT64Z Sd, Sn.
  void frint64z(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b010010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT64X Sd, Sn.
  void frint64x(SRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b00, Opcode = 0b010011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }

  // FMOV Dd, Dn.
  void fmov(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FABS Dd, Dn.
  void fabs(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FNEG Dd, Dn.
  void fneg(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FSQRT Dd, Dn.
  void fsqrt(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Sd, Dn (double-precision source, single-precision destination).
  void fcvt(SRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000100;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // BFCVT Hd, Sn; encoded with ptype=0b01 even though the source is an S register.
  void bfcvt(HRegister rd, SRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000110;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Hd, Dn (double-precision source, half-precision destination).
  void fcvt(HRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b000111;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTN Dd, Dn.
  void frintn(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTP Dd, Dn.
  void frintp(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTM Dd, Dn.
  void frintm(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTZ Dd, Dn.
  void frintz(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTA Dd, Dn.
  void frinta(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001100;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTX Dd, Dn.
  void frintx(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001110;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTI Dd, Dn.
  void frinti(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b001111;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT32Z Dd, Dn.
  void frint32z(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b010000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT32X Dd, Dn.
  void frint32x(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b010001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT64Z Dd, Dn.
  void frint64z(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b010010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINT64X Dd, Dn.
  void frint64x(DRegister rd, DRegister rn) {
    constexpr uint32_t PType = 0b01, Opcode = 0b010011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }

  // FMOV Hd, Hn.
  void fmov(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FABS Hd, Hn.
  void fabs(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FNEG Hd, Hn.
  void fneg(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FSQRT Hd, Hn.
  void fsqrt(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Sd, Hn (half-precision source, single-precision destination).
  void fcvt(SRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000100;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FCVT Dd, Hn (half-precision source, double-precision destination).
  void fcvt(DRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b000101;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTN Hd, Hn.
  void frintn(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001000;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTP Hd, Hn.
  void frintp(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001001;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTM Hd, Hn.
  void frintm(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001010;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTZ Hd, Hn.
  void frintz(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001011;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTA Hd, Hn.
  void frinta(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001100;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTX Hd, Hn.
  void frintx(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001110;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }
  // FRINTI Hd, Hn.
  void frinti(HRegister rd, HRegister rn) {
    constexpr uint32_t PType = 0b11, Opcode = 0b001111;
    Float1Source(0, 0, PType, Opcode, rd.V(), rn.V());
  }

  // Floating-point compare
  // FCMP (register) with the operand width chosen by `Size`.
  // 8-bit is rejected by the assert; if asserts are disabled it falls back to ftype 0.
  void fcmp(ScalarRegSize Size, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(Size != ScalarRegSize::i8Bit, "8-bit destination not supported");

    // Translate the register size into the instruction's ftype field.
    uint32_t FType = 0;
    switch (Size) {
    case ARMEmitter::ScalarRegSize::i64Bit: FType = 0b01; break;
    case ARMEmitter::ScalarRegSize::i32Bit: FType = 0b00; break;
    case ARMEmitter::ScalarRegSize::i16Bit: FType = 0b11; break;
    default: break;
    }

    FloatCompare(0, 0, FType, 0b00, 0b00000, rn, rm);
  }

  // FCMP Sn, Sm.
  void fcmp(SRegister rn, SRegister rm) {
    constexpr uint32_t FType = 0b00, Op = 0b00, Opcode2 = 0b00000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }
  // FCMP Sn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmp(SRegister rn) {
    constexpr uint32_t FType = 0b00, Op = 0b00, Opcode2 = 0b01000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }
  // FCMPE Sn, Sm.
  void fcmpe(SRegister rn, SRegister rm) {
    constexpr uint32_t FType = 0b00, Op = 0b00, Opcode2 = 0b10000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }

  // FCMPE Sn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmpe(SRegister rn) {
    constexpr uint32_t FType = 0b00, Op = 0b00, Opcode2 = 0b11000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }
  // FCMP Dn, Dm.
  void fcmp(DRegister rn, DRegister rm) {
    constexpr uint32_t FType = 0b01, Op = 0b00, Opcode2 = 0b00000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }

  // FCMP Dn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmp(DRegister rn) {
    constexpr uint32_t FType = 0b01, Op = 0b00, Opcode2 = 0b01000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }
  // FCMPE Dn, Dm.
  void fcmpe(DRegister rn, DRegister rm) {
    constexpr uint32_t FType = 0b01, Op = 0b00, Opcode2 = 0b10000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }

  // FCMPE Dn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmpe(DRegister rn) {
    constexpr uint32_t FType = 0b01, Op = 0b00, Opcode2 = 0b11000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }
  // FCMP Hn, Hm.
  void fcmp(HRegister rn, HRegister rm) {
    constexpr uint32_t FType = 0b11, Op = 0b00, Opcode2 = 0b00000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }

  // FCMP Hn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmp(HRegister rn) {
    constexpr uint32_t FType = 0b11, Op = 0b00, Opcode2 = 0b01000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }
  // FCMPE Hn, Hm.
  void fcmpe(HRegister rn, HRegister rm) {
    constexpr uint32_t FType = 0b11, Op = 0b00, Opcode2 = 0b10000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), rm.V());
  }

  // FCMPE Hn, #0.0. The rm field is filled with v0 for the compare-with-zero form.
  void fcmpe(HRegister rn) {
    constexpr uint32_t FType = 0b11, Op = 0b00, Opcode2 = 0b11000;
    FloatCompare(0, 0, FType, Op, Opcode2, rn.V(), VReg::v0);
  }

  // Floating-point immediate
  // FMOV (scalar, immediate): loads `Value` into rd using the 8-bit FP immediate form.
  // Only 32-bit and 64-bit destinations are handled; 16-bit reports "Unsupported" and
  // every other size is treated as unreachable.
  void fmov(ARMEmitter::ScalarRegSize size, ARMEmitter::VRegister rd, float Value) {
    constexpr uint32_t M = 0;
    constexpr uint32_t S = 0;
    constexpr uint32_t imm5 = 0b0'0000;

    uint32_t ptype;
    uint32_t imm8;
    if (size == ARMEmitter::ScalarRegSize::i32Bit) {
      ptype = 0b00;
      imm8 = FP32ToImm8(Value);
    } else if (size == ARMEmitter::ScalarRegSize::i64Bit) {
      ptype = 0b01;
      imm8 = FP64ToImm8(Value);
    } else if (size == ARMEmitter::ScalarRegSize::i16Bit) {
      LOGMAN_MSG_A_FMT("Unsupported");
      FEX_UNREACHABLE;
    } else {
      FEX_UNREACHABLE;
    }

    FloatScalarImmediate(M, S, ptype, imm8, imm5, rd);
  }

  // Encoder for the "Floating-point immediate" group: merges the variable fields
  // into the fixed opcode pattern and emits the 32-bit instruction word.
  void FloatScalarImmediate(uint32_t M, uint32_t S, uint32_t ptype, uint32_t imm8, uint32_t imm5, ARMEmitter::VRegister rd) {
    constexpr uint32_t Op = 0b0001'1110'0010'0000'0001'00 << 10;

    const uint32_t Fields = (M << 31) | (S << 29) | (ptype << 22) | (imm8 << 13) | (imm5 << 5);
    dc32(Op | Fields | rd.Idx());
  }

  // Floating-point conditional compare
  // FCCMP Sn, Sm, #nzcv, cond.
  void fccmp(SRegister rn, SRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b00, Op = 0b0;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }
  // FCCMPE Sn, Sm, #nzcv, cond.
  void fccmpe(SRegister rn, SRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b00, Op = 0b1;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }
  // FCCMP Dn, Dm, #nzcv, cond.
  void fccmp(DRegister rn, DRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b01, Op = 0b0;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }
  // FCCMPE Dn, Dm, #nzcv, cond.
  void fccmpe(DRegister rn, DRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b01, Op = 0b1;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }
  // FCCMP Hn, Hm, #nzcv, cond.
  void fccmp(HRegister rn, HRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b11, Op = 0b0;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }
  // FCCMPE Hn, Hm, #nzcv, cond.
  void fccmpe(HRegister rn, HRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t PType = 0b11, Op = 0b1;
    FloatConditionalCompare(0, 0, PType, Op, rn.V(), rm.V(), flags, Cond);
  }

  // Floating-point data-processing (2 source)
  // FMUL (scalar), operand width chosen by `size`.
  void fmul(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0000;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FDIV (scalar), operand width chosen by `size`.
  void fdiv(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0001;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FADD (scalar), operand width chosen by `size`.
  void fadd(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0010;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FSUB (scalar), operand width chosen by `size`.
  void fsub(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0011;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FMAX (scalar), operand width chosen by `size`.
  void fmax(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0100;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FMIN (scalar), operand width chosen by `size`.
  void fmin(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0101;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FMAXNM (scalar), operand width chosen by `size`.
  void fmaxnm(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0110;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FMINNM (scalar), operand width chosen by `size`.
  void fminnm(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b0111;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }
  // FNMUL (scalar), operand width chosen by `size`.
  void fnmul(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Opcode = 0b1000;
    Float2Source(size, 0, 0, Opcode, rd, rn, rm);
  }

  // FMUL Sd, Sn, Sm.
  void fmul(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FDIV Sd, Sn, Sm.
  void fdiv(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0001;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FADD Sd, Sn, Sm.
  void fadd(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0010;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FSUB Sd, Sn, Sm.
  void fsub(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0011;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAX Sd, Sn, Sm.
  void fmax(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0100;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMIN Sd, Sn, Sm.
  void fmin(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0101;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAXNM Sd, Sn, Sm.
  void fmaxnm(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0110;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMINNM Sd, Sn, Sm.
  void fminnm(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b0111;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FNMUL Sd, Sn, Sm.
  void fnmul(SRegister rd, SRegister rn, SRegister rm) {
    constexpr uint32_t PType = 0b00, Opcode = 0b1000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }

  // FMUL Dd, Dn, Dm.
  void fmul(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FDIV Dd, Dn, Dm.
  void fdiv(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0001;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FADD Dd, Dn, Dm.
  void fadd(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0010;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FSUB Dd, Dn, Dm.
  void fsub(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0011;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAX Dd, Dn, Dm.
  void fmax(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0100;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMIN Dd, Dn, Dm.
  void fmin(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0101;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAXNM Dd, Dn, Dm.
  void fmaxnm(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0110;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMINNM Dd, Dn, Dm.
  void fminnm(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b0111;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FNMUL Dd, Dn, Dm.
  void fnmul(DRegister rd, DRegister rn, DRegister rm) {
    constexpr uint32_t PType = 0b01, Opcode = 0b1000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }

  // FMUL Hd, Hn, Hm.
  void fmul(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FDIV Hd, Hn, Hm.
  void fdiv(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0001;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FADD Hd, Hn, Hm.
  void fadd(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0010;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FSUB Hd, Hn, Hm.
  void fsub(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0011;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAX Hd, Hn, Hm.
  void fmax(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0100;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMIN Hd, Hn, Hm.
  void fmin(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0101;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMAXNM Hd, Hn, Hm.
  void fmaxnm(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0110;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FMINNM Hd, Hn, Hm.
  void fminnm(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b0111;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }
  // FNMUL Hd, Hn, Hm.
  void fnmul(HRegister rd, HRegister rn, HRegister rm) {
    constexpr uint32_t PType = 0b11, Opcode = 0b1000;
    Float2Source(0, 0, PType, Opcode, rd.V(), rn.V(), rm.V());
  }

  // Floating-point conditional select
  // FCSEL with the operand width chosen by `size` (16, 32 or 64-bit; asserted).
  void fcsel(ScalarRegSize size, VRegister rd, VRegister rn, VRegister rm, Condition Cond) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i16Bit || size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit,
                       "Invalid size selected for {}", __func__);

    // Translate the register size into the ptype field; anything that is not 64/32-bit maps to 0b11.
    uint32_t PType;
    switch (size) {
    case ScalarRegSize::i64Bit: PType = 0b01; break;
    case ScalarRegSize::i32Bit: PType = 0b00; break;
    default: PType = 0b11; break;
    }

    FloatConditionalSelect(0, 0, PType, rd, rn, rm, Cond);
  }

  // FCSEL Sd, Sn, Sm, cond.
  void fcsel(SRegister rd, SRegister rn, SRegister rm, Condition Cond) {
    constexpr uint32_t PType = 0b00;
    FloatConditionalSelect(0, 0, PType, rd.V(), rn.V(), rm.V(), Cond);
  }
  // FCSEL Dd, Dn, Dm, cond.
  void fcsel(DRegister rd, DRegister rn, DRegister rm, Condition Cond) {
    constexpr uint32_t PType = 0b01;
    FloatConditionalSelect(0, 0, PType, rd.V(), rn.V(), rm.V(), Cond);
  }
  // FCSEL Hd, Hn, Hm, cond.
  void fcsel(HRegister rd, HRegister rn, HRegister rm, Condition Cond) {
    constexpr uint32_t PType = 0b11;
    FloatConditionalSelect(0, 0, PType, rd.V(), rn.V(), rm.V(), Cond);
  }

  // Floating-point data-processing (3 source)
  // FMADD Sd, Sn, Sm, Sa.
  void fmadd(SRegister rd, SRegister rn, SRegister rm, SRegister ra) {
    constexpr uint32_t PType = 0b00, O1 = 0, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FMSUB Sd, Sn, Sm, Sa.
  void fmsub(SRegister rd, SRegister rn, SRegister rm, SRegister ra) {
    constexpr uint32_t PType = 0b00, O1 = 0, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMADD Sd, Sn, Sm, Sa.
  void fnmadd(SRegister rd, SRegister rn, SRegister rm, SRegister ra) {
    constexpr uint32_t PType = 0b00, O1 = 1, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMSUB Sd, Sn, Sm, Sa.
  void fnmsub(SRegister rd, SRegister rn, SRegister rm, SRegister ra) {
    constexpr uint32_t PType = 0b00, O1 = 1, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }

  // FMADD Dd, Dn, Dm, Da.
  void fmadd(DRegister rd, DRegister rn, DRegister rm, DRegister ra) {
    constexpr uint32_t PType = 0b01, O1 = 0, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FMSUB Dd, Dn, Dm, Da.
  void fmsub(DRegister rd, DRegister rn, DRegister rm, DRegister ra) {
    constexpr uint32_t PType = 0b01, O1 = 0, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMADD Dd, Dn, Dm, Da.
  void fnmadd(DRegister rd, DRegister rn, DRegister rm, DRegister ra) {
    constexpr uint32_t PType = 0b01, O1 = 1, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMSUB Dd, Dn, Dm, Da.
  void fnmsub(DRegister rd, DRegister rn, DRegister rm, DRegister ra) {
    constexpr uint32_t PType = 0b01, O1 = 1, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }

  // FMADD Hd, Hn, Hm, Ha.
  void fmadd(HRegister rd, HRegister rn, HRegister rm, HRegister ra) {
    constexpr uint32_t PType = 0b11, O1 = 0, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FMSUB Hd, Hn, Hm, Ha.
  void fmsub(HRegister rd, HRegister rn, HRegister rm, HRegister ra) {
    constexpr uint32_t PType = 0b11, O1 = 0, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMADD Hd, Hn, Hm, Ha.
  void fnmadd(HRegister rd, HRegister rn, HRegister rm, HRegister ra) {
    constexpr uint32_t PType = 0b11, O1 = 1, O0 = 0;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }
  // FNMSUB Hd, Hn, Hm, Ha.
  void fnmsub(HRegister rd, HRegister rn, HRegister rm, HRegister ra) {
    constexpr uint32_t PType = 0b11, O1 = 1, O0 = 1;
    Float3Source(0, 0, PType, O1, O0, rd.V(), rn.V(), rm.V(), ra.V());
  }

private:
  // Advanced SIMD scalar copy
  // Places Q (bit 30), b28 (bit 28), imm5 (bits 16+) and imm4 (bits 11+) plus the
  // rn/rd register fields into the fixed pattern and emits the word.
  void ASIMDScalarCopy(uint32_t Q, uint32_t b28, uint32_t imm5, uint32_t imm4, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0000'1110'0000'0000'0000'01U << 10;

    const uint32_t Fields = (Q << 30) | (b28 << 28) | (imm5 << 16) | (imm4 << 11);
    const uint32_t Regs = Encode_rn(rn) | Encode_rd(rd);
    dc32(Base | Fields | Regs);
  }

  // Advanced SIMD scalar three same FP16
  // Field layout: U at bit 29, a at bit 23, rm at bits 16+, opcode at bits 11+,
  // rn at bits 5+, rd at bits 0+.
  void ASIMDScalarThreeSameFP16(uint32_t U, uint32_t a, uint32_t opcode, HRegister rm, HRegister rn, HRegister rd) {
    constexpr uint32_t Base = 0b0101'1110'0100'0000'0000'0100'0000'0000;

    const uint32_t Fields = (U << 29) | (a << 23) | (opcode << 11);
    const uint32_t Regs = (rm.Idx() << 16) | (rn.Idx() << 5) | rd.Idx();
    dc32(Base | Fields | Regs);
  }
  // Advanced SIMD scalar two-register miscellaneous FP16
  // Field layout: U at bit 29, a at bit 23, opcode at bits 12+, rn at bits 5+, rd at bits 0+.
  void ASIMDScalarTwoRegMiscFP16(uint32_t U, uint32_t a, uint32_t opcode, HRegister rn, HRegister rd) {
    constexpr uint32_t Base = 0b0101'1110'0111'1000'0000'1000'0000'0000;

    const uint32_t Fields = (U << 29) | (a << 23) | (opcode << 12);
    const uint32_t Regs = (rn.Idx() << 5) | rd.Idx();
    dc32(Base | Fields | Regs);
  }

  // Advanced SIMD scalar three same extra
  // Field layout: U at bit 29, size at bits 22+, rm at bits 16+, opcode at bits 11+,
  // rn at bits 5+, rd at bits 0+.
  void ASIMDScalarThreeSameExtra(uint32_t U, ScalarRegSize size, uint32_t opcode, VRegister rm, VRegister rn, VRegister rd) {
    constexpr uint32_t Base = 0b0101'1110'0000'0000'1000'0100'0000'0000;

    uint32_t Word = Base;
    Word |= rd.Idx();
    Word |= rn.Idx() << 5;
    Word |= opcode << 11;
    Word |= rm.Idx() << 16;
    Word |= FEXCore::ToUnderlying(size) << 22;
    Word |= U << 29;
    dc32(Word);
  }

  // Advanced SIMD scalar two-register miscellaneous
  // Field layout: U at bit 29, size at bits 22+, b20 at bit 20, opcode at bits 12+,
  // rn at bits 5+, rd at bits 0+.
  void ASIMDScalar2RegMisc(uint32_t b20, uint32_t U, ScalarRegSize size, uint32_t opcode, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0101'1110'0010'0000'0000'1000'0000'0000;

    uint32_t Word = Base;
    Word |= rd.Idx();
    Word |= rn.Idx() << 5;
    Word |= opcode << 12;
    Word |= b20 << 20;
    Word |= FEXCore::ToUnderlying(size) << 22;
    Word |= U << 29;
    dc32(Word);
  }

  // Advanced SIMD scalar three different
  // Field layout: U at bit 29, size at bits 22+, opcode at bits 12+; register fields
  // are produced by the Encode_rm/rn/rd helpers.
  void ASIMD3RegDifferent(uint32_t U, ScalarRegSize size, uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0101'1110'0010'0000'0000'0000'0000'0000;

    uint32_t Word = Base;
    Word |= U << 29;
    Word |= FEXCore::ToUnderlying(size) << 22;
    Word |= opcode << 12;
    Word |= Encode_rm(rm);
    Word |= Encode_rn(rn);
    Word |= Encode_rd(rd);
    dc32(Word);
  }
  // Advanced SIMD scalar three same
  // Field layout: U at bit 29, size at bits 22+, opcode at bits 11+; register fields
  // are produced by the Encode_rm/rn/rd helpers.
  void ASIMD3RegSame(uint32_t U, ScalarRegSize size, uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0101'1110'0010'0000'0000'0100'0000'0000;

    uint32_t Word = Base;
    Word |= U << 29;
    Word |= FEXCore::ToUnderlying(size) << 22;
    Word |= opcode << 11;
    Word |= Encode_rm(rm);
    Word |= Encode_rn(rn);
    Word |= Encode_rd(rd);
    dc32(Word);
  }
  // Advanced SIMD scalar shift by immediate
  // Field layout: U at bit 29, immh at bits 19+, immb at bits 16+, opcode at bits 11+;
  // register fields are produced by Encode_rn/Encode_rd.
  void ASIMDScalarShiftByImm(uint32_t U, uint32_t immh, uint32_t immb, uint32_t opcode, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0101'1111'0000'0000'0000'0100'0000'0000;

    const uint32_t Fields = (U << 29) | (immh << 19) | (immb << 16) | (opcode << 11);
    const uint32_t Regs = Encode_rn(rn) | Encode_rd(rd);
    dc32(Base | Fields | Regs);
  }

  // Advanced SIMD scalar x indexed element
  // Encoder for the scalar "x indexed element" group.
  //   U, opcode - instruction selector bits (bit 29 and bits 12+).
  //   size      - element size; 8-bit is rejected (asserted).
  //   index     - element index into rm; must be below 16 >> size (asserted).
  // The index is split across the H bit (bit 11) and the L:M bits (bits 20+), with the
  // split depending on the element size.
  void ASIMDScalarXIndexedElement(uint32_t U, ScalarRegSize size, uint32_t opcode, VRegister rm, VRegister rn, VRegister rd, uint32_t index) {
    LOGMAN_THROW_A_FMT(size != ScalarRegSize::i8Bit, "Scalar size must not be 8-bit");

    const auto invalid_bound = 16U >> FEXCore::ToUnderlying(size);
    LOGMAN_THROW_A_FMT(index < invalid_bound, "Index ({}) must be within [0-{}]", index, invalid_bound - 1);

    // FMUL/FMLA/FMLS indexed variants deal with size differently:
    // 16-bit is encoded as zero and 32/64-bit always have the top size bit set.
    const bool UsesFPSizeEncoding = opcode == 0b0001 || opcode == 0b0101 || opcode == 0b1001;
    uint32_t SizeField;
    if (!UsesFPSizeEncoding) {
      SizeField = FEXCore::ToUnderlying(size);
    } else if (size == ScalarRegSize::i16Bit) {
      SizeField = 0;
    } else {
      SizeField = 0b10 | (FEXCore::ToUnderlying(size) & 1);
    }

    // Distribute the element index over H and L:M.
    uint32_t H = 0;
    uint32_t LM = 0;
    switch (size) {
    case ScalarRegSize::i16Bit:
      // M carries an index bit here, so only four bits remain for rm.
      LOGMAN_THROW_A_FMT(rm <= VReg::v15, "rm ({}) must be within [v0-v15]", rm.Idx());
      H = (index >> 2) & 1;
      LM = index & 0b11;
      break;
    case ScalarRegSize::i32Bit:
      H = (index >> 1) & 1;
      LM = (index & 0b01) << 1;
      break;
    default:
      H = index & 1;
      break;
    }

    uint32_t Instr = 0b0101'1111'0000'0000'0000'0000'0000'0000;
    Instr |= U << 29;
    Instr |= SizeField << 22;
    Instr |= LM << 20;
    Instr |= rm.Idx() << 16;
    Instr |= opcode << 12;
    Instr |= H << 11;
    Instr |= rn.Idx() << 5;
    Instr |= rd.Idx();
    dc32(Instr);
  }

  // Floating-point data-processing (1 source)
  // Raw encoder for the one-source FP group: M at bit 31, S at bit 29, ptype at bits 22+,
  // opcode at bits 15+; register fields come from Encode_rn/Encode_rd.
  void Float1Source(uint32_t M, uint32_t S, uint32_t ptype, uint32_t opcode, VRegister rd, VRegister rn) {
    constexpr uint32_t Base = 0b0001'1110'0010'0000'0100'0000'0000'0000;

    const uint32_t Fields = (M << 31) | (S << 29) | (ptype << 22) | (opcode << 15);
    const uint32_t Regs = Encode_rn(rn) | Encode_rd(rd);
    dc32(Base | Fields | Regs);
  }
  // Size-driven overload: maps `size` (16, 32 or 64-bit; asserted) onto the ptype field
  // and forwards to the raw encoder.
  void Float1Source(ScalarRegSize size, uint32_t M, uint32_t S, uint32_t opcode, VRegister rd, VRegister rn) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i16Bit || size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit,
                       "Invalid size selected for {}", __func__);

    uint32_t PType;
    switch (size) {
    case ScalarRegSize::i64Bit: PType = 0b01; break;
    case ScalarRegSize::i32Bit: PType = 0b00; break;
    default: PType = 0b11; break;
    }

    Float1Source(M, S, PType, opcode, rd, rn);
  }

  // Floating-point compare
  // Raw encoder for the FP compare group: M at bit 31, S at bit 29, ftype at bits 22+,
  // op at bits 14+, opcode2 in the low bits (unshifted); rm/rn come from the Encode helpers.
  void FloatCompare(uint32_t M, uint32_t S, uint32_t ftype, uint32_t op, uint32_t opcode2, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0001'1110'0010'0000'0010'0000'0000'0000;

    const uint32_t Fields = (M << 31) | (S << 29) | (ftype << 22) | (op << 14) | opcode2;
    const uint32_t Regs = Encode_rm(rm) | Encode_rn(rn);
    dc32(Base | Fields | Regs);
  }
  // Floating-point immediate
  // XXX: The encoder for this group (FloatScalarImmediate) is defined in the public section above.
  // Floating-point conditional compare
  // Raw encoder for the FP conditional-compare group: M at bit 31, S at bit 29,
  // ptype at bits 22+, condition at bits 12+, op at bit 4, status flags in the low bits.
  void FloatConditionalCompare(uint32_t M, uint32_t S, uint32_t ptype, uint32_t op, VRegister rn, VRegister rm, StatusFlags flags, Condition Cond) {
    constexpr uint32_t Base = 0b0001'1110'0010'0000'0000'0100'0000'0000;

    uint32_t Word = Base;
    Word |= M << 31;
    Word |= S << 29;
    Word |= ptype << 22;
    Word |= op << 4;
    Word |= FEXCore::ToUnderlying(Cond) << 12;
    Word |= FEXCore::ToUnderlying(flags);
    Word |= Encode_rm(rm);
    Word |= Encode_rn(rn);

    dc32(Word);
  }
  // Floating-point data-processing (2 source)

  // Raw encoder for the two-source FP group: M at bit 31, S at bit 29, ptype at bits 22+,
  // opcode at bits 12+; register fields come from the Encode_rm/rn/rd helpers.
  void Float2Source(uint32_t M, uint32_t S, uint32_t ptype, uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    constexpr uint32_t Base = 0b0001'1110'0010'0000'0000'1000'0000'0000;

    const uint32_t Fields = (M << 31) | (S << 29) | (ptype << 22) | (opcode << 12);
    const uint32_t Regs = Encode_rm(rm) | Encode_rn(rn) | Encode_rd(rd);
    dc32(Base | Fields | Regs);
  }

  // Size-driven overload: maps `size` (16, 32 or 64-bit; asserted) onto the ptype field
  // and forwards to the raw encoder.
  void Float2Source(ScalarRegSize size, uint32_t M, uint32_t S, uint32_t opcode, VRegister rd, VRegister rn, VRegister rm) {
    LOGMAN_THROW_A_FMT(size == ScalarRegSize::i16Bit || size == ScalarRegSize::i64Bit || size == ScalarRegSize::i32Bit,
                       "Invalid size selected for {}", __func__);

    uint32_t PType;
    switch (size) {
    case ScalarRegSize::i64Bit: PType = 0b01; break;
    case ScalarRegSize::i32Bit: PType = 0b00; break;
    default: PType = 0b11; break;
    }

    Float2Source(M, S, PType, opcode, rd, rn, rm);
  }

  // Floating-point conditional select
  // Raw encoder for the FP conditional-select group: M at bit 31, S at bit 29,
  // ptype at bits 22+, rm at bits 16+, condition at bits 12+, rn at bits 5+, rd at bits 0+.
  void FloatConditionalSelect(uint32_t M, uint32_t S, uint32_t ptype, VRegister rd, VRegister rn, VRegister rm, Condition Cond) {
    constexpr uint32_t Base = 0b0001'1110'0010'0000'0000'1100'0000'0000;

    uint32_t Word = Base;
    Word |= rd.Idx();
    Word |= rn.Idx() << 5;
    Word |= FEXCore::ToUnderlying(Cond) << 12;
    Word |= rm.Idx() << 16;
    Word |= ptype << 22;
    Word |= S << 29;
    Word |= M << 31;
    dc32(Word);
  }

  // Floating-point data-processing (3 source)
  // Raw encoder for the three-source FP group: M at bit 31, S at bit 29, ptype at bits 22+,
  // o1 at bit 21, o0 at bit 15; register fields come from the Encode_rm/ra/rn/rd helpers.
  void Float3Source(uint32_t M, uint32_t S, uint32_t ptype, uint32_t o1, uint32_t o0, VRegister rd, VRegister rn, VRegister rm, VRegister ra) {
    constexpr uint32_t Base = 0b0001'1111'0000'0000'0000'0000'0000'0000;

    const uint32_t Fields = (M << 31) | (S << 29) | (ptype << 22) | (o1 << 21) | (o0 << 15);
    const uint32_t Regs = Encode_rm(rm) | Encode_ra(ra) | Encode_rn(rn) | Encode_rd(rd);
    dc32(Base | Fields | Regs);
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct LoadstoreEmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/SystemOps.inl
================================================
// SPDX-License-Identifier: MIT
/* System instruction emitters.
 *
 * This is mostly a mashup of various instruction types.
 * Nothing follows an explicit pattern since they are mostly different.
 */

#pragma once
#ifndef INCLUDED_BY_EMITTER
#include <CodeEmitter/Emitter.h>
namespace ARMEmitter {
struct EmitterOps : Emitter {
#endif

public:
  // Reserved
  // UDF: the emitted word is the immediate itself, which must fit in 16 bits.
  void udf(uint32_t Imm) {
    LOGMAN_THROW_A_FMT(Imm < 0x1'0000, "Immediate needs to be 16-bit");
    dc32(Imm);
  }

  // System with result
  // TODO: SYSL
  // System Instruction
  // TODO: AT
  // TODO: CFP
  // TODO: CPP
  // DC: data cache operation `DCOp` applied with register `rt`.
  void dc(ARMEmitter::DataCacheOperation DCOp, ARMEmitter::Register rt) {
    constexpr uint32_t FixedBits = 0b1101'0101'0000'1000'0111U << 12;
    constexpr uint32_t L = 0;
    SystemInstruction(FixedBits, L, FEXCore::ToUnderlying(DCOp), rt);
  }
  // TODO: DVP
  // TODO: IC
  // TODO: TLBI

  // Exception generation
  // Each wrapper only selects the (opc, op2, LL) triple; Imm is the 16-bit
  // immediate and is range-checked in ExceptionGeneration.
  void svc(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b000, /*op2=*/0b000, /*LL=*/0b01, Imm);
  }
  void hvc(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b000, /*op2=*/0b000, /*LL=*/0b10, Imm);
  }
  void smc(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b000, /*op2=*/0b000, /*LL=*/0b11, Imm);
  }
  void brk(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b001, /*op2=*/0b000, /*LL=*/0b00, Imm);
  }
  void hlt(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b010, /*op2=*/0b000, /*LL=*/0b00, Imm);
  }
  void tcancel(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b011, /*op2=*/0b000, /*LL=*/0b00, Imm);
  }
  void dcps1(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b101, /*op2=*/0b000, /*LL=*/0b01, Imm);
  }
  void dcps2(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b101, /*op2=*/0b000, /*LL=*/0b10, Imm);
  }
  void dcps3(uint32_t Imm) {
    ExceptionGeneration(/*opc=*/0b101, /*op2=*/0b000, /*LL=*/0b11, Imm);
  }

  // System instructions with register argument
  void wfet(ARMEmitter::Register rt) {
    SystemInstructionWithReg(/*CRm=*/0b0000, /*op2=*/0b000, rt);
  }
  void wfit(ARMEmitter::Register rt) {
    SystemInstructionWithReg(/*CRm=*/0b0000, /*op2=*/0b001, rt);
  }

  // Hints
  // Every hint is the shared hint template OR'd with its HintRegister value.
  void nop() {
    Hint(ARMEmitter::HintRegister::NOP);
  }
  void yield() {
    Hint(ARMEmitter::HintRegister::YIELD);
  }
  void wfe() {
    Hint(ARMEmitter::HintRegister::WFE);
  }
  void wfi() {
    Hint(ARMEmitter::HintRegister::WFI);
  }
  void sev() {
    Hint(ARMEmitter::HintRegister::SEV);
  }
  void sevl() {
    Hint(ARMEmitter::HintRegister::SEVL);
  }
  void dgh() {
    Hint(ARMEmitter::HintRegister::DGH);
  }
  void csdb() {
    Hint(ARMEmitter::HintRegister::CSDB);
  }

  // Barriers
  // CLREX takes a 4-bit immediate (defaults to 15) that lands in the CRm field.
  void clrex(uint32_t imm = 15) {
    LOGMAN_THROW_A_FMT(imm < 16, "Immediate out of range");
    Barrier(ARMEmitter::BarrierRegister::CLREX, imm);
  }
  // For DSB/DMB the barrier scope's underlying value is used as CRm.
  void dsb(ARMEmitter::BarrierScope Scope) {
    const uint32_t CRm = FEXCore::ToUnderlying(Scope);
    Barrier(ARMEmitter::BarrierRegister::DSB, CRm);
  }
  void dmb(ARMEmitter::BarrierScope Scope) {
    const uint32_t CRm = FEXCore::ToUnderlying(Scope);
    Barrier(ARMEmitter::BarrierRegister::DMB, CRm);
  }
  // ISB is always emitted with the SY scope.
  void isb() {
    const uint32_t CRm = FEXCore::ToUnderlying(ARMEmitter::BarrierScope::SY);
    Barrier(ARMEmitter::BarrierRegister::ISB, CRm);
  }
  void sb() {
    Barrier(ARMEmitter::BarrierRegister::SB, /*CRm=*/0);
  }
  void tcommit() {
    Barrier(ARMEmitter::BarrierRegister::TCOMMIT, /*CRm=*/0);
  }

  // System register move
  // MSR: rt is the general-purpose register operand, reg the system register.
  void msr(ARMEmitter::SystemRegister reg, ARMEmitter::Register rt) {
    constexpr uint32_t FixedBits = 0b1101'0101'0001U << 20;
    SystemRegisterMove(FixedBits, rt, reg);
  }

  // MRS: rd is the general-purpose register operand, reg the system register.
  void mrs(ARMEmitter::Register rd, ARMEmitter::SystemRegister reg) {
    constexpr uint32_t FixedBits = 0b1101'0101'0011U << 20;
    SystemRegisterMove(FixedBits, rd, reg);
  }

private:

  // Exception Generation
  // Layout: fixed byte at [31:24], opc at bit 21, Imm at bit 5, op2 at bit 2,
  // LL at bit 0. Imm must not have any of its upper 16 bits set.
  void ExceptionGeneration(uint32_t opc, uint32_t op2, uint32_t LL, uint32_t Imm) {
    LOGMAN_THROW_A_FMT((Imm & 0xFFFF'0000) == 0, "Imm amount too large");

    constexpr uint32_t FixedBits = 0b1101'0100U << 24;
    const uint32_t Instr = FixedBits | (opc << 21) | (Imm << 5) | (op2 << 2) | LL;

    dc32(Instr);
  }

  // System instructions with register argument
  // Layout: fixed bits at [31:12], CRm at bit 8, op2 at bit 5, rt via Encode_rt.
  void SystemInstructionWithReg(uint32_t CRm, uint32_t op2, ARMEmitter::Register rt) {
    constexpr uint32_t FixedBits = 0b1101'0101'0000'0011'0001U << 12;
    const uint32_t Instr = FixedBits | (CRm << 8) | (op2 << 5) | Encode_rt(rt);
    dc32(Instr);
  }

  // Hints
  // The HintRegister value is OR'd in unshifted.
  void Hint(ARMEmitter::HintRegister Reg) {
    constexpr uint32_t FixedBits = 0b1101'0101'0000'0011'0010'0000'0001'1111U;
    const uint32_t Instr = FixedBits | FEXCore::ToUnderlying(Reg);
    dc32(Instr);
  }
  // Barriers
  // CRm goes to bit 8; the BarrierRegister value is OR'd in unshifted.
  void Barrier(ARMEmitter::BarrierRegister Reg, uint32_t CRm) {
    constexpr uint32_t FixedBits = 0b1101'0101'0000'0011'0011'0000'0001'1111U;
    const uint32_t Instr = FixedBits | (CRm << 8) | FEXCore::ToUnderlying(Reg);
    dc32(Instr);
  }

  // System Instruction
  // Op supplies the fixed bits, L goes to bit 21, SubOp is OR'd in unshifted.
  void SystemInstruction(uint32_t Op, uint32_t L, uint32_t SubOp, ARMEmitter::Register rt) {
    const uint32_t Instr = Op | (L << 21) | SubOp | Encode_rt(rt);

    dc32(Instr);
  }

  // System register move
  // Op supplies the fixed bits; the SystemRegister value is OR'd in unshifted.
  void SystemRegisterMove(uint32_t Op, ARMEmitter::Register rt, ARMEmitter::SystemRegister reg) {
    const uint32_t Instr = Op | FEXCore::ToUnderlying(reg) | Encode_rt(rt);

    dc32(Instr);
  }

#ifndef INCLUDED_BY_EMITTER
}; // struct EmitterOps
} // namespace ARMEmitter
#endif


================================================
FILE: CodeEmitter/CodeEmitter/VixlUtils.inl
================================================
// Collection of utilities from vixl.
// Following is the vixl license.
// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


// Test if a given value can be encoded in the immediate field of a logical
// instruction.
//
// Parameters:
//   value  - the candidate immediate.
//   width  - operand width in bits; asserted to be 8, 16, 32 or 64.
//   n, imm_s, imm_r - optional outputs. Each may be nullptr (the default), in
//                     which case that field is simply not reported.
//
// If it can be encoded, the function returns true, and values pointed to by n,
// imm_s and imm_r are updated with immediates encoded in the format required
// by the corresponding fields in the logical instruction.
// If it can not be encoded, the function returns false, and the values pointed
// to by n, imm_s and imm_r are undefined.
static bool IsImmLogical(uint64_t value, unsigned width, unsigned* n = nullptr, unsigned* imm_s = nullptr, unsigned* imm_r = nullptr) {
  // Accepted operand widths. Only referenced by the assertion below, hence
  // [[maybe_unused]] for builds where the assertion compiles away.
  [[maybe_unused]] constexpr auto kBRegSize = 8;
  [[maybe_unused]] constexpr auto kHRegSize = 16;
  [[maybe_unused]] constexpr auto kSRegSize = 32;
  [[maybe_unused]] constexpr auto kDRegSize = 64;

  // Widths up to and including this one are replicated to fill 64 bits.
  constexpr auto kWRegSize = 32;

  LOGMAN_THROW_A_FMT((width == kBRegSize) || (width == kHRegSize) || (width == kSRegSize) || (width == kDRegSize), "Unexpected imm size");

  bool negate = false;

  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
  // (s bits must not be all set)
  //
  // A pattern is constructed of size bits, where the least significant S+1 bits
  // are set. The pattern is rotated right by R, and repeated across a 32 or
  // 64-bit value, depending on destination register width.
  //
  // Put another way: the basic format of a logical immediate is a single
  // contiguous stretch of 1 bits, repeated across the whole word at intervals
  // given by a power of 2. To identify them quickly, we first locate the
  // lowest stretch of 1 bits, then the next 1 bit above that; that combination
  // is different for every logical immediate, so it gives us all the
  // information we need to identify the only logical immediate that our input
  // could be, and then we simply check if that's the value we actually have.
  //
  // (The rotation parameter does give the possibility of the stretch of 1 bits
  // going 'round the end' of the word. To deal with that, we observe that in
  // any situation where that happens the bitwise NOT of the value is also a
  // valid logical immediate. So we simply invert the input whenever its low bit
  // is set, and then we know that the rotated case can't arise.)

  if (value & 1) {
    // If the low bit is 1, negate the value, and set a flag to remember that we
    // did (so that we can adjust the return values appropriately).
    negate = true;
    value = ~value;
  }

  if (width <= kWRegSize) {
    // To handle 8/16/32-bit logical immediates, the very easiest thing is to repeat
    // the input value to fill a 64-bit word. The correct encoding of that as a
    // logical immediate will also be the correct encoding of the value.

    // Avoid making the assumption that the most-significant 56/48/32 bits are zero by
    // shifting the value left and duplicating it.
    // Each iteration doubles the replicated span: width -> 2*width -> ... -> 64.
    for (unsigned bits = width; bits <= kWRegSize; bits *= 2) {
      value <<= bits;
      uint64_t mask = (UINT64_C(1) << bits) - 1;
      value |= ((value >> bits) & mask);
    }
  }

  // The basic analysis idea: imagine our input word looks like this.
  //
  //    0011111000111110001111100011111000111110001111100011111000111110
  //                                                          c  b    a
  //                                                          |<--d-->|
  //
  // We find the lowest set bit (as an actual power-of-2 value, not its index)
  // and call it a. Then we add a to our original number, which wipes out the
  // bottommost stretch of set bits and replaces it with a 1 carried into the
  // next zero bit. Then we look for the new lowest set bit, which is in
  // position b, and subtract it, so now our number is just like the original
  // but with the lowest stretch of set bits completely gone. Now we find the
  // lowest set bit again, which is position c in the diagram above. Then we'll
  // measure the distance d between bit positions a and c (using CLZ), and that
  // tells us that the only valid logical immediate that could possibly be equal
  // to this number is the one in which a stretch of bits running from a to just
  // below b is replicated every d bits.
  uint64_t a = LowestSetBit(value);
  uint64_t value_plus_a = value + a;
  uint64_t b = LowestSetBit(value_plus_a);
  uint64_t value_plus_a_minus_b = value_plus_a - b;
  uint64_t c = LowestSetBit(value_plus_a_minus_b);

  // d: repeat period in bits; clz_a: leading-zero count of a; out_n: value
  // reported through *n; mask: low d bits set.
  int d, clz_a, out_n;
  uint64_t mask;

  if (c != 0) {
    // The general case, in which there is more than one stretch of set bits.
    // Compute the repeat distance d, and set up a bitmask covering the basic
    // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
    // of these cases the N bit of the output will be zero.
    clz_a = std::countl_zero(a);
    int clz_c = std::countl_zero(c);
    d = clz_a - clz_c;
    mask = ((UINT64_C(1) << d) - 1);
    out_n = 0;
  } else {
    // Handle degenerate cases.
    //
    // If any of those 'find lowest set bit' operations didn't find a set bit at
    // all, then the word will have been zero thereafter, so in particular the
    // last lowest_set_bit operation will have returned zero. So we can test for
    // all the special case conditions in one go by seeing if c is zero.
    if (a == 0) {
      // The input was zero (or all 1 bits, which will come to here too after we
      // inverted it at the start of the function), for which we just return
      // false.
      return false;
    } else {
      // Otherwise, if c was zero but a was not, then there's just one stretch
      // of set bits in our word, meaning that we have the trivial case of
      // d == 64 and only one 'repetition'. Set up all the same variables as in
      // the general case above, and set the N bit in the output.
      clz_a = std::countl_zero(a);
      d = 64;
      mask = ~UINT64_C(0);
      out_n = 1;
    }
  }

  // If the repeat period d is not a power of two, it can't be encoded.
  if (!std::has_single_bit(uint32_t(d))) {
    return false;
  }

  if (((b - a) & ~mask) != 0) {
    // If the bit stretch (b - a) does not fit within the mask derived from the
    // repeat period, then fail.
    return false;
  }

  // The only possible option is b - a repeated every d bits. Now we're going to
  // actually construct the valid logical immediate derived from that
  // specification, and see if it equals our original input.
  //
  // To repeat a value every d bits, we multiply it by a number of the form
  // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
  // be derived using a table lookup on CLZ(d).
  // Table order is d = 64, 32, 16, 8, 4, 2.
  static const uint64_t multipliers[] = {
    0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL, 0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
  };
  // countl_zero of the 64-bit d is 57 for d == 64 and 62 for d == 2, so
  // subtracting 57 yields indices 0..5 into the table above.
  uint64_t multiplier = multipliers[std::countl_zero(uint64_t(d)) - 57];
  uint64_t candidate = (b - a) * multiplier;

  if (value != candidate) {
    // The candidate pattern doesn't match our input value, so fail.
    return false;
  }

  // We have a match! This is a valid logical immediate, so now we have to
  // construct the bits and pieces of the instruction encoding that generates
  // it.

  // Count the set bits in our basic stretch. The special case of clz(0) == -1
  // makes the answer come out right for stretches that reach the very top of
  // the word (e.g. numbers like 0xffffc00000000000).
  int clz_b = (b == 0) ? -1 : std::countl_zero(b);
  int s = clz_a - clz_b;

  // Decide how many bits to rotate right by, to put the low bit of that basic
  // stretch in position a.
  int r;
  if (negate) {
    // If we inverted the input right at the start of this function, here's
    // where we compensate: the number of set bits becomes the number of clear
    // bits, and the rotation count is based on position b rather than position
    // a (since b is the location of the 'lowest' 1 bit after inversion).
    s = d - s;
    r = (clz_b + 1) & (d - 1);
  } else {
    r = (clz_a + 1) & (d - 1);
  }

  // Now we're done, except for having to encode the S output in such a way that
  // it gives both the number of set bits and the length of the repeated
  // segment. The s field is encoded like this:
  //
  //     imms    size        S
  //    ssssss    64    UInt(ssssss)
  //    0sssss    32    UInt(sssss)
  //    10ssss    16    UInt(ssss)
  //    110sss     8    UInt(sss)
  //    1110ss     4    UInt(ss)
  //    11110s     2    UInt(s)
  //
  // So we 'or' (2 * -d) with our computed s to form imms.
  // Outputs are optional: a null pointer means the caller does not want that field.
  if (n != nullptr) {
    *n = out_n;
  }
  if (imm_s != nullptr) {
    *imm_s = ((2 * -d) | (s - 1)) & 0x3f;
  }
  if (imm_r != nullptr) {
    *imm_r = r;
  }

  return true;
}

// Returns true when x is representable as an n-bit two's-complement signed
// integer, i.e. -2^(n-1) <= x < 2^(n-1).
//   n - bit width to test against. Widths of 64 or more accept every int64_t;
//       a width of 0 accepts only the value 0.
//   x - value to test.
static inline bool IsIntN(unsigned n, int64_t x) {
  if (n >= 64) {
    // Every int64_t fits, and returning early keeps the shift below in range.
    return true;
  }
  if (n == 0) {
    // `n - 1` would wrap to UINT_MAX and make the shift below undefined.
    return x == 0;
  }
  const int64_t limit = INT64_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}

// Returns true when x, reinterpreted as an unsigned 64-bit value, fits in n
// bits (so a negative x only fits when n is 64 or more).
//   n - bit width to test against.
//   x - value to test.
static inline bool IsUintN(unsigned n, int64_t x) {
  if (n >= 64) {
    // Any 64-bit pattern fits. Shifting a 64-bit value by 64 or more is
    // undefined, so this case must not reach the shift below.
    return true;
  }
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint64_t>(x) >> n);
}

// X-macro lists: each invokes V(k) once for every integer k in its range.
// They drive the generation of the fixed-width IsInt<k>/IsUint<k> helpers below.
// clang-format off
#define INT_1_TO_32_LIST(V)                                                    \
V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)                                 \
V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16)                                \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24)                                \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)

#define INT_33_TO_63_LIST(V)                                                   \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40)                                \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48)                                \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56)                                \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)

// Concatenation of the two lists above: V(1) ... V(63).
#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)

// clang-format on

// Defines `IsInt<N>(x)`, a fixed-width shorthand for `IsIntN(N, x)`.
#define DECLARE_IS_INT_N(N)                \
  static inline bool IsInt##N(int64_t x) { \
    return IsIntN(N, x);                   \
  }

// Defines `IsUint<N>(x)`, a fixed-width shorthand for `IsUintN(N, x)`.
#define DECLARE_IS_UINT_N(N)                \
  static inline bool IsUint##N(int64_t x) { \
    return IsUintN(N, x);                   \
  }

// Instantiate IsInt1..IsInt63 and IsUint1..IsUint63.
INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)

// The generator macros are no longer needed once expanded. Note that the
// INT_*_LIST macros are intentionally left defined here.
#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N

private:

// Some compilers dislike negating unsigned integers,
// so we provide an equivalent.
// Two's-complement negation for unsigned types, spelled without unary minus:
// invert every bit, then add one.
template<typename T>
static inline T UnsignedNegate(T value) {
  static_assert(std::is_unsigned_v<T>);
  const T inverted = ~value;
  return inverted + 1;
}

// Isolates the least-significant set bit of `value` as a power-of-two value
// (not a bit index). Returns 0 when `value` is 0.
static inline uint64_t LowestSetBit(uint64_t value) {
  const uint64_t negated = UnsignedNegate(value);
  return negated & value;
}

public:


================================================
FILE: Data/AppConfig/CMakeLists.txt
================================================
# Collect the application configuration files in this directory.
# CONFIGURE_DEPENDS makes the build re-run the glob so added/removed files are noticed.
#   *.json    - installed as-is
#   *.json.in - templates that are run through configure_file() first
file(GLOB CONFIG_SOURCES CONFIGURE_DEPENDS *.json)
file(GLOB GEN_CONFIG_SOURCES CONFIGURE_DEPENDS *.json.in)

# Any application configuration json file gets installed
# (one install() per file, so an empty glob result is simply a no-op)
foreach(CONFIG_SRC ${CONFIG_SOURCES})
  install(FILES ${CONFIG_SRC}
    DESTINATION ${DATA_DIRECTORY}/AppConfig/
    COMPONENT Runtime)
endforeach()

# Any json configuration template that needs to be generated:
# first generate it, then install the result
foreach(GEN_CONFIG_SRC ${GEN_CONFIG_SOURCES})
  # Get the filename only component
  # NAME_WLE drops the directory and only the last extension: "foo.json.in" -> "foo.json"
  get_filename_component(CONFIG_NAME ${GEN_CONFIG_SRC} NAME_WLE)

  # Configure it
  configure_file(${GEN_CONFIG_SRC} ${CMAKE_BINARY_DIR}/Data/AppConfig/${CONFIG_NAME})

  # Then install the configured json
  install(FILES ${CMAKE_BINARY_DIR}/Data/AppConfig/${CONFIG_NAME}
    DESTINATION ${DATA_DIRECTORY}/AppConfig/
    COMPONENT Runtime)
endforeach()


================================================
FILE: Data/AppConfig/client.json
================================================
{
  "Config": {
    "HideHypervisorBit": "1"
  }
}


================================================
FILE: Data/AppConfig/steamwebhelper.json
================================================
{
  "Comment": "Bypasses libGL's glX and instead sends GLX requests directly via xcb",
  "ThunksDB": {
    "GL": 0
  }
}


================================================
FILE: Data/CI/FEXLinuxTestsThunks.json
================================================
{
  "ThunksDB": {
    "fex_thunk_test": 1
  }
}


================================================
FILE: Data/CI/GLThunks.json
================================================
{
  "ThunksDB": {
    "GL": 1
  }
}


================================================
FILE: Data/CI/VulkanThunks.json
================================================
{
  "ThunksDB": {
    "Vulkan": 1
  }
}


================================================
FILE: Data/CMake/FindZycore.cmake
================================================
# SPDX-License-Identifier: MIT

# Find module for Zycore.
# Prefers the package's own CMake config; falls back to pkg-config and exposes
# the result under the same Zycore::Zycore target name.

# No lookup is attempted when cross compiling.
if (CMAKE_CROSSCOMPILING)
    return()
endif()

include(FindPackageHandleStandardArgs)

find_package(Zycore QUIET CONFIG)

if (Zycore_CONSIDERED_CONFIGS)
    # At least one config file was considered: let CONFIG_MODE report the outcome.
    find_package_handle_standard_args(Zycore CONFIG_MODE)
else()
    find_package(PkgConfig QUIET)
    # The prefix given to pkg_search_module() names both the result variables
    # (<prefix>_LINK_LIBRARIES, <prefix>_VERSION) and the imported target
    # (PkgConfig::<prefix>). CMake names are case-sensitive, so the prefix must
    # be spelled exactly like the lower-case references below.
    pkg_search_module(zycore QUIET IMPORTED_TARGET zycore)
    find_package_handle_standard_args(Zycore
        REQUIRED_VARS zycore_LINK_LIBRARIES
        VERSION_VAR zycore_VERSION)

    if (TARGET PkgConfig::zycore)
      add_library(Zycore::Zycore ALIAS PkgConfig::zycore)
    endif()
endif()


================================================
FILE: Data/CMake/FindZydis.cmake
================================================
# SPDX-License-Identifier: MIT

# Find module for Zydis.
# Prefers the package's own CMake config; falls back to pkg-config and exposes
# the result under the same Zydis::Zydis target name.

# No lookup is attempted when cross compiling.
if (CMAKE_CROSSCOMPILING)
    return()
endif()

include(FindPackageHandleStandardArgs)

find_package(Zydis QUIET CONFIG)

if (Zydis_CONSIDERED_CONFIGS)
    # At least one config file was considered: let CONFIG_MODE report the outcome.
    find_package_handle_standard_args(Zydis CONFIG_MODE)
else()
    find_package(PkgConfig QUIET)
    # The prefix given to pkg_search_module() names both the result variables
    # (<prefix>_LINK_LIBRARIES, <prefix>_VERSION) and the imported target
    # (PkgConfig::<prefix>). CMake names are case-sensitive, so the prefix must
    # be spelled exactly like the lower-case references below.
    pkg_search_module(zydis QUIET IMPORTED_TARGET zydis)
    find_package_handle_standard_args(Zydis
        REQUIRED_VARS zydis_LINK_LIBRARIES
        VERSION_VAR zydis_VERSION)

    if (TARGET PkgConfig::zydis)
      add_library(Zydis::Zydis ALIAS PkgConfig::zydis)
    endif()
endif()


================================================
FILE: Data/CMake/Findxxhash.cmake
================================================
# SPDX-License-Identifier: MIT

# Find module for xxhash, located through pkg-config (module name "xxhash" or
# "libxxhash"). On success the library is exposed as xxHash::xxhash.
include(FindPackageHandleStandardArgs)
find_package(PkgConfig QUIET)

pkg_search_module(xxhash QUIET IMPORTED_TARGET xxhash libxxhash)
find_package_handle_standard_args(xxhash
    REQUIRED_VARS
        xxhash_LINK_LIBRARIES
    VERSION_VAR
        xxhash_VERSION
)

# Nothing left to do when the lookup failed or the alias already exists.
if (NOT xxhash_FOUND OR TARGET xxHash::xxhash)
    return()
endif()

# Prefer the pkg-config imported target; otherwise alias a plain `xxhash` target.
if (TARGET PkgConfig::xxhash)
    add_library(xxHash::xxhash ALIAS PkgConfig::xxhash)
else()
    add_library(xxHash::xxhash ALIAS xxhash)
endif()


================================================
FILE: Data/CMake/LinkerGC.cmake
================================================
# SPDX-License-Identifier: MIT

# This applies some common linker options that reduce code size and linking time in Release mode. Namely:
# --gc-sections: Linktime garbage collection, discards unused sections from the final output
# --strip-all  : Similar to running `strip`, discards the symbol table from the final output
# --as-needed  : Only includes libraries that are actually needed in the final output.

# LinkerGC(<target>)
# Adds the size-reducing linker options described above to <target>, but only
# for Release builds.
macro(LinkerGC target)
  # if(MATCHES) is case-sensitive and the conventional spelling is "Release",
  # so normalise the case here instead of relying on the caller having done it.
  string(TOUPPER "${CMAKE_BUILD_TYPE}" _linkergc_build_type)
  if (_linkergc_build_type MATCHES "RELEASE")
    target_link_options(${target} PRIVATE
      "LINKER:--gc-sections"
      "LINKER:--strip-all"
      "LINKER:--as-needed")
  endif()
  # A macro runs in the caller's scope: do not leak the helper variable.
  unset(_linkergc_build_type)
endmacro()


================================================
FILE: Data/CMake/cmake_uninstall.cmake.in
================================================
# Uninstall script: removes every file recorded in the install manifest.
# Generated with configure_file(); the @VAR@ placeholders are substituted then.
if(NOT EXISTS "@CMAKE_BINARY_DIR@/install_manifest.txt")
  message(FATAL_ERROR "Cannot find install manifest: @CMAKE_BINARY_DIR@/install_manifest.txt")
endif()

# The manifest holds one installed path per line; turn it into a CMake list.
file(READ "@CMAKE_BINARY_DIR@/install_manifest.txt" files)
string(REGEX REPLACE "\n" ";" files "${files}")
foreach(file ${files})
  message(STATUS "Uninstalling $ENV{DESTDIR}${file}")
  # IS_SYMLINK is checked separately so dangling symlinks are removed as well.
  if(IS_SYMLINK "$ENV{DESTDIR}${file}" OR EXISTS "$ENV{DESTDIR}${file}")
    # execute_process() replaces the deprecated exec_program() (policy CMP0153).
    # Arguments are passed individually, so no manual shell quoting is needed.
    execute_process(
      COMMAND "@CMAKE_COMMAND@" -E remove "$ENV{DESTDIR}${file}"
      OUTPUT_VARIABLE rm_out
      RESULT_VARIABLE rm_retval
      )
    if(NOT "${rm_retval}" STREQUAL 0)
      message(FATAL_ERROR "Problem when removing $ENV{DESTDIR}${file}")
    endif()
  else()
    message(STATUS "File $ENV{DESTDIR}${file} does not exist.")
  endif()
endforeach()


================================================
FILE: Data/CMake/toolchain_aarch64.cmake
================================================
# This is a reference AArch64 cross compile script
# Pass in to cmake when building:
# eg: cmake --toolchain ../Data/CMake/toolchain_aarch64.cmake ..
#
# Requires the SYSROOT environment variable to point at an AArch64 rootfs;
# configuration aborts otherwise.
if (NOT DEFINED ENV{SYSROOT})
  message(FATAL_ERROR "Need to have SYSROOT environment variable set")
endif()

# Describe the target system.
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR aarch64)
set(CMAKE_CROSSCOMPILING TRUE)

# Target triple needs to match the binutils exactly
set(TARGET_TRIPLE aarch64-linux-gnu)
# Use the clang/LLVM tools for compiling, archiving and linking.
set(CMAKE_C_COMPILER "clang")
set(CMAKE_CXX_COMPILER "clang++")
set(CMAKE_C_COMPILER_AR "llvm-ar")
set(CMAKE_CXX_COMPILER_AR "llvm-ar")
set(CMAKE_C_COMPILER_RANLIB "llvm-ranlib")
set(CMAKE_CXX_COMPILER_RANLIB "llvm-ranlib")
set(CMAKE_LINKER "ld.lld")

# Tell clang which target to generate code for.
set(CMAKE_C_COMPILER_TARGET ${TARGET_TRIPLE})
set(CMAKE_CXX_COMPILER_TARGET ${TARGET_TRIPLE})

# Set the environment variable SYSROOT to the aarch64 rootfs
set(CMAKE_FIND_ROOT_PATH "$ENV{SYSROOT}")
set(CMAKE_SYSROOT "$ENV{SYSROOT}")

# Let find_package() locate CMake package configs installed inside the sysroot.
list(APPEND CMAKE_PREFIX_PATH "$ENV{SYSROOT}/usr/lib/${TARGET_TRIPLE}/cmake/")

# Programs are never searched for under the root path (they must run on the host)...
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)

# ...while libraries, headers and packages are searched for only under it.
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)


================================================
FILE: Data/CMake/toolchain_mingw.cmake
================================================
# MinGW cross-compile toolchain file.
# MINGW_TRIPLE is a user-supplied cache variable (empty by default); every tool
# name below is derived from it as "<triple>-<tool>".
set(MINGW_TRIPLE "" CACHE STRING "MinGW compiler target architecture triple")

set(CMAKE_RC_COMPILER ${MINGW_TRIPLE}-windres)
set(CMAKE_C_COMPILER ${MINGW_TRIPLE}-clang)
set(CMAKE_CXX_COMPILER ${MINGW_TRIPLE}-clang++)
set(CMAKE_DLLTOOL ${MINGW_TRIPLE}-dlltool)
set(CMAKE_AR ${MINGW_TRIPLE}-ar)

# Compile everything as static to avoid requiring the MinGW runtime libraries, force page aligned sections so that
# debug symbols work correctly, and disable loop alignment to workaround an LLVM bug
# (https://github.com/llvm/llvm-project/issues/47432)
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-static -static-libgcc -static-libstdc++ -Wl,--file-alignment=4096,/mllvm:-align-loops=1")
set(CMAKE_EXE_LINKER_FLAGS_INIT "-static -static-libgcc -static-libstdc++ -Wl,--file-alignment=4096,/mllvm:-align-loops=1")
# Clear the standard-library lists (forced into the cache) so nothing extra is linked by default.
set(CMAKE_C_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
set(CMAKE_CXX_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
set(CMAKE_STANDARD_LIBRARIES "" CACHE STRING "" FORCE)
set(CMAKE_SYSTEM_NAME Windows)
# NOTE(review): this stores the full triple, not just the architecture, as the processor name; confirm consumers expect that.
set(CMAKE_SYSTEM_PROCESSOR ${MINGW_TRIPLE})

# Packages are searched for only under the find root path.
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)


================================================
FILE: Data/CMake/toolchain_x86_32.cmake
================================================
# Toolchain file for building 32-bit x86 (i686) code.
# ENABLE_CLANG_THUNKS selects clang + lld; otherwise the x86_64 GNU cross
# compilers are used with -m32.
option(ENABLE_CLANG_THUNKS "Enable building thunks with clang" FALSE)

set(CMAKE_SYSTEM_PROCESSOR i686)

if (ENABLE_CLANG_THUNKS)
  message(STATUS "Enabling thunk clang building. Force enabling LLD as well")

  # Link executables, modules and shared libraries with lld.
  set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_C_COMPILER clang)
  set(CMAKE_CXX_COMPILER clang++)
  # Target i686 with SSE2 enabled and SSE used for floating-point math.
  set(CLANG_FLAGS "-target i686-linux-gnu -msse2 -mfpmath=sse")

  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CLANG_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CLANG_FLAGS}")
else()
  # The compiler variable is a list: the first element is the executable and
  # -m32 is passed along as a mandatory argument.
  set(CMAKE_C_COMPILER x86_64-linux-gnu-gcc -m32)
  set(CMAKE_CXX_COMPILER x86_64-linux-gnu-g++ -m32)
endif()


================================================
FILE: Data/CMake/toolchain_x86_64.cmake
================================================
# Toolchain file for building 64-bit x86 (x86_64) code.
# ENABLE_CLANG_THUNKS selects clang + lld; otherwise the x86_64 GNU cross
# compilers are used.
option(ENABLE_CLANG_THUNKS "Enable building thunks with clang" FALSE)

set(CMAKE_SYSTEM_PROCESSOR x86_64)

if (ENABLE_CLANG_THUNKS)
  message(STATUS "Enabling thunk clang building. Force enabling LLD as well")

  # Link executables, modules and shared libraries with lld.
  set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
  set(CMAKE_C_COMPILER clang)
  set(CMAKE_CXX_COMPILER clang++)
  # Make clang generate code for x86_64 Linux.
  set(CLANG_FLAGS "-target x86_64-linux-gnu")

  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${CLANG_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CLANG_FLAGS}")
else()
  set(CMAKE_C_COMPILER x86_64-linux-gnu-gcc)
  set(CMAKE_CXX_COMPILER x86_64-linux-gnu-g++)
endif()


================================================
FILE: Data/CMake/version_to_variables.cmake
================================================
# Extracts a version from the passed in version string in the form of "<Major>.<Minor>.<Patch>".
# If a part of the version is missing then it gets set as zero.
# Version variables returned in:
# ${Package}_VERSION_MAJOR
# ${Package}_VERSION_MINOR
# ${Package}_VERSION_PATCH
function(version_to_variables VERSION _Package)
  # Split "<Major>.<Minor>.<Patch>" into a list of its components.
  string(REPLACE "." ";" VERSION_LIST "${VERSION}")
  list(LENGTH VERSION_LIST VERSION_LEN)

  # Walk the three component names in order; a component that is not present
  # in the version string is reported as 0.
  set(_component_index 0)
  foreach(_component MAJOR MINOR PATCH)
    if (${VERSION_LEN} GREATER ${_component_index})
      list(GET VERSION_LIST ${_component_index} _component_value)
      set(${_Package}_VERSION_${_component} ${_component_value} PARENT_SCOPE)
    else()
      set(${_Package}_VERSION_${_component} 0 PARENT_SCOPE)
    endif()
    math(EXPR _component_index "${_component_index} + 1")
  endforeach()
endfunction()


================================================
FILE: Data/Dockerfile
================================================
# --- Stage 1: Builder ---
# Clones FEX and builds it with clang-13 and Ninja.
FROM ubuntu:22.04 AS builder

# Refresh the package index and install in the SAME layer: a separately cached
# `apt-get update` layer can otherwise be paired with a later install and point
# at package versions that no longer exist.
RUN DEBIAN_FRONTEND="noninteractive" apt-get update && \
    DEBIAN_FRONTEND="noninteractive" apt-get install -y cmake \
    clang-13 llvm-13 nasm ninja-build pkg-config \
    libcap-dev libglfw3-dev libepoxy-dev python3-dev libsdl2-dev \
    python3 linux-headers-generic \
    git qtbase5-dev qtdeclarative5-dev lld

RUN git clone --recurse-submodules https://github.com/FEX-Emu/FEX.git
WORKDIR /FEX
RUN mkdir build

# Build arguments are visible to the RUN steps below as environment variables.
ARG CC=clang-13
ARG CXX=clang++-13
# Configured from /FEX with "." as the source directory.
RUN cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=Release -DUSE_LINKER=lld -DENABLE_LTO=True -DBUILD_TESTING=False -DENABLE_ASSERTIONS=False -G Ninja .
RUN ninja

WORKDIR /FEX/build

# --- Stage 2: Runner ---
# Based on the builder image, with the built binaries copied into /usr/bin.
FROM builder AS runner

RUN DEBIAN_FRONTEND="noninteractive" apt-get update && \
    DEBIAN_FRONTEND="noninteractive" apt-get install -y \
    libcap-dev libglfw3-dev libepoxy-dev

COPY --from=builder /FEX/Bin/* /usr/bin/

WORKDIR /


================================================
FILE: Data/ThunksDB.json
================================================
{
  "DB": {
    "GL": {
      "Library" : "libGL-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libGL.so",
        "@PREFIX_LIB@/libGL.so.1",
        "@PREFIX_LIB@/libGL.so.1.2.0",
        "@PREFIX_LIB@/libGL.so.1.7.0"
      ]
    },
    "Vulkan": {
      "Library": "libvulkan-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libvulkan.so",
        "@PREFIX_LIB@/libvulkan.so.1",
        "@HOME@/.local/share/Steam/ubuntu12_32/steam-runtime/pinned_libs_64/libvulkan.so.1"
      ]
    },
    "drm": {
      "Library": "libdrm-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libdrm.so",
        "@PREFIX_LIB@/libdrm.so.2",
        "@PREFIX_LIB@/libdrm.so.2.4.0"
      ]
    },
    "asound": {
      "Library": "libasound-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libasound.so",
        "@PREFIX_LIB@/libasound.so.2",
        "@PREFIX_LIB@/libasound.so.2.0.0"
      ]
    },
    "fex_thunk_test": {
      "Library": "libfex_thunk_test-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libfex_thunk_test.so"
      ]
    },
    "WaylandClient": {
      "Library" : "libwayland-client-guest.so",
      "Overlay": [
        "@PREFIX_LIB@/libwayland-client.so",
        "@PREFIX_LIB@/libwayland-client.so.0",
        "@PREFIX_LIB@/libwayland-client.so.0.20.0"
      ]
    }
  }
}


================================================
FILE: Data/binfmts/CMakeLists.txt
================================================
# GenBinFmt(<template>)
# Configures a binfmt template into the build tree and installs the result
# under <prefix>/share/binfmts/.
function(GenBinFmt Name)
  # Output name is the template name without directory or extension
  get_filename_component(FMT_NAME ${Name} NAME_WE)

  # Generated file lives in the build tree
  set(GENERATED_FMT ${CMAKE_BINARY_DIR}/Data/binfmts/${FMT_NAME})
  configure_file(${Name} ${GENERATED_FMT})

  # Install the generated file, not the template
  install(
    FILES ${GENERATED_FMT}
    DESTINATION ${CMAKE_INSTALL_PREFIX}/share/binfmts/
    COMPONENT Runtime)
endfunction()

# Two registration formats are supported, selected by USE_LEGACY_BINFMTMISC.
if (NOT USE_LEGACY_BINFMTMISC)
  # Default: generate the single-line *.conf registrations and install them
  # into <prefix>/lib/binfmt.d/.
  configure_file(FEX-x86.conf.in ${CMAKE_BINARY_DIR}/Data/binfmts/FEX-x86.conf)
  configure_file(FEX-x86_64.conf.in ${CMAKE_BINARY_DIR}/Data/binfmts/FEX-x86_64.conf)

  install(FILES ${CMAKE_BINARY_DIR}/Data/binfmts/FEX-x86.conf ${CMAKE_BINARY_DIR}/Data/binfmts/FEX-x86_64.conf
    DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/binfmt.d/
    COMPONENT Runtime)
else()
  # Legacy: generate the multi-line descriptions and install them into
  # <prefix>/share/binfmts/ through GenBinFmt().
  GenBinFmt(FEX-x86.in)
  GenBinFmt(FEX-x86_64.in)
endif()


================================================
FILE: Data/binfmts/FEX-x86.conf.in
================================================
:FEX-x86:M:0:\x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00:\xff\xff\xff\xff\xff\xfe\xfe\x00\x00\x00\x00\xff\xff\xff\xff\xff\xfe\xff\xff\xff:@CMAKE_INSTALL_PREFIX@/bin/FEX:POCF


================================================
FILE: Data/binfmts/FEX-x86.in
================================================
package fex
interpreter @CMAKE_INSTALL_PREFIX@/bin/FEX
magic \x7fELF\x01\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x03\x00
offset 0
mask \xff\xff\xff\xff\xff\xfe\xfe\x00\x00\x00\x00\xff\xff\xff\xff\xff\xfe\xff\xff\xff
credentials yes
fix_binary yes
preserve yes


================================================
FILE: Data/binfmts/FEX-x86_64.conf.in
================================================
:FEX-x86_64:M:0:\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00:\xff\xff\xff\xff\xff\xfe\xfe\x00\x00\x00\x00\xff\xff\xff\xff\xff\xfe\xff\xff\xff:@CMAKE_INSTALL_PREFIX@/bin/FEX:POCF


================================================
FILE: Data/binfmts/FEX-x86_64.in
================================================
package fex
interpreter @CMAKE_INSTALL_PREFIX@/bin/FEX
magic \x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00
offset 0
mask \xff\xff\xff\xff\xff\xfe\xfe\x00\x00\x00\x00\xff\xff\xff\xff\xff\xfe\xff\xff\xff
credentials yes
fix_binary yes
preserve yes


================================================
FILE: Data/nix/FEXLinuxTests/shell.nix
================================================
# Development shell for building FEXLinuxTests.
#
# Generates one CMake toolchain file per guest bitness (i686 and x86_64), each
# pointing at a GCC cross-compiler by absolute store path, and exposes both to
# the shell through the FEX_CMAKE_TOOLCHAINS environment variable.
{ pkgs ? import <nixpkgs> { } }:

let
  # nixpkgs cross package sets used for the 32-bit and 64-bit x86 guests.
  pkgsCross32 = pkgs.pkgsCross.gnu32;
  pkgsCross64 = pkgs.pkgsCross.gnu64;

  # CMake toolchain file selecting the i686 GCC cross-compiler.
  gcc32 = pkgs.writeText "toolchain_nix_gcc_x86_32.txt" ''
    set(CMAKE_SYSTEM_PROCESSOR i686)
    set(CMAKE_C_COMPILER ${pkgsCross32.buildPackages.gcc}/bin/i686-unknown-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER ${pkgsCross32.buildPackages.gcc}/bin/i686-unknown-linux-gnu-g++)
  '';

  # CMake toolchain file selecting the x86_64 GCC cross-compiler.
  gcc64 = pkgs.writeText "toolchain_nix_gcc_x86_64.txt" ''
    set(CMAKE_SYSTEM_PROCESSOR x86_64)
    set(CMAKE_C_COMPILER ${pkgsCross64.buildPackages.gcc}/bin/x86_64-unknown-linux-gnu-gcc)
    set(CMAKE_CXX_COMPILER ${pkgsCross64.buildPackages.gcc}/bin/x86_64-unknown-linux-gnu-g++)
  '';
in
pkgs.mkShell {
  # NOTE(review): the toolchain files above reference gcc, while the packages
  # added to the shell here are the clang cross-compilers - confirm this is
  # intended.
  buildInputs = [
    pkgsCross64.buildPackages.clang
    pkgsCross32.buildPackages.clang
  ];

  # Print usage hints, but only when the shell is interactive ($- contains "i").
  # The "\$" keeps the variable name literal in the printed hint.
  shellHook = ''
    if [[ $- == *i* ]]; then
      echo "toolchain32: ${gcc32}"
      echo "toolchain64: ${gcc64}"
      echo ""
      echo "Use \$FEX_CMAKE_TOOLCHAINS to configure CMake."
    fi
  '';

  # CMake options consumed as $FEX_CMAKE_TOOLCHAINS (see ../cmake_enable_flt.sh).
  FEX_CMAKE_TOOLCHAINS = "-DX86_32_TOOLCHAIN_FILE=${gcc32} -DX86_64_TOOLCHAIN_FILE=${gcc64}";
}


================================================
FILE: Data/nix/LibraryForwarding/shell.nix
================================================
# Development shell for building FEX's library forwarding (thunk) guest
# libraries.
#
# Builds a minimal development RootFS (headers and libraries only) and two
# clang-based CMake toolchain files that use it as sysroot, then exposes them
# through the FEX_CMAKE_TOOLCHAINS and ROOTFS environment variables.
{ pkgs ? import <nixpkgs> { } }:

let
  # nixpkgs cross package sets used for the 32-bit and 64-bit x86 guests.
  pkgsCross32 = pkgs.pkgsCross.gnu32;
  pkgsCross64 = pkgs.pkgsCross.gnu64;

  # Merged tree of the /include and /lib directories of the listed packages,
  # used as --sysroot by the toolchain files below.
  devRootFS = pkgs.buildEnv {
    name = "fex-dev-rootfs";
    paths = [
      # Guest libc development files and compiler runtimes for both bitnesses
      pkgsCross64.stdenv.cc.libc_dev
      pkgsCross32.stdenv.cc.libc_dev
      pkgsCross64.stdenv.cc.cc
      pkgsCross32.stdenv.cc.cc

      # "dev" outputs of the libraries whose interfaces are needed
      pkgs.alsa-lib.dev
      pkgs.libdrm.dev
      pkgs.libGL.dev
      pkgs.wayland.dev
      pkgs.xorg.libX11.dev
      pkgs.xorg.libxcb.dev
      pkgs.xorg.libXrandr.dev
      pkgs.xorg.libXrender.dev
      pkgs.xorg.xorgproto
    ];
    # Several inputs provide the same paths; do not fail on such collisions.
    ignoreCollisions = true;
    pathsToLink = [
      "/include"
      "/lib"
    ];

    # Make the headers reachable as <root>/usr/include as well (symlink to
    # <root>/include).
    postBuild = ''
      mkdir -p $out/usr
      ln -s $out/include $out/usr/
    '';
  };

  # CMake toolchain file for the 32-bit (i686) guest build.
  # NOTE(review): CMAKE_C_COMPILER / CMAKE_CXX_COMPILER are assigned twice; the
  # second assignment (absolute store path) is the one that takes effect.
  # NOTE(review): this file passes -iwithsysroot/include while the 64-bit one
  # passes -iwithsysroot/usr/include; both name the same directory through the
  # symlink created in devRootFS.postBuild.
  toolchain32 = pkgs.writeText "toolchain_nix_x86_32.txt" ''
    set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_SYSTEM_PROCESSOR i686)
    set(CMAKE_C_COMPILER clang)
    set(CMAKE_CXX_COMPILER clang++)
    set(CMAKE_C_COMPILER ${pkgsCross32.buildPackages.clang}/bin/i686-unknown-linux-gnu-clang)
    set(CMAKE_CXX_COMPILER ${pkgsCross32.buildPackages.clang}/bin/i686-unknown-linux-gnu-clang++)
    set(CLANG_FLAGS "-nodefaultlibs -nostartfiles -lstdc++ -target i686-linux-gnu -msse2 -mfpmath=sse --sysroot=${devRootFS} -iwithsysroot/include")
    set(CMAKE_C_FLAGS "''${CMAKE_C_FLAGS} ''${CLANG_FLAGS}")
    set(CMAKE_CXX_FLAGS "''${CMAKE_CXX_FLAGS} ''${CLANG_FLAGS}")
  '';

  # CMake toolchain file for the 64-bit (x86_64) guest build.  The ''${...}
  # sequences are Nix escapes that leave a literal ${...} for CMake to expand.
  toolchain64 = pkgs.writeText "toolchain_nix_x86_64.txt" ''
    set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
    set(CMAKE_SYSTEM_PROCESSOR x86_64)
    set(CMAKE_C_COMPILER clang)
    set(CMAKE_CXX_COMPILER clang++)
    set(CMAKE_C_COMPILER ${pkgsCross64.buildPackages.clang}/bin/x86_64-unknown-linux-gnu-clang)
    set(CMAKE_CXX_COMPILER ${pkgsCross64.buildPackages.clang}/bin/x86_64-unknown-linux-gnu-clang++)
    set(CLANG_FLAGS "-nodefaultlibs -nostartfiles -lstdc++ -target x86_64-linux-gnu --sysroot=${devRootFS} -iwithsysroot/usr/include")
    set(CMAKE_C_FLAGS "''${CMAKE_C_FLAGS} ''${CLANG_FLAGS}")
    set(CMAKE_CXX_FLAGS "''${CMAKE_CXX_FLAGS} ''${CLANG_FLAGS}")
  '';
in
pkgs.mkShell {
  buildInputs = [
    pkgsCross64.buildPackages.clang
    pkgsCross32.buildPackages.clang
  ];

  # Print usage hints, but only when the shell is interactive ($- contains "i").
  shellHook = ''
    if [[ $- == *i* ]]; then
      echo "Set up dev RootFS at ${devRootFS}"
      echo "toolchain32: ${toolchain32}"
      echo "toolchain64: ${toolchain64}"
      echo ""
      echo "Use \$FEX_CMAKE_TOOLCHAINS to configure CMake."
    fi
  '';

  # CMake options consumed as $FEX_CMAKE_TOOLCHAINS (see
  # ../cmake_enable_libfwd.sh), plus the RootFS location on its own.
  FEX_CMAKE_TOOLCHAINS = "-DX86_32_TOOLCHAIN_FILE=${toolchain32} -DX86_64_TOOLCHAIN_FILE=${toolchain64} -DX86_DEV_ROOTFS=${devRootFS}";
  ROOTFS = "${devRootFS}";
}


================================================
FILE: Data/nix/WineOnArm/shell.nix
================================================
# Development shell for building FEX (and DXVK/vkd3d-proton) as Windows
# libraries with the llvm-mingw cross-toolchain.
#
# Exposes three environment variables: FEX_CMAKE_TOOLCHAIN_ARM64EC,
# FEX_CMAKE_TOOLCHAIN_WOW64 and FEX_MESON_CROSSFILE.
{ pkgs ? import <nixpkgs> { } }:

let
  # Prebuilt llvm-mingw release for aarch64 hosts, pinned by hash.
  toolchain = pkgs.fetchzip {
    url = "https://github.com/bylaws/llvm-mingw/releases/download/20250920/llvm-mingw-20250920-ucrt-ubuntu-22.04-aarch64.tar.xz";
    sha256 = "sha256-LaojKjC8KzY+soW5u6eoDoXE3qtYk9Ejr7M3enTqRAE=";
  };

  # Copy of the repository's MinGW toolchain file in which every
  # "${MINGW_TRIPLE}-" tool prefix is rewritten to live under ${toolchain}/bin.
  # --replace-fail makes the substitution an error if the pattern is absent.
  cmakeToolchainFile = pkgs.substitute {
    # Use absolute paths that are discoverable outside of the nix shell
    src = ../../CMake/toolchain_mingw.cmake;
    substitutions = ["--replace-fail" "\${MINGW_TRIPLE}-" "${toolchain}/bin/\${MINGW_TRIPLE}-"];
  };

  # Meson cross file targeting arm64ec Windows.
  # NOTE(review): "strip" has no target-triple prefix and "pkgconfig" is a bare
  # program name rather than a path into ${toolchain} - presumably intentional,
  # confirm.
  mesonCrossFile = pkgs.writeText "crossfile_llvm_mingw.txt" ''
    [binaries]
    ar = '${toolchain}/bin/arm64ec-w64-mingw32-ar'
    c = '${toolchain}/bin/arm64ec-w64-mingw32-gcc'
    cpp = '${toolchain}/bin/arm64ec-w64-mingw32-g++'
    ld = '${toolchain}/bin/arm64ec-w64-mingw32-ld'
    windres = '${toolchain}/bin/arm64ec-w64-mingw32-windres'
    strip = '${toolchain}/bin/strip'
    widl = '${toolchain}/bin/arm64ec-w64-mingw32-widl'
    pkgconfig = 'aarch64-linux-gnu-pkg-config'
    [host_machine]
    system = 'windows'
    cpu_family = 'aarch64'
    cpu = 'aarch64'
    endian = 'little'
  '';
in
pkgs.mkShell {
  buildInputs = [
    toolchain
  ];

  # Print usage hints, but only when the shell is interactive ($- contains "i").
  shellHook = ''
    if [[ $- == *i* ]]; then
      echo "llvm-mingw set up at ${toolchain}."
      echo ""
      echo "To configure DXVK/vkd3d-proton: meson setup \$FEX_MESON_CROSSFILE"
      echo ""
      echo "To configure 32-bit FEX build: cmake \$FEX_CMAKE_TOOLCHAIN_WOW64"
      echo "To configure 64-bit FEX build: cmake \$FEX_CMAKE_TOOLCHAIN_ARM64EC"
    fi
  '';

  # Both CMake variants share the toolchain file and install directory; they
  # differ only in the MinGW target triple.
  # E.g. cmake $FEX_CMAKE_TOOLCHAIN_ARM64EC -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_LTO=False -DBUILD_TESTING=False
  FEX_CMAKE_TOOLCHAIN_ARM64EC = "--toolchain ${cmakeToolchainFile} -DMINGW_TRIPLE=arm64ec-w64-mingw32 -DCMAKE_INSTALL_LIBDIR=/usr/lib/wine/aarch64-windows";
  FEX_CMAKE_TOOLCHAIN_WOW64 = "--toolchain ${cmakeToolchainFile} -DMINGW_TRIPLE=aarch64-w64-mingw32 -DCMAKE_INSTALL_LIBDIR=/usr/lib/wine/aarch64-windows";
  FEX_MESON_CROSSFILE = "--cross-file ${mesonCrossFile}";
}


================================================
FILE: Data/nix/cmake_configure_woa32.sh
================================================
#! /usr/bin/env nix-shell
#! nix-shell -i bash WineOnArm/shell.nix

# Helper script to configure CMake for building FEX as library for emulation
# of 32-bit applications in Wine/Proton.
# The required cross-toolchains will be set up and managed by nix.
#
# Usage: run from an empty build folder and pass the CMake arguments to use
# (at least one is required, e.g. the path to the source tree).

if [ $# -eq 0 ]
then
  echo "Expected CMake argument list"
  exit 1
fi

# Refuse to reconfigure an existing build folder
if [ -f CMakeCache.txt ]
then
  echo "Expected empty build folder"
  exit 1
fi

set -o xtrace
# $FEX_CMAKE_TOOLCHAIN_WOW64 is deliberately left unquoted: it holds several
# space-separated CMake options (see WineOnArm/shell.nix) that must be split
# into individual words. "$@" is quoted so that caller-supplied arguments
# containing spaces or glob characters are forwarded unchanged.
cmake $FEX_CMAKE_TOOLCHAIN_WOW64 -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_LTO=False -DBUILD_TESTING=False "$@"


================================================
FILE: Data/nix/cmake_configure_woa64.sh
================================================
#! /usr/bin/env nix-shell
#! nix-shell -i bash WineOnArm/shell.nix

# Helper script to configure CMake for building FEX as library for emulation
# of 64-bit applications in Wine/Proton.
# Nix is used to install and manage the required cross-toolchains.
#
# Usage: run from an empty build folder and pass the CMake arguments to use
# (at least one is required, e.g. the path to the source tree).

if [ $# -eq 0 ]
then
  echo "Expected CMake argument list"
  exit 1
fi

# Refuse to reconfigure an existing build folder
if [ -f CMakeCache.txt ]
then
  echo "Expected empty build folder"
  exit 1
fi

set -o xtrace
# $FEX_CMAKE_TOOLCHAIN_ARM64EC is deliberately left unquoted: it holds several
# space-separated CMake options (see WineOnArm/shell.nix) that must be split
# into individual words. "$@" is quoted so that caller-supplied arguments
# containing spaces or glob characters are forwarded unchanged.
cmake $FEX_CMAKE_TOOLCHAIN_ARM64EC -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DENABLE_LTO=False -DBUILD_TESTING=False "$@"


================================================
FILE: Data/nix/cmake_enable_flt.sh
================================================
#! /usr/bin/env nix-shell
#! nix-shell -i bash FEXLinuxTests/shell.nix

# Enables FEXLinuxTests in an already configured CMake build folder.
# The x86 cross-toolchains it needs are installed and managed through Nix.

# Bail out unless the current directory already holds a CMake configuration
[ -f CMakeCache.txt ] || {
  echo "Must be run from a pre-configured CMake build folder"
  exit 1
}

# Discard any earlier FEXLinuxTests build so the new toolchain takes effect
rm -rf unittests/FEXLinuxTests

# $FEX_CMAKE_TOOLCHAINS stays unquoted: it carries several CMake options
set -o xtrace
cmake . $FEX_CMAKE_TOOLCHAINS -DBUILD_TESTING=ON -DBUILD_FEX_LINUX_TESTS=ON


================================================
FILE: Data/nix/cmake_enable_libfwd.sh
================================================
#!/bin/sh

# Helper script to configure CMake for library forwarding in FEX.
# Nix is used to install and manage the required cross-toolchains.
#
# Unlike the sibling helper scripts this one is not interpreted by nix-shell
# itself: the host clang has to be located before entering the nix shell.

if [ ! -f CMakeCache.txt ]
then
  echo "Must be run from a pre-configured CMake build folder"
  exit 1
fi

# Remove previous build to ensure the new toolchain is applied
rm -rf guest-libs guest-libs-32 Guest Guest_32

# Set clang executable path manually since the one from the nix store
# will be picked up otherwise
CLANG_EXEC_PATH=""
if ! grep -q CLANG_EXEC_PATH CMakeCache.txt
then
  CLANG_EXEC_PATH="-DCLANG_EXEC_PATH=$(which clang)"
fi

# Resolve shell.nix relative to this script; quoted so that a checkout path
# containing spaces is passed to nix-shell as a single argument.
SCRIPT_DIR=$(dirname -- "$0")

# \$FEX_CMAKE_TOOLCHAINS is escaped so it is expanded inside the nix shell,
# whereas $CLANG_EXEC_PATH is expanded here, before the nix shell is entered.
nix-shell "$SCRIPT_DIR/LibraryForwarding/shell.nix" \
  --run "set -o xtrace; cmake . \$FEX_CMAKE_TOOLCHAINS -DBUILD_THUNKS=ON $CLANG_EXEC_PATH; set +o xtrace"


================================================
FILE: External/.clang-format
================================================
DisableFormat: true


================================================
FILE: External/SoftFloat-3e/CMakeLists.txt
================================================

# Static library containing the subset of SoftFloat-3e sources listed below
# (80-bit extended and 128-bit operations, the conversions between them and
# the narrower formats, and the internal helpers those need).
add_library(softfloat_3e STATIC
  # F80 support
  src/extF80_add.c
  src/extF80_div.c
  src/extF80_sub.c
  src/extF80_mul.c
  src/extF80_rem.c
  src/extF80_sqrt.c
  src/extF80_le.c
  src/extF80_to_i32.c
  src/extF80_to_i64.c
  src/extF80_to_ui64.c
  src/extF80_to_f32.c
  src/extF80_to_f64.c
  src/i32_to_extF80.c
  src/ui64_to_extF80.c
  src/extF80_to_f128.c
  src/f128_to_extF80.c

  # F128 support
  src/f128_add.c
  src/f128_div.c
  src/f128_eq.c
  src/f128_eq_signaling.c
  src/f128_isSignalingNaN.c
  src/f128_le.c
  src/f128_le_quiet.c
  src/f128_lt.c
  src/f128_lt_quiet.c
  src/f128_mulAdd.c
  src/f128_mul.c
  src/f128_rem.c
  src/f128_sqrt.c
  src/f128_sub.c
  src/f128_to_f16.c
  src/f128_to_f32.c
  src/f128_to_f64.c
  src/f128_to_i32.c
  src/f128_to_i64.c
  src/f128_to_ui32.c
  src/f128_to_ui64.c
  src/s_addMagsF128.c
  src/s_subMagsF128.c
  src/s_normRoundPackToF128.c
  src/s_roundPackToF128.c
  src/s_propagateNaNF128UI.c

  # Conversion
  src/f32_to_f128.c
  src/i32_to_f128.c

  # Internal helpers (s_*), flag raising, and the remaining extF80 rounding,
  # comparison and f32/f64 conversion routines
  src/s_roundToUI64.c
  src/s_f128UIToCommonNaN.c
  src/s_commonNaNToF128UI.c
  src/s_normSubnormalF128Sig.c
  src/s_roundToI32.c
  src/s_roundToI64.c
  src/s_roundPackToF32.c
  src/s_addMagsExtF80.c
  src/s_extF80UIToCommonNaN.c
  src/s_commonNaNToF32UI.c
  src/s_commonNaNToF64UI.c
  src/s_roundPackToF64.c
  src/s_propagateNaNExtF80UI.c
  src/s_roundPackToExtF80.c
  src/s_normSubnormalExtF80Sig.c
  src/s_subMagsExtF80.c
  src/s_shiftRightJam128.c
  src/s_shiftRightJam128Extra.c
  src/s_normRoundPackToExtF80.c
  src/s_approxRecip_1Ks.c
  src/s_approxRecipSqrt32_1.c
  src/s_approxRecipSqrt_1Ks.c
  src/softfloat_raiseFlags.c
  src/f64_to_extF80.c
  src/s_commonNaNToExtF80UI.c
  src/s_normSubnormalF64Sig.c
  src/s_f64UIToCommonNaN.c
  src/extF80_roundToInt.c
  src/extF80_eq.c
  src/extF80_lt.c
  src/f32_to_extF80.c
  src/s_normSubnormalF32Sig.c
  src/s_f32UIToCommonNaN.c)

# FEXCORE_PRESERVE_ALL_ATTR decorates selected SoftFloat entry points (see
# softfloat.h).  It becomes __attribute__((preserve_all)) only when building
# for arm64 with a Clang that supports the attribute; otherwise it expands to
# nothing.  FEXCORE_HAS_PRESERVE_ALL_ATTR reports which case applies.
if (ARCHITECTURE_arm64 AND HAS_CLANG_PRESERVE_ALL)
  list(APPEND DEFINES "-DFEXCORE_PRESERVE_ALL_ATTR=__attribute__((preserve_all));-DFEXCORE_HAS_PRESERVE_ALL_ATTR=1")
else()
  list(APPEND DEFINES "-DFEXCORE_PRESERVE_ALL_ATTR=;-DFEXCORE_HAS_PRESERVE_ALL_ATTR=0")
endif()

# SoftFloat build configuration.  INLINE enables the inline primitives in
# opts-GCC.h, which are guarded by "#ifdef INLINE".
# NOTE(review): DEFINES is appended to, not initialised, so any value inherited
# from the parent scope is included as well - confirm that is intended.
list(APPEND DEFINES "-DSOFTFLOAT_BUILTIN_CLZ=1;-DINLINE=static inline;-DINLINE_LEVEL=4;-DSOFTFLOAT_FAST_INT64=1;-DSOFTFLOAT_FAST_DIV32TO16=1;-DSOFTFLOAT_FAST_DIV64TO32=1")

# PUBLIC: consumers of softfloat_3e get the same include paths and definitions.
target_include_directories(softfloat_3e PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/)
target_include_directories(softfloat_3e PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/SoftFloat-3e/)
# A leading -D on each item is removed by target_compile_definitions.
target_compile_definitions(softfloat_3e PUBLIC ${DEFINES})


================================================
FILE: External/SoftFloat-3e/include/SoftFloat-3e/opts-GCC.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2017 The Regents of the University of California.  All rights
reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef opts_GCC_h
#define opts_GCC_h 1

#ifdef INLINE

#include <stdint.h>
#include "primitiveTypes.h"

#ifdef SOFTFLOAT_BUILTIN_CLZ

/*----------------------------------------------------------------------------
| Returns the number of leading zero bits of the 16-bit value 'a'.  A zero
| input yields 16.
*----------------------------------------------------------------------------*/
INLINE uint_fast8_t softfloat_countLeadingZeros16( uint16_t a )
{
    /* The builtin's result is undefined for 0, so that case is handled here. */
    if ( a == 0 ) return 16;
    /* 'a' is widened to unsigned int for the builtin; subtracting 16 discounts
       the added high bits (this assumes a 32-bit unsigned int). */
    return __builtin_clz( a ) - 16;
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_countLeadingZeros16 softfloat_countLeadingZeros16

/*----------------------------------------------------------------------------
| Returns the number of leading zero bits of the 32-bit value 'a'.  A zero
| input yields 32.
*----------------------------------------------------------------------------*/
INLINE uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
    /* The builtin's result is undefined for 0, so that case is handled here. */
    if ( a == 0 ) return 32;
    return __builtin_clz( a );
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_countLeadingZeros32 softfloat_countLeadingZeros32

/*----------------------------------------------------------------------------
| Returns the number of leading zero bits of the 64-bit value 'a'.  A zero
| input yields 64.
*----------------------------------------------------------------------------*/
INLINE uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
{
    /* The builtin's result is undefined for 0, so that case is handled here. */
    if ( a == 0 ) return 64;
    return __builtin_clzll( a );
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_countLeadingZeros64 softfloat_countLeadingZeros64

#endif

#ifdef SOFTFLOAT_INTRINSIC_INT128

/*----------------------------------------------------------------------------
| Returns the 128-bit product of 'a' and ('b' << 32).
*----------------------------------------------------------------------------*/
INLINE struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b )
{
    /* Widen 'b' before shifting so that none of its bits are shifted out. */
    const uint_fast64_t bShifted = (uint_fast64_t) b<<32;
    /* The union reinterprets the native 128-bit integer as the two-word
       struct. */
    union { unsigned __int128 ui; struct uint128 s; } product;

    product.ui = (unsigned __int128) a * bShifted;
    return product.s;
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_mul64ByShifted32To128 softfloat_mul64ByShifted32To128

/*----------------------------------------------------------------------------
| Returns the full 128-bit product of the 64-bit values 'a' and 'b'.
*----------------------------------------------------------------------------*/
INLINE struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
{
    /* The union reinterprets the native 128-bit integer as the two-word
       struct. */
    union { unsigned __int128 ui; struct uint128 s; } product;
    /* Widen one operand first so the multiplication is carried out in 128
       bits. */
    const unsigned __int128 wideA = a;

    product.ui = wideA * b;
    return product.s;
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_mul64To128 softfloat_mul64To128

/*----------------------------------------------------------------------------
| Multiplies the 128-bit value formed by 'a64' (high word) and 'a0' (low
| word) by 'b' and returns the low 128 bits of the product.
*----------------------------------------------------------------------------*/
INLINE
struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
{
    /* The union reinterprets the native 128-bit integer as the two-word
       struct. */
    union { unsigned __int128 ui; struct uint128 s; } product;
    unsigned __int128 a;

    /* Reassemble the 128-bit multiplicand from its two halves. */
    a = (unsigned __int128) a64<<64;
    a |= a0;
    product.ui = a * b;
    return product.s;
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_mul128By32 softfloat_mul128By32

/*----------------------------------------------------------------------------
| Multiplies the 128-bit value a64:a0 by the 128-bit value b64:b0 and stores
| the 256-bit product as four 64-bit words in the array pointed to by 'zPtr'.
| Word positions are selected with 'indexWord( 4, n )', where n = 0 receives
| the least-significant 64 bits and n = 3 the most-significant.
*----------------------------------------------------------------------------*/
INLINE
void
 softfloat_mul128To256M(
     uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0, uint64_t *zPtr )
{
    unsigned __int128 z0, mid1, mid, z128;
    /* Low partial product: contributes to bits 0..127 of the result. */
    z0 = (unsigned __int128) a0 * b0;
    /* Sum of the two cross products (weight 2^64); the sum can wrap around
       128 bits, which is detected below by comparing against 'mid1'. */
    mid1 = (unsigned __int128) a64 * b0;
    mid = mid1 + (unsigned __int128) a0 * b64;
    /* High partial product: contributes to bits 128..255 of the result. */
    z128 = (unsigned __int128) a64 * b64;
    /* Fold in the upper half of 'mid' plus the wrap-around carry of the
       cross-product sum (worth 2^64 within 'z128'). */
    z128 += (unsigned __int128) (mid < mid1)<<64 | mid>>64;
    /* Add the lower half of 'mid' into the upper half of 'z0', propagating
       the resulting carry into 'z128'. */
    mid <<= 64;
    z0 += mid;
    z128 += (z0 < mid);
    /* Each store keeps the low 64 bits of the assigned expression. */
    zPtr[indexWord( 4, 0 )] = z0;
    zPtr[indexWord( 4, 1 )] = z0>>64;
    zPtr[indexWord( 4, 2 )] = z128;
    zPtr[indexWord( 4, 3 )] = z128>>64;
}
/* Self-referential define: lets the preprocessor see that this routine is
   provided here. */
#define softfloat_mul128To256M softfloat_mul128To256M

#endif

#endif

#endif


================================================
FILE: External/SoftFloat-3e/include/SoftFloat-3e/platform.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

/*----------------------------------------------------------------------------
| Word order of the target.  When defined, "primitiveTypes.h" selects the
| little-endian struct layouts and word-index macros.
*----------------------------------------------------------------------------*/
#define LITTLEENDIAN 1

/*----------------------------------------------------------------------------
| Feature selection for "opts-GCC.h" and the rest of SoftFloat:
|  - SOFTFLOAT_BUILTIN_CLZ enables the '__builtin_clz'-based leading-zero
|    counters in "opts-GCC.h".
|  - SOFTFLOAT_INTRINSIC_INT128 enables the 'unsigned __int128' multiply
|    helpers in "opts-GCC.h".
|  - SOFTFLOAT_FAST_INT64 enables the 64-bit struct types in
|    "primitiveTypes.h" and the by-value extFloat80_t/float128_t prototypes in
|    "softfloat.h".
| Note that "opts-GCC.h" only provides its inline routines when the build
| defines INLINE.
*----------------------------------------------------------------------------*/
#define SOFTFLOAT_BUILTIN_CLZ 1
#define SOFTFLOAT_INTRINSIC_INT128 1
#define SOFTFLOAT_FAST_INT64 1
#include "opts-GCC.h"


================================================
FILE: External/SoftFloat-3e/include/SoftFloat-3e/primitiveTypes.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef primitiveTypes_h
#define primitiveTypes_h 1

#include <stdint.h>

/*----------------------------------------------------------------------------
| Multiword helper types, available only with SOFTFLOAT_FAST_INT64.  The
| member order depends on LITTLEENDIAN.  "opts-GCC.h" overlays
| 'struct uint128' on an 'unsigned __int128' through a union, so the member
| order must match the target's in-memory layout of that integer: 'v0' holds
| the low 64 bits and 'v64' the high 64 bits.
| 'struct uint64_extra' and 'struct uint128_extra' pair a value 'v' with an
| additional 64-bit word 'extra' (presumably extra low-order bits kept for
| rounding -- confirm against the SoftFloat documentation).
*----------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_FAST_INT64

#ifdef LITTLEENDIAN
struct uint128 { uint64_t v0, v64; };
struct uint64_extra { uint64_t extra, v; };
struct uint128_extra { uint64_t extra; struct uint128 v; };
#else
struct uint128 { uint64_t v64, v0; };
struct uint64_extra { uint64_t v, extra; };
struct uint128_extra { struct uint128 v; uint64_t extra; };
#endif

#endif

/*----------------------------------------------------------------------------
| These macros are used to isolate the differences in word order between big-
| endian and little-endian platforms.
|
| 'total' is the number of words in the multiword value and 'n' a word number
| counted from the least-significant end.  With LITTLEENDIAN, word 'n' is
| stored at array index 'n' and 'wordIncr' steps toward more-significant
| words with +1; otherwise word 'n' is stored at index 'total - 1 - n' and
| 'wordIncr' is -1.  'INIT_UINTM4' takes its four words named 'v3' down to
| 'v0' and emits them in storage order.
*----------------------------------------------------------------------------*/
#ifdef LITTLEENDIAN
#define wordIncr 1
#define indexWord( total, n ) (n)
#define indexWordHi( total ) ((total) - 1)
#define indexWordLo( total ) 0
#define indexMultiword( total, m, n ) (n)
#define indexMultiwordHi( total, n ) ((total) - (n))
#define indexMultiwordLo( total, n ) 0
#define indexMultiwordHiBut( total, n ) (n)
#define indexMultiwordLoBut( total, n ) 0
#define INIT_UINTM4( v3, v2, v1, v0 ) { v0, v1, v2, v3 }
#else
#define wordIncr -1
#define indexWord( total, n ) ((total) - 1 - (n))
#define indexWordHi( total ) 0
#define indexWordLo( total ) ((total) - 1)
#define indexMultiword( total, m, n ) ((total) - 1 - (m))
#define indexMultiwordHi( total, n ) 0
#define indexMultiwordLo( total, n ) ((total) - (n))
#define indexMultiwordHiBut( total, n ) 0
#define indexMultiwordLoBut( total, n ) (n)
#define INIT_UINTM4( v3, v2, v1, v0 ) { v3, v2, v1, v0 }
#endif

#endif


================================================
FILE: External/SoftFloat-3e/include/SoftFloat-3e/softfloat.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/


/*============================================================================
| Note:  If SoftFloat is made available as a general library for programs to
| use, it is strongly recommended that a platform-specific version of this
| header, "softfloat.h", be created that folds in "softfloat_types.h" and that
| eliminates all dependencies on compile-time macros.
*============================================================================*/


#ifndef softfloat_h
#define softfloat_h 1

#include <stdbool.h>
#include <stdint.h>
#include "softfloat_types.h"

/*----------------------------------------------------------------------------
| Routine to raise any or all of the software floating-point exception flags.
| The first argument is the 'struct softfloat_state' to operate on (declared
| outside this header).
|
| FEXCORE_PRESERVE_ALL_ATTR, used on this and other declarations below, is
| supplied by the build (External/SoftFloat-3e/CMakeLists.txt): it expands to
| '__attribute__((preserve_all))' when ARCHITECTURE_arm64 and
| HAS_CLANG_PRESERVE_ALL are both set, and to nothing otherwise.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_raiseFlags( struct softfloat_state *, uint_fast8_t );

/*----------------------------------------------------------------------------
| Integer-to-floating-point conversion routines.
|
| The variants returning 'extFloat80_t' or 'float128_t' by value are declared
| only with SOFTFLOAT_FAST_INT64; the '...M' variants write their result
| through a pointer instead.  In this section only 'ui64_to_extF80' and
| 'i32_to_extF80' carry FEXCORE_PRESERVE_ALL_ATTR.
| Note:  of the routines declared here, External/SoftFloat-3e/CMakeLists.txt
| builds only 'i32_to_extF80', 'ui64_to_extF80' and 'i32_to_f128'.
*----------------------------------------------------------------------------*/
float16_t ui32_to_f16( uint32_t );
float32_t ui32_to_f32( uint32_t );
float64_t ui32_to_f64( uint32_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t ui32_to_extF80( uint32_t );
float128_t ui32_to_f128( uint32_t );
#endif
void ui32_to_extF80M( uint32_t, extFloat80_t * );
void ui32_to_f128M( uint32_t, float128_t * );
float16_t ui64_to_f16( uint64_t );
float32_t ui64_to_f32( uint64_t );
float64_t ui64_to_f64( uint64_t );
#ifdef SOFTFLOAT_FAST_INT64
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t ui64_to_extF80( uint64_t );
float128_t ui64_to_f128( uint64_t );
#endif
void ui64_to_extF80M( uint64_t, extFloat80_t * );
void ui64_to_f128M( uint64_t, float128_t * );
float16_t i32_to_f16( int32_t );
float32_t i32_to_f32( int32_t );
float64_t i32_to_f64( int32_t );
#ifdef SOFTFLOAT_FAST_INT64
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t i32_to_extF80( int32_t );
float128_t i32_to_f128( int32_t );
#endif
void i32_to_extF80M( int32_t, extFloat80_t * );
void i32_to_f128M( int32_t, float128_t * );
float16_t i64_to_f16( int64_t );
float32_t i64_to_f32( int64_t );
float64_t i64_to_f64( int64_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t i64_to_extF80( int64_t );
float128_t i64_to_f128( int64_t );
#endif
void i64_to_extF80M( int64_t, extFloat80_t * );
void i64_to_f128M( int64_t, float128_t * );

/*----------------------------------------------------------------------------
| 16-bit (half-precision) floating-point operations.
|
| None of these declarations take a 'struct softfloat_state *' or carry
| FEXCORE_PRESERVE_ALL_ATTR.
| Note:  External/SoftFloat-3e/CMakeLists.txt lists no 'src/f16_*.c' sources,
| so that target does not build the routines declared in this section.
*----------------------------------------------------------------------------*/
uint_fast32_t f16_to_ui32( float16_t, uint_fast8_t, bool );
uint_fast64_t f16_to_ui64( float16_t, uint_fast8_t, bool );
int_fast32_t f16_to_i32( float16_t, uint_fast8_t, bool );
int_fast64_t f16_to_i64( float16_t, uint_fast8_t, bool );
uint_fast32_t f16_to_ui32_r_minMag( float16_t, bool );
uint_fast64_t f16_to_ui64_r_minMag( float16_t, bool );
int_fast32_t f16_to_i32_r_minMag( float16_t, bool );
int_fast64_t f16_to_i64_r_minMag( float16_t, bool );
float32_t f16_to_f32( float16_t );
float64_t f16_to_f64( float16_t );
#ifdef SOFTFLOAT_FAST_INT64
extFloat80_t f16_to_extF80( float16_t );
float128_t f16_to_f128( float16_t );
#endif
void f16_to_extF80M( float16_t, extFloat80_t * );
void f16_to_f128M( float16_t, float128_t * );
float16_t f16_roundToInt( float16_t, uint_fast8_t, bool );
float16_t f16_add( float16_t, float16_t );
float16_t f16_sub( float16_t, float16_t );
float16_t f16_mul( float16_t, float16_t );
float16_t f16_mulAdd( float16_t, float16_t, float16_t );
float16_t f16_div( float16_t, float16_t );
float16_t f16_rem( float16_t, float16_t );
float16_t f16_sqrt( float16_t );
bool f16_eq( float16_t, float16_t );
bool f16_le( float16_t, float16_t );
bool f16_lt( float16_t, float16_t );
bool f16_eq_signaling( float16_t, float16_t );
bool f16_le_quiet( float16_t, float16_t );
bool f16_lt_quiet( float16_t, float16_t );
bool f16_isSignalingNaN( float16_t );

/*----------------------------------------------------------------------------
| 32-bit (single-precision) floating-point operations.
|
| 'f32_to_extF80' and 'f32_to_f128' differ from the other declarations in
| this section: both take a 'struct softfloat_state *' as first argument, and
| 'f32_to_extF80' additionally carries FEXCORE_PRESERVE_ALL_ATTR.
| Note:  of the routines declared here, External/SoftFloat-3e/CMakeLists.txt
| builds only 'f32_to_extF80' and 'f32_to_f128'.
*----------------------------------------------------------------------------*/
uint_fast32_t f32_to_ui32( float32_t, uint_fast8_t, bool );
uint_fast64_t f32_to_ui64( float32_t, uint_fast8_t, bool );
int_fast32_t f32_to_i32( float32_t, uint_fast8_t, bool );
int_fast64_t f32_to_i64( float32_t, uint_fast8_t, bool );
uint_fast32_t f32_to_ui32_r_minMag( float32_t, bool );
uint_fast64_t f32_to_ui64_r_minMag( float32_t, bool );
int_fast32_t f32_to_i32_r_minMag( float32_t, bool );
int_fast64_t f32_to_i64_r_minMag( float32_t, bool );
float16_t f32_to_f16( float32_t );
float64_t f32_to_f64( float32_t );
#ifdef SOFTFLOAT_FAST_INT64
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f32_to_extF80( struct softfloat_state *, float32_t );
float128_t f32_to_f128( struct softfloat_state *, float32_t );
#endif
void f32_to_extF80M( float32_t, extFloat80_t * );
void f32_to_f128M( float32_t, float128_t * );
float32_t f32_roundToInt( float32_t, uint_fast8_t, bool );
float32_t f32_add( float32_t, float32_t );
float32_t f32_sub( float32_t, float32_t );
float32_t f32_mul( float32_t, float32_t );
float32_t f32_mulAdd( float32_t, float32_t, float32_t );
float32_t f32_div( float32_t, float32_t );
float32_t f32_rem( float32_t, float32_t );
float32_t f32_sqrt( float32_t );
bool f32_eq( float32_t, float32_t );
bool f32_le( float32_t, float32_t );
bool f32_lt( float32_t, float32_t );
bool f32_eq_signaling( float32_t, float32_t );
bool f32_le_quiet( float32_t, float32_t );
bool f32_lt_quiet( float32_t, float32_t );
bool f32_isSignalingNaN( float32_t );

/*----------------------------------------------------------------------------
| 64-bit (double-precision) floating-point operations.
|
| 'f64_to_extF80' differs from the other declarations in this section: it
| takes a 'struct softfloat_state *' as first argument and carries
| FEXCORE_PRESERVE_ALL_ATTR.  Unlike 'f32_to_f128', 'f64_to_f128' takes no
| state argument.
| Note:  of the routines declared here, External/SoftFloat-3e/CMakeLists.txt
| builds only 'f64_to_extF80'.
*----------------------------------------------------------------------------*/
uint_fast32_t f64_to_ui32( float64_t, uint_fast8_t, bool );
uint_fast64_t f64_to_ui64( float64_t, uint_fast8_t, bool );
int_fast32_t f64_to_i32( float64_t, uint_fast8_t, bool );
int_fast64_t f64_to_i64( float64_t, uint_fast8_t, bool );
uint_fast32_t f64_to_ui32_r_minMag( float64_t, bool );
uint_fast64_t f64_to_ui64_r_minMag( float64_t, bool );
int_fast32_t f64_to_i32_r_minMag( float64_t, bool );
int_fast64_t f64_to_i64_r_minMag( float64_t, bool );
float16_t f64_to_f16( float64_t );
float32_t f64_to_f32( float64_t );
#ifdef SOFTFLOAT_FAST_INT64
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f64_to_extF80( struct softfloat_state *, float64_t );
float128_t f64_to_f128( float64_t );
#endif
void f64_to_extF80M( float64_t, extFloat80_t * );
void f64_to_f128M( float64_t, float128_t * );
float64_t f64_roundToInt( float64_t, uint_fast8_t, bool );
float64_t f64_add( float64_t, float64_t );
float64_t f64_sub( float64_t, float64_t );
float64_t f64_mul( float64_t, float64_t );
float64_t f64_mulAdd( float64_t, float64_t, float64_t );
float64_t f64_div( float64_t, float64_t );
float64_t f64_rem( float64_t, float64_t );
float64_t f64_sqrt( float64_t );
bool f64_eq( float64_t, float64_t );
bool f64_le( float64_t, float64_t );
bool f64_lt( float64_t, float64_t );
bool f64_eq_signaling( float64_t, float64_t );
bool f64_le_quiet( float64_t, float64_t );
bool f64_lt_quiet( float64_t, float64_t );
bool f64_isSignalingNaN( float64_t );

/*----------------------------------------------------------------------------
| 80-bit extended double-precision floating-point operations.
*----------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_FAST_INT64
uint_fast32_t extF80_to_ui32( extFloat80_t, uint_fast8_t, bool );
FEXCORE_PRESERVE_ALL_ATTR
uint_fast64_t extF80_to_ui64( struct softfloat_state *, extFloat80_t, uint_fast8_t, bool );
FEXCORE_PRESERVE_ALL_ATTR
int_fast32_t extF80_to_i32( struct softfloat_state *, extFloat80_t, uint_fast8_t, bool );
FEXCORE_PRESERVE_ALL_ATTR
int_fast64_t extF80_to_i64( struct softfloat_state *, extFloat80_t, uint_fast8_t, bool );
uint_fast32_t extF80_to_ui32_r_minMag( extFloat80_t, bool );
uint_fast64_t extF80_to_ui64_r_minMag( extFloat80_t, bool );
int_fast32_t extF80_to_i32_r_minMag( extFloat80_t, bool );
int_fast64_t extF80_to_i64_r_minMag( extFloat80_t, bool );
float16_t extF80_to_f16( extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
float32_t extF80_to_f32( struct softfloat_state *, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
float64_t extF80_to_f64( struct softfloat_state *, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
float128_t extF80_to_f128( struct softfloat_state *, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_roundToInt( struct softfloat_state *, extFloat80_t, uint_fast8_t, bool );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_add( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_sub( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_mul( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_div( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_rem( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_sqrt( struct softfloat_state *, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
bool extF80_eq( struct softfloat_state *, extFloat80_t, extFloat80_t );
bool extF80_le( struct softfloat_state *, extFloat80_t, extFloat80_t );
FEXCORE_PRESERVE_ALL_ATTR
bool extF80_lt( struct softfloat_state *, extFloat80_t, extFloat80_t );
bool extF80_eq_signaling( extFloat80_t, extFloat80_t );
bool extF80_le_quiet( extFloat80_t, extFloat80_t );
bool extF80_lt_quiet( extFloat80_t, extFloat80_t );
bool extF80_isSignalingNaN( extFloat80_t );
/* Returns a copy of 'a' with bit 15 of 'signExp' (the sign position) toggled;
 * the exponent bits and the significand are left unchanged. */
static inline extFloat80_t extF80_complement_sign(extFloat80_t a) {
  extFloat80_t flipped = a;
  flipped.signExp ^= 0x8000;
  return flipped;
}
#endif
uint_fast32_t extF80M_to_ui32( const extFloat80_t *, uint_fast8_t, bool );
uint_fast64_t extF80M_to_ui64( const extFloat80_t *, uint_fast8_t, bool );
int_fast32_t extF80M_to_i32( const extFloat80_t *, uint_fast8_t, bool );
int_fast64_t extF80M_to_i64( const extFloat80_t *, uint_fast8_t, bool );
uint_fast32_t extF80M_to_ui32_r_minMag( const extFloat80_t *, bool );
uint_fast64_t extF80M_to_ui64_r_minMag( const extFloat80_t *, bool );
int_fast32_t extF80M_to_i32_r_minMag( const extFloat80_t *, bool );
int_fast64_t extF80M_to_i64_r_minMag( const extFloat80_t *, bool );
float16_t extF80M_to_f16( const extFloat80_t * );
float32_t extF80M_to_f32( const extFloat80_t * );
float64_t extF80M_to_f64( const extFloat80_t * );
void extF80M_to_f128M( const extFloat80_t *, float128_t * );
void
 extF80M_roundToInt(
     const extFloat80_t *, uint_fast8_t, bool, extFloat80_t * );
void extF80M_add( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
void extF80M_sub( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
void extF80M_mul( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
void extF80M_div( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
void extF80M_rem( const extFloat80_t *, const extFloat80_t *, extFloat80_t * );
void extF80M_sqrt( const extFloat80_t *, extFloat80_t * );
bool extF80M_eq( const extFloat80_t *, const extFloat80_t * );
bool extF80M_le( const extFloat80_t *, const extFloat80_t * );
bool extF80M_lt( const extFloat80_t *, const extFloat80_t * );
bool extF80M_eq_signaling( const extFloat80_t *, const extFloat80_t * );
bool extF80M_le_quiet( const extFloat80_t *, const extFloat80_t * );
bool extF80M_lt_quiet( const extFloat80_t *, const extFloat80_t * );
bool extF80M_isSignalingNaN( const extFloat80_t * );

/*----------------------------------------------------------------------------
| 128-bit (quadruple-precision) floating-point operations.
*----------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_FAST_INT64
uint_fast32_t f128_to_ui32( struct softfloat_state *, float128_t, uint_fast8_t, bool );
uint_fast64_t f128_to_ui64( struct softfloat_state *, float128_t, uint_fast8_t, bool );
int_fast32_t f128_to_i32( struct softfloat_state *, float128_t, uint_fast8_t, bool );
int_fast64_t f128_to_i64( struct softfloat_state *, float128_t, uint_fast8_t, bool );
uint_fast32_t f128_to_ui32_r_minMag( float128_t, bool );
uint_fast64_t f128_to_ui64_r_minMag( float128_t, bool );
int_fast32_t f128_to_i32_r_minMag( float128_t, bool );
int_fast64_t f128_to_i64_r_minMag( float128_t, bool );
float16_t f128_to_f16( struct softfloat_state *, float128_t );
float32_t f128_to_f32( struct softfloat_state *, float128_t );
float64_t f128_to_f64( struct softfloat_state *, float128_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f128_to_extF80( struct softfloat_state *, float128_t );
float128_t f128_roundToInt( float128_t, uint_fast8_t, bool );
float128_t f128_add( struct softfloat_state *, float128_t, float128_t );
float128_t f128_sub( struct softfloat_state *, float128_t, float128_t );
float128_t f128_mul( struct softfloat_state *, float128_t, float128_t );
float128_t f128_mulAdd( struct softfloat_state *, float128_t, float128_t, float128_t );
float128_t f128_div( struct softfloat_state *, float128_t, float128_t );
float128_t f128_rem( struct softfloat_state *, float128_t, float128_t );
float128_t f128_sqrt( struct softfloat_state *, float128_t );
bool f128_eq( struct softfloat_state *, float128_t, float128_t );
bool f128_le( struct softfloat_state *, float128_t, float128_t );
bool f128_lt( struct softfloat_state *, float128_t, float128_t );
bool f128_eq_signaling( struct softfloat_state *, float128_t, float128_t );
bool f128_le_quiet( struct softfloat_state *, float128_t, float128_t );
bool f128_lt_quiet( struct softfloat_state *, float128_t, float128_t );
bool f128_isSignalingNaN( float128_t );
/* Returns a copy of 'a' with bit 63 of word v[1] (the sign position used by
 * this helper) toggled; all other bits are left unchanged. */
static inline float128_t f128_complement_sign(float128_t a) {
  float128_t flipped = a;
  flipped.v[1] ^= 0x8000000000000000ULL;
  return flipped;
}
#endif
uint_fast32_t f128M_to_ui32( const float128_t *, uint_fast8_t, bool );
uint_fast64_t f128M_to_ui64( const float128_t *, uint_fast8_t, bool );
int_fast32_t f128M_to_i32( const float128_t *, uint_fast8_t, bool );
int_fast64_t f128M_to_i64( const float128_t *, uint_fast8_t, bool );
uint_fast32_t f128M_to_ui32_r_minMag( const float128_t *, bool );
uint_fast64_t f128M_to_ui64_r_minMag( const float128_t *, bool );
int_fast32_t f128M_to_i32_r_minMag( const float128_t *, bool );
int_fast64_t f128M_to_i64_r_minMag( const float128_t *, bool );
float16_t f128M_to_f16( const float128_t * );
float32_t f128M_to_f32( const float128_t * );
float64_t f128M_to_f64( const float128_t * );
void f128M_to_extF80M( const float128_t *, extFloat80_t * );
void f128M_roundToInt( const float128_t *, uint_fast8_t, bool, float128_t * );
void f128M_add( const float128_t *, const float128_t *, float128_t * );
void f128M_sub( const float128_t *, const float128_t *, float128_t * );
void f128M_mul( const float128_t *, const float128_t *, float128_t * );
void
 f128M_mulAdd(
     const float128_t *, const float128_t *, const float128_t *, float128_t *
 );
void f128M_div( const float128_t *, const float128_t *, float128_t * );
void f128M_rem( const float128_t *, const float128_t *, float128_t * );
void f128M_sqrt( const float128_t *, float128_t * );
bool f128M_eq( const float128_t *, const float128_t * );
bool f128M_le( const float128_t *, const float128_t * );
bool f128M_lt( const float128_t *, const float128_t * );
bool f128M_eq_signaling( const float128_t *, const float128_t * );
bool f128M_le_quiet( const float128_t *, const float128_t * );
bool f128M_lt_quiet( const float128_t *, const float128_t * );
bool f128M_isSignalingNaN( const float128_t * );

#endif


================================================
FILE: External/SoftFloat-3e/include/SoftFloat-3e/softfloat_types.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef softfloat_types_h
#define softfloat_types_h 1

#include <stdint.h>

/*----------------------------------------------------------------------------
| Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point
| arguments and results to/from functions.  These types must be exactly
| 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively.  Where a
| platform has "native" support for IEEE-Standard floating-point formats,
| the types below may, if desired, be defined as aliases for the native types
| (typically 'float' and 'double', and possibly 'long double').
*----------------------------------------------------------------------------*/
/* Each wrapper stores the raw bit pattern of the value in its member 'v'. */
typedef struct { uint16_t v; } float16_t;
typedef struct { uint32_t v; } float32_t;
typedef struct { uint64_t v; } float64_t;
/* 128 bits as two 64-bit words.  f128_complement_sign() in softfloat.h */
/* toggles bit 63 of v[1], i.e. it treats v[1] as the word holding the sign. */
typedef struct { uint64_t v[2]; } float128_t;

/*----------------------------------------------------------------------------
| The format of an 80-bit extended floating-point number in memory.  This
| structure must contain a 16-bit field named 'signExp' and a 64-bit field
| named 'signif'.
*----------------------------------------------------------------------------*/
/* Member order is selected by the LITTLEENDIAN macro: when it is defined the */
/* 64-bit 'signif' comes first, otherwise the 16-bit 'signExp' comes first.   */
#ifdef LITTLEENDIAN
struct extFloat80M { uint64_t signif; uint16_t signExp; };
#else
struct extFloat80M { uint16_t signExp; uint64_t signif; };
#endif

/*----------------------------------------------------------------------------
| The type used to pass 80-bit extended floating-point arguments and
| results to/from functions.  This type must have size identical to
| 'struct extFloat80M'.  Type 'extFloat80_t' can be defined as an alias for
| 'struct extFloat80M'.  Alternatively, if a platform has "native" support
| for IEEE-Standard 80-bit extended floating-point, it may be possible,
| if desired, to define 'extFloat80_t' as an alias for the native type
| (presumably either 'long double' or a nonstandard compiler-intrinsic type).
| In that case, the 'signif' and 'signExp' fields of 'struct extFloat80M'
| must align exactly with the locations in memory of the sign, exponent, and
| significand of the native type.
*----------------------------------------------------------------------------*/
typedef struct extFloat80M extFloat80_t;

/*----------------------------------------------------------------------------
| Tininess-detection modes (see member 'detectTininess' of
| 'struct softfloat_state' below).
*----------------------------------------------------------------------------*/
enum {
    softfloat_tininess_beforeRounding = 0,
    softfloat_tininess_afterRounding  = 1
};

/*----------------------------------------------------------------------------
| Rounding-mode selectors (see member 'roundingMode' of
| 'struct softfloat_state' below).  Note that the value 5 is not assigned.
| Per the comment on 'roundingMode', mode "odd" is supported only if SoftFloat
| is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.
*----------------------------------------------------------------------------*/
enum {
    softfloat_round_near_even   = 0,
    softfloat_round_minMag      = 1,
    softfloat_round_min         = 2,
    softfloat_round_max         = 3,
    softfloat_round_near_maxMag = 4,
    softfloat_round_odd         = 6
};

/*----------------------------------------------------------------------------
| Exception flag bits (see member 'exceptionFlags' of 'struct softfloat_state'
| below).  Every value is a distinct power of two, so the flags occupy
| separate bits -- presumably so that several can be OR-ed into the one
| 'exceptionFlags' byte; confirm against softfloat_raiseFlags().
*----------------------------------------------------------------------------*/
enum {
    softfloat_flag_inexact   =  1,
    softfloat_flag_underflow =  2,
    softfloat_flag_overflow  =  4,
    softfloat_flag_infinite  =  8,
    softfloat_flag_invalid   = 16
};

/*----------------------------------------------------------------------------
| Floating-point control/status record consisting of four one-byte members.
| A pointer to it is the first parameter of the state-taking routines declared
| in softfloat.h (for example 'extF80_add' and 'extF80_div').  The trailing
| comment on each member records its intended initial value.
*----------------------------------------------------------------------------*/
struct softfloat_state {
/*----------------------------------------------------------------------------
| Software floating-point underflow tininess-detection mode.
*----------------------------------------------------------------------------*/
uint8_t detectTininess; /* = init_detectTininess */
/*----------------------------------------------------------------------------
| Software floating-point rounding mode.  (Mode "odd" is supported only if
| SoftFloat is compiled with macro 'SOFTFLOAT_ROUND_ODD' defined.)
*----------------------------------------------------------------------------*/
uint8_t roundingMode; /* = softfloat_round_near_even */

/*----------------------------------------------------------------------------
| Software floating-point exception flags.
*----------------------------------------------------------------------------*/
uint8_t exceptionFlags; /* = 0 */

/*----------------------------------------------------------------------------
| Rounding precision for 80-bit extended double-precision floating-point.
| Valid values are 32, 64, and 80.
*----------------------------------------------------------------------------*/
uint8_t roundingPrecision; /* = 80 */
};

#endif


================================================
FILE: External/SoftFloat-3e/src/extF80_add.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the sum of the 80-bit extended values 'a' and 'b'.  The operands are
| split into their sign/exponent and significand words; when both signs agree
| the work is handed to 'softfloat_addMagsExtF80', otherwise to
| 'softfloat_subMagsExtF80'.  In both cases the sign of 'a' is passed along
| together with 'state'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_add( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } lhs, rhs;
    uint_fast16_t lhsSignExp, rhsSignExp;
    uint_fast64_t lhsSig, rhsSig;
    bool lhsNeg, rhsNeg;

    /* Pull the raw fields out of both operands. */
    lhs.f = a;
    rhs.f = b;
    lhsSignExp = lhs.s.signExp;
    lhsSig     = lhs.s.signif;
    rhsSignExp = rhs.s.signExp;
    rhsSig     = rhs.s.signif;
    lhsNeg = signExtF80UI64( lhsSignExp );
    rhsNeg = signExtF80UI64( rhsSignExp );

    /* Equal signs: magnitudes add.  Opposite signs: magnitudes subtract. */
    if ( lhsNeg == rhsNeg ) {
        return
            softfloat_addMagsExtF80(
                state, lhsSignExp, lhsSig, rhsSignExp, rhsSig, lhsNeg );
    }
    return
        softfloat_subMagsExtF80(
            state, lhsSignExp, lhsSig, rhsSignExp, rhsSig, lhsNeg );
}


================================================
FILE: External/SoftFloat-3e/src/extF80_div.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the quotient 'a' / 'b' of two 80-bit extended values.
|
| Special operands are handled up front:
|   - any NaN operand            -> result of softfloat_propagateNaNExtF80UI
|   - infinity / infinity, 0 / 0 -> invalid flag raised, default NaN returned
|   - infinity / finite          -> signed infinity
|   - finite / infinity          -> signed zero
|   - nonzero finite / 0         -> 'softfloat_flag_infinite' raised, signed
|                                   infinity returned
|   - 0 / nonzero finite         -> signed zero
| For all other inputs the quotient significand is computed iteratively and
| handed to 'softfloat_roundPackToExtF80' with 'state->roundingPrecision'.
| Flags are reported through 'state'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_div( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } uA;
    uint_fast16_t uiA64;
    uint_fast64_t uiA0;
    bool signA;
    int_fast32_t expA;
    uint_fast64_t sigA;
    union { struct extFloat80M s; extFloat80_t f; } uB;
    uint_fast16_t uiB64;
    uint_fast64_t uiB0;
    bool signB;
    int_fast32_t expB;
    uint_fast64_t sigB;
    bool signZ;
    struct exp32_sig64 normExpSig;
    int_fast32_t expZ;
    struct uint128 rem;
    uint_fast32_t recip32;
    uint_fast64_t sigZ;
    int ix;
    uint_fast64_t q64;
    uint_fast32_t q;
    struct uint128 term;
    uint_fast64_t sigZExtra;
    struct uint128 uiZ;
    uint_fast16_t uiZ64;
    uint_fast64_t uiZ0;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands into sign, exponent and significand.  The result
    | sign is the XOR of the operand signs.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.s.signExp;
    uiA0  = uA.s.signif;
    signA = signExtF80UI64( uiA64 );
    expA  = expExtF80UI64( uiA64 );
    sigA  = uiA0;
    uB.f = b;
    uiB64 = uB.s.signExp;
    uiB0  = uB.s.signif;
    signB = signExtF80UI64( uiB64 );
    expB  = expExtF80UI64( uiB64 );
    sigB  = uiB0;
    signZ = signA ^ signB;
    /*------------------------------------------------------------------------
    | Exponent 0x7FFF: the operand is an infinity or a NaN.  Any significand
    | bit set below bit 63 sends the operation to NaN propagation.
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
        if ( expB == 0x7FFF ) {
            if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
            /* infinity / infinity */
            goto invalid;
        }
        /* infinity / anything that is not infinity or NaN */
        goto infinity;
    }
    if ( expB == 0x7FFF ) {
        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
        /* non-infinite, non-NaN / infinity */
        goto zero;
    }
    /*------------------------------------------------------------------------
    | Zero and not-yet-normalized operands.  An exponent field of 0 is treated
    | as 1.  If bit 63 of a significand is clear, the significand is either
    | zero (handled as a special case) or is normalized via
    | softfloat_normSubnormalExtF80Sig, whose exponent adjustment is added in.
    *------------------------------------------------------------------------*/
    if ( ! expB ) expB = 1;
    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! sigB ) {
            /* Divisor is zero: 0 / 0 is invalid, otherwise divide-by-zero. */
            if ( ! sigA ) goto invalid;
            softfloat_raiseFlags( state, softfloat_flag_infinite );
            goto infinity;
        }
        normExpSig = softfloat_normSubnormalExtF80Sig( sigB );
        expB += normExpSig.exp;
        sigB = normExpSig.sig;
    }
    if ( ! expA ) expA = 1;
    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
        /* Dividend is zero (divisor is nonzero here). */
        if ( ! sigA ) goto zero;
        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
        expA += normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Result exponent is the re-biased exponent difference.  When
    | sigA < sigB the exponent is lowered by one and the initial remainder
    | gets one extra bit of left shift (32 instead of 31).
    |
    | The loop computes three estimates of 'q'.  Each is formed from the upper
    | remainder word and 'recip32' (NOTE(review): presumably an approximate
    | 32-bit reciprocal of the upper half of sigB -- confirm against
    | softfloat_approxRecip32_1).  'ix' starts at 2 and is decremented before
    | the exit test, so the correction/accumulation part of the body executes
    | twice, each time appending 'q' to 'sigZ' after a 29-bit shift; the third
    | estimate leaves the loop in 'q' without being applied.
    *------------------------------------------------------------------------*/
    expZ = expA - expB + 0x3FFF;
    if ( sigA < sigB ) {
        --expZ;
        rem = softfloat_shortShiftLeft128( 0, sigA, 32 );
    } else {
        rem = softfloat_shortShiftLeft128( 0, sigA, 31 );
    }
    recip32 = softfloat_approxRecip32_1( sigB>>32 );
    sigZ = 0;
    ix = 2;
    for (;;) {
        q64 = (uint_fast64_t) (uint32_t) (rem.v64>>2) * recip32;
        q = (q64 + 0x80000000)>>32;
        --ix;
        if ( ix < 0 ) break;
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
        term = softfloat_mul64ByShifted32To128( sigB, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        /* Negative remainder: the estimate was one too large, so undo it. */
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            --q;
            rem = softfloat_add128( rem.v64, rem.v0, sigB>>32, sigB<<32 );
        }
        sigZ = (sigZ<<29) + q;
    }
    /*------------------------------------------------------------------------
    | The condition holds when the low 22 bits of 'q' are all ones or all
    | zeros.  In that case the remainder for the final 'q' is computed
    | explicitly, 'q' is adjusted down or up by one as needed, and bit 0 of
    | 'q' is set if a nonzero remainder is left over.
    | NOTE(review): presumably these are the cases where the estimate is too
    | close to a rounding boundary to be trusted -- confirm.
    *------------------------------------------------------------------------*/
    if ( ((q + 1) & 0x3FFFFF) < 2 ) {
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
        term = softfloat_mul64ByShifted32To128( sigB, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        term = softfloat_shortShiftLeft128( 0, sigB, 32 );
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            --q;
            rem = softfloat_add128( rem.v64, rem.v0, term.v64, term.v0 );
        } else if ( softfloat_le128( term.v64, term.v0, rem.v64, rem.v0 ) ) {
            ++q;
            rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        }
        if ( rem.v64 | rem.v0 ) q |= 1;
    }
    /*------------------------------------------------------------------------
    | Merge the final 'q' into the result: its bits above bit 22 are appended
    | to 'sigZ', and its low 23 bits become the top bits of 'sigZExtra'.
    | Rounding and packing are delegated to softfloat_roundPackToExtF80.
    *------------------------------------------------------------------------*/
    sigZ = (sigZ<<6) + (q>>23);
    sigZExtra = (uint64_t) ((uint_fast64_t) q<<41);
    return
        softfloat_roundPackToExtF80(
            state, signZ, expZ, sigZ, sigZExtra, state->roundingPrecision );
    /*------------------------------------------------------------------------
    | At least one operand is a NaN.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, uiA0, uiB64, uiB0 );
    uiZ64 = uiZ.v64;
    uiZ0  = uiZ.v0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Invalid operation: raise the invalid flag and return the default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ64 = defaultNaNExtF80UI64;
    uiZ0  = defaultNaNExtF80UI0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Signed infinity: exponent 0x7FFF with only bit 63 of the significand set.
    *------------------------------------------------------------------------*/
 infinity:
    uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
    uiZ0  = UINT64_C( 0x8000000000000000 );
    goto uiZ;
    /*------------------------------------------------------------------------
    | Signed zero: exponent 0 and significand 0.
    *------------------------------------------------------------------------*/
 zero:
    uiZ64 = packToExtF80UI64( signZ, 0 );
    uiZ0  = 0;
    /* Common exit for the special cases: pack the raw words into the result. */
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_eq.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Equality comparison of the 80-bit extended values 'a' and 'b'.
| If either operand is a NaN the result is false; the invalid flag is raised
| through 'state' only when at least one operand is a signaling NaN.
| Otherwise the operands are equal when their raw encodings match, or when
| both significands are zero and both exponent fields are zero (so that
| zeros of opposite sign compare equal).
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
bool extF80_eq( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } lhs, rhs;
    uint_fast16_t lhsSignExp, rhsSignExp;
    uint_fast64_t lhsSig, rhsSig;

    /* Pull the raw fields out of both operands. */
    lhs.f = a;
    rhs.f = b;
    lhsSignExp = lhs.s.signExp;
    lhsSig     = lhs.s.signif;
    rhsSignExp = rhs.s.signExp;
    rhsSig     = rhs.s.signif;

    /* Unordered: never equal; only signaling NaNs raise the invalid flag. */
    if (
           isNaNExtF80UI( lhsSignExp, lhsSig )
        || isNaNExtF80UI( rhsSignExp, rhsSig )
    ) {
        if ( softfloat_isSigNaNExtF80UI( lhsSignExp, lhsSig ) ) {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
        } else if ( softfloat_isSigNaNExtF80UI( rhsSignExp, rhsSig ) ) {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
        }
        return false;
    }

    /* Different significands can never be equal. */
    if ( lhsSig != rhsSig ) return false;
    /* Identical encodings. */
    if ( lhsSignExp == rhsSignExp ) return true;
    /* Only remaining equal case: both are zero and differ just in sign. */
    return ! lhsSig && ! ((lhsSignExp | rhsSignExp) & 0x7FFF);

}


================================================
FILE: External/SoftFloat-3e/src/extF80_le.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Signaling `a <= b' comparison for 80-bit extended-precision values.
| When `isNaNExtF80UI' reports either operand as a NaN, the invalid flag is
| raised through `softfloat_raiseFlags' and false is returned.
*----------------------------------------------------------------------------*/
bool extF80_le( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } lhs, rhs;
    uint_fast16_t hiA, hiB;
    uint_fast64_t loA, loB;
    bool negA, negB;

    /* Pull the raw sign/exponent word and significand out of each operand. */
    lhs.f = a;
    rhs.f = b;
    hiA = lhs.s.signExp;
    loA = lhs.s.signif;
    hiB = rhs.s.signExp;
    loB = rhs.s.signif;

    if ( isNaNExtF80UI( hiA, loA ) || isNaNExtF80UI( hiB, loB ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return false;
    }

    negA = signExtF80UI64( hiA );
    negB = signExtF80UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: true when `a' is the negative operand, or when
           every exponent and significand bit of both operands is zero. */
        if ( negA ) return true;
        return ! (((hiA | hiB) & 0x7FFF) | loA | loB);
    }

    /* Same sign: identical encodings compare equal; otherwise the result of
       `softfloat_lt128' is inverted when the shared sign bit is set. */
    if ( (hiA == hiB) && (loA == loB) ) return true;
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_lt.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Signaling `a < b' comparison for 80-bit extended-precision values.
| When `isNaNExtF80UI' reports either operand as a NaN, the invalid flag is
| raised through `softfloat_raiseFlags' and false is returned.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
bool extF80_lt( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } lhs, rhs;
    uint_fast16_t hiA, hiB;
    uint_fast64_t loA, loB;
    bool negA, negB;

    /* Pull the raw sign/exponent word and significand out of each operand. */
    lhs.f = a;
    rhs.f = b;
    hiA = lhs.s.signExp;
    loA = lhs.s.signif;
    hiB = rhs.s.signExp;
    loB = rhs.s.signif;

    if ( isNaNExtF80UI( hiA, loA ) || isNaNExtF80UI( hiB, loB ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return false;
    }

    negA = signExtF80UI64( hiA );
    negB = signExtF80UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: true only when `a' is the negative operand and at
           least one exponent or significand bit of either operand is set. */
        return negA && (((hiA | hiB) & 0x7FFF) | loA | loB);
    }

    /* Same sign: identical encodings are never "less than"; otherwise the
       result of `softfloat_lt128' is inverted when the shared sign bit is
       set. */
    if ( (hiA == hiB) && (loA == loB) ) return false;
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_mul.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Multiplies the 80-bit extended-precision values `a' and `b'.
|
| Operands whose exponent field is 0x7FFF are handled first:
|   - any such operand with nonzero fraction bits yields the result of
|     `softfloat_propagateNaNExtF80UI';
|   - otherwise, if the other operand's exponent and significand are both
|     zero, the invalid flag is raised and the default NaN is returned;
|   - otherwise the result has exponent 0x7FFF, significand
|     0x8000000000000000 and the XOR of the operand signs.
| A zero significand in either operand gives a signed zero.  All other
| products are rounded by `softfloat_roundPackToExtF80' using
| `state->roundingPrecision'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_mul( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } inA, inB, out;
    uint_fast16_t hiA, hiB, hiZ;
    uint_fast64_t loA, loB, loZ;
    bool negA, negB, negZ;
    int_fast32_t eA, eB, eZ;
    uint_fast64_t mA, mB;
    uint_fast64_t otherMag;
    struct exp32_sig64 norm;
    struct uint128 prod, nanBits;

    /*------------------------------------------------------------------------
    | Unpack both operands.
    *------------------------------------------------------------------------*/
    inA.f = a;
    hiA = inA.s.signExp;
    loA = inA.s.signif;
    inB.f = b;
    hiB = inB.s.signExp;
    loB = inB.s.signif;
    negA = signExtF80UI64( hiA );
    negB = signExtF80UI64( hiB );
    negZ = negA ^ negB;
    eA = expExtF80UI64( hiA );
    eB = expExtF80UI64( hiB );
    mA = loA;
    mB = loB;
    /*------------------------------------------------------------------------
    | At least one operand has the maximum exponent.
    *------------------------------------------------------------------------*/
    if ( (eA == 0x7FFF) || (eB == 0x7FFF) ) {
        bool nanA = (eA == 0x7FFF) && (mA & UINT64_C( 0x7FFFFFFFFFFFFFFF ));
        bool nanB = (eB == 0x7FFF) && (mB & UINT64_C( 0x7FFFFFFFFFFFFFFF ));

        if ( nanA || nanB ) {
            nanBits =
                softfloat_propagateNaNExtF80UI( state, hiA, loA, hiB, loB );
            hiZ = nanBits.v64;
            loZ = nanBits.v0;
            goto pack;
        }
        /* Magnitude bits of the operand paired with the max-exponent one. */
        otherMag = (eA == 0x7FFF) ? (eB | mB) : (eA | mA);
        if ( otherMag ) {
            hiZ = packToExtF80UI64( negZ, 0x7FFF );
            loZ = UINT64_C( 0x8000000000000000 );
        } else {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
            hiZ = defaultNaNExtF80UI64;
            loZ = defaultNaNExtF80UI0;
        }
        goto pack;
    }
    /*------------------------------------------------------------------------
    | Normalize each significand so that its top bit is set; a zero
    | significand short-circuits to a signed zero result.
    *------------------------------------------------------------------------*/
    if ( ! eA ) eA = 1;
    if ( ! (mA & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! mA ) goto signedZero;
        norm = softfloat_normSubnormalExtF80Sig( mA );
        eA += norm.exp;
        mA = norm.sig;
    }
    if ( ! eB ) eB = 1;
    if ( ! (mB & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! mB ) goto signedZero;
        norm = softfloat_normSubnormalExtF80Sig( mB );
        eB += norm.exp;
        mB = norm.sig;
    }
    /*------------------------------------------------------------------------
    | Form the 128-bit product; if its top bit is clear, double it and
    | compensate in the exponent.
    *------------------------------------------------------------------------*/
    eZ = eA + eB - 0x3FFE;
    prod = softfloat_mul64To128( mA, mB );
    if ( prod.v64 < UINT64_C( 0x8000000000000000 ) ) {
        --eZ;
        prod = softfloat_add128( prod.v64, prod.v0, prod.v64, prod.v0 );
    }
    return
        softfloat_roundPackToExtF80(
            state, negZ, eZ, prod.v64, prod.v0, state->roundingPrecision );
    /*------------------------------------------------------------------------
    | Shared exits.
    *------------------------------------------------------------------------*/
 signedZero:
    hiZ = packToExtF80UI64( negZ, 0 );
    loZ = 0;
 pack:
    out.s.signExp = hiZ;
    out.s.signif  = loZ;
    return out.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_rem.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Computes the remainder of the 80-bit extended-precision value `a' with
| respect to `b'.
|
| Special cases, as implemented below:
|   - An operand with exponent 0x7FFF and nonzero fraction bits produces the
|     result of `softfloat_propagateNaNExtF80UI'.
|   - `a' with exponent 0x7FFF (and no NaN involved), or `b' with a zero
|     significand, raises the invalid flag and returns the default NaN.
|   - `b' with exponent 0x7FFF, or `b' so much larger than `a' that
|     `expDiff' is below -1, returns `a' through the `copyA' path.
|   - `a' with a zero significand returns a zero carrying the sign of `a'.
| Otherwise two candidate remainders one multiple of `b' apart are formed
| and `selectRem' chooses between them.  The final result is packed by
| `softfloat_normRoundPackToExtF80' with a fixed rounding precision of 80.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_rem( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    union { struct extFloat80M s; extFloat80_t f; } uA;
    uint_fast16_t uiA64;
    uint_fast64_t uiA0;
    bool signA;
    int_fast32_t expA;
    uint_fast64_t sigA;
    union { struct extFloat80M s; extFloat80_t f; } uB;
    uint_fast16_t uiB64;
    uint_fast64_t uiB0;
    int_fast32_t expB;
    uint_fast64_t sigB;
    struct exp32_sig64 normExpSig;
    int_fast32_t expDiff;
    struct uint128 rem, shiftedSigB;
    uint_fast32_t q, recip32;
    uint_fast64_t q64;
    struct uint128 term, altRem, meanRem;
    bool signRem;
    struct uint128 uiZ;
    uint_fast16_t uiZ64;
    uint_fast64_t uiZ0;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands.  The sign of `b' is never read.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.s.signExp;
    uiA0  = uA.s.signif;
    signA = signExtF80UI64( uiA64 );
    expA  = expExtF80UI64( uiA64 );
    sigA  = uiA0;
    uB.f = b;
    uiB64 = uB.s.signExp;
    uiB0  = uB.s.signif;
    expB  = expExtF80UI64( uiB64 );
    sigB  = uiB0;
    /*------------------------------------------------------------------------
    | Operands with the maximum exponent (NaNs and infinities).
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if (
               (sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
            || ((expB == 0x7FFF) && (sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF )))
        ) {
            goto propagateNaN;
        }
        /* `a' has the maximum exponent and neither operand is a NaN. */
        goto invalid;
    }
    if ( expB == 0x7FFF ) {
        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
        /*--------------------------------------------------------------------
        | Argument b is an infinity.  Doubling `expB' is an easy way to ensure
        | that `expDiff' later is less than -1, which will result in returning
        | a canonicalized version of argument a.
        *--------------------------------------------------------------------*/
        expB += expB;
    }
    /*------------------------------------------------------------------------
    | Normalize both significands so that their top bit is set.  A zero
    | significand in `b' is invalid; a zero significand in `a' returns a
    | zero with the sign of `a'.
    *------------------------------------------------------------------------*/
    if ( ! expB ) expB = 1;
    if ( ! (sigB & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! sigB ) goto invalid;
        normExpSig = softfloat_normSubnormalExtF80Sig( sigB );
        expB += normExpSig.exp;
        sigB = normExpSig.sig;
    }
    if ( ! expA ) expA = 1;
    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! sigA ) {
            expA = 0;
            goto copyA;
        }
        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
        expA += normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Compute the partial remainder `rem' and the low quotient bits `q'.
    | Both significands are widened to 128 bits and shifted left by 32.
    *------------------------------------------------------------------------*/
    expDiff = expA - expB;
    if ( expDiff < -1 ) goto copyA;
    rem = softfloat_shortShiftLeft128( 0, sigA, 32 );
    shiftedSigB = softfloat_shortShiftLeft128( 0, sigB, 32 );
    if ( expDiff < 1 ) {
        if ( expDiff ) {
            /* expDiff == -1: lower `expB' by one and shift `sigB' one extra
               bit so that both values share the same scale. */
            --expB;
            shiftedSigB = softfloat_shortShiftLeft128( 0, sigB, 33 );
            q = 0;
        } else {
            /* Equal exponents: the quotient is 1 when sigB <= sigA, else 0. */
            q = (sigB <= sigA);
            if ( q ) {
                rem =
                    softfloat_sub128(
                        rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
            }
        }
    } else {
        /* expDiff >= 1: estimate quotient bits from a 32-bit reciprocal
           approximation of the top half of `sigB', consuming 29 exponent
           steps per loop iteration. */
        recip32 = softfloat_approxRecip32_1( sigB>>32 );
        expDiff -= 30;
        for (;;) {
            q64 = (uint_fast64_t) (uint32_t) (rem.v64>>2) * recip32;
            if ( expDiff < 0 ) break;
            q = (q64 + 0x80000000)>>32;
            rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
            term = softfloat_mul64ByShifted32To128( sigB, q );
            rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
            /* A negative remainder is corrected by adding `b' back once. */
            if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
                rem =
                    softfloat_add128(
                        rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
            }
            expDiff -= 29;
        }
        /*--------------------------------------------------------------------
        | (`expDiff' cannot be less than -29 here.)
        *--------------------------------------------------------------------*/
        q = (uint32_t) (q64>>32)>>(~expDiff & 31);
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, expDiff + 30 );
        term = softfloat_mul64ByShifted32To128( sigB, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            /* `rem' is already negative: the alternate candidate is one
               multiple of `b' above it. */
            altRem =
                softfloat_add128(
                    rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
            goto selectRem;
        }
    }
    /*------------------------------------------------------------------------
    | Keep subtracting `b' (incrementing `q') until `rem' turns negative;
    | `altRem' holds the last non-negative value.
    *------------------------------------------------------------------------*/
    do {
        altRem = rem;
        ++q;
        rem =
            softfloat_sub128(
                rem.v64, rem.v0, shiftedSigB.v64, shiftedSigB.v0 );
    } while ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) );
    /*------------------------------------------------------------------------
    | Choose between the two candidates using the sign of their sum:
    | `altRem' is taken when the sum is negative, or when the sum is exactly
    | zero and `q' is odd; otherwise `rem' is kept.
    *------------------------------------------------------------------------*/
 selectRem:
    meanRem = softfloat_add128( rem.v64, rem.v0, altRem.v64, altRem.v0 );
    if (
        (meanRem.v64 & UINT64_C( 0x8000000000000000 ))
            || (! (meanRem.v64 | meanRem.v0) && (q & 1))
    ) {
        rem = altRem;
    }
    /* A negative chosen remainder is negated and flips the result sign. */
    signRem = signA;
    if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
        signRem = ! signRem;
        rem = softfloat_sub128( 0, 0, rem.v64, rem.v0 );
    }
    /* An all-zero remainder is packed with exponent 0; the rounding
       precision argument is the constant 80, not state->roundingPrecision. */
    return
        softfloat_normRoundPackToExtF80(
            state, signRem, rem.v64 | rem.v0 ? expB + 32 : 0, rem.v64, rem.v0, 80 );
    /*------------------------------------------------------------------------
    | NaN operand: defer to the NaN propagation helper.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, uiA0, uiB64, uiB0 );
    uiZ64 = uiZ.v64;
    uiZ0  = uiZ.v0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Invalid operation: raise the flag and return the default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ64 = defaultNaNExtF80UI64;
    uiZ0  = defaultNaNExtF80UI0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Return `a'.  If normalization pushed `expA' below 1, shift the
    | significand back right and use exponent 0.
    *------------------------------------------------------------------------*/
 copyA:
    if ( expA < 1 ) {
        sigA >>= 1 - expA;
        expA = 0;
    }
    uiZ64 = packToExtF80UI64( signA, expA );
    uiZ0  = sigA;
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_roundToInt.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Rounds the 80-bit extended-precision value `a' to an integral value in the
| same format, using `roundingMode'.
|
| When `exact' is true, `softfloat_flag_inexact' is OR-ed into
| `state->exceptionFlags' whenever the rounded result differs from the
| input; when `exact' is false that flag is never set here.
| A NaN input (exponent 0x7FFF with nonzero fraction bits) yields the result
| of `softfloat_propagateNaNExtF80UI'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t
 extF80_roundToInt( struct softfloat_state *state, extFloat80_t a, uint_fast8_t roundingMode, bool exact )
{
    union { struct extFloat80M s; extFloat80_t f; } uA;
    uint_fast16_t uiA64, signUI64;
    int_fast32_t exp;
    uint_fast64_t sigA;
    uint_fast16_t uiZ64;
    uint_fast64_t sigZ;
    struct exp32_sig64 normExpSig;
    struct uint128 uiZ;
    uint_fast64_t lastBitMask, roundBitsMask;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Unpack.  `signUI64' keeps only the sign bit, already in its final
    | position within the sign/exponent word.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.s.signExp;
    signUI64 = uiA64 & packToExtF80UI64( 1, 0 );
    exp = expExtF80UI64( uiA64 );
    sigA = uA.s.signif;
    /*------------------------------------------------------------------------
    | Top significand bit clear and exponent not 0x7FFF: a zero significand
    | returns a signed zero; anything else is normalized first.
    *------------------------------------------------------------------------*/
    if ( !(sigA & UINT64_C( 0x8000000000000000 )) && (exp != 0x7FFF) ) {
        if ( !sigA ) {
            uiZ64 = signUI64;
            sigZ = 0;
            goto uiZ;
        }
        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
        exp += normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Exponent 0x403E or above: no rounding is applied.  NaNs are propagated,
    | exponent 0x7FFF otherwise gets the significand 0x8000000000000000, and
    | every other value keeps its (possibly normalized) significand.
    *------------------------------------------------------------------------*/
    if ( 0x403E <= exp ) {
        if ( exp == 0x7FFF ) {
            if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
                uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, sigA, 0, 0 );
                uiZ64 = uiZ.v64;
                sigZ  = uiZ.v0;
                goto uiZ;
            }
            sigZ = UINT64_C( 0x8000000000000000 );
        } else {
            sigZ = sigA;
        }
        uiZ64 = signUI64 | exp;
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    | Exponent 0x3FFE or below: the result is either a signed zero or, via
    | `mag1', the value with exponent 0x3FFF and significand
    | 0x8000000000000000.  The inexact flag is set up front if requested.
    *------------------------------------------------------------------------*/
    if ( exp <= 0x3FFE ) {
        if ( exact ) state->exceptionFlags |= softfloat_flag_inexact;
        switch ( roundingMode ) {
         case softfloat_round_near_even:
            /* All bits below the top significand bit are zero: result is
               zero.  Otherwise share the near_maxMag rule below. */
            if ( !(sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) break;
            __attribute__((fallthrough));
         case softfloat_round_near_maxMag:
            if ( exp == 0x3FFE ) goto mag1;
            break;
         case softfloat_round_min:
            if ( signUI64 ) goto mag1;
            break;
         case softfloat_round_max:
            if ( !signUI64 ) goto mag1;
            break;
#ifdef SOFTFLOAT_ROUND_ODD
         case softfloat_round_odd:
            goto mag1;
#endif
        }
        uiZ64 = signUI64;
        sigZ  = 0;
        goto uiZ;
     mag1:
        uiZ64 = signUI64 | 0x3FFF;
        sigZ  = UINT64_C( 0x8000000000000000 );
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    | Remaining exponents (0x3FFF through 0x403D): `lastBitMask' selects the
    | lowest significand bit that is kept and `roundBitsMask' covers the bits
    | below it, which are rounded away.
    *------------------------------------------------------------------------*/
    uiZ64 = signUI64 | exp;
    lastBitMask = (uint_fast64_t) 1<<(0x403E - exp);
    roundBitsMask = lastBitMask - 1;
    sigZ = sigA;
    if ( roundingMode == softfloat_round_near_maxMag ) {
        sigZ += lastBitMask>>1;
    } else if ( roundingMode == softfloat_round_near_even ) {
        sigZ += lastBitMask>>1;
        /* Discarded bits are all zero after adding the half increment:
           clear the last kept bit so that it ends up even. */
        if ( !(sigZ & roundBitsMask) ) sigZ &= ~lastBitMask;
    } else if (
        roundingMode == (signUI64 ? softfloat_round_min : softfloat_round_max)
    ) {
        /* Rounding away from zero for this sign: bump unless the discarded
           bits are already all zero. */
        sigZ += roundBitsMask;
    }
    sigZ &= ~roundBitsMask;
    if ( !sigZ ) {
        /* The rounding increment wrapped the significand to zero: step the
           exponent up by one and restore the top significand bit. */
        ++uiZ64;
        sigZ = UINT64_C( 0x8000000000000000 );
    }
    if ( sigZ != sigA ) {
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) sigZ |= lastBitMask;
#endif
        if ( exact ) state->exceptionFlags |= softfloat_flag_inexact;
    }
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif = sigZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_sqrt.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Computes the square root of the 80-bit extended-precision value `a'.
|
| Special cases, as implemented below:
|   - exponent 0x7FFF with nonzero fraction bits: result of
|     `softfloat_propagateNaNExtF80UI';
|   - exponent 0x7FFF, fraction zero, sign clear: `a' is returned unchanged;
|   - sign set with a nonzero significand (including exponent 0x7FFF):
|     invalid flag raised, default NaN returned;
|   - zero significand: a zero carrying the sign of `a' is returned.
| All other results are packed with sign 0 by `softfloat_roundPackToExtF80'
| using `state->roundingPrecision'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_sqrt( struct softfloat_state *state, extFloat80_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } uA;
    uint_fast16_t uiA64;
    uint_fast64_t uiA0;
    bool signA;
    int_fast32_t expA;
    uint_fast64_t sigA;
    struct uint128 uiZ;
    uint_fast16_t uiZ64;
    uint_fast64_t uiZ0;
    struct exp32_sig64 normExpSig;
    int_fast32_t expZ;
    uint_fast32_t sig32A, recipSqrt32, sig32Z;
    struct uint128 rem;
    uint_fast64_t q, x64, sigZ;
    struct uint128 y, term;
    uint_fast64_t sigZExtra;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.s.signExp;
    uiA0  = uA.s.signif;
    signA = signExtF80UI64( uiA64 );
    expA  = expExtF80UI64( uiA64 );
    sigA  = uiA0;
    /*------------------------------------------------------------------------
    | Maximum exponent: NaN, or infinity (valid only when the sign is clear).
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
            uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, uiA0, 0, 0 );
            uiZ64 = uiZ.v64;
            uiZ0  = uiZ.v0;
            goto uiZ;
        }
        if ( ! signA ) return a;
        goto invalid;
    }
    /*------------------------------------------------------------------------
    | Sign set: a zero significand returns a zero with that sign; any other
    | value is invalid.
    *------------------------------------------------------------------------*/
    if ( signA ) {
        if ( ! sigA ) goto zero;
        goto invalid;
    }
    /*------------------------------------------------------------------------
    | Normalize the significand so that its top bit is set.
    *------------------------------------------------------------------------*/
    if ( ! expA ) expA = 1;
    if ( ! (sigA & UINT64_C( 0x8000000000000000 )) ) {
        if ( ! sigA ) goto zero;
        normExpSig = softfloat_normSubnormalExtF80Sig( sigA );
        expA += normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | (`sig32Z' is guaranteed to be a lower bound on the square root of
    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
    | `sigA'.)
    *------------------------------------------------------------------------*/
    /* Halve the exponent relative to 0x3FFF.  Afterwards `expA' keeps only
       its low bit, which is passed to the reciprocal-square-root estimate
       and selects the shift (61 or 62) applied to the significand. */
    expZ = ((expA - 0x3FFF)>>1) + 0x3FFF;
    expA &= 1;
    sig32A = sigA>>32;
    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
    sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
    if ( expA ) {
        sig32Z >>= 1;
        rem = softfloat_shortShiftLeft128( 0, sigA, 61 );
    } else {
        rem = softfloat_shortShiftLeft128( 0, sigA, 62 );
    }
    /* Remainder after removing the square of the 32-bit root estimate. */
    rem.v64 -= (uint_fast64_t) sig32Z * sig32Z;
    /*------------------------------------------------------------------------
    | Extend the root estimate: `q' is derived from the remainder and the
    | reciprocal-square-root estimate and added into `sigZ' shifted left by 3.
    *------------------------------------------------------------------------*/
    q = ((uint32_t) (rem.v64>>2) * (uint_fast64_t) recipSqrt32)>>32;
    x64 = (uint_fast64_t) sig32Z<<32;
    sigZ = x64 + (q<<3);
    y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
    /*------------------------------------------------------------------------
    | (Repeating this loop is a rare occurrence.)
    *------------------------------------------------------------------------*/
    /* Step `q' down by one (and `sigZ' by 1<<3) until the recomputed
       remainder is non-negative. */
    for (;;) {
        term = softfloat_mul64ByShifted32To128( x64 + sigZ, q );
        rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
        if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
        --q;
        sigZ -= 1<<3;
    }
    /*------------------------------------------------------------------------
    | Final estimate: the upper bits of `q' (q>>25) are added into the
    | doubled `sigZ', and `q' shifted left by 39 forms `sigZExtra'.
    *------------------------------------------------------------------------*/
    q = (((rem.v64>>2) * recipSqrt32)>>32) + 2;
    x64 = sigZ;
    sigZ = (sigZ<<1) + (q>>25);
    sigZExtra = (uint64_t) (q<<39);
    /*------------------------------------------------------------------------
    | Only when the low 24 bits of `q' are at most 2 is the estimate checked
    | against an explicitly computed remainder: a negative remainder steps
    | the sigZ:sigZExtra pair down by one, and a nonzero positive remainder
    | sets the lowest bit of `sigZExtra'.
    *------------------------------------------------------------------------*/
    if ( (q & 0xFFFFFF) <= 2 ) {
        q &= ~(uint_fast64_t) 0xFFFF;
        sigZExtra = (uint64_t) (q<<39);
        term = softfloat_mul64ByShifted32To128( x64 + (q>>27), q );
        x64 = (uint32_t) (q<<5) * (uint_fast64_t) (uint32_t) q;
        term = softfloat_add128( term.v64, term.v0, 0, x64 );
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 28 );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            /* Borrow from `sigZ' when `sigZExtra' is about to wrap. */
            if ( ! sigZExtra ) --sigZ;
            --sigZExtra;
        } else {
            if ( rem.v64 | rem.v0 ) sigZExtra |= 1;
        }
    }
    return
        softfloat_roundPackToExtF80(
            state, 0, expZ, sigZ, sigZExtra, state->roundingPrecision );
    /*------------------------------------------------------------------------
    | Invalid operation: raise the flag and return the default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ64 = defaultNaNExtF80UI64;
    uiZ0  = defaultNaNExtF80UI0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Zero result carrying the sign of the input.
    *------------------------------------------------------------------------*/
 zero:
    uiZ64 = packToExtF80UI64( signA, 0 );
    uiZ0  = 0;
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_sub.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Computes `a - b' for two 80-bit extended values.  Each operand is split
| into its sign/exponent word and its significand word.  When both operands
| carry the same sign the magnitudes are subtracted, otherwise they are
| added; in either case the sign of `a' is handed to the magnitude routine.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t extF80_sub( struct softfloat_state *state, extFloat80_t a, extFloat80_t b )
{
    /* The unions expose the raw words of each operand. */
    union { struct extFloat80M s; extFloat80_t f; } lhs, rhs;
    uint_fast16_t lhsSignExp, rhsSignExp;
    uint_fast64_t lhsSignif, rhsSignif;
    bool lhsNegative, rhsNegative;

    lhs.f = a;
    lhsSignExp  = lhs.s.signExp;
    lhsSignif   = lhs.s.signif;
    lhsNegative = signExtF80UI64( lhsSignExp );

    rhs.f = b;
    rhsSignExp  = rhs.s.signExp;
    rhsSignif   = rhs.s.signif;
    rhsNegative = signExtF80UI64( rhsSignExp );

#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
    /* Direct calls so that the magnitude routines can be inlined. */
    if ( lhsNegative != rhsNegative ) {
        return softfloat_addMagsExtF80(
                   state, lhsSignExp, lhsSignif, rhsSignExp, rhsSignif, lhsNegative );
    }
    return softfloat_subMagsExtF80(
               state, lhsSignExp, lhsSignif, rhsSignExp, rhsSignif, lhsNegative );
#else
    {
        /* Select the magnitude routine once and dispatch through a pointer. */
        extFloat80_t
            (*magsOp)(
                struct softfloat_state *, uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );

        if ( lhsNegative == rhsNegative ) {
            magsOp = softfloat_subMagsExtF80;
        } else {
            magsOp = softfloat_addMagsExtF80;
        }
        return (*magsOp)( state, lhsSignExp, lhsSignif, rhsSignExp, rhsSignif, lhsNegative );
    }
#endif

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_f128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to 128-bit format.  A NaN input
| (exponent 0x7FFF with any of the low 63 significand bits set) is routed
| through the common-NaN helpers.  Every other input is repacked directly:
| the low 63 significand bits are shifted left by 49 and combined with the
| unchanged sign and exponent fields.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
float128_t extF80_to_f128( struct softfloat_state *state, extFloat80_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    union ui128_f128 result;
    uint_fast16_t signExp;
    uint_fast64_t signif;
    uint_fast16_t biasedExp;
    uint_fast64_t fracBits;

    src.f = a;
    signExp   = src.s.signExp;
    signif    = src.s.signif;
    biasedExp = expExtF80UI64( signExp );
    /* Drop the top significand bit; only the low 63 bits are carried over. */
    fracBits  = signif & UINT64_C( 0x7FFFFFFFFFFFFFFF );

    if ( (biasedExp != 0x7FFF) || ! fracBits ) {
        bool negative;
        struct uint128 widened;

        negative = signExtF80UI64( signExp );
        widened  = softfloat_shortShiftLeft128( 0, fracBits, 49 );
        result.ui.v64 = packToF128UI64( negative, biasedExp, widened.v64 );
        result.ui.v0  = widened.v0;
    } else {
        struct commonNaN nan;

        softfloat_extF80UIToCommonNaN( state, signExp, signif, &nan );
        result.ui = softfloat_commonNaNToF128UI( &nan );
    }
    return result.f;

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_f32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to 32-bit format.  Inputs with
| exponent 0x7FFF produce either a converted NaN or a signed value packed
| with exponent 0xFF and a zero fraction.  Inputs whose exponent and
| jam-shifted significand are both zero produce a signed zero.  All other
| inputs are rebiased and passed to `softfloat_roundPackToF32'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
float32_t extF80_to_f32( struct softfloat_state *state, extFloat80_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    union ui32_f32 packed;
    uint_fast16_t signExp;
    uint_fast64_t signif;
    bool negative;
    int_fast32_t exponent;
    uint_fast32_t narrowSig;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    src.f = a;
    signExp  = src.s.signExp;
    signif   = src.s.signif;
    negative = signExtF80UI64( signExp );
    exponent = expExtF80UI64( signExp );
    /*------------------------------------------------------------------------
    | Maximum exponent: NaN when any of the low 63 significand bits is set.
    *------------------------------------------------------------------------*/
    if ( exponent == 0x7FFF ) {
        if ( signif & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
            struct commonNaN nan;

            softfloat_extF80UIToCommonNaN( state, signExp, signif, &nan );
            packed.ui = softfloat_commonNaNToF32UI( &nan );
        } else {
            packed.ui = packToF32UI( negative, 0xFF, 0 );
        }
        return packed.f;
    }
    /*------------------------------------------------------------------------
    | Reduce the significand by 33 bits, jamming shifted-out bits.
    *------------------------------------------------------------------------*/
    narrowSig = softfloat_shortShiftRightJam64( signif, 33 );
    if ( ! (exponent | narrowSig) ) {
        packed.ui = packToF32UI( negative, 0, 0 );
        return packed.f;
    }
    /*------------------------------------------------------------------------
    | Rebias the exponent; clamp it from below only when `int_fast16_t' is
    | narrower than `int_fast32_t'.
    *------------------------------------------------------------------------*/
    exponent -= 0x3F81;
    if ( (sizeof (int_fast16_t) < sizeof (int_fast32_t)) && (exponent < -0x1000) ) {
        exponent = -0x1000;
    }
    return softfloat_roundPackToF32( state, negative, exponent, narrowSig );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_f64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to 64-bit format.  An input whose
| exponent and significand are both zero yields a signed zero.  An input
| with exponent 0x7FFF yields either a converted NaN or a signed value
| packed with exponent 0x7FF and a zero fraction.  Anything else is shifted
| right by one bit (with jamming), rebiased, and passed to
| `softfloat_roundPackToF64'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
float64_t extF80_to_f64( struct softfloat_state *state, extFloat80_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    union ui64_f64 packed;
    uint_fast16_t signExp;
    uint_fast64_t rawSignif;
    uint_fast64_t workSig;
    bool negative;
    int_fast32_t exponent;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    src.f = a;
    signExp   = src.s.signExp;
    rawSignif = src.s.signif;
    negative  = signExtF80UI64( signExp );
    exponent  = expExtF80UI64( signExp );
    workSig   = rawSignif;
    /*------------------------------------------------------------------------
    | Zero exponent and zero significand: signed zero.
    *------------------------------------------------------------------------*/
    if ( ! (exponent | workSig) ) {
        packed.ui = packToF64UI( negative, 0, 0 );
        return packed.f;
    }
    /*------------------------------------------------------------------------
    | Maximum exponent: NaN when any of the low 63 significand bits is set.
    *------------------------------------------------------------------------*/
    if ( exponent == 0x7FFF ) {
        if ( workSig & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
            struct commonNaN nan;

            softfloat_extF80UIToCommonNaN( state, signExp, rawSignif, &nan );
            packed.ui = softfloat_commonNaNToF64UI( &nan );
        } else {
            packed.ui = packToF64UI( negative, 0x7FF, 0 );
        }
        return packed.f;
    }
    /*------------------------------------------------------------------------
    | Shift, rebias, and clamp the exponent from below only when
    | `int_fast16_t' is narrower than `int_fast32_t'.
    *------------------------------------------------------------------------*/
    workSig = softfloat_shortShiftRightJam64( workSig, 1 );
    exponent -= 0x3C01;
    if ( (sizeof (int_fast16_t) < sizeof (int_fast32_t)) && (exponent < -0x1000) ) {
        exponent = -0x1000;
    }
    return softfloat_roundPackToF64( state, negative, exponent, workSig );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_i32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to a 32-bit signed integer using
| `roundingMode'; `exact' is forwarded to `softfloat_roundToI32'.  How a NaN
| input is treated is decided at compile time by comparing `i32_fromNaN'
| with the two overflow constants: the sign is forced, or the invalid flag
| is raised and `i32_fromNaN' returned, or nothing special is done.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
int_fast32_t
 extF80_to_i32( struct softfloat_state *state, extFloat80_t a, uint_fast8_t roundingMode, bool exact )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    uint_fast16_t signExp;
    bool negative;
    int_fast32_t exponent;
    uint_fast64_t magnitude;
    int_fast32_t rightShift;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    src.f = a;
    signExp   = src.s.signExp;
    negative  = signExtF80UI64( signExp );
    exponent  = expExtF80UI64( signExp );
    magnitude = src.s.signif;
    /*------------------------------------------------------------------------
    | NaN handling, compiled in only when the NaN result differs from at
    | least one of the overflow results.
    *------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
    if ( (exponent == 0x7FFF) && (magnitude & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
#if (i32_fromNaN == i32_fromPosOverflow)
        negative = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
        negative = 1;
#else
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return i32_fromNaN;
#endif
    }
#endif
    /*------------------------------------------------------------------------
    | The shift distance is 0x4032 - exponent, but never less than 1.
    *------------------------------------------------------------------------*/
    rightShift = (exponent < 0x4032) ? (0x4032 - exponent) : 1;
    magnitude = softfloat_shiftRightJam64( magnitude, rightShift );
    return softfloat_roundToI32( state, negative, magnitude, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_i64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to a 64-bit signed integer using
| `roundingMode'; `exact' is forwarded to `softfloat_roundToI64'.  When the
| exponent exceeds 0x403E the invalid flag is raised and one of
| `i64_fromNaN', `i64_fromNegOverflow' or `i64_fromPosOverflow' is returned.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
int_fast64_t
 extF80_to_i64( struct softfloat_state *state, extFloat80_t a, uint_fast8_t roundingMode, bool exact )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    uint_fast16_t signExp;
    bool negative;
    int_fast32_t exponent;
    uint_fast64_t magnitude;
    uint_fast64_t roundBits;
    int_fast32_t rightShift;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    src.f = a;
    signExp   = src.s.signExp;
    negative  = signExtF80UI64( signExp );
    exponent  = expExtF80UI64( signExp );
    magnitude = src.s.signif;
    /*------------------------------------------------------------------------
    | A negative shift distance means the value cannot be represented.
    *------------------------------------------------------------------------*/
    rightShift = 0x403E - exponent;
    if ( rightShift < 0 ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        if ( (exponent == 0x7FFF) && (magnitude & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
            return i64_fromNaN;
        }
        return negative ? i64_fromNegOverflow : i64_fromPosOverflow;
    }
    /*------------------------------------------------------------------------
    | With no shift the significand is used as is and there are no extra
    | bits; otherwise shift right, keeping the shifted-out bits for rounding.
    *------------------------------------------------------------------------*/
    if ( rightShift == 0 ) {
        roundBits = 0;
    } else {
        struct uint64_extra shifted;

        shifted   = softfloat_shiftRightJam64Extra( magnitude, 0, rightShift );
        magnitude = shifted.v;
        roundBits = shifted.extra;
    }
    return softfloat_roundToI64( state, negative, magnitude, roundBits, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/extF80_to_ui64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 80-bit extended value `a' to a 64-bit unsigned integer using
| `roundingMode'; `exact' is forwarded to `softfloat_roundToUI64'.  When the
| exponent exceeds 0x403E the invalid flag is raised and one of
| `ui64_fromNaN', `ui64_fromNegOverflow' or `ui64_fromPosOverflow' is
| returned.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast64_t
 extF80_to_ui64( struct softfloat_state *state, extFloat80_t a, uint_fast8_t roundingMode, bool exact )
{
    union { struct extFloat80M s; extFloat80_t f; } src;
    uint_fast16_t signExp;
    bool negative;
    int_fast32_t exponent;
    uint_fast64_t magnitude;
    uint_fast64_t roundBits = 0;
    int_fast32_t rightShift;

    /*------------------------------------------------------------------------
    | Unpack the operand.
    *------------------------------------------------------------------------*/
    src.f = a;
    signExp   = src.s.signExp;
    negative  = signExtF80UI64( signExp );
    exponent  = expExtF80UI64( signExp );
    magnitude = src.s.signif;
    /*------------------------------------------------------------------------
    | A negative shift distance means the value cannot be represented.
    *------------------------------------------------------------------------*/
    rightShift = 0x403E - exponent;
    if ( rightShift < 0 ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        if ( (exponent == 0x7FFF) && (magnitude & UINT64_C( 0x7FFFFFFFFFFFFFFF )) ) {
            return ui64_fromNaN;
        }
        return negative ? ui64_fromNegOverflow : ui64_fromPosOverflow;
    }
    /*------------------------------------------------------------------------
    | Shift only when the distance is non-zero, keeping the shifted-out bits
    | for rounding; otherwise the extra bits stay zero.
    *------------------------------------------------------------------------*/
    if ( rightShift != 0 ) {
        struct uint64_extra shifted;

        shifted   = softfloat_shiftRightJam64Extra( magnitude, 0, rightShift );
        magnitude = shifted.v;
        roundBits = shifted.extra;
    }
    return softfloat_roundToUI64( state, negative, magnitude, roundBits, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/f128_add.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the sum of the 128-bit values `a' and `b'.  Each operand is split
| into its upper and lower 64-bit words.  When both operands carry the same
| sign the magnitudes are added, otherwise they are subtracted; in either
| case the sign of `a' is handed to the magnitude routine.
|
| Both the inlined and the function-pointer paths forward `state' to the
| magnitude routine, so the two build configurations call it with the same
| argument list.
*----------------------------------------------------------------------------*/
float128_t f128_add( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    bool signB;
#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
    /* The pointer type must carry the leading state argument, exactly as the
       direct calls in the inlined branch below do. */
    float128_t
        (*magsFuncPtr)(
            struct softfloat_state *, uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
#endif

    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    signB = signF128UI64( uiB64 );
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
    /* Direct calls so that the magnitude routines can be inlined. */
    if ( signA == signB ) {
        return softfloat_addMagsF128( state, uiA64, uiA0, uiB64, uiB0, signA );
    } else {
        return softfloat_subMagsF128( state, uiA64, uiA0, uiB64, uiB0, signA );
    }
#else
    /* Select the magnitude routine once and dispatch through the pointer. */
    magsFuncPtr =
        (signA == signB) ? softfloat_addMagsF128 : softfloat_subMagsF128;
    return (*magsFuncPtr)( state, uiA64, uiA0, uiB64, uiB0, signA );
#endif

}


================================================
FILE: External/SoftFloat-3e/src/f128_div.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Divides the 128-bit floating-point value `a' by `b' and returns the
| quotient.  Exception flags are reported through `state' (passed on to
| softfloat_raiseFlags, softfloat_propagateNaNF128UI and
| softfloat_roundPackToF128).
|
| Special cases handled before the arithmetic path:
|   - either operand has exponent 0x7FFF and a nonzero fraction: the result
|     comes from softfloat_propagateNaNF128UI;
|   - exponent-0x7FFF/zero-fraction divided by the same, or zero divided by
|     zero: the invalid flag is raised and the default NaN is returned;
|   - nonzero finite value divided by zero: softfloat_flag_infinite is raised
|     and a signed infinity is returned;
|   - finite value divided by infinity, or zero divided by a nonzero finite
|     value: a signed zero is returned.
| Otherwise the quotient significand is built from 29-bit-step quotient
| estimates and handed to softfloat_roundPackToF128.
*----------------------------------------------------------------------------*/
float128_t f128_div( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    int_fast32_t expA;
    struct uint128 sigA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    bool signB;
    int_fast32_t expB;
    struct uint128 sigB;
    bool signZ;
    struct exp32_sig128 normExpSig;
    int_fast32_t expZ;
    struct uint128 rem;
    uint_fast32_t recip32;
    int ix;
    uint_fast64_t q64;
    uint_fast32_t q;
    struct uint128 term;
    uint_fast32_t qs[3];
    uint_fast64_t sigZExtra;
    struct uint128 sigZ, uiZ;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands into sign, exponent and significand (high word
    | fraction plus the untouched low word).  The result sign is the XOR of
    | the operand signs.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    signB = signF128UI64( uiB64 );
    expB  = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    signZ = signA ^ signB;
    /*------------------------------------------------------------------------
    | Exponent 0x7FFF marks an infinity (zero fraction) or a NaN (nonzero
    | fraction).
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
        if ( expB == 0x7FFF ) {
            if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
            /* infinity / infinity */
            goto invalid;
        }
        /* infinity / finite */
        goto infinity;
    }
    if ( expB == 0x7FFF ) {
        if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
        /* finite / infinity */
        goto zero;
    }
    /*------------------------------------------------------------------------
    | Zero and subnormal operands (exponent field 0).  The divisor is checked
    | first so that 0/0 is reported as invalid rather than returning zero.
    *------------------------------------------------------------------------*/
    if ( ! expB ) {
        if ( ! (sigB.v64 | sigB.v0) ) {
            /* Divisor is zero: 0/0 is invalid, anything else divides by zero. */
            if ( ! (expA | sigA.v64 | sigA.v0) ) goto invalid;
            softfloat_raiseFlags( state, softfloat_flag_infinite );
            goto infinity;
        }
        /* Subnormal divisor: replace exponent/significand with the values
           returned by softfloat_normSubnormalF128Sig. */
        normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
        expB = normExpSig.exp;
        sigB = normExpSig.sig;
    }
    if ( ! expA ) {
        if ( ! (sigA.v64 | sigA.v0) ) goto zero;
        normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
        expA = normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Arithmetic path.  Bit 48 of each high word is set (presumably the
    | implicit leading significand bit, sitting just above the fraction
    | field -- confirm against fracF128UI64).  If sigA < sigB the dividend is
    | doubled and the result exponent lowered by one to compensate.
    *------------------------------------------------------------------------*/
    expZ = expA - expB + 0x3FFE;
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sigB.v64 |= UINT64_C( 0x0001000000000000 );
    rem = sigA;
    if ( softfloat_lt128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 ) ) {
        --expZ;
        rem = softfloat_add128( sigA.v64, sigA.v0, sigA.v64, sigA.v0 );
    }
    /* 32-bit reciprocal estimate derived from the top bits of the divisor. */
    recip32 = softfloat_approxRecip32_1( sigB.v64>>17 );
    /* The loop computes four quotient estimates.  The first three are
       corrected against the remainder and stored in qs[2], qs[1], qs[0] (in
       that order); the fourth is left uncorrected in `q' when the loop
       breaks. */
    ix = 3;
    for (;;) {
        /* Estimate = (top bits of remainder) * reciprocal, rounded by adding
           0x80000000 before keeping the upper 32 bits. */
        q64 = (uint_fast64_t) (uint32_t) (rem.v64>>19) * recip32;
        q = (q64 + 0x80000000)>>32;
        --ix;
        if ( ix < 0 ) break;
        /* rem = (rem << 29) - sigB * q */
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
        term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        /* Bit 63 of the high word set is treated as a negative remainder:
           the estimate was one too large, so back it off and add the divisor
           back. */
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            --q;
            rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
        }
        qs[ix] = q;
    }
    /*------------------------------------------------------------------------
    | Only when the low three bits of the last estimate are 7 or 0 (that is,
    | ((q + 1) & 7) < 2) is the remainder for it actually computed: q is then
    | corrected by one in either direction and its lowest bit is forced to 1
    | if any remainder is left over.
    *------------------------------------------------------------------------*/
    if ( ((q + 1) & 7) < 2 ) {
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
        term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            --q;
            rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
        } else if ( softfloat_le128( sigB.v64, sigB.v0, rem.v64, rem.v0 ) ) {
            ++q;
            rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
        }
        if ( rem.v64 | rem.v0 ) q |= 1;
    }
    /*------------------------------------------------------------------------
    | Assemble the result significand from the four pieces:
    |   high word  = qs[2] << 19
    |   low word   = (qs[0] << 25) + (q >> 4)
    |   plus qs[1] shifted left by 54 as a 128-bit value;
    | the low four bits of q become the top bits of the extra word.
    *------------------------------------------------------------------------*/
    sigZExtra = (uint64_t) ((uint_fast64_t) q<<60);
    term = softfloat_shortShiftLeft128( 0, qs[1], 54 );
    sigZ =
        softfloat_add128(
            (uint_fast64_t) qs[2]<<19, ((uint_fast64_t) qs[0]<<25) + (q>>4),
            term.v64, term.v0
        );
    return
        softfloat_roundPackToF128( state, signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
    /*------------------------------------------------------------------------
    | At least one operand is a NaN: let the specialization helper choose the
    | result from the raw operand bits.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( state, uiA64, uiA0, uiB64, uiB0 );
    goto uiZ;
    /*------------------------------------------------------------------------
    | Invalid operation (inf/inf or 0/0): raise the flag, return default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0  = defaultNaNF128UI0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | Signed infinity: exponent 0x7FFF, zero fraction (low word set at uiZ0).
    *------------------------------------------------------------------------*/
 infinity:
    uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
    goto uiZ0;
    /*------------------------------------------------------------------------
    | Signed zero; falls through the shared tails below.
    *------------------------------------------------------------------------*/
 zero:
    uiZ.v64 = packToF128UI64( signZ, 0, 0 );
 uiZ0:
    /* Shared tail for results whose low 64 bits are zero. */
    uiZ.v0 = 0;
 uiZ:
    /* Shared tail: reinterpret the assembled bit pattern as a float128_t. */
    uZ.ui = uiZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/f128_eq.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Quiet equality test for two 128-bit floating-point values.  Returns true
| when `a' and `b' compare equal (a positive and a negative zero count as
| equal), false otherwise.  If either operand is a NaN the result is false,
| and the invalid flag is raised through `state' only when one of them is a
| signaling NaN.
*----------------------------------------------------------------------------*/
bool f128_eq( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    /* Unordered operands never compare equal. */
    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        if (
               softfloat_isSigNaNF128UI( hiA, loA )
            || softfloat_isSigNaNF128UI( hiB, loB )
        ) {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
        }
        return false;
    }

    /* The low words must match in every equal case. */
    if ( loA != loB ) return false;

    /* Identical bit patterns. */
    if ( hiA == hiB ) return true;

    /* Remaining equal case: both are zeros that differ only in sign, i.e.
       every bit other than the sign bit is clear in both operands. */
    return ! loA && ! ((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));

}


================================================
FILE: External/SoftFloat-3e/src/f128_eq_signaling.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Signaling equality test for two 128-bit floating-point values.  Behaves
| like a plain equality comparison (positive and negative zero are equal),
| except that ANY NaN operand -- quiet or signaling -- raises the invalid
| flag through `state' and yields false.
*----------------------------------------------------------------------------*/
bool f128_eq_signaling( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return false;
    }

    /* The low words must match in every equal case. */
    if ( loA != loB ) return false;

    /* Identical bit patterns. */
    if ( hiA == hiB ) return true;

    /* Zeros of opposite sign: nothing but the sign bit may be set. */
    return ! loA && ! ((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ));

}


================================================
FILE: External/SoftFloat-3e/src/f128_isSignalingNaN.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns true when the 128-bit floating-point value `a' is classified as a
| signaling NaN by softfloat_isSigNaNF128UI.  No exception flag is touched.
*----------------------------------------------------------------------------*/
bool f128_isSignalingNaN( float128_t a )
{
    union ui128_f128 bits;
    bool isSignaling;

    /* Reinterpret the value as its two 64-bit halves. */
    bits.f = a;
    isSignaling = softfloat_isSigNaNF128UI( bits.ui.v64, bits.ui.v0 );
    return isSignaling;

}


================================================
FILE: External/SoftFloat-3e/src/f128_le.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Signaling less-than-or-equal test: returns true when `a' <= `b'.  If either
| operand is a NaN (quiet or signaling) the invalid flag is raised through
| `state' and the result is false.
*----------------------------------------------------------------------------*/
bool f128_le( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;
    bool negA, negB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return false;
    }

    negA = signF128UI64( hiA );
    negB = signF128UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: a negative `a' is always <= `b'. */
        if ( negA ) return true;
        /* `a' positive, `b' negative: only true when both are zero, i.e. no
           bit other than a sign bit is set in either operand. */
        return
            ! (((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF )) | loA | loB);
    }

    /* Same sign: identical bit patterns are equal. */
    if ( (hiA == hiB) && (loA == loB) ) return true;

    /* Otherwise compare the raw 128-bit patterns; for negative operands the
       outcome of that comparison is inverted. */
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/f128_le_quiet.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Quiet less-than-or-equal test: returns true when `a' <= `b'.  If either
| operand is a NaN the result is false; the invalid flag is raised through
| `state' only when one of the operands is a signaling NaN.
*----------------------------------------------------------------------------*/
bool f128_le_quiet( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;
    bool negA, negB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        /* Quiet NaNs are silently unordered; only signaling ones flag. */
        if (
               softfloat_isSigNaNF128UI( hiA, loA )
            || softfloat_isSigNaNF128UI( hiB, loB )
        ) {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
        }
        return false;
    }

    negA = signF128UI64( hiA );
    negB = signF128UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: a negative `a' is always <= `b'. */
        if ( negA ) return true;
        /* `a' positive, `b' negative: only true when both are zero. */
        return
            ! (((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF )) | loA | loB);
    }

    /* Same sign: identical bit patterns are equal. */
    if ( (hiA == hiB) && (loA == loB) ) return true;

    /* Otherwise compare the raw 128-bit patterns; for negative operands the
       outcome of that comparison is inverted. */
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/f128_lt.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Signaling less-than test: returns true when `a' < `b'.  If either operand
| is a NaN (quiet or signaling) the invalid flag is raised through `state'
| and the result is false.
*----------------------------------------------------------------------------*/
bool f128_lt( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;
    bool negA, negB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return false;
    }

    negA = signF128UI64( hiA );
    negB = signF128UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: a non-negative `a' can never be below `b'. */
        if ( ! negA ) return false;
        /* `a' negative, `b' positive: strictly less unless both are zero,
           i.e. unless no bit other than a sign bit is set anywhere. */
        return
            (((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF )) | loA | loB) != 0;
    }

    /* Same sign: identical bit patterns are equal, hence not less. */
    if ( (hiA == hiB) && (loA == loB) ) return false;

    /* Otherwise compare the raw 128-bit patterns; for negative operands the
       outcome of that comparison is inverted. */
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/f128_lt_quiet.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Quiet less-than test: returns true when `a' < `b'.  If either operand is a
| NaN the result is false; the invalid flag is raised through `state' only
| when one of the operands is a signaling NaN.
*----------------------------------------------------------------------------*/
bool f128_lt_quiet( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 bitsA, bitsB;
    uint_fast64_t hiA, loA, hiB, loB;
    bool negA, negB;

    bitsA.f = a;
    bitsB.f = b;
    hiA = bitsA.ui.v64;
    loA = bitsA.ui.v0;
    hiB = bitsB.ui.v64;
    loB = bitsB.ui.v0;

    if ( isNaNF128UI( hiA, loA ) || isNaNF128UI( hiB, loB ) ) {
        /* Quiet NaNs are silently unordered; only signaling ones flag. */
        if (
               softfloat_isSigNaNF128UI( hiA, loA )
            || softfloat_isSigNaNF128UI( hiB, loB )
        ) {
            softfloat_raiseFlags( state, softfloat_flag_invalid );
        }
        return false;
    }

    negA = signF128UI64( hiA );
    negB = signF128UI64( hiB );

    if ( negA != negB ) {
        /* Opposite signs: a non-negative `a' can never be below `b'. */
        if ( ! negA ) return false;
        /* `a' negative, `b' positive: strictly less unless both are zero. */
        return
            (((hiA | hiB) & UINT64_C( 0x7FFFFFFFFFFFFFFF )) | loA | loB) != 0;
    }

    /* Same sign: identical bit patterns are equal, hence not less. */
    if ( (hiA == hiB) && (loA == loB) ) return false;

    /* Otherwise compare the raw 128-bit patterns; for negative operands the
       outcome of that comparison is inverted. */
    return negA ^ softfloat_lt128( hiA, loA, hiB, loB );

}


================================================
FILE: External/SoftFloat-3e/src/f128_mul.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Multiplies the 128-bit floating-point values `a' and `b' and returns the
| product.  Exception flags are reported through `state'.
|
| Special cases handled before the arithmetic path:
|   - either operand has exponent 0x7FFF and a nonzero fraction: the result
|     comes from softfloat_propagateNaNF128UI;
|   - infinity times zero: the invalid flag is raised and the default NaN is
|     returned;
|   - infinity times anything else: a signed infinity is returned;
|   - a zero operand with a finite partner: a signed zero is returned.
| Otherwise the 256-bit significand product is formed and handed to
| softfloat_roundPackToF128.
*----------------------------------------------------------------------------*/
float128_t f128_mul( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 inA, inB, out;
    uint_fast64_t hiA, loA, hiB, loB;
    bool negA, negB, negZ;
    int_fast32_t expA, expB, expZ;
    struct uint128 sigA, sigB, sigZ;
    bool aIsSpecial, bIsSpecial;
    uint_fast64_t magBits;
    struct exp32_sig128 norm;
    uint64_t product[4];
    uint_fast64_t sigZExtra;
    struct uint128_extra shifted;

    /*------------------------------------------------------------------------
    | Unpack both operands; the result sign is the XOR of the operand signs.
    *------------------------------------------------------------------------*/
    inA.f = a;
    hiA = inA.ui.v64;
    loA = inA.ui.v0;
    negA = signF128UI64( hiA );
    expA = expF128UI64( hiA );
    sigA.v64 = fracF128UI64( hiA );
    sigA.v0 = loA;

    inB.f = b;
    hiB = inB.ui.v64;
    loB = inB.ui.v0;
    negB = signF128UI64( hiB );
    expB = expF128UI64( hiB );
    sigB.v64 = fracF128UI64( hiB );
    sigB.v0 = loB;

    negZ = negA ^ negB;

    /*------------------------------------------------------------------------
    | Infinities and NaNs (exponent field 0x7FFF).
    *------------------------------------------------------------------------*/
    aIsSpecial = (expA == 0x7FFF);
    bIsSpecial = (expB == 0x7FFF);
    if ( aIsSpecial || bIsSpecial ) {
        /* A nonzero fraction on an all-ones exponent is a NaN. */
        if (
               (aIsSpecial && (sigA.v64 | sigA.v0))
            || (bIsSpecial && (sigB.v64 | sigB.v0))
        ) {
            out.ui =
                softfloat_propagateNaNF128UI( state, hiA, loA, hiB, loB );
            return out.f;
        }
        /* One operand is an infinity; gather every magnitude bit of the
           other operand to find out whether it is zero. */
        magBits =
            aIsSpecial ? (expB | sigB.v64 | sigB.v0)
                       : (expA | sigA.v64 | sigA.v0);
        if ( ! magBits ) {
            /* infinity * 0 */
            softfloat_raiseFlags( state, softfloat_flag_invalid );
            out.ui.v64 = defaultNaNF128UI64;
            out.ui.v0 = defaultNaNF128UI0;
        } else {
            out.ui.v64 = packToF128UI64( negZ, 0x7FFF, 0 );
            out.ui.v0 = 0;
        }
        return out.f;
    }

    /*------------------------------------------------------------------------
    | Zeros and subnormals (exponent field 0).
    *------------------------------------------------------------------------*/
    if ( ! expA ) {
        if ( ! (sigA.v64 | sigA.v0) ) {
            out.ui.v64 = packToF128UI64( negZ, 0, 0 );
            out.ui.v0 = 0;
            return out.f;
        }
        norm = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
        expA = norm.exp;
        sigA = norm.sig;
    }
    if ( ! expB ) {
        if ( ! (sigB.v64 | sigB.v0) ) {
            out.ui.v64 = packToF128UI64( negZ, 0, 0 );
            out.ui.v0 = 0;
            return out.f;
        }
        norm = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
        expB = norm.exp;
        sigB = norm.sig;
    }

    /*------------------------------------------------------------------------
    | Arithmetic path.  Only sigA receives bit 48; sigB is shifted left by 16
    | without it, and sigA is added to the upper half of the product
    | afterwards (presumably to account for sigB's missing leading bit).
    *------------------------------------------------------------------------*/
    expZ = expA + expB - 0x4000;
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 16 );
    softfloat_mul128To256M( sigA.v64, sigA.v0, sigB.v64, sigB.v0, product );

    /* Word 1 becomes the extra word; any bit in word 0 is folded into its
       lowest bit as a sticky indicator. */
    sigZExtra = product[indexWord( 4, 1 )] | (product[indexWord( 4, 0 )] != 0);
    sigZ =
        softfloat_add128(
            product[indexWord( 4, 3 )], product[indexWord( 4, 2 )],
            sigA.v64, sigA.v0
        );

    /* Significand reached bit 49: shift right by one (jamming into the extra
       word) and bump the exponent. */
    if ( UINT64_C( 0x0002000000000000 ) <= sigZ.v64 ) {
        ++expZ;
        shifted =
            softfloat_shortShiftRightJam128Extra(
                sigZ.v64, sigZ.v0, sigZExtra, 1 );
        sigZ = shifted.v;
        sigZExtra = shifted.extra;
    }
    return
        softfloat_roundPackToF128( state, negZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );

}


================================================
FILE: External/SoftFloat-3e/src/f128_mulAdd.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Splits each of the three 128-bit floating-point operands `a', `b', and `c'
| into its upper and lower 64-bit words and forwards them, together with a
| final argument of 0, to `softfloat_mulAddF128'.  The value returned by that
| helper is returned unchanged.
| NOTE(review): `state' is not referenced in this function and is not handed
| to the helper -- confirm against the helper's prototype.
*----------------------------------------------------------------------------*/
float128_t f128_mulAdd( struct softfloat_state *state, float128_t a, float128_t b, float128_t c )
{
    union ui128_f128 bitsA, bitsB, bitsC;

    /* Reinterpret each operand as a pair of 64-bit words. */
    bitsA.f = a;
    bitsB.f = b;
    bitsC.f = c;

    return
        softfloat_mulAddF128(
            bitsA.ui.v64, bitsA.ui.v0,
            bitsB.ui.v64, bitsB.ui.v0,
            bitsC.ui.v64, bitsC.ui.v0,
            0
        );

}


================================================
FILE: External/SoftFloat-3e/src/f128_rem.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the remainder of the 128-bit floating-point value `a' with respect
| to `b'.  The final selection between the two candidate remainders (see
| `selectRem') keeps the one of smaller magnitude and, on an exact tie, the
| one whose quotient is even, so the result may carry the opposite sign of
| `a'.
|   - If either operand is a NaN, the result comes from
|     `softfloat_propagateNaNF128UI'.
|   - If `a' is infinite or `b' is zero, `softfloat_flag_invalid' is raised
|     through `state' and the default NaN is returned.
|   - If `b' is infinite or `a' is zero, `a' is returned unchanged.
*----------------------------------------------------------------------------*/
float128_t f128_rem( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    int_fast32_t expA;
    struct uint128 sigA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    int_fast32_t expB;
    struct uint128 sigB;
    struct exp32_sig128 normExpSig;
    struct uint128 rem;
    int_fast32_t expDiff;
    uint_fast32_t q, recip32;
    uint_fast64_t q64;
    struct uint128 term, altRem, meanRem;
    bool signRem;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands into raw 64-bit words, exponent field, and
    | fraction.  Only the sign of `a' is extracted; the sign of `b' is never
    | consulted.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    expB  = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    /*------------------------------------------------------------------------
    | Exponent field 0x7FFF marks an infinity (zero fraction) or a NaN
    | (nonzero fraction).  Any NaN operand is propagated; an infinite `a' is
    | invalid; an infinite `b' with finite `a' yields `a' itself.
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if (
            (sigA.v64 | sigA.v0) || ((expB == 0x7FFF) && (sigB.v64 | sigB.v0))
        ) {
            goto propagateNaN;
        }
        goto invalid;
    }
    if ( expB == 0x7FFF ) {
        if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
        return a;
    }
    /*------------------------------------------------------------------------
    | A zero exponent field means zero or subnormal.  A zero `b' is invalid
    | and a zero `a' is returned as is; subnormal inputs are renormalized so
    | the code below can treat them like normal numbers.
    *------------------------------------------------------------------------*/
    if ( ! expB ) {
        if ( ! (sigB.v64 | sigB.v0) ) goto invalid;
        normExpSig = softfloat_normSubnormalF128Sig( sigB.v64, sigB.v0 );
        expB = normExpSig.exp;
        sigB = normExpSig.sig;
    }
    if ( ! expA ) {
        if ( ! (sigA.v64 | sigA.v0) ) return a;
        normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
        expA = normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | Set bit 48 of the high word of both significands (the leading-one
    | position above the fraction bits) and start the running remainder at
    | `sigA'.
    | When `expDiff' is below 1 no long reduction is needed:
    |   - below -1, `a' is returned unchanged;
    |   - exactly -1, `sigB' is doubled and `expB' decremented, with `q' = 0;
    |   - exactly 0, `q' is 1 and `sigB' is subtracted once if
    |     `sigB' <= `rem', otherwise `q' is 0.
    | Otherwise the loop produces quotient bits in chunks of 29: `q' is
    | estimated from the upper bits of `rem' multiplied by `recip32' (from
    | `softfloat_approxRecip32_1'), `rem' is shifted left by 29 and reduced
    | by `q * sigB', and a negative result is corrected by adding `sigB' back
    | once.
    *------------------------------------------------------------------------*/
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sigB.v64 |= UINT64_C( 0x0001000000000000 );
    rem = sigA;
    expDiff = expA - expB;
    if ( expDiff < 1 ) {
        if ( expDiff < -1 ) return a;
        if ( expDiff ) {
            --expB;
            sigB = softfloat_add128( sigB.v64, sigB.v0, sigB.v64, sigB.v0 );
            q = 0;
        } else {
            q = softfloat_le128( sigB.v64, sigB.v0, rem.v64, rem.v0 );
            if ( q ) {
                rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
            }
        }
    } else {
        recip32 = softfloat_approxRecip32_1( sigB.v64>>17 );
        expDiff -= 30;
        for (;;) {
            /* `q64' is computed before the exit test because the final,
               partial step after the loop reuses it. */
            q64 = (uint_fast64_t) (uint32_t) (rem.v64>>19) * recip32;
            if ( expDiff < 0 ) break;
            q = (q64 + 0x80000000)>>32;
            rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
            term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
            rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
            if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
                rem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
            }
            expDiff -= 29;
        }
        /*--------------------------------------------------------------------
        | (`expDiff' cannot be less than -29 here.)
        | Final partial step: only `expDiff + 30' quotient bits remain, so
        | the estimate is shifted right accordingly and `rem' is shifted left
        | by that many bits.  If the reduced remainder is negative, the
        | non-negative alternative is `rem + sigB' and the selection below is
        | entered directly.
        *--------------------------------------------------------------------*/
        q = (uint32_t) (q64>>32)>>(~expDiff & 31);
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, expDiff + 30 );
        term = softfloat_mul128By32( sigB.v64, sigB.v0, q );
        rem = softfloat_sub128( rem.v64, rem.v0, term.v64, term.v0 );
        if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
            altRem = softfloat_add128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
            goto selectRem;
        }
    }
    /*------------------------------------------------------------------------
    | Subtract `sigB' repeatedly until the remainder turns negative.  On exit
    | `altRem' holds the last non-negative remainder, `rem' the first
    | negative one, and `q' has been incremented once per subtraction.
    *------------------------------------------------------------------------*/
    do {
        altRem = rem;
        ++q;
        rem = softfloat_sub128( rem.v64, rem.v0, sigB.v64, sigB.v0 );
    } while ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) );
 selectRem:
    /* The sign of `rem + altRem' tells which candidate is smaller in
       magnitude.  `altRem' is chosen when the sum is negative, or when the
       sum is exactly zero (a tie) and `q' is odd. */
    meanRem = softfloat_add128( rem.v64, rem.v0, altRem.v64, altRem.v0 );
    if (
        (meanRem.v64 & UINT64_C( 0x8000000000000000 ))
            || (! (meanRem.v64 | meanRem.v0) && (q & 1))
    ) {
        rem = altRem;
    }
    /* The result starts with the sign of `a'; a negative chosen remainder
       is negated and the result sign flipped. */
    signRem = signA;
    if ( rem.v64 & UINT64_C( 0x8000000000000000 ) ) {
        signRem = ! signRem;
        rem = softfloat_sub128( 0, 0, rem.v64, rem.v0 );
    }
    return softfloat_normRoundPackToF128( state, signRem, expB - 1, rem.v64, rem.v0 );
    /*------------------------------------------------------------------------
    | At least one operand is a NaN: let the specialization helper choose
    | the resulting NaN.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( state, uiA64, uiA0, uiB64, uiB0 );
    goto uiZ;
    /*------------------------------------------------------------------------
    | Invalid operation: raise the invalid flag and produce the default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0  = defaultNaNF128UI0;
 uiZ:
    /* Reinterpret the raw 128-bit pattern as a float128_t. */
    uZ.ui = uiZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/f128_sqrt.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the square root of the 128-bit floating-point value `a'.
|   - A NaN input is passed through `softfloat_propagateNaNF128UI'.
|   - Positive infinity and zeros of either sign are returned unchanged.
|   - Any other negative input (including negative infinity) raises
|     `softfloat_flag_invalid' through `state' and returns the default NaN.
| All other inputs are handled by building the root significand in pieces
| and rounding/packing it with `softfloat_roundPackToF128'.
*----------------------------------------------------------------------------*/
float128_t f128_sqrt( struct softfloat_state *state, float128_t a )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    int_fast32_t expA;
    struct uint128 sigA, uiZ;
    struct exp32_sig128 normExpSig;
    int_fast32_t expZ;
    uint_fast32_t sig32A, recipSqrt32, sig32Z;
    struct uint128 rem;
    uint32_t qs[3];
    uint_fast32_t q;
    uint_fast64_t x64, sig64Z;
    struct uint128 y, term;
    uint_fast64_t sigZExtra;
    struct uint128 sigZ;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    | Unpack the operand into raw words, sign, exponent field, and fraction.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    expA  = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    /*------------------------------------------------------------------------
    | Exponent field 0x7FFF: a nonzero fraction is a NaN and is propagated;
    | otherwise the value is an infinity, returned as is when positive and
    | invalid when negative.
    *------------------------------------------------------------------------*/
    if ( expA == 0x7FFF ) {
        if ( sigA.v64 | sigA.v0 ) {
            uiZ = softfloat_propagateNaNF128UI( state, uiA64, uiA0, 0, 0 );
            goto uiZ;
        }
        if ( ! signA ) return a;
        goto invalid;
    }
    /*------------------------------------------------------------------------
    | Negative finite input: negative zero is returned unchanged, anything
    | else is invalid.
    *------------------------------------------------------------------------*/
    if ( signA ) {
        if ( ! (expA | sigA.v64 | sigA.v0) ) return a;
        goto invalid;
    }
    /*------------------------------------------------------------------------
    | Zero exponent field: positive zero is returned unchanged; a subnormal
    | is renormalized before the main computation.
    *------------------------------------------------------------------------*/
    if ( ! expA ) {
        if ( ! (sigA.v64 | sigA.v0) ) return a;
        normExpSig = softfloat_normSubnormalF128Sig( sigA.v64, sigA.v0 );
        expA = normExpSig.exp;
        sigA = normExpSig.sig;
    }
    /*------------------------------------------------------------------------
    | The result exponent is derived by halving the unbiased input exponent;
    | `expA' is then reduced to its low bit (the parity), which selects the
    | alignment of the initial remainder below.  The first 32-bit piece of
    | the root, `sig32Z', is obtained from the top 32 bits of the significand
    | and an approximate reciprocal square root.
    | (`sig32Z' is guaranteed to be a lower bound on the square root of
    | `sig32A', which makes `sig32Z' also a lower bound on the square root of
    | `sigA'.)
    *------------------------------------------------------------------------*/
    expZ = ((expA - 0x3FFF)>>1) + 0x3FFE;
    expA &= 1;
    sigA.v64 |= UINT64_C( 0x0001000000000000 );
    sig32A = sigA.v64>>17;
    recipSqrt32 = softfloat_approxRecipSqrt32_1( expA, sig32A );
    sig32Z = ((uint_fast64_t) sig32A * recipSqrt32)>>32;
    if ( expA ) {
        sig32Z >>= 1;
        rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 12 );
    } else {
        rem = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 13 );
    }
    qs[2] = sig32Z;
    rem.v64 -= (uint_fast64_t) sig32Z * sig32Z;
    /*------------------------------------------------------------------------
    | Second piece: estimate `q' from the current remainder and the
    | reciprocal square root, and extend the running root `sig64Z' with it.
    *------------------------------------------------------------------------*/
    q = ((uint32_t) (rem.v64>>2) * (uint_fast64_t) recipSqrt32)>>32;
    x64 = (uint_fast64_t) sig32Z<<32;
    sig64Z = x64 + ((uint_fast64_t) q<<3);
    y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
    /*------------------------------------------------------------------------
    | Reduce the remainder by the contribution of `q'; while the result is
    | negative, decrement `q' (and `sig64Z' to match) and retry.
    | (Repeating this loop is a rare occurrence.)
    *------------------------------------------------------------------------*/
    for (;;) {
        term = softfloat_mul64ByShifted32To128( x64 + sig64Z, q );
        rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
        if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
        --q;
        sig64Z -= 1<<3;
    }
    qs[1] = q;
    /*------------------------------------------------------------------------
    | Third piece: same estimate-and-correct scheme using the updated
    | remainder and the doubled running root.
    *------------------------------------------------------------------------*/
    q = ((rem.v64>>2) * recipSqrt32)>>32;
    y = softfloat_shortShiftLeft128( rem.v64, rem.v0, 29 );
    sig64Z <<= 1;
    /*------------------------------------------------------------------------
    | (Repeating this loop is a rare occurrence.)
    *------------------------------------------------------------------------*/
    for (;;) {
        term = softfloat_shortShiftLeft128( 0, sig64Z, 32 );
        term = softfloat_add128( term.v64, term.v0, 0, (uint_fast64_t) q<<6 );
        term = softfloat_mul128By32( term.v64, term.v0, q );
        rem = softfloat_sub128( y.v64, y.v0, term.v64, term.v0 );
        if ( ! (rem.v64 & UINT64_C( 0x8000000000000000 )) ) break;
        --q;
    }
    qs[0] = q;
    /*------------------------------------------------------------------------
    | Last estimate (biased upward by 2) supplies the lowest bits of the
    | significand and the extra word `sigZExtra' handed to the rounding
    | routine.  The three saved pieces `qs[2]', `qs[1]', `qs[0]' and the top
    | bits of this last `q' are summed at their respective bit offsets into
    | the 128-bit significand `sigZ'.
    *------------------------------------------------------------------------*/
    q = (((rem.v64>>2) * recipSqrt32)>>32) + 2;
    sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
    term = softfloat_shortShiftLeft128( 0, qs[1], 53 );
    sigZ =
        softfloat_add128(
            (uint_fast64_t) qs[2]<<18, ((uint_fast64_t) qs[0]<<24) + (q>>5),
            term.v64, term.v0
        );
    /*------------------------------------------------------------------------
    | When the low four bits of `q' are at most 2, the low two bits of `q'
    | are cleared and the remainder for the resulting root is recomputed so
    | that `sigZ'/`sigZExtra' can be adjusted by its sign.
    | NOTE(review): presumably this is the case where the estimate is too
    | close to a rounding boundary to be trusted -- confirm against the
    | SoftFloat documentation.
    *------------------------------------------------------------------------*/
    if ( (q & 0xF) <= 2 ) {
        q &= ~3;
        sigZExtra = (uint64_t) ((uint_fast64_t) q<<59);
        y = softfloat_shortShiftLeft128( sigZ.v64, sigZ.v0, 6 );
        y.v0 |= sigZExtra>>58;
        term = softfloat_sub128( y.v64, y.v0, 0, q );
        y    = softfloat_mul64ByShifted32To128( term.v0,  q );
        term = softfloat_mul64ByShifted32To128( term.v64, q );
        term = softfloat_add128( term.v64, term.v0, 0, y.v64 );
        rem = softfloat_shortShiftLeft128( rem.v64, rem.v0, 20 );
        term = softfloat_sub128( term.v64, term.v0, rem.v64, rem.v0 );
        /*--------------------------------------------------------------------
        | The concatenation of `term' and `y.v0' is now the negative remainder
        | (3 words altogether).
        | If its top bit is set, the low bit of `sigZExtra' is set.
        | Otherwise, if it is nonzero, the root is decremented by one unit in
        | the last place of the `sigZ':`sigZExtra' concatenation (borrowing
        | from `sigZ' when `sigZExtra' is already zero).
        *--------------------------------------------------------------------*/
        if ( term.v64 & UINT64_C( 0x8000000000000000 ) ) {
            sigZExtra |= 1;
        } else {
            if ( term.v64 | term.v0 | y.v0 ) {
                if ( sigZExtra ) {
                    --sigZExtra;
                } else {
                    sigZ = softfloat_sub128( sigZ.v64, sigZ.v0, 0, 1 );
                    sigZExtra = ~0;
                }
            }
        }
    }
    /* The sign argument is the constant 0: every input that reaches this
       point has a clear sign bit. */
    return softfloat_roundPackToF128( state, 0, expZ, sigZ.v64, sigZ.v0, sigZExtra );
    /*------------------------------------------------------------------------
    | Invalid operation: raise the invalid flag and produce the default NaN.
    *------------------------------------------------------------------------*/
 invalid:
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    uiZ.v64 = defaultNaNF128UI64;
    uiZ.v0  = defaultNaNF128UI0;
 uiZ:
    /* Reinterpret the raw 128-bit pattern as a float128_t. */
    uZ.ui = uiZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/f128_sub.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Returns the difference `a - b' of two 128-bit floating-point values.
| The operation is dispatched on the operand signs: when the signs are equal
| the magnitudes are subtracted (`softfloat_subMagsF128'), otherwise they are
| added (`softfloat_addMagsF128').  In both cases the sign of `a' is passed
| as the sign argument, and `state' is forwarded to the helper.
*----------------------------------------------------------------------------*/
float128_t f128_sub( struct softfloat_state *state, float128_t a, float128_t b )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool signA;
    union ui128_f128 uB;
    uint_fast64_t uiB64, uiB0;
    bool signB;
#if ! defined INLINE_LEVEL || (INLINE_LEVEL < 2)
    /* The pointer type must carry the leading state parameter so that this
       branch calls the helpers with the same argument list as the
       INLINE_LEVEL >= 2 branch below. */
    float128_t
        (*magsFuncPtr)(
            struct softfloat_state *,
            uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
#endif

    /* Unpack both operands into raw 64-bit words and extract their signs. */
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    signA = signF128UI64( uiA64 );
    uB.f = b;
    uiB64 = uB.ui.v64;
    uiB0  = uB.ui.v0;
    signB = signF128UI64( uiB64 );
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
    /* Direct calls, so the compiler is free to inline the helpers. */
    if ( signA == signB ) {
        return softfloat_subMagsF128( state, uiA64, uiA0, uiB64, uiB0, signA );
    } else {
        return softfloat_addMagsF128( state, uiA64, uiA0, uiB64, uiB0, signA );
    }
#else
    /* Single indirect call site to keep code size down. */
    magsFuncPtr =
        (signA == signB) ? softfloat_subMagsF128 : softfloat_addMagsF128;
    return (*magsFuncPtr)( state, uiA64, uiA0, uiB64, uiB0, signA );
#endif

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_extF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to the 80-bit extended
| format.
|   - NaN inputs are translated through the common-NaN helpers.
|   - Infinities and zeros are packed directly with the sign of `a'.
|   - Every other value (subnormals are normalized first) has its
|     significand shifted left by 15 bits and is rounded and packed by
|     `softfloat_roundPackToExtF80' with a rounding-precision argument of 80.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f128_to_extF80( struct softfloat_state *state, float128_t a )
{
    union ui128_f128 in;
    union { struct extFloat80M s; extFloat80_t f; } out;
    uint_fast64_t hiWord, loWord;
    uint_fast64_t fracHi, fracLo;
    int_fast32_t expField;
    bool isNeg;
    struct uint128 widened;

    /* Unpack the operand. */
    in.f = a;
    hiWord = in.ui.v64;
    loWord = in.ui.v0;
    isNeg    = signF128UI64( hiWord );
    expField = expF128UI64( hiWord );
    fracHi   = fracF128UI64( hiWord );
    fracLo   = loWord;

    /* Infinity or NaN. */
    if ( expField == 0x7FFF ) {
        if ( fracHi | fracLo ) {
            struct commonNaN nanInfo;
            struct uint128 nanBits;

            softfloat_f128UIToCommonNaN( state, hiWord, loWord, &nanInfo );
            nanBits = softfloat_commonNaNToExtF80UI( &nanInfo );
            out.s.signExp = nanBits.v64;
            out.s.signif  = nanBits.v0;
        } else {
            out.s.signExp = packToExtF80UI64( isNeg, 0x7FFF );
            out.s.signif  = UINT64_C( 0x8000000000000000 );
        }
        return out.f;
    }

    /* Zero or subnormal. */
    if ( ! expField ) {
        struct exp32_sig128 renorm;

        if ( ! (fracHi | fracLo) ) {
            out.s.signExp = packToExtF80UI64( isNeg, 0 );
            out.s.signif  = 0;
            return out.f;
        }
        renorm = softfloat_normSubnormalF128Sig( fracHi, fracLo );
        expField = renorm.exp;
        fracHi   = renorm.sig.v64;
        fracLo   = renorm.sig.v0;
    }

    /* Set bit 48 of the high word, left-align the significand by 15 bits,
       and let the rounding routine produce the final value. */
    widened =
        softfloat_shortShiftLeft128(
            fracHi | UINT64_C( 0x0001000000000000 ), fracLo, 15 );
    return
        softfloat_roundPackToExtF80(
            state, isNeg, expField, widened.v64, widened.v0, 80 );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_f16.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to the 16-bit format.
|   - NaN inputs are translated through the common-NaN helpers.
|   - Infinities and zeros are packed directly with the sign of `a'.
|   - Every other value is narrowed with a jamming right shift and then
|     rounded and packed by `softfloat_roundPackToF16'.
| NOTE(review): `state' is handed to the NaN helper but not to
| `softfloat_roundPackToF16' -- confirm against that helper's prototype.
*----------------------------------------------------------------------------*/
float16_t f128_to_f16( struct softfloat_state *state, float128_t a )
{
    union ui128_f128 in;
    union ui16_f16 out;
    uint_fast64_t hiWord, loWord;
    uint_fast64_t fracWithSticky;
    uint_fast16_t narrowFrac;
    int_fast32_t expField;
    bool isNeg;

    /* Unpack the operand.  Any nonzero bit of the low word is folded into
       bit 0 of the upper fraction so it is not lost when narrowing. */
    in.f = a;
    hiWord = in.ui.v64;
    loWord = in.ui.v0;
    isNeg    = signF128UI64( hiWord );
    expField = expF128UI64( hiWord );
    fracWithSticky = fracF128UI64( hiWord ) | (loWord != 0);

    /* Infinity or NaN. */
    if ( expField == 0x7FFF ) {
        if ( fracWithSticky ) {
            struct commonNaN nanInfo;

            softfloat_f128UIToCommonNaN( state, hiWord, loWord, &nanInfo );
            out.ui = softfloat_commonNaNToF16UI( &nanInfo );
        } else {
            out.ui = packToF16UI( isNeg, 0x1F, 0 );
        }
        return out.f;
    }

    /* Narrow the fraction; a zero exponent field together with a zero
       narrowed fraction yields a signed zero. */
    narrowFrac = softfloat_shortShiftRightJam64( fracWithSticky, 34 );
    if ( ! (expField | narrowFrac) ) {
        out.ui = packToF16UI( isNeg, 0, 0 );
        return out.f;
    }

    /* Rebase the exponent; when int_fast16_t is narrower than int_fast32_t
       the value is clamped from below at -0x40 before being passed on. */
    expField -= 0x3FF1;
    if ( sizeof (int_fast16_t) < sizeof (int_fast32_t) ) {
        if ( expField < -0x40 ) expField = -0x40;
    }
    return softfloat_roundPackToF16( isNeg, expField, narrowFrac | 0x4000 );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_f32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to the 32-bit floating-point
| format.  Inputs with the maximum exponent (0x7FFF) and zero inputs are
| packed directly; every other input is passed, together with `state', to
| `softfloat_roundPackToF32'.
*----------------------------------------------------------------------------*/
float32_t f128_to_f32( struct softfloat_state *state, float128_t a )
{
    union ui128_f128 src;
    union ui32_f32 dst;
    struct commonNaN nanInfo;
    uint_fast64_t hi, lo, fracJam;
    uint_fast32_t sig32;
    int_fast32_t expo;
    bool negative;

    /* Split the input into its two 64-bit words and extract the fields. */
    src.f = a;
    hi = src.ui.v64;
    lo = src.ui.v0;
    negative = signF128UI64( hi );
    expo     = expF128UI64( hi );
    /* Any nonzero bit in the low word is collapsed into bit 0. */
    fracJam  = fracF128UI64( hi ) | (lo != 0);

    /* Maximum exponent: nonzero fraction takes the common-NaN path,
       otherwise the result is packed with exponent 0xFF and fraction 0. */
    if ( expo == 0x7FFF ) {
        if ( fracJam != 0 ) {
            softfloat_f128UIToCommonNaN( state, hi, lo, &nanInfo );
            dst.ui = softfloat_commonNaNToF32UI( &nanInfo );
        } else {
            dst.ui = packToF32UI( negative, 0xFF, 0 );
        }
        return dst.f;
    }

    /* Narrow the fraction by 18 bits; a zero exponent with a zero narrowed
       fraction yields a signed zero. */
    sig32 = softfloat_shortShiftRightJam64( fracJam, 18 );
    if ( (expo == 0) && (sig32 == 0) ) {
        dst.ui = packToF32UI( negative, 0, 0 );
        return dst.f;
    }

    /* Adjust the exponent for the rounding helper.  The lower clamp is only
       applied where int_fast16_t is narrower than int_fast32_t. */
    expo -= 0x3F81;
    if ( (sizeof (int_fast16_t) < sizeof (int_fast32_t)) && (expo < -0x1000) ) {
        expo = -0x1000;
    }
    return softfloat_roundPackToF32( state, negative, expo, sig32 | 0x40000000 );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_f64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to the 64-bit floating-point
| format.  Inputs with the maximum exponent (0x7FFF) and zero inputs are
| packed directly; every other input is passed, together with `state', to
| `softfloat_roundPackToF64'.
*----------------------------------------------------------------------------*/
float64_t f128_to_f64( struct softfloat_state *state, float128_t a )
{
    union ui128_f128 src;
    union ui64_f64 dst;
    struct commonNaN nanInfo;
    struct uint128 shifted;
    uint_fast64_t hi, lo, fracHi, fracLo, sig;
    int_fast32_t expo;
    bool negative;

    /* Split the input into its two 64-bit words and extract the fields. */
    src.f = a;
    hi = src.ui.v64;
    lo = src.ui.v0;
    negative = signF128UI64( hi );
    expo     = expF128UI64( hi );
    fracHi   = fracF128UI64( hi );
    fracLo   = lo;

    /* Maximum exponent: nonzero fraction takes the common-NaN path,
       otherwise the result is packed with exponent 0x7FF and fraction 0. */
    if ( expo == 0x7FFF ) {
        if ( (fracHi | fracLo) != 0 ) {
            softfloat_f128UIToCommonNaN( state, hi, lo, &nanInfo );
            dst.ui = softfloat_commonNaNToF64UI( &nanInfo );
        } else {
            dst.ui = packToF64UI( negative, 0x7FF, 0 );
        }
        return dst.f;
    }

    /* Shift the 128-bit fraction left by 14 and keep the upper word; any
       nonzero bit left in the lower word is collapsed into bit 0. */
    shifted = softfloat_shortShiftLeft128( fracHi, fracLo, 14 );
    sig = shifted.v64 | (shifted.v0 != 0);
    if ( (expo == 0) && (sig == 0) ) {
        dst.ui = packToF64UI( negative, 0, 0 );
        return dst.f;
    }

    /* Adjust the exponent for the rounding helper.  The lower clamp is only
       applied where int_fast16_t is narrower than int_fast32_t. */
    expo -= 0x3C01;
    if ( (sizeof (int_fast16_t) < sizeof (int_fast32_t)) && (expo < -0x1000) ) {
        expo = -0x1000;
    }
    return
        softfloat_roundPackToF64(
            state, negative, expo, sig | UINT64_C( 0x4000000000000000 ) );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_i32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to a 32-bit signed integer.
| The significand is reduced to a single 64-bit word (with the low word
| collapsed into bit 0), shifted right according to the exponent, and handed
| to `softfloat_roundToI32' together with `state', `roundingMode' and `exact'.
|
| NaN inputs are special-cased at compile time, depending on how the
| specialization constants `i32_fromNaN', `i32_fromPosOverflow' and
| `i32_fromNegOverflow' compare with one another.
*----------------------------------------------------------------------------*/
int_fast32_t f128_to_i32( struct softfloat_state *state, float128_t a, uint_fast8_t roundingMode, bool exact )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool sign;
    int_fast32_t exp;
    uint_fast64_t sig64, sig0;
    int_fast32_t shiftDist;

    /*------------------------------------------------------------------------
    | Split the input into its two 64-bit words and extract the fields.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    sign  = signF128UI64( uiA64 );
    exp   = expF128UI64( uiA64 );
    sig64 = fracF128UI64( uiA64 );
    sig0  = uiA0;
    /*------------------------------------------------------------------------
    | NaN handling.  When the NaN result equals one of the overflow results,
    | the sign is forced so that the common path below is reused; otherwise
    | the invalid flag is raised on `state' and `i32_fromNaN' is returned.
    *------------------------------------------------------------------------*/
#if (i32_fromNaN != i32_fromPosOverflow) || (i32_fromNaN != i32_fromNegOverflow)
    if ( (exp == 0x7FFF) && (sig64 | sig0) ) {
#if (i32_fromNaN == i32_fromPosOverflow)
        sign = 0;
#elif (i32_fromNaN == i32_fromNegOverflow)
        sign = 1;
#else
        /* The flag helper takes the per-call state as its first argument,
           as in the other conversion routines of this package. */
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return i32_fromNaN;
#endif
    }
#endif
    /*------------------------------------------------------------------------
    | Set bit 48 when the exponent is nonzero, collapse the low word into
    | bit 0, and shift right when the computed distance is positive.
    *------------------------------------------------------------------------*/
    if ( exp ) sig64 |= UINT64_C( 0x0001000000000000 );
    sig64 |= (sig0 != 0);
    shiftDist = 0x4023 - exp;
    if ( 0 < shiftDist ) sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
    return softfloat_roundToI32( state, sign, sig64, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_i64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to a 64-bit signed integer.
| The significand is aligned according to the exponent and then handed to
| `softfloat_roundToI64' together with `state', `roundingMode' and `exact'.
| When the exponent is too large, the invalid flag is raised on `state' and
| one of `i64_fromNaN', `i64_fromNegOverflow' or `i64_fromPosOverflow' is
| returned.
*----------------------------------------------------------------------------*/
int_fast64_t f128_to_i64( struct softfloat_state *state, float128_t a, uint_fast8_t roundingMode, bool exact )
{
    union ui128_f128 src;
    struct uint128 widened;
    struct uint64_extra narrowed;
    uint_fast64_t hi, lo, sigHi, sigLo;
    int_fast32_t expo, rshift;
    bool negative;

    /* Split the input into its two 64-bit words and extract the fields. */
    src.f = a;
    hi = src.ui.v64;
    lo = src.ui.v0;
    negative = signF128UI64( hi );
    expo     = expF128UI64( hi );
    sigHi    = fracF128UI64( hi );
    sigLo    = lo;

    rshift = 0x402F - expo;
    if ( 0 < rshift ) {
        /* Right-shift path: bit 48 is only set for a nonzero exponent. */
        if ( expo != 0 ) sigHi |= UINT64_C( 0x0001000000000000 );
        narrowed = softfloat_shiftRightJam64Extra( sigHi, sigLo, rshift );
        sigHi = narrowed.v;
        sigLo = narrowed.extra;
    } else {
        if ( rshift < -15 ) {
            /* More than 15 positions of left shift would be required:
               report invalid and pick the matching saturation value. */
            softfloat_raiseFlags( state, softfloat_flag_invalid );
            if ( (expo == 0x7FFF) && ((sigHi | sigLo) != 0) ) {
                return i64_fromNaN;
            }
            return negative ? i64_fromNegOverflow : i64_fromPosOverflow;
        }
        /* Left-shift path (0 to 15 positions): bit 48 is always set. */
        sigHi |= UINT64_C( 0x0001000000000000 );
        if ( rshift != 0 ) {
            widened = softfloat_shortShiftLeft128( sigHi, sigLo, -rshift );
            sigHi = widened.v64;
            sigLo = widened.v0;
        }
    }
    return softfloat_roundToI64( state, negative, sigHi, sigLo, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_ui32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to a 32-bit unsigned integer.
| The significand is reduced to a single 64-bit word (with the low word
| collapsed into bit 0), shifted right according to the exponent, and handed
| to `softfloat_roundToUI32' together with `roundingMode' and `exact'.
|
| NaN inputs are special-cased at compile time, depending on how the
| specialization constants `ui32_fromNaN', `ui32_fromPosOverflow' and
| `ui32_fromNegOverflow' compare with one another.
*----------------------------------------------------------------------------*/
uint_fast32_t
 f128_to_ui32( struct softfloat_state *state, float128_t a, uint_fast8_t roundingMode, bool exact )
{
    union ui128_f128 uA;
    uint_fast64_t uiA64, uiA0;
    bool sign;
    int_fast32_t exp;
    uint_fast64_t sig64;
    int_fast32_t shiftDist;

    /*------------------------------------------------------------------------
    | Split the input into its two 64-bit words and extract the fields.  Any
    | nonzero bit in the low word is collapsed into bit 0 of `sig64'.
    *------------------------------------------------------------------------*/
    uA.f = a;
    uiA64 = uA.ui.v64;
    uiA0  = uA.ui.v0;
    sign  = signF128UI64( uiA64 );
    exp   = expF128UI64( uiA64 );
    sig64 = fracF128UI64( uiA64 ) | (uiA0 != 0);
    /*------------------------------------------------------------------------
    | NaN handling.  When the NaN result equals one of the overflow results,
    | the sign is forced so that the common path below is reused; otherwise
    | the invalid flag is raised on `state' and `ui32_fromNaN' is returned.
    *------------------------------------------------------------------------*/
#if (ui32_fromNaN != ui32_fromPosOverflow) || (ui32_fromNaN != ui32_fromNegOverflow)
    if ( (exp == 0x7FFF) && sig64 ) {
#if (ui32_fromNaN == ui32_fromPosOverflow)
        sign = 0;
#elif (ui32_fromNaN == ui32_fromNegOverflow)
        sign = 1;
#else
        /* The flag helper takes the per-call state as its first argument,
           as in the other conversion routines of this package. */
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return ui32_fromNaN;
#endif
    }
#endif
    /*------------------------------------------------------------------------
    | Set bit 48 when the exponent is nonzero and shift right when the
    | computed distance is positive.
    *------------------------------------------------------------------------*/
    if ( exp ) sig64 |= UINT64_C( 0x0001000000000000 );
    shiftDist = 0x4023 - exp;
    if ( 0 < shiftDist ) {
        sig64 = softfloat_shiftRightJam64( sig64, shiftDist );
    }
    /* NOTE(review): this call does not forward `state', unlike the other
       rounding helpers used by the f128 conversions -- confirm against the
       prototype of softfloat_roundToUI32. */
    return softfloat_roundToUI32( sign, sig64, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/f128_to_ui64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 128-bit floating-point value `a' to a 64-bit unsigned integer.
| The significand is aligned according to the exponent and then handed to
| `softfloat_roundToUI64' together with `state', `roundingMode' and `exact'.
| When the exponent is too large, the invalid flag is raised on `state' and
| one of `ui64_fromNaN', `ui64_fromNegOverflow' or `ui64_fromPosOverflow' is
| returned.
*----------------------------------------------------------------------------*/
uint_fast64_t
 f128_to_ui64( struct softfloat_state *state, float128_t a, uint_fast8_t roundingMode, bool exact )
{
    union ui128_f128 src;
    struct uint128 widened;
    struct uint64_extra narrowed;
    uint_fast64_t hi, lo, sigHi, sigLo;
    int_fast32_t expo, rshift;
    bool negative;

    /* Split the input into its two 64-bit words and extract the fields. */
    src.f = a;
    hi = src.ui.v64;
    lo = src.ui.v0;
    negative = signF128UI64( hi );
    expo     = expF128UI64( hi );
    sigHi    = fracF128UI64( hi );
    sigLo    = lo;

    rshift = 0x402F - expo;
    if ( 0 < rshift ) {
        /* Right-shift path: bit 48 is only set for a nonzero exponent. */
        if ( expo != 0 ) sigHi |= UINT64_C( 0x0001000000000000 );
        narrowed = softfloat_shiftRightJam64Extra( sigHi, sigLo, rshift );
        sigHi = narrowed.v;
        sigLo = narrowed.extra;
    } else {
        if ( rshift < -15 ) {
            /* More than 15 positions of left shift would be required:
               report invalid and pick the matching saturation value. */
            softfloat_raiseFlags( state, softfloat_flag_invalid );
            if ( (expo == 0x7FFF) && ((sigHi | sigLo) != 0) ) {
                return ui64_fromNaN;
            }
            return negative ? ui64_fromNegOverflow : ui64_fromPosOverflow;
        }
        /* Left-shift path (0 to 15 positions): bit 48 is always set. */
        sigHi |= UINT64_C( 0x0001000000000000 );
        if ( rshift != 0 ) {
            widened = softfloat_shortShiftLeft128( sigHi, sigLo, -rshift );
            sigHi = widened.v64;
            sigLo = widened.v0;
        }
    }
    return softfloat_roundToUI64( state, negative, sigHi, sigLo, roundingMode, exact );

}


================================================
FILE: External/SoftFloat-3e/src/f32_to_extF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 32-bit floating-point value `a' to the 80-bit extended
| format.  The result is assembled as a 16-bit sign/exponent word and a
| 64-bit significand and returned through a union with `extFloat80_t'.
| `state' is only forwarded to the NaN conversion helper.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f32_to_extF80( struct softfloat_state *state, float32_t a )
{
    union ui32_f32 src;
    union { struct extFloat80M s; extFloat80_t f; } dst;
    struct commonNaN nanInfo;
    struct uint128 nanBits;
    struct exp16_sig32 norm;
    uint_fast32_t bits, sig;
    uint_fast16_t signExp;
    uint_fast64_t signif;
    int_fast16_t expo;
    bool negative;

    /* Extract sign, exponent and fraction from the raw 32-bit pattern. */
    src.f = a;
    bits = src.ui;
    negative = signF32UI( bits );
    expo     = expF32UI( bits );
    sig      = fracF32UI( bits );

    if ( expo == 0xFF ) {
        /* Maximum exponent: nonzero fraction takes the common-NaN path,
           otherwise exponent 0x7FFF is packed with only the top significand
           bit set. */
        if ( sig != 0 ) {
            softfloat_f32UIToCommonNaN( state, bits, &nanInfo );
            nanBits = softfloat_commonNaNToExtF80UI( &nanInfo );
            signExp = nanBits.v64;
            signif  = nanBits.v0;
        } else {
            signExp = packToExtF80UI64( negative, 0x7FFF );
            signif  = UINT64_C( 0x8000000000000000 );
        }
    } else if ( (expo == 0) && (sig == 0) ) {
        /* Zero input: signed zero output. */
        signExp = packToExtF80UI64( negative, 0 );
        signif  = 0;
    } else {
        /* Zero exponent with nonzero fraction: normalize first. */
        if ( expo == 0 ) {
            norm = softfloat_normSubnormalF32Sig( sig );
            expo = norm.exp;
            sig  = norm.sig;
        }
        /* Add 0x3F80 to the exponent, OR in bit 23 of the fraction and
           move the result to the top of the 64-bit significand. */
        signExp = packToExtF80UI64( negative, expo + 0x3F80 );
        signif  = (uint_fast64_t) (sig | 0x00800000)<<40;
    }

    dst.s.signExp = signExp;
    dst.s.signif  = signif;
    return dst.f;

}


================================================
FILE: External/SoftFloat-3e/src/f32_to_f128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the 32-bit floating-point value `a' to the 128-bit floating-point
| format.  The low 64-bit word of the result is zero on every path that
| packs the result here; the NaN path takes both words from
| `softfloat_commonNaNToF128UI'.  `state' is only forwarded to the NaN
| conversion helper.
*----------------------------------------------------------------------------*/
float128_t f32_to_f128( struct softfloat_state *state, float32_t a )
{
    union ui32_f32 src;
    union ui128_f128 dst;
    struct commonNaN nanInfo;
    struct uint128 packed;
    struct exp16_sig32 norm;
    uint_fast32_t bits, sig;
    int_fast16_t expo;
    bool negative;

    /* Extract sign, exponent and fraction from the raw 32-bit pattern. */
    src.f = a;
    bits = src.ui;
    negative = signF32UI( bits );
    expo     = expF32UI( bits );
    sig      = fracF32UI( bits );

    if ( expo == 0xFF ) {
        /* Maximum exponent: nonzero fraction takes the common-NaN path,
           otherwise exponent 0x7FFF is packed with a zero fraction. */
        if ( sig != 0 ) {
            softfloat_f32UIToCommonNaN( state, bits, &nanInfo );
            packed = softfloat_commonNaNToF128UI( &nanInfo );
        } else {
            packed.v64 = packToF128UI64( negative, 0x7FFF, 0 );
            packed.v0  = 0;
        }
    } else if ( (expo == 0) && (sig == 0) ) {
        /* Zero input: signed zero output. */
        packed.v64 = packToF128UI64( negative, 0, 0 );
        packed.v0  = 0;
    } else {
        /* Zero exponent with nonzero fraction: normalize first.  The
           normalized exponent is reduced by one before packing. */
        if ( expo == 0 ) {
            norm = softfloat_normSubnormalF32Sig( sig );
            expo = norm.exp - 1;
            sig  = norm.sig;
        }
        /* Add 0x3F80 to the exponent and shift the fraction left by 25. */
        packed.v64 = packToF128UI64( negative, expo + 0x3F80, (uint_fast64_t) sig<<25 );
        packed.v0  = 0;
    }

    dst.ui = packed;
    return dst.f;

}


================================================
FILE: External/SoftFloat-3e/src/f64_to_extF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t f64_to_extF80( struct softfloat_state *state, float64_t a )
{
    union ui64_f64 bitsA;
    uint_fast64_t rawA;
    bool isNeg;
    int_fast16_t expA;
    uint_fast64_t fracA;
    struct commonNaN nan;
    struct uint128 nanBits;
    struct exp16_sig64 norm;
    uint_fast16_t signExpZ;
    uint_fast64_t sigZ;
    union { struct extFloat80M s; extFloat80_t f; } result;

    /*------------------------------------------------------------------------
    | Split the 64-bit input pattern into its sign, exponent and fraction
    | fields.
    *------------------------------------------------------------------------*/
    bitsA.f = a;
    rawA  = bitsA.ui;
    isNeg = signF64UI( rawA );
    expA  = expF64UI( rawA );
    fracA = fracF64UI( rawA );

    if ( expA == 0x7FF ) {
        /*--------------------------------------------------------------------
        | Exponent field all ones: a NaN when the fraction is nonzero,
        | otherwise an infinity of the input's sign.
        *--------------------------------------------------------------------*/
        if ( fracA ) {
            softfloat_f64UIToCommonNaN( state, rawA, &nan );
            nanBits  = softfloat_commonNaNToExtF80UI( &nan );
            signExpZ = nanBits.v64;
            sigZ     = nanBits.v0;
        } else {
            signExpZ = packToExtF80UI64( isNeg, 0x7FFF );
            sigZ     = UINT64_C( 0x8000000000000000 );
        }
    } else if ( ! expA && ! fracA ) {
        /*--------------------------------------------------------------------
        | Signed zero: only the sign bit is carried over.
        *--------------------------------------------------------------------*/
        signExpZ = packToExtF80UI64( isNeg, 0 );
        sigZ     = 0;
    } else {
        /*--------------------------------------------------------------------
        | Finite nonzero value.  A zero exponent field with a nonzero
        | fraction is first renormalized by the helper, which supplies the
        | replacement exponent and fraction.
        *--------------------------------------------------------------------*/
        if ( ! expA ) {
            norm  = softfloat_normSubnormalF64Sig( fracA );
            expA  = norm.exp;
            fracA = norm.sig;
        }
        /*--------------------------------------------------------------------
        | 0x3C00 equals 0x3FFF - 0x3FF.  Bit 52 is set explicitly and the
        | 53-bit significand is moved up by 11 so that this bit lands on
        | bit 63 of the 64-bit significand word.
        *--------------------------------------------------------------------*/
        signExpZ = packToExtF80UI64( isNeg, expA + 0x3C00 );
        sigZ     = (fracA | UINT64_C( 0x0010000000000000 ))<<11;
    }

    result.s.signExp = signExpZ;
    result.s.signif  = sigZ;
    return result.f;

}


================================================
FILE: External/SoftFloat-3e/src/i32_to_extF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t i32_to_extF80( int32_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } result;
    uint_fast16_t signExp = 0;
    uint_fast32_t magnitude = 0;

    /*------------------------------------------------------------------------
    | Zero keeps an all-zero sign/exponent word and an all-zero significand.
    | Any other input is converted to sign plus magnitude, and the magnitude
    | is shifted left until its leading 1 sits at bit 31.
    *------------------------------------------------------------------------*/
    if ( a != 0 ) {
        const bool negative = (a < 0);
        int_fast8_t leadingZeros;

        /* Negate in unsigned arithmetic so the most negative input is safe. */
        magnitude = (uint_fast32_t) a;
        if ( negative ) magnitude = -magnitude;

        leadingZeros = softfloat_countLeadingZeros32( magnitude );
        /* 0x401E equals 0x3FFF + 31. */
        signExp = packToExtF80UI64( negative, 0x401E - leadingZeros );
        magnitude <<= leadingZeros;
    }

    /*------------------------------------------------------------------------
    | The normalized 32-bit magnitude occupies the upper half of the 64-bit
    | significand word.
    *------------------------------------------------------------------------*/
    result.s.signExp = signExp;
    result.s.signif = (uint_fast64_t) magnitude<<32;
    return result.f;

}


================================================
FILE: External/SoftFloat-3e/src/i32_to_f128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

float128_t i32_to_f128( int32_t a )
{
    uint_fast64_t uiZ64;
    bool sign;
    uint_fast32_t absA;
    int_fast8_t shiftDist;
    union ui128_f128 uZ;

    uiZ64 = 0;
    if ( a ) {
        sign = (a < 0);
        absA = sign ? -(uint_fast32_t) a : (uint_fast32_t) a;
        shiftDist = softfloat_countLeadingZeros32( absA ) + 17;
        uiZ64 =
            packToF128UI64(
                sign, 0x402E - shiftDist, (uint_fast64_t) absA<<shiftDist );
    }
    uZ.ui.v64 = uiZ64;
    uZ.ui.v0  = 0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/internals.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef internals_h
#define internals_h 1

#include <stdbool.h>
#include <stdint.h>
#include "primitives.h"
#include "softfloat_types.h"

/*----------------------------------------------------------------------------
| Overlay unions that expose a floating-point object ('f') and its raw bit
| pattern ('ui') through the same storage, so that code can move between the
| two views by writing one member and reading the other.
*----------------------------------------------------------------------------*/
union ui16_f16 { uint16_t ui; float16_t f; };
union ui32_f32 { uint32_t ui; float32_t f; };
union ui64_f64 { uint64_t ui; float64_t f; };

/*----------------------------------------------------------------------------
| The 80-bit and 128-bit overlays are declared only when
| 'SOFTFLOAT_FAST_INT64' is defined.
*----------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_FAST_INT64
union extF80M_extF80 { struct extFloat80M fM; extFloat80_t f; };
union ui128_f128 { struct uint128 ui; float128_t f; };
#endif

/*----------------------------------------------------------------------------
| Two flag constants with the values 1 and 2; they occupy distinct bits and
| can therefore be OR-ed together.
|   NOTE(review): presumably the operation selector passed as the trailing
| 'uint_fast8_t' argument of the 'softfloat_mulAdd*' routines declared later
| in this header -- confirm against their definitions.
*----------------------------------------------------------------------------*/
enum {
    softfloat_mulAdd_subC    = 1,
    softfloat_mulAdd_subProd = 2
};

/*----------------------------------------------------------------------------
| Internal helpers that return 32-bit and 64-bit integer results.
|   The 64-bit helpers come in two flavours selected by
| 'SOFTFLOAT_FAST_INT64': the 'roundTo*64' forms take two 'uint_fast64_t'
| operands, while the 'roundMTo*64' forms take a 'uint32_t *' instead.
|   Only 'softfloat_roundToUI64', 'softfloat_roundToI32' and
| 'softfloat_roundToI64' receive a 'struct softfloat_state *' as their first
| argument; 'softfloat_roundToUI32' and both 'roundMTo*64' forms do not.
|   NOTE(review): the parameters are unnamed in this header.  By the function
| names they presumably carry a sign flag, the magnitude to round, a rounding
| mode and an "exact" flag -- confirm against the definitions.
*----------------------------------------------------------------------------*/
uint_fast32_t softfloat_roundToUI32( bool, uint_fast64_t, uint_fast8_t, bool );

#ifdef SOFTFLOAT_FAST_INT64
uint_fast64_t
 softfloat_roundToUI64(
     struct softfloat_state *, bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
#else
uint_fast64_t softfloat_roundMToUI64( bool, uint32_t *, uint_fast8_t, bool );
#endif

FEXCORE_PRESERVE_ALL_ATTR
int_fast32_t softfloat_roundToI32( struct softfloat_state *, bool, uint_fast64_t, uint_fast8_t, bool );

#ifdef SOFTFLOAT_FAST_INT64
FEXCORE_PRESERVE_ALL_ATTR
int_fast64_t
 softfloat_roundToI64(
     struct softfloat_state *, bool, uint_fast64_t, uint_fast64_t, uint_fast8_t, bool );
#else
int_fast64_t softfloat_roundMToI64( bool, uint32_t *, uint_fast8_t, bool );
#endif

/*----------------------------------------------------------------------------
| 16-bit floating-point support.
|   The macros below operate on a 16-bit pattern 'a' laid out as: sign in
| bit 15, a 5-bit exponent field in bits 14..10, and a 10-bit fraction in
| bits 9..0.
|   'expF16UI' applies its 'int_fast8_t' cast to the shifted value first and
| masks with 0x1F afterwards.
|   'packToF16UI' combines its fields with '+', not '|', so a 'sig' that
| extends above bit 9 carries into the exponent field.
*----------------------------------------------------------------------------*/
#define signF16UI( a ) ((bool) ((uint16_t) (a)>>15))
#define expF16UI( a ) ((int_fast8_t) ((a)>>10) & 0x1F)
#define fracF16UI( a ) ((a) & 0x03FF)
#define packToF16UI( sign, exp, sig ) (((uint16_t) (sign)<<15) + ((uint16_t) (exp)<<10) + (sig))

/*----------------------------------------------------------------------------
| True when every exponent bit (mask 0x7C00) is set and the fraction is
| nonzero.
*----------------------------------------------------------------------------*/
#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))

/*----------------------------------------------------------------------------
| Exponent/significand pair returned by 'softfloat_normSubnormalF16Sig'.
*----------------------------------------------------------------------------*/
struct exp8_sig16 { int_fast8_t exp; uint_fast16_t sig; };
struct exp8_sig16 softfloat_normSubnormalF16Sig( uint_fast16_t );

/*----------------------------------------------------------------------------
| NOTE(review): parameters are unnamed here; by their types they presumably
| are (sign, exponent, significand) -- confirm against the definitions.
*----------------------------------------------------------------------------*/
float16_t softfloat_roundPackToF16( bool, int_fast16_t, uint_fast16_t );
float16_t softfloat_normRoundPackToF16( bool, int_fast16_t, uint_fast16_t );

/*----------------------------------------------------------------------------
| Arithmetic helpers taking 16-bit patterns as 'uint_fast16_t' values.  None
| of the 16-bit helpers in this section takes a 'struct softfloat_state *'.
*----------------------------------------------------------------------------*/
float16_t softfloat_addMagsF16( uint_fast16_t, uint_fast16_t );
float16_t softfloat_subMagsF16( uint_fast16_t, uint_fast16_t );
float16_t
 softfloat_mulAddF16(
     uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast8_t );

/*----------------------------------------------------------------------------
| 32-bit floating-point support.
|   The macros below operate on a 32-bit pattern 'a' laid out as: sign in
| bit 31, an 8-bit exponent field in bits 30..23, and a 23-bit fraction in
| bits 22..0.
|   'expF32UI' applies its 'int_fast16_t' cast to the shifted value first and
| masks with 0xFF afterwards.
|   'packToF32UI' combines its fields with '+', not '|', so a 'sig' that
| extends above bit 22 carries into the exponent field.
*----------------------------------------------------------------------------*/
#define signF32UI( a ) ((bool) ((uint32_t) (a)>>31))
#define expF32UI( a ) ((int_fast16_t) ((a)>>23) & 0xFF)
#define fracF32UI( a ) ((a) & 0x007FFFFF)
#define packToF32UI( sign, exp, sig ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<23) + (sig))

/*----------------------------------------------------------------------------
| True when every exponent bit (mask 0x7F800000) is set and the fraction is
| nonzero.
*----------------------------------------------------------------------------*/
#define isNaNF32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))

/*----------------------------------------------------------------------------
| Exponent/significand pair returned by 'softfloat_normSubnormalF32Sig'.
*----------------------------------------------------------------------------*/
struct exp16_sig32 { int_fast16_t exp; uint_fast32_t sig; };
FEXCORE_PRESERVE_ALL_ATTR
struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t );

/*----------------------------------------------------------------------------
| Of the helpers below, only 'softfloat_roundPackToF32' takes a
| 'struct softfloat_state *'; the remaining prototypes do not.
|   NOTE(review): parameters are unnamed here; the trailing three of the
| round/pack helpers presumably are (sign, exponent, significand) -- confirm
| against the definitions.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
float32_t softfloat_roundPackToF32( struct softfloat_state *, bool, int_fast16_t, uint_fast32_t );
float32_t softfloat_normRoundPackToF32( bool, int_fast16_t, uint_fast32_t );

float32_t softfloat_addMagsF32( uint_fast32_t, uint_fast32_t );
float32_t softfloat_subMagsF32( uint_fast32_t, uint_fast32_t );
float32_t
 softfloat_mulAddF32(
     uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast8_t );

/*----------------------------------------------------------------------------
| 64-bit floating-point support.
|   The macros below operate on a 64-bit pattern 'a' laid out as: sign in
| bit 63, an 11-bit exponent field in bits 62..52, and a 52-bit fraction in
| bits 51..0.
|   'expF64UI' applies its 'int_fast16_t' cast to the shifted value first and
| masks with 0x7FF afterwards.
|   'packToF64UI' combines its fields with '+', not '|', so a 'sig' that
| extends above bit 51 carries into the exponent field.
*----------------------------------------------------------------------------*/
#define signF64UI( a ) ((bool) ((uint64_t) (a)>>63))
#define expF64UI( a ) ((int_fast16_t) ((a)>>52) & 0x7FF)
#define fracF64UI( a ) ((a) & UINT64_C( 0x000FFFFFFFFFFFFF ))
#define packToF64UI( sign, exp, sig ) ((uint64_t) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<52) + (sig)))

/*----------------------------------------------------------------------------
| True when every exponent bit (mask 0x7FF0000000000000) is set and the
| fraction is nonzero.
*----------------------------------------------------------------------------*/
#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0000000000000 )) == 0) && ((a) & UINT64_C( 0x000FFFFFFFFFFFFF )))

/*----------------------------------------------------------------------------
| Exponent/significand pair returned by 'softfloat_normSubnormalF64Sig'.
| 'f64_to_extF80' calls that function with the fraction of an input whose
| exponent field is zero and continues with the returned 'exp' and 'sig'.
*----------------------------------------------------------------------------*/
struct exp16_sig64 { int_fast16_t exp; uint_fast64_t sig; };
FEXCORE_PRESERVE_ALL_ATTR
struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t );

/*----------------------------------------------------------------------------
| Of the helpers below, only 'softfloat_roundPackToF64' takes a
| 'struct softfloat_state *'.  The add/sub magnitude helpers take a trailing
| 'bool' that their 16-bit and 32-bit counterparts do not have.
|   NOTE(review): parameters are unnamed here; confirm their meaning against
| the definitions.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
float64_t softfloat_roundPackToF64( struct softfloat_state *, bool, int_fast16_t, uint_fast64_t );
float64_t softfloat_normRoundPackToF64( bool, int_fast16_t, uint_fast64_t );

float64_t softfloat_addMagsF64( uint_fast64_t, uint_fast64_t, bool );
float64_t softfloat_subMagsF64( uint_fast64_t, uint_fast64_t, bool );
float64_t
 softfloat_mulAddF64(
     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );

/*----------------------------------------------------------------------------
| 80-bit extended floating-point field macros.  'a64' is the sign/exponent
| word: the sign is bit 15 and the exponent is the low 15 bits.
|   'signExtF80UI64' truncates its argument to 16 bits before extracting
| bit 15.  'packToExtF80UI64' ORs the shifted sign with 'exp'; it does not
| mask 'exp', so the caller must keep it within 15 bits.
*----------------------------------------------------------------------------*/
#define signExtF80UI64( a64 ) ((bool) ((uint16_t) (a64)>>15))
#define expExtF80UI64( a64 ) ((a64) & 0x7FFF)
#define packToExtF80UI64( sign, exp ) ((uint_fast16_t) (sign)<<15 | (exp))

/*----------------------------------------------------------------------------
| True when the exponent field of 'a64' is 0x7FFF and any of the low 63 bits
| of the significand word 'a0' is set.  Bit 63 of 'a0' is not examined, so
| the pattern with exponent 0x7FFF and significand 0x8000000000000000 (the
| one 'f64_to_extF80' produces for infinities) is not reported as a NaN.
*----------------------------------------------------------------------------*/
#define isNaNExtF80UI( a64, a0 ) ((((a64) & 0x7FFF) == 0x7FFF) && ((a0) & UINT64_C( 0x7FFFFFFFFFFFFFFF )))

#ifdef SOFTFLOAT_FAST_INT64

/*----------------------------------------------------------------------------
| 80-bit extended floating-point helpers for builds that define
| 'SOFTFLOAT_FAST_INT64'.  Every routine except
| 'softfloat_normSubnormalExtF80Sig' takes a 'struct softfloat_state *' as
| its first argument.
*----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
| Exponent/significand pair returned by 'softfloat_normSubnormalExtF80Sig'.
*----------------------------------------------------------------------------*/
struct exp32_sig64 { int_fast32_t exp; uint64_t sig; };
FEXCORE_PRESERVE_ALL_ATTR
struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t );

/*----------------------------------------------------------------------------
| NOTE(review): parameters are unnamed here.  After the state pointer they
| presumably are (sign, exponent, significand, extra significand bits,
| rounding precision) -- confirm against the definitions.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t
 softfloat_roundPackToExtF80(
     struct softfloat_state *, bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t
 softfloat_normRoundPackToExtF80(
     struct softfloat_state *, bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast8_t );

/*----------------------------------------------------------------------------
| Magnitude add/subtract.  Each takes two ('uint_fast16_t', 'uint_fast64_t')
| pairs followed by a 'bool'.
|   NOTE(review): each pair presumably is the sign/exponent word and the
| significand word of one operand, and the 'bool' the result sign -- confirm
| against the definitions.
*----------------------------------------------------------------------------*/
extFloat80_t
 softfloat_addMagsExtF80(
     struct softfloat_state *, uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );
extFloat80_t
 softfloat_subMagsExtF80(
     struct softfloat_state *, uint_fast16_t, uint_fast64_t, uint_fast16_t, uint_fast64_t, bool );

/*----------------------------------------------------------------------------
| 128-bit floating-point field macros.  'a64' is the upper 64-bit word of the
| pattern: sign in bit 63, a 15-bit exponent field in bits 62..48, and the
| upper 48 fraction bits in bits 47..0.  'a0' is the lower 64-bit word.
|   'expF128UI64' applies its 'int_fast32_t' cast to the shifted value first
| and masks with 0x7FFF afterwards.
|   'packToF128UI64' combines its fields with '+', not '|', so a 'sig64' that
| extends above bit 47 carries into the exponent field.
*----------------------------------------------------------------------------*/
#define signF128UI64( a64 ) ((bool) ((uint64_t) (a64)>>63))
#define expF128UI64( a64 ) ((int_fast32_t) ((a64)>>48) & 0x7FFF)
#define fracF128UI64( a64 ) ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))
#define packToF128UI64( sign, exp, sig64 ) (((uint_fast64_t) (sign)<<63) + ((uint_fast64_t) (exp)<<48) + (sig64))

/*----------------------------------------------------------------------------
| True when every exponent bit of 'a64' is set and the 112-bit fraction
| (the low 48 bits of 'a64' together with all of 'a0') is nonzero.
|   Both arguments are parenthesized in the expansion so that an argument
| containing a lower-precedence operator (for example a conditional
| expression) is evaluated as a unit rather than being absorbed by '||'.
*----------------------------------------------------------------------------*/
#define isNaNF128UI( a64, a0 ) (((~(a64) & UINT64_C( 0x7FFF000000000000 )) == 0) && ((a0) || ((a64) & UINT64_C( 0x0000FFFFFFFFFFFF ))))

/*----------------------------------------------------------------------------
| Exponent/significand pair returned by 'softfloat_normSubnormalF128Sig',
| which takes two 64-bit words.
|   NOTE(review): presumably the upper and lower halves of the fraction, in
| that order -- confirm against the definition.
*----------------------------------------------------------------------------*/
struct exp32_sig128 { int_fast32_t exp; struct uint128 sig; };
FEXCORE_PRESERVE_ALL_ATTR
struct exp32_sig128
 softfloat_normSubnormalF128Sig( uint_fast64_t, uint_fast64_t );

/*----------------------------------------------------------------------------
| Round/pack helpers.  Both take a 'struct softfloat_state *' first;
| 'softfloat_roundPackToF128' takes three 64-bit words after the exponent,
| 'softfloat_normRoundPackToF128' takes two.
|   NOTE(review): parameters are unnamed here; confirm their meaning against
| the definitions.
*----------------------------------------------------------------------------*/
float128_t
 softfloat_roundPackToF128(
     struct softfloat_state *,
     bool, int_fast32_t, uint_fast64_t, uint_fast64_t, uint_fast64_t );
float128_t
 softfloat_normRoundPackToF128(
     struct softfloat_state *,
     bool, int_fast32_t, uint_fast64_t, uint_fast64_t );

/*----------------------------------------------------------------------------
| Magnitude add/subtract on two operands, each passed as a pair of 64-bit
| words, followed by a 'bool'.  Both take a 'struct softfloat_state *'.
*----------------------------------------------------------------------------*/
float128_t
 softfloat_addMagsF128(
     struct softfloat_state *,
     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
float128_t
 softfloat_subMagsF128(
     struct softfloat_state *,
     uint_fast64_t, uint_fast64_t, uint_fast64_t, uint_fast64_t, bool );
/*----------------------------------------------------------------------------
| Multiply-add taking six 64-bit words and a trailing 'uint_fast8_t'.  Unlike
| the other prototypes in this group it takes no 'struct softfloat_state *'.
*----------------------------------------------------------------------------*/
float128_t
 softfloat_mulAddF128(
     uint_fast64_t,
     uint_fast64_t,
     uint_fast64_t,
     uint_fast64_t,
     uint_fast64_t,
     uint_fast64_t,
     uint_fast8_t
 );

#else

/*----------------------------------------------------------------------------
| 80-bit extended floating-point helpers for builds that do NOT define
| 'SOFTFLOAT_FAST_INT64'.  These operate on 'struct extFloat80M' objects
| through pointers and write their results through an output pointer rather
| than returning a floating-point value.
|   None of these prototypes takes a 'struct softfloat_state *', in contrast
| to the 'SOFTFLOAT_FAST_INT64' counterparts declared earlier in this header.
|   NOTE(review): parameters are unnamed here; confirm operand/result order
| against the definitions.
*----------------------------------------------------------------------------*/

bool
 softfloat_tryPropagateNaNExtF80M(
     const struct extFloat80M *,
     const struct extFloat80M *,
     struct extFloat80M *
 );
void softfloat_invalidExtF80M( struct extFloat80M * );

int softfloat_normExtF80SigM( uint64_t * );

void
 softfloat_roundPackMToExtF80M(
     bool, int32_t, uint32_t *, uint_fast8_t, struct extFloat80M * );
void
 softfloat_normRoundPackMToExtF80M(
     bool, int32_t, uint32_t *, uint_fast8_t, struct extFloat80M * );

void
 softfloat_addExtF80M(
     const struct extFloat80M *,
     const struct extFloat80M *,
     struct extFloat80M *,
     bool
 );

int
 softfloat_compareNonnormExtF80M(
     const struct extFloat80M *, const struct extFloat80M * );

/*----------------------------------------------------------------------------
| 128-bit floating-point support for builds that do NOT define
| 'SOFTFLOAT_FAST_INT64'.  The macros below operate on a 32-bit word 'a96':
| sign in bit 31, a 15-bit exponent field in bits 30..16, and 16 fraction
| bits in bits 15..0.
|   NOTE(review): by its name 'a96' presumably is the most-significant 32-bit
| word of the 128-bit pattern -- confirm against the callers.
|   'expF128UI96' applies its 'int32_t' cast to the shifted value first and
| masks with 0x7FFF afterwards.  'packToF128UI96' combines its fields with
| '+', not '|'.
*----------------------------------------------------------------------------*/
#define signF128UI96( a96 ) ((bool) ((uint32_t) (a96)>>31))
#define expF128UI96( a96 ) ((int32_t) ((a96)>>16) & 0x7FFF)
#define fracF128UI96( a96 ) ((a96) & 0x0000FFFF)
#define packToF128UI96( sign, exp, sig96 ) (((uint32_t) (sign)<<31) + ((uint32_t) (exp)<<16) + (sig96))

/*----------------------------------------------------------------------------
| The routines below access 128-bit values through 'uint32_t' pointers and
| write results through an output pointer.  None takes a
| 'struct softfloat_state *'.
|   NOTE(review): parameters are unnamed here; confirm operand/result order
| against the definitions.
*----------------------------------------------------------------------------*/
bool softfloat_isNaNF128M( const uint32_t * );

bool
 softfloat_tryPropagateNaNF128M(
     const uint32_t *, const uint32_t *, uint32_t * );
void softfloat_invalidF128M( uint32_t * );

int softfloat_shiftNormSigF128M( const uint32_t *, uint_fast8_t, uint32_t * );

void softfloat_roundPackMToF128M( bool, int32_t, uint32_t *, uint32_t * );
void softfloat_normRoundPackMToF128M( bool, int32_t, uint32_t *, uint32_t * );

void
 softfloat_addF128M( const uint32_t *, const uint32_t *, uint32_t *, bool );
void
 softfloat_mulAddF128M(
     const uint32_t *,
     const uint32_t *,
     const uint32_t *,
     uint32_t *,
     uint_fast8_t
 );

#endif

#endif


================================================
FILE: External/SoftFloat-3e/src/primitives.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef primitives_h
#define primitives_h 1

#include <stdbool.h>
#include <stdint.h>
#include "primitiveTypes.h"

#ifndef softfloat_shortShiftRightJam64
/*----------------------------------------------------------------------------
| Right-shifts 'a' by 'dist' bits, where 'dist' must lie in the range 1 to 63.
| The least-significant bit of the result is additionally set to 1 ("jammed")
| whenever any of the 'dist' bits shifted out of 'a' was nonzero, so that the
| loss of nonzero bits stays visible in the returned value.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
{
    /* Mask covering exactly the low 'dist' bits that the shift discards. */
    const uint_fast64_t discardedMask = ((uint_fast64_t) 1<<dist) - 1;
    const bool sticky = ((a & discardedMask) != 0);

    return a>>dist | sticky;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shiftRightJam32
/*----------------------------------------------------------------------------
| Right-shifts 'a' by 'dist' bits ('dist' must not be zero) and "jams" the
| result: its least-significant bit is set to 1 when any nonzero bit was
| shifted off.
|   'dist' may be arbitrarily large.  For every 'dist' of 31 or more the
| shifted-and-jammed value is simply 1 when 'a' is nonzero and 0 otherwise,
| which is what the early return produces.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
{
    uint32_t shiftedOut;

    if ( 31 <= dist ) return (a != 0);
    /* Left-shifting by (32 - dist) keeps only the bits the right shift drops. */
    shiftedOut = (uint32_t) (a<<(-dist & 31));
    return a>>dist | (shiftedOut != 0);
}
#else
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist );
#endif
#endif

#ifndef softfloat_shiftRightJam64
/*----------------------------------------------------------------------------
| Right-shifts 'a' by 'dist' bits ('dist' must not be zero) and "jams" the
| result: its least-significant bit is set to 1 when any nonzero bit was
| shifted off.
|   'dist' may be arbitrarily large.  For every 'dist' of 63 or more the
| shifted-and-jammed value is simply 1 when 'a' is nonzero and 0 otherwise,
| which is what the early return produces.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
INLINE uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
{
    uint64_t shiftedOut;

    if ( 63 <= dist ) return (a != 0);
    /* Left-shifting by (64 - dist) keeps only the bits the right shift drops. */
    shiftedOut = (uint64_t) (a<<(-dist & 63));
    return a>>dist | (shiftedOut != 0);
}
#else
FEXCORE_PRESERVE_ALL_ATTR
uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist );
#endif
#endif

/*----------------------------------------------------------------------------
| A constant table that translates an 8-bit unsigned integer (the array index)
| into the number of leading 0 bits before the most-significant 1 of that
| integer.  For integer zero (index 0), the corresponding table element is 8.
|   The inline definitions of 'softfloat_countLeadingZeros16' and
| 'softfloat_countLeadingZeros32' in this header index this table with the
| most-significant nonzero byte of their argument.
*----------------------------------------------------------------------------*/
extern const uint_least8_t softfloat_countLeadingZeros8[256];

#ifndef softfloat_countLeadingZeros16
/*----------------------------------------------------------------------------
| Counts the 0 bits above the most-significant 1 bit of the 16-bit value 'a'.
| An 'a' of zero yields 16.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE uint_fast8_t softfloat_countLeadingZeros16( uint16_t a )
{
    /* Upper byte empty: all 8 of its bits are zeros, then consult the table
       for the lower byte. */
    if ( a < 0x100 ) return 8 + softfloat_countLeadingZeros8[a];
    /* Otherwise the answer is determined by the upper byte alone. */
    return softfloat_countLeadingZeros8[a>>8];
}
#else
uint_fast8_t softfloat_countLeadingZeros16( uint16_t a );
#endif
#endif

#ifndef softfloat_countLeadingZeros32
/*----------------------------------------------------------------------------
| Counts the 0 bits above the most-significant 1 bit of the 32-bit value 'a'.
| An 'a' of zero yields 32.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
INLINE uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
    uint_fast8_t zeros = 0;
    uint32_t v = a;

    /* Skip an empty upper half-word, then an empty top byte, moving the
       remaining bits up each time so the table sees the top byte. */
    if ( ! (v>>16) ) {
        zeros = 16;
        v <<= 16;
    }
    if ( ! (v>>24) ) {
        zeros += 8;
        v <<= 8;
    }
    return zeros + softfloat_countLeadingZeros8[v>>24];
}
#else
FEXCORE_PRESERVE_ALL_ATTR
uint_fast8_t softfloat_countLeadingZeros32( uint32_t a );
#endif
#endif

#ifndef softfloat_countLeadingZeros64
/*----------------------------------------------------------------------------
| Returns the number of leading 0 bits before the most-significant 1 bit of
| 'a'.  If 'a' is zero, 64 is returned.
|   Unlike the 16-bit and 32-bit variants, this header offers no inline
| definition for any 'INLINE_LEVEL'; only the prototype is declared, unless a
| macro of the same name has already been defined.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast8_t softfloat_countLeadingZeros64( uint64_t a );
#endif

/*----------------------------------------------------------------------------
| Two constant tables of 16 entries each.
|   NOTE(review): presumably the lookup tables used by the out-of-line
| 'softfloat_approxRecip32_1' -- confirm against its definition.
*----------------------------------------------------------------------------*/
extern const uint16_t softfloat_approxRecip_1k0s[16];
extern const uint16_t softfloat_approxRecip_1k1s[16];

#ifndef softfloat_approxRecip32_1
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the number represented by 'a',
| where 'a' is interpreted as an unsigned fixed-point number with one integer
| bit and 31 fraction bits.  The 'a' input must be "normalized", meaning that
| its most-significant bit (bit 31) must be 1.  Thus, if A is the value of
| the fixed-point interpretation of 'a', then 1 <= A < 2.  The returned value
| is interpreted as a pure unsigned fraction, having no integer bits and 32
| fraction bits.  The approximation returned is never greater than the true
| reciprocal 1/A, and it differs from the true reciprocal by at most 2.006 ulp
| (units in the last place).
|   When 'SOFTFLOAT_FAST_DIV64TO32' is defined, this is a macro that divides
| the 64-bit constant 0x7FFFFFFFFFFFFFFF by 'a' (converted to 'uint32_t') and
| truncates the quotient to 32 bits; otherwise it is an ordinary function
| declared here.
*----------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_FAST_DIV64TO32
#define softfloat_approxRecip32_1( a ) ((uint32_t) (UINT64_C( 0x7FFFFFFFFFFFFFFF ) / (uint32_t) (a)))
#else
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_approxRecip32_1( uint32_t a );
#endif
#endif

/*----------------------------------------------------------------------------
| Two constant tables of 16 entries each.
|   NOTE(review): presumably the lookup tables used by
| 'softfloat_approxRecipSqrt32_1' -- confirm against its definition.
*----------------------------------------------------------------------------*/
extern const uint16_t softfloat_approxRecipSqrt_1k0s[16];
extern const uint16_t softfloat_approxRecipSqrt_1k1s[16];

#ifndef softfloat_approxRecipSqrt32_1
/*----------------------------------------------------------------------------
| Returns an approximation to the reciprocal of the square root of the number
| represented by 'a', where 'a' is interpreted as an unsigned fixed-point
| number either with one integer bit and 31 fraction bits or with two integer
| bits and 30 fraction bits.  The format of 'a' is determined by 'oddExpA',
| which must be either 0 or 1.  If 'oddExpA' is 1, 'a' is interpreted as
| having one integer bit, and if 'oddExpA' is 0, 'a' is interpreted as having
| two integer bits.  The 'a' input must be "normalized", meaning that its
| most-significant bit (bit 31) must be 1.  Thus, if A is the value of the
| fixed-point interpretation of 'a', it follows that 1 <= A < 2 when 'oddExpA'
| is 1, and 2 <= A < 4 when 'oddExpA' is 0.
|   The returned value is interpreted as a pure unsigned fraction, having
| no integer bits and 32 fraction bits.  The approximation returned is never
| greater than the true reciprocal 1/sqrt(A), and it differs from the true
| reciprocal by at most 2.06 ulp (units in the last place).  The approximation
| returned is also always within the range 0.5 to 1; thus, the most-
| significant bit of the result is always set.
|   This header provides only the prototype; no inline or macro form is
| offered unless a macro of the same name has already been defined.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a );
#endif

#ifdef SOFTFLOAT_FAST_INT64

/*----------------------------------------------------------------------------
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is
| defined.
*----------------------------------------------------------------------------*/

#ifndef softfloat_eq128
/*----------------------------------------------------------------------------
| Compares two 128-bit unsigned integers for equality.  The first is the
| concatenation of 'a64' (upper half) and 'a0' (lower half), the second the
| concatenation of 'b64' and 'b0'.  Returns true when both halves match.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (1 <= INLINE_LEVEL)
INLINE
bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    if ( a64 != b64 ) return false;
    return a0 == b0;
}
#else
bool softfloat_eq128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
#endif
#endif

#ifndef softfloat_le128
/*----------------------------------------------------------------------------
| Unsigned 128-bit "less than or equal" comparison.  The first operand is the
| concatenation of 'a64' (upper half) and 'a0' (lower half), the second the
| concatenation of 'b64' and 'b0'.  Returns true when the first operand does
| not exceed the second.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    /* Differing upper halves decide the ordering on their own. */
    if ( a64 != b64 ) return a64 < b64;
    return a0 <= b0;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
#endif
#endif

#ifndef softfloat_lt128
/*----------------------------------------------------------------------------
| Unsigned 128-bit "strictly less than" comparison.  The first operand is the
| concatenation of 'a64' (upper half) and 'a0' (lower half), the second the
| concatenation of 'b64' and 'b0'.  Returns true when the first operand is
| smaller than the second.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    /* Differing upper halves decide the ordering on their own. */
    if ( a64 != b64 ) return a64 < b64;
    return a0 < b0;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
#endif
#endif

#ifndef softfloat_shortShiftLeft128
/*----------------------------------------------------------------------------
| Left-shifts the 128-bit value whose upper half is 'a64' and lower half is
| 'a0' by 'dist' bits, where 'dist' must lie in the range 1 to 63.  Bits
| leaving the top of the lower half enter the bottom of the upper half.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
struct uint128
 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    /* (64 - dist) for the permitted range of 'dist'. */
    const uint_fast8_t carryShift = -dist & 63;
    struct uint128 shifted;

    shifted.v0 = a0<<dist;
    shifted.v64 = (a64<<dist) | (a0>>carryShift);
    return shifted;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shortShiftRight128
/*----------------------------------------------------------------------------
| Right-shifts the 128-bit value whose upper half is 'a64' and whose lower
| half is 'a0' by 'dist' bit positions and returns the result.  Bits shifted
| out at the bottom are discarded.  'dist' must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
struct uint128
 softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    /* Left-shift that moves the low 'dist' bits of the upper half to the top
       of the lower half (64 - 'dist' for the permitted range). */
    uint_fast8_t crossDist = -dist & 63;
    struct uint128 result;
    result.v0 = (a64<<crossDist) | (a0>>dist);
    result.v64 = a64>>dist;
    return result;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shortShiftRightJam64Extra
/*----------------------------------------------------------------------------
| Restricted form of 'softfloat_shiftRightJam64Extra' (below) for which 'dist'
| must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
struct uint64_extra
 softfloat_shortShiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast8_t dist )
{
    /* 1 if the incoming 'extra' word held any set bit, otherwise 0. */
    uint64_t sticky = (extra != 0);
    struct uint64_extra result;
    result.v = a>>dist;
    /* Bits shifted out of 'a' land at the top of 'extra'; the sticky flag is
       merged into its lowest bit. */
    result.extra = (a<<(-dist & 63)) | sticky;
    return result;
}
#else
struct uint64_extra
 softfloat_shortShiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shortShiftRightJam128
/*----------------------------------------------------------------------------
| Right-shifts the 128-bit value whose upper half is 'a64' and whose lower
| half is 'a0' by 'dist' bit positions, where 'dist' must be in the range 1 to
| 63.  When any of the bits shifted out is nonzero, the least-significant bit
| of the result is forced to 1 ("jamming").  The shifted-and-jammed value is
| returned.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
INLINE
struct uint128
 softfloat_shortShiftRightJam128(
     uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    /* Complementary shift distance (64 - 'dist' for the permitted range). */
    uint_fast8_t crossDist = -dist & 63;
    /* The bits of 'a0' that fall off the bottom, moved to the top of a word
       so that they can be tested for being nonzero. */
    uint64_t lostBits = a0<<crossDist;
    struct uint128 result;
    result.v64 = a64>>dist;
    result.v0 = (a64<<crossDist) | (a0>>dist) | (lostBits != 0);
    return result;
}
#else
struct uint128
 softfloat_shortShiftRightJam128(
     uint64_t a64, uint64_t a0, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shortShiftRightJam128Extra
/*----------------------------------------------------------------------------
| Restricted form of 'softfloat_shiftRightJam128Extra' (below) for which
| 'dist' must be in the range 1 to 63.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
INLINE
struct uint128_extra
 softfloat_shortShiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist )
{
    /* Complementary shift distance (64 - 'dist' for the permitted range). */
    uint_fast8_t crossDist = -dist & 63;
    /* 1 if the incoming 'extra' word held any set bit, otherwise 0. */
    uint64_t sticky = (extra != 0);
    struct uint128_extra result;
    result.v.v64 = a64>>dist;
    result.v.v0 = (a64<<crossDist) | (a0>>dist);
    /* Bits shifted out of 'a0' land at the top of 'extra'; the sticky flag
       is merged into its lowest bit. */
    result.extra = (a0<<crossDist) | sticky;
    return result;
}
#else
struct uint128_extra
 softfloat_shortShiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast8_t dist );
#endif
#endif

#ifndef softfloat_shiftRightJam64Extra
/*----------------------------------------------------------------------------
| Treats 'a' and 'extra' as one 128-bit quantity ('a' being the upper word)
| and shifts it right by 64 _plus_ 'dist' bits; 'dist' must not be zero.  The
| shifted value has at most 64 nonzero bits and is returned in the 'v' field
| of the 'struct uint64_extra' result.  The 'extra' field of the result
| summarizes the bits that were shifted off:  its most-significant bit is the
| _last_ bit shifted off, and its other 63 bits are all zero if and only if
| _all_but_the_last_ bits shifted off were zero.
|   (Equivalently:  regard 'a' and 'extra' as an unsigned fixed-point number
| with the binary point between them.  That number is shifted right by 'dist'
| bits; the integer part of the outcome goes to 'v', and the fractional part,
| modified as described above, goes to 'extra'.)
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
INLINE
struct uint64_extra
 softfloat_shiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast32_t dist )
{
    /* 1 if the incoming 'extra' word held any set bit, otherwise 0. */
    uint64_t sticky = (extra != 0);
    struct uint64_extra result;
    if ( 64 < dist ) {
        /* Every bit of 'a' is shifted beyond the top of 'extra'. */
        result.v = 0;
        result.extra = (a != 0) | sticky;
    } else if ( dist == 64 ) {
        /* 'a' moves into 'extra' as a whole. */
        result.v = 0;
        result.extra = a | sticky;
    } else {
        result.v = a>>dist;
        result.extra = (a<<(-dist & 63)) | sticky;
    }
    return result;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint64_extra
 softfloat_shiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast32_t dist );
#endif
#endif

#ifndef softfloat_shiftRightJam128
/*----------------------------------------------------------------------------
| Shifts the 128 bits formed by concatenating 'a64' and 'a0' right by the
| number of bits given in 'dist', which must not be zero.  If any nonzero bits
| are shifted off, they are "jammed" into the least-significant bit of the
| shifted value by setting the least-significant bit to 1.  This shifted-and-
| jammed value is returned.
|   The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
| greater than 128, the result will be either 0 or 1, depending on whether the
| original 128 bits are all zeros.
|   Arguments:  'a64' is the upper and 'a0' the lower 64 bits of the operand;
| 'dist' is the shift distance in bits.  No inline definition is supplied for
| any 'INLINE_LEVEL'; only the prototype appears here.
*----------------------------------------------------------------------------*/
struct uint128
 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist );
#endif

#ifndef softfloat_shiftRightJam128Extra
/*----------------------------------------------------------------------------
| Shifts the 192 bits formed by concatenating 'a64', 'a0', and 'extra' right
| by 64 _plus_ the number of bits given in 'dist', which must not be zero.
| This shifted value is at most 128 nonzero bits and is returned in the 'v'
| field of the 'struct uint128_extra' result.  The 64-bit 'extra' field of the
| result contains a value formed as follows from the bits that were shifted
| off:  The _last_ bit shifted off is the most-significant bit of the 'extra'
| field, and the other 63 bits of the 'extra' field are all zero if and only
| if _all_but_the_last_ bits shifted off were all zero.
|   (This function makes more sense if 'a64', 'a0', and 'extra' are considered
| to form an unsigned fixed-point number with binary point between 'a0' and
| 'extra'.  This fixed-point value is shifted right by the number of bits
| given in 'dist', and the integer part of this shifted value is returned
| in the 'v' field of the result.  The fractional part of the shifted value
| is modified as described above and returned in the 'extra' field of the
| result.)
|   No inline definition is supplied for any 'INLINE_LEVEL'; only the
| prototype appears here.
*----------------------------------------------------------------------------*/
struct uint128_extra
 softfloat_shiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist );
#endif

#ifndef softfloat_shiftRightJam256M
/*----------------------------------------------------------------------------
| Shifts the 256-bit unsigned integer pointed to by 'aPtr' right by the number
| of bits given in 'dist', which must not be zero.  If any nonzero bits are
| shifted off, they are "jammed" into the least-significant bit of the shifted
| value by setting the least-significant bit to 1.  This shifted-and-jammed
| value is stored at the location pointed to by 'zPtr'.  Each of 'aPtr' and
| 'zPtr' points to an array of four 64-bit elements that concatenate in the
| platform's normal endian order to form a 256-bit integer.
|   The value of 'dist' can be arbitrarily large.  In particular, if 'dist'
| is greater than 256, the stored result will be either 0 or 1, depending on
| whether the original 256 bits are all zeros.
|   The source array is accessed through a pointer to const; the result is
| delivered only through 'zPtr' (the function returns nothing).
*----------------------------------------------------------------------------*/
void
 softfloat_shiftRightJam256M(
     const uint64_t *aPtr, uint_fast32_t dist, uint64_t *zPtr );
#endif

#ifndef softfloat_add128
/*----------------------------------------------------------------------------
| Adds two 128-bit integers and returns the sum.  The first operand has 'a64'
| as its upper half and 'a0' as its lower half; the second is built the same
| way from 'b64' and 'b0'.  The sum is taken modulo 2^128, so a carry out of
| the top bit is lost.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
struct uint128
 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    uint64_t lowSum = a0 + b0;
    /* The low-half addition wrapped around exactly when the sum came out
       smaller than one of its operands. */
    uint64_t carry = (lowSum < a0);
    struct uint128 result;
    result.v0 = lowSum;
    result.v64 = a64 + b64 + carry;
    return result;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
#endif
#endif

#ifndef softfloat_add256M
/*----------------------------------------------------------------------------
| Adds the two 256-bit integers pointed to by 'aPtr' and 'bPtr'.  The addition
| is modulo 2^256, so any carry out is lost.  The sum is stored at the
| location pointed to by 'zPtr'.  Each of 'aPtr', 'bPtr', and 'zPtr' points to
| an array of four 64-bit elements that concatenate in the platform's normal
| endian order to form a 256-bit integer.
|   Both operand arrays are accessed through pointers to const; the result is
| delivered only through 'zPtr' (the function returns nothing).
*----------------------------------------------------------------------------*/
void
 softfloat_add256M(
     const uint64_t *aPtr, const uint64_t *bPtr, uint64_t *zPtr );
#endif

#ifndef softfloat_sub128
/*----------------------------------------------------------------------------
| Subtracts one 128-bit integer from another and returns the difference.  The
| minuend has 'a64' as its upper half and 'a0' as its lower half; the
| subtrahend is built the same way from 'b64' and 'b0'.  The difference is
| taken modulo 2^128, so a borrow out of the top bit is lost.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
struct uint128
 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    /* The low-half subtraction needs a borrow exactly when 'a0' is smaller
       than 'b0'. */
    uint64_t borrow = (a0 < b0);
    struct uint128 result;
    result.v64 = a64 - b64 - borrow;
    result.v0 = a0 - b0;
    return result;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 );
#endif
#endif

#ifndef softfloat_sub256M
/*----------------------------------------------------------------------------
| Subtracts the 256-bit integer pointed to by 'bPtr' from the 256-bit integer
| pointed to by 'aPtr'.  The subtraction is modulo 2^256, so any borrow out
| (carry out) is lost.  The difference is stored at the location pointed to
| by 'zPtr'.  Each of 'aPtr', 'bPtr', and 'zPtr' points to an array of four
| 64-bit elements that concatenate in the platform's normal endian order to
| form a 256-bit integer.
|   Both operand arrays are accessed through pointers to const; the result is
| delivered only through 'zPtr' (the function returns nothing).
*----------------------------------------------------------------------------*/
void
 softfloat_sub256M(
     const uint64_t *aPtr, const uint64_t *bPtr, uint64_t *zPtr );
#endif

#ifndef softfloat_mul64ByShifted32To128
/*----------------------------------------------------------------------------
| Computes 'a' times 'b' times 2^32 and returns it as a 128-bit value.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (3 <= INLINE_LEVEL)
INLINE struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b )
{
    /* Split 'a' into 32-bit halves so that each partial product fits in 64
       bits. */
    uint32_t aLow = (uint32_t) a;
    uint32_t aHigh = (uint32_t) (a>>32);
    uint_fast64_t lowProduct = (uint_fast64_t) aLow * b;
    struct uint128 result;
    /* The low partial product straddles the two halves of the result once
       the 2^32 factor is applied. */
    result.v64 = (uint_fast64_t) aHigh * b + (lowProduct>>32);
    result.v0 = lowProduct<<32;
    return result;
}
#else
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b );
#endif
#endif

#ifndef softfloat_mul64To128
/*----------------------------------------------------------------------------
| Returns the 128-bit product of 'a' and 'b'.
|   No inline definition is supplied for any 'INLINE_LEVEL'; only the
| prototype appears here.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b );
#endif

#ifndef softfloat_mul128By32
/*----------------------------------------------------------------------------
| Returns the product of the 128-bit integer formed by concatenating 'a64' and
| 'a0', multiplied by 'b'.  The multiplication is modulo 2^128; any overflow
| bits are discarded.
|   The inline form takes the truncated 64-bit product 'a0 * b' as the low
| half of the result, then derives a correction term from that product and
| from the product of the upper 32 bits of 'a0' with 'b', and adds it to the
| truncated product 'a64 * b' to obtain the high half.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (4 <= INLINE_LEVEL)
INLINE
struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b )
{
    struct uint128 z;
    uint_fast64_t mid;
    uint_fast32_t carry;
    /* Low half: 'a0 * b' truncated to 64 bits. */
    z.v0 = a0 * b;
    /* Product of the upper 32 bits of 'a0' with 'b'. */
    mid = (uint_fast64_t) (uint32_t) (a0>>32) * b;
    /* Upper word of the low half minus the low bits of 'mid', reduced to 32
       bits by the outer cast. */
    carry = (uint32_t) ((uint_fast32_t) (z.v0>>32) - (uint_fast32_t) mid);
    /* High half: truncated 'a64 * b' plus the part of 'mid + carry' that lies
       above its low 32 bits. */
    z.v64 = a64 * b + (uint_fast32_t) ((mid + carry)>>32);
    return z;
}
#else
struct uint128 softfloat_mul128By32( uint64_t a64, uint64_t a0, uint32_t b );
#endif
#endif

#ifndef softfloat_mul128To256M
/*----------------------------------------------------------------------------
| Multiplies the 128-bit unsigned integer formed by concatenating 'a64' and
| 'a0' by the 128-bit unsigned integer formed by concatenating 'b64' and
| 'b0'.  The 256-bit product is stored at the location pointed to by 'zPtr'.
| Argument 'zPtr' points to an array of four 64-bit elements that concatenate
| in the platform's normal endian order to form a 256-bit integer.
|   The operands are passed by value; the result is delivered only through
| 'zPtr' (the function returns nothing).
*----------------------------------------------------------------------------*/
void
 softfloat_mul128To256M(
     uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0, uint64_t *zPtr );
#endif

#else

/*----------------------------------------------------------------------------
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is not
| defined.
*----------------------------------------------------------------------------*/

#ifndef softfloat_compare96M
/*----------------------------------------------------------------------------
| Compares the two 96-bit unsigned integers pointed to by 'aPtr' and 'bPtr'.
| Returns -1 if the first integer (A) is less than the second (B); returns 0
| if the two integers are equal; and returns +1 if the first integer (A)
| is greater than the second (B).  (The result is thus the signum of A - B.)
| Each of 'aPtr' and 'bPtr' points to an array of three 32-bit elements that
| concatenate in the platform's normal endian order to form a 96-bit integer.
|   Both arrays are accessed through pointers to const.
*----------------------------------------------------------------------------*/
int_fast8_t softfloat_compare96M( const uint32_t *aPtr, const uint32_t *bPtr );
#endif

#ifndef softfloat_compare128M
/*----------------------------------------------------------------------------
| Compares the two 128-bit unsigned integers pointed to by 'aPtr' and 'bPtr'.
| Returns -1 if the first integer (A) is less than the second (B); returns 0
| if the two integers are equal; and returns +1 if the first integer (A)
| is greater than the second (B).  (The result is thus the signum of A - B.)
| Each of 'aPtr' and 'bPtr' points to an array of four 32-bit elements that
| concatenate in the platform's normal endian order to form a 128-bit integer.
|   Both arrays are accessed through pointers to const.
*----------------------------------------------------------------------------*/
int_fast8_t
 softfloat_compare128M( const uint32_t *aPtr, const uint32_t *bPtr );
#endif

#ifndef softfloat_shortShiftLeft64To96M
/*----------------------------------------------------------------------------
| Widens 'a' to 96 bits, shifts it left by 'dist' bit positions, and stores
| the result through 'zPtr'.  'dist' must be in the range 1 to 31.  'zPtr'
| points to an array of three 32-bit elements that concatenate in the
| platform's normal endian order to form a 96-bit integer.
*----------------------------------------------------------------------------*/
#if defined INLINE_LEVEL && (2 <= INLINE_LEVEL)
INLINE
void
 softfloat_shortShiftLeft64To96M(
     uint64_t a, uint_fast8_t dist, uint32_t *zPtr )
{
    /* Word 0 of the result: the low 32 bits of 'a', shifted within a 32-bit
       type so that bits leaving the word are dropped. */
    uint32_t lowWord = (uint32_t) a<<dist;
    /* Everything at or above bit 32 of the shifted value, aligned to bit 0;
       this supplies words 1 and 2. */
    uint64_t upperBits = a>>(32 - dist);
    zPtr[indexWord( 3, 2 )] = upperBits>>32;
    zPtr[indexWord( 3, 1 )] = upperBits;
    zPtr[indexWord( 3, 0 )] = lowWord;
}
#else
void
 softfloat_shortShiftLeft64To96M(
     uint64_t a, uint_fast8_t dist, uint32_t *zPtr );
#endif
#endif

#ifndef softfloat_shortShiftLeftM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' left by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
| shifted N-bit result is stored at the location pointed to by 'zPtr'.  Each
| of 'aPtr' and 'zPtr' points to a 'size_words'-long array of 32-bit elements
| that concatenate in the platform's normal endian order to form an N-bit
| integer.
|   The fixed-width names 'softfloat_shortShiftLeft96M', '...128M', and
| '...160M' default to macros that call this function with 'size_words' set
| to 3, 4, and 5.
*----------------------------------------------------------------------------*/
void
 softfloat_shortShiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint_fast8_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shortShiftLeft96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_shortShiftLeftM':  'size_words' is fixed at 3
| (N = 96) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft96M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftLeftM( 3, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shortShiftLeft128M
/*----------------------------------------------------------------------------
| 128-bit form of 'softfloat_shortShiftLeftM':  'size_words' is fixed at 4
| (N = 128) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft128M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftLeftM( 4, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shortShiftLeft160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_shortShiftLeftM':  'size_words' is fixed at 5
| (N = 160) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftLeft160M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftLeftM( 5, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftLeftM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' left by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must not be zero.  Any nonzero bits shifted off are lost.  The shifted
| N-bit result is stored at the location pointed to by 'zPtr'.  Each of 'aPtr'
| and 'zPtr' points to a 'size_words'-long array of 32-bit elements that
| concatenate in the platform's normal endian order to form an N-bit integer.
|   The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
| greater than N, the stored result will be 0.
|   The fixed-width names 'softfloat_shiftLeft96M', '...128M', and '...160M'
| default to macros that call this function with 'size_words' set to 3, 4,
| and 5.
*----------------------------------------------------------------------------*/
void
 softfloat_shiftLeftM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shiftLeft96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_shiftLeftM':  'size_words' is fixed at 3 (N = 96)
| and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftLeft96M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftLeftM( 3, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftLeft128M
/*----------------------------------------------------------------------------
| 128-bit form of 'softfloat_shiftLeftM':  'size_words' is fixed at 4
| (N = 128) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftLeft128M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftLeftM( 4, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftLeft160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_shiftLeftM':  'size_words' is fixed at 5
| (N = 160) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftLeft160M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftLeftM( 5, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shortShiftRightM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' right by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must be in the range 1 to 31.  Any nonzero bits shifted off are lost.  The
| shifted N-bit result is stored at the location pointed to by 'zPtr'.  Each
| of 'aPtr' and 'zPtr' points to a 'size_words'-long array of 32-bit elements
| that concatenate in the platform's normal endian order to form an N-bit
| integer.
|   The fixed-width names 'softfloat_shortShiftRight128M' and '...160M'
| default to macros that call this function with 'size_words' set to 4 and 5.
*----------------------------------------------------------------------------*/
void
 softfloat_shortShiftRightM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint_fast8_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shortShiftRight128M
/*----------------------------------------------------------------------------
| 128-bit form of 'softfloat_shortShiftRightM':  'size_words' is fixed at 4
| (N = 128) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftRight128M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftRightM( 4, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shortShiftRight160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_shortShiftRightM':  'size_words' is fixed at 5
| (N = 160) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftRight160M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftRightM( 5, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shortShiftRightJamM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' right by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must be in the range 1 to 31.  If any nonzero bits are shifted off, they are
| "jammed" into the least-significant bit of the shifted value by setting the
| least-significant bit to 1.  This shifted-and-jammed N-bit result is stored
| at the location pointed to by 'zPtr'.  Each of 'aPtr' and 'zPtr' points
| to a 'size_words'-long array of 32-bit elements that concatenate in the
| platform's normal endian order to form an N-bit integer.
|   The parameters are named in the prototype so that it matches the names
| used in this description and in the neighbouring multiword shift
| prototypes; the types and their order are unchanged.
*----------------------------------------------------------------------------*/
void
 softfloat_shortShiftRightJamM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint_fast8_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shortShiftRightJam160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_shortShiftRightJamM':  'size_words' is fixed at
| 5 (N = 160) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shortShiftRightJam160M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shortShiftRightJamM( 5, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftRightM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' right by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must not be zero.  Any nonzero bits shifted off are lost.  The shifted
| N-bit result is stored at the location pointed to by 'zPtr'.  Each of 'aPtr'
| and 'zPtr' points to a 'size_words'-long array of 32-bit elements that
| concatenate in the platform's normal endian order to form an N-bit integer.
|   The value of 'dist' can be arbitrarily large.  In particular, if 'dist' is
| greater than N, the stored result will be 0.
|   The fixed-width name 'softfloat_shiftRight96M' defaults to a macro that
| calls this function with 'size_words' set to 3.
*----------------------------------------------------------------------------*/
void
 softfloat_shiftRightM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shiftRight96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_shiftRightM':  'size_words' is fixed at 3
| (N = 96) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftRight96M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftRightM( 3, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftRightJamM
/*----------------------------------------------------------------------------
| Shifts the N-bit unsigned integer pointed to by 'aPtr' right by the number
| of bits given in 'dist', where N = 'size_words' * 32.  The value of 'dist'
| must not be zero.  If any nonzero bits are shifted off, they are "jammed"
| into the least-significant bit of the shifted value by setting the least-
| significant bit to 1.  This shifted-and-jammed N-bit result is stored
| at the location pointed to by 'zPtr'.  Each of 'aPtr' and 'zPtr' points
| to a 'size_words'-long array of 32-bit elements that concatenate in the
| platform's normal endian order to form an N-bit integer.
|   The value of 'dist' can be arbitrarily large.  In particular, if 'dist'
| is greater than N, the stored result will be either 0 or 1, depending on
| whether the original N bits are all zeros.
|   The fixed-width names 'softfloat_shiftRightJam96M', '...128M', and
| '...160M' default to macros that call this function with 'size_words' set
| to 3, 4, and 5.
*----------------------------------------------------------------------------*/
void
 softfloat_shiftRightJamM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     uint32_t dist,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_shiftRightJam96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_shiftRightJamM':  'size_words' is fixed at 3
| (N = 96) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftRightJam96M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftRightJamM( 3, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftRightJam128M
/*----------------------------------------------------------------------------
| 128-bit form of 'softfloat_shiftRightJamM':  'size_words' is fixed at 4
| (N = 128) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftRightJam128M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftRightJamM( 4, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_shiftRightJam160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_shiftRightJamM':  'size_words' is fixed at 5
| (N = 160) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_shiftRightJam160M( srcPtr, shiftDist, dstPtr ) \
    softfloat_shiftRightJamM( 5, srcPtr, shiftDist, dstPtr )
#endif

#ifndef softfloat_addM
/*----------------------------------------------------------------------------
| Adds the two N-bit integers pointed to by 'aPtr' and 'bPtr', where N =
| 'size_words' * 32.  The addition is modulo 2^N, so any carry out is lost.
| The N-bit sum is stored at the location pointed to by 'zPtr'.  Each of
| 'aPtr', 'bPtr', and 'zPtr' points to a 'size_words'-long array of 32-bit
| elements that concatenate in the platform's normal endian order to form an
| N-bit integer.
|   The fixed-width names 'softfloat_add96M', 'softfloat_add128M', and
| 'softfloat_add160M' default to macros that call this function with
| 'size_words' set to 3, 4, and 5.
*----------------------------------------------------------------------------*/
void
 softfloat_addM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     const uint32_t *bPtr,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_add96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_addM':  'size_words' is fixed at 3 (N = 96) and
| the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_add96M( lhsPtr, rhsPtr, sumPtr ) \
    softfloat_addM( 3, lhsPtr, rhsPtr, sumPtr )
#endif

#ifndef softfloat_add128M
/*----------------------------------------------------------------------------
| 128-bit form of 'softfloat_addM':  'size_words' is fixed at 4 (N = 128) and
| the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_add128M( lhsPtr, rhsPtr, sumPtr ) \
    softfloat_addM( 4, lhsPtr, rhsPtr, sumPtr )
#endif

#ifndef softfloat_add160M
/*----------------------------------------------------------------------------
| 160-bit form of 'softfloat_addM':  'size_words' is fixed at 5 (N = 160) and
| the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_add160M( lhsPtr, rhsPtr, sumPtr ) \
    softfloat_addM( 5, lhsPtr, rhsPtr, sumPtr )
#endif

#ifndef softfloat_addCarryM
/*----------------------------------------------------------------------------
| Adds the two N-bit unsigned integers pointed to by 'aPtr' and 'bPtr', where
| N = 'size_words' * 32, plus 'carry', which must be either 0 or 1.  The N-bit
| sum (modulo 2^N) is stored at the location pointed to by 'zPtr', and any
| carry out is returned as the result.  Each of 'aPtr', 'bPtr', and 'zPtr'
| points to a 'size_words'-long array of 32-bit elements that concatenate in
| the platform's normal endian order to form an N-bit integer.
|   Both operand arrays are accessed through pointers to const.
*----------------------------------------------------------------------------*/
uint_fast8_t
 softfloat_addCarryM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     const uint32_t *bPtr,
     uint_fast8_t carry,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_addComplCarryM
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_addCarryM', except that
| the value of the unsigned integer pointed to by 'bPtr' is bit-wise
| complemented before the addition.
|   The fixed-width name 'softfloat_addComplCarry96M' defaults to a macro
| that calls this function with 'size_words' set to 3.
*----------------------------------------------------------------------------*/
uint_fast8_t
 softfloat_addComplCarryM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     const uint32_t *bPtr,
     uint_fast8_t carry,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_addComplCarry96M
/*----------------------------------------------------------------------------
| 96-bit form of 'softfloat_addComplCarryM':  'size_words' is fixed at 3
| (N = 96) and the other arguments are passed through unchanged.
*----------------------------------------------------------------------------*/
#define softfloat_addComplCarry96M( lhsPtr, rhsPtr, carryIn, sumPtr ) \
    softfloat_addComplCarryM( 3, lhsPtr, rhsPtr, carryIn, sumPtr )
#endif

#ifndef softfloat_negXM
/*----------------------------------------------------------------------------
| Replaces the N-bit unsigned integer pointed to by 'zPtr' by the
| 2s-complement of itself, where N = 'size_words' * 32.  Argument 'zPtr'
| points to a 'size_words'-long array of 32-bit elements that concatenate in
| the platform's normal endian order to form an N-bit integer.
*----------------------------------------------------------------------------*/
void softfloat_negXM( uint_fast8_t size_words, uint32_t *zPtr );
#endif

#ifndef softfloat_negX96M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_negXM' with 'size_words'
| = 3 (N = 96).
*----------------------------------------------------------------------------*/
#define softfloat_negX96M( zPtr ) softfloat_negXM( 3, zPtr )
#endif

#ifndef softfloat_negX128M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_negXM' with 'size_words'
| = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_negX128M( zPtr ) softfloat_negXM( 4, zPtr )
#endif

#ifndef softfloat_negX160M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_negXM' with 'size_words'
| = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_negX160M( zPtr ) softfloat_negXM( 5, zPtr )
#endif

#ifndef softfloat_negX256M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_negXM' with 'size_words'
| = 8 (N = 256).
*----------------------------------------------------------------------------*/
#define softfloat_negX256M( zPtr ) softfloat_negXM( 8, zPtr )
#endif

#ifndef softfloat_sub1XM
/*----------------------------------------------------------------------------
| Subtracts 1 from the N-bit integer pointed to by 'zPtr', where N =
| 'size_words' * 32.  The subtraction is modulo 2^N, so any borrow out (carry
| out) is lost.  Argument 'zPtr' points to a 'size_words'-long array of 32-bit
| elements that concatenate in the platform's normal endian order to form an
| N-bit integer.
*----------------------------------------------------------------------------*/
void softfloat_sub1XM( uint_fast8_t size_words, uint32_t *zPtr );
#endif

#ifndef softfloat_sub1X96M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_sub1XM' with 'size_words'
| = 3 (N = 96).
*----------------------------------------------------------------------------*/
#define softfloat_sub1X96M( zPtr ) softfloat_sub1XM( 3, zPtr )
#endif

#ifndef softfloat_sub1X160M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_sub1XM' with 'size_words'
| = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_sub1X160M( zPtr ) softfloat_sub1XM( 5, zPtr )
#endif

#ifndef softfloat_subM
/*----------------------------------------------------------------------------
| Subtracts the two N-bit integers pointed to by 'aPtr' and 'bPtr', where N =
| 'size_words' * 32.  The subtraction is modulo 2^N, so any borrow out (carry
| out) is lost.  The N-bit difference is stored at the location pointed to by
| 'zPtr'.  Each of 'aPtr', 'bPtr', and 'zPtr' points to a 'size_words'-long
| array of 32-bit elements that concatenate in the platform's normal endian
| order to form an N-bit integer.
*----------------------------------------------------------------------------*/
void
 softfloat_subM(
     uint_fast8_t size_words,
     const uint32_t *aPtr,
     const uint32_t *bPtr,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_sub96M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_subM' with 'size_words'
| = 3 (N = 96).
*----------------------------------------------------------------------------*/
#define softfloat_sub96M( aPtr, bPtr, zPtr ) softfloat_subM( 3, aPtr, bPtr, zPtr )
#endif

#ifndef softfloat_sub128M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_subM' with 'size_words'
| = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_sub128M( aPtr, bPtr, zPtr ) softfloat_subM( 4, aPtr, bPtr, zPtr )
#endif

#ifndef softfloat_sub160M
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_subM' with 'size_words'
| = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_sub160M( aPtr, bPtr, zPtr ) softfloat_subM( 5, aPtr, bPtr, zPtr )
#endif

#ifndef softfloat_mul64To128M
/*----------------------------------------------------------------------------
| Multiplies 'a' and 'b' and stores the 128-bit product at the location
| pointed to by 'zPtr'.  Argument 'zPtr' points to an array of four 32-bit
| elements that concatenate in the platform's normal endian order to form a
| 128-bit integer.
*----------------------------------------------------------------------------*/
void softfloat_mul64To128M( uint64_t a, uint64_t b, uint32_t *zPtr );
#endif

#ifndef softfloat_mul128MTo256M
/*----------------------------------------------------------------------------
| Multiplies the two 128-bit unsigned integers pointed to by 'aPtr' and
| 'bPtr', and stores the 256-bit product at the location pointed to by 'zPtr'.
| Each of 'aPtr' and 'bPtr' points to an array of four 32-bit elements that
| concatenate in the platform's normal endian order to form a 128-bit integer.
| Argument 'zPtr' points to an array of eight 32-bit elements that concatenate
| to form a 256-bit integer.
*----------------------------------------------------------------------------*/
void
 softfloat_mul128MTo256M(
     const uint32_t *aPtr, const uint32_t *bPtr, uint32_t *zPtr );
#endif

#ifndef softfloat_remStepMBy32
/*----------------------------------------------------------------------------
| Performs a "remainder reduction step" as follows:  Arguments 'remPtr' and
| 'bPtr' both point to N-bit unsigned integers, where N = 'size_words' * 32.
| Defining R and B as the values of those integers, the expression (R<<'dist')
| - B * q is computed modulo 2^N, and the N-bit result is stored at the
| location pointed to by 'zPtr'.  Each of 'remPtr', 'bPtr', and 'zPtr' points
| to a 'size_words'-long array of 32-bit elements that concatenate in the
| platform's normal endian order to form an N-bit integer.
*----------------------------------------------------------------------------*/
void
 softfloat_remStepMBy32(
     uint_fast8_t size_words,
     const uint32_t *remPtr,
     uint_fast8_t dist,
     const uint32_t *bPtr,
     uint32_t q,
     uint32_t *zPtr
 );
#endif

#ifndef softfloat_remStep96MBy32
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_remStepMBy32' with
| 'size_words' = 3 (N = 96).
*----------------------------------------------------------------------------*/
#define softfloat_remStep96MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 3, remPtr, dist, bPtr, q, zPtr )
#endif

#ifndef softfloat_remStep128MBy32
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_remStepMBy32' with
| 'size_words' = 4 (N = 128).
*----------------------------------------------------------------------------*/
#define softfloat_remStep128MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 4, remPtr, dist, bPtr, q, zPtr )
#endif

#ifndef softfloat_remStep160MBy32
/*----------------------------------------------------------------------------
| This function or macro is the same as 'softfloat_remStepMBy32' with
| 'size_words' = 5 (N = 160).
*----------------------------------------------------------------------------*/
#define softfloat_remStep160MBy32( remPtr, dist, bPtr, q, zPtr ) softfloat_remStepMBy32( 5, remPtr, dist, bPtr, q, zPtr )
#endif

#endif

#endif


================================================
FILE: External/SoftFloat-3e/src/s_add128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_add128

FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_add128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;

    z.v0 = a0 + b0;
    z.v64 = a64 + b64 + (z.v0 < a0);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_addMagsExtF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Adds the magnitudes of two 80-bit extended operands supplied as raw fields
| and returns the sum carrying the sign 'signZ'.  'uiA64' and 'uiB64' are the
| sign-and-exponent words, 'uiA0' and 'uiB0' the 64-bit significands.  A
| finite result is rounded through 'softfloat_roundPackToExtF80' using
| 'state->roundingPrecision'.  An operand whose exponent field is 0x7FFF and
| whose significand has any of its low 63 bits set is handed to
| 'softfloat_propagateNaNExtF80UI'.
*----------------------------------------------------------------------------*/
extFloat80_t
 softfloat_addMagsExtF80(
     struct softfloat_state *state,
     uint_fast16_t uiA64,
     uint_fast64_t uiA0,
     uint_fast16_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    uint_fast64_t sigA;
    int_fast32_t expB;
    uint_fast64_t sigB;
    int_fast32_t expDiff;
    uint_fast16_t uiZ64;
    uint_fast64_t uiZ0, sigZ, sigZExtra;
    struct exp32_sig64 normExpSig;
    int_fast32_t expZ;
    struct uint64_extra sig64Extra;
    struct uint128 uiZ;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Split both operands into an exponent and a 64-bit significand.
    *------------------------------------------------------------------------*/
    expA = expExtF80UI64( uiA64 );
    sigA = uiA0;
    expB = expExtF80UI64( uiB64 );
    sigB = uiB0;
    /*------------------------------------------------------------------------
    | Equal exponents:  the significands are already aligned.
    *------------------------------------------------------------------------*/
    expDiff = expA - expB;
    if ( ! expDiff ) {
        if ( expA == 0x7FFF ) {
            /* Both exponents are 0x7FFF.  Any bit below the top significand
               bit in either operand selects NaN propagation; otherwise
               operand A is returned unchanged. */
            if ( (sigA | sigB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
                goto propagateNaN;
            }
            uiZ64 = uiA64;
            uiZ0  = uiA0;
            goto uiZ;
        }
        /* This 64-bit sum may wrap; the nonzero-exponent path below goes to
           'shiftRight1', which shifts right by one and sets the top bit. */
        sigZ = sigA + sigB;
        sigZExtra = 0;
        if ( ! expA ) {
            /* Both exponent fields are zero:  renormalize the sum and round
               it directly, skipping the one-bit right shift. */
            normExpSig = softfloat_normSubnormalExtF80Sig( sigZ );
            expZ = normExpSig.exp + 1;
            sigZ = normExpSig.sig;
            goto roundAndPack;
        }
        expZ = expA;
        goto shiftRight1;
    }
    /*------------------------------------------------------------------------
    | Unequal exponents:  the operand with the smaller exponent is shifted
    | right by the exponent difference, with the bits shifted out collected
    | in 'sigZExtra', and the larger exponent becomes 'expZ'.
    *------------------------------------------------------------------------*/
    if ( expDiff < 0 ) {
        /* B has the larger exponent. */
        if ( expB == 0x7FFF ) {
            if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
            /* Result keeps B's significand with exponent 0x7FFF and sign
               'signZ'. */
            uiZ64 = packToExtF80UI64( signZ, 0x7FFF );
            uiZ0  = uiB0;
            goto uiZ;
        }
        expZ = expB;
        if ( ! expA ) {
            /* A has a zero exponent field:  the shift distance is reduced by
               one (presumably because such an operand is scaled as if its
               exponent were 1 -- TODO confirm).  If nothing is left to
               shift, the significands are aligned already. */
            ++expDiff;
            sigZExtra = 0;
            if ( ! expDiff ) goto newlyAligned;
        }
        sig64Extra = softfloat_shiftRightJam64Extra( sigA, 0, -expDiff );
        sigA = sig64Extra.v;
        sigZExtra = sig64Extra.extra;
    } else {
        /* A has the larger exponent; mirror image of the branch above. */
        if ( expA == 0x7FFF ) {
            if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
            uiZ64 = uiA64;
            uiZ0  = uiA0;
            goto uiZ;
        }
        expZ = expA;
        if ( ! expB ) {
            --expDiff;
            sigZExtra = 0;
            if ( ! expDiff ) goto newlyAligned;
        }
        sig64Extra = softfloat_shiftRightJam64Extra( sigB, 0, expDiff );
        sigB = sig64Extra.v;
        sigZExtra = sig64Extra.extra;
    }
 newlyAligned:
    /* If the top bit of the sum is set, the sum is rounded as is.  Otherwise
       control falls through to 'shiftRight1' (presumably the addition
       carried out of bit 63 -- TODO confirm). */
    sigZ = sigA + sigB;
    if ( sigZ & UINT64_C( 0x8000000000000000 ) ) goto roundAndPack;
    /*------------------------------------------------------------------------
    | Shift the sum right one bit into 'sigZExtra', set the top significand
    | bit, and bump the exponent to compensate.
    *------------------------------------------------------------------------*/
 shiftRight1:
    sig64Extra = softfloat_shortShiftRightJam64Extra( sigZ, sigZExtra, 1 );
    sigZ = sig64Extra.v | UINT64_C( 0x8000000000000000 );
    sigZExtra = sig64Extra.extra;
    ++expZ;
 roundAndPack:
    return
        softfloat_roundPackToExtF80(
            state, signZ, expZ, sigZ, sigZExtra, state->roundingPrecision );
    /*------------------------------------------------------------------------
    | Special results:  either a propagated NaN or a value assembled directly
    | from 'uiZ64' and 'uiZ0' without rounding.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, uiA0, uiB64, uiB0 );
    uiZ64 = uiZ.v64;
    uiZ0  = uiZ.v0;
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_addMagsF128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"

/*----------------------------------------------------------------------------
| Adds the magnitudes of two 128-bit floating-point operands supplied as raw
| halves ('uiA64'/'uiB64' upper 64 bits, 'uiA0'/'uiB0' lower 64 bits) and
| returns the sum carrying the sign 'signZ'.  A finite result is rounded
| through 'softfloat_roundPackToF128', except when both exponent fields are
| zero, in which case the sum is packed directly.  An operand whose exponent
| field is 0x7FFF and whose fraction is nonzero is handed to
| 'softfloat_propagateNaNF128UI'.
*----------------------------------------------------------------------------*/
float128_t
 softfloat_addMagsF128(
     struct softfloat_state *state,
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    struct uint128 sigA;
    int_fast32_t expB;
    struct uint128 sigB;
    int_fast32_t expDiff;
    struct uint128 uiZ, sigZ;
    int_fast32_t expZ;
    uint_fast64_t sigZExtra;
    struct uint128_extra sig128Extra;
    union ui128_f128 uZ;

    /* Split both operands into an exponent and a 128-bit fraction. */
    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    expDiff = expA - expB;
    /* Equal exponents:  the fractions are already aligned. */
    if ( ! expDiff ) {
        if ( expA == 0x7FFF ) {
            /* Any fraction bit in either operand selects NaN propagation;
               otherwise operand A is returned unchanged. */
            if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
            uiZ.v64 = uiA64;
            uiZ.v0  = uiA0;
            goto uiZ;
        }
        sigZ = softfloat_add128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
        if ( ! expA ) {
            /* Both exponent fields are zero:  the raw fraction sum is packed
               with exponent field 0 and returned without a rounding step. */
            uiZ.v64 = packToF128UI64( signZ, 0, sigZ.v64 );
            uiZ.v0  = sigZ.v0;
            goto uiZ;
        }
        expZ = expA;
        /* Bit 49 is ORed into the sum before the one-bit right shift
           (presumably it stands for the sum of the two implicit leading
           bits -- TODO confirm). */
        sigZ.v64 |= UINT64_C( 0x0002000000000000 );
        sigZExtra = 0;
        goto shiftRight1;
    }
    /* Unequal exponents:  the operand with the smaller exponent is shifted
       right by the exponent difference, with the bits shifted out collected
       in 'sigZExtra', and the larger exponent becomes 'expZ'. */
    if ( expDiff < 0 ) {
        /* B has the larger exponent. */
        if ( expB == 0x7FFF ) {
            if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
            /* Result has exponent 0x7FFF, a zero fraction, and sign
               'signZ'. */
            uiZ.v64 = packToF128UI64( signZ, 0x7FFF, 0 );
            uiZ.v0  = 0;
            goto uiZ;
        }
        expZ = expB;
        if ( expA ) {
            /* Bit 48 is set in A before shifting (presumably the implicit
               leading bit of a normal number -- TODO confirm). */
            sigA.v64 |= UINT64_C( 0x0001000000000000 );
        } else {
            /* A has a zero exponent field:  no bit 48 is added and the shift
               distance is reduced by one.  If nothing is left to shift, the
               fractions are aligned already. */
            ++expDiff;
            sigZExtra = 0;
            if ( ! expDiff ) goto newlyAligned;
        }
        sig128Extra =
            softfloat_shiftRightJam128Extra( sigA.v64, sigA.v0, 0, -expDiff );
        sigA = sig128Extra.v;
        sigZExtra = sig128Extra.extra;
    } else {
        /* A has the larger exponent; mirror image of the branch above. */
        if ( expA == 0x7FFF ) {
            if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
            uiZ.v64 = uiA64;
            uiZ.v0  = uiA0;
            goto uiZ;
        }
        expZ = expA;
        if ( expB ) {
            sigB.v64 |= UINT64_C( 0x0001000000000000 );
        } else {
            --expDiff;
            sigZExtra = 0;
            if ( ! expDiff ) goto newlyAligned;
        }
        sig128Extra =
            softfloat_shiftRightJam128Extra( sigB.v64, sigB.v0, 0, expDiff );
        sigB = sig128Extra.v;
        sigZExtra = sig128Extra.extra;
    }
 newlyAligned:
    /* Bit 48 is ORed into the 'sigA' argument here regardless of which
       operand was shifted above; 'sigB' is passed as is. */
    sigZ =
        softfloat_add128(
            sigA.v64 | UINT64_C( 0x0001000000000000 ),
            sigA.v0,
            sigB.v64,
            sigB.v0
        );
    /* 'expZ' is decremented for the case where the sum stays below bit 49
       and is rounded as is; otherwise the decrement is undone and the sum is
       shifted right one bit first. */
    --expZ;
    if ( sigZ.v64 < UINT64_C( 0x0002000000000000 ) ) goto roundAndPack;
    ++expZ;
 shiftRight1:
    sig128Extra =
        softfloat_shortShiftRightJam128Extra(
            sigZ.v64, sigZ.v0, sigZExtra, 1 );
    sigZ = sig128Extra.v;
    sigZExtra = sig128Extra.extra;
 roundAndPack:
    return
        softfloat_roundPackToF128( state, signZ, expZ, sigZ.v64, sigZ.v0, sigZExtra );
    /* Special results:  either a propagated NaN or a value assembled
       directly in 'uiZ' without rounding. */
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( state, uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    uZ.ui = uiZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_approxRecip32_1.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_approxRecip32_1

extern const uint16_t softfloat_approxRecip_1k0s[16];
extern const uint16_t softfloat_approxRecip_1k1s[16];

/*----------------------------------------------------------------------------
| Computes a 32-bit reciprocal estimate for 'a'.  Bits 30..27 of 'a' select
| an entry in the 'softfloat_approxRecip_1k0s'/'softfloat_approxRecip_1k1s'
| tables, bits 26..11 interpolate within that entry to give a 16-bit seed,
| and the seed is then refined with two correction terms derived from the
| complemented product of the seed and 'a'.
| NOTE(review): bit 31 of 'a' does not take part in the table lookup;
| presumably callers pass a value with that bit set -- confirm against the
| declaration in 'primitives.h'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_approxRecip32_1( uint32_t a )
{
    int tableSlot;
    uint16_t offset, seed;
    uint_fast32_t slopeTerm;
    uint32_t residual;
    uint_fast32_t recip;
    uint32_t residualSqr;

    /* Table lookup with linear interpolation gives the 16-bit seed. */
    tableSlot = a>>27 & 0xF;
    offset = (uint16_t) (a>>11);
    slopeTerm =
        (softfloat_approxRecip_1k1s[tableSlot] * (uint_fast32_t) offset)>>20;
    seed = softfloat_approxRecip_1k0s[tableSlot] - slopeTerm;

    /* First correction:  widen the seed to 32 bits and add a term
       proportional to the complemented product of seed and 'a'. */
    residual = ~(uint_fast32_t) ((seed * (uint_fast64_t) a)>>7);
    recip = (uint_fast32_t) seed<<16;
    recip += (seed * (uint_fast64_t) residual)>>24;

    /* Second correction:  a term proportional to the square of the
       residual. */
    residualSqr = ((uint_fast64_t) residual * residual)>>32;
    recip += ((uint32_t) recip * (uint_fast64_t) residualSqr)>>48;
    return recip;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_approxRecipSqrt32_1.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_approxRecipSqrt32_1

extern const uint16_t softfloat_approxRecipSqrt_1k0s[];
extern const uint16_t softfloat_approxRecipSqrt_1k1s[];

/*----------------------------------------------------------------------------
| Computes a 32-bit reciprocal-square-root estimate for 'a'.  Bits 30..28 of
| 'a' together with 'oddExpA' select an entry in the
| 'softfloat_approxRecipSqrt_1k0s'/'softfloat_approxRecipSqrt_1k1s' tables,
| bits 27..12 interpolate within that entry to give a 16-bit seed, and the
| seed is then refined with two correction terms.  The returned value always
| has bit 31 set:  a result without it is replaced by 0x80000000.
| NOTE(review): 'oddExpA' is added directly to the table index, so it is
| presumably expected to be 0 or 1 -- confirm against the callers.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_approxRecipSqrt32_1( unsigned int oddExpA, uint32_t a )
{
    int tableSlot;
    uint16_t offset, seed;
    uint_fast32_t slopeTerm;
    uint_fast32_t scaledSeedSqr;
    uint32_t residual;
    uint_fast32_t result;
    uint32_t residualSqr;

    /* Table lookup with linear interpolation gives the 16-bit seed. */
    tableSlot = (a>>27 & 0xE) + oddExpA;
    offset = (uint16_t) (a>>12);
    slopeTerm =
        (softfloat_approxRecipSqrt_1k1s[tableSlot] * (uint_fast32_t) offset)
            >>20;
    seed = softfloat_approxRecipSqrt_1k0s[tableSlot] - slopeTerm;

    /* Square of the seed, doubled when 'oddExpA' is zero. */
    scaledSeedSqr = (uint_fast32_t) seed * seed;
    if ( oddExpA == 0 ) scaledSeedSqr <<= 1;

    /* First correction:  widen the seed to 32 bits and add a term
       proportional to the complemented product of the scaled square and
       'a'. */
    residual =
        ~(uint_fast32_t) (((uint32_t) scaledSeedSqr * (uint_fast64_t) a)>>23);
    result = (uint_fast32_t) seed<<16;
    result += (seed * (uint_fast64_t) residual)>>25;

    /* Second correction:  a term proportional to the square of the
       residual. */
    residualSqr = ((uint_fast64_t) residual * residual)>>32;
    result +=
        ((uint32_t) ((result>>1) + (result>>3) - ((uint_fast32_t) seed<<14))
             * (uint_fast64_t) residualSqr)
            >>48;

    /* Clamp so that bit 31 of the returned estimate is always set. */
    if ( (result & 0x80000000) == 0 ) result = 0x80000000;
    return result;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_approxRecipSqrt_1Ks.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"

/*----------------------------------------------------------------------------
| Lookup tables read by 'softfloat_approxRecipSqrt32_1' to form its 16-bit
| seed.  That function indexes both tables with '(a>>27 & 0xE) + oddExpA', so
| even-numbered entries are used when 'oddExpA' is 0 and odd-numbered entries
| when it is 1.  The '1k0s' entry is the base value; the '1k1s' entry is
| multiplied by the interpolation offset, shifted right 20 bits, and
| subtracted from the base.
*----------------------------------------------------------------------------*/
const uint16_t softfloat_approxRecipSqrt_1k0s[16] = {
    0xB4C9, 0xFFAB, 0xAA7D, 0xF11C, 0xA1C5, 0xE4C7, 0x9A43, 0xDA29,
    0x93B5, 0xD0E5, 0x8DED, 0xC8B7, 0x88C6, 0xC16D, 0x8424, 0xBAE1
};
const uint16_t softfloat_approxRecipSqrt_1k1s[16] = {
    0xA5A5, 0xEA42, 0x8C21, 0xC62D, 0x788F, 0xAA7F, 0x6928, 0x94B6,
    0x5CC7, 0x8335, 0x52A6, 0x74E2, 0x4A3E, 0x68FE, 0x432B, 0x5EFD
};


================================================
FILE: External/SoftFloat-3e/src/s_approxRecip_1Ks.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"

/*----------------------------------------------------------------------------
| Table of sixteen 16-bit constants, indexed 0..15.
| NOTE(review): presumably the constant (`k0') terms of a piecewise-linear
| reciprocal approximation, paired entry-for-entry with
| `softfloat_approxRecip_1k1s' -- confirm against the routine that reads it.
*----------------------------------------------------------------------------*/
const uint16_t softfloat_approxRecip_1k0s[16] = {
    /*  0 ..  3 */ 0xFFC4, 0xF0BE, 0xE363, 0xD76F,
    /*  4 ..  7 */ 0xCCAD, 0xC2F0, 0xBA16, 0xB201,
    /*  8 .. 11 */ 0xAA97, 0xA3C6, 0x9D7A, 0x97A6,
    /* 12 .. 15 */ 0x923C, 0x8D32, 0x887E, 0x8417
};
/*----------------------------------------------------------------------------
| Table of sixteen 16-bit constants, indexed 0..15.
| NOTE(review): presumably the first-order (`k1') terms of a piecewise-linear
| reciprocal approximation, paired entry-for-entry with
| `softfloat_approxRecip_1k0s' -- confirm against the routine that reads it.
*----------------------------------------------------------------------------*/
const uint16_t softfloat_approxRecip_1k1s[16] = {
    /*  0 ..  3 */ 0xF0F1, 0xD62C, 0xBFA1, 0xAC77,
    /*  4 ..  7 */ 0x9C0A, 0x8DDB, 0x8185, 0x76BA,
    /*  8 .. 11 */ 0x6D3B, 0x64D4, 0x5D5C, 0x56B1,
    /* 12 .. 15 */ 0x50B6, 0x4B55, 0x4679, 0x4211
};


================================================
FILE: External/SoftFloat-3e/src/s_commonNaNToExtF80UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"
#include "specialize.h"

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into an 80-bit extended
| floating-point NaN, and returns the bit pattern of this value as an unsigned
| integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr )
{
    struct uint128 uiZ;

    uiZ.v64 = (uint_fast16_t) aPtr->sign<<15 | 0x7FFF;
    uiZ.v0 = UINT64_C( 0xC000000000000000 ) | aPtr->v64>>1;
    return uiZ;

}


================================================
FILE: External/SoftFloat-3e/src/s_commonNaNToF128UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"
#include "specialize.h"

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by `aPtr' into a 128-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN *aPtr )
{
    struct uint128 uiZ;

    uiZ = softfloat_shortShiftRight128( aPtr->v64, aPtr->v0, 16 );
    uiZ.v64 |= (uint_fast64_t) aPtr->sign<<63 | UINT64_C( 0x7FFF800000000000 );
    return uiZ;

}


================================================
FILE: External/SoftFloat-3e/src/s_commonNaNToF32UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "specialize.h"

/*----------------------------------------------------------------------------
| Builds the bit pattern of a 32-bit floating-point NaN from the common NaN
| pointed to by `aPtr' and returns it as an unsigned integer.  The result is
| the OR of three pieces: the sign moved to bit 31, the constant 0x7FC00000,
| and `aPtr->v64' shifted right by 41 bits.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr )
{
    const uint_fast32_t signBit = (uint_fast32_t) aPtr->sign << 31;
    const uint_fast32_t payload = aPtr->v64 >> 41;

    return signBit | 0x7FC00000 | payload;

}


================================================
FILE: External/SoftFloat-3e/src/s_commonNaNToF64UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "specialize.h"

/*----------------------------------------------------------------------------
| Builds the bit pattern of a 64-bit floating-point NaN from the common NaN
| pointed to by `aPtr' and returns it as an unsigned integer.  The result is
| the OR of three pieces: the constant 0x7FF8000000000000, the sign moved to
| bit 63, and `aPtr->v64' shifted right by 12 bits.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr )
{
    uint_fast64_t nanBits = UINT64_C( 0x7FF8000000000000 );

    nanBits |= (uint_fast64_t) aPtr->sign << 63;
    nanBits |= aPtr->v64 >> 12;
    return nanBits;

}


================================================
FILE: External/SoftFloat-3e/src/s_countLeadingZeros32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_countLeadingZeros32

#define softfloat_countLeadingZeros32 softfloat_countLeadingZeros32
#include "primitives.h"

/*----------------------------------------------------------------------------
| Returns the number of leading zero bits in the 32-bit value `a' (32 when
| `a' is zero).  The value is narrowed in two steps -- first by 16 bits, then
| by 8 -- so that its most-significant nonzero byte (if any) ends up in the
| top byte, which is then looked up in `softfloat_countLeadingZeros8'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast8_t softfloat_countLeadingZeros32( uint32_t a )
{
    uint_fast8_t zeros = 0;

    /* Upper 16 bits all clear: count them and bring the lower half up. */
    if ( ! (a >> 16) ) {
        zeros += 16;
        a <<= 16;
    }
    /* Top byte clear: count it and bring the next byte up. */
    if ( ! (a >> 24) ) {
        zeros += 8;
        a <<= 8;
    }
    /* The table supplies the zeros within the remaining top byte. */
    return zeros + softfloat_countLeadingZeros8[a >> 24];

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_countLeadingZeros64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_countLeadingZeros64

#define softfloat_countLeadingZeros64 softfloat_countLeadingZeros64
#include "primitives.h"

/*----------------------------------------------------------------------------
| Returns the number of leading zero bits in the 64-bit value `a' (64 when
| `a' is zero).  The upper 32-bit half is examined first; if it is zero, 32
| is counted and the lower half is examined instead.  The chosen 32-bit word
| is then narrowed by 16 and by 8 bits, and its top byte is looked up in
| `softfloat_countLeadingZeros8'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast8_t softfloat_countLeadingZeros64( uint64_t a )
{
    uint32_t word = (uint32_t) (a >> 32);
    uint_fast8_t zeros = 0;

    /* Nothing in the upper half: all 32 of its bits are leading zeros. */
    if ( word == 0 ) {
        zeros = 32;
        word = (uint32_t) a;
    }
    /*------------------------------------------------------------------------
    | From this point the answer is `zeros' plus the leading-zero count of
    | the 32-bit `word'.
    *------------------------------------------------------------------------*/
    if ( ! (word >> 16) ) {
        zeros += 16;
        word <<= 16;
    }
    if ( ! (word >> 24) ) {
        zeros += 8;
        word <<= 8;
    }
    return zeros + softfloat_countLeadingZeros8[word >> 24];

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_countLeadingZeros8.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"

/*----------------------------------------------------------------------------
| Lookup table: entry `i' is the number of leading zero bits in the 8-bit
| value `i'.  Entry 0 is 8; entries 0x80..0xFF are 0.  The entries are grouped
| below by the index range that shares each count.
*----------------------------------------------------------------------------*/
const uint_least8_t softfloat_countLeadingZeros8[256] = {
    /* 0x00        */ 8,
    /* 0x01        */ 7,
    /* 0x02 - 0x03 */ 6, 6,
    /* 0x04 - 0x07 */ 5, 5, 5, 5,
    /* 0x08 - 0x0F */ 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x10 - 0x1F */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0x20 - 0x3F */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0x40 - 0x7F */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80 - 0xFF */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};


================================================
FILE: External/SoftFloat-3e/src/s_extF80UIToCommonNaN.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Takes the bit pattern of an 80-bit extended floating-point NaN, given as the
| concatenation of `uiA64' and `uiA0', and writes its common NaN form to the
| location pointed to by `zPtr'.  The invalid exception flag is raised on
| `state' when the input is a signaling NaN.  The stored common NaN has its
| sign taken from `uiA64' shifted right by 15, `v64' set to `uiA0' shifted
| left by one bit, and `v0' set to zero.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void
 softfloat_extF80UIToCommonNaN(
     struct softfloat_state *state, uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
{
    const uint_fast64_t payloadHi = uiA0 << 1;

    /* Report a signaling NaN before producing the converted value. */
    if ( softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
    }

    zPtr->v0   = 0;
    zPtr->v64  = payloadHi;
    zPtr->sign = uiA64 >> 15;

}


================================================
FILE: External/SoftFloat-3e/src/s_f128UIToCommonNaN.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitives.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Assuming the unsigned integer formed from concatenating `uiA64' and `uiA0'
| has the bit pattern of a 128-bit floating-point NaN, converts this NaN to
| the common NaN form, and stores the resulting common NaN at the location
| pointed to by `zPtr'.  If the NaN is a signaling NaN, the invalid exception
| is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void
 softfloat_f128UIToCommonNaN(
     struct softfloat_state *state, uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr )
{
    struct uint128 NaNSig;

    if ( softfloat_isSigNaNF128UI( uiA64, uiA0 ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
    }
    NaNSig = softfloat_shortShiftLeft128( uiA64, uiA0, 16 );
    zPtr->sign = uiA64>>63;
    zPtr->v64  = NaNSig.v64;
    zPtr->v0   = NaNSig.v0;

}


================================================
FILE: External/SoftFloat-3e/src/s_f32UIToCommonNaN.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Takes the bit pattern `uiA' of a 32-bit floating-point NaN and writes its
| common NaN form to the location pointed to by `zPtr'.  The invalid exception
| flag is raised on `state' when the input is a signaling NaN.  The stored
| common NaN has its sign taken from `uiA' shifted right by 31, `v64' set to
| `uiA' widened to 64 bits and shifted left by 41, and `v0' set to zero.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_f32UIToCommonNaN( struct softfloat_state *state, uint_fast32_t uiA, struct commonNaN *zPtr )
{
    const uint_fast64_t payloadHi = (uint_fast64_t) uiA << 41;

    /* Report a signaling NaN before producing the converted value. */
    if ( softfloat_isSigNaNF32UI( uiA ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
    }

    zPtr->v0   = 0;
    zPtr->v64  = payloadHi;
    zPtr->sign = uiA >> 31;

}


================================================
FILE: External/SoftFloat-3e/src/s_f64UIToCommonNaN.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Takes the bit pattern `uiA' of a 64-bit floating-point NaN and writes its
| common NaN form to the location pointed to by `zPtr'.  The invalid exception
| flag is raised on `state' when the input is a signaling NaN.  The stored
| common NaN has its sign taken from `uiA' shifted right by 63, `v64' set to
| `uiA' shifted left by 12, and `v0' set to zero.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_f64UIToCommonNaN( struct softfloat_state *state, uint_fast64_t uiA, struct commonNaN *zPtr )
{
    const uint_fast64_t payloadHi = uiA << 12;

    /* Report a signaling NaN before producing the converted value. */
    if ( softfloat_isSigNaNF64UI( uiA ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
    }

    zPtr->v0   = 0;
    zPtr->v64  = payloadHi;
    zPtr->sign = uiA >> 63;

}


================================================
FILE: External/SoftFloat-3e/src/s_le128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"

#ifndef softfloat_le128

FEXCORE_PRESERVE_ALL_ATTR
bool softfloat_le128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    /*------------------------------------------------------------------------
    | Unsigned comparison of the 128-bit value whose high word is 'a64' and
    | low word is 'a0' against the one formed by 'b64' and 'b0'.  Differing
    | high words settle the order on their own; otherwise the low words
    | decide, with equality counting as "less than or equal".
    *------------------------------------------------------------------------*/
    if ( a64 != b64 ) return (a64 < b64);
    return (a0 <= b0);

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_lt128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"

#ifndef softfloat_lt128

FEXCORE_PRESERVE_ALL_ATTR
bool softfloat_lt128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    /*------------------------------------------------------------------------
    | Unsigned comparison of the 128-bit value whose high word is 'a64' and
    | low word is 'a0' against the one formed by 'b64' and 'b0'.  Differing
    | high words settle the order on their own; otherwise the low words
    | decide, and equal values are not "less than".
    *------------------------------------------------------------------------*/
    if ( a64 != b64 ) return (a64 < b64);
    return (a0 < b0);

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_mul64ByShifted32To128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_mul64ByShifted32To128

/*----------------------------------------------------------------------------
| Returns the 128-bit value of 'a' times 'b' times 2^32.  The product is
| assembled from two partial products, one for each 32-bit half of 'a', so
| that no intermediate needs more than 64 bits.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_mul64ByShifted32To128( uint64_t a, uint32_t b )
{
    struct uint128 z;
    uint_fast64_t loProd, hiProd;

    /* Low half of 'a' times 'b'; its upper 32 bits spill into the high word. */
    loProd = (uint_fast64_t) (uint32_t) a * b;
    /* High half of 'a' times 'b'; lands entirely in the high word. */
    hiProd = (uint_fast64_t) (uint32_t) (a>>32) * b;
    z.v64 = hiProd + (loProd>>32);
    z.v0 = loProd<<32;
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_mul64To128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_mul64To128

/*----------------------------------------------------------------------------
| Returns the full 128-bit product of 'a' and 'b'.  Each operand is split into
| 32-bit halves and the four partial products are summed, with the carries
| out of the cross-term sum and out of the low word propagated by hand.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_mul64To128( uint64_t a, uint64_t b )
{
    uint32_t aHi, aLo, bHi, bLo;
    uint64_t crossFirst, cross;
    uint_fast64_t crossCarry;
    struct uint128 z;

    aHi = a>>32;
    aLo = a;
    bHi = b>>32;
    bLo = b;
    /* Sum of the two cross terms; may wrap around 2^64. */
    crossFirst = (uint_fast64_t) aHi * bLo;
    cross = crossFirst + (uint_fast64_t) aLo * bHi;
    /* A wrapped sum is smaller than its first addend; that carry is worth
       2^64 in the cross term, i.e. 2^32 in the high word. */
    crossCarry = (uint_fast64_t) (cross < crossFirst)<<32;
    z.v64 = (uint_fast64_t) aHi * bHi;
    z.v64 += crossCarry | cross>>32;
    z.v0 = (uint_fast64_t) aLo * bLo;
    cross <<= 32;
    z.v0 += cross;
    /* Carry out of the low word. */
    z.v64 += (z.v0 < cross);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_normRoundPackToExtF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Left-shifts the 128-bit significand held in 'sig' (upper 64 bits) and
| 'sigExtra' (lower 64 bits) by the count that 'softfloat_countLeadingZeros64'
| reports for 'sig', lowers 'exp' by the same amount, and then forwards
| everything to 'softfloat_roundPackToExtF80'.  When 'sig' is zero on entry,
| 'sigExtra' is first moved up into 'sig' and 'exp' is lowered by 64.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t
 softfloat_normRoundPackToExtF80(
     struct softfloat_state *state,
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig,
     uint_fast64_t sigExtra,
     uint_fast8_t roundingPrecision
 )
{
    int_fast8_t leadingZeros;
    struct uint128 shifted;

    if ( sig == 0 ) {
        /* Upper word is empty: promote the lower word by a whole 64 bits. */
        sig = sigExtra;
        sigExtra = 0;
        exp -= 64;
    }
    leadingZeros = softfloat_countLeadingZeros64( sig );
    if ( leadingZeros != 0 ) {
        shifted = softfloat_shortShiftLeft128( sig, sigExtra, leadingZeros );
        sig = shifted.v64;
        sigExtra = shifted.v0;
        exp -= leadingZeros;
    }
    return
        softfloat_roundPackToExtF80(
            state, sign, exp, sig, sigExtra, roundingPrecision );

}


================================================
FILE: External/SoftFloat-3e/src/s_normRoundPackToF128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"

float128_t
 softfloat_normRoundPackToF128(
     struct softfloat_state *state,
     bool sign, int_fast32_t exp, uint_fast64_t sig64, uint_fast64_t sig0 )
{
    int_fast8_t shiftDist;
    struct uint128 sig128;
    union ui128_f128 uZ;
    uint_fast64_t sigExtra;
    struct uint128_extra sig128Extra;

    if ( ! sig64 ) {
        exp -= 64;
        sig64 = sig0;
        sig0 = 0;
    }
    shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
    exp -= shiftDist;
    if ( 0 <= shiftDist ) {
        if ( shiftDist ) {
            sig128 = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
            sig64 = sig128.v64;
            sig0  = sig128.v0;
        }
        if ( (uint32_t) exp < 0x7FFD ) {
            uZ.ui.v64 = packToF128UI64( sign, sig64 | sig0 ? exp : 0, sig64 );
            uZ.ui.v0  = sig0;
            return uZ.f;
        }
        sigExtra = 0;
    } else {
        sig128Extra =
            softfloat_shortShiftRightJam128Extra( sig64, sig0, 0, -shiftDist );
        sig64 = sig128Extra.v.v64;
        sig0  = sig128Extra.v.v0;
        sigExtra = sig128Extra.extra;
    }
    return softfloat_roundPackToF128( state, sign, exp, sig64, sig0, sigExtra );

}


================================================
FILE: External/SoftFloat-3e/src/s_normSubnormalExtF80Sig.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Shifts 'sig' left by the count that 'softfloat_countLeadingZeros64' reports
| for it and returns the shifted significand together with the negated shift
| count as the exponent.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct exp32_sig64 softfloat_normSubnormalExtF80Sig( uint_fast64_t sig )
{
    struct exp32_sig64 z;
    int_fast8_t leadingZeros;

    leadingZeros = softfloat_countLeadingZeros64( sig );
    z.sig = sig<<leadingZeros;
    z.exp = -leadingZeros;
    return z;

}


================================================
FILE: External/SoftFloat-3e/src/s_normSubnormalF128Sig.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Normalizes the 128-bit significand 'sig64':'sig0' and returns it with the
| matching exponent.  The shift distance is the leading-zero count of the
| first nonzero word minus 15.  When 'sig64' is zero the low word is moved
| into the result by hand, because the bits may have to travel either left or
| right relative to a plain 64-bit word move.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct exp32_sig128
 softfloat_normSubnormalF128Sig( uint_fast64_t sig64, uint_fast64_t sig0 )
{
    struct exp32_sig128 z;
    int_fast8_t shiftDist;

    if ( sig64 ) {
        shiftDist = softfloat_countLeadingZeros64( sig64 ) - 15;
        z.sig = softfloat_shortShiftLeft128( sig64, sig0, shiftDist );
        z.exp = 1 - shiftDist;
        return z;
    }
    /* Only the low word carries bits. */
    shiftDist = softfloat_countLeadingZeros64( sig0 ) - 15;
    z.exp = -63 - shiftDist;
    if ( 0 <= shiftDist ) {
        z.sig.v64 = sig0<<shiftDist;
        z.sig.v0  = 0;
    } else {
        /* Fewer than 15 leading zeros: the top bits of 'sig0' go to the high
           word and the remainder stays, left-aligned, in the low word. */
        z.sig.v64 = sig0>>-shiftDist;
        z.sig.v0  = sig0<<(shiftDist & 63);
    }
    return z;

}


================================================
FILE: External/SoftFloat-3e/src/s_normSubnormalF128SigM.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Normalizes, in place, the four-word significand pointed to by 'sigPtr'.
| Words are examined starting at 'indexWordHi( 4 )' and stepping by
| '-wordIncr' until a nonzero word is found; 32 is added to the shift distance
| for every zero word skipped.  If all four words are zero, nothing is
| modified and 1 is returned.  Otherwise the significand is shifted left by
| the accumulated distance plus the nonzero word's leading-zero count minus
| 15, and 1 minus that total distance is returned.
*----------------------------------------------------------------------------*/
int softfloat_normSubnormalF128SigM( uint32_t *sigPtr )
{
    const uint32_t *wordPtr;
    uint32_t topWord;
    int_fast16_t shiftDist;

    wordPtr = sigPtr + indexWordHi( 4 );
    shiftDist = 0;
    while ( ! (topWord = *wordPtr) ) {
        shiftDist += 32;
        if ( 128 <= shiftDist ) return 1;
        wordPtr -= wordIncr;
    }
    shiftDist += softfloat_countLeadingZeros32( topWord ) - 15;
    if ( shiftDist != 0 ) {
        softfloat_shiftLeft128M( sigPtr, shiftDist, sigPtr );
    }
    return 1 - shiftDist;

}


================================================
FILE: External/SoftFloat-3e/src/s_normSubnormalF32Sig.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Shifts 'sig' left by its leading-zero count (as reported by
| 'softfloat_countLeadingZeros32') minus 8, and returns the shifted
| significand together with an exponent of 1 minus that shift distance.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct exp16_sig32 softfloat_normSubnormalF32Sig( uint_fast32_t sig )
{
    struct exp16_sig32 z;
    int_fast8_t normShift;

    normShift = softfloat_countLeadingZeros32( sig ) - 8;
    z.sig = sig<<normShift;
    z.exp = 1 - normShift;
    return z;

}


================================================
FILE: External/SoftFloat-3e/src/s_normSubnormalF64Sig.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"

/*----------------------------------------------------------------------------
| Shifts 'sig' left by its leading-zero count (as reported by
| 'softfloat_countLeadingZeros64') minus 11, and returns the shifted
| significand together with an exponent of 1 minus that shift distance.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct exp16_sig64 softfloat_normSubnormalF64Sig( uint_fast64_t sig )
{
    struct exp16_sig64 z;
    int_fast8_t normShift;

    normShift = softfloat_countLeadingZeros64( sig ) - 11;
    z.sig = sig<<normShift;
    z.exp = 1 - normShift;
    return z;

}


================================================
FILE: External/SoftFloat-3e/src/s_propagateNaNExtF80UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2018 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
| 80-bit extended floating-point value, and assuming at least one of these
| floating-point values is a NaN, returns the bit pattern of the combined NaN
| result.  If either original floating-point value is a signaling NaN, the
| invalid exception is raised.
|   Selection rule:  when exactly one operand is a signaling NaN, the other
| operand is returned if it is a NaN and the signaling one otherwise.  In all
| remaining cases the operand with the larger low 15 bits of its upper word
| wins, then the one with the larger 64-bit significand, and on a complete tie
| 'A' is returned only if 'uiA64' is less than 'uiB64'.  The returned
| significand always has its two most significant bits set.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_propagateNaNExtF80UI(
     struct softfloat_state *state,
     uint_fast16_t uiA64,
     uint_fast64_t uiA0,
     uint_fast16_t uiB64,
     uint_fast64_t uiB0
 )
{
    bool sigNaNA, sigNaNB, takeA;
    uint_fast16_t magA64, magB64;
    struct uint128 uiZ;

    sigNaNA = softfloat_isSigNaNExtF80UI( uiA64, uiA0 );
    sigNaNB = softfloat_isSigNaNExtF80UI( uiB64, uiB0 );
    if ( sigNaNA | sigNaNB ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
    }
    if ( sigNaNA && ! sigNaNB ) {
        /* Only A signals: B is returned if it is a NaN at all. */
        takeA = isNaNExtF80UI( uiB64, uiB0 ) ? false : true;
    } else if ( sigNaNB && ! sigNaNA ) {
        /* Only B signals: A is returned if it is a NaN at all. */
        takeA = isNaNExtF80UI( uiA64, uiA0 ) ? true : false;
    } else {
        /* Both or neither signal: compare magnitudes, then break the tie on
           the full upper words. */
        magA64 = uiA64 & 0x7FFF;
        magB64 = uiB64 & 0x7FFF;
        if ( magA64 != magB64 ) {
            takeA = (magB64 < magA64);
        } else if ( uiA0 != uiB0 ) {
            takeA = (uiB0 < uiA0);
        } else {
            takeA = (uiA64 < uiB64);
        }
    }
    /* Setting the top two significand bits makes the result non-signaling. */
    if ( takeA ) {
        uiZ.v64 = uiA64;
        uiZ.v0  = uiA0 | UINT64_C( 0xC000000000000000 );
    } else {
        uiZ.v64 = uiB64;
        uiZ.v0  = uiB0 | UINT64_C( 0xC000000000000000 );
    }
    return uiZ;

}


================================================
FILE: External/SoftFloat-3e/src/s_propagateNaNF128UI.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Interpreting the unsigned integer formed from concatenating `uiA64' and
| `uiA0' as a 128-bit floating-point value, and likewise interpreting the
| unsigned integer formed from concatenating `uiB64' and `uiB0' as another
| 128-bit floating-point value, and assuming at least one of these floating-
| point values is a NaN, returns the bit pattern of the combined NaN result.
| If either original floating-point value is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
struct uint128
 softfloat_propagateNaNF128UI(
     struct softfloat_state *state,
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0
 )
{
    bool isSigNaNA;
    struct uint128 uiZ;

    isSigNaNA = softfloat_isSigNaNF128UI( uiA64, uiA0 );
    if ( isSigNaNA || softfloat_isSigNaNF128UI( uiB64, uiB0 ) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        if ( isSigNaNA ) goto returnNonsigA;
    }
    if ( isNaNF128UI( uiA64, uiA0 ) ) {
 returnNonsigA:
        uiZ.v64 = uiA64;
        uiZ.v0  = uiA0;
    } else {
        uiZ.v64 = uiB64;
        uiZ.v0  = uiB0;
    }
    uiZ.v64 |= UINT64_C( 0x0000800000000000 );
    return uiZ;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundPackToExtF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Rounds the value described by `sign', `exp', `sig' and `sigExtra' and packs
| it into an `extFloat80_t'.  The rounding mode and the tininess-detection
| setting are read from `state'; exception flags are accumulated into `state'.
|
| `roundingPrecision' selects how much of `sig' is kept:
|   - 64: the low 11 bits of `sig' are treated as rounding bits;
|   - 32: the low 40 bits of `sig' are treated as rounding bits;
|   - 80 or any other value: all 64 bits of `sig' are kept and `sigExtra'
|     supplies the rounding bits.
|
| Flags that may be set: underflow and inexact (subnormal results), inexact
| (any discarded nonzero bits), overflow together with inexact (exponent out
| of range).
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t
 softfloat_roundPackToExtF80(
     struct softfloat_state *state,
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig,
     uint_fast64_t sigExtra,
     uint_fast8_t roundingPrecision
 )
{
    uint_fast8_t roundingMode;
    bool roundNearEven;
    uint_fast64_t roundIncrement, roundMask, roundBits;
    bool isTiny, doIncrement;
    struct uint64_extra sig64Extra;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    roundingMode = state->roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    /* Pick the path: reduced precision (64 or 32) is handled right below,
       everything else is handled at `precision80'. */
    if ( roundingPrecision == 80 ) goto precision80;
    if ( roundingPrecision == 64 ) {
        /* Half of the 11-bit rounding field, and the mask covering it. */
        roundIncrement = UINT64_C( 0x0000000000000400 );
        roundMask = UINT64_C( 0x00000000000007FF );
    } else if ( roundingPrecision == 32 ) {
        /* Half of the 40-bit rounding field, and the mask covering it. */
        roundIncrement = UINT64_C( 0x0000008000000000 );
        roundMask = UINT64_C( 0x000000FFFFFFFFFF );
    } else {
        goto precision80;
    }
    /* Fold any nonzero `sigExtra' into the lowest bit of `sig' as a sticky
       bit; `sigExtra' is not consulted again on this path. */
    sig |= (sigExtra != 0);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        /* Directed modes: add the whole mask when the mode rounds in the
           direction of `sign' (min for negative, max for positive), so any
           nonzero rounding bit carries; otherwise add nothing (truncate). */
        roundIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
                ? roundMask
                : 0;
    }
    roundBits = sig & roundMask;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    /* The unsigned compare is true when exp <= 0 (exp - 1 wraps to a large
       value) or when exp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
        if ( exp <= 0 ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            /* Tininess is decided before `sig' is shifted. */
            isTiny =
                   (state->detectTininess
                        == softfloat_tininess_beforeRounding)
                || (exp < 0)
                || (sig <= (uint64_t) (sig + roundIncrement));
            /* Shift right by 1 - exp; the "Jam" helper presumably keeps
               shifted-out bits sticky in the low bit (see its definition). */
            sig = softfloat_shiftRightJam64( sig, 1 - exp );
            roundBits = sig & roundMask;
            if ( roundBits ) {
                if ( isTiny ) softfloat_raiseFlags( state, softfloat_flag_underflow );
                state->exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
                if ( roundingMode == softfloat_round_odd ) {
                    /* Set the lowest kept bit. */
                    sig |= roundMask + 1;
                }
#endif
            }
            sig += roundIncrement;
            /* exp becomes 1 if rounding set the top bit of `sig', else 0. */
            exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
            /* roundIncrement is reused here as the lowest kept bit. */
            roundIncrement = roundMask + 1;
            if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
                /* Exact tie in nearest-even: also clear the lowest kept bit. */
                roundMask |= roundIncrement;
            }
            sig &= ~roundMask;
            goto packReturn;
        }
        /* Here exp >= 0x7FFE: overflow if exp is above 0x7FFE, or if it is
           0x7FFE and adding the increment would carry out of 64 bits. */
        if (
               (0x7FFE < exp)
            || ((exp == 0x7FFE) && ((uint64_t) (sig + roundIncrement) < sig))
        ) {
            /* Jumps into the shared overflow handling below, keeping this
               path's roundMask. */
            goto overflow;
        }
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( roundBits ) {
        state->exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) {
            /* Drop the rounding bits and set the lowest kept bit. */
            sig = (sig & ~roundMask) | (roundMask + 1);
            goto packReturn;
        }
#endif
    }
    sig = (uint64_t) (sig + roundIncrement);
    if ( sig < roundIncrement ) {
        /* The addition wrapped past 64 bits: bump the exponent and set the
           significand to just its top bit. */
        ++exp;
        sig = UINT64_C( 0x8000000000000000 );
    }
    /* roundIncrement is reused here as the lowest kept bit. */
    roundIncrement = roundMask + 1;
    if ( roundNearEven && (roundBits<<1 == roundIncrement) ) {
        /* Exact tie in nearest-even: also clear the lowest kept bit. */
        roundMask |= roundIncrement;
    }
    sig &= ~roundMask;
    goto packReturn;
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
 precision80:
    /* Full-width path: by default increment when the top bit of `sigExtra'
       is set. */
    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        /* Directed modes: increment only when the mode rounds in the
           direction of `sign' and some extra bit is nonzero. */
        doIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
                && sigExtra;
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    /* Same range test as on the reduced-precision path: true when exp <= 0
       or exp >= 0x7FFE. */
    if ( 0x7FFD <= (uint32_t) (exp - 1) ) {
        if ( exp <= 0 ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            /* Tininess is decided before the shift below. */
            isTiny =
                   (state->detectTininess
                        == softfloat_tininess_beforeRounding)
                || (exp < 0)
                || ! doIncrement
                || (sig < UINT64_C( 0xFFFFFFFFFFFFFFFF ));
            /* Shift the sig:sigExtra pair right by 1 - exp. */
            sig64Extra =
                softfloat_shiftRightJam64Extra( sig, sigExtra, 1 - exp );
            exp = 0;
            sig = sig64Extra.v;
            sigExtra = sig64Extra.extra;
            if ( sigExtra ) {
                if ( isTiny ) softfloat_raiseFlags( state, softfloat_flag_underflow );
                state->exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
                if ( roundingMode == softfloat_round_odd ) {
                    sig |= 1;
                    goto packReturn;
                }
#endif
            }
            /* The shift changed `sigExtra', so the increment decision is
               recomputed with the same rules as above. */
            doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
            if (
                ! roundNearEven
                    && (roundingMode != softfloat_round_near_maxMag)
            ) {
                doIncrement =
                    (roundingMode
                         == (sign ? softfloat_round_min : softfloat_round_max))
                        && sigExtra;
            }
            if ( doIncrement ) {
                ++sig;
                /* In nearest-even, when the low 63 bits of `sigExtra' are all
                   zero, clear bit 0 of the incremented significand. */
                sig &=
                    ~(uint_fast64_t)
                         (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
                              & roundNearEven);
                /* exp becomes 1 if the increment set the top bit of `sig'. */
                exp = ((sig & UINT64_C( 0x8000000000000000 )) != 0);
            }
            goto packReturn;
        }
        /* Here exp >= 0x7FFE: overflow if exp is above 0x7FFE, or if it is
           0x7FFE, `sig' is all ones and an increment is pending. */
        if (
               (0x7FFE < exp)
            || ((exp == 0x7FFE) && (sig == UINT64_C( 0xFFFFFFFFFFFFFFFF ))
                    && doIncrement)
        ) {
            /*----------------------------------------------------------------
            *----------------------------------------------------------------*/
            /* With no reduced-precision mask, `~roundMask' below is all ones. */
            roundMask = 0;
            /* Shared overflow handling; the reduced-precision path jumps here
               with its own roundMask still set. */
 overflow:
            softfloat_raiseFlags(
                state, softfloat_flag_overflow | softfloat_flag_inexact );
            if (
                   roundNearEven
                || (roundingMode == softfloat_round_near_maxMag)
                || (roundingMode
                        == (sign ? softfloat_round_min : softfloat_round_max))
            ) {
                /* Exponent 0x7FFF with only the top significand bit set
                   (presumably the infinity encoding -- confirm against the
                   format definition). */
                exp = 0x7FFF;
                sig = UINT64_C( 0x8000000000000000 );
            } else {
                /* Exponent 0x7FFE with every kept significand bit set. */
                exp = 0x7FFE;
                sig = ~roundMask;
            }
            goto packReturn;
        }
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    if ( sigExtra ) {
        state->exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) {
            sig |= 1;
            goto packReturn;
        }
#endif
    }
    if ( doIncrement ) {
        ++sig;
        if ( ! sig ) {
            /* The increment wrapped to zero: bump the exponent and set the
               significand to just its top bit. */
            ++exp;
            sig = UINT64_C( 0x8000000000000000 );
        } else {
            /* In nearest-even, when the low 63 bits of `sigExtra' are all
               zero, clear bit 0 of the incremented significand. */
            sig &=
                ~(uint_fast64_t)
                     (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
                          & roundNearEven);
        }
    }
    /*------------------------------------------------------------------------
    *------------------------------------------------------------------------*/
    /* All paths end here: combine sign and exponent into the sign/exponent
       word and store the significand. */
 packReturn:
    uZ.s.signExp = packToExtF80UI64( sign, exp );
    uZ.s.signif = sig;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundPackToF128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Rounds the value described by `sign', `exp', the 128-bit significand
| `sig64':`sig0' and the rounding bits in `sigExtra', and packs the result into
| a `float128_t'.  The rounding mode and tininess-detection setting are read
| from `state'.
|
| Flags that may be set in `state': underflow (tiny and inexact subnormal
| result), overflow together with inexact (exponent out of range), and inexact
| (any nonzero `sigExtra' on the normal path).
*----------------------------------------------------------------------------*/
float128_t
 softfloat_roundPackToF128(
     struct softfloat_state *state,
     bool sign,
     int_fast32_t exp,
     uint_fast64_t sig64,
     uint_fast64_t sig0,
     uint_fast64_t sigExtra
 )
{
    uint_fast8_t roundingMode;
    bool roundNearEven, doIncrement, isTiny;
    struct uint128_extra sig128Extra;
    uint_fast64_t uiZ64, uiZ0;
    struct uint128 sig128;
    union ui128_f128 uZ;

    /*------------------------------------------------------------------------
    | Decide whether the significand must be incremented.  By default this is
    | when the top bit of `sigExtra' is set; in the directed modes it is when
    | the mode rounds in the direction of `sign' and `sigExtra' is nonzero.
    *------------------------------------------------------------------------*/
    roundingMode = state->roundingMode;
    roundNearEven = (roundingMode == softfloat_round_near_even);
    doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    if ( ! roundNearEven && (roundingMode != softfloat_round_near_maxMag) ) {
        doIncrement =
            (roundingMode
                 == (sign ? softfloat_round_min : softfloat_round_max))
                && sigExtra;
    }
    /*------------------------------------------------------------------------
    | The unsigned compare is true both for negative `exp' (which wraps to a
    | large value) and for `exp' >= 0x7FFD.
    *------------------------------------------------------------------------*/
    if ( 0x7FFD <= (uint32_t) exp ) {
        if ( exp < 0 ) {
            /*----------------------------------------------------------------
            | Negative exponent: decide tininess before shifting, shift the
            | significand right by -exp, then recompute the increment decision
            | from the new `sigExtra'.
            *----------------------------------------------------------------*/
            isTiny =
                   (state->detectTininess
                        == softfloat_tininess_beforeRounding)
                || (exp < -1)
                || ! doIncrement
                || softfloat_lt128(
                       sig64,
                       sig0,
                       UINT64_C( 0x0001FFFFFFFFFFFF ),
                       UINT64_C( 0xFFFFFFFFFFFFFFFF )
                   );
            sig128Extra =
                softfloat_shiftRightJam128Extra( sig64, sig0, sigExtra, -exp );
            sig64 = sig128Extra.v.v64;
            sig0  = sig128Extra.v.v0;
            sigExtra = sig128Extra.extra;
            exp = 0;
            if ( isTiny && sigExtra ) {
                softfloat_raiseFlags( state, softfloat_flag_underflow );
            }
            doIncrement = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
            if (
                   ! roundNearEven
                && (roundingMode != softfloat_round_near_maxMag)
            ) {
                doIncrement =
                    (roundingMode
                         == (sign ? softfloat_round_min : softfloat_round_max))
                        && sigExtra;
            }
        } else if (
               (0x7FFD < exp)
            || ((exp == 0x7FFD)
                    && softfloat_eq128(
                           sig64,
                           sig0,
                           UINT64_C( 0x0001FFFFFFFFFFFF ),
                           UINT64_C( 0xFFFFFFFFFFFFFFFF )
                       )
                    && doIncrement)
        ) {
            /*----------------------------------------------------------------
            | Overflow: the exponent is above 0x7FFD, or it equals 0x7FFD and
            | incrementing the maximal significand would carry out.  The result
            | is exponent 0x7FFF with a zero fraction when the mode rounds
            | away in this direction, otherwise exponent 0x7FFE with an
            | all-ones fraction.
            *----------------------------------------------------------------*/
            softfloat_raiseFlags(
                state, softfloat_flag_overflow | softfloat_flag_inexact );
            if (
                   roundNearEven
                || (roundingMode == softfloat_round_near_maxMag)
                || (roundingMode
                        == (sign ? softfloat_round_min : softfloat_round_max))
            ) {
                uiZ64 = packToF128UI64( sign, 0x7FFF, 0 );
                uiZ0  = 0;
            } else {
                uiZ64 =
                    packToF128UI64(
                        sign, 0x7FFE, UINT64_C( 0x0000FFFFFFFFFFFF ) );
                uiZ0 = UINT64_C( 0xFFFFFFFFFFFFFFFF );
            }
            goto uiZ;
        }
    }
    /*------------------------------------------------------------------------
    | Any nonzero extra bits make the result inexact.
    *------------------------------------------------------------------------*/
    if ( sigExtra ) {
        state->exceptionFlags |= softfloat_flag_inexact;
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) {
            sig0 |= 1;
            goto packReturn;
        }
#endif
    }
    if ( doIncrement ) {
        sig128 = softfloat_add128( sig64, sig0, 0, 1 );
        sig64 = sig128.v64;
        /* In nearest-even, when the low 63 bits of `sigExtra' are all zero,
           clear bit 0 of the incremented significand. */
        sig0 =
            sig128.v0
                & ~(uint64_t)
                       (! (sigExtra & UINT64_C( 0x7FFFFFFFFFFFFFFF ))
                            & roundNearEven);
    } else {
        /* A zero significand is packed with a zero exponent. */
        if ( ! (sig64 | sig0) ) exp = 0;
    }
    /*------------------------------------------------------------------------
    | The label is only referenced by the round-to-odd path above, so it is
    | guarded the same way to avoid an unused-label warning when
    | SOFTFLOAT_ROUND_ODD is not defined.
    *------------------------------------------------------------------------*/
#ifdef SOFTFLOAT_ROUND_ODD
 packReturn:
#endif
    uiZ64 = packToF128UI64( sign, exp, sig64 );
    uiZ0  = sig0;
 uiZ:
    uZ.ui.v64 = uiZ64;
    uZ.ui.v0  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundPackToF32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

FEXCORE_PRESERVE_ALL_ATTR
float32_t
 softfloat_roundPackToF32( struct softfloat_state *state, bool sign, int_fast16_t exp, uint_fast32_t sig )
{
    uint_fast8_t mode;
    bool nearEven;
    uint_fast8_t increment, lowBits;
    bool tiny;
    uint_fast32_t packed;
    union ui32_f32 result;

    /*------------------------------------------------------------------------
    | Choose the value added to the 7 low (rounding) bits of `sig': half for
    | the two nearest modes, the full mask when a directed mode rounds in the
    | direction of `sign', and zero otherwise.
    *------------------------------------------------------------------------*/
    mode = state->roundingMode;
    nearEven = (mode == softfloat_round_near_even);
    if ( nearEven || (mode == softfloat_round_near_maxMag) ) {
        increment = 0x40;
    } else if ( mode == (sign ? softfloat_round_min : softfloat_round_max) ) {
        increment = 0x7F;
    } else {
        increment = 0;
    }
    lowBits = sig & 0x7F;
    /*------------------------------------------------------------------------
    | The unsigned compare catches both negative `exp' and `exp' >= 0xFD.
    *------------------------------------------------------------------------*/
    if ( 0xFD <= (unsigned int) exp ) {
        if ( exp < 0 ) {
            /* Tininess is decided before the significand is shifted. */
            tiny =
                (state->detectTininess == softfloat_tininess_beforeRounding)
                    || (exp < -1) || (sig + increment < 0x80000000);
            sig = softfloat_shiftRightJam32( sig, -exp );
            exp = 0;
            lowBits = sig & 0x7F;
            if ( tiny && lowBits ) {
                softfloat_raiseFlags( state, softfloat_flag_underflow );
            }
        } else if ( (0xFD < exp) || (0x80000000 <= sig + increment) ) {
            /* Overflow: exponent 0xFF with a zero fraction, minus one when
               the increment is zero. */
            softfloat_raiseFlags(
                state, softfloat_flag_overflow | softfloat_flag_inexact );
            packed = packToF32UI( sign, 0xFF, 0 ) - ! increment;
            result.ui = packed;
            return result.f;
        }
    }
    /*------------------------------------------------------------------------
    | Apply the increment and drop the 7 rounding bits.
    *------------------------------------------------------------------------*/
    sig = (sig + increment)>>7;
    if ( lowBits ) {
        state->exceptionFlags |= softfloat_flag_inexact;
    }
#ifdef SOFTFLOAT_ROUND_ODD
    if ( lowBits && (mode == softfloat_round_odd) ) {
        sig |= 1;
    } else
#endif
    {
        /* Exact tie in nearest-even: clear bit 0 of the result. */
        if ( nearEven && (lowBits == 0x40) ) {
            sig &= ~(uint_fast32_t) 1;
        }
        /* A zero significand is packed with a zero exponent. */
        if ( ! sig ) exp = 0;
    }
    packed = packToF32UI( sign, exp, sig );
    result.ui = packed;
    return result.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundPackToF64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2017 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

FEXCORE_PRESERVE_ALL_ATTR
float64_t
 softfloat_roundPackToF64( struct softfloat_state *state, bool sign, int_fast16_t exp, uint_fast64_t sig )
{
    uint_fast8_t mode;
    bool nearEven;
    uint_fast16_t increment, lowBits;
    bool tiny;
    uint_fast64_t packed;
    union ui64_f64 result;

    /*------------------------------------------------------------------------
    | Choose the value added to the 10 low (rounding) bits of `sig': half for
    | the two nearest modes, the full mask when a directed mode rounds in the
    | direction of `sign', and zero otherwise.
    *------------------------------------------------------------------------*/
    mode = state->roundingMode;
    nearEven = (mode == softfloat_round_near_even);
    if ( nearEven || (mode == softfloat_round_near_maxMag) ) {
        increment = 0x200;
    } else if ( mode == (sign ? softfloat_round_min : softfloat_round_max) ) {
        increment = 0x3FF;
    } else {
        increment = 0;
    }
    lowBits = sig & 0x3FF;
    /*------------------------------------------------------------------------
    | The 16-bit unsigned compare catches both negative `exp' and
    | `exp' >= 0x7FD.
    *------------------------------------------------------------------------*/
    if ( 0x7FD <= (uint16_t) exp ) {
        if ( exp < 0 ) {
            /* Tininess is decided before the significand is shifted. */
            tiny =
                (state->detectTininess == softfloat_tininess_beforeRounding)
                    || (exp < -1)
                    || (sig + increment < UINT64_C( 0x8000000000000000 ));
            sig = softfloat_shiftRightJam64( sig, -exp );
            exp = 0;
            lowBits = sig & 0x3FF;
            if ( tiny && lowBits ) {
                softfloat_raiseFlags( state, softfloat_flag_underflow );
            }
        } else if (
            (0x7FD < exp)
                || (UINT64_C( 0x8000000000000000 ) <= sig + increment)
        ) {
            /* Overflow: exponent 0x7FF with a zero fraction, minus one when
               the increment is zero. */
            softfloat_raiseFlags(
                state, softfloat_flag_overflow | softfloat_flag_inexact );
            packed = packToF64UI( sign, 0x7FF, 0 ) - ! increment;
            result.ui = packed;
            return result.f;
        }
    }
    /*------------------------------------------------------------------------
    | Apply the increment and drop the 10 rounding bits.
    *------------------------------------------------------------------------*/
    sig = (sig + increment)>>10;
    if ( lowBits ) {
        state->exceptionFlags |= softfloat_flag_inexact;
    }
#ifdef SOFTFLOAT_ROUND_ODD
    if ( lowBits && (mode == softfloat_round_odd) ) {
        sig |= 1;
    } else
#endif
    {
        /* Exact tie in nearest-even: clear bit 0 of the result. */
        if ( nearEven && (lowBits == 0x200) ) {
            sig &= ~(uint_fast64_t) 1;
        }
        /* A zero significand is packed with a zero exponent. */
        if ( ! sig ) exp = 0;
    }
    packed = packToF64UI( sign, exp, sig );
    result.ui = packed;
    return result.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundToI32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

FEXCORE_PRESERVE_ALL_ATTR
int_fast32_t
 softfloat_roundToI32(
     struct softfloat_state *state, bool sign, uint_fast64_t sig, uint_fast8_t roundingMode, bool exact )
{
    uint_fast16_t increment, lowBits;
    uint_fast32_t magnitude;
    union { uint32_t ui; int32_t i; } conv;
    int_fast32_t z;
    bool roundMagnitudeUp;

    /*------------------------------------------------------------------------
    | `sig' carries 12 low rounding bits.  Add half for the two nearest modes;
    | for the other modes add the full mask when the magnitude must be rounded
    | up for this sign, and zero otherwise.
    *------------------------------------------------------------------------*/
    if (
        (roundingMode == softfloat_round_near_maxMag)
            || (roundingMode == softfloat_round_near_even)
    ) {
        increment = 0x800;
    } else {
        if ( sign ) {
            roundMagnitudeUp = (roundingMode == softfloat_round_min);
#ifdef SOFTFLOAT_ROUND_ODD
            if ( roundingMode == softfloat_round_odd ) roundMagnitudeUp = true;
#endif
        } else {
            roundMagnitudeUp = (roundingMode == softfloat_round_max);
        }
        increment = roundMagnitudeUp ? 0xFFF : 0;
    }
    lowBits = sig & 0xFFF;
    sig += increment;
    /*------------------------------------------------------------------------
    | The rounded magnitude must fit in 32 bits above the 12 rounding bits, and
    | the signed result must be zero or have the requested sign; anything else
    | falls through to the invalid handling at the bottom.
    *------------------------------------------------------------------------*/
    if ( ! (sig & UINT64_C( 0xFFFFF00000000000 )) ) {
        magnitude = sig>>12;
        if (
            (lowBits == 0x800) && (roundingMode == softfloat_round_near_even)
        ) {
            /* Exact tie in nearest-even: clear bit 0 of the result. */
            magnitude &= ~(uint_fast32_t) 1;
        }
        conv.ui = sign ? -magnitude : magnitude;
        z = conv.i;
        if ( ! z || ((z < 0) == sign) ) {
            if ( lowBits ) {
#ifdef SOFTFLOAT_ROUND_ODD
                if ( roundingMode == softfloat_round_odd ) z |= 1;
#endif
                if ( exact ) state->exceptionFlags |= softfloat_flag_inexact;
            }
            return z;
        }
    }
    /*------------------------------------------------------------------------
    | Out of range: raise invalid and return the overflow value for the sign.
    *------------------------------------------------------------------------*/
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    return sign ? i32_fromNegOverflow : i32_fromPosOverflow;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundToI64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Rounds the sign/magnitude value described by 'sign', 'sig' and 'sigExtra'
| to a signed 64-bit integer.  'sig' is the integer magnitude and 'sigExtra'
| holds the discarded fraction, with its most significant bit worth one half.
|   'roundingMode' selects how the fraction is rounded.  When 'sigExtra' is
| nonzero and 'exact' is true, the inexact flag is ORed into
| 'state->exceptionFlags'.
|   If incrementing 'sig' wraps it to zero, or the converted result has the
| wrong sign, the invalid flag is raised through 'softfloat_raiseFlags' and
| 'i64_fromNegOverflow' or 'i64_fromPosOverflow' is returned, depending on
| 'sign'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
int_fast64_t
 softfloat_roundToI64(
     struct softfloat_state *state,
     bool sign,
     uint_fast64_t sig,
     uint_fast64_t sigExtra,
     uint_fast8_t roundingMode,
     bool exact
 )
{
    const bool toNearest =
        (roundingMode == softfloat_round_near_even)
            || (roundingMode == softfloat_round_near_maxMag);
    const bool halfway = (sigExtra == UINT64_C( 0x8000000000000000 ));
    bool bump;
    bool wrapped = false;
    union { uint64_t ui; int64_t i; } conv;
    int_fast64_t result;

    /*------------------------------------------------------------------------
    | Decide whether the magnitude is incremented.
    *------------------------------------------------------------------------*/
    if ( toNearest ) {
        bump = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    } else if ( sign ) {
        bump = (roundingMode == softfloat_round_min);
#ifdef SOFTFLOAT_ROUND_ODD
        bump = bump || (roundingMode == softfloat_round_odd);
#endif
        bump = bump && (sigExtra != 0);
    } else {
        bump = (roundingMode == softfloat_round_max) && (sigExtra != 0);
    }
    if ( bump ) {
        ++sig;
        if ( sig == 0 ) {
            /* The increment carried out of the top bit. */
            wrapped = true;
        } else if (
            halfway && (roundingMode == softfloat_round_near_even)
        ) {
            /* Exact tie: force the result to be even. */
            sig &= ~(uint_fast64_t) 1;
        }
    }
    if ( !wrapped ) {
        conv.ui = sign ? -sig : sig;
        result = conv.i;
        /* A nonzero result must carry the requested sign. */
        if ( (result == 0) || ((result < 0) == sign) ) {
            if ( sigExtra ) {
#ifdef SOFTFLOAT_ROUND_ODD
                if ( roundingMode == softfloat_round_odd ) result |= 1;
#endif
                if ( exact ) state->exceptionFlags |= softfloat_flag_inexact;
            }
            return result;
        }
    }
    /*------------------------------------------------------------------------
    | Out of range for a signed 64-bit integer.
    *------------------------------------------------------------------------*/
    softfloat_raiseFlags( state, softfloat_flag_invalid );
    return sign ? i64_fromNegOverflow : i64_fromPosOverflow;

}


================================================
FILE: External/SoftFloat-3e/src/s_roundToUI64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Rounds the sign/magnitude value described by 'sign', 'sig' and 'sigExtra'
| to an unsigned 64-bit integer.  'sig' is the integer magnitude and
| 'sigExtra' holds the discarded fraction, with its most significant bit
| worth one half.
|   'roundingMode' selects how the fraction is rounded.  When 'sigExtra' is
| nonzero and 'exact' is true, the inexact flag is ORed into
| 'state->exceptionFlags'.
|   A negative input is accepted only when it rounds to zero.  Otherwise, or
| when incrementing 'sig' wraps it to zero, the invalid flag is raised through
| 'softfloat_raiseFlags' and 'ui64_fromNegOverflow' or 'ui64_fromPosOverflow'
| is returned, depending on 'sign'.
*----------------------------------------------------------------------------*/
uint_fast64_t
 softfloat_roundToUI64(
     struct softfloat_state *state,
     bool sign,
     uint_fast64_t sig,
     uint_fast64_t sigExtra,
     uint_fast8_t roundingMode,
     bool exact
 )
{
    const bool toNearest =
        (roundingMode == softfloat_round_near_even)
            || (roundingMode == softfloat_round_near_maxMag);
    const bool halfway = (sigExtra == UINT64_C( 0x8000000000000000 ));
    bool bump = false;
    bool reject = false;

    /*------------------------------------------------------------------------
    | Decide whether the magnitude is incremented, or whether the input can
    | be rejected immediately.
    *------------------------------------------------------------------------*/
    if ( toNearest ) {
        bump = (UINT64_C( 0x8000000000000000 ) <= sigExtra);
    } else if ( sign ) {
        /* Negative zero converts to 0 without touching any flag. */
        if ( (sig == 0) && (sigExtra == 0) ) return 0;
        reject = (roundingMode == softfloat_round_min);
#ifdef SOFTFLOAT_ROUND_ODD
        reject = reject || (roundingMode == softfloat_round_odd);
#endif
    } else {
        bump = (roundingMode == softfloat_round_max) && (sigExtra != 0);
    }
    if ( bump ) {
        ++sig;
        if ( sig == 0 ) {
            /* The increment carried out of the top bit. */
            reject = true;
        } else if (
            halfway && (roundingMode == softfloat_round_near_even)
        ) {
            /* Exact tie: force the result to be even. */
            sig &= ~(uint_fast64_t) 1;
        }
    }
    /*------------------------------------------------------------------------
    | A negative input with a nonzero rounded magnitude is not representable.
    *------------------------------------------------------------------------*/
    if ( reject || (sign && sig) ) {
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        return sign ? ui64_fromNegOverflow : ui64_fromPosOverflow;
    }
    if ( sigExtra ) {
#ifdef SOFTFLOAT_ROUND_ODD
        if ( roundingMode == softfloat_round_odd ) sig |= 1;
#endif
        if ( exact ) state->exceptionFlags |= softfloat_flag_inexact;
    }
    return sig;

}


================================================
FILE: External/SoftFloat-3e/src/s_shiftRightJam128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shiftRightJam128

struct uint128
 softfloat_shiftRightJam128( uint64_t a64, uint64_t a0, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128 z;

    if ( dist < 64 ) {
        u8NegDist = -dist;
        z.v64 = a64>>dist;
        z.v0 =
            a64<<(u8NegDist & 63) | a0>>dist
                | ((uint64_t) (a0<<(u8NegDist & 63)) != 0);
    } else {
        z.v64 = 0;
        z.v0 =
            (dist < 127)
                ? a64>>(dist & 63)
                      | (((a64 & (((uint_fast64_t) 1<<(dist & 63)) - 1)) | a0)
                             != 0)
                : ((a64 | a0) != 0);
    }
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shiftRightJam128Extra.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shiftRightJam128Extra

struct uint128_extra
 softfloat_shiftRightJam128Extra(
     uint64_t a64, uint64_t a0, uint64_t extra, uint_fast32_t dist )
{
    uint_fast8_t u8NegDist;
    struct uint128_extra z;

    u8NegDist = -dist;
    if ( dist < 64 ) {
        z.v.v64 = a64>>dist;
        z.v.v0 = a64<<(u8NegDist & 63) | a0>>dist;
        z.extra = a0<<(u8NegDist & 63);
    } else {
        z.v.v64 = 0;
        if ( dist == 64 ) {
            z.v.v0 = a64;
            z.extra = a0;
        } else {
            extra |= a0;
            if ( dist < 128 ) {
                z.v.v0 = a64>>(dist & 63);
                z.extra = a64<<(u8NegDist & 63);
            } else {
                z.v.v0 = 0;
                z.extra = (dist == 128) ? a64 : (a64 != 0);
            }
        }
    }
    z.extra |= (extra != 0);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shiftRightJam32.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_shiftRightJam32

/*----------------------------------------------------------------------------
| Shifts 'a' right by 'dist' bits.  If any nonzero bit is shifted off, bit 0
| of the result is set ("jammed") to 1.  For 'dist' of 31 or more the result
| is simply 1 if 'a' was nonzero and 0 otherwise.
|   NOTE(review): with 'dist' equal to 0 the result is 'a | (a != 0)', which
| sets bit 0 of any nonzero 'a'; presumably callers never pass 0 -- confirm
| against the callers.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint32_t softfloat_shiftRightJam32( uint32_t a, uint_fast16_t dist )
{
    uint32_t sticky;

    if ( 31 <= dist ) return (a != 0);
    /* Left-shifting by the complementary count isolates the lost bits. */
    sticky = ((uint32_t) (a<<(-dist & 31)) != 0);
    return a>>dist | sticky;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shiftRightJam64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_shiftRightJam64

/*----------------------------------------------------------------------------
| Shifts 'a' right by 'dist' bits.  If any nonzero bit is shifted off, bit 0
| of the result is set ("jammed") to 1.  For 'dist' of 63 or more the result
| is simply 1 if 'a' was nonzero and 0 otherwise.
|   NOTE(review): with 'dist' equal to 0 the result is 'a | (a != 0)', which
| sets bit 0 of any nonzero 'a'; presumably callers never pass 0 -- confirm
| against the callers.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint64_t softfloat_shiftRightJam64( uint64_t a, uint_fast32_t dist )
{
    uint64_t sticky;

    if ( 63 <= dist ) return (a != 0);
    /* Left-shifting by the complementary count isolates the lost bits. */
    sticky = ((uint64_t) (a<<(-dist & 63)) != 0);
    return a>>dist | sticky;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shiftRightJam64Extra.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shiftRightJam64Extra

FEXCORE_PRESERVE_ALL_ATTR
struct uint64_extra
 softfloat_shiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast32_t dist )
{
    struct uint64_extra z;

    if ( dist < 64 ) {
        z.v = a>>dist;
        z.extra = a<<(-dist & 63);
    } else {
        z.v = 0;
        z.extra = (dist == 64) ? a : (a != 0);
    }
    z.extra |= (extra != 0);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shortShiftLeft128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shortShiftLeft128

FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_shortShiftLeft128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    struct uint128 z;

    z.v64 = a64<<dist | a0>>(-dist & 63);
    z.v0 = a0<<dist;
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shortShiftRight128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shortShiftRight128

FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_shortShiftRight128( uint64_t a64, uint64_t a0, uint_fast8_t dist )
{
    struct uint128 z;

    z.v64 = a64>>dist;
    z.v0 = a64<<(-dist & 63) | a0>>dist;
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shortShiftRightJam64.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"

#ifndef softfloat_shortShiftRightJam64

/*----------------------------------------------------------------------------
| Shifts 'a' right by 'dist' bits.  If any nonzero bit is shifted off, bit 0
| of the result is set ("jammed") to 1.
|   NOTE(review): the mask is built with '1 << dist', so shift counts of 64
| or more are not handled here; presumably callers keep 'dist' below 64 --
| confirm against the callers.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint64_t softfloat_shortShiftRightJam64( uint64_t a, uint_fast8_t dist )
{
    /* Selects exactly the low 'dist' bits, i.e. the ones about to be lost. */
    const uint_fast64_t lostMask = ((uint_fast64_t) 1<<dist) - 1;
    const uint64_t sticky = ((a & lostMask) != 0);

    return a>>dist | sticky;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_shortShiftRightJam64Extra.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_shortShiftRightJam64Extra

struct uint64_extra
 softfloat_shortShiftRightJam64Extra(
     uint64_t a, uint64_t extra, uint_fast8_t dist )
{
    struct uint64_extra z;

    z.v = a>>dist;
    z.extra = a<<(-dist & 63) | (extra != 0);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_sub128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "primitiveTypes.h"

#ifndef softfloat_sub128

FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_sub128( uint64_t a64, uint64_t a0, uint64_t b64, uint64_t b0 )
{
    struct uint128 z;

    z.v0 = a0 - b0;
    z.v64 = a64 - b64 - (a0 < b0);
    return z;

}

#endif


================================================
FILE: External/SoftFloat-3e/src/s_subMagsExtF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Subtracts the magnitude of operand B from the magnitude of operand A, where
| each operand is an 80-bit extended floating-point value supplied as a
| sign/exponent word ('uiA64', 'uiB64') and a 64-bit significand word ('uiA0',
| 'uiB0').  'signZ' is the sign given to the result when A has the larger
| magnitude; it is inverted when B has the larger magnitude.  Rounding mode,
| rounding precision and exception flags are taken from / recorded in 'state'.
| The result is returned as an 'extFloat80_t'.
*----------------------------------------------------------------------------*/
extFloat80_t
 softfloat_subMagsExtF80(
     struct softfloat_state *state,
     uint_fast16_t uiA64,
     uint_fast64_t uiA0,
     uint_fast16_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    uint_fast64_t sigA;
    int_fast32_t expB;
    uint_fast64_t sigB;
    int_fast32_t expDiff;
    uint_fast16_t uiZ64;
    uint_fast64_t uiZ0;
    int_fast32_t expZ;
    uint_fast64_t sigExtra;
    struct uint128 sig128, uiZ;
    union { struct extFloat80M s; extFloat80_t f; } uZ;

    /*------------------------------------------------------------------------
    | Unpack both operands: exponent via expExtF80UI64 (presumably the
    | exponent field of the sign/exponent word) and the raw 64-bit
    | significand.
    *------------------------------------------------------------------------*/
    expA = expExtF80UI64( uiA64 );
    sigA = uiA0;
    expB = expExtF80UI64( uiB64 );
    sigB = uiB0;
    /*------------------------------------------------------------------------
    | Dispatch on the exponent difference.  The code that follows directly
    | handles the equal-exponent case.
    *------------------------------------------------------------------------*/
    expDiff = expA - expB;
    if ( 0 < expDiff ) goto expABigger;
    if ( expDiff < 0 ) goto expBBigger;
    if ( expA == 0x7FFF ) {
        /* Both exponents are 0x7FFF.  If either significand has any of its
           low 63 bits set, at least one operand is a NaN. */
        if ( (sigA | sigB) & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) {
            goto propagateNaN;
        }
        /* Otherwise neither operand has fraction bits (infinity minus
           infinity): raise invalid and return the default NaN. */
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        uiZ64 = defaultNaNExtF80UI64;
        uiZ0  = defaultNaNExtF80UI0;
        goto uiZ;
    }
    /*------------------------------------------------------------------------
    | Equal, finite exponents: no alignment shift is needed.  An exponent
    | field of 0 is treated as exponent 1.  Compare the significands to find
    | the larger magnitude.
    *------------------------------------------------------------------------*/
    expZ = expA;
    if ( ! expZ ) expZ = 1;
    sigExtra = 0;
    if ( sigB < sigA ) goto aBigger;
    if ( sigA < sigB ) goto bBigger;
    /* Magnitudes are identical: the result is a zero whose sign bit is set
       only when the rounding mode is softfloat_round_min. */
    uiZ64 =
        packToExtF80UI64( (state->roundingMode == softfloat_round_min), 0 );
    uiZ0 = 0;
    goto uiZ;
    /*------------------------------------------------------------------------
    | B has the larger exponent (expDiff is negative here).
    *------------------------------------------------------------------------*/
 expBBigger:
    if ( expB == 0x7FFF ) {
        /* B is a NaN if any of its low 63 significand bits is set;
           otherwise the result is an infinity with the inverted sign. */
        if ( sigB & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
        uiZ64 = packToExtF80UI64( signZ ^ 1, 0x7FFF );
        uiZ0  = UINT64_C( 0x8000000000000000 );
        goto uiZ;
    }
    if ( ! expA ) {
        /* A's exponent field is 0, which acts as exponent 1 (same rule as in
           the equal-exponent path), so the alignment distance shrinks by
           one.  sigExtra must be cleared here because the shift that would
           otherwise set it is skipped when the distance becomes zero. */
        ++expDiff;
        sigExtra = 0;
        if ( ! expDiff ) goto newlyAlignedBBigger;
    }
    /* Align A to B: shift the 128-bit value (sigA:0) right by -expDiff.  The
       upper word is the aligned significand; the lower word keeps the bits
       shifted out (the "Jam" variant presumably folds any bits lost beyond
       128 into a sticky bit -- confirm in softfloat_shiftRightJam128). */
    sig128 = softfloat_shiftRightJam128( sigA, 0, -expDiff );
    sigA = sig128.v64;
    sigExtra = sig128.v0;
 newlyAlignedBBigger:
    expZ = expB;
 bBigger:
    /* B has the larger magnitude, so the result takes the opposite sign and
       is computed as (sigB:0) - (sigA:sigExtra). */
    signZ = ! signZ;
    sig128 = softfloat_sub128( sigB, 0, sigA, sigExtra );
    goto normRoundPack;
    /*------------------------------------------------------------------------
    | A has the larger exponent (expDiff is positive here).  Mirror image of
    | the expBBigger path, without the sign inversion.
    *------------------------------------------------------------------------*/
 expABigger:
    if ( expA == 0x7FFF ) {
        /* A is a NaN if any of its low 63 significand bits is set;
           otherwise A is returned unchanged. */
        if ( sigA & UINT64_C( 0x7FFFFFFFFFFFFFFF ) ) goto propagateNaN;
        uiZ64 = uiA64;
        uiZ0  = uiA0;
        goto uiZ;
    }
    if ( ! expB ) {
        /* B's exponent field is 0 and acts as exponent 1; see the matching
           adjustment in the expBBigger path. */
        --expDiff;
        sigExtra = 0;
        if ( ! expDiff ) goto newlyAlignedABigger;
    }
    /* Align B to A, keeping the shifted-out bits in sigExtra. */
    sig128 = softfloat_shiftRightJam128( sigB, 0, expDiff );
    sigB = sig128.v64;
    sigExtra = sig128.v0;
 newlyAlignedABigger:
    expZ = expA;
 aBigger:
    /* A has the larger magnitude: compute (sigA:0) - (sigB:sigExtra). */
    sig128 = softfloat_sub128( sigA, 0, sigB, sigExtra );
    /*------------------------------------------------------------------------
    | Hand the 128-bit difference to the normalize/round/pack routine, using
    | the rounding precision stored in 'state'.
    *------------------------------------------------------------------------*/
 normRoundPack:
    return
        softfloat_normRoundPackToExtF80(
            state, signZ, expZ, sig128.v64, sig128.v0, state->roundingPrecision );
    /*------------------------------------------------------------------------
    | NaN and special-value exits: build the result directly from a
    | sign/exponent word and a significand word.
    *------------------------------------------------------------------------*/
 propagateNaN:
    uiZ = softfloat_propagateNaNExtF80UI( state, uiA64, uiA0, uiB64, uiB0 );
    uiZ64 = uiZ.v64;
    uiZ0  = uiZ.v0;
 uiZ:
    uZ.s.signExp = uiZ64;
    uZ.s.signif  = uiZ0;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/s_subMagsF128.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of
California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdbool.h>
#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "specialize.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Subtracts the magnitude of operand B from the magnitude of operand A, where
| each operand is a 128-bit floating-point value supplied as an upper 64-bit
| word ('uiA64', 'uiB64') and a lower 64-bit word ('uiA0', 'uiB0').  'signZ'
| is the sign given to the result when A has the larger magnitude; it is
| inverted when B has the larger magnitude.  The rounding mode is read from,
| and exception flags are recorded in, 'state'.
*----------------------------------------------------------------------------*/
float128_t
 softfloat_subMagsF128(
     struct softfloat_state *state,
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0,
     bool signZ
 )
{
    int_fast32_t expA;
    struct uint128 sigA;
    int_fast32_t expB;
    struct uint128 sigB, sigZ;
    int_fast32_t expDiff, expZ;
    struct uint128 uiZ;
    union ui128_f128 uZ;

    /* Unpack exponent and fraction of each operand (expF128UI64 and
       fracF128UI64 presumably extract those fields from the upper word). */
    expA = expF128UI64( uiA64 );
    sigA.v64 = fracF128UI64( uiA64 );
    sigA.v0  = uiA0;
    expB = expF128UI64( uiB64 );
    sigB.v64 = fracF128UI64( uiB64 );
    sigB.v0  = uiB0;
    /* Shift both fractions left by 4 bits; after this the bit OR-ed in below
       for nonzero exponents sits at 0x0010000000000000 of the upper word. */
    sigA = softfloat_shortShiftLeft128( sigA.v64, sigA.v0, 4 );
    sigB = softfloat_shortShiftLeft128( sigB.v64, sigB.v0, 4 );
    /* Dispatch on the exponent difference; the equal-exponent case is
       handled inline. */
    expDiff = expA - expB;
    if ( 0 < expDiff ) goto expABigger;
    if ( expDiff < 0 ) goto expBBigger;
    if ( expA == 0x7FFF ) {
        /* Both exponents are 0x7FFF: any nonzero fraction means a NaN is
           involved; otherwise (infinity minus infinity) raise invalid and
           return the default NaN. */
        if ( sigA.v64 | sigA.v0 | sigB.v64 | sigB.v0 ) goto propagateNaN;
        softfloat_raiseFlags( state, softfloat_flag_invalid );
        uiZ.v64 = defaultNaNF128UI64;
        uiZ.v0  = defaultNaNF128UI0;
        goto uiZ;
    }
    /* Equal, finite exponents: an exponent field of 0 is treated as 1.  No
       leading bit is OR-ed in on this path; both operands have the same
       exponent, so the fractions alone are compared and subtracted. */
    expZ = expA;
    if ( ! expZ ) expZ = 1;
    if ( sigB.v64 < sigA.v64 ) goto aBigger;
    if ( sigA.v64 < sigB.v64 ) goto bBigger;
    if ( sigB.v0 < sigA.v0 ) goto aBigger;
    if ( sigA.v0 < sigB.v0 ) goto bBigger;
    /* Magnitudes are identical: the result is a zero whose sign bit is set
       only when the rounding mode is softfloat_round_min. */
    uiZ.v64 =
        packToF128UI64(
            (state->roundingMode == softfloat_round_min), 0, 0 );
    uiZ.v0 = 0;
    goto uiZ;
 expBBigger:
    /* B has the larger exponent (expDiff is negative here). */
    if ( expB == 0x7FFF ) {
        /* B is a NaN if its fraction is nonzero; otherwise the result is an
           infinity with the inverted sign. */
        if ( sigB.v64 | sigB.v0 ) goto propagateNaN;
        uiZ.v64 = packToF128UI64( signZ ^ 1, 0x7FFF, 0 );
        uiZ.v0  = 0;
        goto uiZ;
    }
    if ( expA ) {
        /* A has a nonzero exponent: add its leading significand bit. */
        sigA.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
        /* A's exponent field is 0 and acts as exponent 1, so the alignment
           distance shrinks by one; skip the shift if it becomes zero. */
        ++expDiff;
        if ( ! expDiff ) goto newlyAlignedBBigger;
    }
    /* Align A to B by shifting it right by -expDiff (the "Jam" variant
       presumably keeps a sticky bit for lost bits -- confirm in
       softfloat_shiftRightJam128). */
    sigA = softfloat_shiftRightJam128( sigA.v64, sigA.v0, -expDiff );
 newlyAlignedBBigger:
    expZ = expB;
    /* expB exceeds expA on this path, so it is nonzero and B always gets its
       leading significand bit. */
    sigB.v64 |= UINT64_C( 0x0010000000000000 );
 bBigger:
    /* B has the larger magnitude: invert the sign and compute B - A. */
    signZ = ! signZ;
    sigZ = softfloat_sub128( sigB.v64, sigB.v0, sigA.v64, sigA.v0 );
    goto normRoundPack;
 expABigger:
    /* A has the larger exponent (expDiff is positive here).  Mirror image of
       the expBBigger path, without the sign inversion. */
    if ( expA == 0x7FFF ) {
        /* A is a NaN if its fraction is nonzero; otherwise A is returned
           unchanged. */
        if ( sigA.v64 | sigA.v0 ) goto propagateNaN;
        uiZ.v64 = uiA64;
        uiZ.v0  = uiA0;
        goto uiZ;
    }
    if ( expB ) {
        sigB.v64 |= UINT64_C( 0x0010000000000000 );
    } else {
        --expDiff;
        if ( ! expDiff ) goto newlyAlignedABigger;
    }
    sigB = softfloat_shiftRightJam128( sigB.v64, sigB.v0, expDiff );
 newlyAlignedABigger:
    expZ = expA;
    sigA.v64 |= UINT64_C( 0x0010000000000000 );
 aBigger:
    /* A has the larger magnitude: compute A - B with the sign unchanged. */
    sigZ = softfloat_sub128( sigA.v64, sigA.v0, sigB.v64, sigB.v0 );
 normRoundPack:
    /* NOTE(review): the "- 5" exponent bias presumably compensates for the
       significand alignment expected by softfloat_normRoundPackToF128 --
       confirm against that routine. */
    return softfloat_normRoundPackToF128( state, signZ, expZ - 5, sigZ.v64, sigZ.v0 );
 propagateNaN:
    uiZ = softfloat_propagateNaNF128UI( state, uiA64, uiA0, uiB64, uiB0 );
 uiZ:
    /* Reinterpret the 128-bit pattern as a float128_t through the union. */
    uZ.ui = uiZ;
    return uZ.f;

}


================================================
FILE: External/SoftFloat-3e/src/softfloat_raiseFlags.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include "platform.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Raises the exceptions specified by `flags'.  Floating-point traps can be
| defined here if desired.  It is currently not possible for such a trap
| to substitute a result value.  If traps are not implemented, this routine
| should be simply `state->exceptionFlags |= flags;'.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_raiseFlags( struct softfloat_state *state, uint_fast8_t flags )
{
    /* No trap handling is implemented: the requested exception bits are
       simply accumulated into the caller-supplied state.  Bits that are
       already set stay set. */
    state->exceptionFlags |= flags;
}


================================================
FILE: External/SoftFloat-3e/src/specialize.h
================================================

/*============================================================================

This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2018 The Regents of the
University of California.  All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#ifndef specialize_h
#define specialize_h 1

#include <stdbool.h>
#include <stdint.h>
#include "primitiveTypes.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Default value for 'softfloat_detectTininess'.
*----------------------------------------------------------------------------*/
#define init_detectTininess softfloat_tininess_afterRounding

/*----------------------------------------------------------------------------
| The values to return on conversions to 32-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui32_fromPosOverflow 0xFFFFFFFF
#define ui32_fromNegOverflow 0xFFFFFFFF
#define ui32_fromNaN         0xFFFFFFFF
#define i32_fromPosOverflow  (-0x7FFFFFFF - 1)
#define i32_fromNegOverflow  (-0x7FFFFFFF - 1)
#define i32_fromNaN          (-0x7FFFFFFF - 1)

/*----------------------------------------------------------------------------
| The values to return on conversions to 64-bit integer formats that raise an
| invalid exception.
*----------------------------------------------------------------------------*/
#define ui64_fromPosOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define ui64_fromNegOverflow UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define ui64_fromNaN         UINT64_C( 0xFFFFFFFFFFFFFFFF )
#define i64_fromPosOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
#define i64_fromNegOverflow  (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)
#define i64_fromNaN          (-INT64_C( 0x7FFFFFFFFFFFFFFF ) - 1)

/*----------------------------------------------------------------------------
| "Common NaN" structure, used to transfer NaN representations from one format
| to another.
*----------------------------------------------------------------------------*/
struct commonNaN {
    /* Sign of the NaN being transferred. */
    bool sign;
    /* Two 64-bit words named like the halves of 'struct uint128'; presumably
       they carry the NaN payload with 'v64' as the upper and 'v0' as the
       lower word -- confirm against the ...ToCommonNaN / commonNaNTo...
       routines.  Only the declaration order of the two words depends on
       LITTLEENDIAN: 'v0' comes first when it is defined, 'v64' otherwise. */
#ifdef LITTLEENDIAN
    uint64_t v0, v64;
#else
    uint64_t v64, v0;
#endif
};

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 16-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF16UI 0xFE00

/*----------------------------------------------------------------------------
| Returns true when 16-bit unsigned integer 'uiA' has the bit pattern of a
| 16-bit floating-point signaling NaN.
| The test has two parts: masking with 0x7E00 selects the five exponent bits
| plus the most-significant fraction bit, and the comparison with 0x7C00
| requires the exponent to be all ones with that fraction bit clear; the
| remaining nine fraction bits (0x01FF) must then be nonzero, which excludes
| infinity.  The sign bit is ignored.
| Note:  This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF16UI( uiA ) ((((uiA) & 0x7E00) == 0x7C00) && ((uiA) & 0x01FF))

/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 16-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
void softfloat_f16UIToCommonNaN( uint_fast16_t uiA, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 16-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
uint_fast16_t softfloat_commonNaNToF16UI( const struct commonNaN *aPtr );

/*----------------------------------------------------------------------------
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 16-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
uint_fast16_t
 softfloat_propagateNaNF16UI( uint_fast16_t uiA, uint_fast16_t uiB );

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 32-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF32UI 0xFFC00000

/*----------------------------------------------------------------------------
| Returns true when 32-bit unsigned integer 'uiA' has the bit pattern of a
| 32-bit floating-point signaling NaN.
| The test has two parts: masking with 0x7FC00000 selects the eight exponent
| bits plus the most-significant fraction bit, and the comparison with
| 0x7F800000 requires the exponent to be all ones with that fraction bit
| clear; the remaining 22 fraction bits (0x003FFFFF) must then be nonzero,
| which excludes infinity.  The sign bit is ignored.
| Note:  This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF32UI( uiA ) ((((uiA) & 0x7FC00000) == 0x7F800000) && ((uiA) & 0x003FFFFF))

/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 32-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_f32UIToCommonNaN( struct softfloat_state *, uint_fast32_t uiA, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 32-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast32_t softfloat_commonNaNToF32UI( const struct commonNaN *aPtr );

/*----------------------------------------------------------------------------
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 32-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
uint_fast32_t
 softfloat_propagateNaNF32UI( uint_fast32_t uiA, uint_fast32_t uiB );

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 64-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF64UI UINT64_C( 0xFFF8000000000000 )

/*----------------------------------------------------------------------------
| Returns true when 64-bit unsigned integer 'uiA' has the bit pattern of a
| 64-bit floating-point signaling NaN.
| The test has two parts: masking with 0x7FF8000000000000 selects the eleven
| exponent bits plus the most-significant fraction bit, and the comparison
| with 0x7FF0000000000000 requires the exponent to be all ones with that
| fraction bit clear; the remaining 51 fraction bits (0x0007FFFFFFFFFFFF)
| must then be nonzero, which excludes infinity.  The sign bit is ignored.
| Note:  This macro evaluates its argument more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF64UI( uiA ) ((((uiA) & UINT64_C( 0x7FF8000000000000 )) == UINT64_C( 0x7FF0000000000000 )) && ((uiA) & UINT64_C( 0x0007FFFFFFFFFFFF )))

/*----------------------------------------------------------------------------
| Assuming 'uiA' has the bit pattern of a 64-bit floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void softfloat_f64UIToCommonNaN( struct softfloat_state *, uint_fast64_t uiA, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 64-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
uint_fast64_t softfloat_commonNaNToF64UI( const struct commonNaN *aPtr );

/*----------------------------------------------------------------------------
| Interpreting 'uiA' and 'uiB' as the bit patterns of two 64-bit floating-
| point values, at least one of which is a NaN, returns the bit pattern of
| the combined NaN result.  If either 'uiA' or 'uiB' has the pattern of a
| signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
uint_fast64_t
 softfloat_propagateNaNF64UI( uint_fast64_t uiA, uint_fast64_t uiB );

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 80-bit extended floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNExtF80UI64 0xFFFF
#define defaultNaNExtF80UI0  UINT64_C( 0xC000000000000000 )

/*----------------------------------------------------------------------------
| Returns true when the 80-bit unsigned integer formed from concatenating
| 16-bit 'uiA64' and 64-bit 'uiA0' has the bit pattern of an 80-bit extended
| floating-point signaling NaN.
| The test has three parts: the low 15 bits of 'uiA64' (the exponent) must
| all be ones; bit 62 of 'uiA0' (0x4000000000000000) must be clear; and at
| least one of the low 62 bits of 'uiA0' (0x3FFFFFFFFFFFFFFF) must be set,
| which excludes infinity.  The sign bit of 'uiA64' and bit 63 of 'uiA0' are
| not examined.
| Note:  This macro evaluates its arguments more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNExtF80UI( uiA64, uiA0 ) ((((uiA64) & 0x7FFF) == 0x7FFF) && ! ((uiA0) & UINT64_C( 0x4000000000000000 )) && ((uiA0) & UINT64_C( 0x3FFFFFFFFFFFFFFF )))

#ifdef SOFTFLOAT_FAST_INT64

/*----------------------------------------------------------------------------
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is
| defined.
*----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
| Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
| has the bit pattern of an 80-bit extended floating-point NaN, converts
| this NaN to the common NaN form, and stores the resulting common NaN at the
| location pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void
 softfloat_extF80UIToCommonNaN(
     struct softfloat_state *, uint_fast16_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
| floating-point NaN, and returns the bit pattern of this value as an unsigned
| integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_commonNaNToExtF80UI( const struct commonNaN *aPtr );

/*----------------------------------------------------------------------------
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
| 'uiA0' as an 80-bit extended floating-point value, and likewise interpreting
| the unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
| 80-bit extended floating-point value, and assuming at least one of these
| floating-point values is a NaN, returns the bit pattern of the combined NaN
| result.  If either original floating-point value is a signaling NaN, the
| invalid exception is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128
 softfloat_propagateNaNExtF80UI(
     struct softfloat_state *,
     uint_fast16_t uiA64,
     uint_fast64_t uiA0,
     uint_fast16_t uiB64,
     uint_fast64_t uiB0
 );

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 128-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF128UI64 UINT64_C( 0xFFFF800000000000 )
#define defaultNaNF128UI0  UINT64_C( 0 )

/*----------------------------------------------------------------------------
| Returns true when the 128-bit unsigned integer formed from concatenating
| 64-bit 'uiA64' and 64-bit 'uiA0' has the bit pattern of a 128-bit floating-
| point signaling NaN.
| The test has two parts: masking 'uiA64' with 0x7FFF800000000000 selects the
| fifteen exponent bits plus the most-significant fraction bit, and the
| comparison with 0x7FFF000000000000 requires the exponent to be all ones
| with that fraction bit clear; the rest of the fraction (all of 'uiA0'
| together with the low 47 bits of 'uiA64') must then be nonzero, which
| excludes infinity.  The sign bit is ignored.
| Note:  This macro evaluates its arguments more than once.
*----------------------------------------------------------------------------*/
#define softfloat_isSigNaNF128UI( uiA64, uiA0 ) ((((uiA64) & UINT64_C( 0x7FFF800000000000 )) == UINT64_C( 0x7FFF000000000000 )) && ((uiA0) || ((uiA64) & UINT64_C( 0x00007FFFFFFFFFFF ))))

/*----------------------------------------------------------------------------
| Assuming the unsigned integer formed from concatenating 'uiA64' and 'uiA0'
| has the bit pattern of a 128-bit floating-point NaN, converts this NaN to
| the common NaN form, and stores the resulting common NaN at the location
| pointed to by 'zPtr'.  If the NaN is a signaling NaN, the invalid exception
| is raised.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
void
 softfloat_f128UIToCommonNaN(
     struct softfloat_state *, uint_fast64_t uiA64, uint_fast64_t uiA0, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
| NaN, and returns the bit pattern of this value as an unsigned integer.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
struct uint128 softfloat_commonNaNToF128UI( const struct commonNaN * );

/*----------------------------------------------------------------------------
| Interpreting the unsigned integer formed from concatenating 'uiA64' and
| 'uiA0' as a 128-bit floating-point value, and likewise interpreting the
| unsigned integer formed from concatenating 'uiB64' and 'uiB0' as another
| 128-bit floating-point value, and assuming at least one of these floating-
| point values is a NaN, returns the bit pattern of the combined NaN result.
| If either original floating-point value is a signaling NaN, the invalid
| exception is raised.
*----------------------------------------------------------------------------*/
struct uint128
 softfloat_propagateNaNF128UI(
     struct softfloat_state *,
     uint_fast64_t uiA64,
     uint_fast64_t uiA0,
     uint_fast64_t uiB64,
     uint_fast64_t uiB0
 );

#else

/*----------------------------------------------------------------------------
| The following functions are needed only when 'SOFTFLOAT_FAST_INT64' is not
| defined.
*----------------------------------------------------------------------------*/

/*----------------------------------------------------------------------------
| Assuming the 80-bit extended floating-point value pointed to by 'aSPtr' is
| a NaN, converts this NaN to the common NaN form, and stores the resulting
| common NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling
| NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
void
 softfloat_extF80MToCommonNaN(
     const struct extFloat80M *aSPtr, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into an 80-bit extended
| floating-point NaN, and stores this NaN at the location pointed to by
| 'zSPtr'.
*----------------------------------------------------------------------------*/
void
 softfloat_commonNaNToExtF80M(
     const struct commonNaN *aPtr, struct extFloat80M *zSPtr );

/*----------------------------------------------------------------------------
| Assuming at least one of the two 80-bit extended floating-point values
| pointed to by 'aSPtr' and 'bSPtr' is a NaN, stores the combined NaN result
| at the location pointed to by 'zSPtr'.  If either original floating-point
| value is a signaling NaN, the invalid exception is raised.
*----------------------------------------------------------------------------*/
void
 softfloat_propagateNaNExtF80M(
     const struct extFloat80M *aSPtr,
     const struct extFloat80M *bSPtr,
     struct extFloat80M *zSPtr
 );

/*----------------------------------------------------------------------------
| The bit pattern for a default generated 128-bit floating-point NaN.
*----------------------------------------------------------------------------*/
#define defaultNaNF128UI96 0xFFFF8000
#define defaultNaNF128UI64 0
#define defaultNaNF128UI32 0
#define defaultNaNF128UI0  0

/*----------------------------------------------------------------------------
| Assuming the 128-bit floating-point value pointed to by 'aWPtr' is a NaN,
| converts this NaN to the common NaN form, and stores the resulting common
| NaN at the location pointed to by 'zPtr'.  If the NaN is a signaling NaN,
| the invalid exception is raised.  Argument 'aWPtr' points to an array of
| four 32-bit elements that concatenate in the platform's normal endian order
| to form a 128-bit floating-point value.
*----------------------------------------------------------------------------*/
void
 softfloat_f128MToCommonNaN( const uint32_t *aWPtr, struct commonNaN *zPtr );

/*----------------------------------------------------------------------------
| Converts the common NaN pointed to by 'aPtr' into a 128-bit floating-point
| NaN, and stores this NaN at the location pointed to by 'zWPtr'.  Argument
| 'zWPtr' points to an array of four 32-bit elements that concatenate in the
| platform's normal endian order to form a 128-bit floating-point value.
*----------------------------------------------------------------------------*/
void
 softfloat_commonNaNToF128M( const struct commonNaN *aPtr, uint32_t *zWPtr );

/*----------------------------------------------------------------------------
| Assuming at least one of the two 128-bit floating-point values pointed to by
| 'aWPtr' and 'bWPtr' is a NaN, stores the combined NaN result at the location
| pointed to by 'zWPtr'.  If either original floating-point value is a
| signaling NaN, the invalid exception is raised.  Each of 'aWPtr', 'bWPtr',
| and 'zWPtr' points to an array of four 32-bit elements that concatenate in
| the platform's normal endian order to form a 128-bit floating-point value.
*----------------------------------------------------------------------------*/
void
 softfloat_propagateNaNF128M(
     const uint32_t *aWPtr, const uint32_t *bWPtr, uint32_t *zWPtr );

#endif

#endif


================================================
FILE: External/SoftFloat-3e/src/ui64_to_extF80.c
================================================

/*============================================================================

This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3e, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
California.  All Rights Reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <stdint.h>
#include "platform.h"
#include "internals.h"
#include "softfloat.h"

/*----------------------------------------------------------------------------
| Converts the unsigned 64-bit integer 'a' to an 80-bit extended value.
| A nonzero 'a' is shifted left until its most significant set bit sits in
| bit 63 of the significand, and the exponent field is 0x403E (0x3FFF + 63)
| minus that shift.  Zero produces an all-zero exponent and significand.
*----------------------------------------------------------------------------*/
FEXCORE_PRESERVE_ALL_ATTR
extFloat80_t ui64_to_extF80( uint64_t a )
{
    union { struct extFloat80M s; extFloat80_t f; } result;
    uint_fast16_t signExp = 0;
    uint64_t signif = a;

    if ( a != 0 ) {
        /* Normalize: the leading-zero count is both the shift distance and
           the amount by which the exponent drops below 0x403E. */
        const int_fast8_t leadingZeros = softfloat_countLeadingZeros64( a );
        signExp = 0x403E - leadingZeros;
        signif = a << leadingZeros;
    }

    result.s.signExp = signExp;
    result.s.signif  = signif;
    return result.f;
}


================================================
FILE: External/cephes/CMakeLists.txt
================================================
# Static library containing the 128-bit (SoftFloat float128_t) build of the
# cephes routines: the C sources under src/128bit plus the C++ wrapper
# (Impl.cpp) that exposes them.
add_library(cephes_128bit STATIC
  src/128bit/Impl.cpp
  src/128bit/atanll.c
  src/128bit/constll.c
  src/128bit/exp2ll.c
  src/128bit/floorll.c
  src/128bit/log2ll.c
  src/128bit/mtherr.c
  src/128bit/polevll.c
  src/128bit/sinll.c
  src/128bit/tanll.c)

# 128-bit library
# The sources are written against SoftFloat types and operations, so the
# target links against softfloat_3e.
target_link_libraries(cephes_128bit softfloat_3e)
# include/ holds the public header (cephes_128bit.h) and is exported to users
# of this target.
target_include_directories(cephes_128bit PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include/)
# Build without the compiler's builtin-function handling.
# NOTE(review): presumably to avoid interference with the libm-like routines
# implemented here -- confirm.
target_compile_options(cephes_128bit PRIVATE -fno-builtin)


================================================
FILE: External/cephes/LICENSE
================================================
The cephes math library is BSD licensed.
The source can be accessed from https://www.netlib.org/cephes/

Original license from https://www.netlib.org/cephes/readme :
>    Some software in this archive may be from the book _Methods and
> Programs for Mathematical Functions_ (Prentice-Hall or Simon & Schuster
> International, 1989) or from the Cephes Mathematical Library, a
> commercial product. In either event, it is copyrighted by the author.
> What you see here may be used freely but it comes with no support or
> guarantee.
>
>    The two known misprints in the book are repaired here in the
> source listings for the gamma function and the incomplete beta
> integral.
>
>
>    Stephen L. Moshier
>    moshier@na-net.ornl.gov

The author was e-mailed and they allowed it to be relicensed under BSD.
Resources:
https://bugs.gentoo.org/687276
https://lists.debian.org/debian-legal/2004/12/msg00295.html
https://github.com/deepmind/torch-cephes/blob/master/LICENSE.txt
https://github.com/nearform/node-cephes/blob/master/LICENSE

E-mail snippet from torch-cephes source:

Return-Path: <steve@moshier.net>
X-Original-To: julien@cornebise.com
Delivered-To: julien@cornebise.com
Received: from atl4mhob11.myregisteredsite.com (atl4mhob11.myregisteredsite.com [209.17.115.49])
    by cornebise.com (Postfix) with ESMTP id D47B139FC0
    for <julien@cornebise.com>; Fri, 25 Oct 2013 16:32:40 +0200 (CEST)
Received: from mailpod1.hostingplatform.com ([10.30.71.116])
    by atl4mhob11.myregisteredsite.com (8.14.4/8.14.4) with ESMTP id r9PEWcwQ003543
    for <julien@cornebise.com>; Fri, 25 Oct 2013 10:32:38 -0400
Received: (qmail 11948 invoked by uid 0); 25 Oct 2013 12:36:20 -0000
X-TCPREMOTEIP: 76.24.25.74
X-Authenticated-UID: steve@moshier.net
Received: from unknown (HELO d510.local) (steve@moshier.net@76.24.25.74)
  by 0 with ESMTPA; 25 Oct 2013 12:36:20 -0000
Date: Fri, 25 Oct 2013 08:36:19 -0400 (EDT)
From: Stephen Moshier <steve@moshier.net>
X-X-Sender: steve@d510
To: Julien Cornebise <julien@cornebise.com>
Subject: Re: Cephes: permission to wrap+distribute for Lua
In-Reply-To: <52653AD3.1010004@cornebise.com>
Message-ID: <alpine.DEB.2.02.1310250827040.17646@d510>
References: <52653AD3.1010004@cornebise.com>
User-Agent: Alpine 2.02 (DEB 1266 2009-07-14)
MIME-Version: 1.0
Content-Type: TEXT/PLAIN; charset=US-ASCII; format=flowed


Julien, thank you for writing.
BSD license is fine, modification is OK.
There are more build scripts available in the web site distributions than
there are on the Netlib.  I think there is an update to Planck's radiation
function that I haven't sent to Netlib yet.  But Netlib is a more stable
site, so it is better to cite that as a reference.


On Mon, 21 Oct 2013, Julien Cornebise wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Dear Mr Moshier
>
> I am a researcher in mathematics and machine learning in London, and
> am writing about your awesome Cephes library, whom I found at the
> heart of Scipy.
>
> It is so useful that, with your permission, I would like to wrap it
> for Lua and Torch (a machine learning overlay to Lua, specialized in
> neural nets, see http://www.torch.ch). I would like to distribute it
> as a package for Torch, including your source code along the wrapping
> code.
> This wouldbe a public package, distributed under BSD License. I have
> put a first draft on github:
> https://github.com/jucor/torch-cephes
>
> Hence my three questions, please:
>
> 1/ How would you like to be acknowledged, beyond the comments that are
> already in your code? Do you have any standard header/disclaimer that
> I could add to the documentation?
>
> 2/ At the moment, your code is left untouched. However, if I ever need
> to modify bits of the code, what are the conditions/restrictions?
> Nothing huge -- I definitely do not want to mess with it: I was
> planning to use the natural completion of some functions on the
> completed real line (e.g. CDF returing 1 when called with "infinity",
> or quantiles returning -Infinity when called with 0), either natively
> if supported, or by setting a specific flag  via mtherr().
>
> 3/ I am currently using the source from Netlib. Do you recommend using
> the source from your website instead ?
>
> Thank you very much for your attention,
> and, more importantly, for the time and effort your poured into Cephes.
>
> Best regards,
>
> Julien Cornebise, Ph.D.
> London, UK
> http://www.cornebise.com/julien
> -----BEGIN PGP SIGNATURE-----
> Version: GnuPG v1.4.14 (Darwin)
> Comment: GPGTools - http://gpgtools.org
> Comment: Using GnuPG with Thunderbird - http://www.enigmail.net/
>
> iEYEARECAAYFAlJlOtEACgkQKYR3gC0rw/gIpQCfZKu6+iDh9ghhm6QfsLXnldKN
> BuIAn2zZHu1c/IrRAevhjM7N7xGg0LHO
> =WeP5
> -----END PGP SIGNATURE-----


================================================
FILE: External/cephes/include/cephes_128bit.h
================================================
#pragma once

extern "C" {
#include "SoftFloat-3e/platform.h"
#include "SoftFloat-3e/softfloat.h"
}

// C++ entry points for the 128-bit cephes math routines. Every operand and
// result is a SoftFloat float128_t. Each function is implemented by the C
// routine named cephes_f128_<name>.
namespace FEXCore::cephes_128bit {
  // Quadrant-aware inverse tangent of y/x; note the argument order (y, x).
  float128_t atan2l(float128_t y, float128_t x);
  // Forwards to cephes_f128_cosl.
  float128_t cosl(float128_t x);
  // Base-2 exponential: 2 raised to the power x.
  float128_t exp2l(float128_t x);
  // Forwards to cephes_f128_log2l.
  float128_t log2l(float128_t x);
  // Forwards to cephes_f128_sinl.
  float128_t sinl(float128_t x);
  // Forwards to cephes_f128_tanl.
  float128_t tanl(float128_t x);
}


================================================
FILE: External/cephes/src/128bit/Impl.cpp
================================================
#include "cephes_128bit.h"

// Prototypes of the C implementations. They are declared with C linkage so
// that this C++ translation unit can call the functions compiled from the
// .c sources of the library.
extern "C" {
// cephes_128bit functions
float128_t cephes_f128_atan2l(float128_t y, float128_t x);
float128_t cephes_f128_cosl(float128_t x);
float128_t cephes_f128_exp2l(float128_t x);
float128_t cephes_f128_log2l(float128_t x);
float128_t cephes_f128_sinl(float128_t x);
float128_t cephes_f128_tanl(float128_t x);
}

// Thin forwarding layer: every function passes its arguments unchanged to the
// C routine cephes_f128_<name> and returns that routine's result.
namespace FEXCore::cephes_128bit {

float128_t atan2l(float128_t y, float128_t x) { return cephes_f128_atan2l(y, x); }

float128_t cosl(float128_t x) { return cephes_f128_cosl(x); }

float128_t exp2l(float128_t x) { return cephes_f128_exp2l(x); }

float128_t log2l(float128_t x) { return cephes_f128_log2l(x); }

float128_t sinl(float128_t x) { return cephes_f128_sinl(x); }

float128_t tanl(float128_t x) { return cephes_f128_tanl(x); }

} // namespace FEXCore::cephes_128bit


================================================
FILE: External/cephes/src/128bit/atanll.c
================================================
/*							atanl.c
 *
 *	Inverse circular tangent, 128-bit float128_t precision
 *      (arctangent)
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, atanl();
 *
 * y = atanl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns radian angle between -pi/2 and +pi/2 whose tangent
 * is x.
 *
 * Range reduction is from four intervals into the interval
 * from zero to  tan( pi/8 ).  The approximant uses a rational
 * function of degree 3/4 of the form x + x**3 P(x)/Q(x).
 *
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      -10, 10    100,000      2.6e-34     6.5e-35
 *
 */
/*							atan2l()
 *
 *	Quadrant correct inverse circular tangent,
 *	float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, z, atan2l();
 *
 * z = atan2l( y, x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns radian angle whose tangent is y/x.
 * Define compile time symbol ANSIC = 1 for ANSI standard,
 * range -PI < z <= +PI, args (y,x); else ANSIC = 0 for range
 * 0 to 2PI, args (x,y).
 *
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      -10, 10    100,000      3.2e-34      5.9e-35
 * See atan.c.
 *
 */

/*							atan.c */


/*
Cephes Math Library Release 2.2:  December, 1990
Copyright 1984, 1990 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"

/* arctan(x) = x + x^3 P(x^2) / Q(x^2)
 * Theoretical peak relative error = 3.0e-36
 * relative peak error spread = 6.6e-8
 *
 * Each float128_t initializer below is written as {low 64 bits, high 64 bits}
 * of the binary128 encoding; the trailing comment gives the decimal value.
 * P is evaluated as a degree-8 polynomial (9 coefficients).
 */
static float128_t P[9] = {
{0xf3f0105b1dae46bfULL, 0xbff45be85838aa26ULL}, // -6.635810778635296712545011270011752799963E-4L,
{0x529a2bf25f15874bULL, 0xbffec0f17ae68a18ULL}, // -8.768423468036849091777415076702113400070E-1L,
{0x3054a2e7144e265cULL, 0xc00397b0dc1f4d10ULL}, // -2.548067867495502632615671450650071218995E1L,
{0x1e19d6b8c5cd9e65ULL, 0xc006f38d4e47779aULL}, // -2.497759878476618348858065206895055957104E2L,
{0x69dcb1e41a413bddULL, 0xc0091f0a8586c642ULL}, // -1.148164399808514330375280133523543970854E3L,
{0x501d0f5157516744ULL, 0xc00a5d08ba650145ULL}, // -2.792272753241044941703278827346430350236E3L,
{0x16f18bf3f5b4b987ULL, 0xc00ace087656cfbeULL}, // -3.696264445691821235400930243493001671932E3L,
{0x2966de608cbf9696ULL, 0xc00a3a5a8d629fc7ULL}, // -2.514829758941713674909996882101723647996E3L,
{0xeb77db69572ecd22ULL, 0xc0085807a6c98431ULL}, // -6.880597774405940432145577545328795037141E2L
};
/* Denominator coefficients; the leading coefficient 1.0 is implicit and is
 * therefore not stored (see the commented-out first entry). */
static float128_t Q[8] = {
/* 1.000000000000000000000000000000000000000E0L, */
{0x0cc994a760137543ULL, 0x40041d4c974b22bcULL}, // 3.566239794444800849656497338030115886153E1L,
{0xa5b186c10b6a065eULL, 0x4007aed5b7e20c37ULL}, // 4.308348370818927353321556740027020068897E2L,
{0x8711ebf202296129ULL, 0x400a37d5c6fdd0cdULL}, // 2.494680540950601626662048893678584497900E3L,
{0x02d59339ee4eee21ULL, 0x400bef892855649eULL}, // 7.928572347062145288093560392463784743935E3L,
{0xd9b903b0950fefb3ULL, 0x400cc7c8d1c45b09ULL}, // 1.458510242529987155225086911411015961174E4L,
{0x174d6e0dae833752ULL, 0x400ce38f8ba0a897ULL}, // 1.547394317752562611786521896296215170819E4L,
{0xfcbdd5dddcf7c68cULL, 0x400c1277f99a3d1aULL}, // 8.782996876218210302516194604424986107121E3L,
{0x7099e48f01631a53ULL, 0x400a0205bd172325ULL}, // 2.064179332321782129643673263598686441900E3L
};

/* tan( 3*pi/8 ): arguments above this threshold are reduced via -1/x */
static float128_t T3P8 = {0x6484597d89b3754bULL, 0x40003504f333f9deULL};

/* tan( pi/8 ): arguments above this threshold are reduced via (x-1)/(x+1) */
static float128_t TP8 = {0x2422cbec4d9baa56ULL, 0x3ffda827999fcef3ULL};

/* Small exact constants: 0, 1, 2 and 3.  The last two are referenced only
 * from the non-ANSIC code paths of atan2l, hence the 'unused' attribute. */
static const float128_t zero = {0, 0};
static const float128_t one = {0, 0x3fff000000000000ULL};
__attribute__((unused)) static const float128_t f_2_p0 = {0x0000000000000000ULL, 0x4000000000000000ULL};
__attribute__((unused)) static const float128_t f_3_p0 = {0x0000000000000000ULL, 0x4000800000000000ULL};

/* Inverse tangent of x.
 *
 * The argument is folded to a non-negative value, reduced into the interval
 * [0, tan(pi/8)] using one of two identities, approximated there by
 * x + x^3 * P(x^2)/Q(x^2), and finally given back the sign of the input.
 */
float128_t cephes_f128_atanl(float128_t x)
{
struct softfloat_state state = {};
float128_t base, xsq, numer, denom, correction, result;
int negative;

/* work on the magnitude; remember whether the input was below zero */
negative = f128_lt(&state, x, zero);
if( negative )
	x = f128_complement_sign(x);

/* range reduction */
if( f128_lt(&state, T3P8, x) )
	{
	/* x > tan(3*pi/8): atan(x) = pi/2 + atan(-1/x) */
	base = F128_PIO2L;
	x = f128_complement_sign( f128_div(&state, one, x));
	}
else if( f128_lt(&state, TP8, x) )
	{
	/* x > tan(pi/8): atan(x) = pi/4 + atan((x-1)/(x+1)) */
	base = F128_PIO4L;
	x = f128_div(&state, f128_sub(&state, x, one), f128_add(&state, x, one));
	}
else
	{
	base = zero;
	}

/* rational form in x**2 */
xsq = f128_mul(&state, x, x);
numer = cephes_f128_polevll( xsq, P, 8 );
denom = cephes_f128_p1evll( xsq, Q, 8 );
correction = f128_mul(&state, f128_mul(&state, f128_div(&state, numer, denom), xsq), x);
result = f128_add(&state, f128_add(&state, base, correction), x);

if( negative )
	result = f128_complement_sign(result);

return result;
}

/*							atan2	*/


/* Quadrant-aware inverse tangent of y/x.
 *
 * With ANSIC the arguments are (y, x); otherwise they are (x, y).
 * Axis cases (x == 0 or y == 0) are answered directly; everything else is
 * atanl(y/x) plus a quadrant-dependent offset.
 */
#if ANSIC
float128_t cephes_f128_atan2l( float128_t y, float128_t x )
#else
float128_t cephes_f128_atan2l( float128_t x, float128_t y )
#endif
{
struct softfloat_state state = {};
float128_t offset, angle;
int x_negative, y_negative;

offset = zero;
x_negative = f128_lt(&state, x, zero);
y_negative = f128_lt(&state, y, zero);

/* x == 0: the result depends only on the sign of y */
if( f128_eq(&state, x, zero) )
	{
	if( y_negative )
		{
#if ANSIC
		return( f128_complement_sign(F128_PIO2L) );
#else
		return( f128_mul(&state, f_3_p0, F128_PIO2L) );
#endif
		}
	if( f128_eq(&state, y, zero) )
		return zero;
	return( F128_PIO2L );
	}

/* y == 0 with x != 0: pi for negative x, otherwise 0 */
if( f128_eq(&state, y, zero) )
	{
	if( x_negative )
		return( F128_PIL );
	return zero;
	}

/* quadrant offset added to atan(y/x) */
#if ANSIC
if( x_negative )
	{
	if( y_negative )
		offset = f128_complement_sign(F128_PIL);
	else
		offset = F128_PIL;
	}
#else
if( x_negative )
	offset = F128_PIL;
else if( y_negative )
	offset = f128_mul(&state, f_2_p0, F128_PIL);
#endif

angle = cephes_f128_atanl( f128_div(&state, y, x) );

return f128_add(&state, offset, angle );
}


================================================
FILE: External/cephes/src/128bit/constll.c
================================================
#include "mconf.h"

/* Shared binary128 constants for the 128-bit cephes routines.
 * Each initializer is {low 64 bits, high 64 bits} of the encoding; the
 * trailing comment on each line gives the decimal value.
 */

/* (1 - 2^-113) 2^16384 : largest finite value */
float128_t F128_MAXNUML = {0xffffffffffffffffULL, 0x7ffeffffffffffffULL}; //1.189731495357231765085759326628007016196469e4932L;

/* 2^-113 */
float128_t F128_MACHEPL = {0x0000000000000000ULL, 0x3f8e000000000000ULL}; // 9.629649721936179265279889712924636592690508e-35L;

/* (1 + 2^-112) 2^-16382 */
float128_t F128_UFTHRESHL = {0x0000000000000001ULL, 0x0001000000000000ULL}; // 3.362103143112093506262677817321753250115591e-4932L;

/* 2^-16494 : smallest positive (denormal) value */
float128_t F128_MINNUML = {0x0000000000000001ULL, 0x0000000000000000ULL}; // 6.475175119438025110924438958227646552499569e-4966L;

/* ln(MAXNUM) */
float128_t F128_MAXLOGL = {0xf35793c7673007e6ULL, 0x400c62e42fefa39eULL}; // 1.1356523406294143949491931077970764891253E4L;

/* ln(MINNUM) */
float128_t F128_MINLOGL = {0x2c89d24d65e96274ULL, 0xc00c654628220780ULL}; // -1.143276959615573793352782661133116431383730e4L;

/* ln(UFTHRESH) -- alternative definition, kept for reference only */
/* float128_t F128_MINLOGL = -1.135513711193302405887309661372784853802025e4L; */

/* pi */
float128_t F128_PIL = {0x8469898cc51701b8ULL, 0x4000921fb54442d1ULL}; // 3.141592653589793238462643383279502884197169L;

/* pi/2 : same significand as pi, exponent one lower */
float128_t F128_PIO2L = {0x8469898cc51701b8ULL, 0x3fff921fb54442d1ULL}; // 1.570796326794896619231321691639751442098585L;

/* pi/4 : same significand as pi, exponent two lower */
float128_t F128_PIO4L =  {0x8469898cc51701b8ULL, 0x3ffe921fb54442d1ULL}; // 0.7853981633974483096156608458198757210492923L;

/* ln(2) */
float128_t F128_LOGE2L =  {0xf35793c7673007e6ULL, 0x3ffe62e42fefa39eULL}; // 0.6931471805599453094172321214581765680755001L;

/* log2(e) */
float128_t F128_LOG2EL =  {0xe1777d0ffda0d23aULL, 0x3fff71547652b82fULL}; // 1.442695040888963407359924681001892137426646L;

/* +infinity : exponent field all ones, zero fraction */
float128_t F128_INFINITYL = {0x0000000000000000ULL, 0x7fff000000000000ULL}; // 1.0L / 0.0L;


================================================
FILE: External/cephes/src/128bit/exp2ll.c
================================================
/*							exp2l.c
 *
 *	Base 2 exponential function, 128-bit float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, exp2l();
 *
 * y = exp2l( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns 2 raised to the x power.
 *
 * Range reduction is accomplished by separating the argument
 * into an integer k and fraction f such that
 *     x    k  f
 *    2  = 2  2.
 *
 * A Pade' form
 *
 *   1 + 2x P(x**2) / (Q(x**2) - x P(x**2) )
 *
 * approximates 2**x in the basic range [-0.5, 0.5].
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      +-16300    100,000      2.0e-34     4.8e-35
 *
 *
 * See exp.c for comments on error amplification.
 *
 *
 * ERROR MESSAGES:
 *
 *   message         condition      value returned
 * exp2l underflow   x < -16382        0.0
 * exp2l overflow    x >= 16384       MAXNUM
 *
 */


/*
Cephes Math Library Release 2.2:  January, 1991
Copyright 1984, 1991 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"
/* Function name passed to mtherr() when reporting overflow/underflow. */
static char fname[] = {"exp2l"};

/* Pade' coefficients for 2^x - 1
   Theoretical peak relative error = 1.4e-40,
   relative peak error spread = 6.8e-14

   Each float128_t initializer is {low 64 bits, high 64 bits} of the
   binary128 encoding; the trailing comment gives the decimal value.
 */
static float128_t P[5] = {
 {0x3008ca100ca13471ULL, 0x40063d6f2f556577ULL}, // 1.587171580015525194694938306936721666031E2L,
 {0x9fac10fe43d72769ULL, 0x40122e00e88b4606ULL}, // 6.185032670011643762127954396427045467506E5L,
 {0x4c22cf0c6c7a8fc7ULL, 0x401c0eb996d98ba4ULL}, // 5.677513871931844661829755443994214173883E8L,
 {0x4acd9b1339dda08aULL, 0x40241d19e728a6beULL}, // 1.530625323728429161131811299626419117557E11L,
 {0xae406b996488ba7aULL, 0x402a0840400c1c84ULL}, // 9.079594442980146270952372234833529694788E12L
};
/* Denominator coefficients; the leading coefficient 1.0 is implicit. */
static float128_t Q[5] = {
/* 1.000000000000000000000000000000000000000E0L, */
 {0xcf48c9db239c2189ULL, 0x400c827029417a6aULL}, // 1.236602014442099053716561665053645270207E4L,
 {0xb20f61f9a3c778b9ULL, 0x40174d9860120d5dULL}, // 2.186249607051644894762167991800811827835E7L,
 {0x9f361a3e85f209ceULL, 0x4020457bc8296e4eULL}, // 1.092141473886177435056423606755843616331E10L,
 {0x2dcf78c66f0a65ddULL, 0x40275b0c5bcbd7a7ULL}, // 1.490560994263653042761789432690793026977E12L,
 {0x4e4a9905cf9c9235ULL, 0x402b7d3bcb89794eULL}, // 2.619817175234089411411070339065679229869E13L
};

/* Argument limits: MAXL2 = 16384 (2^14) is the overflow threshold and
   MINL2 = -16382 the underflow threshold used by cephes_f128_exp2l. */
static const float128_t MAXL2 = {0x0000000000000000ULL, 0x400d000000000000ULL};
static const float128_t MINL2 = {0x0000000000000000ULL, 0xc00cfff000000000ULL};
/* Exact constants 0, 0.5 and 1. */
static const float128_t zero = {0, 0};
static const float128_t f_0_p5 = {0, 0x3ffe000000000000ULL};
static const float128_t one = {0, 0x3fff000000000000ULL};

/* Largest finite value; returned on overflow. */
extern float128_t F128_MAXNUML;

/* Base-2 exponential, 2**x.
 *
 * Arguments >= MAXL2 report OVERFLOW and return F128_MAXNUML; arguments
 * < MINL2 report UNDERFLOW and return zero.  Otherwise x is split into the
 * nearest integer k and a remainder f, 2**f is evaluated by a Pade' form,
 * and the result is scaled by 2**k.
 */
float128_t cephes_f128_exp2l(float128_t x) {
struct softfloat_state state = {};
float128_t whole, frac, frac_sq, numer, denom, pade;
int k;

if( f128_le(&state, MAXL2, x))
	{
	mtherr( fname, OVERFLOW );
	return( F128_MAXNUML );
	}

if(f128_lt(&state, x, MINL2) )
	{
	mtherr( fname, UNDERFLOW );
	return zero;
	}

/* separate into integer and fractional parts: whole = floor(x + 0.5) */
whole = cephes_f128_floorl(f128_add(&state, x, f_0_p5));
k = f128_to_i32(&state, whole, softfloat_round_near_even, true);
frac = f128_sub(&state, x, whole);

/* rational approximation
 * exp2(f) = 1.0 + 2 f P(ff) / (Q(ff) - f P(ff))
 * where ff = f**2
 */
frac_sq = f128_mul(&state, frac, frac);
numer = f128_mul(&state, frac, cephes_f128_polevll( frac_sq, P, 4 ));
denom = f128_sub(&state, cephes_f128_p1evll( frac_sq, Q, 5 ), numer);
pade = f128_div(&state, numer, denom);
pade = f128_add(&state, one, cephes_f128_ldexpl( pade, 1 ));

/* scale by power of 2 */
return cephes_f128_ldexpl( pade, k );
}


================================================
FILE: External/cephes/src/128bit/floorll.c
================================================
/*                                                      ceill()
 *                                                      floorl()
 *                                                      frexpl()
 *                                                      ldexpl()
 *                                                      fabsl()
 *							signbitl()
 *							isnanl()
 *							isfinitel()
 *
 *      Floating point numeric utilities
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y;
 * float128_t ceill(), floorl(), frexpl(), ldexpl(), fabsl();
 * int signbitl(), isnanl(), isfinitel();
 * int expnt, n;
 *
 * y = floorl(x);
 * y = ceill(x);
 * y = frexpl( x, &expnt );
 * y = ldexpl( x, n );
 * y = fabsl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * All four routines return a float128_t precision floating point
 * result.
 *
 * floorl() returns the largest integer less than or equal to x.
 * It truncates toward minus infinity.
 *
 * ceill() returns the smallest integer greater than or equal
 * to x.  It truncates toward plus infinity.
 *
 * frexpl() extracts the exponent from x.  It returns an integer
 * power of two to expnt and the significand between 0.5 and 1
 * to y.  Thus  x = y * 2**expn.
 *
 * ldexpl() multiplies x by 2**n.
 *
 * fabsl() returns the absolute value of its argument.
 *
 * signbitl(x) returns 1 if the sign bit of x is 1, else 0.
 *
 * These functions are part of the standard C run time library
 * for some but not all C compilers.  The ones supplied are
 * written in C for IEEE arithmetic.  They should
 * be used only if your compiler library does not already have
 * them.
 *
 * The IEEE versions assume that denormal numbers are implemented
 * in the arithmetic.  Some modifications will be required if
 * the arithmetic has abrupt rather than gradual underflow.
 */


/*
Cephes Math Library Release 2.2:  July, 1992
Copyright 1984, 1987, 1988, 1992 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"
/* Enable the code paths that handle denormal inputs and results. */
#define DENORMAL 1

/* Unknown arithmetic: emit a warning string, then fall back to the MIEEE
 * configuration with the exponent word at index 0. */
#ifdef UNK
char *unkmsg = "ceill(), floorl(), frexpl(), ldexpl() must be rewritten!\n";
#undef UNK
#define MIEEE 1
#define EXPOFS 0
#endif

/* NBITS  - number of significand bits (including the implicit leading bit).
 * EXPOFS - index, within the value viewed as eight 16-bit words, of the
 *          word holding the sign and exponent: 7 for IBMPC, 0 for MIEEE. */
#ifdef IBMPC
#define NBITS 113
#define EXPOFS 7
#endif

#ifdef MIEEE
#define NBITS 113
#define EXPOFS 0
#endif

extern float128_t F128_MAXNUML;


/* Exact constants 0, 0.5, 1, -1 and 2, written as {low 64 bits, high 64 bits}. */
static const float128_t zero = {0, 0};
static const float128_t f_0_p5 = {0, 0x3ffe000000000000ULL};
static const float128_t one = {0, 0x3fff000000000000ULL};
static const float128_t neg_one = {0, 0xbfff000000000000ULL};
static const float128_t f_2_p0 = {0, 0x4000000000000000ULL};

/* Absolute value of x.
 *
 * The sign bit is cleared directly in the word that holds sign and exponent
 * (index EXPOFS), the same representation access used by frexpl/ldexpl in
 * this file.  Unlike a "x < 0 ? 0 - x : x" formulation this also yields +0
 * for -0 and drops the sign of a negative NaN, because those inputs do not
 * compare less than zero.
 */
float128_t cephes_f128_fabsl(float128_t x)
{
union
  {
    float128_t y;
    unsigned short sh[8];
  } u;

u.y = x;
/* keep the 15 exponent bits, drop the sign bit */
u.sh[EXPOFS] &= 0x7fff;
return( u.y );
}


/* Smallest integral value not less than x.
 *
 * Computed as floorl(x), bumped by one when the floor is strictly below x.
 */
float128_t cephes_f128_ceill(float128_t x)
{
float128_t floored;

#ifdef UNK
mtherr( "ceill", DOMAIN );
return(0.0L);
#endif

struct softfloat_state state = {};
floored = cephes_f128_floorl(x);

/* x already integral (or the comparison is false): floor is the answer */
if( !f128_lt(&state, floored, x) )
        return floored;

return f128_add(&state, floored, one);
}


/* Bit clearing masks:
 * bmask[n] has the n least significant bits of a 16-bit word cleared
 * (n = 0 .. 16), so "word &= bmask[n]" zeroes the low n bits. */

static unsigned short bmask[] = {
0xffff,
0xfffe,
0xfffc,
0xfff8,
0xfff0,
0xffe0,
0xffc0,
0xff80,
0xff00,
0xfe00,
0xfc00,
0xf800,
0xf000,
0xe000,
0xc000,
0x8000,
0x0000,
};


/* Largest integral value not greater than x.
 *
 * The fraction bits of x are cleared in place (whole 16-bit words first,
 * then a partial word via bmask), which truncates toward zero; a negative
 * input that was changed by the truncation is then lowered by one.
 */
float128_t cephes_f128_floorl(float128_t x)
{
union
  {
    float128_t y;
    unsigned short sh[8];
  } bits;
int nclear, word;

#ifdef UNK
mtherr( "floor", DOMAIN );
return(0.0L);
#endif

struct softfloat_state state = {};
bits.y = x;

/* unbiased exponent (power of 2) of the argument */
nclear = (bits.sh[EXPOFS] & 0x7fff) - 0x3fff;

/* magnitude below 1: result is -1 for values below zero, else 0 */
if( nclear < 0 )
        {
        if( f128_lt(&state, bits.y, zero) )
                return neg_one;
        return zero;
        }

/* number of low-order significand bits that lie below the binary point */
nclear = (NBITS - 1) - nclear;

/* index of the least significant 16-bit word */
#ifdef IBMPC
word = 0;
#endif

#ifdef MIEEE
word = 7;
#endif

/* clean out 16 bits at a time */
for( ; nclear >= 16; nclear -= 16 )
        {
#ifdef IBMPC
        bits.sh[word++] = 0;
#endif

#ifdef MIEEE
        bits.sh[word--] = 0;
#endif
        }

/* clear the remaining bits */
if( nclear > 0 )
        bits.sh[word] &= bmask[nclear];

/* truncation moved a negative non-integer toward zero; step down by one */
if( f128_lt(&state, x, zero) && !f128_eq(&state, bits.y, x) )
        bits.y = f128_sub(&state, bits.y, one);

return bits.y;
}


/* Split x into a fraction and a power of two.
 *
 * Stores in *pw2 the exponent e and returns y such that x = y * 2**e, with
 * the magnitude of y in [0.5, 1).  Zero yields y = 0 and e = 0.  Denormal
 * inputs are first doubled until the exponent field becomes nonzero.
 *
 * The sign of x is preserved in the returned fraction: the exponent field
 * is replaced by 0x3ffe while the sign bit of the same word is kept.
 * Overwriting the whole word would return a positive fraction for negative
 * inputs and break the identity x = y * 2**e.
 */
float128_t cephes_f128_frexpl( float128_t x, int *pw2 )
{
union
  {
    float128_t y;
    unsigned short sh[8];
  } u;
int i, k;

struct softfloat_state state = {};
u.y = x;

#ifdef UNK
mtherr( "frexp", DOMAIN );
return zero;
#endif

/* find the exponent (power of 2) */
i  = u.sh[EXPOFS] & 0x7fff;

if( i == 0 )
        {
        if( f128_eq(&state, u.y, zero))
                {
                *pw2 = 0;
                return zero;
                }
/* Number is denormal or zero */
#if DENORMAL
/* Handle denormal number: double it until the exponent field is nonzero,
 * counting the doublings in i (bounded so the loop always terminates). */
do
        {
        u.y = f128_mul(&state, u.y, f_2_p0);
        i -= 1;
        k  = u.sh[EXPOFS] & 0x7fff;
        }
while( (k == 0) && (i > -115) );
i = i + k;
#else
        *pw2 = 0;
        return zero;
#endif /* DENORMAL */
        }

*pw2 = i - 0x3ffe;
/* force the exponent of 0.5 <= |y| < 1 but keep the original sign bit */
u.sh[EXPOFS] = (u.sh[EXPOFS] & 0x8000) | 0x3ffe;
return( u.y );
}


/* Multiply x by 2**pw2.
 *
 * Normal inputs are scaled by adjusting the exponent field directly.
 * Denormal inputs are scaled one binary step at a time (doubling or
 * halving) until either pw2 is used up or the value becomes normal.
 * Exponent overflow returns infinity with the sign of x; results too small
 * even for a denormal return zero.
 *
 * Halving is done by multiplying with 0.5.  (Subtracting 0.5, as a literal
 * reading of "u.y -= 0.5" would do, produces garbage for every denormal
 * input or result.)
 */
float128_t cephes_f128_ldexpl( float128_t x, int pw2 )
{
union
  {
    float128_t y;
    unsigned short sh[8];
  } u;
long e;

#ifdef UNK
mtherr( "ldexp", DOMAIN );
return zero;
#endif

struct softfloat_state state = {};
u.y = x;
/* exponent field of zero: the value is zero or denormal */
while( (e = (u.sh[EXPOFS] & 0x7fffL)) == 0 )
        {
#if DENORMAL
        if( f128_eq(&state, u.y, zero))
                {
                return zero;
                }
/* Input is denormal. */
        if( pw2 > 0 )
                {
                u.y = f128_mul(&state, u.y, f_2_p0);
                pw2 -= 1;
                }
        if( pw2 < 0 )
                {
                if( pw2 < -113 )
                        return zero;
                /* halve the value */
                u.y = f128_mul(&state, u.y, f_0_p5);
                pw2 += 1;
                }
        if( pw2 == 0 )
                return(u.y);
#else
        return zero;
#endif
        }

e = e + pw2;

/* Handle overflow: return infinity carrying the sign of the input */
if( e > 0x7ffeL )
        {
          e = u.sh[EXPOFS];
          u.y = zero;
          u.sh[EXPOFS] = e | 0x7fff;
          return( u.y );
        }
/* keep only the sign bit; the new exponent is OR-ed in below */
u.sh[EXPOFS] &= 0x8000;
/* Handle denormalized results */
if( e < 1 )
        {
#if DENORMAL
        if( e < -113 )
                return zero;
        /* start from the smallest normal exponent and halve down to the
         * target, letting the arithmetic produce the denormal encoding */
        u.sh[EXPOFS] |= 1;
        while( e < 1 )
                {
                u.y = f128_mul(&state, u.y, f_0_p5);
                e += 1;
                }
        e = 0;
#else
        return zero;
#endif
        }

u.sh[EXPOFS] |= e & 0x7fff;
return(u.y);
}

/* Return 1 if x is a number that is Not a Number, else return 0.
 *
 * x is a NaN when its 15-bit exponent field is all ones and at least one
 * fraction bit is set.  In this 128-bit layout every bit below the
 * sign/exponent halfword is a fraction bit (there is no explicit integer
 * bit), so the halfword next to the exponent is tested in full; masking
 * it with 0x7fff would miss a NaN whose only set fraction bit is the most
 * significant one.
 *
 * Without NANS defined the function always returns 0.
 */

int cephes_f128_isnanl(float128_t x)
{
#ifdef NANS
union
	{
	float128_t d;
	unsigned short s[8];
	unsigned int i[4];
	} u;

u.d = x;

if( sizeof(int) == 4 )
	{
#ifdef IBMPC
	/* u.i[3] holds s[6] in its low half and sign/exponent in its high half. */
	if( ((u.s[7] & 0x7fff) == 0x7fff)
	    && ((u.i[3] & 0xffff) | u.i[2] | u.i[1] | u.i[0]))
		return 1;
#endif
#ifdef MIEEE
	/* u.i[0] holds sign/exponent in its high half and s[1] in its low half. */
	if( ((u.i[0] & 0x7fff0000) == 0x7fff0000)
	    && ((u.i[0] & 0xffff) | u.i[1] | u.i[2] | u.i[3]))
		return 1;
#endif
	return(0);
	}
else
	{ /* size int not 4 */
#ifdef IBMPC
	if( (u.s[7] & 0x7fff) == 0x7fff)
		{
		if(u.s[6] | u.s[5] | u.s[4] | u.s[3] | u.s[2] | u.s[1] | u.s[0])
			return(1);
		}
#endif
#ifdef MIEEE
	if( (u.s[0] & 0x7fff) == 0x7fff)
		{
		if(u.s[1] | u.s[2] | u.s[3] | u.s[4] | u.s[5] | u.s[6] | u.s[7])
			return(1);
		}
#endif
	return(0);
	} /* size int not 4 */

#else
/* No NANS.  */
return(0);
#endif
}


/* Finite test: 1 when the exponent field of x is not all ones
 * (x is neither an infinity nor a NaN), otherwise 0.
 * Always 1 when INFINITIES is not configured.
 */

int cephes_f128_isfinitel(float128_t x)
{
#ifdef INFINITIES
union
	{
	float128_t d;
	unsigned short s[8];
	unsigned int i[4];
	} bits;

bits.d = x;

#ifdef IBMPC
/* Little-endian: sign/exponent live in the last halfword,
 * whatever the width of int.
 */
if( (bits.s[7] & 0x7fff) != 0x7fff )
	return 1;
#endif

#ifdef MIEEE
/* Big-endian: sign/exponent come first; pick the view matching int width. */
if( sizeof(int) == 4 )
	{
	if( (bits.i[0] & 0x7fff0000) != 0x7fff0000 )
		return 1;
	}
else
	{
	if( (bits.s[0] & 0x7fff) != 0x7fff )
		return 1;
	}
#endif

return 0;
#else
/* No INFINITY.  */
return 1;
#endif
}


/* Sign test: 1 when the sign bit of x is set, 0 when it is clear.
 * The word holding the sign is read as a signed integer, so a set
 * sign bit shows up as a negative value.
 */

int cephes_f128_signbitl(float128_t x)
{
union
	{
	float128_t d;
	short s[8];
	int i[4];
	} bits;

bits.d = x;

#ifdef DEC
error no such DEC format
#endif

#ifdef IBMPC
/* Little-endian: the sign sits in the last halfword for any int width. */
return( bits.s[7] < 0 );
#endif

#ifdef MIEEE
/* Big-endian: the sign sits in the first word. */
if( sizeof(int) == 4 )
	return( bits.i[0] < 0 );
return( bits.s[0] < 0 );
#endif
}


================================================
FILE: External/cephes/src/128bit/log2ll.c
================================================
/*							cephes_f128_log2l.c
 *
 *	Base 2 logarithm, float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, cephes_f128_log2l();
 *
 * y = cephes_f128_log2l( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns the base 2 logarithm of x.
 *
 * The argument is separated into its exponent and fractional
 * parts.  If the exponent is between -1 and +1, the (natural)
 * logarithm of the fraction is approximated by
 *
 *     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
 *
 * Otherwise, setting  z = 2(x-1)/(x+1),
 * 
 *     log(x) = z + z**3 P(z)/Q(z).
 *
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE      0.5, 2.0     100,000    1.3e-34     4.5e-35
 *    IEEE     exp(+-10000)  100,000    9.6e-35     4.0e-35
 *
 * In the tests over the interval exp(+-10000), the logarithms
 * of the random arguments were uniformly distributed over
 * [-10000, +10000].
 *
 * ERROR MESSAGES:
 *
 * log singularity:  x = 0; returns -16384.0 (the file-local constant `indeterminate`)
 * log domain:       x < 0; returns -16384.0 (the file-local constant `indeterminate`)
 */

/*
Cephes Math Library Release 2.2:  January, 1991
Copyright 1984, 1991 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"
/* Function name handed to mtherr() when reporting an error. */
static char fname[] = {"cephes_f128_log2l"};

/* All float128_t constants below are written as two 64-bit words.  The
 * second word carries the sign and the 15-bit biased exponent in its top
 * 16 bits (compare `one`, whose second word starts with 0x3fff); the
 * first word is presumably the low half of the fraction - confirm
 * against the float128_t definition in SoftFloat.  The decimal value
 * each entry encodes is given in the trailing comment.
 */

/* Coefficients for ln(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 5.3e-37,
 * relative peak error spread = 2.3e-14
 */
/* Numerator, highest-order coefficient first (evaluated with polevll, degree 12). */
static float128_t P[13] = {
	{0x95434922008560fcULL, 0x3feb9d04a0d6ed82ULL}, // 1.538612243596254322971797716843006400388E-6L
	{0x2e9cb5e91a8c2fa0ULL, 0x3ffdffd7e21347ccULL}, // 4.998469661968096229986658302195402690910E-1L
	{0x674c43ea62a592e7ULL, 0x400373615178fe96ULL}, // 2.321125933898420063925789532045674660756E1L
	{0xfa539715d5fd0560ULL, 0x40079b73a8639c28ULL}, // 4.114517881637811823002128927449878962058E2L
	{0x5ec5c60d38b7fa2aULL, 0x400ade1e79b3ae12ULL}, // 3.824952356185897735160588078446136783779E3L
	{0x6369f0cada64eeecULL, 0x400d4ca24f0550cfULL}, // 2.128857716871515081352991964243375186031E4L
	{0x115104b644c1f464ULL, 0x400f28a791822d40ULL}, // 7.594356839258970405033155585486712125861E4L
	{0x95ec43488121aff8ULL, 0x40105f196a49f171ULL}, // 1.797628303815655343403735250238293741397E5L
	{0xa2484b7171ab5034ULL, 0x401116caba9f2757ULL}, // 2.854829159639697837788887080758954924001E5L
	{0xe49b2bf8646a8a1eULL, 0x401125a72eb05ba7ULL}, // 3.007007295140399532324943111654767187848E5L
	{0x17ac5c737d1b8ad4ULL, 0x4010897ca319418dULL}, // 2.014652742082537582487669938141683759923E5L
	{0x9ff15925da76d408ULL, 0x400f2f8f8bfbf9a1ULL}, // 7.771154681358524243729929227226708890930E4L
	{0xe740b8544d79077cULL, 0x400c9a7dcad5d0efULL}, // 1.313572404063446165910279910527789794488E4L
};
/* Denominator; the leading coefficient 1.0 is implied (evaluated with p1evll, n = 12). */
static float128_t Q[12] = {
/* 1.000000000000000000000000000000000000000E0L, */
{0x4a2113daac8d7fa5ULL,0x40048322fbda4d3fULL}, // 4.839208193348159620282142911143429644326E1L,
{0x9efb2fe2c778f56fULL,0x4008c73f14777e56ULL}, // 9.104928120962988414618126155557301584078E2L,
{0xf23a98d434d3a705ULL,0x400c1dd933ea5565ULL}, // 9.147150349299596453976674231612674085381E3L,
{0x4b44059a3b76f461ULL,0x400eb5f4d77aed02ULL}, // 5.605842085972455027590989944010492125825E4L,
{0x2962234d48fff0bcULL,0x4010b71bb67f5effULL}, // 2.248234257620569139969141618556349415120E5L,
{0xe673c713bcf24ee3ULL,0x40122b6c5ddac3b8ULL}, // 6.132189329546557743179177159925690841200E5L,
{0x34d8d36e8de37c71ULL,0x40131ab83fa3b03bULL}, // 1.158019977462989115839826904108208787040E6L,
{0x061338bb0e95b314ULL,0x401371d8273f762aULL}, // 1.514882452993549494932585972882995548426E6L,
{0xe379b5d8e7071d74ULL,0x401348fbe89d38e2ULL}, // 1.347518538384329112529391120390701166528E6L,
{0x412eafafea233277ULL,0x40127bc5211688c1ULL}, // 7.777690340007566932935753241556479363645E5L,
{0x16378fd2514ba129ULL,0x40110088814003eaULL}, // 2.626900195321832660448791748036714883242E5L,
{0xed708a3f3a1ac5caULL,0x400e33de58205cb3ULL}, // 3.940717212190338497730839731583397586124E4L
};

/* Coefficients for log(x) = z + z^3 P(z^2)/Q(z^2),
 * where z = 2(x-1)/(x+1)
 * 1/sqrt(2) <= x < sqrt(2)
 * Theoretical peak relative error = 1.1e-35,
 * relative peak error spread 1.1e-9
 */
/* Numerator, highest-order coefficient first (evaluated with polevll, degree 5). */
static float128_t R[6] = {
	{0x68479d54e4ced708ULL, 0xbffec40a1c874f5aULL}, // -8.828896441624934385266096344596648080902E-1L,
	{0x565b5611a30df628ULL, 0x40054247b533971eULL}, // 8.057002716646055371965756206836056074715E1L,
	{0xb690eddd457e03b0ULL, 0xc009fa1350a9210eULL}, // -2.024301798136027039250415126250455056397E3L,
	{0xea1230d4dc2a41c8ULL, 0x400d4020cbb3c4edULL}, // 2.048819892795278657810231591630928516206E4L,
	{0x388e5d3ae806c32aULL, 0xc00f5eac94780e23ULL}, // -8.977257995689735303686582344659576526998E4L,
	{0x6802a6fb3250b4fdULL, 0x401014fab5e2e8c1ULL}, // 1.418134209872192732479751274970992665513E5L
};
/* Denominator; the leading coefficient 1.0 is implied (evaluated with p1evll, n = 6). */
static float128_t S[6] = {
/* 1.000000000000000000000000000000000000000E0L, */
 {0x2575cd7cadd52c63ULL, 0xc005da8b34108b63ULL}, // -1.186359407982897997337150403816839480438E2L,
 {0x9022bf51e9d20aecULL, 0x400af3d0db24df08ULL}, // 3.998526750980007367835804959888064681098E3L,
 {0xeb27fc1032bb267dULL, 0xc00ec11ad77cc51cULL}, // -5.748542087379434595104154610899551484314E4L,
 {0xaeec5bd6a5211cbdULL, 0x401186c6f13df72eULL}, // 4.001557694070773974936904547424676279307E5L,
 {0xee9e91e4b3020178ULL, 0xc013455371e04bc5ULL}, // -1.332535117259762928288745111081235577029E6L,
 {0x1c03fa78cb791730ULL, 0x40139f7810d45d22ULL}, // 1.701761051846631278975701529965589676574E6L
};
/* log2(e) - 1 */
static const float128_t LOG2EA = {0x85ddf43ff68348eaULL, 0x3ffdc551d94ae0bfULL};

/* Threshold compared against the fraction returned by frexpl; by its name and encoding presumably sqrt(2)/2. */
static const float128_t SQRTH = {0xc908b2fb1366ea95ULL,  0x3ffe6a09e667f3bcULL};
static const float128_t zero = {0, 0};
static const float128_t f_0_p5 = {0, 0x3ffe000000000000ULL};
static const float128_t one = {0, 0x3fff000000000000ULL};

/* Value returned by cephes_f128_log2l for x <= 0: sign set, exponent 0x400d, zero fraction (-2**14). */
static const float128_t indeterminate = {0x0000000000000000ULL, 0xc00d000000000000ULL};

/* Base 2 logarithm of x.
 *
 * For x <= 0 an error is reported through mtherr() (SING for zero,
 * DOMAIN for negative arguments) and `indeterminate` is returned.
 * Otherwise x is split into fraction and exponent, the natural
 * logarithm of the fraction is approximated by one of two rational
 * forms, and the pieces are recombined as log2.
 */
float128_t cephes_f128_log2l(float128_t x) {
VOLATILE float128_t z;
float128_t y;
float128_t num, den;
int e;

struct softfloat_state state = {};

/* Reject arguments outside the domain */
if( f128_le(&state, x, zero) )
	{
	mtherr( fname, f128_eq(&state, x, zero) ? SING : DOMAIN );
	return indeterminate;
	}

/* Split off the exponent.  frexp is used so that denormal
 * arguments are normalized on the way.
 */
x = cephes_f128_frexpl( x, &e );

if( (e >= -2) && (e <= 2) )
	{
	/* Small exponent:
	 * log(1+x) = x - .5x**2 + x**3 P(x)/Q(x)
	 */
	if( f128_lt(&state, x, SQRTH) )
		{
		e -= 1;
		x = f128_sub(&state, cephes_f128_ldexpl( x, 1 ), one); /*  2x - 1  */
		}
	else
		{
		x = f128_sub(&state, x, one);
		}
	z = f128_mul(&state, x, x);
	num = f128_mul(&state, z, cephes_f128_polevll( x, P, 12 ));
	den = cephes_f128_p1evll( x, Q, 12 );
	y = f128_mul(&state, x, f128_div(&state, num, den));
	y = f128_sub(&state, y, cephes_f128_ldexpl( z, -1 ));   /* -0.5x^2 + ... */
	}
else
	{
	/* Large exponent:
	 * log(x) = z + z**3 R(z**2)/S(z**2), where z = 2(x-1)/(x+1)
	 */
	z = f128_sub(&state, x, f_0_p5);
	if( f128_lt(&state, x, SQRTH) )
		{ /* 2( 2x-1 )/( 2x+1 ) */
		e -= 1;
		y = f128_add(&state, f128_mul(&state, f_0_p5, z), f_0_p5);
		}
	else
		{ /*  2 (x-1)/(x+1)   */
		y = f128_add(&state, f128_mul(&state, f_0_p5, x), f_0_p5);
		z = f128_sub(&state, z, f_0_p5);
		}
	x = f128_div(&state, z, y);
	z = f128_mul(&state, x, x);
	num = f128_mul(&state, z, cephes_f128_polevll( z, R, 5 ));
	den = cephes_f128_p1evll( z, S, 6 );
	y = f128_mul(&state, x, f128_div(&state, num, den));
	}

/* Recombine: the log of the fraction is x + y; multiply it by
 * log2(e) = 1 + LOG2EA and add the base 2 exponent.
 *
 * The order of these additions is deliberate (smallest terms
 * first); do not rearrange them.
 */
z = f128_mul(&state, y, LOG2EA);
z = f128_add(&state, z, f128_mul(&state, x, LOG2EA));
z = f128_add(&state, z, y);
z = f128_add(&state, z, x);
z = f128_add(&state, z, i32_to_f128(e));
return z;
}


================================================
FILE: External/cephes/src/128bit/mconf.h
================================================
/*							mconf.h
 *
 *	Common include file for math routines
 *
 *
 *
 * SYNOPSIS:
 *
 * #include "mconf.h"
 *
 *
 *
 * DESCRIPTION:
 *
 * This file contains definitions for error codes that are
 * passed to the common error handling routine mtherr()
 * (which see).
 *
 * The file also includes a conditional assembly definition
 * for the type of computer arithmetic (IEEE, DEC, Motorola
 * IEEE, or UNKnown).
 * 
 * For Digital Equipment PDP-11 and VAX computers, certain
 * IBM systems, and others that use numbers with a 56-bit
 * significand, the symbol DEC should be defined.  In this
 * mode, most floating point constants are given as arrays
 * of octal integers to eliminate decimal to binary conversion
 * errors that might be introduced by the compiler.
 *
 * For little-endian computers, such as IBM PC, that follow the
 * IEEE Standard for Binary Floating Point Arithmetic (ANSI/IEEE
 * Std 754-1985), the symbol IBMPC should be defined.  These
 * numbers have 53-bit significands.  In this mode, constants
 * are provided as arrays of hexadecimal 16 bit integers.
 *
 * Big-endian IEEE format is denoted MIEEE.  On some RISC
 * systems such as Sun SPARC, double precision constants
 * must be stored on 8-byte address boundaries.  Since integer
 * arrays may be aligned differently, the MIEEE configuration
 * may fail on such machines.
 *
 * To accommodate other types of computer arithmetic, all
 * constants are also provided in a normal decimal radix
 * which one can hope are correctly converted to a suitable
 * format by the available C language compiler.  To invoke
 * this mode, define the symbol UNK.
 *
 * An important difference among these modes is a predefined
 * set of machine arithmetic constants for each.  The numbers
 * MACHEP (the machine roundoff error), MAXNUM (largest number
 * represented), and several other parameters are preset by
 * the configuration symbol.  Check the file const.c to
 * ensure that these values are correct for your computer.
 *
 * Configurations NANS, INFINITIES, MINUSZERO, and DENORMAL
 * may fail on many systems.  Verify that they are supposed
 * to work on your computer.
 */

/*
Cephes Math Library Release 2.3:  June, 1995
Copyright 1984, 1987, 1989, 1995 by Stephen L. Moshier
*/


/* Constant definitions for math error conditions
 */

#include "SoftFloat-3e/platform.h"
#include "SoftFloat-3e/softfloat.h"

#define DOMAIN		1	/* argument domain error */
#define SING		2	/* argument singularity */
#define OVERFLOW	3	/* overflow range error */
#define UNDERFLOW	4	/* underflow range error */
#define TLOSS		5	/* total loss of precision */
#define PLOSS		6	/* partial loss of precision */

#define EDOM		33
#define ERANGE		34

/* Complex numeral.  */
typedef struct
	{
	double r;
	double i;
	} cmplx;

typedef struct
	{
	float r;
	float i;
	} cmplxf;

/* Long double complex numeral.  */

typedef struct
	{
	float128_t r;
	float128_t i;
	} cmplxl;


/* Type of computer arithmetic */

/* PDP-11, Pro350, VAX:
 */
/* #define DEC 1 */

/* Intel IEEE, low order words come first:
 */
#define IBMPC 1

/* Motorola IEEE, high order words come first
 * (Sun 680x0 workstation):
 */
/* #define MIEEE 1 */

/* UNKnown arithmetic, invokes coefficients given in
 * normal decimal format.  Beware of range boundary
 * problems (MACHEP, MAXLOG, etc. in const.c) and
 * roundoff problems in pow.c:
 * (Sun SPARCstation)
 */
/* #define UNK 1 */

/* If you define UNK, then be sure to set BIGENDIAN properly. */
/* #define BIGENDIAN 1 */

/* Define this `volatile' if your compiler thinks
 * that floating point arithmetic obeys the associative
 * and distributive laws.  It will defeat some optimizations
 * (but probably not enough of them).
 *
 * #define VOLATILE volatile
 */
#define VOLATILE

/* For 12-byte long doubles on an i386, pad a 16-bit short 0
 * to the end of real constants initialized by integer arrays.
 *
 * #define XPD 0,
 *
 * Otherwise, the type is 10 bytes long and XPD should be
 * defined blank (e.g., Microsoft C).
 *
 * #define XPD
 */
#define XPD 0,

/* Define to support tiny denormal numbers, else undefine. */
#define DENORMAL 1

/* Define to ask for infinity support, else undefine. */
#define INFINITIES 1

/* Define to ask for support of numbers that are Not-a-Number,
   else undefine.  This may automatically define INFINITIES in some files. */
#define NANS 1

/* Define to distinguish between -0.0 and +0.0.  */
#define MINUSZERO 1

/* Define 1 for ANSI C atan2() function
   and ANSI prototypes for float arguments.
   See atan.c and clog.c. */
#define ANSIC 1

/* Variable for error reporting.  See mtherr.c, where it is defined
 * and initialised to 0.
 */
extern int merror;

/* Forward declarations */
/* Library constants; their definitions live outside this header. */
extern float128_t F128_MINLOGL;

extern float128_t F128_MAXNUML;
extern float128_t F128_PIL;
extern float128_t F128_PIO2L, F128_PIO4L;

float128_t cephes_f128_atanl(float128_t x);
/* The argument order of atan2l depends on ANSIC (defined above). */
#if ANSIC
float128_t cephes_f128_atan2l( float128_t y, float128_t x );
#else
float128_t cephes_f128_atan2l( float128_t x, float128_t y );
#endif
float128_t cephes_f128_ceill(float128_t x);
float128_t cephes_f128_cosl(float128_t x);
float128_t cephes_f128_fabsl(float128_t x);
float128_t cephes_f128_floorl(float128_t x);
/* Splits x into a fraction (returned) and a power of two stored in *pw2. */
float128_t cephes_f128_frexpl( float128_t x, int *pw2 );
/* Classification helpers; each returns 1 or 0. */
int cephes_f128_isfinitel(float128_t x);
int cephes_f128_isnanl(float128_t x);
/* Scales x by 2**pw2. */
float128_t cephes_f128_ldexpl( float128_t x, int pw2 );
/* Polynomial evaluators; PP points to an array of float128_t
 * coefficients, highest order first.  p1evll assumes an implied
 * leading coefficient of 1.0.
 */
float128_t cephes_f128_polevll( float128_t x, void *PP, int n );
float128_t cephes_f128_p1evll( float128_t x, void *PP, int n );
int cephes_f128_signbitl(float128_t x);
float128_t cephes_f128_sinl(float128_t x);
/* Common error hook; name is the reporting function, code one of the
 * error constants defined above (DOMAIN ... PLOSS).
 */
int mtherr( char *name, int code );

/* Public symbol declarations */
float128_t cephes_f128_log2l(float128_t x);


================================================
FILE: External/cephes/src/128bit/mtherr.c
================================================
/*							mtherr.c
 *
 *	Library common error handling routine
 *
 *
 *
 * SYNOPSIS:
 *
 * char *fctnam;
 * int code;
 * int mtherr();
 *
 * mtherr( fctnam, code );
 *
 *
 *
 * DESCRIPTION:
 *
 * This routine may be called to report one of the following
 * error conditions (in the include file mconf.h).
 *  
 *   Mnemonic        Value          Significance
 *
 *    DOMAIN            1       argument domain error
 *    SING              2       function singularity
 *    OVERFLOW          3       overflow range error
 *    UNDERFLOW         4       underflow range error
 *    TLOSS             5       total loss of precision
 *    PLOSS             6       partial loss of precision
 *    EDOM             33       Unix domain error code
 *    ERANGE           34       Unix range error code
 *
 * The default version of the file prints the function name,
 * passed to it by the pointer fctnam, followed by the
 * error condition.  The display is directed to the standard
 * output device.  The routine then returns to the calling
 * program.  Users may wish to modify the program to abort by
 * calling exit() under severe error conditions such as domain
 * errors.
 *
 * In this copy the printing code and the assignment to merror
 * are disabled with #if 0, so mtherr() does nothing except
 * return 0 to the caller.
 *
 * Since all error conditions pass control to this function,
 * the display may be easily changed, eliminated, or directed
 * to an error logging device.
 *
 * SEE ALSO:
 *
 * mconf.h
 *
 */

/*
Cephes Math Library Release 2.0:  April, 1987
Copyright 1984, 1987 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"

int merror = 0;

/* Common error hook of the library.
 *
 * name - name of the function reporting the error
 * code - one of the error codes from mconf.h
 *
 * The reporting code below is compiled out, so neither argument is
 * used and the function always returns 0.
 */
int mtherr( char *name, int code )
{
#if 0
/* Print the name of the function in which the
 * error occurred, as passed by the caller:
 */
printf( "\n%s ", name );

/* Record the code in the global error word */
merror = code;

/* Print the message selected by the code;
 * out-of-range codes map to entry 0.
 */
if( (code <= 0) || (code >= 7) )
	code = 0;
printf( "%s error\n", ermsg[code] );

#endif
(void) name;
(void) code;

/* Hand control back to the caller */
return 0;
}


================================================
FILE: External/cephes/src/128bit/polevll.c
================================================
/*							polevll.c
 *							p1evll.c
 *
 *	Evaluate polynomial
 *
 *
 *
 * SYNOPSIS:
 *
 * int N;
 * float128_t x, y, coef[N+1], polevl[];
 *
 * y = polevll( x, coef, N );
 *
 *
 *
 * DESCRIPTION:
 *
 * Evaluates polynomial of degree N:
 *
 *                     2          N
 * y  =  C  + C x + C x  +...+ C x
 *        0    1     2          N
 *
 * Coefficients are stored in reverse order:
 *
 * coef[0] = C  , ..., coef[N] = C  .
 *            N                   0
 *
 *  The function p1evll() assumes that coef[N] = 1.0 and is
 * omitted from the array.  Its calling arguments are
 * otherwise the same as polevll().
 *
 *
 * SPEED:
 *
 * In the interest of speed, there are no checks for out
 * of bounds arithmetic.  This routine is used by most of
 * the functions in the library.  Depending on available
 * equipment features, the user may wish to rewrite the
 * program in microcode or assembly language.
 *
 */


/*
Cephes Math Library Release 2.2:  July, 1992
Copyright 1984, 1987, 1988, 1992 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/
#include "mconf.h"


/* Polynomial evaluator (Horner's rule):
 *  P[0] x^n  +  P[1] x^(n-1)  +  ...  +  P[n]
 *
 * x  - evaluation point
 * PP - pointer to n+1 float128_t coefficients, highest order first
 * n  - degree of the polynomial
 *
 * For n <= 0 the constant term P[0] is returned; no coefficient beyond
 * P[n] is ever read.
 */
float128_t cephes_f128_polevll( float128_t x, void *PP, int n )
{
struct softfloat_state state = {};
const float128_t *coef = (const float128_t *) PP;
float128_t y;

y = *coef++;
/* One multiply-add per remaining coefficient; a pre-test loop so that
 * a degree of zero performs no iterations.
 */
while( n-- > 0 )
	{
	y = f128_add(&state, f128_mul(&state, y, x), *coef++);
	}
return(y);
}


/* Polynomial evaluator with an implied leading coefficient of 1.0:
 *  x^n  +  P[0] x^(n-1)  +  P[1] x^(n-2)  +  ...  +  P[n-1]
 *
 * x  - evaluation point
 * PP - pointer to n float128_t coefficients, highest order first
 * n  - degree of the polynomial; callers are expected to pass n >= 1
 *
 * For n == 1 the result is x + P[0]; no coefficient beyond P[n-1] is
 * read in that case.
 */
float128_t cephes_f128_p1evll( float128_t x, void *PP, int n )
{
struct softfloat_state state = {};
const float128_t *coef = (const float128_t *) PP;
float128_t y;

/* The first Horner step (1.0 * x + P[0]) needs no multiplication. */
y = f128_add(&state, x, *coef++);
/* n - 1 multiply-adds remain; a pre-test loop so that n == 1 does none. */
for( n -= 1; n > 0; --n )
	{
	y = f128_add(&state, f128_mul(&state, y, x), *coef++);
	}
return( y );
}


================================================
FILE: External/cephes/src/128bit/sinll.c
================================================
/*							sinl.c
 *
 *	Circular sine, float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, sinl();
 *
 * y = sinl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Range reduction is into intervals of pi/4.  The reduction
 * error is nearly eliminated by contriving an extended precision
 * modular arithmetic.
 *
 * Two polynomial approximating functions are employed.
 * Between 0 and pi/4 the sine is approximated by the Cody
 * and Waite polynomial form
 *      x + x^3 P(x^2) .
 * Between pi/4 and pi/2 the cosine is represented as
 *      1 - .5 x^2 + x^4 Q(x^2) .
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain      # trials      peak         rms
 *    IEEE     +-3.6e16      100,000    2.0e-34     5.3e-35
 *
 * ERROR MESSAGES:
 *
 *   message           condition        value returned
 * sin total loss      x > 2^55              0.0
 *
 */
/*							cosl.c
 *
 *	Circular cosine, float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, cosl();
 *
 * y = cosl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Range reduction is into intervals of pi/4.  The reduction
 * error is nearly eliminated by contriving an extended precision
 * modular arithmetic.
 *
 * Two polynomial approximating functions are employed.
 * Between 0 and pi/4 the cosine is approximated by
 *      1 - .5 x^2 + x^4 Q(x^2) .
 * Between pi/4 and pi/2 the sine is represented by the Cody
 * and Waite polynomial form
 *      x  +  x^3 P(x^2) .
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain      # trials      peak         rms
 *    IEEE     +-3.6e16     100,000      2.0e-34     5.2e-35
 *
 * ERROR MESSAGES:
 *
 *   message           condition        value returned
 * cos total loss      x > 2^55              0.0
 */

/*							sin.c	*/

/*
Cephes Math Library Release 2.2:  December, 1990
Copyright 1985, 1990 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"

/* sin(x) = x + x^3 P(x^2)
 * Theoretical peak relative error = 5.6e-39
 * relative peak error spread = 1.7e-9
 */

/* Coefficients of P in sin(x) = x + x^3 P(x^2), highest order first
 * (evaluated with polevll, degree 11).  Each entry is written as two
 * 64-bit words; the second word carries the sign and biased exponent
 * in its top 16 bits (compare `one` below).  The decimal value is given
 * in the trailing comment.
 */
static float128_t sincof[12] = {
{0x07424c0cc240ddd5ULL, 0x3fab3d6c15b6d187ULL}, // 6.410290407010279602425714995528976754871E-26L,
{0x0f48760e659301d0ULL, 0xbfb47619a65f0be7ULL}, // -3.868105354403065333804959405965295962871E-23L,
{0xcb791f8ea7c13184ULL, 0x3fbd71b8ee9a64e1ULL}, // 1.957294039628045847156851410307133941611E-20L,
{0x0b420eabbeb9d9bcULL, 0xbfc62f49b467cdf7ULL}, // -8.220635246181818130416407184286068307901E-18L,
{0x4be70cee4054eef9ULL, 0x3fce952c77030ab5ULL}, // 2.811457254345322887443598804951004537784E-15L,
{0xe782874b38cbd281ULL, 0xbfd6ae7f3e733b81ULL}, // -7.647163731819815869711749952353081768709E-13L,
{0x97c83627668fe57cULL, 0x3fde6124613a86d0ULL}, // 1.605904383682161459812515654720205050216E-10L,
{0x38fe73eef2ec94cdULL, 0xbfe5ae64567f544eULL}, // -2.505210838544171877505034150892770940116E-8L,
{0x38faac1c6f6fa52aULL, 0x3fec71de3a556c73ULL}, // 2.755731922398589065255731765498970284004E-6L,
{0xa01a01a019fc52ccULL, 0xbff2a01a01a01a01ULL}, // -1.984126984126984126984126984045294307281E-4L,
{0x1111111111111083ULL, 0x3ff8111111111111ULL}, // 8.333333333333333333333333333333119885283E-3L,
{0x5555555555555555ULL, 0xbffc555555555555ULL}, // -1.666666666666666666666666666666666647199E-1L
};
/* cos(x) = 1 - .5 x^2 + x^2 (x^2 P(x^2))
 * Theoretical peak relative error = 2.1e-37,
 * relative peak error spread = 1.4e-8
 */
/* Highest order first (evaluated with polevll, degree 10). */
static float128_t coscof[11] = {
{0x86919a6fdf15a4b3ULL, 0x3fafefc8801eb0a1ULL}, // 1.601961934248327059668321782499768648351E-24L,
{0x902367b3281c9510ULL, 0xbfb90ce245980e11ULL}, // -8.896621117922334603659240022184527001401E-22L,
{0xcf5102d043ad399aULL, 0x3fc1e542b8eb4f0dULL}, // 4.110317451243694098169570731967589555498E-19L,
{0xa8272970c73ab5ffULL, 0xbfca6827863b2960ULL}, // -1.561920696747074515985647487260202922160E-16L,
{0xf9016edb75d1fb52ULL, 0x3fd2ae7f3e733b51ULL}, // 4.779477332386900932514186378501779328195E-14L,
{0xc3e862188c1c1f15ULL, 0xbfda93974a8c07c9ULL}, // -1.147074559772972328629102981460088437917E-11L,
{0x7b517ff3abf58399ULL, 0x3fe21eed8eff8d89ULL}, // 2.087675698786809897637922200570559726116E-9L,
{0xc72eef5d4453f45cULL, 0xbfe927e4fb7789f5ULL}, // -2.755731922398589065255365968070684102298E-7L,
{0xa01a019fdf56450dULL, 0x3fefa01a01a01a01ULL}, // 2.480158730158730158730158440896461945271E-5L,
{0x6c16c16c16b76e10ULL, 0xbff56c16c16c16c1ULL}, // -1.388888888888888888888888888765724370132E-3L,
{0x55555555555553fdULL, 0x3ffa555555555555ULL}, // 4.166666666666666666666666666666459301466E-2L
};
/* Earlier decimal values of the constants, kept for reference only: */
/*
static float128_t DP1 = 7.853981554508209228515625E-1L;
static float128_t DP2 =  7.94662735614792836713604629039764404296875E-9L;
static float128_t DP3 = 3.0616169978683829430651648306875026455243736148E-17L;
static float128_t lossth = 5.49755813888e11L;
*/
/* DP1, DP2 and DP3 are subtracted in turn (each times the octant count y)
 * in the range reduction  z = ((x - y*DP1) - y*DP2) - y*DP3  of
 * sinl/cosl, where y is derived from floor(x / F128_PIO4L).
 */
static float128_t DP1 =
{0x8400000000000000ULL, 0x3ffe921fb54442d1ULL};
 //7.853981633974483067550664827649598009884357452392578125E-1L;
static float128_t DP2 =
{0xe000000000000000ULL, 0x3fc4a62633145c06ULL};
 //2.8605943630549158983813312792950660807511260829685741796657E-18L;
static float128_t DP3 =
{0xa67cc74020bbea64ULL, 0x3f8bcd129024e088ULL};
 //2.1679525325309452561992610065108379921905808E-35L;

/* Arguments larger than this are rejected with a TLOSS error by sinl/cosl. */
static const float128_t lossth =  {0x0000000000000000ULL, 0x4036000000000000ULL}; // 3.6028797018963968E16L; /* 2^55 */
static const float128_t zero = {0, 0};
static const float128_t one = {0, 0x3fff000000000000ULL};

/* Circular sine of the radian argument x.
 *
 * The sign of x is saved and the magnitude is reduced modulo pi/4
 * into an octant number j and a remainder z.  Depending on the octant
 * either the sine polynomial  z + z^3 P(z^2)  or the cosine polynomial
 * 1 - .5 z^2 + z^4 Q(z^2)  is evaluated, and the saved sign is applied.
 *
 * For |x| > lossth (2^55) a TLOSS error is reported through mtherr()
 * and zero is returned.
 */
float128_t cephes_f128_sinl(float128_t x)
{
struct softfloat_state state = {};
float128_t y, z, zz;
int j, sign;

/* make argument positive but save the sign */
sign = 1;
if( f128_lt(&state, x, zero) )
	{
	x = f128_complement_sign(x);
	sign = -1;
	}

if( f128_lt(&state, lossth, x))
	{
	mtherr( "sinl", TLOSS );
	return zero;
	}

y = cephes_f128_floorl( f128_div(&state, x, F128_PIO4L) ); /* integer part of x/PIO4 */

/* strip high bits of integer part to prevent integer overflow */
z = cephes_f128_ldexpl( y, -4 );
z = cephes_f128_floorl(z);           /* integer part of y/16 */
z = f128_sub(&state, y, cephes_f128_ldexpl( z, 4 ));  /* y - 16 * (y/16) */

j = f128_to_i32(&state, z, softfloat_round_near_even, true); /* convert to integer for tests on the phase angle */
/* map zeros to origin */
if( j & 1 )
	{
	j += 1;
	y = f128_add(&state, y, one);
	}
j = j & 07; /* octant modulo 360 degrees */
/* reflect in x axis */
if( j > 3)
	{
	sign = -sign;
	j -= 4;
	}

/* Extended precision modular arithmetic */
// z = ((x - y * DP1) - y * DP2) - y * DP3;
 {
   float128_t tmp1 = f128_mul(&state, y, DP1);
   float128_t tmp2 = f128_mul(&state, y, DP2);
   float128_t tmp3 = f128_mul(&state, y, DP3);
   float128_t tmp4 = f128_sub(&state, x, tmp1);
   float128_t tmp5 = f128_sub(&state, tmp4, tmp2);
   z = f128_sub(&state, tmp5, tmp3);
 }

zz = f128_mul(&state, z, z);
if( (j==1) || (j==2) )
	{
  // y = 1.0L - ldexpl(zz,-1) + zz * zz * polevll( zz, coscof, 10 );
  float128_t tmp1 = f128_mul(&state, zz, zz);
  float128_t tmp2 = f128_mul(&state, tmp1, cephes_f128_polevll( zz, coscof, 10 ));
  float128_t tmp3 = f128_sub(&state, one, cephes_f128_ldexpl(zz,-1));
  y = f128_add(&state, tmp3, tmp2);
	}
else
	{
  // y = z  +  z * (zz * polevll( zz, sincof, 11 ));
  float128_t tmp1 = f128_mul(&state, zz, cephes_f128_polevll( zz, sincof, 11 ));
  float128_t tmp2 = f128_mul(&state, z, tmp1);
  y = f128_add(&state, z, tmp2);
	}

if(sign < 0)
	y = f128_complement_sign(y);

return(y);
}


/* Circular cosine of the radian argument x.
 *
 * The magnitude of x is reduced modulo pi/4 into an octant number and
 * a remainder; depending on the octant either the sine polynomial or
 * the cosine polynomial is evaluated on the remainder and the result
 * is negated where the octant requires it.
 *
 * For |x| > lossth (2^55) a TLOSS error is reported through mtherr()
 * and zero is returned.
 */
float128_t cephes_f128_cosl(float128_t x)
{
struct softfloat_state state = {};
float128_t octants, rem, rem2, result;
long phase;
int j;
int sign = 1;

/* cosine is even: work with the magnitude */
if( f128_lt(&state, x, zero) )
	x = f128_complement_sign(x);

if( f128_lt(&state, lossth, x))
	{
	mtherr( "cosl", TLOSS );
	return zero;
	}

/* number of whole octants in x */
octants = cephes_f128_floorl( f128_div(&state, x, F128_PIO4L));

/* keep only the octant count modulo 16 so it fits an integer */
rem = cephes_f128_floorl( cephes_f128_ldexpl( octants, -4 ) );	/* integer part of octants/16 */
rem = f128_sub(&state, octants, cephes_f128_ldexpl( rem, 4 ));

phase = f128_to_i32(&state, rem, softfloat_round_near_even, true);
if( phase & 1 )	/* map zeros to origin */
	{
	phase += 1;
	octants = f128_add(&state, octants, one);
	}

j = phase & 07;
if( j > 3)
	{
	j -= 4;
	sign = -sign;
	}
if( j > 1 )
	sign = -sign;

/* Extended precision modular arithmetic:
 * rem = ((x - octants * DP1) - octants * DP2) - octants * DP3
 */
rem = f128_sub(&state, x, f128_mul(&state, octants, DP1));
rem = f128_sub(&state, rem, f128_mul(&state, octants, DP2));
rem = f128_sub(&state, rem, f128_mul(&state, octants, DP3));

rem2 = f128_mul(&state, rem, rem);
if( (j==1) || (j==2) )
	{
	/* rem + rem * (rem2 * polevll( rem2, sincof, 11 )) */
	float128_t series = f128_mul(&state, rem2, cephes_f128_polevll( rem2, sincof, 11 ));
	result = f128_add(&state, rem, f128_mul(&state, rem, series));
	}
else
	{
	/* 1 - ldexpl(rem2,-1) + rem2 * rem2 * polevll( rem2, coscof, 10 ) */
	float128_t quartic = f128_mul(&state, rem2, rem2);
	float128_t series = f128_mul(&state, quartic, cephes_f128_polevll( rem2, coscof, 10 ));
	float128_t head = f128_sub(&state, one, cephes_f128_ldexpl( rem2, -1 ));
	result = f128_add(&state, head, series);
	}

if( sign < 0 )
	result = f128_complement_sign(result);

return result;
}


================================================
FILE: External/cephes/src/128bit/tanll.c
================================================
/*							tanl.c
 *
 *	Circular tangent, 128-bit float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, tanl();
 *
 * y = tanl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns the circular tangent of the radian argument x.
 *
 * Range reduction is modulo pi/4.  A rational function
 *       x + x**3 P(x**2)/Q(x**2)
 * is employed in the basic interval [0, pi/4].
 *
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE     +-3.6e16    100,000      3.0e-34      7.2e-35
 *
 * ERROR MESSAGES:
 *
 *   message         condition          value returned
 * tan total loss   x > 2^55                0.0
 *
 */
/*							cotl.c
 *
 *	Circular cotangent, float128_t precision
 *
 *
 *
 * SYNOPSIS:
 *
 * float128_t x, y, cotl();
 *
 * y = cotl( x );
 *
 *
 *
 * DESCRIPTION:
 *
 * Returns the circular cotangent of the radian argument x.
 *
 * Range reduction is modulo pi/4.  A rational function
 *       x + x**3 P(x**2)/Q(x**2)
 * is employed in the basic interval [0, pi/4].
 *
 *
 *
 * ACCURACY:
 *
 *                      Relative error:
 * arithmetic   domain     # trials      peak         rms
 *    IEEE     +-3.6e16    100,000      2.9e-34     7.2e-35
 *
 *
 * ERROR MESSAGES:
 *
 *   message         condition          value returned
 * cot total loss   x > 2^55                0.0
 * cot singularity  x = 0                  MAXNUM
 *
 */

/*
Cephes Math Library Release 2.2:  December, 1990
Copyright 1984, 1990 by Stephen L. Moshier
Direct inquiries to 30 Frost Street, Cambridge, MA 02140
*/

#include "mconf.h"

/* tan(x) = x + x^3 P(x^2)
 * 0 <= |x| <= pi/4
 * Theoretical peak relative error = 4.3e-38
 * relative peak error spread = 6.1e-11
 */
/* Numerator coefficients. tancotl() evaluates them with
 * cephes_f128_polevll( zz, P, 5 ), i.e. all six entries are used.
 * Each entry is a raw pair of 64-bit words; the decimal value it was
 * derived from is kept in the trailing comment.
 */
static float128_t P[6] = {
 {0x09978dc7ae2a2f4bULL, 0xbffefa5d486820e2ULL}, // -9.889929415807650724957118893791829849557E-1L,
 {0x52a017b1ca7c4799ULL, 0x40093e130edd1294ULL}, // 1.272297782199996882828849455156962260810E3L,
 {0x8857161b398b3c53ULL, 0xc0119f024bdcc6c3ULL}, // -4.249691853501233575668486667664718192660E5L,
 {0xcc299261a6616b83ULL, 0x401889b0ed404622ULL}, // 5.160188250214037865511600561074819366815E7L,
 {0x37d9311de4cdbf04ULL, 0xc01e1304fe4d6331ULL}, // -2.307030822693734879744223131873392503321E9L,
 {0x6e9f0eac6b638a9aULL, 0x4021ada98af62f83ULL}, // 2.883414728874239697964612246732416606301E10L
};
/* Denominator coefficients. tancotl() evaluates them with
 * cephes_f128_p1evll( zz, Q, 6 ); the leading coefficient of 1.0 is not
 * stored in the table (see the commented-out first entry below).
 */
static float128_t Q[6] = {
/* 1.000000000000000000000000000000000000000E0L, */
 {0xeb01d728f7d3bb04ULL, 0xc009494f98d3c1caULL}, // -1.317243702830553658702531997959756728291E3L,
 {0xcdd312b4ac46a6cdULL, 0x4011ba538d331a98ULL}, // 4.529422062441341616231663543669583527923E5L,
 {0x2a1a6372eebd73a1ULL, 0xc018b57281a9f10bULL}, // -5.733709132766856723608447733926138506824E7L,
 {0x3e9defb0e348fbe5ULL, 0x401e48d6025d9b41ULL}, // 2.758476078803232151774723646710890525496E9L,
 {0x7cd82869db5580d1ULL, 0xc022355d0fdbd24eULL}, // -4.152206921457208101480801635640958361612E10L,
 {0x92f74b01508aa7f3ULL, 0x4023423f2838a3a2ULL}, // 8.650244186622719093893836740197250197602E10L
};

/* DP1, DP2 and DP3 are used together in tancotl(): y*DP1, y*DP2 and y*DP3
 * are subtracted from x one after another to form the reduced argument.
 * DP1 is approximately pi/4 (see its decimal value below); DP2 and DP3 are
 * much smaller corrections -- presumably the lower-order pieces of a split
 * pi/4, TODO confirm against the original Cephes source.
 */
static float128_t DP1 =
{0x8400000000000000ULL, 0x3ffe921fb54442d1ULL};
 //7.853981633974483067550664827649598009884357452392578125E-1L;
static float128_t DP2 =
{0xe000000000000000ULL, 0x3fc4a62633145c06ULL};
 //2.8605943630549158983813312792950660807511260829685741796657E-18L;
static float128_t DP3 =
{0xa67cc74020bbea64ULL, 0x3f8bcd129024e088ULL};
 // 2.1679525325309452561992610065108379921905808E-35L;

/* Arguments whose magnitude exceeds this value make tancotl() report TLOSS
 * through mtherr() and return zero.
 */
static const float128_t lossth =  {0x0000000000000000ULL, 0x4036000000000000ULL}; // 3.6028797018963968E16L; /* 2^55 */

/* Small constants used by the softfloat calls in this file in place of
 * floating-point literals (0, 1 and -1 going by their names and use).
 */
static const float128_t zero = {0, 0};
static const float128_t one = {0, 0x3fff000000000000ULL};
static const float128_t neg_one = {0, 0xbfff000000000000ULL};

/* NOTE(review): despite the name this is not a maximum value. tancotl()
 * compares it against zz where the original code read `zz > 1.0e-20L`, so it
 * acts as the small-argument threshold; the bit pattern appears to encode
 * approximately 1.0e-20 -- confirm before renaming.
 */
static const float128_t max_quad = {0x35d511e976394d7aULL, 0x3fbc79ca10c92422ULL};

static float128_t tancotl( struct softfloat_state *state, float128_t xx, int cotflg );

/*
 * Circular tangent of x (radians).
 *
 * Thin entry point: creates a zero-initialised softfloat state that lives
 * only for this call and forwards to tancotl() with the cotangent flag
 * cleared.
 */
float128_t cephes_f128_tanl(float128_t x)
{
	struct softfloat_state fp_state = {};
	const int want_cotangent = 0;

	return tancotl(&fp_state, x, want_cotangent);
}


/*
 * Circular cotangent of x (radians).
 *
 * An argument that compares equal to zero is reported as a singularity
 * (mtherr with SING) and F128_MAXNUML is returned. Every other argument is
 * forwarded to tancotl() with the cotangent flag set.
 */
float128_t cotl(float128_t x)
{
	struct softfloat_state fp_state = {};

	/* Common case first: a non-zero argument goes straight to the worker. */
	if (!f128_eq(&fp_state, x, zero))
		return tancotl(&fp_state, x, 1);

	mtherr( "cotl", SING );
	return F128_MAXNUML;
}


/*
 * Shared worker behind cephes_f128_tanl() and cotl().
 *
 *   state   softfloat state handed to every f128_* operation
 *   xx      argument in radians
 *   cotflg  non-zero selects the cotangent, zero the tangent
 *
 * Returns zero (after reporting TLOSS through mtherr) when |xx| exceeds
 * lossth; otherwise returns the tangent or cotangent of xx.
 */
static float128_t tancotl( struct softfloat_state *state, float128_t xx, int cotflg )
{
	float128_t mag, octant, reduced, reduced_sq, result;
	int octant_bits;
	int negative;

	/* Work on the magnitude and remember whether the input was negative. */
	negative = f128_lt(state, xx, zero) ? 1 : 0;
	mag = negative ? f128_sub(state, zero, xx) : xx;

	/* Arguments above the loss threshold are rejected outright. */
	if (f128_lt(state, lossth, mag)) {
		mtherr( cotflg ? "cotl" : "tanl", TLOSS );
		return zero;
	}

	/* Whole multiples of PIO4 contained in the magnitude. */
	octant = cephes_f128_floorl(f128_div(state, mag, F128_PIO4L));

	/* Keep only octant mod 16 so that it fits in an int:
	 * low = octant - 16 * floor(octant / 16).
	 */
	{
		float128_t sixteenths = cephes_f128_floorl(cephes_f128_ldexpl(octant, -4));
		float128_t low = f128_sub(state, octant, cephes_f128_ldexpl(sixteenths, 4));

		octant_bits = f128_to_i32(state, low, softfloat_round_near_even, true);
	}

	/* Map zeros and singularities to the origin: odd octants are bumped
	 * to the next even one.
	 */
	if (octant_bits & 1) {
		octant_bits += 1;
		octant = f128_add(state, octant, one);
	}

	/* reduced = ((mag - octant*DP1) - octant*DP2) - octant*DP3 */
	reduced = f128_sub(state, mag, f128_mul(state, octant, DP1));
	reduced = f128_sub(state, reduced, f128_mul(state, octant, DP2));
	reduced = f128_sub(state, reduced, f128_mul(state, octant, DP3));

	reduced_sq = f128_mul(state, reduced, reduced);

	/* Original condition: zz > 1.0e-20L. Below the threshold the reduced
	 * argument itself is used as the result.
	 */
	if (f128_lt(state, max_quad, reduced_sq)) {
		float128_t numer = f128_mul(state, reduced_sq, cephes_f128_polevll( reduced_sq, P, 5 ));
		float128_t denom = cephes_f128_p1evll(reduced_sq, Q, 6);
		float128_t ratio = f128_div(state, numer, denom);

		result = f128_add(state, reduced, f128_mul(state, reduced, ratio));
	} else {
		result = reduced;
	}

	/* Bit 1 of the octant count selects between the direct value and the
	 * negated/reciprocal form; which one applies depends on cotflg.
	 */
	if (octant_bits & 2) {
		result = cotflg ? f128_complement_sign(result)
		                : f128_div(state, neg_one, result);
	} else if (cotflg) {
		result = f128_div(state, one, result);
	}

	/* Restore the sign of the original argument. */
	if (negative)
		result = f128_complement_sign(result);

	return result;
}


================================================
FILE: External/code-format-helper/code-format-helper.py
================================================
#!/usr/bin/env python3
#
# ====- code-format-helper, runs code formatters from the ci or in a hook --*- python -*--==#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ==--------------------------------------------------------------------------------------==#

import argparse
import os
import subprocess
import sys
from typing import List, Optional

"""
This script is run by GitHub actions to ensure that the code in PR's conform to
the coding style of LLVM. It can also be installed as a pre-commit git hook to
check the coding style before submitting it. The canonical source of this script
is in the LLVM source tree under llvm/utils/git.

For C/C++ code it uses clang-format.

You can learn more about the LLVM coding style on llvm.org:
https://llvm.org/docs/CodingStandards.html

You can install this script as a git hook by symlinking it to the .git/hooks
directory:

ln -s $(pwd)/llvm/utils/git/code-format-helper.py .git/hooks/pre-commit

You can control the exact path to clang-format with the following
environment variable: $CLANG_FORMAT_PATH.
"""


class FormatArgs:
    """Options shared by every formatter invocation.

    Built either with no arguments (all defaults, as the pre-commit hook
    does) or from the ``argparse.Namespace`` produced by the command-line
    parser, in which case the parsed values are copied over verbatim.
    """

    start_rev: Optional[str] = None
    end_rev: Optional[str] = None
    repo: Optional[str] = None
    # Class-level default kept so ``FormatArgs.changed_files`` still resolves;
    # every instance gets its own list in __init__ (see below).
    changed_files: List[str] = []
    token: Optional[str] = None
    verbose: bool = True
    issue_number: int = 0
    write_comment_to_file: Optional[str] = None

    def __init__(self, args: Optional[argparse.Namespace] = None) -> None:
        """Copy the parsed command-line options from ``args`` when given.

        Note that ``args.changed_files`` is stored as parsed, i.e. the raw
        comma-separated string (or ``None``) from ``--changed-files``; the
        caller is responsible for splitting it.
        """
        # A list defined on the class is shared by all instances, so appending
        # to it through one instance would leak into every other one. Give
        # each instance a private list instead.
        self.changed_files = []
        if args is not None:
            self.start_rev = args.start_rev
            self.end_rev = args.end_rev
            self.repo = args.repo
            self.token = args.token
            self.changed_files = args.changed_files
            self.issue_number = args.issue_number
            self.write_comment_to_file = args.write_comment_to_file


class FormatHelper:
    """Base class for one code formatter that can report on a pull request.

    Subclasses provide ``name``, ``friendly_name``, ``instructions``,
    ``has_tool()`` and ``format_run()``. ``run()`` drives a formatter run and,
    when a token and repository are configured, creates or updates a PR
    comment carrying the result.
    """

    # Hidden HTML marker placed at the top of every comment this helper posts
    # so that find_comment() can locate it again later.
    COMMENT_TAG = "<!--CODE FORMAT COMMENT: {fmt}-->"
    name: str
    friendly_name: str
    # Filled in by update_pr() when comments are written to a file instead of
    # being posted: {"body": ...} plus "id" when an existing comment is updated.
    comment: dict = None

    @property
    def comment_tag(self) -> str:
        """Marker string identifying this formatter's PR comment.

        Built with ``str.replace`` on ``"fmt"``, so the braces of the template
        stay in the result. Comments are matched by this exact string in
        find_comment(), so changing its shape would orphan existing comments.
        """
        return self.COMMENT_TAG.replace("fmt", self.name)

    @property
    def instructions(self) -> str:
        """Command line a user can run locally to reproduce the check."""
        raise NotImplementedError()

    def has_tool(self) -> bool:
        """Return True when the formatter executable is available."""
        raise NotImplementedError()

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run the formatter over ``changed_files``.

        run() interprets the result as: ``None`` means success, a non-empty
        string is the diff to report, and an empty string means the formatter
        failed without producing a diff.
        """
        raise NotImplementedError()

    def pr_comment_text_for_diff(self, diff: str) -> str:
        """Render the PR comment body announcing formatting issues in ``diff``."""
        return f"""
:warning: {self.friendly_name}, {self.name} found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
{self.instructions}
``````````

</details>

<details>
<summary>
View the diff from {self.name} here.
</summary>

``````````diff
{diff}
``````````

</details>
"""

    # TODO: any type should be replaced with the correct github type, but it requires refactoring to
    # not require the github module to be installed everywhere.
    def find_comment(self, pr: any) -> any:
        """Return the first PR comment containing this helper's tag, or None."""
        for comment in pr.as_issue().get_comments():
            if self.comment_tag in comment.body:
                return comment
        return None

    def update_pr(self, comment_text: str, args: FormatArgs, create_new: bool) -> None:
        """Post ``comment_text`` to the PR, or record it for writing to a file.

        An existing tagged comment is always updated. A new comment is only
        created when ``create_new`` is true. When ``args.write_comment_to_file``
        is set nothing is posted; the comment is stored in ``self.comment``
        instead (with the id of the existing comment, if any).
        """
        # Imported lazily so the github module is only required when a PR is
        # actually being updated.
        import github

        repo = github.Github(args.token).get_repo(args.repo)
        pr = repo.get_issue(args.issue_number).as_pull_request()

        comment_text = self.comment_tag + "\n\n" + comment_text

        existing_comment = self.find_comment(pr)

        if args.write_comment_to_file:
            if create_new or existing_comment:
                self.comment = {"body": comment_text}
            if existing_comment:
                self.comment["id"] = existing_comment.id
            return

        if existing_comment:
            existing_comment.edit(comment_text)
        elif create_new:
            pr.as_issue().create_comment(comment_text)

    def run(self, changed_files: List[str], args: FormatArgs) -> bool:
        """Run the formatter and report the outcome.

        Returns True when the formatter is satisfied and False when it found
        issues or failed. The PR is only touched when both ``args.token`` and
        ``args.repo`` are set; otherwise the outcome is printed.
        """
        changed_files = [arg for arg in changed_files if "third-party" not in arg]
        diff = self.format_run(changed_files, args)
        should_update_gh = args.token is not None and args.repo is not None

        if diff is None:
            if should_update_gh:
                comment_text = (
                    ":white_check_mark: With the latest revision "
                    f"this PR passed the {self.friendly_name}."
                )
                self.update_pr(comment_text, args, create_new=False)
            return True
        elif len(diff) > 0:
            if should_update_gh:
                comment_text = self.pr_comment_text_for_diff(diff)
                self.update_pr(comment_text, args, create_new=True)
            else:
                print(
                    f"Warning: {self.friendly_name}, {self.name} detected "
                    "some issues with your code formatting..."
                )
            return False
        else:
            # The formatter failed but didn't output a diff (e.g. some sort of
            # infrastructure failure).
            if should_update_gh:
                comment_text = (
                    f":warning: The {self.friendly_name} failed without printing "
                    "a diff. Check the logs for stderr output. :warning:"
                )
                self.update_pr(comment_text, args, create_new=False)
            else:
                # Without a token/repo there is no PR to update (e.g. when run
                # as a git hook), so report locally instead of calling GitHub.
                print(
                    f"Warning: The {self.friendly_name} failed without printing "
                    "a diff. Check the logs for stderr output."
                )
            return False


class ClangFormatHelper(FormatHelper):
    """Formatter helper that checks C/C++ changes with git-clang-format."""

    name = "git-clang-format"
    friendly_name = "C/C++ code formatter"

    @property
    def instructions(self) -> str:
        """The exact command line used by the most recent format_run()."""
        # cf_cmd is only assigned once format_run() has built a command.
        return " ".join(self.cf_cmd)

    def should_include_extensionless_file(self, path: str) -> bool:
        """Return True for extension-less files that should still be checked."""
        return path.startswith("libcxx/include")

    def filter_changed_files(self, changed_files: List[str]) -> List[str]:
        """Keep only the paths that look like C/C++ sources or headers."""
        filtered_files = []
        for path in changed_files:
            _, ext = os.path.splitext(path)
            if ext in (".cpp", ".c", ".h", ".hpp", ".hxx", ".cxx", ".inc", ".cppm"):
                filtered_files.append(path)
            elif ext == "" and self.should_include_extensionless_file(path):
                filtered_files.append(path)
        return filtered_files

    @property
    def clang_fmt_path(self) -> str:
        """Path of git-clang-format; overridable through $CLANG_FORMAT_PATH."""
        if "CLANG_FORMAT_PATH" in os.environ:
            return os.environ["CLANG_FORMAT_PATH"]
        return "git-clang-format-19"

    def has_tool(self) -> bool:
        """Return True when ``<clang_fmt_path> -h`` runs and exits with 0."""
        cmd = [self.clang_fmt_path, "-h"]
        try:
            proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        except (OSError, ValueError):
            # The executable is missing, not runnable, or the configured path
            # is malformed. Other exceptions (e.g. KeyboardInterrupt) are
            # deliberately left to propagate.
            return False
        return proc.returncode == 0

    def format_run(self, changed_files: List[str], args: FormatArgs) -> Optional[str]:
        """Run git-clang-format in diff mode over the changed C/C++ files.

        Returns None when there is nothing to check or the tool exits with 0.
        Otherwise returns the tool's stdout, which is the diff when formatting
        is needed and may be empty when the command failed for another reason.
        """
        cpp_files = self.filter_changed_files(changed_files)
        if not cpp_files:
            return None

        cf_cmd = [
            self.clang_fmt_path,
            "--binary=clang-format-19",
            "--diff",
        ]

        if args.start_rev and args.end_rev:
            cf_cmd.append(args.start_rev)
            cf_cmd.append(args.end_rev)

        cf_cmd.append("--")
        cf_cmd += cpp_files

        if args.verbose:
            print(f"Running: {' '.join(cf_cmd)}")
        # Remembered so that `instructions` can show the command in the report.
        self.cf_cmd = cf_cmd
        proc = subprocess.run(cf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sys.stdout.write(proc.stderr.decode("utf-8"))

        if proc.returncode != 0:
            # formatting needed, or the command otherwise failed
            if args.verbose:
                print(f"error: {self.name} exited with code {proc.returncode}")
                # Print the diff in the log so that it is viewable there
                print(proc.stdout.decode("utf-8"))
            return proc.stdout.decode("utf-8")
        else:
            return None

# Formatter helpers that both hook_main() and the command-line entry point
# iterate over.
ALL_FORMATTERS = [ClangFormatHelper()]

def hook_main():
    """Entry point when the script runs as a git pre-commit hook.

    Checks the files staged for commit with every available formatter and
    exits the process with status 1 if any of them reports a problem, 0
    otherwise. This function never returns normally.
    """
    # fill out args
    args = FormatArgs()
    args.verbose = False

    # find the changed files
    cmd = ["git", "diff", "--cached", "--name-only", "--diff-filter=d"]
    proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Assign a fresh list rather than appending to the default one, which may
    # be shared between FormatArgs instances.
    args.changed_files = proc.stdout.decode("utf-8").splitlines()

    failed_fmts = []
    for fmt in ALL_FORMATTERS:
        if fmt.has_tool():
            if not fmt.run(args.changed_files, args):
                failed_fmts.append(fmt.name)
            # No PR comments are collected here: the hook never sets
            # write_comment_to_file, and there is no comment list in this scope.
        else:
            print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower())

    sys.exit(1 if failed_fmts else 0)


if __name__ == "__main__":
    # Installed as a git hook (the script's path goes through .git/hooks):
    # run the pre-commit flow, which takes no command-line arguments.
    if ".git/hooks" in os.path.abspath(__file__):
        hook_main()
        sys.exit(0)

    # Otherwise behave as the CI command-line tool.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--token",
        type=str,
        required=False,
        help="GitHub authentication token",
    )
    parser.add_argument(
        "--repo",
        type=str,
        default=os.getenv("GITHUB_REPOSITORY", "llvm/llvm-project"),
        help="The GitHub repository that we are working with in the form of <owner>/<repo> (e.g. llvm/llvm-project)",
    )
    parser.add_argument("--issue-number", type=int, required=True)
    parser.add_argument(
        "--start-rev",
        type=str,
        required=True,
        help="Compute changes from this revision.",
    )
    parser.add_argument(
        "--end-rev",
        type=str,
        required=True,
        help="Compute changes to this revision",
    )
    parser.add_argument(
        "--changed-files",
        type=str,
        help="Comma separated list of files that has been changed",
    )
    parser.add_argument(
        "--write-comment-to-file",
        type=str,
        help="Don't post comments on the PR, instead write the comments and metadata a file",
    )

    args = FormatArgs(parser.parse_args())

    # --changed-files arrives as one comma-separated string (or not at all).
    changed_files = args.changed_files.split(",") if args.changed_files else []

    failed_formatters = []
    comments = []
    for fmt in ALL_FORMATTERS:
        if not fmt.run(changed_files, args):
            failed_formatters.append(fmt.name)
        if fmt.comment:
            comments.append(fmt.comment)

    # Comments are only collected when --write-comment-to-file was given.
    if comments:
        import json

        with open(args.write_comment_to_file, "w") as comment_file:
            json.dump(comments, comment_file)

    if failed_formatters:
        print(f"error: some formatters failed: {' '.join(failed_formatters)}")
        sys.exit(1)


================================================
FILE: External/code-format-helper/requirements_formatting.txt
================================================
#
# This file is autogenerated by pip-compile with Python 3.13
# by the following command:
#
#    pip-compile --generate-hashes --output-file=requirements_formatting.txt --strip-extras requirements_formatting.txt.in
#
black==26.3.1 \
    --hash=sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c \
    --hash=sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7 \
    --hash=sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff \
    --hash=sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b \
    --hash=sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07 \
    --hash=sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78 \
    --hash=sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f \
    --hash=sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5 \
    --hash=sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b \
    --hash=sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e \
    --hash=sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a \
    --hash=sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac \
    --hash=sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a \
    --hash=sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54 \
    --hash=sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2 \
    --hash=sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f \
    --hash=sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1 \
    --hash=sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5 \
    --hash=sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2 \
    --hash=sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f \
    --hash=sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1 \
    --hash=sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c \
    --hash=sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839 \
    --hash=sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983 \
    --hash=sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb \
    --hash=sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56 \
    --hash=sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568
    # via
    #   -r requirements_formatting.txt.in
    #   darker
certifi==2025.7.14 \
    --hash=sha256:6b31f564a415d79ee77df69d757bb49a5bb53bd9f756cbbe24394ffd6fc1f4b2 \
    --hash=sha256:8ea99dbdfaaf2ba2f9bac77b9249ef62ec5218e7c2b2e903378ed5fccf765995
    # via
    #   -r requirements_formatting.txt.in
    #   requests
cffi==2.0.0 \
    --hash=sha256:00bdf7acc5f795150faa6957054fbbca2439db2f775ce831222b66f192f03beb \
    --hash=sha256:07b271772c100085dd28b74fa0cd81c8fb1a3ba18b21e03d7c27f3436a10606b \
    --hash=sha256:087067fa8953339c723661eda6b54bc98c5625757ea62e95eb4898ad5e776e9f \
    --hash=sha256:0a1527a803f0a659de1af2e1fd700213caba79377e27e4693648c2923da066f9 \
    --hash=sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44 \
    --hash=sha256:0f6084a0ea23d05d20c3edcda20c3d006f9b6f3fefeac38f59262e10cef47ee2 \
    --hash=sha256:12873ca6cb9b0f0d3a0da705d6086fe911591737a59f28b7936bdfed27c0d47c \
    --hash=sha256:19f705ada2530c1167abacb171925dd886168931e0a7b78f5bffcae5c6b5be75 \
    --hash=sha256:1cd13c99ce269b3ed80b417dcd591415d3372bcac067009b6e0f59c7d4015e65 \
    --hash=sha256:1e3a615586f05fc4065a8b22b8152f0c1b00cdbc60596d187c2a74f9e3036e4e \
    --hash=sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a \
    --hash=sha256:1fc9ea04857caf665289b7a75923f2c6ed559b8298a1b8c49e59f7dd95c8481e \
    --hash=sha256:203a48d1fb583fc7d78a4c6655692963b860a417c0528492a6bc21f1aaefab25 \
    --hash=sha256:2081580ebb843f759b9f617314a24ed5738c51d2aee65d31e02f6f7a2b97707a \
    --hash=sha256:21d1152871b019407d8ac3985f6775c079416c282e431a4da6afe7aefd2bccbe \
    --hash=sha256:24b6f81f1983e6df8db3adc38562c83f7d4a0c36162885ec7f7b77c7dcbec97b \
    --hash=sha256:256f80b80ca3853f90c21b23ee78cd008713787b1b1e93eae9f3d6a7134abd91 \
    --hash=sha256:28a3a209b96630bca57cce802da70c266eb08c6e97e5afd61a75611ee6c64592 \
    --hash=sha256:2c8f814d84194c9ea681642fd164267891702542f028a15fc97d4674b6206187 \
    --hash=sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c \
    --hash=sha256:38100abb9d1b1435bc4cc340bb4489635dc2f0da7456590877030c9b3d40b0c1 \
    --hash=sha256:3925dd22fa2b7699ed2617149842d2e6adde22b262fcbfada50e3d195e4b3a94 \
    --hash=sha256:3e17ed538242334bf70832644a32a7aae3d83b57567f9fd60a26257e992b79ba \
    --hash=sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb \
    --hash=sha256:3f4d46d8b35698056ec29bca21546e1551a205058ae1a181d871e278b0b28165 \
    --hash=sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529 \
    --hash=sha256:45d5e886156860dc35862657e1494b9bae8dfa63bf56796f2fb56e1679fc0bca \
    --hash=sha256:4647afc2f90d1ddd33441e5b0e85b16b12ddec4fca55f0d9671fef036ecca27c \
    --hash=sha256:4671d9dd5ec934cb9a73e7ee9676f9362aba54f7f34910956b84d727b0d73fb6 \
    --hash=sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c \
    --hash=sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0 \
    --hash=sha256:5fed36fccc0612a53f1d4d9a816b50a36702c28a2aa880cb8a122b3466638743 \
    --hash=sha256:61d028e90346df14fedc3d1e5441df818d095f3b87d286825dfcbd6459b7ef63 \
    --hash=sha256:66f011380d0e49ed280c789fbd08ff0d40968ee7b665575489afa95c98196ab5 \
    --hash=sha256:6824f87845e3396029f3820c206e459ccc91760e8fa24422f8b0c3d1731cbec5 \
    --hash=sha256:6c6c373cfc5c83a975506110d17457138c8c63016b563cc9ed6e056a82f13ce4 \
    --hash=sha256:6d02d6655b0e54f54c4ef0b94eb6be0607b70853c45ce98bd278dc7de718be5d \
    --hash=sha256:6d50360be4546678fc1b79ffe7a66265e28667840010348dd69a314145807a1b \
    --hash=sha256:730cacb21e1bdff3ce90babf007d0a0917cc3e6492f336c2f0134101e0944f93 \
    --hash=sha256:737fe7d37e1a1bffe70bd5754ea763a62a066dc5913ca57e957824b72a85e205 \
    --hash=sha256:74a03b9698e198d47562765773b4a8309919089150a0bb17d829ad7b44b60d27 \
    --hash=sha256:7553fb2090d71822f02c629afe6042c299edf91ba1bf94951165613553984512 \
    --hash=sha256:7a66c7204d8869299919db4d5069a82f1561581af12b11b3c9f48c584eb8743d \
    --hash=sha256:7cc09976e8b56f8cebd752f7113ad07752461f48a58cbba644139015ac24954c \
    --hash=sha256:81afed14892743bbe14dacb9e36d9e0e504cd204e0b165062c488942b9718037 \
    --hash=sha256:8941aaadaf67246224cee8c3803777eed332a19d909b47e29c9842ef1e79ac26 \
    --hash=sha256:89472c9762729b5ae1ad974b777416bfda4ac5642423fa93bd57a09204712322 \
    --hash=sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb \
    --hash=sha256:8eca2a813c1cb7ad4fb74d368c2ffbbb4789d377ee5bb8df98373c2cc0dee76c \
    --hash=sha256:92b68146a71df78564e4ef48af17551a5ddd142e5190cdf2c5624d0c3ff5b2e8 \
    --hash=sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4 \
    --hash=sha256:94698a9c5f91f9d138526b48fe26a199609544591f859c870d477351dc7b2414 \
    --hash=sha256:9a67fc9e8eb39039280526379fb3a70023d77caec1852002b4da7e8b270c4dd9 \
    --hash=sha256:9de40a7b0323d889cf8d23d1ef214f565ab154443c42737dfe52ff82cf857664 \
    --hash=sha256:a05d0c237b3349096d3981b727493e22147f934b20f6f125a3eba8f994bec4a9 \
    --hash=sha256:afb8db5439b81cf9c9d0c80404b60c3cc9c3add93e114dcae767f1477cb53775 \
    --hash=sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739 \
    --hash=sha256:b1e74d11748e7e98e2f426ab176d4ed720a64412b6a15054378afdb71e0f37dc \
    --hash=sha256:b21e08af67b8a103c71a250401c78d5e0893beff75e28c53c98f4de42f774062 \
    --hash=sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe \
    --hash=sha256:b882b3df248017dba09d6b16defe9b5c407fe32fc7c65a9c69798e6175601be9 \
    --hash=sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92 \
    --hash=sha256:c649e3a33450ec82378822b3dad03cc228b8f5963c0c12fc3b1e0ab940f768a5 \
    --hash=sha256:c654de545946e0db659b3400168c9ad31b5d29593291482c43e3564effbcee13 \
    --hash=sha256:c6638687455baf640e37344fe26d37c404db8b80d037c3d29f58fe8d1c3b194d \
    --hash=sha256:c8d3b5532fc71b7a77c09192b4a5a200ea992702734a2e9279a37f2478236f26 \
    --hash=sha256:cb527a79772e5ef98fb1d700678fe031e353e765d1ca2d409c92263c6d43e09f \
    --hash=sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495 \
    --hash=sha256:d48a880098c96020b02d5a1f7d9251308510ce8858940e6fa99ece33f610838b \
    --hash=sha256:d68b6cef7827e8641e8ef16f4494edda8b36104d79773a334beaa1e3521430f6 \
    --hash=sha256:d9b29c1f0ae438d5ee9acb31cadee00a58c46cc9c0b2f9038c6b0b3470877a8c \
    --hash=sha256:d9b97165e8aed9272a6bb17c01e3cc5871a594a446ebedc996e2397a1c1ea8ef \
    --hash=sha256:da68248800ad6320861f129cd9c1bf96ca849a2771a59e0344e88681905916f5 \
    --hash=sha256:da902562c3e9c550df360bfa53c035b2f241fed6d9aef119048073680ace4a18 \
    --hash=sha256:dbd5c7a25a7cb98f5ca55d258b103a2054f859a46ae11aaf23134f9cc0d356ad \
    --hash=sha256:dd4f05f54a52fb558f1ba9f528228066954fee3ebe629fc1660d874d040ae5a3 \
    --hash=sha256:de8dad4425a6ca6e4e5e297b27b5c824ecc7581910bf9aee86cb6835e6812aa7 \
    --hash=sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5 \
    --hash=sha256:e6e73b9e02893c764e7e8d5bb5ce277f1a009cd5243f8228f75f842bf937c534 \
    --hash=sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49 \
    --hash=sha256:f93fd8e5c8c0a4aa1f424d6173f14a892044054871c771f8566e4008eaa359d2 \
    --hash=sha256:fc33c5141b55ed366cfaad382df24fe7dcbc686de5be719b207bb248e3053dc5 \
    --hash=sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453 \
    --hash=sha256:fe562eb1a64e67dd297ccc4f5addea2501664954f2692b69a76449ec7913ecbf
    # via
    #   cryptography
    #   pynacl
charset-normalizer==3.2.0 \
    --hash=sha256:04e57ab9fbf9607b77f7d057974694b4f6b142da9ed4a199859d9d4d5c63fe96 \
    --hash=sha256:09393e1b2a9461950b1c9a45d5fd251dc7c6f228acab64da1c9c0165d9c7765c \
    --hash=sha256:0b87549028f680ca955556e3bd57013ab47474c3124dc069faa0b6545b6c9710 \
    --hash=sha256:1000fba1057b92a65daec275aec30586c3de2401ccdcd41f8a5c1e2c87078706 \
    --hash=sha256:1249cbbf3d3b04902ff081ffbb33ce3377fa6e4c7356f759f3cd076cc138d020 \
    --hash=sha256:1920d4ff15ce893210c1f0c0e9d19bfbecb7983c76b33f046c13a8ffbd570252 \
    --hash=sha256:193cbc708ea3aca45e7221ae58f0fd63f933753a9bfb498a3b474878f12caaad \
    --hash=sha256:1a100c6d595a7f316f1b6f01d20815d916e75ff98c27a01ae817439ea7726329 \
    --hash=sha256:1f30b48dd7fa1474554b0b0f3fdfdd4c13b5c737a3c6284d3cdc424ec0ffff3a \
    --hash=sha256:203f0c8871d5a7987be20c72442488a0b8cfd0f43b7973771640fc593f56321f \
    --hash=sha256:246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6 \
    --hash=sha256:2dee8e57f052ef5353cf608e0b4c871aee320dd1b87d351c28764fc0ca55f9f4 \
    --hash=sha256:2efb1bd13885392adfda4614c33d3b68dee4921fd0ac1d3988f8cbb7d589e72a \
    --hash=sha256:2f4ac36d8e2b4cc1aa71df3dd84ff8efbe3bfb97ac41242fbcfc053c67434f46 \
    --hash=sha256:3170c9399da12c9dc66366e9d14da8bf7147e1e9d9ea566067bbce7bb74bd9c2 \
    --hash=sha256:3b1613dd5aee995ec6d4c69f00378bbd07614702a315a2cf6c1d21461fe17c23 \
    --hash=sha256:3bb3d25a8e6c0aedd251753a79ae98a093c7e7b471faa3aa9a93a81431987ace \
    --hash=sha256:3bb7fda7260735efe66d5107fb7e6af6a7c04c7fce9b2514e04b7a74b06bf5dd \
    --hash=sha256:41b25eaa7d15909cf3ac4c96088c1f266a9a93ec44f87f1d13d4a0e86c81b982 \
    --hash=sha256:45de3f87179c1823e6d9e32156fb14c1927fcc9aba21433f088fdfb555b77c10 \
    --hash=sha256:46fb8c61d794b78ec7134a715a3e564aafc8f6b5e338417cb19fe9f57a5a9bf2 \
    --hash=sha256:48021783bdf96e3d6de03a6e39a1171ed5bd7e8bb93fc84cc649d11490f87cea \
    --hash=sha256:4957669ef390f0e6719db3613ab3a7631e68424604a7b448f079bee145da6e09 \
    --hash=sha256:5e86d77b090dbddbe78867a0275cb4df08ea195e660f1f7f13435a4649e954e5 \
    --hash=sha256:6339d047dab2780cc6220f46306628e04d9750f02f983ddb37439ca47ced7149 \
    --hash=sha256:681eb3d7e02e3c3655d1b16059fbfb605ac464c834a0c629048a30fad2b27489 \
    --hash=sha256:6c409c0deba34f147f77efaa67b8e4bb83d2f11c8806405f76397ae5b8c0d1c9 \
    --hash=sha256:7095f6fbfaa55defb6b733cfeb14efaae7a29f0b59d8cf213be4e7ca0b857b80 \
    --hash=sha256:70c610f6cbe4b9fce272c407dd9d07e33e6bf7b4aa1b7ffb6f6ded8e634e3592 \
    --hash=sha256:72814c01533f51d68702802d74f77ea026b5ec52793c791e2da806a3844a46c3 \
    --hash=sha256:7a4826ad2bd6b07ca615c74ab91f32f6c96d08f6fcc3902ceeedaec8cdc3bcd6 \
    --hash=sha256:7c70087bfee18a42b4040bb9ec1ca15a08242cf5867c58726530bdf3945672ed \
    --hash=sha256:855eafa5d5a2034b4621c74925d89c5efef61418570e5ef9b37717d9c796419c \
    --hash=sha256:8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200 \
    --hash=sha256:89f1b185a01fe560bc8ae5f619e924407efca2191b56ce749ec84982fc59a32a \
    --hash=sha256:8b2c760cfc7042b27ebdb4a43a4453bd829a5742503599144d54a032c5dc7e9e \
    --hash=sha256:8c2f5e83493748286002f9369f3e6607c565a6a90425a3a1fef5ae32a36d749d \
    --hash=sha256:8e098148dd37b4ce3baca71fb394c81dc5d9c7728c95df695d2dca218edf40e6 \
    --hash=sha256:94aea8eff76ee6d1cdacb07dd2123a68283cb5569e0250feab1240058f53b623 \
    --hash=sha256:95eb302ff792e12aba9a8b8f8474ab229a83c103d74a750ec0bd1c1eea32e669 \
    --hash=sha256:9bd9b3b31adcb054116447ea22caa61a285d92e94d710aa5ec97992ff5eb7cf3 \
    --hash=sha256:9e608aafdb55eb9f255034709e20d5a83b6d60c054df0802fa9c9883d0a937aa \
    --hash=sha256:a103b3a7069b62f5d4890ae1b8f0597618f628b286b03d4bc9195230b154bfa9 \
    --hash=sha256:a386ebe437176aab38c041de1260cd3ea459c6ce5263594399880bbc398225b2 \
    --hash=sha256:a38856a971c602f98472050165cea2cdc97709240373041b69030be15047691f \
    --hash=sha256:a401b4598e5d3f4a9a811f3daf42ee2291790c7f9d74b18d75d6e21dda98a1a1 \
    --hash=sha256:a7647ebdfb9682b7bb97e2a5e7cb6ae735b1c25008a70b906aecca294ee96cf4 \
    --hash=sha256:aaf63899c94de41fe3cf934601b0f7ccb6b428c6e4eeb80da72c58eab077b19a \
    --hash=sha256:b0dac0ff919ba34d4df1b6131f59ce95b08b9065233446be7e459f95554c0dc8 \
    --hash=sha256:baacc6aee0b2ef6f3d308e197b5d7a81c0e70b06beae1f1fcacffdbd124fe0e3 \
    --hash=sha256:bf420121d4c8dce6b889f0e8e4ec0ca34b7f40186203f06a946fa0276ba54029 \
    --hash=sha256:c04a46716adde8d927adb9457bbe39cf473e1e2c2f5d0a16ceb837e5d841ad4f \
    --hash=sha256:c0b21078a4b56965e2b12f247467b234734491897e99c1d51cee628da9786959 \
    --hash=sha256:c1c76a1743432b4b60ab3358c937a3fe1341c828ae6194108a94c69028247f22 \
    --hash=sha256:c4983bf937209c57240cff65906b18bb35e64ae872da6a0db937d7b4af845dd7 \
    --hash=sha256:c4fb39a81950ec280984b3a44f5bd12819953dc5fa3a7e6fa7a80db5ee853952 \
    --hash=sha256:c57921cda3a80d0f2b8aec7e25c8aa14479ea92b5b51b6876d975d925a2ea346 \
    --hash=sha256:c8063cf17b19661471ecbdb3df1c84f24ad2e389e326ccaf89e3fb2484d8dd7e \
    --hash=sha256:ccd16eb18a849fd8dcb23e23380e2f0a354e8daa0c984b8a732d9cfaba3a776d \
    --hash=sha256:cd6dbe0238f7743d0efe563ab46294f54f9bc8f4b9bcf57c3c666cc5bc9d1299 \
    --hash=sha256:d62e51710986674142526ab9f78663ca2b0726066ae26b78b22e0f5e571238dd \
    --hash=sha256:db901e2ac34c931d73054d9797383d0f8009991e723dab15109740a63e7f902a \
    --hash=sha256:e03b8895a6990c9ab2cdcd0f2fe44088ca1c65ae592b8f795c3294af00a461c3 \
    --hash=sha256:e1c8a2f4c69e08e89632defbfabec2feb8a8d99edc9f89ce33c4b9e36ab63037 \
    --hash=sha256:e4b749b9cc6ee664a3300bb3a273c1ca8068c46be705b6c31cf5d276f8628a94 \
    --hash=sha256:e6a5bf2cba5ae1bb80b154ed68a3cfa2fa00fde979a7f50d6598d3e17d9ac20c \
    --hash=sha256:e857a2232ba53ae940d3456f7533ce6ca98b81917d47adc3c7fd55dad8fab858 \
    --hash=sha256:ee4006268ed33370957f55bf2e6f4d263eaf4dc3cfc473d1d90baff6ed36ce4a \
    --hash=sha256:eef9df1eefada2c09a5e7a40991b9fc6ac6ef20b1372abd48d2794a316dc0449 \
    --hash=sha256:f058f6963fd82eb143c692cecdc89e075fa0828db2e5b291070485390b2f1c9c \
    --hash=sha256:f25c229a6ba38a35ae6e25ca1264621cc25d4d38dca2942a7fce0b67a4efe918 \
    --hash=sha256:f2a1d0fd4242bd8643ce6f98927cf9c04540af6efa92323e9d3124f57727bfc1 \
    --hash=sha256:f7560358a6811e52e9c4d142d497f1a6e10103d3a6881f18d04dbce3729c0e2c \
    --hash=sha256:f779d3ad205f108d14e99bb3859aa7dd8e9c68874617c72354d7ecaec2a054ac \
    --hash=sha256:f87f746ee241d30d6ed93969de31e5ffd09a2961a051e60ae6bddde9ec3583aa
    # via requests
click==8.1.7 \
    --hash=sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28 \
    --hash=sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de
    # via black
cryptography==46.0.5 \
    --hash=sha256:02f547fce831f5096c9a567fd41bc12ca8f11df260959ecc7c3202555cc47a72 \
    --hash=sha256:039917b0dc418bb9f6edce8a906572d69e74bd330b0b3fea4f79dab7f8ddd235 \
    --hash=sha256:1abfdb89b41c3be0365328a410baa9df3ff8a9110fb75e7b52e66803ddabc9a9 \
    --hash=sha256:2ae6971afd6246710480e3f15824ed3029a60fc16991db250034efd0b9fb4356 \
    --hash=sha256:2b7a67c9cd56372f3249b39699f2ad479f6991e62ea15800973b956f4b73e257 \
    --hash=sha256:351695ada9ea9618b3500b490ad54c739860883df6c1f555e088eaf25b1bbaad \
    --hash=sha256:38946c54b16c885c72c4f59846be9743d699eee2b69b6988e0a00a01f46a61a4 \
    --hash=sha256:3b4995dc971c9fb83c25aa44cf45f02ba86f71ee600d81091c2f0cbae116b06c \
    --hash=sha256:3ce58ba46e1bc2aac4f7d9290223cead56743fa6ab94a5d53292ffaac6a91614 \
    --hash=sha256:3ee190460e2fbe447175cda91b88b84ae8322a104fc27766ad09428754a618ed \
    --hash=sha256:4108d4c09fbbf2789d0c926eb4152ae1760d5a2d97612b92d508d96c861e4d31 \
    --hash=sha256:420d0e909050490d04359e7fdb5ed7e667ca5c3c402b809ae2563d7e66a92229 \
    --hash=sha256:47fb8a66058b80e509c47118ef8a75d14c455e81ac369050f20ba0d23e77fee0 \
    --hash=sha256:4c3341037c136030cb46e4b1e17b7418ea4cbd9dd207e4a6f3b2b24e0d4ac731 \
    --hash=sha256:4d7e3d356b8cd4ea5aff04f129d5f66ebdc7b6f8eae802b93739ed520c47c79b \
    --hash=sha256:4d8ae8659ab18c65ced284993c2265910f6c9e650189d4e3f68445ef82a810e4 \
    --hash=sha256:4e817a8920bfbcff8940ecfd60f23d01836408242b30f1a708d93198393a80b4 \
    --hash=sha256:50bfb6925eff619c9c023b967d5b77a54e04256c4281b0e21336a130cd7fc263 \
    --hash=sha256:556e106ee01aa13484ce9b0239bca667be5004efb0aabbed28d353df86445595 \
    --hash=sha256:582f5fcd2afa31622f317f80426a027f30dc792e9c80ffee87b993200ea115f1 \
    --hash=sha256:5be7bf2fb40769e05739dd0046e7b26f9d4670badc7b032d6ce4db64dddc0678 \
    --hash=sha256:60ee7e19e95104d4c03871d7d7dfb3d22ef8a9b9c6778c94e1c8fcc8365afd48 \
    --hash=sha256:61aa400dce22cb001a98014f647dc21cda08f7915ceb95df0c9eaf84b4b6af76 \
    --hash=sha256:68f68d13f2e1cb95163fa3b4db4bf9a159a418f5f6e7242564fc75fcae667fd0 \
    --hash=sha256:7d1f30a86d2757199cb2d56e48cce14deddf1f9c95f1ef1b64ee91ea43fe2e18 \
    --hash=sha256:7d731d4b107030987fd61a7f8ab512b25b53cef8f233a97379ede116f30eb67d \
    --hash=sha256:803812e111e75d1aa73690d2facc295eaefd4439be1023fefc4995eaea2af90d \
    --hash=sha256:80a8d7bfdf38f87ca30a5391c0c9ce4ed2926918e017c29ddf643d0ed2778ea1 \
    --hash=sha256:8293f3dea7fc929ef7240796ba231413afa7b68ce38fd21da2995549f5961981 \
    --hash=sha256:8456928655f856c6e1533ff59d5be76578a7157224dbd9ce6872f25055ab9ab7 \
    --hash=sha256:890bcb4abd5a2d3f852196437129eb3667d62630333aacc13dfd470fad3aaa82 \
    --hash=sha256:94a76daa32eb78d61339aff7952ea819b1734b46f73646a07decb40e5b3448e2 \
    --hash=sha256:9f16fbdf4da055efb21c22d81b89f155f02ba420558db21288b3d0035bafd5f4 \
    --hash=sha256:a3d1fae9863299076f05cb8a778c467578262fae09f9dc0ee9b12eb4268ce663 \
    --hash=sha256:a3d507bb6a513ca96ba84443226af944b0f7f47dcc9a399d110cd6146481d24c \
    --hash=sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d \
    --hash=sha256:ba2a27ff02f48193fc4daeadf8ad2590516fa3d0adeeb34336b96f7fa64c1e3a \
    --hash=sha256:bc84e875994c3b445871ea7181d424588171efec3e185dced958dad9e001950a \
    --hash=sha256:bfd56bb4b37ed4f330b82402f6f435845a5f5648edf1ad497da51a8452d5d62d \
    --hash=sha256:c18ff11e86df2e28854939acde2d003f7984f721eba450b56a200ad90eeb0e6b \
    --hash=sha256:c3bcce8521d785d510b2aad26ae2c966092b7daa8f45dd8f44734a104dc0bc1a \
    --hash=sha256:c4143987a42a2397f2fc3b4d7e3a7d313fbe684f67ff443999e803dd75a76826 \
    --hash=sha256:c69fd885df7d089548a42d5ec05be26050ebcd2283d89b3d30676eb32ff87dee \
    --hash=sha256:ced80795227d70549a411a4ab66e8ce307899fad2220ce5ab2f296e687eacde9 \
    --hash=sha256:d66e421495fdb797610a08f43b05269e0a5ea7f5e652a89bfd5a7d3c1dee3648 \
    --hash=sha256:d861ee9e76ace6cf36a6a89b959ec08e7bc2493ee39d07ffe5acb23ef46d27da \
    --hash=sha256:e9251e3be159d1020c4030bd2e5f84d6a43fe54b6c19c12f51cde9542a2817b2 \
    --hash=sha256:f145bba11b878005c496e93e257c1e88f154d278d2638e6450d17e0f31e558d2 \
    --hash=sha256:fe346b143ff9685e40192a4960938545c699054ba11d4f9029f94751e3f71d87
    # via
    #   -r requirements_formatting.txt.in
    #   pyjwt
darker==2.1.1 \
    --hash=sha256:a6e6a682c0604e76fe9aec7650e96a944f517563c69b28fcc076db9d957d98ea \
    --hash=sha256:ead701414c45359fc0312bc285614d3285fc135476d43f3bc08d989ee19d9020
    # via -r requirements_formatting.txt.in
darkgraylib==1.2.1 \
    --hash=sha256:60c59de69842367ce0c78c32c451fa8e9d29500e681312d9864a7416bcdb7792 \
    --hash=sha256:a5dd6a2015a470d9047278cdd01a91ccb1d746675f8fd4562b3b5f6b8cbda930
    # via
    #   darker
    #   graylint
deprecated==1.2.14 \
    --hash=sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c \
    --hash=sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3
    # via pygithub
graylint==1.1.1 \
    --hash=sha256:0fd8e02972ca03d0ef2bf0adea76b5343efcd492d7afb5f658f3e3a724f55a36 \
    --hash=sha256:b7e0eab6c159684dbf5ef84e942c3340f6a6549b02a3d11b1a1763cc4f8f0593
    # via darker
idna==3.10 \
    --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \
    --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3
    # via
    #   -r requirements_formatting.txt.in
    #   requests
mypy-extensions==1.0.0 \
    --hash=sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d \
    --hash=sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782
    # via black
packaging==23.1 \
    --hash=sha256:994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 \
    --hash=sha256:a392980d2b6cffa644431898be54b0045151319d1e7ec34f0cfed48767dd334f
    # via black
pathspec==1.0.4 \
    --hash=sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645 \
    --hash=sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723
    # via black
platformdirs==3.10.0 \
    --hash=sha256:b45696dab2d7cc691a3226759c0d3b00c47c8b6e293d96f6436f733303f77f6d \
    --hash=sha256:d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d
    # via black
pycparser==2.21 \
    --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \
    --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206
    # via cffi
pygithub==2.6.1 \
    --hash=sha256:6f2fa6d076ccae475f9fc392cc6cdbd54db985d4f69b8833a28397de75ed6ca3 \
    --hash=sha256:b5c035392991cca63959e9453286b41b54d83bf2de2daa7d7ff7e4312cebf3bf
    # via -r requirements_formatting.txt.in
pyjwt==2.12.1 \
    --hash=sha256:28ca37c070cad8ba8cd9790cd940535d40274d22f80ab87f3ac6a713e6e8454c \
    --hash=sha256:c74a7a2adf861c04d002db713dd85f84beb242228e671280bf709d765b03672b
    # via
    #   -r requirements_formatting.txt.in
    #   pygithub
pynacl==1.6.2 \
    --hash=sha256:018494d6d696ae03c7e656e5e74cdfd8ea1326962cc401bcf018f1ed8436811c \
    --hash=sha256:04316d1fc625d860b6c162fff704eb8426b1a8bcd3abacea11142cbd99a6b574 \
    --hash=sha256:22de65bb9010a725b0dac248f353bb072969c94fa8d6b1f34b87d7953cf7bbe4 \
    --hash=sha256:26bfcd00dcf2cf160f122186af731ae30ab120c18e8375684ec2670dccd28130 \
    --hash=sha256:2fef529ef3ee487ad8113d287a593fa26f48ee3620d92ecc6f1d09ea38e0709b \
    --hash=sha256:320ef68a41c87547c91a8b58903c9caa641ab01e8512ce291085b5fe2fcb7590 \
    --hash=sha256:3bffb6d0f6becacb6526f8f42adfb5efb26337056ee0831fb9a7044d1a964444 \
    --hash=sha256:44081faff368d6c5553ccf55322ef2819abb40e25afaec7e740f159f74813634 \
    --hash=sha256:46065496ab748469cdd999246d17e301b2c24ae2fdf739132e580a0e94c94a87 \
    --hash=sha256:5811c72b473b2f38f7e2a3dc4f8642e3a3e9b5e7317266e4ced1fba85cae41aa \
    --hash=sha256:622d7b07cc5c02c666795792931b50c91f3ce3c2649762efb1ef0d5684c81594 \
    --hash=sha256:62985f233210dee6548c223301b6c25440852e13d59a8b81490203c3227c5ba0 \
    --hash=sha256:68be3a09455743ff9505491220b64440ced8973fe930f270c8e07ccfa25b1f9e \
    --hash=sha256:834a43af110f743a754448463e8fd61259cd4ab5bbedcf70f9dabad1d28a394c \
    --hash=sha256:8845c0631c0be43abdd865511c41eab235e0be69c81dc66a50911594198679b0 \
    --hash=sha256:8a66d6fb6ae7661c58995f9c6435bda2b1e68b54b598a6a10247bfcdadac996c \
    --hash=sha256:8b097553b380236d51ed11356c953bf8ce36a29a3e596e934ecabe76c985a577 \
    --hash=sha256:a84bf1c20339d06dc0c85d9aea9637a24f718f375d861b2668b2f9f96fa51145 \
    --hash=sha256:a9f9932d8d2811ce1a8ffa79dcbdf3970e7355b5c8eb0c1a881a57e7f7d96e88 \
    --hash=sha256:bc4a36b28dd72fb4845e5d8f9760610588a96d5a51f01d84d8c6ff9849968c14 \
    --hash=sha256:c8a231e36ec2cab018c4ad4358c386e36eede0319a0c41fed24f840b1dac59f6 \
    --hash=sha256:c949ea47e4206af7c8f604b8278093b674f7c79ed0d4719cc836902bf4517465 \
    --hash=sha256:d071c6a9a4c94d79eb665db4ce5cedc537faf74f2355e4d502591d850d3913c0 \
    --hash=sha256:d29bfe37e20e015a7d8b23cfc8bd6aa7909c92a1b8f41ee416bbb3e79ef182b2 \
    --hash=sha256:fe9847ca47d287af41e82be1dd5e23023d3c31a951da134121ab02e42ac218c9
    # via
    #   -r requirements_formatting.txt.in
    #   pygithub
pytokens==0.4.1 \
    --hash=sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1 \
    --hash=sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009 \
    --hash=sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083 \
    --hash=sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1 \
    --hash=sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de \
    --hash=sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2 \
    --hash=sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a \
    --hash=sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1 \
    --hash=sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5 \
    --hash=sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a \
    --hash=sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3 \
    --hash=sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db \
    --hash=sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68 \
    --hash=sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037 \
    --hash=sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321 \
    --hash=sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc \
    --hash=sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7 \
    --hash=sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f \
    --hash=sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918 \
    --hash=sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9 \
    --hash=sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c \
    --hash=sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1 \
    --hash=sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1 \
    --hash=sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3 \
    --hash=sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b \
    --hash=sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb \
    --hash=sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1 \
    --hash=sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a \
    --hash=sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4 \
    --hash=sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa \
    --hash=sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78 \
    --hash=sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe \
    --hash=sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9 \
    --hash=sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d \
    --hash=sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975 \
    --hash=sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440 \
    --hash=sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16 \
    --hash=sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc \
    --hash=sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d \
    --hash=sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6 \
    --hash=sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6 \
    --hash=sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324
    # via black
requests==2.32.4 \
    --hash=sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c \
    --hash=sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422
    # via
    #   -r requirements_formatting.txt.in
    #   pygithub
toml==0.10.2 \
    --hash=sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b \
    --hash=sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f
    # via
    #   darker
    #   darkgraylib
typing-extensions==4.14.1 \
    --hash=sha256:38b39f4aeeab64884ce9f74c94263ef78f3c22467c8724005483154c26648d36 \
    --hash=sha256:d1e1e3b58374dc93031d6eda2420a48ea44a36c2b4766a4fdeb3710755731d76
    # via pygithub
urllib3==2.6.3 \
    --hash=sha256:1b62b6884944a57dbe321509ab94fd4d3b307075e0c2eae991ac71ee15ad38ed \
    --hash=sha256:bf272323e553dfb2e87d9bfd225ca7b0f467b919d7bbd355436d3fd37cb0acd4
    # via
    #   -r requirements_formatting.txt.in
    #   pygithub
    #   requests
wrapt==1.15.0 \
    --hash=sha256:02fce1852f755f44f95af51f69d22e45080102e9d00258053b79367d07af39c0 \
    --hash=sha256:077ff0d1f9d9e4ce6476c1a924a3332452c1406e59d90a2cf24aeb29eeac9420 \
    --hash=sha256:078e2a1a86544e644a68422f881c48b84fef6d18f8c7a957ffd3f2e0a74a0d4a \
    --hash=sha256:0970ddb69bba00670e58955f8019bec4a42d1785db3faa043c33d81de2bf843c \
    --hash=sha256:1286eb30261894e4c70d124d44b7fd07825340869945c79d05bda53a40caa079 \
    --hash=sha256:21f6d9a0d5b3a207cdf7acf8e58d7d13d463e639f0c7e01d82cdb671e6cb7923 \
    --hash=sha256:230ae493696a371f1dbffaad3dafbb742a4d27a0afd2b1aecebe52b740167e7f \
    --hash=sha256:26458da5653aa5b3d8dc8b24192f574a58984c749401f98fff994d41d3f08da1 \
    --hash=sha256:2cf56d0e237280baed46f0b5316661da892565ff58309d4d2ed7dba763d984b8 \
    --hash=sha256:2e51de54d4fb8fb50d6ee8327f9828306a959ae394d3e01a1ba8b2f937747d86 \
    --hash=sha256:2fbfbca668dd15b744418265a9607baa970c347eefd0db6a518aaf0cfbd153c0 \
    --hash=sha256:38adf7198f8f154502883242f9fe7333ab05a5b02de7d83aa2d88ea621f13364 \
    --hash=sha256:3a8564f283394634a7a7054b7983e47dbf39c07712d7b177b37e03f2467a024e \
    --hash=sha256:3abbe948c3cbde2689370a262a8d04e32ec2dd4f27103669a45c6929bcdbfe7c \
    --hash=sha256:3bbe623731d03b186b3d6b0d6f51865bf598587c38d6f7b0be2e27414f7f214e \
    --hash=sha256:40737a081d7497efea35ab9304b829b857f21558acfc7b3272f908d33b0d9d4c \
    --hash=sha256:41d07d029dd4157ae27beab04d22b8e261eddfc6ecd64ff7000b10dc8b3a5727 \
    --hash=sha256:46ed616d5fb42f98630ed70c3529541408166c22cdfd4540b88d5f21006b0eff \
    --hash=sha256:493d389a2b63c88ad56cdc35d0fa5752daac56ca755805b1b0c530f785767d5e \
    --hash=sha256:4ff0d20f2e670800d3ed2b220d40984162089a6e2c9646fdb09b85e6f9a8fc29 \
    --hash=sha256:54accd4b8bc202966bafafd16e69da9d5640ff92389d33d28555c5fd4f25ccb7 \
    --hash=sha256:56374914b132c702aa9aa9959c550004b8847148f95e1b824772d453ac204a72 \
    --hash=sha256:578383d740457fa790fdf85e6d346fda1416a40549fe8db08e5e9bd281c6a475 \
    --hash=sha256:58d7a75d731e8c63614222bcb21dd992b4ab01a399f1f09dd82af17bbfc2368a \
    --hash=sha256:5c5aa28df055697d7c37d2099a7bc09f559d5053c3349b1ad0c39000e611d317 \
    --hash=sha256:5fc8e02f5984a55d2c653f5fea93531e9836abbd84342c1d1e17abc4a15084c2 \
    --hash=sha256:63424c681923b9f3bfbc5e3205aafe790904053d42ddcc08542181a30a7a51bd \
    --hash=sha256:64b1df0f83706b4ef4cfb4fb0e4c2669100fd7ecacfb59e091fad300d4e04640 \
    --hash=sha256:74934ebd71950e3db69960a7da29204f89624dde411afbfb3b4858c1409b1e98 \
    --hash=sha256:75669d77bb2c071333417617a235324a1618dba66f82a750362eccbe5b61d248 \
    --hash=sha256:75760a47c06b5974aa5e01949bf7e66d2af4d08cb8c1d6516af5e39595397f5e \
    --hash=sha256:76407ab327158c510f44ded207e2f76b657303e17cb7a572ffe2f5a8a48aa04d \
    --hash=sha256:76e9c727a874b4856d11a32fb0b389afc61ce8aaf281ada613713ddeadd1cfec \
    --hash=sha256:77d4c1b881076c3ba173484dfa53d3582c1c8ff1f914c6461ab70c8428b796c1 \
    --hash=sha256:780c82a41dc493b62fc5884fb1d3a3b81106642c5c5c78d6a0d4cbe96d62ba7e \
    --hash=sha256:7dc0713bf81287a00516ef43137273b23ee414fe41a3c14be10dd95ed98a2df9 \
    --hash=sha256:7eebcdbe3677e58dd4c0e03b4f2cfa346ed4049687d839adad68cc38bb559c92 \
    --hash=sha256:896689fddba4f23ef7c718279e42f8834041a21342d95e56922e1c10c0cc7afb \
    --hash=sha256:96177eb5645b1c6985f5c11d03fc2dbda9ad24ec0f3a46dcce91445747e15094 \
    --hash=sha256:96e25c8603a155559231c19c0349245eeb4ac0096fe3c1d0be5c47e075bd4f46 \
    --hash=sha256:9d37ac69edc5614b90516807de32d08cb8e7b12260a285ee330955604ed9dd29 \
    --hash=sha256:9ed6aa0726b9b60911f4aed8ec5b8dd7bf3491476015819f56473ffaef8959bd \
    --hash=sha256:a487f72a25904e2b4bbc0817ce7a8de94363bd7e79890510174da9d901c38705 \
    --hash=sha256:a4cbb9ff5795cd66f0066bdf5947f170f5d63a9274f99bdbca02fd973adcf2a8 \
    --hash=sha256:a74d56552ddbde46c246b5b89199cb3fd182f9c346c784e1a93e4dc3f5ec9975 \
    --hash=sha256:a89ce3fd220ff144bd9d54da333ec0de0399b52c9ac3d2ce34b569cf1a5748fb \
    --hash=sha256:abd52a09d03adf9c763d706df707c343293d5d106aea53483e0ec8d9e310ad5e \
    --hash=sha256:abd8f36c99512755b8456047b7be10372fca271bf1467a1caa88db991e7c421b \
    --hash=sha256:af5bd9ccb188f6a5fdda9f1f09d9f4c86cc8a539bd48a0bfdc97723970348418 \
    --hash=sha256:b02f21c1e2074943312d03d243ac4388319f2456576b2c6023041c4d57cd7019 \
    --hash=sha256:b06fa97478a5f478fb05e1980980a7cdf2712015493b44d0c87606c1513ed5b1 \
    --hash=sha256:b0724f05c396b0a4c36a3226c31648385deb6a65d8992644c12a4963c70326ba \
    --hash=sha256:b130fe77361d6771ecf5a219d8e0817d61b236b7d8b37cc045172e574ed219e6 \
    --hash=sha256:b56d5519e470d3f2fe4aa7585f0632b060d532d0696c5bdfb5e8319e1d0f69a2 \
    --hash=sha256:b67b819628e3b748fd3c2192c15fb951f549d0f47c0449af0764d7647302fda3 \
    --hash=sha256:ba1711cda2d30634a7e452fc79eabcadaffedf241ff206db2ee93dd2c89a60e7 \
    --hash=sha256:bbeccb1aa40ab88cd29e6c7d8585582c99548f55f9b2581dfc5ba68c59a85752 \
    --hash=sha256:bd84395aab8e4d36263cd1b9308cd504f6cf713b7d6d3ce25ea55670baec5416 \
    --hash=sha256:c99f4309f5145b93eca6e35ac1a988f0dc0a7ccf9ccdcd78d3c0adf57224e62f \
    --hash=sha256:ca1cccf838cd28d5a0883b342474c630ac48cac5df0ee6eacc9c7290f76b11c1 \
    --hash=sha256:cd525e0e52a5ff16653a3fc9e3dd827981917d34996600bbc34c05d048ca35cc \
    --hash=sha256:cdb4f085756c96a3af04e6eca7f08b1345e94b53af8921b25c72f096e704e145 \
    --hash=sha256:ce42618f67741d4697684e501ef02f29e758a123aa2d669e2d964ff734ee00ee \
    --hash=sha256:d06730c6aed78cee4126234cf2d071e01b44b915e725a6cb439a879ec9754a3a \
    --hash=sha256:d5fe3e099cf07d0fb5a1e23d399e5d4d1ca3e6dfcbe5c8570ccff3e9208274f7 \
    --hash=sha256:d6bcbfc99f55655c3d93feb7ef3800bd5bbe963a755687cbf1f490a71fb7794b \
    --hash=sha256:d787272ed958a05b2c86311d3a4135d3c2aeea4fc655705f074130aa57d71653 \
    --hash=sha256:e169e957c33576f47e21864cf3fc9ff47c223a4ebca8960079b8bd36cb014fd0 \
    --hash=sha256:e20076a211cd6f9b44a6be58f7eeafa7ab5720eb796975d0c03f05b47d89eb90 \
    --hash=sha256:e826aadda3cae59295b95343db8f3d965fb31059da7de01ee8d1c40a60398b29 \
    --hash=sha256:eef4d64c650f33347c1f9266fa5ae001440b232ad9b98f1f43dfe7a79435c0a6 \
    --hash=sha256:f2e69b3ed24544b0d3dbe2c5c0ba5153ce50dcebb576fdc4696d52aa22db6034 \
    --hash=sha256:f87ec75864c37c4c6cb908d282e1969e79763e0d9becdfe9fe5473b7bb1e5f09 \
    --hash=sha256:fbec11614dba0424ca72f4e8ba3c420dba07b4a7c206c8c8e4e73f2e98f4c559 \
    --hash=sha256:fd69666217b62fa5d7c6aa88e507493a34dec4fa20c5bd925e4bc12fce586639
    # via deprecated


================================================
FILE: External/code-format-helper/requirements_formatting.txt.in
================================================
black>=26.3.1
darker==2.1.1
PyGithub==2.6.1
cryptography>=46.0.5
urllib3>=2.6.3
requests>=2.32.4
idna>=3.7
certifi>=2024.7.4
PyNaCl>=1.6.2
PyJWT>=2.12.1


================================================
FILE: External/tiny-json/CMakeLists.txt
================================================
# Vendored tiny-json parser, built as a static library from its single source file.
add_library(tiny-json STATIC tiny-json.c)

# Namespaced alias so consumers can link against tiny-json::tiny-json.
add_library(tiny-json::tiny-json ALIAS tiny-json)

# Expose this directory so dependants can include "tiny-json.h".
target_include_directories(tiny-json PUBLIC ${CMAKE_CURRENT_LIST_DIR})

================================================
FILE: External/tiny-json/LICENSE
================================================
MIT License

Copyright (c) 2018 Rafa Garcia

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: External/tiny-json/tiny-json.c
================================================

/*

<https://github.com/rafagafe/tiny-json>
     
  Licensed under the MIT License <http://opensource.org/licenses/MIT>.
  SPDX-License-Identifier: MIT
  Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>.

  Permission is hereby  granted, free of charge, to any  person obtaining a copy
  of this software and associated  documentation files (the "Software"), to deal
  in the Software  without restriction, including without  limitation the rights
  to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
  copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in all
  copies or substantial portions of the Software.

  THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
  IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
  FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
  AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
  LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
    
*/

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stddef.h> // For NULL
#include "tiny-json.h"

/** Structure to handle a heap of JSON properties.
  * Couples a caller-supplied json_t array with the generic jsonPool_t
  * interface: json_create() builds one of these on its stack and passes the
  * address of the embedded `pool` member to json_createWithPool(). */
typedef struct jsonStaticPool_s {
    json_t* const mem;      /**< Pointer to array of json properties.      */
    unsigned int const qty; /**< Length of the array of json properties.   */
    unsigned int nextFree;  /**< The index of the next free json property. */
    jsonPool_t pool;        /**< Pool interface (init/alloc callbacks) handed to the parser. */
} jsonStaticPool_t;

/* Look up a direct child of a JSON node by name.
 * Walks the child list of `obj` in order and returns the first child whose
 * name equals `property`. Children without a name are skipped.
 * Returns a null pointer when no child matches. */
json_t const* json_getProperty( json_t const* obj, char const* property ) {
    json_t const* node = obj->u.c.child;
    while ( node ) {
        if ( node->name && strcmp( node->name, property ) == 0 )
            return node;
        node = node->sibling;
    }
    return 0;
}

/* Look up a child of `obj` by name and return its value string.
 * Returns a null pointer when the property does not exist, or when its type
 * compares less than or equal to JSON_ARRAY (presumably the container types;
 * confirm against the jsonType_t ordering in tiny-json.h). */
char const* json_getPropertyValue( json_t const* obj, char const* property ) {
    json_t const* const field = json_getProperty( obj, property );
    if ( !field ) return 0;
    return ( json_getType( field ) > JSON_ARRAY ) ? json_getValue( field ) : 0;
}

/* Internal prototypes: */
static char* goBlank( char* str );
static char* goNum( char* str );
static json_t* poolInit( jsonPool_t* pool );
static json_t* poolAlloc( jsonPool_t* pool );
static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool );
static char* setToNull( char* ch );
static bool isEndOfPrimitive( char ch );

/* Parse a string to get a json, taking nodes from a caller-supplied pool.
 * The position returned by goBlank( str ) must hold '{'; anything else is
 * rejected. The root node is obtained from pool->init() and filled in by
 * objValue(). The input buffer is expected to be writable: helpers in this
 * file such as parseString() rewrite it in place.
 * Returns the root node, or a null pointer when the text is rejected, the
 * pool cannot supply a root node, or objValue() reports an error. */
json_t const* json_createWithPool( char *str, jsonPool_t *pool ) {
    char* ptr = goBlank( str );
    if ( !ptr || *ptr != '{' ) return 0;
    json_t* obj = pool->init( pool );
    /* A pool with no storage may hand back nothing; do not dereference it. */
    if ( !obj ) return 0;
    obj->name    = 0;
    obj->sibling = 0;
    obj->u.c.child = 0;
    ptr = objValue( ptr, obj, pool );
    if ( !ptr ) return 0;
    return obj;
}

/* Parse a string to get a json, using a caller-supplied array as node storage.
 * `mem` must point to `qty` json_t elements. The array is wrapped in a
 * jsonStaticPool_t and forwarded to json_createWithPool().
 * Returns the root node, or a null pointer on a parse error or when no
 * storage is available (`mem` is null or `qty` is zero). */
json_t const* json_create( char* str, json_t mem[], unsigned int qty ) {
    /* json_createWithPool() always writes a root node, so an absent or empty
     * array can never hold a parse result. */
    if ( !mem || qty == 0 ) return 0;
    jsonStaticPool_t spool = {
        .mem  = mem,
        .qty  = qty,
        .pool = {
            .init = poolInit,
            .alloc = poolAlloc
        }
    };
    return json_createWithPool( str, &spool.pool );
}

/** Translate the character that follows a backslash into the character it
  * stands for. Examples: 'b' -> '\b', 'n' -> '\n', 't' -> '\t'.
  * The quote, backslash and slash map to themselves.
  * @param ch The escape character.
  * @return The character code, or '\0' if ch is not a recognised escape. */
static char getEscape( char ch ) {
    switch ( ch ) {
        case '\"': return '\"';
        case '\\': return '\\';
        case '/':  return '/';
        case 'b':  return '\b';
        case 'f':  return '\f';
        case 'n':  return '\n';
        case 'r':  return '\r';
        case 't':  return '\t';
        default:   return '\0';
    }
}

/** Validate the four characters of a \uXXXX escape.
  * The code point itself is not decoded; a valid escape is always replaced
  * by the placeholder '?'.
  * @param str Pointer to first digit.
  * @retval '?' If the four characters are hexadecimal digits.
  * @retval '\0' In other cases. */
static unsigned char getCharFromUnicode( unsigned char const* str ) {
    unsigned int left;
    /* Stops at the first non-hex byte, so a terminator inside the four
     * positions is never read past. */
    for ( left = 4; left > 0; --left, ++str )
        if ( !isxdigit( *str ) )
            return '\0';
    return '?';
}

/** Parse a string in place, replacing escape sequences by the characters they
  * stand for. Parsing stops at the closing '\"', which is overwritten (at the
  * write position) by '\0'. Any byte below ' ' — including the terminator of
  * the input — ends parsing with an error.
  * @param str Pointer to first character.
  * @retval Pointer to the character just after the closing quote. If success.
  * @retval Null pointer if any error occur. */
static char* parseString( char* str ) {
    unsigned char* src = (unsigned char*)str; /* read position  */
    unsigned char* dst = (unsigned char*)str; /* write position */
    while ( *src >= ' ' ) {
        if ( *src == '\"' ) {
            *dst = '\0';
            return (char*)( src + 1 );
        }
        if ( *src != '\\' ) {
            *dst = *src;
        }
        else if ( src[1] == 'u' ) {
            /* Step over the backslash and 'u' to the first hex digit. */
            src += 2;
            char const ch = getCharFromUnicode( src );
            if ( ch == '\0' ) return 0;
            *dst = ch;
            /* Leave src on the last hex digit; the shared advance below
             * moves past it. */
            src += 3;
        }
        else {
            ++src;
            char const esc = getEscape( *src );
            if ( esc == '\0' ) return 0;
            *dst = esc;
        }
        ++src;
        ++dst;
    }
    return 0;
}

/** Parse a string to get the name of a property.
  * The name starts one character after ptr and is terminated in place by
  * parseString(). It must be followed, after optional blanks, by ':'.
  * @param ptr Pointer to the character just before the name text.
  * @param property The property to assign the name.
  * @retval Pointer to first of property value. If success.
  * @retval Null pointer if any error occur. */
static char* propertyName( char* ptr, json_t* property ) {
    char* cursor = ptr + 1;
    property->name = cursor;
    cursor = parseString( cursor );
    if ( cursor ) cursor = goBlank( cursor );
    if ( !cursor || *cursor != ':' ) return 0;
    return goBlank( cursor + 1 );
}

/** Parse a string to get the value of a property when its type is JSON_TEXT.
  * Advances property->u.value by one character and parses the text that
  * starts one character after ptr.
  * @param ptr Pointer to first character ('\"').
  * @param property The property whose value and type are updated.
  * @retval Pointer to the character after the closing quote. If success.
  * @retval Null pointer if any error occur. */
static char* textValue( char* ptr, json_t* property ) {
    property->u.value += 1;
    char* const after = parseString( ptr + 1 );
    if ( after ) property->type = JSON_TEXT;
    return after;
}

/** Check that the text at ptr begins with the whole of str.
  * Comparison runs until the null character of str is reached.
  * @param ptr Text being scanned.
  * @param str Expected literal (null terminated).
  * @retval Pointer to the character in ptr just after the matched literal.
  * @retval Null pointer if the text does not match. */
static char* checkStr( char* ptr, char const* str ) {
    for ( ; *str != '\0'; ++ptr, ++str )
        if ( *ptr != *str )
            return 0;
    return ptr;
}

/** Parse a literal primitive value (true, false or null).
  * The literal must be followed by a character accepted by
  * isEndOfPrimitive(); that position is then passed to setToNull()
  * (per the original notes: set to '\0' unless it is '}' or ']').
  * @param ptr Pointer to first character.
  * @param property Property handler whose type is set.
  * @param value String with the primitive literal.
  * @param type The code of the type. ( JSON_BOOLEAN or JSON_NULL )
  * @retval Pointer returned by setToNull(). If success.
  * @retval Null pointer if any error occur. */
static char* primitiveValue( char* ptr, json_t* property, char const* value, jsonType_t type ) {
    char* const end = checkStr( ptr, value );
    if ( !end ) return 0;
    if ( !isEndOfPrimitive( *end ) ) return 0;
    property->type = type;
    return setToNull( end );
}

/** Parse the literal "true" and mark the property as JSON_BOOLEAN.
  * Thin wrapper over primitiveValue().
  * @param ptr Pointer to first character.
  * @param property Property handler whose type is set.
  * @retval Pointer returned by primitiveValue(). If success.
  * @retval Null pointer if any error occur. */
static char* trueValue( char* ptr, json_t* property ) {
    static char const literal[] = "true";
    return primitiveValue( ptr, property, literal, JSON_BOOLEAN );
}

/** Parse the literal "false" and mark the property as JSON_BOOLEAN.
  * Thin wrapper over primitiveValue().
  * @param ptr Pointer to first character.
  * @param property Property handler whose type is set.
  * @retval Pointer returned by primitiveValue(). If success.
  * @retval Null pointer if any error occur. */
static char* falseValue( char* ptr, json_t* property ) {
    static char const literal[] = "false";
    return primitiveValue( ptr, property, literal, JSON_BOOLEAN );
}

/** Parse the literal "null" and mark the property as JSON_NULL.
  * Thin wrapper over primitiveValue().
  * @param ptr Pointer to first character.
  * @param property Property handler whose type is set.
  * @retval Pointer returned by primitiveValue(). If success.
  * @retval Null pointer if any error occur. */
static char* nullValue( char* ptr, json_t* property ) {
    static char const literal[] = "null";
    return primitiveValue( ptr, property, literal, JSON_NULL );
}

/** Analyze the exponential part of a real number.
  * Accepts an optional '+' or '-' sign followed by at least one digit.
  * @param ptr Pointer to first character after the 'e' / 'E'.
  * @retval Pointer returned by goNum() for the rest of the digits. If success.
  * @retval Null pointer if any error occur. */
static char* expValue( char* ptr ) {
    if ( *ptr == '-' || *ptr == '+' ) ++ptr;
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char holding a byte >= 0x80 may be negative. */
    if ( !isdigit( (unsigned char)*ptr ) ) return 0;
    return goNum( ptr + 1 );
}

/** Analyze the decimal part of a real number.
  * Requires at least one digit.
  * @param ptr Pointer to first character after the '.'.
  * @retval Pointer returned by goNum() for the rest of the digits. If success.
  * @retval Null pointer if any error occur. */
static char* fraqValue( char* ptr ) {
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char holding a byte >= 0x80 may be negative. */
    if ( !isdigit( (unsigned char)*ptr ) ) return 0;
    return goNum( ptr + 1 );
}

/** Parse a numerical value.
  * Grammar accepted: optional '-', then either a single '0' or a digit run
  * not starting with '0', then an optional fraction ('.' digits) and an
  * optional exponent ('e'/'E', optional sign, digits). The number must be
  * followed by a character accepted by isEndOfPrimitive(); that position is
  * then passed to setToNull() (per the original notes: set to '\0' unless it
  * is '}' or ']').
  * Integers are additionally range-checked against the signed 64-bit limits
  * by comparing their text with the limit literals. For that check
  * property->u.value is read as the start of the number, sign included
  * (it is set by the caller, which is not part of this function).
  * @param ptr Pointer to first character.
  * @param property Property handler to set the value and the type: JSON_REAL or JSON_INTEGER.
  * @retval Pointer returned by setToNull(). If success.
  * @retval Null pointer if any error occur. */
static char* numValue( char* ptr, json_t* property ) {
    if ( *ptr == '-' ) ++ptr;
    /* <ctype.h> functions require a value representable as unsigned char;
     * a plain char holding a byte >= 0x80 may be negative. */
    if ( !isdigit( (unsigned char)*ptr ) ) return 0;
    if ( *ptr != '0' ) {
        ptr = goNum( ptr );
        if ( !ptr ) return 0;
    }
    else {
        /* A leading zero must stand alone: "01" is rejected. */
        ++ptr;
        if ( isdigit( (unsigned char)*ptr ) ) return 0;
    }
    property->type = JSON_INTEGER;
    if ( *ptr == '.' ) {
        ptr = fraqValue( ++ptr );
        if ( !ptr ) return 0;
        property->type = JSON_REAL;
    }
    if ( *ptr == 'e' || *ptr == 'E' ) {
        ptr = expValue( ++ptr );
        if ( !ptr ) return 0;
        property->type = JSON_REAL;
    }
    if ( !isEndOfPrimitive( *ptr ) ) return 0;
    if ( JSON_INTEGER == property->type ) {
        char const* value = property->u.value;
        bool const negative = *value == '-';
        static char const min[] = "-9223372036854775808";
        static char const max[] = "9223372036854775807";
        size_t const maxdigits = ( negative ? sizeof min : sizeof max ) - 1;
        size_t const len = (size_t)( ptr - value );
        if ( len > maxdigits ) return 0;
        if ( len == maxdigits ) {
            /* Same length as the limit: a lexicographic comparison of the
             * digit strings orders them by magnitude. Terminate the number
             * temporarily for strcmp and always put the byte back, even when
             * the value is rejected. */
            char const tmp = *ptr;
            *ptr = '\0';
            int const cmp = strcmp( negative ? min : max, value );
            *ptr = tmp;
            if ( cmp < 0 ) return 0;
        }
    }
    return setToNull( ptr );
}

/** Append a property at the end of the child list of a JSON object or array.
  * @param obj The handler of the JSON object or array that receives the property.
  * @param property The handler of the property to be appended. Its sibling link is cleared. */
static void add( json_t* obj, json_t* property ) {
    property->sibling = 0;
    if ( obj->u.c.child )
        /* Non-empty container: chain the newcomer after the current tail. */
        obj->u.c.last_child->sibling = property;
    else
        /* Empty container: the newcomer becomes the head. */
        obj->u.c.child = property;
    /* In both cases the newcomer is the new tail. */
    obj->u.c.last_child = property;
}

/** Parse a string to get a JSON object value, including every value nested inside it.
  * Nesting is handled iteratively: while a container is being filled, its 'sibling'
  * field temporarily points to the enclosing container and is reset to null when the
  * container's closing bracket is reached.
  * @param ptr Pointer to first character of the value. It is skipped without being examined.
  * @param obj Handler that receives the root value. It is tagged as JSON_OBJ.
  * @param pool The handler of a json pool for creating json instances.
  * @retval Pointer to first character after the value. If success.
  * @retval Null pointer if any error occurs. */
static char* objValue( char* ptr, json_t* obj, jsonPool_t* pool ) {
    obj->type    = JSON_OBJ;
    obj->u.c.child = 0;
    /* A null parent link marks the root container. */
    obj->sibling = 0;
    ptr++;
    for(;;) {
        ptr = goBlank( ptr );
        if ( !ptr ) return 0;
        /* Commas are skipped wherever they appear between values. */
        if ( *ptr == ',' ) {
            ++ptr;
            continue;
        }
        /* The closing bracket expected depends on the container being filled. */
        char const endchar = ( obj->type == JSON_OBJ )? '}': ']';
        if ( *ptr == endchar ) {
            /* Overwrite the closing bracket with a string terminator. */
            *ptr = '\0';
            /* While filling, 'sibling' holds the enclosing container. */
            json_t* parentObj = obj->sibling;
            /* No enclosing container: the root value is complete. */
            if ( !parentObj ) return ++ptr;
            /* Clear the temporary parent link and resume filling the parent. */
            obj->sibling = 0;
            obj = parentObj;
            ++ptr;
            continue;
        }
        json_t* property = pool->alloc( pool );
        if ( !property ) return 0;
        /* Object members carry a quoted name; array elements are unnamed. */
        if( obj->type != JSON_ARRAY ) {
            if ( *ptr != '\"' ) return 0;
            ptr = propertyName( ptr, property );
            if ( !ptr ) return 0;
        }
        else property->name = 0;
        add( obj, property );
        /* Start of the value text; the container cases below overwrite this
         * union member with an empty child list. */
        property->u.value = ptr;
        switch( *ptr ) {
            case '{':
                /* Descend into a nested object, remembering the parent in 'sibling'. */
                property->type    = JSON_OBJ;
                property->u.c.child = 0;
                property->sibling = obj;
                obj = property;
                ++ptr;
                break;
            case '[':
                /* Descend into a nested array, remembering the parent in 'sibling'. */
                property->type    = JSON_ARRAY;
                property->u.c.child = 0;
                property->sibling = obj;
                obj = property;
                ++ptr;
                break;
            case '\"': ptr = textValue( ptr, property );  break;
            case 't':  ptr = trueValue( ptr, property );  break;
            case 'f':  ptr = falseValue( ptr, property ); break;
            case 'n':  ptr = nullValue( ptr, property );  break;
            /* Anything else is tried as a number; numValue rejects non-numeric text. */
            default:   ptr = numValue( ptr, property );   break;
        }
        if ( !ptr ) return 0;
    }
}

/** Reset a static json pool and hand out its first element.
  * @param pool The handler of the pool; it must be the 'pool' member of a jsonStaticPool_t.
  * @return The first json instance of the pool. The next allocation index is set to 1. */
static json_t* poolInit( jsonPool_t* pool ) {
    jsonStaticPool_t* const self = json_containerOf( pool, jsonStaticPool_t, pool );
    json_t* const first = self->mem;
    self->nextFree = 1;
    return first;
}

/** Take the next unused json instance from a static pool.
  * @param pool The handler of the pool; it must be the 'pool' member of a jsonStaticPool_t.
  * @retval The handler of the new instance if success.
  * @retval Null pointer if every instance of the pool is already in use. */
static json_t* poolAlloc( jsonPool_t* pool ) {
    jsonStaticPool_t* const self = json_containerOf( pool, jsonStaticPool_t, pool );
    if ( self->nextFree < self->qty ) {
        json_t* const slot = self->mem + self->nextFree;
        self->nextFree++;
        return slot;
    }
    return 0;
}

/** Check whether a character belongs to a set.
  * The terminating null of the set is never considered a member.
  * @param ch Character value to be checked.
  * @param set Set of characters. It is just a null-terminated string.
  * @return true if ch is found in set, false otherwise. */
static bool isOneOfThem( char ch, char const* set ) {
    for( char const* member = set; *member != '\0'; ++member ) {
        if ( *member == ch )
            return true;
    }
    return false;
}

/** Advance a pointer past every leading character that belongs to a set.
  * @param str The initial pointer value.
  * @param set Set of characters. It is just a null-terminated string.
  * @return Pointer to the first character outside the set,
  *         or null pointer if the end of the string was reached first. */
static char* goWhile( char* str, char const* set ) {
    while( *str != '\0' && isOneOfThem( *str, set ) )
        ++str;
    return ( *str != '\0' ) ? str : 0;
}

/** Set of characters treated as blank: space, line feed, carriage return,
  * horizontal tab and form feed. */
static char const* const blank = " \n\r\t\f";

/** Advance a pointer while it points to a white space character (see 'blank').
  * @param str The initial pointer value.
  * @return Pointer to the first non-blank character,
  *         or null pointer if the null character was found first. */
static char* goBlank( char* str ) {
    return goWhile( str, blank );
}

/** Advance a pointer while it points to a decimal digit character.
  * @param str The initial pointer value.
  * @return Pointer to the first non-digit character,
  *         or null pointer if the null character was found first. */
static char* goNum( char* str ) {
    for( ; *str != '\0'; ++str ) {
        /* <ctype.h> classifiers require a value representable as unsigned char;
         * passing a negative plain char (any byte >= 0x80 where char is signed)
         * is undefined behaviour. */
        if ( !isdigit( (unsigned char)*str ) )
            return str;
    }
    return 0;
}

/** Set of characters that close a JSON object ('}') or an array (']'). */
static char const* const endofblock = "}]";

/** Terminate a value in place unless it is directly followed by '}' or ']'.
  * A closing bracket is left untouched so the container parser can still see it.
  * @param ch Pointer to the character that follows the value.
  * @return The same pointer if it addresses '}' or ']';
  *         otherwise the character is set to '\0' and the next position is returned. */
static char* setToNull( char* ch ) {
    if ( isOneOfThem( *ch, endofblock ) )
        return ch;
    *ch = '\0';
    return ch + 1;
}

/** Tell whether a character may directly follow a primitive value:
  * a comma, a blank, or a closing '}' / ']'.
  * @param ch Character to be checked.
  * @return true if ch ends a primitive value, false otherwise. */
static bool isEndOfPrimitive( char ch ) {
    if ( ch == ',' )
        return true;
    if ( isOneOfThem( ch, blank ) )
        return true;
    return isOneOfThem( ch, endofblock );
}

/** Append one character to a string under construction.
  * @param dest Pointer to the null character of the string.
  * @param ch Value to be added.
  * @return Pointer to the new null character of the destination string. */
static char* chtoa( char* dest, char ch ) {
    dest[0] = ch;
    dest[1] = '\0';
    return dest + 1;
}

/** Copy a null-terminated string, terminator included.
  * @param dest Destination memory block.
  * @param src Source string.
  * @return Pointer to the null character of the destination string. */
static char* atoa( char* dest, char const* src ) {
    while( *src != '\0' )
        *dest++ = *src++;
    *dest = '\0';
    return dest;
}

/* Open a JSON object in a JSON string.
 * Writes '{' for an unnamed object, or "name":{ when a name is given,
 * and returns the new end of the string. */
char* json_objOpen( char* dest, char const* name ) {
    if ( NULL != name ) {
        dest = chtoa( dest, '\"' );
        dest = atoa( dest, name );
        return atoa( dest, "\":{" );
    }
    return chtoa( dest, '{' );
}

/* Close a JSON object in a JSON string.
 * A trailing comma left by the last member is overwritten, then "}," is appended.
 * The character before dest is read, so dest must not be the start of the buffer. */
char* json_objClose( char* dest ) {
    char* const end = ( dest[-1] == ',' ) ? dest - 1 : dest;
    return atoa( end, "}," );
}

/* Open an array in a JSON string.
 * Writes '[' for an unnamed array, or "name":[ when a name is given,
 * and returns the new end of the string. */
char* json_arrOpen( char* dest, char const* name ) {
    if ( NULL != name ) {
        dest = chtoa( dest, '\"' );
        dest = atoa( dest, name );
        return atoa( dest, "\":[" );
    }
    return chtoa( dest, '[' );
}

/* Close an array in a JSON string.
 * A trailing comma left by the last element is overwritten, then "]," is appended.
 * The character before dest is read, so dest must not be the start of the buffer. */
char* json_arrClose( char* dest ) {
    char* const end = ( dest[-1] == ',' ) ? dest - 1 : dest;
    return atoa( end, "]," );
}

/** Write the prefix of a text property.
  * A double quote is always written. For an unnamed value it is the opening quote
  * of the value itself; for a named one it opens the name, which is then followed
  * by the closing quote, a colon and the opening quote of the value.
  * @param dest Destination memory.
  * @param name The name of the property, or null for an unnamed value.
  * @return Pointer to the next char. */
static char* strname( char* dest, char const* name ) {
    dest = chtoa( dest, '\"' );
    if ( NULL == name )
        return dest;
    dest = atoa( dest, name );
    return atoa( dest, "\":\"" );
}

/** Get the upper-case hexadecimal digit for the least significant nibble of an integer.
  * @param nibble Value whose low four bits select the digit.
  * @return One of the characters '0'..'9', 'A'..'F'. */
static int nibbletoch( int nibble ) {
    static char const digits[] = "0123456789ABCDEF";
    return digits[ nibble % 16u ];
}

/** Get the letter used after a backslash to escape a character.
  * @param ch Character source.
  * @return The escape letter, or null character if ch has no short escape form. */
static int escape( int ch ) {
    switch( ch ) {
        case '\"': return '\"';
        case '\\': return '\\';
        case '/':  return '/';
        case '\b': return 'b';
        case '\f': return 'f';
        case '\n': return 'n';
        case '\r': return 'r';
        case '\t': return 't';
        default:   return '\0';
    }
}

/** Copy a null-terminated string inserting escape characters if needed.
  * The double quote, backslash and slash are written as a backslash followed by
  * the character; control characters below space are written with their short
  * escape (\b \f \n \r \t) or as \u00XX. Every other byte, including bytes of
  * multi-byte UTF-8 sequences, is copied unchanged.
  * @param dest Destination memory block. In the worst case six bytes are
  *             written per source byte, plus the terminator.
  * @param src Source string.
  * @return Pointer to the null character of the destination string. */
static char* atoesc( char* dest, char const* src ) {
    for( ; *src != '\0'; ++dest, ++src ) {
        /* Classify the byte as unsigned so that values >= 0x80 are not mistaken
         * for control characters on targets where plain char is signed. */
        unsigned char const ch = (unsigned char)*src;
        if ( ch >= ' ' && ch != '\"' && ch != '\\' && ch != '/' )
            *dest = *src;
        else {
            *dest++ = '\\';
            int const esc = escape( ch );
            if ( esc )
                *dest = esc;
            else {
                *dest++ = 'u';
                *dest++ = '0';
                *dest++ = '0';
                *dest++ = nibbletoch( ch / 16 );
                /* No post-increment here: the loop header advances dest past
                 * this last digit. Incrementing twice would leave a gap byte. */
                *dest   = nibbletoch( ch );
            }
        }
    }
    *dest = '\0';
    return dest;
}

/* Add a text property in a JSON string.
 * Writes the optional name, the escaped value and the closing quote plus a comma,
 * and returns the new end of the string. */
char* json_str( char* dest, char const* name, char const* value ) {
    char* cursor = strname( dest, name );
    cursor = atoesc( cursor, value );
    return atoa( cursor, "\"," );
}

/** Write the quoted name and the colon of a primitive property.
  * Nothing is written for an unnamed value.
  * @param dest Destination memory.
  * @param name The name of the property, or null for an unnamed value.
  * @return Pointer to the next char. */
static char* primitivename( char* dest, char const* name ) {
    if ( NULL != name ) {
        dest = chtoa( dest, '\"' );
        dest = atoa( dest, name );
        dest = atoa( dest, "\":" );
    }
    return dest;
}

/* Add a boolean property in a JSON string.
 * Any non-zero value is written as true; a comma follows the literal. */
char* json_bool( char* dest, char const* name, int value ) {
    char const* const literal = value ? "true," : "false,";
    return atoa( primitivename( dest, name ), literal );
}

/* Add a null property in a JSON string.
 * Writes the optional name followed by the literal null and a comma. */
char* json_null( char* dest, char const* name ) {
    return atoa( primitivename( dest, name ), "null," );
}

/* Used to finish the root JSON object. After call json_objClose().
 * Removes the trailing comma, if any, and returns the new end of the string.
 * The character before dest is read, so dest must not be the start of the buffer. */
char* json_end( char* dest ) {
    char* const last = dest - 1;
    if ( *last != ',' )
        return dest;
    *last = '\0';
    return last;
}

/* X-macro table: one row per numeric writer, giving the function name,
 * the C type of the value and the printf format used to print it.
 * NOTE(review): "%g" prints six significant digits by default, so json_double
 * may not round-trip every double - confirm this is acceptable for callers. */
#define ALL_TYPES \
    X( json_int,      int,           "%d"   ) \
    X( json_long,     long,          "%ld"  ) \
    X( json_uint,     unsigned int,  "%u"   ) \
    X( json_ulong,    unsigned long, "%lu"  ) \
    X( json_verylong, long long,     "%lld" ) \
    X( json_double,   double,        "%g"   ) \


/* Template of a numeric writer: optional name, the value printed with fmt
 * (dest advances by the number of characters sprintf reports), then a comma.
 * No bound is checked; the caller must provide a large enough buffer. */
#define json_num( funcname, type, fmt )                         \
char* funcname( char* dest, char const* name, type value ) {    \
    dest = primitivename( dest, name );                         \
    dest += sprintf( dest, fmt, value );                        \
    dest = chtoa( dest, ',' );                                  \
    return dest;                                                \
}

/* Instantiate one function per row of the table. */
#define X( name, type, fmt ) json_num( name, type, fmt )
ALL_TYPES
#undef X


================================================
FILE: External/tiny-json/tiny-json.h
================================================

/*

<https://github.com/rafagafe/tiny-json>
     
  Licensed under the MIT License <http://opensource.org/licenses/MIT>.
  SPDX-License-Identifier: MIT
  Copyright (c) 2016-2018 Rafa Garcia <rafagarcia77@gmail.com>.

  Permission is hereby  granted, free of charge, to any  person obtaining a copy
  of this software and associated  documentation files (the "Software"), to deal
  in the Software  without restriction, including without  limitation the rights
  to  use, copy,  modify, merge,  publish, distribute,  sublicense, and/or  sell
  copies  of  the Software,  and  to  permit persons  to  whom  the Software  is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in all
  copies or substantial portions of the Software.

  THE SOFTWARE  IS PROVIDED "AS  IS", WITHOUT WARRANTY  OF ANY KIND,  EXPRESS OR
  IMPLIED,  INCLUDING BUT  NOT  LIMITED TO  THE  WARRANTIES OF  MERCHANTABILITY,
  FITNESS FOR  A PARTICULAR PURPOSE AND  NONINFRINGEMENT. IN NO EVENT  SHALL THE
  AUTHORS  OR COPYRIGHT  HOLDERS  BE  LIABLE FOR  ANY  CLAIM,  DAMAGES OR  OTHER
  LIABILITY, WHETHER IN AN ACTION OF  CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE  OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
    
*/

#ifndef _TINY_JSON_H_
#define	_TINY_JSON_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stddef.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>

/** Get a pointer to the enclosing structure from a pointer to one of its members.
  * @param ptr Pointer to the member. It may be any pointer expression.
  * @param type Type of the enclosing structure.
  * @param member Name of the member inside the structure.
  * @return Pointer to the structure that contains the member. */
#define json_containerOf( ptr, type, member ) \
    ((type*)( (char*)(ptr) - offsetof( type, member ) ))

/** @defgroup tinyJson Tiny JSON parser.
  * @{ */

/** Type codes of the supported JSON properties. */
typedef enum {
    JSON_OBJ,       /* Object: a set of named properties. */
    JSON_ARRAY,     /* Array: a list of unnamed values. */
    JSON_TEXT,      /* String value. */
    JSON_BOOLEAN,   /* true or false. */
    JSON_INTEGER,   /* Number without fraction or exponent. */
    JSON_REAL,      /* Number with fraction and/or exponent. */
    JSON_NULL       /* null. */
} jsonType_t;

/** Structure to handle JSON properties.
  * One instance describes one value: either a primitive, or a container
  * (object or array) whose elements are chained through their 'sibling' pointers. */
typedef struct json_s {
    /* Next element of the same container, or null for the last one. */
    struct json_s* sibling;
    /* Property name, or null when the property is unnamed. */
    const char* name;
    union {
      /* Text of the value; read through json_getValue(). */
      const char* value;
      struct {
        /* First element of an object or array; read through json_getChild(). */
        struct json_s* child;
        /* Last element appended so far. */
        struct json_s* last_child;
        } c;
    } u;
    /* Type code of the value. */
    jsonType_t type;
} json_t;

/** Parse a string to get a json.
  * @param str String pointer with a JSON object. It will be modified.
  * @param mem Array of json properties to allocate.
  * @param qty Number of elements of mem.
  * @retval Null pointer if anything went wrong in the parse process.
  * @retval If the parse process was successful, a valid handler of a json.
  *         This property is always unnamed and its type is JSON_OBJ. */
const json_t* json_create(char* str, json_t mem[], unsigned int qty);

/** Get the name of a json property.
  * @param json A valid handler of a json property.
  * @retval Pointer to a null-terminated string if the property has a name.
  * @retval Null pointer if the property is unnamed. */
static inline const char* json_getName(const json_t* json) {
    return json->name;
}

/** Get the value of a json property.
  * The type of property cannot be JSON_OBJ or JSON_ARRAY: for those types the
  * same storage holds the child list instead of a string.
  * @param property A valid handler of a json property.
  * @return Pointer to null-terminated string with the value. */
static inline const char* json_getValue(const json_t* property) {
    return property->u.value;
}

/** Get the type of a json property.
  * @param json A valid handler of a json property.
  * @return The type code, one of the jsonType_t values. */
static inline jsonType_t json_getType(const json_t* json) {
    return json->type;
}

/** Get the next sibling of a JSON property that is within a JSON object or array.
  * Together with json_getChild() this allows walking every element of a container.
  * @param json A valid handler of a json property.
  * @retval The handler of the next sibling if found.
  * @retval Null pointer if the json property is the last one. */
static inline const json_t* json_getSibling(const json_t* json) {
    return json->sibling;
}

/** Search a property by its name in a JSON object.
  * @param obj A valid handler of a json object. Its type must be JSON_OBJ.
  * @param property The name of property to get.
  * @retval The handler of the json property if found.
  * @retval Null pointer if not found. */
const json_t* json_getProperty(const json_t* obj, const char* property);


/** Search a property by its name in a JSON object and return its value.
  * @param obj A valid handler of a json object. Its type must be JSON_OBJ.
  * @param property The name of property to get.
  * @retval If found a pointer to null-terminated string with the value.
  * @retval Null pointer if not found or it is an array or an object. */
const char* json_getPropertyValue(const json_t* obj, const char* property);

/** Get the first property of a JSON object or array.
  * @param json A valid handler of a json property.
  *             Its type must be JSON_OBJ or JSON_ARRAY: for other types the
  *             same storage holds the value string instead of a child list.
  * @retval The handler of the first property if there is one.
  * @retval Null pointer if the json object has no properties. */
static inline const json_t* json_getChild(const json_t* json) {
    return json->u.c.child;
}

/** Get the value of a json boolean property.
  * Only the first character of the value text is inspected.
  * @param property A valid handler of a json object. Its type must be JSON_BOOLEAN.
  * @return true if the value text starts with 't', false otherwise. */
static inline bool json_getBoolean(const json_t* property) {
    return *property->u.value == 't';
}

/** Get the value of a json integer property.
  * The value text is converted with atoll().
  * @param property A valid handler of a json object. Its type must be JSON_INTEGER.
  * @return The value as a 64-bit signed integer. */
static inline int64_t json_getInteger(const json_t* property) {
    return atoll( property->u.value );
}

/** Get the value of a json real property.
  * The value text is converted with atof().
  * @param property A valid handler of a json object. Its type must be JSON_REAL.
  * @return The value as a double. */
static inline double json_getReal(const json_t* property) {
    return atof( property->u.value );
}


/** Structure to handle a heap of JSON properties.
  * It is an interface made of two callbacks; a custom pool embeds this
  * structure and provides both functions. */
typedef struct jsonPool_s jsonPool_t;
struct jsonPool_s {
    /* Prepares the pool for a new parse and returns a json instance. */
    json_t* (*init)( jsonPool_t* pool );
    /* Returns a new json instance, or a null pointer when none is available. */
    json_t* (*alloc)( jsonPool_t* pool );
};

/** Parse a string to get a json, taking json instances from a custom pool.
  * @param str String pointer with a JSON object. It will be modified.
  * @param pool Custom json pool pointer.
  * @retval Null pointer if anything went wrong in the parse process.
  * @retval If the parse process was successful, a valid handler of a json.
  *         This property is always unnamed and its type is JSON_OBJ. */
const json_t* json_createWithPool(char* str, jsonPool_t* pool);

/** @} */

/** @defgroup makejson Make JSON.
 * @{ */

/** Open a JSON object in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @return Pointer to the new end of JSON under construction. */
char* json_objOpen(char* dest, const char* name);

/** Close a JSON object in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @return Pointer to the new end of JSON under construction. */
char* json_objClose(char* dest);

/** Used to finish the root JSON object. After call json_objClose().
 * @param dest Pointer to the end of JSON under construction.
 * @return Pointer to the new end of JSON under construction. */
char* json_end(char* dest);

/** Open an array in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @return Pointer to the new end of JSON under construction. */
char* json_arrOpen(char* dest, const char* name);

/** Close an array in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @return Pointer to the new end of JSON under construction. */
char* json_arrClose(char* dest);

/** Add a text property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value A valid null-terminated string with the value.
 *              Backslash escapes will be added for special characters.
 * @return Pointer to the new end of JSON under construction. */
char* json_str(char* dest, const char* name, const char* value);

/** Add a boolean property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Zero for false. Non zero for true.
 * @return Pointer to the new end of JSON under construction. */
char* json_bool(char* dest, const char* name, int value);

/** Add a null property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @return Pointer to the new end of JSON under construction. */
char* json_null(char* dest, const char* name);

/** Add an integer property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_int(char* dest, const char* name, int value);

/** Add an unsigned integer property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_uint(char* dest, const char* name, unsigned int value);

/** Add a long integer property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_long(char* dest, const char* name, long int value);

/** Add an unsigned long integer property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_ulong(char* dest, const char* name, unsigned long int value);

/** Add a long long integer property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_verylong(char* dest, const char* name, long long int value);

/** Add a double precision number property in a JSON string.
 * @param dest Pointer to the end of JSON under construction.
 * @param name Pointer to null-terminated string or null for unnamed.
 * @param value Value of the property.
 * @return Pointer to the new end of JSON under construction. */
char* json_double(char* dest, const char* name, double value);

/** @} */

#ifdef __cplusplus
}
#endif

#endif	/* _TINY_JSON_H_ */


================================================
FILE: FEXCore/CMakeLists.txt
================================================
# Build script of the FEXCore library: sets global compile options,
# generates the git version header and adds the source and test directories.
cmake_minimum_required(VERSION 3.14)
set(PROJECT_NAME FEXCore)
project(${PROJECT_NAME}
  VERSION 0.01
  LANGUAGES CXX)

# Detect the host architecture. On x86-64, -mcx16 lets the compiler emit CMPXCHG16B.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
  set(ARCHITECTURE_x86_64 1)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcx16")
endif()

# NOTE(review): "\." inside a quoted CMake argument evaluates to a plain ".",
# so the last alternative is the regex "^armv8.*" - confirm this is intended.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^aarch64|^arm64|^armv8\.*")
  set(ARCHITECTURE_arm64 1)
endif()

# Build everything as position independent code and let CMake pass the
# matching PIE flags at link time.
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
cmake_policy(SET CMP0083 NEW) # Follow new PIE policy
include(CheckPIESupported)
check_pie_supported()

set(CMAKE_INCLUDE_CURRENT_DIR ON)

include(CheckCXXCompilerFlag)
include(CheckIncludeFileCXX)
include(CheckCXXSourceCompiles)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Generate git_version.h from its template into the build tree and make it visible.
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/include/git_version.h.in
  ${CMAKE_BINARY_DIR}/generated/git_version.h)

include_directories(${CMAKE_BINARY_DIR}/generated)

# Disable strict aliasing for all build modes
# See discussion in https://github.com/FEX-Emu/FEX/pull/4494#issuecomment-2800608944
# for background context.
# C++ exceptions are disabled as well.
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fno-strict-aliasing> $<$<COMPILE_LANGUAGE:CXX>:-fno-exceptions>)

add_subdirectory(Source/)

# Public headers (from the source tree and the generated ones from the build tree)
# are installed unless this is a Steam support build.
if (NOT BUILD_STEAM_SUPPORT)
  install (DIRECTORY include/FEXCore ${CMAKE_BINARY_DIR}/include/FEXCore
    DESTINATION include
    COMPONENT Development)
endif()

if (BUILD_TESTING)
  add_subdirectory(unittests/)
endif()


================================================
FILE: FEXCore/LICENSE
================================================
MIT License

Copyright (c) 2019 Ryan Houdek <Sonicadvance1@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: FEXCore/Readme.md
================================================
# FEXCore - Fast x86 Core emulation library
This is the core emulation library that is used for the FEX emulator project.
This project aims to provide a fast and functional x86-64 emulation library that can meet and surpass other x86-64 emulation libraries.
### Goals
* Be as fast as possible, beating and exceeding current options for x86-64 emulation
  * A 25% - 50% performance loss compared to native code is the desired target
  * Use an IR to efficiently translate x86-64 to our host architecture
  * Support a tiered recompiler to allow for fast runtime performance
  * Support offline compilation and offline tooling for inspection and performance analysis
  * Support threaded emulation. Including emulating x86-64's strong memory model on weak memory model architectures
* Support a significant portion of the x86-64 instruction space.
  * Including MMX, SSE, SSE2, SSE3, SSSE3, and SSE4*
* Support fallback routines for uncommonly used x86-64 instructions
  * Including x87 and 3DNow!
* Only support userspace emulation.
  * All x86-64 instructions run as if they are under CPL-3(userland) security layer
* Minimal Linux Syscall emulation for testing purposes
* Portable library implementation in order to support easy integration into applications
### Target Host Architecture
The target host architecture for this library is AArch64. Specifically the ARMv8.1 version or newer.
The CPU IR is designed with AArch64 in mind but should allow for other architectures as well.
x86-64 host support is available for ease of development, but is not a priority.
### Not desired
* Kernel space emulation
* CPL0-2 emulation
* Real Mode, Protected Mode, Virtual-8086 Mode, System Management Mode
* IRQs
* SVM
* "Cycle Accurate" emulation


================================================
FILE: FEXCore/Scripts/config_generator.py
================================================
import datetime
import json
import sys

def print_header():
    """Write the preamble of the generated options file to ``output_file``.

    The preamble gives every ``OPT_*`` macro a default definition in terms of
    ``OPT_BASE`` (which itself defaults to expanding to nothing), each guarded
    by ``#ifndef`` so that an includer can override it beforehand.
    """
    header = '''#ifndef OPT_BASE
#define OPT_BASE(type, group, enum, json, default)
#endif
#ifndef OPT_BOOL
#define OPT_BOOL(group, enum, json, default) OPT_BASE(bool, group, enum, json, default)
#endif
#ifndef OPT_UINT8
#define OPT_UINT8(group, enum, json, default) OPT_BASE(uint8_t, group, enum, json, default)
#endif
#ifndef OPT_INT32
#define OPT_INT32(group, enum, json, default) OPT_BASE(int32_t, group, enum, json, default)
#endif
#ifndef OPT_UINT32
#define OPT_UINT32(group, enum, json, default) OPT_BASE(uint32_t, group, enum, json, default)
#endif
#ifndef OPT_UINT64
#define OPT_UINT64(group, enum, json, default) OPT_BASE(uint64_t, group, enum, json, default)
#endif
#ifndef OPT_STR
#define OPT_STR(group, enum, json, default) OPT_BASE(fextl::string, group, enum, json, default)
#endif
#ifndef OPT_STRARRAY
#define OPT_STRARRAY(group, enum, json, default) OPT_BASE(fextl::string, group, enum, json, default)
#endif
#ifndef OPT_STRENUM
#define OPT_STRENUM(group, enum, json, default) OPT_BASE(uint64_t, group, enum, json, default)
#endif
'''
    output_file.write(header)

def print_tail():
    """Write the epilogue of the generated options file to ``output_file``.

    The epilogue undefines every ``OPT_*`` macro that the preamble may have
    defined, so the generated file can be included again with other definitions.
    """
    tail = '''#undef OPT_BASE
#undef OPT_BOOL
#undef OPT_UINT8
#undef OPT_INT32
#undef OPT_UINT32
#undef OPT_UINT64
#undef OPT_STR
#undef OPT_STRARRAY
#undef OPT_STRENUM
'''
    output_file.write(tail)

def print_config(type, group_name, json_name, default_value):
    """Write one ``OPT_<TYPE> (GROUP, ENUM, json, default)`` line to ``output_file``.

    The macro suffix, the group and the enum name are upper-cased; the JSON
    name and the default value are written as given.
    """
    macro = f"OPT_{type.upper()}"
    arguments = f"{group_name.upper()}, {json_name.upper()}, {json_name}, {default_value}"
    output_file.write(f"{macro} ({arguments})\n")

def print_options(options):
    """Write one ``OPT_*`` line per option, with a blank line after each group.

    ``options`` maps a group name to a mapping of option name to a dict that
    holds at least the ``"Default"`` and ``"Type"`` keys.
    """
    quoted_types = ("str", "strarray")
    for group_name, group_options in options.items():
        for option_name, option in group_options.items():
            default_value = option["Default"]
            if option["Type"] in quoted_types:
                # String defaults become C string literals in the output.
                default_value = '"' + default_value + '"'

            print_config(option["Type"], group_name, option_name, default_value)

        output_file.write("\n")

def print_unnamed_options(options):
    """Write the ``OPT_*`` lines of the options that have no JSON key.

    Same layout as the named options, preceded by a comment line. The option
    name is upper-cased before it is written, because for these options the
    key is the enum name and there is no JSON configuration entry.
    """
    output_file.write("// Unnamed configuration options\n")
    quoted_types = ("str", "strarray")
    for group_name, group_options in options.items():
        for option_name, option in group_options.items():
            default_value = option["Default"]
            if option["Type"] in quoted_types:
                # String defaults become C string literals in the output.
                default_value = '"' + default_value + '"'

            enum_name = option_name.upper()
            print_config(option["Type"], group_name, enum_name, default_value)

        output_file.write("\n")

def print_man_option(short, long, desc, default):
    """Write the mdoc entry of one command-line option to ``output_man``.

    The entry is an ``.It`` item with the optional short flag and the long
    flag, one ``.Pp`` paragraph per description line, and the default value.
    """
    if short is None:
        item = ".It "
    else:
        item = f".It Fl {short} , "

    pieces = [item, f"Fl Fl {long}=", "\n"]

    # One paragraph per description line
    for line in desc:
        pieces.append(".Pp\n")
        pieces.append(f"{line}\n")

    pieces.append(".Pp\n")
    pieces.append(f"\\fBdefault:\\fR {default}\n")
    pieces.append(".Pp\n\n")

    output_man.write("".join(pieces))

def print_man_env_option(name, desc, default, no_json_key):
    """Write the man page entry of one environment variable to ``output_man``.

    The variable is ``FEX_`` followed by the upper-cased name. It is followed
    by one ``.Pp`` paragraph per description line, the JSON key (unless
    ``no_json_key`` is true) and the default value.
    """
    pieces = [f"\\fBFEX_{name.upper()}\\fR\n"]

    # One paragraph per description line
    for line in desc:
        pieces.append(f".Pp\n{line}\n")

    if not no_json_key:
        pieces.append(f".Pp\n\\fBJSON key:\\fR '{name}'\n")
        pieces.append(".Pp\n\n")

    pieces.append(f".Pp\n\\fBdefault:\\fR {default}\n")
    pieces.append(".Pp\n\n")

    output_man.write("".join(pieces))

def print_man_environment(options):
    """Write the ENVIRONMENT section of the man page.

    Every option in ``options`` (group name -> {option name: metadata}) gets
    an entry via ``print_man_env_option``. A ``TextDefault`` entry, when
    present, replaces ``Default`` as the displayed default, and string-like
    defaults are shown in single quotes. ``strenum`` options additionally
    list the values of their ``Enums`` table. The entries that live outside
    the option table are appended by ``print_man_environment_tail``.
    """
    quoted_types = ("str", "strarray", "strenum")

    output_man.write(".Sh ENVIRONMENT\n")
    output_man.write(".Bl -tag -width -indent\n")

    for group_vals in options.values():
        for op_key, op_vals in group_vals.items():
            default = op_vals["Default"]
            value_type = op_vals["Type"]

            # Prefer the textual default over the enum based one
            if "TextDefault" in op_vals:
                default = op_vals["TextDefault"]

            if value_type in quoted_types:
                default = "'" + default + "'"

            print_man_env_option(op_key, op_vals["Desc"], default, False)

            if value_type != "strenum":
                continue

            choices = op_vals["Enums"].values()
            output_man.write("\\fBAvailable Options:\\fR\n")
            output_man.write(", ".join(str(choice) for choice in choices))
            output_man.write("\n.sp\n")

    print_man_environment_tail()
    output_man.write(".El\n")

def print_man_environment_tail():
    """Document the environment variables that are not regular options.

    These variables are not part of the option table that drives the normal
    loop, so their descriptions are spelled out here. Each one is written
    through ``print_man_env_option`` with an empty default (``''``) and
    without a JSON key paragraph.
    """

    # Additional environment variables that live outside of the normal loop
    print_man_env_option(
        "APP_CONFIG_LOCATION",
        [
            "Allows the user to override where FEX looks for configuration files",
            "By default FEX will look in ${XDG_CONFIG_HOME, $HOME/.config}/fex-emu/",
            "This will override the full path",
            "If FEX_PORTABLE is declared then relative paths are also supported",
            "For FEX: Relative to the FEX binary",
            "For WINE: Relative to %LOCALAPPDATA%",
        ],
        "''", True)

    # Every description line needs its own trailing comma: adjacent string
    # literals are concatenated by Python, which would silently merge
    # several lines into a single man page paragraph.
    print_man_env_option(
        "APP_CONFIG",
        [
            "Allows the user to override where FEX looks for only the application config file",
            "By default FEX will look in ${XDG_CONFIG_HOME, $HOME/.config}/fex-emu/Config.json",
            "This will override this file location",
            "One must be careful with this option as it will override any applications that load with execve as well",
            "If you need to support applications that execve then use FEX_APP_CONFIG_LOCATION instead",
            "If FEX_PORTABLE is declared then relative paths are also supported",
            "For FEX: Relative to the FEX binary",
            "For WINE: Relative to %LOCALAPPDATA%",
        ],
        "''", True)

    print_man_env_option(
        "APP_DATA_LOCATION",
        [
            "Allows the user to override where FEX looks for data files",
            "By default FEX will look in {$XDG_DATA_HOME, $HOME/.local/share}/fex-emu/",
            "This will override the full path",
            "This is the folder where FEX stores generated files like IR cache",
        ],
        "''", True)

    print_man_env_option(
        "PORTABLE",
        [
            "Allows FEX to run without installation. Global locations for configuration and binfmt_misc are ignored.",
            "For FEX on Linux:",
            "These files are instead read from <FEXPath>/fex-emu/ by default.",
            "For Arm64ec/Wow64 WINE builds:",
            "These files are instead read from $LOCALAPPDATA/fex-emu/ by default.",
            "For further customization, see FEX_APP_CONFIG_LOCATION and FEX_APP_DATA_LOCATION.",
        ],
        "''", True)

    print_man_env_option(
        "APP_CACHE_LOCATION",
        [
            "Allows the user to override where FEX stores and loads cache files",
            "By default FEX will look in ${XDG_CACHE_HOME, $HOME/.cache}/fex-emu/",
            "This will override the full path, trailing forward-slash is expected to exist",
        ],
        "''", True)

def print_man_header():
    """Write the man page preamble (title, NAME, SYNOPSIS, DESCRIPTION).

    The ``.Dd`` line carries the current local date formatted as DD-MM-YYYY.
    """
    today = datetime.datetime.now().strftime("%d-%m-%Y")

    header_lines = (
        ".Dd {0}".format(today),
        ".Dt FEX",
        ".Os Linux",
        ".Sh NAME",
        ".Nm FEX",
        ".Nm FEXBash",
        ".Nd Fast x86-64 and x86 emulation.",
        ".Sh SYNOPSIS",
        ".Nm",
        ".Ar <args> ...",
        ".Pp",
        ".Nm FEXBash",
        ".Ar <args> ...",
        ".Sh DESCRIPTION",
        "FEX allows you to run x86 and x86-64 binaries on an AArch64 host, similar to qemu-user and box86.",
        "It has native support for a rootfs overlay, so you don't need to chroot, as well as some thunklibs so it can forward things like GL to the host.",
        "FEX presents a Linux 5.0 interface to the guest, and supports both AArch64 and x86-64 as hosts.",
        "FEX is very much work in progress, so expect things to change.",
    )

    # Every line, including the last one, is newline terminated
    output_man.write("\n".join(header_lines) + "\n")

def print_man_tail():
    """Write the closing FILES section of the man page to ``output_man``."""
    # (path, description) pairs, in the order they appear in the page
    file_entries = (
        ("$XDG_CONFIG_DIR/fex-emu", "Default FEX user configuration directory"),
        ("$prefix/share/fex-emu/AppConfig", "System level application configuration files"),
        ("$prefix/share/fex-emu/GuestThunks", "guest-side thunk data libraries"),
        ("$prefix/lib/fex-emu/HostThunks", "host-side thunks for guest communication"),
    )

    pieces = [
        ".Sh FILES\n",
        '.Bl -tag -width "$prefix/share/fex-emu/GuestThunks" -compact\n',
    ]
    for path, description in file_entries:
        pieces.append(".It Pa " + path + "\n")
        pieces.append(description + "\n")
    pieces.append(".El\n")

    output_man.write("".join(pieces))

def print_config_option(type, group_name, json_name, default_value, short, choices, desc):
    """Write the ``<group>Group.add_option(...)`` chain for one option.

    type          -- option type string; "bool" gets special treatment
    group_name    -- prefix of the ``<group_name>Group`` object being extended
    json_name     -- option name; lower-cased for the long flag, used as-is
                     for ``.dest``
    default_value -- text placed inside ``.set_default(...)``
    short         -- short flag letter, or None
    choices       -- iterable of allowed values, or None (ignored for bools)
    desc          -- list of help lines; when there is more than one, each
                     line gets a literal ``\\n`` appended

    Bool options are emitted as a ``store_true`` option followed by an
    inverted ``--no-<name>`` ``store_false`` option sharing the same dest.
    All output goes to ``output_argloader``.
    """
    emit = output_argloader.write
    is_bool = (type == "bool")

    # <group>Group.add_option("-s", "--long")
    emit("{0}Group".format(group_name))
    flags = []
    if short is not None:
        flags.append("\"-{0}\"".format(short))
    flags.append("\"--{0}\"".format(json_name.lower()))
    emit(".add_option({0})".format(", ".join(flags)))
    emit("\n")

    if is_bool:
        emit("\t.action(\"store_true\")\n")

    emit("\t.dest(\"{0}\")\n".format(json_name))

    if not is_bool and choices is not None:
        emit("\t.choices({\n")
        for choice in choices:
            emit("\t\t\"{0}\",\n".format(choice))
        emit("\t})\n")

    # help
    emit("\t.help(\n")
    line_suffix = "\\n" if len(desc) > 1 else ""
    for line in desc:
        emit("\t\t\"{0}{1}\"\n".format(line, line_suffix))
    emit("\t)\n")

    if is_bool:
        emit("\t.set_default({0});\n\n".format(default_value))

        # Inverted case sets the bool to false
        emit("{0}Group".format(group_name))
        emit(".add_option(\"--no-{0}\")\n".format(json_name.lower()))
        emit("\t.action(\"store_false\")\n")
        emit("\t.dest(\"{0}\");\n".format(json_name))
    else:
        emit("\t.set_default({0});\n".format(default_value))

    emit("\n")

def print_parse_envloader_options(options):
    """Write the ``ENVLOADER`` section of the argument loader include.

    The section is an ``else if`` chain keyed on the ``FEX_<NAME>``
    environment variable name. ``strenum`` options get a branch that runs
    the value through ``EnumParser``; options with an ``ArgumentHandler``
    get a branch that calls that handler. An option with both gets both
    branches, in that order. Output goes to ``output_argloader``.
    """
    emit = output_argloader.write

    emit("#ifdef ENVLOADER\n")
    emit("#undef ENVLOADER\n")
    # Dummy head so every generated branch can start with "else if"
    emit("if (false) {}\n")

    for group_vals in options.values():
        for op_key, op_vals in group_vals.items():
            if op_vals["Type"] == "strenum":
                emit(f"else if (Key == \"FEX_{op_key.upper()}\") {{\n")
                emit(f"\tValue = FEXCore::Config::EnumParser<FEXCore::Config::{op_key}ConfigPair>(FEXCore::Config::{op_key}_EnumPairs, Value_View);\n")
                emit("}\n")

            if "ArgumentHandler" in op_vals:
                handler = f"FEXCore::Config::Handler::{op_vals['ArgumentHandler']}"
                emit(f"else if (Key == \"FEX_{op_key.upper()}\") {{\n")
                emit(f"\tValue = {handler}(Value_View);\n")
                emit("}\n")

    emit("#endif\n")

def print_parse_jsonloader_options(options):
    """Write the ``JSONLOADER`` section of the argument loader include.

    The section is an ``else if`` chain keyed on the JSON key name:
    ``strenum`` options are parsed through ``EnumParser``, ``strarray``
    options are appended with ``AppendStrArrayValue``, and everything else
    falls through to a final ``else`` that stores the raw string.

    Raises AssertionError when ``options`` contains no option at all.
    """
    emit = output_argloader.write

    emit("#ifdef JSONLOADER\n")
    emit("#undef JSONLOADER\n")
    # Dummy head so every generated branch can start with "else if"
    emit("if (false) {}\n")

    last_key = None  # stays None only if no option was ever visited
    for group_vals in options.values():
        for last_key, op_vals in group_vals.items():
            kind = op_vals["Type"]
            if kind == "strenum":
                emit(f"else if (KeyName == \"{last_key}\") {{\n")
                emit(f"\tSet(KeyOption, FEXCore::Config::EnumParser<FEXCore::Config::{last_key}ConfigPair>(FEXCore::Config::{last_key}_EnumPairs, Value_View));\n")
                emit("}\n")
            elif kind == "strarray":
                emit(f"else if (KeyName == \"{last_key}\") {{\n")
                emit("\tAppendStrArrayValue(KeyOption, ConfigString);\n")
                emit("}\n")

    assert last_key is not None, "No options found in JSONLOADER"

    emit("else {\n")
    emit("\tSet(KeyOption, ConfigString);\n")
    emit("}\n")

    emit("#endif\n")

def print_parse_enum_options(options):
    """Write the ``ENUMDEFINES`` section of the argument loader include.

    For every ``strenum`` option this emits, first, an ``enum class`` whose
    members are ``OFF = 0`` followed by one bit per key of the option's
    ``Enums`` table, and then (after all enum classes) a constexpr array
    pairing each enum's textual value with its member, again led by "off".
    Output goes to ``output_argloader``.
    """
    emit = output_argloader.write

    def strenum_options():
        # Lazily walk all groups and yield only the string-enum options
        for group_vals in options.values():
            for op_key, op_vals in group_vals.items():
                if op_vals["Type"] == "strenum":
                    yield op_key, op_vals

    emit("#ifdef ENUMDEFINES\n")
    emit("#undef ENUMDEFINES\n")

    # Pass 1: the enum class definitions
    for op_key, op_vals in strenum_options():
        emit(f"enum class {op_key} : uint64_t {{\n")
        # Always have an OFF.
        emit("\tOFF = 0,\n")
        for bit, member in enumerate(op_vals["Enums"]):
            emit(f"\t{member.upper()} = 1ULL << {bit},\n")
        emit("};\n")
        emit(f"FEX_DEF_NUM_OPS({op_key})\n")

    # Pass 2: the string -> enum lookup tables
    for op_key, op_vals in strenum_options():
        enum_table = op_vals["Enums"]

        emit(f"using {op_key}ConfigPair = std::pair<std::string_view, FEXCore::Config::{op_key}>;\n")
        emit(f"constexpr static std::array<{op_key}ConfigPair, {len(enum_table) + 1}> {op_key}_EnumPairs = {{{{\n")
        # Always have an OFF.
        emit(f"\t{{ \"off\", FEXCore::Config::{op_key}::OFF }},\n")
        for member, text in enum_table.items():
            emit(f"\t{{ \"{text}\", FEXCore::Config::{op_key}::{member.upper()} }},\n")
        emit("}};\n")

    emit("#endif\n")

# Script entry point.
#
# Arguments:
#   argv[1] -- input JSON describing the options
#   argv[2] -- output path of the config include file
#   argv[3] -- output path of the man page
#   argv[4] -- output path of the argument loader code
#
# The print_* helpers write through the module-level names output_file,
# output_man and output_argloader, so each one is bound by the `with`
# statement that surrounds the helpers using it. The context managers make
# sure every file is flushed and closed even if a helper raises part way.
if len(sys.argv) < 5:
    # Too few arguments: exit without generating anything
    sys.exit()

output_filename = sys.argv[2]
output_man_page = sys.argv[3]
output_argumentloader_filename = sys.argv[4]

with open(sys.argv[1], "r") as json_file:
    json_text = json_file.read()

json_object = json.loads(json_text)

options = json_object["Options"]
unnamed_options = json_object["UnnamedOptions"]

# Generate config include file
with open(output_filename, "w") as output_file:
    print_header()
    print_options(options)
    print_unnamed_options(unnamed_options)
    print_tail()

# Generate man file
with open(output_man_page, "w") as output_man:
    print_man_header()
    print_man_environment(options)
    print_man_tail()

# Generate argument loader code
with open(output_argumentloader_filename, "w") as output_argloader:
    # Generate environment loader code
    print_parse_envloader_options(options)

    # Generate json loader code
    print_parse_jsonloader_options(options)

    # Generate enum variable options
    print_parse_enum_options(options)


================================================
FILE: FEXCore/Scripts/json_ir_doc_generator.py
================================================
import collections
import json
import sys

# Op class name -> list of [op name, op metadata] pairs. Rebuilt in sorted
# key order every time get_ir_classes runs.
OpClasses = collections.OrderedDict()

def get_ir_classes(ops, defines):
    """Collect the ops of every class into the global ``OpClasses`` table.

    ops     -- dict mapping an op class name to ``{op name: op metadata}``
    defines -- accepted for call compatibility; not used here

    Ops are appended to any entries already present for their class, and
    the table is replaced by a copy sorted by class name afterwards.
    """
    global OpClasses

    for op_class, opslist in ops.items():
        # setdefault registers the class even when it contains no ops
        bucket = OpClasses.setdefault(op_class, [])
        bucket.extend([op, op_val] for op, op_val in opslist.items())

    # Sort the dictionary after we are done parsing it
    OpClasses = collections.OrderedDict(sorted(OpClasses.items()))

def print_ir_op_index():
    """Write the markdown index: one link per op class, then the defines link."""
    emit = output_file.write

    emit("# Index\n")
    emit("## Op Classes\n")
    for class_key in OpClasses:
        # Link text and anchor are both the class name
        emit("- [{0}](#{0})\n\n".format(class_key))

    emit("## Definitions\n")
    emit("- [Defines](#Defines)\n\n")

def print_ir_ops():
    """Write one markdown section per op class with an entry for every op.

    Each op gets a heading, a quoted copy of its name and its description.
    A ``Desc`` that is a list is written one paragraph per element; any
    other ``Desc`` is written as a single line. Ops without ``Desc`` get a
    placeholder so the gap is visible in the generated document.
    """
    emit = output_file.write

    for class_key, class_ops in OpClasses.items():
        emit("# %s\n\n" % (class_key))

        for op_key, op_vals in class_ops:
            emit("## %s\n" % (op_key))
            emit(">")
            emit(op_key)
            emit("\n\n")

            if "Desc" not in op_vals:
                emit("XXX: Missing op desc!\n")
                continue

            desc = op_vals["Desc"]
            if isinstance(desc, list):
                for paragraph in desc:
                    emit("%s\n\n" % paragraph)
            else:
                emit("%s\n" % desc)

def print_ir_defines(defines):
    """Write the ``Defines`` section: every define inside one cpp code fence."""
    emit = output_file.write

    emit("## Defines\n")
    emit("```cpp\n")
    for define in defines:
        emit("{}\n".format(define))
    emit("```\n")

# Script entry point.
#
# Arguments:
#   argv[1] -- input JSON; its top-level keys are matched case-insensitively
#              and must include "Ops" and "Defines"
#   argv[2] -- output path of the generated markdown document
#
# The print_* helpers write through the module-level name output_file, which
# is bound by the `with` statement below. Using context managers makes sure
# both files are closed even if parsing or one of the printers raises.
if len(sys.argv) < 3:
    # Too few arguments: exit without generating anything
    sys.exit()

output_filename = sys.argv[2]
with open(sys.argv[1], "r") as json_file:
    json_text = json_file.read()

json_object = json.loads(json_text)
json_object = {k.upper(): v for k, v in json_object.items()}

ops = json_object["OPS"]
defines = json_object["DEFINES"]

get_ir_classes(ops, defines)

with open(output_filename, "w") as output_file:
    print_ir_op_index()

    output_file.write("# IR documentation\n\n")

    print_ir_ops()

    print_ir_defines(defines)


================================================
FILE: FEXCore/Scripts/json_ir_generator.py
================================================
#!/bin/python3
import json
import sys
from dataclasses import dataclass, field
import textwrap

def ExitError(msg):
    """Print ``msg`` on standard output and terminate with exit status -1."""
    print(msg, file=sys.stdout)
    raise SystemExit(-1)

@dataclass
class IRType:
    """Pairing of an IR type name with the C++ type name it maps to.

    The constructor is the one generated by ``@dataclass`` and takes
    ``IRName`` and ``CXXName`` in that order.
    """
    IRName: str   # type name as used in the IR description
    CXXName: str  # corresponding C++ type name

@dataclass
class OpArgument:
    """A single argument of an IR op, filled in field by field while parsing."""
    Type: str
    IsSSA: bool
    Temporary: bool
    Name: str
    NameWithPrefix: str
    DefaultInitializer: str

    def __init__(self):
        # Instances always start blank; the parser assigns the real values.
        # The assignment order also fixes the order used by print().
        self.Type = None
        self.IsSSA = False
        self.Temporary = False
        self.Name = None
        self.NameWithPrefix = None
        self.DefaultInitializer = None

    def print(self):
        """Print all attributes as ``name: value`` pairs on one line."""
        pairs = [f"{key!s}: {value!s}" for key, value in vars(self).items()]
        print(", ".join(pairs))

@dataclass
class OpDefinition:
    Name: str
    HasDest: bool
    DestType: str
    DestSize: str
    ElementSize: str
    OpClass: str
    HasSideEffects: bool
    ImplicitFlagClobber: bool
    RAOverride: int
    SwitchGen: bool
    ArgPrinter: bool
    SSAArgNum: int
    NonSSAArgNum: int
    DynamicDispatch: bool
    LoweredX87: bool
    JITDispatch: bool
    JITDispatchOverride: str
    TiedSource: int
    Inline: list[str]
    Arguments: list[OpArgument]
    EmitValidation: list[str]
    Desc: list[str]

    def __init__(self):
        self.Name = None
        self.HasDest = False
        self.DestType = None
        self.DestSize = None
        self.ElementSize = None
        self.OpClass = None
        self.OpSize = 0
        self.HasSideEffects = False
        self.ImplicitFlagClobber = False
        self.RAOverride = -1
        self.SwitchGen = True
        self.ArgPrinter = True
        self.SSAArgNum = 0
        self.NonSSAArgNum = 0
        self.DynamicDispatch = False
        self.LoweredX87 = False
        self.JITDispatch = True
        self.JITDispatchOverride = None
        self.TiedSource = -1
        self.Arguments = []
        self.EmitValidation = []
        self.Desc = []
        return

    def print(self):
        attrs = vars(self)
        print(", ".join("%s: %s" % item for item in attrs.items()))

# IR type name -> IRType record; populated by parse_irtypes
IRTypesToCXX: dict[str, IRType] = {}
# C++ type name -> IRType record (reverse direction); populated by parse_irtypes
CXXTypeToIR: dict[str, IRType] = {}
# Every op accepted by parse_ops, in the order it was parsed
IROps: list[OpDefinition] = []

# Names of the ops parsed so far; parse_ops checks it to reject duplicates
IROpNameSet: set[str] = set()

def is_ssa_type(op_type: str):
    """Return True when ``op_type`` is one of the SSA register class names."""
    ssa_type_names = {"SSA", "GPR", "GPRPair", "FPR"}
    return op_type in ssa_type_names

def parse_irtypes(irtypes):
    """Register every ``{IR type name: C++ type name}`` pair in both lookups.

    ``IRTypesToCXX`` is keyed by the IR name and ``CXXTypeToIR`` by the C++
    name; each table receives its own ``IRType`` instance.
    """
    for ir_name, cxx_name in irtypes.items():
        forward = IRType(ir_name, cxx_name)
        IRTypesToCXX[ir_name] = forward
        reverse = IRType(ir_name, cxx_name)
        CXXTypeToIR[cxx_name] = reverse

def parse_ops(ops):
    """Parse every op signature in ``ops`` into an ``OpDefinition``.

    ``ops`` maps an op class name to ``{op signature: metadata dict}``. A
    signature has the shape ``[<dest> = ]<Name> [<Type>:<prefix><ArgName>, ...]``
    where ``<prefix>`` is ``$`` for a value argument or ``#`` for a
    temporary, and an argument name may carry a ``{default}`` initializer.
    Entries whose metadata contains ``Ignore`` are skipped.

    Each parsed op is appended to the global ``IROps`` list and its name is
    added to ``IROpNameSet``. Malformed signatures, unknown types and
    duplicate op names are reported through ``ExitError``.
    """
    for op_class, opslist in ops.items():
        for op, op_val in opslist.items():
            if "Ignore" in op_val:
                # Skip these
                continue

            OpDef = OpDefinition()

            # Check if we have a destination
            # Only happens if the IR name contains `=`
            EqualSplit = op.split("=", 1)

            RHS = EqualSplit[0].strip()
            if len(EqualSplit) > 1:
                LHS = EqualSplit[0].strip()
                RHS = EqualSplit[1].strip()

                if ":" in LHS:
                    # Named destinations. This is a hack, but so is the entire
                    # multi-destination support bolten onto the old IR...
                    #
                    # Named destinations require side effects because they break
                    # SSA hard. Validate that.
                    assert("HasSideEffects" in op_val and op_val["HasSideEffects"])

                    for Dest in LHS.split(","):
                        Dest = Dest.strip()
                        DType, Name = Dest.split(":$")

                        # If the destination appears also as a source, it is
                        # read-modify-write.
                        if Dest in RHS:
                            # Turn RMW into an in/out source
                            RHS = RHS.replace(Dest.strip(), f"{DType}:$Inout{Name}")
                        else:
                            # Turn named destinations into an out source.
                            RHS += f", {DType}:$Out{Name}"
                else:
                    # Single anonymous destination
                    if LHS not in ["SSA", "GPR", "GPRPair", "FPR"]:
                        ExitError(f"Unknown destination class type {LHS}. Needs to be one of SSA, GPR, GPRPair, FPR")

                    OpDef.HasDest = True
                    OpDef.DestType = LHS

            # IR Op needs to start with a name
            RHS = RHS.split(" ", 1)

            # str.split always yields at least one element, so a missing name
            # shows up as an empty first element, never as an empty list.
            if len(RHS) < 1 or not RHS[0]:
                ExitError("Missing IR op name. Needs to be a string")

            # Set the op name
            OpDef.Name = RHS[0]

            # Parse the arguments
            if len(RHS) > 1:
                Arguments = RHS[1].strip().split(",")
                for Argument in Arguments:
                    Argument = Argument.strip()
                    OpArg = OpArgument()

                    Split = Argument.split(":", 1)
                    if len(Split) != 2:
                        ExitError("Error parsing argument. Missing Type and name colon split")

                    # Type is the first argument
                    OpArg.Type = Split[0]

                    # Validate typing is in our type map
                    if not OpArg.Type in IRTypesToCXX:
                        ExitError("IR type {} isn't in IR type map. From IR op {}, argument {}".format(OpArg.Type, OpDef.Name, Argument))

                    # Style is the first byte of the name. Slicing (rather than
                    # indexing) yields "" when nothing follows the colon, so
                    # that case is reported below instead of raising IndexError.
                    Prefix = Split[1][:1]
                    if Prefix == "#":
                        OpArg.Temporary = True
                        OpArg.IsSSA = False
                    elif Prefix == "$":
                        OpArg.Temporary = False
                        OpArg.IsSSA = is_ssa_type(OpArg.Type)
                        if OpArg.IsSSA:
                            OpDef.SSAArgNum = OpDef.SSAArgNum + 1
                        else:
                            OpDef.NonSSAArgNum = OpDef.NonSSAArgNum + 1
                    else:
                        ExitError("IR Op {} missing value argument style specifier. Needs to be one of {{#, $}}".format(OpDef.Name))

                    ArgName = Split[1][1:]
                    NameWithPrefix = Prefix + ArgName

                    if len(ArgName) == 0:
                        ExitError("Argument is missing variable name")

                    DefaultInit = ArgName.split("{", 1)
                    if len(DefaultInit) > 1:
                        # We have a default initializer, need to do some more work
                        # First argument will still be the argument name
                        ArgName = DefaultInit[0].strip()
                        NameWithPrefix = Prefix + ArgName
                        # Second argument will be the default initializer
                        # Since we stripped the opening curly brace then it'll end with a closing brace.
                        # endswith also copes with nothing following the opening brace.
                        if not DefaultInit[1].endswith("}"):
                            ExitError("IR op {} Argument {} is missing closing curly brace in default initializer?".format(OpDef.Name, ArgName))

                        OpArg.DefaultInitializer = DefaultInit[1][:-1]

                    # If SSA type then we can generate validation for this op
                    if OpArg.IsSSA and OpArg.Type in {"GPR", "GPRPair", "FPR"}:
                        OpDef.EmitValidation.append(f"GetOpRegClass({ArgName}) == RegClass::Invalid || WalkFindRegClass({ArgName}) == RegClass::{OpArg.Type}")

                    OpArg.Name = ArgName
                    OpArg.NameWithPrefix = NameWithPrefix
                    OpDef.Arguments.append(OpArg)

            # Additional metadata
            if "DestSize" in op_val:
                OpDef.DestSize = op_val["DestSize"]

            if "ElementSize" in op_val:
                OpDef.ElementSize = op_val["ElementSize"]

            if len(op_class):
                OpDef.OpClass = op_class

            if "HasSideEffects" in op_val:
                OpDef.HasSideEffects = bool(op_val["HasSideEffects"])

            if "ImplicitFlagClobber" in op_val:
                OpDef.ImplicitFlagClobber = bool(op_val["ImplicitFlagClobber"])

            if "ArgPrinter" in op_val:
                OpDef.ArgPrinter = bool(op_val["ArgPrinter"])

            if "RAOverride" in op_val:
                OpDef.RAOverride = int(op_val["RAOverride"])

            if "SwitchGen" in op_val:
                OpDef.SwitchGen = op_val["SwitchGen"]

            if "EmitValidation" in op_val:
                OpDef.EmitValidation.extend(op_val["EmitValidation"])

            if "Desc" in op_val:
                OpDef.Desc = op_val["Desc"]

            if "DynamicDispatch" in op_val:
                OpDef.DynamicDispatch = bool(op_val["DynamicDispatch"])

            if "JITDispatch" in op_val:
                OpDef.JITDispatch = bool(op_val["JITDispatch"])

            if "JITDispatchOverride" in op_val:
                OpDef.JITDispatchOverride = op_val["JITDispatchOverride"]

            if "X87" in op_val:
                OpDef.LoweredX87 = op_val["X87"]

                # X87 implies !JITDispatch
                assert("JITDispatch" not in op_val)
                OpDef.JITDispatch = False

            if "TiedSource" in op_val:
                OpDef.TiedSource = op_val["TiedSource"]

            # Pad Inline out to the argument count
            OpDef.Inline = [''] * len(OpDef.Arguments)
            if "Inline" in op_val:
                Value = op_val["Inline"]
                OpDef.Inline[0:len(Value)] = Value

            # Do some fixups of the data here
            if len(OpDef.EmitValidation) != 0:
                for i in range(len(OpDef.EmitValidation)):
                    # Patch up all the argument names
                    for Arg in OpDef.Arguments:
                        # Temporary ops just replace all instances no prefix variant
                        OpDef.EmitValidation[i] = OpDef.EmitValidation[i].replace(Arg.NameWithPrefix, Arg.Name)

            #OpDef.print()

            # Error on duplicate op
            if OpDef.Name in IROpNameSet:
                ExitError("Duplicate Op defined! {}".format(OpDef.Name))

            IROps.append(OpDef)
            IROpNameSet.add(OpDef.Name)

# Print out enum values
def print_enums(enums):
    """Write the ``IROP_ENUM`` section.

    The section holds the ``IROps`` enum (one ``OP_<NAME>`` member per parsed
    op) followed by every enum in ``enums``, a dict mapping an enum name to
    its list of member lines. Empty member entries become blank lines.
    """
    emit = output_file.write

    emit("#ifdef IROP_ENUM\n")
    emit("enum IROps : uint16_t {\n")
    for op in IROps:
        emit(f"\tOP_{op.Name.upper()},\n")
    emit("};\n")

    for name, members in enums.items():
        emit("enum {} {{\n".format(name))
        for member in members:
            emit("\t{}\n".format(member) if member else "\n")
        emit("};\n\n")

    emit("#undef IROP_ENUM\n")
    emit("#endif\n\n")

def print_ir_structs(defines):
    """Write the ``IROP_STRUCTS`` section.

    The section consists of the entries of ``defines`` (each terminated with
    a semicolon, empty entries becoming blank lines), the fixed
    ``IROp_Header`` struct with its size assertion, and one packed
    ``IROp_<Name>`` struct per parsed op. In each op struct the SSA
    arguments come first in declaration order, then the non-temporary
    non-SSA arguments, then the ``OPCODE`` constant and one
    ``<Arg>_Index`` constant per SSA argument.
    """
    emit = output_file.write

    emit("#ifdef IROP_STRUCTS\n")

    # Print out defines here
    for define in defines:
        emit("\t%s;\n" % define if define else "\n")

    # Emit the default struct first
    header_struct = (
        "// Default structs\n",
        "struct __attribute__((packed)) IROp_Header {\n",
        "\tvoid* Data[0];\n",
        "\tIROps Op;\n\n",
        "\tIR::OpSize Size;\n",
        "\tIR::OpSize ElementSize;\n",
        "\ttemplate<typename T>\n",
        "\tT const* C() const { return reinterpret_cast<T const*>(Data); }\n",
        "\ttemplate<typename T>\n",
        "\tT* CW() { return reinterpret_cast<T*>(Data); }\n",
        "\tOrderedNodeWrapper Args[0];\n",
        "};\n\n",
        "static_assert(sizeof(IROp_Header) == sizeof(uint32_t), \"IROp_Header should be 32-bits in size\");\n\n",
    )
    for chunk in header_struct:
        emit(chunk)

    # Now the user defined types
    emit("// User defined IR Op structs\n")
    for op in IROps:
        has_ssa = op.SSAArgNum > 0
        ssa_args = [arg for arg in op.Arguments if arg.IsSSA]

        emit(f"struct __attribute__((packed)) IROp_{op.Name} {{\n")
        emit("\tIROp_Header Header;\n")

        # SSA arguments have a hard requirement to appear after the header,
        # in order of declaration
        if has_ssa:
            emit("\t// SSA arguments\n")
            for arg in ssa_args:
                emit(f"\tOrderedNodeWrapper {arg.Name};\n")

        # Non-SSA arguments are also placed in order of declaration, after SSA though
        if op.NonSSAArgNum > 0:
            emit("\t// Non-SSA arguments\n")
            for arg in op.Arguments:
                if arg.Temporary or arg.IsSSA:
                    continue
                cxx_type = IRTypesToCXX[arg.Type].CXXName
                emit(f"\t{cxx_type} {arg.Name};\n")

        emit(f"\tstatic constexpr IROps OPCODE = OP_{op.Name.upper()};\n")

        if has_ssa:
            emit("\t// Get index of argument by name\n")
            for position, arg in enumerate(ssa_args):
                emit(f"\tstatic constexpr size_t {arg.Name}_Index = {position};\n")

        emit("};\n")

        # Add a static assert that the IR ops must be pod
        emit(f"static_assert(std::is_trivially_copyable_v<IROp_{op.Name}>);\n")
        emit(f"static_assert(std::is_standard_layout_v<IROp_{op.Name}>);\n\n")

    emit("#undef IROP_STRUCTS\n")
    emit("#endif\n\n")

# Print out const expression to calculate IR Op sizes
def print_ir_sizes():
    """Write the ``IROP_SIZES`` section.

    The section holds the ``IRSizes`` array with one ``sizeof`` entry per
    parsed op (the op named ``Last`` gets ``-1ULL`` instead), a static
    assertion tying the array to the enum, and the declarations of the op
    query functions.
    """
    emit = output_file.write

    emit("#ifdef IROP_SIZES\n")
    emit("constexpr std::array<size_t, IROps::OP_LAST + 1> IRSizes = {\n")

    for op in IROps:
        entry = "\t-1ULL,\n" if op.Name == "Last" else f"\tsizeof(IROp_{op.Name}),\n"
        emit(entry)

    # Closing part of the section; it starts with an empty line and every
    # line, including the last, is newline terminated.
    footer_lines = (
        "",
        "};",
        "",
        "// Make sure our array maps directly to the IROps enum",
        "static_assert(IRSizes[IROps::OP_LAST] == -1ULL);",
        "",
        "[[nodiscard]] inline size_t GetSize(IROps Op) { return IRSizes[Op]; }",
        "[[nodiscard, gnu::const]] std::string_view const& GetName(IROps Op);",
        "[[nodiscard, gnu::const]] uint8_t GetArgs(IROps Op);",
        "[[nodiscard, gnu::const]] uint8_t GetRAArgs(IROps Op);",
        "[[nodiscard, gnu::const]] FEXCore::IR::RegClass GetRegClass(IROps Op);",
        "[[nodiscard, gnu::const]] bool HasSideEffects(IROps Op);",
        "[[nodiscard, gnu::const]] bool ImplicitFlagClobber(IROps Op);",
        "[[nodiscard, gnu::const]] bool GetHasDest(IROps Op);",
        "[[nodiscard, gnu::const]] bool LoweredX87(IROps Op);",
        "[[nodiscard, gnu::const]] int8_t TiedSource(IROps Op);",
        "",
        "#undef IROP_SIZES",
        "#endif",
        "",
    )
    emit("\n".join(footer_lines))

def print_ir_reg_classes():
    """Write the ``IROP_REG_CLASSES_IMPL`` section.

    The ``IRRegClasses`` array gets one entry per parsed op: the op's
    destination type as a ``RegClass`` member (``SSA`` is written as
    ``Complex``), or ``Invalid`` for ops without a destination and for the
    op named ``Last``. A destination without a type is reported through
    ``ExitError``. The ``GetRegClass`` accessor follows the array.
    """
    emit = output_file.write

    emit("#ifdef IROP_REG_CLASSES_IMPL\n")
    emit("constexpr std::array<FEXCore::IR::RegClass, IROps::OP_LAST + 1> IRRegClasses = {\n")

    for op in IROps:
        if op.Name == "Last":
            emit("\tRegClass::Invalid,\n")
            continue

        if op.HasDest and op.DestType is None:
            ExitError("IR op {} has destination with no destination class".format(op.Name))

        if not op.HasDest:
            # No destination so it has an invalid destination class
            emit("\tRegClass::Invalid, // No destination\n")
        elif op.DestType == "SSA":
            # Special case SSA type
            emit("\tRegClass::Complex,\n")
        else:
            emit(f"\tRegClass::{op.DestType},\n")

    emit("};\n\n")

    emit("// Make sure our array maps directly to the IROps enum\n")
    emit("static_assert(IRRegClasses[IROps::OP_LAST] == RegClass::Invalid);\n\n")

    emit("FEXCore::IR::RegClass GetRegClass(IROps Op) { return IRRegClasses[Op]; }\n\n")

    emit("#undef IROP_REG_CLASSES_IMPL\n")
    emit("#endif\n\n")

# Print out the name printer implementation
def print_ir_getname():
    """Write the ``IROP_GETNAME_IMPL`` section.

    The section holds the ``IRNames`` array (the quoted name of every parsed
    op, in order), a static assertion that the last entry is "Last", and
    the ``GetName`` accessor.
    """
    emit = output_file.write

    emit("#ifdef IROP_GETNAME_IMPL\n")
    emit("constexpr std::array<std::string_view const, OP_LAST + 1> IRNames = {\n")
    for op in IROps:
        emit(f"\t\"{op.Name}\",\n")
    emit("};\n\n")

    emit("static_assert(IRNames[OP_LAST] == \"Last\");\n\n")

    for line in (
        "std::string_view const& GetName(IROps Op) {\n",
        "  return IRNames[Op];\n",
        "}\n",
    ):
        emit(line)

    emit("#undef IROP_GETNAME_IMPL\n")
    emit("#endif\n\n")

# Emit the per-op SSA argument counts (RA-visible and total)
def print_ir_getraargs():
    """Write the IROP_GETRAARGS_IMPL section of the generated header.

    Two tables are produced, each with one entry per element of ``IROps``:

    * ``IRRAArgs`` - ``SSAArgNum``, replaced by ``RAOverride`` when the
      override is not -1. An override larger than ``SSAArgNum`` is reported
      through ``ExitError``.
    * ``IRArgs`` - always ``SSAArgNum``.

    The ``GetRAArgs`` and ``GetArgs`` accessors follow the tables.
    """
    emit = output_file.write

    emit("#ifdef IROP_GETRAARGS_IMPL\n")

    emit("constexpr std::array<uint8_t, OP_LAST + 1> IRRAArgs = {\n")
    for ir_op in IROps:
        ra_count = ir_op.SSAArgNum
        override = ir_op.RAOverride

        if override != -1:
            if override > ir_op.SSAArgNum:
                ExitError("Op {} has RA override of {} which is more than total SSA values {}. This doesn't work".format(ir_op.Name, ir_op.RAOverride, ir_op.SSAArgNum))
            ra_count = override

        emit(f"\t{ra_count},\n")
    emit("};\n\n")


    emit("constexpr std::array<uint8_t, OP_LAST + 1> IRArgs = {\n")
    for ir_op in IROps:
        emit(f"\t{ir_op.SSAArgNum},\n")
    emit("};\n\n")

    # Accessors: (function name, backing table)
    for accessor, table in (("GetRAArgs", "IRRAArgs"), ("GetArgs", "IRArgs")):
        emit(f"uint8_t {accessor}(IROps Op) {{\n")
        emit(f"  return {table}[Op];\n")
        emit("}\n")

    emit("#undef IROP_GETRAARGS_IMPL\n")
    emit("#endif\n\n")

def print_ir_hassideeffects():
    """Write the IROP_HASSIDEEFFECTS_IMPL section of the generated header.

    For each of the op properties ``HasSideEffects``, ``ImplicitFlagClobber``,
    ``LoweredX87`` and ``TiedSource`` this emits a ``<prop>_`` lookup table
    with one entry per element of ``IROps`` and a ``<prop>(IROps)`` accessor.
    Boolean properties are written as ``true``/``false`` literals into a
    ``uint8_t`` array while the accessor still returns ``bool``; other
    properties are written verbatim using their own type.
    """
    emit = output_file.write
    emit("#ifdef IROP_HASSIDEEFFECTS_IMPL\n")

    # (attribute on the op / name of the accessor, C++ return type)
    properties = (
        ("HasSideEffects", "bool"),
        ("ImplicitFlagClobber", "bool"),
        ("LoweredX87", "bool"),
        ("TiedSource", "int8_t"),
    )

    for prop_name, return_type in properties:
        is_flag = return_type == "bool"
        storage_type = "uint8_t" if is_flag else return_type

        emit(f"constexpr std::array<{storage_type}, OP_LAST + 1> {prop_name}_ = {{\n")
        for ir_op in IROps:
            value = getattr(ir_op, prop_name)
            if is_flag:
                value = "true" if value else "false"
            emit(f"\t{value},\n")
        emit("};\n\n")

        emit(f"{return_type} {prop_name}(IROps Op) {{\n")
        emit(f"  return {prop_name}_[Op];\n")
        emit("}\n")

    emit("#undef IROP_HASSIDEEFFECTS_IMPL\n")
    emit("#endif\n\n")

def print_ir_gethasdest():
    """Write the IROP_GETHASDEST_IMPL section of the generated header.

    Emits the ``IRDest`` boolean table (one entry per element of ``IROps``,
    taken from each op's ``HasDest``) and the ``GetHasDest`` accessor.
    """
    emit = output_file.write

    emit("#ifdef IROP_GETHASDEST_IMPL\n")

    emit("constexpr std::array<bool, OP_LAST + 1> IRDest = {\n")
    for ir_op in IROps:
        literal = "true" if ir_op.HasDest else "false"
        emit("\t" + literal + ",\n")
    emit("};\n\n")

    emit("bool GetHasDest(IROps Op) {\n")
    emit("  return IRDest[Op];\n")
    emit("}\n")

    emit("#undef IROP_GETHASDEST_IMPL\n")
    emit("#endif\n\n")

# Emit the switch cases that print each op's arguments
def print_ir_arg_printer():
    """Write the IROP_ARGPRINTER_HELPER section of the generated header.

    Opens a ``switch (IROp->Op)`` and emits one ``case`` per op whose
    ``ArgPrinter`` is truthy. Each case prints the op's non-temporary
    arguments separated by ", ": SSA arguments are read from
    ``Op->Header.Args[i]`` (``i`` counts printed SSA arguments), all others
    from the op struct member of the same name. The closing brace of the
    switch is not written by this function.
    """
    emit = output_file.write

    emit("#ifdef IROP_ARGPRINTER_HELPER\n")
    emit("switch (IROp->Op) {\n")

    for ir_op in IROps:
        if not ir_op.ArgPrinter:
            continue

        emit("case IROps::OP_{}: {{\n".format(ir_op.Name.upper()))

        if len(ir_op.Arguments) != 0:
            emit("\t[[maybe_unused]] auto Op = IROp->C<IR::IROp_{}>();\n".format(ir_op.Name))
            emit("\t*out << \" \";\n")

            # No point printing temporaries that we can't recover
            printable = [arg for arg in ir_op.Arguments if not arg.Temporary]

            ssa_index = 0
            for position, arg in enumerate(printable):
                # Separator goes between arguments, never before the first.
                if position > 0:
                    emit('\t*out << ", ";\n')

                if arg.IsSSA:
                    # SSA value
                    emit("\tPrintArg(out, IR, Op->Header.Args[{}]);\n".format(ssa_index))
                    ssa_index += 1
                else:
                    # User defined op that is stored
                    emit("\tPrintArg(out, IR, Op->{});\n".format(arg.Name))

        emit("break;\n")
        emit("}\n")

    emit("#undef IROP_ARGPRINTER_HELPER\n")
    emit("#endif\n")

def print_validation(op):
    """Emit the assertion block for ``op``'s ``EmitValidation`` expressions.

    Writes nothing when the op has no validation expressions. Otherwise each
    expression becomes a ``LOGMAN_THROW_A_FMT(expr, "expr")`` statement inside
    an ``ASSERTIONS_ENABLED`` preprocessor guard; double quotes in the message
    copy of the expression are backslash-escaped so it stays a valid C++
    string literal.
    """
    checks = op.EmitValidation
    if len(checks) == 0:
        return

    output_file.write("#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED\n")

    for expression in checks:
        message = expression.replace("\"", "\\\"")
        output_file.write("\t\tLOGMAN_THROW_A_FMT({}, \"{}\");\n".format(expression, message))

    output_file.write("#endif\n")

def _write_helper_parameters(op, ssa_decl):
    """Write the C++ parameter list of an emitter helper for ``op``.

    ``ssa_decl`` is a format string with one ``{}`` placeholder for the
    argument name; it is used for non-temporary SSA arguments. Every other
    argument is declared with the C++ type looked up in ``IRTypesToCXX``.
    Default initializers are appended as `` = <value>`` and the parameters
    are separated by ", ".
    """
    params = []
    for arg in op.Arguments:
        if arg.IsSSA and not arg.Temporary:
            # SSA value
            decl = ssa_decl.format(arg.Name)
        else:
            # Temporary, or user defined op that is stored
            decl = "{} {}".format(IRTypesToCXX[arg.Type].CXXName, arg.Name)

        if arg.DefaultInitializer:
            decl += " = {}".format(arg.DefaultInitializer)

        params.append(decl)

    output_file.write(", ".join(params))

# Print out IR allocator helpers
def print_ir_allocator_helpers():
    """Write the IROP_ALLOCATE_HELPERS section of the generated header.

    The section consists of a fixed preamble (the ``Wrapper``/``IRPair``
    template, raw/orphan/typed op allocators and a handful of op query
    helpers) followed by, for every op in ``IROps`` except "Last":

    * a ``_<Name>(...)`` helper taking SSA arguments as
      ``OrderedNodeWrapper`` that allocates the op and fills in its fields;
    * when the op has SSA arguments, a second ``_<Name>(...)`` overload taking
      ``OrderedNode *`` that applies the op's inline hooks, calls
      ``AddUse()`` on each SSA argument, emits validation, and forwards to the
      first overload.

    Validation (see ``print_validation``) is emitted in exactly one of the two
    overloads: the ``OrderedNode *`` one when it exists, otherwise the
    wrapper one.
    """
    output_file.write("#ifdef IROP_ALLOCATE_HELPERS\n")

    output_file.write("\ttemplate <class T>\n")
    output_file.write("\tstruct Wrapper final {\n")
    output_file.write("\t\tT *first;\n")
    output_file.write("\t\tOrderedNode *Node; ///< Actual offset of this IR in ths list\n")
    output_file.write("\n")
    output_file.write("\t\toperator Wrapper<IROp_Header>() const { return Wrapper<IROp_Header> {reinterpret_cast<IROp_Header*>(first), Node}; }\n")
    output_file.write("\t\toperator OrderedNode *() { return Node; }\n")
    output_file.write("\t\toperator const OrderedNode *() const { return Node; }\n")
    output_file.write("\t\toperator OpNodeWrapper () const { return Node->Header.Value; }\n")
    output_file.write("\t};\n")

    output_file.write("\ttemplate <class T>\n")
    output_file.write("\tusing IRPair = Wrapper<T>;\n\n")

    output_file.write("\tIRPair<IROp_Header> AllocateRawOp(size_t HeaderSize) {\n")
    output_file.write("\t\tauto Op = reinterpret_cast<IROp_Header*>(DualListData.DataAllocate(HeaderSize));\n")
    output_file.write("\t\tmemset(Op, 0, HeaderSize);\n")
    output_file.write("\t\tOp->Op = IROps::OP_DUMMY;\n")
    output_file.write("\t\treturn IRPair<IROp_Header>{Op, CreateNode(Op)};\n")
    output_file.write("\t}\n\n")

    output_file.write("\ttemplate<class T, IROps T2>\n")
    output_file.write("\tT *AllocateOrphanOp() {\n")
    output_file.write("\t\tsize_t Size = FEXCore::IR::GetSize(T2);\n")
    output_file.write("\t\tauto Op = reinterpret_cast<T*>(DualListData.DataAllocate(Size));\n")
    output_file.write("\t\tmemset(Op, 0, Size);\n")
    output_file.write("\t\tOp->Header.Op = T2;\n")
    output_file.write("\t\treturn Op;\n")
    output_file.write("\t}\n\n")

    output_file.write("\ttemplate<class T, IROps T2>\n")
    output_file.write("\tIRPair<T> AllocateOp() {\n")
    output_file.write("\t\tsize_t Size = FEXCore::IR::GetSize(T2);\n")
    output_file.write("\t\tauto Op = reinterpret_cast<T*>(DualListData.DataAllocate(Size));\n")
    output_file.write("\t\tmemset(Op, 0, Size);\n")
    output_file.write("\t\tOp->Header.Op = T2;\n")
    output_file.write("\t\treturn IRPair<T>{Op, CreateNode(&Op->Header)};\n")
    output_file.write("\t}\n\n")

    output_file.write("\tIR::OpSize GetOpSize(const OrderedNode *Op) const {\n")
    output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
    output_file.write("\t\treturn HeaderOp->Size;\n")
    output_file.write("\t}\n\n")

    output_file.write("\tIR::OpSize GetOpElementSize(const OrderedNode *Op) const {\n")
    output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
    output_file.write("\t\treturn HeaderOp->ElementSize;\n")
    output_file.write("\t}\n\n")

    output_file.write("\tuint8_t GetOpElements(const OrderedNode *Op) const {\n")
    output_file.write("\t\tLOGMAN_THROW_A_FMT(OpHasDest(Op), \"Op {} has no dest\\n\", GetOpName(Op));\n")
    output_file.write("\t\treturn IR::OpSizeToSize(GetOpSize(Op)) / IR::OpSizeToSize(GetOpElementSize(Op));\n")
    output_file.write("\t}\n\n")

    output_file.write("\tbool OpHasDest(const OrderedNode *Op) const {\n")
    output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
    output_file.write("\t\treturn GetHasDest(HeaderOp->Op);\n")
    output_file.write("\t}\n\n")

    output_file.write("\tIROps GetOpType(const OrderedNode *Op) const {\n")
    output_file.write("\t\tauto HeaderOp = Op->Header.Value.GetNode(DualListData.DataBegin());\n")
    output_file.write("\t\treturn HeaderOp->Op;\n")
    output_file.write("\t}\n\n")

    output_file.write("\tFEXCore::IR::RegClass GetOpRegClass(const OrderedNode *Op) const {\n")
    output_file.write("\t\treturn GetRegClass(GetOpType(Op));\n")
    output_file.write("\t}\n\n")

    output_file.write("\tstd::string_view const& GetOpName(const OrderedNode *Op) const {\n")
    output_file.write("\t\treturn IR::GetName(GetOpType(Op));\n")
    output_file.write("\t}\n\n")

    # Generate helpers with operands
    for op in IROps:
        if op.Name == "Last":
            continue

        # Doc comment shared by both overloads; description lines are
        # separated by an empty `///` line.
        doc_comment = "\t///\n".join(["\t/// {}\n".format(comment) for comment in op.Desc])

        output_file.write(doc_comment)
        output_file.write("\tIRPair<IROp_{}> _{}(".format(op.Name, op.Name))
        _write_helper_parameters(op, "OrderedNodeWrapper {}")
        output_file.write(") {\n")

        # Save NZCV if needed before clobbering NZCV
        if op.ImplicitFlagClobber:
            # Terminate the line so the next generated statement starts on
            # its own line like every other statement in the helper.
            output_file.write("\t\tSaveNZCV(IROps::OP_{});\n".format(op.Name.upper()))

        # We gather the "has x87?" flag as we go. This saves the user from
        # having to keep track of whether they emitted any x87.
        # Also changes the mmx state to X87.
        if op.LoweredX87:
            output_file.write("\t\tRecordX87Use();\n")
            output_file.write(
                "\t\tif(MMXState == MMXState_MMX) ChgStateMMX_X87();\n"
            )

        output_file.write("\t\tauto _Op = AllocateOp<IROp_{}, IROps::OP_{}>();\n".format(op.Name, op.Name.upper()))

        # SSA arguments are stored first, then the plain stored members.
        if op.SSAArgNum != 0:
            for arg in op.Arguments:
                if arg.IsSSA:
                    output_file.write("\t\t_Op.first->{} = {};\n".format(arg.Name, arg.Name))

        for arg in op.Arguments:
            if not arg.Temporary and not arg.IsSSA:
                output_file.write("\t\t_Op.first->{} = {};\n".format(arg.Name, arg.Name))

        assert not (op.HasDest and op.DestSize is None)

        # Some ops without a destination still need an operating size
        # Effectively reusing the destination size value for operation size
        if op.DestSize is not None:
            output_file.write("\t\t_Op.first->Header.Size = {};\n".format(op.DestSize))

        if op.ElementSize is None:
            output_file.write("\t\t_Op.first->Header.ElementSize = _Op.first->Header.Size;\n")
        else:
            output_file.write("\t\t_Op.first->Header.ElementSize = {};\n".format(op.ElementSize))


        # Only validate here if there's no OrderedNode * version. Else
        # validation is in that version, see the comment below.
        if op.SSAArgNum == 0:
            print_validation(op)

        output_file.write("\t\treturn _Op;\n")
        output_file.write("\t}\n\n")

        # Now do the OrderedNode * version if necessary
        if not op.SSAArgNum:
            continue

        output_file.write(doc_comment)
        output_file.write("\tIRPair<IROp_{}> _{}(".format(op.Name, op.Name))
        _write_helper_parameters(op, "OrderedNode *{}")
        output_file.write(") {\n")
        output_file.write("\t\tauto ListDataBegin = DualListData.ListBegin();\n")

        # op.Inline is indexed by SSA argument position, not by overall
        # argument position.
        idx = 0
        for arg in op.Arguments:
            if not arg.IsSSA:
                continue

            # Inline an immediate if we can
            inline = op.Inline[idx]
            idx += 1

            if inline != '':
                Sized = "Size" in [x.Name for x in op.Arguments]
                P = ["Size" if Sized else "OpSize::i64Bit", arg.Name]

                # A few cases need extra info plumbed.
                if inline == "SubtractZero":
                    P += ["Src2"]
                elif inline == "Mem":
                    P += ["OffsetType", "OffsetScale"]
                elif inline == "Memtso":
                    P += ["OffsetType", "OffsetScale", "true /* TSO */"]
                    inline = "Mem"

                output_file.write(f"\t\t{arg.Name} = Inline{inline}({', '.join(P)});\n")

            output_file.write(f"\t\t{arg.Name}->AddUse();\n")

        # Insert validation here. This is skipped for the
        # OrderedNodeWrapper version because validation can depend on
        # the OrderedNode, but that's ok in practice. Everything pre-RA
        # uses the OrderedNode version, and anything RA-onwards is
        # dubious to validate.
        print_validation(op)

        # Forward to the wrapper overload, wrapping every SSA argument.
        forwarded = [
            arg.Name + ("->Wrapped(ListDataBegin)" if arg.IsSSA else "")
            for arg in op.Arguments
        ]
        output_file.write(f"\t\treturn _{op.Name}(")
        output_file.write(", ".join(forwarded))
        output_file.write(");\n")
        output_file.write("\t}\n\n")

    output_file.write("#undef IROP_ALLOCATE_HELPERS\n")
    output_file.write("#endif\n")

def print_ir_dispatcher_defs():
    """Write the IROP_DISPATCH_DEFS section of the dispatcher include file.

    Emits ``DEF_OP(<Name>);`` for every op (other than "Last") that has
    ``SwitchGen`` and ``JITDispatch`` set and no ``JITDispatchOverride``.
    Output goes to ``output_dispatch_file``.
    """
    emit = output_dispatch_file.write

    emit("#ifdef IROP_DISPATCH_DEFS\n")
    for ir_op in IROps:
        if ir_op.Name == "Last":
            continue
        if not (ir_op.SwitchGen and ir_op.JITDispatch):
            continue
        # Ops routed to another op's handler do not get their own definition.
        if ir_op.JITDispatchOverride is not None:
            continue

        emit("DEF_OP({});\n".format(ir_op.Name))

    emit("#undef IROP_DISPATCH_DEFS\n")
    emit("#endif\n")

def print_ir_dispatcher_dispatch():
    """Write the IROP_DISPATCH_DISPATCH section of the dispatcher include file.

    For every op (other than "Last") with ``JITDispatch`` set, emits a
    registration macro mapping the upper-cased op name to its handler. The
    handler is ``JITDispatchOverride`` when present, otherwise the op's own
    name. Ops with ``DynamicDispatch`` use ``REGISTER_OP_RT``; all others use
    ``REGISTER_OP``. Output goes to ``output_dispatch_file``.
    """
    emit = output_dispatch_file.write

    emit("#ifdef IROP_DISPATCH_DISPATCH\n")
    for ir_op in IROps:
        if ir_op.Name == "Last" or not ir_op.JITDispatch:
            continue

        handler = ir_op.Name if ir_op.JITDispatchOverride is None else ir_op.JITDispatchOverride
        macro = "REGISTER_OP_RT" if ir_op.DynamicDispatch else "REGISTER_OP"

        emit("{}({}, {});\n".format(macro, ir_op.Name.upper(), handler))

    emit("#undef IROP_DISPATCH_DISPATCH\n")
    emit("#endif\n")


# Script entry point.
# Arguments: <input IR json> <output include file> <output dispatcher include file>
if len(sys.argv) < 4:
    ExitError("Insufficient parameters passed to script")

output_filename = sys.argv[2]
output_dispatcher_filename = sys.argv[3]

# Context managers make sure every handle is closed even if parsing or
# generation raises part-way through.
with open(sys.argv[1], "r") as json_file:
    json_text = json_file.read()

json_object = json.loads(json_text)
# Top-level section names are matched case-insensitively.
json_object = {k.upper(): v for k, v in json_object.items()}

enums = json_object["ENUMS"]
ops = json_object["OPS"]
irtypes = json_object["IRTYPES"]
defines = json_object["DEFINES"]

parse_irtypes(irtypes)
parse_ops(ops)

# The print_* functions write to the module-level `output_file`, so the name
# bound here must stay exactly that.
with open(output_filename, "w") as output_file:
    print_enums(enums)
    print_ir_structs(defines)
    print_ir_sizes()
    print_ir_reg_classes()
    print_ir_getname()
    print_ir_getraargs()
    print_ir_hassideeffects()
    print_ir_gethasdest()
    print_ir_arg_printer()
    print_ir_allocator_helpers()

# Likewise the dispatcher printers write to `output_dispatch_file`.
with open(output_dispatcher_filename, "w") as output_dispatch_file:
    print_ir_dispatcher_defs()
    print_ir_dispatcher_dispatch()


================================================
FILE: FEXCore/Source/CMakeLists.txt
================================================
# Install location of the man pages. Used below as the prefix of the man1
# install destination; being a cache variable it can be overridden at
# configure time.
set(MAN_DIR share/man CACHE PATH "MAN_DIR")

# Sources of the FEXCore_Base static library (see add_library(FEXCore_Base ...)
# further down).
set(FEXCORE_BASE_SRCS
  Interface/Config/Config.cpp
  Utils/Allocator.cpp
  Utils/FileLoading.cpp
  Utils/ForcedAssert.cpp
  Utils/LogManager.cpp
  Utils/SpinWaitLock.cpp)

# The 64-bit allocator is built for every toolchain except MinGW.
if (NOT MINGW)
  list(APPEND FEXCORE_BASE_SRCS
    Utils/Allocator/64BitAllocator.cpp)
endif()

# Sources of the FEXCore object library (consumed by AddObject() below).
set(SRCS
  Common/JitSymbols.cpp
  Interface/Context/Context.cpp
  Interface/Core/LookupCache.cpp
  Interface/Core/CodeCache.cpp
  Interface/Core/Core.cpp
  Interface/Core/CPUBackend.cpp
  Interface/Core/Addressing.cpp
  Interface/Core/CPUID.cpp
  Interface/Core/Frontend.cpp
  Interface/Core/OpcodeDispatcher/AVX_128.cpp
  Interface/Core/OpcodeDispatcher/Crypto.cpp
  Interface/Core/OpcodeDispatcher/Flags.cpp
  Interface/Core/OpcodeDispatcher/Vector.cpp
  Interface/Core/OpcodeDispatcher/X87.cpp
  Interface/Core/OpcodeDispatcher/X87F64.cpp
  Interface/Core/OpcodeDispatcher.cpp
  Interface/Core/ArchHelpers/Arm64Emitter.cpp
  Interface/Core/Dispatcher/Dispatcher.cpp
  Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp
  Interface/Core/Interpreter/Fallbacks/StringCompareFallbacks.cpp
  Interface/Core/JIT/JIT.cpp
  Interface/Core/JIT/ALUOps.cpp
  Interface/Core/JIT/AtomicOps.cpp
  Interface/Core/JIT/BranchOps.cpp
  Interface/Core/JIT/ConversionOps.cpp
  Interface/Core/JIT/EncryptionOps.cpp
  Interface/Core/JIT/MemoryOps.cpp
  Interface/Core/JIT/MiscOps.cpp
  Interface/Core/JIT/MoveOps.cpp
  Interface/Core/JIT/VectorOps.cpp
  Interface/Core/JIT/Arm64Relocations.cpp
  Interface/Core/X86Tables/BaseTables.cpp
  Interface/Core/X86Tables/DDDTables.cpp
  Interface/Core/X86Tables/H0F38Tables.cpp
  Interface/Core/X86Tables/H0F3ATables.cpp
  Interface/Core/X86Tables/PrimaryGroupTables.cpp
  Interface/Core/X86Tables/SecondaryGroupTables.cpp
  Interface/Core/X86Tables/SecondaryModRMTables.cpp
  Interface/Core/X86Tables/SecondaryTables.cpp
  Interface/Core/X86Tables/VEXTables.cpp
  Interface/Core/X86Tables/X87Tables.cpp
  Interface/GDBJIT/GDBJIT.cpp
  Interface/IR/IRDumper.cpp
  Interface/IR/IREmitter.cpp
  Interface/IR/PassManager.cpp
  Interface/IR/Passes/IRDumperPass.cpp
  Interface/IR/Passes/IRValidation.cpp
  Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
  Interface/IR/Passes/RegisterAllocationPass.cpp
  Interface/IR/Passes/x87StackOptimizationPass.cpp
  Utils/LongJump.cpp
  Utils/Telemetry.cpp
  Utils/Threads.cpp
  Utils/Profiler.cpp)

# Arm64 hosts build the real arch helpers; every other host builds the stub
# file instead.
if (ARCHITECTURE_arm64)
  list(APPEND SRCS Utils/ArchHelpers/Arm64.cpp)
else()
  list(APPEND SRCS Utils/ArchHelpers/Arm64_stubs.cpp)
endif()

# Optional allocator override source; note it is added to the base library,
# not to SRCS.
if (ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
  list(APPEND FEXCORE_BASE_SRCS
    Utils/AllocatorOverride.cpp)
endif()

# Compile definitions applied privately to every target that goes through
# AddDefaultOptionsToTarget(). JIT_ARM64 is always defined.
set(DEFINES -DJIT_ARM64)

# Host architecture markers.
if (ARCHITECTURE_x86_64)
  list(APPEND DEFINES -DARCHITECTURE_x86_64=1)
endif()

if (ARCHITECTURE_arm64)
  list(APPEND DEFINES -DARCHITECTURE_arm64=1)
endif()

# Optional disassembler backends.
if (ENABLE_VIXL_DISASSEMBLER)
  list(APPEND DEFINES -DVIXL_DISASSEMBLER=1)
endif()

if (ENABLE_ZYDIS)
  list(APPEND DEFINES -DZYDIS_DISASSEMBLER=1)
endif()

# FEXCORE_PRESERVE_ALL_ATTR expands to the preserve_all attribute only on
# arm64 hosts where HAS_CLANG_PRESERVE_ALL is set, and to nothing otherwise;
# FEXCORE_HAS_PRESERVE_ALL_ATTR tells the code which case applies.
if (ARCHITECTURE_arm64 AND HAS_CLANG_PRESERVE_ALL)
  list(APPEND DEFINES "-DFEXCORE_PRESERVE_ALL_ATTR=__attribute__((preserve_all));-DFEXCORE_HAS_PRESERVE_ALL_ATTR=1")
else()
  list(APPEND DEFINES "-DFEXCORE_PRESERVE_ALL_ATTR=;-DFEXCORE_HAS_PRESERVE_ALL_ATTR=0")
endif()

# Libraries linked publicly into FEXCore_Base (and therefore propagated to
# everything that links it).
set(LIBS fmt::fmt xxHash::xxhash FEXHeaderUtils CodeEmitter cephes_128bit)

# vixl is needed by either the disassembler or the simulator.
if (ENABLE_VIXL_DISASSEMBLER OR ENABLE_VIXL_SIMULATOR)
  list(APPEND LIBS vixl::vixl)
endif()

if (ENABLE_ZYDIS)
  list(APPEND LIBS Zydis::Zydis)
endif()

# Platform libraries: dl everywhere except MinGW; MinGW builds link
# synchronization, plus mincore when targeting arm64ec.
if (NOT MINGW)
  list(APPEND LIBS dl)
else()
  list(APPEND LIBS synchronization)
  if (ARCHITECTURE_arm64ec)
    list(APPEND LIBS mincore)
  endif()
endif()

if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
  # GCC requires libatomic to use 128-bit atomics
  list(APPEND LIBS atomic)
endif()

# Generate config
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/Interface/Config/Config.json.in
  ${CMAKE_BINARY_DIR}/generated/Config/Config.json)

# Generate IR include file
# json_ir_generator.py reads IR.json and writes both IRDefines.inc and
# IRDefines_Dispatch.inc in one invocation.
set(OUTPUT_IR_FOLDER "${CMAKE_BINARY_DIR}/include/FEXCore/IR")
set(OUTPUT_NAME "${OUTPUT_IR_FOLDER}/IRDefines.inc")
set(OUTPUT_DISPATCHER_NAME "${OUTPUT_IR_FOLDER}/IRDefines_Dispatch.inc")
set(INPUT_NAME "${CMAKE_CURRENT_SOURCE_DIR}/Interface/IR/IR.json")

file(MAKE_DIRECTORY "${OUTPUT_IR_FOLDER}")

add_custom_command(
  OUTPUT "${OUTPUT_NAME}" "${OUTPUT_DISPATCHER_NAME}"
  DEPENDS "${INPUT_NAME}"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_generator.py"
  COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_generator.py"
    "${INPUT_NAME}" "${OUTPUT_NAME}" "${OUTPUT_DISPATCHER_NAME}")

# Both files produced by the command above are generated sources.
set_source_files_properties("${OUTPUT_NAME}" "${OUTPUT_DISPATCHER_NAME}"
  PROPERTIES GENERATED TRUE)

# Generate IR documentation
set(OUTPUT_IR_DOC "${CMAKE_BINARY_DIR}/IR.md")

add_custom_command(
  OUTPUT "${OUTPUT_IR_DOC}"
  DEPENDS "${INPUT_NAME}"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_doc_generator.py"
  COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/json_ir_doc_generator.py"
    "${INPUT_NAME}" "${OUTPUT_IR_DOC}")

# The documentation file is the output of this command; the previously used
# OUTPUT_IR_NAME variable is never defined, so the call matched no files.
set_source_files_properties("${OUTPUT_IR_DOC}" PROPERTIES GENERATED TRUE)

# Create the target
# Building IR_INC produces every generated IR artefact.
add_custom_target(IR_INC
  DEPENDS "${OUTPUT_NAME}"
  DEPENDS "${OUTPUT_DISPATCHER_NAME}"
  DEPENDS "${OUTPUT_IR_DOC}")

# Generate the configuration include file
# config_generator.py turns the configured Config.json into two .inl headers
# and the uncompressed man page.
set(OUTPUT_CONFIG_FOLDER "${CMAKE_BINARY_DIR}/include/FEXCore/Config")
set(OUTPUT_CONFIG_NAME "${OUTPUT_CONFIG_FOLDER}/ConfigValues.inl")
set(OUTPUT_CONFIG_OPTION_NAME "${OUTPUT_CONFIG_FOLDER}/ConfigOptions.inl")
set(INPUT_CONFIG_NAME "${CMAKE_BINARY_DIR}/generated/Config/Config.json")
set(OUTPUT_MAN_NAME "${CMAKE_BINARY_DIR}/generated/FEX.1")
set(OUTPUT_MAN_NAME_COMPRESS "${CMAKE_BINARY_DIR}/generated/FEX.1.gz")

file(MAKE_DIRECTORY "${OUTPUT_CONFIG_FOLDER}")

# Argument order expected by the script: input json, values header, man page,
# options header.
add_custom_command(
  OUTPUT "${OUTPUT_CONFIG_NAME}"
  OUTPUT "${OUTPUT_CONFIG_OPTION_NAME}"
  OUTPUT "${OUTPUT_MAN_NAME}"
  DEPENDS "${INPUT_CONFIG_NAME}"
  DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/config_generator.py"
  COMMAND "python3" "${CMAKE_CURRENT_SOURCE_DIR}/../Scripts/config_generator.py" "${INPUT_CONFIG_NAME}" "${OUTPUT_CONFIG_NAME}" "${OUTPUT_MAN_NAME}"
  "${OUTPUT_CONFIG_OPTION_NAME}")

# Compress the man page. gzip flags: -k keep the input, -f overwrite an
# existing output, -9 best compression, -n omit the original name/timestamp.
add_custom_command(
  OUTPUT "${OUTPUT_MAN_NAME_COMPRESS}"
  DEPENDS "${OUTPUT_MAN_NAME}"
  COMMAND "gzip" "-kf9n" "${OUTPUT_MAN_NAME}")

set_source_files_properties(${OUTPUT_CONFIG_NAME} PROPERTIES
  GENERATED TRUE)
set_source_files_properties(${OUTPUT_CONFIG_OPTION_NAME} PROPERTIES
  GENERATED TRUE)

set_source_files_properties(${OUTPUT_MAN_NAME} PROPERTIES
  GENERATED TRUE)
set_source_files_properties(${OUTPUT_MAN_NAME_COMPRESS} PROPERTIES
  GENERATED TRUE)

# Create the target
add_custom_target(CONFIG_INC
  DEPENDS "${OUTPUT_CONFIG_NAME}"
  DEPENDS "${OUTPUT_CONFIG_OPTION_NAME}"
  DEPENDS "${OUTPUT_MAN_NAME}"
  DEPENDS "${OUTPUT_MAN_NAME_COMPRESS}")

# The man page is not installed for Steam support builds.
if (NOT BUILD_STEAM_SUPPORT)
  # Install the compressed man page
  install(FILES ${OUTPUT_MAN_NAME_COMPRESS} COMPONENT Runtime DESTINATION ${MAN_DIR}/man1)
endif()

# Add in diagnostic colours if the option is available.
# Ninja code generator will kill colours if this isn't here
# GCC_COLOR / CLANG_COLOR record whether the compiler accepts the respective
# flag; AddDefaultOptionsToTarget() adds the flag only when the check passed.
check_cxx_compiler_flag(-fdiagnostics-color=always GCC_COLOR)
check_cxx_compiler_flag(-fcolor-diagnostics CLANG_COLOR)

# Applies the settings shared by all FEXCore library targets to target `Name`:
# hidden symbol visibility, include paths, the DEFINES list, a dependency on
# the generated-header targets, warning/codegen flags, optional coloured
# diagnostics, linker GC and the unordered_dense dependency.
function(AddDefaultOptionsToTarget Name)
  # Hide everything that is not explicitly exported.
  set_target_properties(${Name} PROPERTIES
    C_VISIBILITY_PRESET hidden
    CXX_VISIBILITY_PRESET hidden
    VISIBILITY_INLINES_HIDDEN TRUE)

  # Include paths, in search order. PUBLIC entries are also seen by consumers.
  target_include_directories(${Name}
    PUBLIC
      "${CMAKE_CURRENT_BINARY_DIR}"
    PRIVATE
      IncludePrivate/
      ${CMAKE_CURRENT_SOURCE_DIR}/
    PUBLIC
      "${PROJECT_SOURCE_DIR}/include/"
      "${CMAKE_BINARY_DIR}/include/")

  target_compile_definitions(${Name} PRIVATE ${DEFINES})

  # Generated config and IR headers must exist before this target compiles.
  add_dependencies(${Name} CONFIG_INC IR_INC)

  target_compile_options(${Name} PRIVATE
    -Wall
    -Werror=cast-qual
    -Werror=ignored-qualifiers
    -Werror=implicit-fallthrough

    -Wno-trigraphs
    -ffunction-sections
    -fwrapv)

  # Coloured diagnostics, only where the configure-time check succeeded.
  if (GCC_COLOR)
    target_compile_options(${Name} PRIVATE "-fdiagnostics-color=always")
  endif()
  if (CLANG_COLOR)
    target_compile_options(${Name} PRIVATE "-fcolor-diagnostics")
  endif()

  LinkerGC(${Name})
  target_link_libraries(${Name} PUBLIC unordered_dense::unordered_dense)
endfunction()

# Build FEXCore_Base static library
# LIBS is linked PUBLIC so targets linking FEXCore_Base inherit those
# dependencies.
add_library(FEXCore_Base STATIC ${FEXCORE_BASE_SRCS})
target_link_libraries(FEXCore_Base PUBLIC ${LIBS})
AddDefaultOptionsToTarget(FEXCore_Base)

# The Tracy client is only linked when the profiler is enabled and the TRACY
# backend is selected.
if (ENABLE_FEXCORE_PROFILER AND FEXCORE_PROFILER_BACKEND STREQUAL "TRACY")
  target_link_libraries(FEXCore_Base PUBLIC TracyClient)
endif()

# Creates the OBJECT library `Name` from SRCS. The objects are compiled once
# and reused by AddLibrary() for both the static and the shared library.
function(AddObject Name)
  add_library(${Name} OBJECT ${SRCS})

  target_link_libraries(${Name} PRIVATE FEXCore_Base)
  # Tuning flags are added before the default options, so they precede them
  # in the target's compile options.
  target_compile_options(${Name} PRIVATE ${FEX_TUNE_COMPILE_FLAGS})
  AddDefaultOptionsToTarget(${Name})
endfunction()

# Creates library `Name` of kind `Type` (STATIC or SHARED in this file) from
# the objects of `${PROJECT_NAME}_object`. Every library created this way is
# given the output name "FEXCore".
function(AddLibrary Name Type)
  add_library(${Name} ${Type} $<TARGET_OBJECTS:${PROJECT_NAME}_object>)
  set_target_properties(${Name} PROPERTIES OUTPUT_NAME FEXCore)

  # During generation of the import library (dll.a), MinGW needs some extra symbols from libraries
  # such as fmt, which are propagated by FEXCore_Base. Wonderful.
  if (MINGW)
    target_link_libraries(${Name} PRIVATE FEXCore_Base)
  endif()
  AddDefaultOptionsToTarget(${Name})
endfunction()

# Instantiate the targets: one object library, then a static and a shared
# library built from its objects.
AddObject(${PROJECT_NAME}_object)
AddLibrary(${PROJECT_NAME} STATIC)
AddLibrary(${PROJECT_NAME}_shared SHARED)

# The shared library is installed only for non-MinGW, non-Steam builds.
if (NOT MINGW AND NOT BUILD_STEAM_SUPPORT)
  install(TARGETS ${PROJECT_NAME}_shared LIBRARY
    DESTINATION ${CMAKE_INSTALL_LIBDIR}
    COMPONENT Libraries)
endif()

# Meta-library to link jemalloc libraries enabled in the build configuration.
# Only needed for targets that run emulation. For others, use JemallocDummy.
add_library(JemallocLibs STATIC Utils/AllocatorHooks.cpp)
if (ENABLE_FEX_ALLOCATOR)
  target_compile_definitions(JemallocLibs PRIVATE ENABLE_FEX_ALLOCATOR=1)
  target_link_libraries(JemallocLibs PUBLIC rpmalloc)
endif()
if (ENABLE_JEMALLOC_GLIBC_ALLOC)
  # NOTE(review): Thunks.cpp is not part of any source list visible in this
  # file - confirm this property still applies to a compiled source.
  set_source_files_properties(Interface/HLE/Thunks/Thunks.cpp PROPERTIES COMPILE_DEFINITIONS ENABLE_JEMALLOC_GLIBC=1)
  target_link_libraries(JemallocLibs INTERFACE FEX_jemalloc_glibc)
endif()

if (NOT MINGW)
  # Dummy project to use for host tools.
  # This overrides use of jemalloc in FEXCore with the normal glibc allocator.
  add_library(JemallocDummy STATIC Utils/AllocatorHooks.cpp)
  target_include_directories(JemallocDummy PRIVATE "${PROJECT_SOURCE_DIR}/include/")
endif()

# The shared library should always link enabled jemalloc libraries
target_link_libraries(${PROJECT_NAME}_shared PRIVATE JemallocLibs)


================================================
FILE: FEXCore/Source/Common/BitSet.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <type_traits>

namespace FEXCore {

/**
 * @brief Minimal owning bit set backed by a manually managed array of T.
 *
 * The struct is only a pointer (see the static_asserts at the bottom of this
 * file), so it does not remember its own size: every size-dependent
 * operation takes the element count again, and the owner must call Free()
 * explicitly. Allocate()/Realloc() do not initialize the storage themselves;
 * use MemClear()/MemSet() for that.
 *
 * Bit N lives in word `N / MinimumSizeBits` at bit position
 * `N % MinimumSizeBits`.
 */
template<typename T>
struct BitSet final {
  using ElementType = T;
  /// Size of one backing word in bytes.
  constexpr static size_t MinimumSize = sizeof(ElementType);
  /// Size of one backing word in bits.
  constexpr static size_t MinimumSizeBits = sizeof(ElementType) * 8;

  ElementType* Memory;

  /// Allocates storage for at least `Elements` bits.
  void Allocate(size_t Elements) {
    size_t AllocateSize = ToBytes(Elements);
    LOGMAN_THROW_A_FMT((AllocateSize * MinimumSize) >= Elements, "Fail");
    Memory = static_cast<ElementType*>(FEXCore::Allocator::malloc(AllocateSize));
  }

  /// Resizes the storage so it holds at least `Elements` bits.
  void Realloc(size_t Elements) {
    size_t AllocateSize = ToBytes(Elements);
    LOGMAN_THROW_A_FMT((AllocateSize * MinimumSize) >= Elements, "Fail");
    Memory = static_cast<ElementType*>(FEXCore::Allocator::realloc(Memory, AllocateSize));
  }

  /// Releases the storage and resets the pointer.
  void Free() {
    FEXCore::Allocator::free(Memory);
    Memory = nullptr;
  }

  /// Returns true when bit `Element` is set.
  bool Get(T Element) {
    return (Memory[Element / MinimumSizeBits] & (1ULL << (Element % MinimumSizeBits))) != 0;
  }

  /// Sets bit `Element`.
  void Set(T Element) {
    Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
  }

  /// Clears bit `Element`, leaving every other bit of the word untouched.
  void Clear(T Element) {
    // AND with the complement of the bit mask: without the `~` this would
    // keep only the target bit and wipe all the others in the word.
    Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
  }

  /// Zeroes the storage covering `Elements` bits.
  void MemClear(size_t Elements) {
    memset(Memory, 0, ToBytes(Elements));
  }

  /// Sets every bit of the storage covering `Elements` bits.
  void MemSet(size_t Elements) {
    memset(Memory, 0xFF, ToBytes(Elements));
  }

  /// Number of bytes used to store `Elements` bits.
  /// NOTE(review): the bit count is divided by sizeof(T) rather than by 8,
  /// so assuming AlignUp rounds up to a multiple of its second argument this
  /// over-allocates for element types narrower than 8 bytes - confirm that
  /// is intended before tightening it.
  uint32_t ToBytes(size_t Elements) {
    return AlignUp(Elements, MinimumSizeBits) / MinimumSize;
  }

  // This very explicitly doesn't let you take an address
  // Is only a getter
  bool operator[](T Element) {
    return Get(Element);
  }
};

/**
 * @brief Non-owning window into the storage of a BitSet<T>.
 *
 * GetView() points the view at the backing word containing a given bit
 * offset; bit indices passed to Get()/Set()/Clear() are then relative to
 * that word. The view never allocates or frees memory.
 */
template<typename T>
struct BitSetView final {
  using ElementType = T;
  /// Size of one backing word in bytes.
  constexpr static size_t MinimumSize = sizeof(ElementType);
  /// Size of one backing word in bits.
  constexpr static size_t MinimumSizeBits = sizeof(ElementType) * 8;

  ElementType* Memory;

  /// Points this view at the word of `Set` that holds bit `ElementOffset`.
  void GetView(BitSet<T>& Set, uint64_t ElementOffset) {
    // NOTE(review): the check uses the word size in bytes while the index
    // below divides by the word size in bits - confirm which alignment the
    // callers rely on.
    LOGMAN_THROW_A_FMT((ElementOffset % MinimumSize) == 0, "Bitset view offset needs to be aligned to size of backing element");
    Memory = &Set.Memory[ElementOffset / MinimumSizeBits];
  }

  /// Returns true when bit `Element` (relative to the view) is set.
  bool Get(T Element) {
    return (Memory[Element / MinimumSizeBits] & (1ULL << (Element % MinimumSizeBits))) != 0;
  }

  /// Sets bit `Element` (relative to the view).
  void Set(T Element) {
    Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
  }

  /// Clears bit `Element` (relative to the view), leaving every other bit of
  /// the word untouched.
  void Clear(T Element) {
    // AND with the complement of the bit mask: without the `~` this would
    // keep only the target bit and wipe all the others in the word.
    Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
  }

  /// Zeroes the bytes of the view computed from `Elements`.
  /// NOTE(review): the byte count is AlignUp(Elements / bits-per-word,
  /// bits-per-word), which differs from BitSet::ToBytes - verify against the
  /// callers before changing it.
  void MemClear(size_t Elements) {
    memset(Memory, 0, AlignUp(Elements / MinimumSizeBits, MinimumSizeBits));
  }

  /// Sets every bit in the bytes of the view computed from `Elements`; uses
  /// the same byte count as MemClear().
  void MemSet(size_t Elements) {
    memset(Memory, 0xFF, AlignUp(Elements / MinimumSizeBits, MinimumSizeBits));
  }

  // This very explicitly doesn't let you take an address
  // Is only a getter
  bool operator[](T Element) {
    return Get(Element);
  }
};

static_assert(sizeof(BitSet<uint32_t>) == sizeof(uintptr_t), "Needs to just be a pointer");
static_assert(std::is_trivially_copyable_v<BitSet<uint32_t>>, "Needs to trivially copyable");

static_assert(sizeof(BitSetView<uint32_t>) == sizeof(uintptr_t), "Needs to just be a pointer");
static_assert(std::is_trivially_copyable_v<BitSetView<uint32_t>>, "Needs to trivially copyable");

} // namespace FEXCore


================================================
FILE: FEXCore/Source/Common/JitSymbols.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/fextl/fmt.h>

#include "Common/JitSymbols.h"

#include <fcntl.h>
#include <unistd.h>

namespace FEXCore {
// Nothing to set up here; the perf map file is opened separately by InitFile().
JITSymbols::JITSymbols() = default;

// Closes the perf map descriptor if one is currently open (-1 means "none").
JITSymbols::~JITSymbols() {
  const bool HaveFile = fd != -1;
  if (!HaveFile) {
    return;
  }

  close(fd);
}

// Opens (creating or truncating) the per-process perf map file and stores its
// descriptor in `fd`. The file is named after the current pid. On failure
// open() returns -1, which the Register* functions treat as "no file".
void JITSymbols::InitFile() {
  // A raw descriptor is used instead of FILE because we must be robust against
  // forking processes closing our FD from under us.
#ifdef __ANDROID__
  // Android simpleperf looks in /data/local/tmp instead of /tmp
  const auto MapPath = fextl::fmt::format("/data/local/tmp/perf-{}.map", getpid());
#else
  const auto MapPath = fextl::fmt::format("/tmp/perf-{}.map", getpid());
#endif

  // Write-only, append mode; start from an empty file every time.
  constexpr int OpenFlags = O_CREAT | O_TRUNC | O_WRONLY | O_APPEND;
  constexpr mode_t FileMode = 0644;
  fd = open(MapPath.c_str(), OpenFlags, FileMode);
}

// Writes one unbuffered perf map line describing the named host code region.
// Does nothing when the perf map file is not open.
void JITSymbols::RegisterNamedRegion(const void* HostAddr, uint32_t CodeSize, std::string_view Name) {
  if (fd != -1) {
    // Linux perf map line: `<HostPtr> <Size> <Name>\n`
    const auto Line = fextl::fmt::format("{} {:x} {}\n", HostAddr, CodeSize, Name);
    const auto Written = write(fd, Line.c_str(), Line.size());
    if (Written == -1 && errno == EBADF) {
      // The descriptor was closed behind our back; stop using it.
      fd = -1;
    }
  }
}

// Writes one unbuffered perf map line labelling the host range as generic "FEXJIT" space.
// Does nothing when the perf map file is not open.
void JITSymbols::RegisterJITSpace(const void* HostAddr, uint32_t CodeSize) {
  if (fd != -1) {
    // Linux perf map line: `<HostPtr> <Size> <Name>\n`
    const auto Line = fextl::fmt::format("{} {:x} FEXJIT\n", HostAddr, CodeSize);
    const auto Written = write(fd, Line.c_str(), Line.size());
    if (Written == -1 && errno == EBADF) {
      // The descriptor was closed behind our back; stop using it.
      fd = -1;
    }
  }
}

// Buffered JIT symbols.
// Appends a `JIT_0x<guest>_<host>` perf map line for a translated block to `Buffer`.
// When the line does not fit in the remaining space the buffer is flushed and the line is formatted again.
void JITSymbols::Register(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint64_t GuestAddr, uint32_t CodeSize) {
  // Each pass re-checks `fd`, since a forced flush may have invalidated it.
  while (fd != -1) {
    const auto SpaceLeft = Buffer->BUFFER_SIZE - Buffer->Offset;
    const auto WriteCursor = &Buffer->Buffer[Buffer->Offset];
    const char* const BufferEnd = &Buffer->Buffer[Buffer->BUFFER_SIZE];

    // Linux perf map line: `<HostPtr> <Size> <Name>\n`
    const auto Formatted = fmt::format_to_n(WriteCursor, SpaceLeft, "{} {:x} JIT_0x{:x}_{}\n", HostAddr, CodeSize, GuestAddr, HostAddr);
    if (Formatted.out < BufferEnd) {
      // Line fit: commit it and let WriteBuffer decide whether it is time to flush.
      Buffer->Offset += Formatted.size;
      WriteBuffer(Buffer);
      return;
    }

    // Couldn't fit, force a write and try again with the emptied buffer.
    WriteBuffer(Buffer, true);
  }
}

// Appends a `<Name>+0x<Offset> (<HostAddr>)` perf map line to `Buffer`.
//
// When the line does not fit in the remaining space the buffer is flushed and the line is
// formatted again. A line that does not even fit in an empty buffer (e.g. a very long Name)
// bypasses the buffer and is written directly; retrying would otherwise recurse forever.
void JITSymbols::Register(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name, uintptr_t Offset) {
  if (fd == -1) {
    return;
  }

  // Calculate remaining sizes.
  const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset;
  const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset];

  // Linux perf format is very straightforward
  // `<HostPtr> <Size> <Name>\n`
  const auto FMTResult =
    fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}+0x{:x} ({})\n", HostAddr, CodeSize, Name, Offset, HostAddr);
  if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) {
    if (Buffer->Offset == 0) {
      // Nothing is buffered, so flushing cannot make more room: emit this entry unbuffered.
      const auto Line = fextl::fmt::format("{} {:x} {}+0x{:x} ({})\n", HostAddr, CodeSize, Name, Offset, HostAddr);
      const auto Result = write(fd, Line.c_str(), Line.size());
      if (Result == -1 && errno == EBADF) {
        fd = -1;
      }
      return;
    }

    // Couldn't fit, need to force a write.
    WriteBuffer(Buffer, true);
    // Rerun
    Register(Buffer, HostAddr, CodeSize, Name, Offset);
    return;
  }

  Buffer->Offset += FMTResult.size;
  WriteBuffer(Buffer);
}

// Appends a `<HostAddr> <Size> <Name>` perf map line to `Buffer`.
//
// When the line does not fit in the remaining space the buffer is flushed and the line is
// formatted again. A line that does not even fit in an empty buffer (e.g. a very long Name)
// is handed to the unbuffered overload; retrying would otherwise recurse forever.
void JITSymbols::RegisterNamedRegion(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name) {
  if (fd == -1) {
    return;
  }

  // Calculate remaining sizes.
  const auto RemainingSize = Buffer->BUFFER_SIZE - Buffer->Offset;
  const auto CurrentBufferOffset = &Buffer->Buffer[Buffer->Offset];

  // Linux perf format is very straightforward
  // `<HostPtr> <Size> <Name>\n`
  const auto FMTResult = fmt::format_to_n(CurrentBufferOffset, RemainingSize, "{} {:x} {}\n", HostAddr, CodeSize, Name);
  if (FMTResult.out >= &Buffer->Buffer[Buffer->BUFFER_SIZE]) {
    if (Buffer->Offset == 0) {
      // Nothing is buffered, so flushing cannot make more room: emit this entry unbuffered.
      RegisterNamedRegion(HostAddr, CodeSize, Name);
      return;
    }

    // Couldn't fit, need to force a write.
    WriteBuffer(Buffer, true);
    // Rerun
    RegisterNamedRegion(Buffer, HostAddr, CodeSize, Name);
    return;
  }

  Buffer->Offset += FMTResult.size;
  WriteBuffer(Buffer);
}

// Flushes the buffered perf map lines to the perf map file.
//
// Without ForceWrite the flush is skipped while both conditions hold:
//  - less than MAXIMUM_THRESHOLD has elapsed since the previous flush, and
//  - the buffer fill level is still below NEEDS_WRITE_DISTANCE.
// After a flush the buffer is empty and LastWrite is updated. A write failing with EBADF
// invalidates `fd` so later registrations become no-ops.
void JITSymbols::WriteBuffer(FEXCore::JITSymbolBuffer* Buffer, bool ForceWrite) {
  const auto Now = std::chrono::steady_clock::now();
  if (!ForceWrite) {
    // Elapsed time is (Now - LastWrite). The reversed subtraction is never positive, which
    // made the time threshold permanently "not reached" and disabled time-based flushing.
    const bool FlushedRecently = (Now - Buffer->LastWrite) < Buffer->MAXIMUM_THRESHOLD;
    const bool HasRoom = Buffer->Offset < Buffer->NEEDS_WRITE_DISTANCE;
    if (FlushedRecently && HasRoom) {
      // Still buffering, no need to write.
      return;
    }
  }

  Buffer->LastWrite = Now;
  auto Result = write(fd, Buffer->Buffer, Buffer->Offset);
  if (Result == -1 && errno == EBADF) {
    fd = -1;
  }

  Buffer->Offset = 0;
}
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Common/JitSymbols.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Utils/TypeDefines.h>

#include <FEXCore/fextl/memory.h>

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <string_view>

namespace FEXCore {
// Buffered JIT symbol tracking.
// Accumulates perf map text in `Buffer` so that lines can be written to the perf map file in batches.
struct JITSymbolBuffer {
  // Capacity of `Buffer` in bytes. One page minus (8 * 2) bytes for the other members, so that the
  // whole struct is exactly one page in size (checked by the static_assert following this struct).
  constexpr static size_t BUFFER_SIZE = FEXCore::Utils::FEX_PAGE_SIZE - (8 * 2);
  // Fill level at which a non-forced flush writes the buffer out (64 bytes before the end of the buffer).
  constexpr static size_t NEEDS_WRITE_DISTANCE = BUFFER_SIZE - 64;
  // Maximum time threshold to wait before a buffer write occurs.
  constexpr static std::chrono::milliseconds MAXIMUM_THRESHOLD {100};

  // Starts the flush timer at construction time.
  JITSymbolBuffer()
    : LastWrite {std::chrono::steady_clock::now()} {}
  // steady_clock to ensure a monotonic increasing clock.
  // In highly stressed situations this can still cause >2% CPU time in vdso_clock_gettime.
  // If we need lower CPU time when JIT symbols are enabled then FEX can read the cycle counter directly.
  std::chrono::steady_clock::time_point LastWrite {};
  // Number of bytes of `Buffer` currently holding pending text.
  size_t Offset {};
  // Pending perf map text; not NUL-terminated, only the first `Offset` bytes are meaningful.
  char Buffer[BUFFER_SIZE] {};
};
static_assert(sizeof(JITSymbolBuffer) == FEXCore::Utils::FEX_PAGE_SIZE, "Ensure this is one page in size");

// Emits Linux perf map entries (`<HostPtr> <Size> <Name>` lines) for JIT-generated host code.
// All Register* calls are no-ops until InitFile() has successfully opened the map file.
class JITSymbols final {
public:
  JITSymbols();
  // Closes the perf map file descriptor if it is open.
  ~JITSymbols();

  // Opens (creating/truncating) the per-process perf map file.
  void InitFile();
  // Unbuffered: immediately writes a line naming the host region.
  void RegisterNamedRegion(const void* HostAddr, uint32_t CodeSize, std::string_view Name);
  // Unbuffered: immediately writes a line naming the host region "FEXJIT".
  void RegisterJITSpace(const void* HostAddr, uint32_t CodeSize);

  // Allocate JIT buffer.
  static fextl::unique_ptr<FEXCore::JITSymbolBuffer> AllocateBuffer() {
    return fextl::make_unique<FEXCore::JITSymbolBuffer>();
  }

  // Buffered variants: the line is appended to `Buffer` and flushed to the file in batches.
  // Names the region `JIT_0x<GuestAddr>_<HostAddr>`.
  void Register(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint64_t GuestAddr, uint32_t CodeSize);
  // Names the region `<Name>+0x<Offset> (<HostAddr>)`.
  void Register(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name, uintptr_t Offset);
  // Names the region `<Name>`.
  void RegisterNamedRegion(FEXCore::JITSymbolBuffer* Buffer, const void* HostAddr, uint32_t CodeSize, std::string_view Name);

private:
  // Perf map file descriptor; -1 while closed or after a write failed with EBADF.
  int fd {-1};
  // Writes the buffered text to `fd`; without ForceWrite the flush may be deferred.
  void WriteBuffer(FEXCore::JITSymbolBuffer* Buffer, bool ForceWrite = false);
};
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Common/SoftFloat.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/string.h>
#include "cephes_128bit.h"

#include <bit>
#include <cmath>
#include <cstring>
#include <stdint.h>

#include "Common/VectorRegType.h"

extern "C" {
#include "SoftFloat-3e/platform.h"
#include "SoftFloat-3e/softfloat.h"
}

struct FEX_PACKED X80SoftFloat {
#ifdef ARCHITECTURE_x86_64
// Define this to push some operations to x87
// Only useful to see if precision loss is killing something
// #define DEBUG_X86_FLOAT
#ifdef DEBUG_X86_FLOAT
#define BIGFLOAT long double
#define BIGFLOATSIZE 10
#else
#define BIGFLOAT float128_t
#define BIGFLOATSIZE 16
#endif
#elif defined(ARCHITECTURE_arm64)
#define BIGFLOAT float128_t
#define BIGFLOATSIZE 16
#else
#error No 128bit float for this target!
#endif

  uint64_t Significand;
  union {
    uint16_t Raw;
    struct {
      uint16_t Exponent : 15;
      uint16_t Sign     : 1;
    };
  } Top;

  X80SoftFloat() {
    memset(this, 0, sizeof(*this));
  }
  X80SoftFloat(uint16_t _Sign, uint16_t _Exponent, uint64_t _Significand)
    : Significand {_Significand}
    , Top {.Raw = static_cast<uint16_t>((_Exponent & 0x7FFF) | (_Sign << 15))} {}

  // Debug representation in hex: `<sign>_<exponent>_<integer bit>_<fraction>`.
  fextl::string str() const {
    const uint64_t IntegerPart = Significand >> 63;
    const uint64_t Fraction = Significand & ((1ULL << 63) - 1);

    fextl::ostringstream Out;
    Out << std::hex << Top.Sign << "_" << Top.Exponent << "_" << IntegerPart << "_" << Fraction;
    return Out.str();
  }

  // Ops
  // Returns lhs + rhs, computed with SoftFloat's extF80_add.
  // With DEBUG_X86_FLOAT defined the host x87 unit performs the addition instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FADD(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    faddp;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    return extF80_add(state, lhs, rhs);
#endif
  }

  // Returns lhs - rhs, computed with SoftFloat's extF80_sub.
  // With DEBUG_X86_FLOAT defined the host x87 unit performs the subtraction instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FSUB(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fsubp;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    return extF80_sub(state, lhs, rhs);
#endif
  }

  // Returns lhs * rhs, computed with SoftFloat's extF80_mul.
  // With DEBUG_X86_FLOAT defined the host x87 unit performs the multiplication instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FMUL(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fmulp;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    return extF80_mul(state, lhs, rhs);
#endif
  }

  // Returns lhs / rhs, computed with SoftFloat's extF80_div.
  // With DEBUG_X86_FLOAT defined the host x87 unit performs the division instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FDIV(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fdivp;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    return extF80_div(state, lhs, rhs);
#endif
  }

  // x87 FPREM: remainder of lhs / rhs with the quotient truncated toward zero.
  //
  // Special cases handled before the computation (each sets softfloat_flag_invalid):
  //  - lhs is infinity    -> returns the QNaN produced by HandleInfinityOp
  //  - lhs is NaN         -> returns lhs unchanged
  //  - rhs is zero        -> returns a positive QNaN (0x7FFF / 0xC000000000000000)
  // With DEBUG_X86_FLOAT defined the host x87 `fprem` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FREM(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#if defined(DEBUG_X86_FLOAT)
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fprem;
    fstpt %[result];
    ffreep %%st(0);
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    /*
     * Check for invalid operation cases first - Intel FPREM sets Invalid Operation
     * for several cases including infinity dividend and zero divisor.
     */
    X80SoftFloat result = 0;
    if (HandleInfinityOp(state, lhs, result)) {
      return result;
    } else if (lhs.Top.Exponent == 0x7FFF && (lhs.Significand & 0x7FFFFFFFFFFFFFFFULL)) { // NaN
      // propagate NaN
      state->exceptionFlags |= softfloat_flag_invalid;
      return lhs;
    }

    // Check for zero divisor - fprem(x, 0) is invalid operation
    if (rhs.Top.Exponent == 0 && rhs.Significand == 0) {
      state->exceptionFlags |= softfloat_flag_invalid;
      // Return QNaN
      result.Top.Sign = 0;
      result.Top.Exponent = 0x7FFF;
      result.Significand = 0xC000000000000000ULL;
      return result;
    }

    /*
     * FPREM is not an IEEE-754 remainder.  From the Intel spec:
     *
     *    Computes the remainder obtained from dividing the value in the ST(0)
     *    register (the dividend) by the value in the ST(1) register (the divisor
     *    or modulus), and stores the result in ST(0). The remainder represents the
     *    following value:
     *
     *    Remainder := ST(0) − (Q * ST(1))
     *
     *    Here, Q is an integer value that is obtained by truncating the
     *    floating-point number quotient of [ST(0) / ST(1)] toward zero.
     *
     * We implement this sequence literally. softfloat_round_minMag means
     * "truncate towards zero".
     */
    extFloat80_t quotient = extF80_div(state, lhs, rhs);
    extFloat80_t Q = extF80_roundToInt(state, quotient, softfloat_round_minMag, true);
    // Q is +/-0 when both the significand and the exponent field (sign bit masked off) are zero.
    bool Q_zero = Q.signif == 0 && (Q.signExp & ~(1 << 15)) == 0;

    if (Q_zero) {
      // |lhs| < |rhs|: the remainder is the dividend itself, no arithmetic needed.
      return lhs;
    } else {
      return extF80_sub(state, lhs, extF80_mul(state, Q, rhs));
    }
#endif
  }

  // x87 FPREM1: remainder of lhs / rhs as computed by SoftFloat's extF80_rem.
  // With DEBUG_X86_FLOAT defined the host x87 `fprem1` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FREM1(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#if defined(DEBUG_X86_FLOAT)
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fprem1;
    fstpt %[result];
    ffreep %%st(0);
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    return extF80_rem(state, lhs, rhs);
#endif
  }

  // Rounds lhs to an integral value using the rounding mode currently stored in `state`.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FRNDINT(softfloat_state* state, const X80SoftFloat& lhs) {
    const auto ActiveMode = state->roundingMode;
    return extF80_roundToInt(state, lhs, ActiveMode, false);
  }

  // Rounds lhs to an integral value using the explicitly supplied rounding mode.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FRNDINT(softfloat_state* state, const X80SoftFloat& lhs, uint_fast8_t RoundMode) {
    const extFloat80_t Rounded = extF80_roundToInt(state, lhs, RoundMode, false);
    return Rounded;
  }

  // Significand half of x87 FXTRACT: returns lhs with its exponent field replaced by
  // 0x3FFF while keeping sign and significand bits. +/-0 is returned unchanged.
  // With DEBUG_X86_FLOAT defined the host x87 `fxtract` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FXTRACT_SIG(const X80SoftFloat& lhs) {
#if defined(DEBUG_X86_FLOAT)
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fxtract;
    fstpt %[result];
    ffreep %%st(0);
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st", "st(1)");

    return Result;
#else
    // Zero is a special case, the significand for +/- 0 is +/- zero.
    if (lhs.Top.Exponent == 0x0 && lhs.Significand == 0x0) {
      return lhs;
    }
    X80SoftFloat Tmp = lhs;
    Tmp.Top.Exponent = 0x3FFF;
    Tmp.Top.Sign = lhs.Top.Sign;
    return Tmp;
#endif
  }

  // Exponent half of x87 FXTRACT: returns the unbiased exponent of lhs as an 80-bit float.
  // For +/-0 the result is negative infinity.
  // With DEBUG_X86_FLOAT defined the host x87 `fxtract` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FXTRACT_EXP(const X80SoftFloat& lhs) {
#if defined(DEBUG_X86_FLOAT)
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fxtract;
    ffreep %%st(0);
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st", "st(1)");

    return Result;
#else
    // Zero is a special case, the exponent is always -inf
    if (lhs.Top.Exponent == 0x0 && lhs.Significand == 0x0) {
      X80SoftFloat Result(1, 0x7FFFUL, 0x8000'0000'0000'0000UL);
      return Result;
    }

    // Remove the bias (16383) from the stored exponent to get the true power of two.
    int32_t TrueExp = lhs.Top.Exponent - ExponentBias;
    return i32_to_extF80(TrueExp);
#endif
  }

  // Compares lhs with rhs and reports the relation through the three out-parameters:
  //   *eq  - lhs == rhs
  //   *lt  - lhs <  rhs
  //   *nan - the operands are unordered (none of <, ==, > holds)
  FEXCORE_PRESERVE_ALL_ATTR static void
  FCMP(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs, bool* eq, bool* lt, bool* nan) {
    const bool Equal = extF80_eq(state, lhs, rhs);
    *eq = Equal;
    const bool Less = extF80_lt(state, lhs, rhs);
    *lt = Less;

    // IEEE 754 semantics: the pair is ordered when one of ==, < or > holds.
    // The "greater" probe (rhs <= lhs) is only evaluated when neither == nor < held.
    const bool Ordered = Equal || Less || extF80_le(state, rhs, lhs);
    *nan = !Ordered;
  }

  // x87 FSCALE: returns lhs * 2^trunc(rhs). A zero lhs is returned unchanged.
  // With DEBUG_X86_FLOAT defined the host x87 `fscale` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FSCALE(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st1
    fldt %[lhs]; # st0
    fscale; # st0 = st0 * 2^(rdint(st1))
    fstpt %[result];
    ffreep %%st(0);
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    extFloat80_t Zero {0, 0};
    if (extF80_eq(state, lhs, Zero)) {
      return lhs;
    }
    // Truncate the scale toward zero, compute 2^scale in 128-bit precision, then multiply.
    X80SoftFloat Int = FRNDINT(state, rhs, softfloat_round_minMag);
    BIGFLOAT Src2_d = Int.ToFMax(state);
    Src2_d = FEXCore::cephes_128bit::exp2l(Src2_d);
    X80SoftFloat Src2_X80(state, Src2_d);
    X80SoftFloat Result = extF80_mul(state, lhs, Src2_X80);
    return Result;
#endif
  }

  // x87 F2XM1: returns 2^lhs - 1, evaluated in 128-bit precision and rounded back to 80 bits.
  // With DEBUG_X86_FLOAT defined the host x87 `f2xm1` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat F2XM1(softfloat_state* state, const X80SoftFloat& lhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    f2xm1; # st0 = 2^st(0) - 1
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st");

    return Result;
#else
    auto Src1_d = lhs.ToFMax(state);
    auto Result = FEXCore::cephes_128bit::exp2l(Src1_d);

    // 1.0 encoded as a binary128 value (low word, high word).
    static const float128_t one {0x0ULL, 0x3fff000000000000ULL};
    return X80SoftFloat(state, f128_sub(state, Result, one));
#endif
  }

  // x87 FYL2X: returns rhs * log2(lhs), evaluated in 128-bit precision and rounded back to 80 bits.
  // With DEBUG_X86_FLOAT defined the host x87 `fyl2x` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FYL2X(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[rhs]; # st(1)
    fldt %[lhs]; # st(0)
    fyl2x; # st(1) * log2l(st(0))
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    auto Src1_d = lhs.ToFMax(state);
    auto Src2_d = rhs.ToFMax(state);

    auto Tmp = f128_mul(state, Src2_d, FEXCore::cephes_128bit::log2l(Src1_d));
    return X80SoftFloat(state, Tmp);
#endif
  }

  // x87 FPATAN: returns atan2l(lhs, rhs), evaluated in 128-bit precision and rounded back to 80 bits.
  // With DEBUG_X86_FLOAT defined the host x87 `fpatan` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FATAN(softfloat_state* state, const X80SoftFloat& lhs, const X80SoftFloat& rhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs];
    fldt %[rhs];
    fpatan;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs), [rhs] "m"(rhs)
        : "st", "st(1)");

    return Result;
#else
    BIGFLOAT Src1_d = lhs.ToFMax(state);
    BIGFLOAT Src2_d = rhs.ToFMax(state);
    BIGFLOAT Tmp = FEXCore::cephes_128bit::atan2l(Src1_d, Src2_d);
    return X80SoftFloat(state, Tmp);
#endif
  }

  // Returns tan(lhs), evaluated in 128-bit precision and rounded back to 80 bits.
  // An infinite operand sets the invalid flag and yields a QNaN (see HandleInfinityOp).
  // With DEBUG_X86_FLOAT defined the host x87 `fptan` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FTAN(softfloat_state* state, const X80SoftFloat& lhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fptan;
    ffreep %%st(0);
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st");

    return Result;
#else
    X80SoftFloat result;
    if (HandleInfinityOp(state, lhs, result)) {
      return result;
    }

    BIGFLOAT Src_d = lhs.ToFMax(state);
    Src_d = FEXCore::cephes_128bit::tanl(Src_d);
    return X80SoftFloat(state, Src_d);
#endif
  }

  // Returns sin(lhs), evaluated in 128-bit precision and rounded back to 80 bits.
  // An infinite operand sets the invalid flag and yields a QNaN (see HandleInfinityOp).
  // With DEBUG_X86_FLOAT defined the host x87 `fsin` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FSIN(softfloat_state* state, const X80SoftFloat& lhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fsin;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st");

    return Result;
#else
    X80SoftFloat result;
    if (HandleInfinityOp(state, lhs, result)) {
      return result;
    }

    BIGFLOAT Src_d = lhs.ToFMax(state);
    Src_d = FEXCore::cephes_128bit::sinl(Src_d);
    return X80SoftFloat(state, Src_d);
#endif
  }

  // Returns cos(lhs), evaluated in 128-bit precision and rounded back to 80 bits.
  // An infinite operand sets the invalid flag and yields a QNaN (see HandleInfinityOp).
  // With DEBUG_X86_FLOAT defined the host x87 `fcos` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FCOS(softfloat_state* state, const X80SoftFloat& lhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fcos;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st");

    return Result;
#else
    X80SoftFloat result;
    if (HandleInfinityOp(state, lhs, result)) {
      return result;
    }

    BIGFLOAT Src_d = lhs.ToFMax(state);
    Src_d = FEXCore::cephes_128bit::cosl(Src_d);
    return X80SoftFloat(state, Src_d);
#endif
  }

  // Returns the square root of lhs, computed with SoftFloat's extF80_sqrt.
  // With DEBUG_X86_FLOAT defined the host x87 `fsqrt` instruction is used instead.
  FEXCORE_PRESERVE_ALL_ATTR static X80SoftFloat FSQRT(softfloat_state* state, const X80SoftFloat& lhs) {
#ifdef DEBUG_X86_FLOAT
    BIGFLOAT Result;
    asm(R"(
    fninit;
    fldt %[lhs]; # st0
    fsqrt;
    fstpt %[result];
    )"
        : [result] "=m"(Result)
        : [lhs] "m"(lhs)
        : "st");

    return Result;
#else
    return extF80_sqrt(state, lhs);
#endif
  }

  // Converts to single precision via SoftFloat and reinterprets the bits as a native float.
  float ToF32(softfloat_state* state) const {
    return std::bit_cast<float>(extF80_to_f32(state, *this));
  }

  // Converts to double precision via SoftFloat and reinterprets the bits as a native double.
  double ToF64(softfloat_state* state) const {
    return std::bit_cast<double>(extF80_to_f64(state, *this));
  }

  // Packs the 10-byte encoding into the low bytes of a zero-initialised vector register value.
  FEXCore::VectorRegType ToVector() const {
    FEXCore::VectorRegType Packed {};
    std::memcpy(&Packed, this, sizeof(X80SoftFloat));
    return Packed;
  }

  // Converts to the widest working float type (BIGFLOAT).
  // With 128-bit BIGFLOAT this is a SoftFloat widening conversion; in the x87 debug
  // configuration (BIGFLOATSIZE == 10) the 80-bit encoding is copied into a long double.
  BIGFLOAT ToFMax(softfloat_state* state) const {
#if BIGFLOATSIZE == 16
    const float128_t Result = extF80_to_f128(state, *this);
    return std::bit_cast<BIGFLOAT>(Result);
#else
    BIGFLOAT result {};
    // Copy only the 10 bytes this object owns: sizeof(long double) includes padding on
    // x86-64, so copying sizeof(result) would read past the end of *this.
    memcpy(&result, this, sizeof(*this));
    return result;
#endif
  }

  // Converts to a 16-bit integer using the current rounding mode.
  // Values that do not fit produce INT16_MIN, the 16-bit "indefinite" value.
  int16_t ToI16(softfloat_state* state) const {
    const auto Wide = extF80_to_i32(state, *this, state->roundingMode, false);
    const bool Fits = Wide >= INT16_MIN && Wide <= INT16_MAX;
    if (!Fits) {
      ///< Indefinite value for 16-bit conversions.
      return INT16_MIN;
    }
    return Wide;
  }

  // Converts to a 32-bit signed integer using the rounding mode stored in `state`.
  int32_t ToI32(softfloat_state* state) const {
    const auto ActiveMode = state->roundingMode;
    return extF80_to_i32(state, *this, ActiveMode, false);
  }

  // Converts to a 64-bit signed integer using the rounding mode stored in `state`.
  int64_t ToI64(softfloat_state* state) const {
    const auto ActiveMode = state->roundingMode;
    return extF80_to_i64(state, *this, ActiveMode, false);
  }

  // Converts to a 64-bit unsigned integer using the rounding mode stored in `state`.
  uint64_t ToUI64(softfloat_state* state) const {
    const auto ActiveMode = state->roundingMode;
    return extF80_to_ui64(state, *this, ActiveMode, false);
  }

  // Assigns the value of a 16-bit signed integer (widened through the 32-bit conversion).
  void operator=(const int16_t rhs) {
    const extFloat80_t Converted = i32_to_extF80(rhs);
    Significand = Converted.signif;
    Top.Raw = Converted.signExp;
  }

  // Assigns the value of a 32-bit signed integer.
  void operator=(const int32_t rhs) {
    const extFloat80_t Converted = i32_to_extF80(rhs);
    Significand = Converted.signif;
    Top.Raw = Converted.signExp;
  }

  // Assigns the value of a 64-bit unsigned integer.
  void operator=(const uint64_t rhs) {
    const extFloat80_t Converted = ui64_to_extF80(rhs);
    Significand = Converted.signif;
    Top.Raw = Converted.signExp;
  }

#if BIGFLOATSIZE == 10
  // Debug-only (x87 BIGFLOAT): adopts the 80-bit encoding stored in a native long double.
  void operator=(const long double rhs) {
    // Copy only the 10 bytes this object owns: sizeof(long double) includes padding on
    // x86-64, so copying sizeof(rhs) would write past the end of *this.
    memcpy(this, &rhs, sizeof(*this));
  }
#endif

  // Exposes the address of the raw 10-byte encoding.
  operator void*() {
    return static_cast<void*>(this);
  }

  X80SoftFloat(extFloat80_t rhs) {
    Significand = rhs.signif;
    Top.Raw = rhs.signExp;
  }

  // Widens a native float to the 80-bit format.
  X80SoftFloat(softfloat_state* state, const float rhs) {
    const float32_t Bits = std::bit_cast<float32_t>(rhs);
    *this = f32_to_extF80(state, Bits);
  }

  // Widens a native double to the 80-bit format.
  X80SoftFloat(softfloat_state* state, const double rhs) {
    const float64_t Bits = std::bit_cast<float64_t>(rhs);
    *this = f64_to_extF80(state, Bits);
  }

  // Narrows a BIGFLOAT working value back to the 80-bit format.
  X80SoftFloat(softfloat_state* state, BIGFLOAT rhs) {
#if BIGFLOATSIZE == 16
    const float128_t Wide = std::bit_cast<float128_t>(rhs);
    *this = f128_to_extF80(state, Wide);
#else
    const long double Native = std::bit_cast<long double>(rhs);
    *this = Native;
#endif
  }

  X80SoftFloat(const int16_t rhs) {
    *this = i32_to_extF80(rhs);
  }

  X80SoftFloat(const int32_t rhs) {
    *this = i32_to_extF80(rhs);
  }

  // Reads the 10-byte encoding from the low bytes of a vector register value.
  X80SoftFloat(const FEXCore::VectorRegType rhs) {
    std::memcpy(this, &rhs, sizeof(X80SoftFloat));
  }

  // Adopts the fields of a SoftFloat extFloat80_t value.
  void operator=(extFloat80_t rhs) {
    Top.Raw = rhs.signExp;
    Significand = rhs.signif;
  }

  // Implicit conversion to a vector register value; same packing as ToVector().
  operator FEXCore::VectorRegType() const {
    FEXCore::VectorRegType Packed {};
    std::memcpy(&Packed, this, sizeof(X80SoftFloat));
    return Packed;
  }

  // Implicit conversion to SoftFloat's extFloat80_t so values can be passed to extF80_* directly.
  operator extFloat80_t() const {
    extFloat80_t Converted {};
    Converted.signExp = Top.Raw;
    Converted.signif = Significand;
    return Converted;
  }

  // Returns true when lhs encodes a NaN: exponent all ones, integer bit set and a
  // non-zero fraction (significand bits 62..0).
  static bool IsNan(const X80SoftFloat& lhs) {
    // The fraction covers all 63 bits below the integer bit. Leaving out bit 62 (the
    // quiet bit) would classify the default QNaN 0xC000000000000000 as "not a NaN".
    constexpr uint64_t FractionMask = IntegerBit - 1;
    return (lhs.Top.Exponent == 0x7FFF) && (lhs.Significand & IntegerBit) && (lhs.Significand & FractionMask);
  }

  // Returns true when the sign bit of lhs is set.
  static bool SignBit(const X80SoftFloat& lhs) {
    return lhs.Top.Sign != 0;
  }

private:
  static constexpr uint64_t IntegerBit = (1ULL << 63);
  static constexpr uint64_t Bottom62Significand = ((1ULL << 62) - 1);
  static constexpr uint32_t ExponentBias = 16383;

  // Detects an infinite operand for operations where infinity is an invalid input.
  // When `arg` is +/-infinity the invalid-operation flag is raised, `result` receives a
  // positive QNaN and true is returned; otherwise `result` is left untouched and false is returned.
  FEXCORE_PRESERVE_ALL_ATTR static bool HandleInfinityOp(softfloat_state* state, const X80SoftFloat& arg, X80SoftFloat& result) {
    const bool IsInfinity = arg.Top.Exponent == 0x7FFF && arg.Significand == 0x8000000000000000ULL;
    if (!IsInfinity) {
      return false;
    }

    state->exceptionFlags |= softfloat_flag_invalid;
    // Return QNaN.
    result.Significand = 0xC000000000000000ULL;
    result.Top.Exponent = 0x7FFF;
    result.Top.Sign = 0;
    return true;
  }
};

static_assert(sizeof(X80SoftFloat) == 10, "tword must be 10bytes in size");


================================================
FILE: FEXCore/Source/Common/StringConv.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>

#include <concepts>
#include <string_view>

namespace FEXCore::StrConv {
// Parses `Value` as an integer and stores it in `*Result`.
// Signed targets go through strtoll, unsigned ones through strtoull; base 0 makes the
// parser accept decimal, octal (leading 0) and hexadecimal (leading 0x) text.
// Always returns true. NOTE(review): strto* reads until a NUL terminator, so
// `Value.data()` is expected to point at NUL-terminated storage.
template<std::integral T>
bool Conv(std::string_view Value, T* Result) {
  constexpr int AutoDetectBase = 0;
  const char* const Text = Value.data();

  if constexpr (!std::is_signed_v<T>) {
    const auto Parsed = std::strtoull(Text, nullptr, AutoDetectBase);
    *Result = static_cast<T>(Parsed);
  } else {
    const auto Parsed = std::strtoll(Text, nullptr, AutoDetectBase);
    *Result = static_cast<T>(Parsed);
  }
  return true;
}

// Parses `Value` as an unsigned integer (base auto-detected) and stores it in `*Result`
// converted to the enum type T. Always returns true.
template<typename T, typename = std::enable_if_t<std::is_enum_v<T>, T>>
bool Conv(std::string_view Value, T* Result) {
  const unsigned long long Numeric = std::strtoull(Value.data(), nullptr, 0);
  *Result = static_cast<T>(Numeric);
  return true;
}

// String "conversion": copies the characters of `Value` into `*Result`. Always returns true.
inline bool Conv(std::string_view Value, fextl::string* Result) {
  Result->assign(Value);
  return true;
}
} // namespace FEXCore::StrConv


================================================
FILE: FEXCore/Source/Common/VectorRegType.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#ifdef ARCHITECTURE_x86_64
#include <xmmintrin.h>
#include <immintrin.h>
#else
#include <cstdint>
#endif

namespace FEXCore {
// Plain aggregate holding two double-precision values.
struct VectorScalarF64Pair {
  double val[2];
};

#ifdef ARCHITECTURE_arm64
// Can't use uint8x16_t directly from arm_neon.h here.
// Overrides softfloat-3e's defines which causes problems.
#ifdef __clang__
using VectorRegType = __attribute__((neon_vector_type(16))) uint8_t;
#else
using VectorRegType = __attribute__((vector_size(16))) uint8_t;
#endif
// Two 128-bit vector register values; MakeVectorRegPair stores the low half in val[0] and the high half in val[1].
struct VectorRegPairType {
  VectorRegType val[2];
};

// Combines two 128-bit values into a pair: `low` goes to val[0], `high` to val[1].
static inline VectorRegPairType MakeVectorRegPair(VectorRegType low, VectorRegType high) {
  VectorRegPairType Pair;
  Pair.val[0] = low;
  Pair.val[1] = high;
  return Pair;
}

#elif defined(ARCHITECTURE_x86_64)
using VectorRegType = __m128i;
using VectorRegPairType = __m256i;

// Combines two 128-bit values into one 256-bit value; `low` becomes the lower 128 bits.
static inline VectorRegPairType MakeVectorRegPair(VectorRegType low, VectorRegType high) {
  const VectorRegPairType Combined = _mm256_set_m128i(high, low);
  return Combined;
}
#endif
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Config/Config.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/StringConv.h"
#include "FEXCore/Utils/EnumUtils.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/StringUtils.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <array>
#include <cstdlib>
#include <optional>
#include <stddef.h>
#include <stdint.h>
#include <string_view>
#include <type_traits>
#include <utility>

namespace FEXCore::Context {
class Context;
}

namespace FEXCore::Config {
// Expands ConfigValues.inl into one constant per config option, each holding that option's default value.
namespace detail {
#define P(x) x
// Plain typed option: constant of the declared type.
#define OPT_BASE(type, group, enum, json, default) const P(type) P(enum) = P(default);
// String option: the default is kept as a string_view.
#define OPT_STR(group, enum, json, default) const std::string_view P(enum) = P(default);
// String-array options store their default the same way as string options.
#define OPT_STRARRAY(group, enum, json, default) OPT_STR(group, enum, json, default)
// String-enum option: the default is stored as the enum's underlying integer value.
#define OPT_STRENUM(group, enum, json, default) const uint64_t P(enum) = FEXCore::ToUnderlying(P(default));
#include <FEXCore/Config/ConfigValues.inl>
} // namespace detail

// Indices into the `Paths` array below.
// Each *_LOCAL entry is immediately followed by its *_GLOBAL counterpart; the accessors
// rely on this by indexing with `<LOCAL> + Global`.
enum Paths {
  PATH_DATA_DIR_LOCAL = 0,
  PATH_DATA_DIR_GLOBAL,
  PATH_CONFIG_DIR_LOCAL,
  PATH_CONFIG_DIR_GLOBAL,
  PATH_CONFIG_FILE_LOCAL,
  PATH_CONFIG_FILE_GLOBAL,
  PATH_CONFIG_TELEMETRY_FOLDER,
  PATH_LAST,
};
// Storage for every configured path, indexed by the enumerators above.
static std::array<fextl::string, Paths::PATH_LAST> Paths;

void SetDataDirectory(const std::string_view Path, bool Global) {
  Paths[PATH_DATA_DIR_LOCAL + Global] = Path;
}

void SetConfigDirectory(const std::string_view Path, bool Global) {
  Paths[PATH_CONFIG_DIR_LOCAL + Global] = Path;
}

void SetConfigFileLocation(const std::string_view Path, bool Global) {
  Paths[PATH_CONFIG_FILE_LOCAL + Global] = Path;
}

// Returns the telemetry directory (with trailing '/'), resolving and caching it on first use.
// The TelemetryDirectory config option wins when it is non-empty; otherwise the
// "Telemetry/" folder inside the local data directory is used.
const fextl::string& GetTelemetryDirectory() {
  auto& Cached = Paths[PATH_CONFIG_TELEMETRY_FOLDER];
  if (!Cached.empty()) {
    return Cached;
  }

  FEX_CONFIG_OPT(TelemetryDirectory, TELEMETRYDIRECTORY);
  if (TelemetryDirectory().empty()) {
    Cached = Config::GetDataDirectory(false) + "Telemetry/";
  } else {
    Cached = TelemetryDirectory;
    Cached += "/";
  }

  return Cached;
}

// Returns the stored data directory; `Global` selects the global slot instead of the local one.
const fextl::string& GetDataDirectory(bool Global) {
  return Paths[Global ? PATH_DATA_DIR_GLOBAL : PATH_DATA_DIR_LOCAL];
}

// Returns the stored config directory; `Global` selects the global slot instead of the local one.
const fextl::string& GetConfigDirectory(bool Global) {
  return Paths[Global ? PATH_CONFIG_DIR_GLOBAL : PATH_CONFIG_DIR_LOCAL];
}

// Returns the stored config file location; `Global` selects the global slot instead of the local one.
const fextl::string& GetConfigFileLocation(bool Global) {
  return Paths[Global ? PATH_CONFIG_FILE_GLOBAL : PATH_CONFIG_FILE_LOCAL];
}

// Builds the path of the per-application config file: `<config dir>AppConfig/<Program>.json`.
// For the local (non-global) case missing directories are created on demand; when that
// fails the result falls back to `./<Program>.json`. Global directories are never created.
fextl::string GetApplicationConfig(const std::string_view Program, bool Global) {
  const auto DirectoryUsable = [Global](const fextl::string& Directory) {
    return Global || FHU::Filesystem::Exists(Directory) || FHU::Filesystem::CreateDirectories(Directory);
  };
  const auto WorkingDirFallback = [Program]() {
    return fextl::fmt::format("./{}.json", Program);
  };

  fextl::string AppConfigDir = GetConfigDirectory(Global);
  if (!DirectoryUsable(AppConfigDir)) {
    LogMan::Msg::DFmt("Couldn't create config directory: '{}'", AppConfigDir);
    // Let's go local in this case
    return WorkingDirFallback();
  }

  // Attempt to create the local AppConfig folder if it doesn't exist
  AppConfigDir += "AppConfig/";
  if (!DirectoryUsable(AppConfigDir)) {
    // Let's go local in this case
    return WorkingDirFallback();
  }

  return fextl::fmt::format("{}{}.json", AppConfigDir, Program);
}

static fextl::map<FEXCore::Config::LayerType, fextl::unique_ptr<FEXCore::Config::Layer>> ConfigLayers;
class MetaLayer;
static FEXCore::Config::MetaLayer* Meta {};

// Order in which MetaLayer::Load() merges the registered layers into the meta layer.
// Layers are merged front to back, so a layer listed later is applied on top of earlier ones.
constexpr std::array<FEXCore::Config::LayerType, 10> LoadOrder = {
  FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN,      FEXCore::Config::LayerType::LAYER_MAIN,
  FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP, FEXCore::Config::LayerType::LAYER_GLOBAL_APP,
  FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP,  FEXCore::Config::LayerType::LAYER_LOCAL_APP,
  FEXCore::Config::LayerType::LAYER_ARGUMENTS,        FEXCore::Config::LayerType::LAYER_USER_OVERRIDE,
  FEXCore::Config::LayerType::LAYER_ENVIRONMENT,      FEXCore::Config::LayerType::LAYER_TOP};

// Records which kind of layer this object represents.
Layer::Layer(const LayerType _Type)
  : Type(_Type) {}

// Out-of-line destructor definition; there is nothing to release beyond the members.
Layer::~Layer() = default;

// Layer holding the merged view of all other registered layers.
class MetaLayer final : public FEXCore::Config::Layer {
public:
  MetaLayer(const LayerType _Type)
    : FEXCore::Config::Layer(_Type) {}
  ~MetaLayer() {}

  // Rebuilds the option map by merging every registered layer in LoadOrder.
  void Load();

  // Returns the value of `Option` as type T, or std::nullopt when the option is not set.
  // A value still stored as a string is converted with StrConv::Conv and the converted
  // value replaces the string in the option map, so later lookups take the fast path.
  // Asking for an option that holds a string array is a usage error.
  template<typename T>
  requires (!std::is_same_v<fextl::string, T> && !std::is_same_v<StringArrayType, T>)
  std::optional<T> GetConv(ConfigOption Option) {
    const auto Entry = OptionMap.find(Option);
    if (Entry == OptionMap.end()) {
      return std::nullopt;
    }

    const auto& Stored = Entry->second;
    LOGMAN_THROW_A_FMT(!std::holds_alternative<StringArrayType>(Stored), "Tried to get config of invalid type!");

    // Fast path: already stored with the requested type.
    if (const auto* Typed = std::get_if<T>(&Stored)) [[likely]] {
      return *Typed;
    }

    if (const auto* Text = std::get_if<fextl::string>(&Stored)) {
      T Converted;
      if (FEXCore::StrConv::Conv(*Text, &Converted)) {
        // Cache the converted value in place of the string.
        OptionMap[Option].emplace<T>(Converted);
        return Converted;
      }
      LOGMAN_MSG_A_FMT("Couldn't Convert {} to specified type!", *Text);
    }

    FEX_UNREACHABLE;
  }

private:
  // Overlays `Options` on top of the current option map.
  void MergeConfigMap(const LayerOptions& Options);
  // Merges an environment-variable array option entry by entry instead of replacing it wholesale.
  void MergeEnvironmentVariables(const ConfigOption& Option, const StringArrayType& Value);
};

// Rebuilds the merged option map from scratch: every registered layer other than this
// one is merged in LoadOrder sequence.
void MetaLayer::Load() {
  OptionMap.clear();

  for (const auto LayerId : LoadOrder) {
    const auto Registered = ConfigLayers.find(LayerId);
    if (Registered == ConfigLayers.end() || LayerId == Type) {
      // Layer not registered, or it is the meta layer itself.
      continue;
    }

    // Merge this layer's options to this layer
    MergeConfigMap(Registered->second->GetOptionMap());
  }
}


void MetaLayer::MergeEnvironmentVariables(const ConfigOption& Option, const StringArrayType& Value) {
  // Environment variables need a bit of additional work
  // We want to merge the arrays rather than overwrite entirely
  auto MetaEnvironment = OptionMap.find(Option);
  if (MetaEnvironment == OptionMap.end()) {
    // Doesn't exist, just insert
    OptionMap.insert_or_assign(Option, Value);
    return;
  }

  // If an environment variable exists in both current meta and in the incoming layer then the meta layer value is overwritten
  fextl::unordered_map<fextl::string, fextl::string> LookupMap;
  const auto AddToMap = [&LookupMap](const StringArrayType& Value) {
    for (const auto& EnvVar : Value) {
      const auto ItEq = EnvVar.find_first_of('=');
      if (ItEq == fextl::string::npos) {
        // Broken environment variable
        // Skip
        continue;
      }
      auto Key = fextl::string(EnvVar.begin(), EnvVar.begin() + ItEq);
      auto Value = fextl::string(EnvVar.begin() + ItEq + 1, EnvVar.end());

      // Add the key to the map, overwriting whatever previous value was there
      LookupMap.insert_or_assign(std::move(Key), std::move(Value));
    }
  };

  AddToMap(std::get<StringArrayType>(MetaEnvironment->second));
  AddToMap(Value);

  // Now with the two layers merged in the map
  // Add all the values to the option
  Erase(Option);
  for (auto& Val : LookupMap) {
    // Set will emplace multiple options in to its list
    AppendStrArrayValue(Option, Val.first + "=" + Val.second);
  }
}

// Overlays `Options` on top of this layer's current options.
// Plain options replace whatever was stored before; the environment-variable lists (CONFIG_ENV and
// CONFIG_HOSTENV) are merged per variable instead of being replaced wholesale.
void MetaLayer::MergeConfigMap(const LayerOptions& Options) {
  for (const auto& Entry : Options) {
    const auto& Option = Entry.first;
    const auto& Data = Entry.second;

    const bool IsEnvironmentList =
      Option == FEXCore::Config::ConfigOption::CONFIG_ENV || Option == FEXCore::Config::ConfigOption::CONFIG_HOSTENV;

    if (!IsEnvironmentList) {
      OptionMap.insert_or_assign(Option, Data);
      continue;
    }

    LOGMAN_THROW_A_FMT(std::holds_alternative<StringArrayType>(Data), "Tried to get config of invalid type!");
    MergeEnvironmentVariables(Option, std::get<StringArrayType>(Data));
  }
}

// Registers the top-level meta layer and caches a typed pointer to it in `Meta`.
// The pointer is taken from the first entry of ConfigLayers, so this is expected to run before any
// other layer has been added.
void Initialize() {
  auto TopLayer = fextl::make_unique<MetaLayer>(FEXCore::Config::LayerType::LAYER_TOP);
  AddLayer(std::move(TopLayer));

  auto& FirstLayer = ConfigLayers.begin()->second;
  Meta = dynamic_cast<MetaLayer*>(FirstLayer.get());
}

// Drops every registered layer and resets the cached meta-layer pointer.
void Shutdown() {
  ConfigLayers.clear();
  // Meta was obtained from an entry of ConfigLayers (see Initialize), so it is stale after the clear above.
  Meta = nullptr;
}

// Calls Load() on every registered layer, visiting them in LoadOrder.
// Layer kinds that have not been registered are skipped.
void Load() {
  for (const auto LayerKind : LoadOrder) {
    const auto Registered = ConfigLayers.find(LayerKind);
    if (Registered == ConfigLayers.end()) {
      continue;
    }

    Registered->second->Load();
  }
}

// Expands `PathName` into a usable path, or returns an empty string when no expansion applies.
//
// Relative paths:
//  - A leading "~/" is replaced with $HOME (empty if unset); the result is returned without checking
//    that it exists. This matches bash, which only expands a tilde in the first position.
//  - Otherwise the absolute form is returned if it exists, else `PathName` itself if it exists.
// Absolute paths:
//  - Only touched when `ContainerPrefix` is non-empty, the path does not exist as given, and it does
//    exist with the prefix prepended; the prefixed path is then returned. The main case is the default
//    thunk install folders, which live below the prefix when running inside a container:
//      HostThunks:  $CMAKE_INSTALL_PREFIX/lib/fex-emu/HostThunks/
//      GuestThunks: $CMAKE_INSTALL_PREFIX/share/fex-emu/GuestThunks/
// Everything else (including an empty `PathName`) yields an empty string.
fextl::string ExpandPath(const fextl::string& ContainerPrefix, const fextl::string& PathName) {
  if (PathName.empty()) {
    return {};
  }

  if (!FHU::Filesystem::IsRelative(PathName)) {
    // An absolute path that already resolves, or no container to redirect into: nothing to do.
    if (ContainerPrefix.empty() || FHU::Filesystem::Exists(PathName)) {
      return {};
    }

    auto ContainerPath = ContainerPrefix + PathName;
    if (FHU::Filesystem::Exists(ContainerPath)) {
      return ContainerPath;
    }
    return {};
  }

  if (PathName.starts_with("~/")) {
    const char* HomeEnv = getenv("HOME");
    fextl::string Expanded = HomeEnv ? HomeEnv : "";
    // Skip the '~' but keep the '/' that follows it.
    Expanded.append(PathName.begin() + 1, PathName.end());
    return Expanded;
  }

  // Prefer the absolute form of the relative path when it exists.
  char AbsoluteBuffer[PATH_MAX];
  char* Resolved = FHU::Filesystem::Absolute(PathName.c_str(), AbsoluteBuffer);
  if (Resolved && FHU::Filesystem::Exists(Resolved)) {
    return Resolved;
  }

  // Fall back to the path as given, but only if it exists.
  if (FHU::Filesystem::Exists(PathName)) {
    return PathName;
  }

  return {};
}

// File that is read to determine which container manager (if any) the process is running under.
constexpr char ContainerManager[] = "/run/host/container-manager";

// Returns the whitespace-trimmed contents of the container-manager file, or an empty string when the
// file does not exist or cannot be loaded.
fextl::string FindContainer() {
  if (!FHU::Filesystem::Exists(ContainerManager)) {
    return {};
  }

  fextl::string Contents {};
  if (!FEXCore::FileLoading::LoadFile(Contents, ContainerManager)) {
    return {};
  }

  // The file may end with a newline.
  return FEXCore::StringUtils::Trim(Contents);
}

// Returns the path prefix under which the host's install paths are visible from inside the container,
// or an empty string when no supported container is detected.
// Only pressure-vessel is supported at the moment.
fextl::string FindContainerPrefix() {
  if (FindContainer() != "pressure-vessel") {
    return {};
  }

  // Inside pressure-vessel, $CMAKE_INSTALL_PREFIX paths live below /run/host/$CMAKE_INSTALL_PREFIX.
  return "/run/host/";
}

void ReloadMetaLayer() {
  Meta->Load();

  const fextl::string ContainerPrefix {FindContainerPrefix()};
  auto ExpandPathIfExists = [&ContainerPrefix](FEXCore::Config::ConfigOption Config, const fextl::string& PathName) {
    const auto NewPath = ExpandPath(ContainerPrefix, PathName);
    if (!NewPath.empty()) {
      FEXCore::Config::Set(Config, NewPath);
    }
  };

  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_ROOTFS)) {
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_ROOTFS);
    const auto ExpandedString = ExpandPath(ContainerPrefix, *PathName);
    if (!ExpandedString.empty()) {
      // Adjust the path if it ended up being relative
      FEXCore::Config::Set(FEXCore::Config::CONFIG_ROOTFS, ExpandedString);
    } else if (!PathName->empty()) {
      // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder
      const auto PathNameCopy = *PathName;
      for (auto Global : {true, false}) {
        for (auto DirectoryFetchers : {GetDataDirectory, GetConfigDirectory}) {
          fextl::string NamedRootFS = DirectoryFetchers(Global) + "RootFS/" + PathNameCopy;
          if (FHU::Filesystem::Exists(NamedRootFS)) {
            FEXCore::Config::Set(FEXCore::Config::CONFIG_ROOTFS, NamedRootFS);
          }
        }
      }
    }
  }
  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKHOSTLIBS)) {
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_THUNKHOSTLIBS);
    ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKHOSTLIBS, *PathName);
  }
  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKGUESTLIBS)) {
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_THUNKGUESTLIBS);
    ExpandPathIfExists(FEXCore::Config::CONFIG_THUNKGUESTLIBS, *PathName);
  }
  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_THUNKCONFIG)) {
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_THUNKCONFIG);
    const auto ExpandedString = ExpandPath(ContainerPrefix, *PathName);
    if (!ExpandedString.empty()) {
      // Adjust the path if it ended up being relative
      FEXCore::Config::Set(FEXCore::Config::CONFIG_THUNKCONFIG, ExpandedString);
    } else if (!PathName->empty()) {
      // If the filesystem doesn't exist then let's see if it exists in the fex-emu folder
      const auto PathNameCopy = *PathName;
      for (auto Global : {true, false}) {
        for (auto DirectoryFetchers : {GetDataDirectory, GetConfigDirectory}) {
          fextl::string NamedConfig = DirectoryFetchers(Global) + "ThunkConfigs/" + PathNameCopy;
          if (FHU::Filesystem::Exists(NamedConfig)) {
            FEXCore::Config::Set(FEXCore::Config::CONFIG_THUNKCONFIG, NamedConfig);
          }
        }
      }
    }
  }
  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_OUTPUTLOG)) {
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_OUTPUTLOG);
    if (*PathName != "stdout" && *PathName != "stderr" && *PathName != "server") {
      ExpandPathIfExists(FEXCore::Config::CONFIG_OUTPUTLOG, *PathName);
    }
  }

  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_DUMPIR) && !FEXCore::Config::Exists(FEXCore::Config::CONFIG_PASSMANAGERDUMPIR)) {
    // If DumpIR is set but no PassManagerDumpIR configuration is set, then default to `afteropt`
    const auto PathName = *Meta->Get(FEXCore::Config::CONFIG_DUMPIR);
    if (*PathName != "no") {
      Set(FEXCore::Config::ConfigOption::CONFIG_PASSMANAGERDUMPIR,
          fextl::fmt::format("{}", static_cast<uint64_t>(FEXCore::Config::PassManagerDumpIR::AFTEROPT)));
    }
  }

  if (FEXCore::Config::Exists(FEXCore::Config::CONFIG_SINGLESTEP) && Meta->GetConv<bool>(FEXCore::Config::CONFIG_SINGLESTEP).value_or(false)) {
    // Single stepping also enforces single instruction size blocks
    Set(FEXCore::Config::ConfigOption::CONFIG_MAXINST, "1");
  }
}

// Registers `_Layer` under its own layer type, transferring ownership to ConfigLayers.
// NOTE(review): emplace() is used, so presumably an already-registered layer of the same type is kept
// and the new one is discarded - confirm against the container type of ConfigLayers.
void AddLayer(fextl::unique_ptr<FEXCore::Config::Layer> _Layer) {
  const auto Kind = _Layer->GetLayerType();
  ConfigLayers.emplace(Kind, std::move(_Layer));
}

// Reports whether the meta layer currently holds a value for `Option`.
// Dereferences `Meta` unconditionally, so Initialize() must have been called.
bool Exists(ConfigOption Option) {
  return Meta->OptionExists(Option);
}

// Forwards to the meta layer's All() for `Option` and returns its optional pointer to a string array.
std::optional<StringArrayType*> All(ConfigOption Option) {
  return Meta->All(Option);
}

// Forwards to the meta layer's Get() for `Option` and returns its optional pointer to a string.
std::optional<fextl::string*> Get(ConfigOption Option) {
  return Meta->Get(Option);
}

// Returns the meta layer's value for `Option` converted to `T` (see MetaLayer::GetConv).
template<typename T>
std::optional<T> GetConv(ConfigOption Option) {
  return Meta->GetConv<T>(Option);
}

// Explicit instantiations of GetConv for the scalar types listed below.
template std::optional<bool> GetConv(ConfigOption Option);
template std::optional<uint8_t> GetConv(ConfigOption Option);
template std::optional<int32_t> GetConv(ConfigOption Option);
template std::optional<uint32_t> GetConv(ConfigOption Option);
template std::optional<uint64_t> GetConv(ConfigOption Option);

// Stores `Data` for `Option` on the meta layer by forwarding to Meta->Set().
void Set(ConfigOption Option, std::string_view Data) {
  Meta->Set(Option, Data);
}

// Removes `Option` from the meta layer by forwarding to Meta->Erase().
void Erase(ConfigOption Option) {
  Meta->Erase(Option);
}

// Returns the configured value of `Option` converted to `T`, or `Default` when no value is available.
template<typename T>
T Value<T>::GetIfExists(FEXCore::Config::ConfigOption Option, T Default) {
  return FEXCore::Config::GetConv<T>(Option).value_or(Default);
}

// String specialisation: returns a copy of the configured string for `Option`, or `Default` when
// the option has no value.
template<>
fextl::string Value<fextl::string>::GetIfExists(FEXCore::Config::ConfigOption Option, fextl::string Default) {
  if (const auto Stored = FEXCore::Config::Get(Option); Stored.has_value()) {
    return **Stored;
  }

  return Default;
}

// String specialisation taking the default as a string_view: returns a copy of the configured string
// for `Option`, or a string built from `Default` when the option has no value.
template<>
fextl::string Value<fextl::string>::GetIfExists(FEXCore::Config::ConfigOption Option, std::string_view Default) {
  if (const auto Stored = FEXCore::Config::Get(Option); Stored.has_value()) {
    return **Stored;
  }

  return fextl::string(Default);
}

// Explicit instantiations of Value<T>::GetIfExists for bool and the fixed-width integer types.
template bool Value<bool>::GetIfExists(FEXCore::Config::ConfigOption Option, bool Default);
template int8_t Value<int8_t>::GetIfExists(FEXCore::Config::ConfigOption Option, int8_t Default);
template uint8_t Value<uint8_t>::GetIfExists(FEXCore::Config::ConfigOption Option, uint8_t Default);
template int16_t Value<int16_t>::GetIfExists(FEXCore::Config::ConfigOption Option, int16_t Default);
template uint16_t Value<uint16_t>::GetIfExists(FEXCore::Config::ConfigOption Option, uint16_t Default);
template int32_t Value<int32_t>::GetIfExists(FEXCore::Config::ConfigOption Option, int32_t Default);
template uint32_t Value<uint32_t>::GetIfExists(FEXCore::Config::ConfigOption Option, uint32_t Default);
template int64_t Value<int64_t>::GetIfExists(FEXCore::Config::ConfigOption Option, int64_t Default);
template uint64_t Value<uint64_t>::GetIfExists(FEXCore::Config::ConfigOption Option, uint64_t Default);

// Explicit instantiations of the Value<T> constructor for the types listed below.
template Value<fextl::string>::Value(FEXCore::Config::ConfigOption _Option, fextl::string Default);
template Value<bool>::Value(FEXCore::Config::ConfigOption _Option, bool Default);
template Value<uint8_t>::Value(FEXCore::Config::ConfigOption _Option, uint8_t Default);
template Value<uint64_t>::Value(FEXCore::Config::ConfigOption _Option, uint64_t Default);

// Fills `List` with a copy of the string-array stored for `Option`.
// `List` is always emptied first, so it ends up empty when the option has no value.
template<typename T>
void Value<T>::GetListIfExists(FEXCore::Config::ConfigOption Option, StringArrayType* List) {
  const auto Stored = FEXCore::Config::All(Option);

  List->clear();
  if (!Stored.has_value()) {
    return;
  }

  *List = **Stored;
}
template void Value<StringArrayType>::GetListIfExists(FEXCore::Config::ConfigOption Option, StringArrayType* List);
} // namespace FEXCore::Config


================================================
FILE: FEXCore/Source/Interface/Config/Config.json.in
================================================
{
  "Options": {
    "CPU": {
      "Multiblock": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Controls multiblock code compilation",
          "Can cause long JIT compilation times and stutter"
        ]
      },
      "MaxInst": {
        "Type": "int32",
        "Default": "5000",
        "Desc": [
          "Maximum number of instructions to store in a block"
        ]
      },
      "EnableCodeCachingWIP": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enable the code caching subsystem"
        ]
      },
      "EnableCodeCacheValidation": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enable expensive validation when loading code caches"
        ]
      },
      "HostFeatures": {
        "Type": "strenum",
        "Default": "FEXCore::Config::HostFeatures::OFF",
        "Enums": {
          "ENABLESVE": "enablesve",
          "DISABLESVE": "disablesve",
          "ENABLEAVX": "enableavx",
          "DISABLEAVX": "disableavx",
          "ENABLEAFP": "enableafp",
          "DISABLEAFP": "disableafp",
          "ENABLELRCPC": "enablelrcpc",
          "DISABLELRCPC": "disablelrcpc",
          "ENABLELRCPC2": "enablelrcpc2",
          "DISABLELRCPC2": "disablelrcpc2",
          "ENABLECSSC": "enablecssc",
          "DISABLECSSC": "disablecssc",
          "ENABLEPMULL128": "enablepmull128",
          "DISABLEPMULL128": "disablepmull128",
          "ENABLERNG": "enablerng",
          "DISABLERNG": "disablerng",
          "ENABLECLZERO": "enableclzero",
          "DISABLECLZERO": "disableclzero",
          "ENABLEATOMICS": "enableatomics",
          "DISABLEATOMICS": "disableatomics",
          "ENABLEFCMA": "enablefcma",
          "DISABLEFCMA": "disablefcma",
          "ENABLEFLAGM": "enableflagm",
          "DISABLEFLAGM": "disableflagm",
          "ENABLEFLAGM2": "enableflagm2",
          "DISABLEFLAGM2": "disableflagm2",
          "ENABLEFRINTTS": "enablefrintts",
          "DISABLEFRINTTS": "disablefrintts",
          "ENABLECRYPTO": "enablecrypto",
          "DISABLECRYPTO": "disablecrypto",
          "ENABLERPRES": "enablerpres",
          "DISABLERPRES": "disablerpres",
          "ENABLESVEBITPERM": "enablesvebitperm",
          "DISABLESVEBITPERM": "disablesvebitperm",
          "ENABLEPRESERVEALLABI": "enablepreserveallabi",
          "DISABLEPRESERVEALLABI": "disablepreserveallabi",
          "ENABLEWFXT": "enablewfxt",
          "DISABLEWFXT": "disablewfxt",
          "ENABLE3DNOW": "enable3dnow",
          "DISABLE3DNOW": "disable3dnow",
          "ENABLESSE4A": "enablesse4a",
          "DISABLESSE4A": "disablesse4a",
          "ENABLEMOPS": "enablemops",
          "DISABLEMOPS": "disablemops"
        },
        "Desc": [
          "Allows controlling of the CPU features in the JIT.",
          "\toff: Default CPU features queried from CPU features",
          "\t{enable,disable}sve: Will force enable or disable sve even if the host doesn't support it",
          "\t{enable,disable}avx: Will force enable or disable avx even if the host doesn't support it",
          "\t{enable,disable}afp: Will force enable or disable afp even if the host doesn't support it",
          "\t{enable,disable}lrcpc: Will force enable or disable lrcpc even if the host doesn't support it",
          "\t{enable,disable}lrcpc2: Will force enable or disable lrcpc2 even if the host doesn't support it",
          "\t{enable,disable}cssc: Will force enable or disable cssc even if the host doesn't support it",
          "\t{enable,disable}pmull128: Will force enable or disable pmull128 even if the host doesn't support it",
          "\t{enable,disable}rng: Will force enable or disable rng even if the host doesn't support it",
          "\t{enable,disable}clzero: Will force enable or disable clzero even if the host doesn't support it",
          "\t{enable,disable}atomics: Will force enable or disable ARMv8.1 LSE atomics even if the host doesn't support it",
          "\t{enable,disable}fcma: Will force enable or disable fcma even if the host doesn't support it",
          "\t{enable,disable}flagm: Will force enable or disable flagm even if the host doesn't support it",
          "\t{enable,disable}flagm2: Will force enable or disable flagm2 even if the host doesn't support it",
          "\t{enable,disable}crypto: Will force enable or disable crypto extensions even if the host doesn't support it",
          "\t{enable,disable}rpres: Will force enable or disable rpres even if the host doesn't support it",
          "\t{enable,disable}svebitperm: Will force enable or disable svebitperm even if the host doesn't support it",
          "\t{enable,disable}preserveallabi: Will force enable or disable preserve_all abi even if the host doesn't support it",
          "\t{enable,disable}wfxt: Will force enable or disable wfxt even if the host doesn't support it",
          "\t{enable,disable}3dnow: Will force enable or disable 3DNow! even if the host doesn't support it",
          "\t{enable,disable}sse4a: Will force enable or disable SSE4a even if the host doesn't support it",
          "\t{enable,disable}mops: Will force enable or disable FEAT_MOPS even if the host doesn't support it"
        ]
      },
      "SmallTSCScale": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Scales the cycle counter on systems that have low frequencies."
        ]
      },
      "HideHybrid": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Hides hybrid CPU core arrangement."
        ]
      },
      "CPUFeatureRegisters": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Allows overriding cpu feature flags for manual testing"
        ]
      }
    },
    "Emulation": {
      "RootFS": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Which Root filesystem prefix to use",
          "This can be a filesystem path",
          "\teg: ~/RootFS/Debian_x86_64",
          "Or this can be a name of a rootfs",
          "If the named rootfs exists in the FEX data folder then it will use that one",
          "\teg: $XDG_DATA_HOME/fex-emu/RootFS/<RootFS name>/",
          "If XDG_DATA_HOME is unset, ~/.local/share will be used in its place.",
          "\teg: $HOME/.local/share/fex-emu/RootFS/<RootFS name>/"
        ]
      },
      "ThunkHostLibs": {
        "Type": "str",
        "Default": "@CMAKE_INSTALL_FULL_LIBDIR@/fex-emu/HostThunks",
        "Desc": [
          "Folder to find the host-side thunking libraries."
        ]
      },
      "ThunkGuestLibs": {
        "Type": "str",
        "Default": "@CMAKE_INSTALL_PREFIX@/share/fex-emu/GuestThunks",
        "Desc": [
          "Folder to find the guest-side thunking libraries."
        ]
      },
      "ThunkConfig": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "A json file specifying where to overlay the thunks.",
          "This can be a filesystem path",
          "\teg: ~/MyThunkConfig.json",
          "Or this can be the name of a Thunk config file",
          "If the named config file exists in the FEX data folder then it will use that one",
          "\teg: $XDG_DATA_HOME/fex-emu/ThunkConfigs/<ThunkConfig name>",
          "If XDG_DATA_HOME is unset, ~/.local/share will be used in its place.",
          "\teg: $HOME/.local/share/fex-emu/ThunkConfigs/<ThunkConfig name>"
        ]
      },
      "Env": {
        "Type": "strarray",
        "Default": "",
        "Desc": [
          "Adds an environment variable to the emulated environment."
        ]
      },
      "HostEnv": {
        "Type": "strarray",
        "Default": "",
        "Desc": [
          "Adds an environment variable to the host environment.",
          "This can be useful for setting environment variables that thunks can pick up.",
          "Typically isn't necessary since the guest libc isn't thunked. But is possible."
        ]
      },
      "AdditionalArguments": {
        "Type": "strarray",
        "Default": "",
        "Desc": [
          "Allows the user to pass additional arguments to the application"
        ]
      },
      "DisableL2Cache": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Disables FEXCore's JIT L2 cache lookup. Saving memory.",
          "Can potentially introduce more stutters."
        ]
      },
      "DynamicL1Cache": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Switches FEXCore's JIT L1 cache to be dynamically sized. Saving memory.",
          "Can potentially introduce more stutters."
        ]
      },
      "DynamicL1CacheIncreaseCountHeuristic": {
        "Type": "uint64",
        "Default": "250",
        "Desc": [
          "Threshold of lookups per second that the L1 dynamic cache should increase its size.",
          "Lower numbers means more aggressive scaling upward to the maximum size.",
          "Higher numbers means more conservative scaling, using less memory.",
          "Can potentially introduce stutters, more likely the higher the number.",
          "Don't have this number smaller than the decrease count!"
        ]
      },
      "DynamicL1CacheDecreaseCountHeuristic": {
        "Type": "uint64",
        "Default": "50",
        "Desc": [
          "Threshold of lookups per second that the L1 dynamic cache should decrease its size.",
          "The higher the number, the more aggressively it reduces the L1 cache size.",
          "Lower numbers means more conservative memory savings.",
          "Can potentially introduce more stutters, more likely the higher the number.",
          "Don't have this number larger than the increase count!"
        ]
      }
    },
    "Debug": {
      "SingleStep": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Single stepping configuration."
        ]
      },
      "GdbServer": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enables the GDB server."
        ]
      },
      "DumpIR": {
        "Type": "str",
        "Default": "no",
        "Desc": [
          "Folder to dump the IR in to.",
          "[no, stdout, stderr, server, <Folder>]"
        ]
      },
      "PassManagerDumpIR": {
        "Type": "strenum",
        "Default": "FEXCore::Config::PassManagerDumpIR::OFF",
        "Enums": {
          "BEFOREOPT": "beforeopt",
          "AFTEROPT": "afteropt",
          "BEFOREPASS": "beforepass",
          "AFTERPASS": "afterpass"
        },
        "Desc": [
          "Allows controlling when FEX dumps its IR.",
          "\toff: IR dumping will be disabled",
          "\tbeforeopt: Dump IR before any optimizations",
          "\tafteropt: Dump IR after all optimizations",
          "\tbeforepass: Dump IR before every optimization pass",
          "\tafterpass: Dump IR after every optimization pass"
        ]
      },
      "DumpGPRs": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "When the test harness ends, print the GPR state."
        ]
      },
      "O0": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Disables optimization passes for debugging."
        ]
      },
      "GlobalJITNaming": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Uses JITSymbols to name all JIT state as one symbol",
          "Useful for querying how much time is spent inside of the JIT",
          "Profiling tools will show JIT time as FEXJIT"
        ]
      },
      "LibraryJITNaming": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Uses JITSymbols to name JIT symbols grouped by library",
          "Useful for querying how much time is spent in each guest library",
          "Can be used to help guide thunk generation"
        ]
      },
      "BlockJITNaming": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Uses JITSymbols to name JIT symbols",
          "Useful for determining hot blocks of code",
          "Has some file writing overhead per JIT block"
        ]
      },
      "GDBSymbols": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Integrates with GDB using the JIT interface.",
          "Needs the fex jit loader in GDB, which can be loaded via `jit-reader-load libFEXGDBReader.so`.",
          "Also needs x86_64-linux-gnu-objdump in PATH.",
          "Can be very slow."
        ]
      },
      "InjectLibSegFault": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Sets the environment variable LD_PRELOAD=libSegFault.so",
          "This allows the user to very easily enable libSegFault without dealing with environment variables",
          "Very useful for applications that have launch scripts that set the variable to nothing at launch",
          "Set this in an application configuration for injecting in to only specific applications.",
          "\tNote: If x86/x86_64 libSegFault.so isn't installed then this option won't work."
        ]
      },
      "Disassemble": {
        "Type": "strenum",
        "Default": "FEXCore::Config::Disassemble::OFF",
        "Enums": {
          "DISPATCHER": "dispatcher",
          "BLOCKS": "blocks",
          "STATS": "stats"
        },
        "Desc": [
          "Allows controlling of the vixl disassembler for generated ARM code.",
          "\toff: No disassembly will be output",
          "\tdispatcher: Will enable disassembly of the JIT dispatcher loop",
          "\tblocks: Will enable disassembly of the translated instruction code blocks",
          "\tstats: Will print stats when disassembling the code"
        ]
      },
      "X86Disassemble": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enables x86/x86-64 guest disassembly output for compiled blocks.",
          "Requires FEX to be built with -DENABLE_ZYDIS=TRUE"
        ]
      },
      "ForceSVEWidth": {
        "Type": "uint32",
        "Default": "0",
        "Desc": [
          "Allows overriding the SVE width in the vixl simulator.",
          "Useful as a debugging feature."
        ]
      },
      "DisableTelemetry": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Disables telemetry at runtime.",
          "Useful for CI instcountCI mostly"
        ]
      }
    },
    "Logging": {
      "SilentLog": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Disables logging"
        ]
      },
      "OutputLog": {
        "Type": "str",
        "Default": "server",
        "Desc": [
          "File to write FEX output to.",
          "[stderr, server, <Filename>]"
        ]
      },
      "TelemetryDirectory": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Redirects the telemetry folder that FEX usually writes to.",
          "By default telemetry data is stored in {$FEX_APP_DATA_LOCATION,{$XDG_DATA_HOME,$HOME}/fex-emu/Telemetry/}"
        ]
      },
      "ProfileStats": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enables FEX's low-overhead sampling profile statistics.",
          "Requires a supported version of Mangohud to see the results"
        ]
      },
      "EnableGpuvisProfiling": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Enables profiling when FEX was built with the gpuvis profiler backend."
        ]
      }
    },
    "Hacks": {
      "SMCChecks": {
        "Type": "uint8",
        "Default": "FEXCore::Config::CONFIG_SMC_MTRACK",
        "TextDefault": "mtrack",
        "ArgumentHandler": "SMCCheckHandler",
        "Desc": [
          "Checks code for modification before execution.",
          "\tnone: No checks",
          "\tmtrack: Page tracking based invalidation (default)",
          "\tfull: Validate code before every run (slow)"
        ]
      },
      "TSOEnabled": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Controls TSO IR ops.",
          "Highly likely to break any multithreaded application if disabled."
        ]
      },
      "VectorTSOEnabled": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "When TSO emulation is enabled, controls if vector loadstores should also be atomic."
        ]
      },
      "MemcpySetTSOEnabled": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "When TSO emulation is enabled, controls if memcpy and memset should also be atomic.",
          "Only affects REP MOVS and REP STOS instructions"
        ]
      },
      "HalfBarrierTSOEnabled": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "When TSO emulation is enabled, controls if unaligned loads and stores should be backpatched to half-barrier atomics.",
          "Can be dangerous due to aligned loadstores through the same code now becoming non-atomic."
        ]
      },
      "StrictInProcessSplitLocks": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Strict global lock when handling an unaligned atomic that crosses a 16-byte or cacheline granularity",
          "This is required to ensure a split-lock doesn't tear inside the process"
        ]
      },
      "KernelUnalignedAtomicBackpatching": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "When the kernel unaligned atomic handler is enabled, use backpatching to reduce kernel context switches."
        ]
      },
      "VolatileMetadata": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Use volatile metadata in PE files to inform TSO instructions when available.",
          "When metadata is unavailable falls back to the currently enabled TSO options."
        ]
      },
      "X87ReducedPrecision": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Emulates X87 floating point using 64-bit precision. This reduces emulation accuracy and may result in rendering bugs."
        ]
      },
      "StallProcess": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Forces a process to stall out on initialization",
          "Useful for a process that keeps restarting and doesn't work"
        ]
      },
      "HideHypervisorBit": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Hides the hypervisor CPUID bit when set.",
          "Should only be used for applications that have issues with this set."
        ]
      },
      "StartupSleep": {
        "Type": "uint32",
        "Default": "0",
        "Desc": [
          "Sleeps the process at startup for a duration of seconds.",
          "Useful if an application crashes too quickly to attach a debugger."
        ]
      },
      "StartupSleepProcName": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Constrains the startup sleep to only apply to processes that match this name."
        ]
      },
      "MonoHacks": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "Permits a hook-based SMC approach and smaller JIT blocks when mono is detected."
        ]
      }
    },
    "Misc": {
      "ServerSocketPath": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Override for a FEXServer socket path. Only useful for chroots."
        ]
      },
      "NeedsSeccomp": {
        "Type": "bool",
        "Default": "false",
        "Desc": [
          "Disables inline syscalls in order to support seccomp handling"
        ]
      },
      "ExtendedVolatileMetadata": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "Configuration provided volatile metadata. Only implemented for WoW64/arm64ec.",
          "Limited in its use but can be handy.",
          "Extends on top of what Microsoft has for volatile metadata, but also supported for WoW64.",
          "Colon delimited modules, then semi-colon delimited instructions, then comma delimited ranges",
          "Default disables TSO in the module, unless instructions overlap the range",
          "<module>;<offset begin>-<offset-end>,...;<instruction offset to force TSO>,...:<another>",
          "examples:",
          "  * Disable TSO for a full module: Just provide the module name:",
          "      `hl2_linux`",
          "  * Disable TSO for a part of the module:",
          "      `hl2_linux;<offset begin>-<offset-end>`",
          "  * Disable TSO for a part of the module, but enable TSO for some instructions within the module",
          "      `hl2_linux;<offset begin>-<offset-end>;<instruction offset>,<instruction offset>`",
          "  * Disable TSO for multiple modules",
          "      `hl2_linux:libsdl2.so`"
        ]
      }
    }
  },
  "UnnamedOptions": {
    "Misc": {
      "INTERPRETER_INSTALLED": {
        "Type": "bool",
        "Default": "false"
      },
      "APP_FILENAME": {
        "Type": "str",
        "Default": ""
      },
      "APP_CONFIG_NAME": {
        "Type": "str",
        "Default": "",
        "Desc": [
          "This is the application config name that has been loaded.",
          "This differs from APP_FILENAME in two ways",
          "Where APP_FILENAME always points to the executable path that FEX-Emu is executing.",
          "This matches what is used to load the AppLayer configuration name.",
          "When running through a compatibility layer like wine, this will only be the exe name, instead of wine full path."
        ]
      },
      "IS64BIT_MODE": {
        "Type": "bool",
        "Default": "false"
      },
      "DISABLE_VIXL_INDIRECT_RUNTIME_CALLS": {
        "Type": "bool",
        "Default": "true",
        "Desc": [
          "This option is used for the InstructionCountCI so it can generate the same codegen between Arm64 hosts and vixl simulator hosts.",
          "Vixl simulator indirect runtime calls are a special hlt instruction with metadata after it. Effectively making a custom call instruction.",
          "With vixl simulator calls disabled, the code generation would be the same as on a native Arm64 host, but running the code is broken."
        ]
      }
    }
  }
}


================================================
FILE: FEXCore/Source/Interface/Context/Context.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Interface/Context/Context.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CPUID.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/HLE/SyscallHandler.h>

#include <FEXCore/Core/Thunks.h>
#include "FEXCore/Debug/InternalThreadState.h"

namespace FEXCore::Context {
// Factory for the public Context interface: constructs a ContextImpl from the supplied host features.
fextl::unique_ptr<FEXCore::Context::Context> FEXCore::Context::Context::CreateNewContext(const FEXCore::HostFeatures& Features) {
  return fextl::make_unique<FEXCore::Context::ContextImpl>(Features);
}

// Compiles the block at GuestRIP for the thread's current frame. The value returned by CompileBlock is discarded.
void FEXCore::Context::ContextImpl::CompileRIP(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP) {
  CompileBlock(Thread->CurrentFrame, GuestRIP);
}

// Same as CompileRIP, but forwards MaxInst to CompileBlock.
void FEXCore::Context::ContextImpl::CompileRIPCount(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst) {
  CompileBlock(Thread->CurrentFrame, GuestRIP, MaxInst);
}

// Stores the signal delegator pointer; no ownership is taken here.
void FEXCore::Context::ContextImpl::SetSignalDelegator(FEXCore::SignalDelegator* _SignalDelegation) {
  SignalDelegation = _SignalDelegation;
}

// Stores the syscall handler and caches the source code resolver it exposes.
// Passing nullptr clears both pointers instead of dereferencing a null handler.
void FEXCore::Context::ContextImpl::SetSyscallHandler(FEXCore::HLE::SyscallHandler* Handler) {
  SyscallHandler = Handler;
  if (Handler) {
    SourcecodeResolver = Handler->GetSourcecodeResolver();
  } else {
    SourcecodeResolver = nullptr;
  }
}

// Stores the thunk handler pointer; no ownership is taken here.
void FEXCore::Context::ContextImpl::SetThunkHandler(FEXCore::ThunkHandler* Handler) {
  ThunkHandler = Handler;
}

// Forwards to the CPUID emulation object.
FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunction(uint32_t Function, uint32_t Leaf) {
  return CPUID.RunFunction(Function, Leaf);
}

// Forwards to the CPUID emulation object.
FEXCore::CPUID::XCRResults FEXCore::Context::ContextImpl::RunXCRFunction(uint32_t Function) {
  return CPUID.RunXCRFunction(Function);
}

// Forwards to the CPUID emulation object, additionally passing the CPU argument.
FEXCore::CPUID::FunctionResults FEXCore::Context::ContextImpl::RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) {
  return CPUID.RunFunctionName(Function, Leaf, CPU);
}

// Asks the thread's CPU backend whether Address lies inside its code buffer.
bool FEXCore::Context::ContextImpl::IsAddressInCodeBuffer(FEXCore::Core::InternalThreadState* Thread, uintptr_t Address) const {
  return Thread->CPUBackend->IsAddressInCodeBuffer(Address);
}
} // namespace FEXCore::Context


================================================
FILE: FEXCore/Source/Interface/Context/Context.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Common/JitSymbols.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/CPUID.h"
#include <Interface/IR/IntrusiveIRList.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>

#include <atomic>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <optional>
#include <shared_mutex>

// Forward declarations of FEXCore types referenced further down in this header.
namespace FEXCore {
struct LookupCacheWriteLockToken;
class SignalDelegator;
class ThunkHandler;
} // namespace FEXCore

namespace FEXCore::Core {
struct DebugData;
struct InternalThreadState;
} // namespace FEXCore::Core

namespace FEXCore::CPU {
class Dispatcher;
} // namespace FEXCore::CPU

namespace FEXCore::HLE {
class SourcecodeResolver;
class SyscallHandler;
} // namespace FEXCore::HLE

namespace FEXCore::Context {
// Packed record made of three 64-bit fields; a pointer to it is what a BlockDelinkerFunc receives.
// The struct is declared FEX_PACKED and the field order is part of its layout, so do not reorder members.
// NOTE(review): the precise meaning of each field is defined by the code that fills this record, which is not
// visible from this header — the per-field hints below are assumptions to confirm.
struct FEX_PACKED ExitFunctionLinkData {
  uint64_t HostCode; // presumably a host code address associated with the exit
  uint64_t GuestRIP; // presumably the guest RIP the exit targets
  int64_t CallerOffset; // signed offset; presumably relative to this record — confirm
};

// Pair of opaque pointers (a creator and its associated data), as returned inside the optional
// result of ContextImpl::AddCustomIREntrypoint.
struct CustomIRResult {
  void* Creator;
  void* Data;

  // Stores both pointers as given; no ownership is implied.
  CustomIRResult(void* CreatorPtr, void* DataPtr)
    : Creator {CreatorPtr}
    , Data {DataPtr} {}
};

// Signature of a delinker callback: takes a pointer to an ExitFunctionLinkData record and returns nothing.
using BlockDelinkerFunc = void (*)(FEXCore::Context::ExitFunctionLinkData* Record);
constexpr uint32_t TSC_SCALE_MAXIMUM = 1'000'000'000; ///< 1 GHz

// AbstractCodeCache implementation that is constructed from a ContextImpl.
// It declares saving and loading of cached code, an optional validation pass over loaded data,
// and the FEX relocation step used when moving cached code between base addresses.
class CodeCache : public AbstractCodeCache {
public:
  CodeCache(ContextImpl&);
  ~CodeCache();

  // Reference to a ContextImpl; presumably the one passed to the constructor — confirm in the implementation.
  ContextImpl& CTX;
  // NOTE(review): the three Validation* members are presumably the separate context, thread and GDT storage
  // used by Validate() for its in-process recompile — confirm in the implementation.
  fextl::unique_ptr<ContextImpl> ValidationCTX;
  fextl::unique_ptr<Core::InternalThreadState> ValidationThread;
  FEXCore::Core::CPUState::gdt_segment ValidationGDT[32] {};
  // Set to true by InitiateCacheGeneration().
  bool IsGeneratingCache = false;

  FEX_CONFIG_OPT(EnableCodeCaching, ENABLECODECACHINGWIP);
  FEX_CONFIG_OPT(EnableCodeCacheValidation, ENABLECODECACHEVALIDATION);

  uint64_t ComputeCodeMapId(std::string_view Filename, int FD) override;
  bool SaveData(Core::InternalThreadState&, int TargetFD, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress) override;
  bool LoadData(Core::InternalThreadState*, std::byte* MappedCacheFile, const ExecutableFileSectionInfo&) override;

  /**
   * Performs expensive extra validation on the loaded code cache data.
   *
   * This kicks off an in-process recompile of all cached blocks and compares
   * them with the cached data. Differences will be reported as fatal errors,
   * which can uncover bugs like for example:
   * - mismatches of the JIT configuration used during cache generation
   * - hidden position dependencies due to missing FEX relocations
   * - incorrect instruction padding
   *
   * Note that GuestBlocks is taken by value while HostBlocks is taken by const reference.
   */
  void Validate(const ExecutableFileSectionInfo&, fextl::set<uint64_t> GuestBlocks, const fextl::set<uint64_t>& HostBlocks,
                std::span<std::byte> CachedCode);

  // Marks this cache as being in cache-generation mode.
  void InitiateCacheGeneration() override {
    IsGeneratingCache = true;
  }

  /**
   * Applies a set of FEX relocations to the given code section.
   *
   * FEX relocations describe runtime-dependencies of FEX-generated code.
   * When loading a code cache, they are used to move cached code to the
   * dynamically chosen base address of the guest binary.
   *
   * Conversely, relocations are applied in reverse when writing code caches
   * to ensure consistency across generation runs.
   *
   * Note that FEX relocations are unrelated to ELF/PE relocations.
   *
   * @param GuestDelta Guest address offset to apply to RIP-relative data
   * @param Code The code bytes the relocations are applied to
   * @param Relocations The relocation records to apply
   * @param ForStorage True for serializing data (producing deterministic output); false for de-serializing it (resolving dynamic symbols)
   *
   * @return Returns true on success
   */
  [[nodiscard]]
  bool ApplyCodeRelocations(uint64_t GuestDelta, std::span<std::byte> Code, std::span<const CPU::Relocation> Relocations, bool ForStorage);
};

// Concrete implementation of the FEXCore::Context::Context interface; it also derives from CPU::CodeBufferManager.
// Data members are initialized in declaration order, so keep members that depend on each other
// (e.g. CPUID after HostFeatures) in their current order.
class ContextImpl final : public FEXCore::Context::Context, public CPU::CodeBufferManager {
public:
  // Context base class implementation.
  bool InitCore() override;

  void ExecuteThread(FEXCore::Core::InternalThreadState* Thread) override;

  bool CheckIfBlockIsCacheable(FEXCore::Core::InternalThreadState&, uint64_t GuestRIP, uint64_t MaxInst) override;
  void CompileRIP(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP) override;
  void CompileRIPCount(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst) override;

  void HandleCallback(FEXCore::Core::InternalThreadState* Thread, uint64_t RIP) override;

  bool IsAddressInCurrentBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t Size) override;
  bool IsCurrentBlockSingleInst(FEXCore::Core::InternalThreadState* Thread) override;
  uint64_t GetGuestBlockEntry(FEXCore::Core::InternalThreadState* Thread) override;

  uint64_t RestoreRIPFromHostPC(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC) override;
  uint32_t ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, bool WasInJIT, const uint64_t* HostGPRs, uint64_t PSTATE) override;
  void SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) override;

  void ReconstructXMMRegisters(const FEXCore::Core::InternalThreadState* Thread, __uint128_t* XMM_Low, __uint128_t* YMM_High) override;
  void SetXMMRegistersFromState(FEXCore::Core::InternalThreadState* Thread, const __uint128_t* XMM_Low, const __uint128_t* YMM_High) override;

  /**
   * @brief Used to create FEX thread objects in preparation for creating a true OS thread.
   *
   * NOTE(review): this comment previously stated "Does set a TID or PID", and its examples passed a fourth
   * (PPID) argument. The declaration below takes no TID/PID argument, so confirm where those are assigned.
   *
   * @param InitialRIP The starting RIP of this thread
   * @param StackPointer The starting RSP of this thread
   * @param NewThreadState The initial thread state to setup for our state, if inheriting.
   *
   * @return The InternalThreadState object that tracks all of the emulated thread's state
   *
   * Usecases:
   *  Parent thread Creation:
   *    - Thread = CreateThread(InitialRIP, InitialStack, nullptr);
   *    - CTX->ExecuteThread(Thread);
   *  OS thread Creation:
   *    - Thread = CreateThread(0, 0, NewState);
   *    - Thread->ExecutionThread = FEXCore::Threads::Thread::Create(ThreadHandler, Arg);
   *    - ThreadHandler calls `CTX->ExecuteThread(Thread)`
   *  OS fork (New thread created with a clone of thread state):
   *    - clone{2, 3}
   *    - Thread = CreateThread(0, 0, CopyOfThreadState);
   *    - ExecuteThread(Thread); // Starts executing without creating another host thread
   *  Thunk callback executing guest code from native host thread
   *    - Thread = CreateThread(0, 0, NewState);
   *    - HandleCallback(Thread, RIP);
   */

  FEXCore::Core::InternalThreadState* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState) override;

  /**
   * @brief Destroys this FEX thread object and stops tracking it internally
   *
   * @param Thread The internal FEX thread state object
   */
  void DestroyThread(FEXCore::Core::InternalThreadState* Thread) override;

#ifndef _WIN32
  void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) override;
  void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) override;
#endif
  void SetSignalDelegator(FEXCore::SignalDelegator* SignalDelegation) override;
  void SetSyscallHandler(FEXCore::HLE::SyscallHandler* Handler) override;
  void SetThunkHandler(FEXCore::ThunkHandler* Handler) override;

  FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) override;
  FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) override;
  FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) override;

  // Returns the code cache object embedded in this context.
  CodeCache& GetCodeCache() override {
    return CodeCache;
  }

  // Takes ownership of the given code map writer, replacing any previous one.
  void SetCodeMapWriter(fextl::unique_ptr<CodeMapWriter> Writer) override {
    CodeMapWriter = std::move(Writer);
  }

  // Destroys the current code map writer, if there is one.
  void FlushAndCloseCodeMap() override {
    if (CodeMapWriter) {
      CodeMapWriter.reset();
    }
  }

  void OnCodeBufferAllocated(const std::shared_ptr<CPU::CodeBuffer>&) override;
  void ClearCodeCache(FEXCore::Core::InternalThreadState* Thread, bool NewCodeBuffer = true) override;
  void InvalidateCodeBuffersCodeRange(uint64_t Start, uint64_t Length) override;
  void InvalidateThreadCachedCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override;
  FEXCore::ForkableSharedMutex& GetCodeInvalidationMutex() override {
    return CodeInvalidationMutex;
  }

  void ConfigureAOTGen(FEXCore::Core::InternalThreadState* Thread, fextl::set<uint64_t>* ExternalBranches, uint64_t SectionMaxAddress) override;

  bool IsAddressInCodeBuffer(FEXCore::Core::InternalThreadState* Thread, uintptr_t Address) const override;

  // NOTE(review): this comment previously read "returns false if a handler was already registered", but the
  // return type is std::optional<CustomIRResult>. Confirm in the implementation what the optional holds when a
  // handler for Entrypoint already exists versus when the new handler is registered.
  std::optional<CustomIRResult>
  AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void* Creator = nullptr, void* Data = nullptr);

  void AddThunkTrampolineIRHandler(uintptr_t Entrypoint, uintptr_t GuestThunkEntrypoint) override;

  void AddForceTSOInformation(const IntervalList<uint64_t>& ValidRanges, fextl::set<uint64_t>&& Instructions) override;

  void RemoveForceTSOInformation(uint64_t Address, uint64_t Size) override;

  // Records that mono was detected; see AreMonoHacksActive() for how this is combined with the config option.
  void MarkMonoDetected() override {
    MonoDetected = true;
  }

  void MarkMonoBackpatcherBlock(uint64_t BlockEntry) override;

public:
  // Context-wide settings: a few plain fields plus configuration options bound through FEX_CONFIG_OPT.
  struct {
    uint64_t VirtualMemSize {1ULL << 36};
    uint64_t TSCScale = 0;

    // Used if the JIT needs to have its interrupt fault code emitted.
    bool NeedsPendingInterruptFaultCheck {false};

    FEX_CONFIG_OPT(Multiblock, MULTIBLOCK);
    FEX_CONFIG_OPT(SingleStepConfig, SINGLESTEP);
    FEX_CONFIG_OPT(GdbServer, GDBSERVER);
    FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
    FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
    FEX_CONFIG_OPT(VectorTSOEnabled, VECTORTSOENABLED);
    FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED);
    FEX_CONFIG_OPT(SMCChecks, SMCCHECKS);
    FEX_CONFIG_OPT(MaxInstPerBlock, MAXINST);
    FEX_CONFIG_OPT(RootFSPath, ROOTFS);
    FEX_CONFIG_OPT(GlobalJITNaming, GLOBALJITNAMING);
    FEX_CONFIG_OPT(LibraryJITNaming, LIBRARYJITNAMING);
    FEX_CONFIG_OPT(BlockJITNaming, BLOCKJITNAMING);
    FEX_CONFIG_OPT(GDBSymbols, GDBSYMBOLS);
    FEX_CONFIG_OPT(x87ReducedPrecision, X87REDUCEDPRECISION);
    FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY);
    FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS);
    FEX_CONFIG_OPT(SmallTSCScale, SMALLTSCSCALE);
    FEX_CONFIG_OPT(StrictInProcessSplitLocks, STRICTINPROCESSSPLITLOCKS);
    FEX_CONFIG_OPT(MonoHacks, MONOHACKS);
  } Config;

  // Returned by GetCodeInvalidationMutex().
  FEXCore::ForkableSharedMutex CodeInvalidationMutex;

  uint32_t StrictSplitLockMutex {};

  FEXCore::HostFeatures HostFeatures;
  // CPUID depends on HostFeatures so needs to be initialized after that.
  FEXCore::CPUIDEmu CPUID;
  FEXCore::HLE::SyscallHandler* SyscallHandler {};
  FEXCore::HLE::SourcecodeResolver* SourcecodeResolver {};
  FEXCore::ThunkHandler* ThunkHandler {};
  fextl::unique_ptr<FEXCore::CPU::Dispatcher> Dispatcher;
  CodeCache CodeCache;
  fextl::unique_ptr<CodeMapWriter> CodeMapWriter;

  SignalDelegator* SignalDelegation {};

  ContextImpl(const FEXCore::HostFeatures& Features);

  static void ThreadRemoveCodeEntryFromJit(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP);

  // This is used as a replacement for the SMC writes in the mono callsite backpatcher that avoids atomic operations
  // (safe as the invalidation mutex is locked) and manually invalidates the modified range. Allowing SMC to be detected
  // even if faulting is disabled.
  static void MonoBackpatcherWrite(FEXCore::Core::CpuStateFrame* Frame, uint8_t Size, uint64_t Address, uint64_t Value);

  void RemoveCustomIREntrypoint(FEXCore::Core::InternalThreadState* Thread, uintptr_t Entrypoint);

  // Result bundle of GenerateIR(). IRView is optional, so callers must check it before use.
  struct GenerateIRResult {
    std::optional<IR::IRListView> IRView;
    uint64_t TotalInstructions;
    uint64_t TotalInstructionsLength;
    uint64_t StartAddr;
    uint64_t Length;
    bool NeedsAddGuestCodeRanges;
  };
  [[nodiscard]]
  GenerateIRResult GenerateIR(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, bool ExtendedDebugInfo, uint64_t MaxInst);

  // Result bundle of CompileCode().
  struct CompileCodeResult {
    CPU::CPUBackend::CompiledCode CompiledCode;
    fextl::unique_ptr<FEXCore::Core::DebugData> DebugData;
    uint64_t StartAddr;
    uint64_t Length;
    bool NeedsAddGuestCodeRanges;
  };
  [[nodiscard]]
  CompileCodeResult CompileCode(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst = 0);
  uintptr_t CompileBlock(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP, uint64_t MaxInst = 0);
  uintptr_t CompileSingleStep(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP);

  FEXCore::JITSymbols Symbols;

  FEXCore::Utils::PooledAllocatorVirtual OpDispatcherAllocator {"FEXMem_OpDispatcher"};
  FEXCore::Utils::PooledAllocatorVirtual FrontendAllocator {"FEXMem_Frontend"};
  FEXCore::Utils::PooledAllocatorVirtualWithGuard CPUBackendAllocator {"FEXMem_CPUBackend"};

  // If Atomic-based TSO emulation is enabled or not.
  bool IsAtomicTSOEnabled() const {
    return AtomicTSOEmulationEnabled;
  }

  // If atomic-based TSO emulation is enabled for vector operations.
  bool IsVectorAtomicTSOEnabled() const {
    return VectorAtomicTSOEmulationEnabled;
  }

  // If atomic-based TSO emulation is enabled for memcpy operations.
  bool IsMemcpyAtomicTSOEnabled() const {
    return MemcpyAtomicTSOEmulationEnabled;
  }

  // Records whether the host supports TSO in hardware and recomputes the three atomic-TSO emulation flags.
  void SetHardwareTSOSupport(bool HardwareTSOSupported) override {
    SupportsHardwareTSO = HardwareTSOSupported;
    UpdateAtomicTSOEmulationConfig();
  }

  void EnableExitOnHLT() override {
    ExitOnHLT = true;
  }

  bool ExitOnHLTEnabled() const {
    return ExitOnHLT;
  }

  // True only when the MonoHacks config option is set and MarkMonoDetected() has been called.
  bool AreMonoHacksActive() const {
    return Config.MonoHacks && MonoDetected;
  }

protected:
  // Derives the atomic-TSO emulation flags from SupportsHardwareTSO and the TSO config options.
  void UpdateAtomicTSOEmulationConfig() {
    if (SupportsHardwareTSO) {
      // If the hardware supports TSO then we don't need to emulate it through atomics.
      AtomicTSOEmulationEnabled = false;
      VectorAtomicTSOEmulationEnabled = false;
      MemcpyAtomicTSOEmulationEnabled = false;
    } else {
      // Vector and memcpy emulation are only enabled when general TSO emulation is enabled as well.
      AtomicTSOEmulationEnabled = Config.TSOEnabled;
      VectorAtomicTSOEmulationEnabled = Config.TSOEnabled && Config.VectorTSOEnabled;
      MemcpyAtomicTSOEmulationEnabled = Config.TSOEnabled && Config.MemcpySetTSOEnabled;
    }
  }

private:
  /**
   * @brief Initializes the JIT compilers for the thread
   *
   * @param State The internal FEX thread state object
   *
   * InitializeCompiler is called inside of CreateThread, so you likely don't need this
   */
  void InitializeCompiler(FEXCore::Core::InternalThreadState* Thread);

  // Initial values of the TSO flags; they are recomputed by UpdateAtomicTSOEmulationConfig().
  bool SupportsHardwareTSO = false;
  bool AtomicTSOEmulationEnabled = true;
  bool VectorAtomicTSOEmulationEnabled = false;
  bool MemcpyAtomicTSOEmulationEnabled = false;

  bool ExitOnHLT = false;
  FEX_CONFIG_OPT(AppFilename, APP_FILENAME);

  // NOTE(review): presumably CustomIRMutex guards CustomIRHandlers — confirm in the implementation.
  std::shared_mutex CustomIRMutex;
  std::atomic<bool> HasCustomIRHandlers {};
  struct CustomIRHandlerEntry final {
    CustomIREntrypointHandler Handler;
    void* Creator;
    void* Data;
  };
  fextl::unordered_map<uint64_t, CustomIRHandlerEntry> CustomIRHandlers;
  IntervalList<uint64_t> ForceTSOValidRanges; // The ranges for which ForceTSOInstructions has populated data
  fextl::set<uint64_t> ForceTSOInstructions;

  bool MonoDetected = false;
  std::atomic<uint64_t> MonoBackpatcherBlock;

  // NOTE(review): presumably CodeBufferListLock guards CodeBufferList — confirm in the implementation.
  std::mutex CodeBufferListLock;
  fextl::vector<std::weak_ptr<CPU::CodeBuffer>> CodeBufferList;
};
} // namespace FEXCore::Context


================================================
FILE: FEXCore/Source/Interface/Core/Addressing.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Interface/Core/Addressing.h"

#include "Interface/IR/IREmitter.h"
#include "FEXCore/Utils/MathUtils.h"
#include "Interface/IR/IR.h"

namespace FEXCore::IR {

// Emits the IR that computes the effective address described by `A` and returns the resulting value.
//
// The components are folded in a fixed order: Base, then Offset, then Index (scaled by IndexScale),
// then truncation to A.AddrSize, and finally the segment base.
//
// @param IREmit            Emitter used to create the IR operations.
// @param A                 Address components to combine.
// @param GPRSize           Operation size used for the emitted arithmetic.
// @param AddSegmentBase    When true, A.Segment (if present) is added after truncation.
// @param AllowUpperGarbage When true, the truncation to A.AddrSize is skipped.
//
// @return The computed address, or a zero constant when `A` contributes no component at all.
Ref LoadEffectiveAddress(IREmitter* IREmit, const AddressMode& A, IR::OpSize GPRSize, bool AddSegmentBase, bool AllowUpperGarbage) {
  Ref Address = A.Base;

  // Fold in the constant displacement.
  if (A.Offset) {
    if (Address) {
      Address = IREmit->Add(GPRSize, Address, A.Offset);
    } else {
      Address = IREmit->Constant(A.Offset);
    }
  }

  // Fold in the index register, shifted when a scale other than 1 is requested.
  if (A.Index) {
    if (A.IndexScale == 1) {
      if (Address) {
        Address = IREmit->Add(GPRSize, Address, A.Index);
      } else {
        Address = A.Index;
      }
    } else {
      uint32_t ShiftAmount = FEXCore::ilog2(A.IndexScale);

      if (Address) {
        Address = IREmit->_AddShift(GPRSize, Address, A.Index, ShiftType::LSL, ShiftAmount);
      } else {
        Address = IREmit->_Lshl(GPRSize, A.Index, IREmit->Constant(ShiftAmount));
      }
    }
  }

  // For 64-bit AddrSize can be 32-bit or 64-bit
  // For 32-bit AddrSize can be 32-bit or 16-bit
  //
  // If the AddrSize is not the GPRSize then we need to clear the upper bits.
  if ((A.AddrSize < GPRSize) && !AllowUpperGarbage && Address) {
    uint32_t AddrBits = IR::OpSizeAsBits(A.AddrSize);

    if (A.Base || A.Index) {
      // A register is involved, so the truncation has to happen at runtime.
      Address = IREmit->_Bfe(GPRSize, AddrBits, 0, Address);
    } else if (A.Offset) {
      // Pure constant: mask it at compile time.
      uint64_t Masked = A.Offset;
      Masked &= (1ull << AddrBits) - 1;
      Address = IREmit->Constant(Masked);
    }
  }

  // The segment base is added after truncation so it is never masked off.
  if (A.Segment && AddSegmentBase) {
    if (Address) {
      Address = IREmit->Add(GPRSize, Address, A.Segment);
    } else {
      Address = A.Segment;
    }
  }

  if (!Address) {
    Address = IREmit->Constant(0);
  }
  return Address;
}

// Chooses how a guest address should be handed to a load/store: either as a fully computed base, as
// base plus constant offset, or as base plus (scaled) register index.
//
// @param IREmit              Emitter used to create any IR needed for the chosen form.
// @param A                   Guest address components.
// @param GPRSize             Operation size of the guest GPRs.
// @param HostSupportsTSOImm9 Whether the host has TSO loadstores with a signed 9-bit immediate.
// @param AtomicTSO           Whether the access is emitted with atomic-based TSO emulation.
// @param Vector              Whether the access is a vector access.
// @param AccessSize          Size of the memory access.
//
// @return The address mode to use. In the software fallback, Index is the emitter's invalid value.
AddressMode SelectAddressMode(IREmitter* IREmit, const AddressMode& A, IR::OpSize GPRSize, bool HostSupportsTSOImm9, bool AtomicTSO,
                              bool Vector, IR::OpSize AccessSize) {
  // Computes the whole address in software; nothing is left for the loadstore to add.
  const auto SoftwareAddress = [&]() -> AddressMode {
    return {
      .Base = LoadEffectiveAddress(IREmit, A, GPRSize, true),
      .Index = IREmit->Invalid(),
    };
  };

  // Computes everything except the constant offset into Base and passes the offset on as a constant index.
  const auto PeelOffset = [&]() -> AddressMode {
    AddressMode WithoutOffset = A;
    WithoutOffset.Offset = 0;

    return {
      .Base = LoadEffectiveAddress(IREmit, WithoutOffset, GPRSize, true /* AddSegmentBase */, false),
      .Index = IREmit->Constant(A.Offset),
      .IndexType = MemOffsetType::SXTX,
      .IndexScale = 1,
    };
  };

  const bool Is32Bit = GPRSize == OpSize::i32Bit;
  const bool GPRSizeMatchesAddrSize = A.AddrSize == GPRSize;
  const bool OffsetTooLargeFor32Bit = Is32Bit && (A.Offset <= -16384 || A.Offset >= 16384);
  if (!GPRSizeMatchesAddrSize || OffsetTooLargeFor32Bit) {
    // If address size doesn't match GPR size then no optimizations can occur.
    return SoftwareAddress();
  }

  // Loadstore rules:
  // Non-TSO GPR:
  // * LDR/STR:   [Reg]
  // * LDR/STR:   [Reg + Reg, {Shift <AccessSize>}]
  //   * Can't use with 32-bit
  // * LDR/STR:   [Reg + [0,4095] * <AccessSize>]
  //   * Imm must be smaller than 16k with 32-bit
  // * LDUR/STUR: [Reg + [-256, 255]]
  //
  // TSO GPR:
  // * ARMv8.0:
  //  LDAR/STLR: [Reg]
  // * FEAT_LRCPC:
  //  LDAPR: [Reg]
  // * FEAT_LRCPC2:
  //  LDAPUR/STLUR: [Reg + [-256, 255]]
  //
  // Non-TSO Vector:
  // * LDR/STR: [Reg + [0,4095] * <AccessSize>]
  // * LDUR/STUR: [Reg + [-256,255]]
  //
  // TSO Vector:
  // * ARMv8.0:
  //   Just DMB + previous
  // * FEAT_LRCPC3 (Unsupported by FEXCore currently):
  //   LDAPUR/STLUR: [Reg + [-256,255]]

  const auto AccessSizeAsImm = OpSizeToSize(AccessSize);
  const bool OffsetIsSIMM9 = A.Offset && A.Offset >= -256 && A.Offset <= 255;
  const bool OffsetIsUnsignedScaled = A.Offset > 0 && (A.Offset & (AccessSizeAsImm - 1)) == 0 && (A.Offset / AccessSizeAsImm) <= 4095;

  // Immediate-offset forms: signed 9-bit for TSO GPR accesses, signed 9-bit or unsigned scaled otherwise.
  const bool TSOImmediateFits = AtomicTSO && !Vector && HostSupportsTSOImm9 && OffsetIsSIMM9;
  const bool PlainImmediateFits = !AtomicTSO && (OffsetIsSIMM9 || OffsetIsUnsignedScaled);
  if (TSOImmediateFits || PlainImmediateFits) {
    return PeelOffset();
  }

  // TODO: LRCPC3 support for vector Imm9 (AtomicTSO case).
  const bool ScaleIsEncodable = A.IndexScale == 1 || A.IndexScale == AccessSizeAsImm;
  if (!AtomicTSO && !Is32Bit && A.Base && (A.Index || A.Segment) && !A.Offset && ScaleIsEncodable) {
    // ScaledRegisterLoadstore
    AddressMode Scaled = A;

    if (Scaled.Segment) {
      if (Scaled.Index) {
        // Both an index and a segment: fold the segment into the base and keep the index.
        Scaled.Base = IREmit->Add(GPRSize, Scaled.Base, Scaled.Segment);
      } else {
        // Only a segment: use it as the unscaled index.
        Scaled.Index = Scaled.Segment;
        Scaled.IndexScale = 1;
      }
    }

    return Scaled;
  }

  if ((Vector || !AtomicTSO) && (A.Base || A.Segment) && A.Offset) {
    const bool Const_16K = A.Offset > -16384 && A.Offset < 16384 && GPRSizeMatchesAddrSize && Is32Bit;

    if (!Is32Bit || Const_16K) {
      return PeelOffset();
    }
  }

  // Fallback on software address calculation
  return SoftwareAddress();
}


}; // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/Addressing.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Interface/IR/IR.h"
#include <cstdint>

namespace FEXCore::IR {
class IREmitter;

// Decomposed guest memory operand: optional segment, base and index values plus a constant offset.
// Every member has a default so that a default-initialized AddressMode never carries indeterminate
// values; the defaults for AddrSize and NonTSO match what aggregate initialization already produces
// when those members are omitted.
struct AddressMode {
  Ref Segment {nullptr};
  Ref Base {nullptr};
  Ref Index {nullptr};
  int64_t Offset = 0;

  MemOffsetType IndexType = MemOffsetType::SXTX;
  uint8_t IndexScale = 1;

  // Size in bytes for the address calculation. 8 for an arm64 hardware mode.
  // Callers are expected to set this; the value-initialized default only avoids reading an indeterminate value.
  IR::OpSize AddrSize {};
  bool NonTSO = false;
};

// Emits IR computing the effective address for `A` (base, offset, scaled index, truncation to A.AddrSize,
// then optionally the segment base) and returns it; returns a zero constant if `A` has no components.
// AllowUpperGarbage skips the truncation to A.AddrSize.
Ref LoadEffectiveAddress(IREmitter* IREmit, const AddressMode& A, IR::OpSize GPRSize, bool AddSegmentBase, bool AllowUpperGarbage = false);
// Picks the address form to pass to a load/store of AccessSize bytes: base plus constant offset, base plus
// register index, or a fully computed base with an invalid index when no cheaper form applies.
AddressMode SelectAddressMode(IREmitter* IREmit, const AddressMode& A, IR::OpSize GPRSize, bool HostSupportsTSOImm9, bool AtomicTSO,
                              bool Vector, IR::OpSize AccessSize);

} // namespace FEXCore::IR

================================================
FILE: FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Context/Context.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>

#include <FEXHeaderUtils/BitUtils.h>
#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>

#ifdef VIXL_DISASSEMBLER
#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
#include <cpu-features.h>
#include <utils-vixl.h>
#endif

#include <array>
#include <tuple>
#include <utility>

namespace FEXCore::CPU {

// LLVM's preserve_all doc, this is used throughout this file and reproduced
// here for reference:
//
//    the callee preserve all general purpose registers,
//    except X0-X8 and X16-X18. Furthermore it also preserves lower 128 bits of
//    V8-V31 SIMD - floating point registers.
//
// Note that the call necessarily also clobbers x30, the link register (LR)
// which is not considered general purpose.
//
// Meanwhile, for non-preserve_all, the AAPCS64 ABI says:
//
//    A subroutine invocation must preserve the contents of the registers
//    r19-r29 and SP.

// Host register assignment tables for 64-bit guests.
//
// SRA/SRAFPR list the host registers of the static register allocation, RA/RAFPR list the registers left
// for the dynamic allocator, and the PreserveAll_*/NotPreserved_* tables list the subsets that have to be
// spilled around calls using the respective ABI. Two layouts exist: the default one and the ARM64EC one.
namespace x64 {
#ifndef ARCHITECTURE_arm64ec
  // All but x19 and x29 are caller saved
  // Note that rax/rdx are rearranged here so we can coalesce cmpxchg.
  constexpr std::array<ARMEmitter::Register, 18> SRA = {
    ARMEmitter::Reg::r4,
    ARMEmitter::Reg::r7,
    ARMEmitter::Reg::r5,
    ARMEmitter::Reg::r6,
    ARMEmitter::Reg::r8,
    ARMEmitter::Reg::r9,
    ARMEmitter::Reg::r10,
    ARMEmitter::Reg::r11,
    ARMEmitter::Reg::r12,
    ARMEmitter::Reg::r13,
    ARMEmitter::Reg::r14,
    ARMEmitter::Reg::r15,
    ARMEmitter::Reg::r16,
    ARMEmitter::Reg::r17,
    ARMEmitter::Reg::r19,
    ARMEmitter::Reg::r29,
    // PF/AF must be last.
    REG_PF,
    REG_AF,
  };

  // I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15.
  // SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI.
  constexpr std::array<ARMEmitter::Register, 7> PreserveAll_SRA = {
    ARMEmitter::Reg::r4, ARMEmitter::Reg::r5,  ARMEmitter::Reg::r6,  ARMEmitter::Reg::r7,
    ARMEmitter::Reg::r8, ARMEmitter::Reg::r16, ARMEmitter::Reg::r17,
  };

  constexpr std::array<ARMEmitter::Register, 7> RA = {
    // r20-r24 are callee saved; r30 and r18 are not, which is why they are listed in the *_Dynamic tables below.
    ARMEmitter::Reg::r20, ARMEmitter::Reg::r21, ARMEmitter::Reg::r22, ARMEmitter::Reg::r23,
    ARMEmitter::Reg::r24, ARMEmitter::Reg::r30, ARMEmitter::Reg::r18,
  };

  constexpr unsigned RAPairs = 4;

  // Dynamic GPRs
  constexpr std::array<ARMEmitter::Register, 2> PreserveAll_Dynamic = {
    ARMEmitter::Reg::r18,
    ARMEmitter::Reg::r30,
  };

  constexpr std::array<ARMEmitter::Register, 2> NotPreserved_Dynamic = PreserveAll_Dynamic;

  // All are caller saved
  constexpr std::array<ARMEmitter::VRegister, 16> SRAFPR = {
    ARMEmitter::VReg::v16, ARMEmitter::VReg::v17, ARMEmitter::VReg::v18, ARMEmitter::VReg::v19,
    ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22, ARMEmitter::VReg::v23,
    ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27,
    ARMEmitter::VReg::v28, ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};

  // SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI.
  // This is a vector register table, so it uses VRegister like every other FPR table (including the ARM64EC variant).
  constexpr std::array<ARMEmitter::VRegister, 0> PreserveAll_SRAFPR = {
    // None.
  };

  //  v8..v15 = (lower 64bits) Callee saved
  constexpr std::array<ARMEmitter::VRegister, 14> RAFPR = {
    // v0 ~ v1 are used as temps.
    // ARMEmitter::VReg::v0, ARMEmitter::VReg::v1,

    ARMEmitter::VReg::v2,  ARMEmitter::VReg::v3,  ARMEmitter::VReg::v4,  ARMEmitter::VReg::v5,  ARMEmitter::VReg::v6,
    ARMEmitter::VReg::v7,  ARMEmitter::VReg::v8,  ARMEmitter::VReg::v9,  ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
    ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
  };

  constexpr std::array<ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
    ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
  };
#else
  constexpr std::array<ARMEmitter::Register, 18> SRA = {
    ARMEmitter::Reg::r8,
    ARMEmitter::Reg::r0,
    ARMEmitter::Reg::r1,
    ARMEmitter::Reg::r27,
    // SP's register location isn't specified by the ARM64EC ABI, we choose to use r23
    ARMEmitter::Reg::r23,
    ARMEmitter::Reg::r29,
    ARMEmitter::Reg::r25,
    ARMEmitter::Reg::r26,
    ARMEmitter::Reg::r2,
    ARMEmitter::Reg::r3,
    ARMEmitter::Reg::r4,
    ARMEmitter::Reg::r5,
    ARMEmitter::Reg::r19,
    ARMEmitter::Reg::r20,
    ARMEmitter::Reg::r21,
    ARMEmitter::Reg::r22,
    // PF/AF must be last.
    REG_PF,
    REG_AF,
  };

  constexpr std::array<ARMEmitter::Register, 7> PreserveAll_SRA = {
    ARMEmitter::Reg::r0, ARMEmitter::Reg::r1, ARMEmitter::Reg::r2, ARMEmitter::Reg::r3,
    ARMEmitter::Reg::r4, ARMEmitter::Reg::r5, ARMEmitter::Reg::r8,
  };

  constexpr std::array<ARMEmitter::Register, 6> RA = {
    ARMEmitter::Reg::r6, ARMEmitter::Reg::r7, ARMEmitter::Reg::r14, ARMEmitter::Reg::r15, ARMEmitter::Reg::r16, ARMEmitter::Reg::r30,
  };

  constexpr std::array<ARMEmitter::Register, 5> PreserveAll_Dynamic = {ARMEmitter::Reg::r6, ARMEmitter::Reg::r7, ARMEmitter::Reg::r16,
                                                                       ARMEmitter::Reg::r17, ARMEmitter::Reg::r30};

  constexpr std::array<ARMEmitter::Register, 7> NotPreserved_Dynamic = {ARMEmitter::Reg::r6,  ARMEmitter::Reg::r7,  ARMEmitter::Reg::r14,
                                                                        ARMEmitter::Reg::r15, ARMEmitter::Reg::r16, ARMEmitter::Reg::r17,
                                                                        ARMEmitter::Reg::r30};

  constexpr unsigned RAPairs = 4;

  constexpr std::array<ARMEmitter::VRegister, 16> SRAFPR = {
    ARMEmitter::VReg::v0,  ARMEmitter::VReg::v1,  ARMEmitter::VReg::v2,  ARMEmitter::VReg::v3,
    ARMEmitter::VReg::v4,  ARMEmitter::VReg::v5,  ARMEmitter::VReg::v6,  ARMEmitter::VReg::v7,
    ARMEmitter::VReg::v8,  ARMEmitter::VReg::v9,  ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
    ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
  };

  constexpr std::array<ARMEmitter::VRegister, 8> PreserveAll_SRAFPR = {
    ARMEmitter::VReg::v0, ARMEmitter::VReg::v1, ARMEmitter::VReg::v2, ARMEmitter::VReg::v3,
    ARMEmitter::VReg::v4, ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
  };

  constexpr std::array<ARMEmitter::VRegister, 14> RAFPR = {
    ARMEmitter::VReg::v18, ARMEmitter::VReg::v19, ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22,
    ARMEmitter::VReg::v23, ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27,
    ARMEmitter::VReg::v28, ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};

  constexpr std::array<ARMEmitter::VRegister, 0> PreserveAll_DynamicFPR = {
    // None
  };
#endif

  // Bitmask (bit N == register index N) of the PreserveAll_SRA entries.
  // Only indices 0-8, 16 and 17 are ever added to the mask; any other index is ignored.
  constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t {
    uint32_t Mask {};
    for (auto Reg : PreserveAll_SRA) {
      switch (Reg.Idx()) {
      case 0:
      case 1:
      case 2:
      case 3:
      case 4:
      case 5:
      case 6:
      case 7:
      case 8:
      case 16:
      case 17: Mask |= (1U << Reg.Idx()); break;
      default: break;
      }
    }

    return Mask;
  }()};

  // Bitmask (bit N == vector register index N) of the PreserveAll_SRAFPR entries.
  constexpr uint32_t PreserveAll_SRAFPRMask = {[]() -> uint32_t {
    uint32_t Mask {};
    for (auto Reg : PreserveAll_SRAFPR) {
      Mask |= (1U << Reg.Idx());
    }
    return Mask;
  }()};

  // SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI.
  // This is /all/ of the SRA registers
  constexpr std::array<ARMEmitter::VRegister, 16> PreserveAll_SRAFPRSVE = SRAFPR;

  // Bitmask (bit N == vector register index N) of the PreserveAll_SRAFPRSVE entries.
  constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t {
    uint32_t Mask {};
    for (auto Reg : PreserveAll_SRAFPRSVE) {
      Mask |= (1U << Reg.Idx());
    }
    return Mask;
  }()};

  // Dynamic FPRs when the host supports SVE-256bit.
  constexpr std::array<ARMEmitter::VRegister, 14> PreserveAll_DynamicFPRSVE = {
    // v0 ~ v1 are used as temps.
    ARMEmitter::VReg::v2,  ARMEmitter::VReg::v3,  ARMEmitter::VReg::v4,  ARMEmitter::VReg::v5,  ARMEmitter::VReg::v6,
    ARMEmitter::VReg::v7,  ARMEmitter::VReg::v8,  ARMEmitter::VReg::v9,  ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
    ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,
  };
} // namespace x64

namespace x32 {
  // Statically allocated GPRs for 32-bit guests.
  // All but x19 and x29 are caller saved. eax/edx rearranged for cmpxchg.
  constexpr std::array<ARMEmitter::Register, 10> SRA = {
    ARMEmitter::Reg::r4, ARMEmitter::Reg::r7, ARMEmitter::Reg::r5,  ARMEmitter::Reg::r6,
    ARMEmitter::Reg::r8, ARMEmitter::Reg::r9, ARMEmitter::Reg::r10, ARMEmitter::Reg::r11,
    // PF/AF must stay at the tail of the table.
    REG_PF, REG_AF,
  };

  // GPRs handed to the register allocator.
  constexpr std::array<ARMEmitter::Register, 14> RA = {
    // Callee saved.
    ARMEmitter::Reg::r20, ARMEmitter::Reg::r21, ARMEmitter::Reg::r22, ARMEmitter::Reg::r23,

    // Registers only available on 32-bit.
    // All these are caller saved (except for r19).
    ARMEmitter::Reg::r12, ARMEmitter::Reg::r13, ARMEmitter::Reg::r14, ARMEmitter::Reg::r15,
    ARMEmitter::Reg::r16, ARMEmitter::Reg::r17, ARMEmitter::Reg::r29, ARMEmitter::Reg::r30,

    ARMEmitter::Reg::r24, ARMEmitter::Reg::r19,
  };

  // Subset of the dynamic GPRs that PushDynamicRegs/PopDynamicRegs save and restore.
  constexpr std::array<ARMEmitter::Register, 7> NotPreserved_Dynamic = {
    ARMEmitter::Reg::r12,
    ARMEmitter::Reg::r13,
    ARMEmitter::Reg::r14,
    ARMEmitter::Reg::r15,
    ARMEmitter::Reg::r16,
    ARMEmitter::Reg::r17,
    ARMEmitter::Reg::r30,
  };

  constexpr unsigned RAPairs = 10;

  // Statically allocated vector registers. All are caller saved.
  constexpr std::array<ARMEmitter::VRegister, 8> SRAFPR = {
    ARMEmitter::VReg::v16, ARMEmitter::VReg::v17, ARMEmitter::VReg::v18, ARMEmitter::VReg::v19,
    ARMEmitter::VReg::v20, ARMEmitter::VReg::v21, ARMEmitter::VReg::v22, ARMEmitter::VReg::v23,
  };

  // Vector registers handed to the register allocator.
  //  v8..v15 = (lower 64bits) Callee saved
  constexpr std::array<ARMEmitter::VRegister, 22> RAFPR = {
    // v0 ~ v1 are used as temps, so they are not listed.

    ARMEmitter::VReg::v2,  ARMEmitter::VReg::v3,  ARMEmitter::VReg::v4,  ARMEmitter::VReg::v5,  ARMEmitter::VReg::v6,
    ARMEmitter::VReg::v7,  ARMEmitter::VReg::v8,  ARMEmitter::VReg::v9,  ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
    ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,

    ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27, ARMEmitter::VReg::v28,
    ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};

  // I wish this could get constexpr generated from SRA's definition but impossible until libstdc++12, libc++15.
  // SRA GPRs that need to be spilled when calling a function with `preserve_all` ABI.
  constexpr std::array<ARMEmitter::Register, 5> PreserveAll_SRA = {
    ARMEmitter::Reg::r4, ARMEmitter::Reg::r5, ARMEmitter::Reg::r6, ARMEmitter::Reg::r7, ARMEmitter::Reg::r8,
  };

  // Bitmask (bit N <=> register index N) built from `PreserveAll_SRA`.
  // Only entries whose index is 0-8, 16 or 17 contribute a bit; any other index is ignored.
  constexpr uint32_t PreserveAll_SRAMask = {[]() -> uint32_t {
    uint32_t Result = 0;

    for (const auto Entry : PreserveAll_SRA) {
      const uint32_t Index = Entry.Idx();
      const bool Accepted = Index <= 8 || Index == 16 || Index == 17;
      if (Accepted) {
        Result |= (1U << Index);
      }
    }

    return Result;
  }()};

  // Dynamic GPRs
  constexpr std::array<ARMEmitter::Register, 3> PreserveAll_Dynamic = {
    ARMEmitter::Reg::r16,
    ARMEmitter::Reg::r17,
    ARMEmitter::Reg::r30,
  };

  // SRA FPRs that need to be spilled when calling a function with `preserve_all` ABI: none.
  constexpr uint32_t PreserveAll_SRAFPRMask = 0;

  // Dynamic FPRs
  // - v2-v7 (v0 ~ v1 are temps and therefore not listed)
  constexpr std::array<ARMEmitter::VRegister, 6> PreserveAll_DynamicFPR = {
    ARMEmitter::VReg::v2, ARMEmitter::VReg::v3, ARMEmitter::VReg::v4,
    ARMEmitter::VReg::v5, ARMEmitter::VReg::v6, ARMEmitter::VReg::v7,
  };

  // SRA FPRs that need to be spilled when the host supports SVE-256bit with `preserve_all` ABI.
  // This is /all/ of the SRA registers
  constexpr std::array<ARMEmitter::VRegister, 8> PreserveAll_SRAFPRSVE = SRAFPR;

  // Bitmask with one bit set per vector register index listed in `PreserveAll_SRAFPRSVE`.
  constexpr uint32_t PreserveAll_SRAFPRSVEMask = {[]() -> uint32_t {
    uint32_t Result = 0;
    for (size_t Slot = 0; Slot < PreserveAll_SRAFPRSVE.size(); ++Slot) {
      Result |= (1U << PreserveAll_SRAFPRSVE[Slot].Idx());
    }
    return Result;
  }()};

  // Dynamic FPRs when the host supports SVE-256bit.
  constexpr std::array<ARMEmitter::VRegister, 22> PreserveAll_DynamicFPRSVE = {
    // v0 ~ v1 are used as temps.
    ARMEmitter::VReg::v2,  ARMEmitter::VReg::v3,  ARMEmitter::VReg::v4,  ARMEmitter::VReg::v5,  ARMEmitter::VReg::v6,
    ARMEmitter::VReg::v7,  ARMEmitter::VReg::v8,  ARMEmitter::VReg::v9,  ARMEmitter::VReg::v10, ARMEmitter::VReg::v11,
    ARMEmitter::VReg::v12, ARMEmitter::VReg::v13, ARMEmitter::VReg::v14, ARMEmitter::VReg::v15,

    ARMEmitter::VReg::v24, ARMEmitter::VReg::v25, ARMEmitter::VReg::v26, ARMEmitter::VReg::v27, ARMEmitter::VReg::v28,
    ARMEmitter::VReg::v29, ARMEmitter::VReg::v30, ARMEmitter::VReg::v31};
} // namespace x32

// We want vixl to not allocate a default buffer. Jit and dispatcher will manually create one.
Arm64Emitter::Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr, size_t size)
  : Emitter(static_cast<uint8_t*>(EmissionPtr), size)
  , EmitterCTX {ctx}
#ifdef VIXL_SIMULATOR
  , Simulator {&SimDecoder, stdout, vixl::aarch64::SimStack(SimulatorStackSize).Allocate()}
#endif
{
#ifdef VIXL_SIMULATOR
  FEX_CONFIG_OPT(ForceSVEWidth, FORCESVEWIDTH);
  // Under the simulator the vector length defaults to 256 bits;
  // a non-zero user-provided width takes precedence.
  const auto RequestedWidth = ForceSVEWidth();
  Simulator.SetVectorLengthInBits(RequestedWidth ? RequestedWidth : 256);
  // FEX doesn't support GCS.
  Simulator.DisableGCSCheck();
#endif
#ifdef VIXL_DISASSEMBLER
  // vixl's decoder is expensive to construct, so the disassembler
  // objects are only created when disassembly was requested.
  if (Disassemble()) {
    DisasmBuffer.resize(DISASM_BUFFER_SIZE);
    Disasm = fextl::make_unique<vixl::aarch64::Disassembler>(DisasmBuffer.data(), DISASM_BUFFER_SIZE);
    DisasmDecoder = fextl::make_unique<vixl::aarch64::Decoder>();
    DisasmDecoder->AppendVisitor(Disasm.get());
  }
#endif

  // The set of available registers depends on the operating mode of the process.
  const bool Is64Bit = EmitterCTX->Config.Is64BitMode();
  if (!Is64Bit) {
    // 32-bit guest: use the x32 tables.
    StaticRegisters = x32::SRA;
    StaticFPRegisters = x32::SRAFPR;

    GeneralRegisters = x32::RA;
    GeneralRegistersNotPreserved = x32::NotPreserved_Dynamic;
    GeneralFPRegisters = x32::RAFPR;

    PairRegisters = x32::RAPairs;
  } else {
    // 64-bit guest: use the x64 tables.
    StaticRegisters = x64::SRA;
    StaticFPRegisters = x64::SRAFPR;

    GeneralRegisters = x64::RA;
    GeneralRegistersNotPreserved = x64::NotPreserved_Dynamic;
    GeneralFPRegisters = x64::RAFPR;

    PairRegisters = x64::RAPairs;
  }
}

// Maps a host register to the x86 register it statically backs.
// Returns REG_INVALID when `Reg` is not present in StaticRegisters.
FEXCore::X86State::X86Reg Arm64Emitter::GetX86RegRelationToARMReg(ARMEmitter::Register Reg) {
  size_t Slot = 0;
  for (const auto& Candidate : StaticRegisters) {
    if (Candidate == Reg) {
      // X86 registers are mapped linearly from the StaticRegisters span,
      // so the span index is added directly to the REG_RAX enum value.
      return static_cast<FEXCore::X86State::X86Reg>(FEXCore::ToUnderlying(FEXCore::X86State::X86Reg::REG_RAX) + Slot);
    }
    ++Slot;
  }

  // No static register matched.
  return FEXCore::X86State::X86Reg::REG_INVALID;
}

// Emits the instruction sequence that materialises `Constant` in `Reg`.
//
// Parameters:
//   s        - Operand size of the load (32-bit or 64-bit).
//   Reg      - Destination register.
//   Constant - Value to load.
//   Pad      - Padding policy. DOPAD always NOP-pads the sequence, NOPAD never does, and
//              AUTOPAD pads only when EnableCodeCaching is set.
//   MaxBytes - Optional upper bound (in bytes, multiple of two) on the width of `Constant`.
//              Zero means "use the full width implied by `s`". It limits how many 16-bit
//              segments are inspected/emitted by the movz/movk path.
//
// Strategy, in order of preference:
//   1. A single `movn` when the inverted constant fits in 16 bits.
//   2. A single `orr` with a logical immediate when more than one 16-bit segment is non-zero.
//   3. `movn` + `movk` for 64-bit constants whose upper 32 bits are all ones.
//   4. A `movz` followed by `movk`s for each remaining non-zero segment.
// The ADRP/ADR path is present but intentionally disabled (see the note below).
void Arm64Emitter::LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, uint64_t Constant, PadType Pad, int MaxBytes) {
  bool NOPPad = false;
  if (Pad == PadType::DOPAD) {
    NOPPad = true;
  } else if (Pad == PadType::NOPAD) {
    NOPPad = false;
  } else if (Pad == PadType::AUTOPAD) {
    // Force NOP padding to ensure relocated constants always have enough encoding space available
    NOPPad = EnableCodeCaching;
  }

  bool Is64Bit = s == ARMEmitter::Size::i64Bit;
  // Maximum number of 16-bit segments for the requested operand size.
  const auto UpperBound = Is64Bit ? 4 : 2;
  int Segments = MaxBytes ? (MaxBytes / 2) : UpperBound;

  // The limit is expressed in bytes, so report the byte bound (two bytes per segment).
  LOGMAN_THROW_A_FMT(MaxBytes >= 0 && MaxBytes <= (UpperBound * 2) && (MaxBytes & 1) == 0,
                     "MaxBytes must be bounded in the range of [0, {}] and 16-bit aligned", UpperBound * 2);
  // If MaxBytes specified then make sure to sanity check incoming data.
  // A MaxBytes of 8 covers the whole 64-bit constant: every value fits, and shifting a
  // 64-bit value by 64 bits would be undefined behaviour, so the shift is skipped.
  LOGMAN_THROW_A_FMT(MaxBytes <= 0 || MaxBytes >= 8 || (Constant >> (MaxBytes * 8)) == 0,
                     "MaxBytes provided but data can't fit within provided range.");

  if (Is64Bit && ((~Constant) >> 16) == 0) {
    if (NOPPad) {
      nop();
      nop();
      nop();
    }

    movn(s, Reg, (~Constant) & 0xFFFF);
    return;
  }

  if ((Constant >> 32) == 0 && !NOPPad) {
    // If the upper 32-bits is all zero, we can now switch to a 32-bit move.
    // NOTE: The NOP padding code does not appropriately adjust to this yet,
    //       so we skip this optimization in that case
    s = ARMEmitter::Size::i32Bit;
    Is64Bit = false;
    Segments = std::min(Segments, 2);
  }

  if (!Is64Bit && ((~Constant) & 0xFFFF0000) == 0) {
    if (NOPPad) {
      nop();
      nop();
      nop();
    }

    movn(s, Reg.W(), (~Constant) & 0xFFFF);
    return;
  }

  int RequiredMoveSegments {};

  // Count the number of move segments
  // We only want to use ADRP+ADD if we have more than 1 segment
  for (int i = 0; i < Segments; ++i) {
    uint16_t Part = (Constant >> (i * 16)) & 0xFFFF;
    if (Part != 0) {
      ++RequiredMoveSegments;
    }
  }

  // If this can be loaded with a mov bitmask.
  if (RequiredMoveSegments > 1) {
    // Only try to use this path if the number of segments is > 1.
    // `movz` is better than `orr` since hardware will rename or merge if possible when `movz` is used.
    const auto IsImm = ARMEmitter::Emitter::IsImmLogical(Constant, RegSizeInBits(s));
    if (IsImm) {
      if (NOPPad) {
        nop();
        nop();
        nop();
      }
      orr(s, Reg, ARMEmitter::Reg::zr, Constant);
      return;
    }
  }

  // If we can't handle negatives with the orr, try with movn+movk
  if (Is64Bit && ((~Constant) >> 32) == 0) {
    if (NOPPad) {
      nop();
      nop();
    }
    movn(s, Reg, (~Constant) & 0xFFFF);
    movk(s, Reg, (Constant >> 16) & 0xFFFF, 16);
    return;
  }

  // ADRP+ADD is specifically optimized in hardware
  // Check if we can use this
  auto PC = GetCursorAddress<uint64_t>();

  // PC aligned to page
  uint64_t AlignedPC = PC & ~0xFFFULL;

  // Offset from aligned PC
  auto AlignedOffset = std::bit_cast<int64_t>(Constant - AlignedPC);

  // Number of instructions emitted so far by the paths below; drives the trailing NOP padding.
  int NumMoves = 0;

  // If the aligned offset is within the 4GB window then we can use ADRP+ADD
  // and the number of move segments more than 1
  // NOTE: JIT output is moved to a different buffer after compilation, so the
  //       current cursor address doesn't match the runtime instruction address.
  //       Hence this optimization is disabled until we enable code relocation patches.
  if (RequiredMoveSegments > 1 && ARMEmitter::Emitter::IsInt32(AlignedOffset) && false) {
    // If this is 4k page aligned then we only need ADRP
    if ((AlignedOffset & 0xFFF) == 0) {
      adrp(Reg, AlignedOffset >> 12);
    } else {
      // If the constant is within 1MB of PC then we can still use ADR to load in a single instruction
      // 21-bit signed integer here
      auto SmallOffset = std::bit_cast<int64_t>(Constant - PC);
      if (ARMEmitter::Emitter::IsInt21(SmallOffset)) {
        adr(Reg, SmallOffset);
      } else {
        // Need to use ADRP + ADD
        adrp(Reg, AlignedOffset >> 12);
        add(s, Reg, Reg, Constant & 0xFFF);
        NumMoves = 2;
      }
    }
  } else {
    // The first non-zero segment is written with movz (which clears the rest of the register).
    int CurrentSegment = 0;
    for (; CurrentSegment < Segments; ++CurrentSegment) {
      uint16_t Part = (Constant >> (CurrentSegment * 16)) & 0xFFFF;
      if (Part) {
        movz(s, Reg, Part, CurrentSegment * 16);
        ++CurrentSegment;
        ++NumMoves;
        break;
      }
    }

    // Every further non-zero segment is merged in with movk.
    for (; CurrentSegment < Segments; ++CurrentSegment) {
      uint16_t Part = (Constant >> (CurrentSegment * 16)) & 0xFFFF;
      if (Part) {
        movk(s, Reg, Part, CurrentSegment * 16);
        ++NumMoves;
      }
    }

    if (NumMoves == 0) {
      // If we didn't move anything that means this is a zero move. Special case this.
      movz(s, Reg, 0);
      ++NumMoves;
    }
  }

  if (NOPPad) {
    // Pad the sequence up to `Segments` instructions.
    for (int i = NumMoves; i < Segments; ++i) {
      nop();
    }
  }
}

// Emits the sequence that pushes x19-x30 and the low 64 bits of v8-v15 onto the host stack.
void Arm64Emitter::PushCalleeSavedRegisters() {
  // GPRs are stored as pairs, x19 through x30, each with a pre-decrement of 16 bytes.
  constexpr static std::array<std::pair<ARMEmitter::XRegister, ARMEmitter::XRegister>, 6> GPRPairs = {{
    {ARMEmitter::XReg::x19, ARMEmitter::XReg::x20},
    {ARMEmitter::XReg::x21, ARMEmitter::XReg::x22},
    {ARMEmitter::XReg::x23, ARMEmitter::XReg::x24},
    {ARMEmitter::XReg::x25, ARMEmitter::XReg::x26},
    {ARMEmitter::XReg::x27, ARMEmitter::XReg::x28},
    {ARMEmitter::XReg::x29, ARMEmitter::XReg::x30},
  }};

  for (const auto& [First, Second] : GPRPairs) {
    stp<ARMEmitter::IndexType::PRE>(First, Second, ARMEmitter::Reg::rsp, -16);
  }

  // The lower 64 bits of v8-v15 also need saving.
  // Two ST4 (single lane) instructions cover all eight registers,
  // after one stack pointer adjustment that reserves the space.
  constexpr static std::array< std::tuple<ARMEmitter::DRegister, ARMEmitter::DRegister, ARMEmitter::DRegister, ARMEmitter::DRegister>, 2> VectorQuads = {{
    {ARMEmitter::DReg::d8, ARMEmitter::DReg::d9, ARMEmitter::DReg::d10, ARMEmitter::DReg::d11},
    {ARMEmitter::DReg::d12, ARMEmitter::DReg::d13, ARMEmitter::DReg::d14, ARMEmitter::DReg::d15},
  }};

  const uint32_t VectorSaveSize = 8 * sizeof(uint64_t);
  sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, VectorSaveSize);

  // Copy SP into x19 with an SP-capable move (add #0).
  // x19 was already saved above, so clobbering it here is safe.
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r19, ARMEmitter::Reg::rsp, 0);

  for (const auto& [D0, D1, D2, D3] : VectorQuads) {
    st4(ARMEmitter::SubRegSize::i64Bit, D0, D1, D2, D3, 0, ARMEmitter::Reg::r19, 32);
  }
}

void Arm64Emitter::PopCalleeSavedRegisters() {
  constexpr static std::array< std::tuple<ARMEmitter::DRegister, ARMEmitter::DRegister, ARMEmitter::DRegister, ARMEmitter::DRegister>, 2> FPRs = {{
    {ARMEmitter::DReg::d8, ARMEmitter::DReg::d9, ARMEmitter::DReg::d10, ARMEmitter::DReg::d11},
    {ARMEmitter::DReg::d12, ARMEmitter::DReg::d13, ARMEmitter::DReg::d14, ARMEmitter::DReg::d15},
  }};

  for (auto& RegQuad : FPRs) {
    ld4(ARMEmitter::SubRegSize::i64Bit, std::get<0>(RegQuad), std::get<1>(RegQuad), std::get<2>(RegQuad), std::get<3>(RegQuad), 0,
        ARMEmitter::Reg::rsp, 32);
  }

  constexpr static std::array<std::pair<ARMEmitter::XRegister, ARMEmitter::XRegister>, 6> CalleeSaved = {{
    {ARMEmitter::XReg::x29, ARMEmitter::XReg::x30},
    {ARMEmitter::XReg::x27, ARMEmitter::XReg::x28},
    {ARMEmitter::XReg::x25, ARMEmitter::XReg::x26},
    {ARMEmitter::XReg::x23, ARMEmitter::XReg::x24},
    {ARMEmitter::XReg::x21, ARMEmitter::XReg::x22},
    {ARMEmitter::XReg::x19, ARMEmitter::XReg::x20},
  }};

  for (auto& RegPair : CalleeSaved) {
    ldp<ARMEmitter::IndexType::POST>(RegPair.first, RegPair.second, ARMEmitter::Reg::rsp, 16);
  }
}

// Emits code that sets up the non-GPR/FPR state used by JIT code:
//   - FPCR.NEP/FPCR.AH (and optionally FPCR.FIZ from MXCSR.DAZ) when the host supports AFP.
//   - The SVE predicate registers when `SetPredRegs` is set and the host supports SVE.
// `TmpReg` and `TmpReg2` are scratch registers that may be overwritten by the emitted code.
void Arm64Emitter::FillSpecialRegs(ARMEmitter::Register TmpReg, ARMEmitter::Register TmpReg2, bool SetFIZ, bool SetPredRegs) {
  const auto& Features = EmitterCTX->HostFeatures;

#ifndef VIXL_SIMULATOR
  if (Features.SupportsAFP) {
    // FPCR bits of interest:
    // NEP(2): Changes ASIMD scalar instructions to insert in to the lower bits of the destination.
    // AH(1):  Changes NaN behaviour in some instructions. Specifically fmin, fmax.
    // FIZ(0): Flush Inputs to Zero
    constexpr uint32_t FPCR_AH = 1U << 1;
    constexpr uint32_t FPCR_NEP = 1U << 2;

    // Read-modify-write FPCR, turning on NEP and AH.
    mrs(TmpReg, ARMEmitter::SystemRegister::FPCR);
    orr(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, FPCR_NEP | FPCR_AH);

    if (SetFIZ) {
      // Copy bit 6 of the guest MXCSR (DAZ) into bit 0 of FPCR (FIZ).
      ldr(TmpReg2.W(), STATE.R(), offsetof(FEXCore::Core::CPUState, mxcsr));
      bfxil(ARMEmitter::Size::i64Bit, TmpReg, TmpReg2, 6, 1);
    }

    msr(ARMEmitter::SystemRegister::FPCR, TmpReg);
  }
#endif

  const bool HasSVE256 = Features.SupportsSVE256;
  const bool HasSVE128 = Features.SupportsSVE128;
  if (!SetPredRegs || (!HasSVE256 && !HasSVE128)) {
    return;
  }

  // Predicate registers are not spilled by SpillStaticRegs; all that matters is that
  // they are restored on a fill, so it is fine if something else overwrites them in between.
  if (HasSVE256) {
    ptrue(ARMEmitter::SubRegSize::i8Bit, PRED_TMP_32B, ARMEmitter::PredicatePattern::SVE_VL32);
  }

  if (HasSVE128) {
    ptrue(ARMEmitter::SubRegSize::i8Bit, PRED_TMP_16B, ARMEmitter::PredicatePattern::SVE_VL16);
  }

  // Predicate used by the x87 load/store SVE optimization.
  ptrue(ARMEmitter::SubRegSize::i16Bit, PRED_X87_SVEOPT, ARMEmitter::PredicatePattern::SVE_VL5);
}

// Emits code that writes the statically allocated guest registers back to the CPU state frame
// addressed by STATE.
//
// Parameters:
//   TmpReg  - Scratch GPR; overwritten by the emitted code (FPCR/NZCV staging and address math).
//   Options - Selects what is spilled:
//               NZCV          - store the NZCV system register to State.flags[24];
//               GPRSpillMask  - bit N set => spill the static GPR with index N;
//               FPRs          - enable spilling of the static vector registers;
//               FPRSpillMask  - bit N set => spill the static vector register with index N.
//             `Options` is taken by value; the PF/AF bits are stripped from the local copy.
//
// REG_CALLRET_SP is always stored to State.callret_sp, regardless of `Options`.
void Arm64Emitter::SpillStaticRegs(ARMEmitter::Register TmpReg, SpillStaticRegOptions Options) {
#ifndef VIXL_SIMULATOR
  if (EmitterCTX->HostFeatures.SupportsAFP) {
    // Disable AFP features when spilling registers.
    //
    // Disable FPCR.NEP and FPCR.AH and FPCR.FIZ
    // NEP(2): Changes ASIMD scalar instructions to insert in to the lower bits of the destination.
    // AH(1):  Changes NaN behaviour in some instructions. Specifically fmin, fmax.
    //         Also interacts with RPRES to change reciprocal/rsqrt precision from 8-bit mantissa to 12-bit.
    //
    // Additional interesting AFP bits:
    // FIZ(0): Flush Inputs to Zero
    mrs(TmpReg, ARMEmitter::SystemRegister::FPCR);
    bic(ARMEmitter::Size::i64Bit, TmpReg, TmpReg,
        (1U << 2) |   // NEP
          (1U << 1) | // AH
          (1U << 0)); // FIZ
    msr(ARMEmitter::SystemRegister::FPCR, TmpReg);
  }
#endif

  if (Options.NZCV) {
    // Regardless of what GPRs/FPRs we're spilling, we need to spill NZCV since it
    // is always static and almost certainly clobbered by the subsequent code.
    //
    // TODO: Can we prove that NZCV is not used across a call in some cases and
    // omit this? Might help x87 perf? Future idea.
    mrs(TmpReg, ARMEmitter::SystemRegister::NZCV);
    str(TmpReg.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
  }

  // PF/AF are special, remove them from the mask
  // (they are stored separately below as a 32-bit pair rather than through the gregs loop).
  uint32_t PFAFMask = ((1u << REG_PF.Idx()) | ((1u << REG_AF.Idx())));
  unsigned PFAFSpillMask = Options.GPRSpillMask & PFAFMask;
  Options.GPRSpillMask &= ~PFAFSpillMask;

  str(REG_CALLRET_SP, STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.callret_sp));

  // Walk the static GPRs two at a time so that adjacent selected registers share one stp.
  // The index into StaticRegisters doubles as the index into State.gregs.
  for (size_t i = 0; i < StaticRegisters.size(); i += 2) {
    auto Reg1 = StaticRegisters[i];
    auto Reg2 = StaticRegisters[i + 1];
    if (((1U << Reg1.Idx()) & Options.GPRSpillMask) && ((1U << Reg2.Idx()) & Options.GPRSpillMask)) {
      stp<ARMEmitter::IndexType::OFFSET>(Reg1.X(), Reg2.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i));
    } else if (((1U << Reg1.Idx()) & Options.GPRSpillMask)) {
      str(Reg1.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i));
    } else if (((1U << Reg2.Idx()) & Options.GPRSpillMask)) {
      str(Reg2.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i + 1));
    }
  }

  // Now handle PF/AF
  // Only stored when NZCV spilling is requested and the caller selected them in GPRSpillMask.
  if (Options.NZCV && PFAFSpillMask) {
    auto PFOffset = offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw);
    auto AFOffset = offsetof(FEXCore::Core::CpuStateFrame, State.af_raw);
    LOGMAN_THROW_A_FMT(PFAFSpillMask == PFAFMask, "PF/AF not spilled together");
    // The single stp of two W registers below relies on af_raw directly following pf_raw.
    LOGMAN_THROW_A_FMT(AFOffset == PFOffset + 4, "PF/AF are together");

    stp<ARMEmitter::IndexType::OFFSET>(REG_PF.W(), REG_AF.W(), STATE.R(), PFOffset);
  }

  if (Options.FPRs) {
    if (EmitterCTX->HostFeatures.SupportsAVX && EmitterCTX->HostFeatures.SupportsSVE256) {
      // AVX + SVE-256 path: each selected register is stored individually into State.xmm.avx.data
      // with a predicated st1b, using TmpReg to hold the offset from STATE.
      for (size_t i = 0; i < StaticFPRegisters.size(); i++) {
        const auto Reg = StaticFPRegisters[i];

        if (((1U << Reg.Idx()) & Options.FPRSpillMask) != 0) {
          mov(ARMEmitter::Size::i64Bit, TmpReg, ARRAY_OFFSETOF(Core::CpuStateFrame, State.xmm.avx.data, i));
          st1b<ARMEmitter::SubRegSize::i8Bit>(Reg.Z(), PRED_TMP_32B, STATE.R(), TmpReg);
        }
      }
    } else {
      if (Options.GPRSpillMask && Options.FPRSpillMask == ~0U) {
        // Optimize the common case where we can spill four registers per instruction
        // Load the sse offset in to the temporary register
        add(ARMEmitter::Size::i64Bit, TmpReg, STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data));
        // Each st1 stores four Q registers and post-increments TmpReg by 64 bytes.
        for (size_t i = 0; i < StaticFPRegisters.size(); i += 4) {
          const auto Reg1 = StaticFPRegisters[i];
          const auto Reg2 = StaticFPRegisters[i + 1];
          const auto Reg3 = StaticFPRegisters[i + 2];
          const auto Reg4 = StaticFPRegisters[i + 3];
          st1<ARMEmitter::SubRegSize::i64Bit>(Reg1.Q(), Reg2.Q(), Reg3.Q(), Reg4.Q(), TmpReg, 64);
        }
      } else {
        // Partial spill: pair up adjacent selected registers, otherwise store them one at a time.
        for (size_t i = 0; i < StaticFPRegisters.size(); i += 2) {
          const auto Reg1 = StaticFPRegisters[i];
          const auto Reg2 = StaticFPRegisters[i + 1];

          if (((1U << Reg1.Idx()) & Options.FPRSpillMask) && ((1U << Reg2.Idx()) & Options.FPRSpillMask)) {
            stp<ARMEmitter::IndexType::OFFSET>(Reg1.Q(), Reg2.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i));
          } else if (((1U << Reg1.Idx()) & Options.FPRSpillMask)) {
            str(Reg1.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i));
          } else if (((1U << Reg2.Idx()) & Options.FPRSpillMask)) {
            str(Reg2.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i + 1));
          }
        }
      }
    }
  }
}

// Emits code that reloads the statically allocated guest registers from the CPU state frame
// addressed by STATE. This is the counterpart of SpillStaticRegs.
//
// Options (taken by value; the local copy is modified):
//   NZCV          - reload the NZCV system register from State.flags[24];
//   GPRFillMask   - bit N set => fill the static GPR with index N. Must be non-zero;
//   FPRs          - enable filling of the static vector registers (and of the predicate registers);
//   FPRFillMask   - bit N set => fill the static vector register with index N;
//   OptionalReg / OptionalReg2 - scratch registers. When absent, static registers that are
//                   selected in GPRFillMask are borrowed as scratch. They are still part of the
//                   GPR fill, which is emitted after all scratch use, so they end up with
//                   their guest values.
void Arm64Emitter::FillStaticRegs(FillStaticRegOptions Options) {
  // Returns the first static register selected in *GPRFillMask and clears its bit,
  // or std::nullopt when no selected register remains.
  auto FindTempReg = [this](uint32_t* GPRFillMask) -> std::optional<ARMEmitter::Register> {
    for (auto Reg : StaticRegisters) {
      if (((1U << Reg.Idx()) & *GPRFillMask)) {
        *GPRFillMask &= ~(1U << Reg.Idx());
        return std::make_optional(Reg);
      }
    }
    return std::nullopt;
  };

  LOGMAN_THROW_A_FMT(Options.GPRFillMask != 0, "Must fill at least 2 GPRs for a temp");
  // Work on a copy of the mask so that the borrowed scratch registers are still filled below.
  uint32_t TempGPRFillMask = Options.GPRFillMask;
  if (!Options.OptionalReg.has_value()) {
    Options.OptionalReg = FindTempReg(&TempGPRFillMask);
  }

  if (!Options.OptionalReg2.has_value()) {
    Options.OptionalReg2 = FindTempReg(&TempGPRFillMask);
  }
  LOGMAN_THROW_A_FMT(Options.OptionalReg.has_value() && Options.OptionalReg2.has_value(), "Didn't have an SRA register to use as a "
                                                                                          "temporary while "
                                                                                          "spilling!");

  auto TmpReg = *Options.OptionalReg;
  auto TmpReg2 = *Options.OptionalReg2;

#ifdef ARCHITECTURE_arm64ec
  // Load STATE in from the CPU area as x28 is not callee saved in the ARM64EC ABI.
  ldr(TmpReg.X(), ARMEmitter::Reg::r18, TEB_CPU_AREA_OFFSET);
  ldr(STATE, TmpReg, CPU_AREA_EMULATOR_DATA_OFFSET);
#endif

  ldr(REG_CALLRET_SP, STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.callret_sp));

  if (Options.NZCV) {
    // Regardless of what GPRs/FPRs we're filling, we need to fill NZCV since it
    // is always static and was almost certainly clobbered.
    //
    // TODO: Can we prove that NZCV is not used across a call in some cases and
    // omit this? Might help x87 perf? Future idea.
    ldr(TmpReg.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    msr(ARMEmitter::SystemRegister::NZCV, TmpReg);
  }

  // Restores FPCR (including FIZ) and, when FPRs are being filled, the predicate registers.
  // This has to precede the SVE loads below, which use PRED_TMP_32B.
  FillSpecialRegs(TmpReg, TmpReg2, true, Options.FPRs);

  if (Options.FPRs) {
    if (EmitterCTX->HostFeatures.SupportsAVX && EmitterCTX->HostFeatures.SupportsSVE256) {
      // AVX + SVE-256 path: each selected register is loaded individually from State.xmm.avx.data
      // with a predicated ld1b, using TmpReg to hold the offset from STATE.
      for (size_t i = 0; i < StaticFPRegisters.size(); i++) {
        const auto Reg = StaticFPRegisters[i];
        if (((1U << Reg.Idx()) & Options.FPRFillMask) != 0) {
          mov(ARMEmitter::Size::i64Bit, TmpReg, ARRAY_OFFSETOF(Core::CpuStateFrame, State.xmm.avx.data, i));
          ld1b<ARMEmitter::SubRegSize::i8Bit>(Reg.Z(), PRED_TMP_32B.Zeroing(), STATE.R(), TmpReg);
        }
      }
    } else {
      if (Options.GPRFillMask && Options.FPRFillMask == ~0U) {
        // Optimize the common case where we can fill four registers per instruction.
        // Use one of the filling static registers before we fill it.
        // Load the sse offset in to the temporary register
        add(ARMEmitter::Size::i64Bit, TmpReg, STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.xmm.sse.data));
        // Each ld1 loads four Q registers and post-increments TmpReg by 64 bytes.
        for (size_t i = 0; i < StaticFPRegisters.size(); i += 4) {
          const auto Reg1 = StaticFPRegisters[i];
          const auto Reg2 = StaticFPRegisters[i + 1];
          const auto Reg3 = StaticFPRegisters[i + 2];
          const auto Reg4 = StaticFPRegisters[i + 3];
          ld1<ARMEmitter::SubRegSize::i64Bit>(Reg1.Q(), Reg2.Q(), Reg3.Q(), Reg4.Q(), TmpReg, 64);
        }
      } else {
        // Partial fill: pair up adjacent selected registers, otherwise load them one at a time.
        for (size_t i = 0; i < StaticFPRegisters.size(); i += 2) {
          const auto Reg1 = StaticFPRegisters[i];
          const auto Reg2 = StaticFPRegisters[i + 1];

          if (((1U << Reg1.Idx()) & Options.FPRFillMask) && ((1U << Reg2.Idx()) & Options.FPRFillMask)) {
            ldp<ARMEmitter::IndexType::OFFSET>(Reg1.Q(), Reg2.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i));
          } else if (((1U << Reg1.Idx()) & Options.FPRFillMask)) {
            ldr(Reg1.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i));
          } else if (((1U << Reg2.Idx()) & Options.FPRFillMask)) {
            ldr(Reg2.Q(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.xmm.sse.data, i + 1));
          }
        }
      }
    }
  }

  // PF/AF are special, remove them from the mask
  // (they are loaded separately below as a 32-bit pair rather than through the gregs loop).
  uint32_t PFAFMask = ((1u << REG_PF.Idx()) | ((1u << REG_AF.Idx())));
  uint32_t PFAFFillMask = Options.GPRFillMask & PFAFMask;
  Options.GPRFillMask &= ~PFAFMask;

  // The GPR fill comes last: it overwrites any static registers that were borrowed as scratch above.
  for (size_t i = 0; i < StaticRegisters.size(); i += 2) {
    auto Reg1 = StaticRegisters[i];
    auto Reg2 = StaticRegisters[i + 1];
    if (((1U << Reg1.Idx()) & Options.GPRFillMask) && ((1U << Reg2.Idx()) & Options.GPRFillMask)) {
      ldp<ARMEmitter::IndexType::OFFSET>(Reg1.X(), Reg2.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i));
    } else if ((1U << Reg1.Idx()) & Options.GPRFillMask) {
      ldr(Reg1.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i));
    } else if ((1U << Reg2.Idx()) & Options.GPRFillMask) {
      ldr(Reg2.X(), STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, State.gregs, i + 1));
    }
  }

  // Now handle PF/AF
  // Only loaded when NZCV filling is requested and the caller selected them in GPRFillMask.
  if (Options.NZCV && PFAFFillMask) {
    LOGMAN_THROW_A_FMT(PFAFFillMask == PFAFMask, "PF/AF not filled together");

    ldp<ARMEmitter::IndexType::OFFSET>(REG_PF.W(), REG_AF.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.pf_raw));
  }
}

// Emits stores of `VRegs` to consecutive memory starting at the address held in `TmpReg`.
// `TmpReg` is advanced past the stored data by the emitted code.
// With `SVE256Regs` each register is stored through its Z view using PRED_TMP_32B,
// otherwise through its Q view.
void Arm64Emitter::PushVectorRegisters(ARMEmitter::Register TmpReg, bool SVE256Regs, std::span<const ARMEmitter::VRegister> VRegs) {
  const size_t Total = VRegs.size();
  // Registers that don't fill a group of four are handled first, two at a time.
  const size_t PairLimit = Total % 4;
  size_t Cursor = 0;

  if (!SVE256Regs) {
    // ASIMD: post-indexed st1 advances TmpReg by itself.
    while (Cursor < PairLimit) {
      const auto RegA = VRegs[Cursor];
      const auto RegB = VRegs[Cursor + 1];
      st1<ARMEmitter::SubRegSize::i64Bit>(RegA.Q(), RegB.Q(), TmpReg, 32);
      Cursor += 2;
    }

    while (Cursor < Total) {
      const auto RegA = VRegs[Cursor];
      const auto RegB = VRegs[Cursor + 1];
      const auto RegC = VRegs[Cursor + 2];
      const auto RegD = VRegs[Cursor + 3];
      st1<ARMEmitter::SubRegSize::i64Bit>(RegA.Q(), RegB.Q(), RegC.Q(), RegD.Q(), TmpReg, 64);
      Cursor += 4;
    }
    return;
  }

  // SVE: the structured stores use a zero immediate offset, so TmpReg is advanced with an explicit add.
  while (Cursor < PairLimit) {
    const auto RegA = VRegs[Cursor];
    const auto RegB = VRegs[Cursor + 1];
    st2b(RegA.Z(), RegB.Z(), PRED_TMP_32B, TmpReg, 0);
    add(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, 32 * 2);
    Cursor += 2;
  }

  while (Cursor < Total) {
    const auto RegA = VRegs[Cursor];
    const auto RegB = VRegs[Cursor + 1];
    const auto RegC = VRegs[Cursor + 2];
    const auto RegD = VRegs[Cursor + 3];
    st4b(RegA.Z(), RegB.Z(), RegC.Z(), RegD.Z(), PRED_TMP_32B, TmpReg, 0);
    add(ARMEmitter::Size::i64Bit, TmpReg, TmpReg, 32 * 4);
    Cursor += 4;
  }
}

// Emits post-indexed stores of `Regs` starting at the address held in `TmpReg`.
// Every store, including the lone str used for an odd register count, advances TmpReg by 16 bytes.
void Arm64Emitter::PushGeneralRegisters(ARMEmitter::Register TmpReg, std::span<const ARMEmitter::Register> Regs) {
  const size_t Count = Regs.size();
  size_t Next = 0;

  // With an odd count the first register is stored alone in its own 16-byte slot.
  if ((Count % 2) != 0) {
    const auto Single = Regs[0];
    str<ARMEmitter::IndexType::POST>(Single.X(), TmpReg, 16);
    Next = 1;
  }

  // Everything that remains is stored as pairs.
  while (Next < Count) {
    const auto RegA = Regs[Next];
    const auto RegB = Regs[Next + 1];
    stp<ARMEmitter::IndexType::POST>(RegA.X(), RegB.X(), TmpReg, 16);
    Next += 2;
  }
}

// Emits loads of `VRegs` from consecutive memory at the stack pointer, advancing SP past the data.
// With `SVE256Regs` each register is loaded through its Z view using PRED_TMP_32B (zeroing),
// otherwise through its Q view. Mirrors PushVectorRegisters.
void Arm64Emitter::PopVectorRegisters(bool SVE256Regs, std::span<const ARMEmitter::VRegister> VRegs) {
  const size_t Total = VRegs.size();
  // Registers that don't fill a group of four are handled first, two at a time.
  const size_t PairLimit = Total % 4;
  size_t Cursor = 0;

  if (!SVE256Regs) {
    // ASIMD: post-indexed ld1 advances SP by itself.
    while (Cursor < PairLimit) {
      const auto RegA = VRegs[Cursor];
      const auto RegB = VRegs[Cursor + 1];
      ld1<ARMEmitter::SubRegSize::i64Bit>(RegA.Q(), RegB.Q(), ARMEmitter::Reg::rsp, 32);
      Cursor += 2;
    }

    while (Cursor < Total) {
      const auto RegA = VRegs[Cursor];
      const auto RegB = VRegs[Cursor + 1];
      const auto RegC = VRegs[Cursor + 2];
      const auto RegD = VRegs[Cursor + 3];
      ld1<ARMEmitter::SubRegSize::i64Bit>(RegA.Q(), RegB.Q(), RegC.Q(), RegD.Q(), ARMEmitter::Reg::rsp, 64);
      Cursor += 4;
    }
    return;
  }

  // SVE: SP is advanced with an explicit add after each structured load.
  while (Cursor < PairLimit) {
    const auto RegA = VRegs[Cursor];
    const auto RegB = VRegs[Cursor + 1];
    ld2b(RegA.Z(), RegB.Z(), PRED_TMP_32B.Zeroing(), ARMEmitter::Reg::rsp);
    add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, 32 * 2);
    Cursor += 2;
  }

  while (Cursor < Total) {
    const auto RegA = VRegs[Cursor];
    const auto RegB = VRegs[Cursor + 1];
    const auto RegC = VRegs[Cursor + 2];
    const auto RegD = VRegs[Cursor + 3];
    ld4b(RegA.Z(), RegB.Z(), RegC.Z(), RegD.Z(), PRED_TMP_32B.Zeroing(), ARMEmitter::Reg::rsp);
    add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, 32 * 4);
    Cursor += 4;
  }
}

// Emits post-indexed loads of `Regs` from the stack pointer.
// Every load, including the lone ldr used for an odd register count, advances SP by 16 bytes.
// Mirrors PushGeneralRegisters.
void Arm64Emitter::PopGeneralRegisters(std::span<const ARMEmitter::Register> Regs) {
  const size_t Count = Regs.size();
  size_t Next = 0;

  // With an odd count the first register occupies its own 16-byte slot.
  if ((Count % 2) != 0) {
    const auto Single = Regs[0];
    ldr<ARMEmitter::IndexType::POST>(Single.X(), ARMEmitter::Reg::rsp, 16);
    Next = 1;
  }

  // Everything that remains is loaded as pairs.
  while (Next < Count) {
    const auto RegA = Regs[Next];
    const auto RegB = Regs[Next + 1];
    ldp<ARMEmitter::IndexType::POST>(RegA.X(), RegB.X(), ARMEmitter::Reg::rsp, 16);
    Next += 2;
  }
}

// Emits code that reserves stack space and stores GeneralFPRegisters followed by
// GeneralRegistersNotPreserved into it.
// `TmpReg` is overwritten: it is used as the moving store cursor.
// Returns the number of bytes subtracted from the stack pointer.
size_t Arm64Emitter::PushDynamicRegs(ARMEmitter::Register TmpReg) {
  const auto UseSVE256 = EmitterCTX->HostFeatures.SupportsSVE256;

  // Space required: 32 bytes per vector register with SVE-256, 16 bytes otherwise,
  // plus one GPR slot per general register, rounded up to 16 bytes.
  const auto BytesPerFPR = UseSVE256 ? 32 : 16;
  const auto GPRBytes = GeneralRegistersNotPreserved.size() * Core::CPUState::GPR_REG_SIZE;
  const auto FPRBytes = GeneralFPRegisters.size() * BytesPerFPR;
  const uint64_t SPOffset = AlignUp(GPRBytes + FPRBytes, 16);

  sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, SPOffset);

  // Copy SP into TmpReg with an SP-capable move (add #0).
  add(ARMEmitter::Size::i64Bit, TmpReg, ARMEmitter::Reg::rsp, 0);

  LOGMAN_THROW_A_FMT(GeneralFPRegisters.size() % 2 == 0, "Needs to have multiple of 2 FPRs for RA");

  // Vectors go first, general registers after them.
  PushVectorRegisters(TmpReg, UseSVE256, GeneralFPRegisters);
  PushGeneralRegisters(TmpReg, GeneralRegistersNotPreserved);

  return SPOffset;
}

// Emits code that reloads the registers stored by PushDynamicRegs.
// The pop order mirrors the push order: vector registers first, GPRs second.
void Arm64Emitter::PopDynamicRegs() {
  const auto HasSVE256 = EmitterCTX->HostFeatures.SupportsSVE256;

  PopVectorRegisters(HasSVE256, GeneralFPRegisters);
  PopGeneralRegisters(GeneralRegistersNotPreserved);
}

// Emits the spill sequence needed before calling a host function that follows the
// Arm64 `preserve_all` ABI.
//
// Two things are emitted:
//  1) A spill of the statically allocated registers selected by the PreserveAll SRA
//     masks (chosen by guest bitness and by whether 256-bit SVE is in use).
//  2) A stack push of the PreserveAll dynamic GPRs and vector registers.
//
// TmpReg: scratch register handed to SpillStaticRegs, then reused as the store cursor
//         for the stack pushes.
// FPRs:   forwarded to SpillStaticRegs as its `FPRs` option, matching what
//         FillForPreserveAllABICall passes to FillStaticRegs on the way back.
//
// Returns the number of bytes subtracted from rsp for the dynamic register save area.
size_t Arm64Emitter::SpillForPreserveAllABICall(ARMEmitter::Register TmpReg, bool FPRs) {
  const auto CanUseSVE256 = EmitterCTX->HostFeatures.SupportsSVE256;
  // A vector register occupies 32 bytes when 256-bit SVE is in use, 16 otherwise.
  const auto FPRRegSize = CanUseSVE256 ? 32 : 16;

  std::span<const ARMEmitter::Register> DynamicGPRs {};
  std::span<const ARMEmitter::VRegister> DynamicFPRs {};
  uint32_t PreserveSRAMask {};
  uint32_t PreserveSRAFPRMask {};
  if (EmitterCTX->Config.Is64BitMode()) {
    DynamicGPRs = x64::PreserveAll_Dynamic;
    DynamicFPRs = x64::PreserveAll_DynamicFPR;
    PreserveSRAMask = x64::PreserveAll_SRAMask;
    PreserveSRAFPRMask = x64::PreserveAll_SRAFPRMask;

    if (CanUseSVE256) {
      DynamicFPRs = x64::PreserveAll_DynamicFPRSVE;
      PreserveSRAFPRMask = x64::PreserveAll_SRAFPRSVEMask;
    }
  } else {
    DynamicGPRs = x32::PreserveAll_Dynamic;
    DynamicFPRs = x32::PreserveAll_DynamicFPR;
    PreserveSRAMask = x32::PreserveAll_SRAMask;
    PreserveSRAFPRMask = x32::PreserveAll_SRAFPRMask;

    if (CanUseSVE256) {
      DynamicFPRs = x32::PreserveAll_DynamicFPRSVE;
      PreserveSRAFPRMask = x32::PreserveAll_SRAFPRSVEMask;
    }
  }

  // GPRs are stored in 16-byte slots (pairs), so round the count up to an even number.
  const auto GPRSize = AlignUp(DynamicGPRs.size(), 2) * Core::CPUState::GPR_REG_SIZE;
  const auto FPRSize = DynamicFPRs.size() * FPRRegSize;
  const uint64_t SPOffset = AlignUp(GPRSize + FPRSize, 16);

  // Spill the static registers.
  // The caller's FPRs choice must be forwarded here; otherwise the spill side always
  // uses the option's default (true) while the fill side honours the caller's value.
  SpillStaticRegs(TmpReg, {
                            .GPRSpillMask = PreserveSRAMask,
                            .FPRSpillMask = PreserveSRAFPRMask,
                            .FPRs = FPRs,
                          });

  sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, SPOffset);

  // rsp capable move
  add(ARMEmitter::Size::i64Bit, TmpReg, ARMEmitter::Reg::rsp, 0);

  // Push the vector registers.
  PushVectorRegisters(TmpReg, CanUseSVE256, DynamicFPRs);

  // Push the general registers.
  PushGeneralRegisters(TmpReg, DynamicGPRs);

  return SPOffset;
}

void Arm64Emitter::FillForPreserveAllABICall(bool FPRs) {
  const auto CanUseSVE256 = EmitterCTX->HostFeatures.SupportsSVE256;

  std::span<const ARMEmitter::Register> DynamicGPRs {};
  std::span<const ARMEmitter::VRegister> DynamicFPRs {};
  uint32_t PreserveSRAMask {};
  uint32_t PreserveSRAFPRMask {};

  if (EmitterCTX->Config.Is64BitMode()) {
    DynamicGPRs = x64::PreserveAll_Dynamic;
    DynamicFPRs = x64::PreserveAll_DynamicFPR;
    PreserveSRAMask = x64::PreserveAll_SRAMask;
    PreserveSRAFPRMask = x64::PreserveAll_SRAFPRMask;

    if (CanUseSVE256) {
      DynamicFPRs = x64::PreserveAll_DynamicFPRSVE;
      PreserveSRAFPRMask = x64::PreserveAll_SRAFPRSVEMask;
    }
  } else {
    DynamicGPRs = x32::PreserveAll_Dynamic;
    DynamicFPRs = x32::PreserveAll_DynamicFPR;
    PreserveSRAMask = x32::PreserveAll_SRAMask;
    PreserveSRAFPRMask = x32::PreserveAll_SRAFPRMask;

    if (CanUseSVE256) {
      DynamicFPRs = x32::PreserveAll_DynamicFPRSVE;
      PreserveSRAFPRMask = x32::PreserveAll_SRAFPRSVEMask;
    }
  }

  // Fill the static registers.
  FillStaticRegs({
    .GPRFillMask = PreserveSRAMask,
    .FPRFillMask = PreserveSRAFPRMask,
    .FPRs = FPRs,
  });

  // Pop the vector registers.
  PopVectorRegisters(CanUseSVE256, DynamicFPRs);

  // Pop the general registers.
  PopGeneralRegisters(DynamicGPRs);
}

// Emits nop instructions until the cursor address is a multiple of 16.
// The padding counter is decremented by 4 for every nop emitted.
void Arm64Emitter::Align16B() {
  const uint64_t Cursor = GetCursorAddress<uint64_t>();

  // Distance to the next 16-byte boundary; zero when already aligned.
  uint64_t PadBytes = (0 - Cursor) & 0xF;
  while (PadBytes != 0) {
    nop();
    PadBytes -= 4;
  }
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/ArchHelpers/Arm64Emitter.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Config/Config.h>

#ifdef VIXL_DISASSEMBLER
#include <aarch64/disasm-aarch64.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/vector.h>
#endif
#ifdef VIXL_SIMULATOR
#include <aarch64/simulator-aarch64.h>
#include <aarch64/simulator-constants-aarch64.h>
#endif

#include <CodeEmitter/Emitter.h>
#include <CodeEmitter/Registers.h>

#include <cstddef>
#include <cstdint>
#include <optional>
#include <span>

namespace FEXCore::Context {
class ContextImpl;
}
namespace FEXCore::X86State {
enum X86Reg : uint32_t;
}

namespace FEXCore::CPU {
// Contains the address to the currently available CPU state
constexpr auto STATE = ARMEmitter::XReg::x28;

#ifndef ARCHITECTURE_arm64ec
// GPR temporaries. Only x3 can be used across spill boundaries
// so if these ever need to change, be very careful about that.
constexpr auto TMP1 = ARMEmitter::XReg::x0;
constexpr auto TMP2 = ARMEmitter::XReg::x1;
constexpr auto TMP3 = ARMEmitter::XReg::x2;
constexpr auto TMP4 = ARMEmitter::XReg::x3;
// NOTE(review): presumably true because TMP1-TMP4 are x0-x3 in this configuration - confirm against users.
constexpr bool TMP_ABIARGS = true;

// We pin r26/r27 as PF/AF respectively, this is internal FEX ABI.
constexpr auto REG_PF = ARMEmitter::Reg::r26;
constexpr auto REG_AF = ARMEmitter::Reg::r27;

constexpr auto REG_CALLRET_SP = ARMEmitter::XReg::x25;

// Vector temporaries
constexpr auto VTMP1 = ARMEmitter::VReg::v0;
constexpr auto VTMP2 = ARMEmitter::VReg::v1;

// Predicate register for X87 SVE Optimization
constexpr auto SVE_OPT_PRED = ARMEmitter::PReg::p2;

#else
// arm64ec configuration: the GPR temporaries are x10-x13 instead of x0-x3.
constexpr auto TMP1 = ARMEmitter::XReg::x10;
constexpr auto TMP2 = ARMEmitter::XReg::x11;
constexpr auto TMP3 = ARMEmitter::XReg::x12;
constexpr auto TMP4 = ARMEmitter::XReg::x13;
constexpr bool TMP_ABIARGS = false;

// We pin r9/r24 as PF/AF respectively for arm64ec, as r26/r27 are used for SRA.
constexpr auto REG_PF = ARMEmitter::Reg::r9;
constexpr auto REG_AF = ARMEmitter::Reg::r24;

constexpr auto REG_CALLRET_SP = ARMEmitter::XReg::x17;

// Vector temporaries
constexpr auto VTMP1 = ARMEmitter::VReg::v16;
constexpr auto VTMP2 = ARMEmitter::VReg::v17;

// Entry/Exit ABI
constexpr auto EC_CALL_CHECKER_PC_REG = ARMEmitter::XReg::x9;
// Note: this is the same register as REG_CALLRET_SP (x17) in this configuration.
constexpr auto EC_ENTRY_CPUAREA_REG = ARMEmitter::XReg::x17;

// Predicate register for X87 SVE Optimization
constexpr auto SVE_OPT_PRED = ARMEmitter::PReg::p2;

// These structures are not included in the standard Windows headers, define the offsets of members we care about for EC here.
constexpr size_t TEB_CPU_AREA_OFFSET = 0x1788;
constexpr size_t TEB_PEB_OFFSET = 0x60;
constexpr size_t PEB_EC_CODE_BITMAP_OFFSET = 0x368;
constexpr size_t CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET = 0x1;
constexpr size_t CPU_AREA_EMULATOR_STACK_BASE_OFFSET = 0x8;
constexpr size_t CPU_AREA_EMULATOR_DATA_OFFSET = 0x30;

// 2^47.
constexpr uint64_t EC_CODE_BITMAP_MAX_ADDRESS = 1ULL << 47;
#endif

// Will force one single instruction block to be generated first if set when entering the JIT filling SRA.
// FillStaticRegs must preserve this
constexpr auto ENTRY_FILL_SRA_SINGLE_INST_REG = TMP2;

// Predicate to use in the X87 SVE optimization
// (same predicate register, p2, as SVE_OPT_PRED above)
constexpr ARMEmitter::PRegister PRED_X87_SVEOPT = ARMEmitter::PReg::p2;

// Predicate register temporaries (used when AVX support is enabled)
// PRED_TMP_16B indicates a predicate register that indicates the first 16 bytes set to 1.
// PRED_TMP_32B indicates a predicate register that indicates the first 32 bytes set to 1.
constexpr ARMEmitter::PRegister PRED_TMP_16B = ARMEmitter::PReg::p6;
constexpr ARMEmitter::PRegister PRED_TMP_32B = ARMEmitter::PReg::p7;


// This class contains common emitter utility functions that can
// be used by both Arm64 JIT and ARM64 Dispatcher
class Arm64Emitter : public ARMEmitter::Emitter {
public:
  // Constructs the emitter for the given context.
  // NOTE(review): EmissionPtr/size presumably describe an initial buffer to emit into - confirm in the .cpp.
  Arm64Emitter(FEXCore::Context::ContextImpl* ctx, void* EmissionPtr = nullptr, size_t size = 0);

  // Padding policy accepted by LoadConstant.
  enum class PadType {
    // Explicitly does not need padding, even if code-caching is enabled.
    NOPAD,
    // Explicitly needs padding, even if code-caching is disabled.
    DOPAD,
    // Choose to pad or not depending on if code-caching is enabled.
    AUTOPAD,
  };
  // Emits code that loads `Constant` into `Reg` at operand size `s`.
  // NOTE(review): the exact meaning of MaxBytes is not visible from this header - confirm in the .cpp.
  void LoadConstant(ARMEmitter::Size s, ARMEmitter::Register Reg, uint64_t Constant, PadType Pad = PadType::NOPAD, int MaxBytes = 0);

protected:
  // Context this emitter was created for; read by the helpers for host features and configuration.
  FEXCore::Context::ContextImpl* EmitterCTX;

  // Register sets used by the spill/fill and push/pop helpers below.
  std::span<const ARMEmitter::Register> StaticRegisters {};
  std::span<const ARMEmitter::Register> GeneralRegisters {};
  std::span<const ARMEmitter::Register> GeneralRegistersNotPreserved {};
  std::span<const ARMEmitter::VRegister> StaticFPRegisters {};
  std::span<const ARMEmitter::VRegister> GeneralFPRegisters {};
  uint32_t PairRegisters = 0;

  void FillSpecialRegs(ARMEmitter::Register TmpReg, ARMEmitter::Register TmpReg2, bool SetFIZ, bool SetPredRegs);

  // Correlate an ARM register back to an x86 register index.
  // Returning REG_INVALID if there was no mapping.
  FEXCore::X86State::X86Reg GetX86RegRelationToARMReg(ARMEmitter::Register Reg);

  // Options for SpillStaticRegs. The defaults select every register (all mask bits set)
  // and enable both the FPR and NZCV handling.
  struct SpillStaticRegOptions final {
    uint32_t GPRSpillMask {~0U};
    uint32_t FPRSpillMask {~0U};
    bool FPRs {true};
    bool NZCV {true};
  };

  // Options for FillStaticRegs. The defaults select every register (all mask bits set),
  // enable both the FPR and NZCV handling, and supply no optional registers.
  struct FillStaticRegOptions final {
    std::optional<ARMEmitter::Register> OptionalReg {std::nullopt};
    std::optional<ARMEmitter::Register> OptionalReg2 {std::nullopt};
    uint32_t GPRFillMask {~0U};
    uint32_t FPRFillMask {~0U};
    bool FPRs {true};
    bool NZCV {true};
  };

  void SpillStaticRegs(ARMEmitter::Register TmpReg, SpillStaticRegOptions Options);
  void FillStaticRegs(FillStaticRegOptions Options);


  // Convenience overload: spill with default options.
  void SpillStaticRegs(ARMEmitter::Register TmpReg) {
    // Work around a clang bug: https://bugs.llvm.org/show_bug.cgi?id=36684
    SpillStaticRegs(TmpReg, {});
  }

  // Convenience overload: fill with default options.
  void FillStaticRegs() {
    // Work around a clang bug: https://bugs.llvm.org/show_bug.cgi?id=36684
    FillStaticRegs({});
  }

  // Register 0-18 + 29 + 30 are caller saved
  static constexpr uint32_t CALLER_GPR_MASK = 0b0110'0000'0000'0111'1111'1111'1111'1111U;

  // This isn't technically true because the lower 64-bits of v8..v15 are callee saved
  // We can't guarantee only the lower 64bits are used so flush everything
  static constexpr uint32_t CALLER_FPR_MASK = ~0U;

  // Generic push and pop vector registers.
  // The push helpers take the store cursor in TmpReg; the pop helpers take no cursor register.
  void PushVectorRegisters(ARMEmitter::Register TmpReg, bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs);
  void PushGeneralRegisters(ARMEmitter::Register TmpReg, std::span<const ARMEmitter::Register> Regs);

  void PopVectorRegisters(bool SVERegs, std::span<const ARMEmitter::VRegister> VRegs);
  void PopGeneralRegisters(std::span<const ARMEmitter::Register> Regs);

  // Returns stack size consumed for pushing dynamic registers.
  size_t PushDynamicRegs(ARMEmitter::Register TmpReg);
  void PopDynamicRegs();

  void PushCalleeSavedRegisters();
  void PopCalleeSavedRegisters();

  // Spills and fills SRA/Dynamic registers that are required for Arm64 `preserve_all` ABI.
  // This ABI changes most registers to be callee saved.
  // Caller Saved:
  // - X0-X8, X16-X18, X30.
  // - v0-v7
  // - For 256-bit SVE hosts: top 128-bits of v8-v31
  //
  // Callee Saved:
  // - X9-X15, X19-X29, X31
  // - Low 128-bits of v8-v31
  size_t SpillForPreserveAllABICall(ARMEmitter::Register TmpReg, bool FPRs = true);
  void FillForPreserveAllABICall(bool FPRs = true);

  // Spills registers ahead of an ABI call, picking the `preserve_all` sequence when supported and
  // otherwise spilling the static registers and pushing the dynamic ones.
  // Returns the value returned by the selected spill/push helper.
  size_t SpillForABICall(bool SupportsPreserveAllABI, ARMEmitter::Register TmpReg, bool FPRs = true) {
    if (SupportsPreserveAllABI) {
      return SpillForPreserveAllABICall(TmpReg, FPRs);
    } else {
      SpillStaticRegs(TmpReg, {
                                .FPRs = FPRs,
                              });
      return PushDynamicRegs(TmpReg);
    }
  }

  // Counterpart of SpillForABICall: pops the dynamic registers before refilling the static ones
  // in the non-`preserve_all` case.
  void FillForABICall(bool SupportsPreserveAllABI, bool FPRs = true) {
    if (SupportsPreserveAllABI) {
      FillForPreserveAllABICall(FPRs);
    } else {
      PopDynamicRegs();
      FillStaticRegs({.FPRs = FPRs});
    }
  }

  // Pads the instruction stream with nops up to the next 16-byte boundary.
  void Align16B();

#ifdef VIXL_SIMULATOR
  // Generates a vixl simulator runtime call.
  //
  // This matches behaviour of vixl's macro assembler, but we need to reimplement it since we aren't using the macro assembler.
  // This isn't too complex with how vixl emits this.
  //
  // Emit:
  // 1) hlt(kRuntimeCallOpcode)
  // 2) Simulator wrapper handler
  // 3) Function to call
  // 4) Style of the function call (Call versus tail-call)

  template<typename R, typename... P>
  void GenerateRuntimeCall(R (*Function)(P...)) {
    uintptr_t SimulatorWrapperAddress = reinterpret_cast<uintptr_t>(&(vixl::aarch64::Simulator::RuntimeCallStructHelper<R, P...>::Wrapper));

    uintptr_t FunctionAddress = reinterpret_cast<uintptr_t>(Function);

    hlt(vixl::aarch64::kRuntimeCallOpcode);

    // Simulator wrapper address pointer.
    dc64(SimulatorWrapperAddress);

    // Runtime function address to call
    dc64(FunctionAddress);

    // Call type
    dc32(vixl::aarch64::kCallRuntime);
  }

  // Indirect variant: the function to call is named by register index rather than by address.
  template<typename R, typename... P>
  void GenerateIndirectRuntimeCall(ARMEmitter::Register Reg) {
    uintptr_t SimulatorWrapperAddress = reinterpret_cast<uintptr_t>(&(vixl::aarch64::Simulator::RuntimeCallStructHelper<R, P...>::Wrapper));

    hlt(vixl::aarch64::kIndirectRuntimeCallOpcode);

    // Simulator wrapper address pointer.
    dc64(SimulatorWrapperAddress);

    // Register that contains the function to call
    dc32(Reg.Idx());

    // Call type
    dc32(vixl::aarch64::kCallRuntime);
  }

  // Explicit specialization for a `float(__uint128_t)` callee; it emits the same sequence as the generic template.
  template<>
  void GenerateIndirectRuntimeCall<float, __uint128_t>(ARMEmitter::Register Reg) {
    uintptr_t SimulatorWrapperAddress =
      reinterpret_cast<uintptr_t>(&(vixl::aarch64::Simulator::RuntimeCallStructHelper<float, __uint128_t>::Wrapper));

    hlt(vixl::aarch64::kIndirectRuntimeCallOpcode);

    // Simulator wrapper address pointer.
    dc64(SimulatorWrapperAddress);

    // Register that contains the function to call
    dc32(Reg.Idx());

    // Call type
    dc32(vixl::aarch64::kCallRuntime);
  }
#else
  // Without the vixl simulator these are no-ops, so call sites need no #ifdef of their own.
  template<typename R, typename... P>
  void GenerateRuntimeCall(R (*Function)(P...)) {
    // Explicitly doing nothing.
  }
  template<typename R, typename... P>
  void GenerateIndirectRuntimeCall(ARMEmitter::Register Reg) {
    // Explicitly doing nothing.
  }
#endif

#ifdef VIXL_SIMULATOR
  // Simulator state, only present in simulator builds.
  vixl::aarch64::Decoder SimDecoder;
  vixl::aarch64::Simulator Simulator;
  // 8 MiB.
  constexpr static size_t SimulatorStackSize = 8 * 1024 * 1024;
#endif

#ifdef VIXL_DISASSEMBLER
  // Disassembler state, only present in disassembler builds.
  fextl::vector<char> DisasmBuffer;
  constexpr static int DISASM_BUFFER_SIZE {256};
  fextl::unique_ptr<vixl::aarch64::Disassembler> Disasm;
  fextl::unique_ptr<vixl::aarch64::Decoder> DisasmDecoder;

  FEX_CONFIG_OPT(Disassemble, DISASSEMBLE);
#endif

  FEX_CONFIG_OPT(EnableCodeCaching, ENABLECODECACHINGWIP);
};

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/CPUBackend.cpp
================================================
// SPDX-License-Identifier: MIT
#include "FEXCore/Config/Config.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"

#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/AllocatorHooks.h>
#include <FEXCore/Utils/PrctlUtils.h>

#include <cstdint>

#ifndef _WIN32
#include <linux/prctl.h>
#include <sys/prctl.h>
#endif

namespace FEXCore {
namespace CPU {

  // Size of the first code buffer when code caching is disabled (16 MiB).
  static constexpr size_t INITIAL_CODE_SIZE = 1024 * 1024 * 16;
  // Upper bound for a code buffer (128 MiB); StartLargerCodeBuffer doubles sizes up to this cap.
  // We don't want to move above 128MB atm because that means we will have to encode longer jumps
  static constexpr size_t MAX_CODE_SIZE = 1024 * 1024 * 128;

  // Pool of 128-bit vector constants, each stored as two 64-bit values.
  // The CPUBackend constructor indexes this table with 0..NAMED_VECTOR_CONST_POOL_MAX-1, so the row order
  // has to follow FEXCore::IR::NamedVectorConstant; the trailing comment on each row names the entry it provides.
  constexpr static uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX][2] = {
    {0x0003'0002'0001'0000ULL, 0x0007'0006'0005'0004ULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX
    {0x000B'000A'0009'0008ULL, 0x000F'000E'000D'000CULL}, // NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER
    {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT
    {0x0000'0000'8000'0000ULL, 0x0000'0000'8000'0000ULL}, // NAMED_VECTOR_PADDSUBPS_INVERT_UPPER
    {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT
    {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'0000ULL}, // NAMED_VECTOR_PADDSUBPD_INVERT_UPPER
    {0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPS_INVERT
    {0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPS_INVERT_UPPER
    {0x0000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPD_INVERT
    {0x0000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_PSUBADDPD_INVERT_UPPER
    {0x0000'0001'0000'0000ULL, 0x0000'0003'0000'0002ULL}, // NAMED_VECTOR_MOVMSKPS_SHIFT
    {0x040B'0E01'0B0E'0104ULL, 0x0C03'0609'0306'090CULL}, // NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE
    {0x0706'0504'FFFF'FFFFULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0110B
    {0x0706'0504'0302'0100ULL, 0xFFFF'FFFF'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_0111B
    {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1001B
    {0x0706'0504'0302'0100ULL, 0x0F0E'0D0C'FFFF'FFFFULL}, // NAMED_VECTOR_BLENDPS_1011B
    {0xFFFF'FFFF'0302'0100ULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1101B
    {0x0706'0504'FFFF'FFFFULL, 0x0F0E'0D0C'0B0A'0908ULL}, // NAMED_VECTOR_BLENDPS_1110B
    {0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB
    {0x8040'2010'0804'0201ULL, 0x8040'2010'0804'0201ULL}, // NAMED_VECTOR_MOVMASKB_UPPER
    {0x8000'0000'0000'0000ULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_ONE
    {0xD49A'784B'CD1B'8AFEULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_LOG2_10
    {0xB8AA'3B29'5C17'F0BCULL, 0x0000'0000'0000'3FFFULL}, // NAMED_VECTOR_X87_LOG2_E
    {0xC90F'DAA2'2168'C235ULL, 0x0000'0000'0000'4000ULL}, // NAMED_VECTOR_X87_PI
    {0x9A20'9A84'FBCF'F799ULL, 0x0000'0000'0000'3FFDULL}, // NAMED_VECTOR_X87_LOG10_2
    {0xB172'17F7'D1CF'79ACULL, 0x0000'0000'0000'3FFEULL}, // NAMED_VECTOR_X87_LOG_2
    {0x4F00'0000'4F00'0000ULL, 0x4F00'0000'4F00'0000ULL}, // NAMED_VECTOR_CVTMAX_F32_I32
    {0x4F00'0000'4F00'0000ULL, 0x4F00'0000'4F00'0000ULL}, // NAMED_VECTOR_CVTMAX_F32_I32_UPPER
    {0x5F00'0000'5F00'0000ULL, 0x5F00'0000'5F00'0000ULL}, // NAMED_VECTOR_CVTMAX_F32_I64
    {0x41E0'0000'0000'0000ULL, 0x41E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I32
    {0x41E0'0000'0000'0000ULL, 0x41E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I32_UPPER
    {0x43E0'0000'0000'0000ULL, 0x43E0'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_F64_I64
    {0x8000'0000'8000'0000ULL, 0x8000'0000'8000'0000ULL}, // NAMED_VECTOR_CVTMAX_I32
    {0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL}, // NAMED_VECTOR_CVTMAX_I64
    {0x0000'0000'0000'0000ULL, 0x0000'0000'0000'8000ULL}, // NAMED_VECTOR_F80_SIGN_MASK
    {0x5A82'7999'5A82'7999ULL, 0x5A82'7999'5A82'7999ULL}, // NAMED_VECTOR_SHA1RNDS_K0
    {0x6ED9'EBA1'6ED9'EBA1ULL, 0x6ED9'EBA1'6ED9'EBA1ULL}, // NAMED_VECTOR_SHA1RNDS_K1
    {0x8F1B'BCDC'8F1B'BCDCULL, 0x8F1B'BCDC'8F1B'BCDCULL}, // NAMED_VECTOR_SHA1RNDS_K2
    {0xCA62'C1D6'CA62'C1D6ULL, 0xCA62'C1D6'CA62'C1D6ULL}, // NAMED_VECTOR_SHA1RNDS_K3
  };

  constexpr static auto PSHUFLW_LUT {[]() consteval {
    struct LUTType {
      uint64_t Val[2];
    };
    // Compile-time table of TBL (single register) byte-index vectors that emulate PSHUFLW,
    // one entry per 8-bit immediate.
    //
    // PSHUFLW picks each of the four low 16-bit words ([15:0], [31:16], [47:32], [63:48]) of the
    // result from the low 64 bits of the source using a 2-bit field of the immediate, while
    // bits [127:64] pass through unchanged.

    // Byte indices 8..15: copies the upper 64 bits as-is.
    constexpr uint64_t UpperPassThrough = 0x0f'0e'0d'0c'0b'0a'09'08;
    // Byte-index pair that addresses each of the four low source words.
    constexpr uint64_t SourceWordBytes[4] = {0x01'00, 0x03'02, 0x05'04, 0x07'06};

    std::array<LUTType, 256> Table {};
    for (size_t Imm = 0; Imm < Table.size(); ++Imm) {
      uint64_t LowerIndices = 0;
      for (size_t Slot = 0; Slot < 4; ++Slot) {
        const size_t Selector = (Imm >> (Slot * 2)) & 0b11;
        LowerIndices |= SourceWordBytes[Selector] << (Slot * 16);
      }

      Table[Imm].Val[0] = LowerIndices;
      Table[Imm].Val[1] = UpperPassThrough;
    }
    return Table;
  }()};

  constexpr static auto PSHUFHW_LUT {[]() consteval {
    struct LUTType {
      uint64_t Val[2];
    };
    // Compile-time table of TBL (single register) byte-index vectors that emulate PSHUFHW,
    // one entry per 8-bit immediate.
    //
    // PSHUFHW picks each of the four high 16-bit words ([79:64], [95:80], [111:96], [127:112]) of
    // the result from bits [127:64] of the source using a 2-bit field of the immediate, while
    // bits [63:0] pass through unchanged.

    // Byte indices 0..7: copies the lower 64 bits as-is.
    constexpr uint64_t LowerPassThrough = 0x07'06'05'04'03'02'01'00;
    // Byte-index pair that addresses each of the four high source words.
    constexpr uint64_t SourceWordBytes[4] = {0x09'08, 0x0b'0a, 0x0d'0c, 0x0f'0e};

    std::array<LUTType, 256> Table {};
    for (size_t Imm = 0; Imm < Table.size(); ++Imm) {
      uint64_t UpperIndices = 0;
      for (size_t Slot = 0; Slot < 4; ++Slot) {
        const size_t Selector = (Imm >> (Slot * 2)) & 0b11;
        UpperIndices |= SourceWordBytes[Selector] << (Slot * 16);
      }

      Table[Imm].Val[0] = LowerPassThrough;
      Table[Imm].Val[1] = UpperIndices;
    }
    return Table;
  }()};

  constexpr static auto PSHUFD_LUT {[]() consteval {
    struct LUTType {
      uint64_t Val[2];
    };
    // Compile-time table of TBL (single register) byte-index vectors that emulate PSHUFD,
    // one entry per 8-bit immediate.
    //
    // PSHUFD picks each 32-bit element of the result ([31:0], [63:32], [95:64], [127:96]) from
    // the source using a 2-bit field of the immediate.

    // Four byte indices addressing each 32-bit source element.
    constexpr uint64_t SourceDWordBytes[4] = {
      0x03'02'01'00,
      0x07'06'05'04,
      0x0b'0a'09'08,
      0x0f'0e'0d'0c,
    };

    std::array<LUTType, 256> Table {};
    for (size_t Imm = 0; Imm < Table.size(); ++Imm) {
      // Byte indices for the source element chosen by the immediate for destination element `Slot`.
      const auto Chosen = [&](size_t Slot) {
        return SourceDWordBytes[(Imm >> (Slot * 2)) & 0b11];
      };

      Table[Imm].Val[0] = Chosen(0) | (Chosen(1) << 32);
      Table[Imm].Val[1] = Chosen(2) | (Chosen(3) << 32);
    }
    return Table;
  }()};

  constexpr static auto SHUFPS_LUT {[]() consteval {
    struct LUTType {
      uint64_t Val[2];
    };
    // Compile-time table of TBL (two register) byte-index vectors that emulate SHUFPS,
    // one entry per 8-bit immediate.
    //
    // SHUFPS fills the lower two 32-bit elements of the result from the first source and the
    // upper two from the second source, each chosen by a 2-bit field of the immediate:
    //   Dest[31:0]   = Src1[<field 0>]
    //   Dest[63:32]  = Src1[<field 1>]
    //   Dest[95:64]  = Src2[<field 2>]
    //   Dest[127:96] = Src2[<field 3>]

    // Four byte indices addressing each 32-bit element of the first source.
    constexpr uint64_t Src1DWordBytes[4] = {
      0x03'02'01'00,
      0x07'06'05'04,
      0x0b'0a'09'08,
      0x0f'0e'0d'0c,
    };
    // The second table register starts 16 bytes in, so every byte index is offset by 0x10.
    constexpr uint64_t SecondSourceOffset = 0x10101010;

    std::array<LUTType, 256> Table {};
    for (size_t Imm = 0; Imm < Table.size(); ++Imm) {
      const auto FromSrc1 = [&](size_t Slot) {
        return Src1DWordBytes[(Imm >> (Slot * 2)) & 0b11];
      };
      const auto FromSrc2 = [&](size_t Slot) {
        return Src1DWordBytes[(Imm >> (Slot * 2)) & 0b11] + SecondSourceOffset;
      };

      Table[Imm].Val[0] = FromSrc1(0) | (FromSrc1(1) << 32);
      Table[Imm].Val[1] = FromSrc2(2) | (FromSrc2(3) << 32);
    }
    return Table;
  }()};

  constexpr static auto DPPS_MASK {[]() consteval {
    struct LUTType {
      uint32_t Val[4];
    };

    // Entry N expands the 4-bit value N into four 32-bit lane masks:
    // lane K is all-ones when bit K of N is set and zero otherwise.
    std::array<LUTType, 16> Masks {};
    for (size_t Bits = 0; Bits < Masks.size(); ++Bits) {
      for (size_t Lane = 0; Lane < 4; ++Lane) {
        const bool LaneSelected = ((Bits >> Lane) & 1) != 0;
        Masks[Bits].Val[Lane] = LaneSelected ? ~0U : 0U;
      }
    }
    return Masks;
  }()};

  constexpr static auto DPPD_MASK {[]() consteval {
    struct LUTType {
      uint64_t Val[2];
    };

    // Entry N expands the 2-bit value N into two 64-bit lane masks:
    // lane K is all-ones when bit K of N is set and zero otherwise.
    std::array<LUTType, 4> Masks {};
    for (size_t Bits = 0; Bits < Masks.size(); ++Bits) {
      for (size_t Lane = 0; Lane < 2; ++Lane) {
        const bool LaneSelected = ((Bits >> Lane) & 1) != 0;
        Masks[Bits].Val[Lane] = LaneSelected ? ~0ULL : 0ULL;
      }
    }
    return Masks;
  }()};

  constexpr static auto PBLENDW_LUT {[]() consteval {
    struct LUTType {
      uint16_t Val[8];
    };
    // Compile-time table of TBX (one register) byte-index vectors that emulate PBLENDW,
    // one entry per 8-bit immediate.
    //
    // PBLENDW replaces 16-bit word K of the destination with word K of the source when
    // bit K of the immediate is set and leaves it alone otherwise:
    //   Dest[16*K+15 : 16*K] = Imm[K] ? Src[16*K+15 : 16*K] : Dest[16*K+15 : 16*K]
    //
    // A selected word gets the byte indices of that same word in the source ({2K+1, 2K});
    // an unselected word gets 0xFFFF as its "keep the original destination" marker.
    constexpr uint16_t KeepDestination = 0xFF'FF;

    std::array<LUTType, 256> Table {};
    for (size_t Imm = 0; Imm < Table.size(); ++Imm) {
      for (size_t Word = 0; Word < 8; ++Word) {
        const size_t LowByte = Word * 2;
        const size_t HighByte = LowByte + 1;
        const auto TakeFromSource = static_cast<uint16_t>((HighByte << 8) | LowByte);

        const bool Selected = ((Imm >> Word) & 1) != 0;
        Table[Imm].Val[Word] = Selected ? TakeFromSource : KeepDestination;
      }
    }
    return Table;
  }()};

  // Binds the backend to its thread and code buffer manager, then publishes the addresses of the
  // constant tables (and telemetry values, when enabled) into the thread's frame pointer block.
  CPUBackend::CPUBackend(CodeBufferManager& CodeBuffers, FEXCore::Core::InternalThreadState* ThreadState)
    : ThreadState(ThreadState)
    , CodeBuffers(CodeBuffers) {

    auto& FramePointers = ThreadState->CurrentFrame->Pointers;

    // Record the address of every row of the static named-constant table.
    for (size_t Idx = 0; Idx < FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX; ++Idx) {
      FramePointers.NamedVectorConstantPointers[Idx] = reinterpret_cast<uint64_t>(NamedVectorConstants[Idx]);
    }

    // Additionally keep a by-value copy of the whole table in the frame.
    memcpy(FramePointers.NamedVectorConstants, NamedVectorConstants, sizeof(NamedVectorConstants));

    // Record the base address of each immediate-indexed lookup table.
    const auto PublishIndexedTable = [&FramePointers](auto Which, const auto& Table) {
      FramePointers.IndexedNamedVectorConstantPointers[Which] = reinterpret_cast<uint64_t>(Table.data());
    };
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW, PSHUFLW_LUT);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW, PSHUFHW_LUT);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD, PSHUFD_LUT);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS, SHUFPS_LUT);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPS_MASK, DPPS_MASK);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK, DPPD_MASK);
    PublishIndexedTable(FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW, PBLENDW_LUT);

#ifndef FEX_DISABLE_TELEMETRY
    // Record the address of every telemetry value.
    for (size_t Idx = 0; Idx < FEXCore::Telemetry::TYPE_LAST; ++Idx) {
      const auto Type = static_cast<FEXCore::Telemetry::TelemetryType>(Idx);
      auto& Value = FEXCore::Telemetry::GetTelemetryValue(Type);
      FramePointers.TelemetryValueAddresses[Idx] = reinterpret_cast<uint64_t>(&Value);
    }
#endif
  }

  // Defaulted destructor, defined out of line in this translation unit.
  CPUBackend::~CPUBackend() = default;

  // Switches this backend to a freshly started (larger) code buffer and returns a raw pointer to it.
  // The buffer that was current before the call is handed to RegisterForSignalHandler.
  auto CPUBackend::GetEmptyCodeBuffer() -> CodeBuffer* {
    // Hold a reference to the outgoing buffer while the replacement is created.
    auto RetiredBuffer = CurrentCodeBuffer;

    CurrentCodeBuffer = CodeBuffers.StartLargerCodeBuffer();

    RegisterForSignalHandler(std::move(RetiredBuffer));

    CodeBuffer* const Fresh = CurrentCodeBuffer.get();
    return Fresh;
  }

  // Either retains `CodeBuffer` or releases all previously retained buffers, depending on whether
  // the thread's SignalHandlerRefCounter is non-zero.
  void CPUBackend::RegisterForSignalHandler(fextl::shared_ptr<CodeBuffer> CodeBuffer) {
    const bool NoActiveSignalHandlers = ThreadState->CurrentFrame->SignalHandlerRefCounter == 0;
    if (NoActiveSignalHandlers) {
      // Nothing can still be running out of the old buffers; drop every retained reference.
      SignalHandlerCodeBuffers.clear();
      return;
    }

    // We have signal handlers that have generated code
    // This means that we can not safely clear the code at this point in time
    // Keep a reference to the old code buffer to delay deallocation
    SignalHandlerCodeBuffers.push_back(std::move(CodeBuffer));
  }

  // Adopts the manager's latest code buffer if it differs from the one this backend is using.
  // Returns the previously used buffer when a switch happened, nullptr otherwise.
  fextl::shared_ptr<CodeBuffer> CPUBackend::CheckCodeBufferUpdate() {
    auto LatestBuffer = CodeBuffers.GetLatest();
    if (CurrentCodeBuffer == LatestBuffer) {
      // Already up to date.
      return nullptr;
    }

    RegisterForSignalHandler(CurrentCodeBuffer);
    return std::exchange(CurrentCodeBuffer, LatestBuffer);
  }

  // Returns a reference to the GuestToHostMap owned by `Buffer`.
  // The LookupCache pointer is dereferenced without a null check.
  GuestToHostMap& GetLookupCache(const CodeBuffer& Buffer) {
    return *Buffer.LookupCache;
  }

  // Allocates a code buffer of `Size` bytes, makes its last page inaccessible, names the mapping,
  // requests transparent huge pages for it and creates the buffer's lookup cache.
  CodeBuffer::CodeBuffer(size_t Size)
    : AllocatedSize(Size) {
    Ptr = static_cast<uint8_t*>(FEXCore::Allocator::VirtualAlloc(Size, true));
    LOGMAN_THROW_A_FMT(!!Ptr, "Couldn't allocate code buffer");

    // Protect the last page of the allocated buffer to trigger SIGSEGV on write access
    const auto BufferStart = reinterpret_cast<uintptr_t>(Ptr);
    const uintptr_t GuardPageAddr = AlignDown(BufferStart + Size - 1, FEXCore::Utils::FEX_PAGE_SIZE);
    const bool GuardInstalled = FEXCore::Allocator::VirtualProtect(reinterpret_cast<void*>(GuardPageAddr), FEXCore::Utils::FEX_PAGE_SIZE,
                                                                   FEXCore::Allocator::ProtectOptions::None);
    if (!GuardInstalled) {
      // Failure is only logged; construction continues.
      LogMan::Msg::EFmt("Failed to mprotect last page of code buffer.");
    }

    FEXCore::Allocator::VirtualName("FEXMemJIT", reinterpret_cast<void*>(Ptr), Size);

    // Huge-pages reduce the amount of iTLB misses dramatically when it works.
    FEXCore::Allocator::VirtualTHPControl(reinterpret_cast<void*>(Ptr), Size, FEXCore::Allocator::THPControl::Enable);

    LookupCache = fextl::make_unique<GuestToHostMap>();
  }

  // Releases the memory obtained in the constructor, passing the size recorded at allocation time.
  CodeBuffer::~CodeBuffer() {
    FEXCore::Allocator::VirtualFree(Ptr, AllocatedSize);
  }

  // Creates a new code buffer of `Size` bytes, makes it the manager's latest buffer (resetting
  // LatestOffset to zero), notifies OnCodeBufferAllocated and returns it.
  auto CodeBufferManager::AllocateNew(size_t Size) -> fextl::shared_ptr<CodeBuffer> {
#ifndef _WIN32
// MDWE (Memory-Deny-Write-Execute) is a new Linux 6.3 feature.
// It's equivalent to systemd's `MemoryDenyWriteExecute` but implemented entirely in the kernel.
//
// With MDWE active an application can no longer create RWX memory mappings, which FEX relies on
// for its JIT memory, so nothing JIT related can work.
//
// Possible ways to make FEX work with MDWE:
// - call mprotect every time code needs to be written or modified, or
// - use a memory mirror where one half is mapped as RW and the other is RX.
//
// Once MDWE is enabled with the prctl, the feature is sealed and it can /NOT/ be turned off.
//
// `PR_GET_MDWE` reports the state through prctl:
// -1: The kernel doesn't support MDWE
// 0: MDWE is supported but disabled
// >0: MDWE is enabled, hence prohibiting RWX mappings
#ifndef PR_GET_MDWE
#define PR_GET_MDWE 66
#endif
    const int MDWEState = ::prctl(PR_GET_MDWE, 0, 0, 0, 0);
    const bool MDWEUnsupported = MDWEState == -1;
    const bool MDWEDisabled = MDWEState == 0;
    if (!(MDWEUnsupported || MDWEDisabled)) {
      // Only reported; the allocation below is still attempted.
      LogMan::Msg::EFmt("MDWE was set to 0x{:x} which means FEX can't allocate executable memory", MDWEState);
    }
#endif

    auto NewBuffer = fextl::make_shared<CodeBuffer>(Size);

    Latest = NewBuffer;
    LatestOffset = 0;

    OnCodeBufferAllocated(NewBuffer);

    return NewBuffer;
  }

  // Returns the most recently allocated CodeBuffer, lazily creating the first one on demand.
  fextl::shared_ptr<CodeBuffer> CodeBufferManager::GetLatest() {
    if (Latest) {
      return Latest;
    }

    // With code caching enabled, begin with the maximum size right away: a later resize
    // would discard code that was loaded from caches.
    size_t FirstBufferSize;
    if (FEXCore::Config::Get_ENABLECODECACHINGWIP()) {
      FirstBufferSize = MAX_CODE_SIZE;
    } else {
      FirstBufferSize = INITIAL_CODE_SIZE;
    }

    AllocateNew(FirstBufferSize);
    return Latest;
  }

  // Replaces the latest CodeBuffer with one of twice its size (capped at MAX_CODE_SIZE).
  // If no buffer exists yet, the initial buffer is allocated and returned instead.
  fextl::shared_ptr<CodeBuffer> CodeBufferManager::StartLargerCodeBuffer() {
    const bool HaveBuffer = static_cast<bool>(Latest);
    if (!HaveBuffer) {
      // Nothing to grow from: GetLatest performs the initial allocation
      return GetLatest();
    }

    const auto PreviousSize = GetLatest()->AllocatedSize;
    const auto GrownSize = std::min<size_t>(PreviousSize * 2, MAX_CODE_SIZE);
    return AllocateNew(GrownSize);
  }


  bool CPUBackend::IsAddressInCodeBuffer(uintptr_t Address) const {
    auto CheckCodeBuffer = [](CodeBuffer& Buffer, uintptr_t Address) {
      // The last page of the code buffer is protected, so we need to exclude it from the valid range
      // when checking if the address is in the code buffer.
      uintptr_t LastPageAddr = AlignDown(reinterpret_cast<uintptr_t>(Buffer.Ptr) + Buffer.AllocatedSize - 1, FEXCore::Utils::FEX_PAGE_SIZE);
      return (Address >= reinterpret_cast<uintptr_t>(Buffer.Ptr) && Address < LastPageAddr);
    };

    if (CheckCodeBuffer(*CurrentCodeBuffer, Address)) {
      return true;
    }
    for (auto& Buffer : SignalHandlerCodeBuffers) {
      if (CheckCodeBuffer(*Buffer, Address)) {
        return true;
      }
    }
    return false;
  }

} // namespace CPU
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/CPUBackend.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
category: backend ~ IR to host code generation
tags: backend|shared
$end_info$
*/

#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/fextl/map.h>

#include <cstdint>

namespace FEXCore::CPU {
union Relocation;
}

namespace FEXCore {

namespace IR {
  class IRListView;
} // namespace IR

namespace Core {
  struct DebugData;
  struct ThreadState;
  struct CpuStateFrame;
  struct InternalThreadState;
} // namespace Core

namespace CodeSerialize {
  struct CodeObjectFileSection;
}

struct GuestToHostMap;

namespace CPU {
  // A single region of memory that JIT code is emitted into.
  // The object owns the region: the destructor frees AllocatedSize bytes starting at Ptr.
  // Copying and moving are disabled, so the address of a CodeBuffer object never changes.
  struct CodeBuffer {
    // Start of the region
    uint8_t* Ptr;
    size_t AllocatedSize; // including guard page; see UsableSize()

    // Allocated by the constructor alongside the code memory.
    // NOTE(review): presumably maps guest addresses to host code inside this buffer (going by the type name) - confirm
    fextl::unique_ptr<GuestToHostMap> LookupCache;

    CodeBuffer(size_t Size);
    CodeBuffer(const CodeBuffer&) = delete;
    CodeBuffer& operator=(const CodeBuffer&) = delete;
    CodeBuffer(CodeBuffer&& oth) = delete;
    CodeBuffer& operator=(CodeBuffer&&) = delete;

    ~CodeBuffer();

    /// Returns the number of bytes available for storing code
    /// (the allocation minus the one page reserved as guard page)
    size_t UsableSize() const {
      return AllocatedSize - FEXCore::Utils::FEX_PAGE_SIZE;
    }
  };

  /**
   * A manager that coordinates access to the CodeBuffer used for compiling new code across threads.
   *
   * The CodeBuffer is managed as a partially persistent data structure:
   * - Exactly one CodeBuffer is designated as "active" at any time, which means data can be appended to it
   * - Lossy modifications to the active CodeBuffer will not invalidate any data in use by other threads (which is what enables safe CodeBuffer sharing across threads)
   * - Instead, such lossy modifications trigger a new "version" of the data in the modifying thread. Old versions of the CodeBuffer persist as read-only data for use by the other threads.
   * - The other threads can update their version of the CodeBuffer. This will decrease the reference count and eventually trigger deallocation of the old version
   */
  class CodeBufferManager {
  public:
    // Get the CodeBuffer that was most recently allocated.
    // This is the only CodeBuffer that data may be written to.
    // If no CodeBuffer has been allocated yet, the initial one is created by this call.
    fextl::shared_ptr<CodeBuffer> GetLatest();

    // Allocate a new CodeBuffer with geometric growth up to an internal maximum.
    // Subsequent calls to GetLatest will point to the returned buffer.
    fextl::shared_ptr<CodeBuffer> StartLargerCodeBuffer();

    // Write offset into the latest CodeBuffer
    // (reset to 0 whenever a new CodeBuffer is allocated)
    std::size_t LatestOffset {};

    // Protects writes to the latest CodeBuffer and changes to LatestOffset
    FEXCore::ForkableUniqueMutex CodeBufferWriteMutex;

    // Hook that is invoked with every newly allocated CodeBuffer. Does nothing by default.
    virtual void OnCodeBufferAllocated(const std::shared_ptr<CodeBuffer>&) {};

  private:
    // The most recently allocated CodeBuffer; null until the first allocation
    fextl::shared_ptr<CodeBuffer> Latest;

    // Allocates a CodeBuffer of the given size and makes it the latest one
    fextl::shared_ptr<CodeBuffer> AllocateNew(size_t Size);
  };

  // Abstract base class for code generators that turn IR into host code.
  // There is one CPUBackend per guest thread (see CompileCode); CodeBuffers are obtained
  // from the CodeBufferManager passed to the constructor.
  class CPUBackend {
  public:

    CPUBackend(CodeBufferManager&, FEXCore::Core::InternalThreadState*);

    virtual ~CPUBackend();

    // Result of a CompileCode call
    struct CompiledCode {
      // Where this code block begins.
      uint8_t* BlockBegin;
      // NOTE(review): presumably maps guest addresses to their host entry points within the block - confirm
      fextl::map<uint64_t, uint8_t*> EntryPoints;
      // The total size of the codeblock from [BlockBegin, BlockBegin+Size).
      size_t Size;
    };

    // Header that can live at the start of a JIT block.
    // We want the header to be quite small, with most data living in the tail object.
    struct JITCodeHeader {
      // Offset from the start of this header to where the tail lives.
      // Only 32-bit since the tail block won't ever be more than 4GB away.
      uint32_t OffsetToBlockTail;
    };

    // Header that can live at the end of the JIT block.
    // For any state reconstruction or other data, this is where it should live.
    // Any data that is explicitly tied to the JIT code and needs to be cached with it
    // should end up in this data structure.
    struct JITCodeTail {
      // The total size of the codeblock from [BlockBegin, BlockBegin+Size).
      size_t Size;

      // RIP that the block's entry comes from.
      uint64_t RIP;

      // The length of the guest code for this block.
      size_t GuestSize;

      // Number of RIP entries for this JIT Code section.
      uint32_t NumberOfRIPEntries;

      // Offset after this block to the start of the RIP entries.
      uint32_t OffsetToRIPEntries;

      // Shared-code modification spin-loop futex.
      uint32_t SpinLockFutex;

      // If this block represents a single guest instruction.
      bool SingleInst;

      // Explicit padding after the single bool member
      uint8_t _Pad[3];
    };

    /**
     * @brief Tells this CPUBackend to compile code for the provided IR and DebugData
     *
     * The returned pointer needs to be long lived and be executable in the host environment
     * FEXCore's frontend will store this pointer in to a cache for the current RIP when this was executed
     *
     * This is a thread specific compilation unit since there is one CPUBackend per guest thread
     *
     * @param Entry - NOTE(review): presumably the guest RIP the block is compiled for - confirm
     * @param Size - The byte size of the guest code for this block
     * @param SingleInst - If this block represents a single guest instruction
     * @param IR -  IR that maps to the IR for this RIP
     * @param DebugData - Debug data that is available for this IR indirectly
     * @param CheckTF - If EFLAGS.TF checks should be emitted at the start of the block
     *
     * @return Information about the compiled code block.
     */
    [[nodiscard]]
    virtual CompiledCode CompileCode(uint64_t Entry, uint64_t Size, bool SingleInst, const FEXCore::IR::IRListView* IR,
                                     FEXCore::Core::DebugData* DebugData, bool CheckTF) = 0;

    // NOTE(review): going by the name, hands the relocations gathered during compilation over to the caller - confirm
    virtual fextl::vector<FEXCore::CPU::Relocation> TakeRelocations(uint64_t GuestBaseAddress) = 0;

    // Optional hook; the default implementation does nothing
    virtual void ClearCache() {}

    /**
     * @brief Clear any relocations after JIT compiling
     */
    virtual void ClearRelocations() {}

    // Returns true if Address lies within the current CodeBuffer or one of the CodeBuffers
    // kept alive for signal handlers. The protected last page of each buffer is excluded.
    bool IsAddressInCodeBuffer(uintptr_t Address) const;

    // Updates the CodeBuffer if needed and returns a reference to the old one.
    // The returned reference should be kept alive carefully to avoid early deletion of resources.
    [[nodiscard]]
    fextl::shared_ptr<CodeBuffer> CheckCodeBufferUpdate();

  protected:
    // Max spill slot size in bytes. We need at most 32 bytes
    // to be able to handle a 256-bit vector store to a slot.
    constexpr static uint32_t MaxSpillSlotSize = 32;

    FEXCore::Core::InternalThreadState* ThreadState;

    [[nodiscard]]
    CodeBuffer* GetEmptyCodeBuffer();

    // This is the code buffer containing the main code under execution by this thread.
    // CheckCodeBufferUpdate must be used before compiling new code.
    fextl::shared_ptr<CodeBuffer> CurrentCodeBuffer;

    // Old CodeBuffer generations required to be valid until returning from signal handlers
    fextl::vector<fextl::shared_ptr<CodeBuffer>> SignalHandlerCodeBuffers;

    // The manager shared between threads that hands out CodeBuffers
    CodeBufferManager& CodeBuffers;

  private:
    void RegisterForSignalHandler(fextl::shared_ptr<CodeBuffer>);
  };

} // namespace CPU
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/CPUID.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: opcodes|cpuid
desc: Handles presented capability bits for guest cpu
$end_info$
*/

#include "Common/StringConv.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/CPUID.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CPUID.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/string.h>
#include <FEXHeaderUtils/Syscalls.h>

#include "git_version.h"

#include <cstring>

namespace FEXCore {
// Human readable names for the host CPU cores FEX knows about.
// Only available on arm64 hosts; CPUIDEmu::SetupHostHybridFlag assigns them to cores by MIDR.
namespace ProductNames {
#ifdef ARCHITECTURE_arm64
  // Fallback for every core that has no table entry
  static const char ARM_UNKNOWN[] = "Unknown ARM CPU";
  static const char ARM_A57[] = "Cortex-A57";
  static const char ARM_A72[] = "Cortex-A72";
  static const char ARM_A73[] = "Cortex-A73";
  static const char ARM_A75[] = "Cortex-A75";
  static const char ARM_A76[] = "Cortex-A76";
  static const char ARM_A76AE[] = "Cortex-A76AE";
  static const char ARM_V1[] = "Neoverse V1";
  static const char ARM_V2[] = "Neoverse V2";
  static const char ARM_V3[] = "Neoverse V3";
  static const char ARM_V3AE[] = "Neoverse V3AE";
  static const char ARM_A77[] = "Cortex-A77";
  static const char ARM_A78[] = "Cortex-A78";
  static const char ARM_A78AE[] = "Cortex-A78AE";
  static const char ARM_A78C[] = "Cortex-A78C";
  static const char ARM_A710[] = "Cortex-A710";
  static const char ARM_A715[] = "Cortex-A715";
  static const char ARM_A720[] = "Cortex-A720";
  static const char ARM_A725[] = "Cortex-A725";
  static const char ARM_C1Pro[] = "C1-Pro";
  static const char ARM_C1Premium[] = "C1-Premium";
  static const char ARM_X1[] = "Cortex-X1";
  static const char ARM_X1C[] = "Cortex-X1C";
  static const char ARM_X2[] = "Cortex-X2";
  static const char ARM_X3[] = "Cortex-X3";
  static const char ARM_X4[] = "Cortex-X4";
  static const char ARM_X925[] = "Cortex-X925";
  static const char ARM_C1Ultra[] = "C1-Ultra";
  static const char ARM_N1[] = "Neoverse N1";
  static const char ARM_N2[] = "Neoverse N2";
  static const char ARM_N3[] = "Neoverse N3";
  static const char ARM_E1[] = "Neoverse E1";
  static const char ARM_A35[] = "Cortex-A35";
  static const char ARM_A53[] = "Cortex-A53";
  static const char ARM_A55[] = "Cortex-A55";
  static const char ARM_A65[] = "Cortex-A65";
  static const char ARM_A510[] = "Cortex-A510";
  static const char ARM_A520[] = "Cortex-A520";
  static const char ARM_C1Nano[] = "C1-Nano";

  // Qualcomm Kryo "Gold" cores
  static const char ARM_Kryo200[] = "Kryo 2xx";
  static const char ARM_Kryo300[] = "Kryo 3xx";
  static const char ARM_Kryo400[] = "Kryo 4xx/5xx";

  // Qualcomm Kryo "Silver" cores
  static const char ARM_Kryo200S[] = "Kryo 2xx S";
  static const char ARM_Kryo300S[] = "Kryo 3xx S";
  static const char ARM_Kryo400S[] = "Kryo 4xx/5xx S";

  static const char ARM_Denver[] = "Nvidia Denver";
  static const char ARM_Carmel[] = "Nvidia Carmel";
  static const char ARM_Olympus[] = "Nvidia Olympus";

  static const char ARM_Firestorm_M1[] = "Apple Firestorm (M1)";
  static const char ARM_Icestorm_M1[] = "Apple Icestorm (M1)";
  static const char ARM_Firestorm_M1Pro[] = "Apple Firestorm (M1 Pro)";
  static const char ARM_Icestorm_M1Pro[] = "Apple Icestorm (M1 Pro)";
  static const char ARM_Firestorm_M1Max[] = "Apple Firestorm (M1 Max)";
  static const char ARM_Icestorm_M1Max[] = "Apple Icestorm (M1 Max)";
  static const char ARM_Avalanche_M2[] = "Apple Avalanche (M2)";
  static const char ARM_Blizzard_M2[] = "Apple Blizzard (M2)";
  static const char ARM_Avalanche_M2Pro[] = "Apple Avalanche (M2 Pro)";
  static const char ARM_Blizzard_M2Pro[] = "Apple Blizzard (M2 Pro)";
  static const char ARM_Avalanche_M2Max[] = "Apple Avalanche (M2 Max)";
  static const char ARM_Blizzard_M2Max[] = "Apple Blizzard (M2 Max)";
  static const char ARM_AppleSilicon[] = "Apple Silicon";

  static const char ARM_ORYON_1[] = "Oryon-1";
  static const char ARM_Ampere_1[] = "AmpereOne";
  static const char ARM_Ampere_1A[] = "AmpereOneA";
  static const char ARM_Ampere_1B[] = "AmpereOneB";
  static const char ARM_Ampere_1C[] = "AmpereOneC";
#endif
} // namespace ProductNames

// Asks the kernel through getcpu which CPU the calling thread is running on.
// The node output is not requested; the value stays 0 if the call writes nothing.
uint32_t GetCPUID_Syscall() {
  uint32_t CurrentCPU = 0;
  FHU::Syscalls::getcpu(&CurrentCPU, nullptr);
  return CurrentCPU;
}

// Individual fields of the processor signature that CPUID leaf 01h reports in EAX.
// Note that the declaration order differs from the bit order of the encoded value;
// GenerateFamily places each field at its final bit position.
struct CPUFamily {
  uint32_t Stepping         : 4;
  uint32_t Model            : 4;
  uint32_t ExtendedModel    : 4;
  uint32_t FamilyID         : 4;
  uint32_t ExtendedFamilyID : 8;
  uint32_t ProcessorType    : 4;
};

// Packs the fields of Family into the 32-bit processor signature:
//   [3:0] Stepping, [7:4] Model, [11:8] FamilyID, [15:12] ProcessorType,
//   [19:16] ExtendedModel, [27:20] ExtendedFamilyID
constexpr static uint32_t GenerateFamily(const CPUFamily Family) {
  uint32_t Signature = Family.Stepping;
  Signature |= Family.Model << 4;
  Signature |= Family.FamilyID << 8;
  Signature |= Family.ProcessorType << 12;
  Signature |= Family.ExtendedModel << 16;
  Signature |= Family.ExtendedFamilyID << 20;
  return Signature;
}

// Processor signature that Function_01h returns in EAX.
// Which of the two definitions is used depends on whether CPUID_AMD is defined at build time.
#ifdef CPUID_AMD
// Encodes to 0x00100FA0
constexpr uint32_t FAMILY_IDENTIFIER = GenerateFamily(CPUFamily {
  .Stepping = 0,
  .Model = 0xA,
  .ExtendedModel = 0,
  .FamilyID = 0xF,
  .ExtendedFamilyID = 1,
  .ProcessorType = 0,
});

#else
// Encodes to 0x000A0661
constexpr uint32_t FAMILY_IDENTIFIER = GenerateFamily(CPUFamily {
  .Stepping = 1,
  .Model = 6,
  .ExtendedModel = 0xA,
  .FamilyID = 6,
  .ExtendedFamilyID = 0,
  .ProcessorType = 0,
});
#endif

#ifdef ARCHITECTURE_arm64
// Returns the value of the CNTFRQ_EL0 system register
// (the frequency the Arm generic timer counter is advertised to run at).
uint64_t GetCycleCounterFrequency() {
  uint64_t Result {};
  // MRS copies the system register into a general purpose register
  __asm("mrs %[Res], CNTFRQ_EL0" : [Res] "=r"(Result));
  return Result;
}

// Returns the low 32 bits of the TPIDRRO_EL0 system register.
// NOTE(review): going by the function name, the register is expected to hold the current CPU number
// on hosts where this is used - confirm against the caller.
uint32_t GetCPUID_TPIDRRO() {
  uint64_t Result {};
  __asm("mrs %[Res], TPIDRRO_EL0" : [Res] "=r"(Result));
  // Implicitly truncated to the 32-bit return type
  return Result;
}

void CPUIDEmu::SetupHostHybridFlag() {
  FEX_CONFIG_OPT(HideHybrid, HIDEHYBRID);
  PerCPUData.resize(Cores);

  uint64_t MIDR {};
  for (size_t i = 0; i < Cores; ++i) {
    auto NewMIDR = CTX->HostFeatures.CPUMIDRs[i];
    if (MIDR != 0 && MIDR != NewMIDR) {
      // CPU mismatch, claim hybrid
      Hybrid = true;
    }

    // Truncate to 32-bits, top 32-bits are all reserved in MIDR
    PerCPUData[i].ProductName = ProductNames::ARM_UNKNOWN;
    PerCPUData[i].MIDR = NewMIDR;
    MIDR = NewMIDR;
  }

  if (HideHybrid()) {
    // Hide the hybrid flag.
    Hybrid = false;
  }

  struct CPUMIDR {
    uint8_t Implementer;
    uint16_t Part;
    bool DefaultBig; // Defaults to a big core
    const char* ProductName {};
  };

  // CPU priority order
  // This is mostly arbitrary but will sort by some sort of CPU priority by performance
  // Relative list so things they will commonly end up in big.little configurations sort of relate
  static constexpr std::array<CPUMIDR, 67> CPUMIDRs = {{
    // Typically big CPU cores
    {0x51, 0x001, 1, ProductNames::ARM_ORYON_1}, // Qualcomm Oryon-1

    {0x61, 0x039, 1, ProductNames::ARM_Avalanche_M2Max}, // Apple Avalanche (M2 Max)
    {0x61, 0x035, 1, ProductNames::ARM_Avalanche_M2Pro}, // Apple Avalanche (M2 Pro)
    {0x61, 0x033, 1, ProductNames::ARM_Avalanche_M2},    // Apple Avalanche (M2)
    {0x61, 0x029, 1, ProductNames::ARM_Firestorm_M1Max}, // Apple Firestorm (M1 Max)
    {0x61, 0x025, 1, ProductNames::ARM_Firestorm_M1Pro}, // Apple Firestorm (M1 Pro)
    {0x61, 0x023, 1, ProductNames::ARM_Firestorm_M1},    // Apple Firestorm (M1)
    {0x61, 0, 1, ProductNames::ARM_AppleSilicon},        // QEmu Apple Silicon

    {0x41, 0xd8c, 1, ProductNames::ARM_C1Ultra},   // C1-Ultra
    {0x41, 0xd90, 1, ProductNames::ARM_C1Premium}, // C1-Premium
    {0x41, 0xd8b, 1, ProductNames::ARM_C1Pro},     // C1-Pro
    {0x41, 0xd85, 1, ProductNames::ARM_X925},      // X925
    {0x41, 0xd87, 1, ProductNames::ARM_A725},      // A725
    {0x41, 0xd84, 1, ProductNames::ARM_V3},        // V3
    {0x41, 0xd83, 1, ProductNames::ARM_V3AE},      // V3AE
    {0x41, 0xd8e, 1, ProductNames::ARM_N3},        // N3
    {0x41, 0xd82, 1, ProductNames::ARM_X4},        // X4
    {0x41, 0xd81, 1, ProductNames::ARM_A720},      // A720
    {0x41, 0xd4e, 1, ProductNames::ARM_X3},        // X3
    {0x41, 0xd4d, 1, ProductNames::ARM_A715},      // A715
    {0x41, 0xd4f, 1, ProductNames::ARM_V2},        // V2
    {0x41, 0xd4b, 1, ProductNames::ARM_A78C},      // A78C
    {0x41, 0xd4a, 1, ProductNames::ARM_E1},        // E1
    {0x41, 0xd49, 1, ProductNames::ARM_N2},        // N2
    {0x41, 0xd48, 1, ProductNames::ARM_X2},        // X2
    {0x41, 0xd47, 1, ProductNames::ARM_A710},      // A710
    {0x41, 0xd4C, 1, ProductNames::ARM_X1C},       // X1C
    {0x41, 0xd44, 1, ProductNames::ARM_X1},        // X1
    {0x41, 0xd42, 1, ProductNames::ARM_A78AE},     // A78AE
    {0x41, 0xd41, 1, ProductNames::ARM_A78},       // A78
    {0x41, 0xd40, 1, ProductNames::ARM_V1},        // V1
    {0x41, 0xd0e, 1, ProductNames::ARM_A76AE},     // A76AE
    {0x41, 0xd0d, 1, ProductNames::ARM_A77},       // A77
    {0x41, 0xd0c, 1, ProductNames::ARM_N1},        // N1
    {0x41, 0xd0b, 1, ProductNames::ARM_A76},       // A76
    {0x51, 0x804, 1, ProductNames::ARM_Kryo400},   // Kryo 4xx Gold (A76 based)
    {0x41, 0xd0a, 1, ProductNames::ARM_A75},       // A75
    {0x51, 0x802, 1, ProductNames::ARM_Kryo300},   // Kryo 3xx Gold (A75 based)
    {0x41, 0xd09, 1, ProductNames::ARM_A73},       // A73
    {0x51, 0x800, 1, ProductNames::ARM_Kryo200},   // Kryo 2xx Gold (A73 based)
    {0x41, 0xd08, 1, ProductNames::ARM_A72},       // A72

    {0xc0, 0xac3, 1, ProductNames::ARM_Ampere_1},  // AmpereOne
    {0xc0, 0xac4, 1, ProductNames::ARM_Ampere_1A}, // AmpereOneA
    {0xc0, 0xac5, 1, ProductNames::ARM_Ampere_1B}, // AmpereOneB
    {0xc0, 0xac7, 1, ProductNames::ARM_Ampere_1C}, // AmpereOneC

    {0x4e, 0x010, 1, ProductNames::ARM_Olympus}, // Olympus
    {0x4e, 0x004, 1, ProductNames::ARM_Carmel},  // Carmel

    // Denver rated above A57 to match TX2 weirdness
    {0x4e, 0x003, 1, ProductNames::ARM_Denver}, // Denver

    {0x41, 0xd07, 1, ProductNames::ARM_A57}, // A57

    // Typically Little CPU cores
    {0x61, 0x038, 0, ProductNames::ARM_Blizzard_M2Max}, // Apple Blizzard (M2 Max)
    {0x61, 0x034, 0, ProductNames::ARM_Blizzard_M2Pro}, // Apple Blizzard (M2 Pro)
    {0x61, 0x032, 0, ProductNames::ARM_Blizzard_M2},    // Apple Blizzard (M2)
    {0x61, 0x028, 0, ProductNames::ARM_Icestorm_M1Max}, // Apple Icestorm (M1 Max)
    {0x61, 0x024, 0, ProductNames::ARM_Icestorm_M1Pro}, // Apple Icestorm (M1 Pro)
    {0x61, 0x022, 0, ProductNames::ARM_Icestorm_M1},    // Apple Icestorm (M1)

    {0x41, 0xd8a, 1, ProductNames::ARM_C1Nano},   // C1-Nano
    {0x41, 0xd80, 0, ProductNames::ARM_A520},     // A520
    {0x41, 0xd46, 0, ProductNames::ARM_A510},     // A510
    {0x41, 0xd06, 0, ProductNames::ARM_A65},      // A65
    {0x41, 0xd05, 0, ProductNames::ARM_A55},      // A55
    {0x51, 0x805, 0, ProductNames::ARM_Kryo400S}, // Kryo 4xx/5xx Silver (A55 based)
    {0x51, 0x803, 0, ProductNames::ARM_Kryo300S}, // Kryo 3xx Silver (A55 based)
    {0x41, 0xd03, 0, ProductNames::ARM_A53},      // A53
    {0x51, 0x801, 0, ProductNames::ARM_Kryo200S}, // Kryo 2xx Silver (A53 based)
    {0x41, 0xd04, 0, ProductNames::ARM_A35},      // A35

    {0x41, 0, 0, ProductNames::ARM_UNKNOWN}, // Invalid CPU or Apple CPU inside Parallels VM
    {0x0, 0, 0, ProductNames::ARM_UNKNOWN},  // Invalid starting point is lowest ranked
  }};

  auto FindDefinedMIDR = [](uint32_t MIDR) -> const CPUMIDR* {
    uint8_t Implementer = MIDR >> 24;
    uint16_t Part = (MIDR >> 4) & 0xFFF;

    for (auto& MIDROption : CPUMIDRs) {
      if (MIDROption.Implementer == Implementer && MIDROption.Part == Part) {
        return &MIDROption;
      }
    }

    return nullptr;
  };

  if (Hybrid) {
    // Walk the MIDRs and calculate big little designs
    fextl::vector<const CPUMIDR*> BigCores;
    fextl::vector<const CPUMIDR*> LittleCores;

    // Separate CPU cores out to big or little selected
    for (size_t i = 0; i < Cores; ++i) {
      uint32_t MIDR = PerCPUData[i].MIDR;
      auto MIDROption = FindDefinedMIDR(MIDR);
      if (MIDROption) {
        // Found one
        if (MIDROption->DefaultBig) {
          BigCores.emplace_back(MIDROption);
        } else {
          LittleCores.emplace_back(MIDROption);
        }
      } else {
        // If we didn't insert this MIDR then claim it is a little core.
        LittleCores.emplace_back(&CPUMIDRs.back());
      }
    }

    if (LittleCores.empty()) {
      // If we only ended up with big cores then we need to move some to be little cores
      uint32_t LowestMIDR = ~0U;
      uint32_t LowestMIDRIdx = 0;
      // Walk all the big cores
      for (size_t i = 0; i < BigCores.size(); ++i) {
        uint8_t Implementer = BigCores[i]->Implementer;
        uint16_t Part = BigCores[i]->Part;

        // Walk our list of CPUMIDRs to find the most little core
        for (size_t j = LowestMIDRIdx; j < CPUMIDRs.size(); ++j) {
          auto& MIDROption = CPUMIDRs[i];
          if ((MIDROption.Implementer == Implementer && MIDROption.Part == Part) || (MIDROption.Implementer == 0 && MIDROption.Part == 0)) {

            LowestMIDRIdx = j;
            LowestMIDR = MIDR;
            break;
          }
        }
      }

      // Now we WILL have found a big core to demote to little status
      // Demote them
      std::erase_if(BigCores, [&LittleCores, LowestMIDR](auto* Entry) {
        // Demote by erase copy to little array
        uint8_t Implementer = LowestMIDR >> 24;
        uint16_t Part = (LowestMIDR >> 4) & 0xFFF;

        if (Entry->Implementer == Implementer && Entry->Part == Part) {
          // Add it to the BigCore list
          LittleCores.emplace_back(Entry);
          return true;
        }
        return false;
      });
    }

    if (BigCores.empty()) {
      // We never found a CPU core we understand
      // Grab the first core, consider it as little, move everything else to Big
      uint32_t LittleMIDR = PerCPUData[0].MIDR;
      // Now walk the little cores and move them to Big if they don't match
      std::erase_if(LittleCores, [&BigCores, LittleMIDR](auto* Entry) {
        // You're promoted now
        uint8_t Implementer = LittleMIDR >> 24;
        uint16_t Part = (LittleMIDR >> 4) & 0xFFF;

        if (Entry->Implementer != Implementer || Entry->Part != Part) {
          // Add it to the BigCore list
          BigCores.emplace_back(Entry);
          return true;
        }
        return false;
      });
    }

    // Now walk the per CPU data one more time and set if it is big or little
    for (auto& Data : PerCPUData) {
      uint8_t Implementer = Data.MIDR >> 24;
      uint16_t Part = (Data.MIDR >> 4) & 0xFFF;

      bool FoundBig {};
      const CPUMIDR* MIDR {};
      for (auto Big : BigCores) {
        if (Big->Implementer == Implementer && Big->Part == Part) {
          FoundBig = true;
          MIDR = Big;
          break;
        }
      }

      if (!FoundBig) {
        for (auto Little : LittleCores) {
          if (Little->Implementer == Implementer && Little->Part == Part) {
            MIDR = Little;
            break;
          }
        }
      }

      Data.IsBig = FoundBig;
      if (MIDR) {
        Data.ProductName = MIDR->ProductName ?: ProductNames::ARM_UNKNOWN;
      } else {
        Data.ProductName = ProductNames::ARM_UNKNOWN;
      }
    }
  } else {
    // If we aren't hybrid then just claim everything is big
    for (size_t i = 0; i < Cores; ++i) {
      const auto MIDRIndex = HideHybrid() ? 0 : i;
      uint32_t MIDR = PerCPUData[MIDRIndex].MIDR;
      auto MIDROption = FindDefinedMIDR(MIDR);

      PerCPUData[i].IsBig = true;
      if (MIDROption) {
        PerCPUData[i].ProductName = MIDROption->ProductName ?: ProductNames::ARM_UNKNOWN;
      } else {
        PerCPUData[i].ProductName = ProductNames::ARM_UNKNOWN;
      }
    }
  }
}

#else
// Fallback for hosts that aren't arm64: always reports a frequency of 0.
uint64_t GetCycleCounterFrequency() {
  return 0;
}

// Fallback for hosts that aren't arm64: no per-core information is set up.
void CPUIDEmu::SetupHostHybridFlag() {}

#endif


// Derives guest visible feature state from the host feature set:
// the AVX bit of XCR0 and the SHA feature flag.
void CPUIDEmu::SetupFeatures() {
  const auto& Host = CTX->HostFeatures;

  // AVX state is only advertised in XCR0 when the host can provide AVX
  if (Host.SupportsAVX) {
    XCR0 |= XCR0_AVX;
  }

  Features.SHA = Host.SupportsSHA;
}

// CPUID function 0h: EAX carries the leaf number reported for this function,
// EBX, EDX and ECX (in this order) form the manufacturer id string.
// The Leaf argument is not used.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0h(uint32_t Leaf) const {
#ifdef CPUID_AMD
  const auto ReportedLeaf = 0x0D; // Let's say we are a Zen+
  const auto VendorPart1 = CPUID_VENDOR_AMD1;
  const auto VendorPart2 = CPUID_VENDOR_AMD2;
  const auto VendorPart3 = CPUID_VENDOR_AMD3;
#else
  const auto ReportedLeaf = 0x16; // Let's say we are a Skylake
  const auto VendorPart1 = CPUID_VENDOR_INTEL1;
  const auto VendorPart2 = CPUID_VENDOR_INTEL2;
  const auto VendorPart3 = CPUID_VENDOR_INTEL3;
#endif

  FEXCore::CPUID::FunctionResults Res {};
  Res.eax = ReportedLeaf;
  Res.ebx = VendorPart1;
  Res.edx = VendorPart2;
  Res.ecx = VendorPart3;
  return Res;
}

// Processor Info and Features bits
// EAX: processor signature (FAMILY_IDENTIFIER)
// EBX: brand index, CLFLUSH line size, logical core count and local APIC ID
// ECX/EDX: feature bits; a number of them depend on what the host supports
// The Leaf argument is not used.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_01h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};

  // Hypervisor bit is normally set but some applications have issues with it.
  uint32_t Hypervisor = HideHypervisorBit() ? 0 : 1;

  Res.eax = FAMILY_IDENTIFIER;

  Res.ebx = 0 |                 // Brand index
            (8 << 8) |          // CLFLUSH line size in units of 8 bytes (8 * 8 = 64 byte cache line)
            (Cores << 16) |     // Number of addressable IDs for the logical cores in the physical CPU
            (GetCPUID() << 24); // Local APIC ID

  Res.ecx = (1 << 0) |                                      // SSE3
            (CTX->HostFeatures.SupportsPMULL_128Bit << 1) | // PCLMULQDQ
            (1 << 2) |                                      // DS area supports 64bit layout
            (1 << 3) |                                      // MWait
            (0 << 4) |                                      // DS-CPL
            (0 << 5) |                                      // VMX
            (0 << 6) |                                      // SMX
            (0 << 7) |                                      // Intel SpeedStep
            (1 << 8) |                                      // Thermal Monitor 2
            (1 << 9) |                                      // SSSE3
            (0 << 10) |                                     // L1 context ID
            (0 << 11) |                                     // Silicon debug
            (SupportsAVX() << 12) |                         // FMA3
            (1 << 13) |                                     // CMPXCHG16B
            (0 << 14) |                                     // xTPR update control
            (0 << 15) |                                     // Perfmon and debug capability
            (0 << 16) |                                     // Reserved
            (0 << 17) |                                     // Process-context identifiers
            (0 << 18) |                                     // Prefetching from memory mapped device
            (1 << 19) |                                     // SSE4.1
            (CTX->HostFeatures.SupportsCRC << 20) |         // SSE4.2
            (0 << 21) |                                     // X2APIC
            (1 << 22) |                                     // MOVBE
            (1 << 23) |                                     // POPCNT
            (0 << 24) |                                     // APIC TSC-Deadline
            (CTX->HostFeatures.SupportsAES << 25) |         // AES
            (SupportsAVX() << 26) |                         // XSAVE
            (SupportsAVX() << 27) |                         // OSXSAVE
            (SupportsAVX() << 28) |                         // AVX
            (SupportsAVX() << 29) |                         // F16C
            (CTX->HostFeatures.SupportsRAND << 30) |        // RDRAND
            (Hypervisor << 31);

  Res.edx = (1 << 0) |  // FPU
            (1 << 1) |  // Virtual 8086 mode enhancements
            (0 << 2) |  // Debugging extensions
            (0 << 3) |  // Page size extension
            (1 << 4) |  // RDTSC supported
            (1 << 5) |  // MSR supported
            (1 << 6) |  // PAE
            (1 << 7) |  // Machine Check exception
            (1 << 8) |  // CMPXCHG8B
            (1 << 9) |  // APIC on-chip
            (0 << 10) | // Reserved
            (1 << 11) | // SYSENTER/SYSEXIT
            (1 << 12) | // Memory Type Range registers, MTRRs are supported
            (1 << 13) | // Page Global bit
            (1 << 14) | // Machine Check architecture
            (1 << 15) | // CMOV
            (1 << 16) | // Page Attribute Table
            (1 << 17) | // 36bit page size extension
            (0 << 18) | // Processor serial number
            (1 << 19) | // CLFLUSH
            (0 << 20) | // Reserved
            (0 << 21) | // Debug store
            (0 << 22) | // Thermal monitor and software controlled clock
            (1 << 23) | // MMX
            (1 << 24) | // FXSAVE/FXRSTOR
            (1 << 25) | // SSE
            (1 << 26) | // SSE2
            (0 << 27) | // Self Snoop
            (0 << 28) | // (HTT) Max APIC IDs reserved field is valid
            (1 << 29) | // Thermal monitor
            (0 << 30) | // Reserved
            (0 << 31);  // Pending break enable
  return Res;
}

// 2: Cache and TLB information
// Every register holds four descriptor bytes. The reported values are constant and
// don't depend on the Leaf argument.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_02h(uint32_t Leaf) const {
  // Combines four bytes into one register value, Byte0 being the least significant one
  constexpr auto PackBytes = [](uint32_t Byte0, uint32_t Byte1, uint32_t Byte2, uint32_t Byte3) -> uint32_t {
    return Byte0 | (Byte1 << 8) | (Byte2 << 16) | (Byte3 << 24);
  };

  FEXCore::CPUID::FunctionResults Res {};

  // returns default values from i7 model 1Ah
  // The lowest byte of EAX is the number of iterations needed for all descriptors
  Res.eax = PackBytes(0x1, 0x5A, 0x03, 0x55);
  Res.ebx = PackBytes(0xE4, 0xB2, 0xF0, 0);
  Res.ecx = 0; // null descriptors
  Res.edx = PackBytes(0x2C, 0x21, 0xCA, 0x09);

  return Res;
}

// 4: Deterministic cache parameters for each level
//
// The sub-leaf index (passed in as Leaf) selects which cache is described:
//   0 = L1 data, 1 = L1 instruction, 2 = L2 unified, 3 = L3 unified.
// Any other sub-leaf returns all zeros, which leaves the cache type field at 0.
//
// The guest computes the cache size as ways * partitions * line size * sets,
// where EBX and ECX store each of those quantities minus one.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_04h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};
  constexpr uint32_t CacheType_Data = 1;
  constexpr uint32_t CacheType_Instruction = 2;
  constexpr uint32_t CacheType_Unified = 3;

  // Every reported level shares the same line size, partitioning and associativity.
  constexpr uint32_t LineAndWays = (63 << 0) | // Line Size - 1 : Claiming 64 byte
                                   (0 << 12) | // Physical Line partitions
                                   (7 << 22);  // Associativity - 1 : Claiming 8 way

  // The "maximum addressable ID" fields are encoded as count - 1.
  const uint32_t CoreCount = Cores - 1;

  if (Leaf == 0) {
    // Report L1D
    Res.eax = CacheType_Data |   // Cache type
              (0b001 << 5) |     // Cache level
              (1 << 8) |         // Self initializing cache level
              (0 << 9) |         // Fully associative
              (0 << 14) |        // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1)
              (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package

    Res.ebx = LineAndWays;

    // 32KB
    Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1) | // Cache inclusiveness - Includes lower caches
              (0 << 2);  // Complex cache indexing - 0: Direct, 1: Complex
  } else if (Leaf == 1) {
    // Report L1I
    Res.eax = CacheType_Instruction | // Cache type
              (0b001 << 5) |          // Cache level
              (1 << 8) |              // Self initializing cache level
              (0 << 9) |              // Fully associative
              (0 << 14) |        // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1)
              (CoreCount << 26); // Maximum number of addressable IDs for processor cores in the physical package

    Res.ebx = LineAndWays;

    // 32KB
    Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1) | // Cache inclusiveness - Includes lower caches
              (0 << 2);  // Complex cache indexing - 0: Direct, 1: Complex
  } else if (Leaf == 2) {
    // Report L2
    Res.eax = CacheType_Unified | // Cache type
              (0b010 << 5) |      // Cache level
              (1 << 8) |          // Self initializing cache level
              (0 << 9) |          // Fully associative
              (0 << 14) |         // Maximum number of addressable IDs for logical processors sharing this cache
              (CoreCount << 26);  // Maximum number of addressable IDs for processor cores in the physical package

    Res.ebx = LineAndWays;

    // 512KB
    Res.ecx = 0x3FF; // Number of sets - 1 : Claiming 1024 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1) | // Cache inclusiveness - Includes lower caches
              (0 << 2);  // Complex cache indexing - 0: Direct, 1: Complex
  } else if (Leaf == 3) {
    // Report L3
    Res.eax = CacheType_Unified | // Cache type
              (0b011 << 5) |      // Cache level
              (1 << 8) |          // Self initializing cache level
              (0 << 9) |          // Fully associative
              (CoreCount << 14) | // Maximum number of addressable IDs for logical processors sharing this cache
              (CoreCount << 26);  // Maximum number of addressable IDs for processor cores in the physical package

    Res.ebx = LineAndWays;

    // 8MB
    // The field holds sets - 1, so 16384 sets is encoded as 0x3FFF (0x4000 would describe 16385 sets).
    Res.ecx = 0x3FFF; // Number of sets - 1 : Claiming 16384 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1) | // Cache inclusiveness - Includes lower caches
              (1 << 2);  // Complex cache indexing - 0: Direct, 1: Complex
  }

  return Res;
}

// 6: Thermal and power management
// Only the always-running APIC bit is reported; the sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_06h(uint32_t Leaf) const {
  constexpr uint32_t AlwaysRunningAPIC = 1U << 2;       // EAX bit 2: Always running APIC
  constexpr uint32_t EnergyBiasPreference = 0U << 3;    // ECX bit 3: Intel performance energy bias preference (EPB), not reported

  FEXCore::CPUID::FunctionResults Result {};
  Result.eax = AlwaysRunningAPIC;
  Result.ecx = EnergyBiasPreference;
  return Result;
}

// 7: Structured extended feature flags
// Only sub-leaf 0 is populated; every other sub-leaf returns all zeros.
// Bits written as (0 << n) are features that are deliberately not reported; they are kept
// in the table so that every bit position stays documented.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_07h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};
  if (Leaf == 0) {
    // Disable Enhanced REP MOVS when TSO is enabled.
    // vcruntime140 memmove will use `rep movsb` in this case which completely destroys perf in Hades(appId 1145360)
    // This is due to LRCPC performance on Cortex being abysmal.
    // Only enable EnhancedREPMOVS if atomic memcpy tso emulation isn't enabled.
    const uint32_t SupportsEnhancedREPMOVS = CTX->IsMemcpyAtomicTSOEnabled() == false;
    // VPCLMULQDQ needs both the host 128-bit PMULL feature and AVX being exposed to the guest.
    const uint32_t SupportsVPCLMULQDQ = CTX->HostFeatures.SupportsPMULL_128Bit && SupportsAVX();
    // The waitpkg bit below follows the host WFXT feature flag.
    const uint32_t SupportsWFXT = CTX->HostFeatures.SupportsWFXT;

    // Number of subfunctions
    Res.eax = 0x0;
    // BMI1, AVX2 and BMI2 are all gated on SupportsAVX().
    Res.ebx = (1 << 0) |                               // FS/GS support
              (0 << 1) |                               // TSC adjust MSR
              (0 << 2) |                               // SGX
              (SupportsAVX() << 3) |                   // BMI1
              (0 << 4) |                               // Intel Hardware Lock Elision
              (SupportsAVX() << 5) |                   // AVX2 support
              (1 << 6) |                               // FPU data pointer updated only on exception
              (1 << 7) |                               // SMEP support
              (SupportsAVX() << 8) |                   // BMI2
              (SupportsEnhancedREPMOVS << 9) |         // Enhanced REP MOVSB/STOSB
              (1 << 10) |                              // INVPCID for system software control of process-context
              (0 << 11) |                              // Restricted transactional memory
              (0 << 12) |                              // Intel resource directory technology Monitoring
              (1 << 13) |                              // Deprecates FPU CS and DS
              (0 << 14) |                              // Intel MPX
              (0 << 15) |                              // Intel Resource Directory Technology Allocation
              (0 << 16) |                              // AVX512-F
              (0 << 17) |                              // AVX512-DQ
              (CTX->HostFeatures.SupportsRAND << 18) | // RDSEED
              (1 << 19) |                              // ADCX and ADOX instructions
              (0 << 20) |                              // SMAP Supervisor mode access prevention and CLAC/STAC instructions
              (0 << 21) |                              // AVX512-IFMA
              (0 << 22) |                              // PCOMMIT (deprecated?)
              (1 << 23) |                              // CLFLUSHOPT instruction
              (1 << 24) |                              // CLWB instruction
              (0 << 25) |                              // Intel processor trace
              (0 << 26) |                              // AVX512-PF
              (0 << 27) |                              // AVX512-ER
              (0 << 28) |                              // AVX512-CD
              (Features.SHA << 29) |                   // SHA instructions
              (0 << 30) |                              // AVX512-BW
              (0 << 31);                               // AVX512-VL

    Res.ecx = (1 << 0) |                                // PREFETCHWT1
              (0 << 1) |                                // AVX512VBMI
              (0 << 2) |                                // Usermode instruction prevention
              (0 << 3) |                                // Protection keys for user mode pages
              (0 << 4) |                                // OS protection keys
              (SupportsWFXT << 5) |                     // waitpkg
              (0 << 6) |                                // AVX512-VBMI2
              (0 << 7) |                                // CET shadow stack
              (0 << 8) |                                // GFNI
              (CTX->HostFeatures.SupportsAES256 << 9) | // VAES
              (SupportsVPCLMULQDQ << 10) |              // VPCLMULQDQ
              (0 << 11) |                               // AVX512-VNNI
              (0 << 12) |                               // AVX512-BITALG
              (0 << 13) |                               // Intel Total Memory Encryption
              (0 << 14) |                               // AVX512-VPOPCNTDQ
              (0 << 15) |                               // FZM (TDX)
              (0 << 16) |                               // 5 Level page tables
              (0 << 17) |                               // MPX MAWAU
              (0 << 18) |                               // MPX MAWAU
              (0 << 19) |                               // MPX MAWAU
              (0 << 20) |                               // MPX MAWAU
              (0 << 21) |                               // MPX MAWAU
              (1 << 22) |                               // RDPID Read Processor ID
              (0 << 23) |                               // AES Key Locker
              (1 << 24) |                               // bus-lock-detect
              (0 << 25) |                               // CLDEMOTE
              (0 << 26) |                               // MPRR (TDX)
              (0 << 27) |                               // MOVDIRI
              (0 << 28) |                               // MOVDIR64B
              (0 << 29) |                               // ENQCMD
              (0 << 30) |                               // SGX Launch configuration
              (0 << 31);                                // PKS

    Res.edx = (0 << 0) |                   // SGX-TEM (TDX)
              (0 << 1) |                   // SGX-KEYS
              (0 << 2) |                   // AVX512-4VNNIW
              (0 << 3) |                   // AVX512-4FMAPS
              (1 << 4) |                   // Fast Short Rep Mov
              (0 << 5) |                   // UINTR
              (0 << 6) |                   // Reserved
              (0 << 7) |                   // Reserved
              (0 << 8) |                   // AVX512-VP2INTERSECT
              (0 << 9) |                   // SRBDS_CTRL (Special Register Buffer Data Sampling Mitigations)
              (0 << 10) |                  // VERW clears CPU buffers
              (0 << 11) |                  // rtm-always-abort
              (0 << 12) |                  // Reserved
              (0 << 13) |                  // TSX Force Abort (TSX will force abort if attempted)
              (0 << 14) |                  // SERIALIZE instruction
              ((Hybrid ? 1U : 0U) << 15) | // Hybrid
              (0 << 16) |                  // TSXLDTRK (TSX Suspend load address tracking) - Allows untracked memory loads inside TSX region
              (0 << 17) |                  // Reserved
              (0 << 18) |                  // Intel PCONFIG
              (0 << 19) |                  // Intel Architectural LBR
              (0 << 20) |                  // Intel CET
              (0 << 21) |                  // Reserved
              (0 << 22) |                  // AMX-BF16 - Tile computation on bfloat16
              (0 << 23) |                  // AVX512-FP16 - FP16 AVX512 instructions
              (0 << 24) |                  // AMX-tile - If AMX is implemented
              (0 << 25) |                  // AMX-int8 - AMX on 8-bit integers
              (0 << 26) |                  // IBRS_IBPB - Speculation control
              (0 << 27) |                  // STIBP - Single Thread Indirect Branch Predictor, Part of IBC
              (0 << 28) |                  // L1D Flush
              (0 << 29) |                  // Arch capabilities - Speculative side channel mitigations
              (0 << 30) |                  // Arch capabilities - MSR module specific
              (0 << 31);                   // SSBD - Speculative Store Bypass Disable
  }

  return Res;
}

// 0Dh: Processor extended state (XSAVE) enumeration
// Sub-leaf 0 reports the state components managed through XCR0, sub-leaf 1 the XSAVE
// instruction extensions, and sub-leaf 2 where the YMM state lives inside the save area.
// Every other sub-leaf returns all zeros.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_0Dh(uint32_t Leaf) const {
  const auto HasAVX = SupportsAVX();

  // Size of the save area: legacy FPU/SSE region and header, plus the YMM state when AVX is exposed.
  const uint32_t SaveAreaSize = HasAVX ? 0x0000'0340 : 0x0000'0240;

  FEXCore::CPUID::FunctionResults Res {};

  switch (Leaf) {
  case 0:
    // XFeatureSupportedMask[31:0]
    // Bit 0: x87, bit 1: 128-bit SSE, bit 2: 256-bit AVX.
    // MPX (bits 3-4), AVX-512 (bits 5-7), bit 8 and PKRU (bit 9) are not reported.
    Res.eax = (1 << 0) | (1 << 1) | (HasAVX << 2);

    // EBX (size for the enabled features) and ECX (size for all supported features)
    // only need to differ when a feature is supported but not enabled.
    Res.ebx = SaveAreaSize;
    Res.ecx = SaveAreaSize;

    // XFeatureSupportedMask[63:32]
    Res.edx = 0;
    break;

  case 1:
    // Bit 0: XSAVEOPT. XSAVEC, XGETBV with ECX=1 and XSAVES (bits 1-3) are not reported.
    Res.eax = (1 << 0);

    // Same size information as sub-leaf 0.
    Res.ebx = SaveAreaSize;

    // ECX/EDX: lower and upper halves of the supported IA32_XSS bits. Nothing is supported.
    Res.ecx = 0;
    Res.edx = 0;
    break;

  case 2:
    if (HasAVX) {
      Res.eax = 0x0000'0100; // YmmSaveStateSize
      Res.ebx = 0x0000'0240; // YmmSaveStateOffset
    }
    // ECX and EDX are reserved and stay zero.
    break;

  default:
    break;
  }

  return Res;
}

// 15h: Time stamp counter and core crystal clock information
// The guest derives the TSC frequency as ECX * EBX / EAX.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_15h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};

  const uint64_t CounterHz = GetCycleCounterFrequency();
  if (CounterHz == 0) {
    // No frequency available: report nothing.
    return Res;
  }

  Res.eax = 1;                          // Denominator
  Res.ebx = 1U << CTX->Config.TSCScale; // Numerator: the configured TSC scale as a power of two
  Res.ecx = CounterHz;                  // Base frequency in Hz (stored in a 32-bit register)
  return Res;
}

// 1Ah: Hybrid information
// On hybrid systems, EAX[31:24] carries the core type of the CPU the caller is running on.
// Non-hybrid systems return all zeros.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_1Ah(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};
  if (!Hybrid) {
    return Res;
  }

  constexpr uint32_t CoreType_Big = 0x40;
  constexpr uint32_t CoreType_Little = 0x20;

  const uint32_t CurrentCPU = GetCPUID();
  const auto& CoreInfo = PerCPUData[CurrentCPU];
  const uint32_t CoreType = CoreInfo.IsBig ? CoreType_Big : CoreType_Little;

  Res.eax |= CoreType << 24;
  return Res;
}

// Hypervisor CPUID information leaf
//
// Layout follows VMWare's "Hypervisor CPUID Interface proposal" as a common courtesy:
//   4000_0000h              - Information leaf. Tells software which hypervisor this is.
//   4000_0001h - 4000_000Fh - Hypervisor specific leafs. FEX can use these for anything.
//   4000_0010h - 4000_00FFh - "Generic Leafs". Try not to overwrite, other hypervisors might expect information here.
//
// CPUID documentation reserves 4000_0000h - 4FFF_FFFFh entirely for VMs; no existing or
// future CPU will return information in this range.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0000h(uint32_t Leaf) const {
  // Hypervisor ID signature: 11 characters plus the terminating NUL, 12 bytes in total.
  constexpr static char Signature[12] = "FEXIFEXIEMU";

  FEXCore::CPUID::FunctionResults Res {};

  // Highest hypervisor leaf that is advertised.
  Res.eax = 0x40000001;

  // The 12 signature bytes are written starting at ebx.
  // NOTE(review): relies on ebx, ecx and edx being laid out back to back in FunctionResults - confirm.
  memcpy(&Res.ebx, Signature, sizeof(Signature));
  return Res;
}

// Compile-time copy of GIT_DESCRIBE_STRING, including its terminating NUL.
// Function_4000_0001h hands this to the guest in two 16-byte pieces (sub-leaves 1 and 2).
constexpr std::array<char, std::char_traits<char>::length(GIT_DESCRIBE_STRING) + 1> GitString = {GIT_DESCRIBE_STRING};
// The string, terminator included, has to stay below 32 bytes.
static_assert(GitString.size() < 32);

// Hypervisor specific leaf 4000_0001h: FEX information
//   Sub-leaf 0: EAX[3:0] = host architecture, EAX[31:16] = highest valid sub-leaf.
//   Sub-leaf 1: first 16 bytes of the git describe string.
//   Sub-leaf 2: following 16 bytes of the git describe string.
// Registers not covered by string data, and every other sub-leaf, are zero.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_4000_0001h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};
  constexpr uint32_t MaximumSubLeafNumber = 2;

  // Byte offset into GitString at which the sub-leaf 2 data begins.
  constexpr size_t SecondChunkOffset = 16;

  // Copies at most one result structure worth of GitString bytes, beginning at Offset.
  // When Offset is at or past the end of the string nothing is copied, and no pointer
  // beyond the array is ever formed.
  const auto CopyGitStringChunk = [&Res](size_t Offset) {
    if (Offset >= GitString.size()) {
      return;
    }
    const size_t Remaining = GitString.size() - Offset;
    memcpy(&Res, GitString.data() + Offset, std::min<size_t>(Remaining, sizeof(FEXCore::CPUID::FunctionResults)));
  };

  if (Leaf == 0) {
    // EAX[3:0] Is the host architecture that FEX is running under
#ifdef ARCHITECTURE_x86_64
    // EAX[3:0] = 1 = x86_64 host architecture
    Res.eax |= 0b0001;
#elif defined(ARCHITECTURE_arm64)
    // EAX[3:0] = 2 = AArch64 host architecture
    Res.eax |= 0b0010;
#else
    // EAX[3:0] = 0 = Unknown architecture
#endif

    // EAX[15:4] = Reserved

    // EAX[31:16] = Maximum sub-leaf value.
    Res.eax |= MaximumSubLeafNumber << 16;
  } else if (Leaf == 1) {
    CopyGitStringChunk(0);
  } else if (Leaf == 2) {
    CopyGitStringChunk(SecondChunkOffset);
  }

  return Res;
}

// Highest extended function implemented
// EAX holds the highest extended leaf; EBX, EDX, ECX repeat the manufacturer id string,
// just like cpuid function 0.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0000h(uint32_t Leaf) const {
#ifdef CPUID_AMD
  constexpr uint32_t VendorPart1 = CPUID_VENDOR_AMD1;
  constexpr uint32_t VendorPart2 = CPUID_VENDOR_AMD2;
  constexpr uint32_t VendorPart3 = CPUID_VENDOR_AMD3;
#else
  constexpr uint32_t VendorPart1 = CPUID_VENDOR_INTEL1;
  constexpr uint32_t VendorPart2 = CPUID_VENDOR_INTEL2;
  constexpr uint32_t VendorPart3 = CPUID_VENDOR_INTEL3;
#endif

  FEXCore::CPUID::FunctionResults Res {};
  Res.eax = 0x8000001F;
  Res.ebx = VendorPart1;
  Res.edx = VendorPart2;
  Res.ecx = VendorPart3;
  return Res;
}

// Extended processor and feature bits
// EAX repeats FAMILY_IDENTIFIER; ECX and EDX are feature bitmaps. The sub-leaf argument is ignored.
// Bits written as (0 << n) are features that are deliberately not reported; they are kept
// in the table so that every bit position stays documented.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0001h(uint32_t Leaf) const {

#ifndef _WIN32
  // Outside of WIN32 builds RDTSCP is always reported.
  constexpr uint32_t SUPPORTS_RDTSCP = 1;
#else
  // RDTSCP under WIN32 is only supported if CPUIndex is available in TPIDRRO.
  const uint32_t SUPPORTS_RDTSCP = SupportsCPUIndexInTPIDRRO;
#endif
  FEXCore::CPUID::FunctionResults Res {};

  Res.eax = FAMILY_IDENTIFIER;

  Res.ecx = (1 << 0) |                               // LAHF/SAHF
            (1 << 1) |                               // 0 = Single core product, 1 = multi core product
            (0 << 2) |                               // SVM
            (1 << 3) |                               // Extended APIC register space
            (0 << 4) |                               // LOCK MOV CR0 means MOV CR8
            (1 << 5) |                               // ABM instructions
            (CTX->HostFeatures.SupportsSSE4a << 6) | // SSE4a
            (0 << 7) |                               // Misaligned SSE mode
            (1 << 8) |                               // PREFETCHW
            (0 << 9) |                               // OS visible workaround support
            (0 << 10) |                              // Instruction based sampling support
            (0 << 11) |                              // XOP
            (0 << 12) |                              // SKINIT
            (0 << 13) |                              // Watchdog timer support
            (0 << 14) |                              // Reserved
            (0 << 15) |                              // Lightweight profiling support
            (0 << 16) |                              // FMA4
            (1 << 17) |                              // Translation cache extension
            (0 << 18) |                              // Reserved
            (0 << 19) |                              // Reserved
            (0 << 20) |                              // Reserved
            (0 << 21) |                              // XOP-TBM
            (0 << 22) |                              // Topology extensions support
            (0 << 23) |                              // Core performance counter extensions
            (0 << 24) |                              // NB performance counter extensions
            (0 << 25) |                              // Reserved
            (0 << 26) |                              // Data breakpoints extensions
            (0 << 27) |                              // Performance TSC
            (0 << 28) |                              // L2 perf counter extensions
            (0 << 29) |                              // MONITORX
            (0 << 30) |                              // Reserved
            (0 << 31);                               // Reserved

  Res.edx = (1 << 0) |                                // FPU
            (1 << 1) |                                // Virtual mode extensions
            (1 << 2) |                                // Debugging extensions
            (1 << 3) |                                // Page size extensions
            (1 << 4) |                                // TSC
            (1 << 5) |                                // MSR support
            (1 << 6) |                                // PAE
            (1 << 7) |                                // Machine Check Exception
            (1 << 8) |                                // CMPXCHG8B
            (1 << 9) |                                // APIC
            (0 << 10) |                               // Reserved
            (1 << 11) |                               // SYSCALL/SYSRET
            (1 << 12) |                               // MTRR
            (1 << 13) |                               // Page global extension
            (1 << 14) |                               // Machine Check architecture
            (1 << 15) |                               // CMOV
            (1 << 16) |                               // Page attribute table
            (1 << 17) |                               // 36-bit page-size extensions (PSE36)
            (0 << 18) |                               // Reserved
            (0 << 19) |                               // Reserved
            (1 << 20) |                               // NX
            (0 << 21) |                               // Reserved
            (1 << 22) |                               // MMXExt
            (1 << 23) |                               // MMX
            (1 << 24) |                               // FXSAVE/FXRSTOR
            (1 << 25) |                               // FXSAVE/FXRSTOR Optimizations
            (0 << 26) |                               // 1 gigabyte pages
            (SUPPORTS_RDTSCP << 27) |                 // RDTSCP
            (0 << 28) |                               // Reserved
            (1 << 29) |                               // Long Mode
            (CTX->HostFeatures.Supports3DNow << 30) | // 3DNow! Extensions
            (CTX->HostFeatures.Supports3DNow << 31);  // 3DNow!
  return Res;
}

// Processor brand string
// First of three brand string leaves, answered for the CPU reported by GetCPUID().
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0002h(uint32_t Leaf) const {
  const uint32_t CurrentCPU = GetCPUID();
  return Function_8000_0002h(Leaf, CurrentCPU);
}

// Second of three brand string leaves, answered for the CPU reported by GetCPUID().
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0003h(uint32_t Leaf) const {
  const uint32_t CurrentCPU = GetCPUID();
  return Function_8000_0003h(Leaf, CurrentCPU);
}

// Third of three brand string leaves, answered for the CPU reported by GetCPUID().
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0004h(uint32_t Leaf) const {
  const uint32_t CurrentCPU = GetCPUID();
  return Function_8000_0004h(Leaf, CurrentCPU);
}

// Brand string bytes [0, 16) of the given CPU's product name.
// Shorter names leave the remaining result bytes zeroed.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0002h(uint32_t Leaf, uint32_t CPU) const {
  const char* const Name = PerCPUData[CPU].ProductName;
  const size_t CopySize = std::min(strlen(Name), sizeof(FEXCore::CPUID::FunctionResults));

  FEXCore::CPUID::FunctionResults Res {};
  memcpy(&Res, Name, CopySize);
  return Res;
}

// Brand string bytes starting at offset 16 of the given CPU's product name.
// Names that do not reach this offset produce an all-zero result.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0003h(uint32_t Leaf, uint32_t CPU) const {
  constexpr size_t ChunkOffset = 16;

  const char* const Name = PerCPUData[CPU].ProductName;
  const size_t NameLength = strlen(Name);

  // Number of characters at or beyond the chunk offset, never negative.
  const size_t Remaining = NameLength > ChunkOffset ? NameLength - ChunkOffset : 0;

  FEXCore::CPUID::FunctionResults Res {};
  memcpy(&Res, Name + ChunkOffset, std::min<size_t>(Remaining, sizeof(FEXCore::CPUID::FunctionResults)));
  return Res;
}

// Brand string bytes starting at offset 32 of the given CPU's product name.
// Names that do not reach this offset produce an all-zero result.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0004h(uint32_t Leaf, uint32_t CPU) const {
  constexpr size_t ChunkOffset = 32;

  const char* const Name = PerCPUData[CPU].ProductName;
  const size_t NameLength = strlen(Name);

  // Number of characters at or beyond the chunk offset, never negative.
  const size_t Remaining = NameLength > ChunkOffset ? NameLength - ChunkOffset : 0;

  FEXCore::CPUID::FunctionResults Res {};
  memcpy(&Res, Name + ChunkOffset, std::min<size_t>(Remaining, sizeof(FEXCore::CPUID::FunctionResults)));
  return Res;
}

// L1 Cache and TLB identifiers
// All values are fixed; the sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0005h(uint32_t Leaf) const {
  // TLB register layout: instruction entries, instruction associativity, data entries, data associativity (one byte each).
  constexpr auto PackTLB = [](uint32_t InstEntries, uint32_t InstAssoc, uint32_t DataEntries, uint32_t DataAssoc) -> uint32_t {
    return (InstEntries << 0) | (InstAssoc << 8) | (DataEntries << 16) | (DataAssoc << 24);
  };
  // Cache register layout: line size in bytes, lines per tag, associativity, size in KB (one byte each).
  constexpr auto PackCache = [](uint32_t LineSize, uint32_t LinesPerTag, uint32_t Assoc, uint32_t SizeKB) -> uint32_t {
    return (LineSize << 0) | (LinesPerTag << 8) | (Assoc << 16) | (SizeKB << 24);
  };

  constexpr uint32_t TLBEntries = 64;
  constexpr uint32_t FullyAssociative = 255;

  FEXCore::CPUID::FunctionResults Res {};

  // L1 TLB Information for 2MB and 4MB pages
  Res.eax = PackTLB(TLBEntries, FullyAssociative, TLBEntries, FullyAssociative);

  // L1 TLB Information for 4KB pages
  Res.ebx = PackTLB(TLBEntries, FullyAssociative, TLBEntries, FullyAssociative);

  // L1 data cache: 64 byte lines, 1 line per tag, associativity 8, 32KB
  Res.ecx = PackCache(64, 1, 8, 32);

  // L1 instruction cache: 64 byte lines, 1 line per tag, associativity 4, 64KB
  Res.edx = PackCache(64, 1, 4, 64);

  return Res;
}

// L2 Cache identifiers
// EAX/EBX describe the L2 TLB, ECX the L2 cache and EDX the L3 cache.
// All values are fixed; the sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0006h(uint32_t Leaf) const {
  // TLB register layout: instruction entries [11:0], instruction associativity [15:12],
  // data entries [27:16], data associativity [31:28].
  constexpr auto PackTLB = [](uint32_t InstEntries, uint32_t InstAssoc, uint32_t DataEntries, uint32_t DataAssoc) -> uint32_t {
    return (InstEntries << 0) | (InstAssoc << 12) | (DataEntries << 16) | (DataAssoc << 28);
  };

  constexpr uint32_t LineSize = 64;   // cacheline size
  constexpr uint32_t LinesPerTag = 1; // cachelines per tag
  constexpr uint32_t CacheAssoc = 6;  // cache associativity (encoded value)
  constexpr uint32_t CacheCommon = (LineSize << 0) | (LinesPerTag << 8) | (CacheAssoc << 12);

  FEXCore::CPUID::FunctionResults Res {};

  // L2 TLB Information for 2MB and 4MB pages
  Res.eax = PackTLB(1024, 6, 1536, 3);

  // L2 TLB Information for 4KB pages
  Res.ebx = PackTLB(1024, 6, 1536, 5);

  // L2 cache identifiers: size in KB lives at bit 16
  Res.ecx = CacheCommon | (512 << 16);

  // L3 cache identifiers: the size field lives at bit 18.
  // The AMD CPUID specification counts this field in 512KB units, so 16 describes 8MB.
  Res.edx = CacheCommon | (16 << 18);

  return Res;
}

// Advanced power management
// All values are fixed; the sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0007h(uint32_t Leaf) const {
  constexpr uint32_t APICTimerPStateInvariant = 1U << 2; // EAX bit 2: APIC timer not affected by p-state
  constexpr uint32_t InvariantTSC = 1U << 8;             // EDX bit 8: Invariant TSC

  FEXCore::CPUID::FunctionResults Result {};
  Result.eax = APICTimerPStateInvariant;
  Result.edx = InvariantTSC;
  return Result;
}

// Virtual and physical address sizes
// EAX: address widths, EBX: extended feature bits, ECX: core count information.
// The sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0008h(uint32_t Leaf) const {
  FEXCore::CPUID::FunctionResults Res {};
  Res.eax = (48 << 0) | // PhysAddrSize = 48-bit
            (48 << 8) | // LinAddrSize = 48-bit
            (0 << 16);  // GuestPhysAddrSize == PhysAddrSize

  Res.ebx = (0 << 2) |                               // XSaveErPtr: Saving and restoring error pointers
            (0 << 1) |                               // IRPerf: Instructions retired count support
            (CTX->HostFeatures.SupportsCLZERO << 0); // CLZERO support

  const uint32_t CoreCount = Cores - 1;

  // ApicIdSize is a number of bits, not a core count: software derives the maximum
  // number of logical processors as (1 << ApicIdSize).
  // bit_width(Cores - 1) is the smallest width able to hold every core index
  // (0 for a single core, 3 for 5-8 cores, 4 for 9-16 cores, ...).
  // The field is only 4 bits wide, so clamp instead of spilling into neighbouring fields.
  const uint32_t ApicIdSize = std::min<uint32_t>(static_cast<uint32_t>(std::bit_width(CoreCount)), 0xF);

  Res.ecx = (0 << 16) |          // PerfTscSize: Performance timestamp count size
            (ApicIdSize << 12) | // ApicIdSize: Number of bits in ApicID
            (CoreCount << 0);    // Core count subtract one

  return Res;
}

// TLB 1GB page identifiers
// EAX describes the L1 TLBs and EBX the L2 TLBs. The sub-leaf argument is ignored.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_0019h(uint32_t Leaf) const {
  // Register layout: ITLB entry count [11:0], ITLB associativity [15:12],
  // DTLB entry count [27:16], DTLB associativity [31:28].
  constexpr auto PackTLB = [](uint32_t ITLBEntries, uint32_t ITLBAssoc, uint32_t DTLBEntries, uint32_t DTLBAssoc) -> uint32_t {
    return (DTLBAssoc << 28) | (DTLBEntries << 16) | (ITLBAssoc << 12) | (ITLBEntries << 0);
  };

  FEXCore::CPUID::FunctionResults Res {};

  // L1: 64 entries each for ITLB and DTLB, associativity value 0xF
  Res.eax = PackTLB(64, 0xF, 64, 0xF);

  // L2: no 1GB page entries reported
  Res.ebx = PackTLB(0, 0, 0, 0);

  return Res;
}

// Deterministic cache parameters for each level
//
// The sub-leaf index (passed in as Leaf) selects which cache is described:
//   0 = L1 data, 1 = L1 instruction, 2 = L2 unified, 3 = L3 unified.
// Any other sub-leaf returns all zeros, which leaves the cache type field at 0.
//
// The guest computes the cache size as ways * partitions * line size * sets,
// where EBX and ECX store each of those quantities minus one.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_8000_001Dh(uint32_t Leaf) const {
  // This is nearly a copy of CPUID function 4h
  // There are some minor changes though

  FEXCore::CPUID::FunctionResults Res {};
  constexpr uint32_t CacheType_Data = 1;
  constexpr uint32_t CacheType_Instruction = 2;
  constexpr uint32_t CacheType_Unified = 3;

  if (Leaf == 0) {
    // Report L1D
    Res.eax = CacheType_Data | // Cache type
              (0b001 << 5) |   // Cache level
              (1 << 8) |       // Self initializing cache level
              (0 << 9) |       // Fully associative
              (0 << 14);       // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1)

    Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte
              (0 << 12) | // Physical Line partitions
              (7 << 22);  // Associativity - 1 : Claiming 8 way

    // 32KB
    Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1);  // Cache inclusiveness - Includes lower caches
  } else if (Leaf == 1) {
    // Report L1I
    Res.eax = CacheType_Instruction | // Cache type
              (0b001 << 5) |          // Cache level
              (1 << 8) |              // Self initializing cache level
              (0 << 9) |              // Fully associative
              (0 << 14); // Maximum number of addressable IDs for logical processors sharing this cache (With SMT this would be 1)

    Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte
              (0 << 12) | // Physical Line partitions
              (7 << 22);  // Associativity - 1 : Claiming 8 way

    // 32KB
    Res.ecx = 63; // Number of sets - 1 : Claiming 64 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1);  // Cache inclusiveness - Includes lower caches
  } else if (Leaf == 2) {
    // Report L2
    Res.eax = CacheType_Unified | // Cache type
              (0b010 << 5) |      // Cache level
              (1 << 8) |          // Self initializing cache level
              (0 << 9) |          // Fully associative
              (0 << 14);          // Maximum number of addressable IDs for logical processors sharing this cache

    Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte
              (0 << 12) | // Physical Line partitions
              (7 << 22);  // Associativity - 1 : Claiming 8 way

    // 512KB
    Res.ecx = 0x3FF; // Number of sets - 1 : Claiming 1024 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1);  // Cache inclusiveness - Includes lower caches
  } else if (Leaf == 3) {
    // Report L3
    // The sharing field is encoded as count - 1.
    uint32_t CoreCount = Cores - 1;

    Res.eax = CacheType_Unified | // Cache type
              (0b011 << 5) |      // Cache level
              (1 << 8) |          // Self initializing cache level
              (0 << 9) |          // Fully associative
              (CoreCount << 14);  // Maximum number of addressable IDs for logical processors sharing this cache

    Res.ebx = (63 << 0) | // Line Size - 1 : Claiming 64 byte
              (0 << 12) | // Physical Line partitions
              (7 << 22);  // Associativity - 1 : Claiming 8 way

    // 8MB
    // The field holds sets - 1, so 16384 sets is encoded as 0x3FFF (0x4000 would describe 16385 sets).
    Res.ecx = 0x3FFF; // Number of sets - 1 : Claiming 16384 sets

    Res.edx = (0 << 0) | // Write-back invalidate
              (0 << 1);  // Cache inclusiveness - Includes lower caches
  }

  return Res;
}

// Handler for reserved functions: every register reads back as zero.
FEXCore::CPUID::FunctionResults CPUIDEmu::Function_Reserved(uint32_t Leaf) const {
  return FEXCore::CPUID::FunctionResults {};
}

// Returns XCR0 split into its low (eax) and high (edx) 32-bit halves.
FEXCore::CPUID::XCRResults CPUIDEmu::XCRFunction_0h() const {
  const auto LowHalf = static_cast<uint32_t>(XCR0);
  const auto HighHalf = static_cast<uint32_t>(XCR0 >> 32);

  FEXCore::CPUID::XCRResults Result {};
  Result.eax = LowHalf;
  Result.edx = HighHalf;
  return Result;
}

// Builds the CPUID emulation state from the owning context.
// ctx: context whose HostFeatures are read here; the pointer is stored in CTX.
//
// NOTE(review): the SupportsCPUIndexInTPIDRRO initializer reads through CTX, so it relies on
// CTX being declared before it in the class - confirm against the header.
CPUIDEmu::CPUIDEmu(const FEXCore::Context::ContextImpl* ctx)
  : CTX {ctx}
  , SupportsCPUIndexInTPIDRRO {CTX->HostFeatures.SupportsCPUIndexInTPIDRRO}
  , GetCPUID {GetCPUID_Syscall} {
  // One emulated core per host MIDR entry.
  Cores = CTX->HostFeatures.CPUMIDRs.size();

  // Setup some state tracking
  SetupHostHybridFlag();

  SetupFeatures();

#ifdef ARCHITECTURE_arm64
  // GetCPUID starts out as GetCPUID_Syscall; switch to the TPIDRRO variant when the
  // host reports that the CPU index is available there.
  if (SupportsCPUIndexInTPIDRRO) {
    GetCPUID = GetCPUID_TPIDRRO;
  }
#endif
}
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/CPUID.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/CPUID.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <unordered_map>
#include <utility>

namespace FEXCore {
namespace Context {
  class ContextImpl;
}

uint64_t GetCycleCounterFrequency();

// Debugging define to switch what family of CPU we execute as.
// Might be useful if an application makes an assumption about a CPU.
// #define CPUID_AMD
class CPUIDEmu final {
private:
  constexpr static uint32_t CPUID_VENDOR_INTEL1 = 0x756E6547; // "Genu"
  constexpr static uint32_t CPUID_VENDOR_INTEL2 = 0x49656E69; // "ineI"
  constexpr static uint32_t CPUID_VENDOR_INTEL3 = 0x6C65746E; // "ntel"

  constexpr static uint32_t CPUID_VENDOR_AMD1 = 0x68747541; // "Auth"
  constexpr static uint32_t CPUID_VENDOR_AMD2 = 0x69746E65; // "enti"
  constexpr static uint32_t CPUID_VENDOR_AMD3 = 0x444D4163; // "cAMD"

public:
  CPUIDEmu(const FEXCore::Context::ContextImpl* ctx);

  // X86 cacheline size effectively has to be hardcoded to 64
  // if we report anything differently then applications are likely to break
  constexpr static uint64_t CACHELINE_SIZE = 64;

  FEXCore::CPUID::FunctionResults RunFunction(uint32_t Function, uint32_t Leaf) const {
    // Every CPUID range owns a dispatch table that is indexed by the offset from the range base.
    // Functions that fall outside of all tables are handled like reserved functions.
    constexpr uint32_t HypervisorBase = 0x4000'0000;
    constexpr uint32_t ExtendedBase = 0x8000'0000;

    FunctionHandler Selected = &CPUIDEmu::Function_Reserved;
    if (Function < Primary.size()) {
      Selected = Primary[Function];
    } else if (Function >= HypervisorBase && (Function - HypervisorBase) < Hypervisor.size()) {
      Selected = Hypervisor[Function - HypervisorBase];
    } else if (Function >= ExtendedBase && (Function - ExtendedBase) < Extended.size()) {
      Selected = Extended[Function - ExtendedBase];
    }

    return (this->*Selected)(Leaf);
  }

  FEXCore::CPUID::FunctionResults RunFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) const {
    // Reduce the CPU index modulo the number of PerCPUData entries before handing it on.
    const uint32_t CPUIndex = CPU % PerCPUData.size();

    switch (Function) {
    case 0x8000'0002U: return Function_8000_0002h(Leaf, CPUIndex);
    case 0x8000'0003U: return Function_8000_0003h(Leaf, CPUIndex);
    // Every other function value is routed to the 0x8000'0004 handler.
    default: return Function_8000_0004h(Leaf, CPUIndex);
    }
  }

  FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) const {
    // Only XCR function 0 is implemented.
    // XCR function 1 (and anything above) is not yet supported and yields an empty result.
    return Function == 0 ? XCRFunction_0h() : FEXCore::CPUID::XCRResults {};
  }

  // Reports whether the given XCR function returns constant data.
  // The Function argument is currently not inspected.
  bool DoesXCRFunctionReportConstantData(uint32_t Function) const {
    // Every function currently returns constant data.
    return true;
  }

  // Whether the result of a CPUID function is reported as constant data.
  enum class SupportsConstant {
    CONSTANT,
    NONCONSTANT,
  };
  // Leaf requirement attached to a CPUID function.
  // NOTE(review): NEEDSLEAFCONSTANT presumably means the result is only constant once the leaf value is
  // known as well; the tables below use it for functions with sub-leaves (4, 7, 0x0D, ...). Confirm
  // against the users of DoesFunctionReportConstantData().
  enum class NeedsLeafConstant {
    NEEDSLEAFCONSTANT,
    NOLEAFCONSTANT,
  };
  // Per-function entry of the *_Constant tables, returned by DoesFunctionReportConstantData().
  struct FunctionConstant {
    SupportsConstant SupportsConstantFunction;
    NeedsLeafConstant NeedsLeaf;
  };

  static constexpr FunctionConstant DoesFunctionReportConstantData(uint32_t Function) {
    // Looks up the constant-ness entry that belongs to Function, using the same range split as RunFunction().
    constexpr uint32_t HypervisorBase = 0x4000'0000;
    constexpr uint32_t ExtendedBase = 0x8000'0000;

    if (Function < Primary.size()) {
      return Primary_Constant[Function];
    }

    const bool InHypervisorRange = Function >= HypervisorBase && (Function - HypervisorBase) < Hypervisor.size();
    if (InHypervisorRange) {
      return Hypervisor_Constant[Function - HypervisorBase];
    }

    const bool InExtendedRange = Function >= ExtendedBase && (Function - ExtendedBase) < Extended.size();
    if (InExtendedRange) {
      return Extended_Constant[Function - ExtendedBase];
    }

    // Anything unsupported is known constant return of reserved data.
    return FunctionConstant {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT};
  }

private:
  const FEXCore::Context::ContextImpl* CTX;
  [[maybe_unused]] bool SupportsCPUIndexInTPIDRRO {};
  bool Hybrid {};
  uint32_t Cores {};
  FEX_CONFIG_OPT(HideHypervisorBit, HIDEHYPERVISORBIT);

  // XFEATURE_ENABLED_MASK
  // Mask that configures what features are enabled on the CPU.
  // Affects XSAVE and XRSTOR when modified.
  // Bit layout is as follows.
  // [0]     - x87 enabled
  // [1]     - SSE enabled
  // [2]     - YMM enabled (256-bit SSE)
  // [8:3]   - Reserved. MBZ.
  // [9]     - MPK
  // [10]    - Reserved. MBZ.
  // [11]    - CET_U
  // [12]    - CET_S
  // [61:13] - Reserved. MBZ.
  // [62]    - LWP (Lightweight profiling)
  // [63]    - Reserved for XCR bit vector expansion. MBZ.
  // Always enable x87 and SSE by default.
  constexpr static uint64_t XCR0_X87 = 1ULL << 0;
  constexpr static uint64_t XCR0_SSE = 1ULL << 1;
  constexpr static uint64_t XCR0_AVX = 1ULL << 2;

  struct FeaturesConfig {
    uint64_t SHA  : 1;
    uint64_t _pad : 63;
  };

  FeaturesConfig Features {
    .SHA = 1,
  };

  uint64_t XCR0 {XCR0_X87 | XCR0_SSE};

  uint32_t SupportsAVX() const {
    // 1 when the AVX bit is set in XCR0, 0 otherwise.
    return static_cast<uint32_t>((XCR0 & XCR0_AVX) != 0);
  }

  using FunctionHandler = FEXCore::CPUID::FunctionResults (CPUIDEmu::*)(uint32_t Leaf) const;

  // Per-CPU data; one element of the PerCPUData vector.
  struct CPUData {
    // Product name string of this CPU.
    // NOTE(review): presumably the source of the per-CPU brand string functions; confirm in CPUID.cpp.
    const char* ProductName {};
#ifdef ARCHITECTURE_arm64
    // MIDR value of this CPU; the member only exists on arm64 builds.
    uint32_t MIDR {};
#endif
    // NOTE(review): presumably marks a "big" core on hybrid hosts; confirm where it is assigned.
    bool IsBig {};
  };
  fextl::vector<CPUData> PerCPUData {};

  // Functions
  FEXCore::CPUID::FunctionResults Function_0h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_01h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_02h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_04h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_06h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_07h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_0Dh(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_15h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_1Ah(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_4000_0000h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_4000_0001h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0000h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0001h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0002h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0003h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0004h(uint32_t Leaf) const;

  FEXCore::CPUID::FunctionResults Function_8000_0002h(uint32_t Leaf, uint32_t CPU) const;
  FEXCore::CPUID::FunctionResults Function_8000_0003h(uint32_t Leaf, uint32_t CPU) const;
  FEXCore::CPUID::FunctionResults Function_8000_0004h(uint32_t Leaf, uint32_t CPU) const;

  FEXCore::CPUID::FunctionResults Function_8000_0005h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0006h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0007h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0008h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_0019h(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_8000_001Dh(uint32_t Leaf) const;
  FEXCore::CPUID::FunctionResults Function_Reserved(uint32_t Leaf) const;

  FEXCore::CPUID::XCRResults XCRFunction_0h() const;

  void SetupHostHybridFlag();
  void SetupFeatures();
  static constexpr size_t PRIMARY_FUNCTION_COUNT = 27;
  static constexpr size_t HYPERVISOR_FUNCTION_COUNT = 2;
  static constexpr size_t EXTENDED_FUNCTION_COUNT = 32;
  static constexpr std::array<FunctionHandler, PRIMARY_FUNCTION_COUNT> Primary = {
    // 0: Highest function parameter and ID
    &CPUIDEmu::Function_0h,
    // 1: Processor info
    &CPUIDEmu::Function_01h,
    // 2: Cache and TLB info
    &CPUIDEmu::Function_02h,
    // 3: Serial Number(previously), now reserved
    &CPUIDEmu::Function_Reserved,
#ifndef CPUID_AMD
    // 4: Deterministic cache parameters for each level
    &CPUIDEmu::Function_04h,
#else
    &CPUIDEmu::Function_Reserved,
#endif
    // 5: Monitor/mwait
    &CPUIDEmu::Function_Reserved,
    // 6: Thermal and power management
    &CPUIDEmu::Function_06h,
    // 7: Extended feature flags
    &CPUIDEmu::Function_07h,
    // 0x08: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 9: Direct Cache Access information
    &CPUIDEmu::Function_Reserved,
    // 0x0A: Architectural performance monitoring
    &CPUIDEmu::Function_Reserved,
    // 0x0B: Extended topology enumeration
    &CPUIDEmu::Function_Reserved,
    // 0x0C: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x0D: Processor extended state enumeration
    &CPUIDEmu::Function_0Dh,
    // 0x0E: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x0F: Intel RDT monitoring
    &CPUIDEmu::Function_Reserved,
    // 0x10: Intel RDT allocation enumeration
    &CPUIDEmu::Function_Reserved,
    // 0x11: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x12: Intel SGX capability enumeration
    &CPUIDEmu::Function_Reserved,
    // 0x13: Reserved
    &CPUIDEmu::Function_Reserved,
    // 0x14: Intel Processor trace
    &CPUIDEmu::Function_Reserved,
#ifndef CPUID_AMD
    // 0x15: Timestamp counter information
    // Doesn't exist on AMD hardware
    &CPUIDEmu::Function_15h,
#else
    &CPUIDEmu::Function_Reserved,
#endif
    // 0x16: Processor frequency information
    &CPUIDEmu::Function_Reserved,
    // 0x17: SoC vendor attribute enumeration
    &CPUIDEmu::Function_Reserved,
    // 0x18: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x19: Reserved?
    &CPUIDEmu::Function_Reserved,
#ifndef CPUID_AMD
    // 0x1A: Hybrid Information Sub-leaf
    &CPUIDEmu::Function_1Ah,
#else
    &CPUIDEmu::Function_Reserved,
#endif
  };

  static constexpr std::array<FunctionConstant, PRIMARY_FUNCTION_COUNT> Primary_Constant = {{
    // 0: Highest function parameter and ID
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 1: Processor info
    {SupportsConstant::NONCONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 2: Cache and TLB info
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 3: Serial Number(previously), now reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#ifndef CPUID_AMD
    // 4: Deterministic cache parameters for each level
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NEEDSLEAFCONSTANT},
#else
    // 4: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#endif
    // 5: Monitor/mwait
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 6: Thermal and power management
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 7: Extended feature flags
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NEEDSLEAFCONSTANT},
    // 0x08: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 9: Direct Cache Access information
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x0A: Architectural performance monitoring
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x0B: Extended topology enumeration
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x0C: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x0D: Processor extended state enumeration
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NEEDSLEAFCONSTANT},
    // 0x0E: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x0F: Intel RDT monitoring
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x10: Intel RDT allocation enumeration
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x11: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x12: Intel SGX capability enumeration
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x13: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x14: Intel Processor trace
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#ifndef CPUID_AMD
    // 0x15: Timestamp counter information
    // Doesn't exist on AMD hardware
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#else
    // 0x15: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#endif
    // 0x16: Processor frequency information
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x17: SoC vendor attribute enumeration
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x18: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x19: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#ifndef CPUID_AMD
    // 0x1A: Hybrid Information Sub-leaf
    {SupportsConstant::NONCONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#else
    // 0x1A: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#endif
  }};

  static constexpr std::array<FunctionHandler, HYPERVISOR_FUNCTION_COUNT> Hypervisor = {
    // Hypervisor CPUID information leaf
    &CPUIDEmu::Function_4000_0000h,
    // FEX-Emu specific leaf
    &CPUIDEmu::Function_4000_0001h,
  };

  static constexpr std::array<FunctionConstant, HYPERVISOR_FUNCTION_COUNT> Hypervisor_Constant = {{
    // Hypervisor CPUID information leaf
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // FEX-Emu specific leaf
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NEEDSLEAFCONSTANT},
  }};

  static constexpr std::array<FunctionHandler, EXTENDED_FUNCTION_COUNT> Extended = {
    // Largest extended function number
    &CPUIDEmu::Function_8000_0000h,
    // Processor vendor
    &CPUIDEmu::Function_8000_0001h,
    // Processor brand string
    &CPUIDEmu::Function_8000_0002h,
    // Processor brand string continued
    &CPUIDEmu::Function_8000_0003h,
    // Processor brand string continued
    &CPUIDEmu::Function_8000_0004h,
#ifdef CPUID_AMD
    // 0x8000'0005: L1 Cache and TLB identifiers
    &CPUIDEmu::Function_8000_0005h,
#else
    &CPUIDEmu::Function_Reserved,
#endif
    // 0x8000'0006: L2 Cache identifiers
    &CPUIDEmu::Function_8000_0006h,
    // 0x8000'0007: Advanced power management information
    &CPUIDEmu::Function_8000_0007h,
    // 0x8000'0008: Virtual and physical address sizes
    &CPUIDEmu::Function_8000_0008h,
    // 0x8000'0009: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000A: SVM Revision
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000B: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000C: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000D: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000E: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'000F: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0010: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0011: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0012: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0013: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0014: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0015: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0016: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0017: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0018: Reserved?
    &CPUIDEmu::Function_Reserved,
    // 0x8000'0019: TLB 1GB page identifiers
    &CPUIDEmu::Function_8000_0019h,
    // 0x8000'001A: Performance optimization identifiers
    &CPUIDEmu::Function_Reserved,
    // 0x8000'001B: Instruction based sampling identifiers
    &CPUIDEmu::Function_Reserved,
    // 0x8000'001C: Lightweight profiling capabilities
    &CPUIDEmu::Function_Reserved,
#ifdef CPUID_AMD
    // 0x8000'001D: Cache properties
    &CPUIDEmu::Function_8000_001Dh,
#else
    &CPUIDEmu::Function_Reserved,
#endif
    // 0x8000'001E: Extended APIC ID
    &CPUIDEmu::Function_Reserved,
    // 0x8000'001F: AMD Secure Encryption
    &CPUIDEmu::Function_Reserved,
  };

  static constexpr std::array<FunctionConstant, EXTENDED_FUNCTION_COUNT> Extended_Constant = {{
    // Largest extended function number
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // Processor vendor
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // Processor brand string
    {SupportsConstant::NONCONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // Processor brand string continued
    {SupportsConstant::NONCONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // Processor brand string continued
    {SupportsConstant::NONCONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#ifdef CPUID_AMD
    // 0x8000'0005: L1 Cache and TLB identifiers
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#else
    // 0x8000'0005: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#endif
    // 0x8000'0006: L2 Cache identifiers
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0007: Advanced power management information
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0008: Virtual and physical address sizes
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0009: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000A: SVM Revision
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000B: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000C: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000D: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000E: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'000F: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0010: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0011: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0012: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0013: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0014: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0015: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0016: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0017: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0018: Reserved?
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'0019: TLB 1GB page identifiers
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'001A: Performance optimization identifiers
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'001B: Instruction based sampling identifiers
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'001C: Lightweight profiling capabilities
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#ifdef CPUID_AMD
    // 0x8000'001D: Cache properties
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NEEDSLEAFCONSTANT},
#else
    // 0x8000'001D: Reserved
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
#endif
    // 0x8000'001E: Extended APIC ID
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
    // 0x8000'001F: AMD Secure Encryption
    {SupportsConstant::CONSTANT, NeedsLeafConstant::NOLEAFCONSTANT},
  }};

  using GetCPUIDPtr = uint32_t (*)();
  GetCPUIDPtr GetCPUID;
};
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/CodeCache.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Utils/SpinWaitLock.h"

#include <Interface/Context/Context.h>
#include <Interface/Core/ArchHelpers/Arm64Emitter.h>
#include <Interface/Core/Dispatcher/Dispatcher.h>
#include <Interface/Core/JIT/DebugData.h>
#include <Interface/Core/JIT/Relocations.h>
#include <Interface/Core/LookupCache.h>
#include <Interface/Core/OpcodeDispatcher.h>
#include <Interface/IR/PassManager.h>

#include <FEXCore/Core/Thunks.h>
#include <FEXCore/HLE/SourcecodeResolver.h>
#include <FEXCore/HLE/SyscallHandler.h>

#include <FEXHeaderUtils/Filesystem.h>

#include <git_version.h>

#include <xxhash.h>

#include <fstream>

namespace FEXCore {

#if __clang_major__ < 16
// Out-of-line constructor, only compiled for clang releases older than 16 (see the surrounding #if).
// Takes ownership of the sourcecode map and of the filename string.
ExecutableFileInfo::ExecutableFileInfo(fextl::unique_ptr<HLE::SourcecodeMap> Map, uint64_t FileId, fextl::string Filename)
  : SourcecodeMap(std::move(Map))
  , FileId(FileId)
  // The parameter already is a private copy; move it into the member instead of copying it a second time.
  , Filename(std::move(Filename)) {}
#endif
ExecutableFileInfo::~ExecutableFileInfo() = default;

fextl::string CodeMap::GetBaseFilename(const ExecutableFileInfo& MainExecutable, bool AddNombSuffix) {
  // The result has the form "<filename>-<file id as 16 hex digits>[-nomb]".
  const std::string_view Name = FHU::Filesystem::GetFilename(std::string_view {MainExecutable.Filename});

  // An all-ones file id has no base filename; an empty string is returned for it.
  if (MainExecutable.FileId == 0xffff'ffff'ffff'ffff) {
    return "";
  }

  const char* Suffix = AddNombSuffix ? "-nomb" : "";
  return fextl::fmt::format("{}-{:016x}{}", Name, MainExecutable.FileId, Suffix);
}

fextl::map<CodeMapFileId, CodeMap::ParsedContents> CodeMap::ParseCodeMap(std::ifstream& File) {
  fextl::map<CodeMapFileId, CodeMap::ParsedContents> Parsed;

  // Default-constructed instance whose Marker identifies "set executable file id" records.
  const SetExecutableFileId ExecutableMarker {};

  for (;;) {
    // Every record starts with a fixed-size Entry. Two FileId/BlockOffset combinations are
    // markers for special records; everything else is a plain block entry.
    Entry Record;
    File.read(reinterpret_cast<char*>(&Record), sizeof(Record));
    if (!File) {
      break;
    }

    const bool IsLibraryLoad = Record.FileId == LoadExternalLibrary.FileId && Record.BlockOffset == LoadExternalLibrary.BlockOffset;
    const bool IsSetExecutable =
      Record.FileId == ExecutableMarker.Marker.FileId && Record.BlockOffset == ExecutableMarker.Marker.BlockOffset;

    if (IsLibraryLoad) {
      // Library record: library info, then a NUL-terminated filename
      ExternalLibraryInfo Info;
      File.read(reinterpret_cast<char*>(&Info), sizeof(Info));

      fextl::string Filename;
      std::getline(File, Filename, '\0');

      // Consume the padding that aligns the terminated filename to a 4-byte boundary
      char Padding[4];
      const auto TerminatedSize = Filename.size() + 1;
      File.read(Padding, AlignUp(TerminatedSize, 4) - TerminatedSize);
      if (!File) {
        break;
      }
      Parsed[Info.ExternalFileId].Filename = std::move(Filename);
    } else if (IsSetExecutable) {
      // Executable record: carries the id of the file to flag as executable
      CodeMapFileId ExecutableFileId;
      File.read(reinterpret_cast<char*>(&ExecutableFileId), sizeof(ExecutableFileId));
      if (!File) {
        break;
      }
      Parsed[ExecutableFileId].IsExecutable = true;
    } else if (auto Known = Parsed.find(Record.FileId); Known != Parsed.end()) {
      // Block record for a file that was announced earlier
      Known->second.Blocks.insert(Record.BlockOffset);
    } else {
      LogMan::Msg::EFmt("Code map referenced unknown file id {:016x}", Record.FileId);
    }
  }

  return Parsed;
}

// Creates a writer with a 4096-element staging buffer that opens its output through Opener.
// With OpenEagerly set, the code map file is opened right away; otherwise CodeMapFD is not
// assigned here and the file is opened on demand by IsWriteEnabled().
CodeMapWriter::CodeMapWriter(CodeMapOpener& Opener, bool OpenEagerly)
  : Buffer(4096)
  , FileOpener(Opener) {
  if (OpenEagerly) {
    CodeMapFD = FileOpener.OpenCodeMapFile();
  }
}

CodeMapWriter::~CodeMapWriter() {
  // There is only something to flush and close if a file descriptor was stored and it is not -1.
  const bool HasOpenFile = CodeMapFD.has_value() && *CodeMapFD != -1;
  if (!HasOpenFile) {
    return;
  }

  Flush(BufferOffset);
  close(*CodeMapFD);
}

// Decides whether entries for the given section should be written to the code map.
// Opens the code map file on first use if that has not happened yet.
// Returns false when the stored descriptor is -1 or the section belongs to a file
// below "/run/pressure-vessel"; returns true when a usable descriptor is available.
bool CodeMapWriter::IsWriteEnabled(const ExecutableFileSectionInfo& Section) {
  // A stored descriptor of -1 disables writing for good.
  if (CodeMapFD == -1) {
    return false;
  }

  // PV libraries can't yet be read by FEXServer, so skip dumping them
  if (Section.FileInfo.Filename.starts_with("/run/pressure-vessel")) {
    return false;
  }

  // Fast path without taking the mutex: the file has already been opened.
  if (CodeMapFD) {
    return true;
  }

  // Acquire mutex and re-check CodeMapFD to avoid race conditions
  auto lk = std::unique_lock {Mutex};
  if (!CodeMapFD) {
    CodeMapFD = FileOpener.OpenCodeMapFile();
  }

  // The open attempt may have produced -1, in which case writing stays disabled.
  return CodeMapFD != -1;
}

void CodeMapWriter::Flush(size_t Offset) {
  // Take the mutex exclusively, then delegate to the overload that expects the lock to be held.
  auto ExclusiveLock = std::unique_lock {Mutex};
  Flush(Offset, ExclusiveLock);
}

// Writes the first Offset bytes of the staging buffer to the code map file and resets BufferOffset.
// The unnamed lock parameter exists so that callers have to pass an exclusive lock.
// Writing is best effort: if write() reports an error or makes no progress, the unwritten
// remainder is dropped and the buffer is reset regardless.
void CodeMapWriter::Flush(size_t Offset, std::unique_lock<std::shared_mutex>&) {
  const char* Cursor = reinterpret_cast<const char*>(Buffer.data());
  size_t Remaining = Offset;

  // write() may transfer fewer bytes than requested, so continue until the whole range is written.
  while (Remaining > 0) {
    const auto Written = write(*CodeMapFD, Cursor, Remaining);
    if (Written <= 0) {
      break;
    }
    Cursor += Written;
    Remaining -= static_cast<size_t>(Written);
  }

  BufferOffset = 0;
}

void CodeMapWriter::AppendBlock(const FEXCore::ExecutableFileSectionInfo& SectionInfo, uint64_t BlockEntry) {
  if (!IsWriteEnabled(SectionInfo)) {
    return;
  }

  // Blocks are stored as 32-bit offsets relative to FileStartVA
  const uint64_t FileOffset = BlockEntry - SectionInfo.FileStartVA;
  if (FileOffset > std::numeric_limits<uint32_t>::max()) {
    ERROR_AND_DIE_FMT("Cannot write code map");
  }

  const auto& FileInfo = SectionInfo.FileInfo;

  // First look the file id up under the shared lock
  bool SeenBefore = false;
  {
    std::shared_lock ReadLock {Mutex};
    SeenBefore = KnownFileIds.contains(FileInfo.FileId);
  }

  if (!SeenBefore) {
    // Only the thread whose insertion succeeds announces the library
    bool InsertedHere = false;
    {
      std::unique_lock WriteLock {Mutex};
      InsertedHere = KnownFileIds.insert(FileInfo.FileId).second;
    }

    // The exclusive lock is released at this point; AppendData acquires the mutex on its own
    if (InsertedHere) {
      AppendLibraryLoad(FileInfo);
    }
  }

  // Finally record the code block itself
  CodeMap::Entry DataEntry {FileInfo.FileId, static_cast<uint32_t>(FileOffset)};
  AppendData(std::as_bytes(std::span {&DataEntry, 1}));
}

void CodeMapWriter::AppendLibraryLoad(const FEXCore::ExecutableFileInfo& FileInfo) {
  // Record layout (see CodeMap::ExternalLibraryInfo):
  // marker entry, file id, filename, then zero bytes (terminator included) up to a multiple of 4 bytes
  const auto FileId = FileInfo.FileId;
  const auto& Name = FileInfo.Filename;
  const auto RecordSize = AlignUp(sizeof(CodeMap::LoadExternalLibrary) + sizeof(FileId) + Name.size() + 1, 4);

  // The record is assembled in stack memory
  char* const Record = reinterpret_cast<char*>(alloca(RecordSize));
  char* Cursor = Record;
  Cursor = std::copy_n(reinterpret_cast<const char*>(&CodeMap::LoadExternalLibrary), sizeof(CodeMap::LoadExternalLibrary), Cursor);
  Cursor = std::copy_n(reinterpret_cast<const char*>(&FileId), sizeof(FileId), Cursor);
  Cursor = std::copy(Name.begin(), Name.end(), Cursor);

  // Zero everything behind the filename
  std::fill(Cursor, Record + RecordSize, 0);

  AppendData(std::as_bytes(std::span {Record, RecordSize}));
}

void CodeMapWriter::AppendSetMainExecutable(const FEXCore::ExecutableFileInfo& FileInfo) {
  // Emit a SetExecutableFileId record that carries the file id of the given executable.
  const CodeMap::SetExecutableFileId Record {.ExecutableFileId = FileInfo.FileId};
  AppendData(std::as_bytes(std::span {&Record, 1}));
}

// Appends one record to the code map.
// Records are staged in Buffer: writers reserve space with an atomic add on BufferOffset while
// holding the mutex in shared mode, and the buffer is flushed to the file under the exclusive lock
// once a reservation no longer fits.
// Records larger than the whole buffer bypass the staging buffer and are written to the file directly.
void CodeMapWriter::AppendData(std::span<const std::byte> Data) {
  if (Data.size_bytes() > Buffer.size()) {
    // Such a record can never fit, not even into a freshly flushed buffer, so the flush-and-retry
    // path below would retry forever. Write it straight to the file instead. The exclusive lock
    // keeps the write from interleaving with a buffer flush. Best effort: on error the rest is dropped.
    std::unique_lock ExclusiveLock {Mutex};
    const std::byte* Cursor = Data.data();
    size_t Remaining = Data.size_bytes();
    while (Remaining > 0) {
      const auto Written = write(*CodeMapFD, Cursor, Remaining);
      if (Written <= 0) {
        break;
      }
      Cursor += Written;
      Remaining -= static_cast<size_t>(Written);
    }
    return;
  }

  std::shared_lock Lock {Mutex};
  auto Offset = BufferOffset.fetch_add(Data.size_bytes());
  if (Offset + Data.size_bytes() > Buffer.size()) {
    // Acquire exclusive lock and flush the buffer.
    // Under heavy pressure, multiple threads may observe an exhausted buffer simultaneously.
    // The thread with the last in-bounds Offset is responsible for flushing the buffer.
    Lock.unlock();
    bool IsResponsibleForFlush = false;
    {
      std::unique_lock ExclusiveLock {Mutex};
      IsResponsibleForFlush = (Offset <= Buffer.size());
      if (IsResponsibleForFlush) {
        Flush(Offset, ExclusiveLock);
      }
    }
    if (!IsResponsibleForFlush) {
      // Wait for the buffer to be flushed on the responsible thread
      Utils::SpinWaitLock::WaitPred<std::less_equal<>, size_t>(reinterpret_cast<size_t*>(&BufferOffset), Buffer.size());
    }
    // The reservation made above is void after the flush; start over with a new one.
    AppendData(Data);
    return;
  }

  // The reserved range [Offset, Offset + size) lies inside the buffer; copy while holding the shared lock.
  memcpy(&Buffer.at(Offset), Data.data(), Data.size_bytes());
}

} // namespace FEXCore

namespace FEXCore::Context {

// Stores the reference to the owning context; no other work is done here.
CodeCache::CodeCache(ContextImpl& CTX_)
  : CTX(CTX_) {}
// Defaulted out of line in this translation unit.
CodeCache::~CodeCache() = default;

uint64_t CodeCache::ComputeCodeMapId(std::string_view Filename, int FD) {
  // For now, we just use the file path as an identifier.
  // TODO: Ensure the hash is unique enough to distinguish executables while remaining independent of the installation location
  // An empty filename maps to the all-ones id; FD is not used yet.
  return Filename.empty() ? 0xffff'ffff'ffff'ffff : XXH3_64bits(Filename.data(), Filename.size());
}

// Header at the very start of a serialized code cache file.
// SaveData() writes this struct as raw bytes and LoadData() copies it back with memcpy,
// so the member layout is part of the file format.
struct CodeCacheHeader {
  // File magic; compared against ExpectedMagic when loading.
  std::array<char, 4> Magic = ExpectedMagic;
  uint32_t FormatVersion = 1;
  // GIT_HASH of the FEX build that wrote the file; files with a different hash are rejected when loading.
  uint8_t FEXVersion[20] = {};
  // Number of guest<->host block mappings that follow the header.
  uint32_t NumBlocks;
  // Number of entries in the code page list.
  uint32_t NumCodePages;
  // Size in bytes of the serialized host code.
  uint32_t CodeBufferSize;
  // Number of relocation records.
  uint32_t NumRelocations;
  // Explicit padding in front of the 64-bit member below.
  uint32_t padding;
  // The SerializedBaseAddress that was passed to SaveData().
  uint64_t SerializedBaseAddress;
  // TODO: Consider including information from LookupCache.BlockLinks

  static constexpr std::array<char, 4> ExpectedMagic = {'F', 'X', 'C', 'C'};
};

// Satisfied by container types that expose a nested key_compare type.
// SaveData() uses this in static_asserts to tell ordered containers from unordered ones.
template<typename T>
concept OrderedContainer = requires { typename T::key_compare; };

// Serializes the context's latest code buffer together with the lookup cache data of Thread to fd.
// Layout as written here: header, guest<->host block mappings, relocations, zero padding up to the
// next page boundary, host code, code page entrypoint lists.
// Guest addresses are stored relative to SourceBinary.FileStartVA, host code addresses relative to
// the start of the code buffer.
// Returns false if applying the code relocations fails and true otherwise.
// The results of the individual write() calls are not checked.
bool CodeCache::SaveData(Core::InternalThreadState& Thread, int fd, const ExecutableFileSectionInfo& SourceBinary, uint64_t SerializedBaseAddress) {
  auto CodeBuffer = CTX.GetLatest();
  auto& LookupCache = *Thread.LookupCache->Shared;
  auto Relocations = Thread.CPUBackend->TakeRelocations(SourceBinary.FileStartVA);

  // Write file header
  CodeCacheHeader header {};
  static_assert(GIT_HASH.size() == sizeof(header.FEXVersion));
  std::ranges::copy(GIT_HASH, header.FEXVersion);
  header.NumBlocks = LookupCache.BlockList.size();
  header.NumCodePages = LookupCache.CodePages.size();
  header.CodeBufferSize = CTX.LatestOffset;
  header.NumRelocations = Relocations.size();
  header.SerializedBaseAddress = SerializedBaseAddress;
  ::write(fd, &header, sizeof(header));

  // Dump guest<->host block mappings
  {
    // Cache contents must be deterministic, so copy the unordered block list and then sort by key
    static_assert(!OrderedContainer<decltype(LookupCache.BlockList)>, "Already deterministic; drop temporary container");
    fextl::vector<std::pair<uint64_t, const GuestToHostMap::BlockEntry*>> BlockList;
    BlockList.reserve(LookupCache.BlockList.size());
    for (auto& [Guest, BlockEntry] : LookupCache.BlockList) {
      static_assert(sizeof(Guest) == 8, "Breaking change in code cache data layout");
      BlockList.emplace_back(Guest, &BlockEntry);
    }
    std::ranges::sort(BlockList);

    // Per block: guest offset, host code offset, number of code pages, then the code page offsets
    for (auto [Guest, Host] : BlockList) {
      static_assert(sizeof(Host->HostCode) == 8, "Breaking change in code cache data layout");
      static_assert(sizeof(Host->CodePages[0]) == 8, "Breaking change in code cache data layout");

      Guest -= SourceBinary.FileStartVA;
      ::write(fd, &Guest, sizeof(Guest));
      uint64_t HostCode = Host->HostCode - reinterpret_cast<uintptr_t>(CodeBuffer->Ptr);
      ::write(fd, &HostCode, sizeof(HostCode));
      uint64_t NumCodePages = Host->CodePages.size();
      ::write(fd, &NumCodePages, sizeof(NumCodePages));
      LOGMAN_THROW_A_FMT(std::ranges::is_sorted(Host->CodePages), "Code pages aren't sorted");
      for (auto CodePage : Host->CodePages) {
        CodePage -= SourceBinary.FileStartVA;
        ::write(fd, &CodePage, sizeof(CodePage));
      }
    }
  }

  // Dump relocations
  static_assert(sizeof(Relocations[0]) == 48, "Breaking change in code cache data layout");
  ::write(fd, Relocations.data(), Relocations.size() * sizeof(Relocations[0]));

  // Pad to next page in file so that the CodeBuffer can be mmap'ed into process on load
  // The padding is written in chunks of at most sizeof(Zero) bytes
  char Zero[64] {};
  auto Off = lseek(fd, 0, SEEK_CUR);
  while (Off != AlignUp(Off, Utils::FEX_PAGE_SIZE)) {
    auto BytesToWrite = std::min(AlignUp(Off, Utils::FEX_PAGE_SIZE) - Off, sizeof(Zero));
    ::write(fd, Zero, BytesToWrite);
    Off += BytesToWrite;
  }

  // Dump the host code (relocated for position-independent serialization)
  // The span refers to the code buffer memory itself, not to a copy
  std::span CodeBufferData(reinterpret_cast<std::byte*>(CodeBuffer->Ptr), reinterpret_cast<std::byte*>(CodeBuffer->Ptr) + CTX.LatestOffset);
  if (!ApplyCodeRelocations(SerializedBaseAddress, CodeBufferData, Relocations, true)) {
    LOGMAN_THROW_A_FMT(false, "Failed to apply code relocations");
    // NOTE(review): everything up to the padding has already been written to fd at this point
    return false;
  }
  ::write(fd, CodeBufferData.data(), CodeBufferData.size());

  // Dump code pages
  // Per page: page offset, number of entrypoints, then the entrypoint offsets
  static_assert(OrderedContainer<decltype(LookupCache.CodePages)>, "Non-deterministic data source");
  for (const auto& [PageIndex, Entrypoints] : LookupCache.CodePages) {
    // NOTE(review): the shift assumes 4KiB pages while the padding above uses Utils::FEX_PAGE_SIZE; confirm both agree
    uint64_t PageAddr = (PageIndex << 12) - SourceBinary.FileStartVA;
    ::write(fd, &PageAddr, sizeof(PageAddr));
    uint64_t NumEntrypoints = Entrypoints.size();
    ::write(fd, &NumEntrypoints, sizeof(NumEntrypoints));
    for (uint64_t Entrypoint : Entrypoints) {
      Entrypoint -= SourceBinary.FileStartVA;
      ::write(fd, &Entrypoint, sizeof(Entrypoint));
    }
  }

  return true;
}

// Loads previously serialized JIT code for one executable section from a memory-mapped cache file.
//
// MappedCacheFile is used as a read cursor: every read below is followed by advancing the pointer
// past the bytes that were consumed. The data is read in this order:
//   1. CodeCacheHeader
//   2. header.NumBlocks block entries (guest offset, host code offset, code page count, code pages)
//   3. header.NumRelocations relocation records
//   4. padding up to the next page boundary
//   5. header.CodeBufferSize bytes of host code
//   6. header.NumCodePages records of (page offset, entrypoint count, entrypoints)
//
// Guest addresses in the file are stored relative to the file start; BinarySection.FileStartVA is
// added back when they are registered.
//
// Thread may be null. In that case the CodeBuffer update check is skipped and the code buffer
// cannot be grown (ERROR_AND_DIE_FMT is hit if it is too small).
//
// Returns true if caching is disabled, if the cache holds no blocks for the requested section, or
// if the data was loaded. Returns false if the header magic or FEX version does not match, or if
// the cache reports zero blocks.
bool CodeCache::LoadData(Core::InternalThreadState* Thread, std::byte* MappedCacheFile, const ExecutableFileSectionInfo& BinarySection) {
  if (!EnableCodeCaching) {
    return true;
  }

  namespace ranges = std::ranges;

  // Read file header
  CodeCacheHeader header {};
  ::memcpy(&header, MappedCacheFile, sizeof(header));
  MappedCacheFile += sizeof(header);

  // NOTE: this is logged before the header has been validated, so the values may be garbage for a corrupt file
  LogMan::Msg::IFmt("Cache load: {:5} blocks; base={:#14x}; off={:#9x}-{:#09x}; {:016x} {}", header.NumBlocks, BinarySection.FileStartVA,
                    BinarySection.BeginVA - BinarySection.FileStartVA, BinarySection.EndVA - BinarySection.FileStartVA,
                    BinarySection.FileInfo.FileId, BinarySection.FileInfo.Filename);

  if (!ranges::equal(header.Magic, header.ExpectedMagic)) {
    LogMan::Msg::EFmt("Invalid cache file header");
    return false;
  }

  // Caches are only accepted when written by the exact same FEX build (compared by git hash)
  if (!ranges::equal(header.FEXVersion, GIT_HASH)) {
    LogMan::Msg::IFmt("Cache generated from old FEX version {:02x}, current is {:02x}; skipping", fmt::join(header.FEXVersion, ""),
                      fmt::join(GIT_HASH, ""));
    return false;
  }

  if (header.NumBlocks == 0) {
    // Valid caches are never empty
    LogMan::Msg::IFmt("Code cache empty, aborting");
    return false;
  }

  // Read guest<->host block mappings
  using BlockListEntry = decltype(GuestToHostMap::BlockList)::value_type;
  fextl::vector<BlockListEntry> BlockList(header.NumBlocks);
  {
    for (auto& BlockPtr : BlockList) {
      // Guest address of the block, relative to the file start
      ::memcpy(&BlockPtr.first, MappedCacheFile, sizeof(BlockPtr.first));
      MappedCacheFile += sizeof(BlockPtr.first);
      // Host code location; later added to the base of the loaded code range
      ::memcpy(&BlockPtr.second.HostCode, MappedCacheFile, sizeof(BlockPtr.second.HostCode));
      MappedCacheFile += sizeof(BlockPtr.second.HostCode);
      uint64_t NumGuestPages;
      ::memcpy(&NumGuestPages, MappedCacheFile, sizeof(NumGuestPages));
      MappedCacheFile += sizeof(NumGuestPages);

      // Guest code pages covered by this block (also relative to the file start)
      BlockPtr.second.CodePages.resize(NumGuestPages);
      ::memcpy(BlockPtr.second.CodePages.data(), MappedCacheFile, std::span {BlockPtr.second.CodePages}.size_bytes());
      MappedCacheFile += std::span {BlockPtr.second.CodePages}.size_bytes();
    }

    // Consistency check: VMA regions at the top and end should belong to the same file
    // (BlockList is non-empty here because header.NumBlocks == 0 was rejected above)
    auto [min_val, max_val] = ranges::minmax_element(BlockList, std::less {}, &decltype(BlockList)::value_type::first);
    auto MinBound = CTX.SyscallHandler->LookupExecutableFileSection(Thread, min_val->first + BinarySection.FileStartVA);
    auto MaxBound = CTX.SyscallHandler->LookupExecutableFileSection(Thread, max_val->first + BinarySection.FileStartVA);
    if (&MinBound->FileInfo != &BinarySection.FileInfo || &MaxBound->FileInfo != &BinarySection.FileInfo) {
      ERROR_AND_DIE_FMT("Cached blocks offsets {:#x}-{:#x} out of bounds for guest library {} ({:016x} @ {:#x}) while trying to load "
                        "section {:#x}-{:#x}!",
                        min_val->first, max_val->first, BinarySection.FileInfo.Filename, BinarySection.FileInfo.FileId,
                        BinarySection.FileStartVA, BinarySection.BeginVA, BinarySection.EndVA);
    }

    // Constrain BlockList to the given ExecutableFileSectionInfo
    // The binary searches below rely on the list being sorted by guest offset.
    LOGMAN_THROW_A_FMT(ranges::is_sorted(BlockList, [](auto& a, auto& b) { return a.first < b.first; }), "Expected sorted block list");
    // begin: first block at or after the section start; end: first block past the last byte of the section
    auto begin = ranges::lower_bound(BlockList, BinarySection.BeginVA - BinarySection.FileStartVA, std::less {}, &BlockListEntry::first);
    auto end =
      ranges::upper_bound(begin, BlockList.end(), BinarySection.EndVA - BinarySection.FileStartVA - 1, std::less {}, &BlockListEntry::first);
    BlockList.erase(end, BlockList.end());
    BlockList.erase(BlockList.begin(), begin);
    if (BlockList.empty()) {
      // Not an error since there is just no data to load
      LogMan::Msg::IFmt("No blocks cached in this range, aborting");
      return true;
    }
  }

  // Read relocations
  fextl::vector<FEXCore::CPU::Relocation> Relocations(header.NumRelocations, FEXCore::CPU::Relocation::Default());
  ::memcpy(Relocations.data(), MappedCacheFile, Relocations.size() * sizeof(Relocations[0]));
  MappedCacheFile += Relocations.size() * sizeof(Relocations[0]);

  // Pad to next page in file, which contains CodeBuffer data
  MappedCacheFile = reinterpret_cast<std::byte*>(AlignUp(reinterpret_cast<uintptr_t>(MappedCacheFile), Utils::FEX_PAGE_SIZE));

  // Prepare CodeBuffer: Page aligned and big enough to hold all cached data
  // The write mutex is held until the function returns.
  auto Lock = std::unique_lock {CTX.CodeBufferWriteMutex};
  if (Thread) {
    // If the backend reports a previous CodeBuffer, switch this thread's lookup cache over to the latest one first
    if (auto Prev = Thread->CPUBackend->CheckCodeBufferUpdate()) {
      Allocator::VirtualDontNeed(Thread->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE);
      auto lk = Thread->LookupCache->AcquireWriteLock();
      Thread->LookupCache->ChangeGuestToHostMapping(*Prev, *CTX.GetLatest()->LookupCache, lk);
    }
  }

  auto CodeBuffer = CTX.GetLatest();
  LOGMAN_THROW_A_FMT(reinterpret_cast<uintptr_t>(CodeBuffer->Ptr) % 0x1000 == 0, "Expected CodeBuffer base to be page-aligned");
  // Round the current write offset up to the next 4 KiB boundary so the loaded code starts page-aligned
  const auto Delta = AlignUp(CTX.LatestOffset, 0x1000) - CTX.LatestOffset;
  CTX.LatestOffset += Delta;

  // Keep clearing the code cache until the cached code fits; the log message below reports the resulting buffer size
  while (CTX.LatestOffset + header.CodeBufferSize > CodeBuffer->UsableSize()) {
    if (Thread) {
      CTX.ClearCodeCache(Thread);
      CodeBuffer = CTX.GetLatest();
      LogMan::Msg::IFmt("Increased code buffer size to {} MiB for cache load", CodeBuffer->AllocatedSize / 1024 / 1024);
    } else {
      ERROR_AND_DIE_FMT("Cannot extend codebuffer without thread!");
    }
  }

  // Read CodeBuffer data from file. Make sure the destination is page-aligned.
  // TODO: Only load the data needed for the selected section
  auto CodeBufferRange =
    std::as_writable_bytes(std::span {CodeBuffer->Ptr, CodeBuffer->UsableSize()}).subspan(CTX.LatestOffset, header.CodeBufferSize);
  ::memcpy(CodeBufferRange.data(), MappedCacheFile, header.CodeBufferSize);
  MappedCacheFile += header.CodeBufferSize;
  CTX.LatestOffset += header.CodeBufferSize;

  // Apply FEX relocations
  // (ForStorage=false: patch in the real addresses for this process and this guest base address)
  auto Ret = ApplyCodeRelocations(BinarySection.FileStartVA, CodeBufferRange, Relocations, false);
  LOGMAN_THROW_A_FMT(Ret == true, "Failed to apply code cache relocations");

  {
    auto& LookupCache = *CodeBuffer->LookupCache;
    auto WriteLock = LookupCache.AcquireWriteLock();

    // Register blocks to LookupCache
    for (auto& [Guest, Host] : BlockList) {
      // Turn file-relative page addresses back into guest virtual addresses
      for (auto& CodePage : Host.CodePages) {
        CodePage += BinarySection.FileStartVA;
      }
      auto HostCode = reinterpret_cast<void*>(Host.HostCode + reinterpret_cast<uintptr_t>(CodeBufferRange.data()));
      // Host.CodePages is moved-from after this call; only Guest and Host.HostCode are used afterwards
      LookupCache.AddBlockMapping(Guest + BinarySection.FileStartVA, std::move(Host.CodePages), HostCode, WriteLock);
    }

    // Register loaded code ranges
    fextl::vector<uint64_t> Entrypoints;
    for (uint32_t i = 0; i < header.NumCodePages; ++i) {
      uint64_t CodePage;
      memcpy(&CodePage, MappedCacheFile, sizeof(CodePage));
      CodePage += BinarySection.FileStartVA;
      MappedCacheFile += sizeof(CodePage);

      uint64_t NumEntrypoints;
      memcpy(&NumEntrypoints, MappedCacheFile, sizeof(NumEntrypoints));
      MappedCacheFile += sizeof(NumEntrypoints);

      // The vector is reused across iterations; resize() sets it to exactly this page's entrypoint count
      Entrypoints.resize(NumEntrypoints);
      memcpy(Entrypoints.data(), MappedCacheFile, NumEntrypoints * sizeof(Entrypoints[0]));
      MappedCacheFile += NumEntrypoints * sizeof(Entrypoints[0]);
      for (auto& Entrypoint : Entrypoints) {
        Entrypoint += BinarySection.FileStartVA;
      }

      // Only notify the syscall handler when AddBlockExecutableRange returns true
      if (LookupCache.AddBlockExecutableRange(Entrypoints, CodePage, FEXCore::Utils::FEX_PAGE_SIZE, WriteLock)) {
        CTX.SyscallHandler->MarkGuestExecutableRange(Thread, CodePage, FEXCore::Utils::FEX_PAGE_SIZE);
      }
    }
  }

  if (EnableCodeCacheValidation) {
    // Collect absolute guest addresses and the (still unrelocated) host code values for validation
    fextl::set<uint64_t> GuestBlocks, HostBlocks;
    for (auto& [Guest, Host] : BlockList) {
      GuestBlocks.insert(Guest + BinarySection.FileStartVA);
      HostBlocks.insert(Host.HostCode);
    }

    Validate(BinarySection, std::move(GuestBlocks), HostBlocks, CodeBufferRange);
  }

  return true;
}

void CodeCache::Validate(const ExecutableFileSectionInfo& Section, fextl::set<uint64_t> GuestBlocks, const fextl::set<uint64_t>& HostBlocks,
                         std::span<std::byte> CachedCode) {
  LOGMAN_THROW_A_FMT(!HostBlocks.empty(), "Tried to validate without any host blocks");
  // Skip any cached data before the first host block
  CachedCode = CachedCode.subspan(*HostBlocks.begin() - sizeof(CPU::CPUBackend::JITCodeHeader));

  if (!ValidationCTX) {
    ValidationCTX.reset(static_cast<ContextImpl*>(FEXCore::Context::Context::CreateNewContext(CTX.HostFeatures).release()));
    ValidationCTX->SetSignalDelegator(CTX.SignalDelegation);
    ValidationCTX->SetSyscallHandler(CTX.SyscallHandler);
    ValidationCTX->SetThunkHandler(CTX.ThunkHandler);
    if (!ValidationCTX->InitCore()) {
      ERROR_AND_DIE_FMT("Failed to create cache load validation context");
    }

    ValidationThread.reset(ValidationCTX->CreateThread(0, 0, nullptr));

    auto Frame = ValidationThread->CurrentFrame;
    Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &ValidationGDT[0];
    Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &ValidationGDT[0];
    Frame->State.cs_idx = 0;
    Frame->State.cs_cached = 0;

    if (ValidationCTX->Config.Is64BitMode()) {
      ValidationGDT[0].L = 1; // L = Long Mode = 64-bit
      ValidationGDT[0].D = 0; // D = Default Operand Size = Reserved
    } else {
      ValidationGDT[0].L = 0; // L = Long Mode = 32-bit
      ValidationGDT[0].D = 1; // D = Default Operand Size = 32-bit
    }
  }

  auto NewCodeBuffer = ValidationCTX->GetLatest();
  while (CachedCode.size_bytes() > NewCodeBuffer->UsableSize()) {
    ValidationCTX->ClearCodeCache(ValidationThread.get());
    NewCodeBuffer = ValidationCTX->GetLatest();
    LogMan::Msg::IFmt("Increased cache validation code buffer size to {} MiB", NewCodeBuffer->AllocatedSize / 1024 / 1024);
  }

  std::span<std::byte> CodeBufferRangeRef =
    std::as_writable_bytes(std::span {NewCodeBuffer->Ptr, NewCodeBuffer->Ptr + NewCodeBuffer->UsableSize()}).subspan(0, CachedCode.size_bytes());

  while (!GuestBlocks.empty()) {
    auto [CompiledBlocks, _, _2, _3, _4] = ValidationCTX->CompileCode(ValidationThread.get(), *GuestBlocks.begin(), 0 /* TODO: Set MaxInst? */);
    for (auto& Entry : CompiledBlocks.EntryPoints) {
      GuestBlocks.erase(Entry.first);
    }
  }

  // Patch FEX-internal function addresses with values from the main Context to ensure the code blocks are comparable
  auto NewRelocations = ValidationThread->CPUBackend->TakeRelocations(Section.FileStartVA);
  NewRelocations.erase(std::remove_if(NewRelocations.begin(), NewRelocations.end(), [](const CPU::Relocation& Reloc) {
    return Reloc.Header.Type != CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL && Reloc.Header.Type != CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE;
  }));
  (void)ApplyCodeRelocations(Section.FileStartVA, CodeBufferRangeRef, NewRelocations, false);

  if (ValidationCTX->LatestOffset <= CodeBufferRangeRef.size()) {
    // Reference compilation produced fewer bytes than our cache, so validation is going to fail.
    // Make sure we don't output any garbage bytes though.
    CodeBufferRangeRef = CodeBufferRangeRef.subspan(0, ValidationCTX->LatestOffset);
  }

  auto [Mismatch, _] = std::mismatch(CodeBufferRangeRef.begin(), CodeBufferRangeRef.end(), CachedCode.begin());
  if (Mismatch != CodeBufferRangeRef.end()) {
    // Align down to instruction size
    auto Idx = AlignDown(std::distance(CodeBufferRangeRef.begin(), Mismatch), 4);

    auto BlockIt = std::prev(HostBlocks.lower_bound(*HostBlocks.begin() + Idx + 1));
    std::optional<uint64_t> GuestBlockAddr;
    std::optional<uint64_t> GuestBlockAddrRef;
    if (BlockIt != HostBlocks.end()) {
      for (int i : {0, 1}) {
        std::span Buffer = (i == 0 ? CachedCode : CodeBufferRangeRef);

        // Second instruction is always a constant load for relative offset to the (multi)block start
        int32_t addr = (*reinterpret_cast<uint32_t*>(&Buffer[*BlockIt - *HostBlocks.begin() + 4]) & 0x3ff'ffe0) << 11;
        addr >>= 14;
        auto header = reinterpret_cast<CPU::CPUBackend::JITCodeHeader*>(&Buffer[*BlockIt - *HostBlocks.begin() + 4 + addr]);
        auto tail = reinterpret_cast<CPU::CPUBackend::JITCodeTail*>(reinterpret_cast<uintptr_t>(header) + header->OffsetToBlockTail);
        (i == 0 ? GuestBlockAddr : GuestBlockAddrRef) = tail->RIP - Section.FileStartVA;
        LogMan::Msg::EFmt("Recorded rip {}: {:#x} (offset {:#x})", i, tail->RIP, tail->RIP - Section.FileStartVA);

        if (i == 1) {
          if (tail->RIP >= Section.BeginVA && tail->RIP < Section.EndVA) {
            auto [IRView, TotalInstructions, TotalInstructionsLength, StartAddr, Length, _] =
              ValidationCTX->GenerateIR(ValidationThread.get(), tail->RIP, false, FEXCore::Config::Get_MAXINST());
            fextl::stringstream ss;
            FEXCore::IR::Dump(&ss, &*IRView);
            LogMan::Msg::EFmt("IR:\n{}", ss.str());
          } else {
            LogMan::Msg::EFmt("Can't dump IR for out-of-range RIP {:#x}", tail->RIP);
          }
        }
      }
    }

    fextl::string GuestBlockInfo = "UNKNOWN";
    if (GuestBlockAddr) {
      GuestBlockInfo = fextl::fmt::format("{:#x}", GuestBlockAddr.value());
    }
    if (GuestBlockAddr != GuestBlockAddrRef) {
      GuestBlockInfo += " (MISMATCH)";
    }
    ERROR_AND_DIE_FMT("Cache validation failed at offset {:#x}: {:02x} <-> {:02x} (at {} <-> {}, guest block {})", Idx,
                      fmt::join(CachedCode.subspan(Idx, 4), ""), fmt::join(CodeBufferRangeRef.subspan(Idx, 4), ""),
                      fmt::ptr(CachedCode.data()), fmt::ptr(CodeBufferRangeRef.data()), GuestBlockInfo);
  }

  // Reset Context state for next validation
  ValidationThread->LookupCache->ClearCache(ValidationThread->LookupCache->AcquireWriteLock());
  ValidationCTX->LatestOffset = 0;

  LogMan::Msg::IFmt("\tSuccessfully validated cache");
}

bool CodeCache::ApplyCodeRelocations(uint64_t GuestEntry, std::span<std::byte> Code,
                                     std::span<const FEXCore::CPU::Relocation> EntryRelocations, bool ForStorage) {
  CPU::Arm64Emitter Emitter(&CTX, Code.data(), Code.size_bytes());
  for (size_t j = 0; j < EntryRelocations.size(); ++j) {
    const FEXCore::CPU::Relocation& Reloc = EntryRelocations[j];
    Emitter.SetCursorOffset(Reloc.Header.Offset);

    switch (Reloc.Header.Type) {
    case FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL: {
      // Generate a literal so we can place it
      uint64_t Pointer = ForStorage ? 0 : GetNamedSymbolLiteral(CTX, Reloc.NamedSymbolLiteral.Symbol);
      Emitter.dc64(Pointer);
      break;
    }
    case FEXCore::CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE: {
      uint64_t Pointer = ForStorage ? 0 : reinterpret_cast<uint64_t>(CTX.ThunkHandler->LookupThunk(Reloc.NamedThunkMove.Symbol));
      if (Pointer == ~0ULL) {
        return false;
      }
      // TODO: Pointers are required to fit within 48-bit VA space.
      // But forcing 6-byte broke relocations.
      Emitter.LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc.NamedThunkMove.RegisterIndex), Pointer,
                           CPU::Arm64Emitter::PadType::DOPAD);
      break;
    }
    case FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_LITERAL: {
      Emitter.dc64(GuestEntry + Reloc.GuestRIP.GuestRIP);
      break;
    }
    case FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_MOVE: {
      uint64_t Pointer = Reloc.GuestRIP.GuestRIP + GuestEntry;
      // TODO: Pointers are required to fit within 48-bit VA space.
      // But forcing 6-byte broke relocations.
      Emitter.LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Register(Reloc.GuestRIP.RegisterIndex), Pointer, CPU::Arm64Emitter::PadType::DOPAD);
      break;
    }

    default: ERROR_AND_DIE_FMT("Unknown relocation type {}", ToUnderlying(Reloc.Header.Type));
    }
  }

  return true;
}

} // namespace FEXCore::Context


================================================
FILE: FEXCore/Source/Interface/Core/Core.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
category: glue ~ Logic that binds various parts together
meta: glue|driver ~ Emulation mainloop related glue logic
tags: glue|driver
desc: Glues Frontend, OpDispatcher and IR Opts & Compilation, LookupCache, Dispatcher and provides the Execution loop entrypoint
$end_info$
*/

#include <cstdint>
#ifdef ZYDIS_DISASSEMBLER
#include <Zydis/Zydis.h>
#endif
#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/CPUID.h"
#include "Interface/Core/Frontend.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/JIT/JITClass.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include <Interface/GDBJIT/GDBJIT.h>
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"
#include "Interface/IR/Passes.h"
#include "Interface/IR/PassManager.h"
#include "Interface/IR/RegisterAllocationData.h"
#include "Utils/Allocator.h"
#include "Utils/Allocator/HostAllocator.h"
#include "Utils/SpinWaitLock.h"
#include "Utils/variable_length_integer.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/Thunks.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/HLE/SourcecodeResolver.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/Event.h>
#include <FEXCore/Utils/File.h>
#include <FEXCore/Utils/LogManager.h>
#include "FEXCore/Utils/SignalScopeGuards.h"
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/SHMStats.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <fcntl.h>
#include <functional>
#include <mutex>
#include <queue>
#include <shared_mutex>
#include <signal.h>
#include <stdio.h>
#include <string_view>
#include <sys/stat.h>
#include <type_traits>
#include <unistd.h>
#include <unordered_map>
#include <utility>
#include <xxhash.h>

namespace FEXCore::Context {
// Constructs the context from the detected host features and applies configuration-dependent
// startup adjustments (32-bit address space limit, JIT symbol file, TSC scaling).
ContextImpl::ContextImpl(const FEXCore::HostFeatures& Features)
  : HostFeatures {Features}
  , CPUID {this}
  , CodeCache {*this} {
  // When operating in 32-bit mode, the virtual memory we care about is only the lower 32-bits.
  if (!Config.Is64BitMode()) {
    Config.VirtualMemSize = 1ULL << 32;
  }

  // Only initialize symbols file if enabled. Ensures we don't pollute /tmp with empty files.
  const bool AnyJITNamingEnabled = Config.BlockJITNaming() || Config.GlobalJITNaming() || Config.LibraryJITNaming();
  if (AnyJITNamingEnabled) {
    Symbols.InitFile();
  }

  uint64_t ScaledFrequency = FEXCore::GetCycleCounterFrequency();
  const bool NeedsTSCScaling = ScaledFrequency != 0 && ScaledFrequency < FEXCore::Context::TSC_SCALE_MAXIMUM && Config.SmallTSCScale();
  if (NeedsTSCScaling) {
    // Scale TSC until it is at the minimum required.
    // The frequency is known to be below the limit here, so at least one doubling always happens.
    do {
      ScaledFrequency <<= 1;
      ++Config.TSCScale;
    } while (ScaledFrequency < FEXCore::Context::TSC_SCALE_MAXIMUM);
  }

  // Track atomic TSO emulation configuration.
  UpdateAtomicTSOEmulationConfig();
}

// Result of GetFrameBlockInfo: pointers to the inline metadata of the JIT block that a
// CPU state frame currently references.
struct GetFrameBlockInfoResult {
  // Block header, reinterpreted from Frame->State.InlineJITBlockHeader. Null if that field is zero.
  const CPU::CPUBackend::JITCodeHeader* InlineHeader;
  // Block tail, located OffsetToBlockTail bytes after the header. Null whenever InlineHeader is null.
  const CPU::CPUBackend::JITCodeTail* InlineTail;
};
// Resolves the inline JIT block header and tail referenced by the frame.
// Both pointers are null if the frame does not reference a block.
static GetFrameBlockInfoResult GetFrameBlockInfo(FEXCore::Core::CpuStateFrame* Frame) {
  const uint64_t HeaderAddress = Frame->State.InlineJITBlockHeader;
  const auto Header = reinterpret_cast<const CPU::CPUBackend::JITCodeHeader*>(HeaderAddress);

  if (!Header) {
    return {Header, nullptr};
  }

  // The tail is addressed relative to the header
  const auto Tail = reinterpret_cast<const CPU::CPUBackend::JITCodeTail*>(HeaderAddress + Header->OffsetToBlockTail);
  return {Header, Tail};
}

// Returns true if the guest range [Address, Address + Size) overlaps the guest code range of the
// block the thread's frame currently references. Returns false if there is no such block.
bool ContextImpl::IsAddressInCurrentBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t Size) {
  const auto Tail = GetFrameBlockInfo(Thread->CurrentFrame).InlineTail;
  if (!Tail) {
    return false;
  }

  const bool EndsAfterBlockStart = Address + Size > Tail->RIP;
  const bool StartsBeforeBlockEnd = Address < Tail->RIP + Tail->GuestSize;
  return EndsAfterBlockStart && StartsBeforeBlockEnd;
}

// Returns the SingleInst flag of the block the thread's frame currently references,
// or false if the frame references no block.
bool ContextImpl::IsCurrentBlockSingleInst(FEXCore::Core::InternalThreadState* Thread) {
  const auto Tail = GetFrameBlockInfo(Thread->CurrentFrame).InlineTail;
  if (!Tail) {
    return false;
  }
  return Tail->SingleInst;
}

// Returns the guest RIP recorded in the tail of the block the thread's frame currently
// references, or 0 if the frame references no block.
uint64_t ContextImpl::GetGuestBlockEntry(FEXCore::Core::InternalThreadState* Thread) {
  const auto Tail = GetFrameBlockInfo(Thread->CurrentFrame).InlineTail;
  if (Tail) {
    return Tail->RIP;
  }
  return 0;
}

// Reconstructs the guest RIP that corresponds to a host program counter.
//
// If HostPC lies inside the JIT block currently referenced by the thread's frame, the block's
// RIP entries are walked to find the guest RIP for that host PC. Otherwise the RIP stored in
// the frame's CPU state is returned.
uint64_t ContextImpl::RestoreRIPFromHostPC(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC) {
  const auto Frame = Thread->CurrentFrame;
  const uint64_t BlockStart = Frame->State.InlineJITBlockHeader;
  const auto [Header, Tail] = GetFrameBlockInfo(Frame);

  // Check if the host PC is currently within a code block.
  // If it is then RIP can be reconstructed from the beginning of the code block.
  // This is currently as close as FEX can get RIP reconstructions.
  const bool InsideBlock = Header && HostPC >= BlockStart && HostPC < BlockStart + Tail->Size;
  if (!InsideBlock) {
    // Fallback to what is stored in the RIP currently.
    return Frame->State.rip;
  }

  // The RIP entries are stored relative to the block tail
  auto EntryCursor = reinterpret_cast<const uint8_t*>(BlockStart + Header->OffsetToBlockTail + Tail->OffsetToRIPEntries);

  // Reconstruct RIP from JIT entries for this block.
  uint64_t CurrentHostPC = BlockStart;
  uint64_t CurrentGuestRIP = Tail->RIP;

  for (uint32_t EntryIndex = 0; EntryIndex < Tail->NumberOfRIPEntries; ++EntryIndex) {
    const auto Delta = FEXCore::Utils::vl64pair::Decode(EntryCursor);
    EntryCursor += Delta.Size;

    if (HostPC < (CurrentHostPC + Delta.IntegerARMPC)) {
      // Passed where the Host PC is at. Stop now.
      break;
    }

    // We are beyond this entry, keep going forward.
    CurrentHostPC += Delta.IntegerARMPC;
    CurrentGuestRIP += Delta.IntegerX86RIP;
  }

  return CurrentGuestRIP;
}

// Builds the architectural x86 EFLAGS value from FEX's internal flag representation.
//
// Thread:   thread whose CPU state is read (pf_raw/af_raw are also written when WasInJIT is set).
// WasInJIT: when true, NZCV is taken from PSTATE and PF/AF are taken from HostGPRs;
//           otherwise everything is read from the CPU state.
// HostGPRs: host register values, indexed by host register index; only read when WasInJIT is true.
// PSTATE:   host PSTATE value; only used when WasInJIT is true.
uint32_t ContextImpl::ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, bool WasInJIT, const uint64_t* HostGPRs,
                                                 uint64_t PSTATE) {
  auto& State = Thread->CurrentFrame->State;
  uint32_t Result {};

  // Flags without special storage map 1:1 into the resulting value.
  for (size_t Bit = 0; Bit < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++Bit) {
    switch (Bit) {
    case X86State::RFLAG_CF_RAW_LOC:
    case X86State::RFLAG_PF_RAW_LOC:
    case X86State::RFLAG_AF_RAW_LOC:
    case X86State::RFLAG_TF_RAW_LOC:
    case X86State::RFLAG_ZF_RAW_LOC:
    case X86State::RFLAG_SF_RAW_LOC:
    case X86State::RFLAG_OF_RAW_LOC:
    case X86State::RFLAG_DF_RAW_LOC:
      // Handled separately below.
      // These contain multiple bits which can corrupt other members when compacted.
      break;
    default: Result |= uint32_t {State.flags[Bit]} << Bit; break;
    }
  }

  uint32_t NZCV {};
  if (!WasInJIT) {
    // Outside of the JIT the NZCV state is stored in the CPUState RFLAG_NZCV_LOC.
    // SF/ZF/CF/OF are packed in a 32-bit value in RFLAG_NZCV_LOC.
    memcpy(&NZCV, &State.flags[X86State::RFLAG_NZCV_LOC], sizeof(NZCV));
  } else {
    // Inside the JIT, NZCV is in the CPU's PSTATE object.
    // Packed in to the same bit locations as RFLAG_NZCV_LOC.
    NZCV = PSTATE;

    // Inside the JIT, PF and AF live in registers.
    // Move them to the CPUState frame now.
    State.pf_raw = HostGPRs[CPU::REG_PF.Idx()];
    State.af_raw = HostGPRs[CPU::REG_AF.Idx()];
  }

  const uint32_t Overflow = (NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC)) & 1;
  // CF is inverted in our representation, undo the invert here.
  const uint32_t Carry = ((NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC)) & 1) ^ 1;
  const uint32_t Zero = (NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC)) & 1;
  const uint32_t Sign = (NZCV >> IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC)) & 1;

  // Pack in to EFLAGS
  Result |= Overflow << X86State::RFLAG_OF_RAW_LOC;
  Result |= Carry << X86State::RFLAG_CF_RAW_LOC;
  Result |= Zero << X86State::RFLAG_ZF_RAW_LOC;
  Result |= Sign << X86State::RFLAG_SF_RAW_LOC;

  // PF calculation is deferred, calculate it now.
  // Popcount the 8-bit flag (with its lowest bit flipped) and then extract the lower bit.
  const uint32_t ParityByte = State.pf_raw & 0xff;
  const uint32_t Parity = std::popcount(ParityByte ^ 1) & 1;
  Result |= Parity << X86State::RFLAG_PF_RAW_LOC;

  // AF calculation is deferred, calculate it now.
  // XOR with PF byte and extract bit 4.
  const bool AuxCarrySet = ((State.af_raw ^ ParityByte) & (1 << 4)) != 0;
  const uint32_t AuxCarry = AuxCarrySet ? 1 : 0;
  Result |= AuxCarry << X86State::RFLAG_AF_RAW_LOC;

  // Only the lowest bit of the stored TF byte is architectural.
  const uint8_t TrapByte = State.flags[X86State::RFLAG_TF_RAW_LOC];
  Result |= (TrapByte & 1) << X86State::RFLAG_TF_RAW_LOC;

  // DF is pretransformed, undo the transform from 1/-1 back to 0/1
  const uint8_t DirectionByte = State.flags[X86State::RFLAG_DF_RAW_LOC];
  if (DirectionByte & 0x80) {
    Result |= 1 << X86State::RFLAG_DF_RAW_LOC;
  }

  return Result;
}

// Copies the guest vector registers out of the thread's CPU state.
//
// XMM_Low receives the low 128 bits of each register. YMM_High, if non-null and the host
// supports AVX, receives the upper 128 bits; otherwise only XMM_Low is written.
// 64-bit mode copies NUM_XMMS registers, 32-bit mode copies 8.
void ContextImpl::ReconstructXMMRegisters(const FEXCore::Core::InternalThreadState* Thread, __uint128_t* XMM_Low, __uint128_t* YMM_High) {
  constexpr size_t HalfBytes = sizeof(__uint128_t);
  const size_t RegisterCount = Config.Is64BitMode ? FEXCore::Core::CPUState::NUM_XMMS : 8;
  const auto& State = Thread->CurrentFrame->State;

  const bool WantsUpperHalves = YMM_High != nullptr && HostFeatures.SupportsAVX;
  if (!WantsUpperHalves) {
    // Only support SSE, no AVX here, even if requested.
    memcpy(XMM_Low, State.xmm.sse.data, RegisterCount * HalfBytes);
    return;
  }

  if (HostFeatures.SupportsSVE256) {
    ///< Converged registers: output wants to de-interleave
    for (size_t Reg = 0; Reg < RegisterCount; ++Reg) {
      memcpy(&XMM_Low[Reg], &State.xmm.avx.data[Reg][0], HalfBytes);
      memcpy(&YMM_High[Reg], &State.xmm.avx.data[Reg][2], HalfBytes);
    }
    return;
  }

  ///< Matches what FEX wants with non-converged registers
  for (size_t Reg = 0; Reg < RegisterCount; ++Reg) {
    memcpy(&XMM_Low[Reg], &State.xmm.sse.data[Reg][0], HalfBytes);
    memcpy(&YMM_High[Reg], &State.avx_high[Reg][0], HalfBytes);
  }
}

// Copies guest vector registers into the thread's CPU state.
//
// XMM_Low provides the low 128 bits of each register. YMM_High, if non-null and the host
// supports AVX, provides the upper 128 bits; otherwise only the low halves are written.
// 64-bit mode copies NUM_XMMS registers, 32-bit mode copies 8.
void ContextImpl::SetXMMRegistersFromState(FEXCore::Core::InternalThreadState* Thread, const __uint128_t* XMM_Low, const __uint128_t* YMM_High) {
  constexpr size_t HalfBytes = sizeof(__uint128_t);
  const size_t RegisterCount = Config.Is64BitMode ? FEXCore::Core::CPUState::NUM_XMMS : 8;
  auto& State = Thread->CurrentFrame->State;

  const bool HasUpperHalves = YMM_High != nullptr && HostFeatures.SupportsAVX;
  if (!HasUpperHalves) {
    // Only support SSE, no AVX here, even if requested.
    memcpy(State.xmm.sse.data, XMM_Low, RegisterCount * HalfBytes);
    return;
  }

  if (HostFeatures.SupportsSVE256) {
    ///< Converged registers: low and high halves share one storage slot
    for (size_t Reg = 0; Reg < RegisterCount; ++Reg) {
      memcpy(&State.xmm.avx.data[Reg][0], &XMM_Low[Reg], HalfBytes);
      memcpy(&State.xmm.avx.data[Reg][2], &YMM_High[Reg], HalfBytes);
    }
    return;
  }

  ///< Matches what FEX wants with non-converged registers
  for (size_t Reg = 0; Reg < RegisterCount; ++Reg) {
    memcpy(&State.xmm.sse.data[Reg][0], &XMM_Low[Reg], HalfBytes);
    memcpy(&State.avx_high[Reg][0], &YMM_High[Reg], HalfBytes);
  }
}

// Writes an architectural x86 EFLAGS value into the thread's CPU state using FEX's internal
// flag representation. The reserved flag and the interrupt flag are always stored as 1.
void ContextImpl::SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) {
  auto& State = Thread->CurrentFrame->State;
  const auto IsSet = [EFLAGS](size_t Bit) -> bool {
    return (EFLAGS & (1U << Bit)) != 0;
  };

  for (size_t Bit = 0; Bit < FEXCore::Core::CPUState::NUM_EFLAG_BITS; ++Bit) {
    const bool Value = IsSet(Bit);

    switch (Bit) {
    case X86State::RFLAG_OF_RAW_LOC:
    case X86State::RFLAG_CF_RAW_LOC:
    case X86State::RFLAG_ZF_RAW_LOC:
    case X86State::RFLAG_SF_RAW_LOC:
      // Stored in the packed NZCV value written below.
      break;
    case X86State::RFLAG_AF_RAW_LOC:
      // AF stored in bit 4 in our internal representation. It is also
      // XORed with bit 4 of the PF byte, but we write that as zero here so
      // we don't need any special handling for that.
      if (Value) {
        State.af_raw = (1 << 4);
      } else {
        State.af_raw = 0;
      }
      break;
    case X86State::RFLAG_PF_RAW_LOC:
      // PF is inverted in our internal representation.
      if (Value) {
        State.pf_raw = 0;
      } else {
        State.pf_raw = 1;
      }
      break;
    case X86State::RFLAG_DF_RAW_LOC:
      // DF is encoded as 1/-1
      if (Value) {
        State.flags[Bit] = 0xff;
      } else {
        State.flags[Bit] = 1;
      }
      break;
    default:
      if (Value) {
        State.flags[Bit] = 1;
      } else {
        State.flags[Bit] = 0;
      }
      break;
    }
  }

  // Calculate packed NZCV. Note CF is inverted.
  uint32_t NZCV {};
  if (IsSet(X86State::RFLAG_OF_RAW_LOC)) {
    NZCV |= 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_OF_RAW_LOC);
  }
  if (!IsSet(X86State::RFLAG_CF_RAW_LOC)) {
    NZCV |= 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_CF_RAW_LOC);
  }
  if (IsSet(X86State::RFLAG_ZF_RAW_LOC)) {
    NZCV |= 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_ZF_RAW_LOC);
  }
  if (IsSet(X86State::RFLAG_SF_RAW_LOC)) {
    NZCV |= 1U << IR::OpDispatchBuilder::IndexNZCV(X86State::RFLAG_SF_RAW_LOC);
  }
  memcpy(&State.flags[X86State::RFLAG_NZCV_LOC], &NZCV, sizeof(NZCV));

  // Reserved, Read-As-1, Write-as-1
  State.flags[X86State::RFLAG_RESERVED_LOC] = 1;
  // Interrupt Flag. Can't be written by CPL-3 userland.
  State.flags[X86State::RFLAG_IF_LOC] = 1;
}

// Creates the dispatcher, hands its configuration to the signal delegator and enables the
// pending-interrupt fault check where it is required. Always returns true.
bool ContextImpl::InitCore() {
#if defined(_WIN32) && !defined(ARCHITECTURE_arm64ec)
  // WOW64 always needs the interrupt fault check to be enabled.
  constexpr bool AlwaysNeedsInterruptFaultCheck = true;
#else
  constexpr bool AlwaysNeedsInterruptFaultCheck = false;
#endif

  // Initialize the CPU core signal handlers & DispatcherConfig
  Dispatcher = FEXCore::CPU::Dispatcher::Create(this);

  // Set up the SignalDelegator config since core is initialized.
  SignalDelegation->SetConfig(Dispatcher->MakeSignalDelegatorConfig());

  // If gdbserver is enabled then the check needs to be enabled as well.
  if (AlwaysNeedsInterruptFaultCheck || Config.GdbServer) {
    Config.NeedsPendingInterruptFaultCheck = true;
  }

  return true;
}

// Runs a JIT callback at the given guest RIP on the thread's current frame, using the
// dispatcher of the context that owns the thread.
void ContextImpl::HandleCallback(FEXCore::Core::InternalThreadState* Thread, uint64_t RIP) {
  auto OwningContext = static_cast<ContextImpl*>(Thread->CTX);
  OwningContext->Dispatcher->ExecuteJITCallback(Thread->CurrentFrame, RIP);
}

// Enters the dispatcher for the thread's current frame.
void ContextImpl::ExecuteThread(FEXCore::Core::InternalThreadState* Thread) {
  const auto Frame = Thread->CurrentFrame;

  // Update the thread pointer for Thunk return to the latest.
  Frame->Pointers.ThunkCallbackRet = SignalDelegation->GetThunkCallbackRET();

  Dispatcher->ExecuteDispatch(Frame);

  // If it is the parent thread that died then just leave
  // TODO: This doesn't make sense when the parent thread doesn't outlive its children
}

// Builds the per-thread compilation pipeline: op dispatcher, lookup cache,
// frontend decoder, pass manager and the Arm64 JIT backend, and publishes the
// lookup cache pointers into the thread's frame.
void ContextImpl::InitializeCompiler(FEXCore::Core::InternalThreadState* Thread) {
  // Per-thread frontend objects.
  Thread->OpDispatcher = fextl::make_unique<FEXCore::IR::OpDispatchBuilder>(this);
  Thread->OpDispatcher->SetMultiblock(Config.Multiblock);
  Thread->LookupCache = fextl::make_unique<FEXCore::LookupCache>(this);
  Thread->FrontendDecoder = fextl::make_unique<FEXCore::Frontend::Decoder>(Thread);
  Thread->PassManager = fextl::make_unique<FEXCore::IR::PassManager>();

  // Expose the freshly created lookup cache through the frame.
  auto& Frame = *Thread->CurrentFrame;
  auto& Cache = *Thread->LookupCache;
  Frame.State.L1Pointer = Cache.GetL1Pointer();
  Frame.State.L1Mask = Cache.GetScaledL1PointerMask();
  Frame.Pointers.L2Pointer = Cache.GetPagePointer();

  Dispatcher->InitThreadPointers(Thread);

  // Populate the pass pipeline.
  auto& Passes = *Thread->PassManager;
  Passes.AddDefaultPasses(this);
  Passes.AddDefaultValidationPasses();
  Passes.RegisterSyscallHandler(SyscallHandler);

  // Register allocation is inserted before the CPU backend is created; the
  // pipeline is finalized afterwards.
  Passes.InsertRegisterAllocationPass(this);
  Thread->CPUBackend = FEXCore::CPU::CreateArm64JITCore(this, Thread);

  Passes.Finalize();
}

// Allocates and initializes a new InternalThreadState owned by this context.
//
// InitialRIP / StackPointer seed the guest RIP and RSP. When NewThreadState is
// non-null, sizeof(CPUState) bytes are copied from it over the frame's state
// after that seeding. The caller receives ownership of the returned object
// (see DestroyThread).
FEXCore::Core::InternalThreadState*
ContextImpl::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState) {
  auto* NewThread = new FEXCore::Core::InternalThreadState {
    .CTX = this,
  };
  FEXCore::Allocator::VirtualName("FEXMem_ThreadState", NewThread, sizeof(*NewThread));

  auto& GuestState = NewThread->CurrentFrame->State;
  GuestState.gregs[X86State::REG_RSP] = StackPointer;
  GuestState.rip = InitialRIP;

  // A supplied thread state is copied over the state seeded above.
  if (NewThreadState) {
    memcpy(&GuestState, NewThreadState, sizeof(FEXCore::Core::CPUState));
  }

  // Let the frame find its way back to the owning thread object.
  NewThread->CurrentFrame->Thread = NewThread;

  InitializeCompiler(NewThread);

  GuestState.DeferredSignalRefCount.Store(0);

  // A JIT symbol buffer is only allocated when some form of JIT naming is enabled.
  const bool WantsJITSymbols = Config.BlockJITNaming() || Config.GlobalJITNaming() || Config.LibraryJITNaming();
  if (WantsJITSymbols) {
    NewThread->SymbolBuffer = JITSymbols::AllocateBuffer();
  }

  return NewThread;
}

// Destroys a thread object previously returned by CreateThread.
void ContextImpl::DestroyThread(FEXCore::Core::InternalThreadState* Thread) {
  // Make the interrupt fault page readable and writable again before the object is freed.
  auto& FaultPage = Thread->InterruptFaultPage;
  const auto ReadWrite = Allocator::ProtectOptions::Read | Allocator::ProtectOptions::Write;
  FEXCore::Allocator::VirtualProtect(&FaultPage, sizeof(FaultPage), ReadWrite);

  delete Thread;
}

#ifndef _WIN32
// Releases (parent) or resets (child) the locks that LockBeforeFork acquired.
// Child selects which side of the fork this call is running on.
void ContextImpl::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) {
  Allocator::UnlockAfterFork(LiveThread, Child);

  Profiler::PostForkAction(Child);

  if (!Child) {
    // Parent side: plain unlock of what was locked before the fork.
    CodeInvalidationMutex.unlock();
    if (Config.StrictInProcessSplitLocks) {
      FEXCore::Utils::SpinWaitLock::unlock(&StrictSplitLockMutex);
    }
    return;
  }

  // Child side.
  if (CodeMapWriter) {
    CodeMapWriter->ResetAfterFork();
  }

  CodeInvalidationMutex.StealAndDropActiveLocks();
  if (Config.StrictInProcessSplitLocks) {
    StrictSplitLockMutex = 0;
  }
}

// Acquires the locks that must be held across a fork. UnlockAfterFork is the
// counterpart that releases/resets them on both sides of the fork.
void ContextImpl::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) {
  // Taken exclusively; UnlockAfterFork unlocks it in the parent and drops it in the child.
  CodeInvalidationMutex.lock();
  Allocator::LockBeforeFork(Thread);
  // The split-lock mutex is only taken when strict in-process split locks are configured.
  if (Config.StrictInProcessSplitLocks) {
    FEXCore::Utils::SpinWaitLock::lock(&StrictSplitLockMutex);
  }
}
#endif

// Records a newly allocated code buffer in CodeBufferList and, with global JIT
// naming enabled, registers its memory range with the symbol writer.
void ContextImpl::OnCodeBufferAllocated(const fextl::shared_ptr<CPU::CodeBuffer>& Buffer) {
  if (Config.GlobalJITNaming()) {
    Symbols.RegisterJITSpace(Buffer->Ptr, Buffer->AllocatedSize);
  }

  // The list is guarded by CodeBufferListLock; the lock is held until return.
  std::scoped_lock ListGuard {CodeBufferListLock};
  CodeBufferList.emplace_back(Buffer);
}

// Clears the code cache used by Thread.
//
// NewCodeBuffer == true  : the CPU backend clears its cache (new CodeBuffer + L3 LookupCache, L1+L2 cleared).
// NewCodeBuffer == false : only the lookup cache is cleared, under its write lock.
// In both cases the thread's call/ret stack pages are released afterwards.
void ContextImpl::ClearCodeCache(FEXCore::Core::InternalThreadState* Thread, bool NewCodeBuffer) {
  FEXCORE_PROFILE_INSTANT("ClearCodeCache");

  if (!NewCodeBuffer) {
    // Clear L1+L2 cache of this thread, and clear L3 cache across any threads using it
    auto WriteLock = Thread->LookupCache->AcquireWriteLock();
    Thread->LookupCache->ClearCache(WriteLock);
  } else {
    // Allocate new CodeBuffer + L3 LookupCache and clear L1+L2 caches
    Thread->CPUBackend->ClearCache();
  }

  Allocator::VirtualDontNeed(Thread->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE);
}

// Writes a textual dump of the emitter's current IR to stderr, tagged "pre" or
// "post" depending on whether the IR view reports PostRA(), plus the guest RIP.
static void IRDumper(FEXCore::Core::InternalThreadState* Thread, IR::IREmitter* IREmitter, uint64_t GuestRIP) {
  FEXCore::File::File StdErr = FEXCore::File::File::GetStdERR();

  auto View = IREmitter->ViewIR();
  fextl::stringstream Text;
  FEXCore::IR::Dump(&Text, &View);

  const char* Stage = View.PostRA() ? "post" : "pre";
  fextl::fmt::print(StdErr, "IR-ShouldDump-{} 0x{:x}:\n{}\n@@@@@\n", Stage, GuestRIP, Text.str());
}

// Asks the thread's frontend decoder whether the block at GuestRIP (limited to
// MaxInst instructions) is cacheable. The guest address is used directly as the code pointer.
bool ContextImpl::CheckIfBlockIsCacheable(FEXCore::Core::InternalThreadState& Thread, uint64_t GuestRIP, uint64_t MaxInst) {
  const auto* GuestCode = reinterpret_cast<const uint8_t*>(GuestRIP);
  return Thread.FrontendDecoder->CheckIfCacheable(Thread, GuestCode, GuestRIP, MaxInst);
}

// Generates IR for the guest code starting at GuestRIP.
//
// If a custom IR handler is registered for GuestRIP it produces the IR and the
// x86 frontend is skipped. Otherwise the thread's frontend decoder decodes the
// code (MaxInst is forwarded to it) and every decoded instruction is handed to
// its OpDispatch handler. The resulting IR is then run through the pass manager.
//
// Parameters:
//   Thread            - thread whose OpDispatcher / FrontendDecoder / PassManager are used.
//   GuestRIP          - guest entry address; also used directly as the code pointer.
//   ExtendedDebugInfo - when true a _GuestOpcode marker is emitted for every instruction,
//                       not only for those CanHaveSideEffects() reports.
//   MaxInst           - instruction limit forwarded to the decoder.
//
// Returns a GenerateIRResult. Its IRView is std::nullopt when a dispatch error
// occurred before any instruction was successfully handled; in that case the
// OpDispatcher buffer has already been disowned.
ContextImpl::GenerateIRResult
ContextImpl::GenerateIR(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, bool ExtendedDebugInfo, uint64_t MaxInst) {
  FEXCORE_PROFILE_SCOPED("GenerateIR");

  Thread->OpDispatcher->ResetWorkingList();

  uint64_t TotalInstructions {0};
  uint64_t TotalInstructionsLength {0};

  bool HasCustomIR {};

  // The relaxed flag avoids taking CustomIRMutex when no handler was ever registered.
  if (HasCustomIRHandlers.load(std::memory_order_relaxed)) {
    std::shared_lock lk(CustomIRMutex);
    auto Handler = CustomIRHandlers.find(GuestRIP);
    if (Handler != CustomIRHandlers.end()) {
      // Custom IR entrypoints are accounted as a single one-byte instruction.
      TotalInstructions = 1;
      TotalInstructionsLength = 1;
      Handler->second.Handler(GuestRIP, Thread->OpDispatcher.get());
      HasCustomIR = true;
    }
  }

  if (!HasCustomIR) {
    const uint8_t* GuestCode {};
    GuestCode = reinterpret_cast<const uint8_t*>(GuestRIP);

    bool HadDispatchError {false};
    bool HadInvalidInst {false};

    Thread->FrontendDecoder->DecodeInstructionsAtEntry(Thread, GuestCode, GuestRIP, MaxInst);

    auto BlockInfo = Thread->FrontendDecoder->GetDecodedBlockInfo();
    auto CodeBlocks = &BlockInfo->Blocks;

    Thread->OpDispatcher->BeginFunction(GuestRIP, CodeBlocks, BlockInfo->TotalInstructionCount, BlockInfo->Is64BitMode,
                                        AreMonoHacksActive() && MonoBackpatcherBlock.load(std::memory_order_relaxed) == GuestRIP);

    const auto GPRSize = Thread->OpDispatcher->GetGPROpSize();

#ifdef ZYDIS_DISASSEMBLER
    const auto ZydisMachineMode = Config.Is64BitMode ? ZYDIS_MACHINE_MODE_LONG_64 : ZYDIS_MACHINE_MODE_LEGACY_32;
    if (FEXCore::Config::Get_X86DISASSEMBLE()) {
      const uint64_t DecodedMin = Thread->FrontendDecoder->DecodedMinAddress;
      const uint64_t DecodedMax = Thread->FrontendDecoder->DecodedMaxAddress;
      LogMan::Msg::IFmt("Guest x86 Begin (RIP={:#x}, {:#x}-{:#x})", GuestRIP, DecodedMin, DecodedMax);
    }
#endif

    for (size_t j = 0; j < CodeBlocks->size(); ++j) {
      const FEXCore::Frontend::Decoder::DecodedBlocks& Block = CodeBlocks->at(j);

#ifdef ZYDIS_DISASSEMBLER
      if (FEXCore::Config::Get_X86DISASSEMBLE() && CodeBlocks->size() > 1) {
        LogMan::Msg::IFmt("  Block {} Entry={:#x} NumInsts={}", j, Block.Entry, Block.NumInstructions);
      }
#endif

      bool BlockInForceTSOValidRange = false;
      auto InstForceTSOIt = ForceTSOInstructions.end();
      if (ForceTSOValidRanges.Contains({Block.Entry, Block.Entry + Block.Size})) {
        // lower_bound() returns end() when no recorded instruction lies at or after the block
        // entry; end() must never be dereferenced, so check it before looking at the value.
        if (auto It = ForceTSOInstructions.lower_bound(Block.Entry);
            It != ForceTSOInstructions.end() && *It < Block.Entry + Block.Size) {
          InstForceTSOIt = It;
          BlockInForceTSOValidRange = true;
        }
      }

      // Set the block entry point
      Thread->OpDispatcher->SetNewBlockIfChanged(Block.Entry);

      uint64_t BlockInstructionsLength {};

      // Reset any block-specific state
      Thread->OpDispatcher->StartNewBlock();

      uint64_t InstsInBlock = Block.NumInstructions;

      if (InstsInBlock == 0) {
        // Special case for an empty instruction block.
        Thread->OpDispatcher->ExitFunction(Thread->OpDispatcher->_InlineEntrypointOffset(GPRSize, Block.Entry - GuestRIP));
      }

      for (size_t i = 0; i < InstsInBlock; ++i) {
        uint64_t InstAddress = Block.Entry + BlockInstructionsLength;
        const FEXCore::X86Tables::X86InstInfo* TableInfo {nullptr};
        const FEXCore::X86Tables::DecodedInst* DecodedInfo {nullptr};

        TableInfo = Block.DecodedInstructions[i].TableInfo;
        DecodedInfo = &Block.DecodedInstructions[i];

#ifdef ZYDIS_DISASSEMBLER
        if (FEXCore::Config::Get_X86DISASSEMBLE()) {
          const uint8_t* InstBytes = reinterpret_cast<const uint8_t*>(InstAddress);
          ZydisDisassembledInstruction ZydisInst;
          if (ZYAN_SUCCESS(ZydisDisassembleIntel(ZydisMachineMode, InstAddress, InstBytes, DecodedInfo->InstSize, &ZydisInst))) {
            LogMan::Msg::IFmt("    {:#x}: {}", InstAddress, ZydisInst.text);
          } else {
            LogMan::Msg::IFmt("    {:#x}: (decode failed, {} bytes)", InstAddress, DecodedInfo->InstSize);
          }
        }
#endif

        bool IsLocked = DecodedInfo->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;

        // Do a partial register cache flush before every instruction. This
        // prevents cross-instruction static register caching, while allowing
        // context load/stores to be optimized within a block. Theoretically,
        // this flush is not required for correctness, all mandatory flushes are
        // included in instruction-specific handlers. Instead, this is a blunt
        // heuristic to make the register cache less aggressive, as the current
        // RA generates bad code in common cases with tied registers otherwise.
        //
        // However, it makes our exception handling behaviour more predictable.
        // It is potentially correctness bearing in that sense, but that is a
        // side effect here and (if that behaviour is required) we should handle
        // that more explicitly later.
        Thread->OpDispatcher->FlushRegisterCache(true);

        if (ExtendedDebugInfo || Thread->OpDispatcher->CanHaveSideEffects(TableInfo, DecodedInfo)) {
          Thread->OpDispatcher->_GuestOpcode(InstAddress - GuestRIP);
        }

        if (Config.SMCChecks == FEXCore::Config::CONFIG_SMC_FULL || Block.ForceFullSMCDetection) {
          // Full SMC detection: embed a copy of the instruction bytes and emit a runtime
          // comparison; on mismatch the code entry is removed and the block exits at this instruction.
          auto ExistingCodePtr = reinterpret_cast<uint8_t*>(Block.Entry + BlockInstructionsLength);
          auto InstAddressReg = Thread->OpDispatcher->_EntrypointOffset(GPRSize, InstAddress - GuestRIP);
          std::array<uint8_t, 0x10> CodeOriginal;
          memcpy(CodeOriginal.data(), ExistingCodePtr, DecodedInfo->InstSize);
          auto CodeChanged = Thread->OpDispatcher->_ValidateCode(CodeOriginal, InstAddressReg, DecodedInfo->InstSize);

          auto InvalidateCodeCond = Thread->OpDispatcher->CondJump(CodeChanged);

          auto CurrentBlock = Thread->OpDispatcher->GetCurrentBlock();
          auto CodeWasChangedBlock = Thread->OpDispatcher->CreateNewCodeBlockAtEnd();
          Thread->OpDispatcher->SetTrueJumpTarget(InvalidateCodeCond, CodeWasChangedBlock);

          Thread->OpDispatcher->SetCurrentCodeBlock(CodeWasChangedBlock);
          Thread->OpDispatcher->_ThreadRemoveCodeEntry();
          Thread->OpDispatcher->ExitFunction(Thread->OpDispatcher->_InlineEntrypointOffset(GPRSize, InstAddress - GuestRIP));

          auto NextOpBlock = Thread->OpDispatcher->CreateNewCodeBlockAfter(CurrentBlock);

          Thread->OpDispatcher->SetFalseJumpTarget(InvalidateCodeCond, NextOpBlock);
          Thread->OpDispatcher->SetCurrentCodeBlock(NextOpBlock);
        }

        if (TableInfo && TableInfo->OpcodeDispatcher.OpDispatch) {
          auto Fn = TableInfo->OpcodeDispatcher.OpDispatch;
          Thread->OpDispatcher->ResetHandledLock();
          Thread->OpDispatcher->ResetDecodeFailure();
          IR::ForceTSOMode ForceTSO = IR::ForceTSOMode::NoOverride;
          if (BlockInForceTSOValidRange) {
            // Inside a range with ForceTSO information: only the listed instructions get TSO.
            if (InstForceTSOIt != ForceTSOInstructions.end() && *InstForceTSOIt == InstAddress) {
              ForceTSO = IR::ForceTSOMode::ForceEnabled;
            } else {
              ForceTSO = IR::ForceTSOMode::ForceDisabled;
            }
          } else if (DecodedInfo->Flags & X86Tables::DecodeFlags::FLAG_FORCE_TSO) {
            ForceTSO = IR::ForceTSOMode::ForceEnabled;
          }

          Thread->OpDispatcher->SetForceTSO(ForceTSO);
          std::invoke(Fn, Thread->OpDispatcher, DecodedInfo);
          if (Thread->OpDispatcher->HadDecodeFailure()) {
            HadDispatchError = true;
          } else {
            if (Thread->OpDispatcher->HasHandledLock() != IsLocked) {
              HadDispatchError = true;
              LogMan::Msg::EFmt("Missing LOCK HANDLER at 0x{:x}{{'{}'}}", InstAddress, TableInfo->Name ?: "UND");
            }
            BlockInstructionsLength += DecodedInfo->InstSize;
            TotalInstructionsLength += DecodedInfo->InstSize;
            ++TotalInstructions;

            // Walk InstForceTSOIt forward past the handled instruction
            InstForceTSOIt =
              std::find_if(InstForceTSOIt, ForceTSOInstructions.end(), [&](auto Val) { return Val >= Block.Entry + BlockInstructionsLength; });
          }
        } else {
          // Invalid instruction
          if (!BlockInstructionsLength) {
            // SMC can modify block contents and patch invalid instructions to valid ones inline.
            // End blocks upon encountering them and only emit an invalid opcode exception if there are no prior instructions in the block (that could have modified it to be valid).

            if (TableInfo) {
              LogMan::Msg::EFmt("Invalid or Unknown instruction: {} 0x{:x}", TableInfo->Name ?: "UND", Block.Entry - GuestRIP);
            }

            if (Block.BlockStatus == Frontend::Decoder::DecodedBlockStatus::INVALID_INST ||
                Block.BlockStatus == Frontend::Decoder::DecodedBlockStatus::BAD_RELOCATION) {
              Thread->OpDispatcher->InvalidOp(DecodedInfo);
            } else {
              Thread->OpDispatcher->NoExecOp(DecodedInfo);
            }
          }

          HadInvalidInst = true;
        }

        const bool NeedsBlockEnd = (HadDispatchError && TotalInstructions > 0) ||
                                   (Thread->OpDispatcher->NeedsBlockEnder() && i + 1 == InstsInBlock) || HadInvalidInst;

        // If we had a dispatch error then leave early
        if (HadDispatchError && TotalInstructions == 0) {
          // Couldn't handle any instruction in op dispatcher
          Thread->OpDispatcher->DelayedDisownBuffer();
          return {std::nullopt, 0, 0, 0, 0};
        }

        if (NeedsBlockEnd) {
          // We had some instructions. Early exit
          Thread->OpDispatcher->ExitFunction(
            Thread->OpDispatcher->_InlineEntrypointOffset(GPRSize, Block.Entry + BlockInstructionsLength - GuestRIP));
          break;
        }


        if (Thread->OpDispatcher->FinishOp(DecodedInfo->PC + DecodedInfo->InstSize, i + 1 == InstsInBlock)) {
          break;
        }
      }
    }

#ifdef ZYDIS_DISASSEMBLER
    if (FEXCore::Config::Get_X86DISASSEMBLE()) {
      LogMan::Msg::IFmt("Guest x86 End");
    }
#endif

    Thread->OpDispatcher->Finalize();

    Thread->FrontendDecoder->DelayedDisownBuffer();
  }

  IR::IREmitter* IREmitter = Thread->OpDispatcher.get();

  auto ShouldDump = Thread->OpDispatcher->ShouldDumpIR();
  // Debug
  if (ShouldDump) {
    IRDumper(Thread, IREmitter, GuestRIP);
  }

  // Run the passmanager over the IR from the dispatcher
  Thread->PassManager->Run(IREmitter);

  // Debug
  if (ShouldDump) {
    IRDumper(Thread, IREmitter, GuestRIP);
  }

  return {
    .IRView = IREmitter->ViewIR(),
    .TotalInstructions = TotalInstructions,
    .TotalInstructionsLength = TotalInstructionsLength,
    .StartAddr = Thread->FrontendDecoder->DecodedMinAddress,
    .Length = Thread->FrontendDecoder->DecodedMaxAddress - Thread->FrontendDecoder->DecodedMinAddress,
    .NeedsAddGuestCodeRanges = !HasCustomIR,
  };
}

// Generates IR for GuestRIP and has the thread's CPU backend compile it.
//
// Result cases:
//   * IR generation failed: empty CompiledCode and null DebugData.
//   * MaxInst != 1 and the block is already in the lookup cache (another thread
//     won the race): the cached host pointer with null DebugData.
//   * otherwise: the backend's output together with freshly allocated DebugData.
ContextImpl::CompileCodeResult ContextImpl::CompileCode(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst) {
  if (SourcecodeResolver && Config.GDBSymbols()) {
    if (auto Section = SyscallHandler->LookupExecutableFileSection(Thread, GuestRIP); Section) {
      Section->FileInfo.SourcecodeMap =
        SourcecodeResolver->GenerateMap(Section->FileInfo.Filename, CodeMap::GetBaseFilename(Section->FileInfo, false));
    }
  }

  // Generate IR + Meta Info
  auto Generated = GenerateIR(Thread, GuestRIP, Config.GDBSymbols(), MaxInst);
  if (!Generated.IRView) {
    // OpDispatcher IR already released in this case.
    return {{}, nullptr, 0, 0, false};
  }

  // Attempt to get the CPU backend to compile this code
  // Re-check if another thread raced us in compiling this block.
  // We could lock CodeBufferWriteMutex earlier to prevent this from happening,
  // but this would increase lock contention. Redundant frontend runs aren't
  // as expensive and are easily reverted.
  if (MaxInst != 1) {
    if (auto CachedBlock = Thread->LookupCache->FindBlock(Thread, GuestRIP)) {
      // Raced to compile, release the OpDispatcher IR.
      Thread->OpDispatcher->DelayedDisownBuffer();
      auto* CachedPtr = reinterpret_cast<uint8_t*>(CachedBlock);
      return {.CompiledCode = {.BlockBegin = CachedPtr, .EntryPoints = {{GuestRIP, CachedPtr}}},
              .DebugData = nullptr,
              .StartAddr = 0,
              .Length = 0,
              .NeedsAddGuestCodeRanges = false};
    }
  }

  auto DebugData = fextl::make_unique<FEXCore::Core::DebugData>();

  // If the trap flag is set we generate single instruction blocks that each check to generate a single step exception.
  const bool TFSet = Thread->CurrentFrame->State.flags[X86State::RFLAG_TF_RAW_LOC];
  const bool SingleInstruction = Generated.TotalInstructions == 1;

  auto CompiledCode =
    Thread->CPUBackend->CompileCode(GuestRIP, Generated.Length, SingleInstruction, &*Generated.IRView, DebugData.get(), TFSet);

  // Release the IR
  Thread->OpDispatcher->DelayedDisownBuffer();

  return {
    .CompiledCode = std::move(CompiledCode),
    .DebugData = std::move(DebugData),
    .StartAddr = Generated.StartAddr,
    .Length = Generated.Length,
    .NeedsAddGuestCodeRanges = Generated.NeedsAddGuestCodeRanges,
  };
}

// Returns the host code address for GuestRIP, compiling the block if the
// thread's lookup cache does not already contain it.
//
// Parameters:
//   Frame    - frame of the calling thread; Frame->Thread supplies all per-thread state.
//   GuestRIP - guest entry address of the block.
//   MaxInst  - instruction limit forwarded to CompileCode.
//
// Returns 0 when compilation produced no entry point for GuestRIP, otherwise the
// host entry point. On a fresh compile the block is additionally registered with
// the symbol writers (when configured), the guest executable-range tracking, the
// lookup cache and the code map writer.
uintptr_t ContextImpl::CompileBlock(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP, uint64_t MaxInst) {
  auto Thread = Frame->Thread;
  FEXCORE_PROFILE_SCOPED("CompileBlock");
  FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedJITTime);

  static_cast<ContextImpl*>(Thread->CTX)->SyscallHandler->PreCompile();

  // Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled
  auto lk = GuardSignalDeferringSection<std::shared_lock>(CodeInvalidationMutex, Thread);

  // Is the code in the cache?
  // The backends only check L1 and L2, not L3
  if (auto HostCode = Thread->LookupCache->FindBlock(Thread, GuestRIP)) {
    return HostCode;
  }

  // Accumulate a JIT count now, as even if another thread raced us, it should count as a compile.
  FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedJITCount, 1);

  auto [CompiledCode, DebugData, StartAddr, Length, NeedsAddGuestCodeRanges] = CompileCode(Thread, GuestRIP, MaxInst);
  // operator[] yields a null pointer when CompileCode returned no entry point for GuestRIP.
  auto CodePtr = CompiledCode.EntryPoints[GuestRIP];
  if (CodePtr == nullptr) {
    return 0;
  } else if (!DebugData) {
    // DebugData wasn't populated, indicating another thread raced us for compiling this block
    return reinterpret_cast<uintptr_t>(CodePtr);
  }

  // The core managed to compile the code.
  if (Config.BlockJITNaming()) {
    auto FragmentBasePtr = CompiledCode.BlockBegin;

    auto GuestRIPLookup = SyscallHandler->LookupExecutableFileSection(Thread, GuestRIP);

    // NOTE(review): for file-backed code every subblock is registered with CompiledCode.Size,
    // while the anonymous path uses Subblock.HostCodeSize — confirm this difference is intended.
    if (DebugData->Subblocks.size()) {
      for (auto& Subblock : DebugData->Subblocks) {
        auto BlockBasePtr = FragmentBasePtr + Subblock.HostCodeOffset;
        if (GuestRIPLookup) {
          Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, CompiledCode.Size, GuestRIPLookup->FileInfo.Filename,
                           GuestRIP - GuestRIPLookup->FileStartVA);
        } else {
          Symbols.Register(Thread->SymbolBuffer.get(), BlockBasePtr, GuestRIP, Subblock.HostCodeSize);
        }
      }
    } else {
      if (GuestRIPLookup) {
        Symbols.Register(Thread->SymbolBuffer.get(), FragmentBasePtr, CompiledCode.Size, GuestRIPLookup->FileInfo.Filename,
                         GuestRIP - GuestRIPLookup->FileStartVA);
      } else {
        Symbols.Register(Thread->SymbolBuffer.get(), FragmentBasePtr, GuestRIP, CompiledCode.Size);
      }
    }
  }

  // Library-level naming and GDB JIT registration both need the mapped file section.
  if (Config.LibraryJITNaming() || Config.GDBSymbols()) {
    auto MappedSection = SyscallHandler->LookupExecutableFileSection(Thread, GuestRIP);
    if (MappedSection) {
      if (Config.LibraryJITNaming()) {
        Symbols.RegisterNamedRegion(Thread->SymbolBuffer.get(), CodePtr, DebugData->HostCodeSize, MappedSection->FileInfo.Filename);
      }

      if (Config.GDBSymbols()) {
        GDBJITRegister(MappedSection->FileInfo, MappedSection->FileStartVA, GuestRIP, (uintptr_t)CodePtr, *DebugData);
      }
    }
  }

  // Clear any relocations that might have been generated
  // (they are kept while CodeCache.IsGeneratingCache is set)
  if (!CodeCache.IsGeneratingCache) {
    Thread->CPUBackend->ClearRelocations();
  }

  // Stays empty when NeedsAddGuestCodeRanges is false.
  fextl::vector<uint64_t> CodePages;

  if (NeedsAddGuestCodeRanges) {
    // Track in the guest to host map all entrypoints for all pages the compiled block touches, if any page didn't previously
    // contain code, inform the frontend so it can setup SMC detection.
    auto BlockInfo = Thread->FrontendDecoder->GetDecodedBlockInfo();
    CodePages.reserve(BlockInfo->CodePages.size());
    CodePages.insert(CodePages.end(), BlockInfo->CodePages.begin(), BlockInfo->CodePages.end());
    for (auto CodePage : BlockInfo->CodePages) {
      if (Thread->LookupCache->AddBlockExecutableRange(Thread, BlockInfo->EntryPoints, CodePage, FEXCore::Utils::FEX_PAGE_SIZE)) {
        SyscallHandler->MarkGuestExecutableRange(Thread, CodePage, FEXCore::Utils::FEX_PAGE_SIZE);
      }
    }
  }

  // Insert to lookup cache
  // Every entry point of the compiled code gets its own mapping.

  for (auto [GuestAddr, HostAddr] : CompiledCode.EntryPoints) {
    Thread->LookupCache->AddBlockMapping(Thread, GuestAddr, CodePages, HostAddr);
  }

  // Only regions with a non-zero FileStartVA are appended to the code map.
  if (CodeMapWriter) {
    auto Region = SyscallHandler->LookupExecutableFileSection(Thread, GuestRIP);
    if (Region && Region->FileStartVA != 0) {
      CodeMapWriter->AppendBlock(*Region, GuestRIP);
    }
  }

  return (uintptr_t)CodePtr;
}

// Compiles a one-instruction block at GuestRIP (CompileCode with MaxInst == 1)
// and returns its host entry point, or 0 if no entry point was produced.
// Nothing is inserted into the lookup cache by this function.
uintptr_t ContextImpl::CompileSingleStep(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP) {
  FEXCORE_PROFILE_SCOPED("CompileSingleStep");
  auto Thread = Frame->Thread;

  static_cast<ContextImpl*>(Thread->CTX)->SyscallHandler->PreCompile();

  // Invalidate might take a unique lock on this, to guarantee that during invalidation no code gets compiled
  auto InvalidationGuard = GuardSignalDeferringSection<std::shared_lock>(CodeInvalidationMutex, Thread);

  auto Compiled = CompileCode(Thread, GuestRIP, 1);
  auto HostEntry = Compiled.CompiledCode.EntryPoints[GuestRIP];
  if (HostEntry == nullptr) {
    return 0;
  }

  // Clear any relocations that might have been generated
  Thread->CPUBackend->ClearRelocations();

  return (uintptr_t)HostEntry;
}

// Invalidates [Start, Start + Length) in the lookup cache of every live code
// buffer and prunes list entries whose buffer has already been destroyed.
// Asserts that CodeInvalidationMutex is already held by the caller.
void ContextImpl::InvalidateCodeBuffersCodeRange(uint64_t Start, uint64_t Length) {
  FEXCORE_PROFILE_SCOPED("InvalidateCodeBuffersCodeRange");

  LOGMAN_THROW_A_FMT(CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to be unique_locked here");
  std::scoped_lock ListGuard {CodeBufferListLock};

  for (auto Entry = CodeBufferList.begin(); Entry != CodeBufferList.end();) {
    auto Buffer = Entry->lock();
    if (!Buffer) {
      // The buffer is gone; drop the stale entry and continue with its successor.
      Entry = CodeBufferList.erase(Entry);
      continue;
    }

    Buffer->LookupCache->InvalidateRange(Start, Length);
    ++Entry;
  }
}

// Invalidates [Start, Start + Length) in the per-thread caches of Thread.
// Asserts that CodeInvalidationMutex is already held by the caller.
void ContextImpl::InvalidateThreadCachedCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) {
  LOGMAN_THROW_A_FMT(CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to be unique_locked here");

  // Ensures now-modified mappings aren't cached as being in their previous non-executable state.
  // Accessing FrontendDecoder is safe as the thread's code invalidation mutex must be locked here.
  Thread->FrontendDecoder->ResetExecutableRangeCache();

  const bool Invalidated = Thread->LookupCache->InvalidateCacheRange(Start, Length);
  if (!Invalidated) {
    return;
  }

  FEXCORE_PROFILE_SCOPED("InvalidateCallRet");

  // This may cause access violations in the thread on Windows as zeroing is not atomic, this is handled by the frontend
  Allocator::VirtualDontNeed(Thread->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE);
}

// Asks the syscall handler to invalidate the single guest byte at GuestRIP on
// behalf of the thread owning Frame.
void ContextImpl::ThreadRemoveCodeEntryFromJit(FEXCore::Core::CpuStateFrame* Frame, uint64_t GuestRIP) {
  auto* Thread = Frame->Thread;
  auto* OwningContext = static_cast<ContextImpl*>(Thread->CTX);
  OwningContext->SyscallHandler->InvalidateGuestCodeRange(Thread, GuestRIP, 1);
}

// Registers Handler as the custom IR generator for Entrypoint.
//
// Returns std::nullopt when the handler was newly inserted. If an entry already
// existed for Entrypoint it is left untouched and its Creator/Data pair is returned.
std::optional<CustomIRResult>
ContextImpl::AddCustomIREntrypoint(uintptr_t Entrypoint, CustomIREntrypointHandler Handler, void* Creator, void* Data) {
  LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint);

  std::unique_lock HandlersGuard(CustomIRMutex);

  auto [Position, WasInserted] = CustomIRHandlers.emplace(Entrypoint, CustomIRHandlerEntry {Handler, Creator, Data});
  HasCustomIRHandlers = true;

  if (WasInserted) {
    return std::nullopt;
  }

  // Report who already owns this entrypoint.
  const auto& [ExistingFn, ExistingCreator, ExistingData] = Position->second;
  return CustomIRResult(ExistingCreator, ExistingData);
}

// Installs a custom IR entrypoint at Entrypoint that forwards execution to
// GuestThunkEntrypoint.
//
// The generated IR stores the original Entrypoint value (into guest R11 in
// 64-bit mode, into the mm[0][0] context slot otherwise) and then exits the
// function towards GuestThunkEntrypoint.
//
// If Entrypoint already has a custom IR handler: dies when it was created by
// something other than ThunkHandler, and logs an error when it was linked to a
// different guest function.
void ContextImpl::AddThunkTrampolineIRHandler(uintptr_t Entrypoint, uintptr_t GuestThunkEntrypoint) {
  LOGMAN_THROW_A_FMT(Entrypoint, "Tried to link null pointer address to guest function");
  LOGMAN_THROW_A_FMT(GuestThunkEntrypoint, "Tried to link address to null pointer guest function");
  if (!Config.Is64BitMode) {
    // Both addresses must fit into 32 bits when running a 32-bit guest.
    LOGMAN_THROW_A_FMT((Entrypoint >> 32) == 0, "Tried to link 64-bit address in 32-bit mode");
    LOGMAN_THROW_A_FMT((GuestThunkEntrypoint >> 32) == 0, "Tried to link 64-bit address in 32-bit mode");
  }

  LogMan::Msg::DFmt("Thunks: Adding guest trampoline from address {:#x} to guest function {:#x}", Entrypoint, GuestThunkEntrypoint);

  auto Result = AddCustomIREntrypoint(
    Entrypoint,
    [this, GuestThunkEntrypoint](uintptr_t Entrypoint, FEXCore::IR::IREmitter* emit) {
      // Minimal IR function: header plus a single code block.
      auto IRHeader = emit->_IRHeader(emit->Invalid(), Entrypoint, 0, 0, 0, 0);
      auto Block = emit->CreateCodeNode(true, 0);
      IRHeader.first->Blocks = emit->WrapNode(Block);
      emit->SetCurrentCodeBlock(Block);

      const auto GPRSize = this->Config.Is64BitMode ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;

      // Thunk entry-points don't get cached, don't need to be padded.
      if (GPRSize == IR::OpSize::i64Bit) {
        // 64-bit: the entrypoint value is stored into the fixed register for guest R11.
        IR::Ref R = emit->_StoreRegister(emit->Constant(Entrypoint), GPRSize);
        R->Reg = IR::PhysicalRegister(IR::RegClass::GPRFixed, X86State::REG_R11).Raw;
      } else {
        // 32-bit: the entrypoint value is stored into the context at mm[0][0].
        emit->_StoreContextFPR(GPRSize, emit->_VCastFromGPR(IR::OpSize::i64Bit, IR::OpSize::i64Bit, emit->Constant(Entrypoint)),
                               offsetof(Core::CPUState, mm[0][0]));
      }
      emit->_ExitFunction(IR::OpSize::i64Bit, emit->Constant(GuestThunkEntrypoint), IR::BranchHint::None, emit->Invalid(), emit->Invalid());
    },
    ThunkHandler, (void*)GuestThunkEntrypoint);

  // A value is only returned when Entrypoint was already registered.
  if (Result.has_value()) {
    if (Result->Creator != ThunkHandler) {
      ERROR_AND_DIE_FMT("Input address for AddThunkTrampoline is already linked by another module");
    }
    if (Result->Data != (void*)GuestThunkEntrypoint) {
      // NOTE: This may happen in Vulkan thunks if the Vulkan driver resolves two different symbols
      //       to the same function (e.g. vkGetPhysicalDeviceFeatures2/vkGetPhysicalDeviceFeatures2KHR)
      LogMan::Msg::EFmt("Input address for AddThunkTrampoline is already linked elsewhere");
    }
  }
}

// Adds ForceTSO information: ValidRanges are inserted into ForceTSOValidRanges
// and Instructions are merged into ForceTSOInstructions.
// Asserts that CodeInvalidationMutex is already held by the caller.
void ContextImpl::AddForceTSOInformation(const IntervalList<uint64_t>& ValidRanges, fextl::set<uint64_t>&& Instructions) {
  LogMan::Throw::AFmt(CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to be unique_locked here");
  ForceTSOValidRanges.Insert(ValidRanges);
  ForceTSOInstructions.merge(std::move(Instructions));
}

// Removes all ForceTSO information for the half-open guest range
// [Address, Address + Size): the range is removed from ForceTSOValidRanges and
// every recorded instruction address inside it is erased from ForceTSOInstructions.
// Asserts that CodeInvalidationMutex is already held by the caller.
void ContextImpl::RemoveForceTSOInformation(uint64_t Address, uint64_t Size) {
  LogMan::Throw::AFmt(CodeInvalidationMutex.try_lock() == false, "CodeInvalidationMutex needs to be unique_locked here");

  const uint64_t End = Address + Size;
  ForceTSOValidRanges.Remove({Address, End});

  // The range is half-open, so an instruction recorded exactly at End belongs to the
  // following region and must survive. lower_bound(End) is the first element >= End,
  // which makes it the correct exclusive end; upper_bound(End) would also erase End itself.
  ForceTSOInstructions.erase(ForceTSOInstructions.lower_bound(Address), ForceTSOInstructions.lower_bound(End));
}

// Records BlockEntry as the Mono backpatcher block. GenerateIR compares the
// stored value against the guest RIP being compiled when Mono hacks are active.
void ContextImpl::MarkMonoBackpatcherBlock(uint64_t BlockEntry) {
  // Relaxed ordering: only the value itself is published.
  MonoBackpatcherBlock.store(BlockEntry, std::memory_order_relaxed);
}

// Unregisters the custom IR handler for Entrypoint (if any) and asks the
// syscall handler to invalidate the guest code at that address.
void ContextImpl::RemoveCustomIREntrypoint(FEXCore::Core::InternalThreadState* Thread, uintptr_t Entrypoint) {
  LOGMAN_THROW_A_FMT(Config.Is64BitMode || !(Entrypoint >> 32), "64-bit Entrypoint in 32-bit mode {:x}", Entrypoint);

  // Held for the remainder of the function, including the invalidation call below.
  std::scoped_lock lk(CustomIRMutex);

  CustomIRHandlers.erase(Entrypoint);
  // Keep the fast-path flag in sync with the map contents.
  HasCustomIRHandlers = !CustomIRHandlers.empty();
  SyscallHandler->InvalidateGuestCodeRange(Thread, Entrypoint, 1);
}

// Performs a 4- or 8-byte store of Value to guest Address while holding
// CodeInvalidationMutex, then invalidates the written guest range.
// Any other Size is fatal.
void ContextImpl::MonoBackpatcherWrite(FEXCore::Core::CpuStateFrame* Frame, uint8_t Size, uint64_t Address, uint64_t Value) {
  auto Thread = Frame->Thread;
  auto CTX = static_cast<ContextImpl*>(Thread->CTX);
  {
    // The guard only covers the store itself; it is released before invalidation.
    auto InvalidationGuard = GuardSignalDeferringSection(CTX->CodeInvalidationMutex, Thread);

    switch (Size) {
    case 8: {
      *reinterpret_cast<uint64_t*>(Address) = Value;
      break;
    }
    case 4: {
      *reinterpret_cast<uint32_t*>(Address) = Value;
      break;
    }
    default: {
      ERROR_AND_DIE_FMT("Unexpected write size for backpatcher: {}", Size);
      break;
    }
    }
  }

  CTX->SyscallHandler->InvalidateGuestCodeRange(Thread, Address, Size);
}

// Hands the ExternalBranches set to the thread's frontend decoder.
// SectionMaxAddress is accepted but not used by this function.
void ContextImpl::ConfigureAOTGen(FEXCore::Core::InternalThreadState* Thread, fextl::set<uint64_t>* ExternalBranches, uint64_t SectionMaxAddress) {
  Thread->FrontendDecoder->SetExternalBranches(ExternalBranches);
}
} // namespace FEXCore::Context


================================================
FILE: FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.cpp
================================================
// SPDX-License-Identifier: MIT

#include "Common/VectorRegType.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/LookupCache.h"
#include "Utils/MemberFunctionToPointer.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Event.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <CodeEmitter/Emitter.h>

#ifdef VIXL_SIMULATOR
#include <aarch64/simulator-aarch64.h>
#endif

#include <array>
#include <bit>
#include <cstring>

namespace FEXCore::CPU {

// Free-function trampoline that forwards to the context's syscall handler
// SleepThread() with the same context and frame.
static void SleepThread(FEXCore::Context::ContextImpl* CTX, FEXCore::Core::CpuStateFrame* Frame) {
  CTX->SyscallHandler->SleepThread(CTX, Frame);
}

// Size in bytes (four FEX pages) of the buffer the Dispatcher allocates for its emitted code.
constexpr size_t MAX_DISPATCHER_CODE_SIZE = FEXCore::Utils::FEX_PAGE_SIZE * 4;

// Allocates a MAX_DISPATCHER_CODE_SIZE buffer for the emitter, emits the
// dispatcher code into it and gives the mapping a name.
// NOTE(review): the `true` passed to VirtualAlloc presumably requests executable memory — confirm.
Dispatcher::Dispatcher(FEXCore::Context::ContextImpl* ctx)
  : Arm64Emitter(ctx, FEXCore::Allocator::VirtualAlloc(MAX_DISPATCHER_CODE_SIZE, true), MAX_DISPATCHER_CODE_SIZE)
  , CTX {ctx} {
  EmitDispatcher();

  // Label the allocation after the code has been emitted.
  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(GetBufferBase()), MAX_DISPATCHER_CODE_SIZE);
}

// Frees the emitter buffer, provided the emitter reports a non-zero buffer size.
Dispatcher::~Dispatcher() {
  if (const auto AllocatedSize = GetBufferSize()) {
    FEXCore::Allocator::VirtualFree(GetBufferBase(), AllocatedSize);
  }
}

// Emits the entire dispatcher into the code buffer and records the address of every entry point.
//
// In emission order this produces:
//  - The dispatch entry (DispatchPtr) and the loop top (AbsoluteLoopTopAddress*), including the L2 -> L1 block lookup.
//  - The thread stop handler (ThreadStopHandlerAddress*).
//  - The exit-function linker, block compile and single-step compile paths.
//  - Signal return stubs and the guest SIGILL/SIGTRAP/SIGSEGV stubs.
//  - The thread pause handler and the thunk callback entry (CallbackPtr).
//  - The long divide handlers, F64 sin/cos/tan handlers and the fallback ABI trampolines.
//  - A literal pool holding CTX and the C++ function pointers loaded by the paths above.
//
// Afterwards the instruction cache is cleared for the emitted range and the region is optionally
// registered with the symbol tracker and/or disassembled to the log.
void Dispatcher::EmitDispatcher() {
  // Don't modify TMP3 since it contains our RIP once the block doesn't exist
  auto RipReg = TMP3;
#ifdef VIXL_DISASSEMBLER
  const auto DisasmBegin = GetCursorAddress<const vixl::aarch64::Instruction*>();
#endif

  DispatchPtr = GetCursorAddress<AsmDispatch>();

  // while (true) {
  //    Ptr = FindBlock(RIP)
  //    if (!Ptr)
  //      Ptr = CTX->CompileBlock(RIP);
  //
  //    Ptr();
  // }

  // Literal pool labels; these are bound at the very end of the emitted code.
  ARMEmitter::ForwardLabel l_CTX;
  ARMEmitter::ForwardLabel l_Sleep;
  ARMEmitter::ForwardLabel l_CompileBlock;
  ARMEmitter::ForwardLabel l_CompileSingleStep;

  // Push all the registers we need to save
  PushCalleeSavedRegisters();

  // Push our memory base to the correct register
  // Move our thread pointer to the correct register
  // This is passed in to parameter 0 (x0)
  mov(STATE, ARMEmitter::XReg::x0);

  // Save this stack pointer so we can cleanly shutdown the emulation with a long jump
  // regardless of where we were in the stack
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, ARMEmitter::Reg::rsp, 0);
  str(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, ReturningStackLocation));

  // Code label for the single-step compile path (distinct from the l_CompileSingleStep literal).
  ARMEmitter::ForwardLabel CompileSingleStep;
  AbsoluteLoopTopAddressFillSRA = GetCursorAddress<uint64_t>();

  FillStaticRegs();
  ldr(RipReg, STATE_PTR(CpuStateFrame, State.rip));
  (void)cbnz(ARMEmitter::Size::i32Bit, ENTRY_FILL_SRA_SINGLE_INST_REG, &CompileSingleStep);

  ARMEmitter::BiDirectionalLabel LoopTop {};

#ifdef ARCHITECTURE_arm64ec
  (void)b(&LoopTop);

  AbsoluteLoopTopAddressEnterECFillSRA = GetCursorAddress<uint64_t>();
  ldr(STATE, EC_ENTRY_CPUAREA_REG, CPU_AREA_EMULATOR_DATA_OFFSET);
  FillStaticRegs();

  ldr(RipReg, STATE_PTR(CpuStateFrame, State.rip));
  // Force a single instruction block if ENTRY_FILL_SRA_SINGLE_INST_REG is nonzero entering the JIT, used for inline SMC handling.
  (void)cbnz(ARMEmitter::Size::i32Bit, ENTRY_FILL_SRA_SINGLE_INST_REG, &CompileSingleStep);

  // Enter JIT
  (void)b(&LoopTop);

  AbsoluteLoopTopAddressEnterEC = GetCursorAddress<uint64_t>();
  // Load ThreadState and write the target PC there
  ldr(STATE, EC_ENTRY_CPUAREA_REG, CPU_AREA_EMULATOR_DATA_OFFSET);
  str(EC_CALL_CHECKER_PC_REG, STATE_PTR(CpuStateFrame, State.rip));

  // Swap stacks to the emulator stack
  ldr(TMP1, EC_ENTRY_CPUAREA_REG, CPU_AREA_EMULATOR_STACK_BASE_OFFSET);
  add(ARMEmitter::Size::i64Bit, StaticRegisters[X86State::REG_RSP], ARMEmitter::Reg::rsp, 0);
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, TMP1, 0);

  ldr(REG_CALLRET_SP, STATE_PTR(CpuStateFrame, State.callret_sp));

  FillSpecialRegs(TMP1, TMP2, false, true);

  // As ARM64EC uses this as an entrypoint for both guest calls and host returns, opportunistically try to return
  // using the call-ret stack to avoid unbalancing it.
  ldp<ARMEmitter::IndexType::OFFSET>(TMP1, TMP2, REG_CALLRET_SP);
  // EC_CALL_CHECKER_PC_REG is REG_PF which isn't touched by any of the above
  sub(ARMEmitter::Size::i64Bit, TMP1, EC_CALL_CHECKER_PC_REG, TMP1);
  (void)cbnz(ARMEmitter::Size::i64Bit, TMP1, &LoopTop);

  // If the entry at the TOS is for the target address, pop it and return to the JIT code
  add(ARMEmitter::Size::i64Bit, REG_CALLRET_SP, REG_CALLRET_SP, 0x10);
  ret(TMP2);

  // Enter JIT
#endif

  // We want to ensure that we are 16 byte aligned at the top of this loop
  Align16B();

  (void)Bind(&LoopTop);
  AbsoluteLoopTopAddress = GetCursorAddress<uint64_t>();

  // Load in our RIP
  ldr(RipReg, STATE_PTR(CpuStateFrame, State.rip));

#ifdef ARCHITECTURE_arm64ec
  // Clobbers TMP1/2
  // Check the EC code bitmap in case we need to exit the JIT to call into native code.
  ARMEmitter::ForwardLabel l_NotECCode;
  ldr(TMP1, ARMEmitter::XReg::x18, TEB_PEB_OFFSET);
  ldr(TMP1, TMP1, PEB_EC_CODE_BITMAP_OFFSET);

  lsr(ARMEmitter::Size::i64Bit, TMP2, RipReg, 15);
  and_(ARMEmitter::Size::i64Bit, TMP2, TMP2, 0x1fffffffffff8);
  ldr(TMP1, TMP1, TMP2, ARMEmitter::ExtendedType::LSL_64, 0);
  lsr(ARMEmitter::Size::i64Bit, TMP2, RipReg, 12);
  lsrv(ARMEmitter::Size::i64Bit, TMP1, TMP1, TMP2);
  (void)tbz(TMP1, 0, &l_NotECCode);

  str(REG_CALLRET_SP, STATE_PTR(CpuStateFrame, State.callret_sp));

  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, StaticRegisters[X86State::REG_RSP], 0);
  mov(EC_CALL_CHECKER_PC_REG, RipReg);
  ldr(TMP2, STATE_PTR(CpuStateFrame, Pointers.ExitFunctionEC));
  br(TMP2);

  (void)Bind(&l_NotECCode);
#endif

  // A non-zero raw TF flag byte forces the single-step compile path.
  ldrb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));
  (void)cbnz(ARMEmitter::Size::i32Bit, TMP1, &CompileSingleStep);

  ARMEmitter::ForwardLabel NoBlock;

  if (DisableL2Cache()) {
    (void)b(&NoBlock);
  } else {
    // This is the block cache lookup routine
    // It matches what is going on in LookupCache.h::FindBlock
    ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.L2Pointer));

    // Mask the address by the virtual address size so we can check for aliases
    uint64_t VirtualMemorySize = CTX->Config.VirtualMemSize;
    if (std::popcount(VirtualMemorySize) == 1) {
      and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), VirtualMemorySize - 1);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP4, VirtualMemorySize);
      and_(ARMEmitter::Size::i64Bit, TMP4, RipReg.R(), TMP4);
    }

    {
      // Offset the address and add to our page pointer
      lsr(ARMEmitter::Size::i64Bit, TMP2, TMP4, 12);

      // Load the pointer from the offset
      ldr(TMP1, TMP1, TMP2, ARMEmitter::ExtendedType::LSL_64, 3);

      // If page pointer is zero then we have no block
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &NoBlock);

      // Steal the page offset
      and_(ARMEmitter::Size::i64Bit, TMP2, TMP4, 0x0FFF);

      // Shift the offset by the size of the block cache entry
      add(TMP1, TMP1, TMP2, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(sizeof(LookupCache::LookupCacheEntry)));

      // Load the full LookupCacheEntry with a single LDP.
      // Check the guest address first to ensure it maps to the address we are currently at.
      // This fixes aliasing problems
      ldp<ARMEmitter::IndexType::OFFSET>(TMP4, TMP2, TMP1, 0);

      // If the guest address doesn't match, Compile the block.
      sub(TMP2, TMP2, RipReg);
      (void)cbnz(ARMEmitter::Size::i64Bit, TMP2, &NoBlock);

      // Check the host address to see if it matches, else compile the block.
      (void)cbz(ARMEmitter::Size::i64Bit, TMP4, &NoBlock);

      // If we've made it here then we have a real compiled block
      {
        // update L1 cache
        ldp<ARMEmitter::IndexType::OFFSET>(TMP1, TMP2, STATE, offsetof(FEXCore::Core::CpuStateFrame, State.L1Pointer));

        // Calculate (tmp1 + ((ripreg & L1_ENTRIES_MASK) << 4)) for the address
        // L1Mask is pre-shifted.
        and_(ARMEmitter::Size::i64Bit, TMP2, TMP2, RipReg.R(), ARMEmitter::ShiftType::LSL, FEXCore::ilog2(sizeof(LookupCache::LookupCacheEntry)));
        add(TMP1, TMP1, TMP2);

        // Write {host pointer, guest RIP} into the selected L1 entry.
        stp<ARMEmitter::IndexType::OFFSET>(TMP4, RipReg, TMP1);

        // Jump to the block
        br(TMP4);
      }
    }
  }

  {
    ThreadStopHandlerAddressSpillSRA = GetCursorAddress<uint64_t>();
    SpillStaticRegs(TMP1);

    ThreadStopHandlerAddress = GetCursorAddress<uint64_t>();

    PopCalleeSavedRegisters();

    // Return from the function
    // LR is set to the correct return location now
    ret();
  }

  // Wraps `Body` with the bookkeeping that marks the region as signal-guarded:
  // the deferred signal refcount is bumped around it on non-Windows targets, and
  // the in-syscall-callback byte in the CPU area is set around it on ARM64EC.
  // Clobbers TMP1/2
  auto EmitSignalGuardedRegion = [&](auto Body) {
#ifndef _WIN32
    ldr(TMP2, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
    add(ARMEmitter::Size::i64Bit, TMP2, TMP2, 1);
    str(TMP2, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
#endif

#ifdef ARCHITECTURE_arm64ec
    ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
    LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1);
    strb(TMP1.W(), TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
#endif

    Body();

#ifdef ARCHITECTURE_arm64ec
    ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
    strb(ARMEmitter::WReg::zr, TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
#endif

#ifndef _WIN32
    ldr(TMP2, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
    sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, 1);
    str(TMP2, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));

    // Trigger segfault if any deferred signals are pending
    strb(ARMEmitter::XReg::zr, STATE,
         offsetof(FEXCore::Core::InternalThreadState, InterruptFaultPage) - offsetof(FEXCore::Core::InternalThreadState, BaseFrameState));
#endif
  };

  {
    // Exit-function linker: calls Pointers.ExitFunctionLink(STATE, LR) and jumps to the returned address.
    ExitFunctionLinkerAddress = GetCursorAddress<uint64_t>();
    EmitSignalGuardedRegion([&]() {
      SpillStaticRegs(TMP1);

      mov(ARMEmitter::XReg::x0, STATE);
      mov(ARMEmitter::XReg::x1, ARMEmitter::XReg::lr);

      ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, Pointers.ExitFunctionLink));
      if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
        GenerateIndirectRuntimeCall<uintptr_t, void*, void*>(ARMEmitter::Reg::r2);
      } else {
        blr(ARMEmitter::Reg::r2);
      }

      if (!TMP_ABIARGS) {
        mov(TMP1, ARMEmitter::XReg::x0);
      }

      FillStaticRegs();
    });

    br(TMP1);
  }

  // Need to create the block
  {
    (void)Bind(&NoBlock);

    EmitSignalGuardedRegion([&]() {
      SpillStaticRegs(TMP1);

      if (!TMP_ABIARGS) {
        mov(ARMEmitter::XReg::x2, RipReg);
      }

      ldr(ARMEmitter::XReg::x0, &l_CTX);
      mov(ARMEmitter::XReg::x1, STATE);
      // x2 contains guest RIP
      mov(ARMEmitter::XReg::x3, 0);
      ldr(ARMEmitter::XReg::x4, &l_CompileBlock);

      if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
        GenerateIndirectRuntimeCall<uintptr_t, void*, void*, uint64_t, uint64_t>(ARMEmitter::Reg::r4);
      } else {
        blr(ARMEmitter::Reg::r4); // { CTX, Frame, RIP, MaxInst }
      }

      // Result is now in x0
      if (!TMP_ABIARGS) {
        mov(TMP1, ARMEmitter::XReg::x0);
      }

      FillStaticRegs();
    });

    // Jump to the compiled block
    br(TMP1);
  }

  {
    // Same as the NoBlock path, but calls the CompileSingleStep entry from the literal pool.
    (void)Bind(&CompileSingleStep);

    EmitSignalGuardedRegion([&]() {
      SpillStaticRegs(TMP1);

      if (!TMP_ABIARGS) {
        mov(ARMEmitter::XReg::x2, RipReg);
      }

      ldr(ARMEmitter::XReg::x0, &l_CTX);
      mov(ARMEmitter::XReg::x1, STATE);
      // x2 contains guest RIP
      ldr(ARMEmitter::XReg::x4, &l_CompileSingleStep);

      if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
        GenerateIndirectRuntimeCall<uintptr_t, void*, void*, uint64_t, uint64_t>(ARMEmitter::Reg::r4);
      } else {
        blr(ARMEmitter::Reg::r4); // { CTX, Frame, RIP }
      }

      // Result is now in x0
      if (!TMP_ABIARGS) {
        mov(TMP1, ARMEmitter::XReg::x0);
      }

      FillStaticRegs();
    });

    // Jump to the compiled block
    br(TMP1);
  }

  {
    SignalHandlerReturnAddress = GetCursorAddress<uint64_t>();

    // Now to get back to our old location we need to do a fault dance
    // We can't use SIGTRAP here since gdb catches it and never gives it to the application!
    hlt(0);
  }

  {
    SignalHandlerReturnAddressRT = GetCursorAddress<uint64_t>();

    // Now to get back to our old location we need to do a fault dance
    // We can't use SIGTRAP here since gdb catches it and never gives it to the application!
    hlt(0);
  }

  {
    // Guest SIGILL handler
    // Needs to be distinct from the SignalHandlerReturnAddress
    GuestSignal_SIGILL = GetCursorAddress<uint64_t>();

    SpillStaticRegs(TMP1);

    hlt(0);
  }

  {
    // Guest SIGTRAP handler
    // Needs to be distinct from the SignalHandlerReturnAddress
    GuestSignal_SIGTRAP = GetCursorAddress<uint64_t>();

    SpillStaticRegs(TMP1);

    brk(0);
  }

  {
    // Guest Overflow handler
    // Needs to be distinct from the SignalHandlerReturnAddress
    GuestSignal_SIGSEGV = GetCursorAddress<uint64_t>();

    SpillStaticRegs(TMP1);

    // hlt/udf = SIGILL
    // brk = SIGTRAP
    // ??? = SIGSEGV
    // Force a SIGSEGV by loading zero
    if (CTX->ExitOnHLTEnabled()) {
      // Instead of faulting, unwind to the stack pointer saved at dispatcher entry and return to the caller.
      ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, ReturningStackLocation));
      add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::r0, 0);
      PopCalleeSavedRegisters();
      ret();
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, 0);
      ldr(ARMEmitter::XReg::x1, ARMEmitter::Reg::r1);
    }
  }

  {
    ThreadPauseHandlerAddressSpillSRA = GetCursorAddress<uint64_t>();
    SpillStaticRegs(TMP1);

    ThreadPauseHandlerAddress = GetCursorAddress<uint64_t>();
    // We are pausing, this means the frontend should be waiting for this thread to idle
    // We will have faulted and jumped to this location at this point

    // Call our sleep handler
    ldr(ARMEmitter::XReg::x0, &l_CTX);
    mov(ARMEmitter::XReg::x1, STATE);
    ldr(ARMEmitter::XReg::x2, &l_Sleep);
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<void, void*, void*>(ARMEmitter::Reg::r2);
    } else {
      blr(ARMEmitter::Reg::r2);
    }

    PauseReturnInstruction = GetCursorAddress<uint64_t>();
    // Fault to start running again
    hlt(0);
  }

  {
    // The expectation here is that a thunked function needs to call back in to the JIT in a reentrant safe way
    // To do this safely we need to do some state tracking and register saving
    //
    // eg:
    // JIT Call->
    //  Thunk->
    //    Thunk callback->
    //
    // The thunk callback needs to execute JIT code and when it returns, it needs to safely return to the thunk rather than JIT space
    // This is handled by pushing a return address trampoline to the stack so when the guest address returns it hits our custom thunk return
    //  - This will safely return us to the thunk
    //
    // On return to the thunk, the thunk can get whatever its return value is from the thread context depending on ABI handling on its end
    // When the thunk itself returns, it'll do its regular return logic there
    // void ReentrantCallback(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP);
    CallbackPtr = GetCursorAddress<JITCallback>();

    // We expect the thunk to have previously pushed the registers it was using
    PushCalleeSavedRegisters();

    // First thing we need to move the thread state pointer back in to our register
    mov(STATE, ARMEmitter::XReg::x0);

    // Make sure to adjust the refcounter so we don't clear the cache now
    ldr(ARMEmitter::WReg::w2, STATE_PTR(CpuStateFrame, SignalHandlerRefCounter));
    add(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::r2, 1);
    str(ARMEmitter::WReg::w2, STATE_PTR(CpuStateFrame, SignalHandlerRefCounter));

    // Now push the callback return trampoline to the guest stack
    // Guest will be misaligned because calling a thunk won't correct the guest's stack once we call the callback from the host
    ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Pointers.ThunkCallbackRet));

    ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, State.gregs[X86State::REG_RSP]));
    sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::r2, CTX->Config.Is64BitMode ? 16 : 12);
    str(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, State.gregs[X86State::REG_RSP]));

    // Store the trampoline to the guest stack
    // Guest stack is now correctly misaligned after a regular call instruction
    str(ARMEmitter::XReg::x0, ARMEmitter::Reg::r2, 0);

    // Store RIP to the context state
    str(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, State.rip));

    // load static regs
    FillStaticRegs();
    // Push an all-zero pair onto the call-ret stack.
    stp<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::zr, ARMEmitter::XReg::zr, REG_CALLRET_SP, -0x10);

    // Now go back to the regular dispatcher loop
    (void)b(&LoopTop);
  }

  // Emits one long ALU op handler and returns its address.
  // The handler passes TMP1-TMP3 as the three arguments and calls the function pointer loaded from
  // [R + Offset]; the two-register result comes back in TMP1/TMP2 before returning to the caller's block.
  auto EmitLongALUOpHandler = [&](auto R, auto Offset) {
    auto Address = GetCursorAddress<uint64_t>();

    PushDynamicRegs(TMP4);
    SpillStaticRegs(TMP4);

    if (!TMP_ABIARGS) {
      mov(ARMEmitter::XReg::x0, TMP1);
      mov(ARMEmitter::XReg::x1, TMP2);
      mov(ARMEmitter::XReg::x2, TMP3);
    }

    ldr(ARMEmitter::XReg::x3, R, Offset);
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<__uint128_t, uint64_t, uint64_t, uint64_t>(ARMEmitter::Reg::r3);
    } else {
      blr(ARMEmitter::Reg::r3);
    }

    // Result is now in x0, x1
    if (!TMP_ABIARGS) {
      mov(TMP1, ARMEmitter::XReg::x0);
      mov(TMP2, ARMEmitter::XReg::x1);
    }

    FillStaticRegs();

    // Fix the stack and any values that were stepped on
    PopDynamicRegs();

    // Go back to our code block
    ret();
    return Address;
  };

  LUDIVHandlerAddress = EmitLongALUOpHandler(STATE_PTR(CpuStateFrame, Pointers.LUDIV));
  LDIVHandlerAddress = EmitLongALUOpHandler(STATE_PTR(CpuStateFrame, Pointers.LDIV));

  EmitF64Sin();
  EmitF64Cos();
  EmitF64Tan();

  // Interpreter fallbacks
  {
    constexpr static std::array<FallbackABI, FABI_UNKNOWN> ABIS {{
      FABI_F80_I16_F32_PTR,
      FABI_F80_I16_F64_PTR,
      FABI_F80_I16_I16_PTR,
      FABI_F80_I16_I32_PTR,
      FABI_F32_I16_F80_PTR,
      FABI_F64_I16_F80_PTR,
      FABI_F64_F64_PTR,
      FABI_F64_F64_F64_PTR,
      FABI_I16_I16_F80_PTR,
      FABI_I32_I16_F80_PTR,
      FABI_I64_I16_F80_PTR,
      FABI_I64_I16_F80_F80_PTR,
      FABI_F80_I16_F80_PTR,
      FABI_F80_I16_F80_F80_PTR,
      FABI_F80x2_I16_F80_PTR,
      FABI_F64x2_F64_PTR,
      FABI_I32_I64_I64_V128_V128_I16,
      FABI_I32_V128_V128_I16,
    }};

    // Emit one trampoline per ABI and remember where each one starts.
    for (auto ABI : ABIS) {
      ABIPointers[ABI] = GenerateABICall(ABI);
    }
  }

  // Literal pool: 64-bit constants referenced by the PC-relative loads emitted above.
  (void)Bind(&l_CTX);
  dc64(reinterpret_cast<uintptr_t>(CTX));
  (void)Bind(&l_Sleep);
  dc64(reinterpret_cast<uint64_t>(SleepThread));
  (void)Bind(&l_CompileBlock);
  FEXCore::Utils::MemberFunctionToPointerCast PMFCompileBlock(&FEXCore::Context::ContextImpl::CompileBlock);
  dc64(PMFCompileBlock.GetConvertedPointer());
  (void)Bind(&l_CompileSingleStep);

  FEXCore::Utils::MemberFunctionToPointerCast PMFCompileSingleStep(&FEXCore::Context::ContextImpl::CompileSingleStep);
  dc64(PMFCompileSingleStep.GetConvertedPointer());

  // Record the emitted range and flush it from the instruction cache.
  Start = reinterpret_cast<uint64_t>(DispatchPtr);
  End = GetCursorAddress<uint64_t>();
  ClearICache(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr));

  if (CTX->Config.BlockJITNaming()) {
    fextl::string Name = fextl::fmt::format("Dispatch_{}", FHU::Syscalls::gettid());
    CTX->Symbols.RegisterNamedRegion(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr), Name);
  }
  if (CTX->Config.GlobalJITNaming()) {
    CTX->Symbols.RegisterJITSpace(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr));
  }

#ifdef VIXL_DISASSEMBLER
  if (Disassemble() & FEXCore::Config::Disassemble::DISPATCHER) {
    // Walk the emitted range in 4-byte steps and log each decoded instruction.
    const auto DisasmEnd = GetCursorAddress<const vixl::aarch64::Instruction*>();
    for (auto PCToDecode = DisasmBegin; PCToDecode < DisasmEnd; PCToDecode += 4) {
      DisasmDecoder->Decode(PCToDecode);
      auto Output = Disasm->GetOutput();
      LogMan::Msg::IFmt("{}", Output);
    }
  }
#endif
}

#ifdef VIXL_SIMULATOR
// Simulator-only entry: runs the emitted dispatcher from DispatchPtr with x0 = Frame and x1 = 0.
void Dispatcher::ExecuteDispatch(FEXCore::Core::CpuStateFrame* Frame) {
  const auto FrameArg = reinterpret_cast<int64_t>(Frame);
  Simulator.WriteXRegister(0, FrameArg);
  Simulator.WriteXRegister(1, 0);

  const auto* EntryPoint = reinterpret_cast<const vixl::aarch64::Instruction*>(DispatchPtr);
  Simulator.RunFrom(EntryPoint);
}

// Simulator-only entry: runs the emitted thunk callback path from CallbackPtr with x0 = Frame and x1 = RIP.
void Dispatcher::ExecuteJITCallback(FEXCore::Core::CpuStateFrame* Frame, uint64_t RIP) {
  const auto FrameArg = reinterpret_cast<int64_t>(Frame);
  Simulator.WriteXRegister(0, FrameArg);
  Simulator.WriteXRegister(1, RIP);

  const auto* EntryPoint = reinterpret_cast<const vixl::aarch64::Instruction*>(CallbackPtr);
  Simulator.RunFrom(EntryPoint);
}

#endif

void Dispatcher::EmitI32ToExtF80() {
  ARMEmitter::ForwardLabel ZeroCase;
  ARMEmitter::ForwardLabel Done;

  (void)cbz(ARMEmitter::Size::i32Bit, TMP2, &ZeroCase);

  lsr(ARMEmitter::Size::i32Bit, TMP4, TMP2, 31);
  tst(ARMEmitter::Size::i32Bit, TMP2, TMP2);
  neg(ARMEmitter::Size::i32Bit, TMP3, TMP2);
  csel(ARMEmitter::Size::i32Bit, TMP3, TMP3, TMP2, ARMEmitter::Condition::CC_MI);

  clz(ARMEmitter::Size::i32Bit, TMP1, TMP3);

  mov(ARMEmitter::Size::i32Bit, TMP2, 0x401E);
  sub(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP1);
  orr(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP4, ARMEmitter::ShiftType::LSL, 15);

  lslv(ARMEmitter::Size::i32Bit, TMP3, TMP3, TMP1);

  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 32);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);

  (void)b(&Done);

  (void)Bind(&ZeroCase);
  movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);

  (void)Bind(&Done);
}

void Dispatcher::EmitI16ToExtF80() {
  sxth(ARMEmitter::Size::i32Bit, TMP2, TMP2);

  ARMEmitter::ForwardLabel ZeroCase;
  ARMEmitter::ForwardLabel Done;

  (void)cbz(ARMEmitter::Size::i32Bit, TMP2, &ZeroCase);

  lsr(ARMEmitter::Size::i32Bit, TMP4, TMP2, 31);
  tst(ARMEmitter::Size::i32Bit, TMP2, TMP2);
  neg(ARMEmitter::Size::i32Bit, TMP3, TMP2);
  csel(ARMEmitter::Size::i32Bit, TMP3, TMP3, TMP2, ARMEmitter::Condition::CC_MI);

  clz(ARMEmitter::Size::i32Bit, TMP1, TMP3);

  mov(ARMEmitter::Size::i32Bit, TMP2, 0x401E);
  sub(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP1);
  orr(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP4, ARMEmitter::ShiftType::LSL, 15);

  lslv(ARMEmitter::Size::i32Bit, TMP3, TMP3, TMP1);

  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 32);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);

  (void)b(&Done);

  (void)Bind(&ZeroCase);
  movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);

  (void)Bind(&Done);
}

void Dispatcher::EmitF32ToExtF80() {
  ARMEmitter::ForwardLabel InfNaN;
  ARMEmitter::ForwardLabel ZeroDenormal;
  ARMEmitter::ForwardLabel Denormal;
  ARMEmitter::ForwardLabel NaN;
  ARMEmitter::ForwardLabel Done;
  ARMEmitter::BiDirectionalLabel NormalPath;
  ARMEmitter::ForwardLabel ZeroResult;

  fmov(ARMEmitter::Size::i32Bit, TMP1, VTMP1.S());

  ubfx(ARMEmitter::Size::i32Bit, TMP2, TMP1, 23, 8);
  and_(ARMEmitter::Size::i32Bit, TMP3, TMP1, 0x007FFFFF);
  lsr(ARMEmitter::Size::i32Bit, TMP4, TMP1, 31);

  cmp(ARMEmitter::Size::i32Bit, TMP2, 0xFF);
  (void)b(ARMEmitter::Condition::CC_EQ, &InfNaN);

  (void)cbz(ARMEmitter::Size::i32Bit, TMP2, &ZeroDenormal);

  (void)Bind(&NormalPath);
  // Exponent bias adjustment, where bias is 0x3F80
  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 0x3F80);
  add(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP1);
  orr(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP4, ARMEmitter::ShiftType::LSL, 15);

  // Set implicit bit and shift fraction to extF80 position
  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, 0x00800000ULL);
  orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);
  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 40);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&ZeroDenormal);
  (void)cbz(ARMEmitter::Size::i32Bit, TMP3, &ZeroResult);

  (void)Bind(&Denormal);
  clz(ARMEmitter::Size::i32Bit, TMP1, TMP3);
  sub(ARMEmitter::Size::i32Bit, TMP1, TMP1, 8);
  mov(ARMEmitter::Size::i32Bit, TMP2, 1);
  sub(ARMEmitter::Size::i32Bit, TMP2, TMP2, TMP1);
  lslv(ARMEmitter::Size::i32Bit, TMP3, TMP3, TMP1);
  (void)b(&NormalPath);

  (void)Bind(&ZeroResult);
  lsl(ARMEmitter::Size::i32Bit, TMP2, TMP4, 15);
  movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&InfNaN);
  (void)cbnz(ARMEmitter::Size::i32Bit, TMP3, &NaN);

  lsl(ARMEmitter::Size::i32Bit, TMP2, TMP4, 15);
  orr(ARMEmitter::Size::i32Bit, TMP2, TMP2, 0x7FFF);

  LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 0x8000000000000000ULL);
  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&NaN);
  lsl(ARMEmitter::Size::i32Bit, TMP2, TMP4, 15);
  orr(ARMEmitter::Size::i32Bit, TMP2, TMP2, 0x7FFF);

  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 40);
  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, 0xC000000000000000ULL);
  orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);

  (void)Bind(&Done);
}

void Dispatcher::EmitF64ToExtF80() {
  ARMEmitter::ForwardLabel InfNaN;
  ARMEmitter::ForwardLabel ZeroDenormal;
  ARMEmitter::ForwardLabel Denormal;
  ARMEmitter::ForwardLabel NaN;
  ARMEmitter::ForwardLabel Done;
  ARMEmitter::BiDirectionalLabel NormalPath;
  ARMEmitter::ForwardLabel ZeroResult;

  fmov(ARMEmitter::Size::i64Bit, TMP1, VTMP1.D());

  lsr(ARMEmitter::Size::i64Bit, TMP4, TMP1, 63);
  ubfx(ARMEmitter::Size::i64Bit, TMP2, TMP1, 52, 11);
  LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 0x000FFFFFFFFFFFFFULL);
  and_(ARMEmitter::Size::i64Bit, TMP3, TMP1, TMP3);

  cmp(ARMEmitter::Size::i64Bit, TMP2, 0x7FF);
  (void)b(ARMEmitter::Condition::CC_EQ, &InfNaN);

  (void)cbz(ARMEmitter::Size::i64Bit, TMP2, &ZeroDenormal);

  (void)Bind(&NormalPath);
  // Exponent bias adjustment where bias difference is 0x3C00
  add(ARMEmitter::Size::i64Bit, TMP2, TMP2, 0x3000);
  add(ARMEmitter::Size::i64Bit, TMP2, TMP2, 0xC00);
  orr(ARMEmitter::Size::i64Bit, TMP2, TMP2, TMP4, ARMEmitter::ShiftType::LSL, 15);

  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, 0x0010000000000000ULL);
  orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);
  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 11);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&ZeroDenormal);
  (void)cbz(ARMEmitter::Size::i64Bit, TMP3, &ZeroResult);

  (void)Bind(&Denormal);
  clz(ARMEmitter::Size::i64Bit, TMP1, TMP3);
  sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 11);
  mov(ARMEmitter::Size::i64Bit, TMP2, 1);
  sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, TMP1);
  lslv(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);
  (void)b(&NormalPath);

  (void)Bind(&ZeroResult);
  lsl(ARMEmitter::Size::i64Bit, TMP2, TMP4, 15);
  movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&InfNaN);
  (void)cbnz(ARMEmitter::Size::i64Bit, TMP3, &NaN);

  lsl(ARMEmitter::Size::i64Bit, TMP2, TMP4, 15);
  orr(ARMEmitter::Size::i64Bit, TMP2, TMP2, 0x7FFF);

  LoadConstant(ARMEmitter::Size::i64Bit, TMP3, 0x8000000000000000ULL);
  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);
  (void)b(&Done);

  (void)Bind(&NaN);
  lsl(ARMEmitter::Size::i64Bit, TMP2, TMP4, 15);
  orr(ARMEmitter::Size::i64Bit, TMP2, TMP2, 0x7FFF);

  lsl(ARMEmitter::Size::i64Bit, TMP3, TMP3, 11);
  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, 0xC000000000000000ULL);
  orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);

  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP3);
  ins(ARMEmitter::SubRegSize::i16Bit, VTMP1, 4, TMP2);

  (void)Bind(&Done);
}

// Emits the out-of-line double-precision sine handler and records its address in F64SinHandlerAddress.
//
// Contract of the emitted routine, as established by the code below:
//  - Input and result are both in VTMP1.D; the routine returns with ret() through LR.
//  - +/-0 is returned unchanged.
//  - |x| >= 2^23, NaN and infinity are forwarded to the F64SIN fallback handler.
//  - Otherwise: n = round(x / pi), r = x - n * pi (pi split in three parts), and
//    sin(x) = (r + r^3 * P(r^2)) with the sign flipped when n is odd.
//  - q2-q5 are saved on the stack and restored; NZCV is saved to State.flags[24] and restored.
//  - TMP1, TMP2 and VTMP2 are overwritten; the fallback path additionally overwrites TMP4.
//
// NZCV is restored on *both* exits. The range check's fcmp overwrites the flags before the
// fallback branch is taken, so the fallback path reloads the saved value before calling out;
// otherwise the callee would observe (and could persist) the comparison result instead of the
// flags that were live on entry.
void Dispatcher::EmitF64Sin() {
  F64SinHandlerAddress = GetCursorAddress<uint64_t>();

  constexpr auto V2 = ARMEmitter::VReg::v2;
  constexpr auto V3 = ARMEmitter::VReg::v3;
  constexpr auto V4 = ARMEmitter::VReg::v4;
  constexpr auto V5 = ARMEmitter::VReg::v5;

  ARMEmitter::ForwardLabel Fallback, NonZero;
  ARMEmitter::ForwardLabel InvPiPi1Label, Pi23Label;
  ARMEmitter::ForwardLabel C0Label, C1Label, C2Label, C3Label, C4Label, C5Label, C6Label;
  ARMEmitter::ForwardLabel RangeLabel;

  // sin(+/-0) = +/-0
  // Shifting out the sign bit leaves zero only for +/-0; nothing has been saved yet, so return directly.
  fmov(ARMEmitter::Size::i64Bit, TMP1, VTMP1.D());
  lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1);
  (void)cbnz(ARMEmitter::Size::i64Bit, TMP1, &NonZero);
  ret();
  (void)Bind(&NonZero);

  // Save q2-q5.
  stp<ARMEmitter::IndexType::PRE>(ARMEmitter::QReg::q2, ARMEmitter::QReg::q3, ARMEmitter::Reg::rsp, -64);
  stp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::QReg::q4, ARMEmitter::QReg::q5, ARMEmitter::Reg::rsp, 32);

  // save nzcv
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
  str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));

  // Range check: fall back for |x| >= 2^23, NaN, and inf.
  // CC_HS is also taken for an unordered compare, which is what routes NaN to the fallback.
  fabs(VTMP2.D(), VTMP1.D());
  ldr(V2.D(), &RangeLabel);
  fcmp(VTMP2.D(), V2.D());
  (void)b(ARMEmitter::Condition::CC_HS, &Fallback);

  // n = rint(x/pi).
  ldr(V2.Q(), &InvPiPi1Label); // q2 = {inv_pi, pi_1}
  fmul(VTMP2.D(), VTMP1.D(), V2.D());
  frinta(VTMP2.D(), VTMP2.D());

  // odd = (int(n) & 1) << 63.
  fcvtzs(ARMEmitter::Size::i64Bit, TMP1, VTMP2.D());
  lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, 63);

  // r = x - n*pi (range reduction) via .2D lane-indexed FMLS.
  // Only lane 0 of the vector results is consumed below.
  ldr(V3.Q(), &Pi23Label);                                            // q3 = {pi_2, pi_3}
  fmov(V4.D(), VTMP1.D());                                            // r = x
  fmls(ARMEmitter::SubRegSize::i64Bit, V4.Q(), VTMP2.Q(), V2.Q(), 1); // r -= n * pi_1
  fmls(ARMEmitter::SubRegSize::i64Bit, V4.Q(), VTMP2.Q(), V3.Q(), 0); // r -= n * pi_2
  fmls(ARMEmitter::SubRegSize::i64Bit, V4.Q(), VTMP2.Q(), V3.Q(), 1); // r -= n * pi_3

  // r^2, r^4.
  // r itself is parked in TMP2 because V4 is reused for coefficients below.
  fmul(V5.D(), V4.D(), V4.D());
  fmov(ARMEmitter::Size::i64Bit, TMP2, V4.D());
  fmul(V3.D(), V5.D(), V5.D());

  // Polynomial in r^2, evaluated in pairs:
  //   p = (c0 + r2*c1) + r4*((c2 + r2*c3) + r4*(c4 + r2*c5 + r4*c6)).
  // Level 1 (independent FMAs).
  ldr(VTMP1.D(), &C0Label);
  ldr(VTMP2.D(), &C1Label);
  fmadd(VTMP1.D(), V5.D(), VTMP2.D(), VTMP1.D()); // p01 = c0 + r2*c1

  ldr(VTMP2.D(), &C2Label);
  ldr(V2.D(), &C3Label);
  fmadd(VTMP2.D(), V5.D(), V2.D(), VTMP2.D()); // p23 = c2 + r2*c3

  ldr(V2.D(), &C4Label);
  ldr(V4.D(), &C5Label);
  fmadd(V2.D(), V5.D(), V4.D(), V2.D()); // p45 = c4 + r2*c5

  // Level 2 (serial).
  ldr(V4.D(), &C6Label);
  fmadd(V2.D(), V3.D(), V4.D(), V2.D());          // p46 = p45 + r4*c6
  fmadd(VTMP2.D(), V3.D(), V2.D(), VTMP2.D());    // p26 = p23 + r4*p46
  fmadd(VTMP1.D(), V3.D(), VTMP2.D(), VTMP1.D()); // p06 = p01 + r4*p26

  // y = r + r^3 * p06.
  fmov(ARMEmitter::Size::i64Bit, V4.D(), TMP2);
  fmul(V5.D(), V5.D(), V4.D());
  fmadd(VTMP1.D(), V5.D(), VTMP1.D(), V4.D());

  // result = y XOR odd.
  fmov(ARMEmitter::Size::i64Bit, TMP2, VTMP1.D());
  eor(ARMEmitter::Size::i64Bit, TMP2, TMP2, TMP1);
  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP2);

  // restore nzcv
  ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);

  // Restore q2-q5 and return.
  ldp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::QReg::q4, ARMEmitter::QReg::q5, ARMEmitter::Reg::rsp, 32);
  ldp<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::QReg::q3, ARMEmitter::Reg::rsp, 64);
  ret();

  // Fallback path.
  (void)Bind(&Fallback);

  // The range check's fcmp clobbered NZCV; put the entry value back before calling out.
  // TMP1 is free here since it is reloaded with the handler address below.
  ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);

  // Undo the q2-q5 save so the stack is back at its entry position.
  ldp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::QReg::q4, ARMEmitter::QReg::q5, ARMEmitter::Reg::rsp, 32);
  ldp<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::QReg::q3, ARMEmitter::Reg::rsp, 64);

  // Preserve LR across the call, then call the ABI handler with the target function in TMP4.
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64SIN].ABIHandler));
  ldr(TMP4, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64SIN].Func));
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
  ret();

  // Constant pool.
  // 16-byte aligned so the two 128-bit pair loads ({inv_pi, pi_1} and {pi_2, pi_3}) are aligned.
  Align(16);
  (void)Bind(&InvPiPi1Label);
  dc64(0x3FD4'5F30'6DC9'C883ULL); // inv_pi
  dc64(0x4009'21FB'5444'2D18ULL); // pi_1
  (void)Bind(&Pi23Label);
  dc64(0x3CA1'A626'3314'5C06ULL); // pi_2
  dc64(0x395C'1CD1'2902'4E09ULL); // pi_3
  (void)Bind(&C0Label);
  dc64(0xBFC5'5555'5555'547BULL); // c0
  (void)Bind(&C1Label);
  dc64(0x3F81'1111'1110'8A4DULL); // c1
  (void)Bind(&C2Label);
  dc64(0xBF2A'01A0'1993'6F27ULL); // c2
  (void)Bind(&C3Label);
  dc64(0x3EC7'1DE3'7A97'D93EULL); // c3
  (void)Bind(&C4Label);
  dc64(0xBE5A'E633'9199'87C6ULL); // c4
  (void)Bind(&C5Label);
  dc64(0x3DE6'0E27'7AE0'7CECULL); // c5
  (void)Bind(&C6Label);
  dc64(0xBD69'E954'0300'A100ULL); // c6
  (void)Bind(&RangeLabel);
  dc64(0x4160'0000'0000'0000ULL); // 2^23
}

void Dispatcher::EmitF64Cos() {
  F64CosHandlerAddress = GetCursorAddress<uint64_t>();

  constexpr auto Accum = ARMEmitter::VReg::v2;

  ARMEmitter::ForwardLabel Fallback;
  ARMEmitter::ForwardLabel RangeLabel, InvPiLabel;
  ARMEmitter::ForwardLabel Pi1Label, Pi2Label, Pi3Label;
  ARMEmitter::ForwardLabel C0Label, C1Label, C2Label, C3Label, C4Label, C5Label, C6Label;

  // Save q2 for use as accumulator
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, -16);

  // save nzcv
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
  str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));

  // Range check: fall back for |x| >= 2^23, NaN, and inf.
  fabs(VTMP2.D(), VTMP1.D());
  ldr(Accum.D(), &RangeLabel);
  fcmp(VTMP2.D(), Accum.D());
  (void)b(ARMEmitter::Condition::CC_HS, &Fallback);

  // n = rint(x * (1/pi) + 0.5).
  ldr(Accum.D(), &InvPiLabel);
  fmov(ARMEmitter::ScalarRegSize::i64Bit, VTMP2, 0.5f);
  fmadd(VTMP2.D(), VTMP1.D(), Accum.D(), VTMP2.D());
  frinta(VTMP2.D(), VTMP2.D());

  // odd = (int(n) & 1) << 63.
  fcvtzs(ARMEmitter::Size::i64Bit, TMP1, VTMP2.D());
  lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, 63);

  // Save input to Accum before overwriting VTMP1.
  fmov(Accum.D(), VTMP1.D());

  // n = n - 0.5.
  fmov(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, 0.5f);
  fsub(VTMP2.D(), VTMP2.D(), VTMP1.D());

  // r = x - n*pi (range reduction), in extended precision.
  ldr(VTMP1.D(), &Pi1Label);
  fmsub(Accum.D(), VTMP2.D(), VTMP1.D(), Accum.D());
  ldr(VTMP1.D(), &Pi2Label);
  fmsub(Accum.D(), VTMP2.D(), VTMP1.D(), Accum.D());
  ldr(VTMP1.D(), &Pi3Label);
  fmsub(Accum.D(), VTMP2.D(), VTMP1.D(), Accum.D());

  // sin(r) poly approx.
  fmul(VTMP1.D(), Accum.D(), Accum.D());
  fmov(ARMEmitter::Size::i64Bit, TMP2, Accum.D());

  // Horner: p = c6 + r2*(c5 + r2*(... + r2*c0)).
  ldr(VTMP2.D(), &C6Label);
  ldr(Accum.D(), &C5Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C4Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C3Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C2Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C1Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C0Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());

  // y = r + r^3 * p.
  fmov(ARMEmitter::Size::i64Bit, Accum.D(), TMP2);
  fmul(VTMP1.D(), VTMP1.D(), Accum.D());
  fmadd(Accum.D(), VTMP1.D(), VTMP2.D(), Accum.D());

  // result = y XOR odd.
  fmov(ARMEmitter::Size::i64Bit, TMP2, Accum.D());
  eor(ARMEmitter::Size::i64Bit, TMP2, TMP2, TMP1);
  fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), TMP2);

  // restore nzcv
  ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);

  // Restore q2 and return.
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, 16);
  ret();

  // Fallback path.
  (void)Bind(&Fallback);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, 16);
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64COS].ABIHandler));
  ldr(TMP4, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64COS].Func));
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
  ret();

  // Constant pool.
  Align(16);
  (void)Bind(&InvPiLabel);
  dc64(0x3FD4'5F30'6DC9'C883ULL); // inv_pi
  (void)Bind(&Pi1Label);
  dc64(0x4009'21FB'5444'2D18ULL); // pi_1
  (void)Bind(&Pi2Label);
  dc64(0x3CA1'A626'3314'5C06ULL); // pi_2
  (void)Bind(&Pi3Label);
  dc64(0x395C'1CD1'2902'4E09ULL); // pi_3
  (void)Bind(&C0Label);
  dc64(0xBFC5'5555'5555'547BULL); // c0
  (void)Bind(&C1Label);
  dc64(0x3F81'1111'1110'8A4DULL); // c1
  (void)Bind(&C2Label);
  dc64(0xBF2A'01A0'1993'6F27ULL); // c2
  (void)Bind(&C3Label);
  dc64(0x3EC7'1DE3'7A97'D93EULL); // c3
  (void)Bind(&C4Label);
  dc64(0xBE5A'E633'9199'87C6ULL); // c4
  (void)Bind(&C5Label);
  dc64(0x3DE6'0E27'7AE0'7CECULL); // c5
  (void)Bind(&C6Label);
  dc64(0xBD69'E954'0300'A100ULL); // c6
  (void)Bind(&RangeLabel);
  dc64(0x4160'0000'0000'0000ULL); // 2^23
}

void Dispatcher::EmitF64Tan() {
  F64TanHandlerAddress = GetCursorAddress<uint64_t>();

  constexpr auto Accum = ARMEmitter::VReg::v2;

  ARMEmitter::ForwardLabel Fallback, NonZero;
  ARMEmitter::ForwardLabel RangeLabel, TwoOverPiLabel;
  ARMEmitter::ForwardLabel HalfPi0Label, HalfPi1Label;
  ARMEmitter::ForwardLabel C0Label, C1Label, C2Label, C3Label, C4Label, C5Label, C6Label, C7Label, C8Label;

  // tan(+/-0) = +/-0
  fmov(ARMEmitter::Size::i64Bit, TMP1, VTMP1.D());
  lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1);
  (void)cbnz(ARMEmitter::Size::i64Bit, TMP1, &NonZero);
  ret();
  (void)Bind(&NonZero);

  // Save q2 for use as accumulator
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, -16);

  // save nzcv
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
  str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));

  // Range check: fall back for |x| >= 2^23, NaN, and inf.
  fabs(VTMP2.D(), VTMP1.D());
  ldr(Accum.D(), &RangeLabel);
  fcmp(VTMP2.D(), Accum.D());
  (void)b(ARMEmitter::Condition::CC_HS, &Fallback);

  // q = nearest integer to 2 * x / pi.
  ldr(VTMP2.D(), &TwoOverPiLabel);
  fmul(VTMP2.D(), VTMP1.D(), VTMP2.D());
  frinta(VTMP2.D(), VTMP2.D());

  // qi = int(q).
  fcvtzs(ARMEmitter::Size::i64Bit, TMP1, VTMP2.D());

  // r = x - q * pi/2 (range reduction), in extended precision.
  fmov(Accum.D(), VTMP1.D());
  ldr(VTMP1.D(), &HalfPi0Label);
  fmsub(Accum.D(), VTMP2.D(), VTMP1.D(), Accum.D());
  ldr(VTMP1.D(), &HalfPi1Label);
  fmsub(Accum.D(), VTMP2.D(), VTMP1.D(), Accum.D());

  // Further reduce r to [-pi/8, pi/8].
  fmov(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, 0.5f);
  fmul(Accum.D(), Accum.D(), VTMP1.D());

  // Approximate tan(r) using order 8 polynomial.
  fmul(VTMP1.D(), Accum.D(), Accum.D());
  fmov(ARMEmitter::Size::i64Bit, TMP2, Accum.D());

  // Horner: p = C8 + r2*(C7 + r2*(... + r2*C0)).
  ldr(VTMP2.D(), &C8Label);
  ldr(Accum.D(), &C7Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C6Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C5Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C4Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C3Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C2Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C1Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());
  ldr(Accum.D(), &C0Label);
  fmadd(VTMP2.D(), VTMP1.D(), VTMP2.D(), Accum.D());

  // p = r + r^3 * p.
  fmov(ARMEmitter::Size::i64Bit, Accum.D(), TMP2);
  fmul(VTMP1.D(), VTMP1.D(), Accum.D());
  fmadd(Accum.D(), VTMP1.D(), VTMP2.D(), Accum.D());

  // Double-angle reconstruction: tan(2x) = 2*tan(x) / (1 - tan^2(x)).
  fadd(VTMP1.D(), Accum.D(), Accum.D());
  fmul(VTMP2.D(), Accum.D(), Accum.D());
  fmov(ARMEmitter::ScalarRegSize::i64Bit, Accum, 1.0f);
  fsub(VTMP2.D(), VTMP2.D(), Accum.D());

  ARMEmitter::ForwardLabel SkipSwap;
  (void)tbnz(TMP1, 0, &SkipSwap);

  fneg(Accum.D(), VTMP1.D());
  fmov(VTMP1.D(), VTMP2.D());
  fmov(VTMP2.D(), Accum.D());

  (void)Bind(&SkipSwap);

  // result = numerator / denominator -> VTMP1.
  fdiv(VTMP1.D(), VTMP2.D(), VTMP1.D());

  // restore nzcv
  ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);

  // Restore q2 and return.
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, 16);
  ret();

  // Fallback path.
  (void)Bind(&Fallback);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::QReg::q2, ARMEmitter::Reg::rsp, 16);
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64TAN].ABIHandler));
  ldr(TMP4, STATE_PTR(CpuStateFrame, Pointers.FallbackHandlerPointers[FEXCore::Core::OPINDEX_F64TAN].Func));
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
  ret();

  // Constant pool.
  Align(16);
  (void)Bind(&TwoOverPiLabel);
  dc64(0x3FE4'5F30'6DC9'C883ULL); // two_over_pi
  (void)Bind(&HalfPi0Label);
  dc64(0x3FF9'21FB'5444'2D18ULL); // half_pi[0]
  (void)Bind(&HalfPi1Label);
  dc64(0x3C91'A626'3314'5C07ULL); // half_pi[1]
  (void)Bind(&C0Label);
  dc64(0x3FD5'5555'5555'5556ULL); // C0
  (void)Bind(&C1Label);
  dc64(0x3FC1'1111'1111'0A63ULL); // C1
  (void)Bind(&C2Label);
  dc64(0x3FAB'A1BA'1BB4'6414ULL); // C2
  (void)Bind(&C3Label);
  dc64(0x3F96'64F4'7E5B'5445ULL); // C3
  (void)Bind(&C4Label);
  dc64(0x3F82'26E5'E5EC'DFA3ULL); // C4
  (void)Bind(&C5Label);
  dc64(0x3F6D'6C7D'DBF8'7047ULL); // C5
  (void)Bind(&C6Label);
  dc64(0x3F57'EA75'D05B'583EULL); // C6
  (void)Bind(&C7Label);
  dc64(0x3F42'89F2'2964'A03CULL); // C7
  (void)Bind(&C8Label);
  dc64(0x3F34'E4FD'1414'7622ULL); // C8
  (void)Bind(&RangeLabel);
  dc64(0x4160'0000'0000'0000ULL); // 2^23
}

// Emits the trampoline that JIT code branches to in order to run a fallback handler with the
// given ABI, and returns the address at which the emitted code starts.
//
// Two families of trampolines are produced:
//   - FABI_F80_I16_{F32,F64,I16,I32}_PTR: no call is emitted. The conversion is generated inline
//     by the matching Emit*ToExtF80() helper, bracketed by an NZCV save/restore through
//     State.flags[24].
//   - Every other handled ABI:
//       1. SpillForABICall() is emitted first.
//       2. Arguments are placed in the ABI registers (x0.., v0..). The moves out of the
//          TMP/VTMP registers are only emitted when TMP_ABIARGS is false.
//       3. The handler in FallbackPointerReg (TMP4) is invoked, either through
//          GenerateIndirectRuntimeCall<>() or a plain blr, depending on
//          Config.DisableVixlIndirectCalls.
//       4. The matching Fill*Result lambda moves the result back into TMP1 / VTMP1 (/ VTMP2)
//          and emits FillForABICall().
// Every path, including the unhandled-ABI default, ends in the single ret() at the bottom.
//
// @param ABI  Which fallback calling convention to generate the trampoline for.
// @return     Cursor address captured before anything was emitted.
uint64_t Dispatcher::GenerateABICall(FallbackABI ABI) {
  auto Address = GetCursorAddress<uint64_t>();
  constexpr static auto FallbackPointerReg = TMP4;
  constexpr static auto ABI1 = ARMEmitter::XReg::x0;
  constexpr static auto ABI2 = ARMEmitter::XReg::x1;
  constexpr static auto ABI3 = ARMEmitter::XReg::x2;

  constexpr static auto VABI1 = ARMEmitter::VReg::v0;
  constexpr static auto VABI2 = ARMEmitter::VReg::v1;

  // Result helpers. Each one copies the return value out of the ABI return register(s) into the
  // TMP/VTMP register(s) before FillForABICall() is emitted.

  // Two 128-bit results: v0/v1 -> VTMP1/VTMP2.
  auto FillF80x2Result = [&]() {
    if (!TMP_ABIARGS) {
      mov(VTMP1.Q(), VABI1.Q());
      mov(VTMP2.Q(), VABI2.Q());
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // Two 64-bit float results: d0/d1 -> VTMP1/VTMP2.
  auto FillF64x2Result = [&]() {
    if (!TMP_ABIARGS) {
      fmov(VTMP1.D(), VABI1.D());
      fmov(VTMP2.D(), VABI2.D());
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 128-bit result: v0 -> VTMP1.
  // NOTE(review): this helper tests VTMP1 != VABI1 where its siblings test !TMP_ABIARGS;
  // presumably equivalent - confirm.
  auto FillF80Result = [&]() {
    if (VTMP1 != VABI1) {
      mov(VTMP1.Q(), VABI1.Q());
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 64-bit float result: d0 -> VTMP1.
  auto FillF64Result = [&]() {
    if (!TMP_ABIARGS) {
      fmov(VTMP1.D(), VABI1.D());
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 32-bit float result: s0 -> VTMP1.
  auto FillF32Result = [&]() {
    if (!TMP_ABIARGS) {
      fmov(VTMP1.S(), VABI1.S());
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 64-bit integer result: x0 -> TMP1.
  auto FillI64Result = [&]() {
    if (!TMP_ABIARGS) {
      mov(TMP1, ARMEmitter::XReg::x0);
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 32-bit integer result: w0 -> TMP1.
  auto FillI32Result = [&]() {
    if (!TMP_ABIARGS) {
      mov(TMP1.W(), ARMEmitter::WReg::w0);
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  // One 16-bit integer result. The full x0 is copied into TMP1; no narrowing is done here.
  auto FillI16Result = [&]() {
    if (!TMP_ABIARGS) {
      mov(TMP1, ARMEmitter::XReg::x0);
    }
    FillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, true);
  };

  switch (ABI) {
  case FABI_F80_I16_F32_PTR: {
    // Inline conversion, no handler call.
    // Save NZCV - it's a static register (guest x86 flags) and the inline code clobbers it
    mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
    str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    EmitF32ToExtF80();
    ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  } break;
  case FABI_F80_I16_F64_PTR: {
    // Inline conversion, no handler call. NZCV is preserved through State.flags[24].
    mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
    str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    EmitF64ToExtF80();
    ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  } break;
  case FABI_F80_I16_I16_PTR: {
    // Inline conversion, no handler call. NZCV is preserved through State.flags[24].
    mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
    str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    EmitI16ToExtF80();
    ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  } break;
  case FABI_F80_I16_I32_PTR: {
    // Inline conversion, no handler call. NZCV is preserved through State.flags[24].
    mrs(TMP1, ARMEmitter::SystemRegister::NZCV);
    str(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    EmitI32ToExtF80();
    ldr(TMP1.W(), STATE.R(), offsetof(FEXCore::Core::CpuStateFrame, State.flags[24]));
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  } break;
  case FABI_F32_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0 = source, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }

    mov(ARMEmitter::XReg::x1, STATE);
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<float, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF32Result();
  } break;
  case FABI_F64_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0 = source, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<double, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF64Result();
  } break;
  case FABI_F64_F64_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: d0 = source, x0 = STATE.
    if (!TMP_ABIARGS) {
      fmov(VABI1.D(), VTMP1.D());
    }
    mov(ARMEmitter::XReg::x0, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<double, double, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF64Result();
  } break;
  case FABI_F64_F64_F64_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    // vtmp2 (v1/v17): vector source 2
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: d0/d1 = sources, x0 = STATE.
    if (!TMP_ABIARGS) {
      fmov(VABI1.D(), VTMP1.D());
      fmov(VABI2.D(), VTMP2.D());
    }

    mov(ARMEmitter::XReg::x0, STATE);
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<double, double, double, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF64Result();
  } break;
  case FABI_I16_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0 = source, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    // The indirect-call signature uses uint32_t for the 16-bit return value.
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint32_t, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI16Result();
  } break;
  case FABI_I32_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0 = source, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint32_t, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI32Result();
  } break;
  case FABI_I64_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): source
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0 = source, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint64_t, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI64Result();
  } break;
  case FABI_I64_I16_F80_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    // vtmp2 (v1/v17): vector source 2
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0/v1 = sources, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
      mov(VABI2.Q(), VTMP2.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint64_t, uint16_t, FEXCore::VectorRegType, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI64Result();
  } break;
  case FABI_F80_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, x1 = STATE, v0 = source.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    mov(ARMEmitter::XReg::x1, STATE);
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<FEXCore::VectorRegType, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF80Result();
  } break;
  case FABI_F80_I16_F80_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    // vtmp2 (v1/v17): vector source 2
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, v0/v1 = sources, x1 = STATE.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
      mov(VABI2.Q(), VTMP2.Q());
    }
    mov(ARMEmitter::XReg::x1, STATE);

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<FEXCore::VectorRegType, uint16_t, FEXCore::VectorRegType, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF80Result();
  } break;
  case FABI_F80x2_I16_F80_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source; also receives result 1
    // vtmp2 (v1/v17): receives result 2

    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: w0 = FCW loaded from the state, x1 = STATE, v0 = source.
    ldrh(ARMEmitter::WReg::w0, STATE, offsetof(FEXCore::Core::CPUState, FCW));
    mov(ARMEmitter::XReg::x1, STATE);
    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
    }

    // NOTE(review): the indirect-call variant is commented out, so when
    // DisableVixlIndirectCalls is false no call instruction is emitted for this ABI at all.
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      // GenerateIndirectRuntimeCall<FEXCore::VectorRegPairType, uint16_t, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF80x2Result();
  } break;
  case FABI_F64x2_F64_PTR: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source; also receives result 1
    // vtmp2 (v1/v17): receives result 2

    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    // Handler arguments: x0 = STATE, d0 = source.
    mov(ARMEmitter::XReg::x0, STATE);
    if (!TMP_ABIARGS) {
      fmov(VABI1.D(), VTMP1.D());
    }

    // NOTE(review): the indirect-call variant is commented out, so when
    // DisableVixlIndirectCalls is false no call instruction is emitted for this ABI at all.
    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      // GenerateIndirectRuntimeCall<FEXCore::VectorScalarF64Pair, FEXCore::VectorRegType, uint64_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillF64x2Result();
  } break;
  case FABI_I32_I64_I64_V128_V128_I16: {
    // Linux Reg/Win32 Reg:
    // stack: FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    // vtmp2 (v1/v17): vector source 2
    // tmp1 (x0/x10): source 1
    // tmp2 (x1/x11): source 2
    // tmp3 (x2/x12): source 3

    // TMP3 carries an argument here, so TMP4 is handed to the spill as its temporary instead
    // and the handler pointer is fetched from the stack afterwards.
    const size_t OriginalSPOffset = SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP4, true);

    // Load the Fallback handler pointer from the stack.
    ldr(FallbackPointerReg, ARMEmitter::XReg::rsp, OriginalSPOffset);

    if (!TMP_ABIARGS) {
      mov(ABI1, TMP1);
      mov(ABI2, TMP2);
      mov(ABI3, TMP3);
      mov(VABI1.Q(), VTMP1.Q());
      mov(VABI2.Q(), VTMP2.Q());
    }

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint32_t, uint64_t, uint64_t, FEXCore::VectorRegType, FEXCore::VectorRegType, uint16_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI32Result();
  } break;
  case FABI_I32_V128_V128_I16: {
    // Linux Reg/Win32 Reg:
    // tmp4 (x4/x13): FallbackHandler
    // x30: return
    // vtmp1 (v0/v16): vector source 1
    // vtmp2 (v1/v17): vector source 2
    // tmp1 (x0/x10): source 1
    SpillForABICall(CTX->HostFeatures.SupportsPreserveAllABI, TMP3, true);

    if (!TMP_ABIARGS) {
      mov(VABI1.Q(), VTMP1.Q());
      mov(VABI2.Q(), VTMP2.Q());
      mov(ABI1, TMP1);
    }

    if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
      GenerateIndirectRuntimeCall<uint32_t, FEXCore::VectorRegType, FEXCore::VectorRegType, uint16_t>(FallbackPointerReg);
    } else {
      blr(FallbackPointerReg);
    }

    FillI32Result();
  } break;
  case FABI_UNKNOWN:
  default:
    // Unhandled ABIs only log when assertions are compiled in; the trampoline is then just the
    // ret() emitted below.
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_MSG_A_FMT("Unhandled IR Fallback ABI: {}", ToUnderlying(ABI));
#endif
    break;
  }

  // Return to JIT
  ret();

  return Address;
}

// Publishes the dispatcher's entry points into the pointer table of the thread's current frame.
// The table is the only route by which JIT code reaches these addresses.
//
// @param Thread  Thread whose CurrentFrame->Pointers table is filled in.
void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState* Thread) {
  auto& Table = Thread->CurrentFrame->Pointers;

  // Dispatch loop entry points.
  Table.DispatcherLoopTop = AbsoluteLoopTopAddress;
  Table.DispatcherLoopTopFillSRA = AbsoluteLoopTopAddressFillSRA;
  Table.DispatcherLoopTopEnterEC = AbsoluteLoopTopAddressEnterEC;
  Table.DispatcherLoopTopEnterECFillSRA = AbsoluteLoopTopAddressEnterECFillSRA;

  // Block linking and thread stop/pause handling.
  Table.ExitFunctionLinker = ExitFunctionLinkerAddress;
  Table.ThreadStopHandlerSpillSRA = ThreadStopHandlerAddressSpillSRA;
  Table.ThreadPauseHandlerSpillSRA = ThreadPauseHandlerAddressSpillSRA;

  // Guest signal raising and signal return.
  Table.GuestSignal_SIGILL = GuestSignal_SIGILL;
  Table.GuestSignal_SIGTRAP = GuestSignal_SIGTRAP;
  Table.GuestSignal_SIGSEGV = GuestSignal_SIGSEGV;
  Table.SignalReturnHandler = SignalHandlerReturnAddress;
  Table.SignalReturnHandlerRT = SignalHandlerReturnAddressRT;

  // Long division helpers.
  Table.LUDIVHandler = LUDIVHandlerAddress;
  Table.LDIVHandler = LDIVHandlerAddress;

  // Shared F64 trig handlers.
  Table.F64SinHandler = F64SinHandlerAddress;
  Table.F64CosHandler = F64CosHandlerAddress;
  Table.F64TanHandler = F64TanHandlerAddress;

  // Finally, the per-op fallback handlers, each paired with its ABI trampoline.
  InterpreterOps::FillFallbackIndexPointers(Table.FallbackHandlerPointers, ABIPointers.data());
}

// Packages the dispatcher addresses and the static-register-allocation (SRA) layout into a
// SignalDelegatorConfig.
//
// @return  Config holding the dispatcher code range, the loop-top / signal-return / pause / stop
//          addresses, and the host register index of each SRA GPR and FPR.
SignalDelegatorConfig Dispatcher::MakeSignalDelegatorConfig() const {
  // Copies the host register index of the first Count entries of Registers into a fresh mapping.
  const auto BuildMapping = [](const auto& Registers, size_t Count) {
    SignalDelegatorConfig::SRAIndexMapping Result {};
    for (size_t Slot = 0; Slot != Count; ++Slot) {
      Result[Slot] = Registers[Slot].Idx();
    }
    return Result;
  };

  // PF/AF occupy the last two SRA slots and are not GPRs, so they are left out of the count.
  const auto NumGPRs = uint16_t(StaticRegisters.size() - 2);
  const auto NumFPRs = uint16_t(StaticFPRegisters.size());

  FEXCore::SignalDelegatorConfig Config {
    .DispatcherBegin = Start,
    .DispatcherEnd = End,

    .AbsoluteLoopTopAddress = AbsoluteLoopTopAddress,
    .AbsoluteLoopTopAddressFillSRA = AbsoluteLoopTopAddressFillSRA,
    .SignalHandlerReturnAddress = SignalHandlerReturnAddress,
    .SignalHandlerReturnAddressRT = SignalHandlerReturnAddressRT,

    // Pause handlers.
    .PauseReturnInstruction = PauseReturnInstruction,
    .ThreadPauseHandlerAddressSpillSRA = ThreadPauseHandlerAddressSpillSRA,
    .ThreadPauseHandlerAddress = ThreadPauseHandlerAddress,

    // Stop handlers.
    .ThreadStopHandlerAddressSpillSRA = ThreadStopHandlerAddressSpillSRA,
    .ThreadStopHandlerAddress = ThreadStopHandlerAddress,

    // SRA information.
    .SRAGPRCount = NumGPRs,
    .SRAFPRCount = NumFPRs,

    .SRAGPRMapping = BuildMapping(StaticRegisters, NumGPRs),
    .SRAFPRMapping = BuildMapping(StaticFPRegisters, NumFPRs),
  };
  return Config;
}

// Factory: builds a Dispatcher for the given context and hands ownership to the caller.
fextl::unique_ptr<Dispatcher> Dispatcher::Create(FEXCore::Context::ContextImpl* CTX) {
  auto Instance = fextl::make_unique<Dispatcher>(CTX);
  return Instance;
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Dispatcher/Dispatcher.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/memory.h>

#include <array>
#include <cstddef>
#include <cstdint>

namespace FEXCore {
struct GuestSigAction;
struct SignalDelegatorConfig;
} // namespace FEXCore

namespace FEXCore::Core {
struct CpuStateFrame;
struct InternalThreadState;
} // namespace FEXCore::Core

namespace FEXCore::Context {
class ContextImpl;
}

namespace FEXCore::CPU {

#define STATE_PTR(STATE_TYPE, FIELD) STATE.R(), offsetof(FEXCore::Core::STATE_TYPE, FIELD)
#define STATE_PTR_IDX(STATE_TYPE, FIELD, INDEX) STATE.R(), ARRAY_OFFSETOF(FEXCore::Core::STATE_TYPE, FIELD, INDEX)
#define FALLBACK_HANDLER_OFFSET(INDEX, FIELD) \
  STATE.R(),                                  \
    (ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, Pointers.FallbackHandlerPointers, INDEX) + offsetof(FEXCore::Core::FallbackABIInfo, FIELD))

// Arm64 emitter that generates the dispatcher code and the helper routines JIT code calls into
// (fallback ABI trampolines, long division helpers, shared F64 trig handlers). The addresses of
// the emitted routines are kept in the members below and handed out through
// InitThreadPointers() and MakeSignalDelegatorConfig().
class Dispatcher final : public Arm64Emitter {
public:
  // Factory wrapper around the constructor; returns an owning pointer.
  static fextl::unique_ptr<Dispatcher> Create(FEXCore::Context::ContextImpl* CTX);

  Dispatcher(FEXCore::Context::ContextImpl* ctx);
  ~Dispatcher();

  // Copies the dispatcher's routine addresses into Thread->CurrentFrame->Pointers.
  void InitThreadPointers(FEXCore::Core::InternalThreadState* Thread);

  // Under the VIXL simulator both entry points are defined out of line; otherwise they call
  // straight through the generated-code function pointers.
#ifdef VIXL_SIMULATOR
  void ExecuteDispatch(FEXCore::Core::CpuStateFrame* Frame);
  void ExecuteJITCallback(FEXCore::Core::CpuStateFrame* Frame, uint64_t RIP);
#else
  // Invokes the dispatch entry point with SingleInst = false.
  void ExecuteDispatch(FEXCore::Core::CpuStateFrame* Frame) {
    DispatchPtr(Frame, false);
  }

  // Invokes the callback entry point for the given frame and RIP.
  void ExecuteJITCallback(FEXCore::Core::CpuStateFrame* Frame, uint64_t RIP) {
    CallbackPtr(Frame, RIP);
  }
#endif

  uint64_t GetExitFunctionLinkerAddress() const {
    return ExitFunctionLinkerAddress;
  }

  // Bundles the dispatcher code range, handler addresses and SRA register mapping.
  SignalDelegatorConfig MakeSignalDelegatorConfig() const;

protected:
  FEXCore::Context::ContextImpl* CTX;

  // Signatures of the two entry points called by ExecuteDispatch / ExecuteJITCallback.
  using AsmDispatch = void (*)(FEXCore::Core::CpuStateFrame* Frame, bool SingleInst);
  using JITCallback = void (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t RIP);

  // NOTE(review): no default initializer here; presumably assigned while the dispatcher is
  // emitted - confirm against the constructor.
  AsmDispatch DispatchPtr;
  JITCallback CallbackPtr;
private:
  /**
   * @name Dispatch Helper functions
   * @{ */
  // Addresses of routines inside the emitted dispatcher. All default to 0.
  uint64_t ThreadStopHandlerAddress {};
  uint64_t ThreadStopHandlerAddressSpillSRA {};
  uint64_t AbsoluteLoopTopAddress {};
  uint64_t AbsoluteLoopTopAddressFillSRA {};
  uint64_t AbsoluteLoopTopAddressEnterEC {};
  uint64_t AbsoluteLoopTopAddressEnterECFillSRA {};
  uint64_t ThreadPauseHandlerAddress {};
  uint64_t ThreadPauseHandlerAddressSpillSRA {};
  uint64_t ExitFunctionLinkerAddress {};
  uint64_t SignalHandlerReturnAddress {};
  uint64_t SignalHandlerReturnAddressRT {};
  uint64_t GuestSignal_SIGILL {};
  uint64_t GuestSignal_SIGTRAP {};
  uint64_t GuestSignal_SIGSEGV {};

  uint64_t PauseReturnInstruction {};
  // One trampoline address per FallbackABI value below FABI_UNKNOWN.
  std::array<uint64_t, FallbackABI::FABI_UNKNOWN> ABIPointers {};
  /**  @} */

  // Bounds of the dispatcher code, reported as DispatcherBegin / DispatcherEnd.
  uint64_t Start {};
  uint64_t End {};

  // Long division helpers
  uint64_t LUDIVHandlerAddress {};
  uint64_t LDIVHandlerAddress {};

  // F64 trig shared handlers
  uint64_t F64SinHandlerAddress {};
  uint64_t F64CosHandlerAddress {};
  uint64_t F64TanHandlerAddress {};

  void EmitDispatcher();
  // Emits the trampoline for one fallback ABI and returns its start address.
  uint64_t GenerateABICall(FallbackABI ABI);

  // Inline softfloat conversion emitters - avoid FPCR save/restore overhead
  // These emit ARM64 code that performs the conversion using only integer ops
  void EmitI16ToExtF80();
  void EmitI32ToExtF80();
  void EmitF32ToExtF80();
  void EmitF64ToExtF80();

  // Emit the shared F64 trig handlers and record their entry addresses in the
  // F64*HandlerAddress members above.
  void EmitF64Sin();
  void EmitF64Cos();
  void EmitF64Tan();

  FEX_CONFIG_OPT(DisableL2Cache, DISABLEL2CACHE);
};

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Frontend.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-meta-blocks
desc: Extracts instruction & block meta info, frontend multiblock logic
$end_info$
*/

#include "Interface/Context/Context.h"
#include "Interface/Core/Frontend.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/LookupCache.h"

#include <array>
#include <algorithm>
#include <cstring>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/fextl/set.h>

namespace FEXCore::Frontend {
#include "Interface/Core/VSyscall/VSyscall.inc"

using namespace FEXCore::X86Tables;

// Translates a 3-bit ModRM register field plus its REX extension bit into an X86State register
// index.
//
// @param REX            Extension bit; becomes bit 3 of the register number.
// @param bits           Register field taken from the ModRM byte.
// @param HighBits       True when the operand is subject to legacy high-byte register selection.
// @param HasREX         True when the instruction carries a REX prefix.
// @param HasXMM         Select the XMM register bank.
// @param HasMM          Select the MMX register bank (checked after HasXMM).
// @param InvalidOffset  Register number that maps to REG_INVALID; the default of 16 is outside
//                       the 0..15 range the combined number can take for 0/1 REX and 3-bit bits.
// @return               The X86State register index, or REG_INVALID.
static uint32_t MapModRMToReg(uint8_t REX, uint8_t bits, bool HighBits, bool HasREX, bool HasXMM, bool HasMM, uint8_t InvalidOffset = 16) {
  const uint8_t RegNumber = (REX << 3) | bits;

  if (RegNumber == InvalidOffset) {
    return FEXCore::X86State::REG_INVALID;
  }

  if (HasXMM) {
    return FEXCore::X86State::REG_XMM_0 + RegNumber;
  }

  if (HasMM) {
    // MMX registers ignore the REX extension, so only the raw field is used.
    return FEXCore::X86State::REG_MM_0 + bits;
  }

  // Without a REX prefix, byte operands use the legacy encoding handled by the table below.
  const bool UsesLegacyByteEncoding = HighBits && !HasREX;
  if (!UsesLegacyByteEncoding) {
    return FEXCore::X86State::REG_RAX + RegNumber;
  }

  // Legacy byte-register encoding: entries 4-7 resolve to the same registers as entries 0-3.
  static constexpr std::array<uint32_t, 16> LegacyByteRegisters = {
    FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RCX, FEXCore::X86State::REG_RDX, FEXCore::X86State::REG_RBX,
    FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RCX, FEXCore::X86State::REG_RDX, FEXCore::X86State::REG_RBX,
    FEXCore::X86State::REG_R8,  FEXCore::X86State::REG_R9,  FEXCore::X86State::REG_R10, FEXCore::X86State::REG_R11,
    FEXCore::X86State::REG_R12, FEXCore::X86State::REG_R13, FEXCore::X86State::REG_R14, FEXCore::X86State::REG_R15,
  };

  return LegacyByteRegisters[RegNumber];
}

// Maps the VEX.vvvv register number to an X86State register index, in the XMM bank when HasXMM is set and in the
// GPR bank otherwise. The caller passes vvvv already in its final (non-inverted) form.
static uint32_t MapVEXToReg(uint8_t vvvv, bool HasXMM) {
  const uint32_t BankStart = HasXMM ? FEXCore::X86State::REG_XMM_0 : FEXCore::X86State::REG_RAX;
  return BankStart + vvvv;
}

// Builds a decoder bound to one guest thread and picks the x87 and VEX opcode tables that match the configuration
// and the host's feature set.
Decoder::Decoder(FEXCore::Core::InternalThreadState* Thread)
  : Thread {Thread}
  , CTX {static_cast<FEXCore::Context::ContextImpl*>(Thread->CTX)}
  , OSABI {CTX->SyscallHandler ? CTX->SyscallHandler->GetOSABI() : FEXCore::HLE::SyscallOSABI::OS_UNKNOWN}
  , PoolObject {CTX->FrontendAllocator, sizeof(FEXCore::X86Tables::DecodedInst) * DefaultDecodedBufferSize} {

  // x87: start from the 80-bit table and switch to the 64-bit one when reduced precision is configured.
  FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION);
  X87Table = &FEXCore::X86Tables::X87F80Ops;
  if (ReducedPrecision) {
    X87Table = &FEXCore::X86Tables::X87F64Ops;
  }

  // VEX: the tables are only assigned when the host reports AVX support; otherwise they are left untouched.
  if (CTX->HostFeatures.SupportsAVX) {
    if (CTX->HostFeatures.SupportsSVE256) {
      VEXTable = &FEXCore::X86Tables::VEXTableOps;
      VEXTableGroup = &FEXCore::X86Tables::VEXTableGroupOps;
    } else {
      VEXTable = &FEXCore::X86Tables::VEXTableOps_AVX128;
      VEXTableGroup = &FEXCore::X86Tables::VEXTableGroupOps_AVX128;
    }
  }
}

// Checks whether the guest byte range [Address, Address + Size) is covered by executable ranges.
//
// The most recently queried range is cached in ExecutableRangeBase / ExecutableRangeEnd / ExecutableRangeWritable,
// so requests that fall inside the cached range return without asking the syscall handler again.
//
// Returns false as soon as the syscall handler reports an empty range (Size == 0) for the queried address, and
// true once the remaining request fits inside the cached range.
bool Decoder::CheckRangeExecutable(uint64_t Address, uint64_t Size) {
  // Keep going until the (possibly trimmed) request lies entirely inside the cached range.
  while (Address < ExecutableRangeBase || Address + Size > ExecutableRangeEnd) {
    // Refresh the cache with whatever the syscall handler reports for the current address.
    auto RangeInfo = CTX->SyscallHandler->QueryGuestExecutableRange(Thread, Address);
    ExecutableRangeBase = RangeInfo.Base;
    ExecutableRangeEnd = RangeInfo.Base + RangeInfo.Size;
    ExecutableRangeWritable = RangeInfo.Writable;

    // A zero-sized result is treated as "not executable". Note the cache has already been overwritten above.
    if (RangeInfo.Size == 0) {
      return false;
    }

    // If the request runs past the end of this range, drop the part that is covered and continue checking the
    // remainder, which now starts exactly at the end of this range, on the next iteration.
    uint64_t RangeRemainingSize = ExecutableRangeEnd - Address;
    if (Size > RangeRemainingSize) {
      Size -= RangeRemainingSize;
      Address += RangeRemainingSize;
    }
  }

  return true;
}

// Consumes the next byte of the instruction stream: records it in the Instruction buffer, advances InstructionSize
// and returns it. If the byte is not in an executable range, nothing is consumed, HitNonExecutableRange is set and
// 0 is returned.
uint8_t Decoder::ReadByte() {
  LOGMAN_THROW_A_FMT(InstructionSize < MAX_INST_SIZE, "Max instruction size exceeded!");

  const std::optional<uint8_t> Next = PeekByte(0);
  if (Next.has_value()) {
    const uint8_t Value = Next.value();
    Instruction[InstructionSize++] = Value;
    return Value;
  }

  // Pretend a zero was read; the main decode loop sees HitNonExecutableRange and rolls the instruction back.
  HitNonExecutableRange = true;
  return 0;
}

// Returns the byte located Offset bytes past the current read position without consuming it, or std::nullopt when
// that byte does not lie in an executable range.
std::optional<uint8_t> Decoder::PeekByte(uint8_t Offset) {
  const auto* Target = InstStream + InstructionSize + Offset;
  if (!CheckRangeExecutable(reinterpret_cast<uint64_t>(Target), 1)) {
    return std::nullopt;
  }
  return *Target;
}

// Reads a Size-byte (1..8) value from the instruction stream at the current position and consumes those bytes.
//
// Returns {value, IsRelocation}:
//  - {raw value zero-extended to 64 bits, false} in the normal case;
//  - {value - EntryPoint, true} when a relocation of matching width is registered for this location;
//  - {0, false} with HitBadRelocation set when a relocation is registered but its type/width does not match Size.
// If the bytes are not fully inside an executable range, HitNonExecutableRange is set and the value reads as 0.
std::pair<uint64_t, bool> Decoder::ReadData(uint8_t Size) {
  LOGMAN_THROW_A_FMT(Size != 0 && Size <= sizeof(uint64_t), "Unknown data size to read");

  uint64_t Res = 0;
  // Capture the address before any bytes are consumed; it is also the key for the relocation lookup below.
  uint64_t Address = reinterpret_cast<uint64_t>(InstStream + InstructionSize);
  if (CheckRangeExecutable(Address, Size)) {
    // Copy only the low Size bytes; the upper bytes of Res stay zero.
    std::memcpy(&Res, &InstStream[InstructionSize], Size);
  } else {
    HitNonExecutableRange = true;
    // See PeekByte, this specific case may cause some executable memory to read as 0 but it doesn't matter as the entire instruction will be rolled back anyway.
    Res = 0;
  }


  // Consume the bytes. With assertions enabled this goes through ReadByte one byte at a time (which also checks the
  // maximum instruction size and records each byte); otherwise the bytes are skipped in one step via SkipBytes.
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  for (size_t i = 0; i < Size; ++i) {
    ReadByte();
  }
#else
  SkipBytes(Size);
#endif

  if (Relocations) {
    // Relocations are looked up by the 32-bit offset of this value from SectionMinAddress.
    uint32_t SectionOffset = static_cast<uint32_t>(Address - SectionMinAddress);
    if (auto It = Relocations->find(SectionOffset); It != Relocations->end()) {
      if (It->second == GuestRelocationType::Rel32 && Size == 4) {
        // 32-bit relocation: the difference is computed on the low 32 bits and then sign-extended.
        return {static_cast<int64_t>(static_cast<int32_t>(Res) - static_cast<int32_t>(EntryPoint)), true};
      } else if (It->second == GuestRelocationType::Rel64 && Size == 8) {
        return {static_cast<int64_t>(Res) - static_cast<int64_t>(EntryPoint), true};
      } else {
        // A relocation exists here but does not match the width being read; flag it and return a zero literal.
        HitBadRelocation = true;
        Res = 0;
      }
    }
  }

  return {Res, false};
}

// Decodes a memory operand that uses 16-bit addressing (only called when ModRM.mod != 0b11) into an SIB-style
// operand with scale 1.
//
// 16-bit ModRM encodes the base/index pair directly in the rm field:
//   rm    | address
//   ======================
//   0b000 | [BX + SI]
//   0b001 | [BX + DI]
//   0b010 | [BP + SI]
//   0b011 | [BP + DI]
//   0b100 | [SI]
//   0b101 | [DI]
//   0b110 | disp16 only when mod == 0b00, otherwise [BP]
//   0b111 | [BX]
// mod == 0b01 appends an 8-bit displacement and mod == 0b10 a 16-bit displacement to every form.
void Decoder::DecodeModRM_16(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM) {
  constexpr uint8_t BX = FEXCore::X86State::REG_RBX;
  constexpr uint8_t BP = FEXCore::X86State::REG_RBP;
  constexpr uint8_t SI = FEXCore::X86State::REG_RSI;
  constexpr uint8_t DI = FEXCore::X86State::REG_RDI;
  constexpr uint8_t NONE = FEXCore::X86State::REG_INVALID;

  struct BaseIndexPair {
    uint8_t Base;
    uint8_t Index;
  };

  // Indexed by (mod << 3) | rm. The three rows differ only in the rm == 0b110 slot.
  constexpr static std::array<BaseIndexPair, 24> AddressForms = {{
    // mod = 0b00
    {BX, SI}, {BX, DI}, {BP, SI}, {BP, DI}, {SI, NONE}, {DI, NONE}, {NONE, NONE}, {BX, NONE},
    // mod = 0b01
    {BX, SI}, {BX, DI}, {BP, SI}, {BP, DI}, {SI, NONE}, {DI, NONE}, {BP, NONE}, {BX, NONE},
    // mod = 0b10
    {BX, SI}, {BX, DI}, {BP, SI}, {BP, DI}, {SI, NONE}, {DI, NONE}, {BP, NONE}, {BX, NONE},
  }};

  // How many displacement bytes follow the ModRM byte?
  uint8_t DispBytes = 0;
  if (ModRM.mod == 0b01) {
    DispBytes = 1;
  } else if (ModRM.mod == 0b10 || (ModRM.mod == 0 && ModRM.rm == 0b110)) {
    DispBytes = 2;
  }

  uint32_t Disp = 0;
  if (DispBytes != 0) {
    bool IsRelocation = false;
    std::tie(Disp, IsRelocation) = ReadData(DispBytes);
    LOGMAN_THROW_A_FMT(!IsRelocation, "1/2 byte relocations unsupported");
    if (DispBytes == 1) {
      // Only the 8-bit displacement is sign-extended here.
      Disp = static_cast<int8_t>(Disp);
    }
  }

  const BaseIndexPair Form = AddressForms[(ModRM.mod << 3) | ModRM.rm];

  Operand->Type = DecodedOperand::OpType::SIB;
  Operand->Data.SIB.Scale = 1;
  Operand->Data.SIB.Offset = Disp;
  Operand->Data.SIB.Base = Form.Base;
  Operand->Data.SIB.Index = Form.Index;
}

// Decodes a memory operand that uses 32/64-bit addressing (ModRM.mod != 0b11) into Operand.
//
// Depending on the encoding the operand becomes one of:
//  - SIB / SIBRelocation              : rm == 0b100, a SIB byte follows (read here if not already decoded)
//  - RIPRelative / RIPRelativeRelocation : mod == 0b00, rm == 0b101, 32-bit displacement only
//  - GPRDirect                        : mod == 0b00, any other rm, plain [reg]
//  - GPRIndirect / GPRIndirectRelocation : mod == 0b01 / 0b10, [reg + disp8/disp32]
// The *Relocation variants are selected when ReadData reports the displacement as a relocation.
void Decoder::DecodeModRM_64(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM) {
  uint8_t Displacement {};
  // Do we have an offset?
  if (ModRM.mod == 0b01) {
    Displacement = 1;
  } else if (ModRM.mod == 0b10) {
    Displacement = 4;
  } else if (ModRM.mod == 0 && ModRM.rm == 0b101) {
    Displacement = 4;
  }

  // Calculate SIB
  bool HasSIB = ((ModRM.mod != 0b11) && (ModRM.rm == 0b100));

  if (HasSIB) {
    FEXCore::X86Tables::SIBDecoded SIB;
    if (DecodeInst->Flags & DecodeFlags::FLAG_DECODED_SIB) {
      SIB.Hex = DecodeInst->SIB;
    } else {
      // Haven't yet grabbed SIB, pull it now
      DecodeInst->SIB = ReadByte();
      SIB.Hex = DecodeInst->SIB;
      DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_SIB;
    }

    // mod == 0b00 with SIB.base == 0b101 is the special "no base register, 32-bit displacement" form.
    // The base extension bit (REX.B / VEX.B) is not decoded for this form, so the check is made on the raw 3-bit
    // field only: addressing through RBP or R13 as a base requires mod == 0b01 with a zero displacement instead.
    const bool NoBaseRegister = ModRM.mod == 0b00 && SIB.base == 0b101;
    if (NoBaseRegister) {
      Displacement = 4;
    }

    // SIB
    Operand->Type = DecodedOperand::OpType::SIB;
    Operand->Data.SIB.Scale = 1 << SIB.scale;

    // The invalid encoding types are described at Table 1-12.
    {
      // If we have a VSIB byte (as opposed to SIB), then the index register is a vector.
      // DecodeInst->TableInfo may be null in the case of 3DNow! ModRM decoding.
      const bool IsIndexVector = DecodeInst->TableInfo && (DecodeInst->TableInfo->Flags & InstFlags::FLAGS_VEX_VSIB) != 0;
      uint8_t InvalidSIBIndex = 0b100; ///< SIB Index where there is no register encoding.
      if (IsIndexVector) {
        DecodeInst->Flags |= X86Tables::DecodeFlags::FLAG_VSIB_BYTE;
        InvalidSIBIndex = ~0; ///< No Invalid SIB Index with Index Vectors.
      }

      const uint8_t IndexREX = (DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_X) != 0 ? 1 : 0;
      const uint8_t BaseREX = (DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B) != 0 ? 1 : 0;

      // For the index the extension bit does take part in the "no index" check: only the combined value 0b0100
      // means no index, so an extended index of 0b1100 (R12) is a real register.
      Operand->Data.SIB.Index = MapModRMToReg(IndexREX, SIB.index, false, false, IsIndexVector, false, InvalidSIBIndex);

      if (NoBaseRegister) {
        // Previously the extension bit was folded into the comparison, so an encoding with the B bit set produced
        // R13 as a base on top of the disp32. The base is absent regardless of the extension bit.
        Operand->Data.SIB.Base = FEXCore::X86State::REG_INVALID;
      } else {
        Operand->Data.SIB.Base = MapModRMToReg(BaseREX, SIB.base, false, false, false, false);
      }
    }

    LOGMAN_THROW_A_FMT(Displacement <= 4, "Number of bytes should be <= 4 for literal src");

    if (Displacement) {
      auto [Literal, IsRelocation] = ReadData(Displacement);
      if (IsRelocation) {
        Operand->Type = DecodedOperand::OpType::SIBRelocation;
      }
      if (Displacement == 1) {
        // Sign-extend the 8-bit displacement.
        Literal = static_cast<int8_t>(Literal);
      }
      Operand->Data.SIB.Offset = Literal;
    }
  } else if (ModRM.mod == 0) {
    // Explained in Table 1-14. "Operand Addressing Using ModRM and SIB Bytes"
    if (ModRM.rm == 0b101) {
      // 32bit Displacement. The raw rm field is checked, so the base extension bit does not affect this form.
      auto [Literal, IsRelocation] = ReadData(4);
      Operand->Type = IsRelocation ? DecodedOperand::OpType::RIPRelativeRelocation : DecodedOperand::OpType::RIPRelative;
      Operand->Data.RIPLiteral.Value = Literal;
    } else {
      // Register-direct addressing
      Operand->Type = DecodedOperand::OpType::GPRDirect;
      Operand->Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, false, false, false, false);
    }
  } else {
    // mod == 0b01 or 0b10 without SIB: [reg + disp8] or [reg + disp32].
    uint8_t DisplacementSize = ModRM.mod == 1 ? 1 : 4;
    auto [Literal, IsRelocation] = ReadData(DisplacementSize);
    if (DisplacementSize == 1) {
      // Sign-extend the 8-bit displacement.
      Literal = static_cast<int8_t>(Literal);
    }

    Operand->Type = IsRelocation ? DecodedOperand::OpType::GPRIndirectRelocation : DecodedOperand::OpType::GPRIndirect;
    Operand->Data.GPRIndirect.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, false, false, false, false);
    Operand->Data.GPRIndirect.Displacement = Literal;
  }
}

// Decodes the operands of a fully-selected instruction table entry into DecodeInst.
//
// Info    - Table entry for the instruction. TYPE_ARCH_DISPATCHER entries are resolved to their 32-bit/64-bit
//           variant first; group entries must already have been resolved by the caller.
// Op      - Opcode value stored into DecodeInst->OP.
// Options - VEX-derived fields (w, L, vvvv); default-constructed for non-VEX instructions.
//
// Steps, in order: validate W/L constraints, read ModRM if still pending, compute destination/source operand sizes,
// decode implicit/opcode-embedded destinations, decode VEX and ModRM operands, then read any trailing literal.
//
// Returns false when the encoding is rejected (unknown/invalid entry, W/L/vvvv constraint violated, ModRM form not
// allowed for this instruction, or a register field mapping to REG_INVALID). On success DecodeInst->InstSize is set
// to the number of bytes consumed so far.
bool Decoder::NormalOp(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op, DecodedHeader Options) {
  if (Info->Type == FEXCore::X86Tables::TYPE_ARCH_DISPATCHER) [[unlikely]] {
    // Dispatcher Op.
    // TODO: Move this in to `NormalOpHeader`, Dispatch tables have a bug currently where some subtables don't inherit flags correctly.
    // Can be seen by running FEX asm tests if this is removed.
    // Options is forwarded so that a dispatcher entry reached through the VEX path keeps its w/L/vvvv fields
    // instead of silently decoding with default ones. For non-VEX callers Options is default-constructed anyway.
    return NormalOp(&Info->OpcodeDispatcher.Indirect[BlockInfo.Is64BitMode ? 1 : 0], Op, Options);
  }

  DecodeInst->OP = Op;
  DecodeInst->TableInfo = Info;

  if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN) {
    return false;
  }

  if (Info->Type == FEXCore::X86Tables::TYPE_INVALID) {
    return false;
  }

  LOGMAN_THROW_A_FMT(!(Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P),
                     "Group Ops should have been decoded before this!");

  uint8_t DestSize {};
  // Widening: explicit widening flag on the instruction, or W set while in 64-bit mode.
  const bool HasWideningDisplacement =
    (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST) != 0 ||
    (Options.w && BlockInfo.Is64BitMode);
  // Narrowing: operand-size override was the last size-affecting prefix.
  const bool HasNarrowingDisplacement =
    (FEXCore::X86Tables::DecodeFlags::GetOpAddr(DecodeInst->Flags, 0) & FEXCore::X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST) != 0;

  // Which register bank (XMM / MMX / GPR) the ModRM source and destination fields select.
  const bool HasXMMFlags = (Info->Flags & InstFlags::FLAGS_XMM_FLAGS) != 0;
  bool HasXMMSrc =
    HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC);
  bool HasXMMDst =
    HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST);
  bool HasMMSrc =
    HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_SRC_GPR) && HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_SRC);
  bool HasMMDst =
    HasXMMFlags && !HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_DST_GPR) && HAS_XMM_SUBFLAG(Info->Flags, InstFlags::FLAGS_SF_MMX_DST);

  // Is ModRM present via explicit instruction encoded or REX?
  const bool HasMODRM = !!(Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM);

  const bool HasREX = !!(DecodeInst->Flags & DecodeFlags::FLAG_REX_PREFIX);
  const bool Has16BitAddressing = !BlockInfo.Is64BitMode && DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE;

  // Reject encodings whose W bit contradicts what the table entry requires.
  if (Options.w && (Info->Flags & InstFlags::FLAGS_REX_W_0)) {
    return false;
  } else if (!Options.w && (Info->Flags & InstFlags::FLAGS_REX_W_1)) {
    return false;
  }

  // Same for the VEX.L bit.
  if (Options.L && (Info->Flags & InstFlags::FLAGS_VEX_L_0)) {
    return false;
  } else if (!Options.L && (Info->Flags & InstFlags::FLAGS_VEX_L_1)) {
    return false;
  }

  const bool UseVEXL = Options.L && !(Info->Flags & InstFlags::FLAGS_VEX_L_IGNORE);

  // This is used for ModRM register modification
  // For both modrm.reg and modrm.rm(when mod == 0b11) when value is >= 0b100
  // then it changes from expected registers to the high 8bits of the lower registers
  // Bit annoying to support
  // In the case of no modrm (REX in byte situation) then it is unaffected
  bool Is8BitSrc {};
  bool Is8BitDest {};

  // If we require ModRM and haven't decoded it yet, do it now
  // Some instructions have to read modrm upfront, others do it later
  if (HasMODRM && !(DecodeInst->Flags & DecodeFlags::FLAG_DECODED_MODRM)) {
    DecodeInst->ModRM = ReadByte();
    DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;
  }

  // New instruction size decoding
  {
    // Decode destinations first
    const auto DstSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeDstFlags(Info->Flags);
    const auto SrcSizeFlag = FEXCore::X86Tables::InstFlags::GetSizeSrcFlags(Info->Flags);

    if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_8BIT);
      DestSize = 1;
      Is8BitDest = true;
    } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT);
      DestSize = 2;
    } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) {
      // A 128-bit table size is promoted to 256-bit when VEX.L is set and not ignored.
      if (UseVEXL) {
        DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_256BIT);
        DestSize = 32;
      } else {
        DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_128BIT);
        DestSize = 16;
      }
    } else if (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_256BIT);
      DestSize = 32;
    } else if (HasNarrowingDisplacement &&
               (DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) {
      // See table 1-2. Operand-Size Overrides for this decoding
      // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_16BIT);
      DestSize = 2;
    } else if ((HasXMMDst || HasMMDst || BlockInfo.Is64BitMode) && (HasWideningDisplacement || DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT ||
                                                                    DstSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) {
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_64BIT);
      DestSize = 8;
    } else {
      DecodeInst->Flags |= DecodeFlags::GenSizeDstSize(DecodeFlags::SIZE_32BIT);
      DestSize = 4;
    }

    // Decode sources
    if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_8BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_8BIT);
      Is8BitSrc = true;
    } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_16BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT);
    } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_128BIT) {
      if (UseVEXL) {
        DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_256BIT);
      } else {
        DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_128BIT);
      }
    } else if (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_256BIT) {
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_256BIT);
    } else if (HasNarrowingDisplacement &&
               (SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_DEF || SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) {
      // See table 1-2. Operand-Size Overrides for this decoding
      // If the default operating mode is 32bit and we have the operand size flag then the operating size drops to 16bit
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_16BIT);
    } else if ((HasXMMSrc || HasMMSrc || BlockInfo.Is64BitMode) && (HasWideningDisplacement || SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BIT ||
                                                                    SrcSizeFlag == FEXCore::X86Tables::InstFlags::SIZE_64BITDEF)) {
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_64BIT);
    } else {
      DecodeInst->Flags |= DecodeFlags::GenSizeSrcSize(DecodeFlags::SIZE_32BIT);
    }
  }

  // Operand slot that a later VEX-encoded destination is written to. It starts as Dest and moves to Src[0] when
  // the destination is hardcoded below.
  auto* CurrentDest = &DecodeInst->Dest;

  if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ||
      HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RDX)) {
    // Some instructions hardcode their destination as RAX (or RDX)
    CurrentDest->Type = DecodedOperand::OpType::GPR;
    CurrentDest->Data.GPR.HighBits = false;
    CurrentDest->Data.GPR.GPR =
      HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_DST_RAX) ? FEXCore::X86State::REG_RAX : FEXCore::X86State::REG_RDX;
    CurrentDest = &DecodeInst->Src[0];
  } else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_REX_IN_BYTE)) {
    LOGMAN_THROW_A_FMT(!HasMODRM, "This instruction shouldn't have ModRM!");

    // If the REX is in the byte that means the lower nibble of the OP contains the destination GPR
    // This also means that the destination is always a GPR on these ones
    // ADDITIONALLY:
    // If there is a REX prefix then that allows extended GPR usage
    CurrentDest->Type = DecodedOperand::OpType::GPR;
    DecodeInst->Dest.Data.GPR.HighBits = (Is8BitDest && !HasREX && (Op & 0b111) >= 0b100);
    CurrentDest->Data.GPR.GPR =
      MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, Op & 0b111, Is8BitDest, HasREX, false, false);

    if (CurrentDest->Data.GPR.GPR == FEXCore::X86State::REG_INVALID) {
      return false;
    }
  }

  // Number of trailing literal bytes, adjusted for operand/address size overrides.
  uint8_t Bytes = Info->MoreBytes;

  if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_MUL_2) && HasWideningDisplacement) {
    Bytes <<= 1;
  }
  if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_DISPLACE_SIZE_DIV_2) && HasNarrowingDisplacement) {
    Bytes >>= 1;
  }

  if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MEM_OFFSET) && (DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE)) {
    // If we have a memory offset and have the address size override then divide it just like narrowing displacement
    Bytes >>= 1;
  }

  // Decodes the two ModRM operands: `GPR` receives the ModRM.reg register, `NonGPR` receives ModRM.rm, which is
  // either a register (mod == 0b11) or a memory operand. Returns false when the encoding is not allowed.
  auto ModRMOperand = [&](FEXCore::X86Tables::DecodedOperand& GPR, FEXCore::X86Tables::DecodedOperand& NonGPR, bool HasXMMGPR,
                          bool HasXMMNonGPR, bool HasMMGPR, bool HasMMNonGPR, bool GPR8Bit, bool NonGPR8Bit) {
    FEXCore::X86Tables::ModRMDecoded ModRM;
    ModRM.Hex = DecodeInst->ModRM;

    if (ModRM.reg != 0b000 && (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_ZERO_REG)) {
      return false;
    }

    if (ModRM.mod == 0b11 && (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_MEM_ONLY)) {
      return false;
    }

    if (ModRM.mod != 0b11 && (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_REG_ONLY)) {
      return false;
    }

    // Decode the GPR source first
    GPR.Type = DecodedOperand::OpType::GPR;
    GPR.Data.GPR.HighBits = (GPR8Bit && ModRM.reg >= 0b100 && !HasREX);
    GPR.Data.GPR.GPR = MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_R ? 1 : 0, ModRM.reg, GPR8Bit, HasREX, HasXMMGPR, HasMMGPR);

    if (GPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) {
      return false;
    }

    // ModRM.mod == 0b11 == Register
    // ModRM.Mod != 0b11 == Register-direct addressing
    if (ModRM.mod == 0b11) {
      NonGPR.Type = DecodedOperand::OpType::GPR;
      NonGPR.Data.GPR.HighBits = (NonGPR8Bit && ModRM.rm >= 0b100 && !HasREX);
      NonGPR.Data.GPR.GPR =
        MapModRMToReg(DecodeInst->Flags & DecodeFlags::FLAG_REX_XGPR_B ? 1 : 0, ModRM.rm, NonGPR8Bit, HasREX, HasXMMNonGPR, HasMMNonGPR);
      if (NonGPR.Data.GPR.GPR == FEXCore::X86State::REG_INVALID) {
        return false;
      }
    } else {
      // Only decode if we haven't pre-decoded
      if (NonGPR.IsNone()) {
        auto Disp = DecodeModRMs_Disp[Has16BitAddressing];
        (this->*Disp)(&NonGPR, ModRM);
      }
    }

    return true;
  };

  size_t CurrentSrc = 0;

  const auto VEXOperand = Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_VEX_SRC_MASK;
  // An instruction that takes no VEX operand must leave vvvv at zero (after the caller's inversion).
  if (VEXOperand == FEXCore::X86Tables::InstFlags::FLAGS_VEX_NO_OPERAND && Options.vvvv) {
    return false;
  }

  if (VEXOperand == FEXCore::X86Tables::InstFlags::FLAGS_VEX_1ST_SRC) {
    DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::GPR;
    DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false;

    // If we have XMM flags at all, then SRC 1 cannot be a GPR. The only case where
    // this is possible is with BMI1 and BMI2 instructions (which are all GPR-based
    // and don't use XMM flags)
    DecodeInst->Src[CurrentSrc].Data.GPR.GPR = MapVEXToReg(Options.vvvv, HasXMMFlags);

    ++CurrentSrc;
  }

  if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_MODRM) {
    // FLAGS_SF_MOD_DST swaps the roles: ModRM.rm is the destination and ModRM.reg the source.
    if (Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SF_MOD_DST) {
      if (!ModRMOperand(DecodeInst->Src[CurrentSrc], DecodeInst->Dest, HasXMMSrc, HasXMMDst, HasMMSrc, HasMMDst, Is8BitSrc, Is8BitDest)) {
        return false;
      }
    } else {
      if (!ModRMOperand(DecodeInst->Dest, DecodeInst->Src[CurrentSrc], HasXMMDst, HasXMMSrc, HasMMDst, HasMMSrc, Is8BitDest, Is8BitSrc)) {
        return false;
      }
    }
    ++CurrentSrc;
  }

  if (VEXOperand == FEXCore::X86Tables::InstFlags::FLAGS_VEX_2ND_SRC) {
    DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::GPR;
    DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false;
    DecodeInst->Src[CurrentSrc].Data.GPR.GPR = MapVEXToReg(Options.vvvv, HasXMMSrc);
    ++CurrentSrc;
  }

  // Implicit RAX / RCX source operands.
  if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RAX)) {
    DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::GPR;
    DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false;
    DecodeInst->Src[CurrentSrc].Data.GPR.GPR = FEXCore::X86State::REG_RAX;
    ++CurrentSrc;
  } else if (HAS_NON_XMM_SUBFLAG(Info->Flags, FEXCore::X86Tables::InstFlags::FLAGS_SF_SRC_RCX)) {
    DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::GPR;
    DecodeInst->Src[CurrentSrc].Data.GPR.HighBits = false;
    DecodeInst->Src[CurrentSrc].Data.GPR.GPR = FEXCore::X86State::REG_RCX;
    ++CurrentSrc;
  }

  if (VEXOperand == FEXCore::X86Tables::InstFlags::FLAGS_VEX_DST) {
    CurrentDest->Type = DecodedOperand::OpType::GPR;
    CurrentDest->Data.GPR.HighBits = false;
    CurrentDest->Data.GPR.GPR = MapVEXToReg(Options.vvvv, HasXMMDst);
  }

  if (Bytes != 0) {
    LOGMAN_THROW_A_FMT(Bytes <= 8, "Number of bytes should be <= 8 for literal src");

    // The trailing literal becomes the next source operand.
    auto [Literal, IsRelocation] = ReadData(Bytes);
    if (IsRelocation) {
      DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::LiteralRelocation;
      DecodeInst->Src[CurrentSrc].Data.LiteralRelocation.EntrypointOffset = Literal;
    } else {
      DecodeInst->Src[CurrentSrc].Data.Literal.Size = Bytes;

      // Sign-extend the literal when the instruction asks for it (always, or only for 64-bit destinations); the
      // literal then takes on the destination size.
      if ((Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT) ||
          (DecodeFlags::GetSizeDstFlags(DecodeInst->Flags) == DecodeFlags::SIZE_64BIT &&
           Info->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SRC_SEXT64BIT)) {
        if (Bytes == 1) {
          Literal = static_cast<int8_t>(Literal);
        } else if (Bytes == 2) {
          Literal = static_cast<int16_t>(Literal);
        } else {
          Literal = static_cast<int32_t>(Literal);
        }
        DecodeInst->Src[CurrentSrc].Data.Literal.Size = DestSize;
      }

      DecodeInst->Src[CurrentSrc].Type = DecodedOperand::OpType::Literal;
      DecodeInst->Src[CurrentSrc].Data.Literal.Value = Literal;
    }

    Bytes = 0;
  }

  LOGMAN_THROW_A_FMT(Bytes == 0, "Inst at 0x{:x}: 0x{:04x} '{}' Had an instruction of size {} with {} remaining", DecodeInst->PC,
                     DecodeInst->OP, DecodeInst->TableInfo->Name ?: "UND", InstructionSize, Bytes);
  DecodeInst->InstSize = InstructionSize;
  return true;
}

// Resolves a primary table entry to the concrete instruction entry it stands for and hands it to NormalOp.
//
// Plain instructions go straight to NormalOp. Group, x87 and VEX entries first consume the extra bytes that select
// the real instruction (ModRM, VEX payload, VEX opcode) and compute the index into the matching secondary table.
// Returns false for unknown/invalid entries, unsupported encodings (AVX tables not set, EVEX) and anything NormalOp
// rejects.
bool Decoder::NormalOpHeader(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op) {
  DecodeInst->OPRaw = DecodeInst->OP = Op;
  DecodeInst->TableInfo = Info;

  if (Info->Type == FEXCore::X86Tables::TYPE_UNKNOWN) {
    return false;
  }

  if (Info->Type == FEXCore::X86Tables::TYPE_INVALID) {
    return false;
  }

  LOGMAN_THROW_A_FMT(Info->Type != FEXCore::X86Tables::TYPE_REX_PREFIX, "REX PREFIX should have been decoded before this!");

  // A normal instruction is the most likely.
  if (Info->Type == FEXCore::X86Tables::TYPE_INST) [[likely]] {
    return NormalOp(Info, Op);
  } else if (Info->Type == FEXCore::X86Tables::TYPE_ARCH_DISPATCHER) [[unlikely]] {
    // Dispatcher Op.
    return NormalOp(&Info->OpcodeDispatcher.Indirect[BlockInfo.Is64BitMode ? 1 : 0], Op);
  } else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_1 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_11) {
    // Primary groups: the instruction is selected by ModRM.reg, so ModRM is read up front.
    uint8_t ModRMByte = ReadByte();
    DecodeInst->ModRM = ModRMByte;
    DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;

    FEXCore::X86Tables::ModRMDecoded ModRM;
    ModRM.Hex = DecodeInst->ModRM;

    // Index layout: group number in bits 6+, Info->MoreBytes in bits 3-5, ModRM.reg in bits 0-2.
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
    Op = OPD(Info->Type, Info->MoreBytes, ModRM.reg);
    return NormalOp(&PrimaryInstGroupOps[Op], Op);
#undef OPD
  } else if (Info->Type >= FEXCore::X86Tables::TYPE_GROUP_6 && Info->Type <= FEXCore::X86Tables::TYPE_GROUP_P) {
    // Secondary groups: selected by the last escape prefix (none/F3/66/F2) and ModRM.reg.
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg))
    constexpr uint16_t PF_NONE = 0;
    constexpr uint16_t PF_F3 = 1;
    constexpr uint16_t PF_66 = 2;
    constexpr uint16_t PF_F2 = 3;

    uint16_t PrefixType = PF_NONE;
    if (LastEscapePrefix == 0xF3) {
      PrefixType = PF_F3;
    } else if (LastEscapePrefix == 0xF2) {
      PrefixType = PF_F2;
    } else if (LastEscapePrefix == 0x66) {
      PrefixType = PF_66;
    }

    // We have ModRM
    uint8_t ModRMByte = ReadByte();
    DecodeInst->ModRM = ModRMByte;
    DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;

    FEXCore::X86Tables::ModRMDecoded ModRM;
    ModRM.Hex = DecodeInst->ModRM;

    uint16_t LocalOp = OPD(Info->Type, PrefixType, ModRM.reg);
    const FEXCore::X86Tables::X86InstInfo* LocalInfo = &SecondInstGroupOps[LocalOp];
#undef OPD
    if (LocalInfo->Type == FEXCore::X86Tables::TYPE_SECOND_GROUP_MODRM && ModRM.mod == 0b11) {
      // Register-form entries are further split by ModRM.rm through a dedicated table.
      // Everything in this group is privileged instructions aside from XGETBV
      // Only ModRM.reg values 1, 2, 3 and 7 have a field in that table; 255 marks the rest as undecodable.
      constexpr std::array<uint8_t, 8> RegToField = {
        255, 0, 1, 2, 255, 255, 255, 3,
      };
      uint8_t Field = RegToField[ModRM.reg];
      if (Field == 255) {
        return false;
      }

      LocalOp = (Field << 3) | ModRM.rm;
      return NormalOp(&SecondModRMTableOps[LocalOp], LocalOp);
    } else {
      return NormalOp(&SecondInstGroupOps[LocalOp], LocalOp);
    }
  } else if (Info->Type == FEXCore::X86Tables::TYPE_X87_TABLE_PREFIX) {
    // We have ModRM
    uint8_t ModRMByte = ReadByte();
    DecodeInst->ModRM = ModRMByte;
    DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;

    // x87 table index: opcode offset from 0xD8 in the high byte, the full ModRM byte in the low byte.
    uint16_t X87Op = ((Op - 0xD8) << 8) | ModRMByte;
    return NormalOp(&(*X87Table)[X87Op], X87Op);
  } else if (Info->Type == FEXCore::X86Tables::TYPE_VEX_TABLE_PREFIX) {
    if (!VEXTable) {
      // AVX not enabled.
      return false;
    }

    // map_select defaults to 1 because the two-byte form carries no map field.
    uint16_t map_select = 1;
    uint16_t pp = 0;
    const uint8_t Byte1 = ReadByte();
    DecodedHeader options {};

    // Bit 7 of the first payload byte is the R extension, stored inverted: a clear bit sets the flag. A clear bit
    // is rejected outside 64-bit mode.
    if ((Byte1 & 0b10000000) == 0) {
      if (!BlockInfo.Is64BitMode) {
        return false;
      }

      DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_R;
    }

    if (Op == 0xC5) { // Two byte VEX
      pp = Byte1 & 0b11;
      const uint8_t vvvv = ((Byte1 & 0b01111000) >> 3);
      if (!BlockInfo.Is64BitMode && vvvv <= 0b0111) {
        // Invalid on 32-bit, can't use the high registers.
        return false;
      }
      // vvvv is stored inverted in the encoding.
      options.vvvv = 15 - vvvv;
      options.L = (Byte1 & 0b100) != 0;
    } else { // 0xC4 = Three byte VEX
      const uint8_t Byte2 = ReadByte();
      pp = Byte2 & 0b11;
      map_select = Byte1 & 0b11111;
      const uint8_t vvvv = ((Byte2 & 0b01111000) >> 3);
      if (!BlockInfo.Is64BitMode && vvvv <= 0b0111) {
        // Invalid on 32-bit, can't use the high registers.
        return false;
      }
      // vvvv is stored inverted in the encoding.
      options.vvvv = 15 - vvvv;
      options.w = (Byte2 & 0b10000000) != 0;
      options.L = (Byte2 & 0b100) != 0;
      // X and B extension bits are stored inverted as well. A clear X bit is rejected outside 64-bit mode, while a
      // clear B bit is simply not applied there.
      if ((Byte1 & 0b01000000) == 0) {
        if (!BlockInfo.Is64BitMode) {
          return false;
        }
        DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_X;
      }
      if (BlockInfo.Is64BitMode && (Byte1 & 0b00100000) == 0) {
        DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_B;
      }
      if (options.w) {
        DecodeInst->Flags |= DecodeFlags::FLAG_OPTION_AVX_W;
      }
      // Only maps 1 through 3 are accepted.
      if (!(map_select >= 1 && map_select <= 3)) {
        return false;
      }
    }

    uint16_t VEXOp = ReadByte();
    // VEX table index: (map_select - 1) in bits 10+, pp in bits 8-9, opcode byte in bits 0-7.
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
    Op = OPD(map_select, pp, VEXOp);
#undef OPD

    const FEXCore::X86Tables::X86InstInfo* LocalInfo = &(*VEXTable)[Op];

    if (LocalInfo->Type >= FEXCore::X86Tables::TYPE_VEX_GROUP_12 && LocalInfo->Type <= FEXCore::X86Tables::TYPE_VEX_GROUP_17) {
      // We have ModRM
      uint8_t ModRMByte = ReadByte();
      DecodeInst->ModRM = ModRMByte;
      DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;

      FEXCore::X86Tables::ModRMDecoded ModRM;
      ModRM.Hex = DecodeInst->ModRM;

      // NOTE(review): pp << 3 can reach bit 4, which overlaps the group field (group << 4) when pp >= 2. This is
      // presumably fine because the VEX table only marks group entries for pp 0/1 - confirm against the tables.
#define OPD(group, pp, opcode) (((group - TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode))
      Op = OPD(LocalInfo->Type, pp, ModRM.reg);
#undef OPD
      return NormalOp(&(*VEXTableGroup)[Op], Op, options);
    } else {
      return NormalOp(LocalInfo, Op, options);
    }
  } else if (Info->Type == FEXCore::X86Tables::TYPE_GROUP_EVEX) {
    FEXCORE_TELEMETRY_SET(TYPE_USES_EVEX_OPS, 1);
    // EVEX unsupported
    return false;
  }

  // Every known entry type returned above; reaching this point means the table entry has an unexpected type.
  LOGMAN_MSG_A_FMT("Invalid instruction decoding type");
  FEX_UNREACHABLE;
}

// Decodes one guest x86 instruction into the DecodedBuffer slot at index DecodedSize.
//
// Bytes are consumed one at a time. Legacy prefixes (operand/address size, segment overrides,
// LOCK, REP/REPNE) only update DecodeInst->Flags and the loop continues; the first byte that is
// not treated as a prefix selects an opcode table entry which is handed to NormalOpHeader().
//
// PC: guest address recorded in the decoded instruction.
// Returns false once InstructionSize has reached MAX_INST_SIZE without an opcode being dispatched;
// otherwise returns the result of NormalOpHeader() for the selected table entry.
bool Decoder::DecodeInstructionImpl(uint64_t PC) {
  // Reset the per-instruction decoder state
  InstructionSize = 0;
  LastEscapePrefix = 0;
  Instruction.fill(0);

  // Claim the next free slot in the decode buffer and clear it before filling it in
  DecodeInst = &DecodedBuffer[DecodedSize];
  memset(DecodeInst, 0, sizeof(DecodedInst));
  DecodeInst->PC = PC;

  for (;;) {
    // Bail out instead of consuming more bytes than an instruction is allowed to have
    if (InstructionSize >= MAX_INST_SIZE) {
      return false;
    }
    uint8_t Op = ReadByte();
    switch (Op) {
    case 0x0F: { // Escape Op
      uint8_t EscapeOp = ReadByte();
      switch (EscapeOp) {
      case 0x0F:
        [[unlikely]] { // 3DNow!
          // Two bytes (0F 0F) have been consumed since a potential REX prefix, hence the -2 offset
          DecodeREXIfValid(-2);
          // 3DNow! Instruction Encoding: 0F 0F [ModRM] [SIB] [Displacement] [Opcode]
          // Decode ModRM
          uint8_t ModRMByte = ReadByte();
          DecodeInst->ModRM = ModRMByte;
          DecodeInst->Flags |= DecodeFlags::FLAG_DECODED_MODRM;

          FEXCore::X86Tables::ModRMDecoded ModRM;
          ModRM.Hex = DecodeInst->ModRM;

          // 16-bit addressing is only selected outside of 64-bit mode with an address size override
          const bool Has16BitAddressing = !BlockInfo.Is64BitMode && DecodeInst->Flags & DecodeFlags::FLAG_ADDRESS_SIZE;

          // All 3DNow! instructions have the second argument as the rm handler
          // We need to decode it upfront to get the displacement out of the way
          if (ModRM.mod != 0b11) {
            // Index 0 is the default ModRM decoder, index 1 the 16-bit addressing one
            auto Disp = DecodeModRMs_Disp[Has16BitAddressing];
            (this->*Disp)(&DecodeInst->Src[0], ModRM);
          }

          // Take a peek at the op just past the displacement
          uint8_t LocalOp = ReadByte();
          return NormalOpHeader(&FEXCore::X86Tables::DDDNowOps[LocalOp], LocalOp);
          break;
        }
      case 0x38: { // F38 Table!
        DecodeREXIfValid(-2);
        // Table index layout: bits [10:8] hold the prefix mask below, bits [7:0] the opcode byte
        constexpr uint16_t PF_38_NONE = 0;
        constexpr uint16_t PF_38_66 = (1U << 0);
        constexpr uint16_t PF_38_F2 = (1U << 1);
        constexpr uint16_t PF_38_F3 = (1U << 2);

        uint16_t Prefix = PF_38_NONE;
        if (DecodeInst->Flags & DecodeFlags::FLAG_OPERAND_SIZE) {
          Prefix |= PF_38_66;
        }
        if (DecodeInst->Flags & DecodeFlags::FLAG_REPNE_PREFIX) {
          Prefix |= PF_38_F2;
        }
        if (DecodeInst->Flags & DecodeFlags::FLAG_REP_PREFIX) {
          Prefix |= PF_38_F3;
        }

        uint16_t LocalOp = (Prefix << 8) | ReadByte();

        bool NoOverlay66 = (FEXCore::X86Tables::H0F38TableOps[LocalOp].Flags & InstFlags::FLAGS_NO_OVERLAY66) != 0;
        if (LastEscapePrefix == 0x66 && NoOverlay66) { // Operand Size
          // Remove the prefix so it doesn't affect calculations.
          // This is only an escape prefix rather than a modifier now
          DecodeInst->Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE;
          DecodeFlags::PopOpAddrIf(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST);
        }
        return NormalOpHeader(&FEXCore::X86Tables::H0F38TableOps[LocalOp], LocalOp);
        break;
      }
      case 0x3A: { // F3A Table!
        DecodeREXIfValid(-2);
        // Table index layout: bits [9:8] hold the prefix mask below, bits [7:0] the opcode byte
        constexpr uint16_t PF_3A_NONE = 0;
        constexpr uint16_t PF_3A_66 = (1 << 0);
        constexpr uint16_t PF_3A_REX = (1 << 1);

        uint16_t Prefix = PF_3A_NONE;
        if (LastEscapePrefix == 0x66) { // Operand Size
          Prefix = PF_3A_66;
        }

        if (DecodeInst->Flags & DecodeFlags::FLAG_REX_WIDENING) {
          Prefix |= PF_3A_REX;
        }

        uint16_t LocalOp = (Prefix << 8) | ReadByte();
        return NormalOpHeader(&FEXCore::X86Tables::H0F3ATableOps[LocalOp], LocalOp);
        break;
      }
      default:
        [[likely]] { // Two byte table!
          // x86-64 abuses three legacy prefixes to extend the table encodings
          // 0x66 - Operand Size prefix
          // 0xF2 - REPNE prefix
          // 0xF3 - REP prefix
          // If any of these three prefixes are used then it falls down the subtable
          // Additionally: If different prefixes are repeated then only the LAST one before the escape byte is used for
          // subtable selection

          bool NoOverlay = (FEXCore::X86Tables::SecondBaseOps[EscapeOp].Flags & InstFlags::FLAGS_NO_OVERLAY) != 0;
          bool NoOverlay66 = (FEXCore::X86Tables::SecondBaseOps[EscapeOp].Flags & InstFlags::FLAGS_NO_OVERLAY66) != 0;

          DecodeREXIfValid(-2);
          if (NoOverlay) { // This section of the table ignores prefix extension
            return NormalOpHeader(&FEXCore::X86Tables::SecondBaseOps[EscapeOp], EscapeOp);
          } else if (LastEscapePrefix == 0xF3) { // REP
            // Remove the prefix so it doesn't affect calculations.
            // This is only an escape prefix rather than a modifier now
            DecodeInst->Flags &= ~DecodeFlags::FLAG_REP_PREFIX;
            return NormalOpHeader(&FEXCore::X86Tables::RepModOps[EscapeOp], EscapeOp);
          } else if (LastEscapePrefix == 0xF2) { // REPNE
            // Remove the prefix so it doesn't affect calculations.
            // This is only an escape prefix rather than a modifier now
            DecodeInst->Flags &= ~DecodeFlags::FLAG_REPNE_PREFIX;
            return NormalOpHeader(&FEXCore::X86Tables::RepNEModOps[EscapeOp], EscapeOp);
          } else if (LastEscapePrefix == 0x66 && !NoOverlay66) { // Operand Size
            // Remove the prefix so it doesn't affect calculations.
            // This is only an escape prefix rather than a modifier now
            DecodeInst->Flags &= ~DecodeFlags::FLAG_OPERAND_SIZE;
            DecodeFlags::PopOpAddrIf(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST);
            return NormalOpHeader(&FEXCore::X86Tables::OpSizeModOps[EscapeOp], EscapeOp);
          } else {
            return NormalOpHeader(&FEXCore::X86Tables::SecondBaseOps[EscapeOp], EscapeOp);
          }
          break;
        }
      }
      break;
    }
    case 0x66: // Operand Size prefix
      DecodeInst->Flags |= DecodeFlags::FLAG_OPERAND_SIZE;
      LastEscapePrefix = Op;
      DecodeFlags::PushOpAddr(&DecodeInst->Flags, DecodeFlags::FLAG_OPERAND_SIZE_LAST);
      break;
    case 0x67: // Address Size override prefix
      DecodeInst->Flags |= DecodeFlags::FLAG_ADDRESS_SIZE;
      break;
    // The ES/CS/SS/DS overrides below are consumed but otherwise ignored in 64-bit mode.
    // Any segment prefix replaces a previously recorded one (FLAG_SEGMENTS is cleared first).
    case 0x26: // ES legacy prefix
      if (!BlockInfo.Is64BitMode) {
        DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_ES_PREFIX;
      }
      break;
    case 0x2E: // CS legacy prefix
      if (!BlockInfo.Is64BitMode) {
        DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_CS_PREFIX;
      }
      break;
    case 0x36: // SS legacy prefix
      if (!BlockInfo.Is64BitMode) {
        DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_SS_PREFIX;
      }
      break;
    case 0x3E: // DS legacy prefix
      if (!BlockInfo.Is64BitMode) {
        DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_DS_PREFIX;
      }
      break;
    case 0xF0: // LOCK prefix
      DecodeInst->Flags |= DecodeFlags::FLAG_LOCK;
      break;
    case 0xF2: // REPNE prefix
      DecodeInst->Flags |= DecodeFlags::FLAG_REPNE_PREFIX;
      LastEscapePrefix = Op;
      break;
    case 0xF3: // REP prefix
      DecodeInst->Flags |= DecodeFlags::FLAG_REP_PREFIX;
      LastEscapePrefix = Op;
      break;
    // FS/GS overrides are honoured in both operating modes
    case 0x64: // FS prefix
      DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_FS_PREFIX;
      break;
    case 0x65: // GS prefix
      DecodeInst->Flags = (DecodeInst->Flags & ~FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS) | DecodeFlags::FLAG_GS_PREFIX;
      break;
    default:
      [[likely]] { // Default base table
        const X86InstInfo* Info = &FEXCore::X86Tables::BaseOps[Op];
        // Some base table entries differ between operating modes; pick the 32-bit (0) or 64-bit (1) variant
        if (Info->Type == FEXCore::X86Tables::TYPE_ARCH_DISPATCHER) {
          Info = &Info->OpcodeDispatcher.Indirect[BlockInfo.Is64BitMode ? 1 : 0];
        }

        if (Info->Type == FEXCore::X86Tables::TYPE_REX_PREFIX) {
          // Only remember where the REX byte was seen. It is applied later by DecodeREXIfValid(), and only if it turns
          // out to directly precede the opcode.
          DecodeInst->REXIndex = InstructionSize;
        } else {
          DecodeREXIfValid();
          return NormalOpHeader(Info, Op);
        }

        break;
      }
    }
  }

  // NOTE(review): every `break` above only leaves a switch statement and the loop has no exit condition, so the
  // statements below appear to be unreachable - confirm before relying on them.
  if (DecodeInst->Dest.IsGPR()) {
    return false;
  }

  return true;
}

// Applies a previously seen REX prefix to the instruction being decoded, but only when the prefix
// sits at the expected position relative to the current read offset.
//
// ExpectedOffset: negative distance from InstructionSize to the index recorded for the REX byte.
// When the recorded DecodeInst->REXIndex does not match, the REX byte is ignored and no flag is set.
void Decoder::DecodeREXIfValid(int8_t ExpectedOffset) {
  LOGMAN_THROW_A_FMT(ExpectedOffset < 0, "Expecting an negative offset for the REX offset!");

  const int8_t CandidateIndex = InstructionSize + ExpectedOffset;

  // A recorded index of zero means no REX byte has been seen for this instruction
  const bool SawREX = DecodeInst->REXIndex != 0;
  if (!SawREX || DecodeInst->REXIndex != CandidateIndex) {
    return;
  }

  // The recorded index is one past the REX byte inside the raw instruction buffer
  const uint8_t REXByte = Instruction[CandidateIndex - 1];
  DecodeInst->Flags |= DecodeFlags::FLAG_REX_PREFIX;

  // REX.W - operand widening
  if (REXByte & 0b1000) {
    DecodeInst->Flags |= DecodeFlags::FLAG_REX_WIDENING;
    DecodeFlags::PushOpAddr(&DecodeInst->Flags, DecodeFlags::FLAG_WIDENING_SIZE_LAST);
  }

  // REX.B
  if (REXByte & 0b0001) {
    DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_B;
  }

  // REX.X
  if (REXByte & 0b0010) {
    DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_X;
  }

  // REX.R
  if (REXByte & 0b0100) {
    DecodeInst->Flags |= DecodeFlags::FLAG_REX_XGPR_R;
  }
}

// Decodes the instruction at PC and classifies the outcome.
//
// On any failure the decoded slot is turned into an invalid instruction (no table info, size 0) so
// the core can raise SIGILL if it is ever executed. On success the Mono-specific TSO hack is applied
// when enabled.
//
// Returns SUCCESS, or the status describing why the instruction could not be decoded.
Decoder::DecodedBlockStatus Decoder::DecodeInstruction(uint64_t PC) {
  // These get set by the byte readers if DecodeInstructionImpl touches problematic memory
  HitNonExecutableRange = false;
  HitBadRelocation = false;
  const bool DecodeFailed = !DecodeInstructionImpl(PC);

  if (DecodeFailed || HitNonExecutableRange || HitBadRelocation) [[unlikely]] {
    // We don't know the table or the instruction size at this point
    DecodeInst->TableInfo = nullptr;

    // Priority: decode error > partially decoded > non-executable memory > bad relocation
    DecodedBlockStatus Status = DecodedBlockStatus::BAD_RELOCATION;
    if (DecodeFailed) {
      Status = DecodedBlockStatus::INVALID_INST;
    } else if (DecodeInst->InstSize) {
      Status = DecodedBlockStatus::PARTIAL_DECODE_INST;
    } else if (HitNonExecutableRange) {
      Status = DecodedBlockStatus::NOEXEC_INST;
    }

    DecodeInst->InstSize = 0;
    return Status;
  }

  // Decoding went fine, but an instruction without a dispatcher can't be translated either
  const auto* Table = DecodeInst->TableInfo;
  const bool MissingDispatcher = !Table || (Table->Type == TYPE_INST && !Table->OpcodeDispatcher.OpDispatch);
  if (MissingDispatcher) {
    return DecodedBlockStatus::INVALID_INST;
  }

  if (!CTX->AreMonoHacksActive()) {
    return DecodedBlockStatus::SUCCESS;
  }

  // Unity uses a standard SPSC ringbuffer with cached read/write pointers and thread waiting flags at the following
  // offsets, which are consistent between 32-bit and 64-bit Unity versions from 2015 onwards.
  auto TouchesRingbufferField = [](uint64_t Displacement) {
    switch (Displacement) {
    case 0x80:
    case 0x84:
    case 0xC0:
    case 0xC4: return true;
    default: return false;
    }
  };

  // Opcode 0x8b with an indirect source at one of the known displacements
  const bool MatchingLoad = DecodeInst->OP == 0x8b && DecodeInst->Src[0].IsGPRIndirect() &&
                            TouchesRingbufferField(DecodeInst->Src[0].Data.GPRIndirect.Displacement);
  // Opcode 0x89 with an indirect destination at one of the known displacements
  const bool MatchingStore = DecodeInst->OP == 0x89 && DecodeInst->Dest.IsGPRIndirect() &&
                             TouchesRingbufferField(DecodeInst->Dest.Data.GPRIndirect.Displacement);

  if (MatchingLoad || MatchingStore) {
    DecodeInst->Flags |= X86Tables::DecodeFlags::FLAG_FORCE_TSO;
  }

  return DecodedBlockStatus::SUCCESS;
}

// Registers the possible successors of the RIP-setting instruction held in DecodeInst as multiblock
// branch targets. Does nothing when multiblock is disabled.
//
// - Calls: the return address becomes both a branch target and an additional entry point.
// - Jumps with opcode 0x70-0x8F (conditional) or 0xE9/0xEB (unconditional): the literal target is
//   added when it lies within the accepted range, otherwise it is reported through ExternalBranches.
// - Everything else (e.g. RET) contributes no targets.
void Decoder::BranchTargetInMultiblockRange() {
  if (!CTX->Config.Multiblock) {
    return;
  }

  const auto InstEnd = DecodeInst->PC + DecodeInst->InstSize;

  const bool IsCall = (DecodeInst->TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_CALL) != 0;
  if (IsCall) {
    if (ExecutableRangeWritable && CTX->AreMonoHacksActive()) {
      // Mono generated code often contains noreturn calls with garbage following them, and calls are always backpatched
      // after CIL compilation leading to n recompiles for a multiblock with n calls. Choose to minimize stutters over
      // raw performance and disable tracking past calls for mono generated code.
      return;
    }

    AddBranchTarget(InstEnd);
    BlockInfo.EntryPoints.emplace(InstEnd);
    return;
  }

  // Only direct jumps with a literal displacement are of interest from here on
  const auto Opcode = DecodeInst->OP;
  const bool Conditional = Opcode >= 0x70 && Opcode <= 0x8F;
  const bool Unconditional = Opcode == 0xE9 || Opcode == 0xEB;
  if (!Conditional && !Unconditional) {
    return;
  }

  // Target is PC + InstSize + Literal
  uint64_t TargetRIP = InstEnd + DecodeInst->Src[0].Literal();
  if (GetGPROpSize() == IR::OpSize::i32Bit) {
    // A 32-bit guest wraps addresses that go above 32 bits
    TargetRIP &= 0xFFFFFFFFU;
  }

  if (Conditional) {
    // The fallthrough path of a conditional branch is a target as well
    AddBranchTarget(InstEnd);
  }

  // Forbid distant branches to have the host code better match the guest code layout, avoiding massive (range-wise)
  // code blocks in highly fragmented guest code. Such branches are often not-taken branches to garbage in obfuscated
  // code.
  constexpr uint64_t MAX_FORWARD_BRANCH_DIST = FEXCore::Utils::FEX_PAGE_SIZE * 4;
  const uint64_t UpperBound = std::min(InstEnd + MAX_FORWARD_BRANCH_DIST, SectionMaxAddress);
  bool ValidMultiblockMember = TargetRIP >= EntryPoint && TargetRIP < UpperBound;

#ifdef ARCHITECTURE_arm64ec
  ValidMultiblockMember = ValidMultiblockMember && !RtlIsEcCode(TargetRIP);
#endif

  if (!ValidMultiblockMember) {
    // The branch leaves the multiblock; let an interested caller know about it
    if (ExternalBranches) {
      ExternalBranches->insert(TargetRIP);
    }
    return;
  }

  if (Conditional) {
    // Keep track of the furthest conditional branch in either direction
    MaxCondBranchForward = std::max(MaxCondBranchForward, TargetRIP);
    MaxCondBranchBackwards = std::min(MaxCondBranchBackwards, TargetRIP);
  }

  AddBranchTarget(TargetRIP);
}

// Heuristically detects a Mono tailcall site ending at the instruction held in DecodeInst.
//
// While the mono call backpatching block can easily be detected due it being the only one to contain SMC-faulting
// atomics, that can't be said for the tailcall jump backpatcher which has changed several times across versions and
// can be partially inlined. Detecting the tailcall site itself lets the caller force full non-signal-based SMC
// detection for that single block.
//
// NumInstructions: number of instructions decoded so far in the current block, including DecodeInst;
// bounds how far back the previous instructions may be inspected.
// Returns true when the instruction sequence matches one of the known patterns.
bool Decoder::IsBranchMonoTailcall(uint64_t NumInstructions) const {
  // Only jitted code is of interest
  if (!ExecutableRangeWritable) {
    return false;
  }

  // See mini-{amd64,x86}.c in the mono codebase, specifically where METHOD_JUMP patches are emitted.
  if (GetGPROpSize() == IR::OpSize::i32Bit) {
    // Matches:
    // LEAVE
    // <none> / NOP / MOV EAX, EAX / LEA EBP, [EBP+0]
    // JMP imm32
    const bool IsJmpImm32 = DecodeInst->OP == 0xE9;
    if (!IsJmpImm32 || NumInstructions < 2) {
      return false;
    }

    auto* Prev = DecodeInst - 1;
    if (Prev->OP == 0xC9) {
      // LEAVE directly in front of the jump
      return true;
    }

    // Otherwise LEAVE has to be two instructions back with a known filler in between
    if (NumInstructions < 3) {
      return false;
    }
    auto* PrevPrev = Prev - 1;
    if (PrevPrev->OP != 0xC9) {
      return false;
    }

    if (Prev->OP == 0x90) {
      return true;
    }
    if (Prev->OP == 0x8B && Prev->ModRM == 0xC0) {
      return true;
    }
    return Prev->OP == 0x8D && Prev->ModRM == 0x6D && Prev->Src[1].IsLiteral() && Prev->Src[1].Literal() == 0;
  }

  FEXCore::X86Tables::ModRMDecoded ModRM;
  ModRM.Hex = DecodeInst->ModRM;

  // 0xFF /4 with a register operand
  const bool IsIndirectRegJmp = DecodeInst->OPRaw == 0xFF && ModRM.reg == 4 && DecodeInst->Src[0].IsGPR();
  if (!IsIndirectRegJmp) {
    return false;
  }

  const auto TargetReg = DecodeInst->Src[0].Data.GPR.GPR;
  if (TargetReg == FEXCore::X86State::REG_RAX) {
    // Found in versions of mono from 2024 onwards - matches:
    // REX.W JMP rax
    const auto AllREXFlags = DecodeFlags::FLAG_REX_PREFIX | DecodeFlags::FLAG_REX_WIDENING | DecodeFlags::FLAG_REX_XGPR_B |
                             DecodeFlags::FLAG_REX_XGPR_X | DecodeFlags::FLAG_REX_XGPR_R;
    const auto OnlyREXW = DecodeFlags::FLAG_REX_PREFIX | DecodeFlags::FLAG_REX_WIDENING;
    return (DecodeInst->Flags & AllREXFlags) == OnlyREXW;
  }

  if (NumInstructions > 1 && TargetReg == FEXCore::X86State::REG_R11) {
    // Found in older versions of mono - match:
    // MOV r11, imm64
    // JMP r11
    auto* Prev = DecodeInst - 1;
    return Prev->OP == 0xBB && Prev->Dest.IsGPR() && Prev->Dest.Data.GPR.GPR == FEXCore::X86State::REG_R11;
  }

  return false;
}

// Tells whether decoding of the current block may carry on past the instruction in DecodeInst.
//
// Returns false when the next instruction would start at NextBlockStartAddress, or when the
// instruction ends the block / sets RIP. The one exception to the latter is a CALL (0xE8) whose
// target is the instruction directly following it.
bool Decoder::InstCanContinue() const {
  const uint64_t NextRIP = DecodeInst->PC + DecodeInst->InstSize;

  // Never run into the block that starts right behind this instruction
  if (NextRIP == NextBlockStartAddress) {
    return false;
  }

  const auto BlockEndingFlags = FEXCore::X86Tables::InstFlags::FLAGS_BLOCK_END | FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP;
  if ((DecodeInst->TableInfo->Flags & BlockEndingFlags) == 0) {
    return true;
  }

  // Only a call with an immediate target can still be continued past
  if (DecodeInst->OP != 0xE8) {
    return false;
  }

  uint64_t TargetRIP = NextRIP + DecodeInst->Src[0].Literal();
  if (GetGPROpSize() == IR::OpSize::i32Bit) {
    // A 32-bit guest wraps addresses that go above 32 bits
    TargetRIP &= 0xFFFFFFFFU;
  }

  // A call to the very next instruction is a GOT calculation, which gets optimized out
  // inside of the OpDispatcher implementation.
  return TargetRIP == NextRIP;
}

void Decoder::AddBranchTarget(uint64_t Target) {
  if (VisitedBlocks.contains(Target)) {
    return;
  }

  auto BlockSuccIt = std::lower_bound(BlockInfo.Blocks.begin(), BlockInfo.Blocks.end(), Target,
                                      [](const auto& a, uint64_t Address) { return a.Entry < Address; });

  LOGMAN_THROW_A_FMT(BlockSuccIt == BlockInfo.Blocks.end() || BlockSuccIt->Entry != Target, "unexpected");

  if (BlockSuccIt != BlockInfo.Blocks.begin()) {
    auto BlockIt = std::prev(BlockSuccIt);
    if (BlockIt->Entry + BlockIt->Size > Target) {
      uint64_t SplitIdx = 0;
      uint64_t SplitAddr = BlockIt->Entry;
      // Find the instruction boundary of the split
      for (; SplitIdx < BlockIt->NumInstructions && SplitAddr < Target; SplitIdx++) {
        SplitAddr += BlockIt->DecodedInstructions[SplitIdx].InstSize;
      }
      uint64_t SplitOffset = SplitAddr - BlockIt->Entry;

      LOGMAN_THROW_A_FMT(SplitIdx != 0, "unexpected");

      if (SplitAddr == Target) {
        // Split at the boundary
        DecodedBlocks SplitBlock {
          .Entry = SplitAddr,
          .Size = BlockIt->Size - SplitOffset,
          .NumInstructions = BlockIt->NumInstructions - SplitIdx,
          .DecodedInstructions = BlockIt->DecodedInstructions + SplitIdx,
          .BlockStatus = BlockIt->BlockStatus,
        };

        BlockIt->Size = SplitOffset;
        BlockIt->NumInstructions = SplitIdx;

        BlockInfo.Blocks.insert(BlockSuccIt, SplitBlock);
      } // else misaligned, leave as a branch out of the block

      // If we split a block then the target has already been visited as part of that, if it was
      // misaligned the jump will just leave the multiblock, mark it as visited to avoid running
      // this code path again and just bail out early.
      VisitedBlocks.insert(Target);
      return;
    }
  }

  CurrentBlockTargets.insert(Target);
  if (Target >= DecodeInst->PC + DecodeInst->InstSize && Target < NextBlockStartAddress) {
    NextBlockStartAddress = Target;
  }
}

// Translates guest address RIP into the host pointer the decoder should read instruction bytes from.
//
// _InstStream: host pointer that corresponds to guest address EntryPoint.
// EntryPoint:  guest address of the multiblock entry.
// RIP:         guest address to translate.
// Returns a pointer into VSyscallData for 64-bit Linux guests when RIP lies in the vsyscall page,
// otherwise _InstStream rebased from EntryPoint to RIP.
const uint8_t* Decoder::AdjustAddrForSpecialRegion(const uint8_t* _InstStream, uint64_t EntryPoint, uint64_t RIP) {
  constexpr uint64_t VSyscall_Base = 0xFFFF'FFFF'FF60'0000ULL;
  constexpr uint64_t VSyscall_End = VSyscall_Base + 0x1000;

  const bool InVSyscallPage = OSABI == FEXCore::HLE::SyscallOSABI::OS_LINUX64 && RIP >= VSyscall_Base && RIP < VSyscall_End;
  if (!InVSyscallPage) {
    // Common case: plain pointer math relative to the entry point
    return _InstStream - EntryPoint + RIP;
  }

  // VSyscall
  // This doesn't exist on AArch64 and on x86_64 hosts this is emulated with faults to a region mapped with --xp permissions
  // Offset     0: vgettimeofday
  // Offset 0x400: vtime
  // Offset 0x800: vgetcpu
  const uint64_t PageOffset = RIP - VSyscall_Base;
  return VSyscallData + PageOffset;
}

// Runs a full decode at PC to find out whether the code is cacheable.
//
// The decode buffer is handed back (DelayedDisownBuffer) before returning.
// Returns false if the decode hit a bad relocation, true otherwise.
bool Decoder::CheckIfCacheable(FEXCore::Core::InternalThreadState& Thread, const uint8_t* InstStream, uint64_t PC, uint64_t MaxInst) {
  DecodeInstructionsAtEntry(&Thread, InstStream, PC, MaxInst);

  // Sample the flag before releasing the buffer
  const bool Cacheable = !HitBadRelocation;
  DelayedDisownBuffer();
  return Cacheable;
}

// Decodes the (multi)block rooted at guest address PC and stores the result in BlockInfo.
//
// Starting with PC, block start addresses are taken from BlocksToDecode in ascending order. Each block
// is decoded instruction by instruction until it ends, would overlap the next known block start, fails
// to decode, or the instruction budget is exhausted. Branch targets discovered along the way are queued
// as further blocks.
//
// Thread:      used to read the CS segment (operating mode) and to look up section information.
// _InstStream: host pointer corresponding to guest address PC.
// PC:          guest address of the entry point.
// MaxInst:     upper bound on the number of decoded instructions; 0 selects CTX->Config.MaxInstPerBlock.
//
// If a bad relocation is hit, decoding stops immediately and BlockInfo is left holding only the block
// that was being decoded, with the instruction count, entry points and code pages cleared.
void Decoder::DecodeInstructionsAtEntry(FEXCore::Core::InternalThreadState* Thread, const uint8_t* _InstStream, uint64_t PC, uint64_t MaxInst) {
  FEXCORE_PROFILE_SCOPED("DecodeInstructions");
  BlockInfo.TotalInstructionCount = 0;
  BlockInfo.Blocks.clear();
  VisitedBlocks.clear();
  // Reset internal state management
  DecodedSize = 0;
  MaxCondBranchForward = 0;
  MaxCondBranchBackwards = ~0ULL;
  DecodedBuffer = PoolObject.ReownOrClaimBuffer();

  // Decode operating mode from thread's CS segment.
  const auto CSSegment = Core::CPUState::GetSegmentFromIndex(Thread->CurrentFrame->State, Thread->CurrentFrame->State.cs_idx);
  BlockInfo.Is64BitMode = CSSegment->L == 1;
  LOGMAN_THROW_A_FMT(BlockInfo.Is64BitMode == CTX->Config.Is64BitMode, "Expected operating mode to not change at runtime!");

  EntryPoint = PC;
  BlockInfo.EntryPoints = {PC};
  InstStream = _InstStream;

  uint64_t TotalInstructions {};

  // Without section information the whole address space is considered in range
  SectionMinAddress = 0;
  SectionMaxAddress = ~0ULL;
  Relocations = nullptr;

  if (CTX->GetCodeCache().IsGeneratingCache || EnableCodeCacheValidation) {
    // If generating cache, attempt to load section bounds and relocations
    if (auto SectionInfo = CTX->SyscallHandler->LookupExecutableFileSection(Thread, EntryPoint)) {
      SectionMinAddress = SectionInfo->FileStartVA;
      SectionMaxAddress = SectionInfo->EndVA;
      Relocations = &SectionInfo->FileInfo.Relocations;
    }
  }

  DecodedMinAddress = EntryPoint;
  DecodedMaxAddress = EntryPoint;

  // Entry is a jump target
  BlocksToDecode = {PC};

  uint64_t CurrentCodePage = PC & FEXCore::Utils::FEX_PAGE_MASK;

  BlockInfo.CodePages = {CurrentCodePage};

  if (MaxInst == 0) {
    MaxInst = CTX->Config.MaxInstPerBlock;
  }

  bool EntryBlock {true};
  bool FinalInstruction {false};

  while (!FinalInstruction && !BlocksToDecode.empty()) {
    // BlocksToDecode is an ordered set, so this always picks the lowest pending address
    auto BlockDecodeIt = BlocksToDecode.begin();
    uint64_t RIPToDecode = *BlockDecodeIt;
    BlocksToDecode.erase(BlockDecodeIt);
    VisitedBlocks.emplace(RIPToDecode);

    // Position at which the new block has to be inserted to keep the block list sorted by Entry
    auto BlockSuccIt = std::lower_bound(BlockInfo.Blocks.begin(), BlockInfo.Blocks.end(), RIPToDecode,
                                        [](const auto& a, uint64_t Address) { return a.Entry < Address; });

    LOGMAN_THROW_A_FMT(BlockSuccIt == BlockInfo.Blocks.end() || BlockSuccIt->Entry != RIPToDecode, "unexpected");

    // The block being decoded must stop before the closest following block start, whether that one is
    // still pending or already decoded
    NextBlockStartAddress = ~0ULL;
    if (!BlocksToDecode.empty()) {
      // We just erased the lowest, the front is then the second lowest
      NextBlockStartAddress = *BlocksToDecode.begin();
    }
    if (BlockSuccIt != BlockInfo.Blocks.end() && BlockSuccIt->Entry < NextBlockStartAddress) {
      NextBlockStartAddress = BlockSuccIt->Entry;
    }
    LOGMAN_THROW_A_FMT(NextBlockStartAddress > RIPToDecode, "unexpected");

    // Insert the block now so it can be looked up and split if necessary on a backward edge
    auto BlockIt = BlockInfo.Blocks.emplace(BlockSuccIt);

    BlockIt->Entry = RIPToDecode;
    BlockIt->Size = 0;
    BlockIt->IsEntryPoint = EntryBlock;

    uint64_t PCOffset = 0;                    // Byte offset of the current instruction from the block start
    uint64_t BlockStartOffset = DecodedSize;  // Index of this block's first instruction in DecodedBuffer
    bool EraseBlock = true; // Unset once the block contains an instruction

    BlockIt->DecodedInstructions = &DecodedBuffer[BlockStartOffset];
    BlockIt->NumInstructions = 0;

    // Do a bit of pointer math to figure out where we are in code
    InstStream = AdjustAddrForSpecialRegion(_InstStream, EntryPoint, RIPToDecode);

    while (1) {
      InstructionSize = 0;

      // MAX_INST_SIZE assumes worst case
      auto OpAddress = RIPToDecode + PCOffset;
      auto OpMaxAddress = OpAddress + MAX_INST_SIZE;

      auto OpMinPage = OpAddress & FEXCore::Utils::FEX_PAGE_MASK;
      auto OpMaxPage = OpMaxAddress & FEXCore::Utils::FEX_PAGE_MASK;

      if (!EntryBlock && OpMinPage == OpMaxPage && PeekByte(0).value_or(0) == 0 && PeekByte(1).value_or(0) == 0) [[unlikely]] {
        // End the multiblock early if we hit 2 consecutive null bytes (add [rax], al) in the same page with the
        // assumption we are most likely trying to explore garbage code.
        break;
      }

      // Track every page the instruction could touch, based on its worst case size
      if (OpMinPage != CurrentCodePage) {
        CurrentCodePage = OpMinPage;
        BlockInfo.CodePages.insert(CurrentCodePage);
      }

      if (OpMaxPage != CurrentCodePage) {
        CurrentCodePage = OpMaxPage;
        BlockInfo.CodePages.insert(CurrentCodePage);
      }

      BlockIt->BlockStatus = DecodeInstruction(OpAddress);
      if (HitBadRelocation) {
        // Abandon the whole multiblock, leaving only the offending block behind for the caller to inspect
        BlockInfo.TotalInstructionCount = 0;
        BlockInfo.Blocks = {*BlockIt};
        BlockInfo.EntryPoints.clear();
        BlockInfo.CodePages.clear();
        return;
      }
      uint64_t OpEndAddress = OpAddress + DecodeInst->InstSize;

      DecodedMinAddress = std::min(DecodedMinAddress, OpAddress);
      DecodedMaxAddress = std::max(DecodedMaxAddress, OpEndAddress);

      if (OpEndAddress > NextBlockStartAddress) {
        // This instruction would overlap with another so skip adding it to the multiblock
        break;
      }

      EraseBlock = false; // Block contains at least one valid instruction, so unset erase
      ++TotalInstructions;
      ++DecodedSize;
      ++BlockIt->NumInstructions;
      BlockIt->Size += DecodeInst->InstSize;

      // Can not continue this block at all on invalid instruction
      if (BlockIt->BlockStatus != DecodedBlockStatus::SUCCESS) [[unlikely]] {
        if (!EntryBlock && BlockIt->BlockStatus != DecodedBlockStatus::BAD_RELOCATION) {
          // In multiblock configurations, we can early terminate any non-entrypoint blocks with the expectation that this won't get hit.
          // Improves compile-times.
          // Just need to undo additions that this block decoding has caused.
          TotalInstructions -= BlockIt->NumInstructions;
          DecodedSize = BlockStartOffset;
          InstStream -= PCOffset;
          EraseBlock = true;
        } else {
          LogMan::Msg::EFmt("{} instruction in entry block: {:X}",
                            BlockIt->BlockStatus == DecodedBlockStatus::INVALID_INST   ? "Invalid" :
                            BlockIt->BlockStatus == DecodedBlockStatus::NOEXEC_INST    ? "NoExec" :
                            BlockIt->BlockStatus == DecodedBlockStatus::BAD_RELOCATION ? "BadRelocation" :
                                                                                         "PartialDecode",
                            OpAddress);
        }
        break;
      }

      // Check if we need to end the entire multiblock
      FinalInstruction = DecodedSize >= MaxInst || DecodedSize >= DefaultDecodedBufferSize || TotalInstructions >= MaxInst;
      if (FinalInstruction) {
        break;
      }

      if (!InstCanContinue()) {
        if (DecodeInst->TableInfo->Flags & FEXCore::X86Tables::InstFlags::FLAGS_SETS_RIP) {
          // If we have multiblock enabled
          // If the branch target is within our multiblock range then we can keep going on
          // We don't want to short circuit this since we want to calculate our ranges still
          // NOTE: This will invalidate BlockIt, this is fine as we immediately break from the loop and EraseBlock cannot be true
          BlockIt->ForceFullSMCDetection = CTX->AreMonoHacksActive() && IsBranchMonoTailcall(BlockIt->NumInstructions);
          BranchTargetInMultiblockRange();
        }

        break;
      }

      // Advance to the next instruction of this block
      PCOffset += DecodeInst->InstSize;
      InstStream += DecodeInst->InstSize;
    }

    // NOTE: BlockIt is only valid here in the EraseBlock case
    if (EraseBlock) {
      BlockInfo.Blocks.erase(BlockIt);
    } else {
      // Only a block that is kept contributes its branch targets to the pending work
      BlocksToDecode.merge(CurrentBlockTargets);
    }

    CurrentBlockTargets.clear();
    EntryBlock = false;
  }

  BlockInfo.TotalInstructionCount = TotalInstructions;

  // Entry points may have been added (by calls) or blocks split since IsEntryPoint was first assigned, so recompute it
  for (auto& Block : BlockInfo.Blocks) {
    Block.IsEntryPoint = BlockInfo.EntryPoints.contains(Block.Entry);
  }
}

} // namespace FEXCore::Frontend


================================================
FILE: FEXCore/Source/Interface/Core/Frontend.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/Utils/ThreadPoolAllocator.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/fextl/robin_map.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>

namespace FEXCore::Context {
class ContextImpl;
}
namespace FEXCore::HLE {
enum class SyscallOSABI;
}

namespace FEXCore::Frontend {
// x86 instruction decoder frontend.
//
// Turns a guest instruction stream into DecodedInst records grouped into blocks (DecodedBlocks), which
// together form the multiblock described by DecodedBlockInformation.
class Decoder final {
public:
  // Outcome of decoding a single instruction / the last instruction of a block
  enum class DecodedBlockStatus {
    SUCCESS,
    INVALID_INST,
    NOEXEC_INST,
    PARTIAL_DECODE_INST,
    BAD_RELOCATION,
  };

  // New Frontend decoding
  // A run of consecutively decoded instructions starting at guest address Entry.
  struct DecodedBlocks final {
    uint64_t Entry {};           // Guest address of the first instruction
    uint64_t Size {};            // Size of the block in guest bytes
    uint64_t NumInstructions {}; // Number of entries behind DecodedInstructions
    FEXCore::X86Tables::DecodedInst* DecodedInstructions {}; // Points into the decoder's instruction buffer
    DecodedBlockStatus BlockStatus {};                       // Status of the last instruction decoded for this block
    bool IsEntryPoint {};
    bool ForceFullSMCDetection {};
  };

  // Result of one DecodeInstructionsAtEntry() run
  struct DecodedBlockInformation final {
    uint64_t TotalInstructionCount {};
    bool Is64BitMode {};
    fextl::vector<DecodedBlocks> Blocks;
    fextl::set<uint64_t> EntryPoints;
    fextl::set<uint64_t> CodePages; // Start addresses of all pages touching the block
  };

  Decoder(FEXCore::Core::InternalThreadState* Thread);

  // Decodes at PC and reports whether the result is cacheable (i.e. no bad relocation was hit).
  bool CheckIfCacheable(FEXCore::Core::InternalThreadState&, const uint8_t* InstStream, uint64_t PC, uint64_t MaxInst);

  // Decodes the multiblock rooted at PC; InstStream is the host pointer to the bytes at PC.
  // The result is available through GetDecodedBlockInfo().
  void DecodeInstructionsAtEntry(FEXCore::Core::InternalThreadState* Thread, const uint8_t* InstStream, uint64_t PC, uint64_t MaxInst);

  // Returns the result of the most recent decode. The pointer refers to a member of this decoder.
  const DecodedBlockInformation* GetDecodedBlockInfo() const {
    return &BlockInfo;
  }

  // Lowest instruction start and highest instruction end address seen by the most recent decode
  uint64_t DecodedMinAddress {};
  uint64_t DecodedMaxAddress {~0ULL};

  // Optional sink for branch targets that fall outside of the multiblock. May be nullptr.
  void SetExternalBranches(fextl::set<uint64_t>* v) {
    ExternalBranches = v;
  }

  void DelayedDisownBuffer() {
    PoolObject.DelayedDisownBuffer();
  }

  // Resets the cached executable range bounds to zero.
  void ResetExecutableRangeCache() {
    ExecutableRangeBase = ExecutableRangeEnd = 0;
  }

private:
  // To pass any information from instruction prefixes
  // down into the actual instruction handling machinery.
  struct DecodedHeader {
    uint8_t vvvv; // Encoded operand in a VEX prefix.
    bool w;       // VEX.W bit.
    bool L;       // VEX.L bit (if set then 256 bit operation, if unset then scalar or 128-bit operation)
  };

  FEXCore::Core::InternalThreadState* Thread {};
  FEXCore::Context::ContextImpl* CTX {};
  const FEXCore::HLE::SyscallOSABI OSABI {};

  FEX_CONFIG_OPT(EnableCodeCacheValidation, ENABLECODECACHEVALIDATION);

  bool DecodeInstructionImpl(uint64_t PC);
  DecodedBlockStatus DecodeInstruction(uint64_t PC);

  void BranchTargetInMultiblockRange();
  bool IsBranchMonoTailcall(uint64_t NumInstructions) const;
  bool InstCanContinue() const;

  void AddBranchTarget(uint64_t Target);

  bool CheckRangeExecutable(uint64_t Address, uint64_t Size);

  uint8_t ReadByte();
  std::optional<uint8_t> PeekByte(uint8_t Offset);
  std::pair<uint64_t, bool> ReadData(uint8_t Size);

  // Advances the size of the instruction being decoded without reading the skipped bytes.
  void SkipBytes(uint8_t Size) {
    InstructionSize += Size;
  }

  bool NormalOp(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op, DecodedHeader Options = {});
  bool NormalOpHeader(const FEXCore::X86Tables::X86InstInfo* Info, uint16_t Op);

  // ExpectedOffset must be negative; it is the distance from the current read position back to the
  // index recorded for a REX prefix.
  void DecodeREXIfValid(int8_t ExpectedOffset = -1);

  static constexpr size_t DefaultDecodedBufferSize = 0x10000;
  FEXCore::X86Tables::DecodedInst* DecodedBuffer {};
  Utils::PoolBufferWithTimedRetirement<FEXCore::X86Tables::DecodedInst*, 5000, 500> PoolObject;
  size_t DecodedSize {}; // Number of instructions currently stored in DecodedBuffer

  uint64_t ExecutableRangeBase {};
  uint64_t ExecutableRangeEnd {};
  bool ExecutableRangeWritable {};
  // Both flags are reset at the start of every DecodeInstruction() call
  bool HitNonExecutableRange {};
  bool HitBadRelocation {};

  const uint8_t* InstStream {};
  IR::OpSize GetGPROpSize() const {
    return BlockInfo.Is64BitMode ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
  }

  // State of the instruction currently being decoded
  static constexpr size_t MAX_INST_SIZE = 15;
  uint8_t InstructionSize {};
  std::array<uint8_t, MAX_INST_SIZE> Instruction {};
  uint8_t LastEscapePrefix {}; // Last 0x66/0xF2/0xF3 prefix byte seen, or 0
  FEXCore::X86Tables::DecodedInst* DecodeInst {};

  // This is for multiblock data tracking
  uint64_t EntryPoint {};
  uint64_t MaxCondBranchForward {};
  uint64_t MaxCondBranchBackwards {~0ULL};
  uint64_t SectionMaxAddress {~0ULL};
  uint64_t SectionMinAddress {};
  uint64_t NextBlockStartAddress {~0ULL};

  DecodedBlockInformation BlockInfo;
  fextl::set<uint64_t> CurrentBlockTargets; // Targets found while decoding the current block
  fextl::set<uint64_t> BlocksToDecode;      // Pending block start addresses
  fextl::set<uint64_t> VisitedBlocks;       // Addresses that must not be queued again
  fextl::set<uint64_t>* ExternalBranches {nullptr};

  const fextl::robin_map<uint32_t, GuestRelocationType>* Relocations {nullptr};

  // ModRM rm decoding
  using DecodeModRMPtr = void (FEXCore::Frontend::Decoder::*)(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM);
  void DecodeModRM_16(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM);
  void DecodeModRM_64(X86Tables::DecodedOperand* Operand, X86Tables::ModRMDecoded ModRM);

  // Indexed by "uses 16-bit addressing": 0 selects DecodeModRM_64, 1 selects DecodeModRM_16
  static constexpr std::array<DecodeModRMPtr, 2> DecodeModRMs_Disp {
    &FEXCore::Frontend::Decoder::DecodeModRM_64,
    &FEXCore::Frontend::Decoder::DecodeModRM_16,
  };

  const std::array<X86Tables::X86InstInfo, X86Tables::MAX_X87_TABLE_SIZE>* X87Table {};

  const std::array<X86Tables::X86InstInfo, X86Tables::MAX_VEX_TABLE_SIZE>* VEXTable {};
  const std::array<X86Tables::X86InstInfo, X86Tables::MAX_VEX_GROUP_TABLE_SIZE>* VEXTableGroup {};

  // Maps guest address RIP to the host pointer to read instruction bytes from, given that
  // _InstStream corresponds to guest address EntryPoint.
  const uint8_t* AdjustAddrForSpecialRegion(const uint8_t* _InstStream, uint64_t EntryPoint, uint64_t RIP);
};
} // namespace FEXCore::Frontend


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Common/SoftFloat.h"

#include "Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/SHMStats.h>

#include <cmath>

namespace FEXCore::CPU {
// Builds a softfloat_state from the x87 control word.
//
// FCW bits [9:8] (precision control) select the rounding precision unless Force80BitPrecision
// is set, in which case the precision stays at 80. FCW bits [11:10] (rounding control) always
// select the rounding mode. Exception flags start out cleared.
FEXCORE_PRESERVE_ALL_ATTR static softfloat_state SoftFloatStateFromFCW(uint16_t FCW, bool Force80BitPrecision = false) {
  softfloat_state State {};
  State.detectTininess = softfloat_tininess_afterRounding;
  State.exceptionFlags = 0;
  // Default precision; also what remains in effect for the invalid encoding below.
  State.roundingPrecision = 80;

  if (!Force80BitPrecision) {
    auto PC = (FCW >> 8) & 3;
    if (PC == 0) {
      State.roundingPrecision = 32;
    } else if (PC == 2) {
      State.roundingPrecision = 64;
    } else if (PC == 3) {
      State.roundingPrecision = 80;
    } else {
      // PC == 1 is the only remaining encoding.
      LOGMAN_MSG_A_FMT("Invalid x87 precision mode, {}", PC);
    }
  }

  auto RC = (FCW >> 10) & 3;
  if (RC == 0) {
    State.roundingMode = softfloat_round_near_even;
  } else if (RC == 1) {
    State.roundingMode = softfloat_round_min;
  } else if (RC == 2) {
    State.roundingMode = softfloat_round_max;
  } else {
    State.roundingMode = softfloat_round_minMag;
  }

  return State;
}

// Propagates softfloat exception flags into the guest x87 status flags.
// Only the invalid-operation flag is forwarded; it sets X87FLAG_IE_LOC and is never cleared here.
FEXCORE_PRESERVE_ALL_ATTR static void HandleX87Exception(const softfloat_state& State, FEXCore::Core::CpuStateFrame* Frame) {
  const bool InvalidOperation = (State.exceptionFlags & softfloat_flag_invalid) != 0;
  if (!InvalidOperation) {
    return;
  }

  // Invalid Operation exception (bit 0 of X87 status word)
  Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = 1;
}

// Owns a softfloat_state derived from the guest FCW for the duration of one fallback call.
// On destruction the accumulated exception flags are handed to HandleX87Exception so that
// they are reflected in the guest frame.
class ScopedSoftFloatState {
public:
  // Neither copyable nor movable: the destructor must run exactly once per constructed state.
  ScopedSoftFloatState(const ScopedSoftFloatState&) = delete;
  ScopedSoftFloatState(ScopedSoftFloatState&&) = delete;
  ScopedSoftFloatState& operator=(const ScopedSoftFloatState&) = delete;
  ScopedSoftFloatState& operator=(ScopedSoftFloatState&&) = delete;

  // FCW: guest x87 control word. Frame: guest frame that receives exception flags.
  // Force80BitPrecision: ignore the precision-control field of FCW and use 80-bit precision.
  FEXCORE_PRESERVE_ALL_ATTR ScopedSoftFloatState(uint16_t FCW, FEXCore::Core::CpuStateFrame* Frame, bool Force80BitPrecision = false)
    : State(SoftFloatStateFromFCW(FCW, Force80BitPrecision))
    , Frame(Frame) {}

  FEXCORE_PRESERVE_ALL_ATTR ~ScopedSoftFloatState() {
    HandleX87Exception(State, Frame);
  }

  // Passed by address to the softfloat routines, which record exception flags in it.
  softfloat_state State;

private:
  FEXCore::Core::CpuStateFrame* Frame;
};

// Fallbacks converting a 32-bit or 64-bit float to the 80-bit format.
template<>
struct OpHandlers<IR::OP_F80CVTTO> {
  // float -> F80, using the softfloat state derived from FCW.
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle4(uint16_t FCW, float src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat(SoftState, src);
  }

  // double -> F80, using the softfloat state derived from FCW.
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle8(uint16_t FCW, double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat(SoftState, src);
  }
};

// Fallback for comparing two 80-bit floats.
template<>
struct OpHandlers<IR::OP_F80CMP> {
  // Returns a bitmask with bits IR::FCMP_FLAG_LT, IR::FCMP_FLAG_UNORDERED and IR::FCMP_FLAG_EQ
  // set according to the outcome reported by X80SoftFloat::FCMP.
  FEXCORE_PRESERVE_ALL_ATTR static uint64_t handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};

    bool IsEqual, IsLess, IsUnordered;
    X80SoftFloat::FCMP(&Scoped.State, Src1, Src2, &IsEqual, &IsLess, &IsUnordered);

    uint64_t ResultFlags = 0;
    ResultFlags |= IsLess ? (1 << IR::FCMP_FLAG_LT) : 0;
    ResultFlags |= IsUnordered ? (1 << IR::FCMP_FLAG_UNORDERED) : 0;
    ResultFlags |= IsEqual ? (1 << IR::FCMP_FLAG_EQ) : 0;
    return ResultFlags;
  }
};

// Fallbacks converting an 80-bit float to a 32-bit or 64-bit float.
template<>
struct OpHandlers<IR::OP_F80CVT> {
  // F80 -> float, using the softfloat state derived from FCW.
  FEXCORE_PRESERVE_ALL_ATTR static float handle4(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat(src).ToF32(SoftState);
  }

  // F80 -> double, using the softfloat state derived from FCW.
  FEXCORE_PRESERVE_ALL_ATTR static double handle8(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat(src).ToF64(SoftState);
  }
};

// Fallbacks converting an 80-bit float to a signed integer.
//
// handle2/4/8 convert using the rounding mode selected by FCW (via X80SoftFloat::ToI*).
// handle2t/4t/8t always truncate toward zero (softfloat_round_minMag), regardless of FCW.
template<>
struct OpHandlers<IR::OP_F80CVTINT> {
  FEXCORE_PRESERVE_ALL_ATTR static int16_t handle2(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    return X80SoftFloat(src).ToI16(&State.State);
  }

  FEXCORE_PRESERVE_ALL_ATTR static int32_t handle4(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    return X80SoftFloat(src).ToI32(&State.State);
  }

  FEXCORE_PRESERVE_ALL_ATTR static int64_t handle8(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    return X80SoftFloat(src).ToI64(&State.State);
  }

  // Truncating conversion to int16_t. softfloat has no 16-bit conversion, so the value is first
  // converted to 32 bits and then range-checked here.
  FEXCORE_PRESERVE_ALL_ATTR static int16_t handle2t(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    const auto rv = extF80_to_i32(&State.State, X80SoftFloat(src), softfloat_round_minMag, false);

    if (rv > INT16_MAX || rv < INT16_MIN) {
      // The value fits in 32 bits, so softfloat did not flag it, but it does not fit the 16-bit
      // destination. x87 signals an invalid operation in that case; record it so that the scoped
      // state forwards it to the guest status flags when it is destroyed.
      State.State.exceptionFlags |= softfloat_flag_invalid;

      ///< Indefinite value for 16-bit conversions.
      return INT16_MIN;
    }

    return static_cast<int16_t>(rv);
  }

  FEXCORE_PRESERVE_ALL_ATTR static int32_t handle4t(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    return extF80_to_i32(&State.State, X80SoftFloat(src), softfloat_round_minMag, false);
  }

  FEXCORE_PRESERVE_ALL_ATTR static int64_t handle8t(uint16_t FCW, VectorRegType src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState State {FCW, Frame};
    return extF80_to_i64(&State.State, X80SoftFloat(src), softfloat_round_minMag, false);
  }
};

// Fallbacks converting a signed integer to the 80-bit format.
// FCW is part of the fallback signature but is not consulted; no softfloat state is created.
template<>
struct OpHandlers<IR::OP_F80CVTTOINT> {
  // int16_t -> F80
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle2(uint16_t FCW, int16_t src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    X80SoftFloat Converted(src);
    return Converted;
  }

  // int32_t -> F80
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle4(uint16_t FCW, int32_t src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    X80SoftFloat Converted(src);
    return Converted;
  }
};

// Fallback forwarding to X80SoftFloat::FRNDINT.
// The precision-control field of FCW is ignored (80-bit precision is forced); the rounding mode is honoured.
template<>
struct OpHandlers<IR::OP_F80ROUND> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FRNDINT(SoftState, Src1);
  }
};

// Fallback forwarding to X80SoftFloat::F2XM1 with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80F2XM1> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::F2XM1(SoftState, Src1);
  }
};

// Fallback forwarding to X80SoftFloat::FTAN with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80TAN> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FTAN(SoftState, Src1);
  }
};

// Fallback forwarding to X80SoftFloat::FSQRT; precision and rounding both come from FCW.
template<>
struct OpHandlers<IR::OP_F80SQRT> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FSQRT(SoftState, Src1);
  }
};

// Fallback forwarding to X80SoftFloat::FSIN with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80SIN> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FSIN(SoftState, Src1);
  }
};

// Fallback forwarding to X80SoftFloat::FCOS with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80COS> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FCOS(SoftState, Src1);
  }
};

// Fallback producing both X80SoftFloat::FSIN and X80SoftFloat::FCOS of Src1 as a register pair
// (sine first, cosine second), with 80-bit precision forced. Both calls share one softfloat state.
template<>
struct OpHandlers<IR::OP_F80SINCOS> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegPairType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return FEXCore::MakeVectorRegPair(X80SoftFloat::FSIN(SoftState, Src1), X80SoftFloat::FCOS(SoftState, Src1));
  }
};

// Fallback forwarding to X80SoftFloat::FXTRACT_EXP.
// FCW is part of the fallback signature but is not consulted; no softfloat state is created.
template<>
struct OpHandlers<IR::OP_F80XTRACT_EXP> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    auto Exponent = X80SoftFloat::FXTRACT_EXP(Src1);
    return Exponent;
  }
};

// Fallback forwarding to X80SoftFloat::FXTRACT_SIG.
// FCW is part of the fallback signature but is not consulted; no softfloat state is created.
template<>
struct OpHandlers<IR::OP_F80XTRACT_SIG> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    auto Significand = X80SoftFloat::FXTRACT_SIG(Src1);
    return Significand;
  }
};

// Fallback forwarding to X80SoftFloat::FADD; precision and rounding both come from FCW.
template<>
struct OpHandlers<IR::OP_F80ADD> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FADD(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FSUB; precision and rounding both come from FCW.
template<>
struct OpHandlers<IR::OP_F80SUB> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FSUB(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FMUL; precision and rounding both come from FCW.
template<>
struct OpHandlers<IR::OP_F80MUL> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FMUL(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FDIV; precision and rounding both come from FCW.
template<>
struct OpHandlers<IR::OP_F80DIV> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FDIV(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FYL2X with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80FYL2X> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FYL2X(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FATAN with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80ATAN> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FATAN(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FREM1 with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80FPREM1> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FREM1(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FREM with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80FPREM> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FREM(SoftState, Src1, Src2);
  }
};

// Fallback forwarding to X80SoftFloat::FSCALE with 80-bit precision forced.
template<>
struct OpHandlers<IR::OP_F80SCALE> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1, VectorRegType Src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    ScopedSoftFloatState Scoped {FCW, Frame, true};
    softfloat_state* const SoftState = &Scoped.State;
    return X80SoftFloat::FSCALE(SoftState, Src1, Src2);
  }
};

// Double-precision sine fallback, computed with the host math library.
template<>
struct OpHandlers<IR::OP_F64SIN> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Result = sin(src);
    return Result;
  }
};

// Double-precision cosine fallback, computed with the host math library.
template<>
struct OpHandlers<IR::OP_F64COS> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Result = cos(src);
    return Result;
  }
};

// Double-precision combined sine/cosine fallback; returns {sine, cosine}.
template<>
struct OpHandlers<IR::OP_F64SINCOS> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorScalarF64Pair handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    double SinValue, CosValue;
#ifdef _WIN32
    // sincos() is not used on Windows; compute both halves separately.
    SinValue = ::sin(src);
    CosValue = ::cos(src);
#else
    sincos(src, &SinValue, &CosValue);
#endif
    return VectorScalarF64Pair {SinValue, CosValue};
  }
};

// Double-precision tangent fallback, computed with the host math library.
template<>
struct OpHandlers<IR::OP_F64TAN> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Result = tan(src);
    return Result;
  }
};

// Double-precision 2^x - 1 fallback.
template<>
struct OpHandlers<IR::OP_F64F2XM1> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double PowerOfTwo = exp2(src);
    return PowerOfTwo - 1.0;
  }
};

// Double-precision two-argument arctangent fallback: atan2(src1, src2).
template<>
struct OpHandlers<IR::OP_F64ATAN> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Angle = atan2(src1, src2);
    return Angle;
  }
};

// Double-precision partial remainder fallback: fmod(src1, src2).
template<>
struct OpHandlers<IR::OP_F64FPREM> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Remainder = fmod(src1, src2);
    return Remainder;
  }
};

// Double-precision IEEE remainder fallback: remainder(src1, src2).
template<>
struct OpHandlers<IR::OP_F64FPREM1> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Remainder = remainder(src1, src2);
    return Remainder;
  }
};

// Double-precision y * log2(x) fallback, where x is src1 and y is src2.
template<>
struct OpHandlers<IR::OP_F64FYL2X> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    const double Log2OfSrc1 = log2(src1);
    return src2 * Log2OfSrc1;
  }
};

// Double-precision scale fallback: returns src1 * 2^trunc(src2).
template<>
struct OpHandlers<IR::OP_F64SCALE> {
  FEXCORE_PRESERVE_ALL_ATTR static double handle(double src1, double src2, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    if (src1 == 0.0) { // src1 might be +/- zero
      return src1;     // this will return negative or positive zero when appropriate
    }

    const double trun = trunc(src2);
    if (!std::isfinite(trun)) {
      // Infinite or NaN scale factor: let the multiplication produce infinity, zero or NaN.
      return src1 * exp2(trun);
    }

    // Scale by adjusting the exponent directly instead of multiplying by exp2(trun). The
    // intermediate exp2() overflows to infinity (or underflows to zero) for large |trun| even
    // when the scaled result itself is representable, e.g. 2^-1000 scaled by 1100.
    //
    // The exponent range of a double spans fewer than 2^12 binades, so clamping to +/-4096 never
    // changes the result while keeping the value within int range for ldexp.
    constexpr double MaxScale = 4096.0;
    const double Clamped = trun > MaxScale ? MaxScale : (trun < -MaxScale ? -MaxScale : trun);
    return std::ldexp(src1, static_cast<int>(Clamped));
  }
};

// Fallback converting an 80-bit float to 10-byte packed BCD.
//
// Bytes 0..8 of the result each hold two decimal digits (tens in the high nibble, ones in the
// low nibble), least significant pair first. Byte 9 carries the sign in bit 7.
template<>
struct OpHandlers<IR::OP_F80BCDSTORE> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src1q, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    X80SoftFloat Src1 = Src1q;
    ScopedSoftFloatState Scoped {FCW, Frame};
    softfloat_state* const SoftState = &Scoped.State;

    // Remember the sign before it is stripped; the magnitude is encoded separately.
    const bool Negative = Src1.Top.Sign;

    // Round to an integral value first, then work on the magnitude only.
    Src1 = X80SoftFloat::FRNDINT(SoftState, Src1);
    Src1.Top.Sign = 0;

    uint64_t Remaining = Src1.ToI64(SoftState);
    X80SoftFloat Rv;
    uint8_t* BCD = reinterpret_cast<uint8_t*>(&Rv);

    // Peel off two decimal digits per output byte until the value is exhausted.
    // NOTE(review): bytes that are not written here keep whatever the X80SoftFloat default
    // constructor left in them, and anything beyond 18 digits is dropped - confirm both are intended.
    for (size_t Index = 0; Index < 9 && Remaining != 0; ++Index, Remaining /= 100) {
      const uint8_t Pair = Remaining % 100;
      const uint8_t Tens = Pair / 10;
      const uint8_t Ones = Pair % 10;
      BCD[Index] = (Tens << 4) | Ones;
    }

    // Set negative flag once converted to x87
    BCD[9] = Negative ? 0x80 : 0;

    return Rv;
  }
};

// Fallback converting 10-byte packed BCD to an 80-bit float.
// FCW is part of the fallback signature but is not consulted; no softfloat state is created.
template<>
struct OpHandlers<IR::OP_F80BCDLOAD> {
  FEXCORE_PRESERVE_ALL_ATTR static VectorRegType handle(uint16_t FCW, VectorRegType Src, FEXCore::Core::CpuStateFrame* Frame) {
    FEXCORE_PROFILE_INSTANT_INCREMENT(Frame->Thread, AccumulatedFloatFallbackCount, 1);
    uint8_t* Bytes = reinterpret_cast<uint8_t*>(&Src);

    // Each of the low nine bytes encodes two decimal digits:
    // | 4 bit     | 4 bit    |
    // | 10s place | 1s place |
    // EG 0x48 = 48
    // EG 0x4847 = 4847
    // Only 0-9 is supported per nibble, A-F results in undefined data.
    // Walking from the most significant byte down yields an 18-digit value.
    uint64_t Magnitude {};
    for (size_t Index = 9; Index-- > 0;) {
      const uint8_t Pair = Bytes[Index];
      Magnitude *= 100;
      Magnitude += (Pair >> 4) * 10;
      Magnitude += Pair & 0xF;
    }

    // The top bit of the last byte tells us whether the value is negative.
    const bool Negative = Bytes[9] & 0x80;

    X80SoftFloat Result;
    Result = Magnitude;
    Result.Top.Sign = Negative;
    return Result;
  }
};

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>

namespace FEXCore::IR {
enum IROps : uint16_t;
}

namespace FEXCore::CPU {

// Base template for fallback handling.
//
// Registering and hooking up fallback is currently like so:
//
// 1. Go to InterpreterFallbacks.cpp and create a template specialization of
//    the GetFallbackInfo member function.
//
//    This member function should reasonably define what the fallback you're
//    going to create will take as parameters and return as a result. For example:
//
//    template<>
//    FallbackInfo GetFallbackInfo(X80SoftFloat(*fn)(double), Core::FallbackHandlerIndex Index) {
//      return {FABI_F80_F64, (void*)fn, Index};
//    }
//
//    Defines info about a fallback that takes a double as an argument and
//    returns a X80SoftFloat instance.
//
//    You will also want to define a new FallbackHandlerIndex enum member and use it
//    to set up the new info handler into the Info array in FillFallbackIndexPointers.
//
// 1.1. (potentially optional). Define a new ABI element in the FallbackAPI enum.
//      This ABI enum value will be used to tell the JITs how to handle the fallback
//      properly. These enum values specify the return type followed by its argument types.
//
//      So FABI_I64_F80_F80, for example, indicates that the function will behave as if it
//      were defined as:
//
//      uint64_t fn(X80SoftFloat, X80SoftFloat)
//
// 1.2. (potentially optional). If you needed to define a new enum ABI type like in 1.1, then
//      you need to add the handling for it in the JITs, which can be found in the respective
//      JIT's JIT.cpp file in a function called Op_Unhandled
//
//      You need to add a new case to the ABI switch statement using the new ABI type
//      and do the necessary moving of data from register-allocated JIT parameters
//      into that platform's registers that respects the calling convention. After this is
//      done, most of the necessary background boilerplate is finished.
//
// 2. Now, make a specialization of this class with a member function named 'handle()'
//    that takes the same parameters as the ones described in the fallback info function
//    specialization.
//
//    For example, if you have the fallback info from the example in step 1, it would be:
//
//    template <>
//    struct OpHandlers<IR::CoolNewIROpcode> {
//      static X80SoftFloat handle(double src) {
//        return ...;
//      }
//    };
//
// 3. Fill out the behavior of the OpHandler specialization to perform what you would like
//    the fallback to do.
//
// 4. Add an implementation of the IR op to the Interpreter that passes through to the
//    OpHandler implementation.
//
// 5. Done.
//
// Primary template. It is empty: an IR op that has a fallback provides an explicit
// specialization of this struct containing its static handler function(s).
template<IR::IROps Op>
struct OpHandlers {};

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Core/CoreState.h>

#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/Interpreter/Fallbacks/F80Fallbacks.h"
#include "Interface/Core/Interpreter/Fallbacks/VectorFallbacks.h"

#include <cstddef>
#include <cstdint>

namespace FEXCore::CPU {

// Generic catch-all: for any function pointer signature, reports the ABI as FABI_UNKNOWN
// paired with the given handler index. The function pointer itself is not inspected.
template<typename R, typename... Args>
static FallbackInfo GetFallbackInfo(R (*fn)(Args...), FEXCore::Core::FallbackHandlerIndex HandlerIndex) {
  FallbackInfo Unknown = {FABI_UNKNOWN, HandlerIndex};
  return Unknown;
}

// Populates the fallback table: for every fallback handler index, stores the ABI trampoline
// taken from ABIHandlers together with the address of the matching OpHandlers function.
//
// Info:        table indexed by Core::OPINDEX_* that receives the entries.
// ABIHandlers: table indexed by FABI_* holding one trampoline value per fallback ABI.
void InterpreterOps::FillFallbackIndexPointers(Core::FallbackABIInfo* Info, uint64_t* ABIHandlers) {
  // Writes one table entry; keeps the index / ABI / handler triples below readable.
  const auto Bind = [Info, ABIHandlers](auto Index, auto ABI, auto* Handler) {
    Info[Index] = {ABIHandlers[ABI], reinterpret_cast<uint64_t>(Handler)};
  };

  // Conversions and compare
  Bind(Core::OPINDEX_F80CVTTO_4, FABI_F80_I16_F32_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTTO>::handle4);
  Bind(Core::OPINDEX_F80CVTTO_8, FABI_F80_I16_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTTO>::handle8);
  Bind(Core::OPINDEX_F80CVT_4, FABI_F32_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVT>::handle4);
  Bind(Core::OPINDEX_F80CVT_8, FABI_F64_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVT>::handle8);
  Bind(Core::OPINDEX_F80CVTINT_2, FABI_I16_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2);
  Bind(Core::OPINDEX_F80CVTINT_4, FABI_I32_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4);
  Bind(Core::OPINDEX_F80CVTINT_8, FABI_I64_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8);
  Bind(Core::OPINDEX_F80CVTINT_TRUNC2, FABI_I16_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle2t);
  Bind(Core::OPINDEX_F80CVTINT_TRUNC4, FABI_I32_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle4t);
  Bind(Core::OPINDEX_F80CVTINT_TRUNC8, FABI_I64_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTINT>::handle8t);
  Bind(Core::OPINDEX_F80CMP, FABI_I64_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CMP>::handle);
  Bind(Core::OPINDEX_F80CVTTOINT_2, FABI_F80_I16_I16_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle2);
  Bind(Core::OPINDEX_F80CVTTOINT_4, FABI_F80_I16_I32_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80CVTTOINT>::handle4);

  // Unary
  Bind(Core::OPINDEX_F80ROUND, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80ROUND>::handle);
  Bind(Core::OPINDEX_F80F2XM1, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80F2XM1>::handle);
  Bind(Core::OPINDEX_F80TAN, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80TAN>::handle);
  Bind(Core::OPINDEX_F80SQRT, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80SQRT>::handle);
  Bind(Core::OPINDEX_F80SIN, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80SIN>::handle);
  Bind(Core::OPINDEX_F80COS, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80COS>::handle);
  Bind(Core::OPINDEX_F80SINCOS, FABI_F80x2_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80SINCOS>::handle);
  Bind(Core::OPINDEX_F80XTRACT_EXP, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80XTRACT_EXP>::handle);
  Bind(Core::OPINDEX_F80XTRACT_SIG, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80XTRACT_SIG>::handle);
  Bind(Core::OPINDEX_F80BCDSTORE, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80BCDSTORE>::handle);
  Bind(Core::OPINDEX_F80BCDLOAD, FABI_F80_I16_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80BCDLOAD>::handle);

  // Binary
  Bind(Core::OPINDEX_F80ADD, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80ADD>::handle);
  Bind(Core::OPINDEX_F80SUB, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80SUB>::handle);
  Bind(Core::OPINDEX_F80MUL, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80MUL>::handle);
  Bind(Core::OPINDEX_F80DIV, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80DIV>::handle);
  Bind(Core::OPINDEX_F80FYL2X, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80FYL2X>::handle);
  Bind(Core::OPINDEX_F80ATAN, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80ATAN>::handle);
  Bind(Core::OPINDEX_F80FPREM1, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80FPREM1>::handle);
  Bind(Core::OPINDEX_F80FPREM, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80FPREM>::handle);
  Bind(Core::OPINDEX_F80SCALE, FABI_F80_I16_F80_F80_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F80SCALE>::handle);

  // Double Precision Unary
  Bind(Core::OPINDEX_F64SIN, FABI_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64SIN>::handle);
  Bind(Core::OPINDEX_F64COS, FABI_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64COS>::handle);
  Bind(Core::OPINDEX_F64SINCOS, FABI_F64x2_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64SINCOS>::handle);
  Bind(Core::OPINDEX_F64TAN, FABI_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64TAN>::handle);
  Bind(Core::OPINDEX_F64F2XM1, FABI_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64F2XM1>::handle);

  // Double Precision Binary
  Bind(Core::OPINDEX_F64ATAN, FABI_F64_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64ATAN>::handle);
  Bind(Core::OPINDEX_F64FPREM, FABI_F64_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64FPREM>::handle);
  Bind(Core::OPINDEX_F64FPREM1, FABI_F64_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64FPREM1>::handle);
  Bind(Core::OPINDEX_F64FYL2X, FABI_F64_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64FYL2X>::handle);
  Bind(Core::OPINDEX_F64SCALE, FABI_F64_F64_F64_PTR, &FEXCore::CPU::OpHandlers<IR::OP_F64SCALE>::handle);

  // SSE4.2 string instructions
  Bind(Core::OPINDEX_VPCMPESTRX, FABI_I32_I64_I64_V128_V128_I16, &FEXCore::CPU::OpHandlers<IR::OP_VPCMPESTRX>::handle);
  Bind(Core::OPINDEX_VPCMPISTRX, FABI_I32_V128_V128_I16, &FEXCore::CPU::OpHandlers<IR::OP_VPCMPISTRX>::handle);
}

// Looks up the fallback routine that implements an IR op.
//
// On a match, `*Info` receives the FallbackABI tag describing the handler's
// call signature together with the Core::FallbackHandlerIndex of the handler,
// and the function returns true. For ops that have no entry here, or whose
// operand size has no matching handler, `*Info` is left untouched and the
// function returns false.
bool InterpreterOps::GetFallbackHandler(const IR::IROp_Header* IROp, FallbackInfo* Info) {
  const auto OpSize = IROp->Size;
  switch (IROp->Op) {
  case IR::OP_F80CVTTO: {
    auto Op = IROp->C<IR::IROp_F80CVTTo>();

    // The handler is chosen by the size of the value being converted.
    switch (Op->SrcSize) {
    case IR::OpSize::i32Bit: {
      *Info = {FABI_F80_I16_F32_PTR, Core::OPINDEX_F80CVTTO_4};
      return true;
    }
    case IR::OpSize::i64Bit: {
      *Info = {FABI_F80_I16_F64_PTR, Core::OPINDEX_F80CVTTO_8};
      return true;
    }
    // Report the size that was actually switched on rather than the op's own size.
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
    }
    break;
  }
  case IR::OP_F80CVT: {
    // Here the op's own size selects the handler.
    switch (OpSize) {
    case IR::OpSize::i32Bit: {
      *Info = {FABI_F32_I16_F80_PTR, Core::OPINDEX_F80CVT_4};
      return true;
    }
    case IR::OpSize::i64Bit: {
      *Info = {FABI_F64_I16_F80_PTR, Core::OPINDEX_F80CVT_8};
      return true;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
    }
    break;
  }
  case IR::OP_F80CVTINT: {
    auto Op = IROp->C<IR::IROp_F80CVTInt>();

    // Each integer width has a truncating and a non-truncating handler that
    // share the same ABI tag.
    switch (OpSize) {
    case IR::OpSize::i16Bit: {
      if (Op->Truncate) {
        *Info = {FABI_I16_I16_F80_PTR, Core::OPINDEX_F80CVTINT_TRUNC2};
      } else {
        *Info = {FABI_I16_I16_F80_PTR, Core::OPINDEX_F80CVTINT_2};
      }
      return true;
    }
    case IR::OpSize::i32Bit: {
      if (Op->Truncate) {
        *Info = {FABI_I32_I16_F80_PTR, Core::OPINDEX_F80CVTINT_TRUNC4};
      } else {
        *Info = {FABI_I32_I16_F80_PTR, Core::OPINDEX_F80CVTINT_4};
      }
      return true;
    }
    case IR::OpSize::i64Bit: {
      if (Op->Truncate) {
        *Info = {FABI_I64_I16_F80_PTR, Core::OPINDEX_F80CVTINT_TRUNC8};
      } else {
        *Info = {FABI_I64_I16_F80_PTR, Core::OPINDEX_F80CVTINT_8};
      }
      return true;
    }
    default: LogMan::Msg::DFmt("Unhandled size: {}", OpSize);
    }
    break;
  }
  case IR::OP_F80CMP: {
    *Info = {FABI_I64_I16_F80_F80_PTR, (Core::FallbackHandlerIndex)(Core::OPINDEX_F80CMP)};
    return true;
  }

  case IR::OP_F80CVTTOINT: {
    auto Op = IROp->C<IR::IROp_F80CVTToInt>();

    // The handler is chosen by the width of the source integer.
    switch (Op->SrcSize) {
    case IR::OpSize::i16Bit: {
      *Info = {FABI_F80_I16_I16_PTR, Core::OPINDEX_F80CVTTOINT_2};
      return true;
    }
    case IR::OpSize::i32Bit: {
      *Info = {FABI_F80_I16_I32_PTR, Core::OPINDEX_F80CVTTOINT_4};
      return true;
    }
    // Report the size that was actually switched on rather than the op's own size.
    default: LogMan::Msg::DFmt("Unhandled size: {}", Op->SrcSize);
    }
    break;
  }

// The helper macros below each expand to one `case` label that fills *Info
// with a fixed ABI tag and the handler index named after the op, then
// returns true.

// F80 op taking one F80 operand and producing one F80 result.
#define COMMON_UNARY_X87_OP(OP)                            \
  case IR::OP_F80##OP: {                                   \
    *Info = {FABI_F80_I16_F80_PTR, Core::OPINDEX_F80##OP}; \
    return true;                                           \
  }

// F80 op taking one F80 operand and producing a pair of F80 results.
#define COMMON_UNARYPAIR_X87_OP(OP)                          \
  case IR::OP_F80##OP: {                                     \
    *Info = {FABI_F80x2_I16_F80_PTR, Core::OPINDEX_F80##OP}; \
    return true;                                             \
  }

// F80 op taking two F80 operands and producing one F80 result.
#define COMMON_BINARY_X87_OP(OP)                               \
  case IR::OP_F80##OP: {                                       \
    *Info = {FABI_F80_I16_F80_F80_PTR, Core::OPINDEX_F80##OP}; \
    return true;                                               \
  }

// NOTE(review): not expanded anywhere in this function; kept as-is.
#define COMMON_F64_OP(OP)                                                                              \
  case IR::OP_F64##OP: {                                                                               \
    *Info = GetFallbackInfo(&FEXCore::CPU::OpHandlers<IR::OP_F64##OP>::handle, Core::OPINDEX_F64##OP); \
    return true;                                                                                       \
  }

// F64 op taking one F64 operand and producing one F64 result.
#define COMMON_UNARY_F64_OP(OP)                        \
  case IR::OP_F64##OP: {                               \
    *Info = {FABI_F64_F64_PTR, Core::OPINDEX_F64##OP}; \
    return true;                                       \
  }
// F64 op taking one F64 operand and producing a pair of F64 results.
#define COMMON_UNARYPAIR_F64_OP(OP)                      \
  case IR::OP_F64##OP: {                                 \
    *Info = {FABI_F64x2_F64_PTR, Core::OPINDEX_F64##OP}; \
    return true;                                         \
  }

// F64 op taking two F64 operands and producing one F64 result.
#define COMMON_BINARY_F64_OP(OP)                           \
  case IR::OP_F64##OP: {                                   \
    *Info = {FABI_F64_F64_F64_PTR, Core::OPINDEX_F64##OP}; \
    return true;                                           \
  }

    // Unary
    COMMON_UNARY_X87_OP(ROUND)
    COMMON_UNARY_X87_OP(F2XM1)
    COMMON_UNARY_X87_OP(TAN)
    COMMON_UNARY_X87_OP(SQRT)
    COMMON_UNARY_X87_OP(SIN)
    COMMON_UNARY_X87_OP(COS)
    COMMON_UNARYPAIR_X87_OP(SINCOS)
    COMMON_UNARY_X87_OP(XTRACT_EXP)
    COMMON_UNARY_X87_OP(XTRACT_SIG)
    COMMON_UNARY_X87_OP(BCDSTORE)
    COMMON_UNARY_X87_OP(BCDLOAD)

    // Binary
    COMMON_BINARY_X87_OP(ADD)
    COMMON_BINARY_X87_OP(SUB)
    COMMON_BINARY_X87_OP(MUL)
    COMMON_BINARY_X87_OP(DIV)
    COMMON_BINARY_X87_OP(FYL2X)
    COMMON_BINARY_X87_OP(ATAN)
    COMMON_BINARY_X87_OP(FPREM1)
    COMMON_BINARY_X87_OP(FPREM)
    COMMON_BINARY_X87_OP(SCALE)

    // Double Precision Unary
    COMMON_UNARY_F64_OP(F2XM1)
    COMMON_UNARY_F64_OP(TAN)
    COMMON_UNARY_F64_OP(SIN)
    COMMON_UNARY_F64_OP(COS)
    COMMON_UNARYPAIR_F64_OP(SINCOS)

    // Double Precision Binary
    COMMON_BINARY_F64_OP(FYL2X)
    COMMON_BINARY_F64_OP(ATAN)
    COMMON_BINARY_F64_OP(FPREM1)
    COMMON_BINARY_F64_OP(FPREM)
    COMMON_BINARY_F64_OP(SCALE)

  // SSE4.2 Fallbacks
  case IR::OP_VPCMPESTRX: *Info = {FABI_I32_I64_I64_V128_V128_I16, Core::OPINDEX_VPCMPESTRX}; return true;
  case IR::OP_VPCMPISTRX: *Info = {FABI_I32_V128_V128_I16, Core::OPINDEX_VPCMPISTRX}; return true;

  default: break;
  }

  // Either the op has no fallback or its size was not handled above.
  return false;
}


} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/Fallbacks/StringCompareFallbacks.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Interface/Core/Interpreter/Fallbacks/VectorFallbacks.h"
#include "Interface/IR/IR.h"

#ifdef ARCHITECTURE_arm64
#include <arm_neon.h>
#endif

#include <cstring>

namespace FEXCore::CPU {
#ifdef ARCHITECTURE_arm64
// NEON implementation of the implicit string length used by PCMPISTRx.
//
// Returns the index of the first all-zero character in `data`. When no
// character is zero the result is the full element count: 16 for byte
// characters, 8 for word characters. Bit 0 of `control` selects words.
FEXCORE_PRESERVE_ALL_ATTR static int32_t GetImplicitLength(FEXCore::VectorRegType data, uint16_t control) {
  if ((control & 1) == 0) {
    // Byte characters: every lane that holds a NUL keeps its own lane number,
    // every other lane is replaced by 16.
    uint8_t LaneNumbers[16] = {
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    };
    uint8x16_t Lanes {};
    memcpy(&Lanes, LaneNumbers, sizeof(Lanes));

    const uint8x16_t NoTerminator = vdupq_n_u8(16);
    const auto IsNul = vceqzq_u8(data);
    const auto Candidates = vbslq_u8(IsNul, Lanes, NoTerminator);

    // The horizontal minimum is the position of the first NUL (or 16).
    return vminvq_u8(Candidates);
  }

  // Word characters: same scheme with eight 16-bit lanes and 8 as the
  // "no terminator" value.
  uint16_t LaneNumbers[8] = {
    0, 1, 2, 3, 4, 5, 6, 7,
  };
  uint16x8_t Lanes {};
  memcpy(&Lanes, LaneNumbers, sizeof(Lanes));

  const uint16x8_t Words = vreinterpretq_u16_u8(data);
  const uint16x8_t NoTerminator = vdupq_n_u16(8);
  const auto IsNul = vceqzq_u16(Words);
  const auto Candidates = vbslq_u16(IsNul, Lanes, NoTerminator);

  return vminvq_u16(Candidates);
}
#else
// Portable implementation of the implicit string length used by PCMPISTRx.
//
// Scans `data` from element 0 and returns the index of the first all-zero
// character. When no character is zero the result is the full element count:
// 16 for byte characters, 8 for word characters. Bit 0 of `control` selects
// word characters.
FEXCORE_PRESERVE_ALL_ATTR static int32_t GetImplicitLength(FEXCore::VectorRegType data, uint16_t control) {
  const auto* bytes = reinterpret_cast<const uint8_t*>(&data);
  const bool wide_chars = (control & 1) != 0;

  if (!wide_chars) {
    for (int32_t pos = 0; pos < 16; ++pos) {
      if (bytes[pos] == 0) {
        return pos;
      }
    }
    return 16;
  }

  for (int32_t pos = 0; pos < 8; ++pos) {
    // Copy the word out bytewise so no alignment is assumed for the vector.
    uint16_t ch {};
    std::memcpy(&ch, bytes + (pos * sizeof(uint16_t)), sizeof(uint16_t));
    if (ch == 0) {
      return pos;
    }
  }
  return 8;
}
#endif

// Implicit-length string compare.
//
// Behaves like the explicit-length VPCMPESTRX handler, except that the string
// lengths are not passed in: each length is derived from the data itself by
// looking for the first NUL (all-zero) character.
//
// If a vector contains no NUL character, its string is taken to have the
// maximum length for the character size selected in the control flags
// (16 characters for 8-bit, 8 characters for 16-bit).
//
FEXCORE_PRESERVE_ALL_ATTR uint32_t OpHandlers<IR::OP_VPCMPISTRX>::handle(FEXCore::VectorRegType lhs, FEXCore::VectorRegType rhs, uint16_t control) {
  // Reinterpret both vectors as raw 128-bit values for the shared algorithm.
  __uint128_t lhs_bits;
  __uint128_t rhs_bits;
  memcpy(&lhs_bits, &lhs, sizeof(lhs_bits));
  memcpy(&rhs_bits, &rhs, sizeof(rhs_bits));

  // MainBody expects the index of the last valid element, hence length - 1
  // (which is -1 for an empty string).
  const auto last_lhs = GetImplicitLength(lhs, control) - 1;
  const auto last_rhs = GetImplicitLength(rhs, control) - 1;

  return OpHandlers<IR::OP_VPCMPESTRX>::MainBody(lhs_bits, last_lhs, rhs_bits, last_rhs, control);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/Fallbacks/VectorFallbacks.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>

#include "Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h"
#include "Interface/IR/IR.h"
#include "Common/VectorRegType.h"

namespace FEXCore::CPU {

// Software implementation of the SSE4.2 explicit-length string compare.
//
// Throughout this struct `valid_lhs` / `valid_rhs` are the 0-based index of
// the last valid character of each string, i.e. length - 1, so an empty
// string is represented by -1. Bit 0 of `control` selects 16-bit characters
// (8 per vector) instead of 8-bit characters (16 per vector).
template<>
struct OpHandlers<IR::OP_VPCMPESTRX> {
  // control[3:2]: how the two strings are compared.
  enum class AggregationOp {
    EqualAny = 0b00,
    Ranges = 0b01,
    EqualEach = 0b10,
    EqualOrdered = 0b11,
  };

  // control[1:0]: character width and signedness.
  enum class SourceData {
    U8,
    U16,
    S8,
    S16,
  };

  // control[5:4]: post-processing applied to the aggregation result.
  enum class Polarity {
    Positive,
    Negative,
    PositiveMasked,
    NegativeMasked,
  };

  // Entry point: derives both lengths from RAX/RDX and runs the comparison.
  // Returns the value produced by MainBody.
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t handle(uint64_t RAX, uint64_t RDX, VectorRegType lhs_v, VectorRegType rhs_v, uint16_t control) {
    // Convert lengths to "index of last valid element".
    const auto last_lhs = GetExplicitLength(RAX, control) - 1;
    const auto last_rhs = GetExplicitLength(RDX, control) - 1;

    __uint128_t lhs_bits;
    __uint128_t rhs_bits;
    memcpy(&lhs_bits, &lhs_v, sizeof(lhs_bits));
    memcpy(&rhs_bits, &rhs_v, sizeof(rhs_bits));

    return MainBody(lhs_bits, last_lhs, rhs_bits, last_rhs, control);
  }

  // Main PCMPXSTRX algorithm body, shared by the implicit and explicit length variants.
  //
  // The return value carries the final per-element result mask in its low
  // bits and the resulting flags in bits [31:28].
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t MainBody(const __uint128_t& lhs, int valid_lhs, const __uint128_t& rhs, int valid_rhs, uint16_t control) {
    // Index of the last element a vector can hold: 15 for bytes, 7 for words.
    const int32_t upper_limit = (16 >> (control & 1)) - 1;

    const uint32_t aggregated = PerformAggregation(lhs, valid_lhs, rhs, valid_rhs, control);
    const uint32_t result = HandlePolarity(aggregated, control, upper_limit, valid_rhs);

    // Flag bits are arranged as:
    // Bit #:   3    2    1    0
    //         [SF | ZF | CF | OF]
    uint32_t flags = 0;
    if (valid_lhs < upper_limit) {
      flags |= 0b1000; // SF: lhs is shorter than a full vector
    }
    if (valid_rhs < upper_limit) {
      flags |= 0b0100; // ZF: rhs is shorter than a full vector
    }
    if (result != 0) {
      flags |= 0b0010; // CF: any result bit set
    }
    if ((result & 1) != 0) {
      flags |= 0b0001; // OF: result bit 0 set
    }

    // The flags are placed at the usual NZCV bit position so they can be
    // msr'd later, which avoids handling flags natively in the JIT.
    return result | (flags << 28);
  }

  // Converts an explicit length register into a character count in
  // [0, 16] (bytes) or [0, 8] (words).
  FEXCORE_PRESERVE_ALL_ATTR static int32_t GetExplicitLength(uint64_t reg, uint16_t control) {
    // Bit 8 of control selects a 64-bit length; otherwise the low 32 bits
    // are used and sign-extended.
    const bool is_64bit = ((control >> 8) & 1) != 0;
    const int64_t value = is_64bit ? static_cast<int64_t>(reg) : static_cast<int64_t>(static_cast<int32_t>(reg));

    // control[0] set means words, otherwise bytes.
    const int64_t limit = (control & 1) != 0 ? 8 : 16;

    // Magnitudes above the limit saturate to the limit; everything else
    // contributes its absolute value.
    const bool in_range = value >= -limit && value <= limit;
    if (!in_range) {
      return static_cast<int32_t>(limit);
    }

    return std::abs(static_cast<int>(value));
  }

  // Reads character `index` from `vec`, widened to int32_t according to the
  // data type in control[1:0] (zero-extended for U8/U16, sign-extended for
  // S8/S16).
  FEXCORE_PRESERVE_ALL_ATTR static int32_t GetElement(const __uint128_t& vec, int32_t index, uint16_t control) {
    const auto* bytes = reinterpret_cast<const uint8_t*>(&vec);
    const auto type = static_cast<SourceData>(control & 0b11);

    if (type == SourceData::U8) {
      return static_cast<int32_t>(bytes[index]);
    }
    if (type == SourceData::S8) {
      return static_cast<int8_t>(bytes[index]);
    }

    // Both 16-bit types read from the same location.
    const auto* src = bytes + (sizeof(uint16_t) * static_cast<size_t>(index));
    if (type == SourceData::U16) {
      uint16_t unsigned_word {};
      std::memcpy(&unsigned_word, src, sizeof(unsigned_word));
      return unsigned_word;
    }

    int16_t signed_word {};
    std::memcpy(&signed_word, src, sizeof(signed_word));
    return signed_word;
  }

  // Dispatches to the comparison selected by control[3:2] and returns its
  // intermediate result mask.
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t
  PerformAggregation(const __uint128_t& lhs, int32_t valid_lhs, const __uint128_t& rhs, int32_t valid_rhs, uint16_t control) {
    const auto op = static_cast<AggregationOp>((control >> 2) & 0b11);

    if (op == AggregationOp::EqualAny) {
      return HandleEqualAny(lhs, valid_lhs, rhs, valid_rhs, control);
    }
    if (op == AggregationOp::Ranges) {
      return HandleRanges(lhs, valid_lhs, rhs, valid_rhs, control);
    }
    if (op == AggregationOp::EqualEach) {
      return HandleEqualEach(lhs, valid_lhs, rhs, valid_rhs, control);
    }
    return HandleEqualOrdered(lhs, valid_lhs, rhs, valid_rhs, control);
  }

  // Applies the polarity selected by control[5:4] to the intermediate result.
  //
  // Negative inverts every bit up to `upper_limit`; NegativeMasked inverts
  // only the bits that correspond to valid rhs elements. Both positive
  // variants are documented as "IntRes2 = IntRes1", so the value is returned
  // unchanged.
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t HandlePolarity(uint32_t value, uint16_t control, int upper_limit, int valid_rhs) {
    const auto polarity = static_cast<Polarity>((control >> 4) & 0b11);

    if (polarity == Polarity::Negative) {
      return value ^ ((2U << upper_limit) - 1);
    }
    if (polarity == Polarity::NegativeMasked) {
      return value ^ ((1U << (valid_rhs + 1)) - 1);
    }
    return value;
  }

  // Finds characters from an overall character set.
  //
  // For every valid character of RHS, sets the matching result bit if that
  // character occurs anywhere among the valid characters of LHS. Roughly a
  // vectorized strspn.
  //
  // e.g. operating on unsigned words, both lengths 8:
  //
  //         0  1  2  3  4  5  6  7
  // LHS -> [a, b, c, d, e, f, g, n]
  // RHS -> [z, k, v, c, d, o, p, n]
  //
  // gives 0b1001'1000: bit 3 for 'c', bit 4 for 'd', bit 7 for 'n'.
  //
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t
  HandleEqualAny(const __uint128_t& lhs, int32_t valid_lhs, const __uint128_t& rhs, int32_t valid_rhs, uint16_t control) {
    uint32_t mask = 0;

    // Walk RHS from its last valid element down so that the bit for element
    // 0 ends up in bit 0 of the mask.
    for (int rhs_idx = valid_rhs; rhs_idx >= 0; --rhs_idx) {
      const int candidate = GetElement(rhs, rhs_idx, control);

      bool found = false;
      for (int lhs_idx = valid_lhs; lhs_idx >= 0; --lhs_idx) {
        found |= (GetElement(lhs, lhs_idx, control) == candidate);
      }

      mask = (mask << 1) | static_cast<uint32_t>(found);
    }

    return mask;
  }

  // Determines if a character falls within a set of ranges.
  //
  // LHS is read as consecutive (lower bound, upper bound) pairs. For every
  // valid character of RHS, the matching result bit is set if the character
  // lies inside any complete pair:
  //
  //      lower_bound <= rhs_value && rhs_value <= upper_bound
  //
  // e.g. operating on unsigned words, LHS length 4 and RHS length 8:
  //
  //         0  1  2  3  4  5  6  7
  // LHS -> [a, z, A, Z, 0, 0, 0, 0]
  // RHS -> [z, k, ., C, M, ;, #, ']
  //
  // gives 0b0001'1011:
  //   bit 0: 'a' <= 'z' <= 'z'
  //   bit 1: 'a' <= 'k' <= 'z'
  //   bit 3: 'A' <= 'C' <= 'Z'
  //   bit 4: 'A' <= 'M' <= 'Z'
  //
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t
  HandleRanges(const __uint128_t& lhs, int32_t valid_lhs, const __uint128_t& rhs, int32_t valid_rhs, uint16_t control) {
    uint32_t mask = 0;

    for (int rhs_idx = valid_rhs; rhs_idx >= 0; --rhs_idx) {
      const int candidate = GetElement(rhs, rhs_idx, control);

      // (valid_lhs - 1) | 1 is the index of the upper bound of the last
      // complete pair; it is -1 when LHS holds fewer than two characters.
      bool in_any_range = false;
      for (int upper_idx = (valid_lhs - 1) | 1; upper_idx >= 0; upper_idx -= 2) {
        const int upper_bound = GetElement(lhs, upper_idx, control);
        const int lower_bound = GetElement(lhs, upper_idx - 1, control);

        in_any_range |= (lower_bound <= candidate && candidate <= upper_bound);
      }

      mask = (mask << 1) | static_cast<uint32_t>(in_any_range);
    }

    return mask;
  }

  // Determines if each character is equal to its counterpart (string compare).
  //
  // The PCMPXSTRX flavour of memcmp/strcmp: a result bit is set when the
  // elements at that index are equal in both strings.
  //
  // e.g. operating on unsigned words, both lengths 8:
  //
  //         0  1  2  3  4  5  6  7
  // LHS -> [a, b, c, d, e, f, g, n]
  // RHS -> [a, b, c, d, e, f, e, x]
  //
  // gives 0b0011'1111 (indices 0 through 5 match).
  //
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t
  HandleEqualEach(const __uint128_t& lhs, int32_t valid_lhs, const __uint128_t& rhs, int32_t valid_rhs, uint16_t control) {
    const int32_t upper_limit = (16 >> (control & 1)) - 1;
    const int32_t shorter = std::min(valid_lhs, valid_rhs);
    const int32_t longer = std::max(valid_lhs, valid_rhs);

    // Positions past the end of both strings are forced to true, and
    // positions past the end of only one string are forced to false.
    // (See 4.1.6 Valid/Invalid Override of Comparisons in the Intel Software Development Manual)
    // Seed the mask with the forced-true bits, make room for the forced-false
    // bits, and let the loop below shift everything into its final place.
    uint32_t mask = (1U << (upper_limit - longer)) - 1;
    mask <<= (longer - shorter);

    for (int idx = shorter; idx >= 0; --idx) {
      const bool same = GetElement(lhs, idx, control) == GetElement(rhs, idx, control);
      mask = (mask << 1) | static_cast<uint32_t>(same);
    }

    return mask;
  }

  // Determines if a substring exists within an overall string.
  //
  // Somewhat equivalent to strstr: LHS is the substring to look for, RHS the
  // string to search. Result bit j is set when LHS matches RHS starting at
  // index j.
  //
  // e.g. operating on unsigned words, LHS length 2 and RHS length 8:
  //
  //         0  1  2  3  4  5  6  7
  // LHS -> [b, a, x, z, y, v, o, m]   (substring "ba")
  // RHS -> [b, a, d, b, a, n, k, s]
  //
  // gives 0b0000'1001: matches at index 0 and index 3.
  //
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t
  HandleEqualOrdered(const __uint128_t& lhs, int32_t valid_lhs, const __uint128_t& rhs, int32_t valid_rhs, uint16_t control) {
    const int upper_limit = (16 >> (control & 1)) - 1;

    // Edge case: with no valid characters in the substring the intermediate
    // result is all ones, 0xFF for words or 0xFFFF for bytes.
    if (valid_lhs == -1) {
      return (2U << upper_limit) - 1;
    }

    // When RHS fills the whole vector every offset is tried and the
    // comparison is clipped at the end of the vector. Otherwise only offsets
    // where the entire substring fits inside RHS are tried.
    const int first_offset = (valid_rhs == upper_limit) ? valid_rhs : (valid_rhs - valid_lhs);

    uint32_t mask = 0;
    for (int offset = first_offset; offset >= 0; --offset) {
      const int last = std::min(valid_rhs - offset, valid_lhs);

      bool matches = true;
      for (int i = last; i >= 0; --i) {
        matches &= (GetElement(lhs, i, control) == GetElement(rhs, i + offset, control));
      }

      mask = (mask << 1) | static_cast<uint32_t>(matches);
    }

    return mask;
  }
};

// Implicit-length counterpart of the VPCMPESTRX handler. Only the entry point
// is declared here; its definition lives out of line.
template<>
struct OpHandlers<IR::OP_VPCMPISTRX> {
  // Compares `lhs` and `rhs` as directed by `control`. Takes no explicit
  // length arguments, unlike OpHandlers<IR::OP_VPCMPESTRX>::handle.
  FEXCORE_PRESERVE_ALL_ATTR static uint32_t handle(VectorRegType lhs, VectorRegType rhs, uint16_t control);
};

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/Interpreter/InterpreterOps.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/IR/IR.h>

namespace FEXCore::IR {
class IRListView;
struct IROp_Header;
} // namespace FEXCore::IR

namespace FEXCore::CPU {
// Tags describing the call signature of a fallback handler.
//
// The names read as FABI_<return>_<arg0>_<arg1>...; for example the handler
// paired with FABI_I32_V128_V128_I16 returns a 32-bit integer and takes two
// 128-bit vectors followed by a 16-bit integer.
// NOTE(review): the meaning of the I16 that follows the return type on the
// F80 entries and of the trailing PTR is not visible from this header -
// confirm against the handler signatures.
enum FallbackABI {
  FABI_F80_I16_F32_PTR,
  FABI_F80_I16_F64_PTR,
  FABI_F80_I16_I16_PTR,
  FABI_F80_I16_I32_PTR,
  FABI_F32_I16_F80_PTR,
  FABI_F64_I16_F80_PTR,
  FABI_F64_F64_PTR,
  FABI_F64_F64_F64_PTR,
  FABI_I16_I16_F80_PTR,
  FABI_I32_I16_F80_PTR,
  FABI_I64_I16_F80_PTR,
  FABI_I64_I16_F80_F80_PTR,
  FABI_F80_I16_F80_PTR,
  FABI_F80_I16_F80_F80_PTR,
  // "x2" entries are used for ops that produce a pair of results (SINCOS).
  FABI_F80x2_I16_F80_PTR,
  FABI_F64x2_F64_PTR,
  // SSE4.2 string compares (explicit-length and implicit-length).
  FABI_I32_I64_I64_V128_V128_I16,
  FABI_I32_V128_V128_I16,
  FABI_UNKNOWN,
};
// Result of a fallback lookup: which call signature the handler uses and
// which handler slot implements the op.
struct FallbackInfo {
  // Call-signature tag of the handler.
  FallbackABI ABI;
  // Index identifying the handler.
  FEXCore::Core::FallbackHandlerIndex HandlerIndex;
};

// Static helpers that connect IR ops to their fallback handlers.
class InterpreterOps {
public:
  // Fills `Info`, indexed by fallback handler index, with one entry per
  // handler; `ABIHandlers` is indexed by FallbackABI.
  static void FillFallbackIndexPointers(Core::FallbackABIInfo* Info, uint64_t* ABIHandlers);
  // Writes the ABI tag and handler index for `IROp` into `*Info` and returns
  // true, or returns false when no fallback applies.
  static bool GetFallbackHandler(const IR::IROp_Header* IROp, FallbackInfo* Info);
};
} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/ALUOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "CodeEmitter/Emitter.h"
#include "FEXCore/IR/IR.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/JIT/JITClass.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"

namespace FEXCore::CPU {

// Select the 32-bit or 64-bit register accessor for a node's destination
// (GRD) or source (GRS) based on the size of the op being emitted. Both
// macros expect a variable named `IROp` to be in scope at the expansion site.
// The size is compared against the named IR::OpSize constant, matching every
// other size check in this file, instead of a bare byte count.
#define GRD(Node) (IROp->Size <= IR::OpSize::i32Bit ? GetDst<RA_32>(Node) : GetDst<RA_64>(Node))
#define GRS(Node) (IROp->Size <= IR::OpSize::i32Bit ? GetReg<RA_32>(Node) : GetReg<RA_64>(Node))

// Defines the emitter for a two-source ALU op whose second source may be an
// inline constant.
//
//   FEXOp   - IR op name (also selects the IR::IROp_<FEXOp> payload type)
//   VarOp   - emitter method used when Src2 lives in a register
//   ConstOp - emitter method used when Src2 is an inline constant
//
// The register form reads Src1 through GetZeroableReg, the immediate form
// through GetReg.
#define DEF_BINOP_WITH_CONSTANT(FEXOp, VarOp, ConstOp)                    \
  DEF_OP(FEXOp) {                                                         \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                          \
    const auto EmitSize = ConvertSize(IROp);                              \
    const auto Dst = GetReg(Node);                                        \
                                                                          \
    uint64_t Imm;                                                         \
    if (!IsInlineConstant(Op->Src2, &Imm)) {                              \
      VarOp(EmitSize, Dst, GetZeroableReg(Op->Src1), GetReg(Op->Src2));   \
    } else {                                                              \
      ConstOp(EmitSize, Dst, GetReg(Op->Src1), Imm);                      \
    }                                                                     \
  }

// Plain arithmetic
DEF_BINOP_WITH_CONSTANT(Add, add, add)
DEF_BINOP_WITH_CONSTANT(Sub, sub, sub)
// Flag-setting arithmetic
DEF_BINOP_WITH_CONSTANT(AddWithFlags, adds, adds)
DEF_BINOP_WITH_CONSTANT(SubWithFlags, subs, subs)
// Bitwise logic
DEF_BINOP_WITH_CONSTANT(Or, orr, orr)
DEF_BINOP_WITH_CONSTANT(And, and_, and_)
DEF_BINOP_WITH_CONSTANT(Andn, bic, bic)
DEF_BINOP_WITH_CONSTANT(Xor, eor, eor)
// Shifts and rotates: distinct emitter methods for register and immediate amounts
DEF_BINOP_WITH_CONSTANT(Lshl, lslv, lsl)
DEF_BINOP_WITH_CONSTANT(Lshr, lsrv, lsr)
DEF_BINOP_WITH_CONSTANT(Ror, rorv, ror)

// Loads the op's 64-bit constant into the destination register.
DEF_OP(Constant) {
  const auto Op = IROp->C<IR::IROp_Constant>();

  // Map the IR padding request onto the emitter's enum; any value other than
  // NoPad / DoPad falls back to AUTOPAD.
  auto Padding = CPU::Arm64Emitter::PadType::AUTOPAD;
  switch (Op->Pad) {
  case IR::ConstPad::NoPad: Padding = CPU::Arm64Emitter::PadType::NOPAD; break;
  case IR::ConstPad::DoPad: Padding = CPU::Arm64Emitter::PadType::DOPAD; break;
  default: break;
  }

  LoadConstant(ARMEmitter::Size::i64Bit, GetReg(Node), Op->Constant, Padding, Op->MaxBytes);
}

// Materialises `Entry + Offset` in the destination register, truncated to
// 32 bits when the op itself is 32-bit.
DEF_OP(EntrypointOffset) {
  const auto Op = IROp->C<IR::IROp_EntrypointOffset>();

  const uint64_t Mask = (IROp->Size == IR::OpSize::i32Bit) ? 0xFFFF'FFFFULL : ~0ULL;
  const auto Target = Entry + Op->Offset;

  InsertGuestRIPMove(GetReg(Node), Target & Mask);
}

// Emits no code.
// NOTE(review): presumably the value is consumed directly by the ops that
// query it through IsInlineConstant() - confirm.
DEF_OP(InlineConstant) {
  // nop
}

// Emits no code.
// NOTE(review): presumably the offset is folded into whichever op consumes
// it - confirm.
DEF_OP(InlineEntrypointOffset) {
  // nop
}

// Reads the virtual counter into the destination register, optionally
// ordered against earlier loads.
DEF_OP(CycleCounter) {
  const auto Op = IROp->C<IR::IROp_CycleCounter>();

  if (!Op->SelfSynchronizingLoads) {
    // No ordering requested: a plain counter read is enough.
    mrs(GetReg(Node), ARMEmitter::SystemRegister::CNTVCT_EL0);
  } else if (CTX->HostFeatures.SupportsECV) {
    // CNTVCTSS_EL0 is "self-synchronizing": loads cannot speculate past this
    // read. Stores are still not synchronized by it.
    mrs(GetReg(Node), ARMEmitter::SystemRegister::CNTVCTSS_EL0);
  } else {
    // Without ECV an ISB is required to synchronize all instructions and
    // loads before the counter is read.
    isb();
    mrs(GetReg(Node), ARMEmitter::SystemRegister::CNTVCT_EL0);
  }
}

// Dst = Src1 + (Src2 shifted by ShiftAmount using the op's shift type).
DEF_OP(AddShift) {
  const auto Op = IROp->C<IR::IROp_AddShift>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  add(EmitSize, Dst, Lhs, Rhs, ConvertIRShiftType(Op->Shift), Op->ShiftAmount);
}

// Sets NZCV from Src1 + Src2 without keeping the sum.
DEF_OP(AddNZCV) {
  const auto Op = IROp->C<IR::IROp_AddNZCV>();
  const auto EmitSize = ConvertSize(IROp);
  const auto Lhs = GetReg(Op->Src1);
  const bool IsSubWord = IROp->Size < IR::OpSize::i32Bit;

  uint64_t Imm;
  if (IsInlineConstant(Op->Src2, &Imm)) {
    // Immediates are only expected for 32-bit and 64-bit operations.
    LOGMAN_THROW_A_FMT(IROp->Size >= IR::OpSize::i32Bit, "Constant not allowed here");
    cmn(EmitSize, Lhs, Imm);
  } else if (IsSubWord) {
    // For 8/16-bit operations move both operands to the top of a 32-bit
    // register so the flags are produced from the operands' own top bits.
    const unsigned Shift = 32 - IR::OpSizeAsBits(IROp->Size);

    lsl(ARMEmitter::Size::i32Bit, TMP1, Lhs, Shift);
    cmn(EmitSize, TMP1, GetReg(Op->Src2), ARMEmitter::ShiftType::LSL, Shift);
  } else {
    cmn(EmitSize, Lhs, GetReg(Op->Src2));
  }
}

// Add-with-carry executed only for its flags: the sum goes to the zero register.
DEF_OP(AdcNZCV) {
  const auto Op = IROp->C<IR::IROp_AdcNZCV>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  adcs(EmitSize, ARMEmitter::Reg::zr, Lhs, Rhs);
}

// Add-with-carry that writes both the sum and the resulting flags.
DEF_OP(AdcWithFlags) {
  const auto Op = IROp->C<IR::IROp_AdcWithFlags>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  adcs(EmitSize, Dst, Lhs, Rhs);
}

// Adds a 0/1 value derived from the carry flag to Src1 and sets flags from
// that addition.
// NOTE(review): the increment is keyed on the carry-clear condition (CC_CC);
// presumably this op is defined on an inverted carry - confirm against the
// IR definition.
DEF_OP(AdcZeroWithFlags) {
  const auto Op = IROp->C<IR::IROp_AdcZeroWithFlags>();
  const auto EmitSize = ConvertSize48(IROp);

  // TMP1 = 1 when the condition holds, 0 otherwise.
  cset(EmitSize, TMP1, ARMEmitter::Condition::CC_CC);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  adds(EmitSize, Dst, Lhs, TMP1);
}

// Dst = Src1, incremented by one when the condition holds; flags are not written.
// NOTE(review): the increment is keyed on the carry-clear condition (CC_CC);
// presumably this op is defined on an inverted carry - confirm against the
// IR definition.
DEF_OP(AdcZero) {
  const auto Op = IROp->C<IR::IROp_AdcZero>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);

  cinc(EmitSize, Dst, Lhs, ARMEmitter::Condition::CC_CC);
}

// Add-with-carry that leaves the flags untouched.
DEF_OP(Adc) {
  const auto Op = IROp->C<IR::IROp_Adc>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  adc(EmitSize, Dst, Lhs, Rhs);
}

// Subtract-with-carry that writes both the difference and the resulting flags.
DEF_OP(SbbWithFlags) {
  const auto Op = IROp->C<IR::IROp_SbbWithFlags>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  sbcs(EmitSize, Dst, Lhs, Rhs);
}

// Subtract-with-carry executed only for its flags: the difference goes to the
// zero register.
DEF_OP(SbbNZCV) {
  const auto Op = IROp->C<IR::IROp_SbbNZCV>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  sbcs(EmitSize, ARMEmitter::Reg::zr, Lhs, Rhs);
}

// Subtract-with-carry that leaves the flags untouched.
DEF_OP(Sbb) {
  const auto Op = IROp->C<IR::IROp_Sbb>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetZeroableReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  sbc(EmitSize, Dst, Lhs, Rhs);
}

// Sets N and Z from (Src1 & Src2), for any operation size.
DEF_OP(TestNZ) {
  const auto Op = IROp->C<IR::IROp_TestNZ>();
  const auto EmitSize = ConvertSize(IROp);

  uint64_t Imm;
  auto Value = GetReg(Op->Src1);

  if (IROp->Size >= IR::OpSize::i32Bit) {
    // Native sizes: a plain tst does the job.
    if (IsInlineConstant(Op->Src2, &Imm)) {
      tst(EmitSize, Value, Imm);
    } else {
      tst(EmitSize, Value, GetReg(Op->Src2));
    }
  } else {
    // 8/16-bit: shift the sign bit into place, which also discards the
    // garbage in the upper bits. Adding zero acts as a test: it sets NZ
    // according to the result and zeroes CV.
    //
    // It is cheaper to and+cmn than to lsl+lsl+tst, so perform the and here
    // when the two sources differ.
    if (Op->Src1 != Op->Src2) {
      if (IsInlineConstant(Op->Src2, &Imm)) {
        and_(EmitSize, TMP1, Value, Imm);
      } else {
        and_(EmitSize, TMP1, Value, GetReg(Op->Src2));
      }

      Value = TMP1;
    }

    const unsigned Shift = 32 - IR::OpSizeAsBits(IROp->Size);
    cmn(EmitSize, ARMEmitter::Reg::zr, Value, ARMEmitter::ShiftType::LSL, Shift);
  }
}

// Tests (Src1 & Src2) restricted to the low bits of an 8/16-bit operation,
// using a 32-bit tst.
DEF_OP(TestZ) {
  auto Op = IROp->C<IR::IROp_TestZ>();
  // The assertion message now names this op; it previously said "TestNZ".
  LOGMAN_THROW_A_FMT(IROp->Size < IR::OpSize::i32Bit, "TestZ used at higher sizes");
  const auto EmitSize = ARMEmitter::Size::i32Bit;

  uint64_t Const;
  // Mask covering exactly the bits of the operation size.
  uint64_t Mask = IROp->Size == IR::OpSize::i64Bit ? ~0ULL : ((1ull << IR::OpSizeAsBits(IROp->Size)) - 1);
  auto Src1 = GetReg(Op->Src1);

  if (IsInlineConstant(Op->Src2, &Const)) {
    // We can promote 8/16-bit tests to 32-bit since the constant is masked.
    LOGMAN_THROW_A_FMT(!(Const & ~Mask), "constant is already masked");
    tst(EmitSize, Src1, Const);
  } else {
    const auto Src2 = GetReg(Op->Src2);
    if (Src1 == Src2) {
      // x & x == x, so the value can be tested against the mask directly.
      tst(EmitSize, Src1 /* Src2 */, Mask);
    } else {
      // Combine the sources first, then test only the in-range bits.
      and_(EmitSize, TMP1, Src1, Src2);
      tst(EmitSize, TMP1, Mask);
    }
  }
}

// Dst = Src1 - (Src2 shifted by ShiftAmount using the op's shift type).
DEF_OP(SubShift) {
  const auto Op = IROp->C<IR::IROp_SubShift>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  sub(EmitSize, Dst, Lhs, Rhs, ConvertIRShiftType(Op->Shift), Op->ShiftAmount);
}

// Sets NZCV from Src1 - Src2 without keeping the difference.
DEF_OP(SubNZCV) {
  const auto Op = IROp->C<IR::IROp_SubNZCV>();
  const auto OpSize = IROp->Size;
  const auto EmitSize = ConvertSize(IROp);

  uint64_t Imm;
  if (IsInlineConstant(Op->Src2, &Imm)) {
    // Immediates are only expected for 32-bit and 64-bit operations.
    LOGMAN_THROW_A_FMT(OpSize >= IR::OpSize::i32Bit, "Constant not allowed here");
    cmp(EmitSize, GetReg(Op->Src1), Imm);
  } else if (OpSize < IR::OpSize::i32Bit) {
    // Shift both operands to the top of a 32-bit register to fix the flags
    // for <32-bit ops.
    const unsigned Shift = 32 - IR::OpSizeAsBits(OpSize);
    ARMEmitter::Register Lhs = GetZeroableReg(Op->Src1);

    // Any shift of zero is still zero, so skip the shift for the zero register.
    if (Lhs != ARMEmitter::Reg::zr) {
      lsl(ARMEmitter::Size::i32Bit, TMP1, Lhs, Shift);
      Lhs = TMP1;
    }

    cmp(EmitSize, Lhs, GetReg(Op->Src2), ARMEmitter::ShiftType::LSL, Shift);
  } else {
    const ARMEmitter::Register Lhs = GetZeroableReg(Op->Src1);
    cmp(EmitSize, Lhs, GetReg(Op->Src2));
  }
}

// Compares the pair (Src1Lo, Src1Hi) with (Src2Lo, Src2Hi) and sets Z when
// both halves are equal, leaving N, C and V as they were before the op.
DEF_OP(CmpPairZ) {
  auto Op = IROp->C<IR::IROp_CmpPairZ>();
  const auto EmitSize = ConvertSize(IROp);

  // Save NZCV. MRS places the flags in bits [31:28] of TMP1.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // Compare, setting Z and clobbering NzCV. The second compare only runs
  // when the low halves matched; otherwise the flags are forced to all-clear
  // (Z = 0).
  cmp(EmitSize, GetReg(Op->Src1Lo), GetReg(Op->Src2Lo));
  ccmp(EmitSize, GetReg(Op->Src1Hi), GetReg(Op->Src2Hi), ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);

  // Restore NzCV
  if (CTX->HostFeatures.SupportsFlagM) {
    // RMIF inserts bits [3:0] of the rotated source into the flags, so the
    // saved value must be rotated right by 28 to bring bits [31:28] down.
    // The mask 0xb selects N, C and V and leaves the freshly computed Z alone.
    rmif(TMP1, 28, 0xb /* NzCV */);
  } else {
    // Without FlagM: patch the new Z into the saved flags and write them back.
    cset(ARMEmitter::Size::i32Bit, TMP2, ARMEmitter::Condition::CC_EQ);
    bfi(ARMEmitter::Size::i32Bit, TMP1, TMP2, 30 /* lsb: Z */, 1);
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  }
}

// Emits a single cfinv. The op is only valid on hosts with FlagM support,
// which is asserted below.
DEF_OP(CarryInvert) {
  LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");
  cfinv();
}

// Emits rmif on the 64-bit view of Src with the op's rotate amount and flag
// mask. Requires FlagM on the host.
DEF_OP(RmifNZCV) {
  const auto Op = IROp->C<IR::IROp_RmifNZCV>();
  LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");

  const auto Src = GetZeroableReg(Op->Src).X();
  rmif(Src, Op->Rotate, Op->Mask);
}

DEF_OP(SetSmallNZV) {
  // Emits SETF8 / SETF16 on the source register for 8- and 16-bit values.
  // Requires FlagM on the host.
  auto Op = IROp->C<IR::IROp_SetSmallNZV>();
  LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFlagM, "Unsupported flagm op");

  const auto OpSize = IROp->Size;
  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i8Bit || OpSize == IR::OpSize::i16Bit, "Unsupported {} size: {}", __func__, OpSize);

  const auto Source = GetReg(Op->Src).W();
  const bool IsByte = OpSize == IR::OpSize::i8Bit;

  if (IsByte) {
    setf8(Source);
  } else {
    setf16(Source);
  }
}

DEF_OP(AXFlag) {
  // Use the native instruction when the host has FlagM2.
  if (CTX->HostFeatures.SupportsFlagM2) {
    axflag();
    return;
  }

  // Software fallback. AXFLAG is defined in the Arm spec as
  //
  //   gt: nzCv -> nzCv
  //   lt: Nzcv -> nzcv  <==>  1 + 0
  //   eq: nZCv -> nZCv  <==>  1 + (~0)
  //   un: nzCV -> nZcv  <==>  0 + 0
  //
  // For the latter 3 cases, we therefore get the right NZCV by adding V_inv
  // to (eq ? ~0 : 0). The remaining case is forced with ccmn.
  const auto InvertedV = GetReg(IROp->Args[0]);
  const auto Size64 = ARMEmitter::Size::i64Bit;

  // TMP1 = eq ? ~0 : 0
  csetm(Size64, TMP1, ARMEmitter::Condition::CC_EQ);
  // On LE perform the add; otherwise (gt) force nzCv directly.
  ccmn(Size64, InvertedV, TMP1, ARMEmitter::StatusFlags {0x2} /* nzCv */, ARMEmitter::Condition::CC_LE);
}

DEF_OP(Parity) {
  // Folds the low 8 bits of Raw down to a single parity bit in bit 0 of the
  // destination, optionally inverted and optionally masked to one bit.
  auto Op = IROp->C<IR::IROp_Parity>();
  const auto Source = GetReg(Op->Raw);
  const auto Result = GetReg(Node);

  const auto Size32 = ARMEmitter::Size::i32Bit;
  const auto Lsr = ARMEmitter::ShiftType::LSR;

  // XOR cascade: 8 bits -> 4 bits -> 2 bits.
  eor(Size32, TMP1, Source, Source, Lsr, 4);
  eor(Size32, TMP1, TMP1, TMP1, Lsr, 2);

  // Final fold 2 bits -> 1 bit; eon yields the inverted parity.
  if (Op->Invert) {
    eon(Size32, Result, TMP1, TMP1, Lsr, 1);
  } else {
    eor(Size32, Result, TMP1, TMP1, Lsr, 1);
  }

  // Everything above bit 0 is garbage after the cascade; strip on request.
  if (Op->Mask) {
    and_(Size32, Result, Result, 1);
  }
}

DEF_OP(CondAddNZCV) {
  // Conditional compare-negative: if Cond holds, flags come from Src1 + Src2;
  // otherwise NZCV is loaded with the FalseNZCV immediate.
  auto Op = IROp->C<IR::IROp_CondAddNZCV>();

  const ARMEmitter::StatusFlags FalseFlags = (ARMEmitter::StatusFlags)Op->FalseNZCV;
  const auto Lhs = GetZeroableReg(Op->Src1);

  uint64_t Imm = 0;
  if (!IsInlineConstant(Op->Src2, &Imm)) {
    ccmn(ConvertSize48(IROp), Lhs, GetReg(Op->Src2), FalseFlags, MapCC(Op->Cond));
    return;
  }

  ccmn(ConvertSize48(IROp), Lhs, Imm, FalseFlags, MapCC(Op->Cond));
}

DEF_OP(CondSubNZCV) {
  // Conditional compare: if Cond holds, flags come from Src1 - Src2;
  // otherwise NZCV is loaded with the FalseNZCV immediate.
  auto Op = IROp->C<IR::IROp_CondSubNZCV>();

  const ARMEmitter::StatusFlags FalseFlags = (ARMEmitter::StatusFlags)Op->FalseNZCV;
  const auto Lhs = GetZeroableReg(Op->Src1);

  uint64_t Imm = 0;
  if (!IsInlineConstant(Op->Src2, &Imm)) {
    ccmp(ConvertSize48(IROp), Lhs, GetReg(Op->Src2), FalseFlags, MapCC(Op->Cond));
    return;
  }

  ccmp(ConvertSize48(IROp), Lhs, Imm, FalseFlags, MapCC(Op->Cond));
}

DEF_OP(Neg) {
  // Negates Src. With a condition other than AL the negate is conditional
  // (cneg); with AL it is unconditional (neg).
  auto Op = IROp->C<IR::IROp_Neg>();

  const bool Unconditional = Op->Cond == IR::CondClass::AL;
  if (!Unconditional) {
    cneg(ConvertSize48(IROp), GetReg(Node), GetReg(Op->Src), MapCC(Op->Cond));
    return;
  }

  neg(ConvertSize48(IROp), GetReg(Node), GetReg(Op->Src));
}

DEF_OP(Mul) {
  // Dst = Src1 * Src2, emitted as a single mul at the op's 32/64-bit size.
  auto Op = IROp->C<IR::IROp_Mul>();

  const auto EmitSize = ConvertSize48(IROp);
  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  mul(EmitSize, Dst, Lhs, Rhs);
}

DEF_OP(UMul) {
  // Dst = Src1 * Src2. Emits the same mul instruction as the Mul op.
  auto Op = IROp->C<IR::IROp_UMul>();

  const auto EmitSize = ConvertSize48(IROp);
  const auto Dst = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  mul(EmitSize, Dst, Lhs, Rhs);
}

DEF_OP(UMull) {
  // Unsigned widening multiply: 32-bit x 32-bit -> 64-bit destination.
  auto Op = IROp->C<IR::IROp_UMull>();

  const auto Dst64 = GetReg(Node).X();
  const auto Lhs32 = GetReg(Op->Src1).W();
  const auto Rhs32 = GetReg(Op->Src2).W();

  umull(Dst64, Lhs32, Rhs32);
}

DEF_OP(SMull) {
  // Signed widening multiply: 32-bit x 32-bit -> 64-bit destination.
  auto Op = IROp->C<IR::IROp_SMull>();

  const auto Dst64 = GetReg(Node).X();
  const auto Lhs32 = GetReg(Op->Src1).W();
  const auto Rhs32 = GetReg(Op->Src2).W();

  smull(Dst64, Lhs32, Rhs32);
}

DEF_OP(MulH) {
  // Signed multiply returning the HIGH half of the double-width product.
  //   32-bit: Dst = (sext(Src1) * sext(Src2)) >> 32, held in the low 32 bits
  //   64-bit: Dst = smulh(Src1, Src2)
  auto Op = IROp->C<IR::IROp_MulH>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

  const auto Dst = GetReg(Node);
  const auto Src1 = GetReg(Op->Src1);
  const auto Src2 = GetReg(Op->Src2);

  if (OpSize == IR::OpSize::i32Bit) {
    // Sign-extend both operands to 64 bits so a 64-bit multiply produces the
    // full 64-bit product.
    sxtw(TMP1, Src1.W());
    sxtw(TMP2, Src2.W());

    // The multiply and the extract must be 64-bit: a 32-bit mul discards the
    // high half we want, and a bitfield at lsb=32/width=32 does not exist in
    // a 32-bit register. This mirrors the UMulH 32-bit path.
    mul(ARMEmitter::Size::i64Bit, Dst, TMP1, TMP2);
    ubfx(ARMEmitter::Size::i64Bit, Dst, Dst, 32, 32);
  } else {
    smulh(Dst.X(), Src1.X(), Src2.X());
  }
}

DEF_OP(UMulH) {
  // Unsigned multiply returning the high half of the double-width product.
  auto Op = IROp->C<IR::IROp_UMulH>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit, "Unsupported {} size: {}", __func__, OpSize);

  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  if (OpSize != IR::OpSize::i32Bit) {
    // 64-bit: the hardware has a dedicated high-multiply.
    umulh(Result.X(), Lhs.X(), Rhs.X());
    return;
  }

  // 32-bit: zero-extend, do a full 64-bit multiply, then take bits [63:32].
  const auto Size64 = ARMEmitter::Size::i64Bit;
  uxtw(Size64, TMP1, Lhs);
  uxtw(Size64, TMP2, Rhs);
  mul(Size64, Result, TMP1, TMP2);
  ubfx(Size64, Result, Result, 32, 32);
}

DEF_OP(Orlshl) {
  // Dst = Src1 | (Src2 << BitShift)
  auto Op = IROp->C<IR::IROp_Orlshl>();
  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);

  uint64_t Imm;
  if (!IsInlineConstant(Op->Src2, &Imm)) {
    // Register operand: the shift is folded into the orr.
    const auto Rhs = GetReg(Op->Src2);
    orr(ConvertSize(IROp), Result, Lhs, Rhs, ARMEmitter::ShiftType::LSL, Op->BitShift);
    return;
  }

  // Inline constant: apply the shift at emit time.
  orr(ConvertSize(IROp), Result, Lhs, Imm << Op->BitShift);
}

DEF_OP(Orlshr) {
  // Dst = Src1 | (Src2 >> BitShift), logical shift.
  auto Op = IROp->C<IR::IROp_Orlshr>();

  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);

  uint64_t Imm;
  if (!IsInlineConstant(Op->Src2, &Imm)) {
    // Register operand: the shift is folded into the orr.
    const auto Rhs = GetReg(Op->Src2);
    orr(ConvertSize(IROp), Result, Lhs, Rhs, ARMEmitter::ShiftType::LSR, Op->BitShift);
    return;
  }

  // Inline constant: apply the shift at emit time.
  orr(ConvertSize(IROp), Result, Lhs, Imm >> Op->BitShift);
}

DEF_OP(Ornror) {
  // Dst = Src1 | ~ror(Src2, BitShift), emitted as one orn with rotated operand.
  auto Op = IROp->C<IR::IROp_Ornror>();

  const auto EmitSize = ConvertSize(IROp);
  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);

  orn(EmitSize, Result, Lhs, Rhs, ARMEmitter::ShiftType::ROR, Op->BitShift);
}

DEF_OP(AndWithFlags) {
  // Dst = Src1 & Src2, also updating NZCV from the result. For 32/64-bit a
  // single ands does both; narrower sizes compute the and first and then set
  // flags from the result shifted into the top bits.
  auto Op = IROp->C<IR::IROp_AndWithFlags>();
  const auto OpSize = IROp->Size;
  const auto EmitSize = ConvertSize(IROp);

  uint64_t Imm;
  const auto Result = GetReg(Node);
  auto Lhs = GetReg(Op->Src1);

  if (!(OpSize < IR::OpSize::i32Bit)) {
    // Native width path.
    if (IsInlineConstant(Op->Src2, &Imm)) {
      ands(EmitSize, Result, Lhs, Imm);
    } else {
      const auto Rhs = GetReg(Op->Src2);
      ands(EmitSize, Result, Lhs, Rhs);
    }
    return;
  }

  // Narrow path. See TestNZ
  if (IsInlineConstant(Op->Src2, &Imm)) {
    and_(EmitSize, Result, Lhs, Imm);
  } else {
    auto Rhs = GetReg(Op->Src2);

    if (Lhs == Rhs) {
      // x & x == x: at most a move is required.
      if (Result != Lhs) {
        mov(ARMEmitter::Size::i64Bit, Result, Lhs);
      }
    } else {
      and_(EmitSize, Result, Lhs, Rhs);
    }
  }

  // Set flags from the narrow result placed at the top of the register.
  const unsigned Shift = 32 - IR::OpSizeAsBits(OpSize);
  cmn(EmitSize, ARMEmitter::Reg::zr, Result, ARMEmitter::ShiftType::LSL, Shift);
}

DEF_OP(AndShift) {
  // Dst = Src1 & shift(Src2, ShiftAmount), with the shift kind taken from the
  // IR op and folded into a single and instruction.
  //
  // The op must be viewed through its own IROp_AndShift layout. It was
  // previously cast to IROp_XorShift, which only worked as long as the two
  // structs happened to share a field layout.
  auto Op = IROp->C<IR::IROp_AndShift>();

  and_(ConvertSize48(IROp), GetReg(Node), GetReg(Op->Src1), GetReg(Op->Src2), ConvertIRShiftType(Op->Shift), Op->ShiftAmount);
}

DEF_OP(XorShift) {
  // Dst = Src1 ^ shift(Src2, ShiftAmount), emitted as one eor.
  auto Op = IROp->C<IR::IROp_XorShift>();

  const auto EmitSize = ConvertSize48(IROp);
  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);
  const auto ShiftKind = ConvertIRShiftType(Op->Shift);

  eor(EmitSize, Result, Lhs, Rhs, ShiftKind, Op->ShiftAmount);
}

DEF_OP(XornShift) {
  // Dst = Src1 ^ ~shift(Src2, ShiftAmount), emitted as one eon.
  auto Op = IROp->C<IR::IROp_XornShift>();

  const auto EmitSize = ConvertSize48(IROp);
  const auto Result = GetReg(Node);
  const auto Lhs = GetReg(Op->Src1);
  const auto Rhs = GetReg(Op->Src2);
  const auto ShiftKind = ConvertIRShiftType(Op->Shift);

  eon(EmitSize, Result, Lhs, Rhs, ShiftKind, Op->ShiftAmount);
}

DEF_OP(Ashr) {
  // Arithmetic shift right. 8/16-bit inputs are sign-extended into TMP1
  // before the shift, and the result is cut back to the op width afterwards.
  auto Op = IROp->C<IR::IROp_Ashr>();
  const auto OpSize = IROp->Size;
  const auto EmitSize = ConvertSize(IROp);
  const bool IsNarrow = !(OpSize >= IR::OpSize::i32Bit);

  const auto Result = GetReg(Node);
  auto Value = GetReg(Op->Src1);

  // Narrow types: bring the sign bit up to the register's sign position.
  if (IsNarrow) {
    sbfx(EmitSize, TMP1, Value, 0, IR::OpSizeAsBits(OpSize));
    Value = TMP1;
  }

  // The shift itself, by immediate or by register.
  uint64_t Imm;
  if (IsInlineConstant(Op->Src2, &Imm)) {
    asr(EmitSize, Result, Value, (unsigned int)Imm);
  } else {
    const auto Amount = GetReg(Op->Src2);
    asrv(EmitSize, Result, Value, Amount);
  }

  // Narrow types: drop the sign-extension bits above the op width.
  if (IsNarrow) {
    ubfx(EmitSize, Result, Result, 0, IR::OpSizeAsBits(OpSize));
  }
}

// Updates NZCV (and the PF source value returned as this op's result) after a
// variable-count shift. Inputs are the already-computed shift Result, the
// original value Src1, the shift count Src2 and the incoming PF value.
// If the masked shift count is zero, flags are left untouched and the
// incoming PF value is passed through.
DEF_OP(ShiftFlags) {
  auto Op = IROp->C<IR::IROp_ShiftFlags>();
  const auto OpSize = Op->Size;
  const auto EmitSize = OpSize == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const auto PFOutput = GetReg(Node);
  const auto PFInput = GetReg(Op->PFInput);
  const auto Dst = GetReg(Op->Result);
  const auto Src1 = GetReg(Op->Src1);
  const auto Src2 = GetReg(Op->Src2);

  // If the PF output register aliases a value that is still read below, build
  // the PF value in TMP4 and copy it out at the very end.
  bool PFBlocked = (PFOutput == Dst) || (PFOutput == Src1) || (PFOutput == Src2);
  const auto PFTemp = PFBlocked ? TMP4 : PFOutput;

  // Set the output outside the branch to avoid needing an extra leg of the
  // branch. We specifically do not hardcode the PF register anywhere (relying
  // on a tied SRA register instead) to avoid fighting with RA.
  if (PFTemp != PFInput) {
    mov(ARMEmitter::Size::i64Bit, PFTemp, PFInput);
  }

  // We need to mask the source before comparing it. We don't just skip flag
  // updates for Src2=0 but anything that masks to zero.
  and_(ARMEmitter::Size::i32Bit, TMP1, Src2, OpSize == IR::OpSize::i64Bit ? 0x3f : 0x1f);

  ARMEmitter::ForwardLabel Done;
  (void)cbz(EmitSize, TMP1, &Done);
  {
    // PF/SF/ZF/OF
    if (OpSize >= IR::OpSize::i32Bit) {
      // ands sets N/Z from the result and copies it into the PF value at once.
      ands(EmitSize, PFTemp, Dst, Dst);
    } else {
      // Narrow sizes: set N/Z from the result shifted to the top of a 32-bit
      // register, then copy the result into the PF value separately.
      unsigned Shift = 32 - (IR::OpSizeToSize(OpSize) * 8);
      cmn(EmitSize, ARMEmitter::Reg::zr, Dst, ARMEmitter::ShiftType::LSL, Shift);
      mov(ARMEmitter::Size::i64Bit, PFTemp, Dst);
    }

    // CFWord/CFBit name the register and bit position that hold the carry.
    auto CFWord = TMP1;
    unsigned CFBit = 0;

    // Extract the last bit shifted in to CF
    if (Op->Shift == IR::ShiftType::LSL) {
      if (OpSize >= IR::OpSize::i32Bit) {
        // Shift Src1 right by the negated count so the last bit shifted out
        // on the left lands in bit 0.
        neg(EmitSize, CFWord, Src2);
        lsrv(EmitSize, CFWord, Src1, CFWord);
      } else {
        // Narrow left shift: the carry is read from the bit just above the
        // operation width in the result register.
        CFWord = Dst.X();
        CFBit = IR::OpSizeToSize(OpSize) * 8;
      }
    } else {
      // Right shifts: the last bit shifted out is bit (count - 1) of Src1.
      sub(ARMEmitter::Size::i64Bit, CFWord, Src2, 1);
      lsrv(EmitSize, CFWord, Src1, CFWord);
    }

    if (Op->InvertCF) {
      mvn(ARMEmitter::Size::i64Bit, TMP1, CFWord);
      CFWord = TMP1;
    }

    bool SetOF = Op->Shift != IR::ShiftType::ASR;
    if (SetOF) {
      // Only defined when Shift is 1 else undefined
      // OF flag is set if a sign change occurred
      eor(EmitSize, TMP3, Src1, Dst);
    }

    if (CTX->HostFeatures.SupportsFlagM) {
      // rmif inserts bit 1 of the rotated value into C, so rotate by
      // (CFBit - 1) modulo 64; CFBit is unsigned, so CFBit == 0 wraps to 63.
      rmif(CFWord, (CFBit - 1) % 64, (1 << 1) /* C */);

      if (SetOF) {
        // Bring the sign bit of (Src1 ^ Dst) down to bit 0, which maps to V.
        rmif(TMP3, IR::OpSizeToSize(OpSize) * 8 - 1, (1 << 0) /* V */);
      }
    } else {
      // No FlagM: read NZCV, patch C (and V) with bfi, then write it back.
      mrs(TMP2, ARMEmitter::SystemRegister::NZCV);

      if (CFBit != 0) {
        lsr(ARMEmitter::Size::i64Bit, TMP1, CFWord, CFBit);
        CFWord = TMP1;
      }

      bfi(ARMEmitter::Size::i32Bit, TMP2, CFWord, 29 /* C */, 1);

      if (SetOF) {
        lsr(EmitSize, TMP3, TMP3, IR::OpSizeToSize(OpSize) * 8 - 1);
        bfi(ARMEmitter::Size::i32Bit, TMP2, TMP3, 28 /* V */, 1);
      }

      msr(ARMEmitter::SystemRegister::NZCV, TMP2);
    }
  }
  (void)Bind(&Done);

  // TODO: Make RA less dumb so this can't happen (e.g. with late-kill).
  if (PFOutput != PFTemp) {
    mov(ARMEmitter::Size::i64Bit, PFOutput, PFTemp);
  }
}

// Updates C and V after a rotate, given the rotate Result and the rotate
// count in Shift. N and Z are not modified. A zero count leaves all flags
// unchanged. The carry is written inverted (see the mvn below).
DEF_OP(RotateFlags) {
  auto Op = IROp->C<IR::IROp_RotateFlags>();
  const auto Result = GetReg(Op->Result);
  const auto Shift = GetReg(Op->Shift);
  const bool Left = Op->Left;
  const auto EmitSize = Op->Size == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  // If shift=0, flags are unaffected. Wrap the whole implementation in a cbz.
  ARMEmitter::ForwardLabel Done;
  (void)cbz(EmitSize, Shift, &Done);
  {
    // Extract the last bit shifted in to CF
    // (bit 0 of the result for a left rotate, the top bit for a right rotate).
    const auto BitSize = IR::OpSizeToSize(Op->Size) * 8;
    unsigned CFBit = Left ? 0 : BitSize - 1;

    // For ROR, OF is the XOR of the new CF bit and the most significant bit of the result.
    // For ROL, OF is the LSB and MSB XOR'd together.
    // OF is architecturally only defined for 1-bit rotate.
    eor(ARMEmitter::Size::i64Bit, TMP1, Result, Result, ARMEmitter::ShiftType::LSR, Left ? BitSize - 1 : 1);
    // Bit of TMP1 that now holds the overflow value.
    unsigned OFBit = Left ? 0 : BitSize - 2;

    // Invert result so we get inverted carry.
    mvn(ARMEmitter::Size::i64Bit, TMP2, Result);

    if (CTX->HostFeatures.SupportsFlagM) {
      // rmif inserts bit 1 of the rotated value into C and bit 0 into V.
      // CFBit is unsigned, so (CFBit - 1) % 64 wraps to 63 when CFBit is 0.
      rmif(TMP2, (CFBit - 1) % 64, 1 << 1 /* nzCv */);
      rmif(TMP1, OFBit, 1 << 0 /* nzcV */);
    } else {
      // No FlagM: move each flag bit down to bit 0 and bfi it into NZCV.
      if (OFBit != 0) {
        lsr(EmitSize, TMP1, TMP1, OFBit);
      }
      if (CFBit != 0) {
        lsr(EmitSize, TMP2, TMP2, CFBit);
      }

      mrs(TMP3, ARMEmitter::SystemRegister::NZCV);
      bfi(ARMEmitter::Size::i32Bit, TMP3, TMP1, 28 /* V */, 1);
      bfi(ARMEmitter::Size::i32Bit, TMP3, TMP2, 29 /* C */, 1);
      msr(ARMEmitter::SystemRegister::NZCV, TMP3);
    }
  }
  (void)Bind(&Done);
}

DEF_OP(Extr) {
  // Extracts a register-width field starting at bit LSB from the
  // concatenation Upper:Lower, using a single extr.
  auto Op = IROp->C<IR::IROp_Extr>();

  const auto Result = GetReg(Node);
  const auto HighHalf = GetReg(Op->Upper);
  const auto LowHalf = GetReg(Op->Lower);
  const auto EmitSize = ConvertSize48(IROp);

  extr(EmitSize, Result, HighHalf, LowHalf, Op->LSB);
}

DEF_OP(PDep) {
  // Parallel bit deposit: scatters the low bits of Input to the bit positions
  // that are set in Mask; all other destination bits are zero.
  //
  // Uses SVE2 BDEP when the host supports it, otherwise a software loop that
  // never touches NZCV.
  auto Op = IROp->C<IR::IROp_PDep>();
  const auto EmitSize = ConvertSize48(IROp);

  const auto Dest = GetReg(Node);

  // We can't clobber these
  const auto OrigInput = GetReg(Op->Input);
  const auto OrigMask = GetReg(Op->Mask);

  if (CTX->HostFeatures.SupportsSVEBitPerm) {
    // SVE added support for PDEP but it needs to be done in a vector register.
    if (EmitSize == ARMEmitter::Size::i32Bit) {
      fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), OrigInput.W());
      fmov(ARMEmitter::Size::i32Bit, VTMP2.S(), OrigMask.W());
      bdep(ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), VTMP1.Z(), VTMP2.Z());
      umov<ARMEmitter::SubRegSize::i32Bit>(Dest, VTMP1, 0);
    } else {
      fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), OrigInput.X());
      fmov(ARMEmitter::Size::i64Bit, VTMP2.D(), OrigMask.X());
      bdep(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), VTMP1.Z(), VTMP2.Z());
      umov<ARMEmitter::SubRegSize::i64Bit>(Dest, VTMP1, 0);
    }
  } else {
    // PDep implementation follows the ideas from
    // http://0x80.pl/articles/pdep-soft-emu.html ... Basically, iterate the *set*
    // bits only, which will be faster than the naive implementation as long as
    // there are enough holes in the mask.
    //
    // The specific arm64 assembly used is based on the sequence that clang
    // generates for the C code, giving context to the scheduling yielding better
    // ILP than I would do by hand. The registers are allocated by hand however,
    // to fit within the tight constraints we have here without spilling. Also, we
    // use cbz/cbnz for conditional branching to avoid clobbering NZCV.

    // So we have shadow as temporaries
    const auto Input = TMP1.R();
    const auto Mask = TMP2.R();

    // these get used variously as scratch
    const auto T0 = TMP3.R();
    const auto T1 = TMP4.R();

    ARMEmitter::BackwardLabel NextBit;
    ARMEmitter::ForwardLabel Done;

    // First, copy the input/mask, since we'll be clobbering. Copy as 64-bit to
    // make this 0-uop on Firestorm.
    mov(ARMEmitter::Size::i64Bit, Input, OrigInput);
    mov(ARMEmitter::Size::i64Bit, Mask, OrigMask);

    // Now, they're copied, so we can start setting Dest (even if it overlaps with
    // one of them).  Handle early exit case.
    //
    // The zero test must read the Mask copy, never OrigMask: if Dest shares a
    // register with OrigMask, the mov above has just zeroed it and the branch
    // would always be taken.
    mov(EmitSize, Dest, 0);
    (void)cbz(EmitSize, Mask, &Done);

    // Setup for first iteration: T0 = lowest set bit of Mask.
    neg(EmitSize, T0, Mask);
    and_(EmitSize, T0, T0, Mask);

    // Main loop
    (void)Bind(&NextBit);
    sbfx(EmitSize, T1, Input, 0, 1);  // T1 = all-ones if the next input bit is set
    eor(EmitSize, Mask, Mask, T0);    // clear the mask bit being handled
    and_(EmitSize, T0, T1, T0);       // keep the mask bit only if the input bit is set
    neg(EmitSize, T1, Mask);
    orr(EmitSize, Dest, Dest, T0);    // deposit
    lsr(EmitSize, Input, Input, 1);   // advance to the next input bit
    and_(EmitSize, T0, Mask, T1);     // T0 = next lowest set bit of Mask
    (void)cbnz(EmitSize, T0, &NextBit);

    // All done with nothing to do.
    (void)Bind(&Done);
  }
}

// Parallel bit extract: gathers the bits of Input selected by Mask into the
// low bits of the destination. Uses SVE2 BEXT when the host supports it,
// otherwise a software loop that only uses cbz/b for control flow.
DEF_OP(PExt) {
  auto Op = IROp->C<IR::IROp_PExt>();
  const auto OpSize = IROp->Size;
  // Index of the most significant bit for this operation size.
  const auto OpSizeBitsM1 = IR::OpSizeAsBits(OpSize) - 1;
  const auto EmitSize = ConvertSize48(IROp);

  const auto Input = GetReg(Op->Input);
  const auto Mask = GetReg(Op->Mask);
  const auto Dest = GetReg(Node);

  if (CTX->HostFeatures.SupportsSVEBitPerm) {
    // SVE added support for PEXT but it needs to be done in a vector register.
    if (EmitSize == ARMEmitter::Size::i32Bit) {
      fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Input.W());
      fmov(ARMEmitter::Size::i32Bit, VTMP2.S(), Mask.W());
      bext(ARMEmitter::SubRegSize::i32Bit, VTMP1.Z(), VTMP1.Z(), VTMP2.Z());
      umov<ARMEmitter::SubRegSize::i32Bit>(Dest, VTMP1, 0);
    } else {
      fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Input.X());
      fmov(ARMEmitter::Size::i64Bit, VTMP2.D(), Mask.X());
      bext(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), VTMP1.Z(), VTMP2.Z());
      umov<ARMEmitter::SubRegSize::i64Bit>(Dest, VTMP1, 0);
    }
  } else {
    // Working copies so the source registers are not modified by the loop.
    const auto MaskReg = TMP1;
    const auto BitReg = TMP2;
    const auto ValueReg = TMP3;

    ARMEmitter::ForwardLabel EarlyExit;
    ARMEmitter::BackwardLabel NextBit;
    ARMEmitter::ForwardLabel Done;

    // The mask is tested before Dest is written, so Dest may alias Mask.
    (void)cbz(EmitSize, Mask, &EarlyExit);
    mov(EmitSize, MaskReg, Mask);
    mov(EmitSize, ValueReg, Input);
    mov(EmitSize, Dest, ARMEmitter::Reg::zr);

    // Main loop
    // Each iteration consumes the highest remaining mask bit: both the value
    // and the mask are shifted left until that bit is at the top, the value's
    // top bit is shifted into the bottom of Dest via extr, and the mask's top
    // bit is cleared.
    (void)Bind(&NextBit);
    (void)cbz(EmitSize, MaskReg, &Done);
    clz(EmitSize, BitReg, MaskReg);
    lslv(EmitSize, ValueReg, ValueReg, BitReg);
    lslv(EmitSize, MaskReg, MaskReg, BitReg);
    extr(EmitSize, Dest, Dest, ValueReg, OpSizeBitsM1);
    bfc(EmitSize, MaskReg, OpSizeBitsM1, 1);
    (void)b(&NextBit);

    // Early exit
    // An all-zero mask selects nothing, so the result is zero.
    (void)Bind(&EarlyExit);
    mov(EmitSize, Dest, ARMEmitter::Reg::zr);

    // All done with nothing to do.
    (void)Bind(&Done);
  }
}

// Signed divide producing both quotient (OutQuotient) and remainder
// (OutRemainder). The dividend is Lower alone when Upper is invalid,
// otherwise the double-width value Upper:Lower. The remainder is always
// derived as dividend - quotient * divisor via msub.
DEF_OP(Div) {
  auto Op = IROp->C<IR::IROp_Div>();
  const auto OpSize = IROp->Size;

  const auto Quotient = GetReg(Op->OutQuotient);
  const auto Remainder = GetReg(Op->OutRemainder);
  auto Lower = GetReg(Op->Lower);
  auto Divisor = GetReg(Op->Divisor);

  // Single-width dividend: no Upper half was supplied.
  if (Op->Upper.IsInvalid()) {
    const auto EmitSize = ConvertSize(IROp);

    // 8/16-bit operands are sign-extended into temporaries first so the
    // 32-bit sdiv sees correctly signed values.
    if (OpSize == IR::OpSize::i8Bit) {
      sxtb(EmitSize, TMP1, Lower);
      sxtb(EmitSize, TMP2, Divisor);

      Lower = TMP1;
      Divisor = TMP2;
    } else if (OpSize == IR::OpSize::i16Bit) {
      sxth(EmitSize, TMP1, Lower);
      sxth(EmitSize, TMP2, Divisor);

      Lower = TMP1;
      Divisor = TMP2;
    }

    sdiv(EmitSize, Quotient, Lower, Divisor);
    msub(EmitSize, Remainder, Quotient, Divisor, Lower);
    return;
  }

  // Double-width dividend: 16-bit uses a 32-bit divide, 32/64-bit use 64-bit.
  const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
  const auto Upper = GetReg(Op->Upper);

  // Each source is OpSize in size
  // So you can have up to a 128bit divide from x86-64
  switch (OpSize) {
  case IR::OpSize::i16Bit: {
    // Build the 32-bit dividend Upper:Lower in TMP1, sign-extend the divisor.
    uxth(EmitSize, TMP1, Lower);
    bfi(EmitSize, TMP1, Upper, 16, 16);
    sxth(EmitSize, TMP2, Divisor);
    sdiv(EmitSize, Quotient, TMP1, TMP2);
    msub(EmitSize, Remainder, Quotient, TMP2, TMP1);
    break;
  }
  case IR::OpSize::i32Bit: {
    // Build the 64-bit dividend Upper:Lower in TMP1, sign-extend the divisor.
    // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
    mov(EmitSize, TMP1, Lower);
    bfi(EmitSize, TMP1, Upper, 32, 32);
    sxtw(TMP2, Divisor.W());
    sdiv(EmitSize, Quotient, TMP1, TMP2);
    msub(EmitSize, Remainder, Quotient, TMP2, TMP1);
    break;
  }
  case IR::OpSize::i64Bit: {
    ARMEmitter::ForwardLabel Only64Bit {};
    ARMEmitter::ForwardLabel LongDIVRet {};

    // Check if the upper bits match the top bit of the lower 64-bits
    // Sign extend the top bit of lower bits
    sbfx(EmitSize, TMP1, Lower, 63, 1);
    eor(EmitSize, TMP1, TMP1, Upper);

    // If the sign bit matches then the result is zero
    // (Upper is just the sign extension of Lower, so a 64-bit divide suffices).
    (void)cbz(EmitSize, TMP1, &Only64Bit);

    // Long divide
    {
      // Arguments are passed to the handler in TMP1 (upper), TMP2 (lower) and
      // TMP3 (divisor).
      mov(EmitSize, TMP1, Upper);
      mov(EmitSize, TMP2, Lower);
      mov(EmitSize, TMP3, Divisor);

      ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.LDIVHandler));

      // Preserve the link register around the call; the 16-byte adjustment
      // keeps the stack pointer 16-byte aligned.
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
      blr(TMP4);
      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);

      // Move results to the destination registers
      // (the results are read from TMP1 = quotient and TMP2 = remainder).
      mov(EmitSize, Quotient, TMP1);
      mov(EmitSize, Remainder, TMP2);

      // Skip 64-bit path
      (void)b(&LongDIVRet);
    }

    (void)Bind(&Only64Bit);
    // 64-Bit only
    {
      sdiv(EmitSize, Quotient, Lower, Divisor);
      msub(EmitSize, Remainder, Quotient, Divisor, Lower);
    }

    (void)Bind(&LongDIVRet);
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unknown DIV Size: {}", OpSize); break;
  }
}

// Unsigned divide producing both quotient (OutQuotient) and remainder
// (OutRemainder). The dividend is Lower alone when Upper is invalid,
// otherwise the double-width value Upper:Lower. The remainder is always
// derived as dividend - quotient * divisor via msub.
DEF_OP(UDiv) {
  auto Op = IROp->C<IR::IROp_UDiv>();
  const auto OpSize = IROp->Size;

  const auto Quotient = GetReg(Op->OutQuotient);
  const auto Remainder = GetReg(Op->OutRemainder);
  const auto Lower = GetReg(Op->Lower);
  const auto Divisor = GetReg(Op->Divisor);

  // Each source is OpSize in size
  // So you can have up to a 128bit divide from x86-64
  if (Op->Upper.IsInvalid()) {
    // Single-width dividend: a plain udiv + msub at the op's own size.
    const auto EmitSize = ConvertSize(IROp);
    udiv(EmitSize, Quotient, Lower, Divisor);
    msub(EmitSize, Remainder, Quotient, Divisor, Lower);
    return;
  }

  // Double-width dividend: 16-bit uses a 32-bit divide, 32/64-bit use 64-bit.
  const auto EmitSize = OpSize >= IR::OpSize::i32Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
  const auto Upper = GetReg(Op->Upper);

  switch (OpSize) {
  case IR::OpSize::i16Bit: {
    // Build the 32-bit dividend Upper:Lower in TMP1.
    uxth(EmitSize, TMP1, Lower);
    bfi(EmitSize, TMP1, Upper, 16, 16);
    udiv(EmitSize, Quotient, TMP1, Divisor);
    msub(EmitSize, Remainder, Quotient, Divisor, TMP1);
    break;
  }
  case IR::OpSize::i32Bit: {
    // We need to mask divisor if we have Upper bits, since the frontend does
    // not on the hope that we can optimize to use the path above.
    mov(ARMEmitter::Size::i32Bit, TMP2, Divisor);

    // Build the 64-bit dividend Upper:Lower in TMP1.
    // TODO: 32-bit operation should be guaranteed not to leave garbage in the upper bits.
    mov(EmitSize, TMP1, Lower);
    bfi(EmitSize, TMP1, Upper, 32, 32);
    udiv(EmitSize, Quotient, TMP1, TMP2);
    msub(EmitSize, Remainder, Quotient, TMP2, TMP1);
    break;
  }
  case IR::OpSize::i64Bit: {
    ARMEmitter::ForwardLabel Only64Bit {};
    ARMEmitter::ForwardLabel LongDIVRet {};

    // Check the upper bits for zero
    // If the upper bits are zero then we can do a 64-bit divide
    (void)cbz(EmitSize, Upper, &Only64Bit);

    // Long divide
    {
      // Arguments are passed to the handler in TMP1 (upper), TMP2 (lower) and
      // TMP3 (divisor).
      mov(EmitSize, TMP1, Upper);
      mov(EmitSize, TMP2, Lower);
      mov(EmitSize, TMP3, Divisor);

      ldr(TMP4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.LUDIVHandler));

      // Preserve the link register around the call; the 16-byte adjustment
      // keeps the stack pointer 16-byte aligned.
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
      blr(TMP4);
      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);

      // Move results to the destination registers
      // (the results are read from TMP1 = quotient and TMP2 = remainder).
      mov(EmitSize, Quotient, TMP1);
      mov(EmitSize, Remainder, TMP2);

      // Skip 64-bit path
      (void)b(&LongDIVRet);
    }

    (void)Bind(&Only64Bit);
    // 64-Bit only
    {
      udiv(EmitSize, Quotient, Lower, Divisor);
      msub(EmitSize, Remainder, Quotient, Divisor, Lower);
    }

    (void)Bind(&LongDIVRet);
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unknown LUDIV Size: {}", OpSize); break;
  }
}

DEF_OP(Not) {
  // Dst = ~Src, emitted as a single mvn at the op's 32/64-bit size.
  auto Op = IROp->C<IR::IROp_Not>();

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);
  const auto EmitSize = ConvertSize48(IROp);

  mvn(EmitSize, Result, Source);
}

DEF_OP(Popcount) {
  // Counts the set bits in the low OpSize bytes of Src. Uses the scalar cnt
  // when the host has CSSC, otherwise goes through an ASIMD register.
  auto Op = IROp->C<IR::IROp_Popcount>();
  const auto OpSize = IROp->Size;

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  if (CTX->HostFeatures.SupportsCSSC) {
    const auto Size32 = ARMEmitter::Size::i32Bit;

    switch (OpSize) {
    case IR::OpSize::i8Bit:
      // Zero-extend so bits above the byte are not counted.
      uxtb(Size32, Result, Source);
      cnt(Size32, Result, Result);
      break;
    case IR::OpSize::i16Bit:
      // Zero-extend so bits above the halfword are not counted.
      uxth(Size32, Result, Source);
      cnt(Size32, Result, Result);
      break;
    case IR::OpSize::i32Bit: cnt(Size32, Result, Source); break;
    case IR::OpSize::i64Bit: cnt(ARMEmitter::Size::i64Bit, Result, Source); break;
    default: LOGMAN_MSG_A_FMT("Unsupported Popcount size: {}", OpSize);
    }
    return;
  }

  // ASIMD path: per-byte cnt, then sum as many byte lanes as the size needs.
  const auto Bytes = ARMEmitter::SubRegSize::i8Bit;

  switch (OpSize) {
  case IR::OpSize::i8Bit:
    fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Source);
    // only use lowest byte
    cnt(Bytes, VTMP1.D(), VTMP1.D());
    break;
  case IR::OpSize::i16Bit:
    fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Source);
    cnt(Bytes, VTMP1.D(), VTMP1.D());
    // only count two lowest bytes
    addp(Bytes, VTMP1.D(), VTMP1.D(), VTMP1.D());
    break;
  case IR::OpSize::i32Bit:
    fmov(ARMEmitter::Size::i32Bit, VTMP1.S(), Source);
    cnt(Bytes, VTMP1.D(), VTMP1.D());
    // fmov has zero extended, unused bytes are zero
    addv(Bytes, VTMP1.D(), VTMP1.D());
    break;
  case IR::OpSize::i64Bit:
    fmov(ARMEmitter::Size::i64Bit, VTMP1.D(), Source);
    cnt(Bytes, VTMP1.D(), VTMP1.D());
    // fmov has zero extended, unused bytes are zero
    addv(Bytes, VTMP1.D(), VTMP1.D());
    break;
  default: LOGMAN_MSG_A_FMT("Unsupported Popcount size: {}", OpSize);
  }

  // The count ends up in byte lane 0.
  umov<ARMEmitter::SubRegSize::i8Bit>(Result, VTMP1, 0);
}

DEF_OP(FindLSB) {
  // Index of the lowest set bit of Src, computed as clz(rbit(Src)).
  auto Op = IROp->C<IR::IROp_FindLSB>();
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);
  const auto Reversed = TMP1;

  // We assume the source is nonzero, so we can just rbit+clz without worrying
  // about upper garbage for smaller types.
  rbit(EmitSize, Reversed, Source);
  clz(EmitSize, Result, Reversed);
}

DEF_OP(FindMSB) {
  // Index of the highest set bit of Src: (bits - 1) - clz(Src).
  auto Op = IROp->C<IR::IROp_FindMSB>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
                     "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  // TMP1 = index of the top bit for this size.
  movz(ARMEmitter::Size::i64Bit, TMP1, IR::OpSizeAsBits(OpSize) - 1);

  // 16-bit values are first moved to the top half of the 32-bit register so
  // clz counts from the value's own top bit.
  auto ClzInput = Source;
  if (OpSize == IR::OpSize::i16Bit) {
    lsl(EmitSize, Result, Source, 16);
    ClzInput = Result;
  }
  clz(EmitSize, Result, ClzInput);

  sub(ARMEmitter::Size::i64Bit, Result, TMP1, Result);
}

DEF_OP(FindTrailingZeroes) {
  // Counts trailing zero bits as clz(rbit(Src)).
  auto Op = IROp->C<IR::IROp_FindTrailingZeroes>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
                     "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);
  const bool IsHalfword = OpSize == IR::OpSize::i16Bit;

  rbit(EmitSize, Result, Source);

  if (IsHalfword) {
    // This orr does two things. First, if the (masked) source is zero, it
    // reverses to zero in the top so it forces clz to return 16. Second, it
    // ensures garbage in the upper bits of the source don't affect clz, because
    // they'll rbit to garbage in the bottom below the 0x8000 and be ignored by
    // the clz. So we handle Src upper garbage without explicitly masking.
    orr(EmitSize, Result, Result, 0x8000);
  }

  clz(EmitSize, Result, Result);
}

DEF_OP(CountLeadingZeroes) {
  // Counts leading zero bits of Src within the op width.
  auto Op = IROp->C<IR::IROp_CountLeadingZeroes>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
                     "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  auto ClzInput = Source;
  if (OpSize == IR::OpSize::i16Bit) {
    // Expressing as lsl+orr+clz clears away any garbage in the upper bits
    // (alternatively could do uxth+clz+sub.. equal cost in total).
    lsl(EmitSize, Result, Source, 16);
    orr(EmitSize, Result, Result, 0x8000);
    ClzInput = Result;
  }

  clz(EmitSize, Result, ClzInput);
}

DEF_OP(Rev) {
  // Byte-swaps Src. The 16-bit case swaps the full 32-bit register and then
  // shifts the swapped halfword back down to the bottom.
  auto Op = IROp->C<IR::IROp_Rev>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit,
                     "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);
  const bool IsHalfword = OpSize == IR::OpSize::i16Bit;

  rev(EmitSize, Result, Source);

  if (!IsHalfword) {
    return;
  }

  lsr(EmitSize, Result, Result, 16);
}

DEF_OP(Rbit) {
  // Reverses the bit order of Src; only 32- and 64-bit sizes are supported.
  auto Op = IROp->C<IR::IROp_Rbit>();
  const auto OpSize = IROp->Size;

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i64Bit, "Unsupported {} size: {}", __func__, OpSize);
  const auto EmitSize = ConvertSize48(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  rbit(EmitSize, Result, Source);
}

DEF_OP(Bfi) {
  // Result = Dest with Width bits of Src inserted at bit position lsb.
  auto Op = IROp->C<IR::IROp_Bfi>();
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Base = GetReg(Op->Dest);
  const auto Field = GetReg(Op->Src);

  const bool InPlace = Result == Base;
  const bool ResultAliasesField = Result == Field;

  if (!InPlace && ResultAliasesField) {
    // The result register holds the field to insert, so it cannot receive the
    // base value first. Build the value in TMP1 instead.
    // TODO: Inefficient until FEX can have RA constraints here.
    mov(EmitSize, TMP1, Base);
    bfi(EmitSize, TMP1, Field, Op->lsb, Op->Width);

    if (IROp->Size >= IR::OpSize::i32Bit) {
      mov(EmitSize, Result, TMP1.R());
    } else {
      ubfx(EmitSize, Result, TMP1, 0, IR::OpSizeAsBits(IROp->Size));
    }
    return;
  }

  if (!InPlace) {
    // Seed the result with the base value before inserting.
    //
    // The move is 64-bit to allow register renaming, the upper bits don't
    // matter because of the bfi's EmitSize.
    mov(ARMEmitter::Size::i64Bit, Result, Base);
  }

  bfi(EmitSize, Result, Field, Op->lsb, Op->Width);
}

DEF_OP(Bfxil) {
  // Result = Dest with its low Width bits replaced by the Width-bit field
  // taken from Src starting at bit lsb.
  auto Op = IROp->C<IR::IROp_Bfxil>();
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Base = GetReg(Op->Dest);
  const auto Field = GetReg(Op->Src);

  const bool InPlace = Result == Base;
  const bool ResultAliasesField = Result == Field;

  if (!InPlace && ResultAliasesField) {
    // The result register holds the field source, so it cannot receive the
    // base value first. Build the value in TMP1 instead.
    // TODO: Inefficient until FEX can have RA constraints here.
    mov(EmitSize, TMP1, Base);
    bfxil(EmitSize, TMP1, Field, Op->lsb, Op->Width);
    mov(EmitSize, Result, TMP1.R());
    return;
  }

  if (!InPlace) {
    // Seed the result with the base value before inserting.
    mov(EmitSize, Result, Base);
  }

  bfxil(EmitSize, Result, Field, Op->lsb, Op->Width);
}

DEF_OP(Bfe) {
  // Unsigned bitfield extract: Dst = (Src >> lsb) truncated to Width bits.
  auto Op = IROp->C<IR::IROp_Bfe>();
  LOGMAN_THROW_A_FMT(IROp->Size <= IR::OpSize::i64Bit, "OpSize is too large for BFE: {}", IROp->Size);
  LOGMAN_THROW_A_FMT(Op->Width != 0, "Invalid BFE width of 0");
  const auto EmitSize = ConvertSize(IROp);

  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  const bool StartsAtBitZero = Op->lsb == 0;

  // Extracting a whole 32- or 64-bit register is just a move.
  if (StartsAtBitZero && Op->Width == 32) {
    mov(ARMEmitter::Size::i32Bit, Result, Source);
    return;
  }

  if (StartsAtBitZero && Op->Width == 64) {
    LOGMAN_THROW_A_FMT(IROp->Size == IR::OpSize::i64Bit, "Must be 64-bit wide register");
    mov(ARMEmitter::Size::i64Bit, Result, Source);
    return;
  }

  ubfx(EmitSize, Result, Source, Op->lsb, Op->Width);
}

DEF_OP(Sbfe) {
  // Signed bitfield extract: takes Width bits of Src starting at lsb and
  // sign-extends them into the destination.
  auto Op = IROp->C<IR::IROp_Sbfe>();

  const auto EmitSize = ConvertSize(IROp);
  const auto Result = GetReg(Node);
  const auto Source = GetReg(Op->Src);

  sbfx(EmitSize, Result, Source, Op->lsb, Op->Width);
}

DEF_OP(MaskGenerateFromBitWidth) {
  // Produces a 64-bit mask from a runtime bit width:
  //   width == 0 -> all ones
  //   otherwise  -> ~(~0 << width)
  // The compare below overwrites NZCV.
  auto Op = IROp->C<IR::IROp_MaskGenerateFromBitWidth>();
  const auto Width = GetReg(Op->BitWidth);
  const auto Size64 = ARMEmitter::Size::i64Bit;

  // TMP1 = ~0
  LoadConstant(Size64, TMP1, -1);
  cmp(Size64, Width, 0);
  // TMP2 = ~0 << width
  lslv(Size64, TMP2, TMP1, Width);
  // EQ (width == 0) selects TMP1, otherwise the inverted TMP2.
  csinv(Size64, GetReg(Node), TMP1, TMP2, ARMEmitter::Condition::CC_EQ);
}

DEF_OP(Select) {
  // Compares Cmp1 against Cmp2 (GPR or FPR) and then materialises either
  // TrueVal or FalseVal in the destination depending on the condition.
  auto Op = IROp->C<IR::IROp_Select>();
  const auto ResultSize = IROp->Size;
  const auto EmitSize = ConvertSize(IROp);
  const bool Is64BitCompare = Op->CompareSize == IR::OpSize::i64Bit;

  const auto Condition = MapCC(Op->Cond);

  // Step 1: emit the comparison that sets the flags.
  if (IsGPR(Op->Cmp1)) {
    const auto GPRCompareSize = Is64BitCompare ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
    const auto Lhs = GetReg(Op->Cmp1);

    uint64_t Immediate;
    if (IsInlineConstant(Op->Cmp2, &Immediate)) {
      cmp(GPRCompareSize, Lhs, Immediate);
    } else {
      cmp(GPRCompareSize, Lhs, GetReg(Op->Cmp2));
    }
  } else if (IsFPR(Op->Cmp1)) {
    const auto FPRCompareSize = Is64BitCompare ? ARMEmitter::ScalarRegSize::i64Bit : ARMEmitter::ScalarRegSize::i32Bit;
    const auto Lhs = GetVReg(Op->Cmp1);
    const auto Rhs = GetVReg(Op->Cmp2);
    fcmp(FPRCompareSize, Lhs, Rhs);
  } else {
    LOGMAN_MSG_A_FMT("Select: Expected GPR or FPR");
  }

  // Step 2: work out whether the selected values are inline constants.
  uint64_t TrueImm, FalseImm;
  const bool TrueIsInline = IsInlineConstant(Op->TrueVal, &TrueImm);
  const bool FalseIsInline = IsInlineConstant(Op->FalseVal, &FalseImm);

  const uint64_t AllOnes = ResultSize == IR::OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;

  const ARMEmitter::Register Result = GetReg(Node);

  // Neither value is inline: a plain register select.
  if (!TrueIsInline && !FalseIsInline) {
    csel(EmitSize, Result, GetReg(Op->TrueVal), GetReg(Op->FalseVal), Condition);
    return;
  }

  // Only the {1, 0} and {all-ones, 0} inline pairs can be encoded.
  const bool SupportedInlinePair = FalseIsInline && TrueIsInline && (TrueImm == 1 || TrueImm == AllOnes) && FalseImm == 0;
  if (!SupportedInlinePair) {
    LOGMAN_MSG_A_FMT("Select: Unsupported compare inline parameters");
  }

  if (TrueImm == AllOnes) {
    csetm(EmitSize, Result, Condition);
  } else {
    cset(EmitSize, Result, Condition);
  }
}

DEF_OP(NZCVSelect) {
  // Selects between TrueVal and FalseVal using the flags that are already set;
  // no comparison is emitted here.
  auto Op = IROp->C<IR::IROp_NZCVSelect>();
  const auto EmitSize = ConvertSize(IROp);

  const auto Condition = MapCC(Op->Cond);

  uint64_t TrueImm, FalseImm;
  const bool TrueIsInline = IsInlineConstant(Op->TrueVal, &TrueImm);
  const bool FalseIsInline = IsInlineConstant(Op->FalseVal, &FalseImm);

  const uint64_t AllOnes = IROp->Size == IR::OpSize::i64Bit ? 0xffff'ffff'ffff'ffffull : 0xffff'ffffull;

  const ARMEmitter::Register Result = GetReg(Node);

  // Register true value: ordinary conditional select.
  if (!TrueIsInline) {
    csel(EmitSize, Result, GetReg(Op->TrueVal), GetZeroableReg(Op->FalseVal), Condition);
    return;
  }

  // Inline true value: only {1, 0} and {all-ones, 0} are encodable.
  const bool SupportedInlinePair = FalseIsInline && (TrueImm == 1 || TrueImm == AllOnes) && FalseImm == 0;
  if (!SupportedInlinePair) {
    LOGMAN_MSG_A_FMT("NZCVSelect: Unsupported constant");
  }

  if (TrueImm == AllOnes) {
    csetm(EmitSize, Result, Condition);
  } else {
    cset(EmitSize, Result, Condition);
  }
}

DEF_OP(NZCVSelectV) {
  // Vector-register flavour of NZCVSelect: emits a scalar fcsel on the
  // already-set flags.
  const auto Op = IROp->C<IR::IROp_NZCVSelectV>();

  const auto Condition = MapCC(Op->Cond);
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  const auto Result = GetVReg(Node);
  const auto WhenTrue = GetVReg(Op->TrueVal);
  const auto WhenFalse = GetVReg(Op->FalseVal);
  fcsel(Sizes.Scalar, Result, WhenTrue, WhenFalse, Condition);
}

DEF_OP(NZCVSelectIncrement) {
  // Emits a single csinc driven by the flags that are already set.
  const auto Op = IROp->C<IR::IROp_NZCVSelectIncrement>();

  const auto EmitSize = ConvertSize(IROp);
  const auto Result = GetReg(Node);
  const auto WhenTrue = GetReg(Op->TrueVal);
  const auto WhenFalse = GetZeroableReg(Op->FalseVal);
  const auto Condition = MapCC(Op->Cond);

  csinc(EmitSize, Result, WhenTrue, WhenFalse, Condition);
}

DEF_OP(VExtractToGPR) {
  // Extracts one element of a vector register into a GPR, zero-extending it.
  // Elements in the low 128-bit lane are read directly. Elements in the upper
  // 128-bit lane (256-bit vectors, requires SVE256) are first compacted down
  // into a temporary vector register and read from there.
  const auto Op = IROp->C<IR::IROp_VExtractToGPR>();
  const auto OpSize = IROp->Size;

  constexpr auto AVXRegBitSize = Core::CPUState::XMM_AVX_REG_SIZE * 8;
  constexpr auto SSERegBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;
  const auto ElementSizeBits = IR::OpSizeAsBits(Op->Header.ElementSize);

  // Bit offset of the requested element from the start of the register.
  const auto Offset = ElementSizeBits * Op->Index;
  const auto Is256Bit = Offset >= SSERegBitSize;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // Moves element `index` of `Source` into Dst.
  // NOTE: This must read from `Source` rather than the captured `Vector`,
  // otherwise the upper-lane path below would extract from the wrong register.
  const auto PerformMove = [&](const ARMEmitter::VRegister Source, int index) {
    switch (OpSize) {
    case IR::OpSize::i8Bit: umov<ARMEmitter::SubRegSize::i8Bit>(Dst, Source, index); break;
    case IR::OpSize::i16Bit: umov<ARMEmitter::SubRegSize::i16Bit>(Dst, Source, index); break;
    case IR::OpSize::i32Bit: umov<ARMEmitter::SubRegSize::i32Bit>(Dst, Source, index); break;
    case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Dst, Source, index); break;
    default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", OpSize); break;
    }
  };

  if (Offset < SSERegBitSize) {
    // Desired data lies within the lower 128-bit lane, so we
    // can treat the operation as a 128-bit operation, even
    // when acting on larger register sizes.
    PerformMove(Vector, Op->Index);
  } else {
    LOGMAN_THROW_A_FMT(Is256Bit, "Can't perform 256-bit extraction with op side: {}", OpSize);
    LOGMAN_THROW_A_FMT(Offset < AVXRegBitSize, "Trying to extract element outside bounds of register. Offset={}, Index={}", Offset, Op->Index);

    // We need to use the upper 128-bit lane, so lets move it down.
    // Inverting our dedicated predicate for 128-bit operations selects
    // all of the top lanes. We can then compact those into a temporary.
    const auto CompactPred = ARMEmitter::PReg::p0;
    not_(CompactPred, PRED_TMP_32B.Zeroing(), PRED_TMP_16B);
    compact(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), CompactPred, Vector.Z());

    // Sanitize the zero-based index to work on the now-moved
    // upper half of the vector.
    const auto SanitizedIndex = [OpSize, Op] {
      switch (OpSize) {
      case IR::OpSize::i8Bit: return Op->Index - 16;
      case IR::OpSize::i16Bit: return Op->Index - 8;
      case IR::OpSize::i32Bit: return Op->Index - 4;
      case IR::OpSize::i64Bit: return Op->Index - 2;
      default: LOGMAN_MSG_A_FMT("Unhandled OpSize: {}", OpSize); return 0;
      }
    }();

    // Move the value from the now-low-lane data held in the temporary.
    PerformMove(VTMP1, SanitizedIndex);
  }
}

DEF_OP(Float_ToGPR_ZS) {
  // Converts a scalar float/double to a signed integer in a GPR with a single
  // fcvtzs, picking the source register view from SrcElementSize.
  const auto Op = IROp->C<IR::IROp_Float_ToGPR_ZS>();

  const ARMEmitter::Register Result = GetReg(Node);
  const ARMEmitter::VRegister Input = GetVReg(Op->Scalar);
  const bool SourceIsDouble = Op->SrcElementSize == IR::OpSize::i64Bit;

  if (SourceIsDouble) {
    fcvtzs(ConvertSize(IROp), Result, Input.D());
  } else {
    fcvtzs(ConvertSize(IROp), Result, Input.S());
  }
}

DEF_OP(Float_ToGPR_S) {
  // Converts a scalar float/double to a signed integer in a GPR in two steps:
  // frinti into VTMP1 first, then fcvtzs from VTMP1 into the destination.
  const auto Op = IROp->C<IR::IROp_Float_ToGPR_S>();

  const ARMEmitter::Register Result = GetReg(Node);
  const ARMEmitter::VRegister Input = GetVReg(Op->Scalar);
  const bool SourceIsDouble = Op->SrcElementSize == IR::OpSize::i64Bit;

  if (SourceIsDouble) {
    const auto Rounded = VTMP1.D();
    frinti(Rounded, Input.D());
    fcvtzs(ConvertSize(IROp), Result, Rounded);
  } else {
    const auto Rounded = VTMP1.S();
    frinti(Rounded, Input.S());
    fcvtzs(ConvertSize(IROp), Result, Rounded);
  }
}

DEF_OP(FCmp) {
  // Emits a scalar floating-point compare of Scalar1 against Scalar2.
  // Only 64-bit elements select the double-precision form; everything else
  // is compared as single-precision.
  const auto Op = IROp->C<IR::IROp_FCmp>();

  const bool IsDouble = Op->ElementSize == IR::OpSize::i64Bit;
  const auto CompareSize = IsDouble ? ARMEmitter::ScalarRegSize::i64Bit : ARMEmitter::ScalarRegSize::i32Bit;

  const ARMEmitter::VRegister Lhs = GetVReg(Op->Scalar1);
  const ARMEmitter::VRegister Rhs = GetVReg(Op->Scalar2);

  fcmp(CompareSize, Lhs, Rhs);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/Arm64Relocations.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
desc: relocation logic of the arm64 splatter backend
$end_info$
*/
#include "Interface/Context/Context.h"
#include "Interface/Core/JIT/JITClass.h"

#include <FEXCore/Core/Thunks.h>

namespace FEXCore::CPU {
// Resolves a named symbol identifier to the 64-bit value that should be
// embedded as a literal. Dies on any identifier that is not handled here.
uint64_t GetNamedSymbolLiteral(FEXCore::Context::ContextImpl& CTX, FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol Op) {
  using NamedSymbol = FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol;

  if (Op == NamedSymbol::SYMBOL_LITERAL_EXITFUNCTION_LINKER) {
    return CTX.Dispatcher->GetExitFunctionLinkerAddress();
  }

  ERROR_AND_DIE_FMT("Unknown named symbol literal: {}", static_cast<uint32_t>(Op));
}

// Emits a padded constant load of the thunk pointer identified by `Sum` into
// `Reg`, and records a RELOC_NAMED_THUNK_MOVE relocation pointing at the start
// of that load.
void Arm64JITCore::InsertNamedThunkRelocation(ARMEmitter::Register Reg, const IR::SHA256Sum& Sum) {
  // The relocation offset must be captured before the load is emitted.
  Relocation ThunkReloc {};
  ThunkReloc.NamedThunkMove.Header = {.Offset = GetCursorOffset(), .Type = FEXCore::CPU::RelocationTypes::RELOC_NAMED_THUNK_MOVE};
  ThunkReloc.NamedThunkMove.Symbol = Sum;
  ThunkReloc.NamedThunkMove.RegisterIndex = Reg.Idx();

  const uint64_t ThunkAddress = reinterpret_cast<uint64_t>(EmitterCTX->ThunkHandler->LookupThunk(Sum));

  // Pointers are required to fit within 48-bit VA space.
  // TODO: Force 6-byte `MaxSize`, with zext extension to 64-bit. Current code not smart enough to handle negatives.
  LoadConstant(ARMEmitter::Size::i64Bit, Reg, ThunkAddress, FEXCore::CPU::Arm64Emitter::PadType::AUTOPAD);

  Relocations.emplace_back(ThunkReloc);
}

// Builds a literal/relocation pair for a named symbol. Nothing is emitted
// here; the relocation offset is filled in later by PlaceNamedSymbolLiteral.
Arm64JITCore::NamedSymbolLiteralPair Arm64JITCore::InsertNamedSymbolLiteral(FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol Op) {
  return NamedSymbolLiteralPair {
    .Lit = GetNamedSymbolLiteral(*CTX, Op),
    .MoveABI =
      {
        .NamedSymbolLiteral =
          {
            .Header =
              {
                .Offset = 0, // Set by PlaceNamedSymbolLiteral
                .Type = FEXCore::CPU::RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL,
              },
            .Symbol = Op,
          },
      },
  };
}

// Emits the 64-bit literal of a previously created pair at the current cursor,
// binds the pair's label to it and records the relocation with its offset set
// to that position. Dies if the pair is not one of the literal relocation types.
void Arm64JITCore::PlaceNamedSymbolLiteral(NamedSymbolLiteralPair Lit) {
  const auto Type = Lit.MoveABI.Header.Type;
  const bool IsLiteralRelocation = Type == RelocationTypes::RELOC_NAMED_SYMBOL_LITERAL || Type == RelocationTypes::RELOC_GUEST_RIP_LITERAL;

  if (!IsLiteralRelocation) {
    ERROR_AND_DIE_FMT("Unknown relocation type for {}", __FUNCTION__);
  }

  // The literal is about to be placed at the current cursor position.
  Lit.MoveABI.Header.Offset = GetCursorOffset();

  BindOrRestart(&Lit.Loc);
  dc64(Lit.Lit);
  Relocations.emplace_back(Lit.MoveABI);
}

// Builds a literal/relocation pair carrying a guest RIP. Nothing is emitted
// here; the relocation offset is filled in later by PlaceNamedSymbolLiteral.
auto Arm64JITCore::InsertGuestRIPLiteral(uint64_t GuestRIP) -> NamedSymbolLiteralPair {
  NamedSymbolLiteralPair Literal {
    .Lit = GuestRIP,
    .MoveABI =
      {
        .GuestRIP =
          {
            .Header =
              {
                .Offset = 0, // Set by PlaceNamedSymbolLiteral
                .Type = FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_LITERAL,
              },
            // NOTE: Cache serialization will subtract the guest binary base address later to produce consistency results
            .GuestRIP = GuestRIP,
          },
      },
  };
  return Literal;
}

// Emits a padded constant load of a guest RIP into `Reg` and records a
// RELOC_GUEST_RIP_MOVE relocation pointing at the start of that load.
void Arm64JITCore::InsertGuestRIPMove(ARMEmitter::Register Reg, uint64_t Constant) {
  // The relocation offset must be captured before the load is emitted.
  Relocation RIPReloc {};
  RIPReloc.GuestRIP.Header = {.Offset = GetCursorOffset(), .Type = FEXCore::CPU::RelocationTypes::RELOC_GUEST_RIP_MOVE};
  // NOTE: Cache serialization will subtract the guest binary base address later to produce consistency results
  RIPReloc.GuestRIP.GuestRIP = Constant;
  RIPReloc.GuestRIP.RegisterIndex = Reg.Idx();

  // Pointers are required to fit within 48-bit VA space.
  // TODO: Force 6-byte `MaxSize`, with sign extension to 64-bit. Current code not smart enough to handle negatives.
  // 48-bit sign extension works because x86-64 guests only receive 47-bit VA space, with 48-bit being reserved for kernel.
  // Additional quirk, "canonical" 48-bit pointers on x86-64, sign extend the 48-bit as well (Which is why kernel pointers are negative).
  LoadConstant(ARMEmitter::Size::i64Bit, Reg, Constant, FEXCore::CPU::Arm64Emitter::PadType::AUTOPAD);

  Relocations.emplace_back(RIPReloc);
}

// Hands the accumulated relocation list over to the caller. Before doing so,
// every guest-RIP relocation is rebased by subtracting `GuestBaseAddress`.
fextl::vector<FEXCore::CPU::Relocation> Arm64JITCore::TakeRelocations(uint64_t GuestBaseAddress) {
  using FEXCore::CPU::RelocationTypes;

  // Rebase relocations to library base address
  for (auto& Entry : Relocations) {
    const auto Type = Entry.Header.Type;
    const bool CarriesGuestRIP = Type == RelocationTypes::RELOC_GUEST_RIP_MOVE || Type == RelocationTypes::RELOC_GUEST_RIP_LITERAL;

    if (CarriesGuestRIP) {
      Entry.GuestRIP.GuestRIP -= GuestBaseAddress;
    }
  }

  return std::move(Relocations);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/AtomicOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Context/Context.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/JIT/JITClass.h"

namespace FEXCore::CPU {
DEF_OP(CASPair) {
  // Paired compare-and-swap: compares the two-element pair at Addr against
  // {ExpectedLo, ExpectedHi} and, when equal, stores {DesiredLo, DesiredHi}.
  // The pair observed in memory (or the expected pair on success in the LL/SC
  // path, which is the same value) ends up in {OutLo, OutHi}.
  auto Op = IROp->C<IR::IROp_CASPair>();
  LOGMAN_THROW_A_FMT(IROp->ElementSize == IR::OpSize::i32Bit || IROp->ElementSize == IR::OpSize::i64Bit, "Wrong element size");
  // ElementSize is the size of each pair element
  auto Dst0 = GetReg(Op->OutLo);
  auto Dst1 = GetReg(Op->OutHi);
  auto Expected0 = GetReg(Op->ExpectedLo);
  auto Expected1 = GetReg(Op->ExpectedHi);
  auto Desired0 = GetReg(Op->DesiredLo);
  auto Desired1 = GetReg(Op->DesiredHi);
  auto MemSrc = GetReg(Op->Addr);

  const auto EmitSize = IROp->ElementSize == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;
  if (CTX->HostFeatures.SupportsAtomics) {
    // RA has heuristics to try to pair sources, but we need to handle the cases
    // where they fail. We do so by moving to temporaries. Note we use 64-bit
    // moves here even for 32-bit cmpxchg, for the Firestorm register renamer.
    // A usable pair is an even-numbered register followed by the next register.
    if (Desired1.Idx() != (Desired0.Idx() + 1) || Desired0.Idx() & 1) {
      mov(ARMEmitter::Size::i64Bit, TMP1, Desired0);
      mov(ARMEmitter::Size::i64Bit, TMP2, Desired1);
      Desired0 = TMP1;
      Desired1 = TMP2;
    }

    // Same pairing requirement for the compare/result registers; fall back to
    // TMP3/TMP4 when the allocated destinations don't form a valid pair.
    auto CaspalDst0 = Dst0;
    auto CaspalDst1 = Dst1;
    if (CaspalDst1.Idx() != (CaspalDst0.Idx() + 1) || CaspalDst0.Idx() & 1) {
      CaspalDst0 = TMP3;
      CaspalDst1 = TMP4;
    }

    // We can't clobber the source, these moves are inherently required due to
    // ISA limitations. But by making them 64-bit, Firestorm can rename.
    mov(ARMEmitter::Size::i64Bit, CaspalDst0, Expected0);
    mov(ARMEmitter::Size::i64Bit, CaspalDst1, Expected1);
    caspal(EmitSize, CaspalDst0, CaspalDst1, Desired0, Desired1, MemSrc);

    // If temporaries were used for the result pair, copy it to the real destinations.
    if (CaspalDst0 != Dst0) {
      mov(ARMEmitter::Size::i64Bit, Dst0, CaspalDst0);
      mov(ARMEmitter::Size::i64Bit, Dst1, CaspalDst1);
    }
  } else {
    // Save NZCV so we don't have to mark this op as clobbering NZCV (the
    // SupportsAtomics path does not clobber NZCV and this !SupportsAtomics path
    // is so slow it's not worth the complexity of splitting the IR op.). We
    // clobber NZCV inside the hot loop and we can't replace cmp/ccmp/b.ne with
    // something NZCV-preserving without requiring an extra instruction.
    mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

    ARMEmitter::BackwardLabel LoopTop;
    ARMEmitter::ForwardLabel LoopNotExpected;
    ARMEmitter::ForwardLabel LoopExpected;
    (void)Bind(&LoopTop);

    // This instruction sequence must be synced with HandleCASPAL_Armv8.
    ldaxp(EmitSize, TMP2, TMP3, MemSrc);
    // cmp + ccmp together test that both loaded halves equal the expected pair.
    cmp(EmitSize, TMP2, Expected0);
    ccmp(EmitSize, TMP3, Expected1, ARMEmitter::StatusFlags::None, ARMEmitter::Condition::CC_EQ);
    (void)b(ARMEmitter::Condition::CC_NE, &LoopNotExpected);
    // TMP2 is reused as the store-exclusive status; non-zero means retry.
    stlxp(EmitSize, TMP2, Desired0, Desired1, MemSrc);
    (void)cbnz(EmitSize, TMP2, &LoopTop);
    // Success: memory held the expected pair, so that is the result.
    mov(EmitSize, Dst0, Expected0);
    mov(EmitSize, Dst1, Expected1);

    (void)b(&LoopExpected);

    (void)Bind(&LoopNotExpected);
    // Mismatch: return the pair that was actually observed in memory.
    mov(EmitSize, Dst0, TMP2.R());
    mov(EmitSize, Dst1, TMP3.R());
    // exclusive monitor needs to be cleared here
    // Might have hit the case where ldaxp was hit but stlxp wasn't
    clrex();
    (void)Bind(&LoopExpected);

    // Restore the NZCV saved at the top of this path
    msr(ARMEmitter::SystemRegister::NZCV, TMP1);
  }
}

DEF_OP(CAS) {
  // Single-element compare-and-swap on the memory at Addr.
  auto Op = IROp->C<IR::IROp_CAS>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);
  // Loaded = *Addr
  // if (Loaded == Expected) { *Addr = Desired; }
  // Dst = Loaded
  // This will write to memory! Careful!

  auto Expected = GetReg(Op->Expected);
  auto Desired = GetReg(Op->Desired);
  auto MemSrc = GetReg(Op->Addr);
  auto Dst = GetReg(Node);

  if (CTX->HostFeatures.SupportsAtomics) {
    // casal uses its first register as both the compare value and the result.
    // It can only operate in place on Dst when Dst already holds Expected and
    // doesn't alias the address or the desired value.
    if (Expected == Dst && Dst != MemSrc && Dst != Desired) {
      casal(SubEmitSize, Dst, Desired, MemSrc);
    } else {
      // Otherwise go through TMP2 so no source register is clobbered early.
      mov(EmitSize, TMP2, Expected);
      casal(SubEmitSize, TMP2, Desired, MemSrc);
      mov(EmitSize, Dst, TMP2.R());
    }
  } else {
    // LL/SC fallback loop.
    ARMEmitter::BackwardLabel LoopTop;
    ARMEmitter::ForwardLabel LoopNotExpected;
    ARMEmitter::ForwardLabel LoopExpected;
    (void)Bind(&LoopTop);
    ldaxr(SubEmitSize, TMP2, MemSrc);
    // For sub-word sizes only the low 8/16 bits of Expected take part in the compare.
    if (IROp->Size == IR::OpSize::i8Bit) {
      cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTB, 0);
    } else if (IROp->Size == IR::OpSize::i16Bit) {
      cmp(EmitSize, TMP2, Expected, ARMEmitter::ExtendedType::UXTH, 0);
    } else {
      cmp(EmitSize, TMP2, Expected);
    }
    (void)b(ARMEmitter::Condition::CC_NE, &LoopNotExpected);
    // TMP3 receives the store-exclusive status; non-zero means retry.
    stlxr(SubEmitSize, TMP3, Desired, MemSrc);
    (void)cbnz(EmitSize, TMP3, &LoopTop);
    // Success: the result is taken from the Expected register.
    mov(EmitSize, Dst, Expected);
    (void)b(&LoopExpected);

    (void)Bind(&LoopNotExpected);
    // Mismatch: return the value that was actually loaded.
    mov(EmitSize, Dst, TMP2.R());
    // exclusive monitor needs to be cleared here
    // Might have hit the case where ldaxr was hit but stlxr wasn't
    clrex();
    (void)Bind(&LoopExpected);
  }
}

DEF_OP(AtomicSwap) {
  // Atomically stores Value to the memory at Addr; the node's result is the
  // value that memory held beforehand.
  auto Op = IROp->C<IR::IROp_AtomicSwap>();
  const auto OpSize = IROp->Size;
  LOGMAN_THROW_A_FMT(
    OpSize == IR::OpSize::i64Bit || OpSize == IR::OpSize::i32Bit || OpSize == IR::OpSize::i16Bit || OpSize == IR::OpSize::i8Bit,
    "Unexpected CAS size");

  const auto Address = GetReg(Op->Addr);
  const auto NewValue = GetReg(Op->Value);

  const auto EmitSize = ConvertSize(IROp);

  // Memory access width; anything that isn't 64/32/16-bit is treated as 8-bit.
  const auto SubEmitSize = [OpSize] {
    switch (OpSize) {
    case IR::OpSize::i64Bit: return ARMEmitter::SubRegSize::i64Bit;
    case IR::OpSize::i32Bit: return ARMEmitter::SubRegSize::i32Bit;
    case IR::OpSize::i16Bit: return ARMEmitter::SubRegSize::i16Bit;
    default: return ARMEmitter::SubRegSize::i8Bit;
    }
  }();

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    stlxr(SubEmitSize, TMP4, NewValue, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    // Keep only the low OpSize bits of the loaded value.
    ubfm(EmitSize, GetReg(Node), TMP2, 0, IR::OpSizeAsBits(OpSize) - 1);
    return;
  }

  ldswpal(SubEmitSize, NewValue, GetReg(Node), Address);
}

DEF_OP(AtomicFetchAdd) {
  // Atomically adds Value to the memory at Addr; the node's result is the
  // value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchAdd>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto Addend = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    add(EmitSize, TMP3, TMP2, Addend);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  ldaddal(SubEmitSize, Addend, GetReg(Node), Address);
}

DEF_OP(AtomicFetchSub) {
  // Atomically subtracts Value from the memory at Addr; the node's result is
  // the value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchSub>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto Subtrahend = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    sub(EmitSize, TMP3, TMP2, Subtrahend);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  // The atomic path is expressed as an atomic add of the negated operand.
  neg(EmitSize, TMP2, Subtrahend);
  ldaddal(SubEmitSize, TMP2, GetReg(Node), Address);
}

DEF_OP(AtomicFetchAnd) {
  // Atomically ANDs Value into the memory at Addr; the node's result is the
  // value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchAnd>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto Mask = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    and_(EmitSize, TMP3, TMP2, Mask);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  // The atomic path is expressed as an atomic bit-clear of the inverted mask.
  mvn(EmitSize, TMP2, Mask);
  ldclral(SubEmitSize, TMP2, GetReg(Node), Address);
}

DEF_OP(AtomicFetchCLR) {
  // Atomically clears the bits of Value in the memory at Addr; the node's
  // result is the value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchCLR>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto BitsToClear = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    bic(EmitSize, TMP3, TMP2, BitsToClear);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  ldclral(SubEmitSize, BitsToClear, GetReg(Node), Address);
}

DEF_OP(AtomicFetchOr) {
  // Atomically ORs Value into the memory at Addr; the node's result is the
  // value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchOr>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto BitsToSet = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    orr(EmitSize, TMP3, TMP2, BitsToSet);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  ldsetal(SubEmitSize, BitsToSet, GetReg(Node), Address);
}

DEF_OP(AtomicFetchXor) {
  // Atomically XORs Value into the memory at Addr; the node's result is the
  // value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchXor>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);
  const auto BitsToToggle = GetReg(Op->Value);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    eor(EmitSize, TMP3, TMP2, BitsToToggle);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  ldeoral(SubEmitSize, BitsToToggle, GetReg(Node), Address);
}

DEF_OP(AtomicFetchNeg) {
  // Atomically negates the value in the memory at Addr; the node's result is
  // the value that memory held beforehand.
  const auto Op = IROp->C<IR::IROp_AtomicFetchNeg>();
  const auto EmitSize = ConvertSize(IROp);
  const auto SubEmitSize = ConvertSubRegSize8(IROp->Size);

  const auto Address = GetReg(Op->Addr);

  if (!CTX->HostFeatures.SupportsAtomics) {
    // LL/SC fallback: retry until the exclusive store succeeds.
    ARMEmitter::BackwardLabel Retry;
    (void)Bind(&Retry);
    ldaxr(SubEmitSize, TMP2, Address);
    neg(EmitSize, TMP3, TMP2);
    stlxr(SubEmitSize, TMP4, TMP3, Address);
    (void)cbnz(EmitSize, TMP4, &Retry);
    mov(EmitSize, GetReg(Node), TMP2.R());
    return;
  }

  // Use a CAS loop to avoid needing to emulate unaligned LLSC atomics
  ldr(SubEmitSize, TMP2, Address);
  ARMEmitter::BackwardLabel Retry;
  (void)Bind(&Retry);
  // TMP4 remembers what we believed memory held; TMP3 is the negated candidate.
  mov(EmitSize, TMP4, TMP2);
  neg(EmitSize, TMP3, TMP2);
  casal(SubEmitSize, TMP2, TMP3, Address);
  // If casal observed a different value than TMP4, retry with the fresh one.
  sub(EmitSize, TMP3, TMP2, TMP4);
  (void)cbnz(EmitSize, TMP3, &Retry);
  mov(EmitSize, GetReg(Node), TMP2.R());
}

DEF_OP(TelemetrySetValue) {
  // Atomically ORs (Value != 0) into the 64-bit telemetry slot selected by
  // TelemetryValueIndex. Compiles to nothing when telemetry is disabled.
#ifndef FEX_DISABLE_TELEMETRY
  auto Op = IROp->C<IR::IROp_TelemetrySetValue>();
  auto Src = GetReg(Op->Value);

  // TMP2 = address of the telemetry value.
  ldr(TMP2, STATE_PTR_IDX(CpuStateFrame, Pointers.TelemetryValueAddresses, Op->TelemetryValueIndex));

  // Normalise the source to 0/1 in TMP1.
  // Cortex fuses cmp+cset.
  cmp(ARMEmitter::Size::i32Bit, Src, 0);
  cset(ARMEmitter::Size::i32Bit, TMP1, ARMEmitter::Condition::CC_NE);

  if (CTX->HostFeatures.SupportsAtomics) {
    stsetl(ARMEmitter::SubRegSize::i64Bit, TMP1, TMP2);
  } else {
    // LL/SC fallback. This must match the atomic path above:
    //  - OR in the normalised 0/1 from TMP1, not the raw source value.
    //  - OR at 64-bit width so the upper half of the loaded value survives.
    //  - Use a status register (TMP4) distinct from the data register (TMP3);
    //    using the same register for both is CONSTRAINED UNPREDICTABLE.
    ARMEmitter::BackwardLabel LoopTop;
    (void)Bind(&LoopTop);
    ldaxr(ARMEmitter::SubRegSize::i64Bit, TMP3, TMP2);
    orr(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1.R());
    stlxr(ARMEmitter::SubRegSize::i64Bit, TMP4, TMP3, TMP2);
    (void)cbnz(ARMEmitter::Size::i32Bit, TMP4, &LoopTop);
  }
#endif
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/BranchOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Context/Context.h"
#include "FEXCore/IR/IR.h"
#include "Interface/Core/LookupCache.h"

#include "Interface/Core/JIT/JITClass.h"

#include <FEXCore/Core/Thunks.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/MathUtils.h>

namespace FEXCore::CPU {

DEF_OP(CallbackReturn) {
  // Emits the sequence that returns from JIT code back to the thunk that
  // invoked the callback.
  const auto Scratch = ARMEmitter::Reg::r2;
  const auto Scratch32 = ARMEmitter::WReg::w2;
  const auto Scratch64 = ARMEmitter::XReg::x2;
  const auto RefCounterOffset = offsetof(FEXCore::Core::CpuStateFrame, SignalHandlerRefCounter);
  const auto GuestRSPOffset = offsetof(FEXCore::Core::CpuStateFrame, State.gregs[X86State::REG_RSP]);

  // spill back to CTX
  SpillStaticRegs(TMP1);

  // First we must reset the stack
  ResetStack();

  // We can now lower the ref counter again
  ldr(Scratch32, STATE, RefCounterOffset);
  sub(ARMEmitter::Size::i32Bit, Scratch, Scratch, 1);
  str(Scratch32, STATE, RefCounterOffset);

  // We need to adjust an additional 8 bytes to get back to the original "misaligned" RSP state
  ldr(Scratch64, STATE, GuestRSPOffset);
  add(ARMEmitter::Size::i64Bit, Scratch, Scratch, 8);
  str(Scratch64, STATE, GuestRSPOffset);

  PopCalleeSavedRegisters();

  // Return to the thunk
  ret();
}

DEF_OP(ExitFunction) {
  auto Op = IROp->C<IR::IROp_ExitFunction>();

  ResetStack();

  if (CTX->HostFeatures.IsInstCountCI) [[unlikely]] {
    // Emit function end marker
    udf(0x420F);
  }

  uint64_t NewRIP;

  if (IsInlineConstant(Op->NewRIP, &NewRIP) || IsInlineEntrypointOffset(Op->NewRIP, &NewRIP)) {
#ifdef ARCHITECTURE_arm64ec
    if (NewRIP < EC_CODE_BITMAP_MAX_ADDRESS && RtlIsEcCode(NewRIP)) {
      str(REG_CALLRET_SP, STATE_PTR(CpuStateFrame, State.callret_sp));
      add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, StaticRegisters[X86State::REG_RSP], 0);
      InsertGuestRIPMove(EC_CALL_CHECKER_PC_REG, NewRIP);
      ldr(TMP2, STATE_PTR(CpuStateFrame, Pointers.ExitFunctionEC));
      br(TMP2);
    } else {
#endif
      // In order to support direct branches without constantly hitting the L1 cache, we emit a call to a block linker,
      // this will compile the branch target block when it is hit and replace the branch to the linker at the callsite
      // with a direct branch to the destination block. Upon invalidation of the target block the backpatch is undone.
      //
      // In addition, to avoid needing to lookup in the cache for returns and any indirect branch prediction penalty,
      // a shadow stack of <GuestReturnRIP, HostReturnPC> pairs is maintained, acting as a first level cache for any
      // return operations. As the guest may not balance calls and returns exactly, an exception handler is expected to
      // be installed by the frontend, to reset the shadow stack to the middle of its valid bounds on overflow/underflow.
      // This shadow stack is also cleared on block invalidation operations or codebuffer switches, to ensure all pointed-to
      // host code is always valid.

      // This code will be backpatched by Arm64JITCore_ExitFunctionLink, below is an enumeration of all the possible cases.
      // Jump thunks are emitted in JIT.cpp after compilation of the entire multiblock.
      //
      // Call with known return block - unlinked
      //    00: adr TMP1, 0xC
      //    04: stp RetReg, TMP1, [SpReg, -0x10]!
      //    08: bl JmpThunk00
      //    JmpThunk00:
      //    00: b 0x8
      //    04: br TMP1
      //    08: ldr TMP1, <Shared exit linker>
      //    0c: blr TMP1
      //    10: HostCode
      //    18: GuestRIP
      //    20: CallerOffset
      //
      // Call with known return block after backpatching - linked in branch immediate range
      //    00: adr TMP1, 0xC
      //    04: stp RetReg, TMP1, [SpReg, -0x10]!
      //    08: bl HostCode                                        - MODIFIED
      //
      // Call with known return block after backpatching - linked out of range
      //    00: adr TMP1, 0xC
      //    04: stp RetReg, TMP1, [SpReg, -0x10]!
      //    08: bl JmpThunk00
      //    JmpThunk00:
      //    00: ldr TMP1, 0x10                                     - MODIFIED 2nd
      //    04: br TMP1
      //    08: ldr TMP1, <Shared exit linker>
      //    0c: blr TMP1
      //    10: HostCode                                           - MODIFIED 1st
      //    18: GuestRIP
      //    20: CallerOffset
      //
      // Jump - unlinked
      //    00: b JmpThunk00
      //    JmpThunk00:
      //    00: b 0x8
      //    04: br TMP1
      //    08: ldr TMP1, <Shared exit linker>
      //    0c: blr TMP1
      //    10: HostCode
      //    18: GuestRIP
      //    20: CallerOffset
      //
      // Jump after backpatching - linked in branch immediate range
      //    00: b HostCode                                         - MODIFIED
      //
      // Jump after backpatching - linked out of range
      //    00: b JmpThunk00
      //    JmpThunk00:
      //    00: ldr TMP1, 0x10                                     - MODIFIED 2nd
      //    04: br TMP1
      //    08: ldr TMP1, <Shared exit linker>
      //    0c: blr TMP1
      //    10: HostCode                                           - MODIFIED 1st
      //    18: GuestRIP
      //    20: CallerOffset

      ARMEmitter::ForwardLabel l_BranchHost;
      ARMEmitter::ForwardLabel l_CallReturn;
      if (Op->Hint == IR::BranchHint::Call) {
        if (!Op->CallReturnBlock.IsInvalid()) {
          auto CallReturnAddressReg = GetReg(Op->CallReturnAddress).X();
          PendingCallReturnTargetLabel = &CallReturnTargets.try_emplace(Op->CallReturnBlock.ID()).first->second;
          (void)adr(TMP1, &l_CallReturn);
          stp<ARMEmitter::IndexType::PRE>(CallReturnAddressReg, TMP1, REG_CALLRET_SP, -0x10);
        } else {
          stp<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::zr, ARMEmitter::XReg::zr, REG_CALLRET_SP, -0x10);
        }
      } else if (Op->Hint == IR::BranchHint::CheckTF) {
        ARMEmitter::ForwardLabel TFUnset;
        ldrb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));
        (void)cbz(ARMEmitter::Size::i32Bit, TMP1, &TFUnset);
        InsertGuestRIPMove(TMP1, NewRIP);
        str(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, State.rip));
        ldr(TMP2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.DispatcherLoopTop));
        blr(TMP2);
        (void)Bind(&TFUnset);
      }

      EmitLinkedBranch(NewRIP, Op->Hint == IR::BranchHint::Call);
      (void)Bind(&l_CallReturn);
#ifdef ARCHITECTURE_arm64ec
    }
#endif
  } else {
    ARMEmitter::ForwardLabel SkipFullLookup;
    auto RipReg = GetReg(Op->NewRIP);

    if (Op->Hint == IR::BranchHint::Return) {
      // First try to pop from the call-ret stack, otherwise follow the normal path (but ending in a ret)
      ldp<ARMEmitter::IndexType::POST>(TMP1, TMP2, REG_CALLRET_SP, 0x10);
      sub(TMP1, TMP1, RipReg.X());
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &SkipFullLookup);
    }

    // L1 Cache
    ldp<ARMEmitter::IndexType::OFFSET>(TMP1, TMP2, STATE, offsetof(FEXCore::Core::CpuStateFrame, State.L1Pointer));

    // Calculate (tmp1 + ((ripreg & L1_ENTRIES_MASK) << 4)) for the address
    // L1Mask is pre-shifted.
    and_(ARMEmitter::Size::i64Bit, TMP2, TMP2, RipReg, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(sizeof(LookupCache::LookupCacheEntry)));
    add(TMP1, TMP1, TMP2);

    ldp<ARMEmitter::IndexType::OFFSET>(TMP2, TMP1, TMP1, 0);

    // Note: sub+cbnz used over cmp+br to preserve flags.
    sub(TMP1, TMP1, RipReg.X());
    (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &SkipFullLookup);
    ldr(TMP2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.DispatcherLoopTop));
    str(RipReg.X(), STATE, offsetof(FEXCore::Core::CpuStateFrame, State.rip));

    (void)Bind(&SkipFullLookup);
    if (Op->Hint == IR::BranchHint::Call) {
      ARMEmitter::ForwardLabel l_CallReturn;
      if (!Op->CallReturnBlock.IsInvalid()) {
        auto CallReturnAddressReg = GetReg(Op->CallReturnAddress).X();
        PendingCallReturnTargetLabel = &CallReturnTargets.try_emplace(Op->CallReturnBlock.ID()).first->second;
        (void)adr(TMP1, &l_CallReturn);
        stp<ARMEmitter::IndexType::PRE>(CallReturnAddressReg, TMP1, REG_CALLRET_SP, -0x10);
      } else {
        stp<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::zr, ARMEmitter::XReg::zr, REG_CALLRET_SP, -0x10);
      }
      blr(TMP2);
      (void)Bind(&l_CallReturn);
    } else if (Op->Hint == IR::BranchHint::Return) {
      ret(TMP2);
    } else {
      br(TMP2);
    }
  }
}

// Unconditional jump. This handler only stores the label returned by
// JumpTarget() for the destination block into PendingTargetLabel; it issues
// no emitter call of its own.
DEF_OP(Jump) {
  const auto JumpOp = IROp->C<IR::IROp_Jump>();
  PendingTargetLabel = JumpTarget(JumpOp->TargetBlock);
}

// Conditional jump.
//
// Emits a branch to the "true" block and records the "false" block's label in
// PendingTargetLabel. Two forms are handled:
//  - FromNZCV: a conditional branch on the mapped condition code.
//  - Otherwise: a compare-and-branch (cbz/cbnz) or test-bit-and-branch
//    (tbz/tbnz) on the GPR holding Cmp1, with Cmp2 required to be an inline
//    constant (0 for EQ/NEQ, a bit index below 64 for TSTZ/TSTNZ).
DEF_OP(CondJump) {
  auto Op = IROp->C<IR::IROp_CondJump>();

  auto TrueTargetLabel = JumpTarget(Op->TrueBlock);

  if (Op->FromNZCV) {
    b_OrRestart(MapCC(Op->Cond), TrueTargetLabel);
  } else {
    // Zero-initialised so that the value handed to the tbz/tbnz emitters is
    // never indeterminate, even if IsInlineConstant() fails in a build where
    // the assertion macros below compile to nothing.
    uint64_t Const {};
    const bool isConst = IsInlineConstant(Op->Cmp2, &Const);

    auto Reg = GetReg(Op->Cmp1);
    const auto Size = Op->CompareSize == IR::OpSize::i32Bit ? ARMEmitter::Size::i32Bit : ARMEmitter::Size::i64Bit;

    LOGMAN_THROW_A_FMT(IsGPR(Op->Cmp1), "CondJump: Expected GPR");
    LOGMAN_THROW_A_FMT(isConst, "CondJump: Expected constant source");

    if (Op->Cond == IR::CondClass::EQ) {
      // Compare against zero and branch if equal.
      LOGMAN_THROW_A_FMT(Const == 0, "CondJump: Expected 0 source");
      cbz_OrRestart(Size, Reg, TrueTargetLabel);
    } else if (Op->Cond == IR::CondClass::NEQ) {
      // Compare against zero and branch if not equal.
      LOGMAN_THROW_A_FMT(Const == 0, "CondJump: Expected 0 source");
      cbnz_OrRestart(Size, Reg, TrueTargetLabel);
    } else if (Op->Cond == IR::CondClass::TSTZ) {
      // Const is the index of the bit to test.
      LOGMAN_THROW_A_FMT(Const < 64, "CondJump: Expected valid bit source");
      tbz_OrRestart(Reg, Const, TrueTargetLabel);
    } else if (Op->Cond == IR::CondClass::TSTNZ) {
      // Const is the index of the bit to test.
      LOGMAN_THROW_A_FMT(Const < 64, "CondJump: Expected valid bit source");
      tbnz_OrRestart(Reg, Const, TrueTargetLabel);
    } else {
      LOGMAN_THROW_A_FMT(false, "CondJump expected simple condition");
    }
  }

  PendingTargetLabel = JumpTarget(Op->FalseBlock);
}

// Emits a call into the syscall handler.
//
// Register setup for the call emitted below:
//   x0 = Pointers.SyscallHandlerObj
//   x1 = STATE
//   x2 = address of the argument array built on the host stack
//   x3 = Pointers.SyscallHandlerFunc (the call target)
DEF_OP(Syscall) {
  const auto SyscallOp = IROp->C<IR::IROp_Syscall>();

  PushDynamicRegs(TMP1);

  // All bits set: spill/fill masks cover every static GPR and FPR.
  const uint32_t SpilledGPRs = ~0U;
  const uint32_t SpilledFPRs = ~0U;

  SpillStaticRegs(TMP1, {
                          .GPRSpillMask = SpilledGPRs,
                          .FPRSpillMask = SpilledFPRs,
                        });

  // With the registers spilled, publish a non-zero marker in InSyscallInfo.
  // The constant is limited to 16 bits so LoadConstant stays a single
  // instruction; the signal handler can inspect this field to tell whether a
  // syscall is in flight.
  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, SpilledGPRs & 0xFFFF);
  str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));

  // Reserve a 16-byte aligned area on the host stack and store every valid
  // argument into its 8-byte slot.
  const uint64_t ArgAreaSize = AlignUp(FEXCore::HLE::SyscallArguments::MAX_ARGS * 8, 16);
  sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, ArgAreaSize);
  for (uint32_t ArgIdx = 0; ArgIdx < FEXCore::HLE::SyscallArguments::MAX_ARGS; ++ArgIdx) {
    if (!SyscallOp->Header.Args[ArgIdx].IsInvalid()) {
      str(GetReg(SyscallOp->Header.Args[ArgIdx]).X(), ARMEmitter::Reg::rsp, ArgIdx * 8);
    }
  }

  ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.SyscallHandlerObj));
  ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.SyscallHandlerFunc));
  mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, STATE.R());

  // "add #0" is used as the move because it accepts SP as a source.
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::rsp, 0);
  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<uint64_t, void*, void*, void*>(ARMEmitter::Reg::r3);
  } else {
    blr(ARMEmitter::Reg::r3);
  }

  // Release the argument area.
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, ArgAreaSize);

  // The handler's result sits in x0. Refill the static registers, handing
  // r1/r2 to FillStaticRegs as its optional registers.
  FillStaticRegs({
    .OptionalReg = ARMEmitter::Reg::r1,
    .OptionalReg2 = ARMEmitter::Reg::r2,
    .GPRFillMask = SpilledGPRs,
    .FPRFillMask = SpilledFPRs,
  });

  // Static registers are back in place, so clear the in-syscall marker.
  str(ARMEmitter::XReg::zr, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));

  PopDynamicRegs();

  // The result is copied into the node's register for every OS ABI except
  // OS_GENERIC; for OS_GENERIC the registers refilled by FillStaticRegs are
  // left untouched.
  if (CTX->SyscallHandler->GetOSABI() != FEXCore::HLE::SyscallOSABI::OS_GENERIC) {
    mov(ARMEmitter::Size::i64Bit, GetReg(Node), ARMEmitter::Reg::r0);
  }
}

// Emits a call to a named thunk.
//
// Static registers are spilled (NZCV excluded), dynamic registers are pushed,
// the call is made, and both are restored in the reverse order.
DEF_OP(Thunk) {
  auto Op = IROp->C<IR::IROp_Thunk>();
  // Register setup performed below:
  // X0: argument pointer (Op->ArgPtr)
  // X2: call target, set up by InsertNamedThunkRelocation from Op->ThunkNameHash
  // NOTE(review): this comment previously read "X0: CTX, X1: Args (from guest stack)",
  // which does not match the code (X1 is never written here) — confirm against
  // the thunk handler's signature.

  // spill to ctx before ra64 spill
  SpillStaticRegs(TMP1, {
                          .NZCV = false,
                        });

  PushDynamicRegs(TMP1);

  mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GetReg(Op->ArgPtr));

  InsertNamedThunkRelocation(ARMEmitter::Reg::r2, Op->ThunkNameHash);
  // Either route the call through GenerateIndirectRuntimeCall or branch
  // directly, depending on the DisableVixlIndirectCalls config option.
  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<void, void*, void*>(ARMEmitter::Reg::r2);
  } else {
    blr(ARMEmitter::Reg::r2);
  }

  PopDynamicRegs();

  // load from ctx after ra64 refill
  FillStaticRegs({
    .NZCV = false,
  });
}

// Emits an inline comparison between CodeLength bytes at the address held in
// the first argument register and the bytes recorded in Op->CodeOriginal.
// The destination register receives 0 when every chunk matches and 1 on the
// first mismatch. Comparison proceeds in 8-byte chunks, then 4, 2 and 1 for
// whatever remains.
DEF_OP(ValidateCode) {
  auto Op = IROp->C<IR::IROp_ValidateCode>();
  const auto Expected = Op->CodeOriginal.data();
  const auto CodePtr = GetReg(Op->Header.Args[0]).X();
  const auto Result = GetReg(Node);

  int Remaining = Op->CodeLength;
  int Cursor = 0;
  ARMEmitter::ForwardLabel Mismatch;

  // Emits one compare-and-branch per chunk of `Width` bytes for as long as at
  // least `Width` bytes are left. `EmitLoads` must place the live value in
  // TMP1 and the expected constant in TMP2.
  const auto CompareChunks = [&](size_t Width, auto&& EmitLoads) {
    for (; Remaining >= Width; Remaining -= Width, Cursor += Width) {
      EmitLoads();
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, TMP2);
      cbnz_OrRestart(ARMEmitter::Size::i64Bit, TMP1, &Mismatch);
    }
  };

  CompareChunks(8, [&]() {
    ldr(TMP1, CodePtr, Cursor);
    LoadConstant(ARMEmitter::Size::i64Bit, TMP2, *(const uint64_t*)(Expected + Cursor));
  });

  CompareChunks(4, [&]() {
    ldr(TMP1.W(), CodePtr, Cursor);
    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint32_t*)(Expected + Cursor));
  });

  CompareChunks(2, [&]() {
    ldrh(TMP1.W(), CodePtr, Cursor);
    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint16_t*)(Expected + Cursor));
  });

  CompareChunks(1, [&]() {
    ldrb(TMP1.W(), CodePtr, Cursor);
    LoadConstant(ARMEmitter::Size::i32Bit, TMP2, *(const uint8_t*)(Expected + Cursor));
  });

  // Fallthrough: everything matched -> 0. Mismatch label -> 1.
  ARMEmitter::ForwardLabel Done;
  LoadConstant(ARMEmitter::Size::i32Bit, Result, 0);
  b_OrRestart(&Done);
  BindOrRestart(&Mismatch);
  LoadConstant(ARMEmitter::Size::i32Bit, Result, 1);
  BindOrRestart(&Done);
}

// Emits a call to the function stored in Pointers.ThreadRemoveCodeEntryFromJIT,
// passing STATE in x0 and the value of `Entry` in x1. Dynamic and static
// registers are saved before the call and restored afterwards.
DEF_OP(ThreadRemoveCodeEntry) {
  PushDynamicRegs(TMP4);
  SpillStaticRegs(TMP4);

  // Arguments are passed as follows:
  // X0: Thread
  // X1: RIP
  mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, STATE.R());

  // TODO: Relocations don't seem to be wired up to this...?
  // `Entry` is defined outside this block; it is loaded with AUTOPAD padding.
  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, Entry, CPU::Arm64Emitter::PadType::AUTOPAD);

  // x2 holds the call target.
  ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.ThreadRemoveCodeEntryFromJIT));
  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<void, void*, void*>(ARMEmitter::Reg::r2);
  } else {
    blr(ARMEmitter::Reg::r2);
  }
  FillStaticRegs();

  // Fix the stack and any values that were stepped on
  PopDynamicRegs();
}

// Emits a call to the CPUID handler (Pointers.CPUIDFunction) and unpacks its
// 128-bit result into the four 32-bit output registers:
//   OutEAX = low 32 bits of the first result register
//   OutEBX = high 32 bits of the first result register
//   OutECX = low 32 bits of the second result register
//   OutEDX = high 32 bits of the second result register
DEF_OP(CPUID) {
  auto Op = IROp->C<IR::IROp_CPUID>();

  // NOTE(review): an ISB is emitted ahead of the call — presumably to mirror
  // the serializing behaviour of x86 CPUID; confirm intent.
  isb();
  // Copy the inputs into temporaries before the static registers are spilled.
  mov(ARMEmitter::Size::i64Bit, TMP2, GetReg(Op->Function));
  mov(ARMEmitter::Size::i64Bit, TMP3, GetReg(Op->Leaf));

  PushDynamicRegs(TMP4);
  SpillStaticRegs(TMP4);

  // x0 = CPUID Handler
  // x1 = CPUID Function
  // x2 = CPUID Leaf
  ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.CPUIDObj));
  ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.CPUIDFunction));

  // When TMP_ABIARGS is not set the temporaries must be moved into the
  // argument registers explicitly.
  // NOTE(review): presumably TMP2/TMP3 already alias x1/x2 when TMP_ABIARGS is set — confirm.
  if (!TMP_ABIARGS) {
    mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, TMP2);
    mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, TMP3);
  }

  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<__uint128_t, void*, uint64_t, uint64_t>(ARMEmitter::Reg::r3);
  } else {
    blr(ARMEmitter::Reg::r3);
  }

  // Move the two result registers into TMP1/TMP2 before the static registers
  // are refilled (skipped when TMP_ABIARGS is set).
  if (!TMP_ABIARGS) {
    mov(ARMEmitter::Size::i64Bit, TMP1, ARMEmitter::Reg::r0);
    mov(ARMEmitter::Size::i64Bit, TMP2, ARMEmitter::Reg::r1);
  }

  FillStaticRegs();

  PopDynamicRegs();

  // Results are in x0, x1
  // Results want to be 4xi32 scalars
  mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutEAX), TMP1);
  mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutECX), TMP2);
  // Extract bits [63:32] of each result register.
  ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEBX), TMP1, 32, 32);
  ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEDX), TMP2, 32, 32);
}

// Emits a call to the XCR handler (Pointers.XCRFunction) and splits its
// 64-bit result into OutEAX (low 32 bits) and OutEDX (high 32 bits).
DEF_OP(XGetBV) {
  auto Op = IROp->C<IR::IROp_XGetBV>();

  PushDynamicRegs(TMP4);
  SpillStaticRegs(TMP4);

  // The function selector is passed as a 32-bit value in x1.
  mov(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r1, GetReg(Op->Function));

  // x0 = CPUID Handler
  // x1 = XCR Function
  ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.CPUIDObj));
  // x2 holds the call target.
  ldr(ARMEmitter::XReg::x2, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.XCRFunction));
  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<uint64_t, void*, uint32_t>(ARMEmitter::Reg::r2);
  } else {
    blr(ARMEmitter::Reg::r2);
  }

  // Preserve the result in TMP1 before the static registers are refilled
  // (skipped when TMP_ABIARGS is set).
  if (!TMP_ABIARGS) {
    mov(ARMEmitter::Size::i64Bit, TMP1, ARMEmitter::Reg::r0);
  }

  FillStaticRegs();

  PopDynamicRegs();

  // Results are in x0, need to split into i32 parts
  mov(ARMEmitter::Size::i32Bit, GetReg(Op->OutEAX), TMP1);
  ubfx(ARMEmitter::Size::i64Bit, GetReg(Op->OutEDX), TMP1, 32, 32);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/ConversionOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Core/JIT/JITClass.h"
#include "Interface/Context/Context.h"

namespace FEXCore::CPU {
// Inserts the GPR value Src into element DestIdx of DestVector and writes the
// resulting vector to the node's destination register.
//
// 128-bit and smaller operations use a plain element insert. 256-bit
// operations (asserted to require SVE256) take the SVE path below.
DEF_OP(VInsGPR) {
  const auto Op = IROp->C<IR::IROp_VInsGPR>();
  const auto OpSize = IROp->Size;

  const auto DestIdx = Op->DestIdx;
  const auto ElementSize = Op->Header.ElementSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto SubEmitSize = ConvertSubRegSize8(IROp);
  const auto ElementsPer128Bit = IR::NumElements(IR::OpSize::i128Bit, ElementSize);

  const auto Dst = GetVReg(Node);
  const auto DestVector = GetVReg(Op->DestVector);
  const auto Src = GetReg(Op->Src);

  if (HostSupportsSVE256 && Is256Bit) {
    // Bit offset of the target element; used to decide which 128-bit lane
    // the element lives in.
    const auto ElementSizeBits = IR::OpSizeAsBits(ElementSize);
    const auto Offset = ElementSizeBits * DestIdx;

    const auto SSEBitSize = Core::CPUState::XMM_SSE_REG_SIZE * 8;
    const auto InUpperLane = Offset >= SSEBitSize;

    // This is going to be a little gross. Pls forgive me.
    // Since SVE has the whole vector length agnostic programming
    // thing going on, we can't exactly freely insert entries into
    // arbitrary locations in the vector.
    //
    // SVE *does* have INSR, however this only shifts the entire
    // vector to the left by an element size and inserts a value
    // at the beginning of the vector. Not *quite* what we need.
    // (though INSR *is* very useful for other things).
    //
    // The idea is (in the case of the upper lane), move the upper
    // lane down, insert into it and recombine with the lower lane.
    //
    // In the case of the lower lane, insert and then recombine with
    // the upper lane.

    if (InUpperLane) {
      // Move the upper lane down for the insertion.
      // p0 is used as scratch: it becomes the elements active in
      // PRED_TMP_32B but not in PRED_TMP_16B.
      const auto CompactPred = ARMEmitter::PReg::p0;
      not_(CompactPred, PRED_TMP_32B.Zeroing(), PRED_TMP_16B);
      compact(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), CompactPred, DestVector.Z());
    }

    // Put data in place for destructive SPLICE below.
    mov(Dst.Z(), DestVector.Z());

    // Inserts the GPR value into the given V register.
    // Also automatically adjusts the index in the case of using the
    // moved upper lane.
    const auto Insert = [&](const ARMEmitter::VRegister& reg, int index) {
      if (InUpperLane) {
        index -= ElementsPer128Bit;
      }
      ins(SubEmitSize, reg, index, Src);
    };

    if (InUpperLane) {
      // Insert into the compacted upper lane (VTMP1), then splice it after
      // the PRED_TMP_16B-selected part of Dst.
      Insert(VTMP1, DestIdx);
      splice<ARMEmitter::OpType::Destructive>(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP1.Z());
    } else {
      // Insert directly into Dst, then splice with DestVector.
      // NOTE(review): the second SPLICE operand here is the unmodified
      // DestVector rather than a compacted upper lane — confirm this yields
      // the intended upper 128 bits, including when Dst aliases DestVector.
      Insert(Dst, DestIdx);
      splice<ARMEmitter::OpType::Destructive>(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), PRED_TMP_16B, Dst.Z(), DestVector.Z());
    }
  } else {
    // No need to move if Dst and DestVector alias one another.
    if (Dst != DestVector) {
      mov(Dst.Q(), DestVector.Q());
    }
    ins(SubEmitSize, Dst, DestIdx, Src);
  }
}

// Transfers a GPR value into the low element of a vector register with FMOV.
// 8- and 16-bit elements are zero-extended into TMP1 first and then moved as
// a 32-bit value; 32- and 64-bit elements are moved directly.
DEF_OP(VCastFromGPR) {
  const auto Op = IROp->C<IR::IROp_VCastFromGPR>();
  const auto Result = GetVReg(Node);
  const auto Gpr = GetReg(Op->Src);
  const auto ElementSize = Op->Header.ElementSize;

  switch (ElementSize) {
  case IR::OpSize::i8Bit:
  case IR::OpSize::i16Bit: {
    // Narrow sources go through TMP1 so the upper bits are cleared.
    if (ElementSize == IR::OpSize::i8Bit) {
      uxtb(ARMEmitter::Size::i32Bit, TMP1, Gpr);
    } else {
      uxth(ARMEmitter::Size::i32Bit, TMP1, Gpr);
    }
    fmov(ARMEmitter::Size::i32Bit, Result.S(), TMP1);
    break;
  }
  case IR::OpSize::i32Bit: {
    fmov(ARMEmitter::Size::i32Bit, Result.S(), Gpr);
    break;
  }
  case IR::OpSize::i64Bit: {
    fmov(ARMEmitter::Size::i64Bit, Result.D(), Gpr);
    break;
  }
  default: {
    LOGMAN_MSG_A_FMT("Unknown castGPR element size: {}", ElementSize);
    break;
  }
  }
}

// Builds a vector from two GPRs with two FMOVs: Op->Lower goes to the D view
// of the destination, then Op->Upper is moved with the emitter's trailing
// `true` flag set.
DEF_OP(VLoadTwoGPRs) {
  const auto Op = IROp->C<IR::IROp_VLoadTwoGPRs>();
  const auto Result = GetVReg(Node);

  // First the lower GPR ...
  const auto LowGpr = GetReg(Op->Lower);
  // ... then the upper one.
  const auto HighGpr = GetReg(Op->Upper);

  fmov(ARMEmitter::Size::i64Bit, Result.D(), LowGpr);
  fmov(ARMEmitter::Size::i64Bit, Result.D(), HighGpr, true);
}

// Broadcasts a GPR value into every element of the destination vector.
// 256-bit operations use the SVE (Z register) form and are asserted to
// require SVE256; everything else uses the Q register form.
DEF_OP(VDupFromGPR) {
  const auto Op = IROp->C<IR::IROp_VDupFromGPR>();

  const auto Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Gpr = GetReg(Op->Src);
  const auto LaneSize = ConvertSubRegSize8(IROp);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    dup(LaneSize, Result.Q(), Gpr);
  } else {
    dup(LaneSize, Result.Z(), Gpr);
  }
}

// Converts a signed integer held in a GPR to a scalar float with SCVTF.
// The conversion is selected by a 16-bit key: destination element size in
// bytes in the high byte, source element size in bytes in the low byte.
DEF_OP(Float_FromGPR_S) {
  const auto Op = IROp->C<IR::IROp_Float_FromGPR_S>();

  const auto Dst = GetVReg(Node);
  const auto Src = GetReg(Op->Src);

  const uint16_t ElementSize = IR::OpSizeToSize(Op->Header.ElementSize);
  const uint16_t Conv = (ElementSize << 8) | IR::OpSizeToSize(Op->SrcElementSize);

  switch (Conv) {
  // Half-precision destinations
  case 0x0204: scvtf(ARMEmitter::Size::i32Bit, Dst.H(), Src); break; // Half <- int32_t
  case 0x0208: scvtf(ARMEmitter::Size::i64Bit, Dst.H(), Src); break; // Half <- int64_t
  // Single-precision destinations
  case 0x0404: scvtf(ARMEmitter::Size::i32Bit, Dst.S(), Src); break; // Float <- int32_t
  case 0x0408: scvtf(ARMEmitter::Size::i64Bit, Dst.S(), Src); break; // Float <- int64_t
  // Double-precision destinations
  case 0x0804: scvtf(ARMEmitter::Size::i32Bit, Dst.D(), Src); break; // Double <- int32_t
  case 0x0808: scvtf(ARMEmitter::Size::i64Bit, Dst.D(), Src); break; // Double <- int64_t
  default:
    LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}, ElementSize={}, SrcElementSize={}", Conv, ElementSize, Op->SrcElementSize);
    break;
  }
}

// Scalar float-to-float precision conversion with FCVT.
// The conversion is selected by a 16-bit key: destination element size in
// bytes in the high byte, source element size in bytes in the low byte.
DEF_OP(Float_FToF) {
  const auto Op = IROp->C<IR::IROp_Float_FToF>();

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Scalar);

  const uint16_t Conv = (IR::OpSizeToSize(Op->Header.ElementSize) << 8) | IR::OpSizeToSize(Op->SrcElementSize);

  switch (Conv) {
  // Narrowing conversions
  case 0x0204: fcvt(Dst.H(), Src.S()); break; // Half <- Float
  case 0x0208: fcvt(Dst.H(), Src.D()); break; // Half <- Double
  case 0x0408: fcvt(Dst.S(), Src.D()); break; // Float <- Double
  // Widening conversions
  case 0x0402: fcvt(Dst.S(), Src.H()); break; // Float <- Half
  case 0x0802: fcvt(Dst.D(), Src.H()); break; // Double <- Half
  case 0x0804: fcvt(Dst.D(), Src.S()); break; // Double <- Float
  default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv); break;
  }
}

// Signed integer to float conversion (SCVTF) on vector registers.
// Three forms are emitted:
//  - 256-bit: SVE predicated form over PRED_TMP_32B (asserted to need SVE256)
//  - operation size == element size: scalar form on the H/S/D view
//  - otherwise: AdvSIMD vector form on the D (64-bit op) or Q view
DEF_OP(Vector_SToF) {
  const auto Op = IROp->C<IR::IROp_Vector_SToF>();
  const auto OpSize = IROp->Size;
  const auto ElementSize = Op->Header.ElementSize;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto LaneSize = ConvertSubRegSize248(IROp);
  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    scvtf(Result.Z(), LaneSize, PRED_TMP_32B.Merging(), Input.Z(), LaneSize);
  } else if (OpSize != ElementSize) {
    // Packed conversion.
    if (OpSize == IR::OpSize::i64Bit) {
      scvtf(LaneSize, Result.D(), Input.D());
    } else {
      scvtf(LaneSize, Result.Q(), Input.Q());
    }
  } else {
    // Single-element conversion; anything that is not 64- or 32-bit is
    // treated as 16-bit.
    switch (ElementSize) {
    case IR::OpSize::i64Bit: scvtf(ARMEmitter::ScalarRegSize::i64Bit, Result.D(), Input.D()); break;
    case IR::OpSize::i32Bit: scvtf(ARMEmitter::ScalarRegSize::i32Bit, Result.S(), Input.S()); break;
    default: scvtf(ARMEmitter::ScalarRegSize::i16Bit, Result.H(), Input.H()); break;
    }
  }
}

// Float to signed integer conversion using FCVTZS on vector registers.
// Three forms are emitted:
//  - 256-bit: SVE predicated form over PRED_TMP_32B (asserted to need SVE256)
//  - operation size == element size: scalar form on the H/S/D view
//  - otherwise: AdvSIMD vector form on the D (64-bit op) or Q view
DEF_OP(Vector_FToZS) {
  const auto Op = IROp->C<IR::IROp_Vector_FToZS>();
  const auto OpSize = IROp->Size;
  const auto ElementSize = Op->Header.ElementSize;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto LaneSize = ConvertSubRegSize248(IROp);
  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    fcvtzs(Result.Z(), LaneSize, PRED_TMP_32B.Merging(), Input.Z(), LaneSize);
  } else if (OpSize != ElementSize) {
    // Packed conversion.
    if (OpSize == IR::OpSize::i64Bit) {
      fcvtzs(LaneSize, Result.D(), Input.D());
    } else {
      fcvtzs(LaneSize, Result.Q(), Input.Q());
    }
  } else {
    // Single-element conversion; anything that is not 64- or 32-bit is
    // treated as 16-bit.
    switch (ElementSize) {
    case IR::OpSize::i64Bit: fcvtzs(ARMEmitter::ScalarRegSize::i64Bit, Result.D(), Input.D()); break;
    case IR::OpSize::i32Bit: fcvtzs(ARMEmitter::ScalarRegSize::i32Bit, Result.S(), Input.S()); break;
    default: fcvtzs(ARMEmitter::ScalarRegSize::i16Bit, Result.H(), Input.H()); break;
    }
  }
}

// Float to signed integer conversion in two steps: FRINTI into the
// destination, then FCVTZS in place on the destination.
// 256-bit operations use the SVE predicated forms over PRED_TMP_32B and are
// asserted to require SVE256; otherwise the D (64-bit op) or Q view is used.
DEF_OP(Vector_FToS) {
  const auto Op = IROp->C<IR::IROp_Vector_FToS>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto LaneSize = ConvertSubRegSize248(IROp);
  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    frinti(LaneSize, Result.Z(), PRED_TMP_32B.Merging(), Input.Z());
    fcvtzs(Result.Z(), LaneSize, PRED_TMP_32B.Merging(), Result.Z(), LaneSize);
  } else if (OpSize == IR::OpSize::i64Bit) {
    frinti(LaneSize, Result.D(), Input.D());
    fcvtzs(LaneSize, Result.D(), Result.D());
  } else {
    frinti(LaneSize, Result.Q(), Input.Q());
    fcvtzs(LaneSize, Result.Q(), Result.Q());
  }
}

// Vector float-to-float precision conversion.
// The conversion is selected by a key with the destination element size in
// bytes in the high byte and the source element size in bytes in the low
// byte. 256-bit operations take the SVE path (asserted to need SVE256);
// everything else uses AdvSIMD FCVTL/FCVTN on the D views.
DEF_OP(Vector_FToF) {
  const auto Op = IROp->C<IR::IROp_Vector_FToF>();

  const auto Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto LaneSize = ConvertSubRegSize248(IROp);
  const auto Conv = (IR::OpSizeToSize(Op->Header.ElementSize) << 8) | IR::OpSizeToSize(Op->SrcElementSize);

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    switch (Conv) {
    // Widening: Float <- Half, Double <- Float
    case 0x0402:
    case 0x0804: fcvtl(LaneSize, Result.D(), Input.D()); break;
    // Narrowing: Half <- Float, Float <- Double
    case 0x0204:
    case 0x0408: fcvtn(LaneSize, Result.D(), Input.D()); break;
    default: LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); break;
    }
  } else {
    // SVE only offers the "top" variants FCVTLT and FCVTNT.
    //
    // FCVTLT reads the odd-numbered source elements and writes the results
    // consecutively, so the source is first interleaved with itself (ZIP1)
    // to put each wanted element into an odd slot.
    //
    // FCVTNT reads consecutive elements and writes each result into an
    // odd-numbered destination element, so the results are gathered back
    // together afterwards with UZP2.
    const auto Pred = PRED_TMP_32B.Merging();

    switch (Conv) {
    case 0x0402: // Float <- Half
      zip1(ARMEmitter::SubRegSize::i16Bit, Result.Z(), Input.Z(), Input.Z());
      fcvtlt(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Pred, Result.Z());
      break;
    case 0x0804: // Double <- Float
      zip1(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Input.Z(), Input.Z());
      fcvtlt(ARMEmitter::SubRegSize::i64Bit, Result.Z(), Pred, Result.Z());
      break;
    case 0x0204: // Half <- Float
      fcvtnt(ARMEmitter::SubRegSize::i16Bit, Result.Z(), Pred, Input.Z());
      uzp2(ARMEmitter::SubRegSize::i16Bit, Result.Z(), Result.Z(), Result.Z());
      break;
    case 0x0408: // Float <- Double
      fcvtnt(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Pred, Input.Z());
      uzp2(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Result.Z(), Result.Z());
      break;
    default: LOGMAN_MSG_A_FMT("Unknown Vector_FToF Type : 0x{:04x}", Conv); break;
    }
  }
}

// Emits a single FCVTL2 from Op->Vector into the destination register, with
// the element size taken from ConvertSubRegSize248(IROp).
DEF_OP(VFCVTL2) {
  const auto Op = IROp->C<IR::IROp_VFCVTL2>();
  const auto LaneSize = ConvertSubRegSize248(IROp);

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  fcvtl2(LaneSize, Result.D(), Input.D());
}

// Emits FCVTN2, which uses its destination as an input as well: the register
// must already contain VectorLower when the instruction runs.
// If the destination does not alias VectorLower, the work is done in VTMP1
// and the result is copied to the destination afterwards.
DEF_OP(VFCVTN2) {
  const auto Op = IROp->C<IR::IROp_VFCVTN2>();
  const auto LaneSize = ConvertSubRegSize248(IROp);

  const auto Result = GetVReg(Node);
  const auto LowerIn = GetVReg(Op->VectorLower);
  const auto UpperIn = GetVReg(Op->VectorUpper);

  if (Result != LowerIn) {
    // Stage through the temporary so neither input is modified.
    mov(VTMP1.Q(), LowerIn.Q());
    fcvtn2(LaneSize, VTMP1.Q(), UpperIn.Q());
    mov(Result.Q(), VTMP1.Q());
  } else {
    // Destination already holds the lower vector; convert in place.
    fcvtn2(LaneSize, LowerIn.Q(), UpperIn.Q());
  }
}

// Rounds floating-point elements to integral floating-point values using the
// FRINT* instruction selected by Op->Round:
//   Nearest -> frintn, NegInfinity -> frintm, PosInfinity -> frintp,
//   TowardsZero -> frintz, Host -> frinti
//
// Three forms are emitted:
//  - 256-bit: SVE predicated form over PRED_TMP_32B (asserted to need SVE256)
//  - element size == operation size: scalar form on the H/S/D view
//  - otherwise: AdvSIMD vector form on the Q view
DEF_OP(Vector_FToI) {
  const auto Op = IROp->C<IR::IROp_Vector_FToI>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubEmitSize = ConvertSubRegSize248(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    switch (Op->Round) {
    case IR::RoundMode::Nearest: frintn(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::NegInfinity: frintm(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::PosInfinity: frintp(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::TowardsZero: frintz(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::Host: frinti(SubEmitSize, Dst.Z(), Mask, Vector.Z()); break;
    }
  } else {
    const auto IsScalar = ElementSize == OpSize;

    if (IsScalar) {
// Since we have multiple overloads of the same name (e.g.
// frinti having AdvSIMD, AdvSIMD scalar, and an SVE version),
// we can't just use a lambda without some seriously ugly casting.
// This is fairly self-contained otherwise.
// ROUNDING_FN(name) calls `name` on the H, S or D view matching ElementSize
// and hits FEX_UNREACHABLE for any other element size.
#define ROUNDING_FN(name)                         \
  if (ElementSize == IR::OpSize::i16Bit) {        \
    name(Dst.H(), Vector.H());                    \
  } else if (ElementSize == IR::OpSize::i32Bit) { \
    name(Dst.S(), Vector.S());                    \
  } else if (ElementSize == IR::OpSize::i64Bit) { \
    name(Dst.D(), Vector.D());                    \
  } else {                                        \
    FEX_UNREACHABLE;                              \
  }

      switch (Op->Round) {
      case IR::RoundMode::Nearest: ROUNDING_FN(frintn); break;
      case IR::RoundMode::NegInfinity: ROUNDING_FN(frintm); break;
      case IR::RoundMode::PosInfinity: ROUNDING_FN(frintp); break;
      case IR::RoundMode::TowardsZero: ROUNDING_FN(frintz); break;
      case IR::RoundMode::Host: ROUNDING_FN(frinti); break;
      }

#undef ROUNDING_FN
    } else {
      // Packed form; always operates on the full Q register.
      switch (Op->Round) {
      case IR::RoundMode::Nearest: frintn(SubEmitSize, Dst.Q(), Vector.Q()); break;
      case IR::RoundMode::NegInfinity: frintm(SubEmitSize, Dst.Q(), Vector.Q()); break;
      case IR::RoundMode::PosInfinity: frintp(SubEmitSize, Dst.Q(), Vector.Q()); break;
      case IR::RoundMode::TowardsZero: frintz(SubEmitSize, Dst.Q(), Vector.Q()); break;
      case IR::RoundMode::Host: frinti(SubEmitSize, Dst.Q(), Vector.Q()); break;
      }
    }
  }
}

// Rounds floating-point elements with the FRINT32*/FRINT64* instructions.
//
// Instruction selection:
//   Op->IntSize == i64Bit -> frint64x (HostRound) or frint64z
//   otherwise             -> frint32x (HostRound) or frint32z
//
// 256-bit operations are rejected by assertion, and the host must report
// SupportsFRINTTS. When the element size equals the operation size the scalar
// S/D form is used; otherwise the AdvSIMD vector form on the Q view.
DEF_OP(Vector_FToISized) {
  const auto Op = IROp->C<IR::IROp_Vector_FToISized>();

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubEmitSize = ConvertSubRegSize248(IROp);
  LOGMAN_THROW_A_FMT(IROp->Size != IR::OpSize::i256Bit, "256-bit not wired up, though we could change that");
  LOGMAN_THROW_A_FMT(CTX->HostFeatures.SupportsFRINTTS, "Need FRINTTS for Vector_FToISized");

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (ElementSize == IROp->Size) {
// A macro is used instead of a lambda because the emitter functions are
// overloaded by register type.
// ROUNDING_FN(name) calls `name` on the S or D view matching ElementSize and
// hits FEX_UNREACHABLE for any other element size.
#define ROUNDING_FN(name)                         \
  if (ElementSize == IR::OpSize::i32Bit) {        \
    name(Dst.S(), Vector.S());                    \
  } else if (ElementSize == IR::OpSize::i64Bit) { \
    name(Dst.D(), Vector.D());                    \
  } else {                                        \
    FEX_UNREACHABLE;                              \
  }

    if (Op->IntSize == IR::OpSize::i64Bit) {
      if (Op->HostRound) {
        ROUNDING_FN(frint64x);
      } else {
        ROUNDING_FN(frint64z);
      }
    } else {
      if (Op->HostRound) {
        ROUNDING_FN(frint32x);
      } else {
        ROUNDING_FN(frint32z);
      }
    }

#undef ROUNDING_FN
  } else {
    // Packed form; always operates on the full Q register.
    if (Op->IntSize == IR::OpSize::i64Bit) {
      if (Op->HostRound) {
        frint64x(SubEmitSize, Dst.Q(), Vector.Q());
      } else {
        frint64z(SubEmitSize, Dst.Q(), Vector.Q());
      }
    } else {
      if (Op->HostRound) {
        frint32x(SubEmitSize, Dst.Q(), Vector.Q());
      } else {
        frint32z(SubEmitSize, Dst.Q(), Vector.Q());
      }
    }
  }
}

// Converts a vector of f64 elements to i32 elements using the rounding mode
// in Op->Round.
//
// With SVE (128- or 256-bit) the values are rounded with FRINT* (skipped for
// TowardsZero, where FCVTZS alone suffices), converted f64->i32 directly, and
// then packed together with UZP1. Without SVE the AdvSIMD fallback rounds,
// narrows f64->f32, and then converts f32->i32.
DEF_OP(Vector_F64ToI32) {
  const auto Op = IROp->C<IR::IROp_Vector_F64ToI32>();
  const auto OpSize = IROp->Size;
  const auto Round = Op->Round;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);
  if (HostSupportsSVE128 || HostSupportsSVE256) {
    // Predicate covers 32 bytes for 256-bit operations, 16 bytes otherwise.
    const auto Mask = Is256Bit ? PRED_TMP_32B.Merging() : PRED_TMP_16B.Merging();
    // First step is to round the f64 values to integrals (frint*)
    // Then convert to integers using fcvtzs.
    // CVTReg is the source of the fcvtzs below: Dst after rounding, or the
    // original Vector when no separate rounding step was emitted.
    auto CVTReg = Dst.Z();
    switch (Round) {
    case IR::RoundMode::Nearest: frintn(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::NegInfinity: frintm(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::PosInfinity: frintp(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); break;
    case IR::RoundMode::TowardsZero: CVTReg = Vector.Z(); break;
    case IR::RoundMode::Host: frinti(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), Mask, Vector.Z()); break;
    }

    fcvtzs(Dst.Z(), ARMEmitter::SubRegSize::i32Bit, Mask, CVTReg, ARMEmitter::SubRegSize::i64Bit);

    ///< Fixup format of register that fcvtzs returns.
    uzp1(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), Dst.Z(), Dst.Z());
    if (Op->EnsureZeroUpperHalf) {
      ///< Match CVTPD2DQ/CVTTPD2DQ behaviour if necessary by zeroing the upper bits here.
      // A self-move on the narrower register view is used to clear the bits above it.
      if (Is256Bit) {
        mov(Dst.Q(), Dst.Q());
      } else {
        mov(Dst.D(), Dst.D());
      }
    }
  } else {
    // This has a known precision issue that isn't easily resolvable without throwing away performance.
    // Doing the conversion in multi-stage steps has an issue that you can lose precision in the f32->i32 step if your source was f64.
    // To get around this with ASIMD FEX needs to use fcvtzs (Scalar, Integer, to GPR) for each F64 to be directly converted to i32.
    // This is a very costly transform that the SVE path doesn't need to do since it supports f64->i32 directly.
    // If this precision issue is necessary then we can add an option for it in the future.

    ///< Round float to integral depending on rounding mode.
    switch (Round) {
    case IR::RoundMode::Nearest: frintn(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), Vector.Q()); break;
    case IR::RoundMode::NegInfinity: frintm(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), Vector.Q()); break;
    case IR::RoundMode::PosInfinity: frintp(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), Vector.Q()); break;
    case IR::RoundMode::TowardsZero: frintz(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), Vector.Q()); break;
    case IR::RoundMode::Host: frinti(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), Vector.Q()); break;
    }

    // Now narrow from f64 to f32.
    fcvtn(ARMEmitter::SubRegSize::i32Bit, Dst.Q(), Dst.Q());

    ///< Convert the two F32 integrals to real integers.
    fcvtzs(ARMEmitter::SubRegSize::i32Bit, Dst.D(), Dst.D());
  }
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/DebugData.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/AllocatorHooks.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>

namespace FEXCore::CPU {
// Forward declaration only: this header refers to Relocation solely through a
// pointer to a vector of them, so the full definition is not needed here.
union Relocation;
} // namespace FEXCore::CPU

namespace FEXCore::Core {
/**
 * @brief Describes one sub-range of generated host code by offset and size.
 *
 * NOTE(review): the base that HostCodeOffset is relative to is not visible in
 * this header — presumably the start of the owning block's host code; confirm
 * against the code that fills this in.
 */
struct DebugDataSubblock {
  uint32_t HostCodeOffset; ///< Offset of the subblock's host code
  uint32_t HostCodeSize;   ///< Size of the subblock's host code
};

/**
 * @brief Pairs a guest-side entry offset with the matching host-side entry offset.
 *
 * NOTE(review): the reference points for both offsets are defined by the
 * producer of this data, which is not visible in this header.
 */
struct DebugDataGuestOpcode {
  uint64_t GuestEntryOffset; ///< Offset on the guest side
  ptrdiff_t HostEntryOffset; ///< Signed offset on the host side
};

/**
 * @brief Contains debug data for a block of code for later debugger analysis
 *
 * Needs to remain around for as long as the code could be executed at least
 *
 * Inherits FEXAllocOperators, so the allocation operators it provides apply to this type.
 */
struct DebugData : public FEXCore::Allocator::FEXAllocOperators {
  uint64_t HostCodeSize; ///< The size of the code generated in the host JIT
  fextl::vector<DebugDataSubblock> Subblocks;          ///< Per-subblock host code ranges
  fextl::vector<DebugDataGuestOpcode> GuestOpcodes;    ///< Guest/host entry offset pairs
  // Raw pointer to a relocation list; this struct does not show who owns it.
  // NOTE(review): presumably owned by the JIT that produced the block — confirm.
  fextl::vector<FEXCore::CPU::Relocation>* Relocations;
};
} // namespace FEXCore::Core


================================================
FILE: FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Core/JIT/JITClass.h"

namespace FEXCore::CPU {

// Emits a single AESIMC from the op's source vector into the node's destination register.
DEF_OP(VAESImc) {
  const auto Op = IROp->C<IR::IROp_VAESImc>();

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Vector);

  aesimc(Result, Input);
}

// Emits one AES encryption round: AESE against ZeroReg, AESMC, then EOR with the round key.
// ZeroReg is used as the AESE key operand (presumably all zeroes, per its name) so that the
// real key is only mixed in by the trailing EOR.
DEF_OP(VAESEnc) {
  const auto Op = IROp->C<IR::IROp_VAESEnc>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Key = GetVReg(Op->Key);
  const auto State = GetVReg(Op->State);
  const auto ZeroReg = GetVReg(Op->ZeroReg);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");

  // AESE/AESMC update their first operand in place. Unless Dst already holds the state
  // (and isn't also the key, which must survive until the EOR), work in VTMP1 instead.
  if (Dst != State || Dst == Key) {
    mov(VTMP1.Q(), State.Q());
    aese(VTMP1, ZeroReg.Q());
    aesmc(VTMP1, VTMP1);
    eor(Dst.Q(), VTMP1.Q(), Key.Q());
    return;
  }

  // In-place path; this is the shape the common XMM AES case takes.
  aese(Dst.Q(), ZeroReg.Q());
  aesmc(Dst.Q(), Dst.Q());
  eor(Dst.Q(), Dst.Q(), Key.Q());
}

// Emits the final AES encryption round: AESE against ZeroReg followed by EOR with the key.
// Unlike VAESEnc, no AESMC is emitted.
DEF_OP(VAESEncLast) {
  const auto Op = IROp->C<IR::IROp_VAESEncLast>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Key = GetVReg(Op->Key);
  const auto State = GetVReg(Op->State);
  const auto ZeroReg = GetVReg(Op->ZeroReg);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");

  // AESE overwrites its first operand, so fall back to VTMP1 whenever Dst does not
  // already contain the state or would alias the key needed by the EOR.
  if (Dst != State || Dst == Key) {
    mov(VTMP1.Q(), State.Q());
    aese(VTMP1, ZeroReg.Q());
    eor(Dst.Q(), VTMP1.Q(), Key.Q());
    return;
  }

  // In-place path; this is the shape the common XMM AES case takes.
  aese(Dst.Q(), ZeroReg.Q());
  eor(Dst.Q(), Dst.Q(), Key.Q());
}

// Emits one AES decryption round: AESD against ZeroReg, AESIMC, then EOR with the round key.
DEF_OP(VAESDec) {
  const auto Op = IROp->C<IR::IROp_VAESDec>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Key = GetVReg(Op->Key);
  const auto State = GetVReg(Op->State);
  const auto ZeroReg = GetVReg(Op->ZeroReg);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");

  // AESD/AESIMC update their first operand in place. Use VTMP1 unless Dst already holds
  // the state and is distinct from the key consumed by the final EOR.
  if (Dst != State || Dst == Key) {
    mov(VTMP1.Q(), State.Q());
    aesd(VTMP1, ZeroReg.Q());
    aesimc(VTMP1, VTMP1);
    eor(Dst.Q(), VTMP1.Q(), Key.Q());
    return;
  }

  // In-place path; this is the shape the common XMM AES case takes.
  aesd(Dst.Q(), ZeroReg.Q());
  aesimc(Dst.Q(), Dst.Q());
  eor(Dst.Q(), Dst.Q(), Key.Q());
}

// Emits the final AES decryption round: AESD against ZeroReg followed by EOR with the key.
// Unlike VAESDec, no AESIMC is emitted.
DEF_OP(VAESDecLast) {
  const auto Op = IROp->C<IR::IROp_VAESDecLast>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Key = GetVReg(Op->Key);
  const auto State = GetVReg(Op->State);
  const auto ZeroReg = GetVReg(Op->ZeroReg);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");

  // AESD overwrites its first operand, so fall back to VTMP1 whenever Dst does not
  // already contain the state or would alias the key needed by the EOR.
  if (Dst != State || Dst == Key) {
    mov(VTMP1.Q(), State.Q());
    aesd(VTMP1, ZeroReg.Q());
    eor(Dst.Q(), VTMP1.Q(), Key.Q());
    return;
  }

  // In-place path; this is the shape the common XMM AES case takes.
  aesd(Dst.Q(), ZeroReg.Q());
  eor(Dst.Q(), Dst.Q(), Key.Q());
}

// Emits the key-generation assist sequence: AESE on a copy of Src against ZeroReg, a TBL
// byte shuffle through the supplied swizzle vector, and (when RCON is non-zero) an EOR
// with RCON placed in the upper 32 bits of each 64-bit lane.
DEF_OP(VAESKeyGenAssist) {
  const auto Op = IROp->C<IR::IROp_VAESKeyGenAssist>();
  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Src);
  const auto Swizzle = GetVReg(Op->KeyGenTBLSwizzle);
  auto ZeroReg = GetVReg(Op->ZeroReg);

  // Dst is about to be overwritten with Src. If the allocator handed us ZeroReg as the
  // destination, preserve its contents in VTMP1 and use that as the AESE operand instead.
  if (Dst == ZeroReg) {
    mov(VTMP1.Q(), ZeroReg.Q());
    ZeroReg = VTMP1;
  }

  // AESE works in place on its first operand, so the source has to live in Dst.
  if (Dst != Src) {
    mov(Dst.Q(), Src.Q());
  }

  // Do a "regular" AESE step
  aese(Dst, ZeroReg.Q());

  // The shuffle is emitted regardless of RCON.
  tbl(Dst.Q(), Dst.Q(), Swizzle.Q());

  // Now EOR in the RCON, only when there is one.
  if (Op->RCON) {
    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->RCON) << 32);
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP2.Q(), TMP1);
    eor(Dst.Q(), Dst.Q(), VTMP2.Q());
  }
}

// Emits the CRC32C instruction variant matching the op's source size
// (byte, halfword, word or doubleword). Any other size trips an assertion message.
DEF_OP(CRC32) {
  const auto Op = IROp->C<IR::IROp_CRC32>();

  const auto Dst = GetReg(Node);
  const auto Src1 = GetReg(Op->Src1);
  const auto Src2 = GetReg(Op->Src2);

  const auto Width = Op->SrcSize;
  if (Width == IR::OpSize::i8Bit) {
    crc32cb(Dst.W(), Src1.W(), Src2.W());
  } else if (Width == IR::OpSize::i16Bit) {
    crc32ch(Dst.W(), Src1.W(), Src2.W());
  } else if (Width == IR::OpSize::i32Bit) {
    crc32cw(Dst.W(), Src1.W(), Src2.W());
  } else if (Width == IR::OpSize::i64Bit) {
    // Only the 64-bit data variant is emitted with X-register views.
    crc32cx(Dst.X(), Src1.X(), Src2.X());
  } else {
    LOGMAN_MSG_A_FMT("Unknown CRC32 size: {}", Op->SrcSize);
  }
}

// Emits SHA1H on the 32-bit scalar views of the source and destination registers.
DEF_OP(VSha1H) {
  const auto Op = IROp->C<IR::IROp_VSha1H>();

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(Op->Src);

  sha1h(Result.S(), Input.S());
}

// Emits SHA1C. The instruction accumulates into its first operand, so Src1 must end up
// in the register the instruction writes; the three paths below differ only in how they
// get it there without clobbering Src2/Src3.
DEF_OP(VSha1C) {
  const auto Op = IROp->C<IR::IROp_VSha1C>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);
  const auto Src3 = GetVReg(Op->Src3);

  // Dst already holds the accumulator: operate in place.
  if (Dst == Src1) {
    sha1c(Dst, Src2.S(), Src3);
    return;
  }

  // Dst aliases one of the other inputs: copying Src1 into it would destroy that input,
  // so compute in VTMP1 and move the result over afterwards.
  if (Dst == Src2 || Dst == Src3) {
    mov(VTMP1.Q(), Src1.Q());
    sha1c(VTMP1, Src2.S(), Src3);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // Dst is unrelated to every input: seed it with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha1c(Dst, Src2.S(), Src3);
}

// Emits SHA1M. The instruction accumulates into its first operand, so Src1 must end up
// in the register the instruction writes; the three paths below differ only in how they
// get it there without clobbering Src2/Src3.
DEF_OP(VSha1M) {
  const auto Op = IROp->C<IR::IROp_VSha1M>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);
  const auto Src3 = GetVReg(Op->Src3);

  // Dst already holds the accumulator: operate in place.
  if (Dst == Src1) {
    sha1m(Dst, Src2.S(), Src3);
    return;
  }

  // Dst aliases one of the other inputs: copying Src1 into it would destroy that input,
  // so compute in VTMP1 and move the result over afterwards.
  if (Dst == Src2 || Dst == Src3) {
    mov(VTMP1.Q(), Src1.Q());
    sha1m(VTMP1, Src2.S(), Src3);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // Dst is unrelated to every input: seed it with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha1m(Dst, Src2.S(), Src3);
}

// Emits SHA1P. The instruction accumulates into its first operand, so Src1 must end up
// in the register the instruction writes; the three paths below differ only in how they
// get it there without clobbering Src2/Src3.
DEF_OP(VSha1P) {
  const auto Op = IROp->C<IR::IROp_VSha1P>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);
  const auto Src3 = GetVReg(Op->Src3);

  // Dst already holds the accumulator: operate in place.
  if (Dst == Src1) {
    sha1p(Dst, Src2.S(), Src3);
    return;
  }

  // Dst aliases one of the other inputs: copying Src1 into it would destroy that input,
  // so compute in VTMP1 and move the result over afterwards.
  if (Dst == Src2 || Dst == Src3) {
    mov(VTMP1.Q(), Src1.Q());
    sha1p(VTMP1, Src2.S(), Src3);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // Dst is unrelated to every input: seed it with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha1p(Dst, Src2.S(), Src3);
}

// Emits SHA1SU1. The instruction reads and writes its first operand, so Src1 must be
// placed in the written register without destroying Src2 first.
DEF_OP(VSha1SU1) {
  const auto Op = IROp->C<IR::IROp_VSha1SU1>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);

  // Dst already holds the first operand: operate in place.
  if (Dst == Src1) {
    sha1su1(Dst, Src2);
    return;
  }

  // Dst aliases Src2: go through VTMP1 so Src2 stays intact for the instruction.
  if (Dst == Src2) {
    mov(VTMP1.Q(), Src1.Q());
    sha1su1(VTMP1, Src2);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // No aliasing: seed Dst with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha1su1(Dst, Src2);
}

// Emits SHA256H. The instruction accumulates into its first operand, so Src1 must end up
// in the register the instruction writes; the three paths below differ only in how they
// get it there without clobbering Src2/Src3.
DEF_OP(VSha256H) {
  const auto Op = IROp->C<IR::IROp_VSha256H>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);
  const auto Src3 = GetVReg(Op->Src3);

  // Dst already holds the accumulator: operate in place.
  if (Dst == Src1) {
    sha256h(Dst, Src2, Src3);
    return;
  }

  // Dst aliases one of the other inputs: compute in VTMP1 and move the result over.
  if (Dst == Src2 || Dst == Src3) {
    mov(VTMP1.Q(), Src1.Q());
    sha256h(VTMP1, Src2, Src3);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // Dst is unrelated to every input: seed it with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha256h(Dst, Src2, Src3);
}

// Emits SHA256H2. The instruction accumulates into its first operand, so Src1 must end up
// in the register the instruction writes; the three paths below differ only in how they
// get it there without clobbering Src2/Src3.
DEF_OP(VSha256H2) {
  const auto Op = IROp->C<IR::IROp_VSha256H2>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);
  const auto Src3 = GetVReg(Op->Src3);

  // Dst already holds the accumulator: operate in place.
  if (Dst == Src1) {
    sha256h2(Dst, Src2, Src3);
    return;
  }

  // Dst aliases one of the other inputs: compute in VTMP1 and move the result over.
  if (Dst == Src2 || Dst == Src3) {
    mov(VTMP1.Q(), Src1.Q());
    sha256h2(VTMP1, Src2, Src3);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  // Dst is unrelated to every input: seed it with Src1 and operate in place.
  mov(Dst.Q(), Src1.Q());
  sha256h2(Dst, Src2, Src3);
}

// Emits SHA256SU0. The instruction reads and writes its first operand; whenever Dst is not
// already Src1 the work is done in VTMP1 and copied to Dst afterwards.
DEF_OP(VSha256U0) {
  const auto Op = IROp->C<IR::IROp_VSha256U0>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);

  if (Dst != Src1) {
    // Always routed through the temporary, whether or not Dst aliases Src2.
    mov(VTMP1.Q(), Src1.Q());
    sha256su0(VTMP1, Src2);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  sha256su0(Dst, Src2);
}

// Emits SHA256SU1 with a zeroed first operand. The register that the instruction writes
// is cleared with MOVI first, so it must not alias either source.
DEF_OP(VSha256U1) {
  const auto Op = IROp->C<IR::IROp_VSha256U1>();

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);

  // Zeroing Dst would wipe a source it aliases; use VTMP1 in that case.
  if (Dst == Src1 || Dst == Src2) {
    movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
    sha256su1(VTMP1, Src1, Src2);
    mov(Dst.Q(), VTMP1.Q());
    return;
  }

  movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0);
  sha256su1(Dst, Src1, Src2);
}

// Emits a 64x64 -> 128-bit polynomial multiply. The selector picks which 64-bit half of
// each source participates: bit 0 selects the upper half of Src1, bit 4 the upper half of
// Src2. Mixed selections first broadcast the needed upper half into VTMP1 so that PMULL
// (which reads the lower halves) can be used.
DEF_OP(PCLMUL) {
  const auto Op = IROp->C<IR::IROp_PCLMUL>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Src1 = GetVReg(Op->Src1);
  const auto Src2 = GetVReg(Op->Src2);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Currently only supports 128-bit operations.");

  switch (Op->Selector) {
  case 0x00: {
    // Low half of Src1 times low half of Src2.
    pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), Src1.D(), Src2.D());
    break;
  }
  case 0x01: {
    // High half of Src1 times low half of Src2.
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), Src1.Q(), 1);
    pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), VTMP1.D(), Src2.D());
    break;
  }
  case 0x10: {
    // Low half of Src1 times high half of Src2.
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), Src2.Q(), 1);
    pmull(ARMEmitter::SubRegSize::i128Bit, Dst.D(), VTMP1.D(), Src1.D());
    break;
  }
  case 0x11: {
    // High half of both sources; PMULL2 reads the upper halves directly.
    pmull2(ARMEmitter::SubRegSize::i128Bit, Dst.Q(), Src1.Q(), Src2.Q());
    break;
  }
  default: {
    LOGMAN_MSG_A_FMT("Unknown PCLMUL selector: {}", Op->Selector);
    break;
  }
  }
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/JIT.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
glossary: Splatter ~ a code generator backend that concatenates configurable macros instead of doing isel
glossary: IR ~ Intermediate Representation, our high-level opcode representation, loosely modeling arm64
glossary: SSA ~ Single Static Assignment, a form of representing IR in memory
glossary: Basic Block ~ A block of instructions with no control flow, terminated by control flow
glossary: Fragment ~ A Collection of basic blocks, possibly an entire guest function or a subset of it
tags: backend|arm64
desc: Main glue logic of the arm64 splatter backend
$end_info$
*/

#include "Interface/Context/Context.h"
#include "Interface/Core/LookupCache.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/Interpreter/InterpreterOps.h"
#include "Interface/Core/JIT/DebugData.h"
#include "Interface/Core/JIT/JITClass.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"

#include "Utils/MemberFunctionToPointer.h"
#include "Utils/variable_length_integer.h"

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/LongJump.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/HLE/SyscallHandler.h>

#include <cstdio>
#include <cstring>
#include <unistd.h>

namespace {
// Quotient/remainder pair returned by the 128-bit-by-64-bit division helpers below.
// Both fields hold the low 64 bits of the respective 128-bit result.
struct DivRem {
  uint64_t Quotient;
  uint64_t Remainder;
};

// Unsigned division of the 128-bit value SrcHigh:SrcLow by a 64-bit divisor.
// Results are truncated to 64 bits. The caller must ensure Divisor is non-zero.
static struct DivRem LUDIV(uint64_t SrcHigh, uint64_t SrcLow, uint64_t Divisor) {
  const __uint128_t Dividend = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow;

  struct DivRem Result {};
  Result.Quotient = static_cast<uint64_t>(Dividend / Divisor);
  Result.Remainder = static_cast<uint64_t>(Dividend % Divisor);
  return Result;
}

// Signed division of the 128-bit value SrcHigh:SrcLow (reinterpreted as two's complement)
// by a signed 64-bit divisor. Results are truncated to 64 bits and returned as raw bit
// patterns. The caller must ensure Divisor is non-zero.
static struct DivRem LDIV(uint64_t SrcHigh, uint64_t SrcLow, int64_t Divisor) {
  const __uint128_t RawBits = (static_cast<__uint128_t>(SrcHigh) << 64) | SrcLow;
  const __int128_t Dividend = static_cast<__int128_t>(RawBits);

  struct DivRem Result {};
  Result.Quotient = static_cast<uint64_t>(Dividend / Divisor);
  Result.Remainder = static_cast<uint64_t>(Dividend % Divisor);
  return Result;
}

// Debug helper: logs a single 64-bit value in hexadecimal at debug level.
// Its address is published to generated code through the frame's pointer table.
static void PrintValue(uint64_t Value) {
  LogMan::Msg::DFmt("Value: 0x{:x}", Value);
}

// Debug helper: logs a 128-bit value given as two 64-bit halves.
// The upper half is printed first, then the lower half, each zero-padded to 16 hex digits.
static void PrintVectorValue(uint64_t Value, uint64_t ValueUpper) {
  LogMan::Msg::DFmt("Value: 0x{:016x}'{:016x}", ValueUpper, Value);
}
} // namespace

namespace FEXCore::CPU {

/**
 * @brief Emits a call to a fallback handler for IR ops that have no native JIT implementation.
 *
 * Asks InterpreterOps::GetFallbackHandler for a handler description. If none exists an
 * assertion message is raised (only when assertions are compiled in). Otherwise, based on
 * the handler's ABI, the emitted code:
 *   1. saves the link register on the stack,
 *   2. copies the op's sources into the fixed temporaries (TMP1-3 / VTMP1-2),
 *   3. loads the ABI handler and the fallback function pointer and calls the ABI handler,
 *   4. restores the link register,
 *   5. copies the result out of TMP1 / VTMP1 (and VTMP2 for pair results) into the
 *      destination register(s).
 *
 * @param IROp Header of the IR op being compiled.
 * @param Node IR node whose allocated register receives the result (for single-result ABIs).
 */
void Arm64JITCore::Op_Unhandled(const IR::IROp_Header* IROp, IR::Ref Node) {
  FallbackInfo Info;
  if (!InterpreterOps::GetFallbackHandler(IROp, &Info)) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_MSG_A_FMT("Unhandled IR Op: {}", FEXCore::IR::GetName(IROp->Op));
#endif
  } else {
    // Result-fill helpers: each copies the handler's return value out of the fixed
    // temporaries into the destination register(s), using a width matching the result type.

    // Two 128-bit results in VTMP1/VTMP2 -> explicit destination pair.
    auto FillF80x2Result = [&](auto DstLo, auto DstHi) {
      mov(DstLo.Q(), VTMP1.Q());
      mov(DstHi.Q(), VTMP2.Q());
    };

    // Two 64-bit float results in VTMP1/VTMP2 -> explicit destination pair.
    auto FillF64x2Result = [&](auto DstLo, auto DstHi) {
      fmov(DstLo.D(), VTMP1.D());
      fmov(DstHi.D(), VTMP2.D());
    };

    // Full 128-bit register copy from VTMP1 (used for F80 results).
    auto FillF80Result = [&]() {
      const auto Dst = GetVReg(Node);
      mov(Dst.Q(), VTMP1.Q());
    };

    // 64-bit float result from VTMP1.
    auto FillF64Result = [&]() {
      const auto Dst = GetVReg(Node);
      fmov(Dst.D(), VTMP1.D());
    };

    // 32-bit float result from VTMP1.
    auto FillF32Result = [&]() {
      const auto Dst = GetVReg(Node);
      fmov(Dst.S(), VTMP1.S());
    };

    // 64-bit integer result from TMP1.
    auto FillI64Result = [&]() {
      const auto Dst = GetReg(Node);
      mov(Dst.X(), TMP1);
    };

    // 32-bit integer result from TMP1.
    auto FillI32Result = [&]() {
      const auto Dst = GetReg(Node);
      mov(Dst.W(), TMP1.W());
    };

    // 16-bit integer result: copied as a 32-bit move with no extension done here.
    // NOTE(review): presumably the ABI handler already normalises the upper bits — confirm.
    auto FillI16Result = [&]() {
      const auto Dst = GetReg(Node);
      mov(Dst.W(), TMP1.W());
    };

    switch (Info.ABI) {
    case FABI_F80_I16_F32_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      fmov(VTMP1.S(), Src1.S());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80Result();
    } break;

    case FABI_F80_I16_F64_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      fmov(VTMP1.D(), Src1.D());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80Result();
    } break;

    case FABI_F80_I16_I16_PTR:
    case FABI_F80_I16_I32_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // tmp2 (x1/x11): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetReg(IROp->Args[0]);

      // Need to sign or zero extend this for the dispatcher handler.
      // 16-bit sources are sign-extended to 32 bits; 32-bit sources are moved as-is.
      if (Info.ABI == FABI_F80_I16_I16_PTR) {
        sxth(ARMEmitter::Size::i32Bit, TMP2, Src1);
      } else {
        mov(ARMEmitter::Size::i32Bit, TMP2, Src1);
      }

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80Result();
    } break;

    case FABI_F32_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF32Result();
    } break;

    case FABI_F64_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF64Result();
    } break;

    case FABI_F64_F64_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      fmov(VTMP1.D(), Src1.D());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF64Result();
    } break;
    case FABI_F64x2_F64_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source, and first result on return
      // vtmp2 (v1/v17): second result on return (not loaded before the call)
#ifdef VIXL_SIMULATOR
      LOGMAN_THROW_A_FMT(CTX->Config.DisableVixlIndirectCalls, "Vector register pairs unsupported by simulator currently");
#endif
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      // Args[1] and Args[2] name the registers that receive the two results.
      const auto Src1 = GetVReg(IROp->Args[0]);
      const auto DstLo = GetVReg(IROp->Args[1]);
      const auto DstHi = GetVReg(IROp->Args[2]);

      fmov(VTMP1.D(), Src1.D());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF64x2Result(DstLo, DstHi);
    } break;

    case FABI_F64_F64_F64_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      // vtmp2 (v1/v17): vector source 2
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      const auto Src2 = GetVReg(IROp->Args[1]);

      fmov(VTMP1.D(), Src1.D());
      fmov(VTMP2.D(), Src2.D());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF64Result();
    } break;

    case FABI_I16_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI16Result();
    } break;

    case FABI_I32_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI32Result();
    } break;

    case FABI_I64_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): source
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI64Result();
    } break;

    case FABI_I64_I16_F80_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      // vtmp2 (v1/v17): vector source 2
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      const auto Src2 = GetVReg(IROp->Args[1]);
      mov(VTMP1.Q(), Src1.Q());
      mov(VTMP2.Q(), Src2.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI64Result();
    } break;

    case FABI_F80_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80Result();
    } break;

    case FABI_F80x2_I16_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1, and first result on return
      // vtmp2 (v1/v17): second result on return (not loaded before the call)
#ifdef VIXL_SIMULATOR
      LOGMAN_THROW_A_FMT(CTX->Config.DisableVixlIndirectCalls, "Vector register pairs unsupported by simulator currently");
#endif
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      // Args[1] and Args[2] name the registers that receive the two results.
      const auto Src1 = GetVReg(IROp->Args[0]);
      const auto DstLo = GetVReg(IROp->Args[1]);
      const auto DstHi = GetVReg(IROp->Args[2]);

      mov(VTMP1.Q(), Src1.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80x2Result(DstLo, DstHi);
    } break;

    case FABI_F80_I16_F80_F80_PTR: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      // vtmp2 (v1/v17): vector source 2
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(IROp->Args[0]);
      const auto Src2 = GetVReg(IROp->Args[1]);

      mov(VTMP1.Q(), Src1.Q());
      mov(VTMP2.Q(), Src2.Q());

      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP1);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillF80Result();
    } break;

    case FABI_I32_I64_I64_V128_V128_I16: {
      // Linux Reg/Win32 Reg:
      // stack: FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      // vtmp2 (v1/v17): vector source 2
      // tmp1 (x0/x10): source 1
      // tmp2 (x1/x11): source 2
      // tmp3 (x2/x12): source 3
      const auto Op = IROp->C<IR::IROp_VPCMPESTRX>();
      // TMP1-TMP3 all carry arguments here, so the ABI handler goes in TMP4 and the
      // fallback function pointer is passed on the stack next to the saved lr.
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP1, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));

      stp<ARMEmitter::IndexType::PRE>(TMP1, ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto SrcRAX = GetReg(Op->RAX);
      const auto SrcRDX = GetReg(Op->RDX);
      const auto Control = Op->Control;

      mov(TMP1, SrcRAX.X());
      mov(TMP2, SrcRDX.X());
      movz(ARMEmitter::Size::i32Bit, TMP3, Control);

      const auto Src1 = GetVReg(Op->LHS);
      const auto Src2 = GetVReg(Op->RHS);

      mov(VTMP1.Q(), Src1.Q());
      mov(VTMP2.Q(), Src2.Q());

      blr(TMP4);

      // Pop the pair: the stacked function pointer is discarded into zr, lr is restored.
      ldp<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::zr, ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI32Result();
    } break;
    case FABI_I32_V128_V128_I16: {
      // Linux Reg/Win32 Reg:
      // tmp4 (x4/x13): FallbackHandler
      // x30: return
      // vtmp1 (v0/v16): vector source 1
      // vtmp2 (v1/v17): vector source 2
      // tmp1 (x0/x10): source 1
      const auto Op = IROp->C<IR::IROp_VPCMPISTRX>();
      str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);

      const auto Src1 = GetVReg(Op->LHS);
      const auto Src2 = GetVReg(Op->RHS);
      const auto Control = Op->Control;

      mov(VTMP1.Q(), Src1.Q());
      mov(VTMP2.Q(), Src2.Q());
      movz(ARMEmitter::Size::i32Bit, TMP1, Control);

      // TMP1 carries the control argument, so the ABI handler is called through TMP2 here.
      ldr(TMP2, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, ABIHandler));
      ldr(TMP4, FALLBACK_HANDLER_OFFSET(Info.HandlerIndex, Func));
      blr(TMP2);

      ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);
      FillI32Result();
    } break;
    case FABI_UNKNOWN:
    default:
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      LOGMAN_MSG_A_FMT("Unhandled IR Fallback ABI: {} {}", FEXCore::IR::GetName(IROp->Op), ToUnderlying(Info.ABI));
#endif
      break;
    }
  }
}

// Undoes a direct link: rewrites the patched callsite so it branches back to the start of
// its jump thunk instead of straight into the (now invalid) target block.
// `Call` selects whether the restored instruction is a BL (true) or a plain B (false).
static void DirectBlockDelinker(FEXCore::Context::ExitFunctionLinkData* Record, bool Call) {
  // The jump thunk begins 0x10 bytes before the link record; the callsite lives at a
  // record-provided offset from the thunk start.
  const uintptr_t ThunkBase = reinterpret_cast<uintptr_t>(Record) - 0x10;
  const uintptr_t PatchSite = ThunkBase + Record->CallerOffset;
  const auto InstDelta = ThunkBase / 4 - PatchSite / 4;

  // Assemble the replacement branch into a scratch word first.
  uint32_t Encoded = 0;
  ARMEmitter::Emitter Scratch(reinterpret_cast<uint8_t*>(&Encoded), 4);
  if (!Call) {
    Scratch.b(InstDelta);
  } else {
    Scratch.bl(InstDelta);
  }

  // Publish it with a single atomic 32-bit store, then flush the instruction cache for
  // that word.
  auto* const PatchWord = reinterpret_cast<uint32_t*>(PatchSite);
  std::atomic_ref<uint32_t>(*PatchWord).store(Encoded, std::memory_order::relaxed);
  ARMEmitter::Emitter::ClearICache(static_cast<void*>(PatchWord), 4);
}

// Undoes an indirect link: restores the first instruction of the jump thunk to the
// branch that skips ahead two instructions, so the thunk reaches the linker block again.
static void IndirectBlockDelinker(FEXCore::Context::ExitFunctionLinkData* Record) {
  // The jump thunk begins 0x10 bytes before the link record.
  auto* const ThunkEntry = reinterpret_cast<uint32_t*>(reinterpret_cast<uintptr_t>(Record) - 0x10);

  // Assemble `b +2 instructions` into a scratch word.
  uint32_t Encoded = 0;
  ARMEmitter::Emitter Scratch(reinterpret_cast<uint8_t*>(&Encoded), 4);
  Scratch.b(0x2);

  // Publish it with a single atomic 32-bit store, then flush the instruction cache for
  // that word.
  std::atomic_ref<uint32_t>(*ThunkEntry).store(Encoded, std::memory_order::relaxed);
  ARMEmitter::Emitter::ClearICache(static_cast<void*>(ThunkEntry), 4);

  // Record->HostCode is deliberately left alone: the exit linker pointer is stored
  // separately, and a later relink overwrites HostCode anyway.
}

/**
 * @brief Resolves a block exit to host code and links the exit to it.
 *
 * Looks up (or compiles) the host code for Record->GuestRIP and then patches the exit so
 * later executions skip the linker:
 *   - if the target is within branch range of the callsite, the callsite itself is
 *     rewritten to a direct B/BL;
 *   - otherwise the target address is stored in Record->HostCode and the jump thunk's
 *     first instruction is rewritten to load that address.
 * In both cases a delinker is registered with the lookup cache so the patch can be undone.
 *
 * @param Frame  CPU state frame of the executing thread.
 * @param Record Link record embedded in the jump thunk (the thunk starts 0x10 bytes before it).
 * @return Host address to continue execution at. This is the dispatcher loop top when the
 *         trap flag is set, otherwise the host code for the guest RIP.
 */
uint64_t Arm64JITCore::ExitFunctionLink(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record) {
  auto Thread = Frame->Thread;
  bool TFSet = Thread->CurrentFrame->State.flags[X86State::RFLAG_TF_RAW_LOC];
  uintptr_t HostCode {};
  auto GuestRip = Record->GuestRIP;

  if (TFSet) {
    // If TF is set, the cache must be skipped as different code needs to be generated.
    Frame->State.rip = GuestRip;
    return Frame->Pointers.DispatcherLoopTop;
  } else {
    {
      // Guard the LookupCache lock with the code invalidation mutex, to avoid issues with forking
      auto lk_inval =
        GuardSignalDeferringSection<std::shared_lock>(static_cast<Context::ContextImpl*>(Thread->CTX)->CodeInvalidationMutex, Thread);
      HostCode = Thread->LookupCache->FindBlock(Thread, GuestRip);
    }
    if (!HostCode) {
      // Hold a reference to the code buffer, to avoid linking unmapped code if compilation triggers a recreation.
      auto CodeBuffer = static_cast<Arm64JITCore*>(Thread->CPUBackend.get())->CurrentCodeBuffer;
      HostCode = static_cast<Context::ContextImpl*>(Thread->CTX)->CompileBlock(Frame, GuestRip, 0);
      // If the thread's shared lookup cache is no longer the one belonging to the buffer
      // captured above, skip linking and just run the freshly compiled code.
      if (Thread->LookupCache->Shared != CodeBuffer->LookupCache.get()) {
        return HostCode;
      }
    }
  }

  // See ExitFunction in BranchOps.cpp for an assembly level view of the handled cases.
  uintptr_t JumpThunkStartAddress = reinterpret_cast<uintptr_t>(Record) - 0x10;
  uintptr_t CallerAddress = JumpThunkStartAddress + Record->CallerOffset;
  // Branch distance in units of 4-byte instructions.
  auto BranchOffset = HostCode / 4 - CallerAddress / 4;

  // Encode the instruction that marks a call-type exit so it can be compared against
  // the word found two instructions before the callsite.
  uint32_t ExpectedKnownCallMarkerInst = 0;
  ARMEmitter::Emitter ExpectedKnownCallMarkerEmit(reinterpret_cast<uint8_t*>(&ExpectedKnownCallMarkerInst), 4);
  ExpectedKnownCallMarkerEmit.adr(TMP1, 0xC);

  // Guard the LookupCache lock with the code invalidation mutex, to avoid issues with forking
  auto lk_inval = GuardSignalDeferringSection<std::shared_lock>(static_cast<Context::ContextImpl*>(Thread->CTX)->CodeInvalidationMutex, Thread);

  // Lock here is necessary to prevent simultaneous linking and delinking
  auto lk = Thread->LookupCache->AcquireWriteLock();

  // For non-calls, this would extend into the block's code, however that's fine as an out-of-range adr would never
  // be generated avoiding any false positives.
  uintptr_t KnownCallMarkerAddr = CallerAddress - 0x8;
  uint32_t KnownCallMarkerInst = *reinterpret_cast<uint32_t*>(KnownCallMarkerAddr);
  if (ARMEmitter::Emitter::IsInt26(BranchOffset)) {
    // Directly patch the callsite with the appropriate branch instruction.
    uint32_t BranchInst = 0;
    ARMEmitter::Emitter BranchEmit(reinterpret_cast<uint8_t*>(&BranchInst), 4);

    if (KnownCallMarkerInst == ExpectedKnownCallMarkerInst) {
      BranchEmit.bl(BranchOffset);
      Thread->LookupCache->AddBlockLink(
        GuestRip, Record, [](FEXCore::Context::ExitFunctionLinkData* Record) { DirectBlockDelinker(Record, true); }, lk);
    } else {
      BranchEmit.b(BranchOffset);
      Thread->LookupCache->AddBlockLink(
        GuestRip, Record, [](FEXCore::Context::ExitFunctionLinkData* Record) { DirectBlockDelinker(Record, false); }, lk);
    }

    // Publish the new branch with a single atomic 32-bit store, then flush the icache for it.
    std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(CallerAddress)).store(BranchInst, std::memory_order::relaxed);
    ARMEmitter::Emitter::ClearICache(reinterpret_cast<void*>(CallerAddress), 4);
  } else {
    // This case is common between calls and jumps as the thunk callsite can be left untouched.
    std::atomic_ref<uint64_t>(Record->HostCode).store(HostCode, std::memory_order::seq_cst);
#ifdef ARCHITECTURE_arm64
    // Make memory write visible to other threads reading the same location.
    // DC CVAU operates on the virtual address held in its operand, so it must be given the
    // address of the field that was just written, not the value stored in it.
    asm volatile("dc cvau, %0; dsb ish" : : "r"(&Record->HostCode) :);
#endif

    // Rewrite the thunk's first instruction into a PC-relative load of Record->HostCode.
    uint32_t LdrInst = 0;
    ARMEmitter::Emitter LdrEmit(reinterpret_cast<uint8_t*>(&LdrInst), 4);
    LdrEmit.ldr(TMP1, reinterpret_cast<uint64_t>(&Record->HostCode) - JumpThunkStartAddress);
    std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(JumpThunkStartAddress)).store(LdrInst, std::memory_order::relaxed);
    ARMEmitter::Emitter::ClearICache(reinterpret_cast<void*>(JumpThunkStartAddress), 4);

    Thread->LookupCache->AddBlockLink(GuestRip, Record, IndirectBlockDelinker, lk);
  }

  return HostCode;
}

// IR op handler that intentionally emits no host code; both arguments are ignored.
void Arm64JITCore::Op_NoOp(const IR::IROp_Header* IROp, IR::Ref Node) {}

// Constructs the Arm64 JIT backend for a single thread.
//
// - Caches the host feature flags from `ctx->HostFeatures` that the backend consults.
// - Tells the register allocation pass how many registers exist in each class.
// - Fills the frame's pointer table with the helper entry points set up below.
// - Attaches this backend to the most recent code buffer and its lookup cache.
Arm64JITCore::Arm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread)
  : CPUBackend(*ctx, Thread)
  , Arm64Emitter(ctx)
  , HostSupportsSVE128 {ctx->HostFeatures.SupportsSVE128}
  , HostSupportsSVE256 {ctx->HostFeatures.SupportsSVE256}
  // 256-bit AVX is only flagged as available when both AVX and SVE256 are reported by the host features.
  , HostSupportsAVX256 {ctx->HostFeatures.SupportsAVX && ctx->HostFeatures.SupportsSVE256}
  , HostSupportsRPRES {ctx->HostFeatures.SupportsRPRES}
  , HostSupportsAFP {ctx->HostFeatures.SupportsAFP}
  , CTX {ctx}
  , TempAllocator(ctx->CPUBackendAllocator, 0) {

  // Look up the register allocation pass by name so it can be configured for this backend.
  RAPass = Thread->PassManager->GetPass<IR::RegisterAllocationPass>("RA");

  // Register counts per class are taken from the emitter's register tables.
  RAPass->AddRegisters(IR::RegClass::GPR, GeneralRegisters.size());
  RAPass->AddRegisters(IR::RegClass::GPRFixed, StaticRegisters.size());
  RAPass->AddRegisters(IR::RegClass::FPR, GeneralFPRegisters.size());
  RAPass->AddRegisters(IR::RegClass::FPRFixed, StaticFPRegisters.size());
  RAPass->PairRegs = PairRegisters;

  {
    // Set up pointers that the JIT needs to load

    // Common
    auto& Ptrs = ThreadState->CurrentFrame->Pointers;

    Ptrs.PrintValue = reinterpret_cast<uint64_t>(PrintValue);
    Ptrs.PrintVectorValue = reinterpret_cast<uint64_t>(PrintVectorValue);
    Ptrs.ThreadRemoveCodeEntryFromJIT = reinterpret_cast<uintptr_t>(&Context::ContextImpl::ThreadRemoveCodeEntryFromJit);
    Ptrs.MonoBackpatcherWrite = reinterpret_cast<uint64_t>(&Context::ContextImpl::MonoBackpatcherWrite);
    Ptrs.CPUIDObj = reinterpret_cast<uint64_t>(&CTX->CPUID);

    {
      // Member functions are converted to plain pointers so they can be stored in the table.
      FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunFunction);
      Ptrs.CPUIDFunction = PMF.GetConvertedPointer();
    }

    {
      FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::CPUIDEmu::RunXCRFunction);
      Ptrs.XCRFunction = PMF.GetConvertedPointer();
    }

    {
      // The syscall handler entry is resolved through the object's vtable rather than converted directly.
      FEXCore::Utils::MemberFunctionToPointerCast PMF(&FEXCore::HLE::SyscallHandler::HandleSyscall);
      Ptrs.SyscallHandlerObj = reinterpret_cast<uint64_t>(CTX->SyscallHandler);
      Ptrs.SyscallHandlerFunc = PMF.GetVTableEntry(CTX->SyscallHandler);
    }
    Ptrs.ExitFunctionLink = reinterpret_cast<uintptr_t>(&Arm64JITCore::ExitFunctionLink);
    Ptrs.LUDIV = reinterpret_cast<uint64_t>(LUDIV);
    Ptrs.LDIV = reinterpret_cast<uint64_t>(LDIV);
  }

  // Start out on the most recent code buffer and share its lookup cache with this thread.
  CurrentCodeBuffer = CodeBuffers.GetLatest();
  ThreadState->LookupCache->Shared = CurrentCodeBuffer->LookupCache.get();
}

void Arm64JITCore::EmitDetectionString() {
  const char JITString[] = "FEXJIT::Arm64JITCore::";
  EmitString(JITString);
  Align();
}

// Switches this backend to an empty code buffer and remaps the thread's lookup cache.
//
// The write lock of the previous buffer's lookup cache is held for the whole operation
// and is passed on to ChangeGuestToHostMapping.
void Arm64JITCore::ClearCache() {
  // NOTE: Holding on to the reference here is required to ensure validity of the WriteLock mutex
  auto PrevCodeBuffer = CurrentCodeBuffer;
  auto lk = PrevCodeBuffer->LookupCache->AcquireWriteLock();

  // Point the emitter at the fresh buffer and stamp it with the detection string.
  auto CodeBuffer = GetEmptyCodeBuffer();
  SetBuffer(CodeBuffer->Ptr, CodeBuffer->AllocatedSize);
  EmitDetectionString();

  // NOTE(review): this reads CurrentCodeBuffer rather than the local CodeBuffer; presumably
  // GetEmptyCodeBuffer() updates CurrentCodeBuffer - confirm against its definition.
  ThreadState->LookupCache->ChangeGuestToHostMapping(*PrevCodeBuffer, *CurrentCodeBuffer->LookupCache, lk);
}

// No custom teardown is needed; members release their own resources.
Arm64JITCore::~Arm64JITCore() = default;

// Reports whether `WNode` refers to an OP_INLINECONSTANT op.
//
// Immediate wrappers are rejected up front. When the op matches and `Value` is
// non-null, the constant is written to `*Value`.
bool Arm64JITCore::IsInlineConstant(const IR::OrderedNodeWrapper& WNode, uint64_t* Value) const {
  if (WNode.IsImmediate()) {
    return false;
  }

  auto Header = IR->GetOp<IR::IROp_Header>(WNode);
  if (Header->Op != IR::IROps::OP_INLINECONSTANT) {
    return false;
  }

  auto ConstantOp = Header->C<IR::IROp_InlineConstant>();
  // The out-parameter is optional.
  if (Value) {
    *Value = ConstantOp->Constant;
  }
  return true;
}

// Reports whether `WNode` refers to an OP_INLINEENTRYPOINTOFFSET op.
//
// Immediate wrappers are rejected up front. When the op matches and `Value` is
// non-null, `*Value` receives `Entry + Offset`, truncated to 32 bits if the op size is 32-bit.
bool Arm64JITCore::IsInlineEntrypointOffset(const IR::OrderedNodeWrapper& WNode, uint64_t* Value) const {
  if (WNode.IsImmediate()) {
    return false;
  }

  auto Header = IR->GetOp<IR::IROp_Header>(WNode);
  if (Header->Op != IR::IROps::OP_INLINEENTRYPOINTOFFSET) {
    return false;
  }

  auto OffsetOp = Header->C<IR::IROp_InlineEntrypointOffset>();
  if (Value) {
    // 32-bit ops keep only the low 32 bits; every other size keeps the full 64-bit value.
    const uint64_t Mask = (Header->Size == IR::OpSize::i32Bit) ? 0xFFFF'FFFFULL : ~0ULL;
    *Value = (Entry + OffsetOp->Offset) & Mask;
  }
  return true;
}

// Emits the single-step (trap flag) check executed at a block entry point.
//
// Emitted code flow:
//  - Load the raw TF state byte; if it is zero, skip everything (l_TFUnset).
//  - If bit 1 is clear, TF is blocked for this instruction: store 0b11 to unblock it for the next one.
//  - Otherwise clear bit 1, record a synchronous SIGTRAP fault in the frame and branch to the
//    handler stored in Pointers.GuestSignal_SIGTRAP.
void Arm64JITCore::EmitTFCheck() {
  ARMEmitter::ForwardLabel l_TFUnset;
  ARMEmitter::ForwardLabel l_TFBlocked;

  // Note that this needs to be before the below suspend checks, as X86 checks this flag immediately after executing an instruction.
  ldrb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));

  (void)cbz(ARMEmitter::Size::i32Bit, TMP1, &l_TFUnset);

  // X86 semantically checks TF after executing each instruction, so e.g. setting a context with TF set will execute a single instruction
  // and then raise an exception. However on the FEX side this is simpler to implement by checking at the start of each instruction, handle this by having bit 1 being unset in the flag state indicate that TF is blocked for a single instruction.
  (void)tbz(TMP1, 1, &l_TFBlocked);

  // Block TF for a single instruction when the frontend jumps to a new context by unsetting bit 1.
  ldrb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));
  and_(ARMEmitter::Size::i32Bit, TMP1, TMP1, ~(1 << 1));
  strb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));

  // Fault description handed to the signal path: a generated SIGTRAP with the #DB trap number.
  Core::CpuStateFrame::SynchronousFaultDataStruct State = {
    .FaultToTopAndGeneratedException = 1,
    .Signal = Core::FAULT_SIGTRAP,
    .TrapNo = X86State::X86_TRAPNO_DB,
    .si_code = 2,
    .err_code = 0,
  };

  // The struct is packed into one 64-bit constant so the emitted code can store it with a single str.
  uint64_t Constant {};
  memcpy(&Constant, &State, sizeof(State));

  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Constant);
  str(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, SynchronousFaultData));
  ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.GuestSignal_SIGTRAP));
  br(TMP1);

  (void)Bind(&l_TFBlocked);
  // If TF was blocked for this instruction, unblock it for the next.
  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 0b11);
  strb(TMP1, STATE_PTR(CpuStateFrame, State.flags[X86State::RFLAG_TF_RAW_LOC]));
  (void)Bind(&l_TFUnset);
}

// Emits the checks that let a pending suspend or interrupt take effect.
//
// - When Config.NeedsPendingInterruptFaultCheck is set, emits a byte store to the
//   thread's InterruptFaultPage field.
// - On arm64ec builds, additionally emits a load of SuspendDoorbell and a `brk` that is
//   executed when the doorbell is non-zero.
void Arm64JITCore::EmitSuspendInterruptCheck() {
  if (CTX->Config.NeedsPendingInterruptFaultCheck) {
    // Trigger a fault if there are any pending interrupts
    // Used only for suspend on WIN32 at the moment
    // The offset is expressed relative to BaseFrameState, which is presumably what STATE points at - TODO confirm.
    strb(ARMEmitter::XReg::zr, STATE,
         offsetof(FEXCore::Core::InternalThreadState, InterruptFaultPage) - offsetof(FEXCore::Core::InternalThreadState, BaseFrameState));
  }

#ifdef ARCHITECTURE_arm64ec
  // Immediate placed in the brk instruction emitted below.
  static constexpr uint16_t SuspendMagic {0xCAFE};

  ldr(TMP2.W(), STATE_PTR(CpuStateFrame, SuspendDoorbell));
  ARMEmitter::ForwardLabel l_NoSuspend;
  // Skip the brk when the doorbell reads zero.
  (void)cbz(ARMEmitter::Size::i32Bit, TMP2, &l_NoSuspend);
  brk(SuspendMagic);
  (void)Bind(&l_NoSuspend);
#endif
}

// Emits the prologue placed at every guest entry point of a compiled block.
//
// HeaderLabel: label bound at the block's JITCodeHeader; its address is stored in the CPU state.
// CheckTF:     when true, the trap-flag check is emitted as part of the prologue.
//
// Emission order: header pointer store, optional TF check, spill-slot stack reservation,
// suspend/interrupt check.
void Arm64JITCore::EmitEntryPoint(ARMEmitter::BackwardLabel& HeaderLabel, bool CheckTF) {
  // Get the address of the JITCodeHeader and store in to the core state.
  // Two instruction cost, each 1 cycle.
  adr_OrRestart(TMP1, &HeaderLabel);
  str(TMP1, STATE, offsetof(FEXCore::Core::CPUState, InlineJITBlockHeader));

  if (CheckTF) {
    EmitTFCheck();
  }

  if (SpillSlots) {
    // Reserve stack space for all spill slots; ResetStack() emits the matching add.
    const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;

    if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
      sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
    } else {
      // The size does not fit the add/sub immediate form, so materialize it in TMP1 first.
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
      sub(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::rsp, ARMEmitter::XReg::rsp, TMP1, ARMEmitter::ExtendedType::LSL_64, 0);
    }
  }

  EmitSuspendInterruptCheck();
}

// Compiles one IR list into Arm64 host code and returns a description of the result.
//
// Parameters:
//   Entry      - Guest RIP the IR was generated for; entry-point offsets are relative to it.
//   Size       - Guest code size, recorded in the block tail.
//   SingleInst - Recorded in the block tail.
//   IR         - The IR to compile.
//   DebugData  - Receives guest opcode / sub-block records, the host code size and the relocation list.
//   CheckTF    - When true, every entry point gets a trap-flag check.
//
// Layout of the emitted block:
//   JITCodeHeader | code for all IR blocks | jump thunks + ExitFunctionLinkData records |
//   exit-function-linker literal | JITCodeTail | variable-length RIP entries
//
// Code is first emitted into a temporary buffer and then copied into the shared CodeBuffer
// while holding CodeBufferWriteMutex. If emission fails (branch out of range, buffer too
// small) execution restarts from the SetJump below with adjusted options.
CPUBackend::CompiledCode Arm64JITCore::CompileCode(uint64_t Entry, uint64_t Size, bool SingleInst, const FEXCore::IR::IRListView* IR,
                                                   FEXCore::Core::DebugData* DebugData, bool CheckTF) {
  FEXCORE_PROFILE_SCOPED("Arm64::CompileCode");

  // Relocations from earlier compilations must survive a restart; only entries past this index are ours.
  const auto PrevNumAllocations = Relocations.size();

  this->Entry = Entry;
  this->DebugData = DebugData;
  this->IR = IR;
  RequiresFarARM64Jumps = false;
  SSANodeMultiplier = 24;

  // Prepare restart via long jump in case branch encoding fails.
  // This uses UncheckedLongJump since we don't implement std::longjmp in WoA setups
  switch (static_cast<RestartOptions::Control>(FEXCore::UncheckedLongJump::SetJump(ThreadState->RestartJump))) {
  case RestartOptions::Control::Incoming:
    // Nothing
    break;
  case RestartOptions::Control::EnableFarARM64Jumps: RequiresFarARM64Jumps = true; break;
  case RestartOptions::Control::NeedsLargerJITSpace:
    // Get rid of the claimed buffer immediately, we can't fit in it at all.
    TempAllocator.UnclaimBuffer();
    SSANodeMultiplier *= 2;
    break;
  default: LOGMAN_MSG_A_FMT("Unhandled Arm64 restart condition!");
  }

  uint32_t SSACount = IR->GetSSACount();
  JumpTargets.clear();
  CallReturnTargets.clear();
  PendingJumpThunks.clear();
  JumpTargets.resize(IR->GetHeader()->BlockCount, {});
  Relocations.resize(PrevNumAllocations, FEXCore::CPU::Relocation::Default()); // Discard any relocations generated from a previous attempt

  CodeData.EntryPoints.clear();

  // Fairly excessive buffer range to make sure we don't overflow
  // One page baseline, plus SSANodeMultiplier bytes per SSA node, plus another page for guard page.
  const uint32_t DesiredBufferRange = AlignUp(FEXCore::Utils::FEX_PAGE_SIZE * 2 + SSACount * SSANodeMultiplier, FEXCore::Utils::FEX_PAGE_SIZE);

  // JIT output is first written to a temporary buffer and later relocated to the CodeBuffer.
  // This minimizes lock contention of CodeBufferWriteMutex.
  auto TempCodeBufferInfo = TempAllocator.ReownOrClaimBufferWithSize(DesiredBufferRange);
  auto TempCodeBuffer = TempCodeBufferInfo.Ptr;
  // The final page of the temporary buffer is reserved as the guard page.
  const uint32_t UsableBufferRange = TempCodeBufferInfo.Size - FEXCore::Utils::FEX_PAGE_SIZE;

  SetBuffer(TempCodeBuffer, UsableBufferRange);

  ThreadState->JITGuardPage = reinterpret_cast<uintptr_t>(TempCodeBuffer) + UsableBufferRange;
  ThreadState->JITGuardOverflowArgument = FEXCore::ToUnderlying(RestartOptions::Control::NeedsLargerJITSpace);

  CodeData.BlockBegin = GetCursorAddress<uint8_t*>();

  // Put the code header at the start of the data block.
  ARMEmitter::BackwardLabel JITCodeHeaderLabel {};
  (void)Bind(&JITCodeHeaderLabel);
  JITCodeHeader* CodeHeader = GetCursorAddress<JITCodeHeader*>();
  CursorIncrement(sizeof(JITCodeHeader));

  auto CodeBegin = GetCursorAddress<uint8_t*>();

  // AAPCS64
  // r30      = LR
  // r29      = FP
  // r19..r28 = Callee saved
  // r18      = Platform Register (Matters if we target Windows or iOS)
  // r16..r17 = Inter-procedure scratch
  //  r9..r15 = Temp
  //  r8      = Indirect Result
  //  r0...r7 = Parameter/Results
  //
  //  FPRS:
  //  v8..v15 = (lower 64bits) Callee saved

  // Our allocation:
  // X0 = ThreadState
  // X1 = MemBase
  //
  // X1-X3 = Temp
  // X4-r18 = RA

  SpillSlots = IR->SpillSlots();

  PendingTargetLabel = nullptr;
  PendingCallReturnTargetLabel = nullptr;

  for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) {
    using namespace FEXCore::IR;
    auto BlockIROp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(BlockIROp->Header.Op == IR::OP_CODEBLOCK, "IR type failed to be a code block");
#endif

    auto BlockStartHostCode = GetCursorAddress<uint8_t*>();
    {
      const auto Node = IR->GetID(BlockNode);
      const auto Target = &JumpTargets[BlockIROp->ID];

      // if there's a pending branch, and it is not fall-through
      if (PendingTargetLabel && PendingTargetLabel != Target) {
        // A backward branch gets a suspend/interrupt check before it is taken.
        if (PendingTargetLabel->Backward.Location) {
          EmitSuspendInterruptCheck();
        }
        b_OrRestart(PendingTargetLabel);
        PendingTargetLabel = nullptr;
      }

      if (BlockIROp->EntryPoint) {
        uint64_t BlockStartRIP = Entry + BlockIROp->GuestEntryOffset;

        const auto IsReturnTarget = CallReturnTargets.try_emplace(Node).first;
        if (PendingTargetLabel) {
          // If there is a fallthrough branch to this block, skip over the entrypoint code.
          b_OrRestart(Target);
        } else if (PendingCallReturnTargetLabel && PendingCallReturnTargetLabel != &IsReturnTarget->second) {
          // A call was just emitted, but the block being emitted now is not its return block, so don't fall through.
          b_OrRestart(PendingCallReturnTargetLabel);
        }
        PendingCallReturnTargetLabel = nullptr;

        BindOrRestart(&IsReturnTarget->second);
        CodeData.EntryPoints.emplace(BlockStartRIP, GetCursorAddress<uint8_t*>());
        DebugData->GuestOpcodes.push_back({BlockIROp->GuestEntryOffset, GetCursorAddress<uint8_t*>() - CodeData.BlockBegin});

        EmitEntryPoint(JITCodeHeaderLabel, CheckTF);
      }

      if (PendingCallReturnTargetLabel) {
        // If there is still a pending call return target, then the block we're emitting is not the return block so don't fallthrough.
        b_OrRestart(PendingCallReturnTargetLabel);
        PendingCallReturnTargetLabel = nullptr;
      }
      PendingTargetLabel = nullptr;

      BindOrRestart(Target);
    }

    // Dispatch every IR op in the block to its Op_* handler.
    for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) {
      switch (IROp->Op) {
#define REGISTER_OP(op, x) \
  case FEXCore::IR::IROps::OP_##op: Op_##x(IROp, CodeNode); break

#define IROP_DISPATCH_DISPATCH
#include <FEXCore/IR/IRDefines_Dispatch.inc>
#undef REGISTER_OP

      default: Op_Unhandled(IROp, CodeNode); break;
      }
    }

    DebugData->Subblocks.push_back({static_cast<uint32_t>(BlockStartHostCode - CodeData.BlockBegin),
                                    static_cast<uint32_t>(GetCursorAddress<uint8_t*>() - BlockStartHostCode)});
  }

  // Make sure last branch is generated. It certainly can't be eliminated here.
  if (PendingTargetLabel) {
    if (PendingTargetLabel->Backward.Location) {
      EmitSuspendInterruptCheck();
    }
    b_OrRestart(PendingTargetLabel);
  }
  PendingTargetLabel = nullptr;

  // Label of the shared literal holding the exit-function-linker address.
  // Every thunk loads from it, so it must be bound exactly once, after all thunks have been
  // emitted. Binding it inside the loop would make each thunk's `ldr` read its own record's
  // HostCode field (initially 0) instead of the linker address.
  ARMEmitter::ForwardLabel l_ExitLink;
  for (auto& PendingJumpThunk : PendingJumpThunks) {
    // Align as 64-bit atomics are used on the HostCode field.
    Align(8);

    // Thunk layout (four instructions, followed directly by the link record):
    //   b   l_DoLink          ; first instruction, later patched by ExitFunctionLink to load HostCode
    //   br  TMP1              ; taken once the first instruction has been patched
    // l_DoLink:
    //   ldr TMP1, l_ExitLink  ; address of the exit function linker
    //   blr TMP1              ; the return address is the record that follows
    ARMEmitter::ForwardLabel l_DoLink;
    uint64_t ThunkAddress = GetCursorAddress<uint64_t>();
    BindOrRestart(&PendingJumpThunk.Label);
    b_OrRestart(&l_DoLink);
    br(TMP1);
    BindOrRestart(&l_DoLink);
    ldr(TMP1, &l_ExitLink);
    blr(TMP1);

    // This is a ExitFunctionLinkData struct
    dc64(0);                                                                   // HostCode
    PlaceNamedSymbolLiteral(InsertGuestRIPLiteral(PendingJumpThunk.GuestRIP)); // GuestRIP
    dc64(PendingJumpThunk.CallerAddress - ThunkAddress);                       // CallerOffset
  }

  BindOrRestart(&l_ExitLink);
  PlaceNamedSymbolLiteral(InsertNamedSymbolLiteral(RelocNamedSymbolLiteral::NamedSymbol::SYMBOL_LITERAL_EXITFUNCTION_LINKER));

  // CodeSize not including the header or tail data.
  const uint64_t CodeOnlySize = GetCursorAddress<uint8_t*>() - CodeBegin;

  // Add the JitCodeTail (written later)
  Align(alignof(JITCodeTail));
  const auto JITBlockTailLocation = GetCursorAddress<uint8_t*>();
  CodeHeader->OffsetToBlockTail = JITBlockTailLocation - CodeData.BlockBegin;

  JITCodeTail JITBlockTail {
    .RIP = Entry,
    .GuestSize = Size,
    .SpinLockFutex = 0,
    .SingleInst = SingleInst,
  };

  // Entries that live after the JITCodeTail.
  // These entries correlate JIT code regions with guest RIP regions.
  // Using these entries FEX is able to reconstruct the guest RIP accurately when an instruction causes a signal fault.
  // Packed using two variable length integer entries to ensure the size isn't too large.
  // These smaller sizes mean that each entry is relative to the previous one instead of an absolute offset from the start of the JIT block.
  // When reconstructing the RIP, each entry must be walked linearly and accumulated with the previous entries.
  // This is a trade-off between compression inside the JIT code space and execution time when reconstructing the RIP.
  // RIP reconstruction when faulting is less likely so we are requiring the accumulation.
  //
  // struct {
  //   // The Host PC offset from the previous entry.
  //   FEXCore::Utils::vl64 HostPCOffset;
  //   // How much to offset the RIP from the previous entry.
  //   FEXCore::Utils::vl64 GuestRIPOffset;
  // };

  const auto JITRIPEntriesBegin = JITBlockTailLocation + sizeof(JITBlockTail);
  auto JITRIPEntriesLocation = JITRIPEntriesBegin;

  {
    // Store the RIP entries.
    JITBlockTail.NumberOfRIPEntries = DebugData->GuestOpcodes.size();
    JITBlockTail.OffsetToRIPEntries = JITRIPEntriesBegin - JITBlockTailLocation;
    uintptr_t CurrentRIPOffset = 0;
    uint64_t CurrentPCOffset = 0;

    for (size_t i = 0; i < DebugData->GuestOpcodes.size(); i++) {
      const auto& GuestOpcode = DebugData->GuestOpcodes[i];
      // Deltas are signed: an entry may sit before the previous one in either address space.
      int64_t HostPCOffset = GuestOpcode.HostEntryOffset - CurrentPCOffset;
      int64_t GuestRIPOffset = GuestOpcode.GuestEntryOffset - CurrentRIPOffset;

      JITRIPEntriesLocation += FEXCore::Utils::vl64pair::Encode(JITRIPEntriesLocation, HostPCOffset, GuestRIPOffset);

      CurrentPCOffset = GuestOpcode.HostEntryOffset;
      CurrentRIPOffset = GuestOpcode.GuestEntryOffset;
    }
  }

  // The RIP entries were written directly through the pointer, so move the cursor past them.
  SetCursorOffset(JITRIPEntriesLocation - CodeData.BlockBegin);
  Align();

  CodeData.Size = GetCursorAddress<uint8_t*>() - CodeData.BlockBegin;

  // Finalize and write block tail data
  JITBlockTail.Size = CodeData.Size;
  {
    auto PrevCur = GetCursorOffset();
    memcpy(JITBlockTailLocation, &JITBlockTail, sizeof(JITBlockTail));
    // Re-emit the RIP field through the literal mechanism so that it is recorded as a guest RIP literal.
    SetCursorOffset(JITBlockTailLocation - CodeData.BlockBegin + offsetof(JITCodeTail, RIP));
    PlaceNamedSymbolLiteral(InsertGuestRIPLiteral(JITBlockTail.RIP));
    SetCursorOffset(PrevCur);
  }

  // Migrate the compile output from temporary storage to the actual CodeBuffer.
  // This can block progress in other compiling threads, so the duration of the lock should be as small as possible.
  {
    auto CodeBufferLock = std::unique_lock {CodeBuffers.CodeBufferWriteMutex};

    // Query size of generated code
    const auto TempSize = GetCursorOffset();

    // Bring CodeBuffer up to date
    {
      LOGMAN_THROW_A_FMT(CurrentCodeBuffer->LookupCache.get() == ThreadState->LookupCache->Shared, "INVARIANT VIOLATED: SharedLookupCache "
                                                                                                   "doesn't match up!\n");
      if (auto Prev = CheckCodeBufferUpdate()) {
        Allocator::VirtualDontNeed(ThreadState->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE);
        auto lk = ThreadState->LookupCache->AcquireWriteLock();
        ThreadState->LookupCache->ChangeGuestToHostMapping(*Prev, *CurrentCodeBuffer->LookupCache, lk);
      }

      // NOTE: 16-byte alignment of the new cursor offset must be preserved for block linking records
      SetBuffer(CurrentCodeBuffer->Ptr, CurrentCodeBuffer->AllocatedSize);
      SetCursorOffset(CodeBuffers.LatestOffset);
      Align16B();
      if ((GetCursorOffset() + TempSize) > CurrentCodeBuffer->UsableSize()) {
        CTX->ClearCodeCache(ThreadState);
      }

      CodeBuffers.LatestOffset = GetCursorOffset();
    }

    // Adjust host addresses
    const auto Delta = GetCursorAddress<uint8_t*>() - CodeData.BlockBegin;
    CodeData.BlockBegin += Delta;
    for (auto& EntryPoint : CodeData.EntryPoints) {
      EntryPoint.second += Delta;
    }
    CodeBegin += Delta;

    // Only relocations added by this compilation need rebasing onto the final buffer offset.
    for (std::size_t Idx = PrevNumAllocations; Idx != Relocations.size(); ++Idx) {
      Relocations[Idx].Header.Offset += CodeBuffers.LatestOffset;
    }

    // Copy over CodeBuffer contents
    memcpy(GetCursorAddress<uint8_t*>(), TempCodeBuffer, TempSize);
    SetCursorOffset(CodeBuffers.LatestOffset + TempSize);

    CodeBuffers.LatestOffset = GetCursorOffset();
  }

  TempAllocator.DelayedDisownBuffer();

  ClearICache(CodeBegin, CodeOnlySize);

#ifdef VIXL_DISASSEMBLER
  if (Disassemble() & FEXCore::Config::Disassemble::STATS) {
    auto HeaderOp = IR->GetHeader();
    LOGMAN_THROW_A_FMT(HeaderOp->Header.Op == IR::OP_IRHEADER, "First op wasn't IRHeader");

    LogMan::Msg::IFmt("RIP: 0x{:x}", Entry);
    LogMan::Msg::IFmt("Guest Code instructions: {}", HeaderOp->NumHostInstructions);
    LogMan::Msg::IFmt("Host Code instructions: {}", CodeOnlySize >> 2);
    LogMan::Msg::IFmt("Blow-up Amt: {}x", double(CodeOnlySize >> 2) / double(HeaderOp->NumHostInstructions));
  }

  if (Disassemble() & FEXCore::Config::Disassemble::BLOCKS) {
    const auto DisasmBegin = reinterpret_cast<const vixl::aarch64::Instruction*>(CodeBegin);
    const auto DisasmEnd = reinterpret_cast<const vixl::aarch64::Instruction*>(CodeBegin + CodeOnlySize);
    LogMan::Msg::IFmt("Disassemble Begin");
    for (auto PCToDecode = DisasmBegin; PCToDecode < DisasmEnd; PCToDecode += 4) {
      DisasmDecoder->Decode(PCToDecode);
      auto Output = Disasm->GetOutput();
      LogMan::Msg::IFmt("{}", Output);
    }
    LogMan::Msg::IFmt("Disassemble End");
  }
#endif

  DebugData->HostCodeSize = CodeData.Size;
  DebugData->Relocations = &Relocations;

  this->IR = nullptr;

  return std::move(CodeData);
}

void Arm64JITCore::ResetStack() {
  if (SpillSlots == 0) {
    return;
  }

  const auto TotalSpillSlotsSize = SpillSlots * MaxSpillSlotSize;

  if (ARMEmitter::IsImmAddSub(TotalSpillSlotsSize)) {
    add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, TotalSpillSlotsSize);
  } else {
    // Too big to fit in a 12bit immediate
    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, TotalSpillSlotsSize);
    add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::rsp, ARMEmitter::XReg::rsp, TMP1, ARMEmitter::ExtendedType::LSL_64, 0);
  }
}

// Factory: constructs an Arm64JITCore for `Thread` and returns it as an owning CPUBackend pointer.
fextl::unique_ptr<CPUBackend> CreateArm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread) {
  return fextl::make_unique<Arm64JITCore>(ctx, Thread);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/JITClass.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#pragma once

#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/CPUBackend.h"
#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/JIT/Relocations.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IntrusiveIRList.h"
#include "Interface/IR/RegisterAllocationData.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/Utils/LongJump.h>

#include <CodeEmitter/Emitter.h>

#include <array>
#include <cstdint>
#include <functional>
#include <optional>
#include <utility>
#include <variant>

namespace FEXCore::Core {
struct InternalThreadState;
}
namespace FEXCore::Context {
struct ExitFunctionLinkData;
}
namespace FEXCore::IR {
class RegisterAllocationPass;
}

namespace FEXCore::CPU {
class Arm64JITCore final : public CPUBackend, public Arm64Emitter {
public:
  explicit Arm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread);
  ~Arm64JITCore() override;

  [[nodiscard]]
  CPUBackend::CompiledCode CompileCode(uint64_t Entry, uint64_t Size, bool SingleInst, const FEXCore::IR::IRListView* IR,
                                       FEXCore::Core::DebugData* DebugData, bool CheckTF) override;

  void ClearCache() override;

  // Drops every relocation recorded so far.
  void ClearRelocations() override {
    Relocations.clear();
  }

private:
  const bool HostSupportsSVE128 {};
  const bool HostSupportsSVE256 {};
  const bool HostSupportsAVX256 {};
  const bool HostSupportsRPRES {};
  const bool HostSupportsAFP {};

  // Reasons for which compilation of a block may be restarted.
  struct RestartOptions {
    enum class Control : uint64_t {
      // First pass through the restart point; no restart has happened.
      Incoming = 0,
      // Restart with far jumps enabled.
      EnableFarARM64Jumps = 1,
      // Restart with a larger temporary JIT buffer.
      NeedsLargerJITSpace = 2,
    };
  };

  // FEXCore makes assumptions in the JIT about certain conditions being true.
  // In the rare case when those assumptions are broken, FEX needs to safely restart the JIT.
  RestartOptions RestartControl {};
  bool RequiresFarARM64Jumps {};
  // Default to 6 instructions per SSA node.
  uint32_t SSANodeMultiplier {24};

  ARMEmitter::BiDirectionalLabel* PendingTargetLabel {};
  ARMEmitter::BiDirectionalLabel* PendingCallReturnTargetLabel {};
  FEXCore::Context::ContextImpl* CTX {};
  const FEXCore::IR::IRListView* IR {};
  uint64_t Entry {};
  CPUBackend::CompiledCode CodeData {};

  fextl::vector<ARMEmitter::BiDirectionalLabel> JumpTargets;

  // Returns the label belonging to the code block that `Node` refers to.
  ARMEmitter::BiDirectionalLabel* JumpTarget(IR::OrderedNodeWrapper Node) {
    const auto BlockID = IR->GetOp<IR::IROp_CodeBlock>(Node)->ID;
    return &JumpTargets[BlockID];
  }

  fextl::map<IR::NodeID, ARMEmitter::BiDirectionalLabel> CallReturnTargets;

  // A jump thunk that still has to be emitted at the end of the block being compiled.
  struct PendingJumpThunk {
    // Host address of the call site; stored in the link record relative to the thunk address.
    uint64_t CallerAddress;
    // Guest RIP written into the thunk's link record.
    uint64_t GuestRIP;
    // Bound at the start of the thunk when it is emitted.
    ARMEmitter::ForwardLabel Label;
  };
  fextl::vector<PendingJumpThunk> PendingJumpThunks;

  Utils::PoolBufferWithTimedRetirement<uint8_t*, 5000, 500> TempAllocator;

  static uint64_t ExitFunctionLink(FEXCore::Core::CpuStateFrame* Frame, FEXCore::Context::ExitFunctionLinkData* Record);

  // Maps an allocated general-purpose physical register to the host register backing it.
  // Only the GPR and GPRFixed classes are valid here.
  [[nodiscard]]
  ARMEmitter::Register GetReg(IR::PhysicalRegister Reg) const {
    const auto RegClass = Reg.AsRegClass();

    LOGMAN_THROW_A_FMT(RegClass == IR::RegClass::GPRFixed || RegClass == IR::RegClass::GPR, "Unexpected Class: {}", Reg.Class);

    switch (RegClass) {
    case IR::RegClass::GPRFixed: return StaticRegisters[Reg.Reg];
    case IR::RegClass::GPR: return GeneralRegisters[Reg.Reg];
    default: break;
    }

    FEX_UNREACHABLE;
  }

  // Convenience overload: builds the PhysicalRegister from an IR node reference first.
  [[nodiscard]]
  ARMEmitter::Register GetReg(IR::Ref Node) const {
    return GetReg(IR::PhysicalRegister(Node));
  }

  // Convenience overload: builds the PhysicalRegister from an ordered node wrapper first.
  [[nodiscard]]
  ARMEmitter::Register GetReg(IR::OrderedNodeWrapper Wrap) const {
    return GetReg(IR::PhysicalRegister(Wrap));
  }

  // Maps an allocated floating-point/vector physical register to the host register backing it.
  // Only the FPR and FPRFixed classes are valid here.
  [[nodiscard]]
  ARMEmitter::VRegister GetVReg(IR::PhysicalRegister Reg) const {
    const auto RegClass = Reg.AsRegClass();

    LOGMAN_THROW_A_FMT(RegClass == IR::RegClass::FPRFixed || RegClass == IR::RegClass::FPR, "Unexpected Class: {}", Reg.Class);

    switch (RegClass) {
    case IR::RegClass::FPRFixed: return StaticFPRegisters[Reg.Reg];
    case IR::RegClass::FPR: return GeneralFPRegisters[Reg.Reg];
    default: break;
    }

    FEX_UNREACHABLE;
  }

  // Convenience overload: builds the PhysicalRegister from an IR node reference first.
  [[nodiscard]]
  ARMEmitter::VRegister GetVReg(IR::Ref Node) const {
    return GetVReg(IR::PhysicalRegister(Node));
  }

  // Convenience overload: builds the PhysicalRegister from an ordered node wrapper first.
  [[nodiscard]]
  ARMEmitter::VRegister GetVReg(IR::OrderedNodeWrapper Wrap) const {
    return GetVReg(IR::PhysicalRegister(Wrap));
  }

  // Returns the register class of the physical register assigned to `Node`.
  [[nodiscard]]
  static IR::RegClass GetRegClass(IR::Ref Node) {
    return IR::PhysicalRegister(Node).AsRegClass();
  }

  // Returns the register for `Src`, or the zero register when `Src` is an inline constant.
  // The only inline constant accepted is 0 (checked by assertion).
  [[nodiscard]]
  ARMEmitter::Register GetZeroableReg(IR::OrderedNodeWrapper Src) const {
    uint64_t Const;
    if (!IsInlineConstant(Src, &Const)) {
      return GetReg(Src);
    }

    LOGMAN_THROW_A_FMT(Const == 0, "Only valid constant");
    return ARMEmitter::Reg::zr;
  }

  // Translates an IR shift type into the emitter's shift type.
  // LSL, LSR and ASR map to their namesakes; every other value maps to ROR.
  [[nodiscard]]
  static ARMEmitter::ShiftType ConvertIRShiftType(IR::ShiftType Shift) {
    switch (Shift) {
    case IR::ShiftType::LSL: return ARMEmitter::ShiftType::LSL;
    case IR::ShiftType::LSR: return ARMEmitter::ShiftType::LSR;
    case IR::ShiftType::ASR: return ARMEmitter::ShiftType::ASR;
    default: return ARMEmitter::ShiftType::ROR;
    }
  }

  // Chooses the emitter operand size for an op: 64-bit when the op size is 64-bit, otherwise 32-bit.
  [[nodiscard]]
  static ARMEmitter::Size ConvertSize(const IR::IROp_Header* Op) {
    if (Op->Size == IR::OpSize::i64Bit) {
      return ARMEmitter::Size::i64Bit;
    }
    return ARMEmitter::Size::i32Bit;
  }

  // Same as ConvertSize(Op), but asserts that the op size is exactly 32-bit or 64-bit.
  [[nodiscard]]
  static ARMEmitter::Size ConvertSize48(const IR::IROp_Header* Op) {
    LOGMAN_THROW_A_FMT(Op->Size == IR::OpSize::i32Bit || Op->Size == IR::OpSize::i64Bit, "Invalid size");
    return ConvertSize(Op);
  }

  // Chooses the emitter operand size for an IR size: 64-bit when `Size` is 64-bit, otherwise 32-bit.
  [[nodiscard]]
  static ARMEmitter::Size ConvertSize(IR::OpSize Size) {
    if (Size == IR::OpSize::i64Bit) {
      return ARMEmitter::Size::i64Bit;
    }
    return ARMEmitter::Size::i32Bit;
  }

  // Translates an element size of 8 to 128 bits into the matching emitter sub-register size.
  // Other sizes are rejected by assertion.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize16(IR::OpSize ElementSize) {
    LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit ||
                         ElementSize == IR::OpSize::i64Bit || ElementSize == IR::OpSize::i128Bit,
                       "Invalid size");
    switch (ElementSize) {
    case IR::OpSize::i8Bit: return ARMEmitter::SubRegSize::i8Bit;
    case IR::OpSize::i16Bit: return ARMEmitter::SubRegSize::i16Bit;
    case IR::OpSize::i32Bit: return ARMEmitter::SubRegSize::i32Bit;
    case IR::OpSize::i64Bit: return ARMEmitter::SubRegSize::i64Bit;
    default: return ARMEmitter::SubRegSize::i128Bit;
    }
  }

  // Overload taking the element size from the op header.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize16(const IR::IROp_Header* Op) {
    return ConvertSubRegSize16(Op->ElementSize);
  }

  // Like ConvertSubRegSize16, but additionally asserts that the element size is not 128-bit.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize8(IR::OpSize ElementSize) {
    LOGMAN_THROW_A_FMT(ElementSize != IR::OpSize::i128Bit, "Invalid size");
    return ConvertSubRegSize16(ElementSize);
  }

  // Overload taking the element size from the op header.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize8(const IR::IROp_Header* Op) {
    return ConvertSubRegSize8(Op->ElementSize);
  }

  // Like ConvertSubRegSize8, but additionally asserts that the element size is not 64-bit.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize4(const IR::IROp_Header* Op) {
    LOGMAN_THROW_A_FMT(Op->ElementSize != IR::OpSize::i64Bit, "Invalid size");
    return ConvertSubRegSize8(Op);
  }

  // Like ConvertSubRegSize8, but additionally asserts that the element size is not 8-bit.
  [[nodiscard]]
  static ARMEmitter::SubRegSize ConvertSubRegSize248(const IR::IROp_Header* Op) {
    LOGMAN_THROW_A_FMT(Op->ElementSize != IR::OpSize::i8Bit, "Invalid size");
    return ConvertSubRegSize8(Op);
  }

  // Converts the op's element size (8 to 128 bits) and wraps it with ToVectorSizePair.
  [[nodiscard]]
  static ARMEmitter::VectorRegSizePair ConvertSubRegSizePair16(const IR::IROp_Header* Op) {
    return ARMEmitter::ToVectorSizePair(ConvertSubRegSize16(Op));
  }

  // Like ConvertSubRegSizePair16, but additionally asserts that the element size is not 128-bit.
  [[nodiscard]]
  static ARMEmitter::VectorRegSizePair ConvertSubRegSizePair8(const IR::IROp_Header* Op) {
    LOGMAN_THROW_A_FMT(Op->ElementSize != IR::OpSize::i128Bit, "Invalid size");
    return ConvertSubRegSizePair16(Op);
  }

  // Like ConvertSubRegSizePair8, but additionally asserts that the element size is not 8-bit.
  [[nodiscard]]
  static ARMEmitter::VectorRegSizePair ConvertSubRegSizePair248(const IR::IROp_Header* Op) {
    LOGMAN_THROW_A_FMT(Op->ElementSize != IR::OpSize::i8Bit, "Invalid size");
    return ConvertSubRegSizePair8(Op);
  }

  // Maps an IR condition class to the Arm64 condition code used to test it.
  // Unsupported values trigger an assertion message and yield CC_NV.
  [[nodiscard]]
  static ARMEmitter::Condition MapCC(IR::CondClass Cond) {
    switch (Cond) {
    // Integer equality and signed comparisons.
    case IR::CondClass::EQ: return ARMEmitter::Condition::CC_EQ;
    case IR::CondClass::NEQ: return ARMEmitter::Condition::CC_NE;
    case IR::CondClass::SGE: return ARMEmitter::Condition::CC_GE;
    case IR::CondClass::SLT: return ARMEmitter::Condition::CC_LT;
    case IR::CondClass::SGT: return ARMEmitter::Condition::CC_GT;
    case IR::CondClass::SLE: return ARMEmitter::Condition::CC_LE;
    // Unsigned comparisons.
    case IR::CondClass::UGE: return ARMEmitter::Condition::CC_CS;
    case IR::CondClass::ULT: return ARMEmitter::Condition::CC_CC;
    case IR::CondClass::UGT: return ARMEmitter::Condition::CC_HI;
    case IR::CondClass::ULE: return ARMEmitter::Condition::CC_LS;
    // Floating-point comparisons share condition codes with the signed integer ones.
    case IR::CondClass::FLU: return ARMEmitter::Condition::CC_LT;
    case IR::CondClass::FGE: return ARMEmitter::Condition::CC_GE;
    case IR::CondClass::FLEU: return ARMEmitter::Condition::CC_LE;
    case IR::CondClass::FGT: return ARMEmitter::Condition::CC_GT;
    // FU and FNU are tested through the overflow flag, the same as VS and VC.
    case IR::CondClass::FU:
    case IR::CondClass::VS: return ARMEmitter::Condition::CC_VS;
    case IR::CondClass::FNU:
    case IR::CondClass::VC: return ARMEmitter::Condition::CC_VC;
    case IR::CondClass::MI: return ARMEmitter::Condition::CC_MI;
    case IR::CondClass::PL: return ARMEmitter::Condition::CC_PL;
    default: LOGMAN_MSG_A_FMT("Unsupported compare type"); return ARMEmitter::Condition::CC_NV;
    }
  }

  // True when the register class is FPR or FPRFixed.
  [[nodiscard]]
  static bool IsFPR(IR::RegClass Class) {
    switch (Class) {
    case IR::RegClass::FPR:
    case IR::RegClass::FPRFixed: return true;
    default: return false;
    }
  }

  // True when the register class is GPR or GPRFixed.
  [[nodiscard]]
  static bool IsGPR(IR::RegClass Class) {
    switch (Class) {
    case IR::RegClass::GPR:
    case IR::RegClass::GPRFixed: return true;
    default: return false;
    }
  }

  // True when the register class reported by GetRegClass for this node is a GPR class.
  [[nodiscard]]
  static bool IsGPR(IR::Ref Node) {
    const auto Class = GetRegClass(Node);
    return IsGPR(Class);
  }

  // True when the register class reported by GetRegClass for this node is an FPR class.
  [[nodiscard]]
  static bool IsFPR(IR::Ref Node) {
    const auto Class = GetRegClass(Node);
    return IsFPR(Class);
  }

  // True when the physical register built from the wrapper belongs to a GPR class.
  [[nodiscard]]
  static bool IsGPR(IR::OrderedNodeWrapper Wrap) {
    const auto PhysReg = IR::PhysicalRegister(Wrap);
    return IsGPR(PhysReg.AsRegClass());
  }

  // True when the physical register built from the wrapper belongs to an FPR class.
  [[nodiscard]]
  static bool IsFPR(IR::OrderedNodeWrapper Wrap) {
    const auto PhysReg = IR::PhysicalRegister(Wrap);
    return IsFPR(PhysReg.AsRegClass());
  }

  [[nodiscard]]
  ARMEmitter::ExtendedMemOperand GenerateMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
                                                    IR::MemOffsetType OffsetType, uint8_t OffsetScale);

  [[nodiscard]]
  ARMEmitter::Register ApplyMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, ARMEmitter::Register Tmp,
                                       IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale);

  // NOTE: Will use TMP1 as a way to encode immediates that happen to fall outside
  //       the limits of the scalar plus immediate variant of SVE load/stores.
  //
  //       TMP1 is safe to use again once this memory operand is used with its
  //       equivalent loads or stores that this was called for.
  [[nodiscard]]
  ARMEmitter::SVEMemOperand GenerateSVEMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
                                                  IR::MemOffsetType OffsetType, uint8_t OffsetScale);

  [[nodiscard]]
  bool IsInlineConstant(const IR::OrderedNodeWrapper& Node, uint64_t* Value = nullptr) const;
  [[nodiscard]]
  bool IsInlineEntrypointOffset(const IR::OrderedNodeWrapper& WNode, uint64_t* Value) const;

  // Pair of 32-bit bounds describing a range.
  // NOTE(review): presumably a live range expressed in IR node/instruction indices, and whether End is
  // inclusive is not visible here — confirm against the users of this struct.
  struct LiveRange {
    uint32_t Begin; // Start of the range.
    uint32_t End;   // End of the range.
  };

  // Records a pending jump thunk for GuestRIP (tagged with the current cursor address) and emits a
  // branch to that thunk's label: `bl` when Call is true, `b` otherwise.
  void EmitLinkedBranch(uint64_t GuestRIP, bool Call) {
    // The trailing `{}` value-initializes the remaining member (presumably the Label used below).
    PendingJumpThunks.push_back({GetCursorAddress<uint64_t>(), GuestRIP, {}});
    auto& Thunk = PendingJumpThunks.back();
    // NOTE(review): the label is bound here, before the branch that targets it is emitted — confirm
    // that this ordering is intended.
    BindOrRestart(&Thunk.Label);
    if (Call) {
      bl_OrRestart(&Thunk.Label);
    } else {
      b_OrRestart(&Thunk.Label);
    }
  }

  // Restart helpers
  // Emits `bl` to Label. If the branch cannot be encoded, the process dies with an error.
  template<ARMEmitter::IsLabel T>
  void bl_OrRestart(T* Label) {
    const bool Encoded = bl(Label) == ARMEmitter::BranchEncodeSucceeded::Success;
    if (!Encoded) {
      // We can support this but currently unnecessary.
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
      FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
    }
  }

  // Emits an unconditional `b` to Label. If the branch cannot be encoded, the process dies with an error.
  template<ARMEmitter::IsLabel T>
  void b_OrRestart(T* Label) {
    const bool Encoded = b(Label) == ARMEmitter::BranchEncodeSucceeded::Success;
    if (!Encoded) {
      // We can support this but currently unnecessary.
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
      FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
    }
  }

  // Emits a conditional branch to Label.
  // Without far jumps enabled, a direct `b.cond` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, an inverted-condition skip is wrapped around an unconditional branch.
  template<ARMEmitter::IsLabel T>
  void b_OrRestart(ARMEmitter::Condition Cond, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = b(Cond, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    // Wrap a manual Cond check around an unconditional branch; this can encode larger offsets
    ARMEmitter::ForwardLabel SkipLabel {};
    (void)b(InvertCondition(Cond), &SkipLabel);

    const bool FarBranchFailed = b(Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (FarBranchFailed) {
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
    }

    (void)Bind(&SkipLabel);
  }

  // Emits a compare-and-branch-if-zero to Label.
  // Without far jumps enabled, a direct `cbz` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, a `cbnz` skip is wrapped around an unconditional branch.
  template<ARMEmitter::IsLabel T>
  void cbz_OrRestart(ARMEmitter::Size s, ARMEmitter::Register rt, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = cbz(s, rt, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    // Wrap a manual Cond check around an unconditional branch; this can encode larger offsets
    ARMEmitter::ForwardLabel SkipLabel {};
    (void)cbnz(s, rt, &SkipLabel);

    const bool FarBranchFailed = b(Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (FarBranchFailed) {
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
    }

    (void)Bind(&SkipLabel);
  }

  // Emits a compare-and-branch-if-nonzero to Label.
  // Without far jumps enabled, a direct `cbnz` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, a `cbz` skip is wrapped around an unconditional branch.
  template<ARMEmitter::IsLabel T>
  void cbnz_OrRestart(ARMEmitter::Size s, ARMEmitter::Register rt, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = cbnz(s, rt, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    // Wrap a manual Cond check around an unconditional branch; this can encode larger offsets
    ARMEmitter::ForwardLabel SkipLabel {};
    (void)cbz(s, rt, &SkipLabel);

    const bool FarBranchFailed = b(Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (FarBranchFailed) {
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
    }

    (void)Bind(&SkipLabel);
  }

  // Emits a test-bit-and-branch-if-zero to Label.
  // Without far jumps enabled, a direct `tbz` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, a `tbnz` skip is wrapped around an unconditional branch.
  template<ARMEmitter::IsLabel T>
  void tbz_OrRestart(ARMEmitter::Register rt, uint32_t Bit, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = tbz(rt, Bit, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    // Wrap a manual Cond check around an unconditional branch; this can encode larger offsets
    ARMEmitter::ForwardLabel SkipLabel {};
    (void)tbnz(rt, Bit, &SkipLabel);

    const bool FarBranchFailed = b(Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (FarBranchFailed) {
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
    }

    (void)Bind(&SkipLabel);
  }

  // Emits a test-bit-and-branch-if-nonzero to Label.
  // Without far jumps enabled, a direct `tbnz` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, a `tbz` skip is wrapped around an unconditional branch.
  template<ARMEmitter::IsLabel T>
  void tbnz_OrRestart(ARMEmitter::Register rt, uint32_t Bit, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = tbnz(rt, Bit, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    // Wrap a manual Cond check around an unconditional branch; this can encode larger offsets
    ARMEmitter::ForwardLabel SkipLabel {};
    (void)tbz(rt, Bit, &SkipLabel);

    const bool FarBranchFailed = b(Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (FarBranchFailed) {
      ERROR_AND_DIE_FMT("Tried to branch larger than 128MB away!");
    }

    (void)Bind(&SkipLabel);
  }

  // Materializes the address of Label into rd.
  // Without far jumps enabled, a direct `adr` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, LongAddressGen is used instead and a failure there is fatal.
  template<ARMEmitter::IsLabel T>
  void adr_OrRestart(ARMEmitter::Register rd, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = adr(rd, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    const bool LongGenFailed = LongAddressGen(rd, Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (LongGenFailed) {
      ERROR_AND_DIE_FMT("Unable to encode long ADR.");
    }
  }

  // `adrp` counterpart of adr_OrRestart.
  // Without far jumps enabled, a direct `adrp` is attempted; on encode failure the code long-jumps to
  // ThreadState->RestartJump requesting EnableFarARM64Jumps.
  // With far jumps enabled, LongAddressGen is used instead and a failure there is fatal.
  template<ARMEmitter::IsLabel T>
  void adrp_OrRestart(ARMEmitter::Register rd, T* Label) {
    if (!RequiresFarARM64Jumps) {
      const bool Encoded = adrp(rd, Label) == ARMEmitter::BranchEncodeSucceeded::Success;
      if (!Encoded) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      }
      return;
    }

    const bool LongGenFailed = LongAddressGen(rd, Label) == ARMEmitter::BranchEncodeSucceeded::Failure;
    if (LongGenFailed) {
      ERROR_AND_DIE_FMT("Unable to encode long ADRP.");
    }
  }

  // Binds Label at the current position.
  // If binding fails while far jumps are already enabled, that is fatal; otherwise the code long-jumps
  // to ThreadState->RestartJump requesting EnableFarARM64Jumps.
  template<ARMEmitter::IsLabel T>
  void BindOrRestart(T* Label) {
    if (!Bind(Label)) {
      if (!RequiresFarARM64Jumps) {
        FEXCore::UncheckedLongJump::LongJump(ThreadState->RestartJump, FEXCore::ToUnderlying(RestartOptions::Control::EnableFarARM64Jumps));
      } else {
        // This should have been caught before this point.
        ERROR_AND_DIE_FMT("Unhandled long bind");
      }
    }
  }

  // This is purely a debugging aid for developers to see if they are in JIT code space when inspecting raw memory
  void EmitDetectionString();
  IR::RegisterAllocationPass* RAPass {};
  FEXCore::Core::DebugData* DebugData {};

  void ResetStack();
  /**
   * @name Relocations
   * @{ */

  /**
   * @brief A literal pair relocation object for named symbol literals
   */
  struct NamedSymbolLiteralPair {
    // Forward label; presumably bound where the literal is eventually placed — confirm in PlaceNamedSymbolLiteral.
    ARMEmitter::ForwardLabel Loc;
    // 64-bit literal value carried with the label.
    uint64_t Lit;
    // Relocation record associated with this literal; value-initialized by default.
    Relocation MoveABI {};
  };

  /**
   * @brief Inserts a thunk relocation
   *
   * @param Reg - The GPR to move the thunk handler in to
   * @param Sum - The hash of the thunk
   */
  void InsertNamedThunkRelocation(ARMEmitter::Register Reg, const IR::SHA256Sum& Sum);

  /**
   * @brief Inserts a guest GPR move relocation
   *
   * @param Reg - The GPR to move the guest RIP in to
   * @param Constant - The guest RIP that will be relocated
   */
  void InsertGuestRIPMove(ARMEmitter::Register Reg, uint64_t Constant);

  /**
   * @brief Inserts a named symbol as a literal in memory
   *
   * Need to use `PlaceNamedSymbolLiteral` with the return value to place the literal in the desired location
   *
   * @param Op The named symbol to place
   *
   * @return A temporary `NamedSymbolLiteralPair`
   */
  NamedSymbolLiteralPair InsertNamedSymbolLiteral(FEXCore::CPU::RelocNamedSymbolLiteral::NamedSymbol Op);

  /**
   * @brief Inserts a relocation for a constant value relative to the guest entrypoint
   *
   * @param GuestRIP - The guest RIP that will be relocated
   *
   * @return A temporary `NamedSymbolLiteralPair`
   */
  NamedSymbolLiteralPair InsertGuestRIPLiteral(uint64_t GuestRIP);

  /**
   * @brief Place the named symbol literal relocation in memory
   *
   * @param Lit - Which literal to place
   */
  void PlaceNamedSymbolLiteral(NamedSymbolLiteralPair Lit);

  fextl::vector<FEXCore::CPU::Relocation> Relocations;

  /**
   * Returns any relocations generated since the last call to TakeRelocations.
   *
   * GuestBaseAddress must match the base virtual address to which the
   * input x86 binary is mapped.
   */
  fextl::vector<FEXCore::CPU::Relocation> TakeRelocations(uint64_t GuestBaseAddress) override;

  /**  @} */

  uint32_t SpillSlots {};
  using OpType = void (Arm64JITCore::*)(const IR::IROp_Header* IROp, IR::Ref Node);

  using ScalarFMAOpCaller =
    std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2, ARMEmitter::VRegister Src3)>;
  void VFScalarFMAOperation(IR::OpSize OpSize, IR::OpSize ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
                            ARMEmitter::VRegister Upper, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2,
                            ARMEmitter::VRegister Addend);
  using ScalarBinaryOpCaller = std::function<void(ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2)>;
  void VFScalarOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
                         ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2);
  using ScalarUnaryOpCaller = std::function<void(ARMEmitter::VRegister Dst, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar)>;
  void VFScalarUnaryOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit,
                              ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1,
                              std::variant<ARMEmitter::VRegister, ARMEmitter::Register> Vector2);

  void Emulate128BitGather(IR::OpSize Size, IR::OpSize ElementSize, ARMEmitter::VRegister Dst, ARMEmitter::VRegister IncomingDst,
                           std::optional<ARMEmitter::Register> BaseAddr, ARMEmitter::VRegister VectorIndexLow,
                           std::optional<ARMEmitter::VRegister> VectorIndexHigh, ARMEmitter::VRegister MaskReg, IR::OpSize VectorIndexSize,
                           size_t DataElementOffsetStart, size_t IndexElementOffsetStart, uint8_t OffsetScale, IR::OpSize AddrSize);

  void EmitTFCheck();

  void EmitSuspendInterruptCheck();

  void EmitEntryPoint(ARMEmitter::BackwardLabel& HeaderLabel, bool CheckTF);

#define DEF_OP(x) void Op_##x(IR::IROp_Header const* IROp, IR::Ref Node)

  ///< Unhandled handler
  DEF_OP(Unhandled);

  ///< No-op Handler
  DEF_OP(NoOp);

#define IROP_DISPATCH_DEFS
#include <FEXCore/IR/IRDefines_Dispatch.inc>
#undef DEF_OP
};

#define DEF_OP(x) void Arm64JITCore::Op_##x(IR::IROp_Header const* IROp, IR::Ref Node)

[[nodiscard]]
fextl::unique_ptr<CPUBackend> CreateArm64JITCore(FEXCore::Context::ContextImpl* ctx, FEXCore::Core::InternalThreadState* Thread);

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "FEXCore/Core/X86Enums.h"
#include "FEXCore/Utils/LogManager.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
#include "Interface/Core/CPUID.h"
#include "Interface/Core/JIT/JITClass.h"
#include "Interface/IR/RegisterAllocationData.h"
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/MathUtils.h>

namespace FEXCore::CPU {

DEF_OP(LoadContext) {
  // Loads IROp->Size bytes from STATE + Op->Offset into the register allocated for Node.
  const auto Op = IROp->C<IR::IROp_LoadContext>();
  const auto Size = IROp->Size;

  if (Op->Class != IR::RegClass::GPR) {
    // Every non-GPR class is loaded into a vector register.
    const auto VDst = GetVReg(Node);

    switch (Size) {
    case IR::OpSize::i8Bit: ldrb(VDst, STATE, Op->Offset); break;
    case IR::OpSize::i16Bit: ldrh(VDst, STATE, Op->Offset); break;
    case IR::OpSize::i32Bit: ldr(VDst.S(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: ldr(VDst.D(), STATE, Op->Offset); break;
    case IR::OpSize::i128Bit: ldr(VDst.Q(), STATE, Op->Offset); break;
    case IR::OpSize::i256Bit:
      // The 256-bit path uses a predicated ld1b on the Z register with the offset held in TMP1.
      mov(TMP1, Op->Offset);
      ld1b<ARMEmitter::SubRegSize::i8Bit>(VDst.Z(), PRED_TMP_32B.Zeroing(), STATE, TMP1);
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", Size); break;
    }
    return;
  }

  const auto GDst = GetReg(Node);

  switch (Size) {
  case IR::OpSize::i8Bit: ldrb(GDst, STATE, Op->Offset); break;
  case IR::OpSize::i16Bit: ldrh(GDst, STATE, Op->Offset); break;
  case IR::OpSize::i32Bit: ldr(GDst.W(), STATE, Op->Offset); break;
  case IR::OpSize::i64Bit: ldr(GDst.X(), STATE, Op->Offset); break;
  default: LOGMAN_MSG_A_FMT("Unhandled LoadContext size: {}", Size); break;
  }
}

DEF_OP(LoadContextPair) {
  // Loads two adjacent values of IROp->Size bytes each from STATE + Op->Offset with a single ldp,
  // into the registers allocated for Op->OutValue1 and Op->OutValue2.
  const auto Op = IROp->C<IR::IROp_LoadContextPair>();

  if (Op->Class == IR::RegClass::GPR) {
    const auto Dst1 = GetReg(Op->OutValue1);
    const auto Dst2 = GetReg(Op->OutValue2);

    switch (IROp->Size) {
    case IR::OpSize::i32Bit: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.W(), Dst2.W(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.X(), Dst2.X(), STATE, Op->Offset); break;
    // The message names this op, so an assert here points at the right handler.
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextPair size: {}", IROp->Size); break;
    }
  } else {
    // Every non-GPR class is loaded into vector registers.
    const auto Dst1 = GetVReg(Op->OutValue1);
    const auto Dst2 = GetVReg(Op->OutValue2);

    switch (IROp->Size) {
    case IR::OpSize::i32Bit: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.S(), Dst2.S(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.D(), Dst2.D(), STATE, Op->Offset); break;
    case IR::OpSize::i128Bit: ldp<ARMEmitter::IndexType::OFFSET>(Dst1.Q(), Dst2.Q(), STATE, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextPair size: {}", IROp->Size); break;
    }
  }
}

DEF_OP(StoreContext) {
  // Stores IROp->Size bytes of Op->Value to STATE + Op->Offset.
  const auto Op = IROp->C<IR::IROp_StoreContext>();
  const auto Size = IROp->Size;

  if (Op->Class != IR::RegClass::GPR) {
    // Every non-GPR class is stored from a vector register.
    const auto VSrc = GetVReg(Op->Value);

    switch (Size) {
    case IR::OpSize::i8Bit: strb(VSrc, STATE, Op->Offset); break;
    case IR::OpSize::i16Bit: strh(VSrc, STATE, Op->Offset); break;
    case IR::OpSize::i32Bit: str(VSrc.S(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: str(VSrc.D(), STATE, Op->Offset); break;
    case IR::OpSize::i128Bit: str(VSrc.Q(), STATE, Op->Offset); break;
    case IR::OpSize::i256Bit:
      // The 256-bit path uses a predicated st1b on the Z register with the offset held in TMP1.
      mov(TMP1, Op->Offset);
      st1b<ARMEmitter::SubRegSize::i8Bit>(VSrc.Z(), PRED_TMP_32B, STATE, TMP1);
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", Size); break;
    }
    return;
  }

  const auto GSrc = GetZeroableReg(Op->Value);

  switch (Size) {
  case IR::OpSize::i8Bit: strb(GSrc, STATE, Op->Offset); break;
  case IR::OpSize::i16Bit: strh(GSrc, STATE, Op->Offset); break;
  case IR::OpSize::i32Bit: str(GSrc.W(), STATE, Op->Offset); break;
  case IR::OpSize::i64Bit: str(GSrc.X(), STATE, Op->Offset); break;
  default: LOGMAN_MSG_A_FMT("Unhandled StoreContext size: {}", Size); break;
  }
}

DEF_OP(StoreContextPair) {
  // Stores two adjacent values of IROp->Size bytes each (Op->Value1, Op->Value2) to STATE + Op->Offset
  // with a single stp.
  const auto Op = IROp->C<IR::IROp_StoreContextPair>();
  const auto OpSize = IROp->Size;

  if (Op->Class == IR::RegClass::GPR) {
    auto Src1 = GetZeroableReg(Op->Value1);
    auto Src2 = GetZeroableReg(Op->Value2);

    switch (OpSize) {
    case IR::OpSize::i32Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.W(), Src2.W(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.X(), Src2.X(), STATE, Op->Offset); break;
    // The message names this op, matching the FPR path below.
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextPair size: {}", OpSize); break;
    }
  } else {
    // Every non-GPR class is stored from vector registers.
    const auto Src1 = GetVReg(Op->Value1);
    const auto Src2 = GetVReg(Op->Value2);

    switch (OpSize) {
    case IR::OpSize::i32Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.S(), Src2.S(), STATE, Op->Offset); break;
    case IR::OpSize::i64Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.D(), Src2.D(), STATE, Op->Offset); break;
    case IR::OpSize::i128Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.Q(), Src2.Q(), STATE, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextPair size: {}", OpSize); break;
    }
  }
}

DEF_OP(LoadRegister) {
  // Copies a statically mapped register (selected by Op->Reg) into the register allocated for Node.
  const auto Op = IROp->C<IR::IROp_LoadRegister>();

  if (Op->Class == IR::RegClass::GPR) {
    LOGMAN_THROW_A_FMT(Op->Reg < StaticRegisters.size(), "out of range reg");

    const auto GuestGPR = StaticRegisters[Op->Reg];
    mov(GetReg(Node).X(), GuestGPR.X());
    return;
  }

  if (Op->Class != IR::RegClass::FPR) {
    LOGMAN_THROW_A_FMT(false, "Unhandled Op->Class {}", Op->Class);
    return;
  }

  // The op size must match the full vector register width: 256-bit when HostSupportsAVX256, else 128-bit.
  const auto regSize = HostSupportsAVX256 ? IR::OpSize::i256Bit : IR::OpSize::i128Bit;
  LOGMAN_THROW_A_FMT(Op->Reg < StaticFPRegisters.size(), "out of range reg");
  LOGMAN_THROW_A_FMT(IROp->Size == regSize, "expected sized");

  const auto GuestFPR = StaticFPRegisters[Op->Reg];
  const auto HostFPR = GetVReg(Node);

  if (!HostSupportsAVX256) {
    mov(HostFPR.Q(), GuestFPR.Q());
    return;
  }

  mov(ARMEmitter::SubRegSize::i64Bit, HostFPR.Z(), PRED_TMP_32B.Merging(), GuestFPR.Z());
}

DEF_OP(LoadPF) {
  // The second-to-last entry of StaticRegisters is the source for this op.
  const auto Source = StaticRegisters[StaticRegisters.size() - 2];
  const auto Dst = GetReg(Node);

  // No move is needed when the destination already is that register.
  if (Dst.Idx() == Source.Idx()) {
    return;
  }

  mov(Dst.X(), Source.X());
}

DEF_OP(LoadAF) {
  // The last entry of StaticRegisters is the source for this op.
  const auto Source = StaticRegisters[StaticRegisters.size() - 1];
  const auto Dst = GetReg(Node);

  // No move is needed when the destination already is that register.
  if (Dst.Idx() == Source.Idx()) {
    return;
  }

  mov(Dst.X(), Source.X());
}

DEF_OP(StoreRegister) {
  // Copies Op->Value into the fixed physical register assigned to Node.
  const auto Op = IROp->C<IR::IROp_StoreRegister>();
  const auto Target = IR::PhysicalRegister(Node);
  const auto TargetClass = Target.AsRegClass();

  if (TargetClass == IR::RegClass::GPRFixed) {
    // Always use 64-bit, it's faster. Upper bits ignored for 32-bit mode.
    mov(ARMEmitter::Size::i64Bit, GetReg(Target), GetReg(Op->Value));
    return;
  }

  if (TargetClass != IR::RegClass::FPRFixed) {
    LOGMAN_THROW_A_FMT(false, "Unhandled Op->Class {}", TargetClass);
    return;
  }

  // The op size must match the full vector register width: 256-bit when HostSupportsAVX256, else 128-bit.
  const auto regSize = HostSupportsAVX256 ? IR::OpSize::i256Bit : IR::OpSize::i128Bit;
  LOGMAN_THROW_A_FMT(IROp->Size == regSize, "expected sized");

  const auto GuestFPR = GetVReg(Target);
  const auto HostFPR = GetVReg(Op->Value);

  if (!HostSupportsAVX256) {
    mov(GuestFPR.Q(), HostFPR.Q());
    return;
  }

  mov(ARMEmitter::SubRegSize::i64Bit, GuestFPR.Z(), PRED_TMP_32B.Merging(), HostFPR.Z());
}

DEF_OP(StorePF) {
  // Moves Op->Value into the second-to-last entry of StaticRegisters.
  const auto Op = IROp->C<IR::IROp_StorePF>();
  const auto Target = StaticRegisters[StaticRegisters.size() - 2];
  const auto Value = GetReg(Op->Value);

  // Nothing to emit when the value already lives in the target register.
  if (Value.Idx() == Target.Idx()) {
    return;
  }

  // Always use 64-bit, it's faster. Upper bits ignored for 32-bit mode.
  mov(ARMEmitter::Size::i64Bit, Target, Value);
}

DEF_OP(StoreAF) {
  // Moves Op->Value into the last entry of StaticRegisters.
  const auto Op = IROp->C<IR::IROp_StoreAF>();
  const auto Target = StaticRegisters[StaticRegisters.size() - 1];
  const auto Value = GetReg(Op->Value);

  // Nothing to emit when the value already lives in the target register.
  if (Value.Idx() == Target.Idx()) {
    return;
  }

  // Always use 64-bit, it's faster. Upper bits ignored for 32-bit mode.
  mov(ARMEmitter::Size::i64Bit, Target, Value);
}

DEF_OP(LoadContextIndexed) {
  // Loads IROp->Size bytes from STATE + Index * Op->Stride + Op->BaseOffset into the register
  // allocated for Node. TMP1 holds STATE + Index * Stride; the 256-bit path also uses TMP2.
  const auto Op = IROp->C<IR::IROp_LoadContextIndexed>();
  const auto OpSize = IROp->Size;

  const auto Index = GetReg(Op->Index);

  if (Op->Class == IR::RegClass::GPR) {
    switch (Op->Stride) {
    case 1:
    case 2:
    case 4:
    case 8: {
      // The stride is a power of two, so the scaling is folded into the add as a left shift.
      add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
      const auto Dst = GetReg(Node);
      switch (OpSize) {
      case IR::OpSize::i8Bit: ldrb(Dst, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i16Bit: ldrh(Dst, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i32Bit: ldr(Dst.W(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i64Bit: ldr(Dst.X(), TMP1, Op->BaseOffset); break;
      default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); break;
      }
      break;
    }
    case 16: LOGMAN_MSG_A_FMT("Invalid Class load of size 16"); break;
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed stride: {}", Op->Stride); break;
    }
  } else {
    switch (Op->Stride) {
    case 1:
    case 2:
    case 4:
    case 8:
    case 16:
    case 32: {
      add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
      const auto Dst = GetVReg(Node);

      switch (OpSize) {
      case IR::OpSize::i8Bit: ldrb(Dst, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i16Bit: ldrh(Dst, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i32Bit: ldr(Dst.S(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i64Bit: ldr(Dst.D(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i128Bit:
        if (Op->BaseOffset % 16 == 0) {
          ldr(Dst.Q(), TMP1, Op->BaseOffset);
        } else {
          // An offset that is not a multiple of 16 is folded into the base first. The load then uses
          // a zero offset so that BaseOffset is applied exactly once.
          add(ARMEmitter::Size::i64Bit, TMP1, TMP1, Op->BaseOffset);
          ldur(Dst.Q(), TMP1, 0);
        }
        break;
      case IR::OpSize::i256Bit:
        // The 256-bit path uses a predicated ld1b on the Z register with the offset held in TMP2.
        mov(TMP2, Op->BaseOffset);
        ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, TMP2);
        break;
      default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed size: {}", OpSize); break;
      }
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled LoadContextIndexed stride: {}", Op->Stride); break;
    }
  }
}

DEF_OP(StoreContextIndexed) {
  // Stores IROp->Size bytes of Op->Value to STATE + Index * Op->Stride + Op->BaseOffset.
  // TMP1 holds STATE + Index * Stride; the 256-bit path also uses TMP2.
  const auto Op = IROp->C<IR::IROp_StoreContextIndexed>();
  const auto OpSize = IROp->Size;

  const auto Index = GetReg(Op->Index);

  if (Op->Class == IR::RegClass::GPR) {
    const auto Value = GetReg(Op->Value);

    switch (Op->Stride) {
    case 1:
    case 2:
    case 4:
    case 8: {
      // The stride is a power of two, so the scaling is folded into the add as a left shift.
      add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));

      switch (OpSize) {
      case IR::OpSize::i8Bit: strb(Value, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i16Bit: strh(Value, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i32Bit: str(Value.W(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i64Bit: str(Value.X(), TMP1, Op->BaseOffset); break;
      default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); break;
      }
      break;
    }
    case 16: LOGMAN_MSG_A_FMT("Invalid Class store of size 16"); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); break;
    }
  } else {
    const auto Value = GetVReg(Op->Value);

    switch (Op->Stride) {
    case 1:
    case 2:
    case 4:
    case 8:
    case 16:
    case 32: {
      add(ARMEmitter::Size::i64Bit, TMP1, STATE, Index, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));

      switch (OpSize) {
      case IR::OpSize::i8Bit: strb(Value, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i16Bit: strh(Value, TMP1, Op->BaseOffset); break;
      case IR::OpSize::i32Bit: str(Value.S(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i64Bit: str(Value.D(), TMP1, Op->BaseOffset); break;
      case IR::OpSize::i128Bit:
        if (Op->BaseOffset % 16 == 0) {
          str(Value.Q(), TMP1, Op->BaseOffset);
        } else {
          // An offset that is not a multiple of 16 is folded into the base first. The store then uses
          // a zero offset so that BaseOffset is applied exactly once.
          add(ARMEmitter::Size::i64Bit, TMP1, TMP1, Op->BaseOffset);
          stur(Value.Q(), TMP1, 0);
        }
        break;
      case IR::OpSize::i256Bit:
        // The 256-bit path uses a predicated st1b on the Z register with the offset held in TMP2.
        mov(TMP2, Op->BaseOffset);
        st1b<ARMEmitter::SubRegSize::i8Bit>(Value.Z(), PRED_TMP_32B, TMP1, TMP2);
        break;
      default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed size: {}", OpSize); break;
      }
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled StoreContextIndexed stride: {}", Op->Stride); break;
    }
  }
}

DEF_OP(FormContextAddress) {
  // Computes STATE + Index * Op->Stride into the destination register for power-of-two strides up to 32.
  const auto Op = IROp->C<IR::IROp_FormContextAddress>();
  const auto IndexReg = GetReg(Op->Index);
  const auto Result = GetReg(Node);

  const auto Stride = Op->Stride;
  const bool Supported = Stride == 1 || Stride == 2 || Stride == 4 || Stride == 8 || Stride == 16 || Stride == 32;

  if (!Supported) {
    LOGMAN_MSG_A_FMT("Unhandled FormContextAddress stride: {}", Op->Stride);
    return;
  }

  // The stride is a power of two, so the scaling is folded into the add as a left shift.
  add(ARMEmitter::Size::i64Bit, Result, STATE, IndexReg, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Op->Stride));
}

DEF_OP(SpillRegister) {
  // Stores Op->Value into its spill slot at rsp + Op->Slot * MaxSpillSlotSize.
  // When the slot offset fits the store's unsigned-immediate limit it is encoded directly;
  // otherwise the offset is materialized in TMP1 and a register-offset store is used.
  const auto Op = IROp->C<IR::IROp_SpillRegister>();
  const auto Size = IROp->Size;
  const uint32_t Offset = Op->Slot * MaxSpillSlotSize;

  if (Op->Class == IR::RegClass::GPR) {
    const auto Value = GetReg(Op->Value);

    switch (Size) {
    case IR::OpSize::i8Bit:
      if (Offset <= LSByteMaxUnsignedOffset) {
        strb(Value, ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        strb(Value, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i16Bit:
      if (Offset <= LSHalfMaxUnsignedOffset) {
        strh(Value, ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        strh(Value, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i32Bit:
      if (Offset <= LSWordMaxUnsignedOffset) {
        str(Value.W(), ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        str(Value.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i64Bit:
      if (Offset <= LSDWordMaxUnsignedOffset) {
        str(Value.X(), ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        str(Value.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", Size); break;
    }
    return;
  }

  if (Op->Class != FEXCore::IR::RegClass::FPR) {
    LOGMAN_MSG_A_FMT("Unhandled SpillRegister class: {}", Op->Class);
    return;
  }

  const auto VValue = GetVReg(Op->Value);

  switch (Size) {
  case IR::OpSize::i32Bit:
    if (Offset <= LSWordMaxUnsignedOffset) {
      str(VValue.S(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      str(VValue.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i64Bit:
    if (Offset <= LSDWordMaxUnsignedOffset) {
      str(VValue.D(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      str(VValue.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i128Bit:
    if (Offset <= LSQWordMaxUnsignedOffset) {
      str(VValue.Q(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      str(VValue.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i256Bit:
    // The 256-bit path always goes through TMP3 and a predicated st1b on the Z register.
    mov(TMP3, Offset);
    st1b<ARMEmitter::SubRegSize::i8Bit>(VValue.Z(), PRED_TMP_32B, ARMEmitter::Reg::rsp, TMP3);
    break;
  default: LOGMAN_MSG_A_FMT("Unhandled SpillRegister size: {}", Size); break;
  }
}

DEF_OP(FillRegister) {
  // Reloads the register allocated for Node from its spill slot at rsp + Op->Slot * MaxSpillSlotSize.
  // When the slot offset fits the load's unsigned-immediate limit it is encoded directly;
  // otherwise the offset is materialized in TMP1 and a register-offset load is used.
  const auto Op = IROp->C<IR::IROp_FillRegister>();
  const auto Size = IROp->Size;
  const uint32_t Offset = Op->Slot * MaxSpillSlotSize;

  if (Op->Class == IR::RegClass::GPR) {
    const auto Target = GetReg(Node);

    switch (Size) {
    case IR::OpSize::i8Bit:
      if (Offset <= LSByteMaxUnsignedOffset) {
        ldrb(Target, ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        ldrb(Target, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i16Bit:
      if (Offset <= LSHalfMaxUnsignedOffset) {
        ldrh(Target, ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        ldrh(Target, ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i32Bit:
      if (Offset <= LSWordMaxUnsignedOffset) {
        ldr(Target.W(), ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        ldr(Target.W(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    case IR::OpSize::i64Bit:
      if (Offset <= LSDWordMaxUnsignedOffset) {
        ldr(Target.X(), ARMEmitter::Reg::rsp, Offset);
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
        ldr(Target.X(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
      }
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", Size); break;
    }
    return;
  }

  if (Op->Class != FEXCore::IR::RegClass::FPR) {
    LOGMAN_MSG_A_FMT("Unhandled FillRegister class: {}", Op->Class);
    return;
  }

  const auto VTarget = GetVReg(Node);

  switch (Size) {
  case IR::OpSize::i32Bit:
    if (Offset <= LSWordMaxUnsignedOffset) {
      ldr(VTarget.S(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      ldr(VTarget.S(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i64Bit:
    if (Offset <= LSDWordMaxUnsignedOffset) {
      ldr(VTarget.D(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      ldr(VTarget.D(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i128Bit:
    if (Offset <= LSQWordMaxUnsignedOffset) {
      ldr(VTarget.Q(), ARMEmitter::Reg::rsp, Offset);
    } else {
      LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Offset);
      ldr(VTarget.Q(), ARMEmitter::Reg::rsp, TMP1.R(), ARMEmitter::ExtendedType::LSL_64, 0);
    }
    break;
  case IR::OpSize::i256Bit:
    // The 256-bit path always goes through TMP3 and a predicated ld1b on the Z register.
    mov(TMP3, Offset);
    ld1b<ARMEmitter::SubRegSize::i8Bit>(VTarget.Z(), PRED_TMP_32B.Zeroing(), ARMEmitter::Reg::rsp, TMP3);
    break;
  default: LOGMAN_MSG_A_FMT("Unhandled FillRegister size: {}", Size); break;
  }
}

DEF_OP(LoadNZCV) {
  // Read the host NZCV system register straight into this node's destination GPR.
  mrs(GetReg(Node), ARMEmitter::SystemRegister::NZCV);
}

DEF_OP(StoreNZCV) {
  // Write the GPR holding `Op->Value` into the host NZCV system register.
  const auto Op = IROp->C<IR::IROp_StoreNZCV>();
  const auto NewFlags = GetReg(Op->Value);

  msr(ARMEmitter::SystemRegister::NZCV, NewFlags);
}

DEF_OP(LoadDF) {
  // Loads the raw DF byte from the CPU state's flags array into the destination GPR.
  const auto DFIndex = X86State::RFLAG_DF_RAW_LOC;
  const auto Result = GetReg(Node);

  // A sign-extending byte load is used so that the stored 0x1/0xFF becomes 1/-1 in the 64-bit register.
  ldrsb(Result.X(), STATE, ARRAY_OFFSETOF(FEXCore::Core::CPUState, flags, DFIndex));
}

DEF_OP(ContextClear) {
  // Zeroes `Op->Size` bytes of the CPU state starting at `Op->Offset` from STATE.
  const auto Op = IROp->C<IR::IROp_ContextClear>();

  if (!CTX->HostFeatures.SupportsCLZERO) {
    // Fallback: zero a vector register once and store it pairwise, 32 bytes per store.
    movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
    for (size_t Cleared = 0; Cleared < Op->Size; Cleared += 32) {
      stp<ARMEmitter::IndexType::OFFSET>(VTMP1.Q(), VTMP1.Q(), STATE.R(), Op->Offset + Cleared);
    }
    return;
  }

  // We can use CLZero directly when hardware supports it.
  // Provides a fairly generous speed-up on Ampere1A hardware.
  // TODO: When FEAT_MOPS hardware ships, test memset using MOPS.
  // The address is advanced in 64-byte steps, one `dc zva` per step.
  for (size_t Cleared = 0; Cleared < Op->Size; Cleared += 64) {
    add(ARMEmitter::Size::i64Bit, TMP1, STATE.R(), Op->Offset + Cleared);
    dc(ARMEmitter::DataCacheOperation::ZVA, TMP1);
  }
}

// Builds the addressing-mode operand for a scalar/ASIMD load or store.
//
// - No offset node: `[Base, #0]`.
// - Inline-constant offset: `[Base, #Const]` (the constant is used as-is).
// - Register offset: `[Base, Reg, <extend> #log2(OffsetScale)]` where the extend is chosen by OffsetType.
//
// An OffsetScale other than 1 or the access size is reported as unhandled. An unknown OffsetType is
// reported and then falls through to FEX_UNREACHABLE.
ARMEmitter::ExtendedMemOperand Arm64JITCore::GenerateMemOperand(
  IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
  if (Offset.IsInvalid()) {
    return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, 0);
  }

  const bool ScaleIsSupported = OffsetScale == 1 || OffsetScale == IR::OpSizeToSize(AccessSize);
  if (!ScaleIsSupported) {
    LOGMAN_MSG_A_FMT("Unhandled GenerateMemOperand OffsetScale: {}", OffsetScale);
  }

  uint64_t Const;
  if (IsInlineConstant(Offset, &Const)) {
    return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, Const);
  }

  const auto IndexReg = GetReg(Offset);
  const auto MakeIndexed = [&Base, &IndexReg, OffsetScale](ARMEmitter::ExtendedType Extend) {
    return ARMEmitter::ExtendedMemOperand(Base.X(), IndexReg.X(), Extend, FEXCore::ilog2(OffsetScale));
  };

  switch (OffsetType) {
  case IR::MemOffsetType::SXTX: return MakeIndexed(ARMEmitter::ExtendedType::SXTX);
  case IR::MemOffsetType::UXTW: return MakeIndexed(ARMEmitter::ExtendedType::UXTW);
  case IR::MemOffsetType::SXTW: return MakeIndexed(ARMEmitter::ExtendedType::SXTW);
  default: LOGMAN_MSG_A_FMT("Unhandled GenerateMemOperand OffsetType: {}", OffsetType); break;
  }

  FEX_UNREACHABLE;
}

// Folds an optional offset into a base address and returns the register that holds the final address.
//
// Returns `Base` unchanged when there is no offset node or when the offset is the inline constant 0.
// Otherwise the sum is computed into `Tmp` and `Tmp` is returned:
// - inline constant: the constant is loaded into Tmp, then added to Base shifted left by log2(OffsetScale);
// - register offset: added to Base with the extension selected by OffsetType and the same shift.
//
// An OffsetScale other than 1 or the access size, and an unknown OffsetType, are reported as unhandled.
ARMEmitter::Register Arm64JITCore::ApplyMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, ARMEmitter::Register Tmp,
                                                   IR::OrderedNodeWrapper Offset, IR::MemOffsetType OffsetType, uint8_t OffsetScale) {
  if (Offset.IsInvalid()) {
    return Base;
  }

  const bool ScaleIsSupported = OffsetScale == 1 || OffsetScale == IR::OpSizeToSize(AccessSize);
  if (!ScaleIsSupported) {
    LOGMAN_MSG_A_FMT("Unhandled OffsetScale: {}", OffsetScale);
  }

  uint64_t Const;
  if (!IsInlineConstant(Offset, &Const)) {
    const auto IndexReg = GetReg(Offset);
    const auto AddExtended = [this, &Tmp, &Base, &IndexReg, OffsetScale](ARMEmitter::ExtendedType Extend) {
      add(ARMEmitter::Size::i64Bit, Tmp, Base, IndexReg, Extend, FEXCore::ilog2(OffsetScale));
    };

    switch (OffsetType) {
    case IR::MemOffsetType::SXTX: AddExtended(ARMEmitter::ExtendedType::SXTX); break;
    case IR::MemOffsetType::UXTW: AddExtended(ARMEmitter::ExtendedType::UXTW); break;
    case IR::MemOffsetType::SXTW: AddExtended(ARMEmitter::ExtendedType::SXTW); break;
    default: LOGMAN_MSG_A_FMT("Unhandled OffsetType: {}", OffsetType); break;
    }
    return Tmp;
  }

  if (Const == 0) {
    // Nothing to add; the base register already is the address.
    return Base;
  }

  LoadConstant(ARMEmitter::Size::i64Bit, Tmp, Const);
  add(ARMEmitter::Size::i64Bit, Tmp, Base, Tmp, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(OffsetScale));
  return Tmp;
}

// Builds the memory operand for an SVE contiguous load/store.
//
// - No offset node, or an inline constant of 0: `[Base, #0]`.
// - Inline constant that is a whole multiple of the vector size with an index in [-8, 7]:
//   encoded as a vector-length-scaled immediate.
// - Any other inline constant: moved into TMP1 and used as a register offset.
// - Register offset: used directly; only the SXTX offset type is accepted.
ARMEmitter::SVEMemOperand Arm64JITCore::GenerateSVEMemOperand(IR::OpSize AccessSize, ARMEmitter::Register Base, IR::OrderedNodeWrapper Offset,
                                                              IR::MemOffsetType OffsetType, [[maybe_unused]] uint8_t OffsetScale) {
  if (Offset.IsInvalid()) {
    return ARMEmitter::SVEMemOperand(Base.X(), 0);
  }

  uint64_t Const {};
  if (!IsInlineConstant(Offset, &Const)) {
    // Register offset.
    // Note that we do nothing with the offset type and offset scale,
    // since SVE loads and stores don't have the ability to perform an
    // optional extension or shift as part of their behavior.
    LOGMAN_THROW_A_FMT(OffsetType == IR::MemOffsetType::SXTX, "Currently only the default offset type (SXTX) is supported.");

    const auto IndexReg = GetReg(Offset);
    return ARMEmitter::SVEMemOperand(Base.X(), IndexReg.X());
  }

  if (Const == 0) {
    return ARMEmitter::SVEMemOperand(Base.X(), 0);
  }

  // SVE's immediate variants of load stores are quite limited in terms
  // of immediate range. They also operate on a by-vector-length basis.
  //
  // e.g. On a 256-bit SVE capable system:
  //
  //      LD1B Dst.B, Predicate/Z, [Reg, #1, MUL VL]
  //
  //      Will add 32 to the base register as the offset
  //
  // So if we have a constant that cleanly lies along a vector-sized offset
  // and is also within the limitations of the immediate of -8 to 7
  // then we can encode it as an immediate offset.
  const auto VectorBytes = static_cast<int64_t>(HostSupportsSVE256 ? Core::CPUState::XMM_AVX_REG_SIZE : Core::CPUState::XMM_SSE_REG_SIZE);
  const auto ByteOffset = static_cast<int64_t>(Const);

  const auto VectorIndex = ByteOffset / VectorBytes;
  const bool IsWholeVectors = (ByteOffset % VectorBytes) == 0;
  const bool IndexFitsImmediate = VectorIndex >= -8 && VectorIndex <= 7;

  if (IsWholeVectors && IndexFitsImmediate) {
    return ARMEmitter::SVEMemOperand(Base.X(), static_cast<uint64_t>(VectorIndex));
  }

  // If we can't do that for whatever reason, then unfortunately, we need
  // to move it over to a temporary to use as an offset.
  mov(TMP1, Const);
  return ARMEmitter::SVEMemOperand(Base.X(), TMP1);
}

DEF_OP(LoadMem) {
  // Emits a plain (non-TSO) load of `IROp->Size` bytes from `Op->Addr` plus the optional offset
  // into either a GPR or a vector register, depending on `Op->Class`.
  const auto Op = IROp->C<IR::IROp_LoadMem>();
  const auto OpSize = IROp->Size;

  const auto AddrReg = GetReg(Op->Addr);
  const auto Operand = GenerateMemOperand(OpSize, AddrReg, Op->Offset, Op->OffsetType, Op->OffsetScale);

  if (Op->Class != IR::RegClass::GPR) {
    const auto VDst = GetVReg(Node);

    switch (OpSize) {
    case IR::OpSize::i8Bit: ldrb(VDst, Operand); break;
    case IR::OpSize::i16Bit: ldrh(VDst, Operand); break;
    case IR::OpSize::i32Bit: ldr(VDst.S(), Operand); break;
    case IR::OpSize::i64Bit: ldr(VDst.D(), Operand); break;
    case IR::OpSize::i128Bit: ldr(VDst.Q(), Operand); break;
    case IR::OpSize::i256Bit: {
      // 256-bit loads go through SVE and need their own operand form.
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
      const auto SVEOperand = GenerateSVEMemOperand(OpSize, AddrReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
      ld1b<ARMEmitter::SubRegSize::i8Bit>(VDst.Z(), PRED_TMP_32B.Zeroing(), SVEOperand);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); break;
    }
    return;
  }

  const auto GDst = GetReg(Node);

  switch (OpSize) {
  case IR::OpSize::i8Bit: ldrb(GDst, Operand); break;
  case IR::OpSize::i16Bit: ldrh(GDst, Operand); break;
  case IR::OpSize::i32Bit: ldr(GDst.W(), Operand); break;
  case IR::OpSize::i64Bit: ldr(GDst.X(), Operand); break;
  default: LOGMAN_MSG_A_FMT("Unhandled LoadMem size: {}", OpSize); break;
  }
}

DEF_OP(LoadMemPair) {
  // Emits a single load-pair from `Op->Addr + Op->Offset` into the two output registers
  // (`OutValue1`, `OutValue2`), using GPRs or vector registers according to `Op->Class`.
  const auto Op = IROp->C<IR::IROp_LoadMemPair>();
  const auto Base = GetReg(Op->Addr);
  const auto PairElementSize = IROp->Size;

  if (Op->Class != IR::RegClass::GPR) {
    const auto First = GetVReg(Op->OutValue1);
    const auto Second = GetVReg(Op->OutValue2);

    switch (PairElementSize) {
    case IR::OpSize::i32Bit: ldp<ARMEmitter::IndexType::OFFSET>(First.S(), Second.S(), Base, Op->Offset); break;
    case IR::OpSize::i64Bit: ldp<ARMEmitter::IndexType::OFFSET>(First.D(), Second.D(), Base, Op->Offset); break;
    case IR::OpSize::i128Bit: ldp<ARMEmitter::IndexType::OFFSET>(First.Q(), Second.Q(), Base, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled LoadMemPair size: {}", IROp->Size); break;
    }
    return;
  }

  const auto First = GetReg(Op->OutValue1);
  const auto Second = GetReg(Op->OutValue2);

  switch (PairElementSize) {
  case IR::OpSize::i32Bit: ldp<ARMEmitter::IndexType::OFFSET>(First.W(), Second.W(), Base, Op->Offset); break;
  case IR::OpSize::i64Bit: ldp<ARMEmitter::IndexType::OFFSET>(First.X(), Second.X(), Base, Op->Offset); break;
  default: LOGMAN_MSG_A_FMT("Unhandled LoadMemPair size: {}", IROp->Size); break;
  }
}

DEF_OP(LoadMemTSO) {
  // Emits a load with TSO (acquire) ordering.
  //
  // GPR loads pick the strongest available host instruction family:
  //   1. SupportsTSOImm9 -> ldapur* (accepts an inline-constant offset)
  //   2. SupportsRCPC    -> ldapr*
  //   3. otherwise       -> ldar*
  // Every GPR load wider than 8 bits is followed by a nop (see the "back-patched" notes below).
  //
  // Vector loads use a regular load, followed by a load barrier when vector atomic TSO is enabled.
  const auto Op = IROp->C<IR::IROp_LoadMemTSO>();
  const auto OpSize = IROp->Size;

  const auto MemReg = GetReg(Op->Addr);

  if (Op->Class == IR::RegClass::GPR) {
    // The GPR paths below ignore OffsetType/OffsetScale, and only the TSOImm9 path consumes an offset.
    LOGMAN_THROW_A_FMT(Op->Offset.IsInvalid() || CTX->HostFeatures.SupportsTSOImm9, "unexpected offset");
    LOGMAN_THROW_A_FMT(Op->OffsetScale == 1, "unexpected offset scale");
    LOGMAN_THROW_A_FMT(Op->OffsetType == IR::MemOffsetType::SXTX, "unexpected offset type");
  }

  if (CTX->HostFeatures.SupportsTSOImm9 && Op->Class == IR::RegClass::GPR) {
    const auto Dst = GetReg(Node);
    uint64_t Offset = 0;
    if (!Op->Offset.IsInvalid()) {
      // IsInline is only consumed by the assertion macro; mark it so it does not read as unused
      // if that macro expands to nothing.
      [[maybe_unused]] const bool IsInline = IsInlineConstant(Op->Offset, &Offset);
      LOGMAN_THROW_A_FMT(IsInline, "expected immediate");
    }

    if (OpSize == IR::OpSize::i8Bit) {
      // 8bit load is always aligned to natural alignment
      ldapurb(Dst, MemReg, Offset);
    } else {
      switch (OpSize) {
      case IR::OpSize::i16Bit: ldapurh(Dst, MemReg, Offset); break;
      case IR::OpSize::i32Bit: ldapur(Dst.W(), MemReg, Offset); break;
      case IR::OpSize::i64Bit: ldapur(Dst.X(), MemReg, Offset); break;
      default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break;
      }
      // Half-barrier once back-patched.
      nop();
    }
  } else if (CTX->HostFeatures.SupportsRCPC && Op->Class == IR::RegClass::GPR) {
    const auto Dst = GetReg(Node);
    if (OpSize == IR::OpSize::i8Bit) {
      // 8bit load is always aligned to natural alignment
      ldaprb(Dst.W(), MemReg);
    } else {
      switch (OpSize) {
      case IR::OpSize::i16Bit: ldaprh(Dst.W(), MemReg); break;
      case IR::OpSize::i32Bit: ldapr(Dst.W(), MemReg); break;
      case IR::OpSize::i64Bit: ldapr(Dst.X(), MemReg); break;
      default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break;
      }
      // Half-barrier once back-patched.
      nop();
    }
  } else if (Op->Class == IR::RegClass::GPR) {
    const auto Dst = GetReg(Node);
    if (OpSize == IR::OpSize::i8Bit) {
      // 8bit load is always aligned to natural alignment
      ldarb(Dst, MemReg);
    } else {
      switch (OpSize) {
      case IR::OpSize::i16Bit: ldarh(Dst, MemReg); break;
      case IR::OpSize::i32Bit: ldar(Dst.W(), MemReg); break;
      case IR::OpSize::i64Bit: ldar(Dst.X(), MemReg); break;
      default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break;
      }
      // Half-barrier once back-patched.
      nop();
    }
  } else {
    // Vector class: plain load, with full offset/extension support through the generic operand builders.
    const auto Dst = GetVReg(Node);
    const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
    switch (OpSize) {
    case IR::OpSize::i8Bit: ldrb(Dst, MemSrc); break;
    case IR::OpSize::i16Bit: ldrh(Dst, MemSrc); break;
    case IR::OpSize::i32Bit: ldr(Dst.S(), MemSrc); break;
    case IR::OpSize::i64Bit: ldr(Dst.D(), MemSrc); break;
    case IR::OpSize::i128Bit: ldr(Dst.Q(), MemSrc); break;
    case IR::OpSize::i256Bit: {
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
      const auto SVEMemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
      ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), SVEMemSrc);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled LoadMemTSO size: {}", OpSize); break;
    }
    if (CTX->IsVectorAtomicTSOEnabled()) {
      // Half-barrier.
      dmb(ARMEmitter::BarrierScope::ISHLD);
    }
  }
}

DEF_OP(VLoadVectorMasked) {
  // Masked vector load: for every element whose mask element has its sign bit set, the element is
  // loaded from memory; all other destination elements are zero.
  //
  // With SVE (128- or 256-bit) this is a compare-to-predicate followed by one predicated load.
  // Without SVE only 128-bit operations are supported and the load is emulated element by element.
  const auto Op = IROp->C<IR::IROp_VLoadVectorMasked>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto CMPPredicate = ARMEmitter::PReg::p0;
  const auto GoverningPredicate = Is256Bit ? PRED_TMP_32B : PRED_TMP_16B;

  const auto Dst = GetVReg(Node);
  const auto MaskReg = GetVReg(Op->Mask);
  const auto MemReg = GetReg(Op->Addr);

  if (HostSupportsSVE128 || HostSupportsSVE256) {
    const auto MemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);

    // Check if the sign bit is set for the given element size.
    cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);

    switch (IROp->ElementSize) {
    case IR::OpSize::i8Bit: {
      ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), CMPPredicate.Zeroing(), MemSrc);
      break;
    }
    case IR::OpSize::i16Bit: {
      ld1h<ARMEmitter::SubRegSize::i16Bit>(Dst.Z(), CMPPredicate.Zeroing(), MemSrc);
      break;
    }
    case IR::OpSize::i32Bit: {
      ld1w<ARMEmitter::SubRegSize::i32Bit>(Dst.Z(), CMPPredicate.Zeroing(), MemSrc);
      break;
    }
    case IR::OpSize::i64Bit: {
      ld1d(Dst.Z(), CMPPredicate.Zeroing(), MemSrc);
      break;
    }
    // Report unexpected element sizes instead of silently emitting no load,
    // matching the fallback path below.
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, IROp->ElementSize); break;
    }
  } else {
    // Copies one (zero-extended) element of a vector register into a GPR.
    const auto PerformMove = [this](IR::OpSize ElementSize, const ARMEmitter::Register Dst, const ARMEmitter::VRegister Vector, int index) {
      switch (ElementSize) {
      case IR::OpSize::i8Bit: umov<ARMEmitter::SubRegSize::i8Bit>(Dst, Vector, index); break;
      case IR::OpSize::i16Bit: umov<ARMEmitter::SubRegSize::i16Bit>(Dst, Vector, index); break;
      case IR::OpSize::i32Bit: umov<ARMEmitter::SubRegSize::i32Bit>(Dst, Vector, index); break;
      case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Dst, Vector, index); break;
      default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", ElementSize); break;
      }
    };

    // Prepare yourself adventurer. For a masked load without instructions that implement it.
    LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Only supports 128-bit without SVE256");
    size_t NumElements = IR::NumElements(IROp->Size, IROp->ElementSize);

    // Use VTMP1 as the temporary destination; it starts zeroed so skipped elements read as zero.
    auto TempDst = VTMP1;
    // GPR scratch that receives each mask element.
    auto WorkingReg = TMP1;
    // Register holding the address of the current element; becomes TMP2 once it has to be modified.
    auto TempMemReg = MemReg;
    movi(ARMEmitter::SubRegSize::i64Bit, TempDst.Q(), 0);
    uint64_t Const {};
    if (Op->Offset.IsInvalid()) {
      // Intentional no-op.
    } else if (IsInlineConstant(Op->Offset, &Const)) {
      TempMemReg = TMP2;
      add(ARMEmitter::Size::i64Bit, TMP2, MemReg, Const);
    } else {
      LOGMAN_MSG_A_FMT("Complex addressing requested and not supported!");
    }

    const uint64_t ElementSizeInBits = IR::OpSizeAsBits(IROp->ElementSize);
    for (size_t i = 0; i < NumElements; ++i) {
      // Extract the mask element.
      PerformMove(IROp->ElementSize, WorkingReg, MaskReg, i);

      // If the sign bit is zero then skip the load
      ARMEmitter::ForwardLabel Skip {};
      (void)tbz(WorkingReg, ElementSizeInBits - 1, &Skip);
      // Load this element from memory into lane i of the temporary destination.
      switch (IROp->ElementSize) {
      case IR::OpSize::i8Bit: ld1<ARMEmitter::SubRegSize::i8Bit>(TempDst.Q(), i, TempMemReg); break;
      case IR::OpSize::i16Bit: ld1<ARMEmitter::SubRegSize::i16Bit>(TempDst.Q(), i, TempMemReg); break;
      case IR::OpSize::i32Bit: ld1<ARMEmitter::SubRegSize::i32Bit>(TempDst.Q(), i, TempMemReg); break;
      case IR::OpSize::i64Bit: ld1<ARMEmitter::SubRegSize::i64Bit>(TempDst.Q(), i, TempMemReg); break;
      case IR::OpSize::i128Bit: ldr(TempDst.Q(), TempMemReg, 0); break;
      default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, IROp->ElementSize); return;
      }

      (void)Bind(&Skip);

      if ((i + 1) != NumElements) {
        // Handle register rename to save a move: the first advance reads the original address
        // register and writes TMP2, later advances update TMP2 in place.
        const auto PrevMemReg = TempMemReg;
        TempMemReg = TMP2;
        add(ARMEmitter::Size::i64Bit, TempMemReg, PrevMemReg, IR::OpSizeToSize(IROp->ElementSize));
      }
    }

    // Move result.
    mov(Dst.Q(), TempDst.Q());
  }
}

DEF_OP(VStoreVectorMasked) {
  // Masked vector store: every element of `Op->Data` whose mask element has its sign bit set is
  // written to memory; memory for the other elements is not written.
  //
  // With SVE (128- or 256-bit) this is a compare-to-predicate followed by one predicated store.
  // Without SVE only 128-bit operations are supported and the store is emulated element by element.
  const auto Op = IROp->C<IR::IROp_VStoreVectorMasked>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto CMPPredicate = ARMEmitter::PReg::p0;
  const auto GoverningPredicate = Is256Bit ? PRED_TMP_32B : PRED_TMP_16B;

  const auto RegData = GetVReg(Op->Data);
  const auto MaskReg = GetVReg(Op->Mask);
  const auto MemReg = GetReg(Op->Addr);
  if (HostSupportsSVE128 || HostSupportsSVE256) {
    const auto MemDst = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);

    // Check if the sign bit is set for the given element size.
    cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);

    switch (IROp->ElementSize) {
    case IR::OpSize::i8Bit: {
      st1b<ARMEmitter::SubRegSize::i8Bit>(RegData.Z(), CMPPredicate.Zeroing(), MemDst);
      break;
    }
    case IR::OpSize::i16Bit: {
      st1h<ARMEmitter::SubRegSize::i16Bit>(RegData.Z(), CMPPredicate.Zeroing(), MemDst);
      break;
    }
    case IR::OpSize::i32Bit: {
      st1w<ARMEmitter::SubRegSize::i32Bit>(RegData.Z(), CMPPredicate.Zeroing(), MemDst);
      break;
    }
    case IR::OpSize::i64Bit: {
      st1d(RegData.Z(), CMPPredicate.Zeroing(), MemDst);
      break;
    }
    // Report unexpected element sizes instead of silently emitting no store,
    // matching the fallback path below.
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, IROp->ElementSize); break;
    }
  } else {
    // Copies one (zero-extended) element of a vector register into a GPR.
    const auto PerformMove = [this](IR::OpSize ElementSize, const ARMEmitter::Register Dst, const ARMEmitter::VRegister Vector, int index) {
      switch (ElementSize) {
      case IR::OpSize::i8Bit: umov<ARMEmitter::SubRegSize::i8Bit>(Dst, Vector, index); break;
      case IR::OpSize::i16Bit: umov<ARMEmitter::SubRegSize::i16Bit>(Dst, Vector, index); break;
      case IR::OpSize::i32Bit: umov<ARMEmitter::SubRegSize::i32Bit>(Dst, Vector, index); break;
      case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Dst, Vector, index); break;
      default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", ElementSize); break;
      }
    };

    // Prepare yourself adventurer. For a masked store without instructions that implement it.
    LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "Only supports 128-bit without SVE256");
    size_t NumElements = IR::NumElements(IROp->Size, IROp->ElementSize);

    // GPR scratch that receives each mask element.
    auto WorkingReg = TMP1;
    // Register holding the address of the current element; becomes TMP2 once it has to be modified.
    auto TempMemReg = MemReg;

    uint64_t Const {};
    if (Op->Offset.IsInvalid()) {
      // Intentional no-op.
    } else if (IsInlineConstant(Op->Offset, &Const)) {
      TempMemReg = TMP2;
      add(ARMEmitter::Size::i64Bit, TMP2, MemReg, Const);
    } else {
      LOGMAN_MSG_A_FMT("Complex addressing requested and not supported!");
    }

    const uint64_t ElementSizeInBits = IR::OpSizeAsBits(IROp->ElementSize);
    for (size_t i = 0; i < NumElements; ++i) {
      // Extract the mask element.
      PerformMove(IROp->ElementSize, WorkingReg, MaskReg, i);

      // If the sign bit is zero then skip the store
      ARMEmitter::ForwardLabel Skip {};
      (void)tbz(WorkingReg, ElementSizeInBits - 1, &Skip);
      // Store lane i of the data register to the current element address.
      switch (IROp->ElementSize) {
      case IR::OpSize::i8Bit: st1<ARMEmitter::SubRegSize::i8Bit>(RegData.Q(), i, TempMemReg); break;
      case IR::OpSize::i16Bit: st1<ARMEmitter::SubRegSize::i16Bit>(RegData.Q(), i, TempMemReg); break;
      case IR::OpSize::i32Bit: st1<ARMEmitter::SubRegSize::i32Bit>(RegData.Q(), i, TempMemReg); break;
      case IR::OpSize::i64Bit: st1<ARMEmitter::SubRegSize::i64Bit>(RegData.Q(), i, TempMemReg); break;
      case IR::OpSize::i128Bit: str(RegData.Q(), TempMemReg, 0); break;
      default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, IROp->ElementSize); return;
      }

      (void)Bind(&Skip);

      if ((i + 1) != NumElements) {
        // Handle register rename to save a move: the first advance reads the original address
        // register and writes TMP2, later advances update TMP2 in place.
        const auto PrevMemReg = TempMemReg;
        TempMemReg = TMP2;
        add(ARMEmitter::Size::i64Bit, TempMemReg, PrevMemReg, IR::OpSizeToSize(IROp->ElementSize));
      }
    }
  }
}

/// Emits an ASIMD (non-SVE) sequence that emulates a masked gather load, one element at a time.
///
/// For every data element, starting at DataElementOffsetStart, the generated code:
///   1. moves the element's mask lane into a GPR and branches over the element when its top bit is clear,
///   2. moves the matching index lane into a GPR (8/16/32-bit indices are sign-extended via `smov`),
///   3. forms the address from the optional base and the index shifted left by log2(OffsetScale),
///   4. loads the single lane into the result register.
///
/// Scratch usage: TMP1 (mask/index), TMP2 (address) and, only when Dst aliases an input, VTMP1.
///
/// @param Size                     Size of the result vector; clamps how many data elements are processed.
/// @param ElementSize              Size of each data/mask element (asserted to be 8-bit through 64-bit).
/// @param Dst                      Destination vector register.
/// @param IncomingDst              Vector supplying the lanes that are not loaded.
/// @param BaseAddr                 Optional scalar base address.
/// @param VectorIndexLow           Vector holding the first 16 bytes of indices.
/// @param VectorIndexHigh          Optional vector holding the next 16 bytes of indices.
/// @param MaskReg                  Vector whose per-element top bit enables the load of that element.
/// @param VectorIndexSize          Size of each index element.
/// @param DataElementOffsetStart   First data/mask element to process.
/// @param IndexElementOffsetStart  Index element paired with the first data element.
/// @param OffsetScale              Index scale; only its log2 is used as a shift amount.
/// @param AddrSize                 Operand size used for the address arithmetic.
void Arm64JITCore::Emulate128BitGather(IR::OpSize Size, IR::OpSize ElementSize, ARMEmitter::VRegister Dst,
                                       ARMEmitter::VRegister IncomingDst, std::optional<ARMEmitter::Register> BaseAddr,
                                       ARMEmitter::VRegister VectorIndexLow, std::optional<ARMEmitter::VRegister> VectorIndexHigh,
                                       ARMEmitter::VRegister MaskReg, IR::OpSize VectorIndexSize, size_t DataElementOffsetStart,
                                       size_t IndexElementOffsetStart, uint8_t OffsetScale, IR::OpSize AddrSize) {
  LOGMAN_THROW_A_FMT(ElementSize >= IR::OpSize::i8Bit && ElementSize <= IR::OpSize::i64Bit, "Invalid element size");

  // Lane -> GPR without sign extension. Used for the mask lanes, where only one bit is tested.
  const auto ExtractLane = [this](IR::OpSize LaneSize, const ARMEmitter::Register Gpr, const ARMEmitter::VRegister Source, int Lane) {
    switch (LaneSize) {
    case IR::OpSize::i8Bit: umov<ARMEmitter::SubRegSize::i8Bit>(Gpr, Source, Lane); break;
    case IR::OpSize::i16Bit: umov<ARMEmitter::SubRegSize::i16Bit>(Gpr, Source, Lane); break;
    case IR::OpSize::i32Bit: umov<ARMEmitter::SubRegSize::i32Bit>(Gpr, Source, Lane); break;
    case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Gpr, Source, Lane); break;
    default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", LaneSize); break;
    }
  };

  // Lane -> 64-bit GPR with sign extension. Used for the index lanes; a 64-bit lane needs no extension.
  const auto ExtractLaneSigned = [this](IR::OpSize LaneSize, const ARMEmitter::Register Gpr, const ARMEmitter::VRegister Source, int Lane) {
    switch (LaneSize) {
    case IR::OpSize::i8Bit: smov<ARMEmitter::SubRegSize::i8Bit>(Gpr.X(), Source, Lane); break;
    case IR::OpSize::i16Bit: smov<ARMEmitter::SubRegSize::i16Bit>(Gpr.X(), Source, Lane); break;
    case IR::OpSize::i32Bit: smov<ARMEmitter::SubRegSize::i32Bit>(Gpr.X(), Source, Lane); break;
    case IR::OpSize::i64Bit: umov<ARMEmitter::SubRegSize::i64Bit>(Gpr.X(), Source, Lane); break;
    default: LOGMAN_MSG_A_FMT("Unhandled ExtractElementSize: {}", LaneSize); break;
    }
  };

  // Lanes are written into the result while the mask and index vectors are still being read,
  // so a destination that aliases any of those inputs has to be built in a temporary.
  const bool DstIsHighIndex = VectorIndexHigh.has_value() && Dst == *VectorIndexHigh;
  const bool NeedsDestTmp = Dst == MaskReg || Dst == VectorIndexLow || DstIsHighIndex;

  // The result register must start out holding the incoming (pass-through) lanes.
  const bool NeedsIncomingDestMove = NeedsDestTmp || Dst != IncomingDst;

  // The available index elements bound the number of loads; the result register size clamps it further.
  const size_t NumAddrElements = (VectorIndexHigh.has_value() ? 32 : 16) / IR::OpSizeToSize(VectorIndexSize);
  const size_t NumDataElements = std::min<size_t>(IR::OpSizeToSize(Size) / IR::OpSizeToSize(ElementSize), NumAddrElements);

  const size_t IndexElementsSizeBytes = NumAddrElements * IR::OpSizeToSize(VectorIndexSize);
  if (IndexElementsSizeBytes > 16) {
    // More than one register worth of indices requires the high register.
    LOGMAN_THROW_A_FMT(VectorIndexHigh.has_value(), "Need High vector index register!");
  }

  ARMEmitter::VRegister ResultReg = Dst;
  if (NeedsDestTmp) {
    ResultReg = VTMP1;
  }

  auto WorkingReg = TMP1;
  auto TempMemReg = TMP2;
  const uint64_t ElementSizeInBits = IR::OpSizeToSize(ElementSize) * 8;
  const uint64_t MaskSignBit = ElementSizeInBits - 1;

  if (NeedsIncomingDestMove) {
    mov(ResultReg.Q(), IncomingDst.Q());
  }

  for (size_t DataLane = DataElementOffsetStart, IndexLane = IndexElementOffsetStart; DataLane < NumDataElements; ++DataLane, ++IndexLane) {
    ARMEmitter::ForwardLabel SkipElement {};

    // Mask test: a clear top bit means this element keeps its incoming value.
    ExtractLane(ElementSize, WorkingReg, MaskReg, DataLane);
    (void)tbz(WorkingReg, MaskSignBit, &SkipElement);

    // Fetch the index, from the high register once the low 16 bytes are exhausted.
    const bool IndexInHighReg = (IndexLane * IR::OpSizeToSize(VectorIndexSize)) >= 16;
    if (!IndexInHighReg) {
      ExtractLaneSigned(VectorIndexSize, WorkingReg, VectorIndexLow, IndexLane);
    } else {
      ExtractLaneSigned(VectorIndexSize, WorkingReg, *VectorIndexHigh, IndexLane - (16 / IR::OpSizeToSize(VectorIndexSize)));
    }

    // Address = [base +] (index << log2(OffsetScale)), with 32-bit indices sign-extended.
    const bool IndexIs32Bit = VectorIndexSize == IR::OpSize::i32Bit;
    if (BaseAddr.has_value() && IndexIs32Bit) {
      add(ConvertSize(AddrSize), TempMemReg, *BaseAddr, WorkingReg, ARMEmitter::ExtendedType::SXTW, FEXCore::ilog2(OffsetScale));
    } else if (BaseAddr.has_value()) {
      add(ConvertSize(AddrSize), TempMemReg, *BaseAddr, WorkingReg, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(OffsetScale));
    } else if (IndexIs32Bit) {
      // No base register: the address comes entirely from the sign-extended, scaled index.
      sbfiz(ConvertSize(AddrSize), TempMemReg, WorkingReg, FEXCore::ilog2(OffsetScale), 32);
    } else {
      // No base register: the address comes entirely from the scaled index.
      lsl(ConvertSize(AddrSize), TempMemReg, WorkingReg, FEXCore::ilog2(OffsetScale));
    }

    // Load the single element into its lane of the result.
    switch (ElementSize) {
    case IR::OpSize::i8Bit: ld1<ARMEmitter::SubRegSize::i8Bit>(ResultReg.Q(), DataLane, TempMemReg); break;
    case IR::OpSize::i16Bit: ld1<ARMEmitter::SubRegSize::i16Bit>(ResultReg.Q(), DataLane, TempMemReg); break;
    case IR::OpSize::i32Bit: ld1<ARMEmitter::SubRegSize::i32Bit>(ResultReg.Q(), DataLane, TempMemReg); break;
    case IR::OpSize::i64Bit: ld1<ARMEmitter::SubRegSize::i64Bit>(ResultReg.Q(), DataLane, TempMemReg); break;
    case IR::OpSize::i128Bit: ldr(ResultReg.Q(), TempMemReg, 0); break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); FEX_UNREACHABLE;
    }

    (void)Bind(&SkipElement);
  }

  if (NeedsDestTmp) {
    // Publish the temporary now that every input has been consumed.
    mov(Dst.Q(), ResultReg.Q());
  }
}

/// Masked, discontiguous (gather) vector load.
///
///  - VSIB behaviour is mostly exposed directly in the IR operation; the displacement is the only
///    missing value since it can be folded into AddrBase.
///  - VectorIndex{Low,High} carry the per-element index offsets.
///     - Their element size is decoupled from the data element size and can be 32-bit or 64-bit.
///     - 32-bit indices are sign-extended in the scalar-plus-vector SVE form (MOD_SXTW) and in the
///       ASIMD fallback (Emulate128BitGather uses smov/SXTW).
///     - A 128-bit result with 64-bit indices needs both VectorIndexLow and VectorIndexHigh.
///  - When the host has SVE, the index element size equals the data element size, the scale is 1 or the
///    index size, and addresses are 64-bit, a single predicated SVE gather is emitted.
///     - Otherwise the operation is decomposed into the ASIMD style element-by-element load.
///  - AddrBase is optional; without it the addresses come from the index vector alone.
DEF_OP(VLoadVectorGatherMasked) {
  const auto Op = IROp->C<IR::IROp_VLoadVectorGatherMasked>();
  const auto OpSize = IROp->Size;

  const auto VectorIndexSize = Op->VectorIndexElementSize;
  const auto OffsetScale = Op->OffsetScale;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto IncomingDst = GetVReg(Op->Incoming);
  const auto MaskReg = GetVReg(Op->Mask);

  std::optional<ARMEmitter::Register> BaseAddr {};
  if (!Op->AddrBase.IsInvalid()) {
    BaseAddr = GetReg(Op->AddrBase);
  }

  const auto VectorIndexLow = GetVReg(Op->VectorIndexLow);

  std::optional<ARMEmitter::VRegister> VectorIndexHigh {};
  if (!Op->VectorIndexHigh.IsInvalid()) {
    VectorIndexHigh = GetVReg(Op->VectorIndexHigh);
  }

  // The SVE gather is only usable when the request fits its addressing-mode limitations.
  const bool HostHasSVE = HostSupportsSVE128 || HostSupportsSVE256;
  const bool ScaleFitsSVE = OffsetScale == 1 || OffsetScale == IR::OpSizeToSize(VectorIndexSize);
  const bool IndexMatchesData = VectorIndexSize == IROp->ElementSize;
  const bool SupportsSVELoad = HostHasSVE && ScaleFitsSVE && IndexMatchesData && Op->AddrSize == IR::OpSize::i64Bit;

  if (!SupportsSVELoad) {
    // ASIMD fallback, which only exists for 128-bit results.
    LOGMAN_THROW_A_FMT(!Is256Bit, "Can't emulate this gather load in the backend! Programming error!");
    Emulate128BitGather(IROp->Size, IROp->ElementSize, Dst, IncomingDst, BaseAddr, VectorIndexLow, VectorIndexHigh, MaskReg,
                        VectorIndexSize, Op->DataElementOffsetStart, Op->IndexElementOffsetStart, OffsetScale, Op->AddrSize);
    return;
  }

  const uint8_t SVEScale = FEXCore::ilog2(OffsetScale);

  // 32-bit indices are sign-extended; 64-bit indices only need a modifier when they are scaled.
  const bool ScaledIndex64 = VectorIndexSize == IR::OpSize::i64Bit && OffsetScale != 1;
  const ARMEmitter::SVEModType ModType = VectorIndexSize == IR::OpSize::i32Bit ? ARMEmitter::SVEModType::MOD_SXTW :
                                         ScaledIndex64                         ? ARMEmitter::SVEModType::MOD_LSL :
                                                                                 ARMEmitter::SVEModType::MOD_NONE;

  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto CMPPredicate = ARMEmitter::PReg::p0;
  const auto GoverningPredicate = Is256Bit ? PRED_TMP_32B : PRED_TMP_16B;

  // Build the load predicate: an element is active when its mask value is negative (top bit set).
  cmplt(SubRegSize, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);
  auto TempDst = VTMP1;

  // Without a base address and without scaling, the vector-plus-immediate form needs no scalar register.
  ARMEmitter::SVEMemOperand MemDst {ARMEmitter::SVEMemOperand(VectorIndexLow.Z(), 0)};
  const bool NeedsScalarBase = BaseAddr.has_value() || OffsetScale != 1;
  if (NeedsScalarBase) {
    ARMEmitter::Register AddrReg = TMP1;
    if (!BaseAddr.has_value()) {
      ///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
      LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
    } else {
      AddrReg = GetReg(Op->AddrBase);
    }
    MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), VectorIndexLow.Z(), ModType, SVEScale);
  }

  // Gather into the temporary; inactive elements are zeroed by the predicate.
  switch (IROp->ElementSize) {
  case IR::OpSize::i8Bit: ld1b<ARMEmitter::SubRegSize::i8Bit>(TempDst.Z(), CMPPredicate.Zeroing(), MemDst); break;
  case IR::OpSize::i16Bit: ld1h<ARMEmitter::SubRegSize::i16Bit>(TempDst.Z(), CMPPredicate.Zeroing(), MemDst); break;
  case IR::OpSize::i32Bit: ld1w<ARMEmitter::SubRegSize::i32Bit>(TempDst.Z(), CMPPredicate.Zeroing(), MemDst); break;
  case IR::OpSize::i64Bit: ld1d(TempDst.Z(), CMPPredicate.Zeroing(), MemDst); break;
  default: break;
  }

  ///< Active elements take the loaded value, the rest keep the incoming destination.
  sel(SubRegSize, Dst.Z(), CMPPredicate, TempDst.Z(), IncomingDst.Z());
}

/// Masked gather load of 32-bit elements addressed by 64-bit indices.
///
/// Behaves like the non-QPS gather, with these STRICT limitations (as stated by the original notes):
/// - Only supports 32-bit element data size!
/// - Only supports 64-bit element address size!
/// - Only masks elements based on 32-bit element data size! (NOT ADDR SIZE!)
/// - Optimally uses SVE's `ld1w {zt.D}` variant instruction!
/// - Only outputs a single 128-bit result, while consuming 128-bit or 256-bit of address indexes!
/// - Matches VGATHERQPS/VPGATHERQD behaviour!
DEF_OP(VLoadVectorGatherMaskedQPS) {
  const auto Op = IROp->C<IR::IROp_VLoadVectorGatherMaskedQPS>();

  const auto OffsetScale = Op->OffsetScale;
  const auto Dst = GetVReg(Node);
  const auto IncomingDst = GetVReg(Op->Incoming);
  const auto MaskReg = GetVReg(Op->MaskReg);

  std::optional<ARMEmitter::Register> BaseAddr {};
  if (!Op->AddrBase.IsInvalid()) {
    BaseAddr = GetReg(Op->AddrBase);
  }

  const auto VectorIndexLow = GetVReg(Op->VectorIndexLow);

  std::optional<ARMEmitter::VRegister> VectorIndexHigh {};
  if (!Op->VectorIndexHigh.IsInvalid()) {
    VectorIndexHigh = GetVReg(Op->VectorIndexHigh);
  }

  ///< The SVE path needs SVE128, a scale of 1 or 4, and 64-bit addressing.
  const bool ScaleFitsSVE = OffsetScale == 1 || OffsetScale == 4;
  const bool SupportsSVELoad = HostSupportsSVE128 && ScaleFitsSVE && Op->AddrSize == IR::OpSize::i64Bit;

  if (!SupportsSVELoad) {
    // Element-by-element ASIMD fallback: 32-bit data, 64-bit indices, starting at element zero.
    Emulate128BitGather(IR::OpSize::i128Bit, IR::OpSize::i32Bit, Dst, IncomingDst, BaseAddr, VectorIndexLow, VectorIndexHigh, MaskReg,
                        IR::OpSize::i64Bit, 0, 0, OffsetScale, Op->AddrSize);
    return;
  }

  const ARMEmitter::SVEModType ModType = OffsetScale != 1 ? ARMEmitter::SVEModType::MOD_LSL : ARMEmitter::SVEModType::MOD_NONE;

  const auto CMPPredicate = ARMEmitter::PReg::p0;
  const auto CMPPredicate2 = ARMEmitter::PReg::p1;
  const auto GoverningPredicate = PRED_TMP_16B;

  // CMPPredicate marks the 32-bit elements [0, 1, 2, 3] whose mask top bit is set.
  // punpklo then widens the low half of that predicate so it governs 64-bit elements.
  cmplt(ARMEmitter::SubRegSize::i32Bit, CMPPredicate, GoverningPredicate.Zeroing(), MaskReg.Z(), 0);
  punpklo(CMPPredicate2, CMPPredicate);
  auto TempDst = VTMP1;

  // Emits one `ld1w` gather with 64-bit containers from a single index register.
  auto EmitGather = [this](ARMEmitter::VRegister Result, std::optional<ARMEmitter::Register> Base, ARMEmitter::VRegister Indices,
                           ARMEmitter::PRegister Pred, ARMEmitter::SVEModType Mod, uint8_t Scale) {
    uint8_t SVEScale = FEXCore::ilog2(Scale);

    // Without a base address and without scaling, the vector-plus-immediate form needs no scalar register.
    ARMEmitter::SVEMemOperand MemDst {ARMEmitter::SVEMemOperand(Indices.Z(), 0)};
    const bool NeedsScalarBase = Base.has_value() || Scale != 1;
    if (NeedsScalarBase) {
      ARMEmitter::Register AddrReg = TMP1;
      if (!Base.has_value()) {
        ///< OpcodeDispatcher didn't provide a Base address while SVE requires one.
        LoadConstant(ARMEmitter::Size::i64Bit, AddrReg, 0);
      } else {
        AddrReg = *Base;
      }
      MemDst = ARMEmitter::SVEMemOperand(AddrReg.X(), Indices.Z(), Mod, SVEScale);
    }

    ld1w<ARMEmitter::SubRegSize::i64Bit>(Result.Z(), Pred.Zeroing(), MemDst);
  };

  EmitGather(TempDst, BaseAddr, VectorIndexLow, CMPPredicate2, ModType, OffsetScale);

  if (VectorIndexHigh.has_value()) {
    // Second gather for the upper two elements, governed by the high half of the predicate.
    punpkhi(CMPPredicate2, CMPPredicate);
    EmitGather(VTMP2, BaseAddr, *VectorIndexHigh, CMPPredicate2, ModType, OffsetScale);
    // Pack the even 32-bit elements of both gathers into one 128-bit result.
    uzp1(ARMEmitter::SubRegSize::i32Bit, TempDst.Q(), TempDst.Q(), VTMP2.Q());
  } else {
    // Narrow the two 64-bit containers into the lower half.
    xtn(ARMEmitter::SubRegSize::i32Bit, TempDst.Q(), TempDst.Q());
  }

  ///< Active elements take the loaded value, the rest keep the incoming destination.
  sel(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), CMPPredicate, TempDst.Z(), IncomingDst.Z());
}

// Loads one element from memory into lane Op->Index of the destination, keeping the other lanes of DstSrc.
// A 128-bit "element" is a plain full-register load. 256-bit operation is not supported.
DEF_OP(VLoadVectorElement) {
  const auto Op = IROp->C<IR::IROp_VLoadVectorElement>();
  const auto ElementSize = IROp->ElementSize;
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;

  const auto Dst = GetVReg(Node);
  const auto DstSrc = GetVReg(Op->DstSrc);
  const auto MemReg = GetReg(Op->Addr);

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit ||
                       ElementSize == IR::OpSize::i64Bit || ElementSize == IR::OpSize::i128Bit,
                     "Invalid element size");

  if (!Is256Bit) {
    // A full 128-bit load overwrites every lane, so the pass-through copy is only needed for lane inserts.
    const bool IsLaneInsert = ElementSize != IR::OpSize::i128Bit;
    if (IsLaneInsert && Dst != DstSrc) {
      mov(Dst.Q(), DstSrc.Q());
    }

    switch (ElementSize) {
    case IR::OpSize::i8Bit: ld1<ARMEmitter::SubRegSize::i8Bit>(Dst.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i16Bit: ld1<ARMEmitter::SubRegSize::i16Bit>(Dst.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i32Bit: ld1<ARMEmitter::SubRegSize::i32Bit>(Dst.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i64Bit: ld1<ARMEmitter::SubRegSize::i64Bit>(Dst.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i128Bit: ldr(Dst.Q(), MemReg); break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); return;
    }
  } else {
    LOGMAN_MSG_A_FMT("Unsupported 256-bit VLoadVectorElement");
  }

  // With vector TSO emulation enabled, follow the load with a load half-barrier.
  if (CTX->IsVectorAtomicTSOEnabled()) {
    dmb(ARMEmitter::BarrierScope::ISHLD);
  }
}

// Stores lane Op->Index of the value vector to memory.
// A 128-bit "element" is a plain full-register store. 256-bit operation is not supported.
DEF_OP(VStoreVectorElement) {
  const auto Op = IROp->C<IR::IROp_VStoreVectorElement>();
  const auto ElementSize = IROp->ElementSize;
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;

  const auto Value = GetVReg(Op->Value);
  const auto MemReg = GetReg(Op->Addr);

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit ||
                       ElementSize == IR::OpSize::i64Bit || ElementSize == IR::OpSize::i128Bit,
                     "Invalid element size");

  // With vector TSO emulation enabled, precede the store with a barrier.
  if (CTX->IsVectorAtomicTSOEnabled()) {
    dmb(ARMEmitter::BarrierScope::ISH);
  }

  if (!Is256Bit) {
    switch (ElementSize) {
    case IR::OpSize::i8Bit: st1<ARMEmitter::SubRegSize::i8Bit>(Value.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i16Bit: st1<ARMEmitter::SubRegSize::i16Bit>(Value.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i32Bit: st1<ARMEmitter::SubRegSize::i32Bit>(Value.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i64Bit: st1<ARMEmitter::SubRegSize::i64Bit>(Value.Q(), Op->Index, MemReg); break;
    case IR::OpSize::i128Bit: str(Value.Q(), MemReg); break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ElementSize); return;
    }
  } else {
    LOGMAN_MSG_A_FMT("Unsupported 256-bit {}", __func__);
  }
}

// Loads one element from memory and replicates it across the destination vector.
// 256-bit operation uses the SVE replicating loads; otherwise ASIMD `ld1r` is used.
DEF_OP(VBroadcastFromMem) {
  const auto Op = IROp->C<IR::IROp_VBroadcastFromMem>();
  const auto ElementSize = IROp->ElementSize;

  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto MemReg = GetReg(Op->Address);

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit ||
                       ElementSize == IR::OpSize::i64Bit || ElementSize == IR::OpSize::i128Bit,
                     "Invalid element size");

  const bool UseSVE256 = Is256Bit && HostSupportsSVE256;
  if (!UseSVE256) {
    switch (ElementSize) {
    case IR::OpSize::i8Bit: ld1r<ARMEmitter::SubRegSize::i8Bit>(Dst.Q(), MemReg); break;
    case IR::OpSize::i16Bit: ld1r<ARMEmitter::SubRegSize::i16Bit>(Dst.Q(), MemReg); break;
    case IR::OpSize::i32Bit: ld1r<ARMEmitter::SubRegSize::i32Bit>(Dst.Q(), MemReg); break;
    case IR::OpSize::i64Bit: ld1r<ARMEmitter::SubRegSize::i64Bit>(Dst.Q(), MemReg); break;
    case IR::OpSize::i128Bit:
      // With 128-bit registers a 128-bit "broadcast" is just a normal load (the ld1rqb equivalent).
      ldr(Dst.Q(), MemReg);
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); return;
    }
  } else {
    const auto GoverningPredicate = PRED_TMP_32B.Zeroing();

    switch (ElementSize) {
    case IR::OpSize::i8Bit: ld1rb(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), GoverningPredicate, MemReg); break;
    case IR::OpSize::i16Bit: ld1rh(ARMEmitter::SubRegSize::i16Bit, Dst.Z(), GoverningPredicate, MemReg); break;
    case IR::OpSize::i32Bit: ld1rw(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), GoverningPredicate, MemReg); break;
    case IR::OpSize::i64Bit: ld1rd(Dst.Z(), GoverningPredicate, MemReg); break;
    case IR::OpSize::i128Bit: ld1rqb(Dst.Z(), GoverningPredicate, MemReg); break;
    default: LOGMAN_MSG_A_FMT("Unhandled VBroadcastFromMem size: {}", ElementSize); return;
    }
  }

  // With vector TSO emulation enabled, follow the load with a load half-barrier.
  if (CTX->IsVectorAtomicTSOEnabled()) {
    dmb(ARMEmitter::BarrierScope::ISHLD);
  }
}

// Stores Value at (Addr - ValueSize) and leaves the decremented address in the node's destination register.
// Normally this is one pre-indexed store on Dst; register aliasing between Dst, Addr and Value
// forces the alternative sequences below.
DEF_OP(Push) {
  const auto Op = IROp->C<IR::IROp_Push>();
  const auto ValueSize = IR::OpSizeToSize(Op->ValueSize);
  auto Src = GetReg(Op->Value);
  const auto AddrSrc = GetReg(Op->Addr);
  const auto Dst = GetReg(Node);

  const auto AddrOpSize = IROp->Size == IR::OpSize::i64Bit ? ARMEmitter::Size::i64Bit : ARMEmitter::Size::i32Bit;

  const bool DstIsAddr = Dst == AddrSrc;
  // Dst holds the value to store: writing the address into Dst first would destroy it,
  // so store relative to AddrSrc and compute Dst afterwards.
  const bool NeedsMoveAfterwards = !DstIsAddr && Dst == Src;

  if (!DstIsAddr && !NeedsMoveAfterwards) {
    // RA constraints would let this always be true.
    mov(AddrOpSize, Dst, AddrSrc);
  }

  if (Src == AddrSrc) {
    // The data source is also the address source. A writeback store where dest == src is undefined
    // behaviour, with multiple observed outcomes:
    // - SIGILL - Apple Silicon Behaviour
    // - Stores original value - Cortex behaviour
    // - Stores value after pre-index adjust - Vixl simulator behaviour.
    // - Undefined value stored
    // - Undefined behaviour(!)
    //
    // In this path Src can end up overlapping both AddrSrc and Dst.
    // Copy the data to a temporary and store from there instead.
    mov(TMP1, Src.X());
    Src = TMP1;
  }

  if (!NeedsMoveAfterwards) {
    // Common case: pre-indexed store that decrements Dst and stores in one instruction.
    switch (ValueSize) {
    case 1: strb<ARMEmitter::IndexType::PRE>(Src.W(), Dst, -ValueSize); break;
    case 2: strh<ARMEmitter::IndexType::PRE>(Src.W(), Dst, -ValueSize); break;
    case 4: str<ARMEmitter::IndexType::PRE>(Src.W(), Dst, -ValueSize); break;
    case 8: str<ARMEmitter::IndexType::PRE>(Src.X(), Dst, -ValueSize); break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); break;
    }
    return;
  }

  // Unscaled-offset store below the original address, then materialize the new address in Dst.
  switch (ValueSize) {
  case 1: sturb(Src.W(), AddrSrc, -ValueSize); break;
  case 2: sturh(Src.W(), AddrSrc, -ValueSize); break;
  case 4: stur(Src.W(), AddrSrc, -ValueSize); break;
  case 8: stur(Src.X(), AddrSrc, -ValueSize); break;
  default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize); break;
  }

  sub(AddrOpSize, Dst, AddrSrc, ValueSize);
}

// Stores two values with a single pre-indexed store-pair, decrementing the address register
// by twice the value size. Only 4-byte and 8-byte values are handled.
DEF_OP(PushTwo) {
  const auto Op = IROp->C<IR::IROp_PushTwo>();
  const auto ValueSize = IR::OpSizeToSize(Op->ValueSize);
  auto First = GetReg(Op->Value1);
  auto Second = GetReg(Op->Value2);
  const auto Dst = GetReg(Op->Addr);

  if (ValueSize == 4) {
    stp<ARMEmitter::IndexType::PRE>(First.W(), Second.W(), Dst, -2 * ValueSize);
  } else if (ValueSize == 8) {
    stp<ARMEmitter::IndexType::PRE>(First.X(), Second.X(), Dst, -2 * ValueSize);
  } else {
    LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, ValueSize);
  }
}

// Loads one value with a post-indexed load, advancing the address register by the value size.
// The value register must differ from the address register.
DEF_OP(Pop) {
  const auto Op = IROp->C<IR::IROp_Pop>();
  const auto Size = IR::OpSizeToSize(Op->Size);
  const auto Addr = GetReg(Op->InoutAddr);
  const auto Dst = GetReg(Op->OutValue);

  LOGMAN_THROW_A_FMT(Dst != Addr, "Invalid");

  switch (Size) {
  case 1: ldrb<ARMEmitter::IndexType::POST>(Dst.W(), Addr, Size); break;
  case 2: ldrh<ARMEmitter::IndexType::POST>(Dst.W(), Addr, Size); break;
  case 4: ldr<ARMEmitter::IndexType::POST>(Dst.W(), Addr, Size); break;
  case 8: ldr<ARMEmitter::IndexType::POST>(Dst.X(), Addr, Size); break;
  default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Op->Size); break;
  }
}

// Loads two values with a single post-indexed load-pair, advancing the address register
// by twice the value size. Only 4-byte and 8-byte values are handled.
DEF_OP(PopTwo) {
  const auto Op = IROp->C<IR::IROp_PopTwo>();
  const auto Size = IR::OpSizeToSize(Op->Size);
  const auto Addr = GetReg(Op->InoutAddr);
  auto First = GetReg(Op->OutValue1);
  const auto Second = GetReg(Op->OutValue2);

  // `ldp x, x` is not encodable. When both outputs share a register, send the first
  // loaded value to the zero register so that it is discarded.
  if (First == Second) {
    First = ARMEmitter::Reg::zr;
  }

  LOGMAN_THROW_A_FMT(First != Addr && Second != Addr, "Invalid");
  LOGMAN_THROW_A_FMT(First != Second, "Invalid");

  if (Size == 4) {
    ldp<ARMEmitter::IndexType::POST>(First.W(), Second.W(), Addr, 2 * Size);
  } else if (Size == 8) {
    ldp<ARMEmitter::IndexType::POST>(First.X(), Second.X(), Addr, 2 * Size);
  } else {
    LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Op->Size);
  }
}

// Plain (non-TSO) store of a GPR or vector value to [Addr + Offset].
// 256-bit vector stores go through SVE and build their own SVE memory operand.
DEF_OP(StoreMem) {
  const auto Op = IROp->C<IR::IROp_StoreMem>();
  const auto OpSize = IROp->Size;

  const auto MemReg = GetReg(Op->Addr);
  const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);

  if (Op->Class != IR::RegClass::GPR) {
    // Vector register source.
    const auto Src = GetVReg(Op->Value);

    switch (OpSize) {
    case IR::OpSize::i8Bit: strb(Src, MemSrc); break;
    case IR::OpSize::i16Bit: strh(Src, MemSrc); break;
    case IR::OpSize::i32Bit: str(Src.S(), MemSrc); break;
    case IR::OpSize::i64Bit: str(Src.D(), MemSrc); break;
    case IR::OpSize::i128Bit: str(Src.Q(), MemSrc); break;
    case IR::OpSize::i256Bit: {
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
      const auto SVEMemSrc = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
      st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, SVEMemSrc);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); break;
    }
    return;
  }

  // General purpose register source; a constant zero can be stored straight from the zero register.
  const auto Src = GetZeroableReg(Op->Value);
  switch (OpSize) {
  case IR::OpSize::i8Bit: strb(Src, MemSrc); break;
  case IR::OpSize::i16Bit: strh(Src, MemSrc); break;
  case IR::OpSize::i32Bit: str(Src.W(), MemSrc); break;
  case IR::OpSize::i64Bit: str(Src.X(), MemSrc); break;
  default: LOGMAN_MSG_A_FMT("Unhandled StoreMem size: {}", OpSize); break;
  }
}

// SVE store of a vector to [Addr], governed by the PRED_X87_SVEOPT predicate register.
// The element size selects the st1b/st1h/st1w/st1d form. Requires SVE on the host.
DEF_OP(StoreMemX87SVEOptPredicate) {
  const auto Op = IROp->C<IR::IROp_StoreMemX87SVEOptPredicate>();
  const auto Predicate = PRED_X87_SVEOPT;

  LOGMAN_THROW_A_FMT(HostSupportsSVE128 || HostSupportsSVE256, "StoreMemX87SVEOptPredicate needs SVE support");

  const auto Source = GetVReg(Op->Value);
  const auto MemReg = GetReg(Op->Addr);
  const auto MemDst = ARMEmitter::SVEMemOperand(MemReg.X(), 0);

  switch (IROp->ElementSize) {
  case IR::OpSize::i8Bit: st1b<ARMEmitter::SubRegSize::i8Bit>(Source.Z(), Predicate, MemDst); break;
  case IR::OpSize::i16Bit: st1h<ARMEmitter::SubRegSize::i16Bit>(Source.Z(), Predicate, MemDst); break;
  case IR::OpSize::i32Bit: st1w<ARMEmitter::SubRegSize::i32Bit>(Source.Z(), Predicate, MemDst); break;
  case IR::OpSize::i64Bit: st1d(Source.Z(), Predicate, MemDst); break;
  default: LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, IROp->ElementSize); break;
  }
}

// SVE load of a vector from [Addr], governed by the PRED_X87_SVEOPT predicate register with
// zeroing of inactive elements. The element size selects the ld1b/ld1h/ld1w/ld1d form.
// Requires SVE on the host.
DEF_OP(LoadMemX87SVEOptPredicate) {
  const auto Op = IROp->C<IR::IROp_LoadMemX87SVEOptPredicate>();
  const auto Dst = GetVReg(Node);
  const auto Predicate = PRED_X87_SVEOPT;
  const auto MemReg = GetReg(Op->Addr);

  LOGMAN_THROW_A_FMT(HostSupportsSVE128 || HostSupportsSVE256, "LoadMemX87SVEOptPredicate needs SVE support");

  const auto MemSrc = ARMEmitter::SVEMemOperand(MemReg.X(), 0);

  switch (IROp->ElementSize) {
  case IR::OpSize::i8Bit: ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), Predicate.Zeroing(), MemSrc); break;
  case IR::OpSize::i16Bit: ld1h<ARMEmitter::SubRegSize::i16Bit>(Dst.Z(), Predicate.Zeroing(), MemSrc); break;
  case IR::OpSize::i32Bit: ld1w<ARMEmitter::SubRegSize::i32Bit>(Dst.Z(), Predicate.Zeroing(), MemSrc); break;
  case IR::OpSize::i64Bit: ld1d(Dst.Z(), Predicate.Zeroing(), MemSrc); break;
  default: LOGMAN_MSG_A_FMT("Unhandled {} element size: {}", __func__, IROp->ElementSize); break;
  }
}

// Stores two same-sized values to [Addr + Offset] with a single offset-addressed store-pair.
// GPR pairs support 32-bit and 64-bit values; vector pairs support 32-bit, 64-bit and 128-bit values.
DEF_OP(StoreMemPair) {
  const auto Op = IROp->C<IR::IROp_StoreMemPair>();
  const auto OpSize = IROp->Size;
  const auto Addr = GetReg(Op->Addr);

  if (Op->Class == IR::RegClass::GPR) {
    // A constant zero can be stored straight from the zero register.
    const auto Src1 = GetZeroableReg(Op->Value1);
    const auto Src2 = GetZeroableReg(Op->Value2);
    switch (OpSize) {
    case IR::OpSize::i32Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.W(), Src2.W(), Addr, Op->Offset); break;
    case IR::OpSize::i64Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.X(), Src2.X(), Addr, Op->Offset); break;
    // Name this op in the diagnostic, matching the vector path below.
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemPair size: {}", OpSize); break;
    }
  } else {
    const auto Src1 = GetVReg(Op->Value1);
    const auto Src2 = GetVReg(Op->Value2);

    switch (OpSize) {
    case IR::OpSize::i32Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.S(), Src2.S(), Addr, Op->Offset); break;
    case IR::OpSize::i64Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.D(), Src2.D(), Addr, Op->Offset); break;
    case IR::OpSize::i128Bit: stp<ARMEmitter::IndexType::OFFSET>(Src1.Q(), Src2.Q(), Addr, Op->Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemPair size: {}", OpSize); break;
    }
  }
}

// Store with TSO (x86 memory-model) emulation.
//  - GPR values use store-release instructions: stlur* with an immediate offset when the host
//    supports it, otherwise stlr* with no offset.
//  - Vector values use a regular store, preceded by a barrier when vector TSO emulation is enabled.
DEF_OP(StoreMemTSO) {
  const auto Op = IROp->C<IR::IROp_StoreMemTSO>();
  const auto OpSize = IROp->Size;

  const auto MemReg = GetReg(Op->Addr);

  if (Op->Class != IR::RegClass::GPR) {
    if (CTX->IsVectorAtomicTSOEnabled()) {
      // Half-Barrier.
      dmb(ARMEmitter::BarrierScope::ISH);
    }
    const auto Src = GetVReg(Op->Value);
    const auto MemSrc = GenerateMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
    switch (OpSize) {
    case IR::OpSize::i8Bit: strb(Src, MemSrc); break;
    case IR::OpSize::i16Bit: strh(Src, MemSrc); break;
    case IR::OpSize::i32Bit: str(Src.S(), MemSrc); break;
    case IR::OpSize::i64Bit: str(Src.D(), MemSrc); break;
    case IR::OpSize::i128Bit: str(Src.Q(), MemSrc); break;
    case IR::OpSize::i256Bit: {
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
      const auto Operand = GenerateSVEMemOperand(OpSize, MemReg, Op->Offset, Op->OffsetType, Op->OffsetScale);
      st1b<ARMEmitter::SubRegSize::i8Bit>(Src.Z(), PRED_TMP_32B, Operand);
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break;
    }
    return;
  }

  // GPR stores: the release forms only take an (optional) immediate offset, never a scaled register.
  LOGMAN_THROW_A_FMT(Op->Offset.IsInvalid() || CTX->HostFeatures.SupportsTSOImm9, "unexpected offset");
  LOGMAN_THROW_A_FMT(Op->OffsetScale == 1, "unexpected offset scale");
  LOGMAN_THROW_A_FMT(Op->OffsetType == IR::MemOffsetType::SXTX, "unexpected offset type");

  const auto Src = GetZeroableReg(Op->Value);
  const bool IsByteStore = OpSize == IR::OpSize::i8Bit;

  if (CTX->HostFeatures.SupportsTSOImm9) {
    uint64_t Offset = 0;
    if (!Op->Offset.IsInvalid()) {
      bool IsInline = IsInlineConstant(Op->Offset, &Offset);
      LOGMAN_THROW_A_FMT(IsInline, "expected immediate");
    }

    if (IsByteStore) {
      // An 8-bit store is always naturally aligned.
      stlurb(Src, MemReg, Offset);
      return;
    }

    // Half-barrier once back-patched.
    nop();
    switch (OpSize) {
    case IR::OpSize::i16Bit: stlurh(Src, MemReg, Offset); break;
    case IR::OpSize::i32Bit: stlur(Src.W(), MemReg, Offset); break;
    case IR::OpSize::i64Bit: stlur(Src.X(), MemReg, Offset); break;
    default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break;
    }
    return;
  }

  if (IsByteStore) {
    // An 8-bit store is always naturally aligned.
    stlrb(Src, MemReg);
    return;
  }

  // Half-barrier once back-patched.
  nop();
  switch (OpSize) {
  case IR::OpSize::i16Bit: stlrh(Src, MemReg); break;
  case IR::OpSize::i32Bit: stlr(Src.W(), MemReg); break;
  case IR::OpSize::i64Bit: stlr(Src.X(), MemReg); break;
  default: LOGMAN_MSG_A_FMT("Unhandled StoreMemTSO size: {}", OpSize); break;
  }
}

// Emits an inline memset.
//
// Stores `Value` into `Length` consecutive elements of `Op->Size` bytes each, starting at
// `Addr` (plus `Prefix` when one is supplied) and walking upwards or downwards in memory
// depending on `Direction` (1 = forwards, -1 = backwards). The node's result register
// receives `Addr +/- Length * Size`.
//
// When the direction is not an inline constant, both the forward and the backward variant are
// emitted and one is selected at runtime.
//
// Scratch register usage:
//   TMP1  = remaining element count (byte count inside the MOPS path)
//   TMP2  = working address
//   TMP3  = saved NZCV (MOPS path only)
//   VTMP2 = fill pattern replicated across a 128-bit vector (vector loops only)
DEF_OP(MemSet) {
  const auto Op = IROp->C<IR::IROp_MemSet>();

  const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled();
  const auto Size = IR::OpSizeToSize(Op->Size);
  const auto MemReg = GetReg(Op->Addr);
  const auto Value = GetZeroableReg(Op->Value);
  const auto Length = GetReg(Op->Length);
  const auto Dst = GetReg(Node);

  uint64_t DirectionConstant;
  bool DirectionIsInline = IsInlineConstant(Op->Direction, &DirectionConstant);
  // Only meaningful when the direction lives in a register; r0 is just a placeholder otherwise.
  ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
  if (!DirectionIsInline) {
    DirectionReg = GetReg(Op->Direction);
  }

  // If Direction > 0 then:
  //   MemReg is incremented (by size)
  // else:
  //   MemReg is decremented (by size)
  //
  // Counter is decremented regardless.

  ARMEmitter::ForwardLabel BackwardImpl {};
  ARMEmitter::ForwardLabel Done {};

  mov(TMP1, Length.X());
  if (Op->Prefix.IsInvalid()) {
    mov(TMP2, MemReg.X());
  } else {
    const auto Prefix = GetReg(Op->Prefix);
    add(TMP2, Prefix.X(), MemReg.X());
  }

  if (!DirectionIsInline) {
    // Backward or forwards implementation depends on flag.
    // Direction is expected to be 1 or -1, so bit 1 is set only for the backwards case.
    (void)tbnz(DirectionReg, 1, &BackwardImpl);
  }

  // Non-atomic single element store; the post-index `Size` carries the signed step.
  auto MemStore = [this](auto Value, uint32_t OpSize, int32_t Size) {
    switch (OpSize) {
    case 1: strb<ARMEmitter::IndexType::POST>(Value.W(), TMP2, Size); break;
    case 2: strh<ARMEmitter::IndexType::POST>(Value.W(), TMP2, Size); break;
    case 4: str<ARMEmitter::IndexType::POST>(Value.W(), TMP2, Size); break;
    case 8: str<ARMEmitter::IndexType::POST>(Value.X(), TMP2, Size); break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
    }
  };

  // Store-release single element store. The release stores have no post-index form here, so
  // the address is stepped with an explicit add/sub; only the sign of `Size` is used.
  auto MemStoreTSO = [this](auto Value, uint32_t OpSize, int32_t Size) {
    if (OpSize == 1) {
      // 8-bit store is always aligned to natural alignment
      stlrb(Value.W(), TMP2);
    } else {
      // Placeholder slot ahead of the store (presumably back-patched, as in StoreMemTSO).
      nop();
      switch (OpSize) {
      case 2: stlrh(Value.W(), TMP2); break;
      case 4: stlr(Value.W(), TMP2); break;
      case 8: stlr(Value.X(), TMP2); break;
      default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
      }
    }

    if (Size >= 0) {
      add(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize);
    } else {
      sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize);
    }
  };

  const auto SubRegSize = Size == 1 ? ARMEmitter::SubRegSize::i8Bit :
                          Size == 2 ? ARMEmitter::SubRegSize::i16Bit :
                          Size == 4 ? ARMEmitter::SubRegSize::i32Bit :
                          Size == 8 ? ARMEmitter::SubRegSize::i64Bit :
                                      ARMEmitter::SubRegSize::i8Bit;

  auto EmitMemset = [&](int32_t Direction) {
    const int32_t SizeDirection = Size * Direction;
    const bool IsBackwards = Direction == -1;

    // Sets the result to the final address written depending on
    // whether or not the memset is forwards or backwards.
    const auto MakeFinalAddress = [&] {
      if (IsBackwards) {
        switch (Size) {
        case 1: sub(Dst.X(), MemReg.X(), Length.X()); break;
        case 2:
        case 4:
        case 8: sub(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size)); break;
        default: LOGMAN_MSG_A_FMT("Unhandled MemSet size: {}", Size); break;
        }
      } else {
        switch (Size) {
        case 1: add(Dst.X(), MemReg.X(), Length.X()); break;
        case 2:
        case 4:
        case 8: add(Dst.X(), MemReg.X(), Length.X(), ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size)); break;
        default: LOGMAN_MSG_A_FMT("Unhandled MemSet size: {}", Size); break;
        }
      }
    };

    ARMEmitter::BiDirectionalLabel AgainInternal {};
    ARMEmitter::ForwardLabel DoneInternal {};

    // Early exit if zero count.
    (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

    if (!IsAtomic) {
      if (CTX->HostFeatures.SupportsMOPS) {
        const bool Is8Bit = SubRegSize == ARMEmitter::SubRegSize::i8Bit;

        // We can handle 8-bit memsets and any other size that happens
        // to be using an inlined zero value (resulting in the use of ZR).
        //
        // NOTE:
        // Strictly speaking, this can also be trivially expanded to handle other sizes
        // that happen to use any value that could fit inside a byte if the need
        // arises. This does increase branching and code generation, however, since
        // we'd still need to emit the fallback in the event a value for a larger size
        // falls outside the range of a byte instead of only generating the MOPS code.
        if (Is8Bit || Value == ARMEmitter::Reg::zr) {
          // If we're performing a non-byte-sized zeroing operation then we need to
          // scale the counter accordingly. (e.g. a 64-bit memset of size 2 needs to
          // be turned into an 8-bit memset of size 16)
          if (!Is8Bit) {
            lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, FEXCore::ToUnderlying(SubRegSize));
          }

          // If backwards, then we need to adjust the starting address because
          // set{p, m, e} memset forwards, so we need to slide this bad boy
          // back like: (address - byte count) + element size.
          //
          // The incoming address points at the start of the first (highest) element, so the
          // region written by a backwards memset is [address - bytes + Size, address + Size).
          // Using the element size (rather than a fixed 1) keeps non-byte zeroing memsets
          // covering exactly the same bytes as the element-wise loop below.
          //
          // This lets us offset the address such that we can treat a backwards
          // memset as if it were a forwards one.
          if (IsBackwards) {
            sub(TMP2, TMP2, TMP1);
            add(ARMEmitter::Size::i64Bit, TMP2, TMP2, Size);
          }

          // Unfortunately set operations fiddle with NZCV, so we need to preserve it.
          mrs(TMP3, ARMEmitter::SystemRegister::NZCV);
          setp(TMP2, TMP1, Value.X());
          setm(TMP2, TMP1, Value.X());
          sete(TMP2, TMP1, Value.X());
          msr(ARMEmitter::SystemRegister::NZCV, TMP3);

          MakeFinalAddress();
          (void)Bind(&DoneInternal);
          return;
        }
      }

      ARMEmitter::ForwardLabel AgainInternal256Exit {};
      ARMEmitter::BackwardLabel AgainInternal256 {};
      ARMEmitter::ForwardLabel AgainInternal128Exit {};
      ARMEmitter::BackwardLabel AgainInternal128 {};

      // A 32-byte store pair always writes upwards from its base address, so for a backwards
      // memset the base is moved down to the start of the 32-byte window ending at the
      // current element.
      if (IsBackwards) {
        sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
      }

      // Keep the counter one copy ahead, so that underflow can be used to detect when to fallback
      // to the copy unit size copy loop for the last chunk.
      // Do this in two parts, to fallback to the byte by byte loop if size < 32, and to the
      // single copy loop if size < 64.
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal128Exit);

      // Fill VTMP2 with the set pattern
      dup(SubRegSize, VTMP2.Q(), Value);

      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal256Exit);

      // 64 bytes per iteration.
      (void)Bind(&AgainInternal256);
      stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
      stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
      (void)tbz(TMP1, 63, &AgainInternal256);

      // Undo the look-ahead bias to recover the true remaining element count.
      (void)Bind(&AgainInternal256Exit);
      add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

      // 32 bytes per iteration.
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal128Exit);
      (void)Bind(&AgainInternal128);
      stp<ARMEmitter::IndexType::POST>(VTMP2.Q(), VTMP2.Q(), TMP2, 32 * Direction);
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbz(TMP1, 63, &AgainInternal128);

      (void)Bind(&AgainInternal128Exit);
      add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

      // Restore the element-granular address for the tail loop.
      if (IsBackwards) {
        add(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
      }
    }

    // Element-at-a-time loop: the whole operation when atomic, otherwise only the tail.
    (void)Bind(&AgainInternal);
    if (IsAtomic) {
      MemStoreTSO(Value, Size, SizeDirection);
    } else {
      MemStore(Value, Size, SizeDirection);
    }
    sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1);
    (void)cbnz(ARMEmitter::Size::i64Bit, TMP1, &AgainInternal);

    (void)Bind(&DoneInternal);

    MakeFinalAddress();
  };

  if (DirectionIsInline) {
    LOGMAN_THROW_A_FMT(DirectionConstant == 1 || DirectionConstant == -1, "unexpected direction");
    EmitMemset(DirectionConstant);
  } else {
    // Emit forward direction memset then backward direction memset.
    for (int32_t Direction : {1, -1}) {
      EmitMemset(Direction);

      if (Direction == 1) {
        // The forward variant skips over the backward variant that is emitted next.
        (void)b(&Done);
        (void)Bind(&BackwardImpl);
      }
    }

    (void)Bind(&Done);
    // Destination already set to the final pointer.
  }
}

// Emits an inline memcpy.
//
// Copies `Length` elements of `Op->Size` bytes each from `Src` to `Dest`, walking both pointers
// upwards or downwards depending on `Direction` (1 = forwards, -1 = backwards). On completion
// `OutDstAddress` receives `Dest +/- Length * Size` and `OutSrcAddress` receives
// `Src +/- Length * Size`.
//
// When the direction is not an inline constant, both the forward and the backward variant are
// emitted and one is selected at runtime.
DEF_OP(MemCpy) {
  const auto Op = IROp->C<IR::IROp_MemCpy>();

  const bool IsAtomic = CTX->IsMemcpyAtomicTSOEnabled();
  const auto Size = IR::OpSizeToSize(Op->Size);
  const auto MemRegDest = GetReg(Op->Dest);
  const auto MemRegSrc = GetReg(Op->Src);

  const auto Length = GetReg(Op->Length);
  uint64_t DirectionConstant;
  bool DirectionIsInline = IsInlineConstant(Op->Direction, &DirectionConstant);
  // Only meaningful when the direction lives in a register; r0 is just a placeholder otherwise.
  ARMEmitter::Register DirectionReg = ARMEmitter::Reg::r0;
  if (!DirectionIsInline) {
    DirectionReg = GetReg(Op->Direction);
  }

  auto Dst0 = GetReg(Op->OutDstAddress);
  auto Dst1 = GetReg(Op->OutSrcAddress);
  // If Direction > 0 then:
  //   MemRegDest is incremented (by size)
  //   MemRegSrc is incremented (by size)
  // else:
  //   MemRegDest is decremented (by size)
  //   MemRegSrc is decremented (by size)
  //
  // Counter is decremented regardless.

  ARMEmitter::ForwardLabel BackwardImpl {};
  ARMEmitter::ForwardLabel Done {};

  mov(TMP1, Length.X());
  mov(TMP2, MemRegDest.X());
  mov(TMP3, MemRegSrc.X());

  // TMP1 = Length
  // TMP2 = Dest
  // TMP3 = Src
  // TMP4 = load+store temp value

  if (!DirectionIsInline) {
    // Backward or forwards implementation depends on flag.
    // Direction is expected to be 1 or -1, so bit 1 is set only for the backwards case.
    (void)tbnz(DirectionReg, 1, &BackwardImpl);
  }

  // Non-atomic copy of one unit of `OpSize` bytes; the post-index `Size` carries the signed step
  // applied to both pointers. The 32-byte case moves a pair of 128-bit vectors via VTMP1/VTMP2.
  auto MemCpy = [this](uint32_t OpSize, int32_t Size) {
    switch (OpSize) {
    case 1:
      ldrb<ARMEmitter::IndexType::POST>(TMP4.W(), TMP3, Size);
      strb<ARMEmitter::IndexType::POST>(TMP4.W(), TMP2, Size);
      break;
    case 2:
      ldrh<ARMEmitter::IndexType::POST>(TMP4.W(), TMP3, Size);
      strh<ARMEmitter::IndexType::POST>(TMP4.W(), TMP2, Size);
      break;
    case 4:
      ldr<ARMEmitter::IndexType::POST>(TMP4.W(), TMP3, Size);
      str<ARMEmitter::IndexType::POST>(TMP4.W(), TMP2, Size);
      break;
    case 8:
      ldr<ARMEmitter::IndexType::POST>(TMP4, TMP3, Size);
      str<ARMEmitter::IndexType::POST>(TMP4, TMP2, Size);
      break;
    case 32:
      ldp<ARMEmitter::IndexType::POST>(VTMP1.Q(), VTMP2.Q(), TMP3, Size);
      stp<ARMEmitter::IndexType::POST>(VTMP1.Q(), VTMP2.Q(), TMP2, Size);
      break;
    default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
    }
  };

  // Acquire-load / release-store copy of one element. These instructions have no post-index
  // form here, so both pointers are stepped explicitly afterwards; only the sign of `Size` is
  // used. The load uses the RCPC (ldapr) variants when the host supports them, ldar otherwise.
  auto MemCpyTSO = [this](uint32_t OpSize, int32_t Size) {
    if (CTX->HostFeatures.SupportsRCPC) {
      if (OpSize == 1) {
        // 8bit load is always aligned to natural alignment
        ldaprb(TMP4.W(), TMP3);
        stlrb(TMP4.W(), TMP2);
      } else {
        switch (OpSize) {
        case 2: ldaprh(TMP4.W(), TMP3); break;
        case 4: ldapr(TMP4.W(), TMP3); break;
        case 8: ldapr(TMP4, TMP3); break;
        default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
        }

        // Placeholders for backpatching barriers (one per load/store)
        nop();
        nop();

        switch (OpSize) {
        case 2: stlrh(TMP4.W(), TMP2); break;
        case 4: stlr(TMP4.W(), TMP2); break;
        case 8: stlr(TMP4, TMP2); break;
        default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
        }
      }
    } else {
      if (OpSize == 1) {
        // 8bit load is always aligned to natural alignment
        ldarb(TMP4.W(), TMP3);
        stlrb(TMP4.W(), TMP2);
      } else {
        switch (OpSize) {
        case 2: ldarh(TMP4.W(), TMP3); break;
        case 4: ldar(TMP4.W(), TMP3); break;
        case 8: ldar(TMP4, TMP3); break;
        default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
        }

        // Placeholders for backpatching barriers (one per load/store)
        nop();
        nop();

        switch (OpSize) {
        case 2: stlrh(TMP4.W(), TMP2); break;
        case 4: stlr(TMP4.W(), TMP2); break;
        case 8: stlr(TMP4, TMP2); break;
        default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, Size); break;
        }
      }
    }

    if (Size >= 0) {
      add(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize);
      add(ARMEmitter::Size::i64Bit, TMP3, TMP3, OpSize);
    } else {
      sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, OpSize);
      sub(ARMEmitter::Size::i64Bit, TMP3, TMP3, OpSize);
    }
  };

  // Emits the complete copy sequence for one fixed direction (1 or -1).
  auto EmitMemcpy = [&](int32_t Direction) {
    const int32_t SizeDirection = Size * Direction;
    const bool IsBackwards = Direction == -1;

    // Writes the two result registers. Expects TMP1 = original Dest, TMP2 = original Src and
    // TMP3 = element count (they are reloaded just before this is invoked).
    const auto FinalizeAddresses = [&] {
      if (IsBackwards) {
        switch (Size) {
        case 1:
          sub(Dst0.X(), TMP1, TMP3);
          sub(Dst1.X(), TMP2, TMP3);
          break;
        case 2:
        case 4:
        case 8:
          sub(Dst0.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size));
          sub(Dst1.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size));
          break;
        default: LOGMAN_MSG_A_FMT("Unhandled MemCpy size: {}", Size); break;
        }
      } else {
        switch (Size) {
        case 1:
          add(Dst0.X(), TMP1, TMP3);
          add(Dst1.X(), TMP2, TMP3);
          break;
        case 2:
        case 4:
        case 8:
          add(Dst0.X(), TMP1, TMP3, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size));
          add(Dst1.X(), TMP2, TMP3, ARMEmitter::ShiftType::LSL, FEXCore::ilog2(Size));
          break;
        default: LOGMAN_MSG_A_FMT("Unhandled MemCpy size: {}", Size); break;
        }
      }
    };

    ARMEmitter::BiDirectionalLabel AgainInternal {};
    ARMEmitter::ForwardLabel DoneInternal {};

    // Early exit if zero count.
    (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

    if (!IsAtomic) {
      if (CTX->HostFeatures.SupportsMOPS) {
        // In the event we have an overlap (gross), we need to fall back
        // to the non-mops copy handler. Since the overlap check needs to
        // make use of NZCV, we need to save it. This can be avoided with
        // ARMv9.6+'s FEAT_CMPBR, but alas, we don't have access to that right now.
        //
        // NOTE: That we need to temporarily trash TMP1 and restore it after the
        //       comparison.
        //
        // NOTE(review): the comparison below is (Dest - Src) against the element count in
        //       Length, not the byte count; confirm this is intended when Size > 1.
        ARMEmitter::ForwardLabel OverlapCase;
        mrs(TMP4, ARMEmitter::SystemRegister::NZCV);
        sub(ARMEmitter::Size::i64Bit, TMP1, TMP2, TMP3);
        cmp(ARMEmitter::Size::i64Bit, TMP1, Length.X());
        mov(TMP1, Length.X());
        (void)bc(ARMEmitter::Condition::CC_LT, &OverlapCase);

        // If doing something larger than a byte copy, then we need to scale
        // the counter value accordingly to convert it to bytes.
        if (Size > 1) {
          lsl(ARMEmitter::Size::i64Bit, TMP1, TMP1, FEXCore::ilog2(Size));
        }

        // Adjust addresses so that we treat the backward copy as a forward copy
        // (start = address - byte count + element size).
        if (IsBackwards) {
          sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, TMP1);
          sub(ARMEmitter::Size::i64Bit, TMP3, TMP3, TMP1);
          add(ARMEmitter::Size::i64Bit, TMP2, TMP2, Size);
          add(ARMEmitter::Size::i64Bit, TMP3, TMP3, Size);
        }

        // Unfortunately copy operations fiddle with NZCV, so we need to preserve it.
        // (NZCV was saved into TMP4 above, before the overlap compare.)
        cpyfp(TMP2, TMP3, TMP1);
        cpyfm(TMP2, TMP3, TMP1);
        cpyfe(TMP2, TMP3, TMP1);
        msr(ARMEmitter::SystemRegister::NZCV, TMP4);

        (void)b(&DoneInternal);

        // Turns out we overlap and need to fall back. Make sure to restore NZCV.
        (void)Bind(&OverlapCase);
        msr(ARMEmitter::SystemRegister::NZCV, TMP4);
      }

      ARMEmitter::ForwardLabel AbsPos {};
      ARMEmitter::ForwardLabel AgainInternal256Exit {};
      ARMEmitter::ForwardLabel AgainInternal128Exit {};
      ARMEmitter::BackwardLabel AgainInternal128 {};
      ARMEmitter::BackwardLabel AgainInternal256 {};

      // TMP4 = |Dest - Src|. If the pointers are closer than 32 bytes, skip the 32-byte vector
      // loops entirely and go straight to the element-at-a-time loop.
      sub(ARMEmitter::Size::i64Bit, TMP4, TMP2, TMP3);
      (void)tbz(TMP4, 63, &AbsPos);
      neg(ARMEmitter::Size::i64Bit, TMP4, TMP4);
      (void)Bind(&AbsPos);
      sub(ARMEmitter::Size::i64Bit, TMP4, TMP4, 32);
      (void)tbnz(TMP4, 63, &AgainInternal);

      // A 32-byte load/store pair always accesses upwards from its base address, so for a
      // backwards copy both bases are moved down to the start of the 32-byte window ending at
      // the current element.
      if (IsBackwards) {
        sub(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
        sub(ARMEmitter::Size::i64Bit, TMP3, TMP3, 32 - Size);
      }

      // Keep the counter one copy ahead, so that underflow can be used to detect when to fallback
      // to the copy unit size copy loop for the last chunk.
      // Do this in two parts, to fallback to the byte by byte loop if size < 32, and to the
      // single copy loop if size < 64.
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal128Exit);
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal256Exit);

      // 64 bytes per iteration.
      (void)Bind(&AgainInternal256);
      MemCpy(32, 32 * Direction);
      MemCpy(32, 32 * Direction);
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
      (void)tbz(TMP1, 63, &AgainInternal256);

      // Undo the look-ahead bias to recover the true remaining element count.
      (void)Bind(&AgainInternal256Exit);
      add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 64 / Size);
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

      // 32 bytes per iteration.
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbnz(TMP1, 63, &AgainInternal128Exit);
      (void)Bind(&AgainInternal128);
      MemCpy(32, 32 * Direction);
      sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)tbz(TMP1, 63, &AgainInternal128);

      (void)Bind(&AgainInternal128Exit);
      add(ARMEmitter::Size::i64Bit, TMP1, TMP1, 32 / Size);
      (void)cbz(ARMEmitter::Size::i64Bit, TMP1, &DoneInternal);

      // Restore the element-granular addresses for the tail loop.
      if (IsBackwards) {
        add(ARMEmitter::Size::i64Bit, TMP2, TMP2, 32 - Size);
        add(ARMEmitter::Size::i64Bit, TMP3, TMP3, 32 - Size);
      }
    }

    // Element-at-a-time loop: the whole operation when atomic, otherwise the tail (or the
    // entire copy when the pointers were too close together for the vector loops).
    (void)Bind(&AgainInternal);
    if (IsAtomic) {
      MemCpyTSO(Size, SizeDirection);
    } else {
      MemCpy(Size, SizeDirection);
    }
    sub(ARMEmitter::Size::i64Bit, TMP1, TMP1, 1);
    (void)cbnz(ARMEmitter::Size::i64Bit, TMP1, &AgainInternal);

    (void)Bind(&DoneInternal);

    // Needs to use temporaries just in case of overwrite
    // (the output registers may alias the input registers).
    mov(TMP1, MemRegDest.X());
    mov(TMP2, MemRegSrc.X());
    mov(TMP3, Length.X());

    FinalizeAddresses();
  };

  if (DirectionIsInline) {
    LOGMAN_THROW_A_FMT(DirectionConstant == 1 || DirectionConstant == -1, "unexpected direction");
    EmitMemcpy(DirectionConstant);
  } else {
    // Emit forward direction memcpy then backward direction memcpy.
    for (int32_t Direction : {1, -1}) {
      EmitMemcpy(Direction);
      if (Direction == 1) {
        // The forward variant skips over the backward variant that is emitted next.
        (void)b(&Done);
        (void)Bind(&BackwardImpl);
      }
    }
    (void)Bind(&Done);
    // Destination already set to the final pointer.
  }
}

// Emits a clean+invalidate of the 64-byte guest cache line containing `Addr`.
//
// Without host cache maintenance support this degrades to a full `dmb sy`. When
// `Op->Serialize` is set, the data cache operations are followed by a `dsb ish`.
DEF_OP(CacheLineClear) {
  if (!CTX->HostFeatures.SupportsCacheMaintenanceOps) {
    dmb(ARMEmitter::BarrierScope::SY);
    return;
  }

  auto Op = IROp->C<IR::IROp_CacheLineClear>();

  auto MemReg = GetReg(Op->Addr);

  // Size of the cache line the guest operation is defined over.
  constexpr uint64_t GuestLineSize = 64;

  // Clear dcache only
  // icache doesn't matter here since the guest application shouldn't be calling clflush on JIT code.
  if (CTX->HostFeatures.DCacheLineSize >= GuestLineSize) {
    // A single host line covers the whole guest line.
    dc(ARMEmitter::DataCacheOperation::CIVAC, MemReg);
  } else {
    // Host lines are smaller than the guest line, so every host line inside the guest line
    // needs its own operation. Start from the guest-line-aligned base and step one host line
    // at a time; TMP1 always holds a valid address before it is handed to `dc`.
    const uint32_t HostLineSize = std::max<uint32_t>(1U, CTX->HostFeatures.DCacheLineSize);
    and_(ARMEmitter::Size::i64Bit, TMP1, MemReg, ~(GuestLineSize - 1));
    for (uint64_t Offset = 0; Offset < GuestLineSize; Offset += HostLineSize) {
      dc(ARMEmitter::DataCacheOperation::CIVAC, TMP1);
      if (Offset + HostLineSize < GuestLineSize) {
        add(ARMEmitter::Size::i64Bit, TMP1, TMP1, HostLineSize);
      }
    }
  }

  if (Op->Serialize) {
    // If requested, serialized all of the data cache operations.
    dsb(ARMEmitter::BarrierScope::ISH);
  }
}

// Emits a clean (write-back without invalidate) of the 64-byte guest cache line containing
// `Addr`.
//
// Without host cache maintenance support this degrades to a store barrier (`dmb st`).
DEF_OP(CacheLineClean) {
  if (!CTX->HostFeatures.SupportsCacheMaintenanceOps) {
    dmb(ARMEmitter::BarrierScope::ST);
    return;
  }

  auto Op = IROp->C<IR::IROp_CacheLineClean>();

  auto MemReg = GetReg(Op->Addr);

  // Size of the cache line the guest operation is defined over.
  constexpr uint64_t GuestLineSize = 64;

  // Clean dcache only
  if (CTX->HostFeatures.DCacheLineSize >= GuestLineSize) {
    // A single host line covers the whole guest line.
    dc(ARMEmitter::DataCacheOperation::CVAC, MemReg);
  } else {
    // Host lines are smaller than the guest line, so every host line inside the guest line
    // needs its own operation. Start from the guest-line-aligned base and step one host line
    // at a time; TMP1 always holds a valid address before it is handed to `dc`.
    const uint32_t HostLineSize = std::max<uint32_t>(1U, CTX->HostFeatures.DCacheLineSize);
    and_(ARMEmitter::Size::i64Bit, TMP1, MemReg, ~(GuestLineSize - 1));
    for (uint64_t Offset = 0; Offset < GuestLineSize; Offset += HostLineSize) {
      dc(ARMEmitter::DataCacheOperation::CVAC, TMP1);
      if (Offset + HostLineSize < GuestLineSize) {
        add(ARMEmitter::Size::i64Bit, TMP1, TMP1, HostLineSize);
      }
    }
  }
}

// Emits code that zeroes the guest cache line containing `Addr`.
DEF_OP(CacheLineZero) {
  const auto Op = IROp->C<IR::IROp_CacheLineZero>();
  const auto Address = GetReg(Op->Addr);

  // Fast path: the host can zero the line with a single data cache operation.
  if (CTX->HostFeatures.SupportsCLZERO) {
    dc(ARMEmitter::DataCacheOperation::ZVA, Address);
    return;
  }

  // Slow path: zero the line by hand.
  // Round the address down to the start of its cache line first.
  and_(ARMEmitter::Size::i64Bit, TMP1, Address, ~(CPUIDEmu::CACHELINE_SIZE - 1));

  // One non-temporal store pair of zero registers per 16 bytes of the line.
  // A vector pair store could be marginally cheaper on some uarchs, but the
  // non-temporal hint is what we specifically want here.
  for (size_t ByteOffset = 0; ByteOffset < CPUIDEmu::CACHELINE_SIZE; ByteOffset += 16) {
    stnp(ARMEmitter::XReg::zr, ARMEmitter::XReg::zr, TMP1, ByteOffset);
  }
}

// Emits a `prfm` hint for the address described by the op.
//
// The prefetch flavour is chosen from a table indexed by:
//   bit 0    = streaming (non-keep) hint
//   bits 2:1 = cache level - 1
//   bit 3    = prefetch for store
DEF_OP(Prefetch) {
  const auto Op = IROp->C<IR::IROp_Prefetch>();
  const auto Base = GetReg(Op->Addr);

  // The access is always described as an 8-byte one, even though a whole cacheline is touched.
  const auto Target = GenerateMemOperand(IR::OpSize::i64Bit, Base, Op->Offset, Op->OffsetType, Op->OffsetScale);

  constexpr static std::array<ARMEmitter::Prefetch, 14> HintTable = {
    // Loads, level 1
    ARMEmitter::Prefetch::PLDL1KEEP,
    ARMEmitter::Prefetch::PLDL1STRM,
    // Loads, level 2
    ARMEmitter::Prefetch::PLDL2KEEP,
    ARMEmitter::Prefetch::PLDL2STRM,
    // Loads, level 3
    ARMEmitter::Prefetch::PLDL3KEEP,
    ARMEmitter::Prefetch::PLDL3STRM,

    // Two filler entries for the unused level encoding.
    // 0b0'11'0
    ARMEmitter::Prefetch::PLDL1STRM,
    // 0b0'11'1
    ARMEmitter::Prefetch::PLDL1STRM,

    // Stores, level 1
    ARMEmitter::Prefetch::PSTL1KEEP,
    ARMEmitter::Prefetch::PSTL1STRM,
    // Stores, level 2
    ARMEmitter::Prefetch::PSTL2KEEP,
    ARMEmitter::Prefetch::PSTL2STRM,
    // Stores, level 3
    ARMEmitter::Prefetch::PSTL3KEEP,
    ARMEmitter::Prefetch::PSTL3STRM,
  };

  const size_t HintIndex = (Op->Stream ? 1 : 0) | ((Op->CacheLevel - 1) << 1) | (Op->ForStore ? 1U << 3 : 0);

  prfm(HintTable[HintIndex], Target);
}

// Emits a non-temporal vector store of `Value` to `Addr + Offset`.
//
// 256-bit stores require SVE256; 128-bit stores use the SVE non-temporal form when SVE128 is
// available and otherwise fall back to an ordinary vector store.
DEF_OP(VStoreNonTemporal) {
  const auto Op = IROp->C<IR::IROp_VStoreNonTemporal>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;

  const auto Source = GetVReg(Op->Value);
  const auto Base = GetReg(Op->Addr);
  const auto ByteOffset = Op->Offset;

  if (Is256Bit) {
    // The SVE immediate is expressed in multiples of the 32-byte vector length.
    const auto Predicate = PRED_TMP_32B.Zeroing();
    const auto VectorOffset = ByteOffset / 32;
    stnt1b(Source.Z(), Predicate, Base, VectorOffset);
    return;
  }

  if (Is128Bit && HostSupportsSVE128) {
    // The SVE immediate is expressed in multiples of the 16-byte vector length.
    const auto Predicate = PRED_TMP_16B.Zeroing();
    const auto VectorOffset = ByteOffset / 16;
    stnt1b(Source.Z(), Predicate, Base, VectorOffset);
    return;
  }

  // No usable non-temporal form: emit a regular vector store for compatibility.
  str(Source.Q(), Base, ByteOffset);
}

// Emits a non-temporal store of two 128-bit vectors (`ValueLow`, then `ValueHigh`) to
// `Addr + Offset` using a single `stnp`.
DEF_OP(VStoreNonTemporalPair) {
  const auto Op = IROp->C<IR::IROp_VStoreNonTemporalPair>();

  // Only the 128-bit form exists for this operation.
  const auto Is128Bit = IROp->Size == IR::OpSize::i128Bit;
  LOGMAN_THROW_A_FMT(Is128Bit, "This IR operation only operates at 128-bit wide");

  const auto Low = GetVReg(Op->ValueLow);
  const auto High = GetVReg(Op->ValueHigh);
  const auto Base = GetReg(Op->Addr);

  stnp(Low.Q(), High.Q(), Base, Op->Offset);
}

// Emits a non-temporal vector load from `Addr + Offset` into the node's vector register.
//
// 256-bit loads require SVE256; 128-bit loads use the SVE non-temporal form when SVE128 is
// available and otherwise fall back to an ordinary vector load.
DEF_OP(VLoadNonTemporal) {
  const auto Op = IROp->C<IR::IROp_VLoadNonTemporal>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;

  const auto Result = GetVReg(Node);
  const auto Base = GetReg(Op->Addr);
  const auto ByteOffset = Op->Offset;

  if (Is256Bit) {
    // The SVE immediate is expressed in multiples of the 32-byte vector length.
    const auto Predicate = PRED_TMP_32B.Zeroing();
    const auto VectorOffset = ByteOffset / 32;
    ldnt1b(Result.Z(), Predicate, Base, VectorOffset);
    return;
  }

  if (Is128Bit && HostSupportsSVE128) {
    // The SVE immediate is expressed in multiples of the 16-byte vector length.
    const auto Predicate = PRED_TMP_16B.Zeroing();
    const auto VectorOffset = ByteOffset / 16;
    ldnt1b(Result.Z(), Predicate, Base, VectorOffset);
    return;
  }

  // No usable non-temporal form: emit a regular vector load for compatibility.
  ldr(Result.Q(), Base, ByteOffset);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/MiscOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#ifndef _WIN32
#include <syscall.h>
#endif

#include "Interface/Context/Context.h"
#include "Interface/Core/JIT/DebugData.h"
#include "Interface/Core/JIT/JITClass.h"

#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/EnumUtils.h>

namespace FEXCore::CPU {

// Emits a `wfet` wait whose 64-bit argument is assembled from two 32-bit halves.
DEF_OP(WFET) {
  const auto Op = IROp->C<IR::IROp_WFET>();
  const auto LowHalf = GetReg(Op->Lower);
  const auto HighHalf = GetReg(Op->Upper);

  // TMP1 = (Upper << 32) | Lower[31:0]
  mov(ARMEmitter::Size::i64Bit, TMP1, LowHalf);
  bfi(ARMEmitter::Size::i64Bit, TMP1, HighHalf, 32, 32);

  // When a TSC scale is configured, shift the value back down to the ARM64 counter scale.
  if (CTX->Config.TSCScale) {
    lsr(ARMEmitter::Size::i64Bit, TMP1, TMP1, CTX->Config.TSCScale);
  }

  // Drop the exclusive monitor first so that event can't spuriously end the wait.
  clrex();

  // Wait until the combined counter value.
  wfet(TMP1);
}

// Emits no code: records a (guest entry offset, host offset from block start) pair in the
// debug metadata for the current emission position.
DEF_OP(GuestOpcode) {
  const auto Op = IROp->C<IR::IROp_GuestOpcode>();

  auto& Opcodes = DebugData->GuestOpcodes;
  Opcodes.push_back({Op->GuestEntryOffset, GetCursorAddress<uint8_t*>() - CodeData.BlockBegin});
}

// Emits the barrier instruction matching the requested fence type:
// load -> dmb ld, store -> dmb st, load+store -> dmb sy, instruction -> isb.
DEF_OP(Fence) {
  const auto Op = IROp->C<IR::IROp_Fence>();

  if (Op->Fence == IR::FenceType::Load) {
    dmb(ARMEmitter::BarrierScope::LD);
  } else if (Op->Fence == IR::FenceType::LoadStore) {
    dmb(ARMEmitter::BarrierScope::SY);
  } else if (Op->Fence == IR::FenceType::Store) {
    dmb(ARMEmitter::BarrierScope::ST);
  } else if (Op->Fence == IR::FenceType::Inst) {
    isb();
  } else {
    LOGMAN_MSG_A_FMT("Unknown Fence: {}", Op->Fence);
  }
}

// Emits a synchronous guest fault: resets the stack, stores the fault description into the
// CPU state frame and branches to the signal handler pointer matching the fault's signal.
DEF_OP(Break) {
  const auto Op = IROp->C<IR::IROp_Break>();

  // The stack has to be reset before anything else happens.
  ResetStack();

  // Describe the fault, then pack the struct into a single 64-bit immediate.
  Core::CpuStateFrame::SynchronousFaultDataStruct FaultData = {
    .FaultToTopAndGeneratedException = 1,
    .Signal = Op->Reason.Signal,
    .TrapNo = Op->Reason.TrapNumber,
    .si_code = Op->Reason.si_code,
    .err_code = Op->Reason.ErrorRegister,
  };

  uint64_t PackedFault {};
  memcpy(&PackedFault, &FaultData, sizeof(FaultData));

  // Publish the packed fault data into the state frame.
  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, PackedFault);
  str(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CpuStateFrame, SynchronousFaultData));

  // Tail-jump to the handler for the signal; anything unrecognised is treated as SIGTRAP.
  switch (Op->Reason.Signal) {
  case Core::FAULT_SIGILL:
    ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.GuestSignal_SIGILL));
    br(TMP1);
    break;
  case Core::FAULT_SIGSEGV:
    ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.GuestSignal_SIGSEGV));
    br(TMP1);
    break;
  case Core::FAULT_SIGTRAP:
  default:
    ldr(TMP1, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.GuestSignal_SIGTRAP));
    br(TMP1);
    break;
  }
}

// Emits code that reads the host FPCR and returns the rounding state in the IR's layout:
//   bits 1:0 = rounding mode (IR::RoundMode ordering)
//   bit  2   = flush-to-zero
DEF_OP(GetRoundingMode) {
  auto Dst = GetReg(Node);
  mrs(Dst, ARMEmitter::SystemRegister::FPCR);
  // Pull FPCR[24:22] (FZ + RMode) down to Dst[2:0].
  ubfx(ARMEmitter::Size::i64Bit, Dst, Dst, 22, 3);

  // FTZ is already in the correct location
  // Rounding mode is different
  //
  // Need to remap rounding mode from order nearest, pos inf, neg inf, toward
  // zero. Just swapping 01 and 10. That's a bitfield reverse. Round mode is in
  // bottom two bits. After reversing as a 32-bit operation, it'll be in [31:30]
  // and ripe for reinsertion back at 0.
  static_assert(FEXCore::ToUnderlying(IR::RoundMode::Nearest) == 0);
  static_assert(FEXCore::ToUnderlying(IR::RoundMode::NegInfinity) == 1);
  static_assert(FEXCore::ToUnderlying(IR::RoundMode::PosInfinity) == 2);
  static_assert(FEXCore::ToUnderlying(IR::RoundMode::TowardsZero) == 3);

  rbit(ARMEmitter::Size::i32Bit, TMP1, Dst);
  // Extract the reversed mode from TMP1[31:30] and insert it into Dst[1:0], leaving the
  // FTZ bit untouched. This has to be bfxil (extract from `lsb`, insert at bit 0); bfi would
  // instead copy TMP1[1:0] into Dst[31:30], leaving the mode unswapped and polluting the
  // upper bits of the result.
  bfxil(ARMEmitter::Size::i64Bit, Dst, TMP1, 30, 2);
}

// Emits code that updates the host FPCR from an IR-layout rounding value
// (bits 1:0 = rounding mode, bit 2 = flush-to-zero) and, optionally, the DAZ bit of MXCSR.
DEF_OP(SetRoundingMode) {
  const auto Op = IROp->C<IR::IROp_SetRoundingMode>();
  const auto GuestMode = GetReg(Op->RoundMode);
  const auto GuestMXCSR = GetReg(Op->MXCSR);

  // Bit-reverse so the two mode bits land (swapped) in [31:30] ...
  rbit(ARMEmitter::Size::i32Bit, TMP2, GuestMode);
  // ... then bring them back down to [1:0].
  lsr(ARMEmitter::Size::i32Bit, TMP2, TMP2, 30);

  // TMP1 accumulates the new FPCR value, starting from the current one.
  mrs(TMP1, ARMEmitter::SystemRegister::FPCR);

  // The vixl simulator only supports ties-to-even rounding, so leave RMode alone there.
  if (CTX->Config.DisableVixlIndirectCalls) [[likely]] {
    // RMode lives in FPCR[23:22].
    bfi(ARMEmitter::Size::i64Bit, TMP1, TMP2, 22, 2);
  }

  // Flush-to-zero: bit 2 of the guest value goes to FPCR[24].
  lsr(ARMEmitter::Size::i64Bit, TMP2, GuestMode, 2);
  bfi(ARMEmitter::Size::i64Bit, TMP1, TMP2, 24, 1);

  if (Op->SetDAZ && HostSupportsAFP) {
    // MXCSR bit 6 (DAZ) is copied into bit 0 of FPCR (FPCR.FIZ).
    bfxil(ARMEmitter::Size::i64Bit, TMP1, GuestMXCSR, 6, 1);
  }

  // Commit the assembled value.
  msr(ARMEmitter::SystemRegister::FPCR, TMP1);
}

// Emits code that saves the current FPCR into the node's result register and then switches the
// host rounding mode to the constant `Op->RoundMode` (IR ordering).
DEF_OP(PushRoundingMode) {
  const auto Op = IROp->C<IR::IROp_PushRoundingMode>();
  const auto SavedFPCR = GetReg(Node);

  // The previous FPCR is always handed back so it can be restored later.
  mrs(SavedFPCR, ARMEmitter::SystemRegister::FPCR);

  // The vixl simulator only supports ties-to-even rounding; nothing more to do in that case.
  if (CTX->Config.DisableVixlIndirectCalls) [[likely]] {
    // Build the new FPCR in TMP1. The two mode bits are swapped relative to the IR encoding,
    // so 0 and 3 map to themselves while 1 and 2 trade places.
    switch (Op->RoundMode) {
    case 3: orr(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, 3 << 22); break;
    case 0: and_(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, ~(3 << 22)); break;
    default:
      LOGMAN_THROW_A_FMT(Op->RoundMode == 1 || Op->RoundMode == 2, "expect a valid round mode");

      // Clear the bit the IR mode names, then set the opposite one.
      and_(ARMEmitter::Size::i64Bit, TMP1, SavedFPCR, ~(Op->RoundMode << 22));
      orr(ARMEmitter::Size::i64Bit, TMP1, TMP1, (Op->RoundMode == 2 ? 1 : 2) << 22);
      break;
    }

    // Commit the new FPCR.
    msr(ARMEmitter::SystemRegister::FPCR, TMP1);
  }
}

// Emits code that writes the given register value back into the host FPCR.
DEF_OP(PopRoundingMode) {
  const auto Op = IROp->C<IR::IROp_PopRoundingMode>();
  const auto SavedFPCR = GetReg(Op->FPCR);

  msr(ARMEmitter::SystemRegister::FPCR, SavedFPCR);
}

// Emits a call to the runtime print helper for a GPR or vector value.
//
// Registers are spilled around the call. A GPR value is passed in x0; a vector value is passed
// as two 64-bit halves in x0 (low) and x1 (high). The helper pointer is loaded into x3.
DEF_OP(Print) {
  const auto Op = IROp->C<IR::IROp_Print>();
  const bool ValueIsGPR = IsGPR(Op->Value);

  PushDynamicRegs(TMP1);
  SpillStaticRegs(TMP1);

  // Marshal the argument(s) and pick the matching helper.
  if (ValueIsGPR) {
    mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GetReg(Op->Value));
    ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.PrintValue));
  } else {
    fmov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, GetVReg(Op->Value), false);
    fmov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, GetVReg(Op->Value), true);
    ldr(ARMEmitter::XReg::x3, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.PrintVectorValue));
  }

  // Direct call on real hardware; the simulator path needs a typed indirect runtime call.
  if (CTX->Config.DisableVixlIndirectCalls) [[likely]] {
    blr(ARMEmitter::Reg::r3);
  } else if (ValueIsGPR) {
    GenerateIndirectRuntimeCall<void, uint64_t>(ARMEmitter::Reg::r3);
  } else {
    GenerateIndirectRuntimeCall<void, uint64_t, uint64_t>(ARMEmitter::Reg::r3);
  }

  FillStaticRegs();
  PopDynamicRegs();
}

// Emits code that produces the processor ID value for the destination register.
//
// Fast path: when the host exposes the CPU index in TPIDRRO_EL0, the register is read directly.
// Slow path (non-Windows only): issues the getcpu syscall inline, writing the CPU and node IDs
// into a 16-byte scratch area on the stack, and combines them as `(node << 12) | cpu`.
// On Windows the slow path is unsupported and aborts.
DEF_OP(ProcessorID) {
  if (CTX->HostFeatures.SupportsCPUIndexInTPIDRRO) {
    mrs(GetReg(Node), ARMEmitter::SystemRegister::TPIDRRO_EL0);
    return;
  }
#ifdef _WIN32
  else {
    // If on Windows and TPIDRRO isn't supported (like in wine), then this is a programming error.
    ERROR_AND_DIE_FMT("Unsupported");
  }
#else
  // We always need to spill x8 since we can't know if it is live at this SSA location
  uint32_t SpillMask = 1U << 8;

  // Ordering is incredibly important here
  // We must spill any overlapping registers first THEN claim we are in a syscall without invalidating state at all
  // Only spill the registers that intersect with our usage
  SpillStaticRegs(TMP1, {
                          .GPRSpillMask = SpillMask,
                          .FPRs = false,
                        });

  // Now that we are spilled, store in the state that we are in a syscall
  // Still without overwriting registers that matter
  // 16bit LoadConstant to be a single instruction
  // We must always spill at least one register (x8) so this value always has a bit set
  // This gives the signal handler a value to check to see if we are in a syscall at all
  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, SpillMask & 0xFFFF);
  str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));

  // Allocate some temporary space for storing the uint32_t CPU and Node IDs
  sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, 16);

  // Load the getcpu syscall number
#if defined(ARCHITECTURE_x86_64)
  // Just to ensure the syscall number doesn't change if compiled for an x86_64 host.
  constexpr auto GetCPUSyscallNum = 0xa8;
#else
  constexpr auto GetCPUSyscallNum = SYS_getcpu;
#endif
  LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r8, GetCPUSyscallNum);

  // CPU pointer in x0
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, ARMEmitter::Reg::rsp, 0);
  // Node in x1
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, ARMEmitter::Reg::rsp, 4);

  svc(0);
  // On updated signal mask we can receive a signal RIGHT HERE

  // Load the values returned by the kernel
  ldp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::WReg::w0, ARMEmitter::WReg::w1, ARMEmitter::Reg::rsp);
  // Deallocate stack space
  // This must be an `add` that mirrors the `sub` used for the allocation above; subtracting again
  // would leave SP 32 bytes below its original value after every execution of this op.
  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::rsp, 16);

  // Now that we are done in the syscall we need to carefully peel back the state
  // First unspill the registers from before

  FillStaticRegs({
    .OptionalReg = ARMEmitter::Reg::r8,
    .OptionalReg2 = ARMEmitter::Reg::r2,
    .GPRFillMask = SpillMask,
    .FPRs = false,
  });

  // Now the registers we've spilled are back in their original host registers
  // We can safely claim we are no longer in a syscall
  str(ARMEmitter::XReg::zr, STATE, offsetof(FEXCore::Core::CpuStateFrame, InSyscallInfo));

  // Now store the result in the destination in the expected format
  // uint32_t Res = (node << 12) | cpu;
  // CPU is in w0
  // Node is in w1
  orr(ARMEmitter::Size::i64Bit, GetReg(Node), ARMEmitter::Reg::r0, ARMEmitter::Reg::r1, ARMEmitter::ShiftType::LSL, 12);
#endif
}

// Reads a value from the RNDRRS or RNDR system register into the destination register,
// selected by the op's GetReseeded flag.
DEF_OP(RDRAND) {
  const auto RandOp = IROp->C<IR::IROp_RDRAND>();
  const auto Dst = GetReg(Node);

  if (RandOp->GetReseeded) {
    mrs(Dst, ARMEmitter::SystemRegister::RNDRRS);
  } else {
    mrs(Dst, ARMEmitter::SystemRegister::RNDR);
  }
}

// Emits the emitter's yield() instruction; the op has no operands and no destination.
DEF_OP(Yield) {
  yield();
}

// Emits a call to the MonoBackpatcherWrite helper whose address is stored in the CpuStateFrame.
// The call is made with the argument list (void*, uint8_t, uint64_t, uint64_t):
//   x0 = STATE, x1 = IR::OpSizeToSize(Op->Size), x2 = Op->Addr, x3 = Op->Value.
DEF_OP(MonoBackpatcherWrite) {
  auto Op = IROp->C<IR::IROp_MonoBackpatcherWrite>();

  // Stash the operands in temporaries before any register state is saved.
  // NOTE(review): presumably needed because the source registers are not guaranteed to survive
  // the spill and argument setup below - confirm.
  mov(ARMEmitter::Size::i64Bit, TMP3, GetReg(Op->Addr));
  mov(ARMEmitter::Size::i64Bit, TMP4, GetReg(Op->Value));

  PushDynamicRegs(TMP1);
  SpillStaticRegs(TMP1);

  mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, STATE.R());
  mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, IR::OpSizeToSize(Op->Size));

  // The explicit moves into x2/x3 are skipped when TMP_ABIARGS is set.
  // NOTE(review): presumably TMP3/TMP4 already are x2/x3 in that configuration - confirm.
  if (!TMP_ABIARGS) {
    mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, TMP3);
    mov(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, TMP4);
  }

#ifdef ARCHITECTURE_arm64ec
  // Set the in-syscall-callback byte in the CPU area (located through x18) to 1 for the call.
  ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
  LoadConstant(ARMEmitter::Size::i32Bit, TMP1, 1);
  strb(TMP1.W(), TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
#endif

  // Fetch the helper pointer and call it: typed indirect runtime call unless
  // DisableVixlIndirectCalls is set, otherwise a plain blr.
  ldr(ARMEmitter::XReg::x4, STATE, offsetof(FEXCore::Core::CpuStateFrame, Pointers.MonoBackpatcherWrite));
  if (!CTX->Config.DisableVixlIndirectCalls) [[unlikely]] {
    GenerateIndirectRuntimeCall<void, void*, uint8_t, uint64_t, uint64_t>(ARMEmitter::Reg::r4);
  } else {
    blr(ARMEmitter::Reg::r4);
  }

#ifdef ARCHITECTURE_arm64ec
  // Clear the in-syscall-callback byte again now that the helper has returned.
  ldr(TMP2, ARMEmitter::XReg::x18, TEB_CPU_AREA_OFFSET);
  strb(ARMEmitter::WReg::zr, TMP2, CPU_AREA_IN_SYSCALL_CALLBACK_OFFSET);
#endif

  // Restore register state in reverse order of the save above.
  FillStaticRegs();
  PopDynamicRegs();
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/MoveOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Core/JIT/JITClass.h"

namespace FEXCore::CPU {
// Emits a 64-bit register move from the op's Source into the destination register.
DEF_OP(Copy) {
  const auto CopyOp = IROp->C<IR::IROp_Copy>();
  const auto Dst = GetReg(Node);
  const auto Src = GetReg(CopyOp->Source);

  mov(ARMEmitter::Size::i64Bit, Dst, Src);
}

// Emits a 64-bit register move from the op's first argument into the destination register.
DEF_OP(RMWHandle) {
  const auto Dst = GetReg(Node);
  const auto Src = GetReg(IROp->Args[0]);

  mov(ARMEmitter::Size::i64Bit, Dst, Src);
}

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/Relocations.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>

// Forward declaration only; this header refers to ContextImpl by reference
// (see GetNamedSymbolLiteral) and does not need the full definition.
namespace FEXCore::Context {
class ContextImpl;
}

namespace FEXCore::CPU {
// Discriminator stored in RelocationHeader::Type that identifies which relocation
// record a Relocation union currently holds.
// Enumerators rely on implicit numbering, so their order determines their values.
enum class RelocationTypes : uint32_t {
  // 8 byte literal in memory for symbol
  // Aligned to struct RelocNamedSymbolLiteral
  RELOC_NAMED_SYMBOL_LITERAL,

  // Fixed size named thunk move
  // 4 instruction constant generation
  // Aligned to struct RelocNamedThunkMove
  RELOC_NAMED_THUNK_MOVE,

  // 8 byte literal (relative to binary base address)
  RELOC_GUEST_RIP_LITERAL,

  // Fixed size guest RIP move
  // 4 instruction constant generation
  // Aligned to struct RelocGuestRIP
  RELOC_GUEST_RIP_MOVE,
};

// Common prefix shared by every relocation record: where the relocation applies and what kind it is.
// Declared FEX_PACKED, so no padding is inserted between or after the members.
struct FEX_PACKED RelocationHeader final {
  // Offset to the relocated host code data
  uint64_t Offset {};

  // Which relocation record follows this header
  RelocationTypes Type;
};

// Relocation record for a literal that must be resolved to the address of a named symbol.
struct RelocNamedSymbolLiteral final {
  // Identifiers of the symbols that can be requested through this relocation.
  enum class NamedSymbol : uint32_t {
    ///< Thread specific relocations
    // JIT Literal pointers
    SYMBOL_LITERAL_EXITFUNCTION_LINKER,
  };

  RelocationHeader Header {};

  // The symbol whose value is written at Header.Offset
  NamedSymbol Symbol;

  // Explicit padding.
  // NOTE(review): presumably sized so this record matches the other members of the Relocation union - confirm.
  uint32_t Pad[8];
};

// Relocation record for a fixed-size constant move that loads a thunk, identified by its
// SHA256 hash, into a GPR.
struct RelocNamedThunkMove final {
  RelocationHeader Header {};

  // GPR index the constant is being moved to
  uint32_t RegisterIndex;

  // The thunk SHA256 hash
  IR::SHA256Sum Symbol;
};

// Relocation record for a guest RIP value, used for both the literal and the move form
// (RELOC_GUEST_RIP_LITERAL / RELOC_GUEST_RIP_MOVE).
struct RelocGuestRIP final {
  RelocationHeader Header {};

  // GPR index the constant is being moved to (for non-literal relocations)
  uint8_t RegisterIndex;

  // Explicit padding after the one-byte register index
  char Pad[3];

  // The base RIP (to be moved by the register for non-literal relocations).
  // In a serialized code cache, this is relative to the binary base address.
  uint64_t GuestRIP;

  // Explicit zero-initialized tail padding.
  // NOTE(review): presumably sized so this record matches the other members of the Relocation union - confirm.
  uint32_t pad2[6] {};
};

// Storage for any one relocation record. Every member begins with a RelocationHeader,
// whose Type field says which member is the active one.
union Relocation {
  // Returns a zero-initialized Relocation.
  // Clang 16 Can't default-initialize this union
  static Relocation Default() {
#if __clang_major__ < 17
    // Workaround path: name one member explicitly, then zero the whole object.
    Relocation Ret {.Header {}};
    memset(&Ret, 0, sizeof(Ret));
    return Ret;
#else
    return {};
#endif
  }

  RelocationHeader Header {};

  RelocNamedSymbolLiteral NamedSymbolLiteral;
  // This makes our union of relocations at least 48 bytes
  // It might be more efficient to not use a union
  RelocNamedThunkMove NamedThunkMove;

  RelocGuestRIP GuestRIP;
};

// Returns the 64-bit value associated with the given named symbol for the supplied context.
// NOTE(review): presumably the address patched into a RELOC_NAMED_SYMBOL_LITERAL slot - confirm against the definition.
uint64_t GetNamedSymbolLiteral(FEXCore::Context::ContextImpl&, RelocNamedSymbolLiteral::NamedSymbol);

} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/JIT/VectorOps.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: backend|arm64
$end_info$
*/

#include "Interface/Core/Dispatcher/Dispatcher.h"
#include "Interface/Core/JIT/JITClass.h"

#include <FEXCore/Utils/MathUtils.h>

namespace FEXCore::CPU {

// Generates the DEF_OP handler for a unary vector op.
//   FEXOp      - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp      - emitter function to invoke
//   ScalarCase - expression; when true and the element size equals the op size, the D-register form is used
// 256-bit ops use the predicated Z-register form and require SVE256 support; everything else uses Q registers.
#define DEF_UNOP(FEXOp, ARMOp, ScalarCase)                                                                                          \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto ElementSize = Op->Header.ElementSize;                                                                                \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                               \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto Src = GetVReg(Op->Vector);                                                                                           \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());                                                                  \
    } else if (ElementSize == OpSize && ScalarCase) {                                                                               \
      ARMOp(SubRegSize, Dst.D(), Src.D());                                                                                          \
    } else {                                                                                                                        \
      ARMOp(SubRegSize, Dst.Q(), Src.Q());                                                                                          \
    }                                                                                                                               \
  }

// Generates the DEF_OP handler for a two-source bitwise vector op (no element size argument).
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke
// Register form by op size: 256-bit -> Z (requires SVE256), 128-bit -> Q, anything else -> D.
#define DEF_BITOP(FEXOp, ARMOp)                                                                                                     \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    const auto Is128Bit = OpSize == IR::OpSize::i128Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto Vector1 = GetVReg(Op->Vector1);                                                                                      \
    const auto Vector2 = GetVReg(Op->Vector2);                                                                                      \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(Dst.Z(), Vector1.Z(), Vector2.Z());                                                                                     \
      return;                                                                                                                       \
    }                                                                                                                               \
    if (Is128Bit) {                                                                                                                 \
      ARMOp(Dst.Q(), Vector1.Q(), Vector2.Q());                                                                                     \
      return;                                                                                                                       \
    }                                                                                                                               \
    ARMOp(Dst.D(), Vector1.D(), Vector2.D());                                                                                       \
  }

// Generates the DEF_OP handler for a two-source element-wise vector op.
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke
// 256-bit ops use the Z-register form (requires SVE256); all other sizes use the Q-register form.
#define DEF_BINOP(FEXOp, ARMOp)                                                                                                     \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                               \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto Vector1 = GetVReg(Op->Vector1);                                                                                      \
    const auto Vector2 = GetVReg(Op->Vector2);                                                                                      \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());                                                                         \
      return;                                                                                                                       \
    }                                                                                                                               \
    ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());                                                                           \
  }

// Generates the DEF_OP handler for a vector op taking VectorLower / VectorUpper sources.
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke
// Register form by op size: 256-bit -> Z (requires SVE256), 64-bit -> D, anything else -> Q.
#define DEF_ZIPOP(FEXOp, ARMOp)                                                                                                     \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto SubRegSize = ConvertSubRegSize8(IROp);                                                                               \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto VectorLower = GetVReg(Op->VectorLower);                                                                              \
    const auto VectorUpper = GetVReg(Op->VectorUpper);                                                                              \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(SubRegSize, Dst.Z(), VectorLower.Z(), VectorUpper.Z());                                                                 \
    } else if (OpSize == IR::OpSize::i64Bit) {                                                                                      \
      ARMOp(SubRegSize, Dst.D(), VectorLower.D(), VectorUpper.D());                                                                 \
    } else {                                                                                                                        \
      ARMOp(SubRegSize, Dst.Q(), VectorLower.Q(), VectorUpper.Q());                                                                 \
    }                                                                                                                               \
  }

// Generates the DEF_OP handler for a unary floating-point vector op.
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke
// 256-bit ops use the predicated Z-register form (requires SVE256). When the element size equals
// the op size the scalar H/S/D form is used (other element sizes emit nothing); otherwise the
// Q-register vector form is used.
#define DEF_FUNOP(FEXOp, ARMOp)                                                                                                     \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto ElementSize = Op->Header.ElementSize;                                                                                \
    const auto SubRegSize = ConvertSubRegSize248(IROp);                                                                             \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto Src = GetVReg(Op->Vector);                                                                                           \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(SubRegSize, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());                                                                  \
      return;                                                                                                                       \
    }                                                                                                                               \
    if (ElementSize != OpSize) {                                                                                                    \
      ARMOp(SubRegSize, Dst.Q(), Src.Q());                                                                                          \
      return;                                                                                                                       \
    }                                                                                                                               \
    switch (ElementSize) {                                                                                                          \
    case IR::OpSize::i16Bit: {                                                                                                      \
      ARMOp(Dst.H(), Src.H());                                                                                                      \
      break;                                                                                                                        \
    }                                                                                                                               \
    case IR::OpSize::i32Bit: {                                                                                                      \
      ARMOp(Dst.S(), Src.S());                                                                                                      \
      break;                                                                                                                        \
    }                                                                                                                               \
    case IR::OpSize::i64Bit: {                                                                                                      \
      ARMOp(Dst.D(), Src.D());                                                                                                      \
      break;                                                                                                                        \
    }                                                                                                                               \
    default: break;                                                                                                                 \
    }                                                                                                                               \
  }

// Generates the DEF_OP handler for a two-source floating-point vector op.
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke
// 256-bit ops use the Z-register form (requires SVE256). When the element size equals the op size
// the scalar H/S/D form is used (other element sizes emit nothing); otherwise the Q-register
// vector form is used.
#define DEF_FBINOP(FEXOp, ARMOp)                                                                                                    \
  DEF_OP(FEXOp) {                                                                                                                   \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                                    \
    const auto OpSize = IROp->Size;                                                                                                 \
    const auto ElementSize = Op->Header.ElementSize;                                                                                \
    const auto SubRegSize = ConvertSubRegSize248(IROp);                                                                             \
    const auto Is256Bit = OpSize == IR::OpSize::i256Bit;                                                                            \
    LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__); \
    const auto IsScalar = ElementSize == OpSize;                                                                                    \
    const auto Dst = GetVReg(Node);                                                                                                 \
    const auto Vector1 = GetVReg(Op->Vector1);                                                                                      \
    const auto Vector2 = GetVReg(Op->Vector2);                                                                                      \
                                                                                                                                    \
    if (HostSupportsSVE256 && Is256Bit) {                                                                                           \
      ARMOp(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());                                                                         \
      return;                                                                                                                       \
    }                                                                                                                               \
    if (!IsScalar) {                                                                                                                \
      ARMOp(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());                                                                         \
      return;                                                                                                                       \
    }                                                                                                                               \
    switch (ElementSize) {                                                                                                          \
    case IR::OpSize::i16Bit: {                                                                                                      \
      ARMOp(Dst.H(), Vector1.H(), Vector2.H());                                                                                     \
      break;                                                                                                                        \
    }                                                                                                                               \
    case IR::OpSize::i32Bit: {                                                                                                      \
      ARMOp(Dst.S(), Vector1.S(), Vector2.S());                                                                                     \
      break;                                                                                                                        \
    }                                                                                                                               \
    case IR::OpSize::i64Bit: {                                                                                                      \
      ARMOp(Dst.D(), Vector1.D(), Vector2.D());                                                                                     \
      break;                                                                                                                        \
    }                                                                                                                               \
    default: break;                                                                                                                 \
    }                                                                                                                               \
  }

// Generates the DEF_OP handler for a two-source scalar floating-point op.
//   FEXOp - IR op name (pasted into IR::IROp_<FEXOp>)
//   ARMOp - emitter function to invoke with the scalar sub-register size
// The scalar computation is wrapped in a lambda and handed to VFScalarOperation together with the
// op size, element size, ZeroUpperBits flag and the destination/source registers.
#define DEF_FBINOP_SCALAR_INSERT(FEXOp, ARMOp)                                                                                 \
  DEF_OP(FEXOp) {                                                                                                              \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                               \
    const auto SubRegSize = ConvertSubRegSizePair248(IROp);                                                                    \
    const auto Dst = GetVReg(Node);                                                                                            \
    const auto Vector1 = GetVReg(Op->Vector1);                                                                                 \
    const auto Vector2 = GetVReg(Op->Vector2);                                                                                 \
                                                                                                                               \
    auto ScalarEmit = [this, SubRegSize](ARMEmitter::VRegister Result, ARMEmitter::VRegister Lhs, ARMEmitter::VRegister Rhs) { \
      ARMOp(SubRegSize.Scalar, Result, Lhs, Rhs);                                                                              \
    };                                                                                                                         \
                                                                                                                               \
    VFScalarOperation(IROp->Size, Op->Header.ElementSize, Op->ZeroUpperBits, ScalarEmit, Dst, Vector1, Vector2);               \
  }

// Generates the JIT handler for a fused multiply-add style "scalar insert" IR op.
//   FEXOp: name of the IR op (used to form IR::IROp_<FEXOp>).
//   ARMOp: emitter method, invoked with sized register views as ARMOp(Dst, Src1, Src2, Src3).
// Only 16-, 32- and 64-bit element sizes emit an instruction; any other size emits nothing here.
// Placement of the result and of the upper bits is handled by VFScalarFMAOperation.
#define DEF_FMAOP_SCALAR_INSERT(FEXOp, ARMOp)                                                                     \
  DEF_OP(FEXOp) {                                                                                                 \
    const auto Op = IROp->C<IR::IROp_##FEXOp>();                                                                  \
    const auto ElementSize = Op->Header.ElementSize;                                                              \
                                                                                                                  \
    const auto Result = GetVReg(Node);                                                                            \
    const auto UpperSource = GetVReg(Op->Upper);                                                                  \
    const auto MulLhs = GetVReg(Op->Vector1);                                                                     \
    const auto MulRhs = GetVReg(Op->Vector2);                                                                     \
    const auto AddendReg = GetVReg(Op->Addend);                                                                   \
                                                                                                                  \
    /* Pick the register view that matches the element width. */                                                  \
    auto EmitScalar = [this, ElementSize](ARMEmitter::VRegister Out, ARMEmitter::VRegister A,                     \
                                          ARMEmitter::VRegister B, ARMEmitter::VRegister C) {                     \
      switch (ElementSize) {                                                                                      \
      case IR::OpSize::i16Bit: ARMOp(Out.H(), A.H(), B.H(), C.H()); break;                                        \
      case IR::OpSize::i32Bit: ARMOp(Out.S(), A.S(), B.S(), C.S()); break;                                        \
      case IR::OpSize::i64Bit: ARMOp(Out.D(), A.D(), B.D(), C.D()); break;                                        \
      default: break;                                                                                             \
      }                                                                                                           \
    };                                                                                                            \
                                                                                                                  \
    VFScalarFMAOperation(IROp->Size, ElementSize, EmitScalar, Result, UpperSource, MulLhs, MulRhs, AddendReg);    \
  }

// Handler instantiations. Each line pairs an IR op name (first argument) with the Arm64 emitter
// mnemonic used to implement it (second argument). The DEF_UNOP/DEF_BITOP/DEF_BINOP/DEF_ZIPOP/
// DEF_FUNOP/DEF_FBINOP macros are defined earlier in this file.

// Unary vector ops.
// NOTE(review): the meaning of the third (boolean) argument is defined by DEF_UNOP, not visible here.
DEF_UNOP(VAbs, abs, true)
DEF_UNOP(VPopcount, cnt, true)
DEF_UNOP(VNeg, neg, false)
DEF_UNOP(VFNeg, fneg, false)

// Bitwise vector ops.
DEF_BITOP(VAnd, and_)
DEF_BITOP(VAndn, bic)
DEF_BITOP(VOr, orr)
DEF_BITOP(VXor, eor)

// Integer binary vector ops, including the saturating add/sub variants.
DEF_BINOP(VAdd, add)
DEF_BINOP(VSub, sub)
DEF_BINOP(VUQAdd, uqadd)
DEF_BINOP(VUQSub, uqsub)
DEF_BINOP(VSQAdd, sqadd)
DEF_BINOP(VSQSub, sqsub)

// Element permutes (zip / unzip / transpose).
DEF_ZIPOP(VZip, zip1)
DEF_ZIPOP(VZip2, zip2)
DEF_ZIPOP(VUnZip, uzp1)
DEF_ZIPOP(VUnZip2, uzp2)
DEF_ZIPOP(VTrn, trn1)
DEF_ZIPOP(VTrn2, trn2)

// Floating-point unary vector ops.
DEF_FUNOP(VFSqrt, fsqrt)
DEF_FUNOP(VFAbs, fabs)

// Floating-point binary vector ops.
DEF_FBINOP(VFAdd, fadd)
DEF_FBINOP(VFSub, fsub)
DEF_FBINOP(VFMul, fmul)

// Floating-point binary scalar-insert ops; see DEF_FBINOP_SCALAR_INSERT / VFScalarOperation.
DEF_FBINOP_SCALAR_INSERT(VFAddScalarInsert, fadd)
DEF_FBINOP_SCALAR_INSERT(VFSubScalarInsert, fsub)
DEF_FBINOP_SCALAR_INSERT(VFMulScalarInsert, fmul)
DEF_FBINOP_SCALAR_INSERT(VFDivScalarInsert, fdiv)

// Fused multiply-add scalar-insert ops; see DEF_FMAOP_SCALAR_INSERT / VFScalarFMAOperation.
// The IR op names and the Arm64 mnemonics deliberately do not line up one-to-one
// (e.g. VFMLS -> fnmsub, VFNMLA -> fmsub).
// NOTE(review): presumably because Arm64 fmsub computes addend - a*b while fnmsub computes
// a*b - addend; confirm against the Arm architecture reference before changing any pairing.
DEF_FMAOP_SCALAR_INSERT(VFMLAScalarInsert, fmadd)
DEF_FMAOP_SCALAR_INSERT(VFMLSScalarInsert, fnmsub)
DEF_FMAOP_SCALAR_INSERT(VFNMLAScalarInsert, fmsub)
DEF_FMAOP_SCALAR_INSERT(VFNMLSScalarInsert, fnmadd)

// VFScalarFMAOperation emits a scalar fused multiply-add style operation (supplied through ScalarEmit)
// on the lowest elements of Vector1, Vector2 and Addend, and places the result in the lowest element
// of Dst. All remaining bits of the 128-bit destination come from Upper.
//
// Parameters:
//   OpSize      - Size of the whole operation; only 128-bit is supported.
//   ElementSize - Size of the scalar element; must be 16, 32 or 64 bits.
//   ScalarEmit  - Callback emitting the scalar instruction as (Dst, Src1, Src2, Src3).
//   Dst         - Destination register.
//   Upper       - Register providing every destination bit above the lowest element.
//   Vector1/2   - Multiplication operands.
//   Addend      - Addition operand.
//
// Dst is allowed to alias any of the sources. To keep that safe, the scalar result is always computed
// before Dst is overwritten with Upper, unless the in-place fast path applies.
void Arm64JITCore::VFScalarFMAOperation(IR::OpSize OpSize, IR::OpSize ElementSize, ScalarFMAOpCaller ScalarEmit, ARMEmitter::VRegister Dst,
                                        ARMEmitter::VRegister Upper, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2,
                                        ARMEmitter::VRegister Addend) {
  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit, "256-bit unsupported in {}", __func__);

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit || ElementSize == IR::OpSize::i64Bit,
                     "Invalid size");
  const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSize == IR::OpSize::i16Bit ? ARMEmitter::SubRegSize::i16Bit :
                                                       ElementSize == IR::OpSize::i32Bit ? ARMEmitter::SubRegSize::i32Bit :
                                                                                           ARMEmitter::SubRegSize::i64Bit);

  if (HostSupportsAFP && Dst == Upper && Dst == Addend) {
    ///< Exactly matches ARM scalar FMA semantics
    // If the host CPU supports AFP then scalar does an insert without modifying upper bits.
    // This is only valid when the destination already holds both the upper bits and the addend;
    // otherwise writing Dst in place would either lose Upper or read a clobbered addend.
    ScalarEmit(Dst, Vector1, Vector2, Addend);
    return;
  }

  // General path: compute the scalar result into a temporary while every source is still intact.
  ScalarEmit(VTMP1, Vector1, Vector2, Addend);

  if (Dst != Upper) {
    // If destination is not tied, move the upper bits to the destination.
    // This must happen after the scalar operation since Dst may alias Vector1, Vector2 or Addend.
    mov(Dst.Q(), Upper.Q());
  }

  // Finally insert the scalar result into the lowest element.
  ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0);
}

// VFScalarOperation emits a two-source scalar operation (provided as ScalarEmit) that consumes only the
// lowest element of Vector1 and Vector2 and writes its result to the lowest element of Dst.
// Every other bit of Dst is sourced from Vector1. The exception is a 256-bit operation with
// ZeroUpperBits set, where the upper bits of Dst are zero instead.
void Arm64JITCore::VFScalarOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarBinaryOpCaller ScalarEmit,
                                     ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1, ARMEmitter::VRegister Vector2) {
  const bool Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__);
  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit || ElementSize == IR::OpSize::i64Bit,
                     "Invalid size");

  const auto ElementSubRegSize = [ElementSize]() {
    switch (ElementSize) {
    case IR::OpSize::i16Bit: return ARMEmitter::SubRegSize::i16Bit;
    case IR::OpSize::i32Bit: return ARMEmitter::SubRegSize::i32Bit;
    default: return ARMEmitter::SubRegSize::i64Bit;
    }
  }();
  const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSubRegSize);

  // True when the full 256-bit contents of Vector1 must survive into Dst.
  const bool KeepFullWidth = Is256Bit && !ZeroUpperBits;

  // Copies Vector1 into Dst, either as a full SVE register or as an ASIMD 128-bit move.
  const auto CopyVector1ToDst = [this, KeepFullWidth, Dst, Vector1]() {
    if (KeepFullWidth) {
      mov(Dst.Z(), Vector1.Z());
    } else {
      mov(Dst.Q(), Vector1.Q());
    }
  };

  // Inserts the lowest element of VTMP1 into Dst.
  const auto InsertTempIntoDst = [this, KeepFullWidth, SubRegSize, Dst]() {
    if (KeepFullWidth) {
      constexpr auto Predicate = ARMEmitter::PReg::p0;
      ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1);
      mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z());
    } else {
      ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0);
    }
  };

  if (Dst != Vector1 && Dst == Vector2) {
    // Dst only aliases Vector2: the operation has to run before Dst is overwritten with Vector1.
    ScalarEmit(VTMP1, Vector1, Vector2);
    CopyVector1ToDst();
    InsertTempIntoDst();
    return;
  }

  // Bit of a tricky detail.
  // The upper bits of the destination comes from Vector1.
  if (Dst == Vector1) {
    if (ZeroUpperBits) {
      // When zeroing the upper 128-bits we just use an ASIMD move.
      mov(Dst.Q(), Vector1.Q());
    }
  } else {
    // Dst is distinct from both sources.
    CopyVector1ToDst();
  }

  if (HostSupportsAFP) {
    // If the host CPU supports AFP then scalar does an insert without modifying upper bits.
    ScalarEmit(Dst, Vector1, Vector2);
    return;
  }

  // If AFP is unsupported then the operation result goes in to a temporary
  // and then it gets inserted.
  ScalarEmit(VTMP1, Vector1, Vector2);
  InsertTempIntoDst();
}

// VFScalarUnaryOperation is the single-source counterpart of VFScalarOperation: ScalarEmit operates on
// Vector2 only (which may be a vector register or a general purpose register), and its result is placed
// in the lowest element of Dst. Every other bit of Dst is sourced from Vector1. The exception is a
// 256-bit operation with ZeroUpperBits set, where the upper bits of Dst are zero instead.
void Arm64JITCore::VFScalarUnaryOperation(IR::OpSize OpSize, IR::OpSize ElementSize, bool ZeroUpperBits, ScalarUnaryOpCaller ScalarEmit,
                                          ARMEmitter::VRegister Dst, ARMEmitter::VRegister Vector1,
                                          std::variant<ARMEmitter::VRegister, ARMEmitter::Register> Vector2) {
  const bool Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  LOGMAN_THROW_A_FMT(Is256Bit || !ZeroUpperBits, "128-bit operation doesn't support ZeroUpperBits in {}", __func__);
  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit || ElementSize == IR::OpSize::i64Bit,
                     "Invalid size");

  const auto ElementSubRegSize = [ElementSize]() {
    switch (ElementSize) {
    case IR::OpSize::i16Bit: return ARMEmitter::SubRegSize::i16Bit;
    case IR::OpSize::i32Bit: return ARMEmitter::SubRegSize::i32Bit;
    default: return ARMEmitter::SubRegSize::i64Bit;
    }
  }();
  const auto SubRegSize = ARMEmitter::ToVectorSizePair(ElementSubRegSize);

  // True when the full 256-bit contents of Vector1 must survive into Dst.
  const bool KeepFullWidth = Is256Bit && !ZeroUpperBits;

  // A general purpose register source can never alias the vector destination.
  const auto* Vector2AsVReg = std::get_if<ARMEmitter::VRegister>(&Vector2);
  const bool DstOverlapsVector2 = Vector2AsVReg != nullptr && Dst == *Vector2AsVReg;

  // Copies Vector1 into Dst, either as a full SVE register or as an ASIMD 128-bit move.
  const auto CopyVector1ToDst = [this, KeepFullWidth, Dst, Vector1]() {
    if (KeepFullWidth) {
      mov(Dst.Z(), Vector1.Z());
    } else {
      mov(Dst.Q(), Vector1.Q());
    }
  };

  // Inserts the lowest element of VTMP1 into Dst.
  const auto InsertTempIntoDst = [this, KeepFullWidth, SubRegSize, Dst]() {
    if (KeepFullWidth) {
      constexpr auto Predicate = ARMEmitter::PReg::p0;
      ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1);
      mov(SubRegSize.Vector, Dst.Z(), Predicate.Merging(), VTMP1.Z());
    } else {
      ins(SubRegSize.Vector, Dst.Q(), 0, VTMP1.Q(), 0);
    }
  };

  if (Dst != Vector1 && DstOverlapsVector2) {
    // Destination intersects Vector2, can't do anything optimal in this case.
    // Do the scalar operation first and then move and insert.
    ScalarEmit(VTMP1, Vector2);
    CopyVector1ToDst();
    InsertTempIntoDst();
    return;
  }

  if (Dst == Vector1) {
    if (ZeroUpperBits) {
      // When zeroing the upper 128-bits we just use an ASIMD move.
      mov(Dst.Q(), Vector1.Q());
    }
  } else {
    // Dst aliases neither source.
    CopyVector1ToDst();
  }

  if (HostSupportsAFP) {
    // If the host CPU supports AFP then scalar does an insert without modifying upper bits.
    ScalarEmit(Dst, Vector2);
    return;
  }

  // If AFP is unsupported then the operation result goes in to a temporary
  // and then it gets inserted.
  ScalarEmit(VTMP1, Vector2);
  InsertTempIntoDst();
}

// Scalar floating-point minimum of the lowest elements of Vector1 and Vector2, inserted into the
// lowest element of the destination. The rest of the destination is handled by VFScalarOperation.
DEF_OP(VFMinScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFMinScalarInsert>();
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(Op->Vector1);
  const auto Rhs = GetVReg(Op->Vector2);

  auto EmitMin = [this, Sizes](ARMEmitter::VRegister Out, ARMEmitter::VRegister A, ARMEmitter::VRegister B) {
    if (HostSupportsAFP) {
      // AFP.AH lets fmin behave like x86 min
      fmin(Sizes.Scalar, Out, A, B);
      return;
    }

    // Without AFP: select the first operand only when it compares strictly less,
    // otherwise the second. This reproduces the x86 handling of signed zeros and NaNs,
    // which differs from IEEE-754 minimum semantics.
    fcmp(Sizes.Scalar, A, B);
    fcsel(Sizes.Scalar, Out, A, B, ARMEmitter::Condition::CC_MI);
  };

  VFScalarOperation(IROp->Size, Op->Header.ElementSize, Op->ZeroUpperBits, EmitMin, Result, Lhs, Rhs);
}

// Scalar floating-point maximum of the lowest elements of Vector1 and Vector2, inserted into the
// lowest element of the destination. The rest of the destination is handled by VFScalarOperation.
DEF_OP(VFMaxScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFMaxScalarInsert>();
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(Op->Vector1);
  const auto Rhs = GetVReg(Op->Vector2);

  // AFP can make this more optimal.
  auto EmitMax = [this, Sizes](ARMEmitter::VRegister Out, ARMEmitter::VRegister A, ARMEmitter::VRegister B) {
    if (HostSupportsAFP) {
      // AFP.AH lets fmax behave like x86 max
      fmax(Sizes.Scalar, Out, A, B);
      return;
    }

    // Without AFP: select the first operand only when it compares strictly greater,
    // otherwise the second. Same reasoning as the minimum handler.
    fcmp(Sizes.Scalar, A, B);
    fcsel(Sizes.Scalar, Out, A, B, ARMEmitter::Condition::CC_GT);
  };

  VFScalarOperation(IROp->Size, Op->Header.ElementSize, Op->ZeroUpperBits, EmitMax, Result, Lhs, Rhs);
}

// Scalar floating-point square root of the lowest element of Vector2, inserted into the lowest
// element of the destination. The rest of the destination is handled by VFScalarUnaryOperation.
DEF_OP(VFSqrtScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFSqrtScalarInsert>();
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  auto EmitSqrt = [this, Sizes](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    // The source is always a vector register for this op.
    const auto* SrcReg = std::get_if<ARMEmitter::VRegister>(&SrcVar);
    fsqrt(Sizes.Scalar, Out, *SrcReg);
  };

  VFScalarUnaryOperation(IROp->Size, Op->Header.ElementSize, Op->ZeroUpperBits, EmitSqrt, Result, UpperSource, Input);
}

// Scalar reciprocal square root of the lowest element of Vector2, inserted into the lowest element
// of the destination. Uses the estimate instruction when the element is 32-bit and the host reports
// RPRES support; otherwise computes 1.0 / sqrt(x) explicitly.
DEF_OP(VFRSqrtScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFRSqrtScalarInsert>();
  const auto ElementSize = Op->Header.ElementSize;
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  // Explicit 1.0 / sqrt(x) sequence using both vector temporaries.
  auto EmitDivide = [this, Sizes](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto* SrcReg = std::get_if<ARMEmitter::VRegister>(&SrcVar);

    fmov(Sizes.Scalar, VTMP1.Q(), 1.0f);
    fsqrt(Sizes.Scalar, VTMP2, *SrcReg);
    if (!HostSupportsAFP) {
      fdiv(Sizes.Scalar, Out, VTMP1, VTMP2);
      return;
    }

    // With AFP the quotient is produced in the temporary and then inserted into the output.
    fdiv(Sizes.Scalar, VTMP1, VTMP1, VTMP2);
    ins(Sizes.Vector, Out, 0, VTMP1, 0);
  };

  // Single-instruction estimate.
  auto EmitEstimate = [this, Sizes](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto* SrcReg = std::get_if<ARMEmitter::VRegister>(&SrcVar);
    frsqrte(Sizes.Scalar, Out.D(), SrcReg->D());
  };

  const bool UseEstimate = ElementSize == IR::OpSize::i32Bit && HostSupportsRPRES;

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  if (UseEstimate) {
    VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitEstimate, Result, UpperSource, Input);
  } else {
    VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitDivide, Result, UpperSource, Input);
  }
}

// Scalar reciprocal of the lowest element of Vector2, inserted into the lowest element of the
// destination. Uses the estimate instruction when the element is 32-bit and the host reports
// RPRES support; otherwise computes 1.0 / x explicitly.
DEF_OP(VFRecpScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFRecpScalarInsert>();
  const auto ElementSize = Op->Header.ElementSize;
  const auto Sizes = ConvertSubRegSizePair248(IROp);

  // Explicit 1.0 / x sequence using VTMP1 for the constant.
  auto EmitDivide = [this, Sizes](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto* SrcReg = std::get_if<ARMEmitter::VRegister>(&SrcVar);

    fmov(Sizes.Scalar, VTMP1.Q(), 1.0f);
    if (!HostSupportsAFP) {
      fdiv(Sizes.Scalar, Out, VTMP1, *SrcReg);
      return;
    }

    // With AFP the quotient is produced in the temporary and then inserted into the output.
    fdiv(Sizes.Scalar, VTMP1, VTMP1, *SrcReg);
    ins(Sizes.Vector, Out, 0, VTMP1, 0);
  };

  // Single-instruction estimate.
  auto EmitEstimate = [this, Sizes](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto* SrcReg = std::get_if<ARMEmitter::VRegister>(&SrcVar);
    frecpe(Sizes.Scalar, Out, *SrcReg);
  };

  const bool UseEstimate = ElementSize == IR::OpSize::i32Bit && HostSupportsRPRES;

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  if (UseEstimate) {
    VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitEstimate, Result, UpperSource, Input);
  } else {
    VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitDivide, Result, UpperSource, Input);
  }
}

// Scalar float-to-float precision conversion of the lowest element of Vector2, inserted into the
// lowest element of the destination. The rest of the destination is handled by VFScalarUnaryOperation.
DEF_OP(VFToFScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFToFScalarInsert>();
  const auto ElementSize = Op->Header.ElementSize;

  // Conversion key: destination element size in bytes in the high byte, source size in the low byte.
  const auto DstBytes = IR::OpSizeToSize(Op->Header.ElementSize);
  const auto SrcBytes = IR::OpSizeToSize(Op->SrcElementSize);
  const uint16_t Conv = (DstBytes << 8) | SrcBytes;

  auto EmitConvert = [this, Conv](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto In = *std::get_if<ARMEmitter::VRegister>(&SrcVar);

    switch (Conv) {
    // Half <- Float
    case 0x0204: fcvt(Out.H(), In.S()); break;
    // Half <- Double
    case 0x0208: fcvt(Out.H(), In.D()); break;
    // Float <- Half
    case 0x0402: fcvt(Out.S(), In.H()); break;
    // Double <- Half
    case 0x0802: fcvt(Out.D(), In.H()); break;
    // Double <- Float
    case 0x0804: fcvt(Out.D(), In.S()); break;
    // Float <- Double
    case 0x0408: fcvt(Out.S(), In.D()); break;
    default: LOGMAN_MSG_A_FMT("Unknown FCVT sizes: 0x{:x}", Conv);
    }
  };

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitConvert, Result, UpperSource, Input);
}

// Signed-integer to float conversion where the integer source lives in a vector register.
// Either converts a single 32/64-bit element (scalar insert), or, when HasTwoElements is set,
// converts two 32-bit elements and inserts the resulting 64 bits into the destination.
DEF_OP(VSToFVectorInsert) {
  const auto Op = IROp->C<IR::IROp_VSToFVectorInsert>();
  const auto ElementSize = Op->Header.ElementSize;
  const auto HasTwoElements = Op->HasTwoElements;

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i32Bit || ElementSize == IR::OpSize::i64Bit, "Invalid size");
  LOGMAN_THROW_A_FMT(!HasTwoElements || ElementSize == IR::OpSize::i32Bit, "Can't have two elements for 8-byte size");

  auto EmitConvert = [this, ElementSize, HasTwoElements](ARMEmitter::VRegister Out,
                                                         std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto In = *std::get_if<ARMEmitter::VRegister>(&SrcVar);

    if (ElementSize != IR::OpSize::i32Bit) {
      scvtf(ARMEmitter::ScalarRegSize::i64Bit, Out.D(), In.D());
      return;
    }

    if (HasTwoElements) {
      // Vector form: two 32-bit lanes inside a 64-bit register.
      scvtf(ARMEmitter::SubRegSize::i32Bit, Out.D(), In.D());
    } else {
      scvtf(ARMEmitter::ScalarRegSize::i32Bit, Out.S(), In.S());
    }
  };

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  // Claim the element size is 8-bytes.
  // Might be scalar 8-byte (cvtsi2ss xmm0, rax)
  // Might be vector i32v2 (cvtpi2ps xmm0, mm0)
  if (!HasTwoElements) {
    VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitConvert, Result, UpperSource, Input);
    return;
  }

  // Dealing with the odd case of this being actually a vector operation rather than scalar.
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const bool KeepFullWidth = Is256Bit && !Op->ZeroUpperBits;

  // Convert into the temporary first so the destination may alias the input.
  EmitConvert(VTMP1, Input);

  if (Result != UpperSource) {
    if (KeepFullWidth) {
      mov(Result.Z(), UpperSource.Z());
    } else {
      mov(Result.Q(), UpperSource.Q());
    }
  }

  // Insert the converted pair as a single 64-bit element.
  if (KeepFullWidth) {
    constexpr auto Predicate = ARMEmitter::PReg::p0;
    ptrue(ARMEmitter::SubRegSize::i64Bit, Predicate, ARMEmitter::PredicatePattern::SVE_VL1);
    mov(ARMEmitter::SubRegSize::i64Bit, Result.Z(), Predicate.Merging(), VTMP1.Z());
  } else {
    ins(ARMEmitter::SubRegSize::i64Bit, Result.Q(), 0, VTMP1.Q(), 0);
  }
}

// Signed-integer to float conversion where the integer source lives in a general purpose register.
// The converted value is inserted into the lowest element of the destination; the rest of the
// destination is handled by VFScalarUnaryOperation.
DEF_OP(VSToFGPRInsert) {
  const auto Op = IROp->C<IR::IROp_VSToFGPRInsert>();
  const auto ElementSize = Op->Header.ElementSize;

  // Conversion key: float element size in bytes in the high byte, integer size in the low byte.
  const auto FloatBytes = IR::OpSizeToSize(ElementSize);
  const auto IntBytes = IR::OpSizeToSize(Op->SrcElementSize);
  const uint16_t Conv = (FloatBytes << 8) | IntBytes;

  auto EmitConvert = [this, Conv](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    // The source is always a general purpose register for this op.
    const auto In = *std::get_if<ARMEmitter::Register>(&SrcVar);

    switch (Conv) {
    // Half <- int32_t
    case 0x0204: scvtf(ARMEmitter::Size::i32Bit, Out.H(), In); break;
    // Half <- int64_t
    case 0x0208: scvtf(ARMEmitter::Size::i64Bit, Out.H(), In); break;
    // Float <- int32_t
    case 0x0404: scvtf(ARMEmitter::Size::i32Bit, Out.S(), In); break;
    // Float <- int64_t
    case 0x0408: scvtf(ARMEmitter::Size::i64Bit, Out.S(), In); break;
    // Double <- int32_t
    case 0x0804: scvtf(ARMEmitter::Size::i32Bit, Out.D(), In); break;
    // Double <- int64_t
    case 0x0808: scvtf(ARMEmitter::Size::i64Bit, Out.D(), In); break;
    default: LOGMAN_MSG_A_FMT("Unhandled conversion mask: Mask=0x{:04x}", Conv); break;
    }
  };

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector);
  const auto IntegerSource = GetReg(Op->Src);

  VFScalarUnaryOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, EmitConvert, Result, UpperSource, IntegerSource);
}

// Rounds the lowest element of Vector2 to an integral floating-point value using the rounding mode
// carried by the IR op, and inserts it into the lowest element of the destination.
// The rest of the destination is handled by VFScalarUnaryOperation.
DEF_OP(VFToIScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFToIScalarInsert>();
  const auto Sizes = ConvertSubRegSizePair248(IROp);
  const auto Rounding = Op->Round;

  auto EmitRound = [this, Sizes, Rounding](ARMEmitter::VRegister Out, std::variant<ARMEmitter::VRegister, ARMEmitter::Register> SrcVar) {
    const auto In = *std::get_if<ARMEmitter::VRegister>(&SrcVar);

    // One frint* variant per rounding mode.
    switch (Rounding) {
    case IR::RoundMode::Nearest: {
      frintn(Sizes.Scalar, Out, In);
      break;
    }
    case IR::RoundMode::NegInfinity: {
      frintm(Sizes.Scalar, Out, In);
      break;
    }
    case IR::RoundMode::PosInfinity: {
      frintp(Sizes.Scalar, Out, In);
      break;
    }
    case IR::RoundMode::TowardsZero: {
      frintz(Sizes.Scalar, Out, In);
      break;
    }
    case IR::RoundMode::Host: {
      frinti(Sizes.Scalar, Out, In);
      break;
    }
    }
  };

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto UpperSource = GetVReg(Op->Vector1);
  const auto Input = GetVReg(Op->Vector2);

  VFScalarUnaryOperation(IROp->Size, Op->Header.ElementSize, Op->ZeroUpperBits, EmitRound, Result, UpperSource, Input);
}

// Scalar floating-point comparison of the lowest elements of Vector1 and Vector2. The comparison
// mask is inserted into the lowest element of the destination; the rest of the destination is
// handled by VFScalarOperation. The comparison kind is selected by indexing a handler table with
// the underlying value of Op->Op.
DEF_OP(VFCMPScalarInsert) {
  const auto Op = IROp->C<IR::IROp_VFCMPScalarInsert>();
  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSizePair248(IROp);

  const auto ZeroUpperBits = Op->ZeroUpperBits;
  const auto Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  // Half precision uses the H-register overloads; single/double use the sized overloads.
  // Any other scalar size emits nothing.
  const bool IsHalf = SubRegSize.Scalar == ARMEmitter::ScalarRegSize::i16Bit;
  const bool IsSingleOrDouble = SubRegSize.Scalar == ARMEmitter::ScalarRegSize::i32Bit || SubRegSize.Scalar == ARMEmitter::ScalarRegSize::i64Bit;

  // Raw compare emitters: Out = (A == B), (A > B), (A >= B) respectively.
  const auto CompareEQ = [this, SubRegSize, IsHalf, IsSingleOrDouble](ARMEmitter::VRegister Out, ARMEmitter::VRegister A,
                                                                      ARMEmitter::VRegister B) {
    if (IsHalf) {
      fcmeq(Out.H(), A.H(), B.H());
    } else if (IsSingleOrDouble) {
      fcmeq(SubRegSize.Scalar, Out, A, B);
    }
  };
  const auto CompareGT = [this, SubRegSize, IsHalf, IsSingleOrDouble](ARMEmitter::VRegister Out, ARMEmitter::VRegister A,
                                                                      ARMEmitter::VRegister B) {
    if (IsHalf) {
      fcmgt(Out.H(), A.H(), B.H());
    } else if (IsSingleOrDouble) {
      fcmgt(SubRegSize.Scalar, Out, A, B);
    }
  };
  const auto CompareGE = [this, SubRegSize, IsHalf, IsSingleOrDouble](ARMEmitter::VRegister Out, ARMEmitter::VRegister A,
                                                                      ARMEmitter::VRegister B) {
    if (IsHalf) {
      fcmge(Out.H(), A.H(), B.H());
    } else if (IsSingleOrDouble) {
      fcmge(SubRegSize.Scalar, Out, A, B);
    }
  };

  // For handlers that build their result in VTMP1: if the requested destination is VTMP1 itself the
  // caller performs the insert afterwards, so nothing is emitted. Otherwise (which only happens when
  // the host supports AFP) the lowest element of VTMP1 is inserted into the destination here.
  const auto InsertTempIfNeeded = [this, SubRegSize, ZeroUpperBits, Is256Bit](ARMEmitter::VRegister Out) {
    if (Out == VTMP1) {
      return;
    }

    if (!ZeroUpperBits && Is256Bit) {
      constexpr auto Predicate = ARMEmitter::PReg::p0;
      ptrue(SubRegSize.Vector, Predicate, ARMEmitter::PredicatePattern::SVE_VL1);
      mov(SubRegSize.Vector, Out.Z(), Predicate.Merging(), VTMP1.Z());
    } else {
      ins(SubRegSize.Vector, Out.Q(), 0, VTMP1.Q(), 0);
    }
  };

  // Src1 == Src2
  auto ScalarEmitEQ = [CompareEQ](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) {
    CompareEQ(Dst, Src2, Src1);
  };
  // Src1 < Src2, expressed as Src2 > Src1.
  auto ScalarEmitLT = [CompareGT](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) {
    CompareGT(Dst, Src2, Src1);
  };
  // Src1 <= Src2, expressed as Src2 >= Src1.
  auto ScalarEmitLE = [CompareGE](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1, ARMEmitter::VRegister Src2) {
    CompareGE(Dst, Src2, Src1);
  };
  // Unordered: neither (Src1 >= Src2) nor (Src2 > Src1) holds.
  auto ScalarEmitUNO = [this, CompareGE, CompareGT, InsertTempIfNeeded](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1,
                                                                        ARMEmitter::VRegister Src2) {
    CompareGE(VTMP1, Src1, Src2);
    CompareGT(VTMP2, Src2, Src1);

    // Combine results and invert directly in VTMP1.
    orr(VTMP1.D(), VTMP1.D(), VTMP2.D());
    mvn(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());

    InsertTempIfNeeded(Dst);
  };
  // Not equal: inverted equality mask.
  auto ScalarEmitNEQ = [this, CompareEQ, InsertTempIfNeeded](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1,
                                                             ARMEmitter::VRegister Src2) {
    CompareEQ(VTMP1, Src2, Src1);

    // Invert directly in VTMP1.
    mvn(ARMEmitter::SubRegSize::i8Bit, VTMP1.D(), VTMP1.D());

    InsertTempIfNeeded(Dst);
  };
  // Ordered: (Src1 >= Src2) or (Src2 > Src1) holds.
  auto ScalarEmitORD = [this, CompareGE, CompareGT, InsertTempIfNeeded](ARMEmitter::VRegister Dst, ARMEmitter::VRegister Src1,
                                                                        ARMEmitter::VRegister Src2) {
    CompareGE(VTMP1, Src1, Src2);
    CompareGT(VTMP2, Src2, Src1);

    // Combine results directly in VTMP1.
    orr(VTMP1.D(), VTMP1.D(), VTMP2.D());

    InsertTempIfNeeded(Dst);
  };

  // Handler table; the order must match the numeric values of the comparison op enumeration.
  std::array<ScalarBinaryOpCaller, 6> Funcs = {{
    ScalarEmitEQ,
    ScalarEmitLT,
    ScalarEmitLE,
    ScalarEmitUNO,
    ScalarEmitNEQ,
    ScalarEmitORD,
  }};

  // Bit of a tricky detail.
  // The upper bits of the destination comes from the first source.
  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(Op->Vector1);
  const auto Rhs = GetVReg(Op->Vector2);

  VFScalarOperation(IROp->Size, ElementSize, Op->ZeroUpperBits, Funcs[FEXCore::ToUnderlying(Op->Op)], Result, Lhs, Rhs);
}

// Broadcasts an immediate (optionally shifted) into every element of the destination vector.
DEF_OP(VectorImm) {
  const auto Op = IROp->C<IR::IROp_VectorImm>();

  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto Dst = GetVReg(Node);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // ASIMD path.
    if (ElementSize != IR::OpSize::i64Bit) {
      movi(SubRegSize, Dst.Q(), Op->Immediate, Op->ShiftAmount);
      return;
    }

    // movi with 64bit element size doesn't do what we want here
    LoadConstant(ARMEmitter::Size::i64Bit, TMP1, static_cast<uint64_t>(Op->Immediate) << Op->ShiftAmount);
    dup(SubRegSize, Dst.Q(), TMP1.R());
    return;
  }

  // SVE path.
  LOGMAN_THROW_A_FMT(Op->ShiftAmount == 0, "SVE VectorImm doesn't support a shift");

  // SVE dup uses sign extension where VectorImm wants zext
  const bool NeedsZeroExtension = ElementSize > IR::OpSize::i8Bit && (Op->Immediate & 0x80);
  if (!NeedsZeroExtension) {
    dup_imm(SubRegSize, Dst.Z(), static_cast<int8_t>(Op->Immediate));
    return;
  }

  // Materialize the immediate in a GPR and broadcast that instead.
  LoadConstant(ARMEmitter::Size::i64Bit, TMP1, Op->Immediate);
  dup(SubRegSize, Dst.Z(), TMP1);
}

DEF_OP(LoadNamedVectorConstant) {
  // Materializes the named vector constant selected by Op->Constant into the
  // destination register, either by synthesizing it with an instruction or
  // by loading it from memory.
  const auto Op = IROp->C<IR::IROp_LoadNamedVectorConstant>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);

  // Constants that can be generated without a memory access.
  if (Op->Constant == FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_ZERO) {
    movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0);
    return;
  }

  if (HostSupportsSVE128 && Op->Constant == FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT) {
    // INDEX with base 0 and step 1 over 32-bit elements.
    index(ARMEmitter::SubRegSize::i32Bit, Dst.Z(), 0, 1);
    return;
  }

  // Produces the memory operand for the constant. When the constant's offset
  // from Base is encodable in the load's immediate field it is addressed
  // directly; otherwise the constant's pointer is first loaded into TMP1.
  const auto MakeConstantOperand = [this](IR::OpSize LoadSize, uint32_t ConstantIdx, ARMEmitter::Register Base) {
    const auto Offset = ARRAY_OFFSETOF(FEXCore::Core::CpuStateFrame, Pointers.NamedVectorConstants, ConstantIdx);

    if (Offset > 255) { // Doesn't fit the unscaled 9-bit signed form
      const bool Aligned = (Offset & (IR::OpSizeToSize(LoadSize) - 1)) == 0;
      // 12-bit unsigned scaled form needs an aligned offset within range.
      if (!Aligned || FEXCore::DividePow2(Offset, IR::OpSizeToSize(LoadSize)) > 4095) {
        ldr(TMP1, STATE_PTR_IDX(CpuStateFrame, Pointers.NamedVectorConstantPointers, ConstantIdx));
        return ARMEmitter::ExtendedMemOperand(TMP1, ARMEmitter::IndexType::OFFSET, 0);
      }
    }

    return ARMEmitter::ExtendedMemOperand(Base.X(), ARMEmitter::IndexType::OFFSET, Offset);
  };

  if (OpSize == IR::OpSize::i256Bit) {
    // The SVE 32-byte variant always goes through the constant's pointer.
    ldr(TMP1, STATE_PTR_IDX(CpuStateFrame, Pointers.NamedVectorConstantPointers, Op->Constant));
    ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, 0);
    return;
  }

  const auto Address = MakeConstantOperand(OpSize, Op->Constant, STATE);
  switch (OpSize) {
  case IR::OpSize::i128Bit: {
    ldr(Dst.Q(), Address);
    break;
  }
  case IR::OpSize::i64Bit: {
    ldr(Dst.D(), Address);
    break;
  }
  case IR::OpSize::i32Bit: {
    ldr(Dst.S(), Address);
    break;
  }
  case IR::OpSize::i16Bit: {
    ldrh(Dst, Address);
    break;
  }
  case IR::OpSize::i8Bit: {
    ldrb(Dst, Address);
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break;
  }
}
DEF_OP(LoadNamedVectorIndexedConstant) {
  // Loads OpSize bytes from the indexed named-constant table selected by
  // Op->Constant, starting at byte offset Op->Index.
  const auto Op = IROp->C<IR::IROp_LoadNamedVectorIndexedConstant>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);

  // Fetch the table's base pointer.
  ldr(TMP1, STATE_PTR_IDX(CpuStateFrame, Pointers.IndexedNamedVectorConstantPointers, Op->Constant));

  if (OpSize == IR::OpSize::i256Bit) {
    // The SVE load is issued with a zero immediate, so fold the index into
    // the base pointer first.
    add(ARMEmitter::Size::i64Bit, TMP1, TMP1, Op->Index);
    ld1b<ARMEmitter::SubRegSize::i8Bit>(Dst.Z(), PRED_TMP_32B.Zeroing(), TMP1, 0);
    return;
  }

  switch (OpSize) {
  case IR::OpSize::i128Bit: {
    ldr(Dst.Q(), TMP1, Op->Index);
    break;
  }
  case IR::OpSize::i64Bit: {
    ldr(Dst.D(), TMP1, Op->Index);
    break;
  }
  case IR::OpSize::i32Bit: {
    ldr(Dst.S(), TMP1, Op->Index);
    break;
  }
  case IR::OpSize::i16Bit: {
    ldrh(Dst, TMP1, Op->Index);
    break;
  }
  case IR::OpSize::i8Bit: {
    ldrb(Dst, TMP1, Op->Index);
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unhandled {} size: {}", __func__, OpSize); break;
  }
}

DEF_OP(VMov) {
  // Moves the low OpSize bytes of Source into Dst.
  const auto Op = IROp->C<IR::IROp_VMov>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto Source = GetVReg(Op->Source);

  // Sub-64-bit moves: zero a temporary, insert element 0 of Source into it,
  // then copy the full 128-bit temporary into the destination.
  const auto MoveLowElement = [this, Dst, Source](ARMEmitter::SubRegSize EltSize) {
    movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
    ins(EltSize, VTMP1, 0, Source, 0);
    mov(Dst.Q(), VTMP1.Q());
  };

  switch (OpSize) {
  case IR::OpSize::i8Bit: MoveLowElement(ARMEmitter::SubRegSize::i8Bit); break;
  case IR::OpSize::i16Bit: MoveLowElement(ARMEmitter::SubRegSize::i16Bit); break;
  case IR::OpSize::i32Bit: MoveLowElement(ARMEmitter::SubRegSize::i32Bit); break;
  case IR::OpSize::i64Bit: mov(Dst.D(), Source.D()); break;
  case IR::OpSize::i128Bit: {
    // With SVE256 the move is emitted even when source and destination are
    // the same register; otherwise a self-move is elided.
    const bool SameRegister = Dst.Idx() == Source.Idx();
    if (HostSupportsSVE256 || !SameRegister) {
      mov(Dst.Q(), Source.Q());
    }
    break;
  }
  case IR::OpSize::i256Bit: {
    // NOTE: If, in the distant future we support larger moves, or registers
    //       (*cough* AVX-512 *cough*) make sure to change this to treat
    //       256-bit moves with zero extending behavior instead of doing only
    //       a regular SVE move into a 512-bit register.
    const bool SameRegister = Dst.Idx() == Source.Idx();
    if (!SameRegister) {
      mov(Dst.Z(), Source.Z());
    }
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unknown Op Size: {}", OpSize); break;
  }
}

DEF_OP(VAddP) {
  // Integer pairwise add of VectorLower and VectorUpper.
  const auto Op = IROp->C<IR::IROp_VAddP>();
  const auto OpSize = IROp->Size;
  const auto IsScalar = OpSize == IR::OpSize::i64Bit;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);
  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto Dst = GetVReg(Node);
  const auto Lower = GetVReg(Op->VectorLower);
  const auto Upper = GetVReg(Op->VectorUpper);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    // Adv. SIMD ADDP already produces the layout we want.
    if (IsScalar) {
      addp(SubRegSize, Dst.D(), Lower.D(), Upper.D());
    } else {
      addp(SubRegSize, Dst.Q(), Lower.Q(), Upper.Q());
    }
    return;
  }

  const auto GoverningPred = PRED_TMP_32B.Merging();

  // SVE ADDP is destructive, so operate on a temporary copy of the lower vector.
  movprfx(VTMP1.Z(), Lower.Z());

  // Adv. SIMD ADDP behaves as if the upper vector were concatenated onto the
  // lower one before the pairwise add. SVE ADDP instead interleaves the
  // pairwise results of both sources, so the interleave has to be undone:
  // UZP1 collects the even elements, UZP2 the odd ones.
  addp(SubRegSize, VTMP1.Z(), GoverningPred, VTMP1.Z(), Upper.Z());
  uzp1(SubRegSize, Dst.Z(), VTMP1.Z(), VTMP1.Z());
  uzp2(SubRegSize, VTMP2.Z(), VTMP1.Z(), VTMP1.Z());

  // Join the two halves back together.
  splice<ARMEmitter::OpType::Destructive>(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP2.Z());
}

DEF_OP(VOrn) {
  // Bitwise OR of Vector1 with the complement of Vector2.
  const auto Op = IROp->C<IR::IROp_VOrn>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Lhs = GetVReg(Op->Vector1);
  const auto Rhs = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    // Emitted as a separate NOT into a temporary followed by an ORR.
    not_(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), PRED_TMP_32B.Merging(), Rhs.Z());
    orr(Dst.Z(), Lhs.Z(), VTMP1.Z());
    return;
  }

  if (Is128Bit) {
    orn(Dst.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // Every remaining size uses the 64-bit form.
  orn(Dst.D(), Lhs.D(), Rhs.D());
}

DEF_OP(VFAddV) {
  // Floating-point add-across-vector: reduces all elements of Vector into a
  // single scalar written to Dst.
  //
  // Exactly one reduction sequence must be emitted. The three strategies
  // below are mutually exclusive and are selected in order of preference:
  //   1. 256-bit operation on an SVE256 host: FADDV over 32 bytes.
  //   2. SVE128 host: FADDV over 16 bytes.
  //   3. Adv. SIMD only: chained FADDP.
  const auto Op = IROp->C<IR::IROp_VFAddV>();
  const auto OpSize = IROp->Size;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSizePair248(IROp);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  LOGMAN_THROW_A_FMT(OpSize == IR::OpSize::i128Bit || OpSize == IR::OpSize::i256Bit, "Only AVX and SSE size supported");
  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();
    faddv(SubRegSize.Vector, Dst, Pred, Vector.Z());
  } else if (HostSupportsSVE128) {
    // This must be chained with `else`: falling through from the 256-bit
    // case would emit a second reduction that overwrites Dst with the sum
    // of only the low 128 bits (and reads a clobbered source when Dst
    // aliases Vector).
    const auto Pred = PRED_TMP_16B.Merging();
    faddv(SubRegSize.Vector, Dst, Pred, Vector.Z());
  } else {
    // ASIMD doesn't support faddv, need to use multiple faddp to match behaviour.
    if (ElementSize == IR::OpSize::i32Bit) {
      // Four 32-bit elements: one vector pairwise add, then a scalar
      // pairwise add of the remaining pair.
      faddp(SubRegSize.Vector, Dst.Q(), Vector.Q(), Vector.Q());
      faddp(SubRegSize.Scalar, Dst, Dst);
    } else {
      // Otherwise a single scalar pairwise add is emitted.
      faddp(SubRegSize.Scalar, Dst, Vector);
    }
  }
}

DEF_OP(VAddV) {
  // Integer add-across-vector: sums all elements of Vector into Dst.
  const auto Op = IROp->C<IR::IROp_VAddV>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSizePair8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    if (ElementSize == IR::OpSize::i64Bit) {
      // 64-bit elements use a scalar pairwise add instead of ADDV.
      addp(SubRegSize.Scalar, Dst, Src);
    } else {
      addv(SubRegSize.Vector, Dst.Q(), Src.Q());
    }
    return;
  }

  // SVE doesn't have an equivalent ADDV instruction, so we make do by
  // running Adv. SIMD ADDV separately over the low and high 128-bit lanes
  // and then adding the two partial sums.
  const auto UpperLanePred = ARMEmitter::PReg::p0;

  // Build a predicate covering only the upper lane (everything in the
  // 32-byte predicate that isn't in the 16-byte one) and compact those
  // elements down into the bottom of a temporary.
  not_(UpperLanePred, PRED_TMP_32B.Zeroing(), PRED_TMP_16B);
  compact(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), UpperLanePred, Src.Z());

  addv(SubRegSize.Vector, VTMP2.Q(), Src.Q());   // low lane sum
  addv(SubRegSize.Vector, VTMP1.Q(), VTMP1.Q()); // high lane sum
  add(SubRegSize.Vector, Dst.Q(), VTMP1.Q(), VTMP2.Q());
}

DEF_OP(VUMinV) {
  // Unsigned minimum across all elements of Vector, written to Dst.
  const auto Op = IROp->C<IR::IROp_VUMinV>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    // Adv. SIMD reduction over the 128-bit register.
    uminv(SubRegSize, Dst.Q(), Src.Q());
    return;
  }

  // SVE reduction governed by the 32-byte predicate.
  uminv(SubRegSize, Dst, PRED_TMP_32B, Src.Z());
}

DEF_OP(VUMaxV) {
  // Unsigned maximum across all elements of Vector, written to Dst.
  const auto Op = IROp->C<IR::IROp_VUMaxV>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto SubRegSize = ConvertSubRegSize8(IROp);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    // Adv. SIMD reduction over the 128-bit register.
    umaxv(SubRegSize, Dst.Q(), Src.Q());
    return;
  }

  // SVE reduction governed by the 32-byte predicate.
  umaxv(SubRegSize, Dst, PRED_TMP_32B, Src.Z());
}

DEF_OP(VURAvg) {
  // Unsigned rounding halving add (rounded average) of Vector1 and Vector2.
  const auto Op = IROp->C<IR::IROp_VURAvg>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    urhadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
    return;
  }

  const auto Mask = PRED_TMP_32B.Merging();

  // SVE URHADD is destructive (the destination is also the first source).
  // The operation is commutative, so when the destination already holds one
  // of the inputs we can operate in place with the other input.
  const bool DstHoldsFirst = Dst == Vector1;
  const bool DstHoldsSecond = !DstHoldsFirst && Dst == Vector2;

  if (!DstHoldsFirst && !DstHoldsSecond) {
    // No aliasing at all, so the destination can be seeded with Vector1
    // without clobbering anything we still need.
    movprfx(Dst.Z(), Vector1.Z());
  }

  urhadd(SubRegSize, Dst.Z(), Mask, Dst.Z(), DstHoldsSecond ? Vector1.Z() : Vector2.Z());
}

DEF_OP(VFAddP) {
  // Floating-point pairwise add of VectorLower and VectorUpper.
  const auto Op = IROp->C<IR::IROp_VFAddP>();
  const auto OpSize = IROp->Size;

  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto SubRegSize = ConvertSubRegSize248(IROp);

  const auto Dst = GetVReg(Node);
  const auto Lower = GetVReg(Op->VectorLower);
  const auto Upper = GetVReg(Op->VectorUpper);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    // Adv. SIMD FADDP already produces the layout we want.
    faddp(SubRegSize, Dst.Q(), Lower.Q(), Upper.Q());
    return;
  }

  const auto GoverningPred = PRED_TMP_32B.Merging();

  // SVE FADDP is destructive, so operate on a temporary copy of the lower vector.
  movprfx(VTMP1.Z(), Lower.Z());

  // Adv. SIMD FADDP behaves as if the upper vector were concatenated onto the
  // lower one before the pairwise add. SVE FADDP instead interleaves the
  // pairwise results of both sources, so the interleave has to be undone:
  // UZP1 collects the even elements, UZP2 the odd ones.
  faddp(SubRegSize, VTMP1.Z(), GoverningPred, VTMP1.Z(), Upper.Z());
  uzp1(SubRegSize, Dst.Z(), VTMP1.Z(), VTMP1.Z());
  uzp2(SubRegSize, VTMP2.Z(), VTMP1.Z(), VTMP1.Z());

  // Join the two halves back together.
  splice<ARMEmitter::OpType::Destructive>(ARMEmitter::SubRegSize::i64Bit, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP2.Z());
}

DEF_OP(VFDiv) {
  // Floating-point divide: Dst = Vector1 / Vector2.
  const auto Op = IROp->C<IR::IROp_VFDiv>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto IsScalar = ElementSize == OpSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Dividend = GetVReg(Op->Vector1);
  const auto Divisor = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    // SVE FDIV is destructive: the destination must already hold the dividend.
    //  - Dst aliases the dividend: divide in place.
    //  - Dst aliases only the divisor: work in VTMP1 so the divisor isn't
    //    clobbered, then copy the result over.
    //  - No aliasing: seed Dst with the dividend and divide in place.
    const bool DstIsDividend = Dst == Dividend;
    const bool DstIsDivisorOnly = !DstIsDividend && Dst == Divisor;
    const auto Work = DstIsDivisorOnly ? VTMP1.Z() : Dst.Z();

    if (!DstIsDividend) {
      movprfx(Work, Dividend.Z());
    }
    fdiv(SubRegSize, Work, Mask, Work, Divisor.Z());
    if (DstIsDivisorOnly) {
      mov(Dst.Z(), VTMP1.Z());
    }
    return;
  }

  if (!IsScalar) {
    fdiv(SubRegSize, Dst.Q(), Dividend.Q(), Divisor.Q());
    return;
  }

  // Scalar forms, selected by element width. Other widths emit nothing.
  switch (ElementSize) {
  case IR::OpSize::i64Bit: fdiv(Dst.D(), Dividend.D(), Divisor.D()); break;
  case IR::OpSize::i32Bit: fdiv(Dst.S(), Dividend.S(), Divisor.S()); break;
  case IR::OpSize::i16Bit: fdiv(Dst.H(), Dividend.H(), Divisor.H()); break;
  default: break;
  }
}

DEF_OP(VFMin) {
  // Element-wise floating-point minimum of Vector1 and Vector2.
  const auto Op = IROp->C<IR::IROp_VFMin>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto IsScalar = ElementSize == OpSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  // NOTE: FMIN** is not used directly by these implementations because of
  //       its NaN handling: it produces either the incoming NaN value* or
  //       the default NaN depending on FPCR flags. What we want instead is
  //       for NaN entries to yield zero in the comparison result, so a
  //       compare-and-select sequence is emitted.
  //
  // * - Not exactly (differs slightly with SNaNs), but close enough for the explanation
  // ** - Unless the host supports AFP.AH, which allows FMIN/FMAX to select the second source element as expected of x86.

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Governing = PRED_TMP_32B;
    const auto TakeSecond = ARMEmitter::PReg::p0;

    // Build a predicate that is set wherever !(Vector2 > Vector1); those
    // elements are taken from Vector2, all others from Vector1.
    fcmgt(SubRegSize, TakeSecond, Governing.Zeroing(), Vector2.Z(), Vector1.Z());
    not_(TakeSecond, Governing.Zeroing(), TakeSecond);

    if (!(Dst == Vector1)) {
      // Stage Vector1 in a temporary, merge the selected Vector2 elements
      // into it, then copy the result to the destination.
      mov(VTMP1.Z(), Vector1.Z());
      mov(SubRegSize, VTMP1.Z(), TakeSecond.Merging(), Vector2.Z());
      mov(Dst.Z(), VTMP1.Z());
      return;
    }

    // The destination already holds Vector1; only the merge is needed.
    mov(SubRegSize, Dst.Z(), TakeSecond.Merging(), Vector2.Z());
    return;
  }

  LOGMAN_THROW_A_FMT(!IsScalar, "should use VFMinScalarInsert instead");

  if (HostSupportsAFP) {
    // AFP.AH lets fmin behave like x86 min
    fmin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
    return;
  }

  const bool DstIsFirst = Dst == Vector1;
  const bool DstIsSecondOnly = !DstIsFirst && Dst == Vector2;

  if (DstIsSecondOnly) {
    // The destination already holds Vector2: compare with the arguments
    // inverted and insert Vector1 wherever the compare is false.
    // NOTE(review): on an unordered compare (mask false) this branch ends up
    // with Vector1, whereas the other branches end up with Vector2 — confirm
    // this difference is intended.
    fcmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q());
    bif(Dst.Q(), Vector1.Q(), VTMP1.Q());
    return;
  }

  // Mask is set where Vector2 > Vector1, i.e. where Vector1 is the minimum.
  fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
  if (!DstIsFirst) {
    // The destination is neither source, so seed it with Vector1 first.
    mov(Dst.Q(), Vector1.Q());
  }
  // Insert Vector2 wherever the mask is false.
  bif(Dst.Q(), Vector2.Q(), VTMP1.Q());
}

DEF_OP(VFMax) {
  // Element-wise floating-point maximum of Vector1 and Vector2.
  const auto Op = IROp->C<IR::IROp_VFMax>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto IsScalar = ElementSize == OpSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  // NOTE: See the VFMin implementation for why FMAX/FMIN aren't simply
  //       used for these implementations.

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Governing = PRED_TMP_32B;
    const auto TakeSecond = ARMEmitter::PReg::p0;

    // Predicate is set wherever Vector2 > Vector1; those elements are taken
    // from Vector2, all others from Vector1.
    fcmgt(SubRegSize, TakeSecond, Governing.Zeroing(), Vector2.Z(), Vector1.Z());

    if (!(Dst == Vector1)) {
      // Stage Vector1 in a temporary, merge the selected Vector2 elements
      // into it, then copy the result to the destination.
      mov(VTMP1.Z(), Vector1.Z());
      mov(SubRegSize, VTMP1.Z(), TakeSecond.Merging(), Vector2.Z());
      mov(Dst.Z(), VTMP1.Z());
      return;
    }

    // The destination already holds Vector1; only the merge is needed.
    mov(SubRegSize, Dst.Z(), TakeSecond.Merging(), Vector2.Z());
    return;
  }

  LOGMAN_THROW_A_FMT(!IsScalar, "should use VFMaxScalarInsert instead");

  if (HostSupportsAFP) {
    // AFP.AH lets fmax behave like x86 max
    fmax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
    return;
  }

  const bool DstIsFirst = Dst == Vector1;
  const bool DstIsSecondOnly = !DstIsFirst && Dst == Vector2;

  if (DstIsSecondOnly) {
    // The destination already holds Vector2: compare with the arguments
    // inverted and insert Vector1 wherever the compare is true.
    // NOTE(review): on an unordered compare (mask false) this branch ends up
    // with Vector2, whereas the other branches end up with Vector1 — confirm
    // this difference is intended.
    fcmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q());
    bit(Dst.Q(), Vector1.Q(), VTMP1.Q());
    return;
  }

  // Mask is set where Vector2 > Vector1, i.e. where Vector2 is the maximum.
  fcmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
  if (!DstIsFirst) {
    // The destination is neither source, so seed it with Vector1 first.
    mov(Dst.Q(), Vector1.Q());
  }
  // Insert Vector2 wherever the mask is true.
  bit(Dst.Q(), Vector2.Q(), VTMP1.Q());
}

DEF_OP(VFRecp) {
  // Floating-point reciprocal (1.0 / x) of each element of Vector.
  // With RPRES available, 32-bit elements use the reciprocal estimate
  // instruction directly; everything else performs a real division.
  const auto Op = IROp->C<IR::IROp_VFRecp>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSizePair16(IROp);
  const auto IsScalar = Op->Header.ElementSize == OpSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  // RPRES gives enough precision for this op on 32-bit elements.
  const bool UseEstimate = ElementSize == IR::OpSize::i32Bit && HostSupportsRPRES;

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();

    if (UseEstimate) {
      frecpe(SubRegSize.Vector, Dst.Z(), Src.Z());
      return;
    }

    // SVE FDIV is destructive, so divide a vector of ones held in a
    // temporary and copy the result out.
    fmov(SubRegSize.Vector, VTMP1.Z(), 1.0);
    fdiv(SubRegSize.Vector, VTMP1.Z(), Pred, VTMP1.Z(), Src.Z());
    mov(Dst.Z(), VTMP1.Z());
    return;
  }

  if (!IsScalar) {
    if (UseEstimate) {
      if (OpSize == IR::OpSize::i64Bit) {
        frecpe(SubRegSize.Vector, Dst.D(), Src.D());
      } else {
        frecpe(SubRegSize.Vector, Dst.Q(), Src.Q());
      }
      return;
    }

    fmov(SubRegSize.Vector, VTMP1.Q(), 1.0f);
    fdiv(SubRegSize.Vector, Dst.Q(), VTMP1.Q(), Src.Q());
    return;
  }

  // Scalar path.
  if (UseEstimate) {
    frecpe(SubRegSize.Scalar, Dst.S(), Src.S());
    return;
  }

  fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f);
  switch (ElementSize) {
  case IR::OpSize::i64Bit: fdiv(Dst.D(), VTMP1.D(), Src.D()); break;
  case IR::OpSize::i32Bit: fdiv(Dst.S(), VTMP1.S(), Src.S()); break;
  case IR::OpSize::i16Bit: fdiv(Dst.H(), VTMP1.H(), Src.H()); break;
  default: {
    LOGMAN_MSG_A_FMT("Unexpected ElementSize for {}", __func__);
    FEX_UNREACHABLE;
  }
  }
}

DEF_OP(VFRecpPrecision) {
  // Reciprocal of 32-bit float elements (scalar, or a 64-bit vector of two).
  // With RPRES the estimate alone is not precise enough for this op, so it
  // is refined with one FRECPS step; without RPRES a real division is used.
  const auto Op = IROp->C<IR::IROp_VFRecpPrecision>();
  const auto OpSize = IROp->Size;
  const auto ElementSize = Op->Header.ElementSize;

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i32Bit && (OpSize == IR::OpSize::i64Bit || OpSize == IR::OpSize::i32Bit),
                     "Unexpected sizes for operation.", __func__);

  const auto SubRegSize = ConvertSubRegSizePair16(IROp);
  const auto IsScalar = OpSize == ElementSize;

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  if (!IsScalar) {
    // Vector operation - Opsize 64bits, elementsize 32bits
    if (!HostSupportsRPRES) {
      // No RPRES, so normal division
      fmov(SubRegSize.Vector, VTMP1.Q(), 1.0f);
      fdiv(SubRegSize.Vector, Dst.Q(), VTMP1.Q(), Src.Q());
      return;
    }

    // Estimate, compute the refinement factor, then apply it.
    frecpe(SubRegSize.Vector, VTMP1.D(), Src.D());
    frecps(SubRegSize.Vector, VTMP2.D(), VTMP1.D(), Src.D());
    fmul(SubRegSize.Vector, Dst.D(), VTMP1.D(), VTMP2.D());
    return;
  }

  const bool UseEstimate = ElementSize == IR::OpSize::i32Bit && HostSupportsRPRES;
  if (!UseEstimate) {
    fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0f);
    // Element size is known to be 32bits
    fdiv(Dst.S(), VTMP1.S(), Src.S());
    return;
  }

  // Not enough precision from the estimate alone, so improve it with frecps.
  frecpe(SubRegSize.Scalar, VTMP1.S(), Src.S());
  frecps(SubRegSize.Scalar, VTMP2.S(), VTMP1.S(), Src.S());
  fmul(SubRegSize.Scalar, Dst.S(), VTMP1.S(), VTMP2.S());
}

DEF_OP(VFRSqrt) {
  // Floating-point reciprocal square root (1.0 / sqrt(x)) of each element.
  // With RPRES available, 32-bit elements use the estimate instruction
  // directly; everything else performs a real square root and division.
  const auto Op = IROp->C<IR::IROp_VFRSqrt>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSizePair16(IROp);
  const auto IsScalar = ElementSize == OpSize;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  // RPRES gives enough precision for this op on 32-bit elements.
  const bool UseEstimate = ElementSize == IR::OpSize::i32Bit && HostSupportsRPRES;

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();
    if (UseEstimate) {
      frsqrte(SubRegSize.Vector, Dst.Z(), Src.Z());
      return;
    }

    // The square root goes to a temporary first, so the source has been
    // fully consumed before the destination is overwritten with ones.
    fsqrt(SubRegSize.Vector, VTMP1.Z(), Pred, Src.Z());
    fmov(SubRegSize.Vector, Dst.Z(), 1.0);
    fdiv(SubRegSize.Vector, Dst.Z(), Pred, Dst.Z(), VTMP1.Z());
    return;
  }

  if (!IsScalar) {
    if (UseEstimate) {
      if (OpSize == IR::OpSize::i64Bit) {
        frsqrte(SubRegSize.Vector, Dst.D(), Src.D());
      } else {
        frsqrte(SubRegSize.Vector, Dst.Q(), Src.Q());
      }
      return;
    }

    fmov(SubRegSize.Vector, VTMP1.Q(), 1.0);
    fsqrt(SubRegSize.Vector, VTMP2.Q(), Src.Q());
    fdiv(SubRegSize.Vector, Dst.Q(), VTMP1.Q(), VTMP2.Q());
    return;
  }

  // Scalar path.
  if (UseEstimate) {
    frsqrte(SubRegSize.Scalar, Dst.S(), Src.S());
    return;
  }

  fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0);
  switch (ElementSize) {
  case IR::OpSize::i64Bit: {
    fsqrt(VTMP2.D(), Src.D());
    fdiv(Dst.D(), VTMP1.D(), VTMP2.D());
    break;
  }
  case IR::OpSize::i32Bit: {
    fsqrt(VTMP2.S(), Src.S());
    fdiv(Dst.S(), VTMP1.S(), VTMP2.S());
    break;
  }
  case IR::OpSize::i16Bit: {
    fsqrt(VTMP2.H(), Src.H());
    fdiv(Dst.H(), VTMP1.H(), VTMP2.H());
    break;
  }
  default: break;
  }
}

DEF_OP(VFRSqrtPrecision) {
  // Reciprocal square root of 32-bit float elements (scalar, or a 64-bit
  // vector of two). With RPRES the initial estimate is refined with one
  // FRSQRTS step; without RPRES a real square root and division are used.
  const auto Op = IROp->C<IR::IROp_VFRSqrtPrecision>();
  const auto OpSize = IROp->Size;
  const auto ElementSize = Op->Header.ElementSize;

  LOGMAN_THROW_A_FMT(ElementSize == IR::OpSize::i32Bit && (OpSize == IR::OpSize::i64Bit || OpSize == IR::OpSize::i32Bit),
                     "Unexpected sizes for operation.", __func__);

  const auto SubRegSize = ConvertSubRegSizePair16(IROp);
  const auto IsScalar = ElementSize == OpSize;

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  if (!IsScalar) {
    if (!HostSupportsRPRES) {
      fmov(SubRegSize.Vector, VTMP1.Q(), 1.0);
      fsqrt(SubRegSize.Vector, VTMP2.Q(), Src.Q());
      fdiv(SubRegSize.Vector, Dst.Q(), VTMP1.Q(), VTMP2.Q());
      return;
    }

    frsqrte(SubRegSize.Vector, VTMP1.D(), Src.D());
    // The initial estimate is not good enough: square it, feed it through
    // FRSQRTS to get the correction factor, then scale the estimate by it.
    fmul(SubRegSize.Vector, VTMP2.D(), VTMP1.D(), VTMP1.D());
    frsqrts(SubRegSize.Vector, VTMP2.D(), VTMP2.D(), Src.D());
    fmul(SubRegSize.Vector, Dst.D(), VTMP1.D(), VTMP2.D());
    return;
  }

  if (!HostSupportsRPRES) {
    fmov(SubRegSize.Scalar, VTMP1.Q(), 1.0);
    // element size is known to be 32bits
    fsqrt(VTMP2.S(), Src.S());
    fdiv(Dst.S(), VTMP1.S(), VTMP2.S());
    return;
  }

  frsqrte(SubRegSize.Scalar, VTMP1.S(), Src.S());
  // The initial estimate is not good enough: square it, feed it through
  // FRSQRTS to get the correction factor, then scale the estimate by it.
  fmul(SubRegSize.Scalar, VTMP2.S(), VTMP1.S(), VTMP1.S());
  frsqrts(SubRegSize.Scalar, VTMP2.S(), VTMP2.S(), Src.S());
  fmul(SubRegSize.Scalar, Dst.S(), VTMP1.S(), VTMP2.S());
}

DEF_OP(VNot) {
  // Bitwise complement of Vector.
  const auto Op = IROp->C<IR::IROp_VNot>();
  const auto OpSize = IROp->Size;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Src = GetVReg(Op->Vector);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;
  if (!UseSVE) {
    // Adv. SIMD: every non-256-bit size uses the full 128-bit MVN.
    mvn(ARMEmitter::SubRegSize::i8Bit, Dst.Q(), Src.Q());
    return;
  }

  // SVE: predicated NOT over all 32 bytes.
  not_(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), PRED_TMP_32B.Merging(), Src.Z());
}

DEF_OP(VUMin) {
  // Element-wise unsigned integer minimum of Vector1 and Vector2.
  const auto Op = IROp->C<IR::IROp_VUMin>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize16(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();

    // SVE UMIN is destructive (the destination is also the first source).
    // Whenever the destination aliases one of the inputs the UMIN can be
    // done in place with the other input.
    const bool DstHoldsFirst = Dst == Vector1;
    const bool DstHoldsSecond = !DstHoldsFirst && Dst == Vector2;

    if (!DstHoldsFirst && !DstHoldsSecond) {
      // Nothing aliases the destination, so it can be seeded with Vector1
      // directly without needing a temporary.
      movprfx(Dst.Z(), Vector1.Z());
    }

    umin(SubRegSize, Dst.Z(), Pred, Dst.Z(), DstHoldsSecond ? Vector1.Z() : Vector2.Z());
    return;
  }

  const bool HasNativeUMin =
    ElementSize == IR::OpSize::i8Bit || ElementSize == IR::OpSize::i16Bit || ElementSize == IR::OpSize::i32Bit;

  if (HasNativeUMin) {
    umin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
  } else if (ElementSize == IR::OpSize::i64Bit) {
    // 64-bit elements are handled with a compare-and-select sequence.
    // Mask is set where Vector2 > Vector1 (unsigned), i.e. where Vector1 is
    // the minimum; BIF then inserts Vector2 wherever the mask is clear.
    // The select is done in a temporary so neither source is clobbered if
    // it aliases the destination.
    cmhi(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
    mov(VTMP2.Q(), Vector1.Q());
    bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q());
    mov(Dst.Q(), VTMP2.Q());
  }
  // Any other element size emits nothing.
}

DEF_OP(VSMin) {
  // Element-wise signed integer minimum of Vector1 and Vector2.
  //
  // 256-bit operations use predicated SVE SMIN. Smaller operations use
  // Adv. SIMD SMIN for 8/16/32-bit elements and a compare-and-select
  // sequence for 64-bit elements.
  const auto Op = IROp->C<IR::IROp_VSMin>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize16(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();

    // In any case where the destination aliases one of the source vectors
    // then we can just perform the SMIN in place.
    if (Dst == Vector1) {
      smin(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    } else if (Dst == Vector2) {
      smin(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector1.Z());
    } else {
      // SVE SMIN is a destructive operation, but we know nothing is
      // aliasing the destination by this point, so we can move into
      // the destination without needing a temporary.
      movprfx(Dst.Z(), Vector1.Z());
      smin(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    }
  } else {
    switch (ElementSize) {
    case IR::OpSize::i8Bit:
    case IR::OpSize::i16Bit:
    case IR::OpSize::i32Bit: {
      smin(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
      break;
    }
    case IR::OpSize::i64Bit: {
      // 64-bit elements are handled with a compare-and-select sequence.
      //
      // The mask must be set where Vector2 > Vector1 (signed), i.e. where
      // Vector1 is the minimum. BIF keeps the destination bits where the
      // mask is set and inserts Vector2 where it is clear, so the result is
      // Vector1 when Vector1 < Vector2 and Vector2 otherwise. Comparing in
      // the opposite order (Vector1 > Vector2) would select the maximum.
      // This mirrors the operand order of the CMHI-based sequence in VUMin.
      cmgt(SubRegSize, VTMP1.Q(), Vector2.Q(), Vector1.Q());
      // Select in a temporary so a source aliasing Dst is not clobbered.
      mov(VTMP2.Q(), Vector1.Q());
      bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q());
      mov(Dst.Q(), VTMP2.Q());
      break;
    }
    default: break;
    }
  }
}

// Unsigned per-element maximum of Vector1 and Vector2.
//
// 256-bit operations use the predicated, destructive SVE UMAX. Everything else
// uses Adv.SIMD: UMAX for 8/16/32-bit elements, and a compare + bitwise-insert
// sequence for 64-bit elements (built by hand rather than with UMAX).
DEF_OP(VUMax) {
  const auto Op = IROp->C<IR::IROp_VUMax>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize16(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();

    // In any case where the destination aliases one of the source vectors
    // then we can just perform the UMAX in place.
    if (Dst == Vector1) {
      umax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    } else if (Dst == Vector2) {
      umax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector1.Z());
    } else {
      // SVE UMAX is a destructive operation, but we know nothing is
      // aliasing the destination by this point, so we can move into
      // the destination without needing a temporary.
      movprfx(Dst.Z(), Vector1.Z());
      umax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    }
  } else {
    switch (ElementSize) {
    case IR::OpSize::i8Bit:
    case IR::OpSize::i16Bit:
    case IR::OpSize::i32Bit: {
      umax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
      break;
    }
    case IR::OpSize::i64Bit: {
      // VTMP1 = all-ones in every lane where Vector1 > Vector2 (unsigned),
      // i.e. the lanes where Vector1 already holds the maximum.
      cmhi(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q());
      // Start from Vector1 and insert Vector2 wherever the mask is clear
      // (Vector1 <= Vector2). The result is built in VTMP2 so that a Dst
      // aliasing either source is not clobbered before it has been read.
      mov(VTMP2.Q(), Vector1.Q());
      bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q());
      mov(Dst.Q(), VTMP2.Q());
      break;
    }
    default: break;
    }
  }
}

// Signed per-element maximum of Vector1 and Vector2.
//
// 256-bit operations use the predicated, destructive SVE SMAX. Everything else
// uses Adv.SIMD: SMAX for 8/16/32-bit elements, and a compare + bitwise-insert
// sequence for 64-bit elements (built by hand rather than with SMAX).
DEF_OP(VSMax) {
  const auto Op = IROp->C<IR::IROp_VSMax>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize16(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Pred = PRED_TMP_32B.Merging();

    // In any case where the destination aliases one of the source vectors
    // then we can just perform the SMAX in place.
    if (Dst == Vector1) {
      smax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    } else if (Dst == Vector2) {
      smax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector1.Z());
    } else {
      // SVE SMAX is a destructive operation, but we know nothing is
      // aliasing the destination by this point, so we can move into
      // the destination without needing a temporary.
      movprfx(Dst.Z(), Vector1.Z());
      smax(SubRegSize, Dst.Z(), Pred, Dst.Z(), Vector2.Z());
    }
  } else {
    switch (ElementSize) {
    case IR::OpSize::i8Bit:
    case IR::OpSize::i16Bit:
    case IR::OpSize::i32Bit: {
      smax(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q());
      break;
    }
    case IR::OpSize::i64Bit: {
      // VTMP1 = all-ones in every lane where Vector1 > Vector2 (signed),
      // i.e. the lanes where Vector1 already holds the maximum.
      cmgt(SubRegSize, VTMP1.Q(), Vector1.Q(), Vector2.Q());
      // Start from Vector1 and insert Vector2 wherever the mask is clear
      // (Vector1 <= Vector2). The result is built in VTMP2 so that a Dst
      // aliasing either source is not clobbered before it has been read.
      mov(VTMP2.Q(), Vector1.Q());
      bif(VTMP2.Q(), Vector2.Q(), VTMP1.Q());
      mov(Dst.Q(), VTMP2.Q());
      break;
    }
    default: break;
    }
  }
}

// Bitwise select: for every bit, take VectorTrue where VectorMask is set and
// VectorFalse where it is clear.
//
// Three code paths exist:
//   * 256-bit: SVE BSL (destructive on the "true" operand).
//   * 128-bit on an SVE128-only host with a destination that aliases nothing:
//     MOVPRFX + SVE BSL.
//   * Otherwise: Adv.SIMD BSL / BIF / BIT, picked by which source the
//     destination aliases so that no extra move is required.
DEF_OP(VBSL) {
  const auto SelOp = IROp->C<IR::IROp_VBSL>();
  const auto VecSize = IROp->Size;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const bool Full128 = VecSize == IR::OpSize::i128Bit;

  const auto Result = GetVReg(Node);
  const auto IfClear = GetVReg(SelOp->VectorFalse);
  const auto IfSet = GetVReg(SelOp->VectorTrue);
  const auto Select = GetVReg(SelOp->VectorMask);

  if (HostSupportsSVE256 && Wide) {
    // Operand order differs between the instruction sets:
    //   ASIMD BSL: Mask, True, False
    //   SVE   BSL: True, True, False, Mask
    //   ASIMD BIT: True, False, Mask
    //   ASIMD BIF: False, True, Mask
    if (Result == IfSet) {
      // The destructive operand already lives in the destination.
      bsl(Result.Z(), Result.Z(), IfClear.Z(), Select.Z());
    } else {
      // Build the result in a temporary, then copy it over.
      movprfx(VTMP1.Z(), IfSet.Z());
      bsl(VTMP1.Z(), VTMP1.Z(), IfClear.Z(), Select.Z());
      mov(Result.Z(), VTMP1.Z());
    }
    return;
  }

  const bool ResultAliasesNothing = Result != IfClear && Result != IfSet && Result != Select;
  if (!HostSupportsSVE256 && HostSupportsSVE128 && Full128 && ResultAliasesNothing) {
    // A move is unavoidable here. SVE movprfx+bsl is slightly more efficient than
    // ASIMD mov+bsl on CPUs that fuse movprfx and do NOT have zero-cycle vector moves.
    movprfx(Result.Z(), IfSet.Z());
    bsl(Result.Z(), Result.Z(), IfClear.Z(), Select.Z());
    return;
  }

  // Adv.SIMD: a 64-bit operation works on D registers, anything else on Q registers.
  const bool Half = VecSize == IR::OpSize::i64Bit;

  if (Result == Select) {
    // Destination holds the mask: BSL needs no moves.
    if (Half) {
      bsl(Result.D(), IfSet.D(), IfClear.D());
    } else {
      bsl(Result.Q(), IfSet.Q(), IfClear.Q());
    }
  } else if (Result == IfSet) {
    // Destination holds the "true" value: BIF needs no moves.
    if (Half) {
      bif(Result.D(), IfClear.D(), Select.D());
    } else {
      bif(Result.Q(), IfClear.Q(), Select.Q());
    }
  } else if (Result == IfClear) {
    // Destination holds the "false" value: BIT needs no moves.
    if (Half) {
      bit(Result.D(), IfSet.D(), Select.D());
    } else {
      bit(Result.Q(), IfSet.Q(), Select.Q());
    }
  } else {
    // No aliasing at all: copy the mask into the destination, then BSL.
    if (Half) {
      mov(Result.D(), Select.D());
      bsl(Result.D(), IfSet.D(), IfClear.D());
    } else {
      mov(Result.Q(), Select.Q());
      bsl(Result.Q(), IfSet.Q(), IfClear.Q());
    }
  }
}

// Integer equality compare of Vector1 and Vector2; matching elements become
// all-ones, the rest become zero.
DEF_OP(VCMPEQ) {
  const auto CmpOp = IROp->C<IR::IROp_VCMPEQ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair16(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (!(HostSupportsSVE256 && Wide)) {
    // Adv.SIMD CMEQ produces the element mask directly.
    if (SingleElement) {
      cmeq(Lanes.Scalar, Result, Lhs, Rhs);
    } else {
      cmeq(Lanes.Vector, Result.Q(), Lhs.Q(), Rhs.Q());
    }
    return;
  }

  // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
  // NZCV is stashed in TMP1 for the duration of the SVE sequence.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  const auto Governing = PRED_TMP_32B.Zeroing();
  const auto Matches = ARMEmitter::PReg::p0;

  // The compare yields a predicate, not a vector mask. To expand it:
  //   1. VTMP1  = ~Lhs in the matching lanes,
  //   2. Result = Lhs in the matching lanes and zero elsewhere,
  //   3. Result |= VTMP1 in the matching lanes (x | ~x == all ones).
  cmpeq(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
  not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
  movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
  orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());

  // Put NZCV back.
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);
}

// Integer compare-against-zero: elements of Vector equal to zero become
// all-ones, the rest become zero.
DEF_OP(VCMPEQZ) {
  const auto CmpOp = IROp->C<IR::IROp_VCMPEQZ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair16(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(CmpOp->Vector);

  if (!(HostSupportsSVE256 && Wide)) {
    // Adv.SIMD has a compare-with-zero form of CMEQ.
    if (SingleElement) {
      cmeq(Lanes.Scalar, Result, Input);
    } else {
      cmeq(Lanes.Vector, Result.Q(), Input.Q());
    }
    return;
  }

  const auto Governing = PRED_TMP_32B.Zeroing();
  const auto Matches = ARMEmitter::PReg::p0;

  // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
  // NZCV is stashed in TMP1 for the duration of the SVE sequence.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // Clear the temporary first so that every lane which does not match
  // ends up as zero in the final result.
  mov_imm(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), 0);
  // No ORR step is needed here (unlike VCMPEQ): a matching element is
  // zero by definition, so its bitwise NOT is already all ones.
  cmpeq(Lanes.Vector, Matches, Governing, Input.Z(), 0);
  not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Input.Z());
  mov(Result.Z(), VTMP1.Z());

  // Put NZCV back.
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);
}

// Signed integer greater-than compare (Vector1 > Vector2); elements that
// satisfy it become all-ones, the rest become zero.
DEF_OP(VCMPGT) {
  const auto CmpOp = IROp->C<IR::IROp_VCMPGT>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair16(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (!(HostSupportsSVE256 && Wide)) {
    // Adv.SIMD CMGT produces the element mask directly.
    if (SingleElement) {
      cmgt(Lanes.Scalar, Result, Lhs, Rhs);
    } else {
      cmgt(Lanes.Vector, Result.Q(), Lhs.Q(), Rhs.Q());
    }
    return;
  }

  const auto Governing = PRED_TMP_32B.Zeroing();
  const auto Matches = ARMEmitter::PReg::p0;

  // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
  // NZCV is stashed in TMP1 for the duration of the SVE sequence.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // The compare yields a predicate, not a vector mask. To expand it:
  //   1. VTMP1  = ~Lhs in the matching lanes,
  //   2. Result = Lhs in the matching lanes and zero elsewhere,
  //   3. Result |= VTMP1 in the matching lanes (x | ~x == all ones).
  cmpgt(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
  not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
  movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
  orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());

  // Put NZCV back.
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);
}

// Signed integer greater-than-zero compare; elements of Vector that are > 0
// become all-ones, the rest become zero.
DEF_OP(VCMPGTZ) {
  const auto CmpOp = IROp->C<IR::IROp_VCMPGTZ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair16(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(CmpOp->Vector);

  if (!(HostSupportsSVE256 && Wide)) {
    // Adv.SIMD has a compare-with-zero form of CMGT.
    if (SingleElement) {
      cmgt(Lanes.Scalar, Result, Input);
    } else {
      cmgt(Lanes.Vector, Result.Q(), Input.Q());
    }
    return;
  }

  const auto Governing = PRED_TMP_32B.Zeroing();
  const auto Matches = ARMEmitter::PReg::p0;

  // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
  // NZCV is stashed in TMP1 for the duration of the SVE sequence.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // Clear the temporary first so that every lane which fails the compare
  // ends up as zero in the final result.
  mov_imm(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), 0);
  cmpgt(Lanes.Vector, Matches, Governing, Input.Z(), 0);
  // In the matching lanes: VTMP1 = ~x, then VTMP1 |= x, giving all ones.
  not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Input.Z());
  orr(Lanes.Vector, VTMP1.Z(), Matches.Merging(), VTMP1.Z(), Input.Z());
  mov(Result.Z(), VTMP1.Z());

  // Put NZCV back.
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);
}

// Signed integer less-than-zero compare; elements of Vector that are < 0
// become all-ones, the rest become zero.
DEF_OP(VCMPLTZ) {
  const auto CmpOp = IROp->C<IR::IROp_VCMPLTZ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair16(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Input = GetVReg(CmpOp->Vector);

  if (!(HostSupportsSVE256 && Wide)) {
    // Adv.SIMD has a compare-with-zero CMLT.
    if (SingleElement) {
      cmlt(Lanes.Scalar, Result, Input);
    } else {
      cmlt(Lanes.Vector, Result.Q(), Input.Q());
    }
    return;
  }

  const auto Governing = PRED_TMP_32B.Zeroing();
  const auto Matches = ARMEmitter::PReg::p0;

  // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
  // NZCV is stashed in TMP1 for the duration of the SVE sequence.
  mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

  // Clear the temporary first so that every lane which fails the compare
  // ends up as zero in the final result.
  mov_imm(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), 0);
  cmplt(Lanes.Vector, Matches, Governing, Input.Z(), 0);
  // In the matching lanes: VTMP1 = ~x, then VTMP1 |= x, giving all ones.
  not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Input.Z());
  orr(Lanes.Vector, VTMP1.Z(), Matches.Merging(), VTMP1.Z(), Input.Z());
  mov(Result.Z(), VTMP1.Z());

  // Put NZCV back.
  msr(ARMEmitter::SystemRegister::NZCV, TMP1);
}

// Floating-point equality compare of Vector1 and Vector2; elements that
// compare equal become all-ones, the rest become zero.
DEF_OP(VFCMPEQ) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPEQ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Expand the compare predicate into a vector mask: in the matching lanes
    // Result = Lhs | ~Lhs (all ones); every other lane is zeroed by the movprfx.
    fcmeq(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  if (!SingleElement) {
    fcmeq(Lanes.Vector, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmeq(Result.H(), Lhs.H(), Rhs.H());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmeq(Lanes.Scalar, Result, Lhs, Rhs);
  }
}

// Floating-point not-equal compare of Vector1 and Vector2; elements that
// compare not-equal become all-ones, the rest become zero.
DEF_OP(VFCMPNEQ) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPNEQ>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Expand the compare predicate into a vector mask: in the matching lanes
    // Result = Lhs | ~Lhs (all ones); every other lane is zeroed by the movprfx.
    fcmne(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  // The Adv.SIMD paths compute "equal" and then invert the mask.
  if (!SingleElement) {
    fcmeq(Lanes.Vector, Result.Q(), Lhs.Q(), Rhs.Q());
    mvn(ARMEmitter::SubRegSize::i8Bit, Result.Q(), Result.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmeq(Result.H(), Lhs.H(), Rhs.H());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmeq(Lanes.Scalar, Result, Lhs, Rhs);
  }
  // The inversion is emitted regardless of which compare (if any) ran above.
  mvn(ARMEmitter::SubRegSize::i8Bit, Result.D(), Result.D());
}

// Floating-point less-than compare (Vector1 < Vector2), implemented as a
// greater-than compare with the operands swapped. Elements that satisfy it
// become all-ones, the rest become zero.
DEF_OP(VFCMPLT) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPLT>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Rhs > Lhs selects the lanes. The predicate is then expanded into a
    // vector mask: in the matching lanes Result = Rhs | ~Rhs (all ones);
    // every other lane is zeroed by the movprfx.
    fcmgt(Lanes.Vector, Matches, Governing, Rhs.Z(), Lhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Rhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Rhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  if (!SingleElement) {
    fcmgt(Lanes.Vector, Result.Q(), Rhs.Q(), Lhs.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmgt(Result.H(), Rhs.H(), Lhs.H());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmgt(Lanes.Scalar, Result, Rhs, Lhs);
  }
}

// Floating-point greater-than compare (Vector1 > Vector2); elements that
// satisfy it become all-ones, the rest become zero.
DEF_OP(VFCMPGT) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPGT>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Expand the compare predicate into a vector mask: in the matching lanes
    // Result = Lhs | ~Lhs (all ones); every other lane is zeroed by the movprfx.
    fcmgt(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  if (!SingleElement) {
    fcmgt(Lanes.Vector, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmgt(Result.H(), Lhs.H(), Rhs.H());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmgt(Lanes.Scalar, Result, Lhs, Rhs);
  }
}

// Floating-point less-than-or-equal compare (Vector1 <= Vector2), implemented
// as a greater-than-or-equal compare with the operands swapped. Elements that
// satisfy it become all-ones, the rest become zero.
DEF_OP(VFCMPLE) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPLE>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Rhs >= Lhs selects the lanes. The predicate is then expanded into a
    // vector mask: in the matching lanes Result = Rhs | ~Rhs (all ones);
    // every other lane is zeroed by the movprfx.
    fcmge(Lanes.Vector, Matches, Governing, Rhs.Z(), Lhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Rhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Rhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  if (!SingleElement) {
    fcmge(Lanes.Vector, Result.Q(), Rhs.Q(), Lhs.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmge(Result.H(), Rhs.H(), Lhs.H());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmge(Lanes.Scalar, Result, Rhs, Lhs);
  }
}

// Floating-point "ordered" compare; elements where Vector1 and Vector2 are
// ordered with respect to each other become all-ones, the rest become zero.
DEF_OP(VFCMPORD) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPORD>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Compare for unordered, then flip the resulting predicate so that it
    // selects the ordered lanes instead.
    fcmuo(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
    not_(Matches, Governing, Matches);
    // Expand the predicate into a vector mask: in the selected lanes
    // Result = Lhs | ~Lhs (all ones); every other lane is zeroed by the movprfx.
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  // Adv.SIMD: ordered == (Lhs >= Rhs) | (Rhs > Lhs).
  if (!SingleElement) {
    fcmge(Lanes.Vector, VTMP1.Q(), Lhs.Q(), Rhs.Q());
    fcmgt(Lanes.Vector, VTMP2.Q(), Rhs.Q(), Lhs.Q());
    orr(Result.Q(), VTMP1.Q(), VTMP2.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmge(VTMP1.H(), Lhs.H(), Rhs.H());
    fcmgt(VTMP2.H(), Rhs.H(), Lhs.H());
    orr(Result.D(), VTMP1.D(), VTMP2.D());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmge(Lanes.Scalar, VTMP1, Lhs, Rhs);
    fcmgt(Lanes.Scalar, VTMP2, Rhs, Lhs);
    orr(Result.D(), VTMP1.D(), VTMP2.D());
  }
}

// Floating-point "unordered" compare; elements where Vector1 and Vector2 are
// unordered with respect to each other become all-ones, the rest become zero.
DEF_OP(VFCMPUNO) {
  const auto CmpOp = IROp->C<IR::IROp_VFCMPUNO>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = CmpOp->Header.ElementSize;
  const auto Lanes = ConvertSubRegSizePair248(IROp);
  const bool SingleElement = ElemSize == VecSize;
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(CmpOp->Vector1);
  const auto Rhs = GetVReg(CmpOp->Vector2);

  if (HostSupportsSVE256 && Wide) {
    const auto Governing = PRED_TMP_32B.Zeroing();
    const auto Matches = ARMEmitter::PReg::p0;

    // Expand the compare predicate into a vector mask: in the matching lanes
    // Result = Lhs | ~Lhs (all ones); every other lane is zeroed by the movprfx.
    fcmuo(Lanes.Vector, Matches, Governing, Lhs.Z(), Rhs.Z());
    not_(Lanes.Vector, VTMP1.Z(), Matches.Merging(), Lhs.Z());
    movprfx(Lanes.Vector, Result.Z(), Matches.Zeroing(), Lhs.Z());
    orr(Lanes.Vector, Result.Z(), Matches.Merging(), Result.Z(), VTMP1.Z());
    return;
  }

  // Adv.SIMD: unordered == ~((Lhs >= Rhs) | (Rhs > Lhs)).
  if (!SingleElement) {
    fcmge(Lanes.Vector, VTMP1.Q(), Lhs.Q(), Rhs.Q());
    fcmgt(Lanes.Vector, VTMP2.Q(), Rhs.Q(), Lhs.Q());
    orr(Result.Q(), VTMP1.Q(), VTMP2.Q());
    mvn(ARMEmitter::SubRegSize::i8Bit, Result.Q(), Result.Q());
    return;
  }

  // Scalar form: half precision goes through the H register view,
  // single/double through the sized scalar encoding. Other sizes emit nothing.
  if (ElemSize == IR::OpSize::i16Bit) {
    fcmge(VTMP1.H(), Lhs.H(), Rhs.H());
    fcmgt(VTMP2.H(), Rhs.H(), Lhs.H());
    orr(Result.D(), VTMP1.D(), VTMP2.D());
    mvn(ARMEmitter::SubRegSize::i8Bit, Result.D(), Result.D());
  } else if (ElemSize == IR::OpSize::i32Bit || ElemSize == IR::OpSize::i64Bit) {
    fcmge(Lanes.Scalar, VTMP1, Lhs, Rhs);
    fcmgt(Lanes.Scalar, VTMP2, Rhs, Lhs);
    orr(Result.D(), VTMP1.D(), VTMP2.D());
    mvn(ARMEmitter::SubRegSize::i8Bit, Result.D(), Result.D());
  }
}

// Per-element logical left shift of Vector by the matching element of
// ShiftVector. With RangeCheck set, each shift amount is first clamped
// (unsigned) to the element width in bits.
DEF_OP(VUShl) {
  const auto ShiftOp = IROp->C<IR::IROp_VUShl>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = IROp->ElementSize;
  const auto Lane = ConvertSubRegSize8(IROp);
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ShiftLimit = IR::OpSizeAsBits(ElemSize);

  const auto Result = GetVReg(Node);
  auto Amounts = GetVReg(ShiftOp->ShiftVector);
  const auto Input = GetVReg(ShiftOp->Vector);
  const auto ClampAmounts = ShiftOp->RangeCheck;

  if (!(HostSupportsSVE256 && Wide)) {
    if (ClampAmounts) {
      // VTMP1 = min(ShiftLimit, amount) for every element.
      if (ElemSize < IR::OpSize::i64Bit) {
        movi(Lane, VTMP1.Q(), ShiftLimit);
        umin(Lane, VTMP1.Q(), VTMP1.Q(), Amounts.Q());
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, ShiftLimit);
        dup(Lane, VTMP1.Q(), TMP1.R());

        // Adv.SIMD UMIN has no 64-bit element variant, so select by hand:
        // keep the limit where amount > limit, otherwise insert the amount.
        cmhi(Lane, VTMP2.Q(), Amounts.Q(), VTMP1.Q());
        bif(VTMP1.Q(), Amounts.Q(), VTMP2.Q());
      }
      Amounts = VTMP1;
    }

    ushl(Lane, Result.Q(), Input.Q(), Amounts.Q());
    return;
  }

  const auto Governing = PRED_TMP_32B.Merging();

  if (ClampAmounts) {
    // VTMP2 = min(ShiftLimit, amount) for every element.
    dup_imm(Lane, VTMP2.Z(), ShiftLimit);
    umin(Lane, VTMP2.Z(), Governing, VTMP2.Z(), Amounts.Z());
    Amounts = VTMP2;
  }

  if (Result == Amounts) {
    // The destination is about to be overwritten with Input, so the shift
    // amounts have to be parked in a temporary first.
    mov(VTMP2.Z(), Amounts.Z());
    Amounts = VTMP2;
  }

  // SVE LSL is destructive; seed the destination unless it already holds Input.
  if (Result != Input) {
    movprfx(Result.Z(), Input.Z());
  }
  lsl(Lane, Result.Z(), Governing, Result.Z(), Amounts.Z());
}

// Per-element logical right shift of Vector by the matching element of
// ShiftVector. With RangeCheck set, each shift amount is first clamped
// (unsigned) to the element width in bits.
DEF_OP(VUShr) {
  const auto ShiftOp = IROp->C<IR::IROp_VUShr>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = IROp->ElementSize;
  const auto Lane = ConvertSubRegSize8(IROp);
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ShiftLimit = IR::OpSizeAsBits(ElemSize);

  const auto Result = GetVReg(Node);
  auto Amounts = GetVReg(ShiftOp->ShiftVector);
  const auto Input = GetVReg(ShiftOp->Vector);
  const auto ClampAmounts = ShiftOp->RangeCheck;

  if (!(HostSupportsSVE256 && Wide)) {
    if (ClampAmounts) {
      // VTMP1 = min(ShiftLimit, amount) for every element.
      if (ElemSize < IR::OpSize::i64Bit) {
        movi(Lane, VTMP1.Q(), ShiftLimit);
        umin(Lane, VTMP1.Q(), VTMP1.Q(), Amounts.Q());
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, ShiftLimit);
        dup(Lane, VTMP1.Q(), TMP1.R());

        // Adv.SIMD UMIN has no 64-bit element variant, so select by hand:
        // keep the limit where amount > limit, otherwise insert the amount.
        cmhi(Lane, VTMP2.Q(), Amounts.Q(), VTMP1.Q());
        bif(VTMP1.Q(), Amounts.Q(), VTMP2.Q());
      }
      Amounts = VTMP1;
    }

    // USHR only exists with an immediate shift, so a register-controlled
    // right shift is done as USHL by the negated amounts.
    neg(Lane, VTMP1.Q(), Amounts.Q());
    ushl(Lane, Result.Q(), Input.Q(), VTMP1.Q());
    return;
  }

  const auto Governing = PRED_TMP_32B.Merging();

  if (ClampAmounts) {
    // VTMP2 = min(ShiftLimit, amount) for every element.
    dup_imm(Lane, VTMP2.Z(), ShiftLimit);
    umin(Lane, VTMP2.Z(), Governing, VTMP2.Z(), Amounts.Z());
    Amounts = VTMP2;
  }

  if (Result == Amounts) {
    // The destination is about to be overwritten with Input, so the shift
    // amounts have to be parked in a temporary first.
    mov(VTMP2.Z(), Amounts.Z());
    Amounts = VTMP2;
  }

  // SVE LSR is destructive; seed the destination unless it already holds Input.
  if (Result != Input) {
    movprfx(Result.Z(), Input.Z());
  }
  lsr(Lane, Result.Z(), Governing, Result.Z(), Amounts.Z());
}

// Per-element arithmetic right shift of Vector by the matching element of
// ShiftVector. With RangeCheck set, each shift amount is first clamped
// (unsigned) to the element width in bits minus one.
DEF_OP(VSShr) {
  const auto ShiftOp = IROp->C<IR::IROp_VSShr>();
  const auto VecSize = IROp->Size;

  const auto ElemSize = IROp->ElementSize;
  const auto Lane = ConvertSubRegSize8(IROp);
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ShiftLimit = IR::OpSizeAsBits(ElemSize) - 1;
  const auto ClampAmounts = ShiftOp->RangeCheck;

  const auto Result = GetVReg(Node);
  auto Amounts = GetVReg(ShiftOp->ShiftVector);
  const auto Input = GetVReg(ShiftOp->Vector);

  if (!(HostSupportsSVE256 && Wide)) {
    if (ClampAmounts) {
      // VTMP1 = min(ShiftLimit, amount) for every element.
      if (ElemSize < IR::OpSize::i64Bit) {
        movi(Lane, VTMP1.Q(), ShiftLimit);
        umin(Lane, VTMP1.Q(), VTMP1.Q(), Amounts.Q());
      } else {
        LoadConstant(ARMEmitter::Size::i64Bit, TMP1, ShiftLimit);
        dup(Lane, VTMP1.Q(), TMP1.R());

        // Adv.SIMD UMIN has no 64-bit element variant, so select by hand:
        // keep the limit where amount > limit, otherwise insert the amount.
        cmhi(Lane, VTMP2.Q(), Amounts.Q(), VTMP1.Q());
        bif(VTMP1.Q(), Amounts.Q(), VTMP2.Q());
      }
      Amounts = VTMP1;
    }

    // SSHR only exists with an immediate shift, so a register-controlled
    // right shift is done as SSHL by the negated amounts.
    neg(Lane, VTMP1.Q(), Amounts.Q());
    sshl(Lane, Result.Q(), Input.Q(), VTMP1.Q());
    return;
  }

  const auto Governing = PRED_TMP_32B.Merging();

  if (ClampAmounts) {
    // VTMP1 = min(ShiftLimit, amount) for every element.
    dup_imm(Lane, VTMP1.Z(), ShiftLimit);
    umin(Lane, VTMP1.Z(), Governing, VTMP1.Z(), Amounts.Z());
    Amounts = VTMP1;
  }

  if (Result == Amounts) {
    // The destination is about to be overwritten with Input, so the shift
    // amounts have to be parked in a temporary first.
    mov(VTMP1.Z(), Amounts.Z());
    Amounts = VTMP1;
  }

  // SVE ASR is destructive; seed the destination unless it already holds Input.
  if (Result != Input) {
    movprfx(Result.Z(), Input.Z());
  }
  asr(Lane, Result.Z(), Governing, Result.Z(), Amounts.Z());
}

// Logical left shift of every element of Vector by one shared amount, taken
// from element 0 of ShiftScalar.
DEF_OP(VUShlS) {
  const auto ShiftOp = IROp->C<IR::IROp_VUShlS>();
  const auto VecSize = IROp->Size;

  const auto Lane = ConvertSubRegSize16(IROp);
  const bool Wide = VecSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Wide || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Amount = GetVReg(ShiftOp->ShiftScalar);
  const auto Input = GetVReg(ShiftOp->Vector);

  if (!(HostSupportsSVE256 && Wide)) {
    // Broadcast the shift amount, then shift with Adv.SIMD USHL.
    dup(Lane, VTMP1.Q(), Amount.Q(), 0);
    ushl(Lane, Result.Q(), Input.Q(), VTMP1.Q());
    return;
  }

  const auto Governing = PRED_TMP_32B.Merging();

  // Broadcast the shift amount into a temporary before the destination
  // is touched.
  dup(Lane, VTMP1.Z(), Amount.Z(), 0);
  // SVE LSL is destructive; seed the destination unless it already holds Input.
  if (Result != Input) {
    movprfx(Result.Z(), Input.Z());
  }
  lsl(Lane, Result.Z(), Governing, Result.Z(), VTMP1.Z());
}

DEF_OP(VUShrS) {
  // Logical right shift of every element of Vector by a single shift amount
  // taken from element 0 of ShiftScalar.
  const auto Op = IROp->C<IR::IROp_VUShrS>();

  const auto EltSize = ConvertSubRegSize16(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto ShiftScalar = GetVReg(Op->ShiftScalar);
  const auto Vector = GetVReg(Op->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // Adv.SIMD has no register-form right shift; a right shift is done by
    // feeding negated shift amounts to USHL.
    dup(EltSize, VTMP1.Q(), ShiftScalar.Q(), 0);
    neg(EltSize, VTMP1.Q(), VTMP1.Q());
    ushl(EltSize, Dst.Q(), Vector.Q(), VTMP1.Q());
    return;
  }

  // SVE path: broadcast the shift amount, then stage the input in Dst
  // because the predicated LSR overwrites its first source operand.
  dup(EltSize, VTMP1.Z(), ShiftScalar.Z(), 0);
  if (Dst != Vector) {
    movprfx(Dst.Z(), Vector.Z());
  }
  lsr(EltSize, Dst.Z(), PRED_TMP_32B.Merging(), Dst.Z(), VTMP1.Z());
}

// Logical right shift of every element of Vector by a shift amount held in the
// low 64 bits of ShiftScalar. Three code paths are emitted depending on host
// support: SVE 256-bit, SVE 128-bit, and plain Adv.SIMD.
DEF_OP(VUShrSWide) {
  const auto Op = IROp->C<IR::IROp_VUShrSWide>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto ShiftScalar = GetVReg(Op->ShiftScalar);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    // Broadcast the 64-bit shift amount into the temporary. Since the shift
    // operand always ends up in VTMP1 here, it can never alias Dst.
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
    if (Dst != Vector) {
      // NOTE: SVE LSR is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    // 64-bit elements use the regular LSR; narrower elements use the
    // "wide" form that takes 64-bit shift amounts.
    if (ElementSize == IR::OpSize::i64Bit) {
      lsr(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    } else {
      lsr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    }
  } else if (HostSupportsSVE128) {
    const auto Mask = PRED_TMP_16B.Merging();

    auto ShiftRegister = ShiftScalar;
    if (OpSize > IR::OpSize::i64Bit) {
      // SVE wide shifts don't need to duplicate the low bits unless the OpSize is 16-bytes
      // Slightly more optimal for 8-byte opsize.
      dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
      ShiftRegister = VTMP1;
    }

    if (Dst == ShiftRegister) {
      // If destination aliases the shift vector then we need to move it temporarily.
      // (Only reachable when the dup above was skipped, i.e. ShiftRegister is still ShiftScalar.)
      mov(VTMP1.Z(), ShiftRegister.Z());
      ShiftRegister = VTMP1;
    }

    if (Dst != Vector) {
      // NOTE: SVE LSR is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    if (ElementSize == IR::OpSize::i64Bit) {
      lsr(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    } else {
      lsr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    }
  } else {
    // uqshl + ushr of 57-bits leaves 7-bits remaining.
    // This saturates the 64-bit shift value from an arbitrary 64-bit length
    // variable to maximum of 0x7F.
    // This allows the shift to fit within the width of the signed 8-bits
    // that ASIMD's vector shift requires.
    uqshl(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, ShiftScalar, 57);
    ushr(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, VTMP1, 57);
    // Broadcast the clamped amount, then negate it so that USHL shifts right.
    dup(SubRegSize, VTMP1.Q(), VTMP1.Q(), 0);
    neg(SubRegSize, VTMP1.Q(), VTMP1.Q());
    ushl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q());
  }
}

// Arithmetic (sign-preserving) right shift of every element of Vector by a
// shift amount held in the low 64 bits of ShiftScalar. Three code paths are
// emitted depending on host support: SVE 256-bit, SVE 128-bit, and Adv.SIMD.
DEF_OP(VSShrSWide) {
  const auto Op = IROp->C<IR::IROp_VSShrSWide>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto ShiftScalar = GetVReg(Op->ShiftScalar);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    // Broadcast the 64-bit shift amount into the temporary. Since the shift
    // operand always ends up in VTMP1 here, it can never alias Dst.
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
    if (Dst != Vector) {
      // NOTE: SVE ASR is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    // 64-bit elements use the regular ASR; narrower elements use the
    // "wide" form that takes 64-bit shift amounts.
    if (ElementSize == IR::OpSize::i64Bit) {
      asr(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    } else {
      asr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    }
  } else if (HostSupportsSVE128) {
    const auto Mask = PRED_TMP_16B.Merging();

    auto ShiftRegister = ShiftScalar;
    if (OpSize > IR::OpSize::i64Bit) {
      // SVE wide shifts don't need to duplicate the low bits unless the OpSize is 16-bytes
      // Slightly more optimal for 8-byte opsize.
      dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
      ShiftRegister = VTMP1;
    }

    if (Dst == ShiftRegister) {
      // If destination aliases the shift vector then we need to move it temporarily.
      // (Only reachable when the dup above was skipped, i.e. ShiftRegister is still ShiftScalar.)
      mov(VTMP1.Z(), ShiftRegister.Z());
      ShiftRegister = VTMP1;
    }

    if (Dst != Vector) {
      // NOTE: SVE ASR is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    if (ElementSize == IR::OpSize::i64Bit) {
      asr(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    } else {
      asr_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    }
  } else {
    // uqshl + ushr of 57-bits leaves 7-bits remaining.
    // This saturates the 64-bit shift value from an arbitrary 64-bit length
    // variable to maximum of 0x7F.
    // This allows the shift to fit within the width of the signed 8-bits
    // that ASIMD's vector shift requires.
    uqshl(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, ShiftScalar, 57);
    ushr(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, VTMP1, 57);
    // Broadcast the clamped amount, then negate it so that SSHL shifts right.
    dup(SubRegSize, VTMP1.Q(), VTMP1.Q(), 0);
    neg(SubRegSize, VTMP1.Q(), VTMP1.Q());
    sshl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q());
  }
}

// Logical left shift of every element of Vector by a shift amount held in the
// low 64 bits of ShiftScalar. Three code paths are emitted depending on host
// support: SVE 256-bit, SVE 128-bit, and plain Adv.SIMD.
DEF_OP(VUShlSWide) {
  const auto Op = IROp->C<IR::IROp_VUShlSWide>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto ShiftScalar = GetVReg(Op->ShiftScalar);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    // Broadcast the 64-bit shift amount into the temporary. Since the shift
    // operand always ends up in VTMP1 here, it can never alias Dst.
    dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
    if (Dst != Vector) {
      // NOTE: SVE LSL is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    // 64-bit elements use the regular LSL; narrower elements use the
    // "wide" form that takes 64-bit shift amounts.
    if (ElementSize == IR::OpSize::i64Bit) {
      lsl(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    } else {
      lsl_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP1.Z());
    }
  } else if (HostSupportsSVE128) {
    const auto Mask = PRED_TMP_16B.Merging();

    auto ShiftRegister = ShiftScalar;
    if (OpSize > IR::OpSize::i64Bit) {
      // SVE wide shifts don't need to duplicate the low bits unless the OpSize is 16-bytes
      // Slightly more optimal for 8-byte opsize.
      dup(ARMEmitter::SubRegSize::i64Bit, VTMP1.Z(), ShiftScalar.Z(), 0);
      ShiftRegister = VTMP1;
    }

    if (Dst == ShiftRegister) {
      // If destination aliases the shift vector then we need to move it temporarily.
      // (Only reachable when the dup above was skipped, i.e. ShiftRegister is still ShiftScalar.)
      mov(VTMP1.Z(), ShiftRegister.Z());
      ShiftRegister = VTMP1;
    }

    if (Dst != Vector) {
      // NOTE: SVE LSL is a destructive operation.
      movprfx(Dst.Z(), Vector.Z());
    }
    if (ElementSize == IR::OpSize::i64Bit) {
      lsl(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    } else {
      lsl_wide(SubRegSize, Dst.Z(), Mask, Dst.Z(), ShiftRegister.Z());
    }
  } else {
    // uqshl + ushr of 57-bits leaves 7-bits remaining.
    // This saturates the 64-bit shift value from an arbitrary 64-bit length
    // variable to maximum of 0x7F.
    // This allows the shift to fit within the width of the signed 8-bits
    // that ASIMD's vector shift requires.
    uqshl(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, ShiftScalar, 57);
    ushr(ARMEmitter::ScalarRegSize::i64Bit, VTMP1, VTMP1, 57);
    // Left shift: the clamped amount is used as-is (no negation needed).
    dup(SubRegSize, VTMP1.Q(), VTMP1.Q(), 0);
    ushl(SubRegSize, Dst.Q(), Vector.Q(), VTMP1.Q());
  }
}

DEF_OP(VSShrS) {
  // Arithmetic right shift of every element of Vector by a single shift
  // amount taken from element 0 of ShiftScalar.
  const auto Op = IROp->C<IR::IROp_VSShrS>();

  const auto EltSize = ConvertSubRegSize16(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto ShiftScalar = GetVReg(Op->ShiftScalar);
  const auto Vector = GetVReg(Op->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // Adv.SIMD has no register-form right shift; a right shift is done by
    // feeding negated shift amounts to SSHL.
    dup(EltSize, VTMP1.Q(), ShiftScalar.Q(), 0);
    neg(EltSize, VTMP1.Q(), VTMP1.Q());
    sshl(EltSize, Dst.Q(), Vector.Q(), VTMP1.Q());
    return;
  }

  // SVE path: broadcast the shift amount, then stage the input in Dst
  // because the predicated ASR overwrites its first source operand.
  dup(EltSize, VTMP1.Z(), ShiftScalar.Z(), 0);
  if (Dst != Vector) {
    movprfx(Dst.Z(), Vector.Z());
  }
  asr(EltSize, Dst.Z(), PRED_TMP_32B.Merging(), Dst.Z(), VTMP1.Z());
}

// Produces DestVector with element DestIdx replaced by element SrcIdx of
// SrcVector. The result is written to Dst, which may alias either input.
DEF_OP(VInsElement) {
  const auto Op = IROp->C<IR::IROp_VInsElement>();
  const auto OpSize = IROp->Size;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize16(IROp);

  const uint32_t DestIdx = Op->DestIdx;
  const uint32_t SrcIdx = Op->SrcIdx;

  const auto Dst = GetVReg(Node);
  const auto SrcVector = GetVReg(Op->SrcVector);
  // Not const: on the Adv.SIMD path this may be redirected to VTMP1.
  auto Reg = GetVReg(Op->DestVector);

  if (HostSupportsSVE256 && Is256Bit) {
    // Broadcast our source value across a temporary,
    // then combine with the destination.
    // This happens before Dst is written, so it is safe even if Dst aliases SrcVector.
    dup(SubRegSize, VTMP2.Z(), SrcVector.Z(), SrcIdx);

    // We don't need to move the data unnecessarily if
    // DestVector just so happens to also be the IR op
    // destination.
    if (Dst != Reg) {
      mov(Dst.Z(), Reg.Z());
    }

    // p0 is used as a scratch predicate selecting the single element to overwrite.
    constexpr auto Predicate = ARMEmitter::PReg::p0;

    if (ElementSize == IR::OpSize::i128Bit) {
      // Only two 128-bit lanes exist: either the low 16 bytes or the high 16 bytes.
      if (DestIdx == 0) {
        mov(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), PRED_TMP_16B.Merging(), VTMP2.Z());
      } else {
        // Upper lane = all 32 bytes minus the low 16 bytes.
        not_(Predicate, PRED_TMP_32B.Zeroing(), PRED_TMP_16B);
        mov(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), Predicate.Merging(), VTMP2.Z());
      }
    } else {
      // UpperBound is the number of elements in 16 bytes. The index vector is biased
      // by -UpperBound and the compare target is biased the same way.
      // NOTE(review): presumably this keeps the compare immediate within the range the
      // SVE compare-immediate encoding accepts - confirm against the emitter.
      const auto UpperBound = 16 >> FEXCore::ilog2(IR::OpSizeToSize(ElementSize));
      const auto TargetElement = static_cast<int>(DestIdx) - UpperBound;

      // FIXME: We should rework this op to avoid the NZCV spill/fill dance.
      mrs(TMP1, ARMEmitter::SystemRegister::NZCV);

      // Build {-UpperBound, -UpperBound + 1, ...}, find the lane equal to the biased
      // target, and merge the broadcast value into just that lane.
      index(SubRegSize, VTMP1.Z(), -UpperBound, 1);
      cmpeq(SubRegSize, Predicate, PRED_TMP_32B.Zeroing(), VTMP1.Z(), TargetElement);
      mov(SubRegSize, Dst.Z(), Predicate.Merging(), VTMP2.Z());

      // Restore NZCV
      msr(ARMEmitter::SystemRegister::NZCV, TMP1);
    }
  } else {
    // If nothing aliases the destination, then we can just
    // move the DestVector over and directly insert.
    if (Dst != Reg && Dst != SrcVector) {
      mov(Dst.Q(), Reg.Q());
      ins(SubRegSize, Dst.Q(), DestIdx, SrcVector.Q(), SrcIdx);
      return;
    }

    // If our vector data to insert into is within a register
    // that aliases the destination, then we can avoid using a
    // temporary and just perform the insert.
    //
    // Otherwise, if the source vector to select from aliases
    // the destination, then we hit the worst case where we
    // need to use a temporary to avoid clobbering data.
    if (Dst != Reg) {
      mov(VTMP1.Q(), Reg.Q());
      Reg = VTMP1;
    }

    ins(SubRegSize, Reg.Q(), DestIdx, SrcVector.Q(), SrcIdx);

    // Only true when the temporary was used above; copy the result out of it.
    if (Dst != Reg) {
      mov(Dst.Q(), Reg.Q());
    }
  }
}

DEF_OP(VDupElement) {
  // Broadcasts element `Index` of Vector across every element of Dst.
  const auto Op = IROp->C<IR::IROp_VDupElement>();
  const auto OpSize = IROp->Size;

  const auto Lane = Op->Index;
  const auto EltSize = ConvertSubRegSize16(IROp);
  const bool Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    // Full-width SVE broadcast.
    dup(EltSize, Dst.Z(), Vector.Z(), Lane);
    return;
  }

  // Adv.SIMD: pick the register width from the operation size.
  if (OpSize == IR::OpSize::i128Bit) {
    dup(EltSize, Dst.Q(), Vector.Q(), Lane);
  } else {
    dup(EltSize, Dst.D(), Vector.D(), Lane);
  }
}

// Extracts an OpSize-wide window out of the concatenation of two vectors,
// starting Index elements (of ElementSize bytes each) into the pair, using EXT.
DEF_OP(VExtr) {
  const auto Op = IROp->C<IR::IROp_VExtr>();
  const auto OpSize = IROp->Size;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  // AArch64 ext op has bit arrangement as [Vm:Vn] so arguments need to be swapped
  // (hence UpperBits is loaded from VectorLower and LowerBits from VectorUpper).
  const auto Dst = GetVReg(Node);
  auto UpperBits = GetVReg(Op->VectorLower);
  auto LowerBits = GetVReg(Op->VectorUpper);

  const auto ElementSize = Op->Header.ElementSize;
  auto Index = Op->Index;

  // An index of at least a whole vector is handled by shifting the operands
  // over by one vector and feeding in zeroes.
  if (Index >= IR::OpSizeToSize(OpSize)) {
    // Upper bits have moved in to the lower bits
    LowerBits = UpperBits;

    // Upper bits are all now zero
    UpperBits = VTMP1;
    movi(ARMEmitter::SubRegSize::i64Bit, VTMP1.Q(), 0);
    Index -= IR::OpSizeToSize(OpSize);
  }

  // EXT takes its position in bytes.
  const auto CopyFromByte = Index * IR::OpSizeToSize(ElementSize);

  if (HostSupportsSVE256 && Is256Bit) {
    // The destructive SVE EXT overwrites its first source, so the first
    // source has to be staged in the register that receives the result.
    if (Dst == LowerBits) {
      // Trivial case where we don't need to do any moves
      ext<ARMEmitter::OpType::Destructive>(Dst.Z(), Dst.Z(), UpperBits.Z(), CopyFromByte);
    } else if (Dst == UpperBits) {
      // Dst holds the second source; writing LowerBits into it would clobber
      // that operand, so build the result in VTMP2 and copy it over afterwards.
      movprfx(VTMP2.Z(), LowerBits.Z());
      ext<ARMEmitter::OpType::Destructive>(VTMP2.Z(), VTMP2.Z(), UpperBits.Z(), CopyFromByte);
      mov(Dst.Z(), VTMP2.Z());
    } else {
      // No registers alias the destination, so we can safely move into it.
      movprfx(Dst.Z(), LowerBits.Z());
      ext<ARMEmitter::OpType::Destructive>(Dst.Z(), Dst.Z(), UpperBits.Z(), CopyFromByte);
    }
  } else {
    // Adv.SIMD EXT has a separate destination, so no alias handling is needed.
    if (OpSize == IR::OpSize::i64Bit) {
      ext(Dst.D(), LowerBits.D(), UpperBits.D(), CopyFromByte);
    } else {
      ext(Dst.Q(), LowerBits.Q(), UpperBits.Q(), CopyFromByte);
    }
  }
}

DEF_OP(VUShrI) {
  // Logical right shift of every element of Vector by an immediate.
  const auto Op = IROp->C<IR::IROp_VUShrI>();

  const auto BitShift = Op->BitShift;
  const auto ElementSize = Op->Header.ElementSize;
  const auto EltSize = ConvertSubRegSize8(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // Shifting by the element width or more leaves nothing behind.
  if (BitShift >= IR::OpSizeAsBits(ElementSize)) {
    movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0);
    return;
  }

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;

  // A zero shift degenerates into a plain (possibly elided) register move.
  if (BitShift == 0) {
    if (Dst != Vector) {
      if (UseSVE) {
        mov(Dst.Z(), Vector.Z());
      } else {
        mov(Dst.Q(), Vector.Q());
      }
    }
    return;
  }

  if (UseSVE) {
    // The predicated SVE LSR overwrites its source, so stage the input in
    // Dst first unless it is already there.
    if (Dst != Vector) {
      movprfx(Dst.Z(), Vector.Z());
    }
    lsr(EltSize, Dst.Z(), PRED_TMP_32B.Merging(), Dst.Z(), BitShift);
  } else {
    ushr(EltSize, Dst.Q(), Vector.Q(), BitShift);
  }
}

// Unsigned shift-right-and-accumulate by immediate:
// each element of Vector is shifted right by BitShift and added to the
// matching element of DestVector; the sum is written to Dst.
//
// USRA accumulates into the register passed as its destination, so the
// accumulator (DestVector) must be in that register before the instruction
// is issued. Three aliasing situations have to be handled:
//   1. Dst == DestVector: accumulate in place.
//   2. Dst aliases neither input: copy DestVector into Dst, then accumulate.
//   3. Dst == Vector: copying DestVector into Dst would destroy the shift
//      input, so accumulate in VTMP1 and copy the result to Dst afterwards.
DEF_OP(VUShraI) {
  const auto Op = IROp->C<IR::IROp_VUShraI>();
  const auto OpSize = IROp->Size;

  const auto BitShift = Op->BitShift;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto DestVector = GetVReg(Op->DestVector);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    if (Dst == DestVector) {
      // Case 1: accumulator already lives in Dst.
      usra(SubRegSize, Dst.Z(), Vector.Z(), BitShift);
    } else if (Dst != Vector) {
      // Case 2: nothing aliases Dst, so it can be used as the accumulator directly.
      mov(Dst.Z(), DestVector.Z());
      usra(SubRegSize, Dst.Z(), Vector.Z(), BitShift);
    } else {
      // Case 3: Dst holds the shift input. Accumulate into the temporary;
      // accumulating into Dst here would add Vector onto itself and the
      // following move would then discard that result entirely.
      mov(VTMP1.Z(), DestVector.Z());
      usra(SubRegSize, VTMP1.Z(), Vector.Z(), BitShift);
      mov(Dst.Z(), VTMP1.Z());
    }
  } else {
    if (Dst == DestVector) {
      // Case 1: accumulator already lives in Dst.
      usra(SubRegSize, Dst.Q(), Vector.Q(), BitShift);
    } else if (Dst != Vector) {
      // Case 2: nothing aliases Dst, so it can be used as the accumulator directly.
      mov(Dst.Q(), DestVector.Q());
      usra(SubRegSize, Dst.Q(), Vector.Q(), BitShift);
    } else {
      // Case 3: Dst holds the shift input; go through the temporary.
      mov(VTMP1.Q(), DestVector.Q());
      usra(SubRegSize, VTMP1.Q(), Vector.Q(), BitShift);
      mov(Dst.Q(), VTMP1.Q());
    }
  }
}

DEF_OP(VSShrI) {
  // Arithmetic right shift of every element of Vector by an immediate.
  const auto Op = IROp->C<IR::IROp_VSShrI>();

  const auto ElementSize = Op->Header.ElementSize;
  LOGMAN_THROW_A_FMT(ElementSize >= IR::OpSize::i8Bit && ElementSize <= IR::OpSize::i64Bit, "Invalid element size");
  const auto EltSize = ConvertSubRegSize8(IROp);
  // The requested shift is capped at (element bits - 1).
  const auto Shift = std::min<uint8_t>(IR::OpSizeAsBits(ElementSize) - 1, Op->BitShift);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;

  // A zero shift degenerates into a plain (possibly elided) register move.
  if (Shift == 0) {
    if (Dst != Vector) {
      if (UseSVE) {
        mov(Dst.Z(), Vector.Z());
      } else {
        mov(Dst.Q(), Vector.Q());
      }
    }
    return;
  }

  if (UseSVE) {
    // The predicated SVE ASR overwrites its source, so stage the input in
    // Dst first unless it is already there.
    if (Dst != Vector) {
      movprfx(Dst.Z(), Vector.Z());
    }
    asr(EltSize, Dst.Z(), PRED_TMP_32B.Merging(), Dst.Z(), Shift);
  } else {
    sshr(EltSize, Dst.Q(), Vector.Q(), Shift);
  }
}

DEF_OP(VShlI) {
  // Left shift of every element of Vector by an immediate.
  const auto Op = IROp->C<IR::IROp_VShlI>();

  const auto BitShift = Op->BitShift;
  const auto ElementSize = Op->Header.ElementSize;
  const auto EltSize = ConvertSubRegSize8(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // Shifting by the element width or more leaves nothing behind.
  if (BitShift >= IR::OpSizeAsBits(ElementSize)) {
    movi(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), 0);
    return;
  }

  const bool UseSVE = HostSupportsSVE256 && Is256Bit;

  // A zero shift degenerates into a plain (possibly elided) register move.
  if (BitShift == 0) {
    if (Dst != Vector) {
      if (UseSVE) {
        mov(Dst.Z(), Vector.Z());
      } else {
        mov(Dst.Q(), Vector.Q());
      }
    }
    return;
  }

  if (UseSVE) {
    // The predicated SVE LSL overwrites its source, so stage the input in
    // Dst first unless it is already there.
    if (Dst != Vector) {
      movprfx(Dst.Z(), Vector.Z());
    }
    lsl(EltSize, Dst.Z(), PRED_TMP_32B.Merging(), Dst.Z(), BitShift);
  } else {
    shl(EltSize, Dst.Q(), Vector.Q(), BitShift);
  }
}

DEF_OP(VUShrNI) {
  // Right shift by immediate with narrowing of each element.
  const auto Op = IROp->C<IR::IROp_VUShrNI>();

  const auto BitShift = Op->BitShift;
  const auto EltSize = ConvertSubRegSize4(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (HostSupportsSVE256 && Is256Bit) {
    // Narrow with the "bottom" form, then gather the even elements
    // together with UZP1.
    shrnb(EltSize, Dst.Z(), Vector.Z(), BitShift);
    uzp1(EltSize, Dst.Z(), Dst.Z(), Dst.Z());
    return;
  }

  // Adv.SIMD: a zero shift is emitted as a plain narrowing move (XTN),
  // anything else as SHRN.
  if (BitShift != 0) {
    shrn(EltSize, Dst.D(), Vector.D(), BitShift);
  } else {
    xtn(EltSize, Dst.D(), Vector.D());
  }
}

DEF_OP(VUShrNI2) {
  // Right shift by immediate with narrowing of VectorUpper; the narrowed
  // result is placed after the low half taken from VectorLower.
  const auto Op = IROp->C<IR::IROp_VUShrNI2>();

  const auto BitShift = Op->BitShift;
  const auto EltSize = ConvertSubRegSize8(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto VectorLower = GetVReg(Op->VectorLower);
  const auto VectorUpper = GetVReg(Op->VectorUpper);

  if (HostSupportsSVE256 && Is256Bit) {
    // Narrow the upper input into the temporary and compact it.
    shrnb(EltSize, VTMP2.Z(), VectorUpper.Z(), BitShift);
    uzp1(EltSize, VTMP2.Z(), VTMP2.Z(), VTMP2.Z());

    // Destructive SPLICE: stage VectorLower in Dst, then join the first
    // 16 bytes of it with the narrowed data.
    if (Dst != VectorLower) {
      movprfx(Dst.Z(), VectorLower.Z());
    }
    splice<ARMEmitter::OpType::Destructive>(EltSize, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP2.Z());
    return;
  }

  // Adv.SIMD: SHRN2 writes the upper half of its destination and keeps the
  // lower half, so work in a temporary unless VectorLower is already Dst.
  const bool NeedsTemp = Dst != VectorLower;
  auto Accum = VectorLower;
  if (NeedsTemp) {
    mov(VTMP1.Q(), VectorLower.Q());
    Accum = VTMP1;
  }

  shrn2(EltSize, Accum.Q(), VectorUpper.Q(), BitShift);

  if (NeedsTemp) {
    mov(Dst.Q(), Accum.Q());
  }
}

DEF_OP(VSXTL) {
  // Sign-extending lengthen of the low half of Vector.
  const auto Op = IROp->C<IR::IROp_VSXTL>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // SVE is used either for a 256-bit op on a 256-bit SVE host, or for a
  // narrower op on a host that has SVE128 but not SVE256.
  const bool UseSVE256 = HostSupportsSVE256 && Is256Bit;
  const bool UseSVE128 = HostSupportsSVE128 && !HostSupportsSVE256 && !Is256Bit;

  if (UseSVE128 || UseSVE256) {
    sunpklo(EltSize, Dst.Z(), Vector.Z());
  } else {
    sxtl(EltSize, Dst.D(), Vector.D());
  }
}

DEF_OP(VSXTL2) {
  // Sign-extending lengthen of the high half of Vector.
  const auto Op = IROp->C<IR::IROp_VSXTL2>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // SVE is used either for a 256-bit op on a 256-bit SVE host, or for a
  // narrower op on a host that has SVE128 but not SVE256.
  const bool UseSVE256 = HostSupportsSVE256 && Is256Bit;
  const bool UseSVE128 = HostSupportsSVE128 && !HostSupportsSVE256 && !Is256Bit;

  if (UseSVE128 || UseSVE256) {
    sunpkhi(EltSize, Dst.Z(), Vector.Z());
  } else {
    sxtl2(EltSize, Dst.Q(), Vector.Q());
  }
}

DEF_OP(VSSHLL) {
  // Sign-extending lengthen of the low half of Vector followed by a left
  // shift by an immediate.
  const auto Op = IROp->C<IR::IROp_VSSHLL>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);
  const auto BitShift = Op->BitShift;
  // The shift must be smaller than the width of the (half-size) source element.
  LOGMAN_THROW_A_FMT(BitShift < IR::OpSizeAsBits(IROp->ElementSize / 2), "Bitshift size too large for source element size: {} < {}",
                     BitShift, IR::OpSizeAsBits(IROp->ElementSize / 2));

  if (!Is256Bit) {
    // Adv.SIMD does the extend and the shift in one instruction.
    sshll(EltSize, Dst.D(), Vector.D(), BitShift);
    return;
  }

  // SVE: unpack (sign-extend) the low half, then shift the widened elements.
  sunpklo(EltSize, Dst.Z(), Vector.Z());
  lsl(EltSize, Dst.Z(), Dst.Z(), BitShift);
}

DEF_OP(VSSHLL2) {
  // Sign-extending lengthen of the high half of Vector followed by a left
  // shift by an immediate.
  const auto Op = IROp->C<IR::IROp_VSSHLL2>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);
  const auto BitShift = Op->BitShift;
  // The shift must be smaller than the width of the (half-size) source element.
  LOGMAN_THROW_A_FMT(BitShift < IR::OpSizeAsBits(IROp->ElementSize / 2), "Bitshift size too large for source element size: {} < {}",
                     BitShift, IR::OpSizeAsBits(IROp->ElementSize / 2));

  if (!Is256Bit) {
    // Adv.SIMD does the extend and the shift in one instruction.
    sshll2(EltSize, Dst.Q(), Vector.Q(), BitShift);
    return;
  }

  // SVE: unpack (sign-extend) the high half, then shift the widened elements.
  sunpkhi(EltSize, Dst.Z(), Vector.Z());
  lsl(EltSize, Dst.Z(), Dst.Z(), BitShift);
}

DEF_OP(VUXTL) {
  // Zero-extending lengthen of the low half of Vector.
  const auto Op = IROp->C<IR::IROp_VUXTL>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // SVE is used either for a 256-bit op on a 256-bit SVE host, or for a
  // narrower op on a host that has SVE128 but not SVE256.
  const bool UseSVE256 = HostSupportsSVE256 && Is256Bit;
  const bool UseSVE128 = HostSupportsSVE128 && !HostSupportsSVE256 && !Is256Bit;

  if (UseSVE128 || UseSVE256) {
    uunpklo(EltSize, Dst.Z(), Vector.Z());
  } else {
    uxtl(EltSize, Dst.D(), Vector.D());
  }
}

DEF_OP(VUXTL2) {
  // Zero-extending lengthen of the high half of Vector.
  const auto Op = IROp->C<IR::IROp_VUXTL2>();

  const auto EltSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  // SVE is used either for a 256-bit op on a 256-bit SVE host, or for a
  // narrower op on a host that has SVE128 but not SVE256.
  const bool UseSVE256 = HostSupportsSVE256 && Is256Bit;
  const bool UseSVE128 = HostSupportsSVE128 && !HostSupportsSVE256 && !Is256Bit;

  if (UseSVE128 || UseSVE256) {
    uunpkhi(EltSize, Dst.Z(), Vector.Z());
  } else {
    uxtl2(EltSize, Dst.Q(), Vector.Q());
  }
}

DEF_OP(VSQXTN) {
  // Signed saturating narrow of every element of Vector.
  const auto Op = IROp->C<IR::IROp_VSQXTN>();

  const auto EltSize = ConvertSubRegSize4(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    sqxtn(EltSize, Dst, Vector);
    return;
  }

  // SVE SQXTNB / SQXTNT behave a little differently from most other
  // bottom/top instruction pairs.
  //
  // Usually the bottom form operates on even elements and the top form on
  // odd elements, each storing its result into the next subsequent element
  // of the destination.
  //
  // SQXTNB and SQXTNT instead process the same source elements and differ
  // only in where results go: into the even (bottom) or the odd (top)
  // destination elements. The bottom form additionally zeroes the odd
  // destination elements, while the top form leaves the even elements
  // untouched (similar to Adv.SIMD's SQXTN/SQXTN2).
  //
  // e.g. for a 64-bit (for brevity) vector with four 16-bit elements:
  //
  // ╔═══════════╗╔═══════════╗╔═══════════╗╔═══════════╗
  // ║  Value 3  ║║  Value 2  ║║  Value 1  ║║  Value 0  ║
  // ╚═══════════╝╚═══════════╝╚═══════════╝╚═══════════╝
  //
  // SQXTNB Dst.VnB, Src.VnH produces:
  //
  // ╔═════╗╔═════╗╔═════╗╔═════╗╔═════╗╔═════╗╔═════╗╔═════╗
  // ║  0  ║║ V3  ║║  0  ║║ V2  ║║  0  ║║ V1  ║║  0  ║║ V0  ║
  // ╚═════╝╚═════╝╚═════╝╚═════╝╚═════╝╚═════╝╚═════╝╚═════╝
  //
  // That is convenient: only the bottom variant is needed, after which
  // SVE UZP1 concatenates all of the even elements.
  sqxtnb(EltSize, Dst.Z(), Vector.Z());
  uzp1(EltSize, Dst.Z(), Dst.Z(), Dst.Z());
}

DEF_OP(VSQXTN2) {
  // Signed saturating narrow of VectorUpper; the narrowed data is placed
  // after the low half taken from VectorLower.
  const auto Op = IROp->C<IR::IROp_VSQXTN2>();
  const auto OpSize = IROp->Size;

  const auto EltSize = ConvertSubRegSize4(IROp);
  const bool Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto VectorLower = GetVReg(Op->VectorLower);
  const auto VectorUpper = GetVReg(Op->VectorUpper);

  if (HostSupportsSVE256 && Is256Bit) {
    // Narrow the upper input into the temporary and compact it.
    sqxtnb(EltSize, VTMP2.Z(), VectorUpper.Z());
    uzp1(EltSize, VTMP2.Z(), VTMP2.Z(), VTMP2.Z());

    // The destructive SPLICE is required here: the constructive variant
    // takes a register list, and VectorLower / VectorUpper are not
    // guaranteed to be consecutively numbered registers.
    if (Dst != VectorLower) {
      movprfx(Dst.Z(), VectorLower.Z());
    }
    // The 16-byte predicate makes SPLICE keep only the first 16 bytes of
    // the lower vector and then append the start of the narrowed upper
    // data right behind them.
    splice<ARMEmitter::OpType::Destructive>(EltSize, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP2.Z());
    return;
  }

  if (OpSize != IR::OpSize::i64Bit) {
    // 128-bit: SQXTN2 fills the upper half of a copy of VectorLower.
    mov(VTMP1.Q(), VectorLower.Q());
    sqxtn2(EltSize, VTMP1, VectorUpper);
    mov(Dst.Q(), VTMP1.Q());
  } else {
    // 64-bit: narrow into a temporary, then insert it as 32-bit element 1.
    sqxtn(EltSize, VTMP2, VectorUpper);
    mov(Dst.Q(), VectorLower.Q());
    ins(ARMEmitter::SubRegSize::i32Bit, Dst, 1, VTMP2, 0);
  }
}

DEF_OP(VSQXTNPair) {
  // Signed saturating narrow of both VectorLower and VectorUpper, with the
  // two narrowed halves packed into a single result.
  const auto Op = IROp->C<IR::IROp_VSQXTNPair>();
  const auto OpSize = IROp->Size;

  const auto EltSize = ConvertSubRegSize4(IROp);
  const bool Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto VectorLower = GetVReg(Op->VectorLower);
  auto VectorUpper = GetVReg(Op->VectorUpper);

  if (HostSupportsSVE256 && Is256Bit) {
    // Combination of the SVE flavours of VSQXTN and VSQXTN2.
    // The upper half is produced first (into the temporary) so that it
    // cannot be overwritten by the lower-half computation into Dst.
    sqxtnb(EltSize, VTMP2.Z(), VectorUpper.Z());
    uzp1(EltSize, VTMP2.Z(), VTMP2.Z(), VTMP2.Z());

    // Lower half, narrowed straight into Dst.
    sqxtnb(EltSize, Dst.Z(), VectorLower.Z());
    uzp1(EltSize, Dst.Z(), Dst.Z(), Dst.Z());

    // Join the two halves.
    splice<ARMEmitter::OpType::Destructive>(EltSize, Dst.Z(), PRED_TMP_16B, Dst.Z(), VTMP2.Z());
    return;
  }

  if (OpSize == IR::OpSize::i64Bit) {
    // Pack both 64-bit inputs into one register, then narrow once.
    zip1(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), VectorLower.Q(), VectorUpper.Q());
    sqxtn(EltSize, Dst, Dst);
    return;
  }

  // 128-bit: the first narrow writes Dst, so preserve the upper input in a
  // temporary when it shares a register with Dst.
  if (Dst == VectorUpper) {
    mov(VTMP1.Q(), VectorUpper.Q());
    VectorUpper = VTMP1;
  }
  sqxtn(EltSize, Dst, VectorLower);
  sqxtn2(EltSize, Dst, VectorUpper);
}

DEF_OP(VSQXTUN) {
  // Signed-to-unsigned saturating narrow of every element of Vector.
  const auto Op = IROp->C<IR::IROp_VSQXTUN>();

  const auto EltSize = ConvertSubRegSize8(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector = GetVReg(Op->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    sqxtun(EltSize, Dst, Vector);
    return;
  }

  // SVE: narrow with the "bottom" form, then compact the even elements
  // together with UZP1.
  sqxtunb(EltSize, Dst.Z(), Vector.Z());
  uzp1(EltSize, Dst.Z(), Dst.Z(), Dst.Z());
}

// Signed-to-unsigned saturating narrow of VectorUpper, placed above the
// existing low-half contents taken from VectorLower.
DEF_OP(VSQXTUN2) {
  const auto Op = IROp->C<IR::IROp_VSQXTUN2>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto VectorLower = GetVReg(Op->VectorLower);
  const auto VectorUpper = GetVReg(Op->VectorUpper);

  if (HostSupportsSVE256 && Is256Bit) {
    // NOTE: See VSQXTN2 implementation for an in-depth explanation
    //       of everything going on here.

    const auto Mask = PRED_TMP_16B;

    // Narrow the upper source into VTMP2 first so it is consumed before Dst
    // is touched (Dst may alias VectorUpper).
    sqxtunb(SubRegSize, VTMP2.Z(), VectorUpper.Z());
    uzp1(SubRegSize, VTMP2.Z(), VTMP2.Z(), VTMP2.Z());

    // The splice below is destructive on Dst, so seed Dst with the lower
    // vector when it is not already there.
    if (Dst != VectorLower) {
      movprfx(Dst.Z(), VectorLower.Z());
    }
    splice<ARMEmitter::OpType::Destructive>(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP2.Z());
  } else {
    if (OpSize == IR::OpSize::i64Bit) {
      // Narrow into a temporary, copy the lower vector into Dst, then insert
      // the narrowed data as 32-bit lane 1 of Dst.
      sqxtun(SubRegSize, VTMP2, VectorUpper);
      mov(Dst.Q(), VectorLower.Q());
      ins(ARMEmitter::SubRegSize::i32Bit, Dst, 1, VTMP2, 0);
    } else {
      // sqxtun2 updates its destination in place, so when Dst is not already
      // the lower vector the work is done on a VTMP1 copy and moved to Dst after.
      auto Lower = VectorLower;
      if (Dst != VectorLower) {
        mov(VTMP1.Q(), VectorLower.Q());
        Lower = VTMP1;
      }

      sqxtun2(SubRegSize, Lower, VectorUpper);

      if (Dst != VectorLower) {
        mov(Dst.Q(), Lower.Q());
      }
    }
  }
}

// Signed-to-unsigned saturating narrow of two vectors into a single result:
// the narrowed VectorLower elements fill the low half of Dst and the narrowed
// VectorUpper elements fill the high half.
DEF_OP(VSQXTUNPair) {
  const auto Op = IROp->C<IR::IROp_VSQXTUNPair>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize4(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto VectorLower = GetVReg(Op->VectorLower);
  // Not const: redirected to VTMP1 below when it aliases Dst on the 128-bit ASIMD path.
  auto VectorUpper = GetVReg(Op->VectorUpper);

  if (HostSupportsSVE256 && Is256Bit) {
    // This combines the SVE versions of VSQXTUN/VSQXTUN2.
    // Look at those implementations for details about the instruction sequence.
    const auto Mask = PRED_TMP_16B;

    // Upper VSQXTUN2 handling.
    // Done first, and into VTMP2, so VectorUpper is fully consumed before Dst
    // is written by the lower calculation (they may be the same register).
    sqxtunb(SubRegSize, VTMP2.Z(), VectorUpper.Z());
    uzp1(SubRegSize, VTMP2.Z(), VTMP2.Z(), VTMP2.Z());

    // Lower VSQXTUN handling, computed directly in Dst.
    sqxtunb(SubRegSize, Dst.Z(), VectorLower.Z());
    uzp1(SubRegSize, Dst.Z(), Dst.Z(), Dst.Z());

    // Merge: keep the elements of Dst selected by Mask and append the leading
    // elements of VTMP2 after them.
    splice<ARMEmitter::OpType::Destructive>(SubRegSize, Dst.Z(), Mask, Dst.Z(), VTMP2.Z());
  } else {
    if (OpSize == IR::OpSize::i64Bit) {
      // Pack the low 64 bits of both sources into one 128-bit register so a
      // single narrow produces the whole 64-bit result.
      zip1(ARMEmitter::SubRegSize::i64Bit, Dst.Q(), VectorLower.Q(), VectorUpper.Q());
      sqxtun(SubRegSize, Dst, Dst);
    } else {
      if (Dst == VectorUpper) {
        // If the destination overlaps the upper then we need to move it temporarily,
        // since the first narrow below overwrites Dst.
        mov(VTMP1.Q(), VectorUpper.Q());
        VectorUpper = VTMP1;
      }
      // sqxtun produces the low half of Dst; sqxtun2 then fills in the high half.
      sqxtun(SubRegSize, Dst, VectorLower);
      sqxtun2(SubRegSize, Dst, VectorUpper);
    }
  }
}

// Signed rounding shift right of every element by an immediate.
DEF_OP(VSRSHR) {
  const auto ShiftOp = IROp->C<IR::IROp_VSRSHR>();
  const auto Size = IROp->Size;

  const bool Is256Bit = Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ElemSize = ConvertSubRegSize16(IROp);

  const auto Result = GetVReg(Node);
  const auto Source = GetVReg(ShiftOp->Vector);
  const auto Shift = ShiftOp->BitShift;

  if (HostSupportsSVE256 && Is256Bit) {
    // The SVE form shifts its destination in place, so seed the destination
    // with the source first unless they are already the same register.
    if (Result != Source) {
      movprfx(Result.Z(), Source.Z());
    }
    srshr(ElemSize, Result.Z(), PRED_TMP_32B.Merging(), Result.Z(), Shift);
    return;
  }

  // ASIMD: pick the register width that matches the operation size.
  if (Size == IR::OpSize::i64Bit) {
    srshr(ElemSize, Result.D(), Source.D(), Shift);
  } else {
    srshr(ElemSize, Result.Q(), Source.Q(), Shift);
  }
}

// Signed saturating shift left of every element by an immediate.
DEF_OP(VSQSHL) {
  const auto ShiftOp = IROp->C<IR::IROp_VSQSHL>();
  const auto Size = IROp->Size;

  const bool Is256Bit = Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ElemSize = ConvertSubRegSize8(IROp);

  const auto Result = GetVReg(Node);
  const auto Source = GetVReg(ShiftOp->Vector);
  const auto Shift = ShiftOp->BitShift;

  if (HostSupportsSVE256 && Is256Bit) {
    // The SVE form shifts its destination in place, so seed the destination
    // with the source first unless they are already the same register.
    if (Result != Source) {
      movprfx(Result.Z(), Source.Z());
    }
    sqshl(ElemSize, Result.Z(), PRED_TMP_32B.Merging(), Result.Z(), Shift);
    return;
  }

  // ASIMD: pick the register width that matches the operation size.
  if (Size == IR::OpSize::i64Bit) {
    sqshl(ElemSize, Result.D(), Source.D(), Shift);
  } else {
    sqshl(ElemSize, Result.Q(), Source.Q(), Shift);
  }
}

// Element-wise integer multiply of two vectors.
DEF_OP(VMul) {
  const auto MulOp = IROp->C<IR::IROp_VMul>();

  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto ElemSize = ConvertSubRegSize16(IROp);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(MulOp->Vector1);
  const auto Rhs = GetVReg(MulOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // Every non-256-bit size goes through the 128-bit ASIMD form.
    mul(ElemSize, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  mul(ElemSize, Result.Z(), Lhs.Z(), Rhs.Z());
}

// Unsigned widening multiply of the low-half elements of two vectors.
DEF_OP(VUMull) {
  const auto MulOp = IROp->C<IR::IROp_VUMull>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(MulOp->Vector1);
  const auto Rhs = GetVReg(MulOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    umull(ElemSize, Result.D(), Lhs.D(), Rhs.D());
    return;
  }

  // SVE: multiply the even (bottom) and odd (top) source elements separately,
  // then interleave the low halves of both product vectors.
  umullb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  umullt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip1(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Signed widening multiply of the low-half elements of two vectors.
DEF_OP(VSMull) {
  const auto MulOp = IROp->C<IR::IROp_VSMull>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(MulOp->Vector1);
  const auto Rhs = GetVReg(MulOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    smull(ElemSize, Result.D(), Lhs.D(), Rhs.D());
    return;
  }

  // SVE: multiply the even (bottom) and odd (top) source elements separately,
  // then interleave the low halves of both product vectors.
  smullb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  smullt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip1(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Unsigned widening multiply of the high-half elements of two vectors.
DEF_OP(VUMull2) {
  const auto MulOp = IROp->C<IR::IROp_VUMull2>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(MulOp->Vector1);
  const auto Rhs = GetVReg(MulOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    umull2(ElemSize, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // SVE: multiply the even (bottom) and odd (top) source elements separately,
  // then interleave the high halves of both product vectors.
  umullb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  umullt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip2(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Signed widening multiply of the high-half elements of two vectors.
DEF_OP(VSMull2) {
  const auto MulOp = IROp->C<IR::IROp_VSMull2>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(MulOp->Vector1);
  const auto Rhs = GetVReg(MulOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    smull2(ElemSize, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // SVE: multiply the even (bottom) and odd (top) source elements separately,
  // then interleave the high halves of both product vectors.
  smullb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  smullt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip2(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Unsigned multiply returning the high half of each element-wise product.
// Uses SVE UMULH where available and emulates it with widening multiplies on
// plain ASIMD.
DEF_OP(VUMulH) {
  const auto Op = IROp->C<IR::IROp_VUMulH>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  // Double-width element size, only consumed by the ASIMD emulation paths below.
  // NOTE(review): element sizes other than 8/16/32-bit fall through to i8Bit,
  // which looks like a placeholder -- confirm 64-bit elements never reach the
  // ASIMD paths.
  const auto SubRegSizeLarger = ElementSize == IR::OpSize::i8Bit  ? ARMEmitter::SubRegSize::i16Bit :
                                ElementSize == IR::OpSize::i16Bit ? ARMEmitter::SubRegSize::i32Bit :
                                ElementSize == IR::OpSize::i32Bit ? ARMEmitter::SubRegSize::i64Bit :
                                                                    ARMEmitter::SubRegSize::i8Bit;

  if (HostSupportsSVE256 && Is256Bit) {
    // Full-width operation: the unpredicated SVE form covers the whole register.
    umulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());
  } else if (HostSupportsSVE128 && Is128Bit) {
    if (HostSupportsSVE256) {
      // Do predicated to ensure upper-bits get zero as expected
      const auto Mask = PRED_TMP_16B.Merging();

      // The predicated form is destructive (Dst is also the first source), so
      // pick the operand order that matches whichever source Dst aliases.
      if (Dst == Vector1) {
        umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z());
      } else if (Dst == Vector2) {
        umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector1.Z());
      } else {
        // Destination register doesn't overlap either source.
        // NOTE: SVE umulh (predicated) is a destructive operation.
        movprfx(Dst.Z(), Vector1.Z());
        umulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z());
      }
    } else {
      umulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());
    }
  } else if (OpSize == IR::OpSize::i64Bit) {
    // Widen-multiply, then narrow while shifting right by the element width to
    // keep only the high half of every product.
    umull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D());
    shrn(SubRegSize, Dst.D(), Dst.D(), IR::OpSizeAsBits(ElementSize));
  } else {
    // ASIMD doesn't have a umulh. Need to emulate.
    // The high-half products go to VTMP1 first so both sources are still intact
    // when the low-half products overwrite Dst; uzp2 then gathers the
    // odd-numbered (high) halves of every product.
    umull2(SubRegSizeLarger, VTMP1.Q(), Vector1.Q(), Vector2.Q());
    umull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D());
    uzp2(SubRegSize, Dst.Q(), Dst.Q(), VTMP1.Q());
  }
}

// Signed multiply returning the high half of each element-wise product.
// Uses SVE SMULH where available and emulates it with widening multiplies on
// plain ASIMD.
DEF_OP(VSMulH) {
  const auto Op = IROp->C<IR::IROp_VSMulH>();
  const auto OpSize = IROp->Size;

  const auto ElementSize = Op->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize8(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  // Double-width element size, only consumed by the ASIMD emulation paths below.
  // NOTE(review): element sizes other than 8/16/32-bit fall through to i8Bit,
  // which looks like a placeholder -- confirm 64-bit elements never reach the
  // ASIMD paths.
  const auto SubRegSizeLarger = ElementSize == IR::OpSize::i8Bit  ? ARMEmitter::SubRegSize::i16Bit :
                                ElementSize == IR::OpSize::i16Bit ? ARMEmitter::SubRegSize::i32Bit :
                                ElementSize == IR::OpSize::i32Bit ? ARMEmitter::SubRegSize::i64Bit :
                                                                    ARMEmitter::SubRegSize::i8Bit;

  if (HostSupportsSVE256 && Is256Bit) {
    // Full-width operation: the unpredicated SVE form covers the whole register.
    smulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());
  } else if (HostSupportsSVE128 && Is128Bit) {
    if (HostSupportsSVE256) {
      // Do predicated to ensure upper-bits get zero as expected
      const auto Mask = PRED_TMP_16B.Merging();

      // The predicated form is destructive (Dst is also the first source), so
      // pick the operand order that matches whichever source Dst aliases.
      if (Dst == Vector1) {
        smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z());
      } else if (Dst == Vector2) {
        smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector1.Z());
      } else {
        // Destination register doesn't overlap either source.
        // NOTE: SVE smulh (predicated) is a destructive operation.
        movprfx(Dst.Z(), Vector1.Z());
        smulh(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z());
      }
    } else {
      smulh(SubRegSize, Dst.Z(), Vector1.Z(), Vector2.Z());
    }
  } else if (OpSize == IR::OpSize::i64Bit) {
    // Widen-multiply, then narrow while shifting right by the element width to
    // keep only the high half of every product.
    smull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D());
    shrn(SubRegSize, Dst.D(), Dst.D(), IR::OpSizeAsBits(ElementSize));
  } else {
    // ASIMD doesn't have a smulh. Need to emulate.
    // The high-half products go to VTMP1 first so both sources are still intact
    // when the low-half products overwrite Dst; uzp2 then gathers the
    // odd-numbered (high) halves of every product.
    smull2(SubRegSizeLarger, VTMP1.Q(), Vector1.Q(), Vector2.Q());
    smull(SubRegSizeLarger, Dst.D(), Vector1.D(), Vector2.D());
    uzp2(SubRegSize, Dst.Q(), Dst.Q(), VTMP1.Q());
  }
}

// Unsigned widening absolute difference of the low-half elements of two vectors.
DEF_OP(VUABDL) {
  const auto DiffOp = IROp->C<IR::IROp_VUABDL>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(DiffOp->Vector1);
  const auto Rhs = GetVReg(DiffOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    uabdl(ElemSize, Result.D(), Lhs.D(), Rhs.D());
    return;
  }

  // Mimic AdvSIMD UABDL with SVE: take the absolute difference of the even
  // elements (UABDLB) and of the odd elements (UABDLT), then interleave the
  // low halves of both result vectors.
  uabdlb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  uabdlt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip1(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Unsigned widening absolute difference of the high-half elements of two vectors.
DEF_OP(VUABDL2) {
  const auto DiffOp = IROp->C<IR::IROp_VUABDL2>();

  const auto ElemSize = ConvertSubRegSize248(IROp);
  const bool Is256Bit = IROp->Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Lhs = GetVReg(DiffOp->Vector1);
  const auto Rhs = GetVReg(DiffOp->Vector2);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    uabdl2(ElemSize, Result.Q(), Lhs.Q(), Rhs.Q());
    return;
  }

  // Mimic AdvSIMD UABDL2 with SVE: take the absolute difference of the even
  // elements (UABDLB) and of the odd elements (UABDLT), then interleave the
  // high halves of both result vectors.
  uabdlb(ElemSize, VTMP1.Z(), Lhs.Z(), Rhs.Z());
  uabdlt(ElemSize, VTMP2.Z(), Lhs.Z(), Rhs.Z());
  zip2(ElemSize, Result.Z(), VTMP1.Z(), VTMP2.Z());
}

// Byte table lookup using a single table register.
DEF_OP(VTBL1) {
  const auto TableOp = IROp->C<IR::IROp_VTBL1>();
  const auto Size = IROp->Size;

  const auto Result = GetVReg(Node);
  const auto Indices = GetVReg(TableOp->VectorIndices);
  const auto Table = GetVReg(TableOp->VectorTable);

  if (Size == IR::OpSize::i64Bit) {
    // The table is always a full 128-bit register; only the index/result width shrinks.
    tbl(Result.D(), Table.Q(), Indices.D());
  } else if (Size == IR::OpSize::i128Bit) {
    tbl(Result.Q(), Table.Q(), Indices.Q());
  } else if (Size == IR::OpSize::i256Bit) {
    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Host does not support SVE. Cannot perform 256-bit table lookup");

    tbl(ARMEmitter::SubRegSize::i8Bit, Result.Z(), Table.Z(), Indices.Z());
  } else {
    LOGMAN_MSG_A_FMT("Unknown OpSize: {}", Size);
  }
}

// Byte table lookup using a table made of two registers. The two-register TBL
// forms need consecutive table registers, so non-sequential inputs are first
// copied into VTMP1/VTMP2.
DEF_OP(VTBL2) {
  const auto Op = IROp->C<IR::IROp_VTBL2>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto VectorIndices = GetVReg(Op->VectorIndices);
  // Not const: both may be redirected to the temporaries below.
  auto VectorTable1 = GetVReg(Op->VectorTable1);
  auto VectorTable2 = GetVReg(Op->VectorTable2);

  if (!ARMEmitter::AreVectorsSequential(VectorTable1, VectorTable2)) {
    // Vector registers aren't sequential, need to move to temporaries.
    // Copy at the width the lookup will actually read.
    if (OpSize == IR::OpSize::i256Bit) {
      mov(VTMP1.Z(), VectorTable1.Z());
      mov(VTMP2.Z(), VectorTable2.Z());
    } else {
      mov(VTMP1.Q(), VectorTable1.Q());
      mov(VTMP2.Q(), VectorTable2.Q());
    }

    // Compile-time guarantee that the temporaries themselves are a valid table pair.
    static_assert(ARMEmitter::AreVectorsSequential(VTMP1, VTMP2), "VTMP1 and VTMP2 must be sequential in order to use double-table "
                                                                  "TBL");
    VectorTable1 = VTMP1;
    VectorTable2 = VTMP2;
  }

  switch (OpSize) {
  case IR::OpSize::i64Bit: {
    // Tables are always full 128-bit registers; only the index/result width shrinks.
    tbl(Dst.D(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.D());
    break;
  }
  case IR::OpSize::i128Bit: {
    tbl(Dst.Q(), VectorTable1.Q(), VectorTable2.Q(), VectorIndices.Q());
    break;
  }
  case IR::OpSize::i256Bit: {
    LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Host does not support SVE. Cannot perform 256-bit table lookup");

    tbl(ARMEmitter::SubRegSize::i8Bit, Dst.Z(), VectorTable1.Z(), VectorTable2.Z(), VectorIndices.Z());
    break;
  }
  default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break;
  }
}

// Byte table lookup "extension" with a single table register. TBX updates its
// destination in place, so VectorSrcDst supplies the starting contents of the
// result.
DEF_OP(VTBX1) {
  const auto Op = IROp->C<IR::IROp_VTBX1>();
  const auto OpSize = IROp->Size;

  const auto Dst = GetVReg(Node);
  const auto VectorSrcDst = GetVReg(Op->VectorSrcDst);
  const auto VectorIndices = GetVReg(Op->VectorIndices);
  const auto VectorTable = GetVReg(Op->VectorTable);

  if (Dst != VectorSrcDst) {
    // Dst is a different register from the in/out operand: run the lookup on a
    // copy in VTMP1 and move the result to Dst afterwards.
    // NOTE(review): presumably the temporary (rather than Dst itself) is used
    // because Dst may alias the table or index registers -- confirm.
    switch (OpSize) {
    case IR::OpSize::i64Bit: {
      mov(VTMP1.D(), VectorSrcDst.D());
      tbx(VTMP1.D(), VectorTable.Q(), VectorIndices.D());
      mov(Dst.D(), VTMP1.D());
      break;
    }
    case IR::OpSize::i128Bit: {
      mov(VTMP1.Q(), VectorSrcDst.Q());
      tbx(VTMP1.Q(), VectorTable.Q(), VectorIndices.Q());
      mov(Dst.Q(), VTMP1.Q());
      break;
    }
    case IR::OpSize::i256Bit: {
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Host does not support SVE. Cannot perform 256-bit table lookup");
      mov(VTMP1.Z(), VectorSrcDst.Z());
      tbx(ARMEmitter::SubRegSize::i8Bit, VTMP1.Z(), VectorTable.Z(), VectorIndices.Z());
      mov(Dst.Z(), VTMP1.Z());
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break;
    }
  } else {
    // Dst already is the in/out operand, so the lookup can run in place.
    switch (OpSize) {
    case IR::OpSize::i64Bit: {
      tbx(VectorSrcDst.D(), VectorTable.Q(), VectorIndices.D());
      break;
    }
    case IR::OpSize::i128Bit: {
      tbx(VectorSrcDst.Q(), VectorTable.Q(), VectorIndices.Q());
      break;
    }
    case IR::OpSize::i256Bit: {
      LOGMAN_THROW_A_FMT(HostSupportsSVE256, "Host does not support SVE. Cannot perform 256-bit table lookup");

      tbx(ARMEmitter::SubRegSize::i8Bit, VectorSrcDst.Z(), VectorTable.Z(), VectorIndices.Z());
      break;
    }
    default: LOGMAN_MSG_A_FMT("Unknown OpSize: {}", OpSize); break;
    }
  }
}

// Reverses the order of the 8-bit or 16-bit elements inside every 32-bit group.
DEF_OP(VRev32) {
  const auto RevOp = IROp->C<IR::IROp_VRev32>();
  const auto Size = IROp->Size;

  const auto ElemSize = RevOp->Header.ElementSize;
  const bool Is256Bit = Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Source = GetVReg(RevOp->Vector);

  LOGMAN_THROW_A_FMT(ElemSize == IR::OpSize::i8Bit || ElemSize == IR::OpSize::i16Bit, "Invalid size");
  const auto SubRegSize = ElemSize == IR::OpSize::i8Bit ? ARMEmitter::SubRegSize::i8Bit : ARMEmitter::SubRegSize::i16Bit;

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // ASIMD has a dedicated REV32; only the register width varies.
    if (Size == IR::OpSize::i64Bit) {
      rev32(SubRegSize, Result.D(), Source.D());
    } else {
      rev32(SubRegSize, Result.Q(), Source.Q());
    }
    return;
  }

  // SVE: the container size is fixed at 32 bits and the instruction chooses
  // the granule (bytes or halfwords) being reversed.
  const auto Mask = PRED_TMP_32B.Merging();
  if (ElemSize == IR::OpSize::i8Bit) {
    revb(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Mask, Source.Z());
  } else if (ElemSize == IR::OpSize::i16Bit) {
    revh(ARMEmitter::SubRegSize::i32Bit, Result.Z(), Mask, Source.Z());
  } else {
    LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElemSize);
  }
}


// Reverses the order of the 8/16/32-bit elements inside every 64-bit group.
DEF_OP(VRev64) {
  const auto RevOp = IROp->C<IR::IROp_VRev64>();
  const auto Size = IROp->Size;

  const auto ElemSize = RevOp->Header.ElementSize;
  const auto SubRegSize = ConvertSubRegSize4(IROp);
  const bool Is256Bit = Size == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Result = GetVReg(Node);
  const auto Source = GetVReg(RevOp->Vector);

  if (!(HostSupportsSVE256 && Is256Bit)) {
    // ASIMD has a dedicated REV64; only the register width varies.
    if (Size == IR::OpSize::i64Bit) {
      rev64(SubRegSize, Result.D(), Source.D());
    } else {
      rev64(SubRegSize, Result.Q(), Source.Q());
    }
    return;
  }

  // SVE: the container size is fixed at 64 bits and the instruction chooses
  // the granule (bytes, halfwords or words) being reversed.
  const auto Mask = PRED_TMP_32B.Merging();
  if (ElemSize == IR::OpSize::i8Bit) {
    revb(ARMEmitter::SubRegSize::i64Bit, Result.Z(), Mask, Source.Z());
  } else if (ElemSize == IR::OpSize::i16Bit) {
    revh(ARMEmitter::SubRegSize::i64Bit, Result.Z(), Mask, Source.Z());
  } else if (ElemSize == IR::OpSize::i32Bit) {
    revw(ARMEmitter::SubRegSize::i64Bit, Result.Z(), Mask, Source.Z());
  } else {
    LOGMAN_MSG_A_FMT("Invalid Element Size: {}", ElemSize);
  }
}

// Floating-point complex add with rotation (FCADD). Only rotations of 90 and
// 270 degrees are accepted.
DEF_OP(VFCADD) {
  const auto Op = IROp->C<IR::IROp_VFCADD>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);

  // Translate the rotation in degrees from the IR op into the emitter's enum.
  LOGMAN_THROW_A_FMT(Op->Rotate == 90 || Op->Rotate == 270, "Invalidate Rotate");
  const auto Rotate = Op->Rotate == 90 ? ARMEmitter::Rotation::ROTATE_90 : ARMEmitter::Rotation::ROTATE_270;

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();

    if (Dst == Vector1) {
      // Trivial case where we already have first vector in the destination
      // register. We can just do the operation in place.
      fcadd(SubRegSize, Dst.Z(), Mask, Vector1.Z(), Vector2.Z(), Rotate);
    } else if (Dst == Vector2) {
      // SVE FCADD is a destructive operation, so we need
      // a temporary for performing operations.
      // Writing Vector1 into Dst first would clobber Vector2 here.
      movprfx(VTMP1.Z(), Vector1.Z());
      fcadd(SubRegSize, VTMP1.Z(), Mask, VTMP1.Z(), Vector2.Z(), Rotate);
      mov(Dst.Z(), VTMP1.Z());
    } else {
      // We have no source/dest aliasing, so we can move into the destination.
      movprfx(Dst.Z(), Vector1.Z());
      fcadd(SubRegSize, Dst.Z(), Mask, Dst.Z(), Vector2.Z(), Rotate);
    }
  } else {
    // ASIMD FCADD takes separate destination and source operands here, so no
    // aliasing handling is required; only the register width varies.
    if (OpSize == IR::OpSize::i64Bit) {
      fcadd(SubRegSize, Dst.D(), Vector1.D(), Vector2.D(), Rotate);
    } else {
      fcadd(SubRegSize, Dst.Q(), Vector1.Q(), Vector2.Q(), Rotate);
    }
  }
}

DEF_OP(VFMLA) {
  ///< Dest = (Vector1 * Vector2) + Addend
  // Matches:
  // - SVE    - FMLA
  // - ASIMD  - FMLA
  // - Scalar - FMADD
  const auto Op = IROp->C<IR::IROp_VFMLA>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);
  const auto VectorAddend = GetVReg(Op->Addend);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();
    // The vector FMLA forms accumulate into their destination, so the addend
    // has to be in the accumulator register before the multiply-add:
    //  - Dst already holds the addend: accumulate in place.
    //  - Dst aliases neither multiplicand: copy the addend into Dst.
    //  - Dst aliases a multiplicand: accumulate in VTMP1, then move to Dst.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst != Vector1 && Dst != Vector2) {
        DestTmp = Dst;
      } else {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else {
    if (IROp->ElementSize == OpSize) {
      // Single-element operation: the scalar FMADD has separate destination and
      // addend operands, so no accumulator setup is needed.
      if (IROp->ElementSize == IR::OpSize::i16Bit) {
        fmadd(Dst.H(), Vector1.H(), Vector2.H(), VectorAddend.H());
      } else if (IROp->ElementSize == IR::OpSize::i32Bit) {
        fmadd(Dst.S(), Vector1.S(), Vector2.S(), VectorAddend.S());
      } else if (IROp->ElementSize == IR::OpSize::i64Bit) {
        fmadd(Dst.D(), Vector1.D(), Vector2.D(), VectorAddend.D());
      }
      return;
    }
    // Same accumulator selection as the SVE path above.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst != Vector1 && Dst != Vector2) {
        DestTmp = Dst;
      } else {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Q(), VectorAddend.Q());
    }
    if (OpSize == IR::OpSize::i128Bit) {
      fmla(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
    } else {
      fmla(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
    }

    if (Dst != DestTmp) {
      mov(Dst.Q(), DestTmp.Q());
    }
  }
}

DEF_OP(VFMLS) {
  ///< Dest = (Vector1 * Vector2) - Addend
  // Matches:
  // - SVE    - FNMLS
  // - ASIMD  - FMLA (With negated addend)
  // - Scalar - FNMSUB
  const auto Op = IROp->C<IR::IROp_VFMLS>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);
  const auto VectorAddend = GetVReg(Op->Addend);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();
    // FNMLS accumulates into its destination, so the addend has to be in the
    // accumulator register first:
    //  - Dst already holds the addend: accumulate in place.
    //  - Dst aliases neither multiplicand: copy the addend into Dst.
    //  - Dst aliases a multiplicand: accumulate in VTMP1, then move to Dst.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst != Vector1 && Dst != Vector2) {
        DestTmp = Dst;
      } else {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fnmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else if (HostSupportsSVE128 && Is128Bit) {
    // Same sequence as the 256-bit path, but predicated with the 16-byte mask.
    const auto Mask = PRED_TMP_16B.Merging();
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst != Vector1 && Dst != Vector2) {
        DestTmp = Dst;
      } else {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fnmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else {
    if (IROp->ElementSize == OpSize) {
      // Single-element operation: the scalar FNMSUB has separate destination
      // and addend operands, so no accumulator setup is needed.
      if (IROp->ElementSize == IR::OpSize::i16Bit) {
        fnmsub(Dst.H(), Vector1.H(), Vector2.H(), VectorAddend.H());
      } else if (IROp->ElementSize == IR::OpSize::i32Bit) {
        fnmsub(Dst.S(), Vector1.S(), Vector2.S(), VectorAddend.S());
      } else if (IROp->ElementSize == IR::OpSize::i64Bit) {
        fnmsub(Dst.D(), Vector1.D(), Vector2.D(), VectorAddend.D());
      }
      return;
    }

    // Addend needs to get negated to match correct behaviour here.
    // The negation doubles as the copy into the accumulator; it only needs the
    // temporary when Dst aliases one of the multiplicands.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst == Vector1 || Dst == Vector2) {
      DestTmp = VTMP1;
    }

    if (Is128Bit) {
      fneg(SubRegSize, DestTmp.Q(), VectorAddend.Q());
    } else {
      fneg(SubRegSize, DestTmp.D(), VectorAddend.D());
    }

    // (-Addend) + Vector1 * Vector2
    if (Is128Bit) {
      fmla(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
    } else {
      fmla(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
    }

    if (DestTmp != Dst) {
      if (Is128Bit) {
        mov(Dst.Q(), DestTmp.Q());
      } else {
        mov(Dst.D(), DestTmp.D());
      }
    }
  }
}

DEF_OP(VFNMLA) {
  ///< Dest = (-Vector1 * Vector2) + Addend
  // Matches:
  // - SVE    - FMLS
  // - ASIMD  - FMLS
  // - Scalar - FMSUB
  // Decode with this op's own IR struct rather than a sibling op's struct, so
  // the handler does not depend on the two layouts staying identical.
  const auto Op = IROp->C<IR::IROp_VFNMLA>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);
  const auto VectorAddend = GetVReg(Op->Addend);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();
    // FMLS accumulates into its destination, so the addend has to be in the
    // accumulator register first:
    //  - Dst already holds the addend: accumulate in place.
    //  - Dst aliases neither multiplicand: copy the addend into Dst.
    //  - Dst aliases a multiplicand: accumulate in VTMP1, then move to Dst.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst == Vector1 || Dst == Vector2) {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fmls(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else {
    if (IROp->ElementSize == OpSize) {
      // Single-element operation: the scalar FMSUB has separate destination and
      // addend operands, so no accumulator setup is needed.
      if (IROp->ElementSize == IR::OpSize::i16Bit) {
        fmsub(Dst.H(), Vector1.H(), Vector2.H(), VectorAddend.H());
      } else if (IROp->ElementSize == IR::OpSize::i32Bit) {
        fmsub(Dst.S(), Vector1.S(), Vector2.S(), VectorAddend.S());
      } else if (IROp->ElementSize == IR::OpSize::i64Bit) {
        fmsub(Dst.D(), Vector1.D(), Vector2.D(), VectorAddend.D());
      }
      return;
    }

    // Same accumulator selection as the SVE path above.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst == Vector1 || Dst == Vector2) {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Q(), VectorAddend.Q());
    }
    if (OpSize == IR::OpSize::i128Bit) {
      fmls(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
    } else {
      fmls(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
    }

    if (Dst != DestTmp) {
      mov(Dst.Q(), DestTmp.Q());
    }
  }
}

DEF_OP(VFNMLS) {
  ///< Dest = (-Vector1 * Vector2) - Addend
  // Matches:
  // - SVE    - FNMLA
  // - ASIMD  - FMLS (With Negated addend)
  // - Scalar - FNMADD

  // Decode with this op's own IR struct rather than a sibling op's struct, so
  // the handler does not depend on the two layouts staying identical.
  const auto Op = IROp->C<IR::IROp_VFNMLS>();
  const auto OpSize = IROp->Size;

  const auto SubRegSize = ConvertSubRegSize248(IROp);
  const auto Is256Bit = OpSize == IR::OpSize::i256Bit;
  LOGMAN_THROW_A_FMT(!Is256Bit || HostSupportsSVE256, "Need SVE256 support in order to use {} with 256-bit operation", __func__);

  const auto Is128Bit = OpSize == IR::OpSize::i128Bit;

  const auto Dst = GetVReg(Node);
  const auto Vector1 = GetVReg(Op->Vector1);
  const auto Vector2 = GetVReg(Op->Vector2);
  const auto VectorAddend = GetVReg(Op->Addend);

  if (HostSupportsSVE256 && Is256Bit) {
    const auto Mask = PRED_TMP_32B.Merging();
    // FNMLA accumulates into its destination, so the addend has to be in the
    // accumulator register first:
    //  - Dst already holds the addend: accumulate in place.
    //  - Dst aliases neither multiplicand: copy the addend into Dst.
    //  - Dst aliases a multiplicand: accumulate in VTMP1, then move to Dst.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst == Vector1 || Dst == Vector2) {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fnmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else if (HostSupportsSVE128 && Is128Bit) {
    // Same sequence as the 256-bit path, but predicated with the 16-byte mask.
    const auto Mask = PRED_TMP_16B.Merging();
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst != VectorAddend) {
      if (Dst == Vector1 || Dst == Vector2) {
        DestTmp = VTMP1;
      }
      mov(DestTmp.Z(), VectorAddend.Z());
    }

    fnmla(SubRegSize, DestTmp.Z(), Mask, Vector1.Z(), Vector2.Z());
    if (Dst != DestTmp) {
      mov(Dst.Z(), DestTmp.Z());
    }
  } else {
    if (IROp->ElementSize == OpSize) {
      // Single-element operation: the scalar FNMADD has separate destination
      // and addend operands, so no accumulator setup is needed.
      if (IROp->ElementSize == IR::OpSize::i16Bit) {
        fnmadd(Dst.H(), Vector1.H(), Vector2.H(), VectorAddend.H());
      } else if (IROp->ElementSize == IR::OpSize::i32Bit) {
        fnmadd(Dst.S(), Vector1.S(), Vector2.S(), VectorAddend.S());
      } else if (IROp->ElementSize == IR::OpSize::i64Bit) {
        fnmadd(Dst.D(), Vector1.D(), Vector2.D(), VectorAddend.D());
      }
      return;
    }

    // Addend needs to get negated to match correct behaviour here.
    // The negation doubles as the copy into the accumulator; it only needs the
    // temporary when Dst aliases one of the multiplicands.
    ARMEmitter::VRegister DestTmp = Dst;
    if (Dst == Vector1 || Dst == Vector2) {
      DestTmp = VTMP1;
    }

    if (Is128Bit) {
      fneg(SubRegSize, DestTmp.Q(), VectorAddend.Q());
    } else {
      fneg(SubRegSize, DestTmp.D(), VectorAddend.D());
    }

    // (-Addend) - Vector1 * Vector2
    if (Is128Bit) {
      fmls(SubRegSize, DestTmp.Q(), Vector1.Q(), Vector2.Q());
    } else {
      fmls(SubRegSize, DestTmp.D(), Vector1.D(), Vector2.D());
    }

    if (DestTmp != Dst) {
      if (Is128Bit) {
        mov(Dst.Q(), DestTmp.Q());
      } else {
        mov(Dst.D(), DestTmp.D());
      }
    }
  }
}

// Copies the sign of each element of Vector2 onto the matching element of
// Vector1, modifying the Vector1 register in place.
DEF_OP(VFCopySign) {
  auto Op = IROp->C<IR::IROp_VFCopySign>();
  const auto OpSize = IROp->Size;
  const auto SubRegSize = ConvertSubRegSize248(IROp);

  ARMEmitter::VRegister Magnitude = GetVReg(Op->Vector1);
  ARMEmitter::VRegister Sign = GetVReg(Op->Vector2);

  //  We don't assign explicitly to Dst, but Dst and Magnitude are tied to the same register.
  //  Similar in semantics to C's copysignf.
  //
  //  VTMP1 is filled with the per-element constant 0x80 << 24 and BIT then
  //  inserts the bits of Sign selected by that mask into Magnitude.
  //  NOTE(review): 0x80 << 24 is the sign bit of a 32-bit element -- confirm
  //  other element sizes never reach this op.
  switch (OpSize) {
  case IR::OpSize::i64Bit:
    movi(SubRegSize, VTMP1.D(), 0x80, 24);
    bit(Magnitude.D(), Sign.D(), VTMP1.D());
    break;
  case IR::OpSize::i128Bit:
    movi(SubRegSize, VTMP1.Q(), 0x80, 24);
    bit(Magnitude.Q(), Sign.Q(), VTMP1.Q());
    break;
  // NOTE(review): this switch is on the operation size even though the message
  // below talks about the element size.
  default: LOGMAN_MSG_A_FMT("Unsupported element size for operation {}", __func__); FEX_UNREACHABLE;
  }
}

// Calls the out-of-line F64 sine handler stored in the CPU state frame.
DEF_OP(F64SIN) {
  const auto SinOp = IROp->C<IR::IROp_F64SIN>();
  const auto Input = GetVReg(SinOp->Src);
  const auto Result = GetVReg(Node);

  // The value is handed to the handler in VTMP1.
  fmov(VTMP1.D(), Input.D());
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.F64SinHandler));

  // BLR overwrites the link register, so spill it across the call.
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);

  // The result is read back from VTMP1.
  fmov(Result.D(), VTMP1.D());
}

// Calls the out-of-line F64 cosine handler stored in the CPU state frame.
DEF_OP(F64COS) {
  const auto CosOp = IROp->C<IR::IROp_F64COS>();
  const auto Input = GetVReg(CosOp->Src);
  const auto Result = GetVReg(Node);

  // The value is handed to the handler in VTMP1.
  fmov(VTMP1.D(), Input.D());
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.F64CosHandler));

  // BLR overwrites the link register, so spill it across the call.
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);

  // The result is read back from VTMP1.
  fmov(Result.D(), VTMP1.D());
}

// Calls the out-of-line F64 tangent handler stored in the CPU state frame.
DEF_OP(F64TAN) {
  const auto TanOp = IROp->C<IR::IROp_F64TAN>();
  const auto Input = GetVReg(TanOp->Src);
  const auto Result = GetVReg(Node);

  // The value is handed to the handler in VTMP1.
  fmov(VTMP1.D(), Input.D());
  ldr(TMP1, STATE_PTR(CpuStateFrame, Pointers.F64TanHandler));

  // BLR overwrites the link register, so spill it across the call.
  str<ARMEmitter::IndexType::PRE>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, -16);
  blr(TMP1);
  ldr<ARMEmitter::IndexType::POST>(ARMEmitter::XReg::lr, ARMEmitter::Reg::rsp, 16);

  // The result is read back from VTMP1.
  fmov(Result.D(), VTMP1.D());
}


} // namespace FEXCore::CPU


================================================
FILE: FEXCore/Source/Interface/Core/LookupCache.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|block-database
desc: Stores information about blocks, and provides C++ implementations to lookup the blocks
$end_info$
*/

#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/HLE/SyscallHandler.h>

#include "Interface/Context/Context.h"
#include "Interface/Core/LookupCache.h"

namespace FEXCore {
GuestToHostMap::GuestToHostMap()
  : BlockLinks_mbr {"FEXMem_BlockLinks"} {
  BlockLinks_pma = fextl::make_unique<std::pmr::polymorphic_allocator<std::byte>>(&BlockLinks_mbr);
  // Setup our PMR map.
  BlockLinks = BlockLinks_pma->new_object<BlockLinksMapType>();
}

// Reserves one contiguous region that holds, in this order:
//   [page-pointer table][L2 page backing storage (CODE_SIZE)][L1 cache (MAX_L1_SIZE)]
// and derives the pointers into it.
LookupCache::LookupCache(FEXCore::Context::ContextImpl* CTX)
  : ctx {CTX} {

  // The page-pointer table needs one 8-byte slot per page of guest virtual memory.
  // At 64GB of virtual memory this comes out at 128MB of virtual address space.
  const auto PageTableBytes = ctx->Config.VirtualMemSize / FEXCore::Utils::FEX_PAGE_SIZE * 8;
  // Table plus backing storage; this is the part that is named "FEXMem_Lookup".
  const auto L2RegionBytes = PageTableBytes + CODE_SIZE;

  VirtualMemSize = ctx->Config.VirtualMemSize;
  TotalCacheSize = L2RegionBytes + MAX_L1_SIZE;

  // Lookup through L2 walks:
  //   page-pointer table[guest page index] -> per-page entry array[offset inside the page] -> host code
  PagePointer = reinterpret_cast<uintptr_t>(FEXCore::Allocator::VirtualAlloc(TotalCacheSize, false, false));
  LOGMAN_THROW_A_FMT(PagePointer != -1ULL, "Failed to allocate PagePointer");

  void* const RegionBase = reinterpret_cast<void*>(PagePointer);

  // Transparent huge pages are switched off for the whole region.
  FEXCore::Allocator::VirtualTHPControl(RegionBase, TotalCacheSize, FEXCore::Allocator::THPControl::Disable);

  FEXCore::Allocator::VirtualName("FEXMem_Lookup", RegionBase, L2RegionBytes);
  ctx->SyscallHandler->MarkOvercommitRange(PagePointer, TotalCacheSize);

  // Backing storage for the per-page entry arrays starts right behind the table.
  // Every guest page that gets populated takes SIZE_PER_PAGE bytes from it (one LookupCacheEntry per byte
  // of the page), and the pool as a whole is capped at CODE_SIZE.
  // This can be inefficient when only a few blocks are compiled per page.
  PageMemory = PagePointer + PageTableBytes;

  // L1 occupies the tail of the region.
  L1Pointer = PageMemory + CODE_SIZE;
  FEXCore::Allocator::VirtualName("FEXMem_Lookup_L1", reinterpret_cast<void*>(L1Pointer), MAX_L1_SIZE);

  // A dynamically sized L1 starts out at its minimum size, a static one always uses the maximum.
  L1PointerMask = (DynamicL1Cache() ? MIN_L1_ENTRIES : MAX_L1_ENTRIES) - 1;
}

// Releases the cache region that the constructor allocated and marked as an overcommit range.
LookupCache::~LookupCache() {
  // Undo the constructor's steps in reverse order: remove the overcommit marking while the range still
  // belongs to this object, and only then give the memory back. Freeing first would leave a window in
  // which the address range can be handed out to an unrelated allocation while it is still marked.
  ctx->SyscallHandler->UnmarkOvercommitRange(PagePointer, TotalCacheSize);
  FEXCore::Allocator::VirtualFree(reinterpret_cast<void*>(PagePointer), TotalCacheSize);

  // The GuestToHostMap referenced through `Shared` is not released here.
}

// Drops every L2 mapping of this cache and makes the whole L2 backing pool available again.
// L1 is not touched.
void LookupCache::ClearL2Cache(const FEXCore::LookupCacheBaseLockToken& lk) {
  // The page-pointer table and the page backing storage are laid out back to back,
  // so one call spanning both of them is enough.
  const auto L2RegionBytes = ctx->Config.VirtualMemSize / FEXCore::Utils::FEX_PAGE_SIZE * 8 + CODE_SIZE;
  FEXCore::Allocator::VirtualDontNeed(reinterpret_cast<void*>(PagePointer), L2RegionBytes, false);

  // Nothing is handed out anymore; restart allocation at the beginning of the pool.
  AllocateOffset = 0;
}

void LookupCache::ClearThreadLocalCaches(const LookupCacheWriteLockToken&) {
  // Clear L1 and L2 by clearing the full cache.
  FEXCore::Allocator::VirtualDontNeed(reinterpret_cast<void*>(PagePointer), TotalCacheSize, false);
  CachedCodePages.clear();
}

// Clears this cache's thread-local state first and then the GuestToHostMap referenced by `Shared`.
// The same write-lock token is forwarded to both steps.
void LookupCache::ClearCache(const LookupCacheWriteLockToken& lk) {
  // Clear L1 and L2 by clearing the full cache.
  ClearThreadLocalCaches(lk);
  // Then drop the block list and block links of the shared map.
  Shared->ClearCache(lk);
}

// Forgets every guest->host block mapping and every block link held by this map.
// CodePages is left as it is.
void GuestToHostMap::ClearCache(const LookupCacheWriteLockToken&) {
  // No code is left, so the block list can be emptied.
  BlockList.clear();

  // Rather than walking and destroying the old link map, switch over to a fresh map object
  // obtained from the BlockLinks allocator.
  BlockLinks = BlockLinks_pma->new_object<BlockLinksMapType>();
}

} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/LookupCache.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Context/Context.h"
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/SHMStats.h>
#include "Utils/WritePriorityMutex.h"

#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory_resource.h>
#include <FEXCore/fextl/robin_map.h>
#include <FEXCore/fextl/robin_set.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/fextl/memory_resource.h>

#include <cstdint>
#include <stddef.h>
#include <utility>
#include <mutex>

namespace FEXCore {
// Common base of the read and write lock tokens. Functions that work with either kind of lock take
// a reference to this type. It carries no state of its own.
struct LookupCacheBaseLockToken {
protected:
  // Protected constructor - only derived classes can construct
  LookupCacheBaseLockToken() = default;
};

// Token that stands for a held write lock: the wrapped std::lock_guard keeps the mutex locked
// for as long as the token is alive.
struct LookupCacheWriteLockToken : LookupCacheBaseLockToken {
private:
  // GuestToHostMap is the only type that may create tokens.
  friend struct GuestToHostMap;

  std::lock_guard<FEXCore::Utils::WritePriorityMutex::Mutex> Lock;

  LookupCacheWriteLockToken(FEXCore::Utils::WritePriorityMutex::Mutex& Mutex)
    : Lock(Mutex) {}
};

// Token that stands for a held read lock: the wrapped std::shared_lock keeps the mutex locked in
// shared mode for as long as the token is alive.
struct LookupCacheReadLockToken : LookupCacheBaseLockToken {
private:
  // GuestToHostMap is the only type that may create tokens.
  friend struct GuestToHostMap;

  std::shared_lock<FEXCore::Utils::WritePriorityMutex::Mutex> Lock;

  LookupCacheReadLockToken(FEXCore::Utils::WritePriorityMutex::Mutex& Mutex)
    : Lock(Mutex) {}
};

// Guest address -> host code map together with the bookkeeping needed to invalidate it:
// block links that have to be severed when a block goes away, and the list of block addresses per code page.
// Mutating and reading members take a lock token, which can only be obtained through
// AcquireWriteLock()/AcquireReadLock().
struct GuestToHostMap {
  FEXCore::Utils::WritePriorityMutex::Mutex Lock {};

  // Locks the mutex through a std::lock_guard held inside the returned token.
  [[nodiscard]]
  LookupCacheWriteLockToken AcquireWriteLock() {
    return LookupCacheWriteLockToken {Lock};
  }

  // Locks the mutex through a std::shared_lock held inside the returned token.
  [[nodiscard]]
  LookupCacheReadLockToken AcquireReadLock() {
    return LookupCacheReadLockToken {Lock};
  }

  // Key of the block link map: ordered by guest destination first, host link pointer second.
  struct BlockLinkTag {
    uint64_t GuestDestination;
    FEXCore::Context::ExitFunctionLinkData* HostLink;

    bool operator<(const BlockLinkTag& other) const {
      if (GuestDestination != other.GuestDestination) {
        return GuestDestination < other.GuestDestination;
      }
      return HostLink < other.HostLink;
    }
  };

  // Both the std::pmr::map and its nodes are allocated out of a monotonic buffer resource.
  // The intent is that the block link map can be discarded cheaply through the allocator instead of
  // walking every element and destructing it.
  //
  // `BlockLinks` therefore looks like a leaking raw pointer, but its storage belongs to the MBR.
  fextl::pmr::named_monotonic_page_buffer_resource BlockLinks_mbr;
  using BlockLinksMapType = std::pmr::map<BlockLinkTag, FEXCore::Context::BlockDelinkerFunc>;
  fextl::unique_ptr<std::pmr::polymorphic_allocator<std::byte>> BlockLinks_pma;
  BlockLinksMapType* BlockLinks;

  // Host entry point of a block plus the code pages recorded for it.
  struct BlockEntry {
    uint64_t HostCode;
    fextl::vector<uint64_t> CodePages;
  };

  // Guest block address -> block entry.
  fextl::robin_map<uint64_t, BlockEntry> BlockList;

  // Page index -> addresses of the blocks registered for that page.
  fextl::map<uint64_t, fextl::vector<uint64_t>> CodePages;

  GuestToHostMap();

  // Records a Guest -> Host code mapping and returns the stored entry.
  const BlockEntry& AddBlockMapping(uint64_t Address, const fextl::vector<uint64_t>& CodePages, void* HostCode, const LookupCacheWriteLockToken&) {
    // An already existing mapping gets replaced.
    // NOTE: Generally no previous entry should exist, however there is one exception:
    //       If the backend updates the active thread's CodeBuffer, the new associated LookupCache
    //       may already contain the block address. Since this is comparatively rare, one of the
    //       two blocks is simply leaked in this case.
    const auto Result = BlockList.insert_or_assign(Address, BlockEntry {reinterpret_cast<uintptr_t>(HostCode), CodePages});
    return Result.first->second;
  }

  // Returns the entry registered for Address, or nullptr if there is none.
  const BlockEntry* FindBlock(uint64_t Address, const LookupCacheReadLockToken&) {
    const auto Found = BlockList.find(Address);
    return Found != BlockList.end() ? &Found->second : nullptr;
  }

  // Severs every link that targets Address and drops the block itself.
  // Returns true if a block was registered for Address.
  bool Erase(uint64_t Address, const LookupCacheWriteLockToken&) {
    // All tags with this guest destination, whatever their host link pointer is.
    const BlockLinkTag FirstTag {Address, nullptr};
    const BlockLinkTag LastTag {Address, reinterpret_cast<FEXCore::Context::ExitFunctionLinkData*>(UINTPTR_MAX)};

    auto Current = BlockLinks->lower_bound(FirstTag);
    const auto End = BlockLinks->upper_bound(LastTag);
    while (Current != End) {
      // Invoke the delinker for this link, then remove the link.
      Current->second(Current->first.HostLink);
      Current = BlockLinks->erase(Current);
    }

    // Remove from BlockList
    return BlockList.erase(Address) != 0;
  }

  // Erases every block registered for a page that intersects [Start, Start + Length) and forgets those pages.
  // Takes the write lock itself.
  void InvalidateRange(uint64_t Start, uint64_t Length) {
    const auto lk = AcquireWriteLock();

    const auto FirstPage = Start >> 12;
    const auto LastPage = (Start + Length - 1) >> 12;

    const auto Begin = CodePages.lower_bound(FirstPage);
    const auto End = CodePages.upper_bound(LastPage);

    for (auto Page = Begin; Page != End; ++Page) {
      for (const auto& BlockAddress : Page->second) {
        Erase(BlockAddress, lk);
      }
    }
    CodePages.erase(Begin, End);
  }

  // Registers a link to GuestDestination along with the function that undoes it.
  void AddBlockLink(uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData* HostLink,
                    const FEXCore::Context::BlockDelinkerFunc& delinker, const LookupCacheWriteLockToken&) {
    BlockLinks->insert(BlockLinksMapType::value_type {BlockLinkTag {GuestDestination, HostLink}, delinker});
  }

  // Appends Addresses to every page that intersects [Start, Start + Length).
  // Returns true if at least one of those pages had no blocks registered before.
  bool AddBlockExecutableRange(const std::ranges::input_range auto& Addresses, uint64_t Start, uint64_t Length, const LookupCacheWriteLockToken&) {
    bool AnyNewPage = false;

    const auto LastPage = (Start + Length - 1) >> 12;
    for (auto Page = Start >> 12; Page <= LastPage; ++Page) {
      auto& Blocks = CodePages[Page];
      if (Blocks.empty()) {
        AnyNewPage = true;
      }
      Blocks.insert(Blocks.end(), Addresses.begin(), Addresses.end());
    }

    return AnyNewPage;
  }

  void ClearCache(const LookupCacheWriteLockToken&);
};

// A thread's view of the guest -> host block lookup hierarchy:
//  - L1: direct-mapped array of LookupCacheEntry, indexed by `Address & L1PointerMask`.
//  - L2: page-pointer table (one slot per guest page) leading to one LookupCacheEntry per byte offset in the page.
//  - L3: the GuestToHostMap referenced through `Shared`.
// L1 lookups are done without taking a lock; L2 and L3 accesses are done under the shared map's lock.
class LookupCache {
public:
  struct LookupCacheEntry {
    uintptr_t HostCode;
    uintptr_t GuestCode;
  };

  LookupCache(FEXCore::Context::ContextImpl* CTX);
  ~LookupCache();

  // Swaps out the underlying GuestToHostMap and clears all associated caches.
  // This interface requires the previous CodeBuffer to be provided despite not using it. This ensures the shared write lock is still valid.
  void ChangeGuestToHostMapping([[maybe_unused]] CPU::CodeBuffer& Prev, GuestToHostMap& NewMap, const LookupCacheWriteLockToken& lk) {
    ClearThreadLocalCaches(lk);
    Shared = &NewMap;
  }

  // Looks Address up in L1, then L2, then L3 and returns the host code pointer, or 0 if no block is known.
  // Hits in L2/L3 are promoted into the faster levels.
  uintptr_t FindBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) {
    // Try L1, no lock needed
    auto& L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1PointerMask];
    if (L1Entry.GuestCode == Address) {
      return L1Entry.HostCode;
    }

    // L2 and L3 need to be locked
    uintptr_t HostPtr {};
    {
      // Accounts the time spent waiting for the lock, if stats are available for this thread.
      std::optional<FEXCore::SHMStats::AccumulationBlock<uint64_t>> LockTime(
        Thread->ThreadStats ? &Thread->ThreadStats->AccumulatedCacheReadLockTime : nullptr);
      auto lk = Shared->AcquireReadLock();
      LockTime.reset();

      if (!DisableL2Cache()) {
        // Try L2
        const auto PageIndex = (Address & (VirtualMemSize - 1)) >> 12;
        const auto PageOffset = Address & (0x0FFF);

        const auto Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
        auto LocalPagePointer = Pointers[PageIndex];

        // Do we have a page pointer for this address?
        if (LocalPagePointer) {
          // Find the pointer for the address in the blocks
          auto BlockPointers = reinterpret_cast<LookupCacheEntry*>(LocalPagePointer);

          if (BlockPointers[PageOffset].GuestCode == Address) {
            L1Entry.GuestCode = Address;
            L1Entry.HostCode = BlockPointers[PageOffset].HostCode;
            HostPtr = L1Entry.HostCode;
          }
        }
      }

      if (!HostPtr) {
        // Try L3
        auto Entry = Shared->FindBlock(Address, lk);
        if (Entry) {
          CacheBlockMapping(Address, *Entry, false, lk);
          HostPtr = Entry->HostCode;
        }
      }
    }

    if (HostPtr && DynamicL1Cache()) {
      UpdateDynamicL1Stats(Thread);
    }

    FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedCacheMissCount, 1);

    return HostPtr;
  }

  // Counts one L2/L3 hit and, once per sample period, resizes L1 according to the observed hit rate.
  void UpdateDynamicL1Stats(FEXCore::Core::InternalThreadState* Thread) {
    // If host pointer was found in L2 or L3, then add it to the counter.
    // Keeping track not L1 misses, but specifically L2/L3 hits.
    ++L2L3CacheHits;

    // A monotonic clock is used so that wall-clock adjustments can't distort or stall the sampling.
    const auto CurrentTime = std::chrono::steady_clock::now();
    const auto Period = CurrentTime - LastPeriod;
    if (Period >= SamplePeriod) {
      // If larger than the sample period then check if we need to increase L1 cache size.
      const double AveragePerSecond = static_cast<double>(L2L3CacheHits) /
                                      static_cast<double>(std::chrono::duration_cast<std::chrono::milliseconds>(Period).count()) * 1000.0;

      if (AveragePerSecond >= DynamicL1CacheIncreaseCountHeuristic()) {
        if (CurrentL1Entries < MAX_L1_ENTRIES) {
          // Entries cached so far sit at `Address & OldMask`. Once the mask is widened, InvalidateCache() only
          // inspects `Address & NewMask` and would no longer find them, yet they become reachable again as soon
          // as L1 shrinks back. Drop the currently active part of L1 so that no such stale entry can survive.
          FEXCore::Allocator::VirtualDontNeed(reinterpret_cast<void*>(L1Pointer), CurrentL1Entries * sizeof(LookupCacheEntry), false);

          CurrentL1Entries <<= 1;
          L1PointerMask = CurrentL1Entries - 1;

          // Update the thread's L1 pointer mask to increase how much cache it uses.
          // Since we're in C-code, this is safe to update here.
          Thread->CurrentFrame->State.L1Mask = GetScaledL1PointerMask();
        }
      } else if (AveragePerSecond < DynamicL1CacheDecreaseCountHeuristic()) {
        if (CurrentL1Entries > MIN_L1_ENTRIES) {
          CurrentL1Entries >>= 1;
          L1PointerMask = CurrentL1Entries - 1;

          // Madvise the entries that we are dropping. Gives the memory back to the OS.
          LookupCacheEntry* FirstZeroL1Entry = &reinterpret_cast<LookupCacheEntry*>(L1Pointer)[CurrentL1Entries];
          size_t ZeroMemorySize = (MAX_L1_ENTRIES - CurrentL1Entries) * sizeof(LookupCacheEntry);
          FEXCore::Allocator::VirtualDontNeed(FirstZeroL1Entry, ZeroMemorySize, false);

          // Update the thread's L1 pointer mask to decrease how much cache it uses.
          // Since we're in C-code, this is safe to update here.
          Thread->CurrentFrame->State.L1Mask = GetScaledL1PointerMask();
        }
      }

      // Update Last period to start again.
      LastPeriod = CurrentTime;
      L2L3CacheHits = 0;
    }
  }

  // The L3 map that backs this cache. Set through ChangeGuestToHostMapping().
  GuestToHostMap* Shared = nullptr;

  // Appends a list of Block {Address} to CodePages [Start, Start + Length)
  // Returns true if new pages are marked as containing code
  bool AddBlockExecutableRange(FEXCore::Core::InternalThreadState* Thread, const fextl::set<uint64_t>& Addresses, uint64_t Start, uint64_t Length) {
    std::optional<FEXCore::SHMStats::AccumulationBlock<uint64_t>> LockTime(
      Thread->ThreadStats ? &Thread->ThreadStats->AccumulatedCacheWriteLockTime : nullptr);
    auto lk = Shared->AcquireWriteLock();
    LockTime.reset();

    return Shared->AddBlockExecutableRange(Addresses, Start, Length, lk);
  }

  // Adds to Guest -> Host code mapping
  void AddBlockMapping(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, const fextl::vector<uint64_t>& CodePages, void* HostCode) {
    std::optional<FEXCore::SHMStats::AccumulationBlock<uint64_t>> LockTime(
      Thread->ThreadStats ? &Thread->ThreadStats->AccumulatedCacheWriteLockTime : nullptr);
    auto lk = Shared->AcquireWriteLock();
    LockTime.reset();

    const auto& Entry = Shared->AddBlockMapping(Address, CodePages, HostCode, lk);

    // There is no need to update L1 or L2, they will get updated on first lookup
    // However, adding to L1 here increases performance
    CacheBlockMapping(Address, Entry, true, lk);
  }

  // Invalidates L1/L2 for a given guest block
  void InvalidateCache(uint64_t Address, const LookupCacheWriteLockToken& lk) {
    // Do L1
    auto& L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1PointerMask];
    if (L1Entry.GuestCode == Address) {
      L1Entry.GuestCode = 0;
      // Leave L1Entry.HostCode as is, so that concurrent lookups won't read a null pointer
      // This is a soft guarantee for cross thread invalidation, as atomics are not used
      // and it hasn't been thoroughly tested
    }

    if (!DisableL2Cache()) {
      // Do full map
      Address = Address & (VirtualMemSize - 1);
      uint64_t PageOffset = Address & (0x0FFF);
      Address >>= 12;

      uintptr_t* Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
      uint64_t LocalPagePointer = Pointers[Address];
      if (!LocalPagePointer) {
        // Page for this code didn't even exist, nothing to do
        return;
      }

      // Page exists, just set the offset to zero
      auto BlockPointers = reinterpret_cast<LookupCacheEntry*>(LocalPagePointer);
      BlockPointers[PageOffset].GuestCode = 0;
      BlockPointers[PageOffset].HostCode = 0;
    }
  }

  // Invalidates all L1/L2 entries for all guest block that intersect the given range
  // Returns true if any cached code page was inside the range.
  bool InvalidateCacheRange(uint64_t Start, uint64_t Length) {
    auto lk = Shared->AcquireWriteLock();

    auto lower = CachedCodePages.lower_bound(Start >> 12);
    auto upper = CachedCodePages.upper_bound((Start + Length - 1) >> 12);

    for (auto it = lower; it != upper; it++) {
      for (const auto& Entry : it->second) {
        InvalidateCache(Entry, lk);
      }
    }
    bool ret = upper != lower;
    CachedCodePages.erase(lower, upper);
    return ret;
  }

  // Forwards to the shared map's AddBlockLink().
  void AddBlockLink(uint64_t GuestDestination, FEXCore::Context::ExitFunctionLinkData* HostLink,
                    const FEXCore::Context::BlockDelinkerFunc& delinker, const LookupCacheWriteLockToken& lk) {
    Shared->AddBlockLink(GuestDestination, HostLink, delinker, lk);
  }

  void ClearCache(const LookupCacheWriteLockToken&);
  void ClearL2Cache(const LookupCacheBaseLockToken&);
  void ClearThreadLocalCaches(const LookupCacheWriteLockToken&);

  uintptr_t GetL1Pointer() const {
    return L1Pointer;
  }
  // L1 index mask pre-scaled by the entry size, i.e. a mask for byte offsets into the L1 array.
  uintptr_t GetScaledL1PointerMask() const {
    return L1PointerMask << FEXCore::ilog2(sizeof(LookupCache::LookupCacheEntry));
  }
  uintptr_t GetPagePointer() const {
    return PagePointer;
  }
  uintptr_t GetVirtualMemorySize() const {
    return VirtualMemSize;
  }

  // This needs to be taken before reads or writes to L2, L3, CodePages,
  // and before writes to L1. Concurrent access from a thread that this LookupCache doesn't belong to
  // may only happen during cross thread invalidation (::Erase).
  // All other operations must be done from the owning thread.
  // Some care is taken so that L1 lookups can be done without locks, and even tearing is unlikely to lead to a crash.
  // This approach has not been fully vetted yet.
  // Also note that L1 lookups might be inlined in the JIT Dispatcher and/or block ends.
  auto AcquireWriteLock() {
    return Shared->AcquireWriteLock();
  }

private:
  // Records Address in CachedCodePages and inserts the mapping into L1, plus L2 unless L1Only is set
  // or L2 is disabled.
  void CacheBlockMapping(uint64_t Address, const GuestToHostMap::BlockEntry& Entry, bool L1Only, const LookupCacheBaseLockToken& lk) {
    for (const auto& CodePage : Entry.CodePages) {
      CachedCodePages[CodePage >> 12].insert(Address);
    }

    // Do L1
    auto& L1Entry = reinterpret_cast<LookupCacheEntry*>(L1Pointer)[Address & L1PointerMask];
    L1Entry.GuestCode = Address;
    L1Entry.HostCode = Entry.HostCode;

    if (!DisableL2Cache() && !L1Only) {
      // Do full map
      auto FullAddress = Address;
      Address = Address & (VirtualMemSize - 1);

      uint64_t PageOffset = Address & (0x0FFF);
      Address >>= 12;

      uintptr_t* Pointers = reinterpret_cast<uintptr_t*>(PagePointer);
      uint64_t LocalPagePointer = Pointers[Address];
      if (!LocalPagePointer) {
        // We don't have a page pointer for this address
        // Allocate one now if we can
        uintptr_t NewPageBacking = AllocateBackingForPage();
        if (!NewPageBacking) {
          // Couldn't allocate, clear L2 and retry
          // ClearL2Cache() resets the allocator, so the retry finds free backing storage.
          ClearL2Cache(lk);
          CacheBlockMapping(FullAddress, Entry, false, lk);
          return;
        }
        Pointers[Address] = NewPageBacking;
        LocalPagePointer = NewPageBacking;
      }

      // Add the new pointer to the page block
      auto BlockPointers = reinterpret_cast<LookupCacheEntry*>(LocalPagePointer);

      // This silently replaces existing mappings
      BlockPointers[PageOffset].GuestCode = FullAddress;
      BlockPointers[PageOffset].HostCode = Entry.HostCode;
    }
  }

  // Hands out the next SIZE_PER_PAGE bytes of L2 backing storage.
  // Returns 0 when the pool is exhausted; the caller is then expected to clear L2.
  uintptr_t AllocateBackingForPage() {
    uintptr_t NewBase = AllocateOffset;
    uintptr_t NewEnd = AllocateOffset + SIZE_PER_PAGE;

    // A slot that ends exactly at CODE_SIZE still lies completely inside the pool, so only reject
    // allocations that would run past its end.
    if (NewEnd > CODE_SIZE) {
      // We ran out of block backing space. Need to clear the block cache and tell the JIT cores to clear their caches as well
      // Tell whatever is calling this that it needs to do it.
      return 0;
    }

    AllocateOffset = NewEnd;
    return PageMemory + NewBase;
  }

  // Maps from a page index to all blocks in the page that have at some point been fetched into L1/L2
  fextl::map<uint64_t, fextl::robin_set<uint64_t>> CachedCodePages;

  uintptr_t PagePointer;
  uintptr_t PageMemory;
  uintptr_t L1Pointer;
  uintptr_t L1PointerMask;

  size_t TotalCacheSize;

  // Start with 8k entries in L1 to give 128KB of L1 cache to each thread.
  // Max out at 1 million entries to give each thread 16MB of L1 cache maximum.
  constexpr static size_t MIN_L1_ENTRIES = 8 * 1024;        // Must be a power of 2
  constexpr static size_t MAX_L1_ENTRIES = 1 * 1024 * 1024; // Must be a power of 2

  constexpr static size_t CODE_SIZE = 128 * 1024 * 1024;
  constexpr static size_t SIZE_PER_PAGE = FEXCore::Utils::FEX_PAGE_SIZE * sizeof(LookupCacheEntry);
  constexpr static size_t MAX_L1_SIZE = MAX_L1_ENTRIES * sizeof(LookupCacheEntry);

  // Offset of the next free byte inside the L2 backing pool.
  size_t AllocateOffset {};

  FEXCore::Context::ContextImpl* ctx;
  uint64_t VirtualMemSize {};

  // State of the dynamic L1 sizing.
  size_t CurrentL1Entries = MIN_L1_ENTRIES;
  uint64_t L2L3CacheHits {};
  std::chrono::steady_clock::time_point LastPeriod {};
  constexpr static std::chrono::seconds SamplePeriod {1};
  FEX_CONFIG_OPT(DynamicL1CacheIncreaseCountHeuristic, DYNAMICL1CACHEINCREASECOUNTHEURISTIC);
  FEX_CONFIG_OPT(DynamicL1CacheDecreaseCountHeuristic, DYNAMICL1CACHEDECREASECOUNTHEURISTIC);

  FEX_CONFIG_OPT(DynamicL1Cache, DYNAMICL1CACHE);
  FEX_CONFIG_OPT(DisableL2Cache, DISABLEL2CACHE);
};
} // namespace FEXCore


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 AVX instructions to 128-bit IR
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"

#include <FEXCore/Utils/LogManager.h>
#include "Interface/Core/OpcodeDispatcher.h"

#include <array>
#include <cstdint>
#include <tuple>
#include <utility>

namespace FEXCore::IR {
class OrderedNode;

#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// Loads an AVX source operand as a pair of 128-bit halves.
// Register operands are read from the XMM register (and its upper half if NeedsHigh is set),
// memory operands are read with a single or a paired 128-bit load. VSIB operands are rejected here.
// When NeedsHigh is false, no upper half is loaded.
OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_LoadSource_WithOpSize(
  const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags, bool NeedsHigh, MemoryAccessType AccessType) {

  if (!Operand.IsGPR()) {
    // Memory operand.
    LOGMAN_THROW_A_FMT(IsOperandMem(Operand, true), "only memory sources");

    if (Operand.IsSIB()) {
      const bool IsVSIB = (Op->Flags & X86Tables::DecodeFlags::FLAG_VSIB_BYTE) != 0;
      LOGMAN_THROW_A_FMT(!IsVSIB, "VSIB uses LoadVSIB instead");
    }

    const AddressMode A = DecodeAddress(Op, Operand, AccessType, true /* IsLoad */);
    if (!NeedsHigh) {
      // Only the lower 128 bits are wanted.
      return {.Low = _LoadMemFPRAutoTSO(OpSize::i128Bit, A, OpSize::i8Bit)};
    }
    return _LoadMemPairFPRAutoTSO(OpSize::i128Bit, A, OpSize::i8Bit);
  }

  // Register operand.
  const auto gpr = Operand.Data.GPR.GPR;
  LOGMAN_THROW_A_FMT(gpr >= FEXCore::X86State::REG_XMM_0 && gpr <= FEXCore::X86State::REG_XMM_15, "must be AVX reg");
  const auto XMMIndex = gpr - X86State::REG_XMM_0;

  RefPair Loaded {};
  Loaded.Low = AVX128_LoadXMMRegister(XMMIndex, false);
  Loaded.High = NeedsHigh ? AVX128_LoadXMMRegister(XMMIndex, true) : nullptr;
  return Loaded;
}

// Decodes a VSIB memory operand into its parts: the index vector (lower and, if NeedsHigh is set,
// upper half), the optional base address, the scale and the displacement.
// For SIB relocations the entrypoint offset is folded into the base address instead of the displacement.
OpDispatchBuilder::RefVSIB
OpDispatchBuilder::AVX128_LoadVSIB(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags, bool NeedsHigh) {
  const bool IsVSIB = (Op->Flags & X86Tables::DecodeFlags::FLAG_VSIB_BYTE) != 0;
  LOGMAN_THROW_A_FMT((Operand.IsSIB() || Operand.IsSIBRelocation()) && IsVSIB, "Trying to load VSIB for something that isn't the correct "
                                                                               "type!");

  // A VSIB operand packs a lot of information; pull it apart into something easier to work with.

  const auto Index_gpr = Operand.Data.SIB.Index;
  const auto Base_gpr = Operand.Data.SIB.Base;
  LOGMAN_THROW_A_FMT(Index_gpr >= FEXCore::X86State::REG_XMM_0 && Index_gpr <= FEXCore::X86State::REG_XMM_15, "must be AVX reg");
  LOGMAN_THROW_A_FMT(Base_gpr == FEXCore::X86State::REG_INVALID || (Base_gpr >= FEXCore::X86State::REG_RAX && Base_gpr <= FEXCore::X86State::REG_R15),
                     "Base must be a GPR.");

  const auto IndexXMM = Index_gpr - X86State::REG_XMM_0;
  const bool HasBase = Base_gpr != FEXCore::X86State::REG_INVALID;

  OpDispatchBuilder::RefVSIB Decoded {
    .Low = AVX128_LoadXMMRegister(IndexXMM, false),
    .High = NeedsHigh ? AVX128_LoadXMMRegister(IndexXMM, true) : Invalid(),
    .BaseAddr = HasBase ? LoadGPRRegister(Base_gpr, OpSize::i64Bit, 0, false) : nullptr,
    .Scale = Operand.Data.SIB.Scale,
  };

  if (!Operand.IsSIBRelocation()) {
    // Plain SIB: the offset is an ordinary displacement.
    Decoded.Displacement = static_cast<int32_t>(Operand.Data.SIB.Offset);
    return Decoded;
  }

  // Relocation: the offset is relative to the entrypoint and becomes part of the base address.
  auto EPOffset = _EntrypointOffset(OpSize::i64Bit, Operand.Data.SIB.Offset);
  if (Decoded.BaseAddr) {
    Decoded.BaseAddr = Add(OpSize::i64Bit, EPOffset, Decoded.BaseAddr);
  } else {
    Decoded.BaseAddr = EPOffset;
  }

  return Decoded;
}

// Writes a pair of 128-bit halves to the destination operand.
// For register destinations each half is only written if it is set; for memory destinations a paired
// store is used when the upper half is set and a single 128-bit store of the lower half otherwise.
void OpDispatchBuilder::AVX128_StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, const FEXCore::X86Tables::DecodedOperand& Operand,
                                                      const RefPair Src, MemoryAccessType AccessType) {
  if (!Operand.IsGPR()) {
    // Memory destination.
    AddressMode A = DecodeAddress(Op, Operand, AccessType, false /* IsLoad */);

    if (!Src.High) {
      _StoreMemFPRAutoTSO(OpSize::i128Bit, A, Src.Low, OpSize::i8Bit);
    } else {
      _StoreMemPairFPRAutoTSO(OpSize::i128Bit, A, Src.Low, Src.High, OpSize::i8Bit);
    }
    return;
  }

  // Register destination.
  const auto gpr = Operand.Data.GPR.GPR;
  LOGMAN_THROW_A_FMT(gpr >= FEXCore::X86State::REG_XMM_0 && gpr <= FEXCore::X86State::REG_XMM_15, "expected AVX register");
  const auto XMMIndex = gpr - X86State::REG_XMM_0;

  if (Src.Low) {
    AVX128_StoreXMMRegister(XMMIndex, Src.Low, false);
  }

  if (Src.High) {
    AVX128_StoreXMMRegister(XMMIndex, Src.High, true);
  }
}

// Reads one 128-bit half of an AVX register: the lower half through LoadXMMRegister(),
// the upper half from the context slot AVXHigh0Index + XMM.
Ref OpDispatchBuilder::AVX128_LoadXMMRegister(uint32_t XMM, bool High) {
  if (!High) {
    return LoadXMMRegister(XMM);
  }
  return LoadContext(AVXHigh0Index + XMM);
}

// Writes one 128-bit half of an AVX register: the lower half through StoreXMMRegister(),
// the upper half into the context slot AVXHigh0Index + XMM.
void OpDispatchBuilder::AVX128_StoreXMMRegister(uint32_t XMM, const Ref Src, bool High) {
  if (!High) {
    StoreXMMRegister(XMM, Src);
    return;
  }
  StoreContext(AVXHigh0Index + XMM, Src);
}

// Full-width vector move (Reg <- Mem, Reg <- Reg or Mem <- Reg).
// The 256-bit form moves both halves. The 128-bit form moves the lower half only and additionally
// zeroes the upper half when the destination is a register.
void OpDispatchBuilder::AVX128_VMOVAPS(OpcodeArgs) {
  const auto SrcSize = GetSrcSize(Op);
  const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;

  // The upper half of the source is only needed for the 256-bit form.
  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  if (Is128Bit && Op->Dest.IsGPR()) {
    // A 128-bit write to a register clears the upper 128 bits.
    Src.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Src);
}

// Shared implementation of the scalar moves (VMOVSS/VMOVSD); ElementSize selects the scalar width.
// Three forms are handled: register merge, load from memory, and store to memory.
void OpDispatchBuilder::AVX128_VMOVScalarImpl(OpcodeArgs, IR::OpSize ElementSize) {
  if (!Op->Dest.IsGPR()) {
    // VMOVSS/SD mem32/mem64, xmm1
    auto Value = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
    StoreResultFPR_WithOpSize(Op, Op->Dest, Value.Low, ElementSize);
    return;
  }

  Ref Low {};
  if (Op->Src[0].IsGPR() && Op->Src[1].IsGPR()) {
    // VMOVSS/SD xmm1, xmm2, xmm3
    // Element 0 of the second source is inserted into the first source.
    auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
    auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
    Low = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Src1.Low, Src2.Low);
  } else {
    // VMOVSS/SD xmm1, mem32/mem64
    Low = LoadSourceFPR_WithOpSize(Op, Op->Src[1], ElementSize, Op->Flags);
  }

  // Both register-destination forms zero the upper 128 bits.
  auto High = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Low, .High = High});
}

// VMOVSD: scalar move with a 64-bit element, forwarded to AVX128_VMOVScalarImpl().
void OpDispatchBuilder::AVX128_VMOVSD(OpcodeArgs) {
  AVX128_VMOVScalarImpl(Op, OpSize::i64Bit);
}

// VMOVSS: scalar move with a 32-bit element, forwarded to AVX128_VMOVScalarImpl().
void OpDispatchBuilder::AVX128_VMOVSS(OpcodeArgs) {
  AVX128_VMOVScalarImpl(Op, OpSize::i32Bit);
}

// Emits the two-source vector operation IROp on each 128-bit half of the operands.
// For the 128-bit form only the lower half is computed and the upper half of the result is zero.
void OpDispatchBuilder::AVX128_VectorALU(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto SrcSize = GetSrcSize(Op);
  const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool NeedsHigh = !Is128Bit;

  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHigh);
  auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, NeedsHigh);

  RefPair Result {};

  // _VAdd only serves as the template; DeriveOp turns it into the requested IROp.
  DeriveOp(Result_Low, IROp, _VAdd(OpSize::i128Bit, ElementSize, Src1.Low, Src2.Low));
  Result.Low = Result_Low;

  if (NeedsHigh) {
    DeriveOp(Result_High, IROp, _VAdd(OpSize::i128Bit, ElementSize, Src1.High, Src2.High));
    Result.High = Result_High;
  } else {
    Result.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Emits the single-source vector operation IROp on each 128-bit half of the operand.
// For the 128-bit form only the lower half is computed and the upper half of the result is zero.
void OpDispatchBuilder::AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto SrcSize = GetSrcSize(Op);
  const auto Is128Bit = SrcSize == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool NeedsHigh = !Is128Bit;

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHigh);

  RefPair Result {};

  // _VFSqrt only serves as the template; DeriveOp turns it into the requested IROp.
  DeriveOp(Result_Low, IROp, _VFSqrt(OpSize::i128Bit, ElementSize, Src.Low));
  Result.Low = Result_Low;

  if (NeedsHigh) {
    DeriveOp(Result_High, IROp, _VFSqrt(OpSize::i128Bit, ElementSize, Src.High));
    Result.High = Result_High;
  } else {
    Result.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Applies Helper to each 128-bit half of the first source operand and stores the result.
// With a 128-bit SrcSize Helper runs on the lower half only and the upper half of the result is zero.
void OpDispatchBuilder::AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
                                               std::function<Ref(IR::OpSize ElementSize, Ref Src)> Helper) {
  const auto Is128Bit = SrcSize == OpSize::i128Bit;
  const bool NeedsHigh = !Is128Bit;

  auto Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHigh);

  RefPair Output {};
  Output.Low = Helper(ElementSize, Input.Low);

  if (NeedsHigh) {
    Output.High = Helper(ElementSize, Input.High);
  } else {
    Output.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Applies Helper to the matching 128-bit halves of the first two source operands and stores the result.
// With a 128-bit SrcSize Helper runs on the lower halves only and the upper half of the result is zero.
void OpDispatchBuilder::AVX128_VectorBinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
                                                std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2)> Helper) {
  const auto Is128Bit = SrcSize == OpSize::i128Bit;
  const bool NeedsHigh = !Is128Bit;

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHigh);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, NeedsHigh);

  RefPair Output {};
  Output.Low = Helper(ElementSize, Lhs.Low, Rhs.Low);

  if (NeedsHigh) {
    Output.High = Helper(ElementSize, Lhs.High, Rhs.High);
  } else {
    Output.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Generic three-source helper. Src[0] and Src[1] are loaded per lane, while the caller-provided
// `Src3` is passed unchanged to `Helper` for both the low and the high lane.
void OpDispatchBuilder::AVX128_VectorTrinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, Ref Src3,
                                                 std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper) {
  const bool NeedsHighLane = SrcSize != OpSize::i128Bit;

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHighLane);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, NeedsHighLane);

  Ref Low = Helper(ElementSize, Lhs.Low, Rhs.Low, Src3);

  Ref High {};
  if (NeedsHighLane) {
    High = Helper(ElementSize, Lhs.High, Rhs.High, Src3);
  } else {
    // 128-bit operation: the upper destination lane is written as zero.
    High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Low, .High = High});
}

// Shifts every element of Src[0] by a single shift amount taken from the low lane of Src[1].
// `IROp` selects the concrete shift operation; the same shift source is used for both lanes.
void OpDispatchBuilder::AVX128_VectorShiftWideImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp) {
  const bool NeedsHighLane = GetSrcSize(Op) != Core::CPUState::XMM_SSE_REG_SIZE;

  auto Value = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHighLane);
  // Only the low register of the shift operand is ever consulted, so the high half is not loaded.
  auto ShiftSrc = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);

  // The shift amount always lives in the low 8 bytes of the low register.
  DeriveOp(ShiftedLow, IROp, _VUShrSWide(OpSize::i128Bit, ElementSize, Value.Low, ShiftSrc.Low));

  RefPair Output {};
  Output.Low = ShiftedLow;

  if (NeedsHighLane) {
    DeriveOp(ShiftedHigh, IROp, _VUShrSWide(OpSize::i128Bit, ElementSize, Value.High, ShiftSrc.Low));
    Output.High = ShiftedHigh;
  } else {
    Output.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Shifts every element of Src[0] by the immediate in Src[1] using the shift operation `IROp`.
// A zero shift forwards the source unchanged; a 128-bit destination gets a zeroed high lane.
void OpDispatchBuilder::AVX128_VectorShiftImmImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp) {
  const bool Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const uint64_t ShiftConstant = Op->Src[1].Literal();

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  // Start from the unmodified source; every lane that needs shifting is overwritten below.
  RefPair Result = Src;

  if (ShiftConstant != 0) [[likely]] {
    DeriveOp(ShiftedLow, IROp, _VUShrI(OpSize::i128Bit, ElementSize, Src.Low, ShiftConstant));
    Result.Low = ShiftedLow;

    if (!Is128Bit) {
      DeriveOp(ShiftedHigh, IROp, _VUShrI(OpSize::i128Bit, ElementSize, Src.High, ShiftConstant));
      Result.High = ShiftedHigh;
    }
  }

  if (Is128Bit) {
    // The high half was never loaded for a 128-bit operation; it is stored as zero.
    Result.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Vector XOR. XOR-ing a register with itself is the usual x86 idiom for zeroing a vector
// register, so that case stores a zero vector directly instead of emitting the XOR.
void OpDispatchBuilder::AVX128_VectorXOR(OpcodeArgs) {
  const auto& Lhs = Op->Src[0];
  const auto& Rhs = Op->Src[1];
  const bool IsSelfXor = Lhs.IsGPR() && Rhs.IsGPR() && Lhs.Data.GPR.GPR == Rhs.Data.GPR.GPR;

  if (!IsSelfXor) {
    ///< Regular code path
    AVX128_VectorALU(Op, OP_VXOR, OpSize::i128Bit);
    return;
  }

  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Zero));
}

// Handles VZEROALL and VZEROUPPER. Both clear the `avx_high` context storage for the first
// 8 (32-bit mode) or 16 (64-bit mode) registers; VZEROALL additionally stores a zero vector
// into each of those XMM registers first.
void OpDispatchBuilder::AVX128_VZERO(OpcodeArgs) {
  // An AVX-register-sized destination identifies VZEROALL; anything else is VZEROUPPER.
  const bool ZeroWholeRegisters = GetDstSize(Op) == Core::CPUState::XMM_AVX_REG_SIZE;
  const auto RegisterCount = Is64BitMode ? 16U : 8U;

  if (ZeroWholeRegisters) {
    // NOTE: Despite the name being VZEROALL, this will still only ever
    //       zero out up to the first 16 registers (even on AVX-512, where we have 32 registers)
    for (uint32_t Reg = 0; Reg < RegisterCount; ++Reg) {
      // Explicitly not caching named vector zero. This ensures that every register gets movi #0.0 directly.
      Ref Zero = LoadUncachedZeroVector(OpSize::i128Bit);
      AVX128_StoreXMMRegister(Reg, Zero, false);
    }
  }

  // Shared by both instructions: the upper halves of the affected registers are cleared in the context.
  InvalidateHighAVXRegisters();
  _ContextClear(offsetof(FEXCore::Core::CPUState, avx_high), sizeof(FEXCore::Core::CPUState::avx_high[0]) * RegisterCount);
}

// Non-temporal vector move. A register destination means a non-temporal load from memory;
// otherwise the source register is written to memory with non-temporal stores.
void OpDispatchBuilder::AVX128_MOVVectorNT(OpcodeArgs) {
  const bool Is128Bit = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  if (!Op->Dest.IsGPR()) {
    // Store path.
    auto Value = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit, MemoryAccessType::STREAM);
    Ref Address = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});

    if (Is128Bit) {
      // A single non-temporal store covers a 128-bit operation.
      _VStoreNonTemporal(OpSize::i128Bit, Value.Low, Address, 0);
    } else {
      // Both halves of a 256-bit value go out through one non-temporal store pair.
      _VStoreNonTemporalPair(OpSize::i128Bit, Value.Low, Value.High, Address, 0);
    }
    return;
  }

  ///< MOVNTDQA load non-temporal comes from SSE4.1 and is extended by AVX/AVX2.
  Ref Address = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.LoadData = false});

  RefPair Loaded {};
  Loaded.Low = _VLoadNonTemporal(OpSize::i128Bit, Address, 0);

  if (Is128Bit) {
    Loaded.High = LoadZeroVector(OpSize::i128Bit);
  } else {
    // The second 128-bit half sits 16 bytes after the first.
    Loaded.High = _VLoadNonTemporal(OpSize::i128Bit, Address, 16);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Loaded);
}

// 64-bit vector move. With a register destination the 64-bit source is zero-extended to the
// full register; with a memory destination only the low 64 bits are stored.
void OpDispatchBuilder::AVX128_MOVQ(OpcodeArgs) {
  Ref Low {};
  if (Op->Src[0].IsGPR()) {
    Low = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;
  } else {
    // Memory source: load exactly 64 bits.
    Low = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OpSize::i64Bit, Op->Flags);
  }

  if (!Op->Dest.IsGPR()) {
    StoreResultFPR_WithOpSize(Op, Op->Dest, Low, OpSize::i64Bit, OpSize::i64Bit);
    return;
  }

  // Register destination: bits [127:64] are zeroed too, and the high lane is stored as zero.
  Ref LowZext = VZeroExtendOperand(OpSize::i64Bit, Op->Src[0], Low);
  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = LowZext, .High = Zero});
}

// Handles the three operand shapes that share this opcode:
//  - memory destination: store the low 64 bits of Src[0]
//  - memory Src[1]:      load 64 bits into element 0 of Src[0]
//  - register Src[1]:    insert element 1 of Src[1] into element 0 of Src[0]
// Register results always get a zeroed high lane.
void OpDispatchBuilder::AVX128_VMOVLP(OpcodeArgs) {
  auto First = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  if (!Op->Dest.IsGPR()) {
    ///< VMOVLPS/PD mem64, xmm1
    StoreResultFPR_WithOpSize(Op, Op->Dest, First.Low, OpSize::i64Bit, OpSize::i64Bit);
    return;
  }

  Ref Merged {};
  if (!Op->Src[1].IsGPR()) {
    ///< VMOVLPS/PD xmm1, xmm2, mem64
    // Bits[63:0] come from memory, Bits[127:64] are kept from Src[0].
    Ref Address = MakeSegmentAddress(Op, Op->Src[1]);
    Merged = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, First.Low, 0, Address);
  } else {
    ///< VMOVHLPS xmm1, xmm2, xmm3
    // Bits[63:0] come from Src[1][127:64], Bits[127:64] are kept from Src[0].
    auto Second = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
    Merged = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, First.Low, Second.Low);
  }

  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Merged, .High = Zero});
}

// Handles the three operand shapes that share this opcode:
//  - memory destination: store element 1 (bits [127:64]) of Src[0]
//  - memory Src[1]:      load 64 bits into element 1 of Src[0]
//  - register Src[1]:    zip the low 64-bit elements of Src[0] and Src[1]
// Register results always get a zeroed high lane.
void OpDispatchBuilder::AVX128_VMOVHP(OpcodeArgs) {
  auto First = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  if (!Op->Dest.IsGPR()) {
    ///< VMOVHPS/PD mem64, xmm1
    // Bits[127:64] are written out with a vector element store.
    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    _VStoreVectorElement(OpSize::i128Bit, OpSize::i64Bit, First.Low, 1, Address);
    return;
  }

  Ref Merged {};
  if (!Op->Src[1].IsGPR()) {
    ///< VMOVHPS/PD xmm2, xmm1, mem64
    // Bits[63:0] are kept from Src[0], Bits[127:64] come from memory.
    Ref Address = MakeSegmentAddress(Op, Op->Src[1]);
    Merged = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, First.Low, 1, Address);
  } else {
    // VMOVLHPS xmm1, xmm2, xmm3
    auto Second = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);
    Merged = _VZip(OpSize::i128Bit, OpSize::i64Bit, First.Low, Second.Low);
  }

  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Merged, .High = Zero});
}

// Duplicates the low 64-bit element of each source lane across that lane of the destination.
// A 128-bit operation writes a zero vector to the high lane.
void OpDispatchBuilder::AVX128_VMOVDDUP(OpcodeArgs) {
  const bool Is128Bit = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool IsMemorySrc = !Op->Src[0].IsGPR();

  RefPair Input {};
  if (Is128Bit && IsMemorySrc) {
    // Accesses from memory are a little weird: the 128-bit operation only loads 8 bytes,
    // whereas the 256-bit operation loads the full 32 bytes through the regular path below.
    Input.Low = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OpSize::i64Bit, Op->Flags);
  } else {
    Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
  }

  RefPair Output {};
  // Low lane: duplicate Input.Low[63:0].
  Output.Low = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Input.Low, 0);

  if (Is128Bit) {
    Output.High = LoadZeroVector(OpSize::i128Bit);
  } else {
    // High lane: duplicate Input.High[63:0].
    Output.High = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Input.High, 0);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Per lane: transposes (_VTrn) the 32-bit elements of the source with itself.
void OpDispatchBuilder::AVX128_VMOVSLDUP(OpcodeArgs) {
  const auto Transpose = [this](IR::OpSize ElemSize, Ref Lane) {
    return _VTrn(OpSize::i128Bit, ElemSize, Lane, Lane);
  };

  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i32Bit, Transpose);
}

// Per lane: applies the second transpose variant (_VTrn2) to the 32-bit elements of the
// source paired with itself.
void OpDispatchBuilder::AVX128_VMOVSHDUP(OpcodeArgs) {
  const auto Transpose = [this](IR::OpSize ElemSize, Ref Lane) {
    return _VTrn2(OpSize::i128Bit, ElemSize, Lane, Lane);
  };

  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i32Bit, Transpose);
}

// Broadcasts one element of size `ElementSize` across the destination. The element comes either
// from element 0 of a source register or from memory. A 256-bit destination reuses the low lane
// as its high lane; a 128-bit destination gets a zeroed high lane.
void OpDispatchBuilder::AVX128_VBROADCAST(OpcodeArgs, IR::OpSize ElementSize) {
  const bool Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  RefPair Broadcast {};
  if (!Op->Src[0].IsGPR()) {
    // Get the address to broadcast from into a GPR.
    Ref Address = MakeSegmentAddress(Op, Op->Src[0], GetGPROpSize());
    Broadcast.Low = _VBroadcastFromMem(OpSize::i128Bit, ElementSize, Address);
  } else {
    Broadcast = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
    // A 128-bit element (VBROADCASTF128) is already the whole lane, so it needs no duplication.
    if (ElementSize != OpSize::i128Bit) {
      Broadcast.Low = _VDupElement(OpSize::i128Bit, ElementSize, Broadcast.Low, 0);
    }
  }

  if (Is128Bit) {
    Broadcast.High = LoadZeroVector(OpSize::i128Bit);
  } else {
    Broadcast.High = Broadcast.Low;
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Broadcast);
}

// Per lane: zips (_VZip) the elements of the two sources at the given element size.
void OpDispatchBuilder::AVX128_VPUNPCKL(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Interleave = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return _VZip(OpSize::i128Bit, ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, Interleave);
}

// Per lane: applies the second zip variant (_VZip2) to the two sources at the given element size.
void OpDispatchBuilder::AVX128_VPUNPCKH(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Interleave = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return _VZip2(OpSize::i128Bit, ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, Interleave);
}

// Plain vector move. A full-width register-to-same-register move is dropped as a no-op; the
// 128-bit form is not, because it still has to write a zero vector to the high lane.
void OpDispatchBuilder::AVX128_MOVVectorUnaligned(OpcodeArgs) {
  const bool Is128Bit = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  if (!Is128Bit) {
    const bool SameRegister = Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;
    if (SameRegister) {
      // Nop
      return;
    }
  }

  auto Value = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  if (Is128Bit) {
    Value.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Value);
}

// Converts a signed integer (from a GPR or from memory) to floating point and inserts it into
// the lowest element of Src[0]. The high 128-bit lane of the destination is written as zero.
//
// DstElementSize: size of the floating-point element that is produced.
void OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR(OpcodeArgs, IR::OpSize DstElementSize) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto DstSize = OpSizeFromDst(Op);

  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  RefPair Result {};

  if (Op->Src[1].IsGPR()) {
    // If the source is a GPR then convert directly from the GPR.
    auto Src2 = LoadSourceGPR_WithOpSize(Op, Op->Src[1], GetGPROpSize(), Op->Flags);
    Result.Low = _VSToFGPRInsert(OpSize::i128Bit, DstElementSize, SrcSize, Src1.Low, Src2, false);
  } else if (SrcSize != DstElementSize) {
    // If the source is from memory but the Source size and destination size aren't the same,
    // then it is more optimal to load in to a GPR and convert between GPR->FPR.
    // ARM GPR->FPR conversion supports different size source and destinations while FPR->FPR doesn't.
    auto Src2 = LoadSourceGPR(Op, Op->Src[1], Op->Flags);
    Result.Low = _VSToFGPRInsert(DstSize, DstElementSize, SrcSize, Src1.Low, Src2, false);
  } else {
    // In the case of cvtsi2s{s,d} where the source and destination are the same size,
    // then it is more optimal to load in to the FPR register directly and convert there.
    auto Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
    // Always signed
    Result.Low = _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Src1.Low, Src2, false, false);
  }

  // Only consumed by the assertion below. NOTE(review): if LOGMAN_THROW_A_FMT expands to nothing
  // in builds without assertions, this would otherwise be reported as an unused variable.
  [[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;
  LOGMAN_THROW_A_FMT(Is128Bit, "Programming Error: This should never occur!");
  Result.High = LoadZeroVector(OpSize::i128Bit);

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Converts the lowest floating-point element of Src[0] to an integer and stores it to the
// GPR destination. The conversion itself is delegated to CVTFPR_To_GPRImpl.
void OpDispatchBuilder::AVX128_CVTFPR_To_GPR(OpcodeArgs, IR::OpSize SrcElementSize, bool HostRoundingMode) {
  Ref Input {};
  if (Op->Src[0].IsGPR()) {
    // Register source: take the whole low lane so the vector isn't zero extended unnecessarily.
    Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;
  } else {
    // Memory source: load exactly one element.
    Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcElementSize, Op->Flags);
  }

  Ref Converted = CVTFPR_To_GPRImpl(Op, Input, SrcElementSize, HostRoundingMode);
  StoreResultGPR(Op, Converted);
}

// Per lane: emits _VAndn with the operands swapped, i.e. Src[1] is passed first and Src[0] second.
void OpDispatchBuilder::AVX128_VANDN(OpcodeArgs) {
  const auto AndNot = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return _VAndn(OpSize::i128Bit, ElemSize, Rhs, Lhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), OpSize::i128Bit, AndNot);
}

// Per lane: packs the two sources with _VSQXTNPair at the given element size.
void OpDispatchBuilder::AVX128_VPACKSS(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Pack = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return _VSQXTNPair(OpSize::i128Bit, ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, Pack);
}

// Per lane: packs the two sources with _VSQXTUNPair at the given element size.
void OpDispatchBuilder::AVX128_VPACKUS(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Pack = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return _VSQXTUNPair(OpSize::i128Bit, ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, Pack);
}

// Builds a per-element control vector from Src2 and multiplies Src1 by it.
// NOTE(review): shifting left (_VSQSHL) and then right (_VSRSHR) by (element bits - 1)
// presumably reduces each Src2 element to -1, 0 or +1 - confirm against the IR op definitions.
Ref OpDispatchBuilder::AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2) {
  const auto ShiftAmount = IR::OpSizeAsBits(ElementSize) - 1;

  Ref Shifted = _VSQSHL(OpSize::i128Bit, ElementSize, Src2, ShiftAmount);
  Ref Control = _VSRSHR(OpSize::i128Bit, ElementSize, Shifted, ShiftAmount);

  return _VMul(OpSize::i128Bit, ElementSize, Src1, Control);
}

// Per lane: forwards both sources to AVX128_PSIGNImpl at the given element size.
void OpDispatchBuilder::AVX128_VPSIGN(OpcodeArgs, IR::OpSize ElementSize) {
  const auto ApplySign = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return AVX128_PSIGNImpl(ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, ApplySign);
}

// Scalar floating-point compare: hands the low lanes of Dest and Src[0] to Comiss.
void OpDispatchBuilder::AVX128_UCOMISx(OpcodeArgs, IR::OpSize ElementSize) {
  const bool SrcIsRegister = Op->Src[0].IsGPR();
  const auto SrcSize = SrcIsRegister ? GetGuestVectorLength() : ElementSize;

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, false);

  // Careful here: a register source is loaded as the full 128-bit low lane,
  // whereas a memory source must only be loaded at the element size.
  Ref Rhs {};
  if (SrcIsRegister) {
    Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;
  } else {
    Rhs = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags);
  }

  Comiss(ElementSize, Lhs.Low, Rhs);
}

// Scalar ALU operation `IROp` between the low lanes of Src[0] and Src[1]; the scalar result is
// inserted into Src[0]'s low lane and the high lane of the destination is stored as zero.
void OpDispatchBuilder::AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, IR::OpSize ElementSize) {
  const auto SrcSize = OpSizeFromSrc(Op);

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  // A register source is loaded as a full vector so that no unnecessary zero extension
  // is applied to the scalar element that is operated on.
  Ref Rhs {};
  if (Op->Src[1].IsGPR()) {
    Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false).Low;
  } else {
    Rhs = LoadSourceFPR_WithOpSize(Op, Op->Src[1], SrcSize, Op->Flags);
  }

  // If OpSize == ElementSize then it only does the lower scalar op
  DeriveOp(ScalarResult, IROp, _VFAddScalarInsert(OpSize::i128Bit, ElementSize, Lhs.Low, Rhs, false));

  RefPair Output {};
  Output.Low = ScalarResult;
  Output.High = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Packed floating-point compare. The comparison predicate is the low five bits of the
// immediate in Src[2]; each lane is compared through VFCMPOpImpl.
void OpDispatchBuilder::AVX128_VFCMP(OpcodeArgs, IR::OpSize ElementSize) {
  const uint8_t Immediate = Op->Src[2].Literal();
  const uint32_t Predicate = Immediate & 0b11111u;

  // The helper is invoked while this frame is alive; the predicate is simply captured by value.
  const auto Compare = [this, Predicate](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return VFCMPOpImpl(OpSize::i128Bit, ElemSize, Lhs, Rhs, Predicate);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromSrc(Op), ElementSize, Compare);
}

// Scalar floating-point compare whose result is inserted into the low lane of Src[0].
// The predicate is the low five bits of the immediate in Src[2]; the high lane is zeroed.
void OpDispatchBuilder::AVX128_InsertScalarFCMP(OpcodeArgs, IR::OpSize ElementSize) {
  const auto SrcSize = OpSizeFromSrc(Op);

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  // A register source is loaded as a full vector so that no unnecessary zero extension
  // is applied to the scalar element that is operated on.
  Ref Rhs {};
  if (Op->Src[1].IsGPR()) {
    Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false).Low;
  } else {
    Rhs = LoadSourceFPR_WithOpSize(Op, Op->Src[1], SrcSize, Op->Flags);
  }

  const uint8_t CompType = Op->Src[2].Literal();

  RefPair Output {};
  Output.Low = InsertScalarFCMPOpImpl(OpSize::i128Bit, OpSize::i128Bit, ElementSize, Lhs.Low, Rhs, CompType & 0b11111, false);
  Output.High = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Moves a scalar between a general-purpose register or memory and a vector register.
// The direction is decided by whether the destination is an XMM register.
void OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR(OpcodeArgs) {
  const bool DestIsXMM = Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0;

  if (!DestIsXMM) {
    ///< Reg/Mem <- XMM
    auto Vector = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

    if (!Op->Dest.IsGPR()) {
      // Storing first element to memory.
      Ref Address = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});
      _StoreMemFPR(OpSizeFromDst(Op), Address, Vector.Low, OpSize::i8Bit);
      return;
    }

    // Extract element 0 into a GPR, zero extending in the process.
    const auto ElementSize = OpSizeFromDst(Op);
    Ref Extracted = _VExtractToGPR(OpSizeFromSrc(Op), ElementSize, Vector.Low, 0);
    StoreResultGPR(Op, Op->Dest, Extracted);
    return;
  }

  ///< XMM <- Reg/Mem
  RefPair Output {};
  if (Op->Src[0].IsGPR()) {
    // Register source: load it and cast into a vector, zero extending to 128-bit.
    Ref Scalar = LoadSourceFPR_WithOpSize(Op, Op->Src[0], GetGPROpSize(), Op->Flags);
    Output.Low = _VCastFromGPR(OpSize::i128Bit, OpSizeFromSrc(Op), Scalar);
  } else {
    // Loading from Memory as a scalar. Zero extend
    Output.Low = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  }

  Output.High = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Extracts one element, selected by the immediate in Src[1], from the low lane of Src[0] and
// writes it either to a GPR or to memory.
void OpDispatchBuilder::AVX128_PExtr(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);

  auto Vector = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  // Fixup of 32-bit element size.
  // PEXTRD and PEXTRQ share an encoding and differ only in REX.W / VEX.W, so a nominal 32-bit
  // element may really be 64-bit. The destination size is used as the element size in that case.
  const auto EffectiveElementSize = ElementSize == OpSize::i32Bit ? DstSize : ElementSize;

  // AVX version only operates on 128-bit.
  const uint8_t NumElements = IR::NumElements(std::min(OpSizeFromSrc(Op), OpSize::i128Bit), EffectiveElementSize);
  // Wrap the immediate into the valid element range.
  const uint64_t Index = Op->Src[1].Literal() & (NumElements - 1);

  if (!Op->Dest.IsGPR()) {
    // If we are storing to memory then we store the size of the element extracted
    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    _VStoreVectorElement(OpSize::i128Bit, EffectiveElementSize, Vector.Low, Index, Address);
    return;
  }

  const auto GPRSize = GetGPROpSize();
  // Extract already zero extends the result.
  Ref Extracted = _VExtractToGPR(OpSize::i128Bit, EffectiveElementSize, Vector.Low, Index);
  StoreResultGPR_WithOpSize(Op, Op->Dest, Extracted, GPRSize);
}

// Widens packed elements from `ElementSize` to `DstElementSize`, sign- or zero-extending
// according to `Signed`. The widening is done by repeated doubling of the element size.
void OpDispatchBuilder::AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed) {
  const auto DstSize = OpSizeFromDst(Op);
  const bool Is128BitDst = DstSize == OpSize::i128Bit;

  // Fetch the packed source elements.
  Ref Src {};
  if (Op->Src[0].IsGPR()) {
    Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;
  } else {
    // For memory operands the 256-bit variant loads twice the size specified in the table.
    const auto SrcSize = OpSizeFromSrc(Op);
    const auto LoadSize = DstSize == OpSize::i256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize;
    Src = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  }

  // Doubles the element size step by step until the destination element size is reached.
  const auto Widen = [=, this](Ref Value) {
    auto StepSize = ElementSize;
    while (StepSize != DstElementSize) {
      if (Signed) {
        Value = _VSXTL(OpSize::i128Bit, StepSize, Value);
      } else {
        Value = _VUXTL(OpSize::i128Bit, StepSize, Value);
      }
      StepSize = StepSize << 1;
    }
    return Value;
  };

  RefPair Output {};

  if (Is128BitDst) {
    // 128-bit operation is easy, it stays within the single register.
    // The high lane follows the regular zero-extending semantics.
    Output.Low = Widen(Src);
    Output.High = LoadZeroVector(OpSize::i128Bit);
  } else {
    // 256-bit operation is a bit special. It splits the incoming source between lower and upper registers.
    const size_t TotalElementCount = IR::NumElements(OpSize::i256Bit, DstElementSize);
    const size_t HalfSourceBytes = (TotalElementCount / 2) * IR::OpSizeToSize(ElementSize);

    // The second half of the packed source elements is duplicated down so it can feed the upper lane.
    Ref SrcUpperHalf = _VDupElement(OpSize::i128Bit, IR::SizeToOpSize(HalfSourceBytes), Src, 1);
    Output.Low = Widen(Src);
    Output.High = Widen(SrcUpperHalf);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Collects the sign bit of every 32-bit or 64-bit element of Src[0] into the low bits of a GPR.
// For 256-bit sources the mask of the high lane is OR-ed in above the mask of the low lane.
void OpDispatchBuilder::AVX128_MOVMSK(OpcodeArgs, IR::OpSize ElementSize) {
  const bool Is128Bit = OpSizeFromSrc(Op) == OpSize::i128Bit;

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  // Two-bit mask for a lane holding two 64-bit elements.
  const auto SignBits64 = [this](Ref Lane) {
    // UnZip2 the 64-bit elements as 32-bit to get the sign bits closer.
    // Sign bits are now in bit positions 31 and 63 after this.
    Ref Packed = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Lane, Lane);

    // Extract the low 64-bits to GPR in one move.
    Ref Bits = _VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, Packed, 0);
    // Insert the lower 32 bits at offset 31, which moves the sign bit from 31 to 62
    // so it sits directly below the other sign bit at 63.
    Ref Adjacent = _Bfi(OpSize::i64Bit, 32, 31, Bits, Bits);
    // Shift right to only get the two sign bits we care about.
    return _Lshr(OpSize::i64Bit, Adjacent, Constant(62));
  };

  // Four-bit mask for a lane holding four 32-bit elements.
  const auto SignBits32 = [this](Ref Lane) {
    // Shift all the sign bits to the bottom of their respective elements.
    Ref Signs = _VUShrI(OpSize::i128Bit, OpSize::i32Bit, Lane, 31);
    // Load the specific 128-bit movmskps shift elements operator.
    auto ShiftAmounts = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, NAMED_VECTOR_MOVMSKPS_SHIFT);
    // Shift the sign bits in to specific locations.
    Ref Positioned = _VUShl(OpSize::i128Bit, OpSize::i32Bit, Signs, ShiftAmounts, false);
    // Add across the vector so the sign bits will end up in bits [3:0]
    Ref Summed = _VAddV(OpSize::i128Bit, OpSize::i32Bit, Positioned);
    // Extract to a GPR.
    return _VExtractToGPR(OpSize::i128Bit, OpSize::i32Bit, Summed, 0);
  };

  Ref Mask {};
  if (Is128Bit) {
    if (ElementSize == OpSize::i64Bit) {
      Mask = SignBits64(Src.Low);
    } else {
      Mask = SignBits32(Src.Low);
    }
  } else if (ElementSize == OpSize::i32Bit) {
    // Four mask bits per lane: the high lane's bits are placed at [7:4].
    Ref LowMask = SignBits32(Src.Low);
    Ref HighMask = SignBits32(Src.High);
    Mask = _Orlshl(OpSize::i64Bit, LowMask, HighMask, 4);
  } else {
    // Two mask bits per lane: the high lane's bits are placed at [3:2].
    Ref LowMask = SignBits64(Src.Low);
    Ref HighMask = SignBits64(Src.High);
    Mask = _Orlshl(OpSize::i64Bit, LowMask, HighMask, 2);
  }

  StoreResultGPR_WithOpSize(Op, Op->Dest, Mask, GetGPROpSize());
}

// Builds a byte mask in a GPR: every 128-bit lane contributes 16 bits, derived from comparing
// each byte of the lane against zero (less-than). The high lane's bits are OR-ed in at bit 16.
void OpDispatchBuilder::AVX128_MOVMSKB(OpcodeArgs) {
  const bool Is128Bit = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
  Ref BitSelect = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, NAMED_VECTOR_MOVMASKB);

  const auto LaneMask = [this](Ref Lane, Ref Select) {
    // Flag every byte that is less than zero, then mask the flags with the named constant.
    auto Negative = _VCMPLTZ(OpSize::i128Bit, OpSize::i8Bit, Lane);
    auto Selected = _VAnd(OpSize::i128Bit, OpSize::i8Bit, Negative, Select);

    // Three rounds of pairwise adds fold the per-byte values together.
    auto Fold1 = _VAddP(OpSize::i128Bit, OpSize::i8Bit, Selected, Selected);
    auto Fold2 = _VAddP(OpSize::i128Bit, OpSize::i8Bit, Fold1, Fold1);
    auto Fold3 = _VAddP(OpSize::i64Bit, OpSize::i8Bit, Fold2, Fold2);

    ///< 16-bits of data per 128-bit
    return _VExtractToGPR(OpSize::i128Bit, OpSize::i16Bit, Fold3, 0);
  };

  Ref Mask = LaneMask(Src.Low, BitSelect);

  if (!Is128Bit) {
    Ref HighMask = LaneMask(Src.High, BitSelect);
    Mask = _Orlshl(OpSize::i64Bit, Mask, HighMask, 16);
  }

  StoreResultGPR(Op, Mask);
}

// Inserts a single element (from a GPR or from memory) into the low lane of `Src1Op` at the
// index given by `Imm`, wrapped to the element count of a 128-bit vector. The high lane of the
// destination is stored as zero.
void OpDispatchBuilder::AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                                         const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) {
  const auto NumElements = IR::NumElements(OpSize::i128Bit, ElementSize);
  const uint64_t Index = Imm.Literal() & (NumElements - 1);

  auto Base = AVX128_LoadSource_WithOpSize(Op, Src1Op, Op->Flags, false);

  Ref Inserted {};
  if (!Src2Op.IsGPR()) {
    // If loading from memory then we only load the element size
    Ref Address = MakeSegmentAddress(Op, Src2Op);
    Inserted = _VLoadVectorElement(OpSize::i128Bit, ElementSize, Base.Low, Index, Address);
  } else {
    // Register source: insert straight from the GPR.
    Ref Value = LoadSourceGPR_WithOpSize(Op, Src2Op, GetGPROpSize(), Op->Flags);
    Inserted = _VInsGPR(OpSize::i128Bit, ElementSize, Index, Base.Low, Value);
  }

  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, RefPair {.Low = Inserted, .High = Zero});
}

// Byte insert: forwards to AVX128_PINSRImpl with an 8-bit element size.
void OpDispatchBuilder::AVX128_VPINSRB(OpcodeArgs) {
  const auto& Vector = Op->Src[0];
  const auto& Element = Op->Src[1];
  const auto& Selector = Op->Src[2];
  AVX128_PINSRImpl(Op, OpSize::i8Bit, Vector, Element, Selector);
}

// Word insert: forwards to AVX128_PINSRImpl with a 16-bit element size.
void OpDispatchBuilder::AVX128_VPINSRW(OpcodeArgs) {
  const auto& Vector = Op->Src[0];
  const auto& Element = Op->Src[1];
  const auto& Selector = Op->Src[2];
  AVX128_PINSRImpl(Op, OpSize::i16Bit, Vector, Element, Selector);
}

// Dword/qword insert: the element size is taken from the decoded source size.
void OpDispatchBuilder::AVX128_VPINSRDQ(OpcodeArgs) {
  const auto& Vector = Op->Src[0];
  const auto& Element = Op->Src[1];
  const auto& Selector = Op->Src[2];
  AVX128_PINSRImpl(Op, OpSizeFromSrc(Op), Vector, Element, Selector);
}

// Per-element variable shift: Src[0] is shifted by the matching elements of Src[1] using the
// shift operation `IROp`. The decoded source size is used as the element size.
void OpDispatchBuilder::AVX128_VariableShiftImpl(OpcodeArgs, IROps IROp) {
  const auto ShiftLane = [this, IROp](IR::OpSize ElementSize, Ref Value, Ref Amounts) {
    // _VUShr only serves as the template op here; IROp is what DeriveOp is given to emit.
    DeriveOp(Shifted, IROp, _VUShr(OpSize::i128Bit, ElementSize, Value, Amounts, true));
    return Shifted;
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSizeFromSrc(Op), ShiftLane);
}

// Shifts each 128-bit lane of Src[0] by the byte count in the immediate Src[1], shifting in
// zeroes. It is implemented as a byte extract (_VExtr) over the lane paired with a zero vector.
void OpDispatchBuilder::AVX128_ShiftDoubleImm(OpcodeArgs, ShiftDirection Dir) {
  const bool Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool ShiftRight = Dir == ShiftDirection::RIGHT;

  const uint64_t Shift = Op->Src[1].Literal();
  // A left shift is expressed as an extract starting (16 - Shift) bytes in.
  const uint64_t ExtrShift = ShiftRight ? Shift : IR::OpSizeToSize(OpSize::i128Bit) - Shift;

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  RefPair Result {};
  if (Shift == 0) [[unlikely]] {
    // Nothing to shift: forward the source.
    Result = Src;
  } else if (Shift >= Core::CPUState::XMM_SSE_REG_SIZE) {
    // Everything is shifted out of the lane.
    Ref Zero = LoadZeroVector(OpSize::i128Bit);
    Result.Low = Zero;
    Result.High = Zero;
  } else {
    Ref Zero = LoadZeroVector(OpSize::i128Bit);

    if (ShiftRight) {
      // Zero is the first extract operand and the data the second.
      Result.Low = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Zero, Src.Low, ExtrShift);
      if (!Is128Bit) {
        Result.High = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Zero, Src.High, ExtrShift);
      }
    } else {
      // The data is the first extract operand and zero the second.
      Result.Low = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.Low, Zero, ExtrShift);
      if (!Is128Bit) {
        Result.High = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.High, Zero, ExtrShift);
      }
    }
  }

  if (Is128Bit) {
    Result.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Replaces one 128-bit lane of Src[0] with the low lane of Src[1]. Bit 0 of the immediate in
// Src[2] selects which lane is replaced.
void OpDispatchBuilder::AVX128_VINSERT(OpcodeArgs) {
  const bool Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool ReplaceHigh = (Op->Src[2].Literal() & 1) != 0;

  auto Output = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
  auto Inserted = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false);

  if (ReplaceHigh) {
    // Insert in to the High bits
    Output.High = Inserted.Low;
  } else {
    // Insert in to Low bits
    Output.Low = Inserted.Low;
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Computes the insert through InsertPSOpImpl and stores the result via AVX128_Zext.
void OpDispatchBuilder::AVX128_VINSERTPS(OpcodeArgs) {
  Ref Inserted = InsertPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]);
  const auto Extended = AVX128_Zext(Inserted);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Extended);
}

// Per lane: horizontal subtract through PHSUBOpImpl at the given element size.
void OpDispatchBuilder::AVX128_VPHSUB(OpcodeArgs, IR::OpSize ElementSize) {
  const auto HorizontalSub = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return PHSUBOpImpl(OpSize::i128Bit, Lhs, Rhs, ElemSize);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, HorizontalSub);
}

// Per lane: forwards both sources to PHSUBSOpImpl. The element size argument is not needed
// by that helper and is ignored.
void OpDispatchBuilder::AVX128_VPHSUBSW(OpcodeArgs) {
  const auto HorizontalSubSat = [this](IR::OpSize, Ref Lhs, Ref Rhs) {
    return PHSUBSOpImpl(OpSize::i128Bit, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit, HorizontalSubSat);
}

// Per lane: forwards both sources to ADDSUBPOpImpl at the given element size.
void OpDispatchBuilder::AVX128_VADDSUBP(OpcodeArgs, IR::OpSize ElementSize) {
  const auto AddSub = [this](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) {
    return ADDSUBPOpImpl(OpSize::i128Bit, ElemSize, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, AddSub);
}

// Per lane: widening multiply through PMULLOpImpl. Only 32-bit source elements are accepted.
void OpDispatchBuilder::AVX128_VPMULL(OpcodeArgs, IR::OpSize ElementSize, bool Signed) {
  LOGMAN_THROW_A_FMT(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");

  const auto Multiply = [this, Signed](IR::OpSize ElemSize, Ref Lhs, Ref Rhs) -> Ref {
    return PMULLOpImpl(OpSize::i128Bit, ElemSize, Signed, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), ElementSize, Multiply);
}

// Per lane: forwards both sources to PMULHRSWOpImpl. The element size argument is not needed
// by that helper and is ignored.
void OpDispatchBuilder::AVX128_VPMULHRSW(OpcodeArgs) {
  const auto Multiply = [this](IR::OpSize, Ref Lhs, Ref Rhs) -> Ref {
    return PMULHRSWOpImpl(OpSize::i128Bit, Lhs, Rhs);
  };

  AVX128_VectorBinaryImpl(Op, OpSizeFromDst(Op), OpSize::i16Bit, Multiply);
}

void OpDispatchBuilder::AVX128_VPMULHW(OpcodeArgs, bool Signed) {
  // 16-bit multiply using the high-multiply IR ops; `Signed` picks _VSMulH over _VUMulH.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i16Bit, [&](IR::OpSize LaneElementSize, Ref Lhs, Ref Rhs) -> Ref {
    if (!Signed) {
      return _VUMulH(OpSize::i128Bit, LaneElementSize, Lhs, Rhs);
    }

    return _VSMulH(OpSize::i128Bit, LaneElementSize, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize) {
  // Scalar float <-> float conversion. Be careful with the layout of the result:
  //  - the converted value is inserted in to the lowest element,
  //  - the remainder of the lower 128-bits is retained from the first source,
  //  - the top 128-bits are zero extended.
  //
  // A register second source is read at the full 128-bit width, anything else at the source element size.
  IR::OpSize ScalarLoadSize = SrcElementSize;
  if (Op->Src[1].IsGPR()) {
    ScalarLoadSize = OpSize::i128Bit;
  }

  auto Passthrough = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Op->Src[1], ScalarLoadSize, Op->Flags, {.AllowUpperGarbage = true});

  Ref Inserted = _VFToFScalarInsert(OpSize::i128Bit, DstElementSize, SrcElementSize, Passthrough.Low, Scalar, false);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Inserted));
}

// Packed float <-> float conversion between SrcElementSize and DstElementSize elements,
// performed on 128-bit halves.
//
// Because the element width changes, the source and destination register widths can differ.
// The forms handled below (per the inline comments and assertions) are:
//  - cvtps2pd/cvtpd2ps xmm, xmm : only the low half is converted.
//  - cvtps2pd ymm, xmm         : low 128-bit source widened in to both destination halves.
//  - cvtpd2ps xmm, ymm         : both source halves narrowed and packed in to the low destination half.
void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto DstSize = OpSizeFromDst(Op);

  // True when the source elements are 32-bit.
  const auto IsFloatSrc = SrcElementSize == OpSize::i32Bit;
  auto Is128BitSrc = SrcSize == OpSize::i128Bit;
  auto Is128BitDst = DstSize == OpSize::i128Bit;

  ///< Decompose correctly.
  // Widening in to a non-128-bit destination is treated as having a 128-bit source;
  // narrowing from a non-128-bit source is treated as having a 128-bit destination.
  if (DstElementSize > SrcElementSize && !Is128BitDst) {
    Is128BitSrc = true;
  } else if (SrcElementSize > DstElementSize && !Is128BitSrc) {
    Is128BitDst = true;
  }

  // For a 32-bit element source that isn't a register, only half of SrcSize is loaded.
  const auto LoadSize = IsFloatSrc && !Op->Src[0].IsGPR() ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2) : SrcSize;

  RefPair Src {};
  if (Op->Src[0].IsGPR() || LoadSize >= OpSize::i128Bit) {
    // Register source, or a source of at least 128 bits: use the pair loader
    // (the high half is only requested when the source isn't 128-bit).
    Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128BitSrc);
  } else {
    // Handle 64-bit memory source.
    // In the case of cvtps2pd xmm, m64.
    Src.Low = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  }

  RefPair Result {};

  // Conversion used for the low result (and for each half when narrowing).
  auto TransformLow = [&](Ref Src) -> Ref {
    return _Vector_FToF(OpSize::i128Bit, DstElementSize, Src, SrcElementSize);
  };

  // Conversion used to produce the high result half from the low source when widening.
  // NOTE(review): presumably _VFCVTL2 converts the upper elements of its input — confirm against the IR definition.
  auto TransformHigh = [&](Ref Src) -> Ref {
    return _VFCVTL2(OpSize::i128Bit, SrcElementSize, Src);
  };

  // The low half is always converted first; the branches below only add to it.
  Result.Low = TransformLow(Src.Low);
  if (Is128BitSrc) {
    if (Is128BitDst) {
      // cvtps2pd xmm, xmm or cvtpd2ps xmm, xmm
      // Done here
    } else {
      LOGMAN_THROW_A_FMT(DstElementSize > SrcElementSize, "cvtpd2ps ymm, xmm doesn't exist");

      // cvtps2pd ymm, xmm
      Result.High = TransformHigh(Src.Low);
    }
  } else {
    // 256-bit src
    LOGMAN_THROW_A_FMT(Is128BitDst, "Not real: cvt{ps2pd,pd2ps} ymm, ymm");
    LOGMAN_THROW_A_FMT(DstElementSize < SrcElementSize, "cvtps2pd xmm, ymm doesn't exist");

    // cvtpd2ps xmm, ymm
    // Convert the high source half and merge one of its 64-bit elements in to Result.Low.
    // NOTE(review): the (1, 0) indices are presumably (destination index, source index) — confirm against _VInsElement.
    Result.Low = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, Result.Low, TransformLow(Src.High));
  }

  if (Is128BitDst) {
    // 128-bit destinations get their upper half zero extended.
    Result = AVX128_Zext(Result.Low);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int(OpcodeArgs, IR::OpSize SrcElementSize, bool HostRoundingMode) {
  const bool IsXMMSource = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  // VCVTPD2DQ/VCVTTPD2DQ only use the bottom lane, even for the 256-bit version.
  auto Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMMSource);

  // The low half is converted unconditionally.
  Ref LowConverted =
    Vector_CVT_Float_To_Int32Impl(Op, OpSize::i128Bit, Input.Low, OpSize::i128Bit, SrcElementSize, HostRoundingMode, IsXMMSource);

  RefPair Converted {};
  if (IsXMMSource) {
    // 128-bit source: zero the upper 128-bit lane of the result.
    Converted = AVX128_Zext(LowConverted);
  } else {
    // 256-bit source: also convert the upper 128-bit lane.
    Ref HighConverted =
      Vector_CVT_Float_To_Int32Impl(Op, OpSize::i128Bit, Input.High, OpSize::i128Bit, SrcElementSize, HostRoundingMode, false);

    if (SrcElementSize == OpSize::i64Bit) {
      // Zip the two halves together in to the lower 128-bits,
      // then zero the upper 128-bit lane of the result.
      Ref Packed = _VZip(OpSize::i128Bit, OpSize::i64Bit, LowConverted, HighConverted);
      Converted = AVX128_Zext(Packed);
    } else {
      Converted.Low = LowConverted;
      Converted.High = HighConverted;
    }
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Converted);
}

void OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen) {
  const auto Size = OpSizeFromDst(Op);
  const auto Is128Bit = Size == OpSize::i128Bit;

  RefPair Input {};
  if (Widen && !Op->Src[0].IsGPR()) {
    // If loading a vector, use the full size, so we don't
    // unnecessarily zero extend the vector. Otherwise, if
    // memory, then we want to load the element size exactly.
    const auto LoadSize = IR::SizeToOpSize(8 * (IR::OpSizeToSize(Size) / 16));
    Input.Low = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  } else {
    Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);
  }

  // Converts one 128-bit vector. When widening, the vector is first sign-extended
  // with the requested extension op and the element size doubles.
  auto ConvertVector = [&](Ref Vector, IROps ExtendOp) -> Ref {
    auto ConvertElementSize = SrcElementSize;
    if (Widen) {
      DeriveOp(Widened, ExtendOp, _VSXTL(OpSize::i128Bit, ConvertElementSize, Vector));
      Vector = Widened;
      ConvertElementSize = ConvertElementSize << 1;
    }

    return _Vector_SToF(OpSize::i128Bit, ConvertElementSize, Vector);
  };

  RefPair Result {};
  Result.Low = ConvertVector(Input.Low, IROps::OP_VSXTL);

  if (Is128Bit) {
    Result = AVX128_Zext(Result.Low);
  } else if (Widen) {
    // Widening: the high result half comes from the same low source, via the VSXTL2 extension op.
    Result.High = ConvertVector(Input.Low, IROps::OP_VSXTL2);
  } else {
    Result.High = ConvertVector(Input.High, IROps::OP_VSXTL);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VEXTRACT128(OpcodeArgs) {
  const auto LaneSelect = Op->Src[1].Literal() & 0b1;
  const bool DstIsRegister = Op->Dest.IsGPR();

  auto Source = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, true);

  // Bit 0 of the immediate picks which 128-bit half is extracted.
  Ref Extracted = LaneSelect == 0 ? Source.Low : Source.High;

  RefPair Result {};
  if (DstIsRegister) {
    // Only zero the upper-half when destination is XMM, otherwise this is a memory store.
    Result = AVX128_Zext(Extracted);
  } else {
    Result.Low = Extracted;
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VAESImc(OpcodeArgs) {
  ///< 128-bit only.
  constexpr auto VectorSize = OpSize::i128Bit;

  AVX128_VectorUnaryImpl(Op, VectorSize, VectorSize, [this](IR::OpSize, Ref Input) {
    return _VAESImc(Input);
  });
}

void OpDispatchBuilder::AVX128_VAESEnc(OpcodeArgs) {
  // A zero vector is passed to the trinary helper alongside the two instruction sources.
  // NOTE(review): presumably it reaches the callback as its third operand — confirm in AVX128_VectorTrinaryImpl.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, Zero, [this](IR::OpSize, Ref State, Ref Key, Ref ZeroVec) {
    return _VAESEnc(OpSize::i128Bit, State, Key, ZeroVec);
  });
}

void OpDispatchBuilder::AVX128_VAESEncLast(OpcodeArgs) {
  // A zero vector is passed to the trinary helper alongside the two instruction sources.
  // NOTE(review): presumably it reaches the callback as its third operand — confirm in AVX128_VectorTrinaryImpl.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, Zero, [this](IR::OpSize, Ref State, Ref Key, Ref ZeroVec) {
    return _VAESEncLast(OpSize::i128Bit, State, Key, ZeroVec);
  });
}

void OpDispatchBuilder::AVX128_VAESDec(OpcodeArgs) {
  // A zero vector is passed to the trinary helper alongside the two instruction sources.
  // NOTE(review): presumably it reaches the callback as its third operand — confirm in AVX128_VectorTrinaryImpl.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, Zero, [this](IR::OpSize, Ref State, Ref Key, Ref ZeroVec) {
    return _VAESDec(OpSize::i128Bit, State, Key, ZeroVec);
  });
}

void OpDispatchBuilder::AVX128_VAESDecLast(OpcodeArgs) {
  // A zero vector is passed to the trinary helper alongside the two instruction sources.
  // NOTE(review): presumably it reaches the callback as its third operand — confirm in AVX128_VectorTrinaryImpl.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  AVX128_VectorTrinaryImpl(Op, OpSizeFromDst(Op), OpSize::i128Bit, Zero, [this](IR::OpSize, Ref State, Ref Key, Ref ZeroVec) {
    return _VAESDecLast(OpSize::i128Bit, State, Key, ZeroVec);
  });
}

void OpDispatchBuilder::AVX128_VAESKeyGenAssist(OpcodeArgs) {
  ///< 128-bit only.

  // Everything the callback needs besides `this` is bundled in to one struct,
  // so the lambda only captures `this` plus a single reference.
  struct KeyGenArgs {
    Ref Zero;
    Ref Swizzle;
    uint64_t RoundConstant;
  };

  const uint64_t RoundConstant = Op->Src[1].Literal();
  Ref Zero = LoadZeroVector(OpSize::i128Bit);
  Ref Swizzle = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE);

  KeyGenArgs Args {Zero, Swizzle, RoundConstant};

  AVX128_VectorUnaryImpl(Op, OpSize::i128Bit, OpSize::i128Bit, [this, &Args](IR::OpSize, Ref Input) {
    return _VAESKeyGenAssist(Input, Args.Swizzle, Args.Zero, Args.RoundConstant);
  });
}

void OpDispatchBuilder::AVX128_VPCMPESTRI(OpcodeArgs) {
  // Flag pattern across the four VPCMPxSTRx wrappers: the first flag is set for the
  // E (VPCMPESTRx) forms, the second for the M (VPCMPxSTRM) forms.
  PCMPXSTRXOpImpl(Op, /*first=*/true, /*second=*/false);

  ///< Nothing to zero for this variant.
}

void OpDispatchBuilder::AVX128_VPCMPESTRM(OpcodeArgs) {
  // Flag pattern across the four VPCMPxSTRx wrappers: the first flag is set for the
  // E (VPCMPESTRx) forms, the second for the M (VPCMPxSTRM) forms.
  PCMPXSTRXOpImpl(Op, /*first=*/true, /*second=*/true);

  ///< Zero the upper 128-bits of hardcoded YMM0
  Ref UpperZero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreXMMRegister(0, UpperZero, true);
}

void OpDispatchBuilder::AVX128_VPCMPISTRI(OpcodeArgs) {
  // Flag pattern across the four VPCMPxSTRx wrappers: the first flag is set for the
  // E (VPCMPESTRx) forms, the second for the M (VPCMPxSTRM) forms.
  PCMPXSTRXOpImpl(Op, /*first=*/false, /*second=*/false);

  ///< Nothing to zero for this variant.
}

void OpDispatchBuilder::AVX128_VPCMPISTRM(OpcodeArgs) {
  // Flag pattern across the four VPCMPxSTRx wrappers: the first flag is set for the
  // E (VPCMPESTRx) forms, the second for the M (VPCMPxSTRM) forms.
  PCMPXSTRXOpImpl(Op, /*first=*/false, /*second=*/true);

  ///< Zero the upper 128-bits of hardcoded YMM0
  Ref UpperZero = LoadZeroVector(OpSize::i128Bit);
  AVX128_StoreXMMRegister(0, UpperZero, true);
}

void OpDispatchBuilder::AVX128_PHMINPOSUW(OpcodeArgs) {
  // Reuse the shared implementation for the 128-bit result,
  // then store it with a zero-extended upper half.
  Ref Minimum = PHMINPOSUWOpImpl(Op);
  const RefPair Extended = AVX128_Zext(Minimum);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Extended);
}

void OpDispatchBuilder::AVX128_VectorRound(OpcodeArgs, IR::OpSize ElementSize) {
  // The rounding mode comes from the immediate and is handed to the shared rounding implementation.
  const auto Mode = Op->Src[1].Literal();
  const auto Size = OpSizeFromSrc(Op);

  AVX128_VectorUnaryImpl(Op, Size, ElementSize, [this, Mode](IR::OpSize LaneElementSize, Ref Input) {
    return VectorRoundImpl(OpSize::i128Bit, LaneElementSize, Input, Mode);
  });
}

void OpDispatchBuilder::AVX128_InsertScalarRound(OpcodeArgs, IR::OpSize ElementSize) {
  // We load the full vector width when dealing with a source vector,
  // so that we don't do any unnecessary zero extension to the scalar
  // element that we're going to operate on.
  const auto SrcSize = OpSizeFromSrc(Op);

  auto Passthrough = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false);

  Ref Scalar {};
  if (!Op->Src[1].IsGPR()) {
    Scalar = LoadSourceFPR_WithOpSize(Op, Op->Src[1], SrcSize, Op->Flags);
  } else {
    Scalar = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false).Low;
  }

  // If OpSize == ElementSize then it only does the lower scalar op
  const auto RoundMode = TranslateRoundType(Op->Src[2].Literal());

  Ref Rounded = _VFToIScalarInsert(OpSize::i128Bit, ElementSize, Passthrough.Low, Scalar, RoundMode, false);
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Rounded));
}

void OpDispatchBuilder::AVX128_VDPP(OpcodeArgs, IR::OpSize ElementSize) {
  // The immediate is passed through untouched to the shared dot-product implementation.
  const uint64_t Control = Op->Src[2].Literal();
  const auto SrcSize = OpSizeFromSrc(Op);

  AVX128_VectorBinaryImpl(Op, SrcSize, ElementSize, [this, Control](IR::OpSize LaneElementSize, Ref Lhs, Ref Rhs) {
    return DPPOpImpl(OpSize::i128Bit, Lhs, Rhs, Control, LaneElementSize);
  });
}

// 64-bit element permute of a 256-bit source by an 8-bit immediate.
// The source is loaded as a Low/High pair of 128-bit vectors and each 128-bit
// half of the result is built independently from one nibble of the immediate.
void OpDispatchBuilder::AVX128_VPERMQ(OpcodeArgs) {
  ///< Only ever 256-bit.
  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, true);
  const auto Selector = Op->Src[1].Literal();

  RefPair Result {};

  // Crack the operation in to two halves and implement per half
  // The low nibble drives Result.Low, the high nibble drives Result.High.
  uint8_t SelectorLow = Selector & 0b1111;
  uint8_t SelectorHigh = (Selector >> 4) & 0b1111;

  // Builds one 128-bit result half from a 4-bit selector.
  // Case labels are written 0bUU'LL: LL indexes the 64-bit source element for the
  // lower result element and UU the one for the upper result element, where
  // indices 0-1 live in Src.Low and 2-3 in Src.High (e.g. 0b01'00 is Src.Low
  // unchanged and 0b11'10 is Src.High unchanged).
  auto SelectLane = [this](uint8_t Selector, RefPair Src) -> Ref {
    LOGMAN_THROW_A_FMT(Selector < 16, "Selector too large!");

    switch (Selector) {
    case 0b00'00: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src.Low, 0);
    case 0b00'01: return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.Low, Src.Low, 8);
    case 0b00'10: return _VZip(OpSize::i128Bit, OpSize::i64Bit, Src.High, Src.Low);
    case 0b00'11: return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.Low, Src.High, 8);
    case 0b01'00: return Src.Low;
    case 0b01'01: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src.Low, 1);
    case 0b01'10: return _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src.High, Src.Low);
    case 0b01'11: return _VTrn2(OpSize::i128Bit, OpSize::i64Bit, Src.High, Src.Low);
    case 0b10'00: return _VZip(OpSize::i128Bit, OpSize::i64Bit, Src.Low, Src.High);
    case 0b10'01: return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.High, Src.Low, 8);
    case 0b10'10: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src.High, 0);
    case 0b10'11: return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src.High, Src.High, 8);
    case 0b11'00: return _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Src.Low, Src.High);
    case 0b11'01: return _VTrn2(OpSize::i128Bit, OpSize::i64Bit, Src.Low, Src.High);
    case 0b11'10: return Src.High;
    case 0b11'11: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src.High, 1);
    default: FEX_UNREACHABLE;
    }
  };

  Result.Low = SelectLane(SelectorLow, Src);
  // When both nibbles match, reuse the low result instead of emitting the same operation twice.
  Result.High = SelectorLow == SelectorHigh ? Result.Low : SelectLane(SelectorHigh, Src);

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VPSHUFW(OpcodeArgs, bool Low) {
  // All callback state is packed in to one small struct that the lambda captures by value.
  struct ShuffleArgs {
    OpDispatchBuilder* Self;
    uint8_t Control;
    bool IsLow;
  };

  const auto Immediate = Op->Src[1].Literal();

  ShuffleArgs Args {
    .Self = this,
    .Control = static_cast<uint8_t>(Immediate),
    .IsLow = Low,
  };

  AVX128_VectorUnaryImpl(Op, OpSizeFromSrc(Op), OpSize::i16Bit, [Args](IR::OpSize, Ref Input) {
    // Pick the lookup table matching the low/high variant of the shuffle.
    auto TableIndex = FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW;
    if (Args.IsLow) {
      TableIndex = FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW;
    }

    return Args.Self->PShufWLane(OpSize::i128Bit, TableIndex, Args.IsLow, Input, Args.Control);
  });
}

void OpDispatchBuilder::AVX128_VSHUF(OpcodeArgs, IR::OpSize ElementSize) {
  const bool IsXMM = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  auto Control = Op->Src[2].Literal();

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMM);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !IsXMM);

  RefPair Result {};
  Result.Low = SHUFOpImpl(Op, OpSize::i128Bit, ElementSize, Lhs.Low, Rhs.Low, Control);

  if (!IsXMM) {
    // 32-bit elements reuse the same control bits for the high half;
    // otherwise the high half's control bits sit two bits further up.
    const uint8_t HighShift = ElementSize == OpSize::i32Bit ? 0 : 2;
    Result.High = SHUFOpImpl(Op, OpSize::i128Bit, ElementSize, Lhs.High, Rhs.High, Control >> HighShift);
  } else {
    Result.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VPERMILImm(OpcodeArgs, IR::OpSize ElementSize) {
  const bool IsXMM = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

  const auto Control = Op->Src[1].Literal() & 0xFF;
  auto Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMM);

  // Start from an all-zero pair; the high half is only overwritten for the non-128-bit form.
  RefPair Result = AVX128_Zext(LoadZeroVector(OpSize::i128Bit));

  if (ElementSize != OpSize::i64Bit) {
    // Non-64-bit elements: the same control value is applied to each 128-bit half.
    Result.Low = Single128Bit4ByteVectorShuffle(Input.Low, Control);
    if (!IsXMM) {
      Result.High = Single128Bit4ByteVectorShuffle(Input.High, Control);
    }

    AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
    return;
  }

  // 64-bit elements: two control bits per 128-bit half.
  auto Swizzle64 = [this](Ref Vector, uint8_t Bits) -> Ref {
    switch (Bits) {
    case 0b00: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Vector, 0);
    case 0b01: return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Vector, Vector, 8);
    case 0b10:
      // No swizzle
      return Vector;
    case 0b11: return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Vector, 1);
    default: FEX_UNREACHABLE;
    }
  };

  Result.Low = Swizzle64(Input.Low, Control & 0b11);
  if (!IsXMM) {
    Result.High = Swizzle64(Input.High, (Control >> 2) & 0b11);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VHADDP(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  // Emit a _VFAddP and then retarget it to the IR op requested by the caller.
  const auto SrcSize = OpSizeFromSrc(Op);

  AVX128_VectorBinaryImpl(Op, SrcSize, ElementSize, [&](IR::OpSize LaneElementSize, Ref Lhs, Ref Rhs) {
    DeriveOp(Pairwise, IROp, _VFAddP(OpSize::i128Bit, LaneElementSize, Lhs, Rhs));
    return Pairwise;
  });
}

void OpDispatchBuilder::AVX128_VPHADDSW(OpcodeArgs) {
  // 16-bit elements; PHADDSOpImpl does not need the element size handed to the callback.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i16Bit, [this](IR::OpSize, Ref Lhs, Ref Rhs) {
    return PHADDSOpImpl(OpSize::i128Bit, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_VPMADDUBSW(OpcodeArgs) {
  // The element size given to the helper is unused by the callback; the shared
  // implementation only needs the two 128-bit operands.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i128Bit, [this](IR::OpSize, Ref Lhs, Ref Rhs) {
    return PMADDUBSWOpImpl(OpSize::i128Bit, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_VPMADDWD(OpcodeArgs) {
  // The element size given to the helper is unused by the callback; the shared
  // implementation only needs the two 128-bit operands.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i128Bit, [this](IR::OpSize, Ref Lhs, Ref Rhs) {
    return PMADDWDOpImpl(OpSize::i128Bit, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_VBLEND(OpcodeArgs, IR::OpSize ElementSize) {
  const bool IsXMM = OpSizeFromSrc(Op) == OpSize::i128Bit;
  const uint64_t Control = Op->Src[2].Literal();

  ///< How far the control bits for the high half are shifted depends on element size:
  /// i64Bit: Shift by 2
  /// i32Bit: Shift by 4
  /// anything else (i16Bit): Reuses same bits, no shift
  uint64_t HighControlShift = 0;
  if (ElementSize == OpSize::i64Bit) {
    HighControlShift = 2;
  } else if (ElementSize == OpSize::i32Bit) {
    HighControlShift = 4;
  }

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMM);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !IsXMM);

  RefPair Result {};
  Result.Low = VectorBlend(OpSize::i128Bit, ElementSize, Lhs.Low, Rhs.Low, Control);

  if (!IsXMM) {
    Result.High = VectorBlend(OpSize::i128Bit, ElementSize, Lhs.High, Rhs.High, (Control >> HighControlShift));
  } else {
    Result = AVX128_Zext(Result.Low);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VHSUBP(OpcodeArgs, IR::OpSize ElementSize) {
  // The callback uses the captured ElementSize rather than the one handed to it.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, ElementSize, [&](IR::OpSize, Ref Lhs, Ref Rhs) {
    return HSUBPOpImpl(OpSize::i128Bit, ElementSize, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_VPSHUFB(OpcodeArgs) {
  // The mask vector is generated once up front and shared by every callback invocation.
  auto ShuffleMask = GeneratePSHUFBMask(OpSize::i128Bit);
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i8Bit, [this, ShuffleMask](IR::OpSize, Ref Data, Ref Indices) {
    return PSHUFBOpImpl(OpSize::i128Bit, Data, Indices, ShuffleMask);
  });
}

void OpDispatchBuilder::AVX128_VPSADBW(OpcodeArgs) {
  // 8-bit elements; the shared implementation only needs the two 128-bit operands.
  const auto DstSize = OpSizeFromDst(Op);

  AVX128_VectorBinaryImpl(Op, DstSize, OpSize::i8Bit, [this](IR::OpSize, Ref Lhs, Ref Rhs) {
    return PSADBWOpImpl(OpSize::i128Bit, Lhs, Rhs);
  });
}

void OpDispatchBuilder::AVX128_VMPSADBW(OpcodeArgs) {
  const bool IsXMM = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const uint64_t Control = Op->Src[2].Literal();

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMM);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !IsXMM);

  RefPair Result {};
  auto Zero = LoadZeroVector(OpSize::i128Bit);

  Result.Low = MPSADBWOpImpl(OpSize::i128Bit, Lhs.Low, Rhs.Low, Control);

  if (!IsXMM) {
    // The high half takes its control bits from three bits further up the immediate.
    Result.High = MPSADBWOpImpl(OpSize::i128Bit, Lhs.High, Rhs.High, Control >> 3);
  } else {
    Result.High = Zero;
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VPALIGNR(OpcodeArgs) {
  const auto Index = Op->Src[2].Literal();
  const auto Size = OpSizeFromDst(Op);
  // The per-lane size handed to the callback is clamped to 128-bit.
  const auto LaneSize = std::min(Size, OpSize::i128Bit);

  AVX128_VectorBinaryImpl(Op, Size, LaneSize, [this, Index](IR::OpSize LaneSize, Ref Upper, Ref Lower) -> Ref {
    const auto CombinedBytes = IR::OpSizeToSize(LaneSize) * 2;
    if (Index >= CombinedBytes) {
      // If the immediate is greater than both vectors combined then it zeroes the vector
      return LoadZeroVector(OpSize::i128Bit);
    }

    // Shifts by exactly zero or one whole vector need no extract at all.
    if (Index == 0) {
      return Lower;
    }

    if (Index == 16) {
      return Upper;
    }

    if (Index < 16) {
      return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Upper, Lower, Index);
    }

    // Past the first vector: zeros shift in from the top and the first operand becomes the low part.
    Ref Zero = LoadZeroVector(OpSize::i128Bit);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Zero, Upper, Index - 16);
  });
}

void OpDispatchBuilder::AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstSize, bool IsStore,
                                            const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) {
  // Anything other than a 128-bit operation also processes a second 128-bit half at offset 16.
  const bool NeedsHighHalf = DstSize != OpSize::i128Bit;

  auto Mask = AVX128_LoadSource_WithOpSize(Op, MaskOp, Op->Flags, NeedsHighHalf);

  if (!IsStore) {
    // Masked load: the address comes from the data operand.
    auto Address = MakeSegmentAddress(Op, DataOp, GetGPROpSize());

    RefPair Loaded {};
    Loaded.Low = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Address, Invalid(), MemOffsetType::SXTX, 1);

    if (NeedsHighHalf) {
      Loaded.High = _VLoadVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, Address, _InlineConstant(16), MemOffsetType::SXTX, 1);
    } else {
      Loaded.High = LoadZeroVector(OpSize::i128Bit);
    }

    AVX128_StoreResult_WithOpSize(Op, Op->Dest, Loaded);
    return;
  }

  // Masked store: the address comes from the destination operand.
  auto Address = MakeSegmentAddress(Op, Op->Dest, GetGPROpSize());

  auto Data = AVX128_LoadSource_WithOpSize(Op, DataOp, Op->Flags, NeedsHighHalf);
  _VStoreVectorMasked(OpSize::i128Bit, ElementSize, Mask.Low, Data.Low, Address, Invalid(), MemOffsetType::SXTX, 1);
  if (NeedsHighHalf) {
    _VStoreVectorMasked(OpSize::i128Bit, ElementSize, Mask.High, Data.High, Address, _InlineConstant(16), MemOffsetType::SXTX, 1);
  }
}

void OpDispatchBuilder::AVX128_VPMASKMOV(OpcodeArgs, bool IsStore) {
  // The element size is taken from the source operand size; the first source is the mask, the second the data.
  const auto ElementSize = OpSizeFromSrc(Op);
  const auto DstSize = OpSizeFromDst(Op);
  const auto& MaskOperand = Op->Src[0];
  const auto& DataOperand = Op->Src[1];

  AVX128_VMASKMOVImpl(Op, ElementSize, DstSize, IsStore, MaskOperand, DataOperand);
}

void OpDispatchBuilder::AVX128_VMASKMOV(OpcodeArgs, IR::OpSize ElementSize, bool IsStore) {
  // The caller provides the element size; the first source is the mask, the second the data.
  const auto DstSize = OpSizeFromDst(Op);
  const auto& MaskOperand = Op->Src[0];
  const auto& DataOperand = Op->Src[1];

  AVX128_VMASKMOVImpl(Op, ElementSize, DstSize, IsStore, MaskOperand, DataOperand);
}

void OpDispatchBuilder::AVX128_MASKMOV(OpcodeArgs) {
  ///< This instruction only supports 128-bit.
  const auto Size = OpSizeFromSrc(Op);
  const bool NeedsHighHalf = Size != OpSize::i128Bit;

  auto Mask = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, NeedsHighHalf);

  // Mask only cares about the top bit of each byte
  Ref ByteMask = _VCMPLTZ(Size, OpSize::i8Bit, Mask.Low);

  // Vector that will overwrite byte elements.
  auto NewBytes = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, NeedsHighHalf);

  // RDI source (DS prefix by default)
  auto Address = MakeSegmentAddress(X86State::REG_RDI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);

  Ref Existing = _LoadMemFPR(Size, Address, OpSize::i8Bit);

  // If the Mask element high bit is set then overwrite the element with the source, else keep the memory variant
  Ref Merged = _VBSL(Size, ByteMask, NewBytes.Low, Existing);
  _StoreMemFPR(Size, Address, Merged, OpSize::i8Bit);
}

void OpDispatchBuilder::AVX128_VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) {
  const bool IsXMM = OpSizeFromSrc(Op) == OpSize::i128Bit;
  const auto Immediate = Op->Src[2].Literal();

  auto Lhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !IsXMM);
  auto Rhs = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, !IsXMM);

  // The mask register index lives in bits [7:4] of the immediate.
  uint8_t MaskRegister = (Immediate >> 4) & 0b1111;
  RefPair Mask {.Low = AVX128_LoadXMMRegister(MaskRegister, false)};

  if (!IsXMM) {
    Mask.High = AVX128_LoadXMMRegister(MaskRegister, true);
  }

  // Shift each mask element right (signed) by its width minus one,
  // then use the result as the bit-select control between the two sources.
  auto BlendHalf = [&](Ref First, Ref Second, Ref MaskHalf) {
    const auto ElementBits = IR::OpSizeAsBits(ElementSize);
    Ref Spread = _VSShrI(OpSize::i128Bit, ElementSize, MaskHalf, ElementBits - 1);
    return _VBSL(OpSize::i128Bit, Spread, Second, First);
  };

  RefPair Result {};
  Result.Low = BlendHalf(Lhs.Low, Rhs.Low, Mask.Low);
  if (IsXMM) {
    Result = AVX128_Zext(Result.Low);
  } else {
    Result.High = BlendHalf(Lhs.High, Rhs.High, Mask.High);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_SaveAVXState(Ref MemBase) {
  // Byte offset from MemBase where the upper register halves begin, and the size of each.
  constexpr uint32_t UpperHalvesOffset = 576;
  constexpr uint32_t BytesPerHalf = 16;

  const uint32_t RegisterCount = Is64BitMode ? 16U : 8U;

  // Two upper halves are loaded from the context and stored per iteration.
  for (uint32_t Reg = 0; Reg < RegisterCount; Reg += 2) {
    RefPair Halves = LoadContextPair(OpSize::i128Bit, AVXHigh0Index + Reg);
    _StoreMemPairFPR(OpSize::i128Bit, Halves.Low, Halves.High, MemBase, Reg * BytesPerHalf + UpperHalvesOffset);
  }
}

void OpDispatchBuilder::AVX128_RestoreAVXState(Ref MemBase) {
  // Byte offset from MemBase where the upper register halves begin, and the size of each.
  constexpr uint32_t UpperHalvesOffset = 576;
  constexpr uint32_t BytesPerHalf = 16;

  const uint32_t RegisterCount = Is64BitMode ? 16U : 8U;

  // Two upper halves are loaded from memory and written back per iteration.
  for (uint32_t Reg = 0; Reg < RegisterCount; Reg += 2) {
    auto Halves = LoadMemPairFPR(OpSize::i128Bit, MemBase, Reg * BytesPerHalf + UpperHalvesOffset);

    AVX128_StoreXMMRegister(Reg, Halves.Low, true);
    AVX128_StoreXMMRegister(Reg + 1, Halves.High, true);
  }
}

void OpDispatchBuilder::AVX128_DefaultAVXState() {
  // Write one shared zero vector in to the upper half of every register.
  const uint32_t RegisterCount = Is64BitMode ? 16U : 8U;
  auto Zero = LoadZeroVector(OpSize::i128Bit);

  uint32_t Reg = 0;
  while (Reg < RegisterCount) {
    AVX128_StoreXMMRegister(Reg, Zero, true);
    ++Reg;
  }
}

void OpDispatchBuilder::AVX128_VPERM2(OpcodeArgs) {
  auto First = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, true);
  auto Second = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, true);
  const auto Control = Op->Src[2].Literal();

  // Both halves start out zeroed; a half is only replaced when its "zero" control bit is clear.
  RefPair Result = AVX128_Zext(LoadZeroVector(OpSize::i128Bit));

  // The four selectable 128-bit lanes, indexed by a 2-bit field of the control byte.
  const Ref Lanes[4] = {First.Low, First.High, Second.Low, Second.High};

  const bool ZeroLow = (Control & 0b00001000) != 0;
  const bool ZeroHigh = (Control & 0b10000000) != 0;

  if (!ZeroLow) {
    Result.Low = Lanes[Control & 0b11];
  }

  if (!ZeroHigh) {
    Result.High = Lanes[(Control >> 4) & 0b11];
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

// Sign-bit test of two vectors that only updates flags; no vector result is stored.
// The first operand comes from Op->Dest and the second from Op->Src[0].
// 128-bit operations go through the shared VTESTOpImpl; the 256-bit form is
// computed here from the two 128-bit halves of each operand.
void OpDispatchBuilder::AVX128_VTESTP(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Size = GetSrcSize(Op);
  const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, !Is128Bit);
  auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  // For 128-bit, we use the common path.
  if (Is128Bit) {
    VTESTOpImpl(OpSize::i128Bit, ElementSize, Src1.Low, Src2.Low);
    return;
  }

  // For 256-bit, we need to split up the operation. This is nontrivial.
  // Let's go the simple route here.
  Ref ZF, CFInv;

  const auto ElementSizeInBits = IR::OpSizeAsBits(ElementSize);

  {
    // Calculate ZF first.
    // AND the operands, then shift each element right so only its sign bit remains (0 or 1).
    auto AndLow = _VAnd(OpSize::i128Bit, OpSize::i8Bit, Src2.Low, Src1.Low);
    auto AndHigh = _VAnd(OpSize::i128Bit, OpSize::i8Bit, Src2.High, Src1.High);

    auto ShiftLow = _VUShrI(OpSize::i128Bit, ElementSize, AndLow, ElementSizeInBits - 1);
    auto ShiftHigh = _VUShrI(OpSize::i128Bit, ElementSize, AndHigh, ElementSizeInBits - 1);
    // Only have the signs now, add it all
    auto AddResult = _VAdd(OpSize::i128Bit, ElementSize, ShiftHigh, ShiftLow);
    Ref AddWide {};
    // 32-bit elements use an across-vector add; other element sizes use a pairwise add of the vector with itself.
    if (ElementSize == OpSize::i32Bit) {
      AddWide = _VAddV(OpSize::i128Bit, ElementSize, AddResult);
    } else {
      AddWide = _VAddP(OpSize::i128Bit, ElementSize, AddResult, AddResult);
    }

    // ExtGPR will either be [0, 8] or [0, 16] If 0 then set Flag.
    // The extracted count is zero only if no element of the AND had its sign bit set.
    ZF = _VExtractToGPR(OpSize::i128Bit, ElementSize, AddWide, 0);
  }

  {
    // Calculate CF Second
    // Same reduction as above, but on the AND-NOT of the operands.
    // NOTE(review): presumably _VAndn(a, b) computes a & ~b — confirm against the IR definition.
    auto AndLow = _VAndn(OpSize::i128Bit, OpSize::i8Bit, Src2.Low, Src1.Low);
    auto AndHigh = _VAndn(OpSize::i128Bit, OpSize::i8Bit, Src2.High, Src1.High);

    auto ShiftLow = _VUShrI(OpSize::i128Bit, ElementSize, AndLow, ElementSizeInBits - 1);
    auto ShiftHigh = _VUShrI(OpSize::i128Bit, ElementSize, AndHigh, ElementSizeInBits - 1);
    // Only have the signs now, add it all
    auto AddResult = _VAdd(OpSize::i128Bit, ElementSize, ShiftHigh, ShiftLow);
    Ref AddWide {};
    if (ElementSize == OpSize::i32Bit) {
      AddWide = _VAddV(OpSize::i128Bit, ElementSize, AddResult);
    } else {
      AddWide = _VAddP(OpSize::i128Bit, ElementSize, AddResult, AddResult);
    }

    // ExtGPR will either be [0, 8] or [0, 16] If 0 then set Flag.
    auto ExtGPR = _VExtractToGPR(OpSize::i128Bit, ElementSize, AddWide, 0);
    // Collapse the count to 0/1; it is handed to SetCFInverted below.
    CFInv = To01(OpSize::i64Bit, ExtGPR);
  }

  // As in PTest, this sets Z appropriately while zeroing the rest of NZCV.
  SetNZ_ZeroCV(OpSize::i32Bit, ZF);
  SetCFInverted(CFInv);
  ZeroPF_AF();
}

// Bitwise test of two vectors that only updates flags; no vector result is stored.
// The first operand comes from Op->Dest and the second from Op->Src[0].
// 128-bit operations go through the shared PTestOpImpl; the 256-bit form is
// computed here from the two 128-bit halves of each operand.
void OpDispatchBuilder::AVX128_PTest(OpcodeArgs) {
  const auto Size = GetSrcSize(Op);
  const auto Is128Bit = Size == Core::CPUState::XMM_SSE_REG_SIZE;

  auto Src1 = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, !Is128Bit);
  auto Src2 = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128Bit);

  // For 128-bit, use the common path.
  if (Is128Bit) {
    PTestOpImpl(OpSize::i128Bit, Src1.Low, Src2.Low);
    return;
  }

  // For 256-bit, we need to unroll. This is nontrivial.
  // Test1 is the AND of the operands (feeds ZF); Test2 is the AND-NOT (feeds CF).
  // NOTE(review): presumably _VAndn(a, b) computes a & ~b — confirm against the IR definition.
  Ref Test1Low = _VAnd(OpSize::i128Bit, OpSize::i8Bit, Src1.Low, Src2.Low);
  Ref Test2Low = _VAndn(OpSize::i128Bit, OpSize::i8Bit, Src2.Low, Src1.Low);

  Ref Test1High = _VAnd(OpSize::i128Bit, OpSize::i8Bit, Src1.High, Src2.High);
  Ref Test2High = _VAndn(OpSize::i128Bit, OpSize::i8Bit, Src2.High, Src1.High);

  // Element size must be less than 32-bit for the sign bit tricks.
  // Combine the two halves with an unsigned max, then reduce across the vector:
  // the reduced value is zero only if every 16-bit element in both halves was zero.
  Ref Test1Max = _VUMax(OpSize::i128Bit, OpSize::i16Bit, Test1Low, Test1High);
  Ref Test2Max = _VUMax(OpSize::i128Bit, OpSize::i16Bit, Test2Low, Test2High);

  Ref Test1 = _VUMaxV(OpSize::i128Bit, OpSize::i16Bit, Test1Max);
  Ref Test2 = _VUMaxV(OpSize::i128Bit, OpSize::i16Bit, Test2Max);

  Test1 = _VExtractToGPR(OpSize::i128Bit, OpSize::i16Bit, Test1, 0);
  Test2 = _VExtractToGPR(OpSize::i128Bit, OpSize::i16Bit, Test2, 0);

  // Collapse the CF input to 0/1; it is handed to SetCFInverted below.
  Test2 = To01(OpSize::i64Bit, Test2);

  // Careful, these flags are different between {V,}PTEST and VTESTP{S,D}
  // Set ZF according to Test1. SF will be zeroed since we do a 32-bit test on
  // the results of a 16-bit value from the UMaxV, so the 32-bit sign bit is
  // cleared even if the 16-bit scalars were negative.
  SetNZ_ZeroCV(OpSize::i32Bit, Test1);
  SetCFInverted(Test2);
  ZeroPF_AF();
}

void OpDispatchBuilder::AVX128_VPERMILReg(OpcodeArgs, IR::OpSize ElementSize) {
  // The first operand is the data and the second the per-element indices.
  const auto SrcSize = OpSizeFromSrc(Op);

  AVX128_VectorBinaryImpl(Op, SrcSize, ElementSize, [this](IR::OpSize LaneElementSize, Ref Data, Ref Selectors) {
    return VPERMILRegOpImpl(OpSize::i128Bit, LaneElementSize, Data, Selectors);
  });
}

void OpDispatchBuilder::AVX128_VPERMD(OpcodeArgs) {
  // Only 256-bit
  auto IndexPair = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, true);
  auto Data = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, true);

  // Turns one half of the index vector in to table-lookup indices and performs
  // a two-register lookup over both halves of the data.
  auto PermuteHalf = [this](RefPair Table, Ref HalfIndices, Ref Mask, Ref Addend) {
    Ref LookupIndices = VPERMDIndices(OpSize::i128Bit, HalfIndices, Mask, Addend);
    return _VTBL2(OpSize::i128Bit, Table.Low, Table.High, LookupIndices);
  };

  RefPair Result {};

  // Constants shared by both halves: a per-element 0b111 mask and the
  // 0x03020100 pattern duplicated in to every 32-bit element.
  Ref IndexMask = _VectorImm(OpSize::i128Bit, OpSize::i32Bit, 0b111);
  Ref BytePattern = Constant(0x03020100);
  Ref RepeatedPattern = _VDupFromGPR(OpSize::i128Bit, OpSize::i32Bit, BytePattern);

  Result.Low = PermuteHalf(Data, IndexPair.Low, IndexMask, RepeatedPattern);
  Result.High = PermuteHalf(Data, IndexPair.High, IndexMask, RepeatedPattern);

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Result);
}

void OpDispatchBuilder::AVX128_VPCLMULQDQ(OpcodeArgs) {
  // Without host support for the 128-bit PMULL feature this is reported as unimplemented.
  const bool HostSupported = CTX->HostFeatures.SupportsPMULL_128Bit;
  if (!HostSupported) {
    UnimplementedOp(Op);
    return;
  }

  const auto Selector = static_cast<uint8_t>(Op->Src[2].Literal());
  const auto SrcSize = OpSizeFromSrc(Op);

  // Only bits 0 and 4 of the immediate are passed on to the IR op.
  AVX128_VectorBinaryImpl(Op, SrcSize, OpSize::iInvalid, [this, Selector](IR::OpSize, Ref Lhs, Ref Rhs) {
    return _PCLMUL(OpSize::i128Bit, Lhs, Rhs, Selector & 0b1'0001);
  });
}

// FMA differences between AArch64 and x86 make it really confusing to remember how things match.
// Here's a little guide for remembering how these instructions relate across the architectures.
//
///< AArch64 Vector FMA behaviour
// FMLA vd, vn, vm
// - vd = (vn * vm) + vd
// FMLS vd, vn, vm
// - vd = (-vn * vm) + vd
//
// SVE ONLY! No FNMLA or FNMLS variants until SVE!
// FMLA zda, pg/m, zn, zm - Ignore predicate here
// - zda = (zn * zm) + zda
// FMLS zda, pg/m, zn, zm - Ignore predicate here
// - zda = (-zn * zm) + zda
// FNMLA zda, pg/m, zn, zm - Ignore predicate here
// - zda = (-zn * zm) - zda
// FNMLS zda, pg/m, zn, zm - Ignore predicate here
// - zda = (zn * zm) - zda
//
///< AArch64 Scalar FMA behaviour (FMA4 versions!)
// All variants support 16-bit, 32-bit, and 64-bit.
// FMADD d, n, m, a
// - d = (n * m) + a
// FMSUB d, n, m, a
// - d = (-n * m) + a
// FNMADD d, n, m, a
// - d = (-n * m) - a
// FNMSUB d, n, m, a
// - d = (n * m) - a
//
///< x86 FMA behaviour
// ## Packed variants
// - VFMADD{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1 = (src1 * src3) + src2
// - 213 - src1 = (src2 * src1) + src3
// - 231 - src1 = (src2 * src3) + src1
//   ^ Matches ARM FMLA
//
// - VFMSUB{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1 = (src1 * src3) - src2
// - 213 - src1 = (src2 * src1) - src3
// - 231 - src1 = (src2 * src3) - src1
//   ^ Matches ARM FMLA with addend negated first
//   ^ Or just SVE FNMLS
//   ^ or scalar FNMSUB
//
// - VFNMADD{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1 = (-src1 * src3) + src2
// - 213 - src1 = (-src2 * src1) + src3
// - 231 - src1 = (-src2 * src3) + src1
//   ^ Matches ARM FMLS behaviour! (REALLY CONFUSINGLY NAMED!)
//   ^ Or Scalar FMSUB
//
// - VFNMSUB{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1 = (-src1 * src3) - src2
// - 213 - src1 = (-src2 * src1) - src3
// - 231 - src1 = (-src2 * src3) - src1
//   ^ Matches ARM FMLS behaviour with addend negated first! (REALLY CONFUSINGLY NAMED!)
//   ^ Or just SVE FNMLA
//   ^ Or scalar FNMADD
//
// - VFMADDSUB{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1.odd  = (src1.odd  * src3.odd)  + src2.odd
//       - src1.even = (src1.even * src3.even) - src2.even
// - 213 - src1.odd  = (src2.odd  * src1.odd)  + src3.odd
//       - src1.even = (src2.even * src1.even) - src3.even
// - 231 - src1.odd  = (src2.odd  * src3.odd)  + src1.odd
//       - src1.even = (src2.even * src3.even) - src1.even
//   ^ Matches ARM FMLA behaviour with addend.even negated first!
//
// - VFMSUBADD{PD,PS}suffix src1, src2, src3/mem
// - 132 - src1.odd  = (src1.odd  * src3.odd)  - src2.odd
//       - src1.even = (src1.even * src3.even) + src2.even
// - 213 - src1.odd  = (src2.odd  * src1.odd)  - src3.odd
//       - src1.even = (src2.even * src1.even) + src3.even
// - 231 - src1.odd  = (src2.odd  * src3.odd)  - src1.odd
//       - src1.even = (src2.even * src3.even) + src1.even
//   ^ Matches ARM FMLA behaviour with addend.odd negated first!
//
// As shown only the 231 suffixed instructions match AArch64 behaviour.
// FEX will insert moves to transpose the vectors to match AArch64 behaviour for 132 and 213 variants.

// Packed FMA lowering shared by the 132/213/231 operand orderings.
// IROp is the IR opcode the emitted op is re-tagged with; Src1Idx/Src2Idx/AddendIdx are 1-based
// selectors into {Dest, Src[0], Src[1]} naming the two multiplicands and the addend.
void OpDispatchBuilder::AVX128_VFMAImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool LoadUpperHalf = !Is128Bit;

  // VEX.W picks between double and single precision elements.
  const OpSize ElementSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;

  RefPair Operands[3] {};
  Operands[0] = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, LoadUpperHalf);
  Operands[1] = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, LoadUpperHalf);
  Operands[2] = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, LoadUpperHalf);

  const RefPair& MulA = Operands[Src1Idx - 1];
  const RefPair& MulB = Operands[Src2Idx - 1];
  const RefPair& Addend = Operands[AddendIdx - 1];

  RefPair Output {};

  // Build the op as a VFMLA and let DeriveOp swap in the requested opcode.
  DeriveOp(LowHalf, IROp, _VFMLA(OpSize::i128Bit, ElementSize, MulA.Low, MulB.Low, Addend.Low));
  Output.Low = LowHalf;

  if (!Is128Bit) {
    DeriveOp(HighHalf, IROp, _VFMLA(OpSize::i128Bit, ElementSize, MulA.High, MulB.High, Addend.High));
    Output.High = HighHalf;
  } else {
    // 128-bit form: the upper lane of the destination becomes zero.
    Output.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Scalar FMA lowering shared by the 132/213/231 operand orderings.
// The index parameters are 1-based selectors into {Dest, Src[0], Src[1]}.
void OpDispatchBuilder::AVX128_VFMAScalarImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  // VEX.W picks between a double and a single precision element.
  const OpSize ElementSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;

  Ref Operands[3] {};
  Operands[0] = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, false).Low;
  Operands[1] = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, false).Low;

  if (!Op->Src[1].IsGPR()) {
    // Non-register operand: only load a single element's worth of data.
    Operands[2] = LoadSourceFPR_WithOpSize(Op, Op->Src[1], ElementSize, Op->Flags);
  } else {
    Operands[2] = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, false).Low;
  }

  Ref Passthrough = Operands[0];
  Ref MulA = Operands[Src1Idx - 1];
  Ref MulB = Operands[Src2Idx - 1];
  Ref Addend = Operands[AddendIdx - 1];

  // Build as VFMLAScalarInsert (with the destination as its first vector operand) and re-tag with IROp.
  DeriveOp(Scalar, IROp, _VFMLAScalarInsert(OpSize::i128Bit, ElementSize, Passthrough, MulA, MulB, Addend));

  // The upper 128-bit lane is zero-extended on store.
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Scalar));
}

// Lowering for the alternating add/sub FMA forms. The addend is XORed with a named sign-flip
// constant (selected by AddSub and the element size) and the result is fed to a plain VFMLA.
// The index parameters are 1-based selectors into {Dest, Src[0], Src[1]}.
void OpDispatchBuilder::AVX128_VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  const auto Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;
  const bool LoadUpperHalf = !Is128Bit;

  const OpSize ElementSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;
  const bool IsSingle = ElementSize == OpSize::i32Bit;

  RefPair Operands[3] {};
  Operands[0] = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, LoadUpperHalf);
  Operands[1] = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, LoadUpperHalf);
  Operands[2] = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, LoadUpperHalf);

  const RefPair& MulA = Operands[Src1Idx - 1];
  const RefPair& MulB = Operands[Src2Idx - 1];
  const RefPair& Addend = Operands[AddendIdx - 1];

  // Choose which named inversion constant to load, then load it once.
  const auto InvertName = AddSub ? (IsSingle ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT) :
                                   (IsSingle ? NAMED_VECTOR_PSUBADDPS_INVERT : NAMED_VECTOR_PSUBADDPD_INVERT);
  Ref InvertMask = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, InvertName);

  RefPair Output {};

  auto AddendLow = _VXor(OpSize::i128Bit, ElementSize, Addend.Low, InvertMask);
  Output.Low = _VFMLA(OpSize::i128Bit, ElementSize, MulA.Low, MulB.Low, AddendLow);

  if (!Is128Bit) {
    auto AddendHigh = _VXor(OpSize::i128Bit, ElementSize, Addend.High, InvertMask);
    Output.High = _VFMLA(OpSize::i128Bit, ElementSize, MulA.High, MulB.High, AddendHigh);
  } else {
    // 128-bit form: the upper lane of the destination becomes zero.
    Output.High = LoadZeroVector(OpSize::i128Bit);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Output);
}

// Emits the masked gather load(s) for a VSIB-addressed gather and returns the low/high 128-bit result pair.
//
// Size            - Full operation size; anything other than 128-bit takes the two-half path.
// ElementLoadSize - Size of each loaded data element.
// AddrElementSize - Size of each index element (asserted to be 32-bit or 64-bit).
// Dest            - Previous destination halves, passed through to the gather op.
// Mask            - Mask halves, passed through to the gather op.
// VSIB            - Index vector halves plus base address, displacement and scale. Taken by value and modified locally.
OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_VPGatherImpl(OpcodeArgs, OpSize Size, OpSize ElementLoadSize, OpSize AddrElementSize,
                                                                  RefPair Dest, RefPair Mask, RefVSIB VSIB) {
  LOGMAN_THROW_A_FMT(AddrElementSize == OpSize::i32Bit || AddrElementSize == OpSize::i64Bit, "Unknown address element size");
  const auto Is128Bit = Size == OpSize::i128Bit;

  ///< BaseAddr doesn't need to exist, calculate that here.
  // Result is base + displacement, displacement alone, the base alone, or Invalid() when neither is present.
  Ref BaseAddr = VSIB.BaseAddr;
  if (BaseAddr && VSIB.Displacement) {
    BaseAddr = Add(OpSize::i64Bit, BaseAddr, VSIB.Displacement);
  } else if (VSIB.Displacement) {
    BaseAddr = Constant(VSIB.Displacement);
  } else if (!BaseAddr) {
    BaseAddr = Invalid();
  }

  if (CTX->HostFeatures.SupportsSVE128) {
    if (ElementLoadSize == OpSize::i64Bit && AddrElementSize == OpSize::i32Bit) {
      // In the case that FEX is loading double the amount of data than the number of address bits then we can optimize this case.
      // For 256-bits of data we need to sign extend all four 32-bit address elements to be 64-bit.
      // For 128-bits of data we only need to sign extend the lower two 32-bit address elements.
      LOGMAN_THROW_A_FMT(VSIB.High == Invalid(), "Need to not have a high VSIB source");

      // The upper half must be derived first because VSIB.Low is overwritten immediately afterwards.
      if (!Is128Bit) {
        VSIB.High = _VSSHLL2(OpSize::i128Bit, OpSize::i32Bit, VSIB.Low, FEXCore::ilog2(VSIB.Scale));
      }
      VSIB.Low = _VSSHLL(OpSize::i128Bit, OpSize::i32Bit, VSIB.Low, FEXCore::ilog2(VSIB.Scale));

      ///< Set the scale to one now that it has been prescaled as well.
      VSIB.Scale = 1;

      // Set the address element size to 64-bit now that the elements are extended.
      AddrElementSize = OpSize::i64Bit;
    } else if (ElementLoadSize == OpSize::i64Bit && AddrElementSize == OpSize::i64Bit && (VSIB.Scale == 2 || VSIB.Scale == 4)) {
      // SVE gather instructions don't support scaling their vector elements by anything other than 1 or the address element size.
      // Pre-scale 64-bit addresses in the case that scale doesn't match in-order to hit SVE code paths more frequently.
      // Only hit this path if the host supports SVE. Otherwise it's a degradation for the ASIMD codepath.
      VSIB.Low = _VShlI(OpSize::i128Bit, OpSize::i64Bit, VSIB.Low, FEXCore::ilog2(VSIB.Scale));
      if (!Is128Bit) {
        VSIB.High = _VShlI(OpSize::i128Bit, OpSize::i64Bit, VSIB.High, FEXCore::ilog2(VSIB.Scale));
      }
      ///< Set the scale to one now that it has been prescaled.
      VSIB.Scale = 1;
    }
  }

  // Address size handed to the gather op: half the GPR size when the address-size flag is set on the instruction.
  const auto GPRSize = GetGPROpSize();
  auto AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (GPRSize >> 1) : GPRSize;

  RefPair Result {};
  ///< Calculate the low-half.
  Result.Low = _VLoadVectorGatherMasked(OpSize::i128Bit, ElementLoadSize, Dest.Low, Mask.Low, BaseAddr, VSIB.Low, VSIB.High,
                                        AddrElementSize, VSIB.Scale, 0, 0, AddrSize);

  if (Is128Bit) {
    Result.High = LoadZeroVector(OpSize::i128Bit);
    if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
      // Special case for the 128-bit gather load using 64-bit address indexes with 32-bit results.
      // Only loads two 32-bit elements in to the lower 64-bits of the first destination.
      // Bits [255:64] all become zero.
      Result.Low = _VZip(OpSize::i128Bit, OpSize::i64Bit, Result.Low, Result.High);
    }
  } else {
    // Index registers handed to the second gather op; which ones are used depends on the index/data size ratio.
    RefPair AddrAddressing {};

    Ref DestReg = Dest.High;
    Ref MaskReg = Mask.High;
    // Both offsets default to zero and are only adjusted for the mismatched-size cases below.
    uint8_t IndexElementOffset {};
    uint8_t DataElementOffset {};
    if (AddrElementSize == ElementLoadSize) {
      // If the address size matches the loading element size then it will be fetching at the same rate between low and high
      AddrAddressing.Low = VSIB.High;
      AddrAddressing.High = Invalid();
    } else if (AddrElementSize == OpSize::i32Bit && ElementLoadSize == OpSize::i64Bit) {
      // If the address element size is half the size of the Element load size then we need to start fetching half-way through the low register.
      AddrAddressing.Low = VSIB.Low;
      AddrAddressing.High = VSIB.High;
      IndexElementOffset = IR::NumElements(OpSize::i128Bit, AddrElementSize) / 2;
    } else if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
      // 64-bit indices with 32-bit data: the second gather continues filling the low result register,
      // starting half-way through its data elements and using the high index register.
      AddrAddressing.Low = VSIB.High;
      AddrAddressing.High = Invalid();
      DestReg = Result.Low; ///< Start mixing with the low register.
      MaskReg = Mask.Low;   ///< Mask starts with the low mask here.
      IndexElementOffset = 0;
      DataElementOffset = IR::NumElements(OpSize::i128Bit, ElementLoadSize) / 2;
    }

    ///< Calculate the high-half.
    auto ResultHigh = _VLoadVectorGatherMasked(OpSize::i128Bit, ElementLoadSize, DestReg, MaskReg, BaseAddr, AddrAddressing.Low,
                                               AddrAddressing.High, AddrElementSize, VSIB.Scale, DataElementOffset, IndexElementOffset, AddrSize);

    if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
      // If we only fetched 128-bits worth of data then the upper-result is all zero.
      Result = AVX128_Zext(ResultHigh);
    } else {
      Result.High = ResultHigh;
    }
  }

  return Result;
}

// Emits a single gather of 32-bit elements addressed by 64-bit indices.
// The gathered data is returned in the low half of the pair; the high half is always a zero vector.
// VSIB is taken by value and may be prescaled locally.
OpDispatchBuilder::RefPair OpDispatchBuilder::AVX128_VPGatherQPSImpl(OpcodeArgs, Ref Dest, Ref Mask, RefVSIB VSIB) {
  // Resolve the scalar part of the address: base + displacement, just the displacement,
  // just the base, or Invalid() when neither exists.
  Ref Base = VSIB.BaseAddr;
  if (VSIB.Displacement) {
    if (Base) {
      Base = Add(OpSize::i64Bit, Base, VSIB.Displacement);
    } else {
      Base = Constant(VSIB.Displacement);
    }
  } else if (!Base) {
    Base = Invalid();
  }

  const bool ScaleIs2Or8 = VSIB.Scale == 2 || VSIB.Scale == 8;
  const bool NeedsSVEScale = ScaleIs2Or8 || (Base == Invalid() && VSIB.Scale != 1);

  if (CTX->HostFeatures.SupportsSVE128 && NeedsSVEScale) {
    // SVE gathers only scale indices by 1 or by the element size, so apply the scale to the
    // 64-bit indices up front. This is only worthwhile on SVE hosts; ASIMD would get slower.
    const auto Shift = FEXCore::ilog2(VSIB.Scale);
    VSIB.Low = _VShlI(OpSize::i128Bit, OpSize::i64Bit, VSIB.Low, Shift);
    if (VSIB.High != Invalid()) {
      VSIB.High = _VShlI(OpSize::i128Bit, OpSize::i64Bit, VSIB.High, Shift);
    }
    // The indices already carry the scale now.
    VSIB.Scale = 1;
  }

  RefPair Gathered {};

  // Address size handed to the gather op: half the GPR size when the address-size flag is set.
  const auto GPRSize = GetGPROpSize();
  auto AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) == 0 ? GPRSize : (GPRSize >> 1);

  Gathered.Low = _VLoadVectorGatherMaskedQPS(OpSize::i128Bit, OpSize::i32Bit, Dest, Mask, Base, VSIB.Low, VSIB.High, VSIB.Scale, AddrSize);
  Gathered.High = LoadZeroVector(OpSize::i128Bit);

  if (VSIB.High == Invalid()) {
    // Only two floats were gathered; zip with the zero vector so the upper 64 bits of the low lane are zero too.
    Gathered.Low = _VZip(OpSize::i128Bit, OpSize::i64Bit, Gathered.Low, Gathered.High);
  }

  return Gathered;
}

// Front-end for the gather family. AddrElementSize is the index element size; the data element size comes from VEX.W.
// Picks between the generic gather helper, the 64-bit-index/32-bit-data helper, and an SVE-specific
// prescaling path, then stores the result and clears the mask register.
void OpDispatchBuilder::AVX128_VPGATHER(OpcodeArgs, OpSize AddrElementSize) {
  const auto Size = OpSizeFromDst(Op);
  const bool Is128Bit = Size == OpSize::i128Bit;
  const bool LoadUpperHalf = !Is128Bit;

  // VEX.W selects the data element size.
  const OpSize ElementLoadSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;

  // The high index register is only required when the indices consumed exceed one 128-bit register.
  // The index byte count is additionally capped by the destination size.
  const size_t DataElementCount = IR::NumElements(Size, ElementLoadSize);
  const size_t IndexBytes = std::min<size_t>(IR::OpSizeToSize(Size), (DataElementCount * IR::OpSizeToSize(AddrElementSize)));
  const bool NeedsHighAddrBytes = IndexBytes > IR::OpSizeToSize(OpSize::i128Bit);

  auto Dest = AVX128_LoadSource_WithOpSize(Op, Op->Dest, Op->Flags, LoadUpperHalf);
  auto VSIB = AVX128_LoadVSIB(Op, Op->Src[0], Op->Flags, NeedsHighAddrBytes);
  auto Mask = AVX128_LoadSource_WithOpSize(Op, Op->Src[1], Op->Flags, LoadUpperHalf);

  const bool ScaleIs2Or8 = VSIB.Scale == 2 || VSIB.Scale == 8;
  const bool NeedsSVEScale = ScaleIs2Or8 || (VSIB.BaseAddr == Invalid() && VSIB.Scale != 1);

  const bool Index32Data32 = AddrElementSize == OpSize::i32Bit && ElementLoadSize == OpSize::i32Bit;
  const bool Index64Data32 = AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit;
  const bool NeedsExplicitSVEPath = CTX->HostFeatures.SupportsSVE128 && Index32Data32 && NeedsSVEScale;

  RefPair Gathered {};
  if (NeedsExplicitSVEPath) {
    // VGATHERDPS/VPGATHERDD with a scale SVE cannot encode: widen each 128-bit register of 32-bit
    // indices into two registers of prescaled 64-bit indices and use the 64-bit-index helper instead.
    auto WidenAndPrescale = [&](Ref Indices) -> RefVSIB {
      RefVSIB Widened {};
      Widened.High = _VSSHLL2(OpSize::i128Bit, OpSize::i32Bit, Indices, FEXCore::ilog2(VSIB.Scale));
      Widened.Low = _VSSHLL(OpSize::i128Bit, OpSize::i32Bit, Indices, FEXCore::ilog2(VSIB.Scale));

      Widened.Displacement = VSIB.Displacement;
      Widened.BaseAddr = VSIB.BaseAddr;

      // Scale has been folded into the indices.
      Widened.Scale = 1;
      return Widened;
    };

    RefVSIB LowIndices = WidenAndPrescale(VSIB.Low);
    RefVSIB HighIndices {};
    if (NeedsHighAddrBytes) {
      HighIndices = WidenAndPrescale(VSIB.High);
    }

    // From here on the indices are 64-bit wide.
    Gathered = AVX128_VPGatherQPSImpl(Op, Dest.Low, Mask.Low, LowIndices);
    if (NeedsHighAddrBytes) {
      auto Upper = AVX128_VPGatherQPSImpl(Op, Dest.High, Mask.High, HighIndices);
      Gathered.High = Upper.Low;
    }
  } else if (Index64Data32) {
    Gathered = AVX128_VPGatherQPSImpl(Op, Dest.Low, Mask.Low, VSIB);
  } else {
    Gathered = AVX128_VPGatherImpl(Op, Size, ElementLoadSize, AddrElementSize, Dest, Mask, VSIB);
  }
  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Gathered);

  // Non-faulting behaviour is assumed, so the whole mask register is cleared afterwards.
  RefPair ClearedMask {};
  ClearedMask.Low = LoadZeroVector(OpSize::i128Bit);
  ClearedMask.High = ClearedMask.Low;
  AVX128_StoreResult_WithOpSize(Op, Op->Src[1], ClearedMask);
}

// Converts packed half-precision floats to packed single-precision floats.
void OpDispatchBuilder::AVX128_VCVTPH2PS(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  // The half-precision input is always half as wide as the single-precision output.
  const auto SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) / 2);
  const bool SrcIs128Bit = SrcSize == OpSize::i128Bit;
  const bool DstIs128Bit = DstSize == OpSize::i128Bit;

  RefPair Input {};
  if (!Op->Src[0].IsGPR()) {
    // Memory operand: the access width is half the destination vector width
    // (128-bit vector -> 64-bit mem, 256-bit vector -> 128-bit mem).
    Input.Low = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags);
  } else {
    Input = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !SrcIs128Bit);
  }

  RefPair Widened {};
  Widened.Low = _Vector_FToF(OpSize::i128Bit, OpSize::i32Bit, Input.Low, OpSize::i16Bit);

  if (SrcIs128Bit) {
    // The high result lane is produced from the same low source register.
    Widened.High = _VFCVTL2(OpSize::i128Bit, OpSize::i16Bit, Input.Low);
  }

  if (DstIs128Bit) {
    // 128-bit destination: the upper lane is zero-extended.
    Widened = AVX128_Zext(Widened.Low);
  }

  AVX128_StoreResult_WithOpSize(Op, Op->Dest, Widened);
}

// Converts packed single-precision floats to packed half-precision floats.
// Src[0] is the single-precision source vector and Src[1] is the imm8 rounding control.
// The destination is either a register or a memory operand half the width of the source.
void OpDispatchBuilder::AVX128_VCVTPS2PH(OpcodeArgs) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto Is128BitSrc = SrcSize == OpSize::i128Bit;
  const bool DestIsRegister = Op->Dest.IsGPR();

  const auto Imm8 = Op->Src[1].Literal();
  // imm8 bit 2 selects the MXCSR rounding mode instead of the one encoded in imm8[1:0].
  const auto UseMXCSR = (Imm8 & 0b100) != 0;

  auto Src = AVX128_LoadSource_WithOpSize(Op, Op->Src[0], Op->Flags, !Is128BitSrc);

  Ref OldFPCR {};
  if (!UseMXCSR) {
    // No ARM float conversion instructions allow passing in
    // a rounding mode as an immediate. All of them depend on
    // the RM field in the FPCR. And so! We have to do some ugly
    // rounding mode shuffling.
    const auto NewRMode = Imm8 & 0b11;
    OldFPCR = _PushRoundingMode(NewRMode);
  }

  Ref Converted = _Vector_FToF(OpSize::i128Bit, OpSize::i16Bit, Src.Low, OpSize::i32Bit);
  if (!Is128BitSrc) {
    // 256-bit source: narrow the upper source lane as well and combine it with the low result.
    Converted = _VFCVTN2(OpSize::i128Bit, OpSize::i32Bit, Converted, Src.High);
  }

  if (!UseMXCSR) {
    _PopRoundingMode(OldFPCR);
  }

  if (DestIsRegister) {
    // A register destination is always an XMM register, and VEX semantics zero every bit above
    // the written result. Previously the zero-extension was keyed on the *source* operand being
    // a register with a 256-bit size, which left the high half of the result pair unset for the
    // 128-bit source form. Always hand the store a pair with an explicit zero upper lane.
    AVX128_StoreResult_WithOpSize(Op, Op->Dest, AVX128_Zext(Converted));
  } else {
    // Memory destination: the store is half as wide as the source vector.
    const auto StoreSize = IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2);
    StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, StoreSize);
  }
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/BaseTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Dispatch table binding base (single-byte) x86 opcodes to their OpDispatchBuilder handlers.
// NOTE(review): each entry appears to be {first opcode, number of consecutive opcodes covered, handler};
// confirm against the DispatchTableEntry definition, which is not part of this header.
// Handlers that need extra compile-time arguments are wrapped with OpDispatchBuilder::Bind<...>.
constexpr inline DispatchTableEntry OpDispatch_BaseOpTable[] = {
  // Instructions
  {0x00, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ALUOp, FEXCore::IR::IROps::OP_ADD, FEXCore::IR::IROps::OP_ATOMICFETCHADD, 0>},

  {0x08, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ALUOp, FEXCore::IR::IROps::OP_OR, FEXCore::IR::IROps::OP_ATOMICFETCHOR, 0>},

  {0x10, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ADCOp, 0>},

  {0x18, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SBBOp, 0>},

  {0x20, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ALUOp, FEXCore::IR::IROps::OP_ANDWITHFLAGS, FEXCore::IR::IROps::OP_ATOMICFETCHAND, 0>},

  {0x28, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ALUOp, FEXCore::IR::IROps::OP_SUB, FEXCore::IR::IROps::OP_ATOMICFETCHSUB, 0>},

  {0x30, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ALUOp, FEXCore::IR::IROps::OP_XOR, FEXCore::IR::IROps::OP_ATOMICFETCHXOR, 0>},

  {0x38, 6, &OpDispatchBuilder::Bind<&OpDispatchBuilder::CMPOp, 0>},
  {0x50, 8, &OpDispatchBuilder::PUSHREGOp},
  {0x58, 8, &OpDispatchBuilder::POPOp},
  {0x68, 1, &OpDispatchBuilder::PUSHOp},
  {0x69, 1, &OpDispatchBuilder::IMUL2SrcOp},
  {0x6A, 1, &OpDispatchBuilder::PUSHOp},
  {0x6B, 1, &OpDispatchBuilder::IMUL2SrcOp},
  {0x6C, 4, &OpDispatchBuilder::PermissionRestrictedOp},

  {0x70, 16, &OpDispatchBuilder::CondJUMPOp},
  {0x84, 2, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 0>},
  {0x86, 2, &OpDispatchBuilder::XCHGOp},
  {0x88, 4, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVGPROp, 0>},

  {0x8C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVSegOp, false>},
  {0x8D, 1, &OpDispatchBuilder::LEAOp},
  {0x8E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVSegOp, true>},
  {0x8F, 1, &OpDispatchBuilder::POPOp},
  {0x90, 8, &OpDispatchBuilder::XCHGOp},

  {0x98, 1, &OpDispatchBuilder::CDQOp},
  {0x99, 1, &OpDispatchBuilder::CQOOp},
  {0x9B, 1, &OpDispatchBuilder::NOPOp},
  {0x9C, 1, &OpDispatchBuilder::PUSHFOp},
  {0x9D, 1, &OpDispatchBuilder::POPFOp},
  {0x9E, 1, &OpDispatchBuilder::SAHFOp},
  {0x9F, 1, &OpDispatchBuilder::LAHFOp},
  {0xA4, 2, &OpDispatchBuilder::MOVSOp},

  {0xA6, 2, &OpDispatchBuilder::CMPSOp},
  {0xA8, 2, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 0>},
  {0xAA, 2, &OpDispatchBuilder::STOSOp},
  {0xAC, 2, &OpDispatchBuilder::LODSOp},
  {0xAE, 2, &OpDispatchBuilder::SCASOp},
  {0xB0, 16, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVGPROp, 0>},
  {0xC2, 2, &OpDispatchBuilder::RETOp},
  {0xC8, 1, &OpDispatchBuilder::EnterOp},
  {0xC9, 1, &OpDispatchBuilder::LEAVEOp},
  {0xCA, 2, &OpDispatchBuilder::RETFARIndirectOp},
  {0xCC, 2, &OpDispatchBuilder::INTOp},
  {0xCF, 1, &OpDispatchBuilder::IRETOp},
  {0xD7, 2, &OpDispatchBuilder::XLATOp},
  {0xE0, 3, &OpDispatchBuilder::LoopOp},
  {0xE3, 1, &OpDispatchBuilder::CondJUMPRCXOp},
  {0xE4, 4, &OpDispatchBuilder::PermissionRestrictedOp},
  {0xE8, 1, &OpDispatchBuilder::CALLOp},
  {0xE9, 1, &OpDispatchBuilder::JUMPOp},
  {0xEB, 1, &OpDispatchBuilder::JUMPOp},
  {0xEC, 4, &OpDispatchBuilder::PermissionRestrictedOp},
  {0xF1, 1, &OpDispatchBuilder::INTOp},
  {0xF4, 1, &OpDispatchBuilder::INTOp},

  {0xF5, 1, &OpDispatchBuilder::FLAGControlOp},
  {0xF8, 2, &OpDispatchBuilder::FLAGControlOp},
  {0xFA, 2, &OpDispatchBuilder::PermissionRestrictedOp},
  {0xFC, 2, &OpDispatchBuilder::FLAGControlOp},
};
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 Crypto instructions to IR
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"

#include <FEXCore/Utils/LogManager.h>
#include "Interface/Core/OpcodeDispatcher.h"

#include <cstdint>

namespace FEXCore::IR {
class OrderedNode;

#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// SHA1NEXTE: the result is the source with its top 32-bit element replaced by
// source[3] + rotate(dest[3]), where the rotate is performed by the host SHA1H op.
void OpDispatchBuilder::SHA1NEXTEOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Message = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // The ARMv8 SHA1 extension's `SHA1H` does a fixed rotate by 30, but works on element 0 while x86
  // wants element 3. Broadcast element 3 so it also sits in element 0, rotate, then broadcast the
  // rotated element 0 back across the vector. This still saves an instruction compared to a
  // lowering that doesn't use the SHA extension.
  Ref TopBroadcast = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, State, 3);
  Ref Rotated = _VSha1H(TopBroadcast);
  Ref RotatedBroadcast = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Rotated, 0);

  // Add in every lane, then keep only lane 3 of the sum on top of the untouched source.
  Ref Sum = _VAdd(OpSize::i128Bit, OpSize::i32Bit, Message, RotatedBroadcast);
  Ref Merged = _VInsElement(OpSize::i128Bit, OpSize::i32Bit, 3, 3, Message, Sum);

  StoreResultFPR(Op, Merged);
}

// SHA1MSG1: XORs the destination words with the message words two positions further along.
void OpDispatchBuilder::SHA1MSG1Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref Words0to3 = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Words4to7 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Extract across the two registers starting at 64-bit element 1, giving [W2, W3, W4, W5].
  Ref Words2to5 = _VExtr(OpSize::i128Bit, OpSize::i64Bit, Words0to3, Words4to7, 1);

  // [W0, W1, W2, W3] ^ [W2, W3, W4, W5]
  Ref Mixed = _VXor(OpSize::i128Bit, OpSize::i8Bit, Words0to3, Words2to5);

  StoreResultFPR(Op, Mixed);
}

// SHA1MSG2: lowered to the host SHA1SU1 op with element-order fixups on inputs and output.
void OpDispatchBuilder::SHA1MSG2Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref DestVec = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SrcVec = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // ARM SHA1 mostly matches the x86 semantics, but both inputs and the output use the
  // reverse element order (3,2,1,0 instead of 0,1,2,3), so shuffle on the way in...
  Ref DestShuffled = SHADataShuffle(DestVec);
  Ref SrcShuffled = SHADataShuffle(SrcVec);

  // ...and shuffle again on the way out.
  Ref Scheduled = _VSha1SU1(DestShuffled, SrcShuffled);
  Ref Final = SHADataShuffle(Scheduled);

  StoreResultFPR(Op, Final);
}

// SHA1RNDS4: performs four SHA1 rounds. The low two bits of the immediate select both the
// round constant that is added to the source and which host SHA1 round op is used.
void OpDispatchBuilder::SHA1RNDS4Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  // Round-constant vectors indexed by the two-bit function selector.
  static constexpr FEXCore::IR::NamedVectorConstant RoundConstants[4] = {
    FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K0,
    FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K1,
    FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K2,
    FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K3,
  };

  const uint64_t Function = Op->Src[1].Literal() & 0b11;
  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Message = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref RoundConstant = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, RoundConstants[Function]);

  const auto ZeroRegister = LoadZeroVector(OpSize::i32Bit);

  // Inputs are shuffled into the element order the host ops use; the constant is added after the shuffle.
  Ref StateShuffled = SHADataShuffle(State);
  Ref MessageShuffled = SHADataShuffle(Message);
  MessageShuffled = _VAdd(OpSize::i128Bit, OpSize::i32Bit, MessageShuffled, RoundConstant);

  Ref Rounds {};
  switch (Function) {
  case 0: Rounds = _VSha1C(StateShuffled, ZeroRegister, MessageShuffled); break;
  case 2: Rounds = _VSha1M(StateShuffled, ZeroRegister, MessageShuffled); break;
  default:
    // Selectors 1 and 3 share the same host op.
    Rounds = _VSha1P(StateShuffled, ZeroRegister, MessageShuffled);
    break;
  }

  // Undo the element-order shuffle on the result.
  StoreResultFPR(Op, SHADataShuffle(Rounds));
}

// SHA256MSG1: forwarded directly to the host SHA256U0 op.
void OpDispatchBuilder::SHA256MSG1Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref DestVec = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SrcVec = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, _VSha256U0(DestVec, SrcVec));
}

// SHA256MSG2: rearranges the operands into the layout the host SHA256U1 op is given, then applies it.
void OpDispatchBuilder::SHA256MSG2Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref DestVec = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SrcVec = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // First operand: the destination extracted against itself at 32-bit element index 3.
  Ref FirstOperand = _VExtr(OpSize::i128Bit, OpSize::i32Bit, DestVec, DestVec, 3);

  // Second operand: the broadcast of destination element 3 zipped (64-bit, upper halves) with the source.
  Ref DestTop = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, DestVec, 3);
  Ref SecondOperand = _VZip2(OpSize::i128Bit, OpSize::i64Bit, DestTop, SrcVec);

  StoreResultFPR(Op, _VSha256U1(FirstOperand, SecondOperand));
}

// SHA256RNDS2: two SHA256 rounds, using XMM0 as the implicit key operand.
void OpDispatchBuilder::SHA256RNDS2Op(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsSHA) {
    UnimplementedOp(Op);
    return;
  }

  Ref DestVec = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SrcVec = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The key operand is hardcoded to XMM0.
  auto ImplicitKey = LoadXMMRegister(0);

  // Builds the SHA256 `abcd` layout from the x86 layout: zip the upper 64-bit halves, then swap 32-bit pairs.
  auto MakeABCD = [this](Ref First, Ref Second) -> Ref {
    auto Zipped = _VZip2(OpSize::i128Bit, OpSize::i64Bit, Second, First);
    return _VRev64(OpSize::i128Bit, OpSize::i32Bit, Zipped);
  };

  // Builds the SHA256 `efgh` layout from the x86 layout: zip the lower 64-bit halves, then swap 32-bit pairs.
  auto MakeEFGH = [this](Ref First, Ref Second) -> Ref {
    auto Zipped = _VZip(OpSize::i128Bit, OpSize::i64Bit, Second, First);
    return _VRev64(OpSize::i128Bit, OpSize::i32Bit, Zipped);
  };

  Ref StateABCD = MakeABCD(DestVec, SrcVec);
  Ref StateEFGH = MakeEFGH(DestVec, SrcVec);

  // x86 only consumes the bottom 64 bits of the key; duplicate them to match the ARM64 semantics.
  auto Key = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, ImplicitKey, 0);

  auto NewABCD = _VSha256H(StateABCD, StateEFGH, Key);
  auto NewEFGH = _VSha256H2(StateEFGH, StateABCD, Key);

  // Convert the pair of results back with the same `abcd` shuffle.
  StoreResultFPR(Op, MakeABCD(NewABCD, NewEFGH));
}

// AESIMC: forwarded to the VAESImc IR op when the host supports AES.
void OpDispatchBuilder::AESImcOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  StoreResultFPR(Op, _VAESImc(Input));
}

// AESENC (legacy encoding): destination is the state, Src[0] is the round key.
void OpDispatchBuilder::AESEncOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RoundKey = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The IR op also takes a zero vector operand.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  StoreResultFPR(Op, _VAESEnc(OpSize::i128Bit, State, RoundKey, Zero));
}

// VAESENC (VEX encoding): Src[0] is the state, Src[1] is the round key.
// Only the 128-bit form is implemented.
void OpDispatchBuilder::VAESEncOp(OpcodeArgs) {
  // Match the non-VEX AES handlers: without host AES support the op is reported as unimplemented
  // rather than emitting an IR op the host has no instruction for.
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  const auto DstSize = OpSizeFromDst(Op);
  // Only consumed by the assertion below, which may compile to nothing.
  [[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;

  // TODO: Handle 256-bit VAESENC.
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENC unimplemented");

  Ref State = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Key = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Result = _VAESEnc(DstSize, State, Key, LoadZeroVector(DstSize));

  StoreResultFPR(Op, Result);
}

// AESENCLAST (legacy encoding): destination is the state, Src[0] is the round key.
void OpDispatchBuilder::AESEncLastOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RoundKey = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The IR op also takes a zero vector operand.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  StoreResultFPR(Op, _VAESEncLast(OpSize::i128Bit, State, RoundKey, Zero));
}

// VAESENCLAST (VEX encoding): Src[0] is the state, Src[1] is the round key.
// Only the 128-bit form is implemented.
void OpDispatchBuilder::VAESEncLastOp(OpcodeArgs) {
  // Match the non-VEX AES handlers: without host AES support the op is reported as unimplemented
  // rather than emitting an IR op the host has no instruction for.
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  const auto DstSize = OpSizeFromDst(Op);
  // Only consumed by the assertion below, which may compile to nothing.
  [[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;

  // TODO: Handle 256-bit VAESENCLAST.
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESENCLAST unimplemented");

  Ref State = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Key = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Result = _VAESEncLast(DstSize, State, Key, LoadZeroVector(DstSize));

  StoreResultFPR(Op, Result);
}

// AESDEC (legacy encoding): destination is the state, Src[0] is the round key.
void OpDispatchBuilder::AESDecOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RoundKey = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The IR op also takes a zero vector operand.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  StoreResultFPR(Op, _VAESDec(OpSize::i128Bit, State, RoundKey, Zero));
}

// VAESDEC (VEX encoding): Src[0] is the state, Src[1] is the round key.
// Only the 128-bit form is implemented.
void OpDispatchBuilder::VAESDecOp(OpcodeArgs) {
  // Match the non-VEX AES handlers: without host AES support the op is reported as unimplemented
  // rather than emitting an IR op the host has no instruction for.
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  const auto DstSize = OpSizeFromDst(Op);
  // Only consumed by the assertion below, which may compile to nothing.
  [[maybe_unused]] const auto Is128Bit = DstSize == OpSize::i128Bit;

  // TODO: Handle 256-bit VAESDEC.
  LOGMAN_THROW_A_FMT(Is128Bit, "256-bit VAESDEC unimplemented");

  Ref State = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Key = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Result = _VAESDec(DstSize, State, Key, LoadZeroVector(DstSize));

  StoreResultFPR(Op, Result);
}

// AESDECLAST (legacy encoding): destination is the state, Src[0] is the round key.
void OpDispatchBuilder::AESDecLastOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsAES) {
    UnimplementedOp(Op);
    return;
  }

  Ref State = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RoundKey = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The IR op also takes a zero vector operand.
  Ref Zero = LoadZeroVector(OpSize::i128Bit);

  StoreResultFPR(Op, _VAESDecLast(OpSize::i128Bit, State, RoundKey, Zero));
}

// Three-operand (VEX) AESDECLAST: inputs come from Src[0] and Src[1].
// Only the 128-bit form is handled; any other size trips the assertion.
void OpDispatchBuilder::VAESDecLastOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromDst(Op);

  // TODO: Handle 256-bit VAESDECLAST.
  LOGMAN_THROW_A_FMT(VecSize == OpSize::i128Bit, "256-bit VAESDECLAST unimplemented");

  Ref State = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Key = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  // The IR op takes an explicit zero vector as its last operand.
  Ref ZeroVec = LoadZeroVector(VecSize);

  StoreResultFPR(Op, _VAESDecLast(VecSize, State, Key, ZeroVec));
}

// Builds the IR for AESKEYGENASSIST and returns the result node without
// storing it; the caller decides where the value goes.
// Src[0] is the vector input, Src[1] is the immediate passed through as RCON.
Ref OpDispatchBuilder::AESKeyGenAssistImpl(OpcodeArgs) {
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const uint64_t RCON = Op->Src[1].Literal();

  // Operands of the IR op, materialized in the order they are consumed.
  Ref Swizzle = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE);
  Ref ZeroVec = LoadZeroVector(OpSize::i128Bit);
  return _VAESKeyGenAssist(Input, Swizzle, ZeroVec, RCON);
}

// Dispatcher entry for AESKEYGENASSIST: emits the IR through
// AESKeyGenAssistImpl and stores the result, or reports the instruction as
// unimplemented when the host has no AES support.
void OpDispatchBuilder::AESKeyGenAssist(OpcodeArgs) {
  if (CTX->HostFeatures.SupportsAES) {
    StoreResultFPR(Op, AESKeyGenAssistImpl(Op));
    return;
  }

  UnimplementedOp(Op);
}

// Two-operand PCLMULQDQ: the destination register is also the first input and
// Src[1] carries the immediate selector.
// Requires the host's 128-bit PMULL feature; otherwise the op is unimplemented.
void OpDispatchBuilder::PCLMULQDQOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsPMULL_128Bit) {
    UnimplementedOp(Op);
    return;
  }

  // Only bits 0 and 4 of the immediate are forwarded to the IR op.
  constexpr uint8_t SelectorMask = 0b1'0001;

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const auto Imm = static_cast<uint8_t>(Op->Src[1].Literal());

  StoreResultFPR(Op, _PCLMUL(OpSize::i128Bit, First, Second, Imm & SelectorMask));
}

// Three-operand (VEX) PCLMULQDQ: inputs come from Src[0] and Src[1], and
// Src[2] carries the immediate selector. The operation size follows the
// destination operand.
// Requires the host's 128-bit PMULL feature; otherwise the op is unimplemented.
void OpDispatchBuilder::VPCLMULQDQOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsPMULL_128Bit) {
    UnimplementedOp(Op);
    return;
  }

  // Only bits 0 and 4 of the immediate are forwarded to the IR op.
  constexpr uint8_t SelectorMask = 0b1'0001;

  const auto VecSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  const auto Imm = static_cast<uint8_t>(Op->Src[2].Literal());

  StoreResultFPR(Op, _PCLMUL(VecSize, First, Second, Imm & SelectorMask));
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/DDDTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Dispatch entries for the "DDD" opcode table, keyed by opcode byte.
// NOTE(review): handler names such as RSqrt3DNowOp suggest this is the 3DNow!
// instruction set — confirm against the decoder tables.
//
// Each entry is {opcode, count, handler}. The middle field appears to be the
// number of consecutive opcodes the entry covers (every entry here uses 1) —
// confirm against the DispatchTableEntry definition.
constexpr DispatchTableEntry OpDispatch_DDDTable[] = {
  {0x0C, 1, &OpDispatchBuilder::PI2FWOp},
  {0x0D, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
  {0x1C, 1, &OpDispatchBuilder::PF2IWOp},
  {0x1D, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},

  {0x86, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECPPRECISION, OpSize::i32Bit>},
  {0x87, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RSqrt3DNowOp, false>},

  {0x8A, 1, &OpDispatchBuilder::PFNACCOp},
  {0x8E, 1, &OpDispatchBuilder::PFPNACCOp},

  {0x90, 1, &OpDispatchBuilder::VPFCMPOp<1>},
  {0x94, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>},
  {0x96, 1, &OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRECPPRECISION, OpSize::i32Bit>},
  {0x97, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RSqrt3DNowOp, true>},

  {0x9A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
  {0x9E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i32Bit>},

  {0xA0, 1, &OpDispatchBuilder::VPFCMPOp<2>},
  {0xA4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>},
  // Can be treated as a move
  {0xA6, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0xA7, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},

  // Uses VectorALUROp rather than VectorALUOp; presumably the operand-reversed
  // subtract — verify against VectorALUROp.
  {0xAA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VFSUB, OpSize::i32Bit>},
  {0xAE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i32Bit>},

  {0xB0, 1, &OpDispatchBuilder::VPFCMPOp<0>},
  {0xB4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>},
  // Can be treated as a move
  {0xB6, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0xB7, 1, &OpDispatchBuilder::PMULHRWOp},

  {0xBB, 1, &OpDispatchBuilder::PSWAPDOp},
  {0xBF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>},
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 flag generation
$end_info$
*/

#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/LogManager.h>

#include <array>
#include <cstdint>

namespace FEXCore::IR {
// Bit locations of every RFLAGS field that SetPackedRFLAG and GetPackedRFLAG
// iterate over, in visiting order.
//
// The ordering is load-bearing: when only the low flags byte is written,
// SetPackedRFLAG processes just the first five entries (CF, PF, AF, ZF, SF).
constexpr std::array<uint32_t, 17> FlagOffsets = {
  FEXCore::X86State::RFLAG_CF_RAW_LOC, FEXCore::X86State::RFLAG_PF_RAW_LOC, FEXCore::X86State::RFLAG_AF_RAW_LOC,
  FEXCore::X86State::RFLAG_ZF_RAW_LOC, FEXCore::X86State::RFLAG_SF_RAW_LOC, FEXCore::X86State::RFLAG_TF_RAW_LOC,
  FEXCore::X86State::RFLAG_IF_LOC,     FEXCore::X86State::RFLAG_DF_RAW_LOC, FEXCore::X86State::RFLAG_OF_RAW_LOC,
  FEXCore::X86State::RFLAG_IOPL_LOC,   FEXCore::X86State::RFLAG_NT_LOC,     FEXCore::X86State::RFLAG_RF_LOC,
  FEXCore::X86State::RFLAG_VM_LOC,     FEXCore::X86State::RFLAG_AC_LOC,     FEXCore::X86State::RFLAG_VIF_LOC,
  FEXCore::X86State::RFLAG_VIP_LOC,    FEXCore::X86State::RFLAG_ID_LOC,
};

void OpDispatchBuilder::ZeroPF_AF() {
  // PF is stored inverted, so invert it when we zero.
  SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(Constant(1));
  SetAF(0);
}

void OpDispatchBuilder::SetPackedRFLAG(bool Lower8, Ref Src) {
  size_t NumFlags = FlagOffsets.size();
  if (Lower8) {
    // Calculate flags early.
    // This is only a partial overwrite of flags since OF isn't stored here.
    CalculateDeferredFlags();
    NumFlags = 5;
  }

  // PF and CF are both stored inverted, so hoist the invert.
  auto SrcInverted = _Not(OpSize::i32Bit, Src);

  for (size_t i = 0; i < NumFlags; ++i) {
    const auto FlagOffset = FlagOffsets[i];

    if (FlagOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
      // AF is in bit 4 architecturally, and we need to store it to bit 4 of our
      // AF register, with garbage in the other bits. The extract is deferred.
      // We also defer a XOR with the result bit, which is implemented as XOR
      // with PF[4]. But the _Bfe below reliably zeros bit 4 of the PF byte, so
      // that will be a no-op and we get the right result.
      //
      // So we write out the whole flags byte to AF without an extract.
      static_assert(FEXCore::X86State::RFLAG_AF_RAW_LOC == 4);
      SetRFLAG(Src, FEXCore::X86State::RFLAG_AF_RAW_LOC);
    } else if (FlagOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC || FlagOffset == FEXCore::X86State::RFLAG_CF_RAW_LOC) {
      // PF and CF are both stored parity flipped.
      SetRFLAG(SrcInverted, FlagOffset, FlagOffset, true);
    } else {
      SetRFLAG(Src, FlagOffset, FlagOffset, true);
    }
  }

  CFInverted = true;
}

// Gathers the flags selected by FlagsMask into a single packed value with
// each flag at its architectural bit position.
//
// FlagsMask: one bit per flag location to include. The CF bit must always be
//            set (asserted below).
Ref OpDispatchBuilder::GetPackedRFLAG(uint32_t FlagsMask) {
  // Write out any cached NZCV before reading flags back.
  CalculateDeferredFlags();

  // SF/ZF sit next to each other on x86 just as N/Z do on arm64, so when both
  // are requested they are pulled out of NZCV together at the end.
  constexpr uint32_t SFZFMask = (1U << FEXCore::X86State::RFLAG_SF_RAW_LOC) | (1U << FEXCore::X86State::RFLAG_ZF_RAW_LOC);
  const bool GetNZ = (FlagsMask & SFZFMask) == SFZFMask;

  // CF seeds the result: it is bit 0, so it needs neither a shift nor an OR.
  LOGMAN_THROW_A_FMT(FlagsMask & (1 << FEXCore::X86State::RFLAG_CF_RAW_LOC), "CF always handled");
  static_assert(FEXCore::X86State::RFLAG_CF_RAW_LOC == 0);
  Ref Packed = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

  for (const auto Loc : FlagOffsets) {
    const bool Requested = ((1U << Loc) & FlagsMask) != 0;
    if (!Requested) {
      continue;
    }

    // CF was handled above; PF and (when GetNZ) SF/ZF are handled after the loop.
    const bool IsSFOrZF = Loc == FEXCore::X86State::RFLAG_SF_RAW_LOC || Loc == FEXCore::X86State::RFLAG_ZF_RAW_LOC;
    const bool IsCFOrPF = Loc == FEXCore::X86State::RFLAG_CF_RAW_LOC || Loc == FEXCore::X86State::RFLAG_PF_RAW_LOC;
    if ((GetNZ && IsSFOrZF) || IsCFOrPF) {
      continue;
    }

    // Only the bottom bit of the flag value matters to the insert; the rest of
    // the byte is allowed to be garbage.
    Ref FlagValue;
    if (Loc == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
      FlagValue = LoadAF();
    } else {
      FlagValue = GetRFLAG(Loc);
    }

    Packed = _Orlshl(OpSize::i64Bit, Packed, FlagValue, Loc);
  }

  // Raw PF needs its bottom bit isolated and inverted. Rather than the naive
  // and/eor/orlshl sequence, the inversion is folded into the final OR.
  if (FlagsMask & (1 << FEXCore::X86State::RFLAG_PF_RAW_LOC)) {
    // Force every bit except the bottommost to one.
    auto OnesInvPF = _Or(OpSize::i64Bit, LoadPFRaw(false, false), _InlineConstant(~1ull));

    // Rotating moves the bottom bit to PF's position (111P1111); the or-not
    // then inverts it (000p0000) while merging it in. One A64 instruction.
    auto RightRotation = 64 - FEXCore::X86State::RFLAG_PF_RAW_LOC;
    Packed = _Ornror(OpSize::i64Bit, Packed, OnesInvPF, RightRotation);
  }

  // SF/ZF are merged last so that the shift can fold into the OR.
  if (GetNZ) {
    static_assert(FEXCore::X86State::RFLAG_SF_RAW_LOC == (FEXCore::X86State::RFLAG_ZF_RAW_LOC + 1));
    auto NZCV = GetNZCV();
    auto NZ = _And(OpSize::i64Bit, NZCV, _InlineConstant(0b11u << 30));
    Packed = _Orlshr(OpSize::i64Bit, Packed, NZ, 31 - FEXCore::X86State::RFLAG_SF_RAW_LOC);
  }

  // The constant reserved bit is ORed in last to avoid a pointless or xzr, #2.
  if ((1U << X86State::RFLAG_RESERVED_LOC) & FlagsMask) {
    Packed = _Or(OpSize::i64Bit, Packed, _InlineConstant(2));
  }

  return Packed;
}

// Computes OF for an add (Sub == false) or subtract (Sub == true) of
// Src1 and Src2 producing Res, and stores it from the operand's sign bit.
//
// SrcSize must be between 8 and 64 bits inclusive.
void OpDispatchBuilder::CalculateOF(IR::OpSize SrcSize, Ref Res, Ref Src1, Ref Src2, bool Sub) {
  LOGMAN_THROW_A_FMT(SrcSize >= IR::OpSize::i8Bit && SrcSize <= IR::OpSize::i64Bit, "Invalid size");
  const auto EmitSize = SrcSize == IR::OpSize::i64Bit ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
  const uint64_t SignBit = IR::OpSizeAsBits(SrcSize) - 1;
  Ref Overflow = nullptr;

  // Add: OF is set iff both sources share a sign and the result's sign
  // differs. Sub: OF is set iff the sources have different signs and the
  // result's sign matches the second source.
  //
  // When the sign of source 2 is known this collapses to a single and-not:
  //  - add with negative Src2, or sub with non-negative Src2:
  //      overflow iff Src1 is negative and Res is not   -> Src1 & ~Res
  //  - add with non-negative Src2, or sub with negative Src2:
  //      overflow iff Res is negative and Src1 is not   -> Res & ~Src1
  uint64_t Src2Value;
  if (IsValueConstant(WrapNode(Src2), &Src2Value)) {
    const bool Src2Negative = ((Src2Value >> SignBit) & 1) != 0;

    if (Src2Negative != Sub) {
      Overflow = _Andn(EmitSize, Src1, Res);
    } else {
      Overflow = _Andn(EmitSize, Res, Src1);
    }
  } else {
    // General case, built from the two sign-difference terms.
    auto SourcesDiffer = _Xor(EmitSize, Src1, Src2);
    auto ResultDiffers = _Xor(EmitSize, Res, Src1);

    if (Sub) {
      Overflow = _And(EmitSize, ResultDiffers, SourcesDiffer);
    } else {
      Overflow = _Andn(EmitSize, ResultDiffers, SourcesDiffer);
    }
  }

  // Only the sign bit of the computed value is the actual flag.
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Overflow, SignBit, true);
}

// Evaluates parity on the deferred raw PF value. Mask and Invert are
// forwarded unchanged to the parity IR op.
Ref OpDispatchBuilder::LoadPFRaw(bool Mask, bool Invert) {
  // PF optimization is gated on this flag because most blocks never read
  // parity; record that this one does.
  CurrentHeader->ReadsParity = true;

  Ref RawPF = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC);
  return _Parity(RawPF, Mask, Invert);
}

// Resolves the deferred AF representation into a single-bit value.
Ref OpDispatchBuilder::LoadAF() {
  // The AF slot holds the XOR of the operation's arguments...
  auto StoredAF = GetRFLAG(FEXCore::X86State::RFLAG_AF_RAW_LOC);

  // ...and the PF slot holds the operation's result.
  auto StoredResult = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC);

  // Deferred part: combine the two and pull out bit 4. The XOR is 64-bit
  // because no masking is needed.
  Ref Combined = _Xor(OpSize::i64Bit, StoredAF, StoredResult);
  return _Bfe(OpSize::i32Bit, 1, 4, Combined);
}

void OpDispatchBuilder::FixupAF() {
  // The caller has set a desired value of AF in AF[4], regardless of the value
  // of PF. We need to fixup AF[4] so that we get the right value when we XOR in
  // PF[4] later. The easiest solution is to XOR by PF[4], since:
  //
  //  (AF[4] ^ PF[4]) ^ PF[4] = AF[4]

  auto PFRaw = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC);
  auto AFRaw = GetRFLAG(FEXCore::X86State::RFLAG_AF_RAW_LOC);

  // Again 64-bit as masking is more expensive.
  Ref XorRes = _Xor(OpSize::i64Bit, AFRaw, PFRaw);
  SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(XorRes);
}

// Stores a known AF value, shifting it into AF[4] and compensating for the
// PF[4] XOR that happens when AF is read back.
//
// Pre-XORing with PF cancels the later XOR:
//
//  (AF[4] ^ PF[4]) ^ PF[4] = AF[4]
void OpDispatchBuilder::SetAFAndFixup(Ref AF) {
  auto RawPF = GetRFLAG(FEXCore::X86State::RFLAG_PF_RAW_LOC);

  // A single op both shifts AF left by 4 and XORs it with the raw PF value.
  Ref Fixed = _XorShift(OpSize::i32Bit, RawPF, AF, ShiftType::LSL, 4);
  SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Fixed);
}

// Records Res as the raw PF value. No parity is computed here: the actual
// evaluation happens in LoadPFRaw when the flag is read.
void OpDispatchBuilder::CalculatePF(Ref Res) {
  // Calculation is entirely deferred until load, just store the 8-bit result.
  SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(Res);
}

// Stores the deferred AF state for an operation on Src1 and Src2.
//
// The AF slot normally holds Src1 ^ Src2; on read it is XORed with the result
// (available through the PF slot) and bit 4 is extracted.
void OpDispatchBuilder::CalculateAF(Ref Src1, Ref Src2) {
  // Only bit 4 matters for the later XOR. If one operand is a constant, XOR
  // with a zero bit is a no-op and XOR with a one bit is an inversion, so the
  // other operand can be stored directly or inverted. Src2 is tried first.
  Ref Operands[2] = {Src2, Src1};
  for (unsigned Idx = 0; Idx < 2; ++Idx) {
    Ref MaybeConst = Operands[Idx];
    Ref Other = Operands[1 - Idx];

    uint64_t Value;
    if (!IsValueConstant(WrapNode(MaybeConst), &Value)) {
      continue;
    }

    if (Value & (1u << 4)) {
      SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(_Not(OpSize::i32Bit, Other));
    } else {
      SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Other);
    }
    return;
  }

  // General case. Identical operands XOR to zero; otherwise use a 64-bit XOR
  // to avoid masking.
  Ref Stored;
  if (Src1 == Src2) {
    Stored = Constant(0);
  } else {
    Stored = _Xor(OpSize::i64Bit, Src1, Src2);
  }
  SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Stored);
}

// Writes out the cached NZCV value if it is dirty, then drops the cache.
void OpDispatchBuilder::CalculateDeferredFlags() {
  const bool NeedsStore = NZCVDirty && CachedNZCV;
  if (NeedsStore) {
    _StoreNZCV(CachedNZCV);
  }

  // The cache is reset whether or not anything was stored.
  NZCVDirty = false;
  CachedNZCV = nullptr;
}

// Returns Src + CF, honouring whichever polarity CF is currently stored in.
Ref OpDispatchBuilder::IncrementByCarry(OpSize OpSize, Ref Src) {
  // The select-increment op adds one when its condition is FALSE. With CF in
  // normal polarity that means selecting on carry-clear (.cc); with CF
  // inverted the condition flips to carry-set (.cs).
  const auto PassThroughCond = CFInverted ? CondClass::UGE : CondClass::ULT;
  return _NZCVSelectIncrement(OpSize, PassThroughCond, Src, Src);
}

// Emits an add-with-carry of Src1 and Src2 at SrcSize, updates the flags and
// returns the result.
Ref OpDispatchBuilder::CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2) {
  const auto EmitSize = SrcSize == IR::OpSize::i64Bit ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;

  CalculateAF(Src1, Src2);

  if (SrcSize >= IR::OpSize::i32Bit) {
    // 32/64-bit: the flag-setting add-with-carry op does everything, given CF
    // in non-inverted polarity.
    RectifyCarryInvert(false);
    HandleNZCV_RMW();
    Ref Wide = _AdcWithFlags(EmitSize, Src1, Src2);
    CFInverted = false;

    CalculatePF(Wide);
    return Wide;
  }

  // 8/16-bit: do the arithmetic at 32 bits and derive the flags by hand.
  const auto SrcBits = IR::OpSizeAsBits(SrcSize);

  // Zero-extend so that the comparisons below behave.
  Ref Src2Ext = ARef(Src2).Bfe(0, SrcBits).Ref();

  // Deliberately NOT truncated: correct handling of Src2 = 0xffff relies on
  // full 32-bit arithmetic here.
  Ref Src2PlusCF = IncrementByCarry(EmitSize, Src2Ext);

  // The sum, however, is zero-extended for the comparison.
  Ref Sum = Add(EmitSize, Src1, Src2PlusCF);
  Ref Res = _Bfe(EmitSize, SrcBits, 0, Sum);

  // TODO: We can fold that second Bfe in (cmp uxth).
  auto SelectCFInv = Select01(EmitSize, CondClass::UGE, Res, Src2PlusCF);

  SetNZ_ZeroCV(SrcSize, Res);
  SetCFInverted(SelectCFInv);
  CalculateOF(SrcSize, Res, Src1, Src2Ext, false);

  CalculatePF(Res);
  return Res;
}

// Emits a subtract-with-borrow of Src2 from Src1 at SrcSize, updates the
// flags and returns the result.
Ref OpDispatchBuilder::CalculateFlags_SBB(IR::OpSize SrcSize, Ref Src1, Ref Src2) {
  const auto EmitSize = SrcSize == IR::OpSize::i64Bit ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;

  CalculateAF(Src1, Src2);

  if (SrcSize >= IR::OpSize::i32Bit) {
    // Arm's subtraction carry is the inverse of x86's, so the incoming CF is
    // rectified to inverted polarity and the outgoing CF is marked inverted.
    RectifyCarryInvert(true);
    HandleNZCV_RMW();
    Ref Wide = _SbbWithFlags(EmitSize, Src1, Src2);
    CFInverted = true;

    CalculatePF(Wide);
    return Wide;
  }

  // 8/16-bit: do the arithmetic at 32 bits and derive the flags by hand.
  const auto SrcBits = IR::OpSizeAsBits(SrcSize);

  // Zero-extend both inputs for correct comparison behaviour with
  // Src1 = 0xffff.
  Ref Src1Ext = _Bfe(EmitSize, SrcBits, 0, Src1);
  Ref Src2Ext = ARef(Src2).Bfe(0, SrcBits).Ref();

  auto Src2PlusCF = IncrementByCarry(EmitSize, Src2Ext);

  Ref Diff = Sub(EmitSize, Src1Ext, Src2PlusCF);
  Ref Res = _Bfe(EmitSize, SrcBits, 0, Diff);

  auto SelectCFInv = Select01(EmitSize, CondClass::UGE, Src1Ext, Src2PlusCF);

  SetNZ_ZeroCV(SrcSize, Res);
  SetCFInverted(SelectCFInv);
  CalculateOF(SrcSize, Res, Src1Ext, Src2Ext, true);

  CalculatePF(Res);
  return Res;
}

// Emits Src1 - Src2 at SrcSize, updates the flags and returns the result.
//
// UpdateCF: when false, CF is preserved across the operation.
Ref OpDispatchBuilder::CalculateFlags_SUB(IR::OpSize SrcSize, Ref Src1, Ref Src2, bool UpdateCF) {
  // The flag-setting subtract below clobbers CF, so save it first if the
  // caller wants it kept.
  Ref SavedCFInv = nullptr;
  if (!UpdateCF) {
    SavedCFInv = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC, true);
  }

  HandleNZCVWrite();

  CalculateAF(Src1, Src2);

  Ref Res;
  if (SrcSize < OpSize::i32Bit) {
    // 8/16-bit: flags come from a sized NZCV op, the value from a 32-bit sub.
    _SubNZCV(SrcSize, Src1, Src2);
    Res = Sub(OpSize::i32Bit, Src1, Src2);
  } else {
    Res = SubWithFlags(SrcSize, Src1, Src2);
  }

  CalculatePF(Res);

  if (!UpdateCF) {
    // Put back the CF that was stomped over.
    SetCFInverted(SavedCFInv);
  } else {
    // SubNZCV's carry is inverted relative to x86.
    CFInverted = true;
  }

  return Res;
}

// Emits Src1 + Src2 at SrcSize, updates the flags and returns the result.
//
// UpdateCF: when false, CF is preserved across the operation.
Ref OpDispatchBuilder::CalculateFlags_ADD(IR::OpSize SrcSize, Ref Src1, Ref Src2, bool UpdateCF) {
  // The flag-setting add below clobbers CF, so save it first if the caller
  // wants it kept.
  Ref SavedCFInv = nullptr;
  if (!UpdateCF) {
    SavedCFInv = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC, true);
  }

  HandleNZCVWrite();

  CalculateAF(Src1, Src2);

  Ref Res;
  if (SrcSize < OpSize::i32Bit) {
    // 8/16-bit: flags come from a sized NZCV op, the value from a 32-bit add.
    _AddNZCV(SrcSize, Src1, Src2);
    Res = Add(OpSize::i32Bit, Src1, Src2);
  } else {
    Res = AddWithFlags(SrcSize, Src1, Src2);
  }

  CalculatePF(Res);

  if (!UpdateCF) {
    // Put back the CF that was stomped over while calculating flags.
    SetCFInverted(SavedCFInv);
  } else {
    // Add carries match between x86 and arm64.
    CFInverted = false;
  }

  return Res;
}

// Sets CF/OF for a signed multiply whose low part is Res and high part is
// High. PF and AF are invalidated.
void OpDispatchBuilder::CalculateFlags_MUL(IR::OpSize SrcSize, Ref Res, Ref High) {
  HandleNZCVWrite();
  InvalidatePF_AF();

  // CF and OF are set when the product does not fit in the destination. It
  // fits exactly when the high part equals the sign bit of the low part
  // sign-extended across the register, so compare the two.
  const auto SignBitIndex = IR::OpSizeAsBits(SrcSize) - 1;
  auto SignFill = _Sbfe(OpSize::i64Bit, 1, SignBitIndex, Res);
  _SubNZCV(OpSize::i64Bit, High, SignFill);

  // Equal -> the conditional compare of zero with zero yields nZCv.
  // Not equal -> the fallback nzcV is used. SF/ZF are undefined, so once the
  // carry is treated as inverted this is exactly what is needed.
  auto Zero = _InlineConstant(0);
  _CondSubNZCV(OpSize::i64Bit, Zero, Zero, CondClass::EQ, 0x1 /* nzcV */);
  CFInverted = true;
}

// Sets CF/OF for an unsigned multiply given the high part of the product.
// PF and AF are invalidated.
void OpDispatchBuilder::CalculateFlags_UMUL(Ref High) {
  HandleNZCVWrite();
  InvalidatePF_AF();

  auto Zero = _InlineConstant(0);
  const auto HighSize = GetOpSize(High);

  // CF and OF are set when the product does not fit in the destination, i.e.
  // when the high part is non-zero. Compare it against zero.
  _SubNZCV(HighSize, High, Zero);

  // High == 0 -> the conditional compare of zero with zero yields nZCv.
  // High != 0 -> the fallback nzcV is used. SF/ZF are undefined, so this is
  // exactly what is needed with the carry treated as inverted.
  _CondSubNZCV(HighSize, Zero, Zero, CondClass::EQ, 0x1 /* nzcV */);
  CFInverted = true;
}

// Flag update for logical operations producing Res at SrcSize: AF is
// invalidated, and the remaining flags are delegated to SetNZP_ZeroCV
// (by its name: N/Z/P from the result with C and V zeroed).
void OpDispatchBuilder::CalculateFlags_Logical(IR::OpSize SrcSize, Ref Res) {
  InvalidateAF();
  SetNZP_ZeroCV(SrcSize, Res);
}

// Flag update for a left shift of Src1 by the constant Shift.
//
// UnmaskedRes: the shifted value, used for NZ, PF and (for Shift == 1) OF.
void OpDispatchBuilder::CalculateFlags_ShiftLeftImmediate(IR::OpSize SrcSize, Ref UnmaskedRes, Ref Src1, uint64_t Shift) {
  // A zero shift leaves every flag untouched.
  if (Shift == 0) {
    return;
  }

  const auto SrcSizeBits = IR::OpSizeAsBits(SrcSize);

  SetNZ_ZeroCV(SrcSize, UnmaskedRes);

  // CF: the last bit shifted out of the source. Shift is already masked, but
  // for 8/16-bit operands it can still be >= the operand width, in which case
  // CF is cleared. That was already done by SetNZ_ZeroCV above.
  if (Shift < SrcSizeBits) {
    SetCFDirect(Src1, SrcSizeBits - Shift, true);
  }

  CalculatePF(UnmaskedRes);
  InvalidateAF();

  // OF is only defined for a shift of one, where it is
  // <top source bit> XOR <top result bit>. In every other case it is
  // undefined and stays zero from SetNZ_ZeroCV.
  if (Shift != 1) {
    return;
  }

  const auto EmitSize = SrcSize == IR::OpSize::i64Bit ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
  auto TopBitChanged = _Xor(EmitSize, UnmaskedRes, Src1);
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(TopBitChanged, SrcSizeBits - 1, true);
}

// Flag update for an arithmetic right shift of Src1 by the constant Shift,
// producing Res.
void OpDispatchBuilder::CalculateFlags_SignShiftRightImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift) {
  // A zero shift leaves every flag untouched.
  if (Shift == 0) {
    return;
  }

  // NZ from the result (C/V zeroed), CF from the last bit shifted out, PF
  // from the result, AF invalidated: the shared right-shift sequence.
  CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift);

  // OF needs no further work. It is only defined for a shift of one, where an
  // arithmetic shift always yields zero; for other counts it is undefined and
  // we choose zero as well. The common sequence has already zeroed it.
}

// Shared flag sequence for right shifts by a non-zero constant: NZ from the
// result, CF from the last bit shifted out, PF from the result, AF
// invalidated. Callers must handle Shift == 0 themselves.
void OpDispatchBuilder::CalculateFlags_ShiftRightImmediateCommon(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift) {
  // This also zeroes C and V. Clobbering OF is fine: it is only defined for
  // Shift == 1, and callers that need it set it afterwards.
  SetNZ_ZeroCV(SrcSize, Res);

  // The last bit shifted out of the source becomes CF.
  const uint64_t LastBitOut = Shift - 1;
  SetCFDirect(Src1, LastBitOut, true);

  CalculatePF(Res);
  InvalidateAF();
}

// Flag update for a logical right shift of Src1 by the constant Shift,
// producing Res.
void OpDispatchBuilder::CalculateFlags_ShiftRightImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift) {
  // A zero shift leaves every flag untouched.
  if (Shift == 0) {
    return;
  }

  CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift);

  // OF is only defined for a shift of one; otherwise it is left as the common
  // sequence set it.
  if (Shift != 1) {
    return;
  }

  // For a shift of one, OF is the MSB of the original value.
  const auto MSBIndex = IR::OpSizeAsBits(SrcSize) - 1;
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Src1, MSBIndex, true);
}

// Flag update for a double-precision right shift of Src1 by the constant
// Shift, producing Res.
void OpDispatchBuilder::CalculateFlags_ShiftRightDoubleImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift) {
  // A zero shift leaves every flag untouched.
  if (Shift == 0) {
    return;
  }

  CalculateFlags_ShiftRightImmediateCommon(SrcSize, Res, Src1, Shift);

  // OF is only defined for a shift of one; otherwise it is left as the common
  // sequence set it.
  if (Shift != 1) {
    return;
  }

  // For a shift of one, OF is set if the MSB changed, i.e. the top bit of
  // Src1 XOR Res.
  const auto EmitSize = SrcSize == IR::OpSize::i64Bit ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
  auto MSBChanged = _Xor(EmitSize, Src1, Res);
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(MSBChanged, IR::OpSizeAsBits(SrcSize) - 1, true);
}

// Flag update for the zero-count instructions given their Result.
// OF, SF, AF and PF are all undefined for these.
void OpDispatchBuilder::CalculateFlags_ZCNT(IR::OpSize SrcSize, Ref Result) {
  // ZF comes from testing the result; SF is undefined, so setting it as a
  // side effect is harmless.
  SetNZ_ZeroCV(SrcSize, Result);

  // CF must be set when Result equals the operand width in bits. The width is
  // a power of two and Result never exceeds it, so that is the same as
  // testing bit log2(width) of Result. No higher bit can be set, hence no
  // masking is needed.
  const auto WidthInBits = IR::OpSizeAsBits(SrcSize);
  const unsigned CarryBit = FEXCore::ilog2(WidthInBits);
  SetCFDirect(Result, CarryBit);
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F38Tables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Packs a prefix bitmask and an opcode byte into a single table key: the
// prefix bits sit above bit 8 and the opcode occupies the low byte.
// Both arguments are parenthesized so that compound expressions (e.g. a
// ternary or an OR of prefix bits) expand with the intended precedence.
#define OPD(prefix, opcode) (((prefix) << 8) | (opcode))

// Prefix bits for the key; they may be ORed together (for example
// PF_38_66 | PF_38_F2).
constexpr uint16_t PF_38_NONE = 0;
constexpr uint16_t PF_38_66 = (1U << 0);
constexpr uint16_t PF_38_F2 = (1U << 1);
constexpr uint16_t PF_38_F3 = (1U << 2);

// Dispatch entries for the H0F38 opcode table. Keys are built with
// OPD(prefix, opcode), so the same opcode byte can map to different handlers
// depending on which PF_38_* prefix bits are present.
//
// Each entry is {key, count, handler}. The middle field appears to be the
// number of consecutive keys the entry covers (the MOVBE entries use 2,
// starting at 0xF0) — confirm against the DispatchTableEntry definition.
constexpr DispatchTableEntry OpDispatch_H0F38Table[] = {
  {OPD(PF_38_NONE, 0x00), 1, &OpDispatchBuilder::PSHUFBOp},
  {OPD(PF_38_66, 0x00), 1, &OpDispatchBuilder::PSHUFBOp},
  {OPD(PF_38_NONE, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i16Bit>},
  {OPD(PF_38_66, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i16Bit>},
  {OPD(PF_38_NONE, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADDP, OpSize::i32Bit>},
  {OPD(PF_38_NONE, 0x03), 1, &OpDispatchBuilder::PHADDS},
  {OPD(PF_38_66, 0x03), 1, &OpDispatchBuilder::PHADDS},
  {OPD(PF_38_NONE, 0x04), 1, &OpDispatchBuilder::PMADDUBSW},
  {OPD(PF_38_66, 0x04), 1, &OpDispatchBuilder::PMADDUBSW},
  {OPD(PF_38_NONE, 0x05), 1, &OpDispatchBuilder::PHSUB<OpSize::i16Bit>},
  {OPD(PF_38_66, 0x05), 1, &OpDispatchBuilder::PHSUB<OpSize::i16Bit>},
  {OPD(PF_38_NONE, 0x06), 1, &OpDispatchBuilder::PHSUB<OpSize::i32Bit>},
  {OPD(PF_38_66, 0x06), 1, &OpDispatchBuilder::PHSUB<OpSize::i32Bit>},
  {OPD(PF_38_NONE, 0x07), 1, &OpDispatchBuilder::PHSUBS},
  {OPD(PF_38_66, 0x07), 1, &OpDispatchBuilder::PHSUBS},
  {OPD(PF_38_NONE, 0x08), 1, &OpDispatchBuilder::PSIGN<OpSize::i8Bit>},
  {OPD(PF_38_66, 0x08), 1, &OpDispatchBuilder::PSIGN<OpSize::i8Bit>},
  {OPD(PF_38_NONE, 0x09), 1, &OpDispatchBuilder::PSIGN<OpSize::i16Bit>},
  {OPD(PF_38_66, 0x09), 1, &OpDispatchBuilder::PSIGN<OpSize::i16Bit>},
  {OPD(PF_38_NONE, 0x0A), 1, &OpDispatchBuilder::PSIGN<OpSize::i32Bit>},
  {OPD(PF_38_66, 0x0A), 1, &OpDispatchBuilder::PSIGN<OpSize::i32Bit>},
  {OPD(PF_38_NONE, 0x0B), 1, &OpDispatchBuilder::PMULHRSW},
  {OPD(PF_38_66, 0x0B), 1, &OpDispatchBuilder::PMULHRSW},
  {OPD(PF_38_66, 0x10), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i8Bit>},
  {OPD(PF_38_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorVariableBlend, OpSize::i64Bit>},
  {OPD(PF_38_66, 0x17), 1, &OpDispatchBuilder::PTestOp},
  {OPD(PF_38_NONE, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>},
  {OPD(PF_38_66, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>},
  {OPD(PF_38_NONE, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>},
  {OPD(PF_38_66, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>},
  {OPD(PF_38_NONE, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>},
  // Element-widening handlers; the trailing `true` template argument is
  // presumably "signed" (these pair with the `false` entries at 0x30-0x35) —
  // verify against ExtendVectorElements.
  {OPD(PF_38_66, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, true>},
  {OPD(PF_38_66, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, true>},
  {OPD(PF_38_66, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, true>},
  {OPD(PF_38_66, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, true>},
  {OPD(PF_38_66, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, true>},
  {OPD(PF_38_66, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, true>},
  {OPD(PF_38_66, 0x28), 1, &OpDispatchBuilder::PMULLOp<OpSize::i32Bit, true>},
  {OPD(PF_38_66, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i64Bit>},
  {OPD(PF_38_66, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp},
  {OPD(PF_38_66, 0x2B), 1, &OpDispatchBuilder::PACKUSOp<OpSize::i32Bit>},
  {OPD(PF_38_66, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, false>},
  {OPD(PF_38_66, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, false>},
  {OPD(PF_38_66, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, false>},
  {OPD(PF_38_66, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, false>},
  {OPD(PF_38_66, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, false>},
  {OPD(PF_38_66, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, false>},
  {OPD(PF_38_66, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i64Bit>},
  {OPD(PF_38_66, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i8Bit>},
  {OPD(PF_38_66, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i16Bit>},
  {OPD(PF_38_66, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i8Bit>},
  {OPD(PF_38_66, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i16Bit>},
  {OPD(PF_38_66, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i32Bit>},
  {OPD(PF_38_66, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp},

  // SHA handlers (unprefixed encodings only).
  {OPD(PF_38_NONE, 0xC8), 1, &OpDispatchBuilder::SHA1NEXTEOp},
  {OPD(PF_38_NONE, 0xC9), 1, &OpDispatchBuilder::SHA1MSG1Op},
  {OPD(PF_38_NONE, 0xCA), 1, &OpDispatchBuilder::SHA1MSG2Op},
  {OPD(PF_38_NONE, 0xCB), 1, &OpDispatchBuilder::SHA256RNDS2Op},
  {OPD(PF_38_NONE, 0xCC), 1, &OpDispatchBuilder::SHA256MSG1Op},
  {OPD(PF_38_NONE, 0xCD), 1, &OpDispatchBuilder::SHA256MSG2Op},

  // AES handlers (66-prefixed encodings only).
  {OPD(PF_38_66, 0xDB), 1, &OpDispatchBuilder::AESImcOp},
  {OPD(PF_38_66, 0xDC), 1, &OpDispatchBuilder::AESEncOp},
  {OPD(PF_38_66, 0xDD), 1, &OpDispatchBuilder::AESEncLastOp},
  {OPD(PF_38_66, 0xDE), 1, &OpDispatchBuilder::AESDecOp},
  {OPD(PF_38_66, 0xDF), 1, &OpDispatchBuilder::AESDecLastOp},

  // The only entries with a count of 2 (see the note on the middle field above).
  {OPD(PF_38_NONE, 0xF0), 2, &OpDispatchBuilder::MOVBEOp},
  {OPD(PF_38_66, 0xF0), 2, &OpDispatchBuilder::MOVBEOp},

  // With the F2 prefix (alone or combined with 66) the same opcodes go to CRC32.
  {OPD(PF_38_F2, 0xF0), 1, &OpDispatchBuilder::CRC32},
  {OPD(PF_38_F2, 0xF1), 1, &OpDispatchBuilder::CRC32},

  {OPD(PF_38_66 | PF_38_F2, 0xF0), 1, &OpDispatchBuilder::CRC32},
  {OPD(PF_38_66 | PF_38_F2, 0xF1), 1, &OpDispatchBuilder::CRC32},

  // Both prefixed forms of 0xF6 share one handler.
  {OPD(PF_38_66, 0xF6), 1, &OpDispatchBuilder::ADXOp},
  {OPD(PF_38_F3, 0xF6), 1, &OpDispatchBuilder::ADXOp},
};
#undef OPD

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/H0F3ATables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Packs a REX selector, a prefix selector and an opcode byte into a single
// table key: REX at bit 9, prefix at bit 8, opcode in the low byte.
// Every argument is parenthesized so that compound expressions (for example
// `a | b` as the prefix) are shifted as a whole instead of binding to the
// neighbouring operators.
#define OPD(REX, prefix, opcode) (((REX) << 9) | ((prefix) << 8) | (opcode))
// Prefix selector values for the key.
#define PF_3A_NONE 0
#define PF_3A_66 1
constexpr auto OpDispatchTableGenH0F3A = []() consteval {
  constexpr auto OpDispatchTableGenH0F3AREX = []<uint16_t REX>() consteval {
    constexpr DispatchTableEntry Table[] = {
      {OPD(REX, PF_3A_66, 0x08), 1, &OpDispatchBuilder::VectorRound<OpSize::i32Bit>},
      {OPD(REX, PF_3A_66, 0x09), 1, &OpDispatchBuilder::VectorRound<OpSize::i64Bit>},
      {OPD(REX, PF_3A_66, 0x0A), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i32Bit>},
      {OPD(REX, PF_3A_66, 0x0B), 1, &OpDispatchBuilder::InsertScalarRound<OpSize::i64Bit>},
      {OPD(REX, PF_3A_66, 0x0C), 1, &OpDispatchBuilder::VectorBlend<OpSize::i32Bit>},
      {OPD(REX, PF_3A_66, 0x0D), 1, &OpDispatchBuilder::VectorBlend<OpSize::i64Bit>},
      {OPD(REX, PF_3A_66, 0x0E), 1, &OpDispatchBuilder::VectorBlend<OpSize::i16Bit>},

      {OPD(REX, PF_3A_NONE, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},
      {OPD(REX, PF_3A_66, 0x0F), 1, &OpDispatchBuilder::PAlignrOp},

      {OPD(REX, PF_3A_66, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>},
      {OPD(REX, PF_3A_66, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
      {OPD(REX, PF_3A_66, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},

      {OPD(REX, PF_3A_66, 0x20), 1, &OpDispatchBuilder::PINSROp<OpSize::i8Bit>},
      {OPD(REX, PF_3A_66, 0x21), 1, &OpDispatchBuilder::InsertPSOp},
      {OPD(REX, PF_3A_66, 0x40), 1, &OpDispatchBuilder::DPPOp<OpSize::i32Bit>},
      {OPD(REX, PF_3A_66, 0x41), 1, &OpDispatchBuilder::DPPOp<OpSize::i64Bit>},
      {OPD(REX, PF_3A_66, 0x42), 1, &OpDispatchBuilder::MPSADBWOp},
      {OPD(REX, PF_3A_66, 0x44), 1, &OpDispatchBuilder::PCLMULQDQOp},

      {OPD(REX, PF_3A_66, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp},
      {OPD(REX, PF_3A_66, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp},
      {OPD(REX, PF_3A_66, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp},
      {OPD(REX, PF_3A_66, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp},

      {OPD(REX, PF_3A_NONE, 0xCC), 1, &OpDispatchBuilder::SHA1RNDS4Op},
      {OPD(REX, PF_3A_66, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist},

    };
    return std::to_array(Table);
  };

  auto REX0 = OpDispatchTableGenH0F3AREX.template operator()<0>();
  auto REX1 = OpDispatchTableGenH0F3AREX.template operator()<1>();
  auto concat = []<typename T, size_t N1, size_t N2>(const std::array<T, N1>& lhs,
                                                     const std::array<T, N2>& rhs) consteval -> std::array<T, N1 + N2> {
    std::array<T, N1 + N2> Table {};
    for (size_t i = 0; i < N1; ++i) {
      Table[i] = lhs[i];
    }

    for (size_t i = 0; i < N2; ++i) {
      Table[N1 + i] = rhs[i];
    }

    return Table;
  };
  return concat(REX0, REX1);
};

constexpr auto OpDispatch_H0F3ATableIgnoreREX = OpDispatchTableGenH0F3A();

constexpr DispatchTableEntry OpDispatch_H0F3ATableNeedsREX0[] = {
  {OPD(0, PF_3A_66, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
  {OPD(0, PF_3A_66, 0x22), 1, &OpDispatchBuilder::PINSROp<OpSize::i32Bit>},
};

#undef PF_3A_NONE
#undef PF_3A_66

#undef OPD
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/PrimaryGroupTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
using X86Tables::OpToIndex;
// Key packing for OpDispatch_PrimaryGroupTables:
//   bits [6+] - group number relative to TYPE_GROUP_1
//   bits [5:3] - the `prefix` argument; every entry below passes OpToIndex(<opcode byte>) here
//   bits [2:0] - Reg (values 0-7 are used below)
// NOTE(review): OpToIndex is declared in X86Tables and is not visible here; it is presumably
// a compact per-group index for the opcode byte that fits in three bits - confirm.
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
// Each entry is {key, count, handler}.
// NOTE(review): DispatchTableEntry is declared elsewhere; the second field looks like the number
// of consecutive keys that share the handler (always 1 in this table) - confirm.
constexpr DispatchTableEntry OpDispatch_PrimaryGroupTables[] = {
  // GROUP 1
  // Opcodes 0x80, 0x81 and 0x83 use the same handler for each Reg value.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 0), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ADCOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SBBOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 4), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 5), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::CMPOp, 1>}, // CMP

  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ADCOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SBBOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 4), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::CMPOp, 1>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ADCOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SBBOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6), 1, &OpDispatchBuilder::SecondaryALUOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::CMPOp, 1>},

  // GROUP 2
  // Six opcodes (0xC0, 0xC1, 0xD0, 0xD1, 0xD2, 0xD3), eight Reg values each.
  // Reg 4 and Reg 6 always share one handler (see the SAL comments).
  // 0xC0 and 0xC1 use identical handlers for every Reg value.
  // NOTE(review): 0xC0 uses RCLOp/RCROp for Reg 2/3 whereas 0xD2 uses the *SmallerOp variants;
  // the handler bodies are not visible here, so whether RCLOp/RCROp cover the same operand sizes
  // should be confirmed.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, true, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, true, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, &OpDispatchBuilder::RCLOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, &OpDispatchBuilder::RCROp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHRImmediateOp, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, false>}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, true, false>}, // SAR

  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, true, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, true, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, &OpDispatchBuilder::RCLOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, &OpDispatchBuilder::RCROp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHRImmediateOp, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, false>}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, true, false>}, // SAR

  // 0xD0 and 0xD1 differ only in the Reg 3 handler (RCROp8x1Bit vs RCROp1Bit).
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, true, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, true, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, &OpDispatchBuilder::RCLOp1Bit},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, &OpDispatchBuilder::RCROp8x1Bit},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHRImmediateOp, true>}, // 1Bit SHR
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, true>}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, true, true>},   // SAR

  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, true, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, true, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, &OpDispatchBuilder::RCLOp1Bit},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, &OpDispatchBuilder::RCROp1Bit},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, true>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHRImmediateOp, true>}, // 1Bit SHR
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHLImmediateOp, true>}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, true, true>},   // SAR

  // 0xD2 and 0xD3 differ only in the Reg 2/3 handlers (RCLSmallerOp/RCRSmallerOp vs RCLOp/RCROp).
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, false, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, false, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, &OpDispatchBuilder::RCLSmallerOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, &OpDispatchBuilder::RCRSmallerOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, &OpDispatchBuilder::SHLOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, false, false>}, // SAR

  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, true, false, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::RotateOp, false, false, false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, &OpDispatchBuilder::RCLOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, &OpDispatchBuilder::RCROp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, &OpDispatchBuilder::SHLOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, &OpDispatchBuilder::SHROp}, // SHR by CL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 6), 1, &OpDispatchBuilder::SHLOp}, // SAL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_2, OpToIndex(0xD3), 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::ASHROp, false, false>}, // SAR

  // GROUP 3
  // 0xF6 and 0xF7 use the same handler per Reg value; Reg 0 and Reg 1 both dispatch to TESTOp.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 2), 1, &OpDispatchBuilder::NOTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 3), 1, &OpDispatchBuilder::NEGOp}, // NEG
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 4), 1, &OpDispatchBuilder::MULOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 5), 1, &OpDispatchBuilder::IMULOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, &OpDispatchBuilder::DIVOp},  // DIV
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF6), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV

  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::TESTOp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 2), 1, &OpDispatchBuilder::NOTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 3), 1, &OpDispatchBuilder::NEGOp}, // NEG

  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 4), 1, &OpDispatchBuilder::MULOp}, // MUL
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 5), 1, &OpDispatchBuilder::IMULOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, &OpDispatchBuilder::DIVOp},  // DIV
  {OPD(FEXCore::X86Tables::TYPE_GROUP_3, OpToIndex(0xF7), 7), 1, &OpDispatchBuilder::IDIVOp}, // IDIV

  // GROUP 4
  // Only Reg 0 and Reg 1 have entries for opcode 0xFE.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_4, OpToIndex(0xFE), 0), 1, &OpDispatchBuilder::INCOp}, // INC
  {OPD(FEXCore::X86Tables::TYPE_GROUP_4, OpToIndex(0xFE), 1), 1, &OpDispatchBuilder::DECOp}, // DEC

  // GROUP 5
  // Reg 0-6 have entries for opcode 0xFF; Reg 7 has none.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 0), 1, &OpDispatchBuilder::INCOp}, // INC
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 1), 1, &OpDispatchBuilder::DECOp}, // DEC
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 2), 1, &OpDispatchBuilder::CALLAbsoluteOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 3), 1, &OpDispatchBuilder::CALLFARIndirectOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 4), 1, &OpDispatchBuilder::JUMPAbsoluteOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 5), 1, &OpDispatchBuilder::JUMPFARIndirectOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_5, OpToIndex(0xFF), 6), 1, &OpDispatchBuilder::PUSHOp},

  // GROUP 11
  // Only Reg 0 has an entry for opcodes 0xC6 and 0xC7.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_11, OpToIndex(0xC6), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVGPROp, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_11, OpToIndex(0xC7), 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVGPROp, 1>},
};
#undef OPD

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Key packing for OpDispatch_SecondaryGroupTables:
//   bits [5+]  - group number relative to TYPE_GROUP_6
//   bits [4:3] - prefix selector (one of the PF_* constants below, 0-3)
//   bits [2:0] - Reg (values 0-7 are used below)
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg))
// Prefix selector values used as the middle key field.
constexpr uint16_t PF_NONE = 0;
constexpr uint16_t PF_F3 = 1;
constexpr uint16_t PF_66 = 2;
constexpr uint16_t PF_F2 = 3;
// Each entry is {key, count, handler}.
// NOTE(review): DispatchTableEntry is declared elsewhere; the second field looks like the number
// of consecutive keys (starting at `key`) that share the handler - e.g. the NOPOp entries below
// use 4, 5 and 8 - confirm against the table installer.
constexpr DispatchTableEntry OpDispatch_SecondaryGroupTables[] = {
  // GROUP 6
  // Reg 3 is routed to PermissionRestrictedOp for every prefix selector.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_6, PF_NONE, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_6, PF_F3, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_6, PF_66, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_6, PF_F2, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},

  // GROUP 7
  // Every listed Reg value uses the same handler regardless of the prefix selector.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 0), 1, &OpDispatchBuilder::SGDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 0), 1, &OpDispatchBuilder::SGDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 0), 1, &OpDispatchBuilder::SGDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 0), 1, &OpDispatchBuilder::SGDTOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 1), 1, &OpDispatchBuilder::SIDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 1), 1, &OpDispatchBuilder::SIDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 1), 1, &OpDispatchBuilder::SIDTOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 1), 1, &OpDispatchBuilder::SIDTOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 3), 1, &OpDispatchBuilder::PermissionRestrictedOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 4), 1, &OpDispatchBuilder::SMSWOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 4), 1, &OpDispatchBuilder::SMSWOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 4), 1, &OpDispatchBuilder::SMSWOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 4), 1, &OpDispatchBuilder::SMSWOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 6), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 6), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 6), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 6), 1, &OpDispatchBuilder::PermissionRestrictedOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_NONE, 7), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F3, 7), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_66, 7), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_7, PF_F2, 7), 1, &OpDispatchBuilder::PermissionRestrictedOp},

  // GROUP 8
  // Reg 4-7 select the BTAction (None, Set, Clear, Complement); the prefix selector does not
  // change the handler.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTNone>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F3, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTNone>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTNone>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F2, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTNone>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_NONE, 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTSet>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F3, 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTSet>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_66, 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTSet>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F2, 5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTSet>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTClear>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F3, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTClear>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTClear>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F2, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTClear>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_NONE, 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTComplement>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F3, 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTComplement>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_66, 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTComplement>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_8, PF_F2, 7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 1, BTAction::BTComplement>},

  // GROUP 9
  // Reg 1 maps to CMPXCHGPairOp for every prefix selector. Reg 6/7 map to RDRANDOp<false>/<true>
  // only for PF_NONE and PF_66; with PF_F3, Reg 7 maps to RDPIDOp instead.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_NONE, 1), 1, &OpDispatchBuilder::CMPXCHGPairOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_NONE, 6), 1, &OpDispatchBuilder::RDRANDOp<false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_NONE, 7), 1, &OpDispatchBuilder::RDRANDOp<true>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_66, 1), 1, &OpDispatchBuilder::CMPXCHGPairOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_66, 6), 1, &OpDispatchBuilder::RDRANDOp<false>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_66, 7), 1, &OpDispatchBuilder::RDRANDOp<true>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_F2, 1), 1, &OpDispatchBuilder::CMPXCHGPairOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_F3, 1), 1, &OpDispatchBuilder::CMPXCHGPairOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_9, PF_F3, 7), 1, &OpDispatchBuilder::RDPIDOp},

  // GROUP 12
  // Groups 12, 13 and 14 bind the same shift handlers with element sizes of 16, 32 and 64 bits
  // respectively; only PF_NONE and PF_66 have entries.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i16Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i16Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i16Bit>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i16Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i16Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_12, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i16Bit>},

  // GROUP 13
  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i32Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i32Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i32Bit>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i32Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAIOp, OpSize::i32Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_13, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i32Bit>},

  // GROUP 14
  // No Reg 4 (PSRAIOp) entry exists at 64 bits; PSRLDQ/PSLLDQ (Reg 3/7) are PF_66 only.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i64Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_NONE, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i64Bit>},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLI, OpSize::i64Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 3), 1, &OpDispatchBuilder::PSRLDQ},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLLI, OpSize::i64Bit>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_14, PF_66, 7), 1, &OpDispatchBuilder::PSLLDQ},

  // GROUP 15
  // Reg 5-7 with PF_NONE go to combined handlers that cover two instructions each (named in the
  // trailing comments); which one executes is decided inside the handler, not by this table.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 0), 1, &OpDispatchBuilder::FXSaveOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 1), 1, &OpDispatchBuilder::FXRStoreOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 2), 1, &OpDispatchBuilder::LDMXCSR},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 3), 1, &OpDispatchBuilder::STMXCSR},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 4), 1, &OpDispatchBuilder::XSaveOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 5), 1, &OpDispatchBuilder::LoadFenceOrXRSTOR},   // LFENCE (or XRSTOR)
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 6), 1, &OpDispatchBuilder::MemFenceOrXSAVEOPT},  // MFENCE (or XSAVEOPT)
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_NONE, 7), 1, &OpDispatchBuilder::StoreFenceOrCLFlush}, // SFENCE (or CLFLUSH)

  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 5), 1, &OpDispatchBuilder::UnimplementedOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F3, 6), 1, &OpDispatchBuilder::UMonitorOrCLRSSBSY},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 6), 1, &OpDispatchBuilder::CLWBOrTPause},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_66, 7), 1, &OpDispatchBuilder::CLFLUSHOPT},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_15, PF_F2, 6), 1, &OpDispatchBuilder::UMWaitOp},

  // GROUP 16
  // The same five entries are repeated for each prefix selector: Reg 0-3 are Prefetch bindings,
  // and the Reg 4 entry has a count of 4 with NOPOp (presumably covering Reg 4-7 - see the
  // count note at the top of the table).
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, true, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 2>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 3>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_NONE, 4), 4, &OpDispatchBuilder::NOPOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, true, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 2>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 3>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F3, 4), 4, &OpDispatchBuilder::NOPOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, true, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 2>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 3>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_66, 4), 4, &OpDispatchBuilder::NOPOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, true, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 2>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 3>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_16, PF_F2, 4), 4, &OpDispatchBuilder::NOPOp},

  // GROUP 17
  // Only the PF_66, Reg 0 combination has an entry.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_17, PF_66, 0), 1, &OpDispatchBuilder::Extrq_imm},

  // GROUP P
  // PF_NONE: Reg 0-2 are Prefetch bindings and the Reg 3 entry has a count of 5 with NOPOp.
  // Every other prefix selector has a single Reg 0 entry with a count of 8 and NOPOp.
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_NONE, 0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, false, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_NONE, 1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, true, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_NONE, 2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Prefetch, true, false, 1>},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_NONE, 3), 5, &OpDispatchBuilder::NOPOp},

  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_F3, 0), 8, &OpDispatchBuilder::NOPOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_66, 0), 8, &OpDispatchBuilder::NOPOp},
  {OPD(FEXCore::X86Tables::TYPE_GROUP_P, PF_F2, 0), 8, &OpDispatchBuilder::NOPOp},
};

#undef OPD

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryModRMTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Dispatch entries keyed as (index << 3) | low3.
// The upper field is a compact index 0-3, not the raw REG value: per the section comments below,
// index 0 -> REG /1, 1 -> REG /2, 2 -> REG /3, 3 -> REG /7.
// NOTE(review): the low three bits are presumably the ModRM r/m field - confirm against the
// decoder that builds this key.
// Each entry is {key, count, handler}; every count in this table is 1.
constexpr DispatchTableEntry OpDispatch_SecondaryModRMTables[] = {
  // REG /1
  {((0 << 3) | 0), 1, &OpDispatchBuilder::UnimplementedOp},
  {((0 << 3) | 1), 1, &OpDispatchBuilder::UnimplementedOp},

  // REG /2
  {((1 << 3) | 0), 1, &OpDispatchBuilder::XGetBVOp},

  // REG /3
  {((2 << 3) | 7), 1, &OpDispatchBuilder::PermissionRestrictedOp},

  // REG /7
  {((3 << 3) | 0), 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {((3 << 3) | 1), 1, &OpDispatchBuilder::RDTSCPOp},
  {((3 << 3) | 4), 1, &OpDispatchBuilder::CLZeroOp},
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/SecondaryTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Dispatch handlers for the two-byte opcode table.
//
// Each entry is {opcode, count, handler}.
// NOTE(review): `count` appears to be the number of consecutive opcodes, starting at `opcode`,
// that share the handler (e.g. 16 entries for the CMOVcc/Jcc/SETcc condition-code ranges) -
// confirm against the DispatchTableEntry definition.
// The companion tables below (Rep / RepNE / OpSize) cover the same opcode bytes; presumably they
// are selected by the F3 / F2 / 66 prefixes respectively - confirm against the table installer.
constexpr DispatchTableEntry OpDispatch_TwoByteOpTable[] = {
  // Instructions
  {0x03, 1, &OpDispatchBuilder::LSLOp},
  {0x06, 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {0x07, 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {0x0B, 1, &OpDispatchBuilder::INTOp},
  {0x0E, 1, &OpDispatchBuilder::X87EMMS},

  {0x19, 7, &OpDispatchBuilder::NOPOp}, // NOP with ModRM

  {0x20, 4, &OpDispatchBuilder::PermissionRestrictedOp},

  {0x30, 1, &OpDispatchBuilder::PermissionRestrictedOp},
  {0x31, 1, &OpDispatchBuilder::RDTSCOp},
  {0x32, 2, &OpDispatchBuilder::PermissionRestrictedOp},
  {0x34, 3, &OpDispatchBuilder::UnimplementedOp},

  {0x40, 16, &OpDispatchBuilder::CMOVOp},
  {0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::MMX>},
  {0x6F, 1, &OpDispatchBuilder::MOVQMMXOp},
  {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::MMX>},
  {0x7F, 1, &OpDispatchBuilder::MOVQMMXOp},
  {0x80, 16, &OpDispatchBuilder::CondJUMPOp},
  {0x90, 16, &OpDispatchBuilder::SETccOp},
  {0xA2, 1, &OpDispatchBuilder::CPUIDOp},
  {0xA3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 0, BTAction::BTNone>}, // BT
  {0xA4, 1, &OpDispatchBuilder::SHLDImmediateOp},
  {0xA5, 1, &OpDispatchBuilder::SHLDOp},
  {0xAB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 0, BTAction::BTSet>}, // BTS
  {0xAC, 1, &OpDispatchBuilder::SHRDImmediateOp},
  {0xAD, 1, &OpDispatchBuilder::SHRDOp},
  {0xAF, 1, &OpDispatchBuilder::IMUL1SrcOp},
  {0xB0, 2, &OpDispatchBuilder::CMPXCHGOp},                                            // CMPXCHG
  {0xB3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 0, BTAction::BTClear>}, // BTR
  {0xB6, 2, &OpDispatchBuilder::MOVZXOp},
  {0xBB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::BTOp, 0, BTAction::BTComplement>}, // BTC
  {0xBC, 1, &OpDispatchBuilder::BSFOp},                                                     // BSF
  {0xBD, 1, &OpDispatchBuilder::BSROp},                                                     // BSR
  {0xBE, 2, &OpDispatchBuilder::MOVSXOp},
  {0xC0, 2, &OpDispatchBuilder::XADDOp},
  {0xC3, 1, &OpDispatchBuilder::MOVGPRNTOp},
  {0xC4, 1, &OpDispatchBuilder::PINSROp<OpSize::i16Bit>},
  {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
  {0xC8, 8, &OpDispatchBuilder::BSWAPOp},

  // SSE
  {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0x12, 2, &OpDispatchBuilder::MOVLPOp},
  {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i32Bit>},
  {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i32Bit>},
  {0x16, 2, &OpDispatchBuilder::MOVHPDOp},
  {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp},
  {0x2A, 1, &OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
  {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
  {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i32Bit>},
  {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, OpSize::i32Bit>},
  {0x52, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>},
  {0x53, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>},
  {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
  {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>},
  {0x56, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
  {0x57, 1, &OpDispatchBuilder::VectorXOROp},
  {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i32Bit>},
  {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>},
  {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit, false>},
  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
  {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
  {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>},
  {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, OpSize::i32Bit>},
  {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>},
  {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i8Bit>},
  {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i16Bit>},
  {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i32Bit>},
  {0x63, 1, &OpDispatchBuilder::PACKSSOp<OpSize::i16Bit>},
  {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>},
  {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>},
  {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>},
  {0x67, 1, &OpDispatchBuilder::PACKUSOp<OpSize::i16Bit>},
  {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i8Bit>},
  {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i16Bit>},
  {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i32Bit>},
  {0x6B, 1, &OpDispatchBuilder::PACKSSOp<OpSize::i32Bit>},
  {0x70, 1, &OpDispatchBuilder::PSHUFW8ByteOp},

  {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>},
  {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>},
  {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>},
  {0x77, 1, &OpDispatchBuilder::X87EMMS},

  {0xC2, 1, &OpDispatchBuilder::VFCMPOp<OpSize::i32Bit>},
  {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, OpSize::i32Bit>},

  {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i16Bit>},
  {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i32Bit>},
  {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i64Bit>},
  {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i64Bit>},
  {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i16Bit>},
  {0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB
  {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>},
  {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>},
  {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>},
  {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i64Bit>},
  {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>},
  {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>},
  {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>},
  {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>},
  {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>},
  {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i16Bit>},
  {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i32Bit>},
  {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>},
  {0xE4, 1, &OpDispatchBuilder::PMULHW<false>},
  {0xE5, 1, &OpDispatchBuilder::PMULHW<true>},
  {0xE7, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>},
  {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>},
  {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>},
  {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i64Bit>},
  {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>},
  {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>},
  {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>},
  {0xEF, 1, &OpDispatchBuilder::VectorXOROp},

  {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i16Bit>},
  {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i32Bit>},
  {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i64Bit>},
  {0xF4, 1, &OpDispatchBuilder::PMULLOp<OpSize::i32Bit, false>},
  {0xF5, 1, &OpDispatchBuilder::PMADDWD},
  {0xF6, 1, &OpDispatchBuilder::PSADBW},
  {0xF7, 1, &OpDispatchBuilder::MASKMOVOp},
  {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i8Bit>},
  {0xF9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i16Bit>},
  {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i32Bit>},
  {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i64Bit>},
  {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i8Bit>},
  {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i16Bit>},
  {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i32Bit>},

#ifndef _WIN32
  // FEX reserved instructions
  // Only registered on non-Windows builds.
  {0x3E, 1, &OpDispatchBuilder::CallbackReturnOp},
  {0x3F, 1, &OpDispatchBuilder::ThunkOp},
#endif
};

// Dispatch handlers for the "Rep" variant of the two-byte opcode table.
// NOTE(review): presumably selected when the instruction carries the F3 (REP) prefix - confirm
// against the code that installs this table.
// Entries are {opcode, count, handler}; the handlers bound here are the 32-bit scalar forms
// (OpSize::i32Bit) plus a few integer ops (POPCNT/TZCNT/LZCNT handlers).
constexpr DispatchTableEntry OpDispatch_SecondaryRepModTables[] = {
  {0x10, 2, &OpDispatchBuilder::MOVSSOp},
  {0x12, 1, &OpDispatchBuilder::VMOVSLDUPOp},
  {0x16, 1, &OpDispatchBuilder::VMOVSHDUPOp},
  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i32Bit>},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>},
  {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>},
  {0x52, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>},
  {0x53, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>},
  {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>},
  {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>},
  {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>},
  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},
  {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>},
  {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>},
  {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>},
  {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>},
  {0x6F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, false>},
  {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>},
  {0x7F, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0xB8, 1, &OpDispatchBuilder::PopcountOp},
  {0xBC, 1, &OpDispatchBuilder::TZCNT},
  {0xBD, 1, &OpDispatchBuilder::LZCNT},
  {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp<OpSize::i32Bit>},
  {0xD6, 1, &OpDispatchBuilder::MOVQ2DQ<true>},
  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, true>},
};

// Dispatch handlers for the "RepNE" variant of the two-byte opcode table.
// NOTE(review): presumably selected when the instruction carries the F2 (REPNE) prefix - confirm
// against the code that installs this table.
// Entries are {opcode, count, handler}; the scalar handlers bound here use OpSize::i64Bit
// elements. Entries are not strictly sorted by opcode (0xC2 follows 0xD6).
constexpr DispatchTableEntry OpDispatch_SecondaryRepNEModTables[] = {
  {0x10, 2, &OpDispatchBuilder::MOVSDOp},
  {0x12, 1, &OpDispatchBuilder::MOVDDUPOp},
  {0x2A, 1, &OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i64Bit>},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, true>},
  {0x51, 1, &OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>},
  // x52 = Invalid
  {0x58, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>},
  {0x59, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>},
  {0x5A, 1, &OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>},
  {0x5C, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>},
  {0x5D, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>},
  {0x5E, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>},
  {0x5F, 1, &OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>},
  {0x70, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSHUFWOp, true>},
  {0x78, 1, &OpDispatchBuilder::Insertq_imm},
  {0x79, 1, &OpDispatchBuilder::Insertq},
  {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i32Bit>},
  {0x7D, 1, &OpDispatchBuilder::HSUBP<OpSize::i32Bit>},
  {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<OpSize::i32Bit>},
  {0xD6, 1, &OpDispatchBuilder::MOVQ2DQ<false>},
  {0xC2, 1, &OpDispatchBuilder::InsertScalarFCMPOp<OpSize::i64Bit>},
  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
  {0xF0, 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
};

// Dispatch handlers for the "OpSize" variant of the two-byte opcode table.
// NOTE(review): presumably selected when the instruction carries the 0x66 operand-size prefix -
// confirm against the code that installs this table.
// Entries are {opcode, count, handler}. The floating-point handlers bound here use
// OpSize::i64Bit elements; the integer handlers mirror the entries of the unprefixed table.
constexpr DispatchTableEntry OpDispatch_SecondaryOpSizeModTables[] = {
  {0x10, 2, &OpDispatchBuilder::MOVVectorUnalignedOp},
  {0x12, 2, &OpDispatchBuilder::MOVLPOp},
  {0x14, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i64Bit>},
  {0x15, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i64Bit>},
  {0x16, 2, &OpDispatchBuilder::MOVHPDOp},
  {0x28, 2, &OpDispatchBuilder::MOVVectorAlignedOp},
  {0x2A, 1, &OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float},
  {0x2B, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0x2C, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
  {0x2D, 1, &OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},
  {0x2E, 2, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},

  {0x50, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i64Bit>},
  {0x51, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorUnaryOp, IR::OP_VFSQRT, OpSize::i64Bit>},
  {0x54, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
  {0x55, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>},
  {0x56, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
  {0x57, 1, &OpDispatchBuilder::VectorXOROp},
  {0x58, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADD, OpSize::i64Bit>},
  {0x59, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMUL, OpSize::i64Bit>},
  {0x5A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit, false>},
  {0x5B, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
  {0x5C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>},
  {0x5D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMIN, OpSize::i64Bit>},
  {0x5E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFDIV, OpSize::i64Bit>},
  {0x5F, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFMAX, OpSize::i64Bit>},
  {0x60, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i8Bit>},
  {0x61, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i16Bit>},
  {0x62, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i32Bit>},
  {0x63, 1, &OpDispatchBuilder::PACKSSOp<OpSize::i16Bit>},
  {0x64, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>},
  {0x65, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>},
  {0x66, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>},
  {0x67, 1, &OpDispatchBuilder::PACKUSOp<OpSize::i16Bit>},
  {0x68, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i8Bit>},
  {0x69, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i16Bit>},
  {0x6A, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i32Bit>},
  {0x6B, 1, &OpDispatchBuilder::PACKSSOp<OpSize::i32Bit>},
  {0x6C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKLOp, OpSize::i64Bit>},
  {0x6D, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PUNPCKHOp, OpSize::i64Bit>},
  {0x6E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>},
  {0x6F, 1, &OpDispatchBuilder::MOVVectorAlignedOp},
  {0x70, 1, &OpDispatchBuilder::PSHUFDOp},

  {0x74, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>},
  {0x75, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>},
  {0x76, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>},
  // No handler is bound directly for 0x78; per the original note it is handled as GROUP 17.
  {0x78, 1, nullptr}, // GROUP 17
  {0x79, 1, &OpDispatchBuilder::Extrq},
  {0x7C, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VFADDP, OpSize::i64Bit>},
  {0x7D, 1, &OpDispatchBuilder::HSUBP<OpSize::i64Bit>},
  {0x7E, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::SSE>},
  {0x7F, 1, &OpDispatchBuilder::MOVVectorAlignedOp},
  {0xC2, 1, &OpDispatchBuilder::VFCMPOp<OpSize::i64Bit>},
  {0xC4, 1, &OpDispatchBuilder::PINSROp<OpSize::i16Bit>},
  {0xC5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
  {0xC6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::SHUFOp, OpSize::i64Bit>},

  {0xD0, 1, &OpDispatchBuilder::ADDSUBPOp<OpSize::i64Bit>},
  {0xD1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i16Bit>},
  {0xD2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i32Bit>},
  {0xD3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRLDOp, OpSize::i64Bit>},
  {0xD4, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i64Bit>},
  {0xD5, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VMUL, OpSize::i16Bit>},
  {0xD6, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::SSE>},
  {0xD7, 1, &OpDispatchBuilder::MOVMSKOpOne}, // PMOVMSKB
  {0xD8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>},
  {0xD9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>},
  {0xDA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>},
  {0xDB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
  {0xDC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>},
  {0xDD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>},
  {0xDE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>},
  {0xDF, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUROp, IR::OP_VANDN, OpSize::i64Bit>},
  {0xE0, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>},
  {0xE1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i16Bit>},
  {0xE2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSRAOp, OpSize::i32Bit>},
  {0xE3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>},
  {0xE4, 1, &OpDispatchBuilder::PMULHW<false>},
  {0xE5, 1, &OpDispatchBuilder::PMULHW<true>},
  {0xE6, 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
  {0xE7, 1, &OpDispatchBuilder::MOVVectorNTOp},
  {0xE8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>},
  {0xE9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>},
  {0xEA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>},
  {0xEB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
  {0xEC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>},
  {0xED, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>},
  {0xEE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>},
  {0xEF, 1, &OpDispatchBuilder::VectorXOROp},

  {0xF1, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i16Bit>},
  {0xF2, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i32Bit>},
  {0xF3, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PSLL, OpSize::i64Bit>},
  {0xF4, 1, &OpDispatchBuilder::PMULLOp<OpSize::i32Bit, false>},
  {0xF5, 1, &OpDispatchBuilder::PMADDWD},
  {0xF6, 1, &OpDispatchBuilder::PSADBW},
  {0xF7, 1, &OpDispatchBuilder::MASKMOVOp},
  {0xF8, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i8Bit>},
  {0xF9, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i16Bit>},
  {0xFA, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i32Bit>},
  {0xFB, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VSUB, OpSize::i64Bit>},
  {0xFC, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i8Bit>},
  {0xFD, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i16Bit>},
  {0xFE, 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VectorALUOp, IR::OP_VADD, OpSize::i32Bit>},
};
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/VEXTables.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Interface/Core/OpcodeDispatcher.h"

namespace FEXCore::IR {
// Builds the table index for a VEX-encoded instruction:
//   bits [7:0]  - opcode byte
//   bits [9:8]  - pp field
//   bits [10+]  - map_select, rebased so that map 1 becomes 0
// NOTE(review): pp presumably follows the VEX prefix encoding (0b00 none, 0b01 66, 0b10 F3,
// 0b11 F2) - confirm against the decoder.
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
// Dispatch handlers for VEX-encoded general-purpose (BMI-style) instructions in maps 2 and 3.
constexpr DispatchTableEntry OpDispatch_VEXTable[] = {
  {OPD(2, 0b00, 0xF2), 1, &OpDispatchBuilder::ANDNBMIOp}, {OPD(2, 0b00, 0xF5), 1, &OpDispatchBuilder::BZHI},
  {OPD(2, 0b10, 0xF5), 1, &OpDispatchBuilder::PEXT},      {OPD(2, 0b11, 0xF5), 1, &OpDispatchBuilder::PDEP},
  {OPD(2, 0b11, 0xF6), 1, &OpDispatchBuilder::MULX},      {OPD(2, 0b00, 0xF7), 1, &OpDispatchBuilder::BEXTRBMIOp},
  // The three prefixed forms of 0xF7 share one handler.
  {OPD(2, 0b01, 0xF7), 1, &OpDispatchBuilder::BMI2Shift}, {OPD(2, 0b10, 0xF7), 1, &OpDispatchBuilder::BMI2Shift},
  {OPD(2, 0b11, 0xF7), 1, &OpDispatchBuilder::BMI2Shift},

  {OPD(3, 0b11, 0xF0), 1, &OpDispatchBuilder::RORX},
};
#undef OPD

// Builds the table index for a VEX group instruction:
//   bits [2:0]  - the 3-bit `opcode` argument
//   bit  [3]    - pp
//   bits [4+]   - group number, rebased so that TYPE_VEX_GROUP_12 becomes 0
// NOTE(review): the 3-bit `opcode` value is presumably the ModRM.reg field that selects the
// instruction inside the group - confirm against the decoder.
#define OPD(group, pp, opcode) (((group - X86Tables::InstType::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode))
// Dispatch handlers for VEX group 17 (BLSR / BLSMSK / BLSI handlers).
constexpr DispatchTableEntry OpDispatch_VEXGroupTable[] = {
  {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b001), 1, &OpDispatchBuilder::BLSRBMIOp},
  {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b010), 1, &OpDispatchBuilder::BLSMSKBMIOp},
  {OPD(X86Tables::InstType::TYPE_VEX_GROUP_17, 0, 0b011), 1, &OpDispatchBuilder::BLSIBMIOp},
};
#undef OPD

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 Vector instructions to IR
$end_info$
*/

#include "Interface/Context/Context.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/LogManager.h>

#include <array>
#include <bit>
#include <cstdint>
#include <stddef.h>

namespace FEXCore::IR {
// Common parameter list shared by every opcode handler in this file: the decoded instruction,
// named `Op`. Marked [[maybe_unused]] so handlers that ignore it do not trigger warnings.
#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// Vector move using the load/store helpers' default alignment.
// A register-to-register move where source and destination are the same register emits nothing.
void OpDispatchBuilder::MOVVectorAlignedOp(OpcodeArgs) {
  const bool RegisterToRegister = Op->Dest.IsGPR() && Op->Src[0].IsGPR();
  const bool IsSelfMove = RegisterToRegister && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;

  if (!IsSelfMove) {
    Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    StoreResultFPR(Op, Value);
  }
}

// Vector move that requests byte alignment (OpSize::i8Bit) for both the load and the store.
// A register-to-register move where source and destination are the same register emits nothing.
void OpDispatchBuilder::MOVVectorUnalignedOp(OpcodeArgs) {
  const bool RegisterToRegister = Op->Dest.IsGPR() && Op->Src[0].IsGPR();
  const bool IsSelfMove = RegisterToRegister && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;

  if (!IsSelfMove) {
    Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
    StoreResultFPR(Op, Value, OpSize::i8Bit);
  }
}

// Non-temporal (streaming) vector moves.
//  - Register destination, >= 128-bit: non-temporal load (MOVNTDQA, SSE4.1 and its AVX/AVX2 forms).
//  - Register destination, < 128-bit: regular load/store tagged as a streaming access.
//  - Memory destination, < 128-bit: regular store tagged as streaming
//    (SSE4a MOVNTSS/MOVNTSD for XMM scalars, MOVNTQ for 64-bit MMX).
//  - Memory destination, >= 128-bit: a single non-temporal store.
void OpDispatchBuilder::MOVVectorNTOp(OpcodeArgs) {
  const auto Size = OpSizeFromDst(Op);
  const bool IsWide = Size >= OpSize::i128Bit;

  if (Op->Dest.IsGPR()) {
    if (IsWide) {
      // Only the address is needed here; the data itself is fetched by the non-temporal load.
      Ref Address = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.LoadData = false});
      auto Loaded = _VLoadNonTemporal(Size, Address, 0);
      StoreResultFPR(Op, Loaded, OpSize::i8Bit, MemoryAccessType::STREAM);
    } else {
      Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit, .AccessType = MemoryAccessType::STREAM});
      StoreResultFPR(Op, Value, OpSize::i8Bit, MemoryAccessType::STREAM);
    }
    return;
  }

  // From here on the destination is memory.
  LOGMAN_THROW_A_FMT(!Op->Dest.IsGPR(), "Destination can't be GPR for non-temporal stores");
  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit, .AccessType = MemoryAccessType::STREAM});

  if (IsWide) {
    // Single store non-temporal for larger operations.
    Ref Address = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});
    _VStoreNonTemporal(Size, Value, Address, 0);
  } else {
    // Normal streaming store if less than 128-bit.
    StoreResultFPR(Op, Value, OpSize::i8Bit, MemoryAccessType::STREAM);
  }
}

// VMOVAPS/VMOVAPD: vector move using the helpers' default alignment.
// When a 128-bit value is written to a register destination, it is passed through
// VZeroExtendOperand before being stored.
void OpDispatchBuilder::VMOVAPS_VMOVAPDOp(OpcodeArgs) {
  const bool NeedsZeroExtend = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE && Op->Dest.IsGPR();

  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  if (NeedsZeroExtend) {
    Value = VZeroExtendOperand(OpSize::i128Bit, Op->Src[0], Value);
  }

  StoreResultFPR(Op, Value);
}

// VMOVUPS/VMOVUPD: vector move that requests byte alignment for the load and the store.
// When a 128-bit value is written to a register destination, it is passed through
// VZeroExtendOperand before being stored.
void OpDispatchBuilder::VMOVUPS_VMOVUPDOp(OpcodeArgs) {
  const bool NeedsZeroExtend = GetSrcSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE && Op->Dest.IsGPR();

  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
  if (NeedsZeroExtend) {
    Value = VZeroExtendOperand(OpSize::i128Bit, Op->Src[0], Value);
  }

  StoreResultFPR(Op, Value, OpSize::i8Bit);
}

// Handles the "move high 64 bits" family registered for opcodes 0x16/0x17:
//  - xmm, xmm   (MOVLHPS): dest[127:64] = src[63:0], dest[63:0] preserved
//  - xmm, mem64 (MOVHPS/MOVHPD load): dest[127:64] = mem64, dest[63:0] preserved
//  - mem64, xmm (MOVHPS/MOVHPD store): mem64 = src[127:64]
void OpDispatchBuilder::MOVHPDOp(OpcodeArgs) {
  if (Op->Dest.IsGPR()) {
    if (Op->Src[0].IsGPR()) {
      // MOVLHPS between two vector registers.
      // The source operand is a vector register, so it has to be read through the FPR path;
      // the GPR loader is only appropriate for general-purpose registers and address generation.
      Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Ref Dest = LoadSourceFPR_WithOpSize(Op, Op->Dest, OpSize::i128Bit, Op->Flags);
      // Insert element 0 of Src into element 1 of Dest.
      auto Result = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, Dest, Src);
      StoreResultFPR(Op, Result);
    } else {
      // If the destination is a GPR then the source is memory
      // xmm1[127:64] = src
      Ref Src = MakeSegmentAddress(Op, Op->Src[0]);
      Ref Dest = LoadSourceFPR_WithOpSize(Op, Op->Dest, OpSize::i128Bit, Op->Flags);
      auto Result = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, Dest, 1, Src);
      StoreResultFPR(Op, Result);
    }
  } else {
    // In this case memory is the destination and the high bits of the XMM are source
    // Mem64 = xmm1[127:64]
    Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    Ref Dest = MakeSegmentAddress(Op, Op->Dest);
    _VStoreVectorElement(OpSize::i128Bit, OpSize::i64Bit, Src, 1, Dest);
  }
}

// VEX form of the "move high 64 bits" instructions.
//  - Register destination: result = { low 64 bits of Src[0], low 64 bits of Src[1] placed in the upper half }.
//  - Memory destination: the upper 64 bits of Src[0] are moved to element 0 and stored as 64 bits.
void OpDispatchBuilder::VMOVHPOp(OpcodeArgs) {
  if (!Op->Dest.IsGPR()) {
    Ref Vector = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i128Bit});
    // Bring element 1 down to element 0 so a 64-bit store writes the high half.
    Ref HighInLow = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Vector, Vector);
    StoreResultFPR_WithOpSize(Op, Op->Dest, HighInLow, OpSize::i64Bit, OpSize::i64Bit);
    return;
  }

  Ref LowSource = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i128Bit});
  Ref HighSource = LoadSourceFPR(Op, Op->Src[1], Op->Flags, {.Align = OpSize::i64Bit});
  Ref Merged = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 0, LowSource, HighSource);

  StoreResultFPR(Op, Merged);
}

// Handles the "move low 64 bits" family:
//  - mem64, xmm: store the low 64 bits of the source
//  - xmm, xmm (MOVHLPS special case): dest[63:0] = src[127:64]
//  - xmm, mem64: dest[63:0] = mem64, upper bits preserved
void OpDispatchBuilder::MOVLPOp(OpcodeArgs) {
  if (!Op->Dest.IsGPR()) {
    Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i64Bit});
    StoreResultFPR_WithOpSize(Op, Op->Dest, Value, OpSize::i64Bit, OpSize::i64Bit);
    return;
  }

  if (Op->Src[0].IsGPR()) {
    // xmm, xmm is movhlps special case
    Ref Source = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i128Bit});
    Ref Target = LoadSourceFPR(Op, Op->Dest, Op->Flags, {.Align = OpSize::i128Bit});
    auto Merged = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, Target, Source);
    StoreResultFPR_WithOpSize(Op, Op->Dest, Merged, OpSize::i128Bit, OpSize::i128Bit);
    return;
  }

  // Memory source: load 64 bits straight into element 0 of the destination vector.
  const auto DstSize = OpSizeFromDst(Op);
  Ref Address = MakeSegmentAddress(Op, Op->Src[0]);
  Ref Target = LoadSourceFPR_WithOpSize(Op, Op->Dest, DstSize, Op->Flags);
  auto Merged = _VLoadVectorElement(OpSize::i128Bit, OpSize::i64Bit, Target, 0, Address);
  StoreResultFPR(Op, Merged);
}

// VEX form of the "move low 64 bits" instructions. The first source is always loaded up front.
void OpDispatchBuilder::VMOVLPOp(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i128Bit});

  if (!Op->Dest.IsGPR()) {
    ///< VMOVLPS/PD mem64, xmm1
    StoreResultFPR_WithOpSize(Op, Op->Dest, First, OpSize::i64Bit, OpSize::i64Bit);
    return;
  }

  const bool SecondIsRegister = Op->Src[1].IsGPR();
  if (SecondIsRegister) {
    ///< VMOVHLPS/PD xmm1, xmm2, xmm3
    Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags, {.Align = OpSize::i128Bit});
    Ref Merged = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 1, First, Second);
    StoreResultFPR(Op, Merged);
  } else {
    ///< VMOVLPS/PD xmm1, xmm2, mem64
    // Bits[63:0] come from Src2[63:0]
    // Bits[127:64] come from Src1[127:64]
    Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags, {.Align = OpSize::i64Bit});
    Ref Merged = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 1, 1, Second, First);
    StoreResultFPR(Op, Merged);
  }
}

// MOVSHDUP: emits a _VTrn2 of the source with itself over 32-bit elements and stores the result.
void OpDispatchBuilder::VMOVSHDUPOp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  StoreResultFPR(Op, _VTrn2(VectorSize, OpSize::i32Bit, Input, Input));
}

// MOVSLDUP: emits a _VTrn of the source with itself over 32-bit elements and stores the result.
void OpDispatchBuilder::VMOVSLDUPOp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  StoreResultFPR(Op, _VTrn(VectorSize, OpSize::i32Bit, Input, Input));
}

// Common implementation of MOVSS/MOVSD.
// ElementSize selects the scalar width (32-bit for MOVSS, 64-bit for MOVSD).
void OpDispatchBuilder::MOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize) {
  if (!Op->Dest.IsGPR()) {
    // MOVSS/SD mem32/mem64, xmm1
    // Only the scalar element is written to memory.
    Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    StoreResultFPR_WithOpSize(Op, Op->Dest, Value, ElementSize);
    return;
  }

  if (!Op->Src[0].IsGPR()) {
    // MOVSS/SD xmm1, mem32/mem64
    // xmm1[127:0] <- zext(mem32/mem64)
    Ref Loaded = LoadSourceFPR_WithOpSize(Op, Op->Src[0], ElementSize, Op->Flags);
    StoreResultFPR(Op, Loaded);
    return;
  }

  // MOVSS/SD xmm1, xmm2
  // Element 0 of the source replaces element 0 of the destination.
  Ref Existing = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Incoming = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Merged = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Existing, Incoming);
  StoreResultFPR(Op, Merged);
}

// MOVSS: scalar move using a 32-bit element.
void OpDispatchBuilder::MOVSSOp(OpcodeArgs) {
  const auto ScalarSize = OpSize::i32Bit;
  MOVScalarOpImpl(Op, ScalarSize);
}

// MOVSD: scalar move using a 64-bit element.
void OpDispatchBuilder::MOVSDOp(OpcodeArgs) {
  const auto ScalarSize = OpSize::i64Bit;
  MOVScalarOpImpl(Op, ScalarSize);
}

// Common implementation of VMOVSS/VMOVSD.
// ElementSize selects the scalar width (32-bit for VMOVSS, 64-bit for VMOVSD).
void OpDispatchBuilder::VMOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize) {
  if (!Op->Dest.IsGPR()) {
    // VMOVSS/SD mem32/mem64, xmm1
    Ref Value = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
    StoreResultFPR_WithOpSize(Op, Op->Dest, Value, ElementSize);
    return;
  }

  const bool BothSourcesAreRegs = Op->Src[0].IsGPR() && Op->Src[1].IsGPR();
  if (!BothSourcesAreRegs) {
    // VMOVSS/SD xmm1, mem32/mem64
    // Only the scalar element is loaded from the second source operand.
    Ref Loaded = LoadSourceFPR_WithOpSize(Op, Op->Src[1], ElementSize, Op->Flags);
    StoreResultFPR(Op, Loaded);
    return;
  }

  // VMOVSS/SD xmm1, xmm2, xmm3
  // Element 0 of the second source replaces element 0 of the first source.
  Ref Base = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Incoming = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Merged = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Base, Incoming);
  StoreResultFPR(Op, Merged);
}

// VMOVSD: AVX scalar move using a 64-bit element.
void OpDispatchBuilder::VMOVSDOp(OpcodeArgs) {
  const auto ScalarSize = OpSize::i64Bit;
  VMOVScalarOpImpl(Op, ScalarSize);
}

// VMOVSS: AVX scalar move using a 32-bit element.
void OpDispatchBuilder::VMOVSSOp(OpcodeArgs) {
  const auto ScalarSize = OpSize::i32Bit;
  VMOVScalarOpImpl(Op, ScalarSize);
}

// Two-operand vector ALU operation: the destination register doubles as the
// first operand, i.e. Dest = Dest <IROp> Src.
void OpDispatchBuilder::VectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);

  // The node is built through the _VAdd emitter with IROp handed to DeriveOp
  // (macro defined elsewhere; presumably it retags the node as IROp).
  DeriveOp(Derived, IROp, _VAdd(VectorSize, ElementSize, LHS, RHS));

  StoreResultFPR(Op, Derived);
}

// Vector XOR with a fast path for the "xor reg, reg" zeroing idiom.
void OpDispatchBuilder::VectorXOROp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);

  // XOR of a register with itself is the canonical x86 way to zero a vector register.
  const bool IsSelfXor = Op->Dest.IsGPR() && Op->Src[0].IsGPR() && Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;

  if (!IsSelfXor) {
    ///< Regular code path
    VectorALUOp(Op, OP_VXOR, VectorSize);
    return;
  }

  // Skip the ALU op entirely and store a zero vector.
  const auto Zero = LoadZeroVector(VectorSize);
  StoreResultFPR(Op, Zero);
}

// Three-operand (AVX) vector ALU operation: Dest = Src[0] <IROp> Src[1].
void OpDispatchBuilder::AVXVectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref LHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // The node is built through the _VAdd emitter with IROp handed to DeriveOp
  // (macro defined elsewhere; presumably it retags the node as IROp).
  DeriveOp(Derived, IROp, _VAdd(VectorSize, ElementSize, LHS, RHS));

  StoreResultFPR(Op, Derived);
}

// AVX vector XOR with a fast path for XOR-ing a register with itself.
void OpDispatchBuilder::AVXVectorXOROp(OpcodeArgs) {
  // XOR of a register with itself is the canonical x86 way to zero a vector register.
  const bool IsSelfXor = Op->Src[0].IsGPR() && Op->Src[1].IsGPR() && Op->Src[0].Data.GPR.GPR == Op->Src[1].Data.GPR.GPR;

  if (!IsSelfXor) {
    ///< Regular code path
    // The element size handed to the generic helper is fixed at 128-bit here.
    AVXVectorALUOp(Op, OP_VXOR, OpSize::i128Bit);
    return;
  }

  // Store a zero vector sized by the destination operand.
  const auto ZeroSize = OpSizeFromDst(Op);
  const auto Zero = LoadZeroVector(ZeroSize);
  StoreResultFPR(Op, Zero);
}

// Two-operand vector ALU operation with reversed operand order compared to
// VectorALUOp: Dest = Src <IROp> Dest.
void OpDispatchBuilder::VectorALUROp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);
  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Dest, Op->Flags);

  // Source operand goes first, destination operand second.
  DeriveOp(Derived, IROp, _VAdd(VectorSize, ElementSize, First, Second));

  StoreResultFPR(Op, Derived);
}

// Emits a scalar-insert ALU node: the operation is performed on the scalar
// element and the result is returned as a DstSize vector based on Src1Op.
// ZeroUpperBits is forwarded unchanged to the IR op.
Ref OpDispatchBuilder::VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize,
                                                   const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op,
                                                   bool ZeroUpperBits) {
  // The second operand is loaded at the instruction's source size and is
  // allowed to carry garbage above the scalar, which avoids an extra
  // zero-extension of the element being operated on.
  const auto ScalarSrcSize = OpSizeFromSrc(Op);

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, DstSize, Op->Flags);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Src2Op, ScalarSrcSize, Op->Flags, {.AllowUpperGarbage = true});

  // If OpSize == ElementSize then it only does the lower scalar op
  DeriveOp(Inserted, IROp, _VFAddScalarInsert(DstSize, ElementSize, Base, Scalar, ZeroUpperBits));
  return Inserted;
}

// SSE form of the scalar-insert ALU ops: the destination register is also the
// first operand and ZeroUpperBits is passed as false.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::VectorScalarInsertALUOp(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Inserted = VectorScalarInsertALUOpImpl(Op, IROp, GuestVectorSize, ElementSize, Op->Dest, Op->Src[0], false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Inserted, GuestVectorSize);
}

// Explicit instantiations of VectorScalarInsertALUOp: add, sub, mul, div, min
// and max scalar-insert IR ops, each at 32-bit and 64-bit element sizes.
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

// AVX form of the scalar-insert ALU ops: operands come from Src[0] and Src[1]
// and ZeroUpperBits is passed as true.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::AVXVectorScalarInsertALUOp(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Inserted = VectorScalarInsertALUOpImpl(Op, IROp, GuestVectorSize, ElementSize, Op->Src[0], Op->Src[1], true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Inserted, GuestVectorSize);
}

// Explicit instantiations of AVXVectorScalarInsertALUOp: add, sub, mul, div,
// min and max scalar-insert IR ops, each at 32-bit and 64-bit element sizes.
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

// Emits a unary scalar-insert ALU node: the operation is applied to the scalar
// taken from Src2Op and the result is returned as a DstSize vector based on
// Src1Op. ZeroUpperBits is forwarded unchanged to the IR op.
Ref OpDispatchBuilder::VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize,
                                                        const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op,
                                                        bool ZeroUpperBits) {
  // The scalar operand is loaded at the instruction's source size and is
  // allowed to carry garbage above the scalar, which avoids an extra
  // zero-extension of the element being operated on.
  const auto ScalarSrcSize = OpSizeFromSrc(Op);

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, DstSize, Op->Flags);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Src2Op, ScalarSrcSize, Op->Flags, {.AllowUpperGarbage = true});

  // If OpSize == ElementSize then it only does the lower scalar op
  DeriveOp(Inserted, IROp, _VFSqrtScalarInsert(DstSize, ElementSize, Base, Scalar, ZeroUpperBits));
  return Inserted;
}

// SSE form of the unary scalar-insert ops (sqrt / rsqrt / recp).
// The destination register is also the vector the result is inserted into and
// ZeroUpperBits is passed as false.
//
// This dispatches through VectorScalarUnaryInsertALUOpImpl so the node is
// built with the unary emitter (_VFSqrtScalarInsert) rather than the binary
// one (_VFAddScalarInsert). Both impls take identical arguments, so callers
// are unaffected.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::VectorScalarUnaryInsertALUOp(OpcodeArgs) {
  const auto DstSize = GetGuestVectorLength();
  auto Result = VectorScalarUnaryInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Dest, Op->Src[0], false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Result, DstSize);
}

// Explicit instantiations of VectorScalarUnaryInsertALUOp: sqrt, rsqrt and
// recp scalar-insert IR ops, each at 32-bit and 64-bit element sizes.
template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

// AVX form of the unary scalar-insert ops (sqrt / rsqrt / recp).
// Src[0] supplies the vector the result is inserted into, Src[1] supplies the
// scalar, and ZeroUpperBits is passed as true.
//
// This dispatches through VectorScalarUnaryInsertALUOpImpl so the node is
// built with the unary emitter (_VFSqrtScalarInsert) rather than the binary
// one (_VFAddScalarInsert). Both impls take identical arguments, so callers
// are unaffected.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp(OpcodeArgs) {
  const auto DstSize = GetGuestVectorLength();
  auto Result = VectorScalarUnaryInsertALUOpImpl(Op, IROp, DstSize, ElementSize, Op->Src[0], Op->Src[1], true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Result, DstSize);
}

// Explicit instantiations of AVXVectorScalarUnaryInsertALUOp: sqrt, rsqrt and
// recp scalar-insert IR ops, each at 32-bit and 64-bit element sizes.
template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i64Bit>(OpcodeArgs);

// Converts signed 32-bit integers from the source operand to floats and
// inserts them into the destination vector register.
void OpDispatchBuilder::InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  // A register source is read as 64 bits; a memory source uses the
  // instruction's source size.
  const auto InputSize = Op->Src[0].IsGPR() ? OpSize::i64Bit : OpSizeFromSrc(Op);

  // The destination is loaded at the full vector width.
  Ref Existing = LoadSourceFPR_WithOpSize(Op, Op->Dest, GuestVectorSize, Op->Flags);
  Ref Integers = LoadSourceFPR_WithOpSize(Op, Op->Src[0], InputSize, Op->Flags);

  // Always 32-bit, always signed.
  const auto ConvertSize = OpSize::i32Bit;
  Ref Converted = _VSToFVectorInsert(GuestVectorSize, ConvertSize, ConvertSize, Existing, Integers, true, false);

  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, GuestVectorSize);
}

// Signed integer -> float scalar conversion with insert into a vector.
// Src1Op supplies the vector to insert into, Src2Op the integer (register or
// memory). ZeroUpperBits is forwarded unchanged to the emitted IR op.
Ref OpDispatchBuilder::InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op,
                                               const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) {
  const auto IntegerSize = OpSizeFromSrc(Op);

  // The base vector is loaded at the full destination width.
  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, DstSize, Op->Flags);

  if (Src2Op.IsGPR()) {
    // Register source: convert straight from the GPR.
    auto Integer = LoadSourceGPR_WithOpSize(Op, Src2Op, GetGPROpSize(), Op->Flags);
    return _VSToFGPRInsert(DstSize, DstElementSize, IntegerSize, Base, Integer, ZeroUpperBits);
  }

  if (IntegerSize != DstElementSize) {
    // Memory source whose size differs from the destination element:
    // it is more optimal to load into a GPR and convert GPR->FPR, since the
    // ARM GPR->FPR conversion supports mismatched sizes while FPR->FPR doesn't.
    auto Integer = LoadSourceGPR(Op, Src2Op, Op->Flags);
    return _VSToFGPRInsert(DstSize, DstElementSize, IntegerSize, Base, Integer, ZeroUpperBits);
  }

  // cvtsi2s{s,d} with matching source and destination sizes: load directly
  // into an FPR and convert there.
  auto Integer = LoadSourceFPR(Op, Src2Op, Op->Flags);
  // Always signed
  return _VSToFVectorInsert(DstSize, DstElementSize, DstElementSize, Base, Integer, false, ZeroUpperBits);
}

// SSE form: the destination register is also the vector inserted into, and
// ZeroUpperBits is passed as false.
template<IR::OpSize DstElementSize>
void OpDispatchBuilder::InsertCVTGPR_To_FPR(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Converted = InsertCVTGPR_To_FPRImpl(Op, GuestVectorSize, DstElementSize, Op->Dest, Op->Src[0], false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, GuestVectorSize);
}

// Explicit instantiations for 32-bit and 64-bit destination elements.
template void OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::InsertCVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);

// AVX form: Src[0] is the vector inserted into, Src[1] the integer source,
// and ZeroUpperBits is passed as true.
template<IR::OpSize DstElementSize>
void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Converted = InsertCVTGPR_To_FPRImpl(Op, GuestVectorSize, DstElementSize, Op->Src[0], Op->Src[1], true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, GuestVectorSize);
}
// Explicit instantiations for 32-bit and 64-bit destination elements.
template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);

// Scalar float -> float conversion (SrcElementSize -> DstElementSize) with the
// result inserted into the vector taken from Src1Op. ZeroUpperBits is
// forwarded unchanged to the IR op.
Ref OpDispatchBuilder::InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize,
                                                           IR::OpSize SrcElementSize, const X86Tables::DecodedOperand& Src1Op,
                                                           const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits) {
  // A register source is read as a full 128-bit vector so no zero-extension of
  // the scalar is needed; a memory source loads exactly the scalar element.
  const auto ScalarLoadSize = Src2Op.IsGPR() ? OpSize::i128Bit : SrcElementSize;

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, DstSize, Op->Flags);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Src2Op, ScalarLoadSize, Op->Flags, {.AllowUpperGarbage = true});

  return _VFToFScalarInsert(DstSize, DstElementSize, SrcElementSize, Base, Scalar, ZeroUpperBits);
}

// SSE form: the destination register is also the vector inserted into, and
// ZeroUpperBits is passed as false.
template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Converted = InsertScalar_CVT_Float_To_FloatImpl(Op, GuestVectorSize, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0], false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, GuestVectorSize);
}

// Explicit instantiations: 64-bit -> 32-bit and 32-bit -> 64-bit conversions.
template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::InsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>(OpcodeArgs);

// AVX form: Src[0] is the vector inserted into, Src[1] the scalar source, and
// ZeroUpperBits is passed as true.
template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs) {
  const auto GuestVectorSize = GetGuestVectorLength();
  Ref Converted = InsertScalar_CVT_Float_To_FloatImpl(Op, GuestVectorSize, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1], true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, GuestVectorSize);
}

// Explicit instantiations: 64-bit -> 32-bit and 32-bit -> 64-bit conversions.
template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>(OpcodeArgs);

// Maps a rounding-control immediate to a RoundMode.
//   Bit 2 set    -> RoundMode::Host.
//   Bit 2 clear  -> bits [1:0] select Nearest, NegInfinity, PosInfinity or
//                   TowardsZero (in that order).
// Bits above bit 2 are ignored.
RoundMode OpDispatchBuilder::TranslateRoundType(uint8_t Mode) {
  const bool UseHostRounding = (Mode & 0b100) != 0;
  if (UseHostRounding) {
    return RoundMode::Host;
  }

  // Indexed by the two low bits of the immediate.
  static constexpr std::array<RoundMode, 4> ImmediateModes = {
    RoundMode::Nearest,
    RoundMode::NegInfinity,
    RoundMode::PosInfinity,
    RoundMode::TowardsZero,
  };

  const uint64_t Selector = Mode & 0b11;
  return ImmediateModes[Selector];
}

// Rounds the scalar taken from Src2Op using the rounding mode encoded in Mode
// and inserts the result into the vector taken from Src1Op.
// ZeroUpperBits is forwarded unchanged to the IR op.
Ref OpDispatchBuilder::InsertScalarRoundImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                                             const X86Tables::DecodedOperand& Src2Op, uint64_t Mode, bool ZeroUpperBits) {
  // The scalar operand is loaded at the instruction's source size and may
  // carry garbage above the scalar, avoiding an extra zero-extension.
  const auto ScalarSrcSize = OpSizeFromSrc(Op);

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, DstSize, Op->Flags);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Src2Op, ScalarSrcSize, Op->Flags, {.AllowUpperGarbage = true});

  const auto Rounding = TranslateRoundType(Mode);
  return _VFToIScalarInsert(DstSize, ElementSize, Base, Scalar, Rounding, ZeroUpperBits);
}

// SSE form: the rounding immediate is Src[1], the destination register is also
// the vector inserted into, and ZeroUpperBits is passed as false.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::InsertScalarRound(OpcodeArgs) {
  const uint64_t RoundImmediate = Op->Src[1].Literal();
  const auto GuestVectorSize = GetGuestVectorLength();

  Ref Rounded = InsertScalarRoundImpl(Op, GuestVectorSize, ElementSize, Op->Dest, Op->Src[0], RoundImmediate, false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Rounded, GuestVectorSize);
}

// Explicit instantiations for 32-bit and 64-bit elements.
template void OpDispatchBuilder::InsertScalarRound<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::InsertScalarRound<OpSize::i64Bit>(OpcodeArgs);

// AVX form: the rounding immediate is Src[2], Src[0] is the vector inserted
// into, Src[1] the scalar source, and ZeroUpperBits is passed as true.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVXInsertScalarRound(OpcodeArgs) {
  const uint64_t RoundImmediate = Op->Src[2].Literal();
  const auto GuestVectorSize = GetGuestVectorLength();

  Ref Rounded = InsertScalarRoundImpl(Op, GuestVectorSize, ElementSize, Op->Src[0], Op->Src[1], RoundImmediate, true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Rounded, GuestVectorSize);
}

// Explicit instantiations for 32-bit and 64-bit elements.
template void OpDispatchBuilder::AVXInsertScalarRound<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXInsertScalarRound<OpSize::i64Bit>(OpcodeArgs);


// Emits a scalar floating-point compare for the predicate in CompType and
// returns Src1 with its lowest element replaced by the compare mask.
//
// Predicates with a direct FloatCompareOp equivalent use the fused
// _VFCMPScalarInsert op, which is the only path that receives ZeroUpperBits.
// All remaining predicates compute the mask separately and insert element 0
// into Src1 with a _VInsElement sized by OpDstSize.
Ref OpDispatchBuilder::InsertScalarFCMPOpImpl(OpSize Size, IR::OpSize OpDstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2,
                                              uint8_t CompType, bool ZeroUpperBits) {
  // Fused compare + insert.
  const auto FusedCompare = [&](FloatCompareOp CompareOp) -> Ref {
    return _VFCMPScalarInsert(Size, ElementSize, Src1, Src2, CompareOp, ZeroUpperBits);
  };
  // Insert the lower bits of a separately computed mask into Src1.
  const auto InsertIntoSrc1 = [&](Ref Mask) -> Ref {
    return _VInsElement(OpDstSize, ElementSize, 0, 0, Src1, Mask);
  };
  // Scalar-sized bitwise inversion of a compare mask.
  const auto Invert = [&](Ref Mask) -> Ref {
    return _VNot(ElementSize, ElementSize, Mask);
  };

  switch (static_cast<VectorCompareType>(CompType)) {
  case VectorCompareType::EQ_OQ:
  case VectorCompareType::EQ_OS: return FusedCompare(FloatCompareOp::EQ);

  case VectorCompareType::LT_OS: // GT(Swapped operand)
  case VectorCompareType::LT_OQ: return FusedCompare(FloatCompareOp::LT);

  case VectorCompareType::LE_OS: // GE(Swapped operand)
  case VectorCompareType::LE_OQ: return FusedCompare(FloatCompareOp::LE);

  case VectorCompareType::UNORD_Q:
  case VectorCompareType::UNORD_S: return FusedCompare(FloatCompareOp::UNO);

  case VectorCompareType::NEQ_UQ:
  case VectorCompareType::NEQ_US: return FusedCompare(FloatCompareOp::NEQ);

  case VectorCompareType::ORD_Q:
  case VectorCompareType::ORD_S: return FusedCompare(FloatCompareOp::ORD);

  case VectorCompareType::NLT_US: // NGT(Swapped operand)
  case VectorCompareType::NLT_UQ: {
    // !(Src1 < Src2)
    Ref LessThan = _VFCMPLT(ElementSize, ElementSize, Src1, Src2);
    return InsertIntoSrc1(Invert(LessThan));
  }

  case VectorCompareType::NLE_US: // NGE(Swapped operand)
  case VectorCompareType::NLE_UQ: {
    // !(Src1 <= Src2)
    Ref LessEqual = _VFCMPLE(ElementSize, ElementSize, Src1, Src2);
    return InsertIntoSrc1(Invert(LessEqual));
  }

  case VectorCompareType::NGT_UQ:
  case VectorCompareType::NGT_US: {
    // !(Src2 < Src1)
    Ref Greater = _VFCMPLT(ElementSize, ElementSize, Src2, Src1);
    return InsertIntoSrc1(Invert(Greater));
  }

  case VectorCompareType::NGE_UQ:
  case VectorCompareType::NGE_US: {
    // !(Src2 <= Src1)
    Ref GreaterEqual = _VFCMPLE(ElementSize, ElementSize, Src2, Src1);
    return InsertIntoSrc1(Invert(GreaterEqual));
  }

  case VectorCompareType::GT_OQ:
  case VectorCompareType::GT_OS: {
    // Src2 < Src1
    Ref Greater = _VFCMPLT(ElementSize, ElementSize, Src2, Src1);
    return InsertIntoSrc1(Greater);
  }

  case VectorCompareType::GE_OQ:
  case VectorCompareType::GE_OS: {
    // Src2 <= Src1
    Ref GreaterEqual = _VFCMPLE(ElementSize, ElementSize, Src2, Src1);
    return InsertIntoSrc1(GreaterEqual);
  }

  case VectorCompareType::EQ_UQ:
  case VectorCompareType::EQ_US: {
    // True when the sources compare equal OR either of them is unordered.
    // Comparing a value with itself yields the "is ordered" mask for that value.
    Ref Src1Ordered = _VFCMPEQ(Size, ElementSize, Src1, Src1);
    Ref Src2Ordered = _VFCMPEQ(Size, ElementSize, Src2, Src2);
    Ref BothOrdered = _VAnd(Size, ElementSize, Src1Ordered, Src2Ordered);

    Ref Equal = _VFCMPEQ(Size, ElementSize, Src1, Src2);
    Ref EqualOrUnordered = _VOrn(Size, ElementSize, Equal, BothOrdered);

    return InsertIntoSrc1(EqualOrUnordered);
  }

  case VectorCompareType::NEQ_OQ:
  case VectorCompareType::NEQ_OS: {
    // False whenever either source is unordered; otherwise true when not equal.
    Ref Src1Ordered = _VFCMPEQ(Size, ElementSize, Src1, Src1);
    Ref Src2Ordered = _VFCMPEQ(Size, ElementSize, Src2, Src2);

    Ref Equal = _VFCMPEQ(Size, ElementSize, Src1, Src2);
    Ref NotEqualAndSrc1Ordered = _VAndn(Size, ElementSize, Src1Ordered, Equal);
    Ref NotEqualAndOrdered = _VAnd(Size, ElementSize, NotEqualAndSrc1Ordered, Src2Ordered);

    return InsertIntoSrc1(NotEqualAndOrdered);
  }

  case VectorCompareType::FALSE_OQ:
  case VectorCompareType::FALSE_OS:
    // Constant false: insert an all-zero element.
    return InsertIntoSrc1(LoadZeroVector(OpSize::i128Bit));

  case VectorCompareType::TRUE_UQ:
  case VectorCompareType::TRUE_US:
    // Constant true: insert an element taken from a vector of 0xFF bytes.
    return InsertIntoSrc1(_VectorImm(OpSize::i128Bit, OpSize::i8Bit, -1, 0));
  }
  FEX_UNREACHABLE;
}

// SSE scalar compare: the predicate is the low three bits of the Src[1]
// immediate; the destination register is also the first compare operand.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::InsertScalarFCMPOp(OpcodeArgs) {
  const uint8_t Immediate = Op->Src[1].Literal();
  const auto GuestVectorSize = GetGuestVectorLength();
  const auto ScalarSrcSize = OpSizeFromSrc(Op);

  // The first operand is loaded at the full vector width; the second operand may carry garbage
  // above the scalar element.
  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Dest, GuestVectorSize, Op->Flags);
  Ref RHS = LoadSourceFPR_WithOpSize(Op, Op->Src[0], ScalarSrcSize, Op->Flags, {.AllowUpperGarbage = true});

  // Only eight predicates exist for the non-VEX encoding.
  const uint8_t Predicate = Immediate & 0b111;
  Ref Compared = InsertScalarFCMPOpImpl(GuestVectorSize, OpSizeFromDst(Op), ElementSize, LHS, RHS, Predicate, false);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Compared, GuestVectorSize);
}

// Explicit instantiations for 32-bit and 64-bit elements.
template void OpDispatchBuilder::InsertScalarFCMPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::InsertScalarFCMPOp<OpSize::i64Bit>(OpcodeArgs);

// AVX scalar compare: the predicate is the low five bits of the Src[2]
// immediate; Src[0] and Src[1] are the compare operands.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVXInsertScalarFCMPOp(OpcodeArgs) {
  const uint8_t Immediate = Op->Src[2].Literal();
  const auto GuestVectorSize = GetGuestVectorLength();
  const auto ScalarSrcSize = OpSizeFromSrc(Op);

  // The first operand is loaded at the full vector width; the second operand
  // may carry garbage above the scalar element, which avoids an unnecessary
  // zero extension of the value being compared.
  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Src[0], GuestVectorSize, Op->Flags);
  Ref RHS = LoadSourceFPR_WithOpSize(Op, Op->Src[1], ScalarSrcSize, Op->Flags, {.AllowUpperGarbage = true});

  // The VEX encoding has 32 predicates.
  const uint8_t Predicate = Immediate & 0b11111;
  Ref Compared = InsertScalarFCMPOpImpl(GuestVectorSize, OpSizeFromDst(Op), ElementSize, LHS, RHS, Predicate, true);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Compared, GuestVectorSize);
}

// Explicit instantiations for 32-bit and 64-bit elements.
template void OpDispatchBuilder::AVXInsertScalarFCMPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXInsertScalarFCMPOp<OpSize::i64Bit>(OpcodeArgs);

// 3DNow! reciprocal square root over 32-bit elements.
// When Duplicate is set, element 0 of the result is broadcast across the vector.
void OpDispatchBuilder::RSqrt3DNowOp(OpcodeArgs, bool Duplicate) {
  const auto VectorSize = OpSizeFromSrc(Op);
  const auto FloatSize = OpSize::i32Bit;

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], VectorSize, Op->Flags);

  // In 3DNow! a negative source yields a result carrying the source's sign,
  // while the magnitude is computed as if the source were positive:
  // take |x|, compute the reciprocal sqrt, then copy the original sign back.
  Ref Magnitude = _VFAbs(VectorSize, FloatSize, Input);
  Ref RSqrtOfMagnitude = _VFRSqrtPrecision(VectorSize, FloatSize, Magnitude);
  Ref Signed = _VFCopySign(VectorSize, FloatSize, RSqrtOfMagnitude, Input);

  if (!Duplicate) {
    StoreResultFPR(Op, Signed);
    return;
  }

  Ref Broadcast = _VDupElement(VectorSize, FloatSize, Signed, 0);
  StoreResultFPR(Op, Broadcast);
}

// Unary vector operation: Dest = <IROp>(Src).
void OpDispatchBuilder::VectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  // The source is loaded at the instruction's source size. For a scalar op with
  // a vector register source this is the whole vector, which avoids an
  // unnecessary extension of the element; a memory operand loads exactly the
  // element size.
  const auto OperandSize = OpSizeFromSrc(Op);

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OperandSize, Op->Flags);

  // Built through the _VFSqrt emitter with IROp handed to DeriveOp.
  DeriveOp(Derived, IROp, _VFSqrt(OperandSize, ElementSize, Input));
  StoreResultFPR(Op, Derived);
}

// AVX unary vector operation: Dest = <IROp>(Src[0]).
void OpDispatchBuilder::AVXVectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  // The source is loaded at the instruction's source size. For a scalar op with
  // a vector register source this is the whole vector, which avoids an
  // unnecessary extension of the element; a memory operand loads exactly the
  // element size.
  const auto OperandSize = OpSizeFromSrc(Op);

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OperandSize, Op->Flags);

  // Built through the _VFSqrt emitter with IROp handed to DeriveOp.
  DeriveOp(Derived, IROp, _VFSqrt(OperandSize, ElementSize, Input));

  // NOTE: No explicit clearing of the upper lanes is required. The IR ops use
  //       128-bit AdvSimd for 128-bit cases, which on SVE-capable hardware
  //       zero-extends when writing the destination.

  StoreResultFPR(Op, Derived);
}

// Applies a unary op to the lowest element of the source and broadcasts that
// single result across the whole destination vector.
void OpDispatchBuilder::VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // The op itself is scalar-sized: register size == element size.
  DeriveOp(ScalarResult, IROp, _VFSqrt(ElementSize, ElementSize, Input));

  // Duplicate the lower bits
  Ref Broadcast = _VDupElement(VectorSize, ElementSize, ScalarResult, 0);
  StoreResultFPR(Op, Broadcast);
}

// Template front-end that forwards its compile-time IR op and element size to
// VectorUnaryDuplicateOpImpl.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::VectorUnaryDuplicateOp(OpcodeArgs) {
  VectorUnaryDuplicateOpImpl(Op, IROp, ElementSize);
}

// TODO: There is only one instantiation of this template. Let's remove the template.
// Explicit instantiation: reciprocal (precision variant) on 32-bit elements.
template void OpDispatchBuilder::VectorUnaryDuplicateOp<IR::OP_VFRECPPRECISION, OpSize::i32Bit>(OpcodeArgs);

// MOVQ between vector registers and/or memory.
// VectorType is forwarded to StoreXMMRegister_WithAVXInsert for the register
// destination case.
void OpDispatchBuilder::MOVQOp(OpcodeArgs, VectorOpType VectorType) {
  // A register source is read as a full 128-bit vector; memory uses the
  // instruction's source size.
  const auto LoadSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op);
  Ref Value = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);

  if (!Op->Dest.IsGPR()) {
    // Memory destination: this is simple, just store the result.
    StoreResultFPR(Op, Value);
    return;
  }

  // Register destination: this instruction is a bit special in that it
  // zero-extends the 64-bit source to 128 bits.
  const auto DestReg = Op->Dest.Data.GPR.GPR;
  const auto XMMIndex = DestReg - X86State::REG_XMM_0;

  auto Extended = VZeroExtendOperand(OpSize::i64Bit, Op->Src[0], Value);
  StoreXMMRegister_WithAVXInsert(VectorType, XMMIndex, Extended);
}

// MMX MOVQ: partial store into the bottom 64 bits, leaving the upper bits
// unaffected.
void OpDispatchBuilder::MOVQMMXOp(OpcodeArgs) {
  // Switch the tracked state from x87 to MMX before touching MMX registers.
  const bool CurrentlyX87 = MMXState == MMXState_X87;
  if (CurrentlyX87) {
    ChgStateX87_MMX();
  }

  // Both the load and the store request byte alignment.
  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
  StoreResultFPR(Op, Value, OpSize::i8Bit);
}

// MOVMSK: gathers the sign bit of every ElementSize element of the source
// vector into the low bits of a GPR. 128-bit vectors with 64-bit or 32-bit
// elements have dedicated sequences; everything else uses a per-element loop.
void OpDispatchBuilder::MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto Size = OpSizeFromSrc(Op);
  const auto NumElements = IR::NumElements(Size, ElementSize);

  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  const bool Is128Bit = Size == OpSize::i128Bit;

  if (Is128Bit && ElementSize == OpSize::i64Bit) {
    // UnZip2 the 64-bit elements as 32-bit to bring the sign bits closer.
    // Afterwards they sit at bit positions 31 and 63 of the low 64 bits.
    Ref Packed = _VUnZip2(Size, OpSize::i32Bit, Src, Src);

    // Extract the low 64-bits to GPR in one move.
    Ref Low64 = _VExtractToGPR(Size, OpSize::i64Bit, Packed, 0);
    // Insert the full lower 32 bits at offset 31, so the sign bit from bit 31
    // ends up at bit 62 while bit 63 keeps the other sign bit.
    Ref Adjacent = _Bfi(OpSize::i64Bit, 32, 31, Low64, Low64);
    // Shift right to only keep the two sign bits we care about.
    Ref Mask = _Lshr(OpSize::i64Bit, Adjacent, Constant(62));
    StoreResultGPR_WithOpSize(Op, Op->Dest, Mask, GetGPROpSize());
    return;
  }

  if (Is128Bit && ElementSize == OpSize::i32Bit) {
    // Shift all the sign bits to the bottom of their respective elements.
    Ref SignBits = _VUShrI(Size, OpSize::i32Bit, Src, 31);
    // Load the specific 128-bit movmskps shift elements operator.
    auto PerLaneShift = LoadAndCacheNamedVectorConstant(Size, NAMED_VECTOR_MOVMSKPS_SHIFT);
    // Shift the sign bits in to specific locations.
    Ref Positioned = _VUShl(Size, OpSize::i32Bit, SignBits, PerLaneShift, false);
    // Add across the vector so the sign bits will end up in bits [3:0]
    Ref Summed = _VAddV(Size, OpSize::i32Bit, Positioned);
    // Extract to a GPR.
    Ref Mask = _VExtractToGPR(Size, OpSize::i32Bit, Summed, 0);
    StoreResultGPR_WithOpSize(Op, Op->Dest, Mask, GetGPROpSize());
    return;
  }

  // Generic path: pull each element into a GPR and accumulate its top bit.
  Ref Accumulated = Constant(0);

  for (unsigned Index = 0; Index < NumElements; ++Index) {
    // Extract the top bit of the element
    Ref SignBit = _VExtractToGPR(Size, ElementSize, Src, Index);
    SignBit = _Bfe(ElementSize, 1, IR::OpSizeAsBits(ElementSize) - 1, SignBit);

    if (Index == 0) {
      // The first element's bit starts the accumulator as-is.
      Accumulated = SignBit;
      continue;
    }

    // Shift it to the correct location and or it with the current value
    Accumulated = _Orlshl(OpSize::i64Bit, Accumulated, SignBit, Index);
  }
  StoreResultGPR(Op, Accumulated);
}

// Byte-granular MOVMSK: collects the sign bit of every byte of the source
// into a GPR using a compare, a mask and three pairwise-add reductions.
void OpDispatchBuilder::MOVMSKOpOne(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);
  const auto Is256Bit = VectorSize == OpSize::i256Bit;
  // 256-bit sources extract a 32-bit result, everything else a 16-bit result.
  const auto ResultElementSize = Is256Bit ? OpSize::i32Bit : OpSize::i16Bit;

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref BitMask = LoadAndCacheNamedVectorConstant(VectorSize, NAMED_VECTOR_MOVMASKB);

  // Per-byte "less than zero" compare, then AND with the named constant.
  auto NegativeBytes = _VCMPLTZ(VectorSize, OpSize::i8Bit, Input);
  auto MaskedBits = _VAnd(VectorSize, OpSize::i8Bit, NegativeBytes, BitMask);

  // The MM (64-bit) MOVMSKB is handled here as well, so the sizes of the
  // first two reductions are clamped to a lower bound.
  const auto FirstAddSize = std::max(VectorSize, OpSize::i128Bit);
  const auto SecondAddSize = std::max(VectorSize >> 1, OpSize::i64Bit);

  auto Reduced1 = _VAddP(FirstAddSize, OpSize::i8Bit, MaskedBits, MaskedBits);
  auto Reduced2 = _VAddP(SecondAddSize, OpSize::i8Bit, Reduced1, Reduced1);
  auto Reduced3 = _VAddP(OpSize::i64Bit, OpSize::i8Bit, Reduced2, Reduced2);

  auto Mask = _VExtractToGPR(VectorSize, ResultElementSize, Reduced3, 0);

  StoreResultGPR(Op, Mask);
}

// Two-operand unpack-low: zips the destination register with the source
// operand at the given element size and writes the result back.
void OpDispatchBuilder::PUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Interleaved = _VZip(VecSize, ElementSize, First, Second);
  StoreResultFPR(Op, Interleaved);
}

// Three-operand (VEX) unpack-low.
// For a 128-bit operation a single zip is the whole result. For any other size
// the second 128-bit element of the result is replaced by the first 128-bit
// element of the "zip2" of the same sources.
void OpDispatchBuilder::VPUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // The low zip is needed on both paths.
  Ref Interleaved = _VZip(VecSize, ElementSize, First, Second);
  if (VecSize != OpSize::i128Bit) {
    Ref UpperZip = _VZip2(VecSize, ElementSize, First, Second);
    Interleaved = _VInsElement(VecSize, OpSize::i128Bit, 1, 0, Interleaved, UpperZip);
  }

  StoreResultFPR(Op, Interleaved);
}

// Two-operand unpack-high: applies zip2 to the destination register and the
// source operand at the given element size and writes the result back.
void OpDispatchBuilder::PUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Interleaved = _VZip2(VecSize, ElementSize, First, Second);
  StoreResultFPR(Op, Interleaved);
}

// Three-operand (VEX) unpack-high.
// For a 128-bit operation a single zip2 is the whole result. For any other
// size the first 128-bit element of the zip2 result is replaced by the second
// 128-bit element of the plain zip of the same sources.
void OpDispatchBuilder::VPUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  if (VecSize == OpSize::i128Bit) {
    Ref Interleaved = _VZip2(VecSize, ElementSize, First, Second);
    StoreResultFPR(Op, Interleaved);
    return;
  }

  Ref LowerZip = _VZip(VecSize, ElementSize, First, Second);
  Ref UpperZip = _VZip2(VecSize, ElementSize, First, Second);

  Ref Combined = _VInsElement(VecSize, OpSize::i128Bit, 0, 1, UpperZip, LowerZip);
  StoreResultFPR(Op, Combined);
}

// Builds the per-byte mask that is ANDed onto PSHUFB indices before the
// table lookup.
//
// PSHUFB and VTBL disagree on when an element is zeroed: VTBL zeroes whenever
// the index is out of range for the table, whereas PSHUFB only zeroes when
// bit 7 of the index is set. Keeping just bit 7 plus the valid selection bits
// reconciles the two:
//  - 128-bit/256-bit: bits [6:4] are reserved and dropped.
//  - 64-bit:          bits [6:3] are reserved and dropped.
Ref OpDispatchBuilder::GeneratePSHUFBMask(IR::OpSize SrcSize) {
  uint8_t KeptBits = 0b1000'1111;
  if (SrcSize == OpSize::i64Bit) {
    KeptBits = 0b1000'0111;
  }

  return _VectorImm(SrcSize, OpSize::i8Bit, KeptBits);
}

// Shared implementation of PSHUFB / VPSHUFB.
//
// SrcSize    - Width of the operation.
// Src1       - Table vector (the bytes being shuffled).
// Src2       - Index vector (one selector byte per destination byte).
// MaskVector - Per-byte mask applied to the indices before the lookup
//              (see GeneratePSHUFBMask).
//
// Returns the shuffled vector.
//
// The 256-bit form never crosses 128-bit lanes: each lane of the result is
// looked up from the matching lane of Src1 using the matching lane of Src2.
// It is therefore emitted as two independent 128-bit table lookups.
Ref OpDispatchBuilder::PSHUFBOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, Ref MaskVector) {
  const auto Is256Bit = SrcSize == OpSize::i256Bit;

  // We perform the 256-bit version as two 128-bit operations due to
  // the lane splitting behavior, so cap the maximum size at 16.
  const auto SanitizedSrcSize = std::min(SrcSize, OpSize::i128Bit);

  Ref MaskedIndices = _VAnd(SrcSize, SrcSize, Src2, MaskVector);

  Ref Low = _VTBL1(SanitizedSrcSize, Src1, MaskedIndices);
  if (!Is256Bit) {
    return Low;
  }

  // The 128-bit lookup only consumes the low 128 bits of its operands, so both
  // the table AND the indices of the upper lane have to be moved down first.
  // Using the unmodified MaskedIndices here would shuffle the upper lane with
  // the lower lane's selectors.
  Ref HighSrc1 = _VInsElement(SrcSize, OpSize::i128Bit, 0, 1, Src1, Src1);
  Ref HighSrc2 = _VInsElement(SrcSize, OpSize::i128Bit, 0, 1, MaskedIndices, MaskedIndices);
  Ref High = _VTBL1(SanitizedSrcSize, HighSrc1, HighSrc2);

  // Place the upper-lane result back into the upper 128 bits.
  return _VInsElement(SrcSize, OpSize::i128Bit, 1, 0, Low, High);
}

// Two-operand PSHUFB: the destination register is the table, the source
// operand supplies the indices.
void OpDispatchBuilder::PSHUFBOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref Table = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Indices = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref IndexMask = GeneratePSHUFBMask(VecSize);

  StoreResultFPR(Op, PSHUFBOpImpl(VecSize, Table, Indices, IndexMask));
}

// Three-operand (VEX) PSHUFB: the first source is the table, the second
// source supplies the indices.
void OpDispatchBuilder::VPSHUFBOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref Table = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Indices = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref IndexMask = GeneratePSHUFBMask(VecSize);

  StoreResultFPR(Op, PSHUFBOpImpl(VecSize, Table, Indices, IndexMask));
}

// Shuffles the four 16-bit elements of one 64-bit half of IncomingLane
// according to the 2-bit fields of Shuffle.
//
// Size          - Width of the vector being operated on.
// IndexConstant - Indexed named constant table used for the generic lookup.
// LowLane       - true to shuffle the low half, false for the high half.
// IncomingLane  - Source vector.
// Shuffle       - Immediate holding four 2-bit element selectors.
//
// TODO: There can be more optimized copies here.
Ref OpDispatchBuilder::PShufWLane(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane,
                                  uint8_t Shuffle) {
  // Selecting 3,2,1,0 is the identity; nothing to emit.
  if (Shuffle == 0b11'10'01'00) {
    return IncomingLane;
  }

  const bool AllFieldsEqual = Shuffle == 0b00'00'00'00 || Shuffle == 0b01'01'01'01 || Shuffle == 0b10'10'10'10 || Shuffle == 0b11'11'11'11;

  if (!AllFieldsEqual) {
    // Generic path through a table lookup.
    // PSHUFLW, PSHUFHW and the MMX PSHUFW all need the index scaled by 16
    // to land on the right entry of the constant table.
    auto LookupIndexes = LoadAndCacheIndexedNamedVectorConstant(Size, IndexConstant, Shuffle * 16);
    return _VTBL1(Size, IncomingLane, LookupIndexes);
  }

  // All four selectors name the same element: duplicate that element, then
  // (for 128-bit) stitch the untouched half of the source back in.
  const auto HalfNumElements = IR::NumElements(Size, IR::OpSize::i16Bit) >> 1;
  Ref Broadcast = _VDupElement(Size, OpSize::i16Bit, IncomingLane, (LowLane ? 0 : HalfNumElements) + (Shuffle & 0b11));

  if (Size != OpSize::i128Bit) {
    return Broadcast;
  }

  if (LowLane) {
    // Duplicate goes low, source goes high.
    return _VTrn2(Size, OpSize::i64Bit, Broadcast, IncomingLane);
  }

  // Duplicate goes high, source goes low.
  return _VTrn(Size, OpSize::i64Bit, IncomingLane, Broadcast);
}

// PSHUFW on an 8-byte operand: handled as a "low lane" word shuffle using
// the PSHUFLW index table.
void OpDispatchBuilder::PSHUFW8ByteOp(OpcodeArgs) {
  const uint16_t Selector = Op->Src[1].Data.Literal.Value;
  const auto VecSize = OpSizeFromSrc(Op);

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Shuffled = PShufWLane(VecSize, FEXCore::IR::INDEXED_NAMED_VECTOR_PSHUFLW, true, Input, Selector);

  StoreResultFPR(Op, Shuffled);
}

// PSHUFLW / PSHUFHW: shuffles the words of the low (Low == true) or high
// (Low == false) half of the source, picking the matching index table.
void OpDispatchBuilder::PSHUFWOp(OpcodeArgs, bool Low) {
  const uint16_t Selector = Op->Src[1].Data.Literal.Value;
  const auto VecSize = OpSizeFromSrc(Op);
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  auto Table = FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW;
  if (Low) {
    Table = FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW;
  }

  Ref Shuffled = PShufWLane(VecSize, Table, Low, Input, Selector);
  StoreResultFPR(Op, Shuffled);
}

// Shuffles the four 32-bit elements of a 128-bit vector by an 8-bit immediate.
//
// Src     - Source vector.
// Shuffle - Four 2-bit selectors; the case labels are written so that the
//           right-most pair selects result element 0 and the left-most pair
//           selects result element 3.
//
// Returns the shuffled vector.
//
// A number of immediates are special-cased into short sequences of
// dup/zip/unzip/rev64/extr operations. Any immediate without a dedicated case
// falls through to a table lookup using a cached index constant.
Ref OpDispatchBuilder::Single128Bit4ByteVectorShuffle(Ref Src, uint8_t Shuffle) {
  constexpr auto IdentityCopy = 0b11'10'01'00;

  // TODO: There can be more optimized copies here.
  switch (Shuffle) {
  case IdentityCopy: {
    // Special case identity copy.
    return Src;
  }
  case 0b00'00'00'00:
  case 0b01'01'01'01:
  case 0b10'10'10'10:
  case 0b11'11'11'11: {
    // Special case: all four selectors are equal, so duplicate that single
    // 32-bit element across the whole 128-bit register.
    return _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Src, Shuffle & 0b11);
  }
  case 0b00'00'10'10: {
    // Weird reverse low elements and broadcast to each half of the register
    Ref Tmp = _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Tmp);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b00'00'11'10: {
    // First element duplicated and shifted in to the top.
    auto Dup = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, Src, 0);
    return _VExtr(OpSize::i128Bit, OpSize::i32Bit, Dup, Src, 2);
  }
  case 0b00'01'00'01: {
    ///< Weird reversed low elements and broadcast
    Ref Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Src);
    return _VZip(OpSize::i128Bit, OpSize::i64Bit, Tmp, Tmp);
  }
  case 0b00'01'01'00: {
    ///< Weird reverse low two elements in to high half
    Ref Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b00'01'10'11: {
    // Inverse elements
    Ref Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp, 2);
  }
  case 0b00'10'00'10: {
    ///< Weird reversed even elements and broadcast
    Ref Tmp = _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b00'10'10'00: {
    // Weird reversed low elements in upper half of the register
    Ref Tmp = _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b00'11'00'11: {
    ///< Weird Low plus high element reversed and broadcast
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    return _VZip2(OpSize::i128Bit, OpSize::i64Bit, Tmp, Tmp);
  }
  case 0b00'11'10'01:
    ///< Vector rotate - One element
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
  case 0b00'11'11'00: {
    // Weird reversed low and high elements in upper half of the register
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VZip2(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b01'00'00'01: {
    ///< Weird duplicate bottom two elements, then rotate in the low half
    Ref Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b01'00'01'00:
    ///< Duplicate bottom 64-bits
    return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src, 0);
  case 0b01'00'11'10:
    ///< Vector rotate - Two elements
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 8);
  case 0b01'01'00'00: {
    // Zip with self.
    // Dest[0] = Src[0]
    // Dest[1] = Src[0]
    // Dest[2] = Src[1]
    // Dest[3] = Src[1]
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
  }
  case 0b01'01'10'10: {
    ///< Weird reverse middle elements and broadcast to each half of the register
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Tmp);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b01'01'11'11: {
    ///< Weird reverse odd elements and broadcast to each half of the register
    Ref Tmp = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Tmp);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b01'10'01'10: {
    ///< Weird middle elements swizzle plus broadcast
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Tmp);
    return _VZip(OpSize::i128Bit, OpSize::i64Bit, Tmp, Tmp);
  }
  case 0b01'10'10'01: {
    ///< Weird middle elements swizzle plus broadcast and reverse
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b01'11'01'11: {
    ///< Weird reversed odd elements and broadcast
    Ref Tmp = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b01'11'11'01: {
    ///< Weird odd elements swizzle plus broadcast and reverse
    Ref Tmp = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b10'00'00'10: {
    ///< Weird even elements swizzle plus broadcast and reverse
    Ref Tmp = _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b10'00'10'00:
    ///< Even elements broadcast
    return _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
  case 0b10'01'00'11:
    ///< Vector rotate - Three elements
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 12);

  case 0b10'01'01'10: {
    ///< Weird odd elements swizzle plus broadcast and reverse
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b10'01'10'01: {
    ///< Middle two elements broadcast
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    return _VZip(OpSize::i128Bit, OpSize::i64Bit, Tmp, Tmp);
  }
  case 0b10'10'00'00: {
    ///< Broadcast even elements to each half of the register
    Ref Tmp = _VUnZip(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b10'10'01'01: {
    ///< Broadcast middle elements to each half of the register
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b10'10'11'11: {
    ///< Reverse top two elements and broadcast to each half of the register
    Ref Tmp = _VZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 8);
  }
  case 0b10'11'00'01: {
    // Reverse each 64-bit lane.
    return _VRev64(OpSize::i128Bit, OpSize::i32Bit, Src);
  }
  case 0b10'11'10'11: {
    ///< Weird top two elements reverse and broadcast
    Ref Tmp = _VZip2(OpSize::i128Bit, OpSize::i64Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b10'11'11'10: {
    ///< Weird move top two elements to bottom and reverse in the top half
    Ref Tmp = _VZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b11'00'00'11: {
    ///< Weird low plus high elements swizzle plus broadcast and reverse
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VZip2(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b11'00'11'00: {
    ///< Weird low plus high element broadcast
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 4);
    Tmp = _VZip2(OpSize::i128Bit, OpSize::i64Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 4);
  }
  case 0b11'01'01'11: {
    ///< Weird odd elements swizzle plus broadcast and reverse
    Ref Tmp = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    Tmp = _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b11'01'11'01:
    ///< Odd elements broadcast
    return _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
  case 0b11'10'10'11: {
    ///< Rotate top two elements in to bottom half of the register
    Ref Tmp = _VZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 12);
  }
  case 0b11'10'11'10:
    ///< Duplicate Top 64-bits
    return _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Src, 1);
  case 0b11'11'00'00: {
    ///< Weird Broadcast bottom and top element to each half of the register
    Ref Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src, Src, 12);
    Tmp = _VRev64(OpSize::i128Bit, OpSize::i32Bit, Tmp);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b11'11'01'01: {
    ///< Broadcast odd elements to each half of the register
    Ref Tmp = _VUnZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
    return _VZip(OpSize::i128Bit, OpSize::i32Bit, Tmp, Tmp);
  }
  case 0b11'11'10'10:
    ///< Broadcast top two elements to each half of the register
    return _VZip2(OpSize::i128Bit, OpSize::i32Bit, Src, Src);
  default: {
    // No dedicated sequence for this immediate: fall back to a table lookup.
    // PSHUFD needs to scale index by 16.
    auto LookupIndexes =
      LoadAndCacheIndexedNamedVectorConstant(OpSize::i128Bit, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD, Shuffle * 16);
    return _VTBL1(OpSize::i128Bit, Src, LookupIndexes);
  }
  }
}

// PSHUFD: shuffles the 32-bit elements of the source by the immediate and
// stores the result to the destination.
void OpDispatchBuilder::PSHUFDOp(OpcodeArgs) {
  const uint16_t Selector = Op->Src[1].Data.Literal.Value;

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Shuffled = Single128Bit4ByteVectorShuffle(Input, Selector);

  StoreResultFPR(Op, Shuffled);
}

// VEX word shuffle within the low (Low == true) or high (Low == false) group
// of four elements of each 128-bit lane. The same immediate selectors are
// applied to both lanes of a 256-bit operation.
//
// Note/TODO: With better immediate facilities or vector loading in our IR
//            most of this could collapse into building a table index register
//            followed by a TBL.
//
//            SVE's INDEX instruction behaves essentially like std::iota
//            (seed a range with an initial value and increment each successive
//            element), which makes it a good fit. It is just a matter of
//            exposing these facilities in a way that composes well.
//
//            Either way it should be much nicer than repeated inserts.
void OpDispatchBuilder::VPSHUFWOp(OpcodeArgs, IR::OpSize ElementSize, bool Low) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto Is256Bit = SrcSize == OpSize::i256Bit;
  auto Shuffle = Op->Src[1].Literal();

  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // First element of the group of four that gets shuffled.
  const size_t BaseElement = Low ? 0 : 4;
  // Distance, in elements, from a position in the lower lane to the same
  // position in the upper lane.
  const auto UpperLaneOffset = IR::NumElements(OpSize::i128Bit, ElementSize);

  Ref Result = Src;
  for (size_t Slot = 0; Slot < 4; Slot++) {
    const auto Picked = Shuffle & 0b11;

    // Lower lane.
    Result = _VInsElement(SrcSize, ElementSize, BaseElement + Slot, BaseElement + Picked, Result, Src);

    if (Is256Bit) {
      // Upper lane, same selector.
      Result = _VInsElement(SrcSize, ElementSize, BaseElement + UpperLaneOffset + Slot, BaseElement + UpperLaneOffset + Picked, Result, Src);
    }

    Shuffle >>= 2;
  }

  StoreResultFPR(Op, Result);
}

// Shared implementation of SHUFPS/SHUFPD and their VEX forms.
//
// DstSize     - Width of the operation.
// ElementSize - 32-bit for the PS forms; the other branch is written for
//               64-bit elements (PD forms).
// Src1, Src2  - Source vectors. Within each 128-bit lane the lower half of the
//               result elements are selected from Src1 and the upper half from
//               Src2 (see the Srcs table below).
// Shuffle     - Immediate holding the element selectors.
//
// Returns the shuffled vector.
//
// 256-bit operations are built with per-element inserts. 128-bit operations
// first try a set of special-cased immediates and otherwise fall back to a
// two-register table lookup (32-bit elements) or one of four fixed sequences
// (64-bit elements).
Ref OpDispatchBuilder::SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle) {
  // Since 256-bit variants and up don't lane cross, we can construct
  // everything in terms of the 128-variant, as each lane is essentially
  // its own 128-bit segment.
  const uint8_t NumElements = IR::NumElements(OpSize::i128Bit, ElementSize);
  const uint8_t HalfNumElements = NumElements >> 1;

  const bool Is256Bit = DstSize == OpSize::i256Bit;

  // Per-destination-element source vector within a 128-bit lane:
  // first half comes from Src1, second half from Src2.
  std::array<Ref, 4> Srcs {};
  for (size_t i = 0; i < HalfNumElements; ++i) {
    Srcs[i] = Src1;
  }
  for (size_t i = HalfNumElements; i < NumElements; ++i) {
    Srcs[i] = Src2;
  }

  Ref Dest = Src1;
  // Mask and width (in bits) of a single element selector in the immediate.
  const uint8_t SelectionMask = NumElements - 1;
  const uint8_t ShiftAmount = std::popcount(SelectionMask);

  if (Is256Bit) {
    for (uint8_t Element = 0; Element < NumElements; ++Element) {
      const auto SrcIndex1 = Shuffle & SelectionMask;

      // AVX differs the behavior of VSHUFPD and VSHUFPS.
      // The same immediate bits are used for both lanes with VSHUFPS,
      // but VSHUFPD uses different immediate bits for each lane.
      const auto SrcIndex2 = ElementSize == OpSize::i32Bit ? SrcIndex1 : ((Shuffle >> 2) & SelectionMask);

      // Lower lane first, then the matching element of the upper lane.
      Ref Insert = _VInsElement(DstSize, ElementSize, Element, SrcIndex1, Dest, Srcs[Element]);
      Dest = _VInsElement(DstSize, ElementSize, Element + NumElements, SrcIndex2 + NumElements, Insert, Srcs[Element]);

      Shuffle >>= ShiftAmount;
    }
  } else {
    if (ElementSize == OpSize::i32Bit) {
      // We can shuffle optimally in a lot of cases.
      // TODO: We can optimize more of these cases.
      switch (Shuffle) {
      case 0b01'00'01'00:
        // Combining of low 64-bits.
        // Dest[63:0]   = Src1[63:0]
        // Dest[127:64] = Src2[63:0]
        return _VZip(DstSize, OpSize::i64Bit, Src1, Src2);
      case 0b11'10'11'10:
        // Combining of high 64-bits.
        // Dest[63:0]   = Src1[127:64]
        // Dest[127:64] = Src2[127:64]
        return _VZip2(DstSize, OpSize::i64Bit, Src1, Src2);
      case 0b11'10'01'00:
        // Mixing Low and high elements
        // Dest[63:0]   = Src1[63:0]
        // Dest[127:64] = Src2[127:64]
        return _VInsElement(DstSize, OpSize::i64Bit, 1, 1, Src1, Src2);
      case 0b01'00'11'10:
        // Mixing Low and high elements, inverse of above
        // Dest[63:0]   = Src1[127:64]
        // Dest[127:64] = Src2[63:0]
        return _VExtr(DstSize, OpSize::i8Bit, Src2, Src1, 8);
      case 0b10'00'10'00:
        // Mixing even elements.
        // Dest[31:0]   = Src1[31:0]
        // Dest[63:32]  = Src1[95:64]
        // Dest[95:64]  = Src2[31:0]
        // Dest[127:96] = Src2[95:64]
        return _VUnZip(DstSize, ElementSize, Src1, Src2);
      case 0b11'01'11'01:
        // Mixing odd elements.
        // Dest[31:0]   = Src1[63:32]
        // Dest[63:32]  = Src1[127:96]
        // Dest[95:64]  = Src2[63:32]
        // Dest[127:96] = Src2[127:96]
        return _VUnZip2(DstSize, ElementSize, Src1, Src2);
      case 0b11'10'00'00:
      case 0b11'10'01'01:
      case 0b11'10'10'10:
      case 0b11'10'11'11: {
        // Bottom elements duplicated, Top 64-bits inserted
        auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1, Shuffle & 0b11);
        return _VZip2(DstSize, OpSize::i64Bit, DupSrc1, Src2);
      }
      case 0b01'00'00'00:
      case 0b01'00'01'01:
      case 0b01'00'10'10:
      case 0b01'00'11'11: {
        // Bottom elements duplicated, Bottom 64-bits inserted
        auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1, Shuffle & 0b11);
        return _VZip(DstSize, OpSize::i64Bit, DupSrc1, Src2);
      }
      case 0b00'00'01'00:
      case 0b01'01'01'00:
      case 0b10'10'01'00:
      case 0b11'11'01'00: {
        // Top elements duplicated, Bottom 64-bits inserted
        auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2, (Shuffle >> 4) & 0b11);
        return _VZip(DstSize, OpSize::i64Bit, Src1, DupSrc2);
      }
      case 0b00'00'11'10:
      case 0b01'01'11'10:
      case 0b10'10'11'10:
      case 0b11'11'11'10: {
        // Top elements duplicated, Top 64-bits inserted
        auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2, (Shuffle >> 4) & 0b11);
        return _VZip2(DstSize, OpSize::i64Bit, Src1, DupSrc2);
      }
      case 0b01'00'01'11: {
        // TODO: This doesn't generate optimal code.
        // RA doesn't understand that Src1 is dead after VInsElement due to SRA class differences.
        // With RA fixes this would be 2 instructions.
        // Odd elements inverted, Low 64-bits inserted
        Src1 = _VInsElement(DstSize, OpSize::i32Bit, 0, 3, Src1, Src1);
        return _VZip(DstSize, OpSize::i64Bit, Src1, Src2);
      }
      case 0b11'10'01'11: {
        // TODO: This doesn't generate optimal code.
        // RA doesn't understand that Src1 is dead after VInsElement due to SRA class differences.
        // With RA fixes this would be 2 instructions.
        // Odd elements inverted, Top 64-bits inserted
        Src1 = _VInsElement(DstSize, OpSize::i32Bit, 0, 3, Src1, Src1);
        return _VInsElement(DstSize, OpSize::i64Bit, 1, 1, Src1, Src2);
      }
      case 0b01'00'00'01: {
        // Lower 32-bit elements inverted, low 64-bits inserted
        Src1 = _VRev64(DstSize, OpSize::i32Bit, Src1);
        return _VZip(DstSize, OpSize::i64Bit, Src1, Src2);
      }
      case 0b11'10'00'01: {
        // TODO: This doesn't generate optimal code.
        // RA doesn't understand that Src1 is dead after VInsElement due to SRA class differences.
        // With RA fixes this would be 2 instructions.
        // Lower 32-bit elements inverted, Top 64-bits inserted
        Src1 = _VRev64(DstSize, OpSize::i32Bit, Src1);
        return _VInsElement(DstSize, OpSize::i64Bit, 1, 1, Src1, Src2);
      }
      case 0b00'00'00'00:
      case 0b00'00'01'01:
      case 0b00'00'10'10:
      case 0b00'00'11'11:
      case 0b01'01'00'00:
      case 0b01'01'01'01:
      case 0b01'01'10'10:
      case 0b01'01'11'11:
      case 0b10'10'00'00:
      case 0b10'10'01'01:
      case 0b10'10'10'10:
      case 0b10'10'11'11:
      case 0b11'11'00'00:
      case 0b11'11'01'01:
      case 0b11'11'10'10:
      case 0b11'11'11'11: {
        // Duplicate element in upper and lower across each 64-bit segment.
        auto DupSrc1 = _VDupElement(DstSize, ElementSize, Src1, Shuffle & 0b11);
        auto DupSrc2 = _VDupElement(DstSize, ElementSize, Src2, (Shuffle >> 4) & 0b11);
        return _VZip(DstSize, OpSize::i64Bit, DupSrc1, DupSrc2);
      }
      default:
        // Use a TBL2 operation to handle this implementation.
        auto LookupIndexes =
          LoadAndCacheIndexedNamedVectorConstant(DstSize, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS, Shuffle * 16);
        return _VTBL2(DstSize, Src1, Src2, LookupIndexes);
      }
    } else {
      // Only the low two immediate bits matter here:
      // bit 0 selects the Src1 element, bit 1 selects the Src2 element.
      switch (Shuffle & 0b11) {
      case 0b00:
        // Low 64-bits of each source interleaved.
        return _VZip(DstSize, ElementSize, Src1, Src2);
      case 0b01:
        // Upper 64-bits of Src1 in lower bits
        // Lower 64-bits of Src2 in upper bits.
        return _VExtr(DstSize, OpSize::i8Bit, Src2, Src1, 8);
      case 0b10:
        // Lower 64-bits of Src1 in lower bits.
        // Upper 64-bits of Src2 in upper bits.
        return _VInsElement(DstSize, ElementSize, 1, 1, Src1, Src2);
      case 0b11:
        // Upper 64-bits of each source interleaved.
        return _VZip2(DstSize, ElementSize, Src1, Src2);
      }
    }

    // Generic element-by-element insert fallback.
    // NOTE(review): every case of both switches above returns (the 32-bit
    // switch has a returning default, the other switch covers all four values
    // of `Shuffle & 0b11`), so this loop does not appear to be reachable.
    for (uint8_t Element = 0; Element < NumElements; ++Element) {
      const auto SrcIndex = Shuffle & SelectionMask;
      Dest = _VInsElement(DstSize, ElementSize, Element, SrcIndex, Dest, Srcs[Element]);
      Shuffle >>= ShiftAmount;
    }
  }

  return Dest;
}

// Two-operand SHUFPS/SHUFPD: destination register is the first source,
// Src[0] is the second source and Src[1] holds the immediate.
void OpDispatchBuilder::SHUFOp(OpcodeArgs, IR::OpSize ElementSize) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const uint8_t Selector = Op->Src[1].Literal();

  const auto DstSize = OpSizeFromDst(Op);
  StoreResultFPR(Op, SHUFOpImpl(Op, DstSize, ElementSize, First, Second, Selector));
}

// Three-operand (VEX) SHUFPS/SHUFPD: Src[0] and Src[1] are the two sources
// and Src[2] holds the immediate.
void OpDispatchBuilder::VSHUFOp(OpcodeArgs, IR::OpSize ElementSize) {
  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  const uint8_t Selector = Op->Src[2].Literal();

  const auto DstSize = OpSizeFromDst(Op);
  StoreResultFPR(Op, SHUFOpImpl(Op, DstSize, ElementSize, First, Second, Selector));
}

// Three-operand (VEX) AND-NOT.
// Note that the operands are handed to _VAndn in swapped order: the second
// source is passed first and the first source second.
void OpDispatchBuilder::VANDNOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref FirstSource = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref SecondSource = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  StoreResultFPR(Op, _VAndn(VecSize, VecSize, SecondSource, FirstSource));
}

// VEX horizontal (pairwise) add.
//
// IROp        - IR opcode to use for the pairwise add; instantiated below with
//               OP_VADDP and OP_VFADDP.
// ElementSize - Size of each element being added.
//
// The node is built with _VFAddP and handed to DeriveOp together with IROp.
// NOTE(review): DeriveOp is a macro defined outside this view; presumably it
// declares `Res` and retargets the emitted node to IROp -- confirm.
template<IROps IROp, IR::OpSize ElementSize>
void OpDispatchBuilder::VHADDPOp(OpcodeArgs) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto Is256Bit = SrcSize == OpSize::i256Bit;

  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  DeriveOp(Res, IROp, _VFAddP(SrcSize, ElementSize, Src1, Src2));

  Ref Dest = Res;
  if (Is256Bit) {
    // Swap 64-bit elements 1 and 2 of the pairwise result:
    // first copy element 2 into slot 1, then the original element 1 into slot 2.
    Dest = _VInsElement(SrcSize, OpSize::i64Bit, 1, 2, Res, Res);
    Dest = _VInsElement(SrcSize, OpSize::i64Bit, 2, 1, Dest, Res);
  }

  StoreResultFPR(Op, Dest);
}

// Explicit instantiations: two with OP_VADDP (16-bit and 32-bit elements) and
// two with OP_VFADDP (32-bit and 64-bit elements).
template void OpDispatchBuilder::VHADDPOp<IR::OP_VADDP, OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::VHADDPOp<IR::OP_VADDP, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VHADDPOp<IR::OP_VFADDP, OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VHADDPOp<IR::OP_VFADDP, OpSize::i64Bit>(OpcodeArgs);

// VEX broadcast of a single element to every element of the destination.
// The element comes either from element 0 of a register source or directly
// from memory.
//
// The result is stored without an extra zero-extension step, since the
// implementations use zero extending AdvSIMD or zeroing SVE loads internally.
void OpDispatchBuilder::VBROADCASTOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);

  if (!Op->Src[0].IsGPR()) {
    // Memory source: compute the address in a GPR and broadcast from there.
    Ref Address = MakeSegmentAddress(Op, Op->Src[0], GetGPROpSize());
    Ref Broadcast = _VBroadcastFromMem(DstSize, ElementSize, Address);
    StoreResultFPR(Op, Broadcast);
    return;
  }

  // Register source: duplicate element 0.
  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Broadcast = _VDupElement(DstSize, ElementSize, Src, 0);
  StoreResultFPR(Op, Broadcast);
}

// Shared implementation of the PINSR* family: inserts one element into a
// vector at the index given by the immediate.
//
// ElementSize - Size of the element being inserted.
// Src1Op      - Vector receiving the element.
// Src2Op      - Element source; either a GPR or a memory operand.
// Imm         - Immediate holding the element index; it is wrapped to the
//               number of elements in the destination.
//
// Returns the vector with the element inserted.
Ref OpDispatchBuilder::PINSROpImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                                   const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm) {
  const auto VecSize = OpSizeFromDst(Op);
  const auto ElementCount = IR::NumElements(VecSize, ElementSize);
  const uint64_t Slot = Imm.Literal() & (ElementCount - 1);

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, VecSize, Op->Flags);

  if (!Src2Op.IsGPR()) {
    // Memory operand: load just the one element straight into the vector.
    Ref Address = MakeSegmentAddress(Op, Src2Op);
    return _VLoadVectorElement(VecSize, ElementSize, Base, Slot, Address);
  }

  // GPR operand: insert directly from the general purpose register.
  auto Value = LoadSourceGPR_WithOpSize(Op, Src2Op, GetGPROpSize(), Op->Flags);
  return _VInsGPR(VecSize, ElementSize, Slot, Base, Value);
}

// Two-operand PINSR*: the destination register receives the element taken
// from Src[0], at the index held in Src[1].
template<IR::OpSize ElementSize>
void OpDispatchBuilder::PINSROp(OpcodeArgs) {
  StoreResultFPR(Op, PINSROpImpl(Op, ElementSize, Op->Dest, Op->Src[0], Op->Src[1]));
}

// Explicit instantiations for every supported element size.
template void OpDispatchBuilder::PINSROp<OpSize::i8Bit>(OpcodeArgs);
template void OpDispatchBuilder::PINSROp<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::PINSROp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::PINSROp<OpSize::i64Bit>(OpcodeArgs);

// VEX byte insert. When the destination register is the same as the first
// source register, the result is additionally routed through a 128-bit VMov
// before being stored.
void OpDispatchBuilder::VPINSRBOp(OpcodeArgs) {
  Ref Inserted = PINSROpImpl(Op, OpSize::i8Bit, Op->Src[0], Op->Src[1], Op->Src[2]);

  const bool DestAliasesSource = Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;
  if (DestAliasesSource) {
    Inserted = _VMov(OpSize::i128Bit, Inserted);
  }

  StoreResultFPR(Op, Inserted);
}

// VEX dword/qword insert; the element size is taken from the source operand
// size. When the destination register is the same as the first source
// register, the result is additionally routed through a 128-bit VMov before
// being stored.
void OpDispatchBuilder::VPINSRDQOp(OpcodeArgs) {
  const auto ElementSize = OpSizeFromSrc(Op);
  Ref Inserted = PINSROpImpl(Op, ElementSize, Op->Src[0], Op->Src[1], Op->Src[2]);

  const bool DestAliasesSource = Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;
  if (DestAliasesSource) {
    Inserted = _VMov(OpSize::i128Bit, Inserted);
  }

  StoreResultFPR(Op, Inserted);
}

// VEX word insert. When the destination register is the same as the first
// source register, the result is additionally routed through a 128-bit VMov
// before being stored.
void OpDispatchBuilder::VPINSRWOp(OpcodeArgs) {
  Ref Inserted = PINSROpImpl(Op, OpSize::i16Bit, Op->Src[0], Op->Src[1], Op->Src[2]);

  const bool DestAliasesSource = Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;
  if (DestAliasesSource) {
    Inserted = _VMov(OpSize::i128Bit, Inserted);
  }

  StoreResultFPR(Op, Inserted);
}

// Shared implementation of INSERTPS / VINSERTPS.
//
// Src1 - Vector receiving the element.
// Src2 - Element source; a register or a 32-bit memory operand.
// Imm  - Control byte, decoded as:
//          bits [7:6] source element index (forced to 0 for memory sources)
//          bits [5:4] destination element index
//          bits [3:0] zero mask, one bit per destination element
//
// Returns the resulting vector. The zero mask is applied after the insert.
Ref OpDispatchBuilder::InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
                                      const X86Tables::DecodedOperand& Imm) {
  const uint8_t Control = Imm.Literal();
  const uint8_t ZeroMask = Control & 0xF;
  const uint8_t DstSlot = (Control >> 4) & 0b11;

  const auto DstSize = OpSizeFromDst(Op);

  // Every element is zeroed: neither operand needs to be loaded at all.
  if (ZeroMask == 0xF) {
    return LoadZeroVector(DstSize);
  }

  Ref Result = LoadSourceFPR_WithOpSize(Op, Src1, DstSize, Op->Flags);

  // Skip the insert entirely if the zero mask wipes the target element anyway.
  const bool TargetSurvives = (ZeroMask & (1 << DstSlot)) == 0;
  if (TargetSurvives) {
    uint8_t SrcSlot = (Control >> 6);
    Ref Element {};
    if (Src2.IsGPR()) {
      Element = LoadSourceFPR(Op, Src2, Op->Flags);
    } else {
      // Memory sources only provide a single element, so the source index is forced to zero.
      SrcSlot = 0;
      Element = LoadSourceFPR_WithOpSize(Op, Src2, OpSize::i32Bit, Op->Flags);
    }

    Result = _VInsElement(DstSize, OpSize::i32Bit, DstSlot, SrcSlot, Result, Element);
  }

  // Apply the zero mask by inserting element 0 of a zero vector into every
  // flagged position.
  if (ZeroMask != 0) {
    auto Zero = LoadZeroVector(DstSize);
    for (size_t Slot = 0; Slot < 4; ++Slot) {
      if (ZeroMask & (1 << Slot)) {
        Result = _VInsElement(DstSize, OpSize::i32Bit, Slot, 0, Result, Zero);
      }
    }
  }

  return Result;
}

// Two-operand INSERTPS: the destination register is the vector being
// inserted into, Src[0] supplies the element and Src[1] is the control byte.
void OpDispatchBuilder::InsertPSOp(OpcodeArgs) {
  StoreResultFPR(Op, InsertPSOpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1]));
}

// Three-operand form: Src[0] is the vector being inserted into, Src[1]
// supplies the element and Src[2] is the control immediate.
void OpDispatchBuilder::VINSERTPSOp(OpcodeArgs) {
  StoreResultFPR(Op, InsertPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]));
}

// Extracts one element of the source vector (Src[0]) selected by the
// immediate in Src[1] and writes it either to a GPR or to memory.
//
// ElementSize is the nominal element width; see the 32-bit fixup below.
void OpDispatchBuilder::PExtrOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref Vector = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  uint64_t Lane = Op->Src[1].Literal();

  // PEXTRD and PEXTRQ share an encoding and differ only in REX.W / VEX.W, so
  // a nominal 32-bit element may really be 64-bit. The destination size tells
  // the two apart and is used as the element size in that case.
  const auto EffectiveElementSize = (ElementSize == OpSize::i32Bit) ? DstSize : ElementSize;

  // The AVX version only operates on 128 bits, hence the clamp on the source size.
  const uint8_t LaneCount = IR::NumElements(std::min(OpSizeFromSrc(Op), OpSize::i128Bit), EffectiveElementSize);
  Lane &= LaneCount - 1;

  if (!Op->Dest.IsGPR()) {
    // Memory destination: store exactly one element of the extracted size.
    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    _VStoreVectorElement(OpSize::i128Bit, EffectiveElementSize, Vector, Lane, Address);
    return;
  }

  // Register destination: the extract already zero extends, so the value is
  // stored at full GPR width.
  const auto GPRSize = GetGPROpSize();
  Ref Extracted = _VExtractToGPR(OpSize::i128Bit, EffectiveElementSize, Vector, Lane);
  StoreResultGPR_WithOpSize(Op, Op->Dest, Extracted, GPRSize);
}

// Extracts the 128-bit half of Src[0] selected by bit 0 of the immediate in
// Src[1] and stores it to the destination.
void OpDispatchBuilder::VEXTRACT128Op(OpcodeArgs) {
  const bool DestIsRegister = Op->Dest.IsGPR();
  // Register destinations are stored at 256-bit width, others at 128-bit.
  const auto StoreSize = DestIsRegister ? OpSize::i256Bit : OpSize::i128Bit;
  const auto Half = Op->Src[1].Literal() & 0b1;

  Ref Vector = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Half zero is already in the low 128 bits. Otherwise the selected 128-bit
  // element is replicated so that it lands in the low half.
  Ref Result = Vector;
  if (Half != 0) {
    Result = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, Vector, Half);
  }

  // Register destinations get an explicit 128-bit move before the store.
  if (DestIsRegister) {
    Result = _VMov(OpSize::i128Bit, Result);
  }

  StoreResultFPR_WithOpSize(Op, Op->Dest, Result, StoreSize);
}

// Shared body of the PSIGN handlers. Src2 is run through _VSQSHL and then
// _VSRSHR, both by (element bits - 1), and the outcome is multiplied
// element-wise with Src1. Returns the product.
Ref OpDispatchBuilder::PSIGNImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
  const auto VecSize = OpSizeFromSrc(Op);
  const auto TopBitShift = IR::OpSizeAsBits(ElementSize) - 1;

  Ref Factor = _VSQSHL(VecSize, ElementSize, Src2, TopBitShift);
  Factor = _VSRSHR(VecSize, ElementSize, Factor, TopBitShift);

  return _VMul(VecSize, ElementSize, Src1, Factor);
}

// Two-operand form: the destination is the value operand and Src[0] is the
// control operand passed to PSIGNImpl.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::PSIGN(OpcodeArgs) {
  Ref Control = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);

  StoreResultFPR(Op, PSIGNImpl(Op, ElementSize, Value, Control));
}

template void OpDispatchBuilder::PSIGN<OpSize::i8Bit>(OpcodeArgs);
template void OpDispatchBuilder::PSIGN<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::PSIGN<OpSize::i32Bit>(OpcodeArgs);

// Three-operand form: Src[0] is the value operand and Src[1] is the control
// operand passed to PSIGNImpl.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VPSIGN(OpcodeArgs) {
  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Control = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  StoreResultFPR(Op, PSIGNImpl(Op, ElementSize, Value, Control));
}

template void OpDispatchBuilder::VPSIGN<OpSize::i8Bit>(OpcodeArgs);
template void OpDispatchBuilder::VPSIGN<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::VPSIGN<OpSize::i32Bit>(OpcodeArgs);

// Shared body of the register-count logical right shifts. The vector size is
// taken from the op's source size; the shift-count vector is always consumed
// with an element size of 8.
Ref OpDispatchBuilder::PSRLDOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) {
  return _VUShrSWide(OpSizeFromSrc(Op), ElementSize, Src, ShiftVec);
}

// Two-operand form: shifts the destination by the count held in Src[0].
void OpDispatchBuilder::PSRLDOp(OpcodeArgs, IR::OpSize ElementSize) {
  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Count = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PSRLDOpImpl(Op, ElementSize, Value, Count));
}

// Three-operand logical right shift: shifts every element of Src[0] by the
// count held in Src[1] and stores the result.
//
// ElementSize is the width of the elements being shifted.
void OpDispatchBuilder::VPSRLDOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = GetDstSize(Op);
  const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;

  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The shift-count operand is xmm/m128 even for the 256-bit form of the
  // instruction, so it must be loaded as 128 bits. Loading it at the full
  // operation width would read 32 bytes from a 16-byte memory operand.
  // This matches how VPSLLOp loads its count.
  Ref Shift = LoadSourceFPR_WithOpSize(Op, Op->Src[1], OpSize::i128Bit, Op->Flags);
  Ref Result = PSRLDOpImpl(Op, ElementSize, Src, Shift);

  // 128-bit forms pass the result through an explicit 128-bit move before storing.
  if (Is128Bit) {
    Result = _VMov(OpSize::i128Bit, Result);
  }
  StoreResultFPR(Op, Result);
}

// Two-operand logical right shift of the destination by the immediate in Src[1].
void OpDispatchBuilder::PSRLI(OpcodeArgs, IR::OpSize ElementSize) {
  const uint64_t Amount = Op->Src[1].Literal();
  if (Amount == 0) [[unlikely]] {
    // A zero shift leaves the destination untouched, so emit nothing.
    return;
  }

  const auto VecSize = OpSizeFromSrc(Op);

  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Shifted = _VUShrI(VecSize, ElementSize, Value, Amount);
  StoreResultFPR(Op, Shifted);
}

// Three-operand logical right shift of Src[0] by the immediate in Src[1].
void OpDispatchBuilder::VPSRLIOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VecSize = OpSizeFromSrc(Op);
  const bool Is128Bit = VecSize == OpSize::i128Bit;
  const uint64_t Amount = Op->Src[1].Literal();

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Result = Input;

  if (Amount == 0) [[unlikely]] {
    // Nothing to shift; the 128-bit form still gets an explicit 128-bit move.
    if (Is128Bit) {
      Result = _VMov(OpSize::i128Bit, Result);
    }
  } else {
    Result = _VUShrI(VecSize, ElementSize, Input, Amount);
  }

  StoreResultFPR(Op, Result);
}

// Shared body of the immediate left shifts. Returns Src unchanged for a zero
// shift, otherwise the shifted vector sized by the op's source size.
Ref OpDispatchBuilder::PSLLIImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, uint64_t Shift) {
  if (Shift != 0) [[likely]] {
    return _VShlI(OpSizeFromSrc(Op), ElementSize, Src, Shift);
  }

  // Zero shift: hand the source straight back.
  return Src;
}

// Two-operand left shift of the destination by the immediate in Src[1].
void OpDispatchBuilder::PSLLI(OpcodeArgs, IR::OpSize ElementSize) {
  const uint64_t Amount = Op->Src[1].Literal();
  if (Amount == 0) [[unlikely]] {
    // A zero shift leaves the destination untouched, so emit nothing.
    return;
  }

  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Shifted = PSLLIImpl(Op, ElementSize, Value, Amount);

  StoreResultFPR(Op, Shifted);
}

// Three-operand left shift of Src[0] by the immediate in Src[1].
void OpDispatchBuilder::VPSLLIOp(OpcodeArgs, IR::OpSize ElementSize) {
  const uint64_t Amount = Op->Src[1].Literal();
  const auto DstSize = GetDstSize(Op);
  const bool Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Result = PSLLIImpl(Op, ElementSize, Input, Amount);

  // A zero shift returns the source unchanged; the 128-bit form then gets an
  // explicit 128-bit move before the store.
  if (Amount == 0) {
    if (Is128Bit) {
      Result = _VMov(OpSize::i128Bit, Result);
    }
  }

  StoreResultFPR(Op, Result);
}

// Shared body of the register-count left shifts. The vector size is taken
// from the op's destination size; the shift-count vector is always consumed
// with an element size of 8.
Ref OpDispatchBuilder::PSLLImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) {
  return _VUShlSWide(OpSizeFromDst(Op), ElementSize, Src, ShiftVec);
}

// Two-operand form: shifts the destination left by the count held in Src[0].
void OpDispatchBuilder::PSLL(OpcodeArgs, IR::OpSize ElementSize) {
  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Count = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PSLLImpl(Op, ElementSize, Value, Count));
}

// Three-operand form: shifts Src[0] left by the count held in Src[1].
void OpDispatchBuilder::VPSLLOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = GetDstSize(Op);
  const bool Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;

  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The count operand is always loaded as 128 bits, independent of the op width.
  Ref Count = LoadSourceFPR_WithOpSize(Op, Op->Src[1], OpSize::i128Bit, Op->Flags);
  Ref Shifted = PSLLImpl(Op, ElementSize, Value, Count);

  // 128-bit forms pass the result through an explicit 128-bit move before storing.
  if (Is128Bit) {
    Shifted = _VMov(OpSize::i128Bit, Shifted);
  }
  StoreResultFPR(Op, Shifted);
}

// Shared body of the register-count signed right shifts. The vector size is
// taken from the op's destination size; the shift-count vector is always
// consumed with an element size of 8.
Ref OpDispatchBuilder::PSRAOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec) {
  return _VSShrSWide(OpSizeFromDst(Op), ElementSize, Src, ShiftVec);
}

// Two-operand form: signed right shift of the destination by the count in Src[0].
void OpDispatchBuilder::PSRAOp(OpcodeArgs, IR::OpSize ElementSize) {
  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Count = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PSRAOpImpl(Op, ElementSize, Value, Count));
}

// Three-operand signed right shift: shifts every element of Src[0] by the
// count held in Src[1] and stores the result.
//
// ElementSize is the width of the elements being shifted.
void OpDispatchBuilder::VPSRAOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = GetDstSize(Op);
  const auto Is128Bit = DstSize == Core::CPUState::XMM_SSE_REG_SIZE;

  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The shift-count operand is xmm/m128 even for the 256-bit form of the
  // instruction, so it must be loaded as 128 bits. Loading it at the full
  // operation width would read 32 bytes from a 16-byte memory operand.
  // This matches how VPSLLOp loads its count.
  Ref Src2 = LoadSourceFPR_WithOpSize(Op, Op->Src[1], OpSize::i128Bit, Op->Flags);
  Ref Result = PSRAOpImpl(Op, ElementSize, Src1, Src2);

  // 128-bit forms pass the result through an explicit 128-bit move before storing.
  if (Is128Bit) {
    Result = _VMov(OpSize::i128Bit, Result);
  }
  StoreResultFPR(Op, Result);
}

// Two-operand byte-wise right shift of the destination by the immediate in Src[1].
void OpDispatchBuilder::PSRLDQ(OpcodeArgs) {
  const uint64_t ByteShift = Op->Src[1].Literal();
  if (ByteShift == 0) [[unlikely]] {
    // A zero shift leaves the destination untouched, so emit nothing.
    return;
  }

  const auto VecSize = OpSizeFromDst(Op);

  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Shifted = LoadZeroVector(VecSize);

  // Shifts of the full vector width or more leave the zero vector as the
  // result; otherwise bytes are extracted across the {zero, value} pair.
  const bool InRange = ByteShift < IR::OpSizeToSize(VecSize);
  if (InRange) {
    Shifted = _VExtr(VecSize, OpSize::i8Bit, Shifted, Value, ByteShift);
  }

  StoreResultFPR(Op, Shifted);
}

// Three-operand byte-wise right shift of Src[0] by the immediate in Src[1].
// For 256-bit operations each 128-bit half is shifted on its own.
void OpDispatchBuilder::VPSRLDQOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromDst(Op);
  const bool Is128Bit = VecSize == OpSize::i128Bit;
  const uint64_t ByteShift = Op->Src[1].Literal();

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  if (ByteShift == 0) [[unlikely]] {
    // Nothing to shift; the 128-bit form still gets an explicit 128-bit move.
    Ref Unshifted = Input;
    if (Is128Bit) {
      Unshifted = _VMov(OpSize::i128Bit, Input);
    }
    StoreResultFPR(Op, Unshifted);
    return;
  }

  // Out-of-range shifts keep this zero vector as the result.
  Ref Result = LoadZeroVector(VecSize);

  if (Is128Bit) {
    if (ByteShift < IR::OpSizeToSize(VecSize)) {
      Result = _VExtr(VecSize, OpSize::i8Bit, Result, Input, ByteShift);
    }
  } else if (ByteShift < Core::CPUState::XMM_SSE_REG_SIZE) {
    // Shift the low half with a 128-bit extract and the high half with a
    // full-width extract offset by 16 bytes, then recombine the halves.
    Ref LowHalf = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Result, Input, ByteShift);
    Ref HighHalf = _VExtr(VecSize, OpSize::i8Bit, Result, Input, 16 + ByteShift);

    Result = _VInsElement(VecSize, OpSize::i128Bit, 1, 0, LowHalf, HighHalf);
  }

  StoreResultFPR(Op, Result);
}

// Two-operand byte-wise left shift of the destination by the immediate in Src[1].
void OpDispatchBuilder::PSLLDQ(OpcodeArgs) {
  const uint64_t ByteShift = Op->Src[1].Literal();
  if (ByteShift == 0) [[unlikely]] {
    // A zero shift leaves the destination untouched, so emit nothing.
    return;
  }

  const auto VecSize = OpSizeFromDst(Op);

  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Shifted = LoadZeroVector(VecSize);

  // Shifts of the full vector width or more leave the zero vector as the
  // result; otherwise bytes are extracted across the {value, zero} pair.
  const bool InRange = ByteShift < IR::OpSizeToSize(VecSize);
  if (InRange) {
    Shifted = _VExtr(VecSize, OpSize::i8Bit, Value, Shifted, IR::OpSizeToSize(VecSize) - ByteShift);
  }

  StoreResultFPR(Op, Shifted);
}

// Three-operand byte-wise left shift of Src[0] by the immediate in Src[1].
// For 256-bit operations each 128-bit half is shifted on its own.
void OpDispatchBuilder::VPSLLDQOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromDst(Op);
  const auto VecBytes = IR::OpSizeToSize(VecSize);
  const bool Is128Bit = VecSize == OpSize::i128Bit;
  const uint64_t ByteShift = Op->Src[1].Literal();

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  if (ByteShift == 0) {
    // Nothing to shift; the 128-bit form still gets an explicit 128-bit move.
    Ref Unshifted = Input;
    if (Is128Bit) {
      Unshifted = _VMov(OpSize::i128Bit, Unshifted);
    }
    StoreResultFPR(Op, Unshifted);
    return;
  }

  // Out-of-range shifts keep this zero vector as the result.
  Ref Result = LoadZeroVector(VecSize);

  if (Is128Bit) {
    if (ByteShift < VecBytes) {
      Result = _VExtr(VecSize, OpSize::i8Bit, Input, Result, VecBytes - ByteShift);
    }
  } else if (ByteShift < Core::CPUState::XMM_SSE_REG_SIZE) {
    // Shift the low half with a 128-bit extract and the high half with a
    // full-width extract, then recombine the halves.
    Ref LowHalf = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Input, Result, 16 - ByteShift);
    Ref HighHalf = _VExtr(VecSize, OpSize::i8Bit, Input, Result, VecBytes - ByteShift);

    Result = _VInsElement(VecSize, OpSize::i128Bit, 1, 0, LowHalf, HighHalf);
  }

  StoreResultFPR(Op, Result);
}

// Two-operand signed right shift of the destination by the immediate in Src[1].
void OpDispatchBuilder::PSRAIOp(OpcodeArgs, IR::OpSize ElementSize) {
  const uint64_t Amount = Op->Src[1].Literal();
  if (Amount == 0) [[unlikely]] {
    // A zero shift leaves the destination untouched, so emit nothing.
    return;
  }

  const auto VecSize = OpSizeFromDst(Op);

  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Shifted = _VSShrI(VecSize, ElementSize, Value, Amount);
  StoreResultFPR(Op, Shifted);
}

// Three-operand signed right shift of Src[0] by the immediate in Src[1].
void OpDispatchBuilder::VPSRAIOp(OpcodeArgs, IR::OpSize ElementSize) {
  const uint64_t Amount = Op->Src[1].Literal();
  const auto VecSize = OpSizeFromDst(Op);
  const bool Is128Bit = VecSize == OpSize::i128Bit;

  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Result = Input;

  if (Amount == 0) [[unlikely]] {
    // Nothing to shift; the 128-bit form still gets an explicit 128-bit move.
    if (Is128Bit) {
      Result = _VMov(OpSize::i128Bit, Result);
    }
  } else {
    Result = _VSShrI(VecSize, ElementSize, Input, Amount);
  }

  StoreResultFPR(Op, Result);
}

// Shared body of the per-element variable shifts. Both operands are loaded at
// the destination size; the op's source size is passed as the element size.
// IROp selects which shift operation is produced.
void OpDispatchBuilder::AVXVariableShiftImpl(OpcodeArgs, IROps IROp) {
  const auto VecSize = OpSizeFromDst(Op);
  const auto ElementSize = OpSizeFromSrc(Op);

  Ref Values = LoadSourceFPR_WithOpSize(Op, Op->Src[0], VecSize, Op->Flags);
  Ref Counts = LoadSourceFPR_WithOpSize(Op, Op->Src[1], VecSize, Op->Flags);

  // NOTE(review): DeriveOp presumably builds the _VUShr node and retargets it
  // to IROp - confirm against the macro definition.
  DeriveOp(Shift, IROp, _VUShr(VecSize, ElementSize, Values, Counts, true));

  StoreResultFPR(Op, Shift);
}

// Per-element variable shift: forwards to AVXVariableShiftImpl with IROps::OP_VUSHL.
void OpDispatchBuilder::VPSLLVOp(OpcodeArgs) {
  AVXVariableShiftImpl(Op, IROps::OP_VUSHL);
}

// Per-element variable shift: forwards to AVXVariableShiftImpl with IROps::OP_VSSHR.
void OpDispatchBuilder::VPSRAVDOp(OpcodeArgs) {
  AVXVariableShiftImpl(Op, IROps::OP_VSSHR);
}

// Per-element variable shift: forwards to AVXVariableShiftImpl with IROps::OP_VUSHR.
void OpDispatchBuilder::VPSRLVOp(OpcodeArgs) {
  AVXVariableShiftImpl(Op, IROps::OP_VUSHR);
}

// Duplicates element 0 of Src[0] across a 128-bit vector, using the op's
// source size as the element size.
void OpDispatchBuilder::MOVDDUPOp(OpcodeArgs) {
  const auto ElementSize = OpSizeFromSrc(Op);

  // Register sources are loaded at full width so the vector is not zero
  // extended needlessly; memory sources load exactly one element.
  auto LoadSize = ElementSize;
  if (Op->Src[0].IsGPR()) {
    LoadSize = OpSize::i128Bit;
  }

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  Ref Duplicated = _VDupElement(OpSize::i128Bit, ElementSize, Input, 0);

  StoreResultFPR(Op, Duplicated);
}

// AVX form of the 64-bit element duplicate. The 128-bit form duplicates
// element 0; the 256-bit form uses _VTrn of the source with itself.
void OpDispatchBuilder::VMOVDDUPOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);
  const bool Is256Bit = VecSize == OpSize::i256Bit;

  // Registers and 256-bit memory operands load the whole vector; a 128-bit
  // operation reading memory only loads the single 64-bit element it needs.
  auto LoadSize = VecSize;
  if (!Op->Src[0].IsGPR() && !Is256Bit) {
    LoadSize = OpSize::i64Bit;
  }

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);

  Ref Duplicated {};
  if (!Is256Bit) {
    Duplicated = _VDupElement(VecSize, OpSize::i64Bit, Input, 0);
  } else {
    Duplicated = _VTrn(VecSize, OpSize::i64Bit, Input, Input);
  }

  StoreResultFPR(Op, Duplicated);
}

// Converts a signed integer operand (Src2Op) to a float of DstElementSize and
// inserts it into element 0 of the 128-bit vector loaded from Src1Op.
// Returns the merged vector; nothing is stored here.
Ref OpDispatchBuilder::CVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op,
                                         const X86Tables::DecodedOperand& Src2Op) {
  const auto IntSize = OpSizeFromSrc(Op);

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, OpSize::i128Bit, Op->Flags);

  const bool IntInRegister = Src2Op.IsGPR();
  Ref Converted {};
  if (IntInRegister || IntSize != DstElementSize) {
    // Register sources convert straight from the GPR. Memory sources whose
    // size differs from the destination element are also routed through a
    // GPR, because ARM GPR->FPR conversion supports mixed sizes while
    // FPR->FPR does not.
    Ref Integer = IntInRegister ? LoadSourceGPR_WithOpSize(Op, Src2Op, GetGPROpSize(), Op->Flags) : LoadSourceGPR(Op, Src2Op, Op->Flags);
    Converted = _Float_FromGPR_S(DstElementSize, IntSize, Integer);
  } else {
    // Same-size memory source (the cvtsi2s{s,d} case): load directly into a
    // vector register and convert there.
    Ref Integer = LoadSourceFPR(Op, Src2Op, Op->Flags);
    Converted = _Vector_SToF(IntSize, IntSize, Integer);
  }

  return _VInsElement(OpSize::i128Bit, DstElementSize, 0, 0, Base, Converted);
}

// Two-operand form: the destination provides the vector being merged into
// and Src[0] is the integer being converted.
template<IR::OpSize DstElementSize>
void OpDispatchBuilder::CVTGPR_To_FPR(OpcodeArgs) {
  StoreResultFPR(Op, CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Dest, Op->Src[0]));
}

template void OpDispatchBuilder::CVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::CVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);

// Three-operand form: Src[0] provides the vector being merged into and
// Src[1] is the integer being converted.
template<IR::OpSize DstElementSize>
void OpDispatchBuilder::AVXCVTGPR_To_FPR(OpcodeArgs) {
  StoreResultFPR(Op, CVTGPR_To_FPRImpl(Op, DstElementSize, Op->Src[0], Op->Src[1]));
}
template void OpDispatchBuilder::AVXCVTGPR_To_FPR<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXCVTGPR_To_FPR<OpSize::i64Bit>(OpcodeArgs);

// Converts the scalar float in Src (element width SrcElementSize) to a signed
// integer in a GPR. HostRoundingMode selects rounding with the host rounding
// mode before the conversion. Returns the GPR value.
Ref OpDispatchBuilder::CVTFPR_To_GPRImpl(OpcodeArgs, Ref Src, IR::OpSize SrcElementSize, bool HostRoundingMode) {
  // REX.W decides the GPR size; the instruction decides the source element size.
  const auto GPRSize = OpSizeFromDst(Op);

  if (CTX->HostFeatures.SupportsFRINTTS) {
    // With FRINTTS this takes two steps: round to the right integer (where
    // _Vector_FToISized matches x86 semantics), then move that into a GPR.
    Ref Rounded = _Vector_FToISized(SrcElementSize, SrcElementSize, Src, HostRoundingMode, GPRSize);
    return _Float_ToGPR_ZS(GPRSize, SrcElementSize, Rounded);
  }

  // Without hardware support, fixups before and after the conversion are
  // needed to emulate x86 semantics.
  if (HostRoundingMode) {
    Src = _Vector_FToI(SrcElementSize, SrcElementSize, Src, RoundMode::Host);
  }

  Ref Converted = _Float_ToGPR_ZS(GPRSize, SrcElementSize, Src);

  const bool Dst32 = GPRSize == OpSize::i32Bit;
  const bool Src32 = SrcElementSize == OpSize::i32Bit;

  // Value returned when the comparison against the float limit fails.
  Ref MaxI = Dst32 ? Constant(0x80000000) : Constant(0x8000000000000000);

  // Float limit matching the source/destination width combination.
  const auto LimitName = Src32 ? (Dst32 ? NAMED_VECTOR_CVTMAX_F32_I32 : NAMED_VECTOR_CVTMAX_F32_I64) :
                                 (Dst32 ? NAMED_VECTOR_CVTMAX_F64_I32 : NAMED_VECTOR_CVTMAX_F64_I64);
  Ref MaxF = LoadAndCacheNamedVectorConstant(SrcElementSize, LimitName);

  // Keep the converted value only while MaxF compares greater than the
  // source; otherwise produce MaxI.
  return _Select(GPRSize, SrcElementSize, CondClass::FGT, MaxF, Src, Converted, MaxI);
}

// Scalar float -> GPR conversion of Src[0], stored to the destination GPR.
template<IR::OpSize SrcElementSize, bool HostRoundingMode>
void OpDispatchBuilder::CVTFPR_To_GPR(OpcodeArgs) {
  // Register sources are loaded at full width so the vector is not zero
  // extended needlessly; memory sources load exactly one element.
  auto LoadSize = SrcElementSize;
  if (Op->Src[0].IsGPR()) {
    LoadSize = OpSize::i128Bit;
  }

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  Ref Integer = CVTFPR_To_GPRImpl(Op, Input, SrcElementSize, HostRoundingMode);
  StoreResultGPR(Op, Integer);
}

template void OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, false>(OpcodeArgs);

template void OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, false>(OpcodeArgs);

// Converts a vector of signed integers (Src[0]) to floats. When Widen is set
// the integers are sign-extended to twice SrcElementSize before converting.
// Returns the converted vector.
Ref OpDispatchBuilder::Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen) {
  const auto VecSize = OpSizeFromDst(Op);

  if (!Widen) {
    Ref Integers = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    return _Vector_SToF(VecSize, SrcElementSize, Integers);
  }

  // Register sources are loaded at full width so the vector is not zero
  // extended needlessly; memory sources load exactly the bytes required.
  const auto LoadSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : IR::SizeToOpSize(8 * (IR::OpSizeToSize(VecSize) / 16));
  Ref Integers = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);

  // Sign-extend, then convert at the doubled element size.
  Integers = _VSXTL(VecSize, SrcElementSize, Integers);
  const auto WideElementSize = SrcElementSize << 1;

  return _Vector_SToF(VecSize, WideElementSize, Integers);
}

// Vector integer -> float conversion of Src[0], stored to the destination.
template<IR::OpSize SrcElementSize, bool Widen>
void OpDispatchBuilder::Vector_CVT_Int_To_Float(OpcodeArgs) {
  StoreResultFPR(Op, Vector_CVT_Int_To_FloatImpl(Op, SrcElementSize, Widen));
}

template void OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>(OpcodeArgs);

// Converts a vector of floats (Src, element width SrcElementSize, vector width
// SrcSize) to 32-bit signed integers and returns the result vector.
//
// DstSize          - size used for the result-side operations.
// HostRoundingMode - round with the host rounding mode before converting;
//                    otherwise the conversion rounds towards zero.
// ZeroUpperHalf    - forwarded to _Vector_F64ToI32 and used to pick the size
//                    of the overflow constant for 64-bit sources.
//
// Two strategies exist: a short sequence when the host supports FRINTTS (and
// the source is not 256-bit), and a vectorized compare-and-select fixup otherwise.
Ref OpDispatchBuilder::Vector_CVT_Float_To_Int32Impl(OpcodeArgs, IR::OpSize DstSize, Ref Src, IR::OpSize SrcSize, IR::OpSize SrcElementSize,
                                                     bool HostRoundingMode, bool ZeroUpperHalf) {
  if (CTX->HostFeatures.SupportsFRINTTS && SrcSize != OpSize::i256Bit) {
    // If we have FRINTS, this is the usual 2-step
    Src = _Vector_FToISized(SrcSize, SrcElementSize, Src, HostRoundingMode, OpSize::i32Bit);
    Ref Dst = _Vector_FToZS(SrcSize, SrcElementSize, Src);
    if (SrcElementSize == OpSize::i32Bit) {
      // Return 32-bit result as-is
      return Dst;
    } else {
      // Down step from 64-bit ints to 32-bit ints
      return _VUShrNI(DstSize, SrcElementSize, Dst, 0);
    }
  } else {
    // Otherwise, we have to do all the fixups, but vectorized.
    if (HostRoundingMode) {
      Src = _Vector_FToI(SrcSize, SrcElementSize, Src, RoundMode::Host);
    }

    // The overflow constant is loaded at half the destination size when the
    // upper half is zeroed and the source elements are 64-bit.
    OpSize OverflowConstSize = ZeroUpperHalf && SrcElementSize == OpSize::i64Bit ? DstSize / 2 : DstSize;
    Ref MaxI = LoadAndCacheNamedVectorConstant(OverflowConstSize, NAMED_VECTOR_CVTMAX_I32);
    Ref Converted {}, Cmp {};
    if (SrcElementSize == OpSize::i64Bit) {
      Ref MaxF = LoadAndCacheNamedVectorConstant(SrcSize, NAMED_VECTOR_CVTMAX_F64_I32);
      Converted = _Vector_F64ToI32(DstSize, Src, RoundMode::TowardsZero, ZeroUpperHalf);

      // Compare at 64-bit element width, then pass the mask through _VUShrNI
      // with a shift of 32 so it lines up with the 32-bit results.
      Cmp = _VFCMPGT(SrcSize, OpSize::i64Bit, MaxF, Src);
      Cmp = _VUShrNI(DstSize, OpSize::i64Bit, Cmp, 32);
    } else {
      Ref MaxF = LoadAndCacheNamedVectorConstant(DstSize, NAMED_VECTOR_CVTMAX_F32_I32);
      Converted = _Vector_FToZS(DstSize, OpSize::i32Bit, Src);
      Cmp = _VFCMPGT(DstSize, OpSize::i32Bit, MaxF, Src);
    }
    // Select between the converted value and MaxI using the compare mask.
    return _VBSL(DstSize, Cmp, Converted, MaxI);
  }
}

// Vector float -> 32-bit integer conversion of Src[0]. The helper is always
// called with ZeroUpperHalf set, and the result is stored at the destination size.
template<IR::OpSize SrcElementSize, bool HostRoundingMode>
void OpDispatchBuilder::Vector_CVT_Float_To_Int(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref Floats = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const auto SrcSize = OpSizeFromSrc(Op);
  Ref Integers = Vector_CVT_Float_To_Int32Impl(Op, DstSize, Floats, SrcSize, SrcElementSize, HostRoundingMode, true);

  StoreResultFPR_WithOpSize(Op, Op->Dest, Integers, DstSize);
}

template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>(OpcodeArgs);

template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>(OpcodeArgs);

// Converts the scalar float in Src2Op from SrcElementSize to DstElementSize
// and inserts it into element 0 of the 128-bit vector loaded from Src1Op.
// Returns the merged vector; nothing is stored here.
Ref OpDispatchBuilder::Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
                                                     const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op) {
  // A register source is loaded at full vector length so it is not zero
  // extended needlessly; a memory source loads exactly one element.
  auto ScalarLoadSize = SrcElementSize;
  if (Src2Op.IsGPR()) {
    ScalarLoadSize = OpSize::i128Bit;
  }

  Ref Base = LoadSourceFPR_WithOpSize(Op, Src1Op, OpSize::i128Bit, Op->Flags);
  Ref Scalar = LoadSourceFPR_WithOpSize(Op, Src2Op, ScalarLoadSize, Op->Flags);

  Ref Converted = _Float_FToF(DstElementSize, SrcElementSize, Scalar);

  return _VInsElement(OpSize::i128Bit, DstElementSize, 0, 0, Base, Converted);
}

// Two-operand form: the destination provides the vector being merged into
// and Src[0] is the scalar being converted.
template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
void OpDispatchBuilder::Scalar_CVT_Float_To_Float(OpcodeArgs) {
  StoreResultFPR(Op, Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Dest, Op->Src[0]));
}

template void OpDispatchBuilder::Scalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::Scalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>(OpcodeArgs);

// Three-operand form: Src[0] provides the vector being merged into and
// Src[1] is the scalar being converted.
template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float(OpcodeArgs) {
  StoreResultFPR(Op, Scalar_CVT_Float_To_FloatImpl(Op, DstElementSize, SrcElementSize, Op->Src[0], Op->Src[1]));
}

template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>(OpcodeArgs);

// Packed float <-> double conversion of Src[0]. The result element size is
// twice SrcElementSize when widening and half of it when narrowing.
// IsAVX enables the extra 128-bit move on the AVX paths noted below.
void OpDispatchBuilder::Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, bool IsAVX) {
  const auto VecSize = OpSizeFromSrc(Op);

  const bool SrcIsFloat = SrcElementSize == OpSize::i32Bit;
  const bool Is128Bit = VecSize == OpSize::i128Bit;

  // A 32-bit-element source read from memory only loads half the vector size.
  auto LoadSize = VecSize;
  if (SrcIsFloat && !Op->Src[0].IsGPR()) {
    LoadSize = VecSize >> 1;
  }

  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);

  const auto ResultElementSize = (DstElementSize > SrcElementSize) ? (SrcElementSize << 1) : (SrcElementSize >> 1);
  Ref Result = _Vector_FToF(VecSize, ResultElementSize, Input, SrcElementSize);

  // Two AVX paths get an explicit 128-bit move:
  //   VCVTPD2PS - 64-bit source elements, 256-bit operation
  //   VCVTPS2PD - 32-bit source elements, 128-bit operation
  // Both are the case where SrcIsFloat and Is128Bit agree.
  if (IsAVX && SrcIsFloat == Is128Bit) {
    Result = _VMov(OpSize::i128Bit, Result);
  }

  StoreResultFPR(Op, Result);
}

// Converts the 32-bit signed integers in Src[0] to floats of twice that
// width: sign-extend first, then convert at the widened element size.
void OpDispatchBuilder::MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs) {
  Ref Values = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // The integer elements are always 32-bit.
  const auto NarrowElementSize = OpSize::i32Bit;
  const auto VecSize = OpSizeFromDst(Op);

  Values = _VSXTL(VecSize, NarrowElementSize, Values);
  const auto WideElementSize = NarrowElementSize << 1;

  // The conversion is always signed.
  Values = _Vector_SToF(VecSize, WideElementSize, Values);

  StoreResultFPR(Op, Values);
}

// Vector float -> 32-bit integer conversion of Src[0], stored at the
// destination size. Switches the tracked MMX state away from X87 first.
template<IR::OpSize SrcElementSize, bool HostRoundingMode>
void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs) {
  // This operation moves the MMX state from X87 to MMX.
  const bool NeedsStateChange = MMXState == MMXState_X87;
  if (NeedsStateChange) {
    ChgStateX87_MMX();
  }

  // Register sources are loaded at full width so the vector is not zero
  // extended needlessly; memory sources use the op's source size.
  const auto LoadSize = Op->Src[0].IsGPR() ? OpSize::i128Bit : OpSizeFromSrc(Op);
  const auto StoreSize = OpSizeFromDst(Op);

  Ref Floats = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  Ref Integers = Vector_CVT_Float_To_Int32Impl(Op, StoreSize, Floats, LoadSize, SrcElementSize, HostRoundingMode, false /* TODO? */);
  StoreResultFPR_WithOpSize(Op, Op->Dest, Integers, StoreSize);
}

template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::XMM_To_MMX_Vector_CVT_Float_To_Int<OpSize::i64Bit, true>(OpcodeArgs);

// Byte-masked store: memory at the RDI-based address is read, bytes selected
// by the mask are replaced from the data vector, and the result is written back.
void OpDispatchBuilder::MASKMOVOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  // Only the top bit of each mask byte matters, so turn it into a per-byte
  // select mask with a less-than-zero compare.
  Ref ByteMask = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  ByteMask = _VCMPLTZ(VecSize, OpSize::i8Bit, ByteMask);

  // Bytes that overwrite memory where the mask is set.
  Ref NewBytes = LoadSourceGPR(Op, Op->Dest, Op->Flags);

  // The address comes from RDI, with DS as the default segment.
  auto Address = MakeSegmentAddress(X86State::REG_RDI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);

  Ref Current = _LoadMemFPR(VecSize, Address, OpSize::i8Bit);

  // Take the new byte where the mask is set, otherwise keep what memory held.
  Ref Merged = _VBSL(VecSize, ByteMask, NewBytes, Current);
  _StoreMemFPR(VecSize, Address, Merged, OpSize::i8Bit);
}

// Shared body of the masked vector load/store handlers.
//
// ElementSize - element width the mask applies to.
// DataSize    - vector width of the operation.
// IsStore     - true: masked store of DataOp to the address in Op->Dest;
//               false: masked load from the address in DataOp.
// MaskOp      - operand holding the mask vector.
void OpDispatchBuilder::VMASKMOVOpImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DataSize, bool IsStore,
                                       const X86Tables::DecodedOperand& MaskOp, const X86Tables::DecodedOperand& DataOp) {
  Ref Mask = LoadSourceFPR_WithOpSize(Op, MaskOp, DataSize, Op->Flags);

  if (!IsStore) {
    const bool Is128Bit = GetDstSize(Op) == Core::CPUState::XMM_SSE_REG_SIZE;

    Ref Address = MakeSegmentAddress(Op, DataOp, GetGPROpSize());
    Ref Loaded = _VLoadVectorMasked(DataSize, ElementSize, Mask, Address, Invalid(), MemOffsetType::SXTX, 1);

    // 128-bit loads pass the result through an explicit 128-bit move before storing.
    if (Is128Bit) {
      Loaded = _VMov(OpSize::i128Bit, Loaded);
    }
    StoreResultFPR(Op, Loaded);
    return;
  }

  Ref Data = LoadSourceFPR_WithOpSize(Op, DataOp, DataSize, Op->Flags);
  Ref Address = MakeSegmentAddress(Op, Op->Dest, GetGPROpSize());
  _VStoreVectorMasked(DataSize, ElementSize, Mask, Data, Address, Invalid(), MemOffsetType::SXTX, 1);
}

// Masked load/store with a fixed element size: Src[0] is the mask operand,
// Src[1] the data operand, and the destination size is the vector width.
template<IR::OpSize ElementSize, bool IsStore>
void OpDispatchBuilder::VMASKMOVOp(OpcodeArgs) {
  const auto DataSize = OpSizeFromDst(Op);
  VMASKMOVOpImpl(Op, ElementSize, DataSize, IsStore, Op->Src[0], Op->Src[1]);
}
template void OpDispatchBuilder::VMASKMOVOp<OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::VMASKMOVOp<OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::VMASKMOVOp<OpSize::i64Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::VMASKMOVOp<OpSize::i64Bit, true>(OpcodeArgs);

// Masked load/store whose element size is the op's source size: Src[0] is the
// mask operand, Src[1] the data operand, and the destination size is the vector width.
template<bool IsStore>
void OpDispatchBuilder::VPMASKMOVOp(OpcodeArgs) {
  const auto ElementSize = OpSizeFromSrc(Op);
  const auto DataSize = OpSizeFromDst(Op);
  VMASKMOVOpImpl(Op, ElementSize, DataSize, IsStore, Op->Src[0], Op->Src[1]);
}
template void OpDispatchBuilder::VPMASKMOVOp<false>(OpcodeArgs);
template void OpDispatchBuilder::VPMASKMOVOp<true>(OpcodeArgs);

// Moves a scalar between a general-purpose register or memory and a vector
// register, in either direction depending on the destination operand.
// VectorType is forwarded to StoreResult_WithAVXInsert when the destination
// is a vector register.
void OpDispatchBuilder::MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType) {
  const bool DestIsVectorReg = Op->Dest.IsGPR() && Op->Dest.Data.GPR.GPR >= FEXCore::X86State::REG_XMM_0;

  if (DestIsVectorReg) {
    Ref Value {};
    if (Op->Src[0].IsGPR()) {
      // GPR -> vector: load at GPR width, then cast into a 128-bit vector
      // (zero extending).
      Ref Scalar = LoadSourceFPR_WithOpSize(Op, Op->Src[0], GetGPROpSize(), Op->Flags);
      Value = _VCastFromGPR(OpSize::i128Bit, OpSizeFromSrc(Op), Scalar);
    } else {
      // Memory -> vector: a scalar load, which zero extends.
      Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    }

    StoreResult_WithAVXInsert(VectorType, RegClass::FPR, Op, Value);
    return;
  }

  Ref Vector = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  if (!Op->Dest.IsGPR()) {
    // Vector -> memory: compute the address without loading data and store
    // the first element.
    Ref Address = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});
    _StoreMemFPR(OpSizeFromDst(Op), Address, Vector, OpSize::i8Bit);
    return;
  }

  // Vector -> GPR: extract element 0 at the destination size, zero extending.
  const auto ElementSize = OpSizeFromDst(Op);
  Ref Extracted = _VExtractToGPR(OpSizeFromSrc(Op), ElementSize, Vector, 0);
  StoreResultGPR(Op, Op->Dest, Extracted);
}

// Emits the vector float comparison selected by CompType and returns the
// resulting vector.
//
// Size / ElementSize - vector and element width of the comparison.
// Src1 / Src2        - left and right operands.
// CompType           - predicate, interpreted as a VectorCompareType. The
//                      quiet and signaling variants of each predicate share
//                      one implementation here.
//
// Only EQ, LT, LE, UNO, NEQ and ORD compares are emitted directly. "Greater"
// predicates swap the operands of LT/LE, and negated predicates invert the
// result with _VNot.
Ref OpDispatchBuilder::VFCMPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType) {
  switch (static_cast<VectorCompareType>(CompType)) {
  case VectorCompareType::EQ_OQ:
  case VectorCompareType::EQ_OS: return _VFCMPEQ(Size, ElementSize, Src1, Src2);
  case VectorCompareType::LT_OS: // GT(Swapped operand)
  case VectorCompareType::LT_OQ: return _VFCMPLT(Size, ElementSize, Src1, Src2);
  case VectorCompareType::LE_OS: // GE(Swapped operand)
  case VectorCompareType::LE_OQ: return _VFCMPLE(Size, ElementSize, Src1, Src2);
  case VectorCompareType::UNORD_Q:
  case VectorCompareType::UNORD_S: return _VFCMPUNO(Size, ElementSize, Src1, Src2);
  case VectorCompareType::NEQ_UQ:
  case VectorCompareType::NEQ_US: return _VFCMPNEQ(Size, ElementSize, Src1, Src2);
  case VectorCompareType::NLT_US: // NGT(Swapped operand)
  case VectorCompareType::NLT_UQ: {
    // Not-less-than: invert Src1 < Src2.
    Ref Result = _VFCMPLT(Size, ElementSize, Src1, Src2);
    return _VNot(Size, ElementSize, Result);
  }
  case VectorCompareType::NLE_US: // NGE(Swapped operand)
  case VectorCompareType::NLE_UQ: {
    // Not-less-or-equal: invert Src1 <= Src2.
    Ref Result = _VFCMPLE(Size, ElementSize, Src1, Src2);
    return _VNot(Size, ElementSize, Result);
  }
  case VectorCompareType::ORD_Q:
  case VectorCompareType::ORD_S: return _VFCMPORD(Size, ElementSize, Src1, Src2);
  case VectorCompareType::NGT_UQ:
  case VectorCompareType::NGT_US: {
    // Not-greater-than: invert Src2 < Src1.
    Ref Result = _VFCMPLT(Size, ElementSize, Src2, Src1);
    return _VNot(Size, ElementSize, Result);
  }
  case VectorCompareType::NGE_UQ:
  case VectorCompareType::NGE_US: {
    // Not-greater-or-equal: invert Src2 <= Src1.
    Ref Result = _VFCMPLE(Size, ElementSize, Src2, Src1);
    return _VNot(Size, ElementSize, Result);
  }
  // Greater-than / greater-or-equal are LT / LE with the operands swapped.
  case VectorCompareType::GT_OQ:
  case VectorCompareType::GT_OS: return _VFCMPLT(Size, ElementSize, Src2, Src1);
  case VectorCompareType::GE_OQ:
  case VectorCompareType::GE_OS: return _VFCMPLE(Size, ElementSize, Src2, Src1);
  case VectorCompareType::EQ_UQ:
  case VectorCompareType::EQ_US: {
    // If either of the sources are unordered, then returns true.
    // Each operand is compared with itself; both results are ANDed into the
    // "Ordered" mask.
    Ref Src1_U = _VFCMPEQ(Size, ElementSize, Src1, Src1);
    Ref Src2_U = _VFCMPEQ(Size, ElementSize, Src2, Src2);
    auto Ordered = _VAnd(Size, ElementSize, Src1_U, Src2_U);

    // The equality result is combined with the Ordered mask through _VOrn.
    Ref Compare_Ordered = _VFCMPEQ(Size, ElementSize, Src1, Src2);
    return _VOrn(Size, ElementSize, Compare_Ordered, Ordered);
  }
  case VectorCompareType::NEQ_OQ:
  case VectorCompareType::NEQ_OS: {
    // If either of the sources are unordered, then returns false.
    Ref Src1_U = _VFCMPEQ(Size, ElementSize, Src1, Src1);
    Ref Src2_U = _VFCMPEQ(Size, ElementSize, Src2, Src2);

    // Src1's self-compare is combined with the equality result through
    // _VAndn, then ANDed with Src2's self-compare.
    Ref Compare_Ordered = _VFCMPEQ(Size, ElementSize, Src1, Src2);
    Ref Result = _VAndn(Size, ElementSize, Src1_U, Compare_Ordered);
    return _VAnd(Size, ElementSize, Result, Src2_U);
  }
  // Constant predicates: a zero vector for FALSE, and a vector built from
  // the byte immediate -1 for TRUE.
  case VectorCompareType::FALSE_OQ:
  case VectorCompareType::FALSE_OS: return LoadZeroVector(Size);
  case VectorCompareType::TRUE_UQ:
  case VectorCompareType::TRUE_US: return _VectorImm(Size, OpSize::i8Bit, -1, 0);
  }
  FEX_UNREACHABLE;
}

// Dispatch handler for the two-operand floating-point vector compare.
// The destination doubles as the first compare operand, Src[0] is the second,
// and the predicate is the literal held in Src[1]. Only the low three bits of
// that literal are forwarded to VFCMPOpImpl.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VFCMPOp(OpcodeArgs) {
  // Scalar forms do not need zero-extension here; the final store only has to
  // insert the result.
  const auto SourceOpSize = OpSizeFromSrc(Op);
  const auto DestOpSize = OpSizeFromDst(Op);

  Ref RHS = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SourceOpSize, Op->Flags);
  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Dest, DestOpSize, Op->Flags);

  const uint8_t Predicate = Op->Src[1].Data.Literal.Value;
  Ref Compared = VFCMPOpImpl(SourceOpSize, ElementSize, LHS, RHS, Predicate & 0b111);

  StoreResultFPR(Op, Compared);
}

template void OpDispatchBuilder::VFCMPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VFCMPOp<OpSize::i64Bit>(OpcodeArgs);

// Dispatch handler for the three-operand floating-point vector compare.
// Src[0] and Src[1] are the compare operands and Src[2] carries the predicate
// literal, of which the low five bits are forwarded to VFCMPOpImpl.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVXVFCMPOp(OpcodeArgs) {
  // Scalar forms do not need zero-extension here; the final store only has to
  // insert the result.
  const auto SourceOpSize = OpSizeFromSrc(Op);
  const auto DestOpSize = OpSizeFromDst(Op);
  const uint8_t Predicate = Op->Src[2].Literal();

  // The first operand is loaded at destination width, the second at source width.
  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Src[0], DestOpSize, Op->Flags);
  Ref RHS = LoadSourceFPR_WithOpSize(Op, Op->Src[1], SourceOpSize, Op->Flags);

  Ref Compared = VFCMPOpImpl(SourceOpSize, ElementSize, LHS, RHS, Predicate & 0b11111);
  StoreResultFPR(Op, Compared);
}

template void OpDispatchBuilder::AVXVFCMPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVFCMPOp<OpSize::i64Bit>(OpcodeArgs);

// FXSAVE handler: writes the x87, SSE and MXCSR portions of the save area to
// the memory operand, in that order.
void OpDispatchBuilder::FXSaveOp(OpcodeArgs) {
  Ref SaveArea = MakeSegmentAddress(Op, Op->Dest);

  SaveX87State(Op, SaveArea);
  SaveSSEState(SaveArea);
  SaveMXCSRState(SaveArea);
}

// XSAVE dispatch entry point; all of the work is done by XSaveOpImpl.
void OpDispatchBuilder::XSaveOp(OpcodeArgs) {
  XSaveOpImpl(Op);
}

// Computes the base address of the XSAVE area from the instruction's
// destination operand. Callers in this file invoke it again inside each newly
// created code block rather than reusing one value across blocks.
Ref OpDispatchBuilder::XSaveBase(X86Tables::DecodedOp Op) {
  return MakeSegmentAddress(Op, Op->Dest);
}

// Emits the IR for XSAVE.
//
// Each state component (x87, SSE, AVX, MXCSR) is stored only when the matching
// bit(s) of the requested-feature mask in RAX are set, using one conditional
// branch per component. Afterwards the low three requested-feature bits are
// written to the XSTATE_BV field at offset 512 of the save area.
void OpDispatchBuilder::XSaveOpImpl(OpcodeArgs) {
  // NOTE: Mask should be EAX and EDX concatenated, but we only need to test
  //       for features that are in the lower 32 bits, so EAX only is sufficient.
  const auto OpSize = GetGPROpSize();

  // Emits: if (RAX[BitIndex +: FieldSize] != 0) { fn(); }
  // `fn` runs inside a freshly created block, and emission continues in a
  // second new block that both the taken and not-taken paths jump to.
  const auto StoreIfFlagSet = [this, OpSize](uint32_t BitIndex, auto fn, uint32_t FieldSize = 1) {
    Ref Mask = LoadGPRRegister(X86State::REG_RAX);
    Ref BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask);
    auto CondJump_ = CondJump(BitFlag, CondClass::NEQ);

    auto StoreBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
    SetTrueJumpTarget(CondJump_, StoreBlock);
    SetCurrentCodeBlock(StoreBlock);
    StartNewBlock();
    { fn(); }
    auto Jump_ = Jump();
    auto NextJumpTarget = CreateNewCodeBlockAfter(StoreBlock);
    SetJumpTarget(Jump_, NextJumpTarget);
    SetFalseJumpTarget(CondJump_, NextJumpTarget);
    SetCurrentCodeBlock(NextJumpTarget);
    StartNewBlock();
  };

  // x87
  {
    StoreIfFlagSet(0, [this, Op] { SaveX87State(Op, XSaveBase(Op)); });
  }
  // SSE
  {
    StoreIfFlagSet(1, [this, Op] { SaveSSEState(XSaveBase(Op)); });
  }
  // AVX
  if (CTX->HostFeatures.SupportsAVX) {
    StoreIfFlagSet(2, [this, Op] { std::invoke(SaveAVXStateFunc, this, XSaveBase(Op)); });
  }

  // We need to save MXCSR and MXCSR_MASK if either SSE or AVX are requested to be saved
  // (a two-bit field starting at bit 1 covers both the SSE and the AVX bit).
  {
    StoreIfFlagSet(1, [this, Op] { SaveMXCSRState(XSaveBase(Op)); }, 2);
  }

  // Update XSTATE_BV region of the XSAVE header
  {
    Ref Base = XSaveBase(Op);

    // NOTE: We currently only support the first 3 bits (x87, SSE, and AVX)
    Ref Mask = LoadGPRRegister(X86State::REG_RAX);
    Ref RequestedFeatures = _Bfe(OpSize, 3, 0, Mask);

    // XSTATE_BV section of the header is 8 bytes in size, but we only really
    // care about setting at most 3 bits in the first byte. We zero out the rest.
    _StoreMemGPR(OpSize::i64Bit, RequestedFeatures, Base, Constant(512), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  }
}

// Writes the x87/MMX portion of an FXSAVE-format area at MemBase:
// FCW at byte 0, the reconstructed FSW at byte 2, the abridged FTW at byte 4,
// and the eight ST/MM registers at bytes 32..159 (16 bytes apiece).
// FOP/FIP/FCS/FDP/FDS are not written by this function.
void OpDispatchBuilder::SaveX87State(OpcodeArgs, Ref MemBase) {
  _SyncStackToSlow();

  // Saves 512bytes to the memory location provided
  // Header changes depending on if REX.W is set or not
  // (the branches below are documentation only; neither emits any IR)
  if (Op->Flags & X86Tables::DecodeFlags::FLAG_REX_WIDENING) {
    // BYTE | 0 1 | 2 3 | 4   | 5     | 6 7 | 8 9 | a b | c d | e f |
    // ------------------------------------------
    //   00 | FCW | FSW | FTW | <R>   | FOP | FIP                   |
    //   16 | FDP                           | MXCSR     | MXCSR_MASK|
  } else {
    // BYTE | 0 1 | 2 3 | 4   | 5     | 6 7 | 8 9 | a b | c d | e f |
    // ------------------------------------------
    //   00 | FCW | FSW | FTW | <R>   | FOP | FIP[31:0] | FCS | <R> |
    //   16 | FDP[31:0] | FDS         | <R> | MXCSR     | MXCSR_MASK|
  }

  {
    // FCW is copied straight from the CPU state to byte offset 0.
    auto FCW = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, FCW));
    _StoreMemGPR(OpSize::i16Bit, MemBase, FCW, OpSize::i16Bit);
  }

  // FSW is rebuilt by ReconstructFSW_Helper() and stored at byte offset 2.
  { _StoreMemGPR(OpSize::i16Bit, ReconstructFSW_Helper(), MemBase, Constant(2), OpSize::i16Bit, MemOffsetType::SXTX, 1); }

  {
    // Abridged FTW
    auto FTW = _LoadContextGPR(OpSize::i8Bit, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
    _StoreMemGPR(OpSize::i8Bit, FTW, MemBase, Constant(4), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  }

  // BYTE | 0 1 | 2 3 | 4   | 5     | 6 7 | 8 9 | a b | c d | e f |
  // ------------------------------------------
  //   32 | ST0/MM0                             | <R>
  //   48 | ST1/MM1                             | <R>
  //   64 | ST2/MM2                             | <R>
  //   80 | ST3/MM3                             | <R>
  //   96 | ST4/MM4                             | <R>
  //  112 | ST5/MM5                             | <R>
  //  128 | ST6/MM6                             | <R>
  //  144 | ST7/MM7                             | <R>
  //  160 | XMM0
  //  176 | XMM1
  //  192 | XMM2
  //  208 | XMM3
  //  224 | XMM4
  //  240 | XMM5
  //  256 | XMM6
  //  272 | XMM7
  //  288 | 64BitMode ? XMM8 : <R>
  //  304 | 64BitMode ? XMM9 : <R>
  //  320 | 64BitMode ? XMM10 : <R>
  //  336 | 64BitMode ? XMM11 : <R>
  //  352 | 64BitMode ? XMM12 : <R>
  //  368 | 64BitMode ? XMM13 : <R>
  //  384 | 64BitMode ? XMM14 : <R>
  //  400 | 64BitMode ? XMM15 : <R>
  //  416 | <R>
  //  432 | <R>
  //  448 | <R>
  //  464 | Available
  //  480 | Available
  //  496 | Available
  // FCW: x87 FPU control word
  // FSW: x87 FPU status word
  // FTW: x87 FPU Tag word (Abridged)
  // FOP: x87 FPU opcode. Lower 11 bits of the opcode
  // FIP: x87 FPU instruction pointer offset
  // FCS: x87 FPU instruction pointer selector. If CPUID_0000_0007_0000_00000:EBX[bit 13] = 1 then this is deprecated and stores as 0
  // FDP: x87 FPU instruction operand (data) pointer offset
  // FDS: x87 FPU instruction operand (data) pointer selector. Same deprecation as FCS
  // MXCSR: If OSFXSR bit in CR4 is not set then this may not be saved
  // MXCSR_MASK: Mask for writes to the MXCSR register
  // If OSFXSR bit in CR4 is not set than FXSAVE /may/ not save the XMM registers
  // This is implementation dependent
  //
  // x87 registers are stored rotated depending on the current TOP.
  Ref Top = GetX87Top();
  auto SevenConst = Constant(7);
  // In reduced precision mode only 64 bits are loaded per register; the value
  // is then passed through _F80CVTTo before the 128-bit store below.
  const auto LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;

  for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) {
    // Slot i of the save area receives the register currently indexed by Top.
    Ref data = _LoadContextFPRIndexed(Top, LoadSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit));
    if (ReducedPrecisionMode) {
      data = _F80CVTTo(data, OpSize::i64Bit);
    }
    _StoreMemFPR(OpSize::i128Bit, data, MemBase, Constant(16 * i + 32), OpSize::i8Bit, MemOffsetType::SXTX, 1);
    // Advance to the next stack slot, wrapping at 8: Top = (Top + 1) & 7.
    Top = _And(OpSize::i32Bit, Add(OpSize::i32Bit, Top, 1), SevenConst);
  }
}

// Stores the low 128 bits of every guest XMM register into the save area,
// starting at byte 160 with 16 bytes per register. Sixteen registers are
// written in 64-bit mode, eight otherwise; registers are stored two at a time.
void OpDispatchBuilder::SaveSSEState(Ref MemBase) {
  constexpr uint32_t XMMAreaOffset = 160;
  constexpr uint32_t BytesPerReg = 16;
  const uint32_t RegCount = Is64BitMode ? 16U : 8U;

  for (uint32_t Reg = 0; Reg < RegCount; Reg += 2) {
    Ref Even = LoadXMMRegister(Reg);
    Ref Odd = LoadXMMRegister(Reg + 1);
    _StoreMemPairFPR(OpSize::i128Bit, Even, Odd, MemBase, Reg * BytesPerReg + XMMAreaOffset);
  }
}

// Writes MXCSR (as returned by GetMXCSR) at byte 24 of the save area, followed
// immediately by an MXCSR_MASK value of 0xFFFF.
void OpDispatchBuilder::SaveMXCSRState(Ref MemBase) {
  Ref CurrentMXCSR = GetMXCSR();
  Ref WritableMask = Constant(0xFFFF);
  _StoreMemPairGPR(OpSize::i32Bit, CurrentMXCSR, WritableMask, MemBase, 24);
}

// Stores 128-bit element 1 of each 256-bit guest vector register into the
// save area, starting at byte 576 with 16 bytes per register. Sixteen
// registers are written in 64-bit mode, eight otherwise.
void OpDispatchBuilder::SaveAVXState(Ref MemBase) {
  constexpr uint32_t YMMHighAreaOffset = 576;
  constexpr uint32_t BytesPerReg = 16;
  const uint32_t RegCount = Is64BitMode ? 16U : 8U;

  for (uint32_t Reg = 0; Reg < RegCount; Reg += 2) {
    // Select 128-bit element 1 of each register so it can be stored on its own.
    Ref HighEven = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, LoadXMMRegister(Reg + 0), 1);
    Ref HighOdd = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, LoadXMMRegister(Reg + 1), 1);

    _StoreMemPairFPR(OpSize::i128Bit, HighEven, HighOdd, MemBase, Reg * BytesPerReg + YMMHighAreaOffset);
  }
}

// Loads the guest MXCSR from the CPU state and returns it with unsupported
// bits cleared. The 0xFFC0 mask keeps bits 6-15 (DAZ, the exception masks,
// RC and FZ) and drops bits 0-5.
Ref OpDispatchBuilder::GetMXCSR() {
  Ref RawMXCSR = _LoadContextGPR(OpSize::i32Bit, offsetof(FEXCore::Core::CPUState, mxcsr));
  return _And(OpSize::i32Bit, RawMXCSR, Constant(0xFFC0));
}

// FXRSTOR handler: reloads the x87 and SSE state from the memory operand, then
// reads the 32-bit MXCSR at byte 24 and applies it.
void OpDispatchBuilder::FXRStoreOp(OpcodeArgs) {
  Ref SaveArea = MakeSegmentAddress(Op, Op->Src[0]);

  RestoreX87State(SaveArea);
  RestoreSSEState(SaveArea);

  Ref SavedMXCSR = _LoadMemGPR(OpSize::i32Bit, SaveArea, Constant(24), OpSize::i32Bit, MemOffsetType::SXTX, 1);
  RestoreMXCSRState(SavedMXCSR);
}

// Emits the IR for XRSTOR.
//
// For each state component (x87, SSE, AVX, MXCSR) the XSTATE_BV field at
// offset 512 of the save area is tested. If the component's bit(s) are set the
// component is restored from memory, otherwise it is reset to its default
// state (MXCSR has no default action).
void OpDispatchBuilder::XRstorOpImpl(OpcodeArgs) {
  const auto OpSize = GetGPROpSize();

  // If a bit in our XSTATE_BV is set, then we restore from that region of the XSAVE area,
  // otherwise, if not set, then we need to set the relevant data the bit corresponds to
  // to its defined initial configuration.
  //
  // Emits: if (XSTATE_BV[BitIndex +: FieldSize] != 0) restore_fn(); else default_fn();
  // Each arm lives in its own new block and both jump to a common exit block,
  // where emission continues.
  const auto RestoreIfFlagSetOrDefault = [this, Op, OpSize](uint32_t BitIndex, auto restore_fn, auto default_fn, uint32_t FieldSize = 1) {
    // Set up base address for the XSAVE region to restore from, and also read
    // the XSTATE_BV bit flags out of the XSTATE header.
    //
    // Note: we rematerialize Base/Mask in each block to avoid crossblock
    // liveness.
    Ref Base = XSaveBase(Op);
    Ref Mask = _LoadMemGPR(OpSize::i64Bit, Base, Constant(512), OpSize::i64Bit, MemOffsetType::SXTX, 1);

    Ref BitFlag = _Bfe(OpSize, FieldSize, BitIndex, Mask);
    auto CondJump_ = CondJump(BitFlag, CondClass::NEQ);

    auto RestoreBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
    SetTrueJumpTarget(CondJump_, RestoreBlock);
    SetCurrentCodeBlock(RestoreBlock);
    StartNewBlock();
    { restore_fn(); }
    auto RestoreExitJump = Jump();
    auto DefaultBlock = CreateNewCodeBlockAfter(RestoreBlock);
    auto ExitBlock = CreateNewCodeBlockAfter(DefaultBlock);
    SetJumpTarget(RestoreExitJump, ExitBlock);
    SetFalseJumpTarget(CondJump_, DefaultBlock);
    SetCurrentCodeBlock(DefaultBlock);
    StartNewBlock();
    { default_fn(); }
    auto DefaultExitJump = Jump();
    SetJumpTarget(DefaultExitJump, ExitBlock);
    SetCurrentCodeBlock(ExitBlock);
    StartNewBlock();
  };

  // x87
  {
    RestoreIfFlagSetOrDefault(0, [this, Op] { RestoreX87State(XSaveBase(Op)); }, [this, Op] { DefaultX87State(Op); });
  }
  // SSE
  {
    RestoreIfFlagSetOrDefault(1, [this, Op] { RestoreSSEState(XSaveBase(Op)); }, [this] { DefaultSSEState(); });
  }
  // AVX
  if (CTX->HostFeatures.SupportsAVX) {
    RestoreIfFlagSetOrDefault(
      2, [this, Op] { std::invoke(RestoreAVXStateFunc, this, XSaveBase(Op)); }, [this] { std::invoke(DefaultAVXStateFunc, this); });
  }

  {
    // We need to restore the MXCSR if either SSE or AVX are requested to be saved
    // (a two-bit field starting at bit 1 covers both the SSE and the AVX bit).
    RestoreIfFlagSetOrDefault(
      1,
      [this, Op] {
        Ref Base = XSaveBase(Op);
        Ref MXCSR = _LoadMemGPR(OpSize::i32Bit, Base, Constant(24), OpSize::i32Bit, MemOffsetType::SXTX, 1);
        RestoreMXCSRState(MXCSR);
      },
      [] { /* Intentionally do nothing*/ }, 2);
  }
}

// Reloads the x87/MMX portion of an FXSAVE-format area at MemBase:
// FCW from byte 0, FSW from byte 2, the abridged FTW from byte 4, and the
// eight ST/MM registers from bytes 32..159 (16 bytes apiece).
void OpDispatchBuilder::RestoreX87State(Ref MemBase) {
  _StackForceSlow();

  // Control word: copied straight into the CPU state.
  auto LoadedFCW = _LoadMemGPR(OpSize::i16Bit, MemBase, OpSize::i16Bit);
  _StoreContextGPR(OpSize::i16Bit, LoadedFCW, offsetof(FEXCore::Core::CPUState, FCW));

  // Status word: handed to the helper that rebuilds the x87 state from it.
  auto LoadedFSW = _LoadMemGPR(OpSize::i16Bit, MemBase, Constant(2), OpSize::i16Bit, MemOffsetType::SXTX, 1);
  ReconstructX87StateFromFSW_Helper(LoadedFSW);

  // Abridged tag word.
  auto LoadedFTW = _LoadMemGPR(OpSize::i8Bit, MemBase, Constant(4), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  _StoreContextGPR(OpSize::i8Bit, LoadedFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));

  // Register file: read two 128-bit slots per iteration and write them to
  // consecutive context slots.
  constexpr uint32_t RegAreaOffset = 32;
  constexpr uint32_t BytesPerReg = 16;
  for (uint32_t Reg = 0; Reg < Core::CPUState::NUM_MMS; Reg += 2) {
    auto Pair = LoadMemPairFPR(OpSize::i128Bit, MemBase, Reg * BytesPerReg + RegAreaOffset);
    _StoreContextFPR(OpSize::i128Bit, Pair.Low, MMBaseOffset() + Reg * BytesPerReg);
    _StoreContextFPR(OpSize::i128Bit, Pair.High, MMBaseOffset() + (Reg + 1) * BytesPerReg);
  }
}

// Reloads the guest XMM registers from the save area, starting at byte 160
// with 16 bytes per register. Sixteen registers are read in 64-bit mode, eight
// otherwise; registers are loaded two at a time.
void OpDispatchBuilder::RestoreSSEState(Ref MemBase) {
  constexpr uint32_t XMMAreaOffset = 160;
  constexpr uint32_t BytesPerReg = 16;
  const uint32_t RegCount = Is64BitMode ? 16U : 8U;

  for (uint32_t Reg = 0; Reg < RegCount; Reg += 2) {
    auto Pair = LoadMemPairFPR(OpSize::i128Bit, MemBase, Reg * BytesPerReg + XMMAreaOffset);

    StoreXMMRegister(Reg, Pair.Low);
    StoreXMMRegister(Reg + 1, Pair.High);
  }
}

// Installs a new guest MXCSR value. Unsupported bits are cleared with the
// 0xFFC0 mask, the masked value is written to the CPU state, and bits 13-15
// are extracted and passed to _SetRoundingMode.
void OpDispatchBuilder::RestoreMXCSRState(Ref MXCSR) {
  Ref Sanitized = _And(OpSize::i32Bit, MXCSR, Constant(0xFFC0));
  _StoreContextGPR(OpSize::i32Bit, Sanitized, offsetof(FEXCore::Core::CPUState, mxcsr));

  // Only the rounding mode and FTZ bit are supported: a 3-bit field at bit 13.
  Ref RoundingBits = _Bfe(OpSize::i32Bit, 3, 13, Sanitized);
  _SetRoundingMode(RoundingBits, true, Sanitized);
}

// Reloads the upper 128-bit halves of the guest vector registers from the
// save area, starting at byte 576 with 16 bytes per register. Each value is
// inserted as 128-bit element 1 of the current register contents.
void OpDispatchBuilder::RestoreAVXState(Ref MemBase) {
  constexpr uint32_t YMMHighAreaOffset = 576;
  constexpr uint32_t BytesPerReg = 16;
  const uint32_t RegCount = Is64BitMode ? 16U : 8U;

  for (uint32_t Reg = 0; Reg < RegCount; Reg += 2) {
    Ref CurrentEven = LoadXMMRegister(Reg + 0);
    Ref CurrentOdd = LoadXMMRegister(Reg + 1);
    auto SavedHighs = LoadMemPairFPR(OpSize::i128Bit, MemBase, Reg * BytesPerReg + YMMHighAreaOffset);

    // Element 0 of the loaded value becomes element 1 of the register.
    StoreXMMRegister(Reg + 0, _VInsElement(OpSize::i256Bit, OpSize::i128Bit, 1, 0, CurrentEven, SavedHighs.Low));
    StoreXMMRegister(Reg + 1, _VInsElement(OpSize::i256Bit, OpSize::i128Bit, 1, 0, CurrentOdd, SavedHighs.High));
  }
}

// Resets the x87 state to its initial configuration, as XRSTOR requires when
// the x87 bit is clear in XSTATE_BV: the control/status state is reset through
// FNINIT, and all eight ST/MM register slots are cleared.
void OpDispatchBuilder::DefaultX87State(OpcodeArgs) {
  // We can piggy-back on FNINIT's implementation, since
  // it performs the same behavior as required by XRSTOR for resetting flags
  FNINIT(Op);

  // On top of resetting the flags to a default state, we also need to clear
  // all of the ST0-7/MM0-7 registers to zero.
  //
  // Each slot is written with a 128-bit store, so the zero vector has to be
  // 128 bits wide as well. A 64-bit zero vector would leave the upper half of
  // every slot unspecified at the IR level.
  Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);
  for (uint32_t i = 0; i < Core::CPUState::NUM_MMS; ++i) {
    _StoreContextFPR(OpSize::i128Bit, ZeroVector, MMBaseOffset() + i * 16);
  }
}

void OpDispatchBuilder::DefaultSSEState() {
  const auto NumRegs = Is64BitMode ? 16U : 8U;

  Ref ZeroVector = LoadZeroVector(OpSize::i128Bit);
  for (uint32_t i = 0; i < NumRegs; ++i) {
    StoreXMMRegister(i, ZeroVector);
  }
}

void OpDispatchBuilder::DefaultAVXState() {
  const auto NumRegs = Is64BitMode ? 16U : 8U;

  for (uint32_t i = 0; i < NumRegs; i++) {
    Ref Reg = LoadXMMRegister(i);
    Ref Dst = _VMov(OpSize::i128Bit, Reg);
    StoreXMMRegister(i, Dst);
  }
}

// Shared implementation of PALIGNR / VPALIGNR.
//
// Src1 and Src2 are the two vector operands and Imm is the byte shift amount.
// IsAVX selects the VEX behaviour for the 128-bit form. 256-bit operations
// are processed as two independent 128-bit halves.
Ref OpDispatchBuilder::PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
                                     const X86Tables::DecodedOperand& Imm, bool IsAVX) {
  const auto FullSize = OpSizeFromDst(Op);
  const bool IsYMM = FullSize == OpSize::i256Bit;
  // Per-half working size: never wider than 128 bits.
  const auto LaneSize = std::min(FullSize, OpSize::i128Bit);
  const auto ByteShift = Imm.Literal();

  Ref Second = LoadSourceFPR(Op, Src2, Op->Flags);
  if (ByteShift == 0) {
    // A shift of zero passes the second source through untouched. Src1 is
    // never loaded on this path.
    if (IsAVX && !IsYMM) {
      // 128-bit AVX needs to zero the upper bits.
      return _VMov(OpSize::i128Bit, Second);
    }
    return Second;
  }

  Ref First = LoadSourceFPR(Op, Src1, Op->Flags);

  // Shifting by at least the combined width of both sources leaves only zeros.
  if (ByteShift >= (IR::OpSizeToSize(LaneSize) * 2)) {
    return LoadZeroVector(FullSize);
  }

  Ref LowHalf = _VExtr(LaneSize, OpSize::i8Bit, First, Second, ByteShift);
  if (!IsYMM) {
    return LowHalf;
  }

  // Bring the upper 128 bits of each source down to element 0, run the same
  // extract on them, then place the result in the upper half of the output.
  Ref FirstUpper = _VInsElement(FullSize, OpSize::i128Bit, 0, 1, First, First);
  Ref SecondUpper = _VInsElement(FullSize, OpSize::i128Bit, 0, 1, Second, Second);
  Ref HighHalf = _VExtr(LaneSize, OpSize::i8Bit, FirstUpper, SecondUpper, ByteShift);
  return _VInsElement(FullSize, OpSize::i128Bit, 1, 0, LowHalf, HighHalf);
}

// Two-operand PALIGNR: the destination is also the first source, Src[0] is
// the second source and Src[1] holds the shift immediate.
void OpDispatchBuilder::PAlignrOp(OpcodeArgs) {
  constexpr bool IsAVX = false;
  Ref Aligned = PALIGNROpImpl(Op, Op->Dest, Op->Src[0], Op->Src[1], IsAVX);
  StoreResultFPR(Op, Aligned);
}

// Three-operand VPALIGNR: Src[0] and Src[1] are the sources and Src[2] holds
// the shift immediate.
void OpDispatchBuilder::VPALIGNROp(OpcodeArgs) {
  constexpr bool IsAVX = true;
  Ref Aligned = PALIGNROpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2], IsAVX);
  StoreResultFPR(Op, Aligned);
}

// Scalar floating-point compare that produces flags through Comiss.
// The destination operand is always loaded at the guest vector length. The
// source is loaded at the guest vector length when it is a register and at
// ElementSize otherwise.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::UCOMISxOp(OpcodeArgs) {
  const auto VectorLength = GetGuestVectorLength();
  const auto RHSLoadSize = Op->Src[0].IsGPR() ? VectorLength : ElementSize;

  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Dest, VectorLength, Op->Flags);
  Ref RHS = LoadSourceFPR_WithOpSize(Op, Op->Src[0], RHSLoadSize, Op->Flags);

  Comiss(ElementSize, LHS, RHS);
}

template void OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>(OpcodeArgs);

// LDMXCSR: loads a 32-bit value from the instruction's operand and installs it
// as the new MXCSR.
void OpDispatchBuilder::LDMXCSR(OpcodeArgs) {
  Ref NewMXCSR = LoadSourceGPR_WithOpSize(Op, Op->Dest, OpSize::i32Bit, Op->Flags);
  RestoreMXCSRState(NewMXCSR);
}

// STMXCSR: writes the current (masked) MXCSR to the destination operand as a
// 32-bit value.
void OpDispatchBuilder::STMXCSR(OpcodeArgs) {
  Ref CurrentMXCSR = GetMXCSR();
  StoreResultGPR_WithOpSize(Op, Op->Dest, CurrentMXCSR, OpSize::i32Bit);
}

// Two-operand pack built on _VSQXTUNPair. The destination is the first input
// and Src[0] the second; ElementSize is the width of the input elements.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::PACKUSOp(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Packed = _VSQXTUNPair(OpSizeFromSrc(Op), ElementSize, First, Second);
  StoreResultFPR(Op, Packed);
}

template void OpDispatchBuilder::PACKUSOp<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::PACKUSOp<OpSize::i32Bit>(OpcodeArgs);

// Three-operand pack built on _VSQXTUNPair, using Src[0] and Src[1] as
// inputs. For 256-bit destinations, 64-bit elements 1 and 2 of the packed
// result are swapped before storing.
void OpDispatchBuilder::VPACKUSOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DestOpSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Packed = _VSQXTUNPair(OpSizeFromSrc(Op), ElementSize, First, Second);

  if (DestOpSize == OpSize::i256Bit) {
    // Exchange the two middle 64-bit elements so the halves interleave.
    Ref Partial = _VInsElement(DestOpSize, OpSize::i64Bit, 2, 1, Packed, Packed);
    Packed = _VInsElement(DestOpSize, OpSize::i64Bit, 1, 2, Partial, Packed);
  }

  StoreResultFPR(Op, Packed);
}

// Two-operand pack built on _VSQXTNPair. The destination is the first input
// and Src[0] the second; ElementSize is the width of the input elements.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::PACKSSOp(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Packed = _VSQXTNPair(OpSizeFromSrc(Op), ElementSize, First, Second);
  StoreResultFPR(Op, Packed);
}

template void OpDispatchBuilder::PACKSSOp<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::PACKSSOp<OpSize::i32Bit>(OpcodeArgs);

// Three-operand pack built on _VSQXTNPair, using Src[0] and Src[1] as inputs.
// For 256-bit destinations, 64-bit elements 1 and 2 of the packed result are
// swapped before storing.
void OpDispatchBuilder::VPACKSSOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DestOpSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Packed = _VSQXTNPair(OpSizeFromSrc(Op), ElementSize, First, Second);

  if (DestOpSize == OpSize::i256Bit) {
    // Exchange the two middle 64-bit elements so the halves interleave.
    Ref Partial = _VInsElement(DestOpSize, OpSize::i64Bit, 2, 1, Packed, Packed);
    Packed = _VInsElement(DestOpSize, OpSize::i64Bit, 1, 2, Partial, Packed);
  }

  StoreResultFPR(Op, Packed);
}

// Shared widening-multiply implementation for PMULLOp / VPMULLOp.
//
// 64-bit operands are multiplied directly with a 128-bit widening multiply.
// For wider operands each source is first run through _VUnZip with itself,
// and the widening multiply is applied to the results. `Signed` selects
// between the signed and unsigned multiply.
Ref OpDispatchBuilder::PMULLOpImpl(OpSize Size, IR::OpSize ElementSize, bool Signed, Ref Src1, Ref Src2) {
  if (Size == OpSize::i64Bit) {
    if (Signed) {
      return _VSMull(OpSize::i128Bit, ElementSize, Src1, Src2);
    }
    return _VUMull(OpSize::i128Bit, ElementSize, Src1, Src2);
  }

  auto Gathered1 = _VUnZip(Size, ElementSize, Src1, Src1);
  auto Gathered2 = _VUnZip(Size, ElementSize, Src2, Src2);

  if (Signed) {
    return _VSMull(Size, ElementSize, Gathered1, Gathered2);
  }
  return _VUMull(Size, ElementSize, Gathered1, Gathered2);
}

// Two-operand widening multiply; the destination is the first input and
// Src[0] the second. Only 32-bit input elements are supported.
template<IR::OpSize ElementSize, bool Signed>
void OpDispatchBuilder::PMULLOp(OpcodeArgs) {
  static_assert(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");

  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Product = PMULLOpImpl(OpSizeFromSrc(Op), ElementSize, Signed, LHS, RHS);
  StoreResultFPR(Op, Product);
}

template void OpDispatchBuilder::PMULLOp<OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::PMULLOp<OpSize::i32Bit, true>(OpcodeArgs);

// Three-operand widening multiply; Src[0] and Src[1] are the inputs.
// Only 32-bit input elements are supported.
template<IR::OpSize ElementSize, bool Signed>
void OpDispatchBuilder::VPMULLOp(OpcodeArgs) {
  static_assert(ElementSize == OpSize::i32Bit, "Currently only handles 32-bit -> 64-bit");

  Ref LHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Product = PMULLOpImpl(OpSizeFromSrc(Op), ElementSize, Signed, LHS, RHS);
  StoreResultFPR(Op, Product);
}

template void OpDispatchBuilder::VPMULLOp<OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::VPMULLOp<OpSize::i32Bit, true>(OpcodeArgs);

// Moves Src[0] to the destination. When ToXMM is set, the value is
// zero-extended to 128 bits and written to the destination XMM register
// directly. Otherwise it is stored through the normal result path.
template<bool ToXMM>
void OpDispatchBuilder::MOVQ2DQ(OpcodeArgs) {
  Ref Value = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  if constexpr (!ToXMM) {
    // Plain move: nothing to extend.
    StoreResultFPR(Op, Value);
  } else {
    // An MMX source has to be zero-extended to the full 128-bit register.
    const auto XMMIndex = Op->Dest.Data.GPR.GPR - FEXCore::X86State::REG_XMM_0;

    Value = VZeroExtendOperand(OpSize::i128Bit, Op->Src[0], Value);
    StoreXMMRegister(XMMIndex, Value);
  }
}

template void OpDispatchBuilder::MOVQ2DQ<false>(OpcodeArgs);
template void OpDispatchBuilder::MOVQ2DQ<true>(OpcodeArgs);

// Shared implementation of the ADDSUBPS/ADDSUBPD family.
//
// With host FCMA support, Src2 is first rearranged (_VRev64 for 32-bit
// elements, an 8-byte _VExtr otherwise) and combined with Src1 through
// _VFCADD with a 90 degree rotation. Without FCMA, Src2 is XORed with a named
// vector constant and then added to Src1.
Ref OpDispatchBuilder::ADDSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
  const bool IsSingle = ElementSize == OpSize::i32Bit;

  if (!CTX->HostFeatures.SupportsFCMA) {
    // NOTE(review): the *_INVERT constants presumably flip the sign bit of
    // the lanes that must be subtracted; confirm against their definition.
    const auto InvertConstant = IsSingle ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT;
    auto InvertMask = LoadAndCacheNamedVectorConstant(Size, InvertConstant);
    auto Adjusted = _VXor(Size, ElementSize, Src2, InvertMask);
    return _VFAdd(Size, ElementSize, Src1, Adjusted);
  }

  Ref Rearranged {};
  if (IsSingle) {
    Rearranged = _VRev64(Size, OpSize::i32Bit, Src2);
  } else {
    Rearranged = _VExtr(Size, OpSize::i8Bit, Src2, Src2, 8);
  }
  return _VFCADD(Size, ElementSize, Src1, Rearranged, 90);
}

// Two-operand ADDSUBP: the destination is the first input and Src[0] the
// second.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::ADDSUBPOp(OpcodeArgs) {
  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Combined = ADDSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, LHS, RHS);
  StoreResultFPR(Op, Combined);
}

template void OpDispatchBuilder::ADDSUBPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::ADDSUBPOp<OpSize::i64Bit>(OpcodeArgs);

// Three-operand VADDSUBP: Src[0] and Src[1] are the inputs.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VADDSUBPOp(OpcodeArgs) {
  Ref LHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Combined = ADDSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, LHS, RHS);
  StoreResultFPR(Op, Combined);
}

template void OpDispatchBuilder::VADDSUBPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VADDSUBPOp<OpSize::i64Bit>(OpcodeArgs);

// PFNACC: de-interleaves the 32-bit elements of (Dest, Src) with _VUnZip and
// _VUnZip2, then subtracts the second result from the first as 32-bit floats.
void OpDispatchBuilder::PFNACCOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  auto Minuends = _VUnZip(VecSize, OpSize::i32Bit, First, Second);
  auto Subtrahends = _VUnZip2(VecSize, OpSize::i32Bit, First, Second);
  auto Difference = _VFSub(VecSize, OpSize::i32Bit, Minuends, Subtrahends);

  StoreResultFPR(Op, Difference);
}

// PFPNACC: the low result element is Dest[0] - Dest[1], and the high result
// element is taken from element 0 of the pairwise add of Src with itself.
void OpDispatchBuilder::PFPNACCOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Broadcast Dest[1] so a scalar subtract can compute Dest[0] - Dest[1].
  auto DestHigh = _VDupElement(VecSize, OpSize::i32Bit, First, 1);
  Ref Difference = _VFSub(OpSize::i32Bit, OpSize::i32Bit, First, DestHigh);

  // Pairwise add of Src with itself.
  Ref PairSum = _VFAddP(VecSize, OpSize::i32Bit, Second, Second);

  // Element 0 of the sum becomes element 1 of the result.
  auto Combined = _VInsElement(OpSize::i64Bit, OpSize::i32Bit, 1, 0, Difference, PairSum);

  StoreResultFPR(Op, Combined);
}

// PSWAPD: applies _VRev64 with 32-bit elements to the source and stores the
// result.
void OpDispatchBuilder::PSWAPDOp(OpcodeArgs) {
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const auto VecSize = OpSizeFromSrc(Op);

  auto Swapped = _VRev64(VecSize, OpSize::i32Bit, Input);
  StoreResultFPR(Op, Swapped);
}

// PI2FW: converts the low 16 bits of each 32-bit source element to a float.
void OpDispatchBuilder::PI2FWOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromDst(Op);
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Gather the low 16-bit half of every 32-bit element next to each other.
  Ref LowHalves = _VUnZip(VecSize, OpSize::i16Bit, Input, Input);

  // Sign extend those 16-bit values to 32 bits.
  Ref Extended = _VSXTL(VecSize, OpSize::i16Bit, LowHalves);

  // int32_t to float
  Ref Converted = _Vector_SToF(VecSize, OpSize::i32Bit, Extended);

  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, VecSize);
}

// PF2IW: converts each float to an int32, then sign extends the low 16 bits of
// each converted value back to 32 bits.
void OpDispatchBuilder::PF2IWOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromDst(Op);
  Ref Input = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Float to int32_t
  Ref Converted = _Vector_FToZS(VecSize, OpSize::i32Bit, Input);

  // Gather the low 16-bit half of every 32-bit element next to each other.
  Ref LowHalves = _VUnZip(VecSize, OpSize::i16Bit, Converted, Converted);

  // Sign extend those 16-bit values to 32 bits.
  Ref Extended = _VSXTL(VecSize, OpSize::i16Bit, LowHalves);

  StoreResultFPR_WithOpSize(Op, Op->Dest, Extended, VecSize);
}

// PMULHRW: signed 16x16 -> 32-bit multiply, add the rounding constant 0x8000
// to each product, and keep the upper 16 bits of each sum.
void OpDispatchBuilder::PMULHRWOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);
  // The widened products need twice the source width.
  const auto WideSize = VecSize << 1;

  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Multiplies 4 16bit values in to 4 32bit values
  Ref Products = _VSMull(WideSize, OpSize::i16Bit, LHS, RHS);

  // Load 0x0000_8000 in to each 32-bit element.
  Ref RoundingBias = _VectorImm(OpSize::i128Bit, OpSize::i32Bit, 0x80, 8);
  Ref Rounded = _VAdd(WideSize, OpSize::i32Bit, Products, RoundingBias);

  // Shift right by 16 and narrow, keeping the top 16 bits of each element.
  Ref Narrowed = _VUShrNI(WideSize, OpSize::i32Bit, Rounded, 16);

  StoreResultFPR(Op, Narrowed);
}

// Packed 32-bit float compare with the predicate fixed at compile time:
//   0 = EQ, 1 = GE (emitted as LE with swapped operands), 2 = GT.
// Any other value logs an assertion message and stores an empty result.
template<uint8_t CompType>
void OpDispatchBuilder::VPFCMPOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref LHS = LoadSourceFPR_WithOpSize(Op, Op->Dest, OpSizeFromDst(Op), Op->Flags);

  Ref Compared {};
  if constexpr (CompType == 0x00) {
    // EQ
    Compared = _VFCMPEQ(VecSize, OpSize::i32Bit, LHS, RHS);
  } else if constexpr (CompType == 0x01) {
    // GE(Swapped operand)
    Compared = _VFCMPLE(VecSize, OpSize::i32Bit, RHS, LHS);
  } else if constexpr (CompType == 0x02) {
    // GT
    Compared = _VFCMPGT(VecSize, OpSize::i32Bit, LHS, RHS);
  } else {
    LOGMAN_MSG_A_FMT("Unknown Comparison type: {}", CompType);
  }

  StoreResultFPR(Op, Compared);
}

template void OpDispatchBuilder::VPFCMPOp<0>(OpcodeArgs);
template void OpDispatchBuilder::VPFCMPOp<1>(OpcodeArgs);
template void OpDispatchBuilder::VPFCMPOp<2>(OpcodeArgs);

// Shared implementation of PMADDWD / VPMADDWD.
//
// Multiplies corresponding signed 16-bit elements of Src1 and Src2 into 32-bit
// products and adds each adjacent pair of products:
//
// x86 PMADDWD: xmm1, xmm2
//              xmm1[31:0]  = (xmm1[15:0] * xmm2[15:0]) + (xmm1[31:16] * xmm2[31:16])
//              xmm1[63:32] = (xmm1[47:32] * xmm2[47:32]) + (xmm1[63:48] * xmm2[63:48])
//              etc.. for larger registers
//
// Size is the width of the source operands.
Ref OpDispatchBuilder::PMADDWDOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
  if (Size == OpSize::i64Bit) {
    // MMX implementation can be slightly more optimal.
    // Four 16-bit inputs widen to four 32-bit products, so the multiply and
    // the pairwise add must operate at twice the source width (128-bit), the
    // same way PMULHRWOp and PMADDUBSWOpImpl widen their 64-bit paths.
    const auto WideSize = Size << 1;
    auto MullResult = _VSMull(WideSize, OpSize::i16Bit, Src1, Src2);
    return _VAddP(WideSize, OpSize::i32Bit, MullResult, MullResult);
  }

  auto Lower = _VSMull(Size, OpSize::i16Bit, Src1, Src2);
  auto Upper = _VSMull2(Size, OpSize::i16Bit, Src1, Src2);

  // [15:0 ] + [31:16], [32:47 ] + [63:48  ], [79:64] + [95:80], [111:96] + [127:112]
  return _VAddP(Size, OpSize::i32Bit, Lower, Upper);
}

// Two-operand PMADDWD: the destination is the first input and Src[0] the
// second.
void OpDispatchBuilder::PMADDWD(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Accumulated = PMADDWDOpImpl(VecSize, LHS, RHS);
  StoreResultFPR(Op, Accumulated);
}

// Three-operand VPMADDWD: Src[0] and Src[1] are the inputs.
void OpDispatchBuilder::VPMADDWDOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref LHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Accumulated = PMADDWDOpImpl(VecSize, LHS, RHS);
  StoreResultFPR(Op, Accumulated);
}

// Shared implementation of PMADDUBSW / VPMADDUBSW.
//
// Src1 bytes are treated as unsigned and Src2 bytes as signed. Corresponding
// bytes are multiplied and adjacent products are added with signed saturation
// to 16 bits. Size is the width of the source operands.
Ref OpDispatchBuilder::PMADDUBSWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
  if (Size == OpSize::i64Bit) {
    // The 64-bit form is cheaper: all widened data fits in one 128-bit vector.
    const auto WideSize = Size << 1;

    // Zero extend the unsigned bytes and sign extend the signed bytes to 16 bits.
    auto Unsigned16 = _VUXTL(WideSize, OpSize::i8Bit, Src1);
    auto Signed16 = _VSXTL(WideSize, OpSize::i8Bit, Src2);

    // Widening multiplies of the low and high halves give 32-bit products.
    auto ProductsLow = _VSMull(WideSize, OpSize::i16Bit, Unsigned16, Signed16);
    auto ProductsHigh = _VSMull2(WideSize, OpSize::i16Bit, Unsigned16, Signed16);

    // Now add pairwise across the vector
    auto PairSums = _VAddP(WideSize, OpSize::i32Bit, ProductsLow, ProductsHigh);

    // Saturating narrow back down to 16 bits.
    return _VSQXTN(WideSize, OpSize::i32Bit, PairSums);
  }

  // V{U,S}XTL{,2}/ and VUnZip{,2} can be optimized in this solution to save about one instruction.
  // We can up-front zero extend and sign extend the elements in-place.
  // This means extracting even and odd elements up-front so the unzips aren't required.
  // Requires implementing IR ops for BIC (vector, immediate) although.

  // Lower half: Src1 zero extended, Src2 sign extended, then multiplied as 16-bit.
  auto LowUnsigned16 = _VUXTL(Size, OpSize::i8Bit, Src1);
  auto LowSigned16 = _VSXTL(Size, OpSize::i8Bit, Src2);
  auto LowProducts = _VMul(Size, OpSize::i16Bit, LowUnsigned16, LowSigned16);

  // Upper half (the +64-bit portion of each source), handled the same way.
  auto HighUnsigned16 = _VUXTL2(Size, OpSize::i8Bit, Src1);
  auto HighSigned16 = _VSXTL2(Size, OpSize::i8Bit, Src2);
  auto HighProducts = _VMul(Size, OpSize::i16Bit, HighUnsigned16, HighSigned16);

  // Split the products into their even and odd elements, then add the two
  // sets with signed saturation.
  auto EvenProducts = _VUnZip(Size, OpSize::i16Bit, LowProducts, HighProducts);
  auto OddProducts = _VUnZip2(Size, OpSize::i16Bit, LowProducts, HighProducts);

  return _VSQAdd(Size, OpSize::i16Bit, EvenProducts, OddProducts);
}

// Two-operand PMADDUBSW: the destination is the first (unsigned) input and
// Src[0] the second (signed) input.
void OpDispatchBuilder::PMADDUBSW(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref UnsignedInput = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SignedInput = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Accumulated = PMADDUBSWOpImpl(VecSize, UnsignedInput, SignedInput);
  StoreResultFPR(Op, Accumulated);
}

// Three-operand VPMADDUBSW: Src[0] is the first (unsigned) input and Src[1]
// the second (signed) input.
void OpDispatchBuilder::VPMADDUBSWOp(OpcodeArgs) {
  const auto VecSize = OpSizeFromSrc(Op);

  Ref UnsignedInput = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref SignedInput = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Accumulated = PMADDUBSWOpImpl(VecSize, UnsignedInput, SignedInput);
  StoreResultFPR(Op, Accumulated);
}

// Shared implementation of the 16-bit multiply-high instructions. `Signed`
// selects between _VSMulH and _VUMulH; the operation size comes from the
// instruction's source size.
Ref OpDispatchBuilder::PMULHWOpImpl(OpcodeArgs, bool Signed, Ref Src1, Ref Src2) {
  const auto VecSize = OpSizeFromSrc(Op);

  if (!Signed) {
    return _VUMulH(VecSize, OpSize::i16Bit, Src1, Src2);
  }
  return _VSMulH(VecSize, OpSize::i16Bit, Src1, Src2);
}

// Two-operand multiply-high: the destination is the first input and Src[0]
// the second.
template<bool Signed>
void OpDispatchBuilder::PMULHW(OpcodeArgs) {
  Ref LHS = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref RHS = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref HighProduct = PMULHWOpImpl(Op, Signed, LHS, RHS);
  StoreResultFPR(Op, HighProduct);
}

template void OpDispatchBuilder::PMULHW<false>(OpcodeArgs);
template void OpDispatchBuilder::PMULHW<true>(OpcodeArgs);

// VEX-encoded PMULHW/PMULHUW.
// Both multiplicands come from explicit source operands; Signed selects the
// signed or unsigned high-half multiply (see PMULHWOpImpl).
template<bool Signed>
void OpDispatchBuilder::VPMULHWOp(OpcodeArgs) {
  // Use the OpSize-typed helpers like the other AVX handlers in this file
  // rather than comparing a raw byte count against XMM_SSE_REG_SIZE.
  const auto DstSize = OpSizeFromDst(Op);
  const auto Is128Bit = DstSize == OpSize::i128Bit;

  Ref Dest = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Result = PMULHWOpImpl(Op, Signed, Dest, Src);

  if (Is128Bit) {
    // The 128-bit form routes the result through a 128-bit vector move before storing.
    // NOTE(review): presumably this clears the upper lanes of the destination - confirm.
    Result = _VMov(OpSize::i128Bit, Result);
  }
  StoreResultFPR(Op, Result);
}

template void OpDispatchBuilder::VPMULHWOp<false>(OpcodeArgs);
template void OpDispatchBuilder::VPMULHWOp<true>(OpcodeArgs);

// Emits the PMULHRSW sequence: widening signed 16-bit multiply, arithmetic
// shift right by 14, add 1, then a narrowing shift right by 1.
Ref OpDispatchBuilder::PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2) {
  constexpr auto NarrowElement = OpSize::i16Bit;
  constexpr auto WideElement = OpSize::i32Bit;

  if (Size != OpSize::i64Bit) {
    // 128-bit and 256-bit are less efficient: the low and high halves
    // are widened and processed separately.
    Ref ProductLo = _VSMull(Size, NarrowElement, Src1, Src2);
    Ref ProductHi = _VSMull2(Size, NarrowElement, Src1, Src2);

    ProductLo = _VSShrI(Size, WideElement, ProductLo, 14);
    ProductHi = _VSShrI(Size, WideElement, ProductHi, 14);
    auto Ones = _VectorImm(Size, WideElement, 1);

    ProductLo = _VAdd(Size, WideElement, ProductLo, Ones);
    ProductHi = _VAdd(Size, WideElement, ProductHi, Ones);

    // Narrow both halves back into a single vector.
    Ref Narrowed = _VUShrNI(Size, WideElement, ProductLo, 1);
    return _VUShrNI2(Size, WideElement, Narrowed, ProductHi, 1);
  }

  // Implementation is more efficient for 8byte registers: everything fits
  // in one vector of twice the size.
  const auto WideSize = Size << 1;

  Ref Product = _VSMull(WideSize, NarrowElement, Src1, Src2);
  Product = _VSShrI(WideSize, WideElement, Product, 14);
  auto Ones = _VectorImm(WideSize, WideElement, 1);
  Product = _VAdd(WideSize, WideElement, Product, Ones);
  return _VUShrNI(WideSize, WideElement, Product, 1);
}

// Legacy-encoded PMULHRSW: the destination operand is also the first input.
void OpDispatchBuilder::PMULHRSW(OpcodeArgs) {
  Ref Lhs = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Rhs = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  const auto VectorSize = OpSizeFromSrc(Op);
  StoreResultFPR(Op, PMULHRSWOpImpl(VectorSize, Lhs, Rhs));
}

// VEX-encoded PMULHRSW: both inputs come from explicit source operands.
void OpDispatchBuilder::VPMULHRSWOp(OpcodeArgs) {
  Ref Lhs = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Rhs = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  const auto VectorSize = OpSizeFromSrc(Op);
  StoreResultFPR(Op, PMULHRSWOpImpl(VectorSize, Lhs, Rhs));
}

// Horizontal floating-point subtract: de-interleaves the two sources into
// even-indexed and odd-indexed elements, then computes even - odd.
Ref OpDispatchBuilder::HSUBPOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
  auto Minuends = _VUnZip(SrcSize, ElementSize, Src1, Src2);
  auto Subtrahends = _VUnZip2(SrcSize, ElementSize, Src1, Src2);

  return _VFSub(SrcSize, ElementSize, Minuends, Subtrahends);
}

// Legacy-encoded HSUBPS/HSUBPD: the destination operand is also the first input.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::HSUBP(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, HSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, First, Second));
}

template void OpDispatchBuilder::HSUBP<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::HSUBP<OpSize::i64Bit>(OpcodeArgs);

// VEX-encoded horizontal floating-point subtract.
// For 256-bit destinations the two middle 64-bit elements of the
// full-width result are swapped before it is stored.
void OpDispatchBuilder::VHSUBPOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Result = HSUBPOpImpl(OpSizeFromSrc(Op), ElementSize, First, Second);

  if (DstSize != OpSize::i256Bit) {
    StoreResultFPR(Op, Result);
    return;
  }

  // Exchange 64-bit elements 1 and 2.
  Ref Swapped = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Result, Result);
  Swapped = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Swapped, Result);

  StoreResultFPR(Op, Swapped);
}

// Horizontal integer subtract: de-interleaves the two sources into
// even-indexed and odd-indexed elements, then computes even - odd.
Ref OpDispatchBuilder::PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, IR::OpSize ElementSize) {
  auto Minuends = _VUnZip(Size, ElementSize, Src1, Src2);
  auto Subtrahends = _VUnZip2(Size, ElementSize, Src1, Src2);

  return _VSub(Size, ElementSize, Minuends, Subtrahends);
}

// Legacy-encoded PHSUBW/PHSUBD: the destination operand is also the first input.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::PHSUB(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PHSUBOpImpl(OpSizeFromSrc(Op), First, Second, ElementSize));
}

template void OpDispatchBuilder::PHSUB<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::PHSUB<OpSize::i32Bit>(OpcodeArgs);

// VEX-encoded horizontal integer subtract.
// For 256-bit destinations the two middle 64-bit elements of the
// full-width result are swapped before it is stored.
void OpDispatchBuilder::VPHSUBOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Difference = PHSUBOpImpl(OpSizeFromSrc(Op), First, Second, ElementSize);

  if (DstSize != OpSize::i256Bit) {
    StoreResultFPR(Op, Difference);
    return;
  }

  // Exchange 64-bit elements 1 and 2.
  Ref Swapped = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Difference, Difference);
  Swapped = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Swapped, Difference);
  StoreResultFPR(Op, Swapped);
}

// Horizontal saturating add on 16-bit elements: de-interleaves the sources
// into even-indexed and odd-indexed elements and adds them with signed saturation.
Ref OpDispatchBuilder::PHADDSOpImpl(OpSize Size, Ref Src1, Ref Src2) {
  constexpr auto LaneSize = OpSize::i16Bit;

  auto EvenLanes = _VUnZip(Size, LaneSize, Src1, Src2);
  auto OddLanes = _VUnZip2(Size, LaneSize, Src1, Src2);

  // The saturating add keeps every sum within the 16-bit element.
  return _VSQAdd(Size, LaneSize, EvenLanes, OddLanes);
}

// Legacy-encoded PHADDSW: the destination operand is also the first input.
void OpDispatchBuilder::PHADDS(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PHADDSOpImpl(OpSizeFromSrc(Op), First, Second));
}

// VEX-encoded PHADDSW.
// For 256-bit sources the two middle 64-bit elements of the full-width
// result are swapped before it is stored.
void OpDispatchBuilder::VPHADDSWOp(OpcodeArgs) {
  const auto SrcSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Sum = PHADDSOpImpl(OpSizeFromSrc(Op), First, Second);

  if (SrcSize != OpSize::i256Bit) {
    StoreResultFPR(Op, Sum);
    return;
  }

  // Exchange 64-bit elements 1 and 2.
  Ref Swapped = _VInsElement(SrcSize, OpSize::i64Bit, 1, 2, Sum, Sum);
  Swapped = _VInsElement(SrcSize, OpSize::i64Bit, 2, 1, Swapped, Sum);

  StoreResultFPR(Op, Swapped);
}

// Horizontal saturating subtract on 16-bit elements: de-interleaves the sources
// into even-indexed and odd-indexed elements and computes even - odd with signed saturation.
Ref OpDispatchBuilder::PHSUBSOpImpl(OpSize Size, Ref Src1, Ref Src2) {
  constexpr auto LaneSize = OpSize::i16Bit;

  auto EvenLanes = _VUnZip(Size, LaneSize, Src1, Src2);
  auto OddLanes = _VUnZip2(Size, LaneSize, Src1, Src2);

  // The saturating subtract keeps every difference within the 16-bit element.
  return _VSQSub(Size, LaneSize, EvenLanes, OddLanes);
}

// Legacy-encoded PHSUBSW: the destination operand is also the first input.
void OpDispatchBuilder::PHSUBS(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PHSUBSOpImpl(OpSizeFromSrc(Op), First, Second));
}

// VEX-encoded PHSUBSW.
// For 256-bit destinations the two middle 64-bit elements of the
// full-width result are swapped before it is stored.
void OpDispatchBuilder::VPHSUBSWOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Difference = PHSUBSOpImpl(OpSizeFromSrc(Op), First, Second);

  if (DstSize != OpSize::i256Bit) {
    StoreResultFPR(Op, Difference);
    return;
  }

  // Exchange 64-bit elements 1 and 2.
  Ref Swapped = _VInsElement(DstSize, OpSize::i64Bit, 1, 2, Difference, Difference);
  Swapped = _VInsElement(DstSize, OpSize::i64Bit, 2, 1, Swapped, Difference);

  StoreResultFPR(Op, Swapped);
}

// Emits the sum-of-absolute-differences sequence for PSADBW.
//
// The documentation strongly implies that `abs(dest[i] - src[i])` is computed
// in 8-bit space, but it actually operates in a wider space: `abs(0 - 0xFF)`
// gives a different answer depending on the bit length. The absolute
// differences are therefore produced widened to 16-bit before being summed.
Ref OpDispatchBuilder::PSADBWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2) {
  constexpr auto ByteElement = OpSize::i8Bit;
  constexpr auto SumElement = OpSize::i16Bit;

  if (Size == OpSize::i64Bit) {
    const auto WideSize = Size << 1;
    auto Differences = _VUABDL(WideSize, ByteElement, Src1, Src2);

    // Vector-wide add of all widened differences.
    return _VAddV(WideSize, SumElement, Differences);
  }

  auto DiffLo = _VUABDL(Size, ByteElement, Src1, Src2);
  auto DiffHi = _VUABDL2(Size, ByteElement, Src1, Src2);

  // One sum per 64-bit half of the lower 128 bits, zipped into adjacent 64-bit elements.
  Ref SumLo = _VAddV(OpSize::i128Bit, SumElement, DiffLo);
  Ref SumHi = _VAddV(OpSize::i128Bit, SumElement, DiffHi);
  auto LowerLane = _VZip(Size, OpSize::i64Bit, SumLo, SumHi);

  if (Size == OpSize::i128Bit) {
    return LowerLane;
  }

  // Remaining case: repeat the reduction on 128-bit element 1 of each difference vector.
  Ref UpperDiffLo = _VDupElement(Size, OpSize::i128Bit, DiffLo, 1);
  Ref UpperDiffHi = _VDupElement(Size, OpSize::i128Bit, DiffHi, 1);

  Ref UpperSumLo = _VAddV(OpSize::i128Bit, SumElement, UpperDiffLo);
  Ref UpperSumHi = _VAddV(OpSize::i128Bit, SumElement, UpperDiffHi);

  Ref UpperLane = _VInsElement(Size, OpSize::i64Bit, 1, 0, UpperSumLo, UpperSumHi);
  Ref Combined = _VInsElement(Size, OpSize::i128Bit, 1, 0, LowerLane, UpperLane);

  // Exchange 64-bit elements 1 and 2 of the combined vector.
  Ref Partial = _VInsElement(Size, OpSize::i64Bit, 2, 1, Combined, Combined);
  return _VInsElement(Size, OpSize::i64Bit, 1, 2, Partial, Combined);
}

// Legacy-encoded PSADBW: the destination operand is also the first input.
void OpDispatchBuilder::PSADBW(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, PSADBWOpImpl(VectorSize, First, Second));
}

// VEX-encoded PSADBW: both inputs come from explicit source operands.
void OpDispatchBuilder::VPSADBWOp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref First = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  StoreResultFPR(Op, PSADBWOpImpl(VectorSize, First, Second));
}

// Loads the source operand and widens its elements from ElementSize up to
// DstElementSize, doubling the element width one step at a time.
// Signed selects sign extension (_VSXTL) over zero extension (_VUXTL).
Ref OpDispatchBuilder::ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref Result {};
  if (Op->Src[0].IsGPR()) {
    Result = LoadSourceFPR_WithOpSize(Op, Op->Src[0], DstSize, Op->Flags);
  } else {
    // For memory operands the 256-bit variant loads twice the size specified in the table.
    const auto SrcSize = OpSizeFromSrc(Op);
    const auto LoadSize = DstSize == OpSize::i256Bit ? IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) * 2) : SrcSize;

    Result = LoadSourceFPR_WithOpSize(Op, Op->Src[0], LoadSize, Op->Flags);
  }

  // Each pass doubles the element width until the requested width is reached.
  auto StepSize = ElementSize;
  while (StepSize != DstElementSize) {
    if (Signed) {
      Result = _VSXTL(DstSize, StepSize, Result);
    } else {
      Result = _VUXTL(DstSize, StepSize, Result);
    }
    StepSize = StepSize << 1;
  }

  return Result;
}

// Handler for the element-widening moves: widens the source elements via
// ExtendVectorElementsImpl and stores the widened vector.
template<IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed>
void OpDispatchBuilder::ExtendVectorElements(OpcodeArgs) {
  StoreResultFPR(Op, ExtendVectorElementsImpl(Op, ElementSize, DstElementSize, Signed));
}

// Zero-extending variants.
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, false>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, false>(OpcodeArgs);

// Sign-extending variants.
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, true>(OpcodeArgs);
template void OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, true>(OpcodeArgs);

// Emits a vector round of Src, with the rounding behaviour obtained by
// passing the instruction's Mode immediate through TranslateRoundType.
Ref OpDispatchBuilder::VectorRoundImpl(OpSize Size, IR::OpSize ElementSize, Ref Src, uint64_t Mode) {
  const auto RoundType = TranslateRoundType(Mode);
  return _Vector_FToI(Size, ElementSize, Src, RoundType);
}

// Legacy-encoded vector round: operand Src[0] is the value, Src[1] the rounding-mode immediate.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VectorRound(OpcodeArgs) {
  // The load uses the source size as-is: a scalar source does not need to be
  // zero extended, especially since it's only inserted into another vector.
  const auto SrcSize = OpSizeFromSrc(Op);
  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags);

  const uint64_t Mode = Op->Src[1].Literal();
  Ref Rounded = VectorRoundImpl(OpSizeFromDst(Op), ElementSize, Input, Mode);

  StoreResultFPR(Op, Rounded);
}

template void OpDispatchBuilder::VectorRound<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorRound<OpSize::i64Bit>(OpcodeArgs);

// VEX-encoded vector round: operand Src[0] is the value, Src[1] the rounding-mode immediate.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::AVXVectorRound(OpcodeArgs) {
  const auto Mode = Op->Src[1].Literal();

  // The load uses the source size as-is: a scalar source does not need to be
  // zero extended, especially since it's only inserted into another vector.
  const auto SrcSize = OpSizeFromSrc(Op);
  Ref Input = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags);

  StoreResultFPR(Op, VectorRoundImpl(OpSizeFromDst(Op), ElementSize, Input, Mode));
}

template void OpDispatchBuilder::AVXVectorRound<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::AVXVectorRound<OpSize::i64Bit>(OpcodeArgs);

// Emits an immediate-controlled blend of Src1 and Src2.
//
// Bit N of Selector chooses the source of element N of the result:
// a set bit takes the element from Src2, a clear bit keeps the element from Src1.
//
// - 32-bit elements: only the low four selector bits are used.
// - 64-bit elements: only the low two selector bits are used.
// - Any other element size takes the final branch, which treats the full
//   8-bit selector as eight 16-bit element choices.
//
// Each selector value is mapped to the cheapest sequence available:
// returning a source unchanged, a single element insert, a VRev64+VTrn2 pair,
// a pair of VExtr byte extractions, or a table lookup (VTBX1) driven by a
// named vector constant.
Ref OpDispatchBuilder::VectorBlend(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Selector) {
  if (ElementSize == OpSize::i32Bit) {
    Selector &= 0b1111;
    switch (Selector) {
    case 0b0000:
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src1[127:96]
      // Copy
      return Src1;
    case 0b0001:
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src1[127:96]
      return _VInsElement(Size, ElementSize, 0, 0, Src1, Src2);
    case 0b0010:
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src1[127:96]
      return _VInsElement(Size, ElementSize, 1, 1, Src1, Src2);
    case 0b0011:
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src1[127:96]
      // Two adjacent 32-bit elements are moved as one 64-bit element insert.
      return _VInsElement(Size, OpSize::i64Bit, 0, 0, Src1, Src2);
    case 0b0100:
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src1[127:96]
      return _VInsElement(Size, ElementSize, 2, 2, Src1, Src2);
    case 0b0101: {
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src1[127:96]
      // Rotate the elements of the incoming source so they end up in the correct location.
      // Then trn2 keeps the destination results in the expected location.
      auto Temp = _VRev64(Size, OpSize::i32Bit, Src2);
      return _VTrn2(Size, ElementSize, Temp, Src1);
    }
    case 0b0110: {
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src1[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0110B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b0111: {
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src1[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_0111B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b1000:
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src2[127:96]
      return _VInsElement(Size, ElementSize, 3, 3, Src1, Src2);
    case 0b1001: {
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src2[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1001B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b1010: {
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src2[127:96]
      // Rotate the elements of the incoming destination so they end up in the correct location.
      // Then trn2 keeps the source results in the expected location.
      auto Temp = _VRev64(Size, OpSize::i32Bit, Src1);
      return _VTrn2(Size, ElementSize, Temp, Src2);
    }
    case 0b1011: {
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src1[95:64]
      // Dest[127:96] = Src2[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1011B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b1100:
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src2[127:96]
      // Two adjacent 32-bit elements are moved as one 64-bit element insert.
      return _VInsElement(Size, OpSize::i64Bit, 1, 1, Src1, Src2);
    case 0b1101: {
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src1[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src2[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1101B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b1110: {
      // Dest[31:0]   = Src1[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src2[127:96]
      auto ConstantSwizzle = LoadAndCacheNamedVectorConstant(Size, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_BLENDPS_1110B);
      return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
    }
    case 0b1111:
      // Dest[31:0]   = Src2[31:0]
      // Dest[63:32]  = Src2[63:32]
      // Dest[95:64]  = Src2[95:64]
      // Dest[127:96] = Src2[127:96]
      // Copy
      return Src2;
    default: break;
    }
  } else if (ElementSize == OpSize::i64Bit) {
    Selector &= 0b11;
    switch (Selector) {
    case 0b00:
      // No-op
      return Src1;
    case 0b01:
      // Dest[63:0]   = Src2[63:0]
      // Dest[127:64] = Src1[127:64]
      return _VInsElement(Size, ElementSize, 0, 0, Src1, Src2);
    case 0b10:
      // Dest[63:0]   = Src1[63:0]
      // Dest[127:64] = Src2[127:64]
      return _VInsElement(Size, ElementSize, 1, 1, Src1, Src2);
    case 0b11:
      // Copy
      return Src2;
    }
  } else {
    // 16-bit element path: all eight selector bits are significant.

    ///< Zero instruction copies
    switch (Selector) {
    case 0b0000'0000: return Src1;
    case 0b1111'1111: return Src2;
    default: break;
    }

    ///< Single instruction implementation
    switch (Selector) {
    case 0b0000'0001:
    case 0b0000'0010:
    case 0b0000'0100:
    case 0b0000'1000:
    case 0b0001'0000:
    case 0b0010'0000:
    case 0b0100'0000:
    case 0b1000'0000: {
      // Single 16-bit element insert.
      // Exactly one bit is set, so its bit index is the element index.
      const auto Element = FEXCore::ilog2(Selector);
      return _VInsElement(Size, ElementSize, Element, Element, Src1, Src2);
    }
    case 0b1111'1110:
    case 0b1111'1101:
    case 0b1111'1011:
    case 0b1111'0111:
    case 0b1110'1111:
    case 0b1101'1111:
    case 0b1011'1111:
    case 0b0111'1111: {
      // Single 16-bit element insert, inverted
      // Exactly one bit is clear: start from Src2 and insert the one Src1 element.
      uint8_t SelectorInvert = ~Selector;
      const auto Element = FEXCore::ilog2(SelectorInvert);
      return _VInsElement(Size, ElementSize, Element, Element, Src2, Src1);
    }
    case 0b0000'0011:
    case 0b0000'1100:
    case 0b0011'0000:
    case 0b1100'0000: {
      // Single 32-bit element insert.
      // An aligned pair of 16-bit elements; two selector bits per 32-bit element.
      const auto Element = std::countr_zero(Selector) / 2;
      return _VInsElement(Size, OpSize::i32Bit, Element, Element, Src1, Src2);
    }
    case 0b1111'1100:
    case 0b1111'0011:
    case 0b1100'1111:
    case 0b0011'1111: {
      // Single 32-bit element insert, inverted
      uint8_t SelectorInvert = ~Selector;
      const auto Element = std::countr_zero(SelectorInvert) / 2;
      return _VInsElement(Size, OpSize::i32Bit, Element, Element, Src2, Src1);
    }
    case 0b0000'1111:
    case 0b1111'0000: {
      // Single 64-bit element insert.
      // An aligned group of four 16-bit elements; four selector bits per 64-bit element.
      const auto Element = std::countr_zero(Selector) / 4;
      return _VInsElement(Size, OpSize::i64Bit, Element, Element, Src1, Src2);
    }
    default: break;
    }

    ///< Two instruction implementation
    // Selectors that form one contiguous run of three or five elements at either
    // end of the vector: the first VExtr combines bytes of both sources, the
    // second uses the same vector for both inputs.
    // Note these always operate at 128-bit regardless of Size.
    switch (Selector) {
    ///< Fancy double VExtr
    case 0b0'0'0'0'0'1'1'1: {
      auto Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src2, Src1, 6);
      return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 10);
    }
    case 0b0'0'0'1'1'1'1'1: {
      auto Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src2, Src1, 10);
      return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 6);
    }
    case 0b1'1'1'0'0'0'0'0: {
      auto Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src1, Src2, 10);
      return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 6);
    }
    case 0b1'1'1'1'1'0'0'0: {
      auto Tmp = _VExtr(OpSize::i128Bit, OpSize::i8Bit, Src1, Src2, 6);
      return _VExtr(OpSize::i128Bit, OpSize::i8Bit, Tmp, Tmp, 10);
    }
    default: break;
    }

    // TODO: There are some of these swizzles that can be more optimal.
    // NamedConstant + VTBX1 is quite quick already.
    // Implement more if it becomes relevant.
    // NOTE(review): `Selector * 16` is presumably a byte offset to a 16-byte table entry - confirm.
    auto ConstantSwizzle =
      LoadAndCacheIndexedNamedVectorConstant(Size, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW, Selector * 16);
    return _VTBX1(Size, Src1, Src2, ConstantSwizzle);
  }

  FEX_UNREACHABLE;
}

// Legacy-encoded immediate blend: the destination operand is the first
// input and operand Src[1] carries the selector immediate.
// The blend is always emitted at 128-bit.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VectorBlend(OpcodeArgs) {
  const uint8_t Selector = Op->Src[1].Literal();

  Ref Original = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Replacement = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Blended = VectorBlend(OpSize::i128Bit, ElementSize, Original, Replacement, Selector);
  StoreResultFPR(Op, Blended);
}

template void OpDispatchBuilder::VectorBlend<OpSize::i16Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorBlend<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VectorBlend<OpSize::i64Bit>(OpcodeArgs);

// Legacy-encoded variable blend, with the mask taken from XMM register 0.
//
// Each element is selected by the high bit of the matching mask element:
//   Dest[i] = Xmm0[i][HighBit] ? Src[i] : Dest[i]
//
// To emulate this on AArch64, arithmetic shift right so the high bit fills
// the whole element, then use BSL to select between the registers.
void OpDispatchBuilder::VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref Original = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Replacement = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Selector = LoadXMMRegister(0);
  Selector = _VSShrI(VectorSize, ElementSize, Selector, IR::OpSizeAsBits(ElementSize) - 1);

  StoreResultFPR(Op, _VBSL(VectorSize, Selector, Replacement, Original));
}

// VEX-encoded variable blend. The mask register index is carried in the
// immediate operand Src[2]; the high bit of each mask element picks Src[1]
// (set) or Src[0] (clear) for that element.
void OpDispatchBuilder::AVXVectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize) {
  const auto VectorSize = OpSizeFromSrc(Op);
  const auto SignBitShift = IR::OpSizeAsBits(ElementSize) - 1;

  Ref WhenClear = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref WhenSet = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // Mask register is encoded within bits [7:4] of the selector
  const auto MaskRegister = (Op->Src[2].Literal() >> 4) & 0b1111;
  Ref Selector = LoadXMMRegister(MaskRegister);

  // Replicate each element's high bit across the element, then bit-select.
  Ref ElementMask = _VSShrI(VectorSize, ElementSize, Selector, SignBitShift);
  StoreResultFPR(Op, _VBSL(VectorSize, ElementMask, WhenSet, WhenClear));
}

// Emits the flag computation for {V,}PTEST.
// ZF reflects whether (Dest AND Src) is all zero; CF reflects whether
// (Src AND NOT Dest) is all zero. PF and AF are zeroed.
void OpDispatchBuilder::PTestOpImpl(OpSize Size, Ref Dest, Ref Src) {
  constexpr auto ReduceElement = OpSize::i16Bit;

  Ref AndBits = _VAnd(Size, OpSize::i8Bit, Dest, Src);
  Ref AndNotBits = _VAndn(Size, OpSize::i8Bit, Src, Dest);

  // Element size must be less than 32-bit for the sign bit tricks.
  Ref AndMax = _VUMaxV(Size, ReduceElement, AndBits);
  Ref AndNotMax = _VUMaxV(Size, ReduceElement, AndNotBits);

  Ref AndGPR = _VExtractToGPR(Size, ReduceElement, AndMax, 0);
  Ref AndNotGPR = _VExtractToGPR(Size, ReduceElement, AndNotMax, 0);

  Ref CFInv = To01(OpSize::i64Bit, AndNotGPR);

  // Careful, these flags are different between {V,}PTEST and VTESTP{S,D}
  // ZF comes from the AND reduction. SF ends up zero because the test is
  // 32-bit wide on a 16-bit value from the UMaxV, so the 32-bit sign bit is
  // clear even if the 16-bit scalar was negative.
  SetNZ_ZeroCV(OpSize::i32Bit, AndGPR);
  SetCFInverted(CFInv);
  ZeroPF_AF();
}

// PTEST handler: loads both operands and emits the flag computation.
// Nothing is stored back to a vector register.
void OpDispatchBuilder::PTestOp(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  const auto VectorSize = OpSizeFromSrc(Op);
  PTestOpImpl(VectorSize, First, Second);
}

// Emits the flag computation for VTESTP{S,D}.
// Only the top bit of every element takes part: ZF reflects whether
// (Src2 AND Src1) has no top bit set, CF whether the _VAndn of the same
// operands has no top bit set. PF and AF are zeroed.
void OpDispatchBuilder::VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2) {
  LOGMAN_THROW_A_FMT(ElementSize >= IR::OpSize::i8Bit && ElementSize <= IR::OpSize::i64Bit, "Invalid size");

  constexpr auto ReduceElement = OpSize::i16Bit;

  // A vector with just the top bit of every element set.
  const auto TopBitIndex = IR::OpSizeAsBits(ElementSize) - 1;
  const auto TopBit = uint64_t {1} << TopBitIndex;
  Ref SignMask = _VDupFromGPR(SrcSize, ElementSize, Constant(TopBit));

  Ref AndBits = _VAnd(SrcSize, OpSize::i8Bit, Src2, Src1);
  Ref AndNotBits = _VAndn(SrcSize, OpSize::i8Bit, Src2, Src1);

  Ref AndSigns = _VAnd(SrcSize, OpSize::i8Bit, AndBits, SignMask);
  Ref AndNotSigns = _VAnd(SrcSize, OpSize::i8Bit, AndNotBits, SignMask);

  Ref AndMax = _VUMaxV(SrcSize, ReduceElement, AndSigns);
  Ref AndNotMax = _VUMaxV(SrcSize, ReduceElement, AndNotSigns);

  Ref ZeroSource = _VExtractToGPR(SrcSize, ReduceElement, AndMax, 0);
  Ref CarrySource = _VExtractToGPR(SrcSize, ReduceElement, AndNotMax, 0);

  Ref InvertedCarry = To01(OpSize::i64Bit, CarrySource);

  // As in PTest, this sets Z appropriately while zeroing the rest of NZCV.
  SetNZ_ZeroCV(OpSize::i32Bit, ZeroSource);
  SetCFInverted(InvertedCarry);
  ZeroPF_AF();
}

// VTESTPS/VTESTPD handler: loads both operands and emits the flag computation.
// Nothing is stored back to a vector register.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VTESTPOp(OpcodeArgs) {
  Ref First = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Second = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  const auto VectorSize = OpSizeFromSrc(Op);
  VTESTOpImpl(VectorSize, ElementSize, First, Second);
}
template void OpDispatchBuilder::VTESTPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VTESTPOp<OpSize::i64Bit>(OpcodeArgs);

// Emits PHMINPOSUW: finds the minimum unsigned 16-bit element of the source
// together with its index.
//
// Every 16-bit value is paired with its element index to form a 32-bit key
// of the form (value << 16) | index. An unsigned 32-bit minimum over those
// keys orders by value first and index second.
Ref OpDispatchBuilder::PHMINPOSUWOpImpl(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);

  Ref Values = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Named constant supplying the incrementing 16-bit element indices.
  auto Indices = LoadAndCacheNamedVectorConstant(VectorSize, FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX);

  // Interleave indices and values into two vectors of four 32-bit keys each.
  // Lower keys (elements 0-3):
  // [31:0]  : ([15:0]  << 16) | (0)
  // [63:32] : ([31:16] << 16) | (1)
  // [95:64] : ([47:32] << 16) | (2)
  // [127:96]: ([63:48] << 16) | (3)
  //
  // Upper keys (elements 4-7):
  // [31:0]  : ([79:64]   << 16) | (4)
  // [63:32] : ([95:80]   << 16) | (5)
  // [95:64] : ([111:96]  << 16) | (6)
  // [127:96]: ([127:112] << 16) | (7)
  auto LowerKeys = _VZip(VectorSize, OpSize::i16Bit, Indices, Values);
  auto UpperKeys = _VZip2(VectorSize, OpSize::i16Bit, Indices, Values);

  // Element-wise minimum between the two key vectors, then a horizontal minimum.
  auto PairwiseMin = _VUMin(VectorSize, OpSize::i32Bit, LowerKeys, UpperKeys);
  auto SmallestKey = _VUMinV(VectorSize, OpSize::i32Bit, PairwiseMin);

  // The bottom 32-bits now hold:
  //   [31:0]: (Src[<Min>] << 16) | <Index>
  // while the instruction wants:
  //   [31:0]: (<Index> << 16) | Src[<Min>]
  // Rev32 on 16-bit elements swaps the two halves.
  return _VRev32(VectorSize, OpSize::i16Bit, SmallestKey);
}

// PHMINPOSUW handler: stores the value produced by PHMINPOSUWOpImpl.
void OpDispatchBuilder::PHMINPOSUWOp(OpcodeArgs) {
  StoreResultFPR(Op, PHMINPOSUWOpImpl(Op));
}

// Emits a masked floating-point dot product of Src1 and Src2 (DPPS/DPPD style).
//
// Mask layout (as decoded below):
//   - high nibble (Mask >> 4): source mask, one bit per element, choosing
//     which products take part in the sum.
//   - low nibble: destination mask, one bit per element, choosing which
//     result elements receive the sum; the others are zero.
// For 32-bit elements four bits of each nibble are used, otherwise two.
//
// Returns the result vector. A zero vector is returned immediately when
// either mask selects nothing.
Ref OpDispatchBuilder::DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t Mask, IR::OpSize ElementSize) {
  // Number of meaningful bits in each mask nibble for this element size.
  const auto SizeMask = [ElementSize]() {
    if (ElementSize == OpSize::i32Bit) {
      return 0b1111;
    }
    return 0b11;
  }();

  const uint8_t SrcMask = (Mask >> 4) & SizeMask;
  const uint8_t DstMask = Mask & SizeMask;

  // Indexed constant table holding the per-SrcMask element masks.
  const auto NamedIndexMask = [ElementSize]() {
    if (ElementSize == OpSize::i32Bit) {
      return FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPS_MASK;
    }

    return FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK;
  }();

  Ref ZeroVec = LoadZeroVector(DstSize);
  if (SrcMask == 0 || DstMask == 0) {
    // What are you even doing here? Go away.
    // Nothing is summed or nothing is written: the result is all zero.
    return ZeroVec;
  }

  // First step is to do an FMUL
  Ref Temp = _VFMul(DstSize, ElementSize, Src1, Src2);

  // Now mask results based on IndexMask.
  // Skipped when every element is selected, since the AND would be a no-op.
  if (SrcMask != SizeMask) {
    // NOTE(review): `SrcMask * 16` is presumably a byte offset to a 16-byte table entry - confirm.
    auto InputMask = LoadAndCacheIndexedNamedVectorConstant(DstSize, NamedIndexMask, SrcMask * 16);
    Temp = _VAnd(DstSize, ElementSize, Temp, InputMask);
  }

  // Now do a float reduction
  // NOTE(review): the cases below rely on the sum landing in element 0 with the
  // remaining elements zero - confirm against the _VFAddV definition.
  Temp = _VFAddV(DstSize, ElementSize, Temp);

  // Now using the destination mask we choose where the result ends up
  // It can duplicate and zero results
  if (ElementSize == OpSize::i64Bit) {
    switch (DstMask) {
    case 0b01:
      // Dest[63:0] = Result
      // Dest[127:64] = Zero
      return _VZip(DstSize, ElementSize, Temp, ZeroVec);
    case 0b10:
      // Dest[63:0] = Zero
      // Dest[127:64] = Result
      return _VZip(DstSize, ElementSize, ZeroVec, Temp);
    case 0b11:
      // Broadcast
      // Dest[63:0] = Result
      // Dest[127:64] = Result
      return _VDupElement(DstSize, ElementSize, Temp, 0);
    case 0:
    default: LOGMAN_MSG_A_FMT("Unsupported");
    }
  } else {
    // Generic fallback: insert the result into every selected element one at a time.
    auto BadPath = [&]() {
      Ref Result = ZeroVec;

      for (size_t i = 0; i < IR::NumElements(DstSize, ElementSize); ++i) {
        const auto Bit = 1U << (i % 4);

        if ((DstMask & Bit) != 0) {
          Result = _VInsElement(DstSize, ElementSize, i, 0, Result, Temp);
        }
      }

      return Result;
    };
    switch (DstMask) {
    case 0b0001:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Zero
      // Dest[127:96] = Zero
      return _VZip(DstSize, ElementSize, Temp, ZeroVec);
    case 0b0010:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Result
      // Dest[95:64]  = Zero
      // Dest[127:96] = Zero
      return _VZip(DstSize >> 1, ElementSize, ZeroVec, Temp);
    case 0b0011:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Result
      // Dest[95:64]  = Zero
      // Dest[127:96] = Zero
      return _VDupElement(DstSize >> 1, ElementSize, Temp, 0);
    case 0b0100:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Result
      // Dest[127:96] = Zero
      return _VZip(DstSize, OpSize::i64Bit, ZeroVec, Temp);
    case 0b0101:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Result
      // Dest[127:96] = Zero
      return _VZip(DstSize, OpSize::i64Bit, Temp, Temp);
    case 0b0110:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Result
      // Dest[95:64]  = Result
      // Dest[127:96] = Zero
      return BadPath();
    case 0b0111:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Result
      // Dest[95:64]  = Result
      // Dest[127:96] = Zero
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VInsElement(DstSize, ElementSize, 3, 0, Temp, ZeroVec);
    case 0b1000:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Zero
      // Dest[127:96] = Result
      return _VExtr(DstSize, OpSize::i8Bit, Temp, ZeroVec, 4);
    case 0b1001:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Zero
      // Dest[127:96] = Result
      return BadPath();
    case 0b1010:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Result
      // Dest[95:64]  = Zero
      // Dest[127:96] = Result
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VZip(DstSize, OpSize::i32Bit, ZeroVec, Temp);
    case 0b1011:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Result
      // Dest[95:64]  = Zero
      // Dest[127:96] = Result
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VInsElement(DstSize, ElementSize, 2, 0, Temp, ZeroVec);
    case 0b1100:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Result
      // Dest[127:96] = Result
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VZip(DstSize, OpSize::i64Bit, ZeroVec, Temp);
    case 0b1101:
      // Dest[31:0]   = Result
      // Dest[63:32]  = Zero
      // Dest[95:64]  = Result
      // Dest[127:96] = Result
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VInsElement(DstSize, ElementSize, 1, 0, Temp, ZeroVec);
    case 0b1110:
      // Dest[31:0]   = Zero
      // Dest[63:32]  = Result
      // Dest[95:64]  = Result
      // Dest[127:96] = Result
      Temp = _VDupElement(DstSize, ElementSize, Temp, 0);
      return _VInsElement(DstSize, ElementSize, 0, 0, Temp, ZeroVec);
    case 0b1111:
      // Broadcast
      // Dest[31:0]   = Result
      // Dest[63:32]  = Result
      // Dest[95:64]  = Result
      // Dest[127:96] = Result
      return _VDupElement(DstSize, ElementSize, Temp, 0);
    case 0:
    default: LOGMAN_MSG_A_FMT("Unsupported");
    }
  }
  FEX_UNREACHABLE;
}

// Two-operand dot-product handler: the destination register doubles as the
// first input, Src[0] is the second input and Src[1] carries the control
// immediate. The actual IR sequence is produced by DPPOpImpl.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::DPPOp(OpcodeArgs) {
  Ref FirstInput = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref SecondInput = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  const auto ControlImm = Op->Src[1].Literal();

  Ref DotProduct = DPPOpImpl(OpSizeFromDst(Op), FirstInput, SecondInput, ControlImm, ElementSize);
  StoreResultFPR(Op, DotProduct);
}

// Explicit instantiations for 32-bit and 64-bit floating-point elements.
template void OpDispatchBuilder::DPPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::DPPOp<OpSize::i64Bit>(OpcodeArgs);

// Builds the IR for a single-precision dot product that has to be evaluated
// independently for every 128-bit lane of the destination (used by VDPPOp for
// the 256-bit VDPPS form).
//
// The immediate is split into two nibbles that are applied identically to
// each lane:
//   - bits [7:4]: which of the four per-lane products contribute to the sum
//   - bits [3:0]: which of the four per-lane destination elements receive the
//                 sum (unselected elements are zero)
//
// Src1/Src2 are the two vector operands, Imm is the control immediate.
// Returns the IR node holding the assembled result vector.
Ref OpDispatchBuilder::VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
                                   const X86Tables::DecodedOperand& Imm) {
  constexpr auto ElementSize = OpSize::i32Bit;
  // Four 32-bit floats make up one 128-bit lane; the mask nibbles repeat per lane.
  constexpr size_t ElementsPerLane = 4;

  const uint8_t Mask = Imm.Literal();
  const uint8_t SrcMask = Mask >> 4;
  const uint8_t DstMask = Mask & 0xF;

  const auto DstSize = OpSizeFromDst(Op);
  const auto NumElements = IR::NumElements(DstSize, ElementSize);

  Ref Src1V = LoadSourceFPR(Op, Src1, Op->Flags);
  Ref Src2V = LoadSourceFPR(Op, Src2, Op->Flags);

  Ref ZeroVec = LoadZeroVector(DstSize);

  // First step is to do an FMUL
  Ref Temp = _VFMul(DstSize, ElementSize, Src1V, Src2V);

  // Now we zero out the products that the source mask excludes
  for (size_t i = 0; i < NumElements; ++i) {
    const auto Bit = 1U << (i % ElementsPerLane);

    if ((SrcMask & Bit) == 0) {
      Temp = _VInsElement(DstSize, ElementSize, i, 0, Temp, ZeroVec);
    }
  }

  // Horizontal add of the surviving products. Only a pairwise float add is
  // available, so four 32-bit values per lane need two reduction steps.
  Temp = _VFAddP(DstSize, ElementSize, Temp, ZeroVec);
  Temp = _VFAddP(DstSize, ElementSize, Temp, ZeroVec);

  // Now using the destination mask we choose where the result ends up.
  // It can duplicate and zero results.
  //
  // Each lane must receive its *own* dot product. Always reading element 0
  // would copy the lower lane's sum into the upper lane as well.
  // NOTE(review): this assumes _VFAddP packs the pairwise sums of its first
  // operand contiguously from element 0, so that after both reduction steps
  // the total of lane N sits in element N -- confirm against the backend.
  Ref Result = ZeroVec;

  for (size_t i = 0; i < NumElements; ++i) {
    const auto Bit = 1U << (i % ElementsPerLane);

    if ((DstMask & Bit) != 0) {
      const size_t LaneSumIndex = i / ElementsPerLane;
      Result = _VInsElement(DstSize, ElementSize, i, LaneSumIndex, Result, Temp);
    }
  }

  return Result;
}

// Three-operand (VEX) dot-product handler. Src[0] and Src[1] are the vector
// inputs and Src[2] is the control immediate.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VDPPOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  // 256-bit DPPS isn't handled by the 128-bit solution, so it has its own implementation.
  const bool NeedsWideSinglePath = ElementSize == OpSize::i32Bit && DstSize == OpSize::i256Bit;
  if (NeedsWideSinglePath) {
    Ref WideResult = VDPPSOpImpl(Op, Op->Src[0], Op->Src[1], Op->Src[2]);
    StoreResultFPR(Op, WideResult);
    return;
  }

  Ref FirstInput = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref SecondInput = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref NarrowResult = DPPOpImpl(DstSize, FirstInput, SecondInput, Op->Src[2].Literal(), ElementSize);

  // No extra _VMov is emitted to clear the upper lane here: DPPOpImpl assembles
  // its result from a zero vector, so the upper lane is already cleared for the
  // 128-bit version.
  StoreResultFPR(Op, NarrowResult);
}

// Explicit instantiations for 32-bit and 64-bit floating-point elements.
template void OpDispatchBuilder::VDPPOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VDPPOp<OpSize::i64Bit>(OpcodeArgs);

// Emits the IR for (V)MPSADBW: eight 16-bit sums of absolute differences per
// 128-bit lane, between a 4-byte block of Src2 and eight overlapping 4-byte
// windows of Src1.
//
// SrcSize selects between the 128-bit and 256-bit form. Select is the control
// immediate: bits [2:0] steer the lower lane, bits [5:3] the upper lane.
Ref OpDispatchBuilder::MPSADBWOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, uint8_t Select) {
  // Computes the result for one 128-bit lane.
  //   WindowByteOffset - byte offset into LaneSrc1 where the first window starts
  //   BlockIndex       - which 32-bit element of LaneSrc2 is the reference block
  const auto ComputeLane = [&, this](uint32_t WindowByteOffset, uint32_t BlockIndex, Ref LaneSrc1, Ref LaneSrc2) {
    // Replicate the selected 32-bit reference block across the full 128 bits.
    Ref Block = _VDupElement(OpSize::i128Bit, OpSize::i32Bit, LaneSrc2, BlockIndex);

    // Four copies of the first source, each shifted right by one more byte.
    // After the shifts the low 64 bits of each copy contain:
    //   Window0: bytes 0,1,2,3 : 4,5,6,7   -> results 0 and 4
    //   Window1: bytes 1,2,3,4 : 5,6,7,8   -> results 1 and 5
    //   Window2: bytes 2,3,4,5 : 6,7,8,9   -> results 2 and 6
    //   Window3: bytes 3,4,5,6 : 7,8,9,10  -> results 3 and 7
    Ref Window0 = _VExtr(OpSize::i128Bit, OpSize::i8Bit, LaneSrc1, LaneSrc1, WindowByteOffset + 0);
    Ref Window1 = _VExtr(OpSize::i128Bit, OpSize::i8Bit, LaneSrc1, LaneSrc1, WindowByteOffset + 1);
    Ref Window2 = _VExtr(OpSize::i128Bit, OpSize::i8Bit, LaneSrc1, LaneSrc1, WindowByteOffset + 2);
    Ref Window3 = _VExtr(OpSize::i128Bit, OpSize::i8Bit, LaneSrc1, LaneSrc1, WindowByteOffset + 3);

    // Per-byte absolute differences against the reference block. The results
    // are widened to 8 x 16-bit so the following additions have enough room.
    Ref Diff0 = _VUABDL(OpSize::i128Bit, OpSize::i8Bit, Window0, Block);
    Ref Diff1 = _VUABDL(OpSize::i128Bit, OpSize::i8Bit, Window1, Block);
    Ref Diff2 = _VUABDL(OpSize::i128Bit, OpSize::i8Bit, Window2, Block);
    Ref Diff3 = _VUABDL(OpSize::i128Bit, OpSize::i8Bit, Window3, Block);

    // First pairwise reduction; halves the number of partial sums.
    //   EvenPairs.8H[0..3] = adjacent pairs of Diff0
    //   EvenPairs.8H[4..7] = adjacent pairs of Diff2
    //   OddPairs.8H[0..3]  = adjacent pairs of Diff1
    //   OddPairs.8H[4..7]  = adjacent pairs of Diff3
    auto EvenPairs = _VAddP(OpSize::i128Bit, OpSize::i16Bit, Diff0, Diff2);
    auto OddPairs = _VAddP(OpSize::i128Bit, OpSize::i16Bit, Diff1, Diff3);

    // Transpose the even and odd 32-bit elements so that the two halves of every
    // final sum sit next to each other, letting one more pairwise add finish the job.
    auto LowHalves = _VTrn(OpSize::i128Bit, OpSize::i32Bit, EvenPairs, OddPairs);
    auto HighHalves = _VTrn2(OpSize::i128Bit, OpSize::i32Bit, EvenPairs, OddPairs);

    // Final pairwise reduction:
    //   Result.8H[0..3] = adjacent pairs of LowHalves
    //   Result.8H[4..7] = adjacent pairs of HighHalves
    return _VAddP(OpSize::i128Bit, OpSize::i16Bit, LowHalves, HighHalves);
  };

  // Lower lane: bit 2 picks the Src1 window start (expressed as a byte offset
  // of 0 or 4), bits [1:0] pick the Src2 block.
  const uint8_t LowWindowOffset = (Select & 0b100) != 0 ? 4 : 0;
  const uint8_t LowBlockIndex = Select & 0b11;

  Ref LowerLane = ComputeLane(LowWindowOffset, LowBlockIndex, Src1, Src2);
  if (SrcSize == OpSize::i128Bit) {
    return LowerLane;
  }

  // Upper lane: same layout, taken from bits [5:3].
  const uint8_t HighWindowOffset = (Select & 0b100000) != 0 ? 4 : 0;
  const uint8_t HighBlockIndex = (Select >> 3) & 0b11;

  // Bring the upper 128 bits of each source down so the lane helper can work on them.
  Ref HighSrc1 = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, Src1, 1);
  Ref HighSrc2 = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, Src2, 1);
  Ref UpperLane = ComputeLane(HighWindowOffset, HighBlockIndex, HighSrc1, HighSrc2);

  return _VInsElement(OpSize::i256Bit, OpSize::i128Bit, 1, 0, LowerLane, UpperLane);
}

// Two-operand MPSADBW: the destination is also the first source, Src[0] is the
// second source and Src[1] holds the control immediate.
void OpDispatchBuilder::MPSADBWOp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);
  const uint8_t Control = Op->Src[1].Literal();

  Ref WindowSource = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref BlockSource = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  StoreResultFPR(Op, MPSADBWOpImpl(VectorSize, WindowSource, BlockSource, Control));
}

// Three-operand (VEX) MPSADBW: Src[0] and Src[1] are the vector sources and
// Src[2] holds the control immediate.
void OpDispatchBuilder::VMPSADBWOp(OpcodeArgs) {
  const auto VectorSize = OpSizeFromSrc(Op);
  const uint8_t Control = Op->Src[2].Literal();

  Ref WindowSource = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref BlockSource = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  StoreResultFPR(Op, MPSADBWOpImpl(VectorSize, WindowSource, BlockSource, Control));
}

// Inserts a 128-bit value (Src[1]) into one of the 128-bit halves of Src[0].
// Only bit 0 of the immediate in Src[2] is used to pick the half.
void OpDispatchBuilder::VINSERTOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref Base = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  // The inserted operand is always loaded as 128 bits, regardless of the destination width.
  Ref Inserted = LoadSourceFPR_WithOpSize(Op, Op->Src[1], OpSize::i128Bit, Op->Flags);

  const auto TargetHalf = Op->Src[2].Literal() & 1;
  StoreResultFPR(Op, _VInsElement(DstSize, OpSize::i128Bit, TargetHalf, 0, Base, Inserted));
}

// Converts packed 16-bit floats to packed 32-bit floats.
void OpDispatchBuilder::VCVTPH2PSOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  // A register source is read at the full destination width. A memory source
  // is only ever half as wide as the destination vector
  // (i.e. 128-bit vector -> 64-bit mem, 256-bit vector -> 128-bit mem).
  auto SrcLoadSize = DstSize;
  if (!Op->Src[0].IsGPR()) {
    SrcLoadSize = IR::SizeToOpSize(IR::OpSizeToSize(DstSize) / 2);
  }

  Ref Halves = LoadSourceFPR_WithOpSize(Op, Op->Src[0], SrcLoadSize, Op->Flags);
  Ref Singles = _Vector_FToF(DstSize, OpSize::i32Bit, Halves, OpSize::i16Bit);

  StoreResultFPR(Op, Singles);
}

// Converts packed 32-bit floats to packed 16-bit floats. Bit 2 of the
// immediate selects between the current rounding mode and the rounding mode
// encoded in bits [1:0] of the immediate.
void OpDispatchBuilder::VCVTPS2PHOp(OpcodeArgs) {
  const auto SrcSize = OpSizeFromSrc(Op);
  // Register destinations are written as 128 bits; memory destinations get half the source width.
  const auto StoreSize = Op->Dest.IsGPR() ? OpSize::i128Bit : IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) / 2);

  const auto Imm8 = Op->Src[1].Literal();
  const bool HasExplicitRounding = (Imm8 & 0b100) == 0;

  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  Ref Converted = nullptr;
  if (HasExplicitRounding) {
    // None of the ARM float conversion instructions take the rounding mode
    // as an immediate; they all read the RM field of the FPCR. The requested
    // mode therefore has to be installed around the conversion and restored
    // afterwards.
    Ref PreviousFPCR = _PushRoundingMode(Imm8 & 0b11);
    Converted = _Vector_FToF(SrcSize, OpSize::i16Bit, Src, OpSize::i32Bit);
    _PopRoundingMode(PreviousFPCR);
  } else {
    Converted = _Vector_FToF(SrcSize, OpSize::i16Bit, Src, OpSize::i32Bit);
  }

  // With a 256-bit register source the upper junk has to be eliminated, since
  // VCVTPS2PH's destination for registers is an XMM.
  if (SrcSize == OpSize::i256Bit && Op->Src[0].IsGPR()) {
    Converted = _VMov(OpSize::i128Bit, Converted);
  }

  StoreResultFPR_WithOpSize(Op, Op->Dest, Converted, StoreSize);
}

// Builds each 128-bit half of the destination from one of the four 128-bit
// halves of Src[0]/Src[1], or leaves it zero.
//
// Immediate layout (Src[2]):
//   bits [1:0] - source half for the low destination half
//   bit  3     - when set, the low destination half stays zero
//   bits [5:4] - source half for the high destination half
//   bit  7     - when set, the high destination half stays zero
void OpDispatchBuilder::VPERM2Op(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  const auto Control = Op->Src[2].Literal();
  Ref Result = LoadZeroVector(DstSize);

  // Half selectors 0 and 1 address Src1, anything else addresses Src2.
  // The insert always goes into the current value of Result.
  const auto InsertHalf = [&](uint64_t DstHalf, uint64_t HalfSelector) {
    const bool FromSecond = HalfSelector >= 2;
    const uint64_t SrcHalf = FromSecond ? HalfSelector - 2 : HalfSelector;
    return _VInsElement(DstSize, OpSize::i128Bit, DstHalf, SrcHalf, Result, FromSecond ? Src2 : Src1);
  };

  const bool ZeroLow = (Control & 0b00001000) != 0;
  const bool ZeroHigh = (Control & 0b10000000) != 0;

  if (!ZeroLow) {
    Result = InsertHalf(0, Control & 0b11);
  }
  if (!ZeroHigh) {
    Result = InsertHalf(1, (Control >> 4) & 0b11);
  }

  StoreResultFPR(Op, Result);
}

// Turns a vector of 32-bit element selectors into a vector of byte indices
// that can be fed to a byte-granular table lookup (ARMv8 TBL).
//
//   Indices       - vector whose 32-bit elements carry the selector in their low bits
//   IndexMask     - mask that keeps only the relevant selector bits of each element
//   Repeating3210 - byte vector holding [3, 2, 1, 0] repeated for its entire length
//
// Worked example (elements listed most-significant first). Starting from the
// 32-bit selectors
//
//   [ 4, 1, 2, 6, 7, 0, 3, 5 ]
//
// the steps below produce, per 32-bit element, the bytes:
//
//   after byte TRN:      [0,0,4,4] [0,0,1,1] [0,0,2,2] [0,0,6,6] [0,0,7,7] [0,0,0,0] [0,0,3,3] [0,0,5,5]
//   after half-word TRN: [4,4,4,4] [1,1,1,1] [2,2,2,2] [6,6,6,6] [7,7,7,7] [0,0,0,0] [3,3,3,3] [5,5,5,5]
//   after shift by 2:    [16 x4]   [4 x4]    [8 x4]    [24 x4]   [28 x4]   [0 x4]    [12 x4]   [20 x4]
//   after adding 3210:   [19,18,17,16] [7,6,5,4] [11,10,9,8] [27,26,25,24]
//                        [31,30,29,28] [3,2,1,0] [15,14,13,12] [23,22,21,20]
Ref OpDispatchBuilder::VPERMDIndices(OpSize DstSize, Ref Indices, Ref IndexMask, Ref Repeating3210) {
  // Strip everything that is not part of the selector index.
  Ref Selectors = _VAnd(DstSize, OpSize::i8Bit, Indices, IndexMask);

  // The selector lives in the lowest byte of each 32-bit element, but the
  // table lookup needs it in every byte of the element. Two transposes of the
  // vector with itself do the broadcast: the byte-wise one fills each
  // half-word, the half-word-wise one then fills the whole 32-bit element.
  Ref HalfWordFilled = _VTrn(DstSize, OpSize::i8Bit, Selectors, Selectors);
  Ref WordFilled = _VTrn(DstSize, OpSize::i16Bit, HalfWordFilled, HalfWordFilled);

  // Multiply by four to go from 32-bit word indices to byte indices.
  Ref ByteBase = _VShlI(DstSize, OpSize::i8Bit, WordFilled, 2);

  // Finally add the byte position within the word, so that every byte of the
  // selected source word is addressed individually.
  return _VAdd(DstSize, OpSize::i8Bit, ByteBase, Repeating3210);
}

// Permutes the 32-bit elements of Src[1] using the per-element selectors in Src[0].
void OpDispatchBuilder::VPERMDOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);

  Ref Selectors = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Table = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // Only bits [2:0] of every 32-bit selector are meaningful; this mask drops the rest.
  Ref SelectorMask = _VectorImm(DstSize, OpSize::i32Bit, 0b111);

  // Byte offsets 0..3 within a 32-bit word, replicated into every element.
  Ref ByteOffsets = _VDupFromGPR(DstSize, OpSize::i32Bit, Constant(0x03020100));

  Ref ByteIndices = VPERMDIndices(DstSize, Selectors, SelectorMask, ByteOffsets);

  // With byte indices in hand, a single table lookup performs the permute.
  StoreResultFPR(Op, _VTBL1(DstSize, Table, ByteIndices));
}

// Permutes the 64-bit elements of Src[0]; every destination element is chosen
// by its own two-bit field of the immediate in Src[1].
void OpDispatchBuilder::VPERMQOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  const auto Selector = Op->Src[1].Literal();
  const auto FirstField = Selector & 0b11;

  // When all four two-bit fields are equal (0x00, 0x55, 0xAA or 0xFF) the
  // operation is a plain broadcast of one element and needs no inserts.
  const bool IsBroadcast = Selector == FirstField * 0x55;

  Ref Result {};
  if (IsBroadcast) {
    Result = _VDupElement(DstSize, OpSize::i64Bit, Src, FirstField);
  } else {
    // General case: start from zero and place the elements one at a time.
    Result = LoadZeroVector(DstSize);
    const auto Count = IR::NumElements(DstSize, IR::OpSize::i64Bit);
    for (size_t Dst = 0; Dst < Count; ++Dst) {
      const auto From = (Selector >> (Dst * 2)) & 0b11;
      Result = _VInsElement(DstSize, OpSize::i64Bit, Dst, From, Result, Src);
    }
  }

  StoreResultFPR(Op, Result);
}

// Generic immediate-controlled blend. Starting from ZeroRegister, element i of
// the result is taken from Src2 when bit i of Selector is set and from Src1
// otherwise. Returns the assembled vector.
Ref OpDispatchBuilder::VBLENDOpImpl(IR::OpSize VecSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector) {
  const auto Count = IR::NumElements(VecSize, ElementSize);

  Ref Blended = ZeroRegister;
  for (int Element = 0; Element < Count; Element++) {
    const bool TakeSecond = ((Selector >> Element) & 1) != 0;
    Ref Origin = TakeSecond ? Src2 : Src1;

    // Source and destination element positions are always identical for a blend.
    Blended = _VInsElement(VecSize, ElementSize, Element, Element, Blended, Origin);
  }

  return Blended;
}

// Blends the 64-bit elements of Src[0] and Src[1] under control of the
// immediate in Src[2]; a set bit picks the element from Src[1].
void OpDispatchBuilder::VBLENDPDOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  const auto Is256Bit = DstSize == OpSize::i256Bit;
  const auto Selector = Op->Src[2].Literal();

  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // Stores a complete input vector; the 128-bit form goes through a 128-bit move first.
  const auto StoreWholeVector = [&](Ref Vector) {
    Ref Whole = Is256Bit ? Vector : _VMov(OpSize::i128Bit, Vector);
    StoreResultFPR(Op, Whole);
  };

  // Nothing selected from the second source: plain copy of the first.
  if (Selector == 0) {
    StoreWholeVector(Src1);
    return;
  }

  // At most the first four bits of the 8-bit immediate are used (two for the
  // 128-bit form), so only those are checked for the "everything from the
  // second source" case.
  const auto UsedBits = Is256Bit ? 0b1111 : 0b11;
  if ((Selector & UsedBits) == UsedBits) {
    StoreWholeVector(Src2);
    return;
  }

  const auto ZeroRegister = LoadZeroVector(DstSize);
  StoreResultFPR(Op, VBLENDOpImpl(DstSize, OpSize::i64Bit, Src1, Src2, ZeroRegister, Selector));
}

// Blends the 32-bit elements of Src[0] and Src[1] under control of the
// immediate in Src[2]:
//   bit set   -> the corresponding element comes from Src[1]
//   bit clear -> the corresponding element comes from Src[0]
void OpDispatchBuilder::VPBLENDDOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  const auto Is256Bit = DstSize == OpSize::i256Bit;
  const auto Selector = Op->Src[2].Literal();

  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // Early outs for selectors that amount to copying one whole input. A real
  // vector move would be the faster way to express that, so these are not
  // expected to show up often, but they are cheap to handle.

  if (Selector == 0) {
    Ref Copy = Is256Bit ? Src1 : _VMov(OpSize::i128Bit, Src1);
    StoreResultFPR(Op, Copy);
    return;
  }

  if (Is256Bit) {
    if (Selector == 0xFF) {
      StoreResultFPR(Op, Src2);
      return;
    }
  } else if ((Selector & 0xF) == 0xF) {
    // The 128-bit form only looks at the first four bits of the immediate.
    // Checking them with a mask also catches immediates that have the ignored
    // upper bits set.
    StoreResultFPR(Op, _VMov(OpSize::i128Bit, Src2));
    return;
  }

  const auto ZeroRegister = LoadZeroVector(DstSize);
  Ref Blended = VBLENDOpImpl(DstSize, OpSize::i32Bit, Src1, Src2, ZeroRegister, Selector);
  if (!Is256Bit) {
    Blended = _VMov(OpSize::i128Bit, Blended);
  }
  StoreResultFPR(Op, Blended);
}

// Blends the 16-bit elements of Src[0] and Src[1] under control of the 8-bit
// immediate in Src[2]; a set bit picks the element from Src[1].
void OpDispatchBuilder::VPBLENDWOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  const auto Is128Bit = DstSize == OpSize::i128Bit;
  const auto Selector = Op->Src[2].Literal();

  Ref Src1 = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Src2 = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  // All-zero and all-one selectors are whole-vector copies of one input.
  if (Selector == 0 || Selector == 0xFF) {
    Ref Chosen = Selector == 0 ? Src1 : Src2;
    Ref Copy = Is128Bit ? _VMov(OpSize::i128Bit, Chosen) : Chosen;
    StoreResultFPR(Op, Copy);
    return;
  }

  // The 256-bit form applies the same eight selector bits to the upper
  // elements as well. Duplicating the byte into a 16-bit immediate lets the
  // generic helper cover both halves.
  const auto WideSelector = Selector << 8 | Selector;

  const auto ZeroRegister = LoadZeroVector(DstSize);
  Ref Blended = VBLENDOpImpl(DstSize, OpSize::i16Bit, Src1, Src2, ZeroRegister, WideSelector);
  if (Is128Bit) {
    Blended = _VMov(OpSize::i128Bit, Blended);
  }
  StoreResultFPR(Op, Blended);
}

// Handles VZEROALL (256-bit operand size) and VZEROUPPER (any other size).
void OpDispatchBuilder::VZEROOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  const auto IsVZEROALL = DstSize == OpSize::i256Bit;

  // NOTE: Both instructions only ever touch up to the first 16 registers,
  //       even on AVX-512 where 32 registers exist. 8 are used outside 64-bit mode.
  const auto NumRegs = Is64BitMode ? 16U : 8U;

  for (uint32_t Index = 0; Index < NumRegs; ++Index) {
    if (IsVZEROALL) {
      // Explicitly not caching named vector zero. This ensures that every register gets movi #0.0 directly.
      Ref Zero = LoadUncachedZeroVector(DstSize);
      StoreXMMRegister(Index, Zero);
    } else {
      // A 128-bit move of the register onto itself is used to drop the upper half.
      Ref Current = LoadXMMRegister(Index);
      Ref LowerOnly = _VMov(OpSize::i128Bit, Current);
      StoreXMMRegister(Index, LowerOnly);
    }
  }
}

// In-lane permute controlled by an immediate (Src[1]). Elements never cross a
// 128-bit lane: the source element is always taken from the same lane as the
// destination element.
//
//   64-bit elements: destination element i is steered by immediate bit i.
//   otherwise:       destination element i is steered by the two-bit field
//                    (i % 4) of the immediate; both lanes share the fields.
void OpDispatchBuilder::VPERMILImmOp(OpcodeArgs, IR::OpSize ElementSize) {
  const auto DstSize = OpSizeFromDst(Op);
  const auto Selector = Op->Src[1].Literal() & 0xFF;

  Ref Src = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Result = LoadZeroVector(DstSize);

  const bool IsQuadWord = ElementSize == OpSize::i64Bit;
  const uint32_t PerLane = IsQuadWord ? 2 : 4;
  const uint32_t Total = DstSize == OpSize::i256Bit ? PerLane * 2 : PerLane;

  for (uint32_t Dst = 0; Dst < Total; ++Dst) {
    // First element index of the lane that Dst belongs to.
    const uint32_t LaneBase = (Dst / PerLane) * PerLane;
    const uint32_t Within = Dst % PerLane;

    const auto Field = IsQuadWord ? (Selector >> Dst) & 0b1 : (Selector >> (Within * 2)) & 0b11;
    Result = _VInsElement(DstSize, ElementSize, Dst, Field + LaneBase, Result, Src);
  }

  StoreResultFPR(Op, Result);
}

// In-lane permute of Src controlled by a vector of per-element selectors.
// The selectors are converted into byte indices and the permute itself is a
// single table lookup, following the same recipe as VPERMD. The one addition
// is that the upper lane's byte indices are offset by 16 so that they address
// the upper lane of the source.
Ref OpDispatchBuilder::VPERMILRegOpImpl(OpSize DstSize, IR::OpSize ElementSize, Ref Src, Ref Indices) {
  const bool IsQuadWord = ElementSize == OpSize::i64Bit;

  if (IsQuadWord) {
    // VPERMILPD keeps its selector in the second bit of each element rather
    // than the first, so move it down by one.
    Indices = _VUShrI(DstSize, ElementSize, Indices, 1);
  }

  // Keep only the selector bits: one bit for 64-bit elements, two otherwise.
  const auto SelectorBits = IsQuadWord ? 0b1 : 0b11;
  Ref SelectorMask = _VectorImm(DstSize, ElementSize, SelectorBits);
  Ref Selectors = _VAnd(DstSize, OpSize::i8Bit, Indices, SelectorMask);

  // Broadcast the selector from the lowest byte into every byte of its element.
  Ref Spread = _VTrn(DstSize, OpSize::i8Bit, Selectors, Selectors);
  Spread = _VTrn(DstSize, OpSize::i16Bit, Spread, Spread);
  if (IsQuadWord) {
    Spread = _VTrn(DstSize, OpSize::i32Bit, Spread, Spread);
  }

  // Element index -> byte index (x8 for 64-bit elements, x4 otherwise).
  const auto ScaleShift = IsQuadWord ? 3 : 2;
  Ref ByteBase = _VShlI(DstSize, OpSize::i8Bit, Spread, ScaleShift);

  // Byte position within one element, replicated into every element.
  const uint64_t BytePositions = IsQuadWord ? 0x0706050403020100 : 0x03020100;
  Ref Offsets = _VDupFromGPR(DstSize, ElementSize, Constant(BytePositions));

  if (DstSize == OpSize::i256Bit) {
    // A vector that is zero in the lower lane and 16 in every byte of the upper lane.
    const auto ZeroRegister = LoadZeroVector(DstSize);
    Ref AllSixteen = _VectorImm(DstSize, OpSize::i8Bit, 16);
    Ref UpperLaneBias = _VInsElement(DstSize, OpSize::i128Bit, 1, 0, ZeroRegister, AllSixteen);

    Offsets = _VAdd(DstSize, OpSize::i8Bit, Offsets, UpperLaneBias);
  }

  Ref ByteIndices = _VAdd(DstSize, OpSize::i8Bit, Offsets, ByteBase);
  return _VTBL1(DstSize, Src, ByteIndices);
}

// Register-controlled in-lane permute: Src[0] is the data vector and Src[1]
// supplies the per-element selectors.
template<IR::OpSize ElementSize>
void OpDispatchBuilder::VPERMILRegOp(OpcodeArgs) {
  Ref Data = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref Selectors = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  StoreResultFPR(Op, VPERMILRegOpImpl(OpSizeFromDst(Op), ElementSize, Data, Selectors));
}

// Explicit instantiations for 32-bit and 64-bit elements.
template void OpDispatchBuilder::VPERMILRegOp<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VPERMILRegOp<OpSize::i64Bit>(OpcodeArgs);

// Shared implementation of the four SSE4.2 string comparison instructions.
//
//   IsExplicit - string lengths are supplied in RAX/RDX (PCMPESTR*) instead of
//                being implicit (PCMPISTR*)
//   IsMask     - the result is written to XMM0 as a mask (PCMP*STRM) instead
//                of to RCX as an index (PCMP*STRI)
//
// In every variant the flags are set from the intermediate result.
void OpDispatchBuilder::PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask) {
  const uint16_t Control = Op->Src[1].Literal();
  // control[0] is the data size bit used below to size elements.
  const auto DataSizeBit = Control & 1;
  // control[6] picks the output flavour; its meaning depends on IsMask.
  const bool OutputSelect = (Control & 0b0100'0000) != 0;

  // NOTE: Unlike most other SSE/AVX instructions, the SSE4.2 string and text
  //       instructions do *not* require memory operands to be aligned on a
  //       16 byte boundary (see "Other Exceptions" descriptions for the
  //       relevant instructions in the Intel Software Development Manual).
  //       The second source is therefore loaded with an alignment of 1.
  Ref Src1 = LoadSourceFPR_WithOpSize(Op, Op->Dest, OpSize::i128Bit, Op->Flags);
  Ref Src2 = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OpSize::i128Bit, Op->Flags, {.Align = OpSize::i8Bit});

  Ref Intermediate {};
  if (!IsExplicit) {
    Intermediate = _VPCMPISTRX(Src1, Src2, Control);
  } else {
    // The source size is 4 without REX.W and 8 with it. The instruction's own
    // control immediate is only 8 bits wide, so bit 8 of a 16-bit control
    // value is free to tell the op whether RAX and RDX are to be interpreted
    // as 64-bit values.
    const auto LengthsAre64Bit = OpSizeFromSrc(Op) == OpSize::i64Bit;
    const auto ExtendedControl = uint16_t(Control | (uint16_t(LengthsAre64Bit) << 8));

    Ref LengthRAX = LoadGPRRegister(X86State::REG_RAX);
    Ref LengthRDX = LoadGPRRegister(X86State::REG_RDX);

    Intermediate = _VPCMPESTRX(Src1, Src2, LengthRAX, LengthRDX, ExtendedControl);
  }

  Ref ZeroConst = Constant(0);

  if (!IsMask) {
    // Index variant: control[6] set stores the index of the most significant
    // set bit into ECX, otherwise the index of the least significant one.
    Ref MatchBits = _Bfe(OpSize::i32Bit, 16, 0, Intermediate);

    // With no bit set at all, the result is the number of elements instead.
    Ref IfZero = Constant(16 >> DataSizeBit);
    Ref IfNotZero = OutputSelect ? _FindMSB(IR::OpSize::i32Bit, MatchBits) : _FindLSB(IR::OpSize::i32Bit, MatchBits);
    Ref Index = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, MatchBits, ZeroConst, IfZero, IfNotZero);

    // Store the result, it is already zero-extended to 64-bit implicitly.
    StoreGPRRegister(X86State::REG_RCX, Index);
  } else if (OutputSelect) {
    // Mask variant with control[6] set: every bit of the intermediate result
    // is expanded into a whole byte or word element of XMM0 (depending on the
    // data size bit); set bits become all 1s and unset bits all 0s.
    const auto ElementSize = 1U << DataSizeBit;
    const auto NumElements = 16U >> DataSizeBit;

    Ref Expanded = LoadZeroVector(OpSize::i128Bit);
    for (uint32_t Element = 0; Element < NumElements; Element++) {
      // Sign-extending the single bit yields the all-ones / all-zeros pattern.
      Ref Fill = _Sbfe(OpSize::i64Bit, 1, Element, Intermediate);
      Expanded = _VInsGPR(OpSize::i128Bit, IR::SizeToOpSize(ElementSize), Element, Expanded, Fill);
    }
    StoreXMMRegister(0, Expanded);
  } else {
    // Mask variant with control[6] clear: the intermediate result goes as-is
    // into the least significant bits of XMM0, zero extended.
    StoreXMMRegister(0, _VCastFromGPR(OpSize::i128Bit, OpSize::i16Bit, Intermediate));
  }

  // Set all of the necessary flags. NZCV stored in bits 28...31 like the hw op.
  SetNZCV(Intermediate);
  CFInverted = false;
  ZeroPF_AF();
}

// Explicit-length string compare that produces an index.
void OpDispatchBuilder::VPCMPESTRIOp(OpcodeArgs) {
  constexpr bool IsExplicit = true;
  constexpr bool IsMask = false;
  PCMPXSTRXOpImpl(Op, IsExplicit, IsMask);
}
// Explicit-length string compare that produces a mask.
void OpDispatchBuilder::VPCMPESTRMOp(OpcodeArgs) {
  constexpr bool IsExplicit = true;
  constexpr bool IsMask = true;
  PCMPXSTRXOpImpl(Op, IsExplicit, IsMask);
}
// Implicit-length string compare that produces an index.
void OpDispatchBuilder::VPCMPISTRIOp(OpcodeArgs) {
  constexpr bool IsExplicit = false;
  constexpr bool IsMask = false;
  PCMPXSTRXOpImpl(Op, IsExplicit, IsMask);
}
// Implicit-length string compare that produces a mask.
void OpDispatchBuilder::VPCMPISTRMOp(OpcodeArgs) {
  constexpr bool IsExplicit = false;
  constexpr bool IsMask = true;
  PCMPXSTRXOpImpl(Op, IsExplicit, IsMask);
}

// Shared implementation of the fused multiply-add family.
//
//   IROp      - the concrete IR operation to emit (the _VFMLA node is re-typed to it)
//   Scalar    - only element 0 is computed; the rest of the result comes from the destination
//   Src1Idx, Src2Idx, AddendIdx
//             - 1-based positions within {Dest, Src[0], Src[1]} naming the two
//               multiplicands and the addend
//
// The element size is 64-bit when the AVX W flag is set and 32-bit otherwise.
void OpDispatchBuilder::VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  const auto Size = OpSizeFromDst(Op);
  const bool HasAVXW = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) != 0;
  const OpSize ElementSize = HasAVXW ? OpSize::i64Bit : OpSize::i32Bit;

  Ref Dest = LoadSourceFPR_WithOpSize(Op, Op->Dest, Size, Op->Flags);
  Ref First = LoadSourceFPR_WithOpSize(Op, Op->Src[0], Size, Op->Flags);
  // A register operand is read at the destination width; anything else uses the default load.
  Ref Second = Op->Src[1].IsGPR() ? LoadSourceFPR_WithOpSize(Op, Op->Src[1], Size, Op->Flags) : LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Operands[3] = {Dest, First, Second};
  Ref MultiplicandA = Operands[Src1Idx - 1];
  Ref MultiplicandB = Operands[Src2Idx - 1];
  Ref Addend = Operands[AddendIdx - 1];

  DeriveOp(FMAResult, IROp, _VFMLA(Size, ElementSize, MultiplicandA, MultiplicandB, Addend));

  Ref Result = FMAResult;
  if (Scalar) {
    // Special case, scalar inserts in to the low bits of the destination.
    Result = _VInsElement(OpSize::i128Bit, ElementSize, 0, 0, Dest, Result);
  }

  if (Size != OpSize::i256Bit) {
    Result = _VMov(OpSize::i128Bit, Result);
  }
  StoreResultFPR(Op, Result);
}

// Shared lowering for the alternating fused multiply add/sub family.
//
// AddSub     - selects the PADDSUB* sign-flip constant when true, the PSUBADD* one when false.
// Src1Idx, Src2Idx, AddendIdx - one-based selectors into {Dest, Src[0], Src[1]}.
//
// The addend is XORed with the selected named constant and then fed to a regular _VFMLA.
void OpDispatchBuilder::VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx) {
  const auto VectorSize = OpSizeFromDst(Op);

  // The W option flag selects 64-bit elements, otherwise 32-bit.
  const OpSize LaneSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W) ? OpSize::i64Bit : OpSize::i32Bit;
  const bool Has32BitLanes = LaneSize == OpSize::i32Bit;

  Ref DestValue = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref FirstSource = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  Ref SecondSource = LoadSourceFPR(Op, Op->Src[1], Op->Flags);
  Ref Operands[3] = {DestValue, FirstSource, SecondSource};

  // Pick the per-element sign-flip pattern matching the operation and element width.
  const auto FlipPattern = AddSub ? (Has32BitLanes ? NAMED_VECTOR_PADDSUBPS_INVERT : NAMED_VECTOR_PADDSUBPD_INVERT) :
                                    (Has32BitLanes ? NAMED_VECTOR_PSUBADDPS_INVERT : NAMED_VECTOR_PSUBADDPD_INVERT);
  Ref FlipMask = LoadAndCacheNamedVectorConstant(VectorSize, FlipPattern);

  Ref FlippedAddend = _VXor(VectorSize, LaneSize, Operands[AddendIdx - 1], FlipMask);

  Ref Output = _VFMLA(VectorSize, LaneSize, Operands[Src1Idx - 1], Operands[Src2Idx - 1], FlippedAddend);
  if (VectorSize != OpSize::i256Bit) {
    // Non-256-bit operations go through a 128-bit move before the store.
    Output = _VMov(OpSize::i128Bit, Output);
  }
  StoreResultFPR(Op, Output);
}

// Decodes a VSIB memory operand into a RefVSIB description.
//
// The result carries the loaded XMM index register (.Low), the optional base GPR
// (.BaseAddr, nullptr when the operand has no base), the scale, and either a plain
// displacement or, for relocated operands, a base that already includes the
// entrypoint-relative offset.
OpDispatchBuilder::RefVSIB OpDispatchBuilder::LoadVSIB(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags) {
  const bool HasVSIBByte = (Op->Flags & X86Tables::DecodeFlags::FLAG_VSIB_BYTE) != 0;
  LOGMAN_THROW_A_FMT((Operand.IsSIB() || Operand.IsSIBRelocation()) && HasVSIBByte, "Trying to load VSIB for something that isn't the correct type!");

  // Pull the encoded register numbers out and sanity check them.
  const auto IndexReg = Operand.Data.SIB.Index;
  const auto BaseReg = Operand.Data.SIB.Base;
  LOGMAN_THROW_A_FMT(IndexReg >= FEXCore::X86State::REG_XMM_0 && IndexReg <= FEXCore::X86State::REG_XMM_15, "must be AVX reg");
  LOGMAN_THROW_A_FMT(BaseReg == FEXCore::X86State::REG_INVALID || (BaseReg >= FEXCore::X86State::REG_RAX && BaseReg <= FEXCore::X86State::REG_R15),
                     "Base must be a GPR.");

  const auto XMMNumber = IndexReg - X86State::REG_XMM_0;
  const bool HasBase = BaseReg != FEXCore::X86State::REG_INVALID;

  OpDispatchBuilder::RefVSIB Decoded {
    .Low = LoadXMMRegister(XMMNumber),
    .BaseAddr = HasBase ? LoadGPRRegister(BaseReg, OpSize::i64Bit, 0, false) : nullptr,
    .Scale = Operand.Data.SIB.Scale,
  };

  if (!Operand.IsSIBRelocation()) {
    // Ordinary operand: the offset is a 32-bit displacement.
    Decoded.Displacement = static_cast<int32_t>(Operand.Data.SIB.Offset);
    return Decoded;
  }

  // Relocated operand: the offset is folded into the base address as an entrypoint offset.
  auto EPOffset = _EntrypointOffset(OpSize::i64Bit, Operand.Data.SIB.Offset);
  if (Decoded.BaseAddr) {
    Decoded.BaseAddr = Add(OpSize::i64Bit, EPOffset, Decoded.BaseAddr);
  } else {
    Decoded.BaseAddr = EPOffset;
  }
  return Decoded;
}

// Lowers the masked vector gather instructions.
//
// AddrElementSize is the width of each index element in the VSIB index register
// (32-bit or 64-bit); the width of each loaded data element comes from the W flag.
// Two lowering paths exist:
//  - a single _VLoadVectorGatherMasked op, when the scale and element sizes allow it;
//  - otherwise a split 128-bit fallback through AVX128_VPGatherImpl whose halves are merged.
// After the destination is stored, the mask register (Src[1]) is written with zero.
template<OpSize AddrElementSize>
void OpDispatchBuilder::VPGATHER(OpcodeArgs) {
  LOGMAN_THROW_A_FMT(AddrElementSize == OpSize::i32Bit || AddrElementSize == OpSize::i64Bit, "Unknown address element size");

  const auto Size = OpSizeFromDst(Op);
  const auto Is128Bit = Size == OpSize::i128Bit;
  const auto GPRSize = GetGPROpSize();
  // The address-size flag halves the address width relative to the GPR size.
  auto AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (GPRSize >> 1) : GPRSize;

  ///< Element size is determined by W flag.
  const OpSize ElementLoadSize = Op->Flags & X86Tables::DecodeFlags::FLAG_OPTION_AVX_W ? OpSize::i64Bit : OpSize::i32Bit;

  // We only need the high address register if the number of data elements is more than what the low half can consume.
  // But also the number of address elements is clamped by the destination size as well.
  const size_t NumDataElements = IR::NumElements(Size, ElementLoadSize);
  const size_t NumAddrElementBytes = std::min<size_t>(IR::OpSizeToSize(Size), (NumDataElements * IR::OpSizeToSize(AddrElementSize)));
  const bool Needs128BitHighAddrBytes = NumAddrElementBytes > IR::OpSizeToSize(OpSize::i128Bit);

  auto VSIB = LoadVSIB(Op, Op->Src[0], Op->Flags);

  // The single-op path is only taken when the scale is 1 or equal to the index element size in bytes,
  // and the index elements have the same width as the data elements.
  const bool SupportsSVELoad = (VSIB.Scale == 1 || VSIB.Scale == IR::OpSizeToSize(AddrElementSize)) && (AddrElementSize == ElementLoadSize);

  Ref Dest = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Mask = LoadSourceFPR(Op, Op->Src[1], Op->Flags);

  Ref Result {};
  if (!SupportsSVELoad) {
    // We need to go down the fallback path in the case that we don't hit the backend's SVE mode.
    // Split destination and mask into low/high 128-bit halves (element 1 of a 256-bit vector is the high half).
    RefPair Dest128 {
      .Low = Dest,
      .High = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, Dest, 1),
    };

    RefPair Mask128 {
      .Low = Mask,
      .High = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, Mask, 1),
    };

    RefVSIB VSIB128 = VSIB;
    VSIB128.High = Invalid();

    if (Needs128BitHighAddrBytes) {
      if (Is128Bit) {
        ///< A bit careful for the VSIB index register duplicating.
        // For a 128-bit operation the "high" index half is the same register as the low half.
        VSIB128.High = VSIB128.Low;
      } else {
        VSIB128.High = _VDupElement(OpSize::i256Bit, OpSize::i128Bit, VSIB128.Low, 1);
      }
    }

    auto Result128 = AVX128_VPGatherImpl(Op, Size, ElementLoadSize, AddrElementSize, Dest128, Mask128, VSIB128);
    // The registers are currently split, need to merge them.
    Result = _VInsElement(OpSize::i256Bit, OpSize::i128Bit, 1, 0, Result128.Low, Result128.High);
  } else {
    ///< Calculate the full operation.
    ///< BaseAddr doesn't need to exist, calculate that here.
    // Base + displacement, displacement only, base only, or Invalid() when neither exists.
    Ref BaseAddr = VSIB.BaseAddr;
    if (BaseAddr && VSIB.Displacement) {
      BaseAddr = Add(OpSize::i64Bit, BaseAddr, VSIB.Displacement);
    } else if (VSIB.Displacement) {
      BaseAddr = Constant(VSIB.Displacement);
    } else if (!BaseAddr) {
      BaseAddr = Invalid();
    }

    Result =
      _VLoadVectorGatherMasked(Size, ElementLoadSize, Dest, Mask, BaseAddr, VSIB.Low, Invalid(), AddrElementSize, VSIB.Scale, 0, 0, AddrSize);
  }

  if (Is128Bit) {
    if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
      // Special case for the 128-bit gather load using 64-bit address indexes with 32-bit results.
      // Only loads two 32-bit elements in to the lower 64-bits of the first destination.
      // Bits [255:64] all become zero.
      Result = _VMov(OpSize::i64Bit, Result);
    } else {
      Result = _VMov(OpSize::i128Bit, Result);
    }
  } else {
    if (AddrElementSize == OpSize::i64Bit && ElementLoadSize == OpSize::i32Bit) {
      // If we only fetched 128-bits worth of data then the upper-result is all zero.
      Result = _VMov(OpSize::i128Bit, Result);
    }
  }

  StoreResultFPR(Op, Result);

  ///< Assume non-faulting behaviour and clear the mask register.
  auto Zero = LoadZeroVector(Size);
  StoreResultFPR_WithOpSize(Op, Op->Src[1], Zero, Size);
}

// Explicit instantiations for the two index-element widths that VPGATHER's
// own size check accepts (32-bit and 64-bit).
template void OpDispatchBuilder::VPGATHER<OpSize::i32Bit>(OpcodeArgs);
template void OpDispatchBuilder::VPGATHER<OpSize::i64Bit>(OpcodeArgs);

// Immediate form of the bit-field extract.
//
// Both parameters come packed in Src[1]'s literal: bits [5:0] hold the field width
// (0 is treated as a full 64-bit field) and bits [13:8] hold the right-shift amount.
// The destination is shifted right and then masked down to the field width.
void OpDispatchBuilder::Extrq_imm(OpcodeArgs) {
  const auto PackedImm = Op->Src[1].Literal();
  const uint8_t FieldWidth = PackedImm & 0x3F;
  const uint8_t FieldIndex = (PackedImm >> 8) & 0x3F;

  Ref Field = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  if (FieldIndex != 0) {
    // A zero shift needs no IR op at all.
    Field = _VUShrI(OpSize::i64Bit, OpSize::i64Bit, Field, FieldIndex);
  }

  // Low FieldWidth bits set; a width of zero keeps all 64 bits.
  const uint64_t KeepBits = FieldWidth == 0 ? ~0ULL : (~0ULL >> (64 - FieldWidth));
  const Ref KeepVector = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(KeepBits));
  Field = _VAnd(OpSize::i128Bit, OpSize::i64Bit, Field, KeepVector);

  StoreResultFPR(Op, Field);
}

// Immediate form of the bit-field insert.
//
// Src[1]'s literal packs the field width in bits [5:0] (0 is treated as 64) and the
// left-shift amount in bits [13:8]. The low field of Src[0] is moved to that
// position and merged into the destination, replacing the bits it covers.
void OpDispatchBuilder::Insertq_imm(OpcodeArgs) {
  const auto PackedImm = Op->Src[1].Literal();
  const uint8_t FieldWidth = PackedImm & 0x3F;
  const uint8_t FieldIndex = (PackedImm >> 8) & 0x3F;

  Ref Target = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Field = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Low FieldWidth bits set; a width of zero keeps all 64 bits.
  const uint64_t FieldBits = FieldWidth == 0 ? ~0ULL : (~0ULL >> (64 - FieldWidth));
  Ref FieldMask = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _Constant(FieldBits));

  // Keep only the field bits of the incoming source.
  Field = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Field, FieldMask);

  if (FieldIndex != 0) {
    // Move both the field and its mask up to the insertion position.
    Field = _VShlI(OpSize::i64Bit, OpSize::i64Bit, Field, FieldIndex);
    FieldMask = _VShlI(OpSize::i128Bit, OpSize::i64Bit, FieldMask, FieldIndex);
  }

  // Clear the destination bits the field will occupy, then merge.
  Ref HoleMask = _VNot(OpSize::i64Bit, OpSize::i64Bit, FieldMask);
  Target = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Target, HoleMask);
  const Ref Merged = _VOr(OpSize::i64Bit, OpSize::i64Bit, Target, Field);

  StoreResultFPR(Op, Merged);
}

// Register form of the bit-field extract.
//
// The control values live in the low 64 bits of Src[0]: bits [5:0] give the field
// width and bits [13:8] give the right-shift amount. The destination is shifted right
// by the variable amount and then masked with a mask generated from the width.
void OpDispatchBuilder::Extrq(OpcodeArgs) {
  Ref Value = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Control = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Both control fields are six bits wide.
  const Ref SixBitMask = _VectorImm(OpSize::i64Bit, OpSize::i64Bit, 0x3F);

  // Bits[5:0] = field width in bits
  const Ref WidthField = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Control, SixBitMask);

  // Bits[13:8] = right-shift amount in bits
  const Ref ShiftField = _VAnd(OpSize::i64Bit, OpSize::i64Bit, _VUShrI(OpSize::i64Bit, OpSize::i64Bit, Control, 8), SixBitMask);

  // Bring the field down to bit zero.
  Ref Extracted = _VUShr(OpSize::i64Bit, OpSize::i64Bit, Value, ShiftField, false);

  // Turn the width into a bit mask: move it to a GPR, generate the mask, move it back to a vector.
  const Ref WidthGPR = _VExtractToGPR(OpSize::i64Bit, OpSize::i64Bit, WidthField, 0);
  const Ref WidthMask = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _MaskGenerateFromBitWidth(WidthGPR));

  Extracted = _VAnd(OpSize::i128Bit, OpSize::i64Bit, Extracted, WidthMask);

  StoreResultFPR(Op, Extracted);
}

// Register form of the bit-field insert.
//
// The control values are taken from 64-bit element 1 of Src[0]: bits [5:0] give the
// field width and bits [13:8] give the left-shift (insertion) position. The low field
// of Src[0] is masked, shifted up, and merged over the matching destination bits.
void OpDispatchBuilder::Insertq(OpcodeArgs) {
  Ref Target = LoadSourceFPR(Op, Op->Dest, Op->Flags);
  Ref Source = LoadSourceFPR(Op, Op->Src[0], Op->Flags);

  // Broadcast the upper 64-bit element, which carries the control fields.
  auto Control = _VDupElement(OpSize::i128Bit, OpSize::i64Bit, Source, 1);

  // Both control fields are six bits wide.
  const Ref SixBitMask = _VectorImm(OpSize::i64Bit, OpSize::i64Bit, 0x3F);

  // Bits[5:0] = field width in bits
  const Ref WidthField = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Control, SixBitMask);

  // Bits[13:8] = left-shift amount in bits
  const Ref ShiftField = _VAnd(OpSize::i64Bit, OpSize::i64Bit, _VUShrI(OpSize::i64Bit, OpSize::i64Bit, Control, 8), SixBitMask);

  // Turn the width into a bit mask: move it to a GPR, generate the mask, move it back to a vector.
  const Ref WidthGPR = _VExtractToGPR(OpSize::i64Bit, OpSize::i64Bit, WidthField, 0);
  const Ref FieldMask = _VCastFromGPR(OpSize::i128Bit, OpSize::i64Bit, _MaskGenerateFromBitWidth(WidthGPR));

  // Isolate the source field and move it to the insertion position.
  Ref Field = _VAnd(OpSize::i128Bit, OpSize::i64Bit, Source, FieldMask);
  Field = _VUShl(OpSize::i128Bit, OpSize::i64Bit, Field, ShiftField, false);

  // Shift the mask the same way and invert it to punch a hole in the destination.
  const Ref HoleMask = _VNot(OpSize::i64Bit, OpSize::i64Bit, _VUShl(OpSize::i128Bit, OpSize::i64Bit, FieldMask, ShiftField, false));

  Ref Merged = _VAnd(OpSize::i64Bit, OpSize::i64Bit, Target, HoleMask);
  Merged = _VOr(OpSize::i64Bit, OpSize::i64Bit, Merged, Field);
  StoreResultFPR(Op, Merged);
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 x87 to IR
$end_info$
*/

#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"
#include "Interface/Core/Addressing.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/FPState.h>

#include <stddef.h>
#include <stdint.h>

namespace FEXCore::IR {
// Forward declaration of the IR node type.
class OrderedNode;
// Parameter list shared by the opcode handlers in this file: a single decoded-op
// argument named Op, marked [[maybe_unused]] so handlers that ignore it do not warn.
#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// Loads the current x87 TOP value from the context.
// TOP (3 bits) is kept as a single byte inside the CPUState flags array,
// at the X87FLAG_TOP_LOC slot.
Ref OpDispatchBuilder::GetX87Top() {
  constexpr auto TopSlotOffset = offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC;
  return _LoadContextGPR(OpSize::i8Bit, TopSlotOffset);
}

// Converts a full tag word (2 bits per register) into the abridged form
// (1 bit per register) and stores it to CPUState::AbridgedFTW.
// The abridged bit is 1 for every pair that is not 11 (Empty).
void OpDispatchBuilder::SetX87FTW(Ref FTW) {
  _StackForceSlow(); // Invalidate x87 FTW register cache

  static_assert(static_cast<uint8_t>(FPState::X87Tag::Empty) == 0b11);

  // AND each bit with its upper neighbour: even bits become 1 only for pairs equal to 11.
  Ref EmptyPairs = _AndShift(OpSize::i32Bit, FTW, FTW, ShiftType::LSR, 1);

  // Invert and keep the even bits only: even bits are now 1 for every non-empty pair.
  Ref NonEmpty = _Andn(OpSize::i32Bit, Constant(0x55555555), EmptyPairs);

  // Compact the even bits together. This is a Morton deinterleave, see
  // https://stackoverflow.com/questions/3137266/how-to-de-interleave-bits-unmortonizing
  Ref Folded = _Orlshr(OpSize::i32Bit, NonEmpty, NonEmpty, 1);
  Ref Packed = _And(OpSize::i32Bit, Folded, Constant(0x33333333));

  Folded = _Orlshr(OpSize::i32Bit, Packed, Packed, 2);
  Packed = _And(OpSize::i32Bit, Folded, Constant(0x0f0f0f0f));

  Packed = _Orlshr(OpSize::i32Bit, Packed, Packed, 4);

  // No explicit final mask: the 8-bit context store keeps only the low byte.
  _StoreContextGPR(OpSize::i8Bit, Packed, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
}

// Stores Value as the new x87 TOP, a single byte in the CPUState flags array
// at the X87FLAG_TOP_LOC slot.
void OpDispatchBuilder::SetX87Top(Ref Value) {
  constexpr auto TopSlotOffset = offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC;
  _StoreContextGPR(OpSize::i8Bit, Value, TopSlotOffset);
}

// FLD with a memory operand: loads a float of the given Width and pushes it
// onto the x87 stack. 32-bit and 64-bit inputs are converted with _F80CVTTo first;
// any other width is pushed as loaded. The raw loaded value and its width are
// passed along to the push as well.
void OpDispatchBuilder::FLD(OpcodeArgs, IR::OpSize Width) {
  const bool NeedsWidening = Width == OpSize::i32Bit || Width == OpSize::i64Bit;

  Ref Loaded = LoadSourceFPR_WithOpSize(Op, Op->Src[0], Width, Op->Flags);
  Ref Widened = Loaded;
  if (NeedsWidening) {
    Widened = _F80CVTTo(Loaded, Width);
  }

  _PushStack(Widened, Loaded, Width);
}

// FLD with a stack-register operand: pushes a copy of ST(i), where i is
// encoded in the low three bits of the opcode.
void OpDispatchBuilder::FLDFromStack(OpcodeArgs) {
  const auto StackIndex = Op->OP & 7;
  _CopyPushStack(StackIndex);
}

// FBLD: loads an 80-bit packed BCD value from memory, converts it with
// _F80BCDLoad and pushes the result onto the x87 stack.
void OpDispatchBuilder::FBLD(OpcodeArgs) {
  Ref PackedBCD = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OpSize::f80Bit, Op->Flags);
  Ref AsFloat = _F80BCDLoad(PackedBCD);

  // No original-source value is tracked for this push.
  _PushStack(AsFloat, Invalid(), OpSize::iInvalid);
}

// FBSTP: converts ST(0) with _F80BCDStore, writes the 80-bit result to the
// destination and pops the x87 stack.
void OpDispatchBuilder::FBSTP(OpcodeArgs) {
  Ref StackTop = _ReadStackValue(0);
  Ref PackedBCD = _F80BCDStore(StackTop);
  StoreResultFPR_WithOpSize(Op, Op->Dest, PackedBCD, OpSize::f80Bit, OpSize::i8Bit);
  _PopStackDestroy();
}

// Pushes the named vector constant K onto the x87 stack. The same value is
// passed as both the converted and the original operand of the push, tagged
// as an 80-bit float.
void OpDispatchBuilder::FLD_Const(OpcodeArgs, NamedVectorConstant K) {
  Ref ConstantValue = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, K);
  _PushStack(ConstantValue, ConstantValue, OpSize::f80Bit);
}

// FILD: loads a signed integer from memory, builds the equivalent 80-bit
// extended-precision value with integer IR ops, and pushes it onto the x87 stack.
//
// The value is assembled as two 64-bit halves: the left-justified magnitude
// (mantissa) and a word holding the sign bit (0x8000) plus the biased exponent.
void OpDispatchBuilder::FILD(OpcodeArgs) {
  const auto SourceWidth = OpSizeFromSrc(Op);

  Ref Integer = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SourceWidth, Op->Flags);
  if (SourceWidth != OpSize::i64Bit) {
    // Narrower inputs are sign extended to 64 bits first.
    Integer = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(SourceWidth), 0, Integer);
  }

  // The sign test below overwrites NZCV, so preserve the guest flags first.
  SaveNZCV();

  // Compare against zero, pick the sign bit and take the absolute value.
  auto Zero = Constant(0);
  _SubNZCV(OpSize::i64Bit, Integer, Zero);
  auto SignBit = _NZCVSelect(OpSize::i64Bit, CondClass::SLT, Constant(0x8000), Zero);
  auto Magnitude = _Neg(OpSize::i64Bit, Integer, CondClass::MI);

  // Shift the magnitude up so its most significant set bit lands in bit 63.
  auto Justify = Sub(OpSize::i64Bit, Constant(63), _FindMSB(IR::OpSize::i64Bit, Magnitude));
  auto Mantissa = _Lshl(OpSize::i64Bit, Magnitude, Justify);

  // Exponent is bias + 63 minus the justification shift; a zero magnitude gets exponent zero.
  auto BiasedExponent = Sub(OpSize::i64Bit, Constant(0x3fff + 63), Justify);
  auto Exponent = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, Magnitude, Zero, Zero, BiasedExponent);
  auto SignAndExponent = _Or(OpSize::i64Bit, SignBit, Exponent);

  Ref Extended = _VLoadTwoGPRs(Mantissa, SignAndExponent);
  _PushStack(Extended, Invalid(), OpSize::iInvalid);
}

// FST/FSTP with a memory destination: stores the top of the x87 stack to
// memory at the requested Width (32-bit, 64-bit or 80-bit) and pops the stack
// when the instruction carries the POP flag.
void OpDispatchBuilder::FST(OpcodeArgs, IR::OpSize Width) {
  LOGMAN_THROW_A_FMT(Width == OpSize::i32Bit || Width == OpSize::i64Bit || Width == OpSize::f80Bit, "Invalid store width for FST");

  // Resolve the destination operand into an address mode for the store.
  AddressMode Target = DecodeAddress(Op, Op->Dest, MemoryAccessType::DEFAULT, false);
  Target = SelectAddressMode(this, Target, GetGPROpSize(), CTX->HostFeatures.SupportsTSOImm9, false, false, Width);

  // In reduced precision mode the stack value is treated as 64-bit, otherwise as 80-bit.
  const auto StackValueSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::f80Bit;
  _StoreStackMem(StackValueSize, Width, Target.Base, Target.Index, OpSize::iInvalid, Target.IndexType, Target.IndexScale);

  const bool PopsAfterStore = (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0;
  if (PopsAfterStore) {
    _PopStackDestroy();
  }
}

// FST/FSTP with a stack-register destination: copies ST(0) into ST(i), where
// i is the low three bits of the opcode, then pops when the POP flag is set.
void OpDispatchBuilder::FSTToStack(OpcodeArgs) {
  const uint8_t StackIndex = Op->OP & 7;
  const bool PopsAfterStore = (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0;

  // Storing ST(0) to itself needs no IR.
  if (StackIndex != 0) {
    _StoreStackToStack(StackIndex);
  }

  if (PopsAfterStore) {
    _PopStackDestroy();
  }
}

// Store integer to memory (possibly with truncation).
//
// Converts ST(0) to an integer of the operand size with _F80CVTInt, writes it to the
// destination and pops the stack when the instruction carries the POP flag.
//
// Truncate - forwarded to _F80CVTInt to select truncating conversion.
//
// For 16-bit destinations the Invalid Operation (IE) flag is computed by hand from the
// exponent of the 80-bit value, since _F80CVTInt doesn't handle 16-bit overflow
// detection properly.
void OpDispatchBuilder::FIST(OpcodeArgs, bool Truncate) {
  const auto Size = OpSizeFromSrc(Op);
  Ref Data = _ReadStackValue(0);

  if (Size == OpSize::i16Bit) {
    // The upper 64-bit element holds sign and exponent; the low 15 bits are the biased exponent.
    Ref Upper = _VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, Data, 1);
    Ref Exponent = _And(OpSize::i64Bit, Upper, Constant(0x7fff));

    // Check for NaN/Infinity: exponent == 0x7fff.
    // This has to be an equality compare (subtract and test for EQ). A TestNZ here derives
    // its flags from a bitwise AND of the operands, so EQ would hold only for a zero
    // exponent, flagging 0.0/denormals as invalid and never matching 0x7fff itself.
    SaveNZCV();
    SubWithFlags(OpSize::i64Bit, Exponent, 0x7fff);
    Ref IsSpecial = _NZCVSelect01(CondClass::EQ);

    // For overflow detection, check if exponent indicates a value >= 2^15
    // Biased exponent for 2^15 is 0x3fff + 15 = 0x400e
    // NOTE(review): this also covers exactly -2^15, which is representable in int16 —
    // confirm whether IE should be raised for that input.
    SubWithFlags(OpSize::i64Bit, Exponent, 0x400e);
    Ref IsOverflow = _NZCVSelect01(CondClass::UGE);

    // Set Invalid Operation flag if overflow or special value
    Ref InvalidFlag = _Or(OpSize::i64Bit, IsSpecial, IsOverflow);
    SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(InvalidFlag);
  }

  Data = _F80CVTInt(Size, Data, Truncate);

  StoreResultGPR_WithOpSize(Op, Op->Dest, Data, Size, OpSize::i8Bit);

  if ((Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) {
    _PopStackDestroy();
  }
}

// x87 add.
//
// Stack form (no explicit source): adds ST(0) and ST(i), with ResInST0 choosing
// which of the two is passed first to _F80AddStack; pops afterwards when the
// POP flag is set.
// Memory form: converts the integer or float memory operand of the given Width
// to the stack format and adds it to ST(0).
void OpDispatchBuilder::FADD(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
  const bool IsStackForm = Op->Src[0].IsNone();

  if (!IsStackForm) {
    LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory");

    // Single memory operand, converted according to its kind.
    Ref Addend {};
    if (Integer) {
      Addend = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      Addend = _F80CVTToInt(Addend, Width);
    } else {
      Addend = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Addend = _F80CVTTo(Addend, Width);
    }

    // Offset zero is the top of the stack.
    _F80AddValue(0, Addend);
    return;
  }

  // Implicit argument case: both operands come from the stack.
  const auto StackIndex = Op->OP & 7;
  const auto TopIndex = 0;
  const bool ResultInSTi = ResInST0 == OpResult::RES_STI;

  const auto First = ResultInSTi ? StackIndex : TopIndex;
  const auto Second = ResultInSTi ? TopIndex : StackIndex;
  _F80AddStack(First, Second);

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// x87 multiply.
//
// Stack form (no explicit source): multiplies ST(0) and ST(i), with ResInST0
// choosing which of the two is passed first to _F80MulStack.
// Memory form: converts the integer or float memory operand of the given Width
// to the stack format and multiplies ST(0) by it.
// Either form pops afterwards when the POP flag is set.
void OpDispatchBuilder::FMUL(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
  const bool IsStackForm = Op->Src[0].IsNone();

  if (IsStackForm) {
    // Implicit argument case: both operands come from the stack.
    const auto StackIndex = Op->OP & 7;
    const auto TopIndex = 0;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;

    const auto First = ResultInSTi ? StackIndex : TopIndex;
    const auto Second = ResultInSTi ? TopIndex : StackIndex;
    _F80MulStack(First, Second);
  } else {
    LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory");

    // Single memory operand, converted according to its kind.
    Ref Multiplier {};
    if (Integer) {
      Multiplier = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      Multiplier = _F80CVTToInt(Multiplier, Width);
    } else {
      Multiplier = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Multiplier = _F80CVTTo(Multiplier, Width);
    }

    // Offset zero is the top of the stack.
    _F80MulValue(0, Multiplier);
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// x87 divide, covering the normal and reversed operand orders.
//
// Stack form (no explicit source): operates on ST(0) and ST(i). ResInST0 selects
// the result slot, and the operand order given to _F80DivStack is swapped when
// exactly one of Reverse / "result in ST(i)" holds.
// Memory form: converts the integer or float memory operand of the given Width and
// uses _F80DivRValue (Reverse) or _F80DivValue against ST(0).
// Either form pops afterwards when the POP flag is set.
void OpDispatchBuilder::FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    const uint8_t StackIndex = Op->OP & 7;
    const uint8_t TopIndex = 0;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;
    const uint8_t ResultIndex = ResultInSTi ? StackIndex : TopIndex;

    // For two bools, inequality is the same as exclusive-or.
    const bool StackIndexFirst = Reverse != ResultInSTi;
    const uint8_t FirstOperand = StackIndexFirst ? StackIndex : TopIndex;
    const uint8_t SecondOperand = StackIndexFirst ? TopIndex : StackIndex;
    _F80DivStack(ResultIndex, FirstOperand, SecondOperand);
  } else {
    LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory");

    // Single memory operand, converted according to its kind.
    Ref MemValue {};
    if (Integer) {
      MemValue = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      MemValue = _F80CVTToInt(MemValue, Width);
    } else {
      MemValue = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      MemValue = _F80CVTTo(MemValue, Width);
    }

    // Offset zero is the top of the stack.
    if (Reverse) {
      _F80DivRValue(MemValue, 0);
    } else {
      _F80DivValue(0, MemValue);
    }
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// x87 subtract, covering the normal and reversed operand orders.
//
// Stack form (no explicit source): operates on ST(0) and ST(i). ResInST0 selects
// the result slot, and the operand order given to _F80SubStack is swapped when
// exactly one of Reverse / "result in ST(i)" holds.
// Memory form: converts the integer or float memory operand of the given Width and
// uses _F80SubRValue (Reverse) or _F80SubValue against ST(0).
// Either form pops afterwards when the POP flag is set.
void OpDispatchBuilder::FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    const auto StackIndex = Op->OP & 7;
    const auto TopIndex = 0;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;
    const auto ResultIndex = ResultInSTi ? StackIndex : TopIndex;

    // For two bools, inequality is the same as exclusive-or.
    const bool StackIndexFirst = Reverse != ResultInSTi;
    const auto FirstOperand = StackIndexFirst ? StackIndex : TopIndex;
    const auto SecondOperand = StackIndexFirst ? TopIndex : StackIndex;
    _F80SubStack(ResultIndex, FirstOperand, SecondOperand);
  } else {
    LOGMAN_THROW_A_FMT(Width != OpSize::f80Bit, "No 80-bit floats from memory");

    // Single memory operand, converted according to its kind.
    Ref MemValue {};
    if (Integer) {
      MemValue = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      MemValue = _F80CVTToInt(MemValue, Width);
    } else {
      MemValue = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      MemValue = _F80CVTTo(MemValue, Width);
    }

    // Offset zero is the top of the stack.
    if (Reverse) {
      _F80SubRValue(MemValue, 0);
    } else {
      _F80SubValue(0, MemValue);
    }
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// Expands the abridged tag word (1 bit per register, 8 registers) stored in
// CPUState::AbridgedFTW into the full 16-bit tag word with 2 bits per register.
//
// Doubling every bit is a Morton interleave of the byte with itself, done with
// the usual shift/mask ladder:
// https://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
Ref OpDispatchBuilder::GetX87FTW_Helper() {
  struct SpreadStep {
    int Shift;
    int Mask;
  };
  constexpr SpreadStep SpreadSteps[] = {
    {4, 0x0f0f0f0f},
    {2, 0x33333333},
    {1, 0x55555555},
  };

  Ref Tags = _LoadContextGPR(OpSize::i8Bit, offsetof(FEXCore::Core::CPUState, AbridgedFTW));

  // Spread the eight bits out so each one sits on an even bit position.
  for (const auto& Step : SpreadSteps) {
    Tags = _Orlshl(OpSize::i32Bit, Tags, Tags, Step.Shift);
    Tags = _And(OpSize::i32Bit, Tags, Constant(Step.Mask));
  }

  // Copy every even bit into the odd bit above it.
  Tags = _Orlshl(OpSize::i32Bit, Tags, Tags, 1);

  // At this point valid registers read 11 and empty ones 00; the architectural
  // encoding is the opposite, so flip the low 16 bits.
  static_assert(static_cast<uint8_t>(FPState::X87Tag::Valid) == 0b00);
  static_assert(static_cast<uint8_t>(FPState::X87Tag::Empty) == 0b11);
  return _Xor(OpSize::i32Bit, Tags, Constant(0xffff));
}

// FNSTENV: writes the x87 environment to memory as seven consecutive fields,
// each of the operand size.
//
// 16-bit layout (14 bytes):            32-bit layout (28 bytes):
//   2 bytes : FCW                        4 bytes : FCW
//   2 bytes : FSW                        4 bytes : FSW
//   2 bytes : FTW                        4 bytes : FTW
//   2 bytes : Instruction offset         4 bytes : Instruction pointer
//   2 bytes : Instruction CS selector    2 bytes : Instruction pointer selector
//   2 bytes : Data offset                2 bytes : Opcode
//   2 bytes : Data selector              4 bytes : Data pointer offset
//                                        4 bytes : Data pointer selector
//
// Only FCW, FSW and FTW carry real values; the remaining fields are stored as zero.
void OpDispatchBuilder::X87FNSTENV(OpcodeArgs) {
  // The stack state has to be synced to the registers before anything is stored.
  _SyncStackToSlow();

  const auto Size = OpSizeFromSrc(Op);
  const auto FieldBytes = IR::OpSizeToSize(Size);

  Ref EnvBase = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});
  EnvBase = AppendSegmentOffset(EnvBase, Op->Flags);

  // Field 0: FCW
  Ref ControlWord = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, FCW));
  _StoreMemGPR(Size, EnvBase, ControlWord, Size);

  // Field 1: FSW
  _StoreMemGPR(Size, ReconstructFSW_Helper(), EnvBase, Constant(FieldBytes * 1), Size, MemOffsetType::SXTX, 1);

  auto Zero = Constant(0);

  // Field 2: FTW
  _StoreMemGPR(Size, GetX87FTW_Helper(), EnvBase, Constant(FieldBytes * 2), Size, MemOffsetType::SXTX, 1);

  // Fields 3..6: instruction offset, instruction CS selector (+ opcode),
  // data pointer offset and data pointer selector are all written as zero.
  for (int Field = 3; Field <= 6; ++Field) {
    _StoreMemGPR(Size, Zero, EnvBase, Constant(FieldBytes * Field), Size, MemOffsetType::SXTX, 1);
  }
}

// Unpacks a status word into the tracked x87 state: TOP (bits 13:11), the
// condition codes C0/C1/C2 (bits 8/9/10) and C3 (bit 14), and IE (bit 0).
// Returns the extracted TOP value.
Ref OpDispatchBuilder::ReconstructX87StateFromFSW_Helper(Ref FSW) {
  auto NewTop = _Bfe(OpSize::i32Bit, 3, 11, FSW);
  SetX87Top(NewTop);

  // Pull out all single-bit fields first, then write them to their flag slots.
  auto Cond0 = _Bfe(OpSize::i32Bit, 1, 8, FSW);
  auto Cond1 = _Bfe(OpSize::i32Bit, 1, 9, FSW);
  auto Cond2 = _Bfe(OpSize::i32Bit, 1, 10, FSW);
  auto Cond3 = _Bfe(OpSize::i32Bit, 1, 14, FSW);
  auto InvalidOp = _Bfe(OpSize::i32Bit, 1, 0, FSW);

  SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(Cond0);
  SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(Cond1);
  SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(Cond2);
  SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(Cond3);
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(InvalidOp);

  return NewTop;
}

// FLDENV: reloads FCW, FSW and FTW from an environment image in memory.
// The image is read as consecutive fields of the operand size; only the first
// three fields are consumed here.
void OpDispatchBuilder::X87LDENV(OpcodeArgs) {
  _StackForceSlow();

  const auto Size = OpSizeFromSrc(Op);
  const auto FieldBytes = IR::OpSizeToSize(Size);

  Ref EnvBase = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.LoadData = false});
  EnvBase = AppendSegmentOffset(EnvBase, Op->Flags);

  // Field 0: FCW, always read as 16 bits.
  Ref ControlWord = _LoadMemGPR(OpSize::i16Bit, EnvBase, OpSize::i16Bit);
  _StoreContextGPR(OpSize::i16Bit, ControlWord, offsetof(FEXCore::Core::CPUState, FCW));

  // Field 1: FSW, unpacked into TOP and the status flags.
  Ref StatusAddr = Add(OpSize::i64Bit, EnvBase, FieldBytes * 1);
  Ref StatusWord = _LoadMemGPR(Size, StatusAddr, Size);
  ReconstructX87StateFromFSW_Helper(StatusWord);

  // Field 2: FTW
  Ref TagAddr = Add(OpSize::i64Bit, EnvBase, FieldBytes * 2);
  SetX87FTW(_LoadMemGPR(Size, TagAddr, Size));
}

// FNSAVE: writes the x87 environment followed by all eight stack registers to
// memory, then resets the x87 state through FNINIT.
//
// The environment is seven fields of the operand size (see the layout below).
// The registers follow at a 10-byte stride, stored in stack order starting from
// the current TOP.
void OpDispatchBuilder::X87FNSAVE(OpcodeArgs) {
  // The stack state has to be synced to the registers before it is read back below.
  _SyncStackToSlow();

  // 14 bytes for 16bit
  // 2 Bytes : FCW
  // 2 Bytes : FSW
  // 2 bytes : FTW
  // 2 bytes : Instruction offset
  // 2 bytes : Instruction CS selector
  // 2 bytes : Data offset
  // 2 bytes : Data selector

  // 28 bytes for 32bit
  // 4 bytes : FCW
  // 4 bytes : FSW
  // 4 bytes : FTW
  // 4 bytes : Instruction pointer
  // 2 bytes : instruction pointer selector
  // 2 bytes : Opcode
  // 4 bytes : data pointer offset
  // 4 bytes : data pointer selector
  const auto Size = OpSizeFromDst(Op);
  Ref Mem = MakeSegmentAddress(Op, Op->Dest);
  Ref Top = GetX87Top();
  {
    // FCW goes at offset zero.
    auto FCW = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, FCW));
    _StoreMemGPR(Size, Mem, FCW, Size);
  }

  // FSW
  { _StoreMemGPR(Size, ReconstructFSW_Helper(), Mem, Constant(IR::OpSizeToSize(Size) * 1), Size, MemOffsetType::SXTX, 1); }

  auto ZeroConst = Constant(0);

  {
    // FTW
    _StoreMemGPR(Size, GetX87FTW_Helper(), Mem, Constant(IR::OpSizeToSize(Size) * 2), Size, MemOffsetType::SXTX, 1);
  }

  {
    // Instruction Offset
    _StoreMemGPR(Size, ZeroConst, Mem, Constant(IR::OpSizeToSize(Size) * 3), Size, MemOffsetType::SXTX, 1);
  }

  {
    // Instruction CS selector (+ Opcode)
    _StoreMemGPR(Size, ZeroConst, Mem, Constant(IR::OpSizeToSize(Size) * 4), Size, MemOffsetType::SXTX, 1);
  }

  {
    // Data pointer offset
    _StoreMemGPR(Size, ZeroConst, Mem, Constant(IR::OpSizeToSize(Size) * 5), Size, MemOffsetType::SXTX, 1);
  }

  {
    // Data pointer selector
    _StoreMemGPR(Size, ZeroConst, Mem, Constant(IR::OpSizeToSize(Size) * 6), Size, MemOffsetType::SXTX, 1);
  }

  auto SevenConst = Constant(7);
  // Registers are held as 64-bit values in reduced precision mode, 128-bit slots otherwise.
  const auto LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;
  // First seven registers: each is written with a full 128-bit store at a 10-byte stride,
  // so the tail of every store is overwritten by the next register's store.
  for (int i = 0; i < 7; ++i) {
    Ref data = _LoadContextFPRIndexed(Top, LoadSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit));
    if (ReducedPrecisionMode) {
      // Convert the 64-bit value before it is written out.
      data = _F80CVTTo(data, OpSize::i64Bit);
    }
    _StoreMemFPR(OpSize::i128Bit, data, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (10 * i)), OpSize::i8Bit, MemOffsetType::SXTX, 1);
    // Advance to the next stack register, wrapping modulo 8.
    Top = _And(OpSize::i32Bit, Add(OpSize::i32Bit, Top, 1), SevenConst);
  }

  // The final st(7) needs a bit of special handling here
  // Nothing follows it to absorb a 128-bit store, so it is written as exactly 8 + 2 bytes.
  Ref data = _LoadContextFPRIndexed(Top, LoadSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit));
  if (ReducedPrecisionMode) {
    data = _F80CVTTo(data, OpSize::i64Bit);
  }
  // ST7 broken in to two parts
  // Lower 64bits [63:0]
  // upper 16 bits [79:64]
  _StoreMemFPR(OpSize::i64Bit, data, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (7 * 10)), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  // 16-bit element 4 holds bits [79:64]; broadcast it so a 16-bit store can write it.
  auto topBytes = _VDupElement(OpSize::i128Bit, OpSize::i16Bit, data, 4);
  _StoreMemFPR(OpSize::i16Bit, topBytes, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (7 * 10) + 8), OpSize::i8Bit, MemOffsetType::SXTX, 1);

  // reset to default
  FNINIT(Op);
}

// FRSTOR: reloads the x87 environment (FCW, FSW, FTW) and all eight stack
// registers from a memory image.
//
// The environment occupies seven fields of the operand size; the registers follow
// at a 10-byte stride and are written back in stack order starting from the TOP
// value taken from the loaded FSW.
void OpDispatchBuilder::X87FRSTOR(OpcodeArgs) {
  _StackForceSlow();
  const auto Size = OpSizeFromSrc(Op);
  Ref Mem = MakeSegmentAddress(Op, Op->Src[0]);

  // FCW is at offset zero and always read as 16 bits.
  auto NewFCW = _LoadMemGPR(OpSize::i16Bit, Mem, OpSize::i16Bit);
  _StoreContextGPR(OpSize::i16Bit, NewFCW, offsetof(FEXCore::Core::CPUState, FCW));
  if (ReducedPrecisionMode) {
    // ignore the rounding precision, we're always 64-bit in F64.
    // extract rounding mode
    // The rounding mode is the two-bit field at bits [11:10] of FCW.
    Ref roundingMode = NewFCW;
    auto roundShift = Constant(10);
    auto roundMask = Constant(3);
    roundingMode = _Lshr(OpSize::i32Bit, roundingMode, roundShift);
    roundingMode = _And(OpSize::i32Bit, roundingMode, roundMask);
    _SetRoundingMode(roundingMode, false, roundingMode);
  }

  // FSW: unpacked into TOP and the status flags; TOP drives the register loop below.
  auto NewFSW = _LoadMemGPR(Size, Mem, Constant(IR::OpSizeToSize(Size) * 1), Size, MemOffsetType::SXTX, 1);
  Ref Top = ReconstructX87StateFromFSW_Helper(NewFSW);
  {
    // FTW
    SetX87FTW(_LoadMemGPR(Size, Mem, Constant(IR::OpSizeToSize(Size) * 2), Size, MemOffsetType::SXTX, 1));
  }

  auto SevenConst = Constant(7);
  // Mask keeping the low 80 bits (64 + 16) of a 128-bit load.
  auto low = Constant(~0ULL);
  auto high = Constant(0xFFFF);
  Ref Mask = _VLoadTwoGPRs(low, high);
  // Registers are held as 64-bit values in reduced precision mode, 128-bit slots otherwise.
  const auto StoreSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;
  // First seven registers: a 128-bit load at a 10-byte stride reads into the following
  // register's bytes, which the mask then discards.
  for (int i = 0; i < 7; ++i) {
    Ref Reg = _LoadMemFPR(OpSize::i128Bit, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (10 * i)), OpSize::i8Bit, MemOffsetType::SXTX, 1);
    // Mask off the top bits
    Reg = _VAnd(OpSize::i128Bit, OpSize::i128Bit, Reg, Mask);
    if (ReducedPrecisionMode) {
      // Convert to double precision
      Reg = _F80CVT(OpSize::i64Bit, Reg);
    }
    _StoreContextFPRIndexed(Reg, Top, StoreSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit));

    // Advance to the next stack register, wrapping modulo 8.
    Top = _And(OpSize::i32Bit, Add(OpSize::i32Bit, Top, 1), SevenConst);
  }

  // The final st(7) needs a bit of special handling here
  // ST7 broken in to two parts
  // Lower 64bits [63:0]
  // upper 16 bits [79:64]
  // It is read as exactly 8 + 2 bytes so the load does not run past the end of the image.
  Ref Reg = _LoadMemFPR(OpSize::i64Bit, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (10 * 7)), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  Ref RegHigh = _LoadMemFPR(OpSize::i16Bit, Mem, Constant((IR::OpSizeToSize(Size) * 7) + (10 * 7) + 8), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  // Place the 16-bit high part into 16-bit element 4, i.e. bits [79:64].
  Reg = _VInsElement(OpSize::i128Bit, OpSize::i16Bit, 4, 0, Reg, RegHigh);
  if (ReducedPrecisionMode) {
    Reg = _F80CVT(OpSize::i64Bit, Reg); // Convert to double precision
  }
  _StoreContextFPRIndexed(Reg, Top, StoreSize, MMBaseOffset(), IR::OpSizeToSize(OpSize::i128Bit));
}

// Load / Store Control Word
// FSTCW/FNSTCW: reads the 16-bit control word from the context and stores it
// to the destination operand.
void OpDispatchBuilder::X87FSTCW(OpcodeArgs) {
  constexpr auto ControlWordOffset = offsetof(FEXCore::Core::CPUState, FCW);
  auto ControlWord = _LoadContextGPR(OpSize::i16Bit, ControlWordOffset);
  StoreResultGPR(Op, ControlWord);
}

// Load Control Word: reads the source operand and stores it as the new
// 16-bit CPUState::FCW.
void OpDispatchBuilder::X87FLDCW(OpcodeArgs) {
  // FIXME: A changed control word affects several instructions on the fast
  // path, so for now we drop to slow mode whenever it is changed manually.
  // To experiment, remove the call below and try DF_04.asm in fast path.
  _StackForceSlow();

  Ref ControlWord = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  _StoreContextGPR(OpSize::i16Bit, ControlWord, offsetof(FEXCore::Core::CPUState, FCW));
}

// FXCH: exchanges st(0) with st(i), where i is the low three bits of the
// opcode, and then writes zero to the C1 condition flag.
void OpDispatchBuilder::FXCH(OpcodeArgs) {
  // Exchanging st(0) with itself is essentially a nop for us, so only emit
  // the exchange for a non-zero index.
  if (const uint8_t StackIndex = Op->OP & 7; StackIndex != 0) {
    _F80StackXchange(StackIndex);
  }

  SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(Constant(0));
}

// FYL2X / FYL2XP1. The P1 variant first adds 1.0 to the value at the top of
// the stack and then shares the regular FYL2X stack operation.
void OpDispatchBuilder::X87FYL2X(OpcodeArgs, bool IsFYL2XP1) {
  if (IsFYL2XP1) {
    Ref One {};
    if (ReducedPrecisionMode) {
      // 0x3FF0000000000000 is 1.0 encoded as an IEEE-754 double.
      One = _VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, Constant(0x3FF0000000000000));
    } else {
      One = LoadAndCacheNamedVectorConstant(OpSize::i128Bit, NamedVectorConstant::NAMED_VECTOR_X87_ONE);
    }

    // Add the constant to stack slot zero (the top of the stack).
    _F80AddValue(0, One);
  }

  _F80FYL2XStack();
}

// x87 compare family. Compares the top of the stack against either st(i)
// (implicit operand) or a converted memory operand, then publishes the result
// in the x87 condition codes (FLAGS_X87) or in CF/ZF/PF otherwise.
//
// Width      - size of the memory operand (16/32/64-bit); unused for the implicit form.
// Integer    - memory operand is an integer rather than a float.
// WhichFlags - selects the flag set that receives the result.
// PopTwice   - pop the x87 stack twice after the compare.
void OpDispatchBuilder::FCOMI(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) {
  Ref CompareResult {};

  if (Op->Src[0].IsNone()) {
    // Implicit operand: the low opcode bits select the stack slot.
    const uint8_t StackIndex = Op->OP & 7;
    CompareResult = _F80CmpStack(StackIndex);
  } else {
    Ref Operand {};
    if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
      // Memory operand: load it and convert it before comparing.
      if (Integer) {
        Ref Loaded = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
        Operand = _F80CVTToInt(Loaded, Width);
      } else {
        Ref Loaded = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
        Operand = _F80CVTTo(Loaded, Width);
      }
    } else {
      FEX_UNREACHABLE;
    }
    CompareResult = _F80CmpValue(Operand);
  }

  // Pull the individual result bits out of the compare result.
  Ref CarryBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_LT, CompareResult);
  Ref ZeroBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_EQ, CompareResult);
  Ref UnorderedBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_UNORDERED, CompareResult);

  // An unordered result also sets both the carry and zero outputs.
  CarryBit = _Or(OpSize::i32Bit, CarryBit, UnorderedBit);
  ZeroBit = _Or(OpSize::i32Bit, ZeroBit, UnorderedBit);

  if (WhichFlags != FCOMIFlags::FLAGS_X87) {
    // OF, SF, AF, PF all undefined
    SetCFDirect(CarryBit);
    SetRFLAG<FEXCore::X86State::RFLAG_ZF_RAW_LOC>(ZeroBit);

    // PF is stored inverted, so invert from the host flag.
    // TODO: This could perhaps be optimized?
    auto InvertedPF = _Xor(OpSize::i32Bit, UnorderedBit, Constant(1));
    SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(InvertedPF);
  } else {
    SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(CarryBit);
    SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(Constant(0));
    SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(UnorderedBit);
    SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(ZeroBit);
  }

  // The Invalid Operation flag follows the unordered (NaN comparison) bit.
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(UnorderedBit);

  // One pop if either popping form applies, plus a second for PopTwice.
  if (PopTwice || (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) {
    _PopStackDestroy();
  }
  if (PopTwice) {
    _PopStackDestroy();
  }
}

// FTST: runs the stack test operation on st(0) and publishes the outcome in
// the x87 condition codes C0..C3 plus the Invalid Operation flag.
void OpDispatchBuilder::FTST(OpcodeArgs) {
  Ref TestResult = _F80StackTest(0);

  // Pull the individual result bits out of the test result.
  Ref CarryBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_LT, TestResult);
  Ref ZeroBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_EQ, TestResult);
  Ref UnorderedBit = _Bfe(OpSize::i64Bit, 1, FCMP_FLAG_UNORDERED, TestResult);

  // An unordered result also sets both the carry and zero outputs.
  CarryBit = _Or(OpSize::i32Bit, CarryBit, UnorderedBit);
  ZeroBit = _Or(OpSize::i32Bit, ZeroBit, UnorderedBit);

  SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(CarryBit);
  SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(Constant(0));
  SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(UnorderedBit);
  SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(ZeroBit);

  // The Invalid Operation flag follows the unordered (NaN comparison) bit.
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(UnorderedBit);
}

// Shared emitter for x87 stack operations: emits an _F80SCALEStack() node via
// DeriveOp with IROp as the requested opcode, and optionally zeroes C2.
void OpDispatchBuilder::X87OpHelper(OpcodeArgs, FEXCore::IR::IROps IROp, bool ZeroC2) {
  DeriveOp(Result, IROp, _F80SCALEStack());

  if (!ZeroC2) {
    return;
  }

  SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(Constant(0));
}

// Moves the x87 stack-top pointer by one: Inc selects the increment op,
// otherwise the decrement op is emitted.
void OpDispatchBuilder::X87ModifySTP(OpcodeArgs, bool Inc) {
  if (!Inc) {
    _DecStackTop();
    return;
  }

  _IncStackTop();
}

// Operations dealing with loading and storing environment pieces

// Rebuilds the FPU Status Word from the pieces of state we track:
//   IE  - 1 bit at bit 0.
//   C0  - 1 bit at bit 8.
//   C1  - 1 bit at bit 9.
//   C2  - 1 bit at bit 10.
//   Top - 3 bits at bit 11.
//   C3  - 1 bit at bit 14.
// A pre-calculated Top value may be passed in; when it is null the current
// top is fetched with GetX87Top() instead.
Ref OpDispatchBuilder::ReconstructFSW_Helper(Ref T) {
  // Seed the word with the stack top shifted into position.
  auto StackTop = T ? T : GetX87Top();
  Ref StatusWord = _Lshl(OpSize::i64Bit, StackTop, Constant(11));

  // Reads one tracked flag and ORs it into the word at the given bit.
  const auto MergeFlag = [&](auto FlagLocation, uint8_t BitPosition) {
    auto Flag = GetRFLAG(FlagLocation);
    StatusWord = _Orlshl(OpSize::i64Bit, StatusWord, Flag, BitPosition);
  };

  MergeFlag(FEXCore::X86State::X87FLAG_C0_LOC, 8);
  MergeFlag(FEXCore::X86State::X87FLAG_C1_LOC, 9);
  MergeFlag(FEXCore::X86State::X87FLAG_C2_LOC, 10);
  MergeFlag(FEXCore::X86State::X87FLAG_C3_LOC, 14);

  // The Invalid Operation flag sits at bit zero, so no shift is required.
  auto InvalidOp = GetRFLAG(FEXCore::X86State::X87FLAG_IE_LOC);
  StatusWord = _Or(OpSize::i64Bit, StatusWord, InvalidOp);

  return StatusWord;
}

// Store Status Word.
// There is no matching "load status word" instruction; the status word can
// only be loaded through frstor or fldenv.
void OpDispatchBuilder::X87FNSTSW(OpcodeArgs) {
  // _SyncStackToSlow() yields the top value, which is handed to the helper
  // so it does not have to fetch it again.
  Ref StackTop = _SyncStackToSlow();
  StoreResultGPR(Op, ReconstructFSW_Helper(StackTop));
}

// FNCLEX: clears the tracked x87 exception state. Only the Invalid Operation
// (IE) flag is written here.
void OpDispatchBuilder::FNCLEX(OpcodeArgs) {
  // Use the Constant() helper like every other flag write in this file
  // instead of emitting a raw _Constant node.
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(Constant(0));
}

// FNINIT: resets the tracked x87 state - control word, stack top, tag word,
// the simulated stack and the C0..C3 / IE flags.
void OpDispatchBuilder::FNINIT(OpcodeArgs) {
  // Invalidate x87 register caches before touching the state.
  _SyncStackToSlow();

  auto ZeroValue = Constant(0);

  // Reduced precision mode also resets the rounding mode.
  if (ReducedPrecisionMode) {
    _SetRoundingMode(ZeroValue, false, ZeroValue);
  }

  // The control word is initialised to 0x037F.
  auto InitialFCW = Constant(0x037F);
  _StoreContextGPR(OpSize::i16Bit, InitialFCW, offsetof(FEXCore::Core::CPUState, FCW));

  // Stack top goes back to zero.
  SetX87Top(ZeroValue);

  // Every tag is marked invalid by zeroing the abridged tag word.
  _StoreContextGPR(OpSize::i8Bit, ZeroValue, offsetof(FEXCore::Core::CPUState, AbridgedFTW));

  // Reinitialise the simulated stack.
  _InitStack();

  // Finally zero the condition codes and the Invalid Operation flag.
  SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(ZeroValue);
  SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(ZeroValue);
  SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(ZeroValue);
  SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(ZeroValue);
  SetRFLAG<FEXCore::X86State::X87FLAG_IE_LOC>(ZeroValue);
}

// FFREE: invalidates the stack slot selected by the low three opcode bits.
void OpDispatchBuilder::X87FFREE(OpcodeArgs) {
  const auto StackIndex = Op->OP & 7;
  _InvalidateStack(StackIndex);
}

// EMMS: invalidates the whole x87 stack so that all tags get set to 0b11.
void OpDispatchBuilder::X87EMMS(OpcodeArgs) {
  // 0xff is the value passed to request invalidation of every slot.
  constexpr uint8_t AllSlots = 0xff;
  _InvalidateStack(AllSlots);
}

// FCMOVcc: conditionally replaces st(0) with st(i). The opcode (with the low
// three register bits masked off) selects the condition; the condition is
// turned into an all-zeros/all-ones vector mask that drives a bit-select on
// the stack.
void OpDispatchBuilder::X87FCMOV(OpcodeArgs) {
  // The condition reads the flags, so make sure they are up to date.
  CalculateDeferredFlags();

  const uint16_t MaskedOpcode = Op->OP & 0b1111'1111'1000;
  uint8_t ConditionCode = 0;

  switch (MaskedOpcode) {
  case 0x2'C0: ConditionCode = 0x2; break; // JC
  case 0x3'C0: ConditionCode = 0x3; break; // JNC
  case 0x2'C8: ConditionCode = 0x4; break; // JE
  case 0x3'C8: ConditionCode = 0x5; break; // JNE
  case 0x2'D0: ConditionCode = 0x6; break; // JNA
  case 0x3'D0: ConditionCode = 0x7; break; // JA
  case 0x2'D8: ConditionCode = 0xA; break; // JP
  case 0x3'D8: ConditionCode = 0xB; break; // JNP
  default: LOGMAN_MSG_A_FMT("Unhandled FCMOV op: 0x{:x}", MaskedOpcode); break;
  }

  // Broadcast the 0 / all-ones condition result across the vector and use it
  // as the select mask between st(i) and st(0).
  Ref SelectMask = _VDupFromGPR(OpSize::i128Bit, OpSize::i64Bit, SelectCC0All1(ConditionCode));
  _F80VBSLStack(OpSize::i128Bit, SelectMask, Op->OP & 7, 0);
}

// FXAM: classifies st(0) into the condition codes. C1 receives the sign bit;
// C0/C2/C3 only distinguish "valid" from "invalid/empty" because every valid
// value is reported as a normal number.
void OpDispatchBuilder::X87FXAM(OpcodeArgs) {
  auto TopValue = _ReadStackValue(0);

  // Fetch the 64-bit lane holding the sign, then isolate the sign bit:
  // bit 63 of lane 0 in reduced precision mode, otherwise bit 15 of lane 1.
  Ref SignBit {};
  if (ReducedPrecisionMode) {
    SignBit = _VExtractToGPR(OpSize::i64Bit, OpSize::i64Bit, TopValue, 0);
    SignBit = _Bfe(OpSize::i64Bit, 1, 63, SignBit);
  } else {
    SignBit = _VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, TopValue, 1);
    SignBit = _Bfe(OpSize::i64Bit, 1, 15, SignBit);
  }
  SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(SignBit);

  // Claim this is a normal number; nothing else is supported.
  auto IsTopValid = _StackValidTag(0);

  // When the top is invalid C3:C2:C0 must read 0b101.
  auto TopInvalid = Select01(OpSize::i32Bit, CondClass::NEQ, IsTopValid, Constant(1));

  // C0 mirrors C3 until something other than zero is supported.
  SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(TopInvalid);
  SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(IsTopValid);
  SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(TopInvalid);
}

// FXTRACT: pops st(0) and pushes its exponent followed by its significand,
// leaving the significand on top of the stack.
void OpDispatchBuilder::X87FXTRACT(OpcodeArgs) {
  auto Source = _ReadStackValue(0);
  _PopStackDestroy();

  auto Exponent = _F80XTRACT_EXP(Source);
  auto Significand = _F80XTRACT_SIG(Source);

  _PushStack(Exponent, Invalid(), OpSize::iInvalid);
  _PushStack(Significand, Invalid(), OpSize::iInvalid);
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 x87 to IR
$end_info$
*/

#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <stddef.h>
#include <stdint.h>

namespace FEXCore::IR {
class OrderedNode;

#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// FLDENV for the reduced precision (F64) path: loads the control word, status
// word and tag word from the memory image addressed by the source operand.
// The image is read as consecutive fields, each OpSizeFromSrc(Op) bytes apart.
void OpDispatchBuilder::X87LDENVF64(OpcodeArgs) {
  _StackForceSlow();

  const auto FieldSize = OpSizeFromSrc(Op);
  const auto FieldStride = IR::OpSizeToSize(FieldSize);
  Ref Base = MakeSegmentAddress(Op, Op->Src[0]);

  // Field 0: control word.
  auto ControlWord = _LoadMemGPR(OpSize::i16Bit, Base, OpSize::i16Bit);
  // The rounding precision is ignored, we're always 64-bit in F64.
  // The rounding mode is taken from the three bits starting at bit 10.
  Ref RoundingBits = _Bfe(OpSize::i32Bit, 3, 10, ControlWord);
  _SetRoundingMode(RoundingBits, false, RoundingBits);
  _StoreContextGPR(OpSize::i16Bit, ControlWord, offsetof(FEXCore::Core::CPUState, FCW));

  // Field 1: status word.
  auto StatusOffset = Constant(FieldStride);
  auto StatusWord = _LoadMemGPR(FieldSize, Base, StatusOffset, FieldSize, MemOffsetType::SXTX, 1);
  ReconstructX87StateFromFSW_Helper(StatusWord);

  // Field 2: tag word (FTW).
  auto TagOffset = Constant(FieldStride * 2);
  SetX87FTW(_LoadMemGPR(FieldSize, Base, TagOffset, FieldSize, MemOffsetType::SXTX, 1));
}

// FLDCW for the reduced precision (F64) path: applies the rounding mode from
// the new control word and stores the word in CPUState::FCW.
void OpDispatchBuilder::X87FLDCWF64(OpcodeArgs) {
  _StackForceSlow();

  Ref ControlWord = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  // The rounding precision is ignored, we're always 64-bit in F64.
  // The rounding mode is taken from the three bits starting at bit 10.
  Ref RoundingBits = _Bfe(OpSize::i32Bit, 3, 10, ControlWord);
  _SetRoundingMode(RoundingBits, false, RoundingBits);

  _StoreContextGPR(OpSize::i16Bit, ControlWord, offsetof(FEXCore::Core::CPUState, FCW));
}

// F64 ops
// Float load op with memory operand
// FLD with a memory operand (F64 path): loads a Width-sized float, converts
// it to a 64-bit float when needed, and pushes it. The unconverted data and
// its width are passed to the push as well.
void OpDispatchBuilder::FLDF64(OpcodeArgs, IR::OpSize Width) {
  Ref Loaded = LoadSourceFPR_WithOpSize(Op, Op->Src[0], Width, Op->Flags);

  // Any width other than 32-bit or 80-bit is pushed unchanged.
  Ref AsDouble = Loaded;
  switch (Width) {
  case OpSize::i32Bit: AsDouble = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Loaded); break;
  case OpSize::f80Bit: AsDouble = _F80CVT(OpSize::i64Bit, Loaded); break;
  default: break;
  }

  _PushStack(AsDouble, Loaded, Width);
}

// FBLD (F64 path): reads an 80-bit BCD operand from memory, decodes it, then
// converts the decoded value to a 64-bit float and pushes it.
void OpDispatchBuilder::FBLDF64(OpcodeArgs) {
  Ref Packed = LoadSourceFPR_WithOpSize(Op, Op->Src[0], OpSize::f80Bit, Op->Flags);

  Ref Decoded = _F80BCDLoad(Packed);
  Ref AsDouble = _F80CVT(OpSize::i64Bit, Decoded);

  _PushStack(AsDouble, Invalid(), OpSize::iInvalid);
}

// FBSTP (F64 path): converts st(0) from its 64-bit form, encodes it as BCD,
// stores the 80-bit result to the destination and pops the stack.
void OpDispatchBuilder::FBSTPF64(OpcodeArgs) {
  Ref TopValue = _ReadStackValue(0);
  Ref Widened = _F80CVTTo(TopValue, OpSize::i64Bit);
  Ref Packed = _F80BCDStore(Widened);

  StoreResultFPR_WithOpSize(Op, Op->Dest, Packed, OpSize::f80Bit, OpSize::i8Bit);
  _PopStackDestroy();
}

// Pushes a constant onto the x87 stack (F64 path). Num is the raw 64-bit
// pattern that is moved into a vector register and pushed.
void OpDispatchBuilder::FLDF64_Const(OpcodeArgs, uint64_t Num) {
  auto RawBits = Constant(Num);
  auto Value = _VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, RawBits);
  _PushStack(Value, Value, OpSize::i64Bit);
}

// FILD (F64 path): loads a signed integer from memory, converts it to a
// 64-bit float and pushes it.
void OpDispatchBuilder::FILDF64(OpcodeArgs) {
  const auto SrcWidth = OpSizeFromSrc(Op);

  Ref Integer = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SrcWidth, Op->Flags);

  // 16-bit sources are sign extended first; the conversion below only takes
  // 32-bit or 64-bit inputs.
  if (SrcWidth == OpSize::i16Bit) {
    Integer = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(SrcWidth), 0, Integer);
  }

  const auto ConvertFrom = SrcWidth == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit;
  auto AsDouble = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Integer);

  _PushStack(AsDouble, Invalid(), OpSize::iInvalid);
}

// FIST / FISTP / FISTTP (F64 path): converts st(0) to an integer and stores
// it. Truncate selects the _Float_ToGPR_ZS conversion instead of
// _Float_ToGPR_S. The stack is popped when the table entry carries FLAGS_POP.
void OpDispatchBuilder::FISTF64(OpcodeArgs, bool Truncate) {
  const auto StoreSize = OpSizeFromSrc(Op);
  // The conversion result is 32-bit for 32-bit stores and 64-bit otherwise.
  const auto ResultSize = StoreSize == OpSize::i32Bit ? OpSize::i32Bit : OpSize::i64Bit;

  Ref Value = _ReadStackValue(0);
  if (!Truncate) {
    Value = _Float_ToGPR_S(ResultSize, OpSize::i64Bit, Value);
  } else {
    Value = _Float_ToGPR_ZS(ResultSize, OpSize::i64Bit, Value);
  }
  StoreResultGPR_WithOpSize(Op, Op->Dest, Value, StoreSize, OpSize::i8Bit);

  const bool WantsPop = (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0;
  if (WantsPop) {
    _PopStackDestroy();
  }
}

// FADD family (F64 path).
//
// With no explicit source the operation works on st(0) and st(i); ResInST0
// decides which of the two is passed first to the stack add. Otherwise the
// single memory operand is converted to a 64-bit float and added to st(0).
void OpDispatchBuilder::FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    // Implicit argument case.
    const auto StackIndex = Op->OP & 7;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;
    const auto First = ResultInSTi ? StackIndex : 0;
    const auto Second = ResultInSTi ? 0 : StackIndex;
    _F80AddStack(First, Second);

    if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
      _PopStackDestroy();
    }
    return;
  }

  // One memory argument.
  Ref Addend {};

  if (Integer) {
    Addend = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
    if (Width == OpSize::i16Bit) {
      // Sign extend 16-bit integers before converting.
      Addend = _Sbfe(OpSize::i64Bit, 16, 0, Addend);
    }
    const auto ConvertFrom = Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
    Addend = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Addend);
  } else if (Width == OpSize::i32Bit) {
    Addend = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    Addend = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Addend);
  } else if (Width == OpSize::i64Bit) {
    Addend = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  } else {
    FEX_UNREACHABLE;
  }

  // Top of stack is at offset zero.
  _F80AddValue(0, Addend);
}

// FMUL family (F64 path).
// FIXME: this is very similar to FADDF64.
//
// With no explicit source the operation works on st(0) and st(i); ResInST0
// decides which of the two is passed first to the stack multiply. Otherwise
// the single memory operand is converted to a 64-bit float and multiplied
// into st(0).
void OpDispatchBuilder::FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    // Implicit argument case.
    const auto StackIndex = Op->OP & 7;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;
    const auto First = ResultInSTi ? StackIndex : 0;
    const auto Second = ResultInSTi ? 0 : StackIndex;
    _F80MulStack(First, Second);

    if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
      _PopStackDestroy();
    }
    return;
  }

  // One memory argument.
  Ref Factor {};

  if (Integer) {
    Factor = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
    if (Width == OpSize::i16Bit) {
      // Sign extend 16-bit integers before converting.
      Factor = _Sbfe(OpSize::i64Bit, 16, 0, Factor);
    }
    const auto ConvertFrom = Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
    Factor = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Factor);
  } else if (Width == OpSize::i32Bit) {
    Factor = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    Factor = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Factor);
  } else if (Width == OpSize::i64Bit) {
    Factor = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
  } else {
    FEX_UNREACHABLE;
  }

  // Top of stack is at offset zero.
  _F80MulValue(0, Factor);

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// FDIV / FDIVR family (F64 path).
//
// With no explicit source the operation works on st(0) and st(i). ResInST0
// picks the slot passed as the first (destination) argument of the stack
// divide and Reverse swaps the remaining two arguments. Otherwise the single
// memory operand is converted to a 64-bit float and combined with st(0).
void OpDispatchBuilder::FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    const auto StackIndex = Op->OP & 7;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;

    // "Target" receives the result, "Other" is the remaining operand.
    const auto Target = ResultInSTi ? StackIndex : 0;
    const auto Other = ResultInSTi ? 0 : StackIndex;

    if (Reverse) {
      _F80DivStack(Target, Other, Target);
    } else {
      _F80DivStack(Target, Target, Other);
    }

    if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
      _PopStackDestroy();
    }
    return;
  }

  // One memory argument.
  Ref Operand {};

  if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
    if (Integer) {
      Operand = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      if (Width == OpSize::i16Bit) {
        // Sign extend 16-bit integers before converting.
        Operand = _Sbfe(OpSize::i64Bit, 16, 0, Operand);
      }
      const auto ConvertFrom = Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
      Operand = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Operand);
    } else if (Width == OpSize::i32Bit) {
      Operand = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Operand = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Operand);
    } else if (Width == OpSize::i64Bit) {
      Operand = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    }
  } else {
    FEX_UNREACHABLE;
  }

  // Top of stack is at offset zero.
  if (!Reverse) {
    _F80DivValue(0, Operand);
  } else {
    _F80DivRValue(Operand, 0);
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// FSUB / FSUBR family (F64 path).
//
// With no explicit source the operation works on st(0) and st(i). ResInST0
// picks the slot passed as the first (destination) argument of the stack
// subtract and Reverse swaps the remaining two arguments. Otherwise the
// single memory operand is converted to a 64-bit float and combined with
// st(0).
void OpDispatchBuilder::FSUBF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpDispatchBuilder::OpResult ResInST0) {
  if (Op->Src[0].IsNone()) {
    const auto StackIndex = Op->OP & 7;
    const bool ResultInSTi = ResInST0 == OpResult::RES_STI;

    // "Target" receives the result, "Other" is the remaining operand.
    const auto Target = ResultInSTi ? StackIndex : 0;
    const auto Other = ResultInSTi ? 0 : StackIndex;

    if (Reverse) {
      _F80SubStack(Target, Other, Target);
    } else {
      _F80SubStack(Target, Target, Other);
    }

    if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
      _PopStackDestroy();
    }
    return;
  }

  // One memory argument.
  Ref Operand {};

  if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
    if (Integer) {
      Operand = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      if (Width == OpSize::i16Bit) {
        // Sign extend 16-bit integers before converting.
        Operand = _Sbfe(OpSize::i64Bit, 16, 0, Operand);
      }
      const auto ConvertFrom = Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
      Operand = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Operand);
    } else if (Width == OpSize::i32Bit) {
      Operand = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Operand = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Operand);
    } else if (Width == OpSize::i64Bit) {
      Operand = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    }
  } else {
    FEX_UNREACHABLE;
  }

  // Top of stack is at offset zero.
  if (!Reverse) {
    _F80SubValue(0, Operand);
  } else {
    _F80SubRValue(Operand, 0);
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) {
    _PopStackDestroy();
  }
}

// FTST (F64 path): runs the stack test on st(0) and converts the resulting
// NZCV into the x87 condition codes.
void OpDispatchBuilder::FTSTF64(OpcodeArgs) {
  // NZCV is about to be clobbered, so make sure it has been moved to a GPR.
  SaveNZCV();

  // Emit the comparison, then translate its flags.
  _F80StackTest(0);
  ConvertNZCVToX87();
}

// x87 compare family (F64 path). Compares st(0) against st(i) or a converted
// memory operand and publishes the result either in the x87 condition codes
// (FLAGS_X87) or through ComissFlags() otherwise.
//
// Width      - size of the memory operand (16/32/64-bit); unused for the implicit form.
// Integer    - memory operand is an integer rather than a float.
// WhichFlags - selects the flag set that receives the result.
// PopTwice   - pop the x87 stack twice after the compare.
void OpDispatchBuilder::FCOMIF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpDispatchBuilder::FCOMIFlags WhichFlags, bool PopTwice) {
  Ref Loaded {};
  Ref Operand {};

  if (Op->Src[0].IsNone()) {
    // Implicit operand: the low opcode bits select the stack slot.
    const uint8_t StackIndex = Op->OP & 7;
    Operand = _ReadStackValue(StackIndex);
  } else if (Width == OpSize::i16Bit || Width == OpSize::i32Bit || Width == OpSize::i64Bit) {
    // Memory operand.
    if (Integer) {
      Loaded = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
      if (Width == OpSize::i16Bit) {
        // Sign extend 16-bit integers before converting.
        Loaded = _Sbfe(OpSize::i64Bit, 16, 0, Loaded);
      }
      const auto ConvertFrom = Width == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
      Operand = _Float_FromGPR_S(OpSize::i64Bit, ConvertFrom, Loaded);
    } else if (Width == OpSize::i32Bit) {
      Loaded = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
      Operand = _Float_FToF(OpSize::i64Bit, OpSize::i32Bit, Loaded);
    } else if (Width == OpSize::i64Bit) {
      Operand = LoadSourceFPR(Op, Op->Src[0], Op->Flags);
    }
  } else {
    FEX_UNREACHABLE;
  }

  if (WhichFlags != FCOMIFlags::FLAGS_X87) {
    HandleNZCVWrite();
    _F80CmpValue(Operand);
    ComissFlags(true /* InvalidateAF */);
  } else {
    // NZCV is about to be clobbered, so make sure it has been moved to a GPR.
    SaveNZCV();

    _F80CmpValue(Operand);
    ConvertNZCVToX87();
  }

  // One pop if either popping form applies, plus a second for PopTwice.
  if (PopTwice || (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_POP) != 0) {
    _PopStackDestroy();
  }
  if (PopTwice) {
    _PopStackDestroy();
  }
}

// FXTRACT (F64 path): replaces st(0) with its exponent and pushes its
// significand on top.
//
// Zero needs special treatment:
//   val ==  0.0 -> sig =  0.0, exp = -inf
//   val == -0.0 -> sig = -0.0, exp = -inf
// Every other input has its 64-bit significand and exponent extracted
// normally. Both variants are computed and the final values are selected on
// whether the input, ignoring its sign bit, is zero.
void OpDispatchBuilder::X87FXTRACTF64(OpcodeArgs) {
  Ref Source = _ReadStackValue(0);

  Ref RawBits = _VExtractToGPR(OpSize::i64Bit, OpSize::i64Bit, Source, 0);

  // Results for the zero case: exponent is -inf (0xfff0'0000'0000'0000) and
  // the significand is the input itself, which keeps the sign of zero.
  Ref ExpIfZero = _VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, Constant(0xfff0'0000'0000'0000UL));
  Ref SigIfZero = Source;

  // Results for the non-zero case.
  // Exponent: take the 11-bit field at bit 52, subtract 1023, convert to double.
  Ref ExpBits = _Bfe(OpSize::i64Bit, 11, 52, RawBits);
  ExpBits = Sub(OpSize::i64Bit, ExpBits, Constant(1023));
  Ref ExpIfNonZero = _Float_FromGPR_S(OpSize::i64Bit, OpSize::i64Bit, ExpBits);

  // Significand: keep the sign and mantissa bits, then force the exponent
  // field to 0x3ff.
  Ref SigBits = _And(OpSize::i64Bit, RawBits, Constant(0x800f'ffff'ffff'ffffLL));
  SigBits = _Or(OpSize::i64Bit, SigBits, Constant(0x3ff0'0000'0000'0000LL));
  Ref SigIfNonZero = _VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, SigBits);

  // Test everything but the sign bit; EQ then means the input was +/-0.0.
  SaveNZCV();
  _TestNZ(OpSize::i64Bit, RawBits, Constant(0x7fff'ffff'ffff'ffffUL));

  Ref Significand = _NZCVSelectV(OpSize::i64Bit, CondClass::EQ, SigIfZero, SigIfNonZero);
  Ref Exponent = _NZCVSelectV(OpSize::i64Bit, CondClass::EQ, ExpIfZero, ExpIfNonZero);

  // Replace the old top with the exponent, then push the significand.
  _PopStackDestroy();
  _PushStack(Exponent, Invalid(), OpSize::iInvalid);
  _PushStack(Significand, Invalid(), OpSize::iInvalid);
}
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-to-ir, opcodes|dispatcher-implementations
desc: Handles x86/64 ops to IR, no-pf opt, local-flags opt
$end_info$
*/

#include "FEXCore/Core/HostFeatures.h"
#include "FEXCore/Utils/Telemetry.h"
#include "Interface/Context/Context.h"
#include "Interface/Core/OpcodeDispatcher.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <FEXHeaderUtils/BitUtils.h>

#include <algorithm>
#include <array>
#include <cstdint>

namespace FEXCore::IR {

using X86Tables::OpToIndex;

#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

// Emits a guest system call.
//
// The argument registers depend on the OS ABI reported by the syscall
// handler. IsSyscallInst is set for the `syscall` instruction (as opposed to
// `int 0x80`), which additionally writes RCX and R11.
void OpDispatchBuilder::SyscallOp(OpcodeArgs, bool IsSyscallInst) {
  constexpr size_t SyscallArgs = 7;
  using SyscallArray = std::array<uint64_t, SyscallArgs>;

  // Register order for 64-bit Linux: syscall number first, then the arguments.
  static constexpr SyscallArray GPRIndexes_64 = {
    FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RDI, FEXCore::X86State::REG_RSI, FEXCore::X86State::REG_RDX,
    FEXCore::X86State::REG_R10, FEXCore::X86State::REG_R8,  FEXCore::X86State::REG_R9,
  };
  // Register order for 32-bit Linux.
  static constexpr SyscallArray GPRIndexes_32 = {
    FEXCore::X86State::REG_RAX, FEXCore::X86State::REG_RBX, FEXCore::X86State::REG_RCX, FEXCore::X86State::REG_RDX,
    FEXCore::X86State::REG_RSI, FEXCore::X86State::REG_RDI, FEXCore::X86State::REG_RBP,
  };

  size_t ArgumentCount {};
  const SyscallArray* RegisterOrder {};

  const auto OSABI = CTX->SyscallHandler->GetOSABI();
  switch (OSABI) {
  case FEXCore::HLE::SyscallOSABI::OS_LINUX64:
    ArgumentCount = GPRIndexes_64.size();
    RegisterOrder = &GPRIndexes_64;
    break;
  case FEXCore::HLE::SyscallOSABI::OS_LINUX32:
    ArgumentCount = GPRIndexes_32.size();
    RegisterOrder = &GPRIndexes_32;
    break;
  case FEXCore::HLE::SyscallOSABI::OS_GENERIC:
    // All registers will be spilled before the syscall and filled afterwards so no JIT-side argument handling is necessary.
    ArgumentCount = 0;
    RegisterOrder = nullptr;
    break;
  default: ERROR_AND_DIE_FMT("Unhandled OSABI syscall"); break;
  }

  // Calculate flags early.
  CalculateDeferredFlags();

  // Store the address of this instruction as RIP before making the call.
  const auto GPRSize = GetGPROpSize();
  auto NewRIP = GetRelocatedPC(Op, -Op->InstSize);
  _StoreContextGPR(GPRSize, NewRIP, offsetof(FEXCore::Core::CPUState, rip));

  // Slots the ABI does not use stay as InvalidNode.
  std::array<Ref, SyscallArgs> Arguments;
  Arguments.fill(InvalidNode);
  for (size_t Index = 0; Index < ArgumentCount; ++Index) {
    Arguments[Index] = LoadGPRRegister(RegisterOrder->at(Index));
  }

  if (IsSyscallInst) {
    // If this is the `Syscall` instruction rather than `int 0x80` then we need to do some additional work.
    // RCX = RIP after this instruction
    // R11 = EFlags
    // Calculate flags.
    CalculateDeferredFlags();

    auto PackedFlags = GetPackedRFLAG();
    StoreGPRRegister(X86State::REG_R11, PackedFlags, OpSize::i64Bit);

    auto NextRIP = GetRelocatedPC(Op);
    StoreGPRRegister(X86State::REG_RCX, NextRIP, OpSize::i64Bit);
  }

  FlushRegisterCache();
  auto SyscallResult = _Syscall(Arguments[0], Arguments[1], Arguments[2], Arguments[3], Arguments[4], Arguments[5], Arguments[6]);

  // Generic ABI doesn't store result in RAX.
  if (OSABI != FEXCore::HLE::SyscallOSABI::OS_GENERIC) {
    StoreGPRRegister(X86State::REG_RAX, SyscallResult);
  }

  if (Op->TableInfo->Flags & X86Tables::InstFlags::FLAGS_BLOCK_END) {
    // RIP could have been updated after coming back from the Syscall.
    NewRIP = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, rip));
    ExitFunction(NewRIP);
  }
}

// Emits a thunk call. The SHA256 sum identifying the thunk is read from the
// guest code two bytes past the start of this instruction. Afterwards the
// return address is popped and the block exits to it.
void OpDispatchBuilder::ThunkOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  auto* HashBytes = reinterpret_cast<uint8_t*>(Op->PC + 2);

  // The thunk argument comes from RDI in 64-bit mode (x86-64 ABI) and from
  // ECX in 32-bit mode (x86 fastcall ABI).
  const auto ArgumentRegister = Is64BitMode ? X86State::REG_RDI : X86State::REG_RCX;
  Thunk(LoadGPRRegister(ArgumentRegister), *reinterpret_cast<SHA256Sum*>(HashBytes));

  auto ReturnAddress = Pop(GPRSize);

  // Leave the block at the popped return address.
  ExitFunction(ReturnAddress, BranchHint::Return);
  BlockSetRIP = true;
}

// LEA: computes the source operand's effective address without loading from
// it and stores the address to the destination.
// LEA specifically ignores segment prefixes.
void OpDispatchBuilder::LEAOp(OpcodeArgs) {
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto OpAddr = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0);

  // The destination is 32-bit by default, 16-bit with the operand size flag,
  // and 64-bit with the widening flag (64-bit mode only).
  OpSize DstSize = OpSize::i32Bit;
  if (OpAddr == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST) {
    DstSize = OpSize::i16Bit;
  } else if (Is64BitMode && OpAddr == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST) {
    DstSize = OpSize::i64Bit;
  }

  // Upper garbage is allowed when the source is wider than the destination.
  const bool UpperGarbageAllowed = SrcSize > DstSize;
  auto Address = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags, {.LoadData = false, .AllowUpperGarbage = UpperGarbageAllowed});
  StoreResultGPR_WithOpSize(Op, Op->Dest, Address, DstSize);
}

// Handler for instructions that need no IR: the body is intentionally empty.
void OpDispatchBuilder::NOPOp(OpcodeArgs) {}

// RET: pops the return address, optionally releases extra stack (opcode 0xC2
// carries an additional offset operand), writes back RSP and exits the block
// at the popped address.
void OpDispatchBuilder::RETOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();

  Ref StackPointer = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
  Ref ReturnAddress = Pop(GPRSize, StackPointer);

  const bool HasStackAdjust = Op->OP == 0xC2;
  if (HasStackAdjust) {
    auto Adjust = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
    StackPointer = Add(GPRSize, StackPointer, Adjust);
  }

  // Commit the updated stack pointer.
  StoreGPRRegister(X86State::REG_RSP, StackPointer);

  // Leave the block at the return address.
  ExitFunction(ReturnAddress, BranchHint::Return);
  BlockSetRIP = true;
}

/*
IRET: returns from an interrupt by popping the saved state off the stack.

Stack layout, first popped to last. Each member is 64-bit, 32-bit, or 16-bit
depending on operating size:
  RIP
  CS
  EFLAGS
  RSP  (64-bit mode only)
  SS   (64-bit mode only)
*/
void OpDispatchBuilder::IRETOp(OpcodeArgs) {
  // Operand Size override unsupported!
  const bool HasOperandSizeOverride = (Op->Flags & X86Tables::DecodeFlags::FLAG_OPERAND_SIZE) != 0;
  if (HasOperandSizeOverride) {
    LogMan::Msg::EFmt("IRET only implemented for 64bit and 32bit sizes");
    DecodeFailure = true;
    return;
  }

  const auto GPRSize = GetGPROpSize();

  Ref StackPointer = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));

  // RIP (64/32/16 bits)
  auto ReturnAddress = Pop(GPRSize, StackPointer);

  // CS (lower 16 used)
  auto CodeSegment = Pop(GPRSize, StackPointer);
  _StoreContextGPR(OpSize::i16Bit, CodeSegment, offsetof(FEXCore::Core::CPUState, cs_idx));
  UpdatePrefixFromSegment(CodeSegment, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX);

  // eflags (lower 16 used)
  SetPackedRFLAG(false, Pop(GPRSize, StackPointer));

  if (!Is64BitMode) {
    // In 32-bit mode only the stack pointer advanced past the pops is stored.
    StoreGPRRegister(X86State::REG_RSP, StackPointer);
  } else {
    // RSP and SS only happen in 64-bit mode or if this is a CPL mode jump!
    // FEX doesn't support a CPL mode switch, so don't need to worry about this on 32-bit
    StoreGPRRegister(X86State::REG_RSP, Pop(GPRSize, StackPointer));

    // ss
    auto StackSegment = Pop(GPRSize, StackPointer);
    _StoreContextGPR(OpSize::i16Bit, StackSegment, offsetof(FEXCore::Core::CPUState, ss_idx));
    UpdatePrefixFromSegment(StackSegment, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX);
  }

  ExitFunction(ReturnAddress);
  BlockSetRIP = true;
}

// Emits the callback-return op, followed by a block exit to the RIP held in
// the CPU state.
void OpDispatchBuilder::CallbackReturnOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();

  _CallbackReturn();

  // This exit won't actually get hit, but it needs to exist.
  auto ContextRIP = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, rip));
  ExitFunction(ContextRIP);
  BlockSetRIP = true;
}

// Dispatches the group 1 ALU encodings (opcodes 0x80, 0x81 and 0x83) to the
// shared ALU handler. The reg field selects the operation:
//   0 = ADD, 1 = OR, 4 = AND, 5 = SUB, 6 = XOR.
// Any other encoding is reported as a decode failure.
void OpDispatchBuilder::SecondaryALUOp(OpcodeArgs) {
  // Both stay at OP_LAST unless a supported encoding is matched below.
  FEXCore::IR::IROps IROp = FEXCore::IR::IROps::OP_LAST;
  FEXCore::IR::IROps AtomicIROp = FEXCore::IR::IROps::OP_LAST;

#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
  switch (Op->OP) {
  // ADD
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 0):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 0):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 0):
    IROp = FEXCore::IR::IROps::OP_ADD;
    AtomicIROp = FEXCore::IR::IROps::OP_ATOMICFETCHADD;
    break;
  // OR
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 1):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 1):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 1):
    IROp = FEXCore::IR::IROps::OP_OR;
    AtomicIROp = FEXCore::IR::IROps::OP_ATOMICFETCHOR;
    break;
  // AND
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 4):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 4):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 4):
    IROp = FEXCore::IR::IROps::OP_ANDWITHFLAGS;
    AtomicIROp = FEXCore::IR::IROps::OP_ATOMICFETCHAND;
    break;
  // SUB
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 5):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 5):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 5):
    IROp = FEXCore::IR::IROps::OP_SUB;
    AtomicIROp = FEXCore::IR::IROps::OP_ATOMICFETCHSUB;
    break;
  // XOR
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x80), 6):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x81), 6):
  case OPD(FEXCore::X86Tables::TYPE_GROUP_1, OpToIndex(0x83), 6):
    IROp = FEXCore::IR::IROps::OP_XOR;
    AtomicIROp = FEXCore::IR::IROps::OP_ATOMICFETCHXOR;
    break;
  default:
    LogMan::Msg::EFmt("Unknown ALU Op: 0x{:x}", Op->OP);
    DecodeFailure = true;
    return;
  }
#undef OPD

  // The source operand for these encodings is at index 1.
  ALUOp(Op, IROp, AtomicIROp, 1);
}

// Emits IR for x86 ADC (add with carry).
//
// SrcIndex selects which entry of Op->Src holds the addend. Three paths exist:
//  - LOCKed memory destination: the carry is folded into the source and a single
//    atomic fetch-add is emitted; the fetched old value feeds the flag calculation.
//  - `adc dst, 0` at 32/64-bit: a dedicated add-carry-to-value op is used.
//  - Everything else goes through CalculateFlags_ADC.
// The result is only written back explicitly when the destination is not locked
// memory (the atomic op already performed the memory update).
void OpDispatchBuilder::ADCOp(OpcodeArgs, uint32_t SrcIndex) {
  // ADC reads CF, so any deferred flag calculation must be resolved first.
  CalculateDeferredFlags();

  Ref Src = LoadSourceGPR(Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
  const auto Size = OpSizeFromDst(Op);
  // The carry-adjusted source is computed at no less than 32 bits.
  const auto OpSize = std::max(OpSize::i32Bit, Size);

  Ref Before {};
  if (DestIsLockedMem(Op)) {
    auto ALUOp = IncrementByCarry(OpSize, Src);
    HandledLock = true;

    Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
    Before = _AtomicFetchAdd(Size, ALUOp, DestMem);
  } else {
    Before = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  }

  Ref Result;
  if (!DestIsLockedMem(Op) && Op->Src[SrcIndex].IsLiteral() && Op->Src[SrcIndex].Literal() == 0 && Size >= OpSize::i32Bit) {
    // Fast path: adding only the carry to the destination.
    HandleNZCV_RMW();
    // The add-with-carry consumes the carry in its NON-inverted form, and the
    // carry it produces is non-inverted as well (see CFInverted = false below).
    // Requesting the inverted form here would add !CF instead of CF.
    RectifyCarryInvert(false);
    Result = _AdcZeroWithFlags(OpSize, Before);
    // The second operand is zero, so the raw AF input is just the first operand.
    SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Before);
    CalculatePF(Result);
    CFInverted = false;
  } else {
    Result = CalculateFlags_ADC(Size, Before, Src);
  }

  if (!DestIsLockedMem(Op)) {
    StoreResultGPR(Op, Result);
  }
}

// Emits IR for x86 SBB (subtract with borrow). SrcIndex selects the entry of
// Op->Src that holds the subtrahend.
void OpDispatchBuilder::SBBOp(OpcodeArgs, uint32_t SrcIndex) {
  // The borrow-in comes from CF, so deferred flags have to be resolved up front.
  CalculateDeferredFlags();

  Ref Subtrahend = LoadSourceGPR(Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
  const auto DstSize = OpSizeFromDst(Op);
  // The carry-adjusted source is computed at no less than 32 bits.
  const auto ArithSize = std::max(OpSize::i32Bit, DstSize);

  Ref Minuend {};
  if (!DestIsLockedMem(Op)) {
    Minuend = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  } else {
    // Locked memory destination: subtract (Src + CF) atomically and keep the
    // fetched old value for the flag calculation.
    HandledLock = true;

    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    auto SrcWithBorrow = IncrementByCarry(ArithSize, Subtrahend);
    Minuend = _AtomicFetchSub(DstSize, SrcWithBorrow, Address);
  }

  Ref Difference = CalculateFlags_SBB(DstSize, Minuend, Subtrahend);

  // The atomic op already updated memory; only non-locked forms store explicitly.
  if (!DestIsLockedMem(Op)) {
    StoreResultGPR(Op, Difference);
  }
}

// Emits IR for SALC: the destination receives all-ones when the carry
// condition holds and zero otherwise.
void OpDispatchBuilder::SALCOp(OpcodeArgs) {
  // The select below reads the carry, so deferred flags must be up to date.
  CalculateDeferredFlags();

  auto CarryMask = NZCVSelect(OpSize::i32Bit, CondClass::UGE /* CF = 1 */, _InlineConstant(0xffffffff), _InlineConstant(0));
  StoreResultGPR(Op, CarryMask);
}

// Emits IR for PUSH with the value taken from the first source operand.
void OpDispatchBuilder::PUSHOp(OpcodeArgs) {
  const auto PushSize = OpSizeFromSrc(Op);
  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  Push(PushSize, Value);
}

// Emits IR for the register form of PUSH; the register is encoded as Op->Dest.
void OpDispatchBuilder::PUSHREGOp(OpcodeArgs) {
  const auto PushSize = OpSizeFromSrc(Op);
  // Upper garbage is tolerated since only PushSize bytes are pushed.
  Ref Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  Push(PushSize, Value);
}

// Emits IR for PUSHA (32-bit mode only): pushes AX, CX, DX, BX, the stack
// pointer as it was before the first push, then BP, SI and DI.
void OpDispatchBuilder::PUSHAOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);

  // Snapshot the stack pointer before any push modifies it.
  Ref OriginalSP = _Copy(LoadGPRRegister(X86State::REG_RSP));

  for (const auto Reg : {X86State::REG_RAX, X86State::REG_RCX, X86State::REG_RDX, X86State::REG_RBX}) {
    Push(Size, LoadGPRRegister(Reg));
  }

  // The SP slot holds the pre-instruction value.
  Push(Size, OriginalSP);

  for (const auto Reg : {X86State::REG_RBP, X86State::REG_RSI, X86State::REG_RDI}) {
    Push(Size, LoadGPRRegister(Reg));
  }
}

// Emits IR for PUSH of a segment register. SegmentReg is one of the
// FLAG_*_PREFIX decode flags identifying the segment.
void OpDispatchBuilder::PUSHSegmentOp(OpcodeArgs, uint32_t SegmentReg) {
  const auto LoadSize = OpSizeFromSrc(Op);
  const auto PushSize = OpSizeFromDst(Op);

  // In 64-bit mode the value comes from the *_cached CPU state fields; in
  // every other mode it comes from the *_idx fields.
  Ref Selector {};
  if (Is64BitMode) {
    switch (SegmentReg) {
    case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, es_cached));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, cs_cached));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, ss_cached));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, ds_cached));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, fs_cached));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, gs_cached));
      break;
    }
    default: FEX_UNREACHABLE;
    }
  } else {
    switch (SegmentReg) {
    case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, es_idx));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, cs_idx));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, ss_idx));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, ds_idx));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, fs_idx));
      break;
    }
    case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: {
      Selector = _LoadContextGPR(LoadSize, offsetof(FEXCore::Core::CPUState, gs_idx));
      break;
    }
    default: FEX_UNREACHABLE;
    }
  }

  // Write the value to the new stack slot.
  // AMD hardware zero-extends the segment selector to 32 bits,
  // Intel hardware inserts the segment selector.
  Push(PushSize, Selector);
}

// Emits IR for POP: pops a source-sized value and stores it to the destination.
void OpDispatchBuilder::POPOp(OpcodeArgs) {
  const auto PopSize = OpSizeFromSrc(Op);
  StoreResultGPR(Op, Pop(PopSize));
}

// Emits IR for POPA (32-bit mode only): pops DI, SI, BP, skips the saved SP
// slot, then pops BX, DX, CX and AX.
void OpDispatchBuilder::POPAOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);

  // Work on a local handle of the stack pointer; it is written back once at the end.
  Ref SP = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));

  for (const auto Reg : {X86State::REG_RDI, X86State::REG_RSI, X86State::REG_RBP}) {
    StoreGPRRegister(Reg, Pop(Size, SP), Size);
  }

  // The saved SP value is not loaded: step over its slot, the final SP will be correct anyway.
  SP = _RMWHandle(Add(OpSize::i64Bit, SP, IR::OpSizeToSize(Size)));

  for (const auto Reg : {X86State::REG_RBX, X86State::REG_RDX, X86State::REG_RCX, X86State::REG_RAX}) {
    StoreGPRRegister(Reg, Pop(Size, SP), Size);
  }

  // Commit the updated stack pointer.
  StoreGPRRegister(X86State::REG_RSP, SP);
}

// Emits IR for POP into a segment register. SegmentReg is one of the
// FLAG_*_PREFIX decode flags identifying the segment.
void OpDispatchBuilder::POPSegmentOp(OpcodeArgs, uint32_t SegmentReg) {
  const auto PopSize = OpSizeFromSrc(Op);
  const auto StoreSize = OpSizeFromDst(Op);

  auto Selector = Pop(PopSize);

  if (SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX) {
    // Unset the 'active' bit in the packed TF, skipping the single step exception after this instruction
    SetRFLAG<FEXCore::X86State::RFLAG_TF_RAW_LOC>(_And(OpSize::i32Bit, GetRFLAG(FEXCore::X86State::RFLAG_TF_RAW_LOC), Constant(1)));
  }

  // Write the popped selector into the matching *_idx field of the CPU state.
  switch (SegmentReg) {
  case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, es_idx));
    break;
  }
  case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, cs_idx));
    break;
  }
  case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, ss_idx));
    break;
  }
  case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, ds_idx));
    break;
  }
  case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, fs_idx));
    break;
  }
  case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: {
    _StoreContextGPR(StoreSize, Selector, offsetof(FEXCore::Core::CPUState, gs_idx));
    break;
  }
  default: break; // Unknown segment: nothing is stored
  }

  UpdatePrefixFromSegment(Selector, SegmentReg);
}

// Emits IR for LEAVE: the stack pointer is taken from RBP, then RBP is popped.
void OpDispatchBuilder::LEAVEOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  // An operand-size override narrows the operation to 16 bits.
  const bool HasOperandSizeOverride = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_OPERAND_SIZE) != 0;
  const auto OperandSize = HasOperandSizeOverride ? OpSize::i16Bit : GPRSize;

  // Start from RBP as the stack pointer, then behave like a pop.
  auto StackPtr = _RMWHandle(LoadGPRRegister(X86State::REG_RBP));
  auto PoppedBP = Pop(OperandSize, StackPtr);

  // Commit the post-pop stack pointer.
  StoreGPRRegister(X86State::REG_RSP, StackPtr, OperandSize);

  // The popped value becomes the new RBP.
  StoreGPRRegister(X86State::REG_RBP, PoppedBP, OperandSize);
}

// Emits IR for a relative CALL: pushes the return address and exits to the
// displaced target. A zero displacement only pushes the return address and
// requests a block end.
void OpDispatchBuilder::CALLOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();

  BlockSetRIP = true;

  // Call instruction only uses up to 32-bit signed displacement
  const int64_t Displacement = Op->Src[0].Literal();

  // The return address is pushed in every case.
  const auto ReturnPC = GetRelocatedPC(Op);
  Push(GPRSize, ReturnPC);

  if (Displacement == 0) {
    NeedsBlockEnd = true;
    return;
  }

  // If the instruction following the call is a known entry point, hand its
  // block to the exit; otherwise pass InvalidNode.
  const uint64_t NextRIP = Op->PC + Op->InstSize;
  auto ReturnBlock = InvalidNode;
  if (auto It = JumpTargets.find(NextRIP); It != JumpTargets.end() && It->second.IsEntryPoint) {
    ReturnBlock = It->second.BlockEntry;
  }

  ExitRelocatedPC(Op, Displacement, BranchHint::Call, ReturnPC, ReturnBlock);
}

// Emits IR for an indirect CALL whose target is loaded from the first source
// operand: pushes the return address and exits to the loaded target.
void OpDispatchBuilder::CALLAbsoluteOp(OpcodeArgs) {
  BlockSetRIP = true;

  const auto PushSize = OpSizeFromSrc(Op);
  Ref Target = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  // Push the return address.
  auto ReturnPC = GetRelocatedPC(Op);
  Push(PushSize, ReturnPC);

  // If the instruction following the call is a known entry point, hand its
  // block to the exit; otherwise pass InvalidNode.
  const uint64_t NextRIP = Op->PC + Op->InstSize;
  auto ReturnBlock = InvalidNode;
  if (auto It = JumpTargets.find(NextRIP); It != JumpTargets.end() && It->second.IsEntryPoint) {
    ReturnBlock = It->second.BlockEntry;
  }

  ExitFunction(Target, BranchHint::Call, ReturnPC, ReturnBlock);
}

// Maps the low nibble of an x86 condition code onto a CondClass that can be
// evaluated directly on the NZCV flags.
//
// Returns std::nullopt for condition codes that are not listed below (0xA and
// 0xB, the parity conditions); callers must handle those themselves.
//
// Note that this is not a pure lookup: the carry conditions (0x2/0x3) depend on
// the current value of CFInverted, and the combined carry/zero conditions
// (0x6/0x7) call RectifyCarryInvert(true) before returning.
std::optional<CondClass> OpDispatchBuilder::DecodeNZCVCondition(uint8_t OP) {
  switch (OP) {
  case 0x0: { // JO - Jump if OF == 1
    return CondClass::FU;
  }
  case 0x1: { // JNO - Jump if OF == 0
    return CondClass::FNU;
  }
  case 0x2: { // JC - Jump if CF == 1
    // Pick the condition that matches however the carry is currently stored.
    return CFInverted ? CondClass::ULT : CondClass::UGE;
  }
  case 0x3: { // JNC - Jump if CF == 0
    // Pick the condition that matches however the carry is currently stored.
    return CFInverted ? CondClass::UGE : CondClass::ULT;
  }
  case 0x4: { // JE - Jump if ZF == 1
    return CondClass::EQ;
  }
  case 0x5: { // JNE - Jump if ZF == 0
    return CondClass::NEQ;
  }
  case 0x6: { // JNA - Jump if CF == 1 || ZF == 1
    // With CF inverted, we want (C == 0 || Z == 1). By De Morgan's, that's
    // equivalent to !(C == 1 && Z == 0). That's .ls
    RectifyCarryInvert(true);
    return CondClass::ULE;
  }
  case 0x7: { // JA - Jump if CF == 0 && ZF == 0
    // With CF inverted, we want (C == 1 && Z == 0). That's .hi
    RectifyCarryInvert(true);
    return CondClass::UGT;
  }
  case 0x8: { // JS - Jump if SF == 1
    return CondClass::MI;
  }
  case 0x9: { // JNS - Jump if SF == 0
    return CondClass::PL;
  }
  case 0xC: { // SF <> OF
    return CondClass::SLT;
  }
  case 0xD: { // SF = OF
    return CondClass::SGE;
  }
  case 0xE: { // ZF = 1 || SF <> OF
    return CondClass::SLE;
  }
  case 0xF: { // ZF = 0 && SF = OF
    return CondClass::SGT;
  }
  default:
    // Other conditions do not map directly, caller gets to deal with it.
    return std::nullopt;
  }
}

// Returns true when the condition nibble is 0xA and false otherwise.
// Only the two parity condition codes (0xA and 0xB) are expected here, which
// the assertion message labels "JP or JNP".
static bool ParityJumpIsJP(uint8_t OP) {
  LOGMAN_THROW_A_FMT(OP == 0xA || OP == 0xB, "JP or JNP");
  return OP == 0xA;
}

// Produces a 64-bit value that is all-ones when the condition encoded in OP
// holds and zero otherwise.
Ref OpDispatchBuilder::SelectCC0All1(uint8_t OP) {
  const auto NZCVCond = DecodeNZCVCondition(OP);

  if (!NZCVCond) {
    // Parity condition: replicate bit 0 of the raw PF value across all 64 bits.
    return _Sbfe(OpSize::i64Bit, 1, 0, LoadPFRaw(false, ParityJumpIsJP(OP)));
  }

  // Raw select is fine here: DecodeNZCVCondition already accounted for the carry invert.
  return _NZCVSelect(OpSize::i64Bit, *NZCVCond, _InlineConstant(~0ULL), _InlineConstant(0));
}

// Emits IR for SETcc: stores the evaluated condition to the destination.
void OpDispatchBuilder::SETccOp(OpcodeArgs) {
  CalculateDeferredFlags();

  Ref Flag;
  const auto NZCVCond = DecodeNZCVCondition(Op->OP & 0xf);
  if (!NZCVCond) {
    // Parity conditions are read from the raw PF value instead of NZCV.
    Flag = LoadPFRaw(true, ParityJumpIsJP(Op->OP & 0xf));
  } else {
    // Raw select is fine here: DecodeNZCVCondition already accounted for the carry invert.
    Flag = _NZCVSelect01(*NZCVCond);
  }

  StoreResultGPR(Op, Flag);
}

// Emits IR for CMOVcc: selects between the source and the current destination
// value based on the condition in the low nibble of the opcode, then stores
// the selected value.
void OpDispatchBuilder::CMOVOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  const auto CondCode = Op->OP & 0xF;
  // The select is performed at no less than 32 bits.
  const auto SelectSize = std::max(OpSize::i32Bit, OpSizeFromSrc(Op));

  CalculateDeferredFlags();

  // The destination is always a GPR; it is the value kept when the condition fails.
  Ref Current = LoadSourceGPR_WithOpSize(Op, Op->Dest, GPRSize, Op->Flags);

  Ref Replacement {};
  if (Op->Src[0].IsGPR()) {
    Replacement = LoadSourceGPR_WithOpSize(Op, Op->Src[0], GPRSize, Op->Flags);
  } else {
    Replacement = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  }

  Ref Selected {};
  const auto NZCVCond = DecodeNZCVCondition(CondCode);
  if (!NZCVCond) {
    // Parity condition: test bit 0 of the raw PF value.
    Ref ParityBit = LoadPFRaw(false, ParityJumpIsJP(CondCode));
    SaveNZCV();

    // NZCV is only clobbered internally here, so the carry-invert bookkeeping
    // is bypassed and the raw test plus raw select are used.
    _TestNZ(OpSize::i32Bit, ParityBit, _InlineConstant(1));
    Selected = _NZCVSelect(SelectSize, CondClass::NEQ, Replacement, Current);
  } else {
    // Raw select is fine here: DecodeNZCVCondition already accounted for the carry invert.
    Selected = _NZCVSelect(SelectSize, *NZCVCond, Replacement, Current);
  }

  StoreResultGPR(Op, Selected);
}

// Emits IR for a conditional relative jump (Jcc).
//
// The condition is taken from the low nibble of the opcode. Both the taken and
// the fall-through edges are wired either to an already-known jump target
// block or to a freshly created block that immediately exits to the
// corresponding guest PC.
void OpDispatchBuilder::CondJUMPOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  BlockSetRIP = true;

  // Jump instruction only uses up to 32-bit signed displacement
  int64_t TargetOffset = Op->Src[0].Literal();
  uint64_t InstRIP = Op->PC + Op->InstSize;
  uint64_t Target = InstRIP + TargetOffset;

  if (GetGPROpSize() == OpSize::i32Bit) {
    // If the GPRSize is 4 then we need to be careful about PC wrapping
    if (TargetOffset < 0 && -TargetOffset > InstRIP) {
      // Invert the signed value if we are underflowing
      TargetOffset = 0x1'0000'0000ULL + TargetOffset;
    } else if (TargetOffset >= 0 && Target >= 0x1'0000'0000ULL) {
      // We are overflowing, wrap around
      TargetOffset = TargetOffset - 0x1'0000'0000ULL;
    }
    // The lookup key for the target is the wrapped 32-bit address.
    Target &= 0xFFFFFFFFU;
  }

  FlushRegisterCache();
  auto TrueBlock = JumpTargets.find(Target);
  auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);

  // Remember the block holding the branch so the fall-through block can be placed right after it.
  auto CurrentBlock = GetCurrentBlock();

  {
    IRPair<IR::IROp_CondJump> CondJump_;
    auto OP = Op->OP & 0xF;
    auto Cond = DecodeNZCVCondition(OP);
    if (Cond) {
      CondJump_ = CondJumpNZCV(*Cond);
    } else {
      // Parity conditions branch on bit 0 of the raw PF value; the bit sense
      // to branch on is chosen by the opcode (0xB branches when the bit is set).
      LOGMAN_THROW_A_FMT(OP == 0xA || OP == 0xB, "only PF left");
      CondJump_ = CondJumpBit(LoadPFRaw(false, false), 0, OP == 0xB);
    }

    // Taking branch block
    if (TrueBlock != JumpTargets.end()) {
      SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      auto JumpTarget = CreateNewCodeBlockAtEnd();
      SetTrueJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Store the new RIP
      ExitRelocatedPC(Op, TargetOffset);
    }

    // Failure to take branch
    if (FalseBlock != JumpTargets.end()) {
      SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      // Place it after this block for fallthrough optimization
      auto JumpTarget = CreateNewCodeBlockAfter(CurrentBlock);
      SetFalseJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Leave block & store the new RIP
      ExitRelocatedPC(Op);
    }
  }
}

// Emits IR for the jump-if-RCX-is-zero family: branches to the relative target
// when the (possibly narrowed) RCX value compares equal to zero.
//
// Both edges are wired either to an already-known jump target block or to a
// freshly created block that immediately exits to the corresponding guest PC.
void OpDispatchBuilder::CondJUMPRCXOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  BlockSetRIP = true;
  // The counter register is GPR-sized, halved when an address-size override is present.
  auto JcxGPRSize = GetGPROpSize();
  JcxGPRSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? (JcxGPRSize >> 1) : JcxGPRSize;

  uint64_t Target = Op->PC + Op->InstSize + Op->Src[0].Literal();

  Ref CondReg = LoadGPRRegister(X86State::REG_RCX, JcxGPRSize);

  auto TrueBlock = JumpTargets.find(Target);
  auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);

  // Remember the block holding the branch so the fall-through block can be placed right after it.
  auto CurrentBlock = GetCurrentBlock();

  {
    auto CondJump_ = CondJump(CondReg, CondClass::EQ);

    // Taking branch block
    if (TrueBlock != JumpTargets.end()) {
      SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      auto JumpTarget = CreateNewCodeBlockAtEnd();
      SetTrueJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Store the new RIP
      ExitRelocatedPC(Op, Op->Src[0].Literal());
    }

    // Failure to take branch
    if (FalseBlock != JumpTargets.end()) {
      SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      // Place it after the current block for fallthrough behavior
      auto JumpTarget = CreateNewCodeBlockAfter(CurrentBlock);
      SetFalseJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Leave block & store the new RIP
      ExitRelocatedPC(Op);
    }
  }
}

// Emits IR for LOOP / LOOPE / LOOPNE.
//
// The counter (Op->Src[0]) is decremented and written back, then a conditional
// jump on the decremented value is emitted. Opcode 0xE2 is the plain form;
// the other opcodes additionally gate on ZF (0xE1 requires ZF == 1, otherwise
// ZF == 0 is required). The relative displacement is held in Op->Src[1].
void OpDispatchBuilder::LoopOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  bool CheckZF = Op->OP != 0xE2;
  bool ZFTrue = Op->OP == 0xE1;

  BlockSetRIP = true;
  auto SrcSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) ? OpSize::i32Bit : OpSize::i64Bit;
  auto OpSize = SrcSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;

  if (!Is64BitMode) {
    // RCX size is 32-bit or 16-bit when executing in 32-bit mode.
    SrcSize = IR::SizeToOpSize(IR::OpSizeToSize(SrcSize) >> 1);
    OpSize = OpSize::i32Bit;
  }

  uint64_t Target = Op->PC + Op->InstSize + Op->Src[1].Literal();

  // Decrement the counter and write it back before evaluating the branch.
  Ref CondReg = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags);
  CondReg = Sub(OpSize, CondReg, 1);
  StoreResultGPR(Op, Op->Src[0], CondReg);

  // If LOOPE then jumps to target if RCX != 0 && ZF == 1
  // If LOOPNE then jumps to target if RCX != 0 && ZF == 0
  //
  // To handle efficiently, smash RCX to zero if ZF is wrong (1 csel).
  // Only the local branch value is replaced; the counter was already stored above.
  if (CheckZF) {
    const auto cond = ZFTrue ? CondClass::EQ : CondClass::NEQ;
    CondReg = NZCVSelect(OpSize, cond, CondReg, _InlineConstant(0));
  }

  CalculateDeferredFlags();
  auto TrueBlock = JumpTargets.find(Target);
  auto FalseBlock = JumpTargets.find(Op->PC + Op->InstSize);

  {
    auto CondJump_ = CondJump(CondReg);

    // Taking branch block
    if (TrueBlock != JumpTargets.end()) {
      SetTrueJumpTarget(CondJump_, TrueBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      auto JumpTarget = CreateNewCodeBlockAtEnd();
      SetTrueJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Store the new RIP
      ExitRelocatedPC(Op, Op->Src[1].Literal());
    }

    // Failure to take branch
    if (FalseBlock != JumpTargets.end()) {
      SetFalseJumpTarget(CondJump_, FalseBlock->second.BlockEntry);
    } else {
      // Make sure to start a new block after ending this one
      // Place after this block for fallthrough behavior
      auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetFalseJumpTarget(CondJump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();

      // Leave block & store the new RIP
      ExitRelocatedPC(Op);
    }
  }
}

// Emits IR for an unconditional relative JMP.
//
// With Multiblock enabled the jump is linked to the target's block when that
// target is a known jump target; otherwise (or with Multiblock disabled) the
// block exits to the relocated target PC.
void OpDispatchBuilder::JUMPOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  BlockSetRIP = true;

  // Jump instruction only uses up to 32-bit signed displacement
  int64_t TargetOffset = Op->Src[0].Literal();
  uint64_t InstRIP = Op->PC + Op->InstSize;
  uint64_t TargetRIP = InstRIP + TargetOffset;

  if (GetGPROpSize() == OpSize::i32Bit) {
    // If the GPRSize is 4 then we need to be careful about PC wrapping
    if (TargetOffset < 0 && -TargetOffset > InstRIP) {
      // Invert the signed value if we are underflowing
      TargetOffset = 0x1'0000'0000ULL + TargetOffset;
    } else if (TargetOffset >= 0 && TargetRIP >= 0x1'0000'0000ULL) {
      // We are overflowing, wrap around
      TargetOffset = TargetOffset - 0x1'0000'0000ULL;
    }

    // The lookup key for the target is the wrapped 32-bit address.
    TargetRIP &= 0xFFFFFFFFU;
  }

  CalculateDeferredFlags();
  // This is just an unconditional relative literal jump
  if (Multiblock) {
    auto JumpBlock = JumpTargets.find(TargetRIP);
    if (JumpBlock != JumpTargets.end()) {
      Jump(GetNewJumpBlock(TargetRIP));
    } else {
      // If the block isn't a jump target then we need to create an exit block
      auto Jump_ = Jump();

      // Place after this block for fallthrough behavior
      auto JumpTarget = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetJumpTarget(Jump_, JumpTarget);
      SetCurrentCodeBlock(JumpTarget);
      StartNewBlock();
      ExitRelocatedPC(Op, TargetOffset);
    }
  } else {
    ExitRelocatedPC(Op, TargetOffset);
  }
}

// Emits IR for an indirect JMP whose target is loaded from the first source operand.
void OpDispatchBuilder::JUMPAbsoluteOp(OpcodeArgs) {
  // Flags are resolved before leaving the block.
  CalculateDeferredFlags();

  BlockSetRIP = true;

  // Unconditional jump through a ModRM-described operand; the destination is
  // only known at runtime, so multiblock cannot make use of it.
  auto Target = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  // Leave the block at the loaded RIP.
  ExitFunction(Target);
}

// Emits IR for an indirect far JMP: loads a 32-bit offset and a 16-bit code
// segment selector from memory, installs the selector and exits to the offset.
void OpDispatchBuilder::JUMPFARIndirectOp(OpcodeArgs) {
  // Flags are resolved before leaving the block.
  CalculateDeferredFlags();

  BlockSetRIP = true;

  // Unconditional jump through a ModRM-described memory operand; the
  // destination is only known at runtime, so multiblock cannot make use of it.
  Ref FarPtr = MakeSegmentAddress(Op, Op->Dest);
  // The selector is read 4 bytes past the offset.
  AddressMode SelectorAddr = {.Base = FarPtr, .Offset = 4, .AddrSize = OpSize::i64Bit};
  auto TargetRIP = _LoadMemGPRAutoTSO(OpSize::i32Bit, FarPtr, OpSize::i8Bit);
  auto TargetCS = _LoadMemGPRAutoTSO(OpSize::i16Bit, SelectorAddr, OpSize::i8Bit);

  // Install the new code segment.
  _StoreContextGPR(OpSize::i16Bit, TargetCS, offsetof(FEXCore::Core::CPUState, cs_idx));
  UpdatePrefixFromSegment(TargetCS, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX);

  // Leave the block at the loaded RIP.
  ExitFunction(TargetRIP);
}

// Emits IR for an indirect far CALL: loads a 32-bit offset and a 16-bit code
// segment selector from memory, pushes the current CS and the return address,
// installs the new selector and exits to the loaded offset.
void OpDispatchBuilder::CALLFARIndirectOp(OpcodeArgs) {
  // Pushes are 64-bit with REX.W, 32-bit otherwise.
  const bool HasRexW = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) != 0;
  const auto PushSize = HasRexW ? OpSize::i64Bit : OpSize::i32Bit;

  // Flags are resolved before leaving the block.
  CalculateDeferredFlags();

  BlockSetRIP = true;

  Ref FarPtr = MakeSegmentAddress(Op, Op->Dest);
  // The selector is read 4 bytes past the offset.
  AddressMode SelectorAddr = {.Base = FarPtr, .Offset = 4, .AddrSize = OpSize::i64Bit};
  auto TargetRIP = _LoadMemGPRAutoTSO(OpSize::i32Bit, FarPtr, OpSize::i8Bit);
  auto TargetCS = _LoadMemGPRAutoTSO(OpSize::i16Bit, SelectorAddr, OpSize::i8Bit);
  auto OldCS = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, cs_idx));

  auto ReturnPC = GetRelocatedPC(Op);

  // Current CS goes on the stack first ...
  Push(PushSize, OldCS);

  // ... followed by the return address.
  Push(PushSize, ReturnPC);

  // Install the new code segment.
  _StoreContextGPR(OpSize::i16Bit, TargetCS, offsetof(FEXCore::Core::CPUState, cs_idx));
  UpdatePrefixFromSegment(TargetCS, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX);

  // Leave the block at the loaded RIP.
  ExitFunction(TargetRIP);
}

// Emits IR for a far RET: pops the return RIP and the code segment selector,
// optionally releases extra stack bytes given by a literal operand, installs
// the selector and exits to the popped RIP.
void OpDispatchBuilder::RETFARIndirectOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  // Pops are 64-bit with REX.W, 32-bit otherwise.
  const bool HasRexW = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) != 0;
  const auto PopSize = HasRexW ? OpSize::i64Bit : OpSize::i32Bit;

  Ref StackPtr = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));
  Ref ReturnRIP = Pop(PopSize, StackPtr);
  Ref ReturnCS = Pop(PopSize, StackPtr);

  // A literal operand is an additional amount to add to the stack pointer.
  if (Op->Src[0].IsLiteral()) {
    StackPtr = Add(GPRSize, StackPtr, Op->Src[0].Literal());
  }

  // Commit the updated stack pointer.
  StoreGPRRegister(X86State::REG_RSP, StackPtr);

  // Install the popped code segment.
  _StoreContextGPR(OpSize::i16Bit, ReturnCS, offsetof(FEXCore::Core::CPUState, cs_idx));
  UpdatePrefixFromSegment(ReturnCS, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX);

  // Leave the block at the popped RIP.
  ExitFunction(ReturnRIP);
  BlockSetRIP = true;
}

// Emits IR for TEST: the flags of (Dest AND Src) are computed and the result
// itself is discarded. SrcIndex selects which entry of Op->Src is the second
// operand.
//
// Three code shapes are produced:
//  - both operands are the same node (or the source is an all-ones mask for the
//    operand size): flags are set directly from the operand, no AND is emitted;
//  - sub-32-bit operand with a constant whose sign bit is clear: a 32-bit AND
//    is used;
//  - otherwise: a flag-setting AND at the operand size.
void OpDispatchBuilder::TESTOp(OpcodeArgs, uint32_t SrcIndex) {
  // TEST is an instruction that does an AND between the sources
  // Result isn't stored in result, only writes to flags
  Ref Src = LoadSourceGPR(Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
  Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  const auto Size = OpSizeFromDst(Op);
  LOGMAN_THROW_A_FMT(Size >= IR::OpSize::i8Bit && Size <= IR::OpSize::i64Bit, "Invalid size");

  uint64_t Const;
  bool AlwaysNonnegative = false;
  if (IsValueConstant(WrapNode(Src), &Const)) {
    // Optimize out masking constants
    // (the 64-bit case is special-cased because shifting by 64 is not valid)
    if (Const == (Size == OpSize::i64Bit ? ~0ULL : ((1ull << IR::OpSizeAsBits(Size)) - 1))) {
      Src = Dest;
    }

    // Optimize test with non-sign bits
    AlwaysNonnegative = (Const & (1ull << (IR::OpSizeAsBits(Size) - 1))) == 0;
  }

  if (Dest == Src) {
    // Optimize out the AND.
    SetNZP_ZeroCV(Size, Src);
  } else if (Size < OpSize::i32Bit && AlwaysNonnegative) {
    // If we know the result is always nonnegative, we can use a 32-bit test.
    auto Res = _And(OpSize::i32Bit, Dest, Src);
    CalculatePF(Res);
    SetNZ_ZeroCV(OpSize::i32Bit, Res);
  } else {
    HandleNZ00Write();
    CalculatePF(_AndWithFlags(Size, Dest, Src));
  }

  // AF is invalidated in every path.
  InvalidateAF();
}

// Emits IR for ARPL r/m16, r16.
//
// When the RPL field of the destination selector is numerically lower than the
// RPL field of the source selector, the destination RPL is raised to the source
// RPL and ZF is set to 1. Otherwise ZF is set to 0 and the destination value is
// left as it was. ZF is the only flag written here.
void OpDispatchBuilder::ARPLOp(OpcodeArgs) {
  constexpr auto SelectorSize = OpSize::i16Bit;

  Ref DestSelector = LoadSourceGPR_WithOpSize(Op, Op->Dest, SelectorSize, Op->Flags, {.AllowUpperGarbage = true});
  Ref SrcSelector = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SelectorSize, Op->Flags, {.AllowUpperGarbage = true});

  // The RPL occupies the two lowest bits of a selector.
  Ref DestLevel = _Bfe(OpSize::i32Bit, 2, 0, DestSelector);
  Ref SrcLevel = _Bfe(OpSize::i32Bit, 2, 0, SrcSelector);

  // 1 when DestLevel < SrcLevel (unsigned), 0 otherwise; this is also the new ZF.
  Ref Adjust = _Select(OpSize::i32Bit, OpSize::i32Bit, CondClass::ULT, DestLevel, SrcLevel, Constant(1), Constant(0));
  SetRFLAG<FEXCore::X86State::RFLAG_ZF_RAW_LOC>(Adjust);

  // Destination selector with its low two bits replaced by the source RPL.
  auto Adjusted = _Bfxil(OpSize::i32Bit, 2, 0, DestSelector, SrcLevel);

  // Keep the original selector unless an adjustment is required.
  Ref Chosen = _Select(OpSize::i32Bit, OpSize::i32Bit, CondClass::NEQ, Adjust, Constant(0), Adjusted, DestSelector);
  StoreResultGPR_WithOpSize(Op, Op->Dest, Chosen, SelectorSize);
}

// Emits IR for MOVSXD. Behaviour by source size (capped at 32 bits):
//  - 16-bit: the low 16 bits of the destination are written, upper bits untouched.
//  - 32-bit with REX.W: the source is sign-extended to 64 bits.
//  - 32-bit without REX.W: the source is stored as-is (the store zero-extends).
void OpDispatchBuilder::MOVSXDOp(OpcodeArgs) {
  const auto SrcSize = std::min<IR::OpSize>(OpSize::i32Bit, OpSizeFromSrc(Op));
  const bool Is16Bit = SrcSize == OpSize::i16Bit;
  const bool SignExtend = !Is16Bit && (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) != 0;

  // Upper garbage is only acceptable when the value gets sign-extended below.
  Ref Value = LoadSourceGPR_WithOpSize(Op, Op->Src[0], SrcSize, Op->Flags, {.AllowUpperGarbage = SignExtend});

  if (Is16Bit) {
    // Sized store: inserts into the lower 16 bits without modifying upper bits.
    StoreResultGPR_WithOpSize(Op, Op->Dest, Value, SrcSize);
    return;
  }

  if (SignExtend) {
    // REX.W present: sign-extend from the source width to 64 bits.
    Value = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(SrcSize), 0, Value);
  }

  // Without REX.W the store result implicitly zero extends.
  StoreResultGPR(Op, Value);
}

// Emits IR for MOVSX: sign-extends the source to the destination size.
void OpDispatchBuilder::MOVSXOp(OpcodeArgs) {
  // Upper bits of the loaded source may be garbage; the sign extension discards them.
  const auto SrcSize = OpSizeFromSrc(Op);
  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  // 64-bit destinations extend at 64 bits. Everything else extends at 32 bits,
  // which is the fast path where the native 32-bit Sbfe zero extends the top.
  const auto DstSize = OpSizeFromDst(Op);
  const auto ExtendSize = DstSize == OpSize::i64Bit ? OpSize::i64Bit : OpSize::i32Bit;
  Value = _Sbfe(ExtendSize, IR::OpSizeAsBits(SrcSize), 0, Value);

  StoreResultGPR(Op, Op->Dest, Value);
}

// Emits IR for MOVZX: loads the source and stores it to the destination.
void OpDispatchBuilder::MOVZXOp(OpcodeArgs) {
  // No explicit extension op is needed: the store result implicitly zero extends.
  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  StoreResultGPR(Op, Value);
}

// Emits IR for CMP: computes the flags of (Dest - Src) without storing the
// difference. SrcIndex selects which entry of Op->Src is the subtrahend.
void OpDispatchBuilder::CMPOp(OpcodeArgs, uint32_t SrcIndex) {
  Ref Rhs = LoadSourceGPR(Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true});
  Ref Lhs = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  // Only the flag side effects are wanted; the returned value is dropped.
  CalculateFlags_SUB(OpSizeFromSrc(Op), Lhs, Rhs);
}

// Emits IR for the CWD/CDQ/CQO family: the destination receives the sign bit
// of the source replicated across the whole value.
void OpDispatchBuilder::CQOOp(OpcodeArgs) {
  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  const auto SrcSize = OpSizeFromSrc(Op);

  // Sign-extract the single top bit of the source; the extract is done at no less than 32 bits.
  const auto ExtractSize = std::max(OpSize::i32Bit, SrcSize);
  const auto SignBitIndex = GetSrcBitSize(Op) - 1;
  Ref SignFill = _Sbfe(ExtractSize, 1, SignBitIndex, Value);

  StoreResultGPR(Op, SignFill);
}

// Emits IR for XCHG.
//
//  - Opcode 0x90 with RAX as both operands emits nothing, or a yield when a
//    REP prefix is present.
//  - A memory destination is exchanged with an atomic swap (or, in a mono
//    backpatcher block, a dedicated write op with no value stored back to the
//    source register).
//  - Register/register forms load both values and store them crosswise.
void OpDispatchBuilder::XCHGOp(OpcodeArgs) {
  // Load both the source and the destination
  if (Op->OP == 0x90 && Op->Src[0].IsGPR() && Op->Src[0].Data.GPR.GPR == FEXCore::X86State::REG_RAX && Op->Dest.IsGPR() &&
      Op->Dest.Data.GPR.GPR == FEXCore::X86State::REG_RAX) {
    // This is one heck of a sucky special case
    // If we are the 0x90 XCHG opcode (Meaning source is GPR RAX)
    // and destination register is ALSO RAX
    // and in this very specific case we are 32bit or above
    // Then this is a no-op
    // This is because 0x90 without a prefix is technically `xchg eax, eax`
    // But this would result in a zext on 64bit, which would ruin the no-op nature of the instruction
    // So x86-64 spec mandates this special case that even though it is a 32bit instruction and
    // is supposed to zext the result, it is a true no-op
    //
    // x86 spec text here:
    //
    //    XCHG (E)AX, (E)AX (encoded instruction byte is 90H) is an alias for
    //    NOP regardless of data size prefixes, including REX.W.
    //
    // Note that also includes 16-bit so we don't gate this on size. The
    // sequence (66 90) is a valid two-byte nop that we also ignore.
    if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX) {
      // If this instruction has a REP prefix then this is architecturally
      // defined to be a `PAUSE` instruction. On older processors this ends up
      // being a true `REP NOP` which is why they stuck this here.
      _Yield();
    }
    return;
  }

  // AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult.
  Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  if (DestIsMem(Op)) {
    // The swap below is atomic either way, so a LOCK prefix (if present) is
    // recorded as handled.
    HandledLock = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK) != 0;

    Ref Dest = MakeSegmentAddress(Op, Op->Dest);
    if (IsMonoBackpatcherBlock) {
      // Note: this path does not write anything back to the source register.
      _MonoBackpatcherWrite(OpSizeFromSrc(Op), Src, Dest);
    } else {
      // The previous memory contents end up in the source register.
      auto Result = _AtomicSwap(OpSizeFromSrc(Op), Src, Dest);
      StoreResultGPR(Op, Op->Src[0], Result);
    }
  } else {
    // AllowUpperGarbage: OK to allow as it will be overwritten by StoreResult.
    Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

    // Swap the contents
    // Order matters here since we don't want to swap context contents for one that effects the other
    StoreResultGPR(Op, Op->Dest, Src);
    StoreResultGPR(Op, Op->Src[0], Dest);
  }
}

// Emits IR for the CBW/CWDE/CDQE family: sign-extends the lower half of the
// accumulator to the destination size.
void OpDispatchBuilder::CDQOp(OpcodeArgs) {
  const auto DstSize = OpSizeFromDst(Op);
  // The value being extended is half as wide as the destination.
  const auto HalfSize = DstSize / 2;
  Ref Value = LoadGPRRegister(X86State::REG_RAX, HalfSize, 0, true);

  // Extend at 64 bits only for destinations wider than 32 bits.
  const auto ExtendSize = DstSize > OpSize::i32Bit ? OpSize::i64Bit : OpSize::i32Bit;
  Value = _Sbfe(ExtendSize, IR::OpSizeAsBits(HalfSize), 0, Value);

  StoreResultGPR_WithOpSize(Op, Op->Dest, Value, DstSize);
}

void OpDispatchBuilder::SAHFOp(OpcodeArgs) {
  // AH is the 8-bit slice of RAX starting at bit offset 8.
  Ref AH = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit, 8);

  // Mask off the bits that aren't supposed to be set (mask 0b101000, i.e.
  // bits 3 and 5).
  Ref Masked = _Andn(OpSize::i64Bit, AH, Constant(0b101000));

  // Force on the bit that is always set (0b10, i.e. bit 1).
  Ref FlagsByte = _Or(OpSize::i64Bit, Masked, _InlineConstant(0b10));

  // Commit the lower 8 bits to RFLAGS.
  SetPackedRFLAG(true, FlagsByte);
}
void OpDispatchBuilder::LAHFOp(OpcodeArgs) {
  // Pack the lower 8 bits of RFLAGS (mask 0xFF) and write them to AH, the
  // 8-bit slice of RAX at bit offset 8.
  StoreGPRRegister(X86State::REG_RAX, GetPackedRFLAG(0xFF), OpSize::i8Bit, 8);
}

void OpDispatchBuilder::FLAGControlOp(OpcodeArgs) {
  // Resolve any pending flag computation before touching an individual flag.
  CalculateDeferredFlags();

  const auto Opcode = Op->OP;

  if (Opcode == 0xF5) {
    // CMC: flip the carry.
    CarryInvert();
  } else if (Opcode == 0xF8) {
    // CLC: the helper takes the inverted carry, so pass 1.
    SetCFInverted(Constant(1));
  } else if (Opcode == 0xF9) {
    // STC: the helper takes the inverted carry, so pass 0.
    SetCFInverted(Constant(0));
  } else if (Opcode == 0xFC) {
    // CLD: DF is kept in a transformed form, stored as 1 here.
    StoreDF(Constant(1));
  } else if (Opcode == 0xFD) {
    // STD: transformed DF, stored as -1 here.
    StoreDF(Constant(-1));
  }
  // Any other opcode routed here emits nothing beyond the flag flush above.
}


// Emits IR for MOV between a segment register and a GPR/memory operand.
//
// ToSeg == true:  the source operand is written to the segment register named by Op->Dest.
// ToSeg == false: the segment register named by Op->Src[0] is read and stored to Op->Dest.
//
// In both directions it is the 16-bit selector (the `*_idx` field in CPUState) that is
// transferred, not the segment base.
void OpDispatchBuilder::MOVSegOp(OpcodeArgs, bool ToSeg) {
  // In x86-64 mode, accesses to the segment registers still touch the segments:
  // they read and write the selector portion of the register.
  //
  // FS and GS are specially handled here though.
  // AMD documentation is /wrong/ in this regard:
  // it claims that MOV to SReg and POP SReg will load a 32bit value in to the HIDDEN
  // portions of the FS and GS registers /OR/ be ignored if a null selector is selected
  // for the registers.
  // This statement is actually untrue, the instructions will /actually/ load 16bits in to
  // the selector portion of the register!
  // Tested on a Zen+ CPU, the selector is the portion that is modified!
  //
  // We don't currently support FS/GS selector modifying in 64-bit mode, so a write is
  // reported as a decode failure and a read yields constant zero.
  // The loads here also load the selector, NOT the base.

  if (ToSeg) {
    // Only the low 16 bits of the source are relevant for a selector write.
    Ref Src = LoadSourceGPR_WithOpSize(Op, Op->Src[0], OpSize::i16Bit, Op->Flags);

    // Each segment is reachable through two register encodings; both map to the same case.
    switch (Op->Dest.Data.GPR.GPR) {
    case FEXCore::X86State::REG_RAX: // ES
    case FEXCore::X86State::REG_R8:  // ES
      _StoreContextGPR(OpSize::i16Bit, Src, offsetof(FEXCore::Core::CPUState, es_idx));
      UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX);
      break;
    case FEXCore::X86State::REG_RBX: // DS
    case FEXCore::X86State::REG_R11: // DS
      _StoreContextGPR(OpSize::i16Bit, Src, offsetof(FEXCore::Core::CPUState, ds_idx));
      UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);
      break;
    case FEXCore::X86State::REG_RCX: // CS
    case FEXCore::X86State::REG_R9:  // CS
      // CPL3 can't write to this, raise SIGILL instead
      Break(FEXCore::IR::BreakDefinition {
        .ErrorRegister = 0,
        .Signal = SIGILL,
        .TrapNumber = 0,
        .si_code = 0,
      });
      break;
    case FEXCore::X86State::REG_RDX: // SS
    case FEXCore::X86State::REG_R10: // SS
      _StoreContextGPR(OpSize::i16Bit, Src, offsetof(FEXCore::Core::CPUState, ss_idx));
      UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX);
      break;
    case FEXCore::X86State::REG_RBP: // GS
    case FEXCore::X86State::REG_R13: // GS
      if (!Is64BitMode) {
        _StoreContextGPR(OpSize::i16Bit, Src, offsetof(FEXCore::Core::CPUState, gs_idx));
        UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX);
      } else {
        // Unsupported: log and flag the instruction as a decode failure.
        LogMan::Msg::EFmt("We don't support modifying GS selector in 64bit mode!");
        DecodeFailure = true;
      }
      break;
    case FEXCore::X86State::REG_RSP: // FS
    case FEXCore::X86State::REG_R12: // FS
      if (!Is64BitMode) {
        _StoreContextGPR(OpSize::i16Bit, Src, offsetof(FEXCore::Core::CPUState, fs_idx));
        UpdatePrefixFromSegment(Src, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX);
      } else {
        // Unsupported: log and flag the instruction as a decode failure.
        LogMan::Msg::EFmt("We don't support modifying FS selector in 64bit mode!");
        DecodeFailure = true;
      }
      break;
    default: UnimplementedOp(Op); return;
    }
  } else {
    Ref Segment {};

    // Same register-encoding-to-segment mapping as the write path above.
    switch (Op->Src[0].Data.GPR.GPR) {
    case FEXCore::X86State::REG_RAX: // ES
    case FEXCore::X86State::REG_R8:  // ES
      Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, es_idx));
      break;
    case FEXCore::X86State::REG_RBX: // DS
    case FEXCore::X86State::REG_R11: // DS
      Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, ds_idx));
      break;
    case FEXCore::X86State::REG_RCX: // CS
    case FEXCore::X86State::REG_R9:  // CS
      Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, cs_idx));
      break;
    case FEXCore::X86State::REG_RDX: // SS
    case FEXCore::X86State::REG_R10: // SS
      Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, ss_idx));
      break;
    case FEXCore::X86State::REG_RBP: // GS
    case FEXCore::X86State::REG_R13: // GS
      if (Is64BitMode) {
        // The GS selector isn't tracked in 64-bit mode; reads return zero.
        Segment = Constant(0);
      } else {
        Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, gs_idx));
      }
      break;
    case FEXCore::X86State::REG_RSP: // FS
    case FEXCore::X86State::REG_R12: // FS
      if (Is64BitMode) {
        // The FS selector isn't tracked in 64-bit mode; reads return zero.
        Segment = Constant(0);
      } else {
        Segment = _LoadContextGPR(OpSize::i16Bit, offsetof(FEXCore::Core::CPUState, fs_idx));
      }
      break;
    default: UnimplementedOp(Op); return;
    }
    if (DestIsMem(Op)) {
      // If the destination is memory then we always store 16-bits only
      StoreResultGPR_WithOpSize(Op, Op->Dest, Segment, OpSize::i16Bit);
    } else {
      // If the destination is a GPR then we follow register storing rules
      StoreResultGPR(Op, Segment);
    }
  }
}

void OpDispatchBuilder::MOVOffsetOp(OpcodeArgs) {
  const auto Opcode = Op->OP;

  if (Opcode == 0xA0 || Opcode == 0xA1) {
    // Load form: the source is a literal memory address, the destination a GPR.
    // ForceLoad makes the literal be dereferenced instead of used as a value.
    auto Loaded = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.ForceLoad = true});
    StoreResultGPR(Op, Op->Dest, Loaded);
    return;
  }

  if (Opcode == 0xA2 || Opcode == 0xA3) {
    // Store form: the source is a GPR, the destination a literal memory address.
    Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

    // The literal destination is carried in Src[1] rather than Dest for this encoding.
    StoreResultGPR(Op, Op->Src[1], Value);
    return;
  }

  // Any other opcode emits nothing.
}

void OpDispatchBuilder::CPUIDOp(OpcodeArgs) {
  // Inputs: the decoded source operand (read at full GPR width) and RCX.
  Ref Function = LoadSourceGPR_WithOpSize(Op, Op->Src[0], GetGPROpSize(), Op->Flags);
  Ref SubLeaf = LoadGPRRegister(X86State::REG_RCX);

  // Four freshly allocated GPRs receive the outputs of the CPUID IR op.
  Ref OutA = _AllocateGPR(false);
  Ref OutB = _AllocateGPR(false);
  Ref OutC = _AllocateGPR(false);
  Ref OutD = _AllocateGPR(false);

  _CPUID(Function, SubLeaf, OutA, OutB, OutC, OutD);

  // Write the results back to RAX, RBX, RCX and RDX, in that order.
  StoreGPRRegister(X86State::REG_RAX, OutA);
  StoreGPRRegister(X86State::REG_RBX, OutB);
  StoreGPRRegister(X86State::REG_RCX, OutC);
  StoreGPRRegister(X86State::REG_RDX, OutD);
}

// Returns the immediate shift count for a shift/rotate instruction.
// Is1Bit selects the implicit shift-by-one encodings; otherwise the literal in
// Src[1] is used after applying the x86 count mask.
uint32_t OpDispatchBuilder::GetConstantShift(X86Tables::DecodedOp Op, bool Is1Bit) {
  if (Is1Bit) {
    return 1;
  }

  // x86 masks the count with 0x3F for 64-bit operations and 0x1F for everything else.
  const uint64_t CountMask = OpSizeFromSrc(Op) == OpSize::i64Bit ? 0x3F : 0x1F;
  return Op->Src[1].Literal() & CountMask;
}

void OpDispatchBuilder::XGetBVOp(OpcodeArgs) {
  // RCX is the single input to the XGETBV IR op.
  Ref Selector = LoadGPRRegister(X86State::REG_RCX);

  // The two result halves land in freshly allocated GPRs.
  auto OutA = _AllocateGPR(false);
  auto OutD = _AllocateGPR(false);
  _XGetBV(Selector, OutA, OutD);

  // Results are written back to RAX and RDX.
  StoreGPRRegister(X86State::REG_RAX, OutA);
  StoreGPRRegister(X86State::REG_RDX, OutD);
}

void OpDispatchBuilder::SHLOp(OpcodeArgs) {
  // Variable-count SHL. The shift is emitted at 64-bit width for 64-bit
  // operands and at 32-bit width for everything narrower.
  const bool Is64Bit = OpSizeFromSrc(Op) == OpSize::i64Bit;

  auto Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  auto Count = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  Ref Shifted = _Lshl(Is64Bit ? OpSize::i64Bit : OpSize::i32Bit, Value, Count);

  // Flag generation and the result store are delegated to the shared shift handler.
  HandleShift(Op, Shifted, Value, ShiftType::LSL, Count);
}

void OpDispatchBuilder::SHLImmediateOp(OpcodeArgs, bool SHL1Bit) {
  // SHL by an immediate (or by the implicit 1 when SHL1Bit is set).
  Ref Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  uint64_t Count = GetConstantShift(Op, SHL1Bit);
  const bool Is64Bit = GetSrcBitSize(Op) == 64;

  // 64-bit operands shift at 64-bit width, everything else at 32-bit width.
  Ref Shifted = _Lshl(Is64Bit ? OpSize::i64Bit : OpSize::i32Bit, Value, Constant(Count));

  // Record the flag computation, flush it, then commit the result.
  CalculateFlags_ShiftLeftImmediate(OpSizeFromSrc(Op), Shifted, Value, Count);
  CalculateDeferredFlags();
  StoreResultGPR(Op, Shifted);
}

void OpDispatchBuilder::SHROp(OpcodeArgs) {
  // Variable-count logical right shift.
  const auto SrcSize = OpSizeFromSrc(Op);
  const bool AtLeast32Bit = SrcSize >= OpSize::i32Bit;

  // Narrow operands are shifted at 32-bit width, so their upper bits must be
  // clean; 32/64-bit operands shift at their own width and may carry garbage.
  auto Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = AtLeast32Bit});
  auto Count = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  auto Shifted = _Lshr(AtLeast32Bit ? SrcSize : OpSize::i32Bit, Value, Count);

  // Flag generation and the result store are delegated to the shared shift handler.
  HandleShift(Op, Shifted, Value, ShiftType::LSR, Count);
}

void OpDispatchBuilder::SHRImmediateOp(OpcodeArgs, bool SHR1Bit) {
  // Logical right shift by an immediate (or by the implicit 1 when SHR1Bit is set).
  const auto BitWidth = GetSrcBitSize(Op);

  // Only 32/64-bit operands may be loaded with garbage in the upper bits.
  auto Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = BitWidth >= 32});

  uint64_t Count = GetConstantShift(Op, SHR1Bit);
  const auto ShiftSize = BitWidth == 64 ? OpSize::i64Bit : OpSize::i32Bit;
  auto Shifted = _Lshr(ShiftSize, Value, Constant(Count));

  // Record the flag computation, flush it, then commit the result.
  CalculateFlags_ShiftRightImmediate(OpSizeFromSrc(Op), Shifted, Value, Count);
  CalculateDeferredFlags();
  StoreResultGPR(Op, Shifted);
}

// Emits IR for SHLD with a variable shift count (taken from Src[1]).
//
// The result is (Dest << Shift) | (Src >> (Size - Shift)): Dest is shifted left
// and the vacated low bits are filled from the top of Src. A masked shift count
// of zero leaves the destination value unchanged.
void OpDispatchBuilder::SHLDOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  const auto Size = GetSrcBitSize(Op);

  // Allow garbage on the Src if it will be ignored by the Lshr below
  Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = Size >= 32});
  Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags);

  // Allow garbage on the shift, we're masking it anyway.
  Ref Shift = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  // x86 masks the shift by 0x3F or 0x1F depending on size of op.
  if (Size == 64) {
    Shift = _And(OpSize::i64Bit, Shift, _InlineConstant(0x3F));
  } else {
    Shift = _And(OpSize::i64Bit, Shift, _InlineConstant(0x1F));
  }

  // a64 masks the bottom bits, so if we're using a native 32/64-bit shift, we
  // can negate to do the subtract (it's congruent), which saves a constant.
  // For 8/16-bit operands the explicit (Size - Shift) is required.
  auto ShiftRight = Size >= 32 ? _Neg(OpSize::i64Bit, Shift) : Sub(OpSize::i64Bit, Constant(Size), Shift);

  // Tmp1: the destination bits moved up. Tmp2: the source bits that fill in from below.
  auto Tmp1 = _Lshl(OpSize::i64Bit, Dest, Shift);
  auto Tmp2 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Src, ShiftRight);

  Ref Res = _Or(OpSize::i64Bit, Tmp1, Tmp2);

  // If shift count was zero then output doesn't change
  // Needs to be checked for the 32bit operand case
  // where shift = 0 and the source register still gets Zext
  //
  // TODO: With a backwards pass ahead-of-time, we could stick this in the
  // if(shift) used for flags.
  //
  // TODO: This whole function wants to be wrapped in the if. Maybe b/w pass is
  // a good idea after all.
  Res = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, Shift, Constant(0), Dest, Res);

  // Flag generation and the result store are delegated to the shared shift handler.
  HandleShift(Op, Res, Dest, ShiftType::LSL, Shift);
}

void OpDispatchBuilder::SHLDImmediateOp(OpcodeArgs) {
  // SHLD with an immediate count: the destination is shifted left and the
  // vacated low bits are filled from the top of the source operand.
  uint64_t Count = GetConstantShift(Op, false);
  const auto BitWidth = GetSrcBitSize(Op);
  const bool NativeWidth = BitWidth >= 32;

  // Both operands are always loaded, even when the count turns out to be zero.
  Ref Filler = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = NativeWidth});
  Ref Target = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = NativeWidth});

  if (Count == 0) {
    // A zero count changes neither the value nor the flags, but a 32-bit
    // operation must still store so that the zero-extension occurs.
    if (BitWidth == 32) {
      StoreResultGPR(Op, Target);
    }
    return;
  }

  Ref Combined {};
  if (NativeWidth) {
    // 32-bit and 64-bit SHLD behaves like an EXTR where the lower bits are filled from the source.
    Combined = _Extr(OpSizeFromSrc(Op), Target, Filler, BitWidth - Count);
  } else {
    // 8/16-bit: build the result from two explicit shifts.
    Ref LeftCount = Constant(Count);
    auto RightCount = BitWidth - Count;

    auto High = _Lshl(OpSize::i64Bit, Target, LeftCount);
    Ref Low = RightCount ? _Lshr(OpSize::i32Bit, Filler, Constant(RightCount)) : Filler;

    Combined = _Or(OpSize::i64Bit, High, Low);
  }

  // Record the flag computation, flush it, then commit the result.
  CalculateFlags_ShiftLeftImmediate(OpSizeFromSrc(Op), Combined, Target, Count);
  CalculateDeferredFlags();
  StoreResultGPR(Op, Combined);
}

// Emits IR for SHRD with a variable shift count taken from RCX.
//
// The result is (Dest >> Shift) | (Src << (Size - Shift)): Dest is shifted
// right and the vacated high bits are filled from the bottom of Src. A masked
// shift count of zero leaves the destination value unchanged.
void OpDispatchBuilder::SHRDOp(OpcodeArgs) {
  // Calculate flags early.
  // This instruction conditionally generates flags so we need to insure sane state going in.
  CalculateDeferredFlags();

  Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags);

  // The count is read directly from RCX rather than through a decoded operand.
  Ref Shift = LoadGPRRegister(X86State::REG_RCX);

  const auto Size = GetDstBitSize(Op);

  // x86 masks the shift by 0x3F or 0x1F depending on size of op
  if (Size == 64) {
    Shift = _And(OpSize::i64Bit, Shift, _InlineConstant(0x3F));
  } else {
    Shift = _And(OpSize::i64Bit, Shift, _InlineConstant(0x1F));
  }

  auto ShiftLeft = Sub(OpSize::i64Bit, Constant(Size), Shift);

  // Tmp1: the destination bits moved down. Tmp2: the source bits that fill in from above.
  auto Tmp1 = _Lshr(Size == 64 ? OpSize::i64Bit : OpSize::i32Bit, Dest, Shift);
  auto Tmp2 = _Lshl(OpSize::i64Bit, Src, ShiftLeft);

  Ref Res = _Or(OpSize::i64Bit, Tmp1, Tmp2);

  // If shift count was zero then output doesn't change
  // Needs to be checked for the 32bit operand case
  // where shift = 0 and the source register still gets Zext
  Res = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, Shift, Constant(0), Dest, Res);

  // Flag generation and the result store are delegated to the shared shift handler.
  HandleShift(Op, Res, Dest, ShiftType::LSR, Shift);
}

void OpDispatchBuilder::SHRDImmediateOp(OpcodeArgs) {
  // SHRD with an immediate count: the destination is shifted right and the
  // vacated high bits are filled from the bottom of the source operand.
  // Both operands are always loaded, even when the count turns out to be zero.
  Ref Filler = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
  Ref Target = LoadSourceGPR(Op, Op->Dest, Op->Flags);

  uint64_t Count = GetConstantShift(Op, false);
  const auto BitWidth = GetSrcBitSize(Op);

  if (Count == 0) {
    // A zero count changes neither the value nor the flags, but a 32-bit
    // operation must still store so that the zero-extension occurs.
    if (BitWidth == 32) {
      StoreResultGPR(Op, Target);
    }
    return;
  }

  Ref Combined {};
  if (BitWidth >= 32) {
    // 32-bit and 64-bit SHRD behaves like an EXTR where the upper bits are filled from the source.
    Combined = _Extr(OpSizeFromSrc(Op), Filler, Target, Count);
  } else {
    // 8/16-bit: build the result from two explicit shifts.
    Ref RightCount = Constant(Count);
    auto LeftCount = Constant(BitWidth - Count);

    auto Low = _Lshr(OpSize::i32Bit, Target, RightCount);
    auto High = _Lshl(OpSize::i64Bit, Filler, LeftCount);

    Combined = _Or(OpSize::i64Bit, Low, High);
  }

  // Commit the result first, then record the flag computation.
  StoreResultGPR(Op, Combined);
  CalculateFlags_ShiftRightDoubleImmediate(OpSizeFromSrc(Op), Combined, Target, Count);
}

void OpDispatchBuilder::ASHROp(OpcodeArgs, bool Immediate, bool SHR1Bit) {
  // Arithmetic right shift, covering both the immediate and variable-count forms.
  const auto SrcSize = OpSizeFromSrc(Op);
  const auto ShiftSize = std::max(OpSize::i32Bit, OpSizeFromDst(Op));
  const bool IsNarrow = SrcSize < OpSize::i32Bit;

  // Narrow operands get sign-extended explicitly below, and operands whose
  // size equals the shift size (both 4 or 8 bytes) match the a64 semantics
  // directly. In both cases upper garbage is harmless; only a size mismatch
  // requires a clean, zero-extended load.
  auto Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = (ShiftSize == SrcSize) || IsNarrow});

  if (IsNarrow) {
    Value = _Sbfe(OpSize::i64Bit, IR::OpSizeAsBits(SrcSize), 0, Value);
  }

  if (!Immediate) {
    // Variable count: flag generation and the store go through the shared shift handler.
    auto Count = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
    Ref Shifted = _Ashr(ShiftSize, Value, Count);

    HandleShift(Op, Shifted, Value, ShiftType::ASR, Count);
    return;
  }

  // Immediate count: record the flag computation, flush it, then commit the result.
  uint64_t Count = GetConstantShift(Op, SHR1Bit);
  Ref Shifted = _Ashr(ShiftSize, Value, Constant(Count));

  CalculateFlags_SignShiftRightImmediate(OpSizeFromSrc(Op), Shifted, Value, Count);
  CalculateDeferredFlags();
  StoreResultGPR(Op, Shifted);
}

// Emits IR for ROL/ROR.
//
// Left        - true for ROL, false for ROR.
// IsImmediate - the count is an immediate in Src[1].
// Is1Bit      - the count is the implicit 1 of the rotate-by-one encodings.
// When neither IsImmediate nor Is1Bit is set, the count is loaded from Src[1].
//
// Every rotate is emitted as a right-rotate; a left rotate negates the count.
void OpDispatchBuilder::RotateOp(OpcodeArgs, bool Left, bool IsImmediate, bool Is1Bit) {
  CalculateDeferredFlags();

  const uint32_t Size = GetSrcBitSize(Op);
  // NOTE: this local shadows the `OpSize` name; qualified uses such as
  // `OpSize::i64Bit` below still resolve to the enumerators.
  const auto OpSize = Size == 64 ? OpSize::i64Bit : OpSize::i32Bit;
  uint64_t UnmaskedConst {};

  // x86 masks the shift by 0x3F or 0x1F depending on size of op. But it's
  // equivalent to mask to the actual size of the op, that way we can bound
  // things tighter for 8-bit later in the function.
  uint64_t Mask = Size == 8 ? 7 : (Size == 64 ? 0x3F : 0x1F);

  ArithRef UnmaskedSrc;
  if (Is1Bit || IsImmediate) {
    // Constant count (already masked to 0x1F/0x3F by GetConstantShift).
    UnmaskedConst = GetConstantShift(Op, Is1Bit);
    UnmaskedSrc = ARef(UnmaskedConst);
  } else {
    UnmaskedSrc = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}));
  }
  auto Src = UnmaskedSrc.And(Mask);

  // We fill the upper bits so we allow garbage on load.
  auto Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  if (Size < 32) {
    // ARM doesn't support 8/16bit rotates. Emulate with an insert
    // StoreResult truncates back to a 8/16 bit value
    Dest = _Bfi(OpSize, Size, Left ? (32 - Size) : Size, Dest, Dest);
  }

  // To rotate 64-bits left, right-rotate by (64 - Shift) = -Shift mod 64.
  auto Res = _Ror(OpSize, Dest, (Left ? Src.Neg() : Src).Ref());
  StoreResultGPR(Op, Res);

  if (Is1Bit || IsImmediate) {
    // Constant count: flags are only touched when the count is nonzero.
    if (UnmaskedSrc.C) {
      // Extract the last bit shifted in to CF
      SetCFDirect(Res, Left ? 0 : Size - 1, true);

      // For ROR, OF is the XOR of the new CF bit and the most significant bit of the result.
      // For ROL, OF is the LSB and MSB XOR'd together.
      // OF is architecturally only defined for 1-bit rotate.
      if (UnmaskedSrc.C == 1) {
        auto NewOF = _XorShift(OpSize, Res, Res, ShiftType::LSR, Left ? Size - 1 : 1);
        SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Left ? 0 : Size - 2, true);
      }
    }
  } else {
    // Variable count: the flag update is handled by a dedicated IR op.
    HandleNZCVWrite();
    RectifyCarryInvert(true);

    // We deferred the masking for 8-bit to the flag section, do it here.
    if (Size == 8) {
      Src = UnmaskedSrc.And(0x1F);
    }

    _RotateFlags(OpSizeFromSrc(Op), Res, Src.Ref(), Left);
  }
}

void OpDispatchBuilder::ANDNBMIOp(OpcodeArgs) {
  // BMI1 ANDN. The _Andn IR op is given the second source first and the
  // first source second.
  const auto Size = OpSizeFromSrc(Op);

  auto* First = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Second = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  auto Result = _Andn(Size, Second, First);

  // Commit the result, then derive the flags as for a logical operation.
  StoreResultGPR(Op, Result);
  CalculateFlags_Logical(Size, Result);
}

void OpDispatchBuilder::BEXTRBMIOp(OpcodeArgs) {
  // BMI1 BEXTR: Result = (Value >> Start) & ((1 << Length) - 1), where Start
  // is bits [7:0] of the control operand and Length is bits [15:8], plus
  // handling for out-of-range Start/Length and the flag updates.

  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Control = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  const auto Size = OpSizeFromSrc(Op);
  const auto WidthInBits = IR::OpSizeAsBits(Size);
  const auto HighestBit = WidthInBits - 1;
  auto HighestBitConst = Constant(HighestBit);

  // Move the field down so it begins at bit 0.
  auto Start = _Bfe(Size, 8, 0, Control);
  auto Lowered = _Lshr(Size, Value, Start);

  // A start position past the operand width yields zero.
  auto Zero = Constant(0);
  auto Field = _Select(Size, Size, CondClass::ULE, Start, HighestBitConst, Lowered, Zero);

  // Field length comes from the second byte of the control operand.
  auto Length = _Bfe(Size, 8, 8, Control);

  // (1 << Length) - 1 == ~(~0 << Length), so build the inverted mask and
  // apply it with an and-not.
  auto AllBits = Constant(~0ull);
  auto KeepNone = _Lshl(Size, AllBits, Length);
  auto Trimmed = _Andn(Size, Field, KeepNone);

  // A length past the operand width means nothing is trimmed.
  auto Result = _Select(Size, Size, CondClass::ULE, Length, HighestBitConst, Trimmed, Field);

  StoreResultGPR(Op, Result);

  // ZF is set properly. CF and OF are defined as being set to zero. SF, PF, and
  // AF are undefined.
  SetNZ_ZeroCV(GetOpSize(Result), Result);
  InvalidatePF_AF();
}

void OpDispatchBuilder::BLSIBMIOp(OpcodeArgs) {
  // BMI1 BLSI: Result = Value & -Value.
  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
  const auto Size = OpSizeFromSrc(Op);

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto Negated = _Neg(Size, Value);
  auto Isolated = _And(Size, Value, Negated);

  StoreResultGPR(Op, Isolated);

  // ZF/SF/OF are set as usual from the result.
  SetNZ_ZeroCV(Size, Isolated);
  InvalidatePF_AF();

  // CF is set when the source is nonzero and cleared when it is zero. The
  // source is zero iff the result is zero, so CF is the inverse of the ZF
  // that was just computed.
  auto ZeroFlag = GetRFLAG(X86State::RFLAG_ZF_RAW_LOC);
  SetCFInverted(ZeroFlag);
}

void OpDispatchBuilder::BLSMSKBMIOp(OpcodeArgs) {
  // BMI1 BLSMSK: Result = (Value - 1) ^ Value.
  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
  const auto Size = OpSizeFromSrc(Op);

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto Decremented = Sub(Size, Value, 1);
  auto MaskResult = _Xor(Size, Decremented, Value);

  StoreResultGPR(Op, MaskResult);
  InvalidatePF_AF();

  // CF depends on the source; compute its inverted form (source != 0) before
  // the flag write below.
  auto InvertedCarry = To01(OpSize::i64Bit, Value);

  // The output of BLSMSK is always nonzero, so this clears Z (along with C
  // and O) while setting S from the result.
  SetNZ_ZeroCV(Size, MaskResult);
  SetCFInverted(InvertedCarry);
}

void OpDispatchBuilder::BLSRBMIOp(OpcodeArgs) {
  // BMI1 BLSR: Result = (Value - 1) & Value.
  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");
  const auto Size = OpSizeFromSrc(Op);

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto Decremented = Sub(Size, Value, 1);
  auto Cleared = _And(Size, Decremented, Value);

  StoreResultGPR(Op, Cleared);

  // Inverted carry is (source != 0), materialised as 0/1.
  auto InvertedCarry = To01(OpSize::i64Bit, Value);

  // N/Z come from the result with C/V zeroed; the carry is then overridden.
  SetNZ_ZeroCV(Size, Cleared);
  SetCFInverted(InvertedCarry);
  InvalidatePF_AF();
}

// Emits IR for the flagless BMI2 shifts: SARX, SHLX, and SHRX.
void OpDispatchBuilder::BMI2Shift(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  const auto Size = OpSizeFromSrc(Op);

  // A register source is read at full GPR width; a memory source must be read
  // at exactly the operand width.
  const auto ValueSize = Op->Src[0].IsGPR() ? GPRSize : Size;

  auto* Value = LoadSourceGPR_WithOpSize(Op, Op->Src[0], ValueSize, Op->Flags);
  auto* Count = LoadSourceGPR_WithOpSize(Op, Op->Src[1], GPRSize, Op->Flags, {.AllowUpperGarbage = true});

  Ref Result;
  switch (Op->OP) {
  case 0x6F7: // SARX
    Result = _Ashr(Size, Value, Count);
    break;
  case 0x5F7: // SHLX
    Result = _Lshl(Size, Value, Count);
    break;
  default: // SHRX
    Result = _Lshr(Size, Value, Count);
    break;
  }

  StoreResultGPR(Op, Result);
}

// Emits IR for BMI2 BZHI: zero the bits of Src[0] from the bit position given
// by Src[1] upwards. When the index (its bottom 8 bits) is at or above the
// operand width, the source is passed through unchanged.
void OpDispatchBuilder::BZHI(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);
  const auto OperandSize = IR::OpSizeAsBits(Size);

  // In 32-bit mode we only look at bottom 32-bit, no 8 or 16-bit BZHI so no
  // need to zero-extend sources
  auto* Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  auto* Index = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  // Clear the high bits specified by the index. A64 only considers bottom bits
  // of the shift, so we don't need to mask bottom 8-bits ourselves.
  // Out-of-bounds results ignored after.
  auto Mask = _Lshl(Size, Constant(-1), Index);
  auto MaskResult = _Andn(Size, Src, Mask);

  // If the index is above OperandSize, we don't clear anything. BZHI only
  // considers the bottom 8-bits, so we really want to know if the bottom 8-bits
  // have their top bits set. Test exactly that.
  //
  // Because we're clobbering flags internally we ignore all carry invert
  // shenanigans and use the raw versions here.
  _TestNZ(OpSize::i64Bit, Index, Constant(0xFF & ~(OperandSize - 1)));
  // NEQ here means the test found an out-of-range bit: keep Src unmodified.
  auto Result = _NZCVSelect(Size, CondClass::NEQ, Src, MaskResult);
  StoreResultGPR(Op, Result);

  // Capture the in-range condition as 0/1 before the flags are overwritten
  // below; it is written back as the inverted carry.
  auto CFInv = _NZCVSelect01(CondClass::EQ);

  InvalidatePF_AF();
  SetNZ_ZeroCV(Size, Result);
  SetCFInverted(CFInv);
}

void OpDispatchBuilder::RORX(OpcodeArgs) {
  // BMI2 RORX: rotate right by an immediate; no flag updates are emitted here.
  const auto Size = OpSizeFromSrc(Op);
  const auto WidthInBits = IR::OpSizeAsBits(Size);

  // The immediate is reduced modulo the operand width.
  const auto Count = Op->Src[1].Literal() & (WidthInBits - 1);
  const auto GPRSize = GetGPROpSize();

  const auto NeedsRotate = Count != 0 && Count < WidthInBits;
  const auto InPlace = Op->Src[0].IsGPR() && Op->Dest.IsGPR() && Op->Src[0].Data.GPR.GPR == Op->Dest.Data.GPR.GPR;
  const auto FullWidth = Size == GPRSize;

  // Rotating a register onto itself by zero is a complete no-op, but only
  // when the operation covers the whole GPR: a 32-bit operation in 64-bit
  // mode must still zero-extend the destination, so it cannot be skipped.
  //
  // Very unlikely, but hey, we can do nothing faster.
  if (!NeedsRotate && InPlace && FullWidth) [[unlikely]] {
    return;
  }

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Rotated = Value;
  if (NeedsRotate) [[likely]] {
    Rotated = _Ror(Size, Value, _InlineConstant(Count));
  }

  StoreResultGPR(Op, Rotated);
}

void OpDispatchBuilder::MULX(OpcodeArgs) {
  // BMI2 MULX: unsigned multiply of Src[1] by the implicit RDX operand. The
  // high half goes to Dest and the low half to Src[0].
  const auto MulSize = OpSizeFromSrc(Op);
  const auto GPRSize = GetGPROpSize();

  // The explicit multiplicand may be a memory operand; in that case the load
  // must be constrained to exactly the operand width.
  const auto MultiplicandSize = Op->Src[1].IsGPR() ? GPRSize : MulSize;

  Ref Multiplicand = LoadSourceGPR_WithOpSize(Op, Op->Src[1], MultiplicandSize, Op->Flags);
  Ref Implicit = LoadGPRRegister(X86State::REG_RDX, GPRSize);

  // As per the Intel Software Development Manual, when both destination
  // operands name the same register, that register receives the high half.
  const bool SharedDestination = Op->Dest.Data.GPR.GPR == Op->Src[0].Data.GPR.GPR;

  if (!SharedDestination) {
    Ref Low = _UMul(MulSize, Multiplicand, Implicit);
    Ref High = _UMulH(MulSize, Multiplicand, Implicit);

    StoreResultGPR(Op, Op->Src[0], Low);
    StoreResultGPR(Op, Op->Dest, High);
  } else {
    // Only the high half is observable, so skip the low multiply entirely.
    Ref High = _UMulH(MulSize, Multiplicand, Implicit);
    StoreResultGPR(Op, Op->Dest, High);
  }
}

void OpDispatchBuilder::PDEP(OpcodeArgs) {
  // BMI2 PDEP: forwarded to the _PDep IR op with Src[0] as the value and
  // Src[1] as the mask.
  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Selector = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  auto Deposited = _PDep(OpSizeFromSrc(Op), Value, Selector);
  StoreResultGPR(Op, Op->Dest, Deposited);
}

void OpDispatchBuilder::PEXT(OpcodeArgs) {
  // BMI2 PEXT: forwarded to the _PExt IR op with Src[0] as the value and
  // Src[1] as the mask.
  LOGMAN_THROW_A_FMT(Op->InstSize >= 4, "No masking needed");

  auto* Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Selector = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  auto Extracted = _PExt(OpSizeFromSrc(Op), Value, Selector);
  StoreResultGPR(Op, Op->Dest, Extracted);
}

// Emits IR for ADCX and ADOX (opcode 0x1F6 is ADCX, anything else routed here
// is treated as ADOX).
//
// Both are add-with-carry operations that update exactly one flag: ADCX uses
// and updates CF, ADOX uses and updates OF. Every other flag is restored from
// the NZCV value saved before the add.
void OpDispatchBuilder::ADXOp(OpcodeArgs) {
  // NOTE: this local shadows the `OpSize` name; qualified uses such as
  // `OpSize::i32Bit` below still resolve to the enumerators.
  const auto OpSize = OpSizeFromSrc(Op);

  // Only 32/64-bit anyway so allow garbage, we use 32-bit ops.
  auto* Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  auto* Before = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

  // Handles ADCX and ADOX
  const bool IsADCX = Op->OP == 0x1F6;
  auto Zero = Constant(0);

  // Before we go trashing NZCV, save the current NZCV state.
  Ref OldNZCV = GetNZCV();

  // We want to use arm64 adc. For ADOX, copy the overflow flag into CF.  For
  // ADCX, we just rectify the carry.
  if (IsADCX) {
    RectifyCarryInvert(false);
  } else {
    // If overflow, 0 - 0 sets carry. Else, forces carry to 0.
    _CondSubNZCV(OpSize::i32Bit, Zero, Zero, CondClass::FU, 0x0 /* nzcv */);
  }

  // Do the actual add.
  HandleNZCV_RMW();
  auto Result = _AdcWithFlags(OpSize, Src, Before);
  StoreResultGPR(Op, Result);

  // Now restore all flags except the one we're updating.
  if (CTX->HostFeatures.SupportsFlagM) {
    // For ADOX, we need to copy the new carry into the overflow flag. If carry is clear (ULT with uninverted
    // carry), 0 - 0 clears overflow. Else, force overflow on.
    if (!IsADCX) {
      _CondSubNZCV(OpSize::i32Bit, Zero, Zero, CondClass::ULT, 0x1 /* nzcV */);
    }

    // Re-insert the saved flags, masking out the one flag this instruction owns
    // (C for ADCX, V for ADOX).
    _RmifNZCV(OldNZCV, 28, IsADCX ? 0xd /* NzcV */ : 0xe /* NZCv */);
  } else {
    // For either operation, insert the new flag into the old NZCV.
    // CFInverted is temporarily cleared so the flag read below sees the raw
    // carry produced by the add, then set to the state matching the value
    // that is about to be written.
    bool SavedCFInvert = CFInverted;
    CFInverted = false;
    Ref OutputCF = GetRFLAG(X86State::RFLAG_CF_RAW_LOC, IsADCX);
    CFInverted = IsADCX ? true : SavedCFInvert;

    // Bit 29 is written for ADCX and bit 28 for ADOX.
    Ref NewNZCV = _Bfi(OpSize::i32Bit, 1, IsADCX ? 29 : 28, OldNZCV, OutputCF);
    SetNZCV(NewNZCV);
  }
}

void OpDispatchBuilder::RCROp1Bit(OpcodeArgs) {
  // RCR by exactly one bit: the old carry enters at the top and bit 0 of the
  // input becomes the new carry.

  // Resolve pending flags first; the incoming carry is read below.
  CalculateDeferredFlags();

  // Widths below 32 bits are masked explicitly further down, so garbage in
  // the upper bits of the load is harmless.
  Ref Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  const auto BitWidth = GetSrcBitSize(Op);
  auto CarryIn = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

  // The new CF is bit 0 of the input. Set it upfront to avoid a move.
  SetCFDirect(Value, 0, true);

  uint32_t Count = 1;
  const bool IsNarrow = BitWidth != 32 && BitWidth != 64;

  Ref Rotated;
  if (IsNarrow) {
    // Rotated = Value >> Count, keeping only the operand's own bits...
    Rotated = _Bfe(OpSize::i32Bit, BitWidth - Count, Count, Value);

    // ...then inject the old carry at the top bit of the operand.
    Rotated = _Orlshl(OpSize::i32Bit, Rotated, CarryIn, BitWidth - Count);
  } else {
    // Native width: rotate and insert CF in the upper bit with a single extract.
    Rotated = _Extr(OpSizeFromSrc(Op), CarryIn, Value, Count);
  }

  StoreResultGPR(Op, Rotated);

  // OF is the top two MSBs of the result XOR'd together. This is only
  // defined for a count of one, which is always the case here.
  auto TopBitsXor = _XorShift(OpSize::i64Bit, Rotated, Rotated, ShiftType::LSR, 1);
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(TopBitsXor, BitWidth - 2, true);
}

void OpDispatchBuilder::RCROp8x1Bit(OpcodeArgs) {
  // 8-bit RCR by exactly one bit.

  // Resolve pending flags first; the incoming carry is read below.
  CalculateDeferredFlags();

  Ref Value = LoadSourceGPR(Op, Op->Dest, Op->Flags);
  const auto BitWidth = GetSrcBitSize(Op);
  auto CarryIn = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

  // With a count of one, the new CF is bit 0 of the input.
  SetCFDirect(Value, 0, true);

  // Take bits [7:1] of the input as the low seven result bits, then insert
  // the old carry at bit 7.
  Ref LowSeven = _Bfe(OpSize::i32Bit, 7, 1, Value);
  Ref Rotated = _Bfi(OpSize::i32Bit, 1, 7, LowSeven, CarryIn);

  StoreResultGPR(Op, Rotated);

  // OF is the top two MSBs of the result XOR'd together.
  auto TopBitsXor = _XorShift(OpSize::i32Bit, Rotated, Rotated, ShiftType::LSR, 1);
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(TopBitsXor, BitWidth - 2, true);
}

// Emits IR for RCR (rotate right through carry) with a count taken from Src[1].
//
// 8/16-bit operands are forwarded to RCRSmallerOp. For 32/64-bit operands there
// are two paths: a constant-count path that folds the shifts, and a
// variable-count path that wraps the work in Calculate_ShiftVariable.
void OpDispatchBuilder::RCROp(OpcodeArgs) {
  const auto Size = GetSrcBitSize(Op);

  if (Size == 8 || Size == 16) {
    RCRSmallerOp(Op);
    return;
  }

  // x86 masks the count by 0x3F or 0x1F depending on size of op.
  const auto Mask = (Size == 64) ? 0x3F : 0x1F;

  // Calculate flags early.
  CalculateDeferredFlags();
  // NOTE: this local shadows the `OpSize` name; qualified uses such as
  // `OpSize::i32Bit` below still resolve to the enumerators.
  const auto OpSize = OpSizeFromSrc(Op);

  Ref Src = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
  uint64_t Const;
  if (IsValueConstant(WrapNode(Src), &Const)) {
    // Constant count path.
    Const &= Mask;
    if (!Const) {
      // A masked count of zero takes the dedicated zero-shift path.
      ZeroShiftResult(Op);
      return;
    }

    Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

    // Res = Dest >> Shift
    Ref Res = _Lshr(OpSize, Dest, Src);
    auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

    // Constant folded version of the variable-count sequence below, with fused
    // shifts: or in Dest shifted left by (Size + 1 - Const). A count of one has
    // no wrapped-around Dest bits, so this is skipped.
    if (Const > 1) {
      Res = _Orlshl(OpSize, Res, Dest, Size + 1 - Const);
    }

    // Our new CF will be bit (Shift - 1) of the source.
    SetCFDirect(Dest, Const - 1, true);

    // Since shift != 0 we can inject the CF
    Res = _Orlshl(OpSize, Res, CF, Size - Const);

    // OF is the top two MSBs XOR'd together
    // Only when Shift == 1, it is undefined otherwise
    if (Const == 1) {
      auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);
    }

    StoreResultGPR(Op, Res);
    return;
  }

  // Variable count path. The lambda holds the shift/flag sequence; the final
  // argument supplies ZeroShiftResult for 32-bit operands only.
  // NOTE(review): presumably Calculate_ShiftVariable skips the lambda when the
  // masked count is zero and runs the optional handler instead; confirm
  // against its definition.
  Ref SrcMasked = _And(OpSize, Src, _InlineConstant(Mask));
  Calculate_ShiftVariable(
    Op, SrcMasked,
    [this, Op, Size, OpSize]() {
      // Rematerialize loads to avoid crossblock liveness
      Ref Src = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
      Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});

      // Res = Dest >> Shift
      Ref Res = _Lshr(OpSize, Dest, Src);
      auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

      // Res |= (Dest << (Size - Shift + 1));
      // Expressed as Res | ((Dest << (Size - Shift)) << 1) to get correct
      // behaviour for Shift without clobbering NZCV. Then observe that modulo
      // Size, Size - Shift = -Shift so we can use a simple Neg.
      //
      // The masking of Lshl means we don't need mask the source, since:
      //
      //  -(x & Mask) & Mask = (-x) & Mask
      Ref NegSrc = _Neg(OpSize, Src);
      Res = _Orlshl(OpSize, Res, _Lshl(OpSize, Dest, NegSrc), 1);

      // Our new CF will be bit (Shift - 1) of the source. this is hoisted up to
      // avoid the need to copy the source. Again, the Lshr absorbs the masking.
      auto NewCF = _Lshr(OpSize, Dest, Sub(OpSize, Src, 1));
      SetCFDirect(NewCF, 0, true);

      // Since shift != 0 we can inject the CF
      Res = _Or(OpSize, Res, _Lshl(OpSize, CF, NegSrc));

      // OF is the top two MSBs XOR'd together
      // Only when Shift == 1, it is undefined otherwise
      auto Xor = _XorShift(OpSize, Res, Res, ShiftType::LSR, 1);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(Xor, Size - 2, true);

      StoreResultGPR(Op, Res);
    },
    OpSizeFromSrc(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}

// Emits IR for RCR (rotate right through carry) on the small operand sizes.
// The operand width comes from GetSrcBitSize(Op): the 8-bit layout is used when
// it is 8, and the 16-bit layout is used for every other width.
//
// Strategy: the operand and the incoming CF are replicated into a 64-bit
// temporary as repeated [CF][Data] groups. A plain 64-bit logical shift right
// of that temporary then produces the rotated value in the low bits.
void OpDispatchBuilder::RCRSmallerOp(OpcodeArgs) {
  CalculateDeferredFlags();

  const auto Size = GetSrcBitSize(Op);

  // x86 masks the shift by 0x3F or 0x1F depending on size of op.
  // This path always masks with 0x1F.
  auto Src = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}));
  Src = Src.And(0x1F);

  // CF only changes if we actually shifted. OF undefined if we didn't shift.
  // The result is unchanged if we didn't shift. So branch over the whole thing.
  Calculate_ShiftVariable(Op, Src.Ref(), [this, Op, Size]() {
    // Rematerialized to avoid crossblock liveness
    auto Src = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}));
    Src = Src.And(0x1F);

    auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

    // Loaded without AllowUpperGarbage; the cascades below rely on the
    // "Destination already at [N:0]" layout noted at the first insert.
    Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags);
    Ref Tmp {};

    // Insert the incoming value across the temporary 64bit source
    // Make sure to insert at <BitSize> + 1 offsets
    // We need to cover 32bits plus the amount that could rotate in

    if (Size == 8) {
      // 8-bit optimal cascade
      // Cascade: 0
      //   Data: -> [7:0]
      //   CF:   -> [8:8]
      // Cascade: 1
      //   Data: -> [16:9]
      //   CF:   -> [17:17]
      // Cascade: 2
      //   Data: -> [25:18]
      //   CF:   -> [26:26]
      // Cascade: 3
      //   Data: -> [34:27]
      //   CF:   -> [35:35]
      // Cascade: 4
      //   Data: -> [43:36]
      //   CF:   -> [44:44]

      // Insert CF, Destination already at [7:0]
      Tmp = _Bfi(OpSize::i64Bit, 1, 8, Dest, CF);

      // First Cascade, copies 9 bits from itself.
      Tmp = _Bfi(OpSize::i64Bit, 9, 9, Tmp, Tmp);

      // Second cascade, copies 18 bits from itself.
      Tmp = _Bfi(OpSize::i64Bit, 18, 18, Tmp, Tmp);

      // Final cascade, copies 9 bits again from itself.
      Tmp = _Bfi(OpSize::i64Bit, 9, 36, Tmp, Tmp);
    } else {
      // 16-bit optimal cascade
      // Cascade: 0
      //   Data: -> [15:0]
      //   CF:   -> [16:16]
      // Cascade: 1
      //   Data: -> [32:17]
      //   CF:   -> [33:33]
      // Cascade: 2
      //   Data: -> [49:34]
      //   CF:   -> [50:50]

      // Insert CF, Destination already at [15:0]
      Tmp = _Bfi(OpSize::i64Bit, 1, 16, Dest, CF);

      // First Cascade, copies 17 bits from itself.
      Tmp = _Bfi(OpSize::i64Bit, 17, 17, Tmp, Tmp);

      // Final Cascade, copies 17 bits from itself again.
      Tmp = _Bfi(OpSize::i64Bit, 17, 34, Tmp, Tmp);
    }

    // Entire bitfield has been setup. Just extract the 8 or 16bits we need.
    // 64-bit shift used because we want to rotate in our cascaded upper bits
    // rather than zeroes.
    Ref Res = _Lshr(OpSize::i64Bit, Tmp, Src.Ref());

    StoreResultGPR(Op, Res);

    // Our new CF will be bit (Shift - 1) of the source. 32-bit Lshr masks the
    // same as x86, but if we constant fold we must mask ourselves.
    if (Src.IsConstant) {
      // Constant count: pick bit (Shift - 1) of the cascade directly.
      SetCFDirect(Tmp, (Src.C & 0x1f) - 1, true);
    } else {
      // Variable count: shift the wanted bit down to bit 0 first.
      auto NewCF = _Lshr(OpSize::i32Bit, Tmp, Sub(OpSize::i32Bit, Src.Ref(), 1));
      SetCFDirect(NewCF, 0, true);
    }

    // OF is the top two MSBs XOR'd together
    // Only when Shift == 1, it is undefined otherwise
    // (so it is computed for every variable count, and skipped only for
    // constant counts other than 1).
    if (!Src.IsConstant || Src.C == 1) {
      // Res ^ (Res >> 1): bit (Size - 2) holds MSB ^ second MSB of the result.
      auto NewOF = _XorShift(OpSize::i32Bit, Res, Res, ShiftType::LSR, 1);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 2, true);
    }
  });
}

// RCL by exactly one bit: rotate the operand left through the carry flag.
void OpDispatchBuilder::RCLOp1Bit(OpcodeArgs) {
  // The incoming CF is consumed below, so resolve any pending flag work first.
  CalculateDeferredFlags();

  Ref Operand = LoadSourceGPR(Op, Op->Dest, Op->Flags);
  const auto BitWidth = GetSrcBitSize(Op);
  const auto ArithSize = (BitWidth == 64) ? OpSize::i64Bit : OpSize::i32Bit;
  auto CarryIn = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

  // Rotated = CarryIn | (Operand << 1): the old carry lands in bit 0.
  // TODO: Use `adc Res, xzr, Dest, lsl 1` to save an instruction
  Ref Rotated = _Orlshl(ArithSize, CarryIn, Operand, 1);

  // The bit rotated out of the top of the operand is the new carry.
  const auto TopBit = BitWidth - 1;
  SetCFDirect(Operand, TopBit, true);

  // OF = (new CF) ^ (MSB of the result). The new CF is the operand's old top
  // bit, so both live at TopBit of Rotated ^ Operand.
  auto OverflowBits = _Xor(ArithSize, Rotated, Operand);
  SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(OverflowBits, TopBit, true);

  StoreResultGPR(Op, Rotated);
}

// Emits IR for RCL (rotate left through carry).
//
// 8-bit and 16-bit operands are forwarded to RCLSmallerOp. Other sizes are
// handled here, with separate paths for a constant and a variable rotate count.
void OpDispatchBuilder::RCLOp(OpcodeArgs) {
  const auto Size = GetSrcBitSize(Op);

  if (Size == 8 || Size == 16) {
    RCLSmallerOp(Op);
    return;
  }

  // Rotate count mask: 0x3F for 64-bit operands, 0x1F otherwise.
  const auto Mask = (Size == 64) ? 0x3F : 0x1F;

  // Calculate flags early.
  CalculateDeferredFlags();

  Ref Src = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});
  const auto OpSize = OpSizeFromSrc(Op);

  uint64_t Const;
  if (IsValueConstant(WrapNode(Src), &Const)) {
    // Constant rotate count.
    Const &= Mask;
    if (!Const) {
      // Masked count is zero: take the zero-shift path and emit nothing else.
      ZeroShiftResult(Op);
      return;
    }

    // Res = Dest << Shift
    Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
    Ref Res = _Lshl(OpSize, Dest, Src);
    auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

    // Res |= (Dest >> (Size + 1 - Shift));
    // Skipped for Shift == 1, where the shift amount would equal Size.
    if (Const > 1) {
      Res = _Orlshr(OpSize, Res, Dest, Size + 1 - Const);
    }

    // Our new CF will be bit (Size - Shift) of the destination operand
    SetCFDirect(Dest, Size - Const, true);

    // Since Shift != 0 we can inject the CF (at bit Shift - 1)
    Res = _Orlshl(OpSize, Res, CF, Const - 1);

    // OF is the top two MSBs XOR'd together
    // Only when Shift == 1, it is undefined otherwise
    if (Const == 1) {
      auto NewOF = _Xor(OpSize, Res, Dest);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);
    }

    StoreResultGPR(Op, Res);
    return;
  }

  // Variable rotate count.
  Ref SrcMasked = _And(OpSize, Src, _InlineConstant(Mask));
  Calculate_ShiftVariable(
    Op, SrcMasked,
    [this, Op, Size, OpSize]() {
      // Rematerialized to avoid crossblock liveness
      Ref Src = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

      // Res = Dest << Shift
      Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
      Ref Res = _Lshl(OpSize, Dest, Src);
      auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

      // Res |= (Dest >> (Size - Shift + 1)), expressed as
      // Res | ((Dest >> (-Shift)) >> 1), since Size - Shift = -Shift mod
      // Size. The shift absorbs the masking.
      auto NegSrc = _Neg(OpSize, Src);
      Res = _Orlshr(OpSize, Res, _Lshr(OpSize, Dest, NegSrc), 1);

      // Our new CF will be bit (Size - Shift) of the destination operand,
      // moved down to bit 0 by shifting right by -Shift.
      auto NewCF = _Lshr(OpSize, Dest, NegSrc);
      SetCFDirect(NewCF, 0, true);

      // Since Shift != 0 we can inject the CF. Shift absorbs the masking.
      Ref CFShl = Sub(OpSize, Src, 1);
      auto TmpCF = _Lshl(OpSize, CF, CFShl);
      Res = _Or(OpSize, Res, TmpCF);

      // OF is the top two MSBs XOR'd together
      // Only when Shift == 1, it is undefined otherwise
      //
      // Note that NewCF has garbage in the upper bits, but we ignore them here
      // and mask as part of the set after.
      auto NewOF = _XorShift(OpSize, Res, NewCF, ShiftType::LSL, Size - 1);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, Size - 1, true);

      StoreResultGPR(Op, Res);
    },
    // Only 32-bit operations pass ZeroShiftResult as the optional handler.
    // NOTE(review): presumably run when the masked count is zero; confirm
    // against Calculate_ShiftVariable.
    OpSizeFromSrc(Op) == OpSize::i32Bit ? std::make_optional(&OpDispatchBuilder::ZeroShiftResult) : std::nullopt);
}

// Emits IR for RCL (rotate left through carry) on the small operand sizes
// (RCLOp forwards the 8-bit and 16-bit cases here).
//
// Strategy: the operand and the incoming CF are packed into a 64-bit temporary
// as repeated [Data][CF] groups. The result and the new CF are then both
// extracted from that temporary with 64-bit rotates.
void OpDispatchBuilder::RCLSmallerOp(OpcodeArgs) {
  CalculateDeferredFlags();

  const auto Size = GetSrcBitSize(Op);

  // x86 masks the shift by 0x3F or 0x1F depending on size of op.
  // This path always masks with 0x1F.
  auto Src = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}));
  Src = Src.And(0x1F);

  // CF only changes if we actually shifted. OF undefined if we didn't shift.
  // The result is unchanged if we didn't shift. So branch over the whole thing.
  Calculate_ShiftVariable(Op, Src.Ref(), [this, Op, Size]() {
    // Rematerialized to avoid crossblock liveness
    auto Src = ARef(LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true}));
    Src = Src.And(0x1F);
    Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags);

    auto CF = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);

    Ref Tmp = Constant(0);

    // Fill the temporary from the top down in strides of (Size + 1) bits:
    // CF at bit (63 - i), with a copy of the operand directly below it.
    for (size_t i = 0; i < (32 + Size + 1); i += (Size + 1)) {
      // Insert incoming value
      Tmp = _Bfi(OpSize::i64Bit, Size, 63 - i - Size, Tmp, Dest);

      // Insert CF
      Tmp = _Bfi(OpSize::i64Bit, 1, 63 - i, Tmp, CF);
    }

    // Insert incoming value
    Tmp = _Bfi(OpSize::i64Bit, Size, 0, Tmp, Dest);

    // The data is now set up like this
    // [Data][CF]:[Data][CF]:[Data][CF]:[Data][CF]
    // Shift 1 more bit that expected to get our result
    // Shifting to the right will now behave like a rotate to the left
    // Which we emulate with a _Ror
    // (the rotate amount is the negated, masked count)
    Ref Res = _Ror(OpSize::i64Bit, Tmp, Src.Neg().Ref());

    StoreResultGPR(Op, Res);

    // Our new CF is now at the bit position that we are shifting
    // Either 0 if CF hasn't changed (CF is living in bit 0)
    // or higher
    auto NewCF = _Ror(OpSize::i64Bit, Tmp, Src.Presub(63).Ref());
    SetCFDirect(NewCF, 0, true);

    // OF is the XOR of the NewCF and the MSB of the result
    // Only defined for 1-bit rotates.
    // (so it is computed for every variable count, and skipped only for
    // constant counts other than 1).
    if (!Src.IsConstant || Src.C == 1) {
      // NewCF ^ (Res >> (Size - 1)): bit 0 holds CF ^ result MSB.
      auto NewOF = _XorShift(OpSize::i64Bit, NewCF, Res, ShiftType::LSR, Size - 1);
      SetRFLAG<FEXCore::X86State::RFLAG_OF_RAW_LOC>(NewOF, 0, true);
    }
  });
}

// Emits IR for the bit-test family, selected by Action: BTNone only tests the
// bit; BTClear, BTSet and BTComplement also modify it.
//
// SrcIndex selects which entry of Op->Src holds the bit index. That operand is
// either a GPR or an immediate. The destination is either a GPR or memory, and
// the two cases are handled by separate paths below. In every case the selected
// bit is written to CF.
void OpDispatchBuilder::BTOp(OpcodeArgs, uint32_t SrcIndex, BTAction Action) {
  Ref Value;
  ArithRef Src;
  // True when the bit index comes from a register rather than an immediate.
  bool IsNonconstant = Op->Src[SrcIndex].IsGPR();

  const uint32_t Size = GetDstBitSize(Op);
  const uint32_t Mask = Size - 1;

  if (IsNonconstant) {
    // Because we mask explicitly with And/Bfe/Sbfe after, we can allow garbage here.
    Src = ARef(LoadSourceGPR(Op, Op->Src[SrcIndex], Op->Flags, {.AllowUpperGarbage = true}));
  } else {
    // Can only be an immediate
    // Masked by operand size
    Src = ARef(Op->Src[SrcIndex].Literal() & Mask);
  }

  if (Op->Dest.IsGPR()) {
    // When the destination is a GPR, we don't care about garbage in the upper bits.
    // Load the full register.
    auto Dest = LoadSourceGPR_WithOpSize(Op, Op->Dest, GetGPROpSize(), Op->Flags);
    Value = Dest;

    // Get the bit selection from the src. We need to mask for 8/16-bit, but
    // rely on the implicit masking of Lshr for native sizes.
    // LshrSize is in bytes: at least the 32-bit size, otherwise Size / 8.
    unsigned LshrSize = std::max<uint8_t>(IR::OpSizeToSize(OpSize::i32Bit), Size / 8);
    auto BitSelect = (Size == (LshrSize * 8)) ? Src : Src.And(Mask);
    auto LshrOpSize = IR::SizeToOpSize(LshrSize);

    // OF/SF/AF/PF undefined. ZF must be preserved. We choose to preserve OF/SF
    // too since we just use an rmif to insert into CF directly. We could
    // optimize perhaps.
    //
    // Set CF before the action to save a move, except for complements where we
    // can reuse the invert.
    if (Action != BTAction::BTComplement) {
      if (IsNonconstant) {
        // Register index: shift the selected bit down to bit 0.
        Value = _Lshr(IR::SizeToOpSize(LshrSize), Value, BitSelect.Ref());
      }

      // Constant index: read bit Src.C directly; otherwise read bit 0.
      SetRFLAG(Value, X86State::RFLAG_CF_RAW_LOC, Src.IsConstant ? Src.C : 0, true);
      CFInverted = false;
    }

    switch (Action) {
    case BTAction::BTNone: {
      /* Nothing to do */
      break;
    }

    case BTAction::BTClear: {
      // Dest & ~(bit mask)
      Dest = _Andn(LshrOpSize, Dest, BitSelect.MaskBit(LshrOpSize).Ref());
      StoreResultGPR(Op, Dest);
      break;
    }

    case BTAction::BTSet: {
      // Dest | (bit mask)
      Dest = _Or(LshrOpSize, Dest, BitSelect.MaskBit(LshrOpSize).Ref());
      StoreResultGPR(Op, Dest);
      break;
    }

    case BTAction::BTComplement: {
      // Dest ^ (bit mask)
      Dest = _Xor(LshrOpSize, Dest, BitSelect.MaskBit(LshrOpSize).Ref());

      // CF is taken from the already-complemented value here, which is why
      // CFInverted is set to true below.
      if (IsNonconstant) {
        Value = _Lshr(LshrOpSize, Dest, BitSelect.Ref());
      } else {
        Value = Dest;
      }

      SetRFLAG(Value, X86State::RFLAG_CF_RAW_LOC, Src.IsConstant ? Src.C : 0, true);
      CFInverted = true;

      StoreResultGPR(Op, Dest);
      break;
    }
    }
  } else {
    // Load the address to the memory location
    Ref Dest = MakeSegmentAddress(Op, Op->Dest);
    // Get the bit selection from the src
    // (low three bits: the bit index within the addressed byte)
    auto BitSelect = Src.Bfe(0, 3);

    // Address is provided as bits we want BYTE offsets
    // Extract Signed offset
    Src = Src.Sbfe(3, Size - 3);

    // Get the address offset by shifting out the size of the op (To shift out the bit selection)
    // Then use that to index in to the memory location by size of op
    AddressMode Address = {.Base = Dest, .Index = Src.Ref(), .AddrSize = OpSize::i64Bit};

    // Every memory access below is a single byte. For the modifying actions,
    // Value receives the result of the atomic fetch op or of the plain load.
    switch (Action) {
    case BTAction::BTNone: {
      Value = _LoadMemGPRAutoTSO(OpSize::i8Bit, Address, OpSize::i8Bit);
      break;
    }

    case BTAction::BTClear: {
      Ref BitMask = BitSelect.MaskBit(OpSize::i64Bit).Ref();

      if (DestIsLockedMem(Op)) {
        // Locked form: one atomic fetch-and-clear.
        HandledLock = true;
        Value = _AtomicFetchCLR(OpSize::i8Bit, BitMask, LoadEffectiveAddress(this, Address, GetGPROpSize(), true));
      } else {
        // Unlocked form: separate load, modify, store.
        Value = _LoadMemGPRAutoTSO(OpSize::i8Bit, Address, OpSize::i8Bit);

        auto Modified = _Andn(OpSize::i64Bit, Value, BitMask);
        _StoreMemGPRAutoTSO(OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
      }
      break;
    }

    case BTAction::BTSet: {
      Ref BitMask = BitSelect.MaskBit(OpSize::i64Bit).Ref();

      if (DestIsLockedMem(Op)) {
        // Locked form: one atomic fetch-and-or.
        HandledLock = true;
        Value = _AtomicFetchOr(OpSize::i8Bit, BitMask, LoadEffectiveAddress(this, Address, GetGPROpSize(), true));
      } else {
        // Unlocked form: separate load, modify, store.
        Value = _LoadMemGPRAutoTSO(OpSize::i8Bit, Address, OpSize::i8Bit);

        auto Modified = _Or(OpSize::i64Bit, Value, BitMask);
        _StoreMemGPRAutoTSO(OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
      }
      break;
    }

    case BTAction::BTComplement: {
      Ref BitMask = BitSelect.MaskBit(OpSize::i64Bit).Ref();

      if (DestIsLockedMem(Op)) {
        // Locked form: one atomic fetch-and-xor.
        HandledLock = true;
        Value = _AtomicFetchXor(OpSize::i8Bit, BitMask, LoadEffectiveAddress(this, Address, GetGPROpSize(), true));
      } else {
        // Unlocked form: separate load, modify, store.
        Value = _LoadMemGPRAutoTSO(OpSize::i8Bit, Address, OpSize::i8Bit);

        auto Modified = _Xor(OpSize::i64Bit, Value, BitMask);
        _StoreMemGPRAutoTSO(OpSize::i8Bit, Address, Modified, OpSize::i8Bit);
      }
      break;
    }
    }

    // Now shift in to the correct bit location
    // (skipped when the bit index is known to be zero)
    if (!BitSelect.IsDefinitelyZero()) {
      Value = _Lshr(std::max(OpSize::i32Bit, GetOpSize(Value)), Value, BitSelect.Ref());
    }

    // OF/SF/ZF/AF/PF undefined.
    SetCFDirect(Value, 0, true);
  }
}

// Signed multiply of the destination operand by Src[0]; the low half goes back
// to the destination and the flags are derived from low and high halves.
void OpDispatchBuilder::IMUL1SrcOp(OpcodeArgs) {
  // Upper garbage is acceptable: each size either sign-extends the operands
  // explicitly or uses an op sized to the operand.
  Ref Lhs = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  Ref Rhs = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  const auto OperandSize = OpSizeFromSrc(Op);
  const auto Bits = IR::OpSizeAsBits(OperandSize);

  Ref Low {};
  Ref High {};

  if (OperandSize == OpSize::i8Bit || OperandSize == OpSize::i16Bit) {
    // Sign-extend both inputs, multiply at 64-bit, then pull the upper half
    // of the double-width product out of the single 64-bit result.
    Lhs = _Sbfe(OpSize::i64Bit, Bits, 0, Lhs);
    Rhs = _Sbfe(OpSize::i64Bit, Bits, 0, Rhs);
    Low = _Mul(OpSize::i64Bit, Lhs, Rhs);
    High = _Sbfe(OpSize::i64Bit, Bits, Bits, Low);
  } else if (OperandSize == OpSize::i32Bit) {
    High = _SMull(Lhs, Rhs);
    High = _Sbfe(OpSize::i64Bit, Bits, Bits, High);
    // High half is emitted first: the flipped order saves a move.
    Low = _Mul(OpSize::i32Bit, Lhs, Rhs);
  } else if (OperandSize == OpSize::i64Bit) {
    High = _MulH(OpSize::i64Bit, Lhs, Rhs);
    // High half is emitted first: the flipped order saves a move.
    Low = _Mul(OpSize::i64Bit, Lhs, Rhs);
  } else {
    FEX_UNREACHABLE;
  }

  StoreResultGPR(Op, Low);
  CalculateFlags_MUL(OperandSize, Low, High);
}

// Signed multiply of Src[0] by Src[1]; the low half goes to the destination
// and the flags are derived from low and high halves.
void OpDispatchBuilder::IMUL2SrcOp(OpcodeArgs) {
  Ref Lhs = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
  Ref Rhs = LoadSourceGPR(Op, Op->Src[1], Op->Flags, {.AllowUpperGarbage = true});

  const auto OperandSize = OpSizeFromSrc(Op);
  const auto Bits = IR::OpSizeAsBits(OperandSize);

  Ref Low {};
  Ref High {};

  if (OperandSize == OpSize::i8Bit || OperandSize == OpSize::i16Bit) {
    // Sign-extend both inputs, multiply at 64-bit, then pull the upper half
    // of the double-width product out of the single 64-bit result.
    Lhs = _Sbfe(OpSize::i64Bit, Bits, 0, Lhs);
    // The second operand is sign-extended through the ArithRef helper.
    Rhs = ARef(Rhs).Sbfe(0, Bits).Ref();
    Low = _Mul(OpSize::i64Bit, Lhs, Rhs);
    High = _Sbfe(OpSize::i64Bit, Bits, Bits, Low);
  } else if (OperandSize == OpSize::i32Bit) {
    High = _SMull(Lhs, Rhs);
    High = _Sbfe(OpSize::i64Bit, Bits, Bits, High);
    // High half is emitted first: the flipped order saves a move.
    Low = _Mul(OpSize::i32Bit, Lhs, Rhs);
  } else if (OperandSize == OpSize::i64Bit) {
    High = _MulH(OpSize::i64Bit, Lhs, Rhs);
    // High half is emitted first: the flipped order saves a move.
    Low = _Mul(OpSize::i64Bit, Lhs, Rhs);
  } else {
    FEX_UNREACHABLE;
  }

  StoreResultGPR(Op, Low);
  CalculateFlags_MUL(OperandSize, Low, High);
}

// Signed widening multiply of the accumulator (RAX, at operand width) by the
// destination operand. The product is written to AX for 8-bit operands and
// split across RAX (low) and RDX (high) for wider ones.
void OpDispatchBuilder::IMULOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);
  const auto SizeBits = IR::OpSizeAsBits(Size);

  Ref Multiplicand = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  Ref Accumulator = LoadGPRRegister(X86State::REG_RAX);

  // Sub-64-bit operands are sign-extended so a 64-bit multiply yields the
  // whole double-width product.
  if (Size != OpSize::i64Bit) {
    Multiplicand = _Sbfe(OpSize::i64Bit, SizeBits, 0, Multiplicand);
    Accumulator = _Sbfe(OpSize::i64Bit, SizeBits, 0, Accumulator);
  }

  // The 64-bit multiply is emitted in its own case below to save a move.
  Ref Product {};
  if (Size < OpSize::i64Bit) {
    Product = _Mul(OpSize::i64Bit, Multiplicand, Accumulator);
  }

  Ref UpperHalf {};
  switch (Size) {
  case OpSize::i8Bit: {
    // The 16-bit product goes to AX.
    StoreGPRRegister(X86State::REG_RAX, Product, OpSize::i16Bit);
    UpperHalf = _Sbfe(OpSize::i64Bit, 8, 8, Product);
    break;
  }
  case OpSize::i16Bit: {
    // Low 16 bits to AX, high 16 bits to DX.
    StoreGPRRegister(X86State::REG_RAX, Product, Size);
    UpperHalf = _Sbfe(OpSize::i64Bit, 16, 16, Product);
    StoreGPRRegister(X86State::REG_RDX, UpperHalf, Size);
    break;
  }
  case OpSize::i32Bit: {
    // Low 32 bits to EAX, high 32 bits to EDX. The stored values are
    // zero-extended; the flag inputs use the sign-extended halves.
    auto ZextLow = _Bfe(OpSize::i64Bit, 32, 0, Product);
    auto ZextHigh = _Bfe(OpSize::i64Bit, 32, 32, Product);
    UpperHalf = _Sbfe(OpSize::i64Bit, 32, 32, Product);
    Product = _Sbfe(OpSize::i64Bit, 32, 0, Product);
    StoreGPRRegister(X86State::REG_RAX, ZextLow);
    StoreGPRRegister(X86State::REG_RDX, ZextHigh);
    break;
  }
  case OpSize::i64Bit: {
    if (!Is64BitMode) {
      LogMan::Msg::EFmt("Doesn't exist in 32bit mode");
      DecodeFailure = true;
      return;
    }
    // Low 64 bits to RAX, high 64 bits to RDX.
    UpperHalf = _MulH(OpSize::i64Bit, Multiplicand, Accumulator);
    Product = _Mul(OpSize::i64Bit, Multiplicand, Accumulator);
    StoreGPRRegister(X86State::REG_RAX, Product);
    StoreGPRRegister(X86State::REG_RDX, UpperHalf);
    break;
  }
  default: break;
  }

  CalculateFlags_MUL(Size, Product, UpperHalf);
}

// Unsigned widening multiply of the accumulator (RAX, at operand width) by the
// destination operand. The product is written to AX for 8-bit operands and
// split across RAX (low) and RDX (high) for wider ones.
void OpDispatchBuilder::MULOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);
  const auto SizeBits = IR::OpSizeAsBits(Size);

  Ref Multiplicand = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  Ref Accumulator = LoadGPRRegister(X86State::REG_RAX);
  Ref Product {};

  // Sub-64-bit operands are zero-extended so a 64-bit multiply yields the
  // whole double-width product.
  if (Size != OpSize::i64Bit) {
    Multiplicand = _Bfe(OpSize::i64Bit, SizeBits, 0, Multiplicand);
    Accumulator = _Bfe(OpSize::i64Bit, SizeBits, 0, Accumulator);
    Product = _UMul(OpSize::i64Bit, Multiplicand, Accumulator);
  }

  Ref UpperHalf {};
  switch (Size) {
  case OpSize::i8Bit: {
    // The 16-bit product goes to AX.
    StoreGPRRegister(X86State::REG_RAX, Product, OpSize::i16Bit);
    UpperHalf = _Bfe(OpSize::i64Bit, 8, 8, Product);
    break;
  }
  case OpSize::i16Bit: {
    // Low 16 bits to AX, high 16 bits to DX.
    StoreGPRRegister(X86State::REG_RAX, Product, Size);
    UpperHalf = _Bfe(OpSize::i64Bit, 16, 16, Product);
    StoreGPRRegister(X86State::REG_RDX, UpperHalf, Size);
    break;
  }
  case OpSize::i32Bit: {
    // Low 32 bits to EAX, high 32 bits to EDX.
    Ref LowerHalf = _Bfe(OpSize::i64Bit, 32, 0, Product);
    UpperHalf = _Bfe(OpSize::i64Bit, 32, 32, Product);
    StoreGPRRegister(X86State::REG_RAX, LowerHalf);
    StoreGPRRegister(X86State::REG_RDX, UpperHalf);
    break;
  }
  case OpSize::i64Bit: {
    if (!Is64BitMode) {
      LogMan::Msg::EFmt("Doesn't exist in 32bit mode");
      DecodeFailure = true;
      return;
    }
    // Low 64 bits to RAX, high 64 bits to RDX.
    // The high half is computed first to allow better RA.
    UpperHalf = _UMulH(OpSize::i64Bit, Multiplicand, Accumulator);
    Product = _UMul(OpSize::i64Bit, Multiplicand, Accumulator);
    StoreGPRRegister(X86State::REG_RAX, Product);
    StoreGPRRegister(X86State::REG_RDX, UpperHalf);
    break;
  }
  default: break;
  }

  CalculateFlags_UMUL(UpperHalf);
}

// Bitwise NOT of the destination operand. Three paths: locked memory (atomic
// xor), plain memory (load / xor / store), and register (in-place invert on the
// full-width register).
void OpDispatchBuilder::NOTOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);
  const auto SizeBits = IR::OpSizeAsBits(Size);
  LOGMAN_THROW_A_FMT(Size >= IR::OpSize::i8Bit && Size <= IR::OpSize::i64Bit, "Invalid size");

  // All-ones mask covering the operand width; 64-bit is special-cased so
  // the shift below never shifts by 64.
  Ref InvertMask = (Size == OpSize::i64Bit) ? Constant(~0ULL) : Constant((1ULL << SizeBits) - 1);

  if (DestIsLockedMem(Op)) {
    HandledLock = true;
    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    // The fetched value is not needed.
    _AtomicFetchXor(Size, InvertMask, Address);
    return;
  }

  if (!Op->Dest.IsGPR()) {
    // The register path below plays fast and loose with sizes; memory takes
    // the conservative load / xor / store route instead.
    Ref Loaded = LoadSourceGPR(Op, Op->Dest, Op->Flags);
    Loaded = _Xor(OpSize::i64Bit, Loaded, InvertMask);
    StoreResultGPR(Op, Loaded);
    return;
  }

  // Register destination. High-byte registers are rewritten to target the
  // low register with the mask moved up to bits [15:8], so the invert
  // happens in place on the wider value.
  auto Target = Op->Dest;
  if (Target.Data.GPR.HighBits) {
    LOGMAN_THROW_A_FMT(Size == OpSize::i8Bit, "Only 8-bit GPRs get high bits");
    InvertMask = Constant(0xFF00);
    Target.Data.GPR.HighBits = false;
  }

  // Load the whole register so the untouched upper bits ride along, giving
  // insert behaviour implicitly.
  const auto GPRSize = GetGPROpSize();
  Ref Value = LoadSourceGPR_WithOpSize(Op, Target, GPRSize, Op->Flags);

  // 8/16-bit: operate at full register width so the invert is in place.
  // 32-bit: operate at 32-bit so the upper bits get zeroed.
  const auto EffectiveSize = (Size == OpSize::i32Bit) ? OpSize::i32Bit : GPRSize;

  if (Size >= OpSize::i32Bit) {
    // Whole-value invert: Not avoids needing the mask constant.
    Value = _Not(EffectiveSize, Value);
  } else {
    Value = _Xor(EffectiveSize, Value, InvertMask);
  }

  // Store at full register width; the Not/Xor already handled the upper
  // bits correctly, and this way the store can be deleted.
  StoreResultGPR_WithOpSize(Op, Target, Value, GPRSize);
}

// XADD: exchange-and-add. The destination receives Dest + Src and the source
// operand receives the destination's previous value.
void OpDispatchBuilder::XADDOp(OpcodeArgs) {
  Ref Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});
  Ref Src = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  if (!Op->Dest.IsGPR()) {
    // Memory destination: one atomic fetch-add performs the update and
    // returns the old value.
    HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;
    Dest = AppendSegmentOffset(Dest, Op->Flags);
    auto Previous = _AtomicFetchAdd(OpSizeFromSrc(Op), Src, Dest);
    CalculateFlags_ADD(OpSizeFromSrc(Op), Previous, Src);
    StoreResultGPR(Op, Op->Src[0], Previous);
    return;
  }

  // Register destination: a plain add suffices.
  Ref Sum = CalculateFlags_ADD(OpSizeFromSrc(Op), Dest, Src);

  // The old destination value goes to the source operand first...
  StoreResultGPR(Op, Op->Src[0], Dest);

  // ...and the sum goes to the destination last. The order matters when the
  // destination and source are the same register.
  StoreResultGPR(Op, Sum);
}

// POPCNT: count the set bits of Src[0] and store the count in the destination.
void OpDispatchBuilder::PopcountOp(OpcodeArgs) {
  // Upper garbage in the loaded source is tolerated when the host has CSSC
  // or when the operand is at least 4 bytes wide.
  const bool GarbageOK = CTX->HostFeatures.SupportsCSSC || GetSrcSize(Op) >= 4;
  Ref Count = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = GarbageOK});
  Count = _Popcount(OpSizeFromSrc(Op), Count);
  StoreResultGPR(Op, Count);

  // ZF must reflect the result while the rest of NZCV is cleared. A bit count
  // is small and never negative, so one combined NZ test sets ZF correctly
  // and leaves SF/CF/OF zero.
  SetNZ_ZeroCV(OpSize::i32Bit, Count);
  ZeroPF_AF();
}

// Shared by the decimal-adjust ops: returns the loaded AF OR'd with a 0/1 value
// that is 1 when the low nibble of A is greater than 9.
Ref OpDispatchBuilder::CalculateAFForDecimal(Ref A) {
  auto LowNibble = _And(OpSize::i64Bit, A, Constant(0xF));
  auto NibbleOverflows = Select01(OpSize::i64Bit, CondClass::UGT, LowNibble, Constant(9));

  auto CurrentAF = LoadAF();
  return _Or(OpSize::i64Bit, CurrentAF, NibbleOverflows);
}

// DAA: decimal adjust AL after addition.
void OpDispatchBuilder::DAAOp(OpcodeArgs) {
  CalculateDeferredFlags();
  auto Acc = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);
  // The carry is tracked in inverted form throughout this function.
  auto NoCarry = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC, true);
  auto Adjust = CalculateAFForDecimal(Acc);

  // CF |= (AL > 0x99), written on the inverted carry as
  // !CF &= (AL <= 0x99).
  NoCarry = _And(OpSize::i64Bit, NoCarry, Select01(OpSize::i64Bit, CondClass::ULE, Acc, Constant(0x99)));

  // Low-digit fix-up: AL = AF ? (AL + 0x6) : AL
  Acc = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::NEQ, Adjust, Constant(0), Add(OpSize::i64Bit, Acc, 0x6), Acc);

  // High-digit fix-up: AL = CF ? (AL + 0x60) : AL
  // (CF is set exactly when the inverted carry is zero.)
  Acc = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, NoCarry, Constant(0), Add(OpSize::i64Bit, Acc, 0x60), Acc);

  // SF, ZF and PF follow the result; CF is as computed above; OF is undefined.
  StoreGPRRegister(X86State::REG_RAX, Acc, OpSize::i8Bit);
  SetNZ_ZeroCV(OpSize::i8Bit, Acc);
  SetCFInverted(NoCarry);
  CalculatePF(Acc);
  SetAFAndFixup(Adjust);
}

// DAS: decimal adjust AL after subtraction.
void OpDispatchBuilder::DASOp(OpcodeArgs) {
  CalculateDeferredFlags();
  auto Acc = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);
  auto Carry = GetRFLAG(FEXCore::X86State::RFLAG_CF_RAW_LOC);
  auto Adjust = CalculateAFForDecimal(Acc);

  // Carry |= (AL > 0x99)
  Carry = _Or(OpSize::i64Bit, Carry, Select01(OpSize::i64Bit, CondClass::UGT, Acc, Constant(0x99)));

  // CarryOut = Carry | (AF && (AL - 6 borrows)). The select yields AF when
  // AL < 6 and Carry otherwise.
  auto CarryOut = _Or(OpSize::i32Bit, Carry, _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::ULT, Acc, Constant(6), Adjust, Carry));

  // Low-digit fix-up: AL = AF ? (AL - 0x6) : AL
  Acc = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::NEQ, Adjust, Constant(0), Sub(OpSize::i64Bit, Acc, 0x6), Acc);

  // High-digit fix-up: AL = Carry ? (AL - 0x60) : AL
  Acc = _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::NEQ, Carry, Constant(0), Sub(OpSize::i64Bit, Acc, 0x60), Acc);

  // SF, ZF and PF follow the result; CF is as computed above; OF is undefined.
  StoreGPRRegister(X86State::REG_RAX, Acc, OpSize::i8Bit);
  SetNZ_ZeroCV(OpSize::i8Bit, Acc);
  SetCFDirect(CarryOut);
  CalculatePF(Acc);
  SetAFAndFixup(Adjust);
}

// AAA: ASCII adjust AL after addition.
void OpDispatchBuilder::AAAOp(OpcodeArgs) {
  auto Accum = LoadGPRRegister(X86State::REG_RAX);
  auto Adjust = CalculateAFForDecimal(Accum);

  // CF mirrors AF; OF/SF/ZF/PF are undefined.
  SetCFDirect_InvalidateNZV(Adjust);
  SetAFAndFixup(Adjust);
  CalculateDeferredFlags();

  // AX = CF ? (AX + 0x106) : AX
  Accum = NZCVSelect(OpSize::i32Bit, CondClass::UGE /* CF = 1 */, Add(OpSize::i32Bit, Accum, 0x106), Accum);

  // Keep only the low nibble of AL; AH is passed through unchanged.
  Accum = _And(OpSize::i32Bit, Accum, Constant(0xFF0F));
  StoreGPRRegister(X86State::REG_RAX, Accum, OpSize::i16Bit);
}

// AAS: ASCII adjust AL after subtraction.
void OpDispatchBuilder::AASOp(OpcodeArgs) {
  auto Accum = LoadGPRRegister(X86State::REG_RAX);
  auto Adjust = CalculateAFForDecimal(Accum);

  // CF mirrors AF; OF/SF/ZF/PF are undefined.
  SetCFDirect_InvalidateNZV(Adjust);
  SetAFAndFixup(Adjust);
  CalculateDeferredFlags();

  // AX = CF ? (AX - 0x106) : AX
  Accum = NZCVSelect(OpSize::i32Bit, CondClass::UGE /* CF = 1 */, Sub(OpSize::i32Bit, Accum, 0x106), Accum);

  // Keep only the low nibble of AL; AH is passed through unchanged.
  Accum = _And(OpSize::i32Bit, Accum, Constant(0xFF0F));
  StoreGPRRegister(X86State::REG_RAX, Accum, OpSize::i16Bit);
}

// AAM: divide AL by the 8-bit immediate; the quotient goes to AH and the
// remainder to AL.
void OpDispatchBuilder::AAMOp(OpcodeArgs) {
  auto Dividend = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);
  auto Divisor = Constant(Op->Src[0].Literal() & 0xFF);

  // Both division outputs land in freshly allocated registers.
  Ref Quot = _AllocateGPR(true);
  Ref Rem = _AllocateGPR(true);
  _UDiv(OpSize::i64Bit, Dividend, Invalid(), Divisor, Quot, Rem);

  // AX = (Quot << 8) + Rem
  auto Packed = _AddShift(OpSize::i64Bit, Rem, Quot, ShiftType::LSL, 8);
  StoreGPRRegister(X86State::REG_RAX, Packed, OpSize::i16Bit);

  // N/Z and PF are taken from the low byte of the packed value.
  SetNZ_ZeroCV(OpSize::i8Bit, Packed);
  CalculatePF(Packed);
  InvalidateAF();
}

// AAD: AL = (AL + AH * imm8) & 0xFF, with AH cleared by the 16-bit store.
void OpDispatchBuilder::AADOp(OpcodeArgs) {
  auto Accum = LoadGPRRegister(X86State::REG_RAX);
  // Everything above AL; only the low byte of the final sum survives the
  // mask below, so bits above AH in this value do not matter.
  auto UpperPart = _Lshr(OpSize::i32Bit, Accum, Constant(8));
  auto Base = Constant(Op->Src[0].Literal() & 0xFF);
  auto Sum = Add(OpSize::i64Bit, Accum, _Mul(OpSize::i64Bit, UpperPart, Base));
  auto LowByte = _And(OpSize::i64Bit, Sum, Constant(0xFF));
  StoreGPRRegister(X86State::REG_RAX, LowByte, OpSize::i16Bit);

  // N/Z and PF are computed from the new AL.
  SetNZ_ZeroCV(OpSize::i8Bit, LowByte);
  CalculatePF(LowByte);
  InvalidateAF();
}

// XLAT: AL = byte at [segment:RBX + AL]; the segment address is built with the
// DS prefix flag as its default.
void OpDispatchBuilder::XLATOp(OpcodeArgs) {
  Ref TableBase = MakeSegmentAddress(X86State::REG_RBX, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);
  Ref TableIndex = LoadGPRRegister(X86State::REG_RAX, OpSize::i8Bit);

  // Base + index address, single-byte load.
  AddressMode Lookup = {.Base = TableBase, .Index = TableIndex, .AddrSize = OpSize::i64Bit};
  auto Entry = _LoadMemGPRAutoTSO(OpSize::i8Bit, Lookup, OpSize::i8Bit);

  StoreGPRRegister(X86State::REG_RAX, Entry, OpSize::i8Bit);
}

// Reads the cached FS or GS value out of the CPU state into the destination.
// Any segment other than FS reads gs_cached.
void OpDispatchBuilder::ReadSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment Seg) {
  // 64-bit only
  // Doesn't hit the segment register optimization
  const auto Size = OpSizeFromSrc(Op);

  Ref Base = (Seg == Segment::FS) ? _LoadContextGPR(Size, offsetof(FEXCore::Core::CPUState, fs_cached)) :
                                    _LoadContextGPR(Size, offsetof(FEXCore::Core::CPUState, gs_cached));

  StoreResultGPR(Op, Base);
}

// Writes Src[0] into the cached FS or GS value in the CPU state.
// Any segment other than FS writes gs_cached.
void OpDispatchBuilder::WriteSegmentReg(OpcodeArgs, OpDispatchBuilder::Segment Seg) {
  // Documentation claims the 32-bit form of this instruction inserts into the
  // lower 32 bits of the segment base. That is incorrect: it zero-extends the
  // 32-bit value to 64-bit instead.
  const auto Size = OpSizeFromDst(Op);
  Ref NewBase = LoadSourceGPR(Op, Op->Src[0], Op->Flags);

  if (Seg != Segment::FS) {
    _StoreContextGPR(Size, NewBase, offsetof(FEXCore::Core::CPUState, gs_cached));
    return;
  }

  _StoreContextGPR(Size, NewBase, offsetof(FEXCore::Core::CPUState, fs_cached));
}

// ENTER: build a stack frame. The immediate packs the allocation size in bits
// [15:0] and the nesting level in bits [20:16].
void OpDispatchBuilder::EnterOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  const bool Has16BitOperand = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_OPERAND_SIZE) != 0;
  const auto OperandSize = Has16BitOperand ? OpSize::i16Bit : GPRSize;

  const uint64_t Packed = Op->Src[0].Literal();
  const uint16_t FrameBytes = Packed & 0xFFFF;
  const uint8_t NestLevel = (Packed >> 16) & 0x1F;

  // Pushes one value and commits the updated stack pointer to RSP.
  const auto EmitPush = [&](IR::OpSize Size, Ref Src) -> Ref {
    auto SPBefore = LoadGPRRegister(X86State::REG_RSP);
    auto SPAfter = _Push(GPRSize, Size, Src, SPBefore);

    StoreGPRRegister(X86State::REG_RSP, SPAfter);
    return SPAfter;
  };

  // Push the caller's frame pointer; the resulting SP is the new frame base.
  auto CallerBP = LoadGPRRegister(X86State::REG_RBP);
  auto CurrentSP = EmitPush(OperandSize, CallerBP);
  auto FrameBase = CurrentSP;

  // Copy the enclosing frame pointers (levels 1 .. NestLevel-1). This loop
  // runs zero times when NestLevel is 0 or 1.
  for (uint8_t Depth = 1; Depth < NestLevel; ++Depth) {
    auto SlotAddr = Sub(GPRSize, CallerBP, Depth * IR::OpSizeToSize(OperandSize));
    auto SlotValue = _LoadMemGPR(OperandSize, SlotAddr, OperandSize);
    CurrentSP = EmitPush(OperandSize, SlotValue);
  }

  // Any non-zero nesting level also pushes the new frame base itself.
  if (NestLevel > 0) {
    CurrentSP = EmitPush(OperandSize, FrameBase);
  }

  // Reserve the locals and commit the final RSP / RBP.
  CurrentSP = Sub(GPRSize, CurrentSP, FrameBytes);
  StoreGPRRegister(X86State::REG_RSP, CurrentSP);
  StoreGPRRegister(X86State::REG_RBP, FrameBase);
}

// SGDT: store an emulated GDT descriptor to the destination address.
void OpDispatchBuilder::SGDTOp(OpcodeArgs) {
  auto Target = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});

  // Layout written to memory:
  //   uint16_t Limit;              -- always 0
  //   {uint32_t,uint64_t} Base;    -- kernel-space address below
  //
  // The operand size prefix is ignored for this instruction; the width of
  // the base field depends purely on the operating mode.
  constexpr uint64_t KernelGDTBase = 0xFFFFFFFFFFFE0000ULL;

  // 32-bit mode reports only the low 32 bits of the base.
  const uint64_t ReportedBase = Is64BitMode ? KernelGDTBase : (KernelGDTBase & ~0U);
  const auto BaseStoreSize = Is64BitMode ? OpSize::i64Bit : OpSize::i32Bit;

  _StoreMemGPRAutoTSO(OpSize::i16Bit, Target, Constant(0));
  _StoreMemGPRAutoTSO(BaseStoreSize, AddressMode {.Base = Target, .Offset = 2, .AddrSize = OpSize::i64Bit}, Constant(ReportedBase));
}

// SIDT: store an emulated IDT descriptor (16-bit limit followed by the base) to
// the destination address.
void OpDispatchBuilder::SIDTOp(OpcodeArgs) {
  auto Target = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.LoadData = false});

  // Same layout as SGDTOp; the reported values match Linux.
  constexpr uint64_t KernelIDTBase = 0xFFFFFE0000000000ULL;

  // 32-bit mode reports only the low 32 bits of the base.
  const uint64_t ReportedBase = Is64BitMode ? KernelIDTBase : (KernelIDTBase & ~0U);
  const auto BaseStoreSize = Is64BitMode ? OpSize::i64Bit : OpSize::i32Bit;

  // The limit field is 0xfff.
  _StoreMemGPRAutoTSO(OpSize::i16Bit, Target, Constant(0xfff));
  _StoreMemGPRAutoTSO(BaseStoreSize, AddressMode {.Base = Target, .Offset = 2, .AddrSize = OpSize::i64Bit}, Constant(ReportedBase));
}

// SMSW: stores a fixed, emulated machine status word (CR0 image) to a register or memory destination.
void OpDispatchBuilder::SMSWOp(OpcodeArgs) {
  const bool IsMemDst = DestIsMem(Op);

  // Constant CR0 image reported to the guest.
  constexpr uint32_t EmulatedCR0 = (1U << 31) | ///< PG - Paging
                                   (0U << 30) | ///< CD - Cache Disable
                                   (0U << 29) | ///< NW - Not Writethrough (Legacy, now ignored)
                                   ///< [28:19] - Reserved
                                   (1U << 18) | ///< AM - Alignment Mask
                                   ///< 17 - Reserved
                                   (1U << 16) | ///< WP - Write Protect
                                   ///< [15:6] - Reserved
                                   (1U << 5) | ///< NE - Numeric Error
                                   (1U << 4) | ///< ET - Extension Type (Legacy, now reserved and 1)
                                   (0U << 3) | ///< TS - Task Switched
                                   (0U << 2) | ///< EM - Emulation
                                   (1U << 1) | ///< MP - Monitor Coprocessor
                                   (1U << 0); ///< PE - Protection Enabled
  Ref Value = Constant(EmulatedCR0);

  const auto OpAddr = X86Tables::DecodeFlags::GetOpAddr(Op->Flags, 0);
  const bool Has16BitOperand = OpAddr == X86Tables::DecodeFlags::FLAG_OPERAND_SIZE_LAST;

  IR::OpSize StoreSize {OpSize::iInvalid};
  if (!Is64BitMode) {
    StoreSize = Has16BitOperand ? OpSize::i16Bit : OpSize::i32Bit;
  } else {
    if (Has16BitOperand) {
      StoreSize = OpSize::i16Bit;
    } else if (OpAddr == X86Tables::DecodeFlags::FLAG_WIDENING_SIZE_LAST) {
      StoreSize = OpSize::i64Bit;
    } else {
      StoreSize = OpSize::i32Bit;
    }

    if (!IsMemDst && StoreSize == OpSize::i32Bit) {
      // Special case for the `smsw ebx` form: on a 64-bit host this behaves as an insert into the
      // lower 32 bits of the destination register. Build the merged value and store all 64 bits.
      auto Existing = LoadSourceGPR_WithOpSize(Op, Op->Dest, GetGPROpSize(), Op->Flags);
      Value = _Bfi(OpSize::i64Bit, 32, 0, Existing, Value);
      StoreSize = OpSize::i64Bit;
    }
  }

  // A memory destination always receives exactly 16 bits, whatever the operand size.
  if (IsMemDst) {
    StoreSize = OpSize::i16Bit;
  }

  StoreResultGPR_WithOpSize(Op, Op->Dest, Value, StoreSize);
}

// Emits a cycle counter read and splits it into the low/high halves consumed by the callers.
//
// SelfSynchronizingLoads is forwarded unchanged to the _CycleCounter IR op.
// When CTX->Config.TSCScale is non-zero the counter is scaled up by that many bits
// before being split; otherwise the raw 64-bit value is split at bit 32.
OpDispatchBuilder::CycleCounterPair OpDispatchBuilder::CycleCounter(bool SelfSynchronizingLoads) {
  auto RawCounter = _CycleCounter(SelfSynchronizingLoads);

  if (CTX->Config.TSCScale) {
    // Low half: 32-bit left shift by the scale. High half: the bits that the scale shifted above bit 31.
    Ref ScaledLow = _Lshl(OpSize::i32Bit, RawCounter, Constant(CTX->Config.TSCScale));
    Ref ScaledHigh = _Lshr(OpSize::i64Bit, RawCounter, Constant(32 - CTX->Config.TSCScale));
    return {
      .CounterLow = ScaledLow,
      .CounterHigh = ScaledHigh,
    };
  }

  // Unscaled: bits [31:0] and bits [63:32] of the raw counter.
  Ref Low = _Bfe(OpSize::i64Bit, 32, 0, RawCounter);
  Ref High = _Bfe(OpSize::i64Bit, 32, 32, RawCounter);
  return {
    .CounterLow = Low,
    .CounterHigh = High,
  };
}

// RDTSC: places the low half of the cycle counter in RAX and the high half in RDX.
void OpDispatchBuilder::RDTSCOp(OpcodeArgs) {
  // Plain RDTSC requests the counter without self-synchronizing loads.
  const auto TSC = CycleCounter(false);

  StoreGPRRegister(X86State::REG_RAX, TSC.CounterLow);
  StoreGPRRegister(X86State::REG_RDX, TSC.CounterHigh);
}

// INC: increments the destination by one and updates the flags while leaving CF untouched.
// A LOCK-prefixed memory destination is handled with an atomic fetch-add.
void OpDispatchBuilder::INCOp(OpcodeArgs) {
  const auto BitSize = GetSrcBitSize(Op);
  const bool Locked = DestIsLockedMem(Op);

  // Value of the destination before the increment.
  Ref Original {};
  if (!Locked) {
    Original = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = BitSize >= 32});
  } else {
    HandledLock = true;

    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    Original = _AtomicFetchAdd(OpSizeFromSrc(Op), Constant(1), Address);
  }

  CalculateDeferredFlags();

  Ref Incremented {};
  const bool UseSmallFlagPath = BitSize < 32 && CTX->HostFeatures.SupportsFlagM;
  if (!UseSmallFlagPath) {
    Incremented = CalculateFlags_ADD(OpSizeFromSrc(Op), Original, Constant(1), false);
  } else {
    // The 32-bit addition leaves garbage above the operand width.
    Incremented = Add(OpSize::i32Bit, Original, 1);
    CalculatePF(Incremented);
    CalculateAF(Original, Constant(1));

    // Set N and Z from the small result while preserving C.
    HandleNZCV_RMW();
    _SetSmallNZV(OpSizeFromSrc(Op), Incremented);

    // Repair V. INC only overflows when a positive value becomes negative, so V is the
    // operand's sign bit of (Incremented & ~Original).
    _RmifNZCV(_Andn(OpSize::i32Bit, Incremented, Original), BitSize - 1, 1);
  }

  // The atomic op already updated memory in the locked case.
  if (Locked) {
    return;
  }

  StoreResultGPR(Op, Incremented);
}

// DEC: decrements the destination by one and updates the flags while leaving CF untouched.
// A LOCK-prefixed memory destination is handled with an atomic fetch-add of -1.
void OpDispatchBuilder::DECOp(OpcodeArgs) {
  const auto BitSize = GetSrcBitSize(Op);
  const bool Locked = DestIsLockedMem(Op);

  // Value of the destination before the decrement.
  Ref Original {};
  if (!Locked) {
    Original = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = BitSize >= 32});
  } else {
    HandledLock = true;

    Ref Address = MakeSegmentAddress(Op, Op->Dest);

    // Adding an all-ones value of the operand width is used instead of a subtract to avoid a NEG.
    Original = _AtomicFetchAdd(OpSizeFromSrc(Op), Constant(BitSize == 64 ? -1 : ((1ULL << BitSize) - 1)), Address);
  }

  CalculateDeferredFlags();

  Ref Decremented {};
  const bool UseSmallFlagPath = BitSize < 32 && CTX->HostFeatures.SupportsFlagM;
  if (!UseSmallFlagPath) {
    Decremented = CalculateFlags_SUB(OpSizeFromSrc(Op), Original, Constant(1), false);
  } else {
    // The 32-bit subtraction leaves garbage above the operand width.
    Decremented = Sub(OpSize::i32Bit, Original, 1);
    CalculatePF(Decremented);
    CalculateAF(Original, Constant(1));

    // Set N and Z from the small result while preserving C.
    HandleNZCV_RMW();
    _SetSmallNZV(OpSizeFromSrc(Op), Decremented);

    // Repair V. DEC only overflows when a negative value becomes positive, so V is the
    // operand's sign bit of (Original & ~Decremented).
    _RmifNZCV(_Andn(OpSize::i32Bit, Original, Decremented), BitSize - 1, 1);
  }

  // The atomic op already updated memory in the locked case.
  if (Locked) {
    return;
  }

  StoreResultGPR(Op, Decremented);
}

// STOS: stores the source register to ES:[RDI] and advances RDI; with a REP/REPNE prefix the
// whole run is emitted as a single MemSet IR op.
// An address-size override prefix is not supported and is reported as a decode failure.
void OpDispatchBuilder::STOSOp(OpcodeArgs) {
  if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) {
    LogMan::Msg::EFmt("STOSOp: Can't handle address size override (OP: 0x{:04X}, Flags: 0x{:08X})", Op->OP, Op->Flags);
    DecodeFailure = true;
    return;
  }

  const auto Size = OpSizeFromSrc(Op);
  const auto RepeatMask = FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX;
  const bool IsRepeated = (Op->Flags & RepeatMask) != 0;

  if (IsRepeated) {
    // FEX doesn't support partially faulting REP instructions.
    // Lowering to a `MemSet` IR op improves the generated code significantly.
    // Should faulting REP instructions ever be supported, this path needs substantial rework.
    Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
    Ref Start = LoadGPRRegister(X86State::REG_RDI);

    // The destination only ever uses the ES segment.
    auto ESBase = GetSegment(0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

    Ref Count = LoadGPRRegister(X86State::REG_RCX);

    auto End = _MemSet(CTX->IsAtomicTSOEnabled(), Size, ESBase ?: InvalidNode, Start, Value, Count, LoadDir(1));

    // The whole run has been consumed: RCX becomes zero and RDI takes the MemSet result.
    StoreGPRRegister(X86State::REG_RCX, Constant(0));
    StoreGPRRegister(X86State::REG_RDI, End);
    return;
  }

  // The value is only used by a store of the same size, so upper garbage is harmless.
  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  // The destination only ever uses the ES segment.
  Ref Address = MakeSegmentAddress(X86State::REG_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

  // Write the element to where RDI points.
  if (!CTX->IsMemcpyAtomicTSOEnabled()) {
    _StoreMem(RegClass::GPR, Size, Value, Address, Invalid(), OpSize::i8Bit, MemOffsetType::SXTX, 1);
  } else {
    _StoreMemGPRAutoTSO(Size, Address, Value, Size);
  }

  // Advance the raw RDI value (not the segmented address) by one element in the current direction.
  Ref CurrentRDI = LoadGPRRegister(X86State::REG_RDI);
  StoreGPRRegister(X86State::REG_RDI, OffsetByDir(CurrentRDI, IR::OpSizeToSize(Size)));
}

// MOVS: copies one element from the RSI address to the RDI address and advances both registers.
// With a REP/REPNE prefix the whole run of RCX elements is emitted as a single MemCpy IR op.
//
// The source address is built with the instruction's prefix flags and DS as the default segment;
// the destination address is always built with ES.
// An address-size override prefix is not supported and is reported as a decode failure.
void OpDispatchBuilder::MOVSOp(OpcodeArgs) {
  if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) {
    LogMan::Msg::EFmt("MOVSOp: Can't handle address size override (OP: 0x{:04X}, Flags: 0x{:08X})", Op->OP, Op->Flags);
    DecodeFailure = true;
    return;
  }

  // RA now can handle these to be here, to avoid DF accesses
  const auto Size = OpSizeFromSrc(Op);

  if (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) {
    auto SrcAddr = LoadGPRRegister(X86State::REG_RSI);
    auto DstAddr = LoadGPRRegister(X86State::REG_RDI);
    auto Counter = LoadGPRRegister(X86State::REG_RCX);

    auto DstSegment = GetSegment(0, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);
    auto SrcSegment = GetSegment(Op->Flags, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX);

    // Apply the segment bases (when present) to form the addresses handed to MemCpy.
    if (DstSegment) {
      DstAddr = Add(OpSize::i64Bit, DstAddr, DstSegment);
    }

    if (SrcSegment) {
      SrcAddr = Add(OpSize::i64Bit, SrcAddr, SrcSegment);
    }

    Ref Result_Src = _AllocateGPR(false);
    Ref Result_Dst = _AllocateGPR(false);
    _MemCpy(CTX->IsAtomicTSOEnabled(), Size, DstAddr, SrcAddr, Counter, LoadDir(1), Result_Dst, Result_Src);

    // MemCpy returns addresses that include the segment base; remove it again so the
    // registers keep holding segment-relative offsets.
    if (DstSegment) {
      Result_Dst = Sub(OpSize::i64Bit, Result_Dst, DstSegment);
    }

    if (SrcSegment) {
      Result_Src = Sub(OpSize::i64Bit, Result_Src, SrcSegment);
    }

    StoreGPRRegister(X86State::REG_RCX, Constant(0));
    StoreGPRRegister(X86State::REG_RDI, Result_Dst);
    StoreGPRRegister(X86State::REG_RSI, Result_Src);
  } else {
    Ref SrcAddr = MakeSegmentAddress(X86State::REG_RSI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);
    Ref DstAddr = MakeSegmentAddress(X86State::REG_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

    if (CTX->IsMemcpyAtomicTSOEnabled()) {
      auto Src = _LoadMemGPRAutoTSO(Size, SrcAddr, Size);

      // Store to memory where RDI points
      _StoreMemGPRAutoTSO(Size, DstAddr, Src, Size);
    } else {
      auto Src = _LoadMem(RegClass::GPR, Size, SrcAddr, Invalid(), OpSize::i8Bit, MemOffsetType::SXTX, 1);
      _StoreMem(RegClass::GPR, Size, Src, DstAddr, Invalid(), OpSize::i8Bit, MemOffsetType::SXTX, 1);
    }

    // SrcAddr/DstAddr may include a segment base, whereas RSI/RDI hold segment-relative offsets.
    // Advance the raw register values instead of the segmented addresses so that a segment base is
    // never written back into the registers. This mirrors the segment subtraction in the REP path
    // above and the register reload done by STOSOp/LODSOp.
    Ref RSI = LoadGPRRegister(X86State::REG_RSI);
    Ref RDI = LoadGPRRegister(X86State::REG_RDI);

    RSI = OffsetByDir(RSI, IR::OpSizeToSize(Size));
    RDI = OffsetByDir(RDI, IR::OpSizeToSize(Size));

    StoreGPRRegister(X86State::REG_RSI, RSI);
    StoreGPRRegister(X86State::REG_RDI, RDI);
  }
}

// Returns the address size used by a string instruction: 64-bit only in 64-bit mode without an
// address-size override prefix, 32-bit in every other case.
// Asserts that the override prefix is not combined with 32-bit mode.
IR::OpSize OpDispatchBuilder::GetStringOpSize(X86Tables::DecodedOp Op) const {
  const bool HasAddressSizeOverride = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0;
  LOGMAN_THROW_A_FMT(Is64BitMode || !HasAddressSizeOverride, "Invalid modifier on 32bit address");

  if (Is64BitMode && !HasAddressSizeOverride) {
    return OpSize::i64Bit;
  }
  return OpSize::i32Bit;
}

// CMPS: compares the element at the RSI address with the element at the RDI address, sets the
// flags as for a subtraction ([RSI] - [RDI]) and advances both registers.
//
// With a REP (REPE) or REPNE prefix an explicit IR loop is built that repeats while RCX is
// non-zero and the comparison keeps matching the prefix condition.
// The address-size override prefix is supported in 64-bit mode (32-bit addressing) and is a
// decode failure in 32-bit mode.
void OpDispatchBuilder::CMPSOp(OpcodeArgs) {
  if (!Is64BitMode && (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE)) {
    LogMan::Msg::EFmt("CMPSOp: Address size override (0x67) not supported in 32-bit mode (OP: 0x{:04X}).", Op->OP);
    DecodeFailure = true;
    return;
  }

  const auto Size = OpSizeFromSrc(Op);
  OpSize AddrSize = GetStringOpSize(Op);

  bool Repeat = Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX);
  if (!Repeat) {
    Ref Src_RSI = LoadGPRRegister(X86State::REG_RSI, AddrSize);
    Ref Src_RDI = LoadGPRRegister(X86State::REG_RDI, AddrSize);

    // Source uses the instruction's prefixes with DS as default, destination always uses ES.
    Ref Dest_RSI = AppendSegmentOffset(Src_RSI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);
    Ref Dest_RDI = AppendSegmentOffset(Src_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

    auto Src1 = _LoadMemGPRAutoTSO(Size, Dest_RDI, Size);
    auto Src2 = _LoadMemGPRAutoTSO(Size, Dest_RSI, Size);

    CalculateFlags_SUB(OpSizeFromSrc(Op), Src2, Src1);

    // Advance the raw (unsegmented) register values. With 32-bit addressing in 64-bit mode the
    // result is truncated to 32 bits and written to the full register.
    Dest_RDI = OffsetByDir(Src_RDI, IR::OpSizeToSize(Size));
    if (Is64BitMode && AddrSize == OpSize::i32Bit) {
      Dest_RDI = _Bfe(OpSize::i64Bit, 32, 0, Dest_RDI);
      StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
    } else {
      StoreGPRRegister(X86State::REG_RDI, Dest_RDI, AddrSize);
    }

    Dest_RSI = OffsetByDir(Src_RSI, IR::OpSizeToSize(Size));
    if (Is64BitMode && AddrSize == OpSize::i32Bit) {
      Dest_RSI = _Bfe(OpSize::i64Bit, 32, 0, Dest_RSI);
      StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
    } else {
      StoreGPRRegister(X86State::REG_RSI, Dest_RSI, AddrSize);
    }
  } else {
    // Calculate flags early.
    CalculateDeferredFlags();

    bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;

    // If rcx = 0, skip the whole loop.
    Ref Counter = LoadGPRRegister(X86State::REG_RCX);
    auto OuterJump = CondJump(Counter, CondClass::EQ);

    auto BeforeLoop = CreateNewCodeBlockAfter(GetCurrentBlock());
    SetFalseJumpTarget(OuterJump, BeforeLoop);
    SetCurrentCodeBlock(BeforeLoop);
    StartNewBlock();

    ForeachDirection([this, Op, Size, AddrSize, REPE](int32_t PtrDir) {
      IRPair<IROp_CondJump> InnerJump;
      auto JumpIntoLoop = Jump();

      // Setup for the loop
      auto LoopHeader = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetCurrentCodeBlock(LoopHeader);
      StartNewBlock();
      SetJumpTarget(JumpIntoLoop, LoopHeader);

      // Working loop
      {
        Ref Src_RSI = LoadGPRRegister(X86State::REG_RSI, AddrSize);
        Ref Src_RDI = LoadGPRRegister(X86State::REG_RDI, AddrSize);

        Ref Dest_RSI = AppendSegmentOffset(Src_RSI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);
        Ref Dest_RDI = AppendSegmentOffset(Src_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

        // Both element loads go through the auto-TSO path, exactly like the non-repeated form
        // above, so the two guest loads get the same memory-ordering treatment.
        auto Src1 = _LoadMemGPRAutoTSO(Size, Dest_RDI, Size);
        auto Src2 = _LoadMemGPRAutoTSO(Size, Dest_RSI, Size);

        // We'll calculate PF/AF after the loop, so use them as temporaries here.
        StoreRegister(Core::CPUState::PF_AS_GREG, false, Src1);
        StoreRegister(Core::CPUState::AF_AS_GREG, false, Src2);

        Ref TailCounter = LoadGPRRegister(X86State::REG_RCX);

        // Decrement counter
        TailCounter = SubWithFlags(OpSize::i64Bit, TailCounter, 1);

        // Store the counter since we don't have phis
        StoreGPRRegister(X86State::REG_RCX, TailCounter);

        Dest_RDI = Add(AddrSize, Src_RDI, PtrDir * static_cast<int32_t>(IR::OpSizeToSize(Size)));
        if (Is64BitMode && AddrSize == OpSize::i32Bit) {
          Dest_RDI = _Bfe(OpSize::i64Bit, 32, 0, Dest_RDI);
          StoreGPRRegister(X86State::REG_RDI, Dest_RDI);
        } else {
          StoreGPRRegister(X86State::REG_RDI, Dest_RDI, AddrSize);
        }

        Dest_RSI = Add(AddrSize, Src_RSI, PtrDir * static_cast<int32_t>(IR::OpSizeToSize(Size)));
        if (Is64BitMode && AddrSize == OpSize::i32Bit) {
          Dest_RSI = _Bfe(OpSize::i64Bit, 32, 0, Dest_RSI);
          StoreGPRRegister(X86State::REG_RSI, Dest_RSI);
        } else {
          StoreGPRRegister(X86State::REG_RSI, Dest_RSI, AddrSize);
        }

        // If TailCounter != 0, compare sources.
        // If TailCounter == 0, set ZF iff that would break.
        _CondSubNZCV(OpSize::i64Bit, Src2, Src1, CondClass::NEQ, REPE ? 0 : (1 << 2) /* Z */);
        // NZCV was written directly by the op above; drop any cached copy.
        CachedNZCV = nullptr;
        NZCVDirty = false;
        InnerJump = CondJumpNZCV(REPE ? CondClass::EQ : CondClass::NEQ);

        // Jump back to the start if we have more work to do
        SetTrueJumpTarget(InnerJump, LoopHeader);
      }

      // Make sure to start a new block after ending this one
      auto LoopEnd = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetFalseJumpTarget(InnerJump, LoopEnd);
      SetCurrentCodeBlock(LoopEnd);
      StartNewBlock();
    });

    // Make sure to start a new block after ending this one
    {
      // Grab the sources from the last iteration so we can set flags.
      auto Src1 = LoadGPR(Core::CPUState::PF_AS_GREG);
      auto Src2 = LoadGPR(Core::CPUState::AF_AS_GREG);
      CalculateFlags_SUB(OpSizeFromSrc(Op), Src2, Src1);
    }
    auto Jump_ = Jump();

    // Both the loop fall-through and the initial RCX == 0 check converge on the exit block.
    auto Exit = CreateNewCodeBlockAfter(GetCurrentBlock());
    SetJumpTarget(Jump_, Exit);
    SetTrueJumpTarget(OuterJump, Exit);
    SetCurrentCodeBlock(Exit);
    StartNewBlock();
  }
}

// LODS: loads one element from the RSI address into the instruction's destination operand and
// advances RSI. With a REP/REPNE prefix an explicit IR loop is built that repeats until RCX is zero.
// An address-size override prefix is not supported and is reported as a decode failure.
void OpDispatchBuilder::LODSOp(OpcodeArgs) {
  if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) {
    LogMan::Msg::EFmt("LODSOp: Can't handle address size override (OP: 0x{:04X}, Flags: 0x{:08X})", Op->OP, Op->Flags);
    DecodeFailure = true;
    return;
  }

  const auto Size = OpSizeFromSrc(Op);
  const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX)) != 0;

  if (!Repeat) {
    // Source address: RSI with the instruction's prefix flags, DS as the default segment.
    Ref Dest_RSI = MakeSegmentAddress(X86State::REG_RSI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);

    auto Src = _LoadMemGPRAutoTSO(Size, Dest_RSI, Size);

    StoreResultGPR(Op, Src);

    // Offset the pointer. The raw RSI value is reloaded so that the segmented address above is
    // not what gets written back.
    Ref TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);
    StoreGPRRegister(X86State::REG_RSI, OffsetByDir(TailDest_RSI, IR::OpSizeToSize(Size)));
  } else {
    // Calculate flags early, because the current block is about to end.
    CalculateDeferredFlags();

    // The lambda receives the per-element pointer direction as PtrDir.
    ForeachDirection([this, Op, Size](int32_t PtrDir) {
      // XXX: Theoretically LODS could be optimized to
      // RSI += {-}(RCX * Size)
      // RAX = [RSI - Size]
      // But this might violate the case of an application scanning pages for read permission and catching the fault
      // May or may not matter

      auto JumpStart = Jump();
      // Make sure to start a new block after ending this one
      auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetJumpTarget(JumpStart, LoopStart);
      SetCurrentCodeBlock(LoopStart);
      StartNewBlock();

      // Loop header: test the counter before every iteration.
      Ref Counter = LoadGPRRegister(X86State::REG_RCX);

      // Can we end the block?

      // We leave if RCX = 0
      auto CondJump_ = CondJump(Counter, CondClass::EQ);

      // Loop body block, entered when RCX != 0.
      auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
      SetFalseJumpTarget(CondJump_, LoopTail);
      SetCurrentCodeBlock(LoopTail);
      StartNewBlock();

      // Working loop
      {
        Ref Dest_RSI = MakeSegmentAddress(X86State::REG_RSI, Op->Flags, X86Tables::DecodeFlags::FLAG_DS_PREFIX);

        auto Src = _LoadMemGPRAutoTSO(Size, Dest_RSI, Size);

        StoreResultGPR(Op, Src);

        Ref TailCounter = LoadGPRRegister(X86State::REG_RCX);
        Ref TailDest_RSI = LoadGPRRegister(X86State::REG_RSI);

        // Decrement counter
        TailCounter = Sub(OpSize::i64Bit, TailCounter, 1);

        // Store the counter since we don't have phis
        StoreGPRRegister(X86State::REG_RCX, TailCounter);

        // Offset the pointer by one element in the direction supplied by ForeachDirection
        TailDest_RSI = Add(OpSize::i64Bit, TailDest_RSI, PtrDir * static_cast<int32_t>(IR::OpSizeToSize(Size)));
        StoreGPRRegister(X86State::REG_RSI, TailDest_RSI);

        // Jump back to the start, we have more work to do
        Jump(LoopStart);
      }
      // Make sure to start a new block after ending this one.
      // This is the exit block taken from the header when RCX == 0.
      auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
      SetTrueJumpTarget(CondJump_, LoopEnd);
      SetCurrentCodeBlock(LoopEnd);
      StartNewBlock();
    });
  }
}

// SCAS: compares the instruction's source operand with the element at ES:[RDI], sets the flags as
// for a subtraction (source - memory) and advances RDI.
//
// With a REP (REPE) or REPNE prefix an explicit IR loop is built that repeats while RCX is
// non-zero and the comparison keeps matching the prefix condition.
// The address-size override prefix is supported in 64-bit mode (32-bit addressing) and is a
// decode failure in 32-bit mode.
void OpDispatchBuilder::SCASOp(OpcodeArgs) {
  if (!Is64BitMode && (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE)) {
    LogMan::Msg::EFmt("SCASOp: Address size override (0x67) not supported in 32-bit mode (OP: 0x{:04X}).", Op->OP);
    DecodeFailure = true;
    return;
  }

  const auto Size = OpSizeFromSrc(Op);
  OpSize AddrSize = GetStringOpSize(Op);
  const bool Repeat = (Op->Flags & (FEXCore::X86Tables::DecodeFlags::FLAG_REPNE_PREFIX | FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX)) != 0;

  if (!Repeat) {
    // Raw RDI value and the ES-segmented address derived from it.
    Ref Src_RDI = LoadGPRRegister(X86State::REG_RDI, AddrSize);
    Ref Dest_RDI = AppendSegmentOffset(Src_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

    auto Src1 = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
    auto Src2 = _LoadMemGPRAutoTSO(Size, Dest_RDI, Size);

    CalculateFlags_SUB(OpSizeFromSrc(Op), Src1, Src2);

    // Advance the raw RDI value. With 32-bit addressing in 64-bit mode the result is truncated
    // to 32 bits and written to the full register.
    Ref TailDest_RDI = OffsetByDir(Src_RDI, IR::OpSizeToSize(Size));
    if (Is64BitMode && AddrSize == OpSize::i32Bit) {
      TailDest_RDI = _Bfe(OpSize::i64Bit, 32, 0, TailDest_RDI);
      StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
    } else {
      StoreGPRRegister(X86State::REG_RDI, TailDest_RDI, AddrSize);
    }
  } else {
    // Calculate flags early, because the current block is about to end.
    CalculateDeferredFlags();

    // The lambda receives the per-element pointer direction as Dir.
    ForeachDirection([this, Op, Size, AddrSize](int32_t Dir) {
      // REPE continues while the comparison is equal; REPNE while it is not.
      bool REPE = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REP_PREFIX;

      auto JumpStart = Jump();
      // Make sure to start a new block after ending this one
      auto LoopStart = CreateNewCodeBlockAfter(GetCurrentBlock());
      SetJumpTarget(JumpStart, LoopStart);
      SetCurrentCodeBlock(LoopStart);
      StartNewBlock();

      // Loop header: test the counter before every iteration.
      Ref Counter = LoadGPRRegister(X86State::REG_RCX);

      // Can we end the block?
      // We leave if RCX = 0
      auto CondJump_ = CondJump(Counter, CondClass::EQ);
      IRPair<IROp_CondJump> InternalCondJump;

      // Loop body block, entered when RCX != 0.
      auto LoopTail = CreateNewCodeBlockAfter(LoopStart);
      SetFalseJumpTarget(CondJump_, LoopTail);
      SetCurrentCodeBlock(LoopTail);
      StartNewBlock();

      // Working loop
      {
        Ref Src_RDI = LoadGPRRegister(X86State::REG_RDI, AddrSize);
        Ref Dest_RDI = AppendSegmentOffset(Src_RDI, 0, X86Tables::DecodeFlags::FLAG_ES_PREFIX, true);

        auto Src1 = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
        auto Src2 = _LoadMemGPRAutoTSO(Size, Dest_RDI, Size);

        CalculateFlags_SUB(OpSizeFromSrc(Op), Src1, Src2);

        // Calculate flags early.
        CalculateDeferredFlags();

        Ref TailCounter = LoadGPRRegister(X86State::REG_RCX);
        Ref Src_RDI_Tail = LoadGPRRegister(X86State::REG_RDI, AddrSize);

        // Decrement counter
        TailCounter = Sub(OpSize::i64Bit, TailCounter, 1);

        // Store the counter since we don't have phis
        StoreGPRRegister(X86State::REG_RCX, TailCounter);

        // Advance RDI by one element; truncate to 32 bits for 32-bit addressing in 64-bit mode.
        Ref TailDest_RDI = Add(AddrSize, Src_RDI_Tail, Dir * static_cast<int32_t>(IR::OpSizeToSize(Size)));
        if (Is64BitMode && AddrSize == OpSize::i32Bit) {
          TailDest_RDI = _Bfe(OpSize::i64Bit, 32, 0, TailDest_RDI);
          StoreGPRRegister(X86State::REG_RDI, TailDest_RDI);
        } else {
          StoreGPRRegister(X86State::REG_RDI, TailDest_RDI, AddrSize);
        }

        // Branch on the comparison result: keep looping while the prefix condition holds.
        CalculateDeferredFlags();
        InternalCondJump = CondJumpNZCV(REPE ? CondClass::EQ : CondClass::NEQ);

        // Jump back to the start if we have more work to do
        SetTrueJumpTarget(InternalCondJump, LoopStart);
      }
      // Make sure to start a new block after ending this one.
      // Both exits (RCX == 0 in the header, condition failed in the body) land here.
      auto LoopEnd = CreateNewCodeBlockAfter(LoopTail);
      SetTrueJumpTarget(CondJump_, LoopEnd);

      SetFalseJumpTarget(InternalCondJump, LoopEnd);

      SetCurrentCodeBlock(LoopEnd);
      StartNewBlock();
    });
  }
}

// BSWAP: reverses the byte order of the destination register.
void OpDispatchBuilder::BSWAPOp(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);

  Ref Swapped {};
  if (Size != OpSize::i16Bit) {
    Ref Full = LoadSourceGPR_WithOpSize(Op, Op->Dest, GetGPROpSize(), Op->Flags);
    Swapped = _Rev(Size, Full);
  } else {
    // A 16-bit BSWAP is undefined. ZEN+ zeroes the lower 16 bits, which is what gets emulated here.
    Swapped = Constant(0);
  }

  StoreResultGPR(Op, Swapped);
}

// PUSHF: pushes the packed RFLAGS value onto the stack using the instruction's operand size.
void OpDispatchBuilder::PUSHFOp(OpcodeArgs) {
  const auto PushSize = OpSizeFromSrc(Op);
  Ref PackedFlags = GetPackedRFLAG();

  Push(PushSize, PackedFlags);
}

// POPF: pops a value from the stack into RFLAGS and then exits the block at the next instruction.
void OpDispatchBuilder::POPFOp(OpcodeArgs) {
  const auto PopSize = OpSizeFromSrc(Op);
  Ref Popped = Pop(PopSize);

  // Force the flag bits that are constant under emulation:
  //   bit 1 always reads as 1
  //   bit 9 is always 1 because interrupts are always enabled
  Ref NewFlags = _Or(OpSize::i64Bit, Popped, Constant(0x202));

  SetPackedRFLAG(false, NewFlags);

  // Leave the block at the following instruction, using the CheckTF branch hint.
  auto NextRIP = GetRelocatedPC(Op);
  ExitFunction(NextRIP, BranchHint::CheckTF);
  BlockSetRIP = true;
}

// NEG: two's-complement negation of the destination, with flags computed as for (0 - Dest).
// A LOCK-prefixed memory destination is handled with an atomic fetch-negate.
void OpDispatchBuilder::NEGOp(OpcodeArgs) {
  HandledLock = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK) != 0;

  const auto Size = OpSizeFromSrc(Op);
  auto Zero = Constant(0);

  if (!DestIsLockedMem(Op)) {
    Ref Value = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
    Ref Negated = CalculateFlags_SUB(Size, Zero, Value);

    StoreResultGPR(Op, Negated);
    return;
  }

  // Locked path: memory is updated by the atomic op, only the flags remain to be computed from
  // the value it returned.
  Ref Address = MakeSegmentAddress(Op, Op->Dest);
  Ref Previous = _AtomicFetchNeg(Size, Address);
  CalculateFlags_SUB(Size, Zero, Previous);
}

// DIV: unsigned division of AX (8-bit form) or RDX:RAX (wider forms) by the operand.
// The quotient goes to AL/RAX and the remainder to AH/RDX.
void OpDispatchBuilder::DIVOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  const auto Size = OpSizeFromSrc(Op);

  // Load the divisor. The 32-bit/64-bit paths mask inside the JIT, the 8/16-bit paths do not,
  // so upper garbage is only tolerated for the wide forms.
  Ref Divisor = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = Size >= OpSize::i32Bit});

  if (!Is64BitMode && Size == OpSize::i64Bit) {
    LogMan::Msg::EFmt("Doesn't exist in 32bit mode");
    DecodeFailure = true;
    return;
  }

  Ref Quotient = _AllocateGPR(true);
  Ref Remainder = _AllocateGPR(true);

  if (Size != OpSize::i8Bit) {
    Ref DividendLow = LoadGPRRegister(X86State::REG_RAX);
    Ref DividendHigh = LoadGPRRegister(X86State::REG_RDX);

    _UDiv(Size, DividendLow, DividendHigh, Divisor, Quotient, Remainder);

    // 32-bit results are explicitly extracted and then stored with OpSize::iInvalid instead of
    // the operand size; the other sizes are stored with the operand size.
    auto StoreSize = Size;
    if (Size == OpSize::i32Bit) {
      Quotient = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, Quotient);
      Remainder = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, Remainder);
      StoreSize = OpSize::iInvalid;
    }

    StoreGPRRegister(X86State::REG_RAX, Quotient, StoreSize);
    StoreGPRRegister(X86State::REG_RDX, Remainder, StoreSize);
    return;
  }

  // 8-bit form: the dividend is the 16-bit AX.
  Ref Dividend = LoadGPRRegister(X86State::REG_RAX, OpSize::i16Bit);

  _UDiv(OpSize::i16Bit, Dividend, Invalid(), Divisor, Quotient, Remainder);

  // AX[15:0] = concat<Remainder[7:0]:Quotient[7:0]>
  auto PackedAX = _Bfi(GPRSize, 8, 8, Quotient, Remainder);
  StoreGPRRegister(X86State::REG_RAX, PackedAX, OpSize::i16Bit);
}

// IDIV: signed division of AX (8-bit form) or RDX:RAX (wider forms) by the operand.
// The quotient goes to AL/RAX and the remainder to AH/RDX.
void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
  // Load the divisor first.
  Ref Divisor = LoadSourceGPR(Op, Op->Dest, Op->Flags);

  const auto GPRSize = GetGPROpSize();
  const auto Size = OpSizeFromSrc(Op);

  if (!Is64BitMode && Size == OpSize::i64Bit) {
    LogMan::Msg::EFmt("Doesn't exist in 32bit mode");
    DecodeFailure = true;
    return;
  }

  Ref Quotient = _AllocateGPR(true);
  Ref Remainder = _AllocateGPR(true);

  if (Size != OpSize::i8Bit) {
    Ref DividendLow = LoadGPRRegister(X86State::REG_RAX);
    Ref DividendHigh = LoadGPRRegister(X86State::REG_RDX);

    _Div(Size, DividendLow, DividendHigh, Divisor, Quotient, Remainder);

    // 32-bit results are explicitly extracted and then stored with OpSize::iInvalid instead of
    // the operand size; the other sizes are stored with the operand size.
    auto StoreSize = Size;
    if (Size == OpSize::i32Bit) {
      Quotient = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, Quotient);
      Remainder = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, Remainder);
      StoreSize = OpSize::iInvalid;
    }

    StoreGPRRegister(X86State::REG_RAX, Quotient, StoreSize);
    StoreGPRRegister(X86State::REG_RDX, Remainder, StoreSize);
    return;
  }

  // 8-bit form: sign-extend the 16-bit dividend (AX) and the 8-bit divisor, then divide at 64 bits.
  Ref Dividend = LoadGPRRegister(X86State::REG_RAX);
  Dividend = _Sbfe(OpSize::i64Bit, 16, 0, Dividend);
  Divisor = _Sbfe(OpSize::i64Bit, 8, 0, Divisor);

  _Div(OpSize::i64Bit, Dividend, Invalid(), Divisor, Quotient, Remainder);

  // AX[15:0] = concat<Remainder[7:0]:Quotient[7:0]>
  auto PackedAX = _Bfi(GPRSize, 8, 8, Quotient, Remainder);
  StoreGPRRegister(X86State::REG_RAX, PackedAX, OpSize::i16Bit);
}

// BSF: writes the index of the least-significant set bit of the source to the destination.
// ZF is set when the source is zero, in which case the destination is left unchanged.
void OpDispatchBuilder::BSFOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  // 16-bit destinations keep their size; everything else is handled at full GPR width.
  const auto DstSize = OpSizeFromDst(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;

  Ref PreviousDest = LoadSourceGPR_WithOpSize(Op, Op->Dest, DstSize, Op->Flags, {.AllowUpperGarbage = true});
  Ref Source = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  // Index of the lowest set bit in the source.
  auto LowestSetBit = _FindLSB(OpSizeFromSrc(Op), Source);

  // OF, SF, AF, PF and CF are all undefined.
  // ZF becomes 1 if the source was zero.
  SetZ_InvalidateNCV(OpSizeFromSrc(Op), Source);

  // A zero source leaves the destination unmodified.
  //
  // Intel does not guarantee this, but AMD does and Intel hardware behaves the same way.
  // The stronger AMD behaviour is provided because applications in the wild might rely on it.
  auto Selected = NZCVSelect(GPRSize, CondClass::EQ, PreviousDest, LowestSetBit);
  StoreResultGPR_WithOpSize(Op, Op->Dest, Selected, DstSize);
}

// BSR: writes the index of the most-significant set bit of the source to the destination.
// ZF is set when the source is zero, in which case the destination is left unchanged.
void OpDispatchBuilder::BSROp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  // 16-bit destinations keep their size; everything else is handled at full GPR width.
  const auto DstSize = OpSizeFromDst(Op) == OpSize::i16Bit ? OpSize::i16Bit : GPRSize;

  Ref PreviousDest = LoadSourceGPR_WithOpSize(Op, Op->Dest, DstSize, Op->Flags, {.AllowUpperGarbage = true});
  Ref Source = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  // Index of the highest set bit in the source.
  auto HighestSetBit = _FindMSB(OpSizeFromSrc(Op), Source);

  // OF, SF, AF, PF and CF are all undefined.
  // ZF becomes 1 if the source was zero.
  SetZ_InvalidateNCV(OpSizeFromSrc(Op), Source);

  // A zero source leaves the destination unmodified.
  auto Selected = NZCVSelect(GPRSize, CondClass::EQ, PreviousDest, HighestSetBit);
  StoreResultGPR_WithOpSize(Op, Op->Dest, Selected, DstSize);
}

// CMPXCHG: compares RAX with the destination operand; on a match the destination receives the
// source register, otherwise RAX receives the destination value. Flags are set as for
// (RAX - destination).
//
// A register destination is emulated with selects on the comparison flags; a memory destination
// is emulated with a CAS IR op.
void OpDispatchBuilder::CMPXCHGOp(OpcodeArgs) {
  // CMPXCHG ModRM, reg, {RAX}
  // MemData = *ModRM.dest
  // if (RAX == MemData)
  //    modRM.dest = reg;
  //    ZF = 1
  // else
  //    ZF = 0
  // RAX = MemData
  //
  // CASL Xs, Xt, Xn
  // MemData = *Xn
  // if (MemData == Xs)
  //    *Xn = Xt
  // Xs = MemData

  const auto GPRSize = GetGPROpSize();
  auto Size = OpSizeFromSrc(Op);

  if (Op->Dest.IsGPR()) {
    // This is our source register
    Ref Src2 = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});
    // The accumulator (comparison value)
    Ref Src3 = LoadGPRRegister(X86State::REG_RAX);

    // If the destination is also the accumulator, we get some algebraic
    // simplifications. Not sure if this is actually hit but it's in
    // InstCountCI.
    bool Trivial = Op->Dest.Data.GPR.GPR == X86State::REG_RAX && !Op->Dest.IsGPRDirect() && !Op->Dest.Data.GPR.HighBits;

    // Src1 is the destination's current value; Src1Lower is the part of it that takes part in the comparison.
    Ref Src1 {};
    Ref Src1Lower {};

    if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
      // 32-bit operation on a 64-bit GPR: load the full register and extract the low 32 bits
      // for the comparison (not needed in the trivial case).
      Src1 = LoadSourceGPR_WithOpSize(Op, Op->Dest, GPRSize, Op->Flags, {.AllowUpperGarbage = true});
      Src1Lower = Trivial ? Src1 : _Bfe(GPRSize, IR::OpSizeAsBits(Size), 0, Src1);
    } else {
      Src1 = LoadSourceGPR_WithOpSize(Op, Op->Dest, Size, Op->Flags, {.AllowUpperGarbage = true});
      Src1Lower = Src1;
    }

    // Compare RAX with the destination, setting flags accordingly.
    CalculateFlags_SUB(OpSizeFromSrc(Op), Src3, Src1Lower);
    CalculateDeferredFlags();

    if (!Trivial) {
      if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
        // This allows us to only hit the ZEXT case on failure
        Ref RAXResult = NZCVSelect(OpSize::i64Bit, CondClass::EQ, Src3, Src1Lower);

        // When the size is 4 we need to make sure not zext the GPR when the comparison fails
        StoreGPRRegister(X86State::REG_RAX, RAXResult);
      } else {
        StoreGPRRegister(X86State::REG_RAX, Src1Lower, Size);
      }
    }

    // Op1 = RAX == Op1 ? Op2 : Op1
    // If they match then set the rm operand to the input
    // else don't set the rm operand
    Ref Src2Lower = Src2;
    if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
      Src2Lower = _Bfe(GPRSize, IR::OpSizeAsBits(Size), 0, Src2);
    }
    Ref DestResult = Trivial ? Src2 : NZCVSelect(OpSize::i64Bit, CondClass::EQ, Src2Lower, Src1);

    // Store in to GPR Dest
    if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
      StoreResultGPR_WithOpSize(Op, Op->Dest, DestResult, GPRSize);
    } else {
      StoreResultGPR(Op, DestResult);
    }
  } else {
    // Memory destination: Src2 is the value to store on success.
    Ref Src2 = LoadSourceGPR(Op, Op->Src[0], Op->Flags);
    HandledLock = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;

    // Src3 is the full accumulator; Src3Lower is its operand-sized part used for the flag comparison.
    auto Src3 = LoadGPRRegister(X86State::REG_RAX);
    auto Src3Lower = _Bfe(OpSize::i64Bit, OpSizeAsBits(Size), 0, Src3);

    // If this is a memory location then we want the pointer to it
    Ref Src1 = MakeSegmentAddress(Op, Op->Dest);

    // DataSrc = *Src1
    // if (DataSrc == Src3) { *Src1 = Src2; } Src2 = DataSrc
    // This will write to memory! Careful!
    // Third operand must be a calculated guest memory address
    Ref CASResult = _CAS(Size, Src3, Src2, Src1);
    Ref RAXResult = CASResult;

    CalculateFlags_SUB(OpSizeFromSrc(Op), Src3Lower, CASResult);
    CalculateDeferredFlags();

    if (GPRSize == OpSize::i64Bit && Size == OpSize::i32Bit) {
      // This allows us to only hit the ZEXT case on failure
      RAXResult = _NZCVSelect(OpSize::i64Bit, CondClass::EQ, Src3, CASResult);
      Size = OpSize::i64Bit;
    }

    // RAX gets the result of the CAS op
    StoreGPRRegister(X86State::REG_RAX, RAXResult, Size);
  }
}

// Emits IR for the paired compare-exchange (the REX.W flag selects between the
// 64-bit-per-half and 32-bit-per-half forms, see `Size` below).
//
// The expected value is taken from RDX:RAX, the desired value from RCX:RBX and
// the destination is always a memory operand. After the CAS, the pair read from
// memory is compared against the expected pair and RAX/RDX are conditionally
// updated from the CAS result.
void OpDispatchBuilder::CMPXCHGPairOp(OpcodeArgs) {
  // Calculate flags early.
  CalculateDeferredFlags();

  // REX.W used to determine if it is 16byte or 8byte
  // Unlike CMPXCHG, the destination can only be a memory location
  const auto Size = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING ? OpSize::i64Bit : OpSize::i32Bit;

  // Record that the LOCK prefix (if present) has been consumed by this handler.
  HandledLock = (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK) != 0;

  // If this is a memory location then we want the pointer to it
  Ref Src1 = MakeSegmentAddress(Op, Op->Dest);

  // Load the full 64-bit registers, all the users ignore the upper 32-bits for
  // 32-bit only cmpxchg. This saves some zero extension.
  Ref Expected_Lower = LoadGPRRegister(X86State::REG_RAX);
  Ref Expected_Upper = LoadGPRRegister(X86State::REG_RDX);
  Ref Desired_Lower = LoadGPRRegister(X86State::REG_RBX);
  Ref Desired_Upper = LoadGPRRegister(X86State::REG_RCX);

  // ssa0 = Expected
  // ssa1 = Desired
  // ssa2 = MemoryLocation

  // DataSrc = *MemSrc
  // if (DataSrc == Expected) { *MemSrc = Desired; } Expected = DataSrc
  // This will write to memory! Careful!
  // Third operand must be a calculated guest memory address

  // The CAS result is returned through two explicitly allocated GPRs; the
  // upper half is allocated relative to the lower half.
  Ref Result_Lower = _AllocateGPR(true);
  Ref Result_Upper = _AllocateGPRAfter(Result_Lower);
  _CASPair(Size, Expected_Lower, Expected_Upper, Desired_Lower, Desired_Upper, Src1, Result_Lower, Result_Upper);

  // Compare the pair read back from memory with the expected pair.
  // NOTE(review): presumably _CmpPairZ only produces the Z flag, which is why
  // NZCV is handled as read-modify-write here - confirm against the IR definition.
  HandleNZCV_RMW();
  _CmpPairZ(Size, Result_Lower, Result_Upper, Expected_Lower, Expected_Upper);
  CalculateDeferredFlags();

  // Writes `Value` to `Reg` when the comparison above was "not equal",
  // otherwise rewrites the register with its current contents.
  auto UpdateIfNotZF = [this](auto Reg, auto Value) {
    // Always use 64-bit csel to preserve existing upper bits. If we have a
    // 32-bit cmpxchg in a 64-bit context, Value will be zeroed in upper bits.
    StoreGPRRegister(Reg, NZCVSelect(OpSize::i64Bit, CondClass::NEQ, Value, LoadGPRRegister(Reg)));
  };

  UpdateIfNotZF(X86State::REG_RAX, Result_Lower);
  UpdateIfNotZF(X86State::REG_RDX, Result_Upper);
}

// Creates one code node for every decoded block, registers it in JumpTargets
// (keyed by the block's entry address, initially marked as not emitted) and
// links each node after the node created for the preceding block.
void OpDispatchBuilder::CreateJumpBlocks(const fextl::vector<FEXCore::Frontend::Decoder::DecodedBlocks>* Blocks) {
  Ref Previous {};

  for (auto& Decoded : *Blocks) {
    // Node offsets are stored relative to the function entry point.
    auto Node = CreateCodeNode(Decoded.IsEntryPoint, Decoded.Entry - Entry);
    JumpTargets.try_emplace(Decoded.Entry, JumpTargetInfo {Node, false, Decoded.IsEntryPoint});

    // The very first node has no predecessor to link from.
    if (Previous) {
      LinkCodeBlocks(Previous, Node);
    }
    Previous = Node;
  }
}

// Starts emission of a new IR function whose entry point is `RIP`.
//
// Records the entry address and mode flags, emits the IR header, creates the
// code nodes for every decoded block and makes the block for `RIP` the current
// code block.
void OpDispatchBuilder::BeginFunction(uint64_t RIP, const fextl::vector<FEXCore::Frontend::Decoder::DecodedBlocks>* Blocks,
                                      uint32_t NumInstructions, bool _Is64BitMode, bool MonoBackpatcherBlock) {
  Entry = RIP;
  Is64BitMode = _Is64BitMode;
  LOGMAN_THROW_A_FMT(Is64BitMode == CTX->Config.Is64BitMode, "Expected operating mode to not change at runtime!");
  IsMonoBackpatcherBlock = MonoBackpatcherBlock;

  // The header must be emitted before any code node is created.
  auto Header = _IRHeader(InvalidNode, RIP, 0, NumInstructions, 0, 0);
  CreateJumpBlocks(Blocks);

  auto EntryBlock = GetNewJumpBlock(RIP);
  SetCurrentCodeBlock(EntryBlock);

  // Point the header at the first block and remember it for later use.
  auto* HeaderOp = Header.first;
  HeaderOp->Blocks = EntryBlock->Wrapped(DualListData.ListBegin());
  CurrentHeader = HeaderOp;
}

void OpDispatchBuilder::Finalize() {
  // This usually doesn't emit any IR but in the case of hitting the block instruction limit it will
  FlushRegisterCache();
  const auto GPRSize = GetGPROpSize();

  // Node 0 is invalid node
  Ref RealNode = reinterpret_cast<Ref>(GetNode(1));

  const FEXCore::IR::IROp_Header* IROp = RealNode->Op(DualListData.DataBegin());
  LOGMAN_THROW_A_FMT(IROp->Op == OP_IRHEADER, "First op in function must be our header");

  // Let's walk the jump blocks and see if we have handled every block target
  for (auto& Handler : JumpTargets) {
    if (Handler.second.HaveEmitted) {
      continue;
    }

    // We haven't emitted. Dump out to the dispatcher
    SetCurrentCodeBlock(Handler.second.BlockEntry);
    ExitFunction(_InlineEntrypointOffset(GPRSize, Handler.first - Entry));
  }
}

// Returns the destination operand size of `Op` in bytes.
// The decode flags store the size as (log2(bytes) + 1); zero and SIZE_MASK
// are rejected by the assertion.
uint8_t OpDispatchBuilder::GetDstSize(X86Tables::DecodedOp Op) const {
  const uint32_t SizeFlag = X86Tables::DecodeFlags::GetSizeDstFlags(Op->Flags);
  LOGMAN_THROW_A_FMT(!(SizeFlag == 0 || SizeFlag == X86Tables::DecodeFlags::SIZE_MASK), "Invalid destination size for op");

  const uint32_t Log2Bytes = SizeFlag - 1;
  return 1u << Log2Bytes;
}

// Returns the source operand size of `Op` in bytes.
// The decode flags store the size as (log2(bytes) + 1); zero and SIZE_MASK
// are rejected by the assertion.
uint8_t OpDispatchBuilder::GetSrcSize(X86Tables::DecodedOp Op) const {
  const uint32_t SrcSizeFlag = X86Tables::DecodeFlags::GetSizeSrcFlags(Op->Flags);
  // The message names the *source* size so a failure here is not mistaken for one in GetDstSize.
  LOGMAN_THROW_A_FMT(SrcSizeFlag != 0 && SrcSizeFlag != X86Tables::DecodeFlags::SIZE_MASK, "Invalid source size for op");
  return 1u << (SrcSizeFlag - 1);
}

// Returns the source operand size of `Op` in bits.
uint32_t OpDispatchBuilder::GetSrcBitSize(X86Tables::DecodedOp Op) const {
  constexpr uint32_t BitsPerByte = 8;
  const uint32_t SizeInBytes = GetSrcSize(Op);
  return SizeInBytes * BitsPerByte;
}

// Returns the destination operand size of `Op` in bits.
uint32_t OpDispatchBuilder::GetDstBitSize(X86Tables::DecodedOp Op) const {
  constexpr uint32_t BitsPerByte = 8;
  const uint32_t SizeInBytes = GetDstSize(Op);
  return SizeInBytes * BitsPerByte;
}

// Loads the cached base of the segment selected by the prefix bits in `Flags`.
//
// In 64-bit mode only FS and GS prefixes yield a value; everything else gives
// nullptr. In 32-bit mode `DefaultPrefix` is used when no prefix is present or
// when `Override` is set, and nullptr is returned when the resulting prefix is
// FLAG_NO_PREFIX. 32-bit loads are additionally passed to CheckLegacySegmentRead.
Ref OpDispatchBuilder::GetSegment(uint32_t Flags, uint32_t DefaultPrefix, bool Override) {
  const auto GPRSize = GetGPROpSize();
  uint32_t ActivePrefix = Flags & FEXCore::X86Tables::DecodeFlags::FLAG_SEGMENTS;

  if (Is64BitMode) {
    switch (ActivePrefix) {
    case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX: return _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, fs_cached));
    case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX: return _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, gs_cached));
    // If there was any other segment in 64bit then it is ignored
    default: return nullptr;
    }
  }

  // If there was no prefix then use the default one if available
  // Or the argument only uses a specific prefix (with override set)
  if (Override || ActivePrefix == FEXCore::X86Tables::DecodeFlags::FLAG_NO_PREFIX) {
    ActivePrefix = DefaultPrefix;
  }

  if (ActivePrefix == FEXCore::X86Tables::DecodeFlags::FLAG_NO_PREFIX) [[likely]] {
    return nullptr;
  }

  // With the segment register optimization we store the GDT bases directly in the segment register to remove indexed loads
  Ref CachedBase {};
  switch (ActivePrefix) {
  case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, es_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, cs_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, ss_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, ds_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, fs_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX:
    CachedBase = _LoadContextGPR(GPRSize, offsetof(FEXCore::Core::CPUState, gs_cached));
    break;
  default: FEX_UNREACHABLE;
  }

  CheckLegacySegmentRead(CachedBase, ActivePrefix);
  return CachedBase;
}

// Adds the selected segment base (if GetSegment returns one) to `Value`.
// When no segment applies, `Value` is returned untouched.
Ref OpDispatchBuilder::AppendSegmentOffset(Ref Value, uint32_t Flags, uint32_t DefaultPrefix, bool Override) {
  Ref SegmentBase = GetSegment(Flags, DefaultPrefix, Override);
  if (!SegmentBase) {
    return Value;
  }

  // The add is performed at the widest operand size, but never below 32 bits.
  const auto WidestOperand = std::max(GetOpSize(Value), GetOpSize(SegmentBase));
  const auto AddSize = std::max(OpSize::i32Bit, WidestOperand);
  return Add(AddSize, Value, SegmentBase);
}


// Emits telemetry for a read of a legacy (ES/CS/SS/DS) segment register.
// Compiles to nothing when FEX_DISABLE_TELEMETRY is defined.
void OpDispatchBuilder::CheckLegacySegmentRead(Ref NewNode, uint32_t SegmentReg) {
#ifndef FEX_DISABLE_TELEMETRY
  // FS and GS segments aren't considered legacy.
  const bool IsFSOrGS =
    SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX;

  // Bail out when (in this order): the segment isn't legacy, this segment has
  // already been checked since its last write, or telemetry is disabled at runtime.
  if (IsFSOrGS || !(SegmentsNeedReadCheck & SegmentReg) || CTX->Config.DisableTelemetry()) {
    return;
  }

  // Select the telemetry slot and clear the pending read-check bit so repeated
  // reads in this block are skipped until the next segment write.
  FEXCore::Telemetry::TelemetryType Slot {};
  switch (SegmentReg) {
  case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_ES;
    SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_CS;
    SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_SS;
    SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_USES_32BIT_SEGMENT_DS;
    SegmentsNeedReadCheck &= ~FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX;
    break;
  default: FEX_UNREACHABLE;
  }

  // Will set the telemetry value if NewNode is != 0
  _TelemetrySetValue(NewNode, Slot);
  // Telemetry will dirty flags, and user code does not expect LoadSource to clobber flags, fix that up here as this is an edge case.
  CalculateDeferredFlags();
#endif
}

// Emits telemetry for a write to a legacy (ES/CS/SS/DS) segment register and
// re-arms the read check for that segment.
// Compiles to nothing when FEX_DISABLE_TELEMETRY is defined.
void OpDispatchBuilder::CheckLegacySegmentWrite(Ref NewNode, uint32_t SegmentReg) {
#ifndef FEX_DISABLE_TELEMETRY
  // FS and GS segments aren't considered legacy.
  const bool IsFSOrGS =
    SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX || SegmentReg == FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX;

  // Bail out when the segment isn't legacy or telemetry is disabled at runtime.
  if (IsFSOrGS || CTX->Config.DisableTelemetry()) {
    return;
  }

  // Select the telemetry slot and set the pending read-check bit so the next
  // read of this segment is checked again.
  FEXCore::Telemetry::TelemetryType Slot {};
  switch (SegmentReg) {
  case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_ES;
    SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_CS;
    SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_SS;
    SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX;
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
    Slot = FEXCore::Telemetry::TelemetryType::TYPE_WRITES_32BIT_SEGMENT_DS;
    SegmentsNeedReadCheck |= FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX;
    break;
  default: FEX_UNREACHABLE;
  }

  // Will set the telemetry value if NewNode is != 0
  _TelemetrySetValue(NewNode, Slot);
  // Telemetry will dirty flags, and user code does not expect LoadSource to clobber flags, fix that up here as this is an edge case.
  CalculateDeferredFlags();
#endif
}

// Refreshes the cached segment base for `SegmentReg` from the selector value
// in `Segment`.
//
// The selector is used to look up an 8-byte descriptor in one of the tables
// referenced by CPUState::segment_arrays, the 32-bit base is reassembled from
// that descriptor and stored into the matching `*_cached` context field.
// Unknown `SegmentReg` values store nothing.
void OpDispatchBuilder::UpdatePrefixFromSegment(Ref Segment, uint32_t SegmentReg) {
  // Extract the single bit at position 2 of the selector. It is used below as
  // the index into segment_arrays, i.e. it picks which descriptor table to read.
  // NOTE(review): on x86 this bit is the selector's table indicator (GDT vs LDT) - confirm
  // the ordering of segment_arrays matches.
  // In some cases the upper 16-bits of the 32-bit GPR contain garbage to ignore.
  auto GDT = _Bfe(OpSize::i32Bit, 1, 2, Segment);
  // Fun quirk, if we mask the selector then it is premultiplied by 8 which we need to do for accessing anyway.
  // (The mask keeps selector bits [15,3], which is the descriptor index scaled by the 8-byte descriptor size.)
  auto SegmentOffset = _And(OpSize::i32Bit, Segment, _Constant(0xfff8));
  // Load the table pointer selected by the bit above, then the 64-bit descriptor at that offset.
  Ref SegmentBase = _LoadContextGPRIndexed(GDT, OpSize::i64Bit, offsetof(FEXCore::Core::CPUState, segment_arrays[0]), 8);
  Ref NewSegment = _LoadMemGPR(OpSize::i64Bit, SegmentBase, SegmentOffset, OpSize::i8Bit, MemOffsetType::UXTW, 1);
  CheckLegacySegmentWrite(NewSegment, SegmentReg);

  // Extract the 32-bit base from the GDT segment.
  // The base is scattered across the descriptor, so it is stitched together from:
  //   - descriptor bits [63,56], kept in place in the upper byte (Masked),
  //   - descriptor bits [31,16], shifted down by 16 (the 32-bit _Orlshr),
  //   - the low 8 bits of Upper32 (descriptor bits [39,32]).
  // NOTE(review): the final _Bfi is assumed to take (size, width, lsb, dest, src), i.e. insert
  // 8 bits at bit 16 of Merged - confirm against the IR emitter.
  auto Upper32 = _Lshr(OpSize::i64Bit, NewSegment, _Constant(32));
  auto Masked = _And(OpSize::i32Bit, Upper32, _Constant(0xFF00'0000));
  Ref Merged = _Orlshr(OpSize::i32Bit, Masked, NewSegment, 16);
  NewSegment = _Bfi(OpSize::i32Bit, 8, 16, Merged, Upper32);

  // Store the reassembled base into the cached field for the written segment register.
  switch (SegmentReg) {
  case FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, es_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, cs_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, ss_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, ds_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, fs_cached));
    break;
  case FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX:
    _StoreContextGPR(OpSize::i32Bit, NewSegment, offsetof(FEXCore::Core::CPUState, gs_cached));
    break;
  default: break; // Do nothing
  }
}

// Translates a decoded x86 operand into an AddressMode description
// (segment, base, index/scale, constant offset, address size and TSO hint).
//
// Op:         the instruction owning the operand; its flags supply the segment
//             prefix, the address-size override and the VSIB marker.
// Operand:    the operand to describe.
// AccessType: memory access classification; NONTSO and STREAM mark the access
//             as non-TSO up front, and it is forwarded to IsNonTSOReg for the
//             registers taking part in the address.
// IsLoad:     true when decoding for a load. Only loads zero-extend narrow
//             literals and only loads treat the operand as VSIB.
//
// Plain GPR operands are not addresses; for them only AddrSize is set and the
// caller is expected to fill in the rest.
AddressMode OpDispatchBuilder::DecodeAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand,
                                             MemoryAccessType AccessType, bool IsLoad) {
  const auto GPRSize = GetGPROpSize();

  AddressMode A {};
  A.Segment = GetSegment(Op->Flags);
  // The address-size override prefix halves the address width relative to the GPR size.
  A.AddrSize = (Op->Flags & X86Tables::DecodeFlags::FLAG_ADDRESS_SIZE) != 0 ? (GPRSize >> 1) : GPRSize;
  A.NonTSO = AccessType == MemoryAccessType::NONTSO || AccessType == MemoryAccessType::STREAM;

  if (Operand.IsLiteral()) {
    A.Offset = Operand.Literal();

    // 8-byte literals already fill the whole offset; skipping them also avoids a
    // shift by 64 in the mask computation below.
    if (Operand.Data.Literal.Size != 8 && IsLoad) {
      // zero extend
      uint64_t width = Operand.Data.Literal.Size * 8;
      A.Offset &= ((1ULL << width) - 1);
    }
  } else if (Operand.IsGPR()) {
    // Not an address, let the caller deal with it
    A.AddrSize = GPRSize;
  } else if (Operand.IsGPRDirect()) {
    // Address is the register contents.
    A.Base = LoadGPRRegister(Operand.Data.GPR.GPR, GPRSize);
    A.NonTSO |= IsNonTSOReg(AccessType, Operand.Data.GPR.GPR);
  } else if (Operand.IsGPRIndirect() || Operand.IsGPRIndirectRelocation()) {
    // Address is register + displacement. For relocations the displacement is
    // materialised as an entrypoint offset and folded into the base instead of
    // being kept as a constant offset.
    A.Base = LoadGPRRegister(Operand.Data.GPRIndirect.GPR, GPRSize);
    if (Operand.IsGPRIndirectRelocation()) {
      A.Base = Add(GPRSize, _EntrypointOffset(GPRSize, Operand.Data.GPRIndirect.Displacement), A.Base);
    } else {
      A.Offset = static_cast<int32_t>(Operand.Data.GPRIndirect.Displacement);
    }
    A.NonTSO |= IsNonTSOReg(AccessType, Operand.Data.GPRIndirect.GPR);
  } else if (Operand.IsRIPRelative() || Operand.IsRIPRelativeRelocation()) {
    if (Is64BitMode) {
      A.Base = GetRelocatedPC(Op, static_cast<int32_t>(Operand.Data.RIPLiteral.Value));
    } else {
      // 32bit this isn't RIP relative but instead absolute
      if (Operand.IsRIPRelativeRelocation()) {
        A.Base = _EntrypointOffset(GPRSize, Operand.Data.RIPLiteral.Value);
      } else {
        A.Offset = Operand.Data.RIPLiteral.Value;
      }
    }
  } else if (Operand.IsSIB() || Operand.IsSIBRelocation()) {
    const bool IsVSIB = IsLoad && ((Op->Flags & X86Tables::DecodeFlags::FLAG_VSIB_BYTE) != 0);

    if (Operand.Data.SIB.Base != FEXCore::X86State::REG_INVALID) {
      A.Base = LoadGPRRegister(Operand.Data.SIB.Base, GPRSize);
    }

    // NOTE: VSIB cannot have the index * scale portion calculated ahead of time,
    //       since the index in this case is a vector. So, we can't just apply the scale
    //       to it, since this needs to be applied to each element in the index register
    //       after said element has been sign extended. So, we pass this through for the
    //       instruction implementation to handle.
    //
    //       What we do handle though, is the applying the displacement value to
    //       the base register (if a base register is provided), since this is a
    //       part of the address calculation that can be done ahead of time.
    if (!IsVSIB && Operand.Data.SIB.Index != FEXCore::X86State::REG_INVALID) {
      A.Index = LoadGPRRegister(Operand.Data.SIB.Index, GPRSize);
      A.IndexScale = Operand.Data.SIB.Scale;
    }

    if (Operand.IsSIBRelocation()) {
      // Relocated displacement: fold the entrypoint offset into the base, or use
      // it as the base outright when the SIB has no base register.
      auto EPOffset = _EntrypointOffset(GPRSize, Operand.Data.SIB.Offset);
      if (A.Base) {
        A.Base = Add(GPRSize, EPOffset, A.Base);
      } else {
        A.Base = EPOffset;
      }
    } else {
      A.Offset = static_cast<int32_t>(Operand.Data.SIB.Offset);
    }

    A.NonTSO |= IsNonTSOReg(AccessType, Operand.Data.SIB.Base) || IsNonTSOReg(AccessType, Operand.Data.SIB.Index);
  } else if (Operand.IsLiteralRelocation()) {
    A.Base = _EntrypointOffset(GPRSize, Operand.Data.LiteralRelocation.EntrypointOffset);
  } else {
    LOGMAN_MSG_A_FMT("Unknown Src Type: {}\n", Operand.Type);
  }

  return A;
}


// Loads the value of a source operand at an explicit operation size.
//
// Class:   register class passed through to the memory load.
// Op:      instruction owning the operand.
// Operand: operand to load.
// OpSize:  size of the value to produce.
// Flags:   not referenced by this implementation.
// Options: alignment, whether memory operands are actually loaded (LoadData),
//          whether a load is forced (ForceLoad), the memory access type, and
//          whether upper register bits may be left uncleared.
//
// Register operands (MMX, XMM or GPR) are read from guest state. Memory
// operands are loaded when LoadData is set; otherwise (and for register
// operands without ForceLoad) the result of LoadEffectiveAddress is returned.
Ref OpDispatchBuilder::LoadSource_WithOpSize(RegClass Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand,
                                             IR::OpSize OpSize, uint32_t Flags, const LoadSourceOptions& Options) {
  auto [Align, LoadData, ForceLoad, AccessType, AllowUpperGarbage] = Options;
  AddressMode A = DecodeAddress(Op, Operand, AccessType, true /* IsLoad */);

  if (Operand.IsGPR()) {
    const auto gpr = Operand.Data.GPR.GPR;
    const auto highIndex = Operand.Data.GPR.HighBits ? 1 : 0;

    // Register numbers are checked from the highest range downwards: MMX, then XMM, then plain GPRs.
    if (gpr >= FEXCore::X86State::REG_MM_0) {
      LOGMAN_THROW_A_FMT(OpSize == OpSize::i64Bit, "full");

      // Switch the x87/MMX state over to MMX before touching an MM register.
      if (MMXState != MMXState_MMX) {
        ChgStateX87_MMX();
      }

      A.Base = LoadContext(OpSize::i64Bit, MM0Index + gpr - FEXCore::X86State::REG_MM_0);
    } else if (gpr >= FEXCore::X86State::REG_XMM_0) {
      const auto gprIndex = gpr - X86State::REG_XMM_0;

      // Load the full register size if it is a XMM register source.
      A.Base = LoadXMMRegister(gprIndex);

      // Now extract the subregister if it was a partial load /smaller/ than SSE size
      // TODO: Instead of doing the VMov implicitly on load, hunt down all use cases that require partial loads and do it after load.
      // We don't have information here to know if the operation needs zero upper bits or can contain data.
      if (!AllowUpperGarbage && OpSize < OpSize::i128Bit) {
        A.Base = _VMov(OpSize, A.Base);
      }
    } else {
      // High-byte registers (HighBits set) are read from bit offset 8.
      A.Base = LoadGPRRegister(gpr, OpSize, highIndex ? 8 : 0, AllowUpperGarbage);
    }
  }

  if ((IsOperandMem(Operand, true) && LoadData) || ForceLoad) {
    if (OpSize == OpSize::f80Bit) {
      Ref MemSrc = LoadEffectiveAddress(this, A, GetGPROpSize(), true);
      if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
        return _LoadMemX87SVEOptPredicate(OpSize::i128Bit, OpSize::i16Bit, MemSrc);
      } else {
        // For X87 extended doubles, Split the load.
        // First the low 64 bits, then the remaining 16 bits from MemSrc + 8 are
        // inserted as 16-bit element 4 of the 128-bit result.
        auto Res = _LoadMem(Class, OpSize::i64Bit, MemSrc, Align == OpSize::iInvalid ? OpSize : Align);
        return _VLoadVectorElement(OpSize::i128Bit, OpSize::i16Bit, Res, 4, Add(OpSize::i64Bit, MemSrc, 8));
      }
    }

    // An unspecified alignment defaults to the operation size.
    return _LoadMemAutoTSO(Class, OpSize, A, Align == OpSize::iInvalid ? OpSize : Align);
  } else {
    return LoadEffectiveAddress(this, A, GetGPROpSize(), false, AllowUpperGarbage);
  }
}

// Loads guest GPR `GPR`, optionally extracting a sub-register.
//
// Size:              size of the wanted value; OpSize::iInvalid means the full GPR size.
// Offset:            bit offset of the wanted value inside the register.
// AllowUpperGarbage: when set, bits above `Size` are not cleared; a non-zero
//                    offset is then handled with a plain right shift.
Ref OpDispatchBuilder::LoadGPRRegister(uint32_t GPR, IR::OpSize Size, uint8_t Offset, bool AllowUpperGarbage) {
  const auto GPRSize = GetGPROpSize();
  if (Size == OpSize::iInvalid) {
    Size = GPRSize;
  }

  Ref FullRegister = LoadGPR(GPR);

  // A full-width read at offset zero, or a partial read whose upper bits don't
  // matter, can use the register as-is.
  const bool NeedsMasking = !AllowUpperGarbage && Size != GPRSize;
  if (!NeedsMasking && Offset == 0) {
    return FullRegister;
  }

  // Extract the subregister if requested. Operations are at least 32 bits wide.
  const auto ExtractSize = std::max(OpSize::i32Bit, Size);
  if (AllowUpperGarbage) {
    return _Lshr(ExtractSize, FullRegister, Constant(Offset));
  }
  return _Bfe(ExtractSize, IR::OpSizeAsBits(Size), Offset, FullRegister);
}

void OpDispatchBuilder::StoreGPRRegister(uint32_t GPR, const Ref Src, IR::OpSize Size, uint8_t Offset) {
  const auto GPRSize = GetGPROpSize();
  if (Size == OpSize::iInvalid) {
    Size = GPRSize;
  }

  Ref Reg = Src;
  if (Size != GPRSize || Offset != 0) {
    // Need to do an insert if not automatic size or zero offset.
    Reg = ARef(Reg).BfiInto(LoadGPRRegister(GPR), Offset, IR::OpSizeAsBits(Size));
  }

  StoreRegister(GPR, false, Reg);
}

// Stores `Src` into guest XMM register number `XMM`.
void OpDispatchBuilder::StoreXMMRegister(uint32_t XMM, const Ref Src) {
  // StoreGPRRegister passes `false` for this argument, the XMM path passes `true`.
  constexpr bool IsVectorRegister = true;
  StoreRegister(XMM, IsVectorRegister, Src);
}

// Loads a source operand using the instruction's own source size.
// Thin wrapper around LoadSource_WithOpSize.
Ref OpDispatchBuilder::LoadSource(RegClass Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                                  const LoadSourceOptions& Options) {
  return LoadSource_WithOpSize(Class, Op, Operand, OpSizeFromSrc(Op), Flags, Options);
}

// Stores `Src` into the destination described by `Operand` at an explicit size.
//
// Class:      register class of the value being stored.
// Op:         instruction owning the operand.
// Operand:    destination operand (MMX/XMM/GPR register or a memory operand).
// Src:        value to store.
// OpSize:     size of the store.
// Align:      alignment hint for memory stores; OpSize::iInvalid defaults to `OpSize`
//             on the non-x87 path.
// AccessType: memory access classification forwarded to DecodeAddress.
//
// Register destinations are handled first and return early; everything else is
// treated as a memory store.
void OpDispatchBuilder::StoreResult_WithOpSize(RegClass Class, FEXCore::X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand,
                                               Ref Src, IR::OpSize OpSize, IR::OpSize Align, MemoryAccessType AccessType) {
  if (Operand.IsGPR()) {
    // 8Bit and 16bit destination types store their result without effecting the upper bits
    // 32bit ops ZEXT the result to 64bit
    const auto GPRSize = GetGPROpSize();

    const auto gpr = Operand.Data.GPR.GPR;
    // Register numbers are checked from the highest range downwards: MMX, then XMM, then plain GPRs.
    if (gpr >= FEXCore::X86State::REG_MM_0) {
      LOGMAN_THROW_A_FMT(OpSize == OpSize::i64Bit, "full");
      LOGMAN_THROW_A_FMT(Class == RegClass::FPR, "MMX is floaty");

      // Switch the x87/MMX state over to MMX before touching an MM register.
      if (MMXState != MMXState_MMX) {
        ChgStateX87_MMX();
      }

      uint8_t Index = MM0Index + gpr - FEXCore::X86State::REG_MM_0;
      StoreContext(Index, Src);
      // Flag this cache slot in the register cache's Partial mask.
      RegCache.Partial |= (1ull << (uint64_t)Index);
    } else if (gpr >= FEXCore::X86State::REG_XMM_0) {
      const auto gprIndex = gpr - X86State::REG_XMM_0;
      const auto VectorSize = GetGuestVectorLength();

      auto Result = Src;
      if (OpSize != VectorSize) {
        // Partial writes can come from FPRs.
        // TODO: Fix the instructions doing partial writes rather than dealing with it here.

        LOGMAN_THROW_A_FMT(Class != RegClass::GPR, "Partial writes from GPR not allowed. Instruction: {}", Op->TableInfo->Name);

        // XMM-size is handled in implementations.
        // Every other partial size is inserted as element 0 into the existing register contents.
        if (VectorSize != OpSize::i256Bit || OpSize != OpSize::i128Bit) {
          auto SrcVector = LoadXMMRegister(gprIndex);
          Result = _VInsElement(VectorSize, OpSize, 0, 0, SrcVector, Src);
        }
      }

      StoreXMMRegister(gprIndex, Result);
    } else {
      if (GPRSize == OpSize::i64Bit && OpSize == OpSize::i32Bit) {
        // If the Source IR op is 64 bits, we need to zext the upper bits
        // For all other sizes, the upper bits are guaranteed to already be zero
        Ref Value = GetOpSize(Src) == OpSize::i64Bit ? ARef(Src).Bfe(0, 32).Ref() : Src;
        StoreGPRRegister(gpr, Value, GPRSize);

        LOGMAN_THROW_A_FMT(!Operand.Data.GPR.HighBits, "Can't handle 32bit store to high 8bit register");
      } else {
        LOGMAN_THROW_A_FMT(!(GPRSize == OpSize::i32Bit && OpSize > OpSize::i32Bit), "Oops had a {} GPR load", OpSize);

        if (GPRSize != OpSize) {
          // if the GPR isn't the full size then we need to insert.
          // eg:
          // mov al, 2 ; Move in to lower 8-bits.
          // mov ah, 2 ; Move in to upper 8-bits of 16-bit reg.
          // mov ax, 2 ; Move in to lower 16-bits of reg.
          StoreGPRRegister(gpr, Src, OpSize, Operand.Data.GPR.HighBits * 8);
        } else {
          StoreGPRRegister(gpr, Src, std::min(GPRSize, OpSize));
        }
      }
    }
    return;
  }

  // Memory destination.
  AddressMode A = DecodeAddress(Op, Operand, AccessType, false /* IsLoad */);

  if (OpSize == OpSize::f80Bit) {
    Ref MemStoreDst = LoadEffectiveAddress(this, A, GetGPROpSize(), true);
    if (CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256) {
      _StoreMemX87SVEOptPredicate(OpSize::i128Bit, OpSize::i16Bit, Src, MemStoreDst);
    } else {
      // For X87 extended doubles, split before storing
      // Low 64 bits first, then the upper 64-bit element is moved to a GPR and
      // its low 16 bits are stored at offset 8.
      _StoreMemFPR(OpSize::i64Bit, MemStoreDst, Src, Align);
      auto Upper = _VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, Src, 1);
      _StoreMemGPR(OpSize::i16Bit, Upper, MemStoreDst, Constant(8), std::min(Align, OpSize::i64Bit), MemOffsetType::SXTX, 1);
    }
  } else {
    _StoreMemAutoTSO(Class, OpSize, A, Src, Align == OpSize::iInvalid ? OpSize : Align);
  }
}

// Stores `Src` into `Operand` using the instruction's own destination size.
// Thin wrapper around StoreResult_WithOpSize.
void OpDispatchBuilder::StoreResult(RegClass Class, X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src,
                                    IR::OpSize Align, MemoryAccessType AccessType) {
  const auto DestinationSize = OpSizeFromDst(Op);
  StoreResult_WithOpSize(Class, Op, Operand, Src, DestinationSize, Align, AccessType);
}

// Stores `Src` into the instruction's own destination operand (Op->Dest).
void OpDispatchBuilder::StoreResult(RegClass Class, X86Tables::DecodedOp Op, Ref Src, IR::OpSize Align, MemoryAccessType AccessType) {
  const auto& Destination = Op->Dest;
  StoreResult(Class, Op, Destination, Src, Align, AccessType);
}

// Constructs the dispatcher on top of the context's allocator and selects the
// AVX state save/restore/default handlers from the host features:
// the 256-bit variants when both AVX and SVE256 are supported, the AVX128
// variants when only AVX is supported, and none otherwise.
OpDispatchBuilder::OpDispatchBuilder(FEXCore::Context::ContextImpl* ctx)
  : IREmitter {ctx->OpDispatcherAllocator, ctx->HostFeatures.SupportsTSOImm9}
  , CTX {ctx} {
  const auto& Features = CTX->HostFeatures;

  // Without AVX support the handler pointers keep their default values.
  if (!Features.SupportsAVX) {
    return;
  }

  if (Features.SupportsSVE256) {
    SaveAVXStateFunc = &OpDispatchBuilder::SaveAVXState;
    RestoreAVXStateFunc = &OpDispatchBuilder::RestoreAVXState;
    DefaultAVXStateFunc = &OpDispatchBuilder::DefaultAVXState;
  } else {
    SaveAVXStateFunc = &OpDispatchBuilder::AVX128_SaveAVXState;
    RestoreAVXStateFunc = &OpDispatchBuilder::AVX128_RestoreAVXState;
    DefaultAVXStateFunc = &OpDispatchBuilder::AVX128_DefaultAVXState;
  }
}

// Resets the builder so a new function can be emitted: reclaims the emitter
// buffer, forgets all jump targets and clears the per-function status flags
// and the register cache's Written/Cached masks.
void OpDispatchBuilder::ResetWorkingList() {
  IREmitter::ReownOrClaimBuffer();

  // Block bookkeeping.
  JumpTargets.clear();
  CurrentCodeBlock = nullptr;

  // Per-function status flags.
  BlockSetRIP = false;
  DecodeFailure = false;
  ShouldDump = false;

  // Register cache masks.
  RegCache.Written = 0;
  RegCache.Cached = 0;
}

// Handler for instructions that cannot be translated: emits no IR and only
// raises the DecodeFailure flag (cleared again by ResetWorkingList).
void OpDispatchBuilder::UnhandledOp(OpcodeArgs) {
  DecodeFailure = true;
}

// Plain GPR move: copies source operand `SrcIndex` into the destination.
void OpDispatchBuilder::MOVGPROp(OpcodeArgs, uint32_t SrcIndex) {
  const auto& Source = Op->Src[SrcIndex];

  // StoreResult will store with the same size as the input, so we allow upper
  // garbage on the input. The zero extension would be pointless.
  Ref Value = LoadSourceGPR(Op, Source, Op->Flags, {.Align = OpSize::i8Bit, .AllowUpperGarbage = true});
  StoreResultGPR(Op, Value, OpSize::i8Bit);
}

// GPR move whose store is tagged with the STREAM memory access type.
void OpDispatchBuilder::MOVGPRNTOp(OpcodeArgs) {
  const auto& Source = Op->Src[0];

  Ref Value = LoadSourceGPR(Op, Source, Op->Flags, {.Align = OpSize::i8Bit});
  StoreResultGPR(Op, Value, OpSize::i8Bit, MemoryAccessType::STREAM);
}

// Emits IR for a basic two-operand x86 ALU instruction (dest = dest <op> src).
//
// ALUIROp:       IR opcode used for the non-atomic computation.
// AtomicFetchOp: IR opcode used when the destination is a locked memory operand.
// SrcIdx:        index of the source operand in Op->Src.
//
// Special cases handled before the generic path:
//   - `xor reg, reg` is emitted as a zeroing store with matching flags.
//   - 8/16-bit XOR/OR/AND with a constant on a low GPR is promoted to a
//     full-width operation to avoid the insert on store.
//   - XOR with an all-ones constant becomes a NOT, AND with an all-ones
//     constant passes the destination through unchanged.
void OpDispatchBuilder::ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, unsigned SrcIdx) {
  // On x86, the canonical way to zero a register is XOR with itself. Detect and
  // emit optimal arm64 assembly.
  if (!DestIsLockedMem(Op) && ALUIROp == FEXCore::IR::IROps::OP_XOR && Op->Dest.IsGPR() && Op->Src[SrcIdx].IsGPR() &&
      Op->Dest.Data.GPR == Op->Src[SrcIdx].Data.GPR) {

    // Set flags for zero result with inverted carry. We subtract an arbitrary
    // register from itself to get the zero, since `subs wzr, #0` is not
    // encodable. This is optimal and works regardless of the opsize.
    auto Zero = LoadGPR(Op->Dest.Data.GPR.GPR);
    HandleNZ00Write();
    InvalidateAF();
    CalculatePF(SubWithFlags(OpSize::i32Bit, Zero, Zero));
    CFInverted = true;
    FlushRegisterCache();

    // Move 0 into the register
    StoreResultGPR(Op, Constant(0));
    return;
  }

  // Size:        architectural operand size, used for flag calculation.
  // ResultSize:  size used when writing the result back.
  // RoundedSize: size of the emitted IR operation.
  auto Size = OpSizeFromDst(Op);
  auto ResultSize = Size;

  // Everything except AndWithFlags is performed at 32 bits or wider.
  auto RoundedSize = Size;
  if (ALUIROp != FEXCore::IR::IROps::OP_ANDWITHFLAGS) {
    RoundedSize = std::max(OpSize::i32Bit, RoundedSize);
  }

  // X86 basic ALU ops just do the operation between the destination and a single source
  Ref Src = LoadSourceGPR(Op, Op->Src[SrcIdx], Op->Flags, {.AllowUpperGarbage = true});

  // Try to eliminate the masking after 8/16-bit operations with constants, by
  // promoting to a full size operation that preserves the upper bits.
  // Const is only meaningful when IsConst is true; every use below checks IsConst first.
  uint64_t Const;
  bool IsConst = IsValueConstant(WrapNode(Src), &Const);
  if (Size < OpSize::i32Bit && !DestIsLockedMem(Op) && Op->Dest.IsGPR() && !Op->Dest.Data.GPR.HighBits && IsConst &&
      (ALUIROp == IR::IROps::OP_XOR || ALUIROp == IR::IROps::OP_OR || ALUIROp == IR::IROps::OP_ANDWITHFLAGS)) {

    RoundedSize = ResultSize = GetGPROpSize();
    LOGMAN_THROW_A_FMT(Const < (1ull << IR::OpSizeAsBits(Size)), "does not clobber");

    // For AND, we can play the same trick but we instead need the upper bits of
    // the constant to be all-1s instead of all-0s to preserve. We also can't
    // use andwithflags in this case, since we've promoted to 64-bit so the
    // negate flag would be wrong, but using the regular logical operation path
    // instead still ends up a net win for uops.
    //
    // In the common case where the constant is of the form (1 << x) - 1, the
    // adjusted constant here will inline into the arm64 and instruction, so if
    // flags are not needed, we save an instruction overall.
    if (ALUIROp == IR::IROps::OP_ANDWITHFLAGS) {
      Src = Constant(Const | ~((1ull << IR::OpSizeAsBits(Size)) - 1));
      ALUIROp = IR::IROps::OP_AND;
    }
  }

  Ref Result {};
  Ref Dest {};

  if (DestIsLockedMem(Op)) {
    // Locked memory destination: the atomic fetch op performs the memory update
    // and yields the value used as Dest for the result/flag computation below.
    HandledLock = true;
    Ref DestMem = MakeSegmentAddress(Op, Op->Dest);
    DeriveOp(FetchOp, AtomicFetchOp, _AtomicFetchAdd(Size, Src, DestMem));
    Dest = FetchOp;
  } else {
    Dest = LoadSourceGPR(Op, Op->Dest, Op->Flags, {.AllowUpperGarbage = true});
  }

  const auto OpSize = RoundedSize;
  // All-ones value at the architectural operand size. Note that Const still
  // holds the original (un-widened) constant even if Src was adjusted above.
  uint64_t Mask = Size == OpSize::i64Bit ? ~0ull : ((1ull << IR::OpSizeAsBits(Size)) - 1);
  if (IsConst && Const == Mask && !DestIsLockedMem(Op) && ALUIROp == IR::IROps::OP_XOR && Size >= OpSize::i32Bit) {
    // XOR with all ones is a bitwise NOT.
    Result = _Not(OpSize, Dest);
  } else if (IsConst && Const == Mask && !DestIsLockedMem(Op) && ALUIROp == IR::IROps::OP_AND) {
    // AND with all ones leaves the value unchanged.
    Result = Dest;
  } else {
    // Generic path: emit an op of the AndWithFlags shape and retag it with the requested opcode.
    DeriveOp(ALUOp, ALUIROp, _AndWithFlags(OpSize, Dest, Src));
    Result = ALUOp;
  }

  // Flags set
  // For ADD/SUB the flag helper's return value replaces Result.
  switch (ALUIROp) {
  case FEXCore::IR::IROps::OP_ADD: Result = CalculateFlags_ADD(Size, Dest, Src); break;
  case FEXCore::IR::IROps::OP_SUB: Result = CalculateFlags_SUB(Size, Dest, Src); break;
  case FEXCore::IR::IROps::OP_XOR:
  case FEXCore::IR::IROps::OP_AND:
  case FEXCore::IR::IROps::OP_OR: {
    CalculateFlags_Logical(Size, Result);
    break;
  }
  case FEXCore::IR::IROps::OP_ANDWITHFLAGS: {
    HandleNZ00Write();
    CalculatePF(Result);
    InvalidateAF();
    break;
  }
  default: break;
  }

  // Locked memory destinations were already updated by the atomic op above.
  if (!DestIsLockedMem(Op)) {
    StoreResultGPR_WithOpSize(Op, Op->Dest, Result, ResultSize, OpSize::iInvalid, MemoryAccessType::DEFAULT);
  }
}

// LSL is not emulated for real: the handler unconditionally reports failure by
// clearing ZF and leaves the destination untouched.
void OpDispatchBuilder::LSLOp(OpcodeArgs) {
  // Emulate by always returning failure, this deviates from both Linux and Windows but
  // shouldn't be depended on by anything.
  Ref FailureZF = Constant(0);
  SetRFLAG<FEXCore::X86State::RFLAG_ZF_RAW_LOC>(FailureZF);
}

// Handles the software-interrupt style opcodes: INT imm8 (0xCD), INTO (0xCE), INT1 (0xF1),
// HLT (0xF4), UD2 (0x0B) and INT3 (0xCC).
//
// INT imm8 with the platform's syscall vector is forwarded to SyscallOp. Everything else is
// turned into a Break carrying a BreakDefinition (signal, trap number, error register, si_code)
// after the guest RIP has been stored to the CPU state.
void OpDispatchBuilder::INTOp(OpcodeArgs) {
  IR::BreakDefinition Reason;
  // When set, RIP is stored as the address of the instruction *after* this one (INT1/INT3).
  bool SetRIPToNext = false;

  switch (Op->OP) {
  case 0xCD: { // INT imm8
    uint8_t Literal = Op->Src[0].Literal();

#ifndef _WIN32
    constexpr uint8_t SYSCALL_LITERAL = 0x80;
    if (Literal == SYSCALL_LITERAL) {
      if (Is64BitMode) [[unlikely]] {
        LogMan::Msg::EFmt("[Unsupported] Trying to execute 32-bit syscall from a 64-bit process.");
        UnhandledOp(Op);
        return;
      }
      // Syscall on linux
      SyscallOp(Op, false);
      return;
    }
#else
    constexpr uint8_t SYSCALL_LITERAL = 0x2E;
    if (Literal == SYSCALL_LITERAL) {
      // Can be used for both 64-bit and 32-bit syscalls on windows
      SyscallOp(Op, false);
      return;
    }
#endif

#ifdef ARCHITECTURE_arm64ec
    // This is used when QueryPerformanceCounter is called on recent Windows versions, it causes CNTVCT to be written into RAX.
    constexpr uint8_t GET_CNTVCT_LITERAL = 0x81;
    if (Literal == GET_CNTVCT_LITERAL) {
      StoreGPRRegister(X86State::REG_RAX, _CycleCounter(false));
      return;
    }
#endif

    // Any other vector: report a fault. The error register holds the vector number in bits 3 and up
    // with bit 1 set (presumably the x86 "IDT" error-code flag; confirm against the SDM).
    Reason.ErrorRegister = Literal << 3 | (0b010);
    Reason.Signal = Core::FAULT_SIGSEGV;
    // GP is raised when task-gate isn't setup to be valid
    Reason.TrapNumber = X86State::X86_TRAPNO_GP;
    Reason.si_code = 0x80;
    break;
  }
  case 0xCE: // INTO
    Reason.ErrorRegister = 0;
    Reason.Signal = Core::FAULT_SIGSEGV;
    Reason.TrapNumber = X86State::X86_TRAPNO_OF;
    Reason.si_code = 0x80;
    break;
  case 0xF1: // INT1
    Reason.ErrorRegister = 0;
    Reason.Signal = Core::FAULT_SIGTRAP;
    Reason.TrapNumber = X86State::X86_TRAPNO_DB;
    Reason.si_code = 1;
    SetRIPToNext = true;
    break;
  case 0xF4: { // HLT
    Reason.ErrorRegister = 0;
    Reason.Signal = Core::FAULT_SIGSEGV;
    Reason.TrapNumber = X86State::X86_TRAPNO_GP;
    Reason.si_code = 0x80;
    break;
  }
  case 0x0B: // UD2
    Reason.ErrorRegister = 0;
    Reason.Signal = Core::FAULT_SIGILL;
    Reason.TrapNumber = X86State::X86_TRAPNO_UD;
    Reason.si_code = 2;
    break;
  case 0xCC: // INT3
    Reason.ErrorRegister = 0;
    Reason.Signal = Core::FAULT_SIGTRAP;
    Reason.TrapNumber = X86State::X86_TRAPNO_BP;
    Reason.si_code = 0x80;
    SetRIPToNext = true;
    break;
  default: FEX_UNREACHABLE;
  }

  // Flush the register cache before RIP is written and the break is emitted.
  FlushRegisterCache();

  const auto GPRSize = GetGPROpSize();

  if (SetRIPToNext) {
    BlockSetRIP = SetRIPToNext;

    // We want to set RIP to the next instruction after INT3/INT1
    auto NewRIP = GetRelocatedPC(Op);
    _StoreContextGPR(GPRSize, NewRIP, offsetof(FEXCore::Core::CPUState, rip));
  } else if (Op->OP != 0xCE) {
    // Every other unconditional break stores the PC rewound by this instruction's size.
    // INTO is excluded here because it only stores RIP on its faulting path below.
    auto NewRIP = GetRelocatedPC(Op, -Op->InstSize);
    _StoreContextGPR(GPRSize, NewRIP, offsetof(FEXCore::Core::CPUState, rip));
  }

  if (Op->OP == 0xCE) { // Conditional to only break if Overflow == 1
    CalculateDeferredFlags();

    // If condition doesn't hold then keep going
    // CondClass::FNU means OF == 0
    auto CondJump_ = CondJumpNZCV(CondClass::FNU);
    // False target (OF set): the block that stores RIP and breaks.
    auto FalseBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
    SetFalseJumpTarget(CondJump_, FalseBlock);
    SetCurrentCodeBlock(FalseBlock);
    StartNewBlock();

    auto NewRIP = GetRelocatedPC(Op);
    _StoreContextGPR(GPRSize, NewRIP, offsetof(FEXCore::Core::CPUState, rip));
    Break(Reason);

    // Make sure to start a new block after ending this one
    // True target (OF clear): execution simply continues in this fresh block.
    auto JumpTarget = CreateNewCodeBlockAfter(FalseBlock);
    SetTrueJumpTarget(CondJump_, JumpTarget);
    SetCurrentCodeBlock(JumpTarget);
    StartNewBlock();
  } else {
    BlockSetRIP = true;
    Break(Reason);
  }
}

void OpDispatchBuilder::TZCNT(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);

  // No masking needed on the input: _FindTrailingZeroes ignores upper garbage.
  Ref Input = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  Ref Count = _FindTrailingZeroes(Size, Input);
  StoreResultGPR(Op, Count);

  // Flags are derived from the computed count.
  CalculateFlags_ZCNT(Size, Count);
}

void OpDispatchBuilder::LZCNT(OpcodeArgs) {
  const auto Size = OpSizeFromSrc(Op);

  // No masking needed on the input: _CountLeadingZeroes clears upper garbage itself.
  Ref Input = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.AllowUpperGarbage = true});

  auto Count = _CountLeadingZeroes(Size, Input);
  StoreResultGPR(Op, Count);

  // Flags are derived from the computed count.
  CalculateFlags_ZCNT(Size, Count);
}

void OpDispatchBuilder::MOVBEOp(OpcodeArgs) {
  const auto GPRSize = GetGPROpSize();
  const auto SrcSize = OpSizeFromSrc(Op);

  Ref Value = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});

  // Only a 16-bit move into a register needs the insert path; everything else is a
  // plain byte-reverse followed by a normal store.
  const bool Is16BitRegisterDest = !DestIsMem(Op) && SrcSize == OpSize::i16Bit;

  if (Is16BitRegisterDest) {
    // Reversing the 16-bit value as a 32-bit quantity leaves the swapped bytes in the
    // upper 16 bits of the result.
    Value = _Rev(std::max(OpSize::i32Bit, SrcSize), Value);

    // Insert those 16 bits into the existing register contents with a bfxil so the
    // remaining destination bits are preserved.
    Ref Previous = LoadSourceGPR_WithOpSize(Op, Op->Dest, GPRSize, Op->Flags);
    auto Merged = _Bfxil(GPRSize, 16, 16, Previous, Value);
    StoreResultGPR_WithOpSize(Op, Op->Dest, Merged, GPRSize);
    return;
  }

  Value = _Rev(SrcSize, Value);
  StoreResultGPR(Op, Op->Dest, Value);
}

void OpDispatchBuilder::CLWBOrTPause(OpcodeArgs) {
  // Memory destination: this is the cache-line clean form.
  if (DestIsMem(Op)) {
    Ref Address = MakeSegmentAddress(Op, Op->Dest);
    _CacheLineClean(Address);
    return;
  }

  // Register destination: the timed pause form, only available when the host has WFxT.
  if (!CTX->HostFeatures.SupportsWFXT) {
    UnimplementedOp(Op);
    return;
  }

  auto RAX = LoadGPRRegister(X86State::REG_RAX);
  auto RDX = LoadGPRRegister(X86State::REG_RDX);

  // The instruction's own source register is not consumed.
  _WFET(RDX, RAX);

  // All of OF, SF, ZF, AF, PF and CF end up zero.
  // CF would report an OS deadline being hit, which is not modelled here.
  ZeroPF_AF();
  ZeroNZCV();
}

void OpDispatchBuilder::CLFLUSHOPT(OpcodeArgs) {
  // Emit a cache-line clear on the segment-adjusted destination address.
  // The second argument is `false` here, whereas the CLFLUSH path passes `true`.
  Ref Line = MakeSegmentAddress(Op, Op->Dest);
  _CacheLineClear(Line, false);
}

void OpDispatchBuilder::LoadFenceOrXRSTOR(OpcodeArgs) {
  // A ModRM byte of 0xE8 selects LFENCE; any other encoding is handled as XRSTOR.
  if (Op->ModRM != 0xE8) {
    XRstorOpImpl(Op);
    return;
  }

  _Fence(FenceType::Load);
}

void OpDispatchBuilder::MemFenceOrXSAVEOPT(OpcodeArgs) {
  // A ModRM byte of 0xF0 selects MFENCE; any other encoding is forwarded to XSaveOp.
  if (Op->ModRM != 0xF0) {
    XSaveOp(Op);
    return;
  }

  _Fence(FenceType::LoadStore);
}

void OpDispatchBuilder::StoreFenceOrCLFlush(OpcodeArgs) {
  // A ModRM byte of 0xF8 selects SFENCE.
  if (Op->ModRM == 0xF8) {
    _Fence(FenceType::Store);
    return;
  }

  // Anything else is CLFLUSH: clear the cache line at the segment-adjusted destination.
  Ref Line = MakeSegmentAddress(Op, Op->Dest);
  _CacheLineClear(Line, true);
}

void OpDispatchBuilder::UMonitorOrCLRSSBSY(OpcodeArgs) {
  // The register form on a WFxT-capable host is UMONITOR, which is deliberately a NOP.
  const bool IsSupportedUMonitor = !DestIsMem(Op) && CTX->HostFeatures.SupportsWFXT;
  if (IsSupportedUMonitor) {
    return;
  }

  // Memory form (CLRSSBSY), or a host without WFxT: not implemented.
  UnimplementedOp(Op);
}

void OpDispatchBuilder::UMWaitOp(OpcodeArgs) {
  const bool Supported = !DestIsMem(Op) && CTX->HostFeatures.SupportsWFXT;
  if (!Supported) {
    UnimplementedOp(Op);
    return;
  }

  // UMWAIT itself is deliberately a NOP, but the flags are still written:
  // OF, SF, ZF, AF, PF and CF all become zero.
  ZeroPF_AF();
  ZeroNZCV();
}

void OpDispatchBuilder::CLZeroOp(OpcodeArgs) {
  if (CTX->HostFeatures.SupportsCLZERO) {
    // Only the address is needed, so ask the loader not to perform the memory load.
    Ref Line = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.LoadData = false});
    _CacheLineZero(Line);
  } else {
    UnimplementedOp(Op);
  }
}

void OpDispatchBuilder::Prefetch(OpcodeArgs, bool ForStore, bool Stream, uint8_t Level) {
  // Resolve just the address of the operand; the data itself must not be loaded.
  Ref Address = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.LoadData = false});

  // No offset register is supplied (Invalid()), with the SXTX offset type and a scale of 1.
  auto NoOffset = Invalid();
  _Prefetch(ForStore, Stream, Level, Address, NoOffset, MemOffsetType::SXTX, 1);
}

void OpDispatchBuilder::RDTSCPOp(OpcodeArgs) {
  // How RDTSCP differs from RDTSC:
  //  - IA32_TSC_AUX is additionally returned in RCX.
  //  - All earlier loads are globally visible before the counter is read.
  //    It does NOT wait for earlier stores; use an MFENCE beforehand for that.
  //  - It is not an execution fence, so later instructions may run early;
  //    use an LFENCE afterwards to prevent that.
  auto TSC = CycleCounter(true);
  auto AuxID = _ProcessorID();

  StoreGPRRegister(X86State::REG_RAX, TSC.CounterLow);
  StoreGPRRegister(X86State::REG_RCX, AuxID);
  StoreGPRRegister(X86State::REG_RDX, TSC.CounterHigh);
}

void OpDispatchBuilder::RDPIDOp(OpcodeArgs) {
  // Write the processor ID into the instruction's destination.
  auto ID = _ProcessorID();
  StoreResultGPR(Op, ID);
}

void OpDispatchBuilder::CRC32(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsCRC) {
    UnimplementedOp(Op);
    return;
  }

  const auto GPRSize = GetGPROpSize();

  // The destination register is written as 4 bytes, or 8 bytes when REX widening is present.
  auto DstSize = OpSize::i32Bit;
  if (Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_REX_WIDENING) {
    DstSize = OpSize::i64Bit;
  }

  Ref Accumulator = LoadSourceGPR_WithOpSize(Op, Op->Dest, GPRSize, Op->Flags);

  // The source data is 8, 16, 32 or 64 bits wide. Register sources are read at full GPR
  // width; memory sources are loaded with byte alignment.
  Ref Data {};
  if (!Op->Src[0].IsGPR()) {
    Data = LoadSourceGPR(Op, Op->Src[0], Op->Flags, {.Align = OpSize::i8Bit});
  } else {
    Data = LoadSourceGPR_WithOpSize(Op, Op->Src[0], GPRSize, Op->Flags);
  }

  auto Checksum = _CRC32(Accumulator, Data, OpSizeFromSrc(Op));
  StoreResultGPR_WithOpSize(Op, Op->Dest, Checksum, DstSize);
}

template<bool Reseed>
void OpDispatchBuilder::RDRANDOp(OpcodeArgs) {
  if (!CTX->HostFeatures.SupportsRAND) {
    UnimplementedOp(Op);
    return;
  }

  auto Random = _RDRAND(Reseed);
  StoreResultGPR(Op, Random);

  // After the RNG op, NZCV is 0b0000 for a valid number and 0b0100 otherwise,
  // so the raw ZF bit read here is the inverse of the x86 CF result.
  auto InvertedCF = GetRFLAG(X86State::RFLAG_ZF_RAW_LOC);

  // OF, SF, ZF, AF and PF are all zeroed. CF reports whether the value is valid.
  ZeroPF_AF();

  if (CTX->HostFeatures.SupportsFlagM) {
    // Accelerated path: the invalid bit is 0 or 1, so a single rmif writes all of NZCV.
    HandleNZCVWrite();
    _RmifNZCV(InvertedCF, (64 - 1) /* rotate bit 0 into bit 1 = C */, 0xf);
    CFInverted = true;
    return;
  }

  ZeroNZCV();
  SetCFInverted(InvertedCF);
}

template void OpDispatchBuilder::RDRANDOp<true>(OpcodeArgs);
template void OpDispatchBuilder::RDRANDOp<false>(OpcodeArgs);

void OpDispatchBuilder::BreakOp(OpcodeArgs, FEXCore::IR::BreakDefinition BreakDefinition) {
  const auto GPRSize = GetGPROpSize();

  // The instruction is not actually supported, but multiblock compilation may still
  // reach it. Store the PC rewound by this instruction's size and emit the break.
  auto BreakRIP = GetRelocatedPC(Op, -Op->InstSize);
  _StoreContextGPR(GPRSize, BreakRIP, offsetof(FEXCore::Core::CPUState, rip));
  Break(BreakDefinition);

  if (!Multiblock) {
    // Single-block mode: just record that this block has set RIP.
    BlockSetRIP = true;
    return;
  }

  // Multiblock mode: continue emitting into a fresh block placed after the current one.
  auto Continuation = CreateNewCodeBlockAfter(GetCurrentBlock());
  SetCurrentCodeBlock(Continuation);
  StartNewBlock();
}

// Emits a break for an instruction that has no implementation: SIGILL with the
// invalid-opcode trap number and si_code 2.
//
// The signal is spelled with the Core::FAULT_* constant, matching INTOp and NoExecOp in this
// file, rather than the host <signal.h> macro.
void OpDispatchBuilder::UnimplementedOp(OpcodeArgs) {
  BreakOp(Op, FEXCore::IR::BreakDefinition {
                .ErrorRegister = 0,
                .Signal = Core::FAULT_SIGILL,
                .TrapNumber = X86State::X86_TRAPNO_UD,
                .si_code = 2, ///< ILL_ILLOPN
              });
}

// Emits a break for an instruction the guest is not permitted to execute: SIGSEGV with the
// general-protection trap number and si_code 0x80.
//
// The signal is spelled with the Core::FAULT_* constant, matching INTOp and NoExecOp in this
// file, rather than the host <signal.h> macro.
void OpDispatchBuilder::PermissionRestrictedOp(OpcodeArgs) {
  BreakOp(Op, FEXCore::IR::BreakDefinition {
                .ErrorRegister = 0,
                .Signal = Core::FAULT_SIGSEGV,
                .TrapNumber = X86State::X86_TRAPNO_GP,
                .si_code = 0x80,
              });
}

// Emits a break for an invalid instruction: SIGILL with a zero trap number, zero error
// register and zero si_code.
//
// The signal is spelled with the Core::FAULT_* constant, matching INTOp and NoExecOp in this
// file, rather than the host <signal.h> macro.
void OpDispatchBuilder::InvalidOp(OpcodeArgs) {
  BreakOp(Op, FEXCore::IR::BreakDefinition {
                .ErrorRegister = 0,
                .Signal = Core::FAULT_SIGILL,
                .TrapNumber = 0,
                .si_code = 0,
              });
}

void OpDispatchBuilder::NoExecOp(OpcodeArgs) {
  // Describe the break as a page fault: a user-mode protection violation on an
  // instruction fetch, delivered as SIGSEGV.
  const FEXCore::IR::BreakDefinition ExecFault {
    .ErrorRegister = X86State::X86_PF_PROT | X86State::X86_PF_USER | X86State::X86_PF_INSTR,
    .Signal = Core::FAULT_SIGSEGV,
    .TrapNumber = X86State::X86_TRAPNO_PF,
    .si_code = 2, // SEGV_ACCERR
  };

  BreakOp(Op, ExecFault);
}

#undef OpcodeArgs
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/OpcodeDispatcher.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Interface/Core/Frontend.h"
#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/Addressing.h"
#include "Interface/Context/Context.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/RegisterAllocationData.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/IR/IR.h>

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/vector.h>

#include <bit>
#include <cstdint>
#include <fmt/format.h>
#include <stddef.h>
#include <utility>
#include <xxhash.h>

namespace FEXCore::IR {
// Vector floating-point comparison predicates.
// The first eight values are available to SSE comparisons; the rest are AVX-only.
enum class VectorCompareType {
  // SSE comparisons.
  EQ_OQ = 0x00,
  LT_OS = 0x01,
  LE_OS = 0x02,
  UNORD_Q = 0x03,
  NEQ_UQ = 0x04,
  NLT_US = 0x05,
  NLE_US = 0x06,
  ORD_Q = 0x07,
  // AVX-only comparisons.
  EQ_UQ = 0x08,
  NGE_US = 0x09,
  NGT_US = 0x0A,
  FALSE_OQ = 0x0B,
  NEQ_OQ = 0x0C,
  GE_OS = 0x0D,
  GT_OS = 0x0E,
  TRUE_UQ = 0x0F,
  EQ_OS = 0x10,
  LT_OQ = 0x11,
  LE_OQ = 0x12,
  UNORD_S = 0x13,
  NEQ_US = 0x14,
  NLT_UQ = 0x15,
  NLE_UQ = 0x16,
  ORD_S = 0x17,
  EQ_US = 0x18,
  NGE_UQ = 0x19,
  NGT_UQ = 0x1A,
  FALSE_OS = 0x1B,
  NEQ_OS = 0x1C,
  GE_OQ = 0x1D,
  GT_OQ = 0x1E,
  TRUE_US = 0x1F,
};

// Selects the memory-ordering behaviour requested for a load or store.
enum class MemoryAccessType {
  DEFAULT = 0, ///< Choose TSO or Non-TSO depending on access type
  TSO = 1,     ///< TSO access behaviour
  NONTSO = 2,  ///< Non-TSO access behaviour
  STREAM = 3,  ///< Non-temporal streaming
};

// What a bit-test operation does to the tested bit (see OpDispatchBuilder::BTOp).
enum class BTAction {
  BTNone = 0,
  BTClear = 1,
  BTSet = 2,
  BTComplement = 3,
};

// Override state for TSO emulation, stored via OpDispatchBuilder::SetForceTSO().
enum class ForceTSOMode {
  NoOverride = 0,
  ForceDisabled = 1,
  ForceEnabled = 2,
};

// Optional knobs for source-operand loads. Callers typically pass this with designated
// initializers (e.g. `{.LoadData = false}`), so the member order is part of the interface.
struct LoadSourceOptions {
  // Alignment of the load in bytes. iInvalid means "aligned to the operation size".
  IR::OpSize Align {OpSize::iInvalid};

  // Whether the data should actually be loaded when the operand is a memory access.
  // When false, the address that would have been loaded from is returned instead.
  //
  // Note: If returning the address, make sure to apply the segment offset
  //       after with AppendSegmentOffset().
  //
  bool LoadData {true};

  // Forces a load even if the underlying type isn't loadable.
  bool ForceLoad {false};

  // The access type used for the load.
  MemoryAccessType AccessType {MemoryAccessType::DEFAULT};

  // Controls whether a zero extend clears the upper bits of the register
  // (e.g. an 8-bit load would clear the upper 24 or 56 bits depending on the
  // operating mode). When true, no zero-extension occurs.
  bool AllowUpperGarbage {false};
};

// One row of an opcode dispatch table.
// NOTE(review): field meanings below are inferred from names only; confirm against the
// table-installation code.
struct DispatchTableEntry {
  // Opcode value this entry applies to (presumably the first opcode of a run).
  uint16_t Op;
  // Presumably the number of consecutive opcodes, starting at Op, that share Ptr.
  uint8_t Count;
  // Handler invoked for the opcode(s).
  X86Tables::OpDispatchPtr Ptr;
};

class OpDispatchBuilder final : public IREmitter {
public:
  // Returns the block entry registered in JumpTargets for the given RIP.
  // A missing entry is reported through LOGMAN_THROW_A_FMT.
  Ref GetNewJumpBlock(uint64_t RIP) {
    const auto Target = JumpTargets.find(RIP);
    LOGMAN_THROW_A_FMT(Target != JumpTargets.end(), "Couldn't find block generated for 0x{:x}", RIP);
    return Target->second.BlockEntry;
  }

  // If RIP is a known jump target, marks it as emitted and, unless we are already
  // emitting into its block, switches the current code block to it.
  void SetNewBlockIfChanged(uint64_t RIP) {
    auto Target = JumpTargets.find(RIP);
    if (Target == JumpTargets.end()) {
      // Not a jump target: nothing to do.
      return;
    }

    auto& TargetInfo = Target->second;
    TargetInfo.HaveEmitted = true;

    const bool AlreadyInTargetBlock =
      CurrentCodeBlock->Wrapped(DualListData.ListBegin()).ID() == TargetInfo.BlockEntry->Wrapped(DualListData.ListBegin()).ID();
    if (AlreadyInTargetBlock) {
      return;
    }

    // This RIP is a jump target, so emission has to continue in that target's block.
    SetCurrentCodeBlock(TargetInfo.BlockEntry);
  }

  // Resets all per-block cached dispatcher state. Called right after switching to a
  // freshly created code block so the new block does not inherit cached values.
  void StartNewBlock() {
    // If we loaded flags but didn't change them, invalidate the cached copy and move on.
    // Changes get stored out by CalculateDeferredFlags.
    CachedNZCV = nullptr;
    // Return the carry-inversion tracking to the ABI's default at block entry.
    CFInverted = CFInvertedABI;

    FlushRegisterCache();

    // Start block in X87 state.
    // This is important to ensure that blocks always start with the same state independently of predecessors
    // which allows independent compilation of blocks.
    // Starting in the X87 state is better than starting in MMX state because
    // MMX state is more work to initialize.
    MMXState = MMXState_X87;

    // New block needs to reset segment telemetry.
    SegmentsNeedReadCheck = ~0U;

    // Need to clear any named constants that were cached.
    ClearCachedNamedConstants();
  }

  // Emits an unconditional jump with no target set yet, flushing the register cache first.
  IRPair<IROp_Jump> Jump() {
    FlushRegisterCache();
    return _Jump();
  }
  // Emits an unconditional jump to _TargetBlock, flushing the register cache first.
  IRPair<IROp_Jump> Jump(Ref _TargetBlock) {
    FlushRegisterCache();
    return _Jump(_TargetBlock);
  }
  // Conditional-jump helpers. Each one flushes the register cache and then forwards to
  // the raw _CondJump emitter.

  // Compare _Cmp1 against _Cmp2 using _Cond, with both branch targets supplied.
  IRPair<IROp_CondJump> CondJump(Ref _Cmp1, Ref _Cmp2, Ref _TrueBlock, Ref _FalseBlock, CondClass _Cond = CondClass::NEQ,
                                 IR::OpSize _CompareSize = OpSize::iInvalid) {
    FlushRegisterCache();
    return _CondJump(_Cmp1, _Cmp2, _TrueBlock, _FalseBlock, _Cond, _CompareSize);
  }
  // Single-operand form; forwards ssa0 and cond unchanged.
  IRPair<IROp_CondJump> CondJump(Ref ssa0, CondClass cond = CondClass::NEQ) {
    FlushRegisterCache();
    return _CondJump(ssa0, cond);
  }
  // Three-operand form; forwards all operands and cond unchanged.
  IRPair<IROp_CondJump> CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClass cond = CondClass::NEQ) {
    FlushRegisterCache();
    return _CondJump(ssa0, ssa1, ssa2, cond);
  }
  // Branches on Cond with no compare operands and no targets; the trailing `true`
  // distinguishes this from CondJumpBit (presumably "branch from NZCV"; confirm in _CondJump).
  // Targets are filled in later by the caller (e.g. SetTrueJumpTarget/SetFalseJumpTarget).
  IRPair<IROp_CondJump> CondJumpNZCV(CondClass Cond) {
    FlushRegisterCache();
    return _CondJump(InvalidNode, InvalidNode, InvalidNode, InvalidNode, Cond, OpSize::iInvalid, true);
  }
  // Branches on a test of bit number `Bit` in Src: TSTNZ when Set is true, TSTZ otherwise.
  // Targets are left invalid for the caller to fill in.
  IRPair<IROp_CondJump> CondJumpBit(Ref Src, unsigned Bit, bool Set) {
    FlushRegisterCache();
    auto InlineConst = _InlineConstant(Bit);
    auto Cond = Set ? CondClass::TSTNZ : CondClass::TSTZ;
    return _CondJump(Src, InlineConst, InvalidNode, InvalidNode, Cond, OpSize::iInvalid, false);
  }
  // Block-terminating / call-out helpers. Each one flushes the register cache before
  // emitting the underlying IR op.

  // Exits the function to NewRIP, with no call-return information.
  IRPair<IROp_ExitFunction> ExitFunction(Ref NewRIP, BranchHint Hint = BranchHint::None) {
    FlushRegisterCache();
    return _ExitFunction(GetOpSize(NewRIP), NewRIP, Hint, InvalidNode, InvalidNode);
  }
  // Exits the function to NewRIP, additionally passing a call return address and return block.
  IRPair<IROp_ExitFunction> ExitFunction(Ref NewRIP, BranchHint Hint, Ref CallReturnAddress, Ref CallReturnBlock) {
    FlushRegisterCache();
    return _ExitFunction(GetOpSize(NewRIP), NewRIP, Hint, CallReturnAddress, CallReturnBlock);
  }
  // Emits a break described by Reason.
  IRPair<IROp_Break> Break(BreakDefinition Reason) {
    FlushRegisterCache();
    return _Break(Reason);
  }
  // Emits a thunk call identified by ThunkNameHash, passing ArgPtr.
  IRPair<IROp_Thunk> Thunk(Ref ArgPtr, SHA256Sum ThunkNameHash) {
    FlushRegisterCache();
    return _Thunk(ArgPtr, ThunkNameHash);
  }

  // Finishes emission of one guest instruction.
  //
  // NextRIP is the address of the instruction that follows; LastOp says whether this was
  // the final instruction of the current decoded block.
  //
  // Returns true only when a jump to an already-known block was emitted to terminate the
  // current block. In every other case BlockSetRIP is cleared and false is returned.
  bool FinishOp(uint64_t NextRIP, bool LastOp) {
    // If we are switching to a new block and this current block has yet to set a RIP
    // Then we need to insert an unconditional jump from the current block to the one we are going to
    // This happens most frequently when an instruction jumps backwards to another location
    // eg:
    //
    //  nop dword [rax], eax
    // .label:
    //  rdi, 0x8
    //  cmp qword [rdi-8], 0
    //  jne .label
    if (LastOp && !BlockSetRIP) {
      auto it = JumpTargets.find(NextRIP);
      if (it != JumpTargets.end()) {
        // The fallthrough address is a known block: jump straight to it.
        Jump(it->second.BlockEntry);
        return true;
      }

      // If we don't have a jump target to a new block then we have to leave
      // Set the RIP to the next instruction and leave
      const auto GPRSize = GetGPROpSize();
      ExitFunction(_InlineEntrypointOffset(GPRSize, NextRIP - Entry));
    }

    BlockSetRIP = false;

    return false;
  }

  // Conservatively decides whether a decoded instruction may have side effects.
  // The answer is true when the table flags mark it as a memory access, as setting RIP or
  // as a block end, or when any operand has a form that could touch memory.
  static bool CanHaveSideEffects(const FEXCore::X86Tables::X86InstInfo* TableInfo, FEXCore::X86Tables::DecodedOp Op) {
    if (TableInfo) {
      // Marked as accessing memory: always treated as a side effect.
      // Not always true, but better to be safe.
      const bool MarkedMemoryAccess = TableInfo->Flags & X86Tables::InstFlags::FLAGS_DEBUG_MEM_ACCESS;
      if (MarkedMemoryAccess) {
        return true;
      }

      // Cooperative suspend interrupts can be triggered at any back-edge, so the RIP
      // must be reconstructible for anything that sets RIP or ends a block.
      const bool ChangesControlFlow = TableInfo->Flags & (X86Tables::InstFlags::FLAGS_SETS_RIP | X86Tables::InstFlags::FLAGS_BLOCK_END);
      if (ChangesControlFlow) {
        return true;
      }
    }

    // Not every operand of these kinds actually accesses memory, but err on the safe side.
    auto MayTouchMemory = [](const X86Tables::DecodedOperand& Operand) -> bool {
      return !Operand.IsNone() && (Operand.IsGPRDirect() || Operand.IsGPRIndirect() || Operand.IsRIPRelative() || Operand.IsSIB());
    };

    return MayTouchMemory(Op->Dest) || MayTouchMemory(Op->Src[0]) || MayTouchMemory(Op->Src[1]) || MayTouchMemory(Op->Src[2]);
  }

  // Emits Routine twice, once per value of the direction flag, and branches between the
  // two copies at runtime: Routine(1) runs when DF == 0, Routine(-1) otherwise.
  // Both copies jump to a common exit block, where emission continues afterwards.
  template<typename F>
  void ForeachDirection(F&& Routine) {
    auto Zero = Constant(0);

    // Lay out the forward body, the backward body and the shared exit, in that order.
    auto ForwardBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
    auto BackwardBlock = CreateNewCodeBlockAfter(ForwardBlock);
    auto ExitBlock = CreateNewCodeBlockAfter(BackwardBlock);

    // DF == 0 selects the forward block.
    auto DF = GetRFLAG(X86State::RFLAG_DF_RAW_LOC);
    CondJump(DF, Zero, ForwardBlock, BackwardBlock, CondClass::EQ);

    auto EmitDirection = [&](auto Block, int Step) {
      SetCurrentCodeBlock(Block);
      StartNewBlock();
      Routine(Step);
      Jump(ExitBlock);
    };

    EmitDirection(ForwardBlock, 1);
    EmitDirection(BackwardBlock, -1);

    SetCurrentCodeBlock(ExitBlock);
    StartNewBlock();
  }

  OpDispatchBuilder(FEXCore::Context::ContextImpl* ctx);

  // Should only be called at the start of IR Emission.
  void ResetWorkingList();

  // Clears both the decode-failure flag and the "needs block end" flag.
  void ResetDecodeFailure() {
    NeedsBlockEnd = DecodeFailure = false;
  }
  // True if a decode failure has been recorded since the last reset.
  bool HadDecodeFailure() const {
    return DecodeFailure;
  }
  // True if the dispatcher has flagged that the current block needs to be ended.
  bool NeedsBlockEnder() const {
    return NeedsBlockEnd;
  }

  // Clears the "handled lock" flag.
  void ResetHandledLock() {
    HandledLock = false;
  }
  // True if the "handled lock" flag has been set since the last reset.
  bool HasHandledLock() const {
    return HandledLock;
  }

  // Stores the TSO override mode.
  void SetForceTSO(ForceTSOMode Mode) {
    ForceTSO = Mode;
  }
  // Returns the currently stored TSO override mode.
  ForceTSOMode GetForceTSO() const {
    return ForceTSO;
  }

  // Stores whether IR dumping is requested.
  void SetDumpIR(bool DumpIR) {
    ShouldDump = DumpIR;
  }
  // Returns whether IR dumping is requested.
  bool ShouldDumpIR() const {
    return ShouldDump;
  }

  void BeginFunction(uint64_t RIP, const fextl::vector<FEXCore::Frontend::Decoder::DecodedBlocks>* Blocks, uint32_t NumInstructions,
                     bool Is64BitMode, bool MonoBackpatcherBlock);
  void Finalize();

  // Dispatch builder functions
#define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op

  /**
   * Binds a sequence of compile-time constants as arguments to another member function.
   * This allows to construct OpDispatchPtrs that are specialized for the given set of arguments.
   *
   * Fn is a pointer to the member function to call; Args are appended after Op.
   * The call is marked [[clang::noinline]] so the target is not inlined into each
   * Bind instantiation.
   */
  template<auto Fn, auto... Args>
  void Bind(OpcodeArgs) {
    [[clang::noinline]] (this->*Fn)(Op, Args...);
  };

  void UnhandledOp(OpcodeArgs);
  void MOVGPROp(OpcodeArgs, uint32_t SrcIndex);
  void MOVGPRNTOp(OpcodeArgs);
  void MOVVectorAlignedOp(OpcodeArgs);
  void MOVVectorUnalignedOp(OpcodeArgs);
  void MOVVectorNTOp(OpcodeArgs);
  void ALUOp(OpcodeArgs, FEXCore::IR::IROps ALUIROp, FEXCore::IR::IROps AtomicFetchOp, unsigned SrcIdx);
  void LSLOp(OpcodeArgs);
  void INTOp(OpcodeArgs);
  void SyscallOp(OpcodeArgs, bool IsSyscallInst);
  void ThunkOp(OpcodeArgs);
  void LEAOp(OpcodeArgs);
  void NOPOp(OpcodeArgs);
  void RETOp(OpcodeArgs);
  void IRETOp(OpcodeArgs);
  void CallbackReturnOp(OpcodeArgs);
  void SecondaryALUOp(OpcodeArgs);
  void ADCOp(OpcodeArgs, uint32_t SrcIndex);
  void SBBOp(OpcodeArgs, uint32_t SrcIndex);
  void SALCOp(OpcodeArgs);
  void PUSHOp(OpcodeArgs);
  void PUSHREGOp(OpcodeArgs);
  void PUSHAOp(OpcodeArgs);
  void PUSHSegmentOp(OpcodeArgs, uint32_t SegmentReg);
  void POPOp(OpcodeArgs);
  void POPAOp(OpcodeArgs);
  void POPSegmentOp(OpcodeArgs, uint32_t SegmentReg);
  void LEAVEOp(OpcodeArgs);
  void CALLOp(OpcodeArgs);
  void CALLAbsoluteOp(OpcodeArgs);
  void CondJUMPOp(OpcodeArgs);
  void CondJUMPRCXOp(OpcodeArgs);
  void LoopOp(OpcodeArgs);
  void JUMPOp(OpcodeArgs);
  void JUMPAbsoluteOp(OpcodeArgs);
  void JUMPFARIndirectOp(OpcodeArgs);
  void CALLFARIndirectOp(OpcodeArgs);
  void RETFARIndirectOp(OpcodeArgs);
  void TESTOp(OpcodeArgs, uint32_t SrcIndex);
  void ARPLOp(OpcodeArgs);
  void MOVSXDOp(OpcodeArgs);
  void MOVSXOp(OpcodeArgs);
  void MOVZXOp(OpcodeArgs);
  void CMPOp(OpcodeArgs, uint32_t SrcIndex);
  void SETccOp(OpcodeArgs);
  void CQOOp(OpcodeArgs);
  void CDQOp(OpcodeArgs);
  void XCHGOp(OpcodeArgs);
  void SAHFOp(OpcodeArgs);
  void LAHFOp(OpcodeArgs);
  void MOVSegOp(OpcodeArgs, bool ToSeg);
  void FLAGControlOp(OpcodeArgs);
  void MOVOffsetOp(OpcodeArgs);
  void CMOVOp(OpcodeArgs);
  void CPUIDOp(OpcodeArgs);
  void XGetBVOp(OpcodeArgs);
  uint32_t GetConstantShift(X86Tables::DecodedOp Op, bool Is1Bit);
  void SHLOp(OpcodeArgs);
  void SHLImmediateOp(OpcodeArgs, bool SHL1Bit);
  void SHROp(OpcodeArgs);
  void SHRImmediateOp(OpcodeArgs, bool SHR1Bit);
  void SHLDOp(OpcodeArgs);
  void SHLDImmediateOp(OpcodeArgs);
  void SHRDOp(OpcodeArgs);
  void SHRDImmediateOp(OpcodeArgs);
  void ASHROp(OpcodeArgs, bool IsImmediate, bool Is1Bit);
  void RotateOp(OpcodeArgs, bool Left, bool IsImmediate, bool Is1Bit);
  void RCROp1Bit(OpcodeArgs);
  void RCROp8x1Bit(OpcodeArgs);
  void RCROp(OpcodeArgs);
  void RCRSmallerOp(OpcodeArgs);
  void RCLOp1Bit(OpcodeArgs);
  void RCLOp(OpcodeArgs);
  void RCLSmallerOp(OpcodeArgs);

  void BTOp(OpcodeArgs, uint32_t SrcIndex, enum BTAction Action);

  void IMUL1SrcOp(OpcodeArgs);
  void IMUL2SrcOp(OpcodeArgs);
  void IMULOp(OpcodeArgs);
  void STOSOp(OpcodeArgs);
  void MOVSOp(OpcodeArgs);
  void CMPSOp(OpcodeArgs);
  void LODSOp(OpcodeArgs);
  void SCASOp(OpcodeArgs);
  void BSWAPOp(OpcodeArgs);
  void PUSHFOp(OpcodeArgs);
  void POPFOp(OpcodeArgs);

  // Result of CycleCounter(): the counter split into two IR values.
  // RDTSCPOp stores CounterLow into RAX and CounterHigh into RDX.
  struct CycleCounterPair {
    Ref CounterLow;
    Ref CounterHigh;
  };
  CycleCounterPair CycleCounter(bool SelfSynchronizingLoads);
  void RDTSCOp(OpcodeArgs);
  void INCOp(OpcodeArgs);
  void DECOp(OpcodeArgs);
  void NEGOp(OpcodeArgs);
  void DIVOp(OpcodeArgs);
  void IDIVOp(OpcodeArgs);
  void BSFOp(OpcodeArgs);
  void BSROp(OpcodeArgs);
  void CMPXCHGOp(OpcodeArgs);
  void CMPXCHGPairOp(OpcodeArgs);
  void MULOp(OpcodeArgs);
  void NOTOp(OpcodeArgs);
  void XADDOp(OpcodeArgs);
  void PopcountOp(OpcodeArgs);
  void DAAOp(OpcodeArgs);
  void DASOp(OpcodeArgs);
  void AAAOp(OpcodeArgs);
  void AASOp(OpcodeArgs);
  void AAMOp(OpcodeArgs);
  void AADOp(OpcodeArgs);
  void XLATOp(OpcodeArgs);
  template<bool Reseed>
  void RDRANDOp(OpcodeArgs);

  // Segment selected by ReadSegmentReg / WriteSegmentReg.
  enum class Segment {
    FS = 0,
    GS = 1,
  };
  void ReadSegmentReg(OpcodeArgs, Segment Seg);
  void WriteSegmentReg(OpcodeArgs, Segment Seg);
  void EnterOp(OpcodeArgs);

  void SGDTOp(OpcodeArgs);
  void SIDTOp(OpcodeArgs);
  void SMSWOp(OpcodeArgs);

  // Which vector instruction family a shared handler is being invoked for.
  enum class VectorOpType {
    MMX = 0,
    SSE = 1,
    AVX = 2,
  };
  // SSE
  void MOVLPOp(OpcodeArgs);
  void MOVHPDOp(OpcodeArgs);
  void MOVSDOp(OpcodeArgs);
  void MOVSSOp(OpcodeArgs);
  void VectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void VectorXOROp(OpcodeArgs);

  void VectorALUROp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void VectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void RSqrt3DNowOp(OpcodeArgs, bool Duplicate);
  template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
  void VectorUnaryDuplicateOp(OpcodeArgs);

  void MOVQOp(OpcodeArgs, VectorOpType VectorType);
  void MOVQMMXOp(OpcodeArgs);
  void MOVMSKOp(OpcodeArgs, IR::OpSize ElementSize);
  void MOVMSKOpOne(OpcodeArgs);
  void PUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize);
  void PUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize);
  void PSHUFBOp(OpcodeArgs);
  Ref PShufWLane(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant IndexConstant, bool LowLane, Ref IncomingLane, uint8_t Shuffle);
  void PSHUFWOp(OpcodeArgs, bool Low);
  void PSHUFW8ByteOp(OpcodeArgs);
  void PSHUFDOp(OpcodeArgs);
  void PSRLDOp(OpcodeArgs, IR::OpSize ElementSize);
  void PSRLI(OpcodeArgs, IR::OpSize ElementSize);
  void PSLLI(OpcodeArgs, IR::OpSize ElementSize);
  void PSLL(OpcodeArgs, IR::OpSize ElementSize);
  void PSRAOp(OpcodeArgs, IR::OpSize ElementSize);
  void PSRLDQ(OpcodeArgs);
  void PSLLDQ(OpcodeArgs);
  void PSRAIOp(OpcodeArgs, IR::OpSize ElementSize);
  void MOVDDUPOp(OpcodeArgs);
  template<IR::OpSize DstElementSize>
  void CVTGPR_To_FPR(OpcodeArgs);
  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
  void CVTFPR_To_GPR(OpcodeArgs);
  template<IR::OpSize SrcElementSize, bool Widen>
  void Vector_CVT_Int_To_Float(OpcodeArgs);
  template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
  void Scalar_CVT_Float_To_Float(OpcodeArgs);
  void Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize, bool IsAVX);
  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
  void Vector_CVT_Float_To_Int(OpcodeArgs);
  void MMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
  template<IR::OpSize SrcElementSize, bool HostRoundingMode>
  void XMM_To_MMX_Vector_CVT_Float_To_Int(OpcodeArgs);
  void MASKMOVOp(OpcodeArgs);
  void MOVBetweenGPR_FPR(OpcodeArgs, VectorOpType VectorType);
  void TZCNT(OpcodeArgs);
  void LZCNT(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void VFCMPOp(OpcodeArgs);
  void SHUFOp(OpcodeArgs, IR::OpSize ElementSize);
  template<IR::OpSize ElementSize>
  void PINSROp(OpcodeArgs);
  void InsertPSOp(OpcodeArgs);
  void PExtrOp(OpcodeArgs, IR::OpSize ElementSize);

  template<IR::OpSize ElementSize>
  void PSIGN(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void VPSIGN(OpcodeArgs);

  // BMI1 Ops
  void ANDNBMIOp(OpcodeArgs);
  void BEXTRBMIOp(OpcodeArgs);
  void BLSIBMIOp(OpcodeArgs);
  void BLSMSKBMIOp(OpcodeArgs);
  void BLSRBMIOp(OpcodeArgs);

  // BMI2 Ops
  void BMI2Shift(OpcodeArgs);
  void BZHI(OpcodeArgs);
  void MULX(OpcodeArgs);
  void PDEP(OpcodeArgs);
  void PEXT(OpcodeArgs);
  void RORX(OpcodeArgs);

  // ADX Ops
  void ADXOp(OpcodeArgs);

  // AVX Ops
  void AVXVectorXOROp(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void AVXVectorRound(OpcodeArgs);

  template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
  void AVXScalar_CVT_Float_To_Float(OpcodeArgs);

  template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
  void VectorScalarInsertALUOp(OpcodeArgs);
  template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
  void AVXVectorScalarInsertALUOp(OpcodeArgs);

  template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
  void VectorScalarUnaryInsertALUOp(OpcodeArgs);
  template<FEXCore::IR::IROps IROp, IR::OpSize ElementSize>
  void AVXVectorScalarUnaryInsertALUOp(OpcodeArgs);

  void InsertMMX_To_XMM_Vector_CVT_Int_To_Float(OpcodeArgs);
  template<IR::OpSize DstElementSize>
  void InsertCVTGPR_To_FPR(OpcodeArgs);
  template<IR::OpSize DstElementSize>
  void AVXInsertCVTGPR_To_FPR(OpcodeArgs);

  template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
  void InsertScalar_CVT_Float_To_Float(OpcodeArgs);
  template<IR::OpSize DstElementSize, IR::OpSize SrcElementSize>
  void AVXInsertScalar_CVT_Float_To_Float(OpcodeArgs);

  RoundMode TranslateRoundType(uint8_t Mode);

  template<IR::OpSize ElementSize>
  void InsertScalarRound(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void AVXInsertScalarRound(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void InsertScalarFCMPOp(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void AVXInsertScalarFCMPOp(OpcodeArgs);

  template<IR::OpSize DstElementSize>
  void AVXCVTGPR_To_FPR(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void AVXVFCMPOp(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void VADDSUBPOp(OpcodeArgs);

  void VAESDecOp(OpcodeArgs);
  void VAESDecLastOp(OpcodeArgs);
  void VAESEncOp(OpcodeArgs);
  void VAESEncLastOp(OpcodeArgs);

  void VANDNOp(OpcodeArgs);

  // Unlike the handlers around it, this helper takes its operands explicitly
  // (two sources, a zero register and an immediate selector) and returns the
  // resulting IR value instead of consuming OpcodeArgs.
  Ref VBLENDOpImpl(IR::OpSize VecSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref ZeroRegister, uint64_t Selector);
  void VBLENDPDOp(OpcodeArgs);
  void VPBLENDDOp(OpcodeArgs);
  void VPBLENDWOp(OpcodeArgs);

  void VBROADCASTOp(OpcodeArgs, IR::OpSize ElementSize);

  template<IR::OpSize ElementSize>
  void VDPPOp(OpcodeArgs);

  void VEXTRACT128Op(OpcodeArgs);

  // Note the asymmetry: VHADDPOp receives the IR op and element size as
  // template parameters, VHSUBPOp receives the element size at runtime.
  template<IROps IROp, IR::OpSize ElementSize>
  void VHADDPOp(OpcodeArgs);
  void VHSUBPOp(OpcodeArgs, IR::OpSize ElementSize);

  void VINSERTOp(OpcodeArgs);
  void VINSERTPSOp(OpcodeArgs);

  template<IR::OpSize ElementSize, bool IsStore>
  void VMASKMOVOp(OpcodeArgs);

  void VMOVHPOp(OpcodeArgs);
  void VMOVLPOp(OpcodeArgs);

  void VMOVDDUPOp(OpcodeArgs);
  void VMOVSHDUPOp(OpcodeArgs);
  void VMOVSLDUPOp(OpcodeArgs);

  void VMOVSDOp(OpcodeArgs);
  void VMOVSSOp(OpcodeArgs);

  void VMOVAPS_VMOVAPDOp(OpcodeArgs);
  void VMOVUPS_VMOVUPDOp(OpcodeArgs);

  void VMPSADBWOp(OpcodeArgs);

  void VPACKSSOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPACKUSOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPALIGNROp(OpcodeArgs);

  void VPCMPESTRIOp(OpcodeArgs);
  void VPCMPESTRMOp(OpcodeArgs);
  void VPCMPISTRIOp(OpcodeArgs);
  void VPCMPISTRMOp(OpcodeArgs);

  void VCVTPH2PSOp(OpcodeArgs);
  void VCVTPS2PHOp(OpcodeArgs);

  // Value-returning helper taking explicit IR operands; NOTE(review): presumably
  // shared by the VPERM* handlers declared below - confirm at the definition.
  Ref VPERMDIndices(OpSize DstSize, Ref Indices, Ref IndexMask, Ref Repeating3210);
  void VPERM2Op(OpcodeArgs);
  void VPERMDOp(OpcodeArgs);
  void VPERMQOp(OpcodeArgs);

  void VPERMILImmOp(OpcodeArgs, IR::OpSize ElementSize);

  // Value-returning counterpart of VPERMILRegOp: source and index vectors are
  // passed in explicitly and the result is returned rather than stored.
  Ref VPERMILRegOpImpl(OpSize DstSize, IR::OpSize ElementSize, Ref Src, Ref Indices);
  template<IR::OpSize ElementSize>
  void VPERMILRegOp(OpcodeArgs);

  void VPHADDSWOp(OpcodeArgs);

  void VPHSUBOp(OpcodeArgs, IR::OpSize ElementSize);
  void VPHSUBSWOp(OpcodeArgs);

  void VPINSRBOp(OpcodeArgs);
  void VPINSRDQOp(OpcodeArgs);
  void VPINSRWOp(OpcodeArgs);

  void VPMADDUBSWOp(OpcodeArgs);
  void VPMADDWDOp(OpcodeArgs);

  template<bool IsStore>
  void VPMASKMOVOp(OpcodeArgs);

  void VPMULHRSWOp(OpcodeArgs);

  template<bool Signed>
  void VPMULHWOp(OpcodeArgs);

  template<IR::OpSize ElementSize, bool Signed>
  void VPMULLOp(OpcodeArgs);

  void VPSADBWOp(OpcodeArgs);

  void VPSHUFBOp(OpcodeArgs);

  void VPSHUFWOp(OpcodeArgs, IR::OpSize ElementSize, bool Low);

  void VPSLLOp(OpcodeArgs, IR::OpSize ElementSize);
  void VPSLLDQOp(OpcodeArgs);
  void VPSLLIOp(OpcodeArgs, IR::OpSize ElementSize);
  void VPSLLVOp(OpcodeArgs);

  void VPSRAOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPSRAIOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPSRAVDOp(OpcodeArgs);
  void VPSRLVOp(OpcodeArgs);

  void VPSRLDOp(OpcodeArgs, IR::OpSize ElementSize);
  void VPSRLDQOp(OpcodeArgs);

  void VPUNPCKHOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPUNPCKLOp(OpcodeArgs, IR::OpSize ElementSize);

  void VPSRLIOp(OpcodeArgs, IR::OpSize ElementSize);

  void VSHUFOp(OpcodeArgs, IR::OpSize ElementSize);

  template<IR::OpSize ElementSize>
  void VTESTPOp(OpcodeArgs);

  void VZEROOp(OpcodeArgs);

  // X87 Ops
  // T is optional (defaults to nullptr); the returned Ref is the rebuilt FSW.
  // NOTE(review): the meaning of a non-null T is not visible here - confirm at the definition.
  Ref ReconstructFSW_Helper(Ref T = nullptr);
  // Returns new x87 stack top from FSW.
  Ref ReconstructX87StateFromFSW_Helper(Ref FSW);
  void FLD(OpcodeArgs, IR::OpSize Width);
  void FLDFromStack(OpcodeArgs);
  void FLD_Const(OpcodeArgs, NamedVectorConstant K);

  void FBLD(OpcodeArgs);
  void FBSTP(OpcodeArgs);

  void FILD(OpcodeArgs);

  void FST(OpcodeArgs, IR::OpSize Width);
  void FSTToStack(OpcodeArgs);

  void FIST(OpcodeArgs, bool Truncate);

  // OpResult is used for Stack operations,
  // describes if the result of the operation is stored in ST(0) or ST(i),
  // where ST(i) is one of the arguments to the operation.
  enum class OpResult {
    RES_ST0,
    RES_STI,
  };

  // The arithmetic handlers below share a parameter pattern: operand Width,
  // an Integer flag, an optional Reverse flag (FDIV/FSUB only) and the
  // OpResult selecting the destination stack slot.
  void FADD(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0);
  void FDIV(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0);
  void FMUL(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0);
  void FNCLEX(OpcodeArgs);
  void FNINIT(OpcodeArgs);
  void FSUB(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0);
  void FTST(OpcodeArgs);
  void FXCH(OpcodeArgs);
  void X87EMMS(OpcodeArgs);
  void X87FCMOV(OpcodeArgs);
  void X87FFREE(OpcodeArgs);
  void X87FLDCW(OpcodeArgs);
  void X87FNSAVE(OpcodeArgs);
  void X87FNSTENV(OpcodeArgs);
  void X87FNSTSW(OpcodeArgs);
  void X87FRSTOR(OpcodeArgs);
  void X87FSTCW(OpcodeArgs);
  void X87FXAM(OpcodeArgs);
  void X87FXTRACT(OpcodeArgs);
  void X87FYL2X(OpcodeArgs, bool IsFYL2XP1);
  void X87LDENV(OpcodeArgs);
  void X87ModifySTP(OpcodeArgs, bool Inc);
  void X87OpHelper(OpcodeArgs, FEXCore::IR::IROps IROp, bool ZeroC2);

  // Selects which flag set FCOMI/FCOMIF64 writes.
  // NOTE(review): going by the enumerator names this is x87 status flags vs.
  // RFLAGS - confirm against the handler definitions.
  enum class FCOMIFlags {
    FLAGS_X87,
    FLAGS_RFLAGS,
  };
  void FCOMI(OpcodeArgs, IR::OpSize Width, bool Integer, FCOMIFlags WhichFlags, bool PopTwice);

  // F64 X87 Ops
  void FADDF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0);
  void FBLDF64(OpcodeArgs);
  void FBSTPF64(OpcodeArgs);
  void FCOMIF64(OpcodeArgs, IR::OpSize width, bool Integer, FCOMIFlags whichflags, bool poptwice);
  void FDIVF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0);
  void FILDF64(OpcodeArgs);
  void FISTF64(OpcodeArgs, bool Truncate);
  void FLDF64_Const(OpcodeArgs, uint64_t Num);
  void FLDF64(OpcodeArgs, IR::OpSize Width);
  void FMULF64(OpcodeArgs, IR::OpSize Width, bool Integer, OpResult ResInST0);
  void FSUBF64(OpcodeArgs, IR::OpSize Width, bool Integer, bool Reverse, OpResult ResInST0);
  void FTSTF64(OpcodeArgs);
  void X87FLDCWF64(OpcodeArgs);
  void X87FXTRACTF64(OpcodeArgs);
  void X87LDENVF64(OpcodeArgs);

  void FXSaveOp(OpcodeArgs);
  void FXRStoreOp(OpcodeArgs);

  // Takes the decoded op directly (not OpcodeArgs) and returns an IR value.
  Ref XSaveBase(X86Tables::DecodedOp Op);
  void XSaveOp(OpcodeArgs);

  void PAlignrOp(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void UCOMISxOp(OpcodeArgs);
  void LDMXCSR(OpcodeArgs);
  void STMXCSR(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void PACKUSOp(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void PACKSSOp(OpcodeArgs);

  template<IR::OpSize ElementSize, bool Signed>
  void PMULLOp(OpcodeArgs);

  template<bool ToXMM>
  void MOVQ2DQ(OpcodeArgs);

  template<IR::OpSize ElementSize>
  void ADDSUBPOp(OpcodeArgs);

  void PFNACCOp(OpcodeArgs);
  void PFPNACCOp(OpcodeArgs);
  void PSWAPDOp(OpcodeArgs);

  // CompType is a compile-time selector of the comparison performed.
  // NOTE(review): the encoding of CompType is not visible here.
  template<uint8_t CompType>
  void VPFCMPOp(OpcodeArgs);
  void PI2FWOp(OpcodeArgs);
  void PF2IWOp(OpcodeArgs);

  void PMULHRWOp(OpcodeArgs);

  void PMADDWD(OpcodeArgs);
  void PMADDUBSW(OpcodeArgs);

  template<bool Signed>
  void PMULHW(OpcodeArgs);

  void PMULHRSW(OpcodeArgs);

  void MOVBEOp(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void HSUBP(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void PHSUB(OpcodeArgs);

  void PHADDS(OpcodeArgs);
  void PHSUBS(OpcodeArgs);

  // Several handlers below are named "<A>Or<B>".
  // NOTE(review): presumably each serves two instructions that share an
  // opcode slot and picks one at translation time - confirm at the definitions.
  void CLWBOrTPause(OpcodeArgs);
  void CLFLUSHOPT(OpcodeArgs);
  void LoadFenceOrXRSTOR(OpcodeArgs);
  void MemFenceOrXSAVEOPT(OpcodeArgs);
  void StoreFenceOrCLFlush(OpcodeArgs);
  void UMonitorOrCLRSSBSY(OpcodeArgs);
  void UMWaitOp(OpcodeArgs);
  void CLZeroOp(OpcodeArgs);
  void RDTSCPOp(OpcodeArgs);
  void RDPIDOp(OpcodeArgs);

  // One handler parameterised over the prefetch variant: store intent,
  // streaming hint and a numeric level.
  void Prefetch(OpcodeArgs, bool ForStore, bool Stream, uint8_t Level);

  void PSADBW(OpcodeArgs);

  void SHA1NEXTEOp(OpcodeArgs);
  void SHA1MSG1Op(OpcodeArgs);
  void SHA1MSG2Op(OpcodeArgs);
  void SHA1RNDS4Op(OpcodeArgs);

  void SHA256MSG1Op(OpcodeArgs);
  void SHA256MSG2Op(OpcodeArgs);
  void SHA256RNDS2Op(OpcodeArgs);

  void AESImcOp(OpcodeArgs);
  void AESEncOp(OpcodeArgs);
  void AESEncLastOp(OpcodeArgs);
  void AESDecOp(OpcodeArgs);
  void AESDecLastOp(OpcodeArgs);
  void AESKeyGenAssist(OpcodeArgs);

  // FMA helpers: the three *Idx arguments name which operand supplies each
  // multiplicand and the addend.
  // NOTE(review): the index base/numbering is defined by the implementation.
  void VFMAImpl(OpcodeArgs, IROps IROp, bool Scalar, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
  void VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);

  // Decoded VSIB operand as produced by LoadVSIB() / AVX128_LoadVSIB() and
  // passed by value to the gather helpers (AVX128_VPGatherImpl,
  // AVX128_VPGatherQPSImpl).
  struct RefVSIB {
    // NOTE(review): presumably the low/high halves of the vector index
    // register - confirm against LoadVSIB.
    Ref Low, High;
    // IR value holding the base address.
    Ref BaseAddr;
    // Signed 32-bit displacement.
    int32_t Displacement;
    // NOTE(review): whether this is the raw multiplier or an encoded shift is
    // not visible here.
    uint8_t Scale;
  };

  // Builds a RefVSIB from the given operand of Op.
  RefVSIB LoadVSIB(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags);
  template<OpSize AddrElementSize>
  void VPGATHER(OpcodeArgs);

  template<IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed>
  void ExtendVectorElements(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void VectorRound(OpcodeArgs);

  // Two overloads share this name: the value-returning helper taking explicit
  // sources and an 8-bit selector, and the templated opcode handler below.
  Ref VectorBlend(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Selector);

  template<IR::OpSize ElementSize>
  void VectorBlend(OpcodeArgs);

  void VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize);
  // Operand-level helper (returns nothing); PTestOp is the opcode handler.
  void PTestOpImpl(OpSize Size, Ref Dest, Ref Src);
  void PTestOp(OpcodeArgs);
  void PHMINPOSUWOp(OpcodeArgs);
  template<IR::OpSize ElementSize>
  void DPPOp(OpcodeArgs);

  void MPSADBWOp(OpcodeArgs);
  void PCLMULQDQOp(OpcodeArgs);
  void VPCLMULQDQOp(OpcodeArgs);

  void CRC32(OpcodeArgs);
  void Extrq_imm(OpcodeArgs);
  void Insertq_imm(OpcodeArgs);
  void Extrq(OpcodeArgs);
  void Insertq(OpcodeArgs);

  void BreakOp(OpcodeArgs, FEXCore::IR::BreakDefinition BreakDefinition);
  void UnimplementedOp(OpcodeArgs);
  void PermissionRestrictedOp(OpcodeArgs);

  ///< Helper for PSHUFD and VPERMILPS(imm) since they are the same instruction
  Ref Single128Bit4ByteVectorShuffle(Ref Src, uint8_t Shuffle);
  // AVX 128-bit operations
  // Register accessors: High selects which half of the XMM register is
  // loaded or stored.
  Ref AVX128_LoadXMMRegister(uint32_t XMM, bool High);
  void AVX128_StoreXMMRegister(uint32_t XMM, const Ref Src, bool High);

  // Two IR values named Low and High, used by the AVX128_* helpers as source
  // and result type. AVX128_Zext() builds one whose High member is a 128-bit
  // zero vector.
  struct RefPair {
    Ref Low, High;
  };

  // Wraps a single value into a RefPair: R becomes the low member and the
  // high member is a freshly loaded 128-bit zero vector.
  RefPair AVX128_Zext(Ref R) {
    return RefPair {
      .Low = R,
      .High = LoadZeroVector(OpSize::i128Bit),
    };
  }

  // Applies the SHA data shuffle to Src and returns the shuffled 128-bit value.
  //
  // The shuffle is the PSHUFD pattern that inverts the element order. The
  // lookup indices come from the cached PSHUFD indexed named-vector constant
  // so that repeated uses within one instruction reuse a single load.
  Ref SHADataShuffle(Ref Src) {
    // PSHUFD immediate selecting elements 3,2,1,0 (i.e. reversed order).
    constexpr uint32_t InvertedOrder = 0b00'01'10'11;
    // NOTE(review): the factor 16 is presumably the byte size of one 128-bit
    // table entry - confirm against the constant table layout.
    constexpr uint32_t TableOffset = InvertedOrder * 16;

    auto ShuffleIndices =
      LoadAndCacheIndexedNamedVectorConstant(OpSize::i128Bit, FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD, TableOffset);

    return _VTBL1(OpSize::i128Bit, Src, ShuffleIndices);
  }

  // Loads an operand as a Low/High pair. NeedsHigh tells the loader whether
  // the caller will consume the High member.
  // NOTE(review): what High contains when NeedsHigh is false is not visible here.
  RefPair AVX128_LoadSource_WithOpSize(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                                       bool NeedsHigh, MemoryAccessType AccessType = MemoryAccessType::DEFAULT);

  RefVSIB AVX128_LoadVSIB(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags, bool NeedsHigh);
  // Stores a Low/High pair to the given operand of Op.
  void AVX128_StoreResult_WithOpSize(FEXCore::X86Tables::DecodedOp Op, const FEXCore::X86Tables::DecodedOperand& Operand, const RefPair Src,
                                     MemoryAccessType AccessType = MemoryAccessType::DEFAULT);
  void AVX128_VMOVScalarImpl(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VectorALU(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void AVX128_VectorUnary(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  // Generic unary/binary/trinary drivers: the Helper callback receives the
  // element size plus one, two or three source values and returns the result
  // value. For the trinary form the third source is supplied by the caller.
  void AVX128_VectorUnaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, std::function<Ref(IR::OpSize ElementSize, Ref Src)> Helper);
  void AVX128_VectorBinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize,
                               std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2)> Helper);
  void AVX128_VectorShiftWideImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp);
  void AVX128_VectorShiftImmImpl(OpcodeArgs, IR::OpSize ElementSize, IROps IROp);
  void AVX128_VectorTrinaryImpl(OpcodeArgs, IR::OpSize SrcSize, IR::OpSize ElementSize, Ref Src3,
                                std::function<Ref(IR::OpSize ElementSize, Ref Src1, Ref Src2, Ref Src3)> Helper);

  // Direction selector for AVX128_ShiftDoubleImm.
  enum class ShiftDirection { RIGHT, LEFT };
  void AVX128_ShiftDoubleImm(OpcodeArgs, ShiftDirection Dir);

  void AVX128_VMOVAPS(OpcodeArgs);
  void AVX128_VMOVSD(OpcodeArgs);
  void AVX128_VMOVSS(OpcodeArgs);

  void AVX128_VectorXOR(OpcodeArgs);

  void AVX128_VZERO(OpcodeArgs);
  void AVX128_MOVVectorNT(OpcodeArgs);
  void AVX128_MOVQ(OpcodeArgs);
  void AVX128_VMOVLP(OpcodeArgs);
  void AVX128_VMOVHP(OpcodeArgs);
  void AVX128_VMOVDDUP(OpcodeArgs);
  void AVX128_VMOVSLDUP(OpcodeArgs);
  void AVX128_VMOVSHDUP(OpcodeArgs);
  void AVX128_VBROADCAST(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VPUNPCKL(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VPUNPCKH(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_MOVVectorUnaligned(OpcodeArgs);
  void AVX128_InsertCVTGPR_To_FPR(OpcodeArgs, IR::OpSize DstElementSize);
  void AVX128_CVTFPR_To_GPR(OpcodeArgs, IR::OpSize SrcElementSize, bool HostRoundingMode);
  void AVX128_VANDN(OpcodeArgs);
  void AVX128_VPACKSS(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VPACKUS(OpcodeArgs, IR::OpSize ElementSize);
  // Operand-level helper returning the result value; AVX128_VPSIGN is the
  // opcode handler.
  Ref AVX128_PSIGNImpl(IR::OpSize ElementSize, Ref Src1, Ref Src2);
  void AVX128_VPSIGN(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_UCOMISx(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VectorScalarInsertALU(OpcodeArgs, FEXCore::IR::IROps IROp, IR::OpSize ElementSize);
  void AVX128_VFCMP(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_InsertScalarFCMP(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_MOVBetweenGPR_FPR(OpcodeArgs);
  void AVX128_PExtr(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_ExtendVectorElements(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed);
  void AVX128_MOVMSK(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_MOVMSKB(OpcodeArgs);
  // Shared implementation behind the three AVX128_VPINSR* handlers; the
  // operands to use are passed explicitly.
  void AVX128_PINSRImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                        const X86Tables::DecodedOperand& Src2Op, const X86Tables::DecodedOperand& Imm);
  void AVX128_VPINSRB(OpcodeArgs);
  void AVX128_VPINSRW(OpcodeArgs);
  void AVX128_VPINSRDQ(OpcodeArgs);

  void AVX128_VariableShiftImpl(OpcodeArgs, IROps IROp);

  void AVX128_VINSERT(OpcodeArgs);
  void AVX128_VINSERTPS(OpcodeArgs);

  void AVX128_VPHSUB(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VPHSUBSW(OpcodeArgs);

  void AVX128_VADDSUBP(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VPMULL(OpcodeArgs, IR::OpSize ElementSize, bool Signed);

  void AVX128_VPMULHRSW(OpcodeArgs);

  void AVX128_VPMULHW(OpcodeArgs, bool Signed);

  void AVX128_InsertScalar_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize);

  void AVX128_Vector_CVT_Float_To_Float(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize);

  void AVX128_Vector_CVT_Float_To_Int(OpcodeArgs, IR::OpSize SrcElementSize, bool HostRoundingMode);

  void AVX128_Vector_CVT_Int_To_Float(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen);

  void AVX128_VEXTRACT128(OpcodeArgs);
  void AVX128_VAESImc(OpcodeArgs);
  void AVX128_VAESEnc(OpcodeArgs);
  void AVX128_VAESEncLast(OpcodeArgs);
  void AVX128_VAESDec(OpcodeArgs);
  void AVX128_VAESDecLast(OpcodeArgs);
  void AVX128_VAESKeyGenAssist(OpcodeArgs);

  void AVX128_VPCMPESTRI(OpcodeArgs);
  void AVX128_VPCMPESTRM(OpcodeArgs);
  void AVX128_VPCMPISTRI(OpcodeArgs);
  void AVX128_VPCMPISTRM(OpcodeArgs);

  void AVX128_PHMINPOSUW(OpcodeArgs);

  void AVX128_VectorRound(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_InsertScalarRound(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VDPP(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_VPERMQ(OpcodeArgs);

  void AVX128_VPSHUFW(OpcodeArgs, bool Low);

  void AVX128_VSHUF(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VPERMILImm(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VHADDP(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);

  void AVX128_VPHADDSW(OpcodeArgs);

  void AVX128_VPMADDUBSW(OpcodeArgs);
  void AVX128_VPMADDWD(OpcodeArgs);

  void AVX128_VBLEND(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VHSUBP(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VPSHUFB(OpcodeArgs);
  void AVX128_VPSADBW(OpcodeArgs);

  void AVX128_VMPSADBW(OpcodeArgs);
  void AVX128_VPALIGNR(OpcodeArgs);

  // Shared masked-move implementation; the mask and data operands are passed
  // explicitly and IsStore selects the direction.
  void AVX128_VMASKMOVImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp,
                           const X86Tables::DecodedOperand& DataOp);

  void AVX128_VPMASKMOV(OpcodeArgs, bool IsStore);

  void AVX128_VMASKMOV(OpcodeArgs, IR::OpSize ElementSize, bool IsStore);

  void AVX128_MASKMOV(OpcodeArgs);

  void AVX128_VectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize);

  // AVX state save/restore/default helpers. Their signatures match the
  // SaveStoreAVXStatePtr / DefaultAVXStatePtr member-function pointer types
  // declared further down in this class.
  void AVX128_SaveAVXState(Ref MemBase);
  void AVX128_RestoreAVXState(Ref MemBase);
  void AVX128_DefaultAVXState();

  void AVX128_VPERM2(OpcodeArgs);
  void AVX128_VTESTP(OpcodeArgs, IR::OpSize ElementSize);
  void AVX128_PTest(OpcodeArgs);

  void AVX128_VPERMILReg(OpcodeArgs, IR::OpSize ElementSize);

  void AVX128_VPERMD(OpcodeArgs);

  void AVX128_VPCLMULQDQ(OpcodeArgs);

  void AVX128_VFMAImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
  void AVX128_VFMAScalarImpl(OpcodeArgs, IROps IROp, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);
  void AVX128_VFMAddSubImpl(OpcodeArgs, bool AddSub, uint8_t Src1Idx, uint8_t Src2Idx, uint8_t AddendIdx);

  // Gather helpers: take the destination, mask and decoded VSIB and return the
  // gathered result as a Low/High pair.
  RefPair AVX128_VPGatherQPSImpl(OpcodeArgs, Ref Dest, Ref Mask, RefVSIB VSIB);
  RefPair AVX128_VPGatherImpl(OpcodeArgs, OpSize Size, OpSize ElementLoadSize, OpSize AddrElementSize, RefPair Dest, RefPair Mask, RefVSIB VSIB);

  void AVX128_VPGATHER(OpcodeArgs, OpSize AddrElementSize);

  void AVX128_VCVTPH2PS(OpcodeArgs);
  void AVX128_VCVTPS2PH(OpcodeArgs);

  // End of AVX 128-bit implementation

  // AVX 256-bit operations
  // Stores Value to Op's destination, preserving the upper lane of a 256-bit
  // guest register when an SSE-typed op writes an XMM register.
  //
  // When the destination is one of XMM0..XMM15, the guest vector length is
  // 256-bit and Type is VectorOpType::SSE, Value is inserted as 128-bit
  // element 0 of the current register contents and the merged vector is
  // written back. In every other case the call forwards unchanged to
  // StoreResult(Class, Op, Value, Align, AccessType).
  void StoreResult_WithAVXInsert(VectorOpType Type, RegClass Class, FEXCore::X86Tables::DecodedOp Op, Ref Value,
                                 IR::OpSize Align = IR::OpSize::iInvalid, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    const auto& Dest = Op->Dest;
    const bool DestIsXMM = Dest.IsGPR() && Dest.Data.GPR.GPR >= X86State::REG_XMM_0 && Dest.Data.GPR.GPR <= X86State::REG_XMM_15;
    const bool NeedsLowLaneInsert = DestIsXMM && GetGuestVectorLength() == OpSize::i256Bit && Type == VectorOpType::SSE;

    if (!NeedsLowLaneInsert) {
      StoreResult(Class, Op, Value, Align, AccessType);
      return;
    }

    const auto XMMIndex = Dest.Data.GPR.GPR - X86State::REG_XMM_0;
    auto CurrentContents = LoadXMMRegister(XMMIndex);
    Value = _VInsElement(GetGuestVectorLength(), OpSize::i128Bit, 0, 0, CurrentContents, Value);
    StoreXMMRegister(XMMIndex, Value);
  }

  // Writes Value to register XMM. For SSE-typed ops on a 256-bit guest vector
  // length the value is first inserted as 128-bit element 0 of the register's
  // current contents, so the upper lane is preserved.
  void StoreXMMRegister_WithAVXInsert(VectorOpType Type, uint32_t XMM, Ref Value) {
    const bool InsertIntoLowLane = GetGuestVectorLength() == OpSize::i256Bit && Type == VectorOpType::SSE;

    if (InsertIntoLowLane) {
      auto CurrentContents = LoadXMMRegister(XMM);
      Value = _VInsElement(GetGuestVectorLength(), OpSize::i128Bit, 0, 0, CurrentContents, Value);
    }

    StoreXMMRegister(XMM, Value);
  }

  void AVXVectorALUOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void AVXVectorUnaryOp(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);
  void AVXVectorVariableBlend(OpcodeArgs, IR::OpSize ElementSize);

  // End of AVX 256-bit implementation

  void InvalidOp(OpcodeArgs);
  void NoExecOp(OpcodeArgs);

  // Packed RFLAGS accessors. Lower8 restricts the write to the low byte of
  // the flags; FlagsMask (default: all bits) selects which flags are read.
  // NOTE(review): exact bit semantics live in the definitions - confirm there.
  void SetPackedRFLAG(bool Lower8, Ref Src);
  Ref GetPackedRFLAG(uint32_t FlagsMask = ~0U);

  // Setter for the Multiblock member.
  void SetMultiblock(bool _Multiblock) {
    Multiblock = _Multiblock;
  }

  // Maps the raw location of an x86 OF/CF/ZF/SF flag to a bit index in the
  // 28..31 range (OF -> 28, CF -> 29, ZF -> 30, SF -> 31). Any other input is
  // a programming error and reaches FEX_UNREACHABLE.
  static inline constexpr unsigned IndexNZCV(unsigned BitOffset) {
    if (BitOffset == FEXCore::X86State::RFLAG_OF_RAW_LOC) {
      return 28;
    }
    if (BitOffset == FEXCore::X86State::RFLAG_CF_RAW_LOC) {
      return 29;
    }
    if (BitOffset == FEXCore::X86State::RFLAG_ZF_RAW_LOC) {
      return 30;
    }
    if (BitOffset == FEXCore::X86State::RFLAG_SF_RAW_LOC) {
      return 31;
    }
    FEX_UNREACHABLE;
  }

  // Emits a context store of Value at Offset, special-casing 128-bit stores of
  // the named zero vector.
  //
  // A 128-bit zero would not show up as an ordinary inlinable constant, so
  // when Value is a LoadNamedVectorConstant of NAMED_VECTOR_ZERO and Offset is
  // within the pair-store limit, the store is emitted as a pair of 64-bit GPR
  // stores whose two arguments are inline zero constants. Everything else
  // becomes a plain _StoreContext.
  void StoreContextHelper(IR::OpSize Size, RegClass Class, Ref Value, uint32_t Offset) {
    if (Size == OpSize::i128Bit) {
      constexpr uint32_t MaxPairStoreOffset = 252 * 4;
      auto ValueHeader = GetOpHeader(WrapNode(Value));

      const bool IsNamedZeroVector =
        Offset <= MaxPairStoreOffset && ValueHeader->Op == OP_LOADNAMEDVECTORCONSTANT &&
        ValueHeader->C<IR::IROp_LoadNamedVectorConstant>()->Constant == IR::NamedVectorConstant::NAMED_VECTOR_ZERO;

      if (IsNamedZeroVector) {
        // The pair store is first built with a regular zero constant and its
        // arguments are swapped for inline constants afterwards; this works
        // around InlineConstant not having an associated register class.
        Ref PlaceholderZero = _Constant(0);
        Ref PairStore = _StoreContextPair(IR::OpSize::i64Bit, RegClass::GPR, PlaceholderZero, PlaceholderZero, Offset);

        Ref InlineZero = _InlineConstant(0);
        ReplaceNodeArgument(PairStore, 0, InlineZero);
        ReplaceNodeArgument(PairStore, 1, InlineZero);
        return;
      }
    }

    _StoreContext(Size, Class, Value, Offset);
  }

  // Writes back every register-cache entry marked in RegCache.Written and
  // then drops the flushed entries from the cache.
  //
  // SRAOnly: restrict the flush to the GPR0..AF and FPR0..FPR15 index ranges;
  //          the carry-flag rectification is skipped in this mode.
  // MMXOnly: restrict the flush to the MM0..MM7 index range; deferred flag
  //          calculation is skipped in this mode.
  // With both false, every cached entry is flushed.
  void FlushRegisterCache(bool SRAOnly = false, bool MMXOnly = false) {
    // At block boundaries, fix up the carry flag.
    if (!SRAOnly) {
      RectifyCarryInvert(CFInvertedABI);
    }

    if (!MMXOnly) {
      CalculateDeferredFlags();
    }

    const auto GPRSize = GetGPROpSize();
    const auto VectorSize = GetGuestVectorLength();

    // Write backwards. This is a heuristic to improve coalescing, since we
    // often copy from (low) fixed GPRs to (high) PF/AF for celebrity
    // instructions like "add rax, 1". This hack will go away with clauses.
    uint64_t Bits = RegCache.Written;

    // We have an SRA only mode that exists as a hack to make register caching
    // less aggressive. We should get rid of this once RA can take it.
    uint64_t Mask = ~0ULL;

    if (SRAOnly) {
      // Contiguous bit ranges covering cache indices GPR0..AF and FPR0..FPR15.
      const uint64_t GPRMask = ((1ull << (AFIndex - GPR0Index + 1)) - 1) << GPR0Index;
      const uint64_t FPRMask = ((1ull << (FPR15Index - FPR0Index + 1)) - 1) << FPR0Index;

      Mask &= (GPRMask | FPRMask);
      Bits &= Mask;
    }

    if (MMXOnly) {
      Mask &= ((1ull << (MM7Index - MM0Index + 1)) - 1) << MM0Index;
      Bits &= Mask;
    }

    while (Bits != 0) {
      // Highest set bit first (see "Write backwards" above).
      uint32_t Index = 63 - std::countl_zero(Bits);
      Ref Value = RegCache.Value[Index];

      if (Index >= GPR0Index && Index <= GPR15Index) {
        // Store pinned to the matching fixed GPR.
        Ref R = _StoreRegister(Value, GPRSize);
        R->Reg = PhysicalRegister(RegClass::GPRFixed, Index - GPR0Index).Raw;
      } else if (Index == PFIndex) {
        _StorePF(Value, GPRSize);
      } else if (Index == AFIndex) {
        _StoreAF(Value, GPRSize);
      } else if (Index >= FPR0Index && Index <= FPR15Index) {
        // Store pinned to the matching fixed FPR.
        Ref R = _StoreRegister(Value, VectorSize);
        R->Reg = PhysicalRegister(RegClass::FPRFixed, Index - FPR0Index).Raw;
      } else if (Index == DFIndex) {
        _StoreContextGPR(OpSize::i8Bit, Value, offsetof(Core::CPUState, flags[X86State::RFLAG_DF_RAW_LOC]));
      } else {
        // Everything else lives in the CPU context. Partial entries are
        // written back as 64-bit values.
        bool Partial = RegCache.Partial & (1ull << Index);
        auto Size = Partial ? OpSize::i64Bit : CacheIndexToOpSize(Index);
        uint64_t NextBit = (1ull << (Index - 1));
        uint32_t Offset = CacheIndexToContextOffset(Index);
        auto Class = CacheIndexClass(Index);
        LOGMAN_THROW_A_FMT(Offset != ~0U, "Invalid offset");

        // Use stp where possible to store multiple values at a time. This accelerates AVX.
        // TODO: this is all really confusing because of backwards iteration,
        // can we peel back that hack?
        // Pairing requires: the next-lower index is also pending, this entry
        // is not partial, the size is at least 32-bit, the lower entry sits
        // immediately before this one in the context, and the scaled offset
        // of the lower entry is below 64.
        // NOTE(review): the "< 64" bound presumably mirrors the store-pair
        // immediate range of the backend - confirm.
        const auto SizeInt = IR::OpSizeToSize(Size);
        if ((Bits & NextBit) && !Partial && Size >= OpSize::i32Bit && CacheIndexToContextOffset(Index - 1) == Offset - SizeInt &&
            (Offset - SizeInt) / SizeInt < 64) {
          LOGMAN_THROW_A_FMT(CacheIndexClass(Index - 1) == Class, "construction");
          LOGMAN_THROW_A_FMT((Offset % SizeInt) == 0, "construction");
          Ref ValueNext = RegCache.Value[Index - 1];

          _StoreContextPair(Size, Class, ValueNext, Value, Offset - SizeInt);
          // The lower entry was written by the pair store; do not visit it again.
          Bits &= ~NextBit;
        } else {
          StoreContextHelper(Size, Class, Value, Offset);
          // If Partial and MMX register, then we need to store all 1s in bits 64-79
          // (a 16-bit store of 0xFFFF at byte offset 8 of the register slot).
          if (Partial && Index >= MM0Index && Index <= MM7Index) {
            _StoreContextGPR(OpSize::i16Bit, Constant(0xFFFF), Offset + 8);
          }
        }
      }

      Bits &= ~(1ull << Index);
    }

    // Entries outside Mask stay cached; everything inside it is now clean and
    // evicted.
    RegCache.Written &= ~Mask;
    RegCache.Cached &= ~Mask;
    RegCache.Partial &= ~Mask;
  }

  // Operand size of a guest general-purpose register: 64-bit when
  // Is64BitMode is set, 32-bit otherwise.
  IR::OpSize GetGPROpSize() const {
    if (Is64BitMode) {
      return IR::OpSize::i64Bit;
    }
    return IR::OpSize::i32Bit;
  }

protected:
  // Override hook: flags the current header as containing x87 usage.
  void RecordX87Use() override {
    CurrentHeader->HasX87 = true;
  }

  // Called before emitting an op that is marked as clobbering NZCV. Makes sure
  // the flags are cached (CachedNZCV != nullptr) and marks them dirty so they
  // get reloaded afterwards.
  //
  // Some ops are only conservatively marked as clobbering; for those the
  // function returns early when the host configuration is known to preserve
  // NZCV.
  void SaveNZCV(IROps Op = OP_DUMMY) override {
    const bool IsScalarMinMaxInsert = Op == OP_VFMINSCALARINSERT || Op == OP_VFMAXSCALARINSERT;
    const bool IsMaskedVectorMemOp = Op == OP_VLOADVECTORMASKED || Op == OP_VLOADVECTORGATHERMASKED ||
                                     Op == OP_VLOADVECTORGATHERMASKEDQPS || Op == OP_VSTOREVECTORMASKED;

    // With AFP these become fmin/fmax, which preserve NZCV; without AFP they
    // become fcmp, which clobbers.
    if (IsScalarMinMaxInsert && CTX->HostFeatures.SupportsAFP) {
      return;
    }

    // The ASIMD emulation of the masked ops happens to preserve NZCV; only the
    // SVE implementations clobber it.
    const bool HostHasSVE = CTX->HostFeatures.SupportsSVE128 || CTX->HostFeatures.SupportsSVE256;
    if (IsMaskedVectorMemOp && !HostHasSVE) {
      return;
    }

    // Invariant: while an NZCV-clobbering instruction executes, the flags must
    // live in a GPR, i.e. CachedNZCV must be non-null. GetNZCV() fills the
    // cache when it is empty.
    if (!CachedNZCV) {
      GetNZCV();
    }

    // Assume the flags need reloading after the clobber.
    NZCVDirty = true;
  }

private:
  // Config-backed option bound to X87REDUCEDPRECISION.
  FEX_CONFIG_OPT(ReducedPrecisionMode, X87REDUCEDPRECISION);

  // Per-target bookkeeping stored in the JumpTargets map.
  struct JumpTargetInfo {
    // IR value for the entry of the target block.
    Ref BlockEntry;
    // NOTE(review): presumably set once code for the target has been emitted - confirm.
    bool HaveEmitted;
    // NOTE(review): presumably marks targets that are entry points - confirm.
    bool IsEntryPoint;
  };

  // Owning context; read for HostFeatures in SaveNZCV(). Null until assigned.
  FEXCore::Context::ContextImpl* CTX {};

  // Bit mask with the raw CF, ZF, SF and OF flag locations set; used by
  // ContainsNZCV() to test whether a flag mask touches any of those four.
  constexpr static unsigned FullNZCVMask = (1U << FEXCore::X86State::RFLAG_CF_RAW_LOC) | (1U << FEXCore::X86State::RFLAG_ZF_RAW_LOC) |
                                           (1U << FEXCore::X86State::RFLAG_SF_RAW_LOC) | (1U << FEXCore::X86State::RFLAG_OF_RAW_LOC);

  // True when BitMask has at least one of the CF/ZF/SF/OF bits of
  // FullNZCVMask set.
  static bool ContainsNZCV(unsigned BitMask) {
    const unsigned Overlap = BitMask & FullNZCVMask;
    return Overlap != 0U;
  }

  // True when BitOffset is the raw location of CF, ZF, SF or OF. Offsets of 32
  // and above are rejected up front so the shift below stays within the width
  // of the 32-bit mask.
  static bool IsNZCV(unsigned BitOffset) {
    if (BitOffset >= 32) {
      return false;
    }
    return ContainsNZCV(1U << BitOffset);
  }

  // IR value holding the cached NZCV flags; nullptr when nothing is cached.
  // SaveNZCV() fills it via GetNZCV() when empty.
  Ref CachedNZCV {};
  // Set by SaveNZCV() to signal that the cached flags will need a reload.
  bool NZCVDirty {};

  // Set if the host carry is inverted from the guest carry. This is set after
  // subtraction, because arm64 and x86 have inverted borrow flags, but clear
  // after addition.
  //
  // All CF access needs to maintain this flag. cfinv may be inserted at the end
  // of a block to rectify to the FEX convention (current convention: INVERTED,
  // see CFInvertedABI below).
  bool CFInverted {};

  // FEX convention for CF at the end of blocks: INVERTED.
  // Passed to RectifyCarryInvert() by FlushRegisterCache().
  const bool CFInvertedABI {true};

  // Jump-target bookkeeping keyed by a 64-bit value.
  // NOTE(review): presumably the guest address of the target - confirm.
  fextl::map<uint64_t, JumpTargetInfo> JumpTargets;
  bool HandledLock {false};
  bool DecodeFailure {false};
  bool NeedsBlockEnd {false};
  ForceTSOMode ForceTSO {ForceTSOMode::NoOverride};
  // Used during new op bringup
  bool ShouldDump {false};

  // Member-function pointers selecting the AVX state save/restore/default
  // implementation. They default to the plain SaveAVXState / RestoreAVXState /
  // DefaultAVXState members; the AVX128_* variants declared earlier have
  // matching signatures.
  using SaveStoreAVXStatePtr = void (OpDispatchBuilder::*)(Ref MemBase);
  using DefaultAVXStatePtr = void (OpDispatchBuilder::*)();
  SaveStoreAVXStatePtr SaveAVXStateFunc {&OpDispatchBuilder::SaveAVXState};
  SaveStoreAVXStatePtr RestoreAVXStateFunc {&OpDispatchBuilder::RestoreAVXState};
  DefaultAVXStatePtr DefaultAVXStateFunc {&OpDispatchBuilder::DefaultAVXState};

  // Opcode helpers for generalizing behavior across VEX and non-VEX variants.
  // Most of these return the computed IR value (Ref) and leave storing the
  // result to the caller; the ones returning void handle that themselves.

  Ref ADDSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2);

  void AVXVariableShiftImpl(OpcodeArgs, IROps IROp);

  Ref AESKeyGenAssistImpl(OpcodeArgs);

  Ref CVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);

  Ref DPPOpImpl(IR::OpSize DstSize, Ref Src1, Ref Src2, uint8_t Mask, IR::OpSize ElementSize);

  Ref VDPPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2, const X86Tables::DecodedOperand& Imm);

  Ref ExtendVectorElementsImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DstElementSize, bool Signed);

  Ref HSUBPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2);

  Ref InsertPSOpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
                     const X86Tables::DecodedOperand& Imm);

  Ref MPSADBWOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, uint8_t Select);

  Ref PALIGNROpImpl(OpcodeArgs, const X86Tables::DecodedOperand& Src1, const X86Tables::DecodedOperand& Src2,
                    const X86Tables::DecodedOperand& Imm, bool IsAVX);

  // Single implementation behind the PCMPxSTRx family, selected by the two
  // flags. NOTE(review): presumably IsExplicit = explicit-length (E) forms and
  // IsMask = mask-returning (M) forms - confirm at the definition.
  void PCMPXSTRXOpImpl(OpcodeArgs, bool IsExplicit, bool IsMask);

  Ref PHADDSOpImpl(OpSize Size, Ref Src1, Ref Src2);

  Ref PHMINPOSUWOpImpl(OpcodeArgs);

  Ref PHSUBOpImpl(OpSize Size, Ref Src1, Ref Src2, IR::OpSize ElementSize);

  Ref PHSUBSOpImpl(OpSize Size, Ref Src1, Ref Src2);

  Ref PINSROpImpl(OpcodeArgs, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op,
                  const X86Tables::DecodedOperand& Imm);

  Ref PMADDWDOpImpl(IR::OpSize Size, Ref Src1, Ref Src2);

  Ref PMADDUBSWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2);

  Ref PMULHRSWOpImpl(OpSize Size, Ref Src1, Ref Src2);

  Ref PMULHWOpImpl(OpcodeArgs, bool Signed, Ref Src1, Ref Src2);

  Ref PMULLOpImpl(OpSize Size, IR::OpSize ElementSize, bool Signed, Ref Src1, Ref Src2);

  Ref PSADBWOpImpl(IR::OpSize Size, Ref Src1, Ref Src2);

  // GeneratePSHUFBMask() produces a value of the kind PSHUFBOpImpl() accepts
  // as its MaskVector argument.
  Ref GeneratePSHUFBMask(IR::OpSize SrcSize);
  Ref PSHUFBOpImpl(IR::OpSize SrcSize, Ref Src1, Ref Src2, Ref MaskVector);

  Ref PSIGNImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src1, Ref Src2);

  // Shift helpers: the *I* form takes an immediate shift count, the others a
  // shift vector.
  Ref PSLLIImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, uint64_t Shift);

  Ref PSLLImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec);

  Ref PSRAOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec);

  Ref PSRLDOpImpl(OpcodeArgs, IR::OpSize ElementSize, Ref Src, Ref ShiftVec);

  Ref SHUFOpImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t Shuffle);

  void VMASKMOVOpImpl(OpcodeArgs, IR::OpSize ElementSize, IR::OpSize DataSize, bool IsStore, const X86Tables::DecodedOperand& MaskOp,
                      const X86Tables::DecodedOperand& DataOp);

  void MOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize);
  void VMOVScalarOpImpl(OpcodeArgs, IR::OpSize ElementSize);

  Ref VFCMPOpImpl(OpSize Size, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType);

  void VTESTOpImpl(OpSize SrcSize, IR::OpSize ElementSize, Ref Src1, Ref Src2);

  void VectorUnaryDuplicateOpImpl(OpcodeArgs, IROps IROp, IR::OpSize ElementSize);

  // x86 ALU scalar operations operate in three different ways
  // - AVX512: Writemask shenanigans that we don't care about.
  // - AVX/VEX: Two source
  //   - Example 32bit VADDSS Dest, Src1, Src2
  //   - Dest[31:0] = Src1[31:0] + Src2[31:0]
  //   - Dest[127:32] = Src1[127:32]
  // - SSE: Scalar operation inserts in to the low bits, upper bits completely unaffected.
  //   - Example 32bit ADDSS Dest, Src
  //   - Dest[31:0] = Dest[31:0] + Src[31:0]
  //   - Dest[{256,128}:32] = (Unmodified)
  Ref VectorScalarInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize,
                                  const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);

  Ref VectorScalarUnaryInsertALUOpImpl(OpcodeArgs, IROps IROp, IR::OpSize DstSize, IR::OpSize ElementSize,
                                       const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);

  Ref InsertCVTGPR_To_FPRImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, const X86Tables::DecodedOperand& Src1Op,
                              const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);

  Ref InsertScalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
                                          const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op, bool ZeroUpperBits);
  Ref InsertScalarRoundImpl(OpcodeArgs, IR::OpSize DstSize, IR::OpSize ElementSize, const X86Tables::DecodedOperand& Src1Op,
                            const X86Tables::DecodedOperand& Src2Op, uint64_t Mode, bool ZeroUpperBits);

  Ref InsertScalarFCMPOpImpl(OpSize Size, IR::OpSize OpDstSize, IR::OpSize ElementSize, Ref Src1, Ref Src2, uint8_t CompType, bool ZeroUpperBits);

  Ref VectorRoundImpl(OpSize Size, IR::OpSize ElementSize, Ref Src, uint64_t Mode);

  Ref Scalar_CVT_Float_To_FloatImpl(OpcodeArgs, IR::OpSize DstElementSize, IR::OpSize SrcElementSize,
                                    const X86Tables::DecodedOperand& Src1Op, const X86Tables::DecodedOperand& Src2Op);

  Ref CVTFPR_To_GPRImpl(OpcodeArgs, Ref Src, IR::OpSize SrcElementSize, bool HostRoundingMode);

  Ref Vector_CVT_Float_To_Int32Impl(OpcodeArgs, IR::OpSize DstSize, Ref Src, IR::OpSize SrcSize, IR::OpSize SrcElementSize,
                                    bool HostRoundingMode, bool ZeroUpperHalf);

  Ref Vector_CVT_Int_To_FloatImpl(OpcodeArgs, IR::OpSize SrcElementSize, bool Widen);

  // State save helpers. Except for SaveX87State (which also takes OpcodeArgs)
  // each receives only the memory base to write to.
  void XSaveOpImpl(OpcodeArgs);
  void SaveX87State(OpcodeArgs, Ref MemBase);
  void SaveSSEState(Ref MemBase);
  void SaveMXCSRState(Ref MemBase);
  void SaveAVXState(Ref MemBase);

  // State restore / default helpers. Note that RestoreMXCSRState takes the
  // MXCSR value itself rather than a memory base.
  void XRstorOpImpl(OpcodeArgs);
  void RestoreX87State(Ref MemBase);
  void RestoreSSEState(Ref MemBase);
  void RestoreMXCSRState(Ref MXCSR);
  void RestoreAVXState(Ref MemBase);
  void DefaultX87State(OpcodeArgs);
  void DefaultSSEState();
  void DefaultAVXState();

  Ref GetMXCSR();

#undef OpcodeArgs

  Ref AppendSegmentOffset(Ref Value, uint32_t Flags, uint32_t DefaultPrefix = 0, bool Override = false);
  Ref GetSegment(uint32_t Flags, uint32_t DefaultPrefix = FEXCore::X86Tables::DecodeFlags::FLAG_NO_PREFIX, bool Override = false);

  void UpdatePrefixFromSegment(Ref Segment, uint32_t SegmentReg);

  Ref LoadGPRRegister(uint32_t GPR, IR::OpSize Size = OpSize::iInvalid, uint8_t Offset = 0, bool AllowUpperGarbage = false);
  void StoreGPRRegister(uint32_t GPR, const Ref Src, IR::OpSize Size = OpSize::iInvalid, uint8_t Offset = 0);
  void StoreXMMRegister(uint32_t XMM, const Ref Src);

  // Produces an IR value for the address just past Op (Op->PC + Op->InstSize),
  // displaced by Offset and expressed relative to Entry. Inline selects the
  // inline-constant form of the entrypoint offset op.
  Ref _GetRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset, bool Inline) {
    const auto AddressSize = GetGPROpSize();
    const auto EntryRelative = Op->PC + Op->InstSize + Offset - Entry;

    if (Inline) {
      return _InlineEntrypointOffset(AddressSize, EntryRelative);
    }
    return _EntrypointOffset(AddressSize, EntryRelative);
  }

  // Non-inline variant of _GetRelocatedPC: address following Op, plus Offset.
  Ref GetRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset = 0) {
    constexpr bool Inline = false;
    return _GetRelocatedPC(Op, Offset, Inline);
  }

  // Exits the function to the address following Op (plus Offset), using the
  // inline form of the relocated PC.
  void ExitRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset = 0) {
    constexpr bool Inline = true;
    ExitFunction(_GetRelocatedPC(Op, Offset, Inline));
  }

  // Same as the two-argument overload, additionally forwarding the branch hint
  // and the call-return address/block to ExitFunction.
  void ExitRelocatedPC(const FEXCore::X86Tables::DecodedOp& Op, int64_t Offset, BranchHint Hint, Ref CallReturnAddress, Ref CallReturnBlock) {
    constexpr bool Inline = true;
    ExitFunction(_GetRelocatedPC(Op, Offset, Inline), Hint, CallReturnAddress, CallReturnBlock);
  }

  // Reports whether Operand refers to memory. GPR operands never do. Literal
  // operands (including literal relocations) act as immediates when loaded
  // from, but as memory addresses when stored to.
  [[nodiscard]]
  static bool IsOperandMem(const X86Tables::DecodedOperand& Operand, bool Load) {
    if (Operand.IsGPR()) {
      return false;
    }

    if (!Load) {
      // As a destination, everything that is not a GPR is memory.
      return true;
    }

    return !Operand.IsLiteral() && !Operand.IsLiteralRelocation();
  }

  // True only for default-type accesses whose register is RSP.
  [[nodiscard]]
  static bool IsNonTSOReg(MemoryAccessType Access, uint8_t Reg) {
    if (Access != MemoryAccessType::DEFAULT) {
      return false;
    }
    return Reg == X86State::REG_RSP;
  }

  AddressMode DecodeAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, MemoryAccessType AccessType, bool IsLoad);

  Ref LoadSource(RegClass Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                 const LoadSourceOptions& Options = {});
  // GPR-class convenience wrapper around LoadSource.
  Ref LoadSourceGPR(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                    const LoadSourceOptions& Options = {}) {
    constexpr auto Bank = RegClass::GPR;
    return LoadSource(Bank, Op, Operand, Flags, Options);
  }
  // FPR-class convenience wrapper around LoadSource.
  Ref LoadSourceFPR(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, uint32_t Flags,
                    const LoadSourceOptions& Options = {}) {
    constexpr auto Bank = RegClass::FPR;
    return LoadSource(Bank, Op, Operand, Flags, Options);
  }

  Ref LoadSource_WithOpSize(RegClass Class, const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, IR::OpSize OpSize,
                            uint32_t Flags, const LoadSourceOptions& Options = {});
  // GPR-class convenience wrapper around LoadSource_WithOpSize.
  Ref LoadSourceGPR_WithOpSize(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, IR::OpSize OpSize, uint32_t Flags,
                               const LoadSourceOptions& Options = {}) {
    constexpr auto Bank = RegClass::GPR;
    return LoadSource_WithOpSize(Bank, Op, Operand, OpSize, Flags, Options);
  }
  // FPR-class convenience wrapper around LoadSource_WithOpSize.
  Ref LoadSourceFPR_WithOpSize(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, IR::OpSize OpSize, uint32_t Flags,
                               const LoadSourceOptions& Options = {}) {
    constexpr auto Bank = RegClass::FPR;
    return LoadSource_WithOpSize(Bank, Op, Operand, OpSize, Flags, Options);
  }

  void StoreResult_WithOpSize(RegClass Class, X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, IR::OpSize OpSize,
                              IR::OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT);
  // GPR-class convenience wrapper around StoreResult_WithOpSize.
  void StoreResultGPR_WithOpSize(X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, IR::OpSize OpSize,
                                 IR::OpSize Align = IR::OpSize::iInvalid, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::GPR;
    StoreResult_WithOpSize(Bank, Op, Operand, Src, OpSize, Align, AccessType);
  }
  // FPR-class convenience wrapper around StoreResult_WithOpSize.
  void StoreResultFPR_WithOpSize(X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, IR::OpSize OpSize,
                                 IR::OpSize Align = IR::OpSize::iInvalid, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::FPR;
    StoreResult_WithOpSize(Bank, Op, Operand, Src, OpSize, Align, AccessType);
  }

  void StoreResult(RegClass Class, X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, OpSize Align,
                   MemoryAccessType AccessType = MemoryAccessType::DEFAULT);
  // GPR-class convenience wrapper around the explicit-operand StoreResult.
  void StoreResultGPR(X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, OpSize Align = OpSize::iInvalid,
                      MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::GPR;
    StoreResult(Bank, Op, Operand, Src, Align, AccessType);
  }
  // FPR-class convenience wrapper around the explicit-operand StoreResult.
  void StoreResultFPR(X86Tables::DecodedOp Op, const X86Tables::DecodedOperand& Operand, Ref Src, OpSize Align = OpSize::iInvalid,
                      MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::FPR;
    StoreResult(Bank, Op, Operand, Src, Align, AccessType);
  }

  void StoreResult(RegClass Class, X86Tables::DecodedOp Op, Ref Src, OpSize Align, MemoryAccessType AccessType = MemoryAccessType::DEFAULT);
  // GPR-class convenience wrapper around the StoreResult overload that takes no explicit operand.
  void StoreResultGPR(X86Tables::DecodedOp Op, Ref Src, OpSize Align = OpSize::iInvalid, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::GPR;
    StoreResult(Bank, Op, Src, Align, AccessType);
  }
  // FPR-class convenience wrapper around the StoreResult overload that takes no explicit operand.
  void StoreResultFPR(X86Tables::DecodedOp Op, Ref Src, OpSize Align = OpSize::iInvalid, MemoryAccessType AccessType = MemoryAccessType::DEFAULT) {
    constexpr auto Bank = RegClass::FPR;
    StoreResult(Bank, Op, Src, Align, AccessType);
  }

  // Several call sites need a base address that already has the segment offset
  // folded in. The MakeSegmentAddress overloads centralise that common pattern.
  //
  // This overload resolves Operand to an address only (LoadData = false) at the
  // given OpSize and then appends the segment offset selected by Op->Flags.
  [[nodiscard]]
  Ref MakeSegmentAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand, IR::OpSize OpSize) {
    const LoadSourceOptions AddressOnly {.LoadData = false};
    return AppendSegmentOffset(LoadSourceGPR_WithOpSize(Op, Operand, OpSize, Op->Flags, AddressOnly), Op->Flags);
  }
  // Overload that takes the operation size from Op's source size.
  [[nodiscard]]
  Ref MakeSegmentAddress(const X86Tables::DecodedOp& Op, const X86Tables::DecodedOperand& Operand) {
    const IR::OpSize SourceSize = OpSizeFromSrc(Op);
    return MakeSegmentAddress(Op, Operand, SourceSize);
  }
  // Overload that uses the value of a guest GPR as the base address.
  [[nodiscard]]
  Ref MakeSegmentAddress(X86State::X86Reg Reg, uint32_t Flags, uint32_t DefaultPrefix = 0, bool Override = false) {
    return AppendSegmentOffset(LoadGPRRegister(Reg), Flags, DefaultPrefix, Override);
  }

  // Guest vector width: 256-bit only when the host reports both SVE256 and AVX
  // support, otherwise 128-bit.
  constexpr OpSize GetGuestVectorLength() const {
    if (CTX->HostFeatures.SupportsSVE256 && CTX->HostFeatures.SupportsAVX) {
      return OpSize::i256Bit;
    }
    return OpSize::i128Bit;
  }

  // Byte offset of gregs[reg] within Core::CPUState. Asserts reg <= REG_R15.
  [[nodiscard]]
  static uint32_t GPROffset(X86State::X86Reg reg) {
    LOGMAN_THROW_A_FMT(reg <= X86State::X86Reg::REG_R15, "Invalid reg used");
    const auto Offset = ARRAY_OFFSETOF(Core::CPUState, gregs, reg);
    return static_cast<uint32_t>(Offset);
  }

  // Byte offset of the first element of the mm array within Core::CPUState.
  [[nodiscard]]
  static uint32_t MMBaseOffset() {
    const auto Offset = offsetof(Core::CPUState, mm[0][0]);
    return static_cast<uint32_t>(Offset);
  }

  [[nodiscard]]
  uint8_t GetDstSize(X86Tables::DecodedOp Op) const;
  [[nodiscard]]
  uint8_t GetSrcSize(X86Tables::DecodedOp Op) const;
  [[nodiscard]]
  uint32_t GetDstBitSize(X86Tables::DecodedOp Op) const;
  [[nodiscard]]
  uint32_t GetSrcBitSize(X86Tables::DecodedOp Op) const;
  // Destination size of Op (as returned by GetDstSize) converted to an IR::OpSize.
  [[nodiscard]]
  IR::OpSize OpSizeFromDst(X86Tables::DecodedOp Op) const {
    const auto DstSize = GetDstSize(Op);
    return IR::SizeToOpSize(DstSize);
  }
  // Source size of Op (as returned by GetSrcSize) converted to an IR::OpSize.
  [[nodiscard]]
  IR::OpSize OpSizeFromSrc(X86Tables::DecodedOp Op) const {
    const auto SrcSize = GetSrcSize(Op);
    return IR::SizeToOpSize(SrcSize);
  }

  [[nodiscard]]
  IR::OpSize GetStringOpSize(X86Tables::DecodedOp Op) const;

  // Prepare flag tracking for an operation that writes NZCV directly: the
  // cached copy is dropped and no longer considered dirty.
  void HandleNZCVWrite() {
    NZCVDirty = false;
    CachedNZCV = nullptr;
  }

  // Prepare flag tracking for a read-modify-write operation on NZCV: deferred
  // flags are calculated, a dirty cached NZCV is written back with _StoreNZCV,
  // and then the cache is reset as for a direct write.
  void HandleNZCV_RMW() {
    CalculateDeferredFlags();

    const bool HasPendingNZCV = NZCVDirty && CachedNZCV;
    if (HasPendingNZCV) {
      _StoreNZCV(CachedNZCV);
    }

    HandleNZCVWrite();
  }

  // Variant of HandleNZCVWrite for operations known to zero C and V.
  void HandleNZ00Write() {
    // The host carry is implicitly zeroed and the guest carry should be zero
    // too, so the carry must not be treated as inverted.
    CFInverted = false;

    HandleNZCVWrite();
  }

  // Returns the cached NZCV value, emitting a _LoadNZCV to fill the cache on
  // first use.
  Ref GetNZCV() {
    if (CachedNZCV) {
      return CachedNZCV;
    }

    CachedNZCV = _LoadNZCV();
    return CachedNZCV;
  }

  // Replaces the cached NZCV with Value and marks the cache dirty.
  void SetNZCV(Ref Value) {
    NZCVDirty = true;
    CachedNZCV = Value;
  }

  void ZeroNZCV() {
    CachedNZCV = Constant(0);
    NZCVDirty = true;
  }

  // Sets N and Z from Res with guest C and V zero; optionally also computes PF.
  //
  // SrcSize: size of the result being tested.
  // Res:     value the flags are derived from.
  // SetPF:   when true, PF is calculated as well.
  void SetNZ_ZeroCV(IR::OpSize SrcSize, Ref Res, bool SetPF = false) {
    HandleNZ00Write();

    if (SrcSize < OpSize::i32Bit) {
      // Sub-32-bit sizes use a plain test, which leaves carry non-inverted.
      // This is currently worse for 8/16-bit, but that should be optimized. TODO
      _TestNZ(SrcSize, Res, Res);
      CFInverted = false;

      if (SetPF) {
        CalculatePF(Res);
      }
      return;
    }

    // x - 0 = x. NZ set according to Res. C always set. V always unset. This
    // matches what we want since we want carry inverted.
    if (SetPF) {
      CalculatePF(SubWithFlags(SrcSize, Res, (uint64_t)0));
    } else {
      _SubNZCV(SrcSize, Res, Constant(0));
    }

    CFInverted = true;
  }

  // SetNZ_ZeroCV with PF calculation enabled.
  void SetNZP_ZeroCV(IR::OpSize SrcSize, Ref Res) {
    constexpr bool AlsoSetPF = true;
    SetNZ_ZeroCV(SrcSize, Res, AlsoSetPF);
  }

  // Writes one bit of Value into the NZCV flag selected by BitOffset.
  //
  // BitOffset:  x86 flag location; mapped to an NZCV bit index via IndexNZCV.
  // Value:      source value holding the flag bit.
  // FlagOffset: bit position inside Value where the flag bit lives.
  // MustMask:   Value may have other bits set, so a single bit must be isolated.
  //
  // Two strategies exist:
  //  - rmif path (host has FlagM and the heuristic prefers it): a dirty cached
  //    NZCV is written back with _StoreNZCV, the cache is dropped, and
  //    _RmifNZCV inserts the bit.
  //  - GPR path: the bit is (optionally) extracted with _Bfe and inserted with
  //    _Bfi into the value returned by GetNZCV(), then recorded via SetNZCV.
  void InsertNZCV(unsigned BitOffset, Ref Value, signed FlagOffset, bool MustMask) {
    signed Bit = IndexNZCV(BitOffset);

    // Heuristic to choose rmif vs msr.
    bool PreferRmif = !NZCVDirty || FlagOffset || MustMask;

    if (CTX->HostFeatures.SupportsFlagM && PreferRmif) {
      // Update NZCV
      if (NZCVDirty && CachedNZCV) {
        _StoreNZCV(CachedNZCV);
      }

      CachedNZCV = nullptr;
      NZCVDirty = false;

      // Insert as NZCV.
      // NOTE(review): the subtraction of 28 presumably rebases the bit index from
      // a 31..28 NZCV layout onto the 4-bit rmif mask — confirm against IndexNZCV.
      signed RmifBit = Bit - 28;
      // The rotate amount is kept in [0, 64) by adding 64 before the modulo.
      _RmifNZCV(Value, (64 + FlagOffset - RmifBit) % 64, 1u << RmifBit);
      CachedNZCV = nullptr;
    } else {
      // Insert as GPR
      if (FlagOffset || MustMask) {
        Value = _Bfe(OpSize::i64Bit, 1, FlagOffset, Value);
      }

      SetNZCV(_Bfi(OpSize::i32Bit, 1, Bit, GetNZCV(), Value));
    }
  }

  // If we don't care about N/C/V and just need Z, we can test with a simple
  // mask without any shifting.
  //
  // Size: size of the value being tested.
  // Src:  value whose zero-ness determines Z.
  //
  // The NZCV cache is reset first (HandleNZCVWrite) and the carry is recorded as
  // inverted. NOTE(review): C is considered invalid after this call, so the
  // chosen CFInverted value is presumably arbitrary — confirm with callers.
  void SetZ_InvalidateNCV(IR::OpSize Size, Ref Src) {
    HandleNZCVWrite();
    CFInverted = true;

    if (Size < OpSize::i32Bit) {
      // Sub-32-bit: test at 32-bit against a mask covering only the low Size bits.
      _TestNZ(OpSize::i32Bit, Src, _InlineConstant((1u << (IR::OpSizeAsBits(Size))) - 1));
    } else {
      _TestNZ(Size, Src, Src);
    }
  }

  // Ensure the carry invert flag matches the desired form. Used before an
  // operation reading carry or at the end of a block.
  //
  // RequiredInvert: the CFInverted state the caller needs on return.
  //
  // Nothing is emitted when the state already matches. Otherwise the carry is
  // flipped either directly in the flags (FlagM host with a clean NZCV cache)
  // or by XOR-ing the carry bit of the GPR copy of NZCV.
  void RectifyCarryInvert(bool RequiredInvert) {
    if (CFInverted != RequiredInvert) {
      if (CTX->HostFeatures.SupportsFlagM && !NZCVDirty) {
        // Invert as NZCV.
        _CarryInvert();
        // Any cached copy no longer reflects the flags after the in-place invert.
        CachedNZCV = nullptr;
      } else {
        // Invert as a GPR
        unsigned Bit = IndexNZCV(FEXCore::X86State::RFLAG_CF_RAW_LOC);
        SetNZCV(_Xor(OpSize::i32Bit, GetNZCV(), Constant(1u << Bit)));
        CalculateDeferredFlags();
      }

      CFInverted ^= true;
    }

    LOGMAN_THROW_A_FMT(CFInverted == RequiredInvert, "post condition");
  }

  // Toggles the tracked carry-inversion state without emitting any IR.
  void CarryInvert() {
    CFInverted = !CFInverted;
  }

  // Compile-time flag-location form of SetRFLAG; forwards to the runtime overload.
  template<unsigned BitOffset>
  void SetRFLAG(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
    constexpr unsigned FlagLocation = BitOffset;
    SetRFLAG(Value, FlagLocation, ValueOffset, MustMask);
  }

  // Sets CF from a non-inverted Value. The bit at ValueOffset is flipped with an
  // XOR first, the result is stored as the carry flag, and the carry is then
  // tracked as inverted.
  void SetCFDirect(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
    const auto FlipMask = 1ull << ValueOffset;
    Ref Flipped = _Xor(OpSize::i64Bit, Value, _InlineConstant(FlipMask));

    SetRFLAG(Flipped, X86State::RFLAG_CF_RAW_LOC, ValueOffset, MustMask);
    CFInverted = true;
  }

  // Set CF directly to the given 0/1 value, leaving N/Z/V with unspecified
  // contents. The invert must be respected, so a subtraction is used:
  //
  //     0 - x = 0 + (~x) + 1.
  //
  // If x = 0, then 0 + (~0) + 1 = 0x100000000 so hardware C is set.
  // If x = 1, then 0 + (~1) + 1 = 0x0ffffffff so hardware C is not set.
  void SetCFDirect_InvalidateNZV(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
    const bool NeedsExtract = ValueOffset || MustMask;
    if (NeedsExtract) {
      // Reduce Value to the single bit of interest.
      Value = _Bfe(OpSize::i64Bit, 1, ValueOffset, Value);
    }

    HandleNZCVWrite();
    _SubNZCV(OpSize::i32Bit, Constant(0), Value);
    CFInverted = true;
  }

  // Stores Value, which already holds the inverted carry, as CF and records
  // that the carry is inverted.
  void SetCFInverted(Ref Value, unsigned ValueOffset = 0, bool MustMask = false) {
    constexpr auto CarryLocation = X86State::RFLAG_CF_RAW_LOC;
    SetRFLAG(Value, CarryLocation, ValueOffset, MustMask);
    CFInverted = true;
  }

  // Stores a single flag selected by BitOffset.
  //
  // Value:       source value containing the flag bit.
  // BitOffset:   flag location (RFLAG_*_RAW_LOC / X87FLAG_*_LOC).
  // ValueOffset: bit position of the flag inside Value.
  // MustMask:    request that exactly one bit is extracted from Value.
  //
  // NZCV flags are delegated to InsertNZCV. PF and AF are kept in the register
  // cache, DF is stored in a transformed form, TF gets special packing, and all
  // remaining flags are stored as a byte into the CPUState flags array.
  void SetRFLAG(Ref Value, unsigned BitOffset, unsigned ValueOffset = 0, bool MustMask = false) {
    if (IsNZCV(BitOffset)) {
      InsertNZCV(BitOffset, Value, ValueOffset, MustMask);
      return;
    }

    // Isolate the requested bit when it is not already a clean bit 0.
    if (ValueOffset || MustMask) {
      Value = _Bfe(OpSize::i32Bit, 1, ValueOffset, Value);
    }

    if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
      StoreRegister(Core::CPUState::PF_AS_GREG, false, Value);
    } else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
      StoreRegister(Core::CPUState::AF_AS_GREG, false, Value);
    } else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
      // For DF, we need to transform 0/1 into 1/-1
      // (computed as 1 - (Value << 1)).
      StoreDF(_SubShift(OpSize::i64Bit, Constant(1), Value, ShiftType::LSL, 1));
    } else if (BitOffset == FEXCore::X86State::RFLAG_TF_RAW_LOC) {
      auto PackedTF = _LoadContextGPR(OpSize::i8Bit, ARRAY_OFFSETOF(FEXCore::Core::CPUState, flags, BitOffset));
      // An exception should still be raised after an instruction that unsets TF, leave the unblocked bit set but unset
      // the TF bit to cause such behaviour. The handling code at the start of the next block will then unset the
      // unblocked bit before raising the exception.
      // Value == 0 selects the old packed byte with bit 0 cleared; otherwise the byte becomes 1.
      auto NewPackedTF =
        _Select(OpSize::i64Bit, OpSize::i64Bit, CondClass::EQ, Value, Constant(0), _And(OpSize::i32Bit, PackedTF, Constant(~1)), Constant(1));
      _StoreContextGPR(OpSize::i8Bit, NewPackedTF, ARRAY_OFFSETOF(FEXCore::Core::CPUState, flags, BitOffset));
    } else {
      _StoreContextGPR(OpSize::i8Bit, Value, ARRAY_OFFSETOF(FEXCore::Core::CPUState, flags, BitOffset));
    }
  }

  void SetAF(unsigned K) {
    // AF is stored in bit 4 of the AF flag byte, with garbage in the other
    // bits. This allows us to defer the extract in the usual case. When it is
    // read, bit 4 is extracted.  In order to write a constant value of AF, that
    // means we need to left-shift here to compensate.
    SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Constant(K << 4));
  }

  void ZeroPF_AF();

  // Emits an invalidate for the AF flag and drops AF from the register cache.
  void InvalidateAF() {
    const auto AFMask = 1u << X86State::RFLAG_AF_RAW_LOC;
    _InvalidateFlags(AFMask);
    InvalidateReg(Core::CPUState::AF_AS_GREG);
  }

  // Emits a single invalidate covering both PF and AF and drops both from the
  // register cache.
  void InvalidatePF_AF() {
    const auto PFMask = 1u << X86State::RFLAG_PF_RAW_LOC;
    const auto AFMask = 1u << X86State::RFLAG_AF_RAW_LOC;
    _InvalidateFlags(PFMask | AFMask);

    InvalidateReg(Core::CPUState::PF_AS_GREG);
    InvalidateReg(Core::CPUState::AF_AS_GREG);
  }

  // Maps an NZCV-backed flag location to the condition code that is true when
  // that flag is set (or, with Invert, when it is clear). Any other BitOffset
  // is unreachable.
  [[nodiscard]]
  static CondClass CondForNZCVBit(unsigned BitOffset, bool Invert) {
    switch (BitOffset) {
    case X86State::RFLAG_SF_RAW_LOC:
      if (Invert) {
        return CondClass::PL;
      }
      return CondClass::MI;
    case X86State::RFLAG_ZF_RAW_LOC:
      if (Invert) {
        return CondClass::NEQ;
      }
      return CondClass::EQ;
    case X86State::RFLAG_CF_RAW_LOC:
      if (Invert) {
        return CondClass::ULT;
      }
      return CondClass::UGE;
    case X86State::RFLAG_OF_RAW_LOC:
      if (Invert) {
        return CondClass::FNU;
      }
      return CondClass::FU;
    default: FEX_UNREACHABLE;
    }
  }

  /* Layout of cache indices. We use a single 64-bit bitmask for the cache */
  /* Each index is both a bit position in the RegCache masks and a slot in RegCache.Value. */
  /* General purpose registers occupy 0..15 */
  static const int GPR0Index = 0;
  static const int GPR15Index = 15;
  /* PF and AF are cached like registers */
  static const int PFIndex = 16;
  static const int AFIndex = 17;
  /* Gap 18..19 */
  /* Note this range is only valid if MMXState = MMXState_MMX */
  static const int MM0Index = 20;
  static const int MM7Index = 27;
  /* Gap 28..30 */
  /* Direction flag */
  static const int DFIndex = 31;
  /* Vector registers occupy 32..47 */
  static const int FPR0Index = 32;
  static const int FPR15Index = 47;
  /* Entries backed by CPUState::avx_high occupy 48..63 */
  static const int AVXHigh0Index = 48;
  static const int AVXHigh15Index = 63;

  // Translates a cache index into a byte offset inside Core::CPUState. Only the
  // MMX and AVX-high ranges are backed by context memory here; every other
  // index yields ~0U.
  [[nodiscard]]
  static uint32_t CacheIndexToContextOffset(int Index) {
    if (Index >= MM0Index && Index <= MM7Index) {
      return ARRAY_OFFSETOF(FEXCore::Core::CPUState, mm, Index - MM0Index);
    }

    if (Index >= AVXHigh0Index && Index <= AVXHigh15Index) {
      return ARRAY_OFFSETOF(FEXCore::Core::CPUState, avx_high, Index - AVXHigh0Index);
    }

    return ~0U;
  }

  // Register class of a cache index: the MMX range and everything from
  // FPR0Index upwards are FPR, the rest are GPR.
  [[nodiscard]]
  static RegClass CacheIndexClass(int Index) {
    const bool InMMXRange = Index >= MM0Index && Index <= MM7Index;
    const bool InVectorRange = Index >= FPR0Index;

    return (InMMXRange || InVectorRange) ? RegClass::FPR : RegClass::GPR;
  }

  // Default access size for a cache index.
  [[nodiscard]]
  static IR::OpSize CacheIndexToOpSize(int Index) {
    // MMX is logically 64-bit, but its registers alias the 80-bit x87
    // registers, so they are rounded up to 128-bit.
    const bool InMMXRange = Index >= MM0Index && Index <= MM7Index;
    const bool InAVXHighRange = Index >= AVXHigh0Index;

    if (InAVXHighRange || InMMXRange) {
      return OpSize::i128Bit;
    }
    return OpSize::i8Bit;
  }

  // Per-index register cache. The three masks are indexed by cache index (one
  // bit per entry), matching the 64 slots of Value.
  struct {
    // Bit set when Value[Index] holds a usable value.
    uint64_t Cached;
    // Bit set when Value[Index] was produced by a store (see StoreContext).
    uint64_t Written;

    // Indicates that Value contains only the lower 64-bit of the full 80-bit
    // register. Used for MMX/x87 optimization.
    uint64_t Partial;

    // Cached IR value for each index.
    Ref Value[64];
  } RegCache {};

  void InvalidateReg(uint8_t Index) {
    uint64_t Bit = (1ull << (uint64_t)Index);
    RegCache.Cached &= ~Bit;
    RegCache.Written &= ~Bit;
  }

  // Returns the value for cache entry Index, loading it on a miss.
  //
  // Offset: context offset (for _LoadContext) or register number (for
  //         _LoadRegister), depending on the kind of index.
  // Index:  cache index, must be < 64.
  // Class:  register class used for the load.
  // Size:   access size of the load.
  //
  // Entries previously accessed at 64-bit (Partial) are widened when a 128-bit
  // access is requested.
  Ref LoadRegCache(uint64_t Offset, uint8_t Index, RegClass Class, IR::OpSize Size) {
    LOGMAN_THROW_A_FMT(Index < 64, "valid index");
    uint64_t Bit = (1ull << (uint64_t)Index);

    if (Size == OpSize::i128Bit && (RegCache.Partial & Bit)) {
      // We need to load the full register extend if we previously did a partial access.
      Ref Value = RegCache.Value[Index];
      Ref Full = _LoadContext(Size, Class, Offset);

      // If we did a partial store, we're inserting into the full register
      if (RegCache.Written & Bit) {
        Full = _VInsElement(OpSize::i128Bit, OpSize::i64Bit, 0, 0, Full, Value);
      }

      // NOTE(review): the Partial bit is not cleared here — confirm whether it is reset elsewhere.
      RegCache.Value[Index] = Full;
    }

    if (!(RegCache.Cached & Bit)) {
      // Cache miss: pick the load op matching the kind of index.
      if (Index == DFIndex) {
        RegCache.Value[Index] = _LoadDF();
      } else if ((Index >= MM0Index && Index <= MM7Index) || Index >= AVXHigh0Index) {
        RegCache.Value[Index] = _LoadContext(Size, Class, Offset);

        // We may have done a partial load, this requires special handling.
        if (Size == OpSize::i64Bit) {
          RegCache.Partial |= Bit;
        }
      } else if (Index == PFIndex) {
        RegCache.Value[Index] = _LoadPF(Size);
      } else if (Index == AFIndex) {
        RegCache.Value[Index] = _LoadAF(Size);
      } else {
        RegCache.Value[Index] = _LoadRegister(Offset, Class, Size);
      }

      RegCache.Cached |= Bit;
    }

    return RegCache.Value[Index];
  }

  // Allocates two fresh registers of the requested class (low first, then
  // high). Size is only used for FPR allocations.
  RefPair AllocatePair(RegClass Class, IR::OpSize Size) {
    if (Class != RegClass::FPR) {
      Ref Low = _AllocateGPR(false);
      Ref High = _AllocateGPR(false);
      return {Low, High};
    }

    Ref Low = _AllocateFPR(Size, Size);
    Ref High = _AllocateFPR(Size, Size);
    return {Low, High};
  }

  // Loads two adjacent context values starting at Offset into a freshly
  // allocated register pair, without touching the register cache.
  RefPair LoadContextPair_Uncached(RegClass Class, IR::OpSize Size, unsigned Offset) {
    RefPair Pair = AllocatePair(Class, Size);
    _LoadContextPair(Size, Class, Offset, Pair.Low, Pair.High);
    return Pair;
  }

  // Loads the cache entries Index and Index + 1 as a pair.
  //
  // Offset: context offset of the first entry; the second is at Offset + size.
  // Index:  cache index of the first entry; must not be DFIndex.
  // Class:  register class of both entries.
  // Size:   access size of each entry; must not be iUnsized.
  //
  // When neither entry is cached or partial, and Offset / size is below 64, a
  // single paired load fills both cache slots. Otherwise each entry goes through
  // LoadRegCache individually.
  RefPair LoadRegCachePair(uint64_t Offset, uint8_t Index, RegClass Class, IR::OpSize Size) {
    LOGMAN_THROW_A_FMT(Index != DFIndex, "must be pairable");
    LOGMAN_THROW_A_FMT(Size != IR::OpSize::iUnsized, "Invalid size!");

    // Try to load a pair into the cache
    // Bits covers both Index and Index + 1.
    uint64_t Bits = (3ull << (uint64_t)Index);
    const auto SizeInt = IR::OpSizeToSize(Size);
    // NOTE(review): the (Offset / SizeInt) < 64 bound presumably mirrors the
    // immediate range of the host pair-load — confirm.
    if (((RegCache.Partial | RegCache.Cached) & Bits) == 0 && ((Offset / SizeInt) < 64)) {
      auto Values = LoadContextPair_Uncached(Class, Size, Offset);
      RegCache.Value[Index] = Values.Low;
      RegCache.Value[Index + 1] = Values.High;
      RegCache.Cached |= Bits;
      // 64-bit loads only hold the low half of each entry.
      if (Size == OpSize::i64Bit) {
        RegCache.Partial |= Bits;
      }
      return Values;
    }

    // Fallback on a pair of loads
    return {
      .Low = LoadRegCache(Offset, Index, Class, Size),
      .High = LoadRegCache(Offset + SizeInt, Index + 1, Class, Size),
    };
  }

  // Loads guest GPR Reg through the register cache at the GPR operating size.
  Ref LoadGPR(uint8_t Reg) {
    const auto CacheSlot = static_cast<uint8_t>(GPR0Index + Reg);
    return LoadRegCache(Reg, CacheSlot, RegClass::GPR, GetGPROpSize());
  }

  // Loads the context-backed cache entry Index at the given Size, deriving the
  // context offset and register class from the index.
  Ref LoadContext(IR::OpSize Size, uint8_t Index) {
    const auto ContextOffset = CacheIndexToContextOffset(Index);
    const auto Bank = CacheIndexClass(Index);
    return LoadRegCache(ContextOffset, Index, Bank, Size);
  }

  // Pair version of LoadContext: loads entries Index and Index + 1.
  RefPair LoadContextPair(IR::OpSize Size, uint8_t Index) {
    const auto ContextOffset = CacheIndexToContextOffset(Index);
    const auto Bank = CacheIndexClass(Index);
    return LoadRegCachePair(ContextOffset, Index, Bank, Size);
  }

  // Loads cache entry Index at its default size (see CacheIndexToOpSize).
  Ref LoadContext(uint8_t Index) {
    const IR::OpSize DefaultSize = CacheIndexToOpSize(Index);
    return LoadContext(DefaultSize, Index);
  }

  // Loads guest vector register Reg through the register cache at the guest
  // vector length.
  Ref LoadXMMRegister(uint8_t Reg) {
    const auto CacheSlot = static_cast<uint8_t>(FPR0Index + Reg);
    return LoadRegCache(Reg, CacheSlot, RegClass::FPR, GetGuestVectorLength());
  }

  // Loads the cached direction-flag value (cache slot DFIndex) via LoadGPR.
  Ref LoadDF() {
    constexpr uint8_t Slot = DFIndex;
    return LoadGPR(Slot);
  }

  // Records Value as the current contents of cache entry Index and marks the
  // entry as both cached and written. Index must be < 64 and Value must not be
  // InvalidNode.
  void StoreContext(uint8_t Index, Ref Value) {
    LOGMAN_THROW_A_FMT(Index < 64, "valid index");
    LOGMAN_THROW_A_FMT(Value != InvalidNode, "storing valid");

    const uint64_t Mask = uint64_t {1} << Index;

    RegCache.Written |= Mask;
    RegCache.Cached |= Mask;
    RegCache.Value[Index] = Value;
  }

  void InvalidateHighAVXRegisters() {
    for (size_t i = 0; i < 16; ++i) {
      InvalidateReg(AVXHigh0Index + i);
    }
  }

  // Stores Value into the cache entry for register Reg, in the FPR range when
  // FPR is true and in the GPR range otherwise.
  void StoreRegister(uint8_t Reg, bool FPR, Ref Value) {
    const int BaseIndex = FPR ? FPR0Index : GPR0Index;
    StoreContext(Reg + BaseIndex, Value);
  }

  // Stores Value as the cached direction-flag value (cache slot DFIndex).
  void StoreDF(Ref Value) {
    constexpr uint8_t Slot = DFIndex;
    StoreContext(Slot, Value);
  }

  // Returns an IR value for the flag at BitOffset.
  //
  // BitOffset: flag location (RFLAG_*_RAW_LOC or another index into the flags array).
  // Invert:    for NZCV-backed flags, return the logical negation instead.
  //            It is not consulted for the other flags.
  //
  // NZCV flags yield 0/1. PF and AF return the raw cached register value, DF is
  // recovered from the sign bit of the stored form, and all other flags are
  // loaded as a byte from the CPUState flags array.
  Ref GetRFLAG(unsigned BitOffset, bool Invert = false) {
    if (IsNZCV(BitOffset)) {
      // Handle the CFInverted state internally so GetRFLAG is safe regardless
      // of the invert state. This simplifies the call sites.
      if (BitOffset == X86State::RFLAG_CF_RAW_LOC) {
        Invert ^= CFInverted;
      }

      if (NZCVDirty) {
        // The up-to-date flags live in the GPR copy, so extract the bit from there.
        auto Value = _Bfe(OpSize::i32Bit, 1, IndexNZCV(BitOffset), GetNZCV());

        if (Invert) {
          return _Xor(OpSize::i32Bit, Value, Constant(1));
        } else {
          return Value;
        }
      } else {
        // Because we explicitly inverted for CF above, we use the unsafe
        // _NZCVSelect rather than the safe CF-aware version.
        return _NZCVSelect01(CondForNZCVBit(BitOffset, Invert));
      }
    } else if (BitOffset == FEXCore::X86State::RFLAG_PF_RAW_LOC) {
      return LoadGPR(Core::CPUState::PF_AS_GREG);
    } else if (BitOffset == FEXCore::X86State::RFLAG_AF_RAW_LOC) {
      return LoadGPR(Core::CPUState::AF_AS_GREG);
    } else if (BitOffset == FEXCore::X86State::RFLAG_DF_RAW_LOC) {
      // Recover the sign bit, it is the logical DF value
      return _Lshr(OpSize::i64Bit, LoadDF(), Constant(63));
    } else {
      return _LoadContextGPR(OpSize::i8Bit, ARRAY_OFFSETOF(Core::CPUState, flags, BitOffset));
    }
  }

  // Returns (DF ? -Size : Size), computed by shifting the stored DF value left
  // by log2(Size).
  Ref LoadDir(const unsigned Size) {
    const auto Shift = FEXCore::ilog2(Size);
    return ARef(LoadDF()).Lshl(Shift).Ref();
  }

  // Returns DF ? (X - Size) : (X + Size), as X plus the stored DF value shifted
  // left by log2(Size).
  Ref OffsetByDir(Ref X, const unsigned Size) {
    return _AddShift(OpSize::i64Bit, X, LoadDF(), ShiftType::LSL, FEXCore::ilog2(Size));
  }

  // Safe version of NZCVSelect that handles inverted carries automatically.
  //
  // OpSize:          size of the select.
  // Cond:            condition written assuming the carry sense given by CarryIsInverted.
  // TrueV / FalseV:  values selected when Cond holds / does not hold.
  // CarryIsInverted: carry sense that Cond was written for.
  Ref NZCVSelect(OpSize OpSize, CondClass Cond, Ref TrueV, Ref FalseV, bool CarryIsInverted = false) {
    const bool ReadsOnlyCarry = Cond == CondClass::UGE /* cs */ || Cond == CondClass::ULT /* cc */;
    const bool ReadsCarryAndZero = Cond == CondClass::UGT /* hi */ || Cond == CondClass::ULE /* ls */;

    if (ReadsOnlyCarry) {
      // A mismatch in carry sense is absorbed by flipping the condition.
      if (CarryIsInverted != CFInverted) {
        Cond = (Cond == CondClass::UGE) ? CondClass::ULT : CondClass::UGE;
      }
    } else if (ReadsCarryAndZero) {
      // No clever optimization is possible here, so rectify the carry itself.
      RectifyCarryInvert(CarryIsInverted);
    }
    // All remaining condition codes do not read carry; nothing to rectify.

    return _NZCVSelect(OpSize, Cond, TrueV, FalseV);
  }

  // Compares two floats and sets flags for a COMISS instruction
  //
  // ElementSize:  size of the floating-point elements being compared.
  // Src1, Src2:   operands passed to _FCmp.
  // InvalidateAF: forwarded to ComissFlags; when true AF is not zeroed there.
  void Comiss(IR::OpSize ElementSize, Ref Src1, Ref Src2, bool InvalidateAF = false) {
    // First, set flags according to Arm FCMP.
    HandleNZCVWrite();
    _FCmp(ElementSize, Src1, Src2);
    // The carry is recorded as non-inverted until ComissFlags converts the flags.
    CFInverted = false;
    ComissFlags(InvalidateAF);
  }

  // Sets flags for a COMISS instruction
  //
  // Expects the host NZCV to have just been produced by an fcmp (the NZCV cache
  // must not be dirty). Sets PF from the inverted V flag, zeroes AF unless
  // InvalidateAF is true, converts NZCV with _AXFlag and leaves the carry
  // tracked as inverted.
  void ComissFlags(bool InvalidateAF = false) {
    LOGMAN_THROW_A_FMT(!NZCVDirty, "only expected after fcmp");

    // We need to set PF according to the unordered flag. We'd rather do this
    // after axflag, since some impls fuse fcmp+axflag, so we want to do this
    // after. We can recover "unordered" after axflag as (Z && !C), but
    // there's no condition code for this so it would take 2 instructions
    // instead of one, which seems worse than doing 1 op before and breaking
    // the fusion.
    //
    // We set PF to unordered (V), but our PF representation is inverted so we
    // actually set to !V. This is one instruction with the VC cond code.
    Ref V_inv = GetRFLAG(FEXCore::X86State::RFLAG_OF_RAW_LOC, true);
    SetRFLAG<FEXCore::X86State::RFLAG_PF_RAW_LOC>(V_inv);

    if (!InvalidateAF) {
      // Zero AF. Note that the comparison sets the raw PF to 0/1 above, so
      // PF[4] is 0 so the XOR with PF will have no effect, so setting the AF
      // byte to zero will indeed zero AF as intended.
      SetRFLAG<FEXCore::X86State::RFLAG_AF_RAW_LOC>(Constant(0));
    }

    // Convert NZCV from the Arm representation to an eXternal representation
    // that's totally not a euphemism for x86, nuh-uh. But maps to exactly we
    // need, what a coincidence!
    //
    // Our AXFlag emulation on FlagM2-less systems needs V_inv passed.
    _AXFlag(CTX->HostFeatures.SupportsFlagM2 ? Invalid() : V_inv);
    CFInverted = true;
  }

  // Set x87 comparison flags based on the result set by Arm FCMP. Clobbers
  // NZCV on flagm2 platforms.
  //
  // Writes C0, C1 (always zero), C2 and C3 through SetRFLAG. Requires the NZCV
  // cache to be dirty with a valid cached value on entry.
  void ConvertNZCVToX87() {
    LOGMAN_THROW_A_FMT(NZCVDirty && CachedNZCV, "NZCV must be saved");

    // V must be read before the flags are converted below.
    Ref V = _NZCVSelect01(CondForNZCVBit(FEXCore::X86State::RFLAG_OF_RAW_LOC, false));

    if (CTX->HostFeatures.SupportsFlagM2) {
      // Convert to x86 flags, saves us from or'ing after.
      _AXFlag(Invalid());
    }

    // CF is inverted after FCMP
    Ref C = _NZCVSelect01(CondForNZCVBit(FEXCore::X86State::RFLAG_CF_RAW_LOC, true));
    Ref Z = _NZCVSelect01(CondForNZCVBit(FEXCore::X86State::RFLAG_ZF_RAW_LOC, false));

    if (!CTX->HostFeatures.SupportsFlagM2) {
      // Without the flag conversion, fold V into C and Z manually.
      C = _Or(OpSize::i32Bit, C, V);
      Z = _Or(OpSize::i32Bit, Z, V);
    }

    SetRFLAG<FEXCore::X86State::X87FLAG_C0_LOC>(C);
    SetRFLAG<FEXCore::X86State::X87FLAG_C1_LOC>(Constant(0));
    SetRFLAG<FEXCore::X86State::X87FLAG_C2_LOC>(V);
    SetRFLAG<FEXCore::X86State::X87FLAG_C3_LOC>(Z);
  }

  // Helper to store a variable shift and calculate its flags for a variable
  // shift, with correct PF handling.
  //
  // Op:     decoded instruction; supplies the operation size and destination.
  // Result: shifted value, stored to Op's destination.
  // Dest:   original (pre-shift) destination value, passed to _ShiftFlags.
  // Shift:  kind of shift performed.
  // Src:    shift amount, passed to _ShiftFlags.
  void HandleShift(X86Tables::DecodedOp Op, Ref Result, Ref Dest, ShiftType Shift, Ref Src) {

    // Capture the previous PF before any flag state is modified.
    auto OldPF = GetRFLAG(X86State::RFLAG_PF_RAW_LOC);

    HandleNZCV_RMW();
    CalculatePF(_ShiftFlags(OpSizeFromSrc(Op), Result, Dest, Shift, Src, OldPF, CFInverted));
    StoreResultGPR(Op, Result);
  }

  // Helper to derive Dest by a given builder-using Expression with the opcode
  // replaced with NewOp. Useful for generic building code. Not safe in general.
  // but does the right handling of ImplicitFlagClobber at least and must be
  // used instead of raw Op mutation.
  //
  // Note: this expands to several statements and declares the variable Dest in
  // the enclosing scope, so it cannot be used as the body of an unbraced
  // if/else/loop.
#define DeriveOp(Dest, NewOp, Expr)                \
  if (ImplicitFlagClobber(NewOp)) SaveNZCV(NewOp); \
  auto Dest = (Expr);                              \
  Dest.first->Header.Op = (NewOp)

  // Named constant cache for the current block.
  // Different arrays for sizes 1,2,4,8,16,32.
  Ref CachedNamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_MAX][6] {};
  // Lookup key for CachedIndexedNamedVectorConstants: which indexed constant
  // table, which index into it, and log2 of the element size in bytes.
  //
  // IndexNamedVectorMapKeyHasher hashes the raw bytes of this struct, so every
  // byte of it has to be deterministic.
  // NOTE(review): _pad presumably exists to turn what would otherwise be
  // implicit trailing padding into an explicit, zero-initialised member —
  // confirm against the size of IndexNamedVectorConstant.
  struct IndexNamedVectorMapKey {
    uint32_t Index {};
    // No default initialiser; LoadAndCacheIndexedNamedVectorConstant always sets it.
    FEXCore::IR::IndexNamedVectorConstant NamedIndexedConstant;
    uint8_t log2_size_in_bytes {};
    uint16_t _pad {};

    // Member-wise comparison (includes _pad).
    bool operator==(const IndexNamedVectorMapKey&) const = default;
  };
  struct IndexNamedVectorMapKeyHasher {
    std::size_t operator()(const IndexNamedVectorMapKey& k) const noexcept {
      return XXH3_64bits(&k, sizeof(k));
    }
  };
  fextl::unordered_map<IndexNamedVectorMapKey, Ref, IndexNamedVectorMapKeyHasher> CachedIndexedNamedVectorConstants;

  // Load and cache a named vector constant.
  Ref LoadAndCacheNamedVectorConstant(IR::OpSize Size, FEXCore::IR::NamedVectorConstant NamedConstant) {
    // Second cache dimension is indexed by log2 of the size in bytes.
    const auto SizeSlot = FEXCore::ilog2(IR::OpSizeToSize(Size));

    // Return the cached node when there is one for this block.
    if (Ref Cached = CachedNamedVectorConstants[NamedConstant][SizeSlot]; Cached) {
      return Cached;
    }

    // Otherwise emit the load once and remember it.
    Ref Loaded = _LoadNamedVectorConstant(Size, NamedConstant);
    CachedNamedVectorConstants[NamedConstant][SizeSlot] = Loaded;
    return Loaded;
  }
  // Load and cache one entry of an indexed named vector constant. The cache is
  // keyed on (Index, NamedIndexedConstant, log2 of the size in bytes).
  Ref LoadAndCacheIndexedNamedVectorConstant(IR::OpSize Size, FEXCore::IR::IndexNamedVectorConstant NamedIndexedConstant, uint32_t Index) {
    const IndexNamedVectorMapKey LookupKey {
      .Index = Index,
      .NamedIndexedConstant = NamedIndexedConstant,
      .log2_size_in_bytes = FEXCore::ilog2(IR::OpSizeToSize(Size)),
    };

    if (auto Hit = CachedIndexedNamedVectorConstants.find(LookupKey); Hit != CachedIndexedNamedVectorConstants.end()) {
      return Hit->second;
    }

    // Miss: emit the load and record it for later lookups.
    Ref Loaded = _LoadNamedVectorIndexedConstant(Size, NamedIndexedConstant, Index);
    CachedIndexedNamedVectorConstants.insert_or_assign(LookupKey, Loaded);
    return Loaded;
  }

  // Emits a fresh load of the named zero vector constant at the given size,
  // bypassing (and not populating) the per-block constant cache.
  Ref LoadUncachedZeroVector(IR::OpSize Size) {
    return _LoadNamedVectorConstant(Size, IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
  }

  // Returns the named zero vector constant at the given size, going through
  // the per-block constant cache.
  Ref LoadZeroVector(IR::OpSize Size) {
    return LoadAndCacheNamedVectorConstant(Size, IR::NamedVectorConstant::NAMED_VECTOR_ZERO);
  }

  // Reset the named vector constants cache array.
  // These are only cached per block.
  void ClearCachedNamedConstants() {
    // Zero every slot of the fixed-size array cache.
    memset(CachedNamedVectorConstants, 0, sizeof(CachedNamedVectorConstants));
    // Drop all entries of the indexed-constant map cache.
    CachedIndexedNamedVectorConstants.clear();
  }

  std::optional<CondClass> DecodeNZCVCondition(uint8_t OP);
  Ref SelectCC0All1(uint8_t OP);

  /**
   * @brief Flushes NZCV. Mostly vestigial.
   */
  void CalculateDeferredFlags();

  // Zero-count handler for shifts/rotates: for 32-bit operations the
  // destination is loaded and stored straight back; every other size is a
  // no-op here.
  void ZeroShiftResult(FEXCore::X86Tables::DecodedOp Op) {
    // In the case of zero-rotate, we need to store the destination still to deal with 32-bit semantics.
    if (OpSizeFromSrc(Op) == OpSize::i32Bit) {
      auto Unchanged = LoadSourceGPR(Op, Op->Dest, Op->Flags);
      StoreResultGPR(Op, Unchanged);
    }
  }

  using ZeroShiftFunctionPtr = void (OpDispatchBuilder::*)(FEXCore::X86Tables::DecodedOp Op);

  // Runs Calculate() only when Shift is non-zero. When Shift is zero, the
  // optional ZeroShiftResult member function is invoked instead (if provided);
  // otherwise nothing is emitted for the zero case.
  //
  // If Shift is a known constant the choice is made here, with no branch.
  // Otherwise new code blocks are created and a conditional jump on
  // Shift == 0 selects between them; on return the current block is the
  // freshly started end block.
  //
  // Note: the ZeroShiftResult parameter shadows the member function of the
  // same name inside this body.
  template<typename F>
  void Calculate_ShiftVariable(FEXCore::X86Tables::DecodedOp Op, Ref Shift, F&& Calculate,
                               std::optional<ZeroShiftFunctionPtr> ZeroShiftResult = std::nullopt) {
    // RCR can call this with constants, so handle that without branching.
    uint64_t Const;
    if (IsValueConstant(WrapNode(Shift), &Const)) {
      if (Const) {
        Calculate();
      } else if (ZeroShiftResult) {
        (this->*(*ZeroShiftResult))(Op);
      }

      return;
    }

    // Otherwise, prepare to branch.
    auto Zero = Constant(0);

    // If the shift is zero, do not touch the flags.
    // Blocks are created in order: SetBlock, then the optional ZeroShiftBlock,
    // then EndBlock, each placed after the previous one.
    auto SetBlock = CreateNewCodeBlockAfter(GetCurrentBlock());
    IRPair<IROp_CodeBlock> NextBlock = SetBlock;
    IRPair<IROp_CodeBlock> ZeroShiftBlock;
    if (ZeroShiftResult) {
      ZeroShiftBlock = CreateNewCodeBlockAfter(NextBlock);
      NextBlock = ZeroShiftBlock;
    }
    auto EndBlock = CreateNewCodeBlockAfter(NextBlock);

    ///< Jump to zeroshift block or end block depending on if it was provided.
    IRPair<IROp_CodeBlock> TailHandling = ZeroShiftResult ? ZeroShiftBlock : EndBlock;
    CondJump(Shift, Zero, TailHandling, SetBlock, CondClass::EQ);

    // Non-zero shift: run the caller's calculation, then join at EndBlock.
    SetCurrentCodeBlock(SetBlock);
    StartNewBlock();
    {
      Calculate();
      Jump(EndBlock);
    }

    // Zero shift with a handler: run it, then join at EndBlock.
    if (ZeroShiftResult) {
      SetCurrentCodeBlock(ZeroShiftBlock);
      StartNewBlock();
      {
        (this->*(*ZeroShiftResult))(Op);
        Jump(EndBlock);
      }
    }

    // Continue emitting in the join block.
    SetCurrentCodeBlock(EndBlock);
    StartNewBlock();
  }

  /**
   * @name These functions are used by the deferred flag handling while it is calculating and storing flags in to RFLAGs.
   * @{ */
  Ref LoadPFRaw(bool Mask, bool Invert);
  Ref LoadAF();
  void FixupAF();
  void SetAFAndFixup(Ref AF);
  Ref CalculateAFForDecimal(Ref A);
  void CalculatePF(Ref Res);
  void CalculateAF(Ref Src1, Ref Src2);

  Ref IncrementByCarry(OpSize OpSize, Ref Src);

  void CalculateOF(IR::OpSize SrcSize, Ref Res, Ref Src1, Ref Src2, bool Sub);
  Ref CalculateFlags_ADC(IR::OpSize SrcSize, Ref Src1, Ref Src2);
  Ref CalculateFlags_SBB(IR::OpSize SrcSize, Ref Src1, Ref Src2);
  Ref CalculateFlags_SUB(IR::OpSize SrcSize, Ref Src1, Ref Src2, bool UpdateCF = true);
  Ref CalculateFlags_ADD(IR::OpSize SrcSize, Ref Src1, Ref Src2, bool UpdateCF = true);
  void CalculateFlags_MUL(IR::OpSize SrcSize, Ref Res, Ref High);
  void CalculateFlags_UMUL(Ref High);
  void CalculateFlags_Logical(IR::OpSize SrcSize, Ref Res);
  void CalculateFlags_ShiftLeftImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift);
  void CalculateFlags_ShiftRightImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift);
  void CalculateFlags_ShiftRightDoubleImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift);
  void CalculateFlags_ShiftRightImmediateCommon(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift);
  void CalculateFlags_SignShiftRightImmediate(IR::OpSize SrcSize, Ref Res, Ref Src1, uint64_t Shift);
  void CalculateFlags_ZCNT(IR::OpSize SrcSize, Ref Result);
  /**  @} */

  Ref GetX87Top();
  void SetX87FTW(Ref FTW);
  Ref GetX87FTW_Helper();
  void SetX87Top(Ref Value);

  // Switches the tracked state from x87 to MMX: forces the slow stack path,
  // resets the x87 top to zero, writes the abridged tag word in the context,
  // and records the new state.
  void ChgStateX87_MMX() override {
    LOGMAN_THROW_A_FMT(MMXState == MMXState_X87, "Expected state to be x87");

    _StackForceSlow();

    // top reset to zero
    auto ZeroTop = Constant(0);
    SetX87Top(ZeroTop);

    // The store is 8 bits wide even though the constant is written as 0xFFFF.
    constexpr auto AbridgedFTWOffset = offsetof(FEXCore::Core::CPUState, AbridgedFTW);
    auto TagValue = Constant(0xFFFFUL);
    _StoreContextGPR(OpSize::i8Bit, TagValue, AbridgedFTWOffset);

    MMXState = MMXState_MMX;
  }

  // Switches the tracked state from MMX back to x87. Asserts that the current
  // state is MMX, flushes the register cache, and records the new state.
  void ChgStateMMX_X87() override {
    LOGMAN_THROW_A_FMT(MMXState == MMXState_MMX, "Expected state to be MMX");
    // The opcode dispatcher register cache is used for MMX, but the x87 pass register cache is used for x87, spill to
    // context to ensure coherence.
    FlushRegisterCache(false, true);
    // We explicitly initialize to x87 state in StartNewBlock.
    // So if we ever change this to do something else, we need to
    // make sure that we consider if we need to explicitly set it there.
    MMXState = MMXState_X87;
  }

  // True when the destination is not a plain register (see DestIsMem) and the
  // decoded op carries FLAG_LOCK.
  bool DestIsLockedMem(FEXCore::X86Tables::DecodedOp Op) const {
    if (!DestIsMem(Op)) {
      return false;
    }

    const auto LockBits = Op->Flags & FEXCore::X86Tables::DecodeFlags::FLAG_LOCK;
    return LockBits != 0;
  }

  // True when the destination operand is anything other than a GPR-typed
  // operand, as reported by Dest.IsGPR().
  bool DestIsMem(FEXCore::X86Tables::DecodedOp Op) const {
    const bool DestIsRegister = Op->Dest.IsGPR();
    return !DestIsRegister;
  }

  void CreateJumpBlocks(const fextl::vector<FEXCore::Frontend::Decoder::DecodedBlocks>* Blocks);
  bool BlockSetRIP {false};

  bool Multiblock {};
  bool Is64BitMode {};
  uint64_t Entry {};

  // Set if mono hacks are enabled and the current block is the mono callsite backpatcher, in which case the
  // XCHG ops that would patch code are replaced with a hook that performs the write and manually invalidates
  // the target address.
  bool IsMonoBackpatcherBlock {false};
  IROp_IRHeader* CurrentHeader {};

  // Decides whether memory accesses of the given register class use the TSO
  // ops. A forced mode wins outright; otherwise the context's vector setting
  // is consulted for FPR and its general setting for everything else.
  [[nodiscard]]
  bool IsTSOEnabled(RegClass Class) const {
    // Explicit overrides take priority over the context configuration.
    if (ForceTSO == ForceTSOMode::ForceEnabled) {
      return true;
    }
    if (ForceTSO == ForceTSOMode::ForceDisabled) {
      return false;
    }

    if (Class == RegClass::FPR) {
      return CTX->IsVectorAtomicTSOEnabled();
    }
    return CTX->IsAtomicTSOEnabled();
  }

  // Stores Value to Addr (no index register), picking the TSO or non-TSO
  // store op based on IsTSOEnabled(Class).
  Ref _StoreMemAutoTSO(RegClass Class, OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    if (!IsTSOEnabled(Class)) {
      return _StoreMem(Class, Size, Value, Addr, Invalid(), Align, MemOffsetType::SXTX, 1);
    }

    return _StoreMemTSO(Class, Size, Value, Addr, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  // GPR-class convenience wrapper around _StoreMemAutoTSO (plain address form).
  Ref _StoreMemGPRAutoTSO(OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMemAutoTSO(RegClass::GPR, Size, Addr, Value, Align);
  }
  // FPR-class convenience wrapper around _StoreMemAutoTSO (plain address form).
  Ref _StoreMemFPRAutoTSO(OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMemAutoTSO(RegClass::FPR, Size, Addr, Value, Align);
  }

  // Loads from the address in ssa0 (no index register), picking the TSO or
  // non-TSO load op based on IsTSOEnabled(Class).
  Ref _LoadMemAutoTSO(RegClass Class, OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    if (!IsTSOEnabled(Class)) {
      return _LoadMem(Class, Size, ssa0, Invalid(), Align, MemOffsetType::SXTX, 1);
    }

    return _LoadMemTSO(Class, Size, ssa0, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  // GPR-class convenience wrapper around _LoadMemAutoTSO (plain address form).
  Ref _LoadMemGPRAutoTSO(OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    return _LoadMemAutoTSO(RegClass::GPR, Size, ssa0, Align);
  }
  // FPR-class convenience wrapper around _LoadMemAutoTSO (plain address form).
  Ref _LoadMemFPRAutoTSO(OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    return _LoadMemAutoTSO(RegClass::FPR, Size, ssa0, Align);
  }

  // Loads from a decoded address mode. TSO is used only when it is enabled for
  // the class and the address mode is not marked NonTSO; that decision is also
  // fed into address-mode selection.
  Ref _LoadMemAutoTSO(RegClass Class, OpSize Size, const AddressMode& A, OpSize Align = OpSize::i8Bit) {
    const bool UseTSO = IsTSOEnabled(Class) && !A.NonTSO;
    const bool NotGPRClass = Class != RegClass::GPR;
    const auto Mode = SelectAddressMode(this, A, GetGPROpSize(), CTX->HostFeatures.SupportsTSOImm9, UseTSO, NotGPRClass, Size);

    if (!UseTSO) {
      return _LoadMem(Class, Size, Mode.Base, Mode.Index, Align, Mode.IndexType, Mode.IndexScale);
    }

    return _LoadMemTSO(Class, Size, Mode.Base, Mode.Index, Align, Mode.IndexType, Mode.IndexScale);
  }
  // GPR-class convenience wrapper around _LoadMemAutoTSO (AddressMode form).
  Ref _LoadMemGPRAutoTSO(OpSize Size, const AddressMode& A, OpSize Align = OpSize::i8Bit) {
    return _LoadMemAutoTSO(RegClass::GPR, Size, A, Align);
  }
  // FPR-class convenience wrapper around _LoadMemAutoTSO (AddressMode form).
  Ref _LoadMemFPRAutoTSO(OpSize Size, const AddressMode& A, OpSize Align = OpSize::i8Bit) {
    return _LoadMemAutoTSO(RegClass::FPR, Size, A, Align);
  }

  // Reduces an address mode to a computed base plus an optional immediate
  // offset, for use by the pair load/store helpers.
  //
  // The offset is kept as an immediate only when it is an exact multiple of
  // the element size and lies within [-64, 64) elements; otherwise it stays in
  // A and is folded into the base by LoadEffectiveAddress. Only Base and
  // Offset of the returned AddressMode are populated.
  // NOTE(review): the [-64, 64) element window presumably mirrors the scaled
  // immediate range of the Arm64 LDP/STP encodings — confirm.
  AddressMode SelectPairAddressMode(AddressMode A, IR::OpSize Size) {
    LOGMAN_THROW_A_FMT(Size != IR::OpSize::iUnsized, "Invalid size!");
    const auto SizeInt = IR::OpSizeToSize(Size);
    AddressMode Out {};

    // NOTE(review): the quotient is narrowed to `signed`; if A.Offset is wider
    // than int, a very large offset could wrap into the accepted range — verify.
    signed OffsetEl = A.Offset / SizeInt;
    if ((A.Offset % SizeInt) == 0 && OffsetEl >= -64 && OffsetEl < 64) {
      // Representable: hand the offset back separately and strip it from A so
      // it is not also added into the base below.
      Out.Offset = A.Offset;
      A.Offset = 0;
    }

    Out.Base = LoadEffectiveAddress(this, A, GetGPROpSize(), true, false);
    return Out;
  }


  // Allocates a destination pair of the given class/size and emits a
  // _LoadMemPair from Base + Offset into its Low and High halves.
  RefPair LoadMemPair(RegClass Class, OpSize Size, Ref Base, uint32_t Offset) {
    RefPair Values = AllocatePair(Class, Size);
    _LoadMemPair(Class, Size, Base, Offset, Values.Low, Values.High);
    return Values;
  }
  // FPR-class convenience wrapper around LoadMemPair.
  RefPair LoadMemPairFPR(OpSize Size, Ref Base, uint32_t Offset) {
    return LoadMemPair(RegClass::FPR, Size, Base, Offset);
  }

  // Loads two consecutive elements of `Size` starting at address mode A and
  // returns them as {Low, High}.
  //
  // When TSO is not required, the address has no segment, and the element size
  // is between 32 and 128 bits, a single pair load is emitted. Otherwise two
  // individual loads are emitted, the second one element past the first.
  RefPair _LoadMemPairAutoTSO(RegClass Class, OpSize Size, const AddressMode& A, OpSize Align = OpSize::i8Bit) {
    const bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO;

    // Use ldp if possible, otherwise fallback on two loads.
    if (!AtomicTSO && !A.Segment && Size >= OpSize::i32Bit && Size <= OpSize::i128Bit) {
      const auto B = SelectPairAddressMode(A, Size);
      return LoadMemPair(Class, Size, B.Base, B.Offset);
    }

    // The high half lives exactly one element after the low half. Step by the
    // element size rather than a fixed 16 bytes so that this path agrees with
    // the pair-load path above and with _StoreMemPairAutoTSO for every Size.
    AddressMode HighA = A;
    HighA.Offset += IR::OpSizeToSize(Size);

    return {
      .Low = _LoadMemAutoTSO(Class, Size, A, Align),
      .High = _LoadMemAutoTSO(Class, Size, HighA, Align),
    };
  }
  // FPR-class convenience wrapper around _LoadMemPairAutoTSO.
  RefPair _LoadMemPairFPRAutoTSO(OpSize Size, const AddressMode& A, OpSize Align = OpSize::i8Bit) {
    return _LoadMemPairAutoTSO(RegClass::FPR, Size, A, Align);
  }

  // Stores Value to a decoded address mode. TSO is used only when it is
  // enabled for the class and the address mode is not marked NonTSO; that
  // decision is also fed into address-mode selection.
  Ref _StoreMemAutoTSO(RegClass Class, OpSize Size, const AddressMode& A, Ref Value, OpSize Align = OpSize::i8Bit) {
    const bool UseTSO = IsTSOEnabled(Class) && !A.NonTSO;
    const bool NotGPRClass = Class != RegClass::GPR;
    const auto Mode = SelectAddressMode(this, A, GetGPROpSize(), CTX->HostFeatures.SupportsTSOImm9, UseTSO, NotGPRClass, Size);

    if (!UseTSO) {
      return _StoreMem(Class, Size, Value, Mode.Base, Mode.Index, Align, Mode.IndexType, Mode.IndexScale);
    }

    return _StoreMemTSO(Class, Size, Value, Mode.Base, Mode.Index, Align, Mode.IndexType, Mode.IndexScale);
  }
  // GPR-class convenience wrapper around _StoreMemAutoTSO (AddressMode form).
  Ref _StoreMemGPRAutoTSO(OpSize Size, const AddressMode& A, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMemAutoTSO(RegClass::GPR, Size, A, Value, Align);
  }
  // FPR-class convenience wrapper around _StoreMemAutoTSO (AddressMode form).
  Ref _StoreMemFPRAutoTSO(OpSize Size, const AddressMode& A, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMemAutoTSO(RegClass::FPR, Size, A, Value, Align);
  }

  // Stores Value1 and Value2 as two consecutive elements of `Size` starting at
  // address mode A.
  void _StoreMemPairAutoTSO(RegClass Class, OpSize Size, const AddressMode& A, Ref Value1, Ref Value2, OpSize Align = OpSize::i8Bit) {
    const bool AtomicTSO = IsTSOEnabled(Class) && !A.NonTSO;
    const bool PairEligible = !AtomicTSO && !A.Segment && Size >= OpSize::i32Bit && Size <= OpSize::i128Bit;

    // Use stp if possible, otherwise fallback on two stores.
    if (PairEligible) {
      const auto PairMode = SelectPairAddressMode(A, Size);
      _StoreMemPair(Class, Size, Value1, Value2, PairMode.Base, PairMode.Offset);
      return;
    }

    // Second element sits one element size past the first.
    AddressMode SecondA = A;
    SecondA.Offset += IR::OpSizeToSize(Size);

    // NOTE(review): Align is not forwarded on this path; both stores are
    // emitted with OpSize::i8Bit alignment.
    _StoreMemAutoTSO(Class, Size, A, Value1, OpSize::i8Bit);
    _StoreMemAutoTSO(Class, Size, SecondA, Value2, OpSize::i8Bit);
  }
  // FPR-class convenience wrapper around _StoreMemPairAutoTSO.
  void _StoreMemPairFPRAutoTSO(OpSize Size, const AddressMode& A, Ref Value1, Ref Value2, OpSize Align = OpSize::i8Bit) {
    return _StoreMemPairAutoTSO(RegClass::FPR, Size, A, Value1, Value2, Align);
  }

  // Emits a _Pop of Size using the caller-supplied stack pointer handle and
  // returns the popped value in a freshly allocated GPR. This overload does
  // not write the stack pointer register back; that is left to the caller.
  Ref Pop(IR::OpSize Size, Ref SP_RMW) {
    Ref Value = _AllocateGPR(false);
    _Pop(Size, SP_RMW, Value);
    return Value;
  }

  // Pops a value of Size off the guest stack: wraps RSP in an RMW handle, pops
  // through it, then writes the handle back to RSP.
  Ref Pop(IR::OpSize Size) {
    Ref StackPointer = _RMWHandle(LoadGPRRegister(X86State::REG_RSP));

    // Allocates the destination register and emits the pop itself.
    Ref Popped = Pop(Size, StackPointer);

    // Store the new stack pointer
    StoreGPRRegister(X86State::REG_RSP, StackPointer);
    return Popped;
  }

  // Returns Value unchanged when the operand is treated as already extended
  // (GPR-direct, GPR-indirect, or a GPR-typed operand whose register number is
  // REG_MM_0 or above); otherwise emits a _VMov of Size.
  Ref VZeroExtendOperand(OpSize Size, X86Tables::DecodedOperand Op, Ref Value) {
    if (Op.IsGPRDirect() || Op.IsGPRIndirect()) {
      return Value;
    }

    if (Op.IsGPR() && Op.Data.GPR.GPR >= X86State::REG_MM_0) {
      return Value;
    }

    return _VMov(Size, Value);
  }

  // Pushes Value (of Size) onto the guest stack: emits a _Push against the
  // current RSP, stores the returned stack pointer back to RSP, then flushes
  // the register cache.
  void Push(IR::OpSize Size, Ref Value) {
    auto OldSP = LoadGPRRegister(X86State::REG_RSP);
    auto NewSP = _Push(GetGPROpSize(), Size, Value, OldSP);
    StoreGPRRegister(X86State::REG_RSP, NewSP);
    FlushRegisterCache();
  }

  // An arithmetic operand that is either an IR value (R) or a 64-bit constant
  // known at translation time (C). Each helper folds the operation directly
  // when the operand is constant and emits the matching IR op through E
  // otherwise, so callers can write one code path for both cases.
  struct ArithRef {
    IREmitter* E {};
    bool IsConstant {};
    // Discriminated by IsConstant: C is active when true, R otherwise.
    union {
      Ref R {};
      uint64_t C;
    };

    ArithRef() {}

    // Wraps an existing IR value.
    ArithRef(IREmitter* IREmit, Ref Reference)
      : E(IREmit)
      , IsConstant(false)
      , R(Reference) {}

    // Wraps a constant.
    ArithRef(IREmitter* IREmit, uint64_t K)
      : E(IREmit)
      , IsConstant(true)
      , C(K) {}

    // 64-bit negation.
    ArithRef Neg() {
      return IsConstant ? ArithRef(E, -C) : ArithRef(E, E->_Neg(OpSize::i64Bit, R));
    }

    // 64-bit bitwise AND with K.
    ArithRef And(uint64_t K) {
      return IsConstant ? ArithRef(E, C & K) : ArithRef(E, E->_And(OpSize::i64Bit, R, E->Constant(K)));
    }

    // Computes K - this (the operand is the subtrahend).
    ArithRef Presub(uint64_t K) {
      return IsConstant ? ArithRef(E, K - C) : ArithRef(E, E->Sub(OpSize::i64Bit, E->Constant(K), R));
    }

    // Logical shift left by Shift; a zero shift returns *this unchanged.
    ArithRef Lshl(uint64_t Shift) {
      if (Shift == 0) {
        return *this;
      } else if (IsConstant) {
        return ArithRef(E, C << Shift);
      } else {
        return ArithRef(E, E->_Lshl(OpSize::i64Bit, R, E->Constant(Shift)));
      }
    }

    // Unsigned bitfield extract of Size bits starting at bit Start.
    ArithRef Bfe(unsigned Start, unsigned Size) {
      if (IsConstant) {
        // Shifting a 64-bit value by 64 or more is undefined behaviour, so a
        // full-width extract needs an explicit all-ones mask (same guard as
        // Sbfe uses below).
        const uint64_t Mask = Size >= 64 ? ~0ULL : ((1ULL << Size) - 1);
        return ArithRef(E, (C >> Start) & Mask);
      } else {
        return ArithRef(E, E->_Bfe(OpSize::i64Bit, Size, Start, R));
      }
    }

    // Signed bitfield extract of Size bits starting at bit Start.
    ArithRef Sbfe(unsigned Start, unsigned Size) {
      if (IsConstant) {
        uint64_t SourceMask = Size == 64 ? ~0ULL : ((1ULL << Size) - 1);
        SourceMask <<= Start;

        // Isolate the field, then sign-extend it by shifting it up to the top
        // of the word and arithmetically back down.
        int64_t NewConstant = (C & SourceMask) >> Start;
        NewConstant <<= 64 - Size;
        NewConstant >>= 64 - Size;

        return ArithRef(E, NewConstant);
      } else {
        return ArithRef(E, E->_Sbfe(OpSize::i64Bit, Size, Start, R));
      }
    }

    // Inserts the low Size bits of this operand into Bitfield at bit Start and
    // returns the resulting IR value.
    Ref BfiInto(Ref Bitfield, unsigned Start, unsigned Size) {
      if (IsConstant && (Size > 0 && Size < 64)) {
        uint64_t SourceMask = (1ULL << Size) - 1;
        uint64_t SourceMaskShifted = SourceMask << Start;

        // All-zero and all-one constant fields reduce to a single AND / OR
        // with an inline constant instead of a bitfield insert.
        if (C == 0) {
          return E->_And(OpSize::i64Bit, Bitfield, E->_InlineConstant(~SourceMaskShifted));
        } else if (C == SourceMask) {
          return E->_Or(OpSize::i64Bit, Bitfield, E->_InlineConstant(SourceMaskShifted));
        }
      }

      if (IsConstant) {
        return E->_Bfi(OpSize::i64Bit, Size, Start, Bitfield, E->Constant(C));
      } else {
        return E->_Bfi(OpSize::i64Bit, Size, Start, Bitfield, R);
      }
    }

    // Produces 1 << this. In the constant case the shift amount is masked to
    // 63 for 64-bit operations and to 31 for every other size.
    ArithRef MaskBit(OpSize Size) {
      if (IsConstant) {
        uint64_t ShiftMask = Size == OpSize::i64Bit ? 63 : 31;
        uint64_t Result = 1ull << (C & ShiftMask);
        if (ShiftMask == 31) {
          Result &= ((1ull << 32) - 1);
        }

        return ArithRef(E, Result);
      } else {
        return ArithRef(E, E->_Lshl(Size, E->Constant(1), R));
      }
    }

    // Materialises the operand as an IR value, emitting a Constant if needed.
    // Declared after every other use of the type name `Ref` in this struct,
    // since the member shares that name.
    Ref Ref() {
      return IsConstant ? E->Constant(C) : R;
    }

    // True only when the operand is a known constant equal to zero.
    bool IsDefinitelyZero() const {
      return IsConstant && C == 0;
    }
  };

  // Wraps an IR value as an ArithRef, collapsing it to the constant form when
  // IsValueConstant reports a known value for the node.
  ArithRef ARef(Ref R) {
    uint64_t KnownValue;
    const bool HasKnownValue = IsValueConstant(WrapNode(R), &KnownValue);

    if (!HasKnownValue) {
      return ArithRef(this, R);
    }

    return ARef(KnownValue);
  }

  // Wraps a constant as an ArithRef bound to this emitter.
  ArithRef ARef(uint64_t K) {
    return ArithRef(this, K);
  }

  ///< Segment telemetry tracking
  uint32_t SegmentsNeedReadCheck {~0U};
  void CheckLegacySegmentWrite(Ref NewNode, uint32_t SegmentReg);
  void CheckLegacySegmentRead(Ref NewNode, uint32_t SegmentReg);
};

// Installs every dispatcher described by LocalTable into FinalTable. Each
// LocalTable entry names a first opcode (Op), a run length (Count) and a
// handler (Ptr); the handler is written to the Count consecutive FinalTable
// slots starting at Op. With assertions enabled, a slot that already has a
// handler is reported as a duplicate.
constexpr inline void InstallToTable(auto& FinalTable, const auto& LocalTable) {
  for (const auto& Entry : LocalTable) {
    const auto FirstOpcode = Entry.Op;
    const auto Handler = Entry.Ptr;

    for (uint8_t Offset = 0; Offset < Entry.Count; ++Offset) {
      auto& Slot = FinalTable[FirstOpcode + Offset];
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      if (Slot.OpcodeDispatcher.OpDispatch) {
        ERROR_AND_DIE_FMT("Duplicate Entry {}", Slot.Name);
      }
#endif

      Slot.OpcodeDispatcher.OpDispatch = Handler;
    }
  }
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/Core/VSyscall/VSyscall.inc
================================================
// SPDX-License-Identifier: MIT
// This is the vsyscall page for x86_64 guest code
// This was compiled with nasm with the following source then exported to binary

//BITS 64;
//
//align 4096, db 0xcc
//  ; __NR_gettimeofday
//  mov rax, 96
//  syscall
//  ret
//
//align 1024, db 0xcc
//  ; __NR_time
//  mov rax, 201
//  syscall
//  ret
//
//align 1024, db 0xcc
//  ; __NR_getcpu
//  mov rax, 309
//  syscall
//  ret
//
//align 4096, db 0xcc

// We only want one of these pages per FEX process
// One page
const static uint8_t VSyscallData[0x1000] = {
	0xB8, 0x60, 0x00, 0x00, 0x00, 0x0F, 0x05, 0xC3, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xB8, 0xC9, 0x00, 0x00, 0x00, 0x0F, 0x05, 0xC3, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xB8, 0x35, 0x01, 0x00, 0x00, 0x0F, 0x05, 0xC3, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC,
	0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
};


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/BaseTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/BaseTables.h"

#include <FEXCore/Core/Context.h>

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;

// Indices into Primary_ArchSelect_LUT. Each enumerator is named after a primary opcode byte whose
// BaseOps row is resolved through that LUT (e.g. the 0x06 row references
// Primary_ArchSelect_LUT[ENTRY_06]). ENTRY_MAX is the entry count and sizes the LUT array.
enum Primary_LUT {
  ENTRY_06,
  ENTRY_07,
  ENTRY_0E,
  ENTRY_16,
  ENTRY_17,
  ENTRY_1E,
  ENTRY_1F,
  ENTRY_27,
  ENTRY_2F,
  ENTRY_37,
  ENTRY_3F,
  ENTRY_40,
  ENTRY_48,
  ENTRY_60,
  ENTRY_61,
  ENTRY_63,
  ENTRY_9A,
  ENTRY_A0,
  ENTRY_A1,
  ENTRY_A2,
  ENTRY_A3,
  ENTRY_CE,
  ENTRY_D4,
  ENTRY_D5,
  ENTRY_D6,
  ENTRY_EA,
  ENTRY_MAX,
};

// Pair of X86InstInfo alternatives for every Primary_LUT index. BaseOps rows of type
// TYPE_ARCH_DISPATCHER point at one of these pairs through `.Indirect`.
// NOTE(review): slot [0] looks like the 32-bit-mode decoding and slot [1] the 64-bit-mode decoding
// (0x40 is INC vs. a REX prefix, 0x63 is ARPL vs. MOVSXD, the moffs MOVs carry 4 vs. 8 extra bytes);
// the code that selects the slot is not in this part of the file — confirm there.
constexpr std::array<X86InstInfo[2], ENTRY_MAX> Primary_ArchSelect_LUT = {{
  // ENTRY_06
  {
    {"PUSH ES",  TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_07
  {
    {"POP ES",   TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_DEF) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_ES_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_0E
  {
    {"PUSH CS",  TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_CS_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_16
  {
    {"PUSH SS",  TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_17
  {
    {"POP SS",   TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_DEF) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_SS_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_1E
  {
    {"PUSH DS",  TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_1F
  {
    {"POP DS",   TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_DEF) | FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_DS_PREFIX> } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_27
  {
    {"DAA",      TYPE_INST, GenFlagsDstSize(SIZE_8BIT) | FLAGS_SF_DST_RAX, 0, { .OpDispatch = &IR::OpDispatchBuilder::DAAOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_2F
  {
    {"DAS",      TYPE_INST, GenFlagsDstSize(SIZE_8BIT) | FLAGS_SF_DST_RAX, 0, { .OpDispatch = &IR::OpDispatchBuilder::DASOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_37
  {
    {"AAA",      TYPE_INST, GenFlagsDstSize(SIZE_16BIT) | FLAGS_SF_DST_RAX, 0, { .OpDispatch = &IR::OpDispatchBuilder::AAAOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_3F
  {
    {"AAS",      TYPE_INST, GenFlagsDstSize(SIZE_16BIT) | FLAGS_SF_DST_RAX, 0, { .OpDispatch = &IR::OpDispatchBuilder::AASOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_40
  {
    {"INC",    TYPE_INST, FLAGS_SF_REX_IN_BYTE, 0, { .OpDispatch = &IR::OpDispatchBuilder::INCOp } },
    // REX
    {"", TYPE_REX_PREFIX, FLAGS_NONE, 0},
  },
  // ENTRY_48
  {
    {"DEC",    TYPE_INST, FLAGS_SF_REX_IN_BYTE, 0, { .OpDispatch = &IR::OpDispatchBuilder::DECOp } },
    {"", TYPE_REX_PREFIX, FLAGS_NONE, 0},
  },
  // ENTRY_60
  {
    {"PUSHA",  TYPE_INST, FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::PUSHAOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_61
  {
    {"POPA",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS, 0, { .OpDispatch = &IR::OpDispatchBuilder::POPAOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_63
  {
    {"ARPL",   TYPE_INST, GenFlagsSameSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0, { .OpDispatch = &IR::OpDispatchBuilder::ARPLOp } },
    {"MOVSXD", TYPE_INST, GenFlagsDstSize(SIZE_64BIT) | FLAGS_MODRM, 0, { .OpDispatch = &IR::OpDispatchBuilder::MOVSXDOp } },
  },
  // ENTRY_9A
  {
    {"CALLF",  TYPE_INST, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_A0
  {
    {"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX | FLAGS_MEM_OFFSET, 4, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
    {"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX | FLAGS_MEM_OFFSET, 8, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
  },
  // ENTRY_A1
  {
    {"MOV",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_MEM_OFFSET, 4, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
    {"MOV",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_MEM_OFFSET, 8, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
  },
  // ENTRY_A2
  {
    {"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_SRC_RAX | FLAGS_MEM_OFFSET, 4, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
    {"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_SRC_RAX | FLAGS_MEM_OFFSET, 8, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
  },
  // ENTRY_A3
  {
    {"MOV",    TYPE_INST, FLAGS_SF_SRC_RAX | FLAGS_MEM_OFFSET, 4, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
    {"MOV",    TYPE_INST, FLAGS_SF_SRC_RAX | FLAGS_MEM_OFFSET, 8, { .OpDispatch = &IR::OpDispatchBuilder::MOVOffsetOp } },
  },
  // ENTRY_CE
  {
    {"INTO",   TYPE_INST, FLAGS_NONE, 0, { .OpDispatch = &IR::OpDispatchBuilder::INTOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_D4
  {
    {"AAM",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX, 1, { .OpDispatch = &IR::OpDispatchBuilder::AAMOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_D5
  {
    {"AAD",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX, 1, { .OpDispatch = &IR::OpDispatchBuilder::AADOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_D6
  {
    {"SALC",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX | FLAGS_SF_SRC_RAX, 0, { .OpDispatch = &IR::OpDispatchBuilder::SALCOp } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
  // ENTRY_EA
  {
    {"JMPF",   TYPE_INST, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
  },
}};

// Primary (one-byte opcode) decode table, built entirely at compile time.
// BaseOpTable below lists {first opcode, count, info} rows; GenerateTable is handed those rows to
// fill Table, after which IR::InstallToTable is applied with IR::OpDispatch_BaseOpTable.
// NOTE(review): the second field of each row appears to be the number of consecutive opcodes that
// share the same info (e.g. 0x50 with 8 for the PUSH register forms) — confirm against GenerateTable.
// NOTE(review): for TYPE_INST rows the last X86InstInfo field looks like the count of trailing
// immediate/displacement bytes (1 for imm8, 4 for imm32, ...) — confirm against the decoder.
const std::array<X86InstInfo, MAX_PRIMARY_TABLE_SIZE> BaseOps = []() consteval {
  std::array<X86InstInfo, MAX_PRIMARY_TABLE_SIZE> Table{};

  constexpr U8U8InfoStruct BaseOpTable[] = {
    // Prefixes
    // Operand size override
    {0x66, 1, X86InstInfo{"",      TYPE_PREFIX, FLAGS_NONE,        0}},
    // Address size override
    {0x67, 1, X86InstInfo{"",      TYPE_PREFIX, FLAGS_NONE,        0}},
    // ES/CS/SS/DS segment overrides, tagged as legacy prefixes
    {0x26, 1, X86InstInfo{"ES",    TYPE_LEGACY_PREFIX, FLAGS_NONE, 0}},
    {0x2E, 1, X86InstInfo{"CS",    TYPE_LEGACY_PREFIX, FLAGS_NONE, 0}},
    {0x36, 1, X86InstInfo{"SS",    TYPE_LEGACY_PREFIX, FLAGS_NONE, 0}},
    {0x3E, 1, X86InstInfo{"DS",    TYPE_LEGACY_PREFIX, FLAGS_NONE, 0}},
    // FS/GS segment overrides, tagged as regular prefixes.
    // NOTE(review): this comment used to read "These are still invalid on 64bit". On x86-64 the FS/GS
    // overrides remain honoured while ES/CS/SS/DS are ignored, so "still valid" was probably meant — confirm.
    {0x64, 1, X86InstInfo{"FS",    TYPE_PREFIX, FLAGS_NONE,        0}},
    {0x65, 1, X86InstInfo{"GS",    TYPE_PREFIX, FLAGS_NONE,        0}},
    {0xF0, 1, X86InstInfo{"LOCK",  TYPE_PREFIX, FLAGS_NONE,        0}},
    {0xF2, 1, X86InstInfo{"REPNE", TYPE_PREFIX, FLAGS_NONE,        0}},
    {0xF3, 1, X86InstInfo{"REP",   TYPE_PREFIX, FLAGS_NONE,        0}},

    // Instructions
    {0x00, 1, X86InstInfo{"ADD",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x01, 1, X86InstInfo{"ADD",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2,                                       0}},
    {0x02, 1, X86InstInfo{"ADD",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x03, 1, X86InstInfo{"ADD",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x04, 1, X86InstInfo{"ADD",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x05, 1, X86InstInfo{"ADD",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},

    // TYPE_ARCH_DISPATCHER rows carry no instruction info of their own; they point (via .Indirect)
    // at a Primary_ArchSelect_LUT slot that holds the actual alternatives.
    // NOTE(review): presumably the slot is chosen by 32-bit vs 64-bit guest mode — confirm in the decoder.
    {0x06, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_06] }}},
    {0x07, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_07] }}},

    {0x08, 1, X86InstInfo{"OR",     TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x09, 1, X86InstInfo{"OR",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                                   0}},
    {0x0A, 1, X86InstInfo{"OR",     TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x0B, 1, X86InstInfo{"OR",     TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x0C, 1, X86InstInfo{"OR",     TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX ,                              1}},
    {0x0D, 1, X86InstInfo{"OR",     TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},
    {0x0E, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_0E] }}},

    {0x10, 1, X86InstInfo{"ADC",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x11, 1, X86InstInfo{"ADC",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2,                                       0}},
    {0x12, 1, X86InstInfo{"ADC",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x13, 1, X86InstInfo{"ADC",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x14, 1, X86InstInfo{"ADC",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x15, 1, X86InstInfo{"ADC",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},
    {0x16, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_16] }}},
    {0x17, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_17] }}},

    {0x18, 1, X86InstInfo{"SBB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x19, 1, X86InstInfo{"SBB",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_DISPLACE_SIZE_DIV_2,                                       0}},
    {0x1A, 1, X86InstInfo{"SBB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x1B, 1, X86InstInfo{"SBB",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x1C, 1, X86InstInfo{"SBB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x1D, 1, X86InstInfo{"SBB",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},
    {0x1E, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_1E] }}},
    {0x1F, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_1F] }}},

    {0x20, 1, X86InstInfo{"AND",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x21, 1, X86InstInfo{"AND",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                                   0}},
    {0x22, 1, X86InstInfo{"AND",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x23, 1, X86InstInfo{"AND",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x24, 1, X86InstInfo{"AND",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x25, 1, X86InstInfo{"AND",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},

    {0x27, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_27] }}},
    {0x28, 1, X86InstInfo{"SUB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x29, 1, X86InstInfo{"SUB",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                                   0}},
    {0x2A, 1, X86InstInfo{"SUB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x2B, 1, X86InstInfo{"SUB",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x2C, 1, X86InstInfo{"SUB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX ,                              1}},
    {0x2D, 1, X86InstInfo{"SUB",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},
    {0x2F, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_2F] }}},

    {0x30, 1, X86InstInfo{"XOR",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x31, 1, X86InstInfo{"XOR",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                                   0}},
    {0x32, 1, X86InstInfo{"XOR",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x33, 1, X86InstInfo{"XOR",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x34, 1, X86InstInfo{"XOR",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x35, 1, X86InstInfo{"XOR",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},

    {0x37, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_37] }}},
    {0x38, 1, X86InstInfo{"CMP",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                   0}},
    {0x39, 1, X86InstInfo{"CMP",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                                   0}},
    {0x3A, 1, X86InstInfo{"CMP",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,                                                   0}},
    {0x3B, 1, X86InstInfo{"CMP",    TYPE_INST, FLAGS_MODRM,                                                                   0}},
    {0x3C, 1, X86InstInfo{"CMP",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX  ,                              1}},
    {0x3D, 1, X86InstInfo{"CMP",    TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2, 4}},
    {0x3F, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_3F] }}},

    // Two eight-opcode ranges (0x40-0x47 and 0x48-0x4F) that are also mode-dispatched
    {0x40, 8, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_40] }}},
    {0x48, 8, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_48] }}},

    {0x50, 8, X86InstInfo{"PUSH",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SF_REX_IN_BYTE | FLAGS_DEBUG_MEM_ACCESS ,                    0}},
    {0x58, 8, X86InstInfo{"POP",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SF_REX_IN_BYTE | FLAGS_DEBUG_MEM_ACCESS ,                    0}},


    {0x60, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_60] }}},
    {0x61, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_61] }}},
    {0x62, 1, X86InstInfo{"",       TYPE_GROUP_EVEX, FLAGS_NONE,                                                                           0}},
    {0x63, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_63] }}},

    {0x68, 1, X86InstInfo{"PUSH",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_SRC_SEXT, 4}},
    {0x69, 1, X86InstInfo{"IMUL",   TYPE_INST, FLAGS_MODRM | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2,        4}},
    {0x6A, 1, X86InstInfo{"PUSH",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_SRC_SEXT ,            1}},
    {0x6B, 1, X86InstInfo{"IMUL",   TYPE_INST, FLAGS_MODRM | FLAGS_SRC_SEXT ,                                    1}},

    // String port I/O (INS/OUTS); these should just throw a GP
    {0x6C, 1, X86InstInfo{"INSB",   TYPE_INST, FLAGS_BLOCK_END, 0}},
    {0x6D, 1, X86InstInfo{"INSW",   TYPE_INST, FLAGS_BLOCK_END, 0}},
    {0x6E, 1, X86InstInfo{"OUTS",   TYPE_INST, FLAGS_BLOCK_END, 0}},
    {0x6F, 1, X86InstInfo{"OUTS",   TYPE_INST, FLAGS_BLOCK_END, 0}},

    // Short conditional jumps: one sign-extended displacement byte each
    {0x70, 1, X86InstInfo{"JO",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x71, 1, X86InstInfo{"JNO",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x72, 1, X86InstInfo{"JB",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x73, 1, X86InstInfo{"JNB",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x74, 1, X86InstInfo{"JZ",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x75, 1, X86InstInfo{"JNZ",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x76, 1, X86InstInfo{"JBE",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x77, 1, X86InstInfo{"JNBE",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x78, 1, X86InstInfo{"JS",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x79, 1, X86InstInfo{"JNS",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7A, 1, X86InstInfo{"JP",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7B, 1, X86InstInfo{"JNP",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7C, 1, X86InstInfo{"JL",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7D, 1, X86InstInfo{"JNL",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7E, 1, X86InstInfo{"JLE",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},
    {0x7F, 1, X86InstInfo{"JNLE",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT , 1}},

    {0x84, 1, X86InstInfo{"TEST",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,         0}},
    {0x85, 1, X86InstInfo{"TEST",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                         0}},
    {0x86, 1, X86InstInfo{"XCHG",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,         0}},
    {0x87, 1, X86InstInfo{"XCHG",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                         0}},

    {0x88, 1, X86InstInfo{"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,         0}},
    {0x89, 1, X86InstInfo{"MOV",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                         0}},
    {0x8A, 1, X86InstInfo{"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM,         0}},
    {0x8B, 1, X86InstInfo{"MOV",    TYPE_INST, FLAGS_MODRM,                         0}},
    {0x8C, 1, X86InstInfo{"MOV",    TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                      0}},
    {0x8D, 1, X86InstInfo{"LEA",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_MODRM,                         0}},
    {0x8E, 1, X86InstInfo{"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_16BIT) | FLAGS_MODRM,                      0}},
    {0x8F, 1, X86InstInfo{"POP",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_ZERO_REG | FLAGS_DEBUG_MEM_ACCESS, 0}},
    {0x90, 8, X86InstInfo{"XCHG",   TYPE_INST, FLAGS_SF_REX_IN_BYTE | FLAGS_SF_SRC_RAX, 0}},
    {0x98, 1, X86InstInfo{"CDQE",   TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SF_SRC_RAX,     0}},
    {0x99, 1, X86InstInfo{"CQO",    TYPE_INST, FLAGS_SF_DST_RDX | FLAGS_SF_SRC_RAX,     0}},

    // FWAIT (x87 wait) followed by the flags-register push/pop pair
    {0x9B, 1, X86InstInfo{"FWAIT",  TYPE_INST, FLAGS_NONE,                              0}},
    {0x9C, 1, X86InstInfo{"PUSHF",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF),         0}},
    {0x9D, 1, X86InstInfo{"POPF",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_BLOCK_END,         0}},

    {0x9E, 1, X86InstInfo{"SAHF",   TYPE_INST, FLAGS_NONE,                              0}},
    {0x9F, 1, X86InstInfo{"LAHF",   TYPE_INST, FLAGS_NONE,                              0}},

    // MOV with a memory-offset operand; the LUT slots differ in offset width (4 vs 8 bytes)
    {0xA0, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_A0] }}},
    {0xA1, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_A1] }}},
    {0xA2, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_A2] }}},
    {0xA3, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_A3] }}},

    // String operations
    {0xA4, 1, X86InstInfo{"MOVSB",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_DEBUG_MEM_ACCESS,                                            0}},
    {0xA5, 1, X86InstInfo{"MOVS",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS,                                                            0}},
    {0xA6, 1, X86InstInfo{"CMPSB",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_DEBUG_MEM_ACCESS,                                            0}},
    {0xA7, 1, X86InstInfo{"CMPS",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS,                                                            0}},

    {0xA8, 1, X86InstInfo{"TEST",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX ,                                             1}},
    {0xA9, 1, X86InstInfo{"TEST",   TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2,                4}},
    {0xAA, 1, X86InstInfo{"STOS",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_SF_SRC_RAX,                   0}},
    {0xAB, 1, X86InstInfo{"STOS",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_SF_SRC_RAX,                                   0}},
    {0xAC, 1, X86InstInfo{"LODS",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_DST_RAX | FLAGS_DEBUG_MEM_ACCESS,                                                      0}},
    {0xAD, 1, X86InstInfo{"LODS",   TYPE_INST, FLAGS_SF_DST_RAX | FLAGS_DEBUG_MEM_ACCESS,                                                      0}},
    {0xAE, 1, X86InstInfo{"SCAS",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_SF_SRC_RAX,                                   0}},
    {0xAF, 1, X86InstInfo{"SCAS",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_SF_SRC_RAX,                                   0}},

    // MOV immediate to register; the register is encoded in the opcode byte (FLAGS_SF_REX_IN_BYTE)
    {0xB0, 8, X86InstInfo{"MOV",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_SF_REX_IN_BYTE ,                                         1}},
    {0xB8, 8, X86InstInfo{"MOV",    TYPE_INST, FLAGS_SF_REX_IN_BYTE | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_DISPLACE_SIZE_MUL_2, 4}},

    {0xC2, 1, X86InstInfo{"RET",    TYPE_INST, FLAGS_SETS_RIP | FLAGS_BLOCK_END,                                             2}},
    {0xC3, 1, X86InstInfo{"RET",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_BLOCK_END ,                                                0}},
    {0xC8, 1, X86InstInfo{"ENTER",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_DEBUG_MEM_ACCESS ,                                      3}},
    {0xC9, 1, X86InstInfo{"LEAVE",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_DEBUG_MEM_ACCESS ,                                                0}},
    {0xCA, 1, X86InstInfo{"RETF",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_BLOCK_END,                                                              2}},
    {0xCB, 1, X86InstInfo{"RETF",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_BLOCK_END,                                                              0}},
    {0xCC, 1, X86InstInfo{"INT3",   TYPE_INST, FLAGS_BLOCK_END,                                                                                      0}},
    {0xCD, 1, X86InstInfo{"INT",    TYPE_INST, DEFAULT_SYSCALL_FLAGS,                                                                  1}},
    {0xCE, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_CE] }}},
    {0xCF, 1, X86InstInfo{"IRET",   TYPE_INST, FLAGS_SETS_RIP | FLAGS_BLOCK_END,                                                                                    0}},

    {0xD4, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_D4] }}},
    {0xD5, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_D5] }}},
    {0xD6, 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Primary_ArchSelect_LUT[ENTRY_D6] }}},
    {0xD7, 1, X86InstInfo{"XLAT",   TYPE_INST, FLAGS_DEBUG_MEM_ACCESS,                                                                           0}},

    {0xE0, 1, X86InstInfo{"LOOPNE", TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_SF_SRC_RCX,                             1}},
    {0xE1, 1, X86InstInfo{"LOOPE",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_SF_SRC_RCX,                             1}},
    {0xE2, 1, X86InstInfo{"LOOP",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_SF_SRC_RCX,                             1}},
    {0xE3, 1, X86InstInfo{"JrCXZ",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT ,                             1}},

    // Port I/O with an immediate port number; should just throw GP
    {0xE4, 2, X86InstInfo{"IN",     TYPE_INST, FLAGS_BLOCK_END,                                                                                                      1}},
    {0xE6, 2, X86InstInfo{"OUT",    TYPE_INST, FLAGS_BLOCK_END,                                                                                                      1}},

    {0xE8, 1, X86InstInfo{"CALL",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_BLOCK_END | FLAGS_CALL , 4}},
    {0xE9, 1, X86InstInfo{"JMP",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_BLOCK_END , 4}},
    {0xEB, 1, X86InstInfo{"JMP",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_BLOCK_END ,                             1}},

    // Port I/O without an immediate operand; should just throw GP
    {0xEC, 2, X86InstInfo{"IN",     TYPE_INST, FLAGS_BLOCK_END,             0}},
    {0xEE, 2, X86InstInfo{"OUT",    TYPE_INST, FLAGS_BLOCK_END,             0}},

    {0xF1, 1, X86InstInfo{"INT1",   TYPE_INST, FLAGS_BLOCK_END,               0}},
    {0xF4, 1, X86InstInfo{"HLT",    TYPE_INST, FLAGS_BLOCK_END,               0}},
    {0xF5, 1, X86InstInfo{"CMC",    TYPE_INST, FLAGS_NONE,                0}},
    {0xF8, 1, X86InstInfo{"CLC",    TYPE_INST, FLAGS_NONE,                0}},
    {0xF9, 1, X86InstInfo{"STC",    TYPE_INST, FLAGS_NONE,                0}},
    {0xFA, 1, X86InstInfo{"CLI",    TYPE_INST, FLAGS_NONE,                0}},
    {0xFB, 1, X86InstInfo{"STI",    TYPE_INST, FLAGS_NONE,                0}},
    {0xFC, 1, X86InstInfo{"CLD",    TYPE_INST, FLAGS_NONE,                0}},
    {0xFD, 1, X86InstInfo{"STD",    TYPE_INST, FLAGS_NONE,                0}},

    // Two Byte table
    {0x0F, 1, X86InstInfo{"",   TYPE_SECONDARY_TABLE_PREFIX, FLAGS_NONE,  0}},

    // x87 table
    {0xD8, 8, X86InstInfo{"",   TYPE_X87_TABLE_PREFIX, FLAGS_MODRM,        0}},

    // ModRM table
    // MoreBytes field repurposed for valid bits mask
    // NOTE(review): the values below count up per opcode within each group (0..3, 0..5, 0..1), which
    // looks more like an index into the group's table than a bit mask — confirm against the group decoder.
    {0x80, 1, X86InstInfo{"",   TYPE_GROUP_1, FLAGS_MODRM, 0}},
    {0x81, 1, X86InstInfo{"",   TYPE_GROUP_1, FLAGS_MODRM, 1}},
    {0x82, 1, X86InstInfo{"",   TYPE_GROUP_1, FLAGS_MODRM, 2}},
    {0x83, 1, X86InstInfo{"",   TYPE_GROUP_1, FLAGS_MODRM, 3}},
    {0xC0, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 0}},
    {0xC1, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 1}},
    {0xD0, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 2}},
    {0xD1, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 3}},
    {0xD2, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 4}},
    {0xD3, 1, X86InstInfo{"",   TYPE_GROUP_2, FLAGS_MODRM, 5}},
    {0xF6, 1, X86InstInfo{"",   TYPE_GROUP_3, FLAGS_MODRM, 0}},
    {0xF7, 1, X86InstInfo{"",   TYPE_GROUP_3, FLAGS_MODRM, 1}},
    {0xFE, 1, X86InstInfo{"",   TYPE_GROUP_4, FLAGS_MODRM, 0}},
    {0xFF, 1, X86InstInfo{"",   TYPE_GROUP_5, FLAGS_MODRM, 0}},

    // Group 11
    {0xC6, 1, X86InstInfo{"",   TYPE_GROUP_11, FLAGS_MODRM, 0}},
    {0xC7, 1, X86InstInfo{"",   TYPE_GROUP_11, FLAGS_MODRM, 1}},

    // VEX table
    {0xC4, 2, X86InstInfo{"",   TYPE_VEX_TABLE_PREFIX, FLAGS_NONE, 0}},
  };

  // Fill the table from the rows above, then apply the base-op dispatch table to it.
  GenerateTable(Table.data(), BaseOpTable, std::size(BaseOpTable));
  IR::InstallToTable(Table, IR::OpDispatch_BaseOpTable);

  return Table;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/DDDTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/DDDTables.h"

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;

// 3DNow! opcode table, assembled at compile time.
// The rows below are handed to GenerateTable to fill the result array, and
// IR::InstallToTable is then applied with IR::OpDispatch_DDDTable.
constexpr std::array<X86InstInfo, MAX_3DNOW_TABLE_SIZE> DDDNowOps = []() consteval {
  // Every instruction in this table uses exactly the same flag set, so it is spelled once.
  constexpr auto MMXOpFlags = GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX;

  constexpr U8U8InfoStruct Entries[] = {
    // Integer <-> float conversions
    {0x0C, 1, X86InstInfo{"PI2FW", TYPE_INST, MMXOpFlags, 0}},
    {0x0D, 1, X86InstInfo{"PI2FD", TYPE_INST, MMXOpFlags, 0}},
    {0x1C, 1, X86InstInfo{"PF2IW", TYPE_INST, MMXOpFlags, 0}},
    {0x1D, 1, X86InstInfo{"PF2ID", TYPE_INST, MMXOpFlags, 0}},

    // Inverse 3DNow! These two instructions are Geode product line specific
    // No CPUID for these, you're expected to read ID_CONFIG_MSR (1250h) bit 1
    {0x86, 1, X86InstInfo{"PFRCPV", TYPE_INST, MMXOpFlags, 0}},
    {0x87, 1, X86InstInfo{"PFRSQRTV", TYPE_INST, MMXOpFlags, 0}},

    // 0x8x
    {0x8A, 1, X86InstInfo{"PFNACC", TYPE_INST, MMXOpFlags, 0}},
    {0x8E, 1, X86InstInfo{"PFPNACC", TYPE_INST, MMXOpFlags, 0}},

    // 0x9x
    {0x90, 1, X86InstInfo{"PFCMPGE", TYPE_INST, MMXOpFlags, 0}},
    {0x94, 1, X86InstInfo{"PFMIN", TYPE_INST, MMXOpFlags, 0}},
    {0x96, 1, X86InstInfo{"PFRCP", TYPE_INST, MMXOpFlags, 0}},
    {0x97, 1, X86InstInfo{"PFRSQRT", TYPE_INST, MMXOpFlags, 0}},
    {0x9A, 1, X86InstInfo{"PFSUB", TYPE_INST, MMXOpFlags, 0}},
    {0x9E, 1, X86InstInfo{"PFADD", TYPE_INST, MMXOpFlags, 0}},

    // 0xAx
    {0xA0, 1, X86InstInfo{"PFCMPGT", TYPE_INST, MMXOpFlags, 0}},
    {0xA4, 1, X86InstInfo{"PFMAX", TYPE_INST, MMXOpFlags, 0}},
    {0xA6, 1, X86InstInfo{"PFRCPIT1", TYPE_INST, MMXOpFlags, 0}},
    {0xA7, 1, X86InstInfo{"PFRSQIT1", TYPE_INST, MMXOpFlags, 0}},
    {0xAA, 1, X86InstInfo{"PFSUBR", TYPE_INST, MMXOpFlags, 0}},
    {0xAE, 1, X86InstInfo{"PFACC", TYPE_INST, MMXOpFlags, 0}},

    // 0xBx
    {0xB0, 1, X86InstInfo{"PFCMPEQ", TYPE_INST, MMXOpFlags, 0}},
    {0xB4, 1, X86InstInfo{"PFMUL", TYPE_INST, MMXOpFlags, 0}},
    {0xB6, 1, X86InstInfo{"PFRCPIT2", TYPE_INST, MMXOpFlags, 0}},
    {0xB7, 1, X86InstInfo{"PMULHRW", TYPE_INST, MMXOpFlags, 0}},
    {0xBB, 1, X86InstInfo{"PSWAPD", TYPE_INST, MMXOpFlags, 0}},
    {0xBF, 1, X86InstInfo{"PAVGUSB", TYPE_INST, MMXOpFlags, 0}},
  };

  // Start from a value-initialised array, fill it from the rows, then apply the dispatch table.
  std::array<X86InstInfo, MAX_3DNOW_TABLE_SIZE> Result{};
  GenerateTable(Result.data(), Entries, std::size(Entries));
  IR::InstallToTable(Result, IR::OpDispatch_DDDTable);

  return Result;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/H0F38Tables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/H0F38Tables.h"

#include <iterator>
#include <stdint.h>

namespace FEXCore::X86Tables {
using namespace InstFlags;
// Decoder table for the three-byte 0F 38 opcode map.
// The table is assembled at compile time: a flat list of entries is expanded
// into the indexed array by GenerateTable(), after which the opcode dispatch
// handlers listed in IR::OpDispatch_H0F38Table are installed on top.
constexpr std::array<X86InstInfo, MAX_0F_38_TABLE_SIZE> H0F38TableOps = []() consteval {
  std::array<X86InstInfo, MAX_0F_38_TABLE_SIZE> Result{};

  // Table index layout: prefix selector bits above bit 8, opcode byte below.
#define OPD(prefix, opcode) (((prefix) << 8) | opcode)
  // Prefix selector bits. They may be OR'd together (see the 66+F2 CRC32 rows).
  constexpr uint16_t PF_38_NONE = 0;
  constexpr uint16_t PF_38_66   = (1U << 0);
  constexpr uint16_t PF_38_F2   = (1U << 1);
  constexpr uint16_t PF_38_F3   = (1U << 2);

  // Flag bundles shared by many rows below.
  // 64-bit MMX form: both operands 64 bits wide.
  constexpr auto kMMX64 = GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX;
  // 128-bit XMM form: both operands 128 bits wide.
  constexpr auto kXMM128 = GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS;
  // 128-bit destination with a narrower source (widening moves).
  constexpr auto kXMM128From64 = GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS;
  constexpr auto kXMM128From32 = GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS;
  constexpr auto kXMM128From16 = GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS;

  constexpr U16U8InfoStruct Entries[] = {
    // 0x00 - 0x0B: SSSE3 ops, each with an MMX (no prefix) and an XMM (66) form.
    {OPD(PF_38_NONE, 0x00), 1, X86InstInfo{"PSHUFB", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x00), 1, X86InstInfo{"PSHUFB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x01), 1, X86InstInfo{"PHADDW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x01), 1, X86InstInfo{"PHADDW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x02), 1, X86InstInfo{"PHADDD", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x02), 1, X86InstInfo{"PHADDD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x03), 1, X86InstInfo{"PHADDSW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x03), 1, X86InstInfo{"PHADDSW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x04), 1, X86InstInfo{"PMADDUBSW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x04), 1, X86InstInfo{"PMADDUBSW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x05), 1, X86InstInfo{"PHSUBW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x05), 1, X86InstInfo{"PHSUBW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x06), 1, X86InstInfo{"PHSUBD", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x06), 1, X86InstInfo{"PHSUBD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x07), 1, X86InstInfo{"PHSUBSW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x07), 1, X86InstInfo{"PHSUBSW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x08), 1, X86InstInfo{"PSIGNB", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x08), 1, X86InstInfo{"PSIGNB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x09), 1, X86InstInfo{"PSIGNW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x09), 1, X86InstInfo{"PSIGNW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x0A), 1, X86InstInfo{"PSIGND", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x0A), 1, X86InstInfo{"PSIGND", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x0B), 1, X86InstInfo{"PMULHRSW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x0B), 1, X86InstInfo{"PMULHRSW", TYPE_INST, kXMM128, 0}},

    // 0x10 - 0x1E: variable blends, PTEST and packed absolute value.
    {OPD(PF_38_66, 0x10), 1, X86InstInfo{"PBLENDVB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x14), 1, X86InstInfo{"BLENDVPS", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x15), 1, X86InstInfo{"BLENDVPD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x17), 1, X86InstInfo{"PTEST", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x1C), 1, X86InstInfo{"PABSB", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x1C), 1, X86InstInfo{"PABSB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x1D), 1, X86InstInfo{"PABSW", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x1D), 1, X86InstInfo{"PABSW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0x1E), 1, X86InstInfo{"PABSD", TYPE_INST, kMMX64, 0}},
    {OPD(PF_38_66, 0x1E), 1, X86InstInfo{"PABSD", TYPE_INST, kXMM128, 0}},

    // 0x20 - 0x2B: sign-extending moves (source narrower than destination) and friends.
    {OPD(PF_38_66, 0x20), 1, X86InstInfo{"PMOVSXBW", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x21), 1, X86InstInfo{"PMOVSXBD", TYPE_INST, kXMM128From32, 0}},
    {OPD(PF_38_66, 0x22), 1, X86InstInfo{"PMOVSXBQ", TYPE_INST, kXMM128From16, 0}},
    {OPD(PF_38_66, 0x23), 1, X86InstInfo{"PMOVSXWD", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x24), 1, X86InstInfo{"PMOVSXWQ", TYPE_INST, kXMM128From32, 0}},
    {OPD(PF_38_66, 0x25), 1, X86InstInfo{"PMOVSXDQ", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x28), 1, X86InstInfo{"PMULDQ", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x29), 1, X86InstInfo{"PCMPEQQ", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x2A), 1, X86InstInfo{"MOVNTDQA", TYPE_INST, kXMM128 | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(PF_38_66, 0x2B), 1, X86InstInfo{"PACKUSDW", TYPE_INST, kXMM128, 0}},

    // 0x30 - 0x3F: zero-extending moves, 64-bit compare, packed min/max.
    {OPD(PF_38_66, 0x30), 1, X86InstInfo{"PMOVZXBW", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x31), 1, X86InstInfo{"PMOVZXBD", TYPE_INST, kXMM128From32, 0}},
    {OPD(PF_38_66, 0x32), 1, X86InstInfo{"PMOVZXBQ", TYPE_INST, kXMM128From16, 0}},
    {OPD(PF_38_66, 0x33), 1, X86InstInfo{"PMOVZXWD", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x34), 1, X86InstInfo{"PMOVZXWQ", TYPE_INST, kXMM128From32, 0}},
    {OPD(PF_38_66, 0x35), 1, X86InstInfo{"PMOVZXDQ", TYPE_INST, kXMM128From64, 0}},
    {OPD(PF_38_66, 0x37), 1, X86InstInfo{"PCMPGTQ", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x38), 1, X86InstInfo{"PMINSB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x39), 1, X86InstInfo{"PMINSD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3A), 1, X86InstInfo{"PMINUW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3B), 1, X86InstInfo{"PMINUD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3C), 1, X86InstInfo{"PMAXSB", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3D), 1, X86InstInfo{"PMAXSD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3E), 1, X86InstInfo{"PMAXUW", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x3F), 1, X86InstInfo{"PMAXUD", TYPE_INST, kXMM128, 0}},

    {OPD(PF_38_66, 0x40), 1, X86InstInfo{"PMULLD", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0x41), 1, X86InstInfo{"PHMINPOSUW", TYPE_INST, kXMM128, 0}},

    // 0xC8 - 0xCD: SHA extensions (no mandatory prefix).
    {OPD(PF_38_NONE, 0xC8), 1, X86InstInfo{"SHA1NEXTE", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0xC9), 1, X86InstInfo{"SHA1MSG1", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0xCA), 1, X86InstInfo{"SHA1MSG2", TYPE_INST, kXMM128, 0}},

    {OPD(PF_38_NONE, 0xCB), 1, X86InstInfo{"SHA256RNDS2", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0xCC), 1, X86InstInfo{"SHA256MSG1", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_NONE, 0xCD), 1, X86InstInfo{"SHA256MSG2", TYPE_INST, kXMM128, 0}},

    // 0xDB - 0xDF: AES-NI.
    {OPD(PF_38_66, 0xDB), 1, X86InstInfo{"AESIMC", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0xDC), 1, X86InstInfo{"AESENC", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0xDD), 1, X86InstInfo{"AESENCLAST", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0xDE), 1, X86InstInfo{"AESDEC", TYPE_INST, kXMM128, 0}},
    {OPD(PF_38_66, 0xDF), 1, X86InstInfo{"AESDECLAST", TYPE_INST, kXMM128, 0}},

    // 0xF0 / 0xF1: MOVBE without F2, CRC32 with F2. Both are also listed with a 66 prefix.
    {OPD(PF_38_NONE, 0xF0), 1, X86InstInfo{"MOVBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(PF_38_NONE, 0xF1), 1, X86InstInfo{"MOVBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},

    {OPD(PF_38_66, 0xF0), 1, X86InstInfo{"MOVBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(PF_38_66, 0xF1), 1, X86InstInfo{"MOVBE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},

    {OPD(PF_38_F2, 0xF0), 1, X86InstInfo{"CRC32", TYPE_INST, GenFlagsSizes(SIZE_DEF, SIZE_8BIT) | FLAGS_MODRM, 0}},
    {OPD(PF_38_F2, 0xF1), 1, X86InstInfo{"CRC32", TYPE_INST, FLAGS_MODRM, 0}},
    {OPD(PF_38_66 | PF_38_F2, 0xF0), 1, X86InstInfo{"CRC32", TYPE_INST, GenFlagsSizes(SIZE_DEF, SIZE_8BIT) | FLAGS_MODRM, 0}},
    {OPD(PF_38_66 | PF_38_F2, 0xF1), 1, X86InstInfo{"CRC32", TYPE_INST, FLAGS_MODRM, 0}},

    // 0xF6: ADCX (66) / ADOX (F3).
    {OPD(PF_38_66, 0xF6), 1, X86InstInfo{"ADCX", TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY66, 0}},
    {OPD(PF_38_F3, 0xF6), 1, X86InstInfo{"ADOX", TYPE_INST, FLAGS_MODRM, 0}},
  };
#undef OPD

  GenerateTable(Result.data(), Entries, std::size(Entries));

  IR::InstallToTable(Result, IR::OpDispatch_H0F38Table);
  return Result;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/H0F3ATables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/H0F3ATables.h"

#include <FEXCore/Core/Context.h>

#include <iterator>
#include <stdint.h>

namespace FEXCore::X86Tables {
using namespace InstFlags;
// Builds an index into the 0F 3A table: bit 9 = REX selector, bit 8 = prefix
// selector, bits 0-7 = opcode byte.
// Every parameter is parenthesized so that an expression argument (e.g. `a & b`)
// is evaluated as a unit before being shifted.
#define OPD(REX, prefix, opcode) (((REX) << 9) | ((prefix) << 8) | (opcode))
// Prefix selector values used as the `prefix` argument of OPD().
constexpr uint16_t PF_3A_NONE = 0;
constexpr uint16_t PF_3A_66   = 1;

// Row indices into H0F3A_ArchSelect_LUT.
// Each name spells out the OPD() key of the table slot that points at the row:
// ENTRY_<REX>_3A_<prefix>_<opcode>.
enum H0F3A_LUT {
  ENTRY_1_3A_66_16 = 0, // REX=1, 66 0F 3A 16
  ENTRY_1_3A_66_22 = 1, // REX=1, 66 0F 3A 22
  ENTRY_MAX        = 2, // Number of rows; used as the LUT's array extent.
};

constexpr std::array<X86InstInfo[2], ENTRY_MAX> H0F3A_ArchSelect_LUT = {{
  // ENTRY_1_3A_66_16
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"PEXTRQ",          TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PExtrOp, IR::OpSize::i64Bit> }},
  },
  // ENTRY_1_3A_66_22
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"PINSRQ",          TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1, { .OpDispatch = &IR::OpDispatchBuilder::PINSROp<IR::OpSize::i64Bit> }},
  },
}};

// Decoder table for the three-byte 0F 3A opcode map (all entries carry one
// immediate byte, see the trailing `1` of each X86InstInfo).
//
// The table index is produced by OPD(REX, prefix, opcode). Most instructions
// do not depend on the REX selector, so their entries are generated twice
// (REX = 0 and REX = 1). Opcodes 0x16 and 0x22 differ by the REX selector and
// are listed separately.
constexpr std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> H0F3ATableOps = []() consteval {
  std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> Table{};

  // Produces the REX-independent entries for one value of the REX selector.
  auto TableGen = []<uint16_t REX>() consteval {
    constexpr U16U8InfoStruct Table[] = {
      {OPD(REX, PF_3A_NONE, 0x0F), 1, X86InstInfo{"PALIGNR",         TYPE_INST, GenFlagsSameSize(SIZE_64BIT)  | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
      {OPD(REX, PF_3A_66,   0x08), 1, X86InstInfo{"ROUNDPS",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x09), 1, X86InstInfo{"ROUNDPD",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x0A), 1, X86InstInfo{"ROUNDSS",         TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x0B), 1, X86InstInfo{"ROUNDSD",         TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x0C), 1, X86InstInfo{"BLENDPS",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,           1}},
      {OPD(REX, PF_3A_66,   0x0D), 1, X86InstInfo{"BLENDPD",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,           1}},
      {OPD(REX, PF_3A_66,   0x0E), 1, X86InstInfo{"PBLENDW",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,           1}},
      {OPD(REX, PF_3A_66,   0x0F), 1, X86InstInfo{"PALIGNR",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},

      // Element extracts into a GPR or memory. A register destination is
      // zero-extended to the full GPR, so PEXTRB/PEXTRW/EXTRACTPS all declare a
      // 32-bit destination regardless of the extracted element width.
      {OPD(REX, PF_3A_66,   0x14), 1, X86InstInfo{"PEXTRB",          TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x15), 1, X86InstInfo{"PEXTRW",          TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x17), 1, X86InstInfo{"EXTRACTPS",       TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},

      {OPD(REX, PF_3A_66,   0x20), 1, X86InstInfo{"PINSRB",          TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR,           1}},
      {OPD(REX, PF_3A_66,   0x21), 1, X86InstInfo{"INSERTPS",        TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,           1}},
      {OPD(REX, PF_3A_66,   0x40), 1, X86InstInfo{"DPPS",            TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x41), 1, X86InstInfo{"DPPD",            TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x42), 1, X86InstInfo{"MPSADBW",         TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x44), 1, X86InstInfo{"PCLMULQDQ",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},

      // The explicit-length string compares declare a 32-bit source size while
      // the implicit-length ones declare 128 bits.
      {OPD(REX, PF_3A_66,   0x60), 1, X86InstInfo{"PCMPESTRM",       TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x61), 1, X86InstInfo{"PCMPESTRI",       TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x62), 1, X86InstInfo{"PCMPISTRM",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
      {OPD(REX, PF_3A_66,   0x63), 1, X86InstInfo{"PCMPISTRI",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},

      {OPD(REX, PF_3A_NONE, 0xCC), 1, X86InstInfo{"SHA1RNDS4",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},

      {OPD(REX, PF_3A_66,   0xDF), 1, X86InstInfo{"AESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
    };
    return std::to_array(Table);
  };
  // Instantiate the REX-independent entries once per REX selector value so the
  // same instruction info lands in both halves of the table.
  constexpr auto H0F3ATable_IgnoresREX0 = TableGen.template operator()<0>();
  constexpr auto H0F3ATable_IgnoresREX1 = TableGen.template operator()<1>();

  GenerateTable(Table.data(), H0F3ATable_IgnoresREX0.data(), H0F3ATable_IgnoresREX0.size());
  GenerateTable(Table.data(), H0F3ATable_IgnoresREX1.data(), H0F3ATable_IgnoresREX1.size());

  // REX = 0: the 32-bit element forms of 0x16 / 0x22.
  constexpr U16U8InfoStruct TableNeedsREX0[] = {
    {OPD(0, PF_3A_66,   0x16), 1, X86InstInfo{"PEXTRD",          TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
    {OPD(0, PF_3A_66,   0x22), 1, X86InstInfo{"PINSRD",          TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR,           1}},
  };
  GenerateTable(Table.data(), TableNeedsREX0, std::size(TableNeedsREX0));

  // REX = 1: the same opcodes defer to H0F3A_ArchSelect_LUT, whose rows carry
  // their own OpDispatch pointers (PEXTRQ / PINSRQ or an invalid entry).
  constexpr U16U8InfoStruct TableNeedsREX1[] = {
    {OPD(1, PF_3A_66,   0x16), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = H0F3A_ArchSelect_LUT[ENTRY_1_3A_66_16] }}},
    {OPD(1, PF_3A_66,   0x22), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = H0F3A_ArchSelect_LUT[ENTRY_1_3A_66_22] }}},
  };
  GenerateTable(Table.data(), TableNeedsREX1, std::size(TableNeedsREX1));

  // Attach the opcode dispatch handlers for the directly-listed entries.
  IR::InstallToTable(Table, IR::OpDispatch_H0F3ATableIgnoreREX);
  IR::InstallToTable(Table, IR::OpDispatch_H0F3ATableNeedsREX0);

  return Table;
}();
}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/PrimaryGroupTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/PrimaryGroupTables.h"

#include <FEXCore/Core/Context.h>

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;
// Row indices into PrimaryGroup_ArchSelect_LUT, one per row in declaration order.
// NOTE(review): the names suggest opcode 0x82 with ModRM.reg values 0-7 - confirm
// against the table entries that reference these rows.
enum PrimaryGroup_LUT {
  ENTRY_1_82_0 = 0,
  ENTRY_1_82_1 = 1,
  ENTRY_1_82_2 = 2,
  ENTRY_1_82_3 = 3,
  ENTRY_1_82_4 = 4,
  ENTRY_1_82_5 = 5,
  ENTRY_1_82_6 = 6,
  ENTRY_1_82_7 = 7,
  ENTRY_MAX    = 8, // Number of rows; used as the LUT's array extent.
};

// Two-way instruction selection rows for the primary group table. Every row
// pairs an 8-bit ALU instruction (slot 0) with an invalid entry (slot 1).
// NOTE(review): presumably slot 0 is picked for 32-bit guests and slot 1 for
// 64-bit guests - confirm against the TYPE_ARCH_DISPATCHER handling in the decoder.
constexpr std::array<X86InstInfo[2], ENTRY_MAX> PrimaryGroup_ArchSelect_LUT = {{
  // ENTRY_1_82_0
  {
    {"ADD", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::SecondaryALUOp}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_1
  {
    {"OR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::SecondaryALUOp}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_2
  {
    {"ADC", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::ADCOp, 1>}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_3
  {
    {"SBB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::SBBOp, 1>}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_4
  {
    {"AND", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::SecondaryALUOp}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_5
  {
    {"SUB", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::SecondaryALUOp}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_6
  {
    {"XOR", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::SecondaryALUOp}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
  // ENTRY_1_82_7
  {
    {"CMP", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 1,
     {.OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::CMPOp, 1>}},
    {"", TYPE_INVALID, FLAGS_NONE, 0, {.OpDispatch = nullptr}},
  },
}};

// Compile-time decode table for the primary opcode groups listed below
// (groups 1, 2, 3, 4, 5 and 11).
//
// The immediately-invoked consteval lambda:
//   1. value-initializes the destination array,
//   2. expands the compact row list with GenerateTable(),
//   3. passes the array and IR::OpDispatch_PrimaryGroupTables to IR::InstallToTable()
//      (presumably to attach the OpDispatch handlers -- confirm against InstallToTable),
//   4. returns the finished array.
//
// Row layout is {key, count, X86InstInfo{name, type, flags, N, ...}}.
//   * `count` looks like the number of consecutive keys that receive the same info
//     (the TYPE_INVALID filler rows use 5 and 6) -- confirm against GenerateTable.
//   * `N` is 1 on the 0x80/0x82/0x83/0xC0/0xC1/0xF6-TEST/0xC6 rows, 4 on the
//     0x81/0xF7-TEST/0xC7 rows and 0 elsewhere; presumably the number of immediate
//     bytes that follow the instruction -- confirm against X86InstInfo.
constexpr std::array<X86InstInfo, MAX_INST_GROUP_TABLE_SIZE> PrimaryInstGroupOps = []() consteval {
  std::array<X86InstInfo, MAX_INST_GROUP_TABLE_SIZE> Table{};
// Row key: (group - TYPE_GROUP_1) in the bits above bit 5, the opcode index returned by
// OpToIndex() shifted left by 3, and Reg (the /digit that selects the group member) in the
// low three bits. The macro is only needed while the row list is written and is undefined
// again right after it.
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg))
  constexpr U16U8InfoStruct PrimaryGroupOpTable[] = {
    // GROUP_1 | 0x80 | reg
    // 8-bit operand size (GenFlagsSameSize(SIZE_8BIT)).
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 0), 1, X86InstInfo{"ADD",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 1), 1, X86InstInfo{"OR",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 2), 1, X86InstInfo{"ADC",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 3), 1, X86InstInfo{"SBB",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 4), 1, X86InstInfo{"AND",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 5), 1, X86InstInfo{"SUB",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 6), 1, X86InstInfo{"XOR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x80), 7), 1, X86InstInfo{"CMP",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},

    // GROUP_1 | 0x81 | reg
    // No fixed size flag; source carries FLAGS_SRC_SEXT64BIT and FLAGS_DISPLACE_SIZE_DIV_2.
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 0), 1, X86InstInfo{"ADD",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 1), 1, X86InstInfo{"OR",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 2), 1, X86InstInfo{"ADC",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 3), 1, X86InstInfo{"SBB",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 4), 1, X86InstInfo{"AND",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 5), 1, X86InstInfo{"SUB",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 6), 1, X86InstInfo{"XOR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x81), 7), 1, X86InstInfo{"CMP",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},

    // Duplicates the 0x80 opcode group
    // These rows carry no instruction themselves: TYPE_ARCH_DISPATCHER plus .Indirect hands
    // the decision to the matching two-entry row of PrimaryGroup_ArchSelect_LUT.
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 0), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_0] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 1), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_1] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 2), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_2] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 3), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_3] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 4), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_4] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 5), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_5] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 6), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_6] }}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x82), 7), 1, X86InstInfo{"",  TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = PrimaryGroup_ArchSelect_LUT[ENTRY_1_82_7] }}},

    // GROUP_1 | 0x83 | reg
    // Source is flagged FLAGS_SRC_SEXT.
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 0), 1, X86InstInfo{"ADD",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 1), 1, X86InstInfo{"OR",   TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 2), 1, X86InstInfo{"ADC",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 3), 1, X86InstInfo{"SBB",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 4), 1, X86InstInfo{"AND",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 5), 1, X86InstInfo{"SUB",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 6), 1, X86InstInfo{"XOR",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},
    {OPD(TYPE_GROUP_1, OpToIndex(0x83), 7), 1, X86InstInfo{"CMP",  TYPE_INST, FLAGS_SRC_SEXT | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     1}},

    // GROUP 2
    // Rotates and shifts. In every sub-table reg 6 carries the same "SHL" entry as reg 4.
    // 0xC0 / 0xC1: trailing value 1.
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 0), 1, X86InstInfo{"ROL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 1), 1, X86InstInfo{"ROR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 2), 1, X86InstInfo{"RCL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 3), 1, X86InstInfo{"RCR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 4), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 5), 1, X86InstInfo{"SHR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 6), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC0), 7), 1, X86InstInfo{"SAR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},

    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 0), 1, X86InstInfo{"ROL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 1), 1, X86InstInfo{"ROR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 2), 1, X86InstInfo{"RCL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 3), 1, X86InstInfo{"RCR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 4), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 5), 1, X86InstInfo{"SHR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 6), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xC1), 7), 1, X86InstInfo{"SAR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      1}},

    // 0xD0 / 0xD1: same operations, trailing value 0.
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 0), 1, X86InstInfo{"ROL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 1), 1, X86InstInfo{"ROR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 2), 1, X86InstInfo{"RCL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 3), 1, X86InstInfo{"RCR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 4), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 5), 1, X86InstInfo{"SHR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 6), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD0), 7), 1, X86InstInfo{"SAR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},

    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 0), 1, X86InstInfo{"ROL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 1), 1, X86InstInfo{"ROR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 2), 1, X86InstInfo{"RCL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 3), 1, X86InstInfo{"RCR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 4), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 5), 1, X86InstInfo{"SHR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 6), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD1), 7), 1, X86InstInfo{"SAR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},

    // 0xD2 / 0xD3: additionally flagged FLAGS_SF_SRC_RCX, trailing value 0.
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 0), 1, X86InstInfo{"ROL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 1), 1, X86InstInfo{"ROR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 2), 1, X86InstInfo{"RCL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 3), 1, X86InstInfo{"RCR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 4), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 5), 1, X86InstInfo{"SHR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 6), 1, X86InstInfo{"SHL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD2), 7), 1, X86InstInfo{"SAR",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                   0}},

    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 0), 1, X86InstInfo{"ROL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 1), 1, X86InstInfo{"ROR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 2), 1, X86InstInfo{"RCL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 3), 1, X86InstInfo{"RCR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 4), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 5), 1, X86InstInfo{"SHR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 6), 1, X86InstInfo{"SHL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},
    {OPD(TYPE_GROUP_2, OpToIndex(0xD3), 7), 1, X86InstInfo{"SAR",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX,                                   0}},

    // GROUP 3
    // reg 0 and reg 1 both map to TEST; only the TEST rows have a non-zero trailing value.
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 0), 1, X86InstInfo{"TEST", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 1), 1, X86InstInfo{"TEST", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      1}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 2), 1, X86InstInfo{"NOT",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 3), 1, X86InstInfo{"NEG",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 4), 1, X86InstInfo{"MUL",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 5), 1, X86InstInfo{"IMUL", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 6), 1, X86InstInfo{"DIV",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF6), 7), 1, X86InstInfo{"IDIV", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                      0}},

    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 0), 1, X86InstInfo{"TEST", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 1), 1, X86InstInfo{"TEST", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT64BIT | FLAGS_DISPLACE_SIZE_DIV_2,                          4}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 2), 1, X86InstInfo{"NOT",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 3), 1, X86InstInfo{"NEG",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 4), 1, X86InstInfo{"MUL",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 5), 1, X86InstInfo{"IMUL", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 6), 1, X86InstInfo{"DIV",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},
    {OPD(TYPE_GROUP_3, OpToIndex(0xF7), 7), 1, X86InstInfo{"IDIV", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                      0}},

    // GROUP 4
    // Only INC/DEC are defined; a single filler row with count 6 marks reg 2..7 TYPE_INVALID.
    {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 0), 1, X86InstInfo{"INC",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     0}},
    {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 1), 1, X86InstInfo{"DEC",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                     0}},
    {OPD(TYPE_GROUP_4, OpToIndex(0xFE), 2), 6, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                                       0}},

    // GROUP 5
    // The control-transfer rows set FLAGS_SETS_RIP and FLAGS_BLOCK_END; the far forms
    // (CALLF/JMPF) are additionally restricted with FLAGS_SF_MOD_MEM_ONLY.
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 0), 1, X86InstInfo{"INC",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                     0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 1), 1, X86InstInfo{"DEC",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                     0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 2), 1, X86InstInfo{"CALL",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END | FLAGS_CALL , 0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 3), 1, X86InstInfo{"CALLF", TYPE_INST, FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY | FLAGS_BLOCK_END,                  0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 4), 1, X86InstInfo{"JMP",   TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_BLOCK_END , 0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 5), 1, X86InstInfo{"JMPF",  TYPE_INST, FLAGS_SETS_RIP | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY | FLAGS_BLOCK_END,                  0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 6), 1, X86InstInfo{"PUSH",  TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM,                                                     0}},
    {OPD(TYPE_GROUP_5, OpToIndex(0xFF), 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                                       0}},

    // GROUP 11
    // NOTE(review): for both 0xC6 and 0xC7 the filler row covers reg 1..5 (count 5) and
    // reg 7 has its own row, so reg 6 has no row here and keeps whatever `Table{}`
    // value-initialized it to -- confirm that this is intended rather than TYPE_INVALID.
    {OPD(TYPE_GROUP_11, OpToIndex(0xC6), 0), 1, X86InstInfo{"MOV",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST  | FLAGS_SRC_SEXT,                   1}},
    {OPD(TYPE_GROUP_11, OpToIndex(0xC6), 1), 5, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,                                                       0}},
    {OPD(TYPE_GROUP_11, OpToIndex(0xC6), 7), 1, X86InstInfo{"XABORT", TYPE_INST, FLAGS_MODRM,                                                       1}},
    {OPD(TYPE_GROUP_11, OpToIndex(0xC7), 0), 1, X86InstInfo{"MOV",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2,                                   4}},
    {OPD(TYPE_GROUP_11, OpToIndex(0xC7), 1), 5, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,                                                       0}},
    {OPD(TYPE_GROUP_11, OpToIndex(0xC7), 7), 1, X86InstInfo{"XBEGIN", TYPE_INST, FLAGS_MODRM | FLAGS_SRC_SEXT | FLAGS_SETS_RIP | FLAGS_DISPLACE_SIZE_DIV_2,                                                       4}},
  };
// The key-building macro has served its purpose; drop it so it cannot leak into, or clash
// with, anything that follows in this translation unit.
#undef OPD

  // Expand the {key, count, info} rows into the flat array.
  GenerateTable(Table.data(), PrimaryGroupOpTable, std::size(PrimaryGroupOpTable));

  // Merge the entries of IR::OpDispatch_PrimaryGroupTables into the expanded table.
  IR::InstallToTable(Table, IR::OpDispatch_PrimaryGroupTables);
  return Table;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/SecondaryGroupTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/SecondaryGroupTables.h"

#include <iterator>
#include <stdint.h>

namespace FEXCore::X86Tables {
using namespace InstFlags;
// Values passed as the `prefix` argument of OPD below. The names follow the
// prefix bytes F3 / 66 / F2 (PF_NONE = no such prefix) -- presumably the legacy
// prefix that selects the instruction variant; confirm against the decoder.
constexpr uint16_t PF_NONE = 0;
constexpr uint16_t PF_F3   = 1;
constexpr uint16_t PF_66   = 2;
constexpr uint16_t PF_F2   = 3;
// Table key for the secondary group tables: the group number relative to
// TYPE_GROUP_6 in the bits above bit 4, the PF_* value (0..3, two bits) in
// bits 3-4, and Reg in the low three bits.
// NOTE(review): `group` is not parenthesized in the expansion; that is fine for the
// plain TYPE_GROUP_* identifiers passed in the visible uses.
#define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_6) << 5) | (prefix) << 3 | (Reg))

// Row indices into SecondGroup_ArchSelect_LUT. Each enumerator names the
// TYPE_GROUP_15 / PF_F3 slot it serves; the trailing digit is the Reg value.
enum SecondGroup_LUT {
  ENTRY_15_F3_0 = 0, // RDFSBASE row
  ENTRY_15_F3_1,     // RDGSBASE row
  ENTRY_15_F3_2,     // WRFSBASE row
  ENTRY_15_F3_3,     // WRGSBASE row
  ENTRY_MAX,         // Number of rows; sizes the LUT, so it must stay last
};

constexpr std::array<X86InstInfo[2], ENTRY_MAX> SecondGroup_ArchSelect_LUT = {{
  // ENTRY_15_F3_0
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"RDFSBASE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::ReadSegmentReg, IR::OpDispatchBuilder::Segment::FS> } },
  },
  // ENTRY_15_F3_1
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"RDGSBASE", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::ReadSegmentReg, IR::OpDispatchBuilder::Segment::GS> } },
  },
  // ENTRY_15_F3_2
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"WRFSBASE", TYPE_INST, GenFlagsDstSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::WriteSegmentReg, IR::OpDispatchBuilder::Segment::FS> } },
  },
  // ENTRY_15_F3_3
  {
    {"", TYPE_INVALID, FLAGS_NONE, 0, { .OpDispatch = nullptr } },
    {"WRGSBASE", TYPE_INST, GenFlagsDstSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::WriteSegmentReg, IR::OpDispatchBuilder::Segment::GS> } },
  },
}};

constexpr std::array<X86InstInfo, MAX_INST_SECOND_GROUP_TABLE_SIZE> SecondInstGroupOps = []() consteval {
  std::array<X86InstInfo, MAX_INST_SECOND_GROUP_TABLE_SIZE> Table{};
  constexpr U16U8InfoStruct SecondaryExtensionOpTable[] = {
    // GROUP 1
    // GROUP 2
    // GROUP 3
    // GROUP 4
    // GROUP 5
    // Pulls from other MODRM table

    // GROUP 6
    {OPD(TYPE_GROUP_6, PF_NONE, 0), 1, X86InstInfo{"SLDT",  TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 1), 1, X86InstInfo{"STR",   TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST,  0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 2), 1, X86InstInfo{"LLDT",  TYPE_PRIV, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 3), 1, X86InstInfo{"LTR",   TYPE_INST, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 4), 1, X86InstInfo{"VERR",  TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 5), 1, X86InstInfo{"VERW",  TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 6), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,    0}},
    {OPD(TYPE_GROUP_6, PF_NONE, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,    0}},

    {OPD(TYPE_GROUP_6, PF_F3, 0), 1, X86InstInfo{"SLDT",    TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPD(TYPE_GROUP_6, PF_F3, 1), 1, X86InstInfo{"STR",     TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST,  0}},
    {OPD(TYPE_GROUP_6, PF_F3, 2), 1, X86InstInfo{"LLDT",    TYPE_PRIV, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_F3, 3), 1, X86InstInfo{"LTR",     TYPE_INST, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_F3, 4), 1, X86InstInfo{"VERR",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_F3, 5), 1, X86InstInfo{"VERW",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_F3, 6), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},
    {OPD(TYPE_GROUP_6, PF_F3, 7), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},

    {OPD(TYPE_GROUP_6, PF_66, 0), 1, X86InstInfo{"SLDT",    TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPD(TYPE_GROUP_6, PF_66, 1), 1, X86InstInfo{"STR",     TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST,  0}},
    {OPD(TYPE_GROUP_6, PF_66, 2), 1, X86InstInfo{"LLDT",    TYPE_PRIV, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_66, 3), 1, X86InstInfo{"LTR",     TYPE_INST, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_66, 4), 1, X86InstInfo{"VERR",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_66, 5), 1, X86InstInfo{"VERW",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_66, 6), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},
    {OPD(TYPE_GROUP_6, PF_66, 7), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},

    {OPD(TYPE_GROUP_6, PF_F2, 0), 1, X86InstInfo{"SLDT",    TYPE_UNDEC, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPD(TYPE_GROUP_6, PF_F2, 1), 1, X86InstInfo{"STR",     TYPE_PRIV, FLAGS_MODRM | FLAGS_SF_MOD_DST,  0}},
    {OPD(TYPE_GROUP_6, PF_F2, 2), 1, X86InstInfo{"LLDT",    TYPE_PRIV, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_F2, 3), 1, X86InstInfo{"LTR",     TYPE_INST, FLAGS_NONE,       0}},
    {OPD(TYPE_GROUP_6, PF_F2, 4), 1, X86InstInfo{"VERR",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_F2, 5), 1, X86InstInfo{"VERW",    TYPE_UNDEC, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_6, PF_F2, 6), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},
    {OPD(TYPE_GROUP_6, PF_F2, 7), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,    0}},

    // GROUP 7
    {OPD(TYPE_GROUP_7, PF_NONE, 0), 1, X86InstInfo{"SGDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,         0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 1), 1, X86InstInfo{"SIDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,         0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 2), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 3), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE, 0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,          0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 5), 1, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,            0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 6), 1, X86InstInfo{"LMSW", TYPE_INST, FLAGS_MODRM,          0}},
    {OPD(TYPE_GROUP_7, PF_NONE, 7), 1, X86InstInfo{"INVLPG", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},

    {OPD(TYPE_GROUP_7, PF_F3, 0), 1, X86InstInfo{"SGDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_F3, 1), 1, X86InstInfo{"SIDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_F3, 2), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_F3, 3), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_F3, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,            0}},
    {OPD(TYPE_GROUP_7, PF_F3, 5), 1, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,              0}},
    {OPD(TYPE_GROUP_7, PF_F3, 6), 1, X86InstInfo{"LMSW", TYPE_INST, FLAGS_MODRM,            0}},
    {OPD(TYPE_GROUP_7, PF_F3, 7), 1, X86InstInfo{"INVLPG", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,   0}},

    {OPD(TYPE_GROUP_7, PF_66, 0), 1, X86InstInfo{"SGDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_66, 1), 1, X86InstInfo{"SIDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_66, 2), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_66, 3), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_66, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,            0}},
    {OPD(TYPE_GROUP_7, PF_66, 5), 1, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,              0}},
    {OPD(TYPE_GROUP_7, PF_66, 6), 1, X86InstInfo{"LMSW", TYPE_INST, FLAGS_MODRM,            0}},
    {OPD(TYPE_GROUP_7, PF_66, 7), 1, X86InstInfo{"INVLPG", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,   0}},

    {OPD(TYPE_GROUP_7, PF_F2, 0), 1, X86InstInfo{"SGDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_F2, 1), 1, X86InstInfo{"SIDT", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,           0}},
    {OPD(TYPE_GROUP_7, PF_F2, 2), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_F2, 3), 1, X86InstInfo{"",     TYPE_SECOND_GROUP_MODRM, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_7, PF_F2, 4), 1, X86InstInfo{"SMSW", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,            0}},
    {OPD(TYPE_GROUP_7, PF_F2, 5), 1, X86InstInfo{"",     TYPE_INVALID, FLAGS_NONE,              0}},
    {OPD(TYPE_GROUP_7, PF_F2, 6), 1, X86InstInfo{"LMSW", TYPE_INST, FLAGS_MODRM,            0}},
    {OPD(TYPE_GROUP_7, PF_F2, 7), 1, X86InstInfo{"INVLPG", TYPE_SECOND_GROUP_MODRM, FLAGS_MODRM | FLAGS_SF_MOD_DST,   0}},

    // GROUP 8
    {OPD(TYPE_GROUP_8, PF_NONE, 0), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_8, PF_NONE, 1), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_8, PF_NONE, 2), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_8, PF_NONE, 3), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_8, PF_NONE, 4), 1, X86InstInfo{"BT",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1}},
    {OPD(TYPE_GROUP_8, PF_NONE, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1}},
    {OPD(TYPE_GROUP_8, PF_NONE, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1}},
    {OPD(TYPE_GROUP_8, PF_NONE, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST, 1}},

    {OPD(TYPE_GROUP_8, PF_F3, 0), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F3, 1), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F3, 2), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F3, 3), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F3, 4), 1, X86InstInfo{"BT",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F3, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F3, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F3, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},

    {OPD(TYPE_GROUP_8, PF_66, 0), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_66, 1), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_66, 2), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_66, 3), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_66, 4), 1, X86InstInfo{"BT",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_66, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_66, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_66, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},

    {OPD(TYPE_GROUP_8, PF_F2, 0), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F2, 1), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F2, 2), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F2, 3), 1, X86InstInfo{"",    TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_8, PF_F2, 4), 1, X86InstInfo{"BT",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F2, 5), 1, X86InstInfo{"BTS", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F2, 6), 1, X86InstInfo{"BTR", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},
    {OPD(TYPE_GROUP_8, PF_F2, 7), 1, X86InstInfo{"BTC", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,   1}},

    // GROUP 9

    // AMD documentation is a bit broken for Group 9
    // Claims the entire group has n/a applied for the prefix (Implies that the prefix is ignored)
    // RDRAND/RDSEED only work with no prefix (Other than 66h)
    // CMPXCHG8B/16B works with all prefixes
    // Tooling fails to decode CMPXCHG with prefix
    {OPD(TYPE_GROUP_9, PF_NONE, 0), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 1), 1, X86InstInfo{"CMPXCHG8B/16B", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 2), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 3), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 4), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 5), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,   0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 6), 1, X86InstInfo{"RDRAND",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_NONE, 7), 1, X86InstInfo{"RDSEED",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0}},

    {OPD(TYPE_GROUP_9, PF_F3, 0), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 1), 1, X86InstInfo{"CMPXCHG8B/16B", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_F3, 2), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 3), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 4), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 5), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 6), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F3, 7), 1, X86InstInfo{"RDPID",      TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0}},

    {OPD(TYPE_GROUP_9, PF_66, 0), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_66, 1), 1, X86InstInfo{"CMPXCHG8B/16B", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_66, 2), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_66, 3), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_66, 4), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_66, 5), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_66, 6), 1, X86InstInfo{"RDRAND",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_66, 7), 1, X86InstInfo{"RDSEED",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY, 0}},

    {OPD(TYPE_GROUP_9, PF_F2, 0), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 1), 1, X86InstInfo{"CMPXCHG8B/16B", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_9, PF_F2, 2), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 3), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 4), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 5), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 6), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},
    {OPD(TYPE_GROUP_9, PF_F2, 7), 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,     0}},

    // GROUP 10
    {OPD(TYPE_GROUP_10, PF_NONE, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},
    {OPD(TYPE_GROUP_10, PF_NONE, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END, 0}},

    {OPD(TYPE_GROUP_10, PF_F3, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F3, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},

    {OPD(TYPE_GROUP_10, PF_66, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_66, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},

    {OPD(TYPE_GROUP_10, PF_F2, 0), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 1), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 2), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 3), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 4), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 5), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 6), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},
    {OPD(TYPE_GROUP_10, PF_F2, 7), 1, X86InstInfo{"UD1", TYPE_INST, FLAGS_BLOCK_END,   0}},

    // GROUP 12
    {OPD(TYPE_GROUP_12, PF_NONE, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_12, PF_NONE, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_12, PF_NONE, 2), 1, X86InstInfo{"PSRLW", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_12, PF_NONE, 3), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_12, PF_NONE, 4), 1, X86InstInfo{"PSRAW", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_12, PF_NONE, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_12, PF_NONE, 6), 1, X86InstInfo{"PSLLW", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_12, PF_NONE, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},

    {OPD(TYPE_GROUP_12, PF_66, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_12, PF_66, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_12, PF_66, 2), 1, X86InstInfo{"PSRLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_12, PF_66, 3), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_12, PF_66, 4), 1, X86InstInfo{"PSRAW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_12, PF_66, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_12, PF_66, 6), 1, X86InstInfo{"PSLLW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_12, PF_66, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},

    {OPD(TYPE_GROUP_12, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    {OPD(TYPE_GROUP_12, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_12, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    // GROUP 13
    {OPD(TYPE_GROUP_13, PF_NONE, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_13, PF_NONE, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_13, PF_NONE, 2), 1, X86InstInfo{"PSRLD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_13, PF_NONE, 3), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_13, PF_NONE, 4), 1, X86InstInfo{"PSRAD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_13, PF_NONE, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_13, PF_NONE, 6), 1, X86InstInfo{"PSLLD", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_13, PF_NONE, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},

    {OPD(TYPE_GROUP_13, PF_66, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_13, PF_66, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_13, PF_66, 2), 1, X86InstInfo{"PSRLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_13, PF_66, 3), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_13, PF_66, 4), 1, X86InstInfo{"PSRAD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_13, PF_66, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_13, PF_66, 6), 1, X86InstInfo{"PSLLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_13, PF_66, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},

    {OPD(TYPE_GROUP_13, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    {OPD(TYPE_GROUP_13, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_13, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    // GROUP 14
    {OPD(TYPE_GROUP_14, PF_NONE, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_14, PF_NONE, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_14, PF_NONE, 2), 1, X86InstInfo{"PSRLQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_14, PF_NONE, 3), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_14, PF_NONE, 4), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_14, PF_NONE, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},
    {OPD(TYPE_GROUP_14, PF_NONE, 6), 1, X86InstInfo{"PSLLQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {OPD(TYPE_GROUP_14, PF_NONE, 7), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                      0}},

    {OPD(TYPE_GROUP_14, PF_66, 0), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_14, PF_66, 1), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_14, PF_66, 2), 1, X86InstInfo{"PSRLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_14, PF_66, 3), 1, X86InstInfo{"PSRLDQ",TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_14, PF_66, 4), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_14, PF_66, 5), 1, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE,                                        0}},
    {OPD(TYPE_GROUP_14, PF_66, 6), 1, X86InstInfo{"PSLLQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},
    {OPD(TYPE_GROUP_14, PF_66, 7), 1, X86InstInfo{"PSLLDQ",TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,  1}},

    {OPD(TYPE_GROUP_14, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    {OPD(TYPE_GROUP_14, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},
    {OPD(TYPE_GROUP_14, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                             0}},

    // GROUP 15
    {OPD(TYPE_GROUP_15, PF_NONE, 0), 1, X86InstInfo{"FXSAVE",          TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,       0}}, // MMX/x87
    {OPD(TYPE_GROUP_15, PF_NONE, 1), 1, X86InstInfo{"FXRSTOR",         TYPE_INST, FLAGS_MODRM,       0}}, // MMX/x87
    {OPD(TYPE_GROUP_15, PF_NONE, 2), 1, X86InstInfo{"LDMXCSR",         TYPE_INST, GenFlagsSameSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_15, PF_NONE, 3), 1, X86InstInfo{"STMXCSR",         TYPE_INST, GenFlagsSameSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_15, PF_NONE, 4), 1, X86InstInfo{"XSAVE",           TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},
    {OPD(TYPE_GROUP_15, PF_NONE, 5), 1, X86InstInfo{"LFENCE/XRSTOR",   TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},
    {OPD(TYPE_GROUP_15, PF_NONE, 6), 1, X86InstInfo{"MFENCE/XSAVEOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},
    {OPD(TYPE_GROUP_15, PF_NONE, 7), 1, X86InstInfo{"SFENCE/CLFLUSH",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},

    {OPD(TYPE_GROUP_15, PF_F3, 0), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = SecondGroup_ArchSelect_LUT[ENTRY_15_F3_0] }}},
    {OPD(TYPE_GROUP_15, PF_F3, 1), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = SecondGroup_ArchSelect_LUT[ENTRY_15_F3_1] }}},
    {OPD(TYPE_GROUP_15, PF_F3, 2), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = SecondGroup_ArchSelect_LUT[ENTRY_15_F3_2] }}},
    {OPD(TYPE_GROUP_15, PF_F3, 3), 1, X86InstInfo{"", TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = SecondGroup_ArchSelect_LUT[ENTRY_15_F3_3] }}},
    {OPD(TYPE_GROUP_15, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F3, 5), 1, X86InstInfo{"INCSSPQ", TYPE_INST, FLAGS_MODRM,                    0}},
    {OPD(TYPE_GROUP_15, PF_F3, 6), 1, X86InstInfo{"UMONITOR/CLRSSBSY", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,       0}},
    {OPD(TYPE_GROUP_15, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},

    {OPD(TYPE_GROUP_15, PF_66, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_66, 6), 1, X86InstInfo{"CLWB", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},
    {OPD(TYPE_GROUP_15, PF_66, 7), 1, X86InstInfo{"CLFLUSHOPT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,      0}},

    {OPD(TYPE_GROUP_15, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},
    {OPD(TYPE_GROUP_15, PF_F2, 6), 1, X86InstInfo{"UMWAIT", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST,       0}},
    {OPD(TYPE_GROUP_15, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                    0}},

    // GROUP 16
    // AMD documentation claims again that this entire group is n/a to prefix
    // Tooling once again fails to disassemble ones with the prefix. Disable until proven otherwise
    {OPD(TYPE_GROUP_16, PF_NONE, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 1), 1, X86InstInfo{"PREFETCH T0",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 2), 1, X86InstInfo{"PREFETCH T1",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 3), 1, X86InstInfo{"PREFETCH T2",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 4), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 5), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 6), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM, 0}},
    {OPD(TYPE_GROUP_16, PF_NONE, 7), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM, 0}},

    {OPD(TYPE_GROUP_16, PF_F3, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 1), 1, X86InstInfo{"PREFETCH T0",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 2), 1, X86InstInfo{"PREFETCH T1",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 3), 1, X86InstInfo{"PREFETCH T2",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 4), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 5), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 6), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F3, 7), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},

    {OPD(TYPE_GROUP_16, PF_66, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 1), 1, X86InstInfo{"PREFETCH T0",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 2), 1, X86InstInfo{"PREFETCH T1",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 3), 1, X86InstInfo{"PREFETCH T2",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 4), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 5), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 6), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_66, 7), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},

    {OPD(TYPE_GROUP_16, PF_F2, 0), 1, X86InstInfo{"PREFETCH NTA", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 1), 1, X86InstInfo{"PREFETCH T0",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 2), 1, X86InstInfo{"PREFETCH T1",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 3), 1, X86InstInfo{"PREFETCH T2",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 4), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 5), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 6), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},
    {OPD(TYPE_GROUP_16, PF_F2, 7), 1, X86InstInfo{"NOP",          TYPE_INST, FLAGS_MODRM,   0}},

    // GROUP 17
    {OPD(TYPE_GROUP_17, PF_NONE, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},
    {OPD(TYPE_GROUP_17, PF_NONE, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                          0}},

    {OPD(TYPE_GROUP_17, PF_F3, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},

    {OPD(TYPE_GROUP_17, PF_66, 0), 1, X86InstInfo{"EXTRQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 2}},
    {OPD(TYPE_GROUP_17, PF_66, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_66, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},

    {OPD(TYPE_GROUP_17, PF_F2, 0), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 1), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},
    {OPD(TYPE_GROUP_17, PF_F2, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE,                                            0}},

    // GROUP P
    // AMD documentation claims n/a for all instructions in Group P
    // It also claims that instructions /2, /4, /5, /6, /7 all alias to /0
    // It claims that /3 is still Prefetch Mod
    // Tooling fails to decode past the /2 encoding but runs fine in hardware
    // Hardware also runs all the prefixes correctly
    {OPD(TYPE_GROUP_P, PF_NONE, 0), 1, X86InstInfo{"PREFETCH Ex",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},
    {OPD(TYPE_GROUP_P, PF_NONE, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY, 0}},

    {OPD(TYPE_GROUP_P, PF_F3, 0), 1, X86InstInfo{"PREFETCH Ex",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F3, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},

    {OPD(TYPE_GROUP_P, PF_66, 0), 1, X86InstInfo{"PREFETCH Ex",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_66, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},

    {OPD(TYPE_GROUP_P, PF_F2, 0), 1, X86InstInfo{"PREFETCH Ex",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 1), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 2), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 3), 1, X86InstInfo{"PREFETCH Mod", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 4), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 5), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 6), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
    {OPD(TYPE_GROUP_P, PF_F2, 7), 1, X86InstInfo{"PREFETCH Res", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY,   0}},
  };
#undef OPD

  GenerateTable(Table.data(), SecondaryExtensionOpTable, std::size(SecondaryExtensionOpTable));

  IR::InstallToTable(Table, IR::OpDispatch_SecondaryGroupTables);
  return Table;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/SecondaryModRMTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/SecondaryModRMTables.h"

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;
// Compile-time lookup table for the "second ModRM" instruction set.
//
// Each row below is {table index, entry count, instruction info}. The index
// packs a group slot into bits [5:3] and a 3-bit selector into bits [2:0].
// The per-group comments name the ModRM REG value each slot stands for
// (slot 0 -> /1, slot 1 -> /2, slot 2 -> /3, slot 3 -> /7); the low three bits
// are presumably ModRM.rm -- NOTE(review): confirm against the decoder.
//
// The rows are expanded with GenerateTable and the result is then passed to
// IR::InstallToTable together with IR::OpDispatch_SecondaryModRMTables.
constexpr std::array<X86InstInfo, MAX_SECOND_MODRM_TABLE_SIZE> SecondModRMTableOps = []() consteval {
  // Builds a table index from a group slot and the low 3-bit selector.
  constexpr auto At = [](int GroupSlot, int Selector) {
    return (GroupSlot << 3) | Selector;
  };

  constexpr U8U8InfoStruct Entries[] = {
    // REG /1
    {At(0, 0), 1, X86InstInfo{"MONITOR", TYPE_INST, FLAGS_NONE, 0}},
    {At(0, 1), 1, X86InstInfo{"MWAIT", TYPE_INST, FLAGS_NONE, 0}},
    {At(0, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(0, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(0, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(0, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(0, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(0, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},

    // REG /2
    {At(1, 0), 1, X86InstInfo{"XGETBV", TYPE_INST, FLAGS_NONE, 0}},
    {At(1, 1), 1, X86InstInfo{"XSETBV", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(1, 2), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(1, 3), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(1, 4), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(1, 5), 1, X86InstInfo{"XEND", TYPE_INST, FLAGS_NONE, 0}},
    {At(1, 6), 1, X86InstInfo{"XTEST", TYPE_INST, FLAGS_NONE, 0}},
    {At(1, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},

    // REG /3
    {At(2, 0), 1, X86InstInfo{"VMRUN", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 1), 1, X86InstInfo{"VMMCALL", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 2), 1, X86InstInfo{"VMLOAD", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 3), 1, X86InstInfo{"VMSAVE", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 4), 1, X86InstInfo{"STGI", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 5), 1, X86InstInfo{"CLGI", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 6), 1, X86InstInfo{"SKINIT", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(2, 7), 1, X86InstInfo{"INVLPGA", TYPE_INST, FLAGS_NONE, 0}},

    // REG /7
    {At(3, 0), 1, X86InstInfo{"SWAPGS", TYPE_INST, FLAGS_NONE, 0}},
    {At(3, 1), 1, X86InstInfo{"RDTSCP", TYPE_INST, FLAGS_NONE, 0}},
    {At(3, 2), 1, X86InstInfo{"MONITORX", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(3, 3), 1, X86InstInfo{"MWAITX", TYPE_PRIV, FLAGS_NONE, 0}},
    {At(3, 4), 1, X86InstInfo{"CLZERO", TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SF_SRC_RAX | FLAGS_DEBUG_MEM_ACCESS, 0}},
    {At(3, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(3, 6), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {At(3, 7), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
  };

  // Start from a value-initialized table, then fill it from the rows above.
  std::array<X86InstInfo, MAX_SECOND_MODRM_TABLE_SIZE> Result{};
  GenerateTable(Result.data(), Entries, std::size(Entries));

  // Hand the populated table to the IR side along with its dispatch list.
  IR::InstallToTable(Result, IR::OpDispatch_SecondaryModRMTables);
  return Result;
}();

}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/SecondaryTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/SecondaryTables.h"

#include <FEXCore/Core/Context.h>

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;

// Row indices into Secondary_ArchSelect_LUT. Each enumerator is named after the
// two-byte opcode whose table entry dispatches through that row.
enum Secondary_LUT {
  ENTRY_05 = 0, // opcode 0x05: SYSCALL
  ENTRY_A0,     // opcode 0xA0: PUSH FS
  ENTRY_A1,     // opcode 0xA1: POP FS
  ENTRY_A8,     // opcode 0xA8: PUSH GS
  ENTRY_A9,     // opcode 0xA9: POP GS
  ENTRY_MAX,    // number of rows; used as the extent of Secondary_ArchSelect_LUT
};

constexpr std::array<X86InstInfo[2], ENTRY_MAX> Secondary_ArchSelect_LUT = {{
  {
    {"SYSCALL", TYPE_INST, DEFAULT_SYSCALL_FLAGS, 0, { .OpDispatch = &IR::OpDispatchBuilder::NOPOp } },
    {"SYSCALL", TYPE_INST, DEFAULT_SYSCALL_FLAGS, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::SyscallOp, true> } },
  },
  {
    {"PUSH FS", TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX> } },
    {"PUSH FS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX> } },
  },
  {
    {"POP FS",  TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_DEF) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX> } },
    {"POP FS",  TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_FS_PREFIX> } },
  },
  {
    {"PUSH GS", TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX> } },
    {"PUSH GS", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::PUSHSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX> } },
  },
  {
    {"POP GS",  TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_DEF) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX> } },
    {"POP GS",  TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_64BIT) | FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .OpDispatch = &IR::OpDispatchBuilder::Bind<&IR::OpDispatchBuilder::POPSegmentOp, FEXCore::X86Tables::DecodeFlags::FLAG_GS_PREFIX> } },
  },
}};

constexpr std::array<X86InstInfo, MAX_SECOND_TABLE_SIZE> SecondBaseOps = []() consteval {
  std::array<X86InstInfo, MAX_SECOND_TABLE_SIZE> Table{};

  constexpr U8U8InfoStruct TwoByteOpTable[] = {
    // Instructions
    {0x00, 1, X86InstInfo{"",           TYPE_GROUP_6, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                                                 0}},
    {0x01, 1, X86InstInfo{"",           TYPE_GROUP_7, FLAGS_NO_OVERLAY,                                                                                 0}},
    // These two load segment register data
    {0x02, 1, X86InstInfo{"LAR",        TYPE_UNDEC, FLAGS_NO_OVERLAY,                                                                                   0}},
    {0x03, 1, X86InstInfo{"LSL",        TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                                                    0}},
    {0x04, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                 0}},
    {0x05, 1, X86InstInfo{"",           TYPE_ARCH_DISPATCHER, FLAGS_NONE, 0, { .Indirect = Secondary_ArchSelect_LUT[ENTRY_05] }}},
    {0x06, 1, X86InstInfo{"CLTS",       TYPE_INST, FLAGS_NO_OVERLAY,                                                                                    0}},
    {0x07, 1, X86InstInfo{"SYSRET",     TYPE_INST, FLAGS_NO_OVERLAY,                                                                                    0}},
    {0x08, 1, X86InstInfo{"INVD",       TYPE_PRIV, FLAGS_NO_OVERLAY,                                                                                    0}},
    {0x09, 1, X86InstInfo{"WBINVD",     TYPE_PRIV, FLAGS_NO_OVERLAY,                                                                                    0}},
    {0x0A, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                 0}},
    {0x0B, 1, X86InstInfo{"UD2",        TYPE_INST, FLAGS_BLOCK_END | FLAGS_NO_OVERLAY,                                                    0}},
    {0x0C, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                 0}},
    {0x0D, 1, X86InstInfo{"",           TYPE_GROUP_P, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                                                 0}},
    {0x0E, 1, X86InstInfo{"FEMMS",      TYPE_INST, FLAGS_NO_OVERLAY,                                                            0}},
    {0x0F, 1, X86InstInfo{"",           TYPE_3DNOW_TABLE, FLAGS_NO_OVERLAY,                                                                             0}},

    {0x10, 1, X86InstInfo{"MOVUPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x11, 1, X86InstInfo{"MOVUPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                                                 0}},
    {0x12, 1, X86InstInfo{"MOVLPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                              0}},
    {0x13, 1, X86InstInfo{"MOVLPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                              0}},
    {0x14, 1, X86InstInfo{"UNPCKLPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x15, 1, X86InstInfo{"UNPCKHPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x16, 1, X86InstInfo{"MOVLHPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x17, 1, X86InstInfo{"MOVHPS",     TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {0x18, 1, X86InstInfo{"",           TYPE_GROUP_16, FLAGS_NO_OVERLAY,                                                                                      0}},
    {0x19, 7, X86InstInfo{"NOP",        TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                                                     0}},

    {0x20, 2, X86InstInfo{"MOV",        TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_NO_OVERLAY,                                                     0}},
    {0x22, 2, X86InstInfo{"MOV",        TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_NO_OVERLAY,                                                     0}},
    {0x24, 4, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                                                       0}},
    {0x28, 1, X86InstInfo{"MOVAPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x29, 1, X86InstInfo{"MOVAPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                                                 0}},
    {0x2A, 1, X86InstInfo{"CVTPI2PS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_SRC,                                                   0}},
    {0x2B, 1, X86InstInfo{"MOVNTPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                              0}},
    {0x2C, 1, X86InstInfo{"CVTTPS2PI",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_DST,                                                   0}},
    {0x2D, 1, X86InstInfo{"CVTPS2PI",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_DST,                                                   0}},
    {0x2E, 1, X86InstInfo{"UCOMISS",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                                   0}},
    {0x2F, 1, X86InstInfo{"COMISS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                                   0}},

    {0x30, 1, X86InstInfo{"WRMSR",      TYPE_INST, FLAGS_NO_OVERLAY,                                                                             0}},
    {0x31, 1, X86InstInfo{"RDTSC",      TYPE_INST, FLAGS_NO_OVERLAY,                                                               0}},
    {0x32, 1, X86InstInfo{"RDMSR",      TYPE_INST, FLAGS_NO_OVERLAY,                                                                             0}},
    {0x33, 1, X86InstInfo{"RDPMC",      TYPE_INST, FLAGS_NO_OVERLAY,                                                                             0}},
    {0x34, 1, X86InstInfo{"SYSENTER",   TYPE_INST, FLAGS_NO_OVERLAY,                                                                             0}},
    {0x35, 1, X86InstInfo{"SYSEXIT",    TYPE_INST, FLAGS_NO_OVERLAY,                                                                             0}},
    {0x36, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                          0}},
    {0x37, 1, X86InstInfo{"GETSEC",     TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                          0}},
    {0x38, 1, X86InstInfo{"",           TYPE_0F38_TABLE, FLAGS_NO_OVERLAY,                                                                       0}},
    {0x39, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                          0}},
    {0x3A, 1, X86InstInfo{"",           TYPE_0F3A_TABLE, FLAGS_NO_OVERLAY,                                                                       0}},
    {0x3B, 3, X86InstInfo{"",           TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                          0}},

    {0x40, 1, X86InstInfo{"CMOVO",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x41, 1, X86InstInfo{"CMOVNO",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x42, 1, X86InstInfo{"CMOVB",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x43, 1, X86InstInfo{"CMOVNB",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x44, 1, X86InstInfo{"CMOVZ",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x45, 1, X86InstInfo{"CMOVNZ",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x46, 1, X86InstInfo{"CMOVBE",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x47, 1, X86InstInfo{"CMOVNBE",    TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x48, 1, X86InstInfo{"CMOVS",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x49, 1, X86InstInfo{"CMOVNS",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4A, 1, X86InstInfo{"CMOVP",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4B, 1, X86InstInfo{"CMOVNP",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4C, 1, X86InstInfo{"CMOVL",      TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4D, 1, X86InstInfo{"CMOVNL",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4E, 1, X86InstInfo{"CMOVLE",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},
    {0x4F, 1, X86InstInfo{"CMOVNLE",    TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                               0}},

    {0x50, 1, X86InstInfo{"MOVMSKPS",   TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR,      0}},
    {0x51, 1, X86InstInfo{"SQRTPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x52, 1, X86InstInfo{"RSQRTPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x53, 1, X86InstInfo{"RCPPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x54, 1, X86InstInfo{"ANDPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x55, 1, X86InstInfo{"ANDNPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x56, 1, X86InstInfo{"ORPS",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x57, 1, X86InstInfo{"XORPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x58, 1, X86InstInfo{"ADDPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x59, 1, X86InstInfo{"MULPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5A, 1, X86InstInfo{"CVTPS2PD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5B, 1, X86InstInfo{"CVTDQ2PS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5C, 1, X86InstInfo{"SUBPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5D, 1, X86InstInfo{"MINPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5E, 1, X86InstInfo{"DIVPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},
    {0x5F, 1, X86InstInfo{"MAXPS",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                 0}},

    {0x60, 1, X86InstInfo{"PUNPCKLBW",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x61, 1, X86InstInfo{"PUNPCKLWD",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x62, 1, X86InstInfo{"PUNPCKLDQ",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x63, 1, X86InstInfo{"PACKSSWB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x64, 1, X86InstInfo{"PCMPGTB",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x65, 1, X86InstInfo{"PCMPGTW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x66, 1, X86InstInfo{"PCMPGTD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x67, 1, X86InstInfo{"PACKUSWB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x68, 1, X86InstInfo{"PUNPCKHBW",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x69, 1, X86InstInfo{"PUNPCKHWD",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x6A, 1, X86InstInfo{"PUNPCKHDQ",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x6B, 1, X86InstInfo{"PACKSSDW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},
    {0x6C, 2, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                                                       0}},
    {0x6E, 1, X86InstInfo{"MOVD",       TYPE_INST, GenFlagsDstSize(SIZE_64BIT)   | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                0}},
    {0x6F, 1, X86InstInfo{"MOVQ",       TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   0}},

    {0x70, 1, X86InstInfo{"PSHUFW",     TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                   1}},
    {0x71, 1, X86InstInfo{"",           TYPE_GROUP_12, FLAGS_NO_OVERLAY,                                                                                0}},
    {0x72, 1, X86InstInfo{"",           TYPE_GROUP_13, FLAGS_NO_OVERLAY,                                                                                0}},
    {0x73, 1, X86InstInfo{"",           TYPE_GROUP_14, FLAGS_NO_OVERLAY,                                                                                0}},
    {0x74, 1, X86InstInfo{"PCMPEQB",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                         0}},
    {0x75, 1, X86InstInfo{"PCMPEQW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                         0}},
    {0x76, 1, X86InstInfo{"PCMPEQD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                         0}},
    {0x77, 1, X86InstInfo{"EMMS",       TYPE_INST, FLAGS_NONE,                                                                                    0}},
    {0x78, 6, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                                                       0}},
    {0x7E, 1, X86InstInfo{"MOVD",       TYPE_INST, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 0}},
    {0x7F, 1, X86InstInfo{"MOVQ",       TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                    0}},

    {0x80, 1, X86InstInfo{"JO",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x81, 1, X86InstInfo{"JNO",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x82, 1, X86InstInfo{"JB",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x83, 1, X86InstInfo{"JNB",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x84, 1, X86InstInfo{"JZ",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x85, 1, X86InstInfo{"JNZ",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x86, 1, X86InstInfo{"JBE",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x87, 1, X86InstInfo{"JNBE",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x88, 1, X86InstInfo{"JS",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x89, 1, X86InstInfo{"JNS",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8A, 1, X86InstInfo{"JP",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8B, 1, X86InstInfo{"JNP",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8C, 1, X86InstInfo{"JL",      TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8D, 1, X86InstInfo{"JNL",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8E, 1, X86InstInfo{"JLE",     TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},
    {0x8F, 1, X86InstInfo{"JNLE",    TYPE_INST, GenFlagsSameSize(SIZE_64BITDEF) | FLAGS_SETS_RIP | FLAGS_SRC_SEXT | FLAGS_DISPLACE_SIZE_DIV_2 | FLAGS_NO_OVERLAY,    4}},

    {0x90, 1, X86InstInfo{"SETO",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x91, 1, X86InstInfo{"SETNO",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x92, 1, X86InstInfo{"SETB",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x93, 1, X86InstInfo{"SETNB",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x94, 1, X86InstInfo{"SETZ",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x95, 1, X86InstInfo{"SETNZ",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x96, 1, X86InstInfo{"SETBE",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x97, 1, X86InstInfo{"SETNBE",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x98, 1, X86InstInfo{"SETS",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x99, 1, X86InstInfo{"SETNS",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9A, 1, X86InstInfo{"SETP",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9B, 1, X86InstInfo{"SETNP",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9C, 1, X86InstInfo{"SETL",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9D, 1, X86InstInfo{"SETNL",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9E, 1, X86InstInfo{"SETLE",   TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},
    {0x9F, 1, X86InstInfo{"SETNLE",  TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                        0}},

    {0xA0, 1, X86InstInfo{"",        TYPE_ARCH_DISPATCHER, FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .Indirect = Secondary_ArchSelect_LUT[ENTRY_A0] }}},
    {0xA1, 1, X86InstInfo{"",        TYPE_ARCH_DISPATCHER, FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .Indirect = Secondary_ArchSelect_LUT[ENTRY_A1] }}},
    {0xA2, 1, X86InstInfo{"CPUID",   TYPE_INST,     FLAGS_SF_SRC_RAX | FLAGS_NO_OVERLAY,                                              0}},
    {0xA3, 1, X86InstInfo{"BT",      TYPE_INST,     FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                     0}},
    {0xA4, 1, X86InstInfo{"SHLD",    TYPE_INST,     FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                                              1}},
    {0xA5, 1, X86InstInfo{"SHLD",    TYPE_INST,     FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX | FLAGS_NO_OVERLAY,                           0}},
    {0xA6, 2, X86InstInfo{"",        TYPE_INVALID,  FLAGS_NO_OVERLAY,                                                                               0}},
    {0xA8, 1, X86InstInfo{"",        TYPE_ARCH_DISPATCHER, FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .Indirect = Secondary_ArchSelect_LUT[ENTRY_A8] }}},
    {0xA9, 1, X86InstInfo{"",        TYPE_ARCH_DISPATCHER, FLAGS_DEBUG_MEM_ACCESS | FLAGS_NO_OVERLAY, 0, { .Indirect = Secondary_ArchSelect_LUT[ENTRY_A9] }}},
    {0xAA, 1, X86InstInfo{"RSM",     TYPE_PRIV,     FLAGS_NO_OVERLAY,                                                                               0}},
    {0xAB, 1, X86InstInfo{"BTS",     TYPE_INST,     FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                     0}},
    {0xAC, 1, X86InstInfo{"SHRD",    TYPE_INST,     FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                                              1}},
    {0xAD, 1, X86InstInfo{"SHRD",    TYPE_INST,     FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_SRC_RCX | FLAGS_NO_OVERLAY,                           0}},
    {0xAE, 1, X86InstInfo{"",        TYPE_GROUP_15, FLAGS_NO_OVERLAY,                                                                               0}},
    {0xAF, 1, X86InstInfo{"IMUL",    TYPE_INST,     FLAGS_MODRM | FLAGS_NO_OVERLAY,                                                                 0}},

    {0xB0, 1, X86InstInfo{"CMPXCHG", TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                    0}},
    {0xB1, 1, X86InstInfo{"CMPXCHG", TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                                                  0}},
    {0xB2, 1, X86InstInfo{"LSS",     TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                0}},
    {0xB3, 1, X86InstInfo{"BTR",     TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                         0}},
    {0xB4, 1, X86InstInfo{"LFS",     TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                0}},
    {0xB5, 1, X86InstInfo{"LGS",     TYPE_INVALID, FLAGS_NO_OVERLAY,                                                                                0}},
    {0xB6, 1, X86InstInfo{"MOVZX",   TYPE_INST, GenFlagsSrcSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_NO_OVERLAY,                                        0}},
    {0xB7, 1, X86InstInfo{"MOVZX",   TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_NO_OVERLAY,                                       0}},
    {0xB8, 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE,                                                                                      0}},
    {0xB9, 1, X86InstInfo{"",        TYPE_GROUP_10, FLAGS_NO_OVERLAY,                                                                               0}},
    {0xBA, 1, X86InstInfo{"",        TYPE_GROUP_8, FLAGS_NO_OVERLAY,                                                                                0}},
    {0xBB, 1, X86InstInfo{"BTC",     TYPE_INST, FLAGS_DEBUG_MEM_ACCESS | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                         0}},
    {0xBC, 1, X86InstInfo{"BSF",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY66,                                                                   0}},
    {0xBD, 1, X86InstInfo{"BSR",     TYPE_INST, FLAGS_MODRM | FLAGS_NO_OVERLAY66,                                                                   0}},
    {0xBE, 1, X86InstInfo{"MOVSX",   TYPE_INST, GenFlagsSrcSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_NO_OVERLAY,                                        0}},
    {0xBF, 1, X86InstInfo{"MOVSX",   TYPE_INST, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_NO_OVERLAY,                                       0}},

    {0xC0, 1, X86InstInfo{"XADD",    TYPE_INST, GenFlagsSameSize(SIZE_8BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST,                                                       0}},
    {0xC1, 1, X86InstInfo{"XADD",    TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_NO_OVERLAY,                                                                 0}},
    {0xC2, 1, X86InstInfo{"CMPPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                     1}},
    {0xC3, 1, X86InstInfo{"MOVNTI",  TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST,                                                            0}},
    {0xC4, 1, X86InstInfo{"PINSRW",  TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX | FLAGS_SF_SRC_GPR,           1}},
    {0xC5, 1, X86InstInfo{"PEXTRW",  TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS | FLAGS_SF_MMX, 1}},
    {0xC6, 1, X86InstInfo{"SHUFPS",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                                                     1}},
    {0xC7, 1, X86InstInfo{"",        TYPE_GROUP_9, FLAGS_NO_OVERLAY,                                                                                               0}},
    {0xC8, 8, X86InstInfo{"BSWAP",   TYPE_INST, FLAGS_SF_REX_IN_BYTE | FLAGS_NO_OVERLAY,                                                                           0}},

    {0xD0, 1, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                                                         0}},
    {0xD1, 1, X86InstInfo{"PSRLW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD2, 1, X86InstInfo{"PSRLD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD3, 1, X86InstInfo{"PSRLQ",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD4, 1, X86InstInfo{"PADDQ",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD5, 1, X86InstInfo{"PMULLW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD6, 1, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                                                         0}},
    {0xD7, 1, X86InstInfo{"PMOVMSKB", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_SF_MMX_SRC,                                  0}},
    {0xD8, 1, X86InstInfo{"PSUBUSB",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xD9, 1, X86InstInfo{"PSUBUSW",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDA, 1, X86InstInfo{"PMINUB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDB, 1, X86InstInfo{"PAND",     TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDC, 1, X86InstInfo{"PADDUSB",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDD, 1, X86InstInfo{"PADDUSW",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDE, 1, X86InstInfo{"PMAXUB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xDF, 1, X86InstInfo{"PANDN",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},

    {0xE0, 1, X86InstInfo{"PAVGB",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE1, 1, X86InstInfo{"PSRAW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE2, 1, X86InstInfo{"PSRAD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE3, 1, X86InstInfo{"PAVGW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                       0}},
    {0xE4, 1, X86InstInfo{"PMULHUW",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE5, 1, X86InstInfo{"PMULHW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE6, 1, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                                                         0}},
    {0xE7, 1, X86InstInfo{"MOVNTQ",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                   0}},
    {0xE8, 1, X86InstInfo{"PSUBSB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xE9, 1, X86InstInfo{"PSUBSW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xEA, 1, X86InstInfo{"PMINSW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xEB, 1, X86InstInfo{"POR",      TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xEC, 1, X86InstInfo{"PADDSB",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xED, 1, X86InstInfo{"PADDSW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xEE, 1, X86InstInfo{"PMAXSW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xEF, 1, X86InstInfo{"PXOR",     TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},

    {0xF0, 1, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                                                         0}},
    {0xF1, 1, X86InstInfo{"PSLLW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF2, 1, X86InstInfo{"PSLLD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF3, 1, X86InstInfo{"PSLLQ",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF4, 1, X86InstInfo{"PMULUDQ",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF5, 1, X86InstInfo{"PMADDWD",  TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF6, 1, X86InstInfo{"PSADBW",   TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF7, 1, X86InstInfo{"MASKMOVQ", TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF8, 1, X86InstInfo{"PSUBB",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xF9, 1, X86InstInfo{"PSUBW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFA, 1, X86InstInfo{"PSUBD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFB, 1, X86InstInfo{"PSUBQ",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFC, 1, X86InstInfo{"PADDB",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFD, 1, X86InstInfo{"PADDW",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFE, 1, X86InstInfo{"PADDD",    TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX,                                      0}},
    {0xFF, 1, X86InstInfo{"UD0",      TYPE_INST, FLAGS_BLOCK_END,                                                                                           0}},

#ifndef _WIN32
    // FEX reserved instructions
    // Unused x86 encoding instruction.

    {0x3E, 1, X86InstInfo{"CALLBACKRET",  TYPE_INST, FLAGS_BLOCK_END | FLAGS_NO_OVERLAY | FLAGS_SETS_RIP,                                                                          0}},

    // This was originally used by VIA to jump to its alternative instruction set. Used for OP_THUNK
    {0x3F, 1, X86InstInfo{"ALTINST",      TYPE_INST, FLAGS_BLOCK_END | FLAGS_NO_OVERLAY | FLAGS_SETS_RIP,                                                            0}},
#endif
  };

  GenerateTable(Table.data(), TwoByteOpTable, std::size(TwoByteOpTable));

  IR::InstallToTable(Table, IR::OpDispatch_TwoByteOpTable);

  return Table;
}();

// Decode table for the "REP-modified" secondary opcodes, built entirely at compile time
// by an immediately-invoked consteval lambda.
// NOTE(review): presumably this is the F3-prefixed view of the two-byte (0F xx) opcode
// space — confirm against the decoder that indexes this table.
//
// The finished table has MAX_REP_MOD_TABLE_SIZE entries. It is populated from the row list
// below by GenerateTableWithCopy(), which is also handed SecondBaseOps, and is then passed
// through IR::InstallToTable() together with IR::OpDispatch_SecondaryRepModTables.
constexpr std::array<X86InstInfo, MAX_REP_MOD_TABLE_SIZE> RepModOps = []() consteval {
  std::array<X86InstInfo, MAX_REP_MOD_TABLE_SIZE> Table{};

  // Row layout: {first opcode, number of consecutive opcodes covered, descriptor}.
  // The rows below tile 0x00-0xFF exactly once, in ascending order, with no gaps or overlaps;
  // keep that property when editing (adjust the neighbouring run lengths when splitting a range).
  //
  // Descriptor kinds used here:
  //   TYPE_INST       - a real instruction with explicit size/ModRM/XMM flags.
  //   TYPE_INVALID    - no instruction is defined for this prefix + opcode combination.
  //   TYPE_COPY_OTHER - NOTE(review): presumably filled in from the SecondBaseOps table that is
  //                     passed to GenerateTableWithCopy() below; confirm in that helper.
  // The trailing integer is 0 everywhere except PSHUFHW and CMPSS (1).
  // NOTE(review): presumably the number of immediate bytes that follow — confirm against X86InstInfo.
  constexpr U8U8InfoStruct RepModOpTable[] = {
    {0x0, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},

    // 0x10-0x1F: 0x11 is the store form of 0x10 (FLAGS_SF_MOD_DST).
    {0x10, 1, X86InstInfo{"MOVSS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x11, 1, X86InstInfo{"MOVSS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                    0}},
    {0x12, 1, X86InstInfo{"MOVSLDUP",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  0}},
    {0x13, 3, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x16, 1, X86InstInfo{"MOVSHDUP",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  0}},
    {0x17, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x19, 7, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},

    // 0x20-0x2F: 0x2A takes a GPR source, 0x2C/0x2D write a GPR destination and carry no
    // explicit size flags; 0x2B is a memory-only store.
    {0x20, 4, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0x24, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x2A, 1, X86InstInfo{"CVTSI2SS",  TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 0}},
    {0x2B, 1, X86InstInfo{"MOVNTSS",   TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {0x2C, 1, X86InstInfo{"CVTTSS2SI", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},
    {0x2D, 1, X86InstInfo{"CVTSS2SI",  TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},
    {0x2E, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},

    {0x30, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0x40, 16, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                        0}},

    // 0x50-0x5F: scalar ops declared with a 128-bit destination and a 32-bit source.
    // NOTE(review): CVTSS2SD declares a 64-bit source, unlike its neighbours; confirm this is
    // intentional (the dispatcher may override the load size).
    {0x50, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x51, 1, X86InstInfo{"SQRTSS",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x52, 1, X86InstInfo{"RSQRTSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x53, 1, X86InstInfo{"RCPSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x54, 4, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x58, 1, X86InstInfo{"ADDSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x59, 1, X86InstInfo{"MULSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5A, 1, X86InstInfo{"CVTSS2SD",  TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,   0}},
    {0x5B, 1, X86InstInfo{"CVTTPS2DQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  0}},
    {0x5C, 1, X86InstInfo{"SUBSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5D, 1, X86InstInfo{"MINSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5E, 1, X86InstInfo{"DIVSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5F, 1, X86InstInfo{"MAXSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},

    // 0x60-0x6F: only 0x6F is defined; the two invalid runs together cover 0x60-0x6E.
    {0x60, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x68, 7, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x6F, 1, X86InstInfo{"MOVDQU",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  0}},

    // 0x70-0x7F: 0x7F is the store form of 0x6F (FLAGS_SF_MOD_DST).
    {0x70, 1, X86InstInfo{"PSHUFHW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  1}},
    {0x71, 3, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0x74, 4, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x78, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0x7E, 1, X86InstInfo{"MOVQ",      TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,   0}},
    {0x7F, 1, X86InstInfo{"MOVDQU",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,  0}},

    {0x80, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0x90, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0xA0, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},

    // 0xB0-0xBF: the bit-counting instructions carry only FLAGS_MODRM (no XMM or explicit size flags).
    {0xB0, 8, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0xB8, 1, X86InstInfo{"POPCNT",    TYPE_INST, FLAGS_MODRM,                                      0}},
    {0xB9, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBA, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBB, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xBC, 1, X86InstInfo{"TZCNT",     TYPE_INST, FLAGS_MODRM,                                      0}},
    {0xBD, 1, X86InstInfo{"LZCNT",     TYPE_INST, FLAGS_MODRM,                                      0}},
    {0xBE, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},

    {0xC0, 2, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
    {0xC2, 1, X86InstInfo{"CMPSS",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    1}},
    {0xC3, 5, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xC8, 8, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},

    // 0xD0-0xDF: MOVQ2DQ is the only entry; its source is flagged as an MMX register (FLAGS_SF_MMX_SRC).
    {0xD0, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xD6, 1, X86InstInfo{"MOVQ2DQ",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_SRC, 0}},
    {0xD7, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xD8, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},

    {0xE0, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xE6, 1, X86InstInfo{"CVTDQ2PD",  TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,  0}},
    {0xE7, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xE8, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},

    // 0xF0-0xFF: everything is invalid except 0xFF, which is a TYPE_COPY_OTHER slot.
    {0xF0, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xF8, 7, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},
    {0xFF, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                     0}},
  };

  // Expand the run-length rows into the flat table; SecondBaseOps is supplied as the companion table.
  GenerateTableWithCopy(Table.data(), RepModOpTable, std::size(RepModOpTable), SecondBaseOps.data());

  // Hand the table to the IR side together with its dispatch list.
  // NOTE(review): presumably this attaches an OpDispatch handler to each entry — confirm in IR::InstallToTable.
  IR::InstallToTable(Table, IR::OpDispatch_SecondaryRepModTables);
  return Table;
}();

// Decode table for the "REPNE-modified" secondary opcodes, built entirely at compile time
// by an immediately-invoked consteval lambda.
// NOTE(review): presumably this is the F2-prefixed view of the two-byte (0F xx) opcode
// space — confirm against the decoder that indexes this table.
//
// The finished table has MAX_REPNE_MOD_TABLE_SIZE entries. It is populated from the row list
// below by GenerateTableWithCopy(), which is also handed SecondBaseOps, and is then passed
// through IR::InstallToTable() together with IR::OpDispatch_SecondaryRepNEModTables.
constexpr std::array<X86InstInfo, MAX_REPNE_MOD_TABLE_SIZE> RepNEModOps = []() consteval {
  std::array<X86InstInfo, MAX_REPNE_MOD_TABLE_SIZE> Table{};

  // Row layout: {first opcode, number of consecutive opcodes covered, descriptor}.
  // The rows below tile 0x00-0xFF exactly once, in ascending order, with no gaps or overlaps;
  // keep that property when editing (adjust the neighbouring run lengths when splitting a range).
  //
  // Descriptor kinds used here:
  //   TYPE_INST       - a real instruction with explicit size/ModRM/XMM flags.
  //   TYPE_INVALID    - no instruction is defined for this prefix + opcode combination.
  //   TYPE_COPY_OTHER - NOTE(review): presumably filled in from the SecondBaseOps table that is
  //                     passed to GenerateTableWithCopy() below; confirm in that helper.
  // The trailing integer is 0 everywhere except PSHUFLW and CMPSD (1) and the 0x78 INSERTQ form (2).
  // NOTE(review): presumably the number of immediate bytes that follow — confirm against X86InstInfo.
  constexpr U8U8InfoStruct RepNEModOpTable[] = {
    {0x0, 16, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                     0}},

    // 0x10-0x1F: 0x11 is the store form of 0x10 (FLAGS_SF_MOD_DST).
    {0x10, 1, X86InstInfo{"MOVSD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                  0}},
    {0x11, 1, X86InstInfo{"MOVSD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                  0}},
    {0x12, 1, X86InstInfo{"MOVDDUP",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                  0}},
    {0x13, 6, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                        0}},
    {0x19, 7, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                     0}},

    // 0x20-0x2F: 0x2A takes a GPR source, 0x2C/0x2D write a GPR destination and carry no
    // explicit size flags; 0x2B is a memory-only store.
    {0x20, 4, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0x24, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x2A, 1, X86InstInfo{"CVTSI2SD",  TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR,                    0}},
    {0x2B, 1, X86InstInfo{"MOVNTSD",   TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {0x2C, 1, X86InstInfo{"CVTTSD2SI", TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},
    {0x2D, 1, X86InstInfo{"CVTSD2SI",  TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},
    {0x2E, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},

    {0x30, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0x40, 16, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                         0}},

    // 0x50-0x5F: scalar ops declared with a 128-bit destination and a 64-bit source.
    // NOTE(review): CVTSD2SS uses GenFlagsSameSize(SIZE_128BIT), unlike its neighbours; confirm
    // this is intentional (the dispatcher may override the load size).
    {0x50, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x51, 1, X86InstInfo{"SQRTSD",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {0x52, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x58, 1, X86InstInfo{"ADDSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x59, 1, X86InstInfo{"MULSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5A, 1, X86InstInfo{"CVTSD2SS",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5B, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x5C, 1, X86InstInfo{"SUBSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5D, 1, X86InstInfo{"MINSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5E, 1, X86InstInfo{"DIVSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},
    {0x5F, 1, X86InstInfo{"MAXSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    0}},

    {0x60, 16, X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                         0}},

    // 0x70-0x7F: both INSERTQ forms are register-only (FLAGS_SF_MOD_REG_ONLY); they differ only
    // in the trailing integer (2 for 0x78, 0 for 0x79).
    {0x70, 1, X86InstInfo{"PSHUFLW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                   1}},
    {0x71, 3, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0x74, 4, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x78, 1, X86InstInfo{"INSERTQ",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,2}},
    {0x79, 1, X86InstInfo{"INSERTQ",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS, 0}},
    {0x7A, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0x7C, 1, X86InstInfo{"HADDPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                   0}},
    {0x7D, 1, X86InstInfo{"HSUBPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                   0}},
    {0x7E, 2, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},

    // 0x80-0xBF: no instructions are defined here; 0xB8 and 0xBB-0xBF are invalid, the rest are
    // TYPE_COPY_OTHER slots.
    {0x80, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0x90, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0xA0, 16, X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0xB0, 8,  X86InstInfo{"",         TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0xB8, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0xB9, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBA, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBB, 5,  X86InstInfo{"",         TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xC0, 2, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},
    {0xC2, 1, X86InstInfo{"CMPSD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                    1}},
    {0xC3, 5, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xC8, 8, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                      0}},

    // 0xD0-0xDF: MOVDQ2Q's destination is flagged as an MMX register (FLAGS_SF_MMX_DST).
    {0xD0, 1, X86InstInfo{"ADDSUBPS",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                   0}},
    {0xD1, 5, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xD6, 1, X86InstInfo{"MOVDQ2Q",   TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_DST,     0}},
    {0xD7, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xD8, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},

    {0xE0, 6, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xE6, 1, X86InstInfo{"CVTPD2DQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                   0}},
    {0xE7, 1, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xE8, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},

    // 0xF0-0xFF: LDDQU only accepts a memory operand (FLAGS_SF_MOD_MEM_ONLY); everything else is invalid.
    {0xF0, 1, X86InstInfo{"LDDQU",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS,0}},
    {0xF1, 7, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
    {0xF8, 8, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                                         0}},
  };

  // Expand the run-length rows into the flat table; SecondBaseOps is supplied as the companion table.
  GenerateTableWithCopy(Table.data(), RepNEModOpTable,   std::size(RepNEModOpTable), SecondBaseOps.data());

  // Hand the table to the IR side together with its dispatch list.
  // NOTE(review): presumably this attaches an OpDispatch handler to each entry — confirm in IR::InstallToTable.
  IR::InstallToTable(Table, IR::OpDispatch_SecondaryRepNEModTables);
  return Table;
}();

constexpr std::array<X86InstInfo, MAX_OPSIZE_MOD_TABLE_SIZE> OpSizeModOps = []() consteval {
  std::array<X86InstInfo, MAX_OPSIZE_MOD_TABLE_SIZE> Table{};

  constexpr U8U8InfoStruct OpSizeModOpTable[] = {
    {0x0, 16, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},

    {0x10, 1, X86InstInfo{"MOVUPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x11, 1, X86InstInfo{"MOVUPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                         0}},
    {0x12, 1, X86InstInfo{"MOVLPD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS,      0}},
    {0x13, 1, X86InstInfo{"MOVLPD",     TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,      0}},
    {0x14, 1, X86InstInfo{"UNPCKLPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x15, 1, X86InstInfo{"UNPCKHPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x16, 1, X86InstInfo{"MOVHPD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS,      0}},
    {0x17, 1, X86InstInfo{"MOVHPD",     TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,      0}},
    {0x18, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0x19, 7, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},

    {0x20, 4, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0x24, 4, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},

    {0x28, 1, X86InstInfo{"MOVAPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x29, 1, X86InstInfo{"MOVAPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                         0}},
    {0x2A, 1, X86InstInfo{"CVTPI2PD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_SRC,                                                                   0}},
    {0x2B, 1, X86InstInfo{"MOVNTPD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,      0}},
    {0x2C, 1, X86InstInfo{"CVTTPD2PI",  TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_DST,                          0}},
    {0x2D, 1, X86InstInfo{"CVTPD2PI",   TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MMX_DST,                          0}},
    {0x2E, 1, X86InstInfo{"UCOMISD",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                          0}},
    {0x2F, 1, X86InstInfo{"COMISD",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                          0}},

    {0x30, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0x40, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},

    {0x50, 1, X86InstInfo{"MOVMSKPD",   TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR,     0}},
    {0x51, 1, X86InstInfo{"SQRTPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x52, 2, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0x54, 1, X86InstInfo{"ANDPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                        0}},
    {0x55, 1, X86InstInfo{"ANDNPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x56, 1, X86InstInfo{"ORPD",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x57, 1, X86InstInfo{"XORPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x58, 1, X86InstInfo{"ADDPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x59, 1, X86InstInfo{"MULPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5A, 1, X86InstInfo{"CVTPD2PS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5B, 1, X86InstInfo{"CVTPS2DQ",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5C, 1, X86InstInfo{"SUBPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5D, 1, X86InstInfo{"MINPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5E, 1, X86InstInfo{"DIVPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x5F, 1, X86InstInfo{"MAXPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},

    {0x60, 1, X86InstInfo{"PUNPCKLBW",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x61, 1, X86InstInfo{"PUNPCKLWD",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x62, 1, X86InstInfo{"PUNPCKLDQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x63, 1, X86InstInfo{"PACKSSWB",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x64, 1, X86InstInfo{"PCMPGTB",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x65, 1, X86InstInfo{"PCMPGTW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x66, 1, X86InstInfo{"PCMPGTD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x67, 1, X86InstInfo{"PACKUSWB",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x68, 1, X86InstInfo{"PUNPCKHBW",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x69, 1, X86InstInfo{"PUNPCKHWD",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x6A, 1, X86InstInfo{"PUNPCKHDQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x6B, 1, X86InstInfo{"PACKSSDW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x6C, 1, X86InstInfo{"PUNPCKLQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x6D, 1, X86InstInfo{"PUNPCKHQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x6E, 1, X86InstInfo{"MOVD",       TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS,      0}},
    {0x6F, 1, X86InstInfo{"MOVDQA",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},

    {0x70, 1, X86InstInfo{"PSHUFD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         1}},
    {0x71, 3, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0x74, 1, X86InstInfo{"PCMPEQB",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x75, 1, X86InstInfo{"PCMPEQW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x76, 1, X86InstInfo{"PCMPEQD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x77, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0x78, 1, X86InstInfo{"",           TYPE_GROUP_17, FLAGS_NONE,                                                              0}},

    {0x79, 1, X86InstInfo{"EXTRQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,                         0}},
    {0x7A, 2, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0x7C, 1, X86InstInfo{"HADDPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x7D, 1, X86InstInfo{"HSUBPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0x7E, 1, X86InstInfo{"MOVD",       TYPE_INST, GenFlagsSrcSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS,      0}},
    {0x7F, 1, X86InstInfo{"MOVDQA",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,                         0}},

    {0x80, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0x90, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0xA0, 16, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0xB0, 8, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0xB8, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0xB9, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBA, 1, X86InstInfo{"",          TYPE_COPY_OTHER, FLAGS_NONE,                                        0}},
    {0xBB, 5, X86InstInfo{"",          TYPE_INVALID, FLAGS_NONE,                                        0}},

    {0xC0, 2, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
    {0xC2, 1, X86InstInfo{"CMPPD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         1}},
    {0xC3, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0xC4, 1, X86InstInfo{"PINSRW",     TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS,      1}},
    {0xC5, 1, X86InstInfo{"PEXTRW",     TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS,      1}},
    {0xC6, 1, X86InstInfo{"SHUFPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         1}},
    {0xC7, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0xC8, 8, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},

    {0xD0, 1, X86InstInfo{"ADDSUBPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD1, 1, X86InstInfo{"PSRLW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD2, 1, X86InstInfo{"PSRLD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD3, 1, X86InstInfo{"PSRLQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD4, 1, X86InstInfo{"PADDQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD5, 1, X86InstInfo{"PMULLW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD6, 1, X86InstInfo{"MOVQ",       TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,       0}},
    {0xD7, 1, X86InstInfo{"PMOVMSKB",   TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR,      0}},
    {0xD8, 1, X86InstInfo{"PSUBUSB",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xD9, 1, X86InstInfo{"PSUBUSW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDA, 1, X86InstInfo{"PMINUB",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDB, 1, X86InstInfo{"PAND",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDC, 1, X86InstInfo{"PADDUSB",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDD, 1, X86InstInfo{"PADDUSW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDE, 1, X86InstInfo{"PMAXUB",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xDF, 1, X86InstInfo{"PANDN",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},

    {0xE0, 1, X86InstInfo{"PAVGB",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE1, 1, X86InstInfo{"PSRAW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE2, 1, X86InstInfo{"PSRAD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE3, 1, X86InstInfo{"PAVGW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE4, 1, X86InstInfo{"PMULHUW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE5, 1, X86InstInfo{"PMULHW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE6, 1, X86InstInfo{"CVTTPD2DQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE7, 1, X86InstInfo{"MOVNTDQ",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS,      0}},
    {0xE8, 1, X86InstInfo{"PSUBSB",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xE9, 1, X86InstInfo{"PSUBSW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xEA, 1, X86InstInfo{"PMINSW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xEB, 1, X86InstInfo{"POR",        TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xEC, 1, X86InstInfo{"PADDSB",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xED, 1, X86InstInfo{"PADDSW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xEE, 1, X86InstInfo{"PMAXSW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xEF, 1, X86InstInfo{"PXOR",       TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},

    {0xF0, 1, X86InstInfo{"",           TYPE_INVALID, FLAGS_NONE,                                                               0}},
    {0xF1, 1, X86InstInfo{"PSLLW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF2, 1, X86InstInfo{"PSLLD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF3, 1, X86InstInfo{"PSLLQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF4, 1, X86InstInfo{"PMULUDQ",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF5, 1, X86InstInfo{"PMADDWD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF6, 1, X86InstInfo{"PSADBW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF7, 1, X86InstInfo{"MASKMOVDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS,                         0}},
    {0xF8, 1, X86InstInfo{"PSUBB",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xF9, 1, X86InstInfo{"PSUBW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xFA, 1, X86InstInfo{"PSUBD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xFB, 1, X86InstInfo{"PSUBQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xFC, 1, X86InstInfo{"PADDB",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xFD, 1, X86InstInfo{"PADDW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,                         0}},
    {0xFE, 1, X86InstInfo{"PADDD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS,       0}},
    {0xFF, 1, X86InstInfo{"",           TYPE_COPY_OTHER, FLAGS_NONE,                                                            0}},
  };

  GenerateTableWithCopy(Table.data(), OpSizeModOpTable, std::size(OpSizeModOpTable), SecondBaseOps.data());

  IR::InstallToTable(Table, IR::OpDispatch_SecondaryOpSizeModTables);
  return Table;
}();
}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher/VEXTables.h"

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;

namespace AVX128 {
  using namespace IR;
// OPD(map_select, pp, opcode): packs the three fields into a single integer
// key used as the first member of each DispatchTableEntry below.
//   - opcode           is OR'd into the low bits unshifted
//   - pp               is shifted left by 8
//   - (map_select - 1) is shifted left by 10
// NOTE(review): the names suggest these are the VEX map-select / pp prefix
// fields and an 8-bit opcode, so the fields would not overlap - confirm
// against the decoder that consumes this key.
// Every parameter is parenthesized so that expression arguments (e.g. `a | b`
// or `c ? x : y`) are evaluated as a unit before the subtraction/shift.
#define OPD(map_select, pp, opcode) ((((map_select) - 1) << 10) | ((pp) << 8) | (opcode))
  constexpr DispatchTableEntry BaseTable[] = {
    {OPD(1, 0b00, 0x10), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b01, 0x10), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b10, 0x10), 1, &OpDispatchBuilder::AVX128_VMOVSS},
    {OPD(1, 0b11, 0x10), 1, &OpDispatchBuilder::AVX128_VMOVSD},
    {OPD(1, 0b00, 0x11), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b01, 0x11), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b10, 0x11), 1, &OpDispatchBuilder::AVX128_VMOVSS},
    {OPD(1, 0b11, 0x11), 1, &OpDispatchBuilder::AVX128_VMOVSD},

    {OPD(1, 0b00, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVLP},
    {OPD(1, 0b01, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVLP},
    {OPD(1, 0b10, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVSLDUP},
    {OPD(1, 0b11, 0x12), 1, &OpDispatchBuilder::AVX128_VMOVDDUP},
    {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP},
    {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::AVX128_VMOVLP},

    {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::AVX128_VMOVHP},
    {OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_VMOVHP},
    {OPD(1, 0b10, 0x16), 1, &OpDispatchBuilder::AVX128_VMOVSHDUP},
    {OPD(1, 0b00, 0x17), 1, &OpDispatchBuilder::AVX128_VMOVHP},
    {OPD(1, 0b01, 0x17), 1, &OpDispatchBuilder::AVX128_VMOVHP},

    {OPD(1, 0b00, 0x28), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b01, 0x28), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::AVX128_VMOVAPS},

    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertCVTGPR_To_FPR, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},
    {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},

    {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_CVTFPR_To_GPR, OpSize::i32Bit, false>},
    {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_CVTFPR_To_GPR, OpSize::i64Bit, false>},

    {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_CVTFPR_To_GPR, OpSize::i32Bit, true>},
    {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_CVTFPR_To_GPR, OpSize::i64Bit, true>},

    {OPD(1, 0b00, 0x2E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_UCOMISx, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_UCOMISx, OpSize::i64Bit>},
    {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_UCOMISx, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_UCOMISx, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_MOVMSK, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_MOVMSK, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFSQRT, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRSQRT, OpSize::i32Bit>},
    {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VFRECP, OpSize::i32Bit>},
    {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>},
    {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>},

    {OPD(1, 0b00, 0x55), 1, &OpDispatchBuilder::AVX128_VANDN},
    {OPD(1, 0b01, 0x55), 1, &OpDispatchBuilder::AVX128_VANDN},

    {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, OpSize::i128Bit>},
    {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, OpSize::i128Bit>},

    {OPD(1, 0b00, 0x57), 1, &OpDispatchBuilder::AVX128_VectorXOR},
    {OPD(1, 0b01, 0x57), 1, &OpDispatchBuilder::AVX128_VectorXOR},

    {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFADD, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMUL, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalar_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float, OpSize::i32Bit, false>},
    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int, OpSize::i32Bit, true>},
    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int, OpSize::i32Bit, false>},

    {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFSUB, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMIN, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFDIV, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VFMAX, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorScalarInsertALU, IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPACKSS, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPACKUS, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPACKSS, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKL, OpSize::i64Bit>},
    {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPUNPCKH, OpSize::i64Bit>},
    {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR},

    {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},

    {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPERMILImm, OpSize::i32Bit>},
    {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, false>},
    {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSHUFW, true>},

    {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::AVX128_VZERO},

    {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHADDP, IR::OP_VFADDP, OpSize::i64Bit>},
    {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHADDP, IR::OP_VFADDP, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHSUBP, OpSize::i64Bit>},
    {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHSUBP, OpSize::i32Bit>},

    {OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::AVX128_MOVBetweenGPR_FPR},
    {OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::AVX128_MOVQ},

    {OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},
    {OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::AVX128_VMOVAPS},

    {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFCMP, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFCMP, OpSize::i64Bit>},
    {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalarFCMP, OpSize::i32Bit>},
    {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalarFCMP, OpSize::i64Bit>},

    {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::AVX128_VPINSRW},
    {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_PExtr, OpSize::i16Bit>},

    {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VSHUF, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VSHUF, OpSize::i64Bit>},

    {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VADDSUBP, OpSize::i64Bit>},
    {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VADDSUBP, OpSize::i32Bit>},

    {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL
    {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL
    {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i64Bit, IROps::OP_VUSHRSWIDE>}, // VPSRL
    {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::AVX128_MOVQ},
    {OPD(1, 0b01, 0xD7), 1, &OpDispatchBuilder::AVX128_MOVMSKB},

    {OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VAND, OpSize::i128Bit>},
    {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUQADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDF), 1, &OpDispatchBuilder::AVX128_VANDN},

    {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VSSHRSWIDE>}, // VPSRA
    {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VSSHRSWIDE>}, // VPSRA
    {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VURAVG, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMULHW, false>},
    {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMULHW, true>},

    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int, OpSize::i64Bit, false>},
    {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Int_To_Float, OpSize::i32Bit, true>},
    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_Vector_CVT_Float_To_Int, OpSize::i64Bit, true>},

    {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},

    {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VOR, OpSize::i128Bit>},
    {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSQADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEF), 1, &OpDispatchBuilder::AVX128_VectorXOR},

    {OPD(1, 0b11, 0xF0), 1, &OpDispatchBuilder::AVX128_MOVVectorUnaligned},
    {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i16Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL
    {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i32Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL
    {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftWideImpl, OpSize::i64Bit, IROps::OP_VUSHLSWIDE>}, // VPSLL
    {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMULL, OpSize::i32Bit, false>},
    {OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::AVX128_VPMADDWD},
    {OPD(1, 0b01, 0xF6), 1, &OpDispatchBuilder::AVX128_VPSADBW},
    {OPD(1, 0b01, 0xF7), 1, &OpDispatchBuilder::AVX128_MASKMOV},

    {OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSUB, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VADD, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x00), 1, &OpDispatchBuilder::AVX128_VPSHUFB},
    {OPD(2, 0b01, 0x01), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHADDP, IR::OP_VADDP, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VHADDP, IR::OP_VADDP, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x03), 1, &OpDispatchBuilder::AVX128_VPHADDSW},
    {OPD(2, 0b01, 0x04), 1, &OpDispatchBuilder::AVX128_VPMADDUBSW},

    {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPHSUB, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPHSUB, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::AVX128_VPHSUBSW},

    {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSIGN, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSIGN, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPSIGN, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0B), 1, &OpDispatchBuilder::AVX128_VPMULHRSW},
    {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPERMILReg, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPERMILReg, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VTESTP, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VTESTP, OpSize::i64Bit>},


    {OPD(2, 0b01, 0x13), 1, &OpDispatchBuilder::AVX128_VCVTPH2PS},
    {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::AVX128_VPERMD},
    {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::AVX128_PTest},
    {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i128Bit>},
    {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorUnary, IR::OP_VABS, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i16Bit, true>},
    {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i64Bit, true>},
    {OPD(2, 0b01, 0x23), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i64Bit, true>},
    {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i32Bit, OpSize::i64Bit, true>},

    {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMULL, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPEQ, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::AVX128_MOVVectorNT},
    {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPACKUS, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VMASKMOV, OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VMASKMOV, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VMASKMOV, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x2F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VMASKMOV, OpSize::i64Bit, true>},

    {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i16Bit, false>},
    {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x32), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i8Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i16Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ExtendVectorElements, OpSize::i32Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x36), 1, &OpDispatchBuilder::AVX128_VPERMD},

    {OPD(2, 0b01, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VCMPGT, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMIN, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMIN, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VSMAX, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VUMAX, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorALU, IR::OP_VMUL, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x41), 1, &OpDispatchBuilder::AVX128_PHMINPOSUW},
    {OPD(2, 0b01, 0x45), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VUSHR>}, // VPSRLV
    {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VSSHR>}, // VPSRAVD
    {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VariableShiftImpl, IROps::OP_VUSHL>}, // VPSLLV

    {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i128Bit>},

    {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBROADCAST, OpSize::i16Bit>},

    {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMASKMOV, false>},
    {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPMASKMOV, true>},

    {OPD(2, 0b01, 0x90), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPGATHER, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x91), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPGATHER, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x92), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPGATHER, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x93), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPGATHER, OpSize::i64Bit>},

    {OPD(2, 0b01, 0x96), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, true, 1, 3, 2>},  // VFMADDSUB
    {OPD(2, 0b01, 0x97), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, false, 1, 3, 2>}, // VFMSUBADD

    {OPD(2, 0b01, 0x98), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 1, 3, 2>}, // VFMADD
    {OPD(2, 0b01, 0x99), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 1, 3, 2>}, // VFMADD
    {OPD(2, 0b01, 0x9A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 1, 3, 2>}, // VFMSUB
    {OPD(2, 0b01, 0x9B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 1, 3, 2>}, // VFMSUB
    {OPD(2, 0b01, 0x9C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 1, 3, 2>}, // VFNMADD
    {OPD(2, 0b01, 0x9D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 1, 3, 2>}, // VFNMADD
    {OPD(2, 0b01, 0x9E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 1, 3, 2>}, // VFNMSUB
    {OPD(2, 0b01, 0x9F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 1, 3, 2>}, // VFNMSUB

    {OPD(2, 0b01, 0xA8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 2, 1, 3>}, // VFMADD
    {OPD(2, 0b01, 0xA9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 2, 1, 3>}, // VFMADD
    {OPD(2, 0b01, 0xAA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 2, 1, 3>}, // VFMSUB
    {OPD(2, 0b01, 0xAB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 2, 1, 3>}, // VFMSUB
    {OPD(2, 0b01, 0xAC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 2, 1, 3>}, // VFNMADD
    {OPD(2, 0b01, 0xAD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 2, 1, 3>}, // VFNMADD
    {OPD(2, 0b01, 0xAE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 2, 1, 3>}, // VFNMSUB
    {OPD(2, 0b01, 0xAF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 2, 1, 3>}, // VFNMSUB

    {OPD(2, 0b01, 0xB8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLA, 2, 3, 1>}, // VFMADD
    {OPD(2, 0b01, 0xB9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLASCALARINSERT, 2, 3, 1>}, // VFMADD
    {OPD(2, 0b01, 0xBA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFMLS, 2, 3, 1>}, // VFMSUB
    {OPD(2, 0b01, 0xBB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFMLSSCALARINSERT, 2, 3, 1>}, // VFMSUB
    {OPD(2, 0b01, 0xBC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLA, 2, 3, 1>}, // VFNMADD
    {OPD(2, 0b01, 0xBD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLASCALARINSERT, 2, 3, 1>}, // VFNMADD
    {OPD(2, 0b01, 0xBE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAImpl, IR::OP_VFNMLS, 2, 3, 1>}, // VFNMSUB
    {OPD(2, 0b01, 0xBF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAScalarImpl, IR::OP_VFNMLSSCALARINSERT, 2, 3, 1>}, // VFNMSUB

    {OPD(2, 0b01, 0xA6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, true, 2, 1, 3>},  // VFMADDSUB
    {OPD(2, 0b01, 0xA7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, false, 2, 1, 3>}, // VFMSUBADD

    {OPD(2, 0b01, 0xB6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, true, 2, 3, 1>},  // VFMADDSUB
    {OPD(2, 0b01, 0xB7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VFMAddSubImpl, false, 2, 3, 1>}, // VFMSUBADD

    {OPD(2, 0b01, 0xDB), 1, &OpDispatchBuilder::AVX128_VAESImc},
    {OPD(2, 0b01, 0xDC), 1, &OpDispatchBuilder::AVX128_VAESEnc},
    {OPD(2, 0b01, 0xDD), 1, &OpDispatchBuilder::AVX128_VAESEncLast},
    {OPD(2, 0b01, 0xDE), 1, &OpDispatchBuilder::AVX128_VAESDec},
    {OPD(2, 0b01, 0xDF), 1, &OpDispatchBuilder::AVX128_VAESDecLast},

    {OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::AVX128_VPERMQ},
    {OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::AVX128_VPERMQ},
    {OPD(3, 0b01, 0x02), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBLEND, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPERMILImm, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VPERMILImm, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::AVX128_VPERM2},
    {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorRound, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorRound, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalarRound, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_InsertScalarRound, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x0C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBLEND, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x0D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBLEND, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x0E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VBLEND, OpSize::i16Bit>},
    {OPD(3, 0b01, 0x0F), 1, &OpDispatchBuilder::AVX128_VPALIGNR},

    {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_PExtr, OpSize::i8Bit>},
    {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_PExtr, OpSize::i16Bit>},
    {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_PExtr, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_PExtr, OpSize::i32Bit>},

    {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::AVX128_VINSERT},
    {OPD(3, 0b01, 0x19), 1, &OpDispatchBuilder::AVX128_VEXTRACT128},
    {OPD(3, 0b01, 0x1D), 1, &OpDispatchBuilder::AVX128_VCVTPS2PH},
    {OPD(3, 0b01, 0x20), 1, &OpDispatchBuilder::AVX128_VPINSRB},
    {OPD(3, 0b01, 0x21), 1, &OpDispatchBuilder::AVX128_VINSERTPS},
    {OPD(3, 0b01, 0x22), 1, &OpDispatchBuilder::AVX128_VPINSRDQ},

    {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::AVX128_VINSERT},
    {OPD(3, 0b01, 0x39), 1, &OpDispatchBuilder::AVX128_VEXTRACT128},

    {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VDPP, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VDPP, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x42), 1, &OpDispatchBuilder::AVX128_VMPSADBW},
    {OPD(3, 0b01, 0x44), 1, &OpDispatchBuilder::AVX128_VPCLMULQDQ},

    {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::AVX128_VPERM2},

    {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorVariableBlend, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorVariableBlend, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorVariableBlend, OpSize::i8Bit>},

    {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::AVX128_VPCMPESTRM},
    {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::AVX128_VPCMPESTRI},
    {OPD(3, 0b01, 0x62), 1, &OpDispatchBuilder::AVX128_VPCMPISTRM},
    {OPD(3, 0b01, 0x63), 1, &OpDispatchBuilder::AVX128_VPCMPISTRI},

    {OPD(3, 0b01, 0xDF), 1, &OpDispatchBuilder::AVX128_VAESKeyGenAssist},
  };
#undef OPD

// Builds the lookup key for a VEX group entry: the opcode value occupies the low three bits, `pp` is
// shifted up to bit 3, and the group's offset from TYPE_VEX_GROUP_12 is placed from bit 4 upwards.
#define OPD(grp, pp, op) (((grp - X86Tables::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (op))
  // Dispatch entries for the VEX group opcodes (groups 12 through 15) handled by the AVX128 path.
  // NOTE(review): the middle field of every entry is 1; presumably the number of consecutive keys the
  // entry covers - confirm against the DispatchTableEntry definition.
  constexpr DispatchTableEntry TableGroupOps[] {
    // ---- Group 12: immediate vector shifts on 16-bit elements ----
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VUSHRI>}, // VPSRLI
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VSHLI>}, // VPSLLI
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i16Bit, IROps::OP_VSSHRI>}, // VPSRAI

    // ---- Group 13: immediate vector shifts on 32-bit elements ----
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VUSHRI>}, // VPSRLI
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VSHLI>}, // VPSLLI
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i32Bit, IROps::OP_VSSHRI>}, // VPSRAI

    // ---- Group 14: immediate vector shifts on 64-bit elements, plus the double-shift forms ----
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i64Bit, IROps::OP_VUSHRI>}, // VPSRLI
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b011), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ShiftDoubleImm, OpDispatchBuilder::ShiftDirection::RIGHT>}, // VPSRLDQ
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_VectorShiftImmImpl, OpSize::i64Bit, IROps::OP_VSHLI>}, // VPSLLI
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b111), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVX128_ShiftDoubleImm, OpDispatchBuilder::ShiftDirection::LEFT>}, // VPSLLDQ

    // ---- Group 15: MXCSR load/store ----
    // These reuse the regular (non-AVX128) handlers; the instructions merely happen to live in the VEX table.
    {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b010), 1, &OpDispatchBuilder::LDMXCSR},
    {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b011), 1, &OpDispatchBuilder::STMXCSR},
  };
#undef OPD
}

namespace AVX256 {
  using namespace IR;
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
  constexpr DispatchTableEntry BaseTable[] = {
    {OPD(1, 0b00, 0x10), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
    {OPD(1, 0b01, 0x10), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
    {OPD(1, 0b10, 0x10), 1, &OpDispatchBuilder::VMOVSSOp},
    {OPD(1, 0b11, 0x10), 1, &OpDispatchBuilder::VMOVSDOp},
    {OPD(1, 0b00, 0x11), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
    {OPD(1, 0b01, 0x11), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},
    {OPD(1, 0b10, 0x11), 1, &OpDispatchBuilder::VMOVSSOp},
    {OPD(1, 0b11, 0x11), 1, &OpDispatchBuilder::VMOVSDOp},

    {OPD(1, 0b00, 0x12), 1, &OpDispatchBuilder::VMOVLPOp},
    {OPD(1, 0b01, 0x12), 1, &OpDispatchBuilder::VMOVLPOp},
    {OPD(1, 0b10, 0x12), 1, &OpDispatchBuilder::VMOVSLDUPOp},
    {OPD(1, 0b11, 0x12), 1, &OpDispatchBuilder::VMOVDDUPOp},
    {OPD(1, 0b00, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},
    {OPD(1, 0b01, 0x13), 1, &OpDispatchBuilder::VMOVLPOp},

    {OPD(1, 0b00, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
    {OPD(1, 0b01, 0x16), 1, &OpDispatchBuilder::VMOVHPOp},
    {OPD(1, 0b10, 0x16), 1, &OpDispatchBuilder::VMOVSHDUPOp},
    {OPD(1, 0b00, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},
    {OPD(1, 0b01, 0x17), 1, &OpDispatchBuilder::VMOVHPOp},

    {OPD(1, 0b00, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b01, 0x28), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b00, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b01, 0x29), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},

    {OPD(1, 0b10, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i32Bit>},
    {OPD(1, 0b11, 0x2A), 1, &OpDispatchBuilder::AVXInsertCVTGPR_To_FPR<OpSize::i64Bit>},

    {OPD(1, 0b00, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},
    {OPD(1, 0b01, 0x2B), 1, &OpDispatchBuilder::MOVVectorNTOp},

    {OPD(1, 0b10, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, false>},
    {OPD(1, 0b11, 0x2C), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, false>},

    {OPD(1, 0b10, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i32Bit, true>},
    {OPD(1, 0b11, 0x2D), 1, &OpDispatchBuilder::CVTFPR_To_GPR<OpSize::i64Bit, true>},

    {OPD(1, 0b00, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
    {OPD(1, 0b01, 0x2E), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},
    {OPD(1, 0b00, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i32Bit>},
    {OPD(1, 0b01, 0x2F), 1, &OpDispatchBuilder::UCOMISxOp<OpSize::i64Bit>},

    {OPD(1, 0b00, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x50), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVMSKOp, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x51), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFSQRT, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x51), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFSQRTSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x52), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRSQRT, OpSize::i32Bit>},
    {OPD(1, 0b10, 0x52), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRSQRTSCALARINSERT, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x53), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VFRECP, OpSize::i32Bit>},
    {OPD(1, 0b10, 0x53), 1, &OpDispatchBuilder::AVXVectorScalarUnaryInsertALUOp<IR::OP_VFRECPSCALARINSERT, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
    {OPD(1, 0b01, 0x54), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>},

    {OPD(1, 0b00, 0x55), 1, &OpDispatchBuilder::VANDNOp},
    {OPD(1, 0b01, 0x55), 1, &OpDispatchBuilder::VANDNOp},

    {OPD(1, 0b00, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
    {OPD(1, 0b01, 0x56), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>},

    {OPD(1, 0b00, 0x57), 1, &OpDispatchBuilder::AVXVectorXOROp},
    {OPD(1, 0b01, 0x57), 1, &OpDispatchBuilder::AVXVectorXOROp},

    {OPD(1, 0b00, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFADD, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x58), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFADDSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMUL, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x59), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMULSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i64Bit, OpSize::i32Bit, true>},
    {OPD(1, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::Vector_CVT_Float_To_Float, OpSize::i32Bit, OpSize::i64Bit, true>},
    {OPD(1, 0b10, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i64Bit, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5A), 1, &OpDispatchBuilder::AVXInsertScalar_CVT_Float_To_Float<OpSize::i32Bit, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, false>},
    {OPD(1, 0b01, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, true>},
    {OPD(1, 0b10, 0x5B), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i32Bit, false>},

    {OPD(1, 0b00, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFSUB, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5C), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFSUBSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMIN, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5D), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMINSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFDIV, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5E), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFDIVSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b00, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x5F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VFMAX, OpSize::i64Bit>},
    {OPD(1, 0b10, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i32Bit>},
    {OPD(1, 0b11, 0x5F), 1, &OpDispatchBuilder::AVXVectorScalarInsertALUOp<IR::OP_VFMAXSCALARINSERT, OpSize::i64Bit>},

    {OPD(1, 0b01, 0x60), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x61), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x62), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x63), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x64), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x65), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x66), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x67), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x68), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x69), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x6A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x6B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKSSOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x6C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKLOp, OpSize::i64Bit>},
    {OPD(1, 0b01, 0x6D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPUNPCKHOp, OpSize::i64Bit>},
    {OPD(1, 0b01, 0x6E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>},

    {OPD(1, 0b01, 0x6F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b10, 0x6F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},

    {OPD(1, 0b01, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i32Bit, true>},
    {OPD(1, 0b10, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i16Bit, false>},
    {OPD(1, 0b11, 0x70), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSHUFWOp, OpSize::i16Bit, true>},

    {OPD(1, 0b01, 0x74), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i8Bit>},
    {OPD(1, 0b01, 0x75), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i16Bit>},
    {OPD(1, 0b01, 0x76), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i32Bit>},

    {OPD(1, 0b00, 0x77), 1, &OpDispatchBuilder::VZEROOp},

    {OPD(1, 0b01, 0x7C), 1, &OpDispatchBuilder::VHADDPOp<IR::OP_VFADDP, OpSize::i64Bit>},
    {OPD(1, 0b11, 0x7C), 1, &OpDispatchBuilder::VHADDPOp<IR::OP_VFADDP, OpSize::i32Bit>},
    {OPD(1, 0b01, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, OpSize::i64Bit>},
    {OPD(1, 0b11, 0x7D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VHSUBPOp, OpSize::i32Bit>},

    {OPD(1, 0b01, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVBetweenGPR_FPR, OpDispatchBuilder::VectorOpType::AVX>},
    {OPD(1, 0b10, 0x7E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>},

    {OPD(1, 0b01, 0x7F), 1, &OpDispatchBuilder::VMOVAPS_VMOVAPDOp},
    {OPD(1, 0b10, 0x7F), 1, &OpDispatchBuilder::VMOVUPS_VMOVUPDOp},

    {OPD(1, 0b00, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<OpSize::i32Bit>},
    {OPD(1, 0b01, 0xC2), 1, &OpDispatchBuilder::AVXVFCMPOp<OpSize::i64Bit>},
    {OPD(1, 0b10, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<OpSize::i32Bit>},
    {OPD(1, 0b11, 0xC2), 1, &OpDispatchBuilder::AVXInsertScalarFCMPOp<OpSize::i64Bit>},

    {OPD(1, 0b01, 0xC4), 1, &OpDispatchBuilder::VPINSRWOp},
    {OPD(1, 0b01, 0xC5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},

    {OPD(1, 0b00, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xC6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VSHUFOp, OpSize::i64Bit>},

    {OPD(1, 0b01, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<OpSize::i64Bit>},
    {OPD(1, 0b11, 0xD0), 1, &OpDispatchBuilder::VADDSUBPOp<OpSize::i32Bit>},

    {OPD(1, 0b01, 0xD1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xD2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xD3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLDOp, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xD4), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xD5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xD6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::MOVQOp, OpDispatchBuilder::VectorOpType::AVX>},
    {OPD(1, 0b01, 0xD7), 1, &OpDispatchBuilder::MOVMSKOpOne},

    {OPD(1, 0b01, 0xD8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xDA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VAND, OpSize::i128Bit>},
    {OPD(1, 0b01, 0xDC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUQADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xDE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xDF), 1, &OpDispatchBuilder::VANDNOp},

    {OPD(1, 0b01, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xE2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xE3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VURAVG, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xE4), 1, &OpDispatchBuilder::VPMULHWOp<false>},
    {OPD(1, 0b01, 0xE5), 1, &OpDispatchBuilder::VPMULHWOp<true>},

    {OPD(1, 0b01, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, false>},
    {OPD(1, 0b10, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Int_To_Float<OpSize::i32Bit, true>},
    {OPD(1, 0b11, 0xE6), 1, &OpDispatchBuilder::Vector_CVT_Float_To_Int<OpSize::i64Bit, true>},

    {OPD(1, 0b01, 0xE7), 1, &OpDispatchBuilder::MOVVectorNTOp},

    {OPD(1, 0b01, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VOR, OpSize::i128Bit>},
    {OPD(1, 0b01, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSQADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xEF), 1, &OpDispatchBuilder::AVXVectorXOROp},

    {OPD(1, 0b11, 0xF0), 1, &OpDispatchBuilder::MOVVectorUnalignedOp},
    {OPD(1, 0b01, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLOp, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xF4), 1, &OpDispatchBuilder::VPMULLOp<OpSize::i32Bit, false>},
    {OPD(1, 0b01, 0xF5), 1, &OpDispatchBuilder::VPMADDWDOp},
    {OPD(1, 0b01, 0xF6), 1, &OpDispatchBuilder::VPSADBWOp},
    {OPD(1, 0b01, 0xF7), 1, &OpDispatchBuilder::MASKMOVOp},

    {OPD(1, 0b01, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i32Bit>},
    {OPD(1, 0b01, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSUB, OpSize::i64Bit>},
    {OPD(1, 0b01, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i8Bit>},
    {OPD(1, 0b01, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i16Bit>},
    {OPD(1, 0b01, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VADD, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x00), 1, &OpDispatchBuilder::VPSHUFBOp},
    {OPD(2, 0b01, 0x01), 1, &OpDispatchBuilder::VHADDPOp<IR::OP_VADDP, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x02), 1, &OpDispatchBuilder::VHADDPOp<IR::OP_VADDP, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x03), 1, &OpDispatchBuilder::VPHADDSWOp},
    {OPD(2, 0b01, 0x04), 1, &OpDispatchBuilder::VPMADDUBSWOp},

    {OPD(2, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x06), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPHSUBOp, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x07), 1, &OpDispatchBuilder::VPHSUBSWOp},

    {OPD(2, 0b01, 0x08), 1, &OpDispatchBuilder::VPSIGN<OpSize::i8Bit>},
    {OPD(2, 0b01, 0x09), 1, &OpDispatchBuilder::VPSIGN<OpSize::i16Bit>},
    {OPD(2, 0b01, 0x0A), 1, &OpDispatchBuilder::VPSIGN<OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0B), 1, &OpDispatchBuilder::VPMULHRSWOp},
    {OPD(2, 0b01, 0x0C), 1, &OpDispatchBuilder::VPERMILRegOp<OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0D), 1, &OpDispatchBuilder::VPERMILRegOp<OpSize::i64Bit>},
    {OPD(2, 0b01, 0x0E), 1, &OpDispatchBuilder::VTESTPOp<OpSize::i32Bit>},
    {OPD(2, 0b01, 0x0F), 1, &OpDispatchBuilder::VTESTPOp<OpSize::i64Bit>},

    {OPD(2, 0b01, 0x13), 1, &OpDispatchBuilder::VCVTPH2PSOp},
    {OPD(2, 0b01, 0x16), 1, &OpDispatchBuilder::VPERMDOp},
    {OPD(2, 0b01, 0x17), 1, &OpDispatchBuilder::PTestOp},
    {OPD(2, 0b01, 0x18), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x19), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x1A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i128Bit>},
    {OPD(2, 0b01, 0x1C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x1D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x1E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorUnaryOp, IR::OP_VABS, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x20), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, true>},
    {OPD(2, 0b01, 0x21), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x22), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, true>},
    {OPD(2, 0b01, 0x23), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x24), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, true>},
    {OPD(2, 0b01, 0x25), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, true>},

    {OPD(2, 0b01, 0x28), 1, &OpDispatchBuilder::VPMULLOp<OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x29), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPEQ, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x2A), 1, &OpDispatchBuilder::MOVVectorNTOp},
    {OPD(2, 0b01, 0x2B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPACKUSOp, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x2C), 1, &OpDispatchBuilder::VMASKMOVOp<OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x2D), 1, &OpDispatchBuilder::VMASKMOVOp<OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x2E), 1, &OpDispatchBuilder::VMASKMOVOp<OpSize::i32Bit, true>},
    {OPD(2, 0b01, 0x2F), 1, &OpDispatchBuilder::VMASKMOVOp<OpSize::i64Bit, true>},

    {OPD(2, 0b01, 0x30), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i16Bit, false>},
    {OPD(2, 0b01, 0x31), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x32), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i8Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x33), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i32Bit, false>},
    {OPD(2, 0b01, 0x34), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i16Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x35), 1, &OpDispatchBuilder::ExtendVectorElements<OpSize::i32Bit, OpSize::i64Bit, false>},
    {OPD(2, 0b01, 0x36), 1, &OpDispatchBuilder::VPERMDOp},

    {OPD(2, 0b01, 0x37), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VCMPGT, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x38), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x39), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMIN, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x3B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMIN, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x3D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VSMAX, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x3E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i16Bit>},
    {OPD(2, 0b01, 0x3F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VUMAX, OpSize::i32Bit>},

    {OPD(2, 0b01, 0x40), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorALUOp, IR::OP_VMUL, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x41), 1, &OpDispatchBuilder::PHMINPOSUWOp},
    {OPD(2, 0b01, 0x45), 1, &OpDispatchBuilder::VPSRLVOp},
    {OPD(2, 0b01, 0x46), 1, &OpDispatchBuilder::VPSRAVDOp},
    {OPD(2, 0b01, 0x47), 1, &OpDispatchBuilder::VPSLLVOp},

    {OPD(2, 0b01, 0x58), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i32Bit>},
    {OPD(2, 0b01, 0x59), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i64Bit>},
    {OPD(2, 0b01, 0x5A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i128Bit>},

    {OPD(2, 0b01, 0x78), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i8Bit>},
    {OPD(2, 0b01, 0x79), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VBROADCASTOp, OpSize::i16Bit>},

    {OPD(2, 0b01, 0x8C), 1, &OpDispatchBuilder::VPMASKMOVOp<false>},
    {OPD(2, 0b01, 0x8E), 1, &OpDispatchBuilder::VPMASKMOVOp<true>},

    {OPD(2, 0b01, 0x90), 1, &OpDispatchBuilder::VPGATHER<OpSize::i32Bit>},
    {OPD(2, 0b01, 0x91), 1, &OpDispatchBuilder::VPGATHER<OpSize::i64Bit>},
    {OPD(2, 0b01, 0x92), 1, &OpDispatchBuilder::VPGATHER<OpSize::i32Bit>},
    {OPD(2, 0b01, 0x93), 1, &OpDispatchBuilder::VPGATHER<OpSize::i64Bit>},

    {OPD(2, 0b01, 0x96), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, true, 1, 3, 2>},  // VFMADDSUB
    {OPD(2, 0b01, 0x97), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, false, 1, 3, 2>}, // VFMSUBADD

    {OPD(2, 0b01, 0x98), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, false, 1, 3, 2>},  // VFMADD
    {OPD(2, 0b01, 0x99), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, true, 1, 3, 2>},   // VFMADD
    {OPD(2, 0b01, 0x9A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, false, 1, 3, 2>},  // VFMSUB
    {OPD(2, 0b01, 0x9B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, true, 1, 3, 2>},   // VFMSUB
    {OPD(2, 0b01, 0x9C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, false, 1, 3, 2>}, // VFNMADD
    {OPD(2, 0b01, 0x9D), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, true, 1, 3, 2>},  // VFNMADD
    {OPD(2, 0b01, 0x9E), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, false, 1, 3, 2>}, // VFNMSUB
    {OPD(2, 0b01, 0x9F), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, true, 1, 3, 2>},  // VFNMSUB

    {OPD(2, 0b01, 0xA8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, false, 2, 1, 3>},  // VFMADD
    {OPD(2, 0b01, 0xA9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, true, 2, 1, 3>},   // VFMADD
    {OPD(2, 0b01, 0xAA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, false, 2, 1, 3>},  // VFMSUB
    {OPD(2, 0b01, 0xAB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, true, 2, 1, 3>},   // VFMSUB
    {OPD(2, 0b01, 0xAC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, false, 2, 1, 3>}, // VFNMADD
    {OPD(2, 0b01, 0xAD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, true, 2, 1, 3>},  // VFNMADD
    {OPD(2, 0b01, 0xAE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, false, 2, 1, 3>}, // VFNMSUB
    {OPD(2, 0b01, 0xAF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, true, 2, 1, 3>},  // VFNMSUB

    {OPD(2, 0b01, 0xB8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, false, 2, 3, 1>},  // VFMADD
    {OPD(2, 0b01, 0xB9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLA, true, 2, 3, 1>},   // VFMADD
    {OPD(2, 0b01, 0xBA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, false, 2, 3, 1>},  // VFMSUB
    {OPD(2, 0b01, 0xBB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFMLS, true, 2, 3, 1>},   // VFMSUB
    {OPD(2, 0b01, 0xBC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, false, 2, 3, 1>}, // VFNMADD
    {OPD(2, 0b01, 0xBD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLA, true, 2, 3, 1>},  // VFNMADD
    {OPD(2, 0b01, 0xBE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, false, 2, 3, 1>}, // VFNMSUB
    {OPD(2, 0b01, 0xBF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAImpl, IR::OP_VFNMLS, true, 2, 3, 1>},  // VFNMSUB

    {OPD(2, 0b01, 0xA6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, true, 2, 1, 3>},  // VFMADDSUB
    {OPD(2, 0b01, 0xA7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, false, 2, 1, 3>}, // VFMSUBADD

    {OPD(2, 0b01, 0xB6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, true, 2, 3, 1>},  // VFMADDSUB
    {OPD(2, 0b01, 0xB7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VFMAddSubImpl, false, 2, 3, 1>}, // VFMSUBADD

    {OPD(2, 0b01, 0xDB), 1, &OpDispatchBuilder::AESImcOp},
    {OPD(2, 0b01, 0xDC), 1, &OpDispatchBuilder::VAESEncOp},
    {OPD(2, 0b01, 0xDD), 1, &OpDispatchBuilder::VAESEncLastOp},
    {OPD(2, 0b01, 0xDE), 1, &OpDispatchBuilder::VAESDecOp},
    {OPD(2, 0b01, 0xDF), 1, &OpDispatchBuilder::VAESDecLastOp},

    {OPD(3, 0b01, 0x00), 1, &OpDispatchBuilder::VPERMQOp},
    {OPD(3, 0b01, 0x01), 1, &OpDispatchBuilder::VPERMQOp},
    {OPD(3, 0b01, 0x02), 1, &OpDispatchBuilder::VPBLENDDOp},
    {OPD(3, 0b01, 0x04), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x05), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPERMILImmOp, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x06), 1, &OpDispatchBuilder::VPERM2Op},
    {OPD(3, 0b01, 0x08), 1, &OpDispatchBuilder::AVXVectorRound<OpSize::i32Bit>},
    {OPD(3, 0b01, 0x09), 1, &OpDispatchBuilder::AVXVectorRound<OpSize::i64Bit>},
    {OPD(3, 0b01, 0x0A), 1, &OpDispatchBuilder::AVXInsertScalarRound<OpSize::i32Bit>},
    {OPD(3, 0b01, 0x0B), 1, &OpDispatchBuilder::AVXInsertScalarRound<OpSize::i64Bit>},
    {OPD(3, 0b01, 0x0C), 1, &OpDispatchBuilder::VPBLENDDOp},
    {OPD(3, 0b01, 0x0D), 1, &OpDispatchBuilder::VBLENDPDOp},
    {OPD(3, 0b01, 0x0E), 1, &OpDispatchBuilder::VPBLENDWOp},
    {OPD(3, 0b01, 0x0F), 1, &OpDispatchBuilder::VPALIGNROp},

    {OPD(3, 0b01, 0x14), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i8Bit>},
    {OPD(3, 0b01, 0x15), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i16Bit>},
    {OPD(3, 0b01, 0x16), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x17), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::PExtrOp, OpSize::i32Bit>},

    {OPD(3, 0b01, 0x18), 1, &OpDispatchBuilder::VINSERTOp},
    {OPD(3, 0b01, 0x19), 1, &OpDispatchBuilder::VEXTRACT128Op},
    {OPD(3, 0b01, 0x1D), 1, &OpDispatchBuilder::VCVTPS2PHOp},
    {OPD(3, 0b01, 0x20), 1, &OpDispatchBuilder::VPINSRBOp},
    {OPD(3, 0b01, 0x21), 1, &OpDispatchBuilder::VINSERTPSOp},
    {OPD(3, 0b01, 0x22), 1, &OpDispatchBuilder::VPINSRDQOp},

    {OPD(3, 0b01, 0x38), 1, &OpDispatchBuilder::VINSERTOp},
    {OPD(3, 0b01, 0x39), 1, &OpDispatchBuilder::VEXTRACT128Op},

    {OPD(3, 0b01, 0x40), 1, &OpDispatchBuilder::VDPPOp<OpSize::i32Bit>},
    {OPD(3, 0b01, 0x41), 1, &OpDispatchBuilder::VDPPOp<OpSize::i64Bit>},
    {OPD(3, 0b01, 0x42), 1, &OpDispatchBuilder::VMPSADBWOp},
    {OPD(3, 0b01, 0x44), 1, &OpDispatchBuilder::VPCLMULQDQOp},

    {OPD(3, 0b01, 0x46), 1, &OpDispatchBuilder::VPERM2Op},

    {OPD(3, 0b01, 0x4A), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i32Bit>},
    {OPD(3, 0b01, 0x4B), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i64Bit>},
    {OPD(3, 0b01, 0x4C), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::AVXVectorVariableBlend, OpSize::i8Bit>},

    {OPD(3, 0b01, 0x60), 1, &OpDispatchBuilder::VPCMPESTRMOp},
    {OPD(3, 0b01, 0x61), 1, &OpDispatchBuilder::VPCMPESTRIOp},
    {OPD(3, 0b01, 0x62), 1, &OpDispatchBuilder::VPCMPISTRMOp},
    {OPD(3, 0b01, 0x63), 1, &OpDispatchBuilder::VPCMPISTRIOp},

    {OPD(3, 0b01, 0xDF), 1, &OpDispatchBuilder::AESKeyGenAssist},
  };
#undef OPD

// Dispatch entries for the VEX "group" opcodes (VEX groups 12 through 15).
//
// OPD packs a lookup key out of three fields:
//   - (group - TYPE_VEX_GROUP_12), shifted left by 4: group index relative to group 12
//   - pp, shifted left by 3: only a single bit is available for it before it would
//     overlap the group index, so every entry below uses 0 or 1
//   - opcode, in the low bits: every entry below passes a 3-bit value
//     (presumably the ModRM.reg field that selects the op within the group - TODO confirm against the decoder)
#define OPD(group, pp, opcode) (((group - X86Tables::TYPE_VEX_GROUP_12) << 4) | (pp << 3) | (opcode))
  // Each entry is {key, 1, handler}; the handler is a pointer to an OpDispatchBuilder member function.
  // NOTE(review): the middle field is 1 for every entry; presumably it is a count of
  // consecutive keys covered by the entry - confirm against DispatchTableEntry.
  constexpr DispatchTableEntry TableGroupOps[] {
    // Group 12, pp=1: VPSRLIOp / VPSLLIOp / VPSRAIOp bound with OpSize::i16Bit.
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i16Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i16Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_12, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, OpSize::i16Bit>},

    // Group 13, pp=1: the same three handlers bound with OpSize::i32Bit.
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i32Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i32Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_13, 1, 0b100), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRAIOp, OpSize::i32Bit>},

    // Group 14, pp=1: VPSRLIOp / VPSLLIOp bound with OpSize::i64Bit, plus the dedicated
    // VPSRLDQOp / VPSLLDQOp handlers. No VPSRAIOp entry is registered for this group.
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b010), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSRLIOp, OpSize::i64Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b011), 1, &OpDispatchBuilder::VPSRLDQOp},
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b110), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::VPSLLIOp, OpSize::i64Bit>},
    {OPD(X86Tables::TYPE_VEX_GROUP_14, 1, 0b111), 1, &OpDispatchBuilder::VPSLLDQOp},

    // Group 15, pp=0: LDMXCSR / STMXCSR. These are the only entries here registered with pp=0.
    {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b010), 1, &OpDispatchBuilder::LDMXCSR},
    {OPD(X86Tables::TYPE_VEX_GROUP_15, 0, 0b011), 1, &OpDispatchBuilder::STMXCSR},
  };
#undef OPD
}

auto BaseTableLambda = [](const auto RuntimeTable) consteval {
  std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> Table{};
#define OPD(map_select, pp, opcode) (((map_select - 1) << 10) | (pp << 8) | (opcode))
  constexpr U16U8InfoStruct VEXTable[] = {
    // Map 0 (Reserved)
    // VEX Map 1
    {OPD(1, 0b00, 0x10), 1, X86InstInfo{"VMOVUPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x10), 1, X86InstInfo{"VMOVUPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x10), 1, X86InstInfo{"VMOVSS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x10), 1, X86InstInfo{"VMOVSD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x11), 1, X86InstInfo{"VMOVUPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x11), 1, X86InstInfo{"VMOVUPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x11), 1, X86InstInfo{"VMOVSS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x11), 1, X86InstInfo{"VMOVSD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x12), 1, X86InstInfo{"VMOVLPS",TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b01, 0x12), 1, X86InstInfo{"VMOVLPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b10, 0x12), 1, X86InstInfo{"VMOVSLDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0x12), 1, X86InstInfo{"VMOVDDUP",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x13), 1, X86InstInfo{"VMOVLPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b01, 0x13), 1, X86InstInfo{"VMOVLPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},

    {OPD(1, 0b00, 0x14), 1, X86InstInfo{"VUNPCKLPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x14), 1, X86InstInfo{"VUNPCKLPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x15), 1, X86InstInfo{"VUNPCKHPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x15), 1, X86InstInfo{"VUNPCKHPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x16), 1, X86InstInfo{"VMOV(L)HPS",TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b01, 0x16), 1, X86InstInfo{"VMOVHPD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_1ST_SRC | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b10, 0x16), 1, X86InstInfo{"VMOVSHDUP", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x17), 1, X86InstInfo{"VMOVHPS",   TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},
    {OPD(1, 0b01, 0x17), 1, X86InstInfo{"VMOVHPD",   TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},

    {OPD(1, 0b00, 0x50), 1, X86InstInfo{"VMOVMSKPS", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},
    {OPD(1, 0b01, 0x50), 1, X86InstInfo{"VMOVMSKPD", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR, 0}},

    {OPD(1, 0b00, 0x51), 1, X86InstInfo{"VSQRTPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x51), 1, X86InstInfo{"VSQRTPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x51), 1, X86InstInfo{"VSQRTSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x51), 1, X86InstInfo{"VSQRTSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x52), 1, X86InstInfo{"VRSQRTPS",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x52), 1, X86InstInfo{"VRSQRTSS",  TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x53), 1, X86InstInfo{"VRCPPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x53), 1, X86InstInfo{"VRCPSS",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x54), 1, X86InstInfo{"VANDPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x54), 1, X86InstInfo{"VANDPD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x55), 1, X86InstInfo{"VANDNPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x55), 1, X86InstInfo{"VANDNPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x56), 1, X86InstInfo{"VORPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x56), 1, X86InstInfo{"VORPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x57), 1, X86InstInfo{"VXORPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x57), 1, X86InstInfo{"VXORPD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x60), 1, X86InstInfo{"VPUNPCKLBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x61), 1, X86InstInfo{"VPUNPCKLWD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x62), 1, X86InstInfo{"VPUNPCKLDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x63), 1, X86InstInfo{"VPACKSSWB",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x64), 1, X86InstInfo{"VPCMPGTB",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x65), 1, X86InstInfo{"VPCMPGTW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x66), 1, X86InstInfo{"VPCMPGTD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x67), 1, X86InstInfo{"VPACKUSWB",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x70), 1, X86InstInfo{"VPSHUFD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b10, 0x70), 1, X86InstInfo{"VPSHUFHW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b11, 0x70), 1, X86InstInfo{"VPSHUFLW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},

    {OPD(1, 0b01, 0x71), 1, X86InstInfo{"",           TYPE_VEX_GROUP_12, FLAGS_NONE, 0}}, // VEX Group 12
    {OPD(1, 0b01, 0x72), 1, X86InstInfo{"",           TYPE_VEX_GROUP_13, FLAGS_NONE, 0}}, // VEX Group 13
    {OPD(1, 0b01, 0x73), 1, X86InstInfo{"",           TYPE_VEX_GROUP_14, FLAGS_NONE, 0}}, // VEX Group 14

    {OPD(1, 0b01, 0x74), 1, X86InstInfo{"VPCMPEQB",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x75), 1, X86InstInfo{"VPCMPEQW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x76), 1, X86InstInfo{"VPCMPEQD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x77), 1, X86InstInfo{"VZERO*",     TYPE_INST, GenFlagsDstSize(SIZE_128BIT), 0}},

    {OPD(1, 0b00, 0xC2), 1, X86InstInfo{"VCMPccPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b01, 0xC2), 1, X86InstInfo{"VCMPccPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b10, 0xC2), 1, X86InstInfo{"VCMPccSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_L_IGNORE | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b11, 0xC2), 1, X86InstInfo{"VCMPccSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_L_IGNORE | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},

    {OPD(1, 0b01, 0xC4), 1, X86InstInfo{"VPINSRW",    TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_SF_SRC_GPR | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},
    {OPD(1, 0b01, 0xC5), 1, X86InstInfo{"VPEXTRW",    TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},

    {OPD(1, 0b00, 0xC6), 1, X86InstInfo{"VSHUFPS",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(1, 0b01, 0xC6), 1, X86InstInfo{"VSHUFPD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},

    // The above ops are defined from `Table A-17. VEX Opcode Map 1, Low Nibble = [0h:7h]` of the AMD64 Architecture Programmer's Manual, Volume 3.
    // That table doesn't state which VEX.pp value selects which instruction.
    // XXX: Confirm all of the above opcode encodings

    {OPD(1, 0b00, 0x28), 1, X86InstInfo{"VMOVAPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x28), 1, X86InstInfo{"VMOVAPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x29), 1, X86InstInfo{"VMOVAPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x29), 1, X86InstInfo{"VMOVAPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b10, 0x2A), 1, X86InstInfo{"VCVTSI2SS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x2A), 1, X86InstInfo{"VCVTSI2SD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x2B), 1, X86InstInfo{"VMOVNTPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x2B), 1, X86InstInfo{"VMOVNTPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b10, 0x2C), 1, X86InstInfo{"VCVTTSS2SI",   TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x2C), 1, X86InstInfo{"VCVTTSD2SI",   TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b10, 0x2D), 1, X86InstInfo{"VCVTSS2SI",   TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x2D), 1, X86InstInfo{"VCVTSD2SI",   TYPE_INST, FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x2E), 1, X86InstInfo{"VUCOMISS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b01, 0x2E), 1, X86InstInfo{"VUCOMISD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x2F), 1, X86InstInfo{"VCOMISS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b01, 0x2F), 1, X86InstInfo{"VCOMISD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x58), 1, X86InstInfo{"VADDPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x58), 1, X86InstInfo{"VADDPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x58), 1, X86InstInfo{"VADDSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x58), 1, X86InstInfo{"VADDSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x59), 1, X86InstInfo{"VMULPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x59), 1, X86InstInfo{"VMULPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x59), 1, X86InstInfo{"VMULSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x59), 1, X86InstInfo{"VMULSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x5A), 1, X86InstInfo{"VCVTPS2PD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5A), 1, X86InstInfo{"VCVTPD2PS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5A), 1, X86InstInfo{"VCVTSS2SD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_L_IGNORE | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0x5A), 1, X86InstInfo{"VCVTSD2SS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_L_IGNORE |FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x5B), 1, X86InstInfo{"VCVTDQ2PS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5B), 1, X86InstInfo{"VCVTPS2DQ",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5B), 1, X86InstInfo{"VCVTTPS2DQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0x5C), 1, X86InstInfo{"VSUBPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5C), 1, X86InstInfo{"VSUBPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5C), 1, X86InstInfo{"VSUBSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x5C), 1, X86InstInfo{"VSUBSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x5D), 1, X86InstInfo{"VMINPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5D), 1, X86InstInfo{"VMINPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5D), 1, X86InstInfo{"VMINSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x5D), 1, X86InstInfo{"VMINSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x5E), 1, X86InstInfo{"VDIVPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5E), 1, X86InstInfo{"VDIVPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5E), 1, X86InstInfo{"VDIVSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x5E), 1, X86InstInfo{"VDIVSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b00, 0x5F), 1, X86InstInfo{"VMAXPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x5F), 1, X86InstInfo{"VMAXPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x5F), 1, X86InstInfo{"VMAXSS",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(1, 0b11, 0x5F), 1, X86InstInfo{"VMAXSD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(1, 0b01, 0x68), 1, X86InstInfo{"VPUNPCKHBW",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x69), 1, X86InstInfo{"VPUNPCKHWD",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x6A), 1, X86InstInfo{"VPUNPCKHDQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x6B), 1, X86InstInfo{"VPACKSSDW",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x6C), 1, X86InstInfo{"VPUNPCKLQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x6D), 1, X86InstInfo{"VPUNPCKHQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0x6E), 1, X86InstInfo{"VMOV*",       TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0 | FLAGS_SF_SRC_GPR, 0}},

    {OPD(1, 0b01, 0x6F), 1, X86InstInfo{"VMOVDQA",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x6F), 1, X86InstInfo{"VMOVDQU",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x7C), 1, X86InstInfo{"VHADDPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0x7C), 1, X86InstInfo{"VHADDPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x7D), 1, X86InstInfo{"VHSUBPD",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0x7D), 1, X86InstInfo{"VHSUBPS",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x7E), 1, X86InstInfo{"VMOV*",     TYPE_INST, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_VEX_L_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x7E), 1, X86InstInfo{"VMOVQ",     TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0x7F), 1, X86InstInfo{"VMOVDQA",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0x7F), 1, X86InstInfo{"VMOVDQU",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b00, 0xAE), 1, X86InstInfo{"",     TYPE_VEX_GROUP_15, FLAGS_NONE, 0}}, // VEX Group 15
    {OPD(1, 0b01, 0xAE), 1, X86InstInfo{"",     TYPE_VEX_GROUP_15, FLAGS_NONE, 0}}, // VEX Group 15
    {OPD(1, 0b10, 0xAE), 1, X86InstInfo{"",     TYPE_VEX_GROUP_15, FLAGS_NONE, 0}}, // VEX Group 15
    {OPD(1, 0b11, 0xAE), 1, X86InstInfo{"",     TYPE_VEX_GROUP_15, FLAGS_NONE, 0}}, // VEX Group 15

    {OPD(1, 0b01, 0xD0), 1, X86InstInfo{"VADDSUBPD",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0xD0), 1, X86InstInfo{"VADDSUBPS",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xD1), 1, X86InstInfo{"VPSRLW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD2), 1, X86InstInfo{"VPSRLD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD3), 1, X86InstInfo{"VPSRLQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD4), 1, X86InstInfo{"VPADDQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD5), 1, X86InstInfo{"VPMULLW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD6), 1, X86InstInfo{"VMOVQ",       TYPE_INST, GenFlagsSameSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_L_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD7), 1, X86InstInfo{"VPMOVMSKB",   TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_SF_DST_GPR | FLAGS_SF_MOD_REG_ONLY, 0}},

    {OPD(1, 0b01, 0xD8), 1, X86InstInfo{"VPSUBUSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xD9), 1, X86InstInfo{"VPSUBUSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDA), 1, X86InstInfo{"VPMINUB",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDB), 1, X86InstInfo{"VPAND",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDC), 1, X86InstInfo{"VPADDUSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDD), 1, X86InstInfo{"VPADDUSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDE), 1, X86InstInfo{"VPMAXUB",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xDF), 1, X86InstInfo{"VPANDN",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xE0), 1, X86InstInfo{"VPAVGB",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE1), 1, X86InstInfo{"VPSRAW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE2), 1, X86InstInfo{"VPSRAD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE3), 1, X86InstInfo{"VPAVGW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE4), 1, X86InstInfo{"VPMULHUW",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE5), 1, X86InstInfo{"VPMULHW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xE6), 1, X86InstInfo{"VCVTTPD2DQ",  TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b10, 0xE6), 1, X86InstInfo{"VCVTDQ2PD",   TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b11, 0xE6), 1, X86InstInfo{"VCVTPD2DQ",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xE7), 1, X86InstInfo{"VMOVNTDQ",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xE8), 1, X86InstInfo{"VPSUBSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xE9), 1, X86InstInfo{"VPSUBSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xEA), 1, X86InstInfo{"VPMINSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xEB), 1, X86InstInfo{"VPOR",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xEC), 1, X86InstInfo{"VPADDSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xED), 1, X86InstInfo{"VPADDSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xEE), 1, X86InstInfo{"VPMAXSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xEF), 1, X86InstInfo{"VPXOR",   TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b11, 0xF0), 1, X86InstInfo{"VLDDQU",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0}},

    {OPD(1, 0b01, 0xF1), 1, X86InstInfo{"VPSLLW",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF2), 1, X86InstInfo{"VPSLLD",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF3), 1, X86InstInfo{"VPSLLQ",      TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF4), 1, X86InstInfo{"VPMULUDQ",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF5), 1, X86InstInfo{"VPMADDWD",    TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF6), 1, X86InstInfo{"VPSADBW",     TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF7), 1, X86InstInfo{"VMASKMOVDQU", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},

    {OPD(1, 0b01, 0xF8), 1, X86InstInfo{"VPSUBB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xF9), 1, X86InstInfo{"VPSUBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xFA), 1, X86InstInfo{"VPSUBD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xFB), 1, X86InstInfo{"VPSUBQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xFC), 1, X86InstInfo{"VPADDB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xFD), 1, X86InstInfo{"VPADDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(1, 0b01, 0xFE), 1, X86InstInfo{"VPADDD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    // VEX Map 2
    {OPD(2, 0b01, 0x00), 1, X86InstInfo{"VPSHUFB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x01), 1, X86InstInfo{"VPHADDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x02), 1, X86InstInfo{"VPHADDD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x03), 1, X86InstInfo{"VPHADDSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x04), 1, X86InstInfo{"VPMADDUBSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x05), 1, X86InstInfo{"VPHSUBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x06), 1, X86InstInfo{"VPHSUBD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x07), 1, X86InstInfo{"VPHSUBSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x08), 1, X86InstInfo{"VPSIGNB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x09), 1, X86InstInfo{"VPSIGNW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0A), 1, X86InstInfo{"VPSIGND", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0B), 1, X86InstInfo{"VPMULHRSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0C), 1, X86InstInfo{"VPERMILPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0D), 1, X86InstInfo{"VPERMILPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0E), 1, X86InstInfo{"VTESTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x0F), 1, X86InstInfo{"VTESTPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x13), 1, X86InstInfo{"VCVTPH2PS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x16), 1, X86InstInfo{"VPERMPS", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x17), 1, X86InstInfo{"VPTEST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x18), 1, X86InstInfo{"VBROADCASTSS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x19), 1, X86InstInfo{"VBROADCASTSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x1A), 1, X86InstInfo{"VBROADCASTF128", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_SF_MOD_MEM_ONLY | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x1C), 1, X86InstInfo{"VPABSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x1D), 1, X86InstInfo{"VPABSW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x1E), 1, X86InstInfo{"VPABSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x20), 1, X86InstInfo{"VPMOVSXBW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x21), 1, X86InstInfo{"VPMOVSXBD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x22), 1, X86InstInfo{"VPMOVSXBQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x23), 1, X86InstInfo{"VPMOVSXWD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x24), 1, X86InstInfo{"VPMOVSXWQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x25), 1, X86InstInfo{"VPMOVSXDQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x28), 1, X86InstInfo{"VPMULDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x29), 1, X86InstInfo{"VPCMPEQQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2A), 1, X86InstInfo{"VMOVNTDQA", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2B), 1, X86InstInfo{"VPACKUSDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2C), 1, X86InstInfo{"VMASKMOVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2D), 1, X86InstInfo{"VMASKMOVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2E), 1, X86InstInfo{"VMASKMOVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x2F), 1, X86InstInfo{"VMASKMOVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x30), 1, X86InstInfo{"VPMOVZXBW", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x31), 1, X86InstInfo{"VPMOVZXBD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x32), 1, X86InstInfo{"VPMOVZXBQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_16BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x33), 1, X86InstInfo{"VPMOVZXWD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x34), 1, X86InstInfo{"VPMOVZXWQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x35), 1, X86InstInfo{"VPMOVZXDQ", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x36), 1, X86InstInfo{"VPERMD", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x37), 1, X86InstInfo{"VPCMPGTQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x38), 1, X86InstInfo{"VPMINSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x39), 1, X86InstInfo{"VPMINSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3A), 1, X86InstInfo{"VPMINUW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3B), 1, X86InstInfo{"VPMINUD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3C), 1, X86InstInfo{"VPMAXSB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3D), 1, X86InstInfo{"VPMAXSD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3E), 1, X86InstInfo{"VPMAXUW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x3F), 1, X86InstInfo{"VPMAXUD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x40), 1, X86InstInfo{"VPMULLD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x41), 1, X86InstInfo{"VPHMINPOSUW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 0}},
    {OPD(2, 0b01, 0x45), 1, X86InstInfo{"VPSRLV", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x46), 1, X86InstInfo{"VPSRAVD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x47), 1, X86InstInfo{"VPSLLV", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x58), 1, X86InstInfo{"VPBROADCASTD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x59), 1, X86InstInfo{"VPBROADCASTQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x5A), 1, X86InstInfo{"VBROADCASTI128", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_SF_MOD_MEM_ONLY | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x78), 1, X86InstInfo{"VPBROADCASTB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x79), 1, X86InstInfo{"VPBROADCASTW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x8C), 1, X86InstInfo{"VPMASKMOV", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x8E), 1, X86InstInfo{"VPMASKMOV", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_MEM_ONLY | FLAGS_SF_MOD_DST | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x90), 1, X86InstInfo{"VPGATHERDD/Q", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_2ND_SRC | FLAGS_VEX_VSIB | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x91), 1, X86InstInfo{"VPGATHERQD/Q", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_2ND_SRC | FLAGS_VEX_VSIB | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x92), 1, X86InstInfo{"VGATHERDPS/D", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_2ND_SRC | FLAGS_VEX_VSIB | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x93), 1, X86InstInfo{"VGATHERQPS/D", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_2ND_SRC | FLAGS_VEX_VSIB | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x96), 1, X86InstInfo{"VFMADDSUB132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x97), 1, X86InstInfo{"VFMSUBADD132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0x98), 1, X86InstInfo{"VFMADD132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x99), 1, X86InstInfo{"VFMADD132_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0x9A), 1, X86InstInfo{"VFMSUB132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x9B), 1, X86InstInfo{"VFMSUB132_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0x9C), 1, X86InstInfo{"VFNMADD132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x9D), 1, X86InstInfo{"VFNMADD132_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0x9E), 1, X86InstInfo{"VFNMSUB132", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0x9F), 1, X86InstInfo{"VFNMSUB132_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(2, 0b01, 0xA8), 1, X86InstInfo{"VFMADD213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xA9), 1, X86InstInfo{"VFMADD213_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xAA), 1, X86InstInfo{"VFMSUB213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xAB), 1, X86InstInfo{"VFMSUB213_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xAC), 1, X86InstInfo{"VFNMADD213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xAD), 1, X86InstInfo{"VFNMADD213_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xAE), 1, X86InstInfo{"VFNMSUB213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xAF), 1, X86InstInfo{"VFNMSUB213_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(2, 0b01, 0xB8), 1, X86InstInfo{"VFMADD231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xB9), 1, X86InstInfo{"VFMADD231_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xBA), 1, X86InstInfo{"VFMSUB231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xBB), 1, X86InstInfo{"VFMSUB231_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xBC), 1, X86InstInfo{"VFNMADD231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xBD), 1, X86InstInfo{"VFNMADD231_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},
    {OPD(2, 0b01, 0xBE), 1, X86InstInfo{"VFNMSUB231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xBF), 1, X86InstInfo{"VFNMSUB231_S", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_IGNORE, 0}},

    {OPD(2, 0b01, 0xA6), 1, X86InstInfo{"VFMADDSUB213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xA7), 1, X86InstInfo{"VFMSUBADD213", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0xB6), 1, X86InstInfo{"VFMADDSUB231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xB7), 1, X86InstInfo{"VFMSUBADD231", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b01, 0xDB), 1, X86InstInfo{"VAESIMC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xDC), 1, X86InstInfo{"VAESENC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xDD), 1, X86InstInfo{"VAESENCLAST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xDE), 1, X86InstInfo{"VAESDEC", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},
    {OPD(2, 0b01, 0xDF), 1, X86InstInfo{"VAESDECLAST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 0}},

    {OPD(2, 0b00, 0xF2), 1, X86InstInfo{"ANDN", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_1ST_SRC, 0}},

    {OPD(2, 0b00, 0xF3), 1, X86InstInfo{"", TYPE_VEX_GROUP_17, FLAGS_NONE, 0}}, // VEX Group 17
    {OPD(2, 0b01, 0xF3), 1, X86InstInfo{"", TYPE_VEX_GROUP_17, FLAGS_NONE, 0}}, // VEX Group 17
    {OPD(2, 0b10, 0xF3), 1, X86InstInfo{"", TYPE_VEX_GROUP_17, FLAGS_NONE, 0}}, // VEX Group 17
    {OPD(2, 0b11, 0xF3), 1, X86InstInfo{"", TYPE_VEX_GROUP_17, FLAGS_NONE, 0}}, // VEX Group 17

    {OPD(2, 0b00, 0xF5), 1, X86InstInfo{"BZHI", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_2ND_SRC, 0}},
    // AMD reference manual is incorrect. PEXT actually maps to 0b10, not 0b01.
    {OPD(2, 0b10, 0xF5), 1, X86InstInfo{"PEXT", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC, 0}},
    {OPD(2, 0b11, 0xF5), 1, X86InstInfo{"PDEP", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC, 0}},

    {OPD(2, 0b11, 0xF6), 1, X86InstInfo{"MULX", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC, 0}},

    {OPD(2, 0b00, 0xF7), 1, X86InstInfo{"BEXTR", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_2ND_SRC, 0}},
    {OPD(2, 0b01, 0xF7), 1, X86InstInfo{"SHLX", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_2ND_SRC, 0}},
    {OPD(2, 0b10, 0xF7), 1, X86InstInfo{"SARX", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_2ND_SRC, 0}},
    {OPD(2, 0b11, 0xF7), 1, X86InstInfo{"SHRX", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_2ND_SRC, 0}},

    // VEX Map 3
    {OPD(3, 0b01, 0x00), 1, X86InstInfo{"VPERMQ", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_1 | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x01), 1, X86InstInfo{"VPERMPD", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_1 | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x02), 1, X86InstInfo{"VPBLENDD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x04), 1, X86InstInfo{"VPERMILPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x05), 1, X86InstInfo{"VPERMILPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x06), 1, X86InstInfo{"VPERM2F128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_REX_W_0 | FLAGS_XMM_FLAGS | FLAGS_VEX_L_1, 1}},

    {OPD(3, 0b01, 0x08), 1, X86InstInfo{"VROUNDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x09), 1, X86InstInfo{"VROUNDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0A), 1, X86InstInfo{"VROUNDSS", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0B), 1, X86InstInfo{"VROUNDSD", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_64BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0C), 1, X86InstInfo{"VBLENDPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0D), 1, X86InstInfo{"VBLENDPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0E), 1, X86InstInfo{"VPBLENDW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x0F), 1, X86InstInfo{"VPALIGNR", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x14), 1, X86InstInfo{"VPEXTRB", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x15), 1, X86InstInfo{"VPEXTRW", TYPE_INST, GenFlagsSizes(SIZE_16BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x16), 1, X86InstInfo{"VPEXTRD", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x17), 1, X86InstInfo{"VEXTRACTPS", TYPE_INST, GenFlagsSizes(SIZE_32BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_SF_MOD_DST | FLAGS_SF_DST_GPR | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x18), 1, X86InstInfo{"VINSERTF128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x19), 1, X86InstInfo{"VEXTRACTF128", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x1D), 1, X86InstInfo{"VCVTPS2PH", TYPE_INST, GenFlagsSizes(SIZE_64BIT, SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x20), 1, X86InstInfo{"VPINSRB", TYPE_INST, GenFlagsDstSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1}},
    {OPD(3, 0b01, 0x21), 1, X86InstInfo{"VINSERTPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x22), 1, X86InstInfo{"VPINSR{D,Q}", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_SF_SRC_GPR, 1}},

    {OPD(3, 0b01, 0x38), 1, X86InstInfo{"VINSERTI128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x39), 1, X86InstInfo{"VEXTRACTI128", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_SF_MOD_DST | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x40), 1, X86InstInfo{"VDPPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x41), 1, X86InstInfo{"VDPPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},
    {OPD(3, 0b01, 0x42), 1, X86InstInfo{"VMPSADBW", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x44), 1, X86InstInfo{"VPCLMULQDQ", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x46), 1, X86InstInfo{"VPERM2I128", TYPE_INST, GenFlagsSameSize(SIZE_256BIT) | FLAGS_MODRM | FLAGS_VEX_L_1 | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x4A), 1, X86InstInfo{"VBLENDVPS", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x4B), 1, X86InstInfo{"VBLENDVPD", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},
    {OPD(3, 0b01, 0x4C), 1, X86InstInfo{"VPBLENDVB", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_REX_W_0 | FLAGS_VEX_1ST_SRC | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b01, 0x5C), 1, X86InstInfo{"VFMADDSUBPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x5D), 1, X86InstInfo{"VFMADDSUBPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x5E), 1, X86InstInfo{"VFMSUBADDPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x5F), 1, X86InstInfo{"VFMSUBADDPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4

    {OPD(3, 0b01, 0x60), 1, X86InstInfo{"VPCMPESTRM", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},
    {OPD(3, 0b01, 0x61), 1, X86InstInfo{"VPCMPESTRI", TYPE_INST, GenFlagsSizes(SIZE_128BIT, SIZE_32BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},
    {OPD(3, 0b01, 0x62), 1, X86InstInfo{"VPCMPISTRM", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},
    {OPD(3, 0b01, 0x63), 1, X86InstInfo{"VPCMPISTRI", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_XMM_FLAGS | FLAGS_VEX_L_0, 1}},

    {OPD(3, 0b01, 0x68), 1, X86InstInfo{"VFMADDPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x69), 1, X86InstInfo{"VFMADDPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6A), 1, X86InstInfo{"VFMADDSS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6B), 1, X86InstInfo{"VFMADDSD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6C), 1, X86InstInfo{"VFMSUBPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6D), 1, X86InstInfo{"VFMSUBPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6E), 1, X86InstInfo{"VFMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x6F), 1, X86InstInfo{"VFMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4

    {OPD(3, 0b01, 0x78), 1, X86InstInfo{"VFNMADDPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x79), 1, X86InstInfo{"VFNMADDPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7A), 1, X86InstInfo{"VFNMADDSS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7B), 1, X86InstInfo{"VFNMADDSD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7C), 1, X86InstInfo{"VFNMSUBPS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7D), 1, X86InstInfo{"VFNMSUBPD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7E), 1, X86InstInfo{"VFNMSUBSS", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4
    {OPD(3, 0b01, 0x7F), 1, X86InstInfo{"VFNMSUBSD", TYPE_UNDEC, FLAGS_NONE, 0}}, ///< FMA4

    {OPD(3, 0b01, 0xDF), 1, X86InstInfo{"VAESKEYGENASSIST", TYPE_INST, GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_VEX_L_0 | FLAGS_XMM_FLAGS, 1}},

    {OPD(3, 0b11, 0xF0), 1, X86InstInfo{"RORX", TYPE_INST, FLAGS_MODRM | FLAGS_VEX_L_0, 1}},

    // VEX Map 4 - 31 (Reserved)
  };
#undef OPD

  GenerateTable(Table.data(), VEXTable, std::size(VEXTable));

  IR::InstallToTable(Table, IR::OpDispatch_VEXTable);
  IR::InstallToTable(Table, RuntimeTable);
  return Table;
};

// Builds the table describing the VEX-encoded opcode groups.
//
// The static rows below are expanded into the result array with GenerateTable, after which
// IR::InstallToTable is applied twice: first with IR::OpDispatch_VEXGroupTable and then with
// the caller-provided RuntimeTable.
auto GroupTableLambda = [](const auto RuntimeTable) consteval {
  std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> GroupOps{};

  // Flag combinations that are shared by more than one row.
  // Rows of groups 12, 13 and 14 (the vector shift forms).
  constexpr auto VectorShiftFlags = GenFlagsSameSize(SIZE_128BIT) | FLAGS_MODRM | FLAGS_SF_MOD_REG_ONLY | FLAGS_VEX_DST | FLAGS_XMM_FLAGS;
  // Rows of group 15 (VLDMXCSR / VSTMXCSR).
  constexpr auto MXCSRAccessFlags = GenFlagsSameSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_VEX_L_0 | FLAGS_SF_MOD_MEM_ONLY;
  // Rows of group 17 (BLSR / BLSMSK / BLSI).
  constexpr auto Group17Flags = FLAGS_MODRM | FLAGS_VEX_DST;

// Row index layout: (group - TYPE_VEX_GROUP_12) from bit 4 upwards, pp in bit 3, opcode in bits 0-2.
#define OPD(group, pp, opcode) ((((group) - TYPE_VEX_GROUP_12) << 4) | ((pp) << 3) | (opcode))
  constexpr U8U8InfoStruct GroupRows[] = {
    // VEX group 12
    {OPD(TYPE_VEX_GROUP_12, 1, 0b010), 1, X86InstInfo{"VPSRLW", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_12, 1, 0b100), 1, X86InstInfo{"VPSRAW", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_12, 1, 0b110), 1, X86InstInfo{"VPSLLW", TYPE_INST, VectorShiftFlags, 1}},

    // VEX group 13
    {OPD(TYPE_VEX_GROUP_13, 1, 0b010), 1, X86InstInfo{"VPSRLD", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_13, 1, 0b100), 1, X86InstInfo{"VPSRAD", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_13, 1, 0b110), 1, X86InstInfo{"VPSLLD", TYPE_INST, VectorShiftFlags, 1}},

    // VEX group 14
    {OPD(TYPE_VEX_GROUP_14, 1, 0b010), 1, X86InstInfo{"VPSRLQ", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_14, 1, 0b011), 1, X86InstInfo{"VPSRLDQ", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_14, 1, 0b110), 1, X86InstInfo{"VPSLLQ", TYPE_INST, VectorShiftFlags, 1}},
    {OPD(TYPE_VEX_GROUP_14, 1, 0b111), 1, X86InstInfo{"VPSLLDQ", TYPE_INST, VectorShiftFlags, 1}},

    // VEX group 15
    {OPD(TYPE_VEX_GROUP_15, 0, 0b010), 1, X86InstInfo{"VLDMXCSR", TYPE_INST, MXCSRAccessFlags, 0}},
    {OPD(TYPE_VEX_GROUP_15, 0, 0b011), 1, X86InstInfo{"VSTMXCSR", TYPE_INST, MXCSRAccessFlags, 0}},

    // VEX group 17
    {OPD(TYPE_VEX_GROUP_17, 0, 0b001), 1, X86InstInfo{"BLSR", TYPE_INST, Group17Flags, 0}},
    {OPD(TYPE_VEX_GROUP_17, 0, 0b010), 1, X86InstInfo{"BLSMSK", TYPE_INST, Group17Flags, 0}},
    {OPD(TYPE_VEX_GROUP_17, 0, 0b011), 1, X86InstInfo{"BLSI", TYPE_INST, Group17Flags, 0}},
  };
#undef OPD

  // Expand the static rows into the result array.
  GenerateTable(GroupOps.data(), GroupRows, std::size(GroupRows));

  // Same installation order as before: the shared dispatch table first, the runtime table second.
  IR::InstallToTable(GroupOps, IR::OpDispatch_VEXGroupTable);
  IR::InstallToTable(GroupOps, RuntimeTable);
  return GroupOps;
};

// VEX decode tables produced by passing the AVX256 runtime tables to the builder lambdas.
// GroupTableLambda is consteval, so the group table is computed during compilation.
const std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps = BaseTableLambda(std::to_array(AVX256::BaseTable));
const std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> VEXTableGroupOps = GroupTableLambda(std::to_array(AVX256::TableGroupOps));

// The same pair of tables, produced from the AVX128 runtime tables instead.
const std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps_AVX128 = BaseTableLambda(std::to_array(AVX128::BaseTable));
const std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> VEXTableGroupOps_AVX128 = GroupTableLambda(std::to_array(AVX128::TableGroupOps));
}


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/X86Tables.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#pragma once

#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

namespace FEXCore::IR {
// Forward declaration so this header can name the type without pulling in its definition.
class OpDispatchBuilder;
}

namespace FEXCore::X86Tables {
struct X86InstInfo;

namespace DecodeFlags {
  // Single-bit flags occupying bits [0, 10].
  constexpr uint32_t FLAG_OPERAND_SIZE = 1U << 0;
  constexpr uint32_t FLAG_ADDRESS_SIZE = 1U << 1;
  constexpr uint32_t FLAG_LOCK = 1U << 2;
  constexpr uint32_t FLAG_LEGACY_PREFIX = 1U << 3;
  constexpr uint32_t FLAG_REX_PREFIX = 1U << 4;
  constexpr uint32_t FLAG_VSIB_BYTE = 1U << 5;
  constexpr uint32_t FLAG_OPTION_AVX_W = 1U << 6;
  constexpr uint32_t FLAG_REX_WIDENING = 1U << 7;
  constexpr uint32_t FLAG_REX_XGPR_B = 1U << 8;
  constexpr uint32_t FLAG_REX_XGPR_X = 1U << 9;
  constexpr uint32_t FLAG_REX_XGPR_R = 1U << 10;

  // Three-bit segment prefix selector stored in bits [11, 13].
  // FLAG_SEGMENTS is the mask covering the whole field.
  constexpr uint32_t FLAG_NO_PREFIX = 0b000U << 11;
  constexpr uint32_t FLAG_ES_PREFIX = 0b001U << 11;
  constexpr uint32_t FLAG_CS_PREFIX = 0b010U << 11;
  constexpr uint32_t FLAG_SS_PREFIX = 0b011U << 11;
  constexpr uint32_t FLAG_DS_PREFIX = 0b100U << 11;
  constexpr uint32_t FLAG_FS_PREFIX = 0b101U << 11;
  constexpr uint32_t FLAG_GS_PREFIX = 0b110U << 11;
  constexpr uint32_t FLAG_SEGMENTS = 0b111U << 11;

  // Single-bit flags occupying bits [14, 18].
  constexpr uint32_t FLAG_FORCE_TSO = 1U << 14;
  constexpr uint32_t FLAG_DECODED_MODRM = 1U << 15;
  constexpr uint32_t FLAG_DECODED_SIB = 1U << 16;
  constexpr uint32_t FLAG_REP_PREFIX = 1U << 17;
  constexpr uint32_t FLAG_REPNE_PREFIX = 1U << 18;

  // Size flags: two adjacent three-bit fields (destination first, then source).
  constexpr uint32_t FLAG_SIZE_DST_OFF = 19;
  constexpr uint32_t FLAG_SIZE_SRC_OFF = FLAG_SIZE_DST_OFF + 3;
  constexpr uint32_t SIZE_MASK = 0b111;
  constexpr uint32_t SIZE_DEF = 0b000; // This should be invalid past decoding
  constexpr uint32_t SIZE_8BIT = 0b001;
  constexpr uint32_t SIZE_16BIT = 0b010;
  constexpr uint32_t SIZE_32BIT = 0b011;
  constexpr uint32_t SIZE_64BIT = 0b100;
  constexpr uint32_t SIZE_128BIT = 0b101;
  constexpr uint32_t SIZE_256BIT = 0b110;

  // Small stack of two-bit entries stored directly above the size fields.
  constexpr uint32_t FLAG_OPADDR_OFF = FLAG_SIZE_SRC_OFF + 3;
  constexpr uint32_t FLAG_OPADDR_STACKSIZE = 4; // Two level deep stack (two entries of FLAG_OPADDR_FLAG_SIZE bits)
  constexpr uint32_t FLAG_OPADDR_FLAG_SIZE = 2;
  constexpr uint32_t FLAG_OPADDR_MASK = ((1U << FLAG_OPADDR_STACKSIZE) - 1) << FLAG_OPADDR_OFF;

  // Values of a stack entry. 00 = NONE
  constexpr uint32_t FLAG_OPERAND_SIZE_LAST = 0b01;
  constexpr uint32_t FLAG_WIDENING_SIZE_LAST = 0b10;

  // Extracts the three-bit destination size field from Flags.
  constexpr uint32_t GetSizeDstFlags(uint32_t Flags) {
    const uint32_t Shifted = Flags >> FLAG_SIZE_DST_OFF;
    return Shifted & SIZE_MASK;
  }

  // Extracts the three-bit source size field from Flags.
  constexpr uint32_t GetSizeSrcFlags(uint32_t Flags) {
    const uint32_t Shifted = Flags >> FLAG_SIZE_SRC_OFF;
    return Shifted & SIZE_MASK;
  }

  // Positions a SIZE_* value in the destination size field.
  constexpr uint32_t GenSizeDstSize(uint32_t Size) {
    return Size << FLAG_SIZE_DST_OFF;
  }

  // Positions a SIZE_* value in the source size field.
  constexpr uint32_t GenSizeSrcSize(uint32_t Size) {
    return Size << FLAG_SIZE_SRC_OFF;
  }

  // Reads stack entry `Index` (0 is the bottom, i.e. the most recently pushed entry).
  constexpr uint32_t GetOpAddr(uint32_t Flags, uint32_t Index) {
    constexpr uint32_t EntryMask = (1U << FLAG_OPADDR_FLAG_SIZE) - 1;
    const uint32_t Stack = (Flags & FLAG_OPADDR_MASK) >> FLAG_OPADDR_OFF;
    return (Stack >> (Index * FLAG_OPADDR_FLAG_SIZE)) & EntryMask;
  }

  // Pushes `Flag` onto the stack: the current bottom entry moves up one slot
  // (the previous upper entry is discarded) and `Flag` becomes the new bottom.
  inline void PushOpAddr(uint32_t* Flags, uint32_t Flag) {
    constexpr uint32_t EntryMask = (1U << FLAG_OPADDR_FLAG_SIZE) - 1;
    const uint32_t Current = *Flags;
    const uint32_t Bottom = ((Current & FLAG_OPADDR_MASK) >> FLAG_OPADDR_OFF) & EntryMask;
    const uint32_t NewStack = (Bottom << (FLAG_OPADDR_OFF + FLAG_OPADDR_FLAG_SIZE)) | (Flag << FLAG_OPADDR_OFF);

    *Flags = (Current & ~FLAG_OPADDR_MASK) | NewStack;
  }

  // Pops the bottom entry, but only when it equals `Flag`.
  // Necessary for escape prefixes that overlap regular prefixes.
  inline void PopOpAddrIf(uint32_t* Flags, uint32_t Flag) {
    constexpr uint32_t EntryMask = (1U << FLAG_OPADDR_FLAG_SIZE) - 1;
    const uint32_t Current = *Flags;
    const uint32_t Stack = (Current & FLAG_OPADDR_MASK) >> FLAG_OPADDR_OFF;

    if ((Stack & EntryMask) == Flag) {
      // The upper entry drops down to the bottom slot; the upper slot becomes empty.
      const uint32_t Top = (Stack >> FLAG_OPADDR_FLAG_SIZE) & EntryMask;
      *Flags = (Current & ~FLAG_OPADDR_MASK) | (Top << FLAG_OPADDR_OFF);
    }
  }

} // namespace DecodeFlags

// A decoded instruction operand: a type tag (`Type`) plus a payload union (`Data`).
struct DecodedOperand {
  // Tag describing what kind of operand this is.
  enum class OpType : uint8_t {
    Nothing,
    GPR,
    GPRDirect,
    GPRIndirect,
    GPRIndirectRelocation,
    RIPRelative,
    RIPRelativeRelocation,
    Literal,
    LiteralRelocation,
    SIB,
    SIBRelocation
  };

  // One predicate per OpType enumerator; each is true only for an exact tag match.
  bool IsNone() const { return HasType(OpType::Nothing); }
  bool IsGPR() const { return HasType(OpType::GPR); }
  bool IsGPRDirect() const { return HasType(OpType::GPRDirect); }
  bool IsGPRIndirect() const { return HasType(OpType::GPRIndirect); }
  bool IsGPRIndirectRelocation() const { return HasType(OpType::GPRIndirectRelocation); }
  bool IsRIPRelative() const { return HasType(OpType::RIPRelative); }
  bool IsRIPRelativeRelocation() const { return HasType(OpType::RIPRelativeRelocation); }
  bool IsLiteral() const { return HasType(OpType::Literal); }
  bool IsLiteralRelocation() const { return HasType(OpType::LiteralRelocation); }
  bool IsSIB() const { return HasType(OpType::SIB); }
  bool IsSIBRelocation() const { return HasType(OpType::SIBRelocation); }

  // Returns the literal payload's value. The operand is expected to be tagged
  // OpType::Literal; this is checked through LOGMAN_THROW_A_FMT.
  uint64_t Literal() const {
    LOGMAN_THROW_A_FMT(IsLiteral(), "Precondition: must be a literal");
    return Data.Literal.Value;
  }

  // Payload storage; several tags share one member, as noted on each member.
  union TypeUnion {
    struct GPRType {
      bool HighBits;
      uint8_t GPR;
      auto operator<=>(const GPRType&) const = default;
    } GPR;

    struct {
      int64_t Displacement;
      uint8_t GPR;
    } GPRIndirect; // Shared with GPRIndirectRelocation

    struct {
      int64_t Value;
    } RIPLiteral; // Shared with RIPLiteralRelocation

    struct LiteralType {
      uint64_t Value;
      uint8_t Size;
    } Literal;

    struct {
      int64_t EntrypointOffset;
    } LiteralRelocation;

    struct {
      int64_t Offset;
      uint8_t Scale;
      uint8_t Index; // ~0 invalid
      uint8_t Base;  // ~0 invalid
    } SIB;           // Shared with SIBRelocation
  };

  TypeUnion Data;
  OpType Type;

private:
  // Shared implementation of the Is*() predicates.
  bool HasType(OpType Expected) const { return Type == Expected; }
};

// One decoded instruction: its operands, a pointer to its table entry, and raw decode state.
struct DecodedInst {
  uint64_t PC;

  // One destination operand and up to three source operands.
  DecodedOperand Dest;
  DecodedOperand Src[3];

  // Contains the dispatcher handler pointer
  const X86InstInfo* TableInfo;

  // NOTE(review): presumably a bitmask of DecodeFlags values — confirm against the decoder.
  uint32_t Flags;
  uint16_t OP;
  uint8_t OPRaw;

  uint8_t ModRM;
  uint8_t SIB;
  uint8_t InstSize;
  int8_t REXIndex;
};

// View of a ModRM byte either as the raw value (`Hex`) or as its rm/reg/mod bit-fields.
// Bit-field allocation order is implementation-defined; the declaration order
// (rm, reg, mod) lines up with the byte when fields are allocated from the least significant bit.
union ModRMDecoded {
  uint8_t Hex {};
  struct {
    uint8_t rm  : 3;
    uint8_t reg : 3;
    uint8_t mod : 2;
  };
};

// View of a SIB byte either as the raw value (`Hex`) or as its base/index/scale bit-fields.
// Bit-field allocation order is implementation-defined; the declaration order
// (base, index, scale) lines up with the byte when fields are allocated from the least significant bit.
union SIBDecoded {
  uint8_t Hex {};
  struct {
    uint8_t base  : 3;
    uint8_t index : 3;
    uint8_t scale : 2;
  };
};

// Classification of a decode table entry (stored in X86InstInfo::Type).
// Enumerator order matters: the group ranges below are documented as "must be in order",
// and several names are deliberate aliases of another enumerator.
enum InstType {
  // Value of an entry that has not been filled in; the Generate*Table helpers test against it.
  TYPE_UNKNOWN,
  TYPE_LEGACY_PREFIX,
  TYPE_PREFIX,
  TYPE_REX_PREFIX,
  TYPE_SECONDARY_TABLE_PREFIX,
  TYPE_X87_TABLE_PREFIX,
  TYPE_VEX_TABLE_PREFIX,
  TYPE_INST,
  // Alias of TYPE_INST; TYPE_INVALID therefore continues at TYPE_INST + 1.
  TYPE_X87 = TYPE_INST,
  TYPE_INVALID,
  // Handled by GenerateTableWithCopy: the entry is taken from the other table instead.
  TYPE_COPY_OTHER,
  // Changes `X86InstInfo::OpcodeDispatcher` member to use the `Indirect` version.
  // Points to a 2 member array of X86InstInfo to choose instruction description based on executing bitness.
  TYPE_ARCH_DISPATCHER,

  // Must be in order
  // Groups 1, 1a, 2, 3, 4, 5, 11 are for the primary op table
  // Groups 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, p are for the secondary op table
  TYPE_GROUP_1,
  TYPE_GROUP_1A,
  TYPE_GROUP_2,
  TYPE_GROUP_3,
  TYPE_GROUP_4,
  TYPE_GROUP_5,
  TYPE_GROUP_11,

  // Must be in order
  // Groups 6-p Are for the secondary op table
  TYPE_GROUP_6,
  TYPE_GROUP_7,
  TYPE_GROUP_8,
  TYPE_GROUP_9,
  TYPE_GROUP_10,
  TYPE_GROUP_12,
  TYPE_GROUP_13,
  TYPE_GROUP_14,
  TYPE_GROUP_15,
  TYPE_GROUP_16,
  TYPE_GROUP_17,
  TYPE_GROUP_P,

  // The secondary op extension table allows further extensions
  // Group 7 allows additional extensions to this table
  TYPE_SECOND_GROUP_MODRM,

  TYPE_VEX_GROUP_12,
  TYPE_VEX_GROUP_13,
  TYPE_VEX_GROUP_14,
  TYPE_VEX_GROUP_15,
  TYPE_VEX_GROUP_17,

  TYPE_GROUP_EVEX,

  // Exists in the table but isn't decoded correctly
  // (all of the following are aliases of TYPE_INVALID)
  TYPE_UNDEC = TYPE_INVALID,
  TYPE_MMX = TYPE_INVALID,
  TYPE_PRIV = TYPE_INVALID,
  TYPE_0F38_TABLE = TYPE_INVALID,
  TYPE_0F3A_TABLE = TYPE_INVALID,
  TYPE_3DNOW_TABLE = TYPE_INVALID,
};

// Per-instruction table flags (stored in X86InstInfo::Flags).
// The low bits hold behavior flags; the top six bits hold the destination and source size fields.
namespace InstFlags {

  using InstFlagType = uint64_t;

  constexpr InstFlagType FLAGS_NONE = 0;
  // The secondary Opcode Map uses prefix bytes to overlay more instruction
  // But some instructions need to ignore this overlay and consume these prefixes.
  constexpr InstFlagType FLAGS_NO_OVERLAY = (1ULL << 0);
  // Some instructions partially ignore overlay
  // Ignore OpSize (0x66) in this case
  constexpr InstFlagType FLAGS_NO_OVERLAY66 = (1ULL << 1);
  constexpr InstFlagType FLAGS_DEBUG_MEM_ACCESS = (1ULL << 2);
  // Only SEXT if the instruction is operating in 64bit operand size
  constexpr InstFlagType FLAGS_SRC_SEXT64BIT = (1ULL << 3);
  constexpr InstFlagType FLAGS_BLOCK_END = (1ULL << 4);
  constexpr InstFlagType FLAGS_SETS_RIP = (1ULL << 5);

  constexpr InstFlagType FLAGS_DISPLACE_SIZE_MUL_2 = (1ULL << 6);
  constexpr InstFlagType FLAGS_DISPLACE_SIZE_DIV_2 = (1ULL << 7);
  constexpr InstFlagType FLAGS_SRC_SEXT = (1ULL << 8);
  constexpr InstFlagType FLAGS_MEM_OFFSET = (1ULL << 9);

  // Enables XMM based subflags
  // Current reserved range for this SF is [10, 15]
  constexpr InstFlagType FLAGS_XMM_FLAGS = (1ULL << 10);

  // X87 flags aliased to XMM flags selection
  // Allows X87 instruction table that is abusing the flag for 64BIT selection to work
  constexpr InstFlagType FLAGS_X87_FLAGS = (1ULL << 10);

  // Non-XMM subflags
  // These share bits [11, 15] with the XMM subflags below; FLAGS_XMM_FLAGS selects the interpretation
  // (see HAS_XMM_SUBFLAG / HAS_NON_XMM_SUBFLAG).
  constexpr InstFlagType FLAGS_SF_DST_RAX = (1ULL << 11);
  constexpr InstFlagType FLAGS_SF_DST_RDX = (1ULL << 12);
  constexpr InstFlagType FLAGS_SF_SRC_RAX = (1ULL << 13);
  constexpr InstFlagType FLAGS_SF_SRC_RCX = (1ULL << 14);
  constexpr InstFlagType FLAGS_SF_REX_IN_BYTE = (1ULL << 15);

  // XMM subflags
  constexpr InstFlagType FLAGS_SF_UNUSED = (1ULL << 11); // No assigned behavior yet
  constexpr InstFlagType FLAGS_SF_DST_GPR = (1ULL << 12);
  constexpr InstFlagType FLAGS_SF_SRC_GPR = (1ULL << 13);
  constexpr InstFlagType FLAGS_SF_MMX_DST = (1ULL << 14);
  constexpr InstFlagType FLAGS_SF_MMX_SRC = (1ULL << 15);
  constexpr InstFlagType FLAGS_SF_MMX = FLAGS_SF_MMX_DST | FLAGS_SF_MMX_SRC;

  // Enables MODRM specific subflags
  // Current reserved range for this SF is [16, 20]
  constexpr InstFlagType FLAGS_MODRM = (1ULL << 16);

  // With ModRM SF flag enabled
  // Direction of ModRM. Dst ^ Src
  // Set means destination is rm bits
  // Unset means src is rm bits
  constexpr InstFlagType FLAGS_SF_MOD_DST = (1ULL << 17);

  // If the instruction is restricted to mem or reg only
  // 0b00 = Regular ModRM support
  // 0b01 = Memory accesses only
  // 0b10 = Register accesses only
  // 0b11 = <Reserved>
  constexpr InstFlagType FLAGS_SF_MOD_MEM_ONLY = (1ULL << 18);
  constexpr InstFlagType FLAGS_SF_MOD_REG_ONLY = (1ULL << 19);

  constexpr InstFlagType FLAGS_SF_MOD_ZERO_REG = (1ULL << 20);

  // x87
  constexpr InstFlagType FLAGS_POP = (1ULL << 21);

  // Whether or not the instruction has a VEX prefix for the dest, first, or second source.
  constexpr InstFlagType FLAGS_VEX_SRC_MASK = (0b11ULL << 22);
  constexpr InstFlagType FLAGS_VEX_NO_OPERAND = (0b00ULL << 22);
  constexpr InstFlagType FLAGS_VEX_DST = (0b01ULL << 22);
  constexpr InstFlagType FLAGS_VEX_1ST_SRC = (0b10ULL << 22);
  constexpr InstFlagType FLAGS_VEX_2ND_SRC = (0b11ULL << 22);
  // Whether or not the instruction has a VSIB byte
  constexpr InstFlagType FLAGS_VEX_VSIB = (1ULL << 24);
  constexpr InstFlagType FLAGS_VEX_L_IGNORE = (1ULL << 25);
  constexpr InstFlagType FLAGS_VEX_L_0 = (1ULL << 26);
  constexpr InstFlagType FLAGS_VEX_L_1 = (1ULL << 27);

  constexpr InstFlagType FLAGS_REX_W_0 = (1ULL << 28);
  constexpr InstFlagType FLAGS_REX_W_1 = (1ULL << 29);

  constexpr InstFlagType FLAGS_CALL = (1ULL << 30);

  // Destination size lives in bits [58, 60], source size in bits [61, 63].
  constexpr InstFlagType FLAGS_SIZE_DST_OFF = 58;
  constexpr InstFlagType FLAGS_SIZE_SRC_OFF = FLAGS_SIZE_DST_OFF + 3;

  constexpr InstFlagType SIZE_MASK = 0b111;
  constexpr InstFlagType SIZE_DEF = 0b000;
  constexpr InstFlagType SIZE_8BIT = 0b001;
  constexpr InstFlagType SIZE_16BIT = 0b010;
  constexpr InstFlagType SIZE_32BIT = 0b011;
  constexpr InstFlagType SIZE_64BIT = 0b100;
  constexpr InstFlagType SIZE_128BIT = 0b101;
  constexpr InstFlagType SIZE_256BIT = 0b110;
  constexpr InstFlagType SIZE_64BITDEF = 0b111; // Default mode is 64bit instead of typical 32bit

  // Declared as InstFlagType like every other flag so that flags above bit 31
  // (for example the size fields) can never be silently truncated out of this constant.
#ifndef _WIN32
  constexpr InstFlagType DEFAULT_SYSCALL_FLAGS = FLAGS_NO_OVERLAY;
#else
  // Syscall ends a block on WIN32 because the instruction can update the CPU's RIP.
  constexpr InstFlagType DEFAULT_SYSCALL_FLAGS = FLAGS_NO_OVERLAY | FLAGS_BLOCK_END;
#endif

  // Extracts the three-bit destination size field.
  constexpr InstFlagType GetSizeDstFlags(InstFlagType Flags) {
    return (Flags >> FLAGS_SIZE_DST_OFF) & SIZE_MASK;
  }
  // Extracts the three-bit source size field.
  constexpr InstFlagType GetSizeSrcFlags(InstFlagType Flags) {
    return (Flags >> FLAGS_SIZE_SRC_OFF) & SIZE_MASK;
  }

  // Positions a SIZE_* value in the destination size field.
  constexpr InstFlagType GenFlagsDstSize(InstFlagType Size) {
    return Size << FLAGS_SIZE_DST_OFF;
  }
  // Positions a SIZE_* value in the source size field.
  constexpr InstFlagType GenFlagsSrcSize(InstFlagType Size) {
    return Size << FLAGS_SIZE_SRC_OFF;
  }
  // Uses the same SIZE_* value for both destination and source.
  constexpr InstFlagType GenFlagsSameSize(InstFlagType Size) {
    return (Size << FLAGS_SIZE_DST_OFF) | (Size << FLAGS_SIZE_SRC_OFF);
  }
  // Combines independent destination and source SIZE_* values.
  constexpr InstFlagType GenFlagsSizes(InstFlagType Dest, InstFlagType Src) {
    return (Dest << FLAGS_SIZE_DST_OFF) | (Src << FLAGS_SIZE_SRC_OFF);
  }

// If it has an xmm subflag
#define HAS_XMM_SUBFLAG(x, flag) \
  (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag)))

// If it has non-xmm subflag
#define HAS_NON_XMM_SUBFLAG(x, flag) (((x) & (FEXCore::X86Tables::InstFlags::FLAGS_XMM_FLAGS | (flag))) == (flag))
} // namespace InstFlags

// Maps a primary-table group opcode byte to its index within that group.
// Opcodes not listed here (and each group's first opcode) map to 0.
//
//   Group 1 : 0x80..0x83               -> 0..3
//   Group 2 : 0xC0, 0xC1, 0xD0..0xD3   -> 0..5
//   Group 3 : 0xF6, 0xF7               -> 0, 1
//   Group 4 : 0xFE                     -> 0
//   Group 5 : 0xFF                     -> 0
//   Group 11: 0xC6, 0xC7               -> 0, 1
constexpr uint8_t OpToIndex(uint8_t Op) {
  switch (Op) {
  case 0x81: // Group 1
  case 0xC1: // Group 2
  case 0xF7: // Group 3
  case 0xC7: // Group 11
    return 1;
  case 0x82: // Group 1
  case 0xD0: // Group 2
    return 2;
  case 0x83: // Group 1
  case 0xD1: // Group 2
    return 3;
  case 0xD2: // Group 2
    return 4;
  case 0xD3: // Group 2
    return 5;
  default:
    // 0x80, 0xC0, 0xF6, 0xFE, 0xFF, 0xC6 and every unlisted opcode.
    return 0;
  }
}

// Handlers receive the decoded instruction through a pointer-to-const.
using DecodedOp = const DecodedInst*;
// Pointer to an IR::OpDispatchBuilder member function that takes a DecodedOp.
using OpDispatchPtr = void (IR::OpDispatchBuilder::*)(DecodedOp);

// Dispatcher slot of an X86InstInfo: either a handler (`OpDispatch`) or, for
// TYPE_ARCH_DISPATCHER entries, a pointer to further X86InstInfo entries (`Indirect`).
union OpDispatchPtrWrapper {
  OpDispatchPtr OpDispatch;
  const struct X86InstInfo* Indirect;
};

// One entry of an instruction decode table.
struct X86InstInfo {
  const char* Name;
  InstType Type;
  InstFlags::InstFlagType Flags; ///< Must be larger than InstFlags enum
  uint8_t MoreBytes;
  OpDispatchPtrWrapper OpcodeDispatcher;

  // Compares Name (by string content), Type, Flags and MoreBytes.
  // We don't care if the opcode dispatcher differs.
  bool operator==(const X86InstInfo& b) const {
    if (Type != b.Type || Flags != b.Flags || MoreBytes != b.MoreBytes) {
      return false;
    }

    // Identical pointers (including two null names) compare equal without touching the strings.
    if (Name == b.Name) {
      return true;
    }

    // A value-initialized entry has a null Name; passing nullptr to strcmp is undefined behavior,
    // so a null name only ever matches another null name (handled above).
    if (Name == nullptr || b.Name == nullptr) {
      return false;
    }

    return strcmp(Name, b.Name) == 0;
  }
  bool operator!=(const X86InstInfo& b) const {
    return !operator==(b);
  }
};

static_assert(std::is_trivially_copyable_v<X86InstInfo>);

// Element counts of the decode tables declared below.
constexpr size_t MAX_PRIMARY_TABLE_SIZE = 256;
constexpr size_t MAX_SECOND_TABLE_SIZE = 256;
constexpr size_t MAX_REP_MOD_TABLE_SIZE = 256;
constexpr size_t MAX_REPNE_MOD_TABLE_SIZE = 256;
constexpr size_t MAX_OPSIZE_MOD_TABLE_SIZE = 256;
// 6 (groups) | 6 (max indexes) | 8 ops = 0b111'111'111 = 9 bits
constexpr size_t MAX_INST_GROUP_TABLE_SIZE = 512;
// 12 (groups) | 3(max indexes) | 8 ops = 0b1111'11'111 = 9 bits
constexpr size_t MAX_INST_SECOND_GROUP_TABLE_SIZE = 512;
// NOTE(review): presumably (opcode - 0xD8) in 3 bits | 8 bit ModRM = 11 bits — confirm against the x87 table index macros.
constexpr size_t MAX_X87_TABLE_SIZE = 1 << 11;
constexpr size_t MAX_SECOND_MODRM_TABLE_SIZE = 32;
// (3 bit prefixes) | 8 bit opcode
constexpr size_t MAX_0F_38_TABLE_SIZE = (1 << 11);
// 1 REX | 1 prefixes | 8 bit opcode
// NOTE(review): the fields listed above add up to 10 bits but the table has 1 << 11 entries — confirm which is current.
constexpr size_t MAX_0F_3A_TABLE_SIZE = (1 << 11);
constexpr size_t MAX_3DNOW_TABLE_SIZE = 256;
// VEX
// map_select(2 bits for now) | vex.pp (2 bits) | opcode (8bit)
// NOTE(review): the fields listed above add up to 12 bits but the table has 1 << 13 entries — confirm which is current.
constexpr size_t MAX_VEX_TABLE_SIZE = (1 << 13);
// VEX group ops
// group select (3 bits for now) | ModRM opcode (3 bits)
// NOTE(review): the fields listed above add up to 6 bits but the table has 1 << 7 entries — confirm which is current.
constexpr size_t MAX_VEX_GROUP_TABLE_SIZE = (1 << 7);

// Declarations of the decode tables; each is sized by the matching MAX_*_TABLE_SIZE constant.
extern const std::array<X86InstInfo, MAX_PRIMARY_TABLE_SIZE> BaseOps;
extern const std::array<X86InstInfo, MAX_SECOND_TABLE_SIZE> SecondBaseOps;
extern const std::array<X86InstInfo, MAX_REP_MOD_TABLE_SIZE> RepModOps;
extern const std::array<X86InstInfo, MAX_REPNE_MOD_TABLE_SIZE> RepNEModOps;
extern const std::array<X86InstInfo, MAX_OPSIZE_MOD_TABLE_SIZE> OpSizeModOps;

extern const std::array<X86InstInfo, MAX_INST_GROUP_TABLE_SIZE> PrimaryInstGroupOps;
extern const std::array<X86InstInfo, MAX_INST_SECOND_GROUP_TABLE_SIZE> SecondInstGroupOps;
extern const std::array<X86InstInfo, MAX_SECOND_MODRM_TABLE_SIZE> SecondModRMTableOps;
// Two x87 tables of identical size (F80 and F64 variants).
extern const std::array<X86InstInfo, MAX_X87_TABLE_SIZE> X87F80Ops;
extern const std::array<X86InstInfo, MAX_X87_TABLE_SIZE> X87F64Ops;
extern const std::array<X86InstInfo, MAX_3DNOW_TABLE_SIZE> DDDNowOps;
extern const std::array<X86InstInfo, MAX_0F_38_TABLE_SIZE> H0F38TableOps;
extern const std::array<X86InstInfo, MAX_0F_3A_TABLE_SIZE> H0F3ATableOps;

// VEX
extern const std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps;
extern const std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> VEXTableGroupOps;

// VEX tables for the AVX128 variant.
extern const std::array<X86InstInfo, MAX_VEX_TABLE_SIZE> VEXTableOps_AVX128;
extern const std::array<X86InstInfo, MAX_VEX_GROUP_TABLE_SIZE> VEXTableGroupOps_AVX128;

// Compact description of a run of table entries, consumed by the Generate*Table helpers:
//   first  - index of the first table slot to fill
//   second - number of consecutive slots to fill
//   Info   - the entry copied into each of those slots
template<typename OpcodeType>
struct X86TablesInfoStruct {
  OpcodeType first;
  uint8_t second;
  X86InstInfo Info;
};
// Variants with an 8-bit and a 16-bit starting index.
using U8U8InfoStruct = X86TablesInfoStruct<uint8_t>;
using U16U8InfoStruct = X86TablesInfoStruct<uint16_t>;

// Expands `TableSize` run descriptions from `LocalTable` into `FinalTable`.
// Each description fills `second` consecutive slots starting at index `first` with its `Info`.
// A message is raised if a slot was already populated or already has a dispatcher installed;
// the slot is overwritten either way.
template<typename OpcodeType>
constexpr static inline void GenerateTable(X86InstInfo* FinalTable, const X86TablesInfoStruct<OpcodeType>* LocalTable, size_t TableSize) {
  for (size_t Idx = 0; Idx != TableSize; ++Idx) {
    const auto& Entry = LocalTable[Idx];
    const auto FirstOp = Entry.first;

    for (uint32_t Offset = 0; Offset < Entry.second; ++Offset) {
      X86InstInfo& Slot = FinalTable[FirstOp + Offset];

      if (Slot.Type != TYPE_UNKNOWN) {
        LOGMAN_MSG_A_FMT("Duplicate Entry {}->{}", Slot.Name, Entry.Info.Name);
      }
      if (Slot.OpcodeDispatcher.OpDispatch) {
        LOGMAN_MSG_A_FMT("Already installed an OpcodeDispatcher for 0x{:x}", FirstOp + Offset);
      }

      Slot = Entry.Info;
    }
  }
}

// Like GenerateTable, except that descriptions whose Info.Type is TYPE_COPY_OTHER
// take each slot's contents from the same index of `OtherLocal` instead of from `Info`.
// A message is raised if a slot was already populated; the slot is overwritten either way.
template<typename OpcodeType>
constexpr static inline void GenerateTableWithCopy(X86InstInfo* FinalTable, const X86TablesInfoStruct<OpcodeType>* LocalTable,
                                                   size_t TableSize, const X86InstInfo* OtherLocal) {
  for (size_t Idx = 0; Idx != TableSize; ++Idx) {
    const auto& Entry = LocalTable[Idx];
    const auto FirstOp = Entry.first;
    const bool CopyFromOther = Entry.Info.Type == TYPE_COPY_OTHER;

    for (uint32_t Offset = 0; Offset < Entry.second; ++Offset) {
      X86InstInfo& Slot = FinalTable[FirstOp + Offset];

      if (Slot.Type != TYPE_UNKNOWN) {
        LOGMAN_MSG_A_FMT("Duplicate Entry {}->{}", Slot.Name, Entry.Info.Name);
      }

      Slot = CopyFromOther ? OtherLocal[FirstOp + Offset] : Entry.Info;
    }
  }
}

// Expands x87 run descriptions from `LocalTable` into `FinalTable`.
// The low eight bits of an index are treated as a ModRM byte:
//  - mod == 0b11: the description fills `second` consecutive slots, as in GenerateTable.
//  - mod == 0b00: the description is replicated across mod values 0b00..0b10 and every rm value.
//  - any other mod value is rejected with ERROR_AND_DIE_FMT.
// A message is raised if the slot at `first + offset` was already populated.
template<typename OpcodeType>
constexpr static inline void GenerateX87Table(X86InstInfo* FinalTable, const X86TablesInfoStruct<OpcodeType>* LocalTable, size_t TableSize) {
  constexpr uint16_t ModMask = 0b11'000'000;
  constexpr uint16_t ModStep = 0b01'000'000;
  constexpr uint16_t RMCount = 0b1'000;

  for (size_t Idx = 0; Idx != TableSize; ++Idx) {
    const auto& Entry = LocalTable[Idx];
    const auto FirstOp = Entry.first;
    const auto ModBits = FirstOp & ModMask;

    for (uint32_t Offset = 0; Offset < Entry.second; ++Offset) {
      if (FinalTable[FirstOp + Offset].Type != TYPE_UNKNOWN) {
        LOGMAN_MSG_A_FMT("Duplicate Entry {}->{}", FinalTable[FirstOp + Offset].Name, Entry.Info.Name);
      }

      if (ModBits == ModMask) {
        // If the mod field is 0b11 then it is a regular op
        FinalTable[FirstOp + Offset] = Entry.Info;
        continue;
      }

      // If the mod field is !0b11 then this instruction is duplicated through the whole mod [0b00, 0b10] range
      // and the modrm.rm space because that is used part of the instruction encoding
      if (ModBits != 0) {
        ERROR_AND_DIE_FMT("Only support mod field of zero in this path");
      }

      for (uint16_t mod = 0; mod < ModMask; mod += ModStep) {
        for (uint16_t rm = 0; rm < RMCount; ++rm) {
          FinalTable[(FirstOp | mod | rm) + Offset] = Entry.Info;
        }
      }
    }
  }
}

// NOTE(review): presumably makes DecodedOperand::OpType usable as a format argument in log messages;
// the macro is not defined in this header (likely FEXCore/Utils/EnumUtils.h) — confirm.
FEX_DEFINE_ENUM_FMT_PASSTHROUGH(FEXCore::X86Tables::DecodedOperand::OpType);

} // namespace FEXCore::X86Tables


================================================
FILE: FEXCore/Source/Interface/Core/X86Tables/X87Tables.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: frontend|x86-tables
$end_info$
*/

#include "Interface/Core/X86Tables/X86Tables.h"
#include "Interface/Core/OpcodeDispatcher.h"

#include <iterator>

namespace FEXCore::X86Tables {
using namespace InstFlags;
using namespace IR;
// Table index builders for x87 opcodes 0xD8..0xDF.
// Top bit indicating if it needs to be repeated with {0x40, 0x80} or'd in
// All OPDReg versions need it
// Every macro argument is parenthesized so that compound expressions
// (e.g. `a | b` or `c ? x : y`) expand with the intended precedence.
#define OPDReg(op, reg) ((1 << 15) | (((op) - 0xD8) << 8) | ((reg) << 3))
#define OPD(op, modrmop) ((((op) - 0xD8) << 8) | (modrmop))
// x87 dispatch table whose arithmetic/load/compare handlers are the *F64 variants of OpDispatchBuilder
// (FADDF64, FLDF64, FCOMIF64, ...). Constants loaded through FLDF64_Const are given as 64-bit bit patterns.
// Each entry is {key built with OPD/OPDReg, count, handler}.
// NOTE(review): `count` is presumably the number of consecutive keys the handler is installed for
// (8 for a full low-three-bit range, 1 for a single encoding) - confirm against the DispatchTableEntry consumer.
// Entries commented as "Invalid" are intentionally absent from the table.
constexpr std::array<DispatchTableEntry, 140> X87F64OpTable = {{
  {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xD8, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xD8, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xD0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xD8, 0xD8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::i32Bit>},

  // 1 = Invalid

  {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>},

  {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>},

  {OPDReg(0xD9, 4) | 0x00, 8, &OpDispatchBuilder::X87LDENVF64},

  {OPDReg(0xD9, 5) | 0x00, 8, &OpDispatchBuilder::X87FLDCWF64},

  {OPDReg(0xD9, 6) | 0x00, 8, &OpDispatchBuilder::X87FNSTENV},

  {OPDReg(0xD9, 7) | 0x00, 8, &OpDispatchBuilder::X87FSTCW},

  {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDFromStack>},
  {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP
  // D1 = Invalid
  // D8 = Invalid
  {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80STACKCHANGESIGN, false>},
  {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80STACKABS, false>},
  // E2 = Invalid
  {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTSTF64},
  {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM},
  // E6 = Invalid
  {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x3FF0000000000000>}, // 1.0
  {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x400A934F0979A372>}, // log2l(10)
  {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x3FF71547652B82FE>}, // log2l(e)
  {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x400921FB54442D18>}, // pi
  {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x3FD34413509F79FF>}, // log10l(2)
  {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0x3FE62E42FEFA39EF>}, // log(2)
  {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64_Const, 0>},                  // 0.0

  // EF = Invalid
  {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80F2XM1STACK, false>},
  {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>},
  {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>},
  {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>},
  {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACTF64},
  {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>},
  {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>},
  {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
  {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREMSTACK, true>},
  {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, true>},
  {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SQRTSTACK, false>},
  {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINCOSSTACK, true>},
  {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ROUNDSTACK, false>},
  {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SCALESTACK, false>},
  {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINSTACK, true>},
  {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80COSSTACK, true>},

  {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDA, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDA, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV},
  // E0 = Invalid
  // E8 = Invalid
  {OPD(0xDA, 0xE9), 1,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>},
  // EA = Invalid
  // F0 = Invalid
  // F8 = Invalid

  {OPDReg(0xDB, 0) | 0x00, 8, &OpDispatchBuilder::FILDF64},

  {OPDReg(0xDB, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, true>},

  {OPDReg(0xDB, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, false>},

  {OPDReg(0xDB, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, false>},

  // 4 = Invalid

  {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::f80Bit>},

  // 6 = Invalid

  {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::f80Bit>},


  {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV},
  // E0 = Invalid
  {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::FNCLEX},
  {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT},
  // E4 = Invalid
  {OPD(0xDB, 0xE8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
  {OPD(0xDB, 0xF0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},

  // F8 = Invalid

  {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDC, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDC, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xD0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDC, 0xD8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},

  {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDF64, OpSize::i64Bit>},

  {OPDReg(0xDD, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, true>},

  {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>},

  {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>},

  {OPDReg(0xDD, 4) | 0x00, 8, &OpDispatchBuilder::X87FRSTOR},

  // 5 = Invalid
  {OPDReg(0xDD, 6) | 0x00, 8, &OpDispatchBuilder::X87FNSAVE},

  {OPDReg(0xDD, 7) | 0x00, 8, &OpDispatchBuilder::X87FNSTSW},

  {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE},
  {OPD(0xDD, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, // register-register from regular X87
  {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>}, // Same note as the entry above

  {OPD(0xDD, 0xE0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDD, 0xE8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADDF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMULF64, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xD0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDE, 0xD9), 1,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>},
  {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUBF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIVF64, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},

  {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILDF64},

  {OPDReg(0xDF, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, true>},

  {OPDReg(0xDF, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, false>},

  {OPDReg(0xDF, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, false>},

  {OPDReg(0xDF, 4) | 0x00, 8, &OpDispatchBuilder::FBLDF64},

  {OPDReg(0xDF, 5) | 0x00, 8, &OpDispatchBuilder::FILDF64},

  {OPDReg(0xDF, 6) | 0x00, 8, &OpDispatchBuilder::FBSTPF64},

  {OPDReg(0xDF, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FISTF64, false>},

  // XXX: This should also set the x87 tag bits to empty
  // We don't support this currently, so just pop the stack
  {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
  {OPD(0xDF, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xDF, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},
  {OPD(0xDF, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},

  {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW},
  {OPD(0xDF, 0xE8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
  {OPD(0xDF, 0xF0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMIF64, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
}};

// x87 dispatch table that uses the un-suffixed OpDispatchBuilder handlers (FADD, FLD, FCOMI, ...).
// Constants are loaded through FLD_Const from NamedVectorConstant entries rather than from inline bit patterns.
// The keys and counts mirror X87F64OpTable entry for entry; only the bound handlers differ.
// Each entry is {key built with OPD/OPDReg, count, handler}.
// NOTE(review): `count` is presumably the number of consecutive keys the handler is installed for
// (8 for a full low-three-bit range, 1 for a single encoding) - confirm against the DispatchTableEntry consumer.
// Entries commented as "Invalid" are intentionally absent from the table.
constexpr std::array<DispatchTableEntry, 140> X87F80OpTable = {{
  {OPDReg(0xD8, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i32Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xD8, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xD8, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD8, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xD8, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xD8, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xD8, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},
  {OPD(0xD8, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xD9, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::i32Bit>},

  // 1 = Invalid

  {OPDReg(0xD9, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>},

  {OPDReg(0xD9, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i32Bit>},

  {OPDReg(0xD9, 4) | 0x00, 8, &OpDispatchBuilder::X87LDENV},

  {OPDReg(0xD9, 5) | 0x00, 8, &OpDispatchBuilder::X87FLDCW}, // XXX: stubbed FLDCW

  {OPDReg(0xD9, 6) | 0x00, 8, &OpDispatchBuilder::X87FNSTENV},

  {OPDReg(0xD9, 7) | 0x00, 8, &OpDispatchBuilder::X87FSTCW},

  {OPD(0xD9, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLDFromStack>},
  {OPD(0xD9, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xD9, 0xD0), 1, &OpDispatchBuilder::NOPOp}, // FNOP
  // D1 = Invalid
  // D8 = Invalid
  {OPD(0xD9, 0xE0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80STACKCHANGESIGN, false>},
  {OPD(0xD9, 0xE1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80STACKABS, false>},
  // E2 = Invalid
  {OPD(0xD9, 0xE4), 1, &OpDispatchBuilder::FTST},
  {OPD(0xD9, 0xE5), 1, &OpDispatchBuilder::X87FXAM},
  // E6 = Invalid
  {OPD(0xD9, 0xE8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_ONE>},     // 1.0
  {OPD(0xD9, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10>}, // log2l(10)
  {OPD(0xD9, 0xEA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E>}, // log2l(e)
  {OPD(0xD9, 0xEB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_PI>},     // pi
  {OPD(0xD9, 0xEC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2>}, // log10l(2)
  {OPD(0xD9, 0xED), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_X87_LOG_2>},   // log(2)
  {OPD(0xD9, 0xEE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD_Const, NamedVectorConstant::NAMED_VECTOR_ZERO>},        // 0.0

  // EF = Invalid
  {OPD(0xD9, 0xF0), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80F2XM1STACK, false>},
  {OPD(0xD9, 0xF1), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, false>},
  {OPD(0xD9, 0xF2), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80PTANSTACK, true>},
  {OPD(0xD9, 0xF3), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ATANSTACK, false>},
  {OPD(0xD9, 0xF4), 1, &OpDispatchBuilder::X87FXTRACT},
  {OPD(0xD9, 0xF5), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREM1STACK, true>},
  {OPD(0xD9, 0xF6), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, false>},
  {OPD(0xD9, 0xF7), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
  {OPD(0xD9, 0xF8), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80FPREMSTACK, true>},
  {OPD(0xD9, 0xF9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87FYL2X, true>},
  {OPD(0xD9, 0xFA), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SQRTSTACK, false>},
  {OPD(0xD9, 0xFB), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINCOSSTACK, true>},
  {OPD(0xD9, 0xFC), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80ROUNDSTACK, false>},
  {OPD(0xD9, 0xFD), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SCALESTACK, false>},
  {OPD(0xD9, 0xFE), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80SINSTACK, true>},
  {OPD(0xD9, 0xFF), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87OpHelper, OP_F80COSSTACK, true>},

  {OPDReg(0xDA, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i32Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDA, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i32Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDA, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDA, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i32Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDA, 0xC0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xC8), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xD0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDA, 0xD8), 8, &OpDispatchBuilder::X87FCMOV},
  // E0 = Invalid
  // E8 = Invalid
  {OPD(0xDA, 0xE9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>},
  // EA = Invalid
  // F0 = Invalid
  // F8 = Invalid

  {OPDReg(0xDB, 0) | 0x00, 8, &OpDispatchBuilder::FILD},

  {OPDReg(0xDB, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, true>},

  {OPDReg(0xDB, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, false>},

  {OPDReg(0xDB, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, false>},

  // 4 = Invalid

  {OPDReg(0xDB, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::f80Bit>},

  // 6 = Invalid

  {OPDReg(0xDB, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::f80Bit>},


  {OPD(0xDB, 0xC0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xC8), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xD0), 8, &OpDispatchBuilder::X87FCMOV},
  {OPD(0xDB, 0xD8), 8, &OpDispatchBuilder::X87FCMOV},
  // E0 = Invalid
  {OPD(0xDB, 0xE2), 1, &OpDispatchBuilder::FNCLEX},
  {OPD(0xDB, 0xE3), 1, &OpDispatchBuilder::FNINIT},
  // E4 = Invalid
  {OPD(0xDB, 0xE8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
  {OPD(0xDB, 0xF0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},

  // F8 = Invalid

  {OPDReg(0xDC, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i64Bit, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDC, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i64Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDC, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 5) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i64Bit, false, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDC, 7) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i64Bit, false, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDC, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDC, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDC, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDC, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},

  {OPDReg(0xDD, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FLD, OpSize::i64Bit>},

  {OPDReg(0xDD, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, true>},

  {OPDReg(0xDD, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>},

  {OPDReg(0xDD, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FST, OpSize::i64Bit>},

  {OPDReg(0xDD, 4) | 0x00, 8, &OpDispatchBuilder::X87FRSTOR},

  // 5 = Invalid
  {OPDReg(0xDD, 6) | 0x00, 8, &OpDispatchBuilder::X87FNSAVE},

  {OPDReg(0xDD, 7) | 0x00, 8, &OpDispatchBuilder::X87FNSTSW},

  {OPD(0xDD, 0xC0), 8, &OpDispatchBuilder::X87FFREE},
  {OPD(0xDD, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xDD, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},
  {OPD(0xDD, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},

  {OPD(0xDD, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDD, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 0) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::i16Bit, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 2) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 3) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::i16Bit, true, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},

  {OPDReg(0xDE, 4) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 5) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 6) | 0x00, 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i16Bit, true, false, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPDReg(0xDE, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::i16Bit, true, true, OpDispatchBuilder::OpResult::RES_ST0>},

  {OPD(0xDE, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FADD, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xC8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FMUL, OpSize::f80Bit, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, false>},
  {OPD(0xDE, 0xD9), 1, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_X87, true>},
  {OPD(0xDE, 0xE0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xE8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSUB, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xF0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, true, OpDispatchBuilder::OpResult::RES_STI>},
  {OPD(0xDE, 0xF8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FDIV, OpSize::f80Bit, false, false, OpDispatchBuilder::OpResult::RES_STI>},

  {OPDReg(0xDF, 0) | 0x00, 8, &OpDispatchBuilder::FILD},

  {OPDReg(0xDF, 1) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, true>},

  {OPDReg(0xDF, 2) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, false>},

  {OPDReg(0xDF, 3) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, false>},

  {OPDReg(0xDF, 4) | 0x00, 8, &OpDispatchBuilder::FBLD},

  {OPDReg(0xDF, 5) | 0x00, 8, &OpDispatchBuilder::FILD},

  {OPDReg(0xDF, 6) | 0x00, 8, &OpDispatchBuilder::FBSTP},

  {OPDReg(0xDF, 7) | 0x00, 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FIST, false>},

  // XXX: This should also set the x87 tag bits to empty
  // We don't support this currently, so just pop the stack
  {OPD(0xDF, 0xC0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::X87ModifySTP, true>},
  {OPD(0xDF, 0xC8), 8, &OpDispatchBuilder::FXCH},
  {OPD(0xDF, 0xD0), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},
  {OPD(0xDF, 0xD8), 8, &OpDispatchBuilder::Bind<&OpDispatchBuilder::FSTToStack>},

  {OPD(0xDF, 0xE0), 8, &OpDispatchBuilder::X87FNSTSW},
  {OPD(0xDF, 0xE8), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
  {OPD(0xDF, 0xF0), 8,
   &OpDispatchBuilder::Bind<&OpDispatchBuilder::FCOMI, OpSize::f80Bit, false, OpDispatchBuilder::FCOMIFlags::FLAGS_RFLAGS, false>},
}};
#undef OPD
#undef OPDReg

// Compile-time builder for a complete x87 instruction table.
//
// The returned array has MAX_X87_TABLE_SIZE entries. It is filled in two steps:
//   1. GenerateX87Table() is called with the instruction metadata listed in X87OpTable below.
//   2. InstallToX87Table() stores the dispatcher pointers from DispatchTable into the same slots.
//
// Table indices are built by the two local macros:
//   OPD(op, modrmop)  - ((op - 0xD8) << 8) | modrmop : the escape opcode (0xD8..0xDF) selects a 256 entry page and
//                       the second operand is OR'd in as the low byte.
//   OPDReg(op, reg)   - ((op - 0xD8) << 8) | (reg << 3) : same page selection, with `reg` placed in bits [5:3].
//
// Each X87OpTable row is {first index, number of consecutive indices, instruction info}.
auto GenerateX87TableLambda = [](const auto DispatchTable) consteval {
#define OPD(op, modrmop) (((op - 0xD8) << 8) | modrmop)
#define OPDReg(op, reg) (((op - 0xD8) << 8) | (reg << 3))
 std::array<X86InstInfo, MAX_X87_TABLE_SIZE> Table{};
  constexpr U16U8InfoStruct X87OpTable[] = {
    // 0xD8
    {OPDReg(0xD8, 0), 1, X86InstInfo{"FADD",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 1), 1, X86InstInfo{"FMUL",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 2), 1, X86InstInfo{"FCOM",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 3), 1, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_MODRM | FLAGS_POP, 0}},
    {OPDReg(0xD8, 4), 1, X86InstInfo{"FSUB",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 5), 1, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 6), 1, X86InstInfo{"FDIV",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD8, 7), 1, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_MODRM, 0}},
      //  / 0
      {OPD(0xD8, 0xC0), 8, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xD8, 0xC8), 8, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xD8, 0xD0), 8, X86InstInfo{"FCOM", TYPE_X87, FLAGS_NONE, 0}},
      //  / 3
      {OPD(0xD8, 0xD8), 8, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_POP, 0}},
      //  / 4
      {OPD(0xD8, 0xE0), 8, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xD8, 0xE8), 8, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0}},
      //  / 6
      {OPD(0xD8, 0xF0), 8, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0}},
      //  / 7
      {OPD(0xD8, 0xF8), 8, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0}},
    // 0xD9
    {OPDReg(0xD9, 0), 1, X86InstInfo{"FLD",     TYPE_INST, FLAGS_MODRM, 0}},
    {OPDReg(0xD9, 1), 1, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE, 0}},
    {OPDReg(0xD9, 2), 1, X86InstInfo{"FST",     TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xD9, 3), 1, X86InstInfo{"FSTP",    TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xD9, 4), 1, X86InstInfo{"FLDENV",  TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xD9, 5), 1, X86InstInfo{"FLDCW",   TYPE_X87, GenFlagsSameSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xD9, 6), 1, X86InstInfo{"FNSTENV", TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xD9, 7), 1, X86InstInfo{"FNSTCW",  TYPE_INST, GenFlagsSameSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
      //  / 0
      {OPD(0xD9, 0xC0), 8, X86InstInfo{"FLD",   TYPE_INST, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xD9, 0xC8), 8, X86InstInfo{"FXCH",  TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xD9, 0xD0), 1, X86InstInfo{"FNOP",  TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xD1), 7, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 3
      {OPD(0xD9, 0xD8), 8, X86InstInfo{"",      TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 4
      {OPD(0xD9, 0xE0), 1, X86InstInfo{"FCHS", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE1), 1, X86InstInfo{"FABS", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE2), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE4), 1, X86InstInfo{"FTST", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE5), 1, X86InstInfo{"FXAM", TYPE_INST,  FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE6), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xD9, 0xE8), 1, X86InstInfo{"FLD1", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xE9), 1, X86InstInfo{"FLDL2T", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xEA), 1, X86InstInfo{"FLDL2E", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xEB), 1, X86InstInfo{"FLDPI", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xEC), 1, X86InstInfo{"FLDLG2", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xED), 1, X86InstInfo{"FLDLN2", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xEE), 1, X86InstInfo{"FLDZ", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xEF), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 6
      {OPD(0xD9, 0xF0), 1, X86InstInfo{"F2XM1", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF1), 1, X86InstInfo{"FYL2X", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF2), 1, X86InstInfo{"FPTAN", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF3), 1, X86InstInfo{"FPATAN", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF4), 1, X86InstInfo{"FXTRACT", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF5), 1, X86InstInfo{"FPREM1", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF6), 1, X86InstInfo{"FDECSTP", TYPE_X87, FLAGS_POP, 0}},
      {OPD(0xD9, 0xF7), 1, X86InstInfo{"FINCSTP", TYPE_X87, FLAGS_POP, 0}},
      //  / 7
      {OPD(0xD9, 0xF8), 1, X86InstInfo{"FPREM", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xF9), 1, X86InstInfo{"FYL2XP1", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFA), 1, X86InstInfo{"FSQRT", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFB), 1, X86InstInfo{"FSINCOS", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFC), 1, X86InstInfo{"FRNDINT", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFD), 1, X86InstInfo{"FSCALE", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFE), 1, X86InstInfo{"FSIN", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xD9, 0xFF), 1, X86InstInfo{"FCOS", TYPE_X87, FLAGS_NONE, 0}},
    // 0xDA
    {OPDReg(0xDA, 0), 1, X86InstInfo{"FIADD", TYPE_X87,  GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 1), 1, X86InstInfo{"FIMUL", TYPE_X87,  GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 2), 1, X86InstInfo{"FICOM", TYPE_X87,  GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 3), 1, X86InstInfo{"FICOMP", TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_POP, 0}},
    {OPDReg(0xDA, 4), 1, X86InstInfo{"FISUB", TYPE_X87,  GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 5), 1, X86InstInfo{"FISUBR", TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 6), 1, X86InstInfo{"FIDIV", TYPE_X87,  GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDA, 7), 1, X86InstInfo{"FIDIVR", TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
      //  / 0
      {OPD(0xDA, 0xC0), 8, X86InstInfo{"FCMOVB", TYPE_X87, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xDA, 0xC8), 8, X86InstInfo{"FCMOVE", TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xDA, 0xD0), 8, X86InstInfo{"FCMOVBE", TYPE_X87, FLAGS_NONE, 0}},
      //  / 3
      {OPD(0xDA, 0xD8), 8, X86InstInfo{"FCMOVU", TYPE_X87, FLAGS_NONE, 0}},
      //  / 4
      {OPD(0xDA, 0xE0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xDA, 0xE8), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      {OPD(0xDA, 0xE9), 1, X86InstInfo{"FUCOMPP", TYPE_X87, FLAGS_POP, 0}},
      {OPD(0xDA, 0xEA), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 6
      {OPD(0xDA, 0xF0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 7
      {OPD(0xDA, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    // 0xDB
    {OPDReg(0xDB, 0), 1, X86InstInfo{"FILD",   TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDB, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDB, 2), 1, X86InstInfo{"FIST",   TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xDB, 3), 1, X86InstInfo{"FISTP",  TYPE_X87, GenFlagsSrcSize(SIZE_32BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDB, 4), 1, X86InstInfo{"",       TYPE_INVALID, FLAGS_NONE, 0}},
    {OPDReg(0xDB, 5), 1, X86InstInfo{"FLD",    TYPE_X87,    FLAGS_MODRM, 0}},
    {OPDReg(0xDB, 6), 1, X86InstInfo{"",       TYPE_INVALID, FLAGS_NONE, 0}},
    {OPDReg(0xDB, 7), 1, X86InstInfo{"FSTP",   TYPE_X87,   FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
      //  / 0
      {OPD(0xDB, 0xC0), 8, X86InstInfo{"FCMOVNB", TYPE_X87, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xDB, 0xC8), 8, X86InstInfo{"FCMOVNE", TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xDB, 0xD0), 8, X86InstInfo{"FCMOVNBE", TYPE_X87, FLAGS_NONE, 0}},
      //  / 3
      {OPD(0xDB, 0xD8), 8, X86InstInfo{"FCMOVNU", TYPE_X87, FLAGS_NONE, 0}},
      //  / 4
      {OPD(0xDB, 0xE0), 2, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      {OPD(0xDB, 0xE2), 1, X86InstInfo{"FNCLEX", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xDB, 0xE3), 1, X86InstInfo{"FNINIT", TYPE_X87, FLAGS_NONE, 0}},
      {OPD(0xDB, 0xE4), 4, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xDB, 0xE8), 8, X86InstInfo{"FUCOMI", TYPE_INST, FLAGS_NONE, 0}},
      //  / 6
      {OPD(0xDB, 0xF0), 8, X86InstInfo{"FCOMI", TYPE_X87, FLAGS_NONE, 0}},
      //  / 7
      {OPD(0xDB, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    // 0xDC
    {OPDReg(0xDC, 0), 1, X86InstInfo{"FADD", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 1), 1, X86InstInfo{"FMUL", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 2), 1, X86InstInfo{"FCOM", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 3), 1, X86InstInfo{"FCOMP", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS | FLAGS_POP, 0}},
    {OPDReg(0xDC, 4), 1, X86InstInfo{"FSUB", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 5), 1, X86InstInfo{"FSUBR", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 6), 1, X86InstInfo{"FDIV", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDC, 7), 1, X86InstInfo{"FDIVR", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
      //  / 0
      {OPD(0xDC, 0xC0), 8, X86InstInfo{"FADD", TYPE_X87, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xDC, 0xC8), 8, X86InstInfo{"FMUL", TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xDC, 0xD0), 8, X86InstInfo{"FCOM", TYPE_X87, FLAGS_X87_FLAGS, 0}},
      //  / 3
      {OPD(0xDC, 0xD8), 8, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_X87_FLAGS | FLAGS_POP, 0}},
      //  / 4
      {OPD(0xDC, 0xE0), 8, X86InstInfo{"FSUBR", TYPE_X87, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xDC, 0xE8), 8, X86InstInfo{"FSUB", TYPE_X87, FLAGS_NONE, 0}},
      //  / 6
      {OPD(0xDC, 0xF0), 8, X86InstInfo{"FDIVR", TYPE_X87, FLAGS_NONE, 0}},
      //  / 7
      {OPD(0xDC, 0xF8), 8, X86InstInfo{"FDIV", TYPE_X87, FLAGS_NONE, 0}},
    // 0xDD
    {OPDReg(0xDD, 0), 1, X86InstInfo{"FLD", TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xDD, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDD, 2), 1, X86InstInfo{"FST", TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xDD, 3), 1, X86InstInfo{"FSTP", TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDD, 4), 1, X86InstInfo{"FRSTOR", TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xDD, 5), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    {OPDReg(0xDD, 6), 1, X86InstInfo{"FNSAVE", TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xDD, 7), 1, X86InstInfo{"FNSTSW", TYPE_X87, GenFlagsSameSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
      //  / 0
      {OPD(0xDD, 0xC0), 8, X86InstInfo{"FFREE", TYPE_X87, FLAGS_NONE, 0}},
      //  / 1
      {OPD(0xDD, 0xC8), 8, X86InstInfo{"FXCH",  TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xDD, 0xD0), 8, X86InstInfo{"FST", TYPE_INST, FLAGS_SF_MOD_DST, 0}},
      //  / 3
      {OPD(0xDD, 0xD8), 8, X86InstInfo{"FSTP", TYPE_X87, FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
      //  / 4
      {OPD(0xDD, 0xE0), 8, X86InstInfo{"FUCOM", TYPE_X87, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xDD, 0xE8), 8, X86InstInfo{"FUCOMP", TYPE_X87, FLAGS_POP, 0}},
      //  / 6
      {OPD(0xDD, 0xF0), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 7
      {OPD(0xDD, 0xF8), 8, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
    // 0xDE
    {OPDReg(0xDE, 0), 1, X86InstInfo{"FIADD", TYPE_X87,  GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 1), 1, X86InstInfo{"FIMUL", TYPE_X87,  GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 2), 1, X86InstInfo{"FICOM", TYPE_X87,  GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 3), 1, X86InstInfo{"FICOMP", TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_POP, 0}},
    {OPDReg(0xDE, 4), 1, X86InstInfo{"FISUB", TYPE_X87,  GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 5), 1, X86InstInfo{"FISUBR", TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 6), 1, X86InstInfo{"FIDIV", TYPE_X87,  GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDE, 7), 1, X86InstInfo{"FIDIVR", TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
      //  / 0
      {OPD(0xDE, 0xC0), 8, X86InstInfo{"FADDP", TYPE_X87, FLAGS_POP, 0}},
      //  / 1
      {OPD(0xDE, 0xC8), 8, X86InstInfo{"FMULP", TYPE_X87, FLAGS_POP, 0}},
      //  / 2
      {OPD(0xDE, 0xD0), 8, X86InstInfo{"FCOMP", TYPE_X87, FLAGS_X87_FLAGS | FLAGS_POP, 0}},
      //  / 3
      {OPD(0xDE, 0xD8), 1, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      {OPD(0xDE, 0xD9), 1, X86InstInfo{"FCOMPP", TYPE_X87, FLAGS_POP, 0}},
      {OPD(0xDE, 0xDA), 6, X86InstInfo{"", TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 4
      {OPD(0xDE, 0xE0), 8, X86InstInfo{"FSUBRP", TYPE_X87, FLAGS_POP, 0}},
      //  / 5
      {OPD(0xDE, 0xE8), 8, X86InstInfo{"FSUBP", TYPE_X87, FLAGS_POP, 0}},
      //  / 6
      {OPD(0xDE, 0xF0), 8, X86InstInfo{"FDIVRP", TYPE_X87, FLAGS_POP, 0}},
      //  / 7
      {OPD(0xDE, 0xF8), 8, X86InstInfo{"FDIVP", TYPE_X87, FLAGS_POP, 0}},
    // 0xDF
    {OPDReg(0xDF, 0), 1, X86InstInfo{"FILD", TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM, 0}},
    {OPDReg(0xDF, 1), 1, X86InstInfo{"FISTTP", TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDF, 2), 1, X86InstInfo{"FIST",   TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST, 0}},
    {OPDReg(0xDF, 3), 1, X86InstInfo{"FISTP",  TYPE_X87, GenFlagsSrcSize(SIZE_16BIT) | FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDF, 4), 1, X86InstInfo{"FBLD", TYPE_X87, FLAGS_MODRM, 0}},
    {OPDReg(0xDF, 5), 1, X86InstInfo{"FILD", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS, 0}},
    {OPDReg(0xDF, 6), 1, X86InstInfo{"FBSTP", TYPE_X87, FLAGS_MODRM | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
    {OPDReg(0xDF, 7), 1, X86InstInfo{"FISTP", TYPE_X87, GenFlagsSrcSize(SIZE_64BIT) | FLAGS_MODRM | FLAGS_X87_FLAGS | FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
      //  / 0
      //  This instruction is a bit special. This is an undocumented(Almost) x87 instruction.
      //  https://en.wikipedia.org/wiki/X86_instruction_listings#Undocumented_x87_instructions
      //  https://www.pagetable.com/?p=16
      //  AMD Athlon Processor x86 Code Optimization Guide - `Use FFREEP Macro to Pop One Register from the FPU Stack`
      //  ISA architecture manuals don't talk about this instruction at all
      //  At some point the Nvidia OpenGL binary driver uses this instruction.
      //  GCC may also end up emitting this instruction in some rare edge case!
      //  Almost all x86 CPUs implement this, and it is expected to be around
      {OPD(0xDF, 0xC0), 8, X86InstInfo{"FFREEP",  TYPE_X87, FLAGS_POP, 0}},
      //  / 1
      {OPD(0xDF, 0xC8), 8, X86InstInfo{"FXCH",    TYPE_X87, FLAGS_NONE, 0}},
      //  / 2
      {OPD(0xDF, 0xD0), 8, X86InstInfo{"FSTP",    TYPE_X87, FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
      //  / 3
      {OPD(0xDF, 0xD8), 8, X86InstInfo{"FSTP",    TYPE_X87, FLAGS_SF_MOD_DST | FLAGS_POP, 0}},
      //  / 4
      {OPD(0xDF, 0xE0), 1, X86InstInfo{"FNSTSW",  TYPE_INST, GenFlagsSameSize(SIZE_16BIT) | FLAGS_SF_DST_RAX, 0}},
      {OPD(0xDF, 0xE1), 7, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE, 0}},
      //  / 5
      {OPD(0xDF, 0xE8), 8, X86InstInfo{"FUCOMIP", TYPE_INST,    FLAGS_POP, 0}},
      //  / 6
      {OPD(0xDF, 0xF0), 8, X86InstInfo{"FCOMIP",  TYPE_X87,   FLAGS_POP, 0}},
      //  / 7
      {OPD(0xDF, 0xF8), 8, X86InstInfo{"",        TYPE_INVALID, FLAGS_NONE, 0}},
  };
#undef OPD
#undef OPDReg

  // Copies the dispatcher pointer of every LocalTable row into FinalTable.
  // A row's `Op` field packs two things:
  //   - bit 15 (0x8000): "repeat" flag, the handler is additionally stored at index|0x40 and index|0x80.
  //   - bits [10:0] (0x7FF): the first table index the row covers.
  // `Count` consecutive indices starting at that first index receive the same handler.
  auto InstallToX87Table = [](auto& FinalTable, auto& LocalTable) {
    for (auto Op : LocalTable) {
      auto OpNum = Op.Op;
      bool Repeat = (OpNum & 0x8000) != 0;
      OpNum = OpNum & 0x7FF;
      auto Dispatcher = Op.Ptr;
      for (uint8_t i = 0; i < Op.Count; ++i) {
        // Only the primary slot is checked for a previously installed handler; the repeated slots below are not.
        LOGMAN_THROW_A_FMT(FinalTable[OpNum + i].OpcodeDispatcher.OpDispatch == nullptr, "Duplicate Entry");

        FinalTable[OpNum + i].OpcodeDispatcher.OpDispatch = Dispatcher;

        // Flag to indicate if we need to repeat this op in {0x40, 0x80} ranges
        if (Repeat) {
          FinalTable[(OpNum | 0x40) + i].OpcodeDispatcher.OpDispatch = Dispatcher;
          FinalTable[(OpNum | 0x80) + i].OpcodeDispatcher.OpDispatch = Dispatcher;
        }
      }
    }
  };

  // Instruction metadata first, then the handlers on top of it.
  GenerateX87Table(Table.data(), X87OpTable, std::size(X87OpTable));
  InstallToX87Table(Table, DispatchTable);
  return Table;
};

// The two final x87 tables, both produced at compile time by GenerateX87TableLambda.
// They share the same instruction metadata and differ only in the dispatch table that supplies the handlers
// (X87F80OpTable vs. X87F64OpTable; presumably full 80-bit vs. reduced 64-bit precision handlers, going by the names).
constexpr std::array<X86InstInfo, MAX_X87_TABLE_SIZE> X87F80Ops = GenerateX87TableLambda(X87F80OpTable);
constexpr std::array<X86InstInfo, MAX_X87_TABLE_SIZE> X87F64Ops = GenerateX87TableLambda(X87F64OpTable);

}


================================================
FILE: FEXCore/Source/Interface/GDBJIT/GDBJIT.cpp
================================================
// SPDX-License-Identifier: MIT
#include "GDBJIT.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SourcecodeResolver.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/vector.h>

#if defined(GDB_SYMBOLS_ENABLED)

#include <FEXCore/Debug/GDBReaderInterface.h>

// Declarations shared with the debugger. They use C linkage so the symbol names are not mangled.
// NOTE(review): names and field layout appear to follow GDB's JIT compilation interface; do not reorder or rename
// anything here without checking the GDB documentation.
extern "C" {
// Values written to jit_descriptor::action_flag.
enum jit_actions_t { JIT_NOACTION = 0, JIT_REGISTER_FN, JIT_UNREGISTER_FN };

// One registered symbol file. Entries form a doubly linked list.
struct jit_code_entry {
  jit_code_entry* next_entry;
  jit_code_entry* prev_entry;
  // Start and size in bytes of the in-memory symbol file for this entry.
  const char* symfile_addr;
  uint64_t symfile_size;
};

// Global descriptor tying together the list of entries and the pending action.
struct jit_descriptor {
  uint32_t version;
  /* This type should be jit_actions_t, but we use uint32_t
     to be explicit about the bitwidth.  */
  uint32_t action_flag;
  // Entry the current action applies to.
  jit_code_entry* relevant_entry;
  // Head of the entry list.
  jit_code_entry* first_entry;
};

/* Make sure to specify the version statically, because the
   debugger may check the version before we can set it.  */

constinit jit_descriptor __jit_debug_descriptor = {.version = 1};

/* GDB puts a breakpoint in this function.  */
// noinline keeps the function as a real call target; the empty asm statement takes the descriptor address as an
// input operand so the body is not completely empty.
void __attribute__((noinline)) __jit_debug_register_code() {
  asm volatile("" ::"r"(&__jit_debug_descriptor));
};
}

namespace FEXCore {

/**
 * @brief Describes one block of JIT-generated host code to the debugger.
 *
 * Builds an in-memory symbol file and appends it to the list hanging off __jit_debug_descriptor, then calls
 * __jit_debug_register_code() so the debugger can pick it up. The symbol file is one contiguous allocation:
 *
 *   [ info_t ][ nblocks * blocks_t ][ nlines * gdb_line_mapping ]
 *
 * with info_t::blocks_ofs / info_t::lines_ofs holding the byte offsets of the two arrays from the start.
 *
 * Nothing is registered when the file has no source map, or when the symbol file cannot be allocated.
 * The allocation and the list entry are not released by this function.
 *
 * @param Entry       File the guest code belongs to; provides the source map and the filename used for the symbol name.
 * @param VAFileStart Guest address the file is mapped at; subtracted from guest addresses to get file offsets.
 * @param GuestRIP    Guest address of the block.
 * @param HostEntry   Host address of the generated code for the block.
 * @param DebugData   Per-block data: guest opcode to host offset mapping and the host code size.
 */
void GDBJITRegister(const FEXCore::ExecutableFileInfo& Entry, uintptr_t VAFileStart, uint64_t GuestRIP, uintptr_t HostEntry,
                    FEXCore::Core::DebugData& DebugData) {
  auto map = Entry.SourcecodeMap.get();
  if (!map) {
    return;
  }

  // strncpy() leaves the destination unterminated when the source is at least as long as the limit, and the
  // destination lives in uninitialized malloc() memory, so the terminator has to be written explicitly.
  auto CopyTruncated = [](auto& Dst, const char* Src) {
    strncpy(Dst, Src, sizeof(Dst) - 1);
    Dst[sizeof(Dst) - 1] = '\0';
  };

  auto FileOffset = GuestRIP - VAFileStart;

  auto Sym = map->FindSymbolMapping(FileOffset);

  auto SymName = HLE::SourcecodeSymbolMapping::SymName(Sym, Entry.Filename, HostEntry, FileOffset);

  // Collect a line entry for every guest opcode that has line information.
  fextl::vector<gdb_line_mapping> Lines;
  for (const auto& GuestOpcode : DebugData.GuestOpcodes) {
    auto Line = map->FindLineMapping(GuestRIP + GuestOpcode.GuestEntryOffset - VAFileStart);
    if (Line) {
      Lines.push_back({Line->LineNumber, HostEntry + GuestOpcode.HostEntryOffset});
    }
  }

  size_t size = sizeof(info_t) + 1 * sizeof(blocks_t) + Lines.size() * sizeof(gdb_line_mapping);

  auto mem = (uint8_t*)malloc(size);
  if (!mem) {
    // Debug information is optional; skip this block instead of dereferencing a null pointer.
    return;
  }

  auto base = mem;
  info_t* info = (info_t*)mem;
  mem += sizeof(info_t);

  CopyTruncated(info->filename, map->SourceFile.c_str());

  info->nblocks = 1;

  auto blocks = (blocks_t*)mem;
  info->blocks_ofs = mem - base;

  mem += info->nblocks * sizeof(blocks_t);

  for (int i = 0; i < info->nblocks; i++) {
    CopyTruncated(blocks[i].name, SymName.c_str());
    blocks[i].start = HostEntry;
    blocks[i].end = HostEntry + DebugData.HostCodeSize;
  }

  info->nlines = Lines.size();

  auto lines = (gdb_line_mapping*)mem;
  info->lines_ofs = mem - base;
  mem += info->nlines * sizeof(gdb_line_mapping);

  if (info->nlines) {
    memcpy(lines, Lines.data(), info->nlines * sizeof(gdb_line_mapping));
  }

  auto entry = new jit_code_entry {0, 0, 0, 0};

  entry->symfile_addr = (const char*)info;
  entry->symfile_size = size;

  // relevant_entry still points at the entry registered last, i.e. the current tail of the list.
  if (__jit_debug_descriptor.first_entry) {
    __jit_debug_descriptor.relevant_entry->next_entry = entry;
    entry->prev_entry = __jit_debug_descriptor.relevant_entry;
  } else {
    __jit_debug_descriptor.first_entry = entry;
  }

  __jit_debug_descriptor.relevant_entry = entry;
  __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
  __jit_debug_register_code();
}
} // namespace FEXCore
#else
namespace FEXCore {
// Fallback used when GDB_SYMBOLS_ENABLED is not defined: all arguments are ignored and the call
// reports a fatal error through ERROR_AND_DIE_FMT.
void GDBJITRegister(const FEXCore::ExecutableFileInfo&, uintptr_t, uint64_t, uintptr_t, FEXCore::Core::DebugData&) {
  ERROR_AND_DIE_FMT("GDBSymbols support not compiled in");
}
} // namespace FEXCore
#endif


================================================
FILE: FEXCore/Source/Interface/GDBJIT/GDBJIT.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/CodeCache.h>
#include <Interface/Core/JIT/DebugData.h>

#include <cstdint>

namespace FEXCore {
// Registers one block of JIT-generated host code with the debugger.
// VAFileStart is the guest address the file is mapped at, GuestRIP the guest address of the block and
// HostEntry the host address of the generated code.
// Reports a fatal error when the build was made without GDB_SYMBOLS_ENABLED.
void GDBJITRegister(const FEXCore::ExecutableFileInfo&, uintptr_t VAFileStart, uint64_t GuestRIP, uintptr_t HostEntry, FEXCore::Core::DebugData&);
}


================================================
FILE: FEXCore/Source/Interface/IR/IR.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/ThreadPoolAllocator.h>
#include <FEXCore/IR/IR.h>

#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/sstream.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iterator>
#include <type_traits>

namespace FEXCore::IR {

class OrderedNode;

/**
 * @brief The IROp_Header is a dynamically sized array
 * At the end it contains a uint8_t for the number of arguments that Op has
 * Then there is an unsized array of NodeWrapper arguments for the number of arguments this op has
 * The op structures that are including the header must ensure that they pad themselves correctly to the number of arguments used
 */
struct IROp_Header;

/**
 * @brief Strongly typed identifier of an IR node.
 *
 * Wrapping the raw integer keeps node IDs from being mixed up with
 * unrelated integer arguments. The value zero means "no node".
 */
struct NodeID final {
  using value_type = uint32_t;

  // A default constructed ID is the invalid (zero) ID.
  constexpr NodeID() noexcept = default;
  constexpr explicit NodeID(value_type Value_) noexcept
    : Value {Value_} {}

  constexpr NodeID(const NodeID&) noexcept = default;
  constexpr NodeID(NodeID&&) noexcept = default;

  constexpr NodeID& operator=(const NodeID&) noexcept = default;
  constexpr NodeID& operator=(NodeID&&) noexcept = default;

  /// True when the ID is the reserved zero value.
  [[nodiscard]]
  constexpr bool IsInvalid() const noexcept {
    return Value == value_type {0};
  }
  /// True when the ID is anything but the reserved zero value.
  [[nodiscard]]
  constexpr bool IsValid() const noexcept {
    return !IsInvalid();
  }
  /// Resets the ID to the reserved zero value.
  constexpr void Invalidate() noexcept {
    Value = value_type {0};
  }

  // Member-wise ordering; also provides equality.
  [[nodiscard]] constexpr auto operator<=>(const NodeID&) const noexcept = default;

  // Stream the raw integer value out / in.
  friend std::ostream& operator<<(std::ostream& out, NodeID ID) {
    return out << ID.Value;
  }
  friend std::istream& operator>>(std::istream& in, NodeID& ID) {
    return in >> ID.Value;
  }

  value_type Value {};
};

/**
 * @brief Compact handle to a node, stored as one 32-bit word instead of a pointer.
 * Prefer the OpNodeWrapper / OrderedNodeWrapper aliases over using this template directly.
 *
 * The word is interpreted in one of these ways:
 *  - zero: the invalid handle
 *  - bit 31 set: an immediate, the value lives in the remaining low bits
 *  - bit 30 set: the handle carries the "kill" flag
 *  - neither flag set: a byte offset that is added to a caller supplied base address
 *
 * Why offsets rather than pointers
 *  - Half the storage of a 64-bit pointer
 *  - Nothing in a node list depends on where the list lives, so a list can be copied with a plain memcpy
 * What it costs
 *  - Nodes must be allocated out of one linear region of memory
 *  - The base address has to be carried separately and passed to every access
 *  - The type has to stay POD and trivially copyable
 *  - Every real access becomes a [Base + Offset] computation
 *  - Mixing OpNodeWrapper and OrderedNodeWrapper is easy to get wrong
 */
template<typename Type>
struct FEX_PACKED NodeWrapperBase final {
  // A 32-bit word is enough for addressing and halves the node list size compared to 64-bit.
  using NodeOffsetType = uint32_t;
  NodeOffsetType NodeOffset;

  explicit NodeWrapperBase() = default;

  /// Builds a handle directly from a raw word. No checks are applied.
  [[nodiscard]]
  static NodeWrapperBase WrapOffset(NodeOffsetType Offset) {
    NodeWrapperBase Result;
    Result.NodeOffset = Offset;
    return Result;
  }

  /// Builds a handle referring to address `Value`, relative to `Base`.
  [[nodiscard]]
  static NodeWrapperBase WrapPtr(uintptr_t Base, uintptr_t Value) {
    NodeWrapperBase Result;
    Result.SetOffset(Base, Value);
    return Result;
  }

  /// Type-erased form of GetNode().
  [[nodiscard]]
  static void* UnwrapNode(uintptr_t Base, NodeWrapperBase Node) {
    return Node.GetNode(Base);
  }

  [[nodiscard]]
  NodeID ID() const;

  [[nodiscard]]
  bool IsInvalid() const {
    return NodeOffset == 0;
  }

  [[nodiscard]]
  bool IsImmediate() const {
    return (NodeOffset & ImmediateBit) != 0;
  }

  [[nodiscard]]
  bool HasKill() const {
    return (NodeOffset & KillBit) != 0;
  }

  void ClearKill() {
    NodeOffset &= ~KillBit;
  }

  void SetKill() {
    NodeOffset |= KillBit;
  }

  /// A plain offset: neither the immediate nor the kill flag is set.
  [[nodiscard]]
  bool IsPointer() const {
    return (NodeOffset & (ImmediateBit | KillBit)) == 0;
  }

  /// Resolves the handle against `Base`. The handle must be a plain offset.
  [[nodiscard]]
  Type* GetNode(uintptr_t Base) {
    LOGMAN_THROW_A_FMT(IsPointer(), "Precondition");
    return reinterpret_cast<Type*>(Base + NodeOffset);
  }
  [[nodiscard]]
  const Type* GetNode(uintptr_t Base) const {
    LOGMAN_THROW_A_FMT(IsPointer(), "Precondition");
    return reinterpret_cast<const Type*>(Base + NodeOffset);
  }

  /// Points the handle at address `Value`, stored as the distance from `Base`.
  void SetOffset(uintptr_t Base, uintptr_t Value) {
    NodeOffset = static_cast<NodeOffsetType>(Value - Base);
    LOGMAN_THROW_A_FMT(IsPointer(), "Offsets are within 2GiB range");
  }

  void SetInvalid() {
    NodeOffset = 0;
    LOGMAN_THROW_A_FMT(IsInvalid(), "Zero state");
  }

  /// Stores `Immediate` in the low bits and tags the word as an immediate.
  void SetImmediate(uint32_t Immediate) {
    LOGMAN_THROW_A_FMT(Immediate < ImmediateBit, "Bounded");
    NodeOffset = ImmediateBit | Immediate;
    LOGMAN_THROW_A_FMT(IsImmediate(), "Encoded above");
  }

  /// Returns the stored immediate with the tag bit removed.
  [[nodiscard]]
  uint32_t GetImmediate() const {
    LOGMAN_THROW_A_FMT(IsImmediate(), "Precondition: must be an immediate");
    return NodeOffset & ~ImmediateBit;
  }

  [[nodiscard]]
  friend constexpr bool operator==(const NodeWrapperBase<Type>&, const NodeWrapperBase<Type>&) = default;

  /// Convenience factory around SetImmediate().
  [[nodiscard]]
  static NodeWrapperBase<Type> FromImmediate(uint32_t Immediate) {
    NodeWrapperBase<Type> Result;
    Result.SetImmediate(Immediate);
    return Result;
  }

private:
  // Flag bits living in the top of the 32-bit word.
  static constexpr NodeOffsetType ImmediateBit = NodeOffsetType {1} << 31;
  static constexpr NodeOffsetType KillBit = NodeOffsetType {1} << 30;
};

// Compile-time guarantees the wrapper design relies on: it can be copied bytewise and
// occupies exactly one 32-bit word.
static_assert(std::is_trivially_copyable_v<NodeWrapperBase<OrderedNode>>);

static_assert(sizeof(NodeWrapperBase<OrderedNode>) == sizeof(uint32_t));

// Handle that resolves to an IROp_Header.
using OpNodeWrapper = NodeWrapperBase<IROp_Header>;
// Handle that resolves to an OrderedNode.
using OrderedNodeWrapper = NodeWrapperBase<OrderedNode>;

// The link part of an OrderedNode: the handle of the IR op it carries plus
// the handles of its neighbours in the doubly linked list.
struct OrderedNodeHeader {
  OpNodeWrapper Value;
  OrderedNodeWrapper Next;
  OrderedNodeWrapper Previous;
};

// Three packed 32-bit handles, no padding.
static_assert(sizeof(OrderedNodeHeader) == sizeof(uint32_t) * 3);

/**
 * @brief This is a node in our IR representation
 * Is a doubly linked list node that lives in a representation of a linearly allocated node list
 * The links in the nodes can live in a list independent of the data IR data
 *
 * ex.
 *  Region1 : ... <-> <OrderedNode> <-> <OrderedNode> <-> ...
 *                    | *<Value>        |
 *                    v                 v
 *  Region2 : <IROp>..<IROp>..<IROp>..<IROp>
 *
 *  In this example the OrderedNodes are allocated in one linear memory region (Not necessarily contiguous with one another linking)
 *  The second region is contiguous but they don't have any relationship with one another directly
 */
class OrderedNode final {
public:
  // These three values are laid out very specifically to make it fast to access the NodeWrappers specifically
  OrderedNodeHeader Header;
  uint32_t NumUses;

  // After RA, the register allocated for the node. This is the register for the
  // node at the time it is written, even if it is shuffled into other registers
  // later. In other words, it is the register destination of the instruction
  // represented by this OrderedNode.
  //
  // This is the raw value of a PhysicalRegister data structure.
  uint8_t Reg;
  uint8_t Pad[3];

  using value_type = OrderedNodeWrapper;

  OrderedNode() = default;

  /**
   * @brief Appends a node to this current node
   *
   * Before. <Prev> <-> <Current> <-> <Next>
   * After.  <Prev> <-> <Current> <-> <Node> <-> Next
   *
   * @return Pointer to the node being added
   */
  value_type append(uintptr_t Base, value_type Node) {
    // Set Next Node's Previous to incoming node
    SetPrevious(Base, Header.Next, Node);

    // Set Incoming node's links to this node's links
    SetPrevious(Base, Node, Wrapped(Base));
    SetNext(Base, Node, Header.Next);

    // Set this node's next to the incoming node
    SetNext(Base, Wrapped(Base), Node);

    // Return the node we are appending
    return Node;
  }

  OrderedNode* append(uintptr_t Base, OrderedNode* Node) {
    value_type WNode = Node->Wrapped(Base);
    // Set Next Node's Previous to incoming node
    SetPrevious(Base, Header.Next, WNode);

    // Set Incoming node's links to this node's links
    SetPrevious(Base, WNode, Wrapped(Base));
    SetNext(Base, WNode, Header.Next);

    // Set this node's next to the incoming node
    SetNext(Base, Wrapped(Base), WNode);

    // Return the node we are appending
    return Node;
  }

  /**
   * @brief Prepends a node to the current node
   * Before. <Prev> <-> <Current> <-> <Next>
   * After.  <Prev> <-> <Node> <-> <Current> <-> Next
   *
   * @return Pointer to the node being added
   */
  value_type prepend(uintptr_t Base, value_type Node) {
    // Set the previous node's next to the incoming node
    SetNext(Base, Header.Previous, Node);

    // Set the incoming node's links
    SetPrevious(Base, Node, Header.Previous);
    SetNext(Base, Node, Wrapped(Base));

    // Set the current node's link
    SetPrevious(Base, Wrapped(Base), Node);

    // Return the node we are prepending
    return Node;
  }

  OrderedNode* prepend(uintptr_t Base, OrderedNode* Node) {
    value_type WNode = Node->Wrapped(Base);
    // Set the previous node's next to the incoming node
    SetNext(Base, Header.Previous, WNode);

    // Set the incoming node's links
    SetPrevious(Base, WNode, Header.Previous);
    SetNext(Base, WNode, Wrapped(Base));

    // Set the current node's link
    SetPrevious(Base, Wrapped(Base), WNode);

    // Return the node we are prepending
    return Node;
  }

  /**
   * @brief Gets the remaining size of the blocks from this point onward
   *
   * Doesn't find the head of the list
   *
   */
  [[nodiscard]]
  size_t size(uintptr_t Base) const {
    size_t Size = 1;
    // Walk the list forward until we hit a sentinel
    value_type Current = Header.Next;
    while (Current.NodeOffset != 0) {
      ++Size;
      OrderedNode* RealNode = Current.GetNode(Base);
      Current = RealNode->Header.Next;
    }
    return Size;
  }

  void Unlink(uintptr_t Base) {
    // This removes the node from the list. Orphaning it
    // Before: <Previous> <-> <Current> <-> <Next>
    // After: <Previous <-> <Next>
    SetNext(Base, Header.Previous, Header.Next);
    SetPrevious(Base, Header.Next, Header.Previous);
  }

  [[nodiscard]]
  const IROp_Header* Op(uintptr_t Base) const {
    return Header.Value.GetNode(Base);
  }
  [[nodiscard]]
  IROp_Header* Op(uintptr_t Base) {
    return Header.Value.GetNode(Base);
  }

  [[nodiscard]]
  uint32_t GetUses() const {
    return NumUses;
  }

  void AddUse() {
    ++NumUses;
  }
  void RemoveUse() {
    --NumUses;
  }

  [[nodiscard]]
  value_type Wrapped(uintptr_t Base) const {
    value_type Tmp;
    Tmp.SetOffset(Base, reinterpret_cast<uintptr_t>(this));
    return Tmp;
  }

private:
  [[nodiscard]]
  value_type WrappedOffset(uint32_t Offset) const {
    value_type Tmp;
    Tmp.NodeOffset = Offset;
    return Tmp;
  }

  static void SetPrevious(uintptr_t Base, value_type Node, value_type New) {
    OrderedNode* RealNode = Node.GetNode(Base);
    RealNode->Header.Previous = New;
  }

  static void SetNext(uintptr_t Base, value_type Node, value_type New) {
    OrderedNode* RealNode = Node.GetNode(Base);
    RealNode->Header.Next = New;
  }

  void SetUses(uint32_t Uses) {
    NumUses = Uses;
  }
};

static_assert(std::is_trivially_constructible_v<OrderedNode>);
static_assert(std::is_trivially_copyable_v<OrderedNode>);
static_assert(offsetof(OrderedNode, Header) == 0);
static_assert(sizeof(OrderedNode) == (sizeof(OrderedNodeHeader) + 2 * sizeof(uint32_t)));

// This is temporary. We are transitioning away from OrderedNodes in favour of
// flat Ref words. To ease porting, we have this typedef. Eventually OrderedNode
// will be removed and this typedef will be replaced by something like:
//
//  struct Ref {
//     uint Flags : 1;
//     uint ID : 23;
//     uint Reg : 8;
//  };
//
// Until then, a Ref is simply a pointer to an OrderedNode.
using Ref = OrderedNode*;

/* This iterator can be used to step though nodes.
 * Due to how our IR is laid out, this can be used to either step
 * though the CodeBlocks or though the code within a single block.
 */
class NodeIterator {
public:
  struct value_type final {
    OrderedNode* Node;
    IROp_Header* Header;
  };
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;
  using reference = value_type&;
  using const_reference = const value_type&;
  using pointer = value_type*;
  using const_pointer = const value_type*;
  using iterator = NodeIterator;
  using const_iterator = const NodeIterator;
  using reverse_iterator = iterator;
  using const_reverse_iterator = const_iterator;
  using iterator_category = std::bidirectional_iterator_tag;

  NodeIterator(uintptr_t Base, uintptr_t IRBase)
    : BaseList {Base}
    , IRList {IRBase} {}
  explicit NodeIterator(uintptr_t Base, uintptr_t IRBase, OrderedNodeWrapper Ptr)
    : BaseList {Base}
    , IRList {IRBase}
    , Node {Ptr} {}

  [[nodiscard]]
  bool operator==(const NodeIterator& rhs) const {
    return Node.NodeOffset == rhs.Node.NodeOffset;
  }

  [[nodiscard]]
  bool operator!=(const NodeIterator& rhs) const {
    return !operator==(rhs);
  }

  NodeIterator operator++() {
    OrderedNodeHeader* RealNode = reinterpret_cast<OrderedNodeHeader*>(Node.GetNode(BaseList));
    Node = RealNode->Next;
    return *this;
  }

  NodeIterator operator--() {
    OrderedNodeHeader* RealNode = reinterpret_cast<OrderedNodeHeader*>(Node.GetNode(BaseList));
    Node = RealNode->Previous;
    return *this;
  }

  [[nodiscard]]
  value_type operator*() {
    OrderedNode* RealNode = Node.GetNode(BaseList);
    return {RealNode, RealNode->Op(IRList)};
  }

  [[nodiscard]]
  value_type operator()() {
    OrderedNode* RealNode = Node.GetNode(BaseList);
    return {RealNode, RealNode->Op(IRList)};
  }

  [[nodiscard]]
  NodeID ID() const {
    return Node.ID();
  }

  [[nodiscard]]
  static NodeIterator Invalid() {
    return NodeIterator(0, 0);
  }

protected:
  uintptr_t BaseList {};
  uintptr_t IRList {};
  OrderedNodeWrapper Node {};
};

// Size of an IR value or operation, encoded as its width in bytes.
// Each enumerator's value must directly match the byte count it names.
// Implicitly sized IR operations do arithmetic on these values to get between sizes.
enum class OpSize : uint8_t {
  iUnsized = 0,
  i8Bit = 1,
  i16Bit = 2,
  i32Bit = 4,
  i64Bit = 8,
  // 80 bits is 10 bytes.
  f80Bit = 10,
  i128Bit = 16,
  i256Bit = 32,
  // Marker for "no valid size"; the size helper functions assert against it.
  iInvalid = 0xFF,
};

// Selector for the kind of floating-point comparison to perform.
// NOTE(review): the meanings are inferred from the names only (equal, less-than,
// less-or-equal, unordered, not-equal, ordered) — confirm against the consumers.
enum class FloatCompareOp : uint8_t {
  EQ = 0,
  LT,
  LE,
  UNO,
  NEQ,
  ORD,
};

// Kind of shift applied to an operand.
// NOTE(review): the names look like the usual shift mnemonics (logical shift left,
// logical shift right, arithmetic shift right, rotate right) — confirm against the consumers.
enum class ShiftType : uint8_t {
  LSL = 0,
  LSR,
  ASR,
  ROR,
};

// Hint attached to a branch; exposed to the IR definitions as the `BranchHint` IR type.
// NOTE(review): the meaning of each enumerator is not visible here — confirm against the consumers.
enum class BranchHint : uint8_t { None = 0, Call, Return, CheckTF };


// Converts a size stored as an integer into an OpSize enum.
// OpSize enumerators carry their byte count as their value, so this is a cast
// that additionally rejects byte counts without a named enumerator.
// This is a nop operation and will be eliminated by the compiler.
static inline OpSize SizeToOpSize(uint8_t Size) {
  const auto Converted = static_cast<OpSize>(Size);
  switch (Converted) {
  case OpSize::iUnsized:
  case OpSize::i8Bit:
  case OpSize::i16Bit:
  case OpSize::i32Bit:
  case OpSize::i64Bit:
  case OpSize::f80Bit:
  case OpSize::i128Bit:
  case OpSize::i256Bit:
  case OpSize::iInvalid: return Converted;
  default: FEX_UNREACHABLE;
  }
}

// Converts an OpSize enum back into its size in bytes.
// Every named enumerator maps to its own underlying value; anything else is unreachable.
// This is a nop operation and will be eliminated by the compiler.
static inline uint8_t OpSizeToSize(IR::OpSize Size) {
  switch (Size) {
  case OpSize::iUnsized:
  case OpSize::i8Bit:
  case OpSize::i16Bit:
  case OpSize::i32Bit:
  case OpSize::i64Bit:
  case OpSize::f80Bit:
  case OpSize::i128Bit:
  case OpSize::i256Bit:
  case OpSize::iInvalid: return static_cast<uint8_t>(Size);
  default: FEX_UNREACHABLE;
  }
}

// Width of an OpSize in bits (its byte count times eight).
// Asserts that the size is not iInvalid.
static inline uint16_t OpSizeAsBits(IR::OpSize Size) {
  LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
  const unsigned Bytes = IR::OpSizeToSize(Size);
  return Bytes * 8u;
}

// Scales an OpSize up by a power of two: the byte count is shifted left by
// Shift and the result is converted back into an OpSize.
// Asserts that the input size is not iInvalid.
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator<<(IR::OpSize Size, T Shift) {
  LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
  const auto ShiftedBytes = IR::OpSizeToSize(Size) << Shift;
  return IR::SizeToOpSize(ShiftedBytes);
}

// Scales an OpSize down by a power of two: the byte count is shifted right by
// Shift and the result is converted back into an OpSize.
// Asserts that the input size is not iInvalid.
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator>>(IR::OpSize Size, T Shift) {
  LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
  const auto ShiftedBytes = IR::OpSizeToSize(Size) >> Shift;
  return IR::SizeToOpSize(ShiftedBytes);
}

// Divides the byte count of Size by the byte count of Divisor and converts the
// quotient back into an OpSize.
//
// Both operands are validated: Size must not be iInvalid, and Divisor must be a
// real size. An iUnsized divisor has a byte count of zero and would divide by
// zero, and an iInvalid divisor is as meaningless here as an iInvalid Size.
// The checks use LOGMAN_THROW_A_FMT, the same assertion macro the neighbouring
// size helpers use (the macro is defined elsewhere).
static inline OpSize operator/(IR::OpSize Size, IR::OpSize Divisor) {
  LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
  LOGMAN_THROW_A_FMT(Divisor != IR::OpSize::iInvalid && Divisor != IR::OpSize::iUnsized, "Invalid Divisor");
  return IR::SizeToOpSize(IR::OpSizeToSize(Size) / IR::OpSizeToSize(Divisor));
}

// Divides the byte count of Size by an integral Divisor and converts the
// quotient back into an OpSize.
//
// Size must not be iInvalid and Divisor must be non-zero, since integer
// division by zero is undefined. The checks use LOGMAN_THROW_A_FMT, the same
// assertion macro the neighbouring size helpers use (the macro is defined elsewhere).
template<typename T>
requires (std::is_integral_v<T>)
static inline OpSize operator/(IR::OpSize Size, T Divisor) {
  LOGMAN_THROW_A_FMT(Size != IR::OpSize::iInvalid, "Invalid Size");
  LOGMAN_THROW_A_FMT(Divisor != 0, "Invalid Divisor");
  return IR::SizeToOpSize(IR::OpSizeToSize(Size) / Divisor);
}

// Number of ElementSize-wide elements that fit into a register of RegisterSize
// (integer division of the two byte counts).
// Asserts that neither size is iInvalid or iUnsized.
static inline uint8_t NumElements(IR::OpSize RegisterSize, IR::OpSize ElementSize) {
  LOGMAN_THROW_A_FMT(!(RegisterSize == IR::OpSize::iInvalid || ElementSize == IR::OpSize::iInvalid || RegisterSize == IR::OpSize::iUnsized ||
                       ElementSize == IR::OpSize::iUnsized),
                     "Invalid Size");
  const uint8_t RegisterBytes = IR::OpSizeToSize(RegisterSize);
  const uint8_t ElementBytes = IR::OpSizeToSize(ElementSize);
  return RegisterBytes / ElementBytes;
}

#define IROP_ENUM
#define IROP_STRUCTS
#define IROP_SIZES
#define IROP_REG_CLASSES
#include <FEXCore/IR/IRDefines.inc>

/* This iterator can be used to step though every single node in a multi-block in SSA order.
 *
 * Iterates in the order of:
 *
 * end <-- CodeBlockA <--> BlockAInst1 <--> BlockAInst2 <--> CodeBlockB <--> BlockBInst1 <--> BlockBInst2 --> end
 */
class AllNodesIterator : public NodeIterator {
public:
  AllNodesIterator(uintptr_t Base, uintptr_t IRBase)
    : NodeIterator(Base, IRBase) {}
  explicit AllNodesIterator(uintptr_t Base, uintptr_t IRBase, OrderedNodeWrapper Ptr)
    : NodeIterator(Base, IRBase, Ptr) {}
  AllNodesIterator(NodeIterator other)
    : NodeIterator(other) {} // Allow NodeIterator to be upgraded

  AllNodesIterator operator++() {
    OrderedNodeHeader* RealNode = reinterpret_cast<OrderedNodeHeader*>(Node.GetNode(BaseList));
    auto IROp = Node.GetNode(BaseList)->Op(IRList);

    // If this is the last node of a codeblock, we need to continue to the next block
    if (IROp->Op == OP_ENDBLOCK) {
      auto EndBlock = IROp->C<IROp_EndBlock>();

      auto CurrentBlock = EndBlock->BlockHeader.GetNode(BaseList);
      Node = CurrentBlock->Header.Next;
    } else if (IROp->Op == OP_CODEBLOCK) {
      auto CodeBlock = IROp->C<IROp_CodeBlock>();

      Node = CodeBlock->Begin;
    } else {
      Node = RealNode->Next;
    }

    return *this;
  }

  AllNodesIterator operator--() {
    auto IROp = Node.GetNode(BaseList)->Op(IRList);

    if (IROp->Op == OP_BEGINBLOCK) {
      auto BeginBlock = IROp->C<IROp_EndBlock>();

      Node = BeginBlock->BlockHeader;
    } else if (IROp->Op == OP_CODEBLOCK) {
      auto PrevBlockWrapper = Node.GetNode(BaseList)->Header.Previous;
      auto PrevCodeBlock = PrevBlockWrapper.GetNode(BaseList)->Op(IRList)->C<IROp_CodeBlock>();

      Node = PrevCodeBlock->Last;
    } else {
      Node = Node.GetNode(BaseList)->Header.Previous;
    }

    return *this;
  }

  [[nodiscard]]
  static AllNodesIterator Invalid() {
    return AllNodesIterator(0, 0);
  }
};

class IRListView;
class IREmitter;

// Derives a node ID from the wrapper's offset by dividing it by the size of one OrderedNode.
template<typename Type>
inline NodeID NodeWrapperBase<Type>::ID() const {
  const auto Index = NodeOffset / sizeof(IR::OrderedNode);
  return NodeID(Index);
}

// NOTE(review): defined elsewhere; judging by the name, this reports whether Op
// ends a block — confirm against the definition.
[[nodiscard]]
bool IsBlockExit(FEXCore::IR::IROps Op);

// NOTE(review): defined elsewhere; presumably writes a textual dump of the IR
// into `out` — confirm against the definition.
void Dump(fextl::stringstream* out, const IRListView* IR);

// Exposes a NodeID's underlying Value so the ID can be formatted as a plain number.
constexpr auto format_as(FEXCore::IR::NodeID ID) {
  const auto Raw = ID.Value;
  return Raw;
}

// NOTE(review): the macro is defined elsewhere; by its name it makes these enums
// formattable by passing them through as their underlying value — confirm.
FEX_DEFINE_ENUM_FMT_PASSTHROUGH(FEXCore::IR::FenceType)
FEX_DEFINE_ENUM_FMT_PASSTHROUGH(FEXCore::IR::MemOffsetType)
FEX_DEFINE_ENUM_FMT_PASSTHROUGH(FEXCore::IR::OpSize)
FEX_DEFINE_ENUM_FMT_PASSTHROUGH(FEXCore::IR::RegClass)
} // namespace FEXCore::IR

// Lets NodeID be used as a key in hashed containers by hashing its underlying Value.
template<>
struct std::hash<FEXCore::IR::NodeID> {
  size_t operator()(const FEXCore::IR::NodeID& ID) const noexcept {
    using Underlying = FEXCore::IR::NodeID::value_type;
    const std::hash<Underlying> Hasher {};
    return Hasher(ID.Value);
  }
};


================================================
FILE: FEXCore/Source/Interface/IR/IR.json
================================================
{
  "Docs": [
    "IRTypes define types that can be used directly in the IR.",
    "These will translate to the underlying C types when stored in the op data",
    "",
    "SSA types are special cased",
    "  SSA = untyped",
    "  GPR = GPR class type",
    "  FPR = FPR class type",
    "Declaring the SSA types correctly will allow validation passes to ensure the op is getting passed correct arguments",
    "",
    "Arguments must always follow a particular order. <Type>:<Prefix><Name>",
    "Type must always be an IRType",
    "Prefix currently can be one of the following: #, $",
    "  #: This is a temporary argument that is in the IR Emitter arguments",
    "    - This will not be stored in the resulting IR op data structure",
    "  $: This is a value that will be stored inside of the IR op data structure",
    "    - If it is type SSA, GPR, or FPR then it is an SSA type",
    "    - These will get added to the SSA argument union to ensure RA happens",
    "",
    "IR op definition follows the structure of <SSA Type> = <IROp> <Arguments>",
    "",
    "Eg:",
    "IR op with no result and no arguments",
    "  CallbackReturn",
    "",
    "IR op with result and no arguments",
    "  GPR = ProcessorID",
    "",
    "IR op with no result and non-SSA argument",
    "  Fence FenceType:$Type",
    "",
    "IR op with no result and SSA arguments",
    "  SetRoundingMode GPR:$Mode",
    "",
    "IR op with result and SSA arguments",
    "  GPR = Add GPR:$Src1, GPR:$Src2",

    "",
    "## Op members ##",
    "* Desc",
    "  * List of text for documenting this IR op.",
    "* OpClass",
    "  * Textual class to group IR ops by type",
    "* DestClass",
    "  * SSA class of the return when the return type is `SSA`",
    "  * Not used if the destination type is one of {GPR, FPR}",
    "* DestSize",
    "  * The size of the destination type",
    "* EmitValidation",
    "  * List of validations to emit for the IR emitter",
    "  * These are validations that can't be automatically inferred and need to be hand-written",
    ""
  ],
  "Enums": {
    "class CondClass : uint8_t": [
      "EQ    = 0,",
      "NEQ   = 1,",
      "UGE   = 2,",
      "ULT   = 3,",
      "MI    = 4,",
      "PL    = 5,",
      "VS    = 6,",
      "VC    = 7,",
      "UGT   = 8,",
      "ULE   = 9,",
      "SGE   = 10,",
      "SLT   = 11,",
      "SGT   = 12,",
      "SLE   = 13,",
      "TSTZ  = 14, /* bit test zero */",
      "TSTNZ = 15, /* bit test nonzero */",
      "",
      "FLU   = 16, /* float less or unordered */",
      "FGE   = 17, /* float greater or equal */",
      "FLEU  = 18, /* float less or equal or unordered */",
      "FGT   = 19, /* float greater */",
      "FU    = 20, /* float unordered */",
      "FNU   = 21, /* float not unordered */",
      "",
      "AL    = 32, /* always */"
    ],
    "class FenceType : uint8_t": [
      "Load      = 0,",
      "Store     = 1,",
      "LoadStore = 2,",
      "Inst      = 3,"
    ],
    "class MemOffsetType : uint8_t": [
      "SXTX = 0,",
      "UXTW = 1,",
      "SXTW = 2,"
    ],
    "class RegClass : uint32_t": [
      "Invalid  = 0,",
      "GPR      = 1,",
      "GPRFixed = 2,",
      "FPR      = 3,",
      "FPRFixed = 4,",
      "Complex  = 5,"
    ],
    "class RoundMode : uint8_t": [
      "Nearest     = 0,",
      "NegInfinity = 1,",
      "PosInfinity = 2,",
      "TowardsZero = 3, /* Truncate */",
      "Host        = 4,"
    ],
    "class ConstPad : uint8_t": [
      "NoPad = 0,",
      "DoPad = 1,",
      "AutoPad = 2,"
    ]
  },
  "Defines": [
    "constexpr uint8_t NumClasses {6}",

    "constexpr uint8_t FCMP_FLAG_EQ        = 0",
    "constexpr uint8_t FCMP_FLAG_LT        = 1",
    "constexpr uint8_t FCMP_FLAG_UNORDERED = 2",

    "struct BreakDefinition {",
    "  uint16_t ErrorRegister;",
    "  uint8_t Signal;",
    "  uint8_t TrapNumber;",
    "  uint8_t si_code;",
    "};"
  ],
  "IRTypes" : {
    "i1":  "bool",
    "i8":  "int8_t",
    "i16": "int16_t",
    "i32": "int32_t",
    "i64": "int64_t",
    "u8":  "uint8_t",
    "u16": "uint16_t",
    "u32": "uint32_t",
    "u64": "uint64_t",
    "OpSize": "FEXCore::IR::OpSize",
    "SSA": "OrderedNode*",
    "GPR": "OrderedNode*",
    "FPR": "OrderedNode*",
    "FenceType": "FenceType",
    "RegisterClass": "RegClass",
    "CondClass": "CondClass",
    "SHA256Sum": "SHA256Sum",
    "MemOffsetType": "MemOffsetType",
    "BreakDefinition": "BreakDefinition",
    "RoundType": "RoundMode",
    "ConstPad": "ConstPad",
    "FloatCompareOp": "FloatCompareOp",
    "NamedVectorConstant": "FEXCore::IR::NamedVectorConstant",
    "IndexNamedVectorConstant": "FEXCore::IR::IndexNamedVectorConstant",
    "ShiftType": "FEXCore::IR::ShiftType",
    "BranchHint": "FEXCore::IR::BranchHint",
    "Array16": "std::array<uint8_t, 16>"
  },
  "Ops": {
    "Misc": {
      "Dummy": {
        "HasSideEffects": true,
        "SwitchGen": false,
        "JITDispatchOverride": "NoOp"
      },
      "IRHeader SSA:$Blocks, u64:$OriginalRIP, u32:$BlockCount, u32:$NumHostInstructions, u32:$SpillSlots, i1:$PostRA{false}, i1:$HasX87{false}, i1:$ReadsParity{false}": {
        "SwitchGen": false,
        "JITDispatchOverride": "NoOp"
      },
      "CodeBlock SSA:$Begin, SSA:$Last, u32:$ID, i1:$EntryPoint{false}, u32:$GuestEntryOffset{0}": {
        "SwitchGen": false,
        "RAOverride": "0",
        "JITDispatchOverride": "NoOp"
      },
      "BeginBlock SSA:$BlockHeader": {
        "HasSideEffects": true,
        "SwitchGen": false,
        "RAOverride": "0",
        "JITDispatchOverride": "NoOp"
      },
      "InvalidateFlags u64:$Flags": {
        "HasSideEffects": true,
        "JITDispatchOverride": "NoOp"
      },

      "EndBlock SSA:$BlockHeader": {
        "HasSideEffects": true,
        "SwitchGen": false,
        "RAOverride": "0",
        "JITDispatchOverride": "NoOp"
      },

      "GuestOpcode u32:$GuestEntryOffset": {
        "Desc": ["Marks the beginning of a guest opcode"],
        "HasSideEffects": true
      },

      "GPR = ValidateCode Array16:$CodeOriginal, GPR:$Address, u8:$CodeLength": {
        "HasSideEffects": true,
        "HasDest": true,
        "DestSize": "OpSize::i64Bit"
      },

      "ThreadRemoveCodeEntry": {
        "HasSideEffects": true
      },

      "GPR = ProcessorID": {
        "Desc": ["Returns the processor ID correlating to the current running CPU",
                 "This may be out of date by the time this instruction is executed so care must be taken",
                 "This same information can be gotten from syscall getcpu(&cpu, &node)",
                 "uint32_t Res = (node << 12) | cpu;",
                 "This means it has a limitation of 4096 CPU cores. Which is fine and matches x86 behaviour"
                ],
        "DestSize": "OpSize::i64Bit"
      },
      "GPR = GetRoundingMode": {
        "Desc": ["Gets the current rounding mode options"
                ],
        "DestSize": "OpSize::i32Bit"
      },

      "SetRoundingMode GPR:$RoundMode, i1:$SetDAZ, GPR:$MXCSR": {
        "Desc": ["Sets the current rounding mode options for the thread"
                ],
        "HasSideEffects": true
      },
      "GPR = PushRoundingMode u8:$RoundMode": {
        "Desc": ["Override the current rounding mode options for the thread, returning old FPCR"
                ],
        "DestSize": "OpSize::i64Bit",
        "HasSideEffects": true
      },
      "PopRoundingMode GPR:$FPCR": {
        "Desc": ["Resets rounding mode after PushRoundingMode operation"
                ],
        "HasSideEffects": true
      },
      "Print SSA:$Value": {
        "HasSideEffects": true,
        "Desc": ["Debug operation that prints an SSA value to the console",
                 "May only print 64bits of the value"]
      },
      "GPR = AllocateGPR i1:$ForPair": {
        "Desc": ["Silly pseudo-instruction to allocate a register for a future destination",
                 "Note: if an instruction uses allocated destinations-as-sources,",
                 "it cannot use a regular destination too. This ensures RA correctness.",
                 "This is a kludge to deal with the IR's lack of multiple destinations",
                 "If ForPair is set, RA will try to allocate the base of a register pair"],
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = AllocateFPR OpSize:#RegisterSize, OpSize:#ElementSize": {
        "Desc": ["Like AllocateGPR, but for FPR"],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "JITDispatch": false
      },
      "GPR = AllocateGPRAfter GPR:$After": {
        "Desc": ["Silly pseudo-instruction to allocate a register for a future destination",
                 "This is a kludge to deal with the IR's lack of multiple destinations",
                 "RA will attempt to allocate to the register after $After.",
                 "It may not succeed."],
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "GPR = RDRAND i1:$GetReseeded": {
        "Desc": ["Uses the hardware random number generator to generate a 64bit number",
                 "The boolean argument asks if we should be reading the reseeded number or not",
                 "Reseeded RNG calculation is more expensive and will be heavier to use",
                 "Returns the 64-bit number",
                 "Sets the Z flag if the number is valid.",
                 "RNG hardware is allowed to fail early and return. Software must always check this"
                ],
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "Yield": {
        "HasSideEffects": true,
        "Desc": ["This is a hint instruction that the CPU is likely to do a spin so it might want to pause to help out SMP",
                 "Can be implemented as a NOP if necessary"]
      },
      "WFET GPR:$Upper, GPR:$Lower": {
        "HasSideEffects": true,
        "Desc": [
          "Implement a low power wait attempting to sleep until RDTSC >= Upper:Lower.",
          "Will spuriously wake up."
	]
      },
      "MonoBackpatcherWrite OpSize:$Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": [ "Writes and invalidates the target address with the invalidation mutex locked. This is a fault-avoiding",
                  "replacement for the atomic SMC writes used in the mono callsite backpatcher." ],
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      }
    },
    "Branch": {
      "Jump SSA:$TargetBlock": {
        "HasSideEffects": true,
        "RAOverride": "0"
      },
      "CondJump SSA:$Cmp1, SSA:$Cmp2, SSA:$TrueBlock, SSA:$FalseBlock, CondClass:$Cond{CondClass::NEQ}, OpSize:$CompareSize{OpSize::iInvalid}, i1:$FromNZCV{false}": {
        "Inline": ["", "AddSub"],
        "HasSideEffects": true,
        "RAOverride": "2"
      },
      "ExitFunction OpSize:#Size, GPR:$NewRIP, BranchHint:$Hint, GPR:$CallReturnAddress, SSA:$CallReturnBlock": {
        "Desc": ["Exits the current JIT function with a target RIP"
                ],
        "Inline": ["Any"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "RAOverride": "2"
      },
      "Break BreakDefinition:$Reason": {
        "HasSideEffects": true
      },
      "CallbackReturn": {
        "HasSideEffects": true
      },
      "GPR = Syscall GPR:$SyscallID, GPR:$Arg0, GPR:$Arg1, GPR:$Arg2, GPR:$Arg3, GPR:$Arg4, GPR:$Arg5": {
        "HasSideEffects": true,
        "Desc": ["Dispatches a guest syscall through to the SyscallHandler class"
                ],
        "DestSize": "OpSize::i64Bit"
      },

      "Thunk GPR:$ArgPtr, SHA256Sum:$ThunkNameHash": {
        "HasSideEffects": true
      },

      "GPR:$EAX, GPR:$EBX, GPR:$ECX, GPR:$EDX = CPUID GPR:$Function, GPR:$Leaf": {
        "Desc": ["Calls in to the CPUID handler function to return emulated CPUID"],
        "DestSize": "OpSize::i32Bit",
        "HasSideEffects": true
      },
      "GPR:$EAX, GPR:$EDX = XGetBV GPR:$Function": {
        "Desc": ["Calls in to the XCR handler function to return emulated XCR"],
        "DestSize": "OpSize::i32Bit",
        "HasSideEffects": true
      }
    },
    "Moves": {
      "GPR = Copy GPR:$Source": {
        "Desc": ["GPR copy, generated by RA to split live ranges"],
        "DestSize": "OpSize::i64Bit"
      }
    },
    "StaticRA": {
      "SSA = LoadRegister u32:$Reg, RegisterClass:$Class, OpSize:#Size": {
        "Desc": ["Loads a value from the given register",
                 "Size must match the execution mode."],
        "DestSize": "Size"
      },

      "GPR = LoadPF OpSize:#Size": {
        "Desc": ["Loads raw PF"],
        "DestSize": "Size"
      },

      "GPR = LoadAF OpSize:#Size": {
        "Desc": ["Loads raw AF"],
        "DestSize": "Size"
      },

      "SSA = StoreRegister SSA:$Value, OpSize:#Size": {
         "HasSideEffects": true,
        "Desc": ["Stores a value to a given register.",
                 "Size must match the execution mode."],
        "DestSize": "Size"
      },

      "StorePF GPR:$Value, OpSize:#Size": {
        "HasSideEffects": true,
        "Desc": ["Stores raw PF"],
        "DestSize": "Size"
      },

      "StoreAF GPR:$Value, OpSize:#Size": {
        "HasSideEffects": true,
        "Desc": ["Stores raw AF"],
        "DestSize": "Size"
      }
    },
    "Memory": {
      "SSA = LoadContext OpSize:#ByteSize, RegisterClass:$Class, u32:$Offset": {
        "Desc": ["Loads a value from the context with offset",
                 "Dest = Ctx[Offset]"
                ],
        "DestSize": "ByteSize",
        "EmitValidation": [
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't LoadContext to GPR\"",
          "!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't LoadContext to XMM\""
        ]
      },

      "SSA:$Value1, SSA:$Value2 = LoadContextPair OpSize:#ByteSize, RegisterClass:$Class, u32:$Offset": {
        "Desc": ["Loads a pair of values from the context with offset",
                 "Value1 = Ctx[Offset], Value2 = Ctx[Offset + ByteSize]"
                ],
        "HasSideEffects": true,
        "DestSize": "ByteSize",
        "EmitValidation": [
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't LoadContext to GPR\"",
          "!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't LoadContext to XMM\""
        ]
      },

      "StoreContext OpSize:#ByteSize, RegisterClass:$Class, SSA:$Value, u32:$Offset": {
        "Desc": ["Stores a value to the context with offset",
                 "Ctx[Offset] = Value",
                 "Zero Extends if value's type is too small",
                 "Truncates if value's type is too large"
                ],
        "Inline": ["Zero", ""],
        "HasSideEffects": true,
        "DestSize": "ByteSize",
        "EmitValidation": [
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't StoreContext to GPR\"",
          "!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't StoreContext to XMM\""
        ]
      },

      "StoreContextPair OpSize:#ByteSize, RegisterClass:$Class, SSA:$Value1, SSA:$Value2, u32:$Offset": {
        "Desc": ["Stores a pair of values to the context with offset",
                 "Ctx[Offset] = Value1, Ctx[Offset + ByteSize] = Value2",
                 "Zero Extends if value's type is too small",
                 "Truncates if value's type is too large"
                ],
        "HasSideEffects": true,
        "DestSize": "ByteSize",
        "EmitValidation": [
          "WalkFindRegClass($Value1) == $Class",
          "WalkFindRegClass($Value2) == $Class",
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($Offset >= offsetof(Core::CPUState, gregs[0]) && $Offset < offsetof(Core::CPUState, gregs[16])) && \"Can't StoreContext to GPR\"",
          "!($Offset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $Offset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't StoreContext to XMM\""
        ]
      },

      "SSA = LoadContextIndexed GPR:$Index, OpSize:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
        "Desc": ["Loads a value from the context with offset and indexed by SSA value",
                 "Dest = Ctx[BaseOffset + Index * Stride]"
                ],
        "DestSize": "ByteSize",
        "EmitValidation": [
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($BaseOffset >= offsetof(Core::CPUState, gregs[0]) && $BaseOffset < offsetof(Core::CPUState, gregs[16])) && \"Can't LoadContextIndexed to GPR\"",
          "!($BaseOffset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $BaseOffset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't LoadContextIndexed to XMM\""
        ]
      },
      "StoreContextIndexed SSA:$Value, GPR:$Index, OpSize:#ByteSize, u32:$BaseOffset, u32:$Stride, RegisterClass:$Class": {
        "HasSideEffects": true,
        "Desc": ["Stores a value to the context with offset and indexed by SSA value",
                 "Ctx[BaseOffset + Index * Stride] = Value"
                ],
        "DestSize": "ByteSize",
        "EmitValidation": [
          "WalkFindRegClass($Value) == $Class",
          "($Class == RegClass::GPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit)) || $Class == RegClass::FPR",
          "($Class == RegClass::FPR && (#ByteSize == IR::OpSize::i8Bit || #ByteSize == IR::OpSize::i16Bit || #ByteSize == IR::OpSize::i32Bit || #ByteSize == IR::OpSize::i64Bit || #ByteSize == IR::OpSize::i128Bit || #ByteSize == IR::OpSize::i256Bit)) || $Class == RegClass::GPR",
          "!($BaseOffset >= offsetof(Core::CPUState, gregs[0]) && $BaseOffset < offsetof(Core::CPUState, gregs[16])) && \"Can't StoreContextIndexed to GPR\"",
          "!($BaseOffset >= offsetof(Core::CPUState, xmm.avx.data[0]) && $BaseOffset < offsetof(Core::CPUState, xmm.avx.data[16])) && \"Can't StoreContextIndexed to XMM\""
        ]
      },
      "GPR = FormContextAddress OpSize:#Size, GPR:$Index, u32:$Stride": {
        "Desc": ["Forms an address into the context structure indexed by SSA value",
                 "Dest = Ctx + Index * Stride",
                 "This allows backends to compute the address once and reuse it for multiple memory operations",
                 "Stride must be a power of 2"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "#Size == IR::OpSize::i64Bit"
        ]
      },

      "SpillRegister SSA:$Value, u32:$Slot, RegisterClass:$Class": {
        "HasSideEffects": true,
        "Desc": ["Spills an SSA value to memory",
                 "Spill slots are register allocated and have live ranges calculated to handle slot calculation",
                 "!Don't use this op. It is for RA to handle spilling and filling!"
                ],
        "EmitValidation": [
          "WalkFindRegClass($Value) == $Class"
        ]
      },

      "SSA = FillRegister OpSize:#Size, OpSize:#ElementSize, u32:$Slot, RegisterClass:$Class": {
        "Desc": ["Fills a register from a spill slot",
                 "Spill slots are register allocated and have live ranges calculated to handle slot calculation",
                 "!Don't use this op. It is for RA to handle spilling and filling!"
                ],
        "DestSize": "Size",
        "ElementSize": "ElementSize"
      },

      "GPR = LoadNZCV": {
        "Desc": ["Loads value of NZCV register"],
        "DestSize": "OpSize::i32Bit"
      },

      "StoreNZCV GPR:$Value": {
        "HasSideEffects": true,
        "Desc": ["Stores value to NZCV register"],
        "DestSize": "OpSize::i32Bit"
      },

      "GPR = LoadDF": {
        "Desc": ["Loads the direction flag from the context object in -1/1",
                  "representation for easy consumption"
                ],
        "DestSize": "OpSize::i64Bit"
      },

      "SSA = LoadMem RegisterClass:$Class, OpSize:#Size, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Inline": ["", "Mem"],
        "DestSize": "Size"
      },

      "SSA:$Value1, SSA:$Value2 = LoadMemPair RegisterClass:$Class, OpSize:#Size, GPR:$Addr, u32:$Offset": {
        "Desc": ["Load a pair of values from memory."],
        "DestSize": "Size",
        "HasSideEffects": true
      },

      "StoreMem RegisterClass:$Class, OpSize:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": [ "Stores a value to memory.",
                  "Zero Extends if value's type is too small",
                  "Truncates if value's type is too large"
                ],
        "Inline": ["Zero", "", "Mem"],
        "HasSideEffects": true,
        "DestSize": "Size"
      },

      "StoreMemPair RegisterClass:$Class, OpSize:#Size, SSA:$Value1, SSA:$Value2, GPR:$Addr, u32:$Offset": {
        "Desc": [ "Stores a pair of values to memory.",
                  "Zero Extends if value's type is too small",
                  "Truncates if value's type is too large"
                ],
        "Inline": ["Zero", "Zero"],
        "HasSideEffects": true,
        "DestSize": "Size"
      },

      "StoreMemX87SVEOptPredicate OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Value, GPR:$Addr": {
        "Desc": [ "Stores a value to memory using SVE predicate mask that's designed",
                  "specifically for use in the X87 SVE Ldst optimization." ],
        "DestSize": "RegisterSize",
        "HasSideEffects": true,
        "ElementSize": "ElementSize"
      },
      "FPR = LoadMemX87SVEOptPredicate OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Addr": {
        "Desc": [ "Loads a value to memory using SVE predicate mask that's designed",
                  "specifically for use in the X87 SVE Ldst optimization." ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "SSA = LoadMemTSO RegisterClass:$Class, OpSize:#Size, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": ["Does a x86 TSO compatible load from memory. Offset must be Invalid()."
                ],
        "Inline": ["", "Memtso"],
        "DestSize": "Size"
      },

      "StoreMemTSO RegisterClass:$Class, OpSize:#Size, SSA:$Value, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": ["Does a x86 TSO compatible store to memory. Offset must be Invalid()."
                ],
        "Inline": ["Zero", "", "Memtso"],
        "HasSideEffects": true,
        "DestSize": "Size"
      },

      "FPR = VLoadVectorMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Mask, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": ["Does a masked load similar to VPMASKMOV/VMASKMOV where the upper bit of each element",
                 "determines whether or not that element will be loaded from memory"],
        "ImplicitFlagClobber": true,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "VStoreVectorMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Mask, FPR:$Data, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": ["Does a masked store similar to VPMASKMOV/VMASKMOV where the upper bit of each element",
                 "determines whether or not that element will be stored to memory"],
        "HasSideEffects": true,
        "ImplicitFlagClobber": true,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VLoadVectorGatherMasked OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Incoming, FPR:$Mask, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, OpSize:$VectorIndexElementSize, u8:$OffsetScale, u8:$DataElementOffsetStart, u8:$IndexElementOffsetStart, OpSize:$AddrSize": {
        "Desc": [
          "Does a masked load similar to VPGATHERD* where the upper bit of each element",
          "determines whether or not that element will be loaded from memory.",
          "Most of VSIB encoding is passed directly through to the IR operation."
        ],
        "TiedSource": 0,
        "ImplicitFlagClobber": true,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "$VectorIndexElementSize == OpSize::i32Bit || $VectorIndexElementSize == OpSize::i64Bit"
        ]
      },
      "FPR = VLoadVectorGatherMaskedQPS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Incoming, FPR:$MaskReg, GPR:$AddrBase, FPR:$VectorIndexLow, FPR:$VectorIndexHigh, u8:$OffsetScale, OpSize:$AddrSize": {
        "Desc": [
          "Does a masked load similar to VPGATHERQPS where the upper bit of each element",
          "determines whether or not that element will be loaded from memory.",
          "Most of VSIB encoding is passed directly through to the IR operation.",
          "Only supports the case of 32-bit data element sizes from 64-bit addresses"
        ],
        "TiedSource": 0,
        "ImplicitFlagClobber": true,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "ElementSize == OpSize::i32Bit",
          "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
        ]
      },
      "FPR = VLoadVectorElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$DstSrc, u8:$Index, GPR:$Addr": {
        "Desc": ["Does a memory load to a single element of a vector.",
                 "Leaves the rest of the vector's data intact.",
                 "Matches arm64 ld1 semantics"],
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "VStoreVectorElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Value, u8:$Index, GPR:$Addr": {
        "Desc": ["Does a memory store of a single element of a vector.",
                 "Matches arm64 st1 semantics"],
        "HasSideEffects": true,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VBroadcastFromMem OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Address": {
        "Desc": ["Broadcasts an ElementSize value from memory into each element of a vector."],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "GPR = Push OpSize:#Size, OpSize:$ValueSize, GPR:$Value, GPR:$Addr": {
        "Desc": [
          "Pushes a value to the address, returning the new pointer after incrementing.",
          "The address is decremented by the value size while.",
          "The return value size is the size of the current operating mode"
        ],
        "TiedSource": 1,
        "HasSideEffects": true,
        "DestSize": "Size"
      },
      "PushTwo OpSize:#Size, OpSize:$ValueSize, GPR:$Value1, GPR:$Value2, GPR:$Addr": {
        "Desc": [
          "Push two values to the address, incrementing the pointer in the place.",
          "Fused post-RA so doesn't have a destination."
        ],
        "HasSideEffects": true
      },
      "GPR = RMWHandle GPR:$Value": {
        "Desc": [
          "This is a special move that indicates the result will be poisoned by a non-SSA instruction writing to its result.",
          "In effect, it serves to prevent invalid optimizations with non-SSA instructions."
        ],
        "DestSize": "OpSize::i64Bit",
        "HasSideEffects": true,
        "TiedSource": 0
      },
      "GPR:$Addr, GPR:$Value = Pop OpSize:$Size, GPR:$Addr": {
        "Desc": [
          "Pops a value from the address, updating the new pointer after incrementing.",
          "The address is incremented by the size via an RMW source/destintaion."
        ],
        "HasSideEffects": true,
        "DestSize": "Size"
      },
      "GPR:$Addr, GPR:$Value1, GPR:$Value2 = PopTwo OpSize:$Size, GPR:$Addr": {
        "Desc": ["Pop two values from the address. Fused post-RA."],
        "HasSideEffects": true,
        "DestSize": "Size"
      },
      "GPR = MemSet i1:$IsAtomic, OpSize:$Size, GPR:$Prefix, GPR:$Addr, GPR:$Value, GPR:$Length, GPR:$Direction": {
        "Desc": ["Duplicates behaviour of x86 STOS repeat",
                 "Returns the final address that gets generated without the prefix appended."
                ],
        "Inline": ["", "", "Zero", "", "Any"],
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "GPR:$DstAddress, GPR:$SrcAddress = MemCpy i1:$IsAtomic, OpSize:$Size, GPR:$Dest, GPR:$Src, GPR:$Length, GPR:$Direction": {
        "Desc": ["Duplicates behaviour of x86 MOVS repeat",
                 "Returns the final addresses after they have been incremented or decremented"
                ],
        "Inline": ["", "", "", "Any"],
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "CacheLineClear GPR:$Addr, i1:$Serialize": {
        "Desc": ["Does a 64 byte cacheline clear at the address specified",
                 "Only clears the data cachelines. Doesn't do any zeroing",
                 "Can skip serialization if requested."
                ],
        "HasSideEffects": true
      },
      "CacheLineClean GPR:$Addr": {
        "Desc": ["Does a 64 byte cacheline cleanat the address specified",
                 "Only cleans the data cachelines. Doesn't do any zeroing",
                 "Skips the invalidation step of the CacheLineClear operation"
                ],
        "HasSideEffects": true
      },
      "CacheLineZero GPR:$Addr": {
        "Desc": ["Does a 64 byte zero at the address specified",
                 "Writing zeroes to memory",
                 "It is specifically non-temporal and weakly ordered",
                 "This matches CLZero behaviour"
                ],
        "HasSideEffects": true
      },
      "Fence FenceType:$Fence": {
        "Desc": ["Does a memory fence operation of the desired type",
                 "FenceType::Load: Ensures load memory operations are serialized",
                 "FenceType::Store: Ensures store memory operations are serialized",
                 "FenceType::LoadStore: Ensures loads and store memory operations are serialized",
                 "FenceType::Inst: Instruction barrier. Ensures all instructions after this point will be explicitly fetched",
                 "Ensures the memory operations are globally visible"
                ],
        "HasSideEffects": true
      },
      "Prefetch i1:$ForStore, i1:$Stream, i8:$CacheLevel, GPR:$Addr, GPR:$Offset, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": ["Does a cacheline prefetch operation"
                ],
        "Inline": ["", "Mem"],
        "EmitValidation": ["CacheLevel > 0 && CacheLevel < 4"],
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "VStoreNonTemporal OpSize:#RegisterSize, FPR:$Value, GPR:$Addr, i8:$Offset": {
        "Desc": ["Does a non-temporal memory store of a vector.",
                 "Matches arm64 SVE stnt1b semantics.",
                 "Specifically weak-memory model ordered to match x86 non-temporal stores."
        ],
        "HasSideEffects": true,
        "DestSize": "RegisterSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit",
          "Offset % IR::OpSizeToSize(RegisterSize) == 0"
        ]
      },
      "VStoreNonTemporalPair OpSize:#RegisterSize, FPR:$ValueLow, FPR:$ValueHigh, GPR:$Addr, i8:$Offset": {
        "Desc": ["Does a non-temporal memory store of two vector registers.",
                 "Matches arm64 stnp semantics.",
                 "Specifically weak-memory model ordered to match x86 non-temporal stores."
        ],
        "HasSideEffects": true,
        "DestSize": "RegisterSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i128Bit",
          "Offset % IR::OpSizeToSize(RegisterSize) == 0"
        ]
      },
      "FPR = VLoadNonTemporal OpSize:#RegisterSize, GPR:$Addr, i8:$Offset": {
        "Desc": ["Does a non-temporal memory load of a vector.",
                 "Matches arm64 SVE ldnt1b semantics.",
                 "Specifically weak-memory model ordered to match x86 non-temporal stores."
        ],
        "HasSideEffects": true,
        "DestSize": "RegisterSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i256Bit",
          "Offset % IR::OpSizeToSize(RegisterSize) == 0"
        ]
      },
      "ContextClear u32:$Offset, u32:$Size": {
        "Desc": [
          "Clears a region of the context by CLZero size",
          "Both the offset and size alignment need to be by CLZero size"
        ],
        "HasSideEffects": true,
        "EmitValidation": [
          "Offset % 64 == 0",
          "Size % 64 == 0"
        ]
      }
    },
    "Atomic": {
      "GPR = CAS OpSize:#Size, GPR:$Expected, GPR:$Desired, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Does a compare and swap of values to a memory location",
                 "This mostly matches the C++ atomic_compare_exchange_strong function",
                 "Dest = atomic_compare_exchange_strong(%Addr, %Expected, %Desired)",
                 "Depending on if the value in %Addr is Expected the results destination will be different",
                 "Behaves like the following but atomically",
                 "Dest = %Expected",
                 "if (deref(%Addr) != %Expected) Dest = deref(%Addr)"
                ],
        "TiedSource": 0,
        "DestSize": "Size",
        "ImplicitFlagClobber": true,
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR:$Lo, GPR:$Hi = CASPair OpSize:#Size, GPR:$ExpectedLo, GPR:$ExpectedHi, GPR:$DesiredLo, GPR:$DesiredHi, GPR:$Addr": {
        "Desc": ["Does a compare and exchange with two pairs of values",
                 "ssa0 is the comparison value",
                 "ssa1 is the new value",
                 "ssa2 is the memory location",
                 "Returns the lower & upper halves of the value in memory."
                ],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicSwap OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer swap"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchAdd OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and add",
                 "Atomically fetches %Addr and adds %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchSub OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and sub",
                 "Atomically fetches %Addr and subtracts %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchAnd OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and binary and",
                 "Atomically fetches %Addr and binary ands %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchCLR OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and binary clear",
                 "Atomically fetches %Addr and binary clears %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "Matches ARM ldclral semantics",
                 "eg: Dest[Addr] &= ~Value",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchOr OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and binary or",
                 "Atomically fetches %Addr and binary ors %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchXor OpSize:#Size, GPR:$Value, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and binary exclusive or",
                 "Atomically fetches %Addr and binary exclusive ors %value to the memory location",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AtomicFetchNeg OpSize:#Size, GPR:$Addr": {
        "HasSideEffects": true,
        "Desc": ["Atomic integer fetch and two's complement negate",
                 "Dest is the value prior to operating on the value in memory",
                 "IR layout must match NonFetch-variant, otherwise DCE IR optimization breaks!"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "TelemetrySetValue GPR:$Value, u8:$TelemetryValueIndex": {
        "HasSideEffects": true,
        "Desc": ["Set Telemetry value if the passed in 32-bit value isn't zero.",
                 "Only useful for 32-bit applications."
                ],
        "ImplicitFlagClobber": true,
        "DestSize": "OpSize::i64Bit"
      }
    },
    "ALU": {
      "GPR = EntrypointOffset OpSize:#Size, i64:$Offset": {
        "Desc": ["Returns the <entrypoint> + Offset address",
                 "When the size is 4 bytes then 32-bit overflow and underflow needs to work"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "InlineEntrypointOffset OpSize:#Size, i64:$Offset": {
        "Desc": ["Returns the <entrypoint> + Offset address",
                 "When the size is 4 bytes then 32-bit overflow and underflow needs to work"
                ],
        "HasSideEffects": true,
        "RAOverride": "0",
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "GPR = Constant i64:$Constant, ConstPad:$Pad{IR::ConstPad::NoPad}, i32:$MaxBytes{0}": {
        "Desc": ["Generates a 64bit constant inside of a GPR",
                 "Unsupported to create a constant in FPR"
                ],
        "DestSize": "OpSize::i64Bit",
        "EmitValidation": [
          "MaxBytes >= 0 && MaxBytes <= 8 && (MaxBytes & 1) == 0",
          "MaxBytes == 0 || (Constant >> (MaxBytes * 8)) == 0"
        ]
      },

      "InlineConstant i64:$Constant": {
        "Desc": ["Generates a 64bit constant to be used directly, non-FPR"],
        "HasSideEffects": true,
        "RAOverride": "0",
        "DestSize": "OpSize::i64Bit"
      },

      "GPR = CycleCounter i1:$SelfSynchronizingLoads": {
        "Desc": ["Returns the host 64bit cycle counter",
                 "Useful when emulating rdtsc",
                 "Be careful, the frequency of this counter changes based on host",
                 "On AArch64 make sure to query the CNTFRQ_EL0 system register to get the frequency",
                 "On x86-64 make sure to query CPUID fn8000_0008[EDX_8] for constant TSC",
                 "x86-64 constant frequency lives in MSR_PLATFORM_INFO. Which is only available to kernel",
                 "Part of the ART frequency equation can be pulled from CPUID fn0000_0015[EBX & EAX]",
                 "But it's missing the ART multiplier still?",
                 "If the self-synchronizing flag is toggled then all instructions and loads must be completed before the cycle counter read"
                ],
        "DestSize": "OpSize::i64Bit"
      },

      "GPR = Neg OpSize:#Size, GPR:$Src, CondClass:$Cond{CondClass::AL}": {
        "Desc": ["Integer negation, with optional predication",
                 "Dest = Cond ? -Src : Src",
                 "Will truncate to 64 or 32bits"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Not OpSize:#Size, GPR:$Src": {
        "Desc": ["Integer binary not",
                 "op:",
                 "Dest = ~Src"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Popcount OpSize:#Size, GPR:$Src": {
        "Desc": ["Population count of source register",
                 "Returns the number of bits set"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = FindLSB OpSize:#Size, GPR:$Src": {
        "Desc": ["Find least-significant-bit set",
                 "Returns the index of the least significant bit set",
                 "Undefined result if Src is zero."
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = FindMSB OpSize:#Size, GPR:$Src": {
        "Desc": ["Find most-significant-bit set",
                 "Returns the index of the most significant bit set",
                 "Undefined result if Src is zero."
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = FindTrailingZeroes OpSize:#Size, GPR:$Src": {
        "Desc": ["Counts the number of trailing zero bits in a GPR",
                 "Returns the number of bits that are zero trailing",
                 "In the case of zero returns the size in bits of the input"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = CountLeadingZeroes OpSize:#Size, GPR:$Src": {
        "Desc": ["Counts the number of leading zero bits in a GPR",
                 "Returns the number of bits that are zero leading",
                 "In the case of zero returns the size in bits of the input"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Rev OpSize:#Size, GPR:$Src": {
        "Desc": ["Reverses the byte order of the register",
                 "Specifically 8bit byte swap size. (Not 16bit or 32bit word swapping)"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i16Bit || Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Rbit OpSize:#Size, GPR:$Src": {
        "Desc": ["Reverses the bit order of the register"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Add OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer Add",
                  "Will truncate to 64 or 32bits"
                ],
        "Inline": ["", "LargeAddSub"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Adc OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer Add with carry",
                  "Will truncate to 64 or 32bits"
                ],
        "Inline": ["Zero", ""],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Sbb OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer Subtract with carry/borrow",
                  "Will truncate to 64 or 32bits"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AddShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
        "Desc": [ "Integer Add with shifted register",
                  "Will truncate to 64 or 32bits",
                  "Dest = Src1 + (Src2 << ShiftAmount)"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "Shift != ShiftType::ROR"
        ]
      },
      "GPR = AddWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer add. Truncates and sets NZCV per AddNZCV"],
        "Inline": ["", "LargeAddSub"],
        "DestSize": "Size",
        "HasSideEffects": true,
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "AddNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the sum of two GPRs"],
        "Inline": ["", "LargeAddSub"],
        "HasSideEffects": true,
        "DestSize": "Size"
      },
      "SetSmallNZV OpSize:#Size, GPR:$Src": {
        "Desc": ["Set NZV with a SETF instruction. Preserves CF."],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i8Bit || Size == FEXCore::IR::OpSize::i16Bit"
        ]
      },
      "CarryInvert": {
        "Desc": ["Invert carry flag in NZCV"],
        "HasSideEffects": true
      },
      "AXFlag GPR:$V_inv": {
        "Desc": ["After an FCmp, converts NZCV flags from the Arm format to a mysterious eXternal format",
                 "On FlagM2-less platforms, takes the inverted 1/0 overflow flag"],
        "HasSideEffects": true
      },
      "GPR = Parity GPR:$Raw, i1:$Mask, i1:$Invert": {
        "Desc": ["Calculates PF"],
        "DestSize": "OpSize::i32Bit"
      },
      "RmifNZCV GPR:$Src, u8:$Rotate, u8:$Mask": {
        "Desc": ["Rotate, mask, and insert into NZCV on FlagM platforms"],
        "Inline": ["Zero", ""],
        "HasSideEffects": true
      },
      "CondAddNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2, CondClass:$Cond, u8:$FalseNZCV": {
        "Desc": ["If condition is true, set NZCV per sum of GPRs, else force NZCV to a constant."],
        "Inline": ["Zero", "AddSub"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "CondSubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2, CondClass:$Cond, u8:$FalseNZCV": {
        "Desc": ["If condition is true, set NZCV per difference of GPRs, else force NZCV to a constant."],
        "Inline": ["Zero", "AddSub"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AdcWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Adds and set NZCV for the sum of two GPRs and carry-in given as NZCV"],
        "Inline": ["Zero", ""],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AdcZero OpSize:#Size, GPR:$Src1": {
        "Desc": ["Adds GPR with inverted carry-in"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AdcZeroWithFlags OpSize:#Size, GPR:$Src1": {
        "Desc": ["Adds and set NZCV for the sum of GPR and inverted carry-in given as NZCV"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = SbbWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Subtracts and set NZCV for the difference of two GPRs and carry-in given as NZCV"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "AdcNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the sum of two GPRs and carry-in given as NZCV"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "SbbNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the difference of two GPRs and carry-in given as NZCV"],
        "HasSideEffects": true,
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Sub OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer Sub",
                  "Will truncate to 64 or 32bits"
                ],
        "Inline": ["SubtractZero", "LargeAddSub"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = SubShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
        "Desc": [ "Integer Sub with shifted register",
                  "Will truncate to 64 or 32bits"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "Shift != ShiftType::ROR"
        ]
      },
      "GPR = SubWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": [ "Integer Sub. Truncates and sets NZCV per SubNZCV"],
        "Inline": ["SubtractZero", "LargeAddSub"],
        "DestSize": "Size",
        "HasSideEffects": true,
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "CmpPairZ OpSize:#Size, GPR:$Src1Lo, GPR:$Src1Hi, GPR:$Src2Lo, GPR:$Src2Hi": {
        "Desc": ["Compares register pairs and sets Z accordingly, preserving N/Z/V.",
                 "This accelerates cmpxchg."],
        "HasSideEffects": true,
        "DestSize": "Size"
      },
      "SubNZCV OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the difference of two GPRs. ",
                 "Carry flag uses arm64 definition, inverted x86.",
                 ""],
        "Inline": ["Zero", "LargeAddSub"],
        "DestSize": "Size",
        "HasSideEffects": true
      },
      "GPR = Or OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer binary or"
                ],
        "DestSize": "Size",
        "Inline": ["", "Logical"],
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Orlshl OpSize:#Size, GPR:$Src1, GPR:$Src2, u8:$BitShift": {
        "Desc": ["Integer binary or with logical shift left"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Orlshr OpSize:#Size, GPR:$Src1, GPR:$Src2, u8:$BitShift": {
        "Desc": ["Integer binary or with logical shift right"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Ornror OpSize:#Size, GPR:$Src1, GPR:$Src2, u8:$BitShift": {
        "Desc": ["Integer binary or with NOT on second source and rotation right"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Xor OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer binary exclusive or"],
        "Inline": ["", "Logical"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = XorShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
        "Desc": [ "Integer binary exclusive or with shifted register"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = XornShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
        "Desc": [ "Integer binary exclusive or not with shifted register"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = And OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer binary and"],
        "Inline": ["", "Logical"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AndShift OpSize:#Size, GPR:$Src1, GPR:$Src2, ShiftType:$Shift{ShiftType::LSL}, u8:$ShiftAmount{0}": {
        "Desc": [ "Integer binary and with shifted register"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = AndWithFlags OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer binary and"
                ],
        "Inline": ["", "Logical"],
        "DestSize": "Size",
        "TiedSource": 0,
        "HasSideEffects": true
      },
      "GPR = Andn OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer binary AND NOT. Performs the equivalent of Src1 & ~Src2"],
        "DestSize": "Size",
        "Inline": ["", "Logical"],
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "TestNZ OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the binary AND of two GPRs, setting N and Z accordingly and zeroing C and V"],
        "Inline": ["", "Logical"],
        "DestSize": "Size",
        "HasSideEffects": true
      },
      "TestZ OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Set NZCV for the binary AND of two GPRs, setting Z accordingly and zeroing C and V. N is undefined."],
        "DestSize": "Size",
        "HasSideEffects": true
      },
      "GPR = Lshl OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer logical shift left"],
        "Inline": ["", "Any"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Lshr OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer logical shift right"],
        "Inline": ["", "Any"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Ashr OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer arithmetic shift right"],
        "Inline": ["", "Any"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = ShiftFlags OpSize:$Size, GPR:$Result, GPR:$Src1, ShiftType:$Shift, GPR:$Src2, GPR:$PFInput, i1:$InvertCF": {
        "Desc": ["Set NZCV flags for specified variable integer shift with given result.",
                 "Returns updated raw PF."],
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "RotateFlags OpSize:$Size, GPR:$Result, GPR:$Shift, i1:$Left": {
        "Desc": ["Set NZCV flags for specified variable integer rotate with given result."],
        "HasSideEffects": true
      },
      "GPR = Ror OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer rotate right"],
        "Inline": ["", "Any"],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Mul OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer signed multiplication"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = UMul OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer unsigned multiplication"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = UMull GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer unsigned multiplication long",
                 "Multiplies two 32-bit numbers, returning a 64-bit destination register."
                ],
        "DestSize": "FEXCore::IR::OpSize::i64Bit"
      },
      "GPR = SMull GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer signed multiplication long",
                 "Multiplies two 32-bit numbers, returning a 64-bit destination register."
                ],
        "DestSize": "FEXCore::IR::OpSize::i64Bit"
      },
      "GPR = MulH OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer signed multiply returning high results",
                 "op:",
                 "Tmp <size * 2> = Src1 * Src2;",
                 "Dest = Tmp >> (size * 8);"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = UMulH OpSize:#Size, GPR:$Src1, GPR:$Src2": {
        "Desc": ["Integer unsigned multiply returning high results",
                 "op:",
                 "Tmp <size * 2> = Src1 * Src2;",
                 "Dest = Tmp >> (size * 8);"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Bfi OpSize:#Size, u8:$Width, u8:$lsb, GPR:$Dest, GPR:$Src": {
        "Desc": ["Copies a bitfield from one GPR to another",
                 "The source bitfield is from Src[Width:0]",
                 "The bitfield is copied in to Dest[(Width + lsb):lsb]"
                ],
        "DestSize": "Size",
        "TiedSource": 0,
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "(Width + lsb) <= IR::OpSizeAsBits(Size)"
        ]
      },
      "GPR = Bfxil OpSize:#Size, u8:$Width, u8:$lsb, GPR:$Dest, GPR:$Src": {
        "Desc": ["Copies a bitfield from one GPR to another",
                 "Inserting in to the low bits of the destination",
                 "The source bitfield is from Src[(Width + lsb):lsb]",
                 "The bitfield is copied in to Dest[Width:0]"
                ],
        "DestSize": "Size",
        "TiedSource": 0,
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "(Width + lsb) <= IR::OpSizeAsBits(Size)"
        ]
      },
      "GPR = Bfe OpSize:#Size, u8:$Width, u8:$lsb, GPR:$Src": {
        "Desc": ["Extracts a bitfield from one GPR with zext",
                 "The source bitfield is from Src[Width:0]",
                 "The bitfield is then zero extended"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "(Width + lsb) <= IR::OpSizeAsBits(Size)"
        ]
      },
      "GPR = Sbfe OpSize:#Size, u8:$Width, u8:$lsb, GPR:$Src": {
        "Desc": ["Extracts a bitfield from one GPR with sext",
                 "The source bitfield is from Src[Width:0]",
                 "The bitfield is then sign extended"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit",
          "(Width + lsb) <= IR::OpSizeAsBits(Size)"
        ]
      },
      "GPR = NZCVSelect OpSize:#ResultSize, CondClass:$Cond, GPR:$TrueVal, GPR:$FalseVal": {
        "Desc": ["Select based on value in NZCV flags",
                 "op:",
                 "Dest = Cond ? TrueVal : FalseVal"
                ],
        "DestSize": "ResultSize",
        "EmitValidation": [
          "ResultSize == FEXCore::IR::OpSize::i32Bit || ResultSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "FPR = NZCVSelectV OpSize:#ResultSize, CondClass:$Cond, FPR:$TrueVal, FPR:$FalseVal": {
        "Desc": [
          "Select based on value in NZCV flags, where TrueVal and FalseVal are both FPRs.",
          "op:",
          "Dest = Cond ? TrueVal : FalseVal"
        ],
        "DestSize": "ResultSize"
      },
      "GPR = NZCVSelectIncrement OpSize:#ResultSize, CondClass:$Cond, GPR:$TrueVal, GPR:$FalseVal": {
        "Desc": ["Select and increment based on value in NZCV flags",
                 "op:",
                 "Dest = Cond ? TrueVal : (FalseVal + 1)"
                ],
        "DestSize": "ResultSize",
        "EmitValidation": [
          "ResultSize == FEXCore::IR::OpSize::i32Bit || ResultSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = Select OpSize:#ResultSize, OpSize:$CompareSize, CondClass:$Cond, SSA:$Cmp1, SSA:$Cmp2, GPR:$TrueVal, GPR:$FalseVal": {
        "Desc": ["Ternary selection of GPRs",
                 "op:",
                 "Dest = Cmp1 <Cond> Cmp2 ? TrueVal : FalseVal"
                ],
        "Inline": ["", "AddSub", "", ""],
        "DestSize": "ResultSize",
        "ImplicitFlagClobber": true,
        "EmitValidation": [
          "CompareSize == FEXCore::IR::OpSize::i32Bit || CompareSize == FEXCore::IR::OpSize::i64Bit || CompareSize == FEXCore::IR::OpSize::i128Bit",
          "ResultSize == FEXCore::IR::OpSize::i32Bit || ResultSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = MaskGenerateFromBitWidth GPR:$BitWidth": {
        "Desc": ["Generates a bit mask from with a value from [0, 63]",
                 "0 is special cased to full-mask",
                 "Special operation for SSE4a bitmask generation."
        ],
        "DestSize": "FEXCore::IR::OpSize::i64Bit",
        "ImplicitFlagClobber": true
      },

      "GPR = Extr OpSize:#Size, GPR:$Upper, GPR:$Lower, u8:$LSB": {
        "Desc": ["Concats the two GPRs to create a value that is the size of the full two GPRs",
                 "It then extracts a bitfield width that size of a GPR from the LSB",
                 "Valid LSB range is 0-31 for 32bit and 0-63 for 64bit",
                 "<Size * 2> ConcatValue = $Upper:$Lower",
                 "Result = ConcatValue<LSB+Size - 1: LSB>"
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },
      "GPR = PDep OpSize:#Size, GPR:$Input, GPR:$Mask": {
        "Desc": ["Performs a parallel bit deposit.",
                 "Takes the contiguous low-order bits and deposits them into",
                 "the destination at the locations specified by the Mask."
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "GPR = PExt OpSize:#Size, GPR:$Input, GPR:$Mask": {
        "Desc": ["Performs a parallel bit extract.",
                 "Each bit set in the mask will select the corresponding bit in the Input",
                 "and transfers them to the lower contiguous bits in the destination."
                ],
        "DestSize": "Size",
        "EmitValidation": [
          "Size == FEXCore::IR::OpSize::i32Bit || Size == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "GPR:$Quotient, GPR:$Remainder = Div OpSize:#Size, GPR:$Lower, GPR:$Upper, GPR:$Divisor": {
        "Desc": ["Integer long signed division returning lower bits",
                 "The Lower and Upper registers will be concated together to generate a dividend twice the size",
                 "Then the divisor divides the temporary dividend and returns the results in the original sized register",
                 "If Upper is invalid, this is a non-long division."
                ],
        "DestSize": "Size",
        "HasSideEffects": true
      },
      "GPR:$Quotient, GPR:$Remainder = UDiv OpSize:#Size, GPR:$Lower, GPR:$Upper, GPR:$Divisor": {
        "Desc": ["Integer long unsigned division returning lower bits",
                 "The Lower and Upper registers will be concated together to generate a dividend twice the size",
                 "Then the divisor divides the temporary dividend and returns the results in the original sized register",
                 "If Upper is invalid, this is a non-long division."
                ],
        "DestSize": "Size",
        "HasSideEffects": true
      },

      "Float to GPR": {"Ignore": 1},
      "GPR = VExtractToGPR OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$Index": {
        "Desc": ["Extracts an element from a vector and places it in a GPR",
                 "The element that is extracted from the vector is zero extended to the GPR size"
                ],
        "DestSize": "ElementSize"
      },

      "GPR = Float_ToGPR_S OpSize:#DestElementSize, OpSize:$SrcElementSize, FPR:$Scalar": {
        "Desc": ["Moves the scalar element to a GPR with conversion",
                 "Converts the 32bit or 64bit float to an signed integer",
                 "Rounding mode determined by host flag's rounding mode"
                ],
        "DestSize": "DestElementSize"
      },

      "GPR = Float_ToGPR_ZS OpSize:#DestElementSize, OpSize:$SrcElementSize, FPR:$Scalar": {
        "Desc": ["Moves the scalar element to a GPR with conversion",
                 "Converts the 32bit or 64bit float to an signed integer rounding towards zero (Truncating)"
                ],
        "DestSize": "DestElementSize"
      },

      "FCmp OpSize:$ElementSize, FPR:$Scalar1, FPR:$Scalar2": {
        "Desc": ["Does a scalar unordered compare and sets NZCV accordingly.",
                 "NZCV follows Arm conventions, a separate AXFLAG instruction is required for x86",
                 "Ordering flag result is true if either float input is NaN"
                ],
        "HasSideEffects": true
      }
    },
    "VectorScalar": {
      "FPR = VFAddScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'add' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFSubScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'sub' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFMulScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'mul' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFDivScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'div' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFMinScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'min' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics.",
                 "Additionally matches x86 zero and NaN semantics",
                 "If both source operands are zero, return the second operand (in the case of negative and positive zero)",
                 "If either source operand is NaN then return the second operand."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "ImplicitFlagClobber": true
      },
      "FPR = VFMaxScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'max' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics.",
                 "Additionally matches x86 zero and NaN semantics",
                 "If both source operands are zero, return the second operand (in the case of negative and positive zero)",
                 "If either source operand is NaN then return the second operand."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "ImplicitFlagClobber": true
      },
      "FPR = VFSqrtScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'sqrt' on Vector2, inserting in to Vector1 and storing in to the destination.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFRSqrtScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'rsqrt' on Vector2, inserting in to Vector1 and storing in to the destination.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFRecpScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'recip' on Vector2, inserting in to Vector1 and storing in to the destination.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFToFScalarInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'cvt' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "DstElementSize"
      },
      "FPR = VSToFVectorInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector1, FPR:$Vector2, i8:$HasTwoElements, i1:$ZeroUpperBits": {
        "Desc": ["Does a Vector 'scvt' between Vector1 and Vector2.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics.",
                 "HasTwoElements is slightly different than most of these scalar operations.",
                 "Handles the edge case of cvtpi2ps xmm0, mm0 which is two elements in the lower 64-bits"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "DstElementSize"
      },
      "FPR = VSToFGPRInsert OpSize:#RegisterSize, OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Vector, GPR:$Src, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'cvt' between Vector1 and GPR.",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "DstElementSize"
      },
      "FPR = VFToIScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, RoundType:$Round, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar round float to integral on Vector2, inserting in to Vector1 and storing in to the destination.",
                 "Rounding mode determined by argument",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FloatCompareOp:$Op, i1:$ZeroUpperBits": {
        "Desc": ["Does a scalar 'cmp' between Vector1 and Vecto2, inserting in to Vector1 and storing in to the destination.",
                 "Compare op determined by argument",
                 "Inserting the result in to the lower element of Vector1 and returning the results.",
                 "If ZeroUpperBits is set then in a 256-bit wide operation it will zero the upper 128-bits of the destination.",
                 "For 128-bit operation this matches SSE insert semantics.",
                 "For 256-bit operation with ZeroUpperBits, this matches AVX insert semantics."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFMLAScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (Vector1 * Vector2) + Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.",
          "Upper elements copied from Upper"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VFMLSScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (Vector1 * Vector2) - Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.",
          "Upper elements copied from Upper"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VFNMLAScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (-Vector1 * Vector2) + Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.",
          "Upper elements copied from Upper"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VFNMLSScalarInsert OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Upper, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (-Vector1 * Vector2) - Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending.",
          "Upper elements copied from Upper"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VFCopySign OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1,  FPR:$Vector2": {
        "Desc": ["Returns a vector where each element has has the magniture of each corresponding element in vector1 and the sign of vector 2."],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      }
    },
    "Vector": {
      "FPR = VMov OpSize:#RegisterSize, FPR:$Source": {
        "Desc" : ["Copy vector register",
                  "When Register size is smaller than Source register size,",
                  "this op is defined to truncate and zero extend"
                 ],
        "DestSize": "RegisterSize"
      },

      "FPR = VectorImm OpSize:#RegisterSize, OpSize:#ElementSize, u8:$Immediate, u8:$ShiftAmount{0}": {
        "Desc": ["Generates a vector with each element containg the immediate zexted"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = LoadNamedVectorConstant OpSize:#RegisterSize, NamedVectorConstant:$Constant": {
        "Desc": ["Load a named vector constant.",
                 "The list of vector constants can be found in <FEXCore/IR/IR.h>"
                ],
        "DestSize": "RegisterSize"
      },
      "FPR = LoadNamedVectorIndexedConstant OpSize:#RegisterSize, IndexNamedVectorConstant:$Constant, u32:$Index": {
        "Desc": ["Load a named vector constant from Indexable table.",
                 "Index needs to be aligned register size.",
                 "The list of indexable vector constants can be found in <FEXCore/IR/IR.h>"
                ],
        "DestSize": "RegisterSize"
      },
      "FPR = VNeg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VNot OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VAbs OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a signed integer absolute value"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VPopcount OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a popcount for each element of the register"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VAddV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a horizontal vector add of elements across the source vector",
                 "Result is a zero extended scalar"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUMinV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a horizontal vector unsigned minimum of elements across the source vector",
                 "Result is a zero extended scalar"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUMaxV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a horizontal vector unsigned maximum of elements across the source vector",
                 "Result is a zero extended scalar"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFAbs OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VFNeg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VFRecp OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": [
          "Reciprocal value - matches the precision required by the x86 spec.",
          "It has a relative error of at most 1.5 * 2^-12"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFRecpPrecision OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": [
          "Similar to VFRecp but carrying more precision for 3DNow!",
          "It provides at least 14 bits precision, with a relative error of at most 2^-14"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i64Bit || RegisterSize == FEXCore::IR::OpSize::i32Bit",
          "ElementSize == FEXCore::IR::OpSize::i32Bit"
        ]
      },

      "FPR = VFSqrt OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VFRSqrt OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": [
          "Reciprocal Square Root - matches the precision required by the x86 spec.",
          "It has a relative error of at most 1.5 * 2^-12"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFRSqrtPrecision OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": [
          "Similar to VFRSqrt but carrying more precision for 3DNow!",
          "It provides at least 15 bits precision, with a relative error of at most 2^-15"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i64Bit || RegisterSize == FEXCore::IR::OpSize::i32Bit",
          "ElementSize == FEXCore::IR::OpSize::i32Bit"
        ]
      },

      "FPR = VCMPEQZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VCMPGTZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Vector compare signed greater than",
                 "Each element is compared, if the result is true then the resulting element is ~0, else zero",
                 "Compares the vector against zero"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VCMPLTZ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Vector compare signed less than",
                 "Each element is compared, if the result is true then the resulting element is ~0, else zero",
                 "Compares the vector against zero"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VDupElement OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$Index": {
        "Desc": ["Duplicates one element from the source register across the whole register"],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VShlI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i8Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift > 0"
        ]
      },
      "FPR = VUShrI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i8Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift > 0"
        ]
      },
      "FPR = VUShraI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$DestVector, FPR:$Vector, u8:$BitShift": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i8Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift > 0 && BitShift <= IR::OpSizeAsBits(ElementSize)"
        ]
      },
      "FPR = VSShrI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i8Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift > 0"
        ]
      },

      "FPR = VUShrNI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "TiedSource": 0,
        "Desc": "Unsigned shifts right each element and then narrows to the next lower element size",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i16Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift <= IR::OpSizeAsBits(ElementSize)"
        ]
      },

      "FPR = VUShrNI2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$BitShift": {
        "TiedSource": 0,
        "Desc": ["Unsigned shifts right each element and then narrows to the next lower element size",
                 "Inserts results into the high elements of the first argument"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1",
        "EmitValidation": [
          "ElementSize >= FEXCore::IR::OpSize::i16Bit && ElementSize <= FEXCore::IR::OpSize::i64Bit",
          "BitShift > 0 && BitShift <= IR::OpSizeAsBits(ElementSize)"
        ]
      },
      "FPR = VSXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": "Sign extends elements from the source element size to the next size up",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Sign extends elements from the source element size to the next size up",
                 "Source elements come from the upper half of the register"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSSHLL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
        "Desc": "Sign extends elements from the source element size to the next size up",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSSHLL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift{0}": {
        "Desc": ["Sign extends elements from the source element size to the next size up",
                 "Source elements come from the upper half of the register"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUXTL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": "Zero extends elements from the source element size to the next size up",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUXTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Zero extends elements from the source element size to the next size up",
                 "Source elements come from the upper half of the register"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSQXTN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSQXTN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSQXTNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "Desc": ["Does both VSQXTN and VSQXTN2 in a combined operation."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSQXTUN OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSQXTUN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSQXTUNPair OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "Desc": ["Does both VSQXTUN and VSQXTUN2 in a combined operation."
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1"
      },
      "FPR = VSRSHR OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "Desc": ["Signed rounding shift right by immediate",
                 "Exactly matching Arm64 srshr semantics"
                ],
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSQSHL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, u8:$BitShift": {
        "Desc": ["Signed saturating shift left by immediate",
                 "Exactly matching Arm64 sqshl semantics"
                ],
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VRev32 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc" : ["Reverses elements within each 32-bit word",
                  "Available element sizes: 1 byte, 2 byte"
                 ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VRev64 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc" : ["Reverses elements within each 64-bit doubleword",
                  "Available element sizes: 1 byte, 2 byte, 4 byte"
                 ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VAnd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i256Bit || RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "FPR = VAndn OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i256Bit || RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "FPR = VOrn OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i256Bit || RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "FPR = VOr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i256Bit || RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "FPR = VXor OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "EmitValidation": [
          "RegisterSize == FEXCore::IR::OpSize::i256Bit || RegisterSize == FEXCore::IR::OpSize::i128Bit || RegisterSize == FEXCore::IR::OpSize::i64Bit"
        ]
      },

      "FPR = VUQAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VUQSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VSQAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VSQSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VAddP OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "Desc": "Does a horizontal pairwise add of elements across the two source vectors",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VURAvg OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": ["Does an unsigned rounded average", "dst_elem = (src1_elem + src2_elem + 1) >> 1"],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VZip OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VZip2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUnZip OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUnZip2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VTrn OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VTrn2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VFAdd OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFAddP OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "Desc": "Does a horizontal pairwise add of elements across the two source vectors with float element types",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFAddV OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Does a horizontal float vector add of elements across the source vector",
                 "Result is a zero extended scalar"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFSub OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFMul OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFDiv OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VFMin OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VFMax OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 0
      },
      "FPR = VMul OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSMull OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": [ "Does a signed integer multiply with extend.",
                  "ElementSize is the source size"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": "Multiplies the high elements with size extension",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VSMull2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": "Multiplies the high elements with size extension",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUMulH OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": "Wide unsigned multiply returning the high results",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSMulH OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": "Wide signed multiply returning the high results",

        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUABDL OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": ["Unsigned Absolute Difference Long"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUABDL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": ["Unsigned Absolute Difference Long",
                 "Using the high elements of the source vectors"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1"
      },
      "FPR = VUShl OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUShr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSShr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftVector, i1:$RangeCheck": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUShlS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUShrS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSShrS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUShrSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VSShrSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VUShlSWide OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, FPR:$ShiftScalar": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VInsElement OpSize:#RegisterSize, OpSize:#ElementSize, u8:$DestIdx, u8:$SrcIdx, FPR:$DestVector, FPR:$SrcVector": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VInsGPR OpSize:#RegisterSize, OpSize:#ElementSize, u8:$DestIdx, FPR:$DestVector, GPR:$Src": {
        "TiedSource": 0,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VExtr OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper, u8:$Index": {
        "Desc": ["Concats two vector registers together and extracts a full width register from the element index",
                 "Index is an element index. So it is offset by ElementSize argument",
                 "op:",
                 "TmpVector <RegisterSize *2> = concat(Upper:Lower)",
                 "Dest = TmpVector >> (ElementSize * Index * 8); // Or can be thought of `concat(&TmpVector[Index], i128)`"
                ],
        "TiedSource": 1,
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VCMPEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VCMPGT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "Desc": ["Vector compare signed greater than",
                 "Each element is compared, if the result is true then the resulting element is ~0, else zero"
                ],

        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPNEQ OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPLT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPGT OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPLE OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPORD OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFCMPUNO OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VTBL1 OpSize:#RegisterSize, FPR:$VectorTable, FPR:$VectorIndices": {
        "Desc": ["Does a vector table lookup from one register into the destination",
                 "Lookup is byte sized per byte element.",
                 "Any index larger than what the registers provide will result in zero for that element",
                 "Table is always treated as a 128bit register",
                 "Indices match the destination size. Either 64bit or 128bit"
                ],
        "DestSize": "RegisterSize"
      },
      "FPR = VTBL2 OpSize:#RegisterSize, FPR:$VectorTable1, FPR:$VectorTable2, FPR:$VectorIndices": {
        "Desc": ["Does a vector table lookup from two registers into the destination",
                 "Lookup is byte sized per byte element.",
                 "Any index larger than what the registers provide will result in zero for that element",
                 "Table is always treated as two 128bit registers",
                 "Indices match the destination size. Either 64bit or 128bit",
                 "Be careful to use sequential table registers; it will result in some moves if they aren't sequential."
                ],
        "DestSize": "RegisterSize"
      },
      "FPR = VTBX1 OpSize:#RegisterSize, FPR:$VectorSrcDst, FPR:$VectorTable, FPR:$VectorIndices": {
        "Desc": ["Does a vector table lookup from one register into the destination",
                 "Lookup is byte sized per byte element.",
                 "Any index larger than what the registers provide will result in not modifying that element",
                 "Table is always treated as a 128bit register",
                 "Indices match the destination size. Either 64bit or 128bit"
                ],
        "TiedSource": 0,
        "DestSize": "RegisterSize"
      },
      "FPR = VBSL OpSize:#RegisterSize, FPR:$VectorMask, FPR:$VectorTrue, FPR:$VectorFalse": {
        "Desc": ["Does a vector bitwise select.",
                 "If the bit in the field is 1 then the corresponding bit is pulled from VectorTrue",
                 "If the bit in the field is 0 then the corresponding bit is pulled from VectorFalse"
                ],
        "TiedSource": 0,
        "DestSize": "RegisterSize"
      },

      "GPR = VPCMPESTRX FPR:$LHS, FPR:$RHS, GPR:$RAX, GPR:$RDX, u16:$Control": {
        "Desc": ["Performs intermediate behavior analogous to the x86 PCMPESTRI/PCMPESTRM instruction",
                 "This will return the intermediate result of a PCMPESTR-type operation, but NOT the final",
                 "result. This must be derived from the intermediate result",

                 "NOTE: On top of returning the intermediate result, the returned value also combines the status",
                 "flags into the upper 16-bits of the 32-bit result, as these can also be derived over the",
                 "course of creating the intermediate result"
                ],
        "DestSize": "OpSize::i32Bit",
        "JITDispatch": false
      },
      "GPR = VPCMPISTRX FPR:$LHS, FPR:$RHS, u8:$Control": {
        "Desc": ["Performs intermediate behavior analogous to the x86 PCMPISTRI/PCMPISTRM instruction",
                 "This will return the intermediate result of a PCMPISTR-type operation, but NOT the final",
                 "result. This must be derived from the intermediate result",

                 "NOTE: On top of returning the intermediate result, the returned value also combines the status",
                 "flags into the upper 16-bits of the 32-bit result, as these can also be derived over the",
                 "course of creating the intermediate result"
                ],
        "DestSize": "OpSize::i32Bit",
        "JITDispatch": false
      },
      "FPR = VFCADD OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, u16:$Rotate": {
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = VFMLA OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (Vector1 * Vector2) + Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 2
      },
      "FPR = VFMLS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (Vector1 * Vector2) - Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 2
      },
      "FPR = VFNMLA OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (-Vector1 * Vector2) + Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 2
      },
      "FPR = VFNMLS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector1, FPR:$Vector2, FPR:$Addend": {
        "Desc": [
          "Dest = (-Vector1 * Vector2) - Addend",
          "This explicitly matches x86 FMA semantics because ARM semantics are mind-bending."
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize",
        "TiedSource": 2
      }
    },
    "Conv": {
      "FPR = VCastFromGPR OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Src": {
        "Desc": ["Moves a GPR to a Vector register with zero extension to full length of the register.",
                 "No conversion is done on the data as it moves register files"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VDupFromGPR OpSize:#RegisterSize, OpSize:#ElementSize, GPR:$Src": {
        "Desc": ["Broadcasts a value in a GPR into each ElementSize-sized element in a vector"],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },

      "FPR = VLoadTwoGPRs GPR:$Lower, GPR:$Upper": {
        "Desc": ["Moves two 64-bit registers to a vector register optimally"],
        "DestSize": "OpSize::i128Bit",
        "ElementSize": "OpSize::i64Bit"
      },

      "FPR = Float_FromGPR_S OpSize:#DstElementSize, OpSize:$SrcElementSize, GPR:$Src": {
        "Desc": ["Scalar op: Converts signed GPR to Scalar float",
                 "Zeroes the upper bits of the vector register"
                ],
        "DestSize": "DstElementSize"
      },
      "FPR = Float_FToF OpSize:#DstElementSize, OpSize:$SrcElementSize, FPR:$Scalar": {
        "Desc": ["Scalar op: Converts float from one size to another",
                 "Zeroes the upper bits of the vector register"
                ],
        "DestSize": "DstElementSize"
      },

      "FPR = Vector_SToF OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": "Vector op: Converts signed integer to same size float",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = Vector_FToS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": ["Vector op: Converts float to signed integer",
                 "Rounding mode determined by host rounding mode"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = Vector_FToZS OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": "Vector op: Converts float to signed integer, rounding towards zero",
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = Vector_FToF OpSize:#RegisterSize, OpSize:#DestElementSize, FPR:$Vector, OpSize:$SrcElementSize": {
        "Desc": "Vector op: Converts float from source element size to destination size (fp32<->fp64)",
        "DestSize": "RegisterSize",
        "ElementSize": "DestElementSize"
      },

      "FPR = VFCVTL2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector": {
        "Desc": [
          "Vector op: Converts float from source element size to destination size (fp32->fp64)",
          "Selecting from the high half of the register."
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize << 1",
        "EmitValidation": [
          "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
        ]
      },
      "FPR = VFCVTN2 OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$VectorLower, FPR:$VectorUpper": {
        "TiedSource": 0,
        "Desc": [
          "Vector op: Converts float from source element size and inserts the result in to the high bits.",
          "Bottom half is untouched",
          "Narrowing to the element size below what is passed in.",
          "F64->F32, F32->F16"
        ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize >> 1",
        "EmitValidation": [
          "RegisterSize != FEXCore::IR::OpSize::i256Bit && \"What does 256-bit mean in this context?\""
        ]
      },
      "FPR = Vector_FToI OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, RoundType:$Round": {
        "Desc": ["Vector op: Rounds float to integral",
                 "Rounding mode determined by argument"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = Vector_FToISized OpSize:#RegisterSize, OpSize:#ElementSize, FPR:$Vector, i1:$HostRound, OpSize:$IntSize": {
        "Desc": ["Vector op: Rounds float to sized integral",
                 "Either host rounding or round-to-zero",
                 "Rounding mode determined by argument"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "ElementSize"
      },
      "FPR = Vector_F64ToI32 OpSize:#RegisterSize, FPR:$Vector, RoundType:$Round, i1:$EnsureZeroUpperHalf": {
        "Desc": ["Vector op: Rounds 64-bit float to 32-bit integral with round mode",
                 "Matches CVTPD2DQ/CVTTPD2DQ behaviour"
                ],
        "DestSize": "RegisterSize",
        "ElementSize": "FEXCore::IR::OpSize::i32Bit"
      }
    },
    "Crypto": {
      "FPR = VAESImc FPR:$Vector": {
        "Desc": "Does a stage of the inverse mix column transformation",
        "DestSize": "OpSize::i128Bit"
      },
      "FPR = VAESEnc OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": {
        "Desc": "Does a step of AES encryption",
        "DestSize": "RegisterSize"
      },
      "FPR = VAESEncLast OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": {
        "Desc": "Does the last step of AES encryption",
        "DestSize": "RegisterSize"
      },
      "FPR = VAESDec OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": {
        "Desc": "Does a step of AES decryption",
        "DestSize": "RegisterSize"
      },
      "FPR = VAESDecLast OpSize:#RegisterSize, FPR:$State, FPR:$Key, FPR:$ZeroReg": {
        "Desc": "Does the last step of AES decryption",
        "DestSize": "RegisterSize"
      },
      "FPR = VAESKeyGenAssist FPR:$Src, FPR:$KeyGenTBLSwizzle, FPR:$ZeroReg, u8:$RCON": {
        "Desc": "Assists in key generation",
        "DestSize": "OpSize::i128Bit"
      },
      "FPR = VSha1H FPR:$Src": {
        "Desc": "Does vector scalar SHA1H instruction",
        "DestSize": "FEXCore::IR::OpSize::i32Bit"
      },
      "FPR = VSha1C FPR:$Src1, FPR:$Src2, FPR:$Src3": {
        "Desc": "Does vector SHA1C instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha1M FPR:$Src1, FPR:$Src2, FPR:$Src3": {
        "Desc": "Does vector SHA1M instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha1P FPR:$Src1, FPR:$Src2, FPR:$Src3": {
        "Desc": "Does vector SHA1P instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha1SU1 FPR:$Src1, FPR:$Src2": {
        "Desc": "Does vector SHA1SU1 instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha256U0 FPR:$Src1, FPR:$Src2": {
        "Desc": "Does vector scalar VSha256U0 instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha256U1 FPR:$Src1, FPR:$Src2": {
        "Desc": "Does vector scalar VSha256U1 instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit"
      },
      "FPR = VSha256H FPR:$Src1, FPR:$Src2, FPR:$Src3": {
        "Desc": "Does vector scalar VSha256H instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "FPR = VSha256H2 FPR:$Src1, FPR:$Src2, FPR:$Src3": {
        "Desc": "Does vector scalar VSha256H2 instruction",
        "DestSize": "FEXCore::IR::OpSize::i128Bit",
        "TiedSource": 0
      },
      "GPR = CRC32 GPR:$Src1, GPR:$Src2, OpSize:$SrcSize": {
        "Desc": ["CRC32 using polynomial 0x1EDC6F41"
                ],
        "DestSize": "OpSize::i32Bit"
      },
      "FPR = PCLMUL OpSize:#RegisterSize, FPR:$Src1, FPR:$Src2, u8:$Selector": {
        "Desc": [
          "Performs carryless multiplication of 64-bit elements depending on the selector.",
          "Selector = 0b00000000: Uses low 64-bit elements from both input vectors",
          "Selector = 0b00000001: Uses high 64-bit element from Src1 and low 64-bit element from Src2",
          "Selector = 0b00010000: Uses low 64-bit element from Src1 and high 64-bit element from Src2",
          "Selector = 0b00010001: Uses high 64-bit elements from both input vectors"
        ],
        "DestSize": "RegisterSize"
      }
    },
    "F64": {
      "FPR = F64ATAN FPR:$Src1, FPR:$Src2": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64FPREM FPR:$Src1, FPR:$Src2": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64FPREM1 FPR:$Src1, FPR:$Src2": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64SCALE FPR:$Src1, FPR:$Src2": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64F2XM1 FPR:$Src": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64FYL2X FPR:$Src, FPR:$Src2": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": false
      },
      "FPR = F64TAN FPR:$Src": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": true
      },
      "FPR = F64SIN FPR:$Src": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": true
      },
      "FPR = F64COS FPR:$Src": {
        "DestSize": "OpSize::i64Bit",
        "JITDispatch": true
      },
      "FPR:$Sin, FPR:$Cos = F64SINCOS FPR:$Src": {
        "DestSize": "OpSize::i64Bit",
        "HasSideEffects": true,
        "JITDispatch": false
      }
    },
    "F80": {
      "GPR = SyncStackToSlow": {
        "Desc": [
          "Synchronizes the virtual stack environment to the physical registers.",
          "Returns the current stack top."
        ],
        "X87": true,
        "HasSideEffects": true,
        "DestSize": "OpSize::i64Bit"
      },
      "StackForceSlow": {
        "Desc": [
          "Forces the slow path."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "InitStack": {
        "Desc": [
          "Initializes the stack by marking all tags as invalid and setting top to zero."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "IncStackTop": {
        "Desc": [
          "Increase stack top-pointer."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "DecStackTop": {
        "Desc": [
          "Decrease stack top-pointer."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "InvalidateStack u8:$StackLocation": {
        "Desc": [
          "Marks the value in TOP+$StackLocation as empty / invalid 0b11.",
          "If the StackLocation is 0xff, we invalidate all locations."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "PushStack FPR:$X80Src, FPR:$OriginalValue, OpSize:$LoadSize": {
        "Desc": [
          "Pushes the provided X80Src source on to the x87 stack.",
          "Tracks OriginalValue as the original value of X80Src. OriginalValue can be Invalid() in which case no tracking is done.",
          "Opsize is 128bit for F80 values, 64-bit for low precision.",
          "LoadSize is the original load size, i.e. the size of OriginalValue.",
          "Float: 80-bit, 64-bit, 32-bit"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "CopyPushStack u8:$StackLocation": {
        "Desc": [
          "Pushes an element already on the stack onto the top."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "StoreStackMem OpSize:$SourceSize, OpSize:$StoreSize, GPR:$Addr, GPR:$Offset, OpSize:$Align, MemOffsetType:$OffsetType, u8:$OffsetScale": {
        "Desc": [
          "Takes the top value off the x87 stack and stores it to memory.",
          "SourceSize is 128bit for F80 values, 64-bit for low precision.",
          "StoreSize is the store size for conversion:",
          "Float: 80-bit, 64-bit, or 32-bit"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "StoreStackToStack u8:$StackLocation": {
        "Desc": [
          "Takes the top value off the x87 stack and stores it to stack location TOP+StackLocation",
          "Float: 80-bit, 64-bit, or 32-bit",
          "Int: 64-bit, 32-bit, 16-bit"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "PopStackDestroy": {
        "Desc": [
          "Pops the top value off the stack but doesn't save it anywhere."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "FPR = ReadStackValue u8:$StackLocation": {
        "Desc": [
          "Reads a value off the stack at the offset"
        ],
        "DestSize": "OpSize::i128Bit",
        "X87": true
      },
      "GPR = StackValidTag u8:$StackLocation": {
        "Desc": [
          "Returns 1 if the value in location TOP+$StackLocation is valid, 0 otherwise."
        ],
        "DestSize": "OpSize::i32Bit",
        "X87": true
      },
      "F80AddStack u8:$SrcStack1, u8:$SrcStack2": {
        "Desc": [
          "Adds two stack locations together, storing the result in to the first stack location"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80AddValue u8:$SrcStack, FPR:$X80Src": {
        "Desc": [
          "Adds an operand value to a stack location. The result is stored in to the stack location provided."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "FPR = F80Add FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80SubStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
        "Desc": [
          "Subtracts the value in stack location TOP+$SrcStack2 from the value in stack location TOP+$SrcStack1.",
          "The result is stored in stack location TOP+$DstStack."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80SubValue u8:$SrcStack, FPR:$X80Src": {
        "Desc": [
          "Subtracts the value $X80Src from the value in stack location TOP+$SrcStack.",
          "The result is stored in stack location TOP."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80SubRValue FPR:$X80Src, u8:$SrcStack": {
        "Desc": [
          "Subtracts the value in stack location TOP+$SrcStack from the value $X80Src.",
          "The result is stored in stack location TOP."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "FPR = F80Sub FPR:$X80Src1, FPR:$X80Src2": {
        "Desc": [
          "Subtracts the value in $X80Src2 from the value in $X80Src1.",
          "The result is returned.",
          "`FPR = X80Src1 - X80Src2`"
        ],
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80MulStack u8:$SrcStack1, u8:$SrcStack2": {
        "Desc": [
          "Multiplies two stack locations together, storing the result in to the first stack location"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80MulValue u8:$SrcStack, FPR:$X80Src": {
        "Desc": [
          "Multiplies a stack location by an operand value. The result is stored in to the stack location provided."
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "FPR = F80Mul FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80DivStack u8:$DstStack, u8:$SrcStack1, u8:$SrcStack2": {
        "Desc": [
          "Divides the value in stack location TOP+$SrcStack1 by the value in stack location TOP+$SrcStack2.",
          "The result is stored in stack location TOP+$DstStack.",
          "`FPR|Stack[TOP+DstStack] = Stack[TOP+SrcStack1] / Stack[TOP+SrcStack2]`"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80DivValue u8:$SrcStack, FPR:$X80Src": {
        "Desc": [
          "Divides the value in stack location TOP+$SrcStack by the value $X80Src.",
          "The result is stored in stack location TOP and returned.",
          "`FPR|Stack[TOP] = Stack[TOP+SrcStack] / X80Src`"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "F80DivRValue FPR:$X80Src, u8:$SrcStack": {
        "Desc": [
          "Divides the value X80Src by the value in stack location TOP+$SrcStack.",
          "The result is stored in stack location TOP.",
          "`FPR|Stack[TOP] = X80Src / Stack[TOP+SrcStack]`"
        ],
        "HasSideEffects": true,
        "X87": true
      },
      "FPR = F80Div FPR:$X80Src1, FPR:$X80Src2": {
        "Desc": [
          "Divides the value in $X80Src1 by the value in $X80Src2.",
          "The result is returned.",
          "`FPR = X80Src1 / X80Src2`"
        ],
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80StackXchange u8:$SrcStack": {
        "Desc": [
          "Exchanges the value at the top of the stack with the value at TOP+$SrcStack."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80StackChangeSign": {
        "Desc": [
          "Complements the sign bit of the value at the top of the stack.",
          "Returns the new value at the top of the stack."
        ],
        "HasSideEffects": true,
        "DestSize": "OpSize::i128Bit",
        "X87": true
      },
      "FPR = F80StackAbs": {
        "Desc": [
          "Clears the sign bit of the value at the top of the stack.",
          "Returns the new value at the top of the stack."
        ],
        "HasSideEffects": true,
        "DestSize": "OpSize::i128Bit",
        "X87": true
      },
      "F80PTANStack": {
        "Desc": [
          "Computes the approximate tangent of the source operand in register ST(0), stores the result in ST(0), and pushes a 1.0 onto the FPU register stack."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80ATANStack": {
        "Desc": [
          "Computes arctan(st1/st0) and stores it in st0. Then pops the stack."
        ],
        "DestSize": "OpSize::i128Bit",
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80ATAN FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80FPREMStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80FPREM FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80FPREM1Stack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80FPREM1 FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80SCALEStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80SCALE FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80CVT OpSize:#Size, FPR:$X80Src": {
        "DestSize": "Size",
        "JITDispatch": false
      },
      "GPR = F80CVTInt OpSize:#Size, FPR:$X80Src, i1:$Truncate": {
        "DestSize": "Size",
        "JITDispatch": false
      },
      "FPR = F80CVTTo FPR:$X80Src, OpSize:$SrcSize": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80CVTToInt GPR:$Src, OpSize:$SrcSize": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80RoundStack": {
        "Desc": [
          "Replaces the value at the top of the stack with its nearest integral value."
        ],
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80Round FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80F2XM1Stack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80F2XM1 FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80TAN FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80SINStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80SIN FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80COSStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80COS FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR:$Sin, FPR:$Cos = F80SINCOS FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "HasSideEffects": true,
        "JITDispatch": false
      },
      "F80SINCOSStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "F80SQRTStack": {
        "X87": true,
        "HasSideEffects": true
      },
      "FPR = F80SQRT FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80XTRACT_EXP FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80XTRACT_SIG FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "GPR = F80StackTest u8:$SrcStack": {
        "Desc": [
          "Does comparison between value in stack at TOP + SrcStack"
        ],
        "DestSize": "OpSize::i32Bit",
        "X87": true
      },
      "GPR = F80CmpStack u8:$SrcStack": {
        "Desc": [
          "Does a scalar unordered compare between the value at the top of the stack and the value in stack position TOP+$SrcStack and stores the flags in to a GPR",
          "Ordering flag result is true if either float input is NaN"
        ],
        "DestSize": "OpSize::i32Bit",
        "X87": true
      },
      "GPR = F80CmpValue FPR:$X80Src": {
        "Desc": [
          "Does a scalar unordered compare between the value at the top of the stack and $X80Src and stores the asked for flags in to a GPR",
          "Ordering flag result is true if either float input is NaN"
        ],
        "DestSize": "OpSize::i32Bit",
        "HasSideEffects": true,
        "X87": true
      },
      "GPR = F80Cmp FPR:$X80Src1, FPR:$X80Src2": {
        "Desc": ["Does a scalar unordered compare and stores the flags in to a GPR",
                 "Ordering flag result is true if either float input is NaN"
                ],
        "DestSize": "OpSize::i32Bit",
        "JITDispatch": false
      },
      "FPR = F80BCDLoad FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80BCDStore FPR:$X80Src": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "FPR = F80FYL2XStack": {
        "Desc": [
          "Computes ST1 * log2(ST0)",
          "Stores the result in ST1, and pops the top of the stack.",
          "Returns the new value at the top of the stack, i.e. the result of the operation."
        ],
        "HasSideEffects": true,
        "DestSize": "OpSize::i128Bit",
        "X87": true
      },
      "FPR = F80FYL2X FPR:$X80Src1, FPR:$X80Src2": {
        "DestSize": "OpSize::i128Bit",
        "JITDispatch": false
      },
      "F80VBSLStack OpSize:#RegisterSize, FPR:$VectorMask, u8:$SrcStack1, u8:$SrcStack2": {
        "Desc": [
          "Does a vector bitwise select.",
          "If the bit in the field is 1 then the corresponding bit is pulled from VectorTrue",
          "If the bit in the field is 0 then the corresponding bit is pulled from VectorFalse",
          "Writes the result to the top of the stack."
        ],
        "X87": true,
        "HasSideEffects": true
      }
    },
    "Backend": {
      "Last": {
        "HasSideEffects": true
      }
    }
  }
}


================================================
FILE: FEXCore/Source/Interface/IR/IRDumper.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: ir|dumper ~ IR -> Text
tags: ir|dumper
$end_info$
*/

#include "Interface/IR/IntrusiveIRList.h"
#include "Interface/IR/RegisterAllocationData.h"

#include <FEXCore/IR/IR.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/sstream.h>

#include <algorithm>
#include <array>
#include <ostream>
#include <stdint.h>
#include <string_view>
#include <iomanip>

namespace FEXCore::IR {
#define IROP_GETNAME_IMPL
#define IROP_GETRAARGS_IMPL
#define IROP_REG_CLASSES_IMPL
#define IROP_HASSIDEEFFECTS_IMPL
#define IROP_SIZES_IMPL
#define IROP_GETHASDEST_IMPL

#include <FEXCore/IR/IRDefines.inc>

// Writes a SHA-256 argument as the prefix "sha256:" followed by every element of
// Arg.data formatted as two-digit lowercase hex, joined without a separator.
static void PrintArg(fextl::stringstream* out, const IRListView*, const SHA256Sum& Arg) {
  const auto Rendered = fextl::fmt::format("sha256:{:02x}", fmt::join(Arg.data, ""));
  *out << Rendered;
}

// Writes an integer argument as '#' followed by the value in hexadecimal
// using fmt's alternate form ("{:#x}").
static void PrintArg(fextl::stringstream* out, const IRListView*, uint64_t Arg) {
  const auto Rendered = fextl::fmt::format("#{:#x}", Arg);
  *out << Rendered;
}

// Writes the textual name of a condition code.
// CondClass::AL is printed as "ALWAYS". Every other value is looked up in a name
// table indexed by the enum's underlying value. A value that falls outside the
// table prints an "<Unknown ...>" marker, matching the other PrintArg overloads,
// instead of indexing past the end of the array.
static void PrintArg(fextl::stringstream* out, const IRListView*, CondClass Arg) {
  if (Arg == CondClass::AL) {
    *out << "ALWAYS";
    return;
  }

  static constexpr std::array<std::string_view, 22> CondNames = {"EQ",  "NEQ", "UGE",  "ULT", "MI",  "PL",  "VS",   "VC",
                                                                 "UGT", "ULE", "SGE",  "SLT", "SGT", "SLE", "TSTZ", "TSTNZ",
                                                                 "FLU", "FGE", "FLEU", "FGT", "FU",  "FNU"};

  // Widen to an unsigned 64-bit index so that any out-of-range (or negative) underlying
  // value compares greater than the table size and is rejected below.
  const auto Index = static_cast<uint64_t>(FEXCore::ToUnderlying(Arg));
  if (Index >= CondNames.size()) {
    *out << "<Unknown CondClass Type>";
    return;
  }

  *out << CondNames[Index];
}

// Writes the textual name of a memory offset extension type.
// The name is looked up in a table indexed by the enum's underlying value. A value
// that falls outside the table prints an "<Unknown ...>" marker, matching the other
// PrintArg overloads, instead of indexing past the end of the array.
static void PrintArg(fextl::stringstream* out, const IRListView*, MemOffsetType Arg) {
  static constexpr std::array<std::string_view, 3> Names = {
    "SXTX",
    "UXTW",
    "SXTW",
  };

  // Widen to an unsigned 64-bit index so that any out-of-range (or negative) underlying
  // value compares greater than the table size and is rejected below.
  const auto Index = static_cast<uint64_t>(FEXCore::ToUnderlying(Arg));
  if (Index >= Names.size()) {
    *out << "<Unknown MemOffset Type>";
    return;
  }

  *out << Names[Index];
}

// Writes the name of a register class, or an "<Unknown ...>" marker when the
// value matches none of the enumerators handled below.
static void PrintArg(fextl::stringstream* out, const IRListView*, RegClass Arg) {
  const char* ClassName = "<Unknown RegClass Type>";

  switch (Arg) {
  case RegClass::Invalid: ClassName = "Invalid"; break;
  case RegClass::GPR: ClassName = "GPR"; break;
  case RegClass::GPRFixed: ClassName = "GPRFixed"; break;
  case RegClass::FPR: ClassName = "FPR"; break;
  case RegClass::FPRFixed: ClassName = "FPRFixed"; break;
  case RegClass::Complex: ClassName = "Complex"; break;
  }

  *out << ClassName;
}

// Writes an SSA argument.
//
// When the wrapper reports IsImmediate(), it is reinterpreted as a PhysicalRegister and
// printed as a class prefix ("r", "R", "v", "V", "c", "invalid" or "unknown") followed by
// the decimal register number; the number is omitted for the Invalid class.
//
// Otherwise the argument is printed as "%<id>" (or "%Invalid"). If the defining op has a
// destination, a type suffix " i<bits>" follows, plus "v<count>" when the value holds more
// than one element.
static void PrintArg(fextl::stringstream* out, const IRListView* IR, OrderedNodeWrapper Arg) {
  if (Arg.IsImmediate()) {
    auto PhyReg = PhysicalRegister(Arg);
    const auto Class = PhyReg.AsRegClass();

    const char* Prefix = "unknown";
    switch (Class) {
    case RegClass::GPR: Prefix = "r"; break;
    case RegClass::GPRFixed: Prefix = "R"; break;
    case RegClass::FPR: Prefix = "v"; break;
    case RegClass::FPRFixed: Prefix = "V"; break;
    case RegClass::Complex: Prefix = "c"; break;
    case RegClass::Invalid: Prefix = "invalid"; break;
    default: break;
    }
    *out << Prefix;

    // Only the Invalid class is printed without a register number.
    if (Class != RegClass::Invalid) {
      *out << std::dec << uint32_t(PhyReg.Reg);
    }

    return;
  }

  auto [CodeNode, IROp] = IR->at(Arg)();
  const auto ArgID = Arg.ID();

  if (!ArgID.IsInvalid()) {
    *out << "%" << std::dec << ArgID;
  } else {
    *out << "%Invalid";
  }

  // Ops without a destination carry no type suffix.
  if (!GetHasDest(IROp->Op)) {
    return;
  }

  // An unsized element size means the element spans the whole op size.
  auto LaneSize = IROp->ElementSize;
  if (LaneSize == OpSize::iUnsized) {
    LaneSize = IROp->Size;
  }

  uint32_t LaneCount = 0;
  if (LaneSize != OpSize::iUnsized) {
    LaneCount = IR::NumElements(IROp->Size, LaneSize);
  }

  *out << " i" << std::dec << IR::OpSizeAsBits(LaneSize);

  if (LaneCount > 1) {
    *out << "v" << std::dec << LaneCount;
  }
}

// Writes the name of a fence type, or an "<Unknown ...>" marker when the value
// matches none of the enumerators handled below.
static void PrintArg(fextl::stringstream* out, const IRListView*, FenceType Arg) {
  const char* FenceName = "<Unknown Fence Type>";

  switch (Arg) {
  case FenceType::Load: FenceName = "Loads"; break;
  case FenceType::Store: FenceName = "Stores"; break;
  case FenceType::LoadStore: FenceName = "LoadStores"; break;
  case FenceType::Inst: FenceName = "Instruction"; break;
  }

  *out << FenceName;
}

// Writes the name of a rounding mode, or an "<Unknown ...>" marker when the value
// matches none of the enumerators handled below.
static void PrintArg(fextl::stringstream* out, const IRListView*, RoundMode Arg) {
  const char* ModeName = "<Unknown Round Type>";

  switch (Arg) {
  case RoundMode::Nearest: ModeName = "Nearest"; break;
  case RoundMode::NegInfinity: ModeName = "-Inf"; break;
  case RoundMode::PosInfinity: ModeName = "+Inf"; break;
  case RoundMode::TowardsZero: ModeName = "Towards Zero"; break;
  case RoundMode::Host: ModeName = "Host"; break;
  }

  *out << ModeName;
}

// Writes the name of a constant padding mode, or an "<Unknown ...>" marker when the
// value matches none of the enumerators handled below.
static void PrintArg(fextl::stringstream* out, const IRListView*, ConstPad Arg) {
  const char* PadName = "<Unknown ConstPad Type>";

  switch (Arg) {
  case ConstPad::NoPad: PadName = "NoPad"; break;
  case ConstPad::DoPad: PadName = "DoPad"; break;
  case ConstPad::AutoPad: PadName = "AutoPad"; break;
  }

  *out << PadName;
}

// Writes the dump label of a named vector constant.
// NAMED_VECTOR_MAX prints a programming-error marker; a value that matches none of
// the enumerators handled below prints an "<Unknown ...>" marker.
static void PrintArg(fextl::stringstream* out, const IRListView*, NamedVectorConstant Arg) {
  const char* Label = "<Unknown Named Vector Constant>";

  // clang-format off
  switch (Arg) {
    case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX:       Label = "u16_incremental_index"; break;
    case NamedVectorConstant::NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER: Label = "u16_incremental_index_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT:            Label = "addsubps_invert"; break;
    case NamedVectorConstant::NAMED_VECTOR_PADDSUBPS_INVERT_UPPER:      Label = "addsubps_invert_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT:            Label = "addsubpd_invert"; break;
    case NamedVectorConstant::NAMED_VECTOR_PADDSUBPD_INVERT_UPPER:      Label = "addsubpd_invert_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_PSUBADDPS_INVERT:            Label = "subaddps_invert"; break;
    case NamedVectorConstant::NAMED_VECTOR_PSUBADDPS_INVERT_UPPER:      Label = "subaddps_invert_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_PSUBADDPD_INVERT:            Label = "subaddpd_invert"; break;
    case NamedVectorConstant::NAMED_VECTOR_PSUBADDPD_INVERT_UPPER:      Label = "subaddpd_invert_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_MOVMSKPS_SHIFT:              Label = "movmskps_shift"; break;
    case NamedVectorConstant::NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE:     Label = "aeskeygenassist_swizzle"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_0110B:               Label = "blendps_0110b"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_0111B:               Label = "blendps_0111b"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_1001B:               Label = "blendps_1001b"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_1011B:               Label = "blendps_1011b"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_1101B:               Label = "blendps_1101b"; break;
    case NamedVectorConstant::NAMED_VECTOR_BLENDPS_1110B:               Label = "blendps_1110b"; break;
    case NamedVectorConstant::NAMED_VECTOR_MOVMASKB:                    Label = "movmaskb"; break;
    case NamedVectorConstant::NAMED_VECTOR_MOVMASKB_UPPER:              Label = "movmaskb_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_ZERO:                        Label = "vectorzero"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_ONE:                     Label = "x87_1_0"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_10:                 Label = "x87_log2_10"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_LOG2_E:                  Label = "x87_log2_e"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_PI:                      Label = "x87_pi"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_LOG10_2:                 Label = "x87_log10_2"; break;
    case NamedVectorConstant::NAMED_VECTOR_X87_LOG_2:                   Label = "x87_log2"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F32_I32:              Label = "cvtmax_f32_i32"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F32_I32_UPPER:        Label = "cvtmax_f32_i32_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F32_I64:              Label = "cvtmax_f32_i64"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F64_I32:              Label = "cvtmax_f64_i32"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F64_I32_UPPER:        Label = "cvtmax_f64_i32_upper"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_F64_I64:              Label = "cvtmax_f64_i64"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_I32:                  Label = "cvtmax_i32"; break;
    case NamedVectorConstant::NAMED_VECTOR_CVTMAX_I64:                  Label = "cvtmax_i64"; break;
    case NamedVectorConstant::NAMED_VECTOR_F80_SIGN_MASK:               Label = "f80_sign_mask"; break;
    case NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K0:                 Label = "sha1rnds_k0"; break;
    case NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K1:                 Label = "sha1rnds_k1"; break;
    case NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K2:                 Label = "sha1rnds_k2"; break;
    case NamedVectorConstant::NAMED_VECTOR_SHA1RNDS_K3:                 Label = "sha1rnds_k3"; break;
    case NamedVectorConstant::NAMED_VECTOR_MAX:                         Label = "<Programming Error: Printing MAX value>"; break;
  }
  // clang-format on

  *out << Label;
}

// Streams the short mnemonic of an indexed named vector constant to `out`.
// The IRListView argument is unused; it only exists so every PrintArg
// overload shares the same call shape.
static void PrintArg(fextl::stringstream* out, const IRListView*, IndexNamedVectorConstant Arg) {
  // Text used when the value matches none of the enumerators below.
  const char* Label = "<Unknown Indexed Named Vector Constant>";

  // clang-format off
  switch (Arg) {
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFLW:
    Label = "pshuflw";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFHW:
    Label = "pshufhw";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PSHUFD:
    Label = "pshufd";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_SHUFPS:
    Label = "shufps";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPS_MASK:
    Label = "dpps_mask";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_DPPD_MASK:
    Label = "dppd_mask";
    break;
  case IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_PBLENDW:
    Label = "pblendw";
    break;
  case INDEXED_NAMED_VECTOR_MAX:
    // MAX is a sentinel, not a printable constant.
    Label = "<Programming Error: Printing MAX value>";
    break;
  }
  // clang-format on

  *out << Label;
}

// Streams a short textual name for an OpSize value to `out`.
static void PrintArg(fextl::stringstream* out, const IRListView*, OpSize Arg) {
  // Text used when the value matches none of the enumerators below.
  const char* Label = "<Unknown OpSize Type>";

  switch (Arg) {
  case OpSize::iUnsized: Label = "Unsized"; break;
  case OpSize::i8Bit: Label = "i8"; break;
  case OpSize::i16Bit: Label = "i16"; break;
  case OpSize::i32Bit: Label = "i32"; break;
  case OpSize::i64Bit: Label = "i64"; break;
  case OpSize::f80Bit: Label = "f80"; break;
  case OpSize::i128Bit: Label = "i128"; break;
  case OpSize::i256Bit: Label = "i256"; break;
  case OpSize::iInvalid: Label = "Invalid"; break;
  }

  *out << Label;
}

// Streams the mnemonic of a floating-point compare operation to `out`.
static void PrintArg(fextl::stringstream* out, const IRListView*, FloatCompareOp Arg) {
  // Text used when the value matches none of the enumerators below.
  const char* Label = "<Unknown FloatCompareOp Type>";

  switch (Arg) {
  case FloatCompareOp::EQ: Label = "FEQ"; break;
  case FloatCompareOp::LT: Label = "FLT"; break;
  case FloatCompareOp::LE: Label = "FLE"; break;
  case FloatCompareOp::UNO: Label = "UNO"; break;
  case FloatCompareOp::NEQ: Label = "NEQ"; break;
  case FloatCompareOp::ORD: Label = "ORD"; break;
  }

  *out << Label;
}

// Streams a break definition as "{ErrorRegister.Signal.TrapNumber.si_code}".
// Signal, TrapNumber and si_code are widened to uint32_t before streaming so
// they are written as numbers.
static void PrintArg(fextl::stringstream* out, const IRListView*, FEXCore::IR::BreakDefinition Arg) {
  const auto Signal = static_cast<uint32_t>(Arg.Signal);
  const auto TrapNumber = static_cast<uint32_t>(Arg.TrapNumber);
  const auto SiCode = static_cast<uint32_t>(Arg.si_code);

  *out << "{" << Arg.ErrorRegister << "." << Signal << "." << TrapNumber << "." << SiCode << "}";
}

// Streams the mnemonic of a shift type to `out`.
static void PrintArg(fextl::stringstream* out, const IRListView*, ShiftType Arg) {
  // Text used when the value matches none of the enumerators below.
  const char* Label = "<Unknown Shift Type>";

  switch (Arg) {
  case ShiftType::LSL: Label = "LSL"; break;
  case ShiftType::LSR: Label = "LSR"; break;
  case ShiftType::ASR: Label = "ASR"; break;
  case ShiftType::ROR: Label = "ROR"; break;
  }

  *out << Label;
}

// Streams the name of a branch hint to `out`.
static void PrintArg(fextl::stringstream* out, const IRListView*, BranchHint Arg) {
  // Text used when the value matches none of the enumerators below.
  const char* Label = "<Unknown Branch Hint>";

  switch (Arg) {
  case BranchHint::None: Label = "None"; break;
  case BranchHint::Call: Label = "Call"; break;
  case BranchHint::Return: Label = "Return"; break;
  case BranchHint::CheckTF: Label = "CheckTF"; break;
  }

  *out << Label;
}

// Streams a 16-byte array as hexadecimal text: each byte is formatted with
// "{:02x}" and the bytes are joined with no separator.
static void PrintArg(fextl::stringstream* out, const IRListView*, const std::array<uint8_t, 0x10>& Arg) {
  const auto Joined = fmt::join(Arg, "");
  *out << fextl::fmt::format("{:02x}", Joined);
}

/**
 * @brief Writes a textual listing of an IR list to `out`.
 *
 * Output layout:
 *  - one tab-indented "IRHeader" line,
 *  - one "CodeBlock" line per block,
 *  - one line per op inside each block, indented one level deeper.
 *
 * Ops that produce a value are printed as
 *   `%ID[(reg)] i<bits>[v<count>] = Name args`
 * and ops without a destination as
 *   `(%ID i<bits>[v<count>]) Name args`.
 *
 * `<bits>` is the element size; when the op carries no element size
 * (OpSize::iUnsized) the op size is used instead. `v<count>` is only
 * printed when the op holds more than one element.
 *
 * @param out Stream that receives the listing.
 * @param IR  View of the IR to print.
 */
void Dump(fextl::stringstream* out, const IRListView* IR) {
  auto HeaderOp = IR->GetHeader();

  int8_t CurrentIndent = 0;
  // Emits one tab per current indentation level.
  auto AddIndent = [&out, &CurrentIndent]() {
    for (uint8_t i = 0; i < CurrentIndent; ++i) {
      *out << "\t";
    }
  };

  ++CurrentIndent;
  AddIndent();
  *out << fextl::fmt::format("(%0) IRHeader %{}, #{:#x}, #{}, #{}\n", HeaderOp->Blocks.ID(), +HeaderOp->OriginalRIP, +HeaderOp->BlockCount,
                             +HeaderOp->NumHostInstructions);

  for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) {
    {
      auto BlockIROp = BlockHeader->C<FEXCore::IR::IROp_CodeBlock>();

      AddIndent();
      *out << "(%" << IR->GetID(BlockNode) << ") " << "CodeBlock ";

      *out << "%" << BlockIROp->Begin.ID() << ", ";
      *out << "%" << BlockIROp->Last.ID() << std::endl;
    }

    // Ops of a block are printed one level deeper than the block line.
    ++CurrentIndent;
    for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) {
      const auto ID = IR->GetID(CodeNode);
      const auto Name = FEXCore::IR::GetName(IROp->Op);

      {
        AddIndent();
        if (GetHasDest(IROp->Op)) {

          // Fall back to the op size when no element size is recorded, exactly
          // like the no-destination branch below.
          auto ElementSize = IROp->ElementSize;
          // 32-bit so the count streams as a number; a uint8_t would be
          // written as a character.
          uint32_t NumElements = 0;
          if (IROp->ElementSize == OpSize::iUnsized) {
            ElementSize = IROp->Size;
          }

          if (ElementSize != OpSize::iUnsized) {
            NumElements = IR::NumElements(IROp->Size, ElementSize);
          }

          *out << "%" << std::dec << ID;

          // Append the assigned physical register, if there is one.
          auto PhyReg = PhysicalRegister(CodeNode);
          if (!PhyReg.IsInvalid()) {
            switch (PhyReg.AsRegClass()) {
            case RegClass::GPR: *out << "(r"; break;
            case RegClass::GPRFixed: *out << "(R"; break;
            case RegClass::FPR: *out << "(v"; break;
            case RegClass::FPRFixed: *out << "(V"; break;
            case RegClass::Complex: *out << "(complex"; break;
            case RegClass::Invalid: *out << "(invalid"; break;
            default: *out << "(unknown"; break;
            }
            if (PhyReg.AsRegClass() != RegClass::Invalid) {
              *out << std::dec << uint32_t(PhyReg.Reg) << ")";
            } else {
              *out << ")";
            }
          }

          *out << " i" << std::dec << IR::OpSizeAsBits(ElementSize);

          if (NumElements > 1) {
            *out << "v" << std::dec << NumElements;
          }

          *out << " = ";
        } else {

          auto ElementSize = IROp->ElementSize;
          if (IROp->ElementSize == OpSize::iUnsized) {
            ElementSize = IROp->Size;
          }
          uint32_t NumElements = 0;
          if (ElementSize != OpSize::iUnsized) {
            NumElements = IR::NumElements(IROp->Size, ElementSize);
          }

          *out << "(%" << std::dec << ID << ' ';
          *out << 'i' << std::dec << IR::OpSizeAsBits(ElementSize);
          if (NumElements > 1) {
            *out << 'v' << std::dec << NumElements;
          }
          *out << ") ";
        }
        *out << Name;

// NOTE(review): with IROP_ARGPRINTER_HELPER defined, the include below
// presumably expands to the opening of a switch over the op plus one case per
// op that prints its arguments; the `default:` label and the first closing
// brace that follow would then belong to that switch. Confirm against the
// generated IRDefines.inc.
#define IROP_ARGPRINTER_HELPER
#include <FEXCore/IR/IRDefines.inc>
      default: *out << "<Unknown Args>"; break;
      }

      //*out << " (" <<  std::dec << CodeNode->GetUses() << ")";

      *out << "\n";
    }
  }

  CurrentIndent = std::max(0, CurrentIndent - 1);
}
}
}


================================================
FILE: FEXCore/Source/Interface/IR/IREmitter.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: ir|emitter ~ C++ Functions to generate IR. See IR.json for spec.
tags: ir|emitter
$end_info$
*/

#include "Interface/IR/IREmitter.h"

#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstdint>
#include <cstring>

namespace FEXCore::IR {

// True for OP_EXITFUNCTION and OP_BREAK; false for every other op.
static bool IsFragmentExit(FEXCore::IR::IROps Op) {
  return Op == OP_EXITFUNCTION || Op == OP_BREAK;
}

// True for OP_JUMP and OP_CONDJUMP, and for every op that IsFragmentExit()
// accepts.
bool IsBlockExit(FEXCore::IR::IROps Op) {
  if (Op == OP_JUMP || Op == OP_CONDJUMP) {
    return true;
  }
  return IsFragmentExit(Op);
}

// Resolves the register class of the value produced by `Node`.
//
// When GetOpRegClass() already reports GPR, FPR, GPRFixed, FPRFixed or
// Invalid, that answer is returned unchanged. Any other class is resolved by
// reading the `Class` field of the specific op; ops that are not handled here
// trigger an assertion message and yield RegClass::Invalid.
RegClass IREmitter::WalkFindRegClass(Ref Node) {
  const auto Class = GetOpRegClass(Node);
  const bool IsResolved = Class == RegClass::GPR || Class == RegClass::FPR || Class == RegClass::GPRFixed ||
                          Class == RegClass::FPRFixed || Class == RegClass::Invalid;
  if (IsResolved) {
    return Class;
  }

  // Complex case, needs to be handled on an op by op basis
  FEXCore::IR::IROp_Header* IROp = Node->Op(DualListData.DataBegin());

  switch (IROp->Op) {
  case IROps::OP_LOADREGISTER: return IROp->C<IROp_LoadRegister>()->Class;
  case IROps::OP_LOADCONTEXT: return IROp->C<IROp_LoadContext>()->Class;
  case IROps::OP_LOADCONTEXTINDEXED: return IROp->C<IROp_LoadContextIndexed>()->Class;
  case IROps::OP_FILLREGISTER: return IROp->C<IROp_FillRegister>()->Class;
  case IROps::OP_LOADMEM: return IROp->C<IROp_LoadMem>()->Class;
  case IROps::OP_LOADMEMTSO: return IROp->C<IROp_LoadMemTSO>()->Class;
  default: LOGMAN_MSG_A_FMT("Unhandled op type: {} {} in argument class validation", ToUnderlying(IROp->Op), GetOpName(Node)); break;
  }

  return RegClass::Invalid;
}

void IREmitter::ResetWorkingList() {
  DualListData.Reset();
  CodeBlocks.clear();
  CurrentWriteCursor = nullptr;
  // This is necessary since we do "null" pointer checks
  InvalidNode = reinterpret_cast<Ref>(DualListData.ListAllocate(sizeof(OrderedNode)));
  memset(InvalidNode, 0, sizeof(OrderedNode));
  CurrentCodeBlock = nullptr;
}

// Rewrites every argument in [Begin, End) that refers to `Node` so that it
// refers to `NewNode` instead, moving one use from `Node` to `NewNode` for
// each rewritten argument. The walk stops early as soon as `Node` has no uses
// left.
void IREmitter::ReplaceAllUsesWithRange(Ref Node, Ref NewNode, AllNodesIterator Begin, AllNodesIterator End) {
  const uintptr_t ListBegin = DualListData.ListBegin();
  const auto TargetId = Node->Wrapped(ListBegin).ID();

  for (; Begin != End; ++Begin) {
    auto [RealNode, IROp] = Begin();

    const uint8_t ArgCount = IR::GetArgs(IROp->Op);
    for (uint8_t Idx = 0; Idx < ArgCount; ++Idx) {
      if (IROp->Args[Idx].ID() != TargetId) {
        continue;
      }

      Node->RemoveUse();
      NewNode->AddUse();
      IROp->Args[Idx].NodeOffset = NewNode->Wrapped(ListBegin).NodeOffset;

      // We can stop searching once all uses of the node are gone.
      if (Node->NumUses == 0) {
        return;
      }
    }
  }
}

// Points argument number `Arg` of `Node` at `NewArg`, dropping one use from
// the previous argument node and adding one to `NewArg`.
void IREmitter::ReplaceNodeArgument(Ref Node, uint8_t Arg, Ref NewArg) {
  const uintptr_t ListBegin = DualListData.ListBegin();
  FEXCore::IR::IROp_Header* IROp = Node->Op(DualListData.DataBegin());

  // Release the use held on the argument being replaced.
  Ref PreviousArg = IROp->Args[Arg].GetNode(ListBegin);
  PreviousArg->RemoveUse();

  NewArg->AddUse();
  IROp->Args[Arg].NodeOffset = NewArg->Wrapped(ListBegin).NodeOffset;
}

// Drops one use from every node that `Node` references as an argument.
void IREmitter::RemoveArgUses(Ref Node) {
  const uintptr_t ListBegin = DualListData.ListBegin();
  FEXCore::IR::IROp_Header* IROp = Node->Op(DualListData.DataBegin());

  const uint8_t ArgCount = IR::GetArgs(IROp->Op);
  for (uint8_t Idx = 0; Idx < ArgCount; ++Idx) {
    IROp->Args[Idx].GetNode(ListBegin)->RemoveUse();
  }
}

// Unlinks `Node` from the node list without touching the use counts of its
// arguments (compare Remove(), which releases them first).
void IREmitter::RemovePostRA(Ref Node) {
  const uintptr_t ListBegin = DualListData.ListBegin();
  Node->Unlink(ListBegin);
}

// Releases the uses `Node` holds on its arguments, then unlinks it from the
// node list.
void IREmitter::Remove(Ref Node) {
  RemoveArgUses(Node);

  const uintptr_t ListBegin = DualListData.ListBegin();
  Node->Unlink(ListBegin);
}

// Creates a new code block and links it after `insertAfter`. When
// `insertAfter` is null the block is appended after the last block reachable
// from CurrentCodeBlock. The caller's write cursor is preserved.
IREmitter::IRPair<IROp_CodeBlock> IREmitter::CreateNewCodeBlockAfter(Ref insertAfter) {
  // CreateCodeNode() moves the write cursor; remember it so it can be restored.
  auto SavedCursor = GetWriteCursor();

  auto CodeNode = CreateCodeNode();

  Ref Predecessor = insertAfter;
  if (!Predecessor) {
    LOGMAN_THROW_A_FMT(CurrentCodeBlock != nullptr, "CurrentCodeBlock must not be null here");

    // Find last block
    Predecessor = CurrentCodeBlock;
    for (Ref Following = Predecessor->Header.Next.GetNode(DualListData.ListBegin()); Following != InvalidNode;
         Following = Predecessor->Header.Next.GetNode(DualListData.ListBegin())) {
      Predecessor = Following;
    }
  }

  // Either the requested block or the last block found above.
  LinkCodeBlocks(Predecessor, CodeNode);

  SetWriteCursor(SavedCursor);

  return CodeNode;
}

// Makes `Node` (which must be an OP_CODEBLOCK node) the current block, moves
// the write cursor to the block's Begin node and empties the constant pool.
void IREmitter::SetCurrentCodeBlock(Ref Node) {
  CurrentCodeBlock = Node;

  auto* BlockHeader = Node->Op(DualListData.DataBegin());
  LOGMAN_THROW_A_FMT(BlockHeader->Op == OP_CODEBLOCK, "Node wasn't codeblock. It was '{}'", IR::GetName(BlockHeader->Op));

  auto* Block = BlockHeader->CW<IROp_CodeBlock>();
  SetWriteCursor(Block->Begin.GetNode(DualListData.ListBegin()));

  // Constants are pooled only within a single block.
  NrConstants = 0;
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/IREmitter.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "CodeEmitter/Emitter.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IntrusiveIRList.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/IR/IR.h>

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <stdint.h>
#include <string.h>

namespace FEXCore::IR {

/**
 * @brief Builder for IR.
 *
 * Owns the backing storage for IR nodes and ops (DualListData), tracks a write
 * cursor after which newly created nodes are appended, and exposes the
 * generated `_Op(...)` allocation helpers (pulled in from IRDefines.inc) plus
 * a set of hand-written convenience wrappers around them.
 */
class IREmitter {
public:
  /**
   * @param ThreadAllocator Pooled allocator handed to the backing storage.
   * @param SupportsTSOImm9 When false, InlineMem() never inlines a signed
   *                        9-bit immediate offset for TSO accesses.
   */
  IREmitter(FEXCore::Utils::IntrusivePooledAllocator& ThreadAllocator, bool SupportsTSOImm9)
    : DualListData {ThreadAllocator, 8 * 1024 * 1024}
    , SupportsTSOImm9(SupportsTSOImm9) {}

  virtual ~IREmitter() = default;

  // Reowns or claims the backing buffer and starts from an empty working list.
  void ReownOrClaimBuffer() {
    DualListData.ReownOrClaimBuffer();

    // Reset the working list on new buffer.
    ResetWorkingList();
  }

  // Forwards to the backing storage's DelayedDisownBuffer().
  void DelayedDisownBuffer() {
    DualListData.DelayedDisownBuffer();
  }

  // Returns a view over the IR currently held by this emitter.
  IRListView ViewIR() {
    return IRListView(&DualListData);
  }

  /**
   * @name IR allocation routines
   *
   * @{ */

  RegClass WalkFindRegClass(Ref Node);

  // These inlining helpers are used by IRDefines.inc so define first.

  /**
   * @brief Tries to turn a constant memory offset into an inline constant.
   *
   * Returns `Offset` unchanged unless `OffsetType` is SXTX and `Offset` is a
   * constant. Otherwise the constant is multiplied by `OffsetScale` and
   * inlined when it either fits a signed 9-bit range (-256..255; for TSO
   * accesses only if SupportsTSOImm9) or, for non-TSO accesses, is a multiple
   * of the access size with a quotient of at most 4095.
   *
   * @param OffsetScale In/out: set to 1 when the offset is inlined, since the
   *                    scale has then been folded into the immediate.
   */
  Ref InlineMem(OpSize Size, Ref Offset, MemOffsetType OffsetType, uint8_t& OffsetScale, bool TSO = false) {
    uint64_t Imm {};
    if (OffsetType != MemOffsetType::SXTX || !IsValueConstant(WrapNode(Offset), &Imm)) {
      return Offset;
    }

    // The immediate may be scaled in the IR, we need to correct for that.
    Imm *= OffsetScale;

    // Signed immediate unscaled 9-bit range for both regular and LRCPC2 ops.
    bool IsSIMM9 = ((int64_t)Imm >= -256) && ((int64_t)Imm <= 255);
    IsSIMM9 &= (SupportsTSOImm9 || !TSO);

    // Extended offsets for regular loadstore only.
    LOGMAN_THROW_A_FMT(Size >= IR::OpSize::i8Bit && Size <= IR::OpSize::i256Bit, "Must be sized");

    bool IsExtended = (Imm & (IR::OpSizeToSize(Size) - 1)) == 0 && Imm / IR::OpSizeToSize(Size) <= 4095;
    IsExtended &= !TSO;

    if (IsSIMM9 || IsExtended) {
      OffsetScale = 1;
      return _InlineConstant(Imm);
    } else {
      return Offset;
    }
  }

// DEF_INLINE(Type, Variable, Filter) defines `Ref Inline<Type>(OpSize, Ref)`:
// when the source is a constant whose value (bound to `Variable`) satisfies
// `Filter`, an inline constant with that value is returned; otherwise the
// source is returned unchanged.
#define DEF_INLINE(Type, Variable, Filter)                          \
  Ref Inline##Type(OpSize Size, Ref Source) {                       \
    uint64_t Variable;                                              \
    if (IsValueConstant(WrapNode(Source), &Variable) && (Filter)) { \
      return _InlineConstant(Variable);                             \
    } else {                                                        \
      return Source;                                                \
    }                                                               \
  }

  DEF_INLINE(Any, _, true)
  DEF_INLINE(Zero, X, X == 0)
  DEF_INLINE(AddSub, X, ARMEmitter::IsImmAddSub(X))
  DEF_INLINE(LargeAddSub, X, ARMEmitter::IsImmAddSub(X) && Size >= OpSize::i32Bit);
  DEF_INLINE(Logical, X, ARMEmitter::Emitter::IsImmLogical(X, std::max((int)IR::OpSizeAsBits(Size), 32)));

  Ref InlineSubtractZero(OpSize Size, Ref Src1, Ref Src2) {
    // Only inline a zero if we won't inline the other source.
    return IsValueConstant(WrapNode(Src2)) ? Src1 : InlineZero(Size, Src1);
  }
#undef DEF_INLINE

// These handlers add cost to the constructor and destructor
// If it becomes an issue then blow them away
// GCC also generates some pretty atrocious code around these
// Use Clang!
#define IROP_ALLOCATE_HELPERS
#define IROP_DISPATCH_HELPERS
#include <FEXCore/IR/IRDefines.inc>
  // Jump whose target is left as InvalidNode; see SetJumpTarget().
  IRPair<IROp_Jump> _Jump() {
    return _Jump(InvalidNode);
  }
  // Conditional jump comparing `ssa0` against a zero constant, with both
  // targets left as InvalidNode.
  IRPair<IROp_CondJump> _CondJump(Ref ssa0, CondClass cond = CondClass::NEQ) {
    return _CondJump(ssa0, _Constant(0), InvalidNode, InvalidNode, cond, GetOpSize(ssa0));
  }
  // Conditional jump comparing `ssa0` against a zero constant; `ssa1` and
  // `ssa2` are passed through as the two block arguments.
  IRPair<IROp_CondJump> _CondJump(Ref ssa0, Ref ssa1, Ref ssa2, CondClass cond = CondClass::NEQ) {
    return _CondJump(ssa0, _Constant(0), ssa1, ssa2, cond, GetOpSize(ssa0));
  }

  // Context load/store wrappers with the register class fixed by the name.
  IRPair<IROp_LoadContext> _LoadContextGPR(OpSize ByteSize, uint32_t Offset) {
    return _LoadContext(ByteSize, RegClass::GPR, Offset);
  }
  IRPair<IROp_LoadContext> _LoadContextFPR(OpSize ByteSize, uint32_t Offset) {
    return _LoadContext(ByteSize, RegClass::FPR, Offset);
  }
  IRPair<IROp_StoreContext> _StoreContextGPR(OpSize ByteSize, Ref Value, uint32_t Offset) {
    return _StoreContext(ByteSize, RegClass::GPR, Value, Offset);
  }
  IRPair<IROp_StoreContext> _StoreContextFPR(OpSize ByteSize, Ref Value, uint32_t Offset) {
    return _StoreContext(ByteSize, RegClass::FPR, Value, Offset);
  }

  // Indexed context load/store wrappers with the register class fixed by the name.
  IRPair<IROp_LoadContextIndexed> _LoadContextGPRIndexed(Ref Index, OpSize ByteSize, uint32_t BaseOffset, uint32_t Stride) {
    return _LoadContextIndexed(Index, ByteSize, BaseOffset, Stride, RegClass::GPR);
  }
  IRPair<IROp_LoadContextIndexed> _LoadContextFPRIndexed(Ref Index, OpSize ByteSize, uint32_t BaseOffset, uint32_t Stride) {
    return _LoadContextIndexed(Index, ByteSize, BaseOffset, Stride, RegClass::FPR);
  }
  IRPair<IROp_StoreContextIndexed> _StoreContextGPRIndexed(Ref Value, Ref Index, OpSize ByteSize, uint32_t BaseOffset, uint32_t Stride) {
    return _StoreContextIndexed(Value, Index, ByteSize, BaseOffset, Stride, RegClass::GPR);
  }
  IRPair<IROp_StoreContextIndexed> _StoreContextFPRIndexed(Ref Value, Ref Index, OpSize ByteSize, uint32_t BaseOffset, uint32_t Stride) {
    return _StoreContextIndexed(Value, Index, ByteSize, BaseOffset, Stride, RegClass::FPR);
  }

  // Memory load/store wrappers. The short forms use no offset (Invalid()),
  // offset type SXTX and scale 1. Note that the short store forms take
  // (Addr, Value) while the long forms take (Value, Addr, ...).
  IRPair<IROp_LoadMem> _LoadMem(RegClass Class, OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    return _LoadMem(Class, Size, ssa0, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_LoadMem> _LoadMemGPR(OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    return _LoadMem(RegClass::GPR, Size, ssa0, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_LoadMem> _LoadMemGPR(OpSize Size, Ref Addr, Ref Offset, OpSize Align, MemOffsetType OffsetType, uint8_t OffsetScale) {
    return _LoadMem(RegClass::GPR, Size, Addr, Offset, Align, OffsetType, OffsetScale);
  }
  IRPair<IROp_LoadMem> _LoadMemFPR(OpSize Size, Ref ssa0, OpSize Align = OpSize::i8Bit) {
    return _LoadMem(RegClass::FPR, Size, ssa0, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_LoadMem> _LoadMemFPR(OpSize Size, Ref Addr, Ref Offset, OpSize Align, MemOffsetType OffsetType, uint8_t OffsetScale) {
    return _LoadMem(RegClass::FPR, Size, Addr, Offset, Align, OffsetType, OffsetScale);
  }
  IRPair<IROp_StoreMem> _StoreMem(RegClass Class, OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMem(Class, Size, Value, Addr, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_StoreMem> _StoreMemGPR(OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMem(RegClass::GPR, Size, Value, Addr, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_StoreMem> _StoreMemGPR(OpSize Size, Ref Value, Ref Addr, Ref Offset, OpSize Align, MemOffsetType OffsetType, uint8_t OffsetScale) {
    return _StoreMem(RegClass::GPR, Size, Value, Addr, Offset, Align, OffsetType, OffsetScale);
  }
  IRPair<IROp_StoreMem> _StoreMemFPR(OpSize Size, Ref Addr, Ref Value, OpSize Align = OpSize::i8Bit) {
    return _StoreMem(RegClass::FPR, Size, Value, Addr, Invalid(), Align, MemOffsetType::SXTX, 1);
  }
  IRPair<IROp_StoreMem> _StoreMemFPR(OpSize Size, Ref Value, Ref Addr, Ref Offset, OpSize Align, MemOffsetType OffsetType, uint8_t OffsetScale) {
    return _StoreMem(RegClass::FPR, Size, Value, Addr, Offset, Align, OffsetType, OffsetScale);
  }

  // Pair-store wrappers with the register class fixed by the name.
  IRPair<IROp_StoreMemPair> _StoreMemPairGPR(OpSize Size, Ref Value1, Ref Value2, Ref Addr, uint32_t Offset) {
    return _StoreMemPair(RegClass::GPR, Size, Value1, Value2, Addr, Offset);
  }
  IRPair<IROp_StoreMemPair> _StoreMemPairFPR(OpSize Size, Ref Value1, Ref Value2, Ref Addr, uint32_t Offset) {
    return _StoreMemPair(RegClass::FPR, Size, Value1, Value2, Addr, Offset);
  }

  // 64-bit select between the inline constants 1 and 0, driven by comparing
  // Cmp1 with Cmp2 under `Cond`.
  IRPair<IROp_Select> Select01(FEXCore::IR::OpSize CompareSize, CondClass Cond, OrderedNode* Cmp1, OrderedNode* Cmp2) {
    return _Select(OpSize::i64Bit, CompareSize, Cond, Cmp1, Cmp2, _InlineConstant(1), _InlineConstant(0));
  }

  // Select01() with condition NEQ against a (pooled) zero constant.
  IRPair<IROp_Select> To01(FEXCore::IR::OpSize CompareSize, OrderedNode* Cmp1) {
    return Select01(CompareSize, CondClass::NEQ, Cmp1, Constant(0));
  }

  // 64-bit NZCV select between the inline constants 1 and 0.
  IRPair<IROp_NZCVSelect> _NZCVSelect01(CondClass Cond) {
    return _NZCVSelect(OpSize::i64Bit, Cond, _InlineConstant(1), _InlineConstant(0));
  }

  /**
   * @brief Emits `Src1 <Op> Src2` for a constant `Src2`.
   *
   * For 32-bit operations the constant is first sign-extended from 32 bits.
   * If the constant has its top bit set and its negation passes
   * ARMEmitter::IsImmAddSub(), `NegatedOp` with the negated constant is
   * emitted instead of `Op`.
   */
  Ref Addsub(IR::OpSize Size, IROps Op, IROps NegatedOp, Ref Src1, uint64_t Src2) {
    // Sign-extend the constant
    if (Size == OpSize::i32Bit) {
      Src2 = (int64_t)(int32_t)Src2;
    }

    // Negative constants need to be negated to inline.
    if (Src2 & (1ull << 63) && ARMEmitter::IsImmAddSub(-Src2)) {
      Op = NegatedOp;
      Src2 = -Src2;
    }

    // The op is allocated as an Add and its opcode is then overwritten with
    // the requested one.
    auto Dest = _Add(Size, Src1, Constant(Src2));
    Dest.first->Header.Op = Op;
    return Dest;
  }

  Ref Add(IR::OpSize Size, Ref Src1, uint64_t Src2) {
    return Addsub(Size, OP_ADD, OP_SUB, Src1, Src2);
  }

  Ref Sub(IR::OpSize Size, Ref Src1, uint64_t Src2) {
    return Addsub(Size, OP_SUB, OP_ADD, Src1, Src2);
  }

  Ref AddWithFlags(IR::OpSize Size, Ref Src1, uint64_t Src2) {
    return Addsub(Size, OP_ADDWITHFLAGS, OP_SUBWITHFLAGS, Src1, Src2);
  }

  Ref SubWithFlags(IR::OpSize Size, Ref Src1, uint64_t Src2) {
    return Addsub(Size, OP_SUBWITHFLAGS, OP_ADDWITHFLAGS, Src1, Src2);
  }

// DEF_ADDSUB(Op) defines the (Ref, Ref) overload of `Op`: a constant second
// source is routed to the immediate overload above, anything else goes
// straight to the generated `_Op` helper.
#define DEF_ADDSUB(Op)                                \
  Ref Op(IR::OpSize Size, Ref Src1, Ref Src2) {       \
    uint64_t Constant;                                \
    if (IsValueConstant(WrapNode(Src2), &Constant)) { \
      return Op(Size, Src1, Constant);                \
    } else {                                          \
      return _##Op(Size, Src1, Src2);                 \
    }                                                 \
  }

  DEF_ADDSUB(Add)
  DEF_ADDSUB(Sub)
  DEF_ADDSUB(AddWithFlags)
  DEF_ADDSUB(SubWithFlags)

  // Key identifying one pooled constant.
  struct ConstantData {
    int64_t Value;
    ConstPad Pad;
    int32_t MaxBytes;
    [[nodiscard]] auto operator<=>(const ConstantData&) const noexcept = default;
  };
  // Constant pool used by Constant(): 32 slots written round-robin.
  ConstantData Constants[32];
  Ref ConstantRefs[32];
  // Number of constants inserted into the pool so far. Value-initialized so
  // that Constant() never reads an indeterminate count if it is called before
  // SetCurrentCodeBlock() has reset the pool.
  uint32_t NrConstants {};

  /**
   * @brief Returns a constant node for the given value, reusing a pooled one
   * when an identical (Value, Pad, MaxBytes) entry exists.
   *
   * On a miss a fresh constant is materialized and stored in slot
   * `NrConstants & 31`, overwriting the oldest entry once the pool is full.
   */
  Ref Constant(int64_t Value, ConstPad Pad = IR::ConstPad::NoPad, int32_t MaxBytes = 0) {
    const ConstantData Data {
      .Value = Value,
      .Pad = Pad,
      .MaxBytes = MaxBytes,
    };
    // Search for the constant in the pool.
    for (unsigned i = 0; i < std::min(NrConstants, 32u); ++i) {
      if (Constants[i] == Data) {
        return ConstantRefs[i];
      }
    }

    // Otherwise, materialize a fresh constant and pool it.
    Ref R = _Constant(Value, Pad, MaxBytes);
    unsigned i = (NrConstants++) & 31;
    Constants[i] = Data;
    ConstantRefs[i] = R;
    return R;
  }

  // The sentinel node used in place of a null reference.
  Ref Invalid() {
    return InvalidNode;
  }

  // Jump-target setters. Each one asserts that `Target` is an OP_CODEBLOCK
  // node and then stores its node offset into the corresponding argument.
  void SetJumpTarget(IR::IROp_Jump* Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting Jump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));

    Op->Header.Args[0].NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }
  void SetTrueJumpTarget(IR::IROp_CondJump* Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting CondJump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));

    Op->TrueBlock.NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }
  void SetFalseJumpTarget(IR::IROp_CondJump* Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting CondJump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));

    Op->FalseBlock.NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }

  // IRPair overloads of the setters above; they operate on `Op.first`.
  void SetJumpTarget(IRPair<IROp_Jump> Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting Jump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));

    Op.first->Header.Args[0].NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }
  void SetTrueJumpTarget(IRPair<IROp_CondJump> Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting CondJump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));
    Op.first->TrueBlock.NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }
  void SetFalseJumpTarget(IRPair<IROp_CondJump> Op, Ref Target) {
    LOGMAN_THROW_A_FMT(Target->Op(DualListData.DataBegin())->Op == OP_CODEBLOCK, "Tried setting CondJump target to %{} {}",
                       Target->Wrapped(DualListData.ListBegin()).ID(), IR::GetName(Target->Op(DualListData.DataBegin())->Op));
    Op.first->FalseBlock.NodeOffset = Target->Wrapped(DualListData.ListBegin()).NodeOffset;
  }

  /**  @} */
  // Wrapper overload: resolves the node and defers to WalkFindRegClass(Ref).
  RegClass WalkFindRegClass(OrderedNodeWrapper ssa) {
    Ref RealNode = ssa.GetNode(DualListData.ListBegin());
    return WalkFindRegClass(RealNode);
  }

  // True when `ssa` refers to an OP_CONSTANT node. If `Constant` is non-null
  // it receives the constant's value.
  bool IsValueConstant(OrderedNodeWrapper ssa, uint64_t* Constant = nullptr) {
    Ref RealNode = ssa.GetNode(DualListData.ListBegin());
    FEXCore::IR::IROp_Header* IROp = RealNode->Op(DualListData.DataBegin());
    if (IROp->Op == OP_CONSTANT) {
      auto Op = IROp->C<IR::IROp_Constant>();
      if (Constant) {
        *Constant = Op->Constant;
      }
      return true;
    }
    return false;
  }

  // True when `ssa` refers to an OP_INLINECONSTANT node.
  bool IsValueInlineConstant(OrderedNodeWrapper ssa) {
    Ref RealNode = ssa.GetNode(DualListData.ListBegin());
    FEXCore::IR::IROp_Header* IROp = RealNode->Op(DualListData.DataBegin());
    if (IROp->Op == OP_INLINECONSTANT) {
      return true;
    }
    return false;
  }

  // Returns the op header of the node `ssa` refers to.
  FEXCore::IR::IROp_Header* GetOpHeader(OrderedNodeWrapper ssa) {
    Ref RealNode = ssa.GetNode(DualListData.ListBegin());
    return RealNode->Op(DualListData.DataBegin());
  }

  // Converts a wrapper into a node pointer.
  Ref UnwrapNode(OrderedNodeWrapper ssa) {
    return ssa.GetNode(DualListData.ListBegin());
  }

  // Converts a node pointer into a wrapper.
  OrderedNodeWrapper WrapNode(Ref node) {
    return node->Wrapped(DualListData.ListBegin());
  }

  // Returns an iterator positioned at `wrapper`.
  NodeIterator GetIterator(OrderedNodeWrapper wrapper) {
    return NodeIterator(DualListData.ListBegin(), DualListData.DataBegin(), wrapper);
  }

  void ReplaceAllUsesWithRange(Ref Node, Ref NewNode, AllNodesIterator Begin, AllNodesIterator End);

  // Replaces uses of `Node` with `NewNode` in everything strictly after
  // `After`; the range end is a default-positioned AllNodesIterator.
  void ReplaceUsesWithAfter(Ref Node, Ref NewNode, AllNodesIterator After) {
    ++After;
    ReplaceAllUsesWithRange(Node, NewNode, After, AllNodesIterator(DualListData.ListBegin(), DualListData.DataBegin()));
  }

  // Node-pointer overload of ReplaceUsesWithAfter().
  void ReplaceUsesWithAfter(Ref Node, Ref NewNode, Ref After) {
    auto Wrapped = After->Wrapped(DualListData.ListBegin());
    AllNodesIterator It = AllNodesIterator(DualListData.ListBegin(), DualListData.DataBegin(), Wrapped);

    ReplaceUsesWithAfter(Node, NewNode, It);
  }

  void ReplaceNodeArgument(Ref Node, uint8_t Arg, Ref NewArg);

  void Remove(Ref Node);
  void RemovePostRA(Ref Node);

  /**
   * @brief Copies the IR held by `rhs` into this emitter.
   *
   * Asserts that this emitter's backing storage is at least as large as
   * `rhs`'s. InvalidNode and every entry of CodeBlocks are rebased from
   * `rhs`'s list onto this emitter's list; CurrentWriteCursor is copied as-is.
   */
  void CopyData(const IREmitter& rhs) {
    LOGMAN_THROW_A_FMT(rhs.DualListData.DataBackingSize() <= DualListData.DataBackingSize(), "Trying to take ownership of data that is too "
                                                                                             "large");
    LOGMAN_THROW_A_FMT(rhs.DualListData.ListBackingSize() <= DualListData.ListBackingSize(), "Trying to take ownership of data that is too "
                                                                                             "large");
    DualListData.CopyData(rhs.DualListData);
    InvalidNode = rhs.InvalidNode->Wrapped(rhs.DualListData.ListBegin()).GetNode(DualListData.ListBegin());
    CurrentWriteCursor = rhs.CurrentWriteCursor;
    CodeBlocks = rhs.CodeBlocks;
    for (auto& CodeBlock : CodeBlocks) {
      CodeBlock = CodeBlock->Wrapped(rhs.DualListData.ListBegin()).GetNode(DualListData.ListBegin());
    }
  }

  // New nodes are appended after `Node`; nullptr orphans the next node.
  void SetWriteCursor(Ref Node) {
    CurrentWriteCursor = Node;
  }

  // Set cursor to write before Node
  void SetWriteCursorBefore(Ref Node) {
    auto IR = ViewIR();
    auto Before = IR.at(Node);
    --Before;

    SetWriteCursor((*Before).Node);
  }

  Ref GetWriteCursor() {
    return CurrentWriteCursor;
  }

  Ref GetCurrentBlock() {
    return CurrentCodeBlock;
  }

  /**
   * @brief This creates an orphaned code node
   * The IROp backing is in the correct list but the OrderedNode lives outside of the list
   *
   * XXX: This is because we don't want code blocks to interleave with current instruction IR ops currently
   * We can change this behaviour once we remove the old BeginBlock/EndBlock types
   *
   * The new block is given the next block ID from the IR header, is recorded
   * in CodeBlocks, and gets a BeginBlock/EndBlock pair stored in its
   * Begin/Last fields. The write cursor is left on the EndBlock node.
   *
   * @return OrderedNode
   */
  IRPair<IROp_CodeBlock> CreateCodeNode(bool EntryPoint = false, uint32_t GuestEntryOffset = 0) {
    SetWriteCursor(nullptr); // Orphan from any previous nodes

    auto ID = ViewIR().GetHeader()->BlockCount++;
    auto CodeNode = _CodeBlock(InvalidNode, InvalidNode, ID, EntryPoint, GuestEntryOffset);

    CodeBlocks.emplace_back(CodeNode);

    SetWriteCursor(nullptr); // Orphan from any future nodes

    auto Begin = _BeginBlock(CodeNode);
    CodeNode.first->Begin = Begin.Node->Wrapped(DualListData.ListBegin());

    auto EndBlock = _EndBlock(CodeNode);
    CodeNode.first->Last = EndBlock.Node->Wrapped(DualListData.ListBegin());

    return CodeNode;
  }

  /**
   * @name Links codeblocks together
   * Codeblocks are singly linked so we need to walk the list forward if the linked block isn't the last
   *
   * e.g.
   * CodeNode->Next -> Next
   * to
   * CodeNode->Next -> New -> Next
   *
   * @{ */
  /**  @} */
  void LinkCodeBlocks(Ref CodeNode, Ref Next) {
    [[maybe_unused]] auto CurrentIROp = CodeNode->Op(DualListData.DataBegin())->CW<FEXCore::IR::IROp_CodeBlock>();
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(CurrentIROp->Header.Op == IROps::OP_CODEBLOCK, "Invalid");
#endif

    CodeNode->append(DualListData.ListBegin(), Next);
  }

  // Creates a new code block appended after the last existing block.
  IRPair<IROp_CodeBlock> CreateNewCodeBlockAtEnd() {
    return CreateNewCodeBlockAfter(nullptr);
  }
  IRPair<IROp_CodeBlock> CreateNewCodeBlockAfter(Ref insertAfter);
  void SetCurrentCodeBlock(Ref Node);

protected:
  void RemoveArgUses(Ref Node);

  // Allocates a list node for `Op`, appends it after the write cursor (when
  // there is one) and advances the cursor to the new node.
  Ref CreateNode(IROp_Header* Op) {
    uintptr_t ListBegin = DualListData.ListBegin();
    size_t Size = sizeof(OrderedNode);
    void* Ptr = DualListData.ListAllocate(Size);
    Ref Node = new (Ptr) OrderedNode();
    Node->Header.Value.SetOffset(DualListData.DataBegin(), reinterpret_cast<uintptr_t>(Op));

    if (CurrentWriteCursor) {
      CurrentWriteCursor->append(ListBegin, Node);
    }
    CurrentWriteCursor = Node;
    return Node;
  }

  // Returns the node at index `SSANode` of the node list.
  Ref GetNode(uint32_t SSANode) {
    uintptr_t ListBegin = DualListData.ListBegin();
    Ref Node = reinterpret_cast<Ref>(ListBegin + SSANode * sizeof(OrderedNode));
    return Node;
  }

  // Allocates a new list node and fills it with a byte copy of `OldNode`.
  Ref EmplaceOrphanedNode(Ref OldNode) {
    size_t Size = sizeof(OrderedNode);
    Ref Ptr = reinterpret_cast<Ref>(DualListData.ListAllocate(Size));
    memcpy(Ptr, OldNode, Size);
    return Ptr;
  }

  // MMX State can be either MMX (for 64bit) or x87 FPU (for 80bit)
  enum { MMXState_MMX, MMXState_X87 } MMXState = MMXState_MMX;

  // Overridden by dispatcher, stubbed for IR tests
  virtual void RecordX87Use() {}
  virtual void ChgStateX87_MMX() {}
  virtual void ChgStateMMX_X87() {}
  virtual void SaveNZCV(IROps Op) {}

  // Node after which CreateNode() appends; nullptr means "not linked".
  Ref CurrentWriteCursor = nullptr;

  // These could be combined with a little bit of work to be more efficient with memory usage. Isn't a big deal
  DualIntrusiveAllocatorThreadPool DualListData;

  Ref InvalidNode {};
  Ref CurrentCodeBlock {};
  fextl::vector<Ref> CodeBlocks;
  uint64_t Entry {};
  bool SupportsTSOImm9 {};

private:
  void ResetWorkingList();
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/IntrusiveIRList.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Interface/IR/IR.h"

#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/ThreadPoolAllocator.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

namespace FEXCore::IR {
/**
 * @brief This is purely an intrusive allocator
 * This doesn't support any form of ordering at all
 * Just provides a chunk of memory for allocating IR nodes from
 *
 * Two independent bump allocators ("data" and "list") are tracked. Each has its own base address and
 * current offset, and both are limited to `MemorySize` bytes. The base addresses are filled in by derived classes.
 *
 * Can potentially support reallocation if we are smart and make sure to invalidate anything holding a true pointer
 */
class DualIntrusiveAllocator {
public:
  ///< Returns true if `Size` more bytes fit in the data region.
  [[nodiscard]]
  bool DataCheckSize(size_t Size) const {
    size_t NewOffset = DataCurrentOffset + Size;
    return NewOffset <= MemorySize;
  }

  ///< Returns true if `Size` more bytes fit in the list region.
  [[nodiscard]]
  bool ListCheckSize(size_t Size) const {
    size_t NewOffset = ListCurrentOffset + Size;
    return NewOffset <= MemorySize;
  }

  ///< Bump-allocates `Size` bytes from the data region. Running out of space is only caught by an assertion.
  [[nodiscard]]
  void* DataAllocate(size_t Size) {
    LOGMAN_THROW_A_FMT(DataCheckSize(Size), "Ran out of space in DualIntrusiveAllocator during allocation");
    size_t NewOffset = DataCurrentOffset + Size;
    uintptr_t NewPointer = Data + DataCurrentOffset;
    DataCurrentOffset = NewOffset;
    return reinterpret_cast<void*>(NewPointer);
  }

  ///< Bump-allocates `Size` bytes from the list region. Running out of space is only caught by an assertion.
  [[nodiscard]]
  void* ListAllocate(size_t Size) {
    LOGMAN_THROW_A_FMT(ListCheckSize(Size), "Ran out of space in DualIntrusiveAllocator during allocation");
    size_t NewOffset = ListCurrentOffset + Size;
    uintptr_t NewPointer = List + ListCurrentOffset;
    ListCurrentOffset = NewOffset;
    return reinterpret_cast<void*>(NewPointer);
  }

  ///< Number of bytes currently allocated from the data region.
  [[nodiscard]]
  size_t DataSize() const {
    return DataCurrentOffset;
  }
  ///< Capacity in bytes of the data region.
  [[nodiscard]]
  size_t DataBackingSize() const {
    return MemorySize;
  }

  ///< Number of bytes currently allocated from the list region.
  [[nodiscard]]
  size_t ListSize() const {
    return ListCurrentOffset;
  }
  ///< Capacity in bytes of the list region.
  [[nodiscard]]
  size_t ListBackingSize() const {
    return MemorySize;
  }

  ///< Base address of the data region.
  [[nodiscard]]
  uintptr_t DataBegin() const {
    return Data;
  }
  ///< Base address of the list region.
  [[nodiscard]]
  uintptr_t ListBegin() const {
    return List;
  }

  ///< Rewinds both regions to empty. The backing memory itself is not touched.
  void Reset() {
    DataCurrentOffset = 0;
    ListCurrentOffset = 0;
  }

  /**
   * @brief Copies the used portion of both regions from `rhs` into this allocator and adopts its offsets.
   *
   * The used contents of `rhs` must fit inside this allocator's backing memory; this is checked by assertion
   * before anything is written. Copying an allocator onto itself is a no-op.
   */
  void CopyData(const DualIntrusiveAllocator& rhs) {
    if (this == &rhs) {
      // memcpy with identical source and destination would be an overlapping copy; nothing to do anyway.
      return;
    }

    LOGMAN_THROW_A_FMT(rhs.DataCurrentOffset <= MemorySize && rhs.ListCurrentOffset <= MemorySize,
                       "Source contents do not fit in DualIntrusiveAllocator during copy");

    DataCurrentOffset = rhs.DataCurrentOffset;
    ListCurrentOffset = rhs.ListCurrentOffset;
    memcpy(reinterpret_cast<void*>(Data), reinterpret_cast<const void*>(rhs.Data), DataCurrentOffset);
    memcpy(reinterpret_cast<void*>(List), reinterpret_cast<const void*>(rhs.List), ListCurrentOffset);
  }

protected:
  ///< Only derived classes construct this; they are responsible for setting `Data` and `List`.
  DualIntrusiveAllocator(size_t Size)
    : MemorySize {Size} {}

  uintptr_t Data {};
  uintptr_t List {};
  size_t DataCurrentOffset {0};
  size_t ListCurrentOffset {0};
  size_t MemorySize {};
};

// DualIntrusiveAllocator backed by a single FEXCore::Allocator::malloc block of `Size * 2` bytes.
// The first `Size` bytes form the data region, the following `Size` bytes the list region.
// NOTE(review): the destructor frees the block but no copy operations are declared, so copying an
// instance looks like it would free the same block twice - confirm this type is never copied.
class DualIntrusiveAllocatorMalloc final : public DualIntrusiveAllocator {
public:
  DualIntrusiveAllocatorMalloc(size_t Size)
    : DualIntrusiveAllocator {Size} {
    void* const Backing = FEXCore::Allocator::malloc(Size * 2);
    Data = reinterpret_cast<uintptr_t>(Backing);
    List = Data + Size;
  }

  ~DualIntrusiveAllocatorMalloc() {
    // `Data` is the start of the single allocation; `List` points into the same block.
    void* const Backing = reinterpret_cast<void*>(Data);
    FEXCore::Allocator::free(Backing);
  }
};

// DualIntrusiveAllocator whose backing memory comes from a pooled buffer object of `Size * 2` bytes.
// The base pointers are only (re)established when ReownOrClaimBuffer() is called.
class DualIntrusiveAllocatorThreadPool final : public DualIntrusiveAllocator {
public:
  DualIntrusiveAllocatorThreadPool(FEXCore::Utils::IntrusivePooledAllocator& ThreadAllocator, size_t Size)
    : DualIntrusiveAllocator {Size}
    , PoolObject {ThreadAllocator, Size * 2} {}

  // Obtains the pooled buffer and splits it: data region first, list region `MemorySize` bytes later.
  void ReownOrClaimBuffer() {
    const uintptr_t Base = PoolObject.ReownOrClaimBuffer();
    Data = Base;
    List = Base + MemorySize;
  }

  // Forwards to the pool object's delayed disown.
  void DelayedDisownBuffer() {
    PoolObject.DelayedDisownBuffer();
  }

private:
  Utils::PoolBufferWithTimedRetirement<uintptr_t, 5000, 500> PoolObject;
};

// Read-only style view over an IR data region and its node list region.
// The regions are referenced either through the two stored pointers or, when those pointers are null,
// through the trailing `InlineData` storage (data first, list starting at `InlineData[DataSize]`).
class IRListView final {
public:
  IRListView() = delete;

  // View over the currently used portions of an allocator's data and list regions.
  IRListView(DualIntrusiveAllocator* Data)
    : IRListView(reinterpret_cast<void*>(Data->DataBegin()), reinterpret_cast<void*>(Data->ListBegin()), Data->DataSize(), Data->ListSize()) {}

  // View referencing the same pointers and sizes as an existing view.
  IRListView(IRListView* Old)
    : IRListView(Old->IRDataInternal, Old->ListDataInternal, Old->DataSize, Old->ListSize) {}

  IRListView(void* IRData_, void* ListData_, size_t DataSize_, size_t ListSize_)
    : IRDataInternal(IRData_)
    , ListDataInternal(ListData_)
    , DataSize(DataSize_)
    , ListSize(ListSize_) {}

  // Size of this object plus both regions.
  [[nodiscard]]
  size_t GetInlineSize() const {
    static_assert(sizeof(*this) == 32);
    return sizeof(*this) + DataSize + ListSize;
  }

  [[nodiscard]]
  size_t GetDataSize() const {
    return DataSize;
  }
  [[nodiscard]]
  size_t GetListSize() const {
    return ListSize;
  }
  // Number of OrderedNode-sized slots in the list region.
  [[nodiscard]]
  size_t GetSSACount() const {
    return ListSize / sizeof(OrderedNode);
  }

  [[nodiscard]]
  NodeID GetID(const Ref Node) const {
    return Node->Wrapped(GetListData()).ID();
  }

  // The header node is the node at list offset sizeof(OrderedNode), i.e. the second slot.
  [[nodiscard]]
  Ref GetHeaderNode() const {
    OrderedNodeWrapper Wrapped;
    Wrapped.NodeOffset = sizeof(OrderedNode);
    return Wrapped.GetNode(GetListData());
  }

  [[nodiscard]]
  IROp_IRHeader* GetHeader() const {
    return GetOp<IROp_IRHeader>(GetHeaderNode());
  }

  // Value of the IR header's PostRA field.
  [[nodiscard]]
  unsigned PostRA() const {
    return GetHeader()->PostRA;
  }

  // Value of the IR header's SpillSlots field.
  [[nodiscard]]
  unsigned SpillSlots() const {
    return GetHeader()->SpillSlots;
  }

  // Returns the op a node refers to, cast to T.
  template<typename T>
  [[nodiscard]]
  T* GetOp(Ref Node) const {
    auto OpHeader = Node->Op(GetData());
    auto Op = OpHeader->template CW<T>();

    // If we are casting to something narrower than just the header, check the opcode.
    if constexpr (!std::is_same<T, IROp_Header>::value) {
      LOGMAN_THROW_A_FMT(Op->OPCODE == Op->Header.Op, "Expected Node to be '{}'. Found '{}' instead", GetName(Op->OPCODE),
                         GetName(Op->Header.Op));
    }

    return Op;
  }

  // Same as above, but resolves the node from a wrapper first.
  template<typename T>
  [[nodiscard]]
  T* GetOp(OrderedNodeWrapper Wrapper) const {
    auto Node = Wrapper.GetNode(GetListData());
    return GetOp<T>(Node);
  }

  [[nodiscard]]
  Ref GetNode(OrderedNodeWrapper Wrapper) const {
    return Wrapper.GetNode(GetListData());
  }

  ///< Gets an OrderedNode from the IRListView as an OrderedNodeWrapper.
  [[nodiscard]]
  OrderedNodeWrapper WrapNode(Ref Node) const {
    return Node->Wrapped(GetListData());
  }

private:
  // Range over the code blocks, starting at the header's `Blocks` entry.
  struct BlockRange {
    using iterator = NodeIterator;
    const IRListView* View;

    BlockRange(const IRListView* parent)
      : View(parent) {};

    [[nodiscard]]
    iterator begin() const noexcept {
      auto Header = View->GetHeader();
      return iterator(View->GetListData(), View->GetData(), Header->Blocks);
    }

    [[nodiscard]]
    iterator end() const noexcept {
      return iterator(View->GetListData(), View->GetData());
    }
  };

  // Range over the code of one block, starting at that block's `Begin` entry.
  struct CodeRange {
    using iterator = NodeIterator;
    const IRListView* View;
    const OrderedNodeWrapper BlockWrapper;

    CodeRange(const IRListView* parent, OrderedNodeWrapper block)
      : View(parent)
      , BlockWrapper(block) {};

    [[nodiscard]]
    iterator begin() const noexcept {
      auto Block = View->GetOp<IROp_CodeBlock>(BlockWrapper);
      return iterator(View->GetListData(), View->GetData(), Block->Begin);
    }

    [[nodiscard]]
    iterator end() const noexcept {
      return iterator(View->GetListData(), View->GetData());
    }
  };

  // Range starting at the header's `Blocks` entry, walked with AllNodesIterator.
  struct AllCodeRange {
    using iterator = AllNodesIterator; // Different iterator type than the other ranges
    const IRListView* View;

    AllCodeRange(const IRListView* parent)
      : View(parent) {};

    [[nodiscard]]
    iterator begin() const noexcept {
      auto Header = View->GetHeader();
      return iterator(View->GetListData(), View->GetData(), Header->Blocks);
    }

    [[nodiscard]]
    iterator end() const noexcept {
      return iterator(View->GetListData(), View->GetData());
    }
  };

public:
  using iterator = NodeIterator;

  [[nodiscard]]
  BlockRange GetBlocks() const {
    return BlockRange(this);
  }

  [[nodiscard]]
  CodeRange GetCode(const Ref block) const {
    return CodeRange(this, block->Wrapped(GetListData()));
  }

  [[nodiscard]]
  AllCodeRange GetAllCode() const {
    return AllCodeRange(this);
  }

  // Iterator positioned at list offset sizeof(OrderedNode), the same slot GetHeaderNode() returns.
  [[nodiscard]]
  iterator begin() const noexcept {
    OrderedNodeWrapper Wrapped;
    Wrapped.NodeOffset = sizeof(OrderedNode);
    return iterator(GetListData(), GetData(), Wrapped);
  }

  /**
   * @brief This is not an iterator that you can reverse iterate through!
   *
   * @return Our iterator sentinel (node offset 0) to ensure ending correctly
   */
  [[nodiscard]]
  iterator end() const noexcept {
    OrderedNodeWrapper Wrapped;
    Wrapped.NodeOffset = 0;
    return iterator(GetListData(), GetData(), Wrapped);
  }

  /**
   * @brief Convert an OrderedNodeWrapper to an iterator that we can iterate over
   * @return Iterator for this op
   */
  [[nodiscard]]
  iterator at(OrderedNodeWrapper Wrapped) const noexcept {
    return iterator(GetListData(), GetData(), Wrapped);
  }

  // Iterator for the node with the given ID; the ID is scaled by sizeof(OrderedNode) to form the list offset.
  [[nodiscard]]
  iterator at(NodeID ID) const noexcept {
    OrderedNodeWrapper Wrapped;
    Wrapped.NodeOffset = ID.Value * sizeof(OrderedNode);
    return iterator(GetListData(), GetData(), Wrapped);
  }

  // Iterator for a node given by pointer.
  [[nodiscard]]
  iterator at(const Ref Node) const noexcept {
    const auto ListData = GetListData();
    auto Wrapped = Node->Wrapped(ListData);
    return iterator(ListData, GetData(), Wrapped);
  }

  // Base address of the data region: the stored pointer if non-null, otherwise the inline storage.
  [[nodiscard]]
  uintptr_t GetData() const {
    return reinterpret_cast<uintptr_t>(IRDataInternal ? IRDataInternal : InlineData);
  }

  // Base address of the list region: the stored pointer if non-null, otherwise the inline storage past the data.
  [[nodiscard]]
  uintptr_t GetListData() const {
    return reinterpret_cast<uintptr_t>(ListDataInternal ? ListDataInternal : &InlineData[DataSize]);
  }

private:
  void* IRDataInternal;
  void* ListDataInternal;
  size_t DataSize;
  size_t ListSize;
  // Zero-length trailing array; used as the storage when the pointers above are null.
  uint8_t InlineData[0];
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/PassManager.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: ir|opts ~ IR to IR Optimization
tags: ir|opts
desc: Defines which passes are run, and runs them
$end_info$
*/

#include "Interface/Context/Context.h"
#include "Interface/IR/PassManager.h"
#include "Interface/IR/Passes.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Profiler.h>

namespace FEXCore::IR {
class IREmitter;

// Inserts IR dumper passes into the pass list according to the PassManagerDumpIR configuration bits:
//  - BEFOREOPT:  one dumper at the very start
//  - BEFOREPASS: one dumper in front of each pass
//  - AFTERPASS:  one dumper behind each pass
//  - AFTEROPT:   one dumper at the very end (unless AFTERPASS already handles the end of the list)
void PassManager::Finalize() {
  if (!PassManagerDumpIR()) {
    // Not configured to dump any IR, just return.
    return;
  }

  const bool DumpBeforeOpt = PassManagerDumpIR() & FEXCore::Config::PassManagerDumpIR::BEFOREOPT;
  const bool DumpBeforePass = PassManagerDumpIR() & FEXCore::Config::PassManagerDumpIR::BEFOREPASS;
  const bool DumpAfterPass = PassManagerDumpIR() & FEXCore::Config::PassManagerDumpIR::AFTERPASS;
  const bool DumpAfterOpt = PassManagerDumpIR() & FEXCore::Config::PassManagerDumpIR::AFTEROPT;

  auto Cursor = Passes.begin();
  if (DumpBeforeOpt) {
    // Dumper at the start; step over it so the cursor is back on the first real pass.
    Cursor = InsertAt(Cursor, Debug::CreateIRDumper());
    ++Cursor;
  }

  if (DumpBeforePass || DumpAfterPass) {
    // When the start dumper exists and BEFOREPASS is set, the first pass is stepped over entirely
    // (no dumper is inserted around it in this iteration).
    bool SkipFirstBefore = DumpBeforeOpt;

    while (Cursor != Passes.end()) {
      if (DumpBeforePass) {
        if (SkipFirstBefore) {
          SkipFirstBefore = false;
          ++Cursor;
          continue;
        }

        // Dumper in front of the current pass, then step back onto the pass.
        Cursor = InsertAt(Cursor, Debug::CreateIRDumper());
        ++Cursor;
      }

      // Step over the current pass.
      ++Cursor;

      if (DumpAfterPass) {
        // Dumper behind the pass, then step past it.
        Cursor = InsertAt(Cursor, Debug::CreateIRDumper());
        ++Cursor;
      }
    }
  }

  if (DumpAfterOpt && !DumpAfterPass) {
    // Insert final IRDumper.
    InsertAt(Passes.end(), Debug::CreateIRDumper());
  }
}

// Registers the default optimization passes, unless passes are disabled through the O0 config option.
void PassManager::AddDefaultPasses(FEXCore::Context::ContextImpl* ctx) {
  FEX_CONFIG_OPT(DisablePasses, O0);

  if (DisablePasses()) {
    return;
  }

  // The X87 pass is told the operating size matching the guest mode.
  const auto ModeSize = ctx->Config.Is64BitMode ? IR::OpSize::i64Bit : IR::OpSize::i32Bit;
  InsertPass(CreateX87StackOptimizationPass(ctx->HostFeatures, ModeSize));
  InsertPass(CreateDeadFlagCalculationEliminination());
}

// Registers the IR validation pass under the name "IRValidation".
// Compiles to an empty function when assertions are disabled.
void PassManager::AddDefaultValidationPasses() {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  InsertValidationPass(Validation::CreateIRValidation(), "IRValidation");
#endif
}

// Appends the register allocation pass and registers it under the name "RA".
void PassManager::InsertRegisterAllocationPass(FEXCore::Context::ContextImpl* ctx) {
  InsertPass(IR::CreateRegisterAllocationPass(&ctx->CPUID), "RA");
}

// Runs every registered pass in insertion order, followed by the validation passes when assertions are enabled.
void PassManager::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::Run");

  for (const auto& CurrentPass : Passes) {
    CurrentPass->Run(IREmit);
  }

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  for (const auto& Validator : ValidationPasses) {
    Validator->Run(IREmit);
  }
#endif
}
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/PassManager.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
$end_info$
*/

#pragma once

#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/ThreadPoolAllocator.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>

#include <functional>
#include <utility>

namespace FEXCore::Context {
class ContextImpl;
}

namespace FEXCore::HLE {
class SyscallHandler;
}

namespace FEXCore::IR {
class PassManager;
class IREmitter;

// Base class of everything the PassManager can run over the IR.
class Pass {
public:
  virtual ~Pass() = default;

  // Executes the pass on the IR held by the given emitter.
  virtual void Run(IREmitter* IREmit) = 0;

  // Remembers which manager this pass belongs to.
  void RegisterPassManager(PassManager* Owner) {
    Manager = Owner;
  }

protected:
  PassManager* Manager {};
};

class PassManager final {
public:
  void AddDefaultPasses(FEXCore::Context::ContextImpl* ctx);
  void AddDefaultValidationPasses();
  Pass* InsertPass(fextl::unique_ptr<Pass> Pass, fextl::string Name = "") {
    auto PassPtr = InsertAt(Passes.end(), std::move(Pass))->get();

    if (!Name.empty()) {
      NameToPassMaping[Name] = PassPtr;
    }
    return PassPtr;
  }

  void InsertRegisterAllocationPass(FEXCore::Context::ContextImpl* ctx);

  void Run(IREmitter* IREmit);

  bool HasPass(fextl::string Name) const {
    return NameToPassMaping.contains(Name);
  }

  template<typename T>
  T* GetPass(fextl::string Name) {
    return dynamic_cast<T*>(NameToPassMaping[Name]);
  }

  Pass* GetPass(fextl::string Name) {
    return NameToPassMaping[Name];
  }

  void RegisterSyscallHandler(FEXCore::HLE::SyscallHandler* Handler) {
    SyscallHandler = Handler;
  }

  void Finalize();

protected:
  FEXCore::HLE::SyscallHandler* SyscallHandler {};

private:
  using PassArrayType = fextl::vector<fextl::unique_ptr<Pass>>;
  PassArrayType::iterator InsertAt(PassArrayType::iterator pos, fextl::unique_ptr<Pass> Pass) {
    Pass->RegisterPassManager(this);
    return Passes.insert(pos, std::move(Pass));
  }
  PassArrayType Passes;
  fextl::unordered_map<fextl::string, Pass*> NameToPassMaping;

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  fextl::vector<fextl::unique_ptr<Pass>> ValidationPasses;
  void InsertValidationPass(fextl::unique_ptr<Pass> Pass, fextl::string Name = "") {
    Pass->RegisterPassManager(this);
    auto PassPtr = ValidationPasses.emplace_back(std::move(Pass)).get();

    if (!Name.empty()) {
      NameToPassMaping[Name] = PassPtr;
    }
  }
#endif

  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  FEX_CONFIG_OPT(PassManagerDumpIR, PASSMANAGERDUMPIR);
};
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|debug
desc: Prints IR
$end_info$
*/

#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"
#include "Interface/Core/OpcodeDispatcher.h"

#include <FEXCore/IR/IR.h>

namespace FEXCore::IR::Debug {
// Pass that prints the current IR. The destination is selected by the DUMPIR configuration option.
class IRDumper final : public FEXCore::IR::Pass {
public:
  IRDumper();
  void Run(IREmitter* IREmit) override;

private:
  FEX_CONFIG_OPT(DumpIR, DUMPIR);
  // Set by the constructor when the DumpIR option is none of "stderr", "stdout", "no" or "server".
  bool DumpToFile {};
  // Set by the constructor when the DumpIR option is "server".
  bool DumpToLog {};
};

// Decides the dump destination from the DumpIR option:
// "stderr", "stdout" and "no" set neither flag, "server" selects the log, anything else selects a file.
IRDumper::IRDumper() {
  const auto& Target = DumpIR();

  const bool NeedsNoFlag = Target == "stderr" || Target == "stdout" || Target == "no";
  if (NeedsNoFlag) {
    // Intentionally do nothing
    return;
  }

  if (Target == "server") {
    DumpToLog = true;
    return;
  }

  DumpToFile = true;
}

// Dumps the emitter's current IR to the configured destination (standard stream, file or log).
// When writing to a file, the file is named after the block's original RIP and whether the IR is post-RA.
void IRDumper::Run(IREmitter* IREmit) {
  FEXCore::File::File FD {};
  const auto& Target = DumpIR();
  if (Target == "stderr") {
    FD = FEXCore::File::File::GetStdERR();
  } else if (Target == "stdout") {
    FD = FEXCore::File::File::GetStdOUT();
  }

  auto IR = IREmit->ViewIR();
  auto HeaderOp = IR.GetHeader();
  LOGMAN_THROW_A_FMT(HeaderOp->Header.Op == OP_IRHEADER, "First op wasn't IRHeader");

  const bool IsPostRA = IR.PostRA();
  const auto RIP = +HeaderOp->OriginalRIP;

  // DumpIRStr might be no if not dumping but ShouldDump is set in OpDisp
  if (DumpToFile) {
    const auto fileName = fextl::fmt::format("{}/{:x}{}", Target, RIP, IsPostRA ? "-post.ir" : "-pre.ir");
    FD = FEXCore::File::File(fileName.c_str(),
                             FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE);
  }

  const bool HaveFile = FD.IsValid();
  if (!HaveFile && !DumpToLog) {
    // Nowhere to write to.
    return;
  }

  fextl::stringstream out;
  FEXCore::IR::Dump(&out, &IR);

  const char* Stage = IsPostRA ? "post" : "pre";
  if (HaveFile) {
    fextl::fmt::print(FD, "IR-{} 0x{:x}:\n{}\n@@@@@\n", Stage, RIP, out.str());
  } else {
    LogMan::Msg::IFmt("IR-{} 0x{:x}:\n{}\n@@@@@\n", Stage, RIP, out.str());
  }
}

// Factory: creates an IRDumper and hands it out through the generic Pass interface.
fextl::unique_ptr<FEXCore::IR::Pass> CreateIRDumper() {
  fextl::unique_ptr<FEXCore::IR::Pass> Dumper = fextl::make_unique<IRDumper>();
  return Dumper;
}
} // namespace FEXCore::IR::Debug


================================================
FILE: FEXCore/Source/Interface/IR/Passes/IRValidation.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
desc: Sanity checking pass
$end_info$
*/

#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include "Interface/IR/RegisterAllocationData.h"
#include "Interface/IR/Passes/IRValidation.h"
#include "Interface/IR/Passes/RegisterAllocationPass.h"

#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <memory>
#include <stddef.h>
#include <unordered_map>
#include <utility>

namespace FEXCore::IR::Validation {


// Releases the liveness bitset's storage.
IRValidation::~IRValidation() {
  NodeIsLive.Free();
}

// Sanity-checks the emitter's current IR:
//  - ops with a destination have a valid size (and, post-RA, a register and register class)
//  - arguments only reference nodes defined earlier in the same block (inline constants and the header excepted)
//  - jump targets are code blocks; blocks have at most two successors
//  - every block ends in EndBlock, preceded by a block-exit op
//  - pre-RA, the use count recorded on each node matches the number of argument references found
// On error the IR and the collected messages are logged and an assertion message is raised.
void IRValidation::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::IRValidation");

  bool HadError = false;
  bool HadWarning = false;

  fextl::ostringstream Errors;
  fextl::ostringstream Warnings;

  auto CurrentIR = IREmit->ViewIR();

  OffsetToBlockMap.clear();
  EntryBlock = nullptr;

  uint32_t Count = CurrentIR.GetSSACount();
  if (Count > MaxNodes) {
    NodeIsLive.Realloc(Count);
    // Remember the new capacity so the bitset only gets reallocated when it actually needs to grow.
    MaxNodes = Count;
  }

  // Number of argument references seen per node, indexed by node ID.
  fextl::vector<uint32_t> Uses(Count, 0);

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  auto HeaderOp = CurrentIR.GetHeader();
  LOGMAN_THROW_A_FMT(HeaderOp->Header.Op == OP_IRHEADER, "First op wasn't IRHeader");
#endif

  for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
    auto BlockIROp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();
    LOGMAN_THROW_A_FMT(BlockIROp->Header.Op == OP_CODEBLOCK, "IR type failed to be a code block");

    // The first block visited is treated as the entry block.
    if (!EntryBlock) {
      EntryBlock = BlockNode;
    }

    const auto BlockID = CurrentIR.GetID(BlockNode);
    BlockInfo* CurrentBlock = &OffsetToBlockMap.try_emplace(BlockID).first->second;

    // We only allow defs local to a single block, so clear live set per block
    NodeIsLive.MemClear(Count);

    for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
      const auto ID = CurrentIR.GetID(CodeNode);
      const auto OpSize = IROp->Size;

      if (GetHasDest(IROp->Op)) {
        HadError |= OpSize == IR::OpSize::iInvalid;
        // Does the op have a destination of size 0?
        if (OpSize == IR::OpSize::iInvalid) {
          Errors << "%" << ID << ": Had destination but with no size" << std::endl;
        }

        // Does the node have zero uses? Should have been DCE'd
        if (CodeNode->GetUses() == 0) {
          HadWarning |= true;
          Warnings << "%" << ID << ": Destination created but had no uses" << std::endl;
        }

        if (CurrentIR.PostRA()) {
          // After RA, the destination needs to be assigned a register and class
          auto PhyReg = PhysicalRegister(CodeNode);

          const auto ExpectedClass = IR::GetRegClass(IROp->Op);
          const auto AssignedClass = PhyReg.AsRegClass();

          // If no register class was assigned
          if (AssignedClass == IR::RegClass::Invalid) {
            HadError |= true;
            Errors << "%" << ID << ": Had destination but with no register class assigned" << std::endl;
          }

          // If no physical register was assigned
          if (PhyReg.IsInvalid()) {
            HadError |= true;
            Errors << "%" << ID << ": Had destination but with no register assigned" << std::endl;
          }

          // Assigned class wasn't the expected class and it is a non-complex op
          if (AssignedClass != ExpectedClass && ExpectedClass != IR::RegClass::Complex) {
            HadWarning |= true;
            Warnings << "%" << ID << ": Destination had register class " << uint32_t(AssignedClass) << " When register class "
                     << uint32_t(ExpectedClass) << " Was expected" << std::endl;
          }
        }
      }

      uint8_t NumArgs = IR::GetRAArgs(IROp->Op);

      for (uint32_t i = 0; i < NumArgs; ++i) {
        OrderedNodeWrapper Arg = IROp->Args[i];
        const auto ArgID = Arg.ID();
        if (Arg.IsImmediate()) {
          continue;
        }

        IROps Op = CurrentIR.GetOp<IROp_Header>(Arg)->Op;

        if (ArgID.IsValid()) {
          Uses[ArgID.Value]++;
        }

        // We do not validate the location of inline constants because it's
        // irrelevant, they're ignored by RA and always inlined to where they
        // need to be. This lets us pool inline constants globally.
        bool Ignore = (Op == OP_IRHEADER || Op == OP_INLINECONSTANT);

        if (!Ignore && ArgID.IsValid() && !NodeIsLive.Get(ArgID.Value)) {
          HadError |= true;
          Errors << "%" << ID << ": Arg[" << i << "] references invalid %" << ArgID << std::endl;
        }
      }

      // The node becomes visible to later ops in this block only after its own arguments were checked.
      NodeIsLive.Set(ID.Value);

      switch (IROp->Op) {
      case IR::OP_EXITFUNCTION: {
        CurrentBlock->HasExit = true;
        break;
      }
      case IR::OP_CONDJUMP: {
        auto Op = IROp->C<IR::IROp_CondJump>();

        OrderedNode* TrueTargetNode = CurrentIR.GetNode(Op->TrueBlock);
        OrderedNode* FalseTargetNode = CurrentIR.GetNode(Op->FalseBlock);

        CurrentBlock->Successors.emplace_back(TrueTargetNode);
        CurrentBlock->Successors.emplace_back(FalseTargetNode);

        const FEXCore::IR::IROp_Header* TrueTargetOp = CurrentIR.GetOp<IROp_Header>(TrueTargetNode);
        const FEXCore::IR::IROp_Header* FalseTargetOp = CurrentIR.GetOp<IROp_Header>(FalseTargetNode);

        if (TrueTargetOp->Op != OP_CODEBLOCK) {
          HadError |= true;
          Errors << "CondJump %" << ID << ": True Target Jumps to Op that isn't the begining of a block" << std::endl;
        } else {
          auto Block = OffsetToBlockMap.try_emplace(Op->TrueBlock.ID()).first;
          Block->second.Predecessors.emplace_back(BlockNode);
        }

        if (FalseTargetOp->Op != OP_CODEBLOCK) {
          HadError |= true;
          Errors << "CondJump %" << ID << ": False Target Jumps to Op that isn't the begining of a block" << std::endl;
        } else {
          auto Block = OffsetToBlockMap.try_emplace(Op->FalseBlock.ID()).first;
          Block->second.Predecessors.emplace_back(BlockNode);
        }

        break;
      }
      case IR::OP_JUMP: {
        auto Op = IROp->C<IR::IROp_Jump>();
        OrderedNode* TargetNode = CurrentIR.GetNode(Op->Header.Args[0]);
        CurrentBlock->Successors.emplace_back(TargetNode);

        const FEXCore::IR::IROp_Header* TargetOp = CurrentIR.GetOp<IROp_Header>(TargetNode);
        if (TargetOp->Op != OP_CODEBLOCK) {
          HadError |= true;
          Errors << "Jump %" << ID << ": Jump to Op that isn't the begining of a block" << std::endl;
        } else {
          auto Block = OffsetToBlockMap.try_emplace(Op->Header.Args[0].ID()).first;
          Block->second.Predecessors.emplace_back(BlockNode);
        }
        break;
      }
      default:
        // LOGMAN_MSG_A_FMT("Unknown IR Op: {}({})", IROp->Op, FEXCore::IR::GetName(IROp->Op));
        break;
      }
    }

    // Blocks can only have zero (Exit), 1 (Unconditional branch) or 2 (Conditional) successors
    size_t NumSuccessors = CurrentBlock->Successors.size();
    if (NumSuccessors > 2) {
      HadError |= true;
      Errors << "%" << BlockID << " Has " << NumSuccessors << " successors which is too many" << std::endl;
    }

    {
      auto GetOp = [](auto Code) {
        auto [CodeNode, IROp] = Code();
        return IROp->Op;
      };

      auto CodeCurrent = CurrentIR.at(BlockIROp->Last);

      // Last instruction in the block must be EndBlock
      {
        auto Op = GetOp(CodeCurrent);
        if (Op != IR::OP_ENDBLOCK) {
          HadError |= true;
          Errors << "%" << BlockID << " Failed to end block with EndBlock" << std::endl;
        }
      }

      --CodeCurrent;

      // Blocks need to have an instruction that leaves the block in some way before the EndBlock instruction
      {
        auto Op = GetOp(CodeCurrent);
        if (!IsBlockExit(Op)) {
          HadError |= true;
          Errors << "%" << BlockID << " Didn't have a block exit IR op as its last instruction" << std::endl;
        }
      }
    }
  }

  // Use counts are only relevant pre-RA.
  if (!CurrentIR.PostRA()) {
    for (uint32_t i = 0; i < CurrentIR.GetSSACount(); i++) {
      auto [Node, IROp] = CurrentIR.at(IR::NodeID {i})();
      if (Node->NumUses != Uses[i] && IROp->Op != OP_CODEBLOCK && IROp->Op != OP_IRHEADER) {
        HadError |= true;
        Errors << "%" << i << " Has " << Uses[i] << " Uses, but reports " << Node->NumUses << std::endl;
      }
    }
  }

  // Warnings are currently discarded: resetting the flag here means only errors trigger the report below.
  HadWarning = false;
  if (HadError || HadWarning) {
    fextl::stringstream Out;
    FEXCore::IR::Dump(&Out, &CurrentIR);

    if (HadError) {
      Out << "Errors:" << std::endl << Errors.str() << std::endl;
    }

    if (HadWarning) {
      Out << "Warnings:" << std::endl << Warnings.str() << std::endl;
    }

    LogMan::Msg::EFmt("{}", Out.str());

    LOGMAN_MSG_A_FMT("Encountered IR validation Error");
  }
}

// Factory: creates the IR validation pass and hands it out through the generic Pass interface.
fextl::unique_ptr<FEXCore::IR::Pass> CreateIRValidation() {
  fextl::unique_ptr<FEXCore::IR::Pass> Validator = fextl::make_unique<IRValidation>();
  return Validator;
}
} // namespace FEXCore::IR::Validation


================================================
FILE: FEXCore/Source/Interface/IR/Passes/IRValidation.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Common/BitSet.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>

namespace FEXCore::IR::Validation {

// Per-block bookkeeping collected by the IR validation pass.
// All members have explicit initializers so a default-constructed entry never holds indeterminate values.
struct BlockInfo {
  // Set when the block contains an exit-function op.
  bool HasExit {};
  const OrderedNode* BlockNode {};

  // Block nodes that jump to this block.
  fextl::vector<OrderedNode*> Predecessors;
  // Block nodes this block jumps to.
  fextl::vector<OrderedNode*> Successors;
};

// IR sanity checking pass.
class IRValidation final : public FEXCore::IR::Pass {
public:
  ~IRValidation();
  void Run(IREmitter* IREmit) override;

private:

  // One bit per node ID; tracks which nodes have been defined so far in the block being validated.
  BitSet<uint64_t> NodeIsLive {};
  // First block encountered during the most recent Run().
  OrderedNode* EntryBlock {};
  // Per-block information, keyed by the block's node ID.
  fextl::unordered_map<IR::NodeID, BlockInfo> OffsetToBlockMap;
  // Compared against the IR's SSA count in Run() to decide whether NodeIsLive must be reallocated.
  size_t MaxNodes {};
};
} // namespace FEXCore::IR::Validation


================================================
FILE: FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
$end_info$
*/

#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/deque.h>
#include <FEXCore/fextl/vector.h>

// Flag bit flags
#define FLAG_V (1U << 0)
#define FLAG_C (1U << 1)
#define FLAG_Z (1U << 2)
#define FLAG_N (1U << 3)
#define FLAG_P (1U << 4)
#define FLAG_A (1U << 5)

#define FLAG_ZCV (FLAG_Z | FLAG_C | FLAG_V)
#define FLAG_NZCV (FLAG_N | FLAG_ZCV)
#define FLAG_ALL (FLAG_NZCV | FLAG_A | FLAG_P)

namespace FEXCore::IR {

// Human-friendly description of how an instruction interacts with flags.
// Gets packed into a FlagInfo; the member order matters for the designated initializers used to build it.
struct FlagInfoUnpacked {
  // Set of flags read by the instruction.
  unsigned Read;

  // Set of flags written by the instruction. Happens AFTER the reads.
  unsigned Write;

  // If true, the instruction can be eliminated if its flag writes can all be
  // eliminated.
  bool CanEliminate;

  // If set, the opcode can be replaced with Replacement if its flag writes can
  // all be eliminated, or ReplacementNoWrite if its register write can be
  // eliminated.
  IROps Replacement;
  IROps ReplacementNoWrite;

  // Needs special handling
  bool Special;
};

// FlagInfoUnpacked packed into a single 64-bit value. Layout:
//   bits  0..7   Read
//   bits  8..15  Write
//   bit   16     CanEliminate
//   bits 32..47  Replacement
//   bits 48..62  ReplacementNoWrite
//   bit   63     Special
struct FlagInfo {
  uint64_t Raw;

  // Packs the unpacked description following the layout above.
  static constexpr struct FlagInfo Pack(struct FlagInfoUnpacked F) {
    uint64_t R = F.Read | (F.Write << 8) | (F.CanEliminate << 16) | (((uint64_t)F.Replacement) << 32) |
                 ((uint64_t)F.ReplacementNoWrite << 48) | (F.Special ? (1ull << 63) : 0);
    return {.Raw = R};
  }

  // True when nothing at all is recorded for the instruction.
  bool Trivial() const {
    return Raw == 0;
  }

  unsigned Read() const {
    return Bits(0, 8);
  }

  unsigned Write() const {
    return Bits(8, 8);
  }

  bool CanEliminate() const {
    return Bits(16, 1);
  }

  bool Special() const {
    return Bits(63, 1);
  }

  IROps Replacement() const {
    return (IROps)Bits(32, 16);
  }

  IROps ReplacementNoWrite() const {
    // Only 15 bits: bit 63 belongs to Special and must not leak into the opcode.
    return (IROps)Bits(48, 15);
  }

private:
  // Extracts Count bits starting at bit Start. Count must be below the width of unsigned.
  unsigned Bits(unsigned Start, unsigned Count) const {
    return (Raw >> Start) & ((1u << Count) - 1);
  }
};

// Per-block state used by the flag elimination pass.
// Instances are aggregate-initialized positionally, so the member order must not change.
struct BlockInfo {
  // IDs of blocks with an edge into this block.
  fextl::vector<uint32_t> Predecessors;
  Ref Node;
  // Set of FLAG_* bits associated with the block.
  uint8_t Flags;
  // True while the block is queued in the worklist.
  bool InWorklist;
};

struct ControlFlowGraph {
  fextl::vector<BlockInfo> BlockMap;
  IRListView& IR;

  void Init(fextl::deque<uint32_t>& Worklist, uint32_t BlockCount) {
    BlockMap.resize(BlockCount);

    for (unsigned ID = 0; ID < BlockCount; ++ID) {
      // Add the block with conservative flags and already in the worklist.
      auto Info = BlockInfo {{}, nullptr, FLAG_ALL, true};

      // Add some initial capacity
      Info.Predecessors.reserve(2);

      BlockMap[ID] = std::move(Info);
      Worklist.push_back(ID);
    }
  }

  BlockInfo* Get(uint32_t Block) {
    return &BlockMap[Block];
  }

  BlockInfo* Get(IROp_CodeBlock* Block) {
    return &BlockMap[Block->ID];
  }

  BlockInfo* Get(OrderedNodeWrapper Block) {
    return Get(IR.GetOp<IR::IROp_CodeBlock>(Block));
  }

  void RecordEdge(uint32_t From, OrderedNodeWrapper To) {
    auto Info = Get(To);
    Info->Predecessors.push_back(From);
  }

  void AddWorklist(fextl::deque<uint32_t>& Worklist, uint32_t Block) {
    auto Info = Get(Block);
    if (!Info->InWorklist) {
      Info->InWorklist = true;
      Worklist.push_front(Block);
    }
  }
};

// Optimization pass operating on flag calculations. Only the entry point is public;
// the private members are helpers whose definitions live further down in this file.
class DeadFlagCalculationEliminination final : public FEXCore::IR::Pass {
public:
  void Run(IREmitter* IREmit) override;

private:
  FlagInfo Classify(IROp_Header* Node);
  // Maps a condition code to the set of FLAG_* bits it depends on.
  unsigned FlagsForCondClassType(CondClass Cond);
  bool EliminateDeadCode(IREmitter* IREmit, Ref CodeNode, IROp_Header* IROp);
  void FoldBranch(IREmitter* IREmit, IRListView& CurrentIR, IROp_CondJump* Op, Ref CodeNode);
  CondClass X86ToArmFloatCond(CondClass X86);
  bool ProcessBlock(IREmitter* IREmit, IRListView& CurrentIR, Ref Block, ControlFlowGraph& CFG);
  void OptimizeParity(IREmitter* IREmit, IRListView& CurrentIR, ControlFlowGraph& CFG);
};

// Translates a condition code into the mask of NZCV flags that evaluating the
// condition reads. Unknown conditions assert and conservatively report all of
// NZCV.
unsigned DeadFlagCalculationEliminination::FlagsForCondClassType(CondClass Cond) {
  unsigned Mask = 0;

  switch (Cond) {
  // Always true: nothing is read.
  case CondClass::AL: Mask = 0; break;

  // Sign only.
  case CondClass::MI:
  case CondClass::PL: Mask = FLAG_N; break;

  // Zero only.
  case CondClass::EQ:
  case CondClass::NEQ: Mask = FLAG_Z; break;

  // Carry only.
  case CondClass::UGE:
  case CondClass::ULT: Mask = FLAG_C; break;

  // Overflow only (also the float unordered tests).
  case CondClass::VS:
  case CondClass::VC:
  case CondClass::FU:
  case CondClass::FNU: Mask = FLAG_V; break;

  // Zero and carry.
  case CondClass::UGT:
  case CondClass::ULE: Mask = FLAG_Z | FLAG_C; break;

  // Sign and overflow.
  case CondClass::SGE:
  case CondClass::SLT:
  case CondClass::FLU:
  case CondClass::FGE: Mask = FLAG_N | FLAG_V; break;

  // Sign, zero and overflow.
  case CondClass::SGT:
  case CondClass::SLE:
  case CondClass::FLEU:
  case CondClass::FGT: Mask = FLAG_N | FLAG_Z | FLAG_V; break;

  default:
    LOGMAN_THROW_A_FMT(false, "unknown cond class type");
    Mask = FLAG_NZCV;
    break;
  }

  return Mask;
}

// Compile-time flag classification of an opcode, used to populate the
// FlagInfos lookup table.
//
// The packed description uses these fields (as consumed by ProcessBlock):
//  - Read / Write: masks of flags the operation reads / writes.
//  - CanEliminate: the operation may be removed outright when its flag writes
//    are dead and its value is unused.
//  - Replacement: opcode to switch to when the flag writes are dead (a
//    variant without flag output).
//  - ReplacementNoWrite: opcode to switch to when the flags are still needed
//    but the value result is unused (a flags-only variant).
//  - Special: the description depends on the operation's operands and must be
//    computed at runtime by Classify().
//
// Opcodes not listed get an empty description, i.e. they neither read nor
// write flags.
constexpr FlagInfo ClassifyConst(IROps Op) {
  switch (Op) {
  // Arithmetic that produces both a value and NZCV.
  case OP_ANDWITHFLAGS:
    return FlagInfo::Pack({
      .Write = FLAG_NZCV,
      .Replacement = OP_AND,
      .ReplacementNoWrite = OP_TESTNZ,
    });

  case OP_ADDWITHFLAGS:
    return FlagInfo::Pack({
      .Write = FLAG_NZCV,
      .Replacement = OP_ADD,
      .ReplacementNoWrite = OP_ADDNZCV,
    });

  case OP_SUBWITHFLAGS:
    return FlagInfo::Pack({
      .Write = FLAG_NZCV,
      .Replacement = OP_SUB,
      .ReplacementNoWrite = OP_SUBNZCV,
    });

  // Carry-consuming arithmetic that produces both a value and NZCV.
  case OP_ADCWITHFLAGS:
    return FlagInfo::Pack({
      .Read = FLAG_C,
      .Write = FLAG_NZCV,
      .Replacement = OP_ADC,
      .ReplacementNoWrite = OP_ADCNZCV,
    });

  // No flags-only variant is listed for this one.
  case OP_ADCZEROWITHFLAGS:
    return FlagInfo::Pack({
      .Read = FLAG_C,
      .Write = FLAG_NZCV,
      .Replacement = OP_ADCZERO,
    });

  case OP_SBBWITHFLAGS:
    return FlagInfo::Pack({
      .Read = FLAG_C,
      .Write = FLAG_NZCV,
      .Replacement = OP_SBB,
      .ReplacementNoWrite = OP_SBBNZCV,
    });

  case OP_SHIFTFLAGS:
    // _ShiftFlags conditionally sets NZCV+PF, which we model here as a
    // read-modify-write. Logically, it also conditionally makes AF undefined,
    // which we model by omitting AF from both Read and Write sets (since
    // "cond ? AF : undef" may be optimized to "AF").
    return FlagInfo::Pack({
      .Read = FLAG_NZCV | FLAG_P,
      .Write = FLAG_NZCV | FLAG_P,
      .CanEliminate = true,
    });

  case OP_ROTATEFLAGS:
    // _RotateFlags conditionally sets CV, again modeled as RMW.
    return FlagInfo::Pack({
      .Read = FLAG_C | FLAG_V,
      .Write = FLAG_C | FLAG_V,
      .CanEliminate = true,
    });

  // Writes NZCV but is neither eliminable nor replaceable.
  case OP_RDRAND: return FlagInfo::Pack({.Write = FLAG_NZCV});

  // Pure NZCV producers: removable when all of NZCV is dead.
  case OP_ADDNZCV:
  case OP_SUBNZCV:
  case OP_TESTNZ:
  case OP_FCMP:
  case OP_STORENZCV:
    return FlagInfo::Pack({
      .Write = FLAG_NZCV,
      .CanEliminate = true,
    });

  case OP_AXFLAG:
    // Per the Arm spec, axflag reads Z/V/C but not N. It writes all flags.
    return FlagInfo::Pack({
      .Read = FLAG_ZCV,
      .Write = FLAG_NZCV,
      .CanEliminate = true,
    });

  // Modeled as writing only Z.
  case OP_CMPPAIRZ:
    return FlagInfo::Pack({
      .Write = FLAG_Z,
      .CanEliminate = true,
    });

  // Read-modify-write of the carry flag.
  case OP_CARRYINVERT:
    return FlagInfo::Pack({
      .Read = FLAG_C,
      .Write = FLAG_C,
      .CanEliminate = true,
    });

  // Writes N, Z and V but leaves C alone.
  case OP_SETSMALLNZV:
    return FlagInfo::Pack({
      .Write = FLAG_N | FLAG_Z | FLAG_V,
      .CanEliminate = true,
    });

  // Pure flag readers.
  case OP_LOADNZCV: return FlagInfo::Pack({.Read = FLAG_NZCV});

  case OP_ADC:
  case OP_ADCZERO:
  case OP_SBB: return FlagInfo::Pack({.Read = FLAG_C});

  // Flags-only carry arithmetic: reads C, writes NZCV.
  case OP_ADCNZCV:
  case OP_SBBNZCV:
    return FlagInfo::Pack({
      .Read = FLAG_C,
      .Write = FLAG_NZCV,
      .CanEliminate = true,
    });

  // Parity and auxiliary-carry flag accesses.
  case OP_LOADPF: return FlagInfo::Pack({.Read = FLAG_P});
  case OP_LOADAF: return FlagInfo::Pack({.Read = FLAG_A});
  case OP_STOREPF: return FlagInfo::Pack({.Write = FLAG_P, .CanEliminate = true});
  case OP_STOREAF: return FlagInfo::Pack({.Write = FLAG_A, .CanEliminate = true});

  // Operand-dependent: the real description is produced by Classify().
  case OP_NZCVSELECT:
  case OP_NZCVSELECTV:
  case OP_NZCVSELECTINCREMENT:
  case OP_NEG:
  case OP_CONDJUMP:
  case OP_CONDSUBNZCV:
  case OP_CONDADDNZCV:
  case OP_RMIFNZCV:
  case OP_INVALIDATEFLAGS: return FlagInfo::Pack({.Special = true});
  // Everything else does not interact with flags.
  default: return FlagInfo::Pack({});
  }
}

constexpr auto FlagInfos = std::invoke([] {
  std::array<FlagInfo, OP_LAST> ret = {};

  for (unsigned i = 0; i < OP_LAST; ++i) {
    ret[i] = ClassifyConst((IROps)i);
  }

  return ret;
});

// Returns the flag read/write description for an operation.
//
// Most opcodes are answered straight from the precomputed FlagInfos table.
// Opcodes marked Special there have a description that depends on their
// operands (a condition code, a mask, a flag set), which is decoded here.
FlagInfo DeadFlagCalculationEliminination::Classify(IROp_Header* IROp) {
  FlagInfo Static = FlagInfos[IROp->Op];
  if (!Static.Special()) {
    return Static;
  }

  // Description of an operation that only reads the flags tested by Cond.
  const auto ReadsCondition = [this](CondClass Cond) {
    return FlagInfo::Pack({.Read = FlagsForCondClassType(Cond)});
  };

  switch (IROp->Op) {
  case OP_NZCVSELECT:
  case OP_NZCVSELECTINCREMENT: return ReadsCondition(IROp->CW<IR::IROp_NZCVSelect>()->Cond);

  case OP_NZCVSELECTV: return ReadsCondition(IROp->CW<IR::IROp_NZCVSelectV>()->Cond);

  case OP_NEG: return ReadsCondition(IROp->CW<IR::IROp_Neg>()->Cond);

  case OP_CONDJUMP: {
    auto Jump = IROp->CW<IR::IROp_CondJump>();

    // A jump that compares registers itself does not look at the flags.
    if (Jump->FromNZCV) {
      return ReadsCondition(Jump->Cond);
    }

    return FlagInfo::Pack({});
  }

  case OP_CONDSUBNZCV:
  case OP_CONDADDNZCV: {
    // Reads whatever its condition tests, then overwrites all of NZCV.
    auto Cond = IROp->CW<IR::IROp_CondAddNZCV>();
    return FlagInfo::Pack({
      .Read = FlagsForCondClassType(Cond->Cond),
      .Write = FLAG_NZCV,
      .CanEliminate = true,
    });
  }

  case OP_RMIFNZCV: {
    auto Rmif = IROp->CW<IR::IROp_RmifNZCV>();

    // The instruction's mask can be used verbatim as our write set, provided
    // the bit assignments agree.
    static_assert(FLAG_N == (1 << 3), "rmif mask lines up with our bits");
    static_assert(FLAG_Z == (1 << 2), "rmif mask lines up with our bits");
    static_assert(FLAG_C == (1 << 1), "rmif mask lines up with our bits");
    static_assert(FLAG_V == (1 << 0), "rmif mask lines up with our bits");

    return FlagInfo::Pack({
      .Write = Rmif->Mask,
      .CanEliminate = true,
    });
  }

  case OP_INVALIDATEFLAGS: {
    auto Invalidate = IROp->CW<IR::IROp_InvalidateFlags>();

    // Translate from x86 raw flag bit positions to this pass's flag bits.
    // TODO: Make this translation less silly
    struct FlagMapping {
      unsigned X86Bit;
      unsigned Flag;
    };

    const FlagMapping Mappings[] = {
      {X86State::RFLAG_SF_RAW_LOC, FLAG_N}, {X86State::RFLAG_ZF_RAW_LOC, FLAG_Z}, {X86State::RFLAG_CF_RAW_LOC, FLAG_C},
      {X86State::RFLAG_OF_RAW_LOC, FLAG_V}, {X86State::RFLAG_PF_RAW_LOC, FLAG_P}, {X86State::RFLAG_AF_RAW_LOC, FLAG_A},
    };

    unsigned Invalidated = 0;
    for (const auto& Mapping : Mappings) {
      if (Invalidate->Flags & (1u << Mapping.X86Bit)) {
        Invalidated |= Mapping.Flag;
      }
    }

    // InvalidateFlags is modeled as a write of undefined values to the
    // selected flags. That lets the ordinary write-after-write handling
    // remove an earlier write that is followed by an invalidate, for free.
    return FlagInfo::Pack({
      .Write = Invalidated,
      .CanEliminate = true,
    });
  }

  default: LOGMAN_THROW_A_FMT(false, "invalid special op"); FEX_UNREACHABLE;
  }

  FEX_UNREACHABLE;
}

// Generic dead code elimination for a single operation. The operation is
// removed when nothing uses its result and it has no side effects. Returns
// true if the node was removed, in which case the caller skips flag handling
// for it; returns false otherwise.
bool DeadFlagCalculationEliminination::EliminateDeadCode(IREmitter* IREmit, Ref CodeNode, IROp_Header* IROp) {
  const bool Removable = CodeNode->GetUses() == 0 && !IR::HasSideEffects(IROp->Op);

  if (Removable) {
    IREmit->Remove(CodeNode);
  }

  return Removable;
}

// Maps an x86 condition code to the Arm condition code that gives the same
// result when evaluated directly on fcmp's flags. Returns CondClass::AL when
// no such Arm condition exists, which callers treat as "cannot fold".
CondClass DeadFlagCalculationEliminination::X86ToArmFloatCond(CondClass X86) {
  // Table of x86 condition codes that map to arm64 condition codes, in the
  // sense that fcmp+axflag+branch(x86) is equivalent to fcmp+branch(arm).
  //
  // E would be "equal or unordered", no condition code.
  // G would be "greater than or less than", no condition code.
  //
  // SF/OF conditions are trivial and therefore shouldn't actually be generated
  //
  // The left-hand comments give the x86 mnemonic suffix (UGE is "above or
  // equal", UGT is "above"), the right-hand ones the Arm condition.
  //
  // NOTE(review): the SLE row maps a signed (SF/OF-based) x86 condition, which
  // the remark above says should not be generated -- confirm it is intended.
  switch (X86) {
  case CondClass::UGE /* AE */: return CondClass::FGE /* GE */;
  case CondClass::UGT /* A  */: return CondClass::FGT /* GT */;
  case CondClass::ULT /* B  */: return CondClass::SLT /* LT */;
  case CondClass::ULE /* BE */: return CondClass::SLE /* LE */;
  case CondClass::SLE /* LE */: return CondClass::SLE /* LE */;
  default: return CondClass::AL;
  }
}

// Attempts to fold the flag producer that feeds a conditional jump into the
// jump itself.
//
// Two patterns are recognized for the operation preceding the jump (ignoring
// any StoreRegister/StorePF/StoreAF in between, which do not touch NZCV):
//  - AXFLAG: the jump's x86 condition is rewritten to the equivalent Arm
//    floating point condition, making the AXFLAG unnecessary.
//  - SUBNZCV against an inline constant zero with an EQ/NEQ condition and at
//    least 32-bit size: the jump becomes a direct compare-and-branch.
//
// On a match the flag producer is removed. Callers are expected to have
// checked that NZCV is dead after the jump.
void DeadFlagCalculationEliminination::FoldBranch(IREmitter* IREmit, IRListView& CurrentIR, IROp_CondJump* Op, Ref CodeNode) {
  // Walk backwards over trailing stores to find the candidate flag producer.
  auto Cursor = CodeNode->Header.Previous;
  for (;;) {
    const auto Opcode = CurrentIR.GetOp<IR::IROp_Header>(Cursor)->Op;
    if (Opcode != OP_STOREREGISTER && Opcode != OP_STOREPF && Opcode != OP_STOREAF) {
      break;
    }

    Cursor = CurrentIR.GetNode(Cursor)->Header.Previous;
  }

  auto Producer = CurrentIR.GetOp<IR::IROp_Header>(Cursor);

  switch (Producer->Op) {
  case OP_AXFLAG: {
    // Branch fed by AXFLAG: only foldable if the condition has an Arm
    // floating point equivalent.
    const CondClass Translated = X86ToArmFloatCond(Op->Cond);
    if (Translated == CondClass::AL) {
      return;
    }

    Op->Cond = Translated;
    break;
  }

  case OP_SUBNZCV: {
    // Branch fed by a compare. Bit tests could be handled here as well, but
    // tbz/tbnz has a limited offset range that there is no way to deal with
    // yet.
    const bool IsEqualityTest = Op->Cond == CondClass::EQ || Op->Cond == CondClass::NEQ;
    if (!IsEqualityTest || Producer->Size < OpSize::i32Bit) {
      return;
    }

    auto Rhs = CurrentIR.GetOp<IR::IROp_Header>(Producer->Args[1]);
    const bool ComparesWithZero = Rhs->Op == OP_INLINECONSTANT && Rhs->C<IR::IROp_InlineConstant>()->Constant == 0;
    if (!ComparesWithZero) {
      return;
    }

    // Matched: move the compare's operands onto the branch.
    IREmit->ReplaceNodeArgument(CodeNode, 0, CurrentIR.GetNode(Producer->Args[0]));
    IREmit->ReplaceNodeArgument(CodeNode, 1, CurrentIR.GetNode(Producer->Args[1]));
    Op->FromNZCV = false;
    Op->CompareSize = Producer->Size;
    break;
  }

  default: return;
  }

  // The producer only set flags and wrote no register. The flags are dead
  // after the jump, the jump no longer needs them, and nothing in between
  // reads them, so the producer itself is dead.
  IREmit->Remove(CurrentIR.GetNode(Cursor));
}

/**
 * @brief Removes dead code and dead flag writes within a single block.
 *
 * Walks the block backwards while tracking which flags are read later. The
 * set of flags read on entry to the block is stored in the block's CFG entry.
 *
 * @return true if the block's entry flag set differs from the previously
 * recorded one, meaning predecessors should be (re)processed.
 */
bool DeadFlagCalculationEliminination::ProcessBlock(IREmitter* IREmit, IRListView& CurrentIR, Ref Block, ControlFlowGraph& CFG) {
  // Conservative default for exits other than Jump/CondJump: every flag is
  // assumed to be read after the block.
  uint32_t FlagsRead = FLAG_ALL;

  // Reverse iteration is not yet working with the iterators
  auto BlockIROp = CurrentIR.GetOp<IR::IROp_CodeBlock>(Block);

  // We grab these nodes this way so we can iterate easily
  auto CodeBegin = CurrentIR.at(BlockIROp->Begin);
  auto CodeLast = CurrentIR.at(BlockIROp->Last);

  // Step back over EndBlock to get at the exit.
  --CodeLast;

  // Initialize the FlagsRead mask according to the exit instruction: the
  // flags live out of this block are those read on entry to its successors.
  auto [ExitNode, ExitOp] = CodeLast();
  if (ExitOp->Op == IR::OP_CONDJUMP) {
    auto Op = ExitOp->CW<IR::IROp_CondJump>();
    FlagsRead = CFG.Get(Op->TrueBlock)->Flags | CFG.Get(Op->FalseBlock)->Flags;
  } else if (ExitOp->Op == IR::OP_JUMP) {
    FlagsRead = CFG.Get(ExitOp->Args[0])->Flags;
  }

  // Iterate the block in reverse
  while (true) {
    auto [CodeNode, IROp] = CodeLast();

    // Optimizing flags can cause earlier flag reads to become dead but dead
    // flag reads should not impede optimization of earlier dead flag writes.
    // We must DCE as we go to ensure we converge in a single iteration.
    if (!EliminateDeadCode(IREmit, CodeNode, IROp)) {
      // Optimization algorithm: For each flag written...
      //
      //  If the flag has a later read (per FlagsRead), remove the flag from
      //  FlagsRead, since the reader is covered by this write.
      //
      //  Else, there is no later read, so remove the flag write (if we can).
      //  This is the active part of the optimization.
      //
      // Then, add each flag read to FlagsRead.
      //
      // This order is important: instructions that read-modify-write flags
      // (like adcs) first read flags, then write flags. Since we're iterating
      // the block backwards, that means we handle the write first.
      struct FlagInfo Info = Classify(IROp);

      if (!Info.Trivial()) {
        bool Eliminated = false;

        if ((FlagsRead & Info.Write()) == 0) {
          // None of the written flags is read later.
          if ((Info.CanEliminate() || Info.Replacement()) && CodeNode->GetUses() == 0) {
            // Neither the flags nor the value are needed: drop the op.
            IREmit->Remove(CodeNode);
            Eliminated = true;
          } else if (Info.Replacement()) {
            // The value is still needed: switch to the variant without flags.
            IROp->Op = Info.Replacement();
          }
        } else if (Info.ReplacementNoWrite() && CodeNode->GetUses() == 0) {
          // The flags are needed but the value is not: switch to the
          // flags-only variant.
          IROp->Op = Info.ReplacementNoWrite();
        }

        // If we don't care about the sign or carry, we can optimize testnz.
        // Carry is inverted between testz and testnz so we check that too. Note
        // this check is outside of the if, since the TestNZ might result from
        // optimizing AndWithFlags, and we need to converge locally in a single
        // iteration.
        if (IROp->Op == OP_TESTNZ && IROp->Size < OpSize::i32Bit && !(FlagsRead & (FLAG_N | FLAG_C))) {
          IROp->Op = OP_TESTZ;
        }

        // Every later read of a written flag is satisfied by this write.
        FlagsRead &= ~Info.Write();

        // If we eliminated the instruction, we eliminate its read too. This
        // check is required to ensure the pass converges locally in a single
        // iteration.
        if (!Eliminated) {
          FlagsRead |= Info.Read();
        }
      }
    }

    // Iterate in reverse
    if (CodeLast == CodeBegin) {
      break;
    }
    --CodeLast;
  }

  // For the purposes of global propagation, the content of our progress doesn't
  // matter -- only the difference in our final FlagsRead contributes to changes
  // in the predecessors.
  uint32_t OldFlagsRead = CFG.Get(BlockIROp->ID)->Flags;
  CFG.Get(BlockIROp->ID)->Flags = FlagsRead;
  return (OldFlagsRead != FlagsRead);
}

// Removes Parity operations whose input is known to be a single 0/1 bit.
//
// Blocks are visited once, in IR order. For each block the pass tracks
// whether the most recently stored PF value may have bits above bit 0
// ("full") or is known to be 0/1 only ("partial"). A Parity operation seen
// in the partial state is replaced by its raw input (XORed with 1 when the
// operation is inverting). The per-block Flags field of the CFG is reused to
// hand each block's final state to its successors.
void DeadFlagCalculationEliminination::OptimizeParity(IREmitter* IREmit, IRListView& CurrentIR, ControlFlowGraph& CFG) {
  // Mapping for flags inside this pass.
  const uint8_t PARTIAL = 0;
  const uint8_t FULL = 1;

  // Initialize conservatively: all blocks need full parity. This initialization
  // matters for proper handling of backedges.
  for (auto [Block, BlockHeader] : CurrentIR.GetBlocks()) {
    auto ID = BlockHeader->C<IROp_CodeBlock>()->ID;
    CFG.Get(ID)->Flags = FULL;
  }

  for (auto [Block, BlockHeader] : CurrentIR.GetBlocks()) {
    const auto ID = BlockHeader->C<IROp_CodeBlock>()->ID;
    const auto& Predecessors = CFG.Get(ID)->Predecessors;
    bool Full = false;

    if (Predecessors.empty()) {
      // Conservatively assume there was full parity before the start block
      Full = true;
    } else {
      // If any predecessor needs full parity at the end, we need full parity.
      // Predecessors not visited yet still hold the conservative FULL value.
      for (auto Pred : Predecessors) {
        Full |= (CFG.Get(Pred)->Flags == FULL);
      }
    }

    for (auto [CodeNode, IROp] : CurrentIR.GetCode(Block)) {
      if (IROp->Op == OP_STOREPF) {
        auto Op = IROp->CW<IR::IROp_StorePF>();
        auto Generator = CurrentIR.GetOp<IR::IROp_Header>(Op->Value);

        // Determine if we only write 0/1 to the parity flag. Anything other
        // than a select between two inline constants is treated as full.
        Full = true;
        if (Generator->Op == OP_NZCVSELECT) {
          auto C0 = CurrentIR.GetOp<IR::IROp_Header>(Generator->Args[0]);
          auto C1 = CurrentIR.GetOp<IR::IROp_Header>(Generator->Args[1]);
          if (C0->Op == C1->Op && C0->Op == OP_INLINECONSTANT) {
            auto IC0 = CurrentIR.GetOp<IR::IROp_InlineConstant>(Generator->Args[0]);
            auto IC1 = CurrentIR.GetOp<IR::IROp_InlineConstant>(Generator->Args[1]);

            // We need the full 8 if the constant has upper bits set.
            Full = (IC0->Constant | IC1->Constant) & ~1;
          }
        }
      } else if (IROp->Op == OP_PARITY && !Full) {
        // Eliminate parity calculations if it's only 1-bit.
        auto Parity = IROp->C<IROp_Parity>();
        Ref Value = CurrentIR.GetNode(Parity->Raw);

        // An inverting parity of a single bit is that bit flipped.
        if (Parity->Invert) {
          IREmit->SetWriteCursor(CodeNode);
          Value = IREmit->_Xor(OpSize::i32Bit, Value, IREmit->_InlineConstant(1));
        }

        // Redirect the users of the Parity op to the replacement value, then
        // drop the op.
        IREmit->ReplaceUsesWithAfter(CodeNode, Value, CurrentIR.at(CodeNode));
        IREmit->Remove(CodeNode);
      }
    }

    // Record our final state for our successors to read.
    CFG.Get(ID)->Flags = Full ? FULL : PARTIAL;
  }
}

// Pass entry point. Steps, in order:
//  1. Build the CFG (predecessor lists and block nodes) from block exits.
//  2. Run ProcessBlock over a worklist until no block's entry flag set
//     changes any more.
//  3. Fold compares/axflag into conditional jumps where NZCV is dead after
//     the jump.
//  4. If the IR reads parity, simplify parity calculations.
void DeadFlagCalculationEliminination::Run(IREmitter* IREmit) {
  FEXCORE_PROFILE_SCOPED("PassManager::DFE");

  auto CurrentIR = IREmit->ViewIR();
  fextl::deque<uint32_t> Worklist;

  // Initialize CFG. Every block starts out queued, with conservative flags.
  ControlFlowGraph CFG {.IR = CurrentIR};
  CFG.Init(Worklist, CurrentIR.GetHeader()->BlockCount);

  // Gather CFG
  for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
    auto Block = BlockHeader->C<IROp_CodeBlock>();
    // The exit instruction is the one just before the block's last op.
    auto CodeLast = CurrentIR.at(Block->Last);
    --CodeLast;
    auto [ExitNode, ExitOp] = CodeLast();
    if (ExitOp->Op == IR::OP_CONDJUMP) {
      auto Op = ExitOp->CW<IR::IROp_CondJump>();

      CFG.RecordEdge(Block->ID, Op->TrueBlock);
      CFG.RecordEdge(Block->ID, Op->FalseBlock);
    } else if (ExitOp->Op == IR::OP_JUMP) {
      CFG.RecordEdge(Block->ID, ExitOp->Args[0]);
    }

    CFG.Get(Block->ID)->Node = BlockNode;
  }

  // After processing a block, if we made progress, we must process its
  // predecessors to propagate globally. A block will be reprocessed only if
  // there is a loop backedge.
  //
  // Blocks are taken from the back of the deque while re-queued predecessors
  // are pushed to the front, so the entry being processed is still at the
  // back when pop_back() runs.
  for (; !Worklist.empty(); Worklist.pop_back()) {
    auto Block = Worklist.back();
    auto Info = CFG.Get(Block);
    Info->InWorklist = false;

    if (ProcessBlock(IREmit, CurrentIR, Info->Node, CFG)) {
      for (auto Pred : Info->Predecessors) {
        CFG.AddWorklist(Worklist, Pred);
      }
    }
  }

  // Fold compares into branches now that we're otherwise optimized. This needs
  // to run after eliminating carries etc and it needs the global flag metadata.
  // But it only needs to run once, we don't do it in the loop.
  for (auto [Block, _] : CurrentIR.GetBlocks()) {
    // Grab the jump
    auto BlockIROp = CurrentIR.GetOp<IR::IROp_CodeBlock>(Block);
    auto CodeLast = CurrentIR.at(BlockIROp->Last);
    --CodeLast;

    auto [ExitNode, ExitOp] = CodeLast();
    if (ExitOp->Op == IR::OP_CONDJUMP) {
      auto Op = ExitOp->CW<IR::IROp_CondJump>();
      uint32_t FlagsOut = CFG.Get(Op->TrueBlock)->Flags | CFG.Get(Op->FalseBlock)->Flags;

      // Only fold when neither successor reads any of NZCV and the jump
      // currently branches on flags.
      if ((FlagsOut & FLAG_NZCV) == 0 && Op->FromNZCV) {
        FoldBranch(IREmit, CurrentIR, Op, ExitNode);
      }
    }
  }

  // Note that OptimizeParity overwrites the per-block Flags in the CFG, so it
  // has to come after everything that needs the flag liveness data.
  if (CurrentIR.GetHeader()->ReadsParity) {
    OptimizeParity(IREmit, CurrentIR, CFG);
  }
}

// Factory for the dead flag calculation elimination pass, returned through
// the generic Pass interface.
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination() {
  auto Pass = fextl::make_unique<DeadFlagCalculationEliminination>();
  return Pass;
}

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
$end_info$
*/

#include "Interface/IR/Passes/RegisterAllocationPass.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/RegisterAllocationData.h"
#include "Interface/IR/Passes.h"
#include "Interface/Core/CPUID.h"
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/vector.h>
#include <bit>
#include <cstdint>

using namespace FEXCore;

namespace FEXCore::IR {
namespace {
  // Allocation state of one register class.
  struct RegisterClassData {
    // Bitmask of free registers: bit R set means register R is available.
    uint32_t Available;
    // Number of registers in the class.
    uint32_t Count;

    // For every register R whose Available bit is clear, RegToSSA[R] is the
    // node currently held in R. Entries of free registers are stale and must
    // not be read, which is why freeing a register does not need to reset
    // them.
    Ref RegToSSA[32];
  };

  // Returns the register class an operation's result lives in. Most opcodes
  // have a fixed class; "Complex" opcodes carry the class as an operand, which
  // is read from the operation itself. Complex opcodes that are not handled
  // yield RegClass::Invalid.
  IR::RegClass GetRegClassFromNode(IR::IRListView* IR, IR::IROp_Header* IROp) {
    if (const auto Fixed = IR::GetRegClass(IROp->Op); Fixed != IR::RegClass::Complex) {
      return Fixed;
    }

    // Complex register class handling
    IR::RegClass Resolved = IR::RegClass::Invalid;

    switch (IROp->Op) {
    case IR::OP_LOADCONTEXT: Resolved = IROp->C<IR::IROp_LoadContext>()->Class; break;
    case IR::OP_LOADREGISTER: Resolved = IROp->C<IR::IROp_LoadRegister>()->Class; break;
    case IR::OP_LOADCONTEXTINDEXED: Resolved = IROp->C<IR::IROp_LoadContextIndexed>()->Class; break;
    case IR::OP_LOADMEM:
    case IR::OP_LOADMEMTSO: Resolved = IROp->C<IR::IROp_LoadMem>()->Class; break;
    case IR::OP_FILLREGISTER: Resolved = IROp->C<IR::IROp_FillRegister>()->Class; break;
    default: break;
    }

    return Resolved;
  }
} // Anonymous namespace

// Register allocator pass. This class holds the allocation state (per-class
// register files, SSA-to-register maps, spill slots, next-use data) and the
// helpers that assign, free, spill and refill registers. Run() and
// TryPostRAMerge() are defined out of line.
class ConstrainedRAPass final : public RegisterAllocationPass {
public:
  explicit ConstrainedRAPass(const FEXCore::CPUIDEmu* CPUID)
    : CPUID {CPUID} {}
  void Run(IREmitter* IREmit) override;
  // Sets the number of registers available in the given class.
  void AddRegisters(IR::RegClass Class, uint32_t RegisterCount) override;
  bool TryPostRAMerge(Ref LastNode, Ref CodeNode, IROp_Header* IROp);

private:
  // Register file state, indexed by register class.
  RegisterClassData Classes[IR::NumClasses];

  IREmitter* IREmit {};
  IRListView* IR {};
  const FEXCore::CPUIDEmu* CPUID {};

  // Map of nodes to their preferred register, to coalesce load/store reg.
  fextl::vector<PhysicalRegister> PreferredReg;

  // Map of assigned registers. Does not grow beyond the initial set.
  fextl::vector<PhysicalRegister> SSAToReg;

  // Maps defs to their assigned spill slot + 1, or 0 if not spilled.
  fextl::vector<unsigned> SpillSlots;

  // Next-use distance relative to the block end of each source, last first.
  fextl::vector<uint32_t> SourcesNextUses;

  // Sources that have been seen
  fextl::vector<bool> Seen;

  // SourcesNextUses is read backwards, this tracks the index
  int64_t SourceIndex {};

  // Whether a value can be recreated instead of being spilled and filled.
  // Currently that is only the case for constants.
  bool Rematerializable(IROp_Header* IROp) {
    return IROp->Op == OP_CONSTANT;
  }

  // Emits the operation that brings Node's value back into a register:
  // a fresh constant for rematerializable nodes, otherwise a fill from the
  // node's spill slot. Returns the newly emitted node.
  Ref InsertFill(Ref Node) {
    IROp_Header* IROp = IR->GetOp<IROp_Header>(Node);

    // Remat if we can
    if (Rematerializable(IROp)) {
      const auto Op = IROp->C<IR::IROp_Constant>();
      uint64_t Const = Op->Constant;
      return IREmit->_Constant(Const, Op->Pad, Op->MaxBytes);
    }

    // Otherwise fill from stack
    uint32_t SlotPlusOne = SpillSlots[IR->GetID(Node).Value];
    LOGMAN_THROW_A_FMT(SlotPlusOne >= 1, "Node must have been spilled");

    const auto RegClass = GetRegClassFromNode(IR, IROp);
    return IREmit->_FillRegister(IROp->Size, IROp->ElementSize, SlotPlusOne - 1, RegClass);
  }

  // IP of next-use of each source. IPs are measured from the end of the
  // block, so we don't need to size the block up-front.
  fextl::vector<uint32_t> NextUses;

  // Set once anything has been spilled; next-use data is computed lazily on
  // the first spill (see SpillReg).
  bool AnySpilled {};

  // Whether Arg is a source that occupies a register: it must be valid and
  // must not be an inline constant or inline entrypoint offset.
  bool IsValidArg(OrderedNodeWrapper Arg) {
    if (Arg.IsInvalid()) {
      return false;
    }

    auto Op = IR->GetOp<IROp_Header>(Arg)->Op;
    return Op != OP_INLINECONSTANT && Op != OP_INLINEENTRYPOINTOFFSET;
  }

  // Register file that Reg belongs to.
  RegisterClassData* GetClass(PhysicalRegister Reg) {
    return &Classes[Reg.Class];
  }

  // Single-bit mask for Reg within its class's Available mask. The shift is
  // done on an unsigned value so that the highest register index never shifts
  // into the sign bit of an int, matching the other masks in this class.
  uint32_t GetRegBits(PhysicalRegister Reg) {
    return 1u << Reg.Reg;
  }

  // Whether Node currently occupies the register recorded for it in SSAToReg
  // (as opposed to having been evicted from the register file).
  bool IsInRegisterFile(Ref Node) {
    auto ID = IR->GetID(Node).Value;
    LOGMAN_THROW_A_FMT(ID < SSAToReg.size(), "Only old nodes looked up");

    PhysicalRegister Reg = SSAToReg[ID];
    RegisterClassData* Class = GetClass(Reg);

    return (Class->Available & GetRegBits(Reg)) == 0 && Class->RegToSSA[Reg.Reg] == Node;
  }

  // Marks Reg as available again. RegToSSA is deliberately left untouched.
  void FreeReg(PhysicalRegister Reg) {
    RegisterClassData* Class = GetClass(Reg);
    uint32_t RegBits = GetRegBits(Reg);

    LOGMAN_THROW_A_FMT(!(Class->Available & RegBits), "Register double-free");

    Class->Available |= RegBits;
  }

  // Whether instruction I uses Reg as one of its register-allocated sources.
  bool HasSource(IROp_Header* I, PhysicalRegister Reg) {
    int NumArgs = IR::GetRAArgs(I->Op);
    for (int s = 0; s < NumArgs; ++s) {
      if (I->Args[s].IsImmediate()) {
        // When spilling for a destination, we'll see register sources
        if (PhysicalRegister(I->Args[s]) == Reg) {
          return true;
        }
      } else {
        // When spilling for SRA correctness, we'll see SSA sources. This is
        // pretty obscure.
        auto V = I->Args[s];
        V.ClearKill();

        if (IsValidArg(V) && SSAToReg[V.ID().Value] == Reg) {
          return true;
        }
      }
    }

    return false;
  }

  // For a static-register-access operation, returns the SSA node whose value
  // is associated with the static register: the operation itself for loads,
  // the stored value for stores. Returns nullptr for any other operation.
  Ref DecodeSRANode(const IROp_Header* IROp, Ref Node) {
    if (IROp->Op == OP_LOADREGISTER || IROp->Op == OP_LOADPF || IROp->Op == OP_LOADAF) {
      return Node;
    }

    if (IROp->Op == OP_STOREREGISTER || IROp->Op == OP_STOREPF || IROp->Op == OP_STOREAF) {
      // All three stores are decoded through the StorePF layout, exactly as
      // before (the formerly separate StoreRegister branch was identical).
      // NOTE(review): this relies on StoreRegister keeping its Value operand
      // in the same position as StorePF -- confirm against the IR definition.
      auto V = IROp->C<IR::IROp_StorePF>()->Value;
      V.ClearKill();
      return IR->GetNode(V);
    }

    return nullptr;
  }

  // Returns the fixed register that a static-register-access operation
  // refers to. PF and AF occupy the last two registers of the GPRFixed class.
  PhysicalRegister DecodeSRAReg(const IROp_Header* IROp, Ref Node) {
    uint8_t FlagOffset = Classes[FEXCore::ToUnderlying(RegClass::GPRFixed)].Count - 2;

    if (IROp->Op == OP_STOREREGISTER) {
      // For a store, the register is the one recorded on the store node.
      return PhysicalRegister(Node);
    } else if (IROp->Op == OP_LOADPF || IROp->Op == OP_STOREPF) {
      return PhysicalRegister {RegClass::GPRFixed, FlagOffset};
    } else if (IROp->Op == OP_LOADAF || IROp->Op == OP_STOREAF) {
      return PhysicalRegister {RegClass::GPRFixed, uint8_t(FlagOffset + 1)};
    } else {
      // Anything else is decoded as a LoadRegister.
      const IROp_LoadRegister* Op = IROp->C<IR::IROp_LoadRegister>();

      LOGMAN_THROW_A_FMT(Op->Class == RegClass::GPR || Op->Class == RegClass::FPR, "SRA classes");
      if (Op->Class == RegClass::FPR) {
        return PhysicalRegister {RegClass::FPRFixed, uint8_t(Op->Reg)};
      } else {
        return PhysicalRegister {RegClass::GPRFixed, uint8_t(Op->Reg)};
      }
    }
  }

  // Whether an operation is trivial after allocation: either a pure
  // allocation marker, or a move whose source and destination registers
  // turned out to be the same.
  bool IsTrivial(Ref Node, const IROp_Header* Header) {
    switch (Header->Op) {
    case OP_ALLOCATEGPR: return true;
    case OP_ALLOCATEGPRAFTER: return true;
    case OP_ALLOCATEFPR: return true;
    case OP_RMWHANDLE: return PhysicalRegister(Node) == PhysicalRegister(Header->Args[0]);
    case OP_LOADREGISTER: return PhysicalRegister(Node) == DecodeSRAReg(Header, Node);
    case OP_STOREREGISTER: return PhysicalRegister(Header->Args[0]) == DecodeSRAReg(Header, Node);
    default: return false;
    }
  }

  // Helper macro to walk the set bits b in a 32-bit word x, using ffs to get
  // the next set bit and then clearing on each iteration.
#define foreach_bit(b, x) for (uint32_t __x = (x), b; ((b) = __builtin_ffs(__x) - 1, __x); __x &= ~(1 << (b)))

  // Computes next-use information for a block by walking it backwards from
  // its last instruction until `Until` is reached (exclusive) or the block
  // start has been processed. Fills NextUses (indexed by SSA ID) and
  // SourcesNextUses, and positions SourceIndex at the end of the latter.
  void CalculateNextUses(IROp_CodeBlock* BlockIROp, IROp_Header* Until) {
    SourcesNextUses.clear();
    NextUses.resize(IR->GetSSACount(), 0);

    // IP relative to the end of the block.
    uint32_t IP = 1;

    // We grab these nodes this way so we can iterate easily
    auto CodeBegin = IR->at(BlockIROp->Begin);
    auto CodeLast = IR->at(BlockIROp->Last);

    while (1) {
      auto [CodeNode, IROp] = CodeLast();
      if (IROp == Until) {
        break;
      }
      // End of iteration gunk

      // Sources are visited last-to-first so that SourcesNextUses can later
      // be consumed backwards.
      const int NumArgs = IR::GetRAArgs(IROp->Op);
      for (int i = NumArgs - 1; i >= 0; --i) {
        auto V = IROp->Args[i];
        V.ClearKill();

        if (IsValidArg(V)) {
          const uint32_t Index = V.ID().Value;

          // Remember the use that follows this one, then make this
          // instruction the value's nearest known use.
          SourcesNextUses.push_back(NextUses[Index]);
          NextUses[Index] = IP;
        }
      }

      // IP is relative to block end and we iterate backwards, so increment.
      ++IP;

      // Rest is iteration gunk
      if (CodeLast == CodeBegin) {
        break;
      }
      --CodeLast;
    }

    SourceIndex = SourcesNextUses.size();
  }

  // Frees one register of Class by evicting the value whose next use is
  // furthest away, emitting a spill if the value is neither already spilled
  // nor rematerializable. Sources of the instruction `Exclude` are never
  // evicted.
  void SpillReg(RegisterClassData* Class, IROp_CodeBlock* Block, IROp_Header* Exclude) {
    // We're about to use next-use information, so calculate it.
    if (!AnySpilled) {
      CalculateNextUses(Block, Exclude);
    }

    // Find the best node to spill according to the "furthest-first" heuristic.
    // Since we defined IPs relative to the end of the block, the furthest
    // next-use has the /smallest/ unsigned IP.
    Ref Candidate = nullptr;
    uint32_t BestDistance = UINT32_MAX;
    uint8_t BestReg = ~0;
    uint32_t Allocated = ((1u << Class->Count) - 1) & ~Class->Available;

    foreach_bit(i, Allocated) {
      Ref Node = Class->RegToSSA[i];

      // Check the node before it is used to index SSAToReg, so that a broken
      // register file is reported by the assertion rather than by an
      // out-of-bounds lookup.
      LOGMAN_THROW_A_FMT(Node != nullptr, "Invariant3");

      auto Reg = SSAToReg[IR->GetID(Node).Value];
      LOGMAN_THROW_A_FMT(Reg.Reg == i, "Invariant4");

      // Skip any source used by the current instruction, it is unspillable.
      if (!HasSource(Exclude, Reg)) {
        uint32_t NextUse = NextUses[IR->GetID(Node).Value];

        // Prioritize remat over spilling. It is typically cheaper to remat a
        // constant multiple times than to spill a single value.
        if (!Rematerializable(IR->GetOp<IROp_Header>(Node))) {
          NextUse += 100000;
        }

        if (NextUse < BestDistance) {
          BestDistance = NextUse;
          BestReg = i;
          Candidate = Node;
        }
      }
    }

    LOGMAN_THROW_A_FMT(Candidate != nullptr, "must've found something..");

    PhysicalRegister Reg = SSAToReg[IR->GetID(Candidate).Value];
    LOGMAN_THROW_A_FMT(Reg.Reg == BestReg, "Invariant6");

    IROp_Header* Header = IR->GetOp<IROp_Header>(Candidate);
    uint32_t Value = IR->GetID(Candidate).Value;
    bool Spilled = !SpillSlots.empty() && SpillSlots[Value] != 0;

    // If we already spilled the Candidate, we don't need to spill again.
    // Similarly, if we can rematerialize the instruction, we don't spill it.
    // The same predicate as in InsertFill is used so the two cannot disagree.
    if (!Spilled && !Rematerializable(Header)) {
      LOGMAN_THROW_A_FMT(Reg.AsRegClass() == GetRegClassFromNode(IR, Header), "Consistent");

      // SpillSlots allocation is deferred.
      if (SpillSlots.empty()) {
        SpillSlots.resize(IR->GetSSACount(), 0);
      }

      // TODO: we should colour spill slots
      uint32_t Slot = IR->GetHeader()->SpillSlots++;

      // We must map here in case we're spilling something we shuffled.
      auto SpillOp = IREmit->_SpillRegister(OrderedNodeWrapper::FromImmediate(Reg.Raw), Slot, Reg.AsRegClass());
      SpillOp.first->Header.Size = Header->Size;
      SpillOp.first->Header.ElementSize = Header->ElementSize;
      SpillSlots[Value] = Slot + 1;
    }

    // Now that we've spilled the value, take it out of the register file
    FreeReg(Reg);
    AnySpilled = true;
  }

  // Records that Reg now holds Node, without touching the Available mask.
  // SSAToReg is only updated for nodes within its initial range.
  void RemapReg(Ref Node, PhysicalRegister Reg) {
    RegisterClassData* Class = GetClass(Reg);
    Class->RegToSSA[Reg.Reg] = Node;

    uint32_t Index = IR->GetID(Node).Value;
    if (Index < SSAToReg.size()) {
      SSAToReg[Index] = Reg;
    }
  }

  // Record a given assignment of register Reg to Node. Reg must be free.
  void SetReg(Ref Node, PhysicalRegister Reg) {
    RegisterClassData* Class = GetClass(Reg);
    uint32_t RegBits = GetRegBits(Reg);

    LOGMAN_THROW_A_FMT((Class->Available & RegBits) == RegBits, "Precondition");

    Class->Available &= ~RegBits;

    RemapReg(Node, Reg);
    Node->Reg = Reg.Raw;
  }

  // Assign a register for a given Node, spilling if necessary.
  //
  // Candidates are tried in this order: the node's preferred register, the
  // register of its tied source, a pair-friendly register for the pair
  // allocation ops, and finally the lowest free register of the node's class.
  // Pivot is the instruction whose sources must not be evicted when spilling.
  void AssignReg(IROp_Header* IROp, IROp_CodeBlock* Block, Ref CodeNode, IROp_Header* Pivot) {
    const uint32_t Node = IR->GetID(CodeNode).Value;

    // Prioritize preferred registers.
    if (Node < PreferredReg.size()) {
      if (PhysicalRegister Reg = PreferredReg[Node]; !Reg.IsInvalid()) {
        RegisterClassData* Class = GetClass(Reg);
        uint32_t RegBits = GetRegBits(Reg);

        if ((Class->Available & RegBits) == RegBits) {
          SetReg(CodeNode, Reg);
          return;
        }
      }
    }

    // Try to handle tied registers. This can fail, the JIT will insert moves.
    if (int TiedIdx = IR::TiedSource(IROp->Op); TiedIdx >= 0) {
      auto Reg = PhysicalRegister(IROp->Args[TiedIdx]);
      RegisterClassData* Class = GetClass(Reg);
      uint32_t RegBits = GetRegBits(Reg);

      // Fixed (static) registers are never taken over this way.
      if (Reg.AsRegClass() != RegClass::GPRFixed && Reg.AsRegClass() != RegClass::FPRFixed && (Class->Available & RegBits) == RegBits) {
        SetReg(CodeNode, Reg);
        return;
      }
    }

    // Try to coalesce reserved pairs. Just a heuristic to remove some moves.
    if (IROp->Op == OP_ALLOCATEGPR && IROp->C<IROp_AllocateGPR>()->ForPair) {
      uint32_t Available = Classes[FEXCore::ToUnderlying(RegClass::GPR)].Available;

      // Only choose base register R if R and R + 1 are both free
      Available &= (Available >> 1);

      // Only consider aligned registers in the pair region
      constexpr uint32_t EVEN_BITS = 0x55555555;
      Available &= (EVEN_BITS & ((1u << PairRegs) - 1));

      if (Available) {
        unsigned Reg = std::countr_zero(Available);
        SetReg(CodeNode, PhysicalRegister(RegClass::GPR, Reg));
        return;
      }
    } else if (IROp->Op == OP_ALLOCATEGPRAFTER) {
      // Take the odd register right after an even-numbered one, if it is free.
      uint32_t Available = Classes[FEXCore::ToUnderlying(RegClass::GPR)].Available;
      auto After = PhysicalRegister(IROp->Args[0]);
      if ((After.Reg & 1) == 0 && Available & (1ull << (After.Reg + 1))) {
        SetReg(CodeNode, PhysicalRegister(RegClass::GPR, After.Reg + 1));
        return;
      }
    }

    RegClass ClassType = GetRegClassFromNode(IR, IROp);
    RegisterClassData* Class = &Classes[FEXCore::ToUnderlying(ClassType)];

    // Spill to make room in the register file.
    if (!Class->Available) {
      IREmit->SetWriteCursorBefore(CodeNode);
      SpillReg(Class, Block, Pivot);
    }

    // Assign a free register in the appropriate class.
    LOGMAN_THROW_A_FMT(Class->Available != 0, "Post-condition of spilling");
    unsigned Reg = std::countr_zero(Class->Available);
    SetReg(CodeNode, PhysicalRegister(ClassType, Reg));
  }
};

// Records how many registers of the given class the allocator may use.
// At most 31 registers per class are accepted (checked by assertion).
void ConstrainedRAPass::AddRegisters(IR::RegClass Class, uint32_t RegisterCount) {
  LOGMAN_THROW_A_FMT(RegisterCount <= 31, "Up to 31 regs supported");

  auto& ClassData = Classes[FEXCore::ToUnderlying(Class)];
  ClassData.Count = RegisterCount;
}

// Decides whether the move-like instruction LastOp can be folded into the
// instruction IROp that immediately consumes it.
//
// May narrow IROp->Size to 32-bit as a side effect (AND case below).
inline bool KillMove(IROp_Header* LastOp, IROp_Header* IROp, Ref LastNode, Ref CodeNode) {
  // Register stores are always foldable.
  if (LastOp->Op == OP_STOREREGISTER) {
    return true;
  }

  // 32-bit moves in x86_64 are represented as a Bfe, detect them.
  if (LastOp->Op != OP_BFE) {
    return false;
  }

  const auto* Bfe = LastOp->C<IR::IROp_Bfe>();
  if (Bfe->lsb != 0 || Bfe->Width != 32) {
    return false;
  }

  if (IROp->Op == OP_AND) {
    // Rewrite "mov wA, wB; and xA, xA, xC" into "and wA, wB, wC", since
    // ((b & 0xffffffff) & c) == (b & c) & 0xffffffff.
    IROp->Size = OpSize::i32Bit;
    return true;
  }

  // The remaining foldable consumers must already be 32-bit operations.
  if (IROp->Size != OpSize::i32Bit) {
    return false;
  }

  switch (IROp->Op) {
  case OP_OR:
  case OP_XOR:
  case OP_SUB:
  case OP_LSHL:
  case OP_LSHR:
  case OP_ASHR: return true;
  default: return false;
  }
}

// Returns true if IROp is an Sbfe of width 1 whose lsb is the top bit of a
// Size-wide value and whose source is Src.
inline bool IsSignext(const IROp_Header* IROp, OrderedNodeWrapper Src, OpSize Size) {
  if (IROp->Op != OP_SBFE) {
    return false;
  }

  const auto* Extract = IROp->C<IR::IROp_Sbfe>();
  if (Extract->Width != 1) {
    return false;
  }
  if (Extract->lsb != (IR::OpSizeAsBits(Size) - 1)) {
    return false;
  }
  return Extract->Src == Src;
}

// Returns true if IROp is a Constant op whose value is zero.
inline bool IsZero(const IROp_Header* IROp) {
  if (IROp->Op != OP_CONSTANT) {
    return false;
  }
  return IROp->C<IROp_Constant>()->Constant == 0;
}

// Attempts to combine the previous non-trivial instruction (LastNode) with the
// current one (CodeNode / IROp) once both have registers assigned.
//
// Returns true when LastNode has become redundant; Run() then removes LastNode.
// Returns false when LastNode must be kept. Note that the XGETBV/CPUID folding
// paths remove CodeNode themselves and still return false, and the Push/Pop
// paths remove CodeNode and return true so that both originals disappear.
bool ConstrainedRAPass::TryPostRAMerge(Ref LastNode, Ref CodeNode, IROp_Header* IROp) {
  auto LastOp = IR->GetOp<IROp_Header>(LastNode);

  if (IROp->Op == OP_PUSH && LastOp->Op == OP_PUSH) {
    auto SP = PhysicalRegister(CodeNode);
    auto Push = IR->GetOp<IROp_Push>(CodeNode);
    auto LastPush = IR->GetOp<IROp_Push>(LastNode);

    // Both pushes must have matching sizes, use the same register for their
    // result and their Args[1], and must not push that register itself.
    if (LastOp->Size == IROp->Size && LastPush->ValueSize == Push->ValueSize && SP == PhysicalRegister(LastNode) &&
        SP == PhysicalRegister(IROp->Args[1]) && SP == PhysicalRegister(LastOp->Args[1]) && SP != PhysicalRegister(IROp->Args[0]) &&
        SP != PhysicalRegister(LastOp->Args[0]) && Push->ValueSize >= OpSize::i32Bit) {

      IREmit->SetWriteCursorBefore(LastNode);
      IREmit->_PushTwo(IROp->Size, Push->ValueSize, IROp->Args[0], LastOp->Args[0], IROp->Args[1]);
      IREmit->RemovePostRA(CodeNode);
      return true;
    }
  } else if (IROp->Op == OP_POP) {
    auto SP = PhysicalRegister(IROp->Args[0]);

    // Two same-sized pops (at least 32-bit) whose Args[0] share a register.
    if (LastOp->Op == OP_POP && LastOp->Size == IROp->Size && IROp->Size >= OpSize::i32Bit && SP == PhysicalRegister(LastOp->Args[0])) {
      IREmit->SetWriteCursorBefore(LastNode);
      IREmit->_PopTwo(IROp->Size, IROp->Args[0], LastOp->Args[1], IROp->Args[1]);
      IREmit->RemovePostRA(CodeNode);
      return true;
    }
  } else if ((IROp->Op == OP_DIV || IROp->Op == OP_UDIV) && IROp->Size >= OpSize::i32Bit) {
    // If Upper came from a sign/zero extension, we only need a 64-bit division.
    auto Op = IROp->CW<IR::IROp_Div>();
    if (!Op->Upper.IsInvalid() && PhysicalRegister(Op->Upper) == PhysicalRegister(LastNode)) {
      if (IROp->Op == OP_DIV ? IsSignext(LastOp, Op->Lower, IROp->Size) : IsZero(LastOp)) {
        Op->Upper.SetInvalid();
        // The extension is only reported as removable when the division's
        // remainder output lands in the same register as the extension.
        return PhysicalRegister(LastNode) == PhysicalRegister(Op->OutRemainder);
      }
    }
  } else if (IROp->Op == OP_XGETBV && PhysicalRegister(IROp->Args[0]) == PhysicalRegister(LastNode) && LastOp->Op == OP_CONSTANT) {
    // Try to constant fold
    uint64_t ConstantFunction = LastOp->C<IROp_Constant>()->Constant;
    auto Op = IROp->CW<IR::IROp_XGetBV>();
    if (CPUID->DoesXCRFunctionReportConstantData(ConstantFunction)) {
      const auto Result = CPUID->RunXCRFunction(ConstantFunction);
      IREmit->SetWriteCursorBefore(CodeNode);
      // The replacement constants are written straight into the registers
      // already assigned to the XGETBV outputs.
      IREmit->_Constant(Result.eax).Node->Reg = PhysicalRegister(Op->OutEAX).Raw;
      IREmit->_Constant(Result.edx).Node->Reg = PhysicalRegister(Op->OutEDX).Raw;
      IREmit->RemovePostRA(CodeNode);
      return false;
    }
  } else if (IROp->Op == OP_CPUID && PhysicalRegister(IROp->Args[0]) == PhysicalRegister(LastNode) && LastOp->Op == OP_CONSTANT) {
    // Try to constant fold. As a limitation of merging only 2 instructions, we
    // can only handle constant functions, not constant leafs. This could be
    // lifted if we generalized at a (significant) complexity cost.
    uint64_t ConstantFunction = LastOp->C<IROp_Constant>()->Constant;
    auto Op = IROp->CW<IR::IROp_CPUID>();

    const auto SupportsConstant = CPUID->DoesFunctionReportConstantData(ConstantFunction);
    if (SupportsConstant.SupportsConstantFunction == CPUIDEmu::SupportsConstant::CONSTANT &&
        SupportsConstant.NeedsLeaf != CPUIDEmu::NeedsLeafConstant::NEEDSLEAFCONSTANT) {
      const auto Result = CPUID->RunFunction(ConstantFunction, 0 /* leaf */);

      IREmit->SetWriteCursorBefore(CodeNode);
      IREmit->_Fence(IR::FenceType::Inst);
      // The replacement constants are written straight into the registers
      // already assigned to the CPUID outputs.
      IREmit->_Constant(Result.eax).Node->Reg = PhysicalRegister(Op->OutEAX).Raw;
      IREmit->_Constant(Result.ebx).Node->Reg = PhysicalRegister(Op->OutEBX).Raw;
      IREmit->_Constant(Result.ecx).Node->Reg = PhysicalRegister(Op->OutECX).Raw;
      IREmit->_Constant(Result.edx).Node->Reg = PhysicalRegister(Op->OutEDX).Raw;
      IREmit->RemovePostRA(CodeNode);
      return false;
    }
  }

  // Merge moves that are immediately consumed.
  //
  // x86 code inserts such moves to workaround x86's 2-address code. Because
  // arm64 is 3-address code, we can optimize these out.
  //
  // Note we rely on the short-circuiting here.
  if (PhysicalRegister(LastNode) == PhysicalRegister(CodeNode) && KillMove(LastOp, IROp, LastNode, CodeNode)) {
    LOGMAN_THROW_A_FMT(!PhysicalRegister(CodeNode).IsInvalid(), "invariant");

    // Redirect every source that read the move's destination register so it
    // reads the move's own source (LastOp->Args[0]) instead.
    int NumArgs = IR::GetRAArgs(IROp->Op);
    for (int s = 0; s < NumArgs; ++s) {
      if (IROp->Args[s].IsImmediate() && PhysicalRegister(IROp->Args[s]) == PhysicalRegister(LastNode)) {
        IROp->Args[s].SetImmediate(PhysicalRegister(LastOp->Args[0]).Raw);
      }
    }

    return true;
  }

  return false;
}

// Runs register allocation over every block of the IR held by IREmit_.
//
// Each block is processed independently: a backwards walk records kill bits
// and static-register (SRA) preferences, then a forwards walk assigns
// registers, inserts copies/fills where needed, deletes trivial instructions
// and merges adjacent instructions via TryPostRAMerge(). On completion the IR
// header is flagged as PostRA.
void ConstrainedRAPass::Run(IREmitter* IREmit_) {
  FEXCORE_PROFILE_SCOPED("PassManager::RA");

  IREmit = IREmit_;
  auto IR_ = IREmit->ViewIR();
  IR = &IR_;

  // Per-SSA-value state, sized to the number of SSA values in this IR.
  PreferredReg.resize(IR->GetSSACount(), PhysicalRegister::Invalid());
  SSAToReg.resize(IR->GetSSACount(), PhysicalRegister::Invalid());
  Seen.resize(IR->GetSSACount(), false);

  for (auto [BlockNode, BlockHeader] : IR->GetBlocks()) {
    // Spilling is local, so reset this per-block
    AnySpilled = false;

    // At the start of each block, all registers are available.
    for (auto& Class : Classes) {
      Class.Available = (1u << Class.Count) - 1;
    }

    auto BlockIROp = BlockHeader->CW<IR::IROp_CodeBlock>();

    // Backwards pass: analyze kill bits and SRA affinities
    {
      // Reverse iteration is not yet working with the iterators
      // We grab these nodes this way so we can iterate easily
      auto CodeBegin = IR->at(BlockIROp->Begin);
      auto CodeLast = IR->at(BlockIROp->Last);

      while (1) {
        auto [CodeNode, IROp] = CodeLast();
        // End of iteration gunk

        // Record preferred registers for SRA. We also record the Node accessing
        // each register, used below. Since we initialized Class->Available,
        // RegToSSA is otherwise undefined so we can stash our temps there.
        if (auto Node = DecodeSRANode(IROp, CodeNode); Node != nullptr) {
          auto Reg = DecodeSRAReg(IROp, CodeNode);

          PreferredReg[IR->GetID(Node).Value] = Reg;
          GetClass(Reg)->RegToSSA[Reg.Reg] = CodeNode;
        }

        // Coalescing an SRA store is equivalent to hoisting the store,
        // implying write-after-write and read-after-write hazards. We can only
        // coalesce if there is no intervening load/store.
        //
        // Since we're walking backwards, RegToSSA tracks
        // the first load/store after CodeNode. That first instruction is the
        // store in question iff there is no intervening load/store.
        //
        // Reset PreferredReg if that is not the case, ensuring SRA correctness.
        if (auto Reg = PreferredReg[IR->GetID(CodeNode).Value]; !Reg.IsInvalid()) {
          auto Node = GetClass(Reg)->RegToSSA[Reg.Reg];
          IROp_Header* Header = IR->GetOp<IROp_Header>(Node);

          if (CodeNode != DecodeSRANode(Header, Node)) {
            PreferredReg[IR->GetID(CodeNode).Value] = PhysicalRegister::Invalid();
          }
        }

        // Because we walk backwards, the first time a value is encountered as
        // a source is its last use in program order: tag that use as a kill.
        const int NumArgs = IR::GetRAArgs(IROp->Op);
        for (int i = NumArgs - 1; i >= 0; --i) {
          const auto& Arg = IROp->Args[i];
          if (!Arg.IsInvalid()) {
            const uint32_t Index = Arg.ID().Value;
            if (!Seen[Index]) {
              Seen[Index] = true;
              IROp->Args[i].SetKill();
            }
          }
        }

        // Rest is iteration gunk
        if (CodeLast == CodeBegin) {
          break;
        }
        --CodeLast;
      }
    }

    // NextUses currently contains first use distances, the exact initialization
    // assumed by the forward pass. Do not reset it.
    // NOTE(review): the backwards pass above only writes kill bits, Seen and
    // PreferredReg/RegToSSA; it does not write NextUses. Confirm the comment
    // above still describes the current design.

    // Last nontrivial instruction, for merging as we go.
    Ref LastNode = nullptr;

    // Forward pass: Assign registers, spilling & optimizing as we go.
    for (auto [CodeNode, IROp] : IR->GetCode(BlockNode)) {
      bool AnySpilledBeforeThisInstruction = AnySpilled;

      // These do not read or write registers, and must be skipped for merging.
      // Since we'd be doing this check anyway for merging, do the check now so
      // we can skip the rest of the logic too.
      if (IROp->Op == OP_GUESTOPCODE || IROp->Op == OP_INLINECONSTANT) {
        continue;
      }

      // Static registers must be consistent at SRA load/store. Evict to ensure.
      if (auto Node = DecodeSRANode(IROp, CodeNode); Node != nullptr) {
        auto Reg = DecodeSRAReg(IROp, CodeNode);
        RegisterClassData* Class = &Classes[Reg.Class];

        // The static register is occupied; find out by whom.
        if (!(Class->Available & (1u << Reg.Reg))) {
          Ref Old = Class->RegToSSA[Reg.Reg];

          if (Old != Node) {
            // Before inserting instructions, we need to set the cursor and
            // reset LastNode so we don't merge across an inserted copy.
            // Otherwise, we would erroneously miss the copy when determining if
            // we can merge, and end up unsoundly merging a mov+xchg sequence.
            IREmit->SetWriteCursorBefore(CodeNode);
            LastNode = nullptr;

            Ref Copy;

            if (Reg.AsRegClass() == RegClass::FPRFixed) {
              IROp_Header* Header = IR->GetOp<IROp_Header>(Old);
              Copy = IREmit->_VMov(Header->Size, OrderedNodeWrapper::FromImmediate(Reg.Raw));
            } else {
              Copy = IREmit->_Copy(OrderedNodeWrapper::FromImmediate(Reg.Raw));
            }

            // Release the static register, give the copy a register of its
            // own, and make the evicted value live in the copy's register.
            FreeReg(Reg);
            AssignReg(IR->GetOp<IROp_Header>(Copy), BlockIROp, Copy, IROp);
            RemapReg(Old, PhysicalRegister(Copy));
          }
        }
      }

      // Fill all sources that are not already in the register file.
      //
      // This happens before freeing killed sources, since we need all sources in
      // the register file simultaneously.
      //
      // Also update next-use info, again only relevant if we've spilled.
      int NumArgs = IR::GetRAArgs(IROp->Op);

      if (AnySpilledBeforeThisInstruction) {
        for (int s = 0; s < NumArgs; ++s) {
          // V is a local copy; clearing its kill bit does not touch IROp->Args.
          auto V = IROp->Args[s];
          V.ClearKill();

          if (!IsValidArg(V)) {
            continue;
          }

          Ref Old = IR->GetNode(V);

          // Consume the next entry of SourcesNextUses from the back.
          // NOTE(review): presumably populated by the spilling logic outside
          // this function — confirm.
          SourceIndex--;
          LOGMAN_THROW_A_FMT(SourceIndex >= 0, "Consistent source count");
          NextUses[V.ID().Value] = SourcesNextUses[SourceIndex];

          if (!IsInRegisterFile(Old)) {
            // Inserting a fill also invalidates the merge candidate.
            IREmit->SetWriteCursorBefore(CodeNode);
            LastNode = nullptr;

            Ref Fill = InsertFill(Old);

            AssignReg(IR->GetOp<IROp_Header>(Fill), BlockIROp, Fill, IROp);
            RemapReg(Old, PhysicalRegister(Fill));
          }
        }
      }

      // Rewrite each source to the register holding it, freeing registers of
      // sources whose last use is this instruction.
      for (int s = 0; s < NumArgs; ++s) {
        if (IROp->Args[s].IsInvalid()) {
          continue;
        }

        bool Kill = IROp->Args[s].HasKill();
        IROp->Args[s].ClearKill();
        Ref Node = IR->GetNode(IROp->Args[s]);
        auto ID = IR->GetID(Node).Value;
        auto Reg = SSAToReg[ID];

        if (!Reg.IsInvalid()) {
          if (Kill) {
            LOGMAN_THROW_A_FMT(IsInRegisterFile(Node), "sources in file");
            FreeReg(Reg);
          }

          IROp->Args[s].SetImmediate(Reg.Raw);
        }
      }

      // Assign destinations.
      if (GetHasDest(IROp->Op) && PhysicalRegister(CodeNode).IsInvalid()) {
        AssignReg(IROp, BlockIROp, CodeNode, IROp);
      }

      if (IsTrivial(CodeNode, IROp)) {
        // Delete instructions that only exist for RA
        IREmit->RemovePostRA(CodeNode);
      } else if (LastNode && TryPostRAMerge(LastNode, CodeNode, IROp)) {
        // Merge adjacent instructions
        IREmit->RemovePostRA(LastNode);
        LastNode = nullptr;
      } else {
        LastNode = CodeNode;
      }
    }

    if (AnySpilled) {
      LOGMAN_THROW_A_FMT(SourceIndex == 0, "Consistent source count in block");
    }
  }

  // Drop all per-run state so the next invocation starts clean.
  PreferredReg.clear();
  SSAToReg.clear();
  SpillSlots.clear();
  NextUses.clear();
  Seen.clear();

  IR->GetHeader()->PostRA = true;
}

// Factory: builds a ConstrainedRAPass and hands it back through the generic
// RegisterAllocationPass interface.
fextl::unique_ptr<IR::RegisterAllocationPass> CreateRegisterAllocationPass(const FEXCore::CPUIDEmu* CPUID) {
  fextl::unique_ptr<IR::RegisterAllocationPass> Pass = fextl::make_unique<ConstrainedRAPass>(CPUID);
  return Pass;
}
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: ir|opts
$end_info$
*/

#pragma once
#include "Interface/IR/PassManager.h"

#include <memory>
#include <stdint.h>

namespace FEXCore::IR {
enum class RegClass : uint32_t;

// Abstract interface of a register allocation pass.
class RegisterAllocationPass : public FEXCore::IR::Pass {
public:
  // Declares that RegisterCount registers of the given class may be allocated.
  virtual void AddRegisters(RegClass Class, uint32_t RegisterCount) = 0;

  // Number of GPRs usable for pairs at start of GPR set. Must be even.
  uint32_t PairRegs {};
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/Passes/x87StackOptimizationPass.cpp
================================================
// SPDX-License-Identifier: MIT
#include "FEXCore/Utils/LogManager.h"
#include "Interface/Core/Interpreter/Fallbacks/FallbackOpHandler.h"
#include "Interface/IR/IR.h"
#include "Interface/IR/IREmitter.h"
#include "Interface/IR/PassManager.h"
#include "FEXCore/IR/IR.h"
#include "FEXCore/Utils/Profiler.h"
#include "FEXCore/Core/HostFeatures.h"
#include "Interface/Core/Addressing.h"

#include <array>
#include <cstddef>
#include <cstdint>
#include <stdint.h>

// This file adds a pass to process X87 stack instructions.
// These instructions are marked in IR.json with `X87: true` and are generated
// by X87 guest instructions.
// The way it works is that there's a virtual stack `StackData`, where we load and store
// and apply the operations in a block of code. Once the block finishes, we emit the necessary operations
// that we recorded onto the virtual stack. This allows us to save a lot of code movement
// to and from stack registers, top management and valid flags. It also allows us to
// perform memcpy optimizations like the one performed in STORESTACKMEM.
//
// By default we run on the fast path - i.e. we assume all values are in the stack and we have a complete
// stack overview. However, if we encounter a value that's not in the virtual stack - maybe it was added
// to the stack in a previous block, we move onto the slow path which loads and stores values to the stack
// registers.
// Once in the slow path, we won't return to the fast path until the beginning of the following block.

namespace FEXCore::IR {

// FIXME(pmatos): copy from OpcodeDispatcher.h
// Byte offset of the first element of the `mm` array inside Core::CPUState.
inline uint32_t MMBaseOffset() {
  const auto Offset = offsetof(Core::CPUState, mm[0][0]);
  return static_cast<uint32_t>(Offset);
}

// Similar helper to the one in OpcodeDispatcher.h except we do not
// need to handle flags, etc.
// Overwrites the opcode of the freshly emitted op in Expr with NewOp and
// stores the resulting node reference in RefV.
template<typename T>
void DeriveOp(Ref& RefV, IROps NewOp, IREmitter::IRPair<T> Expr) {
  auto& Header = Expr.first->Header;
  Header.Op = NewOp;
  RefV = Expr;
}

// Tag of one simulated x87 stack slot. FixedSizeStack resets slots to UNUSED in
// clear(), marks popped slots INVALID and marks pushed/written slots VALID.
enum class StackSlot { UNUSED, INVALID, VALID };
// FixedSizeStack is a model of the x87 Stack where each element in this
// fixed size stack lives at an offset from top. The top of the stack is at
// index 0.
template<typename T>
class FixedSizeStack {
public:
  // One slot of the stack: its tag plus the payload stored in it.
  struct StackSlotEntry final {
    StackSlot Type;
    T Value;
  };

  // Number of slots in the stack.
  static constexpr uint8_t size = 8;

  // Real top as an offset from stored top value (or the one at the beginning of the block)
  // For example, if we start and push a value to our simulated stack, because we don't
  // update top straight away the TopOffset is 1.
  // If SlowPath is true, then TopOffset is always zero.
  int8_t TopOffset = 0;

  // All slots start out UNUSED and hold T::Invalid.
  FixedSizeStack()
    : buffer(FixedSizeStack::size, {StackSlot::UNUSED, T::Invalid}) {}

  // Pushes Value as the new top (index 0); the previous last slot wraps around
  // and is overwritten.
  void push(const T& Value) {
    rotate();
    buffer.front() = {StackSlot::VALID, Value};
  }

  // Rotate the elements with the direction controlled by Right.
  // Rotating right moves the last slot to the front and increments TopOffset;
  // rotating left moves the front slot to the back and decrements TopOffset.
  void rotate(bool Right = true) {
    if (Right) {
      std::rotate(buffer.begin(), buffer.end() - 1, buffer.end());
      TopOffset++;
    } else {
      std::rotate(buffer.begin(), buffer.begin() + 1, buffer.end());
      TopOffset--;
    }
  }

  // Marks the current top INVALID and rotates it to the back of the stack.
  void pop() {
    buffer.front() = {StackSlot::INVALID, T::Invalid};
    rotate(false);
  }

  // Returns the slot at Offset from the top. Offset is not bounds-checked.
  const StackSlotEntry& top(size_t Offset = 0) const {
    return buffer[Offset];
  }

  // Overwrites the slot at Offset from the top with a VALID entry holding Value.
  void setTop(T Value, size_t Offset = 0) {
    buffer[Offset] = {StackSlot::VALID, Value};
  }

  // Returns true if the slot at Offset from the top is tagged VALID.
  // (Previously read a non-existent `.first` member of StackSlotEntry, which
  // would fail to compile as soon as this method was instantiated.)
  bool isValid(size_t Offset) const {
    return buffer[Offset].Type == StackSlot::VALID;
  }

  // Resets every slot to UNUSED and TopOffset to zero.
  void clear() {
    for (auto& Elem : buffer) {
      Elem = {StackSlot::UNUSED, T::Invalid};
    }
    TopOffset = 0;
  }

  // Logs every VALID and INVALID slot; UNUSED slots are not printed.
  void dump() const {
    LogMan::Msg::DFmt("-- Stack");

    for (size_t i = 0; i < buffer.size(); i++) {
      const auto& [Valid, Element] = buffer[i];
      if (Valid == StackSlot::VALID) {
        LogMan::Msg::DFmt("| ST{}: 0x{:x}", i, (uintptr_t)(Element.StackDataNode));
      } else if (Valid == StackSlot::INVALID) {
        LogMan::Msg::DFmt("| ST{}: INVALID", i);
      }
    }
    LogMan::Msg::DFmt("--");
  }

  // Re-tags the slot at Index as INVALID without touching its payload.
  void setTagInvalid(size_t Index) {
    buffer[Index].Type = StackSlot::INVALID;
  }

  // Returns a mask to set in AbridgedTagWord: bit i is set iff slot i is VALID.
  uint8_t getValidMask() const {
    return getMaskFor(StackSlot::VALID);
  }

  // Returns a mask to set in AbridgedTagWord: bit i is set iff slot i is INVALID.
  uint8_t getInvalidMask() const {
    return getMaskFor(StackSlot::INVALID);
  }

private:
  // Builds a bitmask with bit i set for every slot whose tag equals Wanted.
  uint8_t getMaskFor(StackSlot Wanted) const {
    uint8_t Mask = 0;
    for (size_t i = 0; i < buffer.size(); i++) {
      if (buffer[i].Type == Wanted) {
        Mask |= 1U << i;
      }
    }
    return Mask;
  }

  fextl::vector<StackSlotEntry> buffer;
};

// IR pass that processes x87 stack operations using a simulated stack
// (StackData) while on the fast path, and cached top/value/tag state while on
// the slow path.
class X87StackOptimization final : public Pass {
public:
  // Captures the host feature set and GPR operand size, and latches the
  // X87REDUCEDPRECISION configuration option into ReducedPrecisionMode.
  X87StackOptimization(const FEXCore::HostFeatures& Features, OpSize GPROpSize)
    : Features(Features)
    , GPROpSize(GPROpSize) {
    FEX_CONFIG_OPT(ReducedPrecision, X87REDUCEDPRECISION);
    ReducedPrecisionMode = ReducedPrecision;
  }
  void Run(IREmitter* Emit) override;

private:
  const FEXCore::HostFeatures& Features;
  const OpSize GPROpSize;
  bool ReducedPrecisionMode;
  FEX_CONFIG_OPT(DisableVixlIndirectCalls, DISABLE_VIXL_INDIRECT_RUNTIME_CALLS);

  // Helpers
  Ref RotateRight8(uint32_t V, Ref Amount);

  // Stores an 80-bit value as two pieces: the low 64 bits through an FPR
  // store, then the remaining 16 bits through a GPR store at byte offset 8.
  void F80SplitStore_Helper(const IROp_StoreStackMem* Op, Ref StackNode, Ref AddrNode, Ref Offset, OpSize Align, MemOffsetType OffsetType,
                            uint8_t OffsetScale) {
    IREmit->_StoreMemFPR(OpSize::i64Bit, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
    auto Upper = IREmit->_VExtractToGPR(OpSize::i128Bit, OpSize::i64Bit, StackNode, 1);

    // Store the Upper part of the register (the remaining 2 bytes) into memory.
    AddressMode A {.Base = AddrNode,
                   .Index = Op->Offset.IsInvalid() ? nullptr : Offset,
                   .Offset = 8,
                   .IndexType = MemOffsetType::SXTX,
                   .IndexScale = OffsetScale,
                   .AddrSize = OpSize::i64Bit};
    A = SelectAddressMode(IREmit, A, GPROpSize, Features.SupportsTSOImm9, false, false, OpSize::i16Bit);
    IREmit->_StoreMemGPR(OpSize::i16Bit, Upper, A.Base, A.Index, OpSize::i64Bit, MemOffsetType::SXTX, A.IndexScale);
  }

  // Stores an 80-bit value to memory. Uses the SVE predicated store when the
  // host reports SVE128 or SVE256 support, otherwise the split store above.
  void Store80BitToMem(const IROp_StoreStackMem* Op, Ref StackNode, Ref AddrNode, Ref Offset, OpSize Align, MemOffsetType OffsetType,
                       uint8_t OffsetScale) {
    if (Features.SupportsSVE128 || Features.SupportsSVE256) {
      AddressMode A {.Base = AddrNode,
                     .Index = Op->Offset.IsInvalid() ? nullptr : Offset,
                     .IndexType = MemOffsetType::SXTX,
                     .IndexScale = OffsetScale,
                     .AddrSize = OpSize::i64Bit};
      AddrNode = LoadEffectiveAddress(IREmit, A, GPROpSize, false);
      IREmit->_StoreMemX87SVEOptPredicate(OpSize::i128Bit, OpSize::i16Bit, StackNode, AddrNode);
    } else {
      F80SplitStore_Helper(Op, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
    }
  }

  // Performs a store to memory from a stack value passed in as StackNode.
  // This is the version dealing with the full precision case.
  void StoreStackMem_Helper(const IROp_StoreStackMem* Op, Ref StackNode) {
    LOGMAN_THROW_A_FMT(!ReducedPrecisionMode, "Full precision mode expected.");

    Ref AddrNode = IR->GetNode(Op->Addr);
    Ref Offset = IR->GetNode(Op->Offset);
    OpSize Align = Op->Align;
    MemOffsetType OffsetType = Op->OffsetType;
    uint8_t OffsetScale = Op->OffsetScale;

    // Normal Precision Mode
    switch (Op->StoreSize) {
    case OpSize::i32Bit:
    case OpSize::i64Bit: {
      // Convert the stack value to the store size before writing it out.
      StackNode = IREmit->_F80CVT(Op->StoreSize, StackNode);
      IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
      break;
    }

    case OpSize::f80Bit: {
      Store80BitToMem(Op, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
      break;
    }
    default: ERROR_AND_DIE_FMT("Unsupported x87 size");
    }
  }

  // Performs a store to memory from a value the stack passed in as StackNode.
  // This is the version dealing with the reduced precision case.
  void StoreStackMem_Reduced_Helper(const IROp_StoreStackMem* Op, Ref StackNode) {
    LOGMAN_THROW_A_FMT(ReducedPrecisionMode, "Reduced precision mode expected.");

    Ref AddrNode = IR->GetNode(Op->Addr);
    Ref Offset = IR->GetNode(Op->Offset);
    OpSize Align = Op->Align;
    MemOffsetType OffsetType = Op->OffsetType;
    uint8_t OffsetScale = Op->OffsetScale;

    switch (Op->StoreSize) {
    case OpSize::i32Bit: {
      // Narrow to 32-bit first, then share the store with the 64-bit case.
      StackNode = IREmit->_Float_FToF(OpSize::i32Bit, OpSize::i64Bit, StackNode);
      [[fallthrough]];
    }
    case OpSize::i64Bit: {
      IREmit->_StoreMemFPR(Op->StoreSize, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
      break;
    }

    case OpSize::f80Bit: {
      StackNode = IREmit->_F80CVTTo(StackNode, OpSize::i64Bit);
      Store80BitToMem(Op, StackNode, AddrNode, Offset, Align, OffsetType, OffsetScale);
      break;
    }
    default: ERROR_AND_DIE_FMT("Unsupported x87 size");
    }
  }

  // Handles a Unary operation.
  // Takes the op we are handling, the Node for the reduced precision case and the node for the normal case.
  // Depending on the type of Op64, we might need to pass a couple of extra constant arguments, this happens
  // when VFOp64 is true.
  void HandleUnop(IROps Op64, bool VFOp64, IROps Op80);
  void HandleBinopValue(IROps Op64, bool VFOp64, IROps Op80, uint8_t DestStackOffset, bool MarkDestValid, uint8_t StackOffset,
                        Ref ValueNode, bool Reverse = false);
  void HandleBinopStack(IROps Op64, bool VFOp64, IROps Op80, uint8_t DestStackOffset, uint8_t StackOffset1, uint8_t StackOffset2,
                        bool Reverse = false);

  // Top Management Helpers
  /// Set the valid tag for Value as valid (if Valid is true), or invalid (if Valid is false).
  void SetX87ValidTag(uint8_t Offset, bool Valid);
  // Generates slow code to load/store a value from an offset from the top of the stack
  Ref LoadStackValueAtOffset_Slow(uint8_t Offset = 0);
  void StoreStackValueAtOffset_Slow(Ref Value, uint8_t Offset = 0, bool SetValid = true);
  // Update Top value in slow path for a pop
  void UpdateTopForPop_Slow();
  void UpdateTopForPush_Slow();
  // Synchronizes the current simulated stack with the actual values.
  // Returns a new value for Top, that's synchronized between the simulated stack
  // and the actual FPU stack.
  Ref SynchronizeStackValues();
  // Moves us from the fast to the slow path if ShouldMigrate is true.
  void MigrateToSlowPathIf(bool ShouldMigrate);
  // Top Cache Management
  Ref GetTopWithCache_Slow();
  Ref GetOffsetTopWithCache_Slow(uint8_t Offset, bool Reverse = false);
  Ref GetOffsetTopAddressWithCache_Slow(uint8_t Offset);
  void SetTopWithCache_Slow(Ref Value);
  Ref GetX87ValidTag_Slow(uint8_t Offset);
  // Resets fields to initial values
  void Reset();

  struct StackMemberInfo {
    StackMemberInfo() = delete;
    StackMemberInfo(Ref Data)
      : StackDataNode(Data) {}
    StackMemberInfo(Ref Data, Ref Source, OpSize Size)
      : StackDataNode(Data)
      , Source({Size, Source}) {}
    Ref StackDataNode {}; // Reference to the data in the Stack.
                          // This is the source data node in the stack format, possibly converted to 64/80 bits.
    struct StackMemberData final {
      OpSize Size;
      Ref Node;
    };

    static const StackMemberInfo Invalid;

    // Only populated if we have information about the Source of the Stack Data Node.
    // If it's populated then Size is the original source size and Node is the original source node.
    std::optional<StackMemberData> Source {};
  };

  // StackData, TopCache need to be always properly set to ensure
  // they reflect the current state of the FPU. This sync only makes sense while
  // taking the fast path. Once in the slow path, these don't make sense anymore
  // and we are syncing everything.

  // Index on vector is offset to top value at start of block
  // If slow path is true, then StackData is always empty.
  FixedSizeStack<StackMemberInfo> StackData;

  void InvalidateCaches();
  void InvalidateCachedRegs();

  // Path Migration helper management
  std::optional<StackMemberInfo> MigrateToSlowPath_IfInvalid(uint8_t Offset = 0);
  Ref LoadStackValue(uint8_t Offset = 0);
  void StoreStackValue(Ref Value, uint8_t Offset = 0, bool SetValid = false);
  void StackPop();

  // Cache for Constants
  // ConstantPool[i] caches the node produced by IREmit->_Constant(i).
  std::array<Ref, 8> ConstantPool {};
  Ref GetConstant(ssize_t Offset);

  // Cached value for Top
  // If slowpath is false, then TopCache is nullptr.
  bool FlushTopPending = false;
  std::array<bool, 8> FlushValuesPending {};
  bool FlushValidPending = false;
  void FlushCachedRegs();

  Ref GetFTW();

  // Slow-path caches, all indexed by offset from the current top.
  Ref FTWCached {};
  std::array<Ref, 8> TopOffsetCache {};
  std::array<Ref, 8> TopOffsetAddressCache {};
  std::array<Ref, 8> TopValueCache {};
  std::array<StackSlot, 8> TopValidCache {};

  // Are we on the slow path?
  // Once we enter the slow path, we never come out.
  // This just simplifies the code atm. If there's a need to return to the fast path in the future
  // we can implement that but I would expect that there would be very few cases where that's necessary.
  // On the slow path TopCache is always the last obtained version of top.
  // TopOffset is ignored
  bool SlowPath = false;
  // Keeping IREmitter not to pass arguments around
  IREmitter* IREmit = nullptr;
  IRListView* IR = nullptr;
};

// Sentinel entry: null StackDataNode and no Source information.
inline const X87StackOptimization::StackMemberInfo X87StackOptimization::StackMemberInfo::Invalid {nullptr};

// Drops every cache the pass maintains: first the register caches (which are
// flushed by InvalidateCachedRegs), then the constant pool.
inline void X87StackOptimization::InvalidateCaches() {
  InvalidateCachedRegs();
  for (auto& Entry : ConstantPool) {
    Entry = nullptr;
  }
}

// Flushes pending cached register state, then forgets the cached FTW, top
// offsets, top addresses, stack values and valid tags.
inline void X87StackOptimization::InvalidateCachedRegs() {
  // Flush must run before the caches are wiped.
  FlushCachedRegs();

  FTWCached = nullptr;
  for (auto& Entry : TopOffsetCache) {
    Entry = nullptr;
  }
  for (auto& Entry : TopOffsetAddressCache) {
    Entry = nullptr;
  }
  for (auto& Entry : TopValueCache) {
    Entry = nullptr;
  }
  for (auto& Tag : TopValidCache) {
    Tag = StackSlot::UNUSED;
  }
}

// Returns the pass to its initial state: back on the fast path, with an empty
// simulated stack and all caches invalidated.
inline void X87StackOptimization::Reset() {
  SlowPath = false;
  StackData.clear();
  InvalidateCaches();
}

// Returns a node holding the constant Offset. Values that index into
// ConstantPool are emitted once and reused; all others are emitted afresh on
// every call.
inline Ref X87StackOptimization::GetConstant(ssize_t Offset) {
  const bool InPool = Offset >= 0 && static_cast<size_t>(Offset) < X87StackOptimization::ConstantPool.size();
  if (!InPool) {
    // not dealt by pool
    return IREmit->_Constant(Offset);
  }

  Ref& Slot = ConstantPool[Offset];
  if (Slot == nullptr) {
    Slot = IREmit->_Constant(Offset);
  }
  return Slot;
}

// Switches from the fast path to the slow path when ShouldMigrate is set.
// Does nothing if migration is not requested or we are already on the slow path.
inline void X87StackOptimization::MigrateToSlowPathIf(bool ShouldMigrate) {
  if (!ShouldMigrate || SlowPath) {
    return;
  }

  // Write the simulated stack back before discarding it.
  SynchronizeStackValues();
  StackData.clear();
  SlowPath = true;
}

// Returns the node holding the x87 top value, loading it from the context on
// first use and caching it in TopOffsetCache[0].
inline Ref X87StackOptimization::GetTopWithCache_Slow() {
  Ref& CachedTop = TopOffsetCache[0];
  if (CachedTop == nullptr) {
    CachedTop = IREmit->_LoadContextGPR(OpSize::i8Bit, offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC);
  }
  return CachedTop;
}

// Returns a node holding (top + Offset) & 7, or (top + 8 - Offset) & 7 when
// Reverse is set. Results are cached per effective offset.
inline Ref X87StackOptimization::GetOffsetTopWithCache_Slow(uint8_t Offset, bool Reverse) {
  const uint8_t Adjusted = Reverse ? static_cast<uint8_t>(8 - Offset) : Offset;
  const uint8_t Index = Adjusted & 7;

  if (Ref Cached = TopOffsetCache[Index]; Cached) {
    return Cached;
  }

  Ref Top = GetTopWithCache_Slow();
  if (Index == 0) {
    // GetTopWithCache_Slow already sets the cache so we don't need to set it here for offset == 0
    return Top;
  }

  Ref Result = IREmit->_And(OpSize::i32Bit, IREmit->Add(OpSize::i32Bit, Top, Index), GetConstant(7));
  TopOffsetCache[Index] = Result;
  return Result;
}

// Returns the context address node formed from the top index at Offset with a
// multiplier of 16, emitting it once per Offset and caching it.
inline Ref X87StackOptimization::GetOffsetTopAddressWithCache_Slow(uint8_t Offset) {
  Ref& Slot = TopOffsetAddressCache[Offset];
  if (Slot == nullptr) {
    Ref TopIndex = GetOffsetTopWithCache_Slow(Offset);
    Slot = IREmit->_FormContextAddress(OpSize::i64Bit, TopIndex, 16);
  }
  return Slot;
}

// Installs Value as the new cached top. All register caches are flushed and
// invalidated first, and the new top is marked as pending a flush.
inline void X87StackOptimization::SetTopWithCache_Slow(Ref Value) {
  InvalidateCachedRegs();

  TopOffsetCache.front() = Value;
  FlushTopPending = true;
}

// Returns the node holding AbridgedFTW, loading it from the context on first
// use and caching it afterwards.
inline Ref X87StackOptimization::GetFTW() {
  if (FTWCached) {
    return FTWCached;
  }

  FTWCached = IREmit->_LoadContextGPR(OpSize::i8Bit, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
  return FTWCached;
}

inline void X87StackOptimization::SetX87ValidTag(uint8_t Offset, bool Valid) {
  TopValidCache[Offset] = Valid ? StackSlot::VALID : StackSlot::INVALID;
  FlushValidPending = true;
}

// Returns a node holding the valid tag (0 or 1) for the slot at Offset.
//
// A tag already recorded in TopValidCache is returned as a constant. Otherwise
// the tag is extracted from the cached FTW by shifting it right by the top
// index at Offset and masking the low bit.
//
// The lookup from FTW sits after the switch so that every path returns a
// value; previously control could flow off the end of this non-void function
// if the cached tag held a value not named by a case label.
inline Ref X87StackOptimization::GetX87ValidTag_Slow(uint8_t Offset) {
  switch (TopValidCache[Offset]) {
  case StackSlot::INVALID: return GetConstant(0);
  case StackSlot::VALID: return GetConstant(1);
  case StackSlot::UNUSED: break;
  }

  // Nothing cached for this slot: consult the tag word.
  return IREmit->_And(OpSize::i32Bit, IREmit->_Lshr(OpSize::i32Bit, GetFTW(), GetOffsetTopWithCache_Slow(Offset)), GetConstant(1));
}

// Slow-path load of the stack value at Offset from the top. The value is
// loaded from memory once (64-bit in reduced precision mode, 128-bit
// otherwise) and cached in TopValueCache.
inline Ref X87StackOptimization::LoadStackValueAtOffset_Slow(uint8_t Offset) {
  // The address is always requested, even when the value is already cached.
  Ref Address = GetOffsetTopAddressWithCache_Slow(Offset);
  const OpSize LoadSize = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;

  Ref& Slot = TopValueCache[Offset];
  if (Slot == nullptr) {
    Slot = IREmit->_LoadMemFPR(LoadSize, Address, IREmit->_InlineConstant(MMBaseOffset()), LoadSize, MemOffsetType::SXTX, 1);
  }
  return Slot;
}

// Slow-path store: caches Value for the slot at Offset and marks it as
// pending a flush. The slot's tag is set to valid only when SetValid is true.
inline void X87StackOptimization::StoreStackValueAtOffset_Slow(Ref Value, uint8_t Offset, bool SetValid) {
  FlushValuesPending[Offset] = true;
  TopValueCache[Offset] = Value;

  // mark it valid
  // In some cases we might already know it has been previously set as valid so we don't need to do it again
  if (!SetValid) {
    return;
  }
  SetX87ValidTag(Offset, true);
}

// Emits a 32-bit logical right shift by Amount of the constant formed by
// V | (V << 8).
inline Ref X87StackOptimization::RotateRight8(uint32_t V, Ref Amount) {
  Ref Doubled = GetConstant(V | (V << 8));
  return IREmit->_Lshr(OpSize::i32Bit, Doubled, Amount);
}

inline std::optional<X87StackOptimization::StackMemberInfo> X87StackOptimization::MigrateToSlowPath_IfInvalid(uint8_t Offset) {
  const auto& [Valid, StackMember] = StackData.top(Offset);
  MigrateToSlowPathIf(Valid != StackSlot::VALID);
  if (Valid == StackSlot::VALID) {
    return StackMember;
  }
  return {};
}

// Returns the stack value at Offset from the top: from the simulated stack on
// the fast path, or through the slow-path load otherwise. May migrate to the
// slow path if the simulated slot is not valid.
inline Ref X87StackOptimization::LoadStackValue(uint8_t Offset) {
  const auto Simulated = MigrateToSlowPath_IfInvalid(Offset);
  if (SlowPath) {
    return LoadStackValueAtOffset_Slow(Offset);
  }
  return Simulated->StackDataNode;
}

// Stores Value into the stack slot at Offset from the top, using the
// slow-path cache or the simulated stack depending on the current path.
// SetValid is only consulted on the slow path.
inline void X87StackOptimization::StoreStackValue(Ref Value, uint8_t Offset, bool SetValid) {
  if (!SlowPath) {
    StackData.setTop(StackMemberInfo {Value}, Offset);
    return;
  }
  StoreStackValueAtOffset_Slow(Value, Offset, SetValid);
}

// Pops the top of the x87 stack on whichever path is currently active.
inline void X87StackOptimization::StackPop() {
  if (!SlowPath) {
    StackData.pop();
    return;
  }
  UpdateTopForPop_Slow();
}


// Applies a unary operation to st(0) and writes the result back to st(0).
//   Op64   - opcode used in reduced precision mode
//   VFOp64 - whether Op64 is a vector-float style op (selects the emitter shape used as a template)
//   Op80   - opcode used in full 80-bit mode
// A placeholder op of the right shape is emitted and handed to DeriveOp together
// with the requested opcode (presumably DeriveOp retargets the emitted op - confirm against its definition).
void X87StackOptimization::HandleUnop(IROps Op64, bool VFOp64, IROps Op80) {
  Ref Source = LoadStackValue();
  Ref Result {};

  if (!ReducedPrecisionMode) {
    DeriveOp(Result, Op80, IREmit->_F80SQRT(Source));
  } else if (VFOp64) {
    DeriveOp(Result, Op64, IREmit->_VFSqrt(OpSize::i64Bit, OpSize::i64Bit, Source));
  } else {
    DeriveOp(Result, Op64, IREmit->_F64SIN(Source));
  }

  StoreStackValue(Result);
}


// Applies a binary operation between the stack slot at `StackOffset` and `ValueNode`,
// storing the result in the stack slot at `DestStackOffset`.
//   Op64 / Op80   - opcodes for reduced precision and 80-bit mode respectively
//   VFOp64        - whether Op64 is a vector-float style op
//   MarkDestValid - request that the destination be tagged valid (skipped when the
//                   destination is the slot that was just loaded)
//   Reverse       - swap the operand order (value first, stack second)
void X87StackOptimization::HandleBinopValue(IROps Op64, bool VFOp64, IROps Op80, uint8_t DestStackOffset, bool MarkDestValid,
                                            uint8_t StackOffset, Ref ValueNode, bool Reverse) {
  LOGMAN_THROW_A_FMT(!Reverse || VFOp64, "There are no reverse operations using non VFOp64 ops");
  Ref StackOperand = LoadStackValue(StackOffset);

  Ref Result = {};
  if (!ReducedPrecisionMode) {
    if (Reverse) {
      DeriveOp(Result, Op80, IREmit->_F80Add(ValueNode, StackOperand));
    } else {
      DeriveOp(Result, Op80, IREmit->_F80Add(StackOperand, ValueNode));
    }
  } else if (Reverse) {
    DeriveOp(Result, Op64, IREmit->_VFAdd(OpSize::i64Bit, OpSize::i64Bit, ValueNode, StackOperand));
  } else if (VFOp64) {
    DeriveOp(Result, Op64, IREmit->_VFAdd(OpSize::i64Bit, OpSize::i64Bit, StackOperand, ValueNode));
  } else {
    DeriveOp(Result, Op64, IREmit->_F64FPREM(StackOperand, ValueNode));
  }

  // The loaded source slot does not need its tag set again when it is also the destination.
  const bool TagDestination = MarkDestValid && StackOffset != DestStackOffset;
  StoreStackValue(Result, DestStackOffset, TagDestination);
}

// Binary operation between two stack slots: loads the slot at `StackOffset2` and
// forwards to HandleBinopValue with the slot at `StackOffset1` as the other operand.
// The destination is only asked to be tagged valid when it differs from `StackOffset2`.
void X87StackOptimization::HandleBinopStack(IROps Op64, bool VFOp64, IROps Op80, uint8_t DestStackOffset, uint8_t StackOffset1,
                                            uint8_t StackOffset2, bool Reverse) {
  Ref SecondOperand = LoadStackValue(StackOffset2);
  const bool MarkDestValid = DestStackOffset != StackOffset2;
  HandleBinopValue(Op64, VFOp64, Op80, DestStackOffset, MarkDestValid, StackOffset1, SecondOperand, Reverse);
}

// Slow-path pop: makes the slot at offset 1 the new top.
// Every per-offset cache is rotated left by one so entry 1 becomes entry 0, and the
// top is marked as needing a write-back.
inline void X87StackOptimization::UpdateTopForPop_Slow() {
  // Request the offset-1 top value before rotating, as the original code does.
  GetOffsetTopWithCache_Slow(1);

  const auto RotateLeftByOne = [](auto& Cache) {
    std::rotate(Cache.begin(), std::next(Cache.begin()), Cache.end());
  };
  RotateLeftByOne(TopOffsetCache);
  RotateLeftByOne(TopOffsetAddressCache);
  RotateLeftByOne(TopValueCache);
  RotateLeftByOne(FlushValuesPending);
  RotateLeftByOne(TopValidCache);

  FlushTopPending = true;
}

// Slow-path push: moves the top of the x87 stack one slot in the push direction.
// Every per-offset cache is rotated right by one so the last entry becomes entry 0,
// and the top is marked as needing a write-back.
inline void X87StackOptimization::UpdateTopForPush_Slow() {
  // Request the offset-1 top value in the reverse direction before rotating, as the original code does.
  GetOffsetTopWithCache_Slow(1, true);

  const auto RotateRightByOne = [](auto& Cache) {
    std::rotate(Cache.begin(), std::prev(Cache.end()), Cache.end());
  };
  RotateRightByOne(TopOffsetCache);
  RotateRightByOne(TopOffsetAddressCache);
  RotateRightByOne(TopValueCache);
  RotateRightByOne(FlushValuesPending);
  RotateRightByOne(TopValidCache);

  FlushTopPending = true;
}

// Emits the stores that write pending slow-path cached state back to the CPU context:
// the top-of-stack value, any dirty stack values, and the abridged tag word (FTW).
// Each pending flag is cleared once the corresponding store has been emitted.
void X87StackOptimization::FlushCachedRegs() {
  // Write back the cached top (entry 0 of the offset cache) if it changed.
  if (FlushTopPending) {
    IREmit->_StoreContextGPR(OpSize::i8Bit, TopOffsetCache[0], offsetof(FEXCore::Core::CPUState, flags) + FEXCore::X86State::X87FLAG_TOP_LOC);
    FlushTopPending = false;
  }

  // Stack slots are stored as 64-bit values in reduced precision mode and 128-bit values otherwise.
  auto Size = ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit;
  for (size_t i = 0; i < FlushValuesPending.size(); i++) {
    if (FlushValuesPending[i]) {
      OrderedNode* TopOffsetAddress = GetOffsetTopAddressWithCache_Slow(i);
      IREmit->_StoreMemFPR(Size, TopValueCache[i], TopOffsetAddress, IREmit->_InlineConstant(MMBaseOffset()), Size, MemOffsetType::SXTX, 1);
      // The store has been emitted; this slot is no longer dirty.
      FlushValuesPending[i] = false;
    }
  }

  if (FlushValidPending) {
    // Build two top-relative bitmasks from the tag cache: slots known VALID and slots known INVALID.
    // The cache is walked back to front while shifting left, so entry 0 lands in bit 0.
    // Slots in any other state contribute to neither mask.
    uint8_t ValidMask = 0;
    uint8_t InvalidMask = 0;
    for (auto It = TopValidCache.rbegin(); It != TopValidCache.rend(); It++) {
      ValidMask <<= 1;
      InvalidMask <<= 1;
      if (*It == StackSlot::VALID) {
        ValidMask |= 1;
      } else if (*It == StackSlot::INVALID) {
        InvalidMask |= 1;
      }
    }

    // Nothing to write if no slot has a known tag.
    if (ValidMask || InvalidMask) {
      Ref NewFTW = [&]() {
        if (ValidMask == 0xff || InvalidMask == 0xff) {
          // All eight tags are known to be the same, so the new tag word is a constant
          // and the current FTW does not need to be read.
          // If InvalidMask == 0xff then ValidMask = 0
          return GetConstant(ValidMask);
        } else {
          // Partial knowledge: start from the current FTW, set the bits known valid and
          // clear the bits known invalid.
          Ref NewFTW = GetFTW();
          Ref RotAmount {};
          if (std::popcount(ValidMask) == 1) {
            // Single bit: shift a 1 into place using the (cached) top-relative index of that slot.
            uint8_t BitIdx = std::countr_zero(ValidMask);
            Ref RegMask = IREmit->_Lshl(OpSize::i32Bit, GetConstant(1), GetOffsetTopWithCache_Slow(BitIdx));
            NewFTW = IREmit->_Or(OpSize::i32Bit, NewFTW, RegMask);
          } else if (ValidMask) {
            RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), GetTopWithCache_Slow());
            // perform a rotate right on mask by top
            NewFTW = IREmit->_Or(OpSize::i32Bit, NewFTW, RotateRight8(ValidMask, RotAmount));
          }

          if (std::popcount(InvalidMask) == 1) {
            uint8_t BitIdx = std::countr_zero(InvalidMask);
            Ref RegMask = IREmit->_Lshl(OpSize::i32Bit, GetConstant(1), GetOffsetTopWithCache_Slow(BitIdx));
            NewFTW = IREmit->_Andn(OpSize::i32Bit, NewFTW, RegMask);
          } else if (InvalidMask) {
            // Reuse the rotate amount computed for the valid mask when there is one.
            if (!RotAmount) {
              RotAmount = IREmit->_Sub(OpSize::i32Bit, GetConstant(8), GetTopWithCache_Slow());
            }
            NewFTW = IREmit->_Andn(OpSize::i32Bit, NewFTW, RotateRight8(InvalidMask, RotAmount));
          }
          return NewFTW;
        }
      }();

      // Only the low 8 bits are stored; remember the node so later FTW reads can reuse it.
      IREmit->_StoreContextGPR(OpSize::i8Bit, NewFTW, offsetof(FEXCore::Core::CPUState, AbridgedFTW));
      FTWCached = NewFTW;
    }

    FlushValidPending = false;
  }
}

// We synchronize stack values on a few occasions, but one of the most important of those
// is when we move from the fast path to the slow path and need to make sure that the
// context is properly written.
//
// Transfers the fast-path tracked state (top offset, stack values, tags) into the
// slow-path caches and marks it pending, so that a following FlushCachedRegs() writes it out.
// Returns the node holding the (updated) top of stack.
Ref X87StackOptimization::SynchronizeStackValues() {
  // Already on the slow path: there is no fast-path state to transfer.
  if (SlowPath) {
    return GetTopWithCache_Slow();
  }

  // Store new top which is now the original top minus recorded top offset
  // Careful with underflow wraparound.
  const auto TopOffset = StackData.TopOffset;

  if (TopOffset != 0) {
    Ref NewTop = GetOffsetTopWithCache_Slow(TopOffset, true);
    SetTopWithCache_Slow(NewTop);
  }
  // The recorded offset has been folded into the top; start counting from zero again.
  StackData.TopOffset = 0;

  // Before leaving we need to write the current values in the stack to
  // context so that the values are correct. Hand the StackDataNode of each
  // valid tracked slot to the slow-path value cache, which schedules the store.
  Ref TopValue = GetTopWithCache_Slow();
  for (size_t i = 0; i < StackData.size; ++i) {
    const auto& [Valid, StackMember] = StackData.top(i);

    if (Valid == StackSlot::VALID) {
      // Tags are transferred in bulk below, so the per-slot tag update is skipped here.
      StoreStackValueAtOffset_Slow(StackMember.StackDataNode, i, false);
    }
  }
  { // Set valid tags
    // Expand the tracked valid/invalid bitmasks into the per-slot tag cache, bit 0 first.
    // A slot in neither mask is recorded as UNUSED.
    uint8_t ValidMask = StackData.getValidMask();
    uint8_t InvalidMask = StackData.getInvalidMask();
    for (auto& Elem : TopValidCache) {
      Elem = (ValidMask & 1) ? StackSlot::VALID : ((InvalidMask & 1) ? StackSlot::INVALID : StackSlot::UNUSED);

      ValidMask >>= 1;
      InvalidMask >>= 1;
    }
    FlushValidPending = true;
  }

  return TopValue;
}

// Entry point of the x87 stack optimization pass.
//
// Walks every code block of the IR and replaces each x87 stack op (those for which
// LoweredX87() is true) with explicit IR. Stack state is tracked per block: it is
// reset at the start of each block and synchronized/flushed to the context right
// before the block's exit op. IR without any x87 usage is left untouched.
void X87StackOptimization::Run(IREmitter* Emit) {
  FEXCORE_PROFILE_SCOPED("PassManager::x87StackOpt");

  auto CurrentIR = Emit->ViewIR();
  auto* HeaderOp = CurrentIR.GetHeader();
  LOGMAN_THROW_A_FMT(HeaderOp->Header.Op == OP_IRHEADER, "First op wasn't IRHeader");

  if (!HeaderOp->HasX87) {
    // If there is no x87 in this, just early exit.
    return;
  }

  // Initialize IREmit member
  IREmit = Emit;
  IR = &CurrentIR;

  // Run optimization proper
  for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
    auto BlockIROp = BlockHeader->CW<FEXCore::IR::IROp_CodeBlock>();
    // Each time we deal with a new block we need to start over.
    // The optimization should run per-block
    Reset();

    IREmit->SetCurrentCodeBlock(BlockNode);
    for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
      if (!LoweredX87(IROp->Op)) {
        continue;
      }
      // New IR is emitted at the position of the op being lowered; the op itself is removed afterwards.
      IREmit->SetWriteCursor(CodeNode);
      switch (IROp->Op) {
      case OP_F80ADDSTACK: {
        const auto* Op = IROp->C<IROp_F80AddStack>();
        HandleBinopStack(OP_VFADD, true, OP_F80ADD, Op->SrcStack1, Op->SrcStack1, Op->SrcStack2);
        break;
      }

      case OP_F80SUBSTACK: {
        const auto* Op = IROp->C<IROp_F80SubStack>();
        HandleBinopStack(OP_VFSUB, true, OP_F80SUB, Op->DstStack, Op->SrcStack1, Op->SrcStack2);
        break;
      }

      case OP_F80MULSTACK: {
        const auto* Op = IROp->C<IROp_F80MulStack>();
        HandleBinopStack(OP_VFMUL, true, OP_F80MUL, Op->SrcStack1, Op->SrcStack1, Op->SrcStack2);
        break;
      }

      case OP_F80DIVSTACK: {
        const auto* Op = IROp->C<IROp_F80DivStack>();
        HandleBinopStack(OP_VFDIV, true, OP_F80DIV, Op->DstStack, Op->SrcStack1, Op->SrcStack2);
        break;
      }

      case OP_F80FPREMSTACK: {
        HandleBinopStack(OP_F64FPREM, false, OP_F80FPREM, 0, 0, 1);
        break;
      }

      case OP_F80FPREM1STACK: {
        HandleBinopStack(OP_F64FPREM1, false, OP_F80FPREM1, 0, 0, 1);
        break;
      }

      case OP_F80SCALESTACK: {
        HandleBinopStack(OP_F64SCALE, false, OP_F80SCALE, 0, 0, 1);
        break;
      }

      case OP_F80FYL2XSTACK: {
        // Result goes to st(1), then the stack is popped so it becomes the new st(0).
        HandleBinopStack(OP_F64FYL2X, false, OP_F80FYL2X, 1, 0, 1);
        StackPop();
        break;
      }

      case OP_F80ATANSTACK: {
        // Result goes to st(1), then the stack is popped so it becomes the new st(0).
        HandleBinopStack(OP_F64ATAN, false, OP_F80ATAN, 1, 1, 0);
        StackPop();
        break;
      }

      case OP_F80ADDVALUE: {
        const auto* Op = IROp->C<IROp_F80AddValue>();
        HandleBinopValue(OP_VFADD, true, OP_F80ADD, 0, true, Op->SrcStack, CurrentIR.GetNode(Op->X80Src));
        break;
      }

      case OP_F80SUBRVALUE:
      case OP_F80SUBVALUE: {
        // Both variants are read through IROp_F80SubValue; the reverse form only swaps operand order.
        const auto* Op = IROp->C<IROp_F80SubValue>();
        HandleBinopValue(OP_VFSUB, true, OP_F80SUB, 0, true, Op->SrcStack, CurrentIR.GetNode(Op->X80Src), IROp->Op == OP_F80SUBRVALUE);
        break;
      }

      case OP_F80DIVRVALUE:
      case OP_F80DIVVALUE: {
        // Both variants are read through IROp_F80DivValue; the reverse form only swaps operand order.
        const auto* Op = IROp->C<IROp_F80DivValue>();
        HandleBinopValue(OP_VFDIV, true, OP_F80DIV, 0, true, Op->SrcStack, CurrentIR.GetNode(Op->X80Src), IROp->Op == OP_F80DIVRVALUE);
        break;
      }

      case OP_F80MULVALUE: {
        const auto* Op = IROp->C<IROp_F80MulValue>();
        HandleBinopValue(OP_VFMUL, true, OP_F80MUL, 0, true, Op->SrcStack, CurrentIR.GetNode(Op->X80Src));
        break;
      }

      case OP_F80SQRTSTACK: {
        HandleUnop(OP_VFSQRT, true, OP_F80SQRT);
        break;
      }

      case OP_F80SINSTACK: {
        HandleUnop(OP_F64SIN, false, OP_F80SIN);
        break;
      }

      case OP_F80COSSTACK: {
        HandleUnop(OP_F64COS, false, OP_F80COS);
        break;
      }

      case OP_F80F2XM1STACK: {
        HandleUnop(OP_F64F2XM1, false, OP_F80F2XM1);
        break;
      }


      case OP_F80PTANSTACK: {
        // Replace st(0) with its tangent, then push the constant 1.0 on top.
        HandleUnop(OP_F64TAN, false, OP_F80TAN);
        Ref OneConst {};
        if (ReducedPrecisionMode) {
          // 0x3FF0000000000000 is the IEEE-754 double encoding of 1.0.
          OneConst = IREmit->_VCastFromGPR(OpSize::i64Bit, OpSize::i64Bit, GetConstant(0x3FF0000000000000));
        } else {
          OneConst = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, NamedVectorConstant::NAMED_VECTOR_X87_ONE);
        }

        if (SlowPath) {
          UpdateTopForPush_Slow();
          StoreStackValueAtOffset_Slow(OneConst);
        } else {
          StackData.push(StackMemberInfo {OneConst});
        }
        break;
      }

      case OP_F80SINCOSSTACK: {
        Ref St0 = LoadStackValue();

        Ref SinValue {};
        Ref CosValue {};

        if (ReducedPrecisionMode) {
          SinValue = IREmit->_F64SIN(St0);
          CosValue = IREmit->_F64COS(St0);
        }
#ifdef VIXL_SIMULATOR
        else if (DisableVixlIndirectCalls() == 0) {
          SinValue = IREmit->_F80SIN(St0);
          CosValue = IREmit->_F80COS(St0);
        }
#endif
        else {
          // Single combined op that writes both results into pre-allocated registers.
          SinValue = IREmit->_AllocateFPR(OpSize::i128Bit, OpSize::i128Bit);
          CosValue = IREmit->_AllocateFPR(OpSize::i128Bit, OpSize::i128Bit);
          IREmit->_F80SINCOS(St0, SinValue, CosValue);
        }

        // Push values
        // Sine replaces the current top, then cosine is pushed on top of it.
        if (SlowPath) {
          StoreStackValueAtOffset_Slow(SinValue, 0, false);
          UpdateTopForPush_Slow();
          StoreStackValueAtOffset_Slow(CosValue, 0, true);
        } else {
          StackData.setTop(StackMemberInfo {SinValue});
          StackData.push(StackMemberInfo {CosValue});
        }
        break;
      }

      case OP_INITSTACK: {
        StackData.clear();
        InvalidateCachedRegs();
        break;
      }

      case OP_INVALIDATESTACK: {
        // Read the op through its own type rather than through an unrelated op struct.
        const auto* Op = IROp->C<IROp_InvalidateStack>();
        auto Offset = Op->StackLocation;

        if (Offset != 0xff) { // invalidate single offset
          if (SlowPath) {
            SetX87ValidTag(Offset, false);
          } else {
            StackData.setTagInvalid(Offset);
          }
        } else { // invalidate all
          if (SlowPath) {
            TopValidCache.fill(StackSlot::INVALID);
            FlushValidPending = true;
          } else {
            for (size_t i = 0; i < StackData.size; i++) {
              StackData.setTagInvalid(i);
            }
          }
        }
        break;
      }

      case OP_PUSHSTACK: {
        const auto* Op = IROp->C<IROp_PushStack>();
        auto* SourceNode = CurrentIR.GetNode(Op->X80Src);

        if (SlowPath) {
          UpdateTopForPush_Slow();
          StoreStackValueAtOffset_Slow(SourceNode);
        } else {
          if (Op->OriginalValue.IsInvalid()) {
            // No original value to track - just push the converted data
            StackData.push(StackMemberInfo {SourceNode});
          } else {
            // Remember the pre-conversion value and its load size so a later store of the
            // same size can bypass the conversion (see OP_STORESTACKMEM).
            auto* OriginalNode = CurrentIR.GetNode(Op->OriginalValue);
            StackData.push(StackMemberInfo {SourceNode, OriginalNode, Op->LoadSize});
          }
        }
        break;
      }

      case OP_COPYPUSHSTACK: {
        const auto* Op = IROp->C<IROp_CopyPushStack>();
        auto Offset = Op->StackLocation;
        auto Value = MigrateToSlowPath_IfInvalid(Offset);

        if (SlowPath) {
          // Load before moving the top: Offset is relative to the top as it is before the push.
          Ref St0 = LoadStackValueAtOffset_Slow(Offset);
          UpdateTopForPush_Slow();
          StoreStackValueAtOffset_Slow(St0);
        } else {
          StackData.push(*Value);
        }
        break;
      }

      case OP_READSTACKVALUE: {
        const auto* Op = IROp->C<IROp_ReadStackValue>();
        auto Offset = Op->StackLocation;
        Ref NewValue = LoadStackValue(Offset);

        IREmit->ReplaceUsesWithAfter(CodeNode, NewValue, CodeNode);
        break;
      }

      case OP_STACKVALIDTAG: {
        // Returns 1 if the stack slot is valid and 0 otherwise.
        const auto* Op = IROp->C<IROp_StackValidTag>();
        auto Offset = Op->StackLocation;
        auto Value = MigrateToSlowPath_IfInvalid(Offset);

        Ref Tag {};
        if (SlowPath) {
          Tag = GetX87ValidTag_Slow(Offset);
        } else {
          Tag = Value ? GetConstant(1) : GetConstant(0);
        }

        IREmit->ReplaceUsesWithAfter(CodeNode, Tag, CodeNode);
        break;
      }

      case OP_STORESTACKMEM: {
        const auto* Op = IROp->C<IROp_StoreStackMem>();
        const auto& Value = MigrateToSlowPath_IfInvalid();
        Ref StackNode = SlowPath ? LoadStackValueAtOffset_Slow() : Value->StackDataNode;
        Ref AddrNode = CurrentIR.GetNode(Op->Addr);
        Ref Offset = CurrentIR.GetNode(Op->Offset);
        OpSize Align = Op->Align;
        MemOffsetType OffsetType = Op->OffsetType;
        uint8_t OffsetScale = Op->OffsetScale;

        // On the fast path we can optimize memory copies.
        // If we are doing:
        // fld dword [rax]
        // fst dword [rbx]
        // We can optimize this to:
        // ldr w2, [x0]
        // str w2, [x1]
        // or similar. As long as the source size and dest size are one and the same.
        // This will avoid any conversions between source and stack element size and conversion back.
        OpSize StoreSize = Op->StoreSize;
        LOGMAN_THROW_A_FMT(Op->StoreSize == OpSize::i32Bit || Op->StoreSize == OpSize::i64Bit || Op->StoreSize == OpSize::f80Bit,
                           "Invalid store size in x87 store stack mem");
        if (!SlowPath && Value->Source && Value->Source->Size == StoreSize) {
          Ref SourceValue = Value->Source->Node;
          if (Op->StoreSize == OpSize::f80Bit) {
            Store80BitToMem(Op, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale);
          } else {
            IREmit->_StoreMemFPR(StoreSize, SourceValue, AddrNode, Offset, Align, OffsetType, OffsetScale);
          }
          break;
        }

        if (ReducedPrecisionMode) {
          StoreStackMem_Reduced_Helper(Op, StackNode);
          break;
        }

        StoreStackMem_Helper(Op, StackNode);
        break;
      }

      case OP_STORESTACKTOSTACK: { // stores top of stack in another place in stack.
        const auto* Op = IROp->C<IROp_StoreStackToStack>();
        auto Offset = Op->StackLocation;

        // Copying st(0) onto itself is a no-op.
        if (Offset != 0) {
          auto Value = MigrateToSlowPath_IfInvalid();

          // Need to store st0 to stack location - basically a copy.
          if (SlowPath) {
            StoreStackValueAtOffset_Slow(LoadStackValueAtOffset_Slow(), Offset);
          } else {
            StackData.setTop(*Value, Offset);
          }
        }
        break;
      }
      case OP_POPSTACKDESTROY: {
        // On the slow path the tag of the popped slot is explicitly cleared before popping.
        if (SlowPath) {
          SetX87ValidTag(0, false);
        }
        StackPop();
        break;
      }

      case OP_F80STACKXCHANGE: {
        const auto* Op = IROp->C<IROp_F80StackXchange>();
        auto Offset = Op->SrcStack;

        if (Offset == 0) {
          // No-op
          break;
        }

        // Copies, not references: the fast path below overwrites both slots.
        const auto [ValidTop, StackMemberTop] = StackData.top(0);
        const auto [ValidOffset, StackMemberOffset] = StackData.top(Offset);

        if (ValidTop != StackSlot::VALID || ValidOffset != StackSlot::VALID) {
          // Slow path: do actual memory operations
          Ref ValueTop = LoadStackValue();
          Ref ValueOffset = LoadStackValue(Offset);
          StoreStackValue(ValueOffset);
          StoreStackValue(ValueTop, Offset);
        } else {
          // Fast path: swap complete StackMemberInfo preserving Source metadata
          StackData.setTop(StackMemberOffset, 0);
          StackData.setTop(StackMemberTop, Offset);
        }
        break;
      }

      case OP_F80STACKCHANGESIGN: {
        Ref Value = LoadStackValue();

        // We need a couple of intermediate instructions to change the sign
        // of a value
        Ref ResultNode {};
        if (ReducedPrecisionMode) {
          ResultNode = IREmit->_VFNeg(OpSize::i64Bit, OpSize::i64Bit, Value);
        } else {
          // Flip the sign bit by XOR-ing with the sign mask.
          Ref HelperNode = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, IR::NamedVectorConstant::NAMED_VECTOR_F80_SIGN_MASK);
          ResultNode = IREmit->_VXor(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode);
        }
        StoreStackValue(ResultNode);
        break;
      }

      case OP_F80STACKABS: {
        Ref Value = LoadStackValue();

        Ref ResultNode {};
        if (ReducedPrecisionMode) {
          ResultNode = IREmit->_VFAbs(OpSize::i64Bit, OpSize::i64Bit, Value);
        } else {
          // Intermediate insts
          // Clear the sign bit by and-not with the sign mask.
          Ref HelperNode = IREmit->_LoadNamedVectorConstant(OpSize::i128Bit, IR::NamedVectorConstant::NAMED_VECTOR_F80_SIGN_MASK);
          ResultNode = IREmit->_VAndn(OpSize::i128Bit, OpSize::i8Bit, Value, HelperNode);
        }
        StoreStackValue(ResultNode);
        break;
      }

      case OP_F80CMPSTACK: {
        const auto* Op = IROp->C<IROp_F80CmpStack>();
        auto Offset = Op->SrcStack;
        Ref StackValue1 = LoadStackValue();
        Ref StackValue2 = LoadStackValue(Offset);

        Ref CmpNode {};
        if (ReducedPrecisionMode) {
          CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackValue1, StackValue2);
        } else {
          CmpNode = IREmit->_F80Cmp(StackValue1, StackValue2);
        }

        IREmit->ReplaceUsesWithAfter(CodeNode, CmpNode, CodeNode);
        break;
      }
      case OP_F80STACKTEST: {
        // Compare the selected stack slot against zero.
        const auto* Op = IROp->C<IROp_F80StackTest>();
        auto Offset = Op->SrcStack;
        auto StackNode = LoadStackValue(Offset);
        Ref ZeroConst = IREmit->_VCastFromGPR(ReducedPrecisionMode ? OpSize::i64Bit : OpSize::i128Bit, OpSize::i64Bit, GetConstant(0));

        Ref CmpNode {};
        if (ReducedPrecisionMode) {
          CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackNode, ZeroConst);
        } else {
          CmpNode = IREmit->_F80Cmp(StackNode, ZeroConst);
        }
        IREmit->ReplaceUsesWithAfter(CodeNode, CmpNode, CodeNode);
        break;
      }


      case OP_F80CMPVALUE: {
        const auto* Op = IROp->C<IROp_F80CmpValue>();
        const auto& Value = CurrentIR.GetNode(Op->X80Src);
        auto StackNode = LoadStackValue();

        Ref CmpNode {};
        if (ReducedPrecisionMode) {
          CmpNode = IREmit->_FCmp(OpSize::i64Bit, StackNode, Value);
        } else {
          CmpNode = IREmit->_F80Cmp(StackNode, Value);
        }
        IREmit->ReplaceUsesWithAfter(CodeNode, CmpNode, CodeNode);
        break;
      }

      case OP_SYNCSTACKTOSLOW: {
        // This synchronizes stack values but doesn't necessarily moves us off the FastPath!
        Ref NewTop = SynchronizeStackValues();
        FlushCachedRegs();
        IREmit->ReplaceUsesWithAfter(CodeNode, NewTop, CodeNode);
        break;
      }

      case OP_STACKFORCESLOW: {
        MigrateToSlowPathIf(true);
        InvalidateCachedRegs();
        break;
      }

      case OP_INCSTACKTOP: {
        if (SlowPath) {
          UpdateTopForPop_Slow();
        } else {
          StackData.rotate(false);
        }
        break;
      }

      case OP_DECSTACKTOP: {
        if (SlowPath) {
          UpdateTopForPush_Slow();
        } else {
          StackData.rotate(true);
        }
        break;
      }

      case OP_F80ROUNDSTACK: {
        Ref St0 = LoadStackValue();

        Ref Value {};
        if (ReducedPrecisionMode) {
          Value = IREmit->_Vector_FToI(OpSize::i64Bit, OpSize::i64Bit, St0, RoundMode::Host);
        } else {
          Value = IREmit->_F80Round(St0);
        }
        StoreStackValue(Value);
        break;
      }

      case OP_F80VBSLSTACK: {
        const auto* Op = IROp->C<IROp_F80VBSLStack>();

        auto StackOffset1 = Op->SrcStack1;
        auto StackOffset2 = Op->SrcStack2;
        Ref Value1 = LoadStackValue(StackOffset1);
        Ref Value2 = LoadStackValue(StackOffset2);

        Ref StackNode = IREmit->_VBSL(OpSize::i128Bit, CurrentIR.GetNode(Op->VectorMask), Value1, Value2);
        // st(0) only needs tagging when neither source was st(0) itself.
        StoreStackValue(StackNode, 0, StackOffset1 && StackOffset2);
        break;
      }

      default: LOGMAN_THROW_A_FMT(false, "IROp was expected to be lowered");
      }
      // The stack op has been fully replaced by the IR emitted above.
      IREmit->Remove(CodeNode);
    }

    // Write all tracked state back to the context just before the block's exit op,
    // since tracking does not carry over into the next block.
    auto Last = CurrentIR.at(BlockIROp->Last);
    --Last;
    auto [LastCodeNode, LastIROp] = Last();
    LOGMAN_THROW_A_FMT(IsBlockExit(LastIROp->Op), "must be exit");
    IREmit->SetWriteCursorBefore(LastCodeNode);
    SynchronizeStackValues();
    FlushCachedRegs();
  }
}

// Factory for the x87 stack optimization pass; forwards both arguments to the
// X87StackOptimization constructor and returns the pass through the generic Pass interface.
fextl::unique_ptr<Pass> CreateX87StackOptimizationPass(const FEXCore::HostFeatures& Features, OpSize GPROpSize) {
  fextl::unique_ptr<Pass> NewPass = fextl::make_unique<X87StackOptimization>(Features, GPROpSize);
  return NewPass;
}
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/Passes.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/memory.h>

// Forward declarations only: this header takes these types by pointer or reference,
// so their full definitions are not needed here.
namespace FEXCore {
class CPUIDEmu;
struct HostFeatures;
} // namespace FEXCore

namespace FEXCore::Utils {
class IntrusivePooledAllocator;
}

// Factory functions for the IR passes. Each returns an owning pointer to a newly created pass.
namespace FEXCore::IR {
class Pass;
class RegisterAllocationPass;

// Creates the dead flag calculation elimination pass.
fextl::unique_ptr<FEXCore::IR::Pass> CreateDeadFlagCalculationEliminination();
// Creates the register allocation pass; takes the CPUID emulation object by pointer.
fextl::unique_ptr<FEXCore::IR::RegisterAllocationPass> CreateRegisterAllocationPass(const FEXCore::CPUIDEmu* CPUID);
// Creates the x87 stack optimization pass for the given host features and GPR operation size.
// NOTE(review): OpSize is not declared by anything this header includes directly -
// presumably it relies on the includer having pulled in the IR definitions first; confirm.
fextl::unique_ptr<FEXCore::IR::Pass> CreateX87StackOptimizationPass(const FEXCore::HostFeatures&, OpSize GPROpSize);

namespace Validation {
  // Creates the IR validation pass.
  fextl::unique_ptr<FEXCore::IR::Pass> CreateIRValidation();
} // namespace Validation

namespace Debug {
  // Creates the IR dumper pass.
  fextl::unique_ptr<FEXCore::IR::Pass> CreateIRDumper();
}
} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Interface/IR/RegisterAllocationData.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "IR.h"
#include <cstdint>

namespace FEXCore::IR {

// A physical register packed into a single byte: a register class plus a
// register index within that class. `Raw` aliases the two bitfields so the whole
// value can be compared or copied as one byte.
union PhysicalRegister {
  uint8_t Raw;
  struct {
    // 32 maximum physical registers
    uint8_t Reg : 5;
    // 8 Maximum classes
    uint8_t Class : 3;
  };

  // Two registers are equal when both class and index match (compared via the raw byte).
  bool operator==(const PhysicalRegister& Other) const {
    return Raw == Other.Raw;
  }

  // Builds a register from an explicit class and index.
  PhysicalRegister(RegClass Class, uint8_t Reg)
    : Reg(Reg)
    , Class(uint8_t(Class)) {}

  // Reinterprets the immediate stored in an IR argument as a packed register.
  PhysicalRegister(OrderedNodeWrapper Arg)
    : Raw(Arg.GetImmediate()) {}

  // Reads the packed register stored on an IR node.
  PhysicalRegister(Ref Node)
    : Raw(Node->Reg) {}

  // Returns the class bits as a RegClass value.
  RegClass AsRegClass() const {
    return RegClass {Class};
  }

  // The invalid register: class RegClass::Invalid with index 0.
  static const PhysicalRegister Invalid() {
    return PhysicalRegister(RegClass::Invalid, 0);
  }

  // True when this is the invalid register. Since RegClass::Invalid is 0 (asserted
  // below) and the invalid index is 0, the invalid register is exactly the raw byte 0.
  bool IsInvalid() const {
    static_assert(uint8_t(RegClass::Invalid) == 0);
    return Raw == 0;
  }
};

static_assert(sizeof(PhysicalRegister) == 1);

} // namespace FEXCore::IR


================================================
FILE: FEXCore/Source/Utils/Allocator/64BitAllocator.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Utils/Allocator/FlexBitSet.h"
#include "Utils/Allocator/HostAllocator.h"
#include "Utils/Allocator/IntrusiveArenaAllocator.h"
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXHeaderUtils/Syscalls.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <array>
#include <cassert>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <list>
#include <memory>
#include <mutex>
#include <new>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <sys/user.h>
#include <type_traits>
#include <utility>

namespace Alloc::OSAllocator {

thread_local FEXCore::Core::InternalThreadState* TLSThread {};

// Host allocator that serves mmap/munmap requests from a set of VMA regions it tracks itself.
// Regions start out as "reserved" (only base and size are recorded) and are turned into
// "live" regions on demand; a live region keeps a per-page used bitmap at its own start.
class OSAllocator_64Bit final : public Alloc::HostAllocator {
public:
  OSAllocator_64Bit();
  OSAllocator_64Bit(fextl::vector<FEXCore::Allocator::MemoryRegion>& Regions);

  virtual ~OSAllocator_64Bit();
  // Slab allocation is not provided by this allocator: always returns nullptr.
  void* AllocateSlab(size_t Size) override {
    return nullptr;
  }
  // Counterpart of AllocateSlab; intentionally does nothing.
  void DeallocateSlab(void* Ptr, size_t Size) override {}

  void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override;
  int Munmap(void* addr, size_t length) override;

  // Takes the allocation mutex so it is held across the fork.
  void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) override {
    AllocationMutex.lock();
  }

  // Releases the mutex taken in LockBeforeFork. The child uses StealAndDropActiveLocks()
  // instead of a plain unlock.
  void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) override {
    if (Child) {
      AllocationMutex.StealAndDropActiveLocks();
    } else {
      AllocationMutex.unlock();
    }
  }

private:
  // Upper bound is the maximum virtual address space of the host processor
  // Defaults to 57 bits; DetermineVASize() replaces it with the detected size.
  uintptr_t UPPER_BOUND = (1ULL << 57);

  // Lower bound is the starting of the range just past the lower 32bits
  constexpr static uintptr_t LOWER_BOUND = 0x1'0000'0000ULL;

  // The same bounds expressed in pages.
  uintptr_t UPPER_BOUND_PAGE = UPPER_BOUND / FEXCore::Utils::FEX_PAGE_SIZE;
  constexpr static uintptr_t LOWER_BOUND_PAGE = LOWER_BOUND / FEXCore::Utils::FEX_PAGE_SIZE;

  // A region of address space that is tracked but not yet made live.
  struct ReservedVMARegion {
    uintptr_t Base;
    // Could be number of pages if we want to pack this in to 12 bytes
    uint64_t RegionSize;
  };

  // Appends [NextPtr, NextPtr + NextSize) to `Region` when it starts exactly at the region's
  // end and the merged size stays within 64GB. Returns true if the region was extended.
  bool MergeReservedRegionIfPossible(ReservedVMARegion* Region, uintptr_t NextPtr, uint64_t NextSize) {
    constexpr uint64_t MaxReservedRegionSize = 64ULL * 1024 * 1024 * 1024; // 64GB
    uintptr_t RegionEnd = Region->Base + Region->RegionSize;
    uint64_t NewRegionSize = Region->RegionSize + NextSize;
    if (RegionEnd == NextPtr && NewRegionSize <= MaxReservedRegionSize) {
      // Append the contiguous region
      Region->RegionSize = NewRegionSize;
      return true;
    }
    return false;
  }

  // Bookkeeping header for a live region. It is placement-constructed at the base of the
  // region it describes (see MakeRegionActive) and is followed by the used-page bitmap.
  struct LiveVMARegion {
    ReservedVMARegion* SlabInfo;
    uint64_t FreeSpace {};
    uint64_t NumManagedPages {};
    uint32_t LastPageAllocation {};
    bool HadMunmap {};

    // Align UsedPages so it pads to the next page.
    // Necessary to take advantage of madvise zero page pooling.
    using FlexBitElementType = uint64_t;
    alignas(FEXCore::Utils::FEX_PAGE_SIZE) FEXCore::FlexBitSet<FlexBitElementType> UsedPages;

    // This returns the size of the LiveVMARegion in addition to the flex set that tracks the used data
    // The LiveVMARegion lives at the start of the VMA region which means on initialization we need to set that
    // tracked ranged as used immediately
    static size_t GetFEXManagedVMARegionSize(size_t Size) {
      // One element per page

      // 0x10'0000'0000 bytes
      // 0x100'0000 Pages
      // 1 bit per page for tracking means 0x20'0000 (Pages / 8) bytes of flex space
      // Which is 2MB of tracking
      const uint64_t NumElements = Size >> FEXCore::Utils::FEX_PAGE_SHIFT;
      return sizeof(LiveVMARegion) + FEXCore::FlexBitSet<FlexBitElementType>::SizeInBytes(NumElements);
    }

    // Initializes the accounting of a freshly activated region: the pages holding this header
    // and its bitmap, plus `AdditionalSize` bytes, are marked as used and subtracted from the
    // free space.
    static void InitializeVMARegionUsed(LiveVMARegion* Region, size_t AdditionalSize) {
      size_t SizeOfLiveRegion =
        FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(Region->SlabInfo->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE);
      size_t SizePlusManagedData = SizeOfLiveRegion + AdditionalSize;

      Region->FreeSpace = Region->SlabInfo->RegionSize - SizePlusManagedData;

      size_t NumManagedPages = SizePlusManagedData >> FEXCore::Utils::FEX_PAGE_SHIFT;
      size_t ManagedSize = NumManagedPages << FEXCore::Utils::FEX_PAGE_SHIFT;

      // Use madvise to set the full tracking region to zero.
      // This ensures unused pages are zero, while not having the backing pages consuming memory.
      // NOTE(review): the length below subtracts a byte count (ManagedSize) from a page count
      // (RegionSize >> FEX_PAGE_SHIFT), and the return value is not checked - confirm the intended range.
      ::madvise(Region->UsedPages.Memory + ManagedSize, (Region->SlabInfo->RegionSize >> FEXCore::Utils::FEX_PAGE_SHIFT) - ManagedSize,
                MADV_DONTNEED);

      // Use madvise to claim WILLNEED on the beginning pages for initial state tracking.
      // Improves performance of the following MemClear by not doing a page level fault dance for data necessary to track >170TB of used pages.
      ::madvise(Region->UsedPages.Memory, ManagedSize, MADV_WILLNEED);

      // Set our reserved pages
      Region->UsedPages.MemSet(NumManagedPages);
      Region->LastPageAllocation = NumManagedPages;
      Region->NumManagedPages = NumManagedPages;
    }
  };

  // Layout requirements: the header occupies exactly one page and the bitmap storage
  // begins right where the header ends.
  static_assert(sizeof(LiveVMARegion) == FEXCore::Utils::FEX_PAGE_SIZE, "Needs to be the size of a page");

  static_assert(std::is_trivially_copyable<LiveVMARegion>::value, "Needs to be trivially copyable");
  static_assert(offsetof(LiveVMARegion, UsedPages) == sizeof(LiveVMARegion), "FlexBitSet needs to be at the end");

  using ReservedRegionListType = fex_pmr::list<ReservedVMARegion*>;
  using LiveRegionListType = fex_pmr::list<LiveVMARegion*>;
  ReservedRegionListType* ReservedRegions {};
  LiveRegionListType* LiveRegions {};

  Alloc::ForwardOnlyIntrusiveArenaAllocator* ObjectAlloc {};
  FEXCore::ForkableUniqueMutex AllocationMutex;
  void DetermineVASize();

  // Turns the reserved region referenced by `ReservedIterator` into a live one:
  // removes it from the reserved list, makes the bookkeeping area plus `UsedSize` bytes
  // readable/writable, constructs the LiveVMARegion header at the region base and
  // appends it to the live list. Returns the new live region.
  // `ReservedIterator` is invalidated by this call.
  LiveVMARegion* MakeRegionActive(ReservedRegionListType::iterator ReservedIterator, uint64_t UsedSize) {
    ReservedVMARegion* ReservedRegion = *ReservedIterator;

    ReservedRegions->erase(ReservedIterator);

    // mprotect the new region we've allocated
    size_t SizeOfLiveRegion =
      FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(ReservedRegion->RegionSize), FEXCore::Utils::FEX_PAGE_SIZE);
    size_t SizePlusManagedData = UsedSize + SizeOfLiveRegion;

    auto Res = mprotect(reinterpret_cast<void*>(ReservedRegion->Base), SizePlusManagedData, PROT_READ | PROT_WRITE);
    LOGMAN_THROW_A_FMT(Res != -1, "Couldn't mprotect region: {} '{}' Likely occurs when running out of memory or Maximum VMAs", errno,
                       strerror(errno));

    FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(ReservedRegion->Base), SizePlusManagedData);
    // The header lives inside the region it manages, at its very first page.
    LiveVMARegion* LiveRange = new (reinterpret_cast<void*>(ReservedRegion->Base)) LiveVMARegion();

    // Copy over the reserved data
    LiveRange->SlabInfo = ReservedRegion;

    // Initialize VMA
    LiveVMARegion::InitializeVMARegionUsed(LiveRange, UsedSize);

    // Add to our active tracked ranges
    auto LiveIter = LiveRegions->emplace_back(LiveRange);
    return LiveIter;
  }

  void AllocateMemoryRegions(fextl::vector<FEXCore::Allocator::MemoryRegion>& Ranges);
  LiveVMARegion* FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd);
};

// Derives UPPER_BOUND and UPPER_BOUND_PAGE from the number of virtual address bits reported by
// FEXCore::Allocator::DetermineVASize().
void OSAllocator_64Bit::DetermineVASize() {
  const size_t VABits = FEXCore::Allocator::DetermineVASize();
  const uintptr_t AddressSpaceSize = 1ULL << VABits;

  UPPER_BOUND = AddressSpaceSize;

#if ARCHITECTURE_x86_64
  // Last page cannot be allocated on x86, so exclude it from the managed range.
  UPPER_BOUND -= FEXCore::Utils::FEX_PAGE_SIZE;
#endif

  // Same bound expressed in pages.
  UPPER_BOUND_PAGE = UPPER_BOUND / FEXCore::Utils::FEX_PAGE_SIZE;
}

// Finds the region that contains the address range [Addr, AddrEnd).
//
// Live regions are searched first. If none contains the range, the reserved regions are searched and the first match
// is promoted to a live region through MakeRegionActive.
//
// @param Addr - First address of the range.
// @param AddrEnd - End address of the range; must be strictly below the end of the containing region.
//
// @return The containing live region, or nullptr when neither list has a region containing the range.
OSAllocator_64Bit::LiveVMARegion* OSAllocator_64Bit::FindLiveRegionForAddress(uintptr_t Addr, uintptr_t AddrEnd) {
  // Containment test shared by both searches.
  const auto RangeIsInside = [Addr, AddrEnd](uintptr_t Base, uintptr_t Size) {
    const uintptr_t End = Base + Size;
    return Addr >= Base && AddrEnd < End;
  };

  // Active slabs take priority.
  for (LiveVMARegion* Candidate : *LiveRegions) {
    if (RangeIsInside(Candidate->SlabInfo->Base, Candidate->SlabInfo->RegionSize)) {
      return Candidate;
    }
  }

  // No active slab fit. See if a reserved region does, and activate it if so.
  for (auto Reserved = ReservedRegions->begin(); Reserved != ReservedRegions->end(); ++Reserved) {
    if (RangeIsInside((*Reserved)->Base, (*Reserved)->RegionSize)) {
      return MakeRegionActive(Reserved, 0);
    }
  }

  return nullptr;
}

// mmap replacement that places mappings inside the regions managed by this allocator.
//
// @param addr - Placement hint, or the exact address when MAP_FIXED / MAP_FIXED_NOREPLACE is set. Must be page aligned.
//               Non-null addresses below LOWER_BOUND are forwarded to the host mmap.
// @param length - Length of the mapping in bytes; rounded up to the page size.
// @param prot, flags, fd, offset - Forwarded to the host mmap. offset must be page aligned when fd != -1.
//
// @return The mapped address on success. On failure the negated errno value is returned encoded in the pointer
//         (for example reinterpret_cast<void*>(-ENOMEM)); MAP_FAILED is never returned.
void* OSAllocator_64Bit::Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
  if (addr != 0 && addr < reinterpret_cast<void*>(LOWER_BOUND)) {
    // If we are asked to allocate something outside of the 64-bit space
    // Then we need to just hand this to the OS
    void* HostResult = ::mmap(addr, length, prot, flags, fd, offset);
    if (HostResult == MAP_FAILED) {
      // Keep the -errno convention used by every other path. MAP_FAILED is (void*)-1, which callers decoding the
      // result would otherwise read as -EPERM and lose the real error.
      return reinterpret_cast<void*>(-errno);
    }
    return HostResult;
  }

  uint64_t Addr = reinterpret_cast<uint64_t>(addr);
  // Addr must be page aligned
  if (Addr & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return reinterpret_cast<void*>(-EINVAL);
  }

  // If FD is provided then offset must also be page aligned
  if (fd != -1 && offset & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return reinterpret_cast<void*>(-EINVAL);
  }

  // 64bit address overflow
  if (Addr + length < Addr) {
    return reinterpret_cast<void*>(-EOVERFLOW);
  }

  bool Fixed = (flags & MAP_FIXED) || (flags & MAP_FIXED_NOREPLACE);
  length = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);

  uint64_t AddrEnd = Addr + length;
  size_t NumberOfPages = length / FEXCore::Utils::FEX_PAGE_SIZE;

  // This needs a mutex to be thread safe
  auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(AllocationMutex, TLSThread);

  // Address chosen for the mapping. Zero means "nothing allocated" and is turned into -ENOMEM at the end.
  uint64_t AllocatedOffset {};
  LiveVMARegion* LiveRegion {};

  if (Fixed || Addr != 0) {
    LiveRegion = FindLiveRegionForAddress(Addr, AddrEnd);
  }

  // Re-entered after a reserved region has been activated so the search can consider the new live region.
again:

  // Result of trying to place a mapping in one region:
  //   {nullptr, nullptr}  - the region has no suitable free range, nothing was mapped
  //   {Region, address}   - mapped successfully; address equals AllocatedOffset
  //   {Region, -errno}    - a range was selected but the host mmap failed
  struct RangeResult final {
    LiveVMARegion* RegionInsertedInto;
    void* Ptr;
  };

  // Searches Region for `length` bytes of free pages and maps them on the host. The search starts at
  // StartingPosition when it is non-zero, otherwise at the region's last allocation. On finding a range,
  // AllocatedOffset is updated before the host mmap is attempted.
  auto CheckIfRangeFits = [&AllocatedOffset](LiveVMARegion* Region, uint64_t length, int prot, int flags, int fd, off_t offset,
                                             uint64_t StartingPosition = 0) -> RangeResult {
    uint64_t AllocatedPage {~0ULL};
    uint64_t NumberOfPages = length >> FEXCore::Utils::FEX_PAGE_SHIFT;

    if (Region->FreeSpace >= length) {
      uint64_t LastAllocation =
        StartingPosition ? (StartingPosition - Region->SlabInfo->Base) >> FEXCore::Utils::FEX_PAGE_SHIFT : Region->LastPageAllocation;
      size_t RegionNumberOfPages = Region->SlabInfo->RegionSize >> FEXCore::Utils::FEX_PAGE_SHIFT;


      if (Region->HadMunmap) {
        // Backward scan
        // We need to do a backward scan first to fill any holes
        // Otherwise we will very quickly run out of VMA regions (65k maximum)
        auto SearchResult = Region->UsedPages.BackwardScanForRange<true>(LastAllocation, NumberOfPages, Region->NumManagedPages);

        AllocatedPage = SearchResult.FoundElement;

        // If we didn't even have a one page free in the backward search, then unclaim HadMunmap.
        // Switching over to default forward search.
        if (SearchResult.FoundElement == ~0ULL && !SearchResult.FoundHole) {
          Region->HadMunmap = false;
        }
      }

      // Forward Scan
      if (AllocatedPage == ~0ULL) {
        auto SearchResult = Region->UsedPages.ForwardScanForRange<true>(LastAllocation, NumberOfPages, RegionNumberOfPages);
        AllocatedPage = SearchResult.FoundElement;
      }

      if (AllocatedPage != ~0ULL) {
        AllocatedOffset = Region->SlabInfo->Base + AllocatedPage * FEXCore::Utils::FEX_PAGE_SIZE;

        // We need to setup protections for this
        // The range is already reserved by us, so always replace it with MAP_FIXED.
        void* MMapResult = ::mmap(reinterpret_cast<void*>(AllocatedOffset), length, prot, (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, fd, offset);

        if (MMapResult == MAP_FAILED) {
          return RangeResult {Region, reinterpret_cast<void*>(-errno)};
        }
        return RangeResult {Region, MMapResult};
      }
    }

    return {};
  };

  if (Fixed) {
    // Found a region let's allocate to it
    if (LiveRegion) {
      // Found a slab that fits this
      if (flags & MAP_FIXED_NOREPLACE) {
        auto Fits = CheckIfRangeFits(LiveRegion, length, prot, flags, fd, offset, Addr);
        if (Fits.RegionInsertedInto && Fits.Ptr == reinterpret_cast<void*>(Addr)) {
          // We fit correctly
          AllocatedOffset = Addr;
        } else {
          // Intersected with something that already existed
          return reinterpret_cast<void*>(-EEXIST);
        }
      } else {
        // We need to mmap the file to this location
        void* MMapResult = ::mmap(reinterpret_cast<void*>(Addr), length, prot, (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED, fd, offset);

        if (MMapResult == MAP_FAILED) {
          return reinterpret_cast<void*>(-errno);
        }

        AllocatedOffset = Addr;
      }
      // Fall through to live region tracking
    }
  } else {
    // Check our active slabs to see if we can fit the allocation
    // Slightly different than fixed since it doesn't need exact placement
    if (LiveRegion && Addr != 0) {
      // We found a LiveRegion that could hold this address. Let's try to place it
      // Check if this area is free
      auto Fits = CheckIfRangeFits(LiveRegion, length, prot, flags, fd, offset, Addr);
      if (Fits.RegionInsertedInto && Fits.Ptr == reinterpret_cast<void*>(Addr)) {
        // We fit correctly
        AllocatedOffset = Addr;
      } else {
        // Couldn't fit
        // We can continue past this point still
        LiveRegion = nullptr;
        // CheckIfRangeFits may have recorded a candidate address that is not being used. Forget it so that a failed
        // search below reports -ENOMEM instead of returning that stale address.
        AllocatedOffset = 0;
      }
    }

    if (!LiveRegion) {
      for (auto it = LiveRegions->begin(); it != LiveRegions->end(); ++it) {
        auto Fits = CheckIfRangeFits(*it, length, prot, flags, fd, offset);

        if (!Fits.RegionInsertedInto) {
          // No error and this region couldn't fit the allocation. Try the next one.
          continue;
        }

        if (Fits.Ptr == reinterpret_cast<void*>(AllocatedOffset)) {
          // We fit correctly
          LiveRegion = Fits.RegionInsertedInto;
          break;
        }

        // A range was selected but the host mmap failed. Ptr carries -errno, so propagate it rather than leaving
        // AllocatedOffset pointing at memory that was never mapped.
        return Fits.Ptr;
      }
    }

    if (!LiveRegion) {
      // Couldn't find a fit in the live regions
      // Allocate a new reserved region
      size_t lengthOfLiveRegion = FEXCore::AlignUp(LiveVMARegion::GetFEXManagedVMARegionSize(length), FEXCore::Utils::FEX_PAGE_SIZE);
      size_t lengthPlusManagedData = length + lengthOfLiveRegion;
      for (auto it = ReservedRegions->begin(); it != ReservedRegions->end(); ++it) {
        if ((*it)->RegionSize >= lengthPlusManagedData) {
          MakeRegionActive(it, 0);
          goto again;
        }
      }
    }
  }

  if (LiveRegion) {
    // Mark the pages as used
    uintptr_t RegionBegin = LiveRegion->SlabInfo->Base;
    uintptr_t MappedBegin = (AllocatedOffset - RegionBegin) >> FEXCore::Utils::FEX_PAGE_SHIFT;
    // Only pages that were previously free reduce the free space; a MAP_FIXED mapping may overlap used pages.
    size_t PagesSet {};

    for (size_t i = 0; i < NumberOfPages; ++i) {
      PagesSet += LiveRegion->UsedPages.TestAndSet(MappedBegin + i) == false;
    }

    // Change our last allocation region
    LiveRegion->LastPageAllocation = MappedBegin + NumberOfPages;
    LiveRegion->FreeSpace -= PagesSet * FEXCore::Utils::FEX_PAGE_SIZE;
    LOGMAN_THROW_A_FMT(LiveRegion->FreeSpace <= LiveRegion->SlabInfo->RegionSize,
                       "Corrupt LiveRegion free space! 0x{:x} > 0x{:x}. After allocating 0x{:x} (0x{:x} overlapped)", LiveRegion->FreeSpace,
                       LiveRegion->SlabInfo->RegionSize, length, PagesSet);
  }

  if (!AllocatedOffset) {
    AllocatedOffset = -ENOMEM;
  }
  return reinterpret_cast<void*>(AllocatedOffset);
}

// munmap replacement for ranges that live inside the regions managed by this allocator.
//
// The pages are marked free in the owning live region and the range is replaced with a PROT_NONE anonymous mapping so
// the address space stays reserved for this allocator.
//
// @param addr - Start of the range. Must be page aligned. Addresses below LOWER_BOUND are forwarded to the host munmap.
// @param length - Length of the range in bytes. Must be a multiple of the page size.
//
// @return 0 on success or when no live region contains the range, otherwise a negated errno value.
int OSAllocator_64Bit::Munmap(void* addr, size_t length) {
  if (addr < reinterpret_cast<void*>(LOWER_BOUND)) {
    // If we are asked to unmap something outside of the 64-bit space
    // Then we need to just hand this to the OS
    if (::munmap(addr, length) == -1) {
      // Keep the -errno convention of the other paths. The raw -1 would be decoded by callers as EPERM and the
      // real error would be lost.
      return -errno;
    }
    return 0;
  }

  uint64_t Addr = reinterpret_cast<uint64_t>(addr);

  if (Addr & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return -EINVAL;
  }

  if (length & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return -EINVAL;
  }

  if (Addr + length < Addr) {
    return -EOVERFLOW;
  }

  // This needs a mutex to be thread safe
  auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(AllocationMutex, TLSThread);

  length = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);

  uintptr_t PtrBegin = reinterpret_cast<uintptr_t>(addr);
  uintptr_t PtrEnd = PtrBegin + length;
  // Walk all of the live ranges and find the one that owns this range
  for (auto it = LiveRegions->begin(); it != LiveRegions->end(); ++it) {
    uintptr_t RegionBegin = (*it)->SlabInfo->Base;
    uintptr_t RegionEnd = RegionBegin + (*it)->SlabInfo->RegionSize;

    if (RegionBegin <= PtrBegin && RegionEnd > PtrEnd) {
      // Live region fully encompasses slab range

      uint64_t FreedPages {};
      uint32_t SlabPageBegin = (PtrBegin - RegionBegin) >> FEXCore::Utils::FEX_PAGE_SHIFT;
      uint64_t PagesToFree = length >> FEXCore::Utils::FEX_PAGE_SHIFT;

      // Count only the pages that were actually in use so FreeSpace stays accurate on double frees.
      for (size_t i = 0; i < PagesToFree; ++i) {
        FreedPages += (*it)->UsedPages.TestAndClear(SlabPageBegin + i) ? 1 : 0;
      }

      if (FreedPages != 0) {
        // If we were continuous freeing then make sure to give back the physical address space
        // If the region was locked then madvise won't remove the physical backing
        // This would be a bug in the frontend application
        // So be careful with mlock/munlock
        ::madvise(addr, length, MADV_DONTNEED);
        // Replace the range with an inaccessible anonymous mapping instead of unmapping it.
        ::mmap(addr, length, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
      }

      (*it)->FreeSpace += FreedPages * FEXCore::Utils::FEX_PAGE_SIZE;

      // Set the last allocated page to the minimum of last page allocation or this slab
      // This will let us more quickly fill holes
      (*it)->LastPageAllocation = std::min((*it)->LastPageAllocation, SlabPageBegin);

      (*it)->HadMunmap = true;

      // XXX: Move region back to reserved list
      return 0;
    }
  }

  // If it didn't match at all then no error
  return 0;
}

// Sets up the allocator's bookkeeping from a list of memory regions.
//
// The first region of at least 64 MiB donates its leading 64 MiB to an intrusive arena allocator, which then backs the
// ReservedRegions and LiveRegions lists and every ReservedVMARegion descriptor. Each region larger than two pages is
// then recorded as a reserved region.
//
// @param Ranges - Regions to manage. The region that donates the arena is shrunk in place.
void OSAllocator_64Bit::AllocateMemoryRegions(fextl::vector<FEXCore::Allocator::MemoryRegion>& Ranges) {
  // Need to allocate the ObjectAlloc up front. Find a region that is larger than our minimum size first.
  const size_t ObjectAllocSize = 64 * 1024 * 1024;

  for (auto& Range : Ranges) {
    if (Range.Size < ObjectAllocSize) {
      // Too small to host the arena.
      continue;
    }

    void* const ArenaBase = Range.Ptr;

    // Make the first 64 MiB of this region accessible for the intrusive allocator
    mprotect(ArenaBase, ObjectAllocSize, PROT_READ | PROT_WRITE);

    // This enables the kernel to use transparent large pages in the allocator which can reduce memory pressure
    ::madvise(ArenaBase, ObjectAllocSize, MADV_HUGEPAGE);

    FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(ArenaBase), ObjectAllocSize);

    // The arena object sits at the start of the memory it manages; the two lists are then carved out of it.
    ObjectAlloc = new (ArenaBase) Alloc::ForwardOnlyIntrusiveArenaAllocator(ArenaBase, ObjectAllocSize);
    ReservedRegions = ObjectAlloc->new_construct(ReservedRegions, ObjectAlloc);
    LiveRegions = ObjectAlloc->new_construct(LiveRegions, ObjectAlloc);

    // Remove the donated bytes from the region so they are not handed out again.
    Range.Size -= ObjectAllocSize;
    Range.Ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ArenaBase) + ObjectAllocSize);

    // Only one arena is needed.
    break;
  }

  if (!ObjectAlloc) {
    ERROR_AND_DIE_FMT("Couldn't allocate object allocator!");
  }

  for (const auto& Range : Ranges) {
    // Skip using any regions that are <= two pages. FEX's VMA allocator requires two pages
    // for tracking data. So three pages are minimum for a single page VMA allocation.
    const bool TooSmall = Range.Size <= (FEXCore::Utils::FEX_PAGE_SIZE * 2);
    if (TooSmall) {
      continue;
    }

    ReservedVMARegion* Descriptor = ObjectAlloc->new_construct<ReservedVMARegion>();
    Descriptor->Base = reinterpret_cast<uint64_t>(Range.Ptr);
    Descriptor->RegionSize = Range.Size;
    ReservedRegions->emplace_back(Descriptor);
  }
}


// Default construction: sizes the managed range from the host VA size, obtains the memory regions between
// LOWER_BOUND and UPPER_BOUND and hands them to AllocateMemoryRegions.
OSAllocator_64Bit::OSAllocator_64Bit() {
  // UPPER_BOUND must be computed before it is used below.
  DetermineVASize();

  auto StolenRegions = FEXCore::Allocator::StealMemoryRegion(LOWER_BOUND, UPPER_BOUND);
  AllocateMemoryRegions(StolenRegions);
}

// Constructs the allocator on top of caller-provided memory regions instead of obtaining them itself.
// Regions is passed straight to AllocateMemoryRegions, which may shrink one of its entries.
// Unlike the default constructor, DetermineVASize() is not called on this path.
OSAllocator_64Bit::OSAllocator_64Bit(fextl::vector<FEXCore::Allocator::MemoryRegion>& Regions) {
  AllocateMemoryRegions(Regions);
}

// Unmaps every region tracked by the allocator, live regions first and reserved regions second.
OSAllocator_64Bit::~OSAllocator_64Bit() {
  // This needs a mutex to be thread safe
  auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(AllocationMutex, TLSThread);

  // Live regions are described by the reserved descriptor they were created from.
  for (LiveVMARegion* Live : *LiveRegions) {
    auto* Slab = Live->SlabInfo;
    ::munmap(reinterpret_cast<void*>(Slab->Base), Slab->RegionSize);
  }

  // Reserved regions that never became live.
  for (ReservedVMARegion* Reserved : *ReservedRegions) {
    ::munmap(reinterpret_cast<void*>(Reserved->Base), Reserved->RegionSize);
  }
}

// Creates a default-constructed OSAllocator_64Bit and returns it through the HostAllocator interface.
fextl::unique_ptr<Alloc::HostAllocator> Create64BitAllocator() {
  return fextl::make_unique<OSAllocator_64Bit>();
}

// Deleter for objects created by make_alloc_unique: runs the destructor and unmaps the page-rounded storage that
// holds the object.
template<class T>
struct alloc_delete : public std::default_delete<T> {
  void operator()(T* ptr) const {
    if (!ptr) {
      return;
    }

    // The object occupies sizeof(T) rounded up to whole pages.
    const auto StorageBytes = FEXCore::AlignUp(sizeof(T), FEXCore::Utils::FEX_PAGE_SIZE);

    std::destroy_at(ptr);
    ::munmap(ptr, StorageBytes);
  }

  // Allows conversion to the default deleter of any base class U of T.
  template<typename U>
  requires (std::is_base_of_v<U, T>)
  operator fextl::default_delete<U>() {
    return fextl::default_delete<U>();
  }
};

// Constructs a T in the first page of Base and returns ownership of it.
//
// T must fit in a single page. That page is mapped read/write at Base.Ptr, removed from Base, and T is constructed in
// it with the forwarded arguments.
//
// @param Base - Region that donates the page. Its Ptr/Size are advanced past the page; Size may become zero.
// @param args - Arguments forwarded to T's constructor.
//
// @return Owning pointer to the constructed object. Terminates through ERROR_AND_DIE_FMT on failure.
template<class T, class... Args>
requires (!std::is_array_v<T>)
fextl::unique_ptr<T> make_alloc_unique(FEXCore::Allocator::MemoryRegion& Base, Args&&... args) {
  const auto size = sizeof(T);
  const auto MinPage = FEXCore::AlignUp(size, FEXCore::Utils::FEX_PAGE_SIZE);
  // A whole page is mapped and subtracted below, so the region must hold the full page and not merely sizeof(T).
  // Otherwise the MAP_FIXED mapping would spill past the region and Base.Size would underflow.
  if (Base.Size < MinPage || MinPage != FEXCore::Utils::FEX_PAGE_SIZE) {
    ERROR_AND_DIE_FMT("Couldn't fit allocator in to page!");
  }

  auto ptr = ::mmap(Base.Ptr, MinPage, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
  if (ptr == MAP_FAILED) {
    ERROR_AND_DIE_FMT("Couldn't allocate memory region");
  }

  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(ptr), MinPage);

  // Remove the page from the base region.
  // Could be zero after this.
  Base.Size -= MinPage;
  Base.Ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Base.Ptr) + MinPage);

  auto Result = ::new (ptr) T(std::forward<Args>(args)...);
  return fextl::unique_ptr<T, alloc_delete<T>>(Result);
}

// Creates an OSAllocator_64Bit that manages the caller-provided regions.
//
// The allocator object itself is placed in the first page of the smallest region, and that region's entry in Regions
// is shrunk accordingly before the remaining regions are handed to the allocator.
//
// @param Regions - Regions to manage. Must not be empty. Modified in place.
//
// @return The new allocator through the HostAllocator interface.
fextl::unique_ptr<Alloc::HostAllocator> Create64BitAllocatorWithRegions(fextl::vector<FEXCore::Allocator::MemoryRegion>& Regions) {
  if (Regions.empty()) {
    ERROR_AND_DIE_FMT("Couldn't allocate memory region");
  }

  // This is a bit tricky as we can't allocate memory safely except from the Regions provided. Otherwise we might overwrite memory pages we
  // don't own. Scan the memory regions and find the smallest one.
  //
  // Track the candidate by pointer. A reference cannot be rebound, so assigning through it would overwrite Regions[0]
  // with a copy of the candidate and leave the real smallest entry untouched (and later handed out in full, including
  // the page that holds the allocator object).
  FEXCore::Allocator::MemoryRegion* Smallest = &Regions[0];
  for (auto& it : Regions) {
    // `<=` keeps the last entry among equally sized regions.
    if (it.Size <= Smallest->Size) {
      Smallest = &it;
    }
  }

  return make_alloc_unique<OSAllocator_64Bit>(*Smallest, Regions);
}

} // namespace Alloc::OSAllocator

namespace FEXCore::Allocator {
// Records Thread in Alloc::OSAllocator::TLSThread, the value the OS allocator passes to
// GuardSignalDeferringSectionWithFallback when taking its allocation mutex.
void RegisterTLSData(FEXCore::Core::InternalThreadState* Thread) {
  Alloc::OSAllocator::TLSThread = Thread;
}

// Clears Alloc::OSAllocator::TLSThread. The Thread argument is not used.
void UninstallTLSData(FEXCore::Core::InternalThreadState* Thread) {
  Alloc::OSAllocator::TLSThread = nullptr;
}
} // namespace FEXCore::Allocator


================================================
FILE: FEXCore/Source/Utils/Allocator/FlexBitSet.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>

namespace FEXCore {

// Bitset stored in a flexible array member.
//
// The struct itself has no size; the owner is responsible for placing it at the end of an object and providing
// SizeInBytes(Elements) bytes of storage behind it. No member function performs bounds checking.
template<typename T>
struct FlexBitSet final {
  using ElementType = T;
  constexpr static size_t MinimumSize = sizeof(ElementType);
  constexpr static size_t MinimumSizeBits = sizeof(ElementType) * 8;

  T Memory[];

  // Returns true if bit `Element` is set.
  bool Get(size_t Element) const {
    return (Memory[Element / MinimumSizeBits] & (1ULL << (Element % MinimumSizeBits))) != 0;
  }
  // Clears bit `Element` and returns its previous value.
  bool TestAndClear(size_t Element) {
    bool Value = Get(Element);
    Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
    return Value;
  }
  // Sets bit `Element` and returns its previous value.
  bool TestAndSet(size_t Element) {
    bool Value = Get(Element);
    Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
    return Value;
  }
  void Set(size_t Element) {
    Memory[Element / MinimumSizeBits] |= (1ULL << (Element % MinimumSizeBits));
  }
  void Clear(size_t Element) {
    Memory[Element / MinimumSizeBits] &= ~(1ULL << (Element % MinimumSizeBits));
  }
  // Clears the storage backing the first `Elements` bits, rounded up to whole ElementType words.
  // The byte count must come from SizeInBytes(). Dividing the bit count by the word width yields a word count, and
  // using that as a byte count clears too little for large sets and too much for small ones.
  void MemClear(size_t Elements) {
    memset(Memory, 0, SizeInBytes(Elements));
  }
  // Sets the storage backing the first `Elements` bits, rounded up to whole ElementType words.
  void MemSet(size_t Elements) {
    memset(Memory, 0xFF, SizeInBytes(Elements));
  }

  // Range scanning results
  struct BitsetScanResults {
    // Which element was found. ~0ULL if not found.
    size_t FoundElement;
    // During the scan, found a hole in the allocations that didn't fit.
    bool FoundHole;
  };

  // TODO: Make {Forward,Backward}ScanForRange faster
  // Currently these functions test a single bit at a time, which is fairly costly.
  // The compiler emits a full element load per iteration, wasting a bunch of time on loads.
  // If we change these functions to have a pre-amble and post-amble to align the primary loop to the element size then this can go
  // significantly faster.
  //
  // Once the element scanning is aligned to the element size, we can then use native count leading zero(CLZ) and count trailing zero(CTZ)
  // instructions on a full element to scan uint64_t elements per loop iteration.

  // Implementation details:
  // Template argument WantUnset
  // Used to determine if the desired range is for set or unset ranges.
  // Typically `WantUnset` should be true. Used for finding an unset range that the caller will then set.
  //
  // Scans downwards for ElementCount consecutive matching elements. Candidate ranges end at the current position,
  // starting with BeginningElement, and never start below MinimumElement.
  //
  // @param BeginningElement - The first element in the set to start scanning from.
  // @param ElementCount - How many elements to find a range for fitting.
  // @param MinimumElement - Minimum element in the set to search to
  //
  // @return The scan results. FoundElement is the first (lowest) element of the range.
  template<bool WantUnset>
  BitsetScanResults BackwardScanForRange(size_t BeginningElement, size_t ElementCount, size_t MinimumElement) {
    bool FoundHole {};

    // Final element to iterate to. A range ending below this would start below MinimumElement.
    const size_t FinalElement = MinimumElement + ElementCount - 1;

    for (size_t CurrentPage = BeginningElement; CurrentPage >= FinalElement;) {
      size_t Remaining = ElementCount;
      LOGMAN_THROW_A_FMT(CurrentPage <= BeginningElement && CurrentPage >= FinalElement, "BackwardScanForRange: Scanning less than "
                                                                                         "available range");

      // Walk the candidate range from its lowest element upwards until a conflicting element is hit.
      while (Remaining) {
        if (this->Get(CurrentPage - Remaining + 1) == WantUnset) {
          // Has an intersecting range
          break;
        }
        --Remaining;
      }

      if (Remaining) {
        // If we found at least one Element hole then track that
        if (Remaining != ElementCount) {
          FoundHole = true;
        }

        // Didn't find a slab range
        if (CurrentPage < Remaining) {
          // Stepping down would wrap below element zero and restart the scan from a huge index.
          break;
        }
        CurrentPage -= Remaining;
      } else {
        // We have a slab range
        return BitsetScanResults {CurrentPage - ElementCount + 1, FoundHole};
      }
    }

    return BitsetScanResults {~0ULL, FoundHole};
  }

  // Scans upwards for ElementCount consecutive matching elements, starting at BeginningElement. The returned range
  // always lies inside [0, ElementsInSet).
  //
  // @param BeginningElement - The first element in the set to start scanning from.
  // @param ElementCount - How many elements to find a range for fitting.
  // @param ElementsInSet - How many elements are in the full set.
  //
  // @return The scan results. FoundElement is the first (lowest) element of the range.
  template<bool WantUnset>
  BitsetScanResults ForwardScanForRange(size_t BeginningElement, size_t ElementCount, size_t ElementsInSet) {
    bool FoundHole {};

    if (ElementCount > ElementsInSet) {
      // The range can never fit. Bail out before the subtraction below wraps around.
      return BitsetScanResults {~0ULL, FoundHole};
    }

    // Final element to iterate to. A range starting here ends exactly on the last element of the set
    // (ElementsInSet - 1); starting any later would read and hand out elements past the end.
    const size_t FinalElement = ElementsInSet - ElementCount;

    for (size_t CurrentElement = BeginningElement; CurrentElement <= FinalElement;) {
      // If we have enough free space, check if we have enough free pages that are contiguous
      size_t Remaining = ElementCount;

      LOGMAN_THROW_A_FMT(CurrentElement >= BeginningElement && CurrentElement <= FinalElement, "ForwardScanForRange: Scanning less than "
                                                                                               "available range");

      // Walk the candidate range from its highest element downwards until a conflicting element is hit.
      while (Remaining) {
        if (this->Get(CurrentElement + Remaining - 1) == WantUnset) {
          // Has an intersecting range
          break;
        }
        --Remaining;
      }

      if (Remaining) {
        // If we found at least one Element hole then track that
        if (Remaining != ElementCount) {
          FoundHole = true;
        }

        // Didn't find a slab range. Skip past the conflicting element.
        CurrentElement += Remaining;
      } else {
        // We have a slab range
        return BitsetScanResults {CurrentElement, FoundHole};
      }
    }

    return BitsetScanResults {~0ULL, FoundHole};
  }

  // This very explicitly doesn't let you take an address
  // Is only a getter
  bool operator[](size_t Element) const {
    return Get(Element);
  }

  // Returns the number of bits required to hold the number of elements.
  // Just rounds up to the MinimumSizeInBits.
  constexpr static size_t SizeInBits(uint64_t Elements) {
    return FEXCore::AlignUp(Elements, MinimumSizeBits);
  }
  // Returns the number of bytes required to hold the number of elements.
  constexpr static size_t SizeInBytes(uint64_t Elements) {
    return SizeInBits(Elements) / 8;
  }
};

static_assert(sizeof(FlexBitSet<uint64_t>) == 0, "This needs to be a flex member");
static_assert(std::is_trivially_copyable_v<FlexBitSet<uint64_t>>, "Needs to be trivially copyable");

} // namespace FEXCore


================================================
FILE: FEXCore/Source/Utils/Allocator/HostAllocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/vector.h>

#include <cstddef>
#include <sys/types.h>

namespace FEXCore::Allocator {
struct MemoryRegion;
}
namespace FEXCore::Core {
struct InternalThreadState;
}

namespace Alloc {
// HostAllocator is just a page based slab allocator
// Similar to mmap and munmap only mapping at the page level
//
// AllocateSlab/DeallocateSlab must be provided by implementations. Mmap/Munmap and the fork hooks have default
// implementations that do nothing useful and are meant to be overridden.
class HostAllocator {
public:
  HostAllocator() = default;
  virtual ~HostAllocator() = default;
  // Allocates a slab of Size bytes.
  virtual void* AllocateSlab(size_t Size) = 0;
  // Releases a slab of Size bytes at Ptr.
  virtual void DeallocateSlab(void* Ptr, size_t Size) = 0;

  // mmap-style entry point. The default implementation maps nothing and returns nullptr.
  virtual void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
    return nullptr;
  }
  // munmap-style entry point. The default implementation unmaps nothing and returns -1.
  virtual int Munmap(void* addr, size_t length) {
    return -1;
  }

  // Fork hooks. The defaults are no-ops.
  virtual void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) {}
  virtual void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) {}
};

// Abstract malloc-style allocator interface that keeps a pointer to a HostAllocator.
class GlobalAllocator {
public:
  // Host allocator supplied at construction. Stored as a raw pointer; this class does not delete it.
  HostAllocator* Alloc {};
  GlobalAllocator(HostAllocator* _Alloc)
    : Alloc {_Alloc} {}

  virtual ~GlobalAllocator() = default;
  // The usual C allocation entry points, to be provided by implementations.
  virtual void* malloc(size_t Size) = 0;
  virtual void* calloc(size_t num, size_t size) = 0;
  virtual void* realloc(void* ptr, size_t size) = 0;
  virtual void* memalign(size_t alignment, size_t size) = 0;
  virtual void free(void* ptr) = 0;
};
} // namespace Alloc

namespace Alloc::OSAllocator {
// Creates the 64-bit host allocator.
fextl::unique_ptr<Alloc::HostAllocator> Create64BitAllocator();
// Creates the 64-bit host allocator on top of caller-provided memory regions.
fextl::unique_ptr<Alloc::HostAllocator> Create64BitAllocatorWithRegions(fextl::vector<FEXCore::Allocator::MemoryRegion>& Regions);
// Takes ownership of Allocator and deliberately releases it without destroying it.
static inline void ReleaseAllocatorWorkaround(fextl::unique_ptr<Alloc::HostAllocator> Allocator) {
  // XXX: This is currently a leak.
  // We can't work around this yet until static initializers that allocate memory are completely removed from our codebase
  // The allocator is also intrusively allocated, so the unique_ptr tries to double free the HostAllocator object.
  // Luckily we only remove this on process shutdown, so the kernel will do the cleanup for us
  Allocator.release();
}

} // namespace Alloc::OSAllocator


================================================
FILE: FEXCore/Source/Utils/Allocator/IntrusiveArenaAllocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "FlexBitSet.h"
#include "HostAllocator.h"

#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>

#include <bitset>
#include <cstddef>
#include <memory_resource>
namespace fex_pmr = std::pmr;
#include <sys/user.h>

#include <mutex>

namespace Alloc {
// Bump allocator that is constructed at the start of the memory range it manages.
//
// Allocations only move forward; deallocation is a no-op. The first sizeof(ForwardOnlyIntrusiveArenaAllocator) bytes
// of the range are treated as already used, on the expectation that the object was placement-constructed at Ptr.
// Alignment is applied to the offset from the range base, so the base itself is assumed to be suitably aligned.
class ForwardOnlyIntrusiveArenaAllocator final : public fex_pmr::memory_resource {
public:
  // @param Ptr - Base of the managed range.
  // @param _Size - Size of the managed range in bytes.
  ForwardOnlyIntrusiveArenaAllocator(void* Ptr, size_t _Size)
    : Begin {reinterpret_cast<uintptr_t>(Ptr)}
    , Size {_Size} {
    LastAllocation = sizeof(ForwardOnlyIntrusiveArenaAllocator);
  }

  ~ForwardOnlyIntrusiveArenaAllocator() = default;

  // Allocates storage for a U from the arena and constructs it with the given arguments.
  //
  // @return The constructed object, or nullptr if the arena is exhausted.
  template<class U, class... Args>
  U* new_construct(Args&&... args) {
    void* Ptr = do_allocate(sizeof(U), alignof(U));
    if (!Ptr) {
      // Constructing at a null address is undefined behaviour; report exhaustion to the caller instead.
      return nullptr;
    }
    // static_cast<Args&&> is perfect forwarding spelled without needing <utility>.
    return new (Ptr) U(static_cast<Args&&>(args)...);
  }

  // Same as above, with U deduced from the type of the (unused) Class pointer.
  template<class U, class... Args>
  U* new_construct(U* Class, Args&&... args) {
    void* Ptr = do_allocate(sizeof(U), alignof(U));
    if (!Ptr) {
      return nullptr;
    }
    return new (Ptr) U(static_cast<Args&&>(args)...);
  }

  // Offset one past the most recent allocation, i.e. bytes consumed including the allocator header and padding.
  size_t AmountAllocated() const {
    return LastAllocation;
  }

private:
  // Bumps the allocation offset. Returns nullptr when the request does not fit in the remaining space.
  void* do_allocate(std::size_t bytes, std::size_t alignment) override {
    size_t PreviousAligned = FEXCore::AlignUp(LastAllocation, alignment);

    // Written as two comparisons so that a huge `bytes` cannot wrap the sum back into range.
    if (PreviousAligned > Size || bytes > Size - PreviousAligned) {
      return nullptr;
    }

    LastAllocation = PreviousAligned + bytes;

    return reinterpret_cast<void*>(Begin + PreviousAligned);
  }

  void do_deallocate(void*, std::size_t, std::size_t) override {
    // Do nothing
  }

  bool do_is_equal(const fex_pmr::memory_resource& other) const noexcept override {
    // Only if the allocator pointers are the same are they equal
    if (this == &other) {
      return true;
    }
    // We don't share state with another allocator so we can't share anything
    return false;
  }

  // Base address of the managed range.
  uintptr_t Begin;
  // Size of the managed range in bytes.
  size_t Size;
  // Offset from Begin of the first unused byte.
  size_t LastAllocation {};
};

// Page-granular arena allocator that is constructed at the start of the memory range it manages.
//
// A bitmap with one bit per page follows the object in memory. The pages holding the object and the bitmap are
// marked used at construction. The bitmap storage is not cleared here, so the backing memory is assumed to be
// zero-initialized (e.g. fresh anonymous pages) — TODO confirm with callers.
class IntrusiveArenaAllocator final : public fex_pmr::memory_resource {
public:
  // @param Ptr - Base of the managed range; the object is expected to live at this address.
  // @param _Size - Size of the managed range in bytes.
  IntrusiveArenaAllocator(void* Ptr, size_t _Size)
    : Begin {reinterpret_cast<uintptr_t>(Ptr)}
    , Size {_Size} {
    const uint64_t NumberOfPages = _Size / FEXCore::Utils::FEX_PAGE_SIZE;

    // Bytes taken by this object plus its bitmap, rounded up to whole pages.
    const uint64_t ReservedBytes = FEXCore::AlignUp(sizeof(IntrusiveArenaAllocator) + FEXCore::FlexBitSet<uint64_t>::SizeInBytes(NumberOfPages),
                                                    FEXCore::Utils::FEX_PAGE_SIZE);

    // The bitmap is indexed by page, so convert the byte count to pages before marking anything. Using the byte count
    // directly would mark thousands of pages used and underflow FreePages.
    uint64_t ReservedPages = ReservedBytes / FEXCore::Utils::FEX_PAGE_SIZE;
    if (ReservedPages > NumberOfPages) {
      ReservedPages = NumberOfPages;
    }

    for (uint64_t i = 0; i < ReservedPages; ++i) {
      UsedPages.Set(i);
    }

    FreePages = NumberOfPages - ReservedPages;
  }

  // Allocates whole pages for a U from the arena and constructs it with the given arguments.
  //
  // @return The constructed object, or nullptr if no suitable page range is free.
  template<class U, class... Args>
  U* new_construct(Args&&... args) {
    void* Ptr = do_allocate(sizeof(U), alignof(U));
    if (!Ptr) {
      // Constructing at a null address is undefined behaviour; report exhaustion to the caller instead.
      return nullptr;
    }
    // static_cast<Args&&> is perfect forwarding spelled without needing <utility>.
    return new (Ptr) U(static_cast<Args&&>(args)...);
  }

  // Same as above, with U deduced from the type of the (unused) Class pointer.
  template<class U, class... Args>
  U* new_construct(U* Class, Args&&... args) {
    void* Ptr = do_allocate(sizeof(U), alignof(U));
    if (!Ptr) {
      return nullptr;
    }
    return new (Ptr) U(static_cast<Args&&>(args)...);
  }

  uintptr_t GetSlabBase() const {
    return Begin;
  }
  uint64_t GetSlabSize() const {
    return Size;
  }
  uint64_t GetFreePages() const {
    return FreePages;
  }

private:
  // Allocates enough contiguous pages to hold `bytes`. The alignment argument is not consulted; results are always
  // page aligned relative to Begin. Returns nullptr when no range is free.
  void* do_allocate(std::size_t bytes, std::size_t alignment) override {
    std::scoped_lock<std::mutex> lk {AllocationMutex};

    const size_t TotalPages = Size / FEXCore::Utils::FEX_PAGE_SIZE;
    const size_t NumberPages = FEXCore::AlignUp(bytes, FEXCore::Utils::FEX_PAGE_SIZE) / FEXCore::Utils::FEX_PAGE_SIZE;

    if (NumberPages > TotalPages) {
      // Can never fit, and the bound below would wrap.
      return nullptr;
    }

    // Highest page index at which a range of NumberPages still ends inside the arena. The bound is in pages, not
    // bytes, so the scan cannot walk past the end of the bitmap.
    const size_t LastStartPage = TotalPages - NumberPages;

    // First-fit scan from StartPage. Returns the first page of a free range, or 0 when none was found. Page 0 always
    // holds the allocator itself, so 0 is never a valid result.
    const auto ScanFrom = [&](uintptr_t StartPage) -> uintptr_t {
      for (uintptr_t CurrentPage = StartPage; CurrentPage <= LastStartPage;) {
        size_t Remaining = NumberPages;

        while (Remaining) {
          if (UsedPages[CurrentPage + Remaining - 1]) {
            // Has an intersecting range
            break;
          }
          --Remaining;
        }

        if (!Remaining) {
          // We have a range to allocate
          return CurrentPage;
        }

        // Didn't find an allocation range. Skip past the page that was in use.
        CurrentPage += Remaining;
      }
      return 0;
    };

    uintptr_t AllocatedOffset = ScanFrom(LastAllocatedPageOffset);

    if (!AllocatedOffset && LastAllocatedPageOffset != 0) {
      // Try again but starting from the beginning
      LastAllocatedPageOffset = 0;
      AllocatedOffset = ScanFrom(0);
    }

    // Allocated offset must be valid or zero at this point
    if (!AllocatedOffset) {
      return nullptr;
    }

    // Map the range as no longer available
    for (size_t i = 0; i < NumberPages; ++i) {
      UsedPages.Set(AllocatedOffset + i);
    }

    // Keep the free page count in step with do_deallocate, which adds freed pages back.
    FreePages -= NumberPages;
    LastAllocatedPageOffset = AllocatedOffset + NumberPages;

    // Now convert this base page to a pointer and return it
    return reinterpret_cast<void*>(Begin + AllocatedOffset * FEXCore::Utils::FEX_PAGE_SIZE);
  }

  // Marks the pages covering [p, p + bytes) as free. Pointers outside the arena are ignored.
  void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
    std::scoped_lock<std::mutex> lk {AllocationMutex};

    const uintptr_t Address = reinterpret_cast<uintptr_t>(p);
    if (Address < Begin || Address - Begin >= Size) {
      // Not ours. Indexing the bitmap with this address would touch memory outside of it.
      return;
    }

    const size_t TotalPages = Size / FEXCore::Utils::FEX_PAGE_SIZE;
    uintptr_t PageOffset = (Address - Begin) / FEXCore::Utils::FEX_PAGE_SIZE;
    size_t NumPages = FEXCore::AlignUp(bytes, FEXCore::Utils::FEX_PAGE_SIZE) / FEXCore::Utils::FEX_PAGE_SIZE;

    // Walk the allocation list and deallocate, never stepping past the last page of the arena
    uint64_t FreedPages {};
    for (size_t i = 0; i < NumPages && PageOffset + i < TotalPages; ++i) {
      FreedPages += UsedPages.TestAndClear(PageOffset + i) ? 1 : 0;
    }
    FreePages += FreedPages;
  }

  bool do_is_equal(const fex_pmr::memory_resource& other) const noexcept override {
    // Only if the allocator pointers are the same are they equal
    if (this == &other) {
      return true;
    }
    // We don't share state with another allocator so we can't share anything
    return false;
  }

  // Base address of the managed range.
  uintptr_t Begin;
  // Size of the managed range in bytes.
  size_t Size;
  // Number of pages currently not marked used.
  uint64_t FreePages {};
  // Page index at which the next allocation scan starts.
  size_t LastAllocatedPageOffset {};
  std::mutex AllocationMutex {};
  // For up to 64GB regions this will require up to 2MB tracking
  // Needs to be the last element
  FEXCore::FlexBitSet<uint64_t> UsedPages;
};
} // namespace Alloc


================================================
FILE: FEXCore/Source/Utils/Allocator.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Utils/Allocator/HostAllocator.h"
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/PrctlUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/memory_resource.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <algorithm>
#include <array>
#include <cctype>
#include <cerrno>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <fcntl.h>
#ifndef _WIN32
#include <sys/mman.h>
#include <sys/user.h>
#endif

namespace fextl::pmr {
// Single file-local instance of FEX's default memory resource.
static fextl::pmr::default_resource FEXDefaultResource;
// Returns the address of that instance.
std::pmr::memory_resource* get_default_resource() {
  return &FEXDefaultResource;
}
} // namespace fextl::pmr

#ifndef _WIN32
namespace FEXCore::Allocator {
// Active mmap/munmap entry points. They start out as the libc functions and are
// swapped to FEX_mmap/FEX_munmap by AssignHookOverrides(), then restored by ClearHooks().
MMAP_Hook mmap {::mmap};
MUNMAP_Hook munmap {::munmap};

// Cached result of DetermineVASize() (number of VA bits); zero until the first successful probe.
uint64_t HostVASize {};

// Function pointer shapes for glibc allocation hooks (not referenced in the visible part of this file).
using GLIBC_MALLOC_Hook = void* (*)(size_t, const void* caller);
using GLIBC_REALLOC_Hook = void* (*)(void*, size_t, const void* caller);
using GLIBC_FREE_Hook = void (*)(void*, const void* caller);

// Host allocator that backs FEX_mmap/FEX_munmap. Set by SetupHooks()/Setup48BitAllocatorIfExists(),
// released in ClearHooks().
fextl::unique_ptr<Alloc::HostAllocator> Alloc64 {};

// mmap replacement that routes the request through Alloc64.
// The allocator reports failure as a negated errno in the top 4096 values of the pointer
// range; that is translated here into the libc convention (errno set, (void*)-1 returned).
// Successful anonymous mappings are labelled "FEXMem".
void* FEX_mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
  void* const Mapping = Alloc64->Mmap(addr, length, prot, flags, fd, offset);

  const bool Failed = Mapping >= (void*)-4096;
  if (Failed) {
    errno = -(uint64_t)Mapping;
    return (void*)-1;
  }

  const bool IsAnonymous = (flags & MAP_ANONYMOUS) != 0;
  if (IsAnonymous) {
    VirtualName("FEXMem", Mapping, length);
  }
  return Mapping;
}

// Attaches `Name` to the anonymous mapping [Ptr, Ptr + Size) through prctl(PR_SET_VMA_ANON_NAME).
// The first failing prctl call turns all later calls into no-ops.
void VirtualName(const char* Name, void* Ptr, size_t Size) {
  static bool Supports {true};
  if (!Supports) {
    return;
  }

  if (prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, Ptr, Size, Name) == -1) {
    // Disable any additional attempts.
    Supports = false;
  }
}

// munmap replacement that routes the request through Alloc64.
// A non-zero allocator result is a negated errno; it is converted to the libc
// convention of returning -1 with errno set.
int FEX_munmap(void* addr, size_t length) {
  const int Status = Alloc64->Munmap(addr, length);
  if (Status == 0) {
    return Status;
  }

  errno = -Status;
  return -1;
}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"

// Points every mmap/munmap hook at the FEX implementations and then (re)initializes
// the internal allocator with the given page size. Expects Alloc64 to already be set,
// since FEX_mmap/FEX_munmap dereference it.
static void AssignHookOverrides(size_t PageSize) {
  SetupAllocatorHooks(FEX_mmap, FEX_munmap);
  FEXCore::Allocator::mmap = FEX_mmap;
  FEXCore::Allocator::munmap = FEX_munmap;
  InitializeAllocator(PageSize);
}

// Creates the 64-bit host allocator and installs the FEX mmap/munmap hooks on top of it.
void SetupHooks(size_t PageSize) {
  Alloc64 = Alloc::OSAllocator::Create64BitAllocator();
  AssignHookOverrides(PageSize);
}

// Restores the libc mmap/munmap entry points and hands the host allocator back to
// OSAllocator for release. The hooks are reset first so nothing routes through
// Alloc64 after ownership has been moved out of it.
void ClearHooks() {
  SetupAllocatorHooks(::mmap, ::munmap);
  FEXCore::Allocator::mmap = ::mmap;
  FEXCore::Allocator::munmap = ::munmap;

  Alloc::OSAllocator::ReleaseAllocatorWorkaround(std::move(Alloc64));
}
#pragma GCC diagnostic pop

// Probes the host for the number of usable virtual address bits.
//
// For each candidate width (largest first) up to 64 single-page PROT_NONE mappings are
// attempted at and just below the top of that range. The first candidate for which the
// kernel places a mapping exactly at the requested address wins. The result is cached in
// HostVASize, so later calls return immediately.
FEX_DEFAULT_VISIBILITY size_t DetermineVASize() {
  if (HostVASize) {
    return HostVASize;
  }

  static constexpr std::array<uintptr_t, 7> TLBSizes = {
    57, 52, 48, 47, 42, 39, 36,
  };

  // Just try allocating
  // We can't actually determine VA size on ARM safely
  const auto CanMapNearTop = [](uintptr_t Top) -> bool {
    for (int Page = 0; Page < 64; ++Page) {
      // Try grabbing a some of the top pages of the range
      // x86 allocates some high pages in the top end
      void* const Wanted = reinterpret_cast<void*>(Top - FEXCore::Utils::FEX_PAGE_SIZE * Page);
      void* const Got =
        ::mmap(Wanted, FEXCore::Utils::FEX_PAGE_SIZE, PROT_NONE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
      if (Got == (void*)~0ULL) {
        continue;
      }

      // Whatever we got back is released again; only its placement matters.
      ::munmap(Got, FEXCore::Utils::FEX_PAGE_SIZE);
      if (Got == Wanted) {
        return true;
      }
    }
    return false;
  };

  for (auto Bits : TLBSizes) {
    const uintptr_t Top = 1ULL << Bits;
    if (!CanMapNearTop(Top)) {
      continue;
    }

    HostVASize = Bits;
    return Bits;
  }

  LOGMAN_MSG_A_FMT("Couldn't determine host VA size");
  FEX_UNREACHABLE;
}

#define STEAL_LOG(...) // fprintf(stderr, __VA_ARGS__)

// Parses a /proc/<pid>/maps style stream from `MapsFD` and returns every unmapped gap
// that intersects [Begin, End).
//
// Lines are expected to start with "<hex begin>-<hex end> "; the rest of each line is
// ignored. Data is pulled through a fixed-size buffer, so a line must fit in the buffer
// to be parsed. Parsing stops early once a mapping ends at or beyond `End`.
//
// End of file is detected by read() returning 0 (or failing); a short read on its own is
// not treated as end of file.
fextl::vector<MemoryRegion> CollectMemoryGaps(uintptr_t Begin, uintptr_t End, int MapsFD) {
  fextl::vector<MemoryRegion> Regions;

  // End address of the most recently parsed mapping
  uintptr_t RegionEnd = 0;

  char Buffer[2048];
  const char* Cursor = Buffer;
  ssize_t Remaining = 0;

  bool EndOfFileReached = false;

  while (true) {
    const auto line_begin = Cursor;
    auto line_end = std::find(line_begin, Cursor + Remaining, '\n');

    // Check if the buffered data covers the entire line.
    // If not, try buffering more data.
    if (line_end == Cursor + Remaining) {
      if (EndOfFileReached) {
        // No more data to buffer. Add remaining memory and return.
        const auto MapBegin = std::max(RegionEnd, Begin);
        STEAL_LOG("[%d] EndOfFile; MapBegin: %016lX MapEnd: %016lX\n", __LINE__, MapBegin, End);
        if (End > MapBegin) {
          Regions.push_back({(void*)MapBegin, End - MapBegin});
        }

        return Regions;
      }

      // Move pending content back to the beginning, then buffer more data.
      // Nothing to move if the pending data already sits at the front (std::copy must
      // not be called with the destination equal to the source start).
      if (Cursor != Buffer) {
        std::copy(Cursor, Cursor + Remaining, std::begin(Buffer));
      }
      const size_t PendingBytes = static_cast<size_t>(Remaining);
      const size_t Capacity = sizeof(Buffer) - PendingBytes;

      // A full buffer without a newline means the line can't be parsed with this buffer.
      // The zero-length read below then reports 0 and ends parsing instead of spinning.
      LogMan::Throw::AFmt(Capacity != 0, "Line in memory map exceeds the {} byte parse buffer", sizeof(Buffer));

      ssize_t BytesRead {};
      do {
        BytesRead = read(MapsFD, Buffer + PendingBytes, Capacity);
      } while (BytesRead == -1 && (errno == EAGAIN || errno == EINTR));

      LogMan::Throw::AFmt(BytesRead != -1, "Failed to read memory map: {}", errno);

      if (BytesRead <= 0) {
        // Zero bytes is end of file. A hard error is handled the same way so that a
        // negative count is never folded into the buffered length.
        EndOfFileReached = true;
        BytesRead = 0;
      }

      Remaining = static_cast<ssize_t>(PendingBytes) + BytesRead;

      Cursor = Buffer;
      continue;
    }

    // Parse mapped region in the format "fffff7cc3000-fffff7cc4000 r--p ..."
    {
      uintptr_t RegionBegin {};
      auto result = std::from_chars(Cursor, line_end, RegionBegin, 16);
      LogMan::Throw::AFmt(result.ec == std::errc {} && *result.ptr == '-', "Unexpected line format");
      Cursor = result.ptr + 1;

      // Add gap between the previous region and the current one
      const auto MapBegin = std::max(RegionEnd, Begin);
      const auto MapEnd = std::min(RegionBegin, End);
      if (MapEnd > MapBegin) {
        Regions.push_back({(void*)MapBegin, MapEnd - MapBegin});
      }

      result = std::from_chars(Cursor, line_end, RegionEnd, 16);
      LogMan::Throw::AFmt(result.ec == std::errc {} && *result.ptr == ' ', "Unexpected line format");
      Cursor = result.ptr + 1;

      STEAL_LOG("[%d] parsed line: RegionBegin=%016lX RegionEnd=%016lX\n", __LINE__, RegionBegin, RegionEnd);

      if (RegionEnd >= End) {
        // Early return if we are completely beyond the allocation space.
        return Regions;
      }
    }

    // Skip the rest of the line, including the newline itself
    Remaining -= line_end + 1 - line_begin;
    Cursor = line_end + 1;
  }
  FEX_UNREACHABLE;
}

// Blocks off every currently unmapped gap inside [Begin, End) by mapping it PROT_NONE,
// and returns the list of gaps that were blocked this way.
//
// One gap is treated specially: if the current stack lies inside the bounds, the gap
// directly below it is mapped readable+writable instead and is NOT part of the returned
// list (so ReclaimMemoryRegion() will not unmap it).
fextl::vector<MemoryRegion> StealMemoryRegion(uintptr_t Begin, uintptr_t End) {
  // alloca(0) is used to obtain an address inside the current stack frame
  const uintptr_t StackLocation_u64 = reinterpret_cast<uintptr_t>(alloca(0));

  const int MapsFD = open("/proc/self/maps", O_RDONLY);
  LogMan::Throw::AFmt(MapsFD != -1, "Failed to open /proc/self/maps");

  auto Regions = CollectMemoryGaps(Begin, End, MapsFD);
  close(MapsFD);

  // If the memory bounds include the stack, blocking all memory regions will
  // limit the stack size to the current value. To allow some stack growth,
  // we don't block the memory gap directly below the stack memory but
  // instead map it as readable+writable.
  {
    // First gap that ends above the stack pointer, i.e. the first gap located after the stack
    auto StackRegionIt = std::find_if(Regions.begin(), Regions.end(), [StackLocation_u64](auto& Region) {
      return reinterpret_cast<uintptr_t>(Region.Ptr) + Region.Size > StackLocation_u64;
    });

    // If no gap crossing the stack pointer was found but the SP is within
    // the given bounds, the stack mapping is right after the last gap.
    bool IsStackMapping = StackRegionIt != Regions.end() || StackLocation_u64 <= End;

    // std::prev(StackRegionIt) is the gap immediately preceding the stack; it only exists
    // when the iterator isn't at the beginning of the list.
    if (IsStackMapping && StackRegionIt != Regions.begin() &&
        reinterpret_cast<uintptr_t>(std::prev(StackRegionIt)->Ptr) + std::prev(StackRegionIt)->Size <= End) {
      // Allocate the region under the stack as READ | WRITE so the stack can still grow
      --StackRegionIt;

      auto Alloc =
        ::mmap(StackRegionIt->Ptr, StackRegionIt->Size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED, -1, 0);

      LogMan::Throw::AFmt(Alloc != MAP_FAILED, "StealMemoryRegion:Stack: mmap({}, {:x}) failed: {}", fmt::ptr(StackRegionIt->Ptr),
                          StackRegionIt->Size, errno);
      LogMan::Throw::AFmt(Alloc == StackRegionIt->Ptr, "mmap returned {} instead of {}", Alloc, fmt::ptr(StackRegionIt->Ptr));

      // Already mapped above; drop it so the blocking loop below skips it
      Regions.erase(StackRegionIt);
    }
  }

  // Block remaining memory gaps
  // MAP_FIXED_NOREPLACE is used so an existing mapping is never silently replaced.
  for (auto RegionIt = Regions.begin(); RegionIt != Regions.end(); ++RegionIt) {
    auto Alloc = ::mmap(RegionIt->Ptr, RegionIt->Size, PROT_NONE, MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE | MAP_FIXED_NOREPLACE, -1, 0);

    LogMan::Throw::AFmt(Alloc != MAP_FAILED, "StealMemoryRegion: mmap({}, {:x}) failed: {}", fmt::ptr(RegionIt->Ptr), RegionIt->Size, errno);
    LogMan::Throw::AFmt(Alloc == RegionIt->Ptr, "mmap returned {} instead of {}", Alloc, fmt::ptr(RegionIt->Ptr));
  }

  return Regions;
}

// On hosts with at least 48 bits of virtual address space, blocks off the free gaps in
// the upper half of the 48-bit range ([2^47, 2^48)), builds the host allocator from those
// gaps and installs the FEX mmap/munmap hooks.
// Returns the blocked gaps, or an empty list when the host VA space is smaller than 48 bits.
fextl::vector<MemoryRegion> Setup48BitAllocatorIfExists(size_t PageSize) {
  const size_t HostBits = FEXCore::Allocator::DetermineVASize();
  const bool Has48BitVA = HostBits >= 48;
  if (!Has48BitVA) {
    return {};
  }

  const uintptr_t UpperHalfBegin = 0x0'8000'0000'0000ULL;
  const uintptr_t UpperHalfEnd = 0x1'0000'0000'0000ULL;
  auto Regions = StealMemoryRegion(UpperHalfBegin, UpperHalfEnd);

  Alloc64 = Alloc::OSAllocator::Create64BitAllocatorWithRegions(Regions);
  AssignHookOverrides(PageSize);

  return Regions;
}

// Unmaps every region in the list, using the libc munmap directly.
// Failures from munmap are not reported.
void ReclaimMemoryRegion(const fextl::vector<MemoryRegion>& Regions) {
  for (auto It = Regions.begin(); It != Regions.end(); ++It) {
    ::munmap(It->Ptr, It->Size);
  }
}

// Forwards the pre-fork lock request to the host allocator; no-op when none is installed.
void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) {
  if (!Alloc64) {
    return;
  }
  Alloc64->LockBeforeFork(Thread);
}

// Forwards the post-fork unlock request to the host allocator; no-op when none is installed.
// `Child` is passed through unchanged.
void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) {
  if (!Alloc64) {
    return;
  }
  Alloc64->UnlockAfterFork(Thread, Child);
}
} // namespace FEXCore::Allocator
#endif


================================================
FILE: FEXCore/Source/Utils/Allocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

// Forward declaration only; the fork helpers below just pass the pointer along.
namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEXCore::Allocator {
// Fork bracketing for the host allocator. Both calls are forwarded to the installed
// allocator and do nothing when no allocator has been set up.
void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread);
void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child);
} // namespace FEXCore::Allocator


================================================
FILE: FEXCore/Source/Utils/AllocatorHooks.cpp
================================================
// SPDX-License-Identifier: MIT
#ifdef ENABLE_FEX_ALLOCATOR
#include <rpmalloc/rpmalloc.h>
#ifndef _WIN32
#include <linux/prctl.h>
#include <sys/prctl.h>
#include <sys/mman.h>
#else
#define NTDDI_VERSION 0x0A000005
#include <memoryapi.h>
#endif
#endif

#include <cstdint>
#include <malloc.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

namespace FEXCore::Allocator {
// Signatures matching libc mmap/munmap, used for the replaceable mapping hooks.
using mmap_hook_type = void* (*)(void* addr, size_t length, int prot, int flags, int fd, off_t offset);
using munmap_hook_type = int (*)(void* addr, size_t length);

#ifdef ENABLE_FEX_ALLOCATOR
// Hook pointers defined outside this file with C linkage; InitializeAllocator() assigns them.
typedef void* (*rp_mmap_hook_type)(size_t size, size_t alignment, size_t* offset, size_t* mapped_size);
typedef void (*rp_munmap_hook_type)(void* address, size_t offset, size_t mapped_size);
extern "C" rp_mmap_hook_type rp_mmap_hook;
extern "C" rp_munmap_hook_type rp_munmap_hook;

#ifndef _WIN32
// Mapping functions used by the rpmalloc callbacks below. Default to libc and are
// replaced through SetupAllocatorHooks().
mmap_hook_type fex_mmap_hook = ::mmap;
munmap_hook_type fex_munmap_hook = ::munmap;
#endif

// rpmalloc configuration shared by the wrappers in this file.
// Assume a 64KB page size until told otherwise (InitializeAllocator() overwrites page_size).
static rpmalloc_config_t global_config {
  .page_size = 64 * 1024,
  // THP causes crashes for some reason.
  .enable_huge_pages = 0,
  .disable_decommit = 0,
  // Name attached to mappings created by FEX_rp_mmap
  .page_name = "FEXAllocator",
  .huge_page_name = "FEXAllocator",
  .unmap_on_finalize = 0,
};

// FEXCore::Allocator entry points backed by rpmalloc. Each one forwards directly to the
// rpmalloc function of the matching name unless noted otherwise.
void* malloc(size_t size) {
  return ::rpmalloc(size);
}
void* calloc(size_t n, size_t size) {
  return ::rpcalloc(n, size);
}
void* memalign(size_t align, size_t s) {
  return ::rpmemalign(align, s);
}
// Page-aligned allocation, using the currently configured rpmalloc page size as alignment.
void* valloc(size_t size) {
  return ::rpaligned_alloc(global_config.page_size, size);
}
// The result is staged in a local and then stored to *r regardless of the returned status.
int posix_memalign(void** r, size_t a, size_t s) {
  void* ptr;
  auto res = ::rpposix_memalign(&ptr, a, s);
  *r = ptr;
  return res;
}
void* realloc(void* ptr, size_t size) {
  return ::rprealloc(ptr, size);
}
void free(void* ptr) {
  return ::rpfree(ptr);
}
size_t malloc_usable_size(void* ptr) {
  return ::rpmalloc_usable_size(ptr);
}
void* aligned_alloc(size_t a, size_t s) {
  return ::rpaligned_alloc(a, s);
}
// Aligned allocations are released through the same rpfree() as regular ones.
void aligned_free(void* ptr) {
  return ::rpfree(ptr);
}

// Per-thread rpmalloc setup.
void InitializeThread() {
  rpmalloc_thread_initialize();
}

#ifndef _WIN32
// Rounds `value` up to the next multiple of `size`; values that are already a multiple
// are returned unchanged. `size` must be non-zero.
[[nodiscard]]
constexpr uint64_t AlignUp(uint64_t value, uint64_t size) {
  const uint64_t Remainder = value % size;
  return Remainder == 0 ? value : value + (size - Remainder);
}

// rpmalloc memory_map callback.
// Maps `size` bytes (plus `alignment` bytes of slack when the alignment is at least one
// page) through fex_mmap_hook, names the mapping, and opts it out of transparent huge pages.
// On success returns the aligned pointer, stores the total mapped size in *mapped_size and,
// when an alignment was applied, the distance from the mapping start in *offset.
// Returns nullptr on failure.
static void* FEX_rp_mmap(size_t size, size_t alignment, size_t* offset, size_t* mapped_size) {
// Helper macro shared with FEX_rp_memory_unmap, which #undefs it again.
#define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
  // If the alignment is less than the operating page size then alignment is guaranteed. Just remove it.
  if (alignment < global_config.page_size) {
    alignment = 0;
  }

  // Over-allocate by `alignment` so an aligned address always exists inside the mapping
  size_t map_size = AlignUp(size + alignment, global_config.page_size);
  auto ptr = fex_mmap_hook(0, map_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

  if (ptr == MAP_FAILED) {
    ptr = nullptr;
  } else {
// Fallback definitions for toolchains whose headers don't provide these prctl constants
#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41
#endif

#ifndef PR_SET_VMA_ANON_NAME
#define PR_SET_VMA_ANON_NAME 0
#endif
    // Best effort: the prctl result is not checked
    prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, ptr, map_size, global_config.page_name);

    // Disable HUGEPAGE on allocation from rpmalloc.
    madvise(ptr, map_size, MADV_NOHUGEPAGE);
  }

  if (ptr == nullptr) {
    fprintf(stderr, "Failed to map VMA region.");
    return nullptr;
  }

  if (alignment) {
    // Advance to the next `alignment` boundary; the mask assumes a power-of-two alignment
    size_t padding = ((uintptr_t)ptr & (uintptr_t)(alignment - 1));
    if (padding) {
      padding = alignment - padding;
    }
    ptr = pointer_offset(ptr, padding);
    *offset = padding;
  }
  *mapped_size = map_size;
  return ptr;
}

// rpmalloc memory_commit callback. Intentionally does nothing.
static void FEX_rp_memory_commit(void* address, size_t size) {
  // NOP-implementation.
}

// rpmalloc memory_decommit callback.
// Unless decommit is disabled in the configuration, drops the backing pages of the range
// with MADV_DONTNEED and reports a failure on stderr.
static void FEX_rp_memory_decommit(void* address, size_t size) {
  const bool DecommitEnabled = !global_config.disable_decommit;
  if (DecommitEnabled && madvise(address, size, MADV_DONTNEED) != 0) {
    fprintf(stderr, "Failed to decommit VMA region.");
  }
}

// rpmalloc memory_unmap callback.
// `offset` is the alignment padding that FEX_rp_mmap added to the mapping start; it is
// subtracted again to recover the original mapping address before unmapping
// `mapped_size` bytes through fex_munmap_hook.
static void FEX_rp_memory_unmap(void* address, size_t offset, size_t mapped_size) {
  // NOTE(review): the offset is narrowed to int32_t before negation - fine for padding below 2GB.
  address = pointer_offset(address, -(int32_t)offset);
  int Result = fex_munmap_hook(address, mapped_size);
  if (Result == -1) {
    fprintf(stderr, "Failed to unmap VMA region.");
  }
// Matches the #define at the top of FEX_rp_mmap
#undef pointer_offset
}

// Replaces the mmap/munmap functions that the rpmalloc callbacks in this file map memory with.
void SetupAllocatorHooks(mmap_hook_type MMapHook, munmap_hook_type MunmapHook) {
  fex_mmap_hook = MMapHook;
  fex_munmap_hook = MunmapHook;
}

// Memory interface handed to rpmalloc in InitializeAllocator(): all mapping work goes
// through the FEX_rp_* callbacks above; no failure/error callbacks are installed.
static rpmalloc_interface_t global_interface {
  .memory_map = FEX_rp_mmap,
  .memory_commit = FEX_rp_memory_commit,
  .memory_decommit = FEX_rp_memory_decommit,
  .memory_unmap = FEX_rp_memory_unmap,
  .map_fail_callback = nullptr,
  .error_callback = nullptr,
};

// Records the real page size in the shared configuration, initializes rpmalloc with the
// FEX memory interface, and points the external rp_* hook variables at the same callbacks.
void InitializeAllocator(size_t PageSize) {
  global_config.page_size = PageSize;
  rpmalloc_initialize_config(&global_interface, &global_config);
  rp_mmap_hook = FEX_rp_mmap;
  rp_munmap_hook = FEX_rp_memory_unmap;
}
#endif

#elif defined(_WIN32)
#error "Tried building _WIN32 without jemalloc"

#else
// Fallback used when ENABLE_FEX_ALLOCATOR is not defined: every entry point forwards to
// the global (libc) function of the same name and the setup functions are no-ops.
void InitializeThread() {}

void* malloc(size_t size) {
  return ::malloc(size);
}
void* calloc(size_t n, size_t size) {
  return ::calloc(n, size);
}
void* memalign(size_t align, size_t s) {
  return ::memalign(align, s);
}
void* valloc(size_t size) {
  return ::valloc(size);
}
int posix_memalign(void** r, size_t a, size_t s) {
  return ::posix_memalign(r, a, s);
}
void* realloc(void* ptr, size_t size) {
  return ::realloc(ptr, size);
}
void free(void* ptr) {
  return ::free(ptr);
}
size_t malloc_usable_size(void* ptr) {
  return ::malloc_usable_size(ptr);
}
void* aligned_alloc(size_t a, size_t s) {
  return ::aligned_alloc(a, s);
}
// Aligned allocations are released with the regular free().
void aligned_free(void* ptr) {
  return ::free(ptr);
}

// No hooks to install in this configuration.
void SetupAllocatorHooks(mmap_hook_type MMapHook, munmap_hook_type MunmapHook) {}

// Nothing to initialize in this configuration.
void InitializeAllocator(size_t PageSize) {}

#endif
} // namespace FEXCore::Allocator


================================================
FILE: FEXCore/Source/Utils/AllocatorOverride.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>

#include <fmt/format.h>

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <sstream>
#include <unistd.h>

extern "C" {
// The majority of FEX internal code should avoid using the glibc allocator. To ensure glibc allocations don't accidentally slip
// in, FEX overrides these glibc functions with faulting variants.
//
// A notable exception is thunks, which should still use glibc allocations and avoid using `fextl::` namespace.
//
// Other minor exceptions throughout FEX use the `YesIKnowImNotSupposedToUseTheGlibcAllocator` helper to temporarily disable faulting.
//
// Each public allocator symbol below is declared as a default-visibility alias of the
// matching `fault_*` function defined at the bottom of this file.
#define GLIBC_ALIAS_FUNCTION(func) __attribute__((alias(#func), visibility("default")))
extern void* __libc_calloc(size_t, size_t);
void* calloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_calloc);

extern void __libc_free(void*);
void free(void*) GLIBC_ALIAS_FUNCTION(fault_free);

extern void* __libc_malloc(size_t);
void* malloc(size_t) GLIBC_ALIAS_FUNCTION(fault_malloc);

extern void* __libc_memalign(size_t, size_t);
void* memalign(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_memalign);

extern void* __libc_realloc(void*, size_t);
void* realloc(void*, size_t) GLIBC_ALIAS_FUNCTION(fault_realloc);

extern void* __libc_valloc(size_t);
void* valloc(size_t) GLIBC_ALIAS_FUNCTION(fault_valloc);

// Declared, but not referenced anywhere in the visible part of this file.
extern int __posix_memalign(void**, size_t, size_t);
int posix_memalign(void**, size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_posix_memalign);

// Declared, but not referenced anywhere in the visible part of this file.
extern size_t __malloc_usable_size(void*);
size_t malloc_usable_size(void*) GLIBC_ALIAS_FUNCTION(fault_malloc_usable_size);

// Reuse __libc_memalign
void* aligned_alloc(size_t, size_t) GLIBC_ALIAS_FUNCTION(fault_aligned_alloc);
}

namespace FEXCore::Allocator {
// Enable or disable allocation faulting globally.
static bool GlobalEvaluate {};

// Enable or disable allocation faulting per-thread.
// Used as a nesting counter: non-zero means faulting is skipped on this thread.
static thread_local uint64_t SkipEvalForThread {};

// Internal memory allocation hooks to allow non-faulting allocations through.
auto calloc_ptr = __libc_calloc;
auto free_ptr = __libc_free;
auto malloc_ptr = __libc_malloc;
auto memalign_ptr = __libc_memalign;
auto realloc_ptr = __libc_realloc;
auto valloc_ptr = __libc_valloc;
// NOTE(review): unlike the `__libc_*` pointers above, the next two name the public symbols,
// which this file declares as aliases of fault_posix_memalign / fault_malloc_usable_size.
// They therefore appear to point back at the faulting wrappers rather than at glibc - confirm.
auto posix_memalign_ptr = ::posix_memalign;
auto malloc_usable_size_ptr = ::malloc_usable_size;
auto aligned_alloc_ptr = __libc_memalign;

// Constructor for per-thread allocation faulting check.
// Increments the per-thread skip counter, so scopes may nest.
YesIKnowImNotSupposedToUseTheGlibcAllocator::YesIKnowImNotSupposedToUseTheGlibcAllocator() {
  ++SkipEvalForThread;
}

// Destructor for per-thread allocation faulting check.
// Undoes the increment done by the constructor.
YesIKnowImNotSupposedToUseTheGlibcAllocator::~YesIKnowImNotSupposedToUseTheGlibcAllocator() {
  --SkipEvalForThread;
}

// Hard disabling of per-thread allocation fault checking.
// No coming back from this, used on thread destruction.
FEX_DEFAULT_VISIBILITY void YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable() {
  // Just set it to half of its maximum value so it never wraps back around.
  SkipEvalForThread = std::numeric_limits<decltype(SkipEvalForThread)>::max() / 2;
}

// Enable global fault checking.
// From here on EvaluateReturnAddress() traps unless the calling thread has opted out.
void SetupFaultEvaluate() {
  GlobalEvaluate = true;
}

// Disable global fault checking.
// EvaluateReturnAddress() becomes a no-op on every thread.
void ClearFaultEvaluate() {
  GlobalEvaluate = false;
}

// Evaluate if a glibc hooked allocation should fault.
void EvaluateReturnAddress(void* Return) {
  if (!GlobalEvaluate) {
    // Fault evaluation disabled globally.
    return;
  }

  if (SkipEvalForThread) {
    // Fault evaluation currently disabled for this thread.
    return;
  }

  // We don't know where we are when allocating. Make sure to be safe and generate the string on the stack.
  // Print an error message to let a developer know that an allocation faulted.
  char Tmp[512];
  auto Res = fmt::format_to_n(Tmp, 512, "ERROR: Requested memory using non-FEX allocator at 0x{:x}\n", reinterpret_cast<uint64_t>(Return));
  Tmp[Res.size] = 0;
  write(STDERR_FILENO, Tmp, Res.size);

  // Trap the execution to stop FEX in its tracks.
  FEX_TRAP_EXECUTION;
}
} // namespace FEXCore::Allocator

extern "C" {
// These are the glibc allocator override symbols.
// These will override the glibc allocators and then check if the allocation should fault.
// Each wrapper first passes its caller's return address to EvaluateReturnAddress()
// (which may trap) and then forwards to the stored glibc function pointer.
void* fault_calloc(size_t n, size_t size) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::calloc_ptr(n, size);
}
void fault_free(void* ptr) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  FEXCore::Allocator::free_ptr(ptr);
}
void* fault_malloc(size_t size) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::malloc_ptr(size);
}
void* fault_memalign(size_t align, size_t s) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::memalign_ptr(align, s);
}
void* fault_realloc(void* ptr, size_t size) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::realloc_ptr(ptr, size);
}
void* fault_valloc(size_t size) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::valloc_ptr(size);
}
// Faulting replacement for posix_memalign.
//
// After the fault check, the allocation is served through memalign_ptr (glibc's
// __libc_memalign). posix_memalign_ptr is deliberately not used: it is initialized from
// the public `posix_memalign` symbol, which this file aliases to this very function, so
// calling through it would recurse without end.
//
// Returns 0 and stores the allocation in *r on success, EINVAL when `a` is not a
// power-of-two multiple of sizeof(void*), and ENOMEM when the allocation fails. *r is left
// untouched on failure.
int fault_posix_memalign(void** r, size_t a, size_t s) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));

  // posix_memalign's alignment contract, which plain memalign doesn't enforce.
  const bool IsPowerOfTwo = a != 0 && (a & (a - 1)) == 0;
  if (!IsPowerOfTwo || a % sizeof(void*) != 0) {
    return EINVAL;
  }

  void* Ptr = FEXCore::Allocator::memalign_ptr(a, s);
  if (Ptr == nullptr) {
    return ENOMEM;
  }

  *r = Ptr;
  return 0;
}
// Fault check, then forward to malloc_usable_size_ptr.
// NOTE(review): malloc_usable_size_ptr is initialized from the public `malloc_usable_size`
// symbol, which this file aliases to this function - this looks self-recursive, confirm.
size_t fault_malloc_usable_size(void* ptr) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::malloc_usable_size_ptr(ptr);
}
// Fault check, then forward to aligned_alloc_ptr (which is __libc_memalign).
void* fault_aligned_alloc(size_t a, size_t s) {
  FEXCore::Allocator::EvaluateReturnAddress(__builtin_extract_return_addr(__builtin_return_address(0)));
  return FEXCore::Allocator::aligned_alloc_ptr(a, s);
}
}


================================================
FILE: FEXCore/Source/Utils/ArchHelpers/Arm64.cpp
================================================
// SPDX-License-Identifier: MIT

#include "Interface/Core/CPUBackend.h"
#include "Interface/Context/Context.h"
#include "Utils/SpinWaitLock.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>

#include <atomic>
#include <cstdint>

namespace FEXCore::ArchHelpers::Arm64 {
// AArch64 instruction matching constants.
// They come in MASK/INST groups: presumably an instruction word matches when
// (Instr & *_MASK) == *_INST - the code that uses them is outside this view, confirm there.
// Where several *_INST values follow one mask, they presumably share that mask.

// Compare-and-swap pair
constexpr uint32_t CASPAL_MASK = 0xBF'E0'FC'00;
constexpr uint32_t CASPAL_INST = 0x08'60'FC'00;

// Compare-and-swap
constexpr uint32_t CASAL_MASK = 0x3F'E0'FC'00;
constexpr uint32_t CASAL_INST = 0x08'E0'FC'00;

// Atomic memory operations (see the ATOMIC_*_OP values further down)
constexpr uint32_t ATOMIC_MEM_MASK = 0x3B200C00;
constexpr uint32_t ATOMIC_MEM_INST = 0x38200000;

// RCpc unscaled-offset load-acquire / store-release
constexpr uint32_t RCPC2_MASK = 0x3F'E0'0C'00;
constexpr uint32_t LDAPUR_INST = 0x19'40'00'00;
constexpr uint32_t STLUR_INST = 0x19'00'00'00;

// Exclusive pair load/store
constexpr uint32_t LDAXP_MASK = 0xBF'FF'80'00;
constexpr uint32_t LDAXP_INST = 0x88'7F'80'00;

constexpr uint32_t STLXP_MASK = 0xBF'E0'80'00;
constexpr uint32_t STLXP_INST = 0x88'20'80'00;

// Exclusive / acquire / release single-register load/store
constexpr uint32_t LDAXR_MASK = 0x3F'FF'FC'00;
constexpr uint32_t LDAXR_INST = 0x08'5F'FC'00;
constexpr uint32_t LDAR_INST = 0x08'DF'FC'00;
constexpr uint32_t LDAPR_INST = 0x38'BF'C0'00;
constexpr uint32_t STLR_INST = 0x08'9F'FC'00;

constexpr uint32_t STLXR_MASK = 0x3F'E0'FC'00;
constexpr uint32_t STLXR_INST = 0x08'00'FC'00;

// Load/store register (register offset) (Rm encoded as xzr)
constexpr uint32_t LDSTREGISTER_MASK = 0b0011'1111'1111'1111'1111'1100'0000'0000;
constexpr uint32_t LDR_INST = 0b0011'1000'0111'1111'0110'1000'0000'0000;
constexpr uint32_t STR_INST = 0b0011'1000'0011'1111'0110'1000'0000'0000;

// Load/store register (unscaled immediate)
constexpr uint32_t LDSTUNSCALED_MASK = 0b0011'1011'1110'0000'0000'1100'0000'0000;
constexpr uint32_t LDUR_INST = 0b0011'1000'0100'0000'0000'0000'0000'0000;
constexpr uint32_t STUR_INST = 0b0011'1000'0000'0000'0000'0000'0000'0000;

// Store pair
constexpr uint32_t LDSTP_MASK = 0b0011'1011'1000'0000'0000'0000'0000'0000;
constexpr uint32_t STP_INST = 0b0010'1001'0000'0000'0000'0000'0000'0000;

// Compare and branch on non-zero
constexpr uint32_t CBNZ_MASK = 0x7F'00'00'00;
constexpr uint32_t CBNZ_INST = 0x35'00'00'00;

// Register-register ALU operations
constexpr uint32_t ALU_OP_MASK = 0x7F'20'00'00;
constexpr uint32_t ADD_INST = 0x0B'00'00'00;
constexpr uint32_t SUB_INST = 0x4B'00'00'00;
constexpr uint32_t ADD_SHIFT_INST = 0x0B'20'00'00;
constexpr uint32_t SUB_SHIFT_INST = 0x4B'20'00'00;
constexpr uint32_t CMP_INST = 0x6B'00'00'00;
constexpr uint32_t CMP_SHIFT_INST = 0x6B'20'00'00;
constexpr uint32_t AND_INST = 0x0A'00'00'00;
constexpr uint32_t BIC_INST = 0x0A'20'00'00;
constexpr uint32_t OR_INST = 0x2A'00'00'00;
constexpr uint32_t ORN_INST = 0x2A'20'00'00;
constexpr uint32_t EOR_INST = 0x4A'00'00'00;
constexpr uint32_t EON_INST = 0x4A'20'00'00;

// Conditional compare
constexpr uint32_t CCMP_MASK = 0x7F'E0'0C'10;
constexpr uint32_t CCMP_INST = 0x7A'40'00'00;

// Clear exclusive monitor
constexpr uint32_t CLREX_MASK = 0xFF'FF'F0'FF;
constexpr uint32_t CLREX_INST = 0xD5'03'30'5F;

// Kind of read-modify-write operation; the names mirror the ALU *_INST constants in this
// file. NOTE(review): the code that produces and consumes these is outside this view.
enum ExclusiveAtomicPairType {
  TYPE_SWAP,
  TYPE_ADD,
  TYPE_SUB,
  TYPE_AND,
  TYPE_BIC,
  TYPE_OR,
  TYPE_ORN,
  TYPE_EOR,
  TYPE_EON,
  TYPE_NEG, // This is just a sub with zero. Need to know the differences
};

// Load ops are 4 bits
// Acquire and release bits are independent on the instruction
constexpr uint32_t ATOMIC_ADD_OP = 0b0000;
constexpr uint32_t ATOMIC_CLR_OP = 0b0001;
constexpr uint32_t ATOMIC_EOR_OP = 0b0010;
constexpr uint32_t ATOMIC_SET_OP = 0b0011;
constexpr uint32_t ATOMIC_SWAP_OP = 0b1000;

// Register fields are five bits wide; the offsets are the bit positions of the
// Rd, Rn and Rm fields used by GetRdReg/GetRnReg/GetRmReg.
constexpr uint32_t REGISTER_MASK = 0b11111;
constexpr uint32_t RD_OFFSET = 0;
constexpr uint32_t RN_OFFSET = 5;
constexpr uint32_t RM_OFFSET = 16;

// Data memory barrier encodings. The second operand fills in the CRm field (bits [11:8]),
// which selects the barrier option.
constexpr uint32_t DMB = 0b1101'0101'0000'0011'0011'0000'1011'1111 | 0b1011'0000'0000; // Inner shareable all

// NOTE(review): CRm=0b1101 is the `LD` option (full system, loads), not inner shareable;
// `ISHLD` would be CRm=0b1001. The value is left as-is - confirm which one is intended.
constexpr uint32_t DMB_LD = 0b1101'0101'0000'0011'0011'0000'1011'1111 | 0b1101'0000'0000; // Full system load

// Extracts the 5-bit Rd register field of an instruction word.
static constexpr uint32_t GetRdReg(uint32_t Instr) {
  const uint32_t Shifted = Instr >> RD_OFFSET;
  return Shifted & REGISTER_MASK;
}

// Extracts the 5-bit Rn register field of an instruction word.
static constexpr uint32_t GetRnReg(uint32_t Instr) {
  const uint32_t Shifted = Instr >> RN_OFFSET;
  return Shifted & REGISTER_MASK;
}

// Extracts the 5-bit Rm register field of an instruction word.
static constexpr uint32_t GetRmReg(uint32_t Instr) {
  const uint32_t Shifted = Instr >> RM_OFFSET;
  return Shifted & REGISTER_MASK;
}

// Invokes the compiler's cache-clearing builtin on the byte range [Begin, Begin + Length).
static void ClearICache(void* Begin, std::size_t Length) {
  char* const First = static_cast<char*>(Begin);
  char* const Last = First + Length;
  __builtin___clear_cache(First, Last);
}

// Loads 16 bytes from `Addr` with a load-acquire exclusive pair and immediately drops the
// exclusive monitor again. The first loaded register becomes the low 64 bits of the result,
// the second the high 64 bits.
static __uint128_t LoadAcquire128(uint64_t Addr) {
  __uint128_t Result {};
  uint64_t Lower;
  uint64_t Upper;
  // This specifically avoids using std::atomic<__uint128_t>
  // std::atomic helper does a ldaxp + stxp pair that crashes when the page is only mapped readable
  __asm volatile(
    R"(
  ldaxp %[ResultLower], %[ResultUpper], [%[Addr]];
  clrex;
)"
    : [ResultLower] "=r"(Lower), [ResultUpper] "=r"(Upper)
    : [Addr] "r"(Addr)
    : "memory");
  // Reassemble the two halves into one 128-bit value
  Result = Upper;
  Result <<= 64;
  Result |= Lower;
  return Result;
}

static uint64_t LoadAcquire64(uint64_t Addr) {
  auto Atom = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
  return Atom.load(std::memory_order_acquire);
}

static bool StoreCAS64(uint64_t& Expected, uint64_t Val, uint64_t Addr) {
  auto Atom = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
  return Atom.compare_exchange_strong(Expected, Val);
}

static uint32_t LoadAcquire32(uint64_t Addr) {
  auto Atom = std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(Addr));
  return Atom.load(std::memory_order_acquire);
}

static bool StoreCAS32(uint32_t& Expected, uint32_t Val, uint64_t Addr) {
  auto Atom = std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(Addr));
  return Atom.compare_exchange_strong(Expected, Val);
}

static uint8_t LoadAcquire8(uint64_t Addr) {
  auto Atom = std::atomic_ref<uint8_t>(*reinterpret_cast<uint8_t*>(Addr));
  return Atom.load(std::memory_order_acquire);
}

static bool StoreCAS8(uint8_t& Expected, uint8_t Val, uint64_t Addr) {
  auto Atom = std::atomic_ref<uint8_t>(*reinterpret_cast<uint8_t*>(Addr));
  return Atom.compare_exchange_strong(Expected, Val);
}

// Reads a 16-bit value from `Addr` using the narrowest naturally aligned atomic load that
// covers both bytes. Which load is used depends on the boundary the two bytes straddle;
// only the 16-byte-crossing case has to be split into two separate loads.
static uint16_t DoLoad16(uint64_t Addr) {
  uint64_t AlignmentMask = 0b1111;
  if ((Addr & AlignmentMask) == 15) {
    // Address crosses over 16byte or 64byte threshold
    // Needs two loads
    uint64_t AddrUpper = Addr + 1;
    uint8_t ActualUpper {};
    uint8_t ActualLower {};
    // Careful ordering here
    // The upper byte is read before the lower byte
    ActualUpper = LoadAcquire8(AddrUpper);
    ActualLower = LoadAcquire8(Addr);

    uint16_t Result = ActualUpper;
    Result <<= 8;
    Result |= ActualLower;
    return Result;
  } else {
    AlignmentMask = 0b111;
    if ((Addr & AlignmentMask) == 7) {
      // Crosses 8byte boundary
      // Needs 128bit load
      // Fits within a 16byte region
      uint64_t Alignment = Addr & 0b1111;
      Addr &= ~0b1111ULL;

      __uint128_t TmpResult = LoadAcquire128(Addr);

      // Zexts the result
      uint16_t Result = TmpResult >> (Alignment * 8);
      return Result;
    } else {
      AlignmentMask = 0b11;
      if ((Addr & AlignmentMask) == 3) {
        // Crosses 4byte boundary
        // Needs 64bit Load
        // (Addr & 0b111) is 3 here since 7 was handled above, so clearing the low two
        // bits already yields an 8-byte aligned address.
        uint64_t Alignment = Addr & AlignmentMask;
        Addr &= ~AlignmentMask;

        auto Atomic = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
        uint64_t TmpResult = Atomic.load();

        // Zexts the result
        uint16_t Result = TmpResult >> (Alignment * 8);
        return Result;
      } else {
        // Fits within 4byte boundary
        // Only needs 32bit Load
        // Only alignment offset will be 1 here
        uint64_t Alignment = Addr & AlignmentMask;
        Addr &= ~AlignmentMask;

        auto Atomic = std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(Addr));
        uint32_t TmpResult = Atomic.load();

        // Zexts the result
        uint16_t Result = TmpResult >> (Alignment * 8);
        return Result;
      }
    }
  }
}

// Reads a 32-bit value from `Addr` using naturally aligned atomic loads. A value that
// crosses a 16-byte boundary is assembled from two 32-bit loads; everything else is
// extracted from a single 128-bit or 64-bit load.
static uint32_t DoLoad32(uint64_t Addr) {
  uint64_t AlignmentMask = 0b1111;
  if ((Addr & AlignmentMask) > 12) {
    // Address crosses over 16byte threshold
    // Needs dual 32bit load
    uint64_t Alignment = Addr & 0b11;
    Addr &= ~0b11ULL;

    uint64_t AddrUpper = Addr + 4;

    // Careful ordering here
    // The upper word is read before the lower word
    uint32_t ActualUpper = LoadAcquire32(AddrUpper);
    uint32_t ActualLower = LoadAcquire32(Addr);

    uint64_t Result = ActualUpper;
    Result <<= 32;
    Result |= ActualLower;
    // Shift the requested bytes down; the return type truncates to 32 bits
    return Result >> (Alignment * 8);
  } else {
    AlignmentMask = 0b111;
    if ((Addr & AlignmentMask) >= 5) {
      // Crosses 8byte boundary
      // Needs 128bit load
      // Fits within a 16byte region
      uint64_t Alignment = Addr & 0b1111;
      Addr &= ~0b1111ULL;

      __uint128_t TmpResult = LoadAcquire128(Addr);

      return TmpResult >> (Alignment * 8);
    } else {
      // Fits within 8byte boundary
      // Only needs 64bit load
      // Alignments can be [1,5)
      uint64_t Alignment = Addr & AlignmentMask;
      Addr &= ~AlignmentMask;

      auto Atomic = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
      uint64_t TmpResult = Atomic.load();

      return TmpResult >> (Alignment * 8);
    }
  }
}

// Reads a 64-bit value from `Addr` using naturally aligned atomic loads. A value that
// crosses a 16-byte boundary is assembled from two 64-bit loads; otherwise it is extracted
// from one 128-bit load of the enclosing 16-byte block.
static uint64_t DoLoad64(uint64_t Addr) {
  uint64_t AlignmentMask = 0b1111;
  if ((Addr & AlignmentMask) > 8) {
    uint64_t Alignment = Addr & 0b111;
    Addr &= ~0b111ULL;
    uint64_t AddrUpper = Addr + 8;

    // Crosses a 16byte boundary
    // Needs two 8 byte loads
    uint64_t ActualUpper {};
    uint64_t ActualLower {};
    // Careful ordering here
    // The upper half is read before the lower half
    ActualUpper = LoadAcquire64(AddrUpper);
    ActualLower = LoadAcquire64(Addr);

    __uint128_t Result = ActualUpper;
    Result <<= 64;
    Result |= ActualLower;
    // Shift the requested bytes down; the return type truncates to 64 bits
    return Result >> (Alignment * 8);
  } else {
    // Fits within a 16byte region
    uint64_t Alignment = Addr & AlignmentMask;
    Addr &= ~AlignmentMask;
    __uint128_t TmpResult = LoadAcquire128(Addr);
    uint64_t Result = TmpResult >> (Alignment * 8);
    return Result;
  }
}

// Reads 128 bits from `Addr`, which may be unaligned.
//
// Any misalignment means the access crosses a 16-byte boundary, so the two enclosing
// 16-byte granules are loaded with LoadAcquire128 and the requested 16 bytes are copied
// out of the resulting 32-byte window at the misalignment offset.
//
// Returns the 128-bit value located at `Addr`.
static __uint128_t DoLoad128(uint64_t Addr) {
  const uint64_t ByteOffset = Addr & 0b1111;
  uint64_t LowerAddr = Addr & ~0b1111ULL;
  uint64_t UpperAddr = LowerAddr + 16;

  // Window[0] is the lower granule, Window[1] the upper granule; together they form
  // 32 contiguous bytes mirroring guest memory starting at LowerAddr.
  __uint128_t Window[2];
  // Upper granule is loaded first, then the lower one
  Window[1] = LoadAcquire128(UpperAddr);
  Window[0] = LoadAcquire128(LowerAddr);

  __uint128_t Result {};
  memcpy(&Result, reinterpret_cast<const uint8_t*>(Window) + ByteOffset, sizeof(Result));
  return Result;
}

// Emulates a compare-and-swap of a 32-bit register pair (64 bits of memory total) on an
// address that may be unaligned.
//
// Parameters:
//   GPRs                 - register file; indexed by the register numbers below.
//   Size                 - 0 selects the 32-bit pair form, which is the only form handled here.
//   DesiredReg1/2        - registers holding the low/high 32 bits of the value to store.
//   ExpectedReg1/2       - registers holding the low/high 32 bits of the value expected in memory.
//                          When the comparison fails they are overwritten with the observed
//                          memory value (low 32 bits in ExpectedReg1, high 32 bits in ExpectedReg2).
//   AddressReg           - register holding the memory address.
//   StrictSplitLockMutex - optional; when non-null a UniqueSpinMutex is constructed over it for
//                          accesses that cross a 64-byte or 16-byte boundary.
//                          NOTE(review): presumably this takes the lock for the remainder of the
//                          function - confirm against UniqueSpinMutex.
//
// Returns true when the operation was handled (whether the compare succeeded or failed), and
// false when Size != 0.
static bool RunCASPAL(uint64_t* GPRs, uint32_t Size, uint32_t DesiredReg1, uint32_t DesiredReg2, uint32_t ExpectedReg1,
                      uint32_t ExpectedReg2, uint32_t AddressReg, uint32_t* StrictSplitLockMutex) {

  std::optional<FEXCore::Utils::SpinWaitLock::UniqueSpinMutex<uint32_t>> Lock {};
  if (Size == 0) {
    // 32bit
    uint64_t Addr = GPRs[AddressReg];

    // Only the low 32 bits of each register take part in the operation
    uint32_t DesiredLower = GPRs[DesiredReg1];
    uint32_t DesiredUpper = GPRs[DesiredReg2];

    uint32_t ExpectedLower = GPRs[ExpectedReg1];
    uint32_t ExpectedUpper = GPRs[ExpectedReg2];

    // Cross-cacheline CAS doesn't work on ARM
    // It isn't even guaranteed to work on x86
    // Intel will do a "split lock" which locks the full bus
    // AMD will tear instead
    // Both cross-cacheline and cross 16byte both need dual CAS loops that can tear
    // ARMv8.4 LSE2 solves all atomic issues except cross-cacheline

    // Check for Split lock across a cacheline
    // An 8-byte access starting past byte 56 of a 64-byte line spills into the next line
    if ((Addr & 63) > 56) {
      FEXCORE_TELEMETRY_SET(TYPE_HAS_SPLIT_LOCKS, 1);
      if (StrictSplitLockMutex && !Lock.has_value()) {
        Lock.emplace(StrictSplitLockMutex);
      }
    }

    uint64_t AlignmentMask = 0b1111;
    // An 8-byte access starting past byte 8 of a 16-byte granule spills into the next granule
    if ((Addr & AlignmentMask) > 8) {
      FEXCORE_TELEMETRY_SET(TYPE_16BYTE_SPLIT, 1);
      if (StrictSplitLockMutex && !Lock.has_value()) {
        Lock.emplace(StrictSplitLockMutex);
      }

      // Work on the two aligned 8-byte words surrounding the access
      uint64_t Alignment = Addr & 0b111;
      Addr &= ~0b111ULL;
      uint64_t AddrUpper = Addr + 8;

      // Crosses a 16byte boundary
      // Need to do 256bit atomic, but since that doesn't exist we need to do a dual CAS loop
      // Mask selects the 64 bits we operate on inside the 128-bit upper:lower window;
      // NegMask selects the neighbouring bytes that must be preserved.
      __uint128_t Mask = ~0ULL;
      Mask <<= Alignment * 8;
      __uint128_t NegMask = ~Mask;
      __uint128_t TmpExpected {};
      __uint128_t TmpDesired {};

      // Desired pair, positioned at its byte offset inside the window
      __uint128_t Desired = DesiredUpper;
      Desired <<= 32;
      Desired |= DesiredLower;
      Desired <<= Alignment * 8;

      // Expected pair, positioned at its byte offset inside the window
      __uint128_t Expected = ExpectedUpper;
      Expected <<= 32;
      Expected |= ExpectedLower;
      Expected <<= Alignment * 8;

      while (1) {
        // Upper word is loaded before the lower word
        __uint128_t LoadOrderUpper = LoadAcquire64(AddrUpper);
        LoadOrderUpper <<= 64;
        __uint128_t TmpActual = LoadOrderUpper | LoadAcquire64(Addr);

        // Set up expected
        // Neighbouring bytes come from what was just observed, our bytes from the guest's expected value
        TmpExpected = TmpActual;
        TmpExpected &= NegMask;
        TmpExpected |= Expected;

        // Set up desired
        TmpDesired = TmpExpected;
        TmpDesired &= NegMask;
        TmpDesired |= Desired;

        uint64_t TmpExpectedLower = TmpExpected;
        uint64_t TmpExpectedUpper = TmpExpected >> 64;

        uint64_t TmpDesiredLower = TmpDesired;
        uint64_t TmpDesiredUpper = TmpDesired >> 64;

        // Only attempt the stores when memory currently holds the expected value
        if (TmpExpected == TmpActual) {
          if (StoreCAS64(TmpExpectedUpper, TmpDesiredUpper, AddrUpper)) {
            if (StoreCAS64(TmpExpectedLower, TmpDesiredLower, Addr)) {
              // Stored successfully
              return true;
            } else {
              // CAS managed to tear, we can't really solve this
              // Continue down the path to let the guest know values weren't expected
              FEXCORE_TELEMETRY_SET(TYPE_CAS_128BIT_TEAR, 1);
            }
          }

          // Rebuild the window from the per-word expected values.
          // NOTE(review): this relies on StoreCAS64 writing the observed value back into its
          // first argument on failure - confirm against its definition.
          TmpExpected = TmpExpectedUpper;
          TmpExpected <<= 64;
          TmpExpected |= TmpExpectedLower;
        } else {
          // Mismatch up front
          TmpExpected = TmpActual;
        }

        // Not successful
        // Now we need to check the results to see if we need to try again
        __uint128_t FailedResultOurBits = TmpExpected & Mask;
        __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

        __uint128_t FailedDesiredOurBits = TmpDesired & Mask;
        __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
        if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
          // If the bits changed that weren't part of our regular CAS then we need to try again
          continue;
        }
        if ((FailedResultOurBits ^ FailedDesiredOurBits) != 0) {
          // If the bits changed that we were wanting to change then we have failed and can return
          // We need to extract the bits and return them in EXPECTED
          uint64_t FailedResult = FailedResultOurBits >> (Alignment * 8);
          GPRs[ExpectedReg1] = FailedResult & ~0U;
          GPRs[ExpectedReg2] = FailedResult >> 32;
          return true;
        }

        // This happens in the case that between Load and CAS something has stored our desired value into the memory location
        // This means our CAS fails because what we wanted to store was already stored
        uint64_t FailedResult = FailedResultOurBits >> (Alignment * 8);
        GPRs[ExpectedReg1] = FailedResult & ~0U;
        GPRs[ExpectedReg2] = FailedResult >> 32;
        return true;
      }
    } else {
      // Fits within a 16byte region
      // A single 128-bit CAS on the enclosing granule covers the whole access
      uint64_t Alignment = Addr & 0b1111;
      Addr &= ~0b1111ULL;
      auto Atomic128 = std::atomic_ref<__uint128_t>(*reinterpret_cast<__uint128_t*>(Addr));

      // Mask selects our 64 bits inside the granule; NegMask selects the bytes to preserve
      __uint128_t Mask = ~0ULL;
      Mask <<= Alignment * 8;
      __uint128_t NegMask = ~Mask;
      __uint128_t TmpExpected {};
      __uint128_t TmpDesired {};

      __uint128_t Desired = (uint64_t)DesiredUpper << 32 | DesiredLower;
      Desired <<= Alignment * 8;

      __uint128_t Expected = (uint64_t)ExpectedUpper << 32 | ExpectedLower;
      Expected <<= Alignment * 8;

      while (1) {
        TmpExpected = Atomic128.load();

        // Set up expected
        TmpExpected &= NegMask;
        TmpExpected |= Expected;

        // Set up desired
        TmpDesired = TmpExpected;
        TmpDesired &= NegMask;
        TmpDesired |= Desired;

        // On failure compare_exchange_strong stores the observed granule into TmpExpected
        bool CASResult = Atomic128.compare_exchange_strong(TmpExpected, TmpDesired);
        if (CASResult) {
          // Successful, so we are done
          return true;
        } else {
          // Not successful
          // Now we need to check the results to see if we need to try again
          __uint128_t FailedResultOurBits = TmpExpected & Mask;
          __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

          __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
          if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
            // If the bits changed that weren't part of our regular CAS then we need to try again
            continue;
          }

          // Only our bits differ from what was expected: report the observed value to the guest
          // through the expected registers.
          uint64_t FailedResult = FailedResultOurBits >> (Alignment * 8);
          GPRs[ExpectedReg1] = FailedResult & ~0U;
          GPRs[ExpectedReg2] = FailedResult >> 32;
          return true;
        }
      }
    }
  }
  // Size != 0 is not handled by this function
  return false;
}

// Decodes the register operands of a CASPAL instruction word and forwards them to RunCASPAL.
//
//   bit 30      -> size selector
//   bits [4:0]  -> first register of the desired pair (second is the next register)
//   bits [9:5]  -> address register
//   bits [20:16]-> first register of the expected pair (second is the next register)
//
// Returns whatever RunCASPAL returns.
static bool HandleCASPAL(uint32_t Instr, uint64_t* GPRs, uint32_t* StrictSplitLockMutex) {
  constexpr uint32_t RegFieldMask = 0b11111;

  const uint32_t SizeBit = (Instr >> 30) & 1;
  const uint32_t DesiredFirst = Instr & RegFieldMask;
  const uint32_t AddressIndex = (Instr >> 5) & RegFieldMask;
  const uint32_t ExpectedFirst = (Instr >> 16) & RegFieldMask;

  // Each pair occupies two consecutive registers
  const uint32_t DesiredSecond = DesiredFirst + 1;
  const uint32_t ExpectedSecond = ExpectedFirst + 1;

  return RunCASPAL(GPRs, SizeBit, DesiredFirst, DesiredSecond, ExpectedFirst, ExpectedSecond, AddressIndex, StrictSplitLockMutex);
}

// Handles a faulting LDAXP that starts one of the JIT's exclusive-pair sequences.
//
// 32-bit pair form - the JIT emits a compare-and-swap loop of this shape:
//   [1]  ldaxp  TMP2.W, TMP3.W, [MemSrc]              <- faulting instruction (DataReg, DataReg2, AddrReg)
//   [2]  cmp    TMP2.W, Expected.first.W              <- ExpectedReg1
//   [3]  ccmp   TMP3.W, Expected.second.W, NoFlag, eq <- ExpectedReg2
//   [4]  b.ne   LoopNotExpected
//   [5]  stlxp  TMP2.W, Desired.first.W, Desired.second.W, [MemSrc] <- DesiredReg1/2
//   [6]  cbnz   TMP2.W, LoopTop
//   [7]  mov    Dst.first.W, Expected.first.W
//   [8]  mov    Dst.second.W, Expected.second.W
//   [9]  b      LoopExpected
//   [10] mov    Dst.first.W, TMP2.W
//   [11] mov    Dst.second.W, TMP3.W
//   [12] clrex
//
// 64-bit pair form - appears for paranoid vector accesses:
//   load:   [1] ldaxp TMP1, TMP2, [MemSrc]
//           [2] clrex
//   store:  [1] ldaxp xzr, TMP3, [MemSrc]        <- can hit SIGBUS
//           [2] stlxp TMP3, TMP1, TMP2, [MemSrc] <- can also hit SIGBUS
//           [3] cbnz  TMP3, B                    <- overwritten with DMB
//   Only the load shape is emulated here.
//
// Returns the number of bytes to advance the program counter by, or 0 when the sequence
// was not handled.
static uint64_t HandleCASPAL_ARMv8(uint32_t Instr, uintptr_t ProgramCounter, uint64_t* GPRs, uint32_t* StrictSplitLockMutex) {
  constexpr uint32_t ZeroRegister = 31;

  const uint32_t* Code = reinterpret_cast<const uint32_t*>(ProgramCounter);

  const uint32_t Size = (Instr >> 30) & 1;
  const uint32_t AddrReg = (Instr >> 5) & 0x1F;
  const uint32_t DataReg = Instr & 0x1F;
  const uint32_t DataReg2 = (Instr >> 10) & 0x1F;

  if (Size == 1) {
    // A zero-register first destination is the store shape, which is not handled
    if (DataReg == ZeroRegister) {
      return 0;
    }

    // The load shape is an ldaxp immediately followed by clrex
    const uint32_t Following = Code[1];
    if ((Following & ArchHelpers::Arm64::CLREX_MASK) != ArchHelpers::Arm64::CLREX_INST) {
      return 0;
    }

    const auto Loaded = DoLoad128(GPRs[AddrReg]);

    // First destination is known not to be the zero register at this point
    GPRs[DataReg] = Loaded;
    // Second destination is only written when it isn't the zero register
    if (DataReg2 != ZeroRegister) {
      GPRs[DataReg2] = Loaded >> 64;
    }

    // Skip ldaxp and clrex
    return 2 * sizeof(uint32_t);
  }

  // Only 32-bit pairs from here on.
  // Scan the nine instructions after the ldaxp to recover the operand registers.
  uint32_t ExpectedFirst {};
  uint32_t ExpectedSecond {};
  uint32_t DesiredFirst {};
  uint32_t DesiredSecond {};

  for (int Index = 1; Index < 10; ++Index) {
    const uint32_t Candidate = Code[Index];
    const uint32_t AluBits = Candidate & ArchHelpers::Arm64::ALU_OP_MASK;

    const bool IsCompare = AluBits == ArchHelpers::Arm64::CMP_INST || AluBits == ArchHelpers::Arm64::CMP_SHIFT_INST;
    if (IsCompare) {
      ExpectedFirst = GetRmReg(Candidate);
      continue;
    }

    if ((Candidate & ArchHelpers::Arm64::CCMP_MASK) == ArchHelpers::Arm64::CCMP_INST) {
      ExpectedSecond = GetRmReg(Candidate);
      continue;
    }

    if ((Candidate & ArchHelpers::Arm64::STLXP_MASK) == ArchHelpers::Arm64::STLXP_INST) {
      DesiredFirst = Candidate & 0x1F;
      DesiredSecond = (Candidate >> 10) & 0x1F;
    }
  }

  // Seed the JIT's temporaries with the expected values; RunCASPAL treats them as the
  // expected pair and overwrites them with the observed value on a failed compare.
  GPRs[DataReg] = GPRs[ExpectedFirst];
  GPRs[DataReg2] = GPRs[ExpectedSecond];

  const bool Handled = RunCASPAL(GPRs, Size, DesiredFirst, DesiredSecond, DataReg, DataReg2, AddrReg, StrictSplitLockMutex);
  if (!Handled) {
    return 0;
  }

  return 9 * sizeof(uint32_t); // skip to mov + clrex
}

// Callback used by the DoCAS* helpers to compute the value memory is expected to hold.
// It is invoked with the value currently observed in memory (Src) and the caller-supplied
// expected operand, and returns the value to compare memory against.
template<typename T>
using CASExpectedFn = T (*)(T Src, T Expected);
// Callback used by the DoCAS* helpers to compute the value to store.
// It is invoked with the value currently observed in memory (Src) and the caller-supplied
// desired operand, and returns the value to write on a successful compare.
template<typename T>
using CASDesiredFn = T (*)(T Src, T Desired);

// Performs a 16-bit compare-and-swap style operation on an address that may be unaligned.
//
// Template parameter:
//   Retry - true for atomic memory operations that must keep retrying until the store lands;
//           false for a plain CAS that reports a failed compare back to the caller.
//
// Parameters:
//   DesiredSrc           - operand forwarded to DesiredFunction.
//   ExpectedSrc          - operand forwarded to ExpectedFunction.
//   Addr                 - memory address of the 16-bit value.
//   ExpectedFunction     - maps (observed value, ExpectedSrc) to the value memory must hold.
//   DesiredFunction      - maps (observed value, DesiredSrc) to the value to store.
//   StrictSplitLockMutex - optional; when non-null a UniqueSpinMutex is constructed over it for
//                          accesses that cross a 64-byte or 16-byte boundary.
//                          NOTE(review): presumably this takes the lock for the remainder of the
//                          function - confirm against UniqueSpinMutex.
//
// Returns the expected value when the store succeeded, otherwise the value observed in memory.
//
// The widest access that still contains both bytes is chosen:
//   offset 15 in a 16-byte granule -> two single-byte CAS operations (can tear)
//   offset 7 in an 8-byte word     -> one 128-bit CAS
//   offset 3 in a 4-byte word      -> one 64-bit CAS
//   otherwise                      -> one 32-bit CAS
template<bool Retry>
static uint16_t DoCAS16(uint16_t DesiredSrc, uint16_t ExpectedSrc, uint64_t Addr, CASExpectedFn<uint16_t> ExpectedFunction,
                        CASDesiredFn<uint16_t> DesiredFunction, uint32_t* StrictSplitLockMutex) {
  std::optional<FEXCore::Utils::SpinWaitLock::UniqueSpinMutex<uint32_t>> Lock {};

  // A 2-byte access starting at the last byte of a 64-byte line spills into the next line
  if ((Addr & 63) == 63) {
    FEXCORE_TELEMETRY_SET(TYPE_HAS_SPLIT_LOCKS, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }
  }

  // 16 bit
  uint64_t AlignmentMask = 0b1111;
  if ((Addr & AlignmentMask) == 15) {
    FEXCORE_TELEMETRY_SET(TYPE_16BYTE_SPLIT, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }

    // Address crosses over 16byte or 64byte threshold
    // Need a dual 8bit CAS loop
    uint64_t AddrUpper = Addr + 1;

    while (1) {
      uint8_t ActualUpper {};
      uint8_t ActualLower {};
      // Careful ordering here
      ActualUpper = LoadAcquire8(AddrUpper);
      ActualLower = LoadAcquire8(Addr);

      uint16_t Actual = ActualUpper;
      Actual <<= 8;
      Actual |= ActualLower;

      uint16_t Desired = DesiredFunction(Actual, DesiredSrc);
      uint8_t DesiredLower = Desired;
      uint8_t DesiredUpper = Desired >> 8;

      uint16_t Expected = ExpectedFunction(Actual, ExpectedSrc);
      uint8_t ExpectedLower = Expected;
      uint8_t ExpectedUpper = Expected >> 8;

      bool Tear = false;
      if (ActualUpper == ExpectedUpper && ActualLower == ExpectedLower) {
        if (StoreCAS8(ExpectedUpper, DesiredUpper, AddrUpper)) {
          if (StoreCAS8(ExpectedLower, DesiredLower, Addr)) {
            // Stored successfully
            return Expected;
          } else {
            // CAS managed to tear, we can't really solve this
            // Continue down the path to let the guest know values weren't expected
            Tear = true;
            FEXCORE_TELEMETRY_SET(TYPE_CAS_16BIT_TEAR, 1);
          }
        }

        // Refresh BOTH halves from the CAS helpers' expected values.
        // StoreCAS8 is expected to write the observed byte back into its first argument when
        // the compare fails (the lower-half refresh already relied on this). Without also
        // refreshing the upper half, a failed upper-byte CAS would leave ActualUpper equal to
        // the expected byte, and the non-Retry path would return a value identical to the
        // expected one - reporting success to the caller although nothing was stored.
        ActualUpper = ExpectedUpper;
        ActualLower = ExpectedLower;
      }

      // If the bits changed that we were wanting to change then we have failed and can return
      // We need to extract the bits and return them in EXPECTED
      uint16_t FailedResult = ActualUpper;
      FailedResult <<= 8;
      FailedResult |= ActualLower;

      if constexpr (Retry) {
        if (Tear) {
          // If we are retrying and tearing then we can't do anything here
          // XXX: Resolve with TME
          return FailedResult;
        } else {
          // We can retry safely
        }
      } else {
        // Without Retry (CAS) then we have failed regardless of tear
        // CAS failed but handled successfully
        return FailedResult;
      }
    }
  } else {
    AlignmentMask = 0b111;
    if ((Addr & AlignmentMask) == 7) {
      // Crosses 8byte boundary
      // Needs 128bit CAS
      // Fits within a 16byte region
      uint64_t Alignment = Addr & 0b1111;
      Addr &= ~0b1111ULL;
      auto Atomic128 = std::atomic_ref<__uint128_t>(*reinterpret_cast<__uint128_t*>(Addr));

      // Mask selects our 16 bits inside the granule; NegMask selects the bytes to preserve
      __uint128_t Mask = 0xFFFF;
      Mask <<= Alignment * 8;
      __uint128_t NegMask = ~Mask;
      __uint128_t TmpExpected {};
      __uint128_t TmpDesired {};

      while (1) {
        TmpExpected = Atomic128.load();

        __uint128_t Desired = DesiredFunction(TmpExpected >> (Alignment * 8), DesiredSrc);
        Desired <<= Alignment * 8;

        __uint128_t Expected = ExpectedFunction(TmpExpected >> (Alignment * 8), ExpectedSrc);
        Expected <<= Alignment * 8;

        // Set up expected
        TmpExpected &= NegMask;
        TmpExpected |= Expected;

        // Set up desired
        TmpDesired = TmpExpected;
        TmpDesired &= NegMask;
        TmpDesired |= Desired;

        // On failure compare_exchange_strong stores the observed granule into TmpExpected
        bool CASResult = Atomic128.compare_exchange_strong(TmpExpected, TmpDesired);
        if (CASResult) {
          // Successful, so we are done
          return Expected >> (Alignment * 8);
        } else {
          if constexpr (Retry) {
            // If we failed but we have enabled retry then just retry without checking results
            // CAS can't retry but atomic memory ops need to retry until passing
            continue;
          }
          // Not successful
          // Now we need to check the results to see if we need to try again
          __uint128_t FailedResultOurBits = TmpExpected & Mask;
          __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

          __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
          if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
            // If the bits changed that weren't part of our regular CAS then we need to try again
            continue;
          }

          // Only our bits differ from what was expected: report the observed value
          uint16_t FailedResult = FailedResultOurBits >> (Alignment * 8);
          // CAS failed but handled successfully
          return FailedResult;
        }
      }
    } else {
      AlignmentMask = 0b11;
      if ((Addr & AlignmentMask) == 3) {
        // Crosses 4byte boundary
        // Needs 64bit CAS
        uint64_t Alignment = Addr & AlignmentMask;
        Addr &= ~AlignmentMask;

        uint64_t Mask = 0xFFFF;
        Mask <<= Alignment * 8;

        uint64_t NegMask = ~Mask;

        uint64_t TmpExpected {};
        uint64_t TmpDesired {};

        auto Atomic = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
        while (1) {
          TmpExpected = Atomic.load();

          uint64_t Desired = DesiredFunction(TmpExpected >> (Alignment * 8), DesiredSrc);
          Desired <<= Alignment * 8;

          uint64_t Expected = ExpectedFunction(TmpExpected >> (Alignment * 8), ExpectedSrc);
          Expected <<= Alignment * 8;

          // Set up expected
          TmpExpected &= NegMask;
          TmpExpected |= Expected;

          // Set up desired
          TmpDesired = TmpExpected;
          TmpDesired &= NegMask;
          TmpDesired |= Desired;

          bool CASResult = Atomic.compare_exchange_strong(TmpExpected, TmpDesired);
          if (CASResult) {
            // Successful, so we are done
            return Expected >> (Alignment * 8);
          } else {
            if constexpr (Retry) {
              // If we failed but we have enabled retry then just retry without checking results
              // CAS can't retry but atomic memory ops need to retry until passing
              continue;
            }
            // Not successful
            // Now we need to check the results to see if we can try again
            uint64_t FailedResultOurBits = TmpExpected & Mask;
            uint64_t FailedResultNotOurBits = TmpExpected & NegMask;

            uint64_t FailedDesiredNotOurBits = TmpDesired & NegMask;

            if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
              // If the bits changed that weren't part of our regular CAS then we need to try again
              continue;
            }

            // Only our bits differ from what was expected: report the observed value
            uint16_t FailedResult = FailedResultOurBits >> (Alignment * 8);
            // CAS failed but handled successfully
            return FailedResult;
          }
        }
      } else {
        // Fits within 4byte boundary
        // Only needs 32bit CAS
        // Only alignment offset will be 1 here
        uint64_t Alignment = Addr & AlignmentMask;
        Addr &= ~AlignmentMask;

        uint32_t Mask = 0xFFFF;
        Mask <<= Alignment * 8;

        uint32_t NegMask = ~Mask;

        uint32_t TmpExpected {};
        uint32_t TmpDesired {};

        auto Atomic = std::atomic_ref<uint32_t>(*reinterpret_cast<uint32_t*>(Addr));
        while (1) {
          TmpExpected = Atomic.load();

          uint32_t Desired = DesiredFunction(TmpExpected >> (Alignment * 8), DesiredSrc);
          Desired <<= Alignment * 8;

          uint32_t Expected = ExpectedFunction(TmpExpected >> (Alignment * 8), ExpectedSrc);
          Expected <<= Alignment * 8;

          // Set up expected
          TmpExpected &= NegMask;
          TmpExpected |= Expected;

          // Set up desired
          TmpDesired = TmpExpected;
          TmpDesired &= NegMask;
          TmpDesired |= Desired;

          bool CASResult = Atomic.compare_exchange_strong(TmpExpected, TmpDesired);
          if (CASResult) {
            // Successful, so we are done
            return Expected >> (Alignment * 8);
          } else {
            if constexpr (Retry) {
              // If we failed but we have enabled retry then just retry without checking results
              // CAS can't retry but atomic memory ops need to retry until passing
              continue;
            }
            // Not successful
            // Now we need to check the results to see if we can try again
            uint32_t FailedResultOurBits = TmpExpected & Mask;
            uint32_t FailedResultNotOurBits = TmpExpected & NegMask;

            uint32_t FailedDesiredNotOurBits = TmpDesired & NegMask;

            if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
              // If the bits changed that weren't part of our regular CAS then we need to try again
              continue;
            }

            // Only our bits differ from what was expected: report the observed value
            uint16_t FailedResult = FailedResultOurBits >> (Alignment * 8);
            // CAS failed but handled successfully
            return FailedResult;
          }
        }
      }
    }
  }
}

// Performs a 32-bit compare-and-swap style operation on an address that may be unaligned.
//
// Template parameter:
//   Retry - true for atomic memory operations that must keep retrying until the store lands;
//           false for a plain CAS that reports a failed compare back to the caller.
//
// Parameters:
//   DesiredSrc           - operand forwarded to DesiredFunction.
//   ExpectedSrc          - operand forwarded to ExpectedFunction.
//   Addr                 - memory address of the 32-bit value.
//   ExpectedFunction     - maps (observed value, ExpectedSrc) to the value memory must hold.
//   DesiredFunction      - maps (observed value, DesiredSrc) to the value to store.
//   StrictSplitLockMutex - optional; when non-null a UniqueSpinMutex is constructed over it for
//                          accesses that cross a 64-byte or 16-byte boundary.
//                          NOTE(review): presumably this takes the lock for the remainder of the
//                          function - confirm against UniqueSpinMutex.
//
// Returns the expected value when the store succeeded, otherwise the value observed in memory.
//
// The widest access that still contains all four bytes is chosen:
//   offset > 12 in a 16-byte granule -> two 32-bit CAS operations (can tear)
//   offset >= 5 in an 8-byte word    -> one 128-bit CAS
//   otherwise                        -> one 64-bit CAS
template<bool Retry>
static uint32_t DoCAS32(uint32_t DesiredSrc, uint32_t ExpectedSrc, uint64_t Addr, CASExpectedFn<uint32_t> ExpectedFunction,
                        CASDesiredFn<uint32_t> DesiredFunction, uint32_t* StrictSplitLockMutex) {
  std::optional<FEXCore::Utils::SpinWaitLock::UniqueSpinMutex<uint32_t>> Lock {};

  // A 4-byte access starting past byte 60 of a 64-byte line spills into the next line
  if ((Addr & 63) > 60) {
    FEXCORE_TELEMETRY_SET(TYPE_HAS_SPLIT_LOCKS, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }
  }

  // 32 bit
  uint64_t AlignmentMask = 0b1111;
  if ((Addr & AlignmentMask) > 12) {
    FEXCORE_TELEMETRY_SET(TYPE_16BYTE_SPLIT, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }

    // Address crosses over 16byte threshold
    // Needs dual 4 byte CAS loop
    // Work on the two aligned 4-byte words surrounding the access
    uint64_t Alignment = Addr & 0b11;
    Addr &= ~0b11;

    uint64_t AddrUpper = Addr + 4;

    // Mask selects our 32 bits inside the 64-bit upper:lower window; NegMask the bytes to preserve
    uint64_t Mask = ~0U;
    Mask <<= Alignment * 8;
    uint64_t NegMask = ~Mask;

    // Careful ordering here
    while (1) {
      // Upper word is loaded before the lower word
      uint64_t LoadOrderUpper = LoadAcquire32(AddrUpper);
      LoadOrderUpper <<= 32;
      uint64_t TmpActual = LoadOrderUpper | LoadAcquire32(Addr);

      uint64_t Desired = DesiredFunction(TmpActual >> (Alignment * 8), DesiredSrc);
      uint64_t Expected = ExpectedFunction(TmpActual >> (Alignment * 8), ExpectedSrc);

      // Neighbouring bytes come from what was just observed, our bytes from the expected value
      uint64_t TmpExpected = TmpActual;
      TmpExpected &= NegMask;
      TmpExpected |= Expected << (Alignment * 8);

      uint64_t TmpDesired = TmpExpected;
      TmpDesired &= NegMask;
      TmpDesired |= Desired << (Alignment * 8);

      bool Tear = false;
      // Only attempt the stores when memory currently holds the expected value
      if (TmpExpected == TmpActual) {
        uint32_t TmpExpectedLower = TmpExpected;
        uint32_t TmpExpectedUpper = TmpExpected >> 32;

        uint32_t TmpDesiredLower = TmpDesired;
        uint32_t TmpDesiredUpper = TmpDesired >> 32;

        if (StoreCAS32(TmpExpectedUpper, TmpDesiredUpper, AddrUpper)) {
          if (StoreCAS32(TmpExpectedLower, TmpDesiredLower, Addr)) {
            // Stored successfully
            return Expected;
          } else {
            // CAS managed to tear, we can't really solve this
            // Continue down the path to let the guest know values weren't expected
            Tear = true;
            FEXCORE_TELEMETRY_SET(TYPE_CAS_32BIT_TEAR, 1);
          }
        }

        // Rebuild the window from the per-word expected values.
        // NOTE(review): this relies on StoreCAS32 writing the observed value back into its
        // first argument on failure - confirm against its definition.
        TmpExpected = TmpExpectedUpper;
        TmpExpected <<= 32;
        TmpExpected |= TmpExpectedLower;
      } else {
        // Mismatch up front
        TmpExpected = TmpActual;
      }

      // Not successful
      // Now we need to check the results to see if we need to try again
      uint64_t FailedResultOurBits = TmpExpected & Mask;
      uint64_t FailedResultNotOurBits = TmpExpected & NegMask;

      uint64_t FailedDesiredNotOurBits = TmpDesired & NegMask;
      if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
        // If the bits changed that weren't part of our regular CAS then we need to try again
        continue;
      }

      // Only our bits differ from what was expected: this is the observed value to report
      uint32_t FailedResult = FailedResultOurBits >> (Alignment * 8);

      if constexpr (Retry) {
        if (Tear) {
          // If we are retrying and tearing then we can't do anything here
          // XXX: Resolve with TME
          return FailedResult;
        } else {
          // We can retry safely
        }
      } else {
        // Without Retry (CAS) then we have failed regardless of tear
        // CAS failed but handled successfully
        return FailedResult;
      }
    }
  } else {
    AlignmentMask = 0b111;
    if ((Addr & AlignmentMask) >= 5) {
      // Crosses 8byte boundary
      // Needs 128bit CAS
      // Fits within a 16byte region
      uint64_t Alignment = Addr & 0b1111;
      Addr &= ~0b1111ULL;
      auto Atomic128 = std::atomic_ref<__uint128_t>(*reinterpret_cast<__uint128_t*>(Addr));

      // Mask selects our 32 bits inside the granule; NegMask selects the bytes to preserve
      __uint128_t Mask = ~0U;
      Mask <<= Alignment * 8;
      __uint128_t NegMask = ~Mask;
      __uint128_t TmpExpected {};
      __uint128_t TmpDesired {};

      while (1) {
        __uint128_t TmpActual = Atomic128.load();

        __uint128_t Desired = DesiredFunction(TmpActual >> (Alignment * 8), DesiredSrc);
        __uint128_t Expected = ExpectedFunction(TmpActual >> (Alignment * 8), ExpectedSrc);

        // Set up expected
        TmpExpected = TmpActual;
        TmpExpected &= NegMask;
        TmpExpected |= Expected << (Alignment * 8);

        // Set up desired
        TmpDesired = TmpExpected;
        TmpDesired &= NegMask;
        TmpDesired |= Desired << (Alignment * 8);

        // On failure compare_exchange_strong stores the observed granule into TmpExpected
        bool CASResult = Atomic128.compare_exchange_strong(TmpExpected, TmpDesired);
        if (CASResult) {
          // Stored successfully
          return Expected;
        } else {
          if constexpr (Retry) {
            // If we failed but we have enabled retry then just retry without checking results
            // CAS can't retry but atomic memory ops need to retry until passing
            continue;
          }

          // Not successful
          // Now we need to check the results to see if we need to try again
          __uint128_t FailedResultOurBits = TmpExpected & Mask;
          __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

          __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
          if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
            // If the bits changed that weren't part of our regular CAS then we need to try again
            continue;
          }

          // Only our bits differ from what was expected: report the observed value
          uint32_t FailedResult = FailedResultOurBits >> (Alignment * 8);
          // CAS failed but handled successfully
          return FailedResult;
        }
      }
    } else {
      // Fits within 8byte boundary
      // Only needs 64bit CAS
      // Alignments can be [1,5)
      uint64_t Alignment = Addr & AlignmentMask;
      Addr &= ~AlignmentMask;

      // Mask selects our 32 bits inside the 8-byte word; NegMask selects the bytes to preserve
      uint64_t Mask = ~0U;
      Mask <<= Alignment * 8;

      uint64_t NegMask = ~Mask;

      uint64_t TmpExpected {};
      uint64_t TmpDesired {};

      auto Atomic = std::atomic_ref<uint64_t>(*reinterpret_cast<uint64_t*>(Addr));
      while (1) {
        uint64_t TmpActual = Atomic.load();

        uint64_t Desired = DesiredFunction(TmpActual >> (Alignment * 8), DesiredSrc);
        uint64_t Expected = ExpectedFunction(TmpActual >> (Alignment * 8), ExpectedSrc);

        // Set up expected
        TmpExpected = TmpActual;
        TmpExpected &= NegMask;
        TmpExpected |= Expected << (Alignment * 8);

        // Set up desired
        TmpDesired = TmpExpected;
        TmpDesired &= NegMask;
        TmpDesired |= Desired << (Alignment * 8);

        // On failure compare_exchange_strong stores the observed word into TmpExpected
        bool CASResult = Atomic.compare_exchange_strong(TmpExpected, TmpDesired);
        if (CASResult) {
          // Stored successfully
          return Expected;
        } else {
          if constexpr (Retry) {
            // If we failed but we have enabled retry then just retry without checking results
            // CAS can't retry but atomic memory ops need to retry until passing
            continue;
          }

          // Not successful
          // Now we need to check the results to see if we can try again
          uint64_t FailedResultOurBits = TmpExpected & Mask;
          uint64_t FailedResultNotOurBits = TmpExpected & NegMask;

          uint64_t FailedDesiredNotOurBits = TmpDesired & NegMask;

          if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
            // If the bits changed that weren't part of our regular CAS then we need to try again
            continue;
          }

          // Only our bits differ from what was expected: report the observed value
          uint32_t FailedResult = FailedResultOurBits >> (Alignment * 8);
          // CAS failed but handled successfully
          return FailedResult;
        }
      }
    }
  }
}

// Emulates a 64-bit atomic compare-and-swap / read-modify-write at an address that may be unaligned.
//
// The 8 target bytes are operated on through a wider, aligned window:
//   - If the access stays inside one 16-byte granule, a single 128-bit CAS on that granule is used.
//   - If the access crosses a 16-byte boundary, two independent 64-bit CAS operations are used (upper half first,
//     then lower half). That pair is NOT atomic as a whole and can tear.
//
// Template parameter:
//   Retry - true for atomic memory ops that must loop until the update lands,
//           false for CAS semantics where a value mismatch is reported back to the caller instead.
//
// Parameters:
//   DesiredSrc           - operand handed to DesiredFunction.
//   ExpectedSrc          - operand handed to ExpectedFunction.
//   Addr                 - address of the 64-bit value (may be unaligned).
//   ExpectedFunction     - computes the value the target bytes must hold for the update to be applied,
//                          given (current memory value, ExpectedSrc).
//   DesiredFunction      - computes the new value for the target bytes, given (current memory value, DesiredSrc).
//   StrictSplitLockMutex - optional mutex word; when non-null it is held for the rest of the call if the access
//                          crosses a 64-byte or a 16-byte boundary.
//
// Returns the expected value when the update was applied, otherwise the value observed in the target bytes.
template<bool Retry>
static uint64_t DoCAS64(uint64_t DesiredSrc, uint64_t ExpectedSrc, uint64_t Addr, CASExpectedFn<uint64_t> ExpectedFunction,
                        CASDesiredFn<uint64_t> DesiredFunction, uint32_t* StrictSplitLockMutex) {
  // Only engaged on the split paths below; released when this function returns.
  std::optional<FEXCore::Utils::SpinWaitLock::UniqueSpinMutex<uint32_t>> Lock {};

  // An 8-byte access starting at offsets 57..63 of a 64-byte line spills into the next line.
  if ((Addr & 63) > 56) {
    FEXCORE_TELEMETRY_SET(TYPE_HAS_SPLIT_LOCKS, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }
  }

  // 64bit
  uint64_t AlignmentMask = 0b1111;
  // An 8-byte access starting at offsets 9..15 of a 16-byte granule spills into the next granule.
  if ((Addr & AlignmentMask) > 8) {
    FEXCORE_TELEMETRY_SET(TYPE_16BYTE_SPLIT, 1);
    if (StrictSplitLockMutex && !Lock.has_value()) {
      Lock.emplace(StrictSplitLockMutex);
    }

    // Byte offset of the value inside the aligned 8-byte pair [Addr, Addr + 16)
    uint64_t Alignment = Addr & 0b111;
    Addr &= ~0b111ULL;
    uint64_t AddrUpper = Addr + 8;

    // Crosses a 16byte boundary
    // Need to do 256bit atomic, but since that doesn't exist we need to do a dual CAS loop
    // Mask selects the 8 target bytes inside the 128-bit window, NegMask selects the neighbouring bytes
    __uint128_t Mask = ~0ULL;
    Mask <<= Alignment * 8;
    __uint128_t NegMask = ~Mask;
    __uint128_t TmpExpected {};
    __uint128_t TmpDesired {};

    while (1) {
      // Snapshot the window with two separate loads; the upper half is read first.
      __uint128_t LoadOrderUpper = LoadAcquire64(AddrUpper);
      LoadOrderUpper <<= 64;
      __uint128_t TmpActual = LoadOrderUpper | LoadAcquire64(Addr);

      __uint128_t Desired = DesiredFunction(TmpActual >> (Alignment * 8), DesiredSrc);
      __uint128_t Expected = ExpectedFunction(TmpActual >> (Alignment * 8), ExpectedSrc);

      // Set up expected
      // Neighbouring bytes come from the snapshot, target bytes from Expected
      TmpExpected = TmpActual;
      TmpExpected &= NegMask;
      TmpExpected |= Expected << (Alignment * 8);

      // Set up desired
      // Same neighbouring bytes, target bytes replaced with Desired
      TmpDesired = TmpExpected;
      TmpDesired &= NegMask;
      TmpDesired |= Desired << (Alignment * 8);

      uint64_t TmpExpectedLower = TmpExpected;
      uint64_t TmpExpectedUpper = TmpExpected >> 64;

      uint64_t TmpDesiredLower = TmpDesired;
      uint64_t TmpDesiredUpper = TmpDesired >> 64;

      bool Tear = false;
      // Only attempt the stores when the snapshot already holds the expected value
      if (TmpExpected == TmpActual) {
        if (StoreCAS64(TmpExpectedUpper, TmpDesiredUpper, AddrUpper)) {
          if (StoreCAS64(TmpExpectedLower, TmpDesiredLower, Addr)) {
            // Stored successfully
            return Expected;
          } else {
            // CAS managed to tear, we can't really solve this
            // Continue down the path to let the guest know values weren't expected
            // (the upper half has already been written at this point)
            Tear = true;
            FEXCORE_TELEMETRY_SET(TYPE_CAS_64BIT_TEAR, 1);
          }
        }

        // Rebuild the 128-bit expected value from the two halves.
        // NOTE(review): presumably StoreCAS64 writes the observed memory value back into its first argument on
        // failure (like compare_exchange); its definition is not visible here - confirm.
        TmpExpected = TmpExpectedUpper;
        TmpExpected <<= 64;
        TmpExpected |= TmpExpectedLower;
      } else {
        // Mismatch up front
        TmpExpected = TmpActual;
      }

      // Not successful
      // Now we need to check the results to see if we need to try again
      __uint128_t FailedResultOurBits = TmpExpected & Mask;
      __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

      __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
      if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
        // If the bits changed that weren't part of our regular CAS then we need to try again
        continue;
      }

      // Only the target bytes differ from what was expected.
      // Extract the value that was observed in the target bytes so it can be reported to the caller.
      uint64_t FailedResult = FailedResultOurBits >> (Alignment * 8);

      if constexpr (Retry) {
        if (Tear) {
          // If we are retrying and tearing then we can't do anything here
          // XXX: Resolve with TME
          return FailedResult;
        } else {
          // We can retry safely
        }
      } else {
        // Without Retry (CAS) then we have failed regardless of tear
        // CAS failed but handled successfully
        return FailedResult;
      }
    }
  } else {
    // Fits within a 16byte region
    // Byte offset of the value inside the aligned 16-byte granule
    uint64_t Alignment = Addr & AlignmentMask;
    Addr &= ~AlignmentMask;
    auto Atomic128 = std::atomic_ref<__uint128_t>(*reinterpret_cast<__uint128_t*>(Addr));

    // Mask selects the 8 target bytes inside the granule, NegMask selects the neighbouring bytes
    __uint128_t Mask = ~0ULL;
    Mask <<= Alignment * 8;
    __uint128_t NegMask = ~Mask;
    __uint128_t TmpExpected {};
    __uint128_t TmpDesired {};

    while (1) {
      __uint128_t TmpActual = Atomic128.load();

      __uint128_t Desired = DesiredFunction(TmpActual >> (Alignment * 8), DesiredSrc);
      __uint128_t Expected = ExpectedFunction(TmpActual >> (Alignment * 8), ExpectedSrc);

      // Set up expected
      // Neighbouring bytes come from the snapshot, target bytes from Expected
      TmpExpected = TmpActual;
      TmpExpected &= NegMask;
      TmpExpected |= Expected << (Alignment * 8);

      // Set up desired
      // Same neighbouring bytes, target bytes replaced with Desired
      TmpDesired = TmpExpected;
      TmpDesired &= NegMask;
      TmpDesired |= Desired << (Alignment * 8);

      // On failure compare_exchange_strong stores the value found in memory into TmpExpected
      bool CASResult = Atomic128.compare_exchange_strong(TmpExpected, TmpDesired);
      if (CASResult) {
        // Stored successfully
        return Expected;
      } else {
        if constexpr (Retry) {
          // If we failed but we have enabled retry then just retry without checking results
          // CAS can't retry but atomic memory ops need to retry until passing
          continue;
        }

        // Not successful
        // Now we need to check the results to see if we need to try again
        __uint128_t FailedResultOurBits = TmpExpected & Mask;
        __uint128_t FailedResultNotOurBits = TmpExpected & NegMask;

        __uint128_t FailedDesiredNotOurBits = TmpDesired & NegMask;
        if ((FailedResultNotOurBits ^ FailedDesiredNotOurBits) != 0) {
          // If the bits changed that weren't part of our regular CAS then we need to try again
          continue;
        }

        // Only the target bytes differ from what was expected.
        // Report the value that was observed in the target bytes to the caller.
        uint64_t FailedResult = FailedResultOurBits >> (Alignment * 8);
        // CAS failed but handled successfully
        return FailedResult;
      }
    }
  }
}

// Performs a plain (non-retrying) compare-and-swap of `Size` bytes at a possibly unaligned address.
//
// Background:
//   - Cross-cacheline CAS doesn't work on ARM, and isn't even guaranteed to work on x86:
//     Intel performs a "split lock" which locks the full bus, AMD will tear instead.
//   - Accesses crossing a cacheline or a 16-byte boundary need dual CAS loops that can tear.
//   - ARMv8.4 LSE2 solves all atomic issues except cross-cacheline.
//   - ARM's TME extension solves the cross-cacheline problem.
//
// Returns the value produced by the size-specific helper, or std::nullopt when Size is not 2, 4 or 8.
// 8-bit accesses can never be unaligned, so they are not handled here.
static std::optional<uint64_t> DoCAS(uint32_t Size, uint64_t Desired, uint64_t Expected, uint64_t Addr, uint32_t* StrictSplitLockMutex) {
  switch (Size) {
  case 2: {
    // The comparison value is the caller's value, independent of what memory holds
    const auto UseExpected = [](uint16_t, uint16_t Wanted) -> uint16_t {
      return Wanted;
    };
    // The replacement value is the caller's value, independent of what memory holds
    const auto UseDesired = [](uint16_t, uint16_t Replacement) -> uint16_t {
      return Replacement;
    };
    const auto Outcome = DoCAS16<false>(Desired, Expected, Addr, UseExpected, UseDesired, StrictSplitLockMutex);
    return Outcome;
  }
  case 4: {
    const auto UseExpected = [](uint32_t, uint32_t Wanted) -> uint32_t {
      return Wanted;
    };
    const auto UseDesired = [](uint32_t, uint32_t Replacement) -> uint32_t {
      return Replacement;
    };
    const auto Outcome = DoCAS32<false>(Desired, Expected, Addr, UseExpected, UseDesired, StrictSplitLockMutex);
    return Outcome;
  }
  case 8: {
    const auto UseExpected = [](uint64_t, uint64_t Wanted) -> uint64_t {
      return Wanted;
    };
    const auto UseDesired = [](uint64_t, uint64_t Replacement) -> uint64_t {
      return Replacement;
    };
    const auto Outcome = DoCAS64<false>(Desired, Expected, Addr, UseExpected, UseDesired, StrictSplitLockMutex);
    return Outcome;
  }
  default:
    // Unsupported access width
    return std::nullopt;
  }
}

// Executes an emulated CASAL using register indices into the GPR array.
//
// The desired/expected values and the address are read from GPRs[DesiredReg], GPRs[ExpectedReg] and
// GPRs[AddressReg]. Whatever DoCAS reports is written back to GPRs[ExpectedReg] unless that index is 31
// (the zero register). Returns false when DoCAS does not support the access size.
static bool RunCASAL(uint64_t* GPRs, uint32_t Size, uint32_t DesiredReg, uint32_t ExpectedReg, uint32_t AddressReg, uint32_t* StrictSplitLockMutex) {
  const std::optional<uint64_t> Observed = DoCAS(Size, GPRs[DesiredReg], GPRs[ExpectedReg], GPRs[AddressReg], StrictSplitLockMutex);
  if (Observed.has_value()) {
    // The result register is updated on both pass and fail, but never when it is the zero register
    const bool WritesRegister = ExpectedReg != 31;
    if (WritesRegister) {
      GPRs[ExpectedReg] = Observed.value();
    }
    return true;
  }

  return false;
}

// Decodes the operand fields of a CASAL-style instruction word and emulates it.
//   bits [31:30] - log2 of the access size in bytes
//   bits [20:16] - register holding the expected value (also receives the result)
//   bits [9:5]   - register holding the address
//   bits [4:0]   - register holding the desired value
static bool HandleCASAL(uint64_t* GPRs, uint32_t Instr, uint32_t* StrictSplitLockMutex) {
  constexpr uint32_t RegField = 0b11111;

  const uint32_t AccessBytes = 1 << (Instr >> 30);
  const uint32_t NewValueIndex = Instr & RegField;
  const uint32_t BaseIndex = (Instr >> 5) & RegField;
  const uint32_t CompareIndex = (Instr >> 16) & RegField;

  return RunCASAL(GPRs, AccessBytes, NewValueIndex, CompareIndex, BaseIndex, StrictSplitLockMutex);
}

static bool HandleAtomicMemOp(uint32_t Instr, uint64_t* GPRs, uint32_t* StrictSplitLockMutex) {
  uint32_t Size = 1 << (Instr >> 30);
  uint32_t ResultReg = Instr & 0b11111;
  uint32_t SourceReg = (Instr >> 16) & 0b11111;
  uint32_t AddressReg = (Instr >> 5) & 0b11111;

  uint64_t Addr = GPRs[AddressReg];

  uint8_t Op = (Instr >> 12) & 0xF;

  if (Size == 2) {
    auto NOPExpected = [](uint16_t SrcVal, uint16_t) -> uint16_t {
      return SrcVal;
    };

    auto ADDDesired = [](uint16_t SrcVal, uint16_t Desired) -> uint16_t {
      return SrcVal + Desired;
    };

    auto CLRDesired = [](uint16_t SrcVal, uint16_t Desired) -> uint16_t {
      return SrcVal & ~Desired;
    };

    auto EORDesired = [](uint16_t SrcVal, uint16_t Desired) -> uint16_t {
      return SrcVal ^ Desired;
    };

    auto SETDesired = [](uint16_t SrcVal, uint16_t Desired) -> uint16_t {
      return SrcVal | Desired;
    };

    auto SWAPDesired = [](uint16_t SrcVal, uint16_t Desired) -> uint16_t {
      return Desired;
    };

    CASDesiredFn<uint16_t> DesiredFunction {};

    switch (Op) {
    case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break;
    case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break;
    case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break;
    case ATOMIC_SET_OP: DesiredFunction = SETDesired; break;
    case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false;
    }

    auto Res = DoCAS16<true>(GPRs[SourceReg],
                             0, // Unused
                             Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);
    // If we passed and our destination register is not zero
    // Then we need to update the result register with what was in memory
    if (ResultReg != 31) {
      GPRs[ResultReg] = Res;
    }
    return true;
  } else if (Size == 4) {
    auto NOPExpected = [](uint32_t SrcVal, uint32_t) -> uint32_t {
      return SrcVal;
    };

    auto ADDDesired = [](uint32_t SrcVal, uint32_t Desired) -> uint32_t {
      return SrcVal + Desired;
    };

    auto CLRDesired = [](uint32_t SrcVal, uint32_t Desired) -> uint32_t {
      return SrcVal & ~Desired;
    };

    auto EORDesired = [](uint32_t SrcVal, uint32_t Desired) -> uint32_t {
      return SrcVal ^ Desired;
    };

    auto SETDesired = [](uint32_t SrcVal, uint32_t Desired) -> uint32_t {
      return SrcVal | Desired;
    };

    auto SWAPDesired = [](uint32_t SrcVal, uint32_t Desired) -> uint32_t {
      return Desired;
    };

    CASDesiredFn<uint32_t> DesiredFunction {};

    switch (Op) {
    case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break;
    case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break;
    case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break;
    case ATOMIC_SET_OP: DesiredFunction = SETDesired; break;
    case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false;
    }

    auto Res = DoCAS32<true>(GPRs[SourceReg],
                             0, // Unused
                             Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);
    // If we passed and our destination register is not zero
    // Then we need to update the result register with what was in memory
    if (ResultReg != 31) {
      GPRs[ResultReg] = Res;
    }
    return true;
  } else if (Size == 8) {
    auto NOPExpected = [](uint64_t SrcVal, uint64_t) -> uint64_t {
      return SrcVal;
    };

    auto ADDDesired = [](uint64_t SrcVal, uint64_t Desired) -> uint64_t {
      return SrcVal + Desired;
    };

    auto CLRDesired = [](uint64_t SrcVal, uint64_t Desired) -> uint64_t {
      return SrcVal & ~Desired;
    };

    auto EORDesired = [](uint64_t SrcVal, uint64_t Desired) -> uint64_t {
      return SrcVal ^ Desired;
    };

    auto SETDesired = [](uint64_t SrcVal, uint64_t Desired) -> uint64_t {
      return SrcVal | Desired;
    };

    auto SWAPDesired = [](uint64_t SrcVal, uint64_t Desired) -> uint64_t {
      return Desired;
    };

    CASDesiredFn<uint64_t> DesiredFunction {};

    switch (Op) {
    case ATOMIC_ADD_OP: DesiredFunction = ADDDesired; break;
    case ATOMIC_CLR_OP: DesiredFunction = CLRDesired; break;
    case ATOMIC_EOR_OP: DesiredFunction = EORDesired; break;
    case ATOMIC_SET_OP: DesiredFunction = SETDesired; break;
    case ATOMIC_SWAP_OP: DesiredFunction = SWAPDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", Op); return false;
    }

    auto Res = DoCAS64<true>(GPRs[SourceReg],
                             0, // Unused
                             Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);
    // If we passed and our destination register is not zero
    // Then we need to update the result register with what was in memory
    if (ResultReg != 31) {
      GPRs[ResultReg] = Res;
    }
    return true;
  }

  return false;
}

// Emulates an atomic load of 2, 4 or 8 bytes from GPRs[Rn] + Offset.
//
// The loaded value is written to the destination register unless that register is index 31 (zero register).
// When `Store` is provided, the address, loaded value and access size are recorded in it.
// Returns false when the encoded access size is not 2, 4 or 8 bytes.
static bool HandleAtomicLoad(uint32_t Instr, uint64_t* GPRs, int64_t Offset, Core::UnalignedExclusiveStore* Store = nullptr) {
  const uint32_t AccessBytes = 1 << (Instr >> 30);

  const uint32_t DestIndex = Instr & 0b11111;
  const uint32_t BaseIndex = (Instr >> 5) & 0b11111;

  const uint64_t Addr = GPRs[BaseIndex] + Offset;
  uint64_t Loaded;

  switch (AccessBytes) {
  case 2: Loaded = DoLoad16(Addr); break;
  case 4: Loaded = DoLoad32(Addr); break;
  case 8: Loaded = DoLoad64(Addr); break;
  default:
    // Unsupported access width
    return false;
  }

  // The zero register discards the result
  if (DestIndex != 31) {
    GPRs[DestIndex] = Loaded;
  }

  if (Store != nullptr) {
    Store->Addr = Addr;
    Store->Store = Loaded;
    Store->Size = AccessBytes;
  }
  return true;
}

// Emulates an atomic store of 2, 4 or 8 bytes to GPRs[Rn] + Offset.
//
// The store is implemented as a compare-and-swap whose expected value is "whatever memory currently holds"
// and whose desired value is the data register, so that the bytes surrounding an unaligned access are preserved.
//
// Fields decoded from the instruction word:
//   bits [31:30] - log2 of the access size in bytes
//   bits [9:5]   - register holding the base address
//   bits [4:0]   - register holding the data to store
//
// Returns false when the encoded access size is not 2, 4 or 8 bytes, true otherwise.
static bool HandleAtomicStore(uint32_t Instr, uint64_t* GPRs, int64_t Offset, uint32_t* StrictSplitLockMutex) {
  uint32_t Size = 1 << (Instr >> 30);

  // NOTE(review): register index 31 is treated as the zero register for writes elsewhere in this file;
  // the data register is read unconditionally here - confirm GPRs[31] holds the intended value.
  uint32_t DataReg = Instr & 0x1F;
  uint32_t AddressReg = (Instr >> 5) & 0b11111;

  uint64_t Addr = GPRs[AddressReg] + Offset;

  // A store is unconditional, so it must be retried until the CAS lands.
  // Without retry the CAS helpers give up as soon as only the target bytes changed between their load and their
  // CAS, which would silently drop this store whenever another thread modifies the same location concurrently.
  constexpr bool DoRetry = true;
  if (Size == 2) {
    DoCAS16<DoRetry>(
      GPRs[DataReg],
      0, // Unused
      Addr,
      [](uint16_t SrcVal, uint16_t) -> uint16_t {
        // Expected is just src
        return SrcVal;
      },
      [](uint16_t, uint16_t Desired) -> uint16_t {
        // Desired is just Desired
        return Desired;
      },
      StrictSplitLockMutex);
    return true;
  } else if (Size == 4) {
    DoCAS32<DoRetry>(
      GPRs[DataReg],
      0, // Unused
      Addr,
      [](uint32_t SrcVal, uint32_t) -> uint32_t {
        // Expected is just src
        return SrcVal;
      },
      [](uint32_t, uint32_t Desired) -> uint32_t {
        // Desired is just Desired
        return Desired;
      },
      StrictSplitLockMutex);
    return true;
  } else if (Size == 8) {
    DoCAS64<DoRetry>(
      GPRs[DataReg],
      0, // Unused
      Addr,
      [](uint64_t SrcVal, uint64_t) -> uint64_t {
        // Expected is just src
        return SrcVal;
      },
      [](uint64_t, uint64_t Desired) -> uint64_t {
        // Desired is just Desired
        return Desired;
      },
      StrictSplitLockMutex);
    return true;
  }

  // Unsupported access width
  return false;
}

// Emulates a compare-and-swap that was emitted as a load-exclusive/store-exclusive sequence (ARMv8.0, no LSE).
//
// Expected instruction sequence, with ProgramCounter pointing at [1]:
//   [1] ldaxrb(TMP2.W(), MemOperand(MemSrc))
//   [2] cmp (TMP2.W(), Expected.W())
//   [3] b
//   [4] stlxrb(TMP3.W(), Desired.W(), MemOperand(MemSrc)
//   [5] cbnz
//   [6] mov
//   [7] b
//   [8] mov (.., TMP2.W());
//   [9] clrex
//
// Returns the number of bytes to advance the program counter by (landing on [8]), or 0 when the access size
// is not supported.
static uint64_t HandleCAS_NoAtomics(uintptr_t ProgramCounter, uint64_t* GPRs, uint32_t* StrictSplitLockMutex) {
  const uint32_t* Code = reinterpret_cast<const uint32_t*>(ProgramCounter);
  const uint32_t LoadInstr = Code[0];

  const uint32_t AccessBytes = 1 << (LoadInstr >> 30);
  const uint32_t AddressReg = GetRnReg(LoadInstr);
  const uint32_t LoadDestReg = GetRdReg(LoadInstr); // TMP2
  uint32_t DesiredReg {};
  uint32_t ExpectedReg {};

  // Inspect the five instructions following the load to discover the operand registers
  for (size_t Idx = 1; Idx != 6; ++Idx) {
    const uint32_t Candidate = Code[Idx];

    const bool IsStoreExclusive = (Candidate & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST;
    if (IsStoreExclusive) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      // Just double check that the memory destination matches
      const uint32_t StoreAddressReg = GetRnReg(Candidate);
      LOGMAN_THROW_A_FMT(StoreAddressReg == AddressReg, "StoreExclusive memory register didn't match the store exclusive register");
#endif
      // The value being stored is the desired value
      DesiredReg = GetRdReg(Candidate);
      continue;
    }

    const auto AluBits = Candidate & ArchHelpers::Arm64::ALU_OP_MASK;
    if (AluBits == ArchHelpers::Arm64::CMP_INST || AluBits == ArchHelpers::Arm64::CMP_SHIFT_INST) {
      // The compare's second operand is the expected value
      ExpectedReg = GetRmReg(Candidate);
    }
  }

  // set up CASAL by doing mov(TMP2, Expected)
  GPRs[LoadDestReg] = GPRs[ExpectedReg];

  if (!RunCASAL(GPRs, AccessBytes, DesiredReg, LoadDestReg, AddressReg, StrictSplitLockMutex)) {
    return 0;
  }

  // jump to mov to allocated register
  return 7 * sizeof(uint32_t);
}

static uint64_t HandleAtomicLoadstoreExclusive(uintptr_t ProgramCounter, uint64_t* GPRs, uint32_t* StrictSplitLockMutex) {
  uint32_t* PC = (uint32_t*)ProgramCounter;
  uint32_t Instr = PC[0];

  // Atomic Add
  // [1] ldaxrb(TMP2.W(), MemOperand(MemSrc));
  // [2] add(TMP2.W(), TMP2.W(), GetReg<RA_32>(Op->Header.Args[1].ID()));
  // [3] stlxrb(TMP2.W(), TMP2.W(), MemOperand(MemSrc));
  // [4] cbnz(TMP2.W(), &LoopTop);
  //
  // Atomic Fetch Add
  // [1] ldaxrb(TMP2.W(), MemOperand(MemSrc));
  // [2] add(TMP3.W(), TMP2.W(), GetReg<RA_32>(Op->Header.Args[1].ID()));
  // [3] stlxrb(TMP4.W(), TMP3.W(), MemOperand(MemSrc));
  // [4] cbnz(TMP4.W(), &LoopTop);
  // [5] mov(GetReg<RA_32>(Node), TMP2.W());
  //
  // Atomic Swap
  //
  // [1] ldaxrb(TMP2.W(), MemOperand(MemSrc));
  // [2] stlxrb(TMP4.W(), GetReg<RA_32>(Op->Header.Args[1].ID()), MemOperand(MemSrc));
  // [3] cbnz(TMP4.W(), &LoopTop);
  // [4] uxtb(GetReg<RA_64>(Node), TMP2.W());
  //
  // ASSUMPTIONS:
  // - Both cases:
  //   - The [2]ALU op: (Non NEG case)
  //     - First source is from [1]ldaxr
  //     - Second source is incoming value
  //   - The [2]ALU op: (NEG case)
  //     - First source is zero register
  //     - The second source is the from [1]ldaxr
  //   - No ALU op: (SWAP case)
  //     - No DataSourceRegister
  //
  // - In Atomic case (non-fetch)
  //   - The [3]stlxr instruction status + memory register are the SAME register
  //
  // - In Atomic FETCH case
  //   - The [3]stlxr instruction's status + memory register are never the same register
  //   - The [5]mov instruction source is always the destination register from [1] ldaxr*
  uint32_t ResultReg = GetRdReg(Instr);
  uint32_t AddressReg = GetRnReg(Instr);
  uint64_t Addr = GPRs[AddressReg];

  size_t NumInstructionsToSkip = 0;

  // Are we an Atomic op or AtomicFetch?
  bool AtomicFetch = false;

  // This is the register that is the incoming source to the ALU operation
  // <DataResultReg> = <Load Exclusive Value> <Op> <DataSourceReg>
  // NEG case is special
  // <DataResultReg> = Zero <Sub> <Load Exclusive Value>
  // DataSourceRegister must always be the Rm register
  uint32_t DataSourceReg {};
  ExclusiveAtomicPairType AtomicOp {ExclusiveAtomicPairType::TYPE_SWAP};

  // Scan forward at most five instructions to find our instructions
  for (size_t i = 1; i < 6; ++i) {
    uint32_t NextInstr = PC[i];
    if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ADD_INST ||
        (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ADD_SHIFT_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_ADD;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::SUB_INST ||
               (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::SUB_SHIFT_INST) {
      uint32_t RnReg = GetRnReg(NextInstr);
      if (RnReg == REGISTER_MASK) {
        // Zero reg means neg
        AtomicOp = ExclusiveAtomicPairType::TYPE_NEG;
      } else {
        AtomicOp = ExclusiveAtomicPairType::TYPE_SUB;
      }
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_INST ||
               (NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::CMP_SHIFT_INST) {
      return HandleCAS_NoAtomics(ProgramCounter, GPRs, StrictSplitLockMutex); // ARMv8.0 CAS
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::AND_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_AND;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::BIC_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_BIC;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::OR_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_OR;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::ORN_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_ORN;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EOR_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_EOR;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::ALU_OP_MASK) == ArchHelpers::Arm64::EON_INST) {
      AtomicOp = ExclusiveAtomicPairType::TYPE_EON;
      DataSourceReg = GetRmReg(NextInstr);
    } else if ((NextInstr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      // Just double check that the memory destination matches
      const uint32_t StoreAddressReg = GetRnReg(NextInstr);
      LOGMAN_THROW_A_FMT(StoreAddressReg == AddressReg, "StoreExclusive memory register didn't match the store exclusive register");
#endif
      uint32_t StatusReg = GetRmReg(NextInstr);
      uint32_t StoreResultReg = GetRdReg(NextInstr);
      // We are an atomic fetch instruction if the data register isn't the status register
      AtomicFetch = !(StatusReg == StoreResultReg);
      if (AtomicOp == ExclusiveAtomicPairType::TYPE_SWAP) {
        // In the case of swap we don't have an ALU op inbetween
        // Source is directly in STLXR
        DataSourceReg = StoreResultReg;
      }
    } else if ((NextInstr & ArchHelpers::Arm64::CBNZ_MASK) == ArchHelpers::Arm64::CBNZ_INST) {
      // Found the CBNZ, we want to skip to just after this instruction when done
      NumInstructionsToSkip = i + 1;
      // This is the last instruction we care about. Leave now
      break;
    } else {
      LogMan::Msg::AFmt("Unknown instruction 0x{:08x}", NextInstr);
    }
  }

  uint32_t Size = 1 << (Instr >> 30);

  constexpr bool DoRetry = true;

  auto NOPExpected = []<typename AtomicType>(AtomicType SrcVal, AtomicType) -> AtomicType {
    return SrcVal;
  };

  auto ADDDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal + Desired;
  };

  auto SUBDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal - Desired;
  };

  auto ANDDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal & Desired;
  };

  auto BICDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal & ~Desired;
  };

  auto ORDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal | Desired;
  };

  auto ORNDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal | ~Desired;
  };

  auto EORDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal ^ Desired;
  };

  auto EONDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return SrcVal ^ ~Desired;
  };

  auto NEGDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return -SrcVal;
  };

  auto SWAPDesired = []<typename AtomicType>(AtomicType SrcVal, AtomicType Desired) -> AtomicType {
    return Desired;
  };

  if (Size == 2) {
    using AtomicType = uint16_t;
    CASDesiredFn<AtomicType> DesiredFunction {};

    switch (AtomicOp) {
    case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break;
    case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break;
    case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; break;
    case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break;
    case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break;
    case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break;
    case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break;
    case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break;
    case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = EONDesired; break;
    case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false;
    }

    auto Res = DoCAS16<DoRetry>(GPRs[DataSourceReg],
                                0, // Unused
                                Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);

    if (AtomicFetch && ResultReg != 31) {
      // On atomic fetch then we store the resulting value back in to the loadacquire destination register
      // We want the memory value BEFORE the ALU op
      GPRs[ResultReg] = Res;
    }
  } else if (Size == 4) {
    using AtomicType = uint32_t;
    CASDesiredFn<AtomicType> DesiredFunction {};

    switch (AtomicOp) {
    case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break;
    case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break;
    case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; break;
    case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break;
    case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break;
    case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break;
    case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break;
    case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break;
    case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = EONDesired; break;
    case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false;
    }

    auto Res = DoCAS32<DoRetry>(GPRs[DataSourceReg],
                                0, // Unused
                                Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);

    if (AtomicFetch && ResultReg != 31) {
      // On atomic fetch then we store the resulting value back in to the loadacquire destination register
      // We want the memory value BEFORE the ALU op
      GPRs[ResultReg] = Res;
    }
  } else if (Size == 8) {
    using AtomicType = uint64_t;
    CASDesiredFn<AtomicType> DesiredFunction {};

    switch (AtomicOp) {
    case ExclusiveAtomicPairType::TYPE_SWAP: DesiredFunction = SWAPDesired; break;
    case ExclusiveAtomicPairType::TYPE_ADD: DesiredFunction = ADDDesired; break;
    case ExclusiveAtomicPairType::TYPE_SUB: DesiredFunction = SUBDesired; break;
    case ExclusiveAtomicPairType::TYPE_AND: DesiredFunction = ANDDesired; break;
    case ExclusiveAtomicPairType::TYPE_BIC: DesiredFunction = BICDesired; break;
    case ExclusiveAtomicPairType::TYPE_OR: DesiredFunction = ORDesired; break;
    case ExclusiveAtomicPairType::TYPE_ORN: DesiredFunction = ORNDesired; break;
    case ExclusiveAtomicPairType::TYPE_EOR: DesiredFunction = EORDesired; break;
    case ExclusiveAtomicPairType::TYPE_EON: DesiredFunction = EONDesired; break;
    case ExclusiveAtomicPairType::TYPE_NEG: DesiredFunction = NEGDesired; break;
    default: LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}", FEXCore::ToUnderlying(AtomicOp)); return false;
    }

    auto Res = DoCAS64<DoRetry>(GPRs[DataSourceReg],
                                0, // Unused
                                Addr, NOPExpected, DesiredFunction, StrictSplitLockMutex);
    if (AtomicFetch && ResultReg != 31) {
      // On atomic fetch then we store the resulting value back in to the loadacquire destination register
      // We want the memory value BEFORE the ALU op
      GPRs[ResultReg] = Res;
    }
  }

  // Multiply by 4 for number of bytes to skip
  return NumInstructionsToSkip * 4;
}

// Handles a fault raised by an atomic / ordered memory instruction located at ProgramCounter.
//
// Parameters:
//   Thread         - Thread state. Supplies the owning context (for the split-lock configuration), the
//                    per-thread ExclusiveStore record and, for JIT code, the current frame.
//   HandleType     - When this is not UnalignedHandlerType::NonAtomic, backpatching also writes a DMB
//                    next to the patched load/store.
//   ProgramCounter - Address of the faulting 32-bit instruction.
//   GPRs           - Register file read and updated by the emulation helpers.
//   IsJIT          - false: the instruction is only emulated and code is never modified.
//                    true:  the instruction lives in a JIT block and may be backpatched in place.
//
// Returns:
//   std::nullopt - the instruction could not be handled (always the case on non-arm64 builds).
//   4 (or more)  - the access was emulated; the value is the number of bytes to skip.
//   0            - re-execute at the same PC (e.g. after the instruction was backpatched). The non-JIT
//                  path also returns 0 when none of its handlers emulated the instruction.
//   -4           - back up one instruction so that a freshly patched DMB in front of a store is executed.
[[nodiscard]]
std::optional<int32_t> HandleUnalignedAccess(FEXCore::Core::InternalThreadState* Thread, UnalignedHandlerType HandleType,
                                             uintptr_t ProgramCounter, uint64_t* GPRs, bool IsJIT) {
#ifdef ARCHITECTURE_arm64
  constexpr bool is_arm64 = true;
#else
  constexpr bool is_arm64 = false;
#endif

  if constexpr (!is_arm64) {
    return std::nullopt;
  }

  uint32_t* PC = (uint32_t*)ProgramCounter;
  uint32_t Instr = PC[0];

  // 1 = 16bit
  // 2 = 32bit
  // 3 = 64bit
  uint32_t Size = (Instr & 0xC000'0000) >> 30;
  uint32_t AddrReg = (Instr >> 5) & 0x1F;
  uint32_t DataReg = Instr & 0x1F;

  auto CTX = static_cast<Context::ContextImpl*>(Thread->CTX);
  // Only hand the split-lock mutex to the helpers when strict in-process split locks are configured.
  uint32_t* StrictSplitLockMutex {CTX->Config.StrictInProcessSplitLocks ? &CTX->StrictSplitLockMutex : nullptr};

  if (!IsJIT) [[unlikely]] {
    // Non-JIT code is never backpatched: every supported instruction is emulated and then skipped.
    if ((Instr & LDAXR_MASK) == LDAR_INST ||  // LDAR*
        (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR*
      if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, 0)) {
        // Skip this instruction now
        return 4;
      } else {
        LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
        return std::nullopt;
      }
    } else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR*
      if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, 0, StrictSplitLockMutex)) {
        // Skip this instruction now
        return 4;
      } else {
        LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
        return std::nullopt;
      }
    } else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR*
      // Extract the 9-bit offset from the instruction
      int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
      if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, Offset)) {
        // Skip this instruction now
        return 4;
      } else {
        LogMan::Msg::EFmt("Unhandled JIT SIGBUS LDAPUR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
        return std::nullopt;
      }
    } else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR*
      // Extract the 9-bit offset from the instruction
      int32_t Offset = static_cast<int32_t>(Instr) << 11 >> 23;
      if (ArchHelpers::Arm64::HandleAtomicStore(Instr, GPRs, Offset, StrictSplitLockMutex)) {
        // Skip this instruction now
        return 4;
      } else {
        LogMan::Msg::EFmt("Unhandled JIT SIGBUS STLUR*: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
        return std::nullopt;
      }
    } else if ((Instr & ArchHelpers::Arm64::LDAXR_MASK) == ArchHelpers::Arm64::LDAXR_INST) { // LDAXR*
      // The load helper is handed the per-thread ExclusiveStore record, which the STLXR* path below validates against.
      if (ArchHelpers::Arm64::HandleAtomicLoad(Instr, GPRs, 0, &Thread->ExclusiveStore)) {
        return 4;
      }
    } else if ((Instr & ArchHelpers::Arm64::STLXR_MASK) == ArchHelpers::Arm64::STLXR_INST) { // STLXR*
      // Status register index lives in bits [20:16].
      uint32_t StatusReg = Instr << 11 >> 27;
      // Emulate exclusive store by validating the address and value against the last unaligned LDAXR*.
      if (GPRs[AddrReg] != Thread->ExclusiveStore.Addr || Size > Thread->ExclusiveStore.Size) {
        // Report failure (status = 1) without touching memory; register 31 discards the status.
        if (StatusReg != 31) {
          GPRs[StatusReg] = 1;
        }
        return 4;
      }
      // NOTE(review): `Size` is the raw 2-bit size field here; confirm that DoCAS, ExclusiveStore.Size and the
      // memcmp length below all expect this encoding rather than a byte count.
      if (std::optional<uint64_t> Prev =
            DoCAS(Size, DataReg == 31 ? 0 : GPRs[DataReg], Thread->ExclusiveStore.Store, GPRs[AddrReg], StrictSplitLockMutex)) {
        if (StatusReg != 31) {
          // Status is 0 when the previous memory value matched the recorded one, 1 otherwise.
          GPRs[StatusReg] = !!memcmp(&Thread->ExclusiveStore.Store, &*Prev, Size);
        }
        Thread->ExclusiveStore.Size = 0;
        return 4;
      }
    }
    return 0;
  }

  const auto Frame = Thread->CurrentFrame;
  const uint64_t BlockBegin = Frame->State.InlineJITBlockHeader;
  auto InlineHeader = reinterpret_cast<const CPU::CPUBackend::JITCodeHeader*>(BlockBegin);
  auto InlineTail = reinterpret_cast<CPU::CPUBackend::JITCodeTail*>(Frame->State.InlineJITBlockHeader + InlineHeader->OffsetToBlockTail);

  // Check some instructions first that don't do any backpatching.
  if ((Instr & ArchHelpers::Arm64::CASPAL_MASK) == ArchHelpers::Arm64::CASPAL_INST) { // CASPAL
    if (ArchHelpers::Arm64::HandleCASPAL(Instr, GPRs, StrictSplitLockMutex)) {
      // Skip this instruction now
      return 4;
    } else {
      LogMan::Msg::EFmt("Unhandled JIT SIGBUS CASPAL: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
      return std::nullopt;
    }
  } else if ((Instr & ArchHelpers::Arm64::CASAL_MASK) == ArchHelpers::Arm64::CASAL_INST) { // CASAL
    if (ArchHelpers::Arm64::HandleCASAL(GPRs, Instr, StrictSplitLockMutex)) {
      // Skip this instruction now
      return 4;
    } else {
      LogMan::Msg::EFmt("Unhandled JIT SIGBUS CASAL: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
      return std::nullopt;
    }
  } else if ((Instr & LDAXR_MASK) == LDAR_INST ||  // LDAR*
             (Instr & LDAXR_MASK) == LDAPR_INST || // LDAPR*
             (Instr & LDAXR_MASK) == STLR_INST) {  // STLR*
    // This must fall through to the spin-lock implementation below.
    // This mask has a partial overlap with ATOMIC_MEM_INST so we need to check this here.
  } else if ((Instr & ArchHelpers::Arm64::ATOMIC_MEM_MASK) == ArchHelpers::Arm64::ATOMIC_MEM_INST) { // Atomic memory op
    if (ArchHelpers::Arm64::HandleAtomicMemOp(Instr, GPRs, StrictSplitLockMutex)) {
      // Skip this instruction now
      return 4;
    } else {
      uint8_t Op = (PC[0] >> 12) & 0xF;
      LogMan::Msg::EFmt("Unhandled JIT SIGBUS Atomic mem op 0x{:02x}: PC: 0x{:x} Instruction: 0x{:08x}\n", Op, ProgramCounter, PC[0]);
      return std::nullopt;
    }
  } else if ((Instr & ArchHelpers::Arm64::LDAXR_MASK) == ArchHelpers::Arm64::LDAXR_INST) { // LDAXR*
    uint64_t BytesToSkip = ArchHelpers::Arm64::HandleAtomicLoadstoreExclusive(ProgramCounter, GPRs, StrictSplitLockMutex);
    if (BytesToSkip) {
      // Skip this instruction now
      return BytesToSkip;
    }
    // Explicit fallthrough to the backpatch handler below!
  } else if ((Instr & ArchHelpers::Arm64::LDAXP_MASK) == ArchHelpers::Arm64::LDAXP_INST) { // LDAXP
    // Should be compare and swap pair only. LDAXP not used elsewhere
    uint64_t BytesToSkip = ArchHelpers::Arm64::HandleCASPAL_ARMv8(Instr, ProgramCounter, GPRs, StrictSplitLockMutex);
    if (BytesToSkip) {
      // Skip this instruction now
      return BytesToSkip;
    } else {
      LogMan::Msg::EFmt("Unhandled JIT SIGBUS CASPAL: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
      return std::nullopt;
    }
  }

  // Lock code mutex during any SIGBUS handling that potentially changes code.
  // Due to code buffer sharing between threads, code must be carefully backpatched from last to first.
  // Multiple threads can be attempting to handle the SIGBUS or even be executing the code being backpatched.
  FEXCore::Utils::SpinWaitLock::UniqueSpinMutex lk(&InlineTail->SpinLockFutex);

  if ((Instr & LDAXR_MASK) == LDAR_INST ||  // LDAR*
      (Instr & LDAXR_MASK) == LDAPR_INST) { // LDAPR*
    // Rebuild the access as a plain LDR with the same size, base and destination registers.
    uint32_t LDR = LDR_INST;
    LDR |= Size << 30;
    LDR |= AddrReg << 5;
    LDR |= DataReg;
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      // Ordering matters with cross-thread visibility!
      std::atomic_ref<uint32_t>(PC[1]).store(DMB_LD, std::memory_order_release); // Back-patch the half-barrier.
    }
    std::atomic_ref<uint32_t>(PC[0]).store(LDR, std::memory_order_release);
    ClearICache(&PC[0], 8);
    // With the instruction modified, now execute again.
    return 0;
  } else if ((Instr & LDAXR_MASK) == STLR_INST) { // STLR*
    // Rebuild the access as a plain STR; the barrier goes in the slot *before* the store.
    uint32_t STR = STR_INST;
    STR |= Size << 30;
    STR |= AddrReg << 5;
    STR |= DataReg;
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      std::atomic_ref<uint32_t>(PC[-1]).store(DMB, std::memory_order_release); // Back-patch the half-barrier.
    }
    std::atomic_ref<uint32_t>(PC[0]).store(STR, std::memory_order_release);
    ClearICache(&PC[-1], 8);
    // Back up one instruction and have another go
    return -4;
  } else if ((Instr & RCPC2_MASK) == LDAPUR_INST) { // LDAPUR*
    uint32_t LDUR = LDUR_INST;
    LDUR |= Size << 30;
    LDUR |= AddrReg << 5;
    LDUR |= DataReg;
    // Carry the 9-bit offset field (bits [20:12]) over from the original instruction.
    LDUR |= Instr & (0b1'1111'1111 << 12);
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      // Ordering matters with cross-thread visibility!
      std::atomic_ref<uint32_t>(PC[1]).store(DMB_LD, std::memory_order_release); // Back-patch the half-barrier.
    }
    std::atomic_ref<uint32_t>(PC[0]).store(LDUR, std::memory_order_release);
    ClearICache(&PC[0], 8);
    // With the instruction modified, now execute again.
    return 0;
  } else if ((Instr & RCPC2_MASK) == STLUR_INST) { // STLUR*
    uint32_t STUR = STUR_INST;
    STUR |= Size << 30;
    STUR |= AddrReg << 5;
    STUR |= DataReg;
    // Carry the 9-bit offset field (bits [20:12]) over from the original instruction.
    STUR |= Instr & (0b1'1111'1111 << 12);
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      std::atomic_ref<uint32_t>(PC[-1]).store(DMB, std::memory_order_release); // Back-patch the half-barrier.
    }
    std::atomic_ref<uint32_t>(PC[0]).store(STUR, std::memory_order_release);

    ClearICache(&PC[-1], 8);
    // Back up one instruction and have another go
    return -4;
  }

  // Check if another thread backpatched this instruction before this thread got here
  // Since we got here, this can happen in a couple situations:
  // - Unhandled instruction (Shouldn't occur, FEX programmer error added a new unhandled atomic)
  // - Another thread backpatched an atomic access to be a non-atomic access
  auto AtomicInst = std::atomic_ref<uint32_t>(PC[0]).load(std::memory_order_acquire);
  if ((AtomicInst & LDSTREGISTER_MASK) == LDR_INST || (AtomicInst & LDSTUNSCALED_MASK) == LDUR_INST) {
    // This atomic instruction was backpatched to a load.
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      // Check if the next instruction is a DMB.
      auto DMBInst = std::atomic_ref<uint32_t>(PC[1]).load(std::memory_order_acquire);
      if (DMBInst == DMB_LD) {
        return 0;
      }
    } else {
      // No DMB instruction with this HandleType.
      return 0;
    }
  } else if ((AtomicInst & LDSTREGISTER_MASK) == STR_INST || (AtomicInst & LDSTUNSCALED_MASK) == STUR_INST) {
    // This atomic instruction was backpatched to a store.
    if (HandleType != UnalignedHandlerType::NonAtomic) {
      // Check if the previous instruction is a DMB.
      auto DMBInst = std::atomic_ref<uint32_t>(PC[-1]).load(std::memory_order_acquire);
      if (DMBInst == DMB) {
        // Return handled, make sure to adjust PC so we run the DMB.
        return -4;
      }
    } else {
      // No DMB instruction with this HandleType.
      return 0;
    }
  } else if (AtomicInst == DMB) {
    // ARMv8.0-a LDAXP backpatch handling. Will have turned in to the following:
    // - PC[0] = DMB
    // - PC[1] = STP
    // - PC[2] = DMB
    auto STPInst = std::atomic_ref<uint32_t>(PC[1]).load(std::memory_order_acquire);
    auto DMBInst = std::atomic_ref<uint32_t>(PC[2]).load(std::memory_order_acquire);
    if ((STPInst & LDSTP_MASK) == STP_INST && DMBInst == DMB) {
      // Code that was backpatched is what was expected for ARMv8.0-a LDAXP.
      return 0;
    }
  }

  LogMan::Msg::EFmt("Unhandled JIT SIGBUS: PC: 0x{:x} Instruction: 0x{:08x}\n", ProgramCounter, PC[0]);
  return std::nullopt;
}


} // namespace FEXCore::ArchHelpers::Arm64


================================================
FILE: FEXCore/Source/Utils/ArchHelpers/Arm64_stubs.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <stdint.h>

namespace FEXCore::ArchHelpers::Arm64 {

#ifndef ARCHITECTURE_arm64
// These are stub implementations that exist only to allow instantiating the arm64 jit
// on non arm platforms.

// Obviously such a configuration can't do the actual arm64-specific stuff

// Stub for non-arm64 hosts: unaligned-access handling needs real arm64 code, so reaching this is fatal.
// NOTE(review): this overload has no `IsJIT` parameter, unlike the arm64 implementation of
// HandleUnalignedAccess; confirm it still matches the declaration in the header.
std::optional<int32_t>
HandleUnalignedAccess(FEXCore::Core::InternalThreadState* Thread, UnalignedHandlerType HandleType, uintptr_t ProgramCounter, uint64_t* GPRs) {
  ERROR_AND_DIE_FMT("HandleUnalignedAccess Not Implemented");
}

#endif

} // namespace FEXCore::ArchHelpers::Arm64


================================================
FILE: FEXCore/Source/Utils/BucketList.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstddef>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/memory.h>

namespace FEXCore {

// BucketList is a container of T values that keeps up to Size items inline and
// spills any further items into a singly linked chain of additional buckets.
//
// The stored sequence is terminated by a default-constructed T (`T {}`), so a value
// equal to `T {}` acts as the end marker rather than as a regular item.
//
// Tuning notes:
// - Choose Size so that the typical case needs only one or two buckets.
// - Choosing Size so that sizeof(BucketList<...>) is a power of two is a small extra win.
template<size_t _Size, typename T = uint32_t>
struct BucketList {
  static constexpr size_t Size = _Size;

  // Inline storage for this bucket.
  T Items[Size];
  // Overflow bucket; allocated by Append as soon as this bucket becomes full.
  fextl::unique_ptr<BucketList<Size, T>> Next;

  // Empties the list: writes the terminator into the first slot and drops all overflow buckets.
  void Clear() {
    Items[0] = T {};
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    // With assertions enabled, fill the remaining slots with a recognisable pattern.
    for (size_t Slot = 1; Slot < Size; ++Slot) {
      Items[Slot] = T {0xDEADBEEF};
    }
#endif
    Next.reset();
  }

  BucketList() {
    Clear();
  }

  // Calls Enumerator(Item) for every stored item, in order, until the terminator is reached.
  template<typename EnumeratorFn>
  void Iterate(EnumeratorFn Enumerator) const {
    const BucketList* Current = this;

    for (size_t Index = 0;;) {
      auto Entry = Current->Items[Index];
      if (Entry == T {}) {
        return;
      }

      Enumerator(Entry);

      ++Index;
      if (Index == Size) {
        // A full bucket is always expected to be followed by another bucket.
        LOGMAN_THROW_A_FMT(Current->Next != nullptr, "Interference bug");
        Current = Current->Next.get();
        Index = 0;
      }
    }
  }

  // Calls Enumerator(Item) for each stored item until it returns true.
  // Returns true if some call returned true, false if the terminator was reached first.
  template<typename EnumeratorFn>
  bool Find(EnumeratorFn Enumerator) const {
    const BucketList* Current = this;

    for (size_t Index = 0;;) {
      auto Entry = Current->Items[Index];
      if (Entry == T {}) {
        return false;
      }

      if (Enumerator(Entry)) {
        return true;
      }

      ++Index;
      if (Index == Size) {
        // A full bucket is always expected to be followed by another bucket.
        LOGMAN_THROW_A_FMT(Current->Next != nullptr, "Bucket in bad state");
        Current = Current->Next.get();
        Index = 0;
      }
    }
  }

  // Appends Val behind the last stored item.
  void Append(T Val) {
    // Walk to the last bucket of the chain.
    BucketList* Tail = this;
    while (Tail->Next) {
      Tail = Tail->Next.get();
    }

    // Locate the terminator slot in that bucket.
    size_t Slot = 0;
    while (Slot < Size && !(Tail->Items[Slot] == T {})) {
      ++Slot;
    }

    if (Slot < Size) {
      Tail->Items[Slot] = Val;
    }

    if (Slot < (Size - 1)) {
      // Still room in this bucket: re-establish the terminator right behind the new item.
      Tail->Items[Slot + 1] = T {};
    } else {
      // Bucket is now full: chain a fresh (empty) bucket that carries the terminator.
      Tail->Next = fextl::make_unique<BucketList<Size, T>>();
    }
  }

  // Removes the first occurrence of Val by moving the last stored item into its slot.
  // Val is expected to be contained in the list.
  void Erase(T Val) {
    BucketList* Cursor = this;
    size_t Index = 0;

    // Phase 1: find the slot that holds Val.
    while (!(Cursor->Items[Index] == Val)) {
      if (++Index == Size) {
        Index = 0;
        LOGMAN_THROW_A_FMT(Cursor->Next != nullptr, "Bucket::Erase but element not contained");
        Cursor = Cursor->Next.get();
      }
    }

    BucketList* const HoleBucket = Cursor;
    const size_t HoleIndex = Index;

    // Moves the given (last) item into the hole and turns its old slot into the terminator.
    const auto FillHoleFrom = [&](BucketList* LastBucket, size_t LastIndex) {
      HoleBucket->Items[HoleIndex] = LastBucket->Items[LastIndex];
      LastBucket->Items[LastIndex] = T {};
    };

    // Phase 2: continue from the hole to the end of the list to find the last item.
    for (;;) {
      if (Cursor->Items[Index] == T {}) {
        // The terminator is inside this bucket, so the last item sits right in front of it.
        FillHoleFrom(Cursor, Index - 1);
        return;
      }

      if (++Index != Size) {
        continue;
      }

      if (Cursor->Next->Items[0] == T {}) {
        // The following bucket is empty: release it, the last item is the final slot of this bucket.
        Cursor->Next.reset();
        FillHoleFrom(Cursor, Size - 1);
        return;
      }

      Index = 0;
      Cursor = Cursor->Next.get();
    }
  }
};

} // namespace FEXCore


================================================
FILE: FEXCore/Source/Utils/Config.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/string.h>

namespace FEXCore::Config {
// Returns a reference to the telemetry directory path.
// NOTE(review): the definition is not in this header; presumably the referenced string outlives the call — confirm.
const fextl::string& GetTelemetryDirectory();
} // namespace FEXCore::Config


================================================
FILE: FEXCore/Source/Utils/FileLoading.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/FileLoading.h>

#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <fcntl.h>
#include <sys/stat.h>
#include <span>
#include <unistd.h>
#ifdef _WIN32
#include <fstream>
#endif

namespace FEXCore::FileLoading {

#ifndef _WIN32
// Reads the file at Filepath into Data (any container offering resize/at/size/clear).
//
// FixedSize != 0: exactly FixedSize bytes are expected.
// FixedSize == 0: the size reported by fstat is used; if that is zero (or fstat fails) the file is
//                 read in 4096-byte chunks until pread reports end-of-file.
//
// Returns true when the file was read completely, false on open/read failure or a short read.
template<typename T>
static bool LoadFileImpl(T& Data, const fextl::string& Filepath, size_t FixedSize) {
  const int FD = open(Filepath.c_str(), O_RDONLY);
  if (FD == -1) {
    return false;
  }

  // Work out how many bytes to expect; zero means "unknown".
  size_t ExpectedSize = FixedSize;
  if (ExpectedSize == 0) {
    struct stat Info;
    if (fstat(FD, &Info) == 0) {
      ExpectedSize = Info.st_size;
    }
  }

  ssize_t Offset = 0;
  ssize_t BytesRead = -1;
  bool Success = false;

  if (ExpectedSize) {
    // File size is known upfront: read until the buffer is full or pread stops delivering data.
    Data.resize(ExpectedSize);
    size_t BytesLeft = ExpectedSize;
    while (Offset != ExpectedSize) {
      BytesRead = pread(FD, &Data.at(Offset), BytesLeft, Offset);
      if (BytesRead <= 0) {
        break;
      }
      Offset += BytesRead;
      BytesLeft -= BytesRead;
    }

    Success = Offset == ExpectedSize && BytesRead != -1;
  } else {
    // The file is either empty or its size is unknown (e.g. procfs data).
    // Read chunk by chunk, always keeping room for one more full chunk.
    constexpr size_t READ_SIZE = 4096;
    Data.resize(READ_SIZE);

    for (;;) {
      BytesRead = pread(FD, &Data.at(Offset), READ_SIZE, Offset);
      if (BytesRead <= 0) {
        break;
      }
      Offset += BytesRead;
      if ((Offset + READ_SIZE) > Data.size()) {
        Data.resize(Offset + READ_SIZE);
      }
    }

    if (BytesRead != -1) {
      // Trim the buffer so that no garbage remains past the end of the data.
      Data.resize(Offset + BytesRead);
      Success = true;
    } else {
      Data.clear();
    }
  }

  close(FD);
  return Success;
}

// Reads up to Buffer.size() bytes from the start of the file at Filepath into Buffer with a single pread.
// Returns the pread result (bytes read, or -1 on read failure), or -1 if the file could not be opened.
ssize_t LoadFileToBuffer(const fextl::string& Filepath, std::span<char> Buffer) {
  const int FD = open(Filepath.c_str(), O_RDONLY);
  if (FD != -1) {
    const ssize_t BytesRead = pread(FD, Buffer.data(), Buffer.size(), 0);
    close(FD);
    return BytesRead;
  }

  return -1;
}

#else
// Windows variant: reads the whole file at Filepath into Data through std::ifstream.
// FixedSize is not used by this variant; the size is taken from the stream's end position.
// Returns true when the complete file was read, false if it could not be opened, sized or read.
template<typename T>
static bool LoadFileImpl(T& Data, const fextl::string& Filepath, size_t FixedSize) {
  // Open positioned at the end so that tellg() yields the file size.
  std::ifstream f(Filepath.c_str(), std::ios::binary | std::ios::ate);
  if (f.fail()) {
    return false;
  }

  // tellg() reports -1 on failure; resizing with that value would request an enormous buffer.
  const std::streamoff Size = f.tellg();
  if (Size < 0) {
    return false;
  }

  f.seekg(0, std::ios::beg);
  Data.resize(static_cast<size_t>(Size));
  f.read(Data.data(), Size);
  return !f.fail();
}

// Windows variant: reads up to Buffer.size() bytes from the start of the file at Filepath into Buffer.
// Returns the number of bytes read, or -1 if the file could not be opened or the stream broke while reading.
ssize_t LoadFileToBuffer(const fextl::string& Filepath, std::span<char> Buffer) {
  // Do not open with std::ios::ate: that positions the stream at end-of-file, leaving nothing to read.
  std::ifstream f(Filepath.c_str(), std::ios::binary);
  if (f.fail()) {
    return -1;
  }

  // read() sets failbit/eofbit on a short read, which is fine here; gcount() reports what was delivered.
  f.read(Buffer.data(), static_cast<std::streamsize>(Buffer.size()));
  if (f.bad()) {
    return -1;
  }
  return static_cast<ssize_t>(f.gcount());
}

#endif

// Loads the file at Filepath into a byte vector; forwards to the platform-specific LoadFileImpl.
bool LoadFile(fextl::vector<char>& Data, const fextl::string& Filepath, size_t FixedSize) {
  return LoadFileImpl<fextl::vector<char>>(Data, Filepath, FixedSize);
}

// Loads the file at Filepath into a string; forwards to the platform-specific LoadFileImpl.
bool LoadFile(fextl::string& Data, const fextl::string& Filepath, size_t FixedSize) {
  return LoadFileImpl<fextl::string>(Data, Filepath, FixedSize);
}

} // namespace FEXCore::FileLoading


================================================
FILE: FEXCore/Source/Utils/ForcedAssert.cpp
================================================
// SPDX-License-Identifier: MIT
namespace FEXCore::Assert {
// Executes a single instruction that is meant to trap, so that execution stops inside this function.
// The function is declared naked and noreturn: its body consists solely of that one instruction.
// This function can not be inlined
[[noreturn]]
__attribute__((noinline, naked)) void ForcedAssert() {
#ifdef ARCHITECTURE_x86_64
  // x86-64 build: `ud2`.
  asm volatile("ud2");
#else
  // Every other build (presumably arm64 — confirm): `hlt #1`.
  asm volatile("hlt #1");
#endif
}
} // namespace FEXCore::Assert


================================================
FILE: FEXCore/Source/Utils/LogManager.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|log-manager
$end_info$
*/

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/fmt.h>

namespace LogMan {

namespace Throw {
  // Currently installed throw handler; empty while none is installed.
  ThrowHandler Handler {};

  // Registers the handler that receives formatted throw messages.
  void InstallHandler(ThrowHandler _Handler) {
    Handler = _Handler;
  }

  // Removes the installed handler again.
  void UnInstallHandler() {
    Handler = nullptr;
  }

  // Formats the message, passes it to the handler (if one is installed) and then always traps execution.
  void MFmt(const char* fmt, const fmt::format_args& args) {
    if (Handler != nullptr) {
      // The temporary string stays alive until the handler call has returned.
      Handler(fextl::fmt::vformat(fmt, args).c_str());
    }

    FEX_TRAP_EXECUTION;
  }
} // namespace Throw

namespace Msg {
  // Currently installed message handler; empty while none is installed.
  MsgHandler Handler {};

  // Registers the handler that receives formatted log messages.
  void InstallHandler(MsgHandler _Handler) {
    Handler = _Handler;
  }

  // Removes the installed handler again.
  void UnInstallHandler() {
    Handler = nullptr;
  }

  // Formats the message and forwards it, together with its level, to the installed handler.
  // Without a handler the message is dropped without being formatted.
  void MFmtImpl(DebugLevels level, const char* fmt, const fmt::format_args& args) {
    if (Handler == nullptr) {
      return;
    }

    const auto Formatted = fextl::fmt::vformat(fmt, args);
    Handler(level, Formatted.c_str());
  }

} // namespace Msg
} // namespace LogMan


================================================
FILE: FEXCore/Source/Utils/LongJump.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/LongJump.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstring>

namespace FEXCore::UncheckedLongJump {
#if defined(ARCHITECTURE_arm64)
// arm64 SetJump: records the register state needed to resume at the caller into Buffer.
//
// Stored 8-byte slots (offsets from the start of Buffer, passed in x0):
//   [0..11]  x19-x30
//   [12..19] d8-d15
//   [20]     SP
//
// Returns 0 to signal the direct return from SetJump; a later LongJump resumes at the saved x30
// with the value it was given.
// The function is naked, so the assembly below is its entire body, including the `ret`.
[[nodiscard]]
FEX_DEFAULT_VISIBILITY FEX_NAKED uint64_t SetJump(JumpBuf& Buffer) {
  __asm volatile(R"(
      // x0 contains the jumpbuffer
      stp x19, x20, [x0, #( 0 * 8)];
      stp x21, x22, [x0, #( 2 * 8)];
      stp x23, x24, [x0, #( 4 * 8)];
      stp x25, x26, [x0, #( 6 * 8)];
      stp x27, x28, [x0, #( 8 * 8)];
      stp x29, x30, [x0, #(10 * 8)];

      // FPRs
      stp d8,   d9, [x0, #(12 * 8)];
      stp d10, d11, [x0, #(14 * 8)];
      stp d12, d13, [x0, #(16 * 8)];
      stp d14, d15, [x0, #(18 * 8)];

      // Move SP in to a temporary to store.
      mov x1, sp;
      str x1,  [x0, #(20 * 8)];

      // Return zero to signify this is the SetJump.
      mov x0, #0;
      ret;
    )" ::
                   : "memory");
}

// arm64 LongJump: reloads the state written by SetJump from Buffer (x0) and returns through the
// restored x30 with Value (x1) in x0.
//
// Restored: x19-x30 from slots [0..11], d8-d15 from slots [12..19], SP from slot [20].
// Value is passed through unchanged.
// The function is naked, so the assembly below is its entire body, including the `ret`.
[[noreturn]]
FEX_DEFAULT_VISIBILITY FEX_NAKED void LongJump(const JumpBuf& Buffer, uint64_t Value) {
  __asm volatile(R"(
      // x0 contains the jumpbuffer
      ldp x19, x20, [x0, #( 0 * 8)];
      ldp x21, x22, [x0, #( 2 * 8)];
      ldp x23, x24, [x0, #( 4 * 8)];
      ldp x25, x26, [x0, #( 6 * 8)];
      ldp x27, x28, [x0, #( 8 * 8)];
      ldp x29, x30, [x0, #(10 * 8)];

      // FPRs
      ldp d8,   d9, [x0, #(12 * 8)];
      ldp d10, d11, [x0, #(14 * 8)];
      ldp d12, d13, [x0, #(16 * 8)];
      ldp d14, d15, [x0, #(18 * 8)];

      // Load SP in to temporary then move
      ldr x0,  [x0, #(20 * 8)];
      mov sp, x0;

      // Move value in to result register
      mov x0, x1;
      ret;
    )" ::
                   : "memory");
}

// Applies the state stored in Buffer to an externally held register set instead of the live CPU
// registers, mirroring what LongJump(Buffer, Value) would restore.
//
//   GPRs - general purpose register array; entries 19..30, entry 31 (stack pointer) and entry 0 are written.
//   FPRs - 128-bit vector register array; only the lower 64 bits of entries 8..15 are written.
//   PC   - receives the restored x30 (link register) value.
FEX_DEFAULT_VISIBILITY void ManuallyLoadJumpBuf(const JumpBuf& Buffer, uint64_t Value, uint64_t* GPRs, __uint128_t* FPRs, uint64_t* PC) {
  // Slot layout of JumpBuf::Registers as used here.
  constexpr size_t GPRSlotBase = 0;  // 12 slots: x19..x30
  constexpr size_t FPRSlotBase = 12; // 8 slots:  d8..d15
  constexpr size_t SPSlot = 20;      // 1 slot:   stack pointer

  constexpr size_t FirstSavedGPR = 19;
  constexpr size_t NumSavedGPRs = 12;
  constexpr size_t FirstSavedFPR = 8;
  constexpr size_t NumSavedFPRs = 8;

  // x19..x30 are stored back to back, so one copy restores them all.
  memcpy(&GPRs[FirstSavedGPR], &Buffer.Registers[GPRSlotBase], sizeof(uint64_t) * NumSavedGPRs);

  // d8..d15: retain the upper 64 bits of each vector register, only replacing the lower 64 bits.
  for (size_t Reg = 0; Reg < NumSavedFPRs; ++Reg) {
    memcpy(&FPRs[FirstSavedFPR + Reg], &Buffer.Registers[FPRSlotBase + Reg], sizeof(uint64_t));
  }

  // The stack pointer is kept in GPR slot 31.
  memcpy(&GPRs[31], &Buffer.Registers[SPSlot], sizeof(uint64_t));

  // X0 carries the value that SetJump appears to return.
  GPRs[0] = Value;

  // Execution continues at the restored link register.
  *PC = GPRs[30];
}

#else
// x86-64 SetJump: records the register state needed to resume at the caller into Buffer (rdi).
//
// Stored 8-byte slots:
//   [0] rbx  [1] rsp  [2] rbp  [3] r12  [4] r13  [5] r14  [6] r15
//   [7] return address, read from the top of the stack
//
// Returns 0 to signal the direct return from SetJump.
// The function is naked, so the assembly below is its entire body, including the `ret`.
[[nodiscard]]
FEX_DEFAULT_VISIBILITY FEX_NAKED uint64_t SetJump(JumpBuf& Buffer) {
  __asm volatile(R"(
    .intel_syntax noprefix;
    // rdi contains the jumpbuffer
    mov [rdi + (0 * 8)], rbx;
    mov [rdi + (1 * 8)], rsp;
    mov [rdi + (2 * 8)], rbp;
    mov [rdi + (3 * 8)], r12;
    mov [rdi + (4 * 8)], r13;
    mov [rdi + (5 * 8)], r14;
    mov [rdi + (6 * 8)], r15;

    // Return address is on the stack, load it and store
    mov rsi, [rsp];
    mov [rdi + (7 * 8)], rsi;

    // Return zero to signify this is the SetJump.
    mov rax, 0;
    ret;

    .att_syntax prefix;
    )" ::
                   : "memory");
}

// x86-64 LongJump: reloads the state written by SetJump from Buffer (rdi) and jumps to the saved
// return address with Value (rsi) in rax.
//
// The restored rsp still points at SetJump's return-address slot, so that slot is popped before the
// jump; the jump target comes from slot [7] of the buffer rather than from the stack.
// Value is passed through unchanged.
// The function is naked, so the assembly below is its entire body.
[[noreturn]]
FEX_DEFAULT_VISIBILITY FEX_NAKED void LongJump(const JumpBuf& Buffer, uint64_t Value) {
  __asm volatile(R"(
    .intel_syntax noprefix;
    // rdi contains the jumpbuffer
    mov rbx, [rdi + (0 * 8)];
    mov rsp, [rdi + (1 * 8)];
    mov rbp, [rdi + (2 * 8)];
    mov r12, [rdi + (3 * 8)];
    mov r13, [rdi + (4 * 8)];
    mov r14, [rdi + (5 * 8)];
    mov r15, [rdi + (6 * 8)];

    // Move value in to result register
    mov rax, rsi;

    // Pop the dead return address off the stack
    pop rsi;

    // Load the original return address from the jumpbuffer
    mov rsi, [rdi + (7 * 8)];

    // Return using a jump
    jmp rsi;

    .att_syntax prefix;
    )" ::
                   : "memory");
}

// x86-64 placeholder: manually applying a JumpBuf to an external register set is not implemented here.
// Buffer is taken by const reference so that the signature matches the arm64 variant of this function
// (a `JumpBuf&` parameter would make this a different overload).
FEX_DEFAULT_VISIBILITY void ManuallyLoadJumpBuf(const JumpBuf& Buffer, uint64_t Value, uint64_t* GPRs, __uint128_t* FPRs, uint64_t* PC) {
  LOGMAN_MSG_A_FMT("This is unimplemented on x86-64");
}

#endif
} // namespace FEXCore::UncheckedLongJump


================================================
FILE: FEXCore/Source/Utils/MemberFunctionToPointer.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/LogManager.h>

#include <cstdint>

namespace FEXCore::Utils {

/**
 * @brief Reinterprets a C++ pointer-to-member-function as its raw two-word ABI representation
 *
 * The raw words make it possible to obtain either the plain code address of a non-virtual member
 * or the vtable offset/entry of a virtual member. Each accessor asserts that the representation
 * has the form it expects.
 */
template<typename PointerToMemberType>
class MemberFunctionToPointerCast final {
  // Raw layout of a pointer-to-member-function: two pointer-sized words.
  struct PointerToMember {
    uintptr_t ptr;
    uintptr_t adj;
  };

  PointerToMember Repr;

  // Ensure the representation of PointerToMember matches
  static_assert(sizeof(PointerToMember) == sizeof(PointerToMemberType));

public:
  // Captures the raw bytes of Function.
  MemberFunctionToPointerCast(PointerToMemberType Function) {
    memcpy(&Repr, &Function, sizeof(Repr));
  }

  // Returns the `ptr` word of the representation, asserting that it describes a non-virtual member.
  uintptr_t GetConvertedPointer() const {
#ifdef ARCHITECTURE_x86_64
    // Itanium C++ ABI (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#member-function-pointers)
    // The low bit of ptr tells whether the member function is virtual.
    // Assert if someone tries to cast a virtual member.
    LOGMAN_THROW_A_FMT((Repr.ptr & 1) == 0, "C++ Pointer-To-Member representation didn't have low bit set to 0. Are you trying to cast a "
                                            "virtual member?");
#elif defined(ARCHITECTURE_arm64)
    // C++ ABI for the Arm 64-bit Architecture (IHI 0059E)
    // 4.2.1 Representation of pointer to member function
    // Differs from the Itanium specification: the check is on adj rather than on ptr.
    LOGMAN_THROW_A_FMT(Repr.adj == 0, "C++ Pointer-To-Member representation didn't have adj == 0. Are you trying to cast a virtual member?");
#else
#error "Don't know how to cast Member to function here. Likely just Itanium"
#endif
    return Repr.ptr;
  }

  // Returns the vtable position (in bytes, see GetVTableEntry) of a virtual member function.
  size_t GetVTableOffset() const {
#ifdef ARCHITECTURE_x86_64
    // Itanium C++ ABI (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#member-function-pointers)
    // The low bit of ptr tells whether the member function is virtual.
    // Assert if this is not a virtual member; the offset is ptr with that bit cleared.
    LOGMAN_THROW_A_FMT((Repr.ptr & 1) == 1, "C++ Pointer-To-Member representation didn't have low bit set to 1. This cast only works for "
                                            "virtual members.");
    return Repr.ptr & ~1ULL;
#elif defined(ARCHITECTURE_arm64)
    // C++ ABI for the Arm 64-bit Architecture (IHI 0059E)
    // 4.2.1 Representation of pointer to member function
    // Differs from the Itanium specification: the virtual flag is the low bit of adj, ptr is the offset.
    LOGMAN_THROW_A_FMT((Repr.adj & 1) == 1, "C++ Pointer-To-Member representation didn't have adj == 1. This cast only works for virtual "
                                            "members.");
    return Repr.ptr;
#else
#error "Don't know how to cast Member to function here. Likely just Itanium"
#endif
  }

  // Looks up the vtable entry of this virtual member function for the given object.
  template<typename Class>
  uintptr_t GetVTableEntry(Class* VirtualClass) const {
    // The vtable pointer is read from the very beginning of the object.
    uintptr_t* const VTable = *reinterpret_cast<uintptr_t**>(VirtualClass);

    // Convert the byte offset into an index of pointer-sized entries.
    const size_t EntryIndex = GetVTableOffset() / sizeof(void*);
    return VTable[EntryIndex];
  }
};
} // namespace FEXCore::Utils


================================================
FILE: FEXCore/Source/Utils/Profiler.cpp
================================================
// SPDX-License-Identifier: MIT
#include <cstdint>
#include <fcntl.h>
#ifndef _WIN32
#include <linux/magic.h>
#include <sys/stat.h>
#include <sys/vfs.h>
#endif

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>

#ifdef ENABLE_FEXCORE_PROFILER
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
#include <array>
#include <limits.h>
#include <time.h>
#ifndef _WIN32
// Returns the current CLOCK_MONOTONIC time in nanoseconds.
static inline uint64_t GetTime() {
  // clock_gettime is used to keep the overhead of a timestamp as low as possible
  // (it is serviced through the VDSO).
  constexpr uint64_t NanosecondsPerSecond = 1'000'000'000ULL;

  struct timespec Now;
  clock_gettime(CLOCK_MONOTONIC, &Now);

  const uint64_t WholeSeconds = Now.tv_sec;
  const uint64_t Remainder = Now.tv_nsec;
  return WholeSeconds * NanosecondsPerSecond + Remainder;
}
#else

static inline uint64_t GetTime() {
  // GetTime needs to return nanoseconds, query the interface.
  static uint64_t FrequencyScale = {};
  if (!FrequencyScale) [[unlikely]] {
    LARGE_INTEGER Frequency {};
    while (!QueryPerformanceFrequency(&Frequency))
      ;
    constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL;

    // On WINE this will always result in a scale of 100.
    FrequencyScale = NanosecondsInSecond / Frequency.QuadPart;
  }
  LARGE_INTEGER ticks;
  while (!QueryPerformanceCounter(&ticks))
    ;
  return ticks.QuadPart * FrequencyScale;
}

#endif

namespace FEXCore::Profiler {
// Starts a timed scope: remembers the label and the time of construction.
ProfilerBlock::ProfilerBlock(const std::string_view Format)
  : DurationBegin {GetTime()}
  , Format {Format} {}

// Ends the timed scope: emits a trace event carrying the label and the elapsed time.
ProfilerBlock::~ProfilerBlock() {
  const auto Elapsed = GetTime() - DurationBegin;
  TraceObject(Format, Elapsed);
}
} // namespace FEXCore::Profiler

namespace GPUVis {
// ftrace FD for writing trace data.
// Needs to be a raw FD since we hold this open for the entire application execution.
static int TraceFD {-1};

// Need to search the paths to find the real trace path
static std::array<const char*, 2> TraceFSDirectories {
  "/sys/kernel/tracing",
  "/sys/kernel/debug/tracing",
};

void Init() {
  FEX_CONFIG_OPT(EnableGpuvisProfiling, ENABLEGPUVISPROFILING);
  if (!EnableGpuvisProfiling()) {
    return;
  }
  for (auto Path : TraceFSDirectories) {
#ifdef _WIN32
    constexpr auto flags = O_WRONLY;
#else
    constexpr auto flags = O_WRONLY | O_CLOEXEC;
#endif
    fextl::string FilePath = fextl::fmt::format("{}/trace_marker", Path);
    TraceFD = open(FilePath.c_str(), flags);
    if (TraceFD != -1) {
      // Opened TraceFD, early exit
      break;
    }
  }
}

void Shutdown() {
  if (TraceFD != -1) {
    close(TraceFD);
    TraceFD = -1;
  }
}

void TraceObject(const std::string_view Format, uint64_t Duration) {
  if (TraceFD != -1) {
    // Print the duration as something that began negative duration ago
    const auto StringSize = Format.size() + strlen(" (lduration=-)\n") + 22;
    auto Event = reinterpret_cast<char*>(alloca(StringSize));
    auto Res = ::fmt::format_to_n(Event, StringSize, "{} (lduration=-{})\n", Format, Duration);
    write(TraceFD, Event, Res.size);
  }
}

void TraceObject(const std::string_view Format) {
  if (TraceFD != -1) {
    const auto StringSize = Format.size() + 1;
    auto Event = reinterpret_cast<char*>(alloca(StringSize));
    auto Res = ::fmt::format_to_n(Event, StringSize, "{}\n", Format);
    write(TraceFD, Event, Res.size);
  }
}
} // namespace GPUVis
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
#include "tracy/Tracy.hpp"
namespace Tracy {
static int EnableAfterFork = 0;
static bool Enable = false;

void Init(std::string_view ProgramName, std::string_view ProgramPath) {
  const char* ProfileTargetName = getenv("FEX_PROFILE_TARGET_NAME"); // Match by application name
  const char* ProfileTargetPath = getenv("FEX_PROFILE_TARGET_PATH"); // Match by path suffix
  const char* WaitForFork = getenv("FEX_PROFILE_WAIT_FOR_FORK");     // Don't enable profiling until the process forks N times
  bool Matched = (ProfileTargetName && ProgramName == ProfileTargetName) || (ProfileTargetPath && ProgramPath.ends_with(ProfileTargetPath));
  if (Matched && WaitForFork) {
    EnableAfterFork = std::atoi(WaitForFork);
  }
  Enable = Matched && !EnableAfterFork;
  if (Enable) {
    tracy::StartupProfiler();
    LogMan::Msg::IFmt("Tracy profiling started");
  } else if (EnableAfterFork) {
    LogMan::Msg::IFmt("Tracy profiling will start after fork");
  }
}

// Updates the profiling state after a fork.
//  - The child never profiles.
//  - The parent counts down `EnableAfterFork` and starts the profiler once the count reaches its last fork.
void PostForkAction(bool IsChild) {
  if (Enable) {
    // Tracy does not support multiprocess profiling
    LogMan::Msg::EFmt("Warning: Profiling a process with forks is not supported. Set the environment variable "
                      "FEX_PROFILE_WAIT_FOR_FORK=<n> to start profiling after the n-th fork.");
  }

  if (IsChild) {
    Enable = false;
    return;
  }

  if (EnableAfterFork < 1) {
    // No deferred start is pending.
    return;
  }

  if (EnableAfterFork == 1) {
    // This was the final fork we were waiting for.
    Enable = true;
    EnableAfterFork = 0;
    tracy::StartupProfiler();
    LogMan::Msg::IFmt("Tracy profiling started");
    return;
  }

  --EnableAfterFork;
  LogMan::Msg::IFmt("Tracy profiling will start after {} forks", EnableAfterFork);
}

void Shutdown() {
  if (Tracy::Enable) {
    LogMan::Msg::IFmt("Stopping Tracy profiling");
    tracy::ShutdownProfiler();
  }
}

// Duration-annotated trace events are intentionally a no-op for the Tracy backend.
void TraceObject(const std::string_view Format, uint64_t Duration) {}

void TraceObject(const std::string_view Format) {
  if (Tracy::Enable) {
    TracyMessage(Format.data(), Format.size());
  }
}
} // namespace Tracy
#else
#error Unknown profiler backend
#endif
#endif

// Public profiler entry points. Each function forwards to whichever backend
// (GPUVis or Tracy) was selected at compile time through FEXCORE_PROFILER_BACKEND.
// Nothing is defined here unless ENABLE_FEXCORE_PROFILER is set.
namespace FEXCore::Profiler {

#ifdef ENABLE_FEXCORE_PROFILER
// Initializes the selected backend. Only the Tracy backend uses the program name and path.
void Init(std::string_view ProgramName, std::string_view ProgramPath) {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
  GPUVis::Init();
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  Tracy::Init(ProgramName, ProgramPath);
#endif
}

// Notifies the backend that a fork happened. This is a no-op for the GPUVis backend.
void PostForkAction(bool IsChild) {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  Tracy::PostForkAction(IsChild);
#endif
}

// Reports whether the backend is currently collecting data.
bool IsActive() {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
  // Always active
  return true;
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  // Active if previously enabled
  return Tracy::Enable;
#endif
}

// Shuts down the selected backend.
void Shutdown() {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
  GPUVis::Shutdown();
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  Tracy::Shutdown();
#endif
}

// Emits a trace event annotated with a duration. The Tracy backend ignores these.
void TraceObject(const std::string_view Format, uint64_t Duration) {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
  GPUVis::TraceObject(Format, Duration);
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  Tracy::TraceObject(Format, Duration);
#endif
}

// Emits a plain trace event/message.
void TraceObject(const std::string_view Format) {
#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_GPUVIS
  GPUVis::TraceObject(Format);
#elif FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
  Tracy::TraceObject(Format);
#endif
}

#endif
} // namespace FEXCore::Profiler


================================================
FILE: FEXCore/Source/Utils/SpinWaitLock.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Utils/SpinWaitLock.h"

namespace FEXCore::Utils::SpinWaitLock {
#ifdef ARCHITECTURE_arm64
constexpr uint64_t NanosecondsInSecond = 1'000'000'000ULL;

// Reads the CNTFRQ_EL0 system register and returns its raw value.
// The callers in this file treat the value as the cycle counter frequency in Hz.
static uint64_t GetCycleCounterFrequency() {
  uint64_t Result {};
  __asm("mrs %[Res], CNTFRQ_EL0" : [Res] "=r"(Result));
  return Result;
}

// Computes the divisor used to convert a nanosecond count into cycle-counter ticks.
//
// Despite the name, the value is the (truncated) number of nanoseconds that elapse per counter tick:
//  - Snapdragon devices historically use a 19.2Mhz cycle counter frequency, which gives
//    1e9 / 19.2e6 = 52.0833... nanoseconds per tick (stored as 52).
//  - ARMv8.6 and ARMv9.1 require the cycle counter frequency to be 1Ghz, which gives exactly 1.
//
// The result is never zero. The header divides by this value, so a zero here
// (counter faster than 1Ghz) would turn every conversion into a division by zero.
static uint64_t CalculateCyclesPerNanosecond() {
  const uint64_t CounterFrequency = GetCycleCounterFrequency();
  if (CounterFrequency == 0) {
    // The frequency register was left unprogrammed. Avoid the undefined division below and fall back to 1:1.
    return 1;
  }

  const uint64_t NanosecondsPerCycle = NanosecondsInSecond / CounterFrequency;

  // Counters faster than 1Ghz truncate to zero; clamp so later divisions stay well-defined.
  return NanosecondsPerCycle != 0 ? NanosecondsPerCycle : 1;
}

uint64_t CycleCounterFrequency = GetCycleCounterFrequency();
uint64_t CyclesPerNanosecond = CalculateCyclesPerNanosecond();
#endif
} // namespace FEXCore::Utils::SpinWaitLock


================================================
FILE: FEXCore/Source/Utils/SpinWaitLock.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <atomic>
#include <chrono>
#include <mutex>
#include <type_traits>

#include <FEXCore/fextl/functional.h>
#include <FEXCore/Utils/EnumUtils.h>

namespace FEXCore::Utils::SpinWaitLock {
/**
 * @brief This provides routines to implement an "efficient spin-loop" using ARM's WFE and exclusive monitor interfaces.
 *
 * Spin-loops on mobile devices with a battery can be a bad idea as they burn a bunch of power. This attempts to mitigate some of the impact
 * by putting the CPU in to a lower-power state using WFE.
 * On platforms tested, WFE will put the CPU in to a lower power state for upwards of 0.11ms(!) per WFE. Which isn't a significant amount of
 * time but should still have power savings. Ideally WFE would be able to keep the CPU in a lower power state for longer. This also has the
 * added benefit that atomics aren't abusing the caches when spinning on a cacheline, which has knock-on powersaving benefits.
 *
 * This short timeout is because the Linux kernel has a 100 microsecond architecture timer which wakes up WFE and WFI. Nothing can be
 * improved beyond that period.
 *
 * FEAT_WFxT adds a new instruction with a timeout, but since the spurious wake-up is so aggressive it isn't worth using.
 *
 * It should be noted that this implementation has a few dozen cycles of start-up time. Which means the overhead for invoking this
 * implementation is slightly higher than a true spin-loop. The hot loop body itself is only three instructions so it is quite efficient.
 *
 * On non-ARM platforms it is truly a spin-loop, which is okay for debugging only.
 */
#ifdef ARCHITECTURE_arm64

// Inline-assembly string builders used by the LoadExclusive/WFELoadAtomic helpers below.
// Both expect the enclosing asm statement to provide operands named [Result] and [Futex].
// `RegSize` is the register-width modifier (`w` or `x`) applied to the [Result] operand.

// Emits a single load-exclusive instruction from [Futex] into [Result].
#define LOADEXCLUSIVE(LoadExclusiveOp, RegSize)                 \
  /* Prime the exclusive monitor with the passed in address. */ \
  #LoadExclusiveOp " %" #RegSize "[Result], [%[Futex]];\n"

// Emits `wfe` followed by a load-acquire from [Futex] into [Result].
#define SPINLOOP_BODY(LoadAtomicOp, RegSize)                               \
  /* WFE will wait for either the memory to change or spurious wake-up. */ \
  "wfe;\n" /* Load with acquire to get the result of memory. */            \
    #LoadAtomicOp " %" #RegSize "[Result], [%[Futex]];\n"

// Per-width load-exclusive sequences.
#define SPINLOOP_WFE_LDX_8BIT LOADEXCLUSIVE(ldaxrb, w)
#define SPINLOOP_WFE_LDX_16BIT LOADEXCLUSIVE(ldaxrh, w)
#define SPINLOOP_WFE_LDX_32BIT LOADEXCLUSIVE(ldaxr, w)
#define SPINLOOP_WFE_LDX_64BIT LOADEXCLUSIVE(ldaxr, x)

// Per-width WFE + load-acquire sequences.
#define SPINLOOP_8BIT SPINLOOP_BODY(ldarb, w)
#define SPINLOOP_16BIT SPINLOOP_BODY(ldarh, w)
#define SPINLOOP_32BIT SPINLOOP_BODY(ldar, w)
#define SPINLOOP_64BIT SPINLOOP_BODY(ldar, x)

extern uint64_t CycleCounterFrequency;
extern uint64_t CyclesPerNanosecond;

///< Get the raw cycle counter which is synchronizing.
/// `CNTVCTSS_EL0` also does the same thing, but requires the FEAT_ECV feature.
/// Issues an `isb` before reading `CNTVCT_EL0` and returns the value that was read.
static inline uint64_t GetCycleCounter() {
  uint64_t Result {};
  __asm volatile(R"(
      isb;
      mrs %[Res], CNTVCT_EL0;
    )"
                 : [Res] "=r"(Result));
  return Result;
}

///< Converts a nanosecond duration into a number of cycle-counter ticks.
/// The nanosecond count is divided by the precomputed `CyclesPerNanosecond` value,
/// so with a 1Ghz cycle counter this is a direct 1:1 mapping.
static inline uint64_t ConvertNanosecondsToCycles(const std::chrono::nanoseconds& Nanoseconds) {
  return Nanoseconds.count() / CyclesPerNanosecond;
}

// LoadExclusive overloads: perform a load-acquire-exclusive (ldaxrb/ldaxrh/ldaxr) of `*Futex`
// and return the loaded value. The load primes the exclusive monitor for the address so that
// a following WFE can be woken by a write to it.

// 8-bit variant (ldaxrb).
static inline uint8_t LoadExclusive(uint8_t* Futex) {
  uint8_t Result {};
  __asm volatile(SPINLOOP_WFE_LDX_8BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 16-bit variant (ldaxrh).
static inline uint16_t LoadExclusive(uint16_t* Futex) {
  uint16_t Result {};
  __asm volatile(SPINLOOP_WFE_LDX_16BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 32-bit variant (ldaxr into a w register).
static inline uint32_t LoadExclusive(uint32_t* Futex) {
  uint32_t Result {};
  __asm volatile(SPINLOOP_WFE_LDX_32BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 64-bit variant (ldaxr into an x register).
static inline uint64_t LoadExclusive(uint64_t* Futex) {
  uint64_t Result {};
  __asm volatile(SPINLOOP_WFE_LDX_64BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// WFELoadAtomic overloads: execute `wfe` and then load-acquire (ldarb/ldarh/ldar) `*Futex`,
// returning the loaded value. The WFE may also return on a spurious wake-up, so callers
// must re-check the returned value themselves.

// 8-bit variant (ldarb).
static inline uint8_t WFELoadAtomic(uint8_t* Futex) {
  uint8_t Result {};
  __asm volatile(SPINLOOP_8BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 16-bit variant (ldarh).
static inline uint16_t WFELoadAtomic(uint16_t* Futex) {
  uint16_t Result {};
  __asm volatile(SPINLOOP_16BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 32-bit variant (ldar into a w register).
static inline uint32_t WFELoadAtomic(uint32_t* Futex) {
  uint32_t Result {};
  __asm volatile(SPINLOOP_32BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// 64-bit variant (ldar into an x register).
static inline uint64_t WFELoadAtomic(uint64_t* Futex) {
  uint64_t Result {};
  __asm volatile(SPINLOOP_64BIT : [Result] "=r"(Result), [Futex] "+r"(Futex)::"memory");

  return Result;
}

// Blocks until `Pred{}(value, ComparisonValue)` holds for a value loaded from `*Futex`.
// Between checks the thread waits in WFE rather than spinning on plain loads.
template<typename Pred, typename T>
static inline void WaitPred(T* Futex, T ComparisonValue) {
  // Fast path: a plain atomic load already satisfies the predicate.
  if (Pred {}(std::atomic_ref<T>(*Futex).load(), ComparisonValue)) {
    return;
  }

  for (;;) {
    // Prime the exclusive monitor and re-check before going to sleep.
    if (Pred {}(LoadExclusive(Futex), ComparisonValue)) {
      return;
    }

    // Sleep in WFE, then check whatever value we woke up to.
    if (Pred {}(WFELoadAtomic(Futex), ComparisonValue)) {
      return;
    }
  }
}

template<typename T, typename TT>
static inline bool Wait(T* Futex, TT ExpectedValue, const std::chrono::nanoseconds& Timeout) {
  auto AtomicFutex = std::atomic_ref<T>(*Futex);

  T Result = AtomicFutex.load();

  // Early exit if possible.
  if (Result == ExpectedValue) {
    return true;
  }

  const auto TimeoutCycles = ConvertNanosecondsToCycles(Timeout);
  const auto Begin = GetCycleCounter();

  do {
    Result = LoadExclusive(Futex);
    if (Result == ExpectedValue) {
      return true;
    }
    Result = WFELoadAtomic(Futex);

    const auto CurrentCycleCounter = GetCycleCounter();
    if ((CurrentCycleCounter - Begin) >= TimeoutCycles) {
      // Couldn't get value before timeout.
      return false;
    }
  } while (Result != ExpectedValue);

  // We got our result.
  return true;
}

template bool Wait<uint8_t>(uint8_t*, uint8_t, const std::chrono::nanoseconds&);
template bool Wait<uint16_t>(uint16_t*, uint16_t, const std::chrono::nanoseconds&);
template bool Wait<uint32_t>(uint32_t*, uint32_t, const std::chrono::nanoseconds&);
template bool Wait<uint64_t>(uint64_t*, uint64_t, const std::chrono::nanoseconds&);

// Makes a single attempt at waiting for `(*Futex & Mask) == Comp`.
// Returns the last value loaded from `*Futex`. After the WFE wake-up the value is returned
// without being re-checked, so the caller must test it again.
template<typename T>
static inline T OneShotWFEBitComparison(T* Futex, T Mask, T Comp) {
  const auto Satisfied = [Mask, Comp](T Value) {
    return (Value & Mask) == Comp;
  };

  // Fast path: plain atomic load.
  T Observed = std::atomic_ref<T>(*Futex).load();
  if (Satisfied(Observed)) {
    return Observed;
  }

  // Prime the exclusive monitor and check once more.
  Observed = LoadExclusive(Futex);
  if (Satisfied(Observed)) {
    return Observed;
  }

  // Sleep until a write (or spurious wake-up) and hand back whatever is there.
  return WFELoadAtomic(Futex);
}

#else

// Portable fallback: spins on atomic loads of `*Futex` until `Pred{}(value, ComparisonValue)` holds.
template<typename Pred, typename T>
static inline void WaitPred(T* Futex, T ComparisonValue) {
  auto AtomicFutex = std::atomic_ref<T>(*Futex);

  for (T Observed = AtomicFutex.load(); !Pred {}(Observed, ComparisonValue); Observed = AtomicFutex.load()) {
    // Busy-wait; every iteration reloads the value.
  }
}

// Portable fallback: spins until `*Futex` equals `ExpectedValue` or `Timeout` has elapsed.
// Returns true when the expected value was observed, false when the timeout was hit first.
// The timeout is checked after each load, before the loaded value is compared.
template<typename T, typename TT>
static inline bool Wait(T* Futex, TT ExpectedValue, const std::chrono::nanoseconds& Timeout) {
  auto AtomicFutex = std::atomic_ref<T>(*Futex);

  // Fast path: the value is already what the caller wants.
  if (AtomicFutex.load() == ExpectedValue) {
    return true;
  }

  const auto Start = std::chrono::high_resolution_clock::now();

  for (;;) {
    const T Observed = AtomicFutex.load();

    if ((std::chrono::high_resolution_clock::now() - Start) >= Timeout) {
      // Ran out of time before the value showed up.
      return false;
    }

    if (Observed == ExpectedValue) {
      return true;
    }
  }
}
#endif

// Blocks, without a timeout, until `*Futex` compares equal to `ExpectedValue`.
template<typename T, typename TT = T>
static inline void Wait(T* Futex, TT ExpectedValue) {
  using IsEqual = std::equal_to<>;
  WaitPred<IsEqual, T>(Futex, ExpectedValue);
}

template void Wait<uint8_t>(uint8_t*, uint8_t);
template void Wait<uint16_t>(uint16_t*, uint16_t);
template void Wait<uint32_t>(uint32_t*, uint32_t);
template void Wait<uint64_t>(uint64_t*, uint64_t);

// Acquires the spin-lock stored in `*Futex` (0 = unlocked, 1 = locked), blocking until it succeeds.
template<typename T>
static inline void lock(T* Futex) {
  auto AtomicFutex = std::atomic_ref<T>(*Futex);
  const T Locked {1};
  T Unlocked {};

  // Uncontended case: a single CAS takes the lock.
  if (AtomicFutex.compare_exchange_strong(Unlocked, Locked)) {
    return;
  }

  for (;;) {
    // Sleep until the lock looks free, then race for it again.
    Wait(Futex, 0);

    // A failed CAS overwrote our expected value; reset it before retrying.
    Unlocked = 0;
    if (AtomicFutex.compare_exchange_strong(Unlocked, Locked)) {
      return;
    }
  }
}

// Makes one attempt to take the spin-lock stored in `*Futex` by swapping 0 -> 1.
// Returns true if the lock was acquired, false if it held any non-zero value.
template<typename T>
static inline bool try_lock(T* Futex) {
  T Unlocked {};
  return std::atomic_ref<T>(*Futex).compare_exchange_strong(Unlocked, T {1});
}

// Releases the spin-lock stored in `*Futex` by atomically storing zero.
template<typename T>
static inline void unlock(T* Futex) {
  std::atomic_ref<T>(*Futex).store(0);
}

// Drop every assembly helper macro defined by this header so none of them leak into includers.
// (#undef of a macro that was never defined, e.g. on non-arm64 builds, is harmless.)
#undef SPINLOOP_8BIT
#undef SPINLOOP_16BIT
#undef SPINLOOP_32BIT
#undef SPINLOOP_64BIT
#undef SPINLOOP_WFE_LDX_8BIT
#undef SPINLOOP_WFE_LDX_16BIT
#undef SPINLOOP_WFE_LDX_32BIT
#undef SPINLOOP_WFE_LDX_64BIT
#undef SPINLOOP_BODY
#undef LOADEXCLUSIVE
// RAII guard for a spin-lock stored in a `T`: locks on construction, unlocks on destruction.
//
// The guard is move-only. Moving transfers ownership of the held lock: the moved-from guard is
// left empty and will not unlock anything, so the futex is released exactly once.
template<typename T>
class UniqueSpinMutex final {
public:
  // Move-only type
  UniqueSpinMutex(const UniqueSpinMutex&) = delete;
  UniqueSpinMutex& operator=(const UniqueSpinMutex&) = delete;

  // Takes over the lock held by `rhs`, leaving `rhs` empty.
  UniqueSpinMutex(UniqueSpinMutex&& rhs) noexcept
    : Futex {rhs.Futex} {
    rhs.Futex = nullptr;
  }

  // Releases the lock currently held by this guard (if any), then takes over the one held by `rhs`.
  UniqueSpinMutex& operator=(UniqueSpinMutex&& rhs) noexcept {
    if (this != &rhs) {
      Release();
      Futex = rhs.Futex;
      rhs.Futex = nullptr;
    }
    return *this;
  }

  // Blocks until the lock stored in `*Futex` has been acquired.
  UniqueSpinMutex(T* Futex)
    : Futex {Futex} {
    FEXCore::Utils::SpinWaitLock::lock(Futex);
  }

  ~UniqueSpinMutex() {
    Release();
  }
private:
  // Unlocks the futex if this guard still owns it, and marks the guard as empty.
  void Release() {
    if (Futex != nullptr) {
      FEXCore::Utils::SpinWaitLock::unlock(Futex);
      Futex = nullptr;
    }
  }

  T* Futex;
};
} // namespace FEXCore::Utils::SpinWaitLock


================================================
FILE: FEXCore/Source/Utils/Telemetry.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/File.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXHeaderUtils/Filesystem.h>

#include "Utils/Config.h"

#include <array>
#include <stddef.h>
#include <string_view>
#include <system_error>

namespace FEXCore::Telemetry {
#ifndef FEX_DISABLE_TELEMETRY
// One counter per telemetry type; all entries start value-initialized.
std::array<Value, FEXCore::Telemetry::TelemetryType::TYPE_LAST> TelemetryValues = {{}};

// Human-readable label for each telemetry counter, written next to the value with the same index on shutdown.
// NOTE(review): presumably the order must match the TelemetryType enumerators — verify against the enum.
const std::array<std::string_view, FEXCore::Telemetry::TelemetryType::TYPE_LAST> TelemetryNames {
  "64byte Split Locks",
  "16byte Split atomics",
  "EVEX instructions (AVX512)",
  "16bit CAS Tear",
  "32bit CAS Tear",
  "64bit CAS Tear",
  "128bit CAS Tear",
  "Crash mask",
  "Write 32-bit Segment ES",
  "Write 32-bit Segment SS",
  "Write 32-bit Segment CS",
  "Write 32-bit Segment DS",
  "Uses 32-bit Segment ES",
  "Uses 32-bit Segment SS",
  "Uses 32-bit Segment CS",
  "Uses 32-bit Segment DS",
  "Non-Canonical 64-bit address access",
};

// Cleared by Initialize() when telemetry is disabled through configuration; checked by Shutdown().
static bool Enabled {true};
// Prepares telemetry collection.
// When the DISABLETELEMETRY option is set, telemetry is turned off for this process.
// Otherwise the telemetry directory is created if it does not exist yet.
void Initialize() {
  FEX_CONFIG_OPT(DisableTelemetry, DISABLETELEMETRY);
  if (DisableTelemetry) {
    Enabled = false;
    return;
  }

  const auto& TelemetryDir = Config::GetTelemetryDirectory();

  // Ensure the folder structure is created for our configuration
  if (FHU::Filesystem::Exists(TelemetryDir)) {
    return;
  }

  if (!FHU::Filesystem::CreateDirectories(TelemetryDir)) {
    LogMan::Msg::IFmt("Couldn't create telemetry Folder");
  }
}

// Writes all telemetry counters to "<telemetry dir><ApplicationName>.telem", one "name: value" line each.
// An existing file of that name is first renamed to "<...>.telem.bck" so a single backup is retained.
// Does nothing when telemetry has been disabled.
void Shutdown(const fextl::string& ApplicationName) {
  if (!Enabled) {
    return;
  }

  auto TelemetryFile = Config::GetTelemetryDirectory() + ApplicationName + ".telem";

  // Retain a single backup if the telemetry already existed.
  auto BackupFile = TelemetryFile + ".bck";

  // Failure on rename is okay.
  (void)FHU::Filesystem::RenameFile(TelemetryFile, BackupFile);

  auto File = FEXCore::File::File(TelemetryFile.c_str(),
                                  FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE);

  if (!File.IsValid()) {
    // Could not open the output file; nothing to write.
    return;
  }

  for (size_t Index = 0; Index < TelemetryType::TYPE_LAST; ++Index) {
    fextl::fmt::print(File, "{}: {}\n", TelemetryNames.at(Index), TelemetryValues.at(Index).load());
  }
  File.Flush();
}

#endif
} // namespace FEXCore::Telemetry


================================================
FILE: FEXCore/Source/Utils/Threads.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/fextl/memory.h>

#include <pthread.h>
#include <unistd.h>

namespace FEXCore::Threads {
static fextl::unique_ptr<FEXCore::Threads::Thread> CreateThread_Default(ThreadFunc Func, void* Arg) {
  ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!");
}

static void CleanupAfterFork_Default() {
  ERROR_AND_DIE_FMT("Frontend didn't setup thread creation!");
}

static FEXCore::Threads::Pointers Ptrs = {
  .CreateThread = CreateThread_Default,
  .CleanupAfterFork = CleanupAfterFork_Default,
};

fextl::unique_ptr<FEXCore::Threads::Thread> FEXCore::Threads::Thread::Create(ThreadFunc Func, void* Arg) {
  return Ptrs.CreateThread(Func, Arg);
}

void FEXCore::Threads::Thread::CleanupAfterFork() {
  return Ptrs.CleanupAfterFork();
}

void FEXCore::Threads::Thread::SetInternalPointers(const Pointers& _Ptrs) {
  Ptrs = _Ptrs;
}
} // namespace FEXCore::Threads


================================================
FILE: FEXCore/Source/Utils/WritePriorityMutex.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <atomic>
#include <cstdint>

#if !defined(_WIN32)
#include <linux/futex.h> /* Definition of FUTEX_* constants */
#include <sys/syscall.h> /* Definition of SYS_* constants */
#include <unistd.h>
#else
#include <synchapi.h>
#endif

#include <FEXCore/Utils/LogManager.h>

#include "Utils/SpinWaitLock.h"

namespace FEXCore::Utils::WritePriorityMutex {

// A custom mutex that prioritizes exclusive locks.
// In highly contested scenarios, this can help minimize overall contention time.
//
// Features:
//  - Up to 16383 pending exclusive locks ("writers"), tracked in a 14-bit waiter count
//  - Up to 65535 concurrent shared_locks ("readers"), tracked in a 16-bit owner count
//  - Low-overhead waiting via WFE with a fallback to futex on timeout
//  - Direct writer->reader hand-off and vice-versa to further reduce overhead
//
// Trade-offs:
//  - No guaranteed order of wake-ups besides prioritizing writers
//  - No support for recursive locking
//  - We can't use FUTEX_LOCK_PI to enable priority inheritance
class Mutex final {
public:
  Mutex() = default;

  // Neither copyable nor movable: waiters sleep on the address of the embedded futex word.
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;
  Mutex(Mutex&& rhs) = delete;
  Mutex& operator=(Mutex&&) = delete;

  // Acquires the mutex exclusively, blocking until it is available.
  // Order of attempts: a single CAS (try_lock), a bounded WFE spin (arm64 only), then
  // registering as a write-waiter and sleeping on the futex until the lock can be taken.
  void lock() {
    // Try a non-blocking lock first.
    if (try_lock()) {
      return;
    }

    // Try a quick WFE write-lock.
    if (Attempt_WFE_WriteLock()) {
      return;
    }

    // Still couldn't get it. Start waiting.
    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);

    uint32_t Expected {};
    uint32_t Desired {};
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    // Assertion builds use a CAS loop so the waiter-count overflow can be checked before it is published.
    Expected = AtomicFutex.load(std::memory_order_relaxed);
    do {
      // Increment the number of write waiters.
      Desired = Expected + WRITE_WAITER_INCREMENT;

      LOGMAN_THROW_A_FMT((Desired & WRITE_WAITER_COUNT_MASK) != 0, "Overflow in write-waiters!");
    } while (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire) == false);
#else
    // Increment the number of writers waiting. The following loop will attempt to acquire the write-lock while decrementing the waiter count.
    Expected = AtomicFutex.fetch_add(WRITE_WAITER_INCREMENT);
    Desired = Expected + WRITE_WAITER_INCREMENT;
#endif

    // Thread added to waiter list.
    Expected = Desired;

    while (true) {
      bool Sleep = false;

      do {
        if ((Expected & WRITE_OWNED_BIT) == 0 && (Expected & READ_OWNER_COUNT_MASK) == 0) {
          // If not write-owned, and no read-owners, try to acquire.
          LOGMAN_THROW_A_FMT((Expected & WRITE_WAITER_COUNT_MASK) != 0, "Underflow in write-waiters!");

          // Add write-owned bit.
          Desired = Expected | WRITE_OWNED_BIT;

          // Remove ourselves from the wait list.
          Desired -= WRITE_WAITER_INCREMENT;

          Sleep = false;
        } else {
          // Already write-owned or read-locked. Go to sleep.
          Desired = Expected;
          Sleep = true;
          break;
        }

      } while (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire) == false);

      if (!Sleep) {
        // Acquired early.
        LOGMAN_THROW_A_FMT((Desired & WRITE_OWNED_BIT) == WRITE_OWNED_BIT, "Somehow acquired a write-lock without it being set!");
        return;
      }

      // Two paths to get here.
      // Desired[31] = 1 (WRITE_OWNED_BIT)
      // OR
      // Desired[15:0] != 0 (READ_OWNER_COUNT_MASK)
      // Meaning that there was already a writer that owned the lock, or reads were owning it.
      // This thread already incremented `WRITE_WAITER_INCREMENT` before this loop.
      // - Linux waits for the full 32-bits to change (With bitset wakeup).
      // - Win32 also waits for the full 32-bits to change (with offset addr on the reader side to reduce stampeding).
      FutexWaitForWriteAvailable(Desired);

      // Re-read the state after waking up (possibly spuriously) and retry.
      Expected = AtomicFutex.load(std::memory_order_relaxed);
    }
  }

  // Acquires the mutex in shared mode, blocking while a writer owns the lock or is waiting for it.
  // Order of attempts: a single CAS (try_lock_shared), a bounded WFE spin (arm64 only), then
  // setting the read-waiter bit and sleeping on the futex.
  void lock_shared() {
    // Try an uncontended lock first.
    if (try_lock_shared()) {
      return;
    }

    // Try a quick WFE read-lock.
    if (Attempt_WFE_ReadLock()) {
      return;
    }

    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);

    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);
    uint32_t Desired {};

    while (true) {
      bool Sleep = false;
      do {
        if ((Expected & WRITE_OWNED_BIT) == 0 && (Expected & WRITE_WAITER_COUNT_MASK) == 0) {
          // If no write-owner and no write-waiting, try and acquire.

          Desired = Expected + READ_OWNER_INCREMENT;
          LOGMAN_THROW_A_FMT((Desired & READ_OWNER_COUNT_MASK) != 0, "Overflow in read-owners!");
          Sleep = false;
        } else {
          // Waiting for lock to become available. Add to waiters.
          Desired = Expected | READ_WAITER_BIT;
          Sleep = true;
        }
      } while (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire) == false);

      if (!Sleep) {
        // Acquired early.
        LOGMAN_THROW_A_FMT((Desired & WRITE_OWNED_BIT) != WRITE_OWNED_BIT, "Somehow read-locked and got a write lock!");
        return;
      }

      // Only one path to get here.
      // Desired[31][29:16] != 0 (Either writer-owned, or writer-waiting)
      // Desired[30][15:0]  == READ_WAITER_BIT and number of read-owners (draining to zero as write-side is set)
      // - Linux waits for full 32-bit futex.
      // - Win32 waits for upper 16-bits to not match (Either zero writer owned, writer-wait is draining, and `READ_WAITER_BIT` changed).
      // Can get some spurious wake-ups which will `or` the `READ_WAITER_BIT` again, which does nothing.
      FutexWaitForReadAvailable(Desired);

      // Re-read the state after waking up (possibly spuriously) and retry.
      Expected = AtomicFutex.load(std::memory_order_relaxed);
    }
  }

  // Releases an exclusive lock.
  // Wakes one waiting writer if any are registered; otherwise wakes all waiting readers.
  void unlock() {
    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);

    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);
    uint32_t Desired {};
    do {
      LOGMAN_THROW_A_FMT((Expected & WRITE_OWNED_BIT) == WRITE_OWNED_BIT, "Trying to write-unlock something not write-locked!");
      // Remove the exclusive lock bit.
      Desired = Expected & ~WRITE_OWNED_BIT;

      // If no more writers, then make sure to clear the read-waiters bit as well.
      if ((Desired & WRITE_WAITER_COUNT_MASK) == 0) {
        Desired &= ~READ_WAITER_BIT;
      }
    } while (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire) == false);

    // `Expected` has old value. Containing `READ_WAITER_BIT` which was just masked off, and also `WRITE_WAITER_COUNT_MASK`.
    //
    // Two paths here to be careful about dead-locking other waiters:
    // - If there are any writers waiting, those get priority to wake.
    // - If there are zero writers waiting, and there are read waiters then make sure to wake them all.
    // Failure to send wake events can cause readers to "infinitely" hang! (ignoring spurious wake-up).
    if ((Expected & WRITE_WAITER_COUNT_MASK)) {
      // Handle write-write handoff.
      FutexWakeWriter();
    } else if ((Expected & READ_WAITER_BIT)) {
      // Handle write-reader handoff.
      FutexWakeReaders();
    }
  }

  // Releases a shared lock.
  // Wakes a single waiting writer when this was the last reader and writers are waiting.
  void unlock_shared() {
    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);

    uint32_t Desired {};
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    // Assertion builds use a CAS loop so the current state can be validated before decrementing.
    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);
    do {
      LOGMAN_THROW_A_FMT((Expected & WRITE_OWNED_BIT) != WRITE_OWNED_BIT, "Trying to read-unlock something write-locked!");
      LOGMAN_THROW_A_FMT((Expected & READ_OWNER_COUNT_MASK) != 0, "Trying to read-unlock something not read-locked!");

      // Decrement the shared counter.
      Desired = Expected - READ_OWNER_INCREMENT;
    } while (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire) == false);
#else
    // fetch_sub returns the previous value; subtract again to obtain the value now stored.
    Desired = AtomicFutex.fetch_sub(READ_OWNER_INCREMENT) - READ_OWNER_INCREMENT;
#endif

    // Handle read->write handoff if there are any waiting writers, and no readers left.
    // Only one path here but still need to be careful to not dead-lock waiting writers.
    // - If there are waiters /but/ this is not the final unlock_shared, then don't wake writer.
    //   - Writer would wake and immediately sleep again if we woke on every unlock_shared.
    // - If there are waiters and this is the final unlock_shared, then wake a /single/ writer.
    // - We ignore any reader-waiters here as they must wait their turn for writers that are waiting.
    if ((Desired & WRITE_WAITER_COUNT_MASK) && (Desired & READ_OWNER_COUNT_MASK) == 0) {
      FutexWakeWriter();
    }
  }

  // Makes a single attempt to take the exclusive lock.
  // Only succeeds when the whole futex word is zero (no owners and no waiters of any kind).
  // Return: true if the lock was acquired.
  bool try_lock() {
    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);

    uint32_t Expected = 0;

    // Try and grab the owned bit.
    uint32_t Desired = WRITE_OWNED_BIT;

    // try to CAS immediately.
    return AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire);
  }

  // Makes a single attempt to take a shared lock.
  // Fails when a writer owns or waits for the lock, and also when the single CAS loses a race.
  // Return: true if the lock was acquired.
  // Can race with other threads trying to lock shared!
  bool try_lock_shared() {
    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);
    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);

    // Exclusively owned or has a list of waiting owners. Can't pass.
    if ((Expected & WRITE_OWNED_BIT) || (Expected & WRITE_WAITER_COUNT_MASK)) {
      return false;
    }

    // Try to add reader.
    uint32_t Desired = Expected + READ_OWNER_INCREMENT;
    LOGMAN_THROW_A_FMT((Desired & READ_OWNER_COUNT_MASK) != 0, "Overflow in read-owners!");

    // Uncontended mutex check
    return AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire);
  }

#if !defined(_WIN32)
  // Initialize the internal mutex object to its default initializer state.
  // Should only ever be used in the child process when a Linux fork() has occurred.
  // Note that this is a plain (non-atomic) store that discards all owner and waiter state.
  void StealAndDropActiveLocks() {
    Futex = 0;
  }
#endif

private:

#if !defined(_WIN32)
  // Write-lock waiting: sleeps while the futex word still equals `Expected`, tagged with the writers bitset.
  void FutexWaitForWriteAvailable(uint32_t Expected) {
    ::syscall(SYS_futex, &Futex, FUTEX_PRIVATE_FLAG | FUTEX_WAIT_BITSET, Expected, nullptr, nullptr, FUTEX_BITSET_WAIT_WRITERS);
  }

  // Read-lock waiting for writers to drain out.
  void FutexWaitForReadAvailable(uint32_t Expected) {
    ::syscall(SYS_futex, &Futex, FUTEX_PRIVATE_FLAG | FUTEX_WAIT_BITSET, Expected, nullptr, nullptr, FUTEX_BITSET_WAIT_READERS);
  }

  // Read-Lock or Write-lock unlocked, wake one writer.
  // - Read->Write handoff.
  // - Write->Write handoff.
  void FutexWakeWriter() {
    ::syscall(SYS_futex, &Futex, FUTEX_PRIVATE_FLAG | FUTEX_WAKE_BITSET, 1, nullptr, nullptr, FUTEX_BITSET_WAIT_WRITERS);
  }

  // Write-lock unlocked, wake read-locks waiting.
  void FutexWakeReaders() {
    // Wake all readers.
    // NOTE(review): INT_MAX comes from <climits>, which this header does not include directly — confirm it is pulled in transitively.
    ::syscall(SYS_futex, &Futex, FUTEX_PRIVATE_FLAG | FUTEX_WAKE_BITSET, INT_MAX, nullptr, nullptr, FUTEX_BITSET_WAIT_READERS);
  }
#else
  // Writers wait for the full 32-bit futex.
  void FutexWaitForWriteAvailable(uint32_t Expected) {
    WaitOnAddress(&Futex, &Expected, sizeof(Futex), INFINITE);
  }

  // Readers wait for Futex bits [31:16] to be zero.
  void FutexWaitForReadAvailable(uint32_t Expected) {
    // Readers wait on the address two bytes into the futex word, comparing against the upper 16 bits of `Expected`.
    auto ReadWaiterAddress = reinterpret_cast<uint8_t*>(&Futex) + 2;
    uint16_t smol_Expected = Expected >> 16;
    WaitOnAddress(ReadWaiterAddress, &smol_Expected, sizeof(smol_Expected), INFINITE);
  }

  // Wakes a single thread waiting on the full futex word (the address writers wait on).
  void FutexWakeWriter() {
    WakeByAddressSingle(&Futex);
  }

  // Wakes every thread waiting on the reader address (two bytes into the futex word).
  void FutexWakeReaders() {
    auto ReadWaiterAddress = reinterpret_cast<uint8_t*>(&Futex) + 2;
    WakeByAddressAll(ReadWaiterAddress);
  }
#endif

  // Reuse the SpinWaitLock WFE implementations for read/write lock acquiring with WFE.
  // Can't reuse the spin-lock directly as some bit-representations are different.
  // WFE-write-lock is less likely to occur the more read-lock threads are participating. Can still occur so good to try.
  // WFE-read-lock is actually quite likely to succeed.
  // On non-arm64 builds this performs no waiting and always returns false.
  // Return: true if the lock was acquired.
  bool Attempt_WFE_WriteLock() {
#ifdef ARCHITECTURE_arm64
    const auto Begin = FEXCore::Utils::SpinWaitLock::GetCycleCounter();
    auto Now = Begin;
    // Number of counter ticks to keep trying for (counter frequency scaled down by CYCLECOUNT_DIVISOR).
    const auto Duration = FEXCore::Utils::SpinWaitLock::CycleCounterFrequency / CYCLECOUNT_DIVISOR;

    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);
    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);

    while ((Now - Begin) < Duration) {
      if (Expected == 0) {
        // Try and grab the owned bit.
        uint32_t Desired = WRITE_OWNED_BIT;

        if (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire)) {
          return true;
        }
      }

      // One-shot attempt to wait for mask to be zero.
      Expected = FEXCore::Utils::SpinWaitLock::OneShotWFEBitComparison(&Futex, ~0U, 0U);
      Now = FEXCore::Utils::SpinWaitLock::GetCycleCounter();
    }
#endif

    return false;
  }

  // Bounded WFE spin for a shared lock. On non-arm64 builds this always returns false.
  // Return: true if the lock was acquired.
  bool Attempt_WFE_ReadLock() {
#ifdef ARCHITECTURE_arm64
    // Spin on a WFE for a short-amount of time, waiting for write-owned and writer-count to be zero.
    //  - Attempt to acquire read-lock at that point.
    //  - Don't add read-waiters bit on failure, return false.
    const auto Begin = FEXCore::Utils::SpinWaitLock::GetCycleCounter();
    auto Now = Begin;
    const auto Duration = FEXCore::Utils::SpinWaitLock::CycleCounterFrequency / CYCLECOUNT_DIVISOR;

    auto AtomicFutex = std::atomic_ref<uint32_t>(Futex);
    uint32_t Expected = AtomicFutex.load(std::memory_order_relaxed);
    uint32_t Desired {};

    while ((Now - Begin) < Duration) {
      if ((Expected & WRITE_OWNED_BIT) == 0 && (Expected & WRITE_WAITER_COUNT_MASK) == 0) {
        // If no write-owner and no write-waiting, try and acquire.

        Desired = Expected + READ_OWNER_INCREMENT;
        LOGMAN_THROW_A_FMT((Desired & READ_OWNER_COUNT_MASK) != 0, "Overflow in read-owners!");
        if (AtomicFutex.compare_exchange_strong(Expected, Desired, std::memory_order_acq_rel, std::memory_order_acquire)) {
          return true;
        }
      }

      // One-shot attempt to wait for mask to be zero.
      Expected = FEXCore::Utils::SpinWaitLock::OneShotWFEBitComparison(&Futex, WRITE_OWNED_BIT | WRITE_WAITER_COUNT_MASK, 0U);
      Now = FEXCore::Utils::SpinWaitLock::GetCycleCounter();
    }
#endif

    return false;
  }

  // Bit positions and increments for the fields packed into `Futex` (see the layout note below).
  constexpr static uint32_t WRITE_OWNED_BIT = 1U << 31;
  constexpr static uint32_t READ_WAITER_BIT = 1U << 30;
  constexpr static uint32_t WRITE_WAITER_OFFSET = 16;
  constexpr static uint32_t WRITE_WAITER_INCREMENT = 1U << WRITE_WAITER_OFFSET;
  constexpr static uint32_t READ_OWNER_INCREMENT = 1;

  // Count masks
  // 14-bit write-waiter count and 16-bit read-owner count.
  constexpr static uint32_t WRITE_WAITER_COUNT_MASK = 0x3FFFU << WRITE_WAITER_OFFSET;
  constexpr static uint32_t READ_OWNER_COUNT_MASK = 0xFFFFU;

  // Independent futex bit-set masks.
  // Wait for readers to drain.
  constexpr static uint32_t FUTEX_BITSET_WAIT_READERS = 1U << 0;
  // Wait for writers to drain.
  constexpr static uint32_t FUTEX_BITSET_WAIT_WRITERS = 1U << 1;

  // Only spin on WFE for 0.01ms (10k ns).
  // Dividing the counter frequency (ticks per second) by this value yields the ticks in that window.
  constexpr static uint64_t CYCLECOUNT_DIVISOR = 1'000'000'000ULL / 10'000U;

  // Layout:
  //    Bits[31]: Write-lock bit.
  //    Bits[30]: Read-waiter bit.
  // Bits[29:16]: Write-waiter count.
  //  Bits[15:0]: Read-owner count.
  uint32_t Futex {};
};
} // namespace FEXCore::Utils::WritePriorityMutex


================================================
FILE: FEXCore/Source/Utils/variable_length_integer.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <cstdio>
#include <cstdint>
#include <cstddef>
#include <limits>

namespace FEXCore::Utils {
// Variable length signed integer
// The most common encoded size is 8-bit positive, but other values can occur
//
// 8-bit:
// bit[7] = 0 - 8-bit
// bit[6:0] = 7-bit encoding
//
// 16-bit:
// byte1[7:6] = 0b10 - 16-bit
// byte1[5:0] = top 6-bits
// byte2[7:0] = Bottom 8-bits
//
// 32-bit
// byte1[7:5] = 0b110 - 32-bit
// byte1[4:0] = <reserved>
// word[31:0] = signed word
//
// 64-bit
// byte1[7:5] = 0b111 - 64-bit
// byte1[4:0] = <reserved>
// dword[63:0] = signed dword
struct vl64 final {
  // Returns the number of bytes Encode() writes for `Data`.
  // The range checks must stay in step with the ones in Encode().
  static size_t EncodedSize(int64_t Data) {
    if (Data >= vl8_min && Data <= vl8_max) {
      return sizeof(vl8_enc);
    } else if (Data >= vl16_min && Data <= vl16_max) {
      return sizeof(vl16_enc);
    } else if (Data >= vl32_min && Data <= vl32_max) {
      return sizeof(vl32_enc);
    }
    return sizeof(vl64_enc);
  }

  // Result of Decode(): the decoded value and the number of encoded bytes it occupied.
  struct Decoded {
    int64_t Integer;
    size_t Size;
  };

  // Decodes a single value starting at `data`.
  // The tag in the first byte selects the encoding; anything that is not tagged as
  // 8-bit, 16-bit or 32-bit is read as the 64-bit encoding.
  // `data` must point at as many readable bytes as the selected encoding occupies.
  static Decoded Decode(const uint8_t* data) {
    // All four views alias the same bytes; only the one selected by the tag is fully read.
    auto vl8_type = reinterpret_cast<const vl8_enc*>(data);
    auto vl16_type = reinterpret_cast<const vl16_enc*>(data);
    auto vl32_type = reinterpret_cast<const vl32_enc*>(data);
    auto vl64_type = reinterpret_cast<const vl64_enc*>(data);

    if (vl8_type->Type == vl8_type_header) {
      return {vl8_type->Integer, sizeof(vl8_enc)};
    } else if (vl16_type->HighBits.Type == vl16_type_header) {
      return {vl16_type->Integer(), sizeof(vl16_enc)};
    } else if (vl32_type->Type == vl32_type_header) {
      return {vl32_type->Integer, sizeof(vl32_enc)};
    }
    return {vl64_type->Integer, sizeof(vl64_enc)};
  }

  // Encodes `Data` at `dst` using the smallest encoding whose range contains it.
  // Returns the number of bytes written, which equals EncodedSize(Data).
  // `dst` must have room for that many bytes (at most sizeof(vl64_enc)).
  static size_t Encode(uint8_t* dst, int64_t Data) {
    auto vl8_type = reinterpret_cast<vl8_enc*>(dst);
    auto vl16_type = reinterpret_cast<vl16_enc*>(dst);
    auto vl32_type = reinterpret_cast<vl32_enc*>(dst);
    auto vl64_type = reinterpret_cast<vl64_enc*>(dst);

    if (Data >= vl8_min && Data <= vl8_max) {
      *vl8_type = {
        .Integer = static_cast<int8_t>(Data),
        .Type = vl8_type_header,
      };
      return sizeof(vl8_enc);
    } else if (Data >= vl16_min && Data <= vl16_max) {
      // Split the 14-bit value: the upper six bits share the first byte with the tag,
      // the lower eight bits fill the second byte.
      *vl16_type = {
        .HighBits {
          .Top = static_cast<int8_t>((Data >> 8) & 0xFF),
          .Type = vl16_type_header,
        },
        .LowBits = static_cast<uint8_t>(Data & 0xFF),
      };
      return sizeof(vl16_enc);
    } else if (Data >= vl32_min && Data <= vl32_max) {
      *vl32_type = {
        .Type = vl32_type_header,
        .Integer = static_cast<int32_t>(Data),
      };
      return sizeof(vl32_enc);
    }

    *vl64_type = {
      .Type = vl64_type_header,
      .Integer = Data,
    };
    return sizeof(vl64_enc);
  }

private:

  // One byte: 7-bit signed payload with a 1-bit tag declared after it.
  struct vl8_enc {
    int8_t Integer : 7;
    uint8_t Type   : 1;
  };
  static_assert(sizeof(vl8_enc) == 1);

  // Two bytes: a 2-bit tag plus the upper six payload bits, followed by the lower eight payload bits.
  struct vl16_enc {
    struct {
      int8_t Top   : 6;
      uint8_t Type : 2;
    } HighBits;
    uint8_t LowBits;

    // Reassembles the 14-bit signed payload.
    int64_t Integer() const {
      // `Top` is a signed bitfield, so reading it already sign-extends the upper six bits;
      // no further sign-extension is needed once the low byte is merged in.
      const int64_t High = HighBits.Top;
      return (High << 8) | LowBits;
    }
  };
  static_assert(sizeof(vl16_enc) == 2);

  // Five bytes: a full tag byte followed by an unaligned 32-bit payload.
  struct FEX_PACKED vl32_enc {
    uint8_t Type;
    int32_t Integer;
  };
  static_assert(sizeof(vl32_enc) == 5);

  // Nine bytes: a full tag byte followed by an unaligned 64-bit payload.
  struct FEX_PACKED vl64_enc {
    uint8_t Type;
    int64_t Integer;
  };
  static_assert(sizeof(vl64_enc) == 9);

  // Maximum ranges for encodings.

  // vl8 can hold a signed 7-bit integer.
  // Encoded in one 8-bit value.
  constexpr static int64_t vl8_encoded_bits = 7;
  constexpr static int64_t vl8_type_header = 0;
  constexpr static int64_t vl8_min = std::numeric_limits<int64_t>::min() >> ((sizeof(int64_t) * 8) - vl8_encoded_bits);
  constexpr static int64_t vl8_max = std::numeric_limits<int64_t>::max() >> ((sizeof(int64_t) * 8) - vl8_encoded_bits);

  // vl16 can hold a signed 14-bit integer.
  // Encoded in one 16-bit value.
  constexpr static int64_t vl16_encoded_bits = 14;
  constexpr static int64_t vl16_type_header = 0b10;
  constexpr static int64_t vl16_min = std::numeric_limits<int64_t>::min() >> ((sizeof(int64_t) * 8) - vl16_encoded_bits);
  constexpr static int64_t vl16_max = std::numeric_limits<int64_t>::max() >> ((sizeof(int64_t) * 8) - vl16_encoded_bits);

  // vl32 can hold a signed 32-bit integer.
  // Encoded in an 8-bit tag and a 32-bit value.
  constexpr static int64_t vl32_encoded_bits = 32;
  constexpr static int64_t vl32_type_header = 0b1100'0000;
  constexpr static int64_t vl32_min = std::numeric_limits<int32_t>::min();
  constexpr static int64_t vl32_max = std::numeric_limits<int32_t>::max();

  // vl64 can hold a signed 64-bit integer.
  // Encoded in an 8-bit tag and a 64-bit value.
  constexpr static int64_t vl64_encoded_bits = 64;
  constexpr static int64_t vl64_type_header = 0b1110'0000;
  constexpr static int64_t vl64_min = std::numeric_limits<int64_t>::min();
  constexpr static int64_t vl64_max = std::numeric_limits<int64_t>::max();
};

// Variable length pair that optimizes around FEXCore's JITRIPReconstruction.
//
// 8-bit:
// bit[7]   = 0 - 8-bit
// bit[6:4] = x86 RIP value minus 1, 3-bit unsigned. [1 - 8] byte range.
// bit[3:0] = ARM PC value divided by 4, minus 1, 4-bit unsigned. [4 - 64] byte range.
//
// 16-bit:
// byte1[7:6] = 0b10 - 16-bit
// byte1[5:0] = x86 RIP value, 6-bit signed. [-32 - 31] byte range.
// byte2[7:0] = ARM PC value divided by 4, 8-bit signed. [-512 - 508] byte range.
//
// 32-bit and 64-bit don't attempt to do any compression beyond range checks.
// 32-bit
// byte1[7:5] = 0b110 - 32-bit
// byte1[4:0] = <reserved>
// word1[31:0] = signed word (ARM PC)
// word2[31:0] = signed word (x86 RIP)
//
// 64-bit
// byte1[7:5] = 0b111 - 64-bit
// byte1[4:0] = <reserved>
// dword1[63:0] = dword (ARM PC)
// dword2[63:0] = dword (x86 RIP)

struct vl64pair final {
public:
  // Returns the number of bytes Encode() writes for this pair.
  // Uses the same can_encode_* predicates, in the same order, as Encode().
  static size_t EncodedSize(uint64_t data_arm, uint64_t data_rip) {
    if (can_encode_vl8(data_arm, data_rip)) {
      return sizeof(vl8_enc);
    } else if (can_encode_vl16(data_arm, data_rip)) {
      return sizeof(vl16_enc);
    } else if (can_encode_vl32(data_arm, data_rip)) {
      return sizeof(vl32_enc);
    }
    return sizeof(vl64_enc);
  }

  // Result of Decode(): both decoded values and the number of encoded bytes they occupied.
  struct Decoded {
    uint64_t IntegerARMPC;
    uint64_t IntegerX86RIP;
    size_t Size;
  };

  // Decodes a single pair starting at `data`.
  // The tag in the first byte selects the encoding; anything that is not tagged as
  // 8-bit, 16-bit or 32-bit is read as the 64-bit encoding.
  // `data` must point at as many readable bytes as the selected encoding occupies.
  static Decoded Decode(const uint8_t* data) {
    // All four views alias the same bytes; only the one selected by the tag is fully read.
    auto vl8_type = reinterpret_cast<const vl8_enc*>(data);
    auto vl16_type = reinterpret_cast<const vl16_enc*>(data);
    auto vl32_type = reinterpret_cast<const vl32_enc*>(data);
    auto vl64_type = reinterpret_cast<const vl64_enc*>(data);

    if (vl8_type->Type == vl8_type_header) {
      return Decode(vl8_type);
    } else if (vl16_type->HighBits.Type == vl16_type_header) {
      return Decode(vl16_type);
    } else if (vl32_type->Type == vl32_type_header) {
      return Decode(vl32_type);
    }
    return {vl64_type->IntegerARMPC, vl64_type->IntegerX86RIP, sizeof(vl64_enc)};
  }

  // Encodes the pair at `dst` using the first (smallest) encoding that can represent it.
  // Returns the number of bytes written, which equals EncodedSize(data_arm, data_rip).
  // `dst` must have room for that many bytes (at most sizeof(vl64_enc)).
  static size_t Encode(uint8_t* dst, uint64_t data_arm, uint64_t data_rip) {
    auto vl8_type = reinterpret_cast<vl8_enc*>(dst);
    auto vl16_type = reinterpret_cast<vl16_enc*>(dst);
    auto vl32_type = reinterpret_cast<vl32_enc*>(dst);
    auto vl64_type = reinterpret_cast<vl64_enc*>(dst);

    if (can_encode_vl8(data_arm, data_rip)) {
      // Both fields are stored biased by one so that zero is never wasted.
      // data_arm is a non-zero multiple of 4 here, so `(data_arm - 1) >> 2` equals `(data_arm >> 2) - 1`.
      *vl8_type = {
        .IntegerARMPC = static_cast<uint8_t>((data_arm - 1) >> vl8_arm_align_bits),
        .IntegerX86RIP = static_cast<uint8_t>(data_rip - 1),
        .Type = vl8_type_header,
      };
      return sizeof(vl8_enc);
    } else if (can_encode_vl16(data_arm, data_rip)) {
      // Signed values; the ARM value is stored divided by its alignment.
      *vl16_type = {
        .HighBits {
          .IntegerX86RIP = static_cast<int8_t>(static_cast<int64_t>(data_rip)),
          .Type = vl16_type_header,
        },
        .IntegerARMPC = static_cast<int8_t>(static_cast<int64_t>(data_arm) >> vl16_arm_align_bits),
      };
      return sizeof(vl16_enc);
    } else if (can_encode_vl32(data_arm, data_rip)) {
      *vl32_type = {
        .Type = vl32_type_header,
        .IntegerARMPC = static_cast<int32_t>(data_arm),
        .IntegerX86RIP = static_cast<int32_t>(data_rip),
      };
      return sizeof(vl32_enc);
    }

    *vl64_type = {
      .Type = vl64_type_header,
      .IntegerARMPC = data_arm,
      .IntegerX86RIP = data_rip,
    };
    return sizeof(vl64_enc);
  }

private:
  // One byte: 4-bit ARM field, 3-bit x86 field, 1-bit tag.
  struct vl8_enc {
    uint8_t IntegerARMPC  : 4;
    uint8_t IntegerX86RIP : 3;
    uint8_t Type          : 1;
  };
  static_assert(sizeof(vl8_enc) == 1);

  // Undoes the minus-one bias and the alignment shift applied by Encode().
  static inline Decoded Decode(const vl8_enc* enc) {
    const uint64_t data_arm = enc->IntegerARMPC;
    const uint64_t data_rip = enc->IntegerX86RIP;
    return {(data_arm + 1) << vl8_arm_align_bits, data_rip + 1, sizeof(vl8_enc)};
  }

  // Two bytes: 6-bit signed x86 field sharing a byte with the 2-bit tag, then an 8-bit signed ARM field.
  struct vl16_enc {
    struct {
      int8_t IntegerX86RIP : 6;
      uint8_t Type         : 2;
    } HighBits;
    int8_t IntegerARMPC;
  };
  static_assert(sizeof(vl16_enc) == 2);

  // Sign-extends both fields to 64 bits and restores the ARM alignment shift.
  static inline Decoded Decode(const vl16_enc* enc) {
    int64_t arm_pc = enc->IntegerARMPC << vl16_arm_align_bits;
    int64_t x86_rip = enc->HighBits.IntegerX86RIP;
    return {static_cast<uint64_t>(arm_pc), static_cast<uint64_t>(x86_rip), sizeof(vl16_enc)};
  }

  // Nine bytes: a full tag byte followed by two unaligned signed 32-bit values.
  struct FEX_PACKED vl32_enc {
    uint8_t Type;
    int32_t IntegerARMPC;
    int32_t IntegerX86RIP;
  };
  static_assert(sizeof(vl32_enc) == 9);

  // Sign-extends both 32-bit fields to 64 bits.
  static inline Decoded Decode(const vl32_enc* enc) {
    int64_t arm_pc = enc->IntegerARMPC;
    int64_t x86_rip = enc->IntegerX86RIP;
    return {static_cast<uint64_t>(arm_pc), static_cast<uint64_t>(x86_rip), sizeof(vl32_enc)};
  }

  // Seventeen bytes: a full tag byte followed by two unaligned 64-bit values.
  struct FEX_PACKED vl64_enc {
    uint8_t Type;
    uint64_t IntegerARMPC;
    uint64_t IntegerX86RIP;
  };
  static_assert(sizeof(vl64_enc) == 17);

  // vl8 can hold two small unsigned integers.
  // Encoded in 8-bit.
  constexpr static int64_t vl8_type_header = 0;
  constexpr static int64_t vl8_arm_min = 1;
  constexpr static int64_t vl8_arm_max = 16;
  constexpr static int64_t vl8_arm_align_bits = 2;
  constexpr static int64_t vl8_arm_shift_mask = (1U << vl8_arm_align_bits) - 1;
  constexpr static int64_t vl8_pc_min = 1;
  constexpr static int64_t vl8_pc_max = 8;
  // True when the pair fits the one-byte encoding.
  static bool can_encode_vl8(uint64_t data_arm, uint64_t data_rip) {
    // GuestPC can only be [1,8] bytes.
    if (data_rip < vl8_pc_min || data_rip > vl8_pc_max) {
      return false;
    }
    // Unaligned doesn't fit at all.
    if (data_arm & vl8_arm_shift_mask) {
      return false;
    }

    // HostPC can only be [1,16] instructions.
    int64_t ShiftedHostPC = data_arm >> vl8_arm_align_bits;
    if (ShiftedHostPC < vl8_arm_min || ShiftedHostPC > vl8_arm_max) {
      return false;
    }

    return true;
  }

  // vl16 can hold two small signed integers.
  // Encoded in one 16-bit value.
  constexpr static int64_t vl16_type_header = 0b10;
  constexpr static int64_t vl16_arm_min = -128;
  constexpr static int64_t vl16_arm_max = 127;
  constexpr static int64_t vl16_arm_align_bits = 2;
  constexpr static int64_t vl16_arm_shift_mask = (1U << vl16_arm_align_bits) - 1;
  constexpr static int64_t vl16_pc_min = -32;
  constexpr static int64_t vl16_pc_max = 31;
  // True when the pair, reinterpreted as signed, fits the two-byte encoding.
  static bool can_encode_vl16(int64_t data_arm, int64_t data_rip) {
    // GuestPC can only be [-32,31] bytes.
    if (data_rip < vl16_pc_min || data_rip > vl16_pc_max) {
      return false;
    }

    // Unaligned doesn't fit at all.
    if (data_arm & vl16_arm_shift_mask) {
      return false;
    }

    // HostPC can only be [-128,127] instructions.
    int64_t ShiftedHostPC = data_arm >> vl16_arm_align_bits;
    if (ShiftedHostPC < vl16_arm_min || ShiftedHostPC > vl16_arm_max) {
      return false;
    }

    return true;
  }

  // vl32 can hold two signed 32-bit integers.
  // Encoded in 8-bit and two 32-bit values.
  constexpr static int64_t vl32_type_header = 0b1100'0000;
  constexpr static int64_t vl32_min = std::numeric_limits<int32_t>::min();
  constexpr static int64_t vl32_max = std::numeric_limits<int32_t>::max();
  // True when both values, reinterpreted as signed, fit in an int32_t.
  static bool can_encode_vl32(int64_t data_arm, int64_t data_rip) {
    if (data_rip < vl32_min || data_rip > vl32_max) {
      return false;
    }
    if (data_arm < vl32_min || data_arm > vl32_max) {
      return false;
    }

    return true;
  }

  // vl64 can hold two 64-bit integers.
  // Encoded in 8-bit and two 64-bit values.
  constexpr static int64_t vl64_type_header = 0b1110'0000;
};

} // namespace FEXCore::Utils


================================================
FILE: FEXCore/docs/CPUBackends.md
================================================
# FEXCore CPU Backends
---
FEXCore supports multiple CPU emulation backends. All of which ingest the IR that we have been generating.

## IR Interpreter
The first one is the easiest. This just walks the IR list and interprets the IR as it goes through it. It isn't meant to be fast and is for debugging purposes.
This is used to easily inspect what is going on with the code generation and making sure logic is sound. Will most likely last into perpetuity since it isn't exactly difficult to maintain and it is useful to have around.

## IR JIT
**Not yet implemented**
This is meant to be our first port of call for JIT compilation and will serve multiple purposes. It'll be the JIT that is used for our runtime compilation of code.
This means it needs to be fast during compilation and have decent runtime performance.
Good chance that we will need to implement multiple of these depending on host architecture with some code reuse between them.
This JIT will also be what we use for gathering sampling data for passing off to another JIT for tiered recompilation and offline compilation later.
Should use xbyak for our x86-64 host and Vixl for our AArch64 host. For other targets in the future we will see what is available

# Future ideas
---
* Create an inline ASM or JIT'd dispatcher loop. Will allow our JITs to be more optimal by reserving more registers for guest state.
* WebAssembly or other browser language?
  * Might allow decent runtime performance of things emulated in a browser. Could be interesting.


================================================
FILE: FEXCore/docs/CustomCPUBackend.md
================================================
# FEXCore custom CPU backends
---
Custom CPU backends can be useful for testing purposes or wanting to support situations that FEXCore doesn't currently understand.
The FEXCore::Context namespace provides a `SetCustomCPUBackendFactory` function for providing a factory function pointer to the core. This function will be used if the `DEFAULTCORE` configuration option is set to `CUSTOM`.
If the guest code creates more threads then the CPU factory function will be invoked for creating a CPUBackend per thread. If you don't want a unique CPUBackend object per thread then that needs to be handled by the user.

It's recommended to store the pointers provided to the factory function for later use.
`FEXCore::Context::Context*` - Is a pointer to previously generated context object
`FEXCore::Core::ThreadState*` - Is a pointer to a thread's state. Lives for as long as the guest thread is alive.
To use this factory, one must override the provided `FEXCore::CPU::CPUBackend` class with a custom one. This factory function then should return a newly allocated class.

`FEXCore::CPU::CPUBackend::GetName` - Returns an `std::string` for the name of this core
`FEXCore::CPU::CPUBackend::CompileCode` - Provides the CPUBackend with potentially an IR and DebugData for compiling code. Returns a pointer that needs to be long lasting to a piece of code that will be executed for the particular RIP.
`FEXCore::CPU::CPUBackend::Initialize` - Called after the guest memory is initialized and all state is ready for the code to start initializing. Gets called just before the CPUBackend starts executing code for the first time.


================================================
FILE: FEXCore/docs/Frontend.md
================================================
# FEXCore Frontend
---
The FEXCore frontend's job is to translate an incoming x86-64 instruction stream in to a more easily digested version of x86.
This effectively expands x86-64 instruction encodings to be more easily ingested later on in the process.
This ends up being essential to allowing our IR translation step to be less strenuous. It can decode a "common" expanded instruction format rather than various things that x86-supports.
For a simple example, x86-64's primary op table has ALU ops that duplicate themselves at least six times with minor differences between each. The frontend is able to decode a large amount of these ops to the "same" op that the IR translation understands more readily.
This works for most instructions that follow a common decoding scheme, although there are instructions that don't follow the rules and must be handled explicitly elsewhere.

An example of decoded instructions:
```
00 C0: add al,al
04 01: add al, 0x1
```
These two instructions have a different encoding scheme but they are just an add.
They end up decoding to a generic format with the same destination operand but different sources.
May look subtle but there end up being far more complex cases and we don't want to handle hundreds of instructions differently.
After the frontend is done decoding the instruction stream, it passes the output over to the OpDispatcher for translating to our IR.

## Multiblock
---
The Frontend has an additional duty. Since it is the main piece of code that understands the guest x86-64 code, it is also what does analysis of control flow to determine if we can end up compiling multiple blocks of guest code.
The Frontend already has to determine if it has hit a block ending instruction. This is anything that changes control flow. This feeds in to the analysis system to look at conditional branches to see if we can keep compiling code at the target location in the same functional unit.

Short example:
```
test eax, eax
jne .Continue
ret           <--- We can continue past this instruction, which is an unconditional block ender
.Continue:
```

These sorts of patterns crop up extensively in compiled code. A large amount of traditional JITs will end up ending the block at any sort of conditional branch instruction.
If the analysis can determine the target conditional branch location, we can then know that the code can keep compiling past an unconditional block ender instruction.
This works for both backwards branches and forward branches.

### Additional reading
---
There are other emulators out there that implement multiblock JIT compilation with some success.
The best example of this that I know of is the [Dolphin GameCube and Wii Emulator](https://github.com/dolphin-emu/dolphin), where I implemented the initial multiblock implementation.
One of the major limitations with a console emulator is that you can run in to infinite loops on backedges when using multiblock compilation. This is due to console emulation being able to run an infinite loop and let Interrupts or some other state cause it to break out.
Luckily since we are a userspace emulator we don't have to deal with this problem. If an application has written an infinite loop, then without another thread running, it'll be a true infinite loop.
Another fortunate point is that we are going to emulate the strong memory model of x86-64 and also support true threads. This means that we don't need to do any manual thread scheduling in our emulator or switch between virtual threads.


================================================
FILE: FEXCore/docs/IR.md
================================================
# FEXCore IR
---
The IR for the FEXCore is an SSA based IR that is generated from the incoming x86-64 assembly.
SSA is quite nice to work with when translating the x86-64 code to the IR, when optimizing that code with custom optimization passes, and also passing that IR to our CPU backends.

## Emulation IR considerations
* We have explicitly sized IR variables
  * Supports traditional element sizes of 1,2,4,8 bytes and some 16byte ops
  * Supports arbitrary number of vector elements
  * The op determines if something is float or integer based.
* Clear separation of scalar IR ops and vector IR ops
  * ex, MUL versus VMUL
* We have explicit Load/Store context IR ops
  * This allows us to have a clear separation between guest memory and tracked x86-64 state
* We have an explicit CPUID IR op
  * This allows us to return fairly complex data (4 registers of data) and also having an easier optimization for constant CPUID functions
  * So if we const-prop the CPUID function then it'll just const-prop further along
* We have an explicit syscall op
  * The syscall op is fairly complex as well, same with CPUID that if the syscall function is const-prop then we can directly call the syscall handler
  * Can save overhead by removing call overheads
* The IR supports branching from one block to another
  * Has a conditional branch instruction that either branches to the target branch or falls through to the next block
  * Has an unconditional branch to explicitly jump to a block instead of falling through
  * **There is a desire to follow LLVM semantics around block limitations but it isn't currently strictly enforced**
* Supports a debug `Print` Op for printing out values for debug viewing
* Supports explicit Load/Store memory IR ops
  * This is for accessing guest memory and will do the memory offset translation in to the VM's memory space
  * This is done by just adding the VM memory base to the 64bit address passed in
  * This is done in a manner that the application **can** escape from the VM and isn't meant to be safe
  * There is an option for JITs to validate the memory region prior to accessing for ensuring correctness
* IR is generated from a JSON file, fairly straightforward to extend.
  * Read the python generation file to determine the extent of what it can do

## IR function considerations
The first SSA node is a special case node that is considered invalid. This means %0 will always be invalid for "null" node checks.
The first real SSA node also has to be an IRHeader node. This means it is safe to assume that %1 will always be an IRHeader.


```(%%1) IRHeader 0x41a9a0, %%2, 5```

The header provides information about that function like the entry point address.
Additionally it also points to the first `CodeBlock` IROp


```(%%2) CodeBlock %%7, %%168, %%3```


* The `CodeBlock` Op is a jump target and must be treated as if it'll be jumped to from other blocks
  * It contains pointers to the starting op and ending op and they are inclusive
  * It also contains a pointer to the next CodeBlock in a singly linked list
  * The last CodeBlock will point to the InvalidNode as the next block


### Example code block

```
(%%3) CodeBlock %%169, %%173, %%4
	(%%169) BeginBlock %3
	%170 i64 = Constant 0x41a9e1
	(%%171) StoreContext %170 i64, 0x8, 0x0
	(%%172) ExitFunction
	(%%173) EndBlock %3
```

* BeginBlock points back to the CodeBlock SSA which helps with iterating across multiple blocks
* EndBlock is the ending op of a CodeBlock and also points back to the CodeBlock SSA.
* ExitFunction will leave the function immediately and return back to the dispatcher
* Every IR Op has an SSA value associated with it used for tracking the op itself
	* If the IROp doesn't have a real destination then it is invalid to use it as an argument in most other ops

## In-memory representation

The in-memory representation of the IR may be a bit confusing when initially viewed, and it may be confusing again once you start dealing with optimizations.
Currently the IR Generation is tied to the `OpDispatchBuilder` class. This class handles translating decoded x86 to our IR representation.
When generating IR inside of the `OpDispatchBuilder` it is straight forward, just call the IR generation ops.

### FEXCore::IR::IntrusiveAllocator
This is an intrusive allocator that is used by the `OpDispatchBuilder` for storing IR data. It is a simple linear arena allocator without resizing capabilities.

### OpDispatchBuilder
OpDispatchBuilder provides `IRListView ViewIR()` for handling the IR outside of the class:
* Returns a wrapper container class that allows you to view the IR. This doesn't take ownership of the IR data.
* If the OpDispatcherBuilder changes its IR then changes are also visible to this class

This class uses two IntrusiveAllocator objects for tracking IR data. `ListData` and `Data` are the object names.
* `ListData` is for tracking the doubly linked list of nodes
	* This ONLY allocates `FEXCore::IR::OrderedNode` objects
	* When an OrderedNode is allocated its allocation location (NodeOffset) is just the offset from the base pointer
	* This allows us to only use uint32_t memory offsets to compact the IR
	* Additionally using offsets allows us the freedom to freely move our IR in memory without costly pointer adjustment
	* This means everything is fixed size allocated (SSA Node number calculation is just `AllocationOffset / sizeof(OrderedNode)`)
	* OrderedNodes are what the SSA arguments are pointing to in the end


### OrderedNode
This is a doubly linked list of all of our IR nodes. This allows us to walk forward or backward over the IR and they must be ordered correctly to ensure dominance of SSA values.
* Contains `OrderedNodeHeader`
	* Contains `OpNodeWrapper Value`
		* Points to the `IROp_Header` backing op for this SSA node
	* Contains `OrderedNodeWrapper Next`
		* Points to the next `OrderedNode`
	* Contains `OrderedNodeWrapper Previous`
		* Points to the previous `OrderedNode`
* Contains the NumUses
	* This allows us to easily walk the list backwards and DCE the ops that have NumUses == 0
* `IROp_Header *Op(uintptr_t Base)`
	* Allows you to get the backing IR data for this SSA value

### NodeWrapperBase<typename Type> - Type for `OrderedNodeWrapper` and `OpNodeWrapper`
* `using OpNodeWrapper = NodeWrapperBase<IROp_Header>`
* `using OrderedNodeWrapper = NodeWrapperBase<OrderedNode>`
* This is a class to let you more easily convert NodeOffsets in to their real backing pointer
* `GetNode(uintptr_t Base)` allows you to pass in the base pointer from the backing Intrusive allocator and get the object
	* **This can be confusing**
	* A good rule of thumb is to only ever use `GetNode(ListDataBegin)` with OrderedNodeWrapper
	* Then once you have the `OrderedNode*` from GetNode, Use the `Op(IRDataBegin)` function to get the IR data.
	* I do **NOT** recommend using `GetNode` directly from `OpNodeWrapper` as it is VERY easy to mess it up

### NodeIterator
Provides a fairly straightforward interface that allows easily walking the IR nodes with C++ increment and decrement operations.
Only iterates over a single block

#### Example usage
```cpp
	IR::NodeIterator After = ...;
	IR::NodeIterator End = ...;

	while (After != End) {
		// NodeIterator() returns a pair of pointers to the OrderedNode and IROp data
		// You can unpack the result with structured bindings
		auto [CodeNode, IROp] = After();

		// IROp_Header contains a bunch of information about the IR object
		// We can convert it with the object's C<typename Type> or CW<typename Type> functions

		switch(IROp->Op) {
			case IR::OP_ADD: {
				FEXCore::IR::IROp_Add const *Op = IROp->C<FEXCore::IR::IROp_Add>();
				/* We can now access members inside of IROp_Add that were previously unavailable
					 You can still access the header definitions from Op->Header */
				break;
			}
			/* ... */
		}
		// Go to the next IR Op
		++After;
	}

```

### AllNodesIterator
This is like NodeIterator, except that it will cross block boundaries.

### IRListView.GetBlocks()
Provides a range for easy iterating over all the blocks in a multi-block with NodeIterator

#### Example usage
```c++
	for (auto [BlockNode, BlockHeader] : CurrentIR.GetBlocks()) {
		// Do stuff for each block
	}
```

### IRListView.GetCode(BlockNode)
Provides a range for easy iterating over all the code in a block

#### Example usage
```c++
	for (auto [CodeNode, IROp] : CurrentIR.GetCode(BlockNode)) {
		// Do stuff for each op

		switch(IROp->Op) {
			case IR::OP_ADD: {
				FEXCore::IR::IROp_Add const *Op = IROp->C<FEXCore::IR::IROp_Add>();
				// Do stuff for each Add op.

				break;
			}
		}
	}
```

### IRListView.GetAllCode()
Like GetCode, except it uses AllNodesIterator to allow easy iterating over every single op in the entire Multiblock

#### Example usage
```c++
	for (auto [CodeNode, IROp] : CurrentIR.GetAllCode()) {
		// Do stuff for each op
	}
```

## JSON file
An example of what the IR json looks like
```
"StoreContext": {
  "SSAArgs": "1",
  "Args": [
    "uint8_t", "Size",
    "uint32_t", "Offset"
  ]
},
```
The json entry name will be the name of the IR op and the dispatcher function.
This means that for the example above you'll get a `_StoreContext(...)` dispatcher function generated

### JSON IR element options
* `HasDest`
  * This is used on ops that return a value. Used for tracking whether ops return data
* `SSAArgs`
  * These are the number of arguments that the op consumes that are SSA based
  * Needs to come from previous ops that had a destination
* `SSANames`
  * Allows you to name the SSA arguments in an op
  * Otherwise the Op names will only be able to be accessed from the Header of the IR through its arguments array
* `Args`
  * These are defined arguments that are stored in the IR encoding that aren't SSA based
  * Useful for things that are constant encoded and won't change after the fact
* `FixedDestSize`
  * This allows you to override the op's destination size in bytes
  * Most ops will implicitly calculate their destination size through the maximum sizes of the IR arguments passed in
* `DestSize`
  * This allows an IR size override that isn't just a size in bytes
  * This can let the size of the op be another argument or something more extensive
* `RAOverride`
  * This allows an op to take regular SSA arguments (So optimization passes will still be aware of them) but also not have them be register allocated
  * Useful for block handling ops, where blocks aren't something that get register allocated but still need to have their uses tracked
* `HelperGen`
  * If there is a complex IR Op that needs to be defined but you don't want an automatic dispatcher generated then this disables the generation of the
    dispatcher
* `Last`
  * This is a special element only used for the last element in the list


================================================
FILE: FEXCore/docs/MemoryModelEmulation.md
================================================
# What is x86-TSO and what is different compared to ARM's weak memory model?
x86's memory model is a very strictly coherent memory model that effectively mandates that all memory accesses are "atomic". While true atomicity
is a bit stricter than what x86-TSO requires, we still need to emulate it on ARM using atomic instructions. We are also required to emulate this
strictness with unaligned accesses, which is due to x86 CPUs allowing unaligned atomics for "free" within a cacheline. Intel takes this a step
further by allowing full atomics with a feature called "split-locks"; AMD gains this same feature in Zen 5.

# Emulating loads
Due to x86 SIB addressing, this can happen on most instructions. FEX emulates these in a variety of ways depending on features.
Most instructions are emulated with an atomic instruction but we also implement a feature called "half-barrier" atomics for unaligned atomics.

## Base ARMv8.0
- Addressing limitations
  - Register only
This is emulated using an atomic load instruction plus a nop.
- On unaligned access the code gets backpatched to a non-atomic load plus a memory barrier

## FEAT_LRCPC
- Addressing limitations
  - Register only
This matches the base ARMv8.0 implementation but adds new instructions that match x86-TSO behaviour, making the emulation slightly quicker.
- On unaligned access it still gets backpatched to non-atomic load plus a memory barrier.

## FEAT_LRCPC2
- Addressing limitations
  - Register plus 9-bit signed immediate (-256, 255)
Adds some new instructions that allow immediate encoding inside of the previous LRCPC instructions

## FEAT_LRCPC3
Adds a handful of GPR instructions that aren't super interesting

FEX doesn't currently implement these since no hardware supports it.

- ldapr - Post-index load for stack
- ldiapp - Post-index load pair for stack
- stilp - pre-index store pair for stack
- stlr - pre-index store for stack

# Emulating stores
Again due to x86 SIB addressing, this can also happen on most instructions. There are fewer options for FEX here than for loads, so in most cases
this just turns into an atomic store with half-barrier backpatching for unaligned accesses.

## FEAT_LRCPC, FEAT_LRCPC2
Adds nothing for emulating stores

## FEAT_LRCPC3

# Emulating atomic instructions
x86 has atomic memory operations that can do a variety of operations. If an atomic operation happens to be unaligned, FEX will emulate the
operation inside the signal handler.

## CASPair - cmpxchg8b/cmpxchg16b

## Base ARMv8.0
- Addressing limitations
  - Register only
This is emulated with a ldaxp+stlxp pair of instructions.

## FEAT_LSE
- Addressing limitations
  - Register only
Adds a new caspal instruction that does the operation almost exactly like x86.

## CAS - cmpxchg

## Base ARMv8.0
- Addressing limitations
  - Register only
Similar to CASPair but now only uses a ldaxr+stlxr pair

## FEAT_LSE
- Addressing limitations
  - Register only
Similar to CASPair adds a new casal instruction that operates basically like x86

# AtomicFetch<Op>
## Op from the following list
- Add
- Sub
- And
- CLR
- Or
- Xor
- Neg
- Swap

## Base ARMv8.0
- Addressing limitations
  - Register only
All operations get emulated with an ldaxr+<op>+stlxr instruction sequence

## FEAT_LSE
- Addressing limitations
  - Register only
Almost all operations now have a native atomic memory operation instruction. The only outlier is atomicNeg which doesn't have an LSE equivalent and
uses the ARMv8.0 implementation.

# Vector loads
Since almost all memory accesses on x86 are TSO, this includes vector operations.

## Base ARMv8.0
- Addressing limitations
  - Register plus 9-bit signed immediate (-256, 255)
  - Register plus 12-bit unsigned scaled immediate (Scaled by access size)
Emulated using half-barriers, which means a load+dmb

## FEAT_LRCPC3
- LDAP1 added for element loads. Register only address encoding
- LDAPUR added for vector register loads, supports 9-bit simm offset

# Vector stores
Just like loads, these are emulated using half-barriers

## Base ARMv8.0
- Addressing limitations
  - Register plus 9-bit signed immediate (-256, 255)
  - Register plus 12-bit unsigned scaled immediate (Scaled by access size)
Emulated using half-barriers, which means a dmb+str


## FEAT_LRCPC3
- STL1 added for element stores. Register only address encoding
- STLUR added for vector register stores, supports 9-bit simm offset

# Addressing limitations depending on operating mode
## GPR loadstores
### TSO Emulation disabled
- Register only (ldr/str)
- Register + Register + scale (ldr/str)
- Register + 9-bit simm (ldur/stur)
- Register + 12-bit unsigned scaled imm (ldr/str)

### TSO Emulation enabled
- Register only (ldar/stlr)
- Register only (ldapr/stlr) - FEAT_LRCPC
- Register + 9-bit simm (ldapur/stlur) - FEAT_LRCPC2

## Vector loadstores
### TSO Emulation disabled
- Register only (ldr/str)
- Register + Register + scale (ldr/str)
- Register + 9-bit simm (ldur/stur)
- Register + 12-bit unsigned scaled imm (ldr/str)

### TSO Emulation enabled
- Same as TSO emulation disabled due to half-barrier implementation

### TSO Emulation enabled (FEAT_LRCPC3)
- Register only (ldap1/stl1) - Element loadstore
- Register + 9-bit simm (ldapur/stlur)

## Atomic memory operations
Always TSO emulation enabled, always register only.


================================================
FILE: FEXCore/docs/OpDispatcher.md
================================================
# FEXCore OpDispatcher
---
The OpDispatcher is the step of the recompiler that takes the output from the Frontend and translates it to our IR.
Since the x86-64 instruction set is so large (>1000 instructions in the current FEXCore tables) we need to reduce this down to something more manageable.
We will ingest our decoded x86-64 instructions and translate them down to more basic IR operations. The number of IR ops is currently in the dozens, which is a lot easier to handle.
Once we have translated to the IR then we need to pass the IR over to optimization passes or our JIT cores.

Ex:
```
 mov rax,0x1
 mov rdi,0x1
 mov rsi,0x1f
 mov rdx,0x1
 syscall 
 hlt
 ```
 Translates to the IR of:
 ```
BeginBlock
        %8 i32 = Constant 0x1
        StoreContext 0x8, 0x8, %8
        %64 i32 = Constant 0x1
        StoreContext 0x8, 0x30, %64
        %120 i32 = Constant 0x1f
        StoreContext 0x8, 0x28, %120
        %176 i32 = Constant 0x1
        StoreContext 0x8, 0x20, %176
        %232 i64 = LoadContext 0x8, 0x8
        %264 i64 = LoadContext 0x8, 0x30
        %296 i64 = LoadContext 0x8, 0x28
        %328 i64 = LoadContext 0x8, 0x20
        %360 i64 = LoadContext 0x8, 0x58
        %392 i64 = LoadContext 0x8, 0x48
        %424 i64 = LoadContext 0x8, 0x50
        %456 i64 = Syscall %232, %264, %296, %328, %360, %392, %424
        StoreContext 0x8, 0x8, %456
        BeginBlock
        EndBlock 0x1e
        ExitFunction
```
### Multiblock
---
An additional duty of the OpDispatcher is to handle the metadata that the Frontend provides for supporting multiblock.
The IR provides most of the functionality required for supporting robust branching and function creation required for generating large blocks of code translated from x86-64 emulation.
This is required since in the ideal situation we will be doing function level translation of x86-64 guest code to our IR.
The IR is currently lacking any idea of flags or PHI nodes, which can be problematic when optimizing branch heavy code. The good thing is that the LLVM JIT can use a mem to reg pass to minimize a large number of this code.
It **will** be required to improve the IR further once the runtime JIT becomes a higher priority


================================================
FILE: FEXCore/include/FEXCore/Config/Config.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumOperators.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>

#include <algorithm>
#include <array>
#include <charconv>
#include <cstdint>
#include <optional>
#include <type_traits>
#include <variant>

namespace FEXCore::Config {
namespace Handler {
  // Translates the textual SMC check mode into the numeric string stored in the config.
  // "mtrack" -> "1", "full" -> "2"; "none" and any unrecognised text both yield "0".
  // The returned optional always holds a value.
  static inline std::optional<fextl::string> SMCCheckHandler(std::string_view Value) {
    if (Value == "mtrack") {
      return "1";
    }

    if (Value == "full") {
      return "2";
    }

    // Covers the explicit "none" as well as every unknown input.
    return "0";
  }
} // namespace Handler

// Identifier for each configuration option.
// One CONFIG_<enum> enumerator is emitted for every OPT_BASE expansion found in ConfigValues.inl.
enum ConfigOption {
#define OPT_BASE(type, group, enum, json, default) CONFIG_##enum,
#include <FEXCore/Config/ConfigValues.inl>
};

// NOTE(review): ENUMDEFINES presumably selects the enum-definition section of ConfigOptions.inl — confirm against that file.
#define ENUMDEFINES
#include <FEXCore/Config/ConfigOptions.inl>

// SMC check modes. The implicit values 0, 1 and 2 line up with the strings that
// Handler::SMCCheckHandler returns for "none", "mtrack" and "full" respectively.
enum ConfigSMCChecks {
  CONFIG_SMC_NONE,
  CONFIG_SMC_MTRACK,
  CONFIG_SMC_FULL,
};

// Identifies which kind of configuration source a Layer represents.
// NOTE(review): the enumerator order presumably reflects override priority between layers — confirm against the layer-merging code.
enum class LayerType {
  LAYER_GLOBAL_MAIN, ///< /usr/share/fex-emu/Config.json by default
  LAYER_MAIN,
  LAYER_ARGUMENTS,
  LAYER_GLOBAL_STEAM_APP,
  LAYER_GLOBAL_APP,
  LAYER_LOCAL_STEAM_APP,
  LAYER_LOCAL_APP,
  LAYER_USER_OVERRIDE,
  LAYER_ENVIRONMENT,
  LAYER_TOP,
};

/**
 * Converts a comma-separated list of enum names into the decimal string of their OR'd underlying values.
 *
 * @tparam PairTypes Element type of EnumPairs; needs a `.first` comparable with std::string_view and a `.second` enum value.
 * @param EnumPairs Table of name/value pairs that option names are matched against.
 * @param View Raw option text, e.g. "a,b,c".
 *
 * @return std::nullopt when View begins with a parsable decimal number (the text is meant to be used as-is),
 *         otherwise the combined mask formatted as a decimal string. Unknown names are logged and skipped.
 */
template<typename PairTypes, typename ArrayPairType>
static inline std::optional<fextl::string> EnumParser(const ArrayPairType& EnumPairs, const std::string_view View) {
  uint64_t EnumMask {};
  const auto Results = std::from_chars(View.data(), View.data() + View.size(), EnumMask);
  if (Results.ec == std::errc()) {
    // If the data is a valid number, just pass it through.
    return std::nullopt;
  }

  // Positions are kept as size_t so the values returned by find() are never narrowed.
  std::size_t Begin = 0;
  for (;;) {
    const std::size_t End = View.find(',', Begin);
    const std::string_view Option = View.substr(Begin, End == std::string_view::npos ? End : End - Begin);

    // Empty tokens (leading, doubled or trailing commas) are ignored instead of ending the parse,
    // so an input such as "a,,b" still picks up "b".
    if (!Option.empty()) {
      const auto EnumValue =
        std::find_if(EnumPairs.begin(), EnumPairs.end(), [Option](const PairTypes& Value) -> bool { return Value.first == Option; });

      if (EnumValue == EnumPairs.end()) {
        LogMan::Msg::IFmt("Skipping Unknown option: {}", Option);
      } else {
        EnumMask |= FEXCore::ToUnderlying(EnumValue->second);
      }
    }

    if (End == std::string_view::npos) {
      break;
    }
    Begin = End + 1;
  }

  return fextl::fmt::format("{}", EnumMask);
}

using StringArrayType = fextl::list<fextl::string>;

namespace detail {
  // Compile-time metadata for a ConfigOption: the C++ type of its value (`Type`) and an accessor for its
  // default (`Default()`). The primary template is only declared; the specializations are generated below.
  template<ConfigOption Option>
  struct ConfigOptionInfo;
// DEFINE_METAINFO(type, enum, default) specializes ConfigOptionInfo for CONFIG_<enum>.
// `default` is the text of a variable declaration (e.g. `const type enum`). Default() re-declares that
// variable as a block-scope `extern` and returns it, so the variable's definition is provided elsewhere.
#define DEFINE_METAINFO(type, enum, default)             \
  template<>                                             \
  struct ConfigOptionInfo<ConfigOption::CONFIG_##enum> { \
    using Type = type;                                   \
    static auto Default() {                              \
      extern default;                                    \
      return enum;                                       \
    }                                                    \
  };
// Plain options keep their declared type; string and string-array options expose their default as a std::string_view.
#define OPT_BASE(type, group, enum, json, default) DEFINE_METAINFO(type, enum, const type enum)
#define OPT_STR(group, enum, json, default) DEFINE_METAINFO(fextl::string, enum, const std::string_view enum)
#define OPT_STRARRAY(group, enum, json, default) DEFINE_METAINFO(StringArrayType, enum, const std::string_view enum)
#include <FEXCore/Config/ConfigValues.inl>
} // namespace detail

// Setters for the data directory, config directory and config file location.
// NOTE(review): `Global` presumably selects the system-wide location instead of the per-user one — confirm against the implementation.
FEX_DEFAULT_VISIBILITY void SetDataDirectory(std::string_view Path, bool Global);
FEX_DEFAULT_VISIBILITY void SetConfigDirectory(const std::string_view Path, bool Global);
FEX_DEFAULT_VISIBILITY void SetConfigFileLocation(std::string_view Path, bool Global);

// Matching getters. Note that GetConfigDirectory, unlike its two siblings, has no default for `Global`.
FEX_DEFAULT_VISIBILITY const fextl::string& GetDataDirectory(bool Global = false);
FEX_DEFAULT_VISIBILITY const fextl::string& GetConfigDirectory(bool Global);
FEX_DEFAULT_VISIBILITY const fextl::string& GetConfigFileLocation(bool Global = false);
// Returns a string for the given program; presumably the path of its application config — TODO confirm.
FEX_DEFAULT_VISIBILITY fextl::string GetApplicationConfig(const std::string_view Program, bool Global);

// Every type a single option value can be stored as inside a Layer.
using LayerValue = std::variant< fextl::string, StringArrayType, uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, uint64_t, int64_t, bool >;

// Storage of one layer: option identifier -> stored value.
using LayerOptions = fextl::unordered_map<ConfigOption, LayerValue>;

/**
 * A single source of configuration values, tagged with a LayerType and stored as a
 * ConfigOption -> LayerValue map. Load() is the pure-virtual hook that derived layers implement.
 */
class FEX_DEFAULT_VISIBILITY Layer {
public:
  explicit Layer(const LayerType _Type);
  virtual ~Layer();

  virtual void Load() = 0;

  // True when this layer currently stores a value for Option.
  bool OptionExists(ConfigOption Option) const {
    return OptionMap.count(Option) != 0;
  }

  // Returns a pointer to the stored string list, or std::nullopt when the option is absent.
  // Asserts that the stored value really is a StringArrayType.
  std::optional<StringArrayType*> All(ConfigOption Option) {
    if (const auto Entry = OptionMap.find(Option); Entry != OptionMap.end()) {
      LOGMAN_THROW_A_FMT(std::holds_alternative<StringArrayType>(Entry->second), "Tried to get config of invalid type!");
      return &std::get<StringArrayType>(Entry->second);
    }

    return std::nullopt;
  }

  // Returns a pointer to the stored string, or std::nullopt when the option is absent.
  // Asserts that the stored value really is a fextl::string.
  std::optional<fextl::string*> Get(ConfigOption Option) {
    if (const auto Entry = OptionMap.find(Option); Entry != OptionMap.end()) {
      LOGMAN_THROW_A_FMT(std::holds_alternative<fextl::string>(Entry->second), "Tried to get config of invalid type!");
      return &std::get<fextl::string>(Entry->second);
    }

    return std::nullopt;
  }

  // Every Set overload replaces whatever was stored with a fextl::string, without checking the previous type.
  void Set(ConfigOption Option, const char* Data) {
    LOGMAN_THROW_A_FMT(Data != nullptr, "Data can't be null");
    Set(Option, fextl::string(Data));
  }

  void Set(ConfigOption Option, std::string_view Data) {
    Set(Option, fextl::string(Data));
  }

  void Set(ConfigOption Option, fextl::string Data) {
    auto& Slot = OptionMap[Option];
    Slot.emplace<fextl::string>(std::move(Data));
  }

  // An empty optional leaves the layer untouched.
  void Set(ConfigOption Option, std::optional<fextl::string> Data) {
    if (!Data.has_value()) {
      return;
    }

    Set(Option, std::move(*Data));
  }

  // Appends Data to the option's string list, creating an empty list first when the option is not present yet.
  // Asserts if the option exists but holds a different type.
  void AppendStrArrayValue(ConfigOption Option, std::string_view Data) {
    auto Entry = OptionMap.find(Option);
    const bool Missing = Entry == OptionMap.end();
    if (Missing) {
      Entry = OptionMap.emplace(Option, StringArrayType {}).first;
    }

    LOGMAN_THROW_A_FMT(std::holds_alternative<StringArrayType>(Entry->second), "Tried to get config of invalid type!");
    auto& List = std::get<StringArrayType>(Entry->second);
    List.emplace_back(Data);
  }

  // Removes the option from this layer; a no-op when it is not stored.
  void Erase(ConfigOption Option) {
    OptionMap.erase(Option);
  }

  LayerType GetLayerType() const {
    return Type;
  }

  const LayerOptions& GetOptionMap() const {
    return OptionMap;
  }

protected:
  const LayerType Type;
  LayerOptions OptionMap;
};

// Lifetime management of the config subsystem.
FEX_DEFAULT_VISIBILITY void Initialize();
FEX_DEFAULT_VISIBILITY void Shutdown();

FEX_DEFAULT_VISIBILITY void Load();
// NOTE(review): presumably rebuilds the merged view of all registered layers — confirm against the implementation.
FEX_DEFAULT_VISIBILITY void ReloadMetaLayer();
FEX_DEFAULT_VISIBILITY fextl::string FindContainer();
FEX_DEFAULT_VISIBILITY fextl::string FindContainerPrefix();

// Registers a layer; ownership is transferred through the unique_ptr.
FEX_DEFAULT_VISIBILITY void AddLayer(fextl::unique_ptr<FEXCore::Config::Layer> _Layer);

// Free-function counterparts of the Layer accessors of the same names.
// NOTE(review): which layer(s) these operate on is not visible in this header — confirm against the implementation.
FEX_DEFAULT_VISIBILITY bool Exists(ConfigOption Option);
FEX_DEFAULT_VISIBILITY std::optional<StringArrayType*> All(ConfigOption Option);
// Like Get, but returns the value as a T instead of a pointer to the stored string.
template<typename T>
FEX_DEFAULT_VISIBILITY std::optional<T> GetConv(ConfigOption Option);
FEX_DEFAULT_VISIBILITY std::optional<fextl::string*> Get(ConfigOption Option);
FEX_DEFAULT_VISIBILITY void Set(ConfigOption Option, std::string_view Data);
FEX_DEFAULT_VISIBILITY void Erase(ConfigOption Option);

/**
 * Holds one configuration value of type T, read once at construction time.
 * Scalar and string values come from GetIfExists(); string-array values are filled in by GetListIfExists().
 */
template<typename T>
class FEX_DEFAULT_VISIBILITY Value {
public:
  // Single value type, default supplied with its own type.
  template<typename TT = T>
  requires (std::is_fundamental_v<TT> || std::is_same_v<TT, fextl::string>)
  Value(FEXCore::Config::ConfigOption Option, TT Default)
    : ValueData(GetIfExists(Option, Default)) {}

  // Single value type, default supplied as text.
  template<typename TT = T>
  requires (std::is_fundamental_v<TT> || std::is_same_v<TT, fextl::string>)
  Value(FEXCore::Config::ConfigOption Option, std::string_view Default)
    : ValueData(GetIfExists(Option, Default)) {}

  // Wraps an already-known value without consulting the config.
  Value(T Initial) requires (!std::is_same_v<T, StringArrayType>)
    : ValueData(std::move(Initial)) {}

  // Array value types. The default argument is accepted but unused.
  Value(FEXCore::Config::ConfigOption Option, std::string_view) requires (std::is_same_v<T, StringArrayType>)
  {
    GetListIfExists(Option, &ValueData);
  }

  operator T() const {
    return ValueData;
  }

  // Fundamental types are handed out by value...
  T operator()() const requires (std::is_fundamental_v<T>)
  {
    return ValueData;
  }

  // ...while strings are handed out by const reference.
  const fextl::string& operator()() const requires (std::is_same_v<T, fextl::string>)
  {
    return ValueData;
  }

  // Mutable access to the whole list; only available for string-array values.
  StringArrayType& All() requires (std::is_same_v<T, StringArrayType>)
  {
    return ValueData;
  }

private:
  T ValueData {};

  static T GetIfExists(FEXCore::Config::ConfigOption Option, T Default);
  static T GetIfExists(FEXCore::Config::ConfigOption Option, std::string_view Default);

  static void GetListIfExists(FEXCore::Config::ConfigOption Option, StringArrayType* List);
};

/**
 * Value specialised for one ConfigOption: both the value type and the default
 * are taken from detail::ConfigOptionInfo, so callers only name the option.
 */
template<ConfigOption Option>
struct FEX_DEFAULT_VISIBILITY Getter : public Value<typename detail::ConfigOptionInfo<Option>::Type> {
  using OptionInfo = detail::ConfigOptionInfo<Option>;

  Getter()
    : Value<typename OptionInfo::Type>(Option, OptionInfo::Default()) {}
};

/**
 * Helper for reading a config value with caching.
 *
 * Typically this is used to declare class members so that the value is read
 * on construction of the parent.
 *
 * Expands to a Getter<CONFIG_<enum>> variable called `name`.
 */
#define FEX_CONFIG_OPT(name, enum) FEXCore::Config::Getter<FEXCore::Config::ConfigOption::CONFIG_##enum> name {}

// Generates one inline Get_<enum>() function per OPT_BASE expansion in ConfigValues.inl.
// NOTE(review): OPT_BASE is redefined several times in this header without an explicit #undef, so
// ConfigValues.inl presumably undefines the OPT_* macros after use — confirm against that file.
#define OPT_BASE(type, group, enum, json, default)                              \
  /**                                                                           \
   *  Helper for reading a config value.                                        \
   *                                                                            \
   *  In contrast to FEX_CONFIG_OPT, this can be used in arbitrary expressions, \
   *  at the expense of not caching the value. Use Getter instead if the value  \
   *  is read frequently.                                                       \
   */                                                                           \
  inline auto Get_##enum() {                                                    \
    return Getter<FEXCore::Config::ConfigOption::CONFIG_##enum> {};             \
  }
#include <FEXCore/Config/ConfigValues.inl>

} // namespace FEXCore::Config


================================================
FILE: FEXCore/include/FEXCore/Core/CPUID.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>

namespace FEXCore::CPUID {
// The four 32-bit registers making up a CPUID result.
struct FunctionResults {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};

// The two 32-bit registers making up an XCR result.
struct XCRResults {
  uint32_t eax;
  uint32_t edx;
};
} // namespace FEXCore::CPUID


================================================
FILE: FEXCore/include/FEXCore/Core/CodeCache.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/functional.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/fextl/robin_map.h>

#include <atomic>
#include <cstdint>
#include <mutex>
#include <optional>
#include <shared_mutex>
#include <span>
#include <unistd.h>

namespace FEXCore {

namespace Core {
  struct InternalThreadState;
} // namespace Core

namespace HLE {
  struct SourcecodeMap;
} // namespace HLE

// Kind of relocation recorded for an offset in an executable file (see ExecutableFileInfo::Relocations).
// NOTE(review): Rel32/Rel64 presumably denote 32-bit and 64-bit wide relocations — confirm against the users of this enum.
enum class GuestRelocationType : uint32_t {
  Rel32,
  Rel64,
  // Skip blocks containing this relocation
  Skip,
};

// Generic information associated with an executable file.
struct ExecutableFileInfo {
  // Defined out of line; HLE::SourcecodeMap is only forward-declared in this header.
  ~ExecutableFileInfo();

  // Note: an undefined __clang_major__ evaluates to 0 in #if, so this branch is also taken on non-clang compilers.
#if __clang_major__ < 16
  // Workaround for broken aggregate-initialization with std::piecewise_construct
  ExecutableFileInfo(fextl::unique_ptr<HLE::SourcecodeMap>, uint64_t, fextl::string);
  ExecutableFileInfo() = default;
#endif

  // This legacy field must be assignable through const-references
  mutable fextl::unique_ptr<HLE::SourcecodeMap> SourcecodeMap;

  // Identifier of the file; 0 until assigned.
  // NOTE(review): presumably the value produced by AbstractCodeCache::ComputeCodeMapId — confirm.
  uint64_t FileId = 0;
  fextl::string Filename;
  // Relocation kind keyed by a 32-bit value, presumably the offset within the file — TODO confirm.
  fextl::robin_map<uint32_t, GuestRelocationType> Relocations;
};

// Information associated with a specific section of an executable file
struct ExecutableFileSectionInfo {
  // The file this section belongs to. Held by reference, so the ExecutableFileInfo must outlive this object.
  const ExecutableFileInfo& FileInfo;

  // Start address that the file is mapped to.
  // NOTE: Since executable files may be mapped multiple times, this can depend on the queried section.
  uintptr_t FileStartVA;

  // Start address of the section mapping
  uintptr_t BeginVA;

  // End address of the section mapping
  // NOTE(review): whether this bound is inclusive or exclusive is not visible here — confirm against users.
  uintptr_t EndVA;
};

// Identifier used to refer to a file inside a code map.
using CodeMapFileId = uint64_t;

/**
 * Code maps capture information required for offline code cache generation
 * and are written to disk during execution of FEX.
 *
 * Almost all CodeMap data will be an Entry that indicates blocks to be
 * compiled for cache generation. The reserved value `LoadExternalLibrary`
 * indicates that an instance of ExternalLibraryInfo follows (the entry data
 * itself should be skipped in that case).
 */
struct CodeMap {
  // Describes the location of an entry block compiled during execution
  // Packed so that no padding is inserted between the 64-bit id and the 32-bit offset.
  struct FEX_PACKED Entry {
    CodeMapFileId FileId;
    uint32_t BlockOffset;
  };

  // Describes an external library referenced during execution
  struct ExternalLibraryInfo {
    CodeMapFileId ExternalFileId;

    // null-terminated file path; EITHER relative to the main executable OR an absolute path OR starting with a magic identifier:
    // - WINE/: Path to Wine/Proton installation
    // - WINEPREFIX/: Path to Wine/Proton prefix
    // - SLR/: Path to Steam Linux Runtime
    // At runtime, FEX will always dump absolute paths
    // Flexible array member: the path bytes directly follow ExternalFileId in the serialized data.
    char Path[];
    // Followed by padding to a 4 byte boundary
  };

  // Followed by ExternalLibraryInfo
  static constexpr Entry LoadExternalLibrary = {0xffff'ffff'ffff'ffff, 0xffff'ffff};

  // Record whose marker uses the same all-ones FileId as LoadExternalLibrary but a distinct
  // BlockOffset (0xffff'fffe), followed by the file id it refers to.
  struct FEX_PACKED SetExecutableFileId {
    Entry Marker = {0xffff'ffff'ffff'ffff, 0xffff'fffe};
    CodeMapFileId ExecutableFileId;
  };

  // Per-file result of ParseCodeMap.
  struct ParsedContents {
    fextl::string Filename;
    fextl::set<uint64_t> Blocks;
    bool IsExecutable = false;
  };

  // Follows scheme fileid[-nomb]
  // The nomb ("no multiblock") suffix signifies that the code map is for use without multiblock, only.
  static fextl::string GetBaseFilename(const ExecutableFileInfo& MainExecutable, bool AddNombSuffix);

  // Reads a code map from File and returns its contents grouped by file id.
  static fextl::map<CodeMapFileId, ParsedContents> ParseCodeMap(std::ifstream& File);
};

// Interface through which CodeMapWriter obtains the file to write the code map to.
struct CodeMapOpener {
  virtual ~CodeMapOpener() = default;
  // Returns an int for the code map file.
  // NOTE(review): presumably a file descriptor, with -1 meaning "do not write" (see CodeMapWriter::CodeMapFD) — confirm.
  virtual int OpenCodeMapFile() = 0;
};

// Buffers code map records in memory and commits them to the file obtained from a CodeMapOpener.
class CodeMapWriter {
public:
  CodeMapWriter(CodeMapOpener&, bool OpenEagerly = false);
  ~CodeMapWriter();

  // Checks if writing is enabled. Calls to this functions may also be interpreted as signals that writes are about to happen
  bool IsWriteEnabled(const ExecutableFileSectionInfo&);

  // Drops all writer state after a fork: closes and forgets an active FD, rewinds the buffer
  // and clears the set of known file ids. A stored -1 ("don't write") is left as it is.
  void ResetAfterFork() {
    const bool HasActiveFD = CodeMapFD.has_value() && *CodeMapFD != -1;
    if (HasActiveFD) {
      close(*CodeMapFD);
      CodeMapFD.reset();
    }

    BufferOffset = 0;
    KnownFileIds.clear();
  }

  // Reports whether FD is the descriptor backing the code map; logs a debug message when it is.
  bool IsBackingFD(int FD) const {
    const bool Matches = CodeMapFD.has_value() && *CodeMapFD == FD;
    if (!Matches) {
      return false;
    }

    LogMan::Msg::DFmt("Hiding directory entry for code map FD");
    return true;
  }

  void AppendBlock(const FEXCore::ExecutableFileSectionInfo&, uint64_t Entry);
  void AppendLibraryLoad(const FEXCore::ExecutableFileInfo&);
  void AppendSetMainExecutable(const FEXCore::ExecutableFileInfo&);

  // Thread-safely commit any pending data to disk
  void Flush(size_t Offset);

private:
  // Queues data into an internal ring buffer.
  // Call Flush() to commit the data to disk.
  void AppendData(std::span<const std::byte> Data);

  // Commit given data range to disk
  void Flush(size_t Offset, std::unique_lock<std::shared_mutex>&);

  std::shared_mutex Mutex;
  fextl::vector<std::byte> Buffer;
  std::atomic<size_t> BufferOffset {0};

  fextl::set<CodeMapFileId> KnownFileIds;

  // std::nullopt: We haven't requested a CodeMapFD yet
  // value is -1:  We requested a CodeMapFD but FEXServer told us not to write any data
  // other values: Code map writing is active
  std::optional<int> CodeMapFD;

  CodeMapOpener& FileOpener;
};

// Pure-virtual interface for code cache handling; a Context hands out its implementation via GetCodeCache().
class AbstractCodeCache {
public:
  virtual ~AbstractCodeCache() = default;

  /**
   * Computes a unique identifier for the referenced binary file to be used for
   * generating the code map.
   * This identifier is independent of FEX build/runtime configuration and
   * stable across FEX updates.
   */
  virtual uint64_t ComputeCodeMapId(std::string_view Filename, int FD) = 0;

  /**
   * Loads a code cache from mapped memory and appends it to the current Core state.
   * TODO: Optionally recompiles all contained code blocks at runtime for validation.
   * Returns false if the provided cache file is invalid, and true otherwise.
   */
  virtual bool LoadData(Core::InternalThreadState*, std::byte* MappedCacheFile, const ExecutableFileSectionInfo&) = 0;

  /**
   * Bundles the current Core state (CodeBuffer, GuestToHostMapping, ...) to a code cache and writes it to the given file descriptor.
   * Returns true on success.
   */
  virtual bool SaveData(Core::InternalThreadState&, int TargetFD, const ExecutableFileSectionInfo&, uint64_t SerializedBaseAddress) = 0;

  /**
   * Function to be called before compiling any code for caching purposes
   */
  virtual void InitiateCacheGeneration() = 0;
};

} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Core/Context.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <functional>
#include <stdint.h>

#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/Core/CPUID.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/IntervalList.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

namespace FEXCore {
struct HostFeatures;
class ForkableSharedMutex;
class ThunkHandler;
} // namespace FEXCore

namespace FEXCore::Core {
struct CPUState;
struct InternalThreadState;
} // namespace FEXCore::Core

namespace FEXCore::HLE {
class SyscallHandler;
} // namespace FEXCore::HLE

namespace FEXCore::IR {
class IREmitter;
} // namespace FEXCore::IR

namespace FEXCore::Context {

// Operating mode selector.
// NOTE(review): presumably distinguishes 32-bit from 64-bit guest code — confirm against users.
enum OperatingMode {
  MODE_32BIT,
  MODE_64BIT,
};

// Callback taking the start and length of a code range.
using CodeRangeInvalidationFn = std::function<void(uint64_t start, uint64_t Length)>;

// Callback receiving an entrypoint address and the IR emitter to use for it.
using CustomIREntrypointHandler = std::function<void(uintptr_t Entrypoint, IR::IREmitter*)>;

// Callback receiving the thread it is invoked for.
using ExitHandler = std::function<void(Core::InternalThreadState* Thread)>;

class Context {
public:
  virtual ~Context() = default;
  /**
   * @brief [[threadsafe]] Create a new FEXCore context object
   *
   * This is necessary to do when running threaded contexts
   *
   * @return a new context object
   */
  FEX_DEFAULT_VISIBILITY static fextl::unique_ptr<FEXCore::Context::Context> CreateNewContext(const FEXCore::HostFeatures& Features);

  /**
   * @brief Allows setting up in memory code and other things prior to launching code execution
   *
   * @param CTX The context that we created
   * @param Loader The loader that will be doing all the code loading
   *
   * @return true if we loaded code
   */
  FEX_DEFAULT_VISIBILITY virtual bool InitCore() = 0;

  /**
   * @brief Executes the supplied thread context on the current thread until a return is requested
   */
  FEX_DEFAULT_VISIBILITY virtual void ExecuteThread(FEXCore::Core::InternalThreadState* Thread) = 0;

  FEX_DEFAULT_VISIBILITY virtual bool CheckIfBlockIsCacheable(FEXCore::Core::InternalThreadState& Thread, uint64_t GuestRIP, uint64_t MaxInst) = 0;
  FEX_DEFAULT_VISIBILITY virtual void CompileRIP(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP) = 0;
  FEX_DEFAULT_VISIBILITY virtual void CompileRIPCount(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestRIP, uint64_t MaxInst) = 0;

  FEX_DEFAULT_VISIBILITY virtual void HandleCallback(FEXCore::Core::InternalThreadState* Thread, uint64_t RIP) = 0;

  FEX_DEFAULT_VISIBILITY virtual bool IsAddressInCurrentBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t Size) = 0;
  FEX_DEFAULT_VISIBILITY virtual bool IsCurrentBlockSingleInst(FEXCore::Core::InternalThreadState* Thread) = 0;
  FEX_DEFAULT_VISIBILITY virtual uint64_t GetGuestBlockEntry(FEXCore::Core::InternalThreadState* Thread) = 0;

  ///< State reconstruction helpers
  ///< Reconstructs the guest RIP from the passed in thread context and related Host PC.
  FEX_DEFAULT_VISIBILITY virtual uint64_t RestoreRIPFromHostPC(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC) = 0;
  /**
   * @brief Reconstructs a compacted EFLAGS from FEX's internal EFLAG representation.
   *
   * @param Thread The thread getting the state reconstructed
   * @param WasInJIT If the code was in the JIT at the time.
   * @param HostGPRs The host Arm64 GPRs at the point of state inside the JIT.
   * @param PSTATE The Arm64 PState value.
   *
   * If WasInJIT is false then HostGPRs and PSTATE is ignored, with the assumption that the FEX JIT has already stored all state in to the
   * ThreadState object.
   *
   * @return x86 EFLAGS reconstructed
   */
  FEX_DEFAULT_VISIBILITY virtual uint32_t
  ReconstructCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, bool WasInJIT, const uint64_t* HostGPRs, uint64_t PSTATE) = 0;
  ///< Sets FEX's internal EFLAGS representation to the passed in compacted form.
  FEX_DEFAULT_VISIBILITY virtual void SetFlagsFromCompactedEFLAGS(FEXCore::Core::InternalThreadState* Thread, uint32_t EFLAGS) = 0;

  FEX_DEFAULT_VISIBILITY virtual void
  ReconstructXMMRegisters(const FEXCore::Core::InternalThreadState* Thread, __uint128_t* XMM_Low, __uint128_t* YMM_High) = 0;
  FEX_DEFAULT_VISIBILITY virtual void
  SetXMMRegistersFromState(FEXCore::Core::InternalThreadState* Thread, const __uint128_t* XMM_Low, const __uint128_t* YMM_High) = 0;

  /**
   * @brief Create a new thread object that doesn't inherit any state.
   * Used to create FEX thread objects in preparation for creating a true OS thread.
   *
   * @param InitialRIP The starting RIP of this thread
   * @param StackPointer The starting RSP of this thread
   * @param NewThreadState The thread state to inherit from if not nullptr.
   *
   * @return A new InternalThreadState object for using with a new guest thread.
   */

  FEX_DEFAULT_VISIBILITY virtual FEXCore::Core::InternalThreadState*
  CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState = nullptr) = 0;

  FEX_DEFAULT_VISIBILITY virtual void DestroyThread(FEXCore::Core::InternalThreadState* Thread) = 0;
#ifndef _WIN32
  FEX_DEFAULT_VISIBILITY virtual void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) {}
  FEX_DEFAULT_VISIBILITY virtual void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) {}
#endif
  FEX_DEFAULT_VISIBILITY virtual void SetSignalDelegator(FEXCore::SignalDelegator* SignalDelegation) = 0;
  FEX_DEFAULT_VISIBILITY virtual void SetSyscallHandler(FEXCore::HLE::SyscallHandler* Handler) = 0;
  FEX_DEFAULT_VISIBILITY virtual void SetThunkHandler(FEXCore::ThunkHandler* Handler) = 0;

  FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunction(uint32_t Function, uint32_t Leaf) = 0;
  FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::XCRResults RunXCRFunction(uint32_t Function) = 0;
  FEX_DEFAULT_VISIBILITY virtual FEXCore::CPUID::FunctionResults RunCPUIDFunctionName(uint32_t Function, uint32_t Leaf, uint32_t CPU) = 0;

  virtual AbstractCodeCache& GetCodeCache() = 0;
  virtual void SetCodeMapWriter(fextl::unique_ptr<CodeMapWriter>) = 0;
  virtual void FlushAndCloseCodeMap() = 0;

  FEX_DEFAULT_VISIBILITY virtual void ClearCodeCache(FEXCore::Core::InternalThreadState* Thread, bool NewCodeBuffer = true) = 0;
  FEX_DEFAULT_VISIBILITY virtual void InvalidateCodeBuffersCodeRange(uint64_t Start, uint64_t Length) = 0;
  FEX_DEFAULT_VISIBILITY virtual void
  InvalidateThreadCachedCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) = 0;
  FEX_DEFAULT_VISIBILITY virtual FEXCore::ForkableSharedMutex& GetCodeInvalidationMutex() = 0;

  FEX_DEFAULT_VISIBILITY virtual void
  ConfigureAOTGen(FEXCore::Core::InternalThreadState* Thread, fextl::set<uint64_t>* ExternalBranches, uint64_t SectionMaxAddress) = 0;

  /**
   * @brief Checks if a PC is inside any code buffer used by the thread's JIT.
   *
   * @param Thread Which thread's code buffers to check inside of.
   * @param Address The PC to check against.
   *
   * @return true if PC is inside the thread's code buffers.
   */
  FEX_DEFAULT_VISIBILITY virtual bool IsAddressInCodeBuffer(FEXCore::Core::InternalThreadState* Thread, uintptr_t Address) const = 0;

  /**
   * @brief Informs the context if hardware TSO is supported.
   * Once hardware TSO is enabled, then TSO emulation through atomics is disabled and relies on the hardware.
   *
   * @param HardwareTSOSupported If the hardware supports the TSO memory model or not.
   */
  FEX_DEFAULT_VISIBILITY virtual void SetHardwareTSOSupport(bool HardwareTSOSupported) = 0;

  /**
   * @brief Enable exiting the JIT when HLT is hit.
   *
   * This is to workaround a bug in Wine's longjump function which breaks our unittests.
   *
   */
  FEX_DEFAULT_VISIBILITY virtual void EnableExitOnHLT() = 0;

  /**
   * @brief Adds a new Thunk trampoline handler
   *
   * @param Entrypoint The guest PC that the custom thunk trampoline IR handler will be installed at.
   * @param GuestThunkEntrypoint The thunk entrypoint that the IR handler will redirect to.
   */
  FEX_DEFAULT_VISIBILITY virtual void AddThunkTrampolineIRHandler(uintptr_t Entrypoint, uintptr_t GuestThunkEntrypoint) = 0;

  /**
   * @brief Adds additional per-instruction granularity TSO enable/disable information for the given range.
   *
   * @param ValidRanges The set of address ranges covered by this information
   * @param Instructions The set of instruction addresses within the given ranges for which TSO should be enabled
   */
  FEX_DEFAULT_VISIBILITY virtual void AddForceTSOInformation(const IntervalList<uint64_t>& ValidRanges, fextl::set<uint64_t>&& Instructions) = 0;

  FEX_DEFAULT_VISIBILITY virtual void RemoveForceTSOInformation(uint64_t Address, uint64_t Size) = 0;

  FEX_DEFAULT_VISIBILITY virtual void MarkMonoDetected() = 0;

  FEX_DEFAULT_VISIBILITY virtual void MarkMonoBackpatcherBlock(uint64_t BlockEntry) = 0;
private:
};
} // namespace FEXCore::Context


================================================
FILE: FEXCore/include/FEXCore/Core/CoreState.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/Telemetry.h>

#include <atomic>
#include <cstddef>
#include <cstring>
#include <stdint.h>
#include <string_view>
#include <type_traits>

namespace FEXCore::Core {
// Counter built on top of std::atomic that only ever issues relaxed loads and relaxed stores.
// The read-modify-write helpers below are deliberately split into a separate load and store, which
// lets compilers emit cheaper code at the cost of updates that can visibly tear.
// In particular, an increment/decrement may be observed half-way done if a signal is received in the middle.
//
// Prefer std::atomic with default memory ordering unless you really know what you're doing.
// Primarily this ensures program ordering when signals are concerned.
template<typename T>
class NonAtomicRefCounter {
public:
  // Adds Value to the counter using a plain load followed by a plain store.
  void Increment(T Value) {
    // fetch_add is intentionally not used: it would become ldxr+stxr or lock xadd.
    // FEX very specifically wants simple load/store instructions here.
    //
    // ARM64 ex:
    // ldr x0, [x1];
    // add x0, x0, #1;
    // str x0, [x1];
    //
    // x86-64 ex:
    // inc qword [rax];
    const auto Previous = AtomicVariable.load(Relaxed);
    AtomicVariable.store(Previous + Value, Relaxed);
  }

  // Subtracts Value from the counter and returns the value held *before* the subtraction.
  // x86-64 needs to know the result on decrement.
  T Decrement(T Value) {
    // fetch_sub is intentionally not used: it would become ldxr+stxr or lock xadd.
    // FEX very specifically wants simple load/store instructions here.
    //
    // ARM64 ex:
    // ldr x0, [x1];
    // sub x0, x0, #1;
    // str x0, [x1];
    //
    // x86-64 ex:
    // dec qword [rax];
    const auto Previous = AtomicVariable.load(Relaxed);
    AtomicVariable.store(Previous - Value, Relaxed);
    return Previous;
  }

  // Relaxed read of the current counter value.
  T Load() const {
    return AtomicVariable.load(Relaxed);
  }

  // Relaxed overwrite of the counter value.
  void Store(T Value) {
    AtomicVariable.store(Value, Relaxed);
  }

private:
  // Every access in this class uses this ordering.
  static constexpr std::memory_order Relaxed = std::memory_order_relaxed;

  std::atomic<T> AtomicVariable;
};
static_assert(std::is_standard_layout_v<NonAtomicRefCounter<uint64_t>>, "Needs to be standard layout");
static_assert(std::is_trivially_copyable_v<NonAtomicRefCounter<uint64_t>>, "needs to be trivially copyable");
static_assert(sizeof(NonAtomicRefCounter<uint64_t>) == sizeof(uint64_t), "Needs to be correct size");

/**
 * @brief Guest CPU register state as stored in memory by FEX.
 *
 * The member order and sizes are load-bearing: file-level static_asserts pin several offsets
 * (gregs, L1Pointer/L1Mask, pf_raw/af_raw, avx_high, xmm, mm) so that paired loads/stores can be used.
 * Do not reorder or resize members without revisiting those asserts.
 *
 * Every data member is value-initialized by its default member initializer; the constructor then
 * applies the non-zero reset values (flags, mxcsr and, in debug builds, poison patterns).
 */
struct alignas(64) CPUState {
  // Allows more efficient handling of the register
  // file in the event AVX is not supported.
  union XMMRegs {
    struct AVX {
      uint64_t data[16][4];
    };
    struct SSE {
      uint64_t data[16][2];
      uint64_t pad[16][2];
    };

    AVX avx;
    SSE sse;
  };

  // Cacheline 0 begins here (byte offset 0).
  uint64_t InlineJITBlockHeader {};
  // Reference counter for FEX's per-thread deferred signals.
  // Counts the nesting depth of program sections that cause signals to be deferred.
  NonAtomicRefCounter<uint64_t> DeferredSignalRefCount;

  // PF/AF raw values. Really only a byte of each matters, but this layout
  // (32-bits and in the first 256 bytes) is necessary to use ldp/stp to
  // spill/fill these together efficiently.
  // pf_raw must be initialized to 1 so that reconstructed PF = 0 (matching x86 reset state).
  // PF reconstruction: popcount(pf_raw ^ 1) & 1, so pf_raw=1 gives PF=0.
  uint32_t pf_raw {1};
  uint32_t af_raw {};

  uint64_t rip {}; ///< Current core's RIP. May not be entirely accurate while JIT is active

  // gregs starts at byte offset 32 and runs through byte 159 (cachelines 0-2).
  uint64_t gregs[16] {};
  // L1Pointer and L1Mask must stay adjacent (checked by a file-level static_assert).
  uint64_t L1Pointer {};
  uint64_t L1Mask {};
  uint64_t callret_sp {};
  uint64_t _pad1 {};

  // Cachelines 3-6 (byte offsets 192-447).
  // The high 128-bits of AVX registers when not being emulated by SVE256.
  // Value-initialized like every other register so a fresh state never exposes indeterminate data.
  uint64_t avx_high[16][2] {};

  // Cachelines 7-14 (byte offsets 448-959).
  XMMRegs xmm {};

  // Cacheline 15 and onwards (byte offset 960+).
  // Raw segment register indexes
  uint16_t es_idx {}, cs_idx {}, ss_idx {}, ds_idx {};
  uint16_t gs_idx {}, fs_idx {};
  uint32_t mxcsr {};

  // Segment registers holding base addresses
  uint32_t es_cached {}, cs_cached {}, ss_cached {}, ds_cached {};
  uint64_t gs_cached {};
  uint64_t fs_cached {};
  uint8_t flags[48] {};
  uint64_t mm[8][2] {};

  // 32bit x86 state
  struct gdt_segment {
    uint16_t Limit0;
    uint16_t Base0;
    uint16_t Base1  : 8;
    uint16_t Type   : 4;
    uint16_t S      : 1;
    uint16_t DPL    : 2;
    uint16_t P      : 1;
    uint16_t Limit1 : 4;
    uint16_t AVL    : 1;
    uint16_t L      : 1;
    uint16_t D      : 1;
    uint16_t G      : 1;
    uint16_t Base2  : 8;
  };

  // Array of segments (Access offset matches segment selector TI bit)
  // 0 : GDT
  // 1 : LDT
  // Segments are global to the process.
  // GDT segments are only 32-objects in size.
  //   - Kernel allocates a handful of these for various things.
  //   - Three are reserved for user-space to setup TLS segments in
  // LDT segments are entirely controlled by userspace.
  //   - Kernel allocates up to 8192 ldt segments.
  gdt_segment* segment_arrays[2] {};

  /**
   * @brief Resolves a segment selector to its descriptor entry.
   *
   * Bit 2 of the selector picks the table from segment_arrays and the selector shifted right by 3
   * is the index into that table. No bounds or null checks are performed.
   *
   * @param State The CPU state whose segment tables are consulted.
   * @param Selector The raw segment selector value.
   *
   * @return Pointer to the selected descriptor.
   */
  static gdt_segment* GetSegmentFromIndex(CPUState& State, uint16_t Selector) {
    auto base = State.segment_arrays[(Selector >> 2) & 1];
    return &base[Selector >> 3];
  }

  /**
   * @brief Reassembles the 32-bit base address from the three split base fields of a descriptor.
   *
   * The bitfields are widened to uint32_t before shifting so the arithmetic never happens on a signed int.
   */
  static uint32_t CalculateGDTBase(gdt_segment GDT) {
    uint32_t Base {};
    Base |= static_cast<uint32_t>(GDT.Base2) << 24;
    Base |= static_cast<uint32_t>(GDT.Base1) << 16;
    Base |= GDT.Base0;
    return Base;
  }

  /**
   * @brief Reassembles the 20-bit limit from the two split limit fields of a descriptor.
   */
  static uint32_t CalculateGDTLimit(gdt_segment GDT) {
    uint32_t Limit {};
    Limit |= static_cast<uint32_t>(GDT.Limit1) << 16;
    Limit |= GDT.Limit0;
    return Limit;
  }

  /**
   * @brief Splits a 32-bit base address across Base0/Base1/Base2 of the descriptor.
   *
   * Each assignment truncates to the width of the destination field.
   */
  static void SetGDTBase(gdt_segment* GDT, uint32_t Base) {
    GDT->Base0 = Base;
    GDT->Base1 = Base >> 16;
    GDT->Base2 = Base >> 24;
  }

  /**
   * @brief Splits a limit across Limit0/Limit1 of the descriptor.
   *
   * Limit1 is only 4 bits wide, so bits above bit 19 of Limit are discarded.
   */
  static void SetGDTLimit(gdt_segment* GDT, uint32_t Limit) {
    GDT->Limit0 = Limit;
    GDT->Limit1 = Limit >> 16;
  }

  uint16_t FCW {0x37F};
  uint8_t AbridgedFTW {};

  uint8_t _pad2[5] {};
  // PF/AF are statically mapped as-if they were r16/r17 (which do not exist in
  // x86 otherwise). This allows a straightforward mapping for SRA.
  static constexpr uint8_t PF_AS_GREG = 16;
  static constexpr uint8_t AF_AS_GREG = 17;

  static constexpr size_t FLAG_SIZE = sizeof(flags[0]);
  static constexpr size_t GDT_SIZE = sizeof(gdt_segment);
  static_assert(GDT_SIZE == sizeof(uint64_t), "Segments required to be 8-byte in size.");
  static constexpr size_t GPR_REG_SIZE = sizeof(gregs[0]);
  static constexpr size_t XMM_AVX_REG_SIZE = sizeof(xmm.avx.data[0]);
  static constexpr size_t XMM_SSE_REG_SIZE = XMM_AVX_REG_SIZE / 2;
  static constexpr size_t MM_REG_SIZE = sizeof(mm[0]);

  // Only the first 32 bits are defined.
  static constexpr size_t NUM_EFLAG_BITS = 32;
  static constexpr size_t NUM_FLAGS = sizeof(flags) / FLAG_SIZE;
  static constexpr size_t NUM_GPRS = sizeof(gregs) / GPR_REG_SIZE;
  static constexpr size_t NUM_XMMS = sizeof(xmm) / XMM_AVX_REG_SIZE;
  static constexpr size_t NUM_MMS = sizeof(mm) / MM_REG_SIZE;

  /**
   * @brief Applies the non-zero reset values on top of the zeroing default member initializers.
   */
  CPUState() {
#ifndef NDEBUG
    // Initialize default CPU state
    rip = ~0ULL;
    // Initialize xmm state with garbage to catch spurious incorrect xmm usage.
    for (auto& Reg : xmm.avx.data) {
      Reg[0] = 0xDEADBEEFULL;
      Reg[1] = 0xBAD0DAD1ULL;
      Reg[2] = 0xDEADCAFEULL;
      Reg[3] = 0xBAD2CAD3ULL;
    }
#endif

    flags[X86State::RFLAG_RESERVED_LOC] = 1; ///< Reserved - Always 1.
    flags[X86State::RFLAG_IF_LOC] = 1;       ///< Interrupt flag - Always 1.

    // DF needs to be initialized to 0 to comply with the Linux ABI. However,
    // we encode DF as 1/-1 within the JIT, so we have to write 0x1 here to
    // zero DF.
    flags[X86State::RFLAG_DF_RAW_LOC] = 0x1;

    // Likewise, SF/ZF/CF/OF must be cleared. This would be simply zeroing
    // NZCV... but we invert CF inside the JIT. So set just bit 29 (carry).
    flags[X86State::RFLAG_NZCV_3_LOC] = (1 << (29 - 24));

    // Default mxcsr value
    // All exception masks enabled.
    mxcsr = 0x1F80;
  }

  // TODO: This should be moved to the frontend.
  constexpr static uint32_t DEFAULT_USER_CS = 6;

  // Follows encoding of the TI bit in segment selector encoding.
  constexpr static uint32_t SEGMENT_ARRAY_INDEX_GDT = 0;
  constexpr static uint32_t SEGMENT_ARRAY_INDEX_LDT = 1;
  Core::CPUState::gdt_segment private_gdt[32] {};
};
static_assert(std::is_trivially_copyable_v<CPUState>, "Needs to be trivial");
static_assert(std::is_standard_layout_v<CPUState>, "This needs to be standard layout");
static_assert(alignof(CPUState) == 64, "CPUState needs to be 64-byte aligned!");
static_assert(offsetof(CPUState, avx_high) % 64 == 0, "avx_high needs to be 64-byte aligned!");
static_assert(offsetof(CPUState, xmm) % 32 == 0, "xmm needs to be 256-bit aligned!");
static_assert(offsetof(CPUState, mm) % 16 == 0, "mm needs to be 128-bit aligned!");
static_assert(offsetof(CPUState, gregs[15]) <= 504, "gregs maximum offset must be <= 504 for ldp/stp to work");
static_assert(offsetof(CPUState, DeferredSignalRefCount) % 8 == 0, "Needs to be 8-byte aligned");
static_assert(offsetof(CPUState, L1Pointer) <= 504, "This needs to be <= 504 for ldp");
static_assert(offsetof(CPUState, L1Mask) == (offsetof(CPUState, L1Pointer) + 8), "These two variables are paired");
static_assert(offsetof(CPUState, pf_raw) <= 252, "pf_raw must be within ldp imm offset range");
static_assert((offsetof(CPUState, pf_raw) + 4) == offsetof(CPUState, af_raw), "pf_raw and af_raw must be sequential");

// Some CPU architectures have a penalty for alignment of ldp/stp not being 2 * <element_size>.
static_assert(offsetof(CPUState, gregs[0]) % 16 == 0, "gregs should be 16-byte aligned");
static_assert(offsetof(CPUState, pf_raw) % 8 == 0, "pf_raw must be 8-byte aligned.");

struct InternalThreadState;

// Indices of the fallback handlers. OPINDEX_MAX is used as the element count of
// JITPointers::FallbackHandlerPointers, so entries must stay contiguous starting at 0 and
// OPINDEX_MAX must remain last.
// NOTE(review): the exact operation behind each index is defined by the handler implementations,
// which are not visible in this header.
enum FallbackHandlerIndex {
  OPINDEX_F80CVTTO_4 = 0,
  OPINDEX_F80CVTTO_8,
  OPINDEX_F80CVT_4,
  OPINDEX_F80CVT_8,
  OPINDEX_F80CVTINT_2,
  OPINDEX_F80CVTINT_4,
  OPINDEX_F80CVTINT_8,
  OPINDEX_F80CVTINT_TRUNC2,
  OPINDEX_F80CVTINT_TRUNC4,
  OPINDEX_F80CVTINT_TRUNC8,
  OPINDEX_F80CMP,
  OPINDEX_F80CVTTOINT_2,
  OPINDEX_F80CVTTOINT_4,

  // Unary
  OPINDEX_F80ROUND,
  OPINDEX_F80F2XM1,
  OPINDEX_F80TAN,
  OPINDEX_F80SQRT,
  OPINDEX_F80SIN,
  OPINDEX_F80COS,
  OPINDEX_F80SINCOS,
  OPINDEX_F80XTRACT_EXP,
  OPINDEX_F80XTRACT_SIG,
  OPINDEX_F80BCDSTORE,
  OPINDEX_F80BCDLOAD,

  // Binary
  OPINDEX_F80ADD,
  OPINDEX_F80SUB,
  OPINDEX_F80MUL,
  OPINDEX_F80DIV,
  OPINDEX_F80FYL2X,
  OPINDEX_F80ATAN,
  OPINDEX_F80FPREM1,
  OPINDEX_F80FPREM,
  OPINDEX_F80SCALE,

  // Double Precision
  OPINDEX_F64SIN,
  OPINDEX_F64COS,
  OPINDEX_F64SINCOS,
  OPINDEX_F64TAN,
  OPINDEX_F64ATAN,
  OPINDEX_F64F2XM1,
  OPINDEX_F64FYL2X,
  OPINDEX_F64FPREM,
  OPINDEX_F64FPREM1,
  OPINDEX_F64SCALE,

  // SSE4.2 string instructions
  OPINDEX_VPCMPESTRX,
  OPINDEX_VPCMPISTRX,

  // Maximum
  // Not a handler: number of entries above, used to size lookup tables.
  OPINDEX_MAX,
};

// One entry of JITPointers::FallbackHandlerPointers (indexed by FallbackHandlerIndex).
// Both members are stored as raw 64-bit values.
struct FallbackABIInfo {
  // NOTE(review): presumably the address of the ABI trampoline used to call Func — confirm against the JIT.
  uint64_t ABIHandler;
  // NOTE(review): presumably the address of the fallback function itself — confirm against the JIT.
  uint64_t Func;
};

// Table of 64-bit values embedded in CpuStateFrame as `Pointers`; CpuStateFrame describes it as
// "Pointers that the JIT needs to load to remove relocations".
// A file-level static_assert requires the end of this struct to stay within 32760 bytes of the
// start of CpuStateFrame, so growing it needs care.
// The scalar members are zero-initialized; the array members without `{}` are not initialized here.
struct JITPointers {

  // Process specific
  uint64_t PrintValue {};
  uint64_t PrintVectorValue {};
  uint64_t ThreadRemoveCodeEntryFromJIT {};
  uint64_t CPUIDObj {};
  uint64_t CPUIDFunction {};
  uint64_t XCRFunction {};
  uint64_t SyscallHandlerObj {};
  uint64_t SyscallHandlerFunc {};
  uint64_t ExitFunctionLink {};
  uint64_t MonoBackpatcherWrite {};
  uint64_t LUDIV {};
  uint64_t LDIV {};
  uint64_t ThunkCallbackRet {};

  // Handles returning/calling ARM64EC code from the JIT, expects the target PC in TMP3
  uint64_t ExitFunctionEC {};

  // One {ABIHandler, Func} pair per FallbackHandlerIndex entry.
  FallbackABIInfo FallbackHandlerPointers[FallbackHandlerIndex::OPINDEX_MAX];
  // One slot per NamedVectorConstant / IndexNamedVectorConstant / telemetry type respectively.
  uint64_t NamedVectorConstantPointers[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX];
  uint64_t IndexedNamedVectorConstantPointers[FEXCore::IR::IndexNamedVectorConstant::INDEXED_NAMED_VECTOR_MAX];
  uint64_t TelemetryValueAddresses[FEXCore::Telemetry::TYPE_LAST];

  /**
   * @name Dispatcher pointers
   * @{ */
  uint64_t DispatcherLoopTop {};
  uint64_t DispatcherLoopTopFillSRA {};
  uint64_t DispatcherLoopTopEnterEC {};
  uint64_t DispatcherLoopTopEnterECFillSRA {};
  uint64_t ExitFunctionLinker {};
  uint64_t ThreadStopHandlerSpillSRA {};
  uint64_t ThreadPauseHandlerSpillSRA {};
  uint64_t GuestSignal_SIGILL {};
  uint64_t GuestSignal_SIGTRAP {};
  uint64_t GuestSignal_SIGSEGV {};
  uint64_t SignalReturnHandler {};
  uint64_t SignalReturnHandlerRT {};
  uint64_t L2Pointer {};
  uint64_t LUDIVHandler {};
  uint64_t LDIVHandler {};
  uint64_t F64SinHandler {};
  uint64_t F64CosHandler {};
  uint64_t F64TanHandler {};
  /**  @} */

  // Copy of process-wide named vector constants data.
  // Each constant occupies two 64-bit words (128 bits) and the array is 16-byte aligned.
  alignas(16) uint64_t NamedVectorConstants[FEXCore::IR::NamedVectorConstant::NAMED_VECTOR_CONST_POOL_MAX][2];
};

// Each guest JIT frame has one of these
// Per guest JIT frame data: the guest CPUState followed by bookkeeping and the JIT pointer table.
// File-level static_asserts require `State` to be at offset 0, `SynchronousFaultData` to be
// 8 bytes in size and 8-byte aligned, and `Pointers` to be 8-byte aligned.
struct CpuStateFrame {
  // Must remain the first member (offset 0).
  CPUState State;

  /**
   * @brief Stack location for the CPU backends to return the stack pointer to
   *
   * Allows the CPU cores to do a long jump out of their execution and safely shut down
   */
  uint64_t ReturningStackLocation {};

  /**
   * @brief If we are in an inline syscall we need to store a bit of additional information about this
   *
   * ARM64:
   *  - Bit 15: In syscall
   *  - Bit 14-0: Number of static registers spilled
   */
  uint64_t InSyscallInfo {};

  uint32_t SignalHandlerRefCounter {};

  // Packed into exactly 8 bytes. Only FaultToTopAndGeneratedException has an initializer;
  // the remaining fields are not initialized here.
  struct alignas(8) SynchronousFaultDataStruct {
    bool FaultToTopAndGeneratedException {};
    uint8_t Signal;
    uint8_t TrapNo;
    uint8_t si_code;
    uint16_t err_code;
    uint16_t _pad : 16;
  } SynchronousFaultData;

  // Owning thread object; has no default initializer, so it must be set by whoever builds the frame.
  InternalThreadState* Thread;

#ifdef ARCHITECTURE_arm64ec
  // Set by the kernel on ARM64EC whenever the JIT should cooperatively suspend running guest code.
  uint32_t SuspendDoorbell {};
#endif

  // Pointers that the JIT needs to load to remove relocations
  JITPointers Pointers;
};
static_assert(offsetof(CpuStateFrame, State) == 0, "CPUState must be first member in CpuStateFrame");
static_assert(offsetof(CpuStateFrame, Pointers) % 8 == 0, "JITPointers need to be aligned to 8 bytes");
static_assert(offsetof(CpuStateFrame, Pointers) + sizeof(CpuStateFrame::Pointers) <= 32760, "JITPointers maximum pointer needs to be less "
                                                                                            "than architecture maximum 32768");

static_assert(std::is_standard_layout<CpuStateFrame>::value, "This needs to be standard layout");
static_assert(sizeof(CpuStateFrame::SynchronousFaultData) == 8, "This needs to be 8 bytes");
static_assert(alignof(CpuStateFrame::SynchronousFaultDataStruct) == 8, "This needs to be 8 bytes");
static_assert(offsetof(CpuStateFrame, SynchronousFaultData) % 8 == 0, "This needs to be aligned");
} // namespace FEXCore::Core


================================================
FILE: FEXCore/include/FEXCore/Core/HostFeatures.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/vector.h>
#include <cstdint>

namespace FEXCore {
// Plain aggregate describing what the host supports.
// Every boolean defaults to false and every integer to 0 until something fills it in.
struct HostFeatures {
  /**
   * @brief Backend features that change how codegen is generated from IR
   *
   * Specifically things that affect the IR->Codegen process
   * Not the x86->IR process
   */
  uint32_t DCacheLineSize {};
  uint32_t ICacheLineSize {};
  bool SupportsCacheMaintenanceOps {};
  bool SupportsAES {};
  bool SupportsCRC {};
  bool SupportsCLZERO {};
  bool SupportsAtomics {};
  bool SupportsRCPC {};
  bool SupportsTSOImm9 {};
  bool SupportsRAND {};
  bool SupportsAVX {};
  bool SupportsSVE128 {};
  bool SupportsSVE256 {};
  bool SupportsSHA {};
  bool SupportsPMULL_128Bit {};
  bool SupportsCSSC {};
  bool SupportsFCMA {};
  bool SupportsFlagM {};
  bool SupportsFlagM2 {};
  bool SupportsRPRES {};
  bool SupportsPreserveAllABI {};
  bool SupportsAES256 {};
  bool SupportsSVEBitPerm {};
  bool SupportsCPUIndexInTPIDRRO {};
  bool SupportsFRINTTS {};
  bool SupportsECV {};
  bool SupportsWFXT {};
  bool Supports3DNow {};
  bool SupportsSSE4a {};
  bool SupportsMOPS {};

  // Float exception behaviour
  bool SupportsAFP {};
  bool SupportsFloatExceptions {};

  // Flag if this is InstCountCI
  bool IsInstCountCI {};

  // MIDR information
  // Also used for determining number of CPU cores for CPUID
  // NOTE(review): presumably one entry per host CPU core — confirm where this is populated.
  fextl::vector<uint32_t> CPUMIDRs;
};
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Core/SignalDelegator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <array>
#include <cstdint>
#include <csignal>

namespace FEXCore {
namespace Core {
  struct InternalThreadState;

  // Signal numbers used for guest faults.
  // On non-Windows builds the values come from <csignal>; on Windows, where those macros are not
  // relied upon, the numeric values 11/5/4 are hard-coded instead.
  enum SignalNumber {
#ifndef _WIN32
    FAULT_SIGSEGV = SIGSEGV,
    FAULT_SIGTRAP = SIGTRAP,
    FAULT_SIGILL = SIGILL,
#else
    FAULT_SIGSEGV = 11,
    FAULT_SIGTRAP = 5,
    FAULT_SIGILL = 4,
#endif
  };
} // namespace Core

/**
 * @brief Dispatcher addresses and SRA register layout stored by a SignalDelegator.
 *
 * Every member carries a default member initializer, so a default-constructed config is fully
 * zeroed and never exposes indeterminate values. The type stays an aggregate, so existing
 * brace/designated initialization keeps working.
 */
struct SignalDelegatorConfig {
  using SRAIndexMapping = std::array<uint8_t, 16>;

  // Dispatcher information
  uint64_t DispatcherBegin {};
  uint64_t DispatcherEnd {};

  // Dispatcher entrypoint.
  uint64_t AbsoluteLoopTopAddress {};
  uint64_t AbsoluteLoopTopAddressFillSRA {};

  // Signal return pointers.
  uint64_t SignalHandlerReturnAddress {};
  uint64_t SignalHandlerReturnAddressRT {};

  // Pause handlers.
  uint64_t PauseReturnInstruction {};
  uint64_t ThreadPauseHandlerAddressSpillSRA {};
  uint64_t ThreadPauseHandlerAddress {};

  // Stop handlers.
  uint64_t ThreadStopHandlerAddressSpillSRA {};
  uint64_t ThreadStopHandlerAddress {};

  // SRA information.
  uint16_t SRAGPRCount {};
  uint16_t SRAFPRCount {};

  // SRA index mapping.
  SRAIndexMapping SRAGPRMapping {};
  SRAIndexMapping SRAFPRMapping {};
};

// Base class that holds a SignalDelegatorConfig by value and exposes it to derived classes.
class SignalDelegator {
public:
  virtual ~SignalDelegator() = default;

  // Replaces the stored configuration with a copy of the one passed in.
  void SetConfig(const SignalDelegatorConfig& Config) {
    // The parameter shadows the member, hence the explicit this->.
    this->Config = Config;
  }

  // Returns a reference to the stored configuration (valid for the lifetime of this object).
  const SignalDelegatorConfig& GetConfig() const {
    return Config;
  }

  // The base implementation returns 0; derived classes may override it.
  virtual uintptr_t GetThunkCallbackRET() const {
    return 0;
  }

protected:
  SignalDelegatorConfig Config;
};
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Core/Thunks.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|thunks
$end_info$
*/

#pragma once

namespace FEXCore::IR {
struct SHA256Sum;
}

namespace FEXCore {
typedef void ThunkedFunction(void* ArgsRv);

// Abstract interface for resolving thunks.
class ThunkHandler {
public:
  virtual ~ThunkHandler() = default;
  // Maps a SHA256 sum to the ThunkedFunction (void(void* ArgsRv)) registered for it.
  // NOTE(review): behaviour when no thunk matches (e.g. returning nullptr) is up to the implementation — confirm.
  virtual ThunkedFunction* LookupThunk(const IR::SHA256Sum& sha256) = 0;
};
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Core/X86Enums.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>

namespace FEXCore::X86State {
/**
 * @name The order of the registers from name to index
 *
 * GPRs occupy indices 0-15, XMM registers 16-31 and MM registers 32-39.
 * REG_INVALID (255) marks the absence of a register.
 * @{ */
enum X86Reg : uint32_t {
  REG_RAX = 0,
  REG_RCX = 1,
  REG_RDX = 2,
  REG_RBX = 3,
  REG_RSP = 4,
  REG_RBP = 5,
  REG_RSI = 6,
  REG_RDI = 7,
  REG_R8 = 8,
  REG_R9 = 9,
  REG_R10 = 10,
  REG_R11 = 11,
  REG_R12 = 12,
  REG_R13 = 13,
  REG_R14 = 14,
  REG_R15 = 15,
  REG_XMM_0 = 16,
  REG_XMM_1 = 17,
  REG_XMM_2 = 18,
  REG_XMM_3 = 19,
  REG_XMM_4 = 20,
  REG_XMM_5 = 21,
  REG_XMM_6 = 22,
  REG_XMM_7 = 23,
  REG_XMM_8 = 24,
  REG_XMM_9 = 25,
  REG_XMM_10 = 26,
  REG_XMM_11 = 27,
  REG_XMM_12 = 28,
  REG_XMM_13 = 29,
  REG_XMM_14 = 30,
  REG_XMM_15 = 31,
  REG_MM_0 = 32,
  REG_MM_1 = 33,
  REG_MM_2 = 34,
  REG_MM_3 = 35,
  REG_MM_4 = 36,
  REG_MM_5 = 37,
  REG_MM_6 = 38,
  REG_MM_7 = 39,
  REG_INVALID = 255,
};
/**  @} */

/**
 * @name RFLAG register bit locations
 *
 * These values are used as indices into CPUState::flags (one byte per entry, 48 entries),
 * so the highest value defined here (X87FLAG_B_LOC = 47) must stay below that array's size.
 * @{ */
enum X86RegLocation : uint32_t {
  RFLAG_CF_RAW_LOC = 0,   // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_RESERVED_LOC = 1, // Reserved Bit, Read-as-1
  RFLAG_PF_RAW_LOC = 2,   // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_AF_RAW_LOC = 4,   // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_ZF_RAW_LOC = 6,   // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_SF_RAW_LOC = 7,   // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_TF_RAW_LOC = 8,   // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_IF_LOC = 9,
  RFLAG_DF_RAW_LOC = 10, // Contains multiple bits, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_OF_RAW_LOC = 11, // Not used directly, needs to be reconstructed using `ReconstructCompactedEFLAGS`
  RFLAG_IOPL_LOC = 12,
  RFLAG_NT_LOC = 14,
  RFLAG_RF_LOC = 16,
  RFLAG_VM_LOC = 17,
  RFLAG_AC_LOC = 18,
  RFLAG_VIF_LOC = 19,
  RFLAG_VIP_LOC = 20,
  RFLAG_ID_LOC = 21,

  // So we can implement arm64-like flag manipulation on the x86 jit..
  // SF/ZF/CF/OF packed into a 32-bit word, matching arm64's NZCV structure (not semantics).
  // The four entries below are the four consecutive bytes of that word.
  RFLAG_NZCV_LOC = 24,
  RFLAG_NZCV_1_LOC = 25,
  RFLAG_NZCV_2_LOC = 26,
  RFLAG_NZCV_3_LOC = 27,

  // So we can share flag handling logic, we put x87 flags after RFLAGS
  X87FLAG_BASE = 32,
  X87FLAG_IE_LOC = 32,
  X87FLAG_DE_LOC = 33,
  X87FLAG_ZE_LOC = 34,
  X87FLAG_OE_LOC = 35,
  X87FLAG_UE_LOC = 36,
  X87FLAG_PE_LOC = 37,
  X87FLAG_SF_LOC = 38,
  X87FLAG_ES_LOC = 39,
  X87FLAG_C0_LOC = 40,
  X87FLAG_C1_LOC = 41,
  X87FLAG_C2_LOC = 42,
  X87FLAG_TOP_LOC = 43, // 3 Bits wide
  X87FLAG_C3_LOC = 46,
  X87FLAG_B_LOC = 47,
};

// X86 trap number definitions
// Values are sparse: 22-28 and 30-31 are not defined here.
enum X86TrapNo : uint32_t {
  X86_TRAPNO_DE = 0,        // Divide-by-zero
  X86_TRAPNO_DB = 1,        // Debug
  X86_TRAPNO_NMI = 2,       // Non-maskable interrupt
  X86_TRAPNO_BP = 3,        // Breakpoint
  X86_TRAPNO_OF = 4,        // Overflow
  X86_TRAPNO_BR = 5,        // Bound range exceeded
  X86_TRAPNO_UD = 6,        // Invalid opcode
  X86_TRAPNO_NM = 7,        // Device not available
  X86_TRAPNO_DF = 8,        // Double fault
  X86_TRAPNO_OLD_MF = 9,    // Coprocessor segment overrun
  X86_TRAPNO_TS = 10,       // Invalid TSS
  X86_TRAPNO_NP = 11,       // Segment not present
  X86_TRAPNO_SS = 12,       // Stack segmentation fault
  X86_TRAPNO_GP = 13,       // General Protection fault
  X86_TRAPNO_PF = 14,       // Page fault
  X86_TRAPNO_SPURIOUS = 15, // Spurious interrupt
  X86_TRAPNO_MF = 16,       // X87 float exception
  X86_TRAPNO_AC = 17,       // Alignment check
  X86_TRAPNO_MC = 18,       // Machine check
  X86_TRAPNO_XF = 19,       // SIMD floating point exception
  X86_TRAPNO_VE = 20,       // Virtualization exception
  X86_TRAPNO_CP = 21,       // Control protection exception
  X86_TRAPNO_VC = 29,       // VMM communication exception
  X86_TRAPNO_IRET = 32,     // IRET exception
};

// X86 page fault error code bits
// Populates siginfo gregs[REG_ERR]
// Each enumerator is a single-bit mask, so values may be OR-ed together.
enum X86PageFaultBit : uint32_t {
  X86_PF_PROT = (1 << 0),  // 0: No page found 1: protection fault
  X86_PF_WRITE = (1 << 1), // 0: Access was read 1: Access was write
  X86_PF_USER = (1 << 2),  // 0: Kernel mode access 1: user-mode access
  X86_PF_RSV = (1 << 3),   // 1: Reserved bit?
  X86_PF_INSTR = (1 << 4), // 1: Fault from instruction fetch
  X86_PF_PK = (1 << 5),    // 1: Protection keys block access
  X86_PF_SGX = (1 << 6),   // 1: SGX MMU fault
};

} // namespace FEXCore::X86State


================================================
FILE: FEXCore/include/FEXCore/Debug/GDBReaderInterface.h
================================================
// SPDX-License-Identifier: MIT
#include <cstddef>
#include <cstdint>

#include <gdb/jit-reader.h>

// everything is stored inline as it is marshaled cross process by gdb

// Description of one named address range. The name is a fixed-size inline buffer (no pointers)
// because the data is marshaled across processes by gdb.
struct blocks_t {
  char name[512];
  // NOTE(review): whether `end` is inclusive or exclusive is not visible here — confirm against the reader.
  GDB_CORE_ADDR start;
  GDB_CORE_ADDR end;
};

// Header record. Like blocks_t it stores everything inline, using offsets instead of pointers,
// because the data is marshaled across processes by gdb.
struct info_t {
  char filename[512];

  // NOTE(review): presumably offsets to the block and line arrays; the base they are relative to
  // is not visible here — confirm against the producer/reader.
  ptrdiff_t blocks_ofs;
  ptrdiff_t lines_ofs;

  // Element counts for the two arrays referenced above.
  int nblocks;
  int nlines;
};


================================================
FILE: FEXCore/include/FEXCore/Debug/InternalThreadState.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Utils/AllocatorHooks.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/LongJump.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/vector.h>

#include <cstddef>
#include <cstdint>
#include <shared_mutex>
#include <type_traits>

namespace FEXCore {
class LookupCache;
class CompileService;
struct JITSymbolBuffer;
} // namespace FEXCore

namespace FEXCore::Context {
class Context;
}

namespace FEXCore::CPU {
class CPUBackend;
} // namespace FEXCore::CPU

namespace FEXCore::Frontend {
class Decoder;
}

namespace FEXCore::IR {
class OpDispatchBuilder;
class PassManager;
} // namespace FEXCore::IR

namespace FEXCore::SHMStats {
struct ThreadStats;
};

namespace FEXCore::Core {

// Special-purpose replacement for std::unique_ptr to allow InternalThreadState to be standard layout.
// Since a NonMovableUniquePtr is neither copyable nor movable, its only function is to own and release the contained object.
//
// Ownership rules mirror std::unique_ptr:
//  - assigning a fextl::unique_ptr takes ownership of its pointee and destroys whatever was owned before,
//  - the deleter is only ever invoked on a non-null pointer.
template<typename T>
struct NonMovableUniquePtr {
  NonMovableUniquePtr() noexcept = default;
  NonMovableUniquePtr(const NonMovableUniquePtr&) = delete;
  NonMovableUniquePtr& operator=(const NonMovableUniquePtr& UPtr) = delete;

  // Takes ownership of the object held by UPtr.
  // Any previously owned object is destroyed so that repeated assignment does not leak.
  NonMovableUniquePtr& operator=(fextl::unique_ptr<T> UPtr) noexcept {
    // Install the new pointer first so the old object is never reachable through *this while it is being destroyed.
    T* Previous = Ptr;
    Ptr = UPtr.release();
    if (Previous != nullptr) {
      fextl::default_delete<T> {}(Previous);
    }
    return *this;
  }

  // Destroys the owned object, if any.
  ~NonMovableUniquePtr() {
    if (Ptr != nullptr) {
      fextl::default_delete<T> {}(Ptr);
    }
  }

  // Member access on the owned object; the pointer is not checked for null.
  T* operator->() const noexcept {
    return Ptr;
  }

  // Dereferences the owned object; the pointer is not checked for null.
  std::add_lvalue_reference_t<T> operator*() const noexcept {
    return *Ptr;
  }

  // Returns the raw pointer without giving up ownership (nullptr if nothing is owned).
  T* get() const noexcept {
    return Ptr;
  }

  // True when an object is currently owned.
  explicit operator bool() const noexcept {
    return Ptr != nullptr;
  }

private:
  T* Ptr = nullptr;
};
static_assert(!std::is_move_constructible_v<NonMovableUniquePtr<int>>);
static_assert(!std::is_move_assignable_v<NonMovableUniquePtr<int>>);

// Store used for unaligned LDAXR*/STLXR* emulation.
// No member has a default initializer.
struct UnalignedExclusiveStore {
  // NOTE(review): presumably the address, the data value and the access size in bytes of the
  // tracked exclusive access — confirm against the emulation code.
  uint64_t Addr;
  uint64_t Store;
  uint8_t Size;
};

// Per-thread state object. It is page-aligned and file-level static_asserts require it to be
// standard layout, exactly two pages in size, and to keep InterruptFaultPage less than one page
// after the start of BaseFrameState. Adding or resizing members therefore needs care.
struct alignas(FEXCore::Utils::FEX_PAGE_SIZE) InternalThreadState : public FEXCore::Allocator::FEXAllocOperators {
  // Always points at this object's own BaseFrameState; the pointer itself cannot be reseated.
  FEXCore::Core::CpuStateFrame* const CurrentFrame = &BaseFrameState;

  // Has no default initializer, so it must be provided when the object is constructed.
  FEXCore::Context::Context* const CTX;

  // Owned helper objects; each is destroyed together with this thread state.
  NonMovableUniquePtr<FEXCore::IR::OpDispatchBuilder> OpDispatcher;

  NonMovableUniquePtr<FEXCore::CPU::CPUBackend> CPUBackend;
  NonMovableUniquePtr<FEXCore::LookupCache> LookupCache;

  NonMovableUniquePtr<FEXCore::Frontend::Decoder> FrontendDecoder;
  NonMovableUniquePtr<FEXCore::IR::PassManager> PassManager;
  NonMovableUniquePtr<JITSymbolBuffer> SymbolBuffer;

  std::shared_ptr<FEXCore::CompileService> CompileService;

  std::shared_mutex ObjectCacheRefCounter {};

  // This pointer is owned by the frontend.
  FEXCore::SHMStats::ThreadStats* ThreadStats {};

  UnalignedExclusiveStore ExclusiveStore;

  ///< Data pointer for exclusive use by the frontend
  void* FrontendPtr;

  static constexpr size_t CALLRET_STACK_SIZE {0x400000};

  // The low address of the call-ret stack allocation (not including guard pages)
  void* CallRetStackBase {};

  uintptr_t JITGuardPage {};
  uint64_t JITGuardOverflowArgument {};
  FEXCore::UncheckedLongJump::JumpBuf RestartJump;

  // BaseFrameState should always be at the end, directly before the interrupt fault page
  FEXCore::Core::CpuStateFrame BaseFrameState {};

  // Can be reprotected as RO to trigger an interrupt at generated code block entrypoints
  alignas(FEXCore::Utils::FEX_PAGE_SIZE) uint8_t InterruptFaultPage[FEXCore::Utils::FEX_PAGE_SIZE];
};
static_assert(std::is_standard_layout_v<FEXCore::Core::InternalThreadState>);
static_assert((offsetof(FEXCore::Core::InternalThreadState, InterruptFaultPage) - offsetof(FEXCore::Core::InternalThreadState, BaseFrameState)) <
                FEXCore::Utils::FEX_PAGE_SIZE,
              "Fault page is outside of immediate range from CPU state");
static_assert(sizeof(FEXCore::Core::InternalThreadState) == (FEXCore::Utils::FEX_PAGE_SIZE * 2));

} // namespace FEXCore::Core


================================================
FILE: FEXCore/include/FEXCore/HLE/SourcecodeResolver.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <memory>
#include <filesystem>

namespace FEXCore::HLE {

// Associates a range of guest file positions with a source line number.
// SourcecodeMap lookups treat the range as half-open: [FileGuestBegin, FileGuestEnd).
struct SourcecodeLineMapping {
  // First position covered by this mapping (inclusive).
  uintptr_t FileGuestBegin;
  // One past the last position covered by this mapping (exclusive).
  uintptr_t FileGuestEnd;

  // Line number associated with the range.
  int LineNumber;
};

// Associates a range of guest file positions with a symbol name.
// SourcecodeMap lookups treat the range as half-open: [FileGuestBegin, FileGuestEnd).
struct SourcecodeSymbolMapping {
  uintptr_t FileGuestBegin;
  uintptr_t FileGuestEnd;

  fextl::string Name;

  // Builds a human-readable label for a host entry point.
  //
  // The label always starts with the stem of GuestFilename and ends with the
  // host entry address in hex. In between it contains:
  //  - "<symbol>+<offset>" when Sym is given and FileBegin is past the symbol start,
  //  - "<symbol>" when Sym is given and FileBegin equals the symbol start,
  //  - "+<FileBegin>" when no symbol is available (Sym is null).
  static fextl::string SymName(const SourcecodeSymbolMapping* Sym, const fextl::string& GuestFilename, uintptr_t HostEntry, uintptr_t FileBegin) {
    const auto FileStem = std::filesystem::path(GuestFilename).stem().string();

    if (!Sym) {
      return fextl::fmt::format("{}: +{} @{:x}", FileStem, FileBegin, HostEntry);
    }

    const auto OffsetInSymbol = FileBegin - Sym->FileGuestBegin;
    if (OffsetInSymbol == 0) {
      return fextl::fmt::format("{}: {} @{:x}", FileStem, Sym->Name, HostEntry);
    }

    return fextl::fmt::format("{}: {}+{} @{:x}", FileStem, Sym->Name, OffsetInSymbol, HostEntry);
  }
};

// Line and symbol mappings for one source file.
// Both mapping vectors are binary-searched on FileGuestEnd, so they must be
// kept sorted accordingly (as their names indicate).
struct SourcecodeMap {
  fextl::string SourceFile;
  fextl::vector<SourcecodeLineMapping> SortedLineMappings;
  fextl::vector<SourcecodeSymbolMapping> SortedSymbolMappings;

  // Invokes Callback with an iterator to every line mapping that overlaps
  // [FileBegin, FileBegin + Size), in sorted order. Iteration stops at the
  // first mapping that no longer overlaps the range.
  template<typename F>
  void IterateLineMappings(uintptr_t FileBegin, uintptr_t Size, const F& Callback) const {
    const uintptr_t RangeBegin = FileBegin;
    const uintptr_t RangeEnd = FileBegin + Size;

    const auto Last = SortedLineMappings.cend();
    // First mapping whose end lies strictly after the start of the range.
    auto It = std::lower_bound(SortedLineMappings.cbegin(), Last, RangeBegin,
                               [](const auto& Mapping, const auto Position) { return Mapping.FileGuestEnd <= Position; });

    for (; It != Last; ++It) {
      const bool Overlaps = It->FileGuestBegin < RangeEnd && It->FileGuestEnd > RangeBegin;
      if (!Overlaps) {
        break;
      }
      Callback(It);
    }
  }

  // Returns the line mapping containing FileBegin, or nullptr if there is none.
  const SourcecodeLineMapping* FindLineMapping(uintptr_t FileBegin) const {
    return Find(FileBegin, SortedLineMappings);
  }

  // Returns the symbol mapping containing FileBegin, or nullptr if there is none.
  const SourcecodeSymbolMapping* FindSymbolMapping(uintptr_t FileBegin) const {
    return Find(FileBegin, SortedSymbolMappings);
  }
private:
  // Shared lookup: binary-searches SortedMappings for the entry whose
  // [FileGuestBegin, FileGuestEnd) range contains FileBegin.
  template<typename VecT>
  const typename VecT::value_type* Find(uintptr_t FileBegin, const VecT& SortedMappings) const {
    const auto Candidate = std::lower_bound(SortedMappings.cbegin(), SortedMappings.cend(), FileBegin,
                                            [](const auto& Mapping, const auto Position) { return Mapping.FileGuestEnd <= Position; });

    if (Candidate == SortedMappings.cend()) {
      return nullptr;
    }

    const bool Contains = Candidate->FileGuestBegin <= FileBegin && Candidate->FileGuestEnd > FileBegin;
    return Contains ? &(*Candidate) : nullptr;
  }
};

// Interface implemented outside of this header to produce a SourcecodeMap for
// a guest binary.
class SourcecodeResolver {
public:
  // This is a polymorphic base class; a virtual destructor makes destroying an
  // implementation through a SourcecodeResolver pointer well-defined.
  virtual ~SourcecodeResolver() = default;

  // Generates the source mapping for the given guest binary.
  // NOTE(review): the meaning of GuestBinaryFileId is not visible here — confirm with implementations.
  virtual fextl::unique_ptr<SourcecodeMap> GenerateMap(std::string_view GuestBinaryFile, std::string_view GuestBinaryFileId) = 0;
};
} // namespace FEXCore::HLE


================================================
FILE: FEXCore/include/FEXCore/HLE/SyscallHandler.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>
#include <optional>

#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/fextl/string.h>

namespace FEXCore::Context {
class Context;
}

namespace FEXCore::Core {
struct InternalThreadState;
struct CpuStateFrame;
} // namespace FEXCore::Core

namespace FEXCore::HLE {
// Fixed-size argument block handed to SyscallHandler::HandleSyscall.
struct SyscallArguments {
  // Number of 64-bit slots in Argument.
  // NOTE(review): presumably slot 0 carries the syscall number followed by its arguments — confirm with callers.
  static constexpr std::size_t MAX_ARGS = 7;
  uint64_t Argument[MAX_ARGS];
};

// Describes the calling shape of a single syscall.
struct SyscallABI {
  // Expectation is that the backend will be aware of how to modify the arguments based on numbering
  // Only GPRs expected
  uint8_t NumArgs;
  // If the syscall has a return then it should be stored in the ABI specific syscall register
  // Linux = RAX
  bool HasReturn;

  // Syscall number on the host.
  // NOTE(review): the value used when no host equivalent exists is not visible here — confirm.
  int32_t HostSyscallNumber;
};

// Identifies which OS syscall ABI a SyscallHandler implements (see SyscallHandler::GetOSABI).
enum class SyscallOSABI {
  OS_UNKNOWN,
  OS_LINUX64,
  OS_LINUX32,
  OS_GENERIC, // No JIT-side argument handling, spill/fill all regs.
};

// Result of SyscallHandler::QueryGuestExecutableRange.
struct ExecutableRangeInfo {
  // Start of the range.
  uint64_t Base;
  // Length of the range.
  // NOTE(review): presumably in bytes, with Size == 0 meaning "not executable" — confirm with implementations.
  uint64_t Size;
  // Whether the range is also writable.
  bool Writable;
};

class SyscallHandler;
class SourcecodeResolver;

// Interface between FEXCore and the frontend's syscall / memory-tracking layer.
// Hooks with an empty default body are optional for implementations; pure
// virtual members must be provided.
class SyscallHandler {
public:
  virtual ~SyscallHandler() = default;

  // Executes the syscall described by Args for the thread owning Frame and returns its result.
  virtual uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) = 0;

  // Returns the OS ABI this handler implements (OS_UNKNOWN unless a derived class sets OSABI).
  SyscallOSABI GetOSABI() const {
    return OSABI;
  }

  // Optional notifications about guest memory ranges; the defaults do nothing.
  virtual void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) {}
  virtual void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) {}
  virtual void MarkOvercommitRange(uint64_t Start, uint64_t Length) {}
  virtual void UnmarkOvercommitRange(uint64_t Start, uint64_t Length) {}

  // Queries describing the guest memory around an address.
  virtual ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) = 0;
  virtual std::optional<ExecutableFileSectionInfo> LookupExecutableFileSection(Core::InternalThreadState* Thread, uint64_t GuestAddr) = 0;

  // Optional hook; the default does nothing.
  virtual void PreCompile() {}

  // Returns the source-code resolver, or nullptr when the handler provides none (the default).
  virtual SourcecodeResolver* GetSourcecodeResolver() {
    return nullptr;
  }

  // Optional hook; the default does nothing.
  virtual void SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame) {}

protected:
  // Set by derived classes. Default-initialized so GetOSABI() never reads an
  // indeterminate value when a derived class does not assign it.
  SyscallOSABI OSABI {SyscallOSABI::OS_UNKNOWN};
};
} // namespace FEXCore::HLE


================================================
FILE: FEXCore/include/FEXCore/IR/IR.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/EnumOperators.h>

#include <compare>
#include <cstdint>
#include <cstring>

namespace FEXCore::IR {

// This enum of named vector constants is linked to an array in CPUBackend.cpp.
// It is used with the IROp `LoadNamedVectorConstant` to load a vector constant
// that would otherwise be costly to materialize.
// Enumerators are numbered sequentially from zero, so new entries must be kept
// in sync with that array.
enum NamedVectorConstant : uint8_t {
  NAMED_VECTOR_INCREMENTAL_U16_INDEX = 0,
  NAMED_VECTOR_INCREMENTAL_U16_INDEX_UPPER,
  NAMED_VECTOR_PADDSUBPS_INVERT,
  NAMED_VECTOR_PADDSUBPS_INVERT_UPPER,
  NAMED_VECTOR_PADDSUBPD_INVERT,
  NAMED_VECTOR_PADDSUBPD_INVERT_UPPER,
  NAMED_VECTOR_PSUBADDPS_INVERT,
  NAMED_VECTOR_PSUBADDPS_INVERT_UPPER,
  NAMED_VECTOR_PSUBADDPD_INVERT,
  NAMED_VECTOR_PSUBADDPD_INVERT_UPPER,
  NAMED_VECTOR_MOVMSKPS_SHIFT,
  NAMED_VECTOR_AESKEYGENASSIST_SWIZZLE,
  NAMED_VECTOR_BLENDPS_0110B,
  NAMED_VECTOR_BLENDPS_0111B,
  NAMED_VECTOR_BLENDPS_1001B,
  NAMED_VECTOR_BLENDPS_1011B,
  NAMED_VECTOR_BLENDPS_1101B,
  NAMED_VECTOR_BLENDPS_1110B,
  NAMED_VECTOR_MOVMASKB,
  NAMED_VECTOR_MOVMASKB_UPPER,

  NAMED_VECTOR_X87_ONE,
  NAMED_VECTOR_X87_LOG2_10,
  NAMED_VECTOR_X87_LOG2_E,
  NAMED_VECTOR_X87_PI,
  NAMED_VECTOR_X87_LOG10_2,
  NAMED_VECTOR_X87_LOG_2,

  NAMED_VECTOR_CVTMAX_F32_I32,
  NAMED_VECTOR_CVTMAX_F32_I32_UPPER,
  NAMED_VECTOR_CVTMAX_F32_I64,
  NAMED_VECTOR_CVTMAX_F64_I32,
  NAMED_VECTOR_CVTMAX_F64_I32_UPPER,
  NAMED_VECTOR_CVTMAX_F64_I64,
  NAMED_VECTOR_CVTMAX_I32,
  NAMED_VECTOR_CVTMAX_I64,
  NAMED_VECTOR_F80_SIGN_MASK,
  NAMED_VECTOR_SHA1RNDS_K0,
  NAMED_VECTOR_SHA1RNDS_K1,
  NAMED_VECTOR_SHA1RNDS_K2,
  NAMED_VECTOR_SHA1RNDS_K3,

  // Number of constants that are backed by the constant pool.
  NAMED_VECTOR_CONST_POOL_MAX,
  // Beginning of named constants that don't have a constant pool backing.
  NAMED_VECTOR_ZERO = NAMED_VECTOR_CONST_POOL_MAX,
  // Total number of named constants, pool-backed or not.
  NAMED_VECTOR_MAX,
};

// This enum of named vector constants is linked to an array in CPUBackend.cpp.
// It is used with the IROp `LoadNamedVectorIndexedConstant` to load a vector constant
// that would otherwise be costly to materialize.
enum IndexNamedVectorConstant : uint8_t {
  INDEXED_NAMED_VECTOR_PSHUFLW = 0,
  INDEXED_NAMED_VECTOR_PSHUFHW,
  INDEXED_NAMED_VECTOR_PSHUFD,
  INDEXED_NAMED_VECTOR_SHUFPS,
  INDEXED_NAMED_VECTOR_DPPS_MASK,
  INDEXED_NAMED_VECTOR_DPPD_MASK,
  INDEXED_NAMED_VECTOR_PBLENDW,
  // Number of indexed named constants; not a constant itself.
  INDEXED_NAMED_VECTOR_MAX,
};

// Raw 32-byte (256-bit) digest with defaulted, member-wise comparison so it
// can be ordered and compared directly.
struct SHA256Sum final {
  uint8_t data[32];
  [[nodiscard]] auto operator<=>(const SHA256Sum&) const noexcept = default;
};

// Signature of a thunk entry point: takes a single opaque pointer.
// NOTE(review): going by the parameter name, ArgsRv presumably points at a packed arguments + return value block — confirm.
typedef void ThunkedFunction(void* ArgsRv);

// Pairs a SHA256Sum with the thunk function it identifies.
struct ThunkDefinition final {
  SHA256Sum Sum;
  ThunkedFunction* ThunkFunction;
};

} // namespace FEXCore::IR


================================================
FILE: FEXCore/include/FEXCore/Utils/Allocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <sys/types.h>

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEXCore::Allocator {
// Installs / removes FEXCore's allocator hooks.
// NOTE(review): implementations live outside this header; the exact hooks affected are not visible here.
FEX_DEFAULT_VISIBILITY void SetupHooks(size_t PageSize);
FEX_DEFAULT_VISIBILITY void ClearHooks();

// NOTE(review): presumably returns the size of the host virtual address space — confirm the unit (bytes vs. bits) in the implementation.
FEX_DEFAULT_VISIBILITY size_t DetermineVASize();

// With GLIBC_ALLOCATOR_FAULT defined the real fault-evaluation helpers are
// declared here and defined elsewhere; otherwise every helper below collapses
// to an inline no-op so call sites need no conditional compilation.
#ifdef GLIBC_ALLOCATOR_FAULT
// Glibc hooks should only fault once we are in main.
// Required since glibc allocator hooking will catch things before FEX has control.
FEX_DEFAULT_VISIBILITY void SetupFaultEvaluate();
// Glibc hook faulting needs to be disabled when leaving main.
// Required since glibc does some state teardown after main.
FEX_DEFAULT_VISIBILITY void ClearFaultEvaluate();

// Scope object whose constructor/destructor are defined out of line.
// NOTE(review): by its name this temporarily permits glibc allocator use for the object's lifetime — confirm in the implementation.
class FEX_DEFAULT_VISIBILITY YesIKnowImNotSupposedToUseTheGlibcAllocator final {
public:
  FEX_DEFAULT_VISIBILITY YesIKnowImNotSupposedToUseTheGlibcAllocator();
  FEX_DEFAULT_VISIBILITY ~YesIKnowImNotSupposedToUseTheGlibcAllocator();
  FEX_DEFAULT_VISIBILITY static void HardDisable();
};

// RAII helper: enables fault evaluation on construction and clears it on destruction.
class FEX_DEFAULT_VISIBILITY GLIBCScopedFault final {
public:
  GLIBCScopedFault() {
    FEXCore::Allocator::SetupFaultEvaluate();
  }
  ~GLIBCScopedFault() {
    FEXCore::Allocator::ClearFaultEvaluate();
  }
};
#else
// No-op variants used when glibc allocator faulting is compiled out.
FEX_DEFAULT_VISIBILITY inline void SetupFaultEvaluate() {}
FEX_DEFAULT_VISIBILITY inline void ClearFaultEvaluate() {}

class FEX_DEFAULT_VISIBILITY YesIKnowImNotSupposedToUseTheGlibcAllocator final {
public:
  FEX_DEFAULT_VISIBILITY YesIKnowImNotSupposedToUseTheGlibcAllocator() {}
  FEX_DEFAULT_VISIBILITY ~YesIKnowImNotSupposedToUseTheGlibcAllocator() {}
  FEX_DEFAULT_VISIBILITY static inline void HardDisable() {}
};

class FEX_DEFAULT_VISIBILITY GLIBCScopedFault final {
public:
  GLIBCScopedFault() {
    // nop
  }
  ~GLIBCScopedFault() {
    // nop
  }
};
#endif

// A contiguous region of memory described by its start pointer and size.
struct MemoryRegion {
  // Start of the region.
  void* Ptr;
  // Length of the region (presumably in bytes — confirm with producers).
  size_t Size;
};

FEX_DEFAULT_VISIBILITY fextl::vector<MemoryRegion> CollectMemoryGaps(uintptr_t Begin, uintptr_t End, int MapsFD);
FEX_DEFAULT_VISIBILITY fextl::vector<MemoryRegion> StealMemoryRegion(uintptr_t Begin, uintptr_t End);
FEX_DEFAULT_VISIBILITY void ReclaimMemoryRegion(const fextl::vector<MemoryRegion>& Regions);
// When running a 64-bit executable on ARM then userspace guest only gets 47 bits of VA
// This is a feature of x86-64 where the kernel gets a full 128TB of VA space
// x86-64 canonical addresses with bit 48 set will sign extend the address (Ignoring LA57)
// AArch64 canonical addresses are only up to bits 48/52 with the remainder being other things
// Use this to reserve the top 128TB of VA so the guest never sees it
// Returns nullptr on host VA < 48bits
FEX_DEFAULT_VISIBILITY fextl::vector<MemoryRegion> Setup48BitAllocatorIfExists(size_t PageSize);

#ifndef _WIN32
FEX_DEFAULT_VISIBILITY void RegisterTLSData(FEXCore::Core::InternalThreadState* Thread);
FEX_DEFAULT_VISIBILITY void UninstallTLSData(FEXCore::Core::InternalThreadState* Thread);
#endif
} // namespace FEXCore::Allocator


================================================
FILE: FEXCore/include/FEXCore/Utils/AllocatorHooks.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/EnumOperators.h>
#include <FEXCore/Utils/LogManager.h>

#ifndef _WIN32
#include <stdlib.h>
#include <malloc.h>
#include <sys/mman.h>
#else
#define NTDDI_VERSION 0x0A000005
#include <memoryapi.h>
#endif

#include <new>
#include <cstddef>
#include <cstdint>
#include <sys/types.h>

namespace FEXCore::Allocator {
// Bit flags describing the page protection requested from VirtualProtect.
// Flags can be combined with the operators generated by FEX_DEF_NUM_OPS below.
enum class ProtectOptions : uint32_t {
  None = 0,
  Read = (1U << 0),
  Write = (1U << 1),
  Exec = (1U << 2),
};
// Generates |, &, ^ and ~ for ProtectOptions.
FEX_DEF_NUM_OPS(ProtectOptions)

// Selects whether VirtualTHPControl enables or disables transparent huge
// pages for a range (MADV_HUGEPAGE / MADV_NOHUGEPAGE on the POSIX path).
enum class THPControl {
  Enable,
  Disable,
};

#ifdef _WIN32
// Win32: reserves (and, when Commit is set, commits) Size bytes at Base.
// Pages are read/write, plus execute when Execute is set.
// Returns the result of the underlying Win32 allocation call.
inline void* VirtualAlloc(void* Base, size_t Size, bool Execute = false, bool Commit = true) {
  // Allocate top-down to avoid polluting the lower VA space, as even on 64-bit some programs (e.g. LuaJIT) require allocations below 4GB.
  DWORD Flags = (Commit ? MEM_COMMIT : 0) | MEM_RESERVE | MEM_TOP_DOWN;
#ifdef ARCHITECTURE_arm64ec
  // On ARM64EC, executable allocations are additionally tagged with the EC code attribute.
  MEM_EXTENDED_PARAMETER Parameter {};
  if (Execute) {
    Parameter.Type = MemExtendedParameterAttributeFlags;
    Parameter.ULong64 = MEM_EXTENDED_PARAMETER_EC_CODE;
  };
  // The extended parameter is only passed for executable allocations.
  return ::VirtualAlloc2(nullptr, Base, Size, Flags, Execute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, Execute ? &Parameter : nullptr,
                         Execute ? 1 : 0);
#else
  return ::VirtualAlloc(Base, Size, Flags, Execute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE);
#endif
}

// Win32: same as the Base-taking overload, with a null Base address.
inline void* VirtualAlloc(size_t Size, bool Execute = false, bool Commit = true) {
  return VirtualAlloc(nullptr, Size, Execute, Commit);
}

// Win32: releases the allocation starting at Ptr.
// Size is unused here: the Win32 call is made with a size of 0 together with MEM_RELEASE.
inline void VirtualFree(void* Ptr, size_t Size) {
  ::VirtualFree(Ptr, 0, MEM_RELEASE);
}

// Win32: discards the contents of [Ptr, Ptr + Size) by decommitting it and,
// when Recommit is set, committing it again with the protection it had before.
inline void VirtualDontNeed(void* Ptr, size_t Size, bool Recommit = true) {
  // Zero the page-aligned region, preserving permissions.
  MEMORY_BASIC_INFORMATION Info;
  // Capture the current protection so it can be restored on recommit.
  // NOTE(review): the VirtualQuery result is not checked; Info is uninitialized if the query fails.
  ::VirtualQuery(Ptr, &Info, sizeof(Info));
  ::VirtualFree(Ptr, Size, MEM_DECOMMIT);
  if (Recommit) {
    ::VirtualAlloc(Ptr, Size, MEM_COMMIT, Info.Protect);
  }
}

inline bool VirtualProtect(void* Ptr, size_t Size, ProtectOptions options) {
  DWORD prot {PAGE_NOACCESS};

  if (options == ProtectOptions::None) {
    prot = PAGE_NOACCESS;
  } else if (options == ProtectOptions::Read) {
    prot = PAGE_READONLY;
  } else if (options == (ProtectOptions::Read | ProtectOptions::Write)) {
    prot = PAGE_READWRITE;
  } else if (options == (ProtectOptions::Read | ProtectOptions::Exec)) {
    prot = PAGE_EXECUTE_READ;
  } else if (options == (ProtectOptions::Read | ProtectOptions::Write | ProtectOptions::Exec)) {
    prot = PAGE_EXECUTE_READWRITE;
  } else {
    LOGMAN_MSG_A_FMT("Unknown VirtualProtect options combination");
  }

  return ::VirtualProtect(Ptr, Size, prot, nullptr) == 0;
}

// Win32: naming a mapping and controlling transparent huge pages are no-ops in this build.
inline void VirtualName(const char*, void*, size_t) {}
inline void VirtualTHPControl(void* Ptr, size_t Size, THPControl Control) {}

#else
using MMAP_Hook = void* (*)(void*, size_t, int, int, int, off_t);
using MUNMAP_Hook = int (*)(void*, size_t);

FEX_DEFAULT_VISIBILITY extern MMAP_Hook mmap;
FEX_DEFAULT_VISIBILITY extern MUNMAP_Hook munmap;
FEX_DEFAULT_VISIBILITY extern void VirtualName(const char* Name, void* Ptr, size_t Size);

// All commit parameters are ignored here, they are unnecessary as Linux supports overcommit

// POSIX: maps Size bytes of private anonymous read/write memory (plus execute
// when Execute is set) at a kernel-chosen address through the mmap hook.
// Commit is ignored. Returns whatever the mmap hook returns.
inline void* VirtualAlloc(size_t Size, bool Execute = false, bool Commit = true) {
  int Protection = PROT_READ | PROT_WRITE;
  if (Execute) {
    Protection |= PROT_EXEC;
  }
  return FEXCore::Allocator::mmap(nullptr, Size, Protection, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

// POSIX: like the Size-only overload, but passes Base to the mmap hook as the
// address hint (no MAP_FIXED is requested). Commit is ignored.
inline void* VirtualAlloc(void* Base, size_t Size, bool Execute = false, bool Commit = true) {
  int Protection = PROT_READ | PROT_WRITE;
  if (Execute) {
    Protection |= PROT_EXEC;
  }
  return FEXCore::Allocator::mmap(Base, Size, Protection, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
}

// POSIX: unmaps [Ptr, Ptr + Size) through the munmap hook; the hook's result is ignored.
inline void VirtualFree(void* Ptr, size_t Size) {
  FEXCore::Allocator::munmap(Ptr, Size);
}
// POSIX: advises the kernel with MADV_DONTNEED that [Ptr, Ptr + Size) is no
// longer needed. Recommit is ignored and the madvise result is not checked.
inline void VirtualDontNeed(void* Ptr, size_t Size, bool Recommit = true) {
  ::madvise(Ptr, Size, MADV_DONTNEED);
}
// POSIX: applies the protection described by options to [Ptr, Ptr + Size)
// using mprotect. Each ProtectOptions flag maps onto its PROT_* counterpart.
// Returns true when mprotect succeeds.
inline bool VirtualProtect(void* Ptr, size_t Size, ProtectOptions options) {
  const auto IsSet = [options](ProtectOptions Flag) {
    return (options & Flag) == Flag;
  };

  const int prot = PROT_NONE | (IsSet(ProtectOptions::Read) ? PROT_READ : 0) | (IsSet(ProtectOptions::Write) ? PROT_WRITE : 0) |
                   (IsSet(ProtectOptions::Exec) ? PROT_EXEC : 0);

  const int Result = ::mprotect(Ptr, Size, prot);
  return Result == 0;
}

// POSIX: enables or disables transparent huge pages for [Ptr, Ptr + Size)
// via madvise. The madvise result is not checked.
inline void VirtualTHPControl(void* Ptr, size_t Size, THPControl Control) {
  int Advice = MADV_NOHUGEPAGE;
  if (Control == THPControl::Enable) {
    Advice = MADV_HUGEPAGE;
  }
  ::madvise(Ptr, Size, Advice);
}

#endif

// Memory allocation routines to be defined externally.
// This allows to use jemalloc for emulation while using the normal allocator
// for host tools without building FEXCore twice.
void* malloc(size_t size);
void* calloc(size_t n, size_t size);
void* memalign(size_t align, size_t s);
void* valloc(size_t size);
int posix_memalign(void** r, size_t a, size_t s);
void* realloc(void* ptr, size_t size);
void free(void* ptr);
size_t malloc_usable_size(void* ptr);
void* aligned_alloc(size_t a, size_t s);
void aligned_free(void* ptr);

FEX_DEFAULT_VISIBILITY extern void InitializeThread();

#ifndef _WIN32
void InitializeAllocator(size_t PageSize);
void SetupAllocatorHooks(void* (*)(void* addr, size_t length, int prot, int flags, int fd, off_t offset), int (*)(void* addr, size_t length));
#endif

// Provides class-scope operator new/delete that route through the
// FEXCore::Allocator allocation functions instead of the global operators.
struct FEXAllocOperators {
  FEXAllocOperators() = default;

  // Plain allocation: forwarded to FEXCore::Allocator::malloc.
  void* operator new(size_t size) {
    void* Memory = FEXCore::Allocator::malloc(size);
    return Memory;
  }

  // Over-aligned allocation: forwarded to FEXCore::Allocator::aligned_alloc.
  void* operator new(size_t size, std::align_val_t align) {
    const auto Alignment = static_cast<size_t>(align);
    return FEXCore::Allocator::aligned_alloc(Alignment, size);
  }

  // Releases memory obtained from the plain operator new.
  void operator delete(void* ptr) {
    FEXCore::Allocator::free(ptr);
  }

  // Releases memory obtained from the over-aligned operator new; the alignment is not needed.
  void operator delete(void* ptr, std::align_val_t align) {
    FEXCore::Allocator::aligned_free(ptr);
  }
};
} // namespace FEXCore::Allocator


================================================
FILE: FEXCore/include/FEXCore/Utils/ArchHelpers/Arm64.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <stdint.h>
#include <optional>

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEXCore::ArchHelpers::Arm64 {
// Selects how HandleUnalignedAccess backpatches an unaligned access.
enum class UnalignedHandlerType {
  ///< Backpatch unaligned access to half-barrier based atomic.
  HalfBarrier,
  ///< Backpatch unaligned access to non-atomic.
  NonAtomic,
};

/**
 * @brief On ARM64 handles an unaligned memory access that the JIT has done.
 *
 * This is an OS agnostic handler where the frontend must provide FEXCore with the information necessary to know if this is safe.
 * This does not check if the PC is within a JIT code buffer, the frontend must provide that safety with `CPUBackend::IsAddressInCodeBuffer`.
 *
 * @param Thread The thread state passed through by the caller (presumably the thread that performed the access — confirm).
 * @param HandleType Type of TSO handling to use.
 * @param ProgramCounter The location in memory for the instruction that did the access
 * @param GPRs The array of GPRs from the signal context. This will be modified and the host context needs to be updated on signal return.
 * @param IsJIT Defaults to true. NOTE(review): presumably indicates the faulting instruction is in JIT-generated code — confirm in the implementation.
 *
 * @return Returns a value if the unaligned access has been handled with how many bytes to modify the host PC
 * by. FEXCore will return a positive or negative offset depending on internal handling.
 */
[[nodiscard]]
FEX_DEFAULT_VISIBILITY std::optional<int32_t> HandleUnalignedAccess(
  FEXCore::Core::InternalThreadState* Thread, UnalignedHandlerType HandleType, uintptr_t ProgramCounter, uint64_t* GPRs, bool IsJIT = true);
} // namespace FEXCore::ArchHelpers::Arm64


================================================
FILE: FEXCore/include/FEXCore/Utils/CompilerDefs.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
// Contains general abstractions related to compilers used to build FEX.

// Specifies the minimum alignment for a variable or structure field, measured in bytes.
#define FEX_ALIGNED(alignment) __attribute__((aligned(alignment)))

// Allows annotating declarations with extra information.
#define FEX_ANNOTATE(annotation_str) __attribute__((annotate(annotation_str)))

// Makes the attributed entity have the default DSO visibility level.
// Compiler options can affect the visibility of symbols. This attribute
// overrides said changes. This gives entities external linkage.
#define FEX_DEFAULT_VISIBILITY __attribute__((visibility("default")))

// Indicates that the specified function doesn't need a function prologue/epilogue
// emitted for it by the compiler.
#define FEX_NAKED __attribute__((naked))

// Specifies that a structure member or structure itself should have the smallest possible alignment.
#define FEX_PACKED __attribute__((packed))

// Causes execution to exit abnormally.
#define FEX_TRAP_EXECUTION FEXCore::Assert::ForcedAssert()

// Dictates to the compiler that the path this is on should not be reachable
// from normal execution control flow. If normal execution does reach this,
// then program behavior is undefined.
#define FEX_UNREACHABLE __builtin_unreachable()

// Like offsetof but for array members with a dynamic element index
#define ARRAY_OFFSETOF(Type, ArrayMember, Index) (offsetof(Type, ArrayMember) + sizeof(Type::ArrayMember[0]) * (Index))

namespace FEXCore::Assert {
// This function can not be inlined
[[noreturn]]
FEX_DEFAULT_VISIBILITY void ForcedAssert();
} // namespace FEXCore::Assert


================================================
FILE: FEXCore/include/FEXCore/Utils/EnumOperators.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <type_traits>

// Defines two overloads of binary operator `Op` for `Enum`:
//  - Enum Op Enum -> Enum, computed on the underlying type.
//  - uint64_t Op Enum -> uint64_t, for mixing a raw integer with a flag.
// uint64_t must be declared (e.g. via <cstdint>) wherever the macro is expanded.
#define FEX_DEF_ENUM_CLASS_BIN_OP(Enum, Op)                       \
  inline constexpr Enum operator Op(Enum lhs, Enum rhs) {         \
    using Type = std::underlying_type_t<Enum>;                    \
    Type _lhs = static_cast<Type>(lhs);                           \
    Type _rhs = static_cast<Type>(rhs);                           \
    return static_cast<Enum>(_lhs Op _rhs);                       \
  }                                                               \
  inline constexpr uint64_t operator Op(uint64_t lhs, Enum rhs) { \
    using Type = std::underlying_type_t<Enum>;                    \
    Type _rhs = static_cast<Type>(rhs);                           \
    return lhs Op _rhs;                                           \
  }

// Defines unary operator `Op` for `Enum`, computed on the underlying type and
// converted back to `Enum`.
#define FEX_DEF_ENUM_CLASS_UNARY_OP(Enum, Op)   \
  inline constexpr Enum operator Op(Enum rhs) { \
    using Type = std::underlying_type_t<Enum>;  \
    Type _rhs = static_cast<Type>(rhs);         \
    return static_cast<Enum>(Op _rhs);          \
  }

// Defines the bitwise operator set (|, &, ^ and unary ~) for `Enum`.
#define FEX_DEF_NUM_OPS(Enum)        \
  FEX_DEF_ENUM_CLASS_BIN_OP(Enum, |) \
  FEX_DEF_ENUM_CLASS_BIN_OP(Enum, &) \
  FEX_DEF_ENUM_CLASS_BIN_OP(Enum, ^) \
  FEX_DEF_ENUM_CLASS_UNARY_OP(Enum, ~)


================================================
FILE: FEXCore/include/FEXCore/Utils/EnumUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

// Header for various utilities related to operating with enums

#include <type_traits>

namespace FEXCore {

// Macro that defines all of the built in operators for conveniently using
// enum classes as flag types without needing to define all of the basic
// boilerplate.
//
// Generated for `type`: |, &, ^, their compound-assignment forms (|=, &=, ^=),
// unary ~, and the helpers True(key) / False(key), which test whether the
// underlying value is non-zero / zero.
#define FEX_DECLARE_ENUM_FLAG_OPERATORS(type)                        \
  [[nodiscard]]                                                      \
  constexpr type operator|(type a, type b) noexcept {                \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<type>(static_cast<T>(a) | static_cast<T>(b)); \
  }                                                                  \
  [[nodiscard]]                                                      \
  constexpr type operator&(type a, type b) noexcept {                \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<type>(static_cast<T>(a) & static_cast<T>(b)); \
  }                                                                  \
  [[nodiscard]]                                                      \
  constexpr type operator^(type a, type b) noexcept {                \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<type>(static_cast<T>(a) ^ static_cast<T>(b)); \
  }                                                                  \
  constexpr type& operator|=(type& a, type b) noexcept {             \
    a = a | b;                                                       \
    return a;                                                        \
  }                                                                  \
  constexpr type& operator&=(type& a, type b) noexcept {             \
    a = a & b;                                                       \
    return a;                                                        \
  }                                                                  \
  constexpr type& operator^=(type& a, type b) noexcept {             \
    a = a ^ b;                                                       \
    return a;                                                        \
  }                                                                  \
  [[nodiscard]]                                                      \
  constexpr type operator~(type key) noexcept {                      \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<type>(~static_cast<T>(key));                  \
  }                                                                  \
  [[nodiscard]]                                                      \
  constexpr bool True(type key) noexcept {                           \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<T>(key) != 0;                                 \
  }                                                                  \
  [[nodiscard]]                                                      \
  constexpr bool False(type key) noexcept {                          \
    using T = std::underlying_type_t<type>;                          \
    return static_cast<T>(key) == 0;                                 \
  }

// Macro that defines a fmt formatter for a reasonable case where an enum
// is formatted as a purely integral type based on its underlying type.
// It does so by defining a `format_as` overload for `type` that returns
// FEXCore::ToUnderlying(t).
#define FEX_DEFINE_ENUM_FMT_PASSTHROUGH(type) \
  constexpr auto format_as(type t) {          \
    return FEXCore::ToUnderlying(t);          \
  }

// Converts an enumerator to the value of its underlying integral type.
// Drop-in equivalent of C++23's std::to_underlying.
template<typename Enum>
[[nodiscard]]
constexpr std::underlying_type_t<Enum> ToUnderlying(Enum e) noexcept {
  using UnderlyingT = std::underlying_type_t<Enum>;
  return static_cast<UnderlyingT>(e);
}

} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Utils/Event.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <atomic>
#include <condition_variable>
#include <mutex>

// Auto-resetting event: a notification sets an internal flag, and a successful
// wait consumes (clears) it. A notification issued before anyone waits is not
// lost — the next wait returns immediately.
class Event final {
private:
  /**
   * @brief Atomic boolean with "flip if currently in the opposite state" operations
   */
  class Flag final {
  public:
    // Attempts to change the flag from !SetValue to SetValue.
    // Returns true only if this call performed the change.
    bool TestAndSet(bool SetValue = true) {
      bool Opposite = !SetValue;
      return Value.compare_exchange_strong(Opposite, SetValue);
    }

    // Attempts to change the flag from set to clear; true if it was set.
    bool TestAndClear() {
      return TestAndSet(false);
    }

  private:
    std::atomic_bool Value {false};
  };

public:
  ~Event() {
    NotifyAll();
  }

  // Signals the event and wakes one waiter. Does nothing if already signaled.
  void NotifyOne() {
    if (!FlagObject.TestAndSet()) {
      return;
    }

    std::lock_guard<std::mutex> Guard(MutexObject);
    CondObject.notify_one();
  }

  // Signals the event and wakes all waiters. Does nothing if already signaled.
  void NotifyAll() {
    if (!FlagObject.TestAndSet()) {
      return;
    }

    std::lock_guard<std::mutex> Guard(MutexObject);
    CondObject.notify_all();
  }

  // Blocks until the event is signaled, consuming the signal.
  void Wait() {
    // Fast path: the signal arrived before we started waiting.
    if (FlagObject.TestAndClear()) {
      return;
    }

    std::unique_lock<std::mutex> Guard(MutexObject);
    while (!FlagObject.TestAndClear()) {
      CondObject.wait(Guard);
    }
  }

  // Blocks until the event is signaled or `time` has elapsed.
  // Returns true if the signal was consumed, false on timeout.
  template<class Rep, class Period>
  bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
    // Fast path: the signal arrived before we started waiting.
    if (FlagObject.TestAndClear()) {
      return true;
    }

    std::unique_lock<std::mutex> Guard(MutexObject);
    return CondObject.wait_for(Guard, time, [this] { return FlagObject.TestAndClear(); });
  }

private:
  Flag FlagObject;
  std::mutex MutexObject;
  std::condition_variable CondObject;
};


================================================
FILE: FEXCore/include/FEXCore/Utils/FPState.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>

namespace FEXCore::FPState {
// Two-bit tag values as used in the full x87 tag word.
enum class X87Tag : uint8_t { Valid = 0b00, Zero = 0b01, Special = 0b10, Empty = 0b11 };

// Classifies one 80-bit x87 register.
//
// Reg[0] holds the 64-bit significand (bit 63 is the explicit integer bit),
// the low 15 bits of Reg[1] hold the exponent. When Valid is false the
// register is reported as Empty regardless of its contents.
static inline X87Tag GetX87Tag(uint64_t (&Reg)[2], bool Valid) {
  constexpr uint64_t ExponentMask = 0x7fff;
  constexpr uint64_t IntegerBit = 1ULL << 63;

  if (!Valid) {
    return X87Tag::Empty;
  }

  const uint64_t Exp = Reg[1] & ExponentMask;
  if (Exp == ExponentMask) {
    // (Pseudo) NaN / Inf
    return X87Tag::Special;
  }

  if (Exp == 0) {
    // Zero needs both the integer bit and the fraction clear, i.e. an all-zero
    // significand; anything else is a (pseudo) subnormal.
    return Reg[0] == 0 ? X87Tag::Zero : X87Tag::Special;
  }

  // Non-zero exponent: normal when the integer bit is set, invalid otherwise.
  const bool HasIntegerBit = (Reg[0] & IntegerBit) != 0;
  return HasIntegerBit ? X87Tag::Valid : X87Tag::Special;
}

// Expands an abridged tag word (one bit per register) into the full tag word
// (two bits per register) by classifying each register's contents.
//
// FSW supplies the top-of-stack index (bits 11-13). MM holds the registers in
// stack-relative order, while tag bits are indexed by physical register.
static inline uint16_t ConvertFromAbridgedFTW(uint16_t FSW, uint64_t (&MM)[8][2], uint8_t AbridgedFTW) {
  const uint32_t Top = (FSW >> 11) & 0b111;

  uint16_t FullTags = 0;
  for (uint32_t Phys = 0; Phys < 8; ++Phys) {
    // The AMD manual incorrectly states there is a direct mapping here; only the Intel manual
    // correctly describes the stack-relative behaviour.
    const uint16_t StackSlot = (Phys - Top) & 0b111;
    const bool InUse = (AbridgedFTW & (1 << Phys)) != 0;
    const X87Tag Tag = GetX87Tag(MM[StackSlot], InUse);
    FullTags |= static_cast<uint8_t>(Tag) << (2 * Phys);
  }

  return FullTags;
}

// Collapses a full tag word into the abridged form: bit i is set when
// register i's tag is anything other than Empty.
static inline uint8_t ConvertToAbridgedFTW(uint16_t FTW) {
  uint8_t Abridged = 0;

  for (uint32_t Phys = 0; Phys < 8; ++Phys) {
    const auto Tag = static_cast<X87Tag>((FTW >> (2 * Phys)) & 3);
    if (Tag != X87Tag::Empty) {
      Abridged |= 1 << Phys;
    }
  }

  return Abridged;
}
} // namespace FEXCore::FPState


================================================
FILE: FEXCore/include/FEXCore/Utils/File.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/Utils/EnumOperators.h>

#ifndef _WIN32
#include <fcntl.h>
#include <unistd.h>
#else
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#undef ERROR
#endif

namespace FEXCore::File {
// Bit flags describing how a File is opened. Flags are combined with the
// operators generated by FEX_DEF_NUM_OPS(FileModes).
enum class FileModes : uint32_t {
  READ = (1U << 0),
  WRITE = (1U << 1),
  CREATE = (1U << 2),
  TRUNCATE = (1U << 3),
};

// Reference point for File::Seek: the distance is applied starting from the
// beginning of the file, the current position, or the end of the file.
enum class SeekOp {
  BEGIN,
  CURRENT,
  END,
};

FEX_DEF_NUM_OPS(FileModes)

/**
 * @brief Thin wrapper around a native file handle (POSIX file descriptor or Win32 HANDLE).
 *
 * A File opened through the path constructor owns its handle and closes it on destruction.
 * The objects returned by GetStdOUT()/GetStdERR() wrap process-wide handles and never close them.
 *
 * NOTE(review): the class is implicitly copyable. Copying an owning File results in both copies
 * closing the same handle when destroyed, so owning objects should not be copied.
 */
class File final {
public:
#ifndef _WIN32
  using FileHandleType = int;
#else
  using FileHandleType = HANDLE;
#endif

  /**
   * @brief Constructs a File that refers to no handle. IsValid() returns false.
   */
  File() = default;

  /**
   * @brief Opens the file at Filepath with the requested modes.
   *
   * Check IsValid() afterwards to find out whether the open succeeded.
   *
   * @param Filepath The path of the file to open.
   * @param Modes Combination of FileModes flags.
   */
  File(const char* Filepath, FileModes Modes) {
#ifndef _WIN32
    auto Disp = TranslateModes(Modes);
    Handle = open(Filepath, Disp, DEFAULT_USER_PERMS);
    IsValidHandle = Handle != -1;
#else
    auto Disp = TranslateModes(Modes);
    if (Disp.CreationFlag == OPEN_ALWAYS && Disp.TruncateOnExist) {
      // If Open + Truncate then try to open with truncate behaviour first.
      Handle = CreateFileA(Filepath, Disp.Access, DEFAULT_SHARE_MODE, nullptr, TRUNCATE_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
      if (Handle == INVALID_HANDLE_VALUE && GetLastError() == ERROR_FILE_NOT_FOUND) {
        // File didn't exist, just open.
        Handle = CreateFileA(Filepath, Disp.Access, DEFAULT_SHARE_MODE, nullptr, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, nullptr);
      }
    } else {
      Handle = CreateFileA(Filepath, Disp.Access, DEFAULT_SHARE_MODE, nullptr, Disp.CreationFlag, FILE_ATTRIBUTE_NORMAL, nullptr);
    }
    IsValidHandle = Handle != INVALID_HANDLE_VALUE;
#endif
  }

  /**
   * @brief Write Bytes to File
   *
   * @param Buffer The buffer to write.
   * @param Bytes The number of bytes to write.
   *
   * @return The number of bytes actually written or -1 on error.
   */
  ssize_t Write(const void* Buffer, size_t Bytes) {
#ifndef _WIN32
    return write(Handle, Buffer, Bytes);
#else
    DWORD BytesWritten {};
    auto Result = WriteFile(Handle, Buffer, Bytes, &BytesWritten, nullptr);
    if (Result) {
      return BytesWritten;
    }
    // Some error, match Linux side.
    return -1;
#endif
  }

  /**
   * @brief Write the contents of a string view to the File.
   *
   * @param Data The characters to write.
   *
   * @return The number of bytes actually written or -1 on error.
   */
  ssize_t Write(const std::string_view Data) {
    return Write(Data.data(), Data.size());
  }

  /**
   * @brief Read at most Bytes in to the buffer.
   *
   * @param Buffer The buffer where the data is read in to.
   * @param Bytes The size of the buffer.
   *
   * @return The number of bytes read or -1 on error.
   */
  ssize_t Read(void* Buffer, size_t Bytes) {
#ifndef _WIN32
    return read(Handle, Buffer, Bytes);
#else
    DWORD BytesRead {};
    auto Result = ReadFile(Handle, Buffer, Bytes, &BytesRead, nullptr);
    if (Result) {
      return BytesRead;
    }
    // Some error, match Linux side.
    return -1;
#endif
  }

  /**
   * @brief Closes the handle if it is valid and owned by this object.
   */
  ~File() {
    if (!IsValidHandle) {
      return;
    }
    if (!ShouldClose) {
      return;
    }
#ifndef _WIN32
    close(Handle);
#else
    CloseHandle(Handle);
#endif
  }

  /**
   * @brief Gets a File object that points to stdout
   *
   * The returned object does not close the handle on destruction.
   */
  static File GetStdOUT() {
#ifndef _WIN32
    return File(STDOUT_FILENO, false);
#else
    return File(GetStdHandle(STD_OUTPUT_HANDLE), false);
#endif
  }

  /**
   * @brief Gets a File object that points to stderr
   *
   * The returned object does not close the handle on destruction.
   */
  static File GetStdERR() {
#ifndef _WIN32
    return File(STDERR_FILENO, false);
#else
    return File(GetStdHandle(STD_ERROR_HANDLE), false);
#endif
  }

  /**
   * @brief Returns if the file handle is valid.
   */
  bool IsValid() const {
    return IsValidHandle;
  }

  /**
   * @brief Flush the file contents to the output file backing.
   *
   * @return True if the flush occurred.
   */
  bool Flush() {
#ifndef _WIN32
    return fsync(Handle) == 0;
#else
    return FlushFileBuffers(Handle);
#endif
  }

  /**
   * @brief Seek the file pointer location.
   *
   * @param Distance The distance to travel.
   * @param Op The operation from where to start the travel.
   *
   * @return The current file pointer location or -1.
   */
  ssize_t Seek(ssize_t Distance, SeekOp Op) {
#ifndef _WIN32
    return lseek(Handle, Distance, TranslateSeek(Op));
#else
    LARGE_INTEGER NewDistance {.QuadPart = Distance};
    LARGE_INTEGER NewPointer;
    auto Result = SetFilePointerEx(Handle, NewDistance, &NewPointer, TranslateSeek(Op));
    if (Result) {
      return NewPointer.QuadPart;
    }
    // Some error, match Linux side.
    return -1;
#endif
  }

protected:

  // Adopts an already-open native handle. The handle is treated as valid;
  // ShouldClose selects whether the destructor closes it.
  File(FileHandleType Handle, bool ShouldClose)
    : ShouldClose {ShouldClose}
    , IsValidHandle {true}
    , Handle {Handle} {}
private:
  // Whether the destructor closes the handle.
  // Defaults to true so that files opened through the path constructor don't leak their handle;
  // the handle-adopting constructor always overrides this explicitly.
  bool ShouldClose {true};
  // Whether Handle refers to a successfully opened file.
  bool IsValidHandle {};

  FileHandleType Handle {};
#ifndef _WIN32
  static constexpr int DEFAULT_USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO;

  // Converts FileModes flags in to open(2) flags.
  static uint32_t TranslateModes(FileModes Modes) {
    const bool WantsRead = (Modes & FileModes::READ) == FileModes::READ;
    const bool WantsWrite = (Modes & FileModes::WRITE) == FileModes::WRITE;

    uint32_t Mode {};
    // The access mode is an enumeration rather than a bitmask: O_RDONLY | O_WRONLY is NOT O_RDWR,
    // so a combined read+write request must be translated explicitly.
    if (WantsRead && WantsWrite) {
      Mode |= O_RDWR;
    } else if (WantsWrite) {
      Mode |= O_WRONLY;
    } else {
      Mode |= O_RDONLY;
    }

    if ((Modes & FileModes::CREATE) == FileModes::CREATE) {
      Mode |= O_CREAT;
    }
    if ((Modes & FileModes::TRUNCATE) == FileModes::TRUNCATE) {
      Mode |= O_TRUNC;
    }

    // Always enable CLOEXEC so that the FD is closed on execve.
    // FEXCore never wants to leak FDs across execve using this interface.
    Mode |= O_CLOEXEC;
    return Mode;
  }

  // Converts a SeekOp in to the matching lseek(2) `whence` value.
  static uint32_t TranslateSeek(SeekOp Op) {
    switch (Op) {
    case SeekOp::BEGIN: return SEEK_SET;
    case SeekOp::CURRENT: return SEEK_CUR;
    case SeekOp::END: return SEEK_END;
    default: FEX_UNREACHABLE;
    }
  }
#else
  static constexpr int DEFAULT_SHARE_MODE = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
  // CreateFileA arguments derived from a FileModes value.
  struct Disposition {
    uint32_t CreationFlag; ///< dwCreationDisposition passed to CreateFileA.
    uint32_t Access;       ///< dwDesiredAccess passed to CreateFileA.
    bool TruncateOnExist;  ///< Set when FileModes::TRUNCATE was requested.
  };
  // Converts FileModes flags in to CreateFileA arguments.
  // NOTE(review): CREATE maps to CREATE_ALWAYS (which always truncates) and its absence to OPEN_ALWAYS
  // (which creates a missing file). This differs from the POSIX path, where only O_CREAT creates and
  // only O_TRUNC truncates — confirm whether OPEN_ALWAYS/OPEN_EXISTING was intended.
  static Disposition TranslateModes(FileModes Modes) {
    Disposition Disp {};
    if ((Modes & FileModes::READ) == FileModes::READ) {
      Disp.Access |= GENERIC_READ;
    }
    if ((Modes & FileModes::WRITE) == FileModes::WRITE) {
      Disp.Access |= GENERIC_WRITE;
    }
    if ((Modes & FileModes::CREATE) == FileModes::CREATE) {
      Disp.CreationFlag = CREATE_ALWAYS;
    } else {
      Disp.CreationFlag = OPEN_ALWAYS;
    }

    if ((Modes & FileModes::TRUNCATE) == FileModes::TRUNCATE) {
      Disp.TruncateOnExist = true;
    }

    return Disp;
  }

  // Converts a SeekOp in to the matching SetFilePointerEx move method.
  static uint32_t TranslateSeek(SeekOp Op) {
    switch (Op) {
    case SeekOp::BEGIN: return FILE_BEGIN;
    case SeekOp::CURRENT: return FILE_CURRENT;
    case SeekOp::END: return FILE_END;
    default: FEX_UNREACHABLE;
    }
  }
#endif
};
} // namespace FEXCore::File


================================================
FILE: FEXCore/include/FEXCore/Utils/FileLoading.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/Utils/CompilerDefs.h>

#include <span>

namespace FEXCore::FileLoading {
/**
 * @brief Loads a filepath in to a vector of data
 *
 * @param Data The vector to load the file data in to
 * @param Filepath The filepath to load
 * @param FixedSize Optional size argument, defaults to 0.
 *        NOTE(review): presumably the number of bytes to load, with 0 meaning the size is determined
 *        from the file itself — confirm against the implementation.
 *
 * @return true on file loaded, false on failure
 */
FEX_DEFAULT_VISIBILITY bool LoadFile(fextl::vector<char>& Data, const fextl::string& Filepath, size_t FixedSize = 0);
// Overload with the same parameters that receives the file data in a string instead of a vector.
FEX_DEFAULT_VISIBILITY bool LoadFile(fextl::string& Data, const fextl::string& Filepath, size_t FixedSize = 0);

/**
 * @brief Loads a filepath in to a buffer of data with a fixed size
 *
 * @param Filepath The filepath to load
 * @param Buffer The buffer to load the data in to. Attempting to read the full size of the span
 *
 * @return The amount of data read or -1 on error.
 */
FEX_DEFAULT_VISIBILITY ssize_t LoadFileToBuffer(const fextl::string& Filepath, std::span<char> Buffer);
} // namespace FEXCore::FileLoading


================================================
FILE: FEXCore/include/FEXCore/Utils/InterruptableConditionVariable.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <atomic>
#include <chrono>
#include <climits>
#include <cstdint>
#ifndef _WIN32
#include <linux/futex.h>
#include <sys/syscall.h>
#else
#include <errhandlingapi.h>
#include <synchapi.h>
#include <winerror.h>
#endif
#include <unistd.h>

namespace FEXCore {
/**
 * @brief A condition variable that is robust against use of longjmp in signal handlers.
 *
 * This is opposed to common `std::condition_variable` implementations:
 * Longjmp'ing in a signal handler while interrupting a pending `wait_for()`
 * call can leave the condition variable in an invalid state that breaks later
 * uses of that object and may cause hangs as a consequence.
 */
#ifndef _WIN32
class InterruptableConditionVariable final {
public:
  /**
   * @brief Blocks until the variable is signaled, consuming the signal.
   *
   * @param Timeout Optional relative timeout handed to the futex wait. nullptr waits without a timeout.
   *
   * @return true if a signal was consumed, false if the futex wait timed out.
   */
  bool Wait(struct timespec* Timeout = nullptr) {
    for (;;) {
      // Try to flip SIGNALED -> UNSIGNALED; success means a pending signal was consumed.
      uint32_t Observed = SIGNALED;
      if (Mutex.compare_exchange_strong(Observed, UNSIGNALED)) {
        return true;
      }

      // Sleep for as long as the futex word still holds UNSIGNALED.
      const uint32_t SleepWhile = UNSIGNALED;
      const int WaitResult = ::syscall(SYS_futex, &Mutex, FUTEX_WAIT | FUTEX_PRIVATE_FLAG,
                                       SleepWhile, // val
                                       Timeout,    // Timeout/val2
                                       nullptr,    // Addr2
                                       0);         // val3

      const bool TimedOut = WaitResult == -1 && errno == ETIMEDOUT;
      if (Timeout && TimedOut) {
        return false;
      }
      // Any other wakeup simply retries the exchange above.
    }
  }

  /**
   * @brief Waits for a signal for at most the given duration.
   *
   * @return true if a signal was consumed, false on timeout.
   */
  template<class Rep, class Period>
  bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
    // Split the duration in to whole seconds plus the nanosecond remainder.
    const auto WholeSeconds = std::chrono::duration_cast<std::chrono::seconds>(time);
    const auto Remainder = std::chrono::duration_cast<std::chrono::nanoseconds>(time - WholeSeconds);

    struct timespec Timeout {};
    Timeout.tv_sec = WholeSeconds.count();
    Timeout.tv_nsec = Remainder.count();
    return Wait(&Timeout);
  }

  // Signals the variable and wakes a single waiter.
  void NotifyOne() {
    DoNotify(1);
  }

  // Signals the variable and wakes as many waiters as the futex interface allows.
  void NotifyAll() {
    DoNotify(INT_MAX);
  }

private:
  std::atomic<uint32_t> Mutex {};
  constexpr static uint32_t SIGNALED = 1;
  constexpr static uint32_t UNSIGNALED = 0;

  // Moves the state UNSIGNALED -> SIGNALED and, only if that transition happened, wakes up to `Waiters` sleepers.
  void DoNotify(int Waiters) {
    uint32_t Observed = UNSIGNALED;
    if (!Mutex.compare_exchange_strong(Observed, SIGNALED)) {
      // Already signaled, nothing to wake.
      return;
    }

    ::syscall(SYS_futex, &Mutex, FUTEX_WAKE | FUTEX_PRIVATE_FLAG,
              Waiters, // val - Number of waiters to wake
              0,       // val2
              &Mutex,  // Addr2 - Mutex to do the operation on
              0);      // val3
  }
};
#else
class InterruptableConditionVariable final {
public:
  /**
   * @brief Blocks until the variable is signaled, consuming the signal.
   *
   * @param Timeout Optional relative timeout. nullptr waits indefinitely.
   *
   * @return true if a signal was consumed, false if the wait timed out.
   */
  bool Wait(struct timespec* Timeout = nullptr) {
    // Windows only supports millisecond granularity.
    // Without a timeout the wait must block indefinitely: a timeout of 0 makes WaitOnAddress return
    // immediately, which would turn this loop in to a busy-spin.
    uint32_t TimeoutMS = TIMEOUT_INFINITE;
    if (Timeout) {
      // Compute in 64-bit and clamp so large timeouts neither wrap around nor alias the "infinite" value.
      const uint64_t Milliseconds = static_cast<uint64_t>(Timeout->tv_sec) * 1000 + static_cast<uint64_t>(Timeout->tv_nsec) / 1000000;
      TimeoutMS = Milliseconds < TIMEOUT_INFINITE ? static_cast<uint32_t>(Milliseconds) : TIMEOUT_INFINITE - 1;
    }

    while (true) {
      uint32_t Expected = SIGNALED;
      uint32_t Desired = UNSIGNALED;

      // If the mutex was already signaled then we can early exit
      if (Mutex.compare_exchange_strong(Expected, Desired)) {
        return true;
      }

      // WaitOnAddress returns when the value at `Address` differs from the value at `CompareAddress`.
      bool Result = WaitOnAddress(&Mutex, &Desired, 4, TimeoutMS);

      if (Timeout && Result == false && GetLastError() == ERROR_TIMEOUT) {
        return false;
      }
    }
  }

  /**
   * @brief Waits for a signal for at most the given duration.
   *
   * @return true if a signal was consumed, false on timeout.
   */
  template<class Rep, class Period>
  bool WaitFor(const std::chrono::duration<Rep, Period>& time) {
    struct timespec Timeout {};
    auto SecondsDuration = std::chrono::duration_cast<std::chrono::seconds>(time);
    Timeout.tv_sec = SecondsDuration.count();
    Timeout.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(time - SecondsDuration).count();
    return Wait(&Timeout);
  }

  // Signals the variable and wakes a single waiter.
  void NotifyOne() {
    DoNotify(false);
  }

  // Signals the variable and wakes all waiters.
  void NotifyAll() {
    DoNotify(true);
  }

private:
  std::atomic<uint32_t> Mutex {};
  constexpr static uint32_t SIGNALED = 1;
  constexpr static uint32_t UNSIGNALED = 0;
  // Same value as the Win32 INFINITE constant; spelled out to avoid depending on which header defines it.
  constexpr static uint32_t TIMEOUT_INFINITE = 0xFFFFFFFFU;

  // Moves the state UNSIGNALED -> SIGNALED and, only if that transition happened, wakes one or all waiters.
  void DoNotify(bool All) {
    uint32_t Expected = UNSIGNALED;
    uint32_t Desired = SIGNALED;

    // If the mutex was in an unsignaled state then signal
    if (Mutex.compare_exchange_strong(Expected, Desired)) {
      if (All) {
        WakeByAddressAll(&Mutex);
      } else {
        WakeByAddressSingle(&Mutex);
      }
    }
  }
};

#endif
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Utils/IntervalList.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <utility>
#include <algorithm>

#include <FEXCore/fextl/vector.h>

namespace FEXCore {
/**
 * @brief A sorted list of non-overlapping half-open intervals [Offset, End).
 *
 * Inserting an interval merges it with every stored interval it overlaps or touches, so stored
 * intervals are always separated by a gap. This keeps Contains() accurate for ranges that were
 * built up from several adjacent insertions.
 */
template<typename SizeType>
class IntervalList {
public:
  using DifferenceType = decltype(std::declval<SizeType>() - std::declval<SizeType>());

  /// Half-open interval [Offset, End).
  struct Interval {
    SizeType Offset;
    SizeType End;

    Interval() = default;

    Interval(SizeType Offset, SizeType End)
      : Offset {Offset}
      , End {End} {}
  };

private:
  fextl::vector<Interval> Intervals; ///< list of intervals sorted by their end offset

public:
  struct QueryResult {
    bool Enclosed;       ///< If the given offset was enclosed by an interval
    DifferenceType Size; ///< Size of the interval starting from the query offset, or distance to the next interval if
                         /// `Enclosed` is false (if there is no next interval, size is 0)
    Interval Interval;   ///< The interval that the query offset is enclosed by, or the next interval if `Enclosed` is false
  };

  using const_iterator = typename fextl::vector<Interval>::const_iterator;

  const_iterator begin() const {
    return Intervals.begin();
  }
  const_iterator end() const {
    return Intervals.end();
  }

  /// Removes all intervals.
  void Clear() {
    Intervals.clear();
  }

  /// Returns true if no intervals are stored.
  bool Empty() const {
    return Intervals.empty();
  }

  /**
   * @brief Adds an interval, merging it with all stored intervals that overlap or touch it.
   *
   * Empty intervals (Offset == End) are ignored.
   */
  void Insert(Interval Entry) {
    if (Entry.Offset == Entry.End) {
      return;
    }

    // Strict `<` makes intervals that merely touch the new entry (End == Offset) part of the equal range,
    // so adjacent intervals are coalesced instead of being stored as separate neighbours.
    auto [FirstIt, EndIt] =
      std::equal_range(Intervals.begin(), Intervals.end(), Entry, [](const auto& LHS, const auto& RHS) { return LHS.End < RHS.Offset; });

    if (FirstIt == EndIt) {
      // No overlaps
      Intervals.insert(FirstIt, Entry);
      return;
    }

    auto LastIt = std::prev(EndIt);
    // FirstIt/LastIt are the lowest/highest offset intervals respectively that overlap with (or touch) the new interval

    const SizeType Offset = std::min(Entry.Offset, FirstIt->Offset);
    const SizeType End = std::max(LastIt->End, Entry.End);

    // Erase all overlapping entries but the first
    const auto EraseStartIt = std::next(FirstIt);
    const auto EraseEndIt = std::next(LastIt);
    LastIt = Intervals.erase(EraseStartIt, EraseEndIt);
    // erase() invalidated FirstIt; the surviving entry sits directly in front of the returned iterator
    FirstIt = std::prev(LastIt);

    // Grow the surviving entry to cover the union of everything that was merged
    FirstIt->Offset = Offset;
    FirstIt->End = End;
  }

  /// Inserts every interval of another list.
  void Insert(const IntervalList<SizeType>& Other) {
    for (const auto& Interval : Other.Intervals) {
      Insert(Interval);
    }
  }

  /**
   * @brief Removes the given range from the list, trimming or splitting stored intervals as needed.
   *
   * Empty intervals (Offset == End) are ignored.
   */
  void Remove(Interval Entry) {
    if (Entry.Offset == Entry.End) {
      return;
    }

    // `<=` here: only intervals that truly overlap the entry are affected, touching neighbours are left alone
    auto [FirstIt, EndIt] =
      std::equal_range(Intervals.begin(), Intervals.end(), Entry, [](const auto& LHS, const auto& RHS) { return LHS.End <= RHS.Offset; });

    if (FirstIt == EndIt) {
      // No intersecting intervals present, nothing more to do
      return;
    }

    if (FirstIt->Offset < Entry.Offset && FirstIt->End > Entry.End) {
      // The interval to be removed is fully enclosed by an existing interval

      // Break the single interval into two smaller intervals on either side on the interval being removed
      const auto FirstPredecessorIt = Intervals.insert(FirstIt, *FirstIt);
      FirstIt = std::next(FirstPredecessorIt);
      FirstPredecessorIt->End = Entry.Offset;
      FirstIt->Offset = Entry.End;
      return;
    }

    auto LastIt = std::prev(EndIt);
    // FirstIt/LastIt are the lowest/highest offset intervals respectively that overlap with the interval being removed

    if (FirstIt->Offset < Entry.Offset) {
      // The first overlap straddles the start of the interval to be removed
      FirstIt->End = Entry.Offset;
      if (FirstIt == LastIt) {
        // No more overlaps left, nothing more to do
        return;
      } else {
        FirstIt++;
      }
    }

    if (LastIt->End > Entry.End) {
      // The last overlap straddles the end of the interval to be removed
      LastIt->Offset = Entry.End;
      if (LastIt == FirstIt) {
        // No more overlaps left, nothing more to do
        return;
      } else {
        LastIt--;
      }
    }

    // Now none of the overlaps straddle the edges of the interval to be removed they can all be erased
    const auto EraseStartIt = FirstIt;
    const auto EraseEndIt = std::next(LastIt);
    Intervals.erase(EraseStartIt, EraseEndIt);
  }

  /**
   * @brief Looks up the interval enclosing Offset, or the next interval after it.
   *
   * @return See QueryResult. If no interval ends past Offset the result is {false, 0, {}}.
   */
  QueryResult Query(SizeType Offset) const {
    const auto It = std::upper_bound(Intervals.begin(), Intervals.end(), Offset, [](const auto& LHS, const auto& RHS) {
      return LHS < RHS.End;
    }); // Lowest offset interval that (maybe) overlaps with the query offset

    if (It == Intervals.end()) { // No overlaps past offset
      return {false, 0, {}};
    } else if (It->Offset > Offset) { // No overlap, return the distance to the next possible overlap
      return {false, It->Offset - Offset, *It};
    } else { // Overlap, return the distance to the end of the overlap
      return {true, It->End - Offset, *It};
    }
  }

  /// Returns true if any stored interval overlaps Entry.
  bool Intersect(Interval Entry) const {
    const auto It = std::upper_bound(Intervals.begin(), Intervals.end(), Entry, [](const auto& LHS, const auto& RHS) {
      return LHS.Offset < RHS.End;
    }); // Lowest offset interval that (maybe) overlaps with the query offset

    return It != Intervals.end() && It->Offset < Entry.End;
  }

  /// Returns true if a single stored interval fully covers Entry.
  bool Contains(Interval Entry) const {
    const auto It = std::upper_bound(Intervals.begin(), Intervals.end(), Entry, [](const auto& LHS, const auto& RHS) {
      return LHS.Offset < RHS.End;
    }); // Lowest offset interval that (maybe) overlaps with the query offset

    return It != Intervals.end() && It->Offset <= Entry.Offset && It->End >= Entry.End;
  }
};

} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Utils/LogManager.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <cstdarg>

#include <fmt/format.h>
#include <fmt/color.h>

namespace LogMan {
// Message severity levels. A higher numeric value means more verbose output;
// the numeric ordering is relied upon by `<` comparisons against the configured level.
enum DebugLevels : uint32_t {
  NONE = 0,   ///< Expect zero messages
  ASSERT = 1, ///< Assert throwing
  ERROR = 2,  ///< Only Errors printed
  DEBUG = 3,  ///< Debug messages added
  INFO = 4,   ///< Info messages added
};

static inline const char* DebugLevelStr(uint32_t Level) {
  switch (Level) {
  case NONE: return "NONE";
  case ASSERT: return "A";
  case ERROR: return "E";
  case DEBUG: return "D";
  case INFO: return "I";
  default: return "???"; break;
  }
}

// Maps a debug level to the fmt text style used when printing it.
// Levels without a dedicated style get a default-constructed (empty) style.
static inline fmt::text_style DebugLevelStyle(uint32_t Level) {
  if (Level == LogMan::ASSERT) {
    return fmt::bg(fmt::color::red) | fmt::emphasis::bold | fmt::fg(fmt::color::white);
  }
  if (Level == LogMan::ERROR) {
    return fmt::fg(fmt::color::red);
  }
  if (Level == LogMan::DEBUG) {
    return fmt::fg(fmt::color::gray);
  }
  if (Level == LogMan::INFO) {
    return fmt::fg(fmt::color::green);
  }
  return {};
}

// Compile-time verbosity threshold: the level-specific logging helpers return early
// when MSG_LEVEL is lower than the level they log at.
constexpr DebugLevels MSG_LEVEL = INFO;

// Note that all logging functions with the Fmt or _FMT suffix on them expect
// format strings as used by fmtlib (or C++ std::format).

namespace Throw {
  // Callback type that receives a formatted message.
  using ThrowHandler = void (*)(const char* Message);
  // Registers / unregisters a ThrowHandler.
  // NOTE(review): presumably handlers are invoked by MFmt with the formatted message — confirm in the implementation.
  FEX_DEFAULT_VISIBILITY void InstallHandler(ThrowHandler Handler);
  FEX_DEFAULT_VISIBILITY void UnInstallHandler();

  // Formats a message from an fmtlib format string and arguments. Never returns to the caller.
  [[noreturn]]
  void MFmt(const char* fmt, const fmt::format_args& args);

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Assertion helper: does nothing if `Value` is true (or assertions are below the configured MSG_LEVEL),
  // otherwise forwards the formatted message to MFmt, which does not return.
  template<typename... Args>
  static inline void AFmt(bool Value, const char* fmt, const Args&... args) {
    if (MSG_LEVEL < ASSERT || Value) {
      return;
    }
    MFmt(fmt, fmt::make_format_args(args...));
  }

// Asserts that `pred` holds. On failure the message is prefixed with the file name, line and function.
// `format` must be a string literal since it is concatenated with the prefix.
#define LOGMAN_THROW_A_FMT(pred, format, ...)                                                                             \
  do {                                                                                                                    \
    if (!(pred)) {                                                                                                        \
      LogMan::Throw::AFmt(false, "{}:{}, {}: " format, __FILE_NAME__, __LINE__, __FUNCTION__ __VA_OPT__(, ) __VA_ARGS__); \
    }                                                                                                                     \
  } while (0)
#else
  // Assertions disabled: AFmt is a no-op.
  static inline void AFmt(bool, const char*, ...) {}
// Assertions disabled: the predicate is still evaluated (for its side effects and to avoid unused warnings),
// but its result is discarded and no message is produced.
#define LOGMAN_THROW_A_FMT(pred, ...) \
  do {                                \
    (void)(pred);                     \
  } while (0)
#endif

} // namespace Throw

namespace Msg {
  // Callback type that receives a formatted message together with its level.
  using MsgHandler = void (*)(DebugLevels Level, const char* Message);
  // Registers / unregisters a MsgHandler.
  // NOTE(review): presumably handlers are invoked by MFmtImpl with the formatted message — confirm in the implementation.
  FEX_DEFAULT_VISIBILITY void InstallHandler(MsgHandler Handler);
  FEX_DEFAULT_VISIBILITY void UnInstallHandler();

  // Fmt-capable interface.

  // Out-of-line implementation taking type-erased fmtlib arguments.
  FEX_DEFAULT_VISIBILITY void MFmtImpl(DebugLevels level, const char* fmt, const fmt::format_args& args);

  // Logs a message at an explicit level. Unlike the level-specific helpers below, this does not check MSG_LEVEL.
  template<typename... Args>
  static inline void MFmt(DebugLevels level, const char* fmt, const Args&... args) {
    MFmtImpl(level, fmt, fmt::make_format_args(args...));
  }

  // Logs an ERROR level message, unless MSG_LEVEL is below ERROR.
  template<typename... Args>
  static inline void EFmt(const char* fmt, const Args&... args) {
    if (MSG_LEVEL < ERROR) {
      return;
    }
    MFmtImpl(ERROR, fmt, fmt::make_format_args(args...));
  }

  // Logs a DEBUG level message, unless MSG_LEVEL is below DEBUG.
  template<typename... Args>
  static inline void DFmt(const char* fmt, const Args&... args) {
    if (MSG_LEVEL < DEBUG) {
      return;
    }
    MFmtImpl(DEBUG, fmt, fmt::make_format_args(args...));
  }

  // Logs an INFO level message, unless MSG_LEVEL is below INFO.
  template<typename... Args>
  static inline void IFmt(const char* fmt, const Args&... args) {
    if (MSG_LEVEL < INFO) {
      return;
    }
    MFmtImpl(INFO, fmt, fmt::make_format_args(args...));
  }

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Logs an ASSERT level message and then traps execution. Only active when assertions are enabled.
  template<typename... Args>
  static inline void AFmt(const char* fmt, const Args&... args) {
    if (MSG_LEVEL < ASSERT) {
      return;
    }
    MFmtImpl(ASSERT, fmt, fmt::make_format_args(args...));
    FEX_TRAP_EXECUTION;
  }
#define LOGMAN_MSG_A_FMT(...)       \
  do {                              \
    LogMan::Msg::AFmt(__VA_ARGS__); \
  } while (0)
#else
  // Assertions disabled: AFmt is a no-op and LOGMAN_MSG_A_FMT does not evaluate its arguments.
  template<typename... Args>
  static inline void AFmt(const char*, const Args&...) {}
#define LOGMAN_MSG_A_FMT(...) \
  do {                        \
  } while (0)
#endif

// Logs a DEBUG level message the first time this particular expansion site is reached.
// The `Warned` flag is a plain (non-atomic) function-local static.
// NOTE(review): unlike the other macros here, the expansion ends in `while (0);` — the embedded semicolon
// breaks `if (...) WARN_ONCE_FMT(...); else ...`. It is kept as-is because callers may omit their own semicolon.
#define WARN_ONCE_FMT(...)            \
  do {                                \
    static bool Warned {};            \
    if (!Warned) {                    \
      LogMan::Msg::DFmt(__VA_ARGS__); \
      Warned = true;                  \
    }                                 \
  } while (0);

// Logs an ASSERT level message (regardless of MSG_LEVEL and of ASSERTIONS_ENABLED) and traps execution.
#define ERROR_AND_DIE_FMT(...)                      \
  do {                                              \
    LogMan::Msg::MFmt(LogMan::ASSERT, __VA_ARGS__); \
    FEX_TRAP_EXECUTION;                             \
  } while (0)

} // namespace Msg
} // namespace LogMan


================================================
FILE: FEXCore/include/FEXCore/Utils/LongJump.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Utils/CompilerDefs.h>

#include <cstdint>

// Reimplementation of longjmp without glibc fortification checks.
// This is useful when false positives need to be avoided or when using
// a libc implementation that does not implement std::longjmp.
namespace FEXCore::UncheckedLongJump {
// JumpBuf definition needs to be public because the frontend needs to understand it.
#if defined(ARCHITECTURE_arm64)
struct JumpBuf {
  // All the registers that are required by AAPCS64 to save.
  // GPRs
  // X19, X20, X21, X22,
  // X23, X24, X25, X26,
  // X27, X28, X29, X30,
  //
  // Lower 64-bits:
  //  V8,  V9, V10, V11,
  // V12, V13, V14, V15,
  //
  // SP,
  //
  // 12 GPRs + 8 vector register halves + SP = 21 slots.
  uint64_t Registers[21];
};
#else
struct JumpBuf {
  // Registers to preserve
  // RBX, RSP, RBP, R12, R13, R14, R15,
  // <return address>
  //
  // 7 registers + return address = 8 slots.
  uint64_t Registers[8];
};
#endif

// Saves the current execution context in to Buffer.
// NOTE(review): presumably follows setjmp semantics (returns 0 on the direct call and the `Value` handed to
// LongJump when resumed) — confirm against the implementation. The result must not be discarded.
[[nodiscard]] FEX_DEFAULT_VISIBILITY uint64_t SetJump(JumpBuf& Buffer);
// Resumes execution from the context saved in Buffer. Never returns to the caller.
[[noreturn]] FEX_DEFAULT_VISIBILITY void LongJump(const JumpBuf& Buffer, uint64_t Value);
// NOTE(review): judging by the signature this unpacks Buffer in to caller-provided GPR/FPR/PC storage instead of
// jumping — confirm the exact register mapping against the implementation.
FEX_DEFAULT_VISIBILITY void ManuallyLoadJumpBuf(const JumpBuf& Buffer, uint64_t Value, uint64_t* GPRs, __uint128_t* FPRs, uint64_t* PC);
} // namespace FEXCore::UncheckedLongJump


================================================
FILE: FEXCore/include/FEXCore/Utils/MathUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/LogManager.h>

#include <bit>
#include <cstdint>
#include <type_traits>

namespace FEXCore {
// Rounds `value` up to the next multiple of `size`. Values that are already a multiple are returned unchanged.
// `size` may be any non-zero value, it doesn't need to be a power of two.
[[nodiscard]]
constexpr uint64_t AlignUp(uint64_t value, uint64_t size) {
  const uint64_t Remainder = value % size;
  return Remainder == 0 ? value : value + (size - Remainder);
}

// Rounds `value` down to the previous multiple of `size`. Values that are already a multiple are returned unchanged.
// `size` may be any non-zero value, it doesn't need to be a power of two.
[[nodiscard]]
constexpr uint64_t AlignDown(uint64_t value, uint64_t size) {
  const uint64_t WholeBlocks = value / size;
  return WholeBlocks * size;
}

// Computes the base-2 logarithm of a power-of-2 unsigned integer.
// When assertions are enabled, passing a value that isn't a power-of-2 asserts.
template<typename T>
requires (std::is_unsigned_v<T>)
[[nodiscard]]
constexpr T ilog2(T Value) {
  const bool IsPowerOfTwo = std::has_single_bit(Value);
  LOGMAN_THROW_A_FMT(IsPowerOfTwo, "ilog2 requires popcount to be one");

  // For a power-of-2 the number of trailing zero bits is exactly its logarithm.
  const int TrailingZeros = std::countr_zero(Value);
  return static_cast<T>(TrailingZeros);
}

// Divides `Dividend` by a power-of-2 `Divisor` using a right shift instead of an integer division.
// The power-of-2 requirement on the divisor is checked by the ilog2 helper.
template<typename T, typename TT>
requires (std::is_unsigned_v<T> && std::is_unsigned_v<TT>)
[[nodiscard]]
constexpr T DividePow2(T Dividend, TT Divisor) {
  const TT ShiftAmount = ilog2(Divisor);
  return Dividend >> ShiftAmount;
}
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Utils/PrctlUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#ifndef _WIN32
#include <linux/prctl.h>
#include <sys/mman.h>
#include <sys/user.h>
#include <sys/prctl.h>

#ifndef PR_SET_VMA
#define PR_SET_VMA 0x53564d41
#endif

#ifndef PR_SET_VMA_ANON_NAME
#define PR_SET_VMA_ANON_NAME 0
#endif

#ifndef PR_GET_MEM_MODEL
#define PR_GET_MEM_MODEL 0x6d4d444c
#endif
#ifndef PR_SET_MEM_MODEL
#define PR_SET_MEM_MODEL 0x4d4d444c
#endif
#ifndef PR_SET_MEM_MODEL_DEFAULT
#define PR_SET_MEM_MODEL_DEFAULT 0
#endif
#ifndef PR_SET_MEM_MODEL_TSO
#define PR_SET_MEM_MODEL_TSO 1
#endif

#ifndef PR_GET_COMPAT_INPUT
#define PR_GET_COMPAT_INPUT 0x63494e50
#endif
#ifndef PR_SET_COMPAT_INPUT
#define PR_SET_COMPAT_INPUT 0x43494e50
#endif
#ifndef PR_SET_COMPAT_INPUT_DISABLE
#define PR_SET_COMPAT_INPUT_DISABLE 0
#endif
#ifndef PR_SET_COMPAT_INPUT_ENABLE
#define PR_SET_COMPAT_INPUT_ENABLE 1
#endif

#ifndef PR_GET_SHADOW_STACK_STATUS
#define PR_GET_SHADOW_STACK_STATUS 74
#endif
#ifndef PR_LOCK_SHADOW_STACK_STATUS
#define PR_LOCK_SHADOW_STACK_STATUS 76
#endif
#ifndef PR_SHADOW_STACK_ENABLE
#define PR_SHADOW_STACK_ENABLE (1ULL << 0)
#endif

#endif // ifndef _WIN32


================================================
FILE: FEXCore/include/FEXCore/Utils/Profiler.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>
#include <string_view>

#include <FEXCore/Utils/CompilerDefs.h>

#define FEXCORE_PROFILER_BACKEND_OFF 0
#define FEXCORE_PROFILER_BACKEND_GPUVIS 1
#define FEXCORE_PROFILER_BACKEND_TRACY 2

#if defined(ENABLE_FEXCORE_PROFILER) && FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
#include "tracy/Tracy.hpp"
#endif

namespace FEXCore::Profiler {
// Two-step token pasting so that `line` (e.g. __LINE__) is macro-expanded before being concatenated.
#define UniqueScopeName2(name, line) name##line
#define UniqueScopeName(name, line) UniqueScopeName2(name, line)

#ifdef ENABLE_FEXCORE_PROFILER

// Profiler enabled: the functions below are implemented out of line by the selected backend.
FEX_DEFAULT_VISIBILITY void Init(std::string_view ProgramName, std::string_view ProgramPath);
FEX_DEFAULT_VISIBILITY void PostForkAction(bool IsChild);
FEX_DEFAULT_VISIBILITY bool IsActive();
FEX_DEFAULT_VISIBILITY void Shutdown();
FEX_DEFAULT_VISIBILITY void TraceObject(const std::string_view Format);
FEX_DEFAULT_VISIBILITY void TraceObject(const std::string_view Format, uint64_t Duration);

// Declare an instantaneous profiler event.
#define FEXCORE_PROFILE_INSTANT(name) FEXCore::Profiler::TraceObject(name)

#if FEXCORE_PROFILER_BACKEND == FEXCORE_PROFILER_BACKEND_TRACY
// Declare a scoped profile block variable with a fixed name.
// The zone is only active while IsActive() returns true.
#define FEXCORE_PROFILE_SCOPED(name) ZoneNamedN(___tracy_scoped_zone, name, ::FEXCore::Profiler::IsActive())
#else
// A class that follows scoping rules to generate a profile duration block
class ProfilerBlock final {
public:
  ProfilerBlock(const std::string_view Format);

  ~ProfilerBlock();

private:
  uint64_t DurationBegin;
  // Stored as a view: the referenced characters must outlive the block.
  const std::string_view Format;
};

// Declare a scoped profile block variable with a fixed name.
// The variable name is made unique per source line so multiple blocks can live in one scope.
#define FEXCORE_PROFILE_SCOPED(name) FEXCore::Profiler::ProfilerBlock UniqueScopeName(ScopedBlock_, __LINE__)(name)
#endif

#else
// Profiler disabled: everything collapses to no-ops.
// Note that IsActive() has no stub in this configuration.
inline void Init(std::string_view ProgramName, std::string_view ProgramPath) {}
inline void PostForkAction(bool IsChild) {}
inline void Shutdown() {}
inline void TraceObject(const std::string_view Format) {}
inline void TraceObject(const std::string_view, uint64_t) {}

#define FEXCORE_PROFILE_INSTANT(...) \
  do {                               \
  } while (0)
#define FEXCORE_PROFILE_SCOPED(...) \
  do {                              \
  } while (0)

#endif
} // namespace FEXCore::Profiler


================================================
FILE: FEXCore/include/FEXCore/Utils/SHMStats.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <atomic>
#include <cstddef>
#include <cstdint>

#ifdef ARCHITECTURE_x86_64
#include <x86intrin.h>
#endif

namespace FEXCore::SHMStats {
#ifdef ARCHITECTURE_arm64
/**
 * @brief Get the raw cycle counter with synchronizing isb.
 *
 * `CNTVCTSS_EL0` also does the same thing, but requires the FEAT_ECV feature.
 *
 * @return The value read from the `CNTVCT_EL0` system register.
 */
static inline uint64_t GetCycleCounter() {
  uint64_t Result {};
  // The isb is issued before the counter read, both inside a single volatile asm statement.
  __asm volatile(R"(
      isb;
      mrs %[Res], CNTVCT_EL0;
    )"
                 : [Res] "=r"(Result));
  return Result;
}
#else
// Reads the timestamp counter using RDTSCP.
static inline uint64_t GetCycleCounter() {
  // RDTSCP additionally writes an auxiliary value through its pointer argument; it isn't used here.
  unsigned UnusedAux;
  return __rdtscp(&UnusedAux);
}
#endif
// FEXCore live-stats
constexpr uint8_t STATS_VERSION = 2;
// Kind of application being run, stored as a single byte in the stats header.
enum class AppType : uint8_t {
  LINUX_32 = 0,
  LINUX_64 = 1,
  WIN_ARM64EC = 2,
  WIN_WOW64 = 3,
};

// Only append new members to the end of {ThreadStatsHeader, ThreadStats} to allow old tools time to support new information.
// FEX isn't guaranteeing /not/ breaking compatibility with versions, but trying to not cause too much churn.
// Header describing the stats region. Field order and sizes are part of the layout read by external tools.
struct ThreadStatsHeader {
  uint8_t Version;            // Layout version; presumably set to STATS_VERSION — confirm at the writer.
  AppType app_type;           // Kind of application producing the stats.
  uint16_t ThreadStatsSize;   // Presumably sizeof(ThreadStats) as seen by the writer — confirm.
  char fex_version[48];       // Version string of the producing FEX build.
  std::atomic<uint32_t> Head; // NOTE(review): presumably locates the first ThreadStats entry — confirm.
  std::atomic<uint32_t> Size; // NOTE(review): presumably the size of the stats region — confirm.
  uint32_t Pad;               // Padding.
};

// Per-thread statistics entry. Field order and sizes are part of the layout read by external tools.
struct ThreadStats {
  std::atomic<uint32_t> Next; // NOTE(review): presumably links to the next ThreadStats entry — confirm.
  std::atomic<uint32_t> TID;  // Thread ID this entry belongs to.

  // Accumulated time (In unscaled CPU cycles!)
  uint64_t AccumulatedJITTime;
  uint64_t AccumulatedSignalTime;

  // Accumulated event counts
  uint64_t AccumulatedSIGBUSCount;
  uint64_t AccumulatedSMCCount;
  uint64_t AccumulatedFloatFallbackCount;

  uint64_t AccumulatedCacheMissCount;
  // Named as times; presumably in the same unscaled cycle units as the times above — confirm.
  uint64_t AccumulatedCacheReadLockTime;
  uint64_t AccumulatedCacheWriteLockTime;

  uint64_t AccumulatedJITCount;
};

// Ensure 16-byte alignment to take advantage of ARM single-copy atomicity.
static_assert(sizeof(ThreadStats) % 16 == 0, "Needs to be 16-byte aligned!");

template<typename T, size_t FlatOffset = 0>
class AccumulationBlock final {
public:
  AccumulationBlock(T* Stat)
    : Begin {Stat ? GetCycleCounter() : 0}
    , Stat {Stat} {}

  ~AccumulationBlock() {
    if (Stat) {
      const auto Duration = GetCycleCounter() - Begin + FlatOffset;
      auto ref = std::atomic_ref<T>(*Stat);
      ref.fetch_add(Duration, std::memory_order_relaxed);
    }
  }

private:
  uint64_t Begin;
  T* Stat;
};
#define UniqueScopeName2(name, line) name##line
#define UniqueScopeName(name, line) UniqueScopeName2(name, line)

#define FEXCORE_PROFILE_ACCUMULATION(ThreadState, Stat)                                                                          \
  FEXCore::SHMStats::AccumulationBlock<decltype(ThreadState->ThreadStats->Stat)> UniqueScopeName(ScopedAccumulation_, __LINE__)( \
    ThreadState->ThreadStats ? &ThreadState->ThreadStats->Stat : nullptr);
#define FEXCORE_PROFILE_INSTANT_INCREMENT(ThreadState, Stat, value) \
  do {                                                              \
    if (ThreadState->ThreadStats) {                                 \
      ThreadState->ThreadStats->Stat += value;                      \
    }                                                               \
  } while (0)

} // namespace FEXCore::SHMStats


================================================
FILE: FEXCore/include/FEXCore/Utils/SignalScopeGuards.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Debug/InternalThreadState.h>

#include <atomic>
#include <cstdint>
#include <mutex>
#include <optional>
#include <signal.h>
#ifndef _WIN32
#include <sys/syscall.h>
#endif
#include <unistd.h>
#include <variant>

namespace FEXCore {
#ifndef _WIN32
// Replacement for std::mutexes to deal with unlocking issues in the face of Linux fork() semantics.
//
// A fork() only clones the parent's calling thread. Other threads are silently dropped, which permanently leaves any mutexes owned by them locked.
// To address this issue, ForkableUniqueMutex and ForkableSharedMutex provide a way to forcefully remove any dangling locks and reset the mutexes to their default state.
class ForkableUniqueMutex final {
public:
  ForkableUniqueMutex()
    : Mutex(PTHREAD_MUTEX_INITIALIZER) {}

  // Move-only type
  ForkableUniqueMutex(const ForkableUniqueMutex&) = delete;
  ForkableUniqueMutex& operator=(const ForkableUniqueMutex&) = delete;
  ForkableUniqueMutex(ForkableUniqueMutex&& rhs) = default;
  ForkableUniqueMutex& operator=(ForkableUniqueMutex&&) = default;

  // Acquires the mutex; asserts that pthread_mutex_lock reported success.
  void lock() {
    const auto Result = pthread_mutex_lock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == 0, "{} failed to lock with {}", __func__, Result);
  }
  // Releases the mutex; asserts that pthread_mutex_unlock reported success.
  void unlock() {
    const auto Result = pthread_mutex_unlock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == 0, "{} failed to unlock with {}", __func__, Result);
  }
  // Initialize the internal pthread object to its default initializer state.
  // Should only ever be used in the child process when a Linux fork() has occurred.
  void StealAndDropActiveLocks() {
    Mutex = PTHREAD_MUTEX_INITIALIZER;
  }

  // Asserts that the mutex is already owned by the calling thread.
  // The check probes ownership by locking again and expecting EDEADLK.
  // NOTE(review): POSIX only guarantees EDEADLK on a relock for error-checking mutexes, while this mutex is
  // default-initialized. Confirm the target libc reports EDEADLK here rather than blocking.
  void check_lock_owned_by_self() {
    const auto Result = pthread_mutex_lock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == EDEADLK, "User of unique lock must have already locked mutex as write!");
    if (Result == 0) {
      // The probe acquired a mutex that nobody held. If execution continues past the assertion,
      // release it again so that a failed check doesn't leave the mutex locked.
      pthread_mutex_unlock(&Mutex);
    }
  }

private:
  pthread_mutex_t Mutex;
};

class ForkableSharedMutex final {
public:
  ForkableSharedMutex()
    : Mutex(PTHREAD_RWLOCK_INITIALIZER) {}

  // Move-only type
  ForkableSharedMutex(const ForkableSharedMutex&) = delete;
  ForkableSharedMutex& operator=(const ForkableSharedMutex&) = delete;
  ForkableSharedMutex(ForkableSharedMutex&& rhs) = default;
  ForkableSharedMutex& operator=(ForkableSharedMutex&&) = default;

  // Acquires the rwlock for writing; asserts that pthread_rwlock_wrlock reported success.
  void lock() {
    const auto Result = pthread_rwlock_wrlock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == 0, "{} failed to lock with {}", __func__, Result);
  }
  // Releases the rwlock (used for both the write and the shared side); asserts success.
  void unlock() {
    const auto Result = pthread_rwlock_unlock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == 0, "{} failed to unlock with {}", __func__, Result);
  }
  // Acquires the rwlock for reading; asserts that pthread_rwlock_rdlock reported success.
  void lock_shared() {
    const auto Result = pthread_rwlock_rdlock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == 0, "{} failed to lock with {}", __func__, Result);
  }

  // pthread uses a single unlock call for both lock modes.
  void unlock_shared() {
    unlock();
  }

  // Attempts to take the write lock without blocking. Returns true on success.
  bool try_lock() {
    const auto Result = pthread_rwlock_trywrlock(&Mutex);
    return Result == 0;
  }

  // Attempts to take a read lock without blocking. Returns true on success.
  bool try_lock_shared() {
    const auto Result = pthread_rwlock_tryrdlock(&Mutex);
    return Result == 0;
  }

  // Asserts that the rwlock is already exclusively (write) owned by the calling thread.
  // The check probes ownership by requesting the write lock again and expecting EDEADLK.
  // NOTE(review): POSIX lists EDEADLK for pthread_rwlock_wrlock as an optional ("may fail") error.
  // Confirm the target libc reports it.
  void check_lock_owned_by_self_as_write() {
    const auto Result = pthread_rwlock_wrlock(&Mutex);
    LOGMAN_THROW_A_FMT(Result == EDEADLK, "User of rwlock must have already locked mutex as write!");
    if (Result == 0) {
      // The probe acquired a lock that nobody held. If execution continues past the assertion,
      // release it again so that a failed check doesn't leave the rwlock write-locked.
      pthread_rwlock_unlock(&Mutex);
    }
  }

  // Initialize the internal pthread object to its default initializer state.
  // Should only ever be used in the child process when a Linux fork() has occurred.
  void StealAndDropActiveLocks() {
    Mutex = PTHREAD_RWLOCK_INITIALIZER;
  }
private:
  pthread_rwlock_t Mutex;
};

// Helper class to manage deferred signal refcounting within a block scope
//
// Construction increments `DeferredSignalRefCount` on the thread's current frame; destruction decrements it again
// and clears the thread's `InterruptFaultPage` (see the destructor for the per-architecture condition).
// A moved-from guard holds a null thread pointer and its destructor does nothing.
class DeferredSignalRefCountGuard final {
public:
  // `Thread` is dereferenced unconditionally, so it must not be nullptr.
  explicit DeferredSignalRefCountGuard(FEXCore::Core::InternalThreadState* Thread)
    : Thread(Thread) {
    // Needs to be atomic so that operations can't end up getting reordered around this.
    Thread->CurrentFrame->State.DeferredSignalRefCount.Increment(1);
  }

  // Move-only type
  DeferredSignalRefCountGuard(const DeferredSignalRefCountGuard&) = delete;
  DeferredSignalRefCountGuard& operator=(DeferredSignalRefCountGuard&) = delete;
  // Transfers responsibility for the decrement to the new object; the source is nulled so only one destructor acts.
  DeferredSignalRefCountGuard(DeferredSignalRefCountGuard&& rhs)
    : Thread(rhs.Thread) {
    rhs.Thread = nullptr;
  }

  ~DeferredSignalRefCountGuard() {
    if (Thread) {
#ifdef ARCHITECTURE_x86_64
      // Needs to be atomic so that operations can't end up getting reordered around this.
      // Without this, the refcount and the signal access could get reordered.
      auto Result = Thread->CurrentFrame->State.DeferredSignalRefCount.Decrement(1);

      // X86-64 must do an additional check around the store.
      // NOTE(review): presumably Decrement returns the value from before the decrement, making `Result - 1` the new
      // count, so the store only happens once the outermost guard is released - confirm against the refcounter.
      if ((Result - 1) == 0) {
        // Must happen after the refcount store
        auto InterruptFaultPage = reinterpret_cast<Core::NonAtomicRefCounter<uint64_t>*>(&Thread->InterruptFaultPage);
        InterruptFaultPage->Store(0);
      }
#else
      // Other architectures store to the fault page unconditionally after every decrement.
      Thread->CurrentFrame->State.DeferredSignalRefCount.Decrement(1);
      auto InterruptFaultPage = reinterpret_cast<Core::NonAtomicRefCounter<uint64_t>*>(&Thread->InterruptFaultPage);
      InterruptFaultPage->Store(0);
#endif
    }
  }
private:
  // Thread whose refcount is held; nullptr once this guard has been moved from.
  FEXCore::Core::InternalThreadState* Thread;
};

// Helper class to mask POSIX signals within a block scope
//
// Construction replaces the caller's signal mask with `Mask` through a raw rt_sigprocmask syscall and remembers
// the previous mask; destruction restores the remembered mask. A moved-from masker holds no saved mask and
// restores nothing.
class ScopedSignalMasker final {
public:
  explicit ScopedSignalMasker(uint64_t Mask)
    : OriginalMask(0) {
    // Install the requested mask, storing the original incoming mask
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &*OriginalMask, sizeof(*OriginalMask));
  }

  // Move-only type
  ScopedSignalMasker(const ScopedSignalMasker&) = delete;
  ScopedSignalMasker& operator=(ScopedSignalMasker&) = delete;
  // Transfers the saved mask; the source is disengaged so only one destructor restores it.
  ScopedSignalMasker(ScopedSignalMasker&& rhs)
    : OriginalMask(rhs.OriginalMask) {
    rhs.OriginalMask.reset();
  }

  ~ScopedSignalMasker() {
    if (OriginalMask) {
      // The kernel expects a pointer to the mask value itself, so pass the address of the contained
      // value rather than the address of the std::optional wrapper.
      ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &*OriginalMask, nullptr, sizeof(*OriginalMask));
    }
  }
private:
  // Signal mask that was active before construction; disengaged after being moved from.
  std::optional<uint64_t> OriginalMask {};
};

/**
 * @brief Produces a wrapper object around a scoped lock of the given mutex
 * while ensuring POSIX signals are masked while the mutex is locked
 *
 * Use this to prevent reentrancy issues of C++ mutexes with certain signal handlers.
 * Common examples of such issues are:
 * - C++ mutexes not unlocking due to a signal handler calling longjmp from within a scope owning the mutex
 * - The signal handler itself using a mutex that would be re-locked if the handler gets invoked
 *   again before unlocking
 *
 * Ownership of the returned object may be moved, but it is NOT SAFE to move across threads.
 */
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto MaskSignalsAndLockMutex(MutexType& mutex, uint64_t Mask = ~0ULL) {
  // Signals are masked first, and then the lock is acquired
  struct {
    ScopedSignalMasker mask;
    LockType<MutexType> lock;
  } scope_guard {ScopedSignalMasker {Mask}, LockType<MutexType> {mutex}};
  return scope_guard;
}

/**
 * @brief Produces a wrapper object around a scoped lock of the given mutex
 * while bumping the Thread's deferred signal refcount while the mutex is
 * locked.
 */
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto GuardSignalDeferringSection(MutexType& mutex, FEXCore::Core::InternalThreadState* Thread, uint64_t Mask = ~0ULL) {
  // Refcount is incremented first, and then the lock is acquired.
  struct {
    std::optional<DeferredSignalRefCountGuard> refcount;
    LockType<MutexType> lock;
  } scope_guard = {DeferredSignalRefCountGuard {Thread}, LockType<MutexType> {mutex}};
  return scope_guard;
}

// Like GuardSignalDeferringSection but falls back to masking signals when Thread is nullptr
//
// With a non-null `Thread` the returned object holds a DeferredSignalRefCountGuard and `Mask` is unused;
// with a null `Thread` it holds a ScopedSignalMasker constructed from `Mask`.
// In both cases the extra guard is established before the mutex is locked.
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto GuardSignalDeferringSectionWithFallback(MutexType& mutex, FEXCore::Core::InternalThreadState* Thread, uint64_t Mask = ~0ULL) {
  using ExtraGuard = std::variant<ScopedSignalMasker, DeferredSignalRefCountGuard>;

  // Only the guard member is initialized here; `lock` starts default-constructed (not owning any mutex).
  struct {
    ExtraGuard refcount_or_mask;
    LockType<MutexType> lock;
  } scope_guard {Thread ? ExtraGuard {DeferredSignalRefCountGuard {Thread}} : ExtraGuard {ScopedSignalMasker {Mask}}};
  // Acquire the mutex only after the guard above is in place.
  scope_guard.lock = LockType<MutexType> {mutex};
  return scope_guard;
}

#else

// Dummy implementations as Windows doesn't support forking or async signals.

// WIN32 variant: a plain std::mutex that only adds the StealAndDropActiveLocks entry point.
class ForkableUniqueMutex final : public std::mutex {
public:
  // Not supported on this platform; calling it only reports the misuse through LogMan::Msg::AFmt.
  void StealAndDropActiveLocks() {
    LogMan::Msg::AFmt("{} is unsupported on WIN32 builds!", __func__);
  }
};

// WIN32 variant: a plain std::shared_mutex that only adds the StealAndDropActiveLocks entry point.
// NOTE(review): std::shared_mutex lives in <shared_mutex>, which is not among this header's direct includes -
// confirm it is provided transitively.
class ForkableSharedMutex final : public std::shared_mutex {
public:
  // Not supported on this platform; calling it only reports the misuse through LogMan::Msg::AFmt.
  void StealAndDropActiveLocks() {
    LogMan::Msg::AFmt("{} is unsupported on WIN32 builds!", __func__);
  }
};

// WIN32 variant: no signal masking is performed and `Mask` is ignored; simply returns a `LockType` lock over `mutex`.
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto MaskSignalsAndLockMutex(MutexType& mutex, uint64_t Mask = ~0ULL) {
  return LockType<MutexType> {mutex};
}

// WIN32 variant: no deferred-signal refcounting is performed; `Thread` and `Mask` are ignored.
// Simply returns a `LockType` lock over `mutex`.
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto GuardSignalDeferringSection(MutexType& mutex, FEXCore::Core::InternalThreadState* Thread, uint64_t Mask = ~0ULL) {
  return LockType<MutexType> {mutex};
}

// WIN32 variant: neither refcounting nor signal masking is performed; `Thread` and `Mask` are ignored.
// Simply returns a `LockType` lock over `mutex`.
template<template<typename> class LockType = std::unique_lock, typename MutexType>
[[nodiscard]]
static auto GuardSignalDeferringSectionWithFallback(MutexType& mutex, FEXCore::Core::InternalThreadState* Thread, uint64_t Mask = ~0ULL) {
  return LockType<MutexType> {mutex};
}

#endif
} // namespace FEXCore


================================================
FILE: FEXCore/include/FEXCore/Utils/StringUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>

namespace FEXCore::StringUtils {
// Trim the left side of the string of whitespace and new lines
//
// Removes every leading character that is contained in `TrimTokens` and returns the result.
// A string that is empty or consists solely of trim tokens yields an empty string.
inline fextl::string LeftTrim(fextl::string String, std::string_view TrimTokens = " \t\n\r\f\v") {
  // find_first_not_of returns npos when only trim tokens are present; erase(0, npos) then clears the whole string.
  String.erase(0, String.find_first_not_of(TrimTokens));
  return String;
}

// Trim the right side of the string of whitespace and new lines
//
// Removes every trailing character that is contained in `TrimTokens` and returns the result.
// A string that is empty or consists solely of trim tokens yields an empty string.
inline fextl::string RightTrim(fextl::string String, std::string_view TrimTokens = " \t\n\r\f\v") {
  // find_last_not_of returns npos when only trim tokens are present; npos + 1 wraps around to 0,
  // so the erase below then clears the whole string.
  String.erase(String.find_last_not_of(TrimTokens) + 1);
  return String;
}

// Strip `TrimTokens` characters from both ends of the string: the left side first, then the right side.
inline fextl::string Trim(fextl::string String, std::string_view TrimTokens = " \t\n\r\f\v") {
  String = LeftTrim(std::move(String), TrimTokens);
  return RightTrim(std::move(String), TrimTokens);
}

// Replaces every occurrence of `Token` in `Str` with `New`, modifying `Str` in place.
//
// Occurrences are processed left to right and text that was inserted by a replacement is never rescanned,
// so `New` may safely contain `Token`. An empty `Token` leaves `Str` untouched.
//
// Returns a reference to `Str`.
inline fextl::string& ReplaceAllInPlace(fextl::string& Str, std::string_view Token, std::string_view New) {
  // An empty token matches at every position, which would never terminate.
  if (Token.empty()) {
    return Str;
  }

  size_t SearchPos {};
  while ((SearchPos = Str.find(Token, SearchPos)) != Str.npos) {
    Str.replace(SearchPos, Token.size(), New);
    // Continue directly behind the text that was just inserted.
    SearchPos += New.size();
  }

  return Str;
}

} // namespace FEXCore::StringUtils


================================================
FILE: FEXCore/include/FEXCore/Utils/Telemetry.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/fextl/string.h>

#include <array>
#include <atomic>
#include <stdint.h>

namespace FEXCore::Telemetry {
// Identifiers for the individual telemetry values.
// The enumerators are used as indices into `TelemetryValues`, and TYPE_LAST is the number of slots,
// so new entries must be added before TYPE_LAST.
enum TelemetryType {
  TYPE_HAS_SPLIT_LOCKS,
  TYPE_16BYTE_SPLIT,
  TYPE_USES_EVEX_OPS,
  TYPE_CAS_16BIT_TEAR,
  TYPE_CAS_32BIT_TEAR,
  TYPE_CAS_64BIT_TEAR,
  TYPE_CAS_128BIT_TEAR,
  TYPE_CRASH_MASK,
  // If a 32-bit application is writing a non-zero value to segments.
  TYPE_WRITES_32BIT_SEGMENT_ES,
  TYPE_WRITES_32BIT_SEGMENT_SS,
  TYPE_WRITES_32BIT_SEGMENT_CS,
  TYPE_WRITES_32BIT_SEGMENT_DS,
  // If a 32-bit application is prefix/using a non-zero segment on memory access.
  TYPE_USES_32BIT_SEGMENT_ES,
  TYPE_USES_32BIT_SEGMENT_SS,
  TYPE_USES_32BIT_SEGMENT_CS,
  TYPE_USES_32BIT_SEGMENT_DS,
  TYPE_UNHANDLED_NONCANONICAL_ADDRESS,
  // Number of telemetry types; not a real telemetry value.
  TYPE_LAST,
};

#ifndef FEX_DISABLE_TELEMETRY
using Value = std::atomic<uint64_t>;

// One atomic slot per TelemetryType, indexed by the enumerator value.
FEX_DEFAULT_VISIBILITY extern std::array<Value, FEXCore::Telemetry::TelemetryType::TYPE_LAST> TelemetryValues;
// This returns the internal structure to the telemetry data structures
// One must be careful with placing these in the hot path of code execution
// It can be fairly costly, especially in the static version where it puts barriers in the code
inline Value& GetTelemetryValue(TelemetryType Type) {
  return FEXCore::Telemetry::TelemetryValues[Type];
}

FEX_DEFAULT_VISIBILITY void Initialize();
FEX_DEFAULT_VISIBILITY void Shutdown(const fextl::string& ApplicationName);

// Telemetry object declaration
// Telemetry ALU operations
// These are typically 3-4 instructions depending on what you're doing
// `Type` is the unqualified TelemetryType enumerator name (e.g. TYPE_HAS_SPLIT_LOCKS).

// Stores `Value` into the telemetry slot.
#define FEXCORE_TELEMETRY_SET(Type, Value)                                      \
  do {                                                                          \
    auto& Name = FEXCore::Telemetry::TelemetryValues[FEXCore::Telemetry::Type]; \
    Name = Value;                                                               \
  } while (0)
// ORs `Value` into the telemetry slot.
#define FEXCORE_TELEMETRY_OR(Type, Value)                                       \
  do {                                                                          \
    auto& Name = FEXCore::Telemetry::TelemetryValues[FEXCore::Telemetry::Type]; \
    Name |= Value;                                                              \
  } while (0)
// Increments the telemetry slot by one.
// Any additional arguments are accepted and ignored so that one- and two-argument call sites
// compile the same way whether telemetry is enabled or disabled.
#define FEXCORE_TELEMETRY_INC(Type, ...)                                        \
  do {                                                                          \
    auto& Name = FEXCore::Telemetry::TelemetryValues[FEXCore::Telemetry::Type]; \
    Name++;                                                                     \
  } while (0)

#else
// Telemetry disabled: every entry point and macro compiles to nothing.
static inline void Initialize() {}
static inline void Shutdown(const fextl::string& ApplicationName) {}

#define FEXCORE_TELEMETRY_INIT(Name, Type)
#define FEXCORE_TELEMETRY(Name, Value) \
  do {                                 \
  } while (0)
#define FEXCORE_TELEMETRY_SET(Name, Value) \
  do {                                     \
  } while (0)
#define FEXCORE_TELEMETRY_OR(Name, Value) \
  do {                                    \
  } while (0)
// Accepts the same argument shapes as the enabled variant (extra arguments are ignored).
#define FEXCORE_TELEMETRY_INC(Name, ...) \
  do {                                   \
  } while (0)
#endif
} // namespace FEXCore::Telemetry


================================================
FILE: FEXCore/include/FEXCore/Utils/ThreadPoolAllocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/list.h>

#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <optional>
#include <type_traits>

namespace FEXCore::Utils {
/**
 * @brief An intrusive thread pool allocator
 *
 * Requires coordination between the allocator and its clients to efficiently share memory allocations between threads.
 *
 * The `Client` in this case referring to the location in code allocating a `MemoryBuffer` from the allocator.
 *   - The client must `Claim` a buffer to allocate it
 *   - In claiming a buffer, the allocator is passed a `BufferOwnedFlag` that is updated by both the allocator and client.
 *   - When the client is done with the buffer it must `Disown` or `Unclaim` the buffer.
 *     - `Disown` the buffer when it is expected to be used again soon.
 *       - This is relatively cheap.
 *     - `Unclaim` when the buffer won't be used again for an extended period.
 *       - This is expensive and requires a mutex shared between threads
 *     - `PoolBufferWithTimedRetirement` helper class provided to help with this.
 *
 * Once the client has disowned a buffer then the allocator is free to reclaim the buffer when another thread is trying to `Claim` a new buffer.
 * The buffer getting claimed from a disowned client must have had its last use greater than the defined `DURATION` before it has a chance to get
 * reclaimed by the Allocator.
 *
 * During buffer reclaiming is also when unclaimed buffers get freed. This means active threads are able to clean up idle thread's unused memory.
 */
class IntrusivePooledAllocator {
public:
  struct MemoryBuffer;
  /**
   * @brief Container for tracking the buffers
   *
   * We're using fextl::list explicitly because its iterators aren't invalidated when the list is adjusted.
   * if we had list types that we can atomically erase and append elements then unclaiming could be made cheaper.
   */
  using ContainerType = fextl::list<MemoryBuffer*>;
  /**
   * @brief steady_clock to ensure long running applications don't hit any timeskip problems.
   */
  using ClockType = std::chrono::steady_clock;
  /**
   * @brief Atomic flag state for letting the client know if it owns the buffer
   */
  enum class ClientFlags : uint32_t {
    // Buffer is not held by the client: stored by UnclaimBuffer, and by ClaimBufferImpl when it takes back a disowned buffer.
    FLAG_FREE = 0,
    // Client currently holds the buffer: stored by ClaimBuffer and by a successful TryToReownBuffer.
    FLAG_OWNED = 1,
    // Client relinquished the buffer through DisownBuffer; the allocator may take it back.
    FLAG_DISOWNED = 3,
  };

  using BufferOwnedFlag = std::atomic<ClientFlags>;

  // Bookkeeping record for one pooled allocation; the lists in this allocator store pointers to these records.
  struct MemoryBuffer : public FEXCore::Allocator::FEXAllocOperators {
    MemoryBuffer(void* Ptr, size_t Size, std::chrono::time_point<ClockType> LastUsed)
      : Ptr {Ptr}
      , Size {Size}
      , LastUsed {LastUsed} {}

    // Start of the backing memory as returned by Alloc().
    void* Ptr;
    // Size in bytes that was requested from Alloc() and is later handed to Free().
    size_t Size;
    // Timestamp of the last disown/reown; compared against DURATION when deciding whether the buffer has expired.
    std::atomic<std::chrono::time_point<ClockType>> LastUsed;
    // Flag of the client currently associated with this buffer; nullptr while the buffer sits in the unclaimed list.
    BufferOwnedFlag* CurrentClientOwnedFlag {};
  };
  // Ensure that the atomic objects of MemoryBuffer are lock free
  static_assert(decltype(MemoryBuffer::LastUsed) {}.is_always_lock_free, "Oops, needs to be lock free");
  static_assert(std::remove_pointer<decltype(MemoryBuffer::CurrentClientOwnedFlag)>::type {}.is_always_lock_free, "Oops, needs to be lock "
                                                                                                                  "free");

  /**
   * @brief Query whether the client currently owns its buffer
   *
   * @param CurrentClientFlag Client owned flag
   *
   * @return true if the flag read FLAG_OWNED at the moment of the load
   */
  static bool IsClientBufferOwned(BufferOwnedFlag& CurrentClientFlag) {
    const ClientFlags State = CurrentClientFlag.load();
    return State == ClientFlags::FLAG_OWNED;
  }

  /**
   * @brief Lets the client easily check if the buffer was freed
   *
   * @param CurrentClientFlag Client owned flag
   *
   * @return Is the client buffer free (FLAG_FREE) at the point of checking
   */
  static bool IsClientBufferFree(BufferOwnedFlag& CurrentClientFlag) {
    return CurrentClientFlag.load() == ClientFlags::FLAG_FREE;
  }

  /**
   * @brief Claims a buffer of at least `Size` bytes from the pool on behalf of a client
   *
   * @param Size Minimum size of the buffer in bytes
   * @param CurrentClientFlag Client tracked flag; recorded in the buffer and set to FLAG_OWNED
   *
   * The whole operation runs under the allocation mutex.
   * Once a buffer is claimed, the pool allocator can not reclaim this buffer until it is "Disowned"
   *
   * @return iterator to the internal tracking container
   */
  ContainerType::iterator ClaimBuffer(size_t Size, BufferOwnedFlag* CurrentClientFlag) {
    std::lock_guard<std::mutex> Guard {AllocationMutex};

    auto Claimed = ClaimBufferImpl(Size);
    MemoryBuffer* const Entry = *Claimed;

    // Associate the client's flag with the buffer before publishing ownership through it.
    Entry->CurrentClientOwnedFlag = CurrentClientFlag;
    CurrentClientFlag->store(ClientFlags::FLAG_OWNED);
    return Claimed;
  }

  /**
   * @brief Immediately release the buffer back to the allocator, given it has not been reclaimed
   *
   * @param Buffer - The iterator that was previously given with ClaimBuffer
   * @param ClientFlag - The client tracked flag that was passed when the buffer was claimed
   *
   * The flag is unconditionally set to FLAG_FREE. The buffer is only moved back to the unclaimed list
   * (under the allocation mutex) when the flag was not already FLAG_FREE beforehand.
   */
  void UnclaimBuffer(const ContainerType::iterator& Buffer, BufferOwnedFlag* ClientFlag) {
    // Transition the buffer to free, unclaiming if it wasn't free prior.
    // The exchange happens before the mutex is taken; a previous value of FLAG_FREE means the buffer is
    // no longer this client's to return, so `Buffer` must not be touched in that case.
    if (ClientFlag->exchange(ClientFlags::FLAG_FREE) != ClientFlags::FLAG_FREE) {
      std::unique_lock lk {AllocationMutex};
      UnclaimBufferImpl(Buffer);
    }
  }

  /**
   * @brief Set internal flags of buffer claiming that the buffer is relinquished ownership
   *
   * @param Buffer - The iterator that was previously given with ClaimBuffer
   *
   * Once the buffer is disowned, the allocator can take back ownership of the buffer at any time
   *
   * Use ReownOrClaimBuffer if you want to attempt reusing a buffer being held on to.
   *
   * This function does not take the allocation mutex; it only updates the buffer's timestamp and the client flag.
   */
  void DisownBuffer(ContainerType::iterator Buffer) {
    // Client still owns the buffer but isn't using it
    // Allows us to claim it back if necessary
    // The timestamp is refreshed first, then the flag is switched to FLAG_DISOWNED.
    (*Buffer)->LastUsed.store(ClockType::now(), std::memory_order_relaxed);
    (*Buffer)->CurrentClientOwnedFlag->store(ClientFlags::FLAG_DISOWNED);
  }

  /**
   * @brief Attempt to take back a buffer that this client disowned earlier
   *
   * @param Buffer - The buffer we previously disowned
   * @param Size - The size of the buffer (not consulted by this function)
   * @param CurrentClientFlag - The client tracked flag
   *
   * Once DisownBuffer has been called, it is unsafe to use the buffer until it has been reowned
   * Always reown a buffer before use!
   *
   * @return The original buffer passed in on successful reown, otherwise std::nullopt
   */
  std::optional<ContainerType::iterator> TryToReownBuffer(const ContainerType::iterator& Buffer, size_t Size, BufferOwnedFlag* CurrentClientFlag) {
    // Only an atomic DISOWNED -> OWNED transition counts as a successful reown.
    ClientFlags Observed = ClientFlags::FLAG_DISOWNED;
    const bool Reowned = CurrentClientFlag->compare_exchange_strong(Observed, ClientFlags::FLAG_OWNED);

    if (Reowned) {
      // The buffer is ours again; refresh its timestamp before handing it back.
      (*Buffer)->LastUsed.store(ClockType::now(), std::memory_order_relaxed);
      return Buffer;
    }

    return std::nullopt;
  }

  /**
   * @brief Reown a previously disowned buffer if possible, otherwise claim a buffer from the pool
   *
   * @param Buffer - The buffer we previously disowned
   * @param Size - The size of the buffer
   * @param CurrentClientFlag - The client tracked flag
   *
   * Once DisownBuffer has been called, it is unsafe to use the buffer until it has been reowned
   * Always reown a buffer before use!
   *
   * @return The original buffer passed in on successful reown, otherwise a new buffer
   */
  ContainerType::iterator ReownOrClaimBuffer(const ContainerType::iterator& Buffer, size_t Size, BufferOwnedFlag* CurrentClientFlag) {
    // Cheap path: the flag could be flipped back to owned.
    if (auto Reowned = TryToReownBuffer(Buffer, Size, CurrentClientFlag); Reowned.has_value()) {
      return *Reowned;
    }

    // Expensive path: the old buffer is gone, go through the pool.
    return ClaimBuffer(Size, CurrentClientFlag);
  }

  virtual ~IntrusivePooledAllocator() = default;

  // XXX: Is this a good amount?
  /**
   * @brief Duration before the allocator will reclaim buffers that the client claimed AND disowned
   *
   * Pool allocator will not attempt to reclaim client owned buffers, would be unsafe to do so.
   */
  constexpr static std::chrono::duration DURATION {std::chrono::seconds(5)};

protected:
  IntrusivePooledAllocator() = default;

  /**
   * @brief Finds or allocates a buffer of at least `Size` bytes and places it in ClaimedBuffers
   *
   * Steps, in order:
   *   1. Every buffer in ClaimedBuffers that is disowned and expired is moved to UnclaimedBuffers.
   *   2. UnclaimedBuffers is searched for a buffer with exactly `Size` bytes, falling back to a larger one.
   *   3. At most one expired unclaimed buffer (other than the one selected in step 2) is freed.
   *   4. The selected buffer is moved to ClaimedBuffers, or a new buffer is allocated if none was selected.
   *
   * This function takes no lock itself; the caller visible in this class (ClaimBuffer) holds AllocationMutex.
   * The buffer's CurrentClientOwnedFlag is not set here.
   *
   * @param Size Minimum size of the buffer in bytes
   *
   * @return iterator to the buffer's entry in ClaimedBuffers
   */
  ContainerType::iterator ClaimBufferImpl(size_t Size) {
    auto BuffersEnd = UnclaimedBuffers.end();
    ContainerType::iterator BestFit = BuffersEnd;
    ContainerType::iterator UnsizedFit = BuffersEnd;

    auto Now = ClockType::now();
    // Move any expired ClaimedBuffers to UnclaimedBuffers
    {
      // Spin the non-owned buffers and see if we can take ones past the period
      for (auto it = ClaimedBuffers.begin(); it != ClaimedBuffers.end();) {
        // 1) Can't take anything that the client has still claimed
        // 2) Needs to still be last used beyond our time threshold
        // Every buffer that satisfies both conditions is taken.
        if ((*it)->CurrentClientOwnedFlag->load() == ClientFlags::FLAG_DISOWNED) {
          auto UsedTime = (*it)->LastUsed.load(std::memory_order_relaxed);
          if ((Now - UsedTime) >= DURATION) {
            // The client may have reowned or unclaimed the buffer since the load above,
            // so the transition to FLAG_FREE is done with a compare-exchange.
            ClientFlags Expected = ClientFlags::FLAG_DISOWNED;
            if ((*it)->CurrentClientOwnedFlag->compare_exchange_strong(Expected, ClientFlags::FLAG_FREE)) {
              // We managed to take away ownership
              // Put it back in the regular pool and come back to it
              (*it)->CurrentClientOwnedFlag = nullptr;
              UnclaimedBuffers.emplace_back(*it);
              it = ClaimedBuffers.erase(it);
              continue;
            }
          }
        }

        ++it;
      }
    }

    // Find an unclaimed buffer that is >= Size and Free up to one unclaimed buffer that has expired
    {
      // Walk all the allocations and find a buffer that fits
      for (auto it = UnclaimedBuffers.begin(); it != BuffersEnd; ++it) {
        // An exact size match wins immediately.
        if ((*it)->Size == Size) {
          BestFit = it;
          break;
        }

        // Otherwise remember a larger buffer; the last larger buffer encountered is the one kept.
        if ((*it)->Size > Size) {
          UnsizedFit = it;
        }
      }

      // If we didn't have an exact fit then use an unsized fit
      if (BestFit == BuffersEnd) {
        BestFit = UnsizedFit;
      }

      // Free up to one unclaimed buffer that has expired
      {
        std::chrono::time_point<ClockType> LRUTime {};
        ContainerType::iterator LastUsed = BuffersEnd;

        // Walk all the allocations and find a buffer to erase
        for (auto it = UnclaimedBuffers.begin(); it != UnclaimedBuffers.end(); ++it) {
          // Ensure that the LRU value is past our duration threshold and isn't the one we are claiming
          // Also only select a single memory region
          // NOTE(review): `UsedTime > LRUTime` keeps the expired candidate with the latest LastUsed;
          // the LRU naming suggests the oldest may have been intended - confirm.
          if (it != BestFit) {
            auto UsedTime = (*it)->LastUsed.load(std::memory_order_relaxed);
            if ((Now - UsedTime) >= DURATION && UsedTime > LRUTime) {
              LastUsed = it;
              LRUTime = UsedTime;
            }
          }
        }

        // If we found a buffer then free it
        if (LastUsed != BuffersEnd) {
          Free((*LastUsed)->Ptr, (*LastUsed)->Size);
          delete *LastUsed;
          UnclaimedBuffers.erase(LastUsed);
        }
      }

      // Hand out the selected buffer by moving its record from the unclaimed to the claimed list.
      if (BestFit != UnclaimedBuffers.end()) {
        MemoryBuffer* Buffer = *BestFit;
        UnclaimedBuffers.erase(BestFit);
        return ClaimedBuffers.emplace(ClaimedBuffers.end(), Buffer);
      }
    }

    // Need to allocate a new buffer, couldn't fit
    auto Data = Alloc(Size);
    return ClaimedBuffers.emplace(ClaimedBuffers.end(), new MemoryBuffer {Data, Size, ClockType::now()});
  }

  // Moves the record referenced by `Buffer` from ClaimedBuffers to the back of UnclaimedBuffers
  // and detaches it from its client flag. `Buffer` is invalid afterwards.
  void UnclaimBufferImpl(ContainerType::iterator Buffer) {
    MemoryBuffer* const Released = *Buffer;
    Released->CurrentClientOwnedFlag = nullptr;
    UnclaimedBuffers.emplace_back(Released);
    ClaimedBuffers.erase(Buffer);
  }

  void FreeAllBuffers() {
    for (auto it : UnclaimedBuffers) {
      Free(it->Ptr, it->Size);
      delete it;
    }

    for (auto it : ClaimedBuffers) {
      Free(it->Ptr, it->Size);
      delete it;
    }

    UnclaimedBuffers.clear();
    ClaimedBuffers.clear();
  }

  /**
   * @brief List of buffers that this pool allocator itself owns
   */
  ContainerType UnclaimedBuffers;

  /**
   * @brief List of buffers that are client claimed
   */
  ContainerType ClaimedBuffers;

  /**
   * @brief Mutex to ensure thread safety while shuffling buffers around and allocating
   */
  std::mutex AllocationMutex;

private:
  /**
   * @brief Allocates the buffer
   *
   * @param Size of the object to allocate
   *
   * @return pointer
   */
  virtual void* Alloc(size_t Size) = 0;
  /**
   * @brief Frees the buffer
   *
   * @param Ptr buffer pointer
   * @param Size buffer size
   */
  virtual void Free(void* Ptr, size_t Size) = 0;
};

/**
 * @brief Thread pool allocator that allocates and frees objects using malloc
 */
class PooledAllocatorMalloc final : public IntrusivePooledAllocator {
public:
  PooledAllocatorMalloc() = default;

  virtual ~PooledAllocatorMalloc() {
    FreeAllBuffers();
  }

private:
  void* Alloc(size_t Size) override {
    return FEXCore::Allocator::malloc(Size);
  }

  void Free(void* Ptr, size_t Size) override {
    FEXCore::Allocator::free(Ptr);
  }
};

/**
 * @brief Thread pool allocator that allocates and frees objects that uses mmap
 */
class PooledAllocatorVirtual final : public IntrusivePooledAllocator {
public:
  PooledAllocatorVirtual() = default;
  PooledAllocatorVirtual(const char* Name)
    : Name {Name} {}

  virtual ~PooledAllocatorVirtual() {
    FreeAllBuffers();
  }

private:
  void* Alloc(size_t Size) override {
    auto Result = FEXCore::Allocator::VirtualAlloc(Size);
    if (Name) {
      FEXCore::Allocator::VirtualName(Name, Result, Size);
    }
    return Result;
  }

  void Free(void* Ptr, size_t Size) override {
    FEXCore::Allocator::VirtualFree(Ptr, Size);
  }

  const char* Name {};
};

/**
 * @brief Thread pool allocator that allocates and frees objects that uses mmap, with a guard page.
 *
 * The last page of the size provided has the guard.
 */
class PooledAllocatorVirtualWithGuard final : public IntrusivePooledAllocator {
public:
  PooledAllocatorVirtualWithGuard() = default;
  PooledAllocatorVirtualWithGuard(const char* Name)
    : Name {Name} {}

  virtual ~PooledAllocatorVirtualWithGuard() {
    FreeAllBuffers();
  }

private:
  void* Alloc(size_t Size) override {
    auto Ptr = FEXCore::Allocator::VirtualAlloc(Size);
    uintptr_t LastPageAddr = AlignDown(reinterpret_cast<uintptr_t>(Ptr) + Size - 1, FEXCore::Utils::FEX_PAGE_SIZE);
    if (!FEXCore::Allocator::VirtualProtect(reinterpret_cast<void*>(LastPageAddr), FEXCore::Utils::FEX_PAGE_SIZE,
                                            FEXCore::Allocator::ProtectOptions::None)) {
      LogMan::Msg::EFmt("Failed to mprotect last page of code buffer.");
    }
    if (Name) {
      FEXCore::Allocator::VirtualName(Name, Ptr, Size);
    }
    return Ptr;
  }

  void Free(void* Ptr, size_t Size) override {
    FEXCore::Allocator::VirtualFree(Ptr, Size);
  }

  const char* Name {};
};

/**
 * @brief Wrapper around the pool allocator for delayed pool reclaiming
 *
 * This is expected to be used in high frequency buffer temporary usage.
 * Instead of quickly unclaiming and reclaiming the buffer while the code is hot,
 * This instead will do the cheap operation of disowning the buffer until the code path cools down enough.
 * Once the code path stops disowning the codepath more times than `PeriodFrequency` during `PeriodMS` then
 * it will immediately unclaim.
 *
 * Implications:
 *   - The object will always be claimed for at *least* `PeriodFrequency`
 *   - The object will still *always* be disowned after each temporary use
 *     - This allows the pool allocator to reclaim a buffer from a sleeping thread
 *
 * Performance characteristics:
 *  - Disowning is cheap.
 *    - Last-used timestamp update
 *    - atomic_bool clear to signify it is disowned
 *
 *  - Reowning is relatively cheap (When buffer is still owned).
 *    - atomic_bool load to check if the object is still owned
 *      - atomic<uint32_t> CAS to change the object to `OWNED` state
 *        - Resolves a race condition where the `Allocator` can be in the process of reclaiming the buffer from the client
 *      - Last-used timestamp update
 *      - atomic_bool<relaxed> set to signify owned
 *      - atomic<uint32_t> set to change object to `OWNED` state
 *    - When object isn't owned, then allocate a new buffer from the pool
 *
 *  - Unclaiming is fairly costly
 *    - Requires owning a mutex, shared between all threads using the `Allocator`
 *    - Updating two fextl::list containers to give the ownership back to the `Allocator`
 *
 *  - Claiming is very costly
 *    - Requires owning a mutex, shared between all threads using the `Allocator`
 *    - Scans two fextl::list containers to find the best fit buffer
 *    - Or allocates another buffer when that fails
 *    - Frees stale buffers opportunistically
 */
template<typename Type, size_t PeriodMS, size_t PeriodFrequency>
class PoolBufferWithTimedRetirement final {
  // If this wrapper's retirement period were longer than the pool allocator's duration, the pool allocator would always
  // need to reclaim the buffer itself rather than having it given back.
  static_assert(std::chrono::milliseconds(PeriodMS) <= IntrusivePooledAllocator::DURATION,
                "PoolBufferWithTimedRetirement period needs to be lower or equal to the pool allocator duration");

public:
  /**
   * @param Allocator Pool allocator that buffers are claimed from and handed back to. Stored by reference.
   * @param Size Size requested when a buffer has to be claimed; may be replaced later through `NewSize`.
   */
  PoolBufferWithTimedRetirement(IntrusivePooledAllocator& Allocator, size_t Size)
    : ThreadAllocator {Allocator}
    , Size {Size} {}

  // `ClientOwnedFlag` is a member of this object, so the buffer is unclaimed before the flag goes away.
  ~PoolBufferWithTimedRetirement() {
    UnclaimBuffer();
  }

  // Result of `ReownOrClaimBufferWithSize`: typed pointer plus the size of the backing store.
  struct AllocationInfo {
    Type Ptr;
    size_t Size;
  };

  /**
   * @brief Return the owned buffer or allocate another one from the `Allocator`
   *
   * The buffer is guaranteed to have at least `Size` bytes of data.
   * The initial data in the buffer is undefined, even when the buffer is just reowned.
   *
   * @param NewSize Optional new size for managed data
   *
   * @return A usable pointer of type `Type` and the size of the backing store.
   */
  AllocationInfo ReownOrClaimBufferWithSize(std::optional<size_t> NewSize = std::nullopt) {
    // Check if we can cheaply re-own a previous buffer
    std::optional Buffer =
      IntrusivePooledAllocator::IsClientBufferOwned(ClientOwnedFlag) ? Info : ThreadAllocator.TryToReownBuffer(Info, Size, &ClientOwnedFlag);

    // Ensure the now owned buffer has enough space. If not, unclaim it and proceed to claim a new one
    if (NewSize && Buffer && (**Buffer)->Size < NewSize.value()) {
      UnclaimBuffer();
      Buffer.reset();
    }

    // Claim a new buffer if needed
    // The stored size is only replaced after the re-own attempt above, which still used the previous size.
    Size = NewSize.value_or(Size);
    if (!Buffer) {
      Buffer = ThreadAllocator.ClaimBuffer(Size, &ClientOwnedFlag);
    }

    Info = *Buffer;

    // Putting a memset here is very handy for using thread sanitizer to find buffer usage races
    // Leaving this here for future excavation that will definitely occur here
    // memset((*Info)->Ptr, 0, Size);

    return {
      .Ptr = reinterpret_cast<Type>((*Info)->Ptr),
      .Size = (*Info)->Size,
    };
  }

  // Convenience wrapper around `ReownOrClaimBufferWithSize` that only returns the pointer.
  Type ReownOrClaimBuffer(std::optional<size_t> NewSize = std::nullopt) {
    return ReownOrClaimBufferWithSize(NewSize).Ptr;
  }

  /**
   * @brief Disown or unclaim the buffer, letting the `Allocator` know it can reclaim the buffer
   *
   * Once the `ReownOrClaimBuffer` function has been used, this must be called to let the `Allocator` know it is safe to reclaim a buffer.
   *
   * This will first Disown the buffer; which is cheap.
   *
   * If the frequency of use is below the threshold then immediately `UnclaimBuffer` so that `Allocator` can reuse it.
   */
  void DelayedDisownBuffer() {
    LOGMAN_THROW_A_FMT(FEXCore::Utils::IntrusivePooledAllocator::IsClientBufferOwned(ClientOwnedFlag), "Tried to disown buffer when client "
                                                                                                       "doesn't own it");

    // Always disown but not always unclaim
    // Disowning = cheap, unclaiming = expensive
    ThreadAllocator.DisownBuffer(Info);

    auto Now = std::chrono::steady_clock::now();
    // Usage is only evaluated once a full period has elapsed since the last evaluation.
    if ((Now - Previous) >= std::chrono::duration(std::chrono::milliseconds(PeriodMS))) {
      if (CountPer < PeriodFrequency) {
        // Only unclaim the buffer if our buffer usage isn't excessive in the last period
        UnclaimBuffer();
      }
      // Start a fresh counting window
      CountPer = 0;
      Previous = Now;
    }
    ++CountPer;
  }

  /**
   * @brief Completely unclaim the buffer
   *
   * Useful if it is known that the buffer won't be used again for a period and can be given back
   * to the `Allocator` immediately.
   *
   * Necessary if an object is going to be freed from memory, so the `Allocator` can't update the `ClientOwnedFlag`
   *
   * Only use in that edge case! Otherwise use `DelayedDisownBuffer`
   */
  void UnclaimBuffer() {
    ThreadAllocator.UnclaimBuffer(Info, &ClientOwnedFlag);
  }

private:
  // Thread allocator
  FEXCore::Utils::IntrusivePooledAllocator& ThreadAllocator;

  // Buffer size
  size_t Size;

  // Buffer ownership tracking
  FEXCore::Utils::IntrusivePooledAllocator::ContainerType::iterator Info {};
  FEXCore::Utils::IntrusivePooledAllocator::BufferOwnedFlag ClientOwnedFlag {FEXCore::Utils::IntrusivePooledAllocator::ClientFlags::FLAG_FREE};

  // Threshold counting
  // Number of disowns seen in the current window, and when that window started
  uint64_t CountPer {};
  std::chrono::steady_clock::time_point Previous;
};
} // namespace FEXCore::Utils


================================================
FILE: FEXCore/include/FEXCore/Utils/Threads.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/memory.h>

namespace FEXCore::Threads {
// Signature of a thread entry point: takes one opaque pointer and returns one opaque pointer.
using ThreadFunc = void* (*)(void* user_ptr);

class Thread;
// Factory hook producing a `Thread` for the given entry point and argument.
// NOTE(review): presumably `Arg` is what gets passed to `Func` as `user_ptr` — confirm against the implementation.
using CreateThreadFunc = fextl::unique_ptr<Thread> (*)(ThreadFunc Func, void* Arg);
// Hook with no arguments and no result; see `Thread::CleanupAfterFork`.
using CleanupAfterForkFunc = void (*)();

// Bundle of the two hook function pointers; accepted by `Thread::SetInternalPointers`.
struct Pointers {
  // Hook of type `CreateThreadFunc`
  CreateThreadFunc CreateThread;
  // Hook of type `CleanupAfterForkFunc`
  CleanupAfterForkFunc CleanupAfterFork;
};

// API
// Abstract thread handle. Concrete behaviour is supplied by a subclass defined elsewhere.
class Thread {
public:
  virtual ~Thread() = default;
  // NOTE(review): names suggest std::thread/pthread-like semantics for the next three — confirm against the implementation.
  virtual bool joinable() = 0;
  // `ret` is an out-pointer; presumably receives the value returned by the thread's `ThreadFunc` — confirm.
  virtual bool join(void** ret) = 0;
  virtual bool detach() = 0;
  // NOTE(review): presumably true when called from the thread this object represents — confirm.
  virtual bool IsSelf() = 0;

  /**
   * @name Calls provided API functions
   * @{ */

  static fextl::unique_ptr<Thread> Create(ThreadFunc Func, void* Arg);

  static void CleanupAfterFork();

  /**  @} */

  // Set API functions
  static void SetInternalPointers(const Pointers& _Ptrs);
};
} // namespace FEXCore::Threads


================================================
FILE: FEXCore/include/FEXCore/Utils/TypeDefines.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>

namespace FEXCore::Utils {
// FEX always works with 4 KiB pages, regardless of what the build host uses.
// Hosts with 16k/64k pages would otherwise leak their page size in through the system PAGE_SIZE define,
// so these constants are used instead.
constexpr size_t FEX_PAGE_SHIFT = 12;
// Page size in bytes, derived from the shift so the two can never disagree.
constexpr size_t FEX_PAGE_SIZE = size_t {1} << FEX_PAGE_SHIFT;
// Mask that clears the in-page offset bits of an address.
constexpr size_t FEX_PAGE_MASK = ~(FEX_PAGE_SIZE - 1);
} // namespace FEXCore::Utils


================================================
FILE: FEXCore/include/FEXCore/fextl/allocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Utils/AllocatorHooks.h>

#include <memory>

namespace fextl {
/**
 * @brief Standard-library style allocator that forwards storage requests to FEXCore::Allocator.
 *
 * The allocator is stateless: every instance compares equal to every other instance.
 */
template<typename T>
class FEXAlloc : public std::allocator<T> {
public:
  using value_type = T;
  using propagate_on_container_move_assignment = std::true_type;

  FEXAlloc() noexcept {}

  // Rebinding constructor; there is no state to copy.
  template<class U>
  FEXAlloc(const FEXAlloc<U>&) noexcept {}

  // Obtain storage for `n` objects, aligned for `value_type`.
  value_type* allocate(std::size_t n) {
    void* const Storage = ::FEXCore::Allocator::aligned_alloc(alignof(value_type), n * sizeof(value_type));
    return static_cast<value_type*>(Storage);
  }

  // Give storage back; the element count is not needed.
  void deallocate(value_type* p, size_t) noexcept {
    ::FEXCore::Allocator::aligned_free(p);
  }

  bool operator==(const FEXAlloc&) const {
    return true;
  }
};
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/deque.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <deque>

namespace fextl {
// std::deque that defaults to fextl::FEXAlloc for its storage.
template<typename ValueT, typename AllocT = fextl::FEXAlloc<ValueT>>
using deque = std::deque<ValueT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/fmt.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/Utils/File.h>

#include <fmt/format.h>
#include <fmt/ranges.h>
#include <unistd.h>

namespace fextl::fmt {
// fmt memory buffer that defaults to fextl::FEXAlloc as its allocator.
template<typename CharT, size_t InlineSize = ::fmt::inline_buffer_size, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_memory_buffer = ::fmt::basic_memory_buffer<CharT, InlineSize, AllocT>;
// The char flavour with the default inline size.
using memory_buffer = fextl::fmt::basic_memory_buffer<char>;

// Format `args` according to `fmt` into the output iterator `out`; returns what ::fmt::vformat_to returns.
template<typename OutputIt, typename... Args>
OutputIt format_to(OutputIt out, ::fmt::format_string<Args...> fmt, Args&&... args) {
  return ::fmt::vformat_to(out, fmt.str, ::fmt::make_format_args(args...));
}

// Copy the contents of a fextl memory buffer into a fextl string.
template<typename Char, size_t SIZE>
FMT_NODISCARD auto to_string(const fextl::fmt::basic_memory_buffer<Char, SIZE>& buf) -> fextl::basic_string<Char> {
  const auto Length = buf.size();
  // Optimizer hint only: the length is below the maximum string size.
  ::fmt::detail::assume(Length < std::basic_string<Char>().max_size());
  return fextl::basic_string<Char>(buf.data(), Length);
}

// Type-erased formatting entry point producing a fextl::string.
FMT_INLINE fextl::string vformat(::fmt::string_view fmt, ::fmt::format_args args) {
  // Don't optimize the "{}" case to keep the binary size small and because it
  // can be better optimized in fmt::format anyway.
  memory_buffer Scratch {};
  ::fmt::detail::vformat_to(Scratch, fmt, args);
  return fextl::fmt::to_string(Scratch);
}

// Compile-time checked formatting into a fextl::string.
template<typename... T>
FMT_NODISCARD FMT_INLINE auto format(::fmt::format_string<T...> fmt, T&&... args) -> fextl::string {
  return fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
}

#ifndef _WIN32
// Format and emit to stdout through a raw write() on STDOUT_FILENO. The result of write() is not inspected.
template<typename... T>
FMT_INLINE auto print(::fmt::format_string<T...> fmt, T&&... args) -> void {
  const fextl::string Text = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  write(STDOUT_FILENO, Text.c_str(), Text.size());
}

// Format and emit to the file descriptor `FD` through a raw write(). The result of write() is not inspected.
template<typename... T>
FMT_INLINE auto print(int FD, ::fmt::format_string<T...> fmt, T&&... args) -> void {
  const fextl::string Text = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  write(FD, Text.c_str(), Text.size());
}
#else
// Windows flavour: format and emit through the File object returned by FEXCore::File::File::GetStdOUT().
template<typename... T>
FMT_INLINE auto print(::fmt::format_string<T...> fmt, T&&... args) -> void {
  const fextl::string Text = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  auto StdOut = FEXCore::File::File::GetStdOUT();
  StdOut.Write(Text.c_str(), Text.size());
}

// Windows flavour: format and emit to a raw Win32 HANDLE.
template<typename... T>
FMT_INLINE auto print(HANDLE File, ::fmt::format_string<T...> fmt, T&&... args) -> void {
  auto String = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  // WriteFile only allows a null lpNumberOfBytesWritten when lpOverlapped is non-null.
  // This is a synchronous write (no OVERLAPPED), so a real DWORD has to be supplied.
  DWORD BytesWritten {};
  WriteFile(File, String.c_str(), static_cast<DWORD>(String.size()), &BytesWritten, nullptr);
}
#endif
// Format and emit through an FEXCore File object.
template<typename... T>
FMT_INLINE auto print(FEXCore::File::File& f, ::fmt::format_string<T...> fmt, T&&... args) -> void {
  const fextl::string Text = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  f.Write(Text.c_str(), Text.size());
}

// Format and emit to the descriptor underlying a stdio stream.
// The write goes straight to fileno(f), so it does not pass through the stream's own stdio buffer.
template<typename... T>
FMT_INLINE auto print(std::FILE* f, ::fmt::format_string<T...> fmt, T&&... args) -> void {
  const fextl::string Text = fextl::fmt::vformat(fmt, ::fmt::make_format_args(args...));
  const int Descriptor = fileno(f);
  write(Descriptor, Text.c_str(), Text.size());
}
} // namespace fextl::fmt


================================================
FILE: FEXCore/include/FEXCore/fextl/forward_list.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <forward_list>

namespace fextl {
// std::forward_list that defaults to fextl::FEXAlloc for its storage.
template<typename ValueT, typename AllocT = fextl::FEXAlloc<ValueT>>
using forward_list = std::forward_list<ValueT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/functional.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/AllocatorHooks.h>

#include <functional>
#include <type_traits>
#include <utility>

namespace fextl {

/**
 * Equivalent to std::move_only_function but uses FEXCore::Allocator routines
 * for non-function pointers.
 *
 * Primary template: declared only. The usable implementation is the partial
 * specialization for function signatures `R(Args...)`.
 *
 * @tparam F       Function signature.
 * @tparam Alloc   Allocation routine called as `Alloc(alignment, size)`.
 * @tparam Dealloc Routine that frees a pointer obtained from `Alloc`.
 */
template<typename F, void* (*Alloc)(size_t, size_t) = ::FEXCore::Allocator::aligned_alloc, void (*Dealloc)(void*) = ::FEXCore::Allocator::aligned_free>
class move_only_function;

/**
 * Move-only callable wrapper for signature `R(Args...)`.
 *
 * Callables that std::function would have to heap-allocate are relocated into storage obtained from `Alloc`
 * and reached through a single-pointer lambda, so std::function itself never allocates.
 * The wrapper owns that storage: it runs the callable's destructor (if any) and frees the storage with `Dealloc`
 * on destruction and whenever it is overwritten by move assignment.
 */
template<typename R, typename... Args, void* (*Alloc)(size_t, size_t), void (*Dealloc)(void*)>
class move_only_function<R(Args...), Alloc, Dealloc> {
public:
  template<typename F>
  requires std::is_invocable_r_v<R, F, Args...>
  move_only_function(F&& f) noexcept(std::is_nothrow_move_constructible_v<F>) {
    if constexpr (std::is_convertible_v<F, R (*)(Args...)>) {
      // Argument is a function pointer, a captureless lambda, or a stateless function object.
      // std::function can store these without allocation
      internal = std::move(f);
    } else if constexpr (std::is_nothrow_constructible_v<std::function<R(Args...)>, F>) {
      // If construction is guaranteed not to throw an exception, this implies
      // the std::function implementation won't allocate memory!
      internal = std::move(f);
    } else {
      // Other arguments require allocation, which is a problem since
      // std::function doesn't allow allocator customization. Implementations
      // are generally able to avoid allocation for lambdas with a single
      // pointer capture however. We can exploit this special case by wrapping
      // the actual argument in a lambda that points an external storage
      // location.

      static_assert(!std::is_pointer_v<F>, "Pointer types must manually be dereferenced");

      // First, relocate argument to a location returned from FEX's allocators
      using Fnoref = std::remove_reference_t<F>;
      storage = Alloc(alignof(Fnoref), sizeof(Fnoref));
      auto moved_lambda = new (storage) Fnoref {std::move(f)};

      // Second, wrap the relocated argument in a single-capture lambda
      auto wrapped_lambda = [moved_lambda](Args... args) {
        return (*moved_lambda)(std::forward<Args>(args)...);
      };

      // Third, assign the result to std::function, ensuring it's indeed
      // allocation-free by checking for nothrow-constructibility
      static_assert(noexcept(internal = std::move(wrapped_lambda)), "This implementation of std::function "
                                                                    "does not support implementing "
                                                                    "fextl::move_only_function");
      internal = std::move(wrapped_lambda);

      // Finally, if a destructor must be called, generate a pointer to its destructor
      if constexpr (!std::is_trivially_destructible_v<Fnoref>) {
        internal_destructor = [](move_only_function* self) {
          reinterpret_cast<Fnoref*>(self->storage)->~Fnoref();
        };
      }
    }
  }

  move_only_function() noexcept {}
  move_only_function(std::nullptr_t) noexcept {}
  move_only_function(const move_only_function&) = delete;

  // Members start out empty, so delegating to move assignment has nothing to release.
  move_only_function(move_only_function&& other) noexcept {
    *this = std::move(other);
  }

  /**
   * Take over the callable held by `other`, leaving `other` empty.
   *
   * Whatever this object held before is destroyed and its storage freed first, whether or not `other` is empty
   * and whether or not the held callable has a non-trivial destructor. Self-assignment is a no-op.
   */
  move_only_function& operator=(move_only_function&& other) noexcept {
    if (this != &other) {
      release();
      internal = std::exchange(other.internal, nullptr);
      internal_destructor = std::exchange(other.internal_destructor, nullptr);
      storage = std::exchange(other.storage, nullptr);
    }
    return *this;
  }

  ~move_only_function() {
    release();
  }

  R operator()(Args... args) const {
    return internal(std::forward<Args>(args)...);
  }

  // True when a callable is held.
  explicit operator bool() const noexcept {
    return (bool)internal;
  }

private:
  // Destroy the held callable (if any) and free its external storage, leaving the members alive and empty.
  void release() noexcept {
    // Drop the std::function first: its wrapper lambda only holds a pointer into `storage`.
    internal = nullptr;
    if (internal_destructor) {
      internal_destructor(this);
      internal_destructor = nullptr;
    }
    if (storage) {
      Dealloc(storage);
      storage = nullptr;
    }
  }

  std::function<R(Args...)> internal;
  void (*internal_destructor)(move_only_function*) = nullptr;
  void* storage = nullptr;
};
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/list.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <list>

namespace fextl {
// std::list that defaults to fextl::FEXAlloc for its storage.
template<typename ValueT, typename AllocT = fextl::FEXAlloc<ValueT>>
using list = std::list<ValueT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/map.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <map>

namespace fextl {
// std::map that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename MappedT, typename CompareT = std::less<KeyT>, typename AllocT = fextl::FEXAlloc<std::pair<const KeyT, MappedT>>>
using map = std::map<KeyT, MappedT, CompareT, AllocT>;

// std::multimap that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename MappedT, typename CompareT = std::less<KeyT>, typename AllocT = fextl::FEXAlloc<std::pair<const KeyT, MappedT>>>
using multimap = std::multimap<KeyT, MappedT, CompareT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/memory.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <memory>
#include <new>

namespace fextl {
// Deleter for objects whose storage came from FEXCore::Allocator: destroys the object, then frees the storage.
template<typename T>
struct default_delete : public std::default_delete<T> {
  void operator()(T* ptr) const {
    // Null pointers are ignored
    if (!ptr) {
      return;
    }
    std::destroy_at(ptr);
    FEXCore::Allocator::aligned_free(ptr);
  }

  // Allows converting to the deleter of a base class of T.
  template<typename U>
  requires (std::is_base_of_v<U, T>)
  operator fextl::default_delete<U>() {
    return fextl::default_delete<U>();
  }
};

// std::unique_ptr that defaults to fextl::default_delete.
template<typename ValueT, typename DeleterT = fextl::default_delete<ValueT>>
using unique_ptr = std::unique_ptr<ValueT, DeleterT>;

// Plain std::shared_ptr; see fextl::make_shared for how the allocation is routed.
template<typename ValueT>
using shared_ptr = std::shared_ptr<ValueT>;

// Construct a non-array T in storage from FEXCore::Allocator and wrap it in a fextl::unique_ptr.
template<typename T, typename... Args>
requires (!std::is_array_v<T>)
fextl::unique_ptr<T> make_unique(Args&&... args) {
  void* const Storage = FEXCore::Allocator::aligned_alloc(alignof(T), sizeof(T));
  T* const Object = ::new (Storage) T(std::forward<Args>(args)...);
  return fextl::unique_ptr<T>(Object);
}

// Construct a non-array T via std::allocate_shared with a fextl::FEXAlloc allocator.
template<typename T, typename... Args>
requires (!std::is_array_v<T>)
fextl::shared_ptr<T> make_shared(Args&&... args) {
  const fextl::FEXAlloc<T> Alloc {};
  return std::allocate_shared<T>(Alloc, std::forward<Args>(args)...);
}
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/memory_resource.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/allocator.h>
#include <FEXCore/fextl/list.h>

#include <memory_resource>
#include <fmt/format.h>

namespace fextl {
namespace pmr {
  // std::pmr memory resource that forwards to FEXCore::Allocator.
  class default_resource : public std::pmr::memory_resource {
  private:
    // Allocate `bytes` with the requested alignment through FEXCore::Allocator::memalign.
    void* do_allocate(std::size_t bytes, std::size_t alignment) override {
      void* const Storage = FEXCore::Allocator::memalign(alignment, bytes);
      return Storage;
    }

    // Size and alignment are not needed to free.
    void do_deallocate(void* p, std::size_t, std::size_t) override {
      FEXCore::Allocator::aligned_free(p);
    }

    // Only the very same resource object compares equal.
    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
      return &other == this;
    }
  };

  // Returns a pointer to a memory resource; defined elsewhere.
  // NOTE(review): presumably a shared `default_resource` instance — confirm at the definition.
  FEX_DEFAULT_VISIBILITY std::pmr::memory_resource* get_default_resource();

  /**
   * @brief A `std::pmr::monotonic_buffer_resource` compatible class.
   *
   * Allocates internal buffers on page boundaries and names them for buffer tracking.
   *
   * Like the standard class, all buffers are released when the resource is destroyed and the resource is not copyable.
   * Individual deallocations are no-ops; memory only comes back through `release()` or destruction.
   */
  class named_monotonic_page_buffer_resource final : public std::pmr::memory_resource {
  public:
    // `Name` may be null, in which case buffers are left unnamed. The pointer is stored, not copied.
    explicit named_monotonic_page_buffer_resource(const char* Name)
      : Name {Name} {}

    // The resource owns raw VirtualAlloc'd buffers, so a copy would free them twice.
    named_monotonic_page_buffer_resource(const named_monotonic_page_buffer_resource&) = delete;
    named_monotonic_page_buffer_resource& operator=(const named_monotonic_page_buffer_resource&) = delete;

    // Without this the buffers would outlive the resource and leak.
    ~named_monotonic_page_buffer_resource() override {
      release();
    }

    // Free every buffer and return to the initial state. Invalidates all memory handed out so far.
    void release() noexcept {
      for (auto& Iter : Buffers) {
        FEXCore::Allocator::VirtualFree(Iter.Buffer, Iter.BufferSize);
      }
      Buffers.clear();

      CurrentBuffer = nullptr;
      CurrentBufferRemaining = 0;
      CurrentAllocationSize = FEXCore::Utils::FEX_PAGE_SIZE;
    }

  protected:
    void* do_allocate(std::size_t bytes, std::size_t alignment) override {
      LOGMAN_THROW_A_FMT(bytes != 0, "Nope");
      LOGMAN_THROW_A_FMT(alignment <= FEXCore::Utils::FEX_PAGE_SIZE, "Nope");

      // Wow, an actual use case of std::align in the wild.
      // On success std::align bumps CurrentBuffer to the aligned address and shrinks CurrentBufferRemaining by the padding.
      void* NewPointer = std::align(alignment, bytes, CurrentBuffer, CurrentBufferRemaining);
      if (!NewPointer) [[unlikely]] {
        // Not enough room left; start a fresh buffer, which begins at the address VirtualAlloc returned.
        AllocateNewBuffer(bytes, alignment);
        NewPointer = CurrentBuffer;
      }

      // Consume the allocation itself
      CurrentBuffer = static_cast<char*>(CurrentBuffer) + bytes;
      CurrentBufferRemaining -= bytes;

      return NewPointer;
    }

    void do_deallocate(void*, std::size_t, std::size_t) override {
      // Explicit no-op.
    }

    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
      return this == &other;
    }

  private:
    const char* Name;

    // Allocate a new buffer that can at least fit the passed in bytes with alignment.
    void AllocateNewBuffer(std::size_t bytes, std::size_t) {
      bytes = FEXCore::AlignUp(bytes, CurrentAllocationSize);
      void* Ptr = FEXCore::Allocator::VirtualAlloc(bytes);
      if (Name) {
        FEXCore::Allocator::VirtualName(Name, Ptr, bytes);
      }

      Buffers.emplace_back(BufferData {
        .Buffer = Ptr,
        .BufferSize = bytes,
      });

      CurrentBuffer = Ptr;
      CurrentBufferRemaining = bytes;

      // Multiply the allocation size by 1.5 for the next allocation
      // Avoid double math because of ugly conversions.
      CurrentAllocationSize = FEXCore::AlignUp(CurrentAllocationSize + (CurrentAllocationSize >> 1), FEXCore::Utils::FEX_PAGE_SIZE);
    }

    // Current buffer management.
    void* CurrentBuffer {};
    size_t CurrentBufferRemaining {};

    // One entry per buffer obtained from VirtualAlloc, kept so `release()` can free them.
    struct BufferData final {
      void* Buffer;
      size_t BufferSize;
    };

    fextl::list<BufferData> Buffers {};

    // Granularity used to round up the next buffer; grows after every new buffer.
    size_t CurrentAllocationSize = FEXCore::Utils::FEX_PAGE_SIZE;
  };

  /**
   * @brief This is similar to the std::pmr::monotonic_buffer_resource.
   *
   * The difference is that class doesn't have ownership of the backing memory and
   * it also doesn't have any growth factor.
   *
   * If the amount of memory allocated is overrun then this will overwrite memory unless assertions are enabled.
   * Filling the buffer exactly to its end is not an overrun.
   *
   * Ensure that you know how much memory you're going to use before using this class.
   */
  class fixed_size_monotonic_buffer_resource final : public std::pmr::memory_resource {
  public:
    // `Base` is the start of caller-owned memory; `Size` is only recorded when assertions are enabled.
    fixed_size_monotonic_buffer_resource(void* Base, [[maybe_unused]] size_t Size)
      : Ptr {reinterpret_cast<uint64_t>(Base)}
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      , PtrEnd {reinterpret_cast<uint64_t>(Base) + Size}
      , Size {Size}
#endif
    {
    }

    // Bump allocation: align the cursor, hand out that address, advance the cursor past the allocation.
    void* do_allocate(std::size_t bytes, std::size_t alignment) override {
      uint64_t NewPtr = FEXCore::AlignUp((uint64_t)Ptr, alignment);
      Ptr = NewPtr + bytes;
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      // Ptr is now one past the last allocated byte, so only a cursor beyond PtrEnd has overrun the buffer.
      if (Ptr > PtrEnd) {
        LogMan::Msg::AFmt("Fail: Only allocated: {} ({} this time) bytes. Tried allocating at ptr offset: {}.\n", Size, bytes,
                          (uint64_t)(Ptr - (PtrEnd - Size)));
        FEX_TRAP_EXECUTION;
      }
#endif
      return reinterpret_cast<void*>(NewPtr);
    }

    void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override {
      // noop
    }

    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
      return this == &other;
    }

  private:
    // Allocation cursor: address where the next allocation would start before alignment.
    uint64_t Ptr;
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    // One past the last usable byte, and the total size, kept only for the overrun check.
    uint64_t PtrEnd;
    size_t Size;
#endif
  };
} // namespace pmr
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/queue.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>
#include <FEXCore/fextl/deque.h>

#include <queue>

namespace fextl {
// std::queue adaptor that defaults to a fextl::deque as its container.
template<typename ValueT, typename ContainerT = fextl::deque<ValueT>>
using queue = std::queue<ValueT, ContainerT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/robin_map.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <ankerl/unordered_dense.h>

namespace fextl {
// Hash map alias over ankerl::unordered_dense::map that defaults to fextl::FEXAlloc.
template<typename KeyT, typename MappedT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<std::pair<KeyT, MappedT>>>
using robin_map = ankerl::unordered_dense::map<KeyT, MappedT, HashT, EqualT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/robin_set.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <ankerl/unordered_dense.h>

namespace fextl {
// Hash set alias over ankerl::unordered_dense::set that defaults to fextl::FEXAlloc.
template<typename KeyT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<KeyT>>
using robin_set = ankerl::unordered_dense::set<KeyT, HashT, EqualT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/set.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <set>

namespace fextl {
// std::set that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename CompareT = std::less<KeyT>, typename AllocT = fextl::FEXAlloc<KeyT>>
using set = std::set<KeyT, CompareT, AllocT>;

// std::multiset that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename CompareT = std::less<KeyT>, typename AllocT = fextl::FEXAlloc<KeyT>>
using multiset = std::multiset<KeyT, CompareT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/sstream.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <sstream>

namespace fextl {
// String stream family that defaults to fextl::FEXAlloc for the underlying string storage.
template<typename CharT, typename TraitsT = std::char_traits<CharT>, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_stringbuf = std::basic_stringbuf<CharT, TraitsT, AllocT>;

template<typename CharT, typename TraitsT = std::char_traits<CharT>, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_istringstream = std::basic_istringstream<CharT, TraitsT, AllocT>;

template<typename CharT, typename TraitsT = std::char_traits<CharT>, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_ostringstream = std::basic_ostringstream<CharT, TraitsT, AllocT>;

template<typename CharT, typename TraitsT = std::char_traits<CharT>, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_stringstream = std::basic_stringstream<CharT, TraitsT, AllocT>;

// char instantiations
using stringbuf = fextl::basic_stringbuf<char>;
using istringstream = fextl::basic_istringstream<char>;
using ostringstream = fextl::basic_ostringstream<char>;
using stringstream = fextl::basic_stringstream<char>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/stack.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>
#include <FEXCore/fextl/deque.h>

#include <stack>

namespace fextl {
// std::stack adaptor that defaults to a fextl::deque as its container.
template<typename ValueT, typename ContainerT = fextl::deque<ValueT>>
using stack = std::stack<ValueT, ContainerT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/string.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <functional>
#include <string>

namespace fextl {
// std::basic_string that defaults to fextl::FEXAlloc for its storage.
template<typename CharT, typename TraitsT = std::char_traits<CharT>, typename AllocT = fextl::FEXAlloc<CharT>>
using basic_string = std::basic_string<CharT, TraitsT, AllocT>;

// The char instantiation.
using string = fextl::basic_string<char>;
} // namespace fextl

// Hash support for fextl::string: hashes the characters through the std::string_view hasher.
template<>
struct std::hash<fextl::string> {
  std::size_t operator()(const fextl::string& s) const noexcept {
    const std::string_view View {s};
    return std::hash<std::string_view> {}(View);
  }
};


================================================
FILE: FEXCore/include/FEXCore/fextl/unordered_map.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <unordered_map>

namespace fextl {
// std::unordered_map that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename MappedT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<std::pair<const KeyT, MappedT>>>
using unordered_map = std::unordered_map<KeyT, MappedT, HashT, EqualT, AllocT>;

// std::unordered_multimap that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename MappedT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<std::pair<const KeyT, MappedT>>>
using unordered_multimap = std::unordered_multimap<KeyT, MappedT, HashT, EqualT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/unordered_set.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <unordered_set>

namespace fextl {
// std::unordered_set that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<KeyT>>
using unordered_set = std::unordered_set<KeyT, HashT, EqualT, AllocT>;

// std::unordered_multiset that defaults to fextl::FEXAlloc for its storage.
template<typename KeyT, typename HashT = std::hash<KeyT>, typename EqualT = std::equal_to<KeyT>, typename AllocT = fextl::FEXAlloc<KeyT>>
using unordered_multiset = std::unordered_multiset<KeyT, HashT, EqualT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/FEXCore/fextl/vector.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/allocator.h>

#include <vector>

namespace fextl {
// std::vector that defaults to fextl::FEXAlloc for its storage.
template<typename ValueT, typename AllocT = fextl::FEXAlloc<ValueT>>
using vector = std::vector<ValueT, AllocT>;
} // namespace fextl


================================================
FILE: FEXCore/include/git_version.h.in
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <array>
#include <cstdint>

// The @...@ tokens are placeholders in this `.in` template.
// NOTE(review): presumably substituted by the build system when generating git_version.h — confirm in the CMake scripts.
// 20-byte array holding the git hash.
static constexpr std::array<uint8_t, 20> GIT_HASH = {@GIT_HASH_ARRAY@};
// Git describe string as a string literal.
#define GIT_DESCRIBE_STRING "@GIT_DESCRIBE_STRING@"


================================================
FILE: FEXCore/unittests/APITests/Allocator.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_range.hpp>

#include "Utils/Allocator/HostAllocator.h"
#include <FEXCore/Utils/Allocator.h>
#include <sys/mman.h>

/**
 * @brief Check whether a raw syscall-style result encodes an error.
 *
 * Follows the Linux kernel convention: a result in the range [-4095, -1], viewed as an unsigned 64-bit value,
 * is a negated errno. Anything below that range is a valid result.
 *
 * @param Result Value to test; must be a type that can be reinterpret_cast to uint64_t (e.g. a pointer).
 * @return true when `Result` lies in the error range.
 */
template<typename T>
bool HasSyscallError(T Result) {
  // (uint64_t)-4095: the lowest value that still encodes an errno
  constexpr uint64_t FIRST_ERROR_VALUE = 0xFFFF'FFFF'FFFF'F001ULL;
  return reinterpret_cast<uint64_t>(Result) >= FIRST_ERROR_VALUE;
}

// Repeatedly MAP_FIXED-maps the same page and checks the allocator keeps returning the same address.
TEST_CASE("Allocator - Fixed replacement") {
  const auto RegionSize = 128 * 1024 * 1024;
  fextl::vector<FEXCore::Allocator::MemoryRegion> MemoryRegions {};
  for (size_t i = 0; i < 2; ++i) {
    auto Ptr = mmap(nullptr, RegionSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // Don't hand a failed reservation to the allocator under test.
    REQUIRE(Ptr != MAP_FAILED);
    MemoryRegions.emplace_back(FEXCore::Allocator::MemoryRegion {
      .Ptr = Ptr,
      .Size = RegionSize,
    });
  }

  auto Allocator = Alloc::OSAllocator::Create64BitAllocatorWithRegions(MemoryRegions);
  auto Base = Allocator->Mmap(nullptr, 4096, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(!HasSyscallError(Base));

  // Allocate perfectly overlapping pages. Allocate as many pages as the region.
  // FEX had a bug where the allocator could run out of memory with MAP_FIXED.
  for (size_t i = 0; i < (RegionSize / 4096); ++i) {
    auto NewBase = Allocator->Mmap(Base, 4096, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    REQUIRE(Base == NewBase);
  }

  Alloc::OSAllocator::ReleaseAllocatorWorkaround(std::move(Allocator));
}

// A MAP_FIXED request that does not fit in the VMA region must either land on the existing base or fail.
TEST_CASE("Allocator - Non-Fit") {
  const auto RegionSize = 128 * 1024 * 1024;
  fextl::vector<FEXCore::Allocator::MemoryRegion> MemoryRegions {};
  for (size_t i = 0; i < 2; ++i) {
    auto Ptr = mmap(nullptr, RegionSize, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // Don't hand a failed reservation to the allocator under test.
    REQUIRE(Ptr != MAP_FAILED);
    MemoryRegions.emplace_back(FEXCore::Allocator::MemoryRegion {
      .Ptr = Ptr,
      .Size = RegionSize,
    });
  }

  auto Allocator = Alloc::OSAllocator::Create64BitAllocatorWithRegions(MemoryRegions);
  auto Base = Allocator->Mmap(nullptr, RegionSize / 4, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(!HasSyscallError(Base));

  // Try to allocate within the whole VMA size minus a small amount.
  // FEX had a bug where if the allocation fit within a VMA region, it would try and allocate past the end without checking.
  // Only occurred when `MAP_FIXED` was used.
  auto NewBase = Allocator->Mmap(Base, RegionSize - (4096 * 64), PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // Must either fit in the VMA region, or fail.
  // - If it matches previous allocation, then it fit in the VMA region.
  //   - This can happen if FEX's allocator gains support for VMA merging.
  // - If it errors, then it doesn't fit in the VMA region.
  REQUIRE((NewBase == Base || HasSyscallError(NewBase)));

  Alloc::OSAllocator::ReleaseAllocatorWorkaround(std::move(Allocator));
}


================================================
FILE: FEXCore/unittests/APITests/CMakeLists.txt
================================================
# Collect every .cpp file under this directory; each one becomes its own test executable.
# CONFIGURE_DEPENDS makes the build re-check the glob so new files are picked up.
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS *.cpp)

# Libraries linked into every API test executable.
set(LIBS fmt::fmt vixl::vixl Catch2::Catch2WithMain FEXCore_Base JemallocLibs)
foreach(TEST ${TESTS})
  # NAME_WLE: file name without directory and without the last extension.
  get_filename_component(TEST_NAME ${TEST} NAME_WLE)
  add_executable(FEXCore_Tests_${TEST_NAME} ${TEST})
  target_link_libraries(FEXCore_Tests_${TEST_NAME} PRIVATE ${LIBS})
  # Lets the tests include headers from the Source/ directory two levels up.
  target_include_directories(FEXCore_Tests_${TEST_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../Source/")
  set_target_properties(FEXCore_Tests_${TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/FEXCore_Tests")
  # Register each Catch2 test case with CTest; the suffix is what the ctest filter below matches on.
  catch_discover_tests(FEXCore_Tests_${TEST_NAME} TEST_SUFFIX ".${TEST_NAME}.FEXCore_Tests")
endforeach()

# Convenience target that runs only the tests whose names end in the suffix added above.
add_custom_target(fexcore_apitests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.FEXCore_Tests$$")


================================================
FILE: FEXCore/unittests/APITests/FileLoading.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/FileLoading.h>
#include <catch2/catch_test_macros.hpp>

TEST_CASE("LoadFile-Doesn'tExist") {
  // Loading from a path that does not exist must report failure and leave the destination empty.
  fextl::string Contents;
  const auto Loaded = FEXCore::FileLoading::LoadFile(Contents, "/tmp/a/b/c/d/e/z");
  REQUIRE(Contents.size() == 0);
  REQUIRE(Loaded == false);
}

TEST_CASE("LoadFile-procfs") {
  // /proc/self/maps must be loadable: the call has to report success and produce non-empty contents.
  fextl::string MapsFile;
  auto Read = FEXCore::FileLoading::LoadFile(MapsFile, "/proc/self/maps");
  // Check the status too, so a failed load cannot pass merely because the string ended up non-empty.
  REQUIRE(Read == true);
  REQUIRE(MapsFile.size() != 0);
}

TEST_CASE("LoadFile-Buffer") {
  // Read /proc/self/maps into a pre-sized 16-byte buffer; the returned count must equal the buffer size.
  fextl::string Buffer;
  Buffer.resize(16);
  const auto BytesRead = FEXCore::FileLoading::LoadFileToBuffer("/proc/self/maps", Buffer);
  REQUIRE(Buffer.size() == BytesRead);
}


================================================
FILE: FEXCore/unittests/APITests/FlexBitSet.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_range.hpp>

#include "Utils/Allocator/FlexBitSet.h"
#include <sys/mman.h>

TEST_CASE("FlexBitSet - Sizing") {
  // Checks the static sizing helpers of FlexBitSet for 8/16/32/64-bit element types.
  // For each type the same three requests are probed:
  //  - 0 bits                  -> no storage
  //  - 1 bit                   -> exactly one element
  //  - sizeof(Element) * 8 bits -> still exactly one element
  using Set8 = FEXCore::FlexBitSet<uint8_t>;
  using Set16 = FEXCore::FlexBitSet<uint16_t>;
  using Set32 = FEXCore::FlexBitSet<uint32_t>;
  using Set64 = FEXCore::FlexBitSet<uint64_t>;

  // uint8_t elements.
  CHECK(Set8::SizeInBytes(0) == 0);
  CHECK(Set8::SizeInBits(0) == 0);
  CHECK(Set8::SizeInBytes(1) == sizeof(uint8_t));
  CHECK(Set8::SizeInBits(1) == sizeof(uint8_t) * 8);
  CHECK(Set8::SizeInBytes(sizeof(uint8_t) * 8) == sizeof(uint8_t));
  CHECK(Set8::SizeInBits(sizeof(uint8_t) * 8) == sizeof(uint8_t) * 8);

  // uint16_t elements.
  CHECK(Set16::SizeInBytes(0) == 0);
  CHECK(Set16::SizeInBits(0) == 0);
  CHECK(Set16::SizeInBytes(1) == sizeof(uint16_t));
  CHECK(Set16::SizeInBits(1) == sizeof(uint16_t) * 8);
  CHECK(Set16::SizeInBytes(sizeof(uint16_t) * 8) == sizeof(uint16_t));
  CHECK(Set16::SizeInBits(sizeof(uint16_t) * 8) == sizeof(uint16_t) * 8);

  // uint32_t elements.
  CHECK(Set32::SizeInBytes(0) == 0);
  CHECK(Set32::SizeInBits(0) == 0);
  CHECK(Set32::SizeInBytes(1) == sizeof(uint32_t));
  CHECK(Set32::SizeInBits(1) == sizeof(uint32_t) * 8);
  CHECK(Set32::SizeInBytes(sizeof(uint32_t) * 8) == sizeof(uint32_t));
  CHECK(Set32::SizeInBits(sizeof(uint32_t) * 8) == sizeof(uint32_t) * 8);

  // uint64_t elements.
  CHECK(Set64::SizeInBytes(0) == 0);
  CHECK(Set64::SizeInBits(0) == 0);
  CHECK(Set64::SizeInBytes(1) == sizeof(uint64_t));
  CHECK(Set64::SizeInBits(1) == sizeof(uint64_t) * 8);
  CHECK(Set64::SizeInBytes(sizeof(uint64_t) * 8) == sizeof(uint64_t));
  CHECK(Set64::SizeInBits(sizeof(uint64_t) * 8) == sizeof(uint64_t) * 8);
}

TEST_CASE("FlexBitSet - Limit") {
  // Ensure that the FlexBitSet doesn't read past the limits, and returns correct indexes.
  //
  // Three pages are reserved as PROT_NONE and only the middle one is made readable/writable, so the
  // page backing the bitset is bracketed by inaccessible pages on both sides.
  const auto Size = 4096 * 3;
  auto Ptr = mmap(nullptr, Size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap signals failure with MAP_FAILED (not nullptr); stop before doing pointer arithmetic on it.
  REQUIRE(Ptr != MAP_FAILED);
  auto PtrMiddle = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Ptr) + 4096);
  REQUIRE(mprotect(PtrMiddle, 4096, PROT_READ | PROT_WRITE) != -1);

  using ElementType = uint8_t;
  // One bit per element: the accessible page holds 4096 * 8 bits.
  const size_t NumElements = 4096 * 8;
  auto FlexBit = reinterpret_cast<FEXCore::FlexBitSet<ElementType>*>(PtrMiddle);

  // Forward scan for a one-element range starting at every index must report that same index.
  for (size_t i = 0; i < NumElements; ++i) {
    auto Result = FlexBit->ForwardScanForRange<true>(i, 1, NumElements);
    CHECK(Result.FoundElement == i);
  }

  // Backward scan for a one-element range starting at every index must report that same index.
  for (size_t i = 0; i < NumElements; ++i) {
    auto Result = FlexBit->BackwardScanForRange<true>(i, 1, 0);
    CHECK(Result.FoundElement == i);
  }

  // Release the whole reservation so the test does not leak the mapping into later test cases.
  REQUIRE(munmap(Ptr, Size) == 0);
}


================================================
FILE: FEXCore/unittests/APITests/FutexSpinTest.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Utils/SpinWaitLock.h"
#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <thread>

// Timeout passed to the timed SpinWaitLock::Wait calls in the FutexSpin-Timed-* tests below.
constexpr auto SleepAmount = std::chrono::milliseconds(250);

TEST_CASE("FutexSpin-Timed-8bit") {
  // Timed wait on an 8-bit word that never reaches the expected value: the call must only return once the timeout elapsed.
  uint8_t Test {};

  // Measure with steady_clock: it is monotonic, so a wall-clock adjustment during the wait cannot
  // make the elapsed time appear shorter than it really was.
  auto now = std::chrono::steady_clock::now();
  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
  auto end = std::chrono::steady_clock::now();
  auto diff = end - now;

  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
}

TEST_CASE("FutexSpin-Sleep-8bit") {
  // Untimed wait: a helper thread blocks in SpinWaitLock::Wait until this thread takes the lock
  // after sleeping for `SleepAmount`. The helper must therefore have waited at least that long.
  // Note: this local constant intentionally shadows the file-scope SleepAmount.
  constexpr auto SleepAmount = std::chrono::seconds(1);

  uint8_t Test {};
  std::atomic<uint8_t> ActualSpinLoop {};
  std::chrono::nanoseconds SleptAmount;

  std::thread t([&Test, &SleptAmount, &ActualSpinLoop]() {
    // steady_clock is monotonic, so wall-clock adjustments cannot shorten the measured interval.
    auto now = std::chrono::steady_clock::now();
    // Tell the main thread that timing has started and we are about to wait.
    ActualSpinLoop.store(1);
    FEXCore::Utils::SpinWaitLock::Wait(&Test, 1);
    auto end = std::chrono::steady_clock::now();
    SleptAmount = std::chrono::duration_cast<std::chrono::nanoseconds>(end - now);
  });

  // Spin until the second thread signals that it has started its measurement.
  while (ActualSpinLoop.load() == 0)
    ;

  // sleep this thread for the sleep amount.
  std::this_thread::sleep_for(SleepAmount);

  // Set the futex
  FEXCore::Utils::SpinWaitLock::lock(&Test);

  // Wait for the thread to get done. After join() it is safe to read SleptAmount.
  t.join();

  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
  REQUIRE(SleptAmount >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
}

TEST_CASE("FutexSpin-Timed-16bit") {
  // Timed wait on a 16-bit word that never reaches the expected value: the call must only return once the timeout elapsed.
  uint16_t Test {};

  // Measure with steady_clock: it is monotonic, so a wall-clock adjustment during the wait cannot
  // make the elapsed time appear shorter than it really was.
  auto now = std::chrono::steady_clock::now();
  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
  auto end = std::chrono::steady_clock::now();
  auto diff = end - now;

  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
}

TEST_CASE("FutexSpin-Timed-32bit") {
  // Timed wait on a 32-bit word that never reaches the expected value: the call must only return once the timeout elapsed.
  uint32_t Test {};

  // Measure with steady_clock: it is monotonic, so a wall-clock adjustment during the wait cannot
  // make the elapsed time appear shorter than it really was.
  auto now = std::chrono::steady_clock::now();
  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
  auto end = std::chrono::steady_clock::now();
  auto diff = end - now;

  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
}

TEST_CASE("FutexSpin-Timed-64bit") {
  // Timed wait on a 64-bit word that never reaches the expected value: the call must only return once the timeout elapsed.
  uint64_t Test {};

  // Measure with steady_clock: it is monotonic, so a wall-clock adjustment during the wait cannot
  // make the elapsed time appear shorter than it really was.
  auto now = std::chrono::steady_clock::now();
  FEXCore::Utils::SpinWaitLock::Wait(&Test, 1, SleepAmount);
  auto end = std::chrono::steady_clock::now();
  auto diff = end - now;

  // The futex spinwait needs to have slept for at /least/ the amount specified. It will always run slightly late.
  REQUIRE(std::chrono::duration_cast<std::chrono::nanoseconds>(diff) >= std::chrono::duration_cast<std::chrono::nanoseconds>(SleepAmount));
}


================================================
FILE: FEXCore/unittests/APITests/ILog2.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/Utils/MathUtils.h>
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_range.hpp>

TEST_CASE("ILog2") {
  // For every bit position 0..63, ilog2 of the value with only that bit set must return the position.
  auto BitIndex = GENERATE(range(0, 64));
  const auto SingleBit = 1ull << BitIndex;
  REQUIRE(FEXCore::ilog2(SingleBit) == BitIndex);
}

TEST_CASE("DividePow2") {
  // Every pairing of power-of-two dividend and divisor (shifts 0..63) must match plain integer division.
  auto DividendShift = GENERATE(range(0, 64));
  auto DivisorShift = GENERATE(range(0, 64));
  const auto Dividend = 1ull << DividendShift;
  const auto Divisor = 1ull << DivisorShift;
  REQUIRE(FEXCore::DividePow2(Dividend, Divisor) == (Dividend / Divisor));
}


================================================
FILE: FEXCore/unittests/APITests/vl_integer.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators_range.hpp>
#include <catch2/generators/catch_generators_random.hpp>

#include "Utils/variable_length_integer.h"

#include <limits>

TEST_CASE("vl-size") {
  // Boundary values for each vl64 encoding width and the encoded size expected for them.
  using namespace FEXCore::Utils;
  using Limits32 = std::numeric_limits<int32_t>;
  using Limits64 = std::numeric_limits<int64_t>;

  // 8-bit encoding: minimum and maximum.
  CHECK(vl64::EncodedSize(-64) == 1);
  CHECK(vl64::EncodedSize(63) == 1);

  // 16-bit encoding: minimum and maximum.
  CHECK(vl64::EncodedSize(-8192) == 2);
  CHECK(vl64::EncodedSize(8191) == 2);

  // 32-bit payload: minimum and maximum.
  CHECK(vl64::EncodedSize(Limits32::min()) == 5);
  CHECK(vl64::EncodedSize(Limits32::max()) == 5);

  // 64-bit payload: minimum and maximum.
  CHECK(vl64::EncodedSize(Limits64::min()) == 9);
  CHECK(vl64::EncodedSize(Limits64::max()) == 9);
}

TEST_CASE("vl8 - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  uint8_t Buffer[1];

  // Encode `Value`, verify the single emitted byte, then decode it and verify the round trip.
  const auto RoundTrip = [&Buffer](int Value, int ExpectedByte) {
    REQUIRE(vl64::Encode(Buffer, Value) == 1);
    CHECK(Buffer[0] == ExpectedByte);
    const auto Result = vl64::Decode(Buffer);
    CHECK(Result.Size == 1);
    CHECK(Result.Integer == Value);
  };

  RoundTrip(0, 0);
  RoundTrip(63, 0b0011'1111);
  RoundTrip(-1, 0b0111'1111);
  RoundTrip(-64, 0b0100'0000);
}

TEST_CASE("vl16 - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  uint8_t Buffer[2];

  // Encode `Value`, verify both emitted bytes, then decode and verify the round trip.
  const auto RoundTrip = [&Buffer](int Value, uint64_t ExpectedByte0, uint64_t ExpectedByte1) {
    REQUIRE(vl64::Encode(Buffer, Value) == 2);
    CHECK(static_cast<uint64_t>(Buffer[0]) == ExpectedByte0);
    CHECK(static_cast<uint64_t>(Buffer[1]) == ExpectedByte1);
    const auto Result = vl64::Decode(Buffer);
    CHECK(Result.Size == 2);
    CHECK(Result.Integer == Value);
  };

  // Just outside the one-byte range on the negative side.
  RoundTrip(-65, 0b1011'1111, 0b1011'1111);
  RoundTrip(-66, 0b1011'1111, 0b1011'1110);

  // Just outside the one-byte range on the positive side.
  RoundTrip(64, 0b1000'0000, 0b0100'0000);

  // Two-byte maximum and minimum.
  RoundTrip(8191, 0b1001'1111, 0b1111'1111);
  RoundTrip(-8192, 0b1010'0000, 0b0000'0000);
}

TEST_CASE("vl32 - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  using Limits32 = std::numeric_limits<int32_t>;
  uint8_t Buffer[5];

  // Encode `Value`, verify the header byte and the raw 32-bit payload that follows it,
  // then decode and verify the round trip.
  const auto RoundTrip = [&Buffer](int32_t Value) {
    REQUIRE(vl64::Encode(Buffer, Value) == 5);
    CHECK(Buffer[0] == 0b1100'0000);
    int32_t Payload {};
    memcpy(&Payload, &Buffer[1], sizeof(int32_t));
    CHECK(Payload == Value);
    const auto Result = vl64::Decode(Buffer);
    CHECK(Result.Size == 5);
    CHECK(Result.Integer == Value);
  };

  // First values that no longer fit the two-byte encoding.
  RoundTrip(8192);
  RoundTrip(-8193);

  // 32-bit extremes.
  RoundTrip(Limits32::min());
  RoundTrip(Limits32::max());
}

TEST_CASE("vl64 - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  using Limits32 = std::numeric_limits<int32_t>;
  using Limits64 = std::numeric_limits<int64_t>;
  uint8_t Buffer[9];

  // Encode `Value`, verify the header byte and the raw 64-bit payload that follows it,
  // then decode and verify the round trip.
  const auto RoundTrip = [&Buffer](int64_t Value) {
    REQUIRE(vl64::Encode(Buffer, Value) == 9);
    CHECK(Buffer[0] == 0b1110'0000);
    int64_t Payload {};
    memcpy(&Payload, &Buffer[1], sizeof(int64_t));
    CHECK(Payload == Value);
    const auto Result = vl64::Decode(Buffer);
    CHECK(Result.Size == 9);
    CHECK(Result.Integer == Value);
  };

  // One step outside the int32_t range in each direction.
  RoundTrip(static_cast<int64_t>(Limits32::min()) - 1);
  RoundTrip(static_cast<int64_t>(Limits32::max()) + 1);

  // 64-bit extremes.
  RoundTrip(Limits64::min());
  RoundTrip(Limits64::max());
}

TEST_CASE("vl64pair-size") {
  // Boundary value pairs for each vl64pair encoding width and the encoded size expected for them.
  using namespace FEXCore::Utils;
  using Limits32 = std::numeric_limits<int32_t>;
  using Limits64 = std::numeric_limits<int64_t>;

  // 8-bit encoding: minimum/minimum and maximum/maximum.
  CHECK(vl64pair::EncodedSize(4, 1) == 1);
  CHECK(vl64pair::EncodedSize(64, 8) == 1);

  // 8-bit encoding: interlaced minimum and maximum.
  CHECK(vl64pair::EncodedSize(4, 8) == 1);
  CHECK(vl64pair::EncodedSize(64, 1) == 1);

  // 16-bit encoding: minimum/minimum and maximum/maximum.
  CHECK(vl64pair::EncodedSize(-512, -32) == 2);
  CHECK(vl64pair::EncodedSize(504, 31) == 2);

  // 16-bit encoding: interlaced minimum and maximum.
  CHECK(vl64pair::EncodedSize(-512, 31) == 2);
  CHECK(vl64pair::EncodedSize(504, -32) == 2);

  // 32-bit payloads: minimum/minimum and maximum/maximum.
  CHECK(vl64pair::EncodedSize(Limits32::min(), Limits32::min()) == 9);
  CHECK(vl64pair::EncodedSize(Limits32::max(), Limits32::max()) == 9);

  // 32-bit payloads: interlaced minimum and maximum.
  CHECK(vl64pair::EncodedSize(Limits32::min(), Limits32::max()) == 9);
  CHECK(vl64pair::EncodedSize(Limits32::max(), Limits32::min()) == 9);

  // 64-bit payloads: minimum/minimum and maximum/maximum.
  CHECK(vl64pair::EncodedSize(Limits64::min(), Limits64::min()) == 17);
  CHECK(vl64pair::EncodedSize(Limits64::max(), Limits64::max()) == 17);

  // 64-bit payloads: interlaced minimum and maximum.
  CHECK(vl64pair::EncodedSize(Limits64::min(), Limits64::max()) == 17);
  CHECK(vl64pair::EncodedSize(Limits64::max(), Limits64::min()) == 17);
}

TEST_CASE("vl8pair - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  uint8_t Buffer[1];

  // Encode the (ARM PC, x86 RIP) pair, verify the single emitted byte, then decode and verify both values.
  const auto RoundTrip = [&Buffer](int ArmPC, int X86RIP, int ExpectedByte) {
    REQUIRE(vl64pair::Encode(Buffer, ArmPC, X86RIP) == 1);
    CHECK(Buffer[0] == ExpectedByte);
    const auto Result = vl64pair::Decode(Buffer);
    CHECK(Result.Size == 1);
    CHECK(Result.IntegerARMPC == ArmPC);
    CHECK(Result.IntegerX86RIP == X86RIP);
  };

  // Minimum, Minimum
  RoundTrip((1 * 4), 1, 0);

  // Maximum, Maximum
  RoundTrip((16 * 4), 8, 0b0111'1111);

  // Minimum, Maximum
  RoundTrip((1 * 4), 8, 0b0111'0000);

  // Maximum, Minimum
  RoundTrip((16 * 4), 1, 0b0000'1111);
}

TEST_CASE("vl16pair - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  uint8_t Buffer[2];

  // Encode the (ARM PC, x86 RIP) pair, verify both emitted bytes, then decode and verify both values.
  const auto RoundTrip = [&Buffer](int ArmPC, int X86RIP, uint64_t ExpectedByte0, uint64_t ExpectedByte1) {
    REQUIRE(vl64pair::Encode(Buffer, ArmPC, X86RIP) == 2);
    CHECK(static_cast<uint64_t>(Buffer[0]) == ExpectedByte0);
    CHECK(static_cast<uint64_t>(Buffer[1]) == ExpectedByte1);
    const auto Result = vl64pair::Decode(Buffer);
    CHECK(Result.Size == 2);
    CHECK(Result.IntegerARMPC == ArmPC);
    CHECK(Result.IntegerX86RIP == X86RIP);
  };

  // vl8pair Minimum - 1, Minimum - 1
  RoundTrip(0, 0, 0b1000'0000, 0b0000'0000);

  // vl8pair Maximum + 1, Maximum + 1
  RoundTrip((17 * 4), 9, 0b1000'1001, 0b0001'0001);

  // Minimum, Minimum
  RoundTrip((-128 * 4), -32, 0b1010'0000, 0b1000'0000);

  // Maximum, Maximum
  RoundTrip((127 * 4), 31, 0b1001'1111, 0b0111'1111);

  // Interleaved Minimum, Maximum
  RoundTrip((-128 * 4), 31, 0b1001'1111, 0b1000'0000);

  // Interleaved Maximum, Minimum
  RoundTrip((127 * 4), -32, 0b1010'0000, 0b0111'1111);
}

TEST_CASE("vl32pair - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  using Limits32 = std::numeric_limits<int32_t>;
  uint8_t Buffer[9];

  // Encode the (ARM PC, x86 RIP) pair, verify the header byte and the two raw 32-bit payloads
  // stored back to back after it, then decode and verify both values.
  const auto RoundTrip = [&Buffer](int32_t ArmPC, int32_t X86RIP) {
    REQUIRE(vl64pair::Encode(Buffer, ArmPC, X86RIP) == 9);
    CHECK(Buffer[0] == 0b1100'0000);

    int32_t Payload {};
    memcpy(&Payload, &Buffer[1], sizeof(int32_t));
    CHECK(Payload == ArmPC);
    memcpy(&Payload, &Buffer[1 + sizeof(int32_t)], sizeof(int32_t));
    CHECK(Payload == X86RIP);

    const auto Result = vl64pair::Decode(Buffer);
    CHECK(Result.Size == 9);
    CHECK(Result.IntegerARMPC == ArmPC);
    CHECK(Result.IntegerX86RIP == X86RIP);
  };

  // Minimum, Minimum
  RoundTrip(Limits32::min(), Limits32::min());

  // Maximum, Maximum
  RoundTrip(Limits32::max(), Limits32::max());

  // Interleaved Minimum, Maximum
  RoundTrip(Limits32::min(), Limits32::max());

  // Interleaved Maximum, Minimum
  RoundTrip(Limits32::max(), Limits32::min());
}

TEST_CASE("vl64pair - in memory - encode/decode") {
  using namespace FEXCore::Utils;
  using Limits64 = std::numeric_limits<int64_t>;
  uint8_t Buffer[17];

  // Encode the (ARM PC, x86 RIP) pair, verify the header byte and the two raw 64-bit payloads
  // stored back to back after it, then decode and verify both values.
  const auto RoundTrip = [&Buffer](int64_t ArmPC, int64_t X86RIP) {
    REQUIRE(vl64pair::Encode(Buffer, ArmPC, X86RIP) == 17);
    CHECK(Buffer[0] == 0b1110'0000);

    int64_t Payload {};
    memcpy(&Payload, &Buffer[1], sizeof(int64_t));
    CHECK(Payload == ArmPC);
    memcpy(&Payload, &Buffer[1 + sizeof(int64_t)], sizeof(int64_t));
    CHECK(Payload == X86RIP);

    const auto Result = vl64pair::Decode(Buffer);
    CHECK(Result.Size == 17);
    CHECK(Result.IntegerARMPC == ArmPC);
    CHECK(Result.IntegerX86RIP == X86RIP);
  };

  // Minimum, Minimum
  RoundTrip(Limits64::min(), Limits64::min());

  // Maximum, Maximum
  RoundTrip(Limits64::max(), Limits64::max());

  // Interleaved Minimum, Maximum
  RoundTrip(Limits64::min(), Limits64::max());

  // Interleaved Maximum, Minimum
  RoundTrip(Limits64::max(), Limits64::min());
}


================================================
FILE: FEXCore/unittests/CMakeLists.txt
================================================
# The Emitter and API unit test subdirectories are only added when not building with MinGW.
if (NOT MINGW)
  add_subdirectory(Emitter/)
  add_subdirectory(APITests/)
endif()


================================================
FILE: FEXCore/unittests/Emitter/ALU_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Verifies the raw encodings emitted for `adr`, `adrp` and `LongAddressGen` when targeting
// backward, forward and bidirectional labels. Each scope emits a short sequence and then checks
// the 32-bit instruction word(s) at the given index via DisassembleEncoding().
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: PC relative") {
  // adr: label bound before the instruction.
  {
    BackwardLabel Label;
    (void)Bind(&Label);
    dc32(0);
    (void)adr(Reg::r30, &Label);

    CHECK(DisassembleEncoding(1) == 0x10fffffe);
  }

  // adr: label bound after the instruction.
  {
    ForwardLabel Label;
    (void)adr(Reg::r30, &Label);
    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x1000003e);
  }

  // adr: bidirectional label used as a backward reference.
  {
    BiDirectionalLabel Label;
    (void)Bind(&Label);
    dc32(0);
    (void)adr(Reg::r30, &Label);

    CHECK(DisassembleEncoding(1) == 0x10fffffe);
  }

  // adr: bidirectional label used as a forward reference.
  {
    BiDirectionalLabel Label;
    (void)adr(Reg::r30, &Label);
    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x1000003e);
  }

  // adrp: label bound before the instruction.
  {
    BackwardLabel Label;
    (void)Bind(&Label);
    dc32(0);
    (void)adrp(Reg::r30, &Label);

    CHECK(DisassembleEncoding(1) == 0x9000001e);
  }

  // adrp: label bound after the instruction.
  {
    ForwardLabel Label;
    (void)adrp(Reg::r30, &Label);
    // Move label a page away
    for (size_t i = 0; i < 1023; ++i) {
      nop();
    }
    (void)Bind(&Label);

    CHECK(DisassembleEncoding(0) == 0xb000001e);
  }

  // adrp: bidirectional label used as a backward reference.
  {
    BiDirectionalLabel Label;
    (void)Bind(&Label);
    dc32(0);
    (void)adrp(Reg::r30, &Label);

    CHECK(DisassembleEncoding(1) == 0x9000001e);
  }

  // adrp: bidirectional label used as a forward reference.
  {
    BiDirectionalLabel Label;
    (void)adrp(Reg::r30, &Label);
    // Move label a page away
    for (size_t i = 0; i < 1023; ++i) {
      nop();
    }
    (void)Bind(&Label);

    CHECK(DisassembleEncoding(0) == 0xb000001e);
  }

  {
    // Will generate adr.
    BackwardLabel Label;
    (void)Bind(&Label);
    dc32(0);

    (void)LongAddressGen(Reg::r30, &Label);
    CHECK(DisassembleEncoding(1) == 0x10fffffe);
  }
  {
    // Will generate nop + nop + adr.
    ForwardLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);
    (void)Bind(&Label);
    dc32(0);

    // Check each of the three emitted words in turn (indices 0, 1, 2), matching the
    // bidirectional-label variant of this case further down.
    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xd503201f);
    CHECK(DisassembleEncoding(2) == 0x1000003e);
  }
  {
    // Will generate adr.
    BiDirectionalLabel Label;
    (void)Bind(&Label);
    dc32(0);
    (void)LongAddressGen(Reg::r30, &Label);

    CHECK(DisassembleEncoding(1) == 0x10fffffe);
  }

  {
    // Will generate nop + nop + adr.
    BiDirectionalLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);
    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xd503201f);
    CHECK(DisassembleEncoding(2) == 0x1000003e);
  }

  {
    // Will generate adrp.
    BackwardLabel Label;
    (void)Bind(&Label);
    dc32(0);

    // Move adrp 1MB away.
    for (size_t i = 0; i < (1 * 1024 * 1024 / 4); ++i) {
      nop();
    }

    (void)LongAddressGen(Reg::r30, &Label);
    nop();
    // Index 262145 = 1 data word + 262144 nops; the word after it is the trailing nop.
    CHECK(DisassembleEncoding(262145) == 0x90fff81e);
    CHECK(DisassembleEncoding(262146) == 0xd503201f);
  }

  {
    // Will generate nop + nop + adrp.
    ForwardLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);

    // Move label 1MB away, plus a page, and then aligned to a page.
    for (size_t i = 0; i < ((1 * 1024 * 1024 + 4096) / 4 - 3); ++i) {
      nop();
    }

    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xd503201f);
    CHECK(DisassembleEncoding(2) == 0x9000081e);
  }

  {
    // Will generate nop + adrp + add.
    ForwardLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);

    // Move label 1MB away, plus a page, plus one instruction.
    for (size_t i = 0; i < ((1 * 1024 * 1024 + 4096) / 4 - 1); ++i) {
      nop();
    }

    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xb000081e);
    CHECK(DisassembleEncoding(2) == 0x910013de);
  }


  {
    // Will generate adrp.
    BiDirectionalLabel Label;
    (void)Bind(&Label);
    dc32(0);

    // Move adrp 1MB away.
    for (size_t i = 0; i < (1 * 1024 * 1024 / 4); ++i) {
      nop();
    }

    (void)LongAddressGen(Reg::r30, &Label);
    nop();
    // Index 262145 = 1 data word + 262144 nops; the word after it is the trailing nop.
    CHECK(DisassembleEncoding(262145) == 0x90fff81e);
    CHECK(DisassembleEncoding(262146) == 0xd503201f);
  }

  {
    // Will generate nop + nop + adrp.
    BiDirectionalLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);

    // Move label 1MB away, plus a page, and then aligned to a page.
    for (size_t i = 0; i < ((1 * 1024 * 1024 + 4096) / 4 - 3); ++i) {
      nop();
    }

    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xd503201f);
    CHECK(DisassembleEncoding(2) == 0x9000081e);
  }

  {
    // Will generate nop + adrp + add.
    BiDirectionalLabel Label;
    (void)LongAddressGen(Reg::r30, &Label);

    // Move label 1MB away, plus a page, plus one instruction.
    for (size_t i = 0; i < ((1 * 1024 * 1024 + 4096) / 4 - 1); ++i) {
      nop();
    }

    (void)Bind(&Label);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd503201f);
    CHECK(DisassembleEncoding(1) == 0xb000081e);
    CHECK(DisassembleEncoding(2) == 0x910013de);
  }
}
// Covers the immediate forms of add/adds/cmn/sub/subs/cmp at both operand sizes.
// NOTE(review): TEST_SINGLE is defined outside this chunk; it appears to pair an emitter call with
// the disassembly text expected for it - confirm against the macro definition.
//
// Each group uses the same five inputs:
//   - immediate 0 and 4095 with the trailing bool set to false (expected text shows the value as-is),
//   - immediate 0 and 4095 with the trailing bool set to true (expected text shows 4095 as 0xfff000,
//     i.e. 4095 << 12),
//   - the overload without the bool, passed 16773120 directly, which is expected to produce the same
//     text as the (4095, true) case.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Add/subtract immediate") {
  // add, 32-bit.
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 0, false), "add w29, w28, #0x0 (0)");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 4095, false), "add w29, w28, #0xfff (4095)");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 0, true), "add w29, w28, #0x0 (0)");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 4095, true), "add w29, w28, #0xfff000 (16773120)");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, 16773120), "add w29, w28, #0xfff000 (16773120)");

  // add, 64-bit.
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, 0, false), "add x29, x28, #0x0 (0)");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, 4095, false), "add x29, x28, #0xfff (4095)");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, 0, true), "add x29, x28, #0x0 (0)");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, 4095, true), "add x29, x28, #0xfff000 (16773120)");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, 16773120), "add x29, x28, #0xfff000 (16773120)");

  // add with the stack pointer as both operands. A zero immediate is expected to be printed as the
  // "mov sp, sp" alias rather than as an add.
  TEST_SINGLE(add(Size::i64Bit, Reg::rsp, Reg::rsp, 0, false), "mov sp, sp");
  TEST_SINGLE(add(Size::i64Bit, Reg::rsp, Reg::rsp, 4095, false), "add sp, sp, #0xfff (4095)");
  TEST_SINGLE(add(Size::i64Bit, Reg::rsp, Reg::rsp, 0, true), "mov sp, sp");
  TEST_SINGLE(add(Size::i64Bit, Reg::rsp, Reg::rsp, 4095, true), "add sp, sp, #0xfff000 (16773120)");
  TEST_SINGLE(add(Size::i64Bit, Reg::rsp, Reg::rsp, 16773120), "add sp, sp, #0xfff000 (16773120)");

  // adds, 32-bit.
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, 0, false), "adds w29, w28, #0x0 (0)");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, 4095, false), "adds w29, w28, #0xfff (4095)");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, 0, true), "adds w29, w28, #0x0 (0)");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, 4095, true), "adds w29, w28, #0xfff000 (16773120)");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, 16773120), "adds w29, w28, #0xfff000 (16773120)");

  // adds, 64-bit.
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, 0, false), "adds x29, x28, #0x0 (0)");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, 4095, false), "adds x29, x28, #0xfff (4095)");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, 0, true), "adds x29, x28, #0x0 (0)");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, 4095, true), "adds x29, x28, #0xfff000 (16773120)");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, 16773120), "adds x29, x28, #0xfff000 (16773120)");

  // cmn, 32-bit. Takes a single source register and no destination.
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, 0, false), "cmn w28, #0x0 (0)");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, 4095, false), "cmn w28, #0xfff (4095)");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, 0, true), "cmn w28, #0x0 (0)");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, 4095, true), "cmn w28, #0xfff000 (16773120)");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, 16773120), "cmn w28, #0xfff000 (16773120)");

  // cmn, 64-bit.
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, 0, false), "cmn x28, #0x0 (0)");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, 4095, false), "cmn x28, #0xfff (4095)");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, 0, true), "cmn x28, #0x0 (0)");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, 4095, true), "cmn x28, #0xfff000 (16773120)");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, 16773120), "cmn x28, #0xfff000 (16773120)");

  // sub, 32-bit.
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, 0, false), "sub w29, w28, #0x0 (0)");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, 4095, false), "sub w29, w28, #0xfff (4095)");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, 0, true), "sub w29, w28, #0x0 (0)");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, 4095, true), "sub w29, w28, #0xfff000 (16773120)");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, 16773120), "sub w29, w28, #0xfff000 (16773120)");

  // sub, 64-bit.
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, 0, false), "sub x29, x28, #0x0 (0)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, 4095, false), "sub x29, x28, #0xfff (4095)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, 0, true), "sub x29, x28, #0x0 (0)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, 4095, true), "sub x29, x28, #0xfff000 (16773120)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, 16773120), "sub x29, x28, #0xfff000 (16773120)");

  // sub with the stack pointer as both operands. Unlike add, a zero immediate is still expected to
  // be printed as a sub, not as a mov alias.
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, 0, false), "sub sp, sp, #0x0 (0)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, 4095, false), "sub sp, sp, #0xfff (4095)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, 0, true), "sub sp, sp, #0x0 (0)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, 4095, true), "sub sp, sp, #0xfff000 (16773120)");
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, 16773120), "sub sp, sp, #0xfff000 (16773120)");

  // subs, 32-bit.
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, 0, false), "subs w29, w28, #0x0 (0)");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, 4095, false), "subs w29, w28, #0xfff (4095)");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, 0, true), "subs w29, w28, #0x0 (0)");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, 4095, true), "subs w29, w28, #0xfff000 (16773120)");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, 16773120), "subs w29, w28, #0xfff000 (16773120)");

  // subs, 64-bit.
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, 0, false), "subs x29, x28, #0x0 (0)");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, 4095, false), "subs x29, x28, #0xfff (4095)");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, 0, true), "subs x29, x28, #0x0 (0)");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, 4095, true), "subs x29, x28, #0xfff000 (16773120)");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, 16773120), "subs x29, x28, #0xfff000 (16773120)");

  // cmp, 32-bit. Takes a single source register and no destination.
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r28, 0, false), "cmp w28, #0x0 (0)");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r28, 4095, false), "cmp w28, #0xfff (4095)");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r28, 0, true), "cmp w28, #0x0 (0)");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r28, 4095, true), "cmp w28, #0xfff000 (16773120)");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r28, 16773120), "cmp w28, #0xfff000 (16773120)");

  // cmp, 64-bit.
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r28, 0, false), "cmp x28, #0x0 (0)");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r28, 4095, false), "cmp x28, #0xfff (4095)");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r28, 0, true), "cmp x28, #0x0 (0)");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r28, 4095, true), "cmp x28, #0xfff000 (16773120)");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r28, 16773120), "cmp x28, #0xfff000 (16773120)");
}

// Covers the immediate forms of smax/umax/smin/umin at both operand sizes.
// The signed variants are exercised with 1, 127 and -128; the unsigned variants with 0 and 255.
// NOTE(review): these look like the extremes of an 8-bit signed/unsigned immediate field - confirm
// against the emitter's encoding of these instructions.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Min/max immediate") {
  // Signed maximum.
  TEST_SINGLE(smax(Size::i32Bit, Reg::r29, Reg::r28, 1), "smax w29, w28, #1");
  TEST_SINGLE(smax(Size::i32Bit, Reg::r29, Reg::r28, 127), "smax w29, w28, #127");
  TEST_SINGLE(smax(Size::i32Bit, Reg::r29, Reg::r28, -128), "smax w29, w28, #-128");
  TEST_SINGLE(smax(Size::i64Bit, Reg::r29, Reg::r28, 1), "smax x29, x28, #1");
  TEST_SINGLE(smax(Size::i64Bit, Reg::r29, Reg::r28, 127), "smax x29, x28, #127");
  TEST_SINGLE(smax(Size::i64Bit, Reg::r29, Reg::r28, -128), "smax x29, x28, #-128");

  // Unsigned maximum.
  TEST_SINGLE(umax(Size::i32Bit, Reg::r29, Reg::r28, 0), "umax w29, w28, #0");
  TEST_SINGLE(umax(Size::i32Bit, Reg::r29, Reg::r28, 255), "umax w29, w28, #255");
  TEST_SINGLE(umax(Size::i64Bit, Reg::r29, Reg::r28, 0), "umax x29, x28, #0");
  TEST_SINGLE(umax(Size::i64Bit, Reg::r29, Reg::r28, 255), "umax x29, x28, #255");

  // Signed minimum.
  TEST_SINGLE(smin(Size::i32Bit, Reg::r29, Reg::r28, 1), "smin w29, w28, #1");
  TEST_SINGLE(smin(Size::i32Bit, Reg::r29, Reg::r28, 127), "smin w29, w28, #127");
  TEST_SINGLE(smin(Size::i32Bit, Reg::r29, Reg::r28, -128), "smin w29, w28, #-128");
  TEST_SINGLE(smin(Size::i64Bit, Reg::r29, Reg::r28, 1), "smin x29, x28, #1");
  TEST_SINGLE(smin(Size::i64Bit, Reg::r29, Reg::r28, 127), "smin x29, x28, #127");
  TEST_SINGLE(smin(Size::i64Bit, Reg::r29, Reg::r28, -128), "smin x29, x28, #-128");

  // Unsigned minimum.
  TEST_SINGLE(umin(Size::i32Bit, Reg::r29, Reg::r28, 0), "umin w29, w28, #0");
  TEST_SINGLE(umin(Size::i32Bit, Reg::r29, Reg::r28, 255), "umin w29, w28, #255");
  TEST_SINGLE(umin(Size::i64Bit, Reg::r29, Reg::r28, 0), "umin x29, x28, #0");
  TEST_SINGLE(umin(Size::i64Bit, Reg::r29, Reg::r28, 255), "umin x29, x28, #255");
}

// Covers the immediate forms of and_/bic/ands/bics/orr/eor/tst at both operand sizes.
// Every instruction is exercised with the masks 1 and -2; the expected text shows -2 as 0xfffffffe
// for 32-bit operations and 0xfffffffffffffffe for 64-bit operations.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Logical immediate") {
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, 1), "and w29, w28, #0x1");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, -2), "and w29, w28, #0xfffffffe");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, 1), "and x29, x28, #0x1");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, -2), "and x29, x28, #0xfffffffffffffffe");

  // bic with an immediate is expected to disassemble as "and" with the bitwise-inverted mask
  // (1 -> 0x...fe, -2 -> 0x1).
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, 1), "and w29, w28, #0xfffffffe");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, -2), "and w29, w28, #0x1");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, 1), "and x29, x28, #0xfffffffffffffffe");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, -2), "and x29, x28, #0x1");

  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, 1), "ands w29, w28, #0x1");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, -2), "ands w29, w28, #0xfffffffe");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, 1), "ands x29, x28, #0x1");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, -2), "ands x29, x28, #0xfffffffffffffffe");

  // bics with an immediate is likewise expected to disassemble as "ands" with the inverted mask.
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, 1), "ands w29, w28, #0xfffffffe");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, -2), "ands w29, w28, #0x1");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, 1), "ands x29, x28, #0xfffffffffffffffe");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, -2), "ands x29, x28, #0x1");

  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, 1), "orr w29, w28, #0x1");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, -2), "orr w29, w28, #0xfffffffe");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, 1), "orr x29, x28, #0x1");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, -2), "orr x29, x28, #0xfffffffffffffffe");

  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, 1), "eor w29, w28, #0x1");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, -2), "eor w29, w28, #0xfffffffe");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, 1), "eor x29, x28, #0x1");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, -2), "eor x29, x28, #0xfffffffffffffffe");

  // tst takes a single source register and no destination.
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, 1), "tst w28, #0x1");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, -2), "tst w28, #0xfffffffe");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, 1), "tst x28, #0x1");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, -2), "tst x28, #0xfffffffffffffffe");
}
// Covers movn/movz/movk and the immediate mov helper, using the 16-bit payload 0x4243 at every
// shift tested (0 and 16 for 32-bit; 0, 16, 32 and 48 for 64-bit).
// Both calling conventions are exercised: (Size, Reg, ...) and the typed WReg/XReg overloads.
// In the expected text, movn and movz appear under the "mov" mnemonic with the resulting full-width
// value, while movk keeps its own mnemonic and prints the shift as "lsl #N".
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Move wide immediate") {
  // movn: the expected value is the shifted payload with all bits inverted (0x4243 -> 0x...bdbc).
  TEST_SINGLE(movn(Size::i32Bit, Reg::r29, 0x4243, 0), "mov w29, #0xffffbdbc");
  TEST_SINGLE(movn(Size::i32Bit, Reg::r29, 0x4243, 16), "mov w29, #0xbdbcffff");

  TEST_SINGLE(movn(Size::i64Bit, Reg::r29, 0x4243, 0), "mov x29, #0xffffffffffffbdbc");
  TEST_SINGLE(movn(Size::i64Bit, Reg::r29, 0x4243, 16), "mov x29, #0xffffffffbdbcffff");
  TEST_SINGLE(movn(Size::i64Bit, Reg::r29, 0x4243, 32), "mov x29, #0xffffbdbcffffffff");
  TEST_SINGLE(movn(Size::i64Bit, Reg::r29, 0x4243, 48), "mov x29, #0xbdbcffffffffffff");

  // mov with an immediate and no shift argument, Size-based overload.
  TEST_SINGLE(mov(Size::i32Bit, Reg::r29, 0x4243), "mov w29, #0x4243");
  TEST_SINGLE(mov(Size::i64Bit, Reg::r29, 0x4243), "mov x29, #0x4243");

  // mov with an immediate, typed-register overloads.
  TEST_SINGLE(mov(WReg::w29, 0x4243), "mov w29, #0x4243");
  TEST_SINGLE(mov(XReg::x29, 0x4243), "mov x29, #0x4243");

  // movz: the expected value is the payload shifted into place.
  TEST_SINGLE(movz(Size::i32Bit, Reg::r29, 0x4243, 0), "mov w29, #0x4243");
  TEST_SINGLE(movz(Size::i32Bit, Reg::r29, 0x4243, 16), "mov w29, #0x42430000");

  TEST_SINGLE(movz(Size::i64Bit, Reg::r29, 0x4243, 0), "mov x29, #0x4243");
  TEST_SINGLE(movz(Size::i64Bit, Reg::r29, 0x4243, 16), "mov x29, #0x42430000");
  TEST_SINGLE(movz(Size::i64Bit, Reg::r29, 0x4243, 32), "mov x29, #0x424300000000");
  TEST_SINGLE(movz(Size::i64Bit, Reg::r29, 0x4243, 48), "mov x29, #0x4243000000000000");

  // movk: a zero shift is omitted from the expected text.
  TEST_SINGLE(movk(Size::i32Bit, Reg::r29, 0x4243, 0), "movk w29, #0x4243");
  TEST_SINGLE(movk(Size::i32Bit, Reg::r29, 0x4243, 16), "movk w29, #0x4243, lsl #16");

  TEST_SINGLE(movk(Size::i64Bit, Reg::r29, 0x4243, 0), "movk x29, #0x4243");
  TEST_SINGLE(movk(Size::i64Bit, Reg::r29, 0x4243, 16), "movk x29, #0x4243, lsl #16");
  TEST_SINGLE(movk(Size::i64Bit, Reg::r29, 0x4243, 32), "movk x29, #0x4243, lsl #32");
  TEST_SINGLE(movk(Size::i64Bit, Reg::r29, 0x4243, 48), "movk x29, #0x4243, lsl #48");

  // Same movn/movz/movk cases through the WReg overloads; expected text matches the 32-bit cases above.
  TEST_SINGLE(movn(WReg::w29, 0x4243, 0), "mov w29, #0xffffbdbc");
  TEST_SINGLE(movn(WReg::w29, 0x4243, 16), "mov w29, #0xbdbcffff");
  TEST_SINGLE(movz(WReg::w29, 0x4243, 0), "mov w29, #0x4243");
  TEST_SINGLE(movz(WReg::w29, 0x4243, 16), "mov w29, #0x42430000");
  TEST_SINGLE(movk(WReg::w29, 0x4243, 0), "movk w29, #0x4243");
  TEST_SINGLE(movk(WReg::w29, 0x4243, 16), "movk w29, #0x4243, lsl #16");

  // Same movn/movz/movk cases through the XReg overloads; expected text matches the 64-bit cases above.
  TEST_SINGLE(movn(XReg::x29, 0x4243, 0), "mov x29, #0xffffffffffffbdbc");
  TEST_SINGLE(movn(XReg::x29, 0x4243, 16), "mov x29, #0xffffffffbdbcffff");
  TEST_SINGLE(movn(XReg::x29, 0x4243, 32), "mov x29, #0xffffbdbcffffffff");
  TEST_SINGLE(movn(XReg::x29, 0x4243, 48), "mov x29, #0xbdbcffffffffffff");
  TEST_SINGLE(movz(XReg::x29, 0x4243, 0), "mov x29, #0x4243");
  TEST_SINGLE(movz(XReg::x29, 0x4243, 16), "mov x29, #0x42430000");
  TEST_SINGLE(movz(XReg::x29, 0x4243, 32), "mov x29, #0x424300000000");
  TEST_SINGLE(movz(XReg::x29, 0x4243, 48), "mov x29, #0x4243000000000000");
  TEST_SINGLE(movk(XReg::x29, 0x4243, 0), "movk x29, #0x4243");
  TEST_SINGLE(movk(XReg::x29, 0x4243, 16), "movk x29, #0x4243, lsl #16");
  TEST_SINGLE(movk(XReg::x29, 0x4243, 32), "movk x29, #0x4243, lsl #32");
  TEST_SINGLE(movk(XReg::x29, 0x4243, 48), "movk x29, #0x4243, lsl #48");
}
// Covers the bitfield group: sign extension (sxtb/sxth/sxtw), sbfx, immediate asr, bfc, bfxil,
// sbfiz and ubfiz, at both operand sizes where the emitter call takes a Size.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Bitfield") {
  // For the 64-bit sign extensions the expected text names the source as a w register.
  TEST_SINGLE(sxtb(Size::i32Bit, Reg::r29, Reg::r28), "sxtb w29, w28");
  TEST_SINGLE(sxtb(Size::i64Bit, Reg::r29, Reg::r28), "sxtb x29, w28");

  TEST_SINGLE(sxth(Size::i32Bit, Reg::r29, Reg::r28), "sxth w29, w28");
  TEST_SINGLE(sxth(Size::i64Bit, Reg::r29, Reg::r28), "sxth x29, w28");

  // sxtw is only called through its typed XReg/WReg signature here.
  TEST_SINGLE(sxtw(XReg::x29, WReg::w28), "sxtw x29, w28");

  // The two trailing integers appear in the expected text in the same order, as "#4, #16".
  TEST_SINGLE(sbfx(Size::i32Bit, Reg::r29, Reg::r28, 4, 16), "sbfx w29, w28, #4, #16");
  TEST_SINGLE(sbfx(Size::i64Bit, Reg::r29, Reg::r28, 4, 16), "sbfx x29, x28, #4, #16");

  // Arithmetic shift right by an immediate.
  TEST_SINGLE(asr(Size::i32Bit, Reg::r29, Reg::r28, 17), "asr w29, w28, #17");
  TEST_SINGLE(asr(Size::i64Bit, Reg::r29, Reg::r28, 17), "asr x29, x28, #17");

  // bfc takes only a destination register. The second case of each size uses a first operand of
  // 27 (32-bit) or 57 (64-bit) with a second operand of 3.
  TEST_SINGLE(bfc(Size::i32Bit, Reg::r29, 4, 3), "bfc w29, #4, #3");
  TEST_SINGLE(bfc(Size::i32Bit, Reg::r29, 27, 3), "bfc w29, #27, #3");

  TEST_SINGLE(bfc(Size::i64Bit, Reg::r29, 4, 3), "bfc x29, #4, #3");
  TEST_SINGLE(bfc(Size::i64Bit, Reg::r29, 57, 3), "bfc x29, #57, #3");

  TEST_SINGLE(bfxil(Size::i32Bit, Reg::r29, Reg::r28, 4, 3), "bfxil w29, w28, #4, #3");
  TEST_SINGLE(bfxil(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "bfxil w29, w28, #27, #3");

  TEST_SINGLE(bfxil(Size::i64Bit, Reg::r29, Reg::r28, 4, 3), "bfxil x29, x28, #4, #3");
  TEST_SINGLE(bfxil(Size::i64Bit, Reg::r29, Reg::r28, 57, 3), "bfxil x29, x28, #57, #3");

  // sbfiz/ubfiz: the 64-bit upper case uses 54 rather than the 57 used for bfc/bfxil above.
  TEST_SINGLE(sbfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), "sbfiz w29, w28, #5, #3");
  TEST_SINGLE(sbfiz(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "sbfiz w29, w28, #27, #3");

  TEST_SINGLE(sbfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "sbfiz x29, x28, #5, #3");
  TEST_SINGLE(sbfiz(Size::i64Bit, Reg::r29, Reg::r28, 54, 3), "sbfiz x29, x28, #54, #3");

  TEST_SINGLE(ubfiz(Size::i32Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz w29, w28, #5, #3");
  TEST_SINGLE(ubfiz(Size::i32Bit, Reg::r29, Reg::r28, 27, 3), "ubfiz w29, w28, #27, #3");

  TEST_SINGLE(ubfiz(Size::i64Bit, Reg::r29, Reg::r28, 5, 3), "ubfiz x29, x28, #5, #3");
  TEST_SINGLE(ubfiz(Size::i64Bit, Reg::r29, Reg::r28, 54, 3), "ubfiz x29, x28, #54, #3");
}
// Covers extr (two source registers plus an immediate) and the immediate form of ror (one source
// register plus an immediate). 32-bit cases use the amounts 0 and 16; 64-bit cases use 0, 16, 32
// and 48.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Extract") {
  // extr, 32-bit.
  TEST_SINGLE(extr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, 0), "extr w29, w28, w27, #0");
  TEST_SINGLE(extr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, 16), "extr w29, w28, w27, #16");

  // extr, 64-bit.
  TEST_SINGLE(extr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, 0), "extr x29, x28, x27, #0");
  TEST_SINGLE(extr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, 16), "extr x29, x28, x27, #16");
  TEST_SINGLE(extr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, 32), "extr x29, x28, x27, #32");
  TEST_SINGLE(extr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, 48), "extr x29, x28, x27, #48");

  // ror by immediate, 32-bit.
  TEST_SINGLE(ror(Size::i32Bit, Reg::r29, Reg::r28, 0), "ror w29, w28, #0");
  TEST_SINGLE(ror(Size::i32Bit, Reg::r29, Reg::r28, 16), "ror w29, w28, #16");

  // ror by immediate, 64-bit.
  TEST_SINGLE(ror(Size::i64Bit, Reg::r29, Reg::r28, 0), "ror x29, x28, #0");
  TEST_SINGLE(ror(Size::i64Bit, Reg::r29, Reg::r28, 16), "ror x29, x28, #16");
  TEST_SINGLE(ror(Size::i64Bit, Reg::r29, Reg::r28, 32), "ror x29, x28, #32");
  TEST_SINGLE(ror(Size::i64Bit, Reg::r29, Reg::r28, 48), "ror x29, x28, #48");
}
// Covers the two-source register operations: division, variable shifts/rotates, CRC32, register
// min/max, and pacga. Checks for subp/irg/gmi/subps are present but disabled with `if (false)`.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Data processing - 2 source") {
  // 32-bit forms. The lslv/lsrv/asrv/rorv calls are expected to disassemble under the shorter
  // lsl/lsr/asr/ror mnemonics.
  TEST_SINGLE(udiv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "udiv w29, w28, w27");
  TEST_SINGLE(sdiv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "sdiv w29, w28, w27");
  TEST_SINGLE(lslv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "lsl w29, w28, w27");
  TEST_SINGLE(lsrv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "lsr w29, w28, w27");
  TEST_SINGLE(asrv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "asr w29, w28, w27");
  TEST_SINGLE(rorv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "ror w29, w28, w27");
  // The byte/half/word CRC32 variants are called with typed WReg operands rather than a Size.
  TEST_SINGLE(crc32b(WReg::w29, WReg::w28, WReg::w27), "crc32b w29, w28, w27");
  TEST_SINGLE(crc32h(WReg::w29, WReg::w28, WReg::w27), "crc32h w29, w28, w27");
  TEST_SINGLE(crc32w(WReg::w29, WReg::w28, WReg::w27), "crc32w w29, w28, w27");
  TEST_SINGLE(crc32cb(WReg::w29, WReg::w28, WReg::w27), "crc32cb w29, w28, w27");
  TEST_SINGLE(crc32ch(WReg::w29, WReg::w28, WReg::w27), "crc32ch w29, w28, w27");
  TEST_SINGLE(crc32cw(WReg::w29, WReg::w28, WReg::w27), "crc32cw w29, w28, w27");
  TEST_SINGLE(smax(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "smax w29, w28, w27");
  TEST_SINGLE(umax(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "umax w29, w28, w27");
  TEST_SINGLE(smin(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "smin w29, w28, w27");
  TEST_SINGLE(umin(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "umin w29, w28, w27");

  // 64-bit forms of the Size-parameterised operations above.
  TEST_SINGLE(udiv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "udiv x29, x28, x27");
  TEST_SINGLE(sdiv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "sdiv x29, x28, x27");
  TEST_SINGLE(lslv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "lsl x29, x28, x27");
  TEST_SINGLE(lsrv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "lsr x29, x28, x27");
  TEST_SINGLE(asrv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "asr x29, x28, x27");
  TEST_SINGLE(rorv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "ror x29, x28, x27");
  TEST_SINGLE(smax(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "smax x29, x28, x27");
  TEST_SINGLE(umax(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "umax x29, x28, x27");
  TEST_SINGLE(smin(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "smin x29, x28, x27");
  TEST_SINGLE(umin(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "umin x29, x28, x27");

  if (false) {
    // vixl doesn't support these instructions, so the checks are kept but never executed.
    TEST_SINGLE(subp(XReg::x29, XReg::x28, XReg::x27), "subp x29, x28, x27");
    TEST_SINGLE(irg(XReg::x29, XReg::x28, XReg::x27), "irg x29, x28, x27");
    TEST_SINGLE(gmi(XReg::x29, XReg::x28, XReg::x27), "gmi x29, x28, x27");
  }

  // Although all three operands are passed as XReg, the expected text for crc32x/crc32cx shows the
  // first two registers as w registers and only the last one as an x register.
  TEST_SINGLE(pacga(XReg::x29, XReg::x28, XReg::x27), "pacga x29, x28, x27");
  TEST_SINGLE(crc32x(XReg::x29, XReg::x28, XReg::x27), "crc32x w29, w28, x27");
  TEST_SINGLE(crc32cx(XReg::x29, XReg::x28, XReg::x27), "crc32cx w29, w28, x27");

  if (false) {
    // vixl doesn't support this instruction, so the check is kept but never executed.
    TEST_SINGLE(subps(XReg::x29, XReg::x28, XReg::x27), "subps x29, x28, x27");
  }
}
// Covers the one-source register operations: rbit, rev16, rev, rev32, clz, cls, ctz, cnt and abs.
// Most are checked at both operand sizes through the (Size, Reg, Reg) signature; rev is also
// checked through its typed WReg/XReg overloads, and rev32 only through XReg.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Data processing - 1 source") {
  TEST_SINGLE(rbit(Size::i32Bit, Reg::r29, Reg::r28), "rbit w29, w28");
  TEST_SINGLE(rbit(Size::i64Bit, Reg::r29, Reg::r28), "rbit x29, x28");

  TEST_SINGLE(rev16(Size::i32Bit, Reg::r29, Reg::r28), "rev16 w29, w28");
  TEST_SINGLE(rev16(Size::i64Bit, Reg::r29, Reg::r28), "rev16 x29, x28");

  // Typed-register overloads: rev on w registers, rev32 on x registers.
  TEST_SINGLE(rev(WReg::w29, WReg::w28), "rev w29, w28");
  TEST_SINGLE(rev32(XReg::x29, XReg::x28), "rev32 x29, x28");

  TEST_SINGLE(clz(Size::i32Bit, Reg::r29, Reg::r28), "clz w29, w28");
  TEST_SINGLE(clz(Size::i64Bit, Reg::r29, Reg::r28), "clz x29, x28");

  TEST_SINGLE(cls(Size::i32Bit, Reg::r29, Reg::r28), "cls w29, w28");
  TEST_SINGLE(cls(Size::i64Bit, Reg::r29, Reg::r28), "cls x29, x28");

  // rev again: the XReg overload, then the Size-based signature at both sizes.
  TEST_SINGLE(rev(XReg::x29, XReg::x28), "rev x29, x28");
  TEST_SINGLE(rev(Size::i32Bit, Reg::r29, Reg::r28), "rev w29, w28");
  TEST_SINGLE(rev(Size::i64Bit, Reg::r29, Reg::r28), "rev x29, x28");

  TEST_SINGLE(ctz(Size::i32Bit, Reg::r29, Reg::r28), "ctz w29, w28");
  TEST_SINGLE(ctz(Size::i64Bit, Reg::r29, Reg::r28), "ctz x29, x28");

  TEST_SINGLE(cnt(Size::i32Bit, Reg::r29, Reg::r28), "cnt w29, w28");
  TEST_SINGLE(cnt(Size::i64Bit, Reg::r29, Reg::r28), "cnt x29, x28");

  TEST_SINGLE(abs(Size::i32Bit, Reg::r29, Reg::r28), "abs w29, w28");
  TEST_SINGLE(abs(Size::i64Bit, Reg::r29, Reg::r28), "abs x29, x28");
}
// Placeholder for the PAUTH instruction group. The body is intentionally empty, so this test case
// currently performs no checks.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: PAUTH") {
  // TODO: Implement in the emitter, then add the corresponding disassembly checks here.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Logical - shifted register") {
  TEST_SINGLE(mov(Size::i32Bit, Reg::r29, Reg::r28), "mov w29, w28");
  TEST_SINGLE(mov(Size::i64Bit, Reg::r29, Reg::r28), "mov x29, x28");

  TEST_SINGLE(mov(WReg::w29, WReg::w28), "mov w29, w28");
  TEST_SINGLE(mov(XReg::x29, XReg::x28), "mov x29, x28");

  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSL, 0), "mvn w29, w28");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSL, 1), "mvn w29, w28, lsl #1");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSL, 31), "mvn w29, w28, lsl #31");

  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSR, 0), "mvn w29, w28");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSR, 1), "mvn w29, w28, lsr #1");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSR, 31), "mvn w29, w28, lsr #31");

  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ASR, 0), "mvn w29, w28");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ASR, 1), "mvn w29, w28, asr #1");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ASR, 31), "mvn w29, w28, asr #31");

  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ROR, 0), "mvn w29, w28");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ROR, 1), "mvn w29, w28, ror #1");
  TEST_SINGLE(mvn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ROR, 31), "mvn w29, w28, ror #31");

  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSL, 0), "mvn x29, x28");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSL, 1), "mvn x29, x28, lsl #1");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSL, 63), "mvn x29, x28, lsl #63");

  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSR, 0), "mvn x29, x28");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSR, 1), "mvn x29, x28, lsr #1");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSR, 63), "mvn x29, x28, lsr #63");

  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ASR, 0), "mvn x29, x28");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ASR, 1), "mvn x29, x28, asr #1");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ASR, 63), "mvn x29, x28, asr #63");

  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ROR, 0), "mvn x29, x28");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ROR, 1), "mvn x29, x28, ror #1");
  TEST_SINGLE(mvn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ROR, 63), "mvn x29, x28, ror #63");

  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "and w29, w28, w27");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "and w29, w28, w27, lsl #1");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "and w29, w28, w27, lsl #31");

  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "and w29, w28, w27");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "and w29, w28, w27, lsr #1");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "and w29, w28, w27, lsr #31");

  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "and w29, w28, w27");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "and w29, w28, w27, asr #1");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "and w29, w28, w27, asr #31");

  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "and w29, w28, w27");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "and w29, w28, w27, ror #1");
  TEST_SINGLE(and_(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "and w29, w28, w27, ror #31");

  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "and x29, x28, x27");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "and x29, x28, x27, lsl #1");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "and x29, x28, x27, lsl #63");

  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "and x29, x28, x27");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "and x29, x28, x27, lsr #1");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "and x29, x28, x27, lsr #63");

  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "and x29, x28, x27");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "and x29, x28, x27, asr #1");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "and x29, x28, x27, asr #63");

  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "and x29, x28, x27");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "and x29, x28, x27, ror #1");
  TEST_SINGLE(and_(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "and x29, x28, x27, ror #63");

  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "ands w29, w28, w27");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "ands w29, w28, w27, lsl #1");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "ands w29, w28, w27, lsl #31");

  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "ands w29, w28, w27");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "ands w29, w28, w27, lsr #1");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "ands w29, w28, w27, lsr #31");

  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "ands w29, w28, w27");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "ands w29, w28, w27, asr #1");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "ands w29, w28, w27, asr #31");

  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "ands w29, w28, w27");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "ands w29, w28, w27, ror #1");
  TEST_SINGLE(ands(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "ands w29, w28, w27, ror #31");

  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "ands x29, x28, x27");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "ands x29, x28, x27, lsl #1");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "ands x29, x28, x27, lsl #63");

  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "ands x29, x28, x27");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "ands x29, x28, x27, lsr #1");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "ands x29, x28, x27, lsr #63");

  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "ands x29, x28, x27");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "ands x29, x28, x27, asr #1");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "ands x29, x28, x27, asr #63");

  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "ands x29, x28, x27");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "ands x29, x28, x27, ror #1");
  TEST_SINGLE(ands(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "ands x29, x28, x27, ror #63");

  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "bic w29, w28, w27");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "bic w29, w28, w27, lsl #1");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "bic w29, w28, w27, lsl #31");

  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "bic w29, w28, w27");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "bic w29, w28, w27, lsr #1");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "bic w29, w28, w27, lsr #31");

  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "bic w29, w28, w27");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "bic w29, w28, w27, asr #1");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "bic w29, w28, w27, asr #31");

  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "bic w29, w28, w27");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "bic w29, w28, w27, ror #1");
  TEST_SINGLE(bic(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "bic w29, w28, w27, ror #31");

  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "bic x29, x28, x27");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "bic x29, x28, x27, lsl #1");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "bic x29, x28, x27, lsl #63");

  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "bic x29, x28, x27");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "bic x29, x28, x27, lsr #1");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "bic x29, x28, x27, lsr #63");

  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "bic x29, x28, x27");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "bic x29, x28, x27, asr #1");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "bic x29, x28, x27, asr #63");

  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "bic x29, x28, x27");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "bic x29, x28, x27, ror #1");
  TEST_SINGLE(bic(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "bic x29, x28, x27, ror #63");

  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "bics w29, w28, w27");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "bics w29, w28, w27, lsl #1");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "bics w29, w28, w27, lsl #31");

  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "bics w29, w28, w27");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "bics w29, w28, w27, lsr #1");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "bics w29, w28, w27, lsr #31");

  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "bics w29, w28, w27");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "bics w29, w28, w27, asr #1");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "bics w29, w28, w27, asr #31");

  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "bics w29, w28, w27");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "bics w29, w28, w27, ror #1");
  TEST_SINGLE(bics(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "bics w29, w28, w27, ror #31");

  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "bics x29, x28, x27");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "bics x29, x28, x27, lsl #1");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "bics x29, x28, x27, lsl #63");

  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "bics x29, x28, x27");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "bics x29, x28, x27, lsr #1");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "bics x29, x28, x27, lsr #63");

  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "bics x29, x28, x27");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "bics x29, x28, x27, asr #1");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "bics x29, x28, x27, asr #63");

  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "bics x29, x28, x27");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "bics x29, x28, x27, ror #1");
  TEST_SINGLE(bics(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "bics x29, x28, x27, ror #63");

  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "orr w29, w28, w27");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "orr w29, w28, w27, lsl #1");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "orr w29, w28, w27, lsl #31");

  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "orr w29, w28, w27");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "orr w29, w28, w27, lsr #1");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "orr w29, w28, w27, lsr #31");

  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "orr w29, w28, w27");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "orr w29, w28, w27, asr #1");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "orr w29, w28, w27, asr #31");

  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "orr w29, w28, w27");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "orr w29, w28, w27, ror #1");
  TEST_SINGLE(orr(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "orr w29, w28, w27, ror #31");

  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "orr x29, x28, x27");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "orr x29, x28, x27, lsl #1");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "orr x29, x28, x27, lsl #63");

  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "orr x29, x28, x27");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "orr x29, x28, x27, lsr #1");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "orr x29, x28, x27, lsr #63");

  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "orr x29, x28, x27");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "orr x29, x28, x27, asr #1");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "orr x29, x28, x27, asr #63");

  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "orr x29, x28, x27");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "orr x29, x28, x27, ror #1");
  TEST_SINGLE(orr(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "orr x29, x28, x27, ror #63");

  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "orn w29, w28, w27");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "orn w29, w28, w27, lsl #1");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "orn w29, w28, w27, lsl #31");

  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "orn w29, w28, w27");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "orn w29, w28, w27, lsr #1");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "orn w29, w28, w27, lsr #31");

  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "orn w29, w28, w27");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "orn w29, w28, w27, asr #1");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "orn w29, w28, w27, asr #31");

  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "orn w29, w28, w27");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "orn w29, w28, w27, ror #1");
  TEST_SINGLE(orn(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "orn w29, w28, w27, ror #31");

  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "orn x29, x28, x27");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "orn x29, x28, x27, lsl #1");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "orn x29, x28, x27, lsl #63");

  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "orn x29, x28, x27");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "orn x29, x28, x27, lsr #1");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "orn x29, x28, x27, lsr #63");

  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "orn x29, x28, x27");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "orn x29, x28, x27, asr #1");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "orn x29, x28, x27, asr #63");

  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "orn x29, x28, x27");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "orn x29, x28, x27, ror #1");
  TEST_SINGLE(orn(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "orn x29, x28, x27, ror #63");

  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "eor w29, w28, w27");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "eor w29, w28, w27, lsl #1");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "eor w29, w28, w27, lsl #31");

  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "eor w29, w28, w27");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "eor w29, w28, w27, lsr #1");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "eor w29, w28, w27, lsr #31");

  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "eor w29, w28, w27");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "eor w29, w28, w27, asr #1");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "eor w29, w28, w27, asr #31");

  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "eor w29, w28, w27");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "eor w29, w28, w27, ror #1");
  TEST_SINGLE(eor(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "eor w29, w28, w27, ror #31");

  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "eor x29, x28, x27");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "eor x29, x28, x27, lsl #1");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "eor x29, x28, x27, lsl #63");

  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "eor x29, x28, x27");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "eor x29, x28, x27, lsr #1");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "eor x29, x28, x27, lsr #63");

  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "eor x29, x28, x27");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "eor x29, x28, x27, asr #1");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "eor x29, x28, x27, asr #63");

  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "eor x29, x28, x27");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "eor x29, x28, x27, ror #1");
  TEST_SINGLE(eor(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "eor x29, x28, x27, ror #63");

  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "eon w29, w28, w27");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "eon w29, w28, w27, lsl #1");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 31), "eon w29, w28, w27, lsl #31");

  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "eon w29, w28, w27");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "eon w29, w28, w27, lsr #1");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 31), "eon w29, w28, w27, lsr #31");

  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "eon w29, w28, w27");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "eon w29, w28, w27, asr #1");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 31), "eon w29, w28, w27, asr #31");

  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "eon w29, w28, w27");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "eon w29, w28, w27, ror #1");
  TEST_SINGLE(eon(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 31), "eon w29, w28, w27, ror #31");

  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 0), "eon x29, x28, x27");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 1), "eon x29, x28, x27, lsl #1");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSL, 63), "eon x29, x28, x27, lsl #63");

  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 0), "eon x29, x28, x27");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 1), "eon x29, x28, x27, lsr #1");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::LSR, 63), "eon x29, x28, x27, lsr #63");

  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 0), "eon x29, x28, x27");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 1), "eon x29, x28, x27, asr #1");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ASR, 63), "eon x29, x28, x27, asr #63");

  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 0), "eon x29, x28, x27");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 1), "eon x29, x28, x27, ror #1");
  TEST_SINGLE(eon(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ShiftType::ROR, 63), "eon x29, x28, x27, ror #63");

  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSL, 0), "tst w28, w27");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSL, 1), "tst w28, w27, lsl #1");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSL, 31), "tst w28, w27, lsl #31");

  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSR, 0), "tst w28, w27");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSR, 1), "tst w28, w27, lsr #1");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::LSR, 31), "tst w28, w27, lsr #31");

  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ASR, 0), "tst w28, w27");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ASR, 1), "tst w28, w27, asr #1");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ASR, 31), "tst w28, w27, asr #31");

  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ROR, 0), "tst w28, w27");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ROR, 1), "tst w28, w27, ror #1");
  TEST_SINGLE(tst(Size::i32Bit, Reg::r28, Reg::r27, ShiftType::ROR, 31), "tst w28, w27, ror #31");

  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSL, 0), "tst x28, x27");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSL, 1), "tst x28, x27, lsl #1");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSL, 63), "tst x28, x27, lsl #63");

  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSR, 0), "tst x28, x27");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSR, 1), "tst x28, x27, lsr #1");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::LSR, 63), "tst x28, x27, lsr #63");

  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ASR, 0), "tst x28, x27");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ASR, 1), "tst x28, x27, asr #1");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ASR, 63), "tst x28, x27, asr #63");

  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ROR, 0), "tst x28, x27");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ROR, 1), "tst x28, x27, ror #1");
  TEST_SINGLE(tst(Size::i64Bit, Reg::r28, Reg::r27, ShiftType::ROR, 63), "tst x28, x27, ror #63");
}

// Verifies the disassembly text for the shifted-register add/sub family:
// add, adds, cmn, sub, subs, neg, cmp and negs. Every instruction is checked
// in its plain (no shift operand) form and with LSL/LSR/ASR at a shift of 1
// and at the largest shift tested for the operand width (63 for 64-bit,
// 31 for 32-bit). Cases are grouped by operand width within each instruction.
// ROR is listed as unsupported for all of these, so it has no cases.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: AddSub - shifted register") {
  // add
  {
    // 64-bit operands
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28), "add x30, x29, x28");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "add x30, x29, x28, lsl #1");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 63), "add x30, x29, x28, lsl #63");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "add x30, x29, x28, lsr #1");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 63), "add x30, x29, x28, lsr #63");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "add x30, x29, x28, asr #1");
    TEST_SINGLE(add(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 63), "add x30, x29, x28, asr #63");

    // 32-bit operands
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28), "add w30, w29, w28");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "add w30, w29, w28, lsl #1");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 31), "add w30, w29, w28, lsl #31");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "add w30, w29, w28, lsr #1");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 31), "add w30, w29, w28, lsr #31");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "add w30, w29, w28, asr #1");
    TEST_SINGLE(add(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 31), "add w30, w29, w28, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // adds
  {
    // 64-bit operands
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28), "adds x30, x29, x28");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "adds x30, x29, x28, lsl #1");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 63), "adds x30, x29, x28, lsl #63");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "adds x30, x29, x28, lsr #1");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 63), "adds x30, x29, x28, lsr #63");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "adds x30, x29, x28, asr #1");
    TEST_SINGLE(adds(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 63), "adds x30, x29, x28, asr #63");

    // 32-bit operands
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28), "adds w30, w29, w28");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "adds w30, w29, w28, lsl #1");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 31), "adds w30, w29, w28, lsl #31");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "adds w30, w29, w28, lsr #1");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 31), "adds w30, w29, w28, lsr #31");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "adds w30, w29, w28, asr #1");
    TEST_SINGLE(adds(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 31), "adds w30, w29, w28, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // cmn (two-register form, no destination operand)
  {
    // 64-bit operands
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28), "cmn x29, x28");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSL, 1), "cmn x29, x28, lsl #1");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSL, 63), "cmn x29, x28, lsl #63");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSR, 1), "cmn x29, x28, lsr #1");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::LSR, 63), "cmn x29, x28, lsr #63");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ASR, 1), "cmn x29, x28, asr #1");
    TEST_SINGLE(cmn(Size::i64Bit, Reg::r29, Reg::r28, ShiftType::ASR, 63), "cmn x29, x28, asr #63");

    // 32-bit operands
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28), "cmn w29, w28");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSL, 1), "cmn w29, w28, lsl #1");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSL, 31), "cmn w29, w28, lsl #31");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSR, 1), "cmn w29, w28, lsr #1");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::LSR, 31), "cmn w29, w28, lsr #31");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ASR, 1), "cmn w29, w28, asr #1");
    TEST_SINGLE(cmn(Size::i32Bit, Reg::r29, Reg::r28, ShiftType::ASR, 31), "cmn w29, w28, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // Regression case (FEX had a bug with this): passing rsp as both the
  // destination and the first source is expected to disassemble as
  // `neg xzr, x0`.
  TEST_SINGLE(sub(Size::i64Bit, Reg::rsp, Reg::rsp, Reg::r0, ShiftType::LSL, 0), "neg xzr, x0");

  // sub
  {
    // 64-bit operands
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28), "sub x30, x29, x28");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "sub x30, x29, x28, lsl #1");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 63), "sub x30, x29, x28, lsl #63");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "sub x30, x29, x28, lsr #1");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 63), "sub x30, x29, x28, lsr #63");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "sub x30, x29, x28, asr #1");
    TEST_SINGLE(sub(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 63), "sub x30, x29, x28, asr #63");

    // 32-bit operands
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28), "sub w30, w29, w28");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "sub w30, w29, w28, lsl #1");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 31), "sub w30, w29, w28, lsl #31");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "sub w30, w29, w28, lsr #1");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 31), "sub w30, w29, w28, lsr #31");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "sub w30, w29, w28, asr #1");
    TEST_SINGLE(sub(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 31), "sub w30, w29, w28, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // subs
  {
    // 64-bit operands
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28), "subs x30, x29, x28");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "subs x30, x29, x28, lsl #1");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 63), "subs x30, x29, x28, lsl #63");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "subs x30, x29, x28, lsr #1");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 63), "subs x30, x29, x28, lsr #63");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "subs x30, x29, x28, asr #1");
    TEST_SINGLE(subs(Size::i64Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 63), "subs x30, x29, x28, asr #63");

    // 32-bit operands
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28), "subs w30, w29, w28");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 1), "subs w30, w29, w28, lsl #1");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSL, 31), "subs w30, w29, w28, lsl #31");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 1), "subs w30, w29, w28, lsr #1");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::LSR, 31), "subs w30, w29, w28, lsr #31");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 1), "subs w30, w29, w28, asr #1");
    TEST_SINGLE(subs(Size::i32Bit, Reg::r30, Reg::r29, Reg::r28, ShiftType::ASR, 31), "subs w30, w29, w28, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // neg (destination plus one source register)
  {
    // 64-bit operands
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29), "neg x30, x29");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "neg x30, x29, lsl #1");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 63), "neg x30, x29, lsl #63");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "neg x30, x29, lsr #1");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 63), "neg x30, x29, lsr #63");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "neg x30, x29, asr #1");
    TEST_SINGLE(neg(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 63), "neg x30, x29, asr #63");

    // 32-bit operands
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29), "neg w30, w29");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "neg w30, w29, lsl #1");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 31), "neg w30, w29, lsl #31");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "neg w30, w29, lsr #1");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 31), "neg w30, w29, lsr #31");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "neg w30, w29, asr #1");
    TEST_SINGLE(neg(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 31), "neg w30, w29, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // cmp (two-register form, no destination operand)
  {
    // 64-bit operands
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29), "cmp x30, x29");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "cmp x30, x29, lsl #1");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 63), "cmp x30, x29, lsl #63");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "cmp x30, x29, lsr #1");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 63), "cmp x30, x29, lsr #63");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "cmp x30, x29, asr #1");
    TEST_SINGLE(cmp(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 63), "cmp x30, x29, asr #63");

    // 32-bit operands
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29), "cmp w30, w29");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "cmp w30, w29, lsl #1");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 31), "cmp w30, w29, lsl #31");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "cmp w30, w29, lsr #1");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 31), "cmp w30, w29, lsr #31");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "cmp w30, w29, asr #1");
    TEST_SINGLE(cmp(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 31), "cmp w30, w29, asr #31");

    // ROR: unsupported, nothing to test.
  }

  // negs (destination plus one source register)
  {
    // 64-bit operands
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29), "negs x30, x29");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "negs x30, x29, lsl #1");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSL, 63), "negs x30, x29, lsl #63");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "negs x30, x29, lsr #1");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::LSR, 63), "negs x30, x29, lsr #63");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "negs x30, x29, asr #1");
    TEST_SINGLE(negs(Size::i64Bit, Reg::r30, Reg::r29, ShiftType::ASR, 63), "negs x30, x29, asr #63");

    // 32-bit operands
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29), "negs w30, w29");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 1), "negs w30, w29, lsl #1");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSL, 31), "negs w30, w29, lsl #31");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 1), "negs w30, w29, lsr #1");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::LSR, 31), "negs w30, w29, lsr #31");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 1), "negs w30, w29, asr #1");
    TEST_SINGLE(negs(Size::i32Bit, Reg::r30, Reg::r29, ShiftType::ASR, 31), "negs w30, w29, asr #31");

    // ROR: unsupported, nothing to test.
  }
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: AddSub - extended register") {
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "add w29, w28, w27, uxtb");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "add w29, w28, w27, uxtb #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "add w29, w28, w27, uxtb #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "add w29, w28, w27, uxtb #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "add w29, w28, w27, uxtb #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "add w29, w28, w27, uxth");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "add w29, w28, w27, uxth #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "add w29, w28, w27, uxth #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "add w29, w28, w27, uxth #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "add w29, w28, w27, uxth #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "add w29, w28, w27, uxtw");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "add w29, w28, w27, uxtw #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "add w29, w28, w27, uxtw #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "add w29, w28, w27, uxtw #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "add w29, w28, w27, uxtw #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "add w29, w28, x27, uxtx");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "add w29, w28, x27, uxtx #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "add w29, w28, x27, uxtx #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "add w29, w28, x27, uxtx #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "add w29, w28, x27, uxtx #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "add w29, w28, w27, sxtb");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "add w29, w28, w27, sxtb #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "add w29, w28, w27, sxtb #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "add w29, w28, w27, sxtb #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "add w29, w28, w27, sxtb #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "add w29, w28, w27, sxth");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "add w29, w28, w27, sxth #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "add w29, w28, w27, sxth #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "add w29, w28, w27, sxth #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "add w29, w28, w27, sxth #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "add w29, w28, w27, sxtw");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "add w29, w28, w27, sxtw #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "add w29, w28, w27, sxtw #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "add w29, w28, w27, sxtw #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "add w29, w28, w27, sxtw #4");

  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "add w29, w28, x27, sxtx");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "add w29, w28, x27, sxtx #1");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "add w29, w28, x27, sxtx #2");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "add w29, w28, x27, sxtx #3");
  TEST_SINGLE(add(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "add w29, w28, x27, sxtx #4");

  // add, 64-bit form, extended-register operand.
  // Each case pairs one emitter call with the disassembly text expected for it.
  // Every ExtendedType from UXTB through SXTX is exercised with shift amounts 0-4;
  // a shift of 0 is expected to be omitted from the text entirely.
  // The last source register is expected to print as w27 for the byte/halfword/word
  // extends and as x27 for UXTX/SXTX.
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "add x29, x28, w27, uxtb");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "add x29, x28, w27, uxtb #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "add x29, x28, w27, uxtb #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "add x29, x28, w27, uxtb #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "add x29, x28, w27, uxtb #4");

  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "add x29, x28, w27, uxth");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "add x29, x28, w27, uxth #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "add x29, x28, w27, uxth #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "add x29, x28, w27, uxth #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "add x29, x28, w27, uxth #4");

  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "add x29, x28, w27, uxtw");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "add x29, x28, w27, uxtw #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "add x29, x28, w27, uxtw #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "add x29, x28, w27, uxtw #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "add x29, x28, w27, uxtw #4");

  // UXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "add x29, x28, x27, uxtx");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "add x29, x28, x27, uxtx #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "add x29, x28, x27, uxtx #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "add x29, x28, x27, uxtx #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "add x29, x28, x27, uxtx #4");

  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "add x29, x28, w27, sxtb");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "add x29, x28, w27, sxtb #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "add x29, x28, w27, sxtb #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "add x29, x28, w27, sxtb #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "add x29, x28, w27, sxtb #4");

  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "add x29, x28, w27, sxth");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "add x29, x28, w27, sxth #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "add x29, x28, w27, sxth #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "add x29, x28, w27, sxth #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "add x29, x28, w27, sxth #4");

  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "add x29, x28, w27, sxtw");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "add x29, x28, w27, sxtw #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "add x29, x28, w27, sxtw #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "add x29, x28, w27, sxtw #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "add x29, x28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "add x29, x28, x27, sxtx");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "add x29, x28, x27, sxtx #1");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "add x29, x28, x27, sxtx #2");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "add x29, x28, x27, sxtx #3");
  TEST_SINGLE(add(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "add x29, x28, x27, sxtx #4");

  // adds, 32-bit form, extended-register operand.
  // Each case pairs one emitter call with the disassembly text expected for it.
  // Every ExtendedType from UXTB through SXTX is exercised with shift amounts 0-4;
  // a shift of 0 is expected to be omitted from the text entirely.
  // Destination and first source are expected to print as w29/w28 throughout.
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "adds w29, w28, w27, uxtb");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "adds w29, w28, w27, uxtb #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "adds w29, w28, w27, uxtb #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "adds w29, w28, w27, uxtb #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "adds w29, w28, w27, uxtb #4");

  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "adds w29, w28, w27, uxth");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "adds w29, w28, w27, uxth #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "adds w29, w28, w27, uxth #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "adds w29, w28, w27, uxth #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "adds w29, w28, w27, uxth #4");

  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "adds w29, w28, w27, uxtw");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "adds w29, w28, w27, uxtw #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "adds w29, w28, w27, uxtw #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "adds w29, w28, w27, uxtw #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "adds w29, w28, w27, uxtw #4");

  // UXTX: the extended operand is expected to print as x27 even though the operation is 32-bit.
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "adds w29, w28, x27, uxtx");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "adds w29, w28, x27, uxtx #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "adds w29, w28, x27, uxtx #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "adds w29, w28, x27, uxtx #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "adds w29, w28, x27, uxtx #4");

  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "adds w29, w28, w27, sxtb");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "adds w29, w28, w27, sxtb #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "adds w29, w28, w27, sxtb #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "adds w29, w28, w27, sxtb #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "adds w29, w28, w27, sxtb #4");

  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "adds w29, w28, w27, sxth");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "adds w29, w28, w27, sxth #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "adds w29, w28, w27, sxth #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "adds w29, w28, w27, sxth #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "adds w29, w28, w27, sxth #4");

  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "adds w29, w28, w27, sxtw");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "adds w29, w28, w27, sxtw #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "adds w29, w28, w27, sxtw #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "adds w29, w28, w27, sxtw #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "adds w29, w28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as x27 even though the operation is 32-bit.
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "adds w29, w28, x27, sxtx");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "adds w29, w28, x27, sxtx #1");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "adds w29, w28, x27, sxtx #2");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "adds w29, w28, x27, sxtx #3");
  TEST_SINGLE(adds(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "adds w29, w28, x27, sxtx #4");

  // adds, 64-bit form, extended-register operand.
  // Each case pairs one emitter call with the disassembly text expected for it.
  // Every ExtendedType from UXTB through SXTX is exercised with shift amounts 0-4;
  // a shift of 0 is expected to be omitted from the text entirely.
  // The last source register is expected to print as w27 for the byte/halfword/word
  // extends and as x27 for UXTX/SXTX.
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "adds x29, x28, w27, uxtb");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "adds x29, x28, w27, uxtb #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "adds x29, x28, w27, uxtb #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "adds x29, x28, w27, uxtb #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "adds x29, x28, w27, uxtb #4");

  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "adds x29, x28, w27, uxth");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "adds x29, x28, w27, uxth #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "adds x29, x28, w27, uxth #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "adds x29, x28, w27, uxth #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "adds x29, x28, w27, uxth #4");

  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "adds x29, x28, w27, uxtw");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "adds x29, x28, w27, uxtw #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "adds x29, x28, w27, uxtw #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "adds x29, x28, w27, uxtw #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "adds x29, x28, w27, uxtw #4");

  // UXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "adds x29, x28, x27, uxtx");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "adds x29, x28, x27, uxtx #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "adds x29, x28, x27, uxtx #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "adds x29, x28, x27, uxtx #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "adds x29, x28, x27, uxtx #4");

  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "adds x29, x28, w27, sxtb");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "adds x29, x28, w27, sxtb #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "adds x29, x28, w27, sxtb #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "adds x29, x28, w27, sxtb #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "adds x29, x28, w27, sxtb #4");

  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "adds x29, x28, w27, sxth");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "adds x29, x28, w27, sxth #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "adds x29, x28, w27, sxth #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "adds x29, x28, w27, sxth #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "adds x29, x28, w27, sxth #4");

  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "adds x29, x28, w27, sxtw");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "adds x29, x28, w27, sxtw #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "adds x29, x28, w27, sxtw #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "adds x29, x28, w27, sxtw #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "adds x29, x28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "adds x29, x28, x27, sxtx");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "adds x29, x28, x27, sxtx #1");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "adds x29, x28, x27, sxtx #2");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "adds x29, x28, x27, sxtx #3");
  TEST_SINGLE(adds(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "adds x29, x28, x27, sxtx #4");

  // cmn, 32-bit form, extended-register operand.
  // cmn takes no destination register argument: only the two source registers are passed.
  // Each case pairs one emitter call with the disassembly text expected for it, for
  // shift amounts 0-4; a shift of 0 is expected to be omitted from the text entirely.
  // The unsigned word/doubleword extends are requested here as ExtendedType::LSL_32 and
  // ExtendedType::LSL_64, and their expected text is a bare operand or `lsl #n`.
  // NOTE(review): presumably LSL_32/LSL_64 share encodings with UXTW/UXTX and the
  // disassembler prefers the `lsl` spelling for cmn — confirm against the ExtendedType definition.
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "cmn w28, w27, uxtb");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "cmn w28, w27, uxtb #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "cmn w28, w27, uxtb #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "cmn w28, w27, uxtb #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "cmn w28, w27, uxtb #4");

  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "cmn w28, w27, uxth");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "cmn w28, w27, uxth #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "cmn w28, w27, uxth #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "cmn w28, w27, uxth #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "cmn w28, w27, uxth #4");

  // LSL_32: expected to print with a w27 operand and no extend mnemonic.
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_32, 0), "cmn w28, w27");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_32, 1), "cmn w28, w27, lsl #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_32, 2), "cmn w28, w27, lsl #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_32, 3), "cmn w28, w27, lsl #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_32, 4), "cmn w28, w27, lsl #4");

  // LSL_64: expected to print with an x27 operand even though the compare is 32-bit.
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 0), "cmn w28, x27");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 1), "cmn w28, x27, lsl #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 2), "cmn w28, x27, lsl #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 3), "cmn w28, x27, lsl #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 4), "cmn w28, x27, lsl #4");

  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "cmn w28, w27, sxtb");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "cmn w28, w27, sxtb #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "cmn w28, w27, sxtb #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "cmn w28, w27, sxtb #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "cmn w28, w27, sxtb #4");

  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "cmn w28, w27, sxth");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "cmn w28, w27, sxth #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "cmn w28, w27, sxth #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "cmn w28, w27, sxth #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "cmn w28, w27, sxth #4");

  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "cmn w28, w27, sxtw");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "cmn w28, w27, sxtw #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "cmn w28, w27, sxtw #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "cmn w28, w27, sxtw #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "cmn w28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as x27 even though the compare is 32-bit.
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "cmn w28, x27, sxtx");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "cmn w28, x27, sxtx #1");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "cmn w28, x27, sxtx #2");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "cmn w28, x27, sxtx #3");
  TEST_SINGLE(cmn(Size::i32Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "cmn w28, x27, sxtx #4");

  // cmn, 64-bit form, extended-register operand.
  // cmn takes no destination register argument: only the two source registers are passed.
  // Each case pairs one emitter call with the disassembly text expected for it, for
  // shift amounts 0-4; a shift of 0 is expected to be omitted from the text entirely.
  // The unsigned doubleword extend is requested here as ExtendedType::LSL_64, and its
  // expected text is a bare operand or `lsl #n`; UXTW keeps its own mnemonic.
  // NOTE(review): presumably LSL_64 shares its encoding with UXTX and the disassembler
  // prefers the `lsl` spelling for cmn — confirm against the ExtendedType definition.
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "cmn x28, w27, uxtb");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "cmn x28, w27, uxtb #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "cmn x28, w27, uxtb #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "cmn x28, w27, uxtb #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "cmn x28, w27, uxtb #4");

  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "cmn x28, w27, uxth");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "cmn x28, w27, uxth #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "cmn x28, w27, uxth #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "cmn x28, w27, uxth #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "cmn x28, w27, uxth #4");

  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "cmn x28, w27, uxtw");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "cmn x28, w27, uxtw #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "cmn x28, w27, uxtw #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "cmn x28, w27, uxtw #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "cmn x28, w27, uxtw #4");

  // LSL_64: expected to print with an x27 operand and no extend mnemonic.
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 0), "cmn x28, x27");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 1), "cmn x28, x27, lsl #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 2), "cmn x28, x27, lsl #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 3), "cmn x28, x27, lsl #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::LSL_64, 4), "cmn x28, x27, lsl #4");

  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "cmn x28, w27, sxtb");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "cmn x28, w27, sxtb #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "cmn x28, w27, sxtb #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "cmn x28, w27, sxtb #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "cmn x28, w27, sxtb #4");

  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "cmn x28, w27, sxth");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "cmn x28, w27, sxth #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "cmn x28, w27, sxth #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "cmn x28, w27, sxth #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "cmn x28, w27, sxth #4");

  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "cmn x28, w27, sxtw");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "cmn x28, w27, sxtw #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "cmn x28, w27, sxtw #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "cmn x28, w27, sxtw #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "cmn x28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "cmn x28, x27, sxtx");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "cmn x28, x27, sxtx #1");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "cmn x28, x27, sxtx #2");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "cmn x28, x27, sxtx #3");
  TEST_SINGLE(cmn(Size::i64Bit, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "cmn x28, x27, sxtx #4");

  // sub, 32-bit form, extended-register operand.
  // Each case pairs one emitter call with the disassembly text expected for it.
  // Every ExtendedType from UXTB through SXTX is exercised with shift amounts 0-4;
  // a shift of 0 is expected to be omitted from the text entirely.
  // Destination and first source are expected to print as w29/w28 throughout.
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "sub w29, w28, w27, uxtb");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "sub w29, w28, w27, uxtb #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "sub w29, w28, w27, uxtb #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "sub w29, w28, w27, uxtb #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "sub w29, w28, w27, uxtb #4");

  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "sub w29, w28, w27, uxth");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "sub w29, w28, w27, uxth #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "sub w29, w28, w27, uxth #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "sub w29, w28, w27, uxth #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "sub w29, w28, w27, uxth #4");

  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "sub w29, w28, w27, uxtw");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "sub w29, w28, w27, uxtw #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "sub w29, w28, w27, uxtw #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "sub w29, w28, w27, uxtw #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "sub w29, w28, w27, uxtw #4");

  // UXTX: the extended operand is expected to print as x27 even though the operation is 32-bit.
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "sub w29, w28, x27, uxtx");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "sub w29, w28, x27, uxtx #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "sub w29, w28, x27, uxtx #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "sub w29, w28, x27, uxtx #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "sub w29, w28, x27, uxtx #4");

  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "sub w29, w28, w27, sxtb");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "sub w29, w28, w27, sxtb #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "sub w29, w28, w27, sxtb #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "sub w29, w28, w27, sxtb #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "sub w29, w28, w27, sxtb #4");

  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "sub w29, w28, w27, sxth");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "sub w29, w28, w27, sxth #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "sub w29, w28, w27, sxth #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "sub w29, w28, w27, sxth #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "sub w29, w28, w27, sxth #4");

  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "sub w29, w28, w27, sxtw");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "sub w29, w28, w27, sxtw #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "sub w29, w28, w27, sxtw #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "sub w29, w28, w27, sxtw #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "sub w29, w28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as x27 even though the operation is 32-bit.
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "sub w29, w28, x27, sxtx");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "sub w29, w28, x27, sxtx #1");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "sub w29, w28, x27, sxtx #2");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "sub w29, w28, x27, sxtx #3");
  TEST_SINGLE(sub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "sub w29, w28, x27, sxtx #4");

  // sub, 64-bit form, extended-register operand.
  // Each case pairs one emitter call with the disassembly text expected for it.
  // Every ExtendedType from UXTB through SXTX is exercised with shift amounts 0-4;
  // a shift of 0 is expected to be omitted from the text entirely.
  // The last source register is expected to print as w27 for the byte/halfword/word
  // extends and as x27 for UXTX/SXTX.
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "sub x29, x28, w27, uxtb");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "sub x29, x28, w27, uxtb #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "sub x29, x28, w27, uxtb #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "sub x29, x28, w27, uxtb #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "sub x29, x28, w27, uxtb #4");

  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "sub x29, x28, w27, uxth");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "sub x29, x28, w27, uxth #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "sub x29, x28, w27, uxth #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "sub x29, x28, w27, uxth #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "sub x29, x28, w27, uxth #4");

  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "sub x29, x28, w27, uxtw");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "sub x29, x28, w27, uxtw #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "sub x29, x28, w27, uxtw #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "sub x29, x28, w27, uxtw #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "sub x29, x28, w27, uxtw #4");

  // UXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "sub x29, x28, x27, uxtx");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "sub x29, x28, x27, uxtx #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "sub x29, x28, x27, uxtx #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "sub x29, x28, x27, uxtx #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "sub x29, x28, x27, uxtx #4");

  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "sub x29, x28, w27, sxtb");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "sub x29, x28, w27, sxtb #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "sub x29, x28, w27, sxtb #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "sub x29, x28, w27, sxtb #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "sub x29, x28, w27, sxtb #4");

  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "sub x29, x28, w27, sxth");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "sub x29, x28, w27, sxth #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "sub x29, x28, w27, sxth #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "sub x29, x28, w27, sxth #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "sub x29, x28, w27, sxth #4");

  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "sub x29, x28, w27, sxtw");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "sub x29, x28, w27, sxtw #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "sub x29, x28, w27, sxtw #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "sub x29, x28, w27, sxtw #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "sub x29, x28, w27, sxtw #4");

  // SXTX: the extended operand is expected to print as a 64-bit register (x27).
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "sub x29, x28, x27, sxtx");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "sub x29, x28, x27, sxtx #1");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "sub x29, x28, x27, sxtx #2");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "sub x29, x28, x27, sxtx #3");
  TEST_SINGLE(sub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "sub x29, x28, x27, sxtx #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "subs w29, w28, w27, uxtb");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "subs w29, w28, w27, uxtb #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "subs w29, w28, w27, uxtb #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "subs w29, w28, w27, uxtb #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "subs w29, w28, w27, uxtb #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "subs w29, w28, w27, uxth");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "subs w29, w28, w27, uxth #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "subs w29, w28, w27, uxth #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "subs w29, w28, w27, uxth #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "subs w29, w28, w27, uxth #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "subs w29, w28, w27, uxtw");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "subs w29, w28, w27, uxtw #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "subs w29, w28, w27, uxtw #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "subs w29, w28, w27, uxtw #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "subs w29, w28, w27, uxtw #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "subs w29, w28, x27, uxtx");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "subs w29, w28, x27, uxtx #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "subs w29, w28, x27, uxtx #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "subs w29, w28, x27, uxtx #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "subs w29, w28, x27, uxtx #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "subs w29, w28, w27, sxtb");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "subs w29, w28, w27, sxtb #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "subs w29, w28, w27, sxtb #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "subs w29, w28, w27, sxtb #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "subs w29, w28, w27, sxtb #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "subs w29, w28, w27, sxth");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "subs w29, w28, w27, sxth #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "subs w29, w28, w27, sxth #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "subs w29, w28, w27, sxth #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "subs w29, w28, w27, sxth #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "subs w29, w28, w27, sxtw");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "subs w29, w28, w27, sxtw #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "subs w29, w28, w27, sxtw #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "subs w29, w28, w27, sxtw #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "subs w29, w28, w27, sxtw #4");

  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "subs w29, w28, x27, sxtx");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "subs w29, w28, x27, sxtx #1");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "subs w29, w28, x27, sxtx #2");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "subs w29, w28, x27, sxtx #3");
  TEST_SINGLE(subs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "subs w29, w28, x27, sxtx #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 0), "subs x29, x28, w27, uxtb");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 1), "subs x29, x28, w27, uxtb #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 2), "subs x29, x28, w27, uxtb #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 3), "subs x29, x28, w27, uxtb #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTB, 4), "subs x29, x28, w27, uxtb #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 0), "subs x29, x28, w27, uxth");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 1), "subs x29, x28, w27, uxth #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 2), "subs x29, x28, w27, uxth #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 3), "subs x29, x28, w27, uxth #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTH, 4), "subs x29, x28, w27, uxth #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 0), "subs x29, x28, w27, uxtw");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 1), "subs x29, x28, w27, uxtw #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 2), "subs x29, x28, w27, uxtw #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 3), "subs x29, x28, w27, uxtw #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTW, 4), "subs x29, x28, w27, uxtw #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 0), "subs x29, x28, x27, uxtx");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 1), "subs x29, x28, x27, uxtx #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 2), "subs x29, x28, x27, uxtx #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 3), "subs x29, x28, x27, uxtx #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::UXTX, 4), "subs x29, x28, x27, uxtx #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 0), "subs x29, x28, w27, sxtb");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 1), "subs x29, x28, w27, sxtb #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 2), "subs x29, x28, w27, sxtb #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 3), "subs x29, x28, w27, sxtb #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTB, 4), "subs x29, x28, w27, sxtb #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 0), "subs x29, x28, w27, sxth");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 1), "subs x29, x28, w27, sxth #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 2), "subs x29, x28, w27, sxth #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 3), "subs x29, x28, w27, sxth #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTH, 4), "subs x29, x28, w27, sxth #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 0), "subs x29, x28, w27, sxtw");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 1), "subs x29, x28, w27, sxtw #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 2), "subs x29, x28, w27, sxtw #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 3), "subs x29, x28, w27, sxtw #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTW, 4), "subs x29, x28, w27, sxtw #4");

  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 0), "subs x29, x28, x27, sxtx");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 1), "subs x29, x28, x27, sxtx #1");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 2), "subs x29, x28, x27, sxtx #2");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 3), "subs x29, x28, x27, sxtx #3");
  TEST_SINGLE(subs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, ExtendedType::SXTX, 4), "subs x29, x28, x27, sxtx #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 0), "cmp w29, w28, uxtb");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 1), "cmp w29, w28, uxtb #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 2), "cmp w29, w28, uxtb #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 3), "cmp w29, w28, uxtb #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 4), "cmp w29, w28, uxtb #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 0), "cmp w29, w28, uxth");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 1), "cmp w29, w28, uxth #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 2), "cmp w29, w28, uxth #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 3), "cmp w29, w28, uxth #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 4), "cmp w29, w28, uxth #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_32, 0), "cmp w29, w28");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_32, 1), "cmp w29, w28, lsl #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_32, 2), "cmp w29, w28, lsl #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_32, 3), "cmp w29, w28, lsl #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_32, 4), "cmp w29, w28, lsl #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 0), "cmp w29, x28");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 1), "cmp w29, x28, lsl #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 2), "cmp w29, x28, lsl #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 3), "cmp w29, x28, lsl #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 4), "cmp w29, x28, lsl #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 0), "cmp w29, w28, sxtb");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 1), "cmp w29, w28, sxtb #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 2), "cmp w29, w28, sxtb #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 3), "cmp w29, w28, sxtb #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 4), "cmp w29, w28, sxtb #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 0), "cmp w29, w28, sxth");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 1), "cmp w29, w28, sxth #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 2), "cmp w29, w28, sxth #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 3), "cmp w29, w28, sxth #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 4), "cmp w29, w28, sxth #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 0), "cmp w29, w28, sxtw");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 1), "cmp w29, w28, sxtw #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 2), "cmp w29, w28, sxtw #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 3), "cmp w29, w28, sxtw #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 4), "cmp w29, w28, sxtw #4");

  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 0), "cmp w29, x28, sxtx");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 1), "cmp w29, x28, sxtx #1");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 2), "cmp w29, x28, sxtx #2");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 3), "cmp w29, x28, sxtx #3");
  TEST_SINGLE(cmp(Size::i32Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 4), "cmp w29, x28, sxtx #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 0), "cmp x29, w28, uxtb");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 1), "cmp x29, w28, uxtb #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 2), "cmp x29, w28, uxtb #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 3), "cmp x29, w28, uxtb #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTB, 4), "cmp x29, w28, uxtb #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 0), "cmp x29, w28, uxth");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 1), "cmp x29, w28, uxth #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 2), "cmp x29, w28, uxth #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 3), "cmp x29, w28, uxth #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTH, 4), "cmp x29, w28, uxth #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTW, 0), "cmp x29, w28, uxtw");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTW, 1), "cmp x29, w28, uxtw #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTW, 2), "cmp x29, w28, uxtw #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTW, 3), "cmp x29, w28, uxtw #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::UXTW, 4), "cmp x29, w28, uxtw #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 0), "cmp x29, x28");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 1), "cmp x29, x28, lsl #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 2), "cmp x29, x28, lsl #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 3), "cmp x29, x28, lsl #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::LSL_64, 4), "cmp x29, x28, lsl #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 0), "cmp x29, w28, sxtb");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 1), "cmp x29, w28, sxtb #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 2), "cmp x29, w28, sxtb #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 3), "cmp x29, w28, sxtb #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTB, 4), "cmp x29, w28, sxtb #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 0), "cmp x29, w28, sxth");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 1), "cmp x29, w28, sxth #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 2), "cmp x29, w28, sxth #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 3), "cmp x29, w28, sxth #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTH, 4), "cmp x29, w28, sxth #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 0), "cmp x29, w28, sxtw");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 1), "cmp x29, w28, sxtw #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 2), "cmp x29, w28, sxtw #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 3), "cmp x29, w28, sxtw #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTW, 4), "cmp x29, w28, sxtw #4");

  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 0), "cmp x29, x28, sxtx");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 1), "cmp x29, x28, sxtx #1");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 2), "cmp x29, x28, sxtx #2");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 3), "cmp x29, x28, sxtx #3");
  TEST_SINGLE(cmp(Size::i64Bit, Reg::r29, Reg::r28, ExtendedType::SXTX, 4), "cmp x29, x28, sxtx #4");
}
// Disassembly checks for the with-carry add/subtract emitters: adc, adcs, sbc, sbcs
// (three-register form) and ngc, ngcs (two-register form).
// Every mnemonic is exercised once per operand size: Size::i32Bit cases expect w-register
// names in the disassembly, Size::i64Bit cases expect x-register names.
// NOTE(review): TEST_SINGLE is defined outside this chunk; presumably it emits the given
// instruction and compares its disassembled text with the string literal -- confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: AddSub - with carry") {
  // adc: destination r29, sources r28 and r27.
  TEST_SINGLE(adc(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "adc w29, w28, w27");
  TEST_SINGLE(adc(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "adc x29, x28, x27");

  // adcs: same operands as adc, different mnemonic.
  TEST_SINGLE(adcs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "adcs w29, w28, w27");
  TEST_SINGLE(adcs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "adcs x29, x28, x27");

  // sbc: destination r29, sources r28 and r27.
  TEST_SINGLE(sbc(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "sbc w29, w28, w27");
  TEST_SINGLE(sbc(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "sbc x29, x28, x27");

  // sbcs: same operands as sbc, different mnemonic.
  TEST_SINGLE(sbcs(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "sbcs w29, w28, w27");
  TEST_SINGLE(sbcs(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "sbcs x29, x28, x27");

  // ngc: only two registers are passed (r29, r27) and only two appear in the expected text.
  TEST_SINGLE(ngc(Size::i32Bit, Reg::r29, Reg::r27), "ngc w29, w27");
  TEST_SINGLE(ngc(Size::i64Bit, Reg::r29, Reg::r27), "ngc x29, x27");

  // ngcs: two-register form, same operands as ngc.
  TEST_SINGLE(ngcs(Size::i32Bit, Reg::r29, Reg::r27), "ngcs w29, w27");
  TEST_SINGLE(ngcs(Size::i64Bit, Reg::r29, Reg::r27), "ngcs x29, x27");
}
// Disassembly checks for rmif. All cases use register x30 and the immediate 63 as the
// second operand; only the 4-bit mask (third operand) varies.
// As the expected strings show, the mask is printed as the letters "nzcv", with a letter
// upper-cased when its mask bit is set: bit 3 -> N, bit 2 -> Z, bit 1 -> C, bit 0 -> V.
// NOTE(review): only the immediate 63 is covered here; other values are not exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Rotate right into flags") {
  // Empty mask: all four letters stay lower-case.
  TEST_SINGLE(rmif(XReg::x30, 63, 0b0000), "rmif x30, #63, #nzcv");
  // One mask bit at a time, from bit 0 (V) up to bit 3 (N).
  TEST_SINGLE(rmif(XReg::x30, 63, 0b0001), "rmif x30, #63, #nzcV");
  TEST_SINGLE(rmif(XReg::x30, 63, 0b0010), "rmif x30, #63, #nzCv");
  TEST_SINGLE(rmif(XReg::x30, 63, 0b0100), "rmif x30, #63, #nZcv");
  TEST_SINGLE(rmif(XReg::x30, 63, 0b1000), "rmif x30, #63, #Nzcv");
  // Full mask: all four letters upper-case.
  TEST_SINGLE(rmif(XReg::x30, 63, 0b1111), "rmif x30, #63, #NZCV");
}
// Disassembly checks for setf8 and setf16. Each takes a single W register (w30 here)
// and is expected to disassemble to the mnemonic followed by that register.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Evaluate into flags") {
  TEST_SINGLE(setf8(WReg::w30), "setf8 w30");
  TEST_SINGLE(setf16(WReg::w30), "setf16 w30");
}
// Disassembly check for cfinv, which takes no operands; the expected text is the bare mnemonic.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Carry flag invert") {
  TEST_SINGLE(cfinv(), "cfinv");
}
// Disassembly check for axflag, which takes no operands; the expected text is the bare mnemonic.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Arm to eXternal FLAG") {
  TEST_SINGLE(axflag(), "axflag");
}
// Disassembly check for xaflag, which takes no operands; the expected text is the bare mnemonic.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: eXternal to Arm FLAG") {
  TEST_SINGLE(xaflag(), "xaflag");
}
// Disassembly checks for the register-register forms of ccmn and ccmp.
// Layout of the table: for each mnemonic (ccmn, then ccmp), for each operand size
// (Size::i32Bit -> w registers, Size::i64Bit -> x registers), for each condition
// (Condition::CC_AL -> "al", Condition::CC_EQ -> "eq"), six StatusFlags values are tried:
// None, Flag_N, Flag_Z, Flag_C, Flag_V and Flag_NZCV.
// The expected flag operand is the text "#nzcv" with a letter upper-cased for each flag
// that is set (e.g. Flag_C -> "#nzCv", Flag_NZCV -> "#NZCV").
// The compared registers are always r29 and r28.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Conditional compare - register") {
  // ccmn, 32-bit, condition al.
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_AL), "ccmn w29, w28, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_AL), "ccmn w29, w28, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn w29, w28, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_AL), "ccmn w29, w28, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_AL), "ccmn w29, w28, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn w29, w28, #NZCV, al");

  // ccmn, 32-bit, condition eq.
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_EQ), "ccmn w29, w28, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn w29, w28, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn w29, w28, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn w29, w28, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn w29, w28, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn w29, w28, #NZCV, eq");

  // ccmn, 64-bit, condition al.
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_AL), "ccmn x29, x28, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_AL), "ccmn x29, x28, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn x29, x28, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_AL), "ccmn x29, x28, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_AL), "ccmn x29, x28, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn x29, x28, #NZCV, al");

  // ccmn, 64-bit, condition eq.
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_EQ), "ccmn x29, x28, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn x29, x28, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn x29, x28, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn x29, x28, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn x29, x28, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn x29, x28, #NZCV, eq");

  // ccmp, 32-bit, condition al.
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_AL), "ccmp w29, w28, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_AL), "ccmp w29, w28, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp w29, w28, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_AL), "ccmp w29, w28, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_AL), "ccmp w29, w28, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp w29, w28, #NZCV, al");

  // ccmp, 32-bit, condition eq.
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_EQ), "ccmp w29, w28, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp w29, w28, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp w29, w28, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp w29, w28, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp w29, w28, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp w29, w28, #NZCV, eq");

  // ccmp, 64-bit, condition al.
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_AL), "ccmp x29, x28, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_AL), "ccmp x29, x28, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp x29, x28, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_AL), "ccmp x29, x28, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_AL), "ccmp x29, x28, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp x29, x28, #NZCV, al");

  // ccmp, 64-bit, condition eq.
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::None, Condition::CC_EQ), "ccmp x29, x28, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp x29, x28, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp x29, x28, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp x29, x28, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp x29, x28, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, Reg::r28, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp x29, x28, #NZCV, eq");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Conditional compare - immediate") {
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_AL), "ccmn w29, #0, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_AL), "ccmn w29, #0, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn w29, #0, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_AL), "ccmn w29, #0, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_AL), "ccmn w29, #0, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn w29, #0, #NZCV, al");

  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_EQ), "ccmn w29, #0, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn w29, #0, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn w29, #0, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn w29, #0, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn w29, #0, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn w29, #0, #NZCV, eq");

  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_AL), "ccmn x29, #0, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_AL), "ccmn x29, #0, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn x29, #0, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_AL), "ccmn x29, #0, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_AL), "ccmn x29, #0, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn x29, #0, #NZCV, al");

  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_EQ), "ccmn x29, #0, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn x29, #0, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn x29, #0, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn x29, #0, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn x29, #0, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn x29, #0, #NZCV, eq");

  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_AL), "ccmn w29, #31, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_AL), "ccmn w29, #31, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn w29, #31, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_AL), "ccmn w29, #31, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_AL), "ccmn w29, #31, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn w29, #31, #NZCV, al");

  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_EQ), "ccmn w29, #31, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn w29, #31, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn w29, #31, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn w29, #31, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn w29, #31, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn w29, #31, #NZCV, eq");

  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_AL), "ccmn x29, #31, #nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_AL), "ccmn x29, #31, #Nzcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_AL), "ccmn x29, #31, #nZcv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_AL), "ccmn x29, #31, #nzCv, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_AL), "ccmn x29, #31, #nzcV, al");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmn x29, #31, #NZCV, al");

  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_EQ), "ccmn x29, #31, #nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_EQ), "ccmn x29, #31, #Nzcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmn x29, #31, #nZcv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_EQ), "ccmn x29, #31, #nzCv, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_EQ), "ccmn x29, #31, #nzcV, eq");
  TEST_SINGLE(ccmn(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmn x29, #31, #NZCV, eq");

  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_AL), "ccmp w29, #0, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_AL), "ccmp w29, #0, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp w29, #0, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_AL), "ccmp w29, #0, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_AL), "ccmp w29, #0, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp w29, #0, #NZCV, al");

  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_EQ), "ccmp w29, #0, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp w29, #0, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp w29, #0, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp w29, #0, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp w29, #0, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp w29, #0, #NZCV, eq");

  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_AL), "ccmp x29, #0, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_AL), "ccmp x29, #0, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp x29, #0, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_AL), "ccmp x29, #0, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_AL), "ccmp x29, #0, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp x29, #0, #NZCV, al");

  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::None, Condition::CC_EQ), "ccmp x29, #0, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp x29, #0, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp x29, #0, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp x29, #0, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp x29, #0, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 0, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp x29, #0, #NZCV, eq");

  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_AL), "ccmp w29, #31, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_AL), "ccmp w29, #31, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp w29, #31, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_AL), "ccmp w29, #31, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_AL), "ccmp w29, #31, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp w29, #31, #NZCV, al");

  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_EQ), "ccmp w29, #31, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp w29, #31, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp w29, #31, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp w29, #31, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp w29, #31, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i32Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp w29, #31, #NZCV, eq");

  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_AL), "ccmp x29, #31, #nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_AL), "ccmp x29, #31, #Nzcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_AL), "ccmp x29, #31, #nZcv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_AL), "ccmp x29, #31, #nzCv, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_AL), "ccmp x29, #31, #nzcV, al");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_AL), "ccmp x29, #31, #NZCV, al");

  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::None, Condition::CC_EQ), "ccmp x29, #31, #nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_N, Condition::CC_EQ), "ccmp x29, #31, #Nzcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_Z, Condition::CC_EQ), "ccmp x29, #31, #nZcv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_C, Condition::CC_EQ), "ccmp x29, #31, #nzCv, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_V, Condition::CC_EQ), "ccmp x29, #31, #nzcV, eq");
  TEST_SINGLE(ccmp(Size::i64Bit, Reg::r29, 31, StatusFlags::Flag_NZCV, Condition::CC_EQ), "ccmp x29, #31, #NZCV, eq");
}
// Checks the disassembly text expected for the conditional-select family
// (csel, cset, csinc, csinv, csneg, cneg, cinc, cinv, csetm) at both the
// 32-bit (w registers) and 64-bit (x registers) operand sizes.
// Each TEST_SINGLE pairs one emitter call with the string it is expected to
// disassemble to; the macro itself is defined outside this part of the file.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Conditional select") {
  // Base instructions and the single-source aliases, all with the eq condition.
  TEST_SINGLE(csel(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csel w29, w28, w27, eq");
  TEST_SINGLE(csel(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csel x29, x28, x27, eq");
  TEST_SINGLE(cset(Size::i32Bit, Reg::r29, Condition::CC_EQ), "cset w29, eq");
  TEST_SINGLE(cset(Size::i64Bit, Reg::r29, Condition::CC_EQ), "cset x29, eq");
  TEST_SINGLE(csinc(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csinc w29, w28, w27, eq");
  TEST_SINGLE(csinc(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csinc x29, x28, x27, eq");
  TEST_SINGLE(csinv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csinv w29, w28, w27, eq");
  TEST_SINGLE(csinv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csinv x29, x28, x27, eq");
  TEST_SINGLE(csneg(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csneg w29, w28, w27, eq");
  TEST_SINGLE(csneg(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_EQ), "csneg x29, x28, x27, eq");
  TEST_SINGLE(cneg(Size::i32Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cneg w29, w28, eq");
  TEST_SINGLE(cneg(Size::i64Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cneg x29, x28, eq");

  TEST_SINGLE(cinc(Size::i32Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cinc w29, w28, eq");
  TEST_SINGLE(cinc(Size::i64Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cinc x29, x28, eq");
  TEST_SINGLE(cinv(Size::i32Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cinv w29, w28, eq");
  TEST_SINGLE(cinv(Size::i64Bit, Reg::r29, Reg::r28, Condition::CC_EQ), "cinv x29, x28, eq");
  TEST_SINGLE(csetm(Size::i32Bit, Reg::r29, Condition::CC_EQ), "csetm w29, eq");
  TEST_SINGLE(csetm(Size::i64Bit, Reg::r29, Condition::CC_EQ), "csetm x29, eq");

  // Same instructions with the al condition.
  // NOTE(review): cneg, cinc, cinv and csetm are not exercised with CC_AL in this group.
  TEST_SINGLE(csel(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csel w29, w28, w27, al");
  TEST_SINGLE(csel(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csel x29, x28, x27, al");
  // In A64, cset is an alias of csinc with both sources being the zero register and the
  // condition inverted. For al the inverted condition is nv, and the expected text here
  // is the plain csinc form rather than the cset alias.
  TEST_SINGLE(cset(Size::i32Bit, Reg::r29, Condition::CC_AL), "csinc w29, wzr, wzr, nv");
  TEST_SINGLE(cset(Size::i64Bit, Reg::r29, Condition::CC_AL), "csinc x29, xzr, xzr, nv");
  TEST_SINGLE(csinc(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csinc w29, w28, w27, al");
  TEST_SINGLE(csinc(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csinc x29, x28, x27, al");
  TEST_SINGLE(csinv(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csinv w29, w28, w27, al");
  TEST_SINGLE(csinv(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csinv x29, x28, x27, al");
  TEST_SINGLE(csneg(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csneg w29, w28, w27, al");
  TEST_SINGLE(csneg(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Condition::CC_AL), "csneg x29, x28, x27, al");
}
// Checks the disassembly text expected for the multiply / multiply-accumulate
// instructions. Each TEST_SINGLE pairs one emitter call with the string it is
// expected to disassemble to; the macro is defined outside this part of the file.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ALU: Data processing - 3 source") {
  // Same-width forms, exercised at both 32-bit (w) and 64-bit (x) sizes.
  // mul and mneg take no accumulator operand and are expected to print without one.
  TEST_SINGLE(madd(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Reg::r26), "madd w29, w28, w27, w26");
  TEST_SINGLE(madd(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Reg::r26), "madd x29, x28, x27, x26");
  TEST_SINGLE(mul(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "mul w29, w28, w27");
  TEST_SINGLE(mul(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "mul x29, x28, x27");
  TEST_SINGLE(msub(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27, Reg::r26), "msub w29, w28, w27, w26");
  TEST_SINGLE(msub(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27, Reg::r26), "msub x29, x28, x27, x26");
  TEST_SINGLE(mneg(Size::i32Bit, Reg::r29, Reg::r28, Reg::r27), "mneg w29, w28, w27");
  TEST_SINGLE(mneg(Size::i64Bit, Reg::r29, Reg::r28, Reg::r27), "mneg x29, x28, x27");

  // Signed forms: the *l variants take W-register multiplicands with an X-register
  // destination (and X accumulator where present); smulh uses X registers throughout.
  TEST_SINGLE(smaddl(XReg::x29, WReg::w28, WReg::w27, XReg::x26), "smaddl x29, w28, w27, x26");
  TEST_SINGLE(smull(XReg::x29, WReg::w28, WReg::w27), "smull x29, w28, w27");
  TEST_SINGLE(smsubl(XReg::x29, WReg::w28, WReg::w27, XReg::x26), "smsubl x29, w28, w27, x26");
  TEST_SINGLE(smnegl(XReg::x29, WReg::w28, WReg::w27), "smnegl x29, w28, w27");
  TEST_SINGLE(smulh(XReg::x29, XReg::x28, XReg::x27), "smulh x29, x28, x27");

  // Unsigned counterparts of the group above, with the same operand shapes.
  TEST_SINGLE(umaddl(XReg::x29, WReg::w28, WReg::w27, XReg::x26), "umaddl x29, w28, w27, x26");
  TEST_SINGLE(umull(XReg::x29, WReg::w28, WReg::w27), "umull x29, w28, w27");
  TEST_SINGLE(umsubl(XReg::x29, WReg::w28, WReg::w27, XReg::x26), "umsubl x29, w28, w27, x26");
  TEST_SINGLE(umnegl(XReg::x29, WReg::w28, WReg::w27), "umnegl x29, w28, w27");
  TEST_SINGLE(umulh(XReg::x29, XReg::x28, XReg::x27), "umulh x29, x28, x27");
}


================================================
FILE: FEXCore/unittests/Emitter/ASIMD_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Checks the disassembly text expected for the four AES instructions.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to. Both operands are passed as VReg and
// are expected to print with the 16-byte (.16b) arrangement.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic AES") {
  TEST_SINGLE(aese(VReg::v30, VReg::v29), "aese v30.16b, v29.16b");
  TEST_SINGLE(aesd(VReg::v30, VReg::v29), "aesd v30.16b, v29.16b");
  TEST_SINGLE(aesmc(VReg::v30, VReg::v29), "aesmc v30.16b, v29.16b");
  TEST_SINGLE(aesimc(VReg::v30, VReg::v29), "aesimc v30.16b, v29.16b");
}
// Checks the disassembly text expected for the three-operand SHA1/SHA256 instructions.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic three-register SHA") {
  // sha1c/sha1p/sha1m: the destination VReg is expected to print as a q register, the
  // second operand is an SReg, and the third prints with the .4s arrangement.
  TEST_SINGLE(sha1c(VReg::v30, SReg::s29, VReg::v28), "sha1c q30, s29, v28.4s");
  TEST_SINGLE(sha1p(VReg::v30, SReg::s29, VReg::v28), "sha1p q30, s29, v28.4s");
  TEST_SINGLE(sha1m(VReg::v30, SReg::s29, VReg::v28), "sha1m q30, s29, v28.4s");
  // sha1su0 / sha256su1: all three operands print with the .4s arrangement.
  TEST_SINGLE(sha1su0(VReg::v30, VReg::v29, VReg::v28), "sha1su0 v30.4s, v29.4s, v28.4s");
  // sha256h/sha256h2: first two operands print as q registers, the third as .4s.
  TEST_SINGLE(sha256h(VReg::v30, VReg::v29, VReg::v28), "sha256h q30, q29, v28.4s");
  TEST_SINGLE(sha256h2(VReg::v30, VReg::v29, VReg::v28), "sha256h2 q30, q29, v28.4s");
  TEST_SINGLE(sha256su1(VReg::v30, VReg::v29, VReg::v28), "sha256su1 v30.4s, v29.4s, v28.4s");
}
// Checks the disassembly text expected for the two-operand SHA instructions.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic two-register SHA") {
  // sha1h takes scalar S registers; the schedule-update instructions print as .4s vectors.
  TEST_SINGLE(sha1h(SReg::s30, SReg::s29), "sha1h s30, s29");
  TEST_SINGLE(sha1su1(VReg::v30, VReg::v29), "sha1su1 v30.4s, v29.4s");
  TEST_SINGLE(sha256su0(VReg::v30, VReg::v29), "sha256su0 v30.4s, v29.4s");
}
// Checks the disassembly text expected for tbl and tbx with one to four table registers.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
//
// In every case the table registers are passed as QReg and print as .16b; the
// destination and index registers print as .16b for the QReg overloads and .8b for
// the DReg overloads. The multi-register cases are run twice: once with a list
// starting at q31, whose expected text continues at v0, and once with a list
// starting at q26.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD table lookup") {
  // Single table register.
  TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q25), "tbl v30.16b, {v26.16b}, v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q26, DReg::d25), "tbl v30.8b, {v26.16b}, v25.8b");
  TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q25), "tbx v30.16b, {v26.16b}, v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q26, DReg::d25), "tbx v30.8b, {v26.16b}, v25.8b");

  // Two table registers.
  TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b}, v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b}, v25.8b");
  TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b}, v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b}, v25.8b");

  TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b}, v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b}, v25.8b");
  TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b}, v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b}, v25.8b");

  // Three table registers.
  TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b}, v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b");
  TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b, v28.16b}, v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, QReg::q28, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b, v28.16b}, v25.8b");

  TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b}, v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b}, v25.8b");
  TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b, v28.16b}, v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, QReg::q28, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b, v28.16b}, v25.8b");

  // Four table registers. Some expected strings are split across two adjacent
  // literals purely for line length; the compiler concatenates them.
  TEST_SINGLE(tbl(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, QReg::q25), "tbl v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                  "v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbl v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b");
  TEST_SINGLE(tbl(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbl v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                     "v25.16b");
  TEST_SINGLE(tbl(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbl v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                     "v25.8b");

  TEST_SINGLE(tbx(QReg::q30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, QReg::q25), "tbx v30.16b, {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                  "v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q31, QReg::q0, QReg::q1, QReg::q2, DReg::d25), "tbx v30.8b, {v31.16b, v0.16b, v1.16b, v2.16b}, v25.8b");
  TEST_SINGLE(tbx(QReg::q30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, QReg::q25), "tbx v30.16b, {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                     "v25.16b");
  TEST_SINGLE(tbx(DReg::d30, QReg::q26, QReg::q27, QReg::q28, QReg::q29, DReg::d25), "tbx v30.8b, {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                     "v25.8b");
}
// Checks the disassembly text expected for the permute instructions
// (uzp1, trn1, zip1, uzp2, trn2, zip2).
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
//
// Every instruction is exercised with QReg operands at 8/16/32/64-bit element
// sizes and with DReg operands at 8/16/32-bit element sizes.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD permute") {
  // Commented out lines showcase unallocated encodings.
  // (In this test case those are the 64-bit element size with DReg operands.)
  TEST_SINGLE(uzp1(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uzp1 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uzp1(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uzp1 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uzp1(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uzp1 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(uzp1(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uzp1 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uzp1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uzp1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uzp1(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uzp1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp1 v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(trn1(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "trn1 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(trn1(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "trn1 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(trn1(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "trn1 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(trn1(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "trn1 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(trn1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(trn1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(trn1(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(trn1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn1 v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(zip1(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "zip1 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(zip1(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "zip1 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(zip1(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "zip1 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(zip1(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "zip1 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(zip1(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(zip1(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(zip1(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(zip1(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip1 v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uzp2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uzp2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uzp2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(uzp2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uzp2 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uzp2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uzp2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uzp2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uzp2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uzp2 v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(trn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "trn2 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(trn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "trn2 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(trn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "trn2 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(trn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "trn2 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(trn2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(trn2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(trn2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(trn2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "trn2 v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(zip2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "zip2 v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(zip2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "zip2 v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(zip2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "zip2 v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(zip2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "zip2 v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(zip2(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(zip2(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(zip2(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(zip2(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "zip2 v30.1d, v29.1d, v28.1d");
}
// Checks the disassembly text expected for ext.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
// The QReg form is exercised with immediates 0 and 15, the DReg form with 0 and 7.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD extract") {
  TEST_SINGLE(ext(QReg::q30, QReg::q29, QReg::q28, 0), "ext v30.16b, v29.16b, v28.16b, #0");
  TEST_SINGLE(ext(QReg::q30, QReg::q29, QReg::q28, 15), "ext v30.16b, v29.16b, v28.16b, #15");
  TEST_SINGLE(ext(DReg::d30, DReg::d29, DReg::d28, 0), "ext v30.8b, v29.8b, v28.8b, #0");
  TEST_SINGLE(ext(DReg::d30, DReg::d29, DReg::d28, 7), "ext v30.8b, v29.8b, v28.8b, #7");
}
// Checks the disassembly text expected for the vector copy instructions:
// dup (from a vector element and from a general register), smov, umov, and ins
// (from a general register and from a vector element).
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD copy") {
  // Commented out lines showcase unallocated encodings.
  // dup from a vector element, QReg destination: element index 0 and the highest
  // index for each element size.
  TEST_SINGLE(dup(SubRegSize::i8Bit, QReg::q30, QReg::q29, 0), "dup v30.16b, v29.b[0]");
  TEST_SINGLE(dup(SubRegSize::i16Bit, QReg::q30, QReg::q29, 0), "dup v30.8h, v29.h[0]");
  TEST_SINGLE(dup(SubRegSize::i32Bit, QReg::q30, QReg::q29, 0), "dup v30.4s, v29.s[0]");
  TEST_SINGLE(dup(SubRegSize::i64Bit, QReg::q30, QReg::q29, 0), "dup v30.2d, v29.d[0]");
  TEST_SINGLE(dup(SubRegSize::i8Bit, QReg::q30, QReg::q29, 15), "dup v30.16b, v29.b[15]");
  TEST_SINGLE(dup(SubRegSize::i16Bit, QReg::q30, QReg::q29, 7), "dup v30.8h, v29.h[7]");
  TEST_SINGLE(dup(SubRegSize::i32Bit, QReg::q30, QReg::q29, 3), "dup v30.4s, v29.s[3]");
  TEST_SINGLE(dup(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "dup v30.2d, v29.d[1]");

  // dup from a vector element, DReg destination. The same source indices as the
  // QReg cases are used (e.g. b[15]), even though the source is passed as a DReg.
  TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, DReg::d29, 0), "dup v30.8b, v29.b[0]");
  TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, DReg::d29, 0), "dup v30.4h, v29.h[0]");
  TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, DReg::d29, 0), "dup v30.2s, v29.s[0]");
  // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 0), "dup v30.1d, v29.d[0]");
  TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, DReg::d29, 15), "dup v30.8b, v29.b[15]");
  TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, DReg::d29, 7), "dup v30.4h, v29.h[7]");
  TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, DReg::d29, 3), "dup v30.2s, v29.s[3]");
  // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "dup v30.1d, v29.d[1]");

  // dup from a general register: the source is expected to print as a w register
  // for 8/16/32-bit elements and as an x register for 64-bit elements.
  TEST_SINGLE(dup(SubRegSize::i8Bit, QReg::q30, Reg::r29), "dup v30.16b, w29");
  TEST_SINGLE(dup(SubRegSize::i16Bit, QReg::q30, Reg::r29), "dup v30.8h, w29");
  TEST_SINGLE(dup(SubRegSize::i32Bit, QReg::q30, Reg::r29), "dup v30.4s, w29");
  TEST_SINGLE(dup(SubRegSize::i64Bit, QReg::q30, Reg::r29), "dup v30.2d, x29");

  TEST_SINGLE(dup(SubRegSize::i8Bit, DReg::d30, Reg::r29), "dup v30.8b, w29");
  TEST_SINGLE(dup(SubRegSize::i16Bit, DReg::d30, Reg::r29), "dup v30.4h, w29");
  TEST_SINGLE(dup(SubRegSize::i32Bit, DReg::d30, Reg::r29), "dup v30.2s, w29");
  // TEST_SINGLE(dup(SubRegSize::i64Bit, DReg::d30, Reg::r29), "dup v30.1d, x29");
  // smov to an X register: 8/16/32-bit source elements.
  TEST_SINGLE(smov<SubRegSize::i8Bit>(XReg::x29, VReg::v30, 0), "smov x29, v30.b[0]");
  TEST_SINGLE(smov<SubRegSize::i8Bit>(XReg::x29, VReg::v30, 15), "smov x29, v30.b[15]");
  TEST_SINGLE(smov<SubRegSize::i16Bit>(XReg::x29, VReg::v30, 0), "smov x29, v30.h[0]");
  TEST_SINGLE(smov<SubRegSize::i16Bit>(XReg::x29, VReg::v30, 7), "smov x29, v30.h[7]");
  TEST_SINGLE(smov<SubRegSize::i32Bit>(XReg::x29, VReg::v30, 0), "smov x29, v30.s[0]");
  TEST_SINGLE(smov<SubRegSize::i32Bit>(XReg::x29, VReg::v30, 3), "smov x29, v30.s[3]");

  // smov to a W register: only 8/16-bit source elements are exercised.
  TEST_SINGLE(smov<SubRegSize::i8Bit>(WReg::w29, VReg::v30, 0), "smov w29, v30.b[0]");
  TEST_SINGLE(smov<SubRegSize::i8Bit>(WReg::w29, VReg::v30, 15), "smov w29, v30.b[15]");
  TEST_SINGLE(smov<SubRegSize::i16Bit>(WReg::w29, VReg::v30, 0), "smov w29, v30.h[0]");
  TEST_SINGLE(smov<SubRegSize::i16Bit>(WReg::w29, VReg::v30, 7), "smov w29, v30.h[7]");

  // umov: the 32-bit and 64-bit element cases are expected to disassemble under
  // the mov alias, while the 8/16-bit cases keep the umov mnemonic.
  TEST_SINGLE(umov<SubRegSize::i8Bit>(Reg::r29, VReg::v30, 0), "umov w29, v30.b[0]");
  TEST_SINGLE(umov<SubRegSize::i8Bit>(Reg::r29, VReg::v30, 15), "umov w29, v30.b[15]");
  TEST_SINGLE(umov<SubRegSize::i16Bit>(Reg::r29, VReg::v30, 0), "umov w29, v30.h[0]");
  TEST_SINGLE(umov<SubRegSize::i16Bit>(Reg::r29, VReg::v30, 7), "umov w29, v30.h[7]");
  TEST_SINGLE(umov<SubRegSize::i32Bit>(Reg::r29, VReg::v30, 0), "mov w29, v30.s[0]");
  TEST_SINGLE(umov<SubRegSize::i32Bit>(Reg::r29, VReg::v30, 3), "mov w29, v30.s[3]");
  TEST_SINGLE(umov<SubRegSize::i64Bit>(Reg::r29, VReg::v30, 0), "mov x29, v30.d[0]");
  TEST_SINGLE(umov<SubRegSize::i64Bit>(Reg::r29, VReg::v30, 1), "mov x29, v30.d[1]");

  // ins from a general register: always expected to disassemble under the mov alias.
  TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, Reg::r29), "mov v30.b[0], w29");
  TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 0, Reg::r29), "mov v30.h[0], w29");
  TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 0, Reg::r29), "mov v30.s[0], w29");
  TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 0, Reg::r29), "mov v30.d[0], x29");
  TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 15, Reg::r29), "mov v30.b[15], w29");
  TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 7, Reg::r29), "mov v30.h[7], w29");
  TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 3, Reg::r29), "mov v30.s[3], w29");
  TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 1, Reg::r29), "mov v30.d[1], x29");

  // ins from a vector element: destination index first, then source register and
  // source index. Each size is run with (dest 0, src highest) and (dest highest, src 0).
  TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 0, VReg::v29, 15), "mov v30.b[0], v29.b[15]");
  TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 0, VReg::v29, 7), "mov v30.h[0], v29.h[7]");
  TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 0, VReg::v29, 3), "mov v30.s[0], v29.s[3]");
  TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 0, VReg::v29, 1), "mov v30.d[0], v29.d[1]");
  TEST_SINGLE(ins(SubRegSize::i8Bit, VReg::v30, 15, VReg::v29, 0), "mov v30.b[15], v29.b[0]");
  TEST_SINGLE(ins(SubRegSize::i16Bit, VReg::v30, 7, VReg::v29, 0), "mov v30.h[7], v29.h[0]");
  TEST_SINGLE(ins(SubRegSize::i32Bit, VReg::v30, 3, VReg::v29, 0), "mov v30.s[3], v29.s[0]");
  TEST_SINGLE(ins(SubRegSize::i64Bit, VReg::v30, 1, VReg::v29, 0), "mov v30.d[1], v29.d[0]");
}
// Checks the disassembly text expected for the half-precision three-operand
// vector floating-point instructions.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
// Every case uses SubRegSize::i16Bit with QReg operands, so each expected string
// uses the .8h arrangement. DReg (.4h) forms are not exercised in this test case.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same (FP16)") {
  TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmulx(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(frecps(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(frsqrts(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(faddp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(facge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fdiv(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(facgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.8h, v29.8h, v28.8h");
}
// Checks the disassembly text expected for the half-precision two-operand vector
// instructions: rounding (frint*), float-to-integer conversion (fcvt*),
// integer-to-float conversion (scvtf/ucvtf), compare-against-zero, abs/neg,
// estimates (frecpe/frsqrte) and fsqrt.
// Each TEST_SINGLE (from TestDisassembler.h) pairs one emitter call with the
// string it is expected to disassemble to.
// Every case uses SubRegSize::i16Bit with QReg operands, so each expected string
// uses the .8h arrangement. DReg (.4h) forms are not exercised in this test case.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register miscellaneous (FP16)") {
  TEST_SINGLE(frintn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frintn v30.8h, v29.8h");
  TEST_SINGLE(frintm(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frintm v30.8h, v29.8h");
  TEST_SINGLE(fcvtns(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtns v30.8h, v29.8h");
  TEST_SINGLE(fcvtms(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtms v30.8h, v29.8h");
  TEST_SINGLE(fcvtas(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtas v30.8h, v29.8h");
  TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29), "scvtf v30.8h, v29.8h");
  // The two-register overloads of the fcm* comparisons are expected to disassemble
  // as comparisons against the immediate #0.0.
  TEST_SINGLE(fcmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcmgt v30.8h, v29.8h, #0.0");
  TEST_SINGLE(fcmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcmeq v30.8h, v29.8h, #0.0");
  TEST_SINGLE(fcmlt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcmlt v30.8h, v29.8h, #0.0");
  TEST_SINGLE(fabs(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fabs v30.8h, v29.8h");
  TEST_SINGLE(frintp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frintp v30.8h, v29.8h");
  TEST_SINGLE(frintz(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frintz v30.8h, v29.8h");
  TEST_SINGLE(fcvtps(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtps v30.8h, v29.8h");
  TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtzs v30.8h, v29.8h");
  TEST_SINGLE(frecpe(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frecpe v30.8h, v29.8h");
  TEST_SINGLE(frinta(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frinta v30.8h, v29.8h");
  TEST_SINGLE(frintx(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frintx v30.8h, v29.8h");
  TEST_SINGLE(fcvtnu(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtnu v30.8h, v29.8h");
  TEST_SINGLE(fcvtmu(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtmu v30.8h, v29.8h");
  TEST_SINGLE(fcvtau(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtau v30.8h, v29.8h");
  TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29), "ucvtf v30.8h, v29.8h");
  TEST_SINGLE(fcmge(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcmge v30.8h, v29.8h, #0.0");
  TEST_SINGLE(fcmle(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcmle v30.8h, v29.8h, #0.0");
  TEST_SINGLE(fneg(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fneg v30.8h, v29.8h");
  TEST_SINGLE(frinti(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frinti v30.8h, v29.8h");
  TEST_SINGLE(fcvtpu(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtpu v30.8h, v29.8h");
  TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtzu v30.8h, v29.8h");
  TEST_SINGLE(frsqrte(SubRegSize::i16Bit, QReg::q30, QReg::q29), "frsqrte v30.8h, v29.8h");
  TEST_SINGLE(fsqrt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fsqrt v30.8h, v29.8h");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three-register extension") {
  TEST_SINGLE(sdot(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sdot v30.4s, v29.16b, v28.16b");
  TEST_SINGLE(sdot(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sdot v30.2s, v29.8b, v28.8b");

  TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q28), "usdot v30.4s, v29.16b, v28.16b");
  TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d28), "usdot v30.2s, v29.8b, v28.8b");

  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlah v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlah v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqrdmlah(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlah v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlsh v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmlsh v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqrdmlsh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmlsh v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(udot(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "udot v30.4s, v29.16b, v28.16b");
  TEST_SINGLE(udot(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.2s, v29.8b, v28.8b");
  // TEST_SINGLE(udot(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "udot v30.1d, v29.8b, v28.8b");

  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.16b, v29.16b, v28.16b, #0");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.8h, v29.8h, v28.8h, #0");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.4s, v29.4s, v28.4s, #0");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_0), "fcmla v30.2d, v29.2d, v28.2d, #0");
  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.8b, v29.8b, v28.8b, #0");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.4h, v29.4h, v28.4h, #0");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.2s, v29.2s, v28.2s, #0");
  // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_0), "fcmla v30.1d, v29.1d, v28.1d, #0");

  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.16b, v29.16b, v28.16b, #90");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.8h, v29.8h, v28.8h, #90");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.4s, v29.4s, v28.4s, #90");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcmla v30.2d, v29.2d, v28.2d, #90");
  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.8b, v29.8b, v28.8b, #90");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.4h, v29.4h, v28.4h, #90");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.2s, v29.2s, v28.2s, #90");
  // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcmla v30.1d, v29.1d, v28.1d, #90");

  // Vixl disassembler has a bug that claims 8-bit fcmla exists; the 8-bit fcmla cases for every rotation stay commented out.
  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.16b, v29.16b, v28.16b, #180");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.8h, v29.8h, v28.8h, #180");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.4s, v29.4s, v28.4s, #180");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_180), "fcmla v30.2d, v29.2d, v28.2d, #180");
  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.8b, v29.8b, v28.8b, #180");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.4h, v29.4h, v28.4h, #180");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.2s, v29.2s, v28.2s, #180");
  // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_180), "fcmla v30.1d, v29.1d, v28.1d, #180");

  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.16b, v29.16b, v28.16b, #270");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.8h, v29.8h, v28.8h, #270");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.4s, v29.4s, v28.4s, #270");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcmla v30.2d, v29.2d, v28.2d, #270");
  // TEST_SINGLE(fcmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.8b, v29.8b, v28.8b, #270");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.4h, v29.4h, v28.4h, #270");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.2s, v29.2s, v28.2s, #270");
  // TEST_SINGLE(fcmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcmla v30.1d, v29.1d, v28.1d, #270");

  // Vixl disassembler has a bug that claims 8-bit fcadd exists
  // TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.16b, v29.16b, v28.16b, #90");
  TEST_SINGLE(fcadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.8h, v29.8h, v28.8h, #90");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.4s, v29.4s, v28.4s, #90");
  TEST_SINGLE(fcadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_90), "fcadd v30.2d, v29.2d, v28.2d, #90");
  // TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.8b, v29.8b, v28.8b, #90");
  TEST_SINGLE(fcadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.4h, v29.4h, v28.4h, #90");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.2s, v29.2s, v28.2s, #90");
  // TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_90), "fcadd v30.1d, v29.1d, v28.1d, #90");

  // TEST_SINGLE(fcadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.16b, v29.16b, v28.16b, #270");
  TEST_SINGLE(fcadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.8h, v29.8h, v28.8h, #270");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.4s, v29.4s, v28.4s, #270");
  TEST_SINGLE(fcadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28, Rotation::ROTATE_270), "fcadd v30.2d, v29.2d, v28.2d, #270");
  // TEST_SINGLE(fcadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.8b, v29.8b, v28.8b, #270");
  TEST_SINGLE(fcadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.4h, v29.4h, v28.4h, #270");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.2s, v29.2s, v28.2s, #270");
  // TEST_SINGLE(fcadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28, Rotation::ROTATE_270), "fcadd v30.1d, v29.1d, v28.1d, #270");

  // TODO: Enable once vixl disassembler supports these instructions
  // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28), "bfdot v30.4s, v29.8h, v28.8h");
  // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28), "bfdot v30.2s, v29.4h, v28.4h");
  // TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v28), "bfmlalb v30.4s, v29.8h, v28.8h");
  // TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v28), "bfmlalt v30.4s, v29.8h, v28.8h");

  TEST_SINGLE(smmla(VReg::v30, VReg::v29, VReg::v28), "smmla v30.4s, v29.16b, v28.16b");
  TEST_SINGLE(usmmla(VReg::v30, VReg::v29, VReg::v28), "usmmla v30.4s, v29.16b, v28.16b");
  // TODO: Enable once vixl disassembler supports these instructions
  // TEST_SINGLE(bfmmla(VReg::v30, VReg::v29, VReg::v28), "bfmmla v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(ummla(VReg::v30, VReg::v29, VReg::v28), "ummla v30.4s, v29.16b, v28.16b");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD two-register miscellaneous") {
  // Commented out lines showcase unallocated encodings.
  TEST_SINGLE(rev64(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev64 v30.16b, v29.16b");
  TEST_SINGLE(rev64(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev64 v30.8h, v29.8h");
  TEST_SINGLE(rev64(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev64 v30.4s, v29.4s");
  // TEST_SINGLE(rev64(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev64 v30.2d, v29.2d");

  TEST_SINGLE(rev64(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev64 v30.8b, v29.8b");
  TEST_SINGLE(rev64(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev64 v30.4h, v29.4h");
  TEST_SINGLE(rev64(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev64 v30.2s, v29.2s");
  // TEST_SINGLE(rev64(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev64 v30.1d, v29.1d");

  TEST_SINGLE(rev16(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev16 v30.16b, v29.16b");
  // TEST_SINGLE(rev16(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev16 v30.8h, v29.8h");
  // TEST_SINGLE(rev16(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev16 v30.4s, v29.4s");
  // TEST_SINGLE(rev16(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev16 v30.2d, v29.2d");

  TEST_SINGLE(rev16(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev16 v30.8b, v29.8b");
  // TEST_SINGLE(rev16(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev16 v30.4h, v29.4h");
  // TEST_SINGLE(rev16(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev16 v30.2s, v29.2s");
  // TEST_SINGLE(rev16(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev16 v30.1d, v29.1d");

  // TEST_SINGLE(saddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlp v30.16b, v29.16b");
  TEST_SINGLE(saddlp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "saddlp v30.8h, v29.16b");
  TEST_SINGLE(saddlp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "saddlp v30.4s, v29.8h");
  TEST_SINGLE(saddlp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "saddlp v30.2d, v29.4s");

  // TEST_SINGLE(saddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlp v30.8b, v29.8b");
  TEST_SINGLE(saddlp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "saddlp v30.4h, v29.8b");
  TEST_SINGLE(saddlp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "saddlp v30.2s, v29.4h");
  TEST_SINGLE(saddlp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "saddlp v30.1d, v29.2s");

  TEST_SINGLE(suqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29), "suqadd v30.16b, v29.16b");
  TEST_SINGLE(suqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29), "suqadd v30.8h, v29.8h");
  TEST_SINGLE(suqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29), "suqadd v30.4s, v29.4s");
  TEST_SINGLE(suqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29), "suqadd v30.2d, v29.2d");

  TEST_SINGLE(suqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29), "suqadd v30.8b, v29.8b");
  TEST_SINGLE(suqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29), "suqadd v30.4h, v29.4h");
  TEST_SINGLE(suqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29), "suqadd v30.2s, v29.2s");
  // TEST_SINGLE(suqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "suqadd v30.1d, v29.1d");

  TEST_SINGLE(cls(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cls v30.16b, v29.16b");
  TEST_SINGLE(cls(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cls v30.8h, v29.8h");
  TEST_SINGLE(cls(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cls v30.4s, v29.4s");
  // TEST_SINGLE(cls(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cls v30.2d, v29.2d");

  TEST_SINGLE(cls(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cls v30.8b, v29.8b");
  TEST_SINGLE(cls(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cls v30.4h, v29.4h");
  TEST_SINGLE(cls(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cls v30.2s, v29.2s");
  // TEST_SINGLE(cls(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cls v30.1d, v29.1d");

  TEST_SINGLE(cnt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cnt v30.16b, v29.16b");
  // TEST_SINGLE(cnt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cnt v30.8h, v29.8h");
  // TEST_SINGLE(cnt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cnt v30.4s, v29.4s");
  // TEST_SINGLE(cnt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cnt v30.2d, v29.2d");

  TEST_SINGLE(cnt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cnt v30.8b, v29.8b");
  // TEST_SINGLE(cnt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cnt v30.4h, v29.4h");
  // TEST_SINGLE(cnt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cnt v30.2s, v29.2s");
  // TEST_SINGLE(cnt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cnt v30.1d, v29.1d");

  // TEST_SINGLE(sadalp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sadalp v30.16b, v29.16b");
  TEST_SINGLE(sadalp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sadalp v30.8h, v29.16b");
  TEST_SINGLE(sadalp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sadalp v30.4s, v29.8h");
  TEST_SINGLE(sadalp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sadalp v30.2d, v29.4s");

  // TEST_SINGLE(sadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sadalp v30.8b, v29.8b");
  TEST_SINGLE(sadalp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sadalp v30.4h, v29.8b");
  TEST_SINGLE(sadalp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sadalp v30.2s, v29.4h");
  TEST_SINGLE(sadalp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sadalp v30.1d, v29.2s");

  TEST_SINGLE(sqabs(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqabs v30.16b, v29.16b");
  TEST_SINGLE(sqabs(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqabs v30.8h, v29.8h");
  TEST_SINGLE(sqabs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqabs v30.4s, v29.4s");
  TEST_SINGLE(sqabs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqabs v30.2d, v29.2d");

  TEST_SINGLE(sqabs(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqabs v30.8b, v29.8b");
  TEST_SINGLE(sqabs(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqabs v30.4h, v29.4h");
  TEST_SINGLE(sqabs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqabs v30.2s, v29.2s");
  // TEST_SINGLE(sqabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqabs v30.1d, v29.1d");

  TEST_SINGLE(cmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmgt v30.16b, v29.16b, #0");
  TEST_SINGLE(cmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmgt v30.8h, v29.8h, #0");
  TEST_SINGLE(cmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cmgt v30.4s, v29.4s, #0");
  TEST_SINGLE(cmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cmgt v30.2d, v29.2d, #0");

  TEST_SINGLE(cmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmgt v30.8b, v29.8b, #0");
  TEST_SINGLE(cmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmgt v30.4h, v29.4h, #0");
  TEST_SINGLE(cmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmgt v30.2s, v29.2s, #0");
  // TEST_SINGLE(cmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmgt v30.1d, v29.1d, #0");

  TEST_SINGLE(cmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmeq v30.16b, v29.16b, #0");
  TEST_SINGLE(cmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmeq v30.8h, v29.8h, #0");
  TEST_SINGLE(cmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cmeq v30.4s, v29.4s, #0");
  TEST_SINGLE(cmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cmeq v30.2d, v29.2d, #0");

  TEST_SINGLE(cmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmeq v30.8b, v29.8b, #0");
  TEST_SINGLE(cmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmeq v30.4h, v29.4h, #0");
  TEST_SINGLE(cmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmeq v30.2s, v29.2s, #0");
  // TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmeq v30.1d, v29.1d, #0");

  TEST_SINGLE(cmlt(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmlt v30.16b, v29.16b, #0");
  TEST_SINGLE(cmlt(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmlt v30.8h, v29.8h, #0");
  TEST_SINGLE(cmlt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cmlt v30.4s, v29.4s, #0");
  TEST_SINGLE(cmlt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cmlt v30.2d, v29.2d, #0");

  TEST_SINGLE(cmlt(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmlt v30.8b, v29.8b, #0");
  TEST_SINGLE(cmlt(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmlt v30.4h, v29.4h, #0");
  TEST_SINGLE(cmlt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmlt v30.2s, v29.2s, #0");
  // TEST_SINGLE(cmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmlt v30.1d, v29.1d, #0");

  TEST_SINGLE(abs(SubRegSize::i8Bit, QReg::q30, QReg::q29), "abs v30.16b, v29.16b");
  TEST_SINGLE(abs(SubRegSize::i16Bit, QReg::q30, QReg::q29), "abs v30.8h, v29.8h");
  TEST_SINGLE(abs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "abs v30.4s, v29.4s");
  TEST_SINGLE(abs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "abs v30.2d, v29.2d");

  TEST_SINGLE(abs(SubRegSize::i8Bit, DReg::d30, DReg::d29), "abs v30.8b, v29.8b");
  TEST_SINGLE(abs(SubRegSize::i16Bit, DReg::d30, DReg::d29), "abs v30.4h, v29.4h");
  TEST_SINGLE(abs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "abs v30.2s, v29.2s");
  // TEST_SINGLE(abs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "abs v30.1d, v29.1d");

  TEST_SINGLE(xtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "xtn v30.8b, v29.8h");
  TEST_SINGLE(xtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "xtn v30.4h, v29.4s");
  TEST_SINGLE(xtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "xtn v30.2s, v29.2d");
  // TEST_SINGLE(xtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn v30.2d, v29.1d");

  TEST_SINGLE(xtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "xtn v30.8b, v29.8h");
  TEST_SINGLE(xtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "xtn v30.4h, v29.4s");
  TEST_SINGLE(xtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "xtn v30.2s, v29.2d");
  // TEST_SINGLE(xtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn v30.1d, v29.1d");

  TEST_SINGLE(xtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "xtn2 v30.16b, v29.8h");
  TEST_SINGLE(xtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "xtn2 v30.8h, v29.4s");
  TEST_SINGLE(xtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "xtn2 v30.4s, v29.2d");
  // TEST_SINGLE(xtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "xtn2 v30.2d, v29.1d");

  TEST_SINGLE(xtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "xtn2 v30.16b, v29.8h");
  TEST_SINGLE(xtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "xtn2 v30.8h, v29.4s");
  TEST_SINGLE(xtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "xtn2 v30.4s, v29.2d");
  // TEST_SINGLE(xtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "xtn2 v30.2d, v29.1d");

  TEST_SINGLE(sqxtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtn v30.8b, v29.8h");
  TEST_SINGLE(sqxtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtn v30.4h, v29.4s");
  TEST_SINGLE(sqxtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtn v30.2s, v29.2d");
  // TEST_SINGLE(sqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn v30.2d, v29.1d");

  TEST_SINGLE(sqxtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtn v30.8b, v29.8h");
  TEST_SINGLE(sqxtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtn v30.4h, v29.4s");
  TEST_SINGLE(sqxtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtn v30.2s, v29.2d");
  // TEST_SINGLE(sqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn v30.1d, v29.1d");

  TEST_SINGLE(sqxtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtn2 v30.16b, v29.8h");
  TEST_SINGLE(sqxtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtn2 v30.8h, v29.4s");
  TEST_SINGLE(sqxtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtn2 v30.4s, v29.2d");
  // TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtn2 v30.2d, v29.1d");

  TEST_SINGLE(sqxtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtn2 v30.16b, v29.8h");
  TEST_SINGLE(sqxtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtn2 v30.8h, v29.4s");
  TEST_SINGLE(sqxtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtn2 v30.4s, v29.2d");
  // TEST_SINGLE(sqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtn2 v30.2d, v29.1d");

  // TEST_SINGLE(fcvtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn v30.8b, v29.8h");
  TEST_SINGLE(fcvtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtn v30.4h, v29.4s");
  TEST_SINGLE(fcvtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtn v30.2s, v29.2d");
  // TEST_SINGLE(fcvtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn v30.2d, v29.1d");

  // TEST_SINGLE(fcvtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtn v30.8b, v29.8h");
  TEST_SINGLE(fcvtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtn v30.4h, v29.4s");
  TEST_SINGLE(fcvtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtn v30.2s, v29.2d");
  // TEST_SINGLE(fcvtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn v30.1d, v29.1d");

  // TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtn2 v30.16b, v29.8h");
  TEST_SINGLE(fcvtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtn2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtn2 v30.4s, v29.2d");
  // TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtn2 v30.2d, v29.1d");

  // TEST_SINGLE(fcvtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtn2 v30.16b, v29.8h");
  TEST_SINGLE(fcvtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtn2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtn2 v30.4s, v29.2d");
  // TEST_SINGLE(fcvtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtn2 v30.2d, v29.1d");

  // TEST_SINGLE(fcvtl(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl v30.8b, v29.8h");
  // TEST_SINGLE(fcvtl(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl v30.4h, v29.4s");
  TEST_SINGLE(fcvtl(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtl v30.4s, v29.4h");
  TEST_SINGLE(fcvtl(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtl v30.2d, v29.2s");

  // TEST_SINGLE(fcvtl(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl v30.8b, v29.8h");
  // TEST_SINGLE(fcvtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl v30.4h, v29.4s");
  TEST_SINGLE(fcvtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtl v30.4s, v29.4h");
  TEST_SINGLE(fcvtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtl v30.2d, v29.2s");

  // TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtl2 v30.16b, v29.8h");
  // TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtl2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtl2 v30.4s, v29.8h");
  TEST_SINGLE(fcvtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtl2 v30.2d, v29.4s");

  // TEST_SINGLE(fcvtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtl2 v30.16b, v29.8h");
  // TEST_SINGLE(fcvtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtl2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtl2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtl2 v30.4s, v29.8h");
  TEST_SINGLE(fcvtl2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtl2 v30.2d, v29.4s");

  TEST_SINGLE(frintn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintn v30.4s, v29.4s");
  TEST_SINGLE(frintn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintn v30.2d, v29.2d");
  TEST_SINGLE(frintn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintn v30.2s, v29.2s");
  // TEST_SINGLE(frintn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintn v30.1d, v29.1d");

  TEST_SINGLE(frintm(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintm v30.4s, v29.4s");
  TEST_SINGLE(frintm(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintm v30.2d, v29.2d");
  TEST_SINGLE(frintm(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintm v30.2s, v29.2s");
  // TEST_SINGLE(frintm(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintm v30.1d, v29.1d");

  TEST_SINGLE(fcvtns(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtns v30.4s, v29.4s");
  TEST_SINGLE(fcvtns(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtns v30.2d, v29.2d");
  TEST_SINGLE(fcvtns(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtns v30.2s, v29.2s");
  // TEST_SINGLE(fcvtns(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtns v30.1d, v29.1d");

  TEST_SINGLE(fcvtms(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtms v30.4s, v29.4s");
  TEST_SINGLE(fcvtms(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtms v30.2d, v29.2d");
  TEST_SINGLE(fcvtms(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtms v30.2s, v29.2s");
  // TEST_SINGLE(fcvtms(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtms v30.1d, v29.1d");

  TEST_SINGLE(fcvtas(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtas v30.4s, v29.4s");
  TEST_SINGLE(fcvtas(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtas v30.2d, v29.2d");
  TEST_SINGLE(fcvtas(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtas v30.2s, v29.2s");
  // TEST_SINGLE(fcvtas(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtas v30.1d, v29.1d");

  TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29), "scvtf v30.4s, v29.4s");
  TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29), "scvtf v30.2d, v29.2d");
  TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29), "scvtf v30.2s, v29.2s");
  // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "scvtf v30.1d, v29.1d");

  TEST_SINGLE(frint32z(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint32z v30.4s, v29.4s");
  TEST_SINGLE(frint32z(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint32z v30.2d, v29.2d");
  TEST_SINGLE(frint32z(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint32z v30.2s, v29.2s");
  // TEST_SINGLE(frint32z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32z v30.1d, v29.1d");

  TEST_SINGLE(frint64z(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint64z v30.4s, v29.4s");
  TEST_SINGLE(frint64z(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint64z v30.2d, v29.2d");
  TEST_SINGLE(frint64z(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint64z v30.2s, v29.2s");
  // TEST_SINGLE(frint64z(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64z v30.1d, v29.1d");

  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmgt v30.4s, v29.4s, #0.0");
  TEST_SINGLE(fcmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmgt v30.2d, v29.2d, #0.0");
  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmgt v30.2s, v29.2s, #0.0");
  // TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmgt v30.1d, v29.1d, #0.0");

  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmeq v30.4s, v29.4s, #0.0");
  TEST_SINGLE(fcmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmeq v30.2d, v29.2d, #0.0");
  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmeq v30.2s, v29.2s, #0.0");
  // TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmeq v30.1d, v29.1d, #0.0");

  TEST_SINGLE(fcmlt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmlt v30.4s, v29.4s, #0.0");
  TEST_SINGLE(fcmlt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmlt v30.2d, v29.2d, #0.0");
  TEST_SINGLE(fcmlt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmlt v30.2s, v29.2s, #0.0");
  // TEST_SINGLE(fcmlt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmlt v30.1d, v29.1d, #0.0");

  TEST_SINGLE(fabs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fabs v30.4s, v29.4s");
  TEST_SINGLE(fabs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fabs v30.2d, v29.2d");
  TEST_SINGLE(fabs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fabs v30.2s, v29.2s");
  // TEST_SINGLE(fabs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fabs v30.1d, v29.1d");

  TEST_SINGLE(frintp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintp v30.4s, v29.4s");
  TEST_SINGLE(frintp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintp v30.2d, v29.2d");
  TEST_SINGLE(frintp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintp v30.2s, v29.2s");
  // TEST_SINGLE(frintp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintp v30.1d, v29.1d");

  TEST_SINGLE(frintz(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintz v30.4s, v29.4s");
  TEST_SINGLE(frintz(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintz v30.2d, v29.2d");
  TEST_SINGLE(frintz(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintz v30.2s, v29.2s");
  // TEST_SINGLE(frintz(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintz v30.1d, v29.1d");

  TEST_SINGLE(fcvtps(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtps v30.4s, v29.4s");
  TEST_SINGLE(fcvtps(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtps v30.2d, v29.2d");
  TEST_SINGLE(fcvtps(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtps v30.2s, v29.2s");
  // TEST_SINGLE(fcvtps(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtps v30.1d, v29.1d");

  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtzs v30.4s, v29.4s");
  TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtzs v30.2d, v29.2d");
  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtzs v30.2s, v29.2s");
  // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzs v30.1d, v29.1d");

  TEST_SINGLE(urecpe(SubRegSize::i32Bit, QReg::q30, QReg::q29), "urecpe v30.4s, v29.4s");
  // TEST_SINGLE(urecpe(SubRegSize::i64Bit, QReg::q30, QReg::q29), "urecpe v30.2d, v29.2d");
  TEST_SINGLE(urecpe(SubRegSize::i32Bit, DReg::d30, DReg::d29), "urecpe v30.2s, v29.2s");
  // TEST_SINGLE(urecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "urecpe v30.1d, v29.1d");

  TEST_SINGLE(frecpe(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frecpe v30.4s, v29.4s");
  TEST_SINGLE(frecpe(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frecpe v30.2d, v29.2d");
  TEST_SINGLE(frecpe(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frecpe v30.2s, v29.2s");
  // TEST_SINGLE(frecpe(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frecpe v30.1d, v29.1d");

  TEST_SINGLE(rev32(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rev32 v30.16b, v29.16b");
  TEST_SINGLE(rev32(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rev32 v30.8h, v29.8h");
  // TEST_SINGLE(rev32(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rev32 v30.4s, v29.4s");
  // TEST_SINGLE(rev32(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rev32 v30.2d, v29.2d");

  TEST_SINGLE(rev32(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rev32 v30.8b, v29.8b");
  TEST_SINGLE(rev32(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rev32 v30.4h, v29.4h");
  // TEST_SINGLE(rev32(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rev32 v30.2s, v29.2s");
  // TEST_SINGLE(rev32(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rev32 v30.1d, v29.1d");

  // TEST_SINGLE(uaddlp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlp v30.16b, v29.16b");
  TEST_SINGLE(uaddlp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uaddlp v30.8h, v29.16b");
  TEST_SINGLE(uaddlp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uaddlp v30.4s, v29.8h");
  TEST_SINGLE(uaddlp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uaddlp v30.2d, v29.4s");

  // TEST_SINGLE(uaddlp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uaddlp v30.8b, v29.8b");
  TEST_SINGLE(uaddlp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uaddlp v30.4h, v29.8b");
  TEST_SINGLE(uaddlp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uaddlp v30.2s, v29.4h");
  TEST_SINGLE(uaddlp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uaddlp v30.1d, v29.2s");

  TEST_SINGLE(usqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29), "usqadd v30.16b, v29.16b");
  TEST_SINGLE(usqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29), "usqadd v30.8h, v29.8h");
  TEST_SINGLE(usqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29), "usqadd v30.4s, v29.4s");
  TEST_SINGLE(usqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29), "usqadd v30.2d, v29.2d");

  TEST_SINGLE(usqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29), "usqadd v30.8b, v29.8b");
  TEST_SINGLE(usqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29), "usqadd v30.4h, v29.4h");
  TEST_SINGLE(usqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29), "usqadd v30.2s, v29.2s");
  // TEST_SINGLE(usqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29), "usqadd v30.1d, v29.1d");

  TEST_SINGLE(clz(SubRegSize::i8Bit, QReg::q30, QReg::q29), "clz v30.16b, v29.16b");
  TEST_SINGLE(clz(SubRegSize::i16Bit, QReg::q30, QReg::q29), "clz v30.8h, v29.8h");
  TEST_SINGLE(clz(SubRegSize::i32Bit, QReg::q30, QReg::q29), "clz v30.4s, v29.4s");
  // TEST_SINGLE(clz(SubRegSize::i64Bit, QReg::q30, QReg::q29), "clz v30.2d, v29.2d");

  TEST_SINGLE(clz(SubRegSize::i8Bit, DReg::d30, DReg::d29), "clz v30.8b, v29.8b");
  TEST_SINGLE(clz(SubRegSize::i16Bit, DReg::d30, DReg::d29), "clz v30.4h, v29.4h");
  TEST_SINGLE(clz(SubRegSize::i32Bit, DReg::d30, DReg::d29), "clz v30.2s, v29.2s");
  // TEST_SINGLE(clz(SubRegSize::i64Bit, DReg::d30, DReg::d29), "clz v30.1d, v29.1d");

  // TEST_SINGLE(uadalp(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uadalp v30.16b, v29.16b");
  TEST_SINGLE(uadalp(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uadalp v30.8h, v29.16b");
  TEST_SINGLE(uadalp(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uadalp v30.4s, v29.8h");
  TEST_SINGLE(uadalp(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uadalp v30.2d, v29.4s");

  // TEST_SINGLE(uadalp(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uadalp v30.8b, v29.8b");
  TEST_SINGLE(uadalp(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uadalp v30.4h, v29.8b");
  TEST_SINGLE(uadalp(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uadalp v30.2s, v29.4h");
  TEST_SINGLE(uadalp(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uadalp v30.1d, v29.2s");

  TEST_SINGLE(sqneg(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqneg v30.16b, v29.16b");
  TEST_SINGLE(sqneg(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqneg v30.8h, v29.8h");
  TEST_SINGLE(sqneg(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqneg v30.4s, v29.4s");
  TEST_SINGLE(sqneg(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqneg v30.2d, v29.2d");

  TEST_SINGLE(sqneg(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqneg v30.8b, v29.8b");
  TEST_SINGLE(sqneg(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqneg v30.4h, v29.4h");
  TEST_SINGLE(sqneg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqneg v30.2s, v29.2s");
  // TEST_SINGLE(sqneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqneg v30.1d, v29.1d");

  TEST_SINGLE(cmge(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmge v30.16b, v29.16b, #0");
  TEST_SINGLE(cmge(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmge v30.8h, v29.8h, #0");
  TEST_SINGLE(cmge(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cmge v30.4s, v29.4s, #0");
  TEST_SINGLE(cmge(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cmge v30.2d, v29.2d, #0");

  TEST_SINGLE(cmge(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmge v30.8b, v29.8b, #0");
  TEST_SINGLE(cmge(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmge v30.4h, v29.4h, #0");
  TEST_SINGLE(cmge(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmge v30.2s, v29.2s, #0");
  // TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmge v30.1d, v29.1d, #0");
  //
  TEST_SINGLE(cmle(SubRegSize::i8Bit, QReg::q30, QReg::q29), "cmle v30.16b, v29.16b, #0");
  TEST_SINGLE(cmle(SubRegSize::i16Bit, QReg::q30, QReg::q29), "cmle v30.8h, v29.8h, #0");
  TEST_SINGLE(cmle(SubRegSize::i32Bit, QReg::q30, QReg::q29), "cmle v30.4s, v29.4s, #0");
  TEST_SINGLE(cmle(SubRegSize::i64Bit, QReg::q30, QReg::q29), "cmle v30.2d, v29.2d, #0");

  TEST_SINGLE(cmle(SubRegSize::i8Bit, DReg::d30, DReg::d29), "cmle v30.8b, v29.8b, #0");
  TEST_SINGLE(cmle(SubRegSize::i16Bit, DReg::d30, DReg::d29), "cmle v30.4h, v29.4h, #0");
  TEST_SINGLE(cmle(SubRegSize::i32Bit, DReg::d30, DReg::d29), "cmle v30.2s, v29.2s, #0");
  // TEST_SINGLE(cmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "cmle v30.1d, v29.1d, #0");

  TEST_SINGLE(neg(SubRegSize::i8Bit, QReg::q30, QReg::q29), "neg v30.16b, v29.16b");
  TEST_SINGLE(neg(SubRegSize::i16Bit, QReg::q30, QReg::q29), "neg v30.8h, v29.8h");
  TEST_SINGLE(neg(SubRegSize::i32Bit, QReg::q30, QReg::q29), "neg v30.4s, v29.4s");
  TEST_SINGLE(neg(SubRegSize::i64Bit, QReg::q30, QReg::q29), "neg v30.2d, v29.2d");

  TEST_SINGLE(neg(SubRegSize::i8Bit, DReg::d30, DReg::d29), "neg v30.8b, v29.8b");
  TEST_SINGLE(neg(SubRegSize::i16Bit, DReg::d30, DReg::d29), "neg v30.4h, v29.4h");
  TEST_SINGLE(neg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "neg v30.2s, v29.2s");
  // TEST_SINGLE(neg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "neg v30.1d, v29.1d");

  TEST_SINGLE(sqxtun(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtun v30.8b, v29.8h");
  TEST_SINGLE(sqxtun(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtun v30.4h, v29.4s");
  TEST_SINGLE(sqxtun(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtun v30.2s, v29.2d");
  // TEST_SINGLE(sqxtun(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun v30.2d, v29.1d");

  TEST_SINGLE(sqxtun(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtun v30.8b, v29.8h");
  TEST_SINGLE(sqxtun(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtun v30.4h, v29.4s");
  TEST_SINGLE(sqxtun(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtun v30.2s, v29.2d");
  // TEST_SINGLE(sqxtun(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun v30.1d, v29.1d");

  TEST_SINGLE(sqxtun2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sqxtun2 v30.16b, v29.8h");
  TEST_SINGLE(sqxtun2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sqxtun2 v30.8h, v29.4s");
  TEST_SINGLE(sqxtun2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sqxtun2 v30.4s, v29.2d");
  // TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sqxtun2 v30.2d, v29.1d");

  TEST_SINGLE(sqxtun2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sqxtun2 v30.16b, v29.8h");
  TEST_SINGLE(sqxtun2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sqxtun2 v30.8h, v29.4s");
  TEST_SINGLE(sqxtun2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sqxtun2 v30.4s, v29.2d");
  // TEST_SINGLE(sqxtun2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sqxtun2 v30.2d, v29.1d");

  // TEST_SINGLE(shll(SubRegSize::i8Bit, DReg::d30, DReg::d29), "shll v30.8b, v29.8b, #0");
  TEST_SINGLE(shll(SubRegSize::i16Bit, DReg::d30, DReg::d29), "shll v30.8h, v29.8b, #8");
  TEST_SINGLE(shll(SubRegSize::i32Bit, DReg::d30, DReg::d29), "shll v30.4s, v29.4h, #16");
  TEST_SINGLE(shll(SubRegSize::i64Bit, DReg::d30, DReg::d29), "shll v30.2d, v29.2s, #32");

  // TEST_SINGLE(shll2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "shll2 v30.16b, v29.16b, #0");
  TEST_SINGLE(shll2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "shll2 v30.8h, v29.16b, #8");
  TEST_SINGLE(shll2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "shll2 v30.4s, v29.8h, #16");
  TEST_SINGLE(shll2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "shll2 v30.2d, v29.4s, #32");

  TEST_SINGLE(uqxtn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uqxtn v30.8b, v29.8h");
  TEST_SINGLE(uqxtn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uqxtn v30.4h, v29.4s");
  TEST_SINGLE(uqxtn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uqxtn v30.2s, v29.2d");
  // TEST_SINGLE(uqxtn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn v30.2d, v29.1d");

  TEST_SINGLE(uqxtn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uqxtn v30.8b, v29.8h");
  TEST_SINGLE(uqxtn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uqxtn v30.4h, v29.4s");
  TEST_SINGLE(uqxtn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uqxtn v30.2s, v29.2d");
  // TEST_SINGLE(uqxtn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn v30.1d, v29.1d");

  TEST_SINGLE(uqxtn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uqxtn2 v30.16b, v29.8h");
  TEST_SINGLE(uqxtn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uqxtn2 v30.8h, v29.4s");
  TEST_SINGLE(uqxtn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uqxtn2 v30.4s, v29.2d");
  // TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uqxtn2 v30.2d, v29.1d");

  TEST_SINGLE(uqxtn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uqxtn2 v30.16b, v29.8h");
  TEST_SINGLE(uqxtn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uqxtn2 v30.8h, v29.4s");
  TEST_SINGLE(uqxtn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uqxtn2 v30.4s, v29.2d");
  // TEST_SINGLE(uqxtn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uqxtn2 v30.2d, v29.1d");
  //
  // TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn v30.8b, v29.8h");
  // TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn v30.4h, v29.4s");
  TEST_SINGLE(fcvtxn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtxn v30.2s, v29.2d");
  // TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn v30.2d, v29.1d");

  // TEST_SINGLE(fcvtxn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn v30.8b, v29.8h");
  // TEST_SINGLE(fcvtxn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn v30.4h, v29.4s");
  TEST_SINGLE(fcvtxn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtxn v30.2s, v29.2d");
  // TEST_SINGLE(fcvtxn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtxn v30.1d, v29.1d");

  // TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.16b, v29.8h");
  // TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtxn2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.4s, v29.2d");
  // TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtxn2 v30.2d, v29.1d");

  // TEST_SINGLE(fcvtxn2(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.16b, v29.8h");
  // TEST_SINGLE(fcvtxn2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.8h, v29.4s");
  TEST_SINGLE(fcvtxn2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.4s, v29.2d");
  // TEST_SINGLE(fcvtxn2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtxn2 v30.2d, v29.1d");


  TEST_SINGLE(frinta(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frinta v30.4s, v29.4s");
  TEST_SINGLE(frinta(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frinta v30.2d, v29.2d");
  TEST_SINGLE(frinta(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frinta v30.2s, v29.2s");
  // TEST_SINGLE(frinta(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinta v30.1d, v29.1d");

  TEST_SINGLE(frintx(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frintx v30.4s, v29.4s");
  TEST_SINGLE(frintx(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frintx v30.2d, v29.2d");
  TEST_SINGLE(frintx(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frintx v30.2s, v29.2s");
  // TEST_SINGLE(frintx(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frintx v30.1d, v29.1d");

  TEST_SINGLE(fcvtnu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtnu v30.4s, v29.4s");
  TEST_SINGLE(fcvtnu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtnu v30.2d, v29.2d");
  TEST_SINGLE(fcvtnu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtnu v30.2s, v29.2s");
  // TEST_SINGLE(fcvtnu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtnu v30.1d, v29.1d");

  TEST_SINGLE(fcvtmu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtmu v30.4s, v29.4s");
  TEST_SINGLE(fcvtmu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtmu v30.2d, v29.2d");
  TEST_SINGLE(fcvtmu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtmu v30.2s, v29.2s");
  // TEST_SINGLE(fcvtmu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtmu v30.1d, v29.1d");

  TEST_SINGLE(fcvtau(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtau v30.4s, v29.4s");
  TEST_SINGLE(fcvtau(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtau v30.2d, v29.2d");
  TEST_SINGLE(fcvtau(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtau v30.2s, v29.2s");
  // TEST_SINGLE(fcvtau(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtau v30.1d, v29.1d");

  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29), "ucvtf v30.4s, v29.4s");
  TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29), "ucvtf v30.2d, v29.2d");
  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29), "ucvtf v30.2s, v29.2s");
  // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ucvtf v30.1d, v29.1d");

  TEST_SINGLE(frint32x(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint32x v30.4s, v29.4s");
  TEST_SINGLE(frint32x(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint32x v30.2d, v29.2d");
  TEST_SINGLE(frint32x(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint32x v30.2s, v29.2s");
  // TEST_SINGLE(frint32x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint32x v30.1d, v29.1d");

  TEST_SINGLE(frint64x(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frint64x v30.4s, v29.4s");
  TEST_SINGLE(frint64x(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frint64x v30.2d, v29.2d");
  TEST_SINGLE(frint64x(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frint64x v30.2s, v29.2s");
  // TEST_SINGLE(frint64x(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frint64x v30.1d, v29.1d");

  TEST_SINGLE(not_(SubRegSize::i8Bit, QReg::q30, QReg::q29), "mvn v30.16b, v29.16b");
  // TEST_SINGLE(not_(SubRegSize::i16Bit, QReg::q30, QReg::q29), "not v30.8h, v29.8h");
  // TEST_SINGLE(not_(SubRegSize::i32Bit, QReg::q30, QReg::q29), "not v30.4s, v29.4s");
  // TEST_SINGLE(not_(SubRegSize::i64Bit, QReg::q30, QReg::q29), "not v30.2d, v29.2d");

  TEST_SINGLE(not_(SubRegSize::i8Bit, DReg::d30, DReg::d29), "mvn v30.8b, v29.8b");
  // TEST_SINGLE(not_(SubRegSize::i16Bit, DReg::d30, DReg::d29), "not v30.4h, v29.4h");
  // TEST_SINGLE(not_(SubRegSize::i32Bit, DReg::d30, DReg::d29), "not v30.2s, v29.2s");
  // TEST_SINGLE(not_(SubRegSize::i64Bit, DReg::d30, DReg::d29), "not v30.1d, v29.1d");

  TEST_SINGLE(mvn(SubRegSize::i8Bit, QReg::q30, QReg::q29), "mvn v30.16b, v29.16b");
  // TEST_SINGLE(mvn(SubRegSize::i16Bit, QReg::q30, QReg::q29), "mvn v30.8h, v29.8h");
  // TEST_SINGLE(mvn(SubRegSize::i32Bit, QReg::q30, QReg::q29), "mvn v30.4s, v29.4s");
  // TEST_SINGLE(mvn(SubRegSize::i64Bit, QReg::q30, QReg::q29), "mvn v30.2d, v29.2d");

  TEST_SINGLE(mvn(SubRegSize::i8Bit, DReg::d30, DReg::d29), "mvn v30.8b, v29.8b");
  // TEST_SINGLE(mvn(SubRegSize::i16Bit, DReg::d30, DReg::d29), "mvn v30.4h, v29.4h");
  // TEST_SINGLE(mvn(SubRegSize::i32Bit, DReg::d30, DReg::d29), "mvn v30.2s, v29.2s");
  // TEST_SINGLE(mvn(SubRegSize::i64Bit, DReg::d30, DReg::d29), "mvn v30.1d, v29.1d");

  TEST_SINGLE(rbit(SubRegSize::i8Bit, QReg::q30, QReg::q29), "rbit v30.16b, v29.16b");
  // TEST_SINGLE(rbit(SubRegSize::i16Bit, QReg::q30, QReg::q29), "rbit v30.8h, v29.8h");
  // TEST_SINGLE(rbit(SubRegSize::i32Bit, QReg::q30, QReg::q29), "rbit v30.4s, v29.4s");
  // TEST_SINGLE(rbit(SubRegSize::i64Bit, QReg::q30, QReg::q29), "rbit v30.2d, v29.2d");

  TEST_SINGLE(rbit(SubRegSize::i8Bit, DReg::d30, DReg::d29), "rbit v30.8b, v29.8b");
  // TEST_SINGLE(rbit(SubRegSize::i16Bit, DReg::d30, DReg::d29), "rbit v30.4h, v29.4h");
  // TEST_SINGLE(rbit(SubRegSize::i32Bit, DReg::d30, DReg::d29), "rbit v30.2s, v29.2s");
  // TEST_SINGLE(rbit(SubRegSize::i64Bit, DReg::d30, DReg::d29), "rbit v30.1d, v29.1d");

  TEST_SINGLE(fcmge(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmge v30.4s, v29.4s, #0.0");
  TEST_SINGLE(fcmge(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmge v30.2d, v29.2d, #0.0");
  TEST_SINGLE(fcmge(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmge v30.2s, v29.2s, #0.0");
  // TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmge v30.1d, v29.1d, #0.0");

  TEST_SINGLE(fcmle(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcmle v30.4s, v29.4s, #0.0");
  TEST_SINGLE(fcmle(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcmle v30.2d, v29.2d, #0.0");
  TEST_SINGLE(fcmle(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcmle v30.2s, v29.2s, #0.0");
  // TEST_SINGLE(fcmle(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcmle v30.1d, v29.1d, #0.0");

  TEST_SINGLE(fneg(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fneg v30.4s, v29.4s");
  TEST_SINGLE(fneg(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fneg v30.2d, v29.2d");
  TEST_SINGLE(fneg(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fneg v30.2s, v29.2s");
  // TEST_SINGLE(fneg(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fneg v30.1d, v29.1d");

  TEST_SINGLE(frinti(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frinti v30.4s, v29.4s");
  TEST_SINGLE(frinti(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frinti v30.2d, v29.2d");
  TEST_SINGLE(frinti(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frinti v30.2s, v29.2s");
  // TEST_SINGLE(frinti(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frinti v30.1d, v29.1d");

  TEST_SINGLE(fcvtpu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtpu v30.4s, v29.4s");
  TEST_SINGLE(fcvtpu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtpu v30.2d, v29.2d");
  TEST_SINGLE(fcvtpu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtpu v30.2s, v29.2s");
  // TEST_SINGLE(fcvtpu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtpu v30.1d, v29.1d");

  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fcvtzu v30.4s, v29.4s");
  TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fcvtzu v30.2d, v29.2d");
  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fcvtzu v30.2s, v29.2s");
  // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fcvtzu v30.1d, v29.1d");

  TEST_SINGLE(ursqrte(SubRegSize::i32Bit, QReg::q30, QReg::q29), "ursqrte v30.4s, v29.4s");
  // TEST_SINGLE(ursqrte(SubRegSize::i64Bit, QReg::q30, QReg::q29), "ursqrte v30.2d, v29.2d");
  TEST_SINGLE(ursqrte(SubRegSize::i32Bit, DReg::d30, DReg::d29), "ursqrte v30.2s, v29.2s");
  // TEST_SINGLE(ursqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "ursqrte v30.1d, v29.1d");

  TEST_SINGLE(frsqrte(SubRegSize::i32Bit, QReg::q30, QReg::q29), "frsqrte v30.4s, v29.4s");
  TEST_SINGLE(frsqrte(SubRegSize::i64Bit, QReg::q30, QReg::q29), "frsqrte v30.2d, v29.2d");
  TEST_SINGLE(frsqrte(SubRegSize::i32Bit, DReg::d30, DReg::d29), "frsqrte v30.2s, v29.2s");
  // TEST_SINGLE(frsqrte(SubRegSize::i64Bit, DReg::d30, DReg::d29), "frsqrte v30.1d, v29.1d");

  TEST_SINGLE(fsqrt(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fsqrt v30.4s, v29.4s");
  TEST_SINGLE(fsqrt(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fsqrt v30.2d, v29.2d");
  TEST_SINGLE(fsqrt(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fsqrt v30.2s, v29.2s");
  // TEST_SINGLE(fsqrt(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fsqrt v30.1d, v29.1d");
}
// Disassembly checks for the "across lanes" ASIMD emitters: every enabled case below has a
// single vector source (v29.<arrangement>) and a scalar destination (b30/h30/s30/d30).
// Each TEST_SINGLE line gives an emitter call and the assembly text expected for it
// (the macro itself is defined outside this part of the file).
// Commented-out lines are size/register combinations that this test does not exercise.
// NOTE(review): presumably those combinations have no valid encoding; their expected strings
// have never been run and should not be treated as accurate (e.g. "d30, v29.4s" for i64Bit).
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD across lanes") {
  // saddlv: SubRegSize names the destination width; the expected source elements are half
  // that width (i16Bit -> h30 from bytes, i32Bit -> s30 from halfwords, i64Bit -> d30 from words).
  // TEST_SINGLE(saddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "saddlv v30.16b, v29.16b");
  TEST_SINGLE(saddlv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "saddlv h30, v29.16b");
  TEST_SINGLE(saddlv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "saddlv s30, v29.8h");
  TEST_SINGLE(saddlv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "saddlv d30, v29.4s");

  // saddlv, D-register source: only the i16Bit and i32Bit destinations are enabled.
  // TEST_SINGLE(saddlv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "saddlv v30.8b, v29.8b");
  TEST_SINGLE(saddlv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "saddlv h30, v29.8b");
  TEST_SINGLE(saddlv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "saddlv s30, v29.4h");
  // TEST_SINGLE(saddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "saddlv d30, v29.1d");

  // smaxv: destination scalar and source elements share the SubRegSize width.
  // Q form enables i8Bit/i16Bit/i32Bit; D form enables i8Bit/i16Bit only.
  TEST_SINGLE(smaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "smaxv b30, v29.16b");
  TEST_SINGLE(smaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "smaxv h30, v29.8h");
  TEST_SINGLE(smaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "smaxv s30, v29.4s");
  // TEST_SINGLE(smaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "smaxv d30, v29.4s");

  TEST_SINGLE(smaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "smaxv b30, v29.8b");
  TEST_SINGLE(smaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "smaxv h30, v29.4h");
  // TEST_SINGLE(smaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "smaxv s30, v29.2s");
  // TEST_SINGLE(smaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "smaxv d30, v29.1d");

  // sminv: same enabled size/register combinations as smaxv above.
  TEST_SINGLE(sminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "sminv b30, v29.16b");
  TEST_SINGLE(sminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sminv h30, v29.8h");
  TEST_SINGLE(sminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sminv s30, v29.4s");
  // TEST_SINGLE(sminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sminv d30, v29.4s");

  TEST_SINGLE(sminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "sminv b30, v29.8b");
  TEST_SINGLE(sminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sminv h30, v29.4h");
  // TEST_SINGLE(sminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sminv s30, v29.2s");
  // TEST_SINGLE(sminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sminv d30, v29.1d");

  // addv: same enabled size/register combinations as smaxv above.
  TEST_SINGLE(addv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "addv b30, v29.16b");
  TEST_SINGLE(addv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "addv h30, v29.8h");
  TEST_SINGLE(addv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "addv s30, v29.4s");
  // TEST_SINGLE(addv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "addv d30, v29.4s");

  TEST_SINGLE(addv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "addv b30, v29.8b");
  TEST_SINGLE(addv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "addv h30, v29.4h");
  // TEST_SINGLE(addv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "addv s30, v29.2s");
  // TEST_SINGLE(addv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "addv d30, v29.1d");

  // uaddlv: mirrors the saddlv cases above (SubRegSize is the destination width, source
  // elements are half that width).
  // TEST_SINGLE(uaddlv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uaddlv v30.16b, v29.16b");
  TEST_SINGLE(uaddlv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uaddlv h30, v29.16b");
  TEST_SINGLE(uaddlv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uaddlv s30, v29.8h");
  TEST_SINGLE(uaddlv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uaddlv d30, v29.4s");

  // TEST_SINGLE(uaddlv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uaddlv v30.8b, v29.8b");
  TEST_SINGLE(uaddlv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uaddlv h30, v29.8b");
  TEST_SINGLE(uaddlv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uaddlv s30, v29.4h");
  // TEST_SINGLE(uaddlv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uaddlv d30, v29.1d");

  // umaxv: same enabled size/register combinations as smaxv above.
  TEST_SINGLE(umaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "umaxv b30, v29.16b");
  TEST_SINGLE(umaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "umaxv h30, v29.8h");
  TEST_SINGLE(umaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "umaxv s30, v29.4s");
  // TEST_SINGLE(umaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "umaxv d30, v29.4s");

  TEST_SINGLE(umaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "umaxv b30, v29.8b");
  TEST_SINGLE(umaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "umaxv h30, v29.4h");
  // TEST_SINGLE(umaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "umaxv s30, v29.2s");
  // TEST_SINGLE(umaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "umaxv d30, v29.1d");

  // uminv: same enabled size/register combinations as smaxv above.
  TEST_SINGLE(uminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "uminv b30, v29.16b");
  TEST_SINGLE(uminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uminv h30, v29.8h");
  TEST_SINGLE(uminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uminv s30, v29.4s");
  // TEST_SINGLE(uminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uminv d30, v29.4s");

  TEST_SINGLE(uminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "uminv b30, v29.8b");
  TEST_SINGLE(uminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uminv h30, v29.4h");
  // TEST_SINGLE(uminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uminv s30, v29.2s");
  // TEST_SINGLE(uminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uminv d30, v29.1d");

  // Floating-point reductions (fmaxnmv, fmaxv, fminnmv, fminv): only i16Bit and i32Bit are
  // enabled for Q registers, and only i16Bit for D registers.
  // TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxnmv b30, v29.16b");
  TEST_SINGLE(fmaxnmv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fmaxnmv h30, v29.8h");
  TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fmaxnmv s30, v29.4s");
  // TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxnmv d30, v29.4s");

  // TEST_SINGLE(fmaxnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxnmv b30, v29.8b");
  TEST_SINGLE(fmaxnmv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fmaxnmv h30, v29.4h");
  // TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxnmv s30, v29.2s");
  // TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxnmv d30, v29.1d");

  // TEST_SINGLE(fmaxv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fmaxv b30, v29.16b");
  TEST_SINGLE(fmaxv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fmaxv h30, v29.8h");
  TEST_SINGLE(fmaxv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fmaxv s30, v29.4s");
  // TEST_SINGLE(fmaxv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fmaxv d30, v29.4s");

  // TEST_SINGLE(fmaxv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fmaxv b30, v29.8b");
  TEST_SINGLE(fmaxv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fmaxv h30, v29.4h");
  // TEST_SINGLE(fmaxv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fmaxv s30, v29.2s");
  // TEST_SINGLE(fmaxv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fmaxv d30, v29.1d");

  // TEST_SINGLE(fminnmv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fminnmv b30, v29.16b");
  TEST_SINGLE(fminnmv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fminnmv h30, v29.8h");
  TEST_SINGLE(fminnmv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fminnmv s30, v29.4s");
  // TEST_SINGLE(fminnmv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminnmv d30, v29.4s");

  // TEST_SINGLE(fminnmv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminnmv b30, v29.8b");
  TEST_SINGLE(fminnmv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fminnmv h30, v29.4h");
  // TEST_SINGLE(fminnmv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminnmv s30, v29.2s");
  // TEST_SINGLE(fminnmv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminnmv d30, v29.1d");

  // TEST_SINGLE(fminv(SubRegSize::i8Bit, QReg::q30, QReg::q29), "fminv b30, v29.16b");
  TEST_SINGLE(fminv(SubRegSize::i16Bit, QReg::q30, QReg::q29), "fminv h30, v29.8h");
  TEST_SINGLE(fminv(SubRegSize::i32Bit, QReg::q30, QReg::q29), "fminv s30, v29.4s");
  // TEST_SINGLE(fminv(SubRegSize::i64Bit, QReg::q30, QReg::q29), "fminv d30, v29.4s");

  // TEST_SINGLE(fminv(SubRegSize::i8Bit, DReg::d30, DReg::d29), "fminv b30, v29.8b");
  TEST_SINGLE(fminv(SubRegSize::i16Bit, DReg::d30, DReg::d29), "fminv h30, v29.4h");
  // TEST_SINGLE(fminv(SubRegSize::i32Bit, DReg::d30, DReg::d29), "fminv s30, v29.2s");
  // TEST_SINGLE(fminv(SubRegSize::i64Bit, DReg::d30, DReg::d29), "fminv d30, v29.1d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three different") {
  // TEST_SINGLE(saddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(saddl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(saddl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(saddl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saddl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(saddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(saddl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(saddl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(saddl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saddl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(saddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(saddw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.8h, v29.8h, v28.8b");
  TEST_SINGLE(saddw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.4s, v29.4s, v28.4h");
  TEST_SINGLE(saddw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saddw v30.2d, v29.2d, v28.2s");

  // TEST_SINGLE(saddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(saddw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.8h, v29.8h, v28.16b");
  TEST_SINGLE(saddw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.4s, v29.4s, v28.8h");
  TEST_SINGLE(saddw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saddw2 v30.2d, v29.2d, v28.4s");

  // TEST_SINGLE(ssubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(ssubl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(ssubl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(ssubl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ssubl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(ssubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(ssubl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(ssubl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(ssubl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ssubl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(ssubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(ssubw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.8h, v29.8h, v28.8b");
  TEST_SINGLE(ssubw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.4s, v29.4s, v28.4h");
  TEST_SINGLE(ssubw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ssubw v30.2d, v29.2d, v28.2s");

  // TEST_SINGLE(ssubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(ssubw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.8h, v29.8h, v28.16b");
  TEST_SINGLE(ssubw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.4s, v29.4s, v28.8h");
  TEST_SINGLE(ssubw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ssubw2 v30.2d, v29.2d, v28.4s");

  TEST_SINGLE(addhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.8b, v29.8h, v28.8h");
  TEST_SINGLE(addhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.4h, v29.4s, v28.4s");
  TEST_SINGLE(addhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.2s, v29.2d, v28.2d");
  // TEST_SINGLE(addhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "addhn v30.2d, v29.2d, v28.2s");

  TEST_SINGLE(addhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.16b, v29.8h, v28.8h");
  TEST_SINGLE(addhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.8h, v29.4s, v28.4s");
  TEST_SINGLE(addhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.4s, v29.2d, v28.2d");
  // TEST_SINGLE(addhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "addhn2 v30.2d, v29.2d, v28.4s");

  // TEST_SINGLE(sabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sabal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(sabal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(sabal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabal v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(sabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sabal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(sabal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(sabal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabal2 v30.2d, v29.4s, v28.4s");

  TEST_SINGLE(subhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.8b, v29.8h, v28.8h");
  TEST_SINGLE(subhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.4h, v29.4s, v28.4s");
  TEST_SINGLE(subhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.2s, v29.2d, v28.2d");
  // TEST_SINGLE(subhn(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "subhn v30.2d, v29.2d, v28.2s");

  TEST_SINGLE(subhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.16b, v29.8h, v28.8h");
  TEST_SINGLE(subhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.8h, v29.4s, v28.4s");
  TEST_SINGLE(subhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.4s, v29.2d, v28.2d");
  // TEST_SINGLE(subhn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "subhn2 v30.2d, v29.2d, v28.4s");

  // TEST_SINGLE(sabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sabdl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(sabdl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(sabdl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabdl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(sabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sabdl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(sabdl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(sabdl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabdl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(smlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(smlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(smlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smlal v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(smlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(smlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(smlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smlal2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(sqdmlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(sqdmlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlal v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(sqdmlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(sqdmlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlal2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(smlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(smlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(smlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smlsl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(smlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(smlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(smlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smlsl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(sqdmlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(sqdmlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmlsl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(sqdmlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(sqdmlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmlsl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(smull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(smull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(smull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smull v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(smull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(smull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(smull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smull2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(sqdmull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(sqdmull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(sqdmull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmull v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(sqdmull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(sqdmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(sqdmull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmull2 v30.2d, v29.4s, v28.4s");

  TEST_SINGLE(pmull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(pmull(SubRegSize::i128Bit, DReg::d30, DReg::d29, DReg::d28), "pmull v30.1q, v29.1d, v28.1d");

  TEST_SINGLE(pmull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(pmull2(SubRegSize::i128Bit, QReg::q30, QReg::q29, QReg::q28), "pmull2 v30.1q, v29.2d, v28.2d");

  // TEST_SINGLE(uaddl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uaddl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(uaddl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(uaddl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaddl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(uaddl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uaddl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(uaddl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(uaddl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaddl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(uaddw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uaddw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.8h, v29.8h, v28.8b");
  TEST_SINGLE(uaddw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.4s, v29.4s, v28.4h");
  TEST_SINGLE(uaddw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaddw v30.2d, v29.2d, v28.2s");

  // TEST_SINGLE(uaddw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uaddw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.8h, v29.8h, v28.16b");
  TEST_SINGLE(uaddw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.4s, v29.4s, v28.8h");
  TEST_SINGLE(uaddw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaddw2 v30.2d, v29.2d, v28.4s");

  // TEST_SINGLE(usubl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(usubl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(usubl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(usubl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "usubl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(usubl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(usubl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(usubl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(usubl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "usubl2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(usubw(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(usubw(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.8h, v29.8h, v28.8b");
  TEST_SINGLE(usubw(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.4s, v29.4s, v28.4h");
  TEST_SINGLE(usubw(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "usubw v30.2d, v29.2d, v28.2s");

  // TEST_SINGLE(usubw2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(usubw2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.8h, v29.8h, v28.16b");
  TEST_SINGLE(usubw2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.4s, v29.4s, v28.8h");
  TEST_SINGLE(usubw2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "usubw2 v30.2d, v29.2d, v28.4s");

  TEST_SINGLE(raddhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "raddhn v30.8b, v29.8h, v28.8h");
  TEST_SINGLE(raddhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "raddhn v30.4h, v29.4s, v28.4s");
  TEST_SINGLE(raddhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "raddhn v30.2s, v29.2d, v28.2d");

  TEST_SINGLE(raddhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "raddhn2 v30.16b, v29.8h, v28.8h");
  TEST_SINGLE(raddhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "raddhn2 v30.8h, v29.4s, v28.4s");
  TEST_SINGLE(raddhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "raddhn2 v30.4s, v29.2d, v28.2d");

  // TEST_SINGLE(uabal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uabal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(uabal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(uabal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabal v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(uabal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uabal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(uabal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(uabal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabal2 v30.2d, v29.4s, v28.4s");

  TEST_SINGLE(rsubhn(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "rsubhn v30.8b, v29.8h, v28.8h");
  TEST_SINGLE(rsubhn(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "rsubhn v30.4h, v29.4s, v28.4s");
  TEST_SINGLE(rsubhn(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "rsubhn v30.2s, v29.2d, v28.2d");

  TEST_SINGLE(rsubhn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "rsubhn2 v30.16b, v29.8h, v28.8h");
  TEST_SINGLE(rsubhn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "rsubhn2 v30.8h, v29.4s, v28.4s");
  TEST_SINGLE(rsubhn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "rsubhn2 v30.4s, v29.2d, v28.2d");

  // TEST_SINGLE(uabdl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uabdl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(uabdl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(uabdl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabdl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(uabdl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uabdl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(uabdl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(uabdl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabdl2 v30.2d, v29.4s, v28.4s");


  // TEST_SINGLE(umlal(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umlal(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(umlal(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(umlal(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umlal v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(umlal2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umlal2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(umlal2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(umlal2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umlal2 v30.2d, v29.4s, v28.4s");

  // TEST_SINGLE(umlsl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umlsl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(umlsl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(umlsl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umlsl v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(umlsl2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umlsl2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(umlsl2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(umlsl2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umlsl2 v30.2d, v29.4s, v28.4s");


  // TEST_SINGLE(umull(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umull(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.8h, v29.8b, v28.8b");
  TEST_SINGLE(umull(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(umull(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umull v30.2d, v29.2s, v28.2s");

  // TEST_SINGLE(umull2(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umull2(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.8h, v29.16b, v28.16b");
  TEST_SINGLE(umull2(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.4s, v29.8h, v28.8h");
  TEST_SINGLE(umull2(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umull2 v30.2d, v29.4s, v28.4s");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD three same") {
  TEST_SINGLE(shadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(shadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(shadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(shadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(shadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(shadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(shadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(shadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(srhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(srhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(srhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(srhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "srhadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(srhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(srhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(srhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(srhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srhadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(shsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(shsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(shsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(shsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "shsub v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(shsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(shsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(shsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(shsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "shsub v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sqsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqsub v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sqsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqsub v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmgt(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmgt v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmgt v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmge(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmge v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmge v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(srshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(srshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(srshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(srshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "srshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(srshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(srshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(srshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(srshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "srshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqrshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqrshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sqrshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqrshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sqrshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqrshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqrshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(smax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(smax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(smax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(smax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smax v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(smax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(smax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(smax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smax v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(smin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(smin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(smin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(smin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smin v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(smin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(smin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(smin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smin v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(sabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sabd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sabd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(saba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(saba(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(saba(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(saba(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "saba v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(saba(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(saba(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(saba(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(saba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "saba v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(add(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(add(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(add(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(add(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "add v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(add(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(add(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(add(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(add(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "add v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmtst(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmtst(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmtst(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmtst(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmtst v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmtst(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmtst(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmtst(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmtst(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmtst v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(mla(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(mla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mla v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(mla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(mla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(mla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mla v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(mul(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(mul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(mul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mul v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(mul(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(mul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(mul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mul v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(smaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(smaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(smaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(smaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "smaxp v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(smaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(smaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(smaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(smaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "smaxp v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(sminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sminp v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sminp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(sqdmulh(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqdmulh v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqdmulh v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(addp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(addp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(addp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(addp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "addp v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(addp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(addp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(addp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(addp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "addp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnm v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnm v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmla(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmla v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmla v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmla(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmla v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fadd(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fadd v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fadd v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fadd v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmulx(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmulx v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmulx(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmulx(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmulx(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmulx v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmulx(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmulx(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmulx(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmulx(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmulx v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fcmeq(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fcmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fcmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmeq v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fcmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fcmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fcmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmeq v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmax(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmax v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmax v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmax v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(frecps(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "frecps v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(frecps(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(frecps(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(frecps(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "frecps v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(frecps(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(frecps(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(frecps(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(frecps(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "frecps v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(and_(QReg::q30, QReg::q29, QReg::q28), "and v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(and_(DReg::d30, DReg::d29, DReg::d28), "and v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(fmlal(QReg::q30, QReg::q29, QReg::q28), "fmlal v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(fmlal(DReg::d30, DReg::d29, DReg::d28), "fmlal v30.2s, v29.2h, v28.2h");

  TEST_SINGLE(fmlal2(QReg::q30, QReg::q29, QReg::q28), "fmlal2 v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(fmlal2(DReg::d30, DReg::d29, DReg::d28), "fmlal2 v30.2s, v29.2h, v28.2h");

  TEST_SINGLE(bic(QReg::q30, QReg::q29, QReg::q28), "bic v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(bic(DReg::d30, DReg::d29, DReg::d28), "bic v30.8b, v29.8b, v28.8b");

  // TEST_SINGLE(fminnm(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fminnm v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fminnm(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminnm(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fminnm(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminnm v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fminnm(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fminnm(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fminnm(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fminnm(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnm v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmls(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmls v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmls v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmls(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmls v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fsub(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fsub v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fsub v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fsub v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmin(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmin v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmin v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmin v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(frsqrts(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(frsqrts(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(frsqrts(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(frsqrts(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "frsqrts v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(frsqrts(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(frsqrts(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(frsqrts(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(frsqrts(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "frsqrts v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(orr(QReg::q30, QReg::q29, QReg::q28), "orr v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(orr(DReg::d30, DReg::d29, DReg::d28), "orr v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(mov(QReg::q30, QReg::q29), "mov v30.16b, v29.16b");
  TEST_SINGLE(mov(DReg::d30, DReg::d29), "mov v30.8b, v29.8b");

  TEST_SINGLE(fmlsl(QReg::q30, QReg::q29, QReg::q28), "fmlsl v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(fmlsl(DReg::d30, DReg::d29, DReg::d28), "fmlsl v30.2s, v29.2h, v28.2h");

  TEST_SINGLE(fmlsl2(QReg::q30, QReg::q29, QReg::q28), "fmlsl2 v30.4s, v29.4h, v28.4h");
  TEST_SINGLE(fmlsl2(DReg::d30, DReg::d29, DReg::d28), "fmlsl2 v30.2s, v29.2h, v28.2h");

  TEST_SINGLE(orn(QReg::q30, QReg::q29, QReg::q28), "orn v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(orn(DReg::d30, DReg::d29, DReg::d28), "orn v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(uhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uqadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uqadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uqadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(urhadd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(urhadd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(urhadd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(urhadd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "urhadd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(urhadd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(urhadd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(urhadd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(urhadd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urhadd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uhsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uhsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uhsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uhsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uhsub v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uhsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uhsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uhsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uhsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uhsub v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uqsub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uqsub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqsub v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uqsub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uqsub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqsub v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmhi(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmhi(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmhi(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmhi(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmhi v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmhi(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmhi(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmhi(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmhi(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhi v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmhs(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmhs(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmhs(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmhs(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmhs v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmhs(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmhs(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmhs(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmhs(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmhs v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(ushl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(ushl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(ushl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(ushl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "ushl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(ushl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(ushl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(ushl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(ushl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "ushl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(urshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(urshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(urshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(urshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "urshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(urshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(urshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(urshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(urshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "urshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uqrshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uqrshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uqrshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uqrshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uqrshl v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uqrshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uqrshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uqrshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uqrshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uqrshl v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(umax(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(umax(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(umax(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(umax(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umax v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(umax(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umax(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(umax(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(umax(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umax v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(umin(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(umin(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(umin(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(umin(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umin v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(umin(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umin(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(umin(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(umin(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umin v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uabd(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uabd v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uabd v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uaba(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uaba(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uaba(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uaba(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uaba v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uaba(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uaba(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uaba(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uaba(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uaba v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(sub(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sub(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sub(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(sub(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sub v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(sub(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sub(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sub(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sub(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sub v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(cmeq(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(cmeq(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(cmeq(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(cmeq(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "cmeq v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(cmeq(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(cmeq(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(cmeq(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(cmeq(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "cmeq v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(mls(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(mls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(mls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "mls v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(mls(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(mls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(mls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "mls v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(pmul(QReg::q30, QReg::q29, QReg::q28), "pmul v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(pmul(DReg::d30, DReg::d29, DReg::d28), "pmul v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(umaxp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(umaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(umaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(umaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "umaxp v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(umaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(umaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(umaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(umaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "umaxp v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(uminp(SubRegSize::i8Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(uminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(uminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(uminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "uminp v30.2d, v29.2d, v28.2d");

  TEST_SINGLE(uminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(uminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(uminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(uminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "uminp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.4s, v29.4s, v28.4s");
  // TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "sqrdmulh v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.8b, v29.8b, v28.8b");
  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "sqrdmulh v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxnmp v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxnmp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(faddp(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "faddp v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(faddp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(faddp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(faddp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "faddp v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(faddp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(faddp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(faddp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(faddp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "faddp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmul(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmul v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmul v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmul(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmul v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fcmge(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fcmge v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fcmge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fcmge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmge v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fcmge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fcmge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fcmge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fcmge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmge v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(facge(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "facge v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(facge(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(facge(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(facge(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "facge v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(facge(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(facge(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(facge(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(facge(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facge v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fmaxp(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fmaxp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fmaxp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fmaxp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fmaxp v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fmaxp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fmaxp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fmaxp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fmaxp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fmaxp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fdiv(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fdiv v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fdiv(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fdiv(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fdiv(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fdiv v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fdiv(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fdiv(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fdiv(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fdiv(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fdiv v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(eor(QReg::q30, QReg::q29, QReg::q28), "eor v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(eor(DReg::d30, DReg::d29, DReg::d28), "eor v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(bsl(QReg::q30, QReg::q29, QReg::q28), "bsl v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(bsl(DReg::d30, DReg::d29, DReg::d28), "bsl v30.8b, v29.8b, v28.8b");

  // TEST_SINGLE(fminnmp(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fminnmp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminnmp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fminnmp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminnmp v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fminnmp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fminnmp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fminnmp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fminnmp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminnmp v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fabd(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fabd v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fabd(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fabd(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fabd(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fabd v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fabd(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fabd(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fabd(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fabd(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fabd v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fcmgt(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fcmgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fcmgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fcmgt v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fcmgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fcmgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fcmgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fcmgt v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(facgt(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "facgt v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(facgt(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(facgt(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(facgt(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "facgt v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(facgt(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(facgt(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(facgt(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(facgt(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "facgt v30.1d, v29.1d, v28.1d");

  // TEST_SINGLE(fminp(SubRegSize::i8Bit,  QReg::q30, QReg::q29, QReg::q28), "fminp v30.16b, v29.16b, v28.16b");
  // TEST_SINGLE(fminp(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.8h, v29.8h, v28.8h");
  TEST_SINGLE(fminp(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.4s, v29.4s, v28.4s");
  TEST_SINGLE(fminp(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q28), "fminp v30.2d, v29.2d, v28.2d");

  // TEST_SINGLE(fminp(SubRegSize::i8Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.8b, v29.8b, v28.8b");
  // TEST_SINGLE(fminp(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.4h, v29.4h, v28.4h");
  TEST_SINGLE(fminp(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.2s, v29.2s, v28.2s");
  // TEST_SINGLE(fminp(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d28), "fminp v30.1d, v29.1d, v28.1d");

  TEST_SINGLE(bit(QReg::q30, QReg::q29, QReg::q28), "bit v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(bit(DReg::d30, DReg::d29, DReg::d28), "bit v30.8b, v29.8b, v28.8b");

  TEST_SINGLE(bif(QReg::q30, QReg::q29, QReg::q28), "bif v30.16b, v29.16b, v28.16b");
  TEST_SINGLE(bif(DReg::d30, DReg::d29, DReg::d28), "bif v30.8b, v29.8b, v28.8b");
}

#if TEST_FP16
// Half-precision variant of the vector FMOV-immediate tests; only compiled when TEST_FP16 is non-zero.
// Each TEST_SINGLE pairs one emitter call with the disassembly text expected for it
// (the macro itself is defined outside this section of the file).
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD modified immediate : fp16") {
  // 16-bit lanes: the Q register form expects the .8h arrangement, the D register form expects .4h.
  // The expected text shows the immediate as #0x70 followed by its decoded value (1.0000).
  TEST_SINGLE(fmov(SubRegSize::i16Bit, QReg::q30, 1.0), "fmov v30.8h, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, DReg::d30, 1.0), "fmov v30.4h, #0x70 (1.0000)");
}
#endif

// Checks the emitter's vector FMOV-immediate and MOVI forms against their expected disassembly.
// Each TEST_SINGLE pairs one emitter call with the disassembly text expected for it
// (the macro itself is defined outside this section of the file).
// The XXX notes below appear to list instruction forms of this group that have no test here yet.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD modified immediate") {
  // XXX: ORR - 32-bit/16-bit
  // XXX: MOVI - Shifting ones

  // FMOV (vector, immediate), Q register: 32-bit lanes expect .4s, 64-bit lanes expect .2d.
  // The expected text shows the immediate as #0x70 followed by its decoded value (1.0000).
  TEST_SINGLE(fmov(SubRegSize::i32Bit, QReg::q30, 1.0), "fmov v30.4s, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, QReg::q30, 1.0), "fmov v30.2d, #0x70 (1.0000)");

  // FMOV (vector, immediate), D register: only the 32-bit lane form (.2s) is exercised.
  TEST_SINGLE(fmov(SubRegSize::i32Bit, DReg::d30, 1.0), "fmov v30.2s, #0x70 (1.0000)");
  // Disabled: 64-bit lanes in a D register would need a .1d arrangement.
  // NOTE(review): presumably left out because that form is not encodable/supported - confirm against the emitter.
  // TEST_SINGLE(fmov(SubRegSize::i64Bit, DReg::d30, 1.0), "fmov v30.1d, #0x70 (1.0000)");

  // XXX: MVNI - Shifted immediate
  // XXX: BIC

  // MOVI, Q register.
  //  - 8-bit lanes: immediate only, no shift argument.
  //  - 16-bit lanes: shift argument 0 or 8; a shift of 0 is expected to print without an "lsl" suffix.
  //  - 32-bit lanes: shift argument 0, 8, 16 or 24.
  //  - 64-bit lanes: a full 64-bit value is passed; every byte of the value used here is 0x00 or 0xFF.
  TEST_SINGLE(movi(SubRegSize::i8Bit, QReg::q30, 0xFE), "movi v30.16b, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i16Bit, QReg::q30, 0xFE, 0), "movi v30.8h, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i16Bit, QReg::q30, 0xFE, 8), "movi v30.8h, #0xfe, lsl #8");
  TEST_SINGLE(movi(SubRegSize::i32Bit, QReg::q30, 0xFE, 0), "movi v30.4s, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i32Bit, QReg::q30, 0xFE, 8), "movi v30.4s, #0xfe, lsl #8");
  TEST_SINGLE(movi(SubRegSize::i32Bit, QReg::q30, 0xFE, 16), "movi v30.4s, #0xfe, lsl #16");
  TEST_SINGLE(movi(SubRegSize::i32Bit, QReg::q30, 0xFE, 24), "movi v30.4s, #0xfe, lsl #24");
  TEST_SINGLE(movi(SubRegSize::i64Bit, QReg::q30, 0xFF00FF), "movi v30.2d, #0xff00ff");

  // MOVI, D register: same element-size/shift matrix as above with the half-width arrangements.
  TEST_SINGLE(movi(SubRegSize::i8Bit, DReg::d30, 0xFE), "movi v30.8b, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i16Bit, DReg::d30, 0xFE, 0), "movi v30.4h, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i16Bit, DReg::d30, 0xFE, 8), "movi v30.4h, #0xfe, lsl #8");
  TEST_SINGLE(movi(SubRegSize::i32Bit, DReg::d30, 0xFE, 0), "movi v30.2s, #0xfe");
  TEST_SINGLE(movi(SubRegSize::i32Bit, DReg::d30, 0xFE, 8), "movi v30.2s, #0xfe, lsl #8");
  TEST_SINGLE(movi(SubRegSize::i32Bit, DReg::d30, 0xFE, 16), "movi v30.2s, #0xfe, lsl #16");
  TEST_SINGLE(movi(SubRegSize::i32Bit, DReg::d30, 0xFE, 24), "movi v30.2s, #0xfe, lsl #24");
  // The 64-bit D register form is expected to disassemble with the scalar name "d30" rather than a vector arrangement.
  TEST_SINGLE(movi(SubRegSize::i64Bit, DReg::d30, 0xFF00000000000000ULL), "movi d30, #0xff00000000000000");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD shift by immediate") {
  TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sshr v30.16b, v29.16b, #1");
  TEST_SINGLE(sshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sshr v30.16b, v29.16b, #7");
  TEST_SINGLE(sshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshr v30.8h, v29.8h, #1");
  TEST_SINGLE(sshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sshr v30.8h, v29.8h, #15");
  TEST_SINGLE(sshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshr v30.4s, v29.4s, #1");
  TEST_SINGLE(sshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sshr v30.4s, v29.4s, #31");
  TEST_SINGLE(sshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sshr v30.2d, v29.2d, #1");
  TEST_SINGLE(sshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sshr v30.2d, v29.2d, #63");

  TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sshr v30.8b, v29.8b, #1");
  TEST_SINGLE(sshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sshr v30.8b, v29.8b, #7");
  TEST_SINGLE(sshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshr v30.4h, v29.4h, #1");
  TEST_SINGLE(sshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sshr v30.4h, v29.4h, #15");
  TEST_SINGLE(sshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshr v30.2s, v29.2s, #1");
  TEST_SINGLE(sshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sshr v30.2s, v29.2s, #31");
  // TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sshr v30.1d, v29.1d, #1");
  // TEST_SINGLE(sshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sshr v30.1d, v29.1d, #63");

  TEST_SINGLE(ssra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ssra v30.16b, v29.16b, #1");
  TEST_SINGLE(ssra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ssra v30.16b, v29.16b, #7");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ssra v30.8h, v29.8h, #1");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ssra v30.8h, v29.8h, #15");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ssra v30.4s, v29.4s, #1");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ssra v30.4s, v29.4s, #31");
  TEST_SINGLE(ssra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ssra v30.2d, v29.2d, #1");
  TEST_SINGLE(ssra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ssra v30.2d, v29.2d, #63");

  TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ssra v30.8b, v29.8b, #1");
  TEST_SINGLE(ssra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ssra v30.8b, v29.8b, #7");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ssra v30.4h, v29.4h, #1");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ssra v30.4h, v29.4h, #15");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ssra v30.2s, v29.2s, #1");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ssra v30.2s, v29.2s, #31");
  // TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "ssra v30.1d, v29.1d, #1");
  // TEST_SINGLE(ssra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ssra v30.1d, v29.1d, #63");

  TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srshr v30.16b, v29.16b, #1");
  TEST_SINGLE(srshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srshr v30.16b, v29.16b, #7");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srshr v30.8h, v29.8h, #1");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "srshr v30.8h, v29.8h, #15");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srshr v30.4s, v29.4s, #1");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "srshr v30.4s, v29.4s, #31");
  TEST_SINGLE(srshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "srshr v30.2d, v29.2d, #1");
  TEST_SINGLE(srshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "srshr v30.2d, v29.2d, #63");

  TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srshr v30.8b, v29.8b, #1");
  TEST_SINGLE(srshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srshr v30.8b, v29.8b, #7");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srshr v30.4h, v29.4h, #1");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "srshr v30.4h, v29.4h, #15");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srshr v30.2s, v29.2s, #1");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "srshr v30.2s, v29.2s, #31");
  // TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "srshr v30.1d, v29.1d, #1");
  // TEST_SINGLE(srshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srshr v30.1d, v29.1d, #63");

  TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "srsra v30.16b, v29.16b, #1");
  TEST_SINGLE(srsra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "srsra v30.16b, v29.16b, #7");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "srsra v30.8h, v29.8h, #1");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "srsra v30.8h, v29.8h, #15");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "srsra v30.4s, v29.4s, #1");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "srsra v30.4s, v29.4s, #31");
  TEST_SINGLE(srsra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "srsra v30.2d, v29.2d, #1");
  TEST_SINGLE(srsra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "srsra v30.2d, v29.2d, #63");

  TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "srsra v30.8b, v29.8b, #1");
  TEST_SINGLE(srsra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "srsra v30.8b, v29.8b, #7");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "srsra v30.4h, v29.4h, #1");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "srsra v30.4h, v29.4h, #15");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "srsra v30.2s, v29.2s, #1");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "srsra v30.2s, v29.2s, #31");
  // TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "srsra v30.1d, v29.1d, #1");
  // TEST_SINGLE(srsra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "srsra v30.1d, v29.1d, #63");

  TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "shl v30.16b, v29.16b, #1");
  TEST_SINGLE(shl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shl v30.16b, v29.16b, #7");
  TEST_SINGLE(shl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shl v30.8h, v29.8h, #1");
  TEST_SINGLE(shl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "shl v30.8h, v29.8h, #15");
  TEST_SINGLE(shl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shl v30.4s, v29.4s, #1");
  TEST_SINGLE(shl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "shl v30.4s, v29.4s, #31");
  TEST_SINGLE(shl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "shl v30.2d, v29.2d, #1");
  TEST_SINGLE(shl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "shl v30.2d, v29.2d, #63");

  TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shl v30.8b, v29.8b, #1");
  TEST_SINGLE(shl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shl v30.8b, v29.8b, #7");
  TEST_SINGLE(shl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shl v30.4h, v29.4h, #1");
  TEST_SINGLE(shl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "shl v30.4h, v29.4h, #15");
  TEST_SINGLE(shl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shl v30.2s, v29.2s, #1");
  TEST_SINGLE(shl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "shl v30.2s, v29.2s, #31");
  // TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "shl v30.1d, v29.1d, #1");
  // TEST_SINGLE(shl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "shl v30.1d, v29.1d, #63");

  TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshl v30.16b, v29.16b, #1");
  TEST_SINGLE(sqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshl v30.16b, v29.16b, #7");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshl v30.8h, v29.8h, #1");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshl v30.8h, v29.8h, #15");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshl v30.4s, v29.4s, #1");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshl v30.4s, v29.4s, #31");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshl v30.2d, v29.2d, #1");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshl v30.2d, v29.2d, #63");

  TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshl v30.8b, v29.8b, #1");
  TEST_SINGLE(sqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshl v30.8b, v29.8b, #7");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshl v30.4h, v29.4h, #1");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshl v30.4h, v29.4h, #15");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshl v30.2s, v29.2s, #1");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshl v30.2s, v29.2s, #31");
  // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqshl v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshl v30.1d, v29.1d, #63");

  TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "shrn v30.8b, v29.8h, #1");
  TEST_SINGLE(shrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "shrn v30.8b, v29.8h, #7");
  TEST_SINGLE(shrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "shrn v30.4h, v29.4s, #1");
  TEST_SINGLE(shrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "shrn v30.4h, v29.4s, #15");
  TEST_SINGLE(shrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "shrn v30.2s, v29.2d, #1");
  TEST_SINGLE(shrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "shrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "shrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(shrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "shrn v30.1d, v29.1d, #63");

  TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(shrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "shrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(shrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(shrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "shrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(shrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "shrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(shrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "shrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "shrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(shrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "shrn2 v30.2d, v29.2d, #63");

  TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "rshrn v30.8b, v29.8h, #1");
  TEST_SINGLE(rshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "rshrn v30.8b, v29.8h, #7");
  TEST_SINGLE(rshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "rshrn v30.4h, v29.4s, #1");
  TEST_SINGLE(rshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "rshrn v30.4h, v29.4s, #15");
  TEST_SINGLE(rshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "rshrn v30.2s, v29.2d, #1");
  TEST_SINGLE(rshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "rshrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "rshrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(rshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "rshrn v30.1d, v29.1d, #63");

  TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(rshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "rshrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(rshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(rshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "rshrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(rshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "rshrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(rshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "rshrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "rshrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(rshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "rshrn2 v30.2d, v29.2d, #63");

  TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.8b, v29.8h, #1");
  TEST_SINGLE(sqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrn v30.8b, v29.8h, #7");
  TEST_SINGLE(sqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.4h, v29.4s, #1");
  TEST_SINGLE(sqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshrn v30.4h, v29.4s, #15");
  TEST_SINGLE(sqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrn v30.2s, v29.2d, #1");
  TEST_SINGLE(sqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqshrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrn v30.1d, v29.1d, #63");

  TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(sqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(sqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(sqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(sqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(sqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "sqshrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(sqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrn2 v30.2d, v29.2d, #63");

  TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.8b, v29.8h, #1");
  TEST_SINGLE(sqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqrshrn v30.8b, v29.8h, #7");
  TEST_SINGLE(sqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.4h, v29.4s, #1");
  TEST_SINGLE(sqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqrshrn v30.4h, v29.4s, #15");
  TEST_SINGLE(sqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrn v30.2s, v29.2d, #1");
  TEST_SINGLE(sqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqrshrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqrshrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrn v30.1d, v29.1d, #63");

  TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(sqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(sqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(sqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqrshrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(sqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(sqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqrshrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "sqrshrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(sqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrn2 v30.2d, v29.2d, #63");

  // TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "sshll v30.8b, v29.8h, #1");
  // TEST_SINGLE(sshll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "sshll v30.8b, v29.8h, #7");
  TEST_SINGLE(sshll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sshll v30.8h, v29.8b, #1");
  TEST_SINGLE(sshll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 7), "sshll v30.8h, v29.8b, #7");
  TEST_SINGLE(sshll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sshll v30.4s, v29.4h, #1");
  TEST_SINGLE(sshll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 15), "sshll v30.4s, v29.4h, #15");
  TEST_SINGLE(sshll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "sshll v30.2d, v29.2s, #1");
  TEST_SINGLE(sshll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 31), "sshll v30.2d, v29.2s, #31");

  // TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "sshll2 v30.16b, v29.8h, #1");
  // TEST_SINGLE(sshll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "sshll2 v30.16b, v29.8h, #7");
  TEST_SINGLE(sshll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.8h, v29.16b, #1");
  TEST_SINGLE(sshll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 7), "sshll2 v30.8h, v29.16b, #7");
  TEST_SINGLE(sshll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.4s, v29.8h, #1");
  TEST_SINGLE(sshll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 15), "sshll2 v30.4s, v29.8h, #15");
  TEST_SINGLE(sshll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sshll2 v30.2d, v29.4s, #1");
  TEST_SINGLE(sshll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 31), "sshll2 v30.2d, v29.4s, #31");

  // TEST_SINGLE(sxtl(SubRegSize::i8Bit, QReg::q30, QReg::q29),   "sxtl v30.8b, v29.8h");
  TEST_SINGLE(sxtl(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sxtl v30.8h, v29.8b");
  TEST_SINGLE(sxtl(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl v30.4s, v29.4h");
  TEST_SINGLE(sxtl(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl v30.2d, v29.2s");

  // TEST_SINGLE(sxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29),   "sxtl v30.8b, v29.8h");
  TEST_SINGLE(sxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl v30.8h, v29.8b");
  TEST_SINGLE(sxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl v30.4s, v29.4h");
  TEST_SINGLE(sxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl v30.2d, v29.2s");

  // TEST_SINGLE(sxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29),   "sxtl2 v30.16b, v29.8h");
  TEST_SINGLE(sxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "sxtl2 v30.8h, v29.16b");
  TEST_SINGLE(sxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "sxtl2 v30.4s, v29.8h");
  TEST_SINGLE(sxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "sxtl2 v30.2d, v29.4s");

  // TEST_SINGLE(sxtl2(SubRegSize::i8Bit, DReg::d30, DReg::d29),   "sxtl2 v30.16b, v29.8h");
  TEST_SINGLE(sxtl2(SubRegSize::i16Bit, DReg::d30, DReg::d29), "sxtl2 v30.8h, v29.16b");
  TEST_SINGLE(sxtl2(SubRegSize::i32Bit, DReg::d30, DReg::d29), "sxtl2 v30.4s, v29.8h");
  TEST_SINGLE(sxtl2(SubRegSize::i64Bit, DReg::d30, DReg::d29), "sxtl2 v30.2d, v29.4s");

  // TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "scvtf v30.16b, v29.16b, #1");
  // TEST_SINGLE(scvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "scvtf v30.16b, v29.16b, #7");
  TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "scvtf v30.8h, v29.8h, #1");
  TEST_SINGLE(scvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "scvtf v30.8h, v29.8h, #15");
  TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "scvtf v30.4s, v29.4s, #1");
  TEST_SINGLE(scvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "scvtf v30.4s, v29.4s, #31");
  TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "scvtf v30.2d, v29.2d, #1");
  TEST_SINGLE(scvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "scvtf v30.2d, v29.2d, #63");

  // TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "scvtf v30.8b, v29.8b, #1");
  // TEST_SINGLE(scvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "scvtf v30.8b, v29.8b, #7");
  TEST_SINGLE(scvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "scvtf v30.4h, v29.4h, #1");
  TEST_SINGLE(scvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "scvtf v30.4h, v29.4h, #15");
  TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "scvtf v30.2s, v29.2s, #1");
  TEST_SINGLE(scvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "scvtf v30.2s, v29.2s, #31");
  // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "scvtf v30.1d, v29.1d, #1");
  // TEST_SINGLE(scvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "scvtf v30.1d, v29.1d, #63");

  // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "fcvtzs v30.16b, v29.16b, #1");
  // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "fcvtzs v30.16b, v29.16b, #7");
  TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.8h, v29.8h, #1");
  TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "fcvtzs v30.8h, v29.8h, #15");
  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.4s, v29.4s, #1");
  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "fcvtzs v30.4s, v29.4s, #31");
  TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzs v30.2d, v29.2d, #1");
  TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "fcvtzs v30.2d, v29.2d, #63");

  // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "fcvtzs v30.8b, v29.8b, #1");
  // TEST_SINGLE(fcvtzs(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "fcvtzs v30.8b, v29.8b, #7");
  TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.4h, v29.4h, #1");
  TEST_SINGLE(fcvtzs(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "fcvtzs v30.4h, v29.4h, #15");
  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzs v30.2s, v29.2s, #1");
  TEST_SINGLE(fcvtzs(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "fcvtzs v30.2s, v29.2s, #31");
  // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "fcvtzs v30.1d, v29.1d, #1");
  // TEST_SINGLE(fcvtzs(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "fcvtzs v30.1d, v29.1d, #63");

  TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ushr v30.16b, v29.16b, #1");
  TEST_SINGLE(ushr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ushr v30.16b, v29.16b, #7");
  TEST_SINGLE(ushr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushr v30.8h, v29.8h, #1");
  TEST_SINGLE(ushr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ushr v30.8h, v29.8h, #15");
  TEST_SINGLE(ushr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushr v30.4s, v29.4s, #1");
  TEST_SINGLE(ushr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ushr v30.4s, v29.4s, #31");
  TEST_SINGLE(ushr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushr v30.2d, v29.2d, #1");
  TEST_SINGLE(ushr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ushr v30.2d, v29.2d, #63");

  TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ushr v30.8b, v29.8b, #1");
  TEST_SINGLE(ushr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ushr v30.8b, v29.8b, #7");
  TEST_SINGLE(ushr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ushr v30.4h, v29.4h, #1");
  TEST_SINGLE(ushr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ushr v30.4h, v29.4h, #15");
  TEST_SINGLE(ushr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushr v30.2s, v29.2s, #1");
  TEST_SINGLE(ushr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ushr v30.2s, v29.2s, #31");
  // TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "ushr v30.1d, v29.1d, #1");
  // TEST_SINGLE(ushr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ushr v30.1d, v29.1d, #63");

  TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "usra v30.16b, v29.16b, #1");
  TEST_SINGLE(usra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "usra v30.16b, v29.16b, #7");
  TEST_SINGLE(usra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "usra v30.8h, v29.8h, #1");
  TEST_SINGLE(usra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "usra v30.8h, v29.8h, #15");
  TEST_SINGLE(usra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "usra v30.4s, v29.4s, #1");
  TEST_SINGLE(usra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "usra v30.4s, v29.4s, #31");
  TEST_SINGLE(usra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "usra v30.2d, v29.2d, #1");
  TEST_SINGLE(usra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "usra v30.2d, v29.2d, #63");

  TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "usra v30.8b, v29.8b, #1");
  TEST_SINGLE(usra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "usra v30.8b, v29.8b, #7");
  TEST_SINGLE(usra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "usra v30.4h, v29.4h, #1");
  TEST_SINGLE(usra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "usra v30.4h, v29.4h, #15");
  TEST_SINGLE(usra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "usra v30.2s, v29.2s, #1");
  TEST_SINGLE(usra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "usra v30.2s, v29.2s, #31");
  // TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "usra v30.1d, v29.1d, #1");
  // TEST_SINGLE(usra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "usra v30.1d, v29.1d, #63");

  TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "urshr v30.16b, v29.16b, #1");
  TEST_SINGLE(urshr(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "urshr v30.16b, v29.16b, #7");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "urshr v30.8h, v29.8h, #1");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "urshr v30.8h, v29.8h, #15");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "urshr v30.4s, v29.4s, #1");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "urshr v30.4s, v29.4s, #31");
  TEST_SINGLE(urshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "urshr v30.2d, v29.2d, #1");
  TEST_SINGLE(urshr(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "urshr v30.2d, v29.2d, #63");

  TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "urshr v30.8b, v29.8b, #1");
  TEST_SINGLE(urshr(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "urshr v30.8b, v29.8b, #7");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "urshr v30.4h, v29.4h, #1");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "urshr v30.4h, v29.4h, #15");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "urshr v30.2s, v29.2s, #1");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "urshr v30.2s, v29.2s, #31");
  // TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "urshr v30.1d, v29.1d, #1");
  // TEST_SINGLE(urshr(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "urshr v30.1d, v29.1d, #63");

  TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "ursra v30.16b, v29.16b, #1");
  TEST_SINGLE(ursra(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "ursra v30.16b, v29.16b, #7");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ursra v30.8h, v29.8h, #1");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ursra v30.8h, v29.8h, #15");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ursra v30.4s, v29.4s, #1");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ursra v30.4s, v29.4s, #31");
  TEST_SINGLE(ursra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ursra v30.2d, v29.2d, #1");
  TEST_SINGLE(ursra(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ursra v30.2d, v29.2d, #63");

  TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "ursra v30.8b, v29.8b, #1");
  TEST_SINGLE(ursra(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "ursra v30.8b, v29.8b, #7");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ursra v30.4h, v29.4h, #1");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ursra v30.4h, v29.4h, #15");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ursra v30.2s, v29.2s, #1");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ursra v30.2s, v29.2s, #31");
  // TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "ursra v30.1d, v29.1d, #1");
  // TEST_SINGLE(ursra(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ursra v30.1d, v29.1d, #63");

  TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sri v30.16b, v29.16b, #1");
  TEST_SINGLE(sri(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sri v30.16b, v29.16b, #7");
  TEST_SINGLE(sri(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sri v30.8h, v29.8h, #1");
  TEST_SINGLE(sri(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sri v30.8h, v29.8h, #15");
  TEST_SINGLE(sri(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sri v30.4s, v29.4s, #1");
  TEST_SINGLE(sri(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sri v30.4s, v29.4s, #31");
  TEST_SINGLE(sri(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sri v30.2d, v29.2d, #1");
  TEST_SINGLE(sri(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sri v30.2d, v29.2d, #63");

  TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sri v30.8b, v29.8b, #1");
  TEST_SINGLE(sri(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sri v30.8b, v29.8b, #7");
  TEST_SINGLE(sri(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sri v30.4h, v29.4h, #1");
  TEST_SINGLE(sri(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sri v30.4h, v29.4h, #15");
  TEST_SINGLE(sri(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sri v30.2s, v29.2s, #1");
  TEST_SINGLE(sri(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sri v30.2s, v29.2s, #31");
  // TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sri v30.1d, v29.1d, #1");
  // TEST_SINGLE(sri(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sri v30.1d, v29.1d, #63");

  TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sli v30.16b, v29.16b, #1");
  TEST_SINGLE(sli(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sli v30.16b, v29.16b, #7");
  TEST_SINGLE(sli(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sli v30.8h, v29.8h, #1");
  TEST_SINGLE(sli(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sli v30.8h, v29.8h, #15");
  TEST_SINGLE(sli(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sli v30.4s, v29.4s, #1");
  TEST_SINGLE(sli(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sli v30.4s, v29.4s, #31");
  TEST_SINGLE(sli(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sli v30.2d, v29.2d, #1");
  TEST_SINGLE(sli(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sli v30.2d, v29.2d, #63");

  TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sli v30.8b, v29.8b, #1");
  TEST_SINGLE(sli(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sli v30.8b, v29.8b, #7");
  TEST_SINGLE(sli(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sli v30.4h, v29.4h, #1");
  TEST_SINGLE(sli(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sli v30.4h, v29.4h, #15");
  TEST_SINGLE(sli(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sli v30.2s, v29.2s, #1");
  TEST_SINGLE(sli(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sli v30.2s, v29.2s, #31");
  // TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sli v30.1d, v29.1d, #1");
  // TEST_SINGLE(sli(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sli v30.1d, v29.1d, #63");

  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.16b, v29.16b, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshlu v30.16b, v29.16b, #7");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.8h, v29.8h, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshlu v30.8h, v29.8h, #15");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.4s, v29.4s, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshlu v30.4s, v29.4s, #31");
  TEST_SINGLE(sqshlu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "sqshlu v30.2d, v29.2d, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshlu v30.2d, v29.2d, #63");

  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.8b, v29.8b, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshlu v30.8b, v29.8b, #7");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.4h, v29.4h, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshlu v30.4h, v29.4h, #15");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshlu v30.2s, v29.2s, #1");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshlu v30.2s, v29.2s, #31");
  // TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqshlu v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqshlu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshlu v30.1d, v29.1d, #63");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshl v30.16b, v29.16b, #1");
  TEST_SINGLE(uqshl(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshl v30.16b, v29.16b, #7");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshl v30.8h, v29.8h, #1");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqshl v30.8h, v29.8h, #15");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshl v30.4s, v29.4s, #1");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqshl v30.4s, v29.4s, #31");
  TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "uqshl v30.2d, v29.2d, #1");
  TEST_SINGLE(uqshl(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqshl v30.2d, v29.2d, #63");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshl v30.8b, v29.8b, #1");
  TEST_SINGLE(uqshl(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshl v30.8b, v29.8b, #7");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshl v30.4h, v29.4h, #1");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqshl v30.4h, v29.4h, #15");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshl v30.2s, v29.2s, #1");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqshl v30.2s, v29.2s, #31");
  // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "uqshl v30.1d, v29.1d, #1");
  // TEST_SINGLE(uqshl(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshl v30.1d, v29.1d, #63");

  TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.8b, v29.8h, #1");
  TEST_SINGLE(sqshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqshrun v30.8b, v29.8h, #7");
  TEST_SINGLE(sqshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.4h, v29.4s, #1");
  TEST_SINGLE(sqshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqshrun v30.4h, v29.4s, #15");
  TEST_SINGLE(sqshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqshrun v30.2s, v29.2d, #1");
  TEST_SINGLE(sqshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqshrun v30.2s, v29.2d, #31");
  // TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqshrun v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqshrun v30.1d, v29.1d, #63");

  TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.16b, v29.8h, #1");
  TEST_SINGLE(sqshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqshrun2 v30.16b, v29.8h, #7");
  TEST_SINGLE(sqshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.8h, v29.4s, #1");
  TEST_SINGLE(sqshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqshrun2 v30.8h, v29.4s, #15");
  TEST_SINGLE(sqshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqshrun2 v30.4s, v29.2d, #1");
  TEST_SINGLE(sqshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqshrun2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "sqshrun2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(sqshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqshrun2 v30.2d, v29.2d, #63");

  TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.8b, v29.8h, #1");
  TEST_SINGLE(sqrshrun(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "sqrshrun v30.8b, v29.8h, #7");
  TEST_SINGLE(sqrshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.4h, v29.4s, #1");
  TEST_SINGLE(sqrshrun(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "sqrshrun v30.4h, v29.4s, #15");
  TEST_SINGLE(sqrshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "sqrshrun v30.2s, v29.2d, #1");
  TEST_SINGLE(sqrshrun(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "sqrshrun v30.2s, v29.2d, #31");
  // TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "sqrshrun v30.1d, v29.1d, #1");
  // TEST_SINGLE(sqrshrun(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "sqrshrun v30.1d, v29.1d, #63");

  TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.16b, v29.8h, #1");
  TEST_SINGLE(sqrshrun2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "sqrshrun2 v30.16b, v29.8h, #7");
  TEST_SINGLE(sqrshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.8h, v29.4s, #1");
  TEST_SINGLE(sqrshrun2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "sqrshrun2 v30.8h, v29.4s, #15");
  TEST_SINGLE(sqrshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "sqrshrun2 v30.4s, v29.2d, #1");
  TEST_SINGLE(sqrshrun2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "sqrshrun2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "sqrshrun2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(sqrshrun2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "sqrshrun2 v30.2d, v29.2d, #63");

  TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.8b, v29.8h, #1");
  TEST_SINGLE(uqshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqshrn v30.8b, v29.8h, #7");
  TEST_SINGLE(uqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.4h, v29.4s, #1");
  TEST_SINGLE(uqshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqshrn v30.4h, v29.4s, #15");
  TEST_SINGLE(uqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqshrn v30.2s, v29.2d, #1");
  TEST_SINGLE(uqshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqshrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "uqshrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(uqshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqshrn v30.1d, v29.1d, #63");

  TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(uqshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqshrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(uqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(uqshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqshrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(uqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqshrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(uqshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqshrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "uqshrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(uqshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqshrn2 v30.2d, v29.2d, #63");

  TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.8b, v29.8h, #1");
  TEST_SINGLE(uqrshrn(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7), "uqrshrn v30.8b, v29.8h, #7");
  TEST_SINGLE(uqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.4h, v29.4s, #1");
  TEST_SINGLE(uqrshrn(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "uqrshrn v30.4h, v29.4s, #15");
  TEST_SINGLE(uqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "uqrshrn v30.2s, v29.2d, #1");
  TEST_SINGLE(uqrshrn(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "uqrshrn v30.2s, v29.2d, #31");
  // TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "uqrshrn v30.1d, v29.1d, #1");
  // TEST_SINGLE(uqrshrn(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "uqrshrn v30.1d, v29.1d, #63");

  TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.16b, v29.8h, #1");
  TEST_SINGLE(uqrshrn2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7), "uqrshrn2 v30.16b, v29.8h, #7");
  TEST_SINGLE(uqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.8h, v29.4s, #1");
  TEST_SINGLE(uqrshrn2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "uqrshrn2 v30.8h, v29.4s, #15");
  TEST_SINGLE(uqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "uqrshrn2 v30.4s, v29.2d, #1");
  TEST_SINGLE(uqrshrn2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "uqrshrn2 v30.4s, v29.2d, #31");
  // TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1),  "uqrshrn2 v30.2d, v29.2d, #1");
  // TEST_SINGLE(uqrshrn2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "uqrshrn2 v30.2d, v29.2d, #63");

  // TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "ushll v30.8b, v29.8h, #1");
  // TEST_SINGLE(ushll(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "ushll v30.8b, v29.8h, #7");
  TEST_SINGLE(ushll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ushll v30.8h, v29.8b, #1");
  TEST_SINGLE(ushll(SubRegSize::i16Bit, DReg::d30, DReg::d29, 7), "ushll v30.8h, v29.8b, #7");
  TEST_SINGLE(ushll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ushll v30.4s, v29.4h, #1");
  TEST_SINGLE(ushll(SubRegSize::i32Bit, DReg::d30, DReg::d29, 15), "ushll v30.4s, v29.4h, #15");
  TEST_SINGLE(ushll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1), "ushll v30.2d, v29.2s, #1");
  TEST_SINGLE(ushll(SubRegSize::i64Bit, DReg::d30, DReg::d29, 31), "ushll v30.2d, v29.2s, #31");

  // TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "ushll2 v30.16b, v29.8h, #1");
  // TEST_SINGLE(ushll2(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "ushll2 v30.16b, v29.8h, #7");
  TEST_SINGLE(ushll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.8h, v29.16b, #1");
  TEST_SINGLE(ushll2(SubRegSize::i16Bit, QReg::q30, QReg::q29, 7), "ushll2 v30.8h, v29.16b, #7");
  TEST_SINGLE(ushll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.4s, v29.8h, #1");
  TEST_SINGLE(ushll2(SubRegSize::i32Bit, QReg::q30, QReg::q29, 15), "ushll2 v30.4s, v29.8h, #15");
  TEST_SINGLE(ushll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ushll2 v30.2d, v29.4s, #1");
  TEST_SINGLE(ushll2(SubRegSize::i64Bit, QReg::q30, QReg::q29, 31), "ushll2 v30.2d, v29.4s, #31");

  // TEST_SINGLE(uxtl(SubRegSize::i8Bit, DReg::d30, DReg::d29),   "uxtl v30.8b, v29.8h");
  TEST_SINGLE(uxtl(SubRegSize::i16Bit, DReg::d30, DReg::d29), "uxtl v30.8h, v29.8b");
  TEST_SINGLE(uxtl(SubRegSize::i32Bit, DReg::d30, DReg::d29), "uxtl v30.4s, v29.4h");
  TEST_SINGLE(uxtl(SubRegSize::i64Bit, DReg::d30, DReg::d29), "uxtl v30.2d, v29.2s");

  // TEST_SINGLE(uxtl2(SubRegSize::i8Bit, QReg::q30, QReg::q29),   "uxtl2 v30.16b, v29.8h");
  TEST_SINGLE(uxtl2(SubRegSize::i16Bit, QReg::q30, QReg::q29), "uxtl2 v30.8h, v29.16b");
  TEST_SINGLE(uxtl2(SubRegSize::i32Bit, QReg::q30, QReg::q29), "uxtl2 v30.4s, v29.8h");
  TEST_SINGLE(uxtl2(SubRegSize::i64Bit, QReg::q30, QReg::q29), "uxtl2 v30.2d, v29.4s");

  // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "ucvtf v30.16b, v29.16b, #1");
  // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "ucvtf v30.16b, v29.16b, #7");
  TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.8h, v29.8h, #1");
  TEST_SINGLE(ucvtf(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "ucvtf v30.8h, v29.8h, #15");
  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.4s, v29.4s, #1");
  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "ucvtf v30.4s, v29.4s, #31");
  TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "ucvtf v30.2d, v29.2d, #1");
  TEST_SINGLE(ucvtf(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "ucvtf v30.2d, v29.2d, #63");

  // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "ucvtf v30.8b, v29.8b, #1");
  // TEST_SINGLE(ucvtf(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "ucvtf v30.8b, v29.8b, #7");
  TEST_SINGLE(ucvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.4h, v29.4h, #1");
  TEST_SINGLE(ucvtf(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "ucvtf v30.4h, v29.4h, #15");
  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "ucvtf v30.2s, v29.2s, #1");
  TEST_SINGLE(ucvtf(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "ucvtf v30.2s, v29.2s, #31");
  // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "ucvtf v30.1d, v29.1d, #1");
  // TEST_SINGLE(ucvtf(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "ucvtf v30.1d, v29.1d, #63");

  // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 1),   "fcvtzu v30.16b, v29.16b, #1");
  // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, QReg::q30, QReg::q29, 7),   "fcvtzu v30.16b, v29.16b, #7");
  TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.8h, v29.8h, #1");
  TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, QReg::q30, QReg::q29, 15), "fcvtzu v30.8h, v29.8h, #15");
  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.4s, v29.4s, #1");
  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, QReg::q30, QReg::q29, 31), "fcvtzu v30.4s, v29.4s, #31");
  TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 1), "fcvtzu v30.2d, v29.2d, #1");
  TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, QReg::q30, QReg::q29, 63), "fcvtzu v30.2d, v29.2d, #63");

  // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 1),   "fcvtzu v30.8b, v29.8b, #1");
  // TEST_SINGLE(fcvtzu(SubRegSize::i8Bit, DReg::d30, DReg::d29, 7),   "fcvtzu v30.8b, v29.8b, #7");
  TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.4h, v29.4h, #1");
  TEST_SINGLE(fcvtzu(SubRegSize::i16Bit, DReg::d30, DReg::d29, 15), "fcvtzu v30.4h, v29.4h, #15");
  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 1), "fcvtzu v30.2s, v29.2s, #1");
  TEST_SINGLE(fcvtzu(SubRegSize::i32Bit, DReg::d30, DReg::d29, 31), "fcvtzu v30.2s, v29.2s, #31");
  // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 1),  "fcvtzu v30.1d, v29.1d, #1");
  // TEST_SINGLE(fcvtzu(SubRegSize::i64Bit, DReg::d30, DReg::d29, 63), "fcvtzu v30.1d, v29.1d, #63");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Advanced SIMD vector x indexed element") {
  TEST_SINGLE(smlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(smlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlal v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(smlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlal v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(smlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(smlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlal2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlal2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlal2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlal2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(smlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlal2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(sqdmlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(sqdmlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlal v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(sqdmlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlal v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(sqdmlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(sqdmlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlal2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlal2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(sqdmlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlal2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(smlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(smlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlsl v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(smlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlsl v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(smlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(smlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smlsl2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smlsl2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smlsl2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smlsl2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(smlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smlsl2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(sqdmlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(sqdmlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlsl v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(sqdmlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlsl v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(sqdmlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(sqdmlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlsl2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmlsl2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(sqdmlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmlsl2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(mul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mul v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(mul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mul v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mul v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mul v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mul v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(mul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mul v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(mul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mul v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(mul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mul v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mul v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mul v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mul v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(mul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mul v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(smull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(smull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smull v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smull v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(smull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smull v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(smull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(smull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "smull2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "smull2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "smull2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "smull2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(smull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "smull2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(sqdmull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(sqdmull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmull v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(sqdmull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmull v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(sqdmull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(sqdmull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmull2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "sqdmull2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(sqdmull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "sqdmull2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqdmulh v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqdmulh v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqdmulh v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqdmulh v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqdmulh v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqdmulh v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqdmulh v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqdmulh v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqdmulh v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqdmulh v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqdmulh v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqdmulh v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmulh v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmulh v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmulh v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmulh v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmulh v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmulh v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmulh v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmulh v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmulh v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmulh v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmulh v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmulh v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(sdot(QReg::q30, QReg::q29, QReg::q28, 0), "sdot v30.4s, v29.16b, v28.4b[0]");
  TEST_SINGLE(sdot(QReg::q30, QReg::q29, QReg::q28, 3), "sdot v30.4s, v29.16b, v28.4b[3]");

  TEST_SINGLE(sdot(QReg::q30, QReg::q29, QReg::q15, 0), "sdot v30.4s, v29.16b, v15.4b[0]");
  TEST_SINGLE(sdot(QReg::q30, QReg::q29, QReg::q15, 3), "sdot v30.4s, v29.16b, v15.4b[3]");

  TEST_SINGLE(sdot(DReg::d30, DReg::d29, DReg::d28, 0), "sdot v30.2s, v29.8b, v28.4b[0]");
  TEST_SINGLE(sdot(DReg::d30, DReg::d29, DReg::d28, 3), "sdot v30.2s, v29.8b, v28.4b[3]");

  TEST_SINGLE(sdot(DReg::d30, DReg::d29, DReg::d15, 0), "sdot v30.2s, v29.8b, v15.4b[0]");
  TEST_SINGLE(sdot(DReg::d30, DReg::d29, DReg::d15, 3), "sdot v30.2s, v29.8b, v15.4b[3]");

  TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmla v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(fmla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "fmla v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(fmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmla v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(fmla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "fmla v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmls v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(fmls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "fmls v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(fmls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmls v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(fmls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "fmls v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmul v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "fmul v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(fmul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmul v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "fmul v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(sudot(QReg::q30, QReg::q29, QReg::q28, 0), "sudot v30.4s, v29.16b, v28.4b[0]");
  TEST_SINGLE(sudot(QReg::q30, QReg::q29, QReg::q28, 3), "sudot v30.4s, v29.16b, v28.4b[3]");

  TEST_SINGLE(sudot(QReg::q30, QReg::q29, QReg::q15, 0), "sudot v30.4s, v29.16b, v15.4b[0]");
  TEST_SINGLE(sudot(QReg::q30, QReg::q29, QReg::q15, 3), "sudot v30.4s, v29.16b, v15.4b[3]");

  TEST_SINGLE(sudot(DReg::d30, DReg::d29, DReg::d28, 0), "sudot v30.2s, v29.8b, v28.4b[0]");
  TEST_SINGLE(sudot(DReg::d30, DReg::d29, DReg::d28, 3), "sudot v30.2s, v29.8b, v28.4b[3]");

  TEST_SINGLE(sudot(DReg::d30, DReg::d29, DReg::d15, 0), "sudot v30.2s, v29.8b, v15.4b[0]");
  TEST_SINGLE(sudot(DReg::d30, DReg::d29, DReg::d15, 3), "sudot v30.2s, v29.8b, v15.4b[3]");

  // Unimplemented in vixl disassembler
  // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 0), "bfdot v30.4s, v29.8h, v28.2h[0]");
  // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q28, 3), "bfdot v30.4s, v29.8h, v28.2h[3]");

  // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 0), "bfdot v30.4s, v29.8h, v15.2h[0]");
  // TEST_SINGLE(bfdot(QReg::q30, QReg::q29, QReg::q15, 3), "bfdot v30.4s, v29.8h, v15.2h[3]");

  // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 0), "bfdot v30.2s, v29.4h, v28.2h[0]");
  // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d28, 3), "bfdot v30.2s, v29.4h, v28.2h[3]");

  // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 0), "bfdot v30.2s, v29.4h, v15.2h[0]");
  // TEST_SINGLE(bfdot(DReg::d30, DReg::d29, DReg::d15, 3), "bfdot v30.2s, v29.4h, v15.2h[3]");

  TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmla v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmla v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(fmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmla v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "fmla v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmla v30.2d, v29.2d, v15.d[0]");
  TEST_SINGLE(fmla(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmla v30.2d, v29.2d, v15.d[1]");

  // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmla v30.1d, v29.1d, v15.d[0]");
  // TEST_SINGLE(fmla(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmla v30.1d, v29.1d, v15.d[1]");

  TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmls v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmls v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(fmls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmls v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "fmls v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmls v30.2d, v29.2d, v15.d[0]");
  TEST_SINGLE(fmls(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmls v30.2d, v29.2d, v15.d[1]");

  // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmls v30.1d, v29.1d, v15.d[0]");
  // TEST_SINGLE(fmls(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmls v30.1d, v29.1d, v15.d[1]");

  TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmul v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "fmul v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(fmul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmul v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "fmul v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 0), "fmul v30.2d, v29.2d, v15.d[0]");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, QReg::q30, QReg::q29, QReg::q15, 1), "fmul v30.2d, v29.2d, v15.d[1]");

  // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 0), "fmul v30.1d, v29.1d, v15.d[0]");
  // TEST_SINGLE(fmul(SubRegSize::i64Bit, DReg::d30, DReg::d29, DReg::d15, 1), "fmul v30.1d, v29.1d, v15.d[1]");

  TEST_SINGLE(fmlal(QReg::q30, QReg::q29, QReg::q15, 0), "fmlal v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(fmlal(QReg::q30, QReg::q29, QReg::q15, 7), "fmlal v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(fmlal(DReg::d30, DReg::d29, DReg::d15, 0), "fmlal v30.2s, v29.2h, v15.h[0]");
  TEST_SINGLE(fmlal(DReg::d30, DReg::d29, DReg::d15, 7), "fmlal v30.2s, v29.2h, v15.h[7]");

  TEST_SINGLE(fmlal2(QReg::q30, QReg::q29, QReg::q15, 0), "fmlal2 v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(fmlal2(QReg::q30, QReg::q29, QReg::q15, 7), "fmlal2 v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(fmlal2(DReg::d30, DReg::d29, DReg::d15, 0), "fmlal2 v30.2s, v29.2h, v15.h[0]");
  TEST_SINGLE(fmlal2(DReg::d30, DReg::d29, DReg::d15, 7), "fmlal2 v30.2s, v29.2h, v15.h[7]");

  TEST_SINGLE(fmlsl(QReg::q30, QReg::q29, QReg::q15, 0), "fmlsl v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(fmlsl(QReg::q30, QReg::q29, QReg::q15, 7), "fmlsl v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(fmlsl(DReg::d30, DReg::d29, DReg::d15, 0), "fmlsl v30.2s, v29.2h, v15.h[0]");
  TEST_SINGLE(fmlsl(DReg::d30, DReg::d29, DReg::d15, 7), "fmlsl v30.2s, v29.2h, v15.h[7]");

  TEST_SINGLE(fmlsl2(QReg::q30, QReg::q29, QReg::q15, 0), "fmlsl2 v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(fmlsl2(QReg::q30, QReg::q29, QReg::q15, 7), "fmlsl2 v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(fmlsl2(DReg::d30, DReg::d29, DReg::d15, 0), "fmlsl2 v30.2s, v29.2h, v15.h[0]");
  TEST_SINGLE(fmlsl2(DReg::d30, DReg::d29, DReg::d15, 7), "fmlsl2 v30.2s, v29.2h, v15.h[7]");

  TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q28, 0), "usdot v30.4s, v29.16b, v28.4b[0]");
  TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q28, 3), "usdot v30.4s, v29.16b, v28.4b[3]");

  TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q15, 0), "usdot v30.4s, v29.16b, v15.4b[0]");
  TEST_SINGLE(usdot(QReg::q30, QReg::q29, QReg::q15, 3), "usdot v30.4s, v29.16b, v15.4b[3]");

  TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d28, 0), "usdot v30.2s, v29.8b, v28.4b[0]");
  TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d28, 3), "usdot v30.2s, v29.8b, v28.4b[3]");

  TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d15, 0), "usdot v30.2s, v29.8b, v15.4b[0]");
  TEST_SINGLE(usdot(DReg::d30, DReg::d29, DReg::d15, 3), "usdot v30.2s, v29.8b, v15.4b[3]");

  // Unimplemented in vixl disassembler
  // TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalb v30.4s, v29.8h, v15.h[0]");
  // TEST_SINGLE(bfmlalb(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalb v30.4s, v29.8h, v15.h[7]");

  // TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 0), "bfmlalt v30.4s, v29.8h, v15.h[0]");
  // TEST_SINGLE(bfmlalt(VReg::v30, VReg::v29, VReg::v15, 7), "bfmlalt v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mla v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(mla(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mla v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mla v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mla v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mla v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(mla(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mla v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(mla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mla v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(mla(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mla v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mla v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mla v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mla v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(mla(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mla v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(umlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(umlal(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlal v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(umlal(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlal v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(umlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(umlal2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlal2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlal2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlal2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlal2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(umlal2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlal2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(mls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mls v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(mls(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "mls v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "mls v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "mls v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "mls v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(mls(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "mls v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(mls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mls v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(mls(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "mls v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "mls v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "mls v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "mls v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(mls(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "mls v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(umlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(umlsl(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlsl v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(umlsl(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlsl v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(umlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(umlsl2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umlsl2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umlsl2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umlsl2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umlsl2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(umlsl2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umlsl2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(umull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull v30.4s, v29.4h, v15.h[0]");
  TEST_SINGLE(umull(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umull v30.4s, v29.4h, v15.h[7]");

  TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull v30.2d, v29.2s, v28.s[0]");
  TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull v30.2d, v29.2s, v28.s[3]");

  TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull v30.2d, v29.2s, v15.s[0]");
  TEST_SINGLE(umull(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umull v30.2d, v29.2s, v15.s[3]");

  TEST_SINGLE(umull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull2 v30.4s, v29.8h, v15.h[0]");
  TEST_SINGLE(umull2(SubRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v15, 7), "umull2 v30.4s, v29.8h, v15.h[7]");

  TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "umull2 v30.2d, v29.4s, v28.s[0]");
  TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 3), "umull2 v30.2d, v29.4s, v28.s[3]");

  TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 0), "umull2 v30.2d, v29.4s, v15.s[0]");
  TEST_SINGLE(umull2(SubRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v15, 3), "umull2 v30.2d, v29.4s, v15.s[3]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmlah v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmlah v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlah v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlah v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmlah v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmlah v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlah v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmlah v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlah v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlah v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlah v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmlah v30.2s, v29.2s, v15.s[3]");

  TEST_SINGLE(udot(QReg::q30, QReg::q29, QReg::q28, 0), "udot v30.4s, v29.16b, v28.4b[0]");
  TEST_SINGLE(udot(QReg::q30, QReg::q29, QReg::q28, 3), "udot v30.4s, v29.16b, v28.4b[3]");

  TEST_SINGLE(udot(QReg::q30, QReg::q29, QReg::q15, 0), "udot v30.4s, v29.16b, v15.4b[0]");
  TEST_SINGLE(udot(QReg::q30, QReg::q29, QReg::q15, 3), "udot v30.4s, v29.16b, v15.4b[3]");

  TEST_SINGLE(udot(DReg::d30, DReg::d29, DReg::d28, 0), "udot v30.2s, v29.8b, v28.4b[0]");
  TEST_SINGLE(udot(DReg::d30, DReg::d29, DReg::d28, 3), "udot v30.2s, v29.8b, v28.4b[3]");

  TEST_SINGLE(udot(DReg::d30, DReg::d29, DReg::d15, 0), "udot v30.2s, v29.8b, v15.4b[0]");
  TEST_SINGLE(udot(DReg::d30, DReg::d29, DReg::d15, 3), "udot v30.2s, v29.8b, v15.4b[3]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmlsh v30.8h, v29.8h, v15.h[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, QReg::q30, QReg::q29, QReg::q15, 7), "sqrdmlsh v30.8h, v29.8h, v15.h[7]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 0), "sqrdmlsh v30.4s, v29.4s, v28.s[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q28, 3), "sqrdmlsh v30.4s, v29.4s, v28.s[3]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 0), "sqrdmlsh v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, QReg::q30, QReg::q29, QReg::q15, 3), "sqrdmlsh v30.4s, v29.4s, v15.s[3]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlsh v30.4h, v29.4h, v15.h[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, DReg::d30, DReg::d29, DReg::d15, 7), "sqrdmlsh v30.4h, v29.4h, v15.h[7]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 0), "sqrdmlsh v30.2s, v29.2s, v28.s[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d28, 3), "sqrdmlsh v30.2s, v29.2s, v28.s[3]");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 0), "sqrdmlsh v30.2s, v29.2s, v15.s[0]");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, DReg::d30, DReg::d29, DReg::d15, 3), "sqrdmlsh v30.2s, v29.2s, v15.s[3]");
}
// Exercises the sm3tt1a / sm3tt1b / sm3tt2a / sm3tt2b emitter entry points.
// Each one is invoked with the same three vector registers (v30, v29, v15) and
// with every index value from 0 through 3, i.e. all values a 2-bit immediate can hold.
// The expected text shows the index as the lane selector of the last operand (`v15.s[N]`).
// NOTE(review): TEST_SINGLE is defined outside this view; presumably it emits the
// instruction and compares its disassembly with the given string - confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic three-register, imm2") {
  // sm3tt1a: index 0..3
  TEST_SINGLE(sm3tt1a(VReg::v30, VReg::v29, VReg::v15, 0), "sm3tt1a v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sm3tt1a(VReg::v30, VReg::v29, VReg::v15, 1), "sm3tt1a v30.4s, v29.4s, v15.s[1]");
  TEST_SINGLE(sm3tt1a(VReg::v30, VReg::v29, VReg::v15, 2), "sm3tt1a v30.4s, v29.4s, v15.s[2]");
  TEST_SINGLE(sm3tt1a(VReg::v30, VReg::v29, VReg::v15, 3), "sm3tt1a v30.4s, v29.4s, v15.s[3]");

  // sm3tt1b: index 0..3
  TEST_SINGLE(sm3tt1b(VReg::v30, VReg::v29, VReg::v15, 0), "sm3tt1b v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sm3tt1b(VReg::v30, VReg::v29, VReg::v15, 1), "sm3tt1b v30.4s, v29.4s, v15.s[1]");
  TEST_SINGLE(sm3tt1b(VReg::v30, VReg::v29, VReg::v15, 2), "sm3tt1b v30.4s, v29.4s, v15.s[2]");
  TEST_SINGLE(sm3tt1b(VReg::v30, VReg::v29, VReg::v15, 3), "sm3tt1b v30.4s, v29.4s, v15.s[3]");

  // sm3tt2a: index 0..3
  TEST_SINGLE(sm3tt2a(VReg::v30, VReg::v29, VReg::v15, 0), "sm3tt2a v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sm3tt2a(VReg::v30, VReg::v29, VReg::v15, 1), "sm3tt2a v30.4s, v29.4s, v15.s[1]");
  TEST_SINGLE(sm3tt2a(VReg::v30, VReg::v29, VReg::v15, 2), "sm3tt2a v30.4s, v29.4s, v15.s[2]");
  TEST_SINGLE(sm3tt2a(VReg::v30, VReg::v29, VReg::v15, 3), "sm3tt2a v30.4s, v29.4s, v15.s[3]");

  // sm3tt2b: index 0..3
  TEST_SINGLE(sm3tt2b(VReg::v30, VReg::v29, VReg::v15, 0), "sm3tt2b v30.4s, v29.4s, v15.s[0]");
  TEST_SINGLE(sm3tt2b(VReg::v30, VReg::v29, VReg::v15, 1), "sm3tt2b v30.4s, v29.4s, v15.s[1]");
  TEST_SINGLE(sm3tt2b(VReg::v30, VReg::v29, VReg::v15, 2), "sm3tt2b v30.4s, v29.4s, v15.s[2]");
  TEST_SINGLE(sm3tt2b(VReg::v30, VReg::v29, VReg::v15, 3), "sm3tt2b v30.4s, v29.4s, v15.s[3]");
}
// Exercises the three-register cryptographic emitter entry points listed below,
// one invocation each, always with v30, v29 and v15.
// Besides the sha512* instructions, this case also covers rax1, sm3partw1,
// sm3partw2 and sm4ekey.
// NOTE(review): these are presumably grouped here because they share an encoding
// class with the SHA-512 instructions - confirm against the emitter.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic three-register SHA 512") {
  // For sha512h / sha512h2 the expected text names the first two operands as
  // q-registers; only the third operand carries a `.2d` arrangement.
  TEST_SINGLE(sha512h(VReg::v30, VReg::v29, VReg::v15), "sha512h q30, q29, v15.2d");
  TEST_SINGLE(sha512h2(VReg::v30, VReg::v29, VReg::v15), "sha512h2 q30, q29, v15.2d");
  // The remaining instructions use a vector arrangement on all three operands:
  // `.2d` for sha512su1 / rax1 and `.4s` for the sm3 / sm4 instructions.
  TEST_SINGLE(sha512su1(VReg::v30, VReg::v29, VReg::v15), "sha512su1 v30.2d, v29.2d, v15.2d");
  TEST_SINGLE(rax1(VReg::v30, VReg::v29, VReg::v15), "rax1 v30.2d, v29.2d, v15.2d");
  TEST_SINGLE(sm3partw1(VReg::v30, VReg::v29, VReg::v15), "sm3partw1 v30.4s, v29.4s, v15.4s");
  TEST_SINGLE(sm3partw2(VReg::v30, VReg::v29, VReg::v15), "sm3partw2 v30.4s, v29.4s, v15.4s");
  TEST_SINGLE(sm4ekey(VReg::v30, VReg::v29, VReg::v15), "sm4ekey v30.4s, v29.4s, v15.4s");
}
// Exercises the four-register cryptographic emitter entry points (eor3, bcax,
// sm3ss1), one invocation each with v30, v29, v15 and v7.
// The expected text uses the `.16b` arrangement for eor3 / bcax and `.4s` for sm3ss1.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic four-register") {
  TEST_SINGLE(eor3(VReg::v30, VReg::v29, VReg::v15, VReg::v7), "eor3 v30.16b, v29.16b, v15.16b, v7.16b");
  TEST_SINGLE(bcax(VReg::v30, VReg::v29, VReg::v15, VReg::v7), "bcax v30.16b, v29.16b, v15.16b, v7.16b");
  TEST_SINGLE(sm3ss1(VReg::v30, VReg::v29, VReg::v15, VReg::v7), "sm3ss1 v30.4s, v29.4s, v15.4s, v7.4s");
}
// Exercises the two-register cryptographic emitter entry points (sha512su0 and
// sm4e), one invocation each with v30 and v29.
// The expected text uses the `.2d` arrangement for sha512su0 and `.4s` for sm4e.
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Cryptographic two-register SHA 512") {
  TEST_SINGLE(sha512su0(VReg::v30, VReg::v29), "sha512su0 v30.2d, v29.2d");
  TEST_SINGLE(sm4e(VReg::v30, VReg::v29), "sm4e v30.4s, v29.4s");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating-point and fixed-point") {
  TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf h29, w30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf h29, w30, #32");
  TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf s29, w30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf s29, w30, #32");
  TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "scvtf d29, w30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "scvtf d29, w30, #32");

  TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf h29, x30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf h29, x30, #64");
  TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf s29, x30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf s29, x30, #64");
  TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "scvtf d29, x30, #1");
  TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "scvtf d29, x30, #64");

  TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf h29, w30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf h29, w30, #32");
  TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf s29, w30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf s29, w30, #32");
  TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 1), "ucvtf d29, w30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i32Bit, Reg::r30, 32), "ucvtf d29, w30, #32");

  TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf h29, x30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf h29, x30, #64");
  TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf s29, x30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf s29, x30, #64");
  TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 1), "ucvtf d29, x30, #1");
  TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v29, Size::i64Bit, Reg::r30, 64), "ucvtf d29, x30, #64");

  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs w30, h29, #1");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 32), "fcvtzs w30, h29, #32");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs w30, s29, #1");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 32), "fcvtzs w30, s29, #32");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs w30, d29, #1");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 32), "fcvtzs w30, d29, #32");

  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzs x30, h29, #1");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 64), "fcvtzs x30, h29, #64");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzs x30, s29, #1");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 64), "fcvtzs x30, s29, #64");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzs x30, d29, #1");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 64), "fcvtzs x30, d29, #64");

  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzu w30, h29, #1");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 32), "fcvtzu w30, h29, #32");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu w30, s29, #1");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 32), "fcvtzu w30, s29, #32");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu w30, d29, #1");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 32), "fcvtzu w30, d29, #32");

  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 1), "fcvtzu x30, h29, #1");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i16Bit, VReg::v29, 64), "fcvtzu x30, h29, #64");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 1), "fcvtzu x30, s29, #1");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i32Bit, VReg::v29, 64), "fcvtzu x30, s29, #64");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 1), "fcvtzu x30, d29, #1");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r30, ScalarRegSize::i64Bit, VReg::v29, 64), "fcvtzu x30, d29, #64");
}
// Disassembly checks for the scalar conversions and moves between general-purpose registers
// and half/single/double floating-point registers.
// TEST_SINGLE is defined outside this section; it is used here as
// (emitter call, expected disassembly text).
TEST_CASE_METHOD(TestDisassembler, "Emitter: ASIMD: Conversion between floating-point and integer") {
  // fcvtns: w/x destination from h, s and d sources.
  TEST_SINGLE(fcvtns(Size::i32Bit, Reg::r29, HReg::h30), "fcvtns w29, h30");
  TEST_SINGLE(fcvtns(Size::i64Bit, Reg::r29, HReg::h30), "fcvtns x29, h30");
  TEST_SINGLE(fcvtns(Size::i32Bit, Reg::r29, SReg::s30), "fcvtns w29, s30");
  TEST_SINGLE(fcvtns(Size::i64Bit, Reg::r29, SReg::s30), "fcvtns x29, s30");
  TEST_SINGLE(fcvtns(Size::i32Bit, Reg::r29, DReg::d30), "fcvtns w29, d30");
  TEST_SINGLE(fcvtns(Size::i64Bit, Reg::r29, DReg::d30), "fcvtns x29, d30");

  // fcvtnu: same register matrix as fcvtns.
  TEST_SINGLE(fcvtnu(Size::i32Bit, Reg::r29, HReg::h30), "fcvtnu w29, h30");
  TEST_SINGLE(fcvtnu(Size::i64Bit, Reg::r29, HReg::h30), "fcvtnu x29, h30");
  TEST_SINGLE(fcvtnu(Size::i32Bit, Reg::r29, SReg::s30), "fcvtnu w29, s30");
  TEST_SINGLE(fcvtnu(Size::i64Bit, Reg::r29, SReg::s30), "fcvtnu x29, s30");
  TEST_SINGLE(fcvtnu(Size::i32Bit, Reg::r29, DReg::d30), "fcvtnu w29, d30");
  TEST_SINGLE(fcvtnu(Size::i64Bit, Reg::r29, DReg::d30), "fcvtnu x29, d30");

  // scvtf: h/s/d destination from a w or x source (the Size argument selects the GPR width).
  TEST_SINGLE(scvtf(Size::i32Bit, HReg::h30, Reg::r29), "scvtf h30, w29");
  TEST_SINGLE(scvtf(Size::i64Bit, HReg::h30, Reg::r29), "scvtf h30, x29");
  TEST_SINGLE(scvtf(Size::i32Bit, SReg::s30, Reg::r29), "scvtf s30, w29");
  TEST_SINGLE(scvtf(Size::i64Bit, SReg::s30, Reg::r29), "scvtf s30, x29");
  TEST_SINGLE(scvtf(Size::i32Bit, DReg::d30, Reg::r29), "scvtf d30, w29");
  TEST_SINGLE(scvtf(Size::i64Bit, DReg::d30, Reg::r29), "scvtf d30, x29");

  // ucvtf: same register matrix as scvtf.
  TEST_SINGLE(ucvtf(Size::i32Bit, HReg::h30, Reg::r29), "ucvtf h30, w29");
  TEST_SINGLE(ucvtf(Size::i64Bit, HReg::h30, Reg::r29), "ucvtf h30, x29");
  TEST_SINGLE(ucvtf(Size::i32Bit, SReg::s30, Reg::r29), "ucvtf s30, w29");
  TEST_SINGLE(ucvtf(Size::i64Bit, SReg::s30, Reg::r29), "ucvtf s30, x29");
  TEST_SINGLE(ucvtf(Size::i32Bit, DReg::d30, Reg::r29), "ucvtf d30, w29");
  TEST_SINGLE(ucvtf(Size::i64Bit, DReg::d30, Reg::r29), "ucvtf d30, x29");

  // fcvtas: w/x destination from h, s and d sources.
  TEST_SINGLE(fcvtas(Size::i32Bit, Reg::r29, HReg::h30), "fcvtas w29, h30");
  TEST_SINGLE(fcvtas(Size::i64Bit, Reg::r29, HReg::h30), "fcvtas x29, h30");
  TEST_SINGLE(fcvtas(Size::i32Bit, Reg::r29, SReg::s30), "fcvtas w29, s30");
  TEST_SINGLE(fcvtas(Size::i64Bit, Reg::r29, SReg::s30), "fcvtas x29, s30");
  TEST_SINGLE(fcvtas(Size::i32Bit, Reg::r29, DReg::d30), "fcvtas w29, d30");
  TEST_SINGLE(fcvtas(Size::i64Bit, Reg::r29, DReg::d30), "fcvtas x29, d30");

  // fcvtau: same register matrix as fcvtas.
  TEST_SINGLE(fcvtau(Size::i32Bit, Reg::r29, HReg::h30), "fcvtau w29, h30");
  TEST_SINGLE(fcvtau(Size::i64Bit, Reg::r29, HReg::h30), "fcvtau x29, h30");
  TEST_SINGLE(fcvtau(Size::i32Bit, Reg::r29, SReg::s30), "fcvtau w29, s30");
  TEST_SINGLE(fcvtau(Size::i64Bit, Reg::r29, SReg::s30), "fcvtau x29, s30");
  TEST_SINGLE(fcvtau(Size::i32Bit, Reg::r29, DReg::d30), "fcvtau w29, d30");
  TEST_SINGLE(fcvtau(Size::i64Bit, Reg::r29, DReg::d30), "fcvtau x29, d30");

  // fmov, FP register -> GPR.
  // The disabled cases pair a 64-bit GPR with an S register or a 32-bit GPR with a D register.
  // NOTE(review): presumably disabled because these size combinations are not encodable - confirm.
  TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, HReg::h30), "fmov w29, h30");
  TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, HReg::h30), "fmov x29, h30");
  TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, SReg::s30), "fmov w29, s30");
  // TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, SReg::s30), "fmov x29, s30");
  // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, DReg::d30), "fmov w29, d30");
  TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, DReg::d30), "fmov x29, d30");

  // fmov, vector register -> GPR. With the trailing bool false the result disassembles as the
  // d-register form; with true it disassembles as the upper element, v30.D[1].
  // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, false), "fmov w29, s30");
  TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, VReg::v30, false), "fmov x29, d30");

  // TEST_SINGLE(fmov(Size::i32Bit, Reg::r29, VReg::v30, true), "fmov w29, s30");
  TEST_SINGLE(fmov(Size::i64Bit, Reg::r29, VReg::v30, true), "fmov x29, v30.D[1]");

  // fmov, GPR -> FP register; the same size combinations are disabled as in the opposite direction.
  TEST_SINGLE(fmov(Size::i32Bit, HReg::h30, Reg::r29), "fmov h30, w29");
  TEST_SINGLE(fmov(Size::i64Bit, HReg::h30, Reg::r29), "fmov h30, x29");
  TEST_SINGLE(fmov(Size::i32Bit, SReg::s30, Reg::r29), "fmov s30, w29");
  // TEST_SINGLE(fmov(Size::i64Bit, SReg::s30, Reg::r29), "fmov s30, x29");
  // TEST_SINGLE(fmov(Size::i32Bit, DReg::d30, Reg::r29), "fmov d30, w29");
  TEST_SINGLE(fmov(Size::i64Bit, DReg::d30, Reg::r29), "fmov d30, x29");

  // fmov, GPR -> vector register; the trailing bool again selects the upper element (v30.D[1]).
  // TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, Reg::r29, false), "fmov s30, w29");
  TEST_SINGLE(fmov(Size::i64Bit, VReg::v30, Reg::r29, false), "fmov d30, x29");

  // NOTE(review): the expected text of the disabled i32Bit/true case below matches the
  // i64Bit/false form above; it looks copied and should be revisited if the case is ever enabled.
  // TEST_SINGLE(fmov(Size::i32Bit, VReg::v30, Reg::r29, true), "fmov d30, x29");
  TEST_SINGLE(fmov(Size::i64Bit, VReg::v30, Reg::r29, true), "fmov v30.D[1], x29");

  // fcvtps: w/x destination from h, s and d sources.
  TEST_SINGLE(fcvtps(Size::i32Bit, Reg::r29, HReg::h30), "fcvtps w29, h30");
  TEST_SINGLE(fcvtps(Size::i64Bit, Reg::r29, HReg::h30), "fcvtps x29, h30");
  TEST_SINGLE(fcvtps(Size::i32Bit, Reg::r29, SReg::s30), "fcvtps w29, s30");
  TEST_SINGLE(fcvtps(Size::i64Bit, Reg::r29, SReg::s30), "fcvtps x29, s30");
  TEST_SINGLE(fcvtps(Size::i32Bit, Reg::r29, DReg::d30), "fcvtps w29, d30");
  TEST_SINGLE(fcvtps(Size::i64Bit, Reg::r29, DReg::d30), "fcvtps x29, d30");

  // fcvtpu: same register matrix as fcvtps.
  TEST_SINGLE(fcvtpu(Size::i32Bit, Reg::r29, HReg::h30), "fcvtpu w29, h30");
  TEST_SINGLE(fcvtpu(Size::i64Bit, Reg::r29, HReg::h30), "fcvtpu x29, h30");
  TEST_SINGLE(fcvtpu(Size::i32Bit, Reg::r29, SReg::s30), "fcvtpu w29, s30");
  TEST_SINGLE(fcvtpu(Size::i64Bit, Reg::r29, SReg::s30), "fcvtpu x29, s30");
  TEST_SINGLE(fcvtpu(Size::i32Bit, Reg::r29, DReg::d30), "fcvtpu w29, d30");
  TEST_SINGLE(fcvtpu(Size::i64Bit, Reg::r29, DReg::d30), "fcvtpu x29, d30");

  // fcvtms: w/x destination from h, s and d sources.
  TEST_SINGLE(fcvtms(Size::i32Bit, Reg::r29, HReg::h30), "fcvtms w29, h30");
  TEST_SINGLE(fcvtms(Size::i64Bit, Reg::r29, HReg::h30), "fcvtms x29, h30");
  TEST_SINGLE(fcvtms(Size::i32Bit, Reg::r29, SReg::s30), "fcvtms w29, s30");
  TEST_SINGLE(fcvtms(Size::i64Bit, Reg::r29, SReg::s30), "fcvtms x29, s30");
  TEST_SINGLE(fcvtms(Size::i32Bit, Reg::r29, DReg::d30), "fcvtms w29, d30");
  TEST_SINGLE(fcvtms(Size::i64Bit, Reg::r29, DReg::d30), "fcvtms x29, d30");

  // fcvtmu: same register matrix as fcvtms.
  TEST_SINGLE(fcvtmu(Size::i32Bit, Reg::r29, HReg::h30), "fcvtmu w29, h30");
  TEST_SINGLE(fcvtmu(Size::i64Bit, Reg::r29, HReg::h30), "fcvtmu x29, h30");
  TEST_SINGLE(fcvtmu(Size::i32Bit, Reg::r29, SReg::s30), "fcvtmu w29, s30");
  TEST_SINGLE(fcvtmu(Size::i64Bit, Reg::r29, SReg::s30), "fcvtmu x29, s30");
  TEST_SINGLE(fcvtmu(Size::i32Bit, Reg::r29, DReg::d30), "fcvtmu w29, d30");
  TEST_SINGLE(fcvtmu(Size::i64Bit, Reg::r29, DReg::d30), "fcvtmu x29, d30");

  // fcvtzs: w/x destination from h, s and d sources (form without an immediate operand).
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r29, HReg::h30), "fcvtzs w29, h30");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r29, HReg::h30), "fcvtzs x29, h30");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r29, SReg::s30), "fcvtzs w29, s30");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r29, SReg::s30), "fcvtzs x29, s30");
  TEST_SINGLE(fcvtzs(Size::i32Bit, Reg::r29, DReg::d30), "fcvtzs w29, d30");
  TEST_SINGLE(fcvtzs(Size::i64Bit, Reg::r29, DReg::d30), "fcvtzs x29, d30");

  // fcvtzu: same register matrix as fcvtzs.
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r29, HReg::h30), "fcvtzu w29, h30");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r29, HReg::h30), "fcvtzu x29, h30");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r29, SReg::s30), "fcvtzu w29, s30");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r29, SReg::s30), "fcvtzu x29, s30");
  TEST_SINGLE(fcvtzu(Size::i32Bit, Reg::r29, DReg::d30), "fcvtzu w29, d30");
  TEST_SINGLE(fcvtzu(Size::i64Bit, Reg::r29, DReg::d30), "fcvtzu x29, d30");
}


================================================
FILE: FEXCore/unittests/Emitter/Branch_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Raw-encoding checks for the conditional branch (PL condition) against each label flavour.
// "Bound first" cases bind the label, emit one dc32(0) word and then the branch, so the branch is
// read back at index 1 with a -1 word offset. "Bound after" cases emit the branch first and bind
// the label right behind it, so the branch is read back at index 0 with a +1 word offset.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Conditional branch immediate") {
  // BackwardLabel, bound first.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(b(Condition::CC_PL, &Target));
    CHECK(DisassembleEncoding(1) == 0x54ffffe5);
  }

  // ForwardLabel, bound after.
  {
    ForwardLabel Target;
    static_cast<void>(b(Condition::CC_PL, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x54000025);
  }

  // BiDirectionalLabel used as a backward reference.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(b(Condition::CC_PL, &Target));
    CHECK(DisassembleEncoding(1) == 0x54ffffe5);
  }

  // BiDirectionalLabel used as a forward reference.
  {
    BiDirectionalLabel Target;
    static_cast<void>(b(Condition::CC_PL, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x54000025);
  }
}
// Raw-encoding checks for bc (PL condition) against each label flavour.
// The expected words differ from the plain conditional branch only in bit 4 (0x...f5 / 0x...35).
// Backward references are read back at index 1, forward references at index 0.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Branch consistent conditional") {
  // BackwardLabel, bound before the branch.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(bc(Condition::CC_PL, &Target));
    CHECK(DisassembleEncoding(1) == 0x54fffff5);
  }

  // ForwardLabel, bound after the branch.
  {
    ForwardLabel Target;
    static_cast<void>(bc(Condition::CC_PL, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x54000035);
  }

  // BiDirectionalLabel used as a backward reference.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(bc(Condition::CC_PL, &Target));
    CHECK(DisassembleEncoding(1) == 0x54fffff5);
  }

  // BiDirectionalLabel used as a forward reference.
  {
    BiDirectionalLabel Target;
    static_cast<void>(bc(Condition::CC_PL, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x54000035);
  }
}
// Disassembly checks for the register-target branches: br, blr, and ret both without an
// argument and with an explicit register.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Unconditional branch register") {
  TEST_SINGLE(br(Reg::r29), "br x29");
  TEST_SINGLE(blr(Reg::r29), "blr x29");
  // ret() with no argument is expected to disassemble as a bare "ret".
  TEST_SINGLE(ret(), "ret");
  TEST_SINGLE(ret(Reg::r29), "ret x29");
}
// Raw-encoding checks for b and bl against each label flavour.
// Backward references (label bound, dc32(0), branch) are read back at index 1 and encode a -1 word
// offset; forward references (branch, label bound, dc32(0)) are read back at index 0 and encode +1.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Unconditional branch immediate") {
  // ---- b ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(b(&Target));
    CHECK(DisassembleEncoding(1) == 0x17ffffff);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(b(&Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x14000001);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(b(&Target));
    CHECK(DisassembleEncoding(1) == 0x17ffffff);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(b(&Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x14000001);
  }

  // ---- bl ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(bl(&Target));
    CHECK(DisassembleEncoding(1) == 0x97ffffff);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(bl(&Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x94000001);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(bl(&Target));
    CHECK(DisassembleEncoding(1) == 0x97ffffff);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(bl(&Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x94000001);
  }
}
// Raw-encoding checks for cbz and cbnz on r29 at both operand sizes, against each label flavour.
// Backward references (label bound, dc32(0), branch) are read back at index 1; forward references
// (branch, label bound, dc32(0)) are read back at index 0.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Compare and branch") {
  // ---- cbz, 32-bit ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbz(Size::i32Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0x34fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(cbz(Size::i32Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3400003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbz(Size::i32Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0x34fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(cbz(Size::i32Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3400003d);
  }

  // ---- cbz, 64-bit ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbz(Size::i64Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0xb4fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(cbz(Size::i64Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb400003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbz(Size::i64Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0xb4fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(cbz(Size::i64Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb400003d);
  }

  // ---- cbnz, 32-bit ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbnz(Size::i32Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0x35fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(cbnz(Size::i32Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3500003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbnz(Size::i32Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0x35fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(cbnz(Size::i32Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3500003d);
  }

  // ---- cbnz, 64-bit ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbnz(Size::i64Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0xb5fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(cbnz(Size::i64Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb500003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(cbnz(Size::i64Bit, Reg::r29, &Target));
    CHECK(DisassembleEncoding(1) == 0xb5fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(cbnz(Size::i64Bit, Reg::r29, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb500003d);
  }
}
// Raw-encoding checks for tbz and tbnz on r29, testing the lowest (0) and highest (63) bit index,
// against each label flavour.
// Backward references (label bound, dc32(0), branch) are read back at index 1; forward references
// (branch, label bound, dc32(0)) are read back at index 0.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Branch: Test and branch immediate") {
  // ---- tbz, bit 0 ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbz(Reg::r29, 0, &Target));
    CHECK(DisassembleEncoding(1) == 0x3607fffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(tbz(Reg::r29, 0, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3600003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbz(Reg::r29, 0, &Target));
    CHECK(DisassembleEncoding(1) == 0x3607fffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(tbz(Reg::r29, 0, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3600003d);
  }

  // ---- tbz, bit 63 ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbz(Reg::r29, 63, &Target));
    CHECK(DisassembleEncoding(1) == 0xb6fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(tbz(Reg::r29, 63, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb6f8003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbz(Reg::r29, 63, &Target));
    CHECK(DisassembleEncoding(1) == 0xb6fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(tbz(Reg::r29, 63, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb6f8003d);
  }

  // ---- tbnz, bit 0 ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbnz(Reg::r29, 0, &Target));
    CHECK(DisassembleEncoding(1) == 0x3707fffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(tbnz(Reg::r29, 0, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3700003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbnz(Reg::r29, 0, &Target));
    CHECK(DisassembleEncoding(1) == 0x3707fffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(tbnz(Reg::r29, 0, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0x3700003d);
  }

  // ---- tbnz, bit 63 ----
  // BackwardLabel.
  {
    BackwardLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbnz(Reg::r29, 63, &Target));
    CHECK(DisassembleEncoding(1) == 0xb7fffffd);
  }

  // ForwardLabel.
  {
    ForwardLabel Target;
    static_cast<void>(tbnz(Reg::r29, 63, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb7f8003d);
  }

  // BiDirectionalLabel, backward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(Bind(&Target));
    dc32(0);
    static_cast<void>(tbnz(Reg::r29, 63, &Target));
    CHECK(DisassembleEncoding(1) == 0xb7fffffd);
  }

  // BiDirectionalLabel, forward use.
  {
    BiDirectionalLabel Target;
    static_cast<void>(tbnz(Reg::r29, 63, &Target));
    static_cast<void>(Bind(&Target));
    dc32(0);
    CHECK(DisassembleEncoding(0) == 0xb7f8003d);
  }
}


================================================
FILE: FEXCore/unittests/Emitter/CMakeLists.txt
================================================
# Builds one Catch2 test executable per .cpp file found under this directory and registers the
# test cases it contains with CTest.
# CONFIGURE_DEPENDS makes the build re-check the glob so added or removed files are noticed.
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS *.cpp)

# Libraries linked into every emitter test executable.
set(LIBS fmt::fmt vixl::vixl Catch2::Catch2WithMain FEXCore_Base JemallocLibs)
foreach(TEST ${TESTS})
  # NAME_WLE: the file name without its directory and without its last extension.
  get_filename_component(TEST_NAME ${TEST} NAME_WLE)
  add_executable(Emitter_${TEST_NAME} ${TEST})
  target_link_libraries(Emitter_${TEST_NAME} PRIVATE ${LIBS})
  # Adds the Source/ directory two levels above this one to the include path.
  target_include_directories(Emitter_${TEST_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../Source/")
  # All emitter test binaries are placed in <build dir>/EmitterTests.
  set_target_properties(Emitter_${TEST_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/EmitterTests")
  # Each discovered test name gets the suffix ".<file name>.Emitter"; the ctest filter below
  # selects on that suffix.
  catch_discover_tests(Emitter_${TEST_NAME} TEST_SUFFIX ".${TEST_NAME}.Emitter")
endforeach()

# Convenience target that runs ctest from the build directory, restricted by the -R regex to the
# emitter tests, with output shown on failure and a per-test timeout of 302.
# TEST_JOB_FLAG is not set in this file; it is expected to be provided by an enclosing scope.
# NOTE(review): the "." in front of "Emitter" in the regex is an unescaped wildcard, and "$$"
# presumably collapses to a single "$" end anchor in the generated build rule - confirm both.
add_custom_target(emitter_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.Emitter$$")


================================================
FILE: FEXCore/unittests/Emitter/Loadstore_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Disassembly checks for the compare-and-swap-pair emitters: casp, caspa, caspl and caspal.
// Each variant is exercised once with Size::i32Bit (expected text uses w registers) and once
// with Size::i64Bit (expected text uses x registers); the base address is always x30.
// TEST_SINGLE is defined outside this chunk; presumably it emits the instruction and compares
// its disassembly with the string literal -- TODO confirm against TestDisassembler.h.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Compare and swap pair") {
  // casp
  TEST_SINGLE(casp(Size::i32Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "casp w28, w29, w26, w27, [x30]");
  TEST_SINGLE(casp(Size::i64Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "casp x28, x29, x26, x27, [x30]");

  // caspa
  TEST_SINGLE(caspa(Size::i32Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspa w28, w29, w26, w27, [x30]");
  TEST_SINGLE(caspa(Size::i64Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspa x28, x29, x26, x27, [x30]");

  // caspl
  TEST_SINGLE(caspl(Size::i32Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspl w28, w29, w26, w27, [x30]");
  TEST_SINGLE(caspl(Size::i64Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspl x28, x29, x26, x27, [x30]");

  // caspal
  TEST_SINGLE(caspal(Size::i32Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspal w28, w29, w26, w27, [x30]");
  TEST_SINGLE(caspal(Size::i64Bit, Reg::r28, Reg::r29, Reg::r26, Reg::r27, Reg::r30), "caspal x28, x29, x26, x27, [x30]");
}
// Disassembly checks for the Advanced SIMD "load/store multiple structures" emitters with a plain
// base-register address ([x30], no post-index): ld1/st1 with one to four registers, and
// ld2/st2, ld3/st3, ld4/st4.
// Every form is exercised for each SubRegSize element size with both QReg operands (expected
// arrangements .16b/.8h/.4s/.2d) and DReg operands (expected arrangements .8b/.4h/.2s/.1d).
// The cases starting at q31/d31 cover register lists that continue from v31 to v0.
// TEST_SINGLE is defined outside this chunk; presumably it emits the instruction and compares
// its disassembly with the string literal -- TODO confirm against TestDisassembler.h.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store multiple structures") {
  // ld1, one register
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, Reg::r30), "ld1 {v26.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, Reg::r30), "ld1 {v26.8b}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, Reg::r30), "ld1 {v26.8h}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, Reg::r30), "ld1 {v26.4h}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, Reg::r30), "ld1 {v26.4s}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, Reg::r30), "ld1 {v26.2s}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, Reg::r30), "ld1 {v26.2d}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, Reg::r30), "ld1 {v26.1d}, [x30]");

  // ld1, two registers
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30), "ld1 {v31.16b, v0.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30), "ld1 {v31.8b, v0.8b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.16b, v27.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.8b, v27.8b}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.8h, v27.8h}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.4h, v27.4h}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.4s, v27.4s}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.2s, v27.2s}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30), "ld1 {v26.2d, v27.2d}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30), "ld1 {v26.1d, v27.1d}, [x30]");

  // ld1, three registers
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.16b, v27.16b, v28.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.8b, v27.8b, v28.8b}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.8h, v27.8h, v28.8h}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.4h, v27.4h, v28.4h}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.4s, v27.4s, v28.4s}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.2s, v27.2s, v28.2s}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld1 {v26.2d, v27.2d, v28.2d}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld1 {v26.1d, v27.1d, v28.1d}, [x30]");

  // ld1, four registers
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                            "[x30]");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30]");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30]");

  // st1, one register
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, Reg::r30), "st1 {v26.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, Reg::r30), "st1 {v26.8b}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, Reg::r30), "st1 {v26.8h}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, Reg::r30), "st1 {v26.4h}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, Reg::r30), "st1 {v26.4s}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, Reg::r30), "st1 {v26.2s}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, Reg::r30), "st1 {v26.2d}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, Reg::r30), "st1 {v26.1d}, [x30]");

  // st1, two registers
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30), "st1 {v31.16b, v0.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30), "st1 {v31.8b, v0.8b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.16b, v27.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.8b, v27.8b}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.8h, v27.8h}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.4h, v27.4h}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.4s, v27.4s}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.2s, v27.2s}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30), "st1 {v26.2d, v27.2d}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30), "st1 {v26.1d, v27.1d}, [x30]");

  // st1, three registers
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st1 {v31.8b, v0.8b, v1.8b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.16b, v27.16b, v28.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.8b, v27.8b, v28.8b}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.8h, v27.8h, v28.8h}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.4h, v27.4h, v28.4h}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.4s, v27.4s, v28.4s}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.2s, v27.2s, v28.2s}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st1 {v26.2d, v27.2d, v28.2d}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st1 {v26.1d, v27.1d, v28.1d}, [x30]");

  // st1, four registers
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                            "[x30]");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30]");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, [x30]");

  // ld2 / st2.
  // For the DReg + i64Bit combination the expected text is the disassembler's "unallocated"
  // output instead of a mnemonic; the same holds for ld3/st3 and ld4/st4 below.
  // NOTE(review): presumably the .1d arrangement has no valid encoding for these instructions --
  // confirm against the Arm Architecture Reference Manual.
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30), "ld2 {v31.16b, v0.16b}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30), "ld2 {v31.8b, v0.8b}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.16b, v27.16b}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2 {v26.8b, v27.8b}, [x30]");

  TEST_SINGLE(ld2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.8h, v27.8h}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2 {v26.4h, v27.4h}, [x30]");

  TEST_SINGLE(ld2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.4s, v27.4s}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2 {v26.2s, v27.2s}, [x30]");

  TEST_SINGLE(ld2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2 {v26.2d, v27.2d}, [x30]");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30), "st2 {v31.16b, v0.16b}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30), "st2 {v31.8b, v0.8b}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.16b, v27.16b}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30), "st2 {v26.8b, v27.8b}, [x30]");

  TEST_SINGLE(st2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.8h, v27.8h}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30), "st2 {v26.4h, v27.4h}, [x30]");

  TEST_SINGLE(st2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.4s, v27.4s}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30), "st2 {v26.2s, v27.2s}, [x30]");

  TEST_SINGLE(st2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30), "st2 {v26.2d, v27.2d}, [x30]");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");

  // ld3 / st3
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3 {v31.16b, v0.16b, v1.16b}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3 {v31.8b, v0.8b, v1.8b}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.16b, v27.16b, v28.16b}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3 {v26.8b, v27.8b, v28.8b}, [x30]");

  TEST_SINGLE(ld3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.8h, v27.8h, v28.8h}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3 {v26.4h, v27.4h, v28.4h}, [x30]");

  TEST_SINGLE(ld3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.4s, v27.4s, v28.4s}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3 {v26.2s, v27.2s, v28.2s}, [x30]");

  TEST_SINGLE(ld3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3 {v26.2d, v27.2d, v28.2d}, [x30]");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "st3 {v31.16b, v0.16b, v1.16b}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "st3 {v31.8b, v0.8b, v1.8b}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.16b, v27.16b, v28.16b}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st3 {v26.8b, v27.8b, v28.8b}, [x30]");

  TEST_SINGLE(st3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.8h, v27.8h, v28.8h}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st3 {v26.4h, v27.4h, v28.4h}, [x30]");

  TEST_SINGLE(st3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.4s, v27.4s, v28.4s}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "st3 {v26.2s, v27.2s, v28.2s}, [x30]");

  TEST_SINGLE(st3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "st3 {v26.2d, v27.2d, v28.2d}, [x30]");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");

  // ld4 / st4
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                            "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]");

  TEST_SINGLE(ld4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30]");

  TEST_SINGLE(ld4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30]");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30]");

  TEST_SINGLE(ld4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                            "[x30]");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, [x30]");

  TEST_SINGLE(st4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, [x30]");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st4 {v26.4h, v27.4h, v28.4h, v29.4h}, [x30]");

  TEST_SINGLE(st4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.4s, v27.4s, v28.4s, v29.4s}, [x30]");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "st4 {v26.2s, v27.2s, v28.2s, v29.2s}, [x30]");

  TEST_SINGLE(st4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "st4 {v26.2d, v27.2d, v28.2d, v29.2d}, [x30]");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "unallocated (NEONLoadStoreMultiStruct)");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store multiple structures (post-indexed)") {
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1 {v26.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1 {v26.8b}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1 {v26.8h}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1 {v26.4h}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1 {v26.4s}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1 {v26.2s}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1 {v26.2d}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1 {v26.1d}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, Reg::r30, 16), "ld1 {v26.16b}, [x30], #16");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, Reg::r30, 8), "ld1 {v26.8b}, [x30], #8");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, Reg::r30, 16), "ld1 {v26.8h}, [x30], #16");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, Reg::r30, 8), "ld1 {v26.4h}, [x30], #8");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, Reg::r30, 16), "ld1 {v26.4s}, [x30], #16");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, Reg::r30, 8), "ld1 {v26.2s}, [x30], #8");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, Reg::r30, 16), "ld1 {v26.2d}, [x30], #16");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, Reg::r30, 8), "ld1 {v26.1d}, [x30], #8");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.8h, v27.8h}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.4h, v27.4h}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.4s, v27.4s}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.2s, v27.2s}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, 32), "ld1 {v31.16b, v0.16b}, [x30], #32");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, 16), "ld1 {v31.8b, v0.8b}, [x30], #16");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.16b, v27.16b}, [x30], #32");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 {v26.8b, v27.8b}, [x30], #16");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.8h, v27.8h}, [x30], #32");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 {v26.4h, v27.4h}, [x30], #16");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.4s, v27.4s}, [x30], #32");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 {v26.2s, v27.2s}, [x30], #16");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld1 {v26.2d, v27.2d}, [x30], #32");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld1 {v26.1d, v27.1d}, [x30], #16");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b, v28.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b, v28.8b}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.8h, v27.8h, v28.8h}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.4h, v27.4h, v28.4h}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.4s, v27.4s, v28.4s}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.2s, v27.2s, v28.2s}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d, v28.2d}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d, v28.1d}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld1 {v31.16b, v0.16b, v1.16b}, [x30], #48");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld1 {v31.8b, v0.8b, v1.8b}, [x30], #24");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.16b, v27.16b, v28.16b}, [x30], #48");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.8b, v27.8b, v28.8b}, [x30], #24");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.8h, v27.8h, v28.8h}, [x30], #48");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.4h, v27.4h, v28.4h}, [x30], #24");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.4s, v27.4s, v28.4s}, [x30], #48");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.2s, v27.2s, v28.2s}, [x30], #24");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld1 {v26.2d, v27.2d, v28.2d}, [x30], #48");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld1 {v26.1d, v27.1d, v28.1d}, [x30], #24");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.16b, v27.16b, v28.16b, "
                                                                                                      "v29.16b}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.8b, v27.8b, v28.8b, "
                                                                                                      "v29.8b}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.8h, v27.8h, v28.8h, "
                                                                                                       "v29.8h}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.4h, v27.4h, v28.4h, "
                                                                                                       "v29.4h}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.4s, v27.4s, v28.4s, "
                                                                                                       "v29.4s}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.2s, v27.2s, v28.2s, "
                                                                                                       "v29.2s}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld1 {v26.2d, v27.2d, v28.2d, "
                                                                                                       "v29.2d}, [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld1 {v26.1d, v27.1d, v28.1d, "
                                                                                                       "v29.1d}, [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld1 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                             "[x30], #64");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], "
                                                                                             "#32");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                                "[x30], #64");
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                                "[x30], #32");

  TEST_SINGLE(ld1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(ld1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(ld1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld1 {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld1 {v26.1d, v27.1d, v28.1d, v29.1d}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, Reg::r30, Reg::r29), "st1 {v26.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, Reg::r30, Reg::r29), "st1 {v26.8b}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, Reg::r30, Reg::r29), "st1 {v26.8h}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, Reg::r30, Reg::r29), "st1 {v26.4h}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, Reg::r30, Reg::r29), "st1 {v26.4s}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, Reg::r30, Reg::r29), "st1 {v26.2s}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, Reg::r30, Reg::r29), "st1 {v26.2d}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, Reg::r30, Reg::r29), "st1 {v26.1d}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, Reg::r30, 16), "st1 {v26.16b}, [x30], #16");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, Reg::r30, 8), "st1 {v26.8b}, [x30], #8");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, Reg::r30, 16), "st1 {v26.8h}, [x30], #16");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, Reg::r30, 8), "st1 {v26.4h}, [x30], #8");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, Reg::r30, 16), "st1 {v26.4s}, [x30], #16");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, Reg::r30, 8), "st1 {v26.2s}, [x30], #8");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, Reg::r30, 16), "st1 {v26.2d}, [x30], #16");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, Reg::r30, 8), "st1 {v26.1d}, [x30], #8");

  // st1 with a two-register list, post-indexed by a register (x29).
  // The q31/q0 and d31/d0 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.8h, v27.8h}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.4h, v27.4h}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.4s, v27.4s}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.2s, v27.2s}, [x30], x29");

  // For st1 the 64-bit element D-register form is expected to disassemble with the .1d arrangement.
  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d}, [x30], x29");

  // st1 with a two-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 32 bytes for two Q registers, 16 bytes for two D registers.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, 32), "st1 {v31.16b, v0.16b}, [x30], #32");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, 16), "st1 {v31.8b, v0.8b}, [x30], #16");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.16b, v27.16b}, [x30], #32");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.8b, v27.8b}, [x30], #16");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.8h, v27.8h}, [x30], #32");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.4h, v27.4h}, [x30], #16");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.4s, v27.4s}, [x30], #32");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.2s, v27.2s}, [x30], #16");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st1 {v26.2d, v27.2d}, [x30], #32");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st1 {v26.1d, v27.1d}, [x30], #16");

  // st1 with a three-register list, post-indexed by a register (x29).
  // The q31/q0/q1 and d31/d0/d1 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b, v28.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b, v28.8b}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.8h, v27.8h, v28.8h}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.4h, v27.4h, v28.4h}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.4s, v27.4s, v28.4s}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.2s, v27.2s, v28.2s}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d, v28.2d}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d, v28.1d}, [x30], x29");

  // st1 with a three-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 48 bytes for three Q registers, 24 bytes for three D registers.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st1 {v31.16b, v0.16b, v1.16b}, [x30], #48");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st1 {v31.8b, v0.8b, v1.8b}, [x30], #24");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.16b, v27.16b, v28.16b}, [x30], #48");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.8b, v27.8b, v28.8b}, [x30], #24");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.8h, v27.8h, v28.8h}, [x30], #48");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.4h, v27.4h, v28.4h}, [x30], #24");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.4s, v27.4s, v28.4s}, [x30], #48");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.2s, v27.2s, v28.2s}, [x30], #24");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st1 {v26.2d, v27.2d, v28.2d}, [x30], #48");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st1 {v26.1d, v27.1d, v28.1d}, [x30], #24");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.16b, v27.16b, v28.16b, "
                                                                                                      "v29.16b}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.8b, v27.8b, v28.8b, "
                                                                                                      "v29.8b}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.8h, v27.8h, v28.8h, "
                                                                                                       "v29.8h}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.4h, v27.4h, v28.4h, "
                                                                                                       "v29.4h}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.4s, v27.4s, v28.4s, "
                                                                                                       "v29.4s}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.2s, v27.2s, v28.2s, "
                                                                                                       "v29.2s}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st1 {v26.2d, v27.2d, v28.2d, "
                                                                                                       "v29.2d}, [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st1 {v26.1d, v27.1d, v28.1d, "
                                                                                                       "v29.1d}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st1 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                             "[x30], #64");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st1 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], "
                                                                                             "#32");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                                "[x30], #64");
  TEST_SINGLE(st1<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                                "[x30], #32");

  TEST_SINGLE(st1<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(st1<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(st1<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st1 {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st1 {v26.1d, v27.1d, v28.1d, v29.1d}, "
                                                                                                 "[x30], #32");

  // ld2 with a two-register list, post-indexed by a register (x29).
  // The q31/q0 and d31/d0 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2 {v31.16b, v0.16b}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2 {v31.8b, v0.8b}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.16b, v27.16b}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2 {v26.8b, v27.8b}, [x30], x29");

  TEST_SINGLE(ld2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.8h, v27.8h}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2 {v26.4h, v27.4h}, [x30], x29");

  TEST_SINGLE(ld2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.4s, v27.4s}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2 {v26.2s, v27.2s}, [x30], x29");

  // The 64-bit element D-register form is not expected to produce an ld2 mnemonic:
  // the disassembler is expected to report the emitted encoding as unallocated.
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2 {v26.2d, v27.2d}, [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  // ld2 with a two-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 32 bytes for two Q registers, 16 bytes for two D registers.
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, 32), "ld2 {v31.16b, v0.16b}, [x30], #32");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, 16), "ld2 {v31.8b, v0.8b}, [x30], #16");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.16b, v27.16b}, [x30], #32");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld2 {v26.8b, v27.8b}, [x30], #16");

  TEST_SINGLE(ld2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.8h, v27.8h}, [x30], #32");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld2 {v26.4h, v27.4h}, [x30], #16");

  TEST_SINGLE(ld2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.4s, v27.4s}, [x30], #32");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld2 {v26.2s, v27.2s}, [x30], #16");

  // As with the register post-index form, the 64-bit element D-register case is expected to be unallocated.
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "ld2 {v26.2d, v27.2d}, [x30], #32");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  // st2 with a two-register list, post-indexed by a register (x29).
  // The q31/q0 and d31/d0 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "st2 {v31.16b, v0.16b}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "st2 {v31.8b, v0.8b}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.16b, v27.16b}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st2 {v26.8b, v27.8b}, [x30], x29");

  TEST_SINGLE(st2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.8h, v27.8h}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st2 {v26.4h, v27.4h}, [x30], x29");

  TEST_SINGLE(st2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.4s, v27.4s}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "st2 {v26.2s, v27.2s}, [x30], x29");

  // The 64-bit element D-register form is not expected to produce an st2 mnemonic:
  // the disassembler is expected to report the emitted encoding as unallocated.
  TEST_SINGLE(st2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "st2 {v26.2d, v27.2d}, [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  // st2 with a two-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 32 bytes for two Q registers, 16 bytes for two D registers.
  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, 32), "st2 {v31.16b, v0.16b}, [x30], #32");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, 16), "st2 {v31.8b, v0.8b}, [x30], #16");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.16b, v27.16b}, [x30], #32");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st2 {v26.8b, v27.8b}, [x30], #16");

  TEST_SINGLE(st2<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.8h, v27.8h}, [x30], #32");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st2 {v26.4h, v27.4h}, [x30], #16");

  TEST_SINGLE(st2<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.4s, v27.4s}, [x30], #32");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "st2 {v26.2s, v27.2s}, [x30], #16");

  // As with the register post-index form, the 64-bit element D-register case is expected to be unallocated.
  TEST_SINGLE(st2<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, 32), "st2 {v26.2d, v27.2d}, [x30], #32");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  // ld3 with a three-register list, post-indexed by a register (x29).
  // The q31/q0/q1 and d31/d0/d1 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.16b, v27.16b, v28.16b}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3 {v26.8b, v27.8b, v28.8b}, [x30], x29");

  TEST_SINGLE(ld3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.8h, v27.8h, v28.8h}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3 {v26.4h, v27.4h, v28.4h}, [x30], x29");

  TEST_SINGLE(ld3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.4s, v27.4s, v28.4s}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3 {v26.2s, v27.2s, v28.2s}, [x30], x29");

  // The 64-bit element D-register form is not expected to produce an ld3 mnemonic:
  // the disassembler is expected to report the emitted encoding as unallocated.
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3 {v26.2d, v27.2d, v28.2d}, [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "unallocated "
                                                                                            "(NEONLoadStoreMultiStructPostIndex)");

  // ld3 with a three-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 48 bytes for three Q registers, 24 bytes for three D registers.
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "ld3 {v31.16b, v0.16b, v1.16b}, [x30], #48");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "ld3 {v31.8b, v0.8b, v1.8b}, [x30], #24");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.16b, v27.16b, v28.16b}, [x30], #48");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3 {v26.8b, v27.8b, v28.8b}, [x30], #24");

  TEST_SINGLE(ld3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.8h, v27.8h, v28.8h}, [x30], #48");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3 {v26.4h, v27.4h, v28.4h}, [x30], #24");

  TEST_SINGLE(ld3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.4s, v27.4s, v28.4s}, [x30], #48");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3 {v26.2s, v27.2s, v28.2s}, [x30], #24");

  // As with the register post-index form, the 64-bit element D-register case is expected to be unallocated.
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "ld3 {v26.2d, v27.2d, v28.2d}, [x30], #48");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  // st3 with a three-register list, post-indexed by a register (x29).
  // The q31/q0/q1 and d31/d0/d1 cases use a register list that wraps from v31 to v0.
  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "st3 {v31.16b, v0.16b, v1.16b}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "st3 {v31.8b, v0.8b, v1.8b}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st3 {v26.16b, v27.16b, v28.16b}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st3 {v26.8b, v27.8b, v28.8b}, [x30], x29");

  TEST_SINGLE(st3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st3 {v26.8h, v27.8h, v28.8h}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st3 {v26.4h, v27.4h, v28.4h}, [x30], x29");

  TEST_SINGLE(st3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st3 {v26.4s, v27.4s, v28.4s}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "st3 {v26.2s, v27.2s, v28.2s}, [x30], x29");

  // The 64-bit element D-register form is not expected to produce an st3 mnemonic:
  // the disassembler is expected to report the emitted encoding as unallocated.
  TEST_SINGLE(st3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "st3 {v26.2d, v27.2d, v28.2d}, [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "unallocated "
                                                                                            "(NEONLoadStoreMultiStructPostIndex)");

  // st3 with a three-register list, post-indexed by an immediate.
  // The immediate used here equals the total size of the register list:
  // 48 bytes for three Q registers, 24 bytes for three D registers.
  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 48), "st3 {v31.16b, v0.16b, v1.16b}, [x30], #48");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 24), "st3 {v31.8b, v0.8b, v1.8b}, [x30], #24");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.16b, v27.16b, v28.16b}, [x30], #48");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st3 {v26.8b, v27.8b, v28.8b}, [x30], #24");

  TEST_SINGLE(st3<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.8h, v27.8h, v28.8h}, [x30], #48");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st3 {v26.4h, v27.4h, v28.4h}, [x30], #24");

  TEST_SINGLE(st3<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.4s, v27.4s, v28.4s}, [x30], #48");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "st3 {v26.2s, v27.2s, v28.2s}, [x30], #24");

  // As with the register post-index form, the 64-bit element D-register case is expected to be unallocated.
  TEST_SINGLE(st3<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 48), "st3 {v26.2d, v27.2d, v28.2d}, [x30], #48");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "unallocated (NEONLoadStoreMultiStructPostIndex)");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.16b, v27.16b, v28.16b, "
                                                                                                      "v29.16b}, [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.8b, v27.8b, v28.8b, "
                                                                                                      "v29.8b}, [x30], x29");

  TEST_SINGLE(ld4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.8h, v27.8h, v28.8h, "
                                                                                                       "v29.8h}, [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.4h, v27.4h, v28.4h, "
                                                                                                       "v29.4h}, [x30], x29");

  TEST_SINGLE(ld4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.4s, v27.4s, v28.4s, "
                                                                                                       "v29.4s}, [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4 {v26.2s, v27.2s, v28.2s, "
                                                                                                       "v29.2s}, [x30], x29");

  TEST_SINGLE(ld4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4 {v26.2d, v27.2d, v28.2d, "
                                                                                                       "v29.2d}, [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "unallocated "
                                                                                                       "(NEONLoadStoreMultiStructPostIndex"
                                                                                                       ")");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "ld4 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                             "[x30], #64");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "ld4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], "
                                                                                             "#32");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                                "[x30], #64");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                                "[x30], #32");

  TEST_SINGLE(ld4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(ld4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4 {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(ld4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "ld4 {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated "
                                                                                                 "(NEONLoadStoreMultiStructPostIndex)");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, "
                                                                                                   "[x30], x29");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.16b, v27.16b, v28.16b, "
                                                                                                      "v29.16b}, [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.8b, v27.8b, v28.8b, "
                                                                                                      "v29.8b}, [x30], x29");

  TEST_SINGLE(st4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.8h, v27.8h, v28.8h, "
                                                                                                       "v29.8h}, [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.4h, v27.4h, v28.4h, "
                                                                                                       "v29.4h}, [x30], x29");

  TEST_SINGLE(st4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.4s, v27.4s, v28.4s, "
                                                                                                       "v29.4s}, [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "st4 {v26.2s, v27.2s, v28.2s, "
                                                                                                       "v29.2s}, [x30], x29");

  TEST_SINGLE(st4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "st4 {v26.2d, v27.2d, v28.2d, "
                                                                                                       "v29.2d}, [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "unallocated "
                                                                                                       "(NEONLoadStoreMultiStructPostIndex"
                                                                                                       ")");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 64), "st4 {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                             "[x30], #64");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 32), "st4 {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], "
                                                                                             "#32");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                                "[x30], #64");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                                "[x30], #32");

  TEST_SINGLE(st4<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(st4<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "st4 {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                                 "[x30], #32");

  TEST_SINGLE(st4<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 64), "st4 {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                                 "[x30], #64");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "unallocated "
                                                                                                 "(NEONLoadStoreMultiStructPostIndex)");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: ASIMD loadstore single") {
  // ld1 to a single lane, no offset: lane 0 for each element size.
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30), "ld1 {v26.b}[0], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30), "ld1 {v26.h}[0], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30), "ld1 {v26.s}[0], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30), "ld1 {v26.d}[0], [x30]");

  // ld1 to a single lane, no offset: lanes 15/7/3/1 for byte/half/word/doubleword elements.
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30), "ld1 {v26.b}[15], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30), "ld1 {v26.h}[7], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30), "ld1 {v26.s}[3], [x30]");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30), "ld1 {v26.d}[1], [x30]");

  // ld1r with a D register destination, no offset.
  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(DReg::d26, Reg::r30), "ld1r {v26.8b}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(DReg::d26, Reg::r30), "ld1r {v26.4h}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(DReg::d26, Reg::r30), "ld1r {v26.2s}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(DReg::d26, Reg::r30), "ld1r {v26.1d}, [x30]");

  // ld1r with a Q register destination, no offset.
  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(QReg::q26, Reg::r30), "ld1r {v26.16b}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(QReg::q26, Reg::r30), "ld1r {v26.8h}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(QReg::q26, Reg::r30), "ld1r {v26.4s}, [x30]");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(QReg::q26, Reg::r30), "ld1r {v26.2d}, [x30]");

  // st1 from a single lane, no offset: lane 0 for each element size.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30), "st1 {v26.b}[0], [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30), "st1 {v26.h}[0], [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30), "st1 {v26.s}[0], [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30), "st1 {v26.d}[0], [x30]");

  // st1 from a single lane, no offset: lanes 15/7/3/1 for byte/half/word/doubleword elements.
  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30), "st1 {v26.b}[15], [x30]");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30), "st1 {v26.h}[7], [x30]");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30), "st1 {v26.s}[3], [x30]");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30), "st1 {v26.d}[1], [x30]");

  // ld2 to a single lane of a two-register list, no offset: lane 0 for each element size.
  // The v31/v0 case uses a register list that wraps from v31 to v0.
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30), "ld2 {v31.b, v0.b}[0], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.b, v27.b}[0], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.h, v27.h}[0], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.s, v27.s}[0], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "ld2 {v26.d, v27.d}[0], [x30]");

  // ld2 to a single lane, no offset: lanes 15/7/3/1 for byte/half/word/doubleword elements.
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30), "ld2 {v26.b, v27.b}[15], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30), "ld2 {v26.h, v27.h}[7], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30), "ld2 {v26.s, v27.s}[3], [x30]");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30), "ld2 {v26.d, v27.d}[1], [x30]");

  // ld2r with D register destinations, no offset. Unlike the post-indexed multi-structure ld2 tests,
  // the 64-bit element D-register form is expected to disassemble with the .1d arrangement here.
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30), "ld2r {v31.8b, v0.8b}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.8b, v27.8b}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.4h, v27.4h}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.2s, v27.2s}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30), "ld2r {v26.1d, v27.1d}, [x30]");

  // ld2r with Q register destinations, no offset.
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30), "ld2r {v31.16b, v0.16b}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.16b, v27.16b}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.8h, v27.8h}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.4s, v27.4s}, [x30]");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30), "ld2r {v26.2d, v27.2d}, [x30]");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30), "st2 {v31.b, v0.b}[0], [x30]");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.b, v27.b}[0], [x30]");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.h, v27.h}[0], [x30]");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.s, v27.s}[0], [x30]");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30), "st2 {v26.d, v27.d}[0], [x30]");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30), "st2 {v26.b, v27.b}[15], [x30]");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30), "st2 {v26.h, v27.h}[7], [x30]");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30), "st2 {v26.s, v27.s}[3], [x30]");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30), "st2 {v26.d, v27.d}[1], [x30]");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "ld3 {v31.b, v0.b, v1.b}[0], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.b, v27.b, v28.b}[0], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.h, v27.h, v28.h}[0], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.s, v27.s, v28.s}[0], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "ld3 {v26.d, v27.d, v28.d}[0], [x30]");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30), "ld3 {v26.b, v27.b, v28.b}[15], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30), "ld3 {v26.h, v27.h, v28.h}[7], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30), "ld3 {v26.s, v27.s, v28.s}[3], [x30]");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30), "ld3 {v26.d, v27.d, v28.d}[1], [x30]");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30), "ld3r {v31.8b, v0.8b, v1.8b}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.8b, v27.8b, v28.8b}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.4h, v27.4h, v28.4h}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.2s, v27.2s, v28.2s}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30), "ld3r {v26.1d, v27.1d, v28.1d}, [x30]");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30), "ld3r {v31.16b, v0.16b, v1.16b}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.16b, v27.16b, v28.16b}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.8h, v27.8h, v28.8h}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.4s, v27.4s, v28.4s}, [x30]");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30), "ld3r {v26.2d, v27.2d, v28.2d}, [x30]");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30), "st3 {v31.b, v0.b, v1.b}[0], [x30]");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.b, v27.b, v28.b}[0], [x30]");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.h, v27.h, v28.h}[0], [x30]");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.s, v27.s, v28.s}[0], [x30]");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30), "st3 {v26.d, v27.d, v28.d}[0], [x30]");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30), "st3 {v26.b, v27.b, v28.b}[15], [x30]");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30), "st3 {v26.h, v27.h, v28.h}[7], [x30]");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30), "st3 {v26.s, v27.s, v28.s}[3], [x30]");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30), "st3 {v26.d, v27.d, v28.d}[1], [x30]");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], "
                                                                                               "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], "
                                                                                                "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], "
                                                                                                "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], "
                                                                                                "[x30]");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], "
                                                                                                "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], "
                                                                                                "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], "
                                                                                                "[x30]");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], "
                                                                                                "[x30]");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30]");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                             "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                              "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                              "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, "
                                                                                              "[x30]");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, [x30]");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.16b, v27.16b, v28.16b, v29.16b}, "
                                                                                             "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                              "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                              "[x30]");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                              "[x30]");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30]");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[0], "
                                                                                               "[x30]");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[0], "
                                                                                                "[x30]");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[0], "
                                                                                                "[x30]");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[0], "
                                                                                                "[x30]");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30), "st4 {v26.b, v27.b, v28.b, v29.b}[15], "
                                                                                                "[x30]");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30), "st4 {v26.h, v27.h, v28.h, v29.h}[7], "
                                                                                                "[x30]");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30), "st4 {v26.s, v27.s, v28.s, v29.s}[3], "
                                                                                                "[x30]");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30), "st4 {v26.d, v27.d, v28.d, v29.d}[1], "
                                                                                                "[x30]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Advanced SIMD load/store single structure (post-indexed)") {
  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30, 1), "ld1 {v26.b}[0], [x30], #1");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30, 2), "ld1 {v26.h}[0], [x30], #2");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30, 4), "ld1 {v26.s}[0], [x30], #4");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30, 8), "ld1 {v26.d}[0], [x30], #8");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30, 1), "ld1 {v26.b}[15], [x30], #1");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30, 2), "ld1 {v26.h}[7], [x30], #2");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30, 4), "ld1 {v26.s}[3], [x30], #4");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30, 8), "ld1 {v26.d}[1], [x30], #8");

  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(DReg::d26, Reg::r30, 1), "ld1r {v26.8b}, [x30], #1");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(DReg::d26, Reg::r30, 2), "ld1r {v26.4h}, [x30], #2");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(DReg::d26, Reg::r30, 4), "ld1r {v26.2s}, [x30], #4");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(DReg::d26, Reg::r30, 8), "ld1r {v26.1d}, [x30], #8");

  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(QReg::q26, Reg::r30, 1), "ld1r {v26.16b}, [x30], #1");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(QReg::q26, Reg::r30, 2), "ld1r {v26.8h}, [x30], #2");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(QReg::q26, Reg::r30, 4), "ld1r {v26.4s}, [x30], #4");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(QReg::q26, Reg::r30, 8), "ld1r {v26.2d}, [x30], #8");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30, 1), "st1 {v26.b}[0], [x30], #1");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30, 2), "st1 {v26.h}[0], [x30], #2");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30, 4), "st1 {v26.s}[0], [x30], #4");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30, 8), "st1 {v26.d}[0], [x30], #8");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30, 1), "st1 {v26.b}[15], [x30], #1");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30, 2), "st1 {v26.h}[7], [x30], #2");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30, 4), "st1 {v26.s}[3], [x30], #4");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30, 8), "st1 {v26.d}[1], [x30], #8");

  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30, 2), "ld2 {v31.b, v0.b}[0], [x30], #2");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 2), "ld2 {v26.b, v27.b}[0], [x30], #2");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 4), "ld2 {v26.h, v27.h}[0], [x30], #4");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 8), "ld2 {v26.s, v27.s}[0], [x30], #8");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 16), "ld2 {v26.d, v27.d}[0], [x30], #16");

  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30, 2), "ld2 {v26.b, v27.b}[15], [x30], #2");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30, 4), "ld2 {v26.h, v27.h}[7], [x30], #4");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30, 8), "ld2 {v26.s, v27.s}[3], [x30], #8");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30, 16), "ld2 {v26.d, v27.d}[1], [x30], #16");

  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, 2), "ld2r {v31.8b, v0.8b}, [x30], #2");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, 2), "ld2r {v26.8b, v27.8b}, [x30], #2");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, 4), "ld2r {v26.4h, v27.4h}, [x30], #4");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, 8), "ld2r {v26.2s, v27.2s}, [x30], #8");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, 16), "ld2r {v26.1d, v27.1d}, [x30], #16");

  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, 2), "ld2r {v31.16b, v0.16b}, [x30], #2");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, 2), "ld2r {v26.16b, v27.16b}, [x30], #2");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, 4), "ld2r {v26.8h, v27.8h}, [x30], #4");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, 8), "ld2r {v26.4s, v27.4s}, [x30], #8");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, 16), "ld2r {v26.2d, v27.2d}, [x30], #16");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30, 2), "st2 {v31.b, v0.b}[0], [x30], #2");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 2), "st2 {v26.b, v27.b}[0], [x30], #2");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 4), "st2 {v26.h, v27.h}[0], [x30], #4");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 8), "st2 {v26.s, v27.s}[0], [x30], #8");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30, 16), "st2 {v26.d, v27.d}[0], [x30], #16");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30, 2), "st2 {v26.b, v27.b}[15], [x30], #2");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30, 4), "st2 {v26.h, v27.h}[7], [x30], #4");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30, 8), "st2 {v26.s, v27.s}[3], [x30], #8");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30, 16), "st2 {v26.d, v27.d}[1], [x30], #16");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "ld3 {v31.b, v0.b, v1.b}[0], [x30], #3");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "ld3 {v26.b, v27.b, v28.b}[0], [x30], #3");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "ld3 {v26.h, v27.h, v28.h}[0], [x30], #6");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 12), "ld3 {v26.s, v27.s, v28.s}[0], [x30], #12");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 24), "ld3 {v26.d, v27.d, v28.d}[0], [x30], #24");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 3), "ld3 {v26.b, v27.b, v28.b}[15], [x30], #3");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 6), "ld3 {v26.h, v27.h, v28.h}[7], [x30], #6");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 12), "ld3 {v26.s, v27.s, v28.s}[3], [x30], #12");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 24), "ld3 {v26.d, v27.d, v28.d}[1], [x30], #24");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, 3), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], #3");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 3), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], #3");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 6), "ld3r {v26.4h, v27.4h, v28.4h}, [x30], #6");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 12), "ld3r {v26.2s, v27.2s, v28.2s}, [x30], #12");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, 24), "ld3r {v26.1d, v27.1d, v28.1d}, [x30], #24");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, 3), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], #3");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 3), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], #3");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 6), "ld3r {v26.8h, v27.8h, v28.8h}, [x30], #6");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 12), "ld3r {v26.4s, v27.4s, v28.4s}, [x30], #12");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, 24), "ld3r {v26.2d, v27.2d, v28.2d}, [x30], #24");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, 3), "st3 {v31.b, v0.b, v1.b}[0], [x30], #3");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[0], [x30], #3");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[0], [x30], #6");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 12), "st3 {v26.s, v27.s, v28.s}[0], [x30], #12");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, 24), "st3 {v26.d, v27.d, v28.d}[0], [x30], #24");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, 3), "st3 {v26.b, v27.b, v28.b}[15], [x30], #3");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, 6), "st3 {v26.h, v27.h, v28.h}[7], [x30], #6");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, 12), "st3 {v26.s, v27.s, v28.s}[3], [x30], #12");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, 24), "st3 {v26.d, v27.d, v28.d}[1], [x30], #24");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], "
                                                                                               "#4");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[0], "
                                                                                                  "[x30], #4");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[0], "
                                                                                                   "[x30], #8");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[0], "
                                                                                                    "[x30], #16");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[0], "
                                                                                                    "[x30], #32");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "ld4 {v26.b, v27.b, v28.b, v29.b}[15], "
                                                                                                   "[x30], #4");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "ld4 {v26.h, v27.h, v28.h, v29.h}[7], "
                                                                                                   "[x30], #8");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "ld4 {v26.s, v27.s, v28.s, v29.s}[3], "
                                                                                                    "[x30], #16");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "ld4 {v26.d, v27.d, v28.d, v29.d}[1], "
                                                                                                    "[x30], #32");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, 4), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, [x30], "
                                                                                             "#4");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 4), "ld4r {v26.8b, v27.8b, v28.8b, v29.8b}, "
                                                                                                "[x30], #4");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 8), "ld4r {v26.4h, v27.4h, v28.4h, v29.4h}, "
                                                                                                 "[x30], #8");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 16), "ld4r {v26.2s, v27.2s, v28.2s, v29.2s}, "
                                                                                                  "[x30], #16");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, 32), "ld4r {v26.1d, v27.1d, v28.1d, v29.1d}, "
                                                                                                  "[x30], #32");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, 4), "ld4r {v31.16b, v0.16b, v1.16b, v2.16b}, "
                                                                                             "[x30], #4");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 4), "ld4r {v26.16b, v27.16b, v28.16b, "
                                                                                                "v29.16b}, [x30], #4");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 8), "ld4r {v26.8h, v27.8h, v28.8h, v29.8h}, "
                                                                                                 "[x30], #8");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 16), "ld4r {v26.4s, v27.4s, v28.4s, v29.4s}, "
                                                                                                  "[x30], #16");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, 32), "ld4r {v26.2d, v27.2d, v28.2d, v29.2d}, "
                                                                                                  "[x30], #32");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, 4), "st4 {v31.b, v0.b, v1.b, v2.b}[0], [x30], "
                                                                                               "#4");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[0], "
                                                                                                  "[x30], #4");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[0], "
                                                                                                   "[x30], #8");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[0], "
                                                                                                    "[x30], #16");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[0], "
                                                                                                    "[x30], #32");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, 4), "st4 {v26.b, v27.b, v28.b, v29.b}[15], "
                                                                                                   "[x30], #4");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, 8), "st4 {v26.h, v27.h, v28.h, v29.h}[7], "
                                                                                                   "[x30], #8");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, 16), "st4 {v26.s, v27.s, v28.s, v29.s}[3], "
                                                                                                    "[x30], #16");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, 32), "st4 {v26.d, v27.d, v28.d, v29.d}[1], "
                                                                                                    "[x30], #32");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.b}[0], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.h}[0], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.s}[0], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "ld1 {v26.d}[0], [x30], x29");

  TEST_SINGLE(ld1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30, Reg::r29), "ld1 {v26.b}[15], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30, Reg::r29), "ld1 {v26.h}[7], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30, Reg::r29), "ld1 {v26.s}[3], [x30], x29");
  TEST_SINGLE(ld1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30, Reg::r29), "ld1 {v26.d}[1], [x30], x29");

  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.8b}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.4h}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.2s}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(DReg::d26, Reg::r30, Reg::r29), "ld1r {v26.1d}, [x30], x29");

  TEST_SINGLE(ld1r<SubRegSize::i8Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.16b}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i16Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.8h}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i32Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.4s}, [x30], x29");
  TEST_SINGLE(ld1r<SubRegSize::i64Bit>(QReg::q26, Reg::r30, Reg::r29), "ld1r {v26.2d}, [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.b}[0], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.h}[0], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.s}[0], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 0, Reg::r30, Reg::r29), "st1 {v26.d}[0], [x30], x29");

  TEST_SINGLE(st1<SubRegSize::i8Bit>(VReg::v26, 15, Reg::r30, Reg::r29), "st1 {v26.b}[15], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i16Bit>(VReg::v26, 7, Reg::r30, Reg::r29), "st1 {v26.h}[7], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i32Bit>(VReg::v26, 3, Reg::r30, Reg::r29), "st1 {v26.s}[3], [x30], x29");
  TEST_SINGLE(st1<SubRegSize::i64Bit>(VReg::v26, 1, Reg::r30, Reg::r29), "st1 {v26.d}[1], [x30], x29");

  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30, Reg::r29), "ld2 {v31.b, v0.b}[0], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[0], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.h, v27.h}[0], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.s, v27.s}[0], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "ld2 {v26.d, v27.d}[0], [x30], x29");

  TEST_SINGLE(ld2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "ld2 {v26.b, v27.b}[15], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "ld2 {v26.h, v27.h}[7], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "ld2 {v26.s, v27.s}[3], [x30], x29");
  TEST_SINGLE(ld2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "ld2 {v26.d, v27.d}[1], [x30], x29");

  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, Reg::r30, Reg::r29), "ld2r {v31.8b, v0.8b}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.8b, v27.8b}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.4h, v27.4h}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.2s, v27.2s}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, Reg::r30, Reg::r29), "ld2r {v26.1d, v27.1d}, [x30], x29");

  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, Reg::r30, Reg::r29), "ld2r {v31.16b, v0.16b}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.16b, v27.16b}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.8h, v27.8h}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.4s, v27.4s}, [x30], x29");
  TEST_SINGLE(ld2r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, Reg::r30, Reg::r29), "ld2r {v26.2d, v27.2d}, [x30], x29");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v31, VReg::v0, 0, Reg::r30, Reg::r29), "st2 {v31.b, v0.b}[0], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[0], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.h, v27.h}[0], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.s, v27.s}[0], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 0, Reg::r30, Reg::r29), "st2 {v26.d, v27.d}[0], [x30], x29");

  TEST_SINGLE(st2<SubRegSize::i8Bit>(VReg::v26, VReg::v27, 15, Reg::r30, Reg::r29), "st2 {v26.b, v27.b}[15], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i16Bit>(VReg::v26, VReg::v27, 7, Reg::r30, Reg::r29), "st2 {v26.h, v27.h}[7], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i32Bit>(VReg::v26, VReg::v27, 3, Reg::r30, Reg::r29), "st2 {v26.s, v27.s}[3], [x30], x29");
  TEST_SINGLE(st2<SubRegSize::i64Bit>(VReg::v26, VReg::v27, 1, Reg::r30, Reg::r29), "st2 {v26.d, v27.d}[1], [x30], x29");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "ld3 {v31.b, v0.b, v1.b}[0], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[0], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.h, v27.h, v28.h}[0], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.s, v27.s, v28.s}[0], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "ld3 {v26.d, v27.d, v28.d}[0], [x30], x29");

  TEST_SINGLE(ld3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "ld3 {v26.b, v27.b, v28.b}[15], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "ld3 {v26.h, v27.h, v28.h}[7], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "ld3 {v26.s, v27.s, v28.s}[3], [x30], x29");
  TEST_SINGLE(ld3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "ld3 {v26.d, v27.d, v28.d}[1], [x30], x29");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, Reg::r30, Reg::r29), "ld3r {v31.8b, v0.8b, v1.8b}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.8b, v27.8b, v28.8b}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.4h, v27.4h, v28.4h}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.2s, v27.2s, v28.2s}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, Reg::r30, Reg::r29), "ld3r {v26.1d, v27.1d, v28.1d}, [x30], x29");

  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, Reg::r30, Reg::r29), "ld3r {v31.16b, v0.16b, v1.16b}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.16b, v27.16b, v28.16b}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.8h, v27.8h, v28.8h}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.4s, v27.4s, v28.4s}, [x30], x29");
  TEST_SINGLE(ld3r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, Reg::r30, Reg::r29), "ld3r {v26.2d, v27.2d, v28.2d}, [x30], x29");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, 0, Reg::r30, Reg::r29), "st3 {v31.b, v0.b, v1.b}[0], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[0], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.h, v27.h, v28.h}[0], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.s, v27.s, v28.s}[0], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 0, Reg::r30, Reg::r29), "st3 {v26.d, v27.d, v28.d}[0], [x30], x29");

  TEST_SINGLE(st3<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, 15, Reg::r30, Reg::r29), "st3 {v26.b, v27.b, v28.b}[15], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, 7, Reg::r30, Reg::r29), "st3 {v26.h, v27.h, v28.h}[7], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, 3, Reg::r30, Reg::r29), "st3 {v26.s, v27.s, v28.s}[3], [x30], x29");
  TEST_SINGLE(st3<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, 1, Reg::r30, Reg::r29), "st3 {v26.d, v27.d, v28.d}[1], [x30], x29");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "ld4 {v31.b, v0.b, v1.b, v2.b}[0], "
                                                                                                      "[x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, "
                                                                                                         "v29.b}[0], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, "
                                                                                                          "v29.h}[0], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, "
                                                                                                          "v29.s}[0], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "ld4 {v26.d, v27.d, v28.d, "
                                                                                                          "v29.d}[0], [x30], x29");

  TEST_SINGLE(ld4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "ld4 {v26.b, v27.b, v28.b, "
                                                                                                          "v29.b}[15], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "ld4 {v26.h, v27.h, v28.h, "
                                                                                                          "v29.h}[7], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "ld4 {v26.s, v27.s, v28.s, "
                                                                                                          "v29.s}[3], [x30], x29");
  TEST_SINGLE(ld4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "ld4 {v26.d, v27.d, v28.d, "
                                                                                                          "v29.d}[1], [x30], x29");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d31, DReg::d0, DReg::d1, DReg::d2, Reg::r30, Reg::r29), "ld4r {v31.8b, v0.8b, v1.8b, v2.8b}, "
                                                                                                    "[x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.8b, v27.8b, v28.8b, "
                                                                                                       "v29.8b}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.4h, v27.4h, v28.4h, "
                                                                                                        "v29.4h}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.2s, v27.2s, v28.2s, "
                                                                                                        "v29.2s}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(DReg::d26, DReg::d27, DReg::d28, DReg::d29, Reg::r30, Reg::r29), "ld4r {v26.1d, v27.1d, v28.1d, "
                                                                                                        "v29.1d}, [x30], x29");

  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q31, QReg::q0, QReg::q1, QReg::q2, Reg::r30, Reg::r29), "ld4r {v31.16b, v0.16b, v1.16b, "
                                                                                                    "v2.16b}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i8Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.16b, v27.16b, v28.16b, "
                                                                                                       "v29.16b}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i16Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.8h, v27.8h, v28.8h, "
                                                                                                        "v29.8h}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i32Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.4s, v27.4s, v28.4s, "
                                                                                                        "v29.4s}, [x30], x29");
  TEST_SINGLE(ld4r<SubRegSize::i64Bit>(QReg::q26, QReg::q27, QReg::q28, QReg::q29, Reg::r30, Reg::r29), "ld4r {v26.2d, v27.2d, v28.2d, "
                                                                                                        "v29.2d}, [x30], x29");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v31, VReg::v0, VReg::v1, VReg::v2, 0, Reg::r30, Reg::r29), "st4 {v31.b, v0.b, v1.b, v2.b}[0], "
                                                                                                      "[x30], x29");
  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, "
                                                                                                         "v29.b}[0], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, v28.h, "
                                                                                                          "v29.h}[0], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, "
                                                                                                          "v29.s}[0], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 0, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, "
                                                                                                          "v29.d}[0], [x30], x29");

  TEST_SINGLE(st4<SubRegSize::i8Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 15, Reg::r30, Reg::r29), "st4 {v26.b, v27.b, v28.b, "
                                                                                                          "v29.b}[15], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i16Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 7, Reg::r30, Reg::r29), "st4 {v26.h, v27.h, v28.h, "
                                                                                                          "v29.h}[7], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i32Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 3, Reg::r30, Reg::r29), "st4 {v26.s, v27.s, v28.s, "
                                                                                                          "v29.s}[3], [x30], x29");
  TEST_SINGLE(st4<SubRegSize::i64Bit>(VReg::v26, VReg::v27, VReg::v28, VReg::v29, 1, Reg::r30, Reg::r29), "st4 {v26.d, v27.d, v28.d, "
                                                                                                          "v29.d}[1], [x30], x29");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore exclusive pair") {
  // Exclusive pair emitters (stxp/stlxp/ldxp/ldaxp), each checked once with
  // Size::i32Bit and once with Size::i64Bit against the expected disassembly text.
  //
  // Store forms: the first register argument is printed as a W register for both
  // sizes (the status result), while the two data registers follow the requested
  // size (w29/w30 vs. x29/x30). The last argument is the base address register.
  // NOTE(review): the status register and the base register are both r28 in the
  // store cases. These checks only look at the resulting text, but confirm the
  // overlap is intentional.
  TEST_SINGLE(stxp(Size::i32Bit, Reg::r28, Reg::r29, Reg::r30, Reg::r28), "stxp w28, w29, w30, [x28]");
  TEST_SINGLE(stxp(Size::i64Bit, Reg::r28, Reg::r29, Reg::r30, Reg::r28), "stxp w28, x29, x30, [x28]");

  // Store-release variant of the above.
  TEST_SINGLE(stlxp(Size::i32Bit, Reg::r28, Reg::r29, Reg::r30, Reg::r28), "stlxp w28, w29, w30, [x28]");
  TEST_SINGLE(stlxp(Size::i64Bit, Reg::r28, Reg::r29, Reg::r30, Reg::r28), "stlxp w28, x29, x30, [x28]");

  // Load forms: two destination registers followed by the base address register.
  TEST_SINGLE(ldxp(Size::i32Bit, Reg::r29, Reg::r30, Reg::r28), "ldxp w29, w30, [x28]");
  TEST_SINGLE(ldxp(Size::i64Bit, Reg::r29, Reg::r30, Reg::r28), "ldxp x29, x30, [x28]");

  // Load-acquire variant of the above.
  TEST_SINGLE(ldaxp(Size::i32Bit, Reg::r29, Reg::r30, Reg::r28), "ldaxp w29, w30, [x28]");
  TEST_SINGLE(ldaxp(Size::i64Bit, Reg::r29, Reg::r30, Reg::r28), "ldaxp x29, x30, [x28]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore exclusive register") {
  // Single-register exclusive loads/stores. The first half exercises the
  // size-specific emitters (…b, …h, and the WReg/XReg overloads); the second half
  // exercises the SubRegSize-selected overloads and expects the same text.
  // In every store case the first operand is printed as a W register.

  // Byte-sized accesses.
  TEST_SINGLE(stxrb(Reg::r30, Reg::r29, Reg::r28), "stxrb w30, w29, [x28]");
  TEST_SINGLE(stlxrb(Reg::r30, Reg::r29, Reg::r28), "stlxrb w30, w29, [x28]");

  TEST_SINGLE(ldxrb(Reg::r30, Reg::r29), "ldxrb w30, [x29]");
  TEST_SINGLE(ldaxrb(Reg::r30, Reg::r29), "ldaxrb w30, [x29]");

  // Halfword-sized accesses.
  TEST_SINGLE(stxrh(Reg::r30, Reg::r29, Reg::r28), "stxrh w30, w29, [x28]");
  TEST_SINGLE(stlxrh(Reg::r30, Reg::r29, Reg::r28), "stlxrh w30, w29, [x28]");

  TEST_SINGLE(ldxrh(Reg::r30, Reg::r29), "ldxrh w30, [x29]");
  TEST_SINGLE(ldaxrh(Reg::r30, Reg::r29), "ldaxrh w30, [x29]");

  // 32-bit accesses through the WReg overloads.
  TEST_SINGLE(stxr(WReg::w30, WReg::w29, Reg::r28), "stxr w30, w29, [x28]");
  TEST_SINGLE(stlxr(WReg::w30, WReg::w29, Reg::r28), "stlxr w30, w29, [x28]");

  TEST_SINGLE(ldxr(WReg::w30, Reg::r29), "ldxr w30, [x29]");
  TEST_SINGLE(ldaxr(WReg::w30, Reg::r29), "ldaxr w30, [x29]");

  // 64-bit accesses through the XReg overloads.
  // NOTE(review): stxr is called with an XReg first operand while stlxr is called
  // with a WReg one, yet both are expected to print "w30". This looks like an
  // asymmetry in the emitter's overload signatures - confirm against the emitter.
  TEST_SINGLE(stxr(XReg::x30, XReg::x29, Reg::r28), "stxr w30, x29, [x28]");
  TEST_SINGLE(stlxr(WReg::w30, XReg::x29, Reg::r28), "stlxr w30, x29, [x28]");

  TEST_SINGLE(ldxr(XReg::x30, Reg::r29), "ldxr x30, [x29]");
  TEST_SINGLE(ldaxr(XReg::x30, Reg::r29), "ldaxr x30, [x29]");

  // SubRegSize-selected stores: 8/16-bit sizes are expected to print the b/h
  // mnemonics, 32/64-bit sizes the plain mnemonic with a w/x data register.
  TEST_SINGLE(stxr(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "stxrb w30, w29, [x28]");
  TEST_SINGLE(stlxr(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "stlxrb w30, w29, [x28]");
  TEST_SINGLE(stxr(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "stxrh w30, w29, [x28]");
  TEST_SINGLE(stlxr(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "stlxrh w30, w29, [x28]");
  TEST_SINGLE(stxr(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "stxr w30, w29, [x28]");
  TEST_SINGLE(stlxr(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "stlxr w30, w29, [x28]");
  TEST_SINGLE(stxr(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "stxr w30, x29, [x28]");
  TEST_SINGLE(stlxr(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "stlxr w30, x29, [x28]");

  // SubRegSize-selected loads, same mnemonic/register-width mapping as the stores.
  TEST_SINGLE(ldxr(SubRegSize::i8Bit, Reg::r30, Reg::r29), "ldxrb w30, [x29]");
  TEST_SINGLE(ldaxr(SubRegSize::i8Bit, Reg::r30, Reg::r29), "ldaxrb w30, [x29]");
  TEST_SINGLE(ldxr(SubRegSize::i16Bit, Reg::r30, Reg::r29), "ldxrh w30, [x29]");
  TEST_SINGLE(ldaxr(SubRegSize::i16Bit, Reg::r30, Reg::r29), "ldaxrh w30, [x29]");
  TEST_SINGLE(ldxr(SubRegSize::i32Bit, Reg::r30, Reg::r29), "ldxr w30, [x29]");
  TEST_SINGLE(ldaxr(SubRegSize::i32Bit, Reg::r30, Reg::r29), "ldaxr w30, [x29]");
  TEST_SINGLE(ldxr(SubRegSize::i64Bit, Reg::r30, Reg::r29), "ldxr x30, [x29]");
  TEST_SINGLE(ldaxr(SubRegSize::i64Bit, Reg::r30, Reg::r29), "ldaxr x30, [x29]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Load/store ordered") {
  // Ordered loads/stores: stllr*/stlr* stores and ldlar*/ldar* loads.
  // Every case passes (data register, base register) and expects the data register
  // printed as w30 or x30 and the base printed as [x29].

  // Byte-sized variants.
  TEST_SINGLE(stllrb(Reg::r30, Reg::r29), "stllrb w30, [x29]");
  TEST_SINGLE(stlrb(Reg::r30, Reg::r29), "stlrb w30, [x29]");
  TEST_SINGLE(ldlarb(Reg::r30, Reg::r29), "ldlarb w30, [x29]");
  TEST_SINGLE(ldarb(Reg::r30, Reg::r29), "ldarb w30, [x29]");

  // Halfword-sized variants.
  TEST_SINGLE(stllrh(Reg::r30, Reg::r29), "stllrh w30, [x29]");
  TEST_SINGLE(stlrh(Reg::r30, Reg::r29), "stlrh w30, [x29]");
  TEST_SINGLE(ldlarh(Reg::r30, Reg::r29), "ldlarh w30, [x29]");
  TEST_SINGLE(ldarh(Reg::r30, Reg::r29), "ldarh w30, [x29]");

  // 32-bit variants, selected by passing a WReg data register.
  TEST_SINGLE(stllr(WReg::w30, Reg::r29), "stllr w30, [x29]");
  TEST_SINGLE(stlr(WReg::w30, Reg::r29), "stlr w30, [x29]");
  TEST_SINGLE(ldlar(WReg::w30, Reg::r29), "ldlar w30, [x29]");
  TEST_SINGLE(ldar(WReg::w30, Reg::r29), "ldar w30, [x29]");

  // 64-bit variants, selected by passing an XReg data register.
  TEST_SINGLE(stllr(XReg::x30, Reg::r29), "stllr x30, [x29]");
  TEST_SINGLE(stlr(XReg::x30, Reg::r29), "stlr x30, [x29]");
  TEST_SINGLE(ldlar(XReg::x30, Reg::r29), "ldlar x30, [x29]");
  TEST_SINGLE(ldar(XReg::x30, Reg::r29), "ldar x30, [x29]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Compare and swap") {
  // Compare-and-swap emitters in their four flavours (cas, casl, casa, casal).
  // The first half uses the size-specific emitters; the second half uses the
  // SubRegSize-selected overloads and expects identical text.
  // All cases pass two data registers (r30, r29) and the base register r28.

  // Byte-sized variants.
  TEST_SINGLE(casb(Reg::r30, Reg::r29, Reg::r28), "casb w30, w29, [x28]");
  TEST_SINGLE(caslb(Reg::r30, Reg::r29, Reg::r28), "caslb w30, w29, [x28]");
  TEST_SINGLE(casab(Reg::r30, Reg::r29, Reg::r28), "casab w30, w29, [x28]");
  TEST_SINGLE(casalb(Reg::r30, Reg::r29, Reg::r28), "casalb w30, w29, [x28]");

  // Halfword-sized variants.
  TEST_SINGLE(cash(Reg::r30, Reg::r29, Reg::r28), "cash w30, w29, [x28]");
  TEST_SINGLE(caslh(Reg::r30, Reg::r29, Reg::r28), "caslh w30, w29, [x28]");
  TEST_SINGLE(casah(Reg::r30, Reg::r29, Reg::r28), "casah w30, w29, [x28]");
  TEST_SINGLE(casalh(Reg::r30, Reg::r29, Reg::r28), "casalh w30, w29, [x28]");

  // 32-bit variants through the WReg overloads.
  TEST_SINGLE(cas(WReg::w30, WReg::w29, Reg::r28), "cas w30, w29, [x28]");
  TEST_SINGLE(casl(WReg::w30, WReg::w29, Reg::r28), "casl w30, w29, [x28]");
  TEST_SINGLE(casa(WReg::w30, WReg::w29, Reg::r28), "casa w30, w29, [x28]");
  TEST_SINGLE(casal(WReg::w30, WReg::w29, Reg::r28), "casal w30, w29, [x28]");

  // 64-bit variants through the XReg overloads.
  TEST_SINGLE(cas(XReg::x30, XReg::x29, Reg::r28), "cas x30, x29, [x28]");
  TEST_SINGLE(casl(XReg::x30, XReg::x29, Reg::r28), "casl x30, x29, [x28]");
  TEST_SINGLE(casa(XReg::x30, XReg::x29, Reg::r28), "casa x30, x29, [x28]");
  TEST_SINGLE(casal(XReg::x30, XReg::x29, Reg::r28), "casal x30, x29, [x28]");

  // SubRegSize-selected cas: 8/16-bit print the b/h mnemonics with W registers,
  // 32-bit prints W registers, 64-bit prints X registers.
  TEST_SINGLE(cas(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "casb w30, w29, [x28]");
  TEST_SINGLE(cas(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "cash w30, w29, [x28]");
  TEST_SINGLE(cas(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "cas w30, w29, [x28]");
  TEST_SINGLE(cas(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "cas x30, x29, [x28]");

  // SubRegSize-selected casl.
  TEST_SINGLE(casl(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "caslb w30, w29, [x28]");
  TEST_SINGLE(casl(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "caslh w30, w29, [x28]");
  TEST_SINGLE(casl(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "casl w30, w29, [x28]");
  TEST_SINGLE(casl(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "casl x30, x29, [x28]");

  // SubRegSize-selected casa.
  TEST_SINGLE(casa(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "casab w30, w29, [x28]");
  TEST_SINGLE(casa(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "casah w30, w29, [x28]");
  TEST_SINGLE(casa(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "casa w30, w29, [x28]");
  TEST_SINGLE(casa(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "casa x30, x29, [x28]");

  // SubRegSize-selected casal.
  TEST_SINGLE(casal(SubRegSize::i8Bit, Reg::r30, Reg::r29, Reg::r28), "casalb w30, w29, [x28]");
  TEST_SINGLE(casal(SubRegSize::i16Bit, Reg::r30, Reg::r29, Reg::r28), "casalh w30, w29, [x28]");
  TEST_SINGLE(casal(SubRegSize::i32Bit, Reg::r30, Reg::r29, Reg::r28), "casal w30, w29, [x28]");
  TEST_SINGLE(casal(SubRegSize::i64Bit, Reg::r30, Reg::r29, Reg::r28), "casal x30, x29, [x28]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: LDAPR/STLR unscaled immediate") {
  // stlur*/ldapur* emitters with an immediate byte offset from the base register.
  // Every instruction is checked at offsets -256 and 255, i.e. the two extremes
  // of a signed 9-bit range (presumably the encodable range - see the Arm ARM).

  // Byte store / zero-extending byte load.
  TEST_SINGLE(stlurb(Reg::r30, Reg::r29, -256), "stlurb w30, [x29, #-256]");
  TEST_SINGLE(stlurb(Reg::r30, Reg::r29, 255), "stlurb w30, [x29, #255]");

  TEST_SINGLE(ldapurb(Reg::r30, Reg::r29, -256), "ldapurb w30, [x29, #-256]");
  TEST_SINGLE(ldapurb(Reg::r30, Reg::r29, 255), "ldapurb w30, [x29, #255]");

  // Sign-extending byte load into a W or an X destination.
  TEST_SINGLE(ldapursb(WReg::w30, Reg::r29, -256), "ldapursb w30, [x29, #-256]");
  TEST_SINGLE(ldapursb(WReg::w30, Reg::r29, 255), "ldapursb w30, [x29, #255]");
  TEST_SINGLE(ldapursb(XReg::x30, Reg::r29, -256), "ldapursb x30, [x29, #-256]");
  TEST_SINGLE(ldapursb(XReg::x30, Reg::r29, 255), "ldapursb x30, [x29, #255]");

  // Halfword store / zero-extending halfword load.
  TEST_SINGLE(stlurh(Reg::r30, Reg::r29, -256), "stlurh w30, [x29, #-256]");
  TEST_SINGLE(stlurh(Reg::r30, Reg::r29, 255), "stlurh w30, [x29, #255]");

  TEST_SINGLE(ldapurh(Reg::r30, Reg::r29, -256), "ldapurh w30, [x29, #-256]");
  TEST_SINGLE(ldapurh(Reg::r30, Reg::r29, 255), "ldapurh w30, [x29, #255]");

  // Sign-extending halfword load into a W or an X destination.
  TEST_SINGLE(ldapursh(WReg::w30, Reg::r29, -256), "ldapursh w30, [x29, #-256]");
  TEST_SINGLE(ldapursh(WReg::w30, Reg::r29, 255), "ldapursh w30, [x29, #255]");
  TEST_SINGLE(ldapursh(XReg::x30, Reg::r29, -256), "ldapursh x30, [x29, #-256]");
  TEST_SINGLE(ldapursh(XReg::x30, Reg::r29, 255), "ldapursh x30, [x29, #255]");

  // 32-bit store / load through the WReg overloads.
  TEST_SINGLE(stlur(WReg::w30, Reg::r29, -256), "stlur w30, [x29, #-256]");
  TEST_SINGLE(stlur(WReg::w30, Reg::r29, 255), "stlur w30, [x29, #255]");

  TEST_SINGLE(ldapur(WReg::w30, Reg::r29, -256), "ldapur w30, [x29, #-256]");
  TEST_SINGLE(ldapur(WReg::w30, Reg::r29, 255), "ldapur w30, [x29, #255]");

  // Sign-extending word load into an X destination.
  TEST_SINGLE(ldapursw(XReg::x30, Reg::r29, -256), "ldapursw x30, [x29, #-256]");
  TEST_SINGLE(ldapursw(XReg::x30, Reg::r29, 255), "ldapursw x30, [x29, #255]");

  // 64-bit store / load through the XReg overloads.
  TEST_SINGLE(stlur(XReg::x30, Reg::r29, -256), "stlur x30, [x29, #-256]");
  TEST_SINGLE(stlur(XReg::x30, Reg::r29, 255), "stlur x30, [x29, #255]");

  TEST_SINGLE(ldapur(XReg::x30, Reg::r29, -256), "ldapur x30, [x29, #-256]");
  TEST_SINGLE(ldapur(XReg::x30, Reg::r29, 255), "ldapur x30, [x29, #255]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Load register literal") {
  // PC-relative literal loads are verified by raw encoding rather than by text.
  //
  // Backward references: the label is bound first, one 32-bit data word is
  // emitted at it, and the load follows. The load therefore sits one word after
  // its target, and every expected value below carries an all-ones offset field
  // (bits [23:5]) with the register/prefetch operand in bits [4:0].
  // DisassembleEncoding(1) presumably returns the second emitted word, i.e. the load.

  // 32-bit general-purpose destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldr(WReg::w30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x18fffffe);
  }

  // 32-bit scalar FP/SIMD destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldr(SReg::s30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x1cfffffe);
  }

  // 64-bit general-purpose destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldr(XReg::x30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x58fffffe);
  }

  // 64-bit scalar FP/SIMD destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldr(DReg::d30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x5cfffffe);
  }

  // Sign-extending word load into a 64-bit destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldrsw(XReg::x30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x98fffffe);
  }

  // 128-bit vector destination.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    ldr(QReg::q30, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0x9cfffffe);
  }

  // Prefetch of a literal; the low five bits of the expected word are zero here.
  {
    BackwardLabel LiteralBehind;
    (void)Bind(&LiteralBehind);
    dc32(0);
    prfm(Prefetch::PLDL1KEEP, &LiteralBehind);

    CHECK(DisassembleEncoding(1) == 0xd8ffffe0);
  }

  // Forward references: the load is emitted first and the label is bound right
  // after it, followed by the data word. The target is one word ahead, so the
  // expected offset field is 1 (0x20 once shifted into place). The load is the
  // first emitted word, hence DisassembleEncoding(0).

  // 32-bit general-purpose destination.
  {
    ForwardLabel LiteralAhead;
    ldr(WReg::w30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x1800003e);
  }

  // 32-bit scalar FP/SIMD destination.
  {
    ForwardLabel LiteralAhead;
    ldr(SReg::s30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x1c00003e);
  }

  // 64-bit general-purpose destination.
  {
    ForwardLabel LiteralAhead;
    ldr(XReg::x30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x5800003e);
  }

  // 64-bit scalar FP/SIMD destination.
  {
    ForwardLabel LiteralAhead;
    ldr(DReg::d30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x5c00003e);
  }

  // Sign-extending word load into a 64-bit destination.
  {
    ForwardLabel LiteralAhead;
    ldrsw(XReg::x30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x9800003e);
  }

  // 128-bit vector destination.
  {
    ForwardLabel LiteralAhead;
    ldr(QReg::q30, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0x9c00003e);
  }

  // Prefetch of a literal; the low five bits of the expected word are zero here.
  {
    ForwardLabel LiteralAhead;
    prfm(Prefetch::PLDL1KEEP, &LiteralAhead);
    (void)Bind(&LiteralAhead);
    dc32(0);

    CHECK(DisassembleEncoding(0) == 0xd8000020);
  }
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Memory copy/set") {
  // Note: Some of these aren't implemented in vixl at the moment, however
  //       we supply the cases to change over to once they are. This is good,
  //       because when we update, the unimplemented cases will naturally fail,
  //       facilitating the switch.

  TEST_SINGLE(cpyfp(XReg::x30, XReg::x28, XReg::x29), "cpyfp [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfp(XReg::x17, XReg::x20, XReg::x19), "cpyfp [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfm(XReg::x30, XReg::x28, XReg::x29), "cpyfm [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfm(XReg::x17, XReg::x20, XReg::x19), "cpyfm [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfe(XReg::x30, XReg::x28, XReg::x29), "cpyfe [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfe(XReg::x17, XReg::x20, XReg::x19), "cpyfe [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpyfpwt(XReg::x30, XReg::x28, XReg::x29), "cpyfpwt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfpwt(XReg::x17, XReg::x20, XReg::x19), "cpyfpwt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfpwt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfpwt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmwt(XReg::x30, XReg::x28, XReg::x29), "cpyfmwt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmwt(XReg::x17, XReg::x20, XReg::x19), "cpyfmwt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmwt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmwt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfewt(XReg::x30, XReg::x28, XReg::x29), "cpyfewt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfewt(XReg::x17, XReg::x20, XReg::x19), "cpyfewt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfewt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfewt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfprt(XReg::x30, XReg::x28, XReg::x29), "cpyfprt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfprt(XReg::x17, XReg::x20, XReg::x19), "cpyfprt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfprt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfprt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmrt(XReg::x30, XReg::x28, XReg::x29), "cpyfmrt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmrt(XReg::x17, XReg::x20, XReg::x19), "cpyfmrt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmrt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmrt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfert(XReg::x30, XReg::x28, XReg::x29), "cpyfert [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfert(XReg::x17, XReg::x20, XReg::x19), "cpyfert [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfert(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfert(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfpt(XReg::x30, XReg::x28, XReg::x29), "cpyfpt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfpt(XReg::x17, XReg::x20, XReg::x19), "cpyfpt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfpt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfpt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmt(XReg::x30, XReg::x28, XReg::x29), "cpyfmt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmt(XReg::x17, XReg::x20, XReg::x19), "cpyfmt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfet(XReg::x30, XReg::x28, XReg::x29), "cpyfet [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfet(XReg::x17, XReg::x20, XReg::x19), "cpyfet [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfet(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfet(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpyfpwn(XReg::x30, XReg::x28, XReg::x29), "cpyfpwn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfpwn(XReg::x17, XReg::x20, XReg::x19), "cpyfpwn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfmwn(XReg::x30, XReg::x28, XReg::x29), "cpyfmwn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfmwn(XReg::x17, XReg::x20, XReg::x19), "cpyfmwn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfewn(XReg::x30, XReg::x28, XReg::x29), "cpyfewn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfewn(XReg::x17, XReg::x20, XReg::x19), "cpyfewn [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpyfpwtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfpwtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfpwtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfpwtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfpwtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfpwtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmwtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfmwtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmwtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfmwtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmwtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmwtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfewtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfewtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfewtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfewtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfewtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfewtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfprtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfprtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfprtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfprtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfprtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfprtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmrtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfmrtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmrtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfmrtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmrtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmrtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfertwn(XReg::x30, XReg::x28, XReg::x29), "cpyfertwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfertwn(XReg::x17, XReg::x20, XReg::x19), "cpyfertwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfertwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfertwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfptwn(XReg::x30, XReg::x28, XReg::x29), "cpyfptwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfptwn(XReg::x17, XReg::x20, XReg::x19), "cpyfptwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfptwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfptwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmtwn(XReg::x30, XReg::x28, XReg::x29), "cpyfmtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmtwn(XReg::x17, XReg::x20, XReg::x19), "cpyfmtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfetwn(XReg::x30, XReg::x28, XReg::x29), "cpyfetwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfetwn(XReg::x17, XReg::x20, XReg::x19), "cpyfetwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfetwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfetwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpyfprn(XReg::x30, XReg::x28, XReg::x29), "cpyfprn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfprn(XReg::x17, XReg::x20, XReg::x19), "cpyfprn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfmrn(XReg::x30, XReg::x28, XReg::x29), "cpyfmrn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfmrn(XReg::x17, XReg::x20, XReg::x19), "cpyfmrn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfern(XReg::x30, XReg::x28, XReg::x29), "cpyfern [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfern(XReg::x17, XReg::x20, XReg::x19), "cpyfern [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpyfpwtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfpwtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfpwtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfpwtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfpwtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfpwtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmwtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfmwtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmwtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfmwtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmwtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmwtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfewtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfewtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfewtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfewtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfewtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfewtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfprtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfprtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfprtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfprtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfprtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfprtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmrtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfmrtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmrtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfmrtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmrtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmrtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfertrn(XReg::x30, XReg::x28, XReg::x29), "cpyfertrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfertrn(XReg::x17, XReg::x20, XReg::x19), "cpyfertrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfertrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfertrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfptrn(XReg::x30, XReg::x28, XReg::x29), "cpyfptrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfptrn(XReg::x17, XReg::x20, XReg::x19), "cpyfptrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfptrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfptrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmtrn(XReg::x30, XReg::x28, XReg::x29), "cpyfmtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmtrn(XReg::x17, XReg::x20, XReg::x19), "cpyfmtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfetrn(XReg::x30, XReg::x28, XReg::x29), "cpyfetrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfetrn(XReg::x17, XReg::x20, XReg::x19), "cpyfetrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfetrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfetrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpyfpn(XReg::x30, XReg::x28, XReg::x29), "cpyfpn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfpn(XReg::x17, XReg::x20, XReg::x19), "cpyfpn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfmn(XReg::x30, XReg::x28, XReg::x29), "cpyfmn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfmn(XReg::x17, XReg::x20, XReg::x19), "cpyfmn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyfen(XReg::x30, XReg::x28, XReg::x29), "cpyfen [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyfen(XReg::x17, XReg::x20, XReg::x19), "cpyfen [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpyfpwtn(XReg::x30, XReg::x28, XReg::x29), "cpyfpwtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfpwtn(XReg::x17, XReg::x20, XReg::x19), "cpyfpwtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfpwtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfpwtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmwtn(XReg::x30, XReg::x28, XReg::x29), "cpyfmwtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmwtn(XReg::x17, XReg::x20, XReg::x19), "cpyfmwtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmwtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmwtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfewtn(XReg::x30, XReg::x28, XReg::x29), "cpyfewtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfewtn(XReg::x17, XReg::x20, XReg::x19), "cpyfewtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfewtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfewtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfprtn(XReg::x30, XReg::x28, XReg::x29), "cpyfprtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfprtn(XReg::x17, XReg::x20, XReg::x19), "cpyfprtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfprtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfprtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmrtn(XReg::x30, XReg::x28, XReg::x29), "cpyfmrtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmrtn(XReg::x17, XReg::x20, XReg::x19), "cpyfmrtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmrtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmrtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfertn(XReg::x30, XReg::x28, XReg::x29), "cpyfertn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfertn(XReg::x17, XReg::x20, XReg::x19), "cpyfertn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfertn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfertn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfptn(XReg::x30, XReg::x28, XReg::x29), "cpyfptn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfptn(XReg::x17, XReg::x20, XReg::x19), "cpyfptn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfptn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfptn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfmtn(XReg::x30, XReg::x28, XReg::x29), "cpyfmtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfmtn(XReg::x17, XReg::x20, XReg::x19), "cpyfmtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfmtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfmtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyfetn(XReg::x30, XReg::x28, XReg::x29), "cpyfetn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyfetn(XReg::x17, XReg::x20, XReg::x19), "cpyfetn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyfetn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyfetn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(setp(XReg::x30, XReg::x28, XReg::x29), "setp [x30]!, x28!, x29");
  TEST_SINGLE(setp(XReg::x17, XReg::x20, XReg::x19), "setp [x17]!, x20!, x19");

  TEST_SINGLE(setm(XReg::x30, XReg::x28, XReg::x29), "setm [x30]!, x28!, x29");
  TEST_SINGLE(setm(XReg::x17, XReg::x20, XReg::x19), "setm [x17]!, x20!, x19");

  TEST_SINGLE(sete(XReg::x30, XReg::x28, XReg::x29), "sete [x30]!, x28!, x29");
  TEST_SINGLE(sete(XReg::x17, XReg::x20, XReg::x19), "sete [x17]!, x20!, x19");

  // TEST_SINGLE(setpt(XReg::x30, XReg::x28, XReg::x29), "setpt [x30]!, x28!, x29");
  // TEST_SINGLE(setpt(XReg::x17, XReg::x20, XReg::x19), "setpt [x17]!, x20!, x19");
  TEST_SINGLE(setpt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setpt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setmt(XReg::x30, XReg::x28, XReg::x29), "setmt [x30]!, x28!, x29");
  // TEST_SINGLE(setmt(XReg::x17, XReg::x20, XReg::x19), "setmt [x17]!, x20!, x19");
  TEST_SINGLE(setmt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setmt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setet(XReg::x30, XReg::x28, XReg::x29), "setet [x30]!, x28!, x29");
  // TEST_SINGLE(setet(XReg::x17, XReg::x20, XReg::x19), "setet [x17]!, x20!, x19");
  TEST_SINGLE(setet(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setet(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(setpn(XReg::x30, XReg::x28, XReg::x29), "setpn [x30]!, x28!, x29");
  TEST_SINGLE(setpn(XReg::x17, XReg::x20, XReg::x19), "setpn [x17]!, x20!, x19");

  TEST_SINGLE(setmn(XReg::x30, XReg::x28, XReg::x29), "setmn [x30]!, x28!, x29");
  TEST_SINGLE(setmn(XReg::x17, XReg::x20, XReg::x19), "setmn [x17]!, x20!, x19");

  TEST_SINGLE(seten(XReg::x30, XReg::x28, XReg::x29), "seten [x30]!, x28!, x29");
  TEST_SINGLE(seten(XReg::x17, XReg::x20, XReg::x19), "seten [x17]!, x20!, x19");

  // TEST_SINGLE(setptn(XReg::x30, XReg::x28, XReg::x29), "setptn [x30]!, x28!, x29");
  // TEST_SINGLE(setptn(XReg::x17, XReg::x20, XReg::x19), "setptn [x17]!, x20!, x19");
  TEST_SINGLE(setptn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setptn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setmtn(XReg::x30, XReg::x28, XReg::x29), "setmtn [x30]!, x28!, x29");
  // TEST_SINGLE(setmtn(XReg::x17, XReg::x20, XReg::x19), "setmtn [x17]!, x20!, x19");
  TEST_SINGLE(setmtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setmtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setetn(XReg::x30, XReg::x28, XReg::x29), "setetn [x30]!, x28!, x29");
  // TEST_SINGLE(setetn(XReg::x17, XReg::x20, XReg::x19), "setetn [x17]!, x20!, x19");
  TEST_SINGLE(setetn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setetn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpyp(XReg::x30, XReg::x28, XReg::x29), "cpyp [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyp(XReg::x17, XReg::x20, XReg::x19), "cpyp [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpym(XReg::x30, XReg::x28, XReg::x29), "cpym [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpym(XReg::x17, XReg::x20, XReg::x19), "cpym [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpye(XReg::x30, XReg::x28, XReg::x29), "cpye [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpye(XReg::x17, XReg::x20, XReg::x19), "cpye [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpypwt(XReg::x30, XReg::x28, XReg::x29), "cpypwt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpypwt(XReg::x17, XReg::x20, XReg::x19), "cpypwt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpypwt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpypwt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymwt(XReg::x30, XReg::x28, XReg::x29), "cpymwt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymwt(XReg::x17, XReg::x20, XReg::x19), "cpymwt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymwt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymwt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyewt(XReg::x30, XReg::x28, XReg::x29), "cpyewt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyewt(XReg::x17, XReg::x20, XReg::x19), "cpyewt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyewt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyewt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyprt(XReg::x30, XReg::x28, XReg::x29), "cpyprt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyprt(XReg::x17, XReg::x20, XReg::x19), "cpyprt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyprt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyprt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymrt(XReg::x30, XReg::x28, XReg::x29), "cpymrt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymrt(XReg::x17, XReg::x20, XReg::x19), "cpymrt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymrt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymrt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyert(XReg::x30, XReg::x28, XReg::x29), "cpyert [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyert(XReg::x17, XReg::x20, XReg::x19), "cpyert [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyert(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyert(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpypt(XReg::x30, XReg::x28, XReg::x29), "cpypt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpypt(XReg::x17, XReg::x20, XReg::x19), "cpypt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpypt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpypt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymt(XReg::x30, XReg::x28, XReg::x29), "cpymt [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymt(XReg::x17, XReg::x20, XReg::x19), "cpymt [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyet(XReg::x30, XReg::x28, XReg::x29), "cpyet [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyet(XReg::x17, XReg::x20, XReg::x19), "cpyet [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyet(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyet(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpypwn(XReg::x30, XReg::x28, XReg::x29), "cpypwn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpypwn(XReg::x17, XReg::x20, XReg::x19), "cpypwn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpymwn(XReg::x30, XReg::x28, XReg::x29), "cpymwn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpymwn(XReg::x17, XReg::x20, XReg::x19), "cpymwn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyewn(XReg::x30, XReg::x28, XReg::x29), "cpyewn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyewn(XReg::x17, XReg::x20, XReg::x19), "cpyewn [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpypwtwn(XReg::x30, XReg::x28, XReg::x29), "cpypwtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpypwtwn(XReg::x17, XReg::x20, XReg::x19), "cpypwtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpypwtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpypwtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymwtwn(XReg::x30, XReg::x28, XReg::x29), "cpymwtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymwtwn(XReg::x17, XReg::x20, XReg::x19), "cpymwtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymwtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymwtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyewtwn(XReg::x30, XReg::x28, XReg::x29), "cpyewtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyewtwn(XReg::x17, XReg::x20, XReg::x19), "cpyewtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyewtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyewtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyprtwn(XReg::x30, XReg::x28, XReg::x29), "cpyprtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyprtwn(XReg::x17, XReg::x20, XReg::x19), "cpyprtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyprtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyprtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymrtwn(XReg::x30, XReg::x28, XReg::x29), "cpymrtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymrtwn(XReg::x17, XReg::x20, XReg::x19), "cpymrtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymrtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymrtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyertwn(XReg::x30, XReg::x28, XReg::x29), "cpyertwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyertwn(XReg::x17, XReg::x20, XReg::x19), "cpyertwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyertwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyertwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyptwn(XReg::x30, XReg::x28, XReg::x29), "cpyptwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyptwn(XReg::x17, XReg::x20, XReg::x19), "cpyptwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyptwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyptwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymtwn(XReg::x30, XReg::x28, XReg::x29), "cpymtwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymtwn(XReg::x17, XReg::x20, XReg::x19), "cpymtwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymtwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymtwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyetwn(XReg::x30, XReg::x28, XReg::x29), "cpyetwn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyetwn(XReg::x17, XReg::x20, XReg::x19), "cpyetwn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyetwn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyetwn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpyprn(XReg::x30, XReg::x28, XReg::x29), "cpyprn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyprn(XReg::x17, XReg::x20, XReg::x19), "cpyprn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpymrn(XReg::x30, XReg::x28, XReg::x29), "cpymrn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpymrn(XReg::x17, XReg::x20, XReg::x19), "cpymrn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyern(XReg::x30, XReg::x28, XReg::x29), "cpyern [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyern(XReg::x17, XReg::x20, XReg::x19), "cpyern [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpypwtrn(XReg::x30, XReg::x28, XReg::x29), "cpypwtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpypwtrn(XReg::x17, XReg::x20, XReg::x19), "cpypwtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpypwtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpypwtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymwtrn(XReg::x30, XReg::x28, XReg::x29), "cpymwtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymwtrn(XReg::x17, XReg::x20, XReg::x19), "cpymwtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymwtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymwtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyewtrn(XReg::x30, XReg::x28, XReg::x29), "cpyewtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyewtrn(XReg::x17, XReg::x20, XReg::x19), "cpyewtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyewtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyewtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyprtrn(XReg::x30, XReg::x28, XReg::x29), "cpyprtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyprtrn(XReg::x17, XReg::x20, XReg::x19), "cpyprtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyprtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyprtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymrtrn(XReg::x30, XReg::x28, XReg::x29), "cpymrtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymrtrn(XReg::x17, XReg::x20, XReg::x19), "cpymrtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymrtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymrtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyertrn(XReg::x30, XReg::x28, XReg::x29), "cpyertrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyertrn(XReg::x17, XReg::x20, XReg::x19), "cpyertrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyertrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyertrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyptrn(XReg::x30, XReg::x28, XReg::x29), "cpyptrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyptrn(XReg::x17, XReg::x20, XReg::x19), "cpyptrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyptrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyptrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymtrn(XReg::x30, XReg::x28, XReg::x29), "cpymtrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymtrn(XReg::x17, XReg::x20, XReg::x19), "cpymtrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymtrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymtrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyetrn(XReg::x30, XReg::x28, XReg::x29), "cpyetrn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyetrn(XReg::x17, XReg::x20, XReg::x19), "cpyetrn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyetrn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyetrn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(cpypn(XReg::x30, XReg::x28, XReg::x29), "cpypn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpypn(XReg::x17, XReg::x20, XReg::x19), "cpypn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpymn(XReg::x30, XReg::x28, XReg::x29), "cpymn [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpymn(XReg::x17, XReg::x20, XReg::x19), "cpymn [x17]!, [x20]!, x19!");

  TEST_SINGLE(cpyen(XReg::x30, XReg::x28, XReg::x29), "cpyen [x30]!, [x28]!, x29!");
  TEST_SINGLE(cpyen(XReg::x17, XReg::x20, XReg::x19), "cpyen [x17]!, [x20]!, x19!");

  // TEST_SINGLE(cpypwtn(XReg::x30, XReg::x28, XReg::x29), "cpypwtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpypwtn(XReg::x17, XReg::x20, XReg::x19), "cpypwtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpypwtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpypwtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymwtn(XReg::x30, XReg::x28, XReg::x29), "cpymwtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymwtn(XReg::x17, XReg::x20, XReg::x19), "cpymwtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymwtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymwtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyewtn(XReg::x30, XReg::x28, XReg::x29), "cpyewtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyewtn(XReg::x17, XReg::x20, XReg::x19), "cpyewtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyewtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyewtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyprtn(XReg::x30, XReg::x28, XReg::x29), "cpyprtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyprtn(XReg::x17, XReg::x20, XReg::x19), "cpyprtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyprtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyprtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymrtn(XReg::x30, XReg::x28, XReg::x29), "cpymrtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymrtn(XReg::x17, XReg::x20, XReg::x19), "cpymrtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymrtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymrtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyertn(XReg::x30, XReg::x28, XReg::x29), "cpyertn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyertn(XReg::x17, XReg::x20, XReg::x19), "cpyertn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyertn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyertn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyptn(XReg::x30, XReg::x28, XReg::x29), "cpyptn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyptn(XReg::x17, XReg::x20, XReg::x19), "cpyptn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyptn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyptn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpymtn(XReg::x30, XReg::x28, XReg::x29), "cpymtn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpymtn(XReg::x17, XReg::x20, XReg::x19), "cpymtn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpymtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpymtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(cpyetn(XReg::x30, XReg::x28, XReg::x29), "cpyetn [x30]!, [x28]!, x29!");
  // TEST_SINGLE(cpyetn(XReg::x17, XReg::x20, XReg::x19), "cpyetn [x17]!, [x20]!, x19!");
  TEST_SINGLE(cpyetn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(cpyetn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(setgp(XReg::x30, XReg::x28, XReg::x29), "setgp [x30]!, x28!, x29");
  TEST_SINGLE(setgp(XReg::x17, XReg::x20, XReg::x19), "setgp [x17]!, x20!, x19");

  TEST_SINGLE(setgm(XReg::x30, XReg::x28, XReg::x29), "setgm [x30]!, x28!, x29");
  TEST_SINGLE(setgm(XReg::x17, XReg::x20, XReg::x19), "setgm [x17]!, x20!, x19");

  TEST_SINGLE(setge(XReg::x30, XReg::x28, XReg::x29), "setge [x30]!, x28!, x29");
  TEST_SINGLE(setge(XReg::x17, XReg::x20, XReg::x19), "setge [x17]!, x20!, x19");

  // TEST_SINGLE(setgpt(XReg::x30, XReg::x28, XReg::x29), "setgpt [x30]!, x28!, x29");
  // TEST_SINGLE(setgpt(XReg::x17, XReg::x20, XReg::x19), "setgpt [x17]!, x20!, x19");
  TEST_SINGLE(setgpt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setgpt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setgmt(XReg::x30, XReg::x28, XReg::x29), "setgmt [x30]!, x28!, x29");
  // TEST_SINGLE(setgmt(XReg::x17, XReg::x20, XReg::x19), "setgmt [x17]!, x20!, x19");
  TEST_SINGLE(setgmt(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setgmt(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setget(XReg::x30, XReg::x28, XReg::x29), "setget [x30]!, x28!, x29");
  // TEST_SINGLE(setget(XReg::x17, XReg::x20, XReg::x19), "setget [x17]!, x20!, x19");
  TEST_SINGLE(setget(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setget(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  TEST_SINGLE(setgpn(XReg::x30, XReg::x28, XReg::x29), "setgpn [x30]!, x28!, x29");
  TEST_SINGLE(setgpn(XReg::x17, XReg::x20, XReg::x19), "setgpn [x17]!, x20!, x19");

  TEST_SINGLE(setgmn(XReg::x30, XReg::x28, XReg::x29), "setgmn [x30]!, x28!, x29");
  TEST_SINGLE(setgmn(XReg::x17, XReg::x20, XReg::x19), "setgmn [x17]!, x20!, x19");

  TEST_SINGLE(setgen(XReg::x30, XReg::x28, XReg::x29), "setgen [x30]!, x28!, x29");
  TEST_SINGLE(setgen(XReg::x17, XReg::x20, XReg::x19), "setgen [x17]!, x20!, x19");

  // TEST_SINGLE(setgptn(XReg::x30, XReg::x28, XReg::x29), "setgptn [x30]!, x28!, x29");
  // TEST_SINGLE(setgptn(XReg::x17, XReg::x20, XReg::x19), "setgptn [x17]!, x20!, x19");
  TEST_SINGLE(setgptn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setgptn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setgmtn(XReg::x30, XReg::x28, XReg::x29), "setgmtn [x30]!, x28!, x29");
  // TEST_SINGLE(setgmtn(XReg::x17, XReg::x20, XReg::x19), "setgmtn [x17]!, x20!, x19");
  TEST_SINGLE(setgmtn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setgmtn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");

  // TEST_SINGLE(setgetn(XReg::x30, XReg::x28, XReg::x29), "setgetn [x30]!, x28!, x29");
  // TEST_SINGLE(setgetn(XReg::x17, XReg::x20, XReg::x19), "setgetn [x17]!, x20!, x19");
  TEST_SINGLE(setgetn(XReg::x30, XReg::x28, XReg::x29), "unimplemented (Unimplemented)");
  TEST_SINGLE(setgetn(XReg::x17, XReg::x20, XReg::x19), "unimplemented (Unimplemented)");
}
// Disassembly checks for the no-allocate pair instructions (stnp / ldnp).
//
// Every register class exercised here (W, S, X, D, Q) is tested for both the store and the
// load form, each at one negative and one positive offset from the base register x29.
// The offsets grow with the register width: -256/252 for 32-bit registers, -512/504 for
// 64-bit registers and -1024/1008 for 128-bit registers.
// NOTE(review): these look like the extremes of a signed 7-bit immediate scaled by the
// access size (-64 * size .. 63 * size) - confirm against the instruction encoding.
//
// TEST_SINGLE is defined outside this section; presumably it emits the instruction given as
// the first argument and compares its disassembly with the string in the second argument.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore no-allocate pair") {
  // 32-bit general-purpose registers (W): offsets -256 and 252.
  TEST_SINGLE(stnp(WReg::w30, WReg::w28, Reg::r29, -256), "stnp w30, w28, [x29, #-256]");
  TEST_SINGLE(stnp(WReg::w30, WReg::w28, Reg::r29, 252), "stnp w30, w28, [x29, #252]");

  TEST_SINGLE(ldnp(WReg::w30, WReg::w28, Reg::r29, -256), "ldnp w30, w28, [x29, #-256]");
  TEST_SINGLE(ldnp(WReg::w30, WReg::w28, Reg::r29, 252), "ldnp w30, w28, [x29, #252]");

  // 32-bit scalar FP/SIMD registers (S): same offsets as the W forms.
  TEST_SINGLE(stnp(SReg::s30, SReg::s28, Reg::r29, -256), "stnp s30, s28, [x29, #-256]");
  TEST_SINGLE(stnp(SReg::s30, SReg::s28, Reg::r29, 252), "stnp s30, s28, [x29, #252]");

  TEST_SINGLE(ldnp(SReg::s30, SReg::s28, Reg::r29, -256), "ldnp s30, s28, [x29, #-256]");
  TEST_SINGLE(ldnp(SReg::s30, SReg::s28, Reg::r29, 252), "ldnp s30, s28, [x29, #252]");

  // 64-bit general-purpose registers (X): offsets -512 and 504.
  TEST_SINGLE(stnp(XReg::x30, XReg::x28, Reg::r29, -512), "stnp x30, x28, [x29, #-512]");
  TEST_SINGLE(stnp(XReg::x30, XReg::x28, Reg::r29, 504), "stnp x30, x28, [x29, #504]");

  TEST_SINGLE(ldnp(XReg::x30, XReg::x28, Reg::r29, -512), "ldnp x30, x28, [x29, #-512]");
  TEST_SINGLE(ldnp(XReg::x30, XReg::x28, Reg::r29, 504), "ldnp x30, x28, [x29, #504]");

  // 64-bit scalar FP/SIMD registers (D): same offsets as the X forms.
  TEST_SINGLE(stnp(DReg::d30, DReg::d28, Reg::r29, -512), "stnp d30, d28, [x29, #-512]");
  TEST_SINGLE(stnp(DReg::d30, DReg::d28, Reg::r29, 504), "stnp d30, d28, [x29, #504]");

  TEST_SINGLE(ldnp(DReg::d30, DReg::d28, Reg::r29, -512), "ldnp d30, d28, [x29, #-512]");
  TEST_SINGLE(ldnp(DReg::d30, DReg::d28, Reg::r29, 504), "ldnp d30, d28, [x29, #504]");

  // 128-bit vector registers (Q): offsets -1024 and 1008.
  TEST_SINGLE(stnp(QReg::q30, QReg::q28, Reg::r29, -1024), "stnp q30, q28, [x29, #-1024]");
  TEST_SINGLE(stnp(QReg::q30, QReg::q28, Reg::r29, 1008), "stnp q30, q28, [x29, #1008]");

  TEST_SINGLE(ldnp(QReg::q30, QReg::q28, Reg::r29, -1024), "ldnp q30, q28, [x29, #-1024]");
  TEST_SINGLE(ldnp(QReg::q30, QReg::q28, Reg::r29, 1008), "ldnp q30, q28, [x29, #1008]");
}
// Checks the disassembly text produced for stp/ldp/ldpsw when instantiated with
// IndexType::POST. The expected strings use the "[x29], #imm" form, i.e. the
// immediate is printed after the closing bracket.
// Every form is emitted twice: once with a negative and once with a positive offset.
// NOTE(review): the offsets look like the extremes of a signed 7-bit immediate scaled
// by the element size (4, 8 or 16 bytes) -- confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register pair post-indexed") {
  // 32-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::POST>(WReg::w30, WReg::w28, Reg::r29, -256), "stp w30, w28, [x29], #-256");
  TEST_SINGLE(stp<IndexType::POST>(WReg::w30, WReg::w28, Reg::r29, 252), "stp w30, w28, [x29], #252");

  TEST_SINGLE(ldp<IndexType::POST>(WReg::w30, WReg::w28, Reg::r29, -256), "ldp w30, w28, [x29], #-256");
  TEST_SINGLE(ldp<IndexType::POST>(WReg::w30, WReg::w28, Reg::r29, 252), "ldp w30, w28, [x29], #252");

  // ldpsw takes X destination registers but is tested with the same offsets as the W forms.
  TEST_SINGLE(ldpsw<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, -256), "ldpsw x30, x28, [x29], #-256");
  TEST_SINGLE(ldpsw<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, 252), "ldpsw x30, x28, [x29], #252");

  // 64-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, -512), "stp x30, x28, [x29], #-512");
  TEST_SINGLE(stp<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, 504), "stp x30, x28, [x29], #504");

  TEST_SINGLE(ldp<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, -512), "ldp x30, x28, [x29], #-512");
  TEST_SINGLE(ldp<IndexType::POST>(XReg::x30, XReg::x28, Reg::r29, 504), "ldp x30, x28, [x29], #504");

  // S (32-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::POST>(SReg::s30, SReg::s28, Reg::r29, -256), "stp s30, s28, [x29], #-256");
  TEST_SINGLE(stp<IndexType::POST>(SReg::s30, SReg::s28, Reg::r29, 252), "stp s30, s28, [x29], #252");

  TEST_SINGLE(ldp<IndexType::POST>(SReg::s30, SReg::s28, Reg::r29, -256), "ldp s30, s28, [x29], #-256");
  TEST_SINGLE(ldp<IndexType::POST>(SReg::s30, SReg::s28, Reg::r29, 252), "ldp s30, s28, [x29], #252");

  // D (64-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::POST>(DReg::d30, DReg::d28, Reg::r29, -512), "stp d30, d28, [x29], #-512");
  TEST_SINGLE(stp<IndexType::POST>(DReg::d30, DReg::d28, Reg::r29, 504), "stp d30, d28, [x29], #504");

  TEST_SINGLE(ldp<IndexType::POST>(DReg::d30, DReg::d28, Reg::r29, -512), "ldp d30, d28, [x29], #-512");
  TEST_SINGLE(ldp<IndexType::POST>(DReg::d30, DReg::d28, Reg::r29, 504), "ldp d30, d28, [x29], #504");

  // Q (128-bit) register pairs.
  TEST_SINGLE(stp<IndexType::POST>(QReg::q30, QReg::q28, Reg::r29, -1024), "stp q30, q28, [x29], #-1024");
  TEST_SINGLE(stp<IndexType::POST>(QReg::q30, QReg::q28, Reg::r29, 1008), "stp q30, q28, [x29], #1008");

  TEST_SINGLE(ldp<IndexType::POST>(QReg::q30, QReg::q28, Reg::r29, -1024), "ldp q30, q28, [x29], #-1024");
  TEST_SINGLE(ldp<IndexType::POST>(QReg::q30, QReg::q28, Reg::r29, 1008), "ldp q30, q28, [x29], #1008");
}
// Checks the disassembly text produced for stp/ldp/ldpsw when instantiated with
// IndexType::OFFSET. The expected strings use the "[x29, #imm]" form with no
// trailing '!'.
// Every form is emitted twice: once with a negative and once with a positive offset.
// NOTE(review): the offsets look like the extremes of a signed 7-bit immediate scaled
// by the element size (4, 8 or 16 bytes) -- confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register pair offset") {
  // 32-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::OFFSET>(WReg::w30, WReg::w28, Reg::r29, -256), "stp w30, w28, [x29, #-256]");
  TEST_SINGLE(stp<IndexType::OFFSET>(WReg::w30, WReg::w28, Reg::r29, 252), "stp w30, w28, [x29, #252]");

  TEST_SINGLE(ldp<IndexType::OFFSET>(WReg::w30, WReg::w28, Reg::r29, -256), "ldp w30, w28, [x29, #-256]");
  TEST_SINGLE(ldp<IndexType::OFFSET>(WReg::w30, WReg::w28, Reg::r29, 252), "ldp w30, w28, [x29, #252]");

  // ldpsw takes X destination registers but is tested with the same offsets as the W forms.
  TEST_SINGLE(ldpsw<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, -256), "ldpsw x30, x28, [x29, #-256]");
  TEST_SINGLE(ldpsw<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, 252), "ldpsw x30, x28, [x29, #252]");

  // 64-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, -512), "stp x30, x28, [x29, #-512]");
  TEST_SINGLE(stp<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, 504), "stp x30, x28, [x29, #504]");

  TEST_SINGLE(ldp<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, -512), "ldp x30, x28, [x29, #-512]");
  TEST_SINGLE(ldp<IndexType::OFFSET>(XReg::x30, XReg::x28, Reg::r29, 504), "ldp x30, x28, [x29, #504]");

  // S (32-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::OFFSET>(SReg::s30, SReg::s28, Reg::r29, -256), "stp s30, s28, [x29, #-256]");
  TEST_SINGLE(stp<IndexType::OFFSET>(SReg::s30, SReg::s28, Reg::r29, 252), "stp s30, s28, [x29, #252]");

  TEST_SINGLE(ldp<IndexType::OFFSET>(SReg::s30, SReg::s28, Reg::r29, -256), "ldp s30, s28, [x29, #-256]");
  TEST_SINGLE(ldp<IndexType::OFFSET>(SReg::s30, SReg::s28, Reg::r29, 252), "ldp s30, s28, [x29, #252]");

  // D (64-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::OFFSET>(DReg::d30, DReg::d28, Reg::r29, -512), "stp d30, d28, [x29, #-512]");
  TEST_SINGLE(stp<IndexType::OFFSET>(DReg::d30, DReg::d28, Reg::r29, 504), "stp d30, d28, [x29, #504]");

  TEST_SINGLE(ldp<IndexType::OFFSET>(DReg::d30, DReg::d28, Reg::r29, -512), "ldp d30, d28, [x29, #-512]");
  TEST_SINGLE(ldp<IndexType::OFFSET>(DReg::d30, DReg::d28, Reg::r29, 504), "ldp d30, d28, [x29, #504]");

  // Q (128-bit) register pairs.
  TEST_SINGLE(stp<IndexType::OFFSET>(QReg::q30, QReg::q28, Reg::r29, -1024), "stp q30, q28, [x29, #-1024]");
  TEST_SINGLE(stp<IndexType::OFFSET>(QReg::q30, QReg::q28, Reg::r29, 1008), "stp q30, q28, [x29, #1008]");

  TEST_SINGLE(ldp<IndexType::OFFSET>(QReg::q30, QReg::q28, Reg::r29, -1024), "ldp q30, q28, [x29, #-1024]");
  TEST_SINGLE(ldp<IndexType::OFFSET>(QReg::q30, QReg::q28, Reg::r29, 1008), "ldp q30, q28, [x29, #1008]");
}
// Checks the disassembly text produced for stp/ldp/ldpsw when instantiated with
// IndexType::PRE. The expected strings use the "[x29, #imm]!" form, i.e. the
// offset form followed by '!'.
// Every form is emitted twice: once with a negative and once with a positive offset.
// NOTE(review): the offsets look like the extremes of a signed 7-bit immediate scaled
// by the element size (4, 8 or 16 bytes) -- confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register pair pre-indexed") {
  // 32-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::PRE>(WReg::w30, WReg::w28, Reg::r29, -256), "stp w30, w28, [x29, #-256]!");
  TEST_SINGLE(stp<IndexType::PRE>(WReg::w30, WReg::w28, Reg::r29, 252), "stp w30, w28, [x29, #252]!");

  TEST_SINGLE(ldp<IndexType::PRE>(WReg::w30, WReg::w28, Reg::r29, -256), "ldp w30, w28, [x29, #-256]!");
  TEST_SINGLE(ldp<IndexType::PRE>(WReg::w30, WReg::w28, Reg::r29, 252), "ldp w30, w28, [x29, #252]!");

  // ldpsw takes X destination registers but is tested with the same offsets as the W forms.
  TEST_SINGLE(ldpsw<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, -256), "ldpsw x30, x28, [x29, #-256]!");
  TEST_SINGLE(ldpsw<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, 252), "ldpsw x30, x28, [x29, #252]!");

  // 64-bit general-purpose register pairs.
  TEST_SINGLE(stp<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, -512), "stp x30, x28, [x29, #-512]!");
  TEST_SINGLE(stp<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, 504), "stp x30, x28, [x29, #504]!");

  TEST_SINGLE(ldp<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, -512), "ldp x30, x28, [x29, #-512]!");
  TEST_SINGLE(ldp<IndexType::PRE>(XReg::x30, XReg::x28, Reg::r29, 504), "ldp x30, x28, [x29, #504]!");

  // S (32-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::PRE>(SReg::s30, SReg::s28, Reg::r29, -256), "stp s30, s28, [x29, #-256]!");
  TEST_SINGLE(stp<IndexType::PRE>(SReg::s30, SReg::s28, Reg::r29, 252), "stp s30, s28, [x29, #252]!");

  TEST_SINGLE(ldp<IndexType::PRE>(SReg::s30, SReg::s28, Reg::r29, -256), "ldp s30, s28, [x29, #-256]!");
  TEST_SINGLE(ldp<IndexType::PRE>(SReg::s30, SReg::s28, Reg::r29, 252), "ldp s30, s28, [x29, #252]!");

  // D (64-bit scalar) register pairs.
  TEST_SINGLE(stp<IndexType::PRE>(DReg::d30, DReg::d28, Reg::r29, -512), "stp d30, d28, [x29, #-512]!");
  TEST_SINGLE(stp<IndexType::PRE>(DReg::d30, DReg::d28, Reg::r29, 504), "stp d30, d28, [x29, #504]!");

  TEST_SINGLE(ldp<IndexType::PRE>(DReg::d30, DReg::d28, Reg::r29, -512), "ldp d30, d28, [x29, #-512]!");
  TEST_SINGLE(ldp<IndexType::PRE>(DReg::d30, DReg::d28, Reg::r29, 504), "ldp d30, d28, [x29, #504]!");

  // Q (128-bit) register pairs.
  TEST_SINGLE(stp<IndexType::PRE>(QReg::q30, QReg::q28, Reg::r29, -1024), "stp q30, q28, [x29, #-1024]!");
  TEST_SINGLE(stp<IndexType::PRE>(QReg::q30, QReg::q28, Reg::r29, 1008), "stp q30, q28, [x29, #1008]!");

  TEST_SINGLE(ldp<IndexType::PRE>(QReg::q30, QReg::q28, Reg::r29, -1024), "ldp q30, q28, [x29, #-1024]!");
  TEST_SINGLE(ldp<IndexType::PRE>(QReg::q30, QReg::q28, Reg::r29, 1008), "ldp q30, q28, [x29, #1008]!");
}
// Checks the disassembly text produced for the single-register load/store forms when
// instantiated with IndexType::POST. The expected strings use the "[x29], #imm" form.
// Every form is emitted with the offsets -256 and 255, regardless of access size.
// NOTE(review): -256/255 look like the extremes of a signed 9-bit unscaled
// immediate -- confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register immediate post-indexed") {
  // Byte accesses through a general-purpose register.
  TEST_SINGLE(strb<IndexType::POST>(Reg::r30, Reg::r29, -256), "strb w30, [x29], #-256");
  TEST_SINGLE(strb<IndexType::POST>(Reg::r30, Reg::r29, 255), "strb w30, [x29], #255");
  TEST_SINGLE(ldrb<IndexType::POST>(Reg::r30, Reg::r29, -256), "ldrb w30, [x29], #-256");
  TEST_SINGLE(ldrb<IndexType::POST>(Reg::r30, Reg::r29, 255), "ldrb w30, [x29], #255");

  // Byte accesses through a vector register: strb/ldrb with a VReg are expected to
  // disassemble as plain str/ldr of the b-sized register.
  TEST_SINGLE(strb<IndexType::POST>(VReg::v30, Reg::r29, -256), "str b30, [x29], #-256");
  TEST_SINGLE(strb<IndexType::POST>(VReg::v30, Reg::r29, 255), "str b30, [x29], #255");
  TEST_SINGLE(ldrb<IndexType::POST>(VReg::v30, Reg::r29, -256), "ldr b30, [x29], #-256");
  TEST_SINGLE(ldrb<IndexType::POST>(VReg::v30, Reg::r29, 255), "ldr b30, [x29], #255");

  // Sign-extending byte loads into W and X destinations.
  TEST_SINGLE(ldrsb<IndexType::POST>(WReg::w30, Reg::r29, -256), "ldrsb w30, [x29], #-256");
  TEST_SINGLE(ldrsb<IndexType::POST>(WReg::w30, Reg::r29, 255), "ldrsb w30, [x29], #255");
  TEST_SINGLE(ldrsb<IndexType::POST>(XReg::x30, Reg::r29, -256), "ldrsb x30, [x29], #-256");
  TEST_SINGLE(ldrsb<IndexType::POST>(XReg::x30, Reg::r29, 255), "ldrsb x30, [x29], #255");

  // Halfword accesses through a general-purpose register.
  TEST_SINGLE(strh<IndexType::POST>(Reg::r30, Reg::r29, -256), "strh w30, [x29], #-256");
  TEST_SINGLE(strh<IndexType::POST>(Reg::r30, Reg::r29, 255), "strh w30, [x29], #255");
  TEST_SINGLE(ldrh<IndexType::POST>(Reg::r30, Reg::r29, -256), "ldrh w30, [x29], #-256");
  TEST_SINGLE(ldrh<IndexType::POST>(Reg::r30, Reg::r29, 255), "ldrh w30, [x29], #255");

  // Halfword accesses through a vector register: expected to disassemble as
  // plain str/ldr of the h-sized register.
  TEST_SINGLE(strh<IndexType::POST>(VReg::v30, Reg::r29, -256), "str h30, [x29], #-256");
  TEST_SINGLE(strh<IndexType::POST>(VReg::v30, Reg::r29, 255), "str h30, [x29], #255");
  TEST_SINGLE(ldrh<IndexType::POST>(VReg::v30, Reg::r29, -256), "ldr h30, [x29], #-256");
  TEST_SINGLE(ldrh<IndexType::POST>(VReg::v30, Reg::r29, 255), "ldr h30, [x29], #255");

  // Sign-extending halfword loads into W and X destinations.
  TEST_SINGLE(ldrsh<IndexType::POST>(WReg::w30, Reg::r29, -256), "ldrsh w30, [x29], #-256");
  TEST_SINGLE(ldrsh<IndexType::POST>(WReg::w30, Reg::r29, 255), "ldrsh w30, [x29], #255");
  TEST_SINGLE(ldrsh<IndexType::POST>(XReg::x30, Reg::r29, -256), "ldrsh x30, [x29], #-256");
  TEST_SINGLE(ldrsh<IndexType::POST>(XReg::x30, Reg::r29, 255), "ldrsh x30, [x29], #255");

  // 32-bit general-purpose register.
  TEST_SINGLE(str<IndexType::POST>(WReg::w30, Reg::r29, -256), "str w30, [x29], #-256");
  TEST_SINGLE(str<IndexType::POST>(WReg::w30, Reg::r29, 255), "str w30, [x29], #255");
  TEST_SINGLE(ldr<IndexType::POST>(WReg::w30, Reg::r29, -256), "ldr w30, [x29], #-256");
  TEST_SINGLE(ldr<IndexType::POST>(WReg::w30, Reg::r29, 255), "ldr w30, [x29], #255");

  // S (32-bit scalar) register.
  TEST_SINGLE(str<IndexType::POST>(SReg::s30, Reg::r29, -256), "str s30, [x29], #-256");
  TEST_SINGLE(str<IndexType::POST>(SReg::s30, Reg::r29, 255), "str s30, [x29], #255");
  TEST_SINGLE(ldr<IndexType::POST>(SReg::s30, Reg::r29, -256), "ldr s30, [x29], #-256");
  TEST_SINGLE(ldr<IndexType::POST>(SReg::s30, Reg::r29, 255), "ldr s30, [x29], #255");

  // Sign-extending word load into an X destination.
  TEST_SINGLE(ldrsw<IndexType::POST>(XReg::x30, Reg::r29, -256), "ldrsw x30, [x29], #-256");
  TEST_SINGLE(ldrsw<IndexType::POST>(XReg::x30, Reg::r29, 255), "ldrsw x30, [x29], #255");

  // 64-bit general-purpose register.
  TEST_SINGLE(str<IndexType::POST>(XReg::x30, Reg::r29, -256), "str x30, [x29], #-256");
  TEST_SINGLE(str<IndexType::POST>(XReg::x30, Reg::r29, 255), "str x30, [x29], #255");
  TEST_SINGLE(ldr<IndexType::POST>(XReg::x30, Reg::r29, -256), "ldr x30, [x29], #-256");
  TEST_SINGLE(ldr<IndexType::POST>(XReg::x30, Reg::r29, 255), "ldr x30, [x29], #255");

  // D (64-bit scalar) register.
  TEST_SINGLE(str<IndexType::POST>(DReg::d30, Reg::r29, -256), "str d30, [x29], #-256");
  TEST_SINGLE(str<IndexType::POST>(DReg::d30, Reg::r29, 255), "str d30, [x29], #255");
  TEST_SINGLE(ldr<IndexType::POST>(DReg::d30, Reg::r29, -256), "ldr d30, [x29], #-256");
  TEST_SINGLE(ldr<IndexType::POST>(DReg::d30, Reg::r29, 255), "ldr d30, [x29], #255");

  // Q (128-bit) register.
  TEST_SINGLE(str<IndexType::POST>(QReg::q30, Reg::r29, -256), "str q30, [x29], #-256");
  TEST_SINGLE(str<IndexType::POST>(QReg::q30, Reg::r29, 255), "str q30, [x29], #255");
  TEST_SINGLE(ldr<IndexType::POST>(QReg::q30, Reg::r29, -256), "ldr q30, [x29], #-256");
  TEST_SINGLE(ldr<IndexType::POST>(QReg::q30, Reg::r29, 255), "ldr q30, [x29], #255");
}
// Checks the disassembly text produced for the single-register load/store forms when
// instantiated with IndexType::PRE. The expected strings use the "[x29, #imm]!" form.
// Every form is emitted with the offsets -256 and 255, regardless of access size.
// NOTE(review): -256/255 look like the extremes of a signed 9-bit unscaled
// immediate -- confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register immediate pre-indexed") {
  // Byte accesses through a general-purpose register.
  TEST_SINGLE(strb<IndexType::PRE>(Reg::r30, Reg::r29, -256), "strb w30, [x29, #-256]!");
  TEST_SINGLE(strb<IndexType::PRE>(Reg::r30, Reg::r29, 255), "strb w30, [x29, #255]!");
  TEST_SINGLE(ldrb<IndexType::PRE>(Reg::r30, Reg::r29, -256), "ldrb w30, [x29, #-256]!");
  TEST_SINGLE(ldrb<IndexType::PRE>(Reg::r30, Reg::r29, 255), "ldrb w30, [x29, #255]!");

  // Byte accesses through a vector register: strb/ldrb with a VReg are expected to
  // disassemble as plain str/ldr of the b-sized register.
  TEST_SINGLE(strb<IndexType::PRE>(VReg::v30, Reg::r29, -256), "str b30, [x29, #-256]!");
  TEST_SINGLE(strb<IndexType::PRE>(VReg::v30, Reg::r29, 255), "str b30, [x29, #255]!");
  TEST_SINGLE(ldrb<IndexType::PRE>(VReg::v30, Reg::r29, -256), "ldr b30, [x29, #-256]!");
  TEST_SINGLE(ldrb<IndexType::PRE>(VReg::v30, Reg::r29, 255), "ldr b30, [x29, #255]!");

  // Sign-extending byte loads into W and X destinations.
  TEST_SINGLE(ldrsb<IndexType::PRE>(WReg::w30, Reg::r29, -256), "ldrsb w30, [x29, #-256]!");
  TEST_SINGLE(ldrsb<IndexType::PRE>(WReg::w30, Reg::r29, 255), "ldrsb w30, [x29, #255]!");
  TEST_SINGLE(ldrsb<IndexType::PRE>(XReg::x30, Reg::r29, -256), "ldrsb x30, [x29, #-256]!");
  TEST_SINGLE(ldrsb<IndexType::PRE>(XReg::x30, Reg::r29, 255), "ldrsb x30, [x29, #255]!");

  // Halfword accesses through a general-purpose register.
  TEST_SINGLE(strh<IndexType::PRE>(Reg::r30, Reg::r29, -256), "strh w30, [x29, #-256]!");
  TEST_SINGLE(strh<IndexType::PRE>(Reg::r30, Reg::r29, 255), "strh w30, [x29, #255]!");
  TEST_SINGLE(ldrh<IndexType::PRE>(Reg::r30, Reg::r29, -256), "ldrh w30, [x29, #-256]!");
  TEST_SINGLE(ldrh<IndexType::PRE>(Reg::r30, Reg::r29, 255), "ldrh w30, [x29, #255]!");

  // Halfword accesses through a vector register: expected to disassemble as
  // plain str/ldr of the h-sized register.
  TEST_SINGLE(strh<IndexType::PRE>(VReg::v30, Reg::r29, -256), "str h30, [x29, #-256]!");
  TEST_SINGLE(strh<IndexType::PRE>(VReg::v30, Reg::r29, 255), "str h30, [x29, #255]!");
  TEST_SINGLE(ldrh<IndexType::PRE>(VReg::v30, Reg::r29, -256), "ldr h30, [x29, #-256]!");
  TEST_SINGLE(ldrh<IndexType::PRE>(VReg::v30, Reg::r29, 255), "ldr h30, [x29, #255]!");

  // Sign-extending halfword loads into W and X destinations.
  TEST_SINGLE(ldrsh<IndexType::PRE>(WReg::w30, Reg::r29, -256), "ldrsh w30, [x29, #-256]!");
  TEST_SINGLE(ldrsh<IndexType::PRE>(WReg::w30, Reg::r29, 255), "ldrsh w30, [x29, #255]!");
  TEST_SINGLE(ldrsh<IndexType::PRE>(XReg::x30, Reg::r29, -256), "ldrsh x30, [x29, #-256]!");
  TEST_SINGLE(ldrsh<IndexType::PRE>(XReg::x30, Reg::r29, 255), "ldrsh x30, [x29, #255]!");

  // 32-bit general-purpose register.
  TEST_SINGLE(str<IndexType::PRE>(WReg::w30, Reg::r29, -256), "str w30, [x29, #-256]!");
  TEST_SINGLE(str<IndexType::PRE>(WReg::w30, Reg::r29, 255), "str w30, [x29, #255]!");
  TEST_SINGLE(ldr<IndexType::PRE>(WReg::w30, Reg::r29, -256), "ldr w30, [x29, #-256]!");
  TEST_SINGLE(ldr<IndexType::PRE>(WReg::w30, Reg::r29, 255), "ldr w30, [x29, #255]!");

  // S (32-bit scalar) register.
  TEST_SINGLE(str<IndexType::PRE>(SReg::s30, Reg::r29, -256), "str s30, [x29, #-256]!");
  TEST_SINGLE(str<IndexType::PRE>(SReg::s30, Reg::r29, 255), "str s30, [x29, #255]!");
  TEST_SINGLE(ldr<IndexType::PRE>(SReg::s30, Reg::r29, -256), "ldr s30, [x29, #-256]!");
  TEST_SINGLE(ldr<IndexType::PRE>(SReg::s30, Reg::r29, 255), "ldr s30, [x29, #255]!");

  // Sign-extending word load into an X destination.
  TEST_SINGLE(ldrsw<IndexType::PRE>(XReg::x30, Reg::r29, -256), "ldrsw x30, [x29, #-256]!");
  TEST_SINGLE(ldrsw<IndexType::PRE>(XReg::x30, Reg::r29, 255), "ldrsw x30, [x29, #255]!");

  // 64-bit general-purpose register.
  TEST_SINGLE(str<IndexType::PRE>(XReg::x30, Reg::r29, -256), "str x30, [x29, #-256]!");
  TEST_SINGLE(str<IndexType::PRE>(XReg::x30, Reg::r29, 255), "str x30, [x29, #255]!");
  TEST_SINGLE(ldr<IndexType::PRE>(XReg::x30, Reg::r29, -256), "ldr x30, [x29, #-256]!");
  TEST_SINGLE(ldr<IndexType::PRE>(XReg::x30, Reg::r29, 255), "ldr x30, [x29, #255]!");

  // D (64-bit scalar) register.
  TEST_SINGLE(str<IndexType::PRE>(DReg::d30, Reg::r29, -256), "str d30, [x29, #-256]!");
  TEST_SINGLE(str<IndexType::PRE>(DReg::d30, Reg::r29, 255), "str d30, [x29, #255]!");
  TEST_SINGLE(ldr<IndexType::PRE>(DReg::d30, Reg::r29, -256), "ldr d30, [x29, #-256]!");
  TEST_SINGLE(ldr<IndexType::PRE>(DReg::d30, Reg::r29, 255), "ldr d30, [x29, #255]!");

  // Q (128-bit) register.
  TEST_SINGLE(str<IndexType::PRE>(QReg::q30, Reg::r29, -256), "str q30, [x29, #-256]!");
  TEST_SINGLE(str<IndexType::PRE>(QReg::q30, Reg::r29, 255), "str q30, [x29, #255]!");
  TEST_SINGLE(ldr<IndexType::PRE>(QReg::q30, Reg::r29, -256), "ldr q30, [x29, #-256]!");
  TEST_SINGLE(ldr<IndexType::PRE>(QReg::q30, Reg::r29, 255), "ldr q30, [x29, #255]!");
}
// Placeholder for the unprivileged load/store forms (sttr*/ldtr*).
// The whole body sits inside `if (false)`, so the code is still compiled but none of
// the checks below ever execute and this test case currently verifies nothing at runtime.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register unprivileged") {
  if (false) {
    // Disabled: vixl can't disassemble this class of instructions, so the expected
    // strings below cannot be compared against its output.
    // Each form is listed with the offsets -256 and 255.

    // Byte stores/loads.
    TEST_SINGLE(sttrb(Reg::r30, Reg::r29, -256), "sttrb w30, [x29, #-256]");
    TEST_SINGLE(sttrb(Reg::r30, Reg::r29, 255), "sttrb w30, [x29, #255]");

    TEST_SINGLE(ldtrb(Reg::r30, Reg::r29, -256), "ldtrb w30, [x29, #-256]");
    TEST_SINGLE(ldtrb(Reg::r30, Reg::r29, 255), "ldtrb w30, [x29, #255]");

    // Sign-extending byte loads into W and X destinations.
    TEST_SINGLE(ldtrsb(WReg::w30, Reg::r29, -256), "ldtrsb w30, [x29, #-256]");
    TEST_SINGLE(ldtrsb(WReg::w30, Reg::r29, 255), "ldtrsb w30, [x29, #255]");
    TEST_SINGLE(ldtrsb(XReg::x30, Reg::r29, -256), "ldtrsb x30, [x29, #-256]");
    TEST_SINGLE(ldtrsb(XReg::x30, Reg::r29, 255), "ldtrsb x30, [x29, #255]");

    // Halfword stores/loads.
    TEST_SINGLE(sttrh(Reg::r30, Reg::r29, -256), "sttrh w30, [x29, #-256]");
    TEST_SINGLE(sttrh(Reg::r30, Reg::r29, 255), "sttrh w30, [x29, #255]");

    TEST_SINGLE(ldtrh(Reg::r30, Reg::r29, -256), "ldtrh w30, [x29, #-256]");
    TEST_SINGLE(ldtrh(Reg::r30, Reg::r29, 255), "ldtrh w30, [x29, #255]");

    // Sign-extending halfword loads into W and X destinations.
    TEST_SINGLE(ldtrsh(WReg::w30, Reg::r29, -256), "ldtrsh w30, [x29, #-256]");
    TEST_SINGLE(ldtrsh(WReg::w30, Reg::r29, 255), "ldtrsh w30, [x29, #255]");
    TEST_SINGLE(ldtrsh(XReg::x30, Reg::r29, -256), "ldtrsh x30, [x29, #-256]");
    TEST_SINGLE(ldtrsh(XReg::x30, Reg::r29, 255), "ldtrsh x30, [x29, #255]");

    // 32-bit general-purpose register.
    TEST_SINGLE(sttr(WReg::w30, Reg::r29, -256), "sttr w30, [x29, #-256]");
    TEST_SINGLE(sttr(WReg::w30, Reg::r29, 255), "sttr w30, [x29, #255]");

    TEST_SINGLE(ldtr(WReg::w30, Reg::r29, -256), "ldtr w30, [x29, #-256]");
    TEST_SINGLE(ldtr(WReg::w30, Reg::r29, 255), "ldtr w30, [x29, #255]");

    // Sign-extending word load into an X destination.
    TEST_SINGLE(ldtrsw(XReg::x30, Reg::r29, -256), "ldtrsw x30, [x29, #-256]");
    TEST_SINGLE(ldtrsw(XReg::x30, Reg::r29, 255), "ldtrsw x30, [x29, #255]");

    // 64-bit general-purpose register.
    TEST_SINGLE(sttr(XReg::x30, Reg::r29, -256), "sttr x30, [x29, #-256]");
    TEST_SINGLE(sttr(XReg::x30, Reg::r29, 255), "sttr x30, [x29, #255]");

    TEST_SINGLE(ldtr(XReg::x30, Reg::r29, -256), "ldtr x30, [x29, #-256]");
    TEST_SINGLE(ldtr(XReg::x30, Reg::r29, 255), "ldtr x30, [x29, #255]");
  }
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Atomic memory operations") {
  TEST_SINGLE(stadd(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddb w30, [x29]");
  TEST_SINGLE(stadd(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddh w30, [x29]");
  TEST_SINGLE(stadd(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stadd w30, [x29]");
  TEST_SINGLE(stadd(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stadd x30, [x29]");

  TEST_SINGLE(staddl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddlb w30, [x29]");
  TEST_SINGLE(staddl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddlh w30, [x29]");
  TEST_SINGLE(staddl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "staddl w30, [x29]");
  TEST_SINGLE(staddl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "staddl x30, [x29]");

  TEST_SINGLE(stadda(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddab w30, [x29]");
  TEST_SINGLE(stadda(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddah w30, [x29]");
  TEST_SINGLE(stadda(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stadda w30, [x29]");
  TEST_SINGLE(stadda(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stadda x30, [x29]");

  TEST_SINGLE(staddal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "staddalb w30, [x29]");
  TEST_SINGLE(staddal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "staddalh w30, [x29]");
  TEST_SINGLE(staddal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "staddal w30, [x29]");
  TEST_SINGLE(staddal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "staddal x30, [x29]");

  TEST_SINGLE(stclr(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrb w30, [x29]");
  TEST_SINGLE(stclr(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrh w30, [x29]");
  TEST_SINGLE(stclr(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclr w30, [x29]");
  TEST_SINGLE(stclr(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclr x30, [x29]");

  TEST_SINGLE(stclrl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrlb w30, [x29]");
  TEST_SINGLE(stclrl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrlh w30, [x29]");
  TEST_SINGLE(stclrl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclrl w30, [x29]");
  TEST_SINGLE(stclrl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclrl x30, [x29]");

  TEST_SINGLE(stclra(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclrab w30, [x29]");
  TEST_SINGLE(stclra(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclrah w30, [x29]");
  TEST_SINGLE(stclra(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclra w30, [x29]");
  TEST_SINGLE(stclra(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclra x30, [x29]");

  TEST_SINGLE(stclral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stclralb w30, [x29]");
  TEST_SINGLE(stclral(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stclralh w30, [x29]");
  TEST_SINGLE(stclral(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stclral w30, [x29]");
  TEST_SINGLE(stclral(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stclral x30, [x29]");

  TEST_SINGLE(stset(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetb w30, [x29]");
  TEST_SINGLE(stset(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stseth w30, [x29]");
  TEST_SINGLE(stset(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stset w30, [x29]");
  TEST_SINGLE(stset(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stset x30, [x29]");

  TEST_SINGLE(stsetl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetlb w30, [x29]");
  TEST_SINGLE(stsetl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetlh w30, [x29]");
  TEST_SINGLE(stsetl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsetl w30, [x29]");
  TEST_SINGLE(stsetl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsetl x30, [x29]");

  TEST_SINGLE(stseta(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetab w30, [x29]");
  TEST_SINGLE(stseta(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetah w30, [x29]");
  TEST_SINGLE(stseta(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stseta w30, [x29]");
  TEST_SINGLE(stseta(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stseta x30, [x29]");

  TEST_SINGLE(stsetal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsetalb w30, [x29]");
  TEST_SINGLE(stsetal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsetalh w30, [x29]");
  TEST_SINGLE(stsetal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsetal w30, [x29]");
  TEST_SINGLE(stsetal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsetal x30, [x29]");

  TEST_SINGLE(steor(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorb w30, [x29]");
  TEST_SINGLE(steor(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorh w30, [x29]");
  TEST_SINGLE(steor(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steor w30, [x29]");
  TEST_SINGLE(steor(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steor x30, [x29]");

  TEST_SINGLE(steorl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorlb w30, [x29]");
  TEST_SINGLE(steorl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorlh w30, [x29]");
  TEST_SINGLE(steorl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steorl w30, [x29]");
  TEST_SINGLE(steorl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steorl x30, [x29]");

  TEST_SINGLE(steora(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steorab w30, [x29]");
  TEST_SINGLE(steora(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steorah w30, [x29]");
  TEST_SINGLE(steora(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steora w30, [x29]");
  TEST_SINGLE(steora(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steora x30, [x29]");

  TEST_SINGLE(steoral(SubRegSize::i8Bit, Reg::r30, Reg::r29), "steoralb w30, [x29]");
  TEST_SINGLE(steoral(SubRegSize::i16Bit, Reg::r30, Reg::r29), "steoralh w30, [x29]");
  TEST_SINGLE(steoral(SubRegSize::i32Bit, Reg::r30, Reg::r29), "steoral w30, [x29]");
  TEST_SINGLE(steoral(SubRegSize::i64Bit, Reg::r30, Reg::r29), "steoral x30, [x29]");

  TEST_SINGLE(stsmax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxb w30, [x29]");
  TEST_SINGLE(stsmax(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxh w30, [x29]");
  TEST_SINGLE(stsmax(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmax w30, [x29]");
  TEST_SINGLE(stsmax(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmax x30, [x29]");

  TEST_SINGLE(stsmaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxlb w30, [x29]");
  TEST_SINGLE(stsmaxl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxlh w30, [x29]");
  TEST_SINGLE(stsmaxl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxl w30, [x29]");
  TEST_SINGLE(stsmaxl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmaxl x30, [x29]");

  TEST_SINGLE(stsmaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxab w30, [x29]");
  TEST_SINGLE(stsmaxa(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxah w30, [x29]");
  TEST_SINGLE(stsmaxa(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxa w30, [x29]");
  TEST_SINGLE(stsmaxa(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmaxa x30, [x29]");

  TEST_SINGLE(stsmaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsmaxalb w30, [x29]");
  TEST_SINGLE(stsmaxal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsmaxalh w30, [x29]");
  TEST_SINGLE(stsmaxal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmaxal w30, [x29]");
  TEST_SINGLE(stsmaxal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmaxal x30, [x29]");

  TEST_SINGLE(stsmin(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminb w30, [x29]");
  TEST_SINGLE(stsmin(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminh w30, [x29]");
  TEST_SINGLE(stsmin(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmin w30, [x29]");
  TEST_SINGLE(stsmin(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmin x30, [x29]");

  TEST_SINGLE(stsminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminlb w30, [x29]");
  TEST_SINGLE(stsminl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminlh w30, [x29]");
  TEST_SINGLE(stsminl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsminl w30, [x29]");
  TEST_SINGLE(stsminl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsminl x30, [x29]");

  TEST_SINGLE(stsmina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminab w30, [x29]");
  TEST_SINGLE(stsmina(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminah w30, [x29]");
  TEST_SINGLE(stsmina(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsmina w30, [x29]");
  TEST_SINGLE(stsmina(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsmina x30, [x29]");

  TEST_SINGLE(stsminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stsminalb w30, [x29]");
  TEST_SINGLE(stsminal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stsminalh w30, [x29]");
  TEST_SINGLE(stsminal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stsminal w30, [x29]");
  TEST_SINGLE(stsminal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stsminal x30, [x29]");

  TEST_SINGLE(stumax(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxb w30, [x29]");
  TEST_SINGLE(stumax(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxh w30, [x29]");
  TEST_SINGLE(stumax(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumax w30, [x29]");
  TEST_SINGLE(stumax(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumax x30, [x29]");

  TEST_SINGLE(stumaxl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxlb w30, [x29]");
  TEST_SINGLE(stumaxl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxlh w30, [x29]");
  TEST_SINGLE(stumaxl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxl w30, [x29]");
  TEST_SINGLE(stumaxl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxl x30, [x29]");

  TEST_SINGLE(stumaxa(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxab w30, [x29]");
  TEST_SINGLE(stumaxa(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxah w30, [x29]");
  TEST_SINGLE(stumaxa(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxa w30, [x29]");
  TEST_SINGLE(stumaxa(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxa x30, [x29]");

  TEST_SINGLE(stumaxal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stumaxalb w30, [x29]");
  TEST_SINGLE(stumaxal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stumaxalh w30, [x29]");
  TEST_SINGLE(stumaxal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumaxal w30, [x29]");
  TEST_SINGLE(stumaxal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumaxal x30, [x29]");

  TEST_SINGLE(stumin(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminb w30, [x29]");
  TEST_SINGLE(stumin(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminh w30, [x29]");
  TEST_SINGLE(stumin(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumin w30, [x29]");
  TEST_SINGLE(stumin(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumin x30, [x29]");

  TEST_SINGLE(stuminl(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminlb w30, [x29]");
  TEST_SINGLE(stuminl(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminlh w30, [x29]");
  TEST_SINGLE(stuminl(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stuminl w30, [x29]");
  TEST_SINGLE(stuminl(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stuminl x30, [x29]");

  TEST_SINGLE(stumina(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminab w30, [x29]");
  TEST_SINGLE(stumina(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminah w30, [x29]");
  TEST_SINGLE(stumina(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stumina w30, [x29]");
  TEST_SINGLE(stumina(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stumina x30, [x29]");

  TEST_SINGLE(stuminal(SubRegSize::i8Bit, Reg::r30, Reg::r29), "stuminalb w30, [x29]");
  TEST_SINGLE(stuminal(SubRegSize::i16Bit, Reg::r30, Reg::r29), "stuminalh w30, [x29]");
  TEST_SINGLE(stuminal(SubRegSize::i32Bit, Reg::r30, Reg::r29), "stuminal w30, [x29]");
  TEST_SINGLE(stuminal(SubRegSize::i64Bit, Reg::r30, Reg::r29), "stuminal x30, [x29]");

  TEST_SINGLE(ldswp(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "swpb w30, w28, [x29]");
  TEST_SINGLE(ldswp(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "swph w30, w28, [x29]");
  TEST_SINGLE(ldswp(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "swp w30, w28, [x29]");
  TEST_SINGLE(ldswp(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "swp x30, x28, [x29]");

  TEST_SINGLE(ldswpl(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "swplb w30, w28, [x29]");
  TEST_SINGLE(ldswpl(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "swplh w30, w28, [x29]");
  TEST_SINGLE(ldswpl(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "swpl w30, w28, [x29]");
  TEST_SINGLE(ldswpl(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "swpl x30, x28, [x29]");

  TEST_SINGLE(ldswpa(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "swpab w30, w28, [x29]");
  TEST_SINGLE(ldswpa(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "swpah w30, w28, [x29]");
  TEST_SINGLE(ldswpa(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "swpa w30, w28, [x29]");
  TEST_SINGLE(ldswpa(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "swpa x30, x28, [x29]");

  TEST_SINGLE(ldswpal(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "swpalb w30, w28, [x29]");
  TEST_SINGLE(ldswpal(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "swpalh w30, w28, [x29]");
  TEST_SINGLE(ldswpal(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "swpal w30, w28, [x29]");
  TEST_SINGLE(ldswpal(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "swpal x30, x28, [x29]");

  TEST_SINGLE(ldadd(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddb w30, w28, [x29]");
  TEST_SINGLE(ldadd(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddh w30, w28, [x29]");
  TEST_SINGLE(ldadd(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldadd w30, w28, [x29]");
  TEST_SINGLE(ldadd(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldadd x30, x28, [x29]");

  TEST_SINGLE(ldaddl(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddlb w30, w28, [x29]");
  TEST_SINGLE(ldaddl(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddlh w30, w28, [x29]");
  TEST_SINGLE(ldaddl(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddl w30, w28, [x29]");
  TEST_SINGLE(ldaddl(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddl x30, x28, [x29]");

  TEST_SINGLE(ldadda(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddab w30, w28, [x29]");
  TEST_SINGLE(ldadda(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddah w30, w28, [x29]");
  TEST_SINGLE(ldadda(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldadda w30, w28, [x29]");
  TEST_SINGLE(ldadda(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldadda x30, x28, [x29]");

  TEST_SINGLE(ldaddal(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddalb w30, w28, [x29]");
  TEST_SINGLE(ldaddal(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddalh w30, w28, [x29]");
  TEST_SINGLE(ldaddal(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddal w30, w28, [x29]");
  TEST_SINGLE(ldaddal(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldaddal x30, x28, [x29]");

  TEST_SINGLE(ldclr(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrb w30, w28, [x29]");
  TEST_SINGLE(ldclr(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrh w30, w28, [x29]");
  TEST_SINGLE(ldclr(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldclr w30, w28, [x29]");
  TEST_SINGLE(ldclr(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldclr x30, x28, [x29]");

  TEST_SINGLE(ldclrl(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrlb w30, w28, [x29]");
  TEST_SINGLE(ldclrl(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrlh w30, w28, [x29]");
  TEST_SINGLE(ldclrl(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrl w30, w28, [x29]");
  TEST_SINGLE(ldclrl(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrl x30, x28, [x29]");

  TEST_SINGLE(ldclra(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrab w30, w28, [x29]");
  TEST_SINGLE(ldclra(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldclrah w30, w28, [x29]");
  TEST_SINGLE(ldclra(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldclra w30, w28, [x29]");
  TEST_SINGLE(ldclra(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldclra x30, x28, [x29]");

  TEST_SINGLE(ldclral(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldclralb w30, w28, [x29]");
  TEST_SINGLE(ldclral(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldclralh w30, w28, [x29]");
  TEST_SINGLE(ldclral(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldclral w30, w28, [x29]");
  TEST_SINGLE(ldclral(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldclral x30, x28, [x29]");

  TEST_SINGLE(ldset(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetb w30, w28, [x29]");
  TEST_SINGLE(ldset(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldseth w30, w28, [x29]");
  TEST_SINGLE(ldset(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldset w30, w28, [x29]");
  TEST_SINGLE(ldset(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldset x30, x28, [x29]");

  TEST_SINGLE(ldsetl(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetlb w30, w28, [x29]");
  TEST_SINGLE(ldsetl(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetlh w30, w28, [x29]");
  TEST_SINGLE(ldsetl(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetl w30, w28, [x29]");
  TEST_SINGLE(ldsetl(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetl x30, x28, [x29]");

  TEST_SINGLE(ldseta(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetab w30, w28, [x29]");
  TEST_SINGLE(ldseta(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetah w30, w28, [x29]");
  TEST_SINGLE(ldseta(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldseta w30, w28, [x29]");
  TEST_SINGLE(ldseta(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldseta x30, x28, [x29]");

  TEST_SINGLE(ldsetal(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetalb w30, w28, [x29]");
  TEST_SINGLE(ldsetal(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetalh w30, w28, [x29]");
  TEST_SINGLE(ldsetal(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetal w30, w28, [x29]");
  TEST_SINGLE(ldsetal(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldsetal x30, x28, [x29]");

  TEST_SINGLE(ldeor(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorb w30, w28, [x29]");
  TEST_SINGLE(ldeor(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorh w30, w28, [x29]");
  TEST_SINGLE(ldeor(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldeor w30, w28, [x29]");
  TEST_SINGLE(ldeor(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldeor x30, x28, [x29]");

  TEST_SINGLE(ldeorl(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorlb w30, w28, [x29]");
  TEST_SINGLE(ldeorl(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorlh w30, w28, [x29]");
  TEST_SINGLE(ldeorl(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorl w30, w28, [x29]");
  TEST_SINGLE(ldeorl(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorl x30, x28, [x29]");

  TEST_SINGLE(ldeora(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorab w30, w28, [x29]");
  TEST_SINGLE(ldeora(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldeorah w30, w28, [x29]");
  TEST_SINGLE(ldeora(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldeora w30, w28, [x29]");
  TEST_SINGLE(ldeora(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldeora x30, x28, [x29]");

  TEST_SINGLE(ldeoral(SubRegSize::i8Bit, Reg::r30, Reg::r28, Reg::r29), "ldeoralb w30, w28, [x29]");
  TEST_SINGLE(ldeoral(SubRegSize::i16Bit, Reg::r30, Reg::r28, Reg::r29), "ldeoralh w30, w28, [x29]");
  TEST_SINGLE(ldeoral(SubRegSize::i32Bit, Reg::r30, Reg::r28, Reg::r29), "ldeoral w30, w28, [x29]");
  TEST_SINGLE(ldeoral(SubRegSize::i64Bit, Reg::r30, Reg::r28, Reg::r29), "ldeoral x30, x28, [x29]");

  TEST_SINGLE(ldaddb(Reg::r30, Reg::r28, Reg::r29), "ldaddb w30, w28, [x29]");
  TEST_SINGLE(ldclrb(Reg::r30, Reg::r28, Reg::r29), "ldclrb w30, w28, [x29]");
  TEST_SINGLE(ldeorb(Reg::r30, Reg::r28, Reg::r29), "ldeorb w30, w28, [x29]");
  TEST_SINGLE(ldsetb(Reg::r30, Reg::r28, Reg::r29), "ldsetb w30, w28, [x29]");
  TEST_SINGLE(ldsmaxb(Reg::r30, Reg::r28, Reg::r29), "ldsmaxb w30, w28, [x29]");
  TEST_SINGLE(ldsminb(Reg::r30, Reg::r28, Reg::r29), "ldsminb w30, w28, [x29]");
  TEST_SINGLE(ldumaxb(Reg::r30, Reg::r28, Reg::r29), "ldumaxb w30, w28, [x29]");
  TEST_SINGLE(lduminb(Reg::r30, Reg::r28, Reg::r29), "lduminb w30, w28, [x29]");
  TEST_SINGLE(ldswpb(Reg::r30, Reg::r28, Reg::r29), "swpb w30, w28, [x29]");
  TEST_SINGLE(ldaddlb(Reg::r30, Reg::r28, Reg::r29), "ldaddlb w30, w28, [x29]");
  TEST_SINGLE(ldclrlb(Reg::r30, Reg::r28, Reg::r29), "ldclrlb w30, w28, [x29]");
  TEST_SINGLE(ldeorlb(Reg::r30, Reg::r28, Reg::r29), "ldeorlb w30, w28, [x29]");
  TEST_SINGLE(ldsetlb(Reg::r30, Reg::r28, Reg::r29), "ldsetlb w30, w28, [x29]");
  TEST_SINGLE(ldsmaxlb(Reg::r30, Reg::r28, Reg::r29), "ldsmaxlb w30, w28, [x29]");
  TEST_SINGLE(ldsminlb(Reg::r30, Reg::r28, Reg::r29), "ldsminlb w30, w28, [x29]");
  TEST_SINGLE(ldumaxlb(Reg::r30, Reg::r28, Reg::r29), "ldumaxlb w30, w28, [x29]");
  TEST_SINGLE(lduminlb(Reg::r30, Reg::r28, Reg::r29), "lduminlb w30, w28, [x29]");
  TEST_SINGLE(ldswplb(Reg::r30, Reg::r28, Reg::r29), "swplb w30, w28, [x29]");
  TEST_SINGLE(ldaddab(Reg::r30, Reg::r28, Reg::r29), "ldaddab w30, w28, [x29]");
  TEST_SINGLE(ldclrab(Reg::r30, Reg::r28, Reg::r29), "ldclrab w30, w28, [x29]");
  TEST_SINGLE(ldeorab(Reg::r30, Reg::r28, Reg::r29), "ldeorab w30, w28, [x29]");
  TEST_SINGLE(ldsetab(Reg::r30, Reg::r28, Reg::r29), "ldsetab w30, w28, [x29]");
  TEST_SINGLE(ldsmaxab(Reg::r30, Reg::r28, Reg::r29), "ldsmaxab w30, w28, [x29]");
  TEST_SINGLE(ldsminab(Reg::r30, Reg::r28, Reg::r29), "ldsminab w30, w28, [x29]");
  TEST_SINGLE(ldumaxab(Reg::r30, Reg::r28, Reg::r29), "ldumaxab w30, w28, [x29]");
  TEST_SINGLE(lduminab(Reg::r30, Reg::r28, Reg::r29), "lduminab w30, w28, [x29]");
  TEST_SINGLE(ldswpab(Reg::r30, Reg::r28, Reg::r29), "swpab w30, w28, [x29]");
  TEST_SINGLE(ldaddalb(Reg::r30, Reg::r28, Reg::r29), "ldaddalb w30, w28, [x29]");
  TEST_SINGLE(ldclralb(Reg::r30, Reg::r28, Reg::r29), "ldclralb w30, w28, [x29]");
  TEST_SINGLE(ldeoralb(Reg::r30, Reg::r28, Reg::r29), "ldeoralb w30, w28, [x29]");
  TEST_SINGLE(ldsetalb(Reg::r30, Reg::r28, Reg::r29), "ldsetalb w30, w28, [x29]");
  TEST_SINGLE(ldsmaxalb(Reg::r30, Reg::r28, Reg::r29), "ldsmaxalb w30, w28, [x29]");
  TEST_SINGLE(ldsminalb(Reg::r30, Reg::r28, Reg::r29), "ldsminalb w30, w28, [x29]");
  TEST_SINGLE(ldumaxalb(Reg::r30, Reg::r28, Reg::r29), "ldumaxalb w30, w28, [x29]");
  TEST_SINGLE(lduminalb(Reg::r30, Reg::r28, Reg::r29), "lduminalb w30, w28, [x29]");
  TEST_SINGLE(ldswpalb(Reg::r30, Reg::r28, Reg::r29), "swpalb w30, w28, [x29]");

  TEST_SINGLE(ldaddh(Reg::r30, Reg::r28, Reg::r29), "ldaddh w30, w28, [x29]");
  TEST_SINGLE(ldclrh(Reg::r30, Reg::r28, Reg::r29), "ldclrh w30, w28, [x29]");
  TEST_SINGLE(ldeorh(Reg::r30, Reg::r28, Reg::r29), "ldeorh w30, w28, [x29]");
  TEST_SINGLE(ldseth(Reg::r30, Reg::r28, Reg::r29), "ldseth w30, w28, [x29]");
  TEST_SINGLE(ldsmaxh(Reg::r30, Reg::r28, Reg::r29), "ldsmaxh w30, w28, [x29]");
  TEST_SINGLE(ldsminh(Reg::r30, Reg::r28, Reg::r29), "ldsminh w30, w28, [x29]");
  TEST_SINGLE(ldumaxh(Reg::r30, Reg::r28, Reg::r29), "ldumaxh w30, w28, [x29]");
  TEST_SINGLE(lduminh(Reg::r30, Reg::r28, Reg::r29), "lduminh w30, w28, [x29]");
  TEST_SINGLE(ldswph(Reg::r30, Reg::r28, Reg::r29), "swph w30, w28, [x29]");
  TEST_SINGLE(ldaddlh(Reg::r30, Reg::r28, Reg::r29), "ldaddlh w30, w28, [x29]");
  TEST_SINGLE(ldclrlh(Reg::r30, Reg::r28, Reg::r29), "ldclrlh w30, w28, [x29]");
  TEST_SINGLE(ldeorlh(Reg::r30, Reg::r28, Reg::r29), "ldeorlh w30, w28, [x29]");
  TEST_SINGLE(ldsetlh(Reg::r30, Reg::r28, Reg::r29), "ldsetlh w30, w28, [x29]");
  TEST_SINGLE(ldsmaxlh(Reg::r30, Reg::r28, Reg::r29), "ldsmaxlh w30, w28, [x29]");
  TEST_SINGLE(ldsminlh(Reg::r30, Reg::r28, Reg::r29), "ldsminlh w30, w28, [x29]");
  TEST_SINGLE(ldumaxlh(Reg::r30, Reg::r28, Reg::r29), "ldumaxlh w30, w28, [x29]");
  TEST_SINGLE(lduminlh(Reg::r30, Reg::r28, Reg::r29), "lduminlh w30, w28, [x29]");
  TEST_SINGLE(ldswplh(Reg::r30, Reg::r28, Reg::r29), "swplh w30, w28, [x29]");
  TEST_SINGLE(ldaddah(Reg::r30, Reg::r28, Reg::r29), "ldaddah w30, w28, [x29]");
  TEST_SINGLE(ldclrah(Reg::r30, Reg::r28, Reg::r29), "ldclrah w30, w28, [x29]");
  TEST_SINGLE(ldeorah(Reg::r30, Reg::r28, Reg::r29), "ldeorah w30, w28, [x29]");
  TEST_SINGLE(ldsetah(Reg::r30, Reg::r28, Reg::r29), "ldsetah w30, w28, [x29]");
  TEST_SINGLE(ldsmaxah(Reg::r30, Reg::r28, Reg::r29), "ldsmaxah w30, w28, [x29]");
  TEST_SINGLE(ldsminah(Reg::r30, Reg::r28, Reg::r29), "ldsminah w30, w28, [x29]");
  TEST_SINGLE(ldumaxah(Reg::r30, Reg::r28, Reg::r29), "ldumaxah w30, w28, [x29]");
  TEST_SINGLE(lduminah(Reg::r30, Reg::r28, Reg::r29), "lduminah w30, w28, [x29]");
  TEST_SINGLE(ldswpah(Reg::r30, Reg::r28, Reg::r29), "swpah w30, w28, [x29]");
  TEST_SINGLE(ldaddalh(Reg::r30, Reg::r28, Reg::r29), "ldaddalh w30, w28, [x29]");
  TEST_SINGLE(ldclralh(Reg::r30, Reg::r28, Reg::r29), "ldclralh w30, w28, [x29]");
  TEST_SINGLE(ldeoralh(Reg::r30, Reg::r28, Reg::r29), "ldeoralh w30, w28, [x29]");
  TEST_SINGLE(ldsetalh(Reg::r30, Reg::r28, Reg::r29), "ldsetalh w30, w28, [x29]");
  TEST_SINGLE(ldsmaxalh(Reg::r30, Reg::r28, Reg::r29), "ldsmaxalh w30, w28, [x29]");
  TEST_SINGLE(ldsminalh(Reg::r30, Reg::r28, Reg::r29), "ldsminalh w30, w28, [x29]");
  TEST_SINGLE(ldumaxalh(Reg::r30, Reg::r28, Reg::r29), "ldumaxalh w30, w28, [x29]");
  TEST_SINGLE(lduminalh(Reg::r30, Reg::r28, Reg::r29), "lduminalh w30, w28, [x29]");
  TEST_SINGLE(ldswpalh(Reg::r30, Reg::r28, Reg::r29), "swpalh w30, w28, [x29]");

  TEST_SINGLE(ldadd(WReg::w30, WReg::w28, Reg::r29), "ldadd w30, w28, [x29]");
  TEST_SINGLE(ldclr(WReg::w30, WReg::w28, Reg::r29), "ldclr w30, w28, [x29]");
  TEST_SINGLE(ldeor(WReg::w30, WReg::w28, Reg::r29), "ldeor w30, w28, [x29]");
  TEST_SINGLE(ldset(WReg::w30, WReg::w28, Reg::r29), "ldset w30, w28, [x29]");
  TEST_SINGLE(ldsmax(WReg::w30, WReg::w28, Reg::r29), "ldsmax w30, w28, [x29]");
  TEST_SINGLE(ldsmin(WReg::w30, WReg::w28, Reg::r29), "ldsmin w30, w28, [x29]");
  TEST_SINGLE(ldumax(WReg::w30, WReg::w28, Reg::r29), "ldumax w30, w28, [x29]");
  TEST_SINGLE(ldumin(WReg::w30, WReg::w28, Reg::r29), "ldumin w30, w28, [x29]");
  TEST_SINGLE(ldswp(WReg::w30, WReg::w28, Reg::r29), "swp w30, w28, [x29]");
  TEST_SINGLE(ldaddl(WReg::w30, WReg::w28, Reg::r29), "ldaddl w30, w28, [x29]");
  TEST_SINGLE(ldclrl(WReg::w30, WReg::w28, Reg::r29), "ldclrl w30, w28, [x29]");
  TEST_SINGLE(ldeorl(WReg::w30, WReg::w28, Reg::r29), "ldeorl w30, w28, [x29]");
  TEST_SINGLE(ldsetl(WReg::w30, WReg::w28, Reg::r29), "ldsetl w30, w28, [x29]");
  TEST_SINGLE(ldsmaxl(WReg::w30, WReg::w28, Reg::r29), "ldsmaxl w30, w28, [x29]");
  TEST_SINGLE(ldsminl(WReg::w30, WReg::w28, Reg::r29), "ldsminl w30, w28, [x29]");
  TEST_SINGLE(ldumaxl(WReg::w30, WReg::w28, Reg::r29), "ldumaxl w30, w28, [x29]");
  TEST_SINGLE(lduminl(WReg::w30, WReg::w28, Reg::r29), "lduminl w30, w28, [x29]");
  TEST_SINGLE(ldswpl(WReg::w30, WReg::w28, Reg::r29), "swpl w30, w28, [x29]");
  TEST_SINGLE(ldadda(WReg::w30, WReg::w28, Reg::r29), "ldadda w30, w28, [x29]");
  TEST_SINGLE(ldclra(WReg::w30, WReg::w28, Reg::r29), "ldclra w30, w28, [x29]");
  TEST_SINGLE(ldeora(WReg::w30, WReg::w28, Reg::r29), "ldeora w30, w28, [x29]");
  TEST_SINGLE(ldseta(WReg::w30, WReg::w28, Reg::r29), "ldseta w30, w28, [x29]");
  TEST_SINGLE(ldsmaxa(WReg::w30, WReg::w28, Reg::r29), "ldsmaxa w30, w28, [x29]");
  TEST_SINGLE(ldsmina(WReg::w30, WReg::w28, Reg::r29), "ldsmina w30, w28, [x29]");
  TEST_SINGLE(ldumaxa(WReg::w30, WReg::w28, Reg::r29), "ldumaxa w30, w28, [x29]");
  TEST_SINGLE(ldumina(WReg::w30, WReg::w28, Reg::r29), "ldumina w30, w28, [x29]");
  TEST_SINGLE(ldswpa(WReg::w30, WReg::w28, Reg::r29), "swpa w30, w28, [x29]");
  TEST_SINGLE(ldaddal(WReg::w30, WReg::w28, Reg::r29), "ldaddal w30, w28, [x29]");
  TEST_SINGLE(ldclral(WReg::w30, WReg::w28, Reg::r29), "ldclral w30, w28, [x29]");
  TEST_SINGLE(ldeoral(WReg::w30, WReg::w28, Reg::r29), "ldeoral w30, w28, [x29]");
  TEST_SINGLE(ldsetal(WReg::w30, WReg::w28, Reg::r29), "ldsetal w30, w28, [x29]");
  TEST_SINGLE(ldsmaxal(WReg::w30, WReg::w28, Reg::r29), "ldsmaxal w30, w28, [x29]");
  TEST_SINGLE(ldsminal(WReg::w30, WReg::w28, Reg::r29), "ldsminal w30, w28, [x29]");
  TEST_SINGLE(ldumaxal(WReg::w30, WReg::w28, Reg::r29), "ldumaxal w30, w28, [x29]");
  TEST_SINGLE(lduminal(WReg::w30, WReg::w28, Reg::r29), "lduminal w30, w28, [x29]");
  TEST_SINGLE(ldswpal(WReg::w30, WReg::w28, Reg::r29), "swpal w30, w28, [x29]");

  TEST_SINGLE(ldadd(XReg::x30, XReg::x28, Reg::r29), "ldadd x30, x28, [x29]");
  TEST_SINGLE(ldclr(XReg::x30, XReg::x28, Reg::r29), "ldclr x30, x28, [x29]");
  TEST_SINGLE(ldeor(XReg::x30, XReg::x28, Reg::r29), "ldeor x30, x28, [x29]");
  TEST_SINGLE(ldset(XReg::x30, XReg::x28, Reg::r29), "ldset x30, x28, [x29]");
  TEST_SINGLE(ldsmax(XReg::x30, XReg::x28, Reg::r29), "ldsmax x30, x28, [x29]");
  TEST_SINGLE(ldsmin(XReg::x30, XReg::x28, Reg::r29), "ldsmin x30, x28, [x29]");
  TEST_SINGLE(ldumax(XReg::x30, XReg::x28, Reg::r29), "ldumax x30, x28, [x29]");
  TEST_SINGLE(ldumin(XReg::x30, XReg::x28, Reg::r29), "ldumin x30, x28, [x29]");
  TEST_SINGLE(ldswp(XReg::x30, XReg::x28, Reg::r29), "swp x30, x28, [x29]");
  TEST_SINGLE(ldaddl(XReg::x30, XReg::x28, Reg::r29), "ldaddl x30, x28, [x29]");
  TEST_SINGLE(ldclrl(XReg::x30, XReg::x28, Reg::r29), "ldclrl x30, x28, [x29]");
  TEST_SINGLE(ldeorl(XReg::x30, XReg::x28, Reg::r29), "ldeorl x30, x28, [x29]");
  TEST_SINGLE(ldsetl(XReg::x30, XReg::x28, Reg::r29), "ldsetl x30, x28, [x29]");
  TEST_SINGLE(ldsmaxl(XReg::x30, XReg::x28, Reg::r29), "ldsmaxl x30, x28, [x29]");
  TEST_SINGLE(ldsminl(XReg::x30, XReg::x28, Reg::r29), "ldsminl x30, x28, [x29]");
  TEST_SINGLE(ldumaxl(XReg::x30, XReg::x28, Reg::r29), "ldumaxl x30, x28, [x29]");
  TEST_SINGLE(lduminl(XReg::x30, XReg::x28, Reg::r29), "lduminl x30, x28, [x29]");
  TEST_SINGLE(ldswpl(XReg::x30, XReg::x28, Reg::r29), "swpl x30, x28, [x29]");
  TEST_SINGLE(ldadda(XReg::x30, XReg::x28, Reg::r29), "ldadda x30, x28, [x29]");
  TEST_SINGLE(ldclra(XReg::x30, XReg::x28, Reg::r29), "ldclra x30, x28, [x29]");
  TEST_SINGLE(ldeora(XReg::x30, XReg::x28, Reg::r29), "ldeora x30, x28, [x29]");
  TEST_SINGLE(ldseta(XReg::x30, XReg::x28, Reg::r29), "ldseta x30, x28, [x29]");
  TEST_SINGLE(ldsmaxa(XReg::x30, XReg::x28, Reg::r29), "ldsmaxa x30, x28, [x29]");
  TEST_SINGLE(ldsmina(XReg::x30, XReg::x28, Reg::r29), "ldsmina x30, x28, [x29]");
  TEST_SINGLE(ldumaxa(XReg::x30, XReg::x28, Reg::r29), "ldumaxa x30, x28, [x29]");
  TEST_SINGLE(ldumina(XReg::x30, XReg::x28, Reg::r29), "ldumina x30, x28, [x29]");
  TEST_SINGLE(ldswpa(XReg::x30, XReg::x28, Reg::r29), "swpa x30, x28, [x29]");
  TEST_SINGLE(ldaddal(XReg::x30, XReg::x28, Reg::r29), "ldaddal x30, x28, [x29]");
  TEST_SINGLE(ldclral(XReg::x30, XReg::x28, Reg::r29), "ldclral x30, x28, [x29]");
  TEST_SINGLE(ldeoral(XReg::x30, XReg::x28, Reg::r29), "ldeoral x30, x28, [x29]");
  TEST_SINGLE(ldsetal(XReg::x30, XReg::x28, Reg::r29), "ldsetal x30, x28, [x29]");
  TEST_SINGLE(ldsmaxal(XReg::x30, XReg::x28, Reg::r29), "ldsmaxal x30, x28, [x29]");
  TEST_SINGLE(ldsminal(XReg::x30, XReg::x28, Reg::r29), "ldsminal x30, x28, [x29]");
  TEST_SINGLE(ldumaxal(XReg::x30, XReg::x28, Reg::r29), "ldumaxal x30, x28, [x29]");
  TEST_SINGLE(lduminal(XReg::x30, XReg::x28, Reg::r29), "lduminal x30, x28, [x29]");
  TEST_SINGLE(ldswpal(XReg::x30, XReg::x28, Reg::r29), "swpal x30, x28, [x29]");

  TEST_SINGLE(ldaprb(WReg::w30, Reg::r29), "ldaprb w30, [x29]");
  TEST_SINGLE(ldaprh(WReg::w30, Reg::r29), "ldaprh w30, [x29]");
  TEST_SINGLE(ldapr(WReg::w30, Reg::r29), "ldapr w30, [x29]");
  TEST_SINGLE(ldapr(XReg::x30, Reg::r29), "ldapr x30, [x29]");

  if (false) {
    // vixl can't disassemble this class of instructions, so these checks are
    // disabled by the constant-false condition and never executed.
    // The expected strings are still kept matching the emitter calls so the
    // block can be enabled unchanged once disassembly support exists:
    // st64bv0/st64bv take a status register, a data register and a base,
    // while st64b/ld64b take only a data register and a base.
    TEST_SINGLE(st64bv0(XReg::x30, XReg::x28, Reg::r29), "st64bv0 x30, x28, [x29]");
    TEST_SINGLE(st64bv(XReg::x30, XReg::x28, Reg::r29), "st64bv x30, x28, [x29]");
    TEST_SINGLE(st64b(XReg::x30, Reg::r29), "st64b x30, [x29]");
    TEST_SINGLE(ld64b(XReg::x30, Reg::r29), "ld64b x30, [x29]");
  }
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore register-register offset") {
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, false), "strb w30, [x28, x29]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, true), "strb w30, [x28, x29, lsl #0]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, false), "strb w30, [x28, w29, uxtw]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, true), "strb w30, [x28, w29, uxtw #0]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, false), "strb w30, [x28, w29, sxtw]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, true), "strb w30, [x28, w29, sxtw #0]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, false), "strb w30, [x28, x29, sxtx]");
  TEST_SINGLE(strb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, true), "strb w30, [x28, x29, sxtx #0]");

  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, false), "ldrb w30, [x28, x29]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, true), "ldrb w30, [x28, x29, lsl #0]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, false), "ldrb w30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, true), "ldrb w30, [x28, w29, uxtw #0]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, false), "ldrb w30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, true), "ldrb w30, [x28, w29, sxtw #0]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, false), "ldrb w30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, true), "ldrb w30, [x28, x29, sxtx #0]");

  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, false), "ldrsb w30, [x28, x29]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, true), "ldrsb w30, [x28, x29, lsl #0]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, false), "ldrsb w30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, true), "ldrsb w30, [x28, w29, uxtw #0]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, false), "ldrsb w30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, true), "ldrsb w30, [x28, w29, sxtw #0]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, false), "ldrsb w30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, true), "ldrsb w30, [x28, x29, sxtx #0]");

  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, false), "ldrsb x30, [x28, x29]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, true), "ldrsb x30, [x28, x29, lsl #0]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, false), "ldrsb x30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, true), "ldrsb x30, [x28, w29, uxtw #0]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, false), "ldrsb x30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, true), "ldrsb x30, [x28, w29, sxtw #0]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, false), "ldrsb x30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, true), "ldrsb x30, [x28, x29, sxtx #0]");

  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "strh w30, [x28, x29]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "strh w30, [x28, x29, lsl #1]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "strh w30, [x28, w29, uxtw]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "strh w30, [x28, w29, uxtw #1]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "strh w30, [x28, w29, sxtw]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "strh w30, [x28, w29, sxtw #1]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "strh w30, [x28, x29, sxtx]");
  TEST_SINGLE(strh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "strh w30, [x28, x29, sxtx #1]");

  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldrh w30, [x28, x29]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "ldrh w30, [x28, x29, lsl #1]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldrh w30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "ldrh w30, [x28, w29, uxtw #1]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldrh w30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "ldrh w30, [x28, w29, sxtw #1]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldrh w30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "ldrh w30, [x28, x29, sxtx #1]");

  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldrsh w30, [x28, x29]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "ldrsh w30, [x28, x29, lsl #1]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldrsh w30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "ldrsh w30, [x28, w29, uxtw #1]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldrsh w30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "ldrsh w30, [x28, w29, sxtw #1]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldrsh w30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "ldrsh w30, [x28, x29, sxtx #1]");

  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldrsh x30, [x28, x29]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "ldrsh x30, [x28, x29, lsl #1]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldrsh x30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "ldrsh x30, [x28, w29, uxtw #1]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldrsh x30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "ldrsh x30, [x28, w29, sxtw #1]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldrsh x30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "ldrsh x30, [x28, x29, sxtx #1]");

  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str w30, [x28, x29]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 2), "str w30, [x28, x29, lsl #2]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str w30, [x28, w29, uxtw]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 2), "str w30, [x28, w29, uxtw #2]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str w30, [x28, w29, sxtw]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 2), "str w30, [x28, w29, sxtw #2]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str w30, [x28, x29, sxtx]");
  TEST_SINGLE(str(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 2), "str w30, [x28, x29, sxtx #2]");

  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr w30, [x28, x29]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 2), "ldr w30, [x28, x29, lsl #2]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr w30, [x28, w29, uxtw]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::UXTW, 2), "ldr w30, [x28, w29, uxtw #2]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr w30, [x28, w29, sxtw]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTW, 2), "ldr w30, [x28, w29, sxtw #2]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr w30, [x28, x29, sxtx]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r28, Reg::r29, ExtendedType::SXTX, 2), "ldr w30, [x28, x29, sxtx #2]");

  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldrsw x30, [x28, x29]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 2), "ldrsw x30, [x28, x29, lsl #2]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldrsw x30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 2), "ldrsw x30, [x28, w29, uxtw #2]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldrsw x30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 2), "ldrsw x30, [x28, w29, sxtw #2]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldrsw x30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 2), "ldrsw x30, [x28, x29, sxtx #2]");

  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str x30, [x28, x29]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 3), "str x30, [x28, x29, lsl #3]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str x30, [x28, w29, uxtw]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 3), "str x30, [x28, w29, uxtw #3]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str x30, [x28, w29, sxtw]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 3), "str x30, [x28, w29, sxtw #3]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str x30, [x28, x29, sxtx]");
  TEST_SINGLE(str(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 3), "str x30, [x28, x29, sxtx #3]");

  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr x30, [x28, x29]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 3), "ldr x30, [x28, x29, lsl #3]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr x30, [x28, w29, uxtw]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::UXTW, 3), "ldr x30, [x28, w29, uxtw #3]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr x30, [x28, w29, sxtw]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTW, 3), "ldr x30, [x28, w29, sxtw #3]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr x30, [x28, x29, sxtx]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r28, Reg::r29, ExtendedType::SXTX, 3), "ldr x30, [x28, x29, sxtx #3]");

  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "prfm pldl1keep, [x28, x29]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::LSL_64, 3), "prfm pldl1keep, [x28, x29, lsl #3]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "prfm pldl1keep, [x28, w29, uxtw]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::UXTW, 3), "prfm pldl1keep, [x28, w29, uxtw #3]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "prfm pldl1keep, [x28, w29, sxtw]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::SXTW, 3), "prfm pldl1keep, [x28, w29, sxtw #3]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "prfm pldl1keep, [x28, x29, sxtx]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r28, Reg::r29, ExtendedType::SXTX, 3), "prfm pldl1keep, [x28, x29, sxtx #3]");

  TEST_SINGLE(strb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64), "str b30, [x28, x29]");
  TEST_SINGLE(strb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW), "str b30, [x28, w29, uxtw]");
  TEST_SINGLE(strb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW), "str b30, [x28, w29, sxtw]");
  TEST_SINGLE(strb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX), "str b30, [x28, x29, sxtx]");

  TEST_SINGLE(ldrb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64), "ldr b30, [x28, x29]");
  TEST_SINGLE(ldrb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW), "ldr b30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW), "ldr b30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrb(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX), "ldr b30, [x28, x29, sxtx]");

  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str h30, [x28, x29]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "str h30, [x28, x29, lsl #1]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str h30, [x28, w29, uxtw]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "str h30, [x28, w29, uxtw #1]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str h30, [x28, w29, sxtw]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "str h30, [x28, w29, sxtw #1]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str h30, [x28, x29, sxtx]");
  TEST_SINGLE(strh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "str h30, [x28, x29, sxtx #1]");

  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr h30, [x28, x29]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 1), "ldr h30, [x28, x29, lsl #1]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr h30, [x28, w29, uxtw]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::UXTW, 1), "ldr h30, [x28, w29, uxtw #1]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr h30, [x28, w29, sxtw]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTW, 1), "ldr h30, [x28, w29, sxtw #1]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr h30, [x28, x29, sxtx]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r28, Reg::r29, ExtendedType::SXTX, 1), "ldr h30, [x28, x29, sxtx #1]");

  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str s30, [x28, x29]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 2), "str s30, [x28, x29, lsl #2]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str s30, [x28, w29, uxtw]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::UXTW, 2), "str s30, [x28, w29, uxtw #2]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str s30, [x28, w29, sxtw]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTW, 2), "str s30, [x28, w29, sxtw #2]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str s30, [x28, x29, sxtx]");
  TEST_SINGLE(str(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTX, 2), "str s30, [x28, x29, sxtx #2]");

  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr s30, [x28, x29]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 2), "ldr s30, [x28, x29, lsl #2]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr s30, [x28, w29, uxtw]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::UXTW, 2), "ldr s30, [x28, w29, uxtw #2]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr s30, [x28, w29, sxtw]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTW, 2), "ldr s30, [x28, w29, sxtw #2]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr s30, [x28, x29, sxtx]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r28, Reg::r29, ExtendedType::SXTX, 2), "ldr s30, [x28, x29, sxtx #2]");

  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str d30, [x28, x29]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 3), "str d30, [x28, x29, lsl #3]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str d30, [x28, w29, uxtw]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::UXTW, 3), "str d30, [x28, w29, uxtw #3]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str d30, [x28, w29, sxtw]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTW, 3), "str d30, [x28, w29, sxtw #3]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str d30, [x28, x29, sxtx]");
  TEST_SINGLE(str(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTX, 3), "str d30, [x28, x29, sxtx #3]");

  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr d30, [x28, x29]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 3), "ldr d30, [x28, x29, lsl #3]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr d30, [x28, w29, uxtw]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::UXTW, 3), "ldr d30, [x28, w29, uxtw #3]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr d30, [x28, w29, sxtw]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTW, 3), "ldr d30, [x28, w29, sxtw #3]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr d30, [x28, x29, sxtx]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r28, Reg::r29, ExtendedType::SXTX, 3), "ldr d30, [x28, x29, sxtx #3]");

  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "str q30, [x28, x29]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 4), "str q30, [x28, x29, lsl #4]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "str q30, [x28, w29, uxtw]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::UXTW, 4), "str q30, [x28, w29, uxtw #4]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "str q30, [x28, w29, sxtw]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTW, 4), "str q30, [x28, w29, sxtw #4]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "str q30, [x28, x29, sxtx]");
  TEST_SINGLE(str(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTX, 4), "str q30, [x28, x29, sxtx #4]");

  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 0), "ldr q30, [x28, x29]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::LSL_64, 4), "ldr q30, [x28, x29, lsl #4]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::UXTW, 0), "ldr q30, [x28, w29, uxtw]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::UXTW, 4), "ldr q30, [x28, w29, uxtw #4]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTW, 0), "ldr q30, [x28, w29, sxtw]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTW, 4), "ldr q30, [x28, w29, sxtw #4]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTX, 0), "ldr q30, [x28, x29, sxtx]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r28, Reg::r29, ExtendedType::SXTX, 4), "ldr q30, [x28, x29, sxtx #4]");
}

// Disassembly checks for the pointer-authenticated loads ldraa and ldrab.
// Each TEST_SINGLE pairs one emitter call with the text it is expected to
// disassemble to. Both instructions are exercised in the OFFSET and PRE index
// forms, each with the offsets 0, -4096, 512 and 4088.
// NOTE(review): -4096 and 4088 look like the limits of the encodable offset
// range (signed immediate scaled by 8) - confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore PAC") {
  // ldraa, plain offset form. A zero offset is expected to print with no immediate.
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 0), "ldraa x30, [x29]");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, -4096), "ldraa x30, [x29, #-4096]");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 512), "ldraa x30, [x29, #512]");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 4088), "ldraa x30, [x29, #4088]");

  // ldraa, pre-index form. The expected text carries the trailing '!' marker.
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 0), "ldraa x30, [x29]!");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, -4096), "ldraa x30, [x29, #-4096]!");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 512), "ldraa x30, [x29, #512]!");
  TEST_SINGLE(ldraa(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 4088), "ldraa x30, [x29, #4088]!");

  // ldrab, plain offset form; same offsets as the ldraa group.
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 0), "ldrab x30, [x29]");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, -4096), "ldrab x30, [x29, #-4096]");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 512), "ldrab x30, [x29, #512]");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::OFFSET, 4088), "ldrab x30, [x29, #4088]");

  // ldrab, pre-index form.
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 0), "ldrab x30, [x29]!");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, -4096), "ldrab x30, [x29, #-4096]!");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 512), "ldrab x30, [x29, #512]!");
  TEST_SINGLE(ldrab(XReg::x30, XReg::x29, ARMEmitter::IndexType::PRE, 4088), "ldrab x30, [x29, #4088]!");
}

// Disassembly checks for loads, stores and prefetch that take a base register
// plus an immediate offset. Each TEST_SINGLE pairs one emitter call with the
// text it is expected to disassemble to. Every form is tested with offset 0
// (expected to print with no immediate) and with one large offset.
// The large offsets used are 4095, 8190, 16380, 32760 and 65520, i.e. 4095
// multiplied by 1, 2, 4, 8 and 16.
// NOTE(review): these look like the maximum of an unsigned 12-bit immediate
// scaled by the access size - confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Loadstore: Loadstore unsigned immediate") {
  // Byte accesses: general-purpose store/load, sign-extending loads into W and
  // X destinations, then the VReg overloads, which are expected to print as
  // "ldr b30" / "str b30".
  TEST_SINGLE(strb(Reg::r30, Reg::r29, 0), "strb w30, [x29]");
  TEST_SINGLE(strb(Reg::r30, Reg::r29, 4095), "strb w30, [x29, #4095]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r29, 0), "ldrb w30, [x29]");
  TEST_SINGLE(ldrb(Reg::r30, Reg::r29, 4095), "ldrb w30, [x29, #4095]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r29, 0), "ldrsb w30, [x29]");
  TEST_SINGLE(ldrsb(WReg::w30, Reg::r29, 4095), "ldrsb w30, [x29, #4095]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r29, 0), "ldrsb x30, [x29]");
  TEST_SINGLE(ldrsb(XReg::x30, Reg::r29, 4095), "ldrsb x30, [x29, #4095]");
  TEST_SINGLE(ldrb(VReg::v30, Reg::r29, 0), "ldr b30, [x29]");
  TEST_SINGLE(ldrb(VReg::v30, Reg::r29, 4095), "ldr b30, [x29, #4095]");
  TEST_SINGLE(strb(VReg::v30, Reg::r29, 0), "str b30, [x29]");
  TEST_SINGLE(strb(VReg::v30, Reg::r29, 4095), "str b30, [x29, #4095]");

  // Halfword accesses through general-purpose registers, including the
  // sign-extending loads.
  TEST_SINGLE(strh(Reg::r30, Reg::r29, 0), "strh w30, [x29]");
  TEST_SINGLE(strh(Reg::r30, Reg::r29, 8190), "strh w30, [x29, #8190]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r29, 0), "ldrh w30, [x29]");
  TEST_SINGLE(ldrh(Reg::r30, Reg::r29, 8190), "ldrh w30, [x29, #8190]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r29, 0), "ldrsh w30, [x29]");
  TEST_SINGLE(ldrsh(WReg::w30, Reg::r29, 8190), "ldrsh w30, [x29, #8190]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r29, 0), "ldrsh x30, [x29]");
  TEST_SINGLE(ldrsh(XReg::x30, Reg::r29, 8190), "ldrsh x30, [x29, #8190]");

  // Halfword accesses through the VReg overloads; expected to print as "ldr h30" / "str h30".
  TEST_SINGLE(ldrh(VReg::v30, Reg::r29, 0), "ldr h30, [x29]");
  TEST_SINGLE(ldrh(VReg::v30, Reg::r29, 8190), "ldr h30, [x29, #8190]");
  TEST_SINGLE(strh(VReg::v30, Reg::r29, 0), "str h30, [x29]");
  TEST_SINGLE(strh(VReg::v30, Reg::r29, 8190), "str h30, [x29, #8190]");

  // 32-bit accesses through W registers.
  TEST_SINGLE(str(WReg::w30, Reg::r29, 0), "str w30, [x29]");
  TEST_SINGLE(str(WReg::w30, Reg::r29, 16380), "str w30, [x29, #16380]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r29, 0), "ldr w30, [x29]");
  TEST_SINGLE(ldr(WReg::w30, Reg::r29, 16380), "ldr w30, [x29, #16380]");

  // Sign-extending word load into an X destination.
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r29, 0), "ldrsw x30, [x29]");
  TEST_SINGLE(ldrsw(XReg::x30, Reg::r29, 16380), "ldrsw x30, [x29, #16380]");

  // 32-bit accesses through S registers.
  TEST_SINGLE(ldr(SReg::s30, Reg::r29, 0), "ldr s30, [x29]");
  TEST_SINGLE(ldr(SReg::s30, Reg::r29, 16380), "ldr s30, [x29, #16380]");
  TEST_SINGLE(str(SReg::s30, Reg::r29, 0), "str s30, [x29]");
  TEST_SINGLE(str(SReg::s30, Reg::r29, 16380), "str s30, [x29, #16380]");

  // 64-bit accesses through X registers.
  TEST_SINGLE(str(XReg::x30, Reg::r29, 0), "str x30, [x29]");
  TEST_SINGLE(str(XReg::x30, Reg::r29, 32760), "str x30, [x29, #32760]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r29, 0), "ldr x30, [x29]");
  TEST_SINGLE(ldr(XReg::x30, Reg::r29, 32760), "ldr x30, [x29, #32760]");

  // ldr overload that takes the access size as a SubRegSize argument. The
  // expected text matches the dedicated ldrb / ldrh / ldr w / ldr x forms above.
  TEST_SINGLE(ldr(SubRegSize::i8Bit, Reg::r30, Reg::r29, 0), "ldrb w30, [x29]");
  TEST_SINGLE(ldr(SubRegSize::i8Bit, Reg::r30, Reg::r29, 4095), "ldrb w30, [x29, #4095]");
  TEST_SINGLE(ldr(SubRegSize::i16Bit, Reg::r30, Reg::r29, 0), "ldrh w30, [x29]");
  TEST_SINGLE(ldr(SubRegSize::i16Bit, Reg::r30, Reg::r29, 8190), "ldrh w30, [x29, #8190]");
  TEST_SINGLE(ldr(SubRegSize::i32Bit, Reg::r30, Reg::r29, 0), "ldr w30, [x29]");
  TEST_SINGLE(ldr(SubRegSize::i32Bit, Reg::r30, Reg::r29, 16380), "ldr w30, [x29, #16380]");
  TEST_SINGLE(ldr(SubRegSize::i64Bit, Reg::r30, Reg::r29, 0), "ldr x30, [x29]");
  TEST_SINGLE(ldr(SubRegSize::i64Bit, Reg::r30, Reg::r29, 32760), "ldr x30, [x29, #32760]");

  // str overload that takes the access size as a SubRegSize argument; mirrors
  // the ldr group directly above.
  TEST_SINGLE(str(SubRegSize::i8Bit, Reg::r30, Reg::r29, 0), "strb w30, [x29]");
  TEST_SINGLE(str(SubRegSize::i8Bit, Reg::r30, Reg::r29, 4095), "strb w30, [x29, #4095]");
  TEST_SINGLE(str(SubRegSize::i16Bit, Reg::r30, Reg::r29, 0), "strh w30, [x29]");
  TEST_SINGLE(str(SubRegSize::i16Bit, Reg::r30, Reg::r29, 8190), "strh w30, [x29, #8190]");
  TEST_SINGLE(str(SubRegSize::i32Bit, Reg::r30, Reg::r29, 0), "str w30, [x29]");
  TEST_SINGLE(str(SubRegSize::i32Bit, Reg::r30, Reg::r29, 16380), "str w30, [x29, #16380]");
  TEST_SINGLE(str(SubRegSize::i64Bit, Reg::r30, Reg::r29, 0), "str x30, [x29]");
  TEST_SINGLE(str(SubRegSize::i64Bit, Reg::r30, Reg::r29, 32760), "str x30, [x29, #32760]");

  // Prefetch; uses the same large offset as the 64-bit accesses.
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r29, 0), "prfm pldl1keep, [x29]");
  TEST_SINGLE(prfm(Prefetch::PLDL1KEEP, Reg::r29, 32760), "prfm pldl1keep, [x29, #32760]");

  // 64-bit accesses through D registers.
  TEST_SINGLE(ldr(DReg::d30, Reg::r29, 0), "ldr d30, [x29]");
  TEST_SINGLE(ldr(DReg::d30, Reg::r29, 32760), "ldr d30, [x29, #32760]");
  TEST_SINGLE(str(DReg::d30, Reg::r29, 0), "str d30, [x29]");
  TEST_SINGLE(str(DReg::d30, Reg::r29, 32760), "str d30, [x29, #32760]");

  // 128-bit accesses through Q registers.
  TEST_SINGLE(ldr(QReg::q30, Reg::r29, 0), "ldr q30, [x29]");
  TEST_SINGLE(ldr(QReg::q30, Reg::r29, 65520), "ldr q30, [x29, #65520]");
  TEST_SINGLE(str(QReg::q30, Reg::r29, 0), "str q30, [x29]");
  TEST_SINGLE(str(QReg::q30, Reg::r29, 65520), "str q30, [x29, #65520]");
}


================================================
FILE: FEXCore/unittests/Emitter/SVE_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Disassembly checks for the indexed dup, sel and predicated (merging) mov
// emitter calls on SVE Z registers. Each TEST_SINGLE pairs one emitter call
// with the text it is expected to disassemble to.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Base Encodings") {
  // Indexed dup. The expected text uses the "mov" spelling: index 0 prints the
  // scalar register name (b29, h29, ...), other indices print "z29.<T>[idx]".
  // The highest index tested per element size is 63, 31, 15, 7 and 3.
  // NOTE(review): those maxima each address the last element of a 512-bit
  // span - presumably the encodable limit; confirm against the Arm ARM.
  TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "mov z30.b, b29");
  TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "mov z30.b, z29.b[1]");
  TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 63), "mov z30.b, z29.b[63]");

  TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "mov z30.h, h29");
  TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "mov z30.h, z29.h[1]");
  TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 31), "mov z30.h, z29.h[31]");

  TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "mov z30.s, s29");
  TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "mov z30.s, z29.s[1]");
  TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "mov z30.s, z29.s[15]");

  TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "mov z30.d, d29");
  TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "mov z30.d, z29.d[1]");
  TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 7), "mov z30.d, z29.d[7]");

  TEST_SINGLE(dup(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, 0), "mov z30.q, q29");
  TEST_SINGLE(dup(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, 1), "mov z30.q, z29.q[1]");
  TEST_SINGLE(dup(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, 3), "mov z30.q, z29.q[3]");

  // Predicated select for 8- to 64-bit elements. The 128-bit row is disabled.
  // NOTE(review): presumably i128Bit is not a valid element size for sel - confirm.
  TEST_SINGLE(sel(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.b, p6, z29.b, z28.b");
  TEST_SINGLE(sel(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.h, p6, z29.h, z28.h");
  TEST_SINGLE(sel(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.s, p6, z29.s, z28.s");
  TEST_SINGLE(sel(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.d, p6, z29.d, z28.d");
  // TEST_SINGLE(sel(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29, ZReg::z28), "sel z30.q, p6, z29.q, z28.q");

  // Vector mov under a merging predicate (printed as "p6/m"). The 128-bit row
  // is disabled, as for sel above.
  TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.b, p6/m, z29.b");
  TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.h, p6/m, z29.h");
  TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.s, p6/m, z29.s");
  TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.d, p6/m, z29.d");
  // TEST_SINGLE(mov(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "mov z30.q, p6/m, z29.q");
}
// Disassembly checks for the unpredicated three-register SVE integer add and
// subtract family: add, sub, sqadd, uqadd, sqsub and uqsub. Each TEST_SINGLE
// pairs one emitter call with the text it is expected to disassemble to.
// Every instruction is tested for 8-, 16-, 32- and 64-bit elements; the
// 128-bit row of each group is disabled.
// NOTE(review): presumably i128Bit is not a valid element size here - confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add/subtract vectors (unpredicated)") {
  // add
  TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.b, z29.b, z28.b");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.h, z29.h, z28.h");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.s, z29.s, z28.s");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.d, z29.d, z28.d");
  // TEST_SINGLE(add(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "add z30.q, z29.q, z28.q");

  // sub
  TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.b, z29.b, z28.b");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.h, z29.h, z28.h");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.s, z29.s, z28.s");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.d, z29.d, z28.d");
  // TEST_SINGLE(sub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sub z30.q, z29.q, z28.q");

  // sqadd
  TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.b, z29.b, z28.b");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.h, z29.h, z28.h");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.s, z29.s, z28.s");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.d, z29.d, z28.d");
  // TEST_SINGLE(sqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqadd z30.q, z29.q, z28.q");

  // uqadd
  TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.b, z29.b, z28.b");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.h, z29.h, z28.h");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.s, z29.s, z28.s");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.d, z29.d, z28.d");
  // TEST_SINGLE(uqadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqadd z30.q, z29.q, z28.q");

  // sqsub
  TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.b, z29.b, z28.b");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.h, z29.h, z28.h");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.s, z29.s, z28.s");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.d, z29.d, z28.d");
  // TEST_SINGLE(sqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqsub z30.q, z29.q, z28.q");

  // uqsub
  TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.b, z29.b, z28.b");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.h, z29.h, z28.h");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.s, z29.s, z28.s");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.d, z29.d, z28.d");
  // TEST_SINGLE(uqsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uqsub z30.q, z29.q, z28.q");
}
// Disassembly checks for the SVE vector adr emitter call. Each TEST_SINGLE
// pairs one emitter call with the text it is expected to disassemble to.
// Covered: the call with no modifier arguments, MOD_LSL with shifts 1-3 for
// 32- and 64-bit elements, and MOD_UXTW / MOD_SXTW with shifts 0-3, which are
// only exercised with 64-bit elements.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE address generation") {
  // No modifier arguments supplied: the expected text shows no modifier or shift.
  TEST_SINGLE(adr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z31), "adr z30.s, [z29.s, z31.s]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31), "adr z30.d, [z29.d, z31.d]");

  // Explicit LSL modifier with shift amounts 1 to 3.
  TEST_SINGLE(adr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 1), "adr z30.s, [z29.s, z31.s, lsl #1]");
  TEST_SINGLE(adr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 2), "adr z30.s, [z29.s, z31.s, lsl #2]");
  TEST_SINGLE(adr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 3), "adr z30.s, [z29.s, z31.s, lsl #3]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 1), "adr z30.d, [z29.d, z31.d, lsl #1]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 2), "adr z30.d, [z29.d, z31.d, lsl #2]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_LSL, 3), "adr z30.d, [z29.d, z31.d, lsl #3]");

  // UXTW modifier. A shift of 0 is expected to print as bare "uxtw".
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_UXTW, 0), "adr z30.d, [z29.d, z31.d, uxtw]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_UXTW, 1), "adr z30.d, [z29.d, z31.d, uxtw #1]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_UXTW, 2), "adr z30.d, [z29.d, z31.d, uxtw #2]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_UXTW, 3), "adr z30.d, [z29.d, z31.d, uxtw #3]");

  // SXTW modifier. A shift of 0 is expected to print as bare "sxtw".
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_SXTW, 0), "adr z30.d, [z29.d, z31.d, sxtw]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_SXTW, 1), "adr z30.d, [z29.d, z31.d, sxtw #1]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_SXTW, 2), "adr z30.d, [z29.d, z31.d, sxtw #2]");
  TEST_SINGLE(adr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z31, SVEModType::MOD_SXTW, 3), "adr z30.d, [z29.d, z31.d, sxtw #3]");
}
// Disassembly checks for the SVE tbl and tbx emitter calls. Each TEST_SINGLE
// pairs one emitter call with the text it is expected to disassemble to.
// Covered: tbl with one table register, tbl with two table registers, and
// tbx, each for 8-, 16-, 32- and 64-bit elements.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE table lookup (three sources)") {
  // tbl with a single table register; the expected text wraps it in braces.
  // The 128-bit row is disabled.
  // NOTE(review): presumably i128Bit is not a valid element size for tbl - confirm.
  TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.b, {z29.b}, z28.b");
  TEST_SINGLE(tbl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.h, {z29.h}, z28.h");
  TEST_SINGLE(tbl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.s, {z29.s}, z28.s");
  TEST_SINGLE(tbl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.d, {z29.d}, z28.d");
  // TEST_SINGLE(tbl(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbl z30.q, {z29.q}, z28.q");

  // tbl with two table registers (z29 and z30 here), printed as a register list.
  TEST_SINGLE(tbl(SubRegSize::i8Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.b, {z29.b, z30.b}, z28.b");
  TEST_SINGLE(tbl(SubRegSize::i16Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.h, {z29.h, z30.h}, z28.h");
  TEST_SINGLE(tbl(SubRegSize::i32Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.s, {z29.s, z30.s}, z28.s");
  TEST_SINGLE(tbl(SubRegSize::i64Bit, ZReg::z31, ZReg::z29, ZReg::z30, ZReg::z28), "tbl z31.d, {z29.d, z30.d}, z28.d");

  // tbx; note the expected text has no braces around the table register.
  // The 128-bit row is disabled, as for tbl above.
  TEST_SINGLE(tbx(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.b, z29.b, z28.b");
  TEST_SINGLE(tbx(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.h, z29.h, z28.h");
  TEST_SINGLE(tbx(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.s, z29.s, z28.s");
  TEST_SINGLE(tbx(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.d, z29.d, z28.d");
  // TEST_SINGLE(tbx(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "tbx z30.q, z29.q, z28.q");
}
// Disassembly checks for the SVE vector permute emitter calls zip1, zip2,
// uzp1, uzp2, trn1 and trn2. Each TEST_SINGLE pairs one emitter call with the
// text it is expected to disassemble to. Every instruction is tested for
// 8-, 16-, 32- and 64-bit elements with the same z30, z29, z28 operands.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector elements") {
  // zip1
  TEST_SINGLE(zip1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.b, z29.b, z28.b");
  TEST_SINGLE(zip1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.h, z29.h, z28.h");
  TEST_SINGLE(zip1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.s, z29.s, z28.s");
  TEST_SINGLE(zip1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip1 z30.d, z29.d, z28.d");

  // zip2
  TEST_SINGLE(zip2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.b, z29.b, z28.b");
  TEST_SINGLE(zip2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.h, z29.h, z28.h");
  TEST_SINGLE(zip2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.s, z29.s, z28.s");
  TEST_SINGLE(zip2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "zip2 z30.d, z29.d, z28.d");

  // uzp1
  TEST_SINGLE(uzp1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.b, z29.b, z28.b");
  TEST_SINGLE(uzp1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.h, z29.h, z28.h");
  TEST_SINGLE(uzp1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.s, z29.s, z28.s");
  TEST_SINGLE(uzp1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp1 z30.d, z29.d, z28.d");

  // uzp2
  TEST_SINGLE(uzp2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.b, z29.b, z28.b");
  TEST_SINGLE(uzp2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.h, z29.h, z28.h");
  TEST_SINGLE(uzp2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.s, z29.s, z28.s");
  TEST_SINGLE(uzp2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uzp2 z30.d, z29.d, z28.d");

  // trn1
  TEST_SINGLE(trn1(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.b, z29.b, z28.b");
  TEST_SINGLE(trn1(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.h, z29.h, z28.h");
  TEST_SINGLE(trn1(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.s, z29.s, z28.s");
  TEST_SINGLE(trn1(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn1 z30.d, z29.d, z28.d");

  // trn2
  TEST_SINGLE(trn2(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.b, z29.b, z28.b");
  TEST_SINGLE(trn2(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.h, z29.h, z28.h");
  TEST_SINGLE(trn2(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.s, z29.s, z28.s");
  TEST_SINGLE(trn2(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "trn2 z30.d, z29.d, z28.d");
}
// Disassembly checks for the SVE compare-with-immediate emitter calls cmphi,
// cmphs, cmplo and cmpls. Each TEST_SINGLE pairs one emitter call with the
// text it is expected to disassemble to. Every instruction is tested for
// 8-, 16-, 32- and 64-bit elements, under a zeroing predicate (printed as
// "p5/z"), with the immediates 0 and 127.
// NOTE(review): 0 and 127 look like the limits of an unsigned 7-bit
// immediate - confirm against the Arm ARM.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with unsigned immediate") {
  // cmphi
  TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.b, p5/z, z30.b, #0");
  TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.h, p5/z, z30.h, #0");
  TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.s, p5/z, z30.s, #0");
  TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphi p6.d, p5/z, z30.d, #0");
  TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.b, p5/z, z30.b, #127");
  TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.h, p5/z, z30.h, #127");
  TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.s, p5/z, z30.s, #127");
  TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphi p6.d, p5/z, z30.d, #127");

  // cmphs
  TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.b, p5/z, z30.b, #0");
  TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.h, p5/z, z30.h, #0");
  TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.s, p5/z, z30.s, #0");
  TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmphs p6.d, p5/z, z30.d, #0");
  TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.b, p5/z, z30.b, #127");
  TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.h, p5/z, z30.h, #127");
  TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.s, p5/z, z30.s, #127");
  TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmphs p6.d, p5/z, z30.d, #127");

  // cmplo
  TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.b, p5/z, z30.b, #0");
  TEST_SINGLE(cmplo(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.h, p5/z, z30.h, #0");
  TEST_SINGLE(cmplo(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.s, p5/z, z30.s, #0");
  TEST_SINGLE(cmplo(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmplo p6.d, p5/z, z30.d, #0");
  TEST_SINGLE(cmplo(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.b, p5/z, z30.b, #127");
  TEST_SINGLE(cmplo(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.h, p5/z, z30.h, #127");
  TEST_SINGLE(cmplo(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.s, p5/z, z30.s, #127");
  TEST_SINGLE(cmplo(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmplo p6.d, p5/z, z30.d, #127");

  // cmpls
  TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.b, p5/z, z30.b, #0");
  TEST_SINGLE(cmpls(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.h, p5/z, z30.h, #0");
  TEST_SINGLE(cmpls(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.s, p5/z, z30.s, #0");
  TEST_SINGLE(cmpls(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 0), "cmpls p6.d, p5/z, z30.d, #0");
  TEST_SINGLE(cmpls(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.b, p5/z, z30.b, #127");
  TEST_SINGLE(cmpls(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.h, p5/z, z30.h, #127");
  TEST_SINGLE(cmpls(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.s, p5/z, z30.s, #127");
  TEST_SINGLE(cmpls(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 127), "cmpls p6.d, p5/z, z30.d, #127");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with signed immediate") {
  TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpeq p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpeq p6.d, p5/z, z30.d, #15");

  TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpgt p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpgt p6.d, p5/z, z30.d, #15");

  TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpge p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpge p6.d, p5/z, z30.d, #15");

  TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmplt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmplt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmplt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmplt p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmplt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmplt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmplt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmplt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmplt p6.d, p5/z, z30.d, #15");

  TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmple(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmple(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmple(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmple p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmple(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmple(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmple(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmple(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmple p6.d, p5/z, z30.d, #15");

  TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.b, p5/z, z30.b, #-16");
  TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.h, p5/z, z30.h, #-16");
  TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.s, p5/z, z30.s, #-16");
  TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, -16), "cmpne p6.d, p5/z, z30.d, #-16");
  TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.b, p5/z, z30.b, #15");
  TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.h, p5/z, z30.h, #15");
  TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.s, p5/z, z30.s, #15");
  TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, 15), "cmpne p6.d, p5/z, z30.d, #15");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate logical operations") {
  // Predicate-register logical operations. Every operand is printed with the
  // .b element suffix, and the governing predicate (p5) is zeroing (/z) for
  // all forms except the merging `mov` and `sel`.
  // NOTE(review): TEST_SINGLE is defined outside this chunk; it appears to
  // emit the call in the first argument and compare its disassembly against
  // the string in the second - confirm against the fixture.
  TEST_SINGLE(and_(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "and p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(ands(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "ands p6.b, p5/z, p4.b, p3.b");
  // Predicated moves: both the merging (/m) and zeroing (/z) overloads are covered.
  TEST_SINGLE(mov(PReg::p6, PReg::p5.Merging(), PReg::p4), "mov p6.b, p5/m, p4.b");
  TEST_SINGLE(mov(PReg::p6, PReg::p5.Zeroing(), PReg::p4), "mov p6.b, p5/z, p4.b");
  TEST_SINGLE(movs(PReg::p6, PReg::p5.Zeroing(), PReg::p4), "movs p6.b, p5/z, p4.b");

  TEST_SINGLE(bic(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "bic p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(bics(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "bics p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(eor(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "eor p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(eors(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "eors p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(not_(PReg::p6, PReg::p5.Zeroing(), PReg::p4), "not p6.b, p5/z, p4.b");

  // `sel` takes the governing predicate without a /z or /m qualifier.
  TEST_SINGLE(sel(PReg::p6, PReg::p5, PReg::p4, PReg::p3), "sel p6.b, p5, p4.b, p3.b");
  TEST_SINGLE(orr(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "orr p6.b, p5/z, p4.b, p3.b");
  // Unpredicated two-operand move form.
  TEST_SINGLE(mov(PReg::p6, PReg::p5), "mov p6.b, p5.b");
  TEST_SINGLE(orn(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "orn p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(nor(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "nor p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(nand(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "nand p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(orrs(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "orrs p6.b, p5/z, p4.b, p3.b");
  // Flag-setting variant of the unpredicated two-operand move.
  TEST_SINGLE(movs(PReg::p6, PReg::p5), "movs p6.b, p5.b");

  TEST_SINGLE(orns(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "orns p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(nors(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "nors p6.b, p5/z, p4.b, p3.b");
  TEST_SINGLE(nands(PReg::p6, PReg::p5.Zeroing(), PReg::p4, PReg::p3), "nands p6.b, p5/z, p4.b, p3.b");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast predicate element") {
  // Intentionally empty placeholder: no emitter calls are exercised for this
  // encoding group yet.
  // TODO: Implement in emitter.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer clamp") {
  // Intentionally empty placeholder: no emitter calls are exercised for this
  // encoding group yet.
  // TODO: Implement in emitter.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 character match") {
  // MATCH / NMATCH with a zeroing governing predicate. Only the 8-bit and
  // 16-bit element sizes are exercised here.
  TEST_SINGLE(match(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "match p8.b, p6/z, z30.b, z29.b");
  TEST_SINGLE(match(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "match p8.h, p6/z, z30.h, z29.h");

  TEST_SINGLE(nmatch(SubRegSize::i8Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "nmatch p8.b, p6/z, z30.b, z29.b");
  TEST_SINGLE(nmatch(SubRegSize::i16Bit, PReg::p8, PReg::p6.Zeroing(), ZReg::z30, ZReg::z29), "nmatch p8.h, p6/z, z30.h, z29.h");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert precision odd elements") {
  // FCVTXNT / FCVTNT narrow (destination element is half the source width);
  // FCVTLT widens (destination element is twice the source width). The
  // SubRegSize argument names the DESTINATION element size, as the expected
  // strings below show.
  TEST_SINGLE(fcvtxnt(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtxnt z30.s, p6/m, z29.d");
  TEST_SINGLE(fcvtnt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.h, p6/m, z29.s");
  TEST_SINGLE(fcvtnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.s, p6/m, z29.d");
  // Disabled: presumably no valid encoding, since a 64-bit destination would
  // need a source wider than .d - confirm.
  // TEST_SINGLE(fcvtnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtnt z30.d, p6/m, z29.d");

  // Disabled: presumably no valid encoding, since a 16-bit destination would
  // need an 8-bit floating-point source - confirm.
  // TEST_SINGLE(fcvtlt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.h, p6/m, z29.b");
  TEST_SINGLE(fcvtlt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.s, p6/m, z29.h");
  TEST_SINGLE(fcvtlt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtlt z30.d, p6/m, z29.s");


  // Reference copies of the emitter signatures exercised above (kept as notes):
  // void fcvtxnt(ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {
  /////< Size is destination size
  // void fcvtnt(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {
  /////< Size is destination size
  // void fcvtlt(ARMEmitter::SubRegSize size, ARMEmitter::ZRegister zd, ARMEmitter::PRegister pg, ARMEmitter::ZRegister zn) {

  // XXX: BFCVTNT (not covered by this test case yet)
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 floating-point pairwise operations") {
  // Covers the five instructions of the SVE2 floating-point pairwise group:
  // FADDP, FMAXNMP, FMINNMP, FMAXP and FMINP. All are destructive (the
  // destination z30 is also the first source) and use a merging predicate.
  // Only the h/s/d element sizes are exercised; the .b and .q variants stay
  // commented out (presumably not valid floating-point element sizes).
  //
  // The last two groups must use the pairwise fmaxp/fminp emitters. Calling
  // the element-wise predicated fmax/fmin instead still passes (they are valid
  // instructions with matching disassembly) but leaves the pairwise max/min
  // encodings completely untested.

  // TEST_SINGLE(faddp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "faddp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(faddp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(faddp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(faddp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(faddp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "faddp z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmaxnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmaxnmp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmaxnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmaxnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmaxnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmaxnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnmp z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fminnmp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fminnmp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fminnmp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fminnmp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fminnmp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fminnmp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnmp z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmaxp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxp z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fminp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminp z30.q, p6/m, z30.q, z28.q");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point complex add") {
  // FCADD (destructive, merging predicate) for the h/s/d element sizes with
  // the 90 and 270 degree rotations.
  // The expected text is written as two adjacent string literals, which the
  // compiler concatenates into one string.
  TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.h, p6/m, "
                                                                                                                   "z30.h, z28.h, #90");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.s, p6/m, "
                                                                                                                   "z30.s, z28.s, #90");
  TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_90), "fcadd z30.d, p6/m, "
                                                                                                                   "z30.d, z28.d, #90");

  TEST_SINGLE(fcadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.h, p6/m, "
                                                                                                                    "z30.h, z28.h, #270");
  TEST_SINGLE(fcadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.s, p6/m, "
                                                                                                                    "z30.s, z28.s, #270");
  TEST_SINGLE(fcadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28, Rotation::ROTATE_270), "fcadd z30.d, p6/m, "
                                                                                                                    "z30.d, z28.d, #270");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add (vector)") {
  // Predicated vector FCMLA: every combination of the h/s/d element sizes and
  // the 0/90/180/270 degree rotations, grouped by rotation.
  // The expected text is written as two adjacent string literals, which the
  // compiler concatenates into one string.
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.h, p6/m, "
                                                                                                                  "z10.h, z28.h, #0");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.s, p6/m, "
                                                                                                                  "z10.s, z28.s, #0");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_0), "fcmla z30.d, p6/m, "
                                                                                                                  "z10.d, z28.d, #0");

  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.h, p6/m, "
                                                                                                                   "z10.h, z28.h, #90");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.s, p6/m, "
                                                                                                                   "z10.s, z28.s, #90");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_90), "fcmla z30.d, p6/m, "
                                                                                                                   "z10.d, z28.d, #90");

  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.h, p6/m, "
                                                                                                                    "z10.h, z28.h, #180");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.s, p6/m, "
                                                                                                                    "z10.s, z28.s, #180");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_180), "fcmla z30.d, p6/m, "
                                                                                                                    "z10.d, z28.d, #180");

  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.h, p6/m, "
                                                                                                                    "z10.h, z28.h, #270");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.s, p6/m, "
                                                                                                                    "z10.s, z28.s, #270");
  TEST_SINGLE(fcmla(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z10, ZReg::z28, Rotation::ROTATE_270), "fcmla z30.d, p6/m, "
                                                                                                                    "z10.d, z28.d, #270");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add (indexed)") {
  // Indexed FMLA / FMLS. The last argument is the element index into the
  // final Z register; each size is tested with index 7 (.h), 3 (.s), 1 (.d).
  // NOTE(review): z7 / z15 and these indices look like the upper limits the
  // indexed encoding allows per element size - confirm against the emitter.
  TEST_SINGLE(fmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmla z30.h, z29.h, z7.h[7]");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmla z30.s, z29.s, z7.s[3]");
  TEST_SINGLE(fmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmla z30.d, z29.d, z15.d[1]");

  TEST_SINGLE(fmls(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmls z30.h, z29.h, z7.h[7]");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmls z30.s, z29.s, z7.s[3]");
  TEST_SINGLE(fmls(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmls z30.d, z29.d, z15.d[1]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point complex multiply-add (indexed)") {
  // Indexed FCMLA: arguments are (size, zda, zn, zm, index, rotation).
  // The .h form is tested with z7 and indices 0..3, the .s form with z15 and
  // indices 0..1; between them all four rotations are covered for each size.
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 0, Rotation::ROTATE_0), "fcmla z30.h, z10.h, z7.h[0], #0");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 0, Rotation::ROTATE_0), "fcmla z30.s, z10.s, z15.s[0], #0");

  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 1, Rotation::ROTATE_90), "fcmla z30.h, z10.h, z7.h[1], #90");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_90), "fcmla z30.s, z10.s, z15.s[1], #90");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_180), "fcmla z30.s, z10.s, z15.s[1], #180");
  TEST_SINGLE(fcmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z10, ZReg::z15, 1, Rotation::ROTATE_270), "fcmla z30.s, z10.s, z15.s[1], #270");

  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 2, Rotation::ROTATE_180), "fcmla z30.h, z10.h, z7.h[2], #180");
  TEST_SINGLE(fcmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z10, ZReg::z7, 3, Rotation::ROTATE_270), "fcmla z30.h, z10.h, z7.h[3], #270");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply (indexed)") {
  // Indexed FMUL: one case per element size, using index 7 (.h), 3 (.s) and
  // 1 (.d) on the indexed source register.
  TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmul z30.h, z29.h, z7.h[7]");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmul z30.s, z29.s, z7.s[3]");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z15, 1), "fmul z30.d, z29.d, z15.d[1]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating point matrix multiply accumulate") {
  // FMMLA is exercised for the 32-bit and 64-bit element sizes only.
  TEST_SINGLE(fmmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmmla z30.s, z29.s, z28.s");
  TEST_SINGLE(fmmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmmla z30.d, z29.d, z28.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point compare vectors") {
  // Vector-vs-vector floating-point compares writing a predicate result (p6)
  // under a zeroing governing predicate (p5), for the h/s/d element sizes.
  TEST_SINGLE(fcmeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmeq p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmeq p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(fcmeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmeq p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(fcmgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmgt p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmgt p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(fcmgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmgt p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(fcmge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmge p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(fcmge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmge p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(fcmge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmge p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(fcmne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmne p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(fcmne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmne p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(fcmne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmne p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(fcmuo(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmuo p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(fcmuo(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmuo p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(fcmuo(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "fcmuo p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(facge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(facge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(facge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(facgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(facgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(facgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.d, p5/z, z30.d, z29.d");

  // facle is expected to disassemble as facge with the two vector sources
  // swapped (z29 before z30), so the expected mnemonic differs from the call.
  TEST_SINGLE(facle(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.h, p5/z, z29.h, z30.h");
  TEST_SINGLE(facle(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.s, p5/z, z29.s, z30.s");
  TEST_SINGLE(facle(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facge p6.d, p5/z, z29.d, z30.d");

  // Likewise, faclt is expected to disassemble as facgt with swapped sources.
  TEST_SINGLE(faclt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.h, p5/z, z29.h, z30.h");
  TEST_SINGLE(faclt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.s, p5/z, z29.s, z30.s");
  TEST_SINGLE(faclt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "facgt p6.d, p5/z, z29.d, z30.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic (unpredicated)") {
  // Unpredicated three-operand floating-point arithmetic for the h/s/d
  // element sizes. The 8-bit and 128-bit variants of every instruction are
  // kept commented out (presumably not valid floating-point element sizes -
  // confirm against the emitter's size checks).

  // TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "fadd z30.b, z29.b, z28.b");
  TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.h, z29.h, z28.h");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.s, z29.s, z28.s");
  TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.d, z29.d, z28.d");
  // TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fadd z30.q, z29.q, z28.q");

  // TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "fsub z30.b, z29.b, z28.b");
  TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.h, z29.h, z28.h");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.s, z29.s, z28.s");
  TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.d, z29.d, z28.d");
  // TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fsub z30.q, z29.q, z28.q");

  // TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "fmul z30.b, z29.b, z28.b");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.h, z29.h, z28.h");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.s, z29.s, z28.s");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.d, z29.d, z28.d");
  // TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmul z30.q, z29.q, z28.q");

  // TEST_SINGLE(ftsmul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "ftsmul z30.b, z29.b, z28.b");
  TEST_SINGLE(ftsmul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.h, z29.h, z28.h");
  TEST_SINGLE(ftsmul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.s, z29.s, z28.s");
  TEST_SINGLE(ftsmul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.d, z29.d, z28.d");
  // TEST_SINGLE(ftsmul(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftsmul z30.q, z29.q, z28.q");

  // TEST_SINGLE(frecps(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "frecps z30.b, z29.b, z28.b");
  TEST_SINGLE(frecps(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.h, z29.h, z28.h");
  TEST_SINGLE(frecps(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.s, z29.s, z28.s");
  TEST_SINGLE(frecps(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.d, z29.d, z28.d");
  // TEST_SINGLE(frecps(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frecps z30.q, z29.q, z28.q");

  // TEST_SINGLE(frsqrts(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28),   "frsqrts z30.b, z29.b, z28.b");
  TEST_SINGLE(frsqrts(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.h, z29.h, z28.h");
  TEST_SINGLE(frsqrts(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.s, z29.s, z28.s");
  TEST_SINGLE(frsqrts(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.d, z29.d, z28.d");
  // TEST_SINGLE(frsqrts(SubRegSize::i128Bit, ZReg::z30, ZReg::z29, ZReg::z28), "frsqrts z30.q, z29.q, z28.q");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point recursive reduction") {
  // Horizontal floating-point reductions of a Z register into a scalar.
  // The destination is passed as VReg::v30 and is expected to be printed as
  // the scalar view matching the element size (h30 / s30 / d30). The
  // governing predicate is printed without a /z or /m qualifier.
  TEST_SINGLE(faddv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "faddv h30, p7, z28.h");
  TEST_SINGLE(faddv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z28), "faddv s30, p7, z28.s");
  TEST_SINGLE(faddv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z28), "faddv d30, p7, z28.d");

  TEST_SINGLE(fmaxnmv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxnmv h30, p7, z28.h");
  TEST_SINGLE(fmaxnmv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxnmv s30, p7, z28.s");
  TEST_SINGLE(fmaxnmv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxnmv d30, p7, z28.d");

  TEST_SINGLE(fminnmv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "fminnmv h30, p7, z28.h");
  TEST_SINGLE(fminnmv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z28), "fminnmv s30, p7, z28.s");
  TEST_SINGLE(fminnmv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z28), "fminnmv d30, p7, z28.d");

  TEST_SINGLE(fmaxv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxv h30, p7, z28.h");
  TEST_SINGLE(fmaxv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxv s30, p7, z28.s");
  TEST_SINGLE(fmaxv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z28), "fmaxv d30, p7, z28.d");

  TEST_SINGLE(fminv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z28), "fminv h30, p7, z28.h");
  TEST_SINGLE(fminv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z28), "fminv s30, p7, z28.s");
  TEST_SINGLE(fminv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z28), "fminv d30, p7, z28.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply-accumulate writing addend (predicated)") {
  // MLA / MLS with a merging governing predicate, for all four integer
  // element sizes (b/h/s/d).
  TEST_SINGLE(mla(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.b, p7/m, z28.b, z29.b");
  TEST_SINGLE(mla(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.h, p7/m, z28.h, z29.h");
  TEST_SINGLE(mla(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.s, p7/m, z28.s, z29.s");
  TEST_SINGLE(mla(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mla z30.d, p7/m, z28.d, z29.d");

  TEST_SINGLE(mls(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.b, p7/m, z28.b, z29.b");
  TEST_SINGLE(mls(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.h, p7/m, z28.h, z29.h");
  TEST_SINGLE(mls(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.s, p7/m, z28.s, z29.s");
  TEST_SINGLE(mls(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mls z30.d, p7/m, z28.d, z29.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply-add writing multiplicand (predicated)") {
  // MAD / MSB with a merging governing predicate, for all four integer
  // element sizes (b/h/s/d).
  TEST_SINGLE(mad(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.b, p7/m, z28.b, z29.b");
  TEST_SINGLE(mad(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.h, p7/m, z28.h, z29.h");
  TEST_SINGLE(mad(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.s, p7/m, z28.s, z29.s");
  TEST_SINGLE(mad(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "mad z30.d, p7/m, z28.d, z29.d");

  TEST_SINGLE(msb(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.b, p7/m, z28.b, z29.b");
  TEST_SINGLE(msb(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.h, p7/m, z28.h, z29.h");
  TEST_SINGLE(msb(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.s, p7/m, z28.s, z29.s");
  TEST_SINGLE(msb(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z28, ZReg::z29), "msb z30.d, p7/m, z28.d, z29.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add/subtract vectors (predicated)") {
  // Predicated ADD / SUB / SUBR for all four integer element sizes. Every
  // case passes z30 as both the destination and the first source, which is
  // how the expected disassembly prints it.
  TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "add z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sub z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "subr z30.d, p7/m, z30.d, z28.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer min/max/difference (predicated)") {
  // Predicated signed/unsigned max, min and absolute difference for all four
  // integer element sizes. Every case passes z30 as both the destination and
  // the first source, with a merging governing predicate (p6).
  TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smax z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smax z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smax z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smax z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umax z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umax z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umax z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umax z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smin z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smin z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smin z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "smin z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umin z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umin z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umin z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "umin z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(sabd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sabd z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(sabd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sabd z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(sabd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sabd z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(sabd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sabd z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(uabd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uabd z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(uabd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uabd z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(uabd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uabd z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(uabd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uabd z30.d, p6/m, z30.d, z29.d");
}

// Predicated (merging) integer multiplies: mul, smulh and umulh, each at byte,
// halfword, word and doubleword element sizes.
// Each TEST_SINGLE pairs one emitter call with the assembly text expected for it
// (the macro itself is defined outside this part of the file).
// In every case z30 is passed as both the destination and the first source.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply vectors (predicated)") {
  // Plain multiply.
  TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.b, p7/m, z30.b, z29.b");
  TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.h, p7/m, z30.h, z29.h");
  TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "mul z30.d, p7/m, z30.d, z29.d");

  // Signed multiply-high.
  TEST_SINGLE(smulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.b, p7/m, z30.b, z29.b");
  TEST_SINGLE(smulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.h, p7/m, z30.h, z29.h");
  TEST_SINGLE(smulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(smulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "smulh z30.d, p7/m, z30.d, z29.d");

  // Unsigned multiply-high.
  TEST_SINGLE(umulh(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.b, p7/m, z30.b, z29.b");
  TEST_SINGLE(umulh(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.h, p7/m, z30.h, z29.h");
  TEST_SINGLE(umulh(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(umulh(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "umulh z30.d, p7/m, z30.d, z29.d");
}

// Predicated (merging) integer divides: sdiv/udiv and their sdivr/udivr
// counterparts. Only word and doubleword element sizes are exercised here.
// NOTE(review): presumably because the divide instructions have no byte/halfword
// forms - confirm against the ISA reference.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer divide vectors (predicated)") {
  TEST_SINGLE(sdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdiv z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(sdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdiv z30.d, p7/m, z30.d, z29.d");

  TEST_SINGLE(udiv(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udiv z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(udiv(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udiv z30.d, p7/m, z30.d, z29.d");

  TEST_SINGLE(sdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdivr z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(sdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "sdivr z30.d, p7/m, z30.d, z29.d");

  TEST_SINGLE(udivr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udivr z30.s, p7/m, z30.s, z29.s");
  TEST_SINGLE(udivr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "udivr z30.d, p7/m, z30.d, z29.d");
}

// Predicated (merging) bitwise logic: orr, eor, and_ and bic at byte through
// doubleword element sizes, with z30 as both destination and first source.
// The i128Bit variant of each instruction is kept as a disabled (commented-out) line.
// NOTE(review): presumably 128-bit elements are not valid for these instructions - confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical operations (predicated)") {
  TEST_SINGLE(orr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(orr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(orr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(orr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.d, p6/m, z30.d, z29.d");
  // TEST_SINGLE(orr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "orr z30.q, p6/m, z30.q, z29.q");

  TEST_SINGLE(eor(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(eor(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(eor(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(eor(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.d, p6/m, z30.d, z29.d");
  // TEST_SINGLE(eor(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "eor z30.q, p6/m, z30.q, z29.q");

  // The emitter method is spelled and_ while the expected mnemonic is "and".
  TEST_SINGLE(and_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(and_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(and_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(and_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.d, p6/m, z30.d, z29.d");
  // TEST_SINGLE(and_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "and z30.q, p6/m, z30.q, z29.q");

  TEST_SINGLE(bic(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(bic(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(bic(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(bic(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.d, p6/m, z30.d, z29.d");
  // TEST_SINGLE(bic(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "bic z30.q, p6/m, z30.q, z29.q");
}

// Predicated integer add reductions into a D register: saddv and uaddv.
// Each TEST_SINGLE pairs one emitter call with the assembly text expected for it.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add reduction (predicated)") {
  // Signed reduction: the ISA only defines byte, halfword and word source elements.
  TEST_SINGLE(saddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.b");
  TEST_SINGLE(saddv(SubRegSize::i16Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.h");
  TEST_SINGLE(saddv(SubRegSize::i32Bit, DReg::d30, PReg::p7, ZReg::z29), "saddv d30, p7, z29.s");

  // Unsigned reduction: unlike saddv, the ISA also defines a doubleword form,
  // so all four element sizes are covered.
  TEST_SINGLE(uaddv(SubRegSize::i8Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.b");
  TEST_SINGLE(uaddv(SubRegSize::i16Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.h");
  TEST_SINGLE(uaddv(SubRegSize::i32Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.s");
  TEST_SINGLE(uaddv(SubRegSize::i64Bit, DReg::d30, PReg::p7, ZReg::z29), "uaddv d30, p7, z29.d");
}
// Predicated integer min/max reductions: smaxv, umaxv, sminv and uminv at byte
// through doubleword element sizes.
// The destination is always passed as VReg::v30; the expected text names it with
// the scalar view matching the element size (b30 / h30 / s30 / d30).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer min/max reduction (predicated)") {
  TEST_SINGLE(smaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv b30, p6, z29.b");
  TEST_SINGLE(smaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv h30, p6, z29.h");
  TEST_SINGLE(smaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv s30, p6, z29.s");
  TEST_SINGLE(smaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "smaxv d30, p6, z29.d");

  TEST_SINGLE(umaxv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv b30, p6, z29.b");
  TEST_SINGLE(umaxv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv h30, p6, z29.h");
  TEST_SINGLE(umaxv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv s30, p6, z29.s");
  TEST_SINGLE(umaxv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "umaxv d30, p6, z29.d");

  TEST_SINGLE(sminv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv b30, p6, z29.b");
  TEST_SINGLE(sminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv h30, p6, z29.h");
  TEST_SINGLE(sminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv s30, p6, z29.s");
  TEST_SINGLE(sminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "sminv d30, p6, z29.d");

  TEST_SINGLE(uminv(SubRegSize::i8Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv b30, p6, z29.b");
  TEST_SINGLE(uminv(SubRegSize::i16Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv h30, p6, z29.h");
  TEST_SINGLE(uminv(SubRegSize::i32Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv s30, p6, z29.s");
  TEST_SINGLE(uminv(SubRegSize::i64Bit, VReg::v30, PReg::p6, ZReg::z29), "uminv d30, p6, z29.d");
}
// Predicated movprfx at byte through doubleword element sizes, once with a
// merging predicate ("/m") and once with a zeroing predicate ("/z").
// The i128Bit variants are kept as disabled (commented-out) lines.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE constructive prefix (predicated)") {
  // Merging predication.
  TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.b, p6/m, z29.b");
  TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.h, p6/m, z29.h");
  TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.s, p6/m, z29.s");
  TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.d, p6/m, z29.d");
  // TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "movprfx z30.q, p6/m, z29.q");
  // Zeroing predication.
  TEST_SINGLE(movprfx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.b, p6/z, z29.b");
  TEST_SINGLE(movprfx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.h, p6/z, z29.h");
  TEST_SINGLE(movprfx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.s, p6/z, z29.s");
  TEST_SINGLE(movprfx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.d, p6/z, z29.d");
  // TEST_SINGLE(movprfx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29), "movprfx z30.q, p6/z, z29.q");
}
// Predicated bitwise reductions: orv, eorv and andv at byte through doubleword
// element sizes.
// The destination is always passed as VReg::v30; the expected text names it with
// the scalar view matching the element size (b30 / h30 / s30 / d30).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical reduction (predicated)") {
  TEST_SINGLE(orv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "orv b30, p7, z29.b");
  TEST_SINGLE(orv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), "orv h30, p7, z29.h");
  TEST_SINGLE(orv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "orv s30, p7, z29.s");
  TEST_SINGLE(orv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "orv d30, p7, z29.d");

  TEST_SINGLE(eorv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv b30, p7, z29.b");
  TEST_SINGLE(eorv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv h30, p7, z29.h");
  TEST_SINGLE(eorv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv s30, p7, z29.s");
  TEST_SINGLE(eorv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "eorv d30, p7, z29.d");

  TEST_SINGLE(andv(SubRegSize::i8Bit, VReg::v30, PReg::p7, ZReg::z29), "andv b30, p7, z29.b");
  TEST_SINGLE(andv(SubRegSize::i16Bit, VReg::v30, PReg::p7, ZReg::z29), "andv h30, p7, z29.h");
  TEST_SINGLE(andv(SubRegSize::i32Bit, VReg::v30, PReg::p7, ZReg::z29), "andv s30, p7, z29.s");
  TEST_SINGLE(andv(SubRegSize::i64Bit, VReg::v30, PReg::p7, ZReg::z29), "andv d30, p7, z29.d");
}
// Predicated (merging) shifts by an immediate, with z30 as both destination and source.
// Every instruction is tested at two shift amounts per element size:
//   - right-shift style ops (asr, lsr, asrd, srshr, urshr): 1 and the element width in bits
//   - left-shift style ops (lsl, sqshl, uqshl, sqshlu): 0 and the element width minus one
// NOTE(review): these look like the lowest and highest encodable immediates for
// each form - confirm against the ISA reference.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by immediate (predicated)") {
  TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asr z30.b, p6/m, z30.b, #1");
  TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 8), "asr z30.b, p6/m, z30.b, #8");
  TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asr z30.h, p6/m, z30.h, #1");
  TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 16), "asr z30.h, p6/m, z30.h, #16");
  TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asr z30.s, p6/m, z30.s, #1");
  TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 32), "asr z30.s, p6/m, z30.s, #32");
  TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asr z30.d, p6/m, z30.d, #1");
  TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 64), "asr z30.d, p6/m, z30.d, #64");

  TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "lsr z30.b, p6/m, z30.b, #1");
  TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 8), "lsr z30.b, p6/m, z30.b, #8");
  TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "lsr z30.h, p6/m, z30.h, #1");
  TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 16), "lsr z30.h, p6/m, z30.h, #16");
  TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "lsr z30.s, p6/m, z30.s, #1");
  TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 32), "lsr z30.s, p6/m, z30.s, #32");
  TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "lsr z30.d, p6/m, z30.d, #1");
  TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 64), "lsr z30.d, p6/m, z30.d, #64");

  TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "lsl z30.b, p6/m, z30.b, #0");
  TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 7), "lsl z30.b, p6/m, z30.b, #7");
  TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "lsl z30.h, p6/m, z30.h, #0");
  TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 15), "lsl z30.h, p6/m, z30.h, #15");
  TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "lsl z30.s, p6/m, z30.s, #0");
  TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 31), "lsl z30.s, p6/m, z30.s, #31");
  TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "lsl z30.d, p6/m, z30.d, #0");
  TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 63), "lsl z30.d, p6/m, z30.d, #63");

  TEST_SINGLE(asrd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asrd z30.b, p6/m, z30.b, #1");
  TEST_SINGLE(asrd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 8), "asrd z30.b, p6/m, z30.b, #8");
  TEST_SINGLE(asrd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asrd z30.h, p6/m, z30.h, #1");
  TEST_SINGLE(asrd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 16), "asrd z30.h, p6/m, z30.h, #16");
  TEST_SINGLE(asrd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asrd z30.s, p6/m, z30.s, #1");
  TEST_SINGLE(asrd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 32), "asrd z30.s, p6/m, z30.s, #32");
  TEST_SINGLE(asrd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "asrd z30.d, p6/m, z30.d, #1");
  TEST_SINGLE(asrd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 64), "asrd z30.d, p6/m, z30.d, #64");

  TEST_SINGLE(sqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshl z30.b, p6/m, z30.b, #0");
  TEST_SINGLE(sqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 7), "sqshl z30.b, p6/m, z30.b, #7");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshl z30.h, p6/m, z30.h, #0");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 15), "sqshl z30.h, p6/m, z30.h, #15");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshl z30.s, p6/m, z30.s, #0");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 31), "sqshl z30.s, p6/m, z30.s, #31");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshl z30.d, p6/m, z30.d, #0");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 63), "sqshl z30.d, p6/m, z30.d, #63");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "uqshl z30.b, p6/m, z30.b, #0");
  TEST_SINGLE(uqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 7), "uqshl z30.b, p6/m, z30.b, #7");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "uqshl z30.h, p6/m, z30.h, #0");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 15), "uqshl z30.h, p6/m, z30.h, #15");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "uqshl z30.s, p6/m, z30.s, #0");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 31), "uqshl z30.s, p6/m, z30.s, #31");
  TEST_SINGLE(uqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "uqshl z30.d, p6/m, z30.d, #0");
  TEST_SINGLE(uqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 63), "uqshl z30.d, p6/m, z30.d, #63");

  TEST_SINGLE(srshr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "srshr z30.b, p6/m, z30.b, #1");
  TEST_SINGLE(srshr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 8), "srshr z30.b, p6/m, z30.b, #8");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "srshr z30.h, p6/m, z30.h, #1");
  TEST_SINGLE(srshr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 16), "srshr z30.h, p6/m, z30.h, #16");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "srshr z30.s, p6/m, z30.s, #1");
  TEST_SINGLE(srshr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 32), "srshr z30.s, p6/m, z30.s, #32");
  TEST_SINGLE(srshr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "srshr z30.d, p6/m, z30.d, #1");
  TEST_SINGLE(srshr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 64), "srshr z30.d, p6/m, z30.d, #64");

  TEST_SINGLE(urshr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "urshr z30.b, p6/m, z30.b, #1");
  TEST_SINGLE(urshr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 8), "urshr z30.b, p6/m, z30.b, #8");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "urshr z30.h, p6/m, z30.h, #1");
  TEST_SINGLE(urshr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 16), "urshr z30.h, p6/m, z30.h, #16");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "urshr z30.s, p6/m, z30.s, #1");
  TEST_SINGLE(urshr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 32), "urshr z30.s, p6/m, z30.s, #32");
  TEST_SINGLE(urshr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 1), "urshr z30.d, p6/m, z30.d, #1");
  TEST_SINGLE(urshr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 64), "urshr z30.d, p6/m, z30.d, #64");

  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshlu z30.b, p6/m, z30.b, #0");
  TEST_SINGLE(sqshlu(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 7), "sqshlu z30.b, p6/m, z30.b, #7");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshlu z30.h, p6/m, z30.h, #0");
  TEST_SINGLE(sqshlu(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 15), "sqshlu z30.h, p6/m, z30.h, #15");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshlu z30.s, p6/m, z30.s, #0");
  TEST_SINGLE(sqshlu(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 31), "sqshlu z30.s, p6/m, z30.s, #31");
  TEST_SINGLE(sqshlu(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 0), "sqshlu z30.d, p6/m, z30.d, #0");
  TEST_SINGLE(sqshlu(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, 63), "sqshlu z30.d, p6/m, z30.d, #63");
}
// Predicated (merging) shifts where the shift amount comes from a vector (z29):
// asr, lsr, lsl and the asrr, lsrr, lslr variants, at byte through doubleword
// element sizes. z30 is both destination and first source in every case.
// These share emitter names with the shift-by-immediate forms; here the last
// argument is a ZReg rather than an integer.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by vector (predicated)") {
  TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsl z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(asrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(asrr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(asrr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(asrr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "asrr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(lsrr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(lsrr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(lsrr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(lsrr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lsrr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(lslr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(lslr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(lslr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(lslr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "lslr z30.d, p6/m, z30.d, z29.d");
}
// Predicated (merging) shifts by wide elements via the asr_wide / lsr_wide /
// lsl_wide emitter helpers. The expected text uses the plain asr/lsr/lsl
// mnemonics; the "wide" form shows up as the second source always being ".d"
// while destination and first source use the requested element size.
// Only byte, halfword and word element sizes are exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by wide elements (predicated)") {
  TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.b, p7/m, z30.b, z29.d");
  TEST_SINGLE(asr_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.h, p7/m, z30.h, z29.d");
  TEST_SINGLE(asr_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "asr z30.s, p7/m, z30.s, z29.d");

  TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.b, p7/m, z30.b, z29.d");
  TEST_SINGLE(lsr_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.h, p7/m, z30.h, z29.d");
  TEST_SINGLE(lsr_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsr z30.s, p7/m, z30.s, z29.d");

  TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.b, p7/m, z30.b, z29.d");
  TEST_SINGLE(lsl_wide(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.h, p7/m, z30.h, z29.d");
  TEST_SINGLE(lsl_wide(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z29), "lsl z30.s, p7/m, z30.s, z29.d");
}
// Predicated (merging) integer unary operations: the sign/zero extends
// (sxtb/uxtb, sxth/uxth, sxtw/uxtw) plus abs and neg.
// For the extends, only element sizes wider than the extended quantity are
// enabled (byte extends: h/s/d, halfword extends: s/d, word extends: d only).
// The remaining sizes, and every i128Bit variant, are kept as disabled
// (commented-out) lines.
// NOTE(review): presumably those combinations are not valid encodings - confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer unary operations (predicated)") {
  // TEST_SINGLE(sxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "sxtb z30.b, p6/m, z29.b");
  TEST_SINGLE(sxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.h, p6/m, z29.h");
  TEST_SINGLE(sxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.s, p6/m, z29.s");
  TEST_SINGLE(sxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.d, p6/m, z29.d");
  // TEST_SINGLE(sxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtb z30.q, p6/m, z29.q");

  // TEST_SINGLE(uxtb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "uxtb z30.b, p6/m, z29.b");
  TEST_SINGLE(uxtb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.h, p6/m, z29.h");
  TEST_SINGLE(uxtb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.s, p6/m, z29.s");
  TEST_SINGLE(uxtb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.d, p6/m, z29.d");
  // TEST_SINGLE(uxtb(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtb z30.q, p6/m, z29.q");

  // TEST_SINGLE(sxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "sxth z30.b, p6/m, z29.b");
  // TEST_SINGLE(sxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "sxth z30.h, p6/m, z29.h");
  TEST_SINGLE(sxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.s, p6/m, z29.s");
  TEST_SINGLE(sxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.d, p6/m, z29.d");
  // TEST_SINGLE(sxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxth z30.q, p6/m, z29.q");

  // TEST_SINGLE(uxth(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "uxth z30.b, p6/m, z29.b");
  // TEST_SINGLE(uxth(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "uxth z30.h, p6/m, z29.h");
  TEST_SINGLE(uxth(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.s, p6/m, z29.s");
  TEST_SINGLE(uxth(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.d, p6/m, z29.d");
  // TEST_SINGLE(uxth(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxth z30.q, p6/m, z29.q");

  // TEST_SINGLE(sxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "sxtw z30.b, p6/m, z29.b");
  // TEST_SINGLE(sxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "sxtw z30.h, p6/m, z29.h");
  // TEST_SINGLE(sxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "sxtw z30.s, p6/m, z29.s");
  TEST_SINGLE(sxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.d, p6/m, z29.d");
  // TEST_SINGLE(sxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sxtw z30.q, p6/m, z29.q");

  // TEST_SINGLE(uxtw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "uxtw z30.b, p6/m, z29.b");
  // TEST_SINGLE(uxtw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "uxtw z30.h, p6/m, z29.h");
  // TEST_SINGLE(uxtw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),  "uxtw z30.s, p6/m, z29.s");
  TEST_SINGLE(uxtw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.d, p6/m, z29.d");
  // TEST_SINGLE(uxtw(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uxtw z30.q, p6/m, z29.q");

  // abs and neg are enabled at all four element sizes from byte to doubleword.
  TEST_SINGLE(abs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.b, p6/m, z29.b");
  TEST_SINGLE(abs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.h, p6/m, z29.h");
  TEST_SINGLE(abs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.s, p6/m, z29.s");
  TEST_SINGLE(abs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.d, p6/m, z29.d");
  // TEST_SINGLE(abs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "abs z30.q, p6/m, z29.q");

  TEST_SINGLE(neg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.b, p6/m, z29.b");
  TEST_SINGLE(neg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.h, p6/m, z29.h");
  TEST_SINGLE(neg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.s, p6/m, z29.s");
  TEST_SINGLE(neg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.d, p6/m, z29.d");
  // TEST_SINGLE(neg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "neg z30.q, p6/m, z29.q");
}
// Predicated (merging) bitwise unary operations: cls, clz, cnt, cnot, fabs,
// fneg and not_.
// The integer ops are enabled at byte through doubleword element sizes; fabs and
// fneg are enabled only at halfword through doubleword. The byte variants of
// fabs/fneg and every i128Bit variant are kept as disabled (commented-out) lines.
// NOTE(review): presumably those combinations are not valid encodings - confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise unary operations (predicated)") {
  TEST_SINGLE(cls(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.b, p6/m, z29.b");
  TEST_SINGLE(cls(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.h, p6/m, z29.h");
  TEST_SINGLE(cls(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.s, p6/m, z29.s");
  TEST_SINGLE(cls(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.d, p6/m, z29.d");
  // TEST_SINGLE(cls(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cls z30.q, p6/m, z29.q");

  TEST_SINGLE(clz(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.b, p6/m, z29.b");
  TEST_SINGLE(clz(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.h, p6/m, z29.h");
  TEST_SINGLE(clz(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.s, p6/m, z29.s");
  TEST_SINGLE(clz(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.d, p6/m, z29.d");
  // TEST_SINGLE(clz(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "clz z30.q, p6/m, z29.q");

  TEST_SINGLE(cnt(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.b, p6/m, z29.b");
  TEST_SINGLE(cnt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.h, p6/m, z29.h");
  TEST_SINGLE(cnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.s, p6/m, z29.s");
  TEST_SINGLE(cnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.d, p6/m, z29.d");
  // TEST_SINGLE(cnt(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnt z30.q, p6/m, z29.q");

  TEST_SINGLE(cnot(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.b, p6/m, z29.b");
  TEST_SINGLE(cnot(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.h, p6/m, z29.h");
  TEST_SINGLE(cnot(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.s, p6/m, z29.s");
  TEST_SINGLE(cnot(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.d, p6/m, z29.d");
  // TEST_SINGLE(cnot(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "cnot z30.q, p6/m, z29.q");

  // TEST_SINGLE(fabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "fabs z30.b, p6/m, z29.b");
  TEST_SINGLE(fabs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.h, p6/m, z29.h");
  TEST_SINGLE(fabs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.s, p6/m, z29.s");
  TEST_SINGLE(fabs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.d, p6/m, z29.d");
  // TEST_SINGLE(fabs(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fabs z30.q, p6/m, z29.q");

  // TEST_SINGLE(fneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29),   "fneg z30.b, p6/m, z29.b");
  TEST_SINGLE(fneg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.h, p6/m, z29.h");
  TEST_SINGLE(fneg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.s, p6/m, z29.s");
  TEST_SINGLE(fneg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.d, p6/m, z29.d");
  // TEST_SINGLE(fneg(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fneg z30.q, p6/m, z29.q");

  // The emitter method is spelled not_ while the expected mnemonic is "not".
  TEST_SINGLE(not_(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.b, p6/m, z29.b");
  TEST_SINGLE(not_(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.h, p6/m, z29.h");
  TEST_SINGLE(not_(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.s, p6/m, z29.s");
  TEST_SINGLE(not_(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.d, p6/m, z29.d");
  // TEST_SINGLE(not_(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "not z30.q, p6/m, z29.q");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical operations (unpredicated)") {
  // Each TEST_SINGLE pairs one emitter call with the disassembly text expected for it.
  // These forms take no element-size argument; the expected text always uses the .d arrangement.
  TEST_SINGLE(and_(ZReg::z30, ZReg::z29, ZReg::z28), "and z30.d, z29.d, z28.d");
  TEST_SINGLE(orr(ZReg::z30, ZReg::z29, ZReg::z28), "orr z30.d, z29.d, z28.d");
  TEST_SINGLE(mov(ZReg::z30, ZReg::z29), "mov z30.d, z29.d");
  TEST_SINGLE(eor(ZReg::z30, ZReg::z29, ZReg::z28), "eor z30.d, z29.d, z28.d");
  TEST_SINGLE(bic(ZReg::z30, ZReg::z29, ZReg::z28), "bic z30.d, z29.d, z28.d");

  // xar is called with two registers only; the expected text repeats the destination as the first source.
  // Every element size is checked with an immediate of 1 and an immediate equal to the element width in bits.
  TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "xar z30.b, z30.b, z29.b, #1");
  TEST_SINGLE(xar(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "xar z30.b, z30.b, z29.b, #8");
  TEST_SINGLE(xar(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "xar z30.h, z30.h, z29.h, #1");
  TEST_SINGLE(xar(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "xar z30.h, z30.h, z29.h, #16");
  TEST_SINGLE(xar(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "xar z30.s, z30.s, z29.s, #1");
  TEST_SINGLE(xar(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "xar z30.s, z30.s, z29.s, #32");
  TEST_SINGLE(xar(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "xar z30.d, z30.d, z29.d, #1");
  TEST_SINGLE(xar(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "xar z30.d, z30.d, z29.d, #64");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise ternary operations") {
  // All six instructions are called with z30 as both the destination and the first source,
  // and are expected to disassemble with the .d arrangement. The remaining two operands
  // (z28, z29) appear in the same order in the call and in the expected text.
  TEST_SINGLE(eor3(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "eor3 z30.d, z30.d, z28.d, z29.d");
  TEST_SINGLE(bsl(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "bsl z30.d, z30.d, z28.d, z29.d");
  TEST_SINGLE(bcax(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "bcax z30.d, z30.d, z28.d, z29.d");
  TEST_SINGLE(bsl1n(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "bsl1n z30.d, z30.d, z28.d, z29.d");
  TEST_SINGLE(bsl2n(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "bsl2n z30.d, z30.d, z28.d, z29.d");
  TEST_SINGLE(nbsl(ZReg::z30, ZReg::z30, ZReg::z28, ZReg::z29), "nbsl z30.d, z30.d, z28.d, z29.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Index Generation") {
  // Checks the four operand combinations of `index` (start, step), each across all four element sizes.
  // Immediate operands are exercised with -16 and 15; register operands use W registers for
  // 8/16/32-bit elements and X registers for 64-bit elements.

  // Immediate start, immediate step.
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, -16), "index z30.b, #-16, #-16");
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, 15), "index z30.b, #-16, #15");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, -16), "index z30.h, #-16, #-16");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, 15), "index z30.h, #-16, #15");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, -16), "index z30.s, #-16, #-16");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, 15), "index z30.s, #-16, #15");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, -16), "index z30.d, #-16, #-16");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, 15), "index z30.d, #-16, #15");

  // Register start, immediate step.
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, -16), "index z30.b, w29, #-16");
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, 15), "index z30.b, w29, #15");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, -16), "index z30.h, w29, #-16");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, 15), "index z30.h, w29, #15");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, -16), "index z30.s, w29, #-16");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, 15), "index z30.s, w29, #15");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, -16), "index z30.d, x29, #-16");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, 15), "index z30.d, x29, #15");

  // Immediate start, register step.
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, -16, WReg::w29), "index z30.b, #-16, w29");
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, 15, WReg::w29), "index z30.b, #15, w29");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, -16, WReg::w29), "index z30.h, #-16, w29");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, 15, WReg::w29), "index z30.h, #15, w29");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, -16, WReg::w29), "index z30.s, #-16, w29");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, 15, WReg::w29), "index z30.s, #15, w29");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, -16, XReg::x29), "index z30.d, #-16, x29");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, 15, XReg::x29), "index z30.d, #15, x29");

  // Register start, register step. There is no immediate to vary here, so a single
  // check per element size is sufficient (the previous verbatim duplicates were dropped).
  TEST_SINGLE(index(SubRegSize::i8Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.b, w29, w28");
  TEST_SINGLE(index(SubRegSize::i16Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.h, w29, w28");
  TEST_SINGLE(index(SubRegSize::i32Bit, ZReg::z30, WReg::w29, WReg::w28), "index z30.s, w29, w28");
  TEST_SINGLE(index(SubRegSize::i64Bit, ZReg::z30, XReg::x29, XReg::x28), "index z30.d, x29, x28");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE stack frame adjustment") {
  // addvl: immediates -32 and 31 with XReg::rsp as both operands (expected to print as "sp"),
  // plus one case with distinct general-purpose registers.
  TEST_SINGLE(addvl(XReg::rsp, XReg::rsp, -32), "addvl sp, sp, #-32");
  TEST_SINGLE(addvl(XReg::rsp, XReg::rsp, 31), "addvl sp, sp, #31");
  TEST_SINGLE(addvl(XReg::x30, XReg::x29, 15), "addvl x30, x29, #15");

  // addpl: same operand combinations as addvl above.
  TEST_SINGLE(addpl(XReg::rsp, XReg::rsp, -32), "addpl sp, sp, #-32");
  TEST_SINGLE(addpl(XReg::rsp, XReg::rsp, 31), "addpl sp, sp, #31");
  TEST_SINGLE(addpl(XReg::x30, XReg::x29, 15), "addpl x30, x29, #15");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Streaming SVE stack frame adjustment") {
  // Placeholder test case: it contains no assertions yet.
  // TODO: Implement in emitter.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE stack frame size") {
  // rdvl into x30 with immediates -32, 31 and 15.
  TEST_SINGLE(rdvl(XReg::x30, -32), "rdvl x30, #-32");
  TEST_SINGLE(rdvl(XReg::x30, 31), "rdvl x30, #31");
  TEST_SINGLE(rdvl(XReg::x30, 15), "rdvl x30, #15");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: Streaming SVE stack frame size") {
  // Placeholder test case: it contains no assertions yet.
  // TODO: Implement in emitter.
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply vectors (unpredicated)") {
  // mul, smulh and umulh are each checked at 8/16/32/64-bit element sizes with the same
  // three registers; the expected arrangement suffix follows the SubRegSize argument.
  TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "mul z30.b, z29.b, z28.b");
  TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "mul z30.h, z29.h, z28.h");
  TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "mul z30.s, z29.s, z28.s");
  TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "mul z30.d, z29.d, z28.d");

  TEST_SINGLE(smulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smulh z30.b, z29.b, z28.b");
  TEST_SINGLE(smulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smulh z30.h, z29.h, z28.h");
  TEST_SINGLE(smulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smulh z30.s, z29.s, z28.s");
  TEST_SINGLE(smulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smulh z30.d, z29.d, z28.d");

  TEST_SINGLE(umulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umulh z30.b, z29.b, z28.b");
  TEST_SINGLE(umulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umulh z30.h, z29.h, z28.h");
  TEST_SINGLE(umulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umulh z30.s, z29.s, z28.s");
  TEST_SINGLE(umulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umulh z30.d, z29.d, z28.d");

  // pmul takes no element-size argument; the expected text uses the .b arrangement.
  TEST_SINGLE(pmul(ZReg::z30, ZReg::z29, ZReg::z28), "pmul z30.b, z29.b, z28.b");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 signed saturating doubling multiply high (unpredicated)") {
  // sqdmulh at every element size; the arrangement suffix follows the SubRegSize argument.
  TEST_SINGLE(sqdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.b, z29.b, z28.b");
  TEST_SINGLE(sqdmulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.h, z29.h, z28.h");
  TEST_SINGLE(sqdmulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.s, z29.s, z28.s");
  TEST_SINGLE(sqdmulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmulh z30.d, z29.d, z28.d");

  // sqrdmulh with the same operand set.
  TEST_SINGLE(sqrdmulh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.b, z29.b, z28.b");
  TEST_SINGLE(sqrdmulh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.h, z29.h, z28.h");
  TEST_SINGLE(sqrdmulh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.s, z29.s, z28.s");
  TEST_SINGLE(sqrdmulh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmulh z30.d, z29.d, z28.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by wide elements (unpredicated)") {
  // The *_wide emitter helpers are expected to disassemble as the plain asr/lsr/lsl mnemonics.
  // Destination and first source use the requested element size, while the last operand
  // is always printed with the .d arrangement. Only 8/16/32-bit element sizes are exercised.
  TEST_SINGLE(asr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.b, z29.b, z28.d");
  TEST_SINGLE(asr_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.h, z29.h, z28.d");
  TEST_SINGLE(asr_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "asr z30.s, z29.s, z28.d");

  TEST_SINGLE(lsr_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.b, z29.b, z28.d");
  TEST_SINGLE(lsr_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.h, z29.h, z28.d");
  TEST_SINGLE(lsr_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsr z30.s, z29.s, z28.d");

  TEST_SINGLE(lsl_wide(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.b, z29.b, z28.d");
  TEST_SINGLE(lsl_wide(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.h, z29.h, z28.d");
  TEST_SINGLE(lsl_wide(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "lsl z30.s, z29.s, z28.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise shift by immediate (unpredicated)") {
  // asr: each element size is checked with a shift of 1 and a shift equal to the element width in bits.
  TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "asr z30.b, z29.b, #1");
  TEST_SINGLE(asr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "asr z30.b, z29.b, #8");
  TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "asr z30.h, z29.h, #1");
  TEST_SINGLE(asr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "asr z30.h, z29.h, #16");
  TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "asr z30.s, z29.s, #1");
  TEST_SINGLE(asr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "asr z30.s, z29.s, #32");
  TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "asr z30.d, z29.d, #1");
  TEST_SINGLE(asr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "asr z30.d, z29.d, #64");

  // lsr: same shift amounts as asr (1 and the element width).
  TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.b, z29.b, #1");
  TEST_SINGLE(lsr(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "lsr z30.b, z29.b, #8");
  TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.h, z29.h, #1");
  TEST_SINGLE(lsr(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "lsr z30.h, z29.h, #16");
  TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.s, z29.s, #1");
  TEST_SINGLE(lsr(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "lsr z30.s, z29.s, #32");
  TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "lsr z30.d, z29.d, #1");
  TEST_SINGLE(lsr(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "lsr z30.d, z29.d, #64");

  // lsl: note the different amounts, 0 and (element width - 1), instead of 1 and the element width.
  TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.b, z29.b, #0");
  TEST_SINGLE(lsl(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "lsl z30.b, z29.b, #7");
  TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.h, z29.h, #0");
  TEST_SINGLE(lsl(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 15), "lsl z30.h, z29.h, #15");
  TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.s, z29.s, #0");
  TEST_SINGLE(lsl(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 31), "lsl z30.s, z29.s, #31");
  TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "lsl z30.d, z29.d, #0");
  TEST_SINGLE(lsl(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 63), "lsl z30.d, z29.d, #63");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point trig select coefficient") {
  // ftssel is exercised at 16/32/64-bit element sizes only; no 8-bit case is present.
  TEST_SINGLE(ftssel(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftssel z30.h, z29.h, z28.h");
  TEST_SINGLE(ftssel(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftssel z30.s, z29.s, z28.s");
  TEST_SINGLE(ftssel(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ftssel z30.d, z29.d, z28.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point exponential accelerator") {
  // fexpa is exercised at 16/32/64-bit element sizes only; no 8-bit case is present.
  TEST_SINGLE(fexpa(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "fexpa z30.h, z29.h");
  TEST_SINGLE(fexpa(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "fexpa z30.s, z29.s");
  TEST_SINGLE(fexpa(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "fexpa z30.d, z29.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE constructive prefix (unpredicated)") {
  // The unpredicated movprfx is expected to print bare register names with no arrangement suffix.
  TEST_SINGLE(movprfx(ZReg::z30, ZReg::z29), "movprfx z30, z29");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec vector by element count") {
  // Vector (ZReg) forms of the saturating inc/dec family. The emitter calls take no
  // element-size argument; the expected arrangement (.h/.s/.d) follows the mnemonic suffix (h/w/d).
  // Every instruction is checked with the same three (pattern, multiplier) pairs:
  //   - POW2  with multiplier 1:  expected text has no "mul" operand
  //   - VL256 with multiplier 7:  expected text ends in "mul #7"
  //   - ALL   with multiplier 16: expected text ends in "mul #16"

  // Halfword element count.
  TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqinch z30.h, pow2");
  TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqinch z30.h, vl256, mul #7");
  TEST_SINGLE(sqinch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqinch z30.h, all, mul #16");

  TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqinch z30.h, pow2");
  TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqinch z30.h, vl256, mul #7");
  TEST_SINGLE(uqinch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqinch z30.h, all, mul #16");

  TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdech z30.h, pow2");
  TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdech z30.h, vl256, mul #7");
  TEST_SINGLE(sqdech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdech z30.h, all, mul #16");

  TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdech z30.h, pow2");
  TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdech z30.h, vl256, mul #7");
  TEST_SINGLE(uqdech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdech z30.h, all, mul #16");

  // Word element count.
  TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqincw z30.s, pow2");
  TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincw z30.s, vl256, mul #7");
  TEST_SINGLE(sqincw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqincw z30.s, all, mul #16");

  TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqincw z30.s, pow2");
  TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincw z30.s, vl256, mul #7");
  TEST_SINGLE(uqincw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqincw z30.s, all, mul #16");

  TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdecw z30.s, pow2");
  TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecw z30.s, vl256, mul #7");
  TEST_SINGLE(sqdecw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdecw z30.s, all, mul #16");

  TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdecw z30.s, pow2");
  TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecw z30.s, vl256, mul #7");
  TEST_SINGLE(uqdecw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdecw z30.s, all, mul #16");

  // Doubleword element count.
  TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqincd z30.d, pow2");
  TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqincd z30.d, vl256, mul #7");
  TEST_SINGLE(sqincd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqincd z30.d, all, mul #16");

  TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqincd z30.d, pow2");
  TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqincd z30.d, vl256, mul #7");
  TEST_SINGLE(uqincd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqincd z30.d, all, mul #16");

  TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "sqdecd z30.d, pow2");
  TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "sqdecd z30.d, vl256, mul #7");
  TEST_SINGLE(sqdecd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "sqdecd z30.d, all, mul #16");

  TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "uqdecd z30.d, pow2");
  TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "uqdecd z30.d, vl256, mul #7");
  TEST_SINGLE(uqdecd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "uqdecd z30.d, all, mul #16");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE element count") {
  // cntb/cnth/cntw/cntd into x30, each with the same three (pattern, multiplier) pairs.
  // A multiplier of 1 is expected to be omitted from the disassembly; 7 and 16 print as "mul #N".
  TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntb x30, pow2");
  TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntb x30, vl256, mul #7");
  TEST_SINGLE(cntb(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntb x30, all, mul #16");

  TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_POW2, 1), "cnth x30, pow2");
  TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_VL256, 7), "cnth x30, vl256, mul #7");
  TEST_SINGLE(cnth(XReg::x30, PredicatePattern::SVE_ALL, 16), "cnth x30, all, mul #16");

  TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntw x30, pow2");
  TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntw x30, vl256, mul #7");
  TEST_SINGLE(cntw(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntw x30, all, mul #16");

  TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_POW2, 1), "cntd x30, pow2");
  TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_VL256, 7), "cntd x30, vl256, mul #7");
  TEST_SINGLE(cntd(XReg::x30, PredicatePattern::SVE_ALL, 16), "cntd x30, all, mul #16");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec vector by element count") {
  // Vector (ZReg) forms of inc/dec for halfword, word and doubleword counts; there is no
  // byte variant in this group. The expected arrangement follows the mnemonic suffix.
  // A multiplier of 1 is expected to be omitted from the disassembly; 7 and 16 print as "mul #N".
  TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_POW2, 1), "inch z30.h, pow2");
  TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_VL256, 7), "inch z30.h, vl256, mul #7");
  TEST_SINGLE(inch(ZReg::z30, PredicatePattern::SVE_ALL, 16), "inch z30.h, all, mul #16");

  TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_POW2, 1), "dech z30.h, pow2");
  TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_VL256, 7), "dech z30.h, vl256, mul #7");
  TEST_SINGLE(dech(ZReg::z30, PredicatePattern::SVE_ALL, 16), "dech z30.h, all, mul #16");

  TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "incw z30.s, pow2");
  TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incw z30.s, vl256, mul #7");
  TEST_SINGLE(incw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "incw z30.s, all, mul #16");

  TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_POW2, 1), "decw z30.s, pow2");
  TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decw z30.s, vl256, mul #7");
  TEST_SINGLE(decw(ZReg::z30, PredicatePattern::SVE_ALL, 16), "decw z30.s, all, mul #16");

  TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "incd z30.d, pow2");
  TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "incd z30.d, vl256, mul #7");
  TEST_SINGLE(incd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "incd z30.d, all, mul #16");

  TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_POW2, 1), "decd z30.d, pow2");
  TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_VL256, 7), "decd z30.d, vl256, mul #7");
  TEST_SINGLE(decd(ZReg::z30, PredicatePattern::SVE_ALL, 16), "decd z30.d, all, mul #16");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec register by element count") {
  // Scalar (XReg) forms of inc/dec for byte, halfword, word and doubleword counts.
  // These reuse the mnemonics of the vector forms; the XReg argument selects the scalar overload.
  // A multiplier of 1 is expected to be omitted from the disassembly; 7 and 16 print as "mul #N".
  TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_POW2, 1), "incb x30, pow2");
  TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_VL256, 7), "incb x30, vl256, mul #7");
  TEST_SINGLE(incb(XReg::x30, PredicatePattern::SVE_ALL, 16), "incb x30, all, mul #16");

  TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_POW2, 1), "decb x30, pow2");
  TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_VL256, 7), "decb x30, vl256, mul #7");
  TEST_SINGLE(decb(XReg::x30, PredicatePattern::SVE_ALL, 16), "decb x30, all, mul #16");

  TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_POW2, 1), "inch x30, pow2");
  TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_VL256, 7), "inch x30, vl256, mul #7");
  TEST_SINGLE(inch(XReg::x30, PredicatePattern::SVE_ALL, 16), "inch x30, all, mul #16");

  TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_POW2, 1), "dech x30, pow2");
  TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_VL256, 7), "dech x30, vl256, mul #7");
  TEST_SINGLE(dech(XReg::x30, PredicatePattern::SVE_ALL, 16), "dech x30, all, mul #16");

  TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_POW2, 1), "incw x30, pow2");
  TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_VL256, 7), "incw x30, vl256, mul #7");
  TEST_SINGLE(incw(XReg::x30, PredicatePattern::SVE_ALL, 16), "incw x30, all, mul #16");

  TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_POW2, 1), "decw x30, pow2");
  TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_VL256, 7), "decw x30, vl256, mul #7");
  TEST_SINGLE(decw(XReg::x30, PredicatePattern::SVE_ALL, 16), "decw x30, all, mul #16");

  TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_POW2, 1), "incd x30, pow2");
  TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_VL256, 7), "incd x30, vl256, mul #7");
  TEST_SINGLE(incd(XReg::x30, PredicatePattern::SVE_ALL, 16), "incd x30, all, mul #16");

  TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_POW2, 1), "decd x30, pow2");
  TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_VL256, 7), "decd x30, vl256, mul #7");
  TEST_SINGLE(decd(XReg::x30, PredicatePattern::SVE_ALL, 16), "decd x30, all, mul #16");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec register by element count") {
  // Scalar forms of the saturating inc/dec family, for byte/halfword/word/doubleword counts.
  // Each mnemonic is checked with both an XReg and a WReg operand, using the same three
  // (pattern, multiplier) pairs; a multiplier of 1 is expected to be omitted from the text.
  //
  // Note the asymmetry in the expected text for WReg operands:
  //   - signed   (sq*) variants print two operands: "x30, w30, ..."
  //   - unsigned (uq*) variants print only:         "w30, ..."

  // Byte element count.
  TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincb x30, pow2");
  TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincb x30, vl256, mul #7");
  TEST_SINGLE(sqincb(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincb x30, all, mul #16");

  TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincb x30, w30, pow2");
  TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincb x30, w30, vl256, mul #7");
  TEST_SINGLE(sqincb(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincb x30, w30, all, mul #16");

  TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincb x30, pow2");
  TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincb x30, vl256, mul #7");
  TEST_SINGLE(uqincb(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincb x30, all, mul #16");

  TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincb w30, pow2");
  TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincb w30, vl256, mul #7");
  TEST_SINGLE(uqincb(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincb w30, all, mul #16");

  TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecb x30, pow2");
  TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecb x30, vl256, mul #7");
  TEST_SINGLE(sqdecb(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecb x30, all, mul #16");

  TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecb x30, w30, pow2");
  TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecb x30, w30, vl256, mul #7");
  TEST_SINGLE(sqdecb(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecb x30, w30, all, mul #16");

  TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecb x30, pow2");
  TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecb x30, vl256, mul #7");
  TEST_SINGLE(uqdecb(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecb x30, all, mul #16");

  TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecb w30, pow2");
  TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecb w30, vl256, mul #7");
  TEST_SINGLE(uqdecb(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecb w30, all, mul #16");

  // Halfword element count.
  TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqinch x30, pow2");
  TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqinch x30, vl256, mul #7");
  TEST_SINGLE(sqinch(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqinch x30, all, mul #16");

  TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqinch x30, w30, pow2");
  TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqinch x30, w30, vl256, mul #7");
  TEST_SINGLE(sqinch(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqinch x30, w30, all, mul #16");

  TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqinch x30, pow2");
  TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqinch x30, vl256, mul #7");
  TEST_SINGLE(uqinch(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqinch x30, all, mul #16");

  TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqinch w30, pow2");
  TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqinch w30, vl256, mul #7");
  TEST_SINGLE(uqinch(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqinch w30, all, mul #16");

  TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdech x30, pow2");
  TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdech x30, vl256, mul #7");
  TEST_SINGLE(sqdech(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdech x30, all, mul #16");

  TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdech x30, w30, pow2");
  TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdech x30, w30, vl256, mul #7");
  TEST_SINGLE(sqdech(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdech x30, w30, all, mul #16");

  TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdech x30, pow2");
  TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdech x30, vl256, mul #7");
  TEST_SINGLE(uqdech(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdech x30, all, mul #16");

  TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdech w30, pow2");
  TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdech w30, vl256, mul #7");
  TEST_SINGLE(uqdech(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdech w30, all, mul #16");

  // Word element count.
  TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincw x30, pow2");
  TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincw x30, vl256, mul #7");
  TEST_SINGLE(sqincw(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincw x30, all, mul #16");

  TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincw x30, w30, pow2");
  TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincw x30, w30, vl256, mul #7");
  TEST_SINGLE(sqincw(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincw x30, w30, all, mul #16");

  TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincw x30, pow2");
  TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincw x30, vl256, mul #7");
  TEST_SINGLE(uqincw(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincw x30, all, mul #16");

  TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincw w30, pow2");
  TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincw w30, vl256, mul #7");
  TEST_SINGLE(uqincw(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincw w30, all, mul #16");

  TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecw x30, pow2");
  TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, vl256, mul #7");
  TEST_SINGLE(sqdecw(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecw x30, all, mul #16");

  TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecw x30, w30, pow2");
  TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecw x30, w30, vl256, mul #7");
  TEST_SINGLE(sqdecw(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecw x30, w30, all, mul #16");

  TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecw x30, pow2");
  TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecw x30, vl256, mul #7");
  TEST_SINGLE(uqdecw(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecw x30, all, mul #16");

  TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecw w30, pow2");
  TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecw w30, vl256, mul #7");
  TEST_SINGLE(uqdecw(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecw w30, all, mul #16");

  // Doubleword element count.
  TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqincd x30, pow2");
  TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqincd x30, vl256, mul #7");
  TEST_SINGLE(sqincd(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqincd x30, all, mul #16");

  TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqincd x30, w30, pow2");
  TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqincd x30, w30, vl256, mul #7");
  TEST_SINGLE(sqincd(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqincd x30, w30, all, mul #16");

  TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqincd x30, pow2");
  TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqincd x30, vl256, mul #7");
  TEST_SINGLE(uqincd(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqincd x30, all, mul #16");

  TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqincd w30, pow2");
  TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqincd w30, vl256, mul #7");
  TEST_SINGLE(uqincd(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqincd w30, all, mul #16");

  TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_POW2, 1), "sqdecd x30, pow2");
  TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, vl256, mul #7");
  TEST_SINGLE(sqdecd(XReg::x30, PredicatePattern::SVE_ALL, 16), "sqdecd x30, all, mul #16");

  TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_POW2, 1), "sqdecd x30, w30, pow2");
  TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "sqdecd x30, w30, vl256, mul #7");
  TEST_SINGLE(sqdecd(WReg::w30, PredicatePattern::SVE_ALL, 16), "sqdecd x30, w30, all, mul #16");

  TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_POW2, 1), "uqdecd x30, pow2");
  TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_VL256, 7), "uqdecd x30, vl256, mul #7");
  TEST_SINGLE(uqdecd(XReg::x30, PredicatePattern::SVE_ALL, 16), "uqdecd x30, all, mul #16");

  TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_POW2, 1), "uqdecd w30, pow2");
  TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_VL256, 7), "uqdecd w30, vl256, mul #7");
  TEST_SINGLE(uqdecd(WReg::w30, PredicatePattern::SVE_ALL, 16), "uqdecd w30, all, mul #16");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Bitwise Immediate") {
  // Placeholder test case: it contains no assertions yet.
  // TODO: Implement in emitter.
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE bitwise logical with immediate (unpredicated)") {
  // Placeholder test case: it contains no assertions yet.
  // TODO: Implement in emitter.
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Integer Wide Immediate - Predicated") {
  // Placeholder test case: it contains no assertions yet.
  // TODO: Implement in emitter.
}

// Checks the expected disassembly for predicated immediate copies issued through
// both cpy() and mov_imm(), under merging (/m) and zeroing (/z) predication.
//
// Each group follows the same pattern:
//   * -128 and 127 are expected as unshifted immediates for every element size.
//   * -32768 and 32512 are expected as #-128 / #127 with "lsl #8"; these are
//     only exercised for 16-, 32- and 64-bit elements.
// cpy() and mov_imm() are expected to produce identical "mov" text.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE copy integer immediate (predicated)") {
  // cpy, merging predicate.
  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.h, p6/m, #-128");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.s, p6/m, #-128");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.d, p6/m, #-128");

  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.h, p6/m, #127");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.s, p6/m, #127");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.d, p6/m, #127");

  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.h, p6/m, #-128, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.s, p6/m, #-128, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.d, p6/m, #-128, lsl #8");

  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.h, p6/m, #127, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.s, p6/m, #127, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.d, p6/m, #127, lsl #8");

  // mov_imm, merging predicate.
  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.b, p6/m, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.h, p6/m, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.s, p6/m, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -128), "mov z30.d, p6/m, #-128");

  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.b, p6/m, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.h, p6/m, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.s, p6/m, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 127), "mov z30.d, p6/m, #127");

  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.h, p6/m, #-128, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.s, p6/m, #-128, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -32768), "mov z30.d, p6/m, #-128, lsl #8");

  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.h, p6/m, #127, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.s, p6/m, #127, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 32512), "mov z30.d, p6/m, #127, lsl #8");

  // cpy, zeroing predicate.
  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.h, p6/z, #-128");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.s, p6/z, #-128");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.d, p6/z, #-128");

  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.h, p6/z, #127");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.s, p6/z, #127");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.d, p6/z, #127");

  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.h, p6/z, #-128, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.s, p6/z, #-128, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.d, p6/z, #-128, lsl #8");

  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.h, p6/z, #127, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.s, p6/z, #127, lsl #8");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.d, p6/z, #127, lsl #8");

  // mov_imm, zeroing predicate.
  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.b, p6/z, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.h, p6/z, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.s, p6/z, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -128), "mov z30.d, p6/z, #-128");

  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.b, p6/z, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.h, p6/z, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.s, p6/z, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 127), "mov z30.d, p6/z, #127");

  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.h, p6/z, #-128, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.s, p6/z, #-128, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), -32768), "mov z30.d, p6/z, #-128, lsl #8");

  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.h, p6/z, #127, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.s, p6/z, #127, lsl #8");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), 32512), "mov z30.d, p6/z, #127, lsl #8");
}

// Checks the expected disassembly for unpredicated vector permutes: dup/mov
// from a general register, insr from a general or SIMD&FP register, and rev.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Unpredicated") {
  // dup from a general register is expected to print as "mov". The call passes
  // Reg::r29 for every size; the expected text names it w29 for 8/16/32-bit
  // elements and x29 for 64-bit elements.
  TEST_SINGLE(dup(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29");
  TEST_SINGLE(dup(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29");
  TEST_SINGLE(dup(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29");
  TEST_SINGLE(dup(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29");

  // mov with the same operands is expected to give the same text as dup above.
  TEST_SINGLE(mov(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "mov z30.b, w29");
  TEST_SINGLE(mov(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "mov z30.h, w29");
  TEST_SINGLE(mov(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "mov z30.s, w29");
  TEST_SINGLE(mov(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "mov z30.d, x29");

  // insr with a general-register source.
  TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, Reg::r29), "insr z30.b, w29");
  TEST_SINGLE(insr(SubRegSize::i16Bit, ZReg::z30, Reg::r29), "insr z30.h, w29");
  TEST_SINGLE(insr(SubRegSize::i32Bit, ZReg::z30, Reg::r29), "insr z30.s, w29");
  TEST_SINGLE(insr(SubRegSize::i64Bit, ZReg::z30, Reg::r29), "insr z30.d, x29");

  // insr with a VReg source; the expected scalar name (b/h/s/d) follows the element size.
  TEST_SINGLE(insr(SubRegSize::i8Bit, ZReg::z30, VReg::v29), "insr z30.b, b29");
  TEST_SINGLE(insr(SubRegSize::i16Bit, ZReg::z30, VReg::v29), "insr z30.h, h29");
  TEST_SINGLE(insr(SubRegSize::i32Bit, ZReg::z30, VReg::v29), "insr z30.s, s29");
  TEST_SINGLE(insr(SubRegSize::i64Bit, ZReg::z30, VReg::v29), "insr z30.d, d29");

  // rev on Z registers, all four element sizes.
  TEST_SINGLE(rev(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "rev z30.b, z29.b");
  TEST_SINGLE(rev(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "rev z30.h, z29.h");
  TEST_SINGLE(rev(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "rev z30.s, z29.s");
  TEST_SINGLE(rev(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "rev z30.d, z29.d");
}

// Checks the expected disassembly for sunpklo/sunpkhi/uunpklo/uunpkhi.
// The SubRegSize argument selects the destination element size; the expected
// text shows the source with the next-narrower element suffix (h<-b, s<-h, d<-s).
// The 8-bit and 128-bit variants are left commented out for every instruction.
// NOTE(review): presumably those sizes are not valid for these instructions — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE unpack vector elements") {
  // TEST_SINGLE(sunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29),   "sunpklo z30.b, z29.b");
  TEST_SINGLE(sunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpklo z30.h, z29.b");
  TEST_SINGLE(sunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpklo z30.s, z29.h");
  TEST_SINGLE(sunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpklo z30.d, z29.s");
  // TEST_SINGLE(sunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpklo z30.q, z29.q");

  // TEST_SINGLE(sunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29),   "sunpkhi z30.b, z29.b");
  TEST_SINGLE(sunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.h, z29.b");
  TEST_SINGLE(sunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.s, z29.h");
  TEST_SINGLE(sunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.d, z29.s");
  // TEST_SINGLE(sunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "sunpkhi z30.q, z29.q");

  // TEST_SINGLE(uunpklo(SubRegSize::i8Bit, ZReg::z30, ZReg::z29),   "uunpklo z30.b, z29.b");
  TEST_SINGLE(uunpklo(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpklo z30.h, z29.b");
  TEST_SINGLE(uunpklo(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpklo z30.s, z29.h");
  TEST_SINGLE(uunpklo(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpklo z30.d, z29.s");
  // TEST_SINGLE(uunpklo(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpklo z30.q, z29.q");

  // TEST_SINGLE(uunpkhi(SubRegSize::i8Bit, ZReg::z30, ZReg::z29),   "uunpkhi z30.b, z29.b");
  TEST_SINGLE(uunpkhi(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.h, z29.b");
  TEST_SINGLE(uunpkhi(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.s, z29.h");
  TEST_SINGLE(uunpkhi(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.d, z29.s");
  // TEST_SINGLE(uunpkhi(SubRegSize::i128Bit, ZReg::z30, ZReg::z29), "uunpkhi z30.q, z29.q");
}

// Checks the expected disassembly for permutes on predicate registers:
// rev, punpklo/punpkhi, zip1/zip2, uzp1/uzp2 and trn1/trn2.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Predicate") {
  // rev on predicate registers, all four element sizes.
  TEST_SINGLE(rev(SubRegSize::i8Bit, PReg::p15, PReg::p14), "rev p15.b, p14.b");
  TEST_SINGLE(rev(SubRegSize::i16Bit, PReg::p15, PReg::p14), "rev p15.h, p14.h");
  TEST_SINGLE(rev(SubRegSize::i32Bit, PReg::p15, PReg::p14), "rev p15.s, p14.s");
  TEST_SINGLE(rev(SubRegSize::i64Bit, PReg::p15, PReg::p14), "rev p15.d, p14.d");

  // punpklo/punpkhi take no size argument; the expected text is always .h <- .b.
  TEST_SINGLE(punpklo(PReg::p15, PReg::p14), "punpklo p15.h, p14.b");
  TEST_SINGLE(punpkhi(PReg::p15, PReg::p14), "punpkhi p15.h, p14.b");

  // Three-operand predicate permutes; every operand carries the same element suffix.
  TEST_SINGLE(zip1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.b, p14.b, p13.b");
  TEST_SINGLE(zip1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.h, p14.h, p13.h");
  TEST_SINGLE(zip1(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.s, p14.s, p13.s");
  TEST_SINGLE(zip1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "zip1 p15.d, p14.d, p13.d");

  TEST_SINGLE(zip2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.b, p14.b, p13.b");
  TEST_SINGLE(zip2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.h, p14.h, p13.h");
  TEST_SINGLE(zip2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.s, p14.s, p13.s");
  TEST_SINGLE(zip2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "zip2 p15.d, p14.d, p13.d");

  TEST_SINGLE(uzp1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.b, p14.b, p13.b");
  TEST_SINGLE(uzp1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.h, p14.h, p13.h");
  TEST_SINGLE(uzp1(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.s, p14.s, p13.s");
  TEST_SINGLE(uzp1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "uzp1 p15.d, p14.d, p13.d");

  TEST_SINGLE(uzp2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.b, p14.b, p13.b");
  TEST_SINGLE(uzp2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.h, p14.h, p13.h");
  TEST_SINGLE(uzp2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.s, p14.s, p13.s");
  TEST_SINGLE(uzp2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "uzp2 p15.d, p14.d, p13.d");

  TEST_SINGLE(trn1(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.b, p14.b, p13.b");
  TEST_SINGLE(trn1(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.h, p14.h, p13.h");
  TEST_SINGLE(trn1(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.s, p14.s, p13.s");
  TEST_SINGLE(trn1(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "trn1 p15.d, p14.d, p13.d");

  TEST_SINGLE(trn2(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.b, p14.b, p13.b");
  TEST_SINGLE(trn2(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.h, p14.h, p13.h");
  TEST_SINGLE(trn2(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.s, p14.s, p13.s");
  TEST_SINGLE(trn2(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p13), "trn2 p15.d, p14.d, p13.d");
}

// Checks the expected disassembly for predicated vector permutes: cpy from a
// SIMD&FP scalar, compact, cpy from a general register, and both splice forms.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Predicated - Base") {
  // CPY (SIMD&FP scalar)
  // Expected to print as "mov" with a merging predicate; the scalar name follows the element size.
  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.b, p7/m, b30");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.h, p7/m, h30");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.s, p7/m, s30");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), VReg::v30), "mov z30.d, p7/m, d30");

  // compact: only the 32-bit and 64-bit element sizes are enabled; the others are left commented out.
  // TEST_SINGLE(compact(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z29),   "compact z30.b, p6, z29.b");
  // TEST_SINGLE(compact(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z29),  "compact z30.h, p6, z29.h");
  TEST_SINGLE(compact(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.s, p6, z29.s");
  TEST_SINGLE(compact(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.d, p6, z29.d");
  // TEST_SINGLE(compact(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z29), "compact z30.q, p6, z29.q");

  // CPY (scalar)
  // The source is the stack pointer: WReg::rsp ("wsp") for 8/16/32-bit, XReg::rsp ("sp") for 64-bit.
  TEST_SINGLE(cpy(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.b, p7/m, wsp");
  TEST_SINGLE(cpy(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.h, p7/m, wsp");
  TEST_SINGLE(cpy(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), WReg::rsp), "mov z30.s, p7/m, wsp");
  TEST_SINGLE(cpy(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), XReg::rsp), "mov z30.d, p7/m, sp");

  // splice, constructive form: the two sources are expected as a register list
  // "{zN, zM}". The z31/z0 case covers a pair that wraps around the register file.
  TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.b, p6, {z28.b, "
                                                                                                          "z29.b}");
  TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.h, p6, {z28.h, "
                                                                                                           "z29.h}");
  TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.s, p6, {z28.s, "
                                                                                                           "z29.s}");
  TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.d, p6, {z28.d, "
                                                                                                           "z29.d}");
  TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z31, ZReg::z0), "splice z30.d, p6, {z31.d, "
                                                                                                          "z0.d}");
  // TEST_SINGLE(splice<OpType::Constructive>(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z28, ZReg::z29), "splice z30.q, p6, {z28.q, z29.q}");

  // splice, destructive form: the destination (z30) is also passed as the first source.
  TEST_SINGLE(splice<OpType::Destructive>(SubRegSize::i8Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.b, p6, z30.b, z28.b");
  TEST_SINGLE(splice<OpType::Destructive>(SubRegSize::i16Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.h, p6, z30.h, z28.h");
  TEST_SINGLE(splice<OpType::Destructive>(SubRegSize::i32Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.s, p6, z30.s, z28.s");
  TEST_SINGLE(splice<OpType::Destructive>(SubRegSize::i64Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.d, p6, z30.d, z28.d");
  // TEST_SINGLE(splice<OpType::Destructive>(SubRegSize::i128Bit, ZReg::z30, PReg::p6, ZReg::z30, ZReg::z28), "splice z30.q, p6, z30.q, z28.q");
}

// Checks the expected disassembly for lasta/lastb with a general-register
// destination. The 8/16/32-bit cases pass a WReg destination and the 64-bit
// case passes an XReg destination.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE extract element to general register") {
  TEST_SINGLE(lasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.b");
  TEST_SINGLE(lasta(SubRegSize::i16Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.h");
  TEST_SINGLE(lasta(SubRegSize::i32Bit, WReg::w30, PReg::p7, ZReg::z30), "lasta w30, p7, z30.s");
  TEST_SINGLE(lasta(SubRegSize::i64Bit, XReg::x30, PReg::p7, ZReg::z30), "lasta x30, p7, z30.d");

  TEST_SINGLE(lastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.b");
  TEST_SINGLE(lastb(SubRegSize::i16Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.h");
  TEST_SINGLE(lastb(SubRegSize::i32Bit, WReg::w30, PReg::p7, ZReg::z30), "lastb w30, p7, z30.s");
  TEST_SINGLE(lastb(SubRegSize::i64Bit, XReg::x30, PReg::p7, ZReg::z30), "lastb x30, p7, z30.d");
}

// Checks the expected disassembly for lasta/lastb with a SIMD&FP scalar
// destination. Each case passes the scalar register type matching the element
// size (BReg, HReg, SReg, DReg).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE extract element to SIMD&FP scalar register") {
  TEST_SINGLE(lasta(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lasta b30, p7, z29.b");
  TEST_SINGLE(lasta(SubRegSize::i16Bit, HReg::h30, PReg::p7, ZReg::z29), "lasta h30, p7, z29.h");
  TEST_SINGLE(lasta(SubRegSize::i32Bit, SReg::s30, PReg::p7, ZReg::z29), "lasta s30, p7, z29.s");
  TEST_SINGLE(lasta(SubRegSize::i64Bit, DReg::d30, PReg::p7, ZReg::z29), "lasta d30, p7, z29.d");

  TEST_SINGLE(lastb(SubRegSize::i8Bit, BReg::b30, PReg::p7, ZReg::z29), "lastb b30, p7, z29.b");
  TEST_SINGLE(lastb(SubRegSize::i16Bit, HReg::h30, PReg::p7, ZReg::z29), "lastb h30, p7, z29.h");
  TEST_SINGLE(lastb(SubRegSize::i32Bit, SReg::s30, PReg::p7, ZReg::z29), "lastb s30, p7, z29.s");
  TEST_SINGLE(lastb(SubRegSize::i64Bit, DReg::d30, PReg::p7, ZReg::z29), "lastb d30, p7, z29.d");
}

// Checks the expected disassembly for revb/revh/revw/rbit with a merging predicate.
// Enabled element sizes differ per instruction; the rest are left commented out:
//   revb: 16/32/64-bit, revh: 32/64-bit, revw: 64-bit only, rbit: all four sizes.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE reverse within elements") {
  // TEST_SINGLE(revb(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.b, p6/m, z29.b");
  TEST_SINGLE(revb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.h, p6/m, z29.h");
  TEST_SINGLE(revb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.s, p6/m, z29.s");
  TEST_SINGLE(revb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revb z30.d, p6/m, z29.d");

  // TEST_SINGLE(revh(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.b, p6/m, z29.b");
  // TEST_SINGLE(revh(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.h, p6/m, z29.h");
  TEST_SINGLE(revh(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.s, p6/m, z29.s");
  TEST_SINGLE(revh(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revh z30.d, p6/m, z29.d");

  // TEST_SINGLE(revw(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.b, p6/m, z29.b");
  // TEST_SINGLE(revw(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.h, p6/m, z29.h");
  // TEST_SINGLE(revw(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.s, p6/m, z29.s");
  TEST_SINGLE(revw(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "revw z30.d, p6/m, z29.d");

  TEST_SINGLE(rbit(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "rbit z30.b, p6/m, z29.b");
  TEST_SINGLE(rbit(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "rbit z30.h, p6/m, z29.h");
  TEST_SINGLE(rbit(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "rbit z30.s, p6/m, z29.s");
  TEST_SINGLE(rbit(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "rbit z30.d, p6/m, z29.d");
}

// Checks the expected disassembly for the vector-destination forms of
// clasta/clastb. In every case the destination (z30) is also passed as the
// first source operand.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally broadcast element to vector") {
  TEST_SINGLE(clasta(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.b, p7, z30.b, z29.b");
  TEST_SINGLE(clasta(SubRegSize::i16Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.h, p7, z30.h, z29.h");
  TEST_SINGLE(clasta(SubRegSize::i32Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.s, p7, z30.s, z29.s");
  TEST_SINGLE(clasta(SubRegSize::i64Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clasta z30.d, p7, z30.d, z29.d");

  TEST_SINGLE(clastb(SubRegSize::i8Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.b, p7, z30.b, z29.b");
  TEST_SINGLE(clastb(SubRegSize::i16Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.h, p7, z30.h, z29.h");
  TEST_SINGLE(clastb(SubRegSize::i32Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.s, p7, z30.s, z29.s");
  TEST_SINGLE(clastb(SubRegSize::i64Bit, ZReg::z30, PReg::p7, ZReg::z30, ZReg::z29), "clastb z30.d, p7, z30.d, z29.d");
}

// Checks the expected disassembly for clasta/clastb with a SIMD&FP scalar
// destination. Every call passes VReg::v30 as both destination and first
// source; the expected scalar name (b/h/s/d) follows the element size.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally extract element to SIMD&FP scalar") {
  TEST_SINGLE(clasta(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta b30, p7, b30, z29.b");
  TEST_SINGLE(clasta(SubRegSize::i16Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta h30, p7, h30, z29.h");
  TEST_SINGLE(clasta(SubRegSize::i32Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta s30, p7, s30, z29.s");
  TEST_SINGLE(clasta(SubRegSize::i64Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clasta d30, p7, d30, z29.d");

  TEST_SINGLE(clastb(SubRegSize::i8Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb b30, p7, b30, z29.b");
  TEST_SINGLE(clastb(SubRegSize::i16Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb h30, p7, h30, z29.h");
  TEST_SINGLE(clastb(SubRegSize::i32Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb s30, p7, s30, z29.s");
  TEST_SINGLE(clastb(SubRegSize::i64Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "clastb d30, p7, d30, z29.d");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE reverse doublewords") {
  // Placeholder test case: it currently contains no TEST_SINGLE checks.
  // TODO: Implement in emitter.
}

// Checks the expected disassembly for clasta/clastb with a general-register
// destination. The destination is also passed as the first source; 8/16/32-bit
// cases use WReg::w30 and the 64-bit case uses XReg::x30.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally extract element to general register") {
  TEST_SINGLE(clasta(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.b");
  TEST_SINGLE(clasta(SubRegSize::i16Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.h");
  TEST_SINGLE(clasta(SubRegSize::i32Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clasta w30, p7, w30, z29.s");
  TEST_SINGLE(clasta(SubRegSize::i64Bit, XReg::x30, PReg::p7, XReg::x30, ZReg::z29), "clasta x30, p7, x30, z29.d");

  TEST_SINGLE(clastb(SubRegSize::i8Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.b");
  TEST_SINGLE(clastb(SubRegSize::i16Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.h");
  TEST_SINGLE(clastb(SubRegSize::i32Bit, WReg::w30, PReg::p7, WReg::w30, ZReg::z29), "clastb w30, p7, w30, z29.s");
  TEST_SINGLE(clastb(SubRegSize::i64Bit, XReg::x30, PReg::p7, XReg::x30, ZReg::z29), "clastb x30, p7, x30, z29.d");
}

// Checks the expected disassembly for ext in both its destructive and
// constructive forms, using the immediates 0 and 255.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Permute Vector - Extract") {
  // Destructive form: the destination (z30) is also passed as the first source.
  TEST_SINGLE(ext<ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 0), "ext z30.b, z30.b, z29.b, #0");
  TEST_SINGLE(ext<ARMEmitter::OpType::Destructive>(ZReg::z30, ZReg::z30, ZReg::z29, 255), "ext z30.b, z30.b, z29.b, #255");

  // Constructive form: the two sources are expected as a register list "{zN, zM}".
  // The z31/z0 case covers a pair that wraps around the register file.
  TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 0), "ext z30.b, {z28.b, z29.b}, #0");
  TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z28, ZReg::z29, 255), "ext z30.b, {z28.b, z29.b}, #255");
  TEST_SINGLE(ext<ARMEmitter::OpType::Constructive>(ZReg::z30, ZReg::z31, ZReg::z0, 255), "ext z30.b, {z31.b, z0.b}, #255");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE permute vector segments") {
  // Placeholder test case: it currently contains no TEST_SINGLE checks.
  // TODO: Implement in emitter.
}

// Checks the expected disassembly for vector-vs-vector integer compares
// (cmpeq, cmpge, cmpgt, cmphi, cmphs, cmpne) at all four element sizes.
// Every case writes p6 under the zeroing governing predicate p5/z, and both
// vector operands carry the same element suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare vectors") {
  TEST_SINGLE(cmpeq(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmpeq(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmpeq(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmpeq(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(cmpge(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmpge(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmpge(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmpge(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(cmpgt(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmpgt(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmpgt(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmpgt(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(cmphi(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmphi(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmphi(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmphi(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(cmphs(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmphs(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmphs(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmphs(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.d, p5/z, z30.d, z29.d");

  TEST_SINGLE(cmpne(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.b");
  TEST_SINGLE(cmpne(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.h, p5/z, z30.h, z29.h");
  TEST_SINGLE(cmpne(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.s, p5/z, z30.s, z29.s");
  TEST_SINGLE(cmpne(SubRegSize::i64Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.d, p5/z, z30.d, z29.d");
}

// Checks the expected disassembly for the *_wide integer compares. The
// SubRegSize argument sets the suffix of the destination predicate and the
// first vector; the second vector is always expected with the .d suffix.
// Only 8/16/32-bit sizes are exercised. The expected mnemonic drops the
// "_wide" suffix used in the emitter function name.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare with wide elements") {
  TEST_SINGLE(cmpeq_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmpeq_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmpeq_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpeq p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmpgt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmpgt_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmpgt_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpgt p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmpge_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmpge_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmpge_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpge p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmphi_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmphi_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmphi_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphi p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmphs_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmphs_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmphs_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmphs p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmplt_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmplt_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmplt_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplt p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmple_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmple_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmple_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmple p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmplo_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmplo_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmplo_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmplo p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmpls_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmpls_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmpls_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpls p6.s, p5/z, z30.s, z29.d");

  TEST_SINGLE(cmpne_wide(SubRegSize::i8Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.b, p5/z, z30.b, z29.d");
  TEST_SINGLE(cmpne_wide(SubRegSize::i16Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.h, p5/z, z30.h, z29.d");
  TEST_SINGLE(cmpne_wide(SubRegSize::i32Bit, PReg::p6, PReg::p5.Zeroing(), ZReg::z30, ZReg::z29), "cmpne p6.s, p5/z, z30.s, z29.d");
}

// Checks the expected disassembly for brkpa/brkpas/brkpb/brkpbs. All four use
// a zeroing governing predicate, and every predicate operand is expected with
// the .b suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE propagate break from previous partition") {
  TEST_SINGLE(brkpa(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpa p15.b, p14/z, p13.b, p12.b");
  TEST_SINGLE(brkpas(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpas p15.b, p14/z, p13.b, p12.b");
  TEST_SINGLE(brkpb(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpb p15.b, p14/z, p13.b, p12.b");
  TEST_SINGLE(brkpbs(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p12), "brkpbs p15.b, p14/z, p13.b, p12.b");
}

// Checks the expected disassembly for brkn/brkns. In both cases the
// destination (p15) is also passed as the last operand.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE propagate break to next partition") {
  TEST_SINGLE(brkn(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkn p15.b, p14/z, p13.b, p15.b");
  TEST_SINGLE(brkns(PReg::p15, PReg::p14.Zeroing(), PReg::p13, PReg::p15), "brkns p15.b, p14/z, p13.b, p15.b");
}

// Checks the expected disassembly for brka/brkb and their s-suffixed variants.
// brka and brkb are exercised with both zeroing (/z) and merging (/m)
// predication; brkas and brkbs are exercised with zeroing only.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE partition break condition") {
  TEST_SINGLE(brka(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brka p15.b, p14/z, p13.b");
  TEST_SINGLE(brka(PReg::p15, PReg::p14.Merging(), PReg::p13), "brka p15.b, p14/m, p13.b");
  TEST_SINGLE(brkas(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkas p15.b, p14/z, p13.b");

  TEST_SINGLE(brkb(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkb p15.b, p14/z, p13.b");
  TEST_SINGLE(brkb(PReg::p15, PReg::p14.Merging(), PReg::p13), "brkb p15.b, p14/m, p13.b");
  TEST_SINGLE(brkbs(PReg::p15, PReg::p14.Zeroing(), PReg::p13), "brkbs p15.b, p14/z, p13.b");
}

// Checks the expected disassembly for pnext at all four element sizes. The
// destination (p15) is also passed as the last operand, and the middle
// predicate (p14) is expected without an element suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Predicate Misc") {
  TEST_SINGLE(pnext(SubRegSize::i8Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.b, p14, p15.b");
  TEST_SINGLE(pnext(SubRegSize::i16Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.h, p14, p15.h");
  TEST_SINGLE(pnext(SubRegSize::i32Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.s, p14, p15.s");
  TEST_SINGLE(pnext(SubRegSize::i64Bit, PReg::p15, PReg::p14, PReg::p15), "pnext p15.d, p14, p15.d");
}

// Pairs the ptest emitter call with the disassembly text expected for it, using
// two register pairs (p6/p5 and p15/p14).
// In the expected text the first operand has no size suffix and the second is
// printed with `.b`.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate test") {
  TEST_SINGLE(ptest(PReg::p6, PReg::p5), "ptest p6, p5.b");
  TEST_SINGLE(ptest(PReg::p15, PReg::p14), "ptest p15, p14.b");
}

// Pairs the pfirst emitter call with the disassembly text expected for it, using
// two register sets (p6/p5 and p15/p14).
// Both cases pass the same register as the first and the last operand; the
// middle operand appears in the expected text without a size suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate first active") {
  TEST_SINGLE(pfirst(PReg::p6, PReg::p5, PReg::p6), "pfirst p6.b, p5, p6.b");
  TEST_SINGLE(pfirst(PReg::p15, PReg::p14, PReg::p15), "pfirst p15.b, p14, p15.b");
}

// Pairs the single-operand pfalse emitter call with the disassembly text
// expected for it, for destination registers p6 and p15.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate zero") {
  TEST_SINGLE(pfalse(PReg::p6), "pfalse p6.b");
  TEST_SINGLE(pfalse(PReg::p15), "pfalse p15.b");
}

// Pairs the two-operand (predicated) rdffr and rdffrs emitter calls with the
// disassembly text expected for them. Each mnemonic is checked with two
// register pairs (p6/p5 and p15/p14), always with zeroing predication (`/z`).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate read from FFR (predicated)") {
  TEST_SINGLE(rdffr(PReg::p6, PReg::p5.Zeroing()), "rdffr p6.b, p5/z");
  TEST_SINGLE(rdffr(PReg::p15, PReg::p14.Zeroing()), "rdffr p15.b, p14/z");
  TEST_SINGLE(rdffrs(PReg::p6, PReg::p5.Zeroing()), "rdffrs p6.b, p5/z");
  TEST_SINGLE(rdffrs(PReg::p15, PReg::p14.Zeroing()), "rdffrs p15.b, p14/z");
}

// Pairs the single-operand (unpredicated) rdffr emitter call with the
// disassembly text expected for it, for destination registers p6 and p15.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate read from FFR (unpredicated)") {
  TEST_SINGLE(rdffr(PReg::p6), "rdffr p6.b");
  TEST_SINGLE(rdffr(PReg::p15), "rdffr p15.b");
}

// Pairs the ptrue and ptrues emitter calls with the disassembly text expected
// for them, always targeting p6.
//
// Layout: one pair of groups per PredicatePattern value, in the order
// POW2, VL1..VL8, VL16, VL32, VL64, VL128, VL256, MUL4, MUL3, ALL. Within a
// pair the first group is ptrue and the second is ptrues, each covering the
// four element sizes (8/16/32/64-bit -> .b/.h/.s/.d).
//
// The expected text names the pattern in lower case (e.g. SVE_VL16 -> "vl16"),
// except for SVE_ALL where no pattern operand is printed at all.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate initialize") {
  // SVE_POW2
  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrue p6.b, pow2");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrue p6.h, pow2");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrue p6.s, pow2");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrue p6.d, pow2");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrues p6.b, pow2");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrues p6.h, pow2");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrues p6.s, pow2");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_POW2), "ptrues p6.d, pow2");

  // SVE_VL1 through SVE_VL8
  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrue p6.b, vl1");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrue p6.h, vl1");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrue p6.s, vl1");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrue p6.d, vl1");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrues p6.b, vl1");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrues p6.h, vl1");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrues p6.s, vl1");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL1), "ptrues p6.d, vl1");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrue p6.b, vl2");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrue p6.h, vl2");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrue p6.s, vl2");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrue p6.d, vl2");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrues p6.b, vl2");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrues p6.h, vl2");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrues p6.s, vl2");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL2), "ptrues p6.d, vl2");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrue p6.b, vl3");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrue p6.h, vl3");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrue p6.s, vl3");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrue p6.d, vl3");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrues p6.b, vl3");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrues p6.h, vl3");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrues p6.s, vl3");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL3), "ptrues p6.d, vl3");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrue p6.b, vl4");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrue p6.h, vl4");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrue p6.s, vl4");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrue p6.d, vl4");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrues p6.b, vl4");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrues p6.h, vl4");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrues p6.s, vl4");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL4), "ptrues p6.d, vl4");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrue p6.b, vl5");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrue p6.h, vl5");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrue p6.s, vl5");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrue p6.d, vl5");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrues p6.b, vl5");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrues p6.h, vl5");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrues p6.s, vl5");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL5), "ptrues p6.d, vl5");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrue p6.b, vl6");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrue p6.h, vl6");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrue p6.s, vl6");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrue p6.d, vl6");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrues p6.b, vl6");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrues p6.h, vl6");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrues p6.s, vl6");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL6), "ptrues p6.d, vl6");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrue p6.b, vl7");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrue p6.h, vl7");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrue p6.s, vl7");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrue p6.d, vl7");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrues p6.b, vl7");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrues p6.h, vl7");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrues p6.s, vl7");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL7), "ptrues p6.d, vl7");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrue p6.b, vl8");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrue p6.h, vl8");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrue p6.s, vl8");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrue p6.d, vl8");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrues p6.b, vl8");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrues p6.h, vl8");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrues p6.s, vl8");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL8), "ptrues p6.d, vl8");

  // SVE_VL16 through SVE_VL256
  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrue p6.b, vl16");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrue p6.h, vl16");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrue p6.s, vl16");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrue p6.d, vl16");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrues p6.b, vl16");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrues p6.h, vl16");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrues p6.s, vl16");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL16), "ptrues p6.d, vl16");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrue p6.b, vl32");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrue p6.h, vl32");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrue p6.s, vl32");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrue p6.d, vl32");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrues p6.b, vl32");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrues p6.h, vl32");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrues p6.s, vl32");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL32), "ptrues p6.d, vl32");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrue p6.b, vl64");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrue p6.h, vl64");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrue p6.s, vl64");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrue p6.d, vl64");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrues p6.b, vl64");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrues p6.h, vl64");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrues p6.s, vl64");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL64), "ptrues p6.d, vl64");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrue p6.b, vl128");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrue p6.h, vl128");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrue p6.s, vl128");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrue p6.d, vl128");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrues p6.b, vl128");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrues p6.h, vl128");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrues p6.s, vl128");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL128), "ptrues p6.d, vl128");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrue p6.b, vl256");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrue p6.h, vl256");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrue p6.s, vl256");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrue p6.d, vl256");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrues p6.b, vl256");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrues p6.h, vl256");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrues p6.s, vl256");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_VL256), "ptrues p6.d, vl256");

  // SVE_MUL4, then SVE_MUL3
  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrue p6.b, mul4");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrue p6.h, mul4");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrue p6.s, mul4");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrue p6.d, mul4");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrues p6.b, mul4");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrues p6.h, mul4");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrues p6.s, mul4");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_MUL4), "ptrues p6.d, mul4");

  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrue p6.b, mul3");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrue p6.h, mul3");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrue p6.s, mul3");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrue p6.d, mul3");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrues p6.b, mul3");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrues p6.h, mul3");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrues p6.s, mul3");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_MUL3), "ptrues p6.d, mul3");

  // SVE_ALL: the expected text has no pattern operand after the register.
  TEST_SINGLE(ptrue(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrue p6.b");
  TEST_SINGLE(ptrue(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrue p6.h");
  TEST_SINGLE(ptrue(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrue p6.s");
  TEST_SINGLE(ptrue(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrue p6.d");

  TEST_SINGLE(ptrues(SubRegSize::i8Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrues p6.b");
  TEST_SINGLE(ptrues(SubRegSize::i16Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrues p6.h");
  TEST_SINGLE(ptrues(SubRegSize::i32Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrues p6.s");
  TEST_SINGLE(ptrues(SubRegSize::i64Bit, PReg::p6, PredicatePattern::SVE_ALL), "ptrues p6.d");
}

// Pairs the while{ge,gt,lt,le,hs,hi,lo,ls} emitter calls with the disassembly
// text expected for them, always writing p15 and reading registers 30 and 29.
//
// Layout: one group of eight cases per mnemonic. The first four use XReg
// operands across the element sizes (.b/.h/.s/.d); the last four repeat the
// same sizes with WReg operands, which the expected text prints as w30/w29.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer compare scalar count and limit") {
  // whilege
  TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.b, x30, x29");
  TEST_SINGLE(whilege(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.h, x30, x29");
  TEST_SINGLE(whilege(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.s, x30, x29");
  TEST_SINGLE(whilege(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilege p15.d, x30, x29");
  TEST_SINGLE(whilege(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.b, w30, w29");
  TEST_SINGLE(whilege(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.h, w30, w29");
  TEST_SINGLE(whilege(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.s, w30, w29");
  TEST_SINGLE(whilege(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilege p15.d, w30, w29");

  // whilegt
  TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.b, x30, x29");
  TEST_SINGLE(whilegt(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.h, x30, x29");
  TEST_SINGLE(whilegt(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.s, x30, x29");
  TEST_SINGLE(whilegt(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilegt p15.d, x30, x29");
  TEST_SINGLE(whilegt(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.b, w30, w29");
  TEST_SINGLE(whilegt(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.h, w30, w29");
  TEST_SINGLE(whilegt(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.s, w30, w29");
  TEST_SINGLE(whilegt(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilegt p15.d, w30, w29");

  // whilelt
  TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.b, x30, x29");
  TEST_SINGLE(whilelt(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.h, x30, x29");
  TEST_SINGLE(whilelt(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.s, x30, x29");
  TEST_SINGLE(whilelt(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilelt p15.d, x30, x29");
  TEST_SINGLE(whilelt(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.b, w30, w29");
  TEST_SINGLE(whilelt(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.h, w30, w29");
  TEST_SINGLE(whilelt(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.s, w30, w29");
  TEST_SINGLE(whilelt(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilelt p15.d, w30, w29");

  // whilele
  TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.b, x30, x29");
  TEST_SINGLE(whilele(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.h, x30, x29");
  TEST_SINGLE(whilele(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.s, x30, x29");
  TEST_SINGLE(whilele(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilele p15.d, x30, x29");
  TEST_SINGLE(whilele(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.b, w30, w29");
  TEST_SINGLE(whilele(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.h, w30, w29");
  TEST_SINGLE(whilele(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.s, w30, w29");
  TEST_SINGLE(whilele(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilele p15.d, w30, w29");

  // whilehs
  TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.b, x30, x29");
  TEST_SINGLE(whilehs(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.h, x30, x29");
  TEST_SINGLE(whilehs(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.s, x30, x29");
  TEST_SINGLE(whilehs(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilehs p15.d, x30, x29");
  TEST_SINGLE(whilehs(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.b, w30, w29");
  TEST_SINGLE(whilehs(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.h, w30, w29");
  TEST_SINGLE(whilehs(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.s, w30, w29");
  TEST_SINGLE(whilehs(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilehs p15.d, w30, w29");

  // whilehi
  TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.b, x30, x29");
  TEST_SINGLE(whilehi(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.h, x30, x29");
  TEST_SINGLE(whilehi(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.s, x30, x29");
  TEST_SINGLE(whilehi(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilehi p15.d, x30, x29");
  TEST_SINGLE(whilehi(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.b, w30, w29");
  TEST_SINGLE(whilehi(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.h, w30, w29");
  TEST_SINGLE(whilehi(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.s, w30, w29");
  TEST_SINGLE(whilehi(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilehi p15.d, w30, w29");

  // whilelo
  TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.b, x30, x29");
  TEST_SINGLE(whilelo(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.h, x30, x29");
  TEST_SINGLE(whilelo(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.s, x30, x29");
  TEST_SINGLE(whilelo(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilelo p15.d, x30, x29");
  TEST_SINGLE(whilelo(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.b, w30, w29");
  TEST_SINGLE(whilelo(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.h, w30, w29");
  TEST_SINGLE(whilelo(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.s, w30, w29");
  TEST_SINGLE(whilelo(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilelo p15.d, w30, w29");

  // whilels
  TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.b, x30, x29");
  TEST_SINGLE(whilels(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.h, x30, x29");
  TEST_SINGLE(whilels(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.s, x30, x29");
  TEST_SINGLE(whilels(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilels p15.d, x30, x29");
  TEST_SINGLE(whilels(SubRegSize::i8Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.b, w30, w29");
  TEST_SINGLE(whilels(SubRegSize::i16Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.h, w30, w29");
  TEST_SINGLE(whilels(SubRegSize::i32Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.s, w30, w29");
  TEST_SINGLE(whilels(SubRegSize::i64Bit, PReg::p15, WReg::w30, WReg::w29), "whilels p15.d, w30, w29");
}

// Pairs the ctermeq and ctermne emitter calls with the disassembly text
// expected for them. Each mnemonic is checked once with XReg operands and once
// with WReg operands, using registers 30 and 29 in both forms.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE conditionally terminate scalars") {
  TEST_SINGLE(ctermeq(XReg::x30, XReg::x29), "ctermeq x30, x29");
  TEST_SINGLE(ctermeq(WReg::w30, WReg::w29), "ctermeq w30, w29");

  TEST_SINGLE(ctermne(XReg::x30, XReg::x29), "ctermne x30, x29");
  TEST_SINGLE(ctermne(WReg::w30, WReg::w29), "ctermne w30, w29");
}

// Pairs the whilewr and whilerw emitter calls with the disassembly text
// expected for them, once per element size (.b/.h/.s/.d).
// Every case writes p15 and reads x30/x29; only XReg operands are exercised
// in this test case.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE pointer conflict compare") {
  TEST_SINGLE(whilewr(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.b, x30, x29");
  TEST_SINGLE(whilewr(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.h, x30, x29");
  TEST_SINGLE(whilewr(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.s, x30, x29");
  TEST_SINGLE(whilewr(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilewr p15.d, x30, x29");

  TEST_SINGLE(whilerw(SubRegSize::i8Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.b, x30, x29");
  TEST_SINGLE(whilerw(SubRegSize::i16Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.h, x30, x29");
  TEST_SINGLE(whilerw(SubRegSize::i32Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.s, x30, x29");
  TEST_SINGLE(whilerw(SubRegSize::i64Bit, PReg::p15, XReg::x30, XReg::x29), "whilerw p15.d, x30, x29");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer add/subtract immediate (unpredicated)") {
  TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "add z30.b, z30.b, #0");
  TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "add z30.b, z30.b, #127");
  TEST_SINGLE(add(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "add z30.b, z30.b, #255");

  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "add z30.h, z30.h, #0");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "add z30.h, z30.h, #127");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "add z30.h, z30.h, #255");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "add z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "add z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(add(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "add z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "add z30.s, z30.s, #0");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "add z30.s, z30.s, #127");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "add z30.s, z30.s, #255");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "add z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "add z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(add(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "add z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "add z30.d, z30.d, #0");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "add z30.d, z30.d, #127");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "add z30.d, z30.d, #255");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "add z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "add z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(add(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "add z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sub z30.b, z30.b, #0");
  TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sub z30.b, z30.b, #127");
  TEST_SINGLE(sub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sub z30.b, z30.b, #255");

  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sub z30.h, z30.h, #0");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sub z30.h, z30.h, #127");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sub z30.h, z30.h, #255");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sub z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sub z30.s, z30.s, #0");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sub z30.s, z30.s, #127");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sub z30.s, z30.s, #255");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sub z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sub z30.d, z30.d, #0");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sub z30.d, z30.d, #127");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sub z30.d, z30.d, #255");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sub z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sub z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(sub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sub z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "subr z30.b, z30.b, #0");
  TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "subr z30.b, z30.b, #127");
  TEST_SINGLE(subr(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "subr z30.b, z30.b, #255");

  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "subr z30.h, z30.h, #0");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "subr z30.h, z30.h, #127");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "subr z30.h, z30.h, #255");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "subr z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "subr z30.s, z30.s, #0");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "subr z30.s, z30.s, #127");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "subr z30.s, z30.s, #255");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "subr z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "subr z30.d, z30.d, #0");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "subr z30.d, z30.d, #127");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "subr z30.d, z30.d, #255");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "subr z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "subr z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(subr(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "subr z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.b, z30.b, #0");
  TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.b, z30.b, #127");
  TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.b, z30.b, #255");

  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.h, z30.h, #0");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.h, z30.h, #127");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.h, z30.h, #255");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.s, z30.s, #0");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.s, z30.s, #127");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.s, z30.s, #255");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqadd z30.d, z30.d, #0");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqadd z30.d, z30.d, #127");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sqadd z30.d, z30.d, #255");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqadd z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sqadd z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sqadd z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.b, z30.b, #0");
  TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.b, z30.b, #127");
  TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.b, z30.b, #255");

  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.h, z30.h, #0");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.h, z30.h, #127");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.h, z30.h, #255");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.s, z30.s, #0");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.s, z30.s, #127");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.s, z30.s, #255");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqadd z30.d, z30.d, #0");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqadd z30.d, z30.d, #127");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqadd z30.d, z30.d, #255");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqadd z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "uqadd z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "uqadd z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.b, z30.b, #0");
  TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.b, z30.b, #127");
  TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.b, z30.b, #255");

  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.h, z30.h, #0");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.h, z30.h, #127");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.h, z30.h, #255");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.s, z30.s, #0");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.s, z30.s, #127");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.s, z30.s, #255");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "sqsub z30.d, z30.d, #0");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "sqsub z30.d, z30.d, #127");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "sqsub z30.d, z30.d, #255");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "sqsub z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "sqsub z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "sqsub z30.d, z30.d, #255, lsl #8");

  TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.b, z30.b, #0");
  TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.b, z30.b, #127");
  TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.b, z30.b, #255");

  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.h, z30.h, #0");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.h, z30.h, #127");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.h, z30.h, #255");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.h, z30.h, #1, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.h, z30.h, #127, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.h, z30.h, #255, lsl #8");

  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.s, z30.s, #0");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.s, z30.s, #127");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.s, z30.s, #255");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.s, z30.s, #1, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.s, z30.s, #127, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.s, z30.s, #255, lsl #8");

  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "uqsub z30.d, z30.d, #0");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "uqsub z30.d, z30.d, #127");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "uqsub z30.d, z30.d, #255");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 256), "uqsub z30.d, z30.d, #1, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 32512), "uqsub z30.d, z30.d, #127, lsl #8");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 65280), "uqsub z30.d, z30.d, #255, lsl #8");
}

// Disassembly checks for the immediate forms of smax/smin/umax/umin.
// Every instruction is exercised at all four element sizes (b/h/s/d) with z30 as both
// destination and source. Each TEST_SINGLE pairs an emitter call with the text the
// disassembler is expected to print for it (the macro itself is defined outside this section).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer min/max immediate (unpredicated)") {
  // Signed max: immediates probed at 0, -128 and 127.
  TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smax z30.b, z30.b, #0");
  TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smax z30.b, z30.b, #-128");
  TEST_SINGLE(smax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smax z30.b, z30.b, #127");

  TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smax z30.h, z30.h, #0");
  TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smax z30.h, z30.h, #-128");
  TEST_SINGLE(smax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smax z30.h, z30.h, #127");

  TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smax z30.s, z30.s, #0");
  TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smax z30.s, z30.s, #-128");
  TEST_SINGLE(smax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smax z30.s, z30.s, #127");

  TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smax z30.d, z30.d, #0");
  TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smax z30.d, z30.d, #-128");
  TEST_SINGLE(smax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smax z30.d, z30.d, #127");

  // Signed min: same immediates as smax.
  TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "smin z30.b, z30.b, #0");
  TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "smin z30.b, z30.b, #-128");
  TEST_SINGLE(smin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "smin z30.b, z30.b, #127");

  TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "smin z30.h, z30.h, #0");
  TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "smin z30.h, z30.h, #-128");
  TEST_SINGLE(smin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "smin z30.h, z30.h, #127");

  TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "smin z30.s, z30.s, #0");
  TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "smin z30.s, z30.s, #-128");
  TEST_SINGLE(smin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "smin z30.s, z30.s, #127");

  TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "smin z30.d, z30.d, #0");
  TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "smin z30.d, z30.d, #-128");
  TEST_SINGLE(smin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "smin z30.d, z30.d, #127");

  // Unsigned max: immediates probed at 0, 127 and 255.
  TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umax z30.b, z30.b, #0");
  TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umax z30.b, z30.b, #127");
  TEST_SINGLE(umax(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umax z30.b, z30.b, #255");

  TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umax z30.h, z30.h, #0");
  TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umax z30.h, z30.h, #127");
  TEST_SINGLE(umax(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umax z30.h, z30.h, #255");

  TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umax z30.s, z30.s, #0");
  TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umax z30.s, z30.s, #127");
  TEST_SINGLE(umax(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umax z30.s, z30.s, #255");

  TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umax z30.d, z30.d, #0");
  TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umax z30.d, z30.d, #127");
  TEST_SINGLE(umax(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "umax z30.d, z30.d, #255");

  // Unsigned min: same immediates as umax.
  TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "umin z30.b, z30.b, #0");
  TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "umin z30.b, z30.b, #127");
  TEST_SINGLE(umin(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 255), "umin z30.b, z30.b, #255");

  TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "umin z30.h, z30.h, #0");
  TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "umin z30.h, z30.h, #127");
  TEST_SINGLE(umin(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 255), "umin z30.h, z30.h, #255");

  TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "umin z30.s, z30.s, #0");
  TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "umin z30.s, z30.s, #127");
  TEST_SINGLE(umin(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 255), "umin z30.s, z30.s, #255");

  TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "umin z30.d, z30.d, #0");
  TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "umin z30.d, z30.d, #127");
  TEST_SINGLE(umin(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 255), "umin z30.d, z30.d, #255");
}

// Disassembly checks for the immediate form of mul.
// Each element size (b/h/s/d) is exercised with the immediates 0, -128 and 127,
// using z30 as both destination and source.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer multiply immediate (unpredicated)") {
  TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 0), "mul z30.b, z30.b, #0");
  TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, -128), "mul z30.b, z30.b, #-128");
  TEST_SINGLE(mul(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, 127), "mul z30.b, z30.b, #127");

  TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 0), "mul z30.h, z30.h, #0");
  TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, -128), "mul z30.h, z30.h, #-128");
  TEST_SINGLE(mul(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, 127), "mul z30.h, z30.h, #127");

  TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 0), "mul z30.s, z30.s, #0");
  TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, -128), "mul z30.s, z30.s, #-128");
  TEST_SINGLE(mul(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, 127), "mul z30.s, z30.s, #127");

  TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 0), "mul z30.d, z30.d, #0");
  TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, -128), "mul z30.d, z30.d, #-128");
  TEST_SINGLE(mul(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, 127), "mul z30.d, z30.d, #127");
}

// Disassembly checks for dup_imm and mov_imm.
// Both emitter entry points are expected to disassemble as "mov z30.<T>, #imm".
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast integer immediate (unpredicated)") {
  // dup_imm with immediates that are printed without a shift.
  TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, -128), "mov z30.b, #-128");
  TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, -128), "mov z30.h, #-128");
  TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, -128), "mov z30.s, #-128");
  TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, -128), "mov z30.d, #-128");

  TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, 127), "mov z30.b, #127");
  TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, 127), "mov z30.h, #127");
  TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, 127), "mov z30.s, #127");
  TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, 127), "mov z30.d, #127");

  // dup_imm with multiples of 256: -32768 and 32512 are expected to print as
  // "#-128, lsl #8" and "#127, lsl #8" respectively.
  // NOTE(review): the i8Bit variants are disabled — presumably because byte elements
  // have no "lsl #8" form; confirm against the emitter before re-enabling.
  // TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, -32768), "mov z30.b, #-128");
  TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, -32768), "mov z30.h, #-128, lsl #8");
  TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, -32768), "mov z30.s, #-128, lsl #8");
  TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, -32768), "mov z30.d, #-128, lsl #8");

  // TEST_SINGLE(dup_imm(SubRegSize::i8Bit, ZReg::z30, 32512), "mov z30.b, #127");
  TEST_SINGLE(dup_imm(SubRegSize::i16Bit, ZReg::z30, 32512), "mov z30.h, #127, lsl #8");
  TEST_SINGLE(dup_imm(SubRegSize::i32Bit, ZReg::z30, 32512), "mov z30.s, #127, lsl #8");
  TEST_SINGLE(dup_imm(SubRegSize::i64Bit, ZReg::z30, 32512), "mov z30.d, #127, lsl #8");

  // mov_imm is checked against the same expected text as the unshifted dup_imm cases.
  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, -128), "mov z30.b, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, -128), "mov z30.h, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, -128), "mov z30.s, #-128");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, -128), "mov z30.d, #-128");

  TEST_SINGLE(mov_imm(SubRegSize::i8Bit, ZReg::z30, 127), "mov z30.b, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i16Bit, ZReg::z30, 127), "mov z30.h, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i32Bit, ZReg::z30, 127), "mov z30.s, #127");
  TEST_SINGLE(mov_imm(SubRegSize::i64Bit, ZReg::z30, 127), "mov z30.d, #127");
}

#if TEST_FP16
// Half-precision (.h) variant of the predicated floating-point broadcast checks.
// Only compiled when TEST_FP16 is enabled. fcpy and fmov are both expected to
// disassemble as "fmov z30.h, p6/m, #<imm8> (<value>)".
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast floating-point immediate (predicated) : fp16") {
  TEST_SINGLE(fcpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.h, p6/m, #0xc0 (-0.1250)");
  TEST_SINGLE(fcpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.h, p6/m, #0x60 (0.5000)");
  TEST_SINGLE(fcpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.h, p6/m, #0x70 (1.0000)");
  TEST_SINGLE(fcpy(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.h, p6/m, #0x3f (31.0000)");
  // Same operands through the fmov entry point; the expected text is unchanged.
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.h, p6/m, #0xc0 (-0.1250)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.h, p6/m, #0x60 (0.5000)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.h, p6/m, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.h, p6/m, #0x3f (31.0000)");
}

// Half-precision (.h) variant of the unpredicated floating-point broadcast checks.
// Only compiled when TEST_FP16 is enabled. fdup and fmov are both expected to
// disassemble as "fmov z30.h, #<imm8> (<value>)".
//
// The test name carries the " : fp16" suffix (matching the predicated fp16 test) so that it
// does not collide with the single/double-precision test case of the same base name;
// Catch2 rejects two test cases registered under an identical name.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast floating-point immediate (unpredicated) : fp16") {
  TEST_SINGLE(fdup(SubRegSize::i16Bit, ZReg::z30, -0.125), "fmov z30.h, #0xc0 (-0.1250)");
  TEST_SINGLE(fdup(SubRegSize::i16Bit, ZReg::z30, 0.5), "fmov z30.h, #0x60 (0.5000)");
  TEST_SINGLE(fdup(SubRegSize::i16Bit, ZReg::z30, 1.0), "fmov z30.h, #0x70 (1.0000)");
  TEST_SINGLE(fdup(SubRegSize::i16Bit, ZReg::z30, 31.0), "fmov z30.h, #0x3f (31.0000)");
  // Same operands through the fmov entry point; the expected text is unchanged.
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, -0.125), "fmov z30.h, #0xc0 (-0.1250)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, 0.5), "fmov z30.h, #0x60 (0.5000)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, 1.0), "fmov z30.h, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i16Bit, ZReg::z30, 31.0), "fmov z30.h, #0x3f (31.0000)");
}
#endif

// Disassembly checks for the predicated floating-point immediate broadcast at .s and .d
// element sizes. fcpy and fmov are fed identical operands (z30, p6 merging) and are both
// expected to print as "fmov z30.<T>, p6/m, #<imm8> (<value>)".
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast floating-point immediate (predicated)") {
  // fcpy entry point: values -0.125, 0.5, 1.0 and 31.0.
  TEST_SINGLE(fcpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.s, p6/m, #0xc0 (-0.1250)");
  TEST_SINGLE(fcpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.d, p6/m, #0xc0 (-0.1250)");

  TEST_SINGLE(fcpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.s, p6/m, #0x60 (0.5000)");
  TEST_SINGLE(fcpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.d, p6/m, #0x60 (0.5000)");

  TEST_SINGLE(fcpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.s, p6/m, #0x70 (1.0000)");
  TEST_SINGLE(fcpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.d, p6/m, #0x70 (1.0000)");

  TEST_SINGLE(fcpy(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.s, p6/m, #0x3f (31.0000)");
  TEST_SINGLE(fcpy(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.d, p6/m, #0x3f (31.0000)");

  // fmov entry point: same values, same expected text.
  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.s, p6/m, #0xc0 (-0.1250)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), -0.125), "fmov z30.d, p6/m, #0xc0 (-0.1250)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.s, p6/m, #0x60 (0.5000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 0.5), "fmov z30.d, p6/m, #0x60 (0.5000)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.s, p6/m, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 1.0), "fmov z30.d, p6/m, #0x70 (1.0000)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.s, p6/m, #0x3f (31.0000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), 31.0), "fmov z30.d, p6/m, #0x3f (31.0000)");
}

// Disassembly checks for the unpredicated floating-point immediate broadcast at .s and .d
// element sizes. fdup and fmov are fed identical operands and are both expected to print
// as "fmov z30.<T>, #<imm8> (<value>)".
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE broadcast floating-point immediate (unpredicated)") {
  // fdup entry point: values -0.125, 0.5, 1.0 and 31.0.
  TEST_SINGLE(fdup(SubRegSize::i32Bit, ZReg::z30, -0.125), "fmov z30.s, #0xc0 (-0.1250)");
  TEST_SINGLE(fdup(SubRegSize::i64Bit, ZReg::z30, -0.125), "fmov z30.d, #0xc0 (-0.1250)");

  TEST_SINGLE(fdup(SubRegSize::i32Bit, ZReg::z30, 0.5), "fmov z30.s, #0x60 (0.5000)");
  TEST_SINGLE(fdup(SubRegSize::i64Bit, ZReg::z30, 0.5), "fmov z30.d, #0x60 (0.5000)");

  TEST_SINGLE(fdup(SubRegSize::i32Bit, ZReg::z30, 1.0), "fmov z30.s, #0x70 (1.0000)");
  TEST_SINGLE(fdup(SubRegSize::i64Bit, ZReg::z30, 1.0), "fmov z30.d, #0x70 (1.0000)");

  TEST_SINGLE(fdup(SubRegSize::i32Bit, ZReg::z30, 31.0), "fmov z30.s, #0x3f (31.0000)");
  TEST_SINGLE(fdup(SubRegSize::i64Bit, ZReg::z30, 31.0), "fmov z30.d, #0x3f (31.0000)");

  // fmov entry point: same values, same expected text.
  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, -0.125), "fmov z30.s, #0xc0 (-0.1250)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, -0.125), "fmov z30.d, #0xc0 (-0.1250)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, 0.5), "fmov z30.s, #0x60 (0.5000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, 0.5), "fmov z30.d, #0x60 (0.5000)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, 1.0), "fmov z30.s, #0x70 (1.0000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, 1.0), "fmov z30.d, #0x70 (1.0000)");

  TEST_SINGLE(fmov(SubRegSize::i32Bit, ZReg::z30, 31.0), "fmov z30.s, #0x3f (31.0000)");
  TEST_SINGLE(fmov(SubRegSize::i64Bit, ZReg::z30, 31.0), "fmov z30.d, #0x3f (31.0000)");
}

// Disassembly checks for cntp at every element size.
// The expected text shows the element-size suffix only on the final predicate operand (p7).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE predicate count") {
  TEST_SINGLE(cntp(SubRegSize::i8Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.b");
  TEST_SINGLE(cntp(SubRegSize::i16Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.h");
  TEST_SINGLE(cntp(SubRegSize::i32Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.s");
  TEST_SINGLE(cntp(SubRegSize::i64Bit, XReg::x30, PReg::p15, PReg::p7), "cntp x30, p15, p7.d");
}

// Disassembly checks for the vector-destination forms of sqincp/uqincp/sqdecp/uqdecp.
// Only the .h, .s and .d element sizes are exercised here; the size suffix appears on the
// vector operand and the predicate is printed without one.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec vector by predicate count") {
  TEST_SINGLE(sqincp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "sqincp z30.h, p15");
  TEST_SINGLE(sqincp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "sqincp z30.s, p15");
  TEST_SINGLE(sqincp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "sqincp z30.d, p15");

  TEST_SINGLE(uqincp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "uqincp z30.h, p15");
  TEST_SINGLE(uqincp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "uqincp z30.s, p15");
  TEST_SINGLE(uqincp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "uqincp z30.d, p15");

  TEST_SINGLE(sqdecp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "sqdecp z30.h, p15");
  TEST_SINGLE(sqdecp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "sqdecp z30.s, p15");
  TEST_SINGLE(sqdecp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "sqdecp z30.d, p15");

  TEST_SINGLE(uqdecp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "uqdecp z30.h, p15");
  TEST_SINGLE(uqdecp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "uqdecp z30.s, p15");
  TEST_SINGLE(uqdecp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "uqdecp z30.d, p15");
}

// Disassembly checks for the general-purpose-register forms of sqincp/uqincp/sqdecp/uqdecp.
// Here the element-size suffix is printed on the predicate operand. The signed variants are
// tested with an X destination, with and without a trailing W operand; the unsigned variants
// are tested with an X destination and with a W destination.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE saturating inc/dec register by predicate count") {
  // sqincp, X destination only.
  TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqincp x30, p15.b");
  TEST_SINGLE(sqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "sqincp x30, p15.h");
  TEST_SINGLE(sqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "sqincp x30, p15.s");
  TEST_SINGLE(sqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "sqincp x30, p15.d");

  // sqincp, X destination with an extra W operand printed last.
  TEST_SINGLE(sqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.b, w30");
  TEST_SINGLE(sqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.h, w30");
  TEST_SINGLE(sqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.s, w30");
  TEST_SINGLE(sqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15, WReg::w30), "sqincp x30, p15.d, w30");

  // uqincp, X destination.
  TEST_SINGLE(uqincp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "uqincp x30, p15.b");
  TEST_SINGLE(uqincp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "uqincp x30, p15.h");
  TEST_SINGLE(uqincp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "uqincp x30, p15.s");
  TEST_SINGLE(uqincp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "uqincp x30, p15.d");

  // uqincp, W destination.
  TEST_SINGLE(uqincp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqincp w30, p15.b");
  TEST_SINGLE(uqincp(SubRegSize::i16Bit, WReg::w30, PReg::p15), "uqincp w30, p15.h");
  TEST_SINGLE(uqincp(SubRegSize::i32Bit, WReg::w30, PReg::p15), "uqincp w30, p15.s");
  TEST_SINGLE(uqincp(SubRegSize::i64Bit, WReg::w30, PReg::p15), "uqincp w30, p15.d");

  // sqdecp, X destination only.
  TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.b");
  TEST_SINGLE(sqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.h");
  TEST_SINGLE(sqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.s");
  TEST_SINGLE(sqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "sqdecp x30, p15.d");

  // sqdecp, X destination with an extra W operand printed last.
  TEST_SINGLE(sqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.b, w30");
  TEST_SINGLE(sqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.h, w30");
  TEST_SINGLE(sqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.s, w30");
  TEST_SINGLE(sqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15, WReg::w30), "sqdecp x30, p15.d, w30");

  // uqdecp, X destination.
  TEST_SINGLE(uqdecp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.b");
  TEST_SINGLE(uqdecp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.h");
  TEST_SINGLE(uqdecp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.s");
  TEST_SINGLE(uqdecp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "uqdecp x30, p15.d");

  // uqdecp, W destination.
  TEST_SINGLE(uqdecp(SubRegSize::i8Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.b");
  TEST_SINGLE(uqdecp(SubRegSize::i16Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.h");
  TEST_SINGLE(uqdecp(SubRegSize::i32Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.s");
  TEST_SINGLE(uqdecp(SubRegSize::i64Bit, WReg::w30, PReg::p15), "uqdecp w30, p15.d");
}

// Disassembly checks for the vector-destination forms of incp/decp.
// Only the .h, .s and .d element sizes are exercised here.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec vector by predicate count") {
  TEST_SINGLE(incp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "incp z30.h, p15");
  TEST_SINGLE(incp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "incp z30.s, p15");
  TEST_SINGLE(incp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "incp z30.d, p15");

  TEST_SINGLE(decp(SubRegSize::i16Bit, ZReg::z30, PReg::p15), "decp z30.h, p15");
  TEST_SINGLE(decp(SubRegSize::i32Bit, ZReg::z30, PReg::p15), "decp z30.s, p15");
  TEST_SINGLE(decp(SubRegSize::i64Bit, ZReg::z30, PReg::p15), "decp z30.d, p15");
}

// Disassembly checks for the X-register-destination forms of incp/decp.
// All four element sizes are exercised; the size suffix is printed on the predicate operand.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE inc/dec register by predicate count") {
  TEST_SINGLE(incp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "incp x30, p15.b");
  TEST_SINGLE(incp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "incp x30, p15.h");
  TEST_SINGLE(incp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "incp x30, p15.s");
  TEST_SINGLE(incp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "incp x30, p15.d");

  TEST_SINGLE(decp(SubRegSize::i8Bit, XReg::x30, PReg::p15), "decp x30, p15.b");
  TEST_SINGLE(decp(SubRegSize::i16Bit, XReg::x30, PReg::p15), "decp x30, p15.h");
  TEST_SINGLE(decp(SubRegSize::i32Bit, XReg::x30, PReg::p15), "decp x30, p15.s");
  TEST_SINGLE(decp(SubRegSize::i64Bit, XReg::x30, PReg::p15), "decp x30, p15.d");
}

// Disassembly checks for wrffr with two different predicate registers.
// The expected text always carries a ".b" suffix on the predicate operand.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE FFR write from predicate") {
  TEST_SINGLE(wrffr(PReg::p7), "wrffr p7.b");
  TEST_SINGLE(wrffr(PReg::p15), "wrffr p15.b");
}

// Disassembly check for setffr, which takes no operands.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE FFR initialise") {
  TEST_SINGLE(setffr(), "setffr");
}

// Disassembly checks for cdot across all four rotation values (0/90/180/270).
// The SubRegSize argument names the destination element size; per the expected text the
// sources are printed a quarter as wide (.s <- .b, .d <- .h).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Integer Multiply-Add - Unpredicated") {
  TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.s, z29.b, z28.b, #0");
  TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.s, z29.b, z28.b, #90");
  TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cdot z30.s, z29.b, z28.b, #180");
  TEST_SINGLE(cdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cdot z30.s, z29.b, z28.b, #270");

  TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cdot z30.d, z29.h, z28.h, #0");
  TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cdot z30.d, z29.h, z28.h, #90");
  TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cdot z30.d, z29.h, z28.h, #180");
  TEST_SINGLE(cdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cdot z30.d, z29.h, z28.h, #270");
}

// Disassembly checks for sdot and udot.
// The SubRegSize argument names the destination element size; per the expected text the
// sources are printed a quarter as wide (.s <- .b, .d <- .h).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer dot product (unpredicated)") {
  TEST_SINGLE(sdot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sdot z30.s, z29.b, z28.b");
  TEST_SINGLE(sdot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sdot z30.d, z29.h, z28.h");

  TEST_SINGLE(udot(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "udot z30.s, z29.b, z28.b");
  TEST_SINGLE(udot(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "udot z30.d, z29.h, z28.h");
}

// Disassembly checks for sqdmlalbt and sqdmlslbt.
// The SubRegSize argument names the destination element size; per the expected text the
// sources are printed half as wide (.h <- .b, .s <- .h, .d <- .s).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add interleaved long") {
  TEST_SINGLE(sqdmlalbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalbt z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlalbt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalbt z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlalbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalbt z30.d, z29.s, z28.s");

  TEST_SINGLE(sqdmlslbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslbt z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlslbt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslbt z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlslbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslbt z30.d, z29.s, z28.s");
}

// Disassembly checks for cmla and sqrdcmlah.
// Both are exercised at every element size (b/h/s/d) combined with every rotation value
// (0/90/180/270); destination and sources share the same element size in the expected text.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 complex integer multiply-add") {
  // cmla: four rotations per element size.
  TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.b, z29.b, z28.b, #0");
  TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.b, z29.b, z28.b, #90");
  TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.b, z29.b, z28.b, #180");
  TEST_SINGLE(cmla(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.b, z29.b, z28.b, #270");

  TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.h, z29.h, z28.h, #0");
  TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.h, z29.h, z28.h, #90");
  TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.h, z29.h, z28.h, #180");
  TEST_SINGLE(cmla(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.h, z29.h, z28.h, #270");

  TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.s, z29.s, z28.s, #0");
  TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.s, z29.s, z28.s, #90");
  TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.s, z29.s, z28.s, #180");
  TEST_SINGLE(cmla(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.s, z29.s, z28.s, #270");

  TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "cmla z30.d, z29.d, z28.d, #0");
  TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "cmla z30.d, z29.d, z28.d, #90");
  TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "cmla z30.d, z29.d, z28.d, #180");
  TEST_SINGLE(cmla(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "cmla z30.d, z29.d, z28.d, #270");

  // sqrdcmlah: same size/rotation matrix as cmla.
  TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.b, z29.b, z28.b, #0");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.b, z29.b, z28.b, #90");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.b, z29.b, z28.b, #180");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.b, z29.b, z28.b, #270");

  TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.h, z29.h, z28.h, #0");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.h, z29.h, z28.h, #90");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.h, z29.h, z28.h, #180");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.h, z29.h, z28.h, #270");

  TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.s, z29.s, z28.s, #0");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.s, z29.s, z28.s, #90");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.s, z29.s, z28.s, #180");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.s, z29.s, z28.s, #270");

  TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_0), "sqrdcmlah z30.d, z29.d, z28.d, #0");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_90), "sqrdcmlah z30.d, z29.d, z28.d, #90");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_180), "sqrdcmlah z30.d, z29.d, z28.d, #180");
  TEST_SINGLE(sqrdcmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28, Rotation::ROTATE_270), "sqrdcmlah z30.d, z29.d, z28.d, #270");
}

// Covers the multiply-add long emitters: smlalb/smlalt, umlalb/umlalt, smlslb/smlslt and
// umlslb/umlslt, each at the i16Bit, i32Bit and i64Bit sizes.
// The SubRegSize argument names the destination element size; the expected text shows both
// source registers at half that width (i16Bit -> ".h" destination with ".b" sources, etc.).
// NOTE(review): TEST_SINGLE is defined outside this chunk; presumably it emits the instruction
// given as its first argument and compares the disassembly with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply-add long") {
  // Signed accumulate, "b" then "t" variants.
  TEST_SINGLE(smlalb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalb z30.h, z29.b, z28.b");
  TEST_SINGLE(smlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalb z30.s, z29.h, z28.h");
  TEST_SINGLE(smlalb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalb z30.d, z29.s, z28.s");

  TEST_SINGLE(smlalt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalt z30.h, z29.b, z28.b");
  TEST_SINGLE(smlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalt z30.s, z29.h, z28.h");
  TEST_SINGLE(smlalt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlalt z30.d, z29.s, z28.s");

  // Unsigned accumulate, "b" then "t" variants.
  TEST_SINGLE(umlalb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalb z30.h, z29.b, z28.b");
  TEST_SINGLE(umlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalb z30.s, z29.h, z28.h");
  TEST_SINGLE(umlalb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalb z30.d, z29.s, z28.s");

  TEST_SINGLE(umlalt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalt z30.h, z29.b, z28.b");
  TEST_SINGLE(umlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalt z30.s, z29.h, z28.h");
  TEST_SINGLE(umlalt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlalt z30.d, z29.s, z28.s");

  // Signed "mlsl" forms, "b" then "t" variants.
  TEST_SINGLE(smlslb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslb z30.h, z29.b, z28.b");
  TEST_SINGLE(smlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslb z30.s, z29.h, z28.h");
  TEST_SINGLE(smlslb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslb z30.d, z29.s, z28.s");

  TEST_SINGLE(smlslt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslt z30.h, z29.b, z28.b");
  TEST_SINGLE(smlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslt z30.s, z29.h, z28.h");
  TEST_SINGLE(smlslt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smlslt z30.d, z29.s, z28.s");

  // Unsigned "mlsl" forms, "b" then "t" variants.
  TEST_SINGLE(umlslb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslb z30.h, z29.b, z28.b");
  TEST_SINGLE(umlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslb z30.s, z29.h, z28.h");
  TEST_SINGLE(umlslb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslb z30.d, z29.s, z28.s");

  TEST_SINGLE(umlslt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslt z30.h, z29.b, z28.b");
  TEST_SINGLE(umlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslt z30.s, z29.h, z28.h");
  TEST_SINGLE(umlslt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umlslt z30.d, z29.s, z28.s");
}

// Covers sqdmlalb, sqdmlalt, sqdmlslb and sqdmlslt at the i16Bit, i32Bit and i64Bit sizes.
// As the expected strings show, the SubRegSize argument is the destination element size and the
// two source registers are printed one size narrower.
// NOTE(review): TEST_SINGLE comes from outside this chunk; presumably it checks the disassembly
// of the emitted instruction against the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add long") {
  TEST_SINGLE(sqdmlalb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalb z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalb z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlalb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalb z30.d, z29.s, z28.s");

  TEST_SINGLE(sqdmlalt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalt z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalt z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlalt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlalt z30.d, z29.s, z28.s");

  TEST_SINGLE(sqdmlslb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslb z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslb z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlslb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslb z30.d, z29.s, z28.s");

  TEST_SINGLE(sqdmlslt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslt z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslt z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmlslt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmlslt z30.d, z29.s, z28.s");
}

// Covers sqrdmlah and sqrdmlsh at all four element sizes (i8Bit through i64Bit).
// Unlike the "long" forms, the expected text uses the same element suffix on the destination and
// on both sources.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating multiply-add high") {
  TEST_SINGLE(sqrdmlah(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.b, z29.b, z28.b");
  TEST_SINGLE(sqrdmlah(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.h, z29.h, z28.h");
  TEST_SINGLE(sqrdmlah(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.s, z29.s, z28.s");
  TEST_SINGLE(sqrdmlah(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlah z30.d, z29.d, z28.d");

  TEST_SINGLE(sqrdmlsh(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.b, z29.b, z28.b");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.h, z29.h, z28.h");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.s, z29.s, z28.s");
  TEST_SINGLE(sqrdmlsh(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqrdmlsh z30.d, z29.d, z28.d");
}

// Covers the single usdot emitter form tested here. The call takes no SubRegSize argument; the
// expected text has a ".s" destination with ".b" sources.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE mixed sign dot product") {
  TEST_SINGLE(usdot(ZReg::z30, ZReg::z29, ZReg::z28), "usdot z30.s, z29.b, z28.b");
}

// Covers sadalp and uadalp at the i16Bit, i32Bit and i64Bit sizes, always with a merging
// predicate (p6/m). The SubRegSize argument is the destination element size; the expected text
// shows the source register one size narrower.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer pairwise add and accumulate long") {
  TEST_SINGLE(sadalp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sadalp z30.h, p6/m, z29.b");
  TEST_SINGLE(sadalp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sadalp z30.s, p6/m, z29.h");
  TEST_SINGLE(sadalp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sadalp z30.d, p6/m, z29.s");

  TEST_SINGLE(uadalp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uadalp z30.h, p6/m, z29.b");
  TEST_SINGLE(uadalp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uadalp z30.s, p6/m, z29.h");
  TEST_SINGLE(uadalp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "uadalp z30.d, p6/m, z29.s");
}
// Covers the predicated unary emitters urecpe, ursqrte, sqabs and sqneg, all with a merging
// predicate (p6/m).
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer unary operations (predicated)") {
  // urecpe/ursqrte take no SubRegSize argument here; the expected text is ".s" only.
  TEST_SINGLE(urecpe(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "urecpe z30.s, p6/m, z29.s");

  TEST_SINGLE(ursqrte(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "ursqrte z30.s, p6/m, z29.s");

  // sqabs/sqneg are checked at every element size from i8Bit to i64Bit.
  TEST_SINGLE(sqabs(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.b, p6/m, z29.b");
  TEST_SINGLE(sqabs(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.h, p6/m, z29.h");
  TEST_SINGLE(sqabs(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.s, p6/m, z29.s");
  TEST_SINGLE(sqabs(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqabs z30.d, p6/m, z29.d");

  TEST_SINGLE(sqneg(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.b, p6/m, z29.b");
  TEST_SINGLE(sqneg(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.h, p6/m, z29.h");
  TEST_SINGLE(sqneg(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.s, p6/m, z29.s");
  TEST_SINGLE(sqneg(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "sqneg z30.d, p6/m, z29.d");
}
// Covers the predicated shift-left emitters srshl/urshl, sqshl/uqshl, sqrshl/uqrshl and their
// "r"-suffixed counterparts, each at the i8Bit..i64Bit sizes with a merging predicate (p6/m).
// Every case passes z30 as both the destination and the first source register.
// NOTE(review): presumably these are destructive forms that require the destination to match the
// first source, which would be why no case uses distinct registers — confirm against the emitter.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating/rounding bitwise shift left (predicated)") {
  // Rounding shifts.
  TEST_SINGLE(srshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(srshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(srshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(srshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshl z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(urshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(urshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(urshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(urshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshl z30.d, p6/m, z30.d, z29.d");

  // Rounding shifts, "r"-suffixed mnemonics.
  TEST_SINGLE(srshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(srshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(srshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(srshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "srshlr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(urshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(urshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(urshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(urshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "urshlr z30.d, p6/m, z30.d, z29.d");

  // Saturating shifts.
  TEST_SINGLE(sqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(sqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(sqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(sqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshl z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(uqshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(uqshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(uqshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(uqshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshl z30.d, p6/m, z30.d, z29.d");

  // Saturating rounding shifts.
  TEST_SINGLE(sqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(sqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(sqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(sqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshl z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(uqrshl(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(uqrshl(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(uqrshl(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(uqrshl(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshl z30.d, p6/m, z30.d, z29.d");

  // Saturating shifts, "r"-suffixed mnemonics.
  TEST_SINGLE(sqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(sqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(sqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(sqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqshlr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(uqshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(uqshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(uqshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(uqshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqshlr z30.d, p6/m, z30.d, z29.d");

  // Saturating rounding shifts, "r"-suffixed mnemonics.
  TEST_SINGLE(sqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(sqrshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(sqrshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(sqrshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "sqrshlr z30.d, p6/m, z30.d, z29.d");

  TEST_SINGLE(uqrshlr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.b, p6/m, z30.b, z29.b");
  TEST_SINGLE(uqrshlr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.h, p6/m, z30.h, z29.h");
  TEST_SINGLE(uqrshlr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.s, p6/m, z30.s, z29.s");
  TEST_SINGLE(uqrshlr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z29), "uqrshlr z30.d, p6/m, z30.d, z29.d");
}
// Covers the predicated halving emitters shadd/uhadd, shsub/uhsub, srhadd/urhadd and
// shsubr/uhsubr at the i8Bit..i64Bit sizes with a merging predicate (p6/m).
// Every case passes z30 as both the destination and the first source register.
// The i128Bit rows are commented out for each mnemonic.
// NOTE(review): the reason the i128Bit rows are disabled is not visible here; presumably the
// emitter rejects that size for these instructions — confirm.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer halving add/subtract (predicated)") {
  TEST_SINGLE(shadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(shadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(shadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(shadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(shadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shadd z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(uhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(uhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(uhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(uhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(uhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhadd z30.q, p6/m, z30.q, z28.q");
  TEST_SINGLE(shsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(shsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(shsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(shsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(shsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsub z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(uhsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(uhsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(uhsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(uhsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(uhsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsub z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(srhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(srhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(srhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(srhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(srhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "srhadd z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(urhadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(urhadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(urhadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(urhadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(urhadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "urhadd z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(shsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(shsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(shsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(shsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(shsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "shsubr z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(uhsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(uhsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(uhsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(uhsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(uhsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uhsubr z30.q, p6/m, z30.q, z28.q");
}
// Covers the predicated pairwise emitters addp, smaxp, umaxp, sminp and uminp at the
// i8Bit..i64Bit sizes with a merging predicate (p6/m).
// Every case passes z30 as both the destination and the first source register.
// The i128Bit rows are commented out for each mnemonic.
// NOTE(review): the reason the i128Bit rows are disabled is not visible here; presumably the
// emitter rejects that size for these instructions — confirm.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer pairwise arithmetic") {
  TEST_SINGLE(addp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(addp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(addp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(addp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(addp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "addp z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(smaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(smaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(smaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(smaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(smaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "smaxp z30.q, p6/m, z30.q, z28.q");

  TEST_SINGLE(umaxp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(umaxp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(umaxp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(umaxp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(umaxp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "umaxp z30.q, p6/m, z30.q, z28.q");


  TEST_SINGLE(sminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(sminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(sminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(sminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(sminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "sminp z30.q, p6/m, z30.q, z28.q");


  TEST_SINGLE(uminp(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(uminp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(uminp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(uminp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(uminp(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "uminp z30.q, p6/m, z30.q, z28.q");
}
// Covers the predicated saturating emitters sqadd/uqadd, sqsub/uqsub, suqadd/usqadd and
// sqsubr/uqsubr at the i8Bit..i64Bit sizes. This case uses p7 (merging) as the predicate, and
// every row passes z30 as both the destination and the first source register.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating add/subtract") {
  TEST_SINGLE(sqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(sqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(sqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(sqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqadd z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(uqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(uqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(uqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(uqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqadd z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(sqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(sqsub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(sqsub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(sqsub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsub z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(uqsub(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(uqsub(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(uqsub(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(uqsub(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsub z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(suqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(suqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(suqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(suqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "suqadd z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(usqadd(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(usqadd(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(usqadd(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(usqadd(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "usqadd z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(sqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(sqsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(sqsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(sqsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "sqsubr z30.d, p7/m, z30.d, z28.d");

  TEST_SINGLE(uqsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.b, p7/m, z30.b, z28.b");
  TEST_SINGLE(uqsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.h, p7/m, z30.h, z28.h");
  TEST_SINGLE(uqsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.s, p7/m, z30.s, z28.s");
  TEST_SINGLE(uqsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z30, ZReg::z28), "uqsubr z30.d, p7/m, z30.d, z28.d");
}
// Covers the unpredicated "long" emitters saddl{b,t}, uaddl{b,t}, ssubl{b,t}, usubl{b,t},
// sabdl{b,t} and uabdl{b,t} at the i16Bit, i32Bit and i64Bit sizes.
// The SubRegSize argument names the destination element size; the expected text shows both
// sources one size narrower (i16Bit -> ".h" destination with ".b" sources, etc.).
// The i8Bit row of each mnemonic is commented out.
// NOTE(review): presumably i8Bit is disabled because there is no narrower source element for a
// byte-sized destination, so the emitter rejects it — confirm.
// NOTE(review): TEST_SINGLE is defined elsewhere; presumably it compares the disassembly of the
// emitted instruction with the string — confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract long") {
  // TEST_SINGLE(saddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.b, z29.b, z28.b");
  TEST_SINGLE(saddlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.h, z29.b, z28.b");
  TEST_SINGLE(saddlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.s, z29.h, z28.h");
  TEST_SINGLE(saddlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlb z30.d, z29.s, z28.s");

  // TEST_SINGLE(saddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.b, z29.b, z28.b");
  TEST_SINGLE(saddlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.h, z29.b, z28.b");
  TEST_SINGLE(saddlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.s, z29.h, z28.h");
  TEST_SINGLE(saddlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlt z30.d, z29.s, z28.s");

  // TEST_SINGLE(uaddlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.b, z29.b, z28.b");
  TEST_SINGLE(uaddlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.h, z29.b, z28.b");
  TEST_SINGLE(uaddlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.s, z29.h, z28.h");
  TEST_SINGLE(uaddlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlb z30.d, z29.s, z28.s");

  // TEST_SINGLE(uaddlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.b, z29.b, z28.b");
  TEST_SINGLE(uaddlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.h, z29.b, z28.b");
  TEST_SINGLE(uaddlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.s, z29.h, z28.h");
  TEST_SINGLE(uaddlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddlt z30.d, z29.s, z28.s");

  // TEST_SINGLE(ssublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.b, z29.b, z28.b");
  TEST_SINGLE(ssublb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.h, z29.b, z28.b");
  TEST_SINGLE(ssublb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.s, z29.h, z28.h");
  TEST_SINGLE(ssublb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublb z30.d, z29.s, z28.s");

  // TEST_SINGLE(ssublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.b, z29.b, z28.b");
  TEST_SINGLE(ssublt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.h, z29.b, z28.b");
  TEST_SINGLE(ssublt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.s, z29.h, z28.h");
  TEST_SINGLE(ssublt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublt z30.d, z29.s, z28.s");

  // TEST_SINGLE(usublb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.b, z29.b, z28.b");
  TEST_SINGLE(usublb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.h, z29.b, z28.b");
  TEST_SINGLE(usublb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.s, z29.h, z28.h");
  TEST_SINGLE(usublb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublb z30.d, z29.s, z28.s");

  // TEST_SINGLE(usublt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.b, z29.b, z28.b");
  TEST_SINGLE(usublt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.h, z29.b, z28.b");
  TEST_SINGLE(usublt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.s, z29.h, z28.h");
  TEST_SINGLE(usublt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usublt z30.d, z29.s, z28.s");

  // TEST_SINGLE(sabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.b, z29.b, z28.b");
  TEST_SINGLE(sabdlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.h, z29.b, z28.b");
  TEST_SINGLE(sabdlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.s, z29.h, z28.h");
  TEST_SINGLE(sabdlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlb z30.d, z29.s, z28.s");

  // TEST_SINGLE(sabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.b, z29.b, z28.b");
  TEST_SINGLE(sabdlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.h, z29.b, z28.b");
  TEST_SINGLE(sabdlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.s, z29.h, z28.h");
  TEST_SINGLE(sabdlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sabdlt z30.d, z29.s, z28.s");

  // TEST_SINGLE(uabdlb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.b, z29.b, z28.b");
  TEST_SINGLE(uabdlb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.h, z29.b, z28.b");
  TEST_SINGLE(uabdlb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.s, z29.h, z28.h");
  TEST_SINGLE(uabdlb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlb z30.d, z29.s, z28.s");

  // TEST_SINGLE(uabdlt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.b, z29.b, z28.b");
  TEST_SINGLE(uabdlt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.h, z29.b, z28.b");
  TEST_SINGLE(uabdlt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.s, z29.h, z28.h");
  TEST_SINGLE(uabdlt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uabdlt z30.d, z29.s, z28.s");
}
// Checks the signed/unsigned widening add and subtract emitters
// (saddw{b,t}, uaddw{b,t}, ssubw{b,t}, usubw{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// In every expectation the SubRegSize argument matches the element size of the
// destination and the first source, while the second source is printed at half
// that width. Only the 16-, 32- and 64-bit sizes are exercised here.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract wide") {
  // Signed add wide, bottom elements.
  TEST_SINGLE(saddwb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwb z30.h, z29.h, z28.b");
  TEST_SINGLE(saddwb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwb z30.s, z29.s, z28.h");
  TEST_SINGLE(saddwb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwb z30.d, z29.d, z28.s");

  // Signed add wide, top elements.
  TEST_SINGLE(saddwt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwt z30.h, z29.h, z28.b");
  TEST_SINGLE(saddwt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwt z30.s, z29.s, z28.h");
  TEST_SINGLE(saddwt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddwt z30.d, z29.d, z28.s");

  // Unsigned add wide, bottom elements.
  TEST_SINGLE(uaddwb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwb z30.h, z29.h, z28.b");
  TEST_SINGLE(uaddwb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwb z30.s, z29.s, z28.h");
  TEST_SINGLE(uaddwb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwb z30.d, z29.d, z28.s");

  // Unsigned add wide, top elements.
  TEST_SINGLE(uaddwt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwt z30.h, z29.h, z28.b");
  TEST_SINGLE(uaddwt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwt z30.s, z29.s, z28.h");
  TEST_SINGLE(uaddwt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "uaddwt z30.d, z29.d, z28.s");

  // Signed subtract wide, bottom elements.
  TEST_SINGLE(ssubwb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwb z30.h, z29.h, z28.b");
  TEST_SINGLE(ssubwb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwb z30.s, z29.s, z28.h");
  TEST_SINGLE(ssubwb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwb z30.d, z29.d, z28.s");

  // Signed subtract wide, top elements.
  TEST_SINGLE(ssubwt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwt z30.h, z29.h, z28.b");
  TEST_SINGLE(ssubwt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwt z30.s, z29.s, z28.h");
  TEST_SINGLE(ssubwt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubwt z30.d, z29.d, z28.s");

  // Unsigned subtract wide, bottom elements.
  TEST_SINGLE(usubwb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwb z30.h, z29.h, z28.b");
  TEST_SINGLE(usubwb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwb z30.s, z29.s, z28.h");
  TEST_SINGLE(usubwb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwb z30.d, z29.d, z28.s");

  // Unsigned subtract wide, top elements.
  TEST_SINGLE(usubwt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwt z30.h, z29.h, z28.b");
  TEST_SINGLE(usubwt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwt z30.s, z29.s, z28.h");
  TEST_SINGLE(usubwt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "usubwt z30.d, z29.d, z28.s");
}
// Checks the widening multiply emitters (sqdmull{b,t}, pmull{b,t},
// smull{b,t}, umull{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// In every active expectation the SubRegSize argument names the destination
// element size and both sources are printed at half that width.
// The i8Bit variants are left commented out: a byte-sized destination would
// have no narrower source element to widen from.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer multiply long") {
  // TEST_SINGLE(sqdmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.b, z29.b, z28.b");
  TEST_SINGLE(sqdmullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullb z30.d, z29.s, z28.s");

  // TEST_SINGLE(sqdmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.b, z29.b, z28.b");
  TEST_SINGLE(sqdmullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.h, z29.b, z28.b");
  TEST_SINGLE(sqdmullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.s, z29.h, z28.h");
  TEST_SINGLE(sqdmullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "sqdmullt z30.d, z29.s, z28.s");

  // NOTE(review): for pmullb/pmullt the i32Bit case is disabled in addition to
  // i8Bit — presumably the polynomial multiply has no 32-bit destination form;
  // confirm against the Arm architecture reference.
  // TEST_SINGLE(pmullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.b, z29.b, z28.b");
  TEST_SINGLE(pmullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.h, z29.b, z28.b");
  // TEST_SINGLE(pmullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.s, z29.h, z28.h");
  TEST_SINGLE(pmullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullb z30.d, z29.s, z28.s");

  // TEST_SINGLE(pmullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.b, z29.b, z28.b");
  TEST_SINGLE(pmullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.h, z29.b, z28.b");
  // TEST_SINGLE(pmullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.s, z29.h, z28.h");
  TEST_SINGLE(pmullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "pmullt z30.d, z29.s, z28.s");

  // TEST_SINGLE(smullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.b, z29.b, z28.b");
  TEST_SINGLE(smullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.h, z29.b, z28.b");
  TEST_SINGLE(smullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.s, z29.h, z28.h");
  TEST_SINGLE(smullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullb z30.d, z29.s, z28.s");

  // TEST_SINGLE(smullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.b, z29.b, z28.b");
  TEST_SINGLE(smullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.h, z29.b, z28.b");
  TEST_SINGLE(smullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.s, z29.h, z28.h");
  TEST_SINGLE(smullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "smullt z30.d, z29.s, z28.s");

  // TEST_SINGLE(umullb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.b, z29.b, z28.b");
  TEST_SINGLE(umullb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.h, z29.b, z28.b");
  TEST_SINGLE(umullb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.s, z29.h, z28.h");
  TEST_SINGLE(umullb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullb z30.d, z29.s, z28.s");

  // TEST_SINGLE(umullt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.b, z29.b, z28.b");
  TEST_SINGLE(umullt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.h, z29.b, z28.b");
  TEST_SINGLE(umullt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.s, z29.h, z28.h");
  TEST_SINGLE(umullt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "umullt z30.d, z29.s, z28.s");
}
// Checks the widening shift-left emitters (sshll{b,t}, ushll{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// The SubRegSize argument names the destination element size; the source is
// printed at half that width. For each size two shift amounts are used:
// 0 and (source element width in bits - 1), i.e. 7, 15 and 31.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift left long") {
  // Signed, bottom elements.
  TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.h, z29.b, #0");
  TEST_SINGLE(sshllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllb z30.h, z29.b, #7");
  TEST_SINGLE(sshllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.s, z29.h, #0");
  TEST_SINGLE(sshllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sshllb z30.s, z29.h, #15");
  TEST_SINGLE(sshllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllb z30.d, z29.s, #0");
  TEST_SINGLE(sshllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sshllb z30.d, z29.s, #31");

  // Signed, top elements.
  TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.h, z29.b, #0");
  TEST_SINGLE(sshllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sshllt z30.h, z29.b, #7");
  TEST_SINGLE(sshllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.s, z29.h, #0");
  TEST_SINGLE(sshllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sshllt z30.s, z29.h, #15");
  TEST_SINGLE(sshllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sshllt z30.d, z29.s, #0");
  TEST_SINGLE(sshllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sshllt z30.d, z29.s, #31");

  // Unsigned, bottom elements.
  TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.h, z29.b, #0");
  TEST_SINGLE(ushllb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllb z30.h, z29.b, #7");
  TEST_SINGLE(ushllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.s, z29.h, #0");
  TEST_SINGLE(ushllb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "ushllb z30.s, z29.h, #15");
  TEST_SINGLE(ushllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllb z30.d, z29.s, #0");
  TEST_SINGLE(ushllb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "ushllb z30.d, z29.s, #31");

  // Unsigned, top elements.
  TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.h, z29.b, #0");
  TEST_SINGLE(ushllt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "ushllt z30.h, z29.b, #7");
  TEST_SINGLE(ushllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.s, z29.h, #0");
  TEST_SINGLE(ushllt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "ushllt z30.s, z29.h, #15");
  TEST_SINGLE(ushllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "ushllt z30.d, z29.s, #0");
  TEST_SINGLE(ushllt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "ushllt z30.d, z29.s, #31");
}
// Checks the interleaved widening add/subtract emitters
// (saddlbt, ssublbt, ssubltb).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// The SubRegSize argument names the destination element size; both sources are
// printed at half that width. Only 16-, 32- and 64-bit destinations are used.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract interleaved long") {
  TEST_SINGLE(saddlbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlbt z30.h, z29.b, z28.b");
  TEST_SINGLE(saddlbt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlbt z30.s, z29.h, z28.h");
  TEST_SINGLE(saddlbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "saddlbt z30.d, z29.s, z28.s");

  TEST_SINGLE(ssublbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublbt z30.h, z29.b, z28.b");
  TEST_SINGLE(ssublbt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublbt z30.s, z29.h, z28.h");
  TEST_SINGLE(ssublbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssublbt z30.d, z29.s, z28.s");

  TEST_SINGLE(ssubltb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubltb z30.h, z29.b, z28.b");
  TEST_SINGLE(ssubltb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubltb z30.s, z29.h, z28.h");
  TEST_SINGLE(ssubltb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "ssubltb z30.d, z29.s, z28.s");
}
// Checks the interleaved exclusive-or emitters (eorbt, eortb).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// All three operands are printed with the same element size, and every
// size from 8 to 64 bits is exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise exclusive-or interleaved") {
  TEST_SINGLE(eorbt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.b, z29.b, z28.b");
  TEST_SINGLE(eorbt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.h, z29.h, z28.h");
  TEST_SINGLE(eorbt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.s, z29.s, z28.s");
  TEST_SINGLE(eorbt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eorbt z30.d, z29.d, z28.d");

  TEST_SINGLE(eortb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.b, z29.b, z28.b");
  TEST_SINGLE(eortb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.h, z29.h, z28.h");
  TEST_SINGLE(eortb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.s, z29.s, z28.s");
  TEST_SINGLE(eortb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "eortb z30.d, z29.d, z28.d");
}
// Checks the integer matrix multiply-accumulate emitters (smmla, usmmla, ummla).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// These emitters take no element-size argument; the expected text always shows
// a 32-bit (.s) destination with byte (.b) sources.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer matrix multiply accumulate") {
  TEST_SINGLE(smmla(ZReg::z30, ZReg::z29, ZReg::z28), "smmla z30.s, z29.b, z28.b");
  TEST_SINGLE(usmmla(ZReg::z30, ZReg::z29, ZReg::z28), "usmmla z30.s, z29.b, z28.b");
  TEST_SINGLE(ummla(ZReg::z30, ZReg::z29, ZReg::z28), "ummla z30.s, z29.b, z28.b");
}
// Checks the bit-permute emitters (bext, bdep, bgrp).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// All three operands are printed with the same element size, and every
// size from 8 to 64 bits is exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise permute") {
  TEST_SINGLE(bext(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.b, z29.b, z28.b");
  TEST_SINGLE(bext(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.h, z29.h, z28.h");
  TEST_SINGLE(bext(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.s, z29.s, z28.s");
  TEST_SINGLE(bext(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bext z30.d, z29.d, z28.d");

  TEST_SINGLE(bdep(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.b, z29.b, z28.b");
  TEST_SINGLE(bdep(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.h, z29.h, z28.h");
  TEST_SINGLE(bdep(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.s, z29.s, z28.s");
  TEST_SINGLE(bdep(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bdep z30.d, z29.d, z28.d");

  TEST_SINGLE(bgrp(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.b, z29.b, z28.b");
  TEST_SINGLE(bgrp(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.h, z29.h, z28.h");
  TEST_SINGLE(bgrp(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.s, z29.s, z28.s");
  TEST_SINGLE(bgrp(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bgrp z30.d, z29.d, z28.d");
}
// Checks the complex integer add emitters (cadd, sqcadd).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// Every call passes z30 as both the destination and the first source, and only
// the ROTATE_90 and ROTATE_270 rotations are exercised.
// NOTE(review): presumably the emitter requires destination == first source
// and accepts only these two rotations — confirm against the emitter.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 complex integer add") {
  // cadd, rotation #90.
  TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.b, z30.b, z29.b, #90");
  TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.h, z30.h, z29.h, #90");
  TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.s, z30.s, z29.s, #90");
  TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "cadd z30.d, z30.d, z29.d, #90");

  // cadd, rotation #270.
  TEST_SINGLE(cadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.b, z30.b, z29.b, #270");
  TEST_SINGLE(cadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.h, z30.h, z29.h, #270");
  TEST_SINGLE(cadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.s, z30.s, z29.s, #270");
  TEST_SINGLE(cadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "cadd z30.d, z30.d, z29.d, #270");

  // sqcadd, rotation #90.
  TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.b, z30.b, z29.b, #90");
  TEST_SINGLE(sqcadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.h, z30.h, z29.h, #90");
  TEST_SINGLE(sqcadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.s, z30.s, z29.s, #90");
  TEST_SINGLE(sqcadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_90), "sqcadd z30.d, z30.d, z29.d, #90");

  // sqcadd, rotation #270.
  TEST_SINGLE(sqcadd(SubRegSize::i8Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.b, z30.b, z29.b, #270");
  TEST_SINGLE(sqcadd(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.h, z30.h, z29.h, #270");
  TEST_SINGLE(sqcadd(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.s, z30.s, z29.s, #270");
  TEST_SINGLE(sqcadd(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z29, Rotation::ROTATE_270), "sqcadd z30.d, z30.d, z29.d, #270");
}
// Checks the widening absolute-difference-and-accumulate emitters
// (sabal{b,t}, uabal{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// The SubRegSize argument names the destination element size; both sources are
// printed at half that width. Only 16-, 32- and 64-bit destinations are used.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer absolute difference and accumulate long") {
  // Signed, bottom elements.
  TEST_SINGLE(sabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.h, z29.b, z30.b");
  TEST_SINGLE(sabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.s, z29.h, z30.h");
  TEST_SINGLE(sabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalb z28.d, z29.s, z30.s");

  // Signed, top elements.
  TEST_SINGLE(sabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.h, z29.b, z30.b");
  TEST_SINGLE(sabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.s, z29.h, z30.h");
  TEST_SINGLE(sabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sabalt z28.d, z29.s, z30.s");

  // Unsigned, bottom elements.
  TEST_SINGLE(uabalb(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.h, z29.b, z30.b");
  TEST_SINGLE(uabalb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.s, z29.h, z30.h");
  TEST_SINGLE(uabalb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalb z28.d, z29.s, z30.s");

  // Unsigned, top elements.
  TEST_SINGLE(uabalt(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.h, z29.b, z30.b");
  TEST_SINGLE(uabalt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.s, z29.h, z30.h");
  TEST_SINGLE(uabalt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uabalt z28.d, z29.s, z30.s");
}
// Checks the add/subtract-with-carry emitters (adcl{b,t}, sbcl{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// Only the 32- and 64-bit element sizes are exercised, and all three operands
// are printed with the same element size.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract long with carry") {
  TEST_SINGLE(adclb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "adclb z28.s, z29.s, z30.s");
  TEST_SINGLE(adclb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "adclb z28.d, z29.d, z30.d");

  TEST_SINGLE(adclt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "adclt z28.s, z29.s, z30.s");
  TEST_SINGLE(adclt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "adclt z28.d, z29.d, z30.d");

  TEST_SINGLE(sbclb(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sbclb z28.s, z29.s, z30.s");
  TEST_SINGLE(sbclb(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sbclb z28.d, z29.d, z30.d");

  TEST_SINGLE(sbclt(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sbclt z28.s, z29.s, z30.s");
  TEST_SINGLE(sbclt(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "sbclt z28.d, z29.d, z30.d");
}
// Checks the shift-right-and-accumulate emitters (ssra, usra, srsra, ursra).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// Destination and source are printed with the same element size. For each size
// two shift amounts are used: 1 and the element width in bits (8/16/32/64).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift right and accumulate") {
  // Signed shift right and accumulate.
  TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.b, z29.b, #1");
  TEST_SINGLE(ssra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ssra z30.b, z29.b, #8");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.h, z29.h, #1");
  TEST_SINGLE(ssra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "ssra z30.h, z29.h, #16");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.s, z29.s, #1");
  TEST_SINGLE(ssra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "ssra z30.s, z29.s, #32");
  TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ssra z30.d, z29.d, #1");
  TEST_SINGLE(ssra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "ssra z30.d, z29.d, #64");

  // Unsigned shift right and accumulate.
  TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "usra z30.b, z29.b, #1");
  TEST_SINGLE(usra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "usra z30.b, z29.b, #8");
  TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "usra z30.h, z29.h, #1");
  TEST_SINGLE(usra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "usra z30.h, z29.h, #16");
  TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "usra z30.s, z29.s, #1");
  TEST_SINGLE(usra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "usra z30.s, z29.s, #32");
  TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "usra z30.d, z29.d, #1");
  TEST_SINGLE(usra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "usra z30.d, z29.d, #64");

  // Signed rounding shift right and accumulate.
  TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.b, z29.b, #1");
  TEST_SINGLE(srsra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "srsra z30.b, z29.b, #8");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.h, z29.h, #1");
  TEST_SINGLE(srsra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "srsra z30.h, z29.h, #16");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.s, z29.s, #1");
  TEST_SINGLE(srsra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "srsra z30.s, z29.s, #32");
  TEST_SINGLE(srsra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "srsra z30.d, z29.d, #1");
  TEST_SINGLE(srsra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "srsra z30.d, z29.d, #64");

  // Unsigned rounding shift right and accumulate.
  TEST_SINGLE(ursra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.b, z29.b, #1");
  TEST_SINGLE(ursra(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "ursra z30.b, z29.b, #8");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.h, z29.h, #1");
  TEST_SINGLE(ursra(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "ursra z30.h, z29.h, #16");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.s, z29.s, #1");
  TEST_SINGLE(ursra(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "ursra z30.s, z29.s, #32");
  TEST_SINGLE(ursra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "ursra z30.d, z29.d, #1");
  TEST_SINGLE(ursra(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "ursra z30.d, z29.d, #64");
}
// Checks the shift-and-insert emitters (sri, sli).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// Destination and source are printed with the same element size. Three shift
// amounts are used per size: the lowest, an intermediate value, and the highest.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift and insert") {
  // Shift right and insert: amounts run from 1 up to the element width in bits.
  TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sri z30.b, z29.b, #1");
  TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sri z30.b, z29.b, #4");
  TEST_SINGLE(sri(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sri z30.b, z29.b, #8");
  TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sri z30.h, z29.h, #1");
  TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 15), "sri z30.h, z29.h, #15");
  TEST_SINGLE(sri(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sri z30.h, z29.h, #16");
  TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sri z30.s, z29.s, #1");
  TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sri z30.s, z29.s, #15");
  TEST_SINGLE(sri(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sri z30.s, z29.s, #32");
  TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 1), "sri z30.d, z29.d, #1");
  TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sri z30.d, z29.d, #31");
  TEST_SINGLE(sri(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 64), "sri z30.d, z29.d, #64");

  // Shift left and insert: amounts run from 0 up to (element width in bits - 1).
  TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 0), "sli z30.b, z29.b, #0");
  TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 4), "sli z30.b, z29.b, #4");
  TEST_SINGLE(sli(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 7), "sli z30.b, z29.b, #7");
  TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 0), "sli z30.h, z29.h, #0");
  TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 7), "sli z30.h, z29.h, #7");
  TEST_SINGLE(sli(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 15), "sli z30.h, z29.h, #15");
  TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 0), "sli z30.s, z29.s, #0");
  TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 15), "sli z30.s, z29.s, #15");
  TEST_SINGLE(sli(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 31), "sli z30.s, z29.s, #31");
  TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 0), "sli z30.d, z29.d, #0");
  TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 31), "sli z30.d, z29.d, #31");
  TEST_SINGLE(sli(SubRegSize::i64Bit, ZReg::z30, ZReg::z29, 63), "sli z30.d, z29.d, #63");
}
// Checks the absolute-difference-and-accumulate emitters (saba, uaba).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// All three operands are printed with the same element size, and every
// size from 8 to 64 bits is exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer absolute difference and accumulate") {
  // Signed variant.
  TEST_SINGLE(saba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.b, z29.b, z30.b");
  TEST_SINGLE(saba(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.h, z29.h, z30.h");
  TEST_SINGLE(saba(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.s, z29.s, z30.s");
  TEST_SINGLE(saba(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "saba z28.d, z29.d, z30.d");

  // Unsigned variant.
  TEST_SINGLE(uaba(SubRegSize::i8Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.b, z29.b, z30.b");
  TEST_SINGLE(uaba(SubRegSize::i16Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.h, z29.h, z30.h");
  TEST_SINGLE(uaba(SubRegSize::i32Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.s, z29.s, z30.s");
  TEST_SINGLE(uaba(SubRegSize::i64Bit, ZReg::z28, ZReg::z29, ZReg::z30), "uaba z28.d, z29.d, z30.d");
}
// Checks the saturating extract-narrow emitters
// (sqxtn{b,t}, uqxtn{b,t}, sqxtun{b,t}).
// Each TEST_SINGLE pairs one emitter call with the text expected for it
// (the macro itself is defined outside this part of the file).
// Here the SubRegSize argument names the (narrow) destination element size and
// the source is printed at twice that width.
// The i64Bit variants are left commented out: they would need a 128-bit (.q)
// source element, as the disabled expectation strings show.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 saturating extract narrow") {
  TEST_SINGLE(sqxtnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.b, z29.h");
  TEST_SINGLE(sqxtnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.h, z29.s");
  TEST_SINGLE(sqxtnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.s, z29.d");
  // TEST_SINGLE(sqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnb z30.d, z29.q");

  TEST_SINGLE(sqxtnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.b, z29.h");
  TEST_SINGLE(sqxtnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.h, z29.s");
  TEST_SINGLE(sqxtnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.s, z29.d");
  // TEST_SINGLE(sqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtnt z30.d, z29.q");

  TEST_SINGLE(uqxtnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.b, z29.h");
  TEST_SINGLE(uqxtnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.h, z29.s");
  TEST_SINGLE(uqxtnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.s, z29.d");
  // TEST_SINGLE(uqxtnb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnb z30.d, z29.q");

  TEST_SINGLE(uqxtnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.b, z29.h");
  TEST_SINGLE(uqxtnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.h, z29.s");
  TEST_SINGLE(uqxtnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.s, z29.d");
  // TEST_SINGLE(uqxtnt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "uqxtnt z30.d, z29.q");

  TEST_SINGLE(sqxtunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.b, z29.h");
  TEST_SINGLE(sqxtunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.h, z29.s");
  TEST_SINGLE(sqxtunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.s, z29.d");
  // TEST_SINGLE(sqxtunb(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunb z30.d, z29.q");

  TEST_SINGLE(sqxtunt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.b, z29.h");
  TEST_SINGLE(sqxtunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.h, z29.s");
  TEST_SINGLE(sqxtunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.s, z29.d");
  // TEST_SINGLE(sqxtunt(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "sqxtunt z30.d, z29.q");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 bitwise shift right narrow") {
  TEST_SINGLE(sqshrunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqshrunb z30.b, z29.h, #1");
  TEST_SINGLE(sqshrunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqshrunb z30.b, z29.h, #8");
  TEST_SINGLE(sqshrunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqshrunb z30.h, z29.s, #1");
  TEST_SINGLE(sqshrunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqshrunb z30.h, z29.s, #16");
  TEST_SINGLE(sqshrunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqshrunb z30.s, z29.d, #1");
  TEST_SINGLE(sqshrunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqshrunb z30.s, z29.d, #32");

  TEST_SINGLE(sqshrunt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqshrunt z30.b, z29.h, #1");
  TEST_SINGLE(sqshrunt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqshrunt z30.b, z29.h, #8");
  TEST_SINGLE(sqshrunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqshrunt z30.h, z29.s, #1");
  TEST_SINGLE(sqshrunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqshrunt z30.h, z29.s, #16");
  TEST_SINGLE(sqshrunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqshrunt z30.s, z29.d, #1");
  TEST_SINGLE(sqshrunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqshrunt z30.s, z29.d, #32");

  TEST_SINGLE(sqrshrunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunb z30.b, z29.h, #1");
  TEST_SINGLE(sqrshrunb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqrshrunb z30.b, z29.h, #8");
  TEST_SINGLE(sqrshrunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunb z30.h, z29.s, #1");
  TEST_SINGLE(sqrshrunb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqrshrunb z30.h, z29.s, #16");
  TEST_SINGLE(sqrshrunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunb z30.s, z29.d, #1");
  TEST_SINGLE(sqrshrunb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqrshrunb z30.s, z29.d, #32");

  TEST_SINGLE(sqrshrunt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunt z30.b, z29.h, #1");
  TEST_SINGLE(sqrshrunt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqrshrunt z30.b, z29.h, #8");
  TEST_SINGLE(sqrshrunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunt z30.h, z29.s, #1");
  TEST_SINGLE(sqrshrunt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqrshrunt z30.h, z29.s, #16");
  TEST_SINGLE(sqrshrunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqrshrunt z30.s, z29.d, #1");
  TEST_SINGLE(sqrshrunt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqrshrunt z30.s, z29.d, #32");

  TEST_SINGLE(shrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "shrnb z30.b, z29.h, #1");
  TEST_SINGLE(shrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "shrnb z30.b, z29.h, #8");
  TEST_SINGLE(shrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "shrnb z30.h, z29.s, #1");
  TEST_SINGLE(shrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "shrnb z30.h, z29.s, #16");
  TEST_SINGLE(shrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "shrnb z30.s, z29.d, #1");
  TEST_SINGLE(shrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "shrnb z30.s, z29.d, #32");

  TEST_SINGLE(shrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "shrnt z30.b, z29.h, #1");
  TEST_SINGLE(shrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "shrnt z30.b, z29.h, #8");
  TEST_SINGLE(shrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "shrnt z30.h, z29.s, #1");
  TEST_SINGLE(shrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "shrnt z30.h, z29.s, #16");
  TEST_SINGLE(shrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "shrnt z30.s, z29.d, #1");
  TEST_SINGLE(shrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "shrnt z30.s, z29.d, #32");

  TEST_SINGLE(rshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "rshrnb z30.b, z29.h, #1");
  TEST_SINGLE(rshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "rshrnb z30.b, z29.h, #8");
  TEST_SINGLE(rshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "rshrnb z30.h, z29.s, #1");
  TEST_SINGLE(rshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "rshrnb z30.h, z29.s, #16");
  TEST_SINGLE(rshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "rshrnb z30.s, z29.d, #1");
  TEST_SINGLE(rshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "rshrnb z30.s, z29.d, #32");

  TEST_SINGLE(rshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "rshrnt z30.b, z29.h, #1");
  TEST_SINGLE(rshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "rshrnt z30.b, z29.h, #8");
  TEST_SINGLE(rshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "rshrnt z30.h, z29.s, #1");
  TEST_SINGLE(rshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "rshrnt z30.h, z29.s, #16");
  TEST_SINGLE(rshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "rshrnt z30.s, z29.d, #1");
  TEST_SINGLE(rshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "rshrnt z30.s, z29.d, #32");

  TEST_SINGLE(sqshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqshrnb z30.b, z29.h, #1");
  TEST_SINGLE(sqshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqshrnb z30.b, z29.h, #8");
  TEST_SINGLE(sqshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqshrnb z30.h, z29.s, #1");
  TEST_SINGLE(sqshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqshrnb z30.h, z29.s, #16");
  TEST_SINGLE(sqshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqshrnb z30.s, z29.d, #1");
  TEST_SINGLE(sqshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqshrnb z30.s, z29.d, #32");

  TEST_SINGLE(sqshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqshrnt z30.b, z29.h, #1");
  TEST_SINGLE(sqshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqshrnt z30.b, z29.h, #8");
  TEST_SINGLE(sqshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqshrnt z30.h, z29.s, #1");
  TEST_SINGLE(sqshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqshrnt z30.h, z29.s, #16");
  TEST_SINGLE(sqshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqshrnt z30.s, z29.d, #1");
  TEST_SINGLE(sqshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqshrnt z30.s, z29.d, #32");

  TEST_SINGLE(sqrshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnb z30.b, z29.h, #1");
  TEST_SINGLE(sqrshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqrshrnb z30.b, z29.h, #8");
  TEST_SINGLE(sqrshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnb z30.h, z29.s, #1");
  TEST_SINGLE(sqrshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqrshrnb z30.h, z29.s, #16");
  TEST_SINGLE(sqrshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnb z30.s, z29.d, #1");
  TEST_SINGLE(sqrshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqrshrnb z30.s, z29.d, #32");

  TEST_SINGLE(sqrshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnt z30.b, z29.h, #1");
  TEST_SINGLE(sqrshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "sqrshrnt z30.b, z29.h, #8");
  TEST_SINGLE(sqrshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnt z30.h, z29.s, #1");
  TEST_SINGLE(sqrshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "sqrshrnt z30.h, z29.s, #16");
  TEST_SINGLE(sqrshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "sqrshrnt z30.s, z29.d, #1");
  TEST_SINGLE(sqrshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "sqrshrnt z30.s, z29.d, #32");

  TEST_SINGLE(uqshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "uqshrnb z30.b, z29.h, #1");
  TEST_SINGLE(uqshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "uqshrnb z30.b, z29.h, #8");
  TEST_SINGLE(uqshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "uqshrnb z30.h, z29.s, #1");
  TEST_SINGLE(uqshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "uqshrnb z30.h, z29.s, #16");
  TEST_SINGLE(uqshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "uqshrnb z30.s, z29.d, #1");
  TEST_SINGLE(uqshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "uqshrnb z30.s, z29.d, #32");

  TEST_SINGLE(uqshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "uqshrnt z30.b, z29.h, #1");
  TEST_SINGLE(uqshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "uqshrnt z30.b, z29.h, #8");
  TEST_SINGLE(uqshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "uqshrnt z30.h, z29.s, #1");
  TEST_SINGLE(uqshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "uqshrnt z30.h, z29.s, #16");
  TEST_SINGLE(uqshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "uqshrnt z30.s, z29.d, #1");
  TEST_SINGLE(uqshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "uqshrnt z30.s, z29.d, #32");

  TEST_SINGLE(uqrshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnb z30.b, z29.h, #1");
  TEST_SINGLE(uqrshrnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "uqrshrnb z30.b, z29.h, #8");
  TEST_SINGLE(uqrshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnb z30.h, z29.s, #1");
  TEST_SINGLE(uqrshrnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "uqrshrnb z30.h, z29.s, #16");
  TEST_SINGLE(uqrshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnb z30.s, z29.d, #1");
  TEST_SINGLE(uqrshrnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "uqrshrnb z30.s, z29.d, #32");

  TEST_SINGLE(uqrshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnt z30.b, z29.h, #1");
  TEST_SINGLE(uqrshrnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, 8), "uqrshrnt z30.b, z29.h, #8");
  TEST_SINGLE(uqrshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnt z30.h, z29.s, #1");
  TEST_SINGLE(uqrshrnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, 16), "uqrshrnt z30.h, z29.s, #16");
  TEST_SINGLE(uqrshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 1), "uqrshrnt z30.s, z29.d, #1");
  TEST_SINGLE(uqrshrnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, 32), "uqrshrnt z30.s, z29.d, #32");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 integer add/subtract narrow high part") {
  // Covers addhn, raddhn, subhn and rsubhn in both their "b" and "t" forms.
  // Each TEST_SINGLE pairs an emitter call with the disassembly text expected for it.
  // The SubRegSize argument names the destination element size; the expected text
  // shows both sources one size wider (.b <- .h, .h <- .s, .s <- .d).
  TEST_SINGLE(addhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.b, z29.h, z28.h");
  TEST_SINGLE(addhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.h, z29.s, z28.s");
  TEST_SINGLE(addhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnb z30.s, z29.d, z28.d");

  TEST_SINGLE(addhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.b, z29.h, z28.h");
  TEST_SINGLE(addhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.h, z29.s, z28.s");
  TEST_SINGLE(addhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "addhnt z30.s, z29.d, z28.d");

  TEST_SINGLE(raddhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.b, z29.h, z28.h");
  TEST_SINGLE(raddhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.h, z29.s, z28.s");
  TEST_SINGLE(raddhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnb z30.s, z29.d, z28.d");

  TEST_SINGLE(raddhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.b, z29.h, z28.h");
  TEST_SINGLE(raddhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.h, z29.s, z28.s");
  TEST_SINGLE(raddhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "raddhnt z30.s, z29.d, z28.d");

  TEST_SINGLE(subhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.b, z29.h, z28.h");
  TEST_SINGLE(subhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.h, z29.s, z28.s");
  TEST_SINGLE(subhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnb z30.s, z29.d, z28.d");

  TEST_SINGLE(subhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.b, z29.h, z28.h");
  TEST_SINGLE(subhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.h, z29.s, z28.s");
  TEST_SINGLE(subhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "subhnt z30.s, z29.d, z28.d");

  TEST_SINGLE(rsubhnb(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.b, z29.h, z28.h");
  TEST_SINGLE(rsubhnb(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.h, z29.s, z28.s");
  TEST_SINGLE(rsubhnb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnb z30.s, z29.d, z28.d");

  TEST_SINGLE(rsubhnt(SubRegSize::i8Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.b, z29.h, z28.h");
  TEST_SINGLE(rsubhnt(SubRegSize::i16Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.h, z29.s, z28.s");
  TEST_SINGLE(rsubhnt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "rsubhnt z30.s, z29.d, z28.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 Histogram Computation") {
  // histcnt with a zeroing predicate; only the 32-bit and 64-bit element sizes are exercised here.
  TEST_SINGLE(histcnt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.s, p6/z, z29.s, z28.s");
  TEST_SINGLE(histcnt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Zeroing(), ZReg::z29, ZReg::z28), "histcnt z30.d, p6/z, z29.d, z28.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 Histogram Computation - Segment") {
  // histseg takes no element-size argument; the expected text uses byte (.b) elements throughout.
  TEST_SINGLE(histseg(ZReg::z30, ZReg::z29, ZReg::z28), "histseg z30.b, z29.b, z28.b");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 crypto unary operations") {
  // aesimc / aesmc: every case passes the same register as both destination and source,
  // once with a low register number (z7) and once with the highest (z31).
  TEST_SINGLE(aesimc(ZReg::z7, ZReg::z7), "aesimc z7.b, z7.b");
  TEST_SINGLE(aesimc(ZReg::z31, ZReg::z31), "aesimc z31.b, z31.b");

  TEST_SINGLE(aesmc(ZReg::z7, ZReg::z7), "aesmc z7.b, z7.b");
  TEST_SINGLE(aesmc(ZReg::z31, ZReg::z31), "aesmc z31.b, z31.b");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 crypto destructive binary operations") {
  // aesd / aese / sm4e: the destination and the first source are the same register in
  // every case. The expected text uses .b elements for the AES ops and .s for sm4e.
  TEST_SINGLE(aesd(ZReg::z7, ZReg::z7, ZReg::z8), "aesd z7.b, z7.b, z8.b");
  TEST_SINGLE(aesd(ZReg::z30, ZReg::z30, ZReg::z31), "aesd z30.b, z30.b, z31.b");

  TEST_SINGLE(aese(ZReg::z7, ZReg::z7, ZReg::z8), "aese z7.b, z7.b, z8.b");
  TEST_SINGLE(aese(ZReg::z30, ZReg::z30, ZReg::z31), "aese z30.b, z30.b, z31.b");

  TEST_SINGLE(sm4e(ZReg::z7, ZReg::z7, ZReg::z8), "sm4e z7.s, z7.s, z8.s");
  TEST_SINGLE(sm4e(ZReg::z30, ZReg::z30, ZReg::z31), "sm4e z30.s, z30.s, z31.s");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE2 crypto constructive binary operations") {
  // sm4ekey / rax1: three distinct registers per case, checked with low (z0..z2) and
  // high (z29..z31) register numbers. Expected text uses .s for sm4ekey and .d for rax1.
  TEST_SINGLE(sm4ekey(ZReg::z0, ZReg::z1, ZReg::z2), "sm4ekey z0.s, z1.s, z2.s");
  TEST_SINGLE(sm4ekey(ZReg::z29, ZReg::z30, ZReg::z31), "sm4ekey z29.s, z30.s, z31.s");

  TEST_SINGLE(rax1(ZReg::z0, ZReg::z1, ZReg::z2), "rax1 z0.d, z1.d, z2.d");
  TEST_SINGLE(rax1(ZReg::z29, ZReg::z30, ZReg::z31), "rax1 z29.d, z30.d, z31.d");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE BFloat16 floating-point dot product (indexed)") {
  // Intentionally empty placeholder: this test case currently checks nothing.
  // TODO: Implement in emitter.
  // NOTE(review): presumably this class is the indexed form of bfdot — confirm against the Arm ISA reference.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add long (indexed)") {
  // Indexed forms of fmlalb / fmlalt / fmlslb / fmlslt. Each mnemonic is checked with
  // element indices 0, 3 and 7, always using z7 as the indexed register.
  // The expected text shows a .s destination with .h sources.
  TEST_SINGLE(fmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "fmlalb z30.s, z29.h, z7.h[0]");
  TEST_SINGLE(fmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmlalb z30.s, z29.h, z7.h[3]");
  TEST_SINGLE(fmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmlalb z30.s, z29.h, z7.h[7]");

  TEST_SINGLE(fmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "fmlalt z30.s, z29.h, z7.h[0]");
  TEST_SINGLE(fmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmlalt z30.s, z29.h, z7.h[3]");
  TEST_SINGLE(fmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmlalt z30.s, z29.h, z7.h[7]");

  TEST_SINGLE(fmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "fmlslb z30.s, z29.h, z7.h[0]");
  TEST_SINGLE(fmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmlslb z30.s, z29.h, z7.h[3]");
  TEST_SINGLE(fmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmlslb z30.s, z29.h, z7.h[7]");

  TEST_SINGLE(fmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "fmlslt z30.s, z29.h, z7.h[0]");
  TEST_SINGLE(fmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "fmlslt z30.s, z29.h, z7.h[3]");
  TEST_SINGLE(fmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "fmlslt z30.s, z29.h, z7.h[7]");

  // XXX: vixl's disassembler doesn't support these. Re-enable when it does
  //      or upon switching disassemblers.

  // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "bfmlalb z30.s, z29.h, z7.h[0]");
  // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "bfmlalb z30.s, z29.h, z7.h[3]");
  // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "bfmlalb z30.s, z29.h, z7.h[7]");

  // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "bfmlalt z30.s, z29.h, z7.h[0]");
  // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "bfmlalt z30.s, z29.h, z7.h[3]");
  // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "bfmlalt z30.s, z29.h, z7.h[7]");

  // TEST_SINGLE(bfmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "bfmlslb z30.s, z29.h, z7.h[0]");
  // TEST_SINGLE(bfmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "bfmlslb z30.s, z29.h, z7.h[3]");
  // TEST_SINGLE(bfmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "bfmlslb z30.s, z29.h, z7.h[7]");

  // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 0), "bfmlslt z30.s, z29.h, z7.h[0]");
  // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 3), "bfmlslt z30.s, z29.h, z7.h[3]");
  // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z7, 7), "bfmlslt z30.s, z29.h, z7.h[7]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE BFloat16 floating-point dot product") {
  // Intentionally empty placeholder: this test case currently checks nothing.
  // TODO: Implement in emitter.
  // NOTE(review): presumably this class is the vector (non-indexed) form of bfdot — confirm against the Arm ISA reference.
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-add long") {
  // Vector (non-indexed) forms of fmlalb / fmlalt / fmlslb / fmlslt.
  // The expected text shows a .s destination with .h sources.
  // These forms take no index operand, so one case per mnemonic is enough; the
  // previous verbatim triplicates (copied from the indexed variant) added no coverage.
  TEST_SINGLE(fmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmlalb z30.s, z29.h, z28.h");
  TEST_SINGLE(fmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmlalt z30.s, z29.h, z28.h");
  TEST_SINGLE(fmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmlslb z30.s, z29.h, z28.h");
  TEST_SINGLE(fmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "fmlslt z30.s, z29.h, z28.h");

  // XXX: vixl's disassembler doesn't support these. Re-enable when it does
  //      or upon switching disassemblers.
  // The bfmlslb case below calls bfmlslb (it previously called bfmlalb while
  // expecting "bfmlslb" text, which would have failed once re-enabled).

  // TEST_SINGLE(bfmlalb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalb z30.s, z29.h, z28.h");
  // TEST_SINGLE(bfmlalt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlalt z30.s, z29.h, z28.h");
  // TEST_SINGLE(bfmlslb(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslb z30.s, z29.h, z28.h");
  // TEST_SINGLE(bfmlslt(SubRegSize::i32Bit, ZReg::z30, ZReg::z29, ZReg::z28), "bfmlslt z30.s, z29.h, z28.h");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic (predicated)") {
  // ftmad is called without a predicate and with an immediate (#7); destination and
  // first source are the same register.
  TEST_SINGLE(ftmad(SubRegSize::i16Bit, ZReg::z30, ZReg::z30, ZReg::z28, 7), "ftmad z30.h, z30.h, z28.h, #7");
  TEST_SINGLE(ftmad(SubRegSize::i32Bit, ZReg::z30, ZReg::z30, ZReg::z28, 7), "ftmad z30.s, z30.s, z28.s, #7");
  TEST_SINGLE(ftmad(SubRegSize::i64Bit, ZReg::z30, ZReg::z30, ZReg::z28, 7), "ftmad z30.d, z30.d, z28.d, #7");

  // Every group below checks one merging-predicated op at 16/32/64-bit element sizes,
  // with the destination doubling as the first source.
  // NOTE(review): the i8Bit and i128Bit cases are left commented out in each group,
  // presumably because these FP ops have no .b/.q element forms — confirm.

  // TEST_SINGLE(fadd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fadd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fadd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fadd z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fsub(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fsub z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fsub(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsub z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmul(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmul z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmul(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmul z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fsubr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fsubr z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fsubr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fsubr z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmaxnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmaxnm z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmaxnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmaxnm z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fminnm(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fminnm z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fminnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fminnm(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fminnm z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmax(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmax z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmax(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmax z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmin(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmin z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmin(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmin z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fabd(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fabd z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fabd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fabd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fabd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fabd(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fabd z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fscale(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fscale z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fscale(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fscale(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fscale(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fscale(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fscale z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fmulx(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fmulx z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fmulx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fmulx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fmulx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fmulx(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fmulx z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fdiv(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fdiv z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fdiv(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fdiv(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fdiv(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fdiv(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdiv z30.q, p6/m, z30.q, z28.q");

  // TEST_SINGLE(fdivr(SubRegSize::i8Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28),   "fdivr z30.b, p6/m, z30.b, z28.b");
  TEST_SINGLE(fdivr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.h, p6/m, z30.h, z28.h");
  TEST_SINGLE(fdivr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.s, p6/m, z30.s, z28.s");
  TEST_SINGLE(fdivr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.d, p6/m, z30.d, z28.d");
  // TEST_SINGLE(fdivr(SubRegSize::i128Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z30, ZReg::z28), "fdivr z30.q, p6/m, z30.q, z28.q");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point arithmetic with immediate (predicated)") {
  // Merging-predicated FP ops whose second operand is a fixed immediate chosen via an enum.
  // The emitter call names a single Z register; the expected text prints it as both
  // destination and first source.
  // Each op is checked at 16/32/64-bit element sizes with both enum values it is given here:
  //   SVEFAddSubImm  (_0_5, _1_0) -> "#0.5", "#1.0"  for fadd / fsub / fsubr
  //   SVEFMulImm     (_0_5, _2_0) -> "#0.5", "#2.0"  for fmul
  //   SVEFMaxMinImm  (_0_0, _1_0) -> "#0.0", "#1.0"  for fmaxnm / fminnm / fmax / fmin
  TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.h, p6/m, z30.h, #0.5");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.s, p6/m, z30.s, #0.5");
  TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fadd z30.d, p6/m, z30.d, #0.5");
  TEST_SINGLE(fadd(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fadd(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fadd(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fadd z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.h, p6/m, z30.h, #0.5");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.s, p6/m, z30.s, #0.5");
  TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsub z30.d, p6/m, z30.d, #0.5");
  TEST_SINGLE(fsub(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fsub(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fsub(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsub z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.h, p6/m, z30.h, #0.5");
  TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.s, p6/m, z30.s, #0.5");
  TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_0_5), "fsubr z30.d, p6/m, z30.d, #0.5");
  TEST_SINGLE(fsubr(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fsubr(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fsubr(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFAddSubImm::_1_0), "fsubr z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.h, p6/m, z30.h, #0.5");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.s, p6/m, z30.s, #0.5");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_0_5), "fmul z30.d, p6/m, z30.d, #0.5");
  TEST_SINGLE(fmul(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.h, p6/m, z30.h, #2.0");
  TEST_SINGLE(fmul(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.s, p6/m, z30.s, #2.0");
  TEST_SINGLE(fmul(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMulImm::_2_0), "fmul z30.d, p6/m, z30.d, #2.0");

  TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmaxnm z30.h, p6/m, z30.h, #0.0");
  TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmaxnm z30.s, p6/m, z30.s, #0.0");
  TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmaxnm z30.d, p6/m, z30.d, #0.0");
  TEST_SINGLE(fmaxnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmaxnm z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fmaxnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmaxnm z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fmaxnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmaxnm z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fminnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fminnm z30.h, p6/m, z30.h, #0.0");
  TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fminnm z30.s, p6/m, z30.s, #0.0");
  TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fminnm z30.d, p6/m, z30.d, #0.0");
  TEST_SINGLE(fminnm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fminnm z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fminnm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fminnm z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fminnm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fminnm z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.h, p6/m, z30.h, #0.0");
  TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.s, p6/m, z30.s, #0.0");
  TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmax z30.d, p6/m, z30.d, #0.0");
  TEST_SINGLE(fmax(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fmax(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fmax(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmax z30.d, p6/m, z30.d, #1.0");

  TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.h, p6/m, z30.h, #0.0");
  TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.s, p6/m, z30.s, #0.0");
  TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_0_0), "fmin z30.d, p6/m, z30.d, #0.0");
  TEST_SINGLE(fmin(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.h, p6/m, z30.h, #1.0");
  TEST_SINGLE(fmin(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.s, p6/m, z30.s, #1.0");
  TEST_SINGLE(fmin(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), SVEFMaxMinImm::_1_0), "fmin z30.d, p6/m, z30.d, #1.0");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Memory - 32-bit Gather and Unsized Contiguous") {
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1b {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1b {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1b {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1b {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ld1b {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1b {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1b {z30.s}, p6/z, [z31.s, #31]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1b {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1b {z30.d}, p6/z, [z31.d, #31]");

  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1sb {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1sb {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1sb {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1sb {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ld1sb {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sb {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1sb {z30.s}, p6/z, [z31.s, #31]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sb {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ld1sb {z30.d}, p6/z, [z31.d, #31]");

  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ld1d {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d, uxtw]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ld1d {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d, sxtw]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "ld1d {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d, uxtw #3]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "ld1d {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d, sxtw #3]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "ld1d {z30.d}, p6/z, [x30, "
                                                                                                                "z31.d, lsl #3]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ld1d {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d]");

  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1d {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), "ld1d {z30.d}, p6/z, [z31.d, #248]");

  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)),
              "ld1h {z30.d}, p6/z, [x30, z31.d, lsl #1]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ld1h {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1h {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1h {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1h {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1h {z30.d}, p6/z, [x30, z31.d, sxtw]");

  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1h {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1h {z30.s}, p6/z, [z31.s, #62]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1h {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1h {z30.d}, p6/z, [z31.d, #62]");

  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1sh {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1sh {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1sh {z30.d}, p6/z, [x30, z31.d, sxtw]");

  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sh {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1sh {z30.s}, p6/z, [z31.s, #62]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sh {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ld1sh {z30.d}, p6/z, [z31.d, #62]");

  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)),
              "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]");
  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)),
              "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)),
              "ld1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)),
              "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)),
              "ld1w {z30.d}, p6/z, [x30, z31.d, lsl #2]");

  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1w {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1w {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ld1w {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ld1w {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ld1w {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1w {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1w {z30.s}, p6/z, [z31.s, #124]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1w {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1w {z30.d}, p6/z, [z31.d, #124]");

  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ld1sw {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d, uxtw]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ld1sw {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d, sxtw]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "ld1sw {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d, uxtw #2]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "ld1sw {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d, sxtw #2]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "ld1sw {z30.d}, p6/z, "
                                                                                                                 "[x30, z31.d, lsl #2]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ld1sw {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d]");

  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ld1sw {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ld1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ld1sw {z30.d}, p6/z, [z31.d, #124]");

  TEST_SINGLE(ldff1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1b {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ldff1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1b {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1b {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1b {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ldff1b {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1b {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ldff1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1b {z30.s}, p6/z, [z31.s, "
                                                                                                       "#31]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1b {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1b {z30.d}, p6/z, [z31.d, "
                                                                                                       "#31]");

  TEST_SINGLE(ldff1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1sb {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ldff1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1sb {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1sb {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1sb {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ldff1sb {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sb {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ldff1sb<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1sb {z30.s}, p6/z, [z31.s, "
                                                                                                        "#31]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sb {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 31)), "ldff1sb {z30.d}, p6/z, [z31.d, "
                                                                                                        "#31]");

  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ldff1d {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ldff1d {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d, sxtw]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "ldff1d {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d, uxtw #3]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "ldff1d {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d, sxtw #3]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "ldff1d {z30.d}, p6/z, "
                                                                                                                  "[x30, z31.d, lsl #3]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ldff1d {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d]");

  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1d {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1d(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 248)), "ldff1d {z30.d}, p6/z, [z31.d, #248]");

  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d, lsl #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1h {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1h {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1h {z30.d}, p6/z, [x30, z31.d, sxtw]");

  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1h {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1h {z30.s}, p6/z, [z31.s, "
                                                                                                       "#62]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1h {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1h {z30.d}, p6/z, [z31.d, "
                                                                                                       "#62]");

  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d, lsl #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1sh {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1sh {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1sh {z30.d}, p6/z, [x30, z31.d, sxtw]");

  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sh {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1sh {z30.s}, p6/z, [z31.s, "
                                                                                                        "#62]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sh {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 62)), "ldff1sh {z30.d}, p6/z, [z31.d, "
                                                                                                        "#62]");

  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)),
              "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw #2]");
  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)),
              "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw #2]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw #2]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw #2]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d, lsl #2]");

  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1w {z30.s}, p6/z, [x30, z31.s, uxtw]");
  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1w {z30.s}, p6/z, [x30, z31.s, sxtw]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d, uxtw]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d, sxtw]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)),
              "ldff1w {z30.d}, p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1w {z30.s}, p6/z, [z31.s]");
  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1w {z30.s}, p6/z, [z31.s, "
                                                                                                        "#124]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1w {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1w {z30.d}, p6/z, [z31.d, "
                                                                                                        "#124]");

  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "ldff1sw {z30.d}, "
                                                                                                                    "p6/z, [x30, z31.d, "
                                                                                                                    "uxtw]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "ldff1sw {z30.d}, "
                                                                                                                    "p6/z, [x30, z31.d, "
                                                                                                                    "sxtw]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "ldff1sw {z30.d}, "
                                                                                                                    "p6/z, [x30, z31.d, "
                                                                                                                    "uxtw #2]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "ldff1sw {z30.d}, "
                                                                                                                    "p6/z, [x30, z31.d, "
                                                                                                                    "sxtw #2]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "ldff1sw {z30.d}, p6/z, "
                                                                                                                   "[x30, z31.d, lsl #2]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "ldff1sw {z30.d}, "
                                                                                                                    "p6/z, [x30, z31.d]");

  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 0)), "ldff1sw {z30.d}, p6/z, [z31.d]");
  TEST_SINGLE(ldff1sw(ZReg::z30, PReg::p6.Zeroing(), SVEMemOperand(ZReg::z31, 124)), "ldff1sw {z30.d}, p6/z, [z31.d, #124]");

  TEST_SINGLE(ldr(PReg::p6, XReg::x29, 0), "ldr p6, [x29]");
  TEST_SINGLE(ldr(PReg::p6, XReg::x29, -256), "ldr p6, [x29, #-256, mul vl]");
  TEST_SINGLE(ldr(PReg::p6, XReg::x29, 255), "ldr p6, [x29, #255, mul vl]");

  TEST_SINGLE(ldr(ZReg::z30, XReg::x29, 0), "ldr z30, [x29]");
  TEST_SINGLE(ldr(ZReg::z30, XReg::x29, -256), "ldr z30, [x29, #-256, mul vl]");
  TEST_SINGLE(ldr(ZReg::z30, XReg::x29, 255), "ldr z30, [x29, #255, mul vl]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast element") {
  // Every check in this case loads into z30 under the zeroing predicate p6/z with x29 as the
  // base register. Those operands are named once here so that each line below only shows what
  // actually varies: the mnemonic, the element size and the immediate byte offset.
  const auto BroadcastDst = ZReg::z30;
  const auto ZeroingPred = PReg::p6.Zeroing();
  const auto AddrBase = Reg::r29;

  // ld1rb: byte source element, checked at offsets 0, 31 and 63 for .b/.h/.s/.d destinations.
  TEST_SINGLE(ld1rb(SubRegSize::i8Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rb {z30.b}, p6/z, [x29]");
  TEST_SINGLE(ld1rb(SubRegSize::i8Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rb {z30.b}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rb(SubRegSize::i8Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rb {z30.b}, p6/z, [x29, #63]");

  TEST_SINGLE(ld1rb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rb {z30.h}, p6/z, [x29]");
  TEST_SINGLE(ld1rb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rb {z30.h}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rb {z30.h}, p6/z, [x29, #63]");

  TEST_SINGLE(ld1rb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rb {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rb {z30.s}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rb {z30.s}, p6/z, [x29, #63]");

  TEST_SINGLE(ld1rb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rb {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rb {z30.d}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rb {z30.d}, p6/z, [x29, #63]");

  // ld1rsb: same offsets as ld1rb, but only .h/.s/.d destinations are exercised.
  TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsb {z30.h}, p6/z, [x29]");
  TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rsb {z30.h}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rsb(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rsb {z30.h}, p6/z, [x29, #63]");

  TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsb {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rsb {z30.s}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rsb(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rsb {z30.s}, p6/z, [x29, #63]");

  TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsb {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 31), "ld1rsb {z30.d}, p6/z, [x29, #31]");
  TEST_SINGLE(ld1rsb(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 63), "ld1rsb {z30.d}, p6/z, [x29, #63]");

  // ld1rh: halfword source element, checked at offsets 0, 64 and 126 for .h/.s/.d destinations.
  TEST_SINGLE(ld1rh(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rh {z30.h}, p6/z, [x29]");
  TEST_SINGLE(ld1rh(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 64), "ld1rh {z30.h}, p6/z, [x29, #64]");
  TEST_SINGLE(ld1rh(SubRegSize::i16Bit, BroadcastDst, ZeroingPred, AddrBase, 126), "ld1rh {z30.h}, p6/z, [x29, #126]");

  TEST_SINGLE(ld1rh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rh {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 64), "ld1rh {z30.s}, p6/z, [x29, #64]");
  TEST_SINGLE(ld1rh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 126), "ld1rh {z30.s}, p6/z, [x29, #126]");

  TEST_SINGLE(ld1rh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rh {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 64), "ld1rh {z30.d}, p6/z, [x29, #64]");
  TEST_SINGLE(ld1rh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 126), "ld1rh {z30.d}, p6/z, [x29, #126]");

  // ld1rsh: same offsets as ld1rh, but only .s/.d destinations are exercised.
  TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsh {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 64), "ld1rsh {z30.s}, p6/z, [x29, #64]");
  TEST_SINGLE(ld1rsh(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 126), "ld1rsh {z30.s}, p6/z, [x29, #126]");

  TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsh {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 64), "ld1rsh {z30.d}, p6/z, [x29, #64]");
  TEST_SINGLE(ld1rsh(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 126), "ld1rsh {z30.d}, p6/z, [x29, #126]");

  // ld1rw: word source element, checked at offsets 0, 128 and 252 for .s/.d destinations.
  TEST_SINGLE(ld1rw(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rw {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rw(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 128), "ld1rw {z30.s}, p6/z, [x29, #128]");
  TEST_SINGLE(ld1rw(SubRegSize::i32Bit, BroadcastDst, ZeroingPred, AddrBase, 252), "ld1rw {z30.s}, p6/z, [x29, #252]");

  TEST_SINGLE(ld1rw(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rw {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rw(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 128), "ld1rw {z30.d}, p6/z, [x29, #128]");
  TEST_SINGLE(ld1rw(SubRegSize::i64Bit, BroadcastDst, ZeroingPred, AddrBase, 252), "ld1rw {z30.d}, p6/z, [x29, #252]");

  // ld1rsw: takes no element-size argument; the expected output always uses a .d destination.
  TEST_SINGLE(ld1rsw(BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rsw {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rsw(BroadcastDst, ZeroingPred, AddrBase, 128), "ld1rsw {z30.d}, p6/z, [x29, #128]");
  TEST_SINGLE(ld1rsw(BroadcastDst, ZeroingPred, AddrBase, 252), "ld1rsw {z30.d}, p6/z, [x29, #252]");

  // ld1rd: takes no element-size argument; checked at offsets 0, 256 and 504.
  TEST_SINGLE(ld1rd(BroadcastDst, ZeroingPred, AddrBase, 0), "ld1rd {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rd(BroadcastDst, ZeroingPred, AddrBase, 256), "ld1rd {z30.d}, p6/z, [x29, #256]");
  TEST_SINGLE(ld1rd(BroadcastDst, ZeroingPred, AddrBase, 504), "ld1rd {z30.d}, p6/z, [x29, #504]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous non-temporal load (scalar plus immediate)") {
  // Checks ldnt1{b,h,w,d} with immediate offsets of 0, -8 and 7.
  // A zero offset is expected to disassemble without an immediate, while
  // non-zero offsets are expected to be printed with a `mul vl` suffix.
  // The predicate is passed without `.Zeroing()` here, yet the expected
  // disassembly still shows `p6/z`.
  // NOTE(review): -8 and 7 look like the ends of the encodable immediate range — confirm.
  TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1b {z31.b}, p6/z, [x29]");
  TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1b {z31.b}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ldnt1b(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1b {z31.b}, p6/z, [x29, #7, mul vl]");

  TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1h {z31.h}, p6/z, [x29]");
  TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1h {z31.h}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ldnt1h(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1h {z31.h}, p6/z, [x29, #7, mul vl]");

  TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1w {z31.s}, p6/z, [x29]");
  TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1w {z31.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ldnt1w(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1w {z31.s}, p6/z, [x29, #7, mul vl]");

  TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, 0), "ldnt1d {z31.d}, p6/z, [x29]");
  TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, -8), "ldnt1d {z31.d}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ldnt1d(ZReg::z31, PReg::p6, Reg::r29, 7), "ldnt1d {z31.d}, p6/z, [x29, #7, mul vl]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus scalar)") {
  // Checks ld2/ld3/ld4 for byte, halfword, word and doubleword elements with a
  // register offset. Each variant is checked twice: once with a register list
  // that starts at z31 and continues at z0, and once with a list starting at z26.
  // The expected disassembly prints the offset register unshifted for byte
  // elements and with `lsl #1`, `lsl #2`, `lsl #3` for h, s and d elements.

  // Byte elements: no shift on the offset register.
  TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z31.b, z0.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2b {z26.b, z27.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, "
                                                                                             "x30]");
  TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z31.b, z0.b, z1.b, z2.b}, "
                                                                                                     "p6/z, [x29, x30]");
  TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4b {z26.b, z27.b, z28.b, "
                                                                                                        "z29.b}, p6/z, [x29, x30]");

  // Halfword elements: offset register printed with `lsl #1`.
  TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2h {z31.h, z0.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2h {z26.h, z27.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29, x30, lsl "
                                                                                           "#1]");
  TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, x30, "
                                                                                             "lsl #1]");
  TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z31.h, z0.h, z1.h, z2.h}, "
                                                                                                     "p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4h {z26.h, z27.h, z28.h, "
                                                                                                        "z29.h}, p6/z, [x29, x30, lsl #1]");

  // Word elements: offset register printed with `lsl #2`.
  TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2w {z31.s, z0.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2w {z26.s, z27.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29, x30, lsl "
                                                                                           "#2]");
  TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, x30, "
                                                                                             "lsl #2]");
  TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z31.s, z0.s, z1.s, z2.s}, "
                                                                                                     "p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4w {z26.s, z27.s, z28.s, "
                                                                                                        "z29.s}, p6/z, [x29, x30, lsl #2]");

  // Doubleword elements: offset register printed with `lsl #3`.
  TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2d {z31.d, z0.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld2d {z26.d, z27.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29, x30, lsl "
                                                                                           "#3]");
  TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, x30, "
                                                                                             "lsl #3]");
  TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z31.d, z0.d, z1.d, z2.d}, "
                                                                                                     "p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld4d {z26.d, z27.d, z28.d, "
                                                                                                        "z29.d}, p6/z, [x29, x30, lsl #3]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadword (scalar plus immediate)") {
  // Checks the ld1rq{b,h,w,d} and ld1ro{b,h,w,d} immediate-offset forms.
  // Offsets are passed as byte values and are expected to be printed verbatim
  // (no `mul vl`); a zero offset is expected to be omitted from the disassembly.
  // The ld1rq* checks use 0, -128 and 112; the ld1ro* checks use 0, -256 and 224.
  // NOTE(review): these look like the minimum/maximum encodable offsets
  // (multiples of 16 for ld1rq*, multiples of 32 for ld1ro*) — confirm against the emitter.
  TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqb {z30.b}, p6/z, [x29]");
  TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqb {z30.b}, p6/z, [x29, #-128]");
  TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqb {z30.b}, p6/z, [x29, #112]");

  TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rob {z30.b}, p6/z, [x29]");
  TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rob {z30.b}, p6/z, [x29, #-256]");
  TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rob {z30.b}, p6/z, [x29, #224]");

  TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqh {z30.h}, p6/z, [x29]");
  TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqh {z30.h}, p6/z, [x29, #-128]");
  TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqh {z30.h}, p6/z, [x29, #112]");

  TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1roh {z30.h}, p6/z, [x29]");
  TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1roh {z30.h}, p6/z, [x29, #-256]");
  TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1roh {z30.h}, p6/z, [x29, #224]");

  TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqw {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqw {z30.s}, p6/z, [x29, #-128]");
  TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqw {z30.s}, p6/z, [x29, #112]");

  TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1row {z30.s}, p6/z, [x29]");
  TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1row {z30.s}, p6/z, [x29, #-256]");
  TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1row {z30.s}, p6/z, [x29, #224]");

  TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rqd {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -128), "ld1rqd {z30.d}, p6/z, [x29, #-128]");
  TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 112), "ld1rqd {z30.d}, p6/z, [x29, #112]");

  TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 0), "ld1rod {z30.d}, p6/z, [x29]");
  TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, -256), "ld1rod {z30.d}, p6/z, [x29, #-256]");
  TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, 224), "ld1rod {z30.d}, p6/z, [x29, #224]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load and broadcast quadword (scalar plus scalar)") {
  // Checks the ld1rq{b,h,w,d} and ld1ro{b,h,w,d} register-offset forms.
  // The expected disassembly prints the offset register unshifted for byte
  // elements and with `lsl #1`, `lsl #2`, `lsl #3` for h, s and d elements.
  //
  // Each instruction is checked with two distinct register combinations
  // (z30/x30 and z26/x28) so that the destination and offset register fields
  // are each exercised with more than one value. Previously the second check
  // of every pair was an exact copy of the first and added no coverage.
  TEST_SINGLE(ld1rqb(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rqb {z30.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1rqb(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rqb {z26.b}, p6/z, [x29, x28]");

  TEST_SINGLE(ld1rob(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rob {z30.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1rob(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rob {z26.b}, p6/z, [x29, x28]");

  TEST_SINGLE(ld1rqh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rqh {z30.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1rqh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rqh {z26.h}, p6/z, [x29, x28, lsl #1]");

  TEST_SINGLE(ld1roh(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1roh {z30.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1roh(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1roh {z26.h}, p6/z, [x29, x28, lsl #1]");

  TEST_SINGLE(ld1rqw(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rqw {z30.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld1rqw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rqw {z26.s}, p6/z, [x29, x28, lsl #2]");

  TEST_SINGLE(ld1row(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1row {z30.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld1row(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1row {z26.s}, p6/z, [x29, x28, lsl #2]");

  TEST_SINGLE(ld1rqd(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rqd {z30.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld1rqd(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rqd {z26.d}, p6/z, [x29, x28, lsl #3]");

  TEST_SINGLE(ld1rod(ZReg::z30, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1rod {z30.d}, p6/z, [x29, x30, lsl #3]");
  TEST_SINGLE(ld1rod(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r28), "ld1rod {z26.d}, p6/z, [x29, x28, lsl #3]");
}

TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE load multiple structures (scalar plus immediate)") {
  // Checks ld2/ld3/ld4 for byte, halfword, word and doubleword elements with an
  // immediate offset. A zero offset is expected to be omitted from the
  // disassembly; non-zero offsets are expected to be printed with `mul vl`.
  // Each variant is also checked with a register list that starts at z31 and
  // continues at z0.
  // Offsets used: ld2* -16 and 14, ld3* -24 and 21, ld4* -32 and 28.
  // NOTE(review): these look like the minimum/maximum encodable offsets
  // (multiples of the structure's register count) — confirm against the emitter.

  // Two-register structures.
  TEST_SINGLE(ld2b(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z31.b, z0.b}, p6/z, [x29]");
  TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2b {z26.b, z27.b}, p6/z, [x29]");
  TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2b {z26.b, z27.b}, p6/z, [x29, #-16, mul vl]");
  TEST_SINGLE(ld2b(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2b {z26.b, z27.b}, p6/z, [x29, #14, mul vl]");

  TEST_SINGLE(ld2h(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2h {z31.h, z0.h}, p6/z, [x29]");
  TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2h {z26.h, z27.h}, p6/z, [x29]");
  TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2h {z26.h, z27.h}, p6/z, [x29, #-16, mul vl]");
  TEST_SINGLE(ld2h(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2h {z26.h, z27.h}, p6/z, [x29, #14, mul vl]");

  TEST_SINGLE(ld2w(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2w {z31.s, z0.s}, p6/z, [x29]");
  TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2w {z26.s, z27.s}, p6/z, [x29]");
  TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2w {z26.s, z27.s}, p6/z, [x29, #-16, mul vl]");
  TEST_SINGLE(ld2w(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2w {z26.s, z27.s}, p6/z, [x29, #14, mul vl]");

  TEST_SINGLE(ld2d(ZReg::z31, ZReg::z0, PReg::p6.Zeroing(), Reg::r29, 0), "ld2d {z31.d, z0.d}, p6/z, [x29]");
  TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 0), "ld2d {z26.d, z27.d}, p6/z, [x29]");
  TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, -16), "ld2d {z26.d, z27.d}, p6/z, [x29, #-16, mul vl]");
  TEST_SINGLE(ld2d(ZReg::z26, ZReg::z27, PReg::p6.Zeroing(), Reg::r29, 14), "ld2d {z26.d, z27.d}, p6/z, [x29, #14, mul vl]");

  // Three-register structures.
  TEST_SINGLE(ld3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3b {z31.b, z0.b, z1.b}, p6/z, [x29]");
  TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29]");
  TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #-24, mul "
                                                                                        "vl]");
  TEST_SINGLE(ld3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3b {z26.b, z27.b, z28.b}, p6/z, [x29, #21, mul "
                                                                                       "vl]");

  TEST_SINGLE(ld3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3h {z31.h, z0.h, z1.h}, p6/z, [x29]");
  TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29]");
  TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #-24, mul "
                                                                                        "vl]");
  TEST_SINGLE(ld3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3h {z26.h, z27.h, z28.h}, p6/z, [x29, #21, mul "
                                                                                       "vl]");

  TEST_SINGLE(ld3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3w {z31.s, z0.s, z1.s}, p6/z, [x29]");
  TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29]");
  TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #-24, mul "
                                                                                        "vl]");
  TEST_SINGLE(ld3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3w {z26.s, z27.s, z28.s}, p6/z, [x29, #21, mul "
                                                                                       "vl]");

  TEST_SINGLE(ld3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6.Zeroing(), Reg::r29, 0), "ld3d {z31.d, z0.d, z1.d}, p6/z, [x29]");
  TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 0), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29]");
  TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, -24), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #-24, mul "
                                                                                        "vl]");
  TEST_SINGLE(ld3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6.Zeroing(), Reg::r29, 21), "ld3d {z26.d, z27.d, z28.d}, p6/z, [x29, #21, mul "
                                                                                       "vl]");

  // Four-register structures.
  TEST_SINGLE(ld4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z31.b, z0.b, z1.b, z2.b}, p6/z, "
                                                                                              "[x29]");
  TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4b {z26.b, z27.b, z28.b, z29.b}, p6/z, "
                                                                                                 "[x29]");
  TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4b {z26.b, z27.b, z28.b, z29.b}, "
                                                                                                   "p6/z, [x29, #-32, mul vl]");
  TEST_SINGLE(ld4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4b {z26.b, z27.b, z28.b, z29.b}, "
                                                                                                  "p6/z, [x29, #28, mul vl]");

  TEST_SINGLE(ld4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z31.h, z0.h, z1.h, z2.h}, p6/z, "
                                                                                              "[x29]");
  TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4h {z26.h, z27.h, z28.h, z29.h}, p6/z, "
                                                                                                 "[x29]");
  TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4h {z26.h, z27.h, z28.h, z29.h}, "
                                                                                                   "p6/z, [x29, #-32, mul vl]");
  TEST_SINGLE(ld4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4h {z26.h, z27.h, z28.h, z29.h}, "
                                                                                                  "p6/z, [x29, #28, mul vl]");

  TEST_SINGLE(ld4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z31.s, z0.s, z1.s, z2.s}, p6/z, "
                                                                                              "[x29]");
  TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4w {z26.s, z27.s, z28.s, z29.s}, p6/z, "
                                                                                                 "[x29]");
  TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4w {z26.s, z27.s, z28.s, z29.s}, "
                                                                                                   "p6/z, [x29, #-32, mul vl]");
  TEST_SINGLE(ld4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4w {z26.s, z27.s, z28.s, z29.s}, "
                                                                                                  "p6/z, [x29, #28, mul vl]");

  TEST_SINGLE(ld4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z31.d, z0.d, z1.d, z2.d}, p6/z, "
                                                                                              "[x29]");
  TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 0), "ld4d {z26.d, z27.d, z28.d, z29.d}, p6/z, "
                                                                                                 "[x29]");
  TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, -32), "ld4d {z26.d, z27.d, z28.d, z29.d}, "
                                                                                                   "p6/z, [x29, #-32, mul vl]");
  TEST_SINGLE(ld4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6.Zeroing(), Reg::r29, 28), "ld4d {z26.d, z27.d, z28.d, z29.d}, "
                                                                                                  "p6/z, [x29, #28, mul vl]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar plus immediate)") {
  // Checks the ld1{b,sb,h,sh,w,sw,d} immediate-offset forms. Every enabled
  // mnemonic/element-size combination is checked with offsets 0, -8 and 7.
  // A zero offset is expected to be omitted from the disassembly; non-zero
  // offsets are expected to be printed with a `mul vl` suffix.
  //
  // Commented-out checks are element-size combinations that are left disabled.
  // NOTE(review): presumably these are combinations the emitter rejects
  // (element narrower than the memory access, or a sign-extending load with
  // nothing to extend into) — confirm against the emitter's static checks.
  TEST_SINGLE(ld1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1b {z26.b}, p6/z, [x29]");
  TEST_SINGLE(ld1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1b {z26.h}, p6/z, [x29]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1b {z26.s}, p6/z, [x29]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1b {z26.d}, p6/z, [x29]");

  TEST_SINGLE(ld1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1b {z26.b}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1b {z26.h}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1b {z26.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1b {z26.d}, p6/z, [x29, #-8, mul vl]");

  TEST_SINGLE(ld1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1b {z26.b}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1b {z26.h}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1b {z26.s}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1b {z26.d}, p6/z, [x29, #7, mul vl]");

  // ld1sw only has a doubleword-element form, so it takes no size template argument.
  TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sw {z26.d}, p6/z, [x29]");
  TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sw {z26.d}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sw {z26.d}, p6/z, [x29, #7, mul vl]");

  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.s}, p6/z, [x29]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1w {z26.d}, p6/z, [x29]");
  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1w {z26.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1w {z26.d}, p6/z, [x29, #-8, mul vl]");
  // Positive-offset checks for ld1w, matching the #7 coverage of every other mnemonic here.
  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1w {z26.s}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1w {z26.d}, p6/z, [x29, #7, mul vl]");

  // TEST_SINGLE(ld1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.b}, p6/z, [x29]");
  TEST_SINGLE(ld1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.h}, p6/z, [x29]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.s}, p6/z, [x29]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1h {z26.d}, p6/z, [x29]");

  // TEST_SINGLE(ld1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.b}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.h}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1h {z26.d}, p6/z, [x29, #-8, mul vl]");

  // TEST_SINGLE(ld1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.b}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.h}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.s}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1h {z26.d}, p6/z, [x29, #7, mul vl]");

  // TEST_SINGLE(ld1sh<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.b}, p6/z, [x29]");
  // TEST_SINGLE(ld1sh<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.h}, p6/z, [x29]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.s}, p6/z, [x29]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sh {z26.d}, p6/z, [x29]");

  // TEST_SINGLE(ld1sh<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.b}, p6/z, [x29, #-8, mul vl]");
  // TEST_SINGLE(ld1sh<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.h}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sh {z26.d}, p6/z, [x29, #-8, mul vl]");

  // TEST_SINGLE(ld1sh<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.b}, p6/z, [x29, #7, mul vl]");
  // TEST_SINGLE(ld1sh<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.h}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.s}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sh {z26.d}, p6/z, [x29, #7, mul vl]");

  // TEST_SINGLE(ld1sb<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.b}, p6/z, [x29]");
  TEST_SINGLE(ld1sb<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.h}, p6/z, [x29]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.s}, p6/z, [x29]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1sb {z26.d}, p6/z, [x29]");

  // TEST_SINGLE(ld1sb<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.b}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.h}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.s}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1sb {z26.d}, p6/z, [x29, #-8, mul vl]");

  // TEST_SINGLE(ld1sb<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.b}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.h}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.s}, p6/z, [x29, #7, mul vl]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1sb {z26.d}, p6/z, [x29, #7, mul vl]");

  TEST_SINGLE(ld1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 0), "ld1d {z26.d}, p6/z, [x29]");
  TEST_SINGLE(ld1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, -8), "ld1d {z26.d}, p6/z, [x29, #-8, mul vl]");
  TEST_SINGLE(ld1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, 7), "ld1d {z26.d}, p6/z, [x29, #7, mul vl]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar plus scalar)") {
  // Checks the st1{b,h,w,d} register-offset forms for each enabled element size.
  // The predicate is passed without `.Zeroing()` and is expected to be printed
  // as plain `p6`. The offset register is expected to be printed unshifted for
  // st1b and with `lsl #1`, `lsl #2`, `lsl #3` for st1h, st1w and st1d.
  //
  // Commented-out checks are element-size combinations that are left disabled.
  // NOTE(review): presumably the emitter rejects an element size narrower than
  // the stored memory size — confirm against the emitter's static checks.
  TEST_SINGLE(st1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b {z26.b}, p6, [x29, x28]");
  TEST_SINGLE(st1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b {z26.h}, p6, [x29, x28]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b {z26.s}, p6, [x29, x28]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1b {z26.d}, p6, [x29, x28]");

  // TEST_SINGLE(st1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.b}, p6, [x29, x28, lsl #1]");
  TEST_SINGLE(st1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.h}, p6, [x29, x28, lsl #1]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.s}, p6, [x29, x28, lsl #1]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1h {z26.d}, p6, [x29, x28, lsl #1]");

  // TEST_SINGLE(st1w<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.b}, p6, [x29, x28, lsl #2]");
  // TEST_SINGLE(st1w<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.h}, p6, [x29, x28, lsl #2]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.s}, p6, [x29, x28, lsl #2]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1w {z26.d}, p6, [x29, x28, lsl #2]");

  TEST_SINGLE(st1d(ZReg::z26, PReg::p6, Reg::r29, Reg::r28), "st1d {z26.d}, p6, [x29, x28, lsl #3]");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous load (scalar plus scalar)") {
  // Checks the ld1{b,sb,h,sh,w,sw,d} register-offset forms for each enabled
  // element size. The offset register is expected to be printed unshifted for
  // byte accesses and with `lsl #1`, `lsl #2`, `lsl #3` for halfword, word and
  // doubleword accesses.
  //
  // Commented-out checks are element-size combinations that are left disabled.
  // NOTE(review): presumably these are combinations the emitter rejects
  // (element narrower than the memory access, or a sign-extending load with
  // nothing to extend into) — confirm against the emitter's static checks.
  TEST_SINGLE(ld1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.h}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.s}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1b {z26.d}, p6/z, [x29, x30]");

  // TEST_SINGLE(ld1sb<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1sb<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.h}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1sb<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.s}, p6/z, [x29, x30]");
  TEST_SINGLE(ld1sb<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sb {z26.d}, p6/z, [x29, x30]");

  // TEST_SINGLE(ld1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.b}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.s}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1h {z26.d}, p6/z, [x29, x30, lsl #1]");

  // TEST_SINGLE(ld1sh<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.b}, p6/z, [x29, x30, lsl #1]");
  // TEST_SINGLE(ld1sh<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.s}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ld1sh<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sh {z26.d}, p6/z, [x29, x30, lsl #1]");

  TEST_SINGLE(ld1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1w {z26.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ld1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1w {z26.d}, p6/z, [x29, x30, lsl #2]");

  // ld1sw and ld1d take no size template argument; both are expected to print a `.d` element.
  TEST_SINGLE(ld1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1sw {z26.d}, p6/z, [x29, x30, lsl #2]");

  TEST_SINGLE(ld1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ld1d {z26.d}, p6/z, [x29, x30, lsl #3]");
}
// Checks the scalar-plus-scalar ([x29, x30]) forms of the first-fault contiguous loads:
// ldff1b, ldff1sb, ldff1h, ldff1sh, ldff1w, ldff1sw and ldff1d.
// Every TEST_SINGLE pairs one emitter call with the disassembly text expected for it.
// NOTE(review): TEST_SINGLE and the TestDisassembler fixture are defined outside this chunk.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous first-fault load (scalar plus scalar)") {
  // Byte loads: the template argument selects the destination element suffix (.b/.h/.s/.d)
  // and the index register appears unscaled in the expected text.
  TEST_SINGLE(ldff1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.b}, p6/z, [x29, x30]");
  TEST_SINGLE(ldff1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.h}, p6/z, [x29, x30]");
  TEST_SINGLE(ldff1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.s}, p6/z, [x29, x30]");
  TEST_SINGLE(ldff1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1b {z26.d}, p6/z, [x29, x30]");

  // Signed byte loads: only the 16/32/64-bit destination sizes are exercised here.
  TEST_SINGLE(ldff1sb<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sb {z26.h}, p6/z, [x29, x30]");
  TEST_SINGLE(ldff1sb<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sb {z26.s}, p6/z, [x29, x30]");
  TEST_SINGLE(ldff1sb<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sb {z26.d}, p6/z, [x29, x30]");

  // Halfword loads: the expected text shows the index register scaled with "lsl #1".
  TEST_SINGLE(ldff1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1h {z26.h}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1h {z26.s}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ldff1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1h {z26.d}, p6/z, [x29, x30, lsl #1]");

  // Signed halfword loads: only the 32/64-bit destination sizes are exercised here.
  TEST_SINGLE(ldff1sh<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sh {z26.s}, p6/z, [x29, x30, lsl #1]");
  TEST_SINGLE(ldff1sh<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sh {z26.d}, p6/z, [x29, x30, lsl #1]");

  // Word loads: the expected text shows the index register scaled with "lsl #2".
  TEST_SINGLE(ldff1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1w {z26.s}, p6/z, [x29, x30, lsl #2]");
  TEST_SINGLE(ldff1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1w {z26.d}, p6/z, [x29, x30, lsl #2]");

  // ldff1sw takes no element-size template argument; the expected destination suffix is .d.
  TEST_SINGLE(ldff1sw(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1sw {z26.d}, p6/z, [x29, x30, lsl #2]");

  // ldff1d likewise takes no template argument; the index is shown scaled with "lsl #3".
  TEST_SINGLE(ldff1d(ZReg::z26, PReg::p6.Zeroing(), Reg::r29, Reg::r30), "ldff1d {z26.d}, p6/z, [x29, x30, lsl #3]");
}
// Checks the predicated frint* family (frinti, frintx, frinta, frintn, frintz, frintm, frintp).
// Each mnemonic is exercised at the 16-, 32- and 64-bit element sizes with a merging
// predicate (p6/m); source and destination share the same element suffix in the expected text.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point round to integral value") {
  TEST_SINGLE(frinti(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinti z30.h, p6/m, z29.h");
  TEST_SINGLE(frinti(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinti z30.s, p6/m, z29.s");
  TEST_SINGLE(frinti(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinti z30.d, p6/m, z29.d");
  TEST_SINGLE(frintx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintx z30.h, p6/m, z29.h");
  TEST_SINGLE(frintx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintx z30.s, p6/m, z29.s");
  TEST_SINGLE(frintx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintx z30.d, p6/m, z29.d");
  TEST_SINGLE(frinta(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinta z30.h, p6/m, z29.h");
  TEST_SINGLE(frinta(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinta z30.s, p6/m, z29.s");
  TEST_SINGLE(frinta(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frinta z30.d, p6/m, z29.d");
  TEST_SINGLE(frintn(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintn z30.h, p6/m, z29.h");
  TEST_SINGLE(frintn(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintn z30.s, p6/m, z29.s");
  TEST_SINGLE(frintn(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintn z30.d, p6/m, z29.d");
  TEST_SINGLE(frintz(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintz z30.h, p6/m, z29.h");
  TEST_SINGLE(frintz(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintz z30.s, p6/m, z29.s");
  TEST_SINGLE(frintz(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintz z30.d, p6/m, z29.d");
  TEST_SINGLE(frintm(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintm z30.h, p6/m, z29.h");
  TEST_SINGLE(frintm(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintm z30.s, p6/m, z29.s");
  TEST_SINGLE(frintm(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintm z30.d, p6/m, z29.d");
  TEST_SINGLE(frintp(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintp z30.h, p6/m, z29.h");
  TEST_SINGLE(frintp(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintp z30.s, p6/m, z29.s");
  TEST_SINGLE(frintp(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frintp z30.d, p6/m, z29.d");
}

// Checks fcvt for every pairing of distinct 16/32/64-bit element sizes, plus fcvtx.
// As the expected strings show, the first SubRegSize argument becomes the destination
// suffix and the second becomes the source suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert precision") {
  // Destination .h, narrowing from .s and .d.
  TEST_SINGLE(fcvt(SubRegSize::i16Bit, SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.h, p6/m, z29.s");
  TEST_SINGLE(fcvt(SubRegSize::i16Bit, SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.h, p6/m, z29.d");

  // Destination .s, from .h and .d.
  TEST_SINGLE(fcvt(SubRegSize::i32Bit, SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.s, p6/m, z29.h");
  TEST_SINGLE(fcvt(SubRegSize::i32Bit, SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.s, p6/m, z29.d");

  // Destination .d, widening from .h and .s.
  TEST_SINGLE(fcvt(SubRegSize::i64Bit, SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.d, p6/m, z29.h");
  TEST_SINGLE(fcvt(SubRegSize::i64Bit, SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvt z30.d, p6/m, z29.s");

  // fcvtx takes no size arguments; the expected text is fixed at .d source to .s destination.
  TEST_SINGLE(fcvtx(ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fcvtx z30.s, p6/m, z29.d");
}

// Checks the predicated unary operations frecpx and fsqrt at the 16-, 32- and 64-bit
// element sizes, each with a merging predicate (p6/m).
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point unary operations") {
  TEST_SINGLE(frecpx(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frecpx z30.h, p6/m, z29.h");
  TEST_SINGLE(frecpx(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frecpx z30.s, p6/m, z29.s");
  TEST_SINGLE(frecpx(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "frecpx z30.d, p6/m, z29.d");

  TEST_SINGLE(fsqrt(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fsqrt z30.h, p6/m, z29.h");
  TEST_SINGLE(fsqrt(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fsqrt z30.s, p6/m, z29.s");
  TEST_SINGLE(fsqrt(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "fsqrt z30.d, p6/m, z29.d");
}

// Checks scvtf and ucvtf across destination/source element-size pairings.
// Argument order, as reflected in the expected strings, is:
// (destination register, destination size, predicate, source register, source size).
// The cases with a 16-bit source and a 32- or 64-bit destination are left commented out.
// NOTE(review): presumably those pairings are not supported by the emitter - confirm
// against the emitter implementation before enabling them.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE integer convert to floating-point") {
  // scvtf, destination .h.
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.h, p6/m, z29.h");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.h, p6/m, z29.s");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.h, p6/m, z29.d");

  // scvtf, destination .s.
  // TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.s, p6/m, z29.h");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.s, p6/m, z29.s");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.s, p6/m, z29.d");

  // scvtf, destination .d.
  // TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "scvtf z30.d, p6/m, z29.h");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "scvtf z30.d, p6/m, z29.s");
  TEST_SINGLE(scvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "scvtf z30.d, p6/m, z29.d");

  // ucvtf, destination .h.
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.h, p6/m, z29.h");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.h, p6/m, z29.s");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.h, p6/m, z29.d");

  // ucvtf, destination .s.
  // TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.s, p6/m, z29.h");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.s, p6/m, z29.s");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.s, p6/m, z29.d");

  // ucvtf, destination .d.
  // TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "ucvtf z30.d, p6/m, z29.h");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "ucvtf z30.d, p6/m, z29.s");
  TEST_SINGLE(ucvtf(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "ucvtf z30.d, p6/m, z29.d");
}

// Checks flogb, fcvtzs and fcvtzu.
// For fcvtzs/fcvtzu the argument order, as reflected in the expected strings, is:
// (destination register, destination size, predicate, source register, source size).
// The cases with a 16-bit destination and a 32- or 64-bit source are left commented out.
// NOTE(review): presumably those pairings are not supported by the emitter - confirm
// against the emitter implementation before enabling them.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point convert to integer") {
  // flogb takes a single size that applies to both operands.
  TEST_SINGLE(flogb(SubRegSize::i16Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "flogb z30.h, p6/m, z29.h");
  TEST_SINGLE(flogb(SubRegSize::i32Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "flogb z30.s, p6/m, z29.s");
  TEST_SINGLE(flogb(SubRegSize::i64Bit, ZReg::z30, PReg::p6.Merging(), ZReg::z29), "flogb z30.d, p6/m, z29.d");

  // fcvtzs, destination .h: only the .h source is enabled.
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzs z30.h, p6/m, z29.h");
  // TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.h, p6/m, z29.s");
  // TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.h, p6/m, z29.d");

  // fcvtzs, destination .s.
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzs z30.s, p6/m, z29.h");
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.s, p6/m, z29.s");
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.s, p6/m, z29.d");

  // fcvtzs, destination .d.
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzs z30.d, p6/m, z29.h");
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzs z30.d, p6/m, z29.s");
  TEST_SINGLE(fcvtzs(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzs z30.d, p6/m, z29.d");

  // fcvtzu, destination .h: only the .h source is enabled.
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzu z30.h, p6/m, z29.h");
  // TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.h, p6/m, z29.s");
  // TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i16Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzu z30.h, p6/m, z29.d");

  // fcvtzu, destination .s.
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzu z30.s, p6/m, z29.h");
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.s, p6/m, z29.s");
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i32Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzu z30.s, p6/m, z29.d");

  // fcvtzu, destination .d.
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i16Bit), "fcvtzu z30.d, p6/m, z29.h");
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i32Bit), "fcvtzu z30.d, p6/m, z29.s");
  TEST_SINGLE(fcvtzu(ZReg::z30, SubRegSize::i64Bit, PReg::p6.Merging(), ZReg::z29, SubRegSize::i64Bit), "fcvtzu z30.d, p6/m, z29.d");
}

// Checks the unpredicated estimate instructions frecpe and frsqrte at the 16-, 32- and
// 64-bit element sizes. No predicate operand is passed and none appears in the expected text.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point reciprocal estimate (unpredicated)") {
  TEST_SINGLE(frecpe(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "frecpe z30.h, z29.h");
  TEST_SINGLE(frecpe(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "frecpe z30.s, z29.s");
  TEST_SINGLE(frecpe(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "frecpe z30.d, z29.d");

  TEST_SINGLE(frsqrte(SubRegSize::i16Bit, ZReg::z30, ZReg::z29), "frsqrte z30.h, z29.h");
  TEST_SINGLE(frsqrte(SubRegSize::i32Bit, ZReg::z30, ZReg::z29), "frsqrte z30.s, z29.s");
  TEST_SINGLE(frsqrte(SubRegSize::i64Bit, ZReg::z30, ZReg::z29), "frsqrte z30.d, z29.d");
}

// Checks fadda at the 16-, 32- and 64-bit element sizes.
// The same VReg (v30) is passed as both destination and first source; the expected text
// prints it as a scalar register (h30/s30/d30) and prints the predicate (p7) without a qualifier.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point serial reduction (predicated)") {
  TEST_SINGLE(fadda(SubRegSize::i16Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "fadda h30, p7, h30, z29.h");
  TEST_SINGLE(fadda(SubRegSize::i32Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "fadda s30, p7, s30, z29.s");
  TEST_SINGLE(fadda(SubRegSize::i64Bit, VReg::v30, PReg::p7, VReg::v30, ZReg::z29), "fadda d30, p7, d30, z29.d");
}

// Checks the compare-with-zero forms of fcmge, fcmgt, fcmlt, fcmle, fcmeq and fcmne at the
// 16-, 32- and 64-bit element sizes, writing predicate p15 under a zeroing governing
// predicate (p7/z). The emitter calls take no immediate operand; the "#0.0" comparand only
// appears in the expected disassembly.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point compare with zero") {
  TEST_SINGLE(fcmge(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmge p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmge(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmge p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmge(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmge p15.d, p7/z, z30.d, #0.0");

  TEST_SINGLE(fcmgt(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmgt p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmgt(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmgt p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmgt(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmgt p15.d, p7/z, z30.d, #0.0");

  TEST_SINGLE(fcmlt(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmlt p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmlt(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmlt p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmlt(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmlt p15.d, p7/z, z30.d, #0.0");

  TEST_SINGLE(fcmle(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmle p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmle(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmle p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmle(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmle p15.d, p7/z, z30.d, #0.0");

  TEST_SINGLE(fcmeq(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmeq p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmeq(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmeq p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmeq(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmeq p15.d, p7/z, z30.d, #0.0");

  TEST_SINGLE(fcmne(SubRegSize::i16Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmne p15.h, p7/z, z30.h, #0.0");
  TEST_SINGLE(fcmne(SubRegSize::i32Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmne p15.s, p7/z, z30.s, #0.0");
  TEST_SINGLE(fcmne(SubRegSize::i64Bit, PReg::p15, PReg::p7.Zeroing(), ZReg::z30), "fcmne p15.d, p7/z, z30.d, #0.0");
}

// Checks fmla, fmls, fnmla and fnmls at the 16-, 32- and 64-bit element sizes.
// Each call passes (size, z30, merging p7, z29, z28) and the expected text lists the
// operands in that same order with a shared element suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-accumulate writing addend") {
  TEST_SINGLE(fmla(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmla z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fmla(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmla z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fmla(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmla z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fmls(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmls z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fmls(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmls z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fmls(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmls z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fnmla(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmla z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fnmla(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmla z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fnmla(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmla z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fnmls(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmls z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fnmls(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmls z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fnmls(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmls z30.d, p7/m, z29.d, z28.d");
}

// Checks fmad, fmsb, fnmad and fnmsb at the 16-, 32- and 64-bit element sizes.
// Each call passes (size, z30, merging p7, z29, z28) and the expected text lists the
// operands in that same order with a shared element suffix.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE floating-point multiply-accumulate writing multiplicand") {
  TEST_SINGLE(fmad(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmad z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fmad(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmad z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fmad(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmad z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fmsb(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmsb z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fmsb(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmsb z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fmsb(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fmsb z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fnmad(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmad z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fnmad(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmad z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fnmad(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmad z30.d, p7/m, z29.d, z28.d");

  TEST_SINGLE(fnmsb(SubRegSize::i16Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmsb z30.h, p7/m, z29.h, z28.h");
  TEST_SINGLE(fnmsb(SubRegSize::i32Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmsb z30.s, p7/m, z29.s, z28.s");
  TEST_SINGLE(fnmsb(SubRegSize::i64Bit, ZReg::z30, PReg::p7.Merging(), ZReg::z29, ZReg::z28), "fnmsb z30.d, p7/m, z29.d, z28.d");
}

// Checks the scalar-plus-scalar ([x29, x30]) forms of the 2-, 3- and 4-register structure
// stores for byte, halfword, word and doubleword elements.
// Every instruction is exercised with two register lists: one starting at z31, so that the
// list continues at z0, and one made of consecutive registers starting at z26.
// Long expected strings are split into adjacent literals, which the compiler concatenates.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE store multiple structures (scalar plus scalar)") {
  // Byte elements: the index register appears unscaled in the expected text.
  TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2b {z31.b, z0.b}, p6, [x29, x30]");
  TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2b {z26.b, z27.b}, p6, [x29, x30]");
  TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3b {z31.b, z0.b, z1.b}, p6, [x29, x30]");
  TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3b {z26.b, z27.b, z28.b}, p6, [x29, x30]");
  TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29, "
                                                                                           "x30]");
  TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, "
                                                                                              "[x29, x30]");

  // Halfword elements: the index register is shown scaled with "lsl #1".
  TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2h {z31.h, z0.h}, p6, [x29, x30, lsl #1]");
  TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2h {z26.h, z27.h}, p6, [x29, x30, lsl #1]");
  TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3h {z31.h, z0.h, z1.h}, p6, [x29, x30, lsl #1]");
  TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3h {z26.h, z27.h, z28.h}, p6, [x29, x30, lsl #1]");
  TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29, x30, "
                                                                                           "lsl #1]");
  TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, "
                                                                                              "[x29, x30, lsl #1]");

  // Word elements: the index register is shown scaled with "lsl #2".
  TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2w {z31.s, z0.s}, p6, [x29, x30, lsl #2]");
  TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2w {z26.s, z27.s}, p6, [x29, x30, lsl #2]");
  TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3w {z31.s, z0.s, z1.s}, p6, [x29, x30, lsl #2]");
  TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3w {z26.s, z27.s, z28.s}, p6, [x29, x30, lsl #2]");
  TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29, x30, "
                                                                                           "lsl #2]");
  TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, "
                                                                                              "[x29, x30, lsl #2]");

  // Doubleword elements: the index register is shown scaled with "lsl #3".
  TEST_SINGLE(st2d(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, Reg::r30), "st2d {z31.d, z0.d}, p6, [x29, x30, lsl #3]");
  TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, Reg::r30), "st2d {z26.d, z27.d}, p6, [x29, x30, lsl #3]");
  TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, Reg::r30), "st3d {z31.d, z0.d, z1.d}, p6, [x29, x30, lsl #3]");
  TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, Reg::r30), "st3d {z26.d, z27.d, z28.d}, p6, [x29, x30, lsl #3]");
  TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, Reg::r30), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29, x30, "
                                                                                           "lsl #3]");
  TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, Reg::r30), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, "
                                                                                              "[x29, x30, lsl #3]");
}

// Checks the scalar-plus-immediate forms of stnt1b, stnt1h, stnt1w and stnt1d.
// Each instruction is exercised with immediates 0, -8 and 7. The expected text omits the
// offset entirely for 0 and prints "#imm, mul vl" otherwise.
// NOTE(review): -8 and 7 are presumably the ends of the encodable immediate range - confirm
// against the emitter's range check.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous non-temporal store (scalar plus immediate)") {
  TEST_SINGLE(stnt1b(ZReg::z31, PReg::p6, Reg::r29, 0), "stnt1b {z31.b}, p6, [x29]");
  TEST_SINGLE(stnt1b(ZReg::z31, PReg::p6, Reg::r29, -8), "stnt1b {z31.b}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(stnt1b(ZReg::z31, PReg::p6, Reg::r29, 7), "stnt1b {z31.b}, p6, [x29, #7, mul vl]");

  TEST_SINGLE(stnt1h(ZReg::z31, PReg::p6, Reg::r29, 0), "stnt1h {z31.h}, p6, [x29]");
  TEST_SINGLE(stnt1h(ZReg::z31, PReg::p6, Reg::r29, -8), "stnt1h {z31.h}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(stnt1h(ZReg::z31, PReg::p6, Reg::r29, 7), "stnt1h {z31.h}, p6, [x29, #7, mul vl]");

  TEST_SINGLE(stnt1w(ZReg::z31, PReg::p6, Reg::r29, 0), "stnt1w {z31.s}, p6, [x29]");
  TEST_SINGLE(stnt1w(ZReg::z31, PReg::p6, Reg::r29, -8), "stnt1w {z31.s}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(stnt1w(ZReg::z31, PReg::p6, Reg::r29, 7), "stnt1w {z31.s}, p6, [x29, #7, mul vl]");

  TEST_SINGLE(stnt1d(ZReg::z31, PReg::p6, Reg::r29, 0), "stnt1d {z31.d}, p6, [x29]");
  TEST_SINGLE(stnt1d(ZReg::z31, PReg::p6, Reg::r29, -8), "stnt1d {z31.d}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(stnt1d(ZReg::z31, PReg::p6, Reg::r29, 7), "stnt1d {z31.d}, p6, [x29, #7, mul vl]");
}

// Checks the scalar-plus-immediate forms of the 2-, 3- and 4-register structure stores for
// byte, halfword, word and doubleword elements.
// Each instruction gets four cases: a register list starting at z31 (continuing at z0) with
// immediate 0, then a consecutive list starting at z26 with immediate 0, a negative
// immediate and a positive immediate.
// The non-zero immediates used are -8x and 7x the register count: -16/14 for st2*,
// -24/21 for st3* and -32/28 for st4*.
// NOTE(review): these are presumably the ends of the encodable range for each form - confirm
// against the emitter's range check.
// The expected text omits the offset for 0 and prints "#imm, mul vl" otherwise.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE store multiple structures (scalar plus immediate)") {
  // Two-register stores.
  TEST_SINGLE(st2b(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2b {z31.b, z0.b}, p6, [x29]");
  TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2b {z26.b, z27.b}, p6, [x29]");
  TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2b {z26.b, z27.b}, p6, [x29, #-16, mul vl]");
  TEST_SINGLE(st2b(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2b {z26.b, z27.b}, p6, [x29, #14, mul vl]");

  TEST_SINGLE(st2h(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2h {z31.h, z0.h}, p6, [x29]");
  TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2h {z26.h, z27.h}, p6, [x29]");
  TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2h {z26.h, z27.h}, p6, [x29, #-16, mul vl]");
  TEST_SINGLE(st2h(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2h {z26.h, z27.h}, p6, [x29, #14, mul vl]");

  TEST_SINGLE(st2w(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2w {z31.s, z0.s}, p6, [x29]");
  TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2w {z26.s, z27.s}, p6, [x29]");
  TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2w {z26.s, z27.s}, p6, [x29, #-16, mul vl]");
  TEST_SINGLE(st2w(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2w {z26.s, z27.s}, p6, [x29, #14, mul vl]");

  TEST_SINGLE(st2d(ZReg::z31, ZReg::z0, PReg::p6, Reg::r29, 0), "st2d {z31.d, z0.d}, p6, [x29]");
  TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 0), "st2d {z26.d, z27.d}, p6, [x29]");
  TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, -16), "st2d {z26.d, z27.d}, p6, [x29, #-16, mul vl]");
  TEST_SINGLE(st2d(ZReg::z26, ZReg::z27, PReg::p6, Reg::r29, 14), "st2d {z26.d, z27.d}, p6, [x29, #14, mul vl]");

  // Three-register stores.
  TEST_SINGLE(st3b(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3b {z31.b, z0.b, z1.b}, p6, [x29]");
  TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3b {z26.b, z27.b, z28.b}, p6, [x29]");
  TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3b {z26.b, z27.b, z28.b}, p6, [x29, #-24, mul vl]");
  TEST_SINGLE(st3b(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3b {z26.b, z27.b, z28.b}, p6, [x29, #21, mul vl]");

  TEST_SINGLE(st3h(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3h {z31.h, z0.h, z1.h}, p6, [x29]");
  TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3h {z26.h, z27.h, z28.h}, p6, [x29]");
  TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3h {z26.h, z27.h, z28.h}, p6, [x29, #-24, mul vl]");
  TEST_SINGLE(st3h(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3h {z26.h, z27.h, z28.h}, p6, [x29, #21, mul vl]");

  TEST_SINGLE(st3w(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3w {z31.s, z0.s, z1.s}, p6, [x29]");
  TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3w {z26.s, z27.s, z28.s}, p6, [x29]");
  TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3w {z26.s, z27.s, z28.s}, p6, [x29, #-24, mul vl]");
  TEST_SINGLE(st3w(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3w {z26.s, z27.s, z28.s}, p6, [x29, #21, mul vl]");

  TEST_SINGLE(st3d(ZReg::z31, ZReg::z0, ZReg::z1, PReg::p6, Reg::r29, 0), "st3d {z31.d, z0.d, z1.d}, p6, [x29]");
  TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 0), "st3d {z26.d, z27.d, z28.d}, p6, [x29]");
  TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, -24), "st3d {z26.d, z27.d, z28.d}, p6, [x29, #-24, mul vl]");
  TEST_SINGLE(st3d(ZReg::z26, ZReg::z27, ZReg::z28, PReg::p6, Reg::r29, 21), "st3d {z26.d, z27.d, z28.d}, p6, [x29, #21, mul vl]");

  // Four-register stores; the longer expected strings are split into adjacent literals.
  TEST_SINGLE(st4b(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4b {z31.b, z0.b, z1.b, z2.b}, p6, [x29]");
  TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29]");
  TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, "
                                                                                         "#-32, mul vl]");
  TEST_SINGLE(st4b(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4b {z26.b, z27.b, z28.b, z29.b}, p6, [x29, #28, "
                                                                                        "mul vl]");

  TEST_SINGLE(st4h(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4h {z31.h, z0.h, z1.h, z2.h}, p6, [x29]");
  TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29]");
  TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, "
                                                                                         "#-32, mul vl]");
  TEST_SINGLE(st4h(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4h {z26.h, z27.h, z28.h, z29.h}, p6, [x29, #28, "
                                                                                        "mul vl]");

  TEST_SINGLE(st4w(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4w {z31.s, z0.s, z1.s, z2.s}, p6, [x29]");
  TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29]");
  TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, "
                                                                                         "#-32, mul vl]");
  TEST_SINGLE(st4w(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4w {z26.s, z27.s, z28.s, z29.s}, p6, [x29, #28, "
                                                                                        "mul vl]");

  TEST_SINGLE(st4d(ZReg::z31, ZReg::z0, ZReg::z1, ZReg::z2, PReg::p6, Reg::r29, 0), "st4d {z31.d, z0.d, z1.d, z2.d}, p6, [x29]");
  TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 0), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29]");
  TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, -32), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, "
                                                                                         "#-32, mul vl]");
  TEST_SINGLE(st4d(ZReg::z26, ZReg::z27, ZReg::z28, ZReg::z29, PReg::p6, Reg::r29, 28), "st4d {z26.d, z27.d, z28.d, z29.d}, p6, [x29, #28, "
                                                                                        "mul vl]");
}
// Checks the disassembly text produced for the SVE contiguous stores
// st1b/st1h/st1w/st1d when addressed as scalar base register plus immediate.
// Every enabled mnemonic/element-size pair is emitted with offsets 0, -8 and 7.
// The expected strings show that a zero offset prints as a bare `[x29]`, while
// a non-zero offset prints as `#imm, mul vl`.
// NOTE(review): -8 and 7 look like the limits of the encodable immediate range
// -- confirm against the emitter implementation.
// NOTE(review): the disabled lines all request an element size narrower than
// the store width named by the mnemonic (e.g. st1h with .b elements);
// presumably those forms have no valid encoding -- confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE contiguous store (scalar plus immediate)") {
  // st1b: every element size, zero offset.
  TEST_SINGLE(st1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1b {z26.b}, p6, [x29]");
  TEST_SINGLE(st1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1b {z26.h}, p6, [x29]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1b {z26.s}, p6, [x29]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1b {z26.d}, p6, [x29]");

  // st1b: negative offset.
  TEST_SINGLE(st1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1b {z26.b}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1b {z26.h}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1b {z26.s}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1b {z26.d}, p6, [x29, #-8, mul vl]");

  // st1b: positive offset.
  TEST_SINGLE(st1b<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.b}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.h}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.s}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1b {z26.d}, p6, [x29, #7, mul vl]");

  // st1h: the 8-bit element variant is disabled; 16/32/64-bit, zero offset.
  // TEST_SINGLE(st1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.b}, p6, [x29]");
  TEST_SINGLE(st1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.h}, p6, [x29]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.s}, p6, [x29]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1h {z26.d}, p6, [x29]");

  // st1h: negative offset.
  // TEST_SINGLE(st1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.b}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.h}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.s}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1h {z26.d}, p6, [x29, #-8, mul vl]");

  // st1h: positive offset.
  // TEST_SINGLE(st1h<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.b}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.h}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.s}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1h {z26.d}, p6, [x29, #7, mul vl]");

  // st1w: the 8-bit and 16-bit element variants are disabled; 32/64-bit, zero offset.
  // TEST_SINGLE(st1w<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.b}, p6, [x29]");
  // TEST_SINGLE(st1w<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.h}, p6, [x29]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.s}, p6, [x29]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 0), "st1w {z26.d}, p6, [x29]");

  // st1w: negative offset.
  // TEST_SINGLE(st1w<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.b}, p6, [x29, #-8, mul vl]");
  // TEST_SINGLE(st1w<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.h}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.s}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, -8), "st1w {z26.d}, p6, [x29, #-8, mul vl]");

  // st1w: positive offset.
  // TEST_SINGLE(st1w<SubRegSize::i8Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.b}, p6, [x29, #7, mul vl]");
  // TEST_SINGLE(st1w<SubRegSize::i16Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.h}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.s}, p6, [x29, #7, mul vl]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z26, PReg::p6, Reg::r29, 7), "st1w {z26.d}, p6, [x29, #7, mul vl]");

  // st1d takes no element-size template argument here; the expected text always uses .d elements.
  TEST_SINGLE(st1d(ZReg::z26, PReg::p6, Reg::r29, 0), "st1d {z26.d}, p6, [x29]");
  TEST_SINGLE(st1d(ZReg::z26, PReg::p6, Reg::r29, -8), "st1d {z26.d}, p6, [x29, #-8, mul vl]");
  TEST_SINGLE(st1d(ZReg::z26, PReg::p6, Reg::r29, 7), "st1d {z26.d}, p6, [x29, #7, mul vl]");
}

// Checks the disassembly text produced for the SVE scatter stores
// st1b/st1h/st1w/st1d. Two SVEMemOperand shapes are exercised per mnemonic:
//   * scalar base + vector index, with a UXTW/SXTW/LSL/NONE modifier and a
//     shift amount (0, or 1/2/3 for st1h/st1w/st1d respectively);
//   * vector base + immediate offset.
// The expected strings show that a zero shift prints only the modifier name
// (or nothing for MOD_NONE) and that a zero immediate prints as `[z31.<T>]`.
// NOTE(review): the non-zero immediates used (31, 62, 124, 248) look like the
// largest encodable offset for each access size -- confirm against the emitter.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Scatters") {
  // st1b, scalar base + vector index (unscaled).
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1b {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, uxtw]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1b {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, sxtw]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1b {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, uxtw]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1b {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, sxtw]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1b {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d]");

  // st1b, vector base + immediate.
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1b {z30.s}, p6, [z31.s]");
  TEST_SINGLE(st1b<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), "st1b {z30.s}, p6, [z31.s, #31]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1b {z30.d}, p6, [z31.d]");
  TEST_SINGLE(st1b<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 31)), "st1b {z30.d}, p6, [z31.d, #31]");

  // st1h, scalar base + vector index, shift amount 1.
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), "st1h {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, uxtw "
                                                                                                                           "#1]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), "st1h {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, sxtw "
                                                                                                                           "#1]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 1)), "st1h {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, uxtw "
                                                                                                                           "#1]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 1)), "st1h {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, sxtw "
                                                                                                                           "#1]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 1)), "st1h {z30.d}, "
                                                                                                                          "p6, [x30, "
                                                                                                                          "z31.d, lsl #1]");

  // st1h, scalar base + vector index, shift amount 0.
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1h {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, uxtw]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1h {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, sxtw]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1h {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, uxtw]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1h {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, sxtw]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1h {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d]");

  // st1h, vector base + immediate.
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1h {z30.s}, p6, [z31.s]");
  TEST_SINGLE(st1h<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), "st1h {z30.s}, p6, [z31.s, #62]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1h {z30.d}, p6, [z31.d]");
  TEST_SINGLE(st1h<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 62)), "st1h {z30.d}, p6, [z31.d, #62]");

  // st1w, scalar base + vector index, shift amount 2.
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "st1w {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, uxtw "
                                                                                                                           "#2]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "st1w {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, sxtw "
                                                                                                                           "#2]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 2)), "st1w {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, uxtw "
                                                                                                                           "#2]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 2)), "st1w {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, sxtw "
                                                                                                                           "#2]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 2)), "st1w {z30.d}, "
                                                                                                                          "p6, [x30, "
                                                                                                                          "z31.d, lsl #2]");

  // st1w, scalar base + vector index, shift amount 0.
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1w {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, uxtw]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1w {z30.s}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.s, sxtw]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1w {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, uxtw]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1w {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d, sxtw]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1w {z30.d}, "
                                                                                                                           "p6, [x30, "
                                                                                                                           "z31.d]");

  // st1w, vector base + immediate.
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1w {z30.s}, p6, [z31.s]");
  TEST_SINGLE(st1w<SubRegSize::i32Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), "st1w {z30.s}, p6, [z31.s, #124]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1w {z30.d}, p6, [z31.d]");
  TEST_SINGLE(st1w<SubRegSize::i64Bit>(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 124)), "st1w {z30.d}, p6, [z31.d, #124]");

  // st1d, scalar base + vector index, shift amount 3.
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 3)), "st1d {z30.d}, p6, [x30, z31.d, "
                                                                                                       "uxtw #3]");
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 3)), "st1d {z30.d}, p6, [x30, z31.d, "
                                                                                                       "sxtw #3]");
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_LSL, 3)), "st1d {z30.d}, p6, [x30, z31.d, lsl "
                                                                                                      "#3]");

  // st1d, scalar base + vector index, shift amount 0.
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_UXTW, 0)), "st1d {z30.d}, p6, [x30, z31.d, "
                                                                                                       "uxtw]");
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_SXTW, 0)), "st1d {z30.d}, p6, [x30, z31.d, "
                                                                                                       "sxtw]");
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(XReg::x30, ZReg::z31, SVEModType::MOD_NONE, 0)), "st1d {z30.d}, p6, [x30, z31.d]");

  // st1d, vector base + immediate.
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 0)), "st1d {z30.d}, p6, [z31.d]");
  TEST_SINGLE(st1d(ZReg::z30, PReg::p6, SVEMemOperand(ZReg::z31, 248)), "st1d {z30.d}, p6, [z31.d, #248]");
}

// Checks the disassembly text produced for `str` of a whole predicate register
// and of a whole vector register, each with offsets 0, -256 and 255.
// The expected strings show a zero offset printed as a bare `[x29]` and
// non-zero offsets printed as `#imm, mul vl`.
// NOTE(review): -256 and 255 look like the limits of the encodable immediate
// range -- confirm against the emitter implementation.
TEST_CASE_METHOD(TestDisassembler, "Emitter: SVE: SVE Unsized Stores") {
  // Predicate register store.
  TEST_SINGLE(str(PReg::p6, XReg::x29, 0), "str p6, [x29]");
  TEST_SINGLE(str(PReg::p6, XReg::x29, -256), "str p6, [x29, #-256, mul vl]");
  TEST_SINGLE(str(PReg::p6, XReg::x29, 255), "str p6, [x29, #255, mul vl]");

  // Vector register store.
  TEST_SINGLE(str(ZReg::z30, XReg::x29, 0), "str z30, [x29]");
  TEST_SINGLE(str(ZReg::z30, XReg::x29, -256), "str z30, [x29, #-256, mul vl]");
  TEST_SINGLE(str(ZReg::z30, XReg::x29, 255), "str z30, [x29, #255, mul vl]");
}


================================================
FILE: FEXCore/unittests/Emitter/Scalar_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Checks the disassembly text for the scalar element-copy emitters `dup` and
// `mov` at each scalar size (8/16/32/64-bit). Both emitters are given the same
// arguments and are expected to produce the same `mov <scalar>, <vec>.<T>[idx]`
// text. Each size is tested with lane index 0 and with a second index
// (15, 7, 3 and 1 respectively).
// NOTE(review): the second index looks like the last lane of a 128-bit
// register for that element size -- confirm.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar copy") {
  // 8-bit elements.
  TEST_SINGLE(dup(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 0), "mov b30, v29.b[0]");
  TEST_SINGLE(dup(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 15), "mov b30, v29.b[15]");
  TEST_SINGLE(mov(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 0), "mov b30, v29.b[0]");
  TEST_SINGLE(mov(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 15), "mov b30, v29.b[15]");

  // 16-bit elements.
  TEST_SINGLE(dup(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 0), "mov h30, v29.h[0]");
  TEST_SINGLE(dup(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 7), "mov h30, v29.h[7]");
  TEST_SINGLE(mov(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 0), "mov h30, v29.h[0]");
  TEST_SINGLE(mov(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 7), "mov h30, v29.h[7]");

  // 32-bit elements.
  TEST_SINGLE(dup(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 0), "mov s30, v29.s[0]");
  TEST_SINGLE(dup(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 3), "mov s30, v29.s[3]");
  TEST_SINGLE(mov(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 0), "mov s30, v29.s[0]");
  TEST_SINGLE(mov(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 3), "mov s30, v29.s[3]");

  // 64-bit elements.
  TEST_SINGLE(dup(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 0), "mov d30, v29.d[0]");
  TEST_SINGLE(dup(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "mov d30, v29.d[1]");
  TEST_SINGLE(mov(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 0), "mov d30, v29.d[0]");
  TEST_SINGLE(mov(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "mov d30, v29.d[1]");
}
// Checks the disassembly text for the three-operand scalar floating-point
// emitters when called with half-precision (HReg) operands. Every case uses
// h30 as destination and h29/h28 as sources and expects the mnemonic followed
// by those three registers.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three same FP16") {
  TEST_SINGLE(fmulx(HReg::h30, HReg::h29, HReg::h28), "fmulx h30, h29, h28");
  TEST_SINGLE(fcmeq(HReg::h30, HReg::h29, HReg::h28), "fcmeq h30, h29, h28");
  TEST_SINGLE(frecps(HReg::h30, HReg::h29, HReg::h28), "frecps h30, h29, h28");
  TEST_SINGLE(frsqrts(HReg::h30, HReg::h29, HReg::h28), "frsqrts h30, h29, h28");
  TEST_SINGLE(fcmge(HReg::h30, HReg::h29, HReg::h28), "fcmge h30, h29, h28");
  TEST_SINGLE(facge(HReg::h30, HReg::h29, HReg::h28), "facge h30, h29, h28");
  TEST_SINGLE(fabd(HReg::h30, HReg::h29, HReg::h28), "fabd h30, h29, h28");
  TEST_SINGLE(fcmgt(HReg::h30, HReg::h29, HReg::h28), "fcmgt h30, h29, h28");
  TEST_SINGLE(facgt(HReg::h30, HReg::h29, HReg::h28), "facgt h30, h29, h28");
}
// Checks the disassembly text for the two-operand scalar emitters when called
// with half-precision (HReg) operands: float/integer conversions, reciprocal
// and reciprocal-square-root estimates, and the two-operand compare overloads.
// The two-operand fcmgt/fcmeq/fcmlt/fcmge/fcmle overloads are expected to
// disassemble as a comparison against `#0.0`; every other case expects just
// the mnemonic followed by `h30, h29`.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar two-register miscellaneous FP16") {
  TEST_SINGLE(fcvtns(HReg::h30, HReg::h29), "fcvtns h30, h29");
  TEST_SINGLE(fcvtms(HReg::h30, HReg::h29), "fcvtms h30, h29");
  TEST_SINGLE(fcvtas(HReg::h30, HReg::h29), "fcvtas h30, h29");
  TEST_SINGLE(scvtf(HReg::h30, HReg::h29), "scvtf h30, h29");
  TEST_SINGLE(fcmgt(HReg::h30, HReg::h29), "fcmgt h30, h29, #0.0");
  TEST_SINGLE(fcmeq(HReg::h30, HReg::h29), "fcmeq h30, h29, #0.0");
  TEST_SINGLE(fcmlt(HReg::h30, HReg::h29), "fcmlt h30, h29, #0.0");
  TEST_SINGLE(fcvtps(HReg::h30, HReg::h29), "fcvtps h30, h29");
  TEST_SINGLE(fcvtzs(HReg::h30, HReg::h29), "fcvtzs h30, h29");
  TEST_SINGLE(frecpe(HReg::h30, HReg::h29), "frecpe h30, h29");
  TEST_SINGLE(frecpx(HReg::h30, HReg::h29), "frecpx h30, h29");
  TEST_SINGLE(fcvtnu(HReg::h30, HReg::h29), "fcvtnu h30, h29");
  TEST_SINGLE(fcvtmu(HReg::h30, HReg::h29), "fcvtmu h30, h29");
  TEST_SINGLE(fcvtau(HReg::h30, HReg::h29), "fcvtau h30, h29");
  TEST_SINGLE(ucvtf(HReg::h30, HReg::h29), "ucvtf h30, h29");
  TEST_SINGLE(fcmge(HReg::h30, HReg::h29), "fcmge h30, h29, #0.0");
  TEST_SINGLE(fcmle(HReg::h30, HReg::h29), "fcmle h30, h29, #0.0");
  TEST_SINGLE(fcvtpu(HReg::h30, HReg::h29), "fcvtpu h30, h29");
  TEST_SINGLE(fcvtzu(HReg::h30, HReg::h29), "fcvtzu h30, h29");
  TEST_SINGLE(frsqrte(HReg::h30, HReg::h29), "frsqrte h30, h29");
}
// Checks the disassembly text for the scalar sqrdmlah and sqrdmlsh emitters.
// Only the 16-bit and 32-bit scalar sizes are exercised here; the expected
// text uses h and s registers respectively.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three same extra") {
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmlah h30, h29, h28");
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmlah s30, s29, s28");

  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmlsh h30, h29, h28");
  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmlsh s30, s29, s28");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar two-register miscellaneous") {
  // Commented out lines showcase unallocated encodings.
  TEST_SINGLE(suqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "suqadd b30, b29");
  TEST_SINGLE(suqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "suqadd h30, h29");
  TEST_SINGLE(suqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "suqadd s30, s29");
  TEST_SINGLE(suqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "suqadd d30, d29");

  TEST_SINGLE(sqabs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqabs b30, b29");
  TEST_SINGLE(sqabs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqabs h30, h29");
  TEST_SINGLE(sqabs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqabs s30, s29");
  TEST_SINGLE(sqabs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqabs d30, d29");

  // TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "cmgt b30, b29, #0");
  // TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmgt h30, h29, #0");
  // TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmgt s30, s29, #0");
  TEST_SINGLE(cmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmgt d30, d29, #0");

  // TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "cmeq b30, b29, #0");
  // TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmeq h30, h29, #0");
  // TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmeq s30, s29, #0");
  TEST_SINGLE(cmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmeq d30, d29, #0");

  // TEST_SINGLE(cmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "cmlt b30, b29, #0");
  // TEST_SINGLE(cmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmlt h30, h29, #0");
  // TEST_SINGLE(cmlt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmlt s30, s29, #0");
  TEST_SINGLE(cmlt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmlt d30, d29, #0");

  // TEST_SINGLE(abs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "abs b30, b29");
  // TEST_SINGLE(abs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "abs h30, h29");
  // TEST_SINGLE(abs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "abs s30, s29");
  TEST_SINGLE(abs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "abs d30, d29");

  TEST_SINGLE(sqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtn b30, h29");
  TEST_SINGLE(sqxtn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqxtn h30, s29");
  TEST_SINGLE(sqxtn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqxtn s30, d29");
  // TEST_SINGLE(sqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtn d30, d29");

  // TEST_SINGLE(fcvtns(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtns b30, b29");
  // TEST_SINGLE(fcvtns(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtns h30, h29");
  TEST_SINGLE(fcvtns(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtns s30, s29");
  TEST_SINGLE(fcvtns(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtns d30, d29");

  // TEST_SINGLE(fcvtms(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtms b30, b29");
  // TEST_SINGLE(fcvtms(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtms h30, h29");
  TEST_SINGLE(fcvtms(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtms s30, s29");
  TEST_SINGLE(fcvtms(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtms d30, d29");

  // TEST_SINGLE(fcvtas(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtas b30, b29");
  // TEST_SINGLE(fcvtas(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtas h30, h29");
  TEST_SINGLE(fcvtas(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtas s30, s29");
  TEST_SINGLE(fcvtas(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtas d30, d29");

  // TEST_SINGLE(scvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "scvtf b30, b29");
  // TEST_SINGLE(scvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "scvtf h30, h29");
  TEST_SINGLE(scvtf(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "scvtf s30, s29");
  TEST_SINGLE(scvtf(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "scvtf d30, d29");

  // TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcmeq b30, b29");
  // TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmeq h30, h29");
  TEST_SINGLE(fcmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmeq s30, s29, #0.0");
  TEST_SINGLE(fcmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmeq d30, d29, #0.0");

  // TEST_SINGLE(fcmlt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcmlt b30, b29");
  // TEST_SINGLE(fcmlt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmlt h30, h29");
  TEST_SINGLE(fcmlt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmlt s30, s29, #0.0");
  TEST_SINGLE(fcmlt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmlt d30, d29, #0.0");

  // TEST_SINGLE(fcvtps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtps b30, b29");
  // TEST_SINGLE(fcvtps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtps h30, h29");
  TEST_SINGLE(fcvtps(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtps s30, s29");
  TEST_SINGLE(fcvtps(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtps d30, d29");

  // TEST_SINGLE(fcvtzs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtzs b30, b29");
  // TEST_SINGLE(fcvtzs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzs h30, h29");
  TEST_SINGLE(fcvtzs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtzs s30, s29");
  TEST_SINGLE(fcvtzs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtzs d30, d29");

  // TEST_SINGLE(frecpe(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "frecpe b30, b29");
  // TEST_SINGLE(frecpe(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpe h30, h29");
  TEST_SINGLE(frecpe(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frecpe s30, s29");
  TEST_SINGLE(frecpe(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frecpe d30, d29");

  // TEST_SINGLE(frecpx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "frecpx b30, b29");
  // TEST_SINGLE(frecpx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frecpx h30, h29");
  TEST_SINGLE(frecpx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frecpx s30, s29");
  TEST_SINGLE(frecpx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frecpx d30, d29");

  TEST_SINGLE(usqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "usqadd b30, b29");
  TEST_SINGLE(usqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "usqadd h30, h29");
  TEST_SINGLE(usqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "usqadd s30, s29");
  TEST_SINGLE(usqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "usqadd d30, d29");

  TEST_SINGLE(sqneg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqneg b30, b29");
  TEST_SINGLE(sqneg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqneg h30, h29");
  TEST_SINGLE(sqneg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqneg s30, s29");
  TEST_SINGLE(sqneg(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqneg d30, d29");

  // TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "cmge b30, b29");
  // TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmge h30, h29");
  // TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmge s30, s29");
  TEST_SINGLE(cmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmge d30, d29, #0");

  // TEST_SINGLE(cmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "cmle b30, b29");
  // TEST_SINGLE(cmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "cmle h30, h29");
  // TEST_SINGLE(cmle(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "cmle s30, s29");
  TEST_SINGLE(cmle(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "cmle d30, d29, #0");

  // TEST_SINGLE(neg(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "neg b30, b29");
  // TEST_SINGLE(neg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "neg h30, h29");
  // TEST_SINGLE(neg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "neg s30, s29");
  TEST_SINGLE(neg(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "neg d30, d29");

  TEST_SINGLE(sqxtun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "sqxtun b30, h29");
  TEST_SINGLE(sqxtun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "sqxtun h30, s29");
  TEST_SINGLE(sqxtun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "sqxtun s30, d29");
  // TEST_SINGLE(sqxtun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "sqxtun d30, d29");

  TEST_SINGLE(uqxtn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "uqxtn b30, h29");
  TEST_SINGLE(uqxtn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "uqxtn h30, s29");
  TEST_SINGLE(uqxtn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "uqxtn s30, d29");
  // TEST_SINGLE(uqxtn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "uqxtn d30, d29");

  // TEST_SINGLE(fcvtxn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtxn b30, b29");
  // TEST_SINGLE(fcvtxn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtxn h30, h29");
  TEST_SINGLE(fcvtxn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtxn s30, d29");
  // TEST_SINGLE(fcvtxn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtxn d30, d29");

  // TEST_SINGLE(fcvtnu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtnu b30, b29");
  // TEST_SINGLE(fcvtnu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtnu h30, h29");
  TEST_SINGLE(fcvtnu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtnu s30, s29");
  TEST_SINGLE(fcvtnu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtnu d30, d29");

  // TEST_SINGLE(fcvtmu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtmu b30, b29");
  // TEST_SINGLE(fcvtmu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtmu h30, h29");
  TEST_SINGLE(fcvtmu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtmu s30, s29");
  TEST_SINGLE(fcvtmu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtmu d30, d29");

  // TEST_SINGLE(fcvtau(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtau b30, b29");
  // TEST_SINGLE(fcvtau(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtau h30, h29");
  TEST_SINGLE(fcvtau(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtau s30, s29");
  TEST_SINGLE(fcvtau(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtau d30, d29");

  // TEST_SINGLE(ucvtf(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "ucvtf b30, b29");
  // TEST_SINGLE(ucvtf(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "ucvtf h30, h29");
  TEST_SINGLE(ucvtf(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "ucvtf s30, s29");
  TEST_SINGLE(ucvtf(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "ucvtf d30, d29");

  // TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcmge b30, b29");
  // TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmge h30, h29");
  TEST_SINGLE(fcmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmge s30, s29, #0.0");
  TEST_SINGLE(fcmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmge d30, d29, #0.0");

  // TEST_SINGLE(fcmle(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcmle b30, b29");
  // TEST_SINGLE(fcmle(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmle h30, h29");
  TEST_SINGLE(fcmle(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmle s30, s29, #0.0");
  TEST_SINGLE(fcmle(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmle d30, d29, #0.0");

  // TEST_SINGLE(fcvtpu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtpu b30, b29");
  // TEST_SINGLE(fcvtpu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtpu h30, h29");
  TEST_SINGLE(fcvtpu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtpu s30, s29");
  TEST_SINGLE(fcvtpu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtpu d30, d29");

  // TEST_SINGLE(fcvtzu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fcvtzu b30, b29");
  // TEST_SINGLE(fcvtzu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcvtzu h30, h29");
  TEST_SINGLE(fcvtzu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcvtzu s30, s29");
  TEST_SINGLE(fcvtzu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcvtzu d30, d29");

  // TEST_SINGLE(frsqrte(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "frsqrte b30, b29");
  // TEST_SINGLE(frsqrte(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frsqrte h30, h29");
  TEST_SINGLE(frsqrte(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frsqrte s30, s29");
  TEST_SINGLE(frsqrte(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frsqrte d30, d29");
}
// Checks the scalar pairwise emitter helpers against their expected disassembly.
// Each active TEST_SINGLE pairs one emitter call with the text it should disassemble to
// (TEST_SINGLE is defined outside this section; presumably it emits the instruction and
// compares the disassembler output - confirm against the macro definition).
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar pairwise") {
  // Commented out lines showcase unallocated encodings.
  // Integer ADDP is only exercised at 64-bit; the expected text shows a scalar destination
  // and a two-element vector source (`v29.2d`).
  // TEST_SINGLE(addp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "addp b30, b29");
  // TEST_SINGLE(addp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "addp h30, h29");
  // TEST_SINGLE(addp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "addp s30, s29");
  TEST_SINGLE(addp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "addp d30, v29.2d");

  // For the floating-point pairwise ops below, the half-precision form is tested through the
  // HReg overload, while the single/double forms go through the ScalarRegSize overload.
  // The ScalarRegSize 8-bit and 16-bit variants stay commented out.
  TEST_SINGLE(fmaxnmp(HReg::h30, HReg::h29), "fmaxnmp h30, v29.2h");
  // TEST_SINGLE(fmaxnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fmaxnmp b30, b29");
  // TEST_SINGLE(fmaxnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxnmp h30, h29");
  TEST_SINGLE(fmaxnmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fmaxnmp s30, v29.2s");
  TEST_SINGLE(fmaxnmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fmaxnmp d30, v29.2d");

  TEST_SINGLE(faddp(HReg::h30, HReg::h29), "faddp h30, v29.2h");
  // TEST_SINGLE(faddp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "faddp b30, b29");
  // TEST_SINGLE(faddp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "faddp h30, h29");
  TEST_SINGLE(faddp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "faddp s30, v29.2s");
  TEST_SINGLE(faddp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "faddp d30, v29.2d");

  TEST_SINGLE(fmaxp(HReg::h30, HReg::h29), "fmaxp h30, v29.2h");
  // TEST_SINGLE(fmaxp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fmaxp b30, b29");
  // TEST_SINGLE(fmaxp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmaxp h30, h29");
  TEST_SINGLE(fmaxp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fmaxp s30, v29.2s");
  TEST_SINGLE(fmaxp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fmaxp d30, v29.2d");

  TEST_SINGLE(fminnmp(HReg::h30, HReg::h29), "fminnmp h30, v29.2h");
  // TEST_SINGLE(fminnmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fminnmp b30, b29");
  // TEST_SINGLE(fminnmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminnmp h30, h29");
  TEST_SINGLE(fminnmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fminnmp s30, v29.2s");
  TEST_SINGLE(fminnmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fminnmp d30, v29.2d");

  TEST_SINGLE(fminp(HReg::h30, HReg::h29), "fminp h30, v29.2h");
  // TEST_SINGLE(fminp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29),  "fminp b30, b29");
  // TEST_SINGLE(fminp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fminp h30, h29");
  TEST_SINGLE(fminp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fminp s30, v29.2s");
  TEST_SINGLE(fminp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fminp d30, v29.2d");
}
// Checks the scalar "three different" emitter helpers (sqdmlal, sqdmlsl, sqdmull) against their
// expected disassembly. In the active checks the ScalarRegSize argument matches the destination
// register width in the expected text, and both sources are one size narrower
// (i32Bit -> `s30, h29, h28`; i64Bit -> `d30, s29, s28`).
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three different") {
  // Commented out lines showcase unallocated encodings.
  // NOTE(review): the expected strings on the commented-out lines are vector-form placeholders
  // (e.g. `v28.v16b`), not the scalar syntax used by the active checks; they would need to be
  // rewritten before any of these lines could be enabled.
  // TEST_SINGLE(sqdmlal(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28),  "sqdmlal v30.16b, v29.16b, v28.v16b");
  // TEST_SINGLE(sqdmlal(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal v30.16b, v29.16b, v28.v16b");
  TEST_SINGLE(sqdmlal(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal s30, h29, h28");
  TEST_SINGLE(sqdmlal(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlal d30, s29, s28");

  // TEST_SINGLE(sqdmlsl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28),  "sqdmlsl v30.16b, v29.16b, v28.v16b");
  // TEST_SINGLE(sqdmlsl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl v30.16b, v29.16b, v28.v16b");
  TEST_SINGLE(sqdmlsl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl s30, h29, h28");
  TEST_SINGLE(sqdmlsl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmlsl d30, s29, s28");

  // TEST_SINGLE(sqdmull(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28),  "sqdmull v30.16b, v29.16b, v28.v16b");
  // TEST_SINGLE(sqdmull(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull v30.16b, v29.16b, v28.v16b");
  TEST_SINGLE(sqdmull(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull s30, h29, h28");
  TEST_SINGLE(sqdmull(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmull d30, s29, s28");
}
// Checks the scalar "three same" emitter helpers against their expected disassembly.
// Every check uses the same operands (v30 <- v29, v28); only the ScalarRegSize argument varies,
// and the expected text uses the matching b/h/s/d register names for all three operands.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar three same") {
  // NOTE(review): presumably, as in the sibling test cases of this file, the commented-out lines
  // are size variants with unallocated encodings - confirm before enabling any of them.

  // Saturating add/subtract: exercised at all four element sizes.
  TEST_SINGLE(sqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqadd b30, b29, b28");
  TEST_SINGLE(sqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqadd h30, h29, h28");
  TEST_SINGLE(sqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqadd s30, s29, s28");
  TEST_SINGLE(sqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqadd d30, d29, d28");

  TEST_SINGLE(sqsub(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub b30, b29, b28");
  TEST_SINGLE(sqsub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub h30, h29, h28");
  TEST_SINGLE(sqsub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub s30, s29, s28");
  TEST_SINGLE(sqsub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqsub d30, d29, d28");

  // Integer compares and non-saturating shifts: only the 64-bit form is exercised.
  // TEST_SINGLE(cmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt b30, b29, b28");
  // TEST_SINGLE(cmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt h30, h29, h28");
  // TEST_SINGLE(cmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt s30, s29, s28");
  TEST_SINGLE(cmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmgt d30, d29, d28");

  // TEST_SINGLE(cmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmge b30, b29, b28");
  // TEST_SINGLE(cmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmge h30, h29, h28");
  // TEST_SINGLE(cmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmge s30, s29, s28");
  TEST_SINGLE(cmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmge d30, d29, d28");

  // TEST_SINGLE(sshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sshl b30, b29, b28");
  // TEST_SINGLE(sshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sshl h30, h29, h28");
  // TEST_SINGLE(sshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sshl s30, s29, s28");
  TEST_SINGLE(sshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sshl d30, d29, d28");

  TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl b30, b29, b28");
  TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl h30, h29, h28");
  TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl s30, s29, s28");
  TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqshl d30, d29, d28");

  // TEST_SINGLE(srshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "srshl b30, b29, b28");
  // TEST_SINGLE(srshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "srshl h30, h29, h28");
  // TEST_SINGLE(srshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "srshl s30, s29, s28");
  TEST_SINGLE(srshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "srshl d30, d29, d28");

  TEST_SINGLE(sqrshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl b30, b29, b28");
  TEST_SINGLE(sqrshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl h30, h29, h28");
  TEST_SINGLE(sqrshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl s30, s29, s28");
  TEST_SINGLE(sqrshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqrshl d30, d29, d28");

  // TEST_SINGLE(add(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "add b30, b29, b28");
  // TEST_SINGLE(add(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "add h30, h29, h28");
  // TEST_SINGLE(add(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "add s30, s29, s28");
  TEST_SINGLE(add(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "add d30, d29, d28");

  // TEST_SINGLE(cmtst(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst b30, b29, b28");
  // TEST_SINGLE(cmtst(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst h30, h29, h28");
  // TEST_SINGLE(cmtst(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst s30, s29, s28");
  TEST_SINGLE(cmtst(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmtst d30, d29, d28");

  // sqdmulh is exercised only at 16-bit and 32-bit.
  // TEST_SINGLE(sqdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh b30, b29, b28");
  TEST_SINGLE(sqdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh h30, h29, h28");
  TEST_SINGLE(sqdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh s30, s29, s28");
  // TEST_SINGLE(sqdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqdmulh d30, d29, d28");

  // Floating-point ops in this test case are exercised only at 32-bit and 64-bit.
  // TEST_SINGLE(fmulx(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx b30, b29, b28");
  // TEST_SINGLE(fmulx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx h30, h29, h28");
  TEST_SINGLE(fmulx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx s30, s29, s28");
  TEST_SINGLE(fmulx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmulx d30, d29, d28");

  // TEST_SINGLE(fcmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq b30, b29, b28");
  // TEST_SINGLE(fcmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq h30, h29, h28");
  TEST_SINGLE(fcmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq s30, s29, s28");
  TEST_SINGLE(fcmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmeq d30, d29, d28");

  // TEST_SINGLE(frecps(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frecps b30, b29, b28");
  // TEST_SINGLE(frecps(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "frecps h30, h29, h28");
  TEST_SINGLE(frecps(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "frecps s30, s29, s28");
  TEST_SINGLE(frecps(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "frecps d30, d29, d28");

  // TEST_SINGLE(frsqrts(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts b30, b29, b28");
  // TEST_SINGLE(frsqrts(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts h30, h29, h28");
  TEST_SINGLE(frsqrts(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts s30, s29, s28");
  TEST_SINGLE(frsqrts(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "frsqrts d30, d29, d28");

  // Unsigned counterparts of the integer ops above follow the same size coverage.
  TEST_SINGLE(uqadd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqadd b30, b29, b28");
  TEST_SINGLE(uqadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "uqadd h30, h29, h28");
  TEST_SINGLE(uqadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqadd s30, s29, s28");
  TEST_SINGLE(uqadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqadd d30, d29, d28");

  TEST_SINGLE(uqsub(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub b30, b29, b28");
  TEST_SINGLE(uqsub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub h30, h29, h28");
  TEST_SINGLE(uqsub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub s30, s29, s28");
  TEST_SINGLE(uqsub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqsub d30, d29, d28");

  // TEST_SINGLE(cmhi(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi b30, b29, b28");
  // TEST_SINGLE(cmhi(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi h30, h29, h28");
  // TEST_SINGLE(cmhi(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi s30, s29, s28");
  TEST_SINGLE(cmhi(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmhi d30, d29, d28");

  // TEST_SINGLE(cmhs(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs b30, b29, b28");
  // TEST_SINGLE(cmhs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs h30, h29, h28");
  // TEST_SINGLE(cmhs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs s30, s29, s28");
  TEST_SINGLE(cmhs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmhs d30, d29, d28");

  // TEST_SINGLE(ushl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "ushl b30, b29, b28");
  // TEST_SINGLE(ushl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "ushl h30, h29, h28");
  // TEST_SINGLE(ushl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "ushl s30, s29, s28");
  TEST_SINGLE(ushl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "ushl d30, d29, d28");

  TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl b30, b29, b28");
  TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl h30, h29, h28");
  TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl s30, s29, s28");
  TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqshl d30, d29, d28");

  // TEST_SINGLE(urshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "urshl b30, b29, b28");
  // TEST_SINGLE(urshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "urshl h30, h29, h28");
  // TEST_SINGLE(urshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "urshl s30, s29, s28");
  TEST_SINGLE(urshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "urshl d30, d29, d28");

  TEST_SINGLE(uqrshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl b30, b29, b28");
  TEST_SINGLE(uqrshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl h30, h29, h28");
  TEST_SINGLE(uqrshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl s30, s29, s28");
  TEST_SINGLE(uqrshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "uqrshl d30, d29, d28");

  // TEST_SINGLE(sub(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sub b30, b29, b28");
  // TEST_SINGLE(sub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sub h30, h29, h28");
  // TEST_SINGLE(sub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sub s30, s29, s28");
  TEST_SINGLE(sub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sub d30, d29, d28");

  // TEST_SINGLE(cmeq(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq b30, b29, b28");
  // TEST_SINGLE(cmeq(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq h30, h29, h28");
  // TEST_SINGLE(cmeq(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq s30, s29, s28");
  TEST_SINGLE(cmeq(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "cmeq d30, d29, d28");

  // sqrdmulh mirrors sqdmulh: only the 16-bit and 32-bit forms are exercised.
  // TEST_SINGLE(sqrdmulh(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh b30, b29, b28");
  TEST_SINGLE(sqrdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh h30, h29, h28");
  TEST_SINGLE(sqrdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh s30, s29, s28");
  // TEST_SINGLE(sqrdmulh(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "sqrdmulh d30, d29, d28");

  // TEST_SINGLE(fcmge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge b30, b29, b28");
  // TEST_SINGLE(fcmge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge h30, h29, h28");
  TEST_SINGLE(fcmge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge s30, s29, s28");
  TEST_SINGLE(fcmge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmge d30, d29, d28");

  // TEST_SINGLE(facge(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facge b30, b29, b28");
  // TEST_SINGLE(facge(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facge h30, h29, h28");
  TEST_SINGLE(facge(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "facge s30, s29, s28");
  TEST_SINGLE(facge(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "facge d30, d29, d28");

  // TEST_SINGLE(fabd(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fabd b30, b29, b28");
  // TEST_SINGLE(fabd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fabd h30, h29, h28");
  TEST_SINGLE(fabd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fabd s30, s29, s28");
  TEST_SINGLE(fabd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fabd d30, d29, d28");

  // TEST_SINGLE(fcmgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt b30, b29, b28");
  // TEST_SINGLE(fcmgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt h30, h29, h28");
  TEST_SINGLE(fcmgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt s30, s29, s28");
  TEST_SINGLE(fcmgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fcmgt d30, d29, d28");

  // TEST_SINGLE(facgt(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, VReg::v28), "facgt b30, b29, b28");
  // TEST_SINGLE(facgt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "facgt h30, h29, h28");
  TEST_SINGLE(facgt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "facgt s30, s29, s28");
  TEST_SINGLE(facgt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "facgt d30, d29, d28");
}
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar shift by immediate") {
  // TODO: Implement `SCVTF, FCVTZS` in emitter
  // TEST_SINGLE(sshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "sshr b30, b29, #1");
  // TEST_SINGLE(sshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "sshr b30, b29, #7");
  // TEST_SINGLE(sshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "sshr h30, h29, #1");
  // TEST_SINGLE(sshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sshr h30, h29, #15");
  // TEST_SINGLE(sshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "sshr s30, s29, #1");
  // TEST_SINGLE(sshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sshr s30, s29, #31");
  TEST_SINGLE(sshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sshr d30, d29, #1");
  TEST_SINGLE(sshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sshr d30, d29, #63");

  // TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "ssra b30, b29, #1");
  // TEST_SINGLE(ssra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "ssra b30, b29, #7");
  // TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "ssra h30, h29, #1");
  // TEST_SINGLE(ssra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ssra h30, h29, #15");
  // TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "ssra s30, s29, #1");
  // TEST_SINGLE(ssra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ssra s30, s29, #31");
  TEST_SINGLE(ssra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ssra d30, d29, #1");
  TEST_SINGLE(ssra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ssra d30, d29, #63");

  // TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "srshr b30, b29, #1");
  // TEST_SINGLE(srshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "srshr b30, b29, #7");
  // TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "srshr h30, h29, #1");
  // TEST_SINGLE(srshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srshr h30, h29, #15");
  // TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "srshr s30, s29, #1");
  // TEST_SINGLE(srshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srshr s30, s29, #31");
  TEST_SINGLE(srshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srshr d30, d29, #1");
  TEST_SINGLE(srshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "srshr d30, d29, #63");

  // TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "srsra b30, b29, #1");
  // TEST_SINGLE(srsra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "srsra b30, b29, #7");
  // TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "srsra h30, h29, #1");
  // TEST_SINGLE(srsra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "srsra h30, h29, #15");
  // TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "srsra s30, s29, #1");
  // TEST_SINGLE(srsra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "srsra s30, s29, #31");
  TEST_SINGLE(srsra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "srsra d30, d29, #1");
  TEST_SINGLE(srsra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "srsra d30, d29, #63");

  // TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "shl b30, b29, #1");
  // TEST_SINGLE(shl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "shl b30, b29, #7");
  // TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "shl h30, h29, #1");
  // TEST_SINGLE(shl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "shl h30, h29, #15");
  // TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "shl s30, s29, #1");
  // TEST_SINGLE(shl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "shl s30, s29, #31");
  TEST_SINGLE(shl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "shl d30, d29, #1");
  TEST_SINGLE(shl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "shl d30, d29, #63");

  TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshl b30, b29, #1");
  TEST_SINGLE(sqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshl b30, b29, #7");
  TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshl h30, h29, #1");
  TEST_SINGLE(sqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshl h30, h29, #15");
  TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshl s30, s29, #1");
  TEST_SINGLE(sqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshl s30, s29, #31");
  TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshl d30, d29, #1");
  TEST_SINGLE(sqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshl d30, d29, #63");

  TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrn b30, h29, #1");
  TEST_SINGLE(sqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrn b30, h29, #7");
  TEST_SINGLE(sqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrn h30, s29, #1");
  TEST_SINGLE(sqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshrn h30, s29, #15");
  TEST_SINGLE(sqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrn s30, d29, #1");
  TEST_SINGLE(sqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshrn s30, d29, #31");
  // TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "sqshrn d30, d29, #1");
  // TEST_SINGLE(sqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrn d30, d29, #63");

  TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrn b30, h29, #1");
  TEST_SINGLE(sqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrn b30, h29, #7");
  TEST_SINGLE(sqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrn h30, s29, #1");
  TEST_SINGLE(sqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqrshrn h30, s29, #15");
  TEST_SINGLE(sqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrn s30, d29, #1");
  TEST_SINGLE(sqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqrshrn s30, d29, #31");
  // TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "sqrshrn d30, d29, #1");
  // TEST_SINGLE(sqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrn d30, d29, #63");

  // TODO: Implement `UCVTF, FCVTZU` in emitter
  // TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "ushr b30, b29, #1");
  // TEST_SINGLE(ushr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "ushr b30, b29, #7");
  // TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "ushr h30, h29, #1");
  // TEST_SINGLE(ushr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ushr h30, h29, #15");
  // TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "ushr s30, s29, #1");
  // TEST_SINGLE(ushr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ushr s30, s29, #31");
  TEST_SINGLE(ushr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ushr d30, d29, #1");
  TEST_SINGLE(ushr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ushr d30, d29, #63");

  // TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "usra b30, b29, #1");
  // TEST_SINGLE(usra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "usra b30, b29, #7");
  // TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "usra h30, h29, #1");
  // TEST_SINGLE(usra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "usra h30, h29, #15");
  // TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "usra s30, s29, #1");
  // TEST_SINGLE(usra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "usra s30, s29, #31");
  TEST_SINGLE(usra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "usra d30, d29, #1");
  TEST_SINGLE(usra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "usra d30, d29, #63");

  // TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "urshr b30, b29, #1");
  // TEST_SINGLE(urshr(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "urshr b30, b29, #7");
  // TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "urshr h30, h29, #1");
  // TEST_SINGLE(urshr(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "urshr h30, h29, #15");
  // TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "urshr s30, s29, #1");
  // TEST_SINGLE(urshr(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "urshr s30, s29, #31");
  TEST_SINGLE(urshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "urshr d30, d29, #1");
  TEST_SINGLE(urshr(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "urshr d30, d29, #63");

  // TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "ursra b30, b29, #1");
  // TEST_SINGLE(ursra(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "ursra b30, b29, #7");
  // TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "ursra h30, h29, #1");
  // TEST_SINGLE(ursra(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "ursra h30, h29, #15");
  // TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "ursra s30, s29, #1");
  // TEST_SINGLE(ursra(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "ursra s30, s29, #31");
  TEST_SINGLE(ursra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "ursra d30, d29, #1");
  TEST_SINGLE(ursra(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "ursra d30, d29, #63");

  // TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "sri b30, b29, #1");
  // TEST_SINGLE(sri(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "sri b30, b29, #7");
  // TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "sri h30, h29, #1");
  // TEST_SINGLE(sri(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sri h30, h29, #15");
  // TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "sri s30, s29, #1");
  // TEST_SINGLE(sri(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sri s30, s29, #31");
  TEST_SINGLE(sri(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sri d30, d29, #1");
  TEST_SINGLE(sri(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sri d30, d29, #63");

  // TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1),   "sli b30, b29, #1");
  // TEST_SINGLE(sli(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7),   "sli b30, b29, #7");
  // TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1),  "sli h30, h29, #1");
  // TEST_SINGLE(sli(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sli h30, h29, #15");
  // TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1),  "sli s30, s29, #1");
  // TEST_SINGLE(sli(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sli s30, s29, #31");
  TEST_SINGLE(sli(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sli d30, d29, #1");
  TEST_SINGLE(sli(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sli d30, d29, #63");

  TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshlu b30, b29, #1");
  TEST_SINGLE(sqshlu(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshlu b30, b29, #7");
  TEST_SINGLE(sqshlu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshlu h30, h29, #1");
  TEST_SINGLE(sqshlu(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshlu h30, h29, #15");
  TEST_SINGLE(sqshlu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshlu s30, s29, #1");
  TEST_SINGLE(sqshlu(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshlu s30, s29, #31");
  TEST_SINGLE(sqshlu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "sqshlu d30, d29, #1");
  TEST_SINGLE(sqshlu(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshlu d30, d29, #63");

  TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshl b30, b29, #1");
  TEST_SINGLE(uqshl(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshl b30, b29, #7");
  TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqshl h30, h29, #1");
  TEST_SINGLE(uqshl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqshl h30, h29, #15");
  TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshl s30, s29, #1");
  TEST_SINGLE(uqshl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqshl s30, s29, #31");
  TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1), "uqshl d30, d29, #1");
  TEST_SINGLE(uqshl(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqshl d30, d29, #63");

  TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqshrun b30, h29, #1");
  TEST_SINGLE(sqshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqshrun b30, h29, #7");
  TEST_SINGLE(sqshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqshrun h30, s29, #1");
  TEST_SINGLE(sqshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqshrun h30, s29, #15");
  TEST_SINGLE(sqshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqshrun s30, d29, #1");
  TEST_SINGLE(sqshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqshrun s30, d29, #31");
  // TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "sqshrun d30, d29, #1");
  // TEST_SINGLE(sqshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqshrun d30, d29, #63");

  TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "sqrshrun b30, h29, #1");
  TEST_SINGLE(sqrshrun(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "sqrshrun b30, h29, #7");
  TEST_SINGLE(sqrshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "sqrshrun h30, s29, #1");
  TEST_SINGLE(sqrshrun(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "sqrshrun h30, s29, #15");
  TEST_SINGLE(sqrshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "sqrshrun s30, d29, #1");
  TEST_SINGLE(sqrshrun(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "sqrshrun s30, d29, #31");
  // TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "sqrshrun d30, d29, #1");
  // TEST_SINGLE(sqrshrun(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "sqrshrun d30, d29, #63");

  TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqshrn b30, h29, #1");
  TEST_SINGLE(uqshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqshrn b30, h29, #7");
  TEST_SINGLE(uqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqshrn h30, s29, #1");
  TEST_SINGLE(uqshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqshrn h30, s29, #15");
  TEST_SINGLE(uqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqshrn s30, d29, #1");
  TEST_SINGLE(uqshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqshrn s30, d29, #31");
  // TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "uqshrn d30, d29, #1");
  // TEST_SINGLE(uqshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqshrn d30, d29, #63");

  TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 1), "uqrshrn b30, h29, #1");
  TEST_SINGLE(uqrshrn(ScalarRegSize::i8Bit, VReg::v30, VReg::v29, 7), "uqrshrn b30, h29, #7");
  TEST_SINGLE(uqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 1), "uqrshrn h30, s29, #1");
  TEST_SINGLE(uqrshrn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, 15), "uqrshrn h30, s29, #15");
  TEST_SINGLE(uqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 1), "uqrshrn s30, d29, #1");
  TEST_SINGLE(uqrshrn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, 31), "uqrshrn s30, d29, #31");
  // TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 1),  "uqrshrn d30, d29, #1");
  // TEST_SINGLE(uqrshrn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, 63), "uqrshrn d30, d29, #63");

  // TODO: Implement `UCVTF, FCVTZU' in emitter
}
// Scalar "x indexed element" forms: the final operand is a single lane of a
// vector register. Each TEST_SINGLE pairs one emitter call with the
// disassembly text expected for it.
//
// Throughout this case the 16-bit variants index v15 (lanes 4 and 7), the
// 32-bit variants index v28 (lanes 0 and 3) and the 64-bit variants index v28
// (lanes 0 and 1).
// NOTE(review): v15 is presumably used for the 16-bit forms because the
// H-element encoding can only name v0-v15 as the indexed register - confirm
// against the emitter / architecture manual.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Advanced SIMD scalar x indexed element") {
  // Widening saturating doubling multiplies: the expected destination is twice
  // the source width (h -> s, s -> d).
  TEST_SINGLE(sqdmlal(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqdmlal s30, h29, v15.h[4]");
  TEST_SINGLE(sqdmlal(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlal s30, h29, v15.h[7]");
  TEST_SINGLE(sqdmlal(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlal d30, s29, v28.s[0]");
  TEST_SINGLE(sqdmlal(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlal d30, s29, v28.s[3]");

  TEST_SINGLE(sqdmlsl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqdmlsl s30, h29, v15.h[4]");
  TEST_SINGLE(sqdmlsl(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmlsl s30, h29, v15.h[7]");
  TEST_SINGLE(sqdmlsl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmlsl d30, s29, v28.s[0]");
  TEST_SINGLE(sqdmlsl(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmlsl d30, s29, v28.s[3]");

  TEST_SINGLE(sqdmull(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqdmull s30, h29, v15.h[4]");
  TEST_SINGLE(sqdmull(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmull s30, h29, v15.h[7]");
  TEST_SINGLE(sqdmull(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmull d30, s29, v28.s[0]");
  TEST_SINGLE(sqdmull(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmull d30, s29, v28.s[3]");

  // Same-width saturating doubling multiply-high: destination and source share
  // the requested size (16-bit and 32-bit only).
  TEST_SINGLE(sqdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqdmulh h30, h29, v15.h[4]");
  TEST_SINGLE(sqdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqdmulh h30, h29, v15.h[7]");
  TEST_SINGLE(sqdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqdmulh s30, s29, v28.s[0]");
  TEST_SINGLE(sqdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqdmulh s30, s29, v28.s[3]");

  TEST_SINGLE(sqrdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqrdmulh h30, h29, v15.h[4]");
  TEST_SINGLE(sqrdmulh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqrdmulh h30, h29, v15.h[7]");
  TEST_SINGLE(sqrdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqrdmulh s30, s29, v28.s[0]");
  TEST_SINGLE(sqrdmulh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqrdmulh s30, s29, v28.s[3]");

  // Floating-point by-element forms: exercised at 16, 32 and 64 bits.
  TEST_SINGLE(fmla(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "fmla h30, h29, v15.h[4]");
  TEST_SINGLE(fmla(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "fmla h30, h29, v15.h[7]");
  TEST_SINGLE(fmla(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmla s30, s29, v28.s[0]");
  TEST_SINGLE(fmla(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "fmla s30, s29, v28.s[3]");
  TEST_SINGLE(fmla(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmla d30, d29, v28.d[0]");
  TEST_SINGLE(fmla(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 1), "fmla d30, d29, v28.d[1]");

  TEST_SINGLE(fmls(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "fmls h30, h29, v15.h[4]");
  TEST_SINGLE(fmls(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "fmls h30, h29, v15.h[7]");
  TEST_SINGLE(fmls(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmls s30, s29, v28.s[0]");
  TEST_SINGLE(fmls(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "fmls s30, s29, v28.s[3]");
  TEST_SINGLE(fmls(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmls d30, d29, v28.d[0]");
  TEST_SINGLE(fmls(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 1), "fmls d30, d29, v28.d[1]");

  TEST_SINGLE(fmul(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "fmul h30, h29, v15.h[4]");
  TEST_SINGLE(fmul(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "fmul h30, h29, v15.h[7]");
  TEST_SINGLE(fmul(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmul s30, s29, v28.s[0]");
  TEST_SINGLE(fmul(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "fmul s30, s29, v28.s[3]");
  TEST_SINGLE(fmul(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmul d30, d29, v28.d[0]");
  TEST_SINGLE(fmul(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 1), "fmul d30, d29, v28.d[1]");

  // Saturating rounding doubling multiply accumulate/subtract high: same-width,
  // 16-bit and 32-bit only.
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqrdmlah h30, h29, v15.h[4]");
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqrdmlah h30, h29, v15.h[7]");
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqrdmlah s30, s29, v28.s[0]");
  TEST_SINGLE(sqrdmlah(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqrdmlah s30, s29, v28.s[3]");

  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "sqrdmlsh h30, h29, v15.h[4]");
  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "sqrdmlsh h30, h29, v15.h[7]");
  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "sqrdmlsh s30, s29, v28.s[0]");
  TEST_SINGLE(sqrdmlsh(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "sqrdmlsh s30, s29, v28.s[3]");

  // fmulx follows the same 16/32/64-bit pattern as the other floating-point forms above.
  TEST_SINGLE(fmulx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 4), "fmulx h30, h29, v15.h[4]");
  TEST_SINGLE(fmulx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v15, 7), "fmulx h30, h29, v15.h[7]");
  TEST_SINGLE(fmulx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmulx s30, s29, v28.s[0]");
  TEST_SINGLE(fmulx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28, 3), "fmulx s30, s29, v28.s[3]");
  TEST_SINGLE(fmulx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 0), "fmulx d30, d29, v28.d[0]");
  TEST_SINGLE(fmulx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28, 1), "fmulx d30, d29, v28.d[1]");
}
// One-source scalar floating-point operations, called through the overloads
// that take typed registers (SReg / DReg / HReg). Each TEST_SINGLE pairs one
// emitter call with the disassembly text expected for it.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-processing (1 source)") {
  // Single-precision source. fcvt is checked towards both double and half.
  TEST_SINGLE(fmov(SReg::s30, SReg::s29), "fmov s30, s29");
  TEST_SINGLE(fabs(SReg::s30, SReg::s29), "fabs s30, s29");
  TEST_SINGLE(fneg(SReg::s30, SReg::s29), "fneg s30, s29");
  TEST_SINGLE(fsqrt(SReg::s30, SReg::s29), "fsqrt s30, s29");
  TEST_SINGLE(fcvt(DReg::d30, SReg::s29), "fcvt d30, s29");
  TEST_SINGLE(fcvt(HReg::h30, SReg::s29), "fcvt h30, s29");
  TEST_SINGLE(frintn(SReg::s30, SReg::s29), "frintn s30, s29");
  TEST_SINGLE(frintp(SReg::s30, SReg::s29), "frintp s30, s29");
  TEST_SINGLE(frintm(SReg::s30, SReg::s29), "frintm s30, s29");
  TEST_SINGLE(frintz(SReg::s30, SReg::s29), "frintz s30, s29");
  TEST_SINGLE(frinta(SReg::s30, SReg::s29), "frinta s30, s29");
  TEST_SINGLE(frintx(SReg::s30, SReg::s29), "frintx s30, s29");
  TEST_SINGLE(frinti(SReg::s30, SReg::s29), "frinti s30, s29");
  TEST_SINGLE(frint32z(SReg::s30, SReg::s29), "frint32z s30, s29");
  TEST_SINGLE(frint32x(SReg::s30, SReg::s29), "frint32x s30, s29");
  TEST_SINGLE(frint64z(SReg::s30, SReg::s29), "frint64z s30, s29");
  TEST_SINGLE(frint64x(SReg::s30, SReg::s29), "frint64x s30, s29");

  // Double-precision source. fcvt is checked towards both single and half.
  TEST_SINGLE(fmov(DReg::d30, DReg::d29), "fmov d30, d29");
  TEST_SINGLE(fabs(DReg::d30, DReg::d29), "fabs d30, d29");
  TEST_SINGLE(fneg(DReg::d30, DReg::d29), "fneg d30, d29");
  TEST_SINGLE(fsqrt(DReg::d30, DReg::d29), "fsqrt d30, d29");
  TEST_SINGLE(fcvt(SReg::s30, DReg::d29), "fcvt s30, d29");
  // The `if (false)` guard keeps the bfcvt check compiled but never executed.
  if (false) {
    // vixl doesn't support this instruction.
    TEST_SINGLE(bfcvt(HReg::h30, SReg::s29), "bfcvt h30, s29");
  }
  TEST_SINGLE(fcvt(HReg::h30, DReg::d29), "fcvt h30, d29");
  TEST_SINGLE(frintn(DReg::d30, DReg::d29), "frintn d30, d29");
  TEST_SINGLE(frintp(DReg::d30, DReg::d29), "frintp d30, d29");
  TEST_SINGLE(frintm(DReg::d30, DReg::d29), "frintm d30, d29");
  TEST_SINGLE(frintz(DReg::d30, DReg::d29), "frintz d30, d29");
  TEST_SINGLE(frinta(DReg::d30, DReg::d29), "frinta d30, d29");
  TEST_SINGLE(frintx(DReg::d30, DReg::d29), "frintx d30, d29");
  TEST_SINGLE(frinti(DReg::d30, DReg::d29), "frinti d30, d29");
  TEST_SINGLE(frint32z(DReg::d30, DReg::d29), "frint32z d30, d29");
  TEST_SINGLE(frint32x(DReg::d30, DReg::d29), "frint32x d30, d29");
  TEST_SINGLE(frint64z(DReg::d30, DReg::d29), "frint64z d30, d29");
  TEST_SINGLE(frint64x(DReg::d30, DReg::d29), "frint64x d30, d29");

  // Half-precision source. fcvt is checked towards both single and double.
  // This group has no frint32*/frint64* cases.
  TEST_SINGLE(fmov(HReg::h30, HReg::h29), "fmov h30, h29");
  TEST_SINGLE(fabs(HReg::h30, HReg::h29), "fabs h30, h29");
  TEST_SINGLE(fneg(HReg::h30, HReg::h29), "fneg h30, h29");
  TEST_SINGLE(fsqrt(HReg::h30, HReg::h29), "fsqrt h30, h29");
  TEST_SINGLE(fcvt(SReg::s30, HReg::h29), "fcvt s30, h29");
  TEST_SINGLE(fcvt(DReg::d30, HReg::h29), "fcvt d30, h29");
  TEST_SINGLE(frintn(HReg::h30, HReg::h29), "frintn h30, h29");
  TEST_SINGLE(frintp(HReg::h30, HReg::h29), "frintp h30, h29");
  TEST_SINGLE(frintm(HReg::h30, HReg::h29), "frintm h30, h29");
  TEST_SINGLE(frintz(HReg::h30, HReg::h29), "frintz h30, h29");
  TEST_SINGLE(frinta(HReg::h30, HReg::h29), "frinta h30, h29");
  TEST_SINGLE(frintx(HReg::h30, HReg::h29), "frintx h30, h29");
  TEST_SINGLE(frinti(HReg::h30, HReg::h29), "frinti h30, h29");
}

// One-source scalar floating-point operations, called through the overloads
// that take an explicit ScalarRegSize plus untyped VReg operands. The expected
// text shows the register name following the requested size (s / d / h).
// No fcvt cases appear here.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-processing (1 source sized)") {
  // 32-bit: expected registers are s30 / s29.
  TEST_SINGLE(fmov(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fmov s30, s29");
  TEST_SINGLE(fabs(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fabs s30, s29");
  TEST_SINGLE(fneg(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fneg s30, s29");
  TEST_SINGLE(fsqrt(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fsqrt s30, s29");
  TEST_SINGLE(frintn(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frintn s30, s29");
  TEST_SINGLE(frintp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frintp s30, s29");
  TEST_SINGLE(frintm(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frintm s30, s29");
  TEST_SINGLE(frintz(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frintz s30, s29");
  TEST_SINGLE(frinta(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frinta s30, s29");
  TEST_SINGLE(frintx(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frintx s30, s29");
  TEST_SINGLE(frinti(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frinti s30, s29");
  TEST_SINGLE(frint32z(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frint32z s30, s29");
  TEST_SINGLE(frint32x(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frint32x s30, s29");
  TEST_SINGLE(frint64z(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frint64z s30, s29");
  TEST_SINGLE(frint64x(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "frint64x s30, s29");

  // 64-bit: expected registers are d30 / d29.
  TEST_SINGLE(fmov(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fmov d30, d29");
  TEST_SINGLE(fabs(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fabs d30, d29");
  TEST_SINGLE(fneg(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fneg d30, d29");
  TEST_SINGLE(fsqrt(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fsqrt d30, d29");
  TEST_SINGLE(frintn(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frintn d30, d29");
  TEST_SINGLE(frintp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frintp d30, d29");
  TEST_SINGLE(frintm(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frintm d30, d29");
  TEST_SINGLE(frintz(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frintz d30, d29");
  TEST_SINGLE(frinta(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frinta d30, d29");
  TEST_SINGLE(frintx(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frintx d30, d29");
  TEST_SINGLE(frinti(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frinti d30, d29");
  TEST_SINGLE(frint32z(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frint32z d30, d29");
  TEST_SINGLE(frint32x(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frint32x d30, d29");
  TEST_SINGLE(frint64z(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frint64z d30, d29");
  TEST_SINGLE(frint64x(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "frint64x d30, d29");

  // 16-bit: expected registers are h30 / h29. This group has no
  // frint32*/frint64* cases.
  TEST_SINGLE(fmov(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fmov h30, h29");
  TEST_SINGLE(fabs(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fabs h30, h29");
  TEST_SINGLE(fneg(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fneg h30, h29");
  TEST_SINGLE(fsqrt(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fsqrt h30, h29");
  TEST_SINGLE(frintn(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frintn h30, h29");
  TEST_SINGLE(frintp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frintp h30, h29");
  TEST_SINGLE(frintm(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frintm h30, h29");
  TEST_SINGLE(frintz(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frintz h30, h29");
  TEST_SINGLE(frinta(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frinta h30, h29");
  TEST_SINGLE(frintx(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frintx h30, h29");
  TEST_SINGLE(frinti(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "frinti h30, h29");
}

// Scalar floating-point compares: first the fcmp overload taking an explicit
// ScalarRegSize, then the typed-register overloads of fcmp and fcmpe.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point compare") {
  // Commented out lines showcase unallocated encodings.
  // TEST_SINGLE(fcmp(ScalarRegSize::i8Bit, VReg::v30, VReg::v29), "fcmp b30, b29");
  TEST_SINGLE(fcmp(ScalarRegSize::i16Bit, VReg::v30, VReg::v29), "fcmp h30, h29");
  TEST_SINGLE(fcmp(ScalarRegSize::i32Bit, VReg::v30, VReg::v29), "fcmp s30, s29");
  TEST_SINGLE(fcmp(ScalarRegSize::i64Bit, VReg::v30, VReg::v29), "fcmp d30, d29");

  // Typed-register overloads. The single-operand calls are expected to
  // disassemble as a compare against the literal #0.0.
  TEST_SINGLE(fcmp(SReg::s30, SReg::s29), "fcmp s30, s29");
  TEST_SINGLE(fcmp(SReg::s30), "fcmp s30, #0.0");
  TEST_SINGLE(fcmpe(SReg::s30, SReg::s29), "fcmpe s30, s29");
  TEST_SINGLE(fcmpe(SReg::s30), "fcmpe s30, #0.0");

  TEST_SINGLE(fcmp(DReg::d30, DReg::d29), "fcmp d30, d29");
  TEST_SINGLE(fcmp(DReg::d30), "fcmp d30, #0.0");
  TEST_SINGLE(fcmpe(DReg::d30, DReg::d29), "fcmpe d30, d29");
  TEST_SINGLE(fcmpe(DReg::d30), "fcmpe d30, #0.0");

  TEST_SINGLE(fcmp(HReg::h30, HReg::h29), "fcmp h30, h29");
  TEST_SINGLE(fcmp(HReg::h30), "fcmp h30, #0.0");
  TEST_SINGLE(fcmpe(HReg::h30, HReg::h29), "fcmpe h30, h29");
  TEST_SINGLE(fcmpe(HReg::h30), "fcmpe h30, #0.0");
}

#if TEST_FP16
// Sweeps the 128 half-precision `fmov` immediates whose imm8 encoding is
// 0x00..0x7f and checks the disassembly text of each one.
//
// `Decoding` and `DecodingString` are parallel tables: entry i of the first is
// the value passed to the emitter, entry i of the second is the disassembly
// expected for it. Entries are ordered by the low nibble of the encoding
// first (0x0..0xf) and, within one low nibble, by the high nibble (0x0..0x7);
// each row of `Decoding` below therefore covers one low nibble.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point immediate : fp16") {
  TEST_SINGLE(fmov(ScalarRegSize::i16Bit, VReg::v30, 1.0), "fmov h30, #0x70 (1.0000)");

  // Every literal is the exact value of its encoding. In particular the
  // 0x4N column is written with all seven decimals (e.g. 0.1328125 rather than
  // a six-digit truncation), so the test does not rely on the emitter rounding
  // its input before it is encoded.
  float Decoding[] = {
    2.000, 4.00, 8.0,  16.0, 0.1250000, 0.250000, 0.50000, 1.0000, // 0x_0
    2.125, 4.25, 8.5,  17.0, 0.1328125, 0.265625, 0.53125, 1.0625, // 0x_1
    2.250, 4.50, 9.0,  18.0, 0.1406250, 0.281250, 0.56250, 1.1250, // 0x_2
    2.375, 4.75, 9.5,  19.0, 0.1484375, 0.296875, 0.59375, 1.1875, // 0x_3
    2.500, 5.00, 10.0, 20.0, 0.1562500, 0.312500, 0.62500, 1.2500, // 0x_4
    2.625, 5.25, 10.5, 21.0, 0.1640625, 0.328125, 0.65625, 1.3125, // 0x_5
    2.750, 5.50, 11.0, 22.0, 0.1718750, 0.343750, 0.68750, 1.3750, // 0x_6
    2.875, 5.75, 11.5, 23.0, 0.1796875, 0.359375, 0.71875, 1.4375, // 0x_7
    3.000, 6.00, 12.0, 24.0, 0.1875000, 0.375000, 0.75000, 1.5000, // 0x_8
    3.125, 6.25, 12.5, 25.0, 0.1953125, 0.390625, 0.78125, 1.5625, // 0x_9
    3.250, 6.50, 13.0, 26.0, 0.2031250, 0.406250, 0.81250, 1.6250, // 0x_a
    3.375, 6.75, 13.5, 27.0, 0.2109375, 0.421875, 0.84375, 1.6875, // 0x_b
    3.500, 7.00, 14.0, 28.0, 0.2187500, 0.437500, 0.87500, 1.7500, // 0x_c
    3.625, 7.25, 14.5, 29.0, 0.2265625, 0.453125, 0.90625, 1.8125, // 0x_d
    3.750, 7.50, 15.0, 30.0, 0.2343750, 0.468750, 0.93750, 1.8750, // 0x_e
    3.875, 7.75, 15.5, 31.0, 0.2421875, 0.484375, 0.96875, 1.9375, // 0x_f
  };

  // Expected disassembly, in the same order as `Decoding`. The parenthesised
  // value is the disassembler's four-decimal rendering and is matched verbatim.
  const char* DecodingString[] = {
    "fmov h30, #0x0 (2.0000)",  "fmov h30, #0x10 (4.0000)", "fmov h30, #0x20 (8.0000)",  "fmov h30, #0x30 (16.0000)",
    "fmov h30, #0x40 (0.1250)", "fmov h30, #0x50 (0.2500)", "fmov h30, #0x60 (0.5000)",  "fmov h30, #0x70 (1.0000)",
    "fmov h30, #0x1 (2.1250)",  "fmov h30, #0x11 (4.2500)", "fmov h30, #0x21 (8.5000)",  "fmov h30, #0x31 (17.0000)",
    "fmov h30, #0x41 (0.1328)", "fmov h30, #0x51 (0.2656)", "fmov h30, #0x61 (0.5312)",  "fmov h30, #0x71 (1.0625)",
    "fmov h30, #0x2 (2.2500)",  "fmov h30, #0x12 (4.5000)", "fmov h30, #0x22 (9.0000)",  "fmov h30, #0x32 (18.0000)",
    "fmov h30, #0x42 (0.1406)", "fmov h30, #0x52 (0.2812)", "fmov h30, #0x62 (0.5625)",  "fmov h30, #0x72 (1.1250)",
    "fmov h30, #0x3 (2.3750)",  "fmov h30, #0x13 (4.7500)", "fmov h30, #0x23 (9.5000)",  "fmov h30, #0x33 (19.0000)",
    "fmov h30, #0x43 (0.1484)", "fmov h30, #0x53 (0.2969)", "fmov h30, #0x63 (0.5938)",  "fmov h30, #0x73 (1.1875)",
    "fmov h30, #0x4 (2.5000)",  "fmov h30, #0x14 (5.0000)", "fmov h30, #0x24 (10.0000)", "fmov h30, #0x34 (20.0000)",
    "fmov h30, #0x44 (0.1562)", "fmov h30, #0x54 (0.3125)", "fmov h30, #0x64 (0.6250)",  "fmov h30, #0x74 (1.2500)",
    "fmov h30, #0x5 (2.6250)",  "fmov h30, #0x15 (5.2500)", "fmov h30, #0x25 (10.5000)", "fmov h30, #0x35 (21.0000)",
    "fmov h30, #0x45 (0.1641)", "fmov h30, #0x55 (0.3281)", "fmov h30, #0x65 (0.6562)",  "fmov h30, #0x75 (1.3125)",
    "fmov h30, #0x6 (2.7500)",  "fmov h30, #0x16 (5.5000)", "fmov h30, #0x26 (11.0000)", "fmov h30, #0x36 (22.0000)",
    "fmov h30, #0x46 (0.1719)", "fmov h30, #0x56 (0.3438)", "fmov h30, #0x66 (0.6875)",  "fmov h30, #0x76 (1.3750)",
    "fmov h30, #0x7 (2.8750)",  "fmov h30, #0x17 (5.7500)", "fmov h30, #0x27 (11.5000)", "fmov h30, #0x37 (23.0000)",
    "fmov h30, #0x47 (0.1797)", "fmov h30, #0x57 (0.3594)", "fmov h30, #0x67 (0.7188)",  "fmov h30, #0x77 (1.4375)",
    "fmov h30, #0x8 (3.0000)",  "fmov h30, #0x18 (6.0000)", "fmov h30, #0x28 (12.0000)", "fmov h30, #0x38 (24.0000)",
    "fmov h30, #0x48 (0.1875)", "fmov h30, #0x58 (0.3750)", "fmov h30, #0x68 (0.7500)",  "fmov h30, #0x78 (1.5000)",
    "fmov h30, #0x9 (3.1250)",  "fmov h30, #0x19 (6.2500)", "fmov h30, #0x29 (12.5000)", "fmov h30, #0x39 (25.0000)",
    "fmov h30, #0x49 (0.1953)", "fmov h30, #0x59 (0.3906)", "fmov h30, #0x69 (0.7812)",  "fmov h30, #0x79 (1.5625)",
    "fmov h30, #0xa (3.2500)",  "fmov h30, #0x1a (6.5000)", "fmov h30, #0x2a (13.0000)", "fmov h30, #0x3a (26.0000)",
    "fmov h30, #0x4a (0.2031)", "fmov h30, #0x5a (0.4062)", "fmov h30, #0x6a (0.8125)",  "fmov h30, #0x7a (1.6250)",
    "fmov h30, #0xb (3.3750)",  "fmov h30, #0x1b (6.7500)", "fmov h30, #0x2b (13.5000)", "fmov h30, #0x3b (27.0000)",
    "fmov h30, #0x4b (0.2109)", "fmov h30, #0x5b (0.4219)", "fmov h30, #0x6b (0.8438)",  "fmov h30, #0x7b (1.6875)",
    "fmov h30, #0xc (3.5000)",  "fmov h30, #0x1c (7.0000)", "fmov h30, #0x2c (14.0000)", "fmov h30, #0x3c (28.0000)",
    "fmov h30, #0x4c (0.2188)", "fmov h30, #0x5c (0.4375)", "fmov h30, #0x6c (0.8750)",  "fmov h30, #0x7c (1.7500)",
    "fmov h30, #0xd (3.6250)",  "fmov h30, #0x1d (7.2500)", "fmov h30, #0x2d (14.5000)", "fmov h30, #0x3d (29.0000)",
    "fmov h30, #0x4d (0.2266)", "fmov h30, #0x5d (0.4531)", "fmov h30, #0x6d (0.9062)",  "fmov h30, #0x7d (1.8125)",
    "fmov h30, #0xe (3.7500)",  "fmov h30, #0x1e (7.5000)", "fmov h30, #0x2e (15.0000)", "fmov h30, #0x3e (30.0000)",
    "fmov h30, #0x4e (0.2344)", "fmov h30, #0x5e (0.4688)", "fmov h30, #0x6e (0.9375)",  "fmov h30, #0x7e (1.8750)",
    "fmov h30, #0xf (3.8750)",  "fmov h30, #0x1f (7.7500)", "fmov h30, #0x2f (15.5000)", "fmov h30, #0x3f (31.0000)",
    "fmov h30, #0x4f (0.2422)", "fmov h30, #0x5f (0.4844)", "fmov h30, #0x6f (0.9688)",  "fmov h30, #0x7f (1.9375)",
  };

  // The loop indexes both tables with the same counter, so a length mismatch
  // would read past the end of the shorter one; reject that at compile time.
  constexpr size_t NumEncodings = sizeof(Decoding) / sizeof(Decoding[0]);
  static_assert(NumEncodings == sizeof(DecodingString) / sizeof(DecodingString[0]),
                "Decoding and DecodingString must have the same number of entries");

  for (size_t i = 0; i < NumEncodings; ++i) {
    TEST_SINGLE(fmov(ScalarRegSize::i16Bit, VReg::v30, Decoding[i]), DecodingString[i]);
  }
}
#endif

// Scalar `fmov` with a floating-point immediate, at 32 and 64 bits. Both cases
// pass 1.0 and expect the disassembler to report the immediate as #0x70.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point immediate") {
  TEST_SINGLE(fmov(ScalarRegSize::i32Bit, VReg::v30, 1.0), "fmov s30, #0x70 (1.0000)");
  TEST_SINGLE(fmov(ScalarRegSize::i64Bit, VReg::v30, 1.0), "fmov d30, #0x70 (1.0000)");
}

// Scalar floating-point conditional compares (fccmp / fccmpe) through the
// typed-register overloads.
//
// Every group below follows the same pattern: the StatusFlags values None,
// Flag_N, Flag_Z, Flag_C, Flag_V and Flag_NZCV are run first with
// Condition::CC_AL and then with Condition::CC_EQ. In the expected text the
// flag operand is spelled "#nzcv" with the letter of each requested flag in
// upper case.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point conditional compare") {
  // fccmp, single-precision registers.
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::None, Condition::CC_AL), "fccmp s30, s29, #nzcv, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_N, Condition::CC_AL), "fccmp s30, s29, #Nzcv, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmp s30, s29, #nZcv, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_C, Condition::CC_AL), "fccmp s30, s29, #nzCv, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_V, Condition::CC_AL), "fccmp s30, s29, #nzcV, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmp s30, s29, #NZCV, al");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::None, Condition::CC_EQ), "fccmp s30, s29, #nzcv, eq");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmp s30, s29, #Nzcv, eq");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmp s30, s29, #nZcv, eq");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmp s30, s29, #nzCv, eq");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmp s30, s29, #nzcV, eq");
  TEST_SINGLE(fccmp(SReg::s30, SReg::s29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmp s30, s29, #NZCV, eq");

  // fccmpe, single-precision registers.
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::None, Condition::CC_AL), "fccmpe s30, s29, #nzcv, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_N, Condition::CC_AL), "fccmpe s30, s29, #Nzcv, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmpe s30, s29, #nZcv, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_C, Condition::CC_AL), "fccmpe s30, s29, #nzCv, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_V, Condition::CC_AL), "fccmpe s30, s29, #nzcV, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmpe s30, s29, #NZCV, al");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::None, Condition::CC_EQ), "fccmpe s30, s29, #nzcv, eq");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmpe s30, s29, #Nzcv, eq");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmpe s30, s29, #nZcv, eq");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmpe s30, s29, #nzCv, eq");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmpe s30, s29, #nzcV, eq");
  TEST_SINGLE(fccmpe(SReg::s30, SReg::s29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmpe s30, s29, #NZCV, eq");

  // fccmp, double-precision registers.
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::None, Condition::CC_AL), "fccmp d30, d29, #nzcv, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_N, Condition::CC_AL), "fccmp d30, d29, #Nzcv, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmp d30, d29, #nZcv, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_C, Condition::CC_AL), "fccmp d30, d29, #nzCv, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_V, Condition::CC_AL), "fccmp d30, d29, #nzcV, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmp d30, d29, #NZCV, al");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::None, Condition::CC_EQ), "fccmp d30, d29, #nzcv, eq");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmp d30, d29, #Nzcv, eq");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmp d30, d29, #nZcv, eq");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmp d30, d29, #nzCv, eq");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmp d30, d29, #nzcV, eq");
  TEST_SINGLE(fccmp(DReg::d30, DReg::d29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmp d30, d29, #NZCV, eq");

  // fccmpe, double-precision registers.
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::None, Condition::CC_AL), "fccmpe d30, d29, #nzcv, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_N, Condition::CC_AL), "fccmpe d30, d29, #Nzcv, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmpe d30, d29, #nZcv, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_C, Condition::CC_AL), "fccmpe d30, d29, #nzCv, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_V, Condition::CC_AL), "fccmpe d30, d29, #nzcV, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmpe d30, d29, #NZCV, al");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::None, Condition::CC_EQ), "fccmpe d30, d29, #nzcv, eq");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmpe d30, d29, #Nzcv, eq");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmpe d30, d29, #nZcv, eq");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmpe d30, d29, #nzCv, eq");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmpe d30, d29, #nzcV, eq");
  TEST_SINGLE(fccmpe(DReg::d30, DReg::d29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmpe d30, d29, #NZCV, eq");

  // fccmp, half-precision registers.
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::None, Condition::CC_AL), "fccmp h30, h29, #nzcv, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_N, Condition::CC_AL), "fccmp h30, h29, #Nzcv, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmp h30, h29, #nZcv, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_C, Condition::CC_AL), "fccmp h30, h29, #nzCv, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_V, Condition::CC_AL), "fccmp h30, h29, #nzcV, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmp h30, h29, #NZCV, al");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::None, Condition::CC_EQ), "fccmp h30, h29, #nzcv, eq");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmp h30, h29, #Nzcv, eq");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmp h30, h29, #nZcv, eq");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmp h30, h29, #nzCv, eq");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmp h30, h29, #nzcV, eq");
  TEST_SINGLE(fccmp(HReg::h30, HReg::h29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmp h30, h29, #NZCV, eq");

  // fccmpe, half-precision registers.
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::None, Condition::CC_AL), "fccmpe h30, h29, #nzcv, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_N, Condition::CC_AL), "fccmpe h30, h29, #Nzcv, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_Z, Condition::CC_AL), "fccmpe h30, h29, #nZcv, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_C, Condition::CC_AL), "fccmpe h30, h29, #nzCv, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_V, Condition::CC_AL), "fccmpe h30, h29, #nzcV, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_NZCV, Condition::CC_AL), "fccmpe h30, h29, #NZCV, al");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::None, Condition::CC_EQ), "fccmpe h30, h29, #nzcv, eq");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_N, Condition::CC_EQ), "fccmpe h30, h29, #Nzcv, eq");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_Z, Condition::CC_EQ), "fccmpe h30, h29, #nZcv, eq");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_C, Condition::CC_EQ), "fccmpe h30, h29, #nzCv, eq");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_V, Condition::CC_EQ), "fccmpe h30, h29, #nzcV, eq");
  TEST_SINGLE(fccmpe(HReg::h30, HReg::h29, StatusFlags::Flag_NZCV, Condition::CC_EQ), "fccmpe h30, h29, #NZCV, eq");
}
// Two-source scalar floating-point operations, called through the overloads
// that take typed registers. The same nine operations (fmul, fdiv, fadd, fsub,
// fmax, fmin, fmaxnm, fminnm, fnmul) are checked for each register type.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-processing (2 source)") {
  // Single-precision registers.
  TEST_SINGLE(fmul(SReg::s30, SReg::s29, SReg::s28), "fmul s30, s29, s28");
  TEST_SINGLE(fdiv(SReg::s30, SReg::s29, SReg::s28), "fdiv s30, s29, s28");
  TEST_SINGLE(fadd(SReg::s30, SReg::s29, SReg::s28), "fadd s30, s29, s28");
  TEST_SINGLE(fsub(SReg::s30, SReg::s29, SReg::s28), "fsub s30, s29, s28");
  TEST_SINGLE(fmax(SReg::s30, SReg::s29, SReg::s28), "fmax s30, s29, s28");
  TEST_SINGLE(fmin(SReg::s30, SReg::s29, SReg::s28), "fmin s30, s29, s28");
  TEST_SINGLE(fmaxnm(SReg::s30, SReg::s29, SReg::s28), "fmaxnm s30, s29, s28");
  TEST_SINGLE(fminnm(SReg::s30, SReg::s29, SReg::s28), "fminnm s30, s29, s28");
  TEST_SINGLE(fnmul(SReg::s30, SReg::s29, SReg::s28), "fnmul s30, s29, s28");

  // Double-precision registers.
  TEST_SINGLE(fmul(DReg::d30, DReg::d29, DReg::d28), "fmul d30, d29, d28");
  TEST_SINGLE(fdiv(DReg::d30, DReg::d29, DReg::d28), "fdiv d30, d29, d28");
  TEST_SINGLE(fadd(DReg::d30, DReg::d29, DReg::d28), "fadd d30, d29, d28");
  TEST_SINGLE(fsub(DReg::d30, DReg::d29, DReg::d28), "fsub d30, d29, d28");
  TEST_SINGLE(fmax(DReg::d30, DReg::d29, DReg::d28), "fmax d30, d29, d28");
  TEST_SINGLE(fmin(DReg::d30, DReg::d29, DReg::d28), "fmin d30, d29, d28");
  TEST_SINGLE(fmaxnm(DReg::d30, DReg::d29, DReg::d28), "fmaxnm d30, d29, d28");
  TEST_SINGLE(fminnm(DReg::d30, DReg::d29, DReg::d28), "fminnm d30, d29, d28");
  TEST_SINGLE(fnmul(DReg::d30, DReg::d29, DReg::d28), "fnmul d30, d29, d28");

  // Half-precision registers.
  TEST_SINGLE(fmul(HReg::h30, HReg::h29, HReg::h28), "fmul h30, h29, h28");
  TEST_SINGLE(fdiv(HReg::h30, HReg::h29, HReg::h28), "fdiv h30, h29, h28");
  TEST_SINGLE(fadd(HReg::h30, HReg::h29, HReg::h28), "fadd h30, h29, h28");
  TEST_SINGLE(fsub(HReg::h30, HReg::h29, HReg::h28), "fsub h30, h29, h28");
  TEST_SINGLE(fmax(HReg::h30, HReg::h29, HReg::h28), "fmax h30, h29, h28");
  TEST_SINGLE(fmin(HReg::h30, HReg::h29, HReg::h28), "fmin h30, h29, h28");
  TEST_SINGLE(fmaxnm(HReg::h30, HReg::h29, HReg::h28), "fmaxnm h30, h29, h28");
  TEST_SINGLE(fminnm(HReg::h30, HReg::h29, HReg::h28), "fminnm h30, h29, h28");
  TEST_SINGLE(fnmul(HReg::h30, HReg::h29, HReg::h28), "fnmul h30, h29, h28");
}

// Same operations as the typed two-source test, but emitted through the
// overloads that take an explicit ScalarRegSize plus untyped VReg operands.
// The expected text shows that the size argument selects the s/d/h register
// name printed by the disassembler.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-processing (2 source sized)") {
  // i32Bit is expected to print s registers.
  TEST_SINGLE(fmul(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmul s30, s29, s28");
  TEST_SINGLE(fdiv(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fdiv s30, s29, s28");
  TEST_SINGLE(fadd(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fadd s30, s29, s28");
  TEST_SINGLE(fsub(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fsub s30, s29, s28");
  TEST_SINGLE(fmax(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmax s30, s29, s28");
  TEST_SINGLE(fmin(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmin s30, s29, s28");
  TEST_SINGLE(fmaxnm(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fmaxnm s30, s29, s28");
  TEST_SINGLE(fminnm(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fminnm s30, s29, s28");
  TEST_SINGLE(fnmul(ScalarRegSize::i32Bit, VReg::v30, VReg::v29, VReg::v28), "fnmul s30, s29, s28");

  // i64Bit is expected to print d registers.
  TEST_SINGLE(fmul(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmul d30, d29, d28");
  TEST_SINGLE(fdiv(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fdiv d30, d29, d28");
  TEST_SINGLE(fadd(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fadd d30, d29, d28");
  TEST_SINGLE(fsub(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fsub d30, d29, d28");
  TEST_SINGLE(fmax(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmax d30, d29, d28");
  TEST_SINGLE(fmin(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmin d30, d29, d28");
  TEST_SINGLE(fmaxnm(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fmaxnm d30, d29, d28");
  TEST_SINGLE(fminnm(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fminnm d30, d29, d28");
  TEST_SINGLE(fnmul(ScalarRegSize::i64Bit, VReg::v30, VReg::v29, VReg::v28), "fnmul d30, d29, d28");

  // i16Bit is expected to print h registers.
  TEST_SINGLE(fmul(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmul h30, h29, h28");
  TEST_SINGLE(fdiv(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fdiv h30, h29, h28");
  TEST_SINGLE(fadd(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fadd h30, h29, h28");
  TEST_SINGLE(fsub(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fsub h30, h29, h28");
  TEST_SINGLE(fmax(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmax h30, h29, h28");
  TEST_SINGLE(fmin(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmin h30, h29, h28");
  TEST_SINGLE(fmaxnm(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fmaxnm h30, h29, h28");
  TEST_SINGLE(fminnm(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fminnm h30, h29, h28");
  TEST_SINGLE(fnmul(ScalarRegSize::i16Bit, VReg::v30, VReg::v29, VReg::v28), "fnmul h30, h29, h28");
}

// Checks the disassembly of fcsel for the s, d and h register forms, each with
// the CC_AL and CC_EQ condition codes.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point conditional select") {
  TEST_SINGLE(fcsel(SReg::s30, SReg::s29, SReg::s28, Condition::CC_AL), "fcsel s30, s29, s28, al");
  TEST_SINGLE(fcsel(SReg::s30, SReg::s29, SReg::s28, Condition::CC_EQ), "fcsel s30, s29, s28, eq");

  TEST_SINGLE(fcsel(DReg::d30, DReg::d29, DReg::d28, Condition::CC_AL), "fcsel d30, d29, d28, al");
  TEST_SINGLE(fcsel(DReg::d30, DReg::d29, DReg::d28, Condition::CC_EQ), "fcsel d30, d29, d28, eq");

  TEST_SINGLE(fcsel(HReg::h30, HReg::h29, HReg::h28, Condition::CC_AL), "fcsel h30, h29, h28, al");
  TEST_SINGLE(fcsel(HReg::h30, HReg::h29, HReg::h28, Condition::CC_EQ), "fcsel h30, h29, h28, eq");
}
// Checks the disassembly of the fused multiply-add family (fmadd, fmsub,
// fnmadd, fnmsub) for the s, d and h register forms. Four distinct register
// numbers are used so that a swapped operand would show up in the text.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Scalar: Floating-point data-processing (3 source)") {
  TEST_SINGLE(fmadd(SReg::s30, SReg::s29, SReg::s28, SReg::s27), "fmadd s30, s29, s28, s27");
  TEST_SINGLE(fmsub(SReg::s30, SReg::s29, SReg::s28, SReg::s27), "fmsub s30, s29, s28, s27");
  TEST_SINGLE(fnmadd(SReg::s30, SReg::s29, SReg::s28, SReg::s27), "fnmadd s30, s29, s28, s27");
  TEST_SINGLE(fnmsub(SReg::s30, SReg::s29, SReg::s28, SReg::s27), "fnmsub s30, s29, s28, s27");

  TEST_SINGLE(fmadd(DReg::d30, DReg::d29, DReg::d28, DReg::d27), "fmadd d30, d29, d28, d27");
  TEST_SINGLE(fmsub(DReg::d30, DReg::d29, DReg::d28, DReg::d27), "fmsub d30, d29, d28, d27");
  TEST_SINGLE(fnmadd(DReg::d30, DReg::d29, DReg::d28, DReg::d27), "fnmadd d30, d29, d28, d27");
  TEST_SINGLE(fnmsub(DReg::d30, DReg::d29, DReg::d28, DReg::d27), "fnmsub d30, d29, d28, d27");

  TEST_SINGLE(fmadd(HReg::h30, HReg::h29, HReg::h28, HReg::h27), "fmadd h30, h29, h28, h27");
  TEST_SINGLE(fmsub(HReg::h30, HReg::h29, HReg::h28, HReg::h27), "fmsub h30, h29, h28, h27");
  TEST_SINGLE(fnmadd(HReg::h30, HReg::h29, HReg::h28, HReg::h27), "fnmadd h30, h29, h28, h27");
  TEST_SINGLE(fnmsub(HReg::h30, HReg::h29, HReg::h28, HReg::h27), "fnmsub h30, h29, h28, h27");
}


================================================
FILE: FEXCore/unittests/Emitter/System_Tests.cpp
================================================
// SPDX-License-Identifier: MIT
#include "TestDisassembler.h"

#include <catch2/catch_test_macros.hpp>
#include <fcntl.h>

using namespace ARMEmitter;

// Checks udf with the smallest (0) and largest 16-bit (0xFFFF) immediates.
TEST_CASE_METHOD(TestDisassembler, "Emitter: Reserved") {
  TEST_SINGLE(udf(0), "udf #0x0");
  TEST_SINGLE(udf(0xFFFF), "udf #0xffff");
}
// Placeholder test case: it currently contains no checks.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System with result") {
  // TODO: Implement in emitter.
}
// Checks the disassembly of the dc (data cache) operations. Operations the
// disassembler has a name for are expected as `dc <op>, x30`; the others are
// expected in the generic `sys #op1, Cn, Cm, #op2, x30` form, which still pins
// the encoding fields.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System Instruction") {
  // TODO: AT
  // TODO: CFP
  // TODO: CPP
  // vixl doesn't understand a bunch of data cache operation names.
  TEST_SINGLE(dc(DataCacheOperation::IVAC, Reg::r30), "sys #0, C7, C6, #1, x30");
  TEST_SINGLE(dc(DataCacheOperation::ISW, Reg::r30), "sys #0, C7, C6, #2, x30");
  TEST_SINGLE(dc(DataCacheOperation::CSW, Reg::r30), "sys #0, C7, C10, #2, x30");
  TEST_SINGLE(dc(DataCacheOperation::CISW, Reg::r30), "sys #0, C7, C14, #2, x30");
  TEST_SINGLE(dc(DataCacheOperation::ZVA, Reg::r30), "dc zva, x30");
  TEST_SINGLE(dc(DataCacheOperation::CVAC, Reg::r30), "dc cvac, x30");
  TEST_SINGLE(dc(DataCacheOperation::CVAU, Reg::r30), "dc cvau, x30");
  TEST_SINGLE(dc(DataCacheOperation::CIVAC, Reg::r30), "dc civac, x30");

  TEST_SINGLE(dc(DataCacheOperation::IGVAC, Reg::r30), "sys #0, C7, C6, #3, x30");
  TEST_SINGLE(dc(DataCacheOperation::IGSW, Reg::r30), "sys #0, C7, C6, #4, x30");
  TEST_SINGLE(dc(DataCacheOperation::IGDVAC, Reg::r30), "sys #0, C7, C6, #5, x30");
  TEST_SINGLE(dc(DataCacheOperation::IGDSW, Reg::r30), "sys #0, C7, C6, #6, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGSW, Reg::r30), "sys #0, C7, C10, #4, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGDSW, Reg::r30), "sys #0, C7, C10, #6, x30");
  TEST_SINGLE(dc(DataCacheOperation::CIGSW, Reg::r30), "sys #0, C7, C14, #4, x30");
  TEST_SINGLE(dc(DataCacheOperation::CIGDSW, Reg::r30), "sys #0, C7, C14, #6, x30");

  TEST_SINGLE(dc(DataCacheOperation::GVA, Reg::r30), "dc gva, x30");
  TEST_SINGLE(dc(DataCacheOperation::GZVA, Reg::r30), "dc gzva, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGVAC, Reg::r30), "dc cgvac, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGDVAC, Reg::r30), "dc cgdvac, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGVAP, Reg::r30), "dc cgvap, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGDVAP, Reg::r30), "dc cgdvap, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGVADP, Reg::r30), "sys #3, C7, C13, #3, x30");
  TEST_SINGLE(dc(DataCacheOperation::CGDVADP, Reg::r30), "sys #3, C7, C13, #5, x30");
  TEST_SINGLE(dc(DataCacheOperation::CIGVAC, Reg::r30), "dc cigvac, x30");
  TEST_SINGLE(dc(DataCacheOperation::CIGDVAC, Reg::r30), "dc cigdvac, x30");

  TEST_SINGLE(dc(DataCacheOperation::CVAP, Reg::r30), "dc cvap, x30");

  TEST_SINGLE(dc(DataCacheOperation::CVADP, Reg::r30), "dc cvadp, x30");

  // TODO: DVP
  // TODO: IC
  // TODO: TLBI
}
// Checks the exception-generating instructions, each with the maximum 16-bit
// immediate (65535) so that every immediate bit is exercised.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: Exception generation") {
  TEST_SINGLE(svc(65535), "svc #0xffff");
  TEST_SINGLE(hvc(65535), "hvc #0xffff");
  TEST_SINGLE(smc(65535), "smc #0xffff");
  TEST_SINGLE(brk(65535), "brk #0xffff");
  TEST_SINGLE(hlt(65535), "hlt #0xffff");
  // NOTE(review): tcancel is expected to print as unimplemented, presumably
  // because the disassembler cannot decode it - confirm against vixl.
  TEST_SINGLE(tcancel(65535), "unimplemented (Unimplemented)");
  TEST_SINGLE(dcps1(65535), "dcps1 {#0xffff}");
  TEST_SINGLE(dcps2(65535), "dcps2 {#0xffff}");
  TEST_SINGLE(dcps3(65535), "dcps3 {#0xffff}");
}
// wfet/wfit checks. The body is wrapped in `if (false)` so the calls are still
// compiled but never executed, since the disassembler cannot print them.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System instructions with register argument") {
  if (false) {
    // Unsupported in vixl.
    TEST_SINGLE(wfet(Reg::r30), "wfet x30");
    TEST_SINGLE(wfit(Reg::r30), "wfit x30");
  }
}
// Checks the operand-less hint instructions; each is expected to disassemble
// to its bare mnemonic.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: Hints") {
  TEST_SINGLE(nop(), "nop");
  TEST_SINGLE(yield(), "yield");
  TEST_SINGLE(wfe(), "wfe");
  TEST_SINGLE(wfi(), "wfi");
  TEST_SINGLE(sev(), "sev");
  TEST_SINGLE(sevl(), "sevl");
  TEST_SINGLE(dgh(), "dgh");
  TEST_SINGLE(csdb(), "csdb");
}
// Checks clrex, dsb and dmb across every BarrierScope value, plus the
// operand-less isb, sb and tcommit.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: Barriers") {
  // An immediate of 15 is expected to print without an operand.
  TEST_SINGLE(clrex(0), "clrex #0x0");
  TEST_SINGLE(clrex(15), "clrex");

  TEST_SINGLE(dsb(BarrierScope::OSHLD), "dsb oshld");
  TEST_SINGLE(dsb(BarrierScope::OSHST), "dsb oshst");
  TEST_SINGLE(dsb(BarrierScope::OSH), "dsb osh");
  TEST_SINGLE(dsb(BarrierScope::NSHLD), "dsb nshld");
  TEST_SINGLE(dsb(BarrierScope::NSHST), "dsb nshst");
  TEST_SINGLE(dsb(BarrierScope::NSH), "dsb nsh");
  TEST_SINGLE(dsb(BarrierScope::ISHLD), "dsb ishld");
  TEST_SINGLE(dsb(BarrierScope::ISHST), "dsb ishst");
  TEST_SINGLE(dsb(BarrierScope::ISH), "dsb ish");
  TEST_SINGLE(dsb(BarrierScope::LD), "dsb ld");
  TEST_SINGLE(dsb(BarrierScope::ST), "dsb st");
  TEST_SINGLE(dsb(BarrierScope::SY), "dsb sy");

  TEST_SINGLE(dmb(BarrierScope::OSHLD), "dmb oshld");
  TEST_SINGLE(dmb(BarrierScope::OSHST), "dmb oshst");
  TEST_SINGLE(dmb(BarrierScope::OSH), "dmb osh");
  TEST_SINGLE(dmb(BarrierScope::NSHLD), "dmb nshld");
  TEST_SINGLE(dmb(BarrierScope::NSHST), "dmb nshst");
  TEST_SINGLE(dmb(BarrierScope::NSH), "dmb nsh");
  TEST_SINGLE(dmb(BarrierScope::ISHLD), "dmb ishld");
  TEST_SINGLE(dmb(BarrierScope::ISHST), "dmb ishst");
  TEST_SINGLE(dmb(BarrierScope::ISH), "dmb ish");
  TEST_SINGLE(dmb(BarrierScope::LD), "dmb ld");
  TEST_SINGLE(dmb(BarrierScope::ST), "dmb st");
  TEST_SINGLE(dmb(BarrierScope::SY), "dmb sy");

  TEST_SINGLE(isb(), "isb");

  TEST_SINGLE(sb(), "sb");
  TEST_SINGLE(tcommit(), "tcommit");
}
// Checks msr (write) and mrs (read) for each SystemRegister value listed below.
// Registers the disassembler has no name for are expected in the generic
// `S<op0>_<op1>_c<n>_c<m>_<op2>` form.
TEST_CASE_METHOD(TestDisassembler, "Emitter: System: System register move") {
  // vixl doesn't have decoding for a bunch of these.
  // Also most of these aren't writeable from el0, just testing the encoding.
  TEST_SINGLE(msr(SystemRegister::CTR_EL0, Reg::r30), "msr S3_3_c0_c0_1, x30");
  TEST_SINGLE(msr(SystemRegister::DCZID_EL0, Reg::r30), "msr dczid_el0, x30");
  TEST_SINGLE(msr(SystemRegister::TPIDR_EL0, Reg::r30), "msr S3_3_c13_c0_2, x30");
  TEST_SINGLE(msr(SystemRegister::RNDR, Reg::r30), "msr rndr, x30");
  TEST_SINGLE(msr(SystemRegister::RNDRRS, Reg::r30), "msr rndrrs, x30");
  TEST_SINGLE(msr(SystemRegister::NZCV, Reg::r30), "msr nzcv, x30");
  TEST_SINGLE(msr(SystemRegister::FPCR, Reg::r30), "msr fpcr, x30");
  TEST_SINGLE(msr(SystemRegister::TPIDRRO_EL0, Reg::r30), "msr S3_3_c13_c0_3, x30");
  TEST_SINGLE(msr(SystemRegister::CNTFRQ_EL0, Reg::r30), "msr S3_3_c14_c0_0, x30");
  TEST_SINGLE(msr(SystemRegister::CNTVCT_EL0, Reg::r30), "msr S3_3_c14_c0_2, x30");

  // The same registers, read back with mrs.
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::CTR_EL0), "mrs x30, S3_3_c0_c0_1");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::DCZID_EL0), "mrs x30, dczid_el0");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::TPIDR_EL0), "mrs x30, S3_3_c13_c0_2");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::RNDR), "mrs x30, rndr");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::RNDRRS), "mrs x30, rndrrs");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::NZCV), "mrs x30, nzcv");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::FPCR), "mrs x30, fpcr");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::TPIDRRO_EL0), "mrs x30, S3_3_c13_c0_3");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::CNTFRQ_EL0), "mrs x30, S3_3_c14_c0_0");
  TEST_SINGLE(mrs(Reg::r30, SystemRegister::CNTVCT_EL0), "mrs x30, S3_3_c14_c0_2");
}


================================================
FILE: FEXCore/unittests/Emitter/TestDisassembler.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>
#include <CodeEmitter/Emitter.h>

#include <aarch64/cpu-aarch64.h>
#include <aarch64/instructions-aarch64.h>
#include <aarch64/disasm-aarch64.h>

#include <sys/mman.h>

// Test fixture that emits instructions through ARMEmitter::Emitter into a private
// anonymous mapping and feeds them to vixl's PrintDisassembler. The disassembler
// prints into a temporary file, which is then parsed back so tests can compare the
// instruction text.
class TestDisassembler : public ARMEmitter::Emitter {
public:
  TestDisassembler() {
    fp = tmpfile();
    Disasm = std::make_unique<vixl::aarch64::PrintDisassembler>(fp);
    // 2MB code size.
    MappedCode = mmap(nullptr, MappedCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    SetBuffer(reinterpret_cast<uint8_t*>(MappedCode), MappedCodeSize);
    BufferBegin = GetCursorAddress<const vixl::aarch64::Instruction*>();
  }
  ~TestDisassembler() {
    fclose(fp);
    // Release the code buffer; a fixture is constructed per test case, so leaving it
    // mapped accumulates one mapping per test case for the whole test run.
    if (MappedCode != MAP_FAILED) {
      munmap(MappedCode, MappedCodeSize);
    }
  }

  // Disassembles everything emitted since the last reset and returns the text of the
  // first instruction, or "<Invalid>" if no line could be parsed.
  fextl::string DisassembleSingle() {
    HandleDisasm();
    char Tmp[512];
    uint64_t Addr;
    uint32_t Encoding;
    // Only parse when this disassembly actually printed something, otherwise text left
    // over from an earlier call would be returned. The %511 width keeps the scan inside Tmp.
    const int Num = DisasmEnd > 0 ? fscanf(fp, "0x%lx %x %511[^\n]\n", &Addr, &Encoding, Tmp) : 0;

    // Always rewind, including on failure, so the next disassembly is written at offset 0.
    ResetFP();
    if (Num != 3) {
      return "<Invalid>";
    }

    return Tmp;
  }

  // Returns the raw 32-bit word at index `Offset` of the code buffer and resets the
  // emitter cursor and the file position. Nothing is disassembled.
  uint32_t DisassembleEncoding(size_t Offset = 0) {
    const uint32_t* Values = reinterpret_cast<const uint32_t*>(GetBufferBase());
    SetCursorOffset(0);
    ResetFP();
    return Values[Offset];
  }

  // Disassembles everything emitted since the last reset and returns the text of every
  // instruction, each followed by a newline.
  fextl::string DisassembleString() {
    HandleDisasm();
    fextl::string Decoded {};
    char Tmp[512];
    uint64_t Addr;
    uint32_t Encoding;
    // Stop at the end of what this disassembly wrote; the file is never truncated, so
    // text from an earlier, longer disassembly may still follow it.
    while (ftell(fp) < DisasmEnd && fscanf(fp, "0x%lx %x %511[^\n]\n", &Addr, &Encoding, Tmp) == 3) {
      Decoded += std::string_view(Tmp);
      Decoded += "\n";
    }

    ResetFP();
    return Decoded;
  }
private:
  // Prints the instructions between the buffer start and the current cursor into the
  // temporary file, remembers where that output ends, and rewinds both the emitter
  // cursor and the file for reading.
  void HandleDisasm() {
    const auto BufferEnd = GetCursorAddress<const vixl::aarch64::Instruction*>();
    Disasm->DisassembleBuffer(BufferBegin, BufferEnd);
    SetCursorOffset(0);
    DisasmEnd = ftell(fp);
    fseek(fp, 0, SEEK_SET);
  }
  void ResetFP() {
    fseek(fp, 0, SEEK_SET);
  }
  // Size of the anonymous mapping used as the code buffer.
  static constexpr size_t MappedCodeSize = 2 * 1024 * 1024;
  FILE* fp;
  // Base of the code mapping, kept so the destructor can unmap it.
  void* MappedCode {MAP_FAILED};
  // File offset at which the most recent disassembly output ended.
  long DisasmEnd {};
  const vixl::aarch64::Instruction* BufferBegin;
  std::unique_ptr<vixl::aarch64::PrintDisassembler> Disasm;
};

// Evaluates `emit`, then CHECKs that DisassembleSingle() equals `expected`.
// The comma operator sequences the emission before the disassembly; only the
// first disassembled instruction is compared. Must be used inside a
// TestDisassembler fixture method, since DisassembleSingle() is a member.
#define TEST_SINGLE(emit, expected) \
  { CHECK((emit, DisassembleSingle()) == expected); }

// Float16 disabled until we have a Float16 storage type with unittests.
#define TEST_FP16 0


================================================
FILE: FEXHeaderUtils/CMakeLists.txt
================================================
# Header-only utility library: an INTERFACE target that only carries include
# paths and compile definitions to its consumers.
add_library(FEXHeaderUtils INTERFACE)

# Check for syscall support here
# Each probe below compiles a tiny program that calls one libc wrapper. When it
# compiles, the matching HAS_SYSCALL_<NAME>=1 definition is exported on the
# interface target.
check_cxx_source_compiles("
  #include <sched.h>
  int main() {
  return ::getcpu(nullptr, nullptr);
  }"
  HAS_SYSCALL_GETCPU)
if (HAS_SYSCALL_GETCPU)
  message(STATUS "Has getcpu helper")
  target_compile_definitions(FEXHeaderUtils INTERFACE HAS_SYSCALL_GETCPU=1)
endif()

# gettid() wrapper from <unistd.h>.
check_cxx_source_compiles("
  #include <unistd.h>
  int main() {
  return ::gettid();
  }"
  HAS_SYSCALL_GETTID)
if (HAS_SYSCALL_GETTID)
  message(STATUS "Has gettid helper")
  target_compile_definitions(FEXHeaderUtils INTERFACE HAS_SYSCALL_GETTID=1)
endif()

# tgkill() wrapper from <signal.h>.
check_cxx_source_compiles("
  #include <signal.h>
  int main() {
  return ::tgkill(0, 0, 0);
  }"
  HAS_SYSCALL_TGKILL)
if (HAS_SYSCALL_TGKILL)
  message(STATUS "Has tgkill helper")
  target_compile_definitions(FEXHeaderUtils INTERFACE HAS_SYSCALL_TGKILL=1)
endif()

# statx() wrapper from <sys/stat.h>.
check_cxx_source_compiles("
  #include <sys/stat.h>
  int main() {
  return ::statx(0, nullptr, 0, 0, nullptr);
  }"
  HAS_SYSCALL_STATX)
if (HAS_SYSCALL_STATX)
  message(STATUS "Has statx helper")
  target_compile_definitions(FEXHeaderUtils INTERFACE HAS_SYSCALL_STATX=1)
endif()

# renameat2() wrapper from <stdio.h>.
check_cxx_source_compiles("
  #include <stdio.h>
  int main() {
  return ::renameat2(0, nullptr, 0, nullptr, 0);
  }"
  HAS_SYSCALL_RENAMEAT2)
if (HAS_SYSCALL_RENAMEAT2)
  message(STATUS "Has renameat2 helper")
  target_compile_definitions(FEXHeaderUtils INTERFACE HAS_SYSCALL_RENAMEAT2=1)
endif()

# Consumers include headers relative to this directory.
target_include_directories(FEXHeaderUtils INTERFACE .)


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/BitUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

// Header for various utilities that operate on bits and bytes.

#include <bit>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <type_traits>

namespace FEXCore {

// Yields the width of type T in bits (its size in bytes times CHAR_BIT).
template<typename T>
[[nodiscard]]
constexpr size_t BitSize() noexcept {
  constexpr size_t ByteCount = sizeof(T);
  return ByteCount * CHAR_BIT;
}

// Reverses the byte order of a 16-bit unsigned value.
// Uses the compiler builtin when available, otherwise a pair of shifts.
[[nodiscard]]
inline uint16_t BSwap16(uint16_t value) noexcept {
#ifdef __GNUC__
  return __builtin_bswap16(value);
#else
  const auto high_to_low = value >> 8;
  const auto low_to_high = value << 8;
  // The conversion to uint16_t drops the bits shifted above bit 15.
  return static_cast<uint16_t>(low_to_high | high_to_low);
#endif
}

// Reverses the byte order of a 32-bit unsigned value.
// Uses the compiler builtin when available, otherwise shifts each byte into place.
[[nodiscard]]
inline uint32_t BSwap32(uint32_t value) noexcept {
#ifdef __GNUC__
  return __builtin_bswap32(value);
#else
  const uint32_t byte3_to_0 = value >> 24;
  const uint32_t byte2_to_1 = (value >> 8) & 0x0000FF00U;
  const uint32_t byte1_to_2 = (value << 8) & 0x00FF0000U;
  const uint32_t byte0_to_3 = value << 24;
  return byte3_to_0 | byte2_to_1 | byte1_to_2 | byte0_to_3;
#endif
}

// Reverses the byte order of a 64-bit unsigned value.
// Uses the compiler builtin when available, otherwise shifts each byte into place.
[[nodiscard]]
inline uint64_t BSwap64(uint64_t value) noexcept {
#ifdef __GNUC__
  return __builtin_bswap64(value);
#else
  // Upper four bytes move down...
  const uint64_t lower_half = (value >> 56) | ((value >> 40) & 0x000000000000FF00ULL) | ((value >> 24) & 0x0000000000FF0000ULL) |
                              ((value >> 8) & 0x00000000FF000000ULL);
  // ...and lower four bytes move up.
  const uint64_t upper_half = ((value << 8) & 0x000000FF00000000ULL) | ((value << 24) & 0x0000FF0000000000ULL) |
                              ((value << 40) & 0x00FF000000000000ULL) | (value << 56);
  return lower_half | upper_half;
#endif
}

// Returns the position of the lowest set bit in `value`.
// Positions are 1-based; 0 is returned when no bit is set.
template<typename T>
[[nodiscard]]
constexpr int FindFirstSetBit(T value) noexcept {
  static_assert(std::is_unsigned_v<T>, "Type must be unsigned.");

  // The number of trailing zeroes is the 0-based index of the lowest set bit.
  return value == 0 ? 0 : std::countr_zero(value) + 1;
}

} // namespace FEXCore


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/Filesystem.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/memory_resource.h>
#include <FEXCore/fextl/string.h>

#include <algorithm>
#include <fcntl.h>
#include <memory_resource>
#include <string_view>
#ifndef _WIN32
#include <linux/limits.h>
#include <sys/sendfile.h>
#else
#include <filesystem>
#endif
#include <sys/stat.h>
#include <unistd.h>

namespace FHU::Filesystem {
// Outcome reported by CreateDirectory().
enum class CreateDirectoryResult {
  // The directory was created by this call.
  CREATED,
  // The path was already present, so nothing was created.
  EXISTS,
  // The directory could not be created.
  ERROR,
};

// Selects how CopyFile() treats a destination that already exists.
enum class CopyOptions {
  // No special handling for an existing destination.
  NONE,
  // Leave an existing destination untouched.
  SKIP_EXISTING,
  // Replace the contents of an existing destination.
  OVERWRITE_EXISTING,
};

/**
 * @brief Tests whether something exists at the given path.
 *
 * @param Path Null-terminated path to probe.
 *
 * @return True when `access(Path, F_OK)` succeeds, false otherwise.
 */
inline bool Exists(const char* Path) {
  const int AccessResult = access(Path, F_OK);
  return AccessResult == 0;
}

// String overload: tests whether something exists at the given path.
inline bool Exists(const fextl::string& Path) {
  const char* RawPath = Path.c_str();
  return access(RawPath, F_OK) == 0;
}

/**
 * @brief Renames a file through `rename()`, replacing the destination if it already exists.
 *
 * @param From Current path of the file.
 * @param To New path of the file.
 *
 * @return An empty error code on success, `std::errc::io_error` on any failure.
 */
[[nodiscard]]
inline std::error_code RenameFile(const fextl::string& From, const fextl::string& To) {
  if (rename(From.c_str(), To.c_str()) != 0) {
    return std::make_error_code(std::errc::io_error);
  }
  return {};
}

#ifndef _WIN32
// Tests whether `Path` exists, resolving it relative to the directory descriptor `FD`
// via `faccessat()`.
inline bool ExistsAt(int FD, const fextl::string& Path) {
  const int AccessResult = faccessat(FD, Path.c_str(), F_OK, 0);
  return AccessResult == 0;
}

/**
 * @brief Creates a single directory (mode 0777) at the provided path.
 *
 * @param Path The path to create a directory at.
 *
 * @return CREATED if the directory was made, EXISTS if the path already is a directory
 *         (symlinks are followed), ERROR in every other case.
 */
inline CreateDirectoryResult CreateDirectory(const fextl::string& Path) {
  const char* RawPath = Path.c_str();
  const int Status = ::mkdir(RawPath, 0777);
  if (Status == 0) {
    return CreateDirectoryResult::CREATED;
  }

  // Any failure other than "already exists" is a plain error.
  if (Status != -1 || errno != EEXIST) {
    return CreateDirectoryResult::ERROR;
  }

  // Something is already there; it only counts as success if it is a directory.
  struct stat Info;
  if (stat(RawPath, &Info) != 0) {
    return CreateDirectoryResult::ERROR;
  }

  if (S_ISDIR(Info.st_mode)) {
    return CreateDirectoryResult::EXISTS;
  }
  return CreateDirectoryResult::ERROR;
}

/**
 * @brief Creates a directory tree with the provided path.
 *
 * Tries to create the final directory first. If that fails, the parent path (everything
 * before the last '/') is created recursively and the final directory is retried.
 *
 * @param Path The path to create a tree at.
 *
 * @return True if the directory tree was created or already exists.
 */
inline bool CreateDirectories(const fextl::string& Path) {
  // Try to create the directory initially.
  if (CreateDirectory(Path) != CreateDirectoryResult::ERROR) {
    return true;
  }

  const auto LastSeparator = Path.rfind('/');
  if (LastSeparator == fextl::string::npos) {
    // A path without any separator has no parent to create first. Retrying the very
    // same path would recurse forever, so report the failure instead.
    return false;
  }

  // Walk the path in reverse and create paths as we go.
  const fextl::string TmpPath {Path.substr(0, LastSeparator)};
  if (!TmpPath.empty() && CreateDirectories(TmpPath)) {
    return CreateDirectory(Path) != CreateDirectoryResult::ERROR;
  }
  return false;
}

/**
 * @brief Extracts the filename component from a file path.
 *
 * @param Path The path to take the filename from.
 *
 * @return Everything after the last '/', or the whole path when it contains no '/'.
 */
inline fextl::string GetFilename(const fextl::string& Path) {
  const auto SeparatorPos = Path.rfind('/');

  // Without a separator the path is likely a relative `.`, `..`, `<Application Name>`,
  // or the empty string; it is returned unchanged.
  return SeparatorPos == fextl::string::npos ? Path : Path.substr(SeparatorPos + 1);
}

/**
 * @brief Extracts the filename component from a file path without copying.
 *
 * @param Path The path to take the filename from.
 *
 * @return A view into `Path` covering everything after the last '/', or all of `Path`
 *         when it contains no '/'.
 */
inline std::string_view GetFilename(std::string_view Path) {
  const auto LastSeparator = Path.rfind('/');
  // Compare against the view's own npos rather than another string type's constant.
  if (LastSeparator == std::string_view::npos) {
    // No separator. Likely relative `.`, `..`, `<Application Name>`, or empty string.
    return Path;
  }

  return Path.substr(LastSeparator + 1);
}

// Returns the parent of `Path`: everything before the last '/', with any run of
// separators directly in front of it removed as well.
//  - A path whose only separator is the leading one yields "/".
//  - "." and ".." yield an empty string, matching std::filesystem::path::parent_path.
//  - Any other path without a separator is returned unchanged.
inline fextl::string ParentPath(const fextl::string& Path) {
  const auto SeparatorPos = Path.rfind('/');

  if (SeparatorPos == fextl::string::npos) {
    // No separator. Likely relative `.`, `..`, `<Application Name>`, or empty string.
    const bool IsDotEntry = Path == "." || Path == "..";
    return IsDotEntry ? fextl::string {} : Path;
  }

  if (SeparatorPos == 0) {
    // In the case of root, just return.
    return "/";
  }

  // Drop the separators that immediately precede the last one, but always keep at
  // least one character.
  size_t ParentLength = SeparatorPos;
  while (ParentLength > 1 && Path[ParentLength - 1] == '/') {
    --ParentLength;
  }

  return Path.substr(0, ParentLength);
}

// A path is relative when it does not begin with '/'. The empty path is relative.
inline bool IsRelative(const std::string_view Path) {
  return Path.empty() || Path.front() != '/';
}

// A path is absolute when it begins with '/'. The empty path is not absolute.
inline bool IsAbsolute(const std::string_view Path) {
  return !Path.empty() && Path.front() == '/';
}

/**
 * @brief Copy a file from a location to another
 *
 * Behaves similarly to std::filesystem::copy_file but with less copy options.
 *
 * @param From Source file location.
 * @param To Destination file location.
 * @param Options Copy options.
 *
 * @return True if the copy succeeded, false otherwise.
 */
inline bool CopyFile(const fextl::string& From, const fextl::string& To, CopyOptions Options = CopyOptions::NONE) {
  const bool DestExists = Exists(To);
  if (Options == CopyOptions::SKIP_EXISTING && DestExists) {
    // If the destination file exists already and the skip existing flag is set then
    // return true without error.
    return true;
  }

  if (Options == CopyOptions::OVERWRITE_EXISTING && DestExists) {
    // If we are overwriting and the file exists then we want to use `sendfile` to overwrite
    const int SourceFD = open(From.c_str(), O_RDONLY | O_CLOEXEC);
    if (SourceFD == -1) {
      return false;
    }

    // O_CLOEXEC matches the source descriptor so neither one leaks across an exec.
    const int DestinationFD = open(To.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0200);
    if (DestinationFD == -1) {
      close(SourceFD);
      return false;
    }

    struct stat buf;
    // Set the destination permissions to the original source permissions.
    if (fstat(SourceFD, &buf) != 0 || fchmod(DestinationFD, buf.st_mode) != 0) {
      close(DestinationFD);
      close(SourceFD);
      return false;
    }

    // A single sendfile call may transfer fewer bytes than requested, so keep going
    // until the whole file is copied, an error occurs, or the source runs dry.
    off_t Remaining = buf.st_size;
    while (Remaining > 0) {
      const ssize_t Sent = sendfile(DestinationFD, SourceFD, nullptr, Remaining);
      if (Sent <= 0) {
        break;
      }
      Remaining -= Sent;
    }

    close(DestinationFD);
    close(SourceFD);
    return Remaining == 0;
  }

  if (!DestExists) {
    // If the destination doesn't exist then just use rename.
    // NOTE(review): rename moves the source instead of duplicating it and cannot cross
    // filesystems; kept as-is because callers may depend on it - confirm intent.
    return rename(From.c_str(), To.c_str()) == 0;
  }

  return false;
}

// Normalizes a path purely by inspecting its text (no filesystem access):
//  - runs of '/' collapse, since empty components are never stored,
//  - `.` components are dropped, except a leading one in a relative path,
//  - `..` removes the preceding component when there is one that is not itself `..`,
//  - `..` directly at the root of an absolute path is dropped.
// A trailing '/' is kept only if the last remaining component is neither `.` nor `..`.
// An empty input yields an empty string.
inline fextl::string LexicallyNormal(const fextl::string& Path) {
  const auto PathSize = Path.size();

  // Early exit on empty paths.
  if (PathSize == 0) {
    return {};
  }

  const auto IsAbsolutePath = IsAbsolute(Path);
  const auto EndsWithSeparator = Path.ends_with('/');
  // Count the number of separators up front
  const auto SeparatorCount = std::count(Path.begin(), Path.end(), '/');

  // Use std::list to store path elements to avoid iterator invalidation on insert/erase.
  // The list is allocated on stack to be more optimal. The size is determined by the
  // maximum number of list objects (separator count plus 2) multiplied by the list
  // element size (32-bytes per element: the string_view itself and the prev/next pointers).
  size_t DataSize = (sizeof(std::string_view) + sizeof(void*) * 2) * (SeparatorCount + 2);
  void* Data = alloca(DataSize);
  fextl::pmr::fixed_size_monotonic_buffer_resource mbr(Data, DataSize);
  std::pmr::polymorphic_allocator<std::byte> pa {&mbr};
  std::pmr::list<std::string_view> Parts {pa};

  // Split the path on '/'. Each pass either stores one non-empty component or steps
  // over a single separator.
  size_t CurrentOffset {};
  do {
    auto FoundSeperator = Path.find('/', CurrentOffset);
    if (FoundSeperator == Path.npos) {
      // No further separator: the component runs to the end of the path.
      FoundSeperator = PathSize;
    }

    const auto Begin = Path.begin() + CurrentOffset;
    const auto End = Path.begin() + FoundSeperator;
    const auto Size = End - Begin;

    // Only insert parts that contain data.
    if (Size != 0) {
      Parts.emplace_back(std::string_view(Begin, End));
    }

    if (Size == 0) {
      // If the view is empty, skip over the separator.
      FoundSeperator += 1;
    }

    CurrentOffset = FoundSeperator;
  } while (CurrentOffset != PathSize);

  // Resolve `.` and `..`. CurrentIterDistance tracks how many components precede
  // `iter` in the list, so zero means `iter` is the first remaining component.
  size_t CurrentIterDistance {};
  for (auto iter = Parts.begin(); iter != Parts.end();) {
    auto& Part = *iter;
    if (Part == ".") {
      // Erase '.' directory parts if not at root.
      if (CurrentIterDistance > 0 || IsAbsolutePath) {
        // Erasing this iterator, don't increase iter distances
        iter = Parts.erase(iter);
        continue;
      }
    }

    if (Part == "..") {
      if (CurrentIterDistance > 0) {
        // If not at root then remove both this iterator and the previous one.
        // ONLY if the previous iterator is also not ".."
        //
        // If the previous iterator is '.' then /only/ erase the previous iterator.
        auto PreviousIter = iter;
        --PreviousIter;

        if (*PreviousIter == ".") {
          // Erasing the previous iterator, iterator distance has subtracted by one
          --CurrentIterDistance;
          Parts.erase(PreviousIter);
        } else if (*PreviousIter != "..") {
          // Erasing the previous iterator, iterator distance has subtracted by one
          // Also erasing current iterator, which means iterator distance also doesn't increase by one.
          --CurrentIterDistance;
          Parts.erase(PreviousIter);
          iter = Parts.erase(iter);
          continue;
        }
      } else if (IsAbsolutePath) {
        // `..` at the base. Just remove this
        iter = Parts.erase(iter);
        continue;
      }
    }

    // Iterator distance increased by one.
    ++CurrentIterDistance;
    ++iter;
  }


  // Add a final separator unless the last element is ellipses.
  const bool NeedsFinalSeparator = EndsWithSeparator && (!Parts.empty() && Parts.back() != "." && Parts.back() != "..");
  return fextl::fmt::format("{}{}{}", IsAbsolutePath ? "/" : "", fmt::join(Parts, "/"), NeedsFinalSeparator ? "/" : "");
}

// Resolves `Path` with `realpath()`, writing the result into `Fill`.
// Returns whatever `realpath()` returns.
inline char* Absolute(const char* Path, char Fill[PATH_MAX]) {
  char* const Resolved = realpath(Path, Fill);
  return Resolved;
}
#else
// Converts a std::filesystem::path into a fextl::string by asking the path for a
// narrow string built with the FEX allocator.
inline fextl::string PathToString(const std::filesystem::path& path) {
  using Traits = std::char_traits<char>;
  using Allocator = fextl::FEXAlloc<char>;
  return path.string<char, Traits, Allocator>();
}

// Creates a single directory at `Path`.
// Returns EXISTS when anything is already present at the path, CREATED when the
// directory was made, and ERROR otherwise.
inline CreateDirectoryResult CreateDirectory(const fextl::string& Path) {
  std::error_code ec;
  const bool AlreadyPresent = std::filesystem::exists(Path, ec);
  if (AlreadyPresent) {
    return CreateDirectoryResult::EXISTS;
  }

  if (std::filesystem::create_directory(Path, ec)) {
    return CreateDirectoryResult::CREATED;
  }
  return CreateDirectoryResult::ERROR;
}

// Creates `Path` and any missing parents.
// Returns true when the path already exists or the tree was created.
inline bool CreateDirectories(const fextl::string& Path) {
  std::error_code ec;
  if (std::filesystem::exists(Path, ec)) {
    return true;
  }
  return std::filesystem::create_directories(Path, ec);
}

// Returns the filename component of `Path` as determined by std::filesystem.
inline fextl::string GetFilename(const fextl::string& Path) {
  const std::filesystem::path Parsed {Path};
  return PathToString(Parsed.filename());
}

// Returns a view of the filename component of `Path`. The length of the filename comes
// from std::filesystem; the result is the tail of `Path` with that length.
inline std::string_view GetFilename(std::string_view Path) {
  const std::filesystem::path Parsed {Path};
  const auto Filename = PathToString(Parsed.filename());
  const auto TailOffset = Path.size() - Filename.size();
  return Path.substr(TailOffset);
}

// Returns the parent of `Path` as determined by std::filesystem::path::parent_path.
inline fextl::string ParentPath(const fextl::string& Path) {
  const std::filesystem::path Parsed {Path};
  return PathToString(Parsed.parent_path());
}

// True when std::filesystem classifies `Path` as relative.
inline bool IsRelative(const std::string_view Path) {
  const std::filesystem::path Parsed {Path};
  return Parsed.is_relative();
}

// True when std::filesystem classifies `Path` as absolute.
inline bool IsAbsolute(const std::string_view Path) {
  const std::filesystem::path Parsed {Path};
  return Parsed.is_absolute();
}

// Copies `From` to `To` with std::filesystem::copy_file, translating CopyOptions into
// the matching std::filesystem::copy_options. Returns the result of copy_file; errors
// are captured in a local error_code and not reported further.
inline bool CopyFile(const fextl::string& From, const fextl::string& To, CopyOptions Options = CopyOptions::NONE) {
  std::filesystem::copy_options options {};
  switch (Options) {
  case CopyOptions::SKIP_EXISTING: options = std::filesystem::copy_options::skip_existing; break;
  case CopyOptions::OVERWRITE_EXISTING: options = std::filesystem::copy_options::overwrite_existing; break;
  default: break;
  }

  std::error_code ec;
  const bool Copied = std::filesystem::copy_file(From, To, options, ec);
  return Copied;
}

// Returns `Path` normalized with std::filesystem::path::lexically_normal.
inline fextl::string LexicallyNormal(const fextl::string& Path) {
  const std::filesystem::path Parsed {Path};
  return PathToString(Parsed.lexically_normal());
}

// Resolves `Path` to an absolute path with std::filesystem::absolute and writes it,
// null-terminated, into `Fill`.
//
// Returns `Fill` on success. Returns nullptr when the path cannot be made absolute or
// when the result (including its terminator) does not fit in PATH_MAX bytes.
inline char* Absolute(const char* Path, char Fill[PATH_MAX]) {
  std::error_code ec;
  const auto PathAbsolute = std::filesystem::absolute(Path, ec);
  if (ec) {
    return nullptr;
  }

  const auto Resolved = PathAbsolute.string();
  // `Fill` is a pointer here, so sizeof(*Fill) is 1; the capacity is PATH_MAX.
  if (Resolved.size() >= static_cast<size_t>(PATH_MAX)) {
    return nullptr;
  }

  // Copy the characters together with the terminating null.
  std::copy_n(Resolved.c_str(), Resolved.size() + 1, Fill);
  return Fill;
}
#endif

} // namespace FHU::Filesystem


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/RingBuffer.h
================================================
#pragma once
#include <bit>
#include <cstddef>
#include <type_traits>

namespace FHU {
// Fixed-capacity, non-blocking ring buffer meant for thread-local use.
// Handy while debugging to keep a short history of recent values; once `Elements`
// values have been stored, each new value overwrites the oldest slot.
// `Elements` must be a power of two and `T` trivially copyable.
template<typename T, size_t Elements>
requires (std::has_single_bit(Elements) && std::is_trivially_copyable_v<T>)
class [[deprecated("Not for production use")]] NonBlockRingBuffer final {
public:
  // Stores `Val` in the current slot and advances to the next one, wrapping around.
  void emplace(T Val) {
    // Elements is a power of two, so masking is the same as taking the remainder.
    constexpr size_t IndexMask = Elements - 1;
    Ring[Current] = Val;
    ++Current;
    Current &= IndexMask;
  }

private:
  T Ring[Elements] {};
  size_t Current {};
};
} // namespace FHU


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/StringArgumentParser.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/vector.h>
#include <FEXCore/fextl/fmt.h>

#include <algorithm>
#include <string_view>

namespace FHU {

/**
 * @brief Parses a string of arguments, returning a vector of string_views.
 *
 * Arguments are separated by one or more space characters (' '). Leading, trailing and
 * repeated spaces never produce empty arguments. The returned views point into the storage
 * referenced by ArgumentString, so that storage must outlive the result.
 *
 * @param ArgumentString The string of arguments to parse
 *
 * @return The array of parsed arguments
 */
static inline fextl::vector<std::string_view> ParseArgumentsFromString(const std::string_view ArgumentString) {
  fextl::vector<std::string_view> Arguments;

  // Work on indices rather than iterators: this never forms an iterator beyond end() and
  // does not rely on string_view iterators being raw pointers.
  auto Begin = ArgumentString.find_first_not_of(' ');
  while (Begin != std::string_view::npos) {
    // The end of an argument ends with a space or the end of the interpreter line.
    auto ArgEnd = ArgumentString.find(' ', Begin);
    if (ArgEnd == std::string_view::npos) {
      ArgEnd = ArgumentString.size();
    }

    Arguments.emplace_back(ArgumentString.substr(Begin, ArgEnd - Begin));

    // Skip the separator run; yields npos once only spaces (or nothing) remain.
    Begin = ArgumentString.find_first_not_of(' ', ArgEnd);
  }

  return Arguments;
}
} // namespace FHU


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/SymlinkChecks.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>

#include <fcntl.h>
#include <sys/stat.h>
#include <span>
#include <unistd.h>

namespace FHU::Symlinks {
#ifndef _WIN32
// Reports whether `Filename` names a symbolic link.
// Uses lstat so the link itself is inspected; returns false when lstat fails.
inline bool IsSymlink(const fextl::string& Filename) {
  struct stat Info {};
  if (lstat(Filename.c_str(), &Info) != 0) {
    return false;
  }
  return S_ISLNK(Info.st_mode);
}

// Reads the target of the symlink `Filename` into `ResultBuffer` with a single readlink call.
// Only one level is resolved; a target that is itself a symlink is not followed.
// No null terminator is written to the buffer.
// Returns a view over the bytes readlink wrote, or an empty view if readlink failed.
inline std::string_view ResolveSymlink(const fextl::string& Filename, std::span<char> ResultBuffer) {
  char* const Output = ResultBuffer.data();
  const ssize_t Length = readlink(Filename.c_str(), Output, ResultBuffer.size());
  return Length == -1 ? std::string_view {} : std::string_view(Output, Length);
}
#endif
} // namespace FHU::Symlinks


================================================
FILE: FEXHeaderUtils/FEXHeaderUtils/Syscalls.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/LogManager.h>

#include <cstdint>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#ifndef _WIN32
#include <syscall.h>
#else
#include <processthreadsapi.h>
#endif
#include <sys/stat.h>
#include <unistd.h>

namespace FHU::Syscalls {
// Fallback definitions: each constant below is only defined here when the headers
// included above did not already provide it.

// mmap flag.
#ifndef MAP_FIXED_NOREPLACE
#define MAP_FIXED_NOREPLACE 0x100000
#endif

// *_STAT_ANY command values (presumably for semctl/shmctl/msgctl — confirm against callers).
#ifndef SEM_STAT_ANY
#define SEM_STAT_ANY 20
#endif

#ifndef SHM_STAT_ANY
#define SHM_STAT_ANY 15
#endif

#ifndef MSG_STAT_ANY
#define MSG_STAT_ANY 13
#endif

// clone flag.
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif

// statx syscall number differs per host architecture; no fallback is supplied for
// architectures other than the two listed here.
#if defined(__aarch64__) || defined(_M_ARM64)
#ifndef SYS_statx
#define SYS_statx 291
#endif
#elif defined(__x86_64__) || defined(_M_X64)
#ifndef SYS_statx
#define SYS_statx 332
#endif
#endif

// Common syscall numbers
// (same value used regardless of the host architecture checks above)
#ifndef SYS_pidfd_open
#define SYS_pidfd_open 434
#endif

#ifndef _WIN32
// Stores the calling thread's current CPU in `*cpu` and its node in `*node`.
// Uses the libc wrapper when HAS_SYSCALL_GETCPU is defined to a non-zero value,
// otherwise issues the raw syscall.
inline int32_t getcpu(uint32_t* cpu, uint32_t* node) {
#if !defined(HAS_SYSCALL_GETCPU) || !HAS_SYSCALL_GETCPU
  // Third argument is unused
  return ::syscall(SYS_getcpu, cpu, node, nullptr);
#else
  return ::getcpu(cpu, node);
#endif
}

// Returns the calling thread's id.
// Uses the libc wrapper when HAS_SYSCALL_GETTID is defined to a non-zero value,
// otherwise issues the raw syscall.
inline int32_t gettid() {
#if !defined(HAS_SYSCALL_GETTID) || !HAS_SYSCALL_GETTID
  return ::syscall(SYS_gettid);
#else
  return ::gettid();
#endif
}

// Sends signal `sig` to thread `tid` of thread group `tgid`.
// Uses the libc wrapper when HAS_SYSCALL_TGKILL is defined to a non-zero value,
// otherwise issues the raw syscall.
inline int32_t tgkill(pid_t tgid, pid_t tid, int sig) {
#if !defined(HAS_SYSCALL_TGKILL) || !HAS_SYSCALL_TGKILL
  return ::syscall(SYS_tgkill, tgid, tid, sig);
#else
  return ::tgkill(tgid, tid, sig);
#endif
}

// Forwards to statx. `statxbuf` is passed as an untyped pointer so callers do not need the
// `struct statx` declaration.
// Uses the libc wrapper when HAS_SYSCALL_STATX is defined to a non-zero value,
// otherwise issues the raw syscall.
inline int32_t statx(int dirfd, const char* pathname, int32_t flags, uint32_t mask, void* statxbuf) {
#if !defined(HAS_SYSCALL_STATX) || !HAS_SYSCALL_STATX
  return ::syscall(SYS_statx, dirfd, pathname, flags, mask, statxbuf);
#else
  return ::statx(dirfd, pathname, flags, mask, reinterpret_cast<struct statx* __restrict>(statxbuf));
#endif
}

inline int32_t renameat2(int olddirfd, const char* oldpath, int newdirfd, const char* newpath, unsigned int flags) {
#if defined(HAS_SYSCALL_RENAMEAT2) && HAS_SYSCALL_RENAMEAT2
  return ::renameat2(olddirfd, oldpath, newdirfd, newpath, flags);
#else
  return ::syscall(SYS_renameat2, olddirfd, oldpath, newdirfd, newpath, flags);
#endif
}

// Issues the raw pidfd_open syscall for `pid` with `flags` and returns its result
// narrowed to int32_t. No libc wrapper is used.
inline int32_t pidfd_open(pid_t pid, unsigned int flags) {
  const auto Result = ::syscall(SYS_pidfd_open, pid, flags);
  return Result;
}
#else

// Win32 variant: reports the current processor number through `cpu` and always reports
// node 0 through `node`. Either pointer may be null, in which case it is skipped.
// Always returns 0.
inline int32_t getcpu(uint32_t* cpu, uint32_t* node) {
  if (cpu != nullptr) {
    *cpu = GetCurrentProcessorNumber();
  }

  if (node != nullptr) {
    *node = 0;
  }

  return 0;
}

// Win32 variant: not supported. Reports the error through ERROR_AND_DIE_FMT; the arguments
// are never read. The trailing return only satisfies the signature.
inline int32_t tgkill(pid_t tgid, pid_t tid, int sig) {
  ERROR_AND_DIE_FMT("Unsupported");
  constexpr int32_t Unused = 0;
  return Unused;
}

// Win32 variant: returns the id reported by GetCurrentThreadId, narrowed to int32_t.
inline int32_t gettid() {
  const auto ThreadId = GetCurrentThreadId();
  return ThreadId;
}

#endif

} // namespace FHU::Syscalls


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2019 Ryan Houdek <Sonicadvance1@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: Readme.md
================================================
[中文](https://github.com/FEX-Emu/FEX/blob/main/docs/Readme_CN.md)
# FEX: Emulate x86 Programs on ARM64
FEX allows you to run x86 applications on ARM64 Linux devices, similar to qemu-user and box64.
It offers broad compatibility with both 32-bit and 64-bit binaries, and it can be used alongside Wine/Proton to play Windows games.

It supports forwarding API calls to host system libraries like OpenGL or Vulkan to reduce emulation overhead.
An experimental code cache helps minimize in-game stuttering as much as possible.
Furthermore, a per-app configuration system allows tweaking performance per game, e.g. by skipping costly memory model emulation.
We also provide a user-friendly FEXConfig GUI to explore and change these settings.

## Prerequisites
FEX requires ARMv8.0+ hardware. It has been tested with the following Linux distributions, though others are likely to work as well:

- Arch Linux
- Fedora Linux
- openSUSE
- Ubuntu 22.04/24.04/24.10/25.04

An x86-64 RootFS is required and can be downloaded using our `FEXRootFSFetcher` tool for many distributions.
For other distributions you will need to generate your own RootFS (our [wiki page](https://wiki.fex-emu.com/index.php/Development:Setting_up_RootFS) might help).

## Quick Start
### For Ubuntu 22.04, 24.04, 24.10 and 25.04
Execute the following command in the terminal to install FEX through a PPA.

```sh
curl --silent https://raw.githubusercontent.com/FEX-Emu/FEX/main/Scripts/InstallFEX.py | python3
```

This command will walk you through installing FEX through a PPA, and downloading a RootFS for use with FEX.

### For other Distributions
Follow the guide on the official FEX-Emu Wiki [here](https://wiki.fex-emu.com/index.php/Development:Setting_up_FEX).

### Navigating the Source
See the [Source Outline](docs/SourceOutline.md) for more information.


================================================
FILE: Scripts/CI_FetchRootFS.py
================================================
#!/usr/bin/python3
import xxhash
import sys
import os
import shutil
import subprocess

def GetDistroInfo():
    """Read the distribution id and release from /etc/lsb-release.

    Returns:
        A two-element list ``[name, version]``. ``name`` is the lower-cased
        ``DISTRIB_ID`` value and ``version`` is the ``DISTRIB_RELEASE`` value;
        each stays "Unknown" when its key does not appear in the file.

    Raises:
        OSError: if /etc/lsb-release cannot be opened.
    """
    Info = {"DISTRIB_ID": "Unknown", "DISTRIB_RELEASE": "Unknown"}

    with open("/etc/lsb-release", 'r') as LsbFile:
        # readline() returns "" only at end of file, which ends the iteration.
        for Entry in iter(LsbFile.readline, ""):
            Fields = Entry.split("=")
            Key = Fields[0]
            if Key == "DISTRIB_ID":
                Info[Key] = Fields[1].lower().rstrip()
            if Key == "DISTRIB_RELEASE":
                Info[Key] = Fields[1].rstrip()

    return [Info["DISTRIB_ID"], Info["DISTRIB_RELEASE"]]

def FindBestImageFit(Distro, links_file):
    """Pick the RootFS image entry that best matches the host distribution.

    ``links_file`` is a text file made of consecutive five-line records:
    distro name, distro version, user readable name, image file path, and
    hash (hexadecimal). Reading stops at the first empty distro-name line or
    at end of file.

    An entry scores one point for a matching name and one for a matching
    version. The highest score wins; on a tie the earliest entry is kept.
    Entries that score zero are never selected.

    Args:
        Distro: ``[name, version]`` as returned by GetDistroInfo.
        links_file: path of the links file.

    Returns:
        ``[name, version, readable_name, image_path, hash]`` where ``hash`` is
        the record's hexadecimal hash parsed to an int.

    Raises:
        ValueError: if no entry matches the distro name or version, or if the
            selected entry's hash is not valid hexadecimal.
    """
    BestFitRate = 0
    BestFit = None

    with open(links_file, 'r') as f:
        while True:
            DistroName = f.readline().strip()
            if not DistroName:
                break

            DistroVersion = f.readline().strip()
            DistroReadableName = f.readline().strip()
            DistroImagePath = f.readline().strip()
            DistroHash = f.readline().strip()

            # The stripped fields are always strings, so only equality can match.
            FitRate = int(DistroName == Distro[0]) + int(DistroVersion == Distro[1])

            # Strictly greater: the first entry with a given score wins ties.
            if FitRate > BestFitRate:
                BestFitRate = FitRate
                BestFit = [DistroName, DistroVersion, DistroReadableName, DistroImagePath, DistroHash]

    if BestFit is None:
        # Previously this fell through to int(None, 16) and died with an opaque TypeError.
        raise ValueError("No RootFS image in {} matches distro {} {}".format(links_file, Distro[0], Distro[1]))

    BestFit[4] = int(BestFit[4], 16)
    return BestFit


def HashFile(file):
    """Return the xxh3-64 hash (seed 0) of the file at ``file`` as an int.

    The file is read in chunks through one reusable 32MB buffer, and the
    digest bytes are interpreted as a big-endian integer.
    """
    # 32MB buffer size
    ChunkSize = 32 * 1024 * 1024

    Hasher = xxhash.xxh3_64(seed=0)
    Chunk = memoryview(bytearray(ChunkSize))

    with open(file, 'rb') as Stream:
        while True:
            BytesRead = Stream.readinto(Chunk)
            if not BytesRead:
                break
            # Only hash the part of the buffer that was filled by this read.
            Hasher.update(Chunk[:BytesRead])

    return int.from_bytes(Hasher.digest(), "big")

def RemoveRootFSFolder(RootFSPath):
    """Delete ``RootFSPath`` with everything in it, then recreate it empty.

    Errors during removal are ignored; errors creating the directory propagate.
    """
    print("Removing previous rootfs extraction before copying")

    # Wipe whatever is there, then recreate the folder
    shutil.rmtree(RootFSPath, ignore_errors=True)
    os.makedirs(RootFSPath)

def CheckFilesystemForFS(RootFSMountPath, RootFSPath, DistroFit):
    """Make sure an extracted RootFS matching ``DistroFit`` exists in ``RootFSPath``.

    Args:
        RootFSMountPath: directory holding the source images. The image path is
            built by plain string concatenation with ``DistroFit[3]`` (no
            separator is inserted).
        RootFSPath: directory that receives the image copy and its extraction.
            It is created when missing.
        DistroFit: list as returned by FindBestImageFit; index 3 is the image
            path and index 4 the expected hash as an int.

    Returns:
        True when the image is present locally and ``RootFSPath + "/usr"``
        exists afterwards; False when a path check fails, the source image is
        missing, or extraction fails (the local image copy is removed in the
        extraction-failure cases).
    """
    # Check if rootfs mount path exists
    if (not os.path.exists(RootFSMountPath) or
        not os.path.isdir(RootFSMountPath)):
        print("RootFS mount path is wrong")
        return False

    # Check if rootfs path exists
    if (not os.path.exists(RootFSPath) or
        not os.path.isdir(RootFSPath)):
        # Create this directory
        os.makedirs(RootFSPath)

    # Re-check that the rootfs path is a directory after the creation attempt
    if not os.path.isdir(RootFSPath):
        print("RootFS path is not a directory")
        return False

    # Check rootfs folder for image, copy and extract as necessary
    MountRootFSImagePath = RootFSMountPath + DistroFit[3]
    RootFSImagePath = RootFSPath + "/" + os.path.basename(DistroFit[3])
    NeedsExtraction = False
    PreviouslyExistingRootFS = False

    if not os.path.exists(MountRootFSImagePath):
        print("Image {} doesn't exist".format(MountRootFSImagePath))
        return False

    if not os.path.exists(RootFSImagePath):
        # Copy over
        # The whole RootFSPath is wiped first, so any old extraction goes with it.
        print("RootFS image doesn't exist. Copying")
        RemoveRootFSFolder(RootFSPath)
        shutil.copyfile(MountRootFSImagePath, RootFSImagePath)
        NeedsExtraction = True

    # Check if the image needs to be extracted
    # The presence of "usr" is used as the marker of an existing extraction.
    if not os.path.exists(RootFSPath + "/usr"):
        NeedsExtraction = True
    else:
        PreviouslyExistingRootFS = True

    # Now hash the image
    RootFSHash = HashFile(RootFSImagePath)
    if RootFSHash != DistroFit[4]:
        print("Hash {} did not match {}, copying new image".format(hex(RootFSHash), hex(DistroFit[4])))

        # Only wipe the folder when an extraction was found above.
        if PreviouslyExistingRootFS:
            RemoveRootFSFolder(RootFSPath)

        # The fresh copy is not hashed again before extraction.
        shutil.copyfile(MountRootFSImagePath, RootFSImagePath)
        NeedsExtraction = True

    if NeedsExtraction:
        print("Extracting rootfs")

        # Extract the image into RootFSPath with unsquashfs (-f, -d <dir>).
        CmdResult = subprocess.call(["unsquashfs", "-f", "-d", RootFSPath, RootFSImagePath])
        if CmdResult != 0:
            print("Couldn't extract squashfs. Removing image file to be safe")
            os.remove(RootFSImagePath)
            return False

    # Final sanity check: the extraction must have produced "usr".
    if not os.path.exists(RootFSPath + "/usr"):
        print("Couldn't extract squashfs. Removing image file to be safe")
        os.remove(RootFSImagePath)
        return False

    print("RootFS successfully checked and extracted")

    return True

def main():
    """Entry point: locate, verify and extract the CI RootFS.

    Reads FEX_ROOTFS_MOUNT (directory containing RootFS_links.txt and the
    images) and FEX_ROOTFS_PATH (extraction target) from the environment.

    Returns:
        0 on success. Every failure path prints a message and exits the
        process with status 1.
    """
    if sys.version_info[0] < 3:
        # This script never imports `logging`, so report with print like every
        # other failure here, and actually stop instead of carrying on.
        print("Python 3 or a more recent version is required.")
        sys.exit(1)

    FEX_ROOTFS_MOUNT = os.getenv("FEX_ROOTFS_MOUNT")
    FEX_ROOTFS_PATH = os.getenv("FEX_ROOTFS_PATH")

    if FEX_ROOTFS_MOUNT is None:
        print("Need FEX_ROOTFS_MOUNT set")
        sys.exit(1)

    if FEX_ROOTFS_PATH is None:
        print("Need FEX_ROOTFS_PATH set")
        sys.exit(1)

    if shutil.which("unsquashfs") is None:
        print("CI system didn't have unsquashfs installed")
        sys.exit(1)

    Distro = GetDistroInfo()
    DistroFit = FindBestImageFit(Distro, FEX_ROOTFS_MOUNT + "/RootFS_links.txt")

    if not CheckFilesystemForFS(FEX_ROOTFS_MOUNT, FEX_ROOTFS_PATH, DistroFit):
        print("Couldn't load filesystem rootfs")
        sys.exit(1)

    return 0

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/CheckBinfmtNotInstall.sh
================================================
#!/bin/sh
# Fails (exit 1) if any binfmt passed as an argument is already installed, either as
# reported by `update-binfmts --find` (when that tool exists) or as an existing file.
# Exits 0 when none of the arguments is found.

for binfmt in "$@"; do
  result=0
  if command -v update-binfmts >/dev/null; then
    # If we found the binfmt_misc file passed in then error.
    # Output is discarded through /dev/null rather than by closing the descriptors:
    # running a utility with stdout/stderr closed can make it fail on write and
    # turn a positive lookup into a false negative.
    update-binfmts --find "$binfmt" >/dev/null 2>&1 && result=1
  fi

  # If the binfmt_misc file exists then error
  [ -f "$binfmt" ] && result=1

  if [ "$result" -eq 1 ]; then
    echo "==============================================================="
    echo "$binfmt binfmt file is installed!"
    echo "This conflicts with FEX-Emu's binfmt_misc!"
    echo "This will cause issues when running FEX-Emu through binfmt_misc"
    echo "Not installing until you uninstall this binfmt_misc file!"
    echo "==============================================================="
    exit 1
  fi
done

exit 0


================================================
FILE: Scripts/ClassifyCPU.py
================================================
#!/usr/bin/python3
import sys
import platform

def ListContainsRequired(Features, RequiredFeatures):
    """Return True when every entry of ``RequiredFeatures`` is in ``Features``.

    An empty ``RequiredFeatures`` yields True.
    """
    return all(Req in Features for Req in RequiredFeatures)

def GetCPUFeaturesVersion():
    """Classify the host CPU's architecture version from /proc/cpuinfo.

    Only the first line containing "Features" is inspected. Its feature list
    is compared against cumulative feature sets for 8.1 through 8.4, and the
    highest fully satisfied version is returned as a string. "8.0" is returned
    when none is satisfied or no such line exists.
    """
    # Also LOR but kernel doesn't expose this
    v8_1Mandatory = ["atomics", "asimdrdm", "crc32"]
    v8_2Mandatory = v8_1Mandatory + ["dcpop"]
    v8_3Mandatory = v8_2Mandatory + ["fcma", "jscvt", "lrcpc", "paca", "pacg"]
    v8_4Mandatory = v8_3Mandatory + ["asimddp", "flagm", "ilrcpc", "uscat"]

    # We don't care beyond 8.4 right now. Ordered from newest to oldest so the
    # first satisfied entry is the best one.
    Tiers = [
        ("8.4", v8_4Mandatory),
        ("8.3", v8_3Mandatory),
        ("8.2", v8_2Mandatory),
        ("8.1", v8_1Mandatory),
    ]

    with open("/proc/cpuinfo", "r") as File:
        Lines = File.readlines()

    for Line in Lines:
        if "Features" not in Line:
            continue

        Features = Line.split(":")[1].strip().split(" ")
        for Version, Mandatory in Tiers:
            if ListContainsRequired(Features, Mandatory):
                return Version

        # Only the first "Features" line is considered.
        break

    # Minimum spec is ARMv8.0
    return "8.0"

def main():
    """Print a CPU class for the host and exit with status 0.

    Prints "ARMv<version>" on aarch64 and "x64" on x86_64; prints nothing on
    any other machine type.
    """
    Machine = platform.machine()

    if Machine == "aarch64":
        print("ARMv{}".format(GetCPUFeaturesVersion()))
    elif Machine == "x86_64":
        print("x64")

    sys.exit(0)

if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: Scripts/DefinitionExtract.py
================================================
#!/usr/bin/python3
import clang.cindex
from clang.cindex import CursorKind
from clang.cindex import TypeKind
from clang.cindex import TranslationUnit
import sys
from dataclasses import dataclass
import subprocess
import logging
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

@dataclass
class TypeDefinition:
    """Common base for extracted definitions: a name plus a kind tag."""

    # Kind tags stored in `type`.
    TYPE_UNKNOWN = 0
    TYPE_STRUCT = 1
    TYPE_UNION = 2
    TYPE_FIELD = 3
    TYPE_VARDECL = 4

    name: str
    type: int

    def __init__(self, Name, Type):
        self.name, self.type = Name, Type

    @property
    def Name(self):
        # Read-only alias of `name`.
        return self.name

    @property
    def Type(self):
        # Read-only alias of `type`.
        return self.type

@dataclass
class AliasType:
    """An alias name together with the target it applies to."""

    # Target tags stored in the `AliasType` field.
    ALIAS_X86_32  = 0
    ALIAS_X86_64  = 1
    ALIAS_AARCH64 = 2
    ALIAS_WIN32   = 3
    ALIAS_WIN64   = 4

    Name: str
    AliasType: int

    def __init__(self, Name, Type):
        self.Name, self.AliasType = Name, Type

@dataclass
class StructDefinition(TypeDefinition):
    """A struct: its size plus the aliases and members collected for it."""

    Size: int
    Aliases: list
    Members: list
    ExpectFEXMatch: bool

    def __init__(self, Name, Size):
        TypeDefinition.__init__(self, Name, TypeDefinition.TYPE_STRUCT)
        self.Size = Size
        # Filled in while the struct's children are walked.
        self.Aliases, self.Members = [], []
        self.ExpectFEXMatch = False

@dataclass
class UnionDefinition(TypeDefinition):
    """A union: its size plus the aliases and members collected for it."""

    Size: int
    Aliases: list
    Members: list
    ExpectFEXMatch: bool

    def __init__(self, Name, Size):
        TypeDefinition.__init__(self, Name, TypeDefinition.TYPE_UNION)
        self.Size = Size
        # Filled in while the union's children are walked.
        self.Aliases, self.Members = [], []
        self.ExpectFEXMatch = False

@dataclass
class FieldDefinition(TypeDefinition):
    """A member field: its size, its offset in the parent, and its alignment."""

    Size: int
    OffsetOf: int
    Alignment: int

    def __init__(self, Name, Size, OffsetOf, Alignment):
        TypeDefinition.__init__(self, Name, TypeDefinition.TYPE_FIELD)
        self.Size, self.OffsetOf, self.Alignment = Size, OffsetOf, Alignment

@dataclass
class VarDeclDefinition(TypeDefinition):
    """A variable or typedef declaration: its size plus the aliases collected for it."""

    Size: int
    Aliases: list
    ExpectFEXMatch: bool
    Value: str

    def __init__(self, Name, Size):
        super(VarDeclDefinition, self).__init__(Name, TypeDefinition.TYPE_VARDECL)
        self.Size = Size
        self.Aliases = []
        self.ExpectFEXMatch = False
        # `Value` is a declared dataclass field, so the generated __repr__/__eq__ read it;
        # leaving it unset made repr() and == raise AttributeError.
        self.Value = ""

@dataclass
class ArchDB:
    """Everything collected while walking one architecture's translation unit."""

    Parsed: bool
    ArchName: str
    NamespaceScope: list
    CurrentNamespace: str
    TU: TranslationUnit
    Structs: dict
    Unions: dict
    VarDecls: dict
    FieldDecls: list

    def __init__(self, ArchName):
        self.ArchName = ArchName
        # Starts out True; handlers set it to False when they hit something they cannot process.
        self.Parsed = True
        self.TU = None

        # Stack of enclosing scope names and its "a::b::" rendering.
        self.NamespaceScope, self.CurrentNamespace = [], ""

        # Collected definitions; the dicts are keyed by definition name.
        self.Structs, self.Unions, self.VarDecls = {}, {}, {}
        self.FieldDecls = []

@dataclass
class FunctionDecl:
    """A function declaration: name, return type spelling and parameter type spellings."""

    Name: str
    Ret: str
    Params: list

    def __init__(self, Name, Ret):
        self.Name, self.Ret = Name, Ret
        # Parameter types are appended by the caller as they are discovered.
        self.Params = []

# Module-wide list of the function declarations collected so far.
FunctionDecls = []

def HandleFunctionDeclCursor(Arch, Cursor):
    """Record a function declaration in the module-level ``FunctionDecls`` list.

    Function definitions are ignored, and so is any declaration carrying an
    asm-label or warn-unused-result attribute. An unrecognised child kind is
    logged and terminates the process with exit code -1.

    Returns:
        ``Arch`` unchanged.
    """
    if Cursor.is_definition():
        return Arch

    Function = FunctionDecl(Cursor.spelling, Cursor.result_type.spelling)

    for Child in Cursor.get_children():
        Kind = Child.kind

        if Kind == CursorKind.TYPE_REF:
            # This would give us the return type, which was already taken from the cursor.
            continue

        if Kind == CursorKind.PARM_DECL:
            # This gives us a parameter type
            Function.Params.append(Child.type.spelling)
            continue

        if (Kind == CursorKind.ASM_LABEL_ATTR or
            Kind == CursorKind.WARN_UNUSED_RESULT_ATTR):
            # Declarations with these attributes are dropped entirely.
            return Arch

        if (Kind == CursorKind.VISIBILITY_ATTR or
            Kind == CursorKind.UNEXPOSED_ATTR or
            Kind == CursorKind.CONST_ATTR or
            Kind == CursorKind.PURE_ATTR):
            # Attributes that don't affect what is recorded.
            continue

        logging.critical ("\tUnhandled FunctionDeclCursor {0}-{1}-{2}".format(Child.kind, Child.type.spelling, Child.spelling))
        sys.exit(-1)

    FunctionDecls.append(Function)
    return Arch

def PrintFunctionDecls():
    """Print an empty ``fex_gen_config`` specialisation for every collected function."""
    Template = "template<>\nstruct fex_gen_config<{}> {{}};"
    for Function in FunctionDecls:
        print(Template.format(Function.Name))

def FindClangArguments(OriginalArguments):
    """Ask the ``clang`` driver for its system include search path.

    Runs ``clang <OriginalArguments> -v -x c++ -S -`` with stdin connected to
    the null device and reads its stderr. Every line between
    ``#include <...> search starts here:`` and ``End of search list.`` is
    turned into a ``-I<dir>`` argument.

    Args:
        OriginalArguments: list of arguments to pass to clang.

    Returns:
        A new list: the discovered ``-I`` arguments followed by
        ``OriginalArguments``.
    """
    AddedArguments = ["clang"]
    AddedArguments.extend(OriginalArguments)
    AddedArguments.extend(["-v", "-x", "c++", "-S", "-"])
    Proc = subprocess.Popen(AddedArguments, stderr = subprocess.PIPE, stdin = subprocess.DEVNULL)

    NewIncludes = []
    InSearchList = False
    try:
        # readline() returns b"" only at end of stream. Stripping happens afterwards so a
        # blank line in the output is not mistaken for EOF.
        for RawLine in iter(Proc.stderr.readline, b""):
            Line = RawLine.strip()

            if (Line == b"End of search list."):
                break

            if InSearchList:
                if Line:
                    NewIncludes.append("-I" + Line.decode('ascii'))
            elif (Line == b"#include <...> search starts here:"):
                InSearchList = True
    finally:
        # Stop the child, release the pipe and reap the process so nothing lingers.
        Proc.terminate()
        Proc.stderr.close()
        Proc.wait()

    # Add back original arguments
    NewIncludes.extend(OriginalArguments)
    return NewIncludes

def SetNamespace(Arch):
    """Rebuild ``Arch.CurrentNamespace`` from ``Arch.NamespaceScope``.

    Each scope name is followed by "::", so ["a", "b"] becomes "a::b::" and an
    empty scope list becomes "".
    """
    Arch.CurrentNamespace = "".join(Scope + "::" for Scope in Arch.NamespaceScope)

def HandleStructDeclCursor(Arch, Cursor, NameOverride = ""):
    """Record the struct at ``Cursor`` in ``Arch.Structs``.

    The struct is named after its type spelling, or ``NameOverride`` when the
    spelling is empty. While its children are processed, a non-empty name is
    pushed onto the namespace scope.

    Returns:
        ``Arch``.
    """
    StructType = Cursor.type
    CursorName = NameOverride if len(StructType.spelling) == 0 else StructType.spelling
    IsNamed = len(CursorName) != 0

    # Append namespace
    if IsNamed:
        Arch.NamespaceScope.append(CursorName)
        SetNamespace(Arch)

    Struct = StructDefinition(Name = CursorName, Size = StructType.get_size())

    # Handle children
    Arch.Structs[Struct.Name] = HandleStructElements(Arch, Struct, Cursor)

    # Pop namespace off
    if IsNamed:
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)

    return Arch

def HandleUnionDeclCursor(Arch, Cursor, NameOverride = ""):
    """Record the union at ``Cursor`` in ``Arch.Unions``.

    The union is named after the cursor spelling, or ``NameOverride`` when the
    spelling is empty. While its children are processed, a non-empty name is
    pushed onto the namespace scope.

    Returns:
        ``Arch``.
    """
    CursorName = NameOverride if len(Cursor.spelling) == 0 else Cursor.spelling
    IsNamed = len(CursorName) != 0

    # Append namespace
    if IsNamed:
        Arch.NamespaceScope.append(CursorName)
        SetNamespace(Arch)

    Union = UnionDefinition(Name = CursorName, Size = Cursor.type.get_size())

    # Registered before the children are walked, then stored again with the walk's result.
    Arch.Unions[Union.Name] = Union

    # Handle children
    Arch.Unions[Union.Name] = HandleStructElements(Arch, Union, Cursor)

    # Pop namespace off
    if IsNamed:
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)

    return Arch

def HandleVarDeclCursor(Arch, Cursor):
    """Record the variable declaration at ``Cursor`` in ``Arch.VarDecls``.

    The entry is keyed by the cursor's spelling and carries the size of the
    cursor's type, plus whatever annotations HandleVarDeclElements collects.

    Returns:
        ``Arch``.
    """
    # The previous version also fetched Cursor.get_definition() into a local that was never used.
    VarDecl = VarDeclDefinition(
        Name = Cursor.spelling,
        Size = Cursor.type.get_size())
    Arch.VarDecls[VarDecl.Name] = HandleVarDeclElements(Arch, VarDecl, Cursor)
    return Arch

def HandleVarDeclElements(Arch, VarDecl, Cursor):
    """Apply the annotation attributes found under ``Cursor`` to ``VarDecl``.

    Recognised annotations:
      * ``ioctl-alias-<arch>-<name>``: appends an AliasType to
        ``VarDecl.Aliases``; ``<arch>`` is one of x86_32, x86_64, aarch64,
        win32, win64. An unknown arch or a missing name is logged and clears
        ``Arch.Parsed``.
      * ``fex-match``: sets ``VarDecl.ExpectFEXMatch``.
    Other annotations and every other child kind are ignored.

    Returns:
        ``VarDecl``.
    """
    AliasKinds = {
        "x86_32": AliasType.ALIAS_X86_32,
        "x86_64": AliasType.ALIAS_X86_64,
        "aarch64": AliasType.ALIAS_AARCH64,
        "win32": AliasType.ALIAS_WIN32,
        "win64": AliasType.ALIAS_WIN64,
    }

    for Child in Cursor.get_children():
        if (Child.kind != CursorKind.ANNOTATE_ATTR):
            # Type references, expressions and anything else carry nothing recorded here.
            continue

        if (Child.spelling.startswith("ioctl-alias-")):
            Sections = Child.spelling.split("-")
            # ALIAS_X86_32 is 0, so compare against None rather than testing truthiness.
            Kind = AliasKinds.get(Sections[2])
            if (Kind is None or len(Sections) < 4):
                logging.critical ("Can't handle alias type '{0}'".format(Child.spelling))
                Arch.Parsed = False
            else:
                VarDecl.Aliases.append(AliasType(Sections[3], Kind))
        elif (Child.spelling == "fex-match"):
            # The declared field is ExpectFEXMatch; the old code set a differently
            # spelled attribute, so the declared flag never became True.
            VarDecl.ExpectFEXMatch = True
        else:
            # Unknown annotation
            pass

    return VarDecl

def HandleTypeDefDeclCursor(Arch, Cursor):
    """Process a typedef declaration.

    Typedefs without a name are ignored. A typedef whose underlying type is an
    elaborated type resolving to a record is handed to HandleTypeDefDecl and
    then walked by HandleCursor with the typedef name pushed onto the namespace
    scope. Any other named typedef is recorded in ``Arch.VarDecls`` with the
    size of its canonical type.

    Returns:
        ``Arch``.
    """
    TypeDefType = Cursor.underlying_typedef_type
    CanonicalType = TypeDefType.get_canonical()

    TypeDefName = Cursor.type.get_typedef_name()
    if (len(TypeDefName) == 0):
        return Arch

    if (TypeDefType.kind == TypeKind.ELABORATED and CanonicalType.kind == TypeKind.RECORD):
        HandleTypeDefDecl(Arch, Cursor, TypeDefName)

        # Append namespace
        Arch.NamespaceScope.append(TypeDefName)
        SetNamespace(Arch)

        # The walk updates Arch in place. Its return value is deliberately not assigned
        # back: it can be None, and Arch is still needed below.
        HandleCursor(Arch, Cursor)

        # Pop namespace off
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)
    else:
        VarDecl = VarDeclDefinition(
            Name = TypeDefName,
            Size = CanonicalType.get_size())
        Arch.VarDecls[VarDecl.Name] = HandleVarDeclElements(Arch, VarDecl, Cursor)

    return Arch

def HandleStructElements(Arch, Struct, Cursor):
    """Walk the children of a struct/union cursor and fill in ``Struct``.

    * Annotation ``alias-<arch>-<name>`` appends an AliasType to
      ``Struct.Aliases`` (``<arch>`` is one of x86_32, x86_64, aarch64, win32,
      win64); an unknown arch or a missing name is logged and clears
      ``Arch.Parsed``.
    * Annotation ``fex-match`` sets ``Struct.ExpectFEXMatch``.
    * Field declarations and nested struct declarations are appended to
      ``Struct.Members`` and ``Arch.FieldDecls``; nested structs are also
      recorded through HandleStructDeclCursor.
    * The members of a nested union are folded directly into ``Struct``.
    * Typedefs go to HandleTypeDefDeclCursor, everything else to HandleCursor.

    Returns:
        ``Struct``.
    """
    AliasKinds = {
        "x86_32": AliasType.ALIAS_X86_32,
        "x86_64": AliasType.ALIAS_X86_64,
        "aarch64": AliasType.ALIAS_AARCH64,
        "win32": AliasType.ALIAS_WIN32,
        "win64": AliasType.ALIAS_WIN64,
    }

    # The handlers called below update Arch in place. Their return values are not assigned
    # back because some of them can return None, which would break the following iterations.
    for Child in Cursor.get_children():
        if (Child.kind == CursorKind.ANNOTATE_ATTR):
            if (Child.spelling.startswith("alias-")):
                Sections = Child.spelling.split("-")
                # ALIAS_X86_32 is 0, so compare against None rather than testing truthiness.
                Kind = AliasKinds.get(Sections[1])
                if (Kind is None or len(Sections) < 3):
                    logging.critical ("Can't handle alias type '{0}'".format(Child.spelling))
                    Arch.Parsed = False
                else:
                    Struct.Aliases.append(AliasType(Sections[2], Kind))
            elif (Child.spelling == "fex-match"):
                # The declared field is ExpectFEXMatch; the old code set a differently
                # spelled attribute, so the declared flag never became True.
                Struct.ExpectFEXMatch = True
            else:
                # Unknown annotation
                pass
        elif (Child.kind == CursorKind.FIELD_DECL or
              Child.kind == CursorKind.STRUCT_DECL):
            ParentType = Cursor.type
            FieldType = Child.type
            Field = FieldDefinition(
                Name = Child.spelling,
                Size = FieldType.get_size(),
                OffsetOf = ParentType.get_offset(Child.spelling),
                Alignment = FieldType.get_align())

            Struct.Members.append(Field)
            Arch.FieldDecls.append(Field)

            if (Child.kind == CursorKind.STRUCT_DECL):
                # A nested struct is additionally recorded as a struct of its own.
                HandleStructDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.UNION_DECL):
            # A nested union is not recorded separately; its members are merged into this one.
            Struct = HandleStructElements(Arch, Struct, Child)
        elif (Child.kind == CursorKind.TYPEDEF_DECL):
            HandleTypeDefDeclCursor(Arch, Child)
        else:
            HandleCursor(Arch, Child)

    return Struct

def HandleTypeDefDecl(Arch, Cursor, Name):
    """Process the children of a typedef cursor whose typedef name is ``Name``.

    Struct declarations are recorded under ``Name`` when they have no spelling
    of their own, nested typedefs are processed recursively, and unions plus a
    few reference/attribute kinds are skipped. Any other child kind is logged.

    Returns:
        ``Arch`` (existing callers ignore the return value).
    """
    for Child in Cursor.get_children():
        if (Child.kind == CursorKind.UNION_DECL):
            # Unions are skipped here.
            # NOTE(review): the previous version had a second UNION_DECL branch calling
            # HandleUnionDeclCursor(Arch, Child, Name), which this branch made unreachable.
            # It was removed without changing behaviour; confirm whether unions should be
            # recorded here.
            pass
        elif (Child.kind == CursorKind.STRUCT_DECL):
            HandleStructDeclCursor(Arch, Child, Name)
        elif (Child.kind == CursorKind.TYPEDEF_DECL):
            HandleTypeDefDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.TYPE_REF or
              Child.kind == CursorKind.NAMESPACE_REF or
              Child.kind == CursorKind.TEMPLATE_REF or
              Child.kind == CursorKind.ALIGNED_ATTR):
            # Safe to pass on
            pass
        else:
            logging.critical ("Unhandled TypedefDecl {0}-{1}-{2}".format(Child.kind, Child.type.spelling, Child.spelling))

    return Arch

def HandleCursor(Arch, Cursor):
    """Recursively walk `Cursor` and dispatch each child to its handler.

    Arch   -- architecture database object that is threaded through every
              handler and returned.
    Cursor -- cursor whose children are visited.

    Returns Arch. When `Cursor` is invalid, the translation unit diagnostics
    are logged, Arch.Parsed is set to False and Arch is returned unchanged
    otherwise.
    """
    if (Cursor.kind.is_invalid()):
        # The translation unit is stored on Arch by GetDB; no name `TU` is
        # bound in this function, so read it from Arch instead.
        TranslationUnitObj = getattr(Arch, "TU", None)
        if TranslationUnitObj is not None:
            for Diag in TranslationUnitObj.diagnostics:
                logging.warning (Diag.format())

        Arch.Parsed = False
        # Recursive callers assign our result back to Arch, so never hand
        # them None.
        return Arch

    for Child in Cursor.get_children():
        if (Child.kind == CursorKind.TRANSLATION_UNIT):
            Arch = HandleCursor(Arch, Child)
        elif (Child.kind == CursorKind.FIELD_DECL):
            pass
        elif (Child.kind == CursorKind.UNION_DECL):
            Arch = HandleUnionDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.STRUCT_DECL):
            Arch = HandleStructDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.TYPEDEF_DECL):
            Arch = HandleTypeDefDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.VAR_DECL):
            Arch = HandleVarDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.NAMESPACE):
            # Append namespace
            Arch.NamespaceScope.append(Child.spelling)
            SetNamespace(Arch)

            # Handle children
            Arch = HandleCursor(Arch, Child)

            # Pop namespace off
            Arch.NamespaceScope.pop()
            SetNamespace(Arch)
        elif (Child.kind == CursorKind.TYPE_REF):
            # Safe to pass on
            pass
        elif (Child.kind == CursorKind.FUNCTION_DECL):
            # For function printing
            Arch = HandleFunctionDeclCursor(Arch, Child)
        else:
            # Unknown kinds may still contain declarations; keep descending.
            Arch = HandleCursor(Arch, Child)

    return Arch

def GetDB(Arch, filename, args):
    """Parse `filename` with libclang and populate `Arch` from the result.

    Arch     -- architecture database object; receives the translation unit
                in Arch.TU and is passed to HandleCursor.
    filename -- path of the header to parse.
    args     -- list of clang command line arguments.

    Returns Arch on success. If libclang cannot load the translation unit,
    Arch.Parsed is set to False, a warning is logged and None is returned.
    """
    Index = clang.cindex.Index.create()
    try:
        TU = Index.parse(filename, args=args, options=TranslationUnit.PARSE_INCOMPLETE)
    except TranslationUnitLoadError as Error:
        # Index.parse raised, so no translation unit object (and therefore no
        # diagnostics list) exists; report the exception itself.
        Arch.Parsed = False
        logging.warning ("Failed to parse {0}: {1}".format(filename, Error))
        return

    Arch.TU = TU
    # Function declarations are collected in a module-level container; start
    # from a clean slate for this translation unit.
    FunctionDecls.clear()
    HandleCursor(Arch, TU.cursor)

    # Get diagnostics
    Diags = TU.diagnostics
    if (len(Diags) != 0):
        logging.warning ("Diagnostics from Arch: {0}".format(Arch.ArchName))

    for Diag in Diags:
        logging.warning (Diag.format())

    return Arch

def main():
    """Parse a header for x86_64 and print the collected function declarations.

    Command line: <Header.hpp> <clang arguments...>

    Returns 0 on completion and 1 when the interpreter is too old or the
    header argument is missing; the value is used as the process exit status.
    """
    if sys.version_info[0] < 3:
        logging.critical ("Python 3 or a more recent version is required.")
        return 1

    if (len(sys.argv) < 2):
        print ("usage: %s <Header.hpp> <clang arguments...>" % (sys.argv[0]))
        # Without a header there is nothing to parse; bail out instead of
        # failing on sys.argv[1] below.
        return 1

    # Parse our arguments
    Header = sys.argv[1]

    # Everything after the header is forwarded to clang
    BaseArgs = list(sys.argv[2:])

    args_x86_64 = [
        "-isystem", "/usr/include/x86_64-linux-gnu",
        "-isystem", "/usr/x86_64-linux-gnu/include/c++/10/x86_64-linux-gnu/",
        "-isystem", "/usr/x86_64-linux-gnu/include/",
        "-O2",
        "--target=x86_64-linux-unknown",
        "-DARCHITECTURE_x86_64",
    ]

    # Add all the arguments to the different lists
    args_x86_64.extend(BaseArgs)

    # We need to find the default arguments through clang invocations
    args_x86_64 = FindClangArguments(args_x86_64)

    Arch_x86_64 = ArchDB("x86_64")
    Arch_x86_64 = GetDB(Arch_x86_64, Header, args_x86_64)
    PrintFunctionDecls()
    return 0

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/FEXUpdateAOTIRCache.sh
================================================
#!/bin/sh
# Generates missing AOT IR cache files.
#
# Usage: FEXUpdateAOTIRCache.sh [FEX binary]   (defaults to FEXLoader)
#
# Every ~/.fex-emu/aotir/*.path file contains the path of a binary. The
# characters in front of the ".path" suffix select the options that are passed
# to FEX when generating the matching ".aotir" file.
FEX=${1:-FEXLoader}
echo "Using $FEX"

for fileid in ~/.fex-emu/aotir/*.path; do
	# When nothing matches, the shell leaves the pattern unexpanded.
	# Skip that literal pattern (and anything else that isn't a file).
	[ -f "$fileid" ] || continue

	filename=$(cat "$fileid")

	# if L is 6 chars from the end, use localflags
	case $fileid in
		*L?????) _abi=--abilocalflags ;;
		*)    _abi=--no-abilocalflags ;;
	esac

	# if T is 7 chars from the end, use tso
	case $fileid in
		*T??????) _tso=--tsoenabled ;;
		*)     _tso=--no-tsoenabled ;;
	esac

	# if S is 8 chars from the end, use full smc
	case $fileid in
		*S???????) _smc=full ;;
		*)         _smc=mman ;;
	esac

	# Only used for the progress message below
	args="$_abi $_tso --smc=$_smc"

	if [ -f "${fileid%.path}.aotir" ]; then
		echo "$(basename "$fileid") has already been generated"
	else
		echo "Processing $(basename "$fileid") ($filename) with $args"
		# $FEX is intentionally unquoted so it may carry extra arguments
		$FEX --aotirgenerate "$_abi" "$_tso" --smc="$_smc" "$filename"
	fi
done


================================================
FILE: Scripts/GenerateSyscallNumbers.py
================================================
#!/usr/bin/python3
from dataclasses import dataclass
import math
import sys
import logging
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

# Usage of this script is `Scripts/GenerateSyscallNumbers.py <Path to Linux directory>`
# This will then parse the syscall headers and format them in an enum
# Then this will be output in stdout
# This output should then be checked and copied to the following headers, splitting up the enums:
#   - Source/Tests/LinuxSyscalls/x32/SyscallsEnum.h
#   - Source/Tests/LinuxSyscalls/x64/SyscallsEnum.h
#   - Source/Tests/LinuxSyscalls/Arm64/SyscallsEnum.h
# `FEX_Syscalls_Common` is provided in the output as just an indicator for which syscalls are using the common
# syscall interface.

@dataclass
class SyscallDefinition:
    """A single syscall entry as parsed from one architecture's table.

    The constructor takes (Arch, SyscallNumber, ABI, Name, Entry) and stores
    them in the lower-case fields; the capitalised properties are read-only
    views of those fields.
    """
    # Architecture label supplied by the parser
    arch: str
    # Syscall number exactly as the parser supplied it
    syscall_number: int
    # ABI column of the table (or the architecture label for the common table)
    abi: str
    # Syscall name, after any rename
    name: str
    # Kernel entry point name, or "<None>" when the table has none
    entry: str

    def __init__(self, Arch, SyscallNumber, ABI, Name, Entry):
        self.arch, self.syscall_number, self.abi = Arch, SyscallNumber, ABI
        self.name, self.entry = Name, Entry

    @property
    def Arch(self):
        """Architecture label."""
        return self.arch

    @property
    def Number(self):
        """Syscall number."""
        return self.syscall_number

    @property
    def ABI(self):
        """ABI label."""
        return self.abi

    @property
    def Name(self):
        """Syscall name."""
        return self.name

    @property
    def EntryName(self):
        """Entry point name."""
        return self.entry

# Locations of the syscall tables, relative to the Linux source tree that is
# passed on the command line (main prepends that path).
Syscallx64File = "/arch/x86/entry/syscalls/syscall_64.tbl"
Syscallx86File = "/arch/x86/entry/syscalls/syscall_32.tbl"
SyscallArm64File = "/include/uapi/asm-generic/unistd.h"

# Syscall names that had naming conflict with some global definitions
# Renamed to work around that issue
DefinitionRenameDict = {
    "pread64": "pread_64",
    "pwrite64": "pwrite_64",
    "prlimit64": "prlimit_64",
    # musl/Alpine Linux defines `fstatat64` as a define that points to `fstatat`.
    # Rename it to avoid global define conflicts.
    "fstatat64": "fstatat_64",
}

# Per-architecture lists of SyscallDefinition, filled by the Parse* functions
# in table order. The matching *_dict objects are passed alongside the lists
# but are never read or written by the functions in this file.
Definitions_x64 = []
Definitions_x64_dict = {}
Definitions_x86 = []
Definitions_x86_dict = {}
Definitions_Arm64 = []
Definitions_Arm64_dict = {}

# Number of tables parsed so far (incremented once per Parse* call)
NumArches = 0
# Syscall name -> list of every SyscallDefinition with that name, across all
# parsed tables
SyscallDefinitions = {}

def ParseArchSyscalls(Defs, DefsDict, Arch, FilePath, IgnoreArch):
    """Parse a `.tbl` style syscall table and record every entry.

    Defs       -- list that receives one SyscallDefinition per kept row.
    DefsDict   -- accepted for interface symmetry; not used here.
    Arch       -- architecture label stored in every definition.
    FilePath   -- path of the table to read.
    IgnoreArch -- container of ABI names whose rows are dropped.

    Each kept definition is also appended to the module-level
    SyscallDefinitions[Name] list, and NumArches is incremented once.
    """
    global NumArches
    global SyscallDefinitions

    with open(FilePath, "r") as TableFile:
        TableLines = TableFile.readlines()

    NumArches += 1
    for RawLine in TableLines:
        Row = RawLine.strip()

        # Nothing to do for comments and blank lines
        if Row.startswith("#") or len(Row) == 0:
            continue

        # Columns: <Number> <ABI> <Name> [<Entry Name>]
        Columns = Row.split()
        Num = Columns[0]
        ABI = Columns[1]

        # Rows for an ignored ABI are not stored
        if ABI in IgnoreArch:
            continue

        Name = Columns[2]
        # The entry column is absent when there is no entry point for the row
        EntryName = Columns[3] if len(Columns) >= 4 else "<None>"

        # Apply the conflict-avoiding rename, if any
        Name = DefinitionRenameDict.get(Name, Name)

        Def = SyscallDefinition(Arch, Num, ABI, Name, EntryName)
        Defs.append(Def)
        SyscallDefinitions.setdefault(Name, []).append(Def)

def ParseCommonArchSyscalls(Defs, DefsDict, Arch, FilePath):
    """Parse a C header of `__NR_*` defines and `__SYSCALL`-style macros.

    Defs     -- list that receives one SyscallDefinition per macro line.
    DefsDict -- accepted for interface symmetry; not used here.
    Arch     -- architecture label; also stored as the ABI of each entry.
    FilePath -- path of the header to read.

    `#define __NR_<name> <value>` / `#define __NR3264_<name> <value>` lines
    are remembered, and each later `__SYSCALL(`, `__SC_COMP(`, `__SC_3264(`
    or `__SC_COMP_3264(` line looks its number up by name (KeyError if the
    define was not seen first). Every definition is also appended to the
    module-level SyscallDefinitions[Name] list; NumArches is incremented once.
    """
    global NumArches
    global SyscallDefinitions

    with open(FilePath, "r") as HeaderFile:
        HeaderLines = HeaderFile.readlines()

    NumArches += 1

    DefinePrefixes = ("#define __NR_", "#define __NR3264_")
    MacroPrefixes = ("__SYSCALL(", "__SC_COMP(", "__SC_3264(", "__SC_COMP_3264(")

    # Name -> number text, collected from the #define lines
    SyscallNumbers = {}
    for RawLine in HeaderLines:
        Text = RawLine.strip()
        if len(Text) == 0:
            continue

        if Text.startswith(DefinePrefixes):
            # eg: #define __NR_io_setup 0
            for Prefix in DefinePrefixes:
                Text = Text.removeprefix(Prefix)
            Pieces = Text.split(" ")

            # Several spaces may separate name and value, so the value is
            # always the last piece.
            SyscallNumbers[Pieces[0]] = Pieces[-1]
            continue

        # Only the syscall-declaring macros are of interest from here on
        Leading = next((Macro for Macro in MacroPrefixes if Text.startswith(Macro)), None)
        if Leading is None:
            continue
        Text = Text.removeprefix(Leading)

        # Drop the __NR_/__NR3264_ prefix of the first macro argument. When
        # neither is present the macro prefix is stripped once more.
        if Text.startswith("__NR_"):
            Leading = "__NR_"
        elif Text.startswith("__NR3264_"):
            Leading = "__NR3264_"
        Text = Text.removeprefix(Leading)

        Arguments = Text.split(",")

        Name = Arguments[0]
        Num = SyscallNumbers[Name]
        ABI = Arch
        # Second macro argument, up to the closing parenthesis
        EntryName = Arguments[1].strip().split(")")[0]

        Name = DefinitionRenameDict.get(Name, Name)

        Def = SyscallDefinition(Arch, Num, ABI, Name, EntryName)
        Defs.append(Def)
        SyscallDefinitions.setdefault(Name, []).append(Def)

def ExportSyscallDefines(Defs, DefsDict, Arch, UnsupportedDefs):
    """Print a C enum `Syscalls_<Arch>` for one architecture to stdout.

    Defs            -- definitions of this architecture, printed in order.
    DefsDict        -- accepted for interface symmetry; not used here.
    Arch            -- architecture label used in the enum and entry names.
    UnsupportedDefs -- list of definition lists from other architectures;
                       names not present in `Defs` are printed once each
                       with the value `~0`.

    The `SYSCALL_<Arch>_MAX` entry is the smallest power of two strictly
    greater than len(Defs) (1 for an empty list).
    """
    # Names already emitted into the enum; a set keeps the lookups cheap
    AlreadyExported = set()

    print("enum Syscalls_{} {{".format(Arch))
    for Def in Defs:
        if Def.EntryName == "<None>":
            print("  // No entrypoint. -ENOSYS")
        print("  SYSCALL_{}_{} = {},".format(Arch, Def.Name, Def.Number))
        AlreadyExported.add(Def.Name)

    # Print ourselves a max
    # bit_length() is floor(log2(n)) + 1 computed exactly on integers, and is
    # also defined for n == 0 where math.log would raise.
    Max = 1 << len(Defs).bit_length()
    print("  SYSCALL_{}_MAX = {},".format(Arch, Max))

    if len(UnsupportedDefs) != 0:
        # Print out syscalls that don't exist on this architecture
        print("")
        print("  // Unsupported syscalls on this host")

        for DefList in UnsupportedDefs:
            for Def in DefList:
                # Skip names this architecture has, and names a previous
                # unsupported list already produced.
                if Def.Name in AlreadyExported:
                    continue

                print("  SYSCALL_{}_{} = ~0,".format(Arch, Def.Name))

                AlreadyExported.add(Def.Name)

    print("};")

def ExportCommonSyscallDefines():
    """Print the `FEX_Syscalls_Common` enum to stdout.

    A syscall from Definitions_Arm64 is emitted as `SYS_<name>` only when
    SyscallDefinitions holds exactly NumArches definitions for its name and
    all of them carry the same number. `SYSCALL_MAX` is the largest of the
    per-architecture maxima, each being the smallest power of two strictly
    greater than that architecture's definition count.
    """
    print("enum FEX_Syscalls_Common {")
    for Def in Definitions_Arm64:
        # Check the dict to ensure the definitions exist everywhere
        if Def.Name not in SyscallDefinitions:
            continue

        Defs = SyscallDefinitions[Def.Name]
        if len(Defs) != NumArches:
            continue

        # Only syscalls whose number agrees everywhere are common
        if any(AllDef.Number != Def.Number for AllDef in Defs):
            continue

        for AllDef in Defs:
            if AllDef.EntryName == "<None>":
                print("  // {} No entrypoint. -ENOSYS".format(AllDef.Arch))

        print("  SYS_{} = {},".format(Def.Name, Def.Number))


    # Find a max between all architectures
    # bit_length() is floor(log2(n)) + 1 on exact integers and, unlike
    # math.log, is defined for an empty definition list.
    Maximums = [1 << len(Defs).bit_length()
                for Defs in (Definitions_x64, Definitions_x86, Definitions_Arm64)]
    print("  SYSCALL_MAX = {},".format(max(Maximums)))

    print("};")


def main():
    """Parse the syscall tables of a Linux tree and print the enums.

    Command line: <Linux git tree>

    Returns 0 on completion and 1 when the interpreter is too old or the
    Linux tree argument is missing; the value is used as the exit status.
    """
    if sys.version_info[0] < 3:
        logging.critical ("Python 3 or a more recent version is required.")
        return 1

    if (len(sys.argv) < 2):
        print ("usage: %s <Linux git tree>" % (sys.argv[0]))
        # Nothing to parse without a tree; avoid failing on sys.argv[1] below
        return 1

    LinuxPath = sys.argv[1]

    # The x32 ABI rows of the 64-bit table are not wanted
    ParseArchSyscalls(Definitions_x86, Definitions_x86_dict, "x86", LinuxPath + Syscallx86File, [])
    ParseArchSyscalls(Definitions_x64, Definitions_x64_dict, "x64", LinuxPath + Syscallx64File, ["x32"])
    ParseCommonArchSyscalls(Definitions_Arm64, Definitions_Arm64_dict, "Arm64", LinuxPath + SyscallArm64File)

    # Each later enum also lists, as `~0`, the names only earlier arches have
    ExportSyscallDefines(Definitions_x86, Definitions_x86_dict, "x86",[])
    ExportSyscallDefines(Definitions_x64, Definitions_x64_dict, "x64", [Definitions_x86])
    ExportSyscallDefines(Definitions_Arm64, Definitions_Arm64_dict, "Arm64", [Definitions_x86, Definitions_x64])

    ExportCommonSyscallDefines()
    return 0

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/InstallFEX.py
================================================
#!/usr/bin/python3
import os
import subprocess
import platform
import sys
import re

try:
    from packaging.version import Version as version_check
except:
    from pkg_resources import parse_version as version_check

_Arch = None
def GetArch():
    """Return the output of `uname -m`, stripped; queried once and cached."""
    global _Arch

    if _Arch is not None:
        return _Arch

    RawOutput = subprocess.check_output(['uname', '-m'])
    _Arch = RawOutput.decode("utf-8").strip()
    return _Arch

_Distro = None

def _StripQuotes(Val):
    """Remove one pair of matching surrounding single or double quotes."""
    if len(Val) >= 2 and Val[0] == Val[-1] and Val[0] in "\"'":
        return Val[1:-1]
    return Val

def _ParseReleaseFile(Path, DistroKey, VersionKey):
    """Read a KEY=VALUE file and return [distro, version], or None.

    Blank lines, `#` comments and lines without `=` are skipped. Values have
    surrounding whitespace and one pair of quotes removed. None is returned
    unless both `DistroKey` and `VersionKey` are present.
    """
    Values = {}
    with open(Path, "r") as File:
        for Line in File:
            Line = Line.strip()
            if not Line or Line.startswith("#") or "=" not in Line:
                continue
            Key, Val = Line.split("=", 1)
            Values[Key.strip()] = _StripQuotes(Val.strip())

    if DistroKey in Values and VersionKey in Values:
        return [Values[DistroKey], Values[VersionKey]]
    return None

def GetDistro():
    """Return [distro id, version] for the host, cached after the first call.

    Files are queried in order:
      /etc/lsb-release  (DISTRIB_ID lower-cased, DISTRIB_RELEASE)
      /etc/os-release   (ID, VERSION_ID)
    The first file that provides both keys wins. ["Unknown", "0.0"] is
    returned when neither does.
    """
    global _Distro

    if _Distro is not None:
        return _Distro

    if os.path.exists("/etc/lsb-release"):
        Result = _ParseReleaseFile("/etc/lsb-release", "DISTRIB_ID", "DISTRIB_RELEASE")
        if Result is not None:
            _Distro = [Result[0].lower(), Result[1]]
            return _Distro

    if os.path.exists("/etc/os-release"):
        Result = _ParseReleaseFile("/etc/os-release", "ID", "VERSION_ID")
        if Result is not None:
            _Distro = Result
            return _Distro

    # Unknown
    _Distro = ["Unknown", "0.0"]
    return _Distro

def IsSupportedArch():
    """Return True only when the host architecture is "aarch64"."""
    return GetArch() == "aarch64"

def IsSupportedDistro():
    """Return True when the host is one of the supported Ubuntu releases."""
    DistroName, DistroVersion = GetDistro()[:2]

    # We only support Ubuntu
    if DistroName != "ubuntu":
        return False

    return DistroVersion in {"22.04", "24.04", "24.10", "25.04", "25.10"}

_ArchVersion = None
def ListContainsRequired(Features, RequiredFeatures):
    """Return True when every item of RequiredFeatures is in Features."""
    return all(Needed in Features for Needed in RequiredFeatures)

def GetCPUFeaturesVersion():
    """Return "8.0".."8.4" based on the first Features line of /proc/cpuinfo.

    The highest level whose feature list is fully present wins; "8.0" is the
    fallback. The result is computed once and cached.
    """
    global _ArchVersion

    if _ArchVersion is not None:
        return _ArchVersion

    # Each level requires everything from the level below it.
    # Also LOR but kernel doesn't expose this
    v8_1Mandatory = ["atomics", "asimdrdm", "crc32"]
    v8_2Mandatory = v8_1Mandatory + ["dcpop"]
    v8_3Mandatory = v8_2Mandatory + ["fcma", "jscvt", "lrcpc", "paca", "pacg"]
    v8_4Mandatory = v8_3Mandatory + ["asimddp", "flagm", "ilrcpc", "uscat"]

    with open("/proc/cpuinfo", "r") as CpuInfo:
        CpuInfoLines = CpuInfo.readlines()

    # Minimum spec is ARMv8.0
    _ArchVersion = "8.0"

    # Only the first line mentioning "Features" is inspected
    FeatureLine = next((Entry for Entry in CpuInfoLines if "Features" in Entry), None)
    if FeatureLine is not None:
        Features = FeatureLine.split(":")[1].strip().split(" ")

        # We don't care beyond 8.4 right now; check from newest to oldest
        Levels = (
            ("8.4", v8_4Mandatory),
            ("8.3", v8_3Mandatory),
            ("8.2", v8_2Mandatory),
            ("8.1", v8_1Mandatory),
        )
        for Version, Mandatory in Levels:
            if ListContainsRequired(Features, Mandatory):
                _ArchVersion = Version
                break

    return _ArchVersion

_PPAInstalled = None
FEXPPA_REGEX = r".*\/fex-emu\/fex\/ubuntu$"

def GetPPAStatus():
    """Return True when `apt-cache policy` lists the FEX PPA; cached."""
    global _PPAInstalled

    if _PPAInstalled is not None:
        return _PPAInstalled

    _PPAInstalled = False

    PolicyText = subprocess.check_output(['apt-cache', 'policy']).decode("utf-8")

    for Entry in PolicyText.split("\n"):
        if "http" not in Entry:
            continue

        # 'status' 'URL' 'series' 'arch' 'type'
        Columns = Entry.strip().split(" ")
        if re.match(FEXPPA_REGEX, Columns[1]):
            _PPAInstalled = True
            break

    return _PPAInstalled

def InstallPPA():
    """Add ppa:fex-emu/fex through sudo add-apt-repository.

    Returns True when the command exits with status 0, False otherwise or
    when interrupted from the keyboard.
    """
    print ("Installing PPA: ppa:fex-emu/fex")
    print ("This bit will ask for your password")

    try:
        Succeeded = subprocess.run(["sudo", "add-apt-repository", "-y", "ppa:fex-emu/fex"]).returncode == 0
    except KeyboardInterrupt:
        Succeeded = False

    print("PPA installed" if Succeeded else "PPA failed to install")
    return Succeeded

# Detected architecture level -> package name. Levels without a dedicated
# entry of their own share the package of the level below.
ARMVersionToPackage = dict([
    ("8.0", "fex-emu-armv8.0"),
    ("8.1", "fex-emu-armv8.0"),
    ("8.2", "fex-emu-armv8.2"),
    ("8.3", "fex-emu-armv8.2"),
    ("8.4", "fex-emu-armv8.4"),
])

def GetPackagesToInstall():
    """Return a fresh list: the CPU-specific package plus both binfmt packages."""
    CpuPackage = ARMVersionToPackage[GetCPUFeaturesVersion()]
    return [CpuPackage, "fex-emu-binfmt32", "fex-emu-binfmt64"]

def UpdatePPA():
    """Refresh the apt package lists with `sudo apt-get update`.

    Returns True when the command exits with status 0, False otherwise or
    when interrupted from the keyboard.
    """
    print ("Updating apt sources")
    print ("This bit will ask for your password")

    DidUpdate = False
    try:
        CmdResult = subprocess.run(["sudo", "apt-get", "update"])
        DidUpdate = CmdResult.returncode == 0
    except KeyboardInterrupt:
        DidUpdate = False

    # Report what this step actually did; nothing is installed here.
    if DidUpdate:
        print("apt sources updated")
    else:
        print("apt sources failed to update")

    return DidUpdate

# NOTE(review): a second function named `InstallPackages` is defined further
# down in this file. Because the later `def` rebinds the module-level name,
# every call made at run time (including the one in
# CheckAndInstallPackageUpdates) reaches the later definition and this body is
# never executed. Either rename this one (its messages talk about updating)
# or remove it.
def InstallPackages(PackagesToInstall):
    """Run `sudo apt-get -y install` for the given package names.

    Returns True when apt-get exits with status 0, False otherwise or when
    interrupted from the keyboard.
    """
    DidInstall = False
    try:
        CmdResult = subprocess.run(["sudo", "apt-get", "-y", "install"] + PackagesToInstall)
        DidInstall = CmdResult.returncode == 0
    except KeyboardInterrupt:
        print ("Keyboard interrupt")
        DidInstall = False
        pass

    if DidInstall:
        print("Packages updated")
    else:
        print("Packages failed to update")

    return DidInstall

def CheckAndInstallPackageUpdates(PackagesToInstall, InstallIfNotFound=False):
    """Install the packages of `PackagesToInstall` that apt reports as upgradable.

    PackagesToInstall -- list of package names; entries without a pending
                         upgrade are removed from this list in place unless
                         InstallIfNotFound is set.
    InstallIfNotFound -- keep (and install) packages even when apt lists no
                         upgrade for them.

    Returns True when nothing is left to install, otherwise the result of
    InstallPackages for the remaining names.
    """
    # Walk a snapshot, since entries are removed from the list itself
    for Candidate in list(PackagesToInstall):
        AptListing = subprocess.check_output(["apt", "list", "--upgradable", Candidate], stderr=None).decode("utf-8")

        # $ apt list --upgradable <Package>
        # prints, after "Listing... Done", one line per upgradable package:
        #   <Package>/<Repo> <NewVersion> <arch> [upgradable from: <Installed version>]
        # and nothing more when there is no upgrade, so every line is checked.
        HasUpgrade = any(Candidate in Row and "upgradable" in Row
                         for Row in AptListing.split("\n"))

        if InstallIfNotFound == False and not HasUpgrade:
            PackagesToInstall.remove(Candidate)

    if len(PackagesToInstall) == 0:
        return True

    print ("Found updates for packages: {}".format(PackagesToInstall))
    print ("This bit may ask for your password")

    return InstallPackages(PackagesToInstall)

def CheckPackageInstallStatus():
    """Return the wanted packages for which `dpkg -s` does not exit with 0."""
    def IsInstalled(Package):
        # Output is discarded; only the exit status matters
        Query = subprocess.run(["dpkg", "-s", Package], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        return Query.returncode == 0

    return [Package for Package in GetPackagesToInstall() if not IsInstalled(Package)]

def InstallPackages(Packages):
    """Install `Packages` with `sudo apt-get -y install`.

    Returns True when apt-get exits with status 0, False otherwise or when
    interrupted from the keyboard.
    """
    print("Installing packages: {}".format(Packages))

    try:
        Succeeded = subprocess.run(["sudo", "apt-get", "-y", "install"] + Packages).returncode == 0
    except KeyboardInterrupt:
        print ("Keyboard interrupt")
        Succeeded = False

    print("Packages installed" if Succeeded else "Packages failed to install")
    return Succeeded

_RootFSPath = None
def GetRootFSPath():
    """Return the RootFS directory (with trailing slash); cached.

    Follows the same logic as FEXCore::Config::GetDataDirectory():
      1. home is $HOME, else $PWD, else "."
      2. data dir is $XDG_DATA_HOME (default <home>/.local/share) + "/fex-emu"
      3. $FEX_APP_DATA_LOCATION, when set, replaces the data dir
      4. an existing <home>/.fex-emu directory replaces all of the above
    """
    global _RootFSPath

    if _RootFSPath is not None:
        return _RootFSPath

    HomeDir = os.getenv("HOME")
    if HomeDir is None:
        HomeDir = os.getenv("PWD", ".")

    DataDir = os.getenv("XDG_DATA_HOME", HomeDir + "/.local/share") + "/fex-emu"
    DataDir = os.getenv("FEX_APP_DATA_LOCATION", DataDir)

    LegacyDir = HomeDir + "/.fex-emu"
    if os.path.isdir(LegacyDir):
        DataDir = LegacyDir

    _RootFSPath = DataDir + "/RootFS/"
    return _RootFSPath

def CheckRootFSInstallStatus():
    """Return True when a RootFS image for this Ubuntu version exists.

    Looks for Ubuntu_<major>_<minor>.ero and then .sqsh in the RootFS
    directory. False means no rootfs is installed or the distro is
    unsupported.
    """
    # Ubuntu version number, e.g. "23.10" -> "23_10"
    VersionTag = GetDistro()[1].replace(".", "_")

    for Pattern in ("Ubuntu_{}.ero", "Ubuntu_{}.sqsh"):
        if os.path.exists(GetRootFSPath() + Pattern.format(VersionTag)):
            return True

    return False

def TryInstallRootFS():
    """Run FEXRootFSFetcher with /dev/tty as its stdin.

    Returns True when the fetcher exits with status 0. Returns False on a
    keyboard interrupt or on any OSError raised while opening the TTY or
    starting the fetcher.
    """
    try:
        with open("/dev/tty", "r") as tty:
            Fetcher = subprocess.run(["FEXRootFSFetcher"], stdin=tty)
        return Fetcher.returncode == 0
    except KeyboardInterrupt:
        print ("Keyboard interrupt")
    except OSError:
        print ("No TTY available")
    return False

def TryBasicProgramExecution():
    """Run `FEX /usr/bin/uname -a` and report whether it exited with 0."""
    SmokeTest = ["FEX", "/usr/bin/uname", "-a"]
    return subprocess.run(SmokeTest).returncode == 0

def ExitWithStatus(Status):
    """Drop cached sudo credentials (`sudo -K`), then exit with `Status`."""
    # Remove the cached credentials
    subprocess.run(["sudo", "-K"])
    raise SystemExit(Status)

def GetKernelVersion():
    """Return the kernel release up to (not including) the first "-"."""
    # eg: `6.14.4-061404-generic` -> `6.14.4`
    Release = platform.uname().release
    return Release.partition("-")[0]

def IsSupportedKernel():
    """Return True when the running kernel version is at least 5.15."""
    MinimumVersion = version_check("5.15")
    RunningVersion = version_check(GetKernelVersion())
    return RunningVersion >= MinimumVersion

def main():
    """Install FEX from the PPA on a supported host and run a smoke test.

    Steps, in order: check architecture, kernel and distro; make sure the PPA
    is present and the packages are installed and up to date; make sure a
    RootFS exists; run a basic program through FEX. Every exit goes through
    ExitWithStatus (-1 on failure, 0 on success), so this function does not
    return normally.
    """
    # Only run on supported arch
    if not IsSupportedArch():
        print ( "{} is not a supported architecture".format(GetArch()))
        ExitWithStatus(-1)

    # Only run on a new enough kernel
    if not IsSupportedKernel():
        print ( "Kernel {} is too old. FEX needs 5.15 minimum".format(GetKernelVersion()))
        ExitWithStatus(-1)

    if not IsSupportedDistro():
        Distro = GetDistro()
        print ( "'{} {}' is not a supported distro".format(Distro[0], Distro[1]))
        ExitWithStatus(-1)

    if GetDistro()[0] == "ubuntu":
        # The bool returned by GetPPAStatus indexes the two-element tuple
        print ("Getting PPA status: {}".format(("NotInstalled", "Installed")[GetPPAStatus()]))

        if GetPPAStatus():
            # PPA already present: refresh the package lists and upgrade
            # whatever FEX packages have updates pending
            if not UpdatePPA():
                print ("apt sources failed to update. Not continuing")
                ExitWithStatus(-1)
            if not CheckAndInstallPackageUpdates(GetPackagesToInstall()):
                print ("apt packages failed to update. Not continuing")
                ExitWithStatus(-1)
        else:
            # PPA missing: install software-properties-common first (passing
            # True installs it even without a pending upgrade), then add the PPA
            if not CheckAndInstallPackageUpdates(["software-properties-common"], True):
                print ("software-properties-common package failed to update. Not continuing")
                ExitWithStatus(-1)

            if not InstallPPA():
                print ("PPA failed to install. Not continuing")
                ExitWithStatus(-1)

        # Install whichever FEX packages dpkg does not know about yet
        Packages = CheckPackageInstallStatus()
        if len(Packages) > 0:
            if not InstallPackages(Packages):
                print ("Failed to install packages. Not continuing")
                ExitWithStatus(-1)

        if not CheckRootFSInstallStatus():
            print ("RootFS not found. Running FEXRootFSFetcher to get rootfs")
            if not TryInstallRootFS():
                print ("Failed to install RootFS. Not continuing")
                ExitWithStatus(-1)

    print ("FEX is now installed. Trying basic program run")
    if not TryBasicProgramExecution():
        print ("FEX failed to run. Not continuing")
        ExitWithStatus(-1)

    print ("")
    print ("===================================================")
    print ("FEX test run executed. You should be set to run FEX")
    print ("===================================================")
    print ("Usage examples:")
    print ("# steam is a bash script. Wrap with FEXBash")
    print ("\tFEXBash steam")
    print ("# Full path execution execution will wrap the application if it exists in the rootfs")
    print ("\tFEX /usr/bin/uname")
    print ("# Freestanding x86/x86-64 programs can be executed directly. binfmt_misc will redirect to FEX")
    print ("\t$HOME/PetalCrashOnline.AppImage")
    print ("# If you need a terminal that emulates everything.")
    print ("# Run FEXBash without arguments. Double check uname to see if running under FEX")
    print ("\tFEXBash")

    ExitWithStatus(0)

if __name__ == "__main__":
    # main() exits through ExitWithStatus; sys.exit only matters if it returns
    sys.exit(main())


================================================
FILE: Scripts/InstructionCountParser.py
================================================
#!/usr/bin/python3
import base64
from dataclasses import dataclass
from enum import Flag
import json
import struct
import sys
import subprocess
import os
import logging
logger = logging.getLogger()
logger.setLevel(logging.ERROR)

@dataclass
class TestData:
    """One assembled instruction-count test.

    The constructor takes (Name, ExpectedInstructionCount, Code,
    Instructions) and stores them in the lower-case fields; the capitalised
    properties are read-only views of those fields.
    """
    # Test name
    name: str
    # Expected instruction count for the test
    expectedinstructioncount: int
    # Assembled machine code of the test
    code: bytes
    # Source instructions the code was assembled from
    instructions: list

    def __init__(self, Name, ExpectedInstructionCount, Code, Instructions):
        self.name, self.expectedinstructioncount = Name, ExpectedInstructionCount
        self.code, self.instructions = Code, Instructions

    @property
    def Name(self):
        """Test name."""
        return self.name

    @property
    def ExpectedInstructionCount(self):
        """Expected instruction count."""
        return self.expectedinstructioncount

    @property
    def Code(self):
        """Assembled machine code."""
        return self.code

    @property
    def Instructions(self):
        """Source instruction list."""
        return self.instructions

# Generated test name -> TestData, filled by parse_json_data
TestDataMap = {}
class HostFeatures(Flag) :
    """Bit flags for host CPU features a test file can enable or disable.

    The combined `.value` is written into the output header as a 64-bit
    integer, so each bit position is part of the binary format.
    """
    FEATURE_ANY    = 0
    FEATURE_SVE128 = (1 << 0)
    FEATURE_SVE256 = (1 << 1)
    FEATURE_CLZERO = (1 << 2)
    FEATURE_RNG    = (1 << 3)
    FEATURE_FCMA   = (1 << 4)
    FEATURE_CSSC   = (1 << 5)
    FEATURE_AFP    = (1 << 6)
    FEATURE_RPRES  = (1 << 7)
    FEATURE_FLAGM  = (1 << 8)
    FEATURE_FLAGM2 = (1 << 9)
    FEATURE_CRYPTO = (1 << 10)
    FEATURE_AES256 = (1 << 11)
    FEATURE_SVEBITPERM = (1 << 12)
    FEATURE_TSO    = (1 << 13)
    FEATURE_LRCPC  = (1 << 14)
    FEATURE_LRCPC2 = (1 << 15)
    FEATURE_FRINTTS = (1 << 16)
    FEATURE_MOPS   = (1 << 17)

# Feature name as written in the JSON (compared after upper-casing) -> flag.
# Keep in sync with HostFeatures: every non-zero flag has one entry here.
HostFeaturesLookup = {
    "SVE128"  : HostFeatures.FEATURE_SVE128,
    "SVE256"  : HostFeatures.FEATURE_SVE256,
    "CLZERO"  : HostFeatures.FEATURE_CLZERO,
    "RNG"     : HostFeatures.FEATURE_RNG,
    "FCMA"    : HostFeatures.FEATURE_FCMA,
    "CSSC"    : HostFeatures.FEATURE_CSSC,
    "AFP"     : HostFeatures.FEATURE_AFP,
    "RPRES"   : HostFeatures.FEATURE_RPRES,
    "FLAGM"   : HostFeatures.FEATURE_FLAGM,
    "FLAGM2"  : HostFeatures.FEATURE_FLAGM2,
    "CRYPTO"  : HostFeatures.FEATURE_CRYPTO,
    "AES256"  : HostFeatures.FEATURE_AES256,
    "SVEBITPERM" : HostFeatures.FEATURE_SVEBITPERM,
    "TSO" : HostFeatures.FEATURE_TSO,
    "LRCPC" : HostFeatures.FEATURE_LRCPC,
    "LRCPC2" : HostFeatures.FEATURE_LRCPC2,
    "FRINTTS" : HostFeatures.FEATURE_FRINTTS,
    "MOPS"    : HostFeatures.FEATURE_MOPS,
}

def GetHostFeatures(data):
    """Combine a list of feature names into a single HostFeatures value.

    Names are matched case-insensitively against HostFeaturesLookup. The
    process exits with a message when `data` is not a list or a name is
    unknown. An empty list yields HostFeatures.FEATURE_ANY.
    """
    Combined = HostFeatures.FEATURE_ANY
    if type(data) is not list:
        sys.exit("Features value must be list of features")

    for Requested in data:
        Canonical = Requested.upper()
        if Canonical not in HostFeaturesLookup:
            sys.exit("Invalid host feature")

        Combined = Combined | HostFeaturesLookup[Canonical]

    return Combined

def parse_json_data(json_filepath, json_filename, json_data, output_binary_path):
    """Assemble every test in `json_data` with nasm and write the test blob.

    json_filepath      -- path of the JSON file; only used to derive unique
                          test names.
    json_filename      -- base name of the JSON file; part of the test names.
    json_data          -- decoded JSON object. `Features` (optional) may hold
                          `Bitness`, `EnabledHostFeatures`,
                          `DisabledHostFeatures` and `Env`; `Instructions`
                          maps a test key to its options.
    output_binary_path -- file that receives the packed binary data.

    Returns 0 on success, nasm's exit status when assembling fails, or 1
    when nasm produced no output file. Assembled tests are also stored in
    the module-level TestDataMap.
    """
    Bitness = 64
    EnabledHostFeatures = HostFeatures.FEATURE_ANY
    DisabledHostFeatures = HostFeatures.FEATURE_ANY
    OptionEnvironmentVariables = {}

    if "Features" in json_data:
        items = json_data["Features"]
        if ("Bitness" in items):
            Bitness = int(items["Bitness"])

        if ("EnabledHostFeatures" in items):
            EnabledHostFeatures = GetHostFeatures(items["EnabledHostFeatures"])

        if ("DisabledHostFeatures" in items):
            DisabledHostFeatures = GetHostFeatures(items["DisabledHostFeatures"])

        if ("Env" in items):
            data = items["Env"]
            if not (type(data) is dict):
                sys.exit("Environment variables value must be list of key:value pairs")

            for data_key, data_val in data.items():
                OptionEnvironmentVariables[data_key] = data_val

    for key, items in json_data["Instructions"].items():
        ExpectedInstructionCount = 0
        Instructions = []
        if ("ExpectedInstructionCount" in items):
            ExpectedInstructionCount = int(items["ExpectedInstructionCount"])

        if ("Skip" in items):
                if items["Skip"].upper() == "YES":
                    continue

        if "x86Insts" in items:
            Instructions = items["x86Insts"]
        else:
            # No list of instructions, only one which is the key.
            Instructions.append(key)

        # The test name doubles as a temporary file name. The standard Base64
        # alphabet contains '/', which would be taken as a path separator, so
        # the URL-safe alphabet ('-' and '_' instead of '+' and '/') is used.
        TestName = base64.urlsafe_b64encode("{}.{}.{}".format(str(hash(json_filepath)), json_filename, key).encode("ascii")).decode("ascii")
        tmp_asm = "/tmp/{}.asm".format(TestName)
        tmp_asm_out = "/tmp/{}.asm.o".format(TestName)
        logging.info("'{}' -> '{}' -> '{}'".format(key, tmp_asm, tmp_asm_out))

        if TestName in TestDataMap:
            sys.exit("Duplicate test name {} in tests".format(TestName))

        with open(tmp_asm, "w") as tmp_asm_file:
            tmp_asm_file.write("BITS {};\n".format(Bitness))
            for Inst in Instructions:
                tmp_asm_file.write("{}\n".format(Inst))

        Process = subprocess.Popen(["nasm", tmp_asm, "-o", tmp_asm_out])
        Process.wait()
        ResultCode = Process.returncode

        if ResultCode != 0:
            os.remove(tmp_asm)
            logging.error("Nasm failed to execute")
            logging.error("Couldn't compile: '{}'".format(key))
            return ResultCode

        if not os.path.exists(tmp_asm_out):
            logging.error("Nasm didn't emit code?")
            os.remove(tmp_asm)
            return 1

        logging.info("Generated asm file")

        with open(tmp_asm_out, "rb") as tmp_asm_out_file:
            binary_hex = tmp_asm_out_file.read()

        TestDataMap[TestName] = TestData(key, ExpectedInstructionCount, binary_hex, Instructions)

        os.remove(tmp_asm)
        os.remove(tmp_asm_out)

    # Output the test data as follows
    # struct TestInfo;
    # struct DataHeader {
    #   uint64_t Bitness;
    #   uint64_t NumTests;
    #   uint64_t EnabledHostFeatures;
    #   uint64_t DisabledHostFeatures;
    #   uint64_t EnvironmentVariableCount;
    #   char env[];
    #   TestInfo Tests[NumTests];
    # };
    # struct TestInfo {
    #   char InstName[128];
    #   int64_t ExpectedInstructionCount;
    #   uint64_t CodeSize;
    #   uint64_t x86InstCount;
    #   uint32_t Cookie;
    #   uint8_t Code[CodeSize];
    # };

    MemData = bytes()

    # Add the header
    MemData += struct.pack('Q', Bitness)
    MemData += struct.pack('Q', len(TestDataMap))
    MemData += struct.pack('Q', EnabledHostFeatures.value)
    MemData += struct.pack('Q', DisabledHostFeatures.value)
    MemData += struct.pack('Q', len(OptionEnvironmentVariables.items()))

    # Write environment variables as NUL-terminated key and value strings
    for key, val in OptionEnvironmentVariables.items():
        MemData += key.encode()
        MemData += struct.pack('B', 0)
        MemData += val.encode()
        MemData += struct.pack('B', 0)

    # Add each test
    for key, item in TestDataMap.items():
        # '128s' pads short names with NUL bytes and truncates longer ones
        MemData += struct.pack('128s', item.Name.encode("ascii"))
        MemData += struct.pack('q', item.ExpectedInstructionCount)
        MemData += struct.pack('Q', len(item.Code))
        MemData += struct.pack('Q', len(item.Instructions))
        MemData += struct.pack('I', 0x41424344)
        MemData += item.Code

    logging.info("Code goign to {}".format(output_binary_path))
    with open(output_binary_path, "wb") as output_binary_file:
        output_binary_file.write(MemData)

    return 0

def main():
    """Command-line entry point: read the test JSON and hand it to parse_json_data.

    ``sys.argv[1]`` is the path of the JSON description and ``sys.argv[2]`` is
    the output path forwarded to ``parse_json_data``.

    Returns:
        Whatever ``parse_json_data`` returns on success, or 1 when the
        arguments are missing, the JSON file cannot be read, the text is not
        valid JSON, or the top-level JSON value is not an object.
    """
    if sys.version_info[0] < 3:
        logging.critical ("Python 3 or a more recent version is required.")
        return 1

    if (len(sys.argv) < 3):
        # Stop here: indexing sys.argv below would otherwise raise IndexError.
        logging.critical ("usage: %s <PerformanceTests.json> <output_folder>" % (sys.argv[0]))
        return 1

    json_path = sys.argv[1]
    output_binary_path = sys.argv[2]

    try:
        with open(json_path) as json_file:
            json_text = json_file.read()
    except IOError:
        logging.error("IOError!")
        return 1

    try:
        json_data = json.loads(json_text)
        if not isinstance(json_data, dict):
            # Report the malformed document the same way as a syntax error
            # instead of letting an uncaught TypeError produce a traceback.
            logging.error('JSON error: JSON data must be a dict')
            return 1

        return parse_json_data(json_path, os.path.basename(json_path), json_data, output_binary_path)

    except ValueError as ve:
        logging.error(f'JSON error: {ve}')

        return 1

    return 0

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/NeedDisabledSVE.py
================================================
#!/usr/bin/python3

# Qualcomm in their infinite wisdom decided to disable SVE in a handful of SoCs.
# When compiling for a specific CPU architecture or with `-mcpu=native`, we need to ensure
# that SVE is disabled on the platforms that had the feature disabled.
# Check for the handful of Cortex CPUs that support SVE in hardware but have it
# disabled in software.
import re
import sys

def GetCPUFeatures():
    """Return the feature names from the first "Features" line of /proc/cpuinfo.

    Returns:
        A list of feature strings. The list is empty when /proc/cpuinfo has
        no line containing "Features", so callers can always run a
        membership test on the result.
    """
    # The context manager closes the file even if reading raises.
    with open("/proc/cpuinfo", "r") as File:
        Lines = File.readlines()

    for Line in Lines:
        if "Features" in Line:
            # Everything after the first ':' is the whitespace-separated list.
            return Line.partition(":")[2].split()

    return []

# Lookup table keyed by (CPU implementer, CPU part) pairs as they appear in
# /proc/cpuinfo. Every listed core maps to True.
SnapdragonIDsWithDisabledSVE = dict.fromkeys(
    [
        # Snapdragon 8 Gen 3
        (0x41, 0xd82), # Cortex-X4
        (0x41, 0xd81), # Cortex-A720
        (0x41, 0xd80), # Cortex-A520

        # Snapdragon 8 Gen 2
        (0x41, 0xd4e), # Cortex-X3
        (0x41, 0xd4d), # Cortex-A715
        (0x41, 0xd47), # Cortex-A710
        (0x41, 0xd46), # Cortex-A510

        # Snapdragon 8 Gen 1
        (0x41, 0xd48), # Cortex-X2
        # Cortex-A710 and Cortex-A510 are already listed above
    ],
    True,
)

def IsAffectedSnapdragon():
    """Tell whether any core listed in /proc/cpuinfo is in SnapdragonIDsWithDisabledSVE.

    Each "CPU part" line is paired with the most recently seen
    "CPU implementer" value (0 if none was seen yet) and the pair is looked up
    in the table.

    Returns:
        True if at least one (implementer, part) pair is in the table,
        otherwise False.
    """
    hex_number = re.compile(r'0x[0-9A-F]+', re.I)
    seen_cores = []
    implementer = 0

    with open("/proc/cpuinfo") as cpuinfo_file:
        for raw_line in cpuinfo_file:
            entry = raw_line.strip()
            if "CPU implementer" in entry:
                implementer = int(hex_number.findall(entry)[0], 16)
            if "CPU part" in entry:
                part = int(hex_number.findall(entry)[0], 16)
                seen_cores.append((implementer, part))

    # The whole file is parsed before any lookup happens.
    return any(SnapdragonIDsWithDisabledSVE.get(core) for core in seen_cores)


def main():
    """Decide whether SVE must be explicitly disabled for this machine.

    Returns:
        0 when /proc/cpuinfo reports "sve" or no affected core is found,
        1 when SVE is not reported and IsAffectedSnapdragon() is true.
    """
    # GetCPUFeatures() yields None when /proc/cpuinfo has no "Features" line;
    # treat that as "no features" so the membership test cannot raise.
    Features = GetCPUFeatures() or []

    # If SVE is reported from cpuinfo just return.
    if "sve" in Features:
        return 0

    if IsAffectedSnapdragon():
        return 1

    return 0

if __name__ == "__main__":
    sys.exit(main())


================================================
FILE: Scripts/StructPackVerifier.py
================================================
#!/usr/bin/python3
import clang.cindex
from clang.cindex import CursorKind
from clang.cindex import TypeKind
from clang.cindex import TranslationUnit
import sys
from dataclasses import dataclass
import subprocess
import logging
logger = logging.getLogger()
logger.setLevel(logging.WARNING)

# These defines are temporarily defined since python3-clang doesn't yet support these.
# Once this tool gets switched over to C++ then this won't be an issue.
# Type definitions redeclared from `clang/include/clang-c/Index.h`
# NOTE(review): none of the attributes added below are read anywhere else in
# this script; presumably constructing the CursorKind is what matters - confirm.

# Expression that references a C++20 concept.
# NOTE(review): the trailing comma makes the assigned value a 1-tuple
# containing the CursorKind, not the CursorKind itself - confirm intent.
CursorKind.CONCEPTSPECIALIZATIONEXPR = CursorKind(153),

# Expression that references a C++20 concept.
# NOTE(review): same trailing comma (1-tuple) as above.
CursorKind.REQUIRESEXPR = CursorKind(154),

# C++2a std::bit_cast expression.
CursorKind.BUILTINBITCASTEXPR = CursorKind(280)

# Only this registration is guarded; the bare `except` silently ignores any
# error raised while constructing or assigning the kind.
try:
    # a concept declaration.
    # NOTE(review): trailing comma again assigns a 1-tuple.
    CursorKind.CONCEPTDECL = CursorKind(604),
except:
    pass

@dataclass
class TypeDefinition:
    """Base record for a parsed declaration: a name plus a kind tag.

    The kind tag is one of the ``TYPE_*`` class constants. ``Name`` and
    ``Type`` are read-only views of the ``name`` and ``type`` fields.
    """
    # Kind tags, numbered 0..4 in this order.
    TYPE_UNKNOWN, TYPE_STRUCT, TYPE_UNION, TYPE_FIELD, TYPE_VARDECL = range(5)

    name: str
    type: int

    def __init__(self, Name, Type):
        self.name, self.type = Name, Type

    @property
    def Name(self):
        """The declaration's name."""
        return self.name

    @property
    def Type(self):
        """The declaration's kind tag (a ``TYPE_*`` value)."""
        return self.type

@dataclass
class AliasType:
    """An alias annotation: the aliased name plus the target it refers to.

    The ``ALIAS_*`` constants identify the target; elsewhere in this script
    the same values are used as indices into ``DBList.DBs``.
    """
    # Target identifiers, numbered 0..4 in this order.
    ALIAS_X86_32, ALIAS_X86_64, ALIAS_AARCH64, ALIAS_WIN32, ALIAS_WIN64 = range(5)

    Name: str
    AliasType: int

    def __init__(self, Name, Type):
        self.Name, self.AliasType = Name, Type

@dataclass
class StructDefinition(TypeDefinition):
    """A parsed struct: its size, alias annotations and member fields.

    A new instance starts with no aliases, no members and
    ``ExpectFEXMatch`` set to False.
    """
    Size: int
    Aliases: list
    Members: list
    ExpectFEXMatch: bool

    def __init__(self, Name, Size):
        super().__init__(Name, TypeDefinition.TYPE_STRUCT)
        self.Size = Size
        # Each instance gets its own fresh lists.
        self.Aliases, self.Members = [], []
        self.ExpectFEXMatch = False

@dataclass
class UnionDefinition(TypeDefinition):
    """A parsed union: its size, alias annotations and member fields.

    A new instance starts with no aliases, no members and
    ``ExpectFEXMatch`` set to False.
    """
    Size: int
    Aliases: list
    Members: list
    ExpectFEXMatch: bool

    def __init__(self, Name, Size):
        super().__init__(Name, TypeDefinition.TYPE_UNION)
        self.Size = Size
        # Each instance gets its own fresh lists.
        self.Aliases, self.Members = [], []
        self.ExpectFEXMatch = False

@dataclass
class FieldDefinition(TypeDefinition):
    """One member of a struct or union: its size, offset and alignment.

    The three values are stored exactly as the caller passes them.
    """
    Size: int
    OffsetOf: int
    Alignment: int

    def __init__(self, Name, Size, OffsetOf, Alignment):
        super().__init__(Name, TypeDefinition.TYPE_FIELD)
        self.Size, self.OffsetOf, self.Alignment = Size, OffsetOf, Alignment

@dataclass
class VarDeclDefinition(TypeDefinition):
    """A parsed variable or typedef declaration: its size and alias annotations.

    A new instance starts with no aliases, ``ExpectFEXMatch`` set to False
    and an empty ``Value``.
    """
    Size: int
    Aliases: list
    ExpectFEXMatch: bool
    Value: str

    def __init__(self, Name, Size):
        super(VarDeclDefinition, self).__init__(Name, TypeDefinition.TYPE_VARDECL)
        self.Size = Size
        self.Aliases = []
        self.ExpectFEXMatch = False
        # Value is a declared dataclass field, so the generated __repr__ and
        # __eq__ read it; initialize it so those do not raise AttributeError.
        self.Value = ""

@dataclass
class ArchDB:
    """Everything collected while parsing the header for one architecture.

    ``Parsed`` starts out True and is cleared by the parsing code when it
    hits a problem. ``TU`` stays None until a translation unit is stored.
    """
    Parsed: bool
    ArchName: str
    NamespaceScope: list
    CurrentNamespace: str
    TU: TranslationUnit
    Structs: dict
    Unions: dict
    VarDecls: dict
    FieldDecls: list

    def __init__(self, ArchName):
        self.ArchName = ArchName
        self.Parsed = True
        self.TU = None

        # Namespace tracking: the stack of open scopes and its "a::b::" rendering.
        self.NamespaceScope = []
        self.CurrentNamespace = ""

        # Declarations found so far; each container is a fresh object.
        self.Structs, self.Unions, self.VarDecls = {}, {}, {}
        self.FieldDecls = []

class DBList:
    """The five architecture databases in a fixed order.

    The order is x86-32, x86-64, AArch64, Win32, Win64.
    """
    DBs: list

    def __init__(self, DB32, DB64, DBAArch64, DBWin32, DBWin64):
        ordered = (DB32, DB64, DBAArch64, DBWin32, DBWin64)
        self.DBs = list(ordered)

def FindClangArguments(OriginalArguments):
    """Ask clang for its default include search path for the given arguments.

    Runs ``clang <OriginalArguments> -v -x c++ -S -`` with stdin connected to
    /dev/null and reads the verbose output on stderr. Every line between
    ``#include <...> search starts here:`` and ``End of search list.`` becomes
    a ``-I<dir>`` argument.

    Args:
        OriginalArguments: clang arguments (such as target selection) that
            influence the search path.

    Returns:
        A new list: the discovered ``-I`` arguments followed by
        OriginalArguments.

    Raises:
        OSError: if the ``clang`` executable cannot be started.
    """
    Command = ["clang", *OriginalArguments, "-v", "-x", "c++", "-S", "-"]
    Proc = subprocess.Popen(Command, stderr = subprocess.PIPE, stdin = subprocess.DEVNULL)
    NewIncludes = []
    InSearchList = False
    try:
        # Iterating stops at real EOF; a blank line does not end the scan early.
        for RawLine in Proc.stderr:
            Line = RawLine.strip()

            if (Line == b"End of search list."):
                break

            if (InSearchList):
                if Line:
                    # Decode as UTF-8 so non-ASCII directory names do not raise.
                    NewIncludes.append("-I" + Line.decode())
            elif (Line == b"#include <...> search starts here:"):
                InSearchList = True
    finally:
        # Nothing else is needed from clang: stop it, release the pipe and
        # reap the child so no zombie process or open descriptor is left.
        Proc.terminate()
        Proc.stderr.close()
        Proc.wait()

    # Add back original arguments
    NewIncludes.extend(OriginalArguments)
    return NewIncludes

def SetNamespace(Arch):
    """Rebuild Arch.CurrentNamespace from the scope stack.

    Every entry of Arch.NamespaceScope is followed by "::", so the scopes
    ["a", "b"] give "a::b::" and an empty stack gives "".
    """
    Arch.CurrentNamespace = "".join(Scope + "::" for Scope in Arch.NamespaceScope)

def HandleStructDeclCursor(Arch, Cursor, NameOverride = ""):
    """Record the struct declared at Cursor in Arch.Structs.

    The struct is named after the spelling of the cursor's type, or
    NameOverride when that spelling is empty. While the members are walked
    the name is pushed on the namespace stack (only if it is non-empty).

    Returns:
        Arch, with the struct stored under its name.
    """
    StructType = Cursor.type

    # Prefer the type's own spelling; fall back to the caller-supplied name.
    CursorName = StructType.spelling if len(StructType.spelling) != 0 else NameOverride
    IsNamed = len(CursorName) != 0

    if IsNamed:
        Arch.NamespaceScope.append(CursorName)
        SetNamespace(Arch)

    Struct = StructDefinition(Name = CursorName, Size = StructType.get_size())

    # Walk the children to fill in aliases and members.
    Arch.Structs[Struct.Name] = HandleStructElements(Arch, Struct, Cursor)

    if IsNamed:
        # Leave the scope that was entered above.
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)

    return Arch

def HandleUnionDeclCursor(Arch, Cursor, NameOverride = ""):
    """Record the union declared at Cursor in Arch.Unions.

    The union is named after the cursor's spelling, or NameOverride when the
    spelling is empty. While the members are walked the name is pushed on the
    namespace stack (only if it is non-empty).

    Returns:
        Arch, with the union stored under its name.
    """
    # Prefer the cursor's own spelling; fall back to the caller-supplied name.
    CursorName = Cursor.spelling if len(Cursor.spelling) != 0 else NameOverride
    IsNamed = len(CursorName) != 0

    if IsNamed:
        Arch.NamespaceScope.append(CursorName)
        SetNamespace(Arch)

    Union = UnionDefinition(Name = CursorName, Size = Cursor.type.get_size())

    # Register the union before its children are walked, then store the result.
    Arch.Unions[Union.Name] = Union
    Arch.Unions[Union.Name] = HandleStructElements(Arch, Union, Cursor)

    if IsNamed:
        # Leave the scope that was entered above.
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)

    return Arch

def HandleVarDeclCursor(Arch, Cursor):
    """Record the variable declared at Cursor in Arch.VarDecls.

    The entry is keyed by the cursor's spelling and carries the size of the
    cursor's type plus any annotations found on its children.

    Returns:
        Arch, with the declaration stored under its name.
    """
    # The definition is looked up but its result is not used.
    Cursor.get_definition()

    Entry = VarDeclDefinition(Name = Cursor.spelling, Size = Cursor.type.get_size())
    Arch.VarDecls[Entry.Name] = HandleVarDeclElements(Arch, Entry, Cursor)
    return Arch

def HandleVarDeclElements(Arch, VarDecl, Cursor):
    """Apply the annotations found on Cursor's children to VarDecl.

    Annotations of the form ``ioctl-alias-<arch>-<name>`` add an alias for
    that architecture and ``fex-match`` sets ExpectFEXMatch. An unknown
    <arch> is logged and clears Arch.Parsed. Everything else is ignored.

    Returns:
        VarDecl, updated in place.
    """
    # Architecture token of the annotation -> alias kind.
    KindByArch = {
        "x86_32": AliasType.ALIAS_X86_32,
        "x86_64": AliasType.ALIAS_X86_64,
        "aarch64": AliasType.ALIAS_AARCH64,
        "win32": AliasType.ALIAS_WIN32,
        "win64": AliasType.ALIAS_WIN64,
    }

    for Child in Cursor.get_children():
        if (Child.kind != CursorKind.ANNOTATE_ATTR):
            # Only annotations carry information for a variable declaration.
            continue

        Annotation = Child.spelling
        if (Annotation.startswith("ioctl-alias-")):
            Sections = Annotation.split("-")
            Kind = KindByArch.get(Sections[2])
            if (Kind is None):
                logging.critical ("Can't handle alias type '{0}'".format(Child.spelling))
                Arch.Parsed = False
            else:
                VarDecl.Aliases.append(AliasType(Sections[3], Kind))
        elif (Annotation == "fex-match"):
            VarDecl.ExpectFEXMatch = True
        # Any other annotation is unknown and ignored.

    return VarDecl


def HandleTypeDefDeclCursor(Arch, Cursor):
    """Record the typedef declared at Cursor.

    A typedef whose underlying type is an elaborated record type is processed
    through HandleTypeDefDecl and then HandleCursor, with the typedef name
    pushed on the namespace stack for the latter. Any other typedef is stored
    in Arch.VarDecls with the size of its canonical type. Typedefs with an
    empty name are skipped.

    Returns:
        Arch.
    """
    Underlying = Cursor.underlying_typedef_type
    Canonical = Underlying.get_canonical()
    TypeDefName = Cursor.type.get_typedef_name()

    # Nothing is recorded for a typedef without a name.
    if (len(TypeDefName) == 0):
        return Arch

    IsRecordTypedef = (Underlying.kind == TypeKind.ELABORATED and
                       Canonical.kind == TypeKind.RECORD)

    if (IsRecordTypedef):
        HandleTypeDefDecl(Arch, Cursor, TypeDefName)

        # Enter the typedef's scope while its children are visited.
        Arch.NamespaceScope.append(TypeDefName)
        SetNamespace(Arch)

        Arch = HandleCursor(Arch, Cursor)

        # Leave the scope again.
        Arch.NamespaceScope.pop()
        SetNamespace(Arch)
    else:
        # The definition is looked up but its result is not used.
        Cursor.get_definition()

        Entry = VarDeclDefinition(Name = TypeDefName, Size = Canonical.get_size())
        Arch.VarDecls[Entry.Name] = HandleVarDeclElements(Arch, Entry, Cursor)

    return Arch

def HandleStructElements(Arch, Struct, Cursor):
    """Fill Struct with the aliases and members found among Cursor's children.

    * ``alias-<arch>-<name>`` annotations add an alias; an unknown <arch> is
      logged and clears Arch.Parsed. ``fex-match`` sets ExpectFEXMatch.
    * Field declarations and nested struct declarations become
      FieldDefinition members (also appended to Arch.FieldDecls); nested
      structs are additionally recorded via HandleStructDeclCursor.
    * Nested unions are flattened: their children are added to Struct itself.
    * Typedefs go to HandleTypeDefDeclCursor, anything else to HandleCursor.

    Returns:
        Struct, updated in place.
    """
    # Architecture token of the annotation -> alias kind.
    KindByArch = {
        "x86_32": AliasType.ALIAS_X86_32,
        "x86_64": AliasType.ALIAS_X86_64,
        "aarch64": AliasType.ALIAS_AARCH64,
        "win32": AliasType.ALIAS_WIN32,
        "win64": AliasType.ALIAS_WIN64,
    }

    for Child in Cursor.get_children():
        Kind = Child.kind

        if (Kind == CursorKind.ANNOTATE_ATTR):
            Annotation = Child.spelling
            if (Annotation.startswith("alias-")):
                Sections = Annotation.split("-")
                AliasKind = KindByArch.get(Sections[1])
                if (AliasKind is None):
                    logging.critical ("Can't handle alias type '{0}'".format(Child.spelling))
                    Arch.Parsed = False
                else:
                    Struct.Aliases.append(AliasType(Sections[2], AliasKind))
            elif (Annotation == "fex-match"):
                Struct.ExpectFEXMatch = True
            # Any other annotation is unknown and ignored.
        elif (Kind == CursorKind.FIELD_DECL or Kind == CursorKind.STRUCT_DECL):
            # Both kinds are recorded as a member, measured against the parent type.
            ParentType = Cursor.type
            FieldType = Child.type
            Field = FieldDefinition(
                Name = Child.spelling,
                Size = FieldType.get_size(),
                OffsetOf = ParentType.get_offset(Child.spelling),
                Alignment = FieldType.get_align())

            Struct.Members.append(Field)
            Arch.FieldDecls.append(Field)

            if (Kind == CursorKind.STRUCT_DECL):
                # A nested struct is also recorded as a struct in its own right.
                Arch = HandleStructDeclCursor(Arch, Child)
        elif (Kind == CursorKind.UNION_DECL):
            # Flatten the union: its children are added to this same Struct.
            Struct = HandleStructElements(Arch, Struct, Child)
        elif (Kind == CursorKind.TYPEDEF_DECL):
            Arch = HandleTypeDefDeclCursor(Arch, Child)
        else:
            Arch = HandleCursor(Arch, Child)

    return Struct

def HandleTypeDefDecl(Arch, Cursor, Name):
    """Process the children of a typedef cursor.

    Struct children are recorded under Name, nested typedefs are processed
    recursively, and reference/attribute children are ignored. Union children
    are skipped. Any other child kind is logged as unhandled.

    Args:
        Arch: the ArchDB being filled.
        Cursor: the typedef cursor whose children are inspected.
        Name: name given to an anonymous struct child.

    Returns:
        Arch, like the other Handle* functions.
    """
    for Child in Cursor.get_children():
        if (Child.kind == CursorKind.UNION_DECL):
            # Unions are skipped here. A second UNION_DECL branch calling
            # HandleUnionDeclCursor used to follow but could never run.
            # NOTE(review): confirm unions are meant to be ignored here.
            pass
        elif (Child.kind == CursorKind.STRUCT_DECL):
            Arch = HandleStructDeclCursor(Arch, Child, Name)
        elif (Child.kind == CursorKind.TYPEDEF_DECL):
            Arch = HandleTypeDefDeclCursor(Arch, Child)
        elif (Child.kind == CursorKind.TYPE_REF or
              Child.kind == CursorKind.NAMESPACE_REF or
              Child.kind == CursorKind.TEMPLATE_REF or
              Child.kind == CursorKind.ALIGNED_ATTR):
            # Safe to pass on
            pass
        else:
            logging.critical ("Unhandled TypedefDecl {0}-{1}-{2}".format(Child.kind, Child.type.spelling, Child.spelling))

    return Arch

def HandleCursor(Arch, Cursor):
    """Walk Cursor's children and record the declarations found in Arch.

    Unions, structs, typedefs and variable declarations go to their
    handlers. Namespaces are pushed on the namespace stack while their
    contents are visited. Field declarations and type references are
    skipped. Every other cursor kind is descended into recursively.

    If Cursor itself is invalid, the diagnostics of Arch.TU (when one has
    been stored) are logged and Arch.Parsed is cleared.

    Returns:
        Arch in every case, so callers can keep using the returned value.
    """
    if (Cursor.kind.is_invalid()):
        # Arch.TU is None until a translation unit has been stored on the DB.
        if (Arch.TU is not None):
            for Diag in Arch.TU.diagnostics:
                logging.warning (Diag.format())

        Arch.Parsed = False
        return Arch

    for Child in Cursor.get_children():
        kind = Child.kind
        if (kind == CursorKind.TRANSLATION_UNIT):
            Arch = HandleCursor(Arch, Child)
        elif (kind == CursorKind.FIELD_DECL):
            pass
        elif (kind == CursorKind.UNION_DECL):
            Arch = HandleUnionDeclCursor(Arch, Child)
        elif (kind == CursorKind.STRUCT_DECL):
            Arch = HandleStructDeclCursor(Arch, Child)
        elif (kind == CursorKind.TYPEDEF_DECL):
            Arch = HandleTypeDefDeclCursor(Arch, Child)
        elif (kind == CursorKind.VAR_DECL):
            Arch = HandleVarDeclCursor(Arch, Child)
        elif (kind == CursorKind.NAMESPACE):
            # Append namespace
            Arch.NamespaceScope.append(Child.spelling)
            SetNamespace(Arch)

            # Handle children
            Arch = HandleCursor(Arch, Child)

            # Pop namespace off
            Arch.NamespaceScope.pop()
            SetNamespace(Arch)
        elif (kind == CursorKind.TYPE_REF):
            # Safe to pass on
            pass
        else:
            Arch = HandleCursor(Arch, Child)

    return Arch

def GetDB(Arch, filename, args):
    """Parse filename with libclang and fill Arch with the declarations found.

    Args:
        Arch: the ArchDB to fill; it is updated in place.
        filename: header to parse.
        args: clang command-line arguments for this architecture.

    Returns:
        Arch. When the translation unit cannot be loaded, Arch.Parsed is set
        to False and Arch is returned without further processing.
    """
    Index = clang.cindex.Index.create()
    try:
        TU = Index.parse(filename, args=args, options=TranslationUnit.PARSE_INCOMPLETE)
    except clang.cindex.TranslationUnitLoadError as Error:
        # No translation unit exists on this path, so there are no
        # diagnostics to read; report the load error itself.
        logging.warning ("Couldn't load translation unit for Arch {0}: {1}".format(Arch.ArchName, Error))
        Arch.Parsed = False
        return Arch

    Arch.TU = TU
    HandleCursor(Arch, TU.cursor)

    # Get diagnostics
    Diags = TU.diagnostics
    if (len(Diags) != 0):
        logging.warning ("Diagnostics from Arch: {0}".format(Arch.ArchName))

    for Diag in Diags:
        logging.warning (Diag.format())

    return Arch

def GetCompar(ComparisonName, DBs):
    """Return the database in DBs that matches a comparison name.

    The name is matched case-insensitively against "x86_32", "x86_64",
    "win32", "win64" and "aarch64".

    Returns:
        The matching entry of DBs.DBs, or None for any other name.
    """
    SlotByName = {
        "x86_32": AliasType.ALIAS_X86_32,
        "x86_64": AliasType.ALIAS_X86_64,
        "win32": AliasType.ALIAS_WIN32,
        "win64": AliasType.ALIAS_WIN64,
        "aarch64": AliasType.ALIAS_AARCH64,
    }

    Slot = SlotByName.get(ComparisonName.lower())
    if Slot is None:
        return None
    return DBs.DBs[Slot]

def PrintMissingMembers(Struct1, Struct2):
    """Log an error for each member of Struct1 that has no same-named member in Struct2."""
    for Candidate in Struct1.Members:
        Found = any(Candidate.Name == Other.Name for Other in Struct2.Members)
        if not Found:
            logging.error ("\t'{0}' member '{1}' Doesn't exist in '{2}'".format(Struct1.Name, Candidate.Name, Struct2.Name))

def CompareStructs(Struct1, Struct2):
    """Compare the layout of two struct definitions and log every difference.

    Checks the overall size, the number of members and, when the counts
    match, the size, offset and alignment of each member pair in order.

    Returns:
        True only if nothing was reported. A size or alignment mismatch
        counts as a warning and a count/size/offset mismatch as an error;
        either one makes the result False.
    """
    Clean = True

    # Check if the struct size is a mismatch
    if (Struct1.Size != Struct2.Size):
        logging.warning ("\t#### Warning: Struct size mismatch. {0} != {1}".format(Struct1.Size, Struct2.Size))
        logging.warning ("\t\tMight not be a problem if struct isn't in used inside another struct, or end of object")
        Clean = False

    # A differing member count makes a pairwise comparison meaningless.
    Count1 = len(Struct1.Members)
    Count2 = len(Struct2.Members)
    if (Count1 != Count2):
        logging.error ("@@@@ ERROR: Struct fields mismatch! Number of fields don't match! {0} != {1}".format(Count1, Count2))
        PrintMissingMembers(Struct1, Struct2)
        PrintMissingMembers(Struct2, Struct1)
        return False

    # Compare the members themselves, pairing them by position.
    for Member1, Member2 in zip(Struct1.Members, Struct2.Members):
        if (Member1.Type != TypeDefinition.TYPE_FIELD):
            logging.critical ("Oops, didn't handle member type {0}".format(Member1.Type))
            continue

        if (Member1.Size != Member2.Size):
            logging.error ("\t@@@@ ERROR: Member '{0}' mismatch Size! {1} != {2}".format(Member1.Name, Member1.Size, Member2.Size))
            Clean = False
        if (Member1.OffsetOf != Member2.OffsetOf):
            logging.error ("\t@@@@ ERROR: Member '{0}' mismatch OffsetOf! {1} != {2}".format(Member1.Name, Member1.OffsetOf, Member2.OffsetOf))
            Clean = False
        if (Member1.Alignment != Member2.Alignment):
            logging.error ("\t@@@@ ERROR: Member '{0}' mismatch Alignment! {1} != {2}".format(Member1.Name, Member1.Alignment, Member2.Alignment))
            logging.error ("\t\tProbably not a problem if offset and size matches")
            Clean = False

    return Clean

def CompareAliases(DB, DBs):
    """Check every alias declared in DB against the database it points to.

    Struct aliases are compared with CompareStructs; variable/typedef aliases
    only need the same size. The alias kind selects the target database by
    index in DBs.DBs. Entries with an empty name are skipped.

    Returns:
        True if every alias was found and matched, otherwise False.
    """
    Passed = True

    for StructKey, StructDef in DB.Structs.items():
        # XXX: Oops, shouldn't have anonymous structs
        if (len(StructKey) == 0):
            continue

        if (len(StructDef.Aliases) != 0):
            logging.info ("Comparing Aliases {0}".format(StructDef.Name))

        for Alias in StructDef.Aliases:
            TargetDB = DBs.DBs[Alias.AliasType]
            TargetStruct = TargetDB.Structs.get(Alias.Name)
            if (TargetStruct == None):
                logging.critical ("Couldn't find alias {0} in {1} DB".format(Alias.Name, TargetDB.ArchName))
                Passed = False
                continue

            Matched = CompareStructs(StructDef, TargetStruct)
            if not (Matched):
                logging.error ("Couldn't Alias to Arch {0} successfully".format(TargetDB.ArchName))
            Passed &= Matched

    for VarDeclKey, VarDecl in DB.VarDecls.items():
        # XXX: Oops, shouldn't have anonymous vardecls
        if (len(VarDeclKey) == 0):
            continue

        for Alias in VarDecl.Aliases:
            TargetDB = DBs.DBs[Alias.AliasType]
            TargetDecl = TargetDB.VarDecls.get(Alias.Name)
            if (TargetDecl == None):
                logging.critical ("Couldn't find alias {0} in {1} DB".format(Alias.Name, TargetDB.ArchName))
                Passed = False
            elif (VarDecl.Size != TargetDecl.Size):
                logging.critical("VarDecl: {0}/{1} didn't match {2}/{3}: {4:08X} != {5:08X}".format(VarDeclKey, DB.ArchName, Alias.Name,
                    TargetDB.ArchName,
                    VarDecl.Size, TargetDecl.Size))
                Passed = False

    return Passed

def CompareCrossArch(DB1, DB2):
    """Compare every struct of DB1 marked ExpectFEXMatch with its namesake in DB2.

    Structs with an empty name are skipped. A marked struct that is missing
    from DB2 counts as a failure.

    Returns:
        True if every marked struct exists in DB2 and compares clean,
        otherwise False.
    """
    Passed = True

    for StructKey, StructDef in DB1.Structs.items():
        # XXX: Oops, shouldn't have anonymous structs
        if (len(StructKey) == 0):
            continue

        logging.info ("Comparing crossArch {0}".format(StructDef.Name))

        # Only structs that opted in are compared across architectures.
        if not (StructDef.ExpectFEXMatch):
            continue

        Counterpart = DB2.Structs.get(StructDef.Name)
        if (Counterpart == None):
            logging.critical ("Couldn't find Struct {0} in {1} DB".format(StructDef.Name, DB2.ArchName))
            Passed = False
        else:
            Passed &= CompareStructs(StructDef, Counterpart)

    return Passed

def main():
    """Parse a header for several targets and compare struct layouts.

    Command line: ``<options> <Header.hpp> <clang arguments...>``. Options
    are ``-c1 <Type1>``, ``-c2 <Type2>``, ``-win`` and ``-no-linux``. The
    first non-option argument is the header and everything after it (or
    after ``--``) is forwarded to clang.

    With both -c1 and -c2 the two databases are compared across
    architectures. With only -c1 the aliases declared in that database are
    checked.

    Returns:
        0 on success (or when no comparison was requested), 1 when a
        comparison failed or the command line could not be used.
    """
    if sys.version_info[0] < 3:
        logging.critical ("Python 3 or a more recent version is required.")
        return 1

    if (len(sys.argv) < 2):
        print ("usage: %s <options> <Header.hpp> <clang arguments...>" % (sys.argv[0]))
        print ("\t-c1 <Type1>: Base Comparison Type")
        print ("\t-c2 <Type2>: Second Comparison Type")
        print ("\t-win: Parse Windows")
        sys.exit ("\t-no-linux: Do not parse Linux")

    ParseLinux = True
    ParseWindows = False

    Header = ""
    Comparison1 = ""
    Comparison2 = ""

    StartOfArgs = 0

    # Parse our arguments
    ArgIndex = 1
    while ArgIndex < len(sys.argv):
        Arg = sys.argv[ArgIndex]
        if (Arg == "--"):
            StartOfArgs = ArgIndex + 1
            break

        if (Arg == "-c1" or Arg == "-c2"):
            ArgIndex += 1
            if (ArgIndex >= len(sys.argv)):
                # The option was the last argument; there is no value to read.
                logging.critical ("Option '{0}' requires a value".format(Arg))
                return 1

            if (Arg == "-c1"):
                Comparison1 = sys.argv[ArgIndex]
            else:
                Comparison2 = sys.argv[ArgIndex]
        elif (Arg == "-win"):
            ParseWindows = True
        elif (Arg == "-no-linux"):
            ParseLinux = False
        else:
            Header = Arg
            StartOfArgs = ArgIndex + 1
            break

        # Increment
        ArgIndex += 1

    # Add arguments for clang
    BaseArgs = list(sys.argv[StartOfArgs:])

    args_x86_32 = [
        "-isystem", "/usr/i686-linux-gnu/include",
        "-O2",
        "-m32",
        "--target=i686-linux-unknown",
    ]

    args_x86_64 = [
        "-isystem", "/usr/x86_64-linux-gnu/include",
        "-O2",
        "--target=x86_64-linux-unknown",
        "-DARCHITECTURE_x86_64",
    ]

    args_aarch64 = [
        "-isystem", "/usr/aarch64-linux-gnu/include",
        "-O2",
        "--target=aarch64-linux-unknown",
        "-DARCHITECTURE_arm64",
    ]

    args_x86_win32 = [
        "-I/usr/lib/gcc/i686-w64-mingw32/10-win32/include/c++/",
        "-I/usr/lib/gcc/i686-w64-mingw32/10-win32/include/c++/i686-w64-mingw32/",
        "-O2",
        "-m32",
        "--target=i686-pc-win32",
    ]

    args_x86_win64 = [
        "-I/usr/lib/gcc/x86_64-w64-mingw32/10-win32/include/c++/",
        "-I/usr/lib/gcc/x86_64-w64-mingw32/10-win32/include/c++/x86_64-w64-mingw32/",
        "-O2",
        "--target=x86_64-pc-win32",
    ]

    # Add all the arguments to the different lists
    args_x86_32.extend(BaseArgs)
    args_x86_64.extend(BaseArgs)
    args_aarch64.extend(BaseArgs)
    args_x86_win32.extend(BaseArgs)
    args_x86_win64.extend(BaseArgs)

    # We need to find the default arguments through clang invocations
    args_x86_32 = FindClangArguments(args_x86_32)
    args_x86_64 = FindClangArguments(args_x86_64)
    args_aarch64 = FindClangArguments(args_aarch64)

    args_x86_win32 = FindClangArguments(args_x86_win32)
    args_x86_win64 = FindClangArguments(args_x86_win64)

    Arch_x86_32 = ArchDB("x86_32")
    Arch_x86_64 = ArchDB("x86_64")

    Arch_aarch64 = ArchDB("aarch64")
    Arch_x86_win32 = ArchDB("win32")
    Arch_x86_win64 = ArchDB("win64")

    # GetDB fills the ArchDB it is given in place. The objects created above
    # are kept rather than replaced by GetDB's return value, so a failed
    # parse can never leave one of these names bound to None.
    if (ParseLinux):
        LinuxDBs = [
            (Arch_x86_32, args_x86_32),
            (Arch_x86_64, args_x86_64),
            (Arch_aarch64, args_aarch64),
        ]
        for Arch, Args in LinuxDBs:
            GetDB(Arch, Header, Args)

        for Arch, Args in LinuxDBs:
            if not (Arch.Parsed):
                logging.critical ("Couldn't parse:{0}".format(Arch.ArchName))

    if (ParseWindows):
        WindowsDBs = [
            (Arch_x86_win32, args_x86_win32),
            (Arch_x86_win64, args_x86_win64),
        ]
        for Arch, Args in WindowsDBs:
            GetDB(Arch, Header, Args)

        for Arch, Args in WindowsDBs:
            if not (Arch.Parsed):
                logging.critical ("Couldn't parse:{0}".format(Arch.ArchName))

    DBs = DBList(Arch_x86_32,
        Arch_x86_64,
        Arch_aarch64,
        Arch_x86_win32,
        Arch_x86_win64)

    Result = 0
    if (len(Comparison1) != 0):
        CompDB1 = GetCompar(Comparison1, DBs)
        if (CompDB1 is None):
            # GetCompar yields None for names it does not know.
            logging.critical ("Unknown comparison type '{0}'".format(Comparison1))
            return 1

        if (len(Comparison2) != 0):
            CompDB2 = GetCompar(Comparison2, DBs)
            if (CompDB2 is None):
                logging.critical ("Unknown comparison type '{0}'".format(Comparison2))
                return 1

            # Now compare across the two compared architectures
            Result = 0 if CompareCrossArch(CompDB1, CompDB2) else 1
        else:
            # First compare the aliases to make sure we are matching
            Result = 0 if CompareAliases(CompDB1, DBs) else 1

    if (Result == 1):
        # Dump the command line twice: as a list and as a quoted shell-style string.
        logging.error("Execution environment")
        logging.error("[ " + "".join(Arg + ", " for Arg in sys.argv) + " ]")
        logging.error("".join("\"" + Arg + "\" " for Arg in sys.argv))
    return Result

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/Threaded_Lockstep_Runner.py
================================================
#!/usr/bin/python3
import os
import sys
import glob
from threading import Thread
import subprocess
import multiprocessing
from shutil import which

if sys.version_info[0] < 3:
        raise Exception("Python 3 or a more recent version is required.")

if (len(sys.argv) < 3):
    sys.exit("We need two arguments. Location of LockStepRunner and folder containing the tests")

# Remove our SHM regions if they still exist
SHM_Files = glob.glob("/dev/shm/*_Lockstep")
for file in SHM_Files:
    os.remove(file)

# Every path starting with the prefix given as the second argument is a test.
UnitTests = sorted(glob.glob(sys.argv[2] + "*"))
UnitTestsSize = len(UnitTests)
Threads = [None] * UnitTestsSize
Results = [None] * UnitTestsSize
# One [server, client] return-code pair per test. Each row must be its own
# list: `[[None] * 2] * N` would repeat a single shared list N times, so a
# result written for one test would show up for all of them.
ThreadResults = [[None, None] for _ in range(UnitTestsSize)]
MaxFileNameStringLen = 0

def Threaded_Runner(Args, ID, Client):
    """Run one process to completion and record its return code.

    The command line and everything the process writes to stdout/stderr go
    to the file ``Log_<ID>_<Client>`` in the current directory.

    Args:
        Args: command line to execute.
        ID: index of the test; selects the row in ThreadResults.
        Client: column in ThreadResults that receives the return code.
    """
    # The context manager closes the log file once the process has finished.
    with open("Log_" + str(ID) + "_" + str(Client), "w") as Log:
        Log.write("Args: %s\n" % " ".join(Args))
        # Flush before the child starts so the header precedes its output.
        Log.flush()
        Process = subprocess.Popen(Args, stdout=Log, stderr=Log)
        Process.wait()
    ThreadResults[ID][Client] = Process.returncode

def Threaded_Manager(Runner, ID, File):
    """Run the server/client pair for one test and record the combined result.

    Runner: path of the lockstep runner executable.
    ID:     index of the test in UnitTests / Results / ThreadResults.
    File:   test file handed to the server side.
    """
    # Both sides share the same base command line; the server additionally
    # gets the test file while the client gets "-C".
    shared = [Runner, "-c", "vm", "-n", "1", "-I", "R" + str(ID)]
    # Wrap in catchsegv only when it is available on this machine
    wrapper = [] if which("catchsegv") is None else ["catchsegv"]

    server = Thread(target = Threaded_Runner, args = (wrapper + shared + [File], ID, 0))
    client = Thread(target = Threaded_Runner, args = (wrapper + shared + ["-C"], ID, 1))

    server.start()
    client.start()

    client.join()
    server.join()

    server_code = ThreadResults[ID][0]
    client_code = ThreadResults[ID][1]

    # The server is authoritative, unless it reported success while the client died
    if client_code != 0 and server_code == 0:
        Results[ID] = client_code
    else:
        Results[ID] = server_code

    padding = " " * (MaxFileNameStringLen - len(UnitTests[ID]))

    if Results[ID] != 0:
        print("\t'%s'%s - FAILED ID: %d - %s" % (UnitTests[ID], padding, ID, hex(Results[ID])))
    else:
        print("\t'%s'%s - PASSED ID: %d - 0" % (UnitTests[ID], padding, ID))

RunnerSlot = 0
# Use floor division: in Python 3 `/` yields a float, and a float can neither
# size a list nor ever compare equal to the integer slot counter below.
MaxRunnerSlots = min(32, multiprocessing.cpu_count() // 2)
RunnerSlots = [None] * MaxRunnerSlots
for RunnerID in range(UnitTestsSize):
    File = UnitTests[RunnerID]
    print("'%s' Running Test" % File)
    MaxFileNameStringLen = max(MaxFileNameStringLen, len(File))
    Threads[RunnerID] = Thread(target = Threaded_Manager, args = (sys.argv[1], RunnerID, File))
    Threads[RunnerID].start()
    # With zero slots (single-core host) no batching is done at all
    if (MaxRunnerSlots != 0):
        RunnerSlots[RunnerSlot] = Threads[RunnerID]
        RunnerSlot += 1
        # Once a full batch is in flight, wait for all of it before starting more
        if (RunnerSlot == MaxRunnerSlots):
            for i in range(MaxRunnerSlots):
                RunnerSlots[i].join()
            RunnerSlot = 0

# Wait for whatever is still running from the last (partial) batch
for i in range(UnitTestsSize):
    Threads[i].join()

print("====== PASSED RESULTS ======")
for i in range(UnitTestsSize):
    DupLen = MaxFileNameStringLen - len(UnitTests[i])
    if (Results[i] == 0):
        print("\t'%s'%s - PASSED ID: %d - 0" % (UnitTests[i], " "*DupLen, i))

print("====== FAILED RESULTS ======")
for i in range(UnitTestsSize):
    DupLen = MaxFileNameStringLen - len(UnitTests[i])
    if (Results[i] != 0):
        print("\t'%s'%s - FAILED ID: %d - %s" % (UnitTests[i], " "*DupLen, i, hex(Results[i])))


================================================
FILE: Scripts/UpdateInstructionCountJson.py
================================================
#!/usr/bin/python3
import json
import logging
import sys
logger = logging.getLogger()
logger.setLevel(logging.ERROR)

def insert_before(d, key, item):
    """Return a new dict equal to `d` with `item` placed just ahead of `key`.

    `item` is a (name, value) pair. `d` itself is not modified. Raises
    ValueError when `key` is not present in `d`.
    """
    pairs = list(d.items())
    position = list(d).index(key)
    return dict(pairs[:position] + [item] + pairs[position:])

def update_performance_numbers(performance_json_path, performance_json, new_json_numbers):
    """Merge freshly measured numbers into the performance JSON and rewrite it.

    For every non-empty key of `new_json_numbers` the matching entry under
    performance_json["Instructions"] gets its "ExpectedInstructionCount" and
    "ExpectedArm64ASM" replaced when the new data provides them. Entries that
    carry "x86Insts" also get "x86InstructionCount" regenerated right before
    "ExpectedInstructionCount".

    Returns 1 (without writing) when a key is unknown, otherwise falls through
    after writing `performance_json_path`.
    """
    copied_fields = ("ExpectedInstructionCount", "ExpectedArm64ASM")

    for name, fresh in new_json_numbers.items():
        if len(name) == 0:
            continue

        instructions = performance_json["Instructions"]
        if name not in instructions:
            logging.error("{} didn't exist in performance json file?".format(name))
            return 1

        entry = instructions[name]
        for field in copied_fields:
            if field in fresh:
                entry[field] = fresh[field]

        if "x86Insts" in entry:
            # Drop any stale count, then re-add it directly ahead of the expected count
            entry.pop('x86InstructionCount', None)
            x86_count = ("x86InstructionCount", len(entry["x86Insts"]))
            instructions[name] = insert_before(entry, "ExpectedInstructionCount", x86_count)

    # Output to the original file.
    with open(performance_json_path, "w") as json_file:
        json.dump(performance_json, json_file, indent=2)
        json_file.write("\n")

def main():
    """Entry point: apply <NewNumbers.json> to <PerformanceTests.json>.

    Returns the process exit status: 1 on a usage error, an unreadable
    performance file or malformed JSON; 0 when there is no new-numbers file
    to apply; otherwise whatever update_performance_numbers returns.
    """
    if sys.version_info[0] < 3:
        logging.critical ("Python 3 or a more recent version is required.")
        return 1

    if (len(sys.argv) < 3):
        logging.critical ("usage: %s <PerformanceTests.json> <NewNumbers.json>" % (sys.argv[0]))
        # Bail out here; indexing sys.argv below would otherwise raise IndexError
        return 1

    performance_json_path = sys.argv[1]
    new_json_numbers = sys.argv[2]

    try:
        with open(new_json_numbers) as json_file:
            new_json_numbers_text = json_file.read()
    except IOError:
        # If there isn't any new json numbers for this file, then it is safe to skip.
        return 0

    try:
        with open(performance_json_path) as json_file:
            performance_json_text = json_file.read()
    except IOError:
        logging.error("IOError!")
        return 1

    try:
        performance_json_data = json.loads(performance_json_text)
        if not isinstance(performance_json_data, dict):
            raise TypeError('JSON data must be a dict')

        new_json_numbers_data = json.loads(new_json_numbers_text)
        if not isinstance(new_json_numbers_data, dict):
            raise TypeError('JSON data must be a dict')

        return update_performance_numbers(performance_json_path, performance_json_data, new_json_numbers_data)
    except (ValueError, TypeError) as err:
        # ValueError: undecodable JSON; TypeError: top-level value is not an object
        logging.error(f'JSON error: {err}')
        return 1

if __name__ == "__main__":
    # execute only if run as a script
    sys.exit(main())


================================================
FILE: Scripts/aarch64_fit_native.py
================================================
#!/usr/bin/python3
import re
import sys
try:
    from packaging.version import Version as version_check
except:
    from pkg_resources import parse_version as version_check

# Order this list from oldest to newest
# try not to list something newer than our minimum compiler supported version
# Maps (CPU implementer, CPU part) to a compiler CPU name for "big" cores.
# A plain string is used as-is. A list holds [name, minimum clang version]
# pairs ordered oldest to newest; the last pair whose version requirement is
# met by the running compiler is the one that gets picked.
BigCoreIDs = {
    # ARM
    (0x41, 0xd07): "cortex-a57",
    (0x41, 0xd08): "cortex-a72",
    (0x41, 0xd09): "cortex-a73",
    (0x41, 0xd0a): "cortex-a75",
    (0x41, 0xd0b): "cortex-a76",
    (0x41, 0xd0d): "cortex-a77",
    (0x41, 0xd41): "cortex-a78",
    (0x41, 0xd4b): "cortex-a78c",
    (0x41, 0xd44): "cortex-x1",
    (0x41, 0xd4c): [
        ["cortex-x1", "0.0"],
        ["cortex-x1c", "14.0"],
    ],
    (0x41, 0xd47): [
        ["cortex-a78", "0.0"],
        ["cortex-a710", "14.0"],
    ],
    (0x41, 0xd48): [
        ["cortex-x1", "0.0"],
        ["cortex-x2", "14.0"],
    ],
    (0x41, 0xd4d): [
        ["cortex-a78", "0.0"],
        ["cortex-a715", "17.0"],
    ],
    (0x41, 0xd81): [
        ["cortex-a78", "0.0"],
        ["cortex-a720", "18.0"],
    ],
    (0x41, 0xd87): [
        ["cortex-a78", "0.0"],
        ["cortex-a725", "19.0"],
    ],
    (0x41, 0xd85): [
        ["cortex-a78", "0.0"],
        ["cortex-x925", "19.0"],
    ],

    # Neoverse-N class
    (0x41, 0xd0c): "neoverse-n1",
    (0x41, 0xd49): "neoverse-n2",
    (0x41, 0xd8e): [
        ["cortex-a78", "0.0"],
        ["neoverse-n3", "19.0"],
    ],

    # Neoverse-V class
    (0x41, 0xd40): "neoverse-v1",
    (0x41, 0xd4f): [
        ["cortex-a78", "0.0"],
        ["neoverse-v2", "16.0"],
    ],
    (0x41, 0xd83): [
        ["cortex-a78", "0.0"],
        ["neoverse-v3ae", "19.0"],
    ],
    (0x41, 0xd84): [
        ["cortex-a78", "0.0"],
        ["neoverse-v3", "19.0"],
    ],

    # Nvidia
    (0x4e, 0x004): "carmel", # Carmel

    # Qualcomm
    (0x51, 0x800): "cortex-a73", # Kryo 2xx Gold
    (0x51, 0x802): "cortex-a75", # Kryo 3xx Gold
    (0x51, 0x804): "cortex-a76", # Kryo 4xx Gold

    # Apple M1 Parallels hypervisor
    (0x41, 0x0): [
        ["apple-a13", "0.0"], # If we aren't on 12.0+
        ["apple-a14", "12.0"], # Only exists in 12.0+
    ],

    # QEmu HVF 10.2+
    (0x61, 0): "apple-a13", # Can't determine variant, choose lowest.

    # Ampere Computing
    (0xc0, 0xac3): "ampere1",
    (0xc0, 0xac4): "ampere1a",
    (0xc0, 0xac5): "ampere1b",
    (0xc0, 0xac7): "ampere1c",
}

# Maps (CPU implementer, CPU part) to a compiler CPU name for "little" cores.
# Values follow the same string / [name, minimum clang version] list layout
# as the big-core table.
LittleCoreIDs = {
    # ARM
    (0x41, 0xd04): "cortex-a35",
    (0x41, 0xd03): "cortex-a53",
    (0x41, 0xd05): "cortex-a55",
    (0x41, 0xd46): [
        ["cortex-a55", "0.0"],
        ["cortex-a510", "14.0"],
    ],
    (0x41, 0xd80): [
        ["cortex-a55", "0.0"],
        ["cortex-a520", "18.0"],
    ],

    # Qualcomm
    (0x51, 0x801): "cortex-a53", # Kryo 2xx Silver
    (0x51, 0x803): "cortex-a55", # Kryo 3xx Silver
    (0x51, 0x805): "cortex-a55", # Kryo 4xx/5xx Silver
}

# Args: </proc/cpuinfo file> <clang version>
if (len(sys.argv) < 3):
    sys.exit()

clang_version = sys.argv[2]

# Collect one (implementer, part) pair per "CPU part" line. The implementer
# seen most recently before that line is the one paired with it.
cpuinfo = []
with open(sys.argv[1]) as cpuinfo_file:
    current_implementer = 0
    current_part = 0
    for line in cpuinfo_file:
        line = line.strip()
        if "CPU implementer" in line:
            current_implementer = int(re.findall(r'0x[0-9A-F]+', line, re.I)[0], 16)
        if "CPU part" in line:
            current_part = int(re.findall(r'0x[0-9A-F]+', line, re.I)[0], 16)
            cpuinfo.append((current_implementer, current_part))

largest_big = "cortex-a57"
largest_little = "cortex-a53"

# Parse the compiler version once instead of once per table entry
parsed_clang_version = version_check(clang_version)

def _resolve_core_name(entry, current):
    """Turn a table value into a CPU name usable with this compiler.

    A string entry is returned unchanged. For a list of [name, min version]
    pairs the last pair whose version requirement is satisfied wins; `current`
    is kept when none is satisfied.
    """
    if type(entry) is list:
        for name, min_version in entry:
            if parsed_clang_version >= version_check(min_version):
                current = name
        return current
    return entry

# Later cores in the file override earlier ones
for core in cpuinfo:
    big_entry = BigCoreIDs.get(core)
    if big_entry:
        largest_big = _resolve_core_name(big_entry, largest_big)

    little_entry = LittleCoreIDs.get(core)
    if little_entry:
        # Versioned little-core entries need the same resolution as big cores,
        # otherwise largest_little would end up holding the raw list
        largest_little = _resolve_core_name(little_entry, largest_little)

# We only want the big core output
print(largest_big)
# print(largest_little)


================================================
FILE: Scripts/changelog_generator.py
================================================
#!/bin/env python3
import sys

import re

# Handles the following formats:
#
# <commit message> -> goes in Misc category
# <Category>: <commit message> -> Goes in <Category>
# <Category>/<Tag>: <commit message> -> Goes in <Category>/<Tag>

# Bucket every commit line read from stdin by its "<Category>" or
# "<Category>/<Tag>" prefix; anything without a prefix lands in "_Misc".
Meta = { }
for line in sys.stdin.readlines():
    tagged = re.match("^([A-Za-z0-9]+)/([A-Za-z0-9]+):(.+)$", line)
    if tagged:
        bucket = tagged.group(1).strip() + "/" + tagged.group(2).strip()
        Meta.setdefault(bucket, []).append(tagged.group(3).strip())
        continue

    plain = re.match("(^[A-Za-z0-9]+):(.+)$", line)
    if plain:
        Meta.setdefault(plain.group(1).strip(), []).append(plain.group(2).strip())
        continue

    Meta.setdefault("_Misc", []).append(line.strip())

print("FEX Release {0}".format(sys.argv[1]))

# Walk the buckets in sorted order, emitting a heading whenever the category
# or the tag within a category changes.
Category = ""
Tag = ""
for key in sorted(Meta):
    tag = "Misc" if key == "_Misc" else key
    category = tag.split("/")[0]

    if category != Category:
        Category = category
        Tag = ""
        print("")
        print("- " + category)

    # Only "<Category>/<Tag>" buckets get a nested tag heading
    if Tag != tag and tag != category:
        Tag = tag
        print("")
        print("  - " + tag.split("/")[1])

    # Changes nest one level deeper when a tag heading is active
    bullet = "  - " if Tag == "" else "    - "
    for change in Meta[key]:
        print(bullet + change)


================================================
FILE: Scripts/doc_outline_generator.py
================================================
#!/usr/bin/python3

# Tag Format
#  $info$
#  glossary: <name> ~ <definition> (Optional, registers or replaces a glossary entry)
#  glossary: IR ~ Intermediate Representation, a form of storage for our high-level opcode representation
#  glossary: SSA ~ Single Static Assignment, a form of representing IR in memory
#  glossary: Basic Block ~ A block of instructions with no control flow, terminated by control flow
#  glossary: Fragment ~ A Collection of basic blocks, possibly an entire guest function or a fraction of it
#  category: <name> ~ <description> (Optional, registers or replaces a category description)
#  category: backend ~ Concerns itself with generating binary code from (optimized) IR
#  meta: <name> ~ <description> (Optional, registers or replaces a meta, aka tag, description)
#  meta: backend|arm64 ~ Arm64 Splatter backend
#  tags: <meta name> [, <meta name>, ...] (Required if info tag exists)
#  tags: backend|arm64
#  desc: <short file description> (Optional)
#  desc: Main glue logic of the arm64 splatter backend
#  $end_info$

import re
from pathlib import Path
import sys

if len(sys.argv) != 4:
    print("doc_outline_generator GIT_DIR SRC_DIR LINK_PREFIX")
    sys.exit(-2)

# GIT_DIR: paths in the output are made relative to this directory
Base = Path(sys.argv[1])
# SRC_DIR: directory tree that is scanned for source files
Root = Path(sys.argv[2])
# LINK_PREFIX: prepended to every relative path when building a link
Prefix = sys.argv[3]

# Collect all C/C++ sources and headers below Root, grouped by extension
Paths = []
for pattern in ('*.c', '*.cpp', '*.cc', '*.h', '*.hpp'):
    Paths.extend(Root.rglob(pattern))

# Descriptions registered through "category:", "meta:" and "glossary:" lines
CategoryLabels = { }
MetaLabels = { }
GlossaryLabels = { }

# Meta: tag name -> list of files carrying it; Desc: file -> short description
Meta = { }
Desc = { }

# Read the first $info$ ... $end_info$ block of every file and record its
# "<name>: <value>" lines in the label/tag/description tables.
for path in Paths:
    with path.open() as file:
        txt = file.read()
        # Raw string: "\$" is not a valid Python string escape, so the
        # non-raw form triggers an invalid-escape warning on newer Pythons.
        x = re.findall(r"\$info\$([^\$]*)\$end_info\$", txt)
        if x:
            for entry in x[0].strip().split("\n"):
                fields = entry.split(":", 1)
                name = fields[0].strip()
                val = fields[1].strip()
                if name == "category":
                    # "<name> ~ <description>"
                    cat_name = val.split("~", 1)[0].strip()
                    cat_val = val.split("~", 1)[1].strip()
                    CategoryLabels[cat_name] = cat_val
                elif name == "meta":
                    meta_name = val.split("~", 1)[0].strip()
                    meta_val = val.split("~", 1)[1].strip()
                    MetaLabels[meta_name] = meta_val
                elif name == "glossary":
                    glossary_name = val.split("~", 1)[0].strip()
                    glossary_val = val.split("~", 1)[1].strip()
                    GlossaryLabels[glossary_name] = glossary_val
                elif name == "tags":
                    # Comma separated list of meta names this file belongs to
                    for meta_name in val.split(","):
                        if meta_name.strip() not in Meta:
                            Meta[meta_name.strip()] = []
                        Meta[meta_name.strip()].append(path)
                elif name == "desc":
                    Desc[path] = val
                else:
                    # Unknown line type inside an info block
                    print("Error")
                    sys.exit(-1)


# Pick up a readme in the scanned directory; "Readme.md" wins when both exist
Readme = None
for candidate in ("README.md", "Readme.md"):
    if (Root / candidate).is_file():
        Readme = Root / candidate

print("## " + Root.relative_to(Base).as_posix())

if Readme is not None:
    readme_target = Prefix + Readme.relative_to(Base).as_posix()
    print("See [" + Root.name + "/" + Readme.name + "](" + readme_target + ") for more details")

print("")

if GlossaryLabels:
    print ("### Glossary")
    print("")
    for term, meaning in GlossaryLabels.items():
        print("- " + term + ": " + meaning)
    print("")
    print("")

# Tags have the form "<category>|<topic>"; sorting groups them by category
Category = ""
for meta in sorted(Meta):
    pieces = meta.split("|")
    category = pieces[0]
    topic = pieces[1]

    if Category != category:
        # Separate consecutive categories with two blank lines
        if Category != "":
            print("")
            print("")
        Category = category
        print("### " + Category)
        if Category in CategoryLabels:
            print(CategoryLabels[Category])
        print("")

    print("#### " + topic)
    if meta in MetaLabels:
        print(MetaLabels[meta])

    for path in sorted(Meta[meta]):
        link = "- [" + path.name + "](" + Prefix + path.relative_to(Base).as_posix() + ")"
        if path in Desc:
            link = link + ": " + Desc[path]
        print(link)
    print("")
    print("")


================================================
FILE: Scripts/generate_changelog.sh
================================================
#!/bin/sh

# Print a changelog for all merge commits between <PREV-TAG> and HEAD,
# titled with <NEXT-TAG>. Must be run from the repository root.
if [ "$#" -ne 2 ]; then
	# Usage goes to stderr so it never ends up in captured changelog text
	echo "$0: <PREV-TAG> <NEXT-TAG>" >&2
	exit 255
fi

git log "$1..HEAD"  --pretty="%b (%h)" --abbrev-commit --merges | Scripts/changelog_generator.py "$2"


================================================
FILE: Scripts/generate_doc_outline.sh
================================================
#!/bin/sh

# Writes the source outline to stdout: a title taken from `git describe`
# followed by one generated section per documented source directory.
# Must be run from the repository root.

echo "# $(git describe --always)"
echo

repo_root="$(pwd)"

for source_dir in FEXCore ThunkLibs Source/Tests unittests; do
  ./Scripts/doc_outline_generator.py  "$repo_root" "$repo_root/$source_dir" "../"
done

# These don't have useful documentation at this point
#./Scripts/doc_outline_generator.py  "`pwd`" "`pwd`/Scripts" "../"
#./Scripts/doc_outline_generator.py  "`pwd`" "`pwd`/Source/Common" "../"


================================================
FILE: Scripts/generate_release.sh
================================================
#!/bin/sh

# Tags a new release: regenerates docs/SourceOutline.md, commits it, and
# creates an annotated tag whose message is the generated changelog.
# Must be run from the repository root.

# Allow release maintainer to override PREVIOUS and CURRENT by setting it before launching the script
PREVIOUS=${PREVIOUS:-FEX-$(date --date='-1 month' +%y%m)}
CURRENT=${CURRENT:-FEX-$(date +%y%m)}

# Both checks exit non-zero so callers can tell that nothing was tagged;
# a bare `exit` would return the status of the preceding echo, i.e. success.
if ! git rev-list "$PREVIOUS" > /dev/null 2>&1 ; then
  echo "$PREVIOUS tag doesn't exist" >&2
  exit 1
fi

if git rev-list "$CURRENT" > /dev/null 2>&1 ; then
  echo "$CURRENT tag already exists!" >&2
  exit 1
fi

echo "Tagging $CURRENT, previous release $PREVIOUS"
echo "Press Ctrl-C to cancel within 10 seconds"
sleep 10

# The temporary tag exists only while the outline is generated (the outline
# title comes from `git describe`); it is replaced by the real tag below.
git tag "$CURRENT" -a -m "temporary"
Scripts/generate_doc_outline.sh > docs/SourceOutline.md
git commit docs/SourceOutline.md -m "Docs: Update for release $CURRENT"
git tag -d "$CURRENT"
git tag -a "$CURRENT" -m "$(Scripts/generate_changelog.sh "$PREVIOUS" "$CURRENT")" --edit

echo "Inspect if everything went smoothly via 'git log -6 $CURRENT' "
echo "if all is good, do 'git push upstream $CURRENT'"


================================================
FILE: Scripts/guest_test_runner.py
================================================
#!/usr/bin/python3
import json
import os
import sys
import subprocess

def DoesFEXSupportAVX(mode):
    """Report whether the emulated CPU advertises both AVX and AVX2.

    Runs `cat /proc/cpuinfo` through the "FEX" binary located next to the
    executable named in sys.argv[7] and inspects the first "flags" line of
    its output. `mode` is accepted for the caller's convenience but unused.

    Returns True only when both 'avx' and 'avx2' are listed, else False.
    """
    # Check if FEX indicates support for AVX
    fex_interpreter_path = os.path.dirname(sys.argv[7]) + "/FEX"

    args = [fex_interpreter_path, '/bin/cat', '/proc/cpuinfo']

    process = subprocess.run(args, capture_output=True, text=True)

    # stdout is one big string: it has to be split into lines, iterating it
    # directly would walk it character by character and never find a match.
    for line in process.stdout.splitlines():
        key, separator, value = line.partition(':')
        if separator and key.strip() == 'flags':
            flags = value.split()
            return 'avx' in flags and 'avx2' in flags
    return False

def TestRequiresAVXSupport():
    """Report whether the test being run declares AVX as a required feature.

    The test executable is the last command line argument. Its requirements
    are looked up in "<dir of exe>/../requirements/<exe name>.json"; the test
    needs AVX when "AVX" appears in that file's "HostFeatures" entry.

    Returns False when the file is missing, is not valid JSON, or has no
    "HostFeatures" entry. Raises TypeError when the JSON is not an object.
    """
    # Check if the test itself requires AVX
    exe_path = sys.argv[-1]
    json_path = os.path.dirname(os.path.dirname(exe_path)) + '/requirements/' + os.path.basename(exe_path) + '.json'

    try:
        with open(json_path) as json_file:
            json_data = json.load(json_file)
    except IOError:
        # If we get here, then we don't have a corresponding JSON
        # file for the associated test, and can assume there's no
        # feature requirements for the test.
        return False
    except ValueError as ve:
        print(f'JSON error: {ve}')
        return False

    if not isinstance(json_data, dict):
        raise TypeError('JSON data must be a dict')

    # A requirements file without "HostFeatures" simply requires nothing
    return "AVX" in json_data.get("HostFeatures", [])

def LoadTestsFile(File):
    """Load a list of test names from `File` into a dict mapping name -> 1.

    Everything after a '#' on a line is a comment; blank lines are skipped.
    A missing file yields an empty dict.
    """
    if not os.path.exists(File):
        return {}

    with open(File) as listing:
        # remove comments and empty spaces
        names = [raw.split("#")[0].strip() for raw in listing]

    return {name: 1 for name in names if len(name) > 0}

def LoadTestsFileResults(File):
    """Load "<test name> <expected exit code>" lines from `File`.

    Everything after a '#' on a line is a comment; blank lines are skipped.
    Returns a dict mapping test name -> int exit code, empty when the file
    does not exist. A line with a name but no code raises IndexError and a
    non-numeric code raises ValueError.
    """
    Dict = {}
    if not os.path.exists(File):
        return Dict

    with open(File) as dtf:
        for line in dtf:
            test = line.split("#")[0].strip() # remove comments and empty spaces
            if len(test) > 0:
                # Split the cleaned text on any whitespace run, so indentation,
                # tabs, repeated spaces and trailing comments are all tolerated
                parts = test.split()
                Dict[parts[0]] = int(parts[1])

    return Dict


# Args: <Known Failures file> <ExpectedOutputsFile> <DisabledTestsFile> <FlakeTestsFile> <TestName> <Mode> <FexExecutable> <FexArgs>...

# fexargs should also include the test executable

# sys.argv[0] is this script, so the seven mandatory arguments occupy
# indices 1..7 and at least eight entries are needed before indexing.
if (len(sys.argv) < 8):
    sys.exit()

known_failures_file = sys.argv[1]
expected_output_file = sys.argv[2]
disabled_tests_file = sys.argv[3]
flake_tests_file = sys.argv[4]
test_name = sys.argv[5]
mode = sys.argv[6]
fexecutable = sys.argv[7]
StartingFEXArgsOffset = 8

# If the test requires AVX and FEX doesn't support it, just pass the test and move on
if TestRequiresAVXSupport() and not DoesFEXSupportAVX(mode):
    sys.exit(0)

# Open test expected information files and load in to dictionaries.
known_failures = LoadTestsFile(known_failures_file)
expected_output = LoadTestsFileResults(expected_output_file)
disabled_tests = LoadTestsFile(disabled_tests_file)
flake_tests = LoadTestsFile(flake_tests_file)

# Command line: the FEX executable followed by the rest of the arguments
RunnerArgs = [fexecutable] + sys.argv[StartingFEXArgsOffset:]

# print(RunnerArgs)

ResultCode = 0

# Handle flakes
TryCount = 1
if (flake_tests.get(test_name)):
    TryCount = 5

if (disabled_tests.get(test_name)):
    # This error code tells ctest that the test was skipped
    sys.exit(125)

# expect zero by default
expected_output.setdefault(test_name, 0)

for Try in range(TryCount):
    # Run the test and wait for it to end to get the result
    print(RunnerArgs)
    Process = subprocess.Popen(RunnerArgs)
    Process.wait()
    ResultCode = Process.returncode

    # Break if the expected output is the result code
    if (expected_output[test_name] == ResultCode):
        break

if (expected_output[test_name] != ResultCode):
    # An expected value always exists at this point (defaulted to zero above)
    print("test failed, expected is", expected_output[test_name], "but got", ResultCode)

    if (known_failures.get(test_name)):
        print("Passing because it was expected to fail")
        # failed and expected to fail -- pass the test
        sys.exit(0)
    else:
        # failed and unexpected to fail -- fail the test
        sys.exit(1)
else:
    print("test passed with", ResultCode)
    if (known_failures.get(test_name)):
        print("Failing because it was expected to fail")
        # passed and expected to fail -- fail the test
        sys.exit(1)
    else:
        # passed and expected to pass -- pass the test
        sys.exit(0)


================================================
FILE: Scripts/json_asm_config_parse.py
================================================
import sys
from json_config_parse import parse_json

# Args: <asm file> <output file>
if len(sys.argv) < 3:
    sys.exit()

output_file = sys.argv[2]
with open(sys.argv[1], "r") as asm_file:
    asm_text = asm_file.read()

# The test configuration is the JSON text sitting between "%ifdef CONFIG"
# and the "%endif" that follows it; files without both markers are ignored.
config_sections = asm_text.split("%ifdef CONFIG")
if len(config_sections) > 1:
    config_pieces = config_sections[1].split("%endif")
    if len(config_pieces) > 1:
        parse_json(config_pieces[0].strip(), output_file)


================================================
FILE: Scripts/json_config_parse.py
================================================
from enum import Flag
import json
import struct
import sys

class Regs(Flag):
    """Bit mask of guest registers, one bit per register.

    REG_ALL covers every individual register bit; REG_INVALID is a separate
    bit above them.
    """
    REG_NONE = 0
    REG_RIP = 1 << 0
    # General purpose registers
    REG_RAX = 1 << 1
    REG_RBX = 1 << 2
    REG_RCX = 1 << 3
    REG_RDX = 1 << 4
    REG_RSI = 1 << 5
    REG_RDI = 1 << 6
    REG_RBP = 1 << 7
    REG_RSP = 1 << 8
    REG_R8 = 1 << 9
    REG_R9 = 1 << 10
    REG_R10 = 1 << 11
    REG_R11 = 1 << 12
    REG_R12 = 1 << 13
    REG_R13 = 1 << 14
    REG_R14 = 1 << 15
    REG_R15 = 1 << 16
    # XMM vector registers
    REG_XMM0 = 1 << 17
    REG_XMM1 = 1 << 18
    REG_XMM2 = 1 << 19
    REG_XMM3 = 1 << 20
    REG_XMM4 = 1 << 21
    REG_XMM5 = 1 << 22
    REG_XMM6 = 1 << 23
    REG_XMM7 = 1 << 24
    REG_XMM8 = 1 << 25
    REG_XMM9 = 1 << 26
    REG_XMM10 = 1 << 27
    REG_XMM11 = 1 << 28
    REG_XMM12 = 1 << 29
    REG_XMM13 = 1 << 30
    REG_XMM14 = 1 << 31
    REG_XMM15 = 1 << 32
    # Segment registers
    REG_GS = 1 << 33
    REG_FS = 1 << 34
    # MM registers
    REG_MM0 = 1 << 35
    REG_MM1 = 1 << 36
    REG_MM2 = 1 << 37
    REG_MM3 = 1 << 38
    REG_MM4 = 1 << 39
    REG_MM5 = 1 << 40
    REG_MM6 = 1 << 41
    REG_MM7 = 1 << 42
    REG_MM8 = 1 << 43
    # Union of bits 0..43, and the first bit past them
    REG_ALL = (1 << 44) - 1
    REG_INVALID = 1 << 44

class ABI(Flag):
    """Calling convention selected by the "ABI" option of a test config."""
    ABI_SYSTEMV = 0
    ABI_WIN64 = 1
    ABI_NONE = 2

class Mode(Flag):
    """Guest bitness selected by the "MODE" option of a test config."""
    MODE_32 = 0
    MODE_64 = 1

class HostFeatures(Flag):
    """Bit mask of features a test may require, one bit per feature.

    FEATURE_ANY (no bits set) means the test has no requirements.
    """
    FEATURE_ANY = 0
    FEATURE_3DNOW = 1 << 0
    FEATURE_SSE4A = 1 << 1
    FEATURE_AVX = 1 << 2
    FEATURE_RAND = 1 << 3
    FEATURE_SHA = 1 << 4
    FEATURE_CLZERO = 1 << 5
    FEATURE_BMI1 = 1 << 6
    FEATURE_BMI2 = 1 << 7
    FEATURE_CLWB = 1 << 8
    FEATURE_LINUX = 1 << 9
    FEATURE_AES256 = 1 << 10
    FEATURE_AFP = 1 << 11
    FEATURE_SSSE3 = 1 << 12
    FEATURE_SSE4_1 = 1 << 13
    FEATURE_SSE4_2 = 1 << 14
    FEATURE_AES = 1 << 15
    FEATURE_PCLMUL = 1 << 16
    FEATURE_MOVBE = 1 << 17
    FEATURE_ADX = 1 << 18
    FEATURE_XSAVE = 1 << 19
    FEATURE_RDPID = 1 << 20
    FEATURE_CLFLOPT = 1 << 21
    FEATURE_FSGSBASE = 1 << 22
    FEATURE_EMMI = 1 << 23

# Register names accepted in a test config, mapped to their Regs flag.
# Every name corresponds to the Regs member called "REG_<name>".
RegStringLookup = {
    reg_name: Regs["REG_" + reg_name]
    for reg_name in (
        "NONE",
        "RAX", "RIP", "RBX", "RCX", "RDX", "RSI", "RDI", "RBP", "RSP",
        "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15",
        "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7",
        "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15",
        "GS", "FS",
        "ALL",
        "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7", "MM8",
    )
}

# ABI names accepted in a test config, mapped to their ABI value
ABIStringLookup = dict(
    SYSTEMV=ABI.ABI_SYSTEMV,
    WIN64=ABI.ABI_WIN64,
    NONE=ABI.ABI_NONE,
)

# Mode names accepted in a test config, mapped to their Mode value
ModeStringLookup = dict([
    ("32BIT", Mode.MODE_32),
    ("64BIT", Mode.MODE_64),
])

# Feature names accepted in a test config, mapped to their HostFeatures flag.
# Each name corresponds to the member "FEATURE_<name>", with '.' spelled as
# '_' in the member name (e.g. "SSE4.1" -> FEATURE_SSE4_1).
HostFeaturesLookup = {
    feature_name: HostFeatures["FEATURE_" + feature_name.replace(".", "_")]
    for feature_name in (
        "3DNOW", "SSE4A", "AVX", "RAND", "SHA", "CLZERO",
        "BMI1", "BMI2", "CLWB", "LINUX", "AES256", "AFP",
        "SSSE3", "SSE4.1", "SSE4.2", "AES", "PCLMUL", "MOVBE",
        "ADX", "XSAVE", "RDPID", "CLFLOPT", "FSGSBASE", "EMMI",
    )
}

def parse_hexstring(s):
    """Convert space separated hex numbers into a flat little-endian byte list.

    Each number may carry an optional "0x" prefix. Its digits are consumed two
    at a time from the right, so the least significant byte comes first.

    Returns a tuple (number of bytes, list of byte values).
    """
    byte_data = []
    for num in s.split(' '):
        # The prefix is checked per number; checking the whole input string
        # would strip (or keep) two characters on every number alike.
        if num.startswith("0x"):
            num = num[2:]
        while len(num) > 0:
            byte_data.append(int(num[-2:], 16))
            num = num[:-2]
    return len(byte_data), byte_data


def parse_json(json_text, output_file):
    """Parse a test-configuration JSON document and write it as a binary config file.

    Top-level keys are matched case-insensitively. Recognised keys are
    Match, Ignore, ABI, Mode, HostFeatures, StackSize, EntryPoint,
    MemoryRegions, RegData, MemoryData and Env; anything else is ignored.
    Numeric values are given as strings and parsed with int(value, 0), so
    they may use a "0x"/"0o"/"0b" prefix.

    Output layout (every field packed on its own, in native byte order):
        4 x uint64  match mask, ignore mask, stack size, entry point
        3 x uint32  ABI, mode, host features
        4 x (uint32 offset, uint32 count) for, in order: memory regions,
                    register data, memory data, environment variables
        followed by the four payload sections in that same order.

    Args:
        json_text: The JSON document as a string.
        output_file: Path of the binary file to create or overwrite.

    The process is terminated through sys.exit() with a message when an
    option holds an invalid value.
    """
    # Default options
    OptionMatch = Regs.REG_INVALID
    OptionIgnore = Regs.REG_NONE
    OptionABI = ABI.ABI_SYSTEMV
    OptionMode = Mode.MODE_64
    OptionHostFeatures = HostFeatures.FEATURE_ANY
    OptionStackSize = 4096
    OptionEntryPoint = 1
    OptionRegData = {}
    OptionMemoryRegions = {}
    OptionMemoryData = {}
    OptionEnvironmentVariables = {}

    json_object = json.loads(json_text)
    json_object = {k.upper(): v for k, v in json_object.items()}

    # Begin parsing the JSON
    if ("MATCH" in json_object):
        data = json_object["MATCH"]
        # A single register may be given as a bare string
        if (type(data) is str):
            data = [data]

        for data_val in data:
            data_val = data_val.upper()
            if not (data_val in RegStringLookup):
                sys.exit("Invalid Match register option")
            # First valid entry: leave the "untouched" sentinel so that the
            # register flags can be ORed together
            if (OptionMatch == Regs.REG_INVALID):
                OptionMatch = Regs.REG_NONE
            OptionMatch = OptionMatch | RegStringLookup[data_val]

    if ("IGNORE" in json_object):
        data = json_object["IGNORE"]
        if (type(data) is str):
            data = [data]

        for data_val in data:
            data_val = data_val.upper()
            if not (data_val in RegStringLookup):
                sys.exit("Invalid Ignore register option")
            OptionIgnore = OptionIgnore | RegStringLookup[data_val]

    if ("ABI" in json_object):
        data = json_object["ABI"].upper()
        if not (data in ABIStringLookup):
            sys.exit("Invalid ABI")
        OptionABI = ABIStringLookup[data]

    if ("MODE" in json_object):
        data = json_object["MODE"].upper()
        if not (data in ModeStringLookup):
            sys.exit("Invalid Mode")
        OptionMode = ModeStringLookup[data]

    if ("HOSTFEATURES" in json_object):
        data = json_object["HOSTFEATURES"]
        if not (type(data) is list):
            sys.exit("HostFeatures value must be list of features")

        for data_key in data:
            data_key = data_key.upper()
            if not (data_key in HostFeaturesLookup):
                sys.exit("Invalid host feature '{}'".format(data_key))

            OptionHostFeatures |= HostFeaturesLookup[data_key]

    if ("STACKSIZE" in json_object):
        OptionStackSize = int(json_object["STACKSIZE"], 0)

    if ("ENTRYPOINT" in json_object):
        data = int(json_object["ENTRYPOINT"], 0)
        if (data == 0):
            sys.exit("Invalid entrypoint of 0")
        OptionEntryPoint = data

    if ("MEMORYREGIONS" in json_object):
        data = json_object["MEMORYREGIONS"]
        if not (type(data) is dict):
            sys.exit("MemoryRegions value must be list of key:value pairs")
        for data_key, data_val in data.items():
            OptionMemoryRegions[int(data_key, 0)] = int(data_val, 0)

    if ("REGDATA" in json_object):
        data = json_object["REGDATA"]
        if not (type(data) is dict):
            sys.exit("RegData value must be list of key:value pairs")
        for data_key, data_val in data.items():
            data_key = data_key.upper()
            if not (data_key in RegStringLookup):
                sys.exit("Invalid RegData register option")

            # A register takes either one value or a list of values; both are
            # stored as a list of integers
            if (type(data_val) is list):
                data_key_values = [int(data_key_value, 0) for data_key_value in data_val]
            else:
                data_key_values = [int(data_val, 0)]
            OptionRegData[RegStringLookup[data_key]] = data_key_values

    if ("MEMORYDATA" in json_object):
        data = json_object["MEMORYDATA"]
        if not (type(data) is dict):
            sys.exit("MemoryData value must be list of key:value pairs")
        for data_key, data_val in data.items():
            length, byte_data = parse_hexstring(data_val)
            OptionMemoryData[int(data_key, 0)] = (length, byte_data)

    if ("ENV" in json_object):
        data = json_object["ENV"]
        if not (type(data) is dict):
            sys.exit("Environment variables value must be list of key:value pairs")

        for data_key, data_val in data.items():
            OptionEnvironmentVariables[data_key] = data_val

    # If Match option wasn't touched then set it to the default
    if (OptionMatch == Regs.REG_INVALID):
        OptionMatch = Regs.REG_NONE

    memRegions = bytes()
    regData = bytes()
    memData = bytes()
    envData = bytes()

    # Memory regions: one (key, value) pair of uint64 per region
    for key, val in OptionMemoryRegions.items():
        memRegions += struct.pack('Q', key)
        memRegions += struct.pack('Q', val)

    # Register values: uint32 value count, uint64 register id, then the values
    for reg_key, reg_val in OptionRegData.items():
        regData += struct.pack('I', len(reg_val))
        regData += struct.pack('Q', reg_key.value)
        for val in reg_val:
            regData += struct.pack('Q', val)

    # Memory data: uint64 address, uint32 byte count, then the raw bytes
    for reg_key, reg_val in OptionMemoryData.items():
        length, data = reg_val
        memData += struct.pack('Q', reg_key) # address
        memData += struct.pack('I', length)
        for byte in data:
            memData += struct.pack('B', byte)

    # Environment variables: NUL-terminated key followed by NUL-terminated value
    for key, val in OptionEnvironmentVariables.items():
        envData += key.encode()
        envData += struct.pack('B', 0)
        envData += val.encode()
        envData += struct.pack('B', 0)

    # Total length of header, including offsets/counts below
    headerLength = (8 * 4) + (4 * 3) + (4 * 8)

    # `with` closes the file even when one of the writes raises
    with open(output_file, "wb") as config_file:
        config_file.write(struct.pack('Q', OptionMatch.value))
        config_file.write(struct.pack('Q', OptionIgnore.value))
        config_file.write(struct.pack('Q', OptionStackSize))
        config_file.write(struct.pack('Q', OptionEntryPoint))
        config_file.write(struct.pack('I', OptionABI.value))
        config_file.write(struct.pack('I', OptionMode.value))
        config_file.write(struct.pack('I', OptionHostFeatures.value))

        offset = headerLength

        # memory regions offset/count
        config_file.write(struct.pack('I', offset))
        config_file.write(struct.pack('I', len(OptionMemoryRegions)))
        offset += len(memRegions)

        # register values offset/count
        config_file.write(struct.pack('I', offset))
        config_file.write(struct.pack('I', len(OptionRegData)))
        offset += len(regData)

        # memory data offset/count
        config_file.write(struct.pack('I', offset))
        config_file.write(struct.pack('I', len(OptionMemoryData)))
        offset += len(memData)

        # environment data offset/count
        config_file.write(struct.pack('I', offset))
        config_file.write(struct.pack('I', len(OptionEnvironmentVariables)))

        # write out the actual data for memory regions, reg data, memory data
        # and environment variables
        config_file.write(memRegions)
        config_file.write(regData)
        config_file.write(memData)
        config_file.write(envData)


================================================
FILE: Scripts/json_ir_config_parse.py
================================================
import sys
from json_config_parse import parse_json

# Usage: json_ir_config_parse.py <input file> <output config file>
#
# Extracts the configuration block that sits between ";%ifdef CONFIG" and
# ";%endif" in the input file. Every line of that block must start with the
# comment character ';'. The leading ';' characters are stripped and the
# remaining text is handed to parse_json, which writes the output file.
# Nothing is written when the input carries no such block.
if (len(sys.argv) < 3):
    sys.exit()

output_file = sys.argv[2]

# `with` closes the input even if reading it fails
with open(sys.argv[1], "r") as asm_file:
    asm_text = asm_file.read()

json_text = asm_text.split(";%ifdef CONFIG")
if (len(json_text) > 1):
    json_text = json_text[1].split(";%endif")
    if (len(json_text) > 1):
        json_text = json_text[0].strip()
        # We need to walk each line of text and remove the comment line
        parsed_lines = []
        for line in json_text.splitlines(False):
            line = line.strip()
            # startswith() also covers a blank line inside the block, which
            # would raise IndexError if the first character were indexed
            if not line.startswith(';'):
                sys.exit("Config line needs to start with a comment character ;")
            parsed_lines.append(line.lstrip(";"))

        parse_json('\n'.join(parsed_lines).strip(), output_file)


================================================
FILE: Scripts/reformat.sh
================================================
#!/bin/sh -e

# Remember where we were started from so we can return there at the end
ORIGIN_DIR=$(pwd)

# Defaults: format the whole tree, in the current directory
ONLY_MODIFIED=false
WORK_DIR=""

# Walk the arguments. `--changed` restricts formatting to the tracked files
# that `git ls-files -m` reports as modified; any other argument is taken as
# the directory to operate in (the last one given wins).
for ARG in "$@"; do
    case $ARG in
        --changed)
            ONLY_MODIFIED=true
            ;;
        *)
            WORK_DIR=$ARG
            ;;
    esac
done

# Switch to the requested directory, if one was given
if [ -n "$WORK_DIR" ]; then
    cd "$WORK_DIR"
fi

if [ "$ONLY_MODIFIED" != true ]; then
    # Reformat every tracked C++ source file in the tree, one clang-format
    # process per file, running as many in parallel as there are processors
    git ls-files -z '*.cpp' '*.h' '*.inl' | xargs -0 -n 1 -P "$(nproc)" clang-format-19 -i
else
    # Refuse to run while tracked files are deleted in the working tree
    if git ls-files -d | head -1 | grep -q .; then
        echo "Error: Unstaged deletions detected. Please stage or discard deletions before formatting."
        exit 1
    fi

    MODIFIED_FILES=$(git ls-files -m '*.cpp' '*.h' '*.inl')
    if [ -z "$MODIFIED_FILES" ]; then
        echo "No changed files to format."
    else
        # One file name per line, hence the newline delimiter
        echo "$MODIFIED_FILES" | xargs -d '\n' -n 1 -P "$(nproc)" clang-format-19 -i
    fi
fi

cd "$ORIGIN_DIR"


================================================
FILE: Scripts/testharness_runner.py
================================================
#!/usr/bin/python3
import sys
import subprocess
from os import path
from shutil import which

# Args: <Known Failures file> <Known Failures Type File> <DisabledTestsFile> <DisabledTestsTypeFile> <DisabledTestsRunnerFile> <TestName> <FullTestName> <Test Harness Executable> <Args>...
#
# Exit status:
#   125            the test is listed as disabled (ctest treats this as skipped)
#   0 / 1          for a known failure: 0 if the runner failed, 1 if it passed
#   runner status  otherwise

# sys.argv[0] is this script, so the eight mandatory arguments occupy
# sys.argv[1] through sys.argv[8]. Anything shorter cannot name a runner, and
# reporting success without running anything would hide the mistake.
if len(sys.argv) < 9:
    sys.exit("usage: testharness_runner.py <KnownFailuresFile> <KnownFailuresTypeFile> "
             "<DisabledTestsFile> <DisabledTestsTypeFile> <DisabledTestsRunnerFile> "
             "<TestName> <FullTestName> <TestHarnessExecutable> [Args...]")

known_failures = set()
disabled_tests = set()
known_failures_file = sys.argv[1]
known_failures_type_file = sys.argv[2]
disabled_tests_file = sys.argv[3]
disabled_tests_type_file = sys.argv[4]
disabled_tests_runner_file = sys.argv[5]

current_test = sys.argv[6]
full_test_name = sys.argv[7]
runner = sys.argv[8]
args_start_index = 9


def load_test_names(names, file_path, optional):
    """Add every stripped line of file_path to the set `names`.

    A missing file is silently skipped when `optional` is true; otherwise
    open() raises as usual.
    """
    if optional and not path.exists(file_path):
        return
    with open(file_path) as name_file:
        for line in name_file:
            names.add(line.strip())


# The base lists must exist; the per-type and per-runner lists are optional
load_test_names(known_failures, known_failures_file, optional=False)
load_test_names(known_failures, known_failures_type_file, optional=True)
load_test_names(disabled_tests, disabled_tests_file, optional=False)
load_test_names(disabled_tests, disabled_tests_type_file, optional=True)
load_test_names(disabled_tests, disabled_tests_runner_file, optional=True)

# Run under catchsegv when it is installed, followed by the runner and the
# rest of the arguments
RunnerArgs = [runner] + sys.argv[args_start_index:]
if which("catchsegv") is not None:
    RunnerArgs.insert(0, "catchsegv")

if current_test in disabled_tests:
    # This error code tells ctest that the test was skipped
    sys.exit(125)

# Run the test and wait for it to end to get the result
ResultCode = subprocess.Popen(RunnerArgs).wait()

# Check for known failures - try full test name first, then partial test name
is_known_failure = full_test_name in known_failures or current_test in known_failures

if is_known_failure:
    # A known failure "passes" when the runner errored, and fails when the
    # runner unexpectedly succeeded
    sys.exit(0 if ResultCode else 1)

# Just return the result code if we don't have this test as a known failure
sys.exit(ResultCode)


================================================
FILE: Scripts/update_instcountci.sh
================================================
#!/bin/sh -e

# Make sure we actually build
ninja

# Run tests, ignoring the retval since there will be changes.
ninja instcountci_tests || true

# Now we can update.
ninja instcountci_update_tests

# Commit the result in bulk.
# The pathspec is quoted so that the shell never attempts filename expansion
# on it; git itself resolves the `:/` prefix and the `*` wildcard.
git add -u ':/unittests/InstructionCountCI/*.json'
git commit -sm "InstCountCI: Update"


================================================
FILE: Source/CMakeLists.txt
================================================
# Disable strict aliasing for all build modes
# See discussion in https://github.com/FEX-Emu/FEX/pull/4494#issuecomment-2800608944
# for background context.
# The generator expression restricts the flag to C++ compilations.
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:-fno-strict-aliasing>)

# These subdirectories are added unconditionally.
add_subdirectory(Common/)
add_subdirectory(Tools/)

# Windows/ is only added when CMake reports a MinGW toolchain.
if (MINGW)
  add_subdirectory(Windows/)
endif()


================================================
FILE: Source/Common/ArgumentLoader.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/ArgumentLoader.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <stdint.h>

namespace FEX::ArgLoader {
// Splits the command line handed to the constructor: everything after
// argv[0] becomes the remaining arguments, while argv[0] itself (the
// interpreter) is recorded as the only program argument.
// Both lists are rebuilt from scratch on every call.
void FEX::ArgLoader::ArgLoader::PreLoad() {
  RemainingArgs.clear();
  ProgramArguments.clear();

  // argv[0] is the interpreter and is deliberately left out here
  for (char** Arg = argv + 1; Arg < argv + argc; ++Arg) {
    RemainingArgs.emplace_back(*Arg);
  }

  // The interpreter goes into ProgramArguments instead
  ProgramArguments.emplace_back(argv[0]);
}

} // namespace FEX::ArgLoader


================================================
FILE: Source/Common/ArgumentLoader.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

namespace FEX::ArgLoader {
/**
 * Configuration layer (LAYER_ARGUMENTS) that captures the process command line.
 *
 * The argument vector is split once during construction by PreLoad();
 * Load() deliberately does nothing.
 */
class ArgLoader final : public FEXCore::Config::Layer {
public:
  // Stores argc/argv (the argv strings themselves are not copied until
  // PreLoad() runs, which happens right here in the constructor).
  explicit ArgLoader(int argc, char** argv)
    : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_ARGUMENTS)
    , argc {argc}
    , argv {argv} {
    PreLoad();
  }

  void Load() override {
    // Intentional no-op.
  }

  // Rebuilds RemainingArgs and ProgramArguments from argc/argv.
  void PreLoad();

  // Returns a copy of the arguments following argv[0].
  // const-qualified so it can be called through a const reference.
  fextl::vector<fextl::string> Get() const {
    return RemainingArgs;
  }

  // Returns a copy of the program argument list filled in by PreLoad().
  fextl::vector<fextl::string> GetParsedArgs() const {
    return ProgramArguments;
  }

private:
  int argc {};
  char** argv {};

  fextl::vector<fextl::string> RemainingArgs {};
  fextl::vector<fextl::string> ProgramArguments {};
};

} // namespace FEX::ArgLoader


================================================
FILE: Source/Common/Async.h
================================================
// SPDX-License-Identifier: MIT
/**
 * Helper framework to enable asynchronous IO operations on file descriptor objects (networking, files).
 *
 * Strongly inspired by Boost.Asio.
 */
#pragma once

#include <algorithm>
#include <cassert>
#include <chrono>
#include <optional>
#include <poll.h>
#include <span>
#include <utility>
#include <vector>
#include <unistd.h>

#include <FEXCore/fextl/functional.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/Utils/LogManager.h>

namespace fasio {

// Status code returned by fasio operations and handed to completion callbacks.
enum class error {
  success,
  timeout,      // User-specified timeout expired
  eof,          // Permanently reached end of data stream (e.g. because socket connection was closed by peer)
  invalid,      // Invalid input parameters
  generic_errno // Read errno for details
};

/**
 * This selects which action to trigger when returning from a reactor callback.
 * The default (drop) will drop the callback so that the caller can register
 * a new one.
 */
enum class post_callback {
  drop,         // Drop the callback
  repeat,       // Continue using the same callback
  stop_reactor, // Triggers exit from run()
};

/**
 * Core event loop for asynchronous code. Corresponds to asio::io_context,
 * specialized for multiplexing file descriptors via ppoll().
 *
 * A reactor tracks a set of file descriptors and calls user-provided callbacks
 * when they become ready. For example, the callback for a network socket will
 * be called when data is ready to be received on the socket.
 *
 */
struct poll_reactor {
private:
  std::vector<pollfd> PollFDs;
  std::optional<int> CurrentFD; // FD that is currently being processed

  bool is_stopped = false;

  // Pipe backing stop_async(): [0] is the polled read end, [1] the write end.
  // A value of -1 means "not open".
  int AsyncStopRequest[2] = {-1, -1};

  // Maps FD to callback
  fextl::map<int, fextl::move_only_function<post_callback(error)>> callbacks;

  // Deferred change to PollFDs. Changes are queued while the FD list is being
  // walked and applied by update_fd_list().
  struct Event {
    pollfd FD;
    bool Erase = false;
    bool Insert = false;
    // For Erase events: skip the erase if a handler has been bound to the FD
    // by the time the event is applied
    bool KeepIfBound = false;
  };
  std::vector<Event> QueuedEvents;

public:
  ~poll_reactor() {
    // Only close pipe ends that are actually open: -1 is the "unset" marker
    // (and is truthy), and stop_async() may have closed the write end already.
    for (int& PipeFD : AsyncStopRequest) {
      if (PipeFD != -1) {
        ::close(PipeFD);
        PipeFD = -1;
      }
    }
  }

  // Adds an internal FD to wake up and exit the reactor when stop_async() is called from any thread.
  void enable_async_stop() {
    if (::pipe(AsyncStopRequest) != 0) {
      ERROR_AND_DIE_FMT("Failed to create the pipe used for stop_async");
    }
    PollFDs.push_back(pollfd {.fd = AsyncStopRequest[0], .events = POLLHUP, .revents = 0});
    callbacks[AsyncStopRequest[0]] = [](error) {
      return post_callback::stop_reactor;
    };
  }

  // Requests the reactor to stop. Requires a prior enable_async_stop().
  // Repeated calls are harmless: the write end is closed only once.
  void stop_async() {
    if (AsyncStopRequest[0] == -1) {
      ERROR_AND_DIE_FMT("Tried to use stop_async without calling enable_async_stop during setup");
    }
    // Wake up run() thread by closing this pipe endpoint. The slot is reset so
    // that neither a second call nor the destructor closes the same FD number
    // again (it may have been reused for something else by then).
    if (int WriteEnd = std::exchange(AsyncStopRequest[1], -1); WriteEnd != -1) {
      ::close(WriteEnd);
    }
  }

  // Drops all registered callbacks
  void cleanup() {
    callbacks.clear();
  }

  [[nodiscard]]
  bool stopped() const {
    return is_stopped;
  }

  /**
   * Waits for activity on the registered FDs and dispatches their callbacks once.
   *
   * Without a Timeout the wait blocks indefinitely. Returns error::timeout if
   * the timeout expired, error::generic_errno if ppoll failed (other than
   * EINTR/EAGAIN), and error::success after callbacks were dispatched.
   */
  error run_one(std::optional<std::chrono::nanoseconds> Timeout = std::nullopt) {
    // Process events queued before entering wait loop
    update_fd_list();

    timespec ts = to_timespec(Timeout.value_or(std::chrono::nanoseconds {0}));

    // ppoll may return EINTR/EAGAIN, so a loop is used here. Normally, we return in the first iteration.
    while (true) {
      int Result = ::ppoll(PollFDs.data(), PollFDs.size(), Timeout ? &ts : nullptr, nullptr);

      if (Result < 0) {
        if (errno == EINTR || errno == EAGAIN) {
          continue;
        }
        return error::generic_errno;
      } else if (Result == 0) {
        return error::timeout;
      } else {
        // Walk the FDs and see if we got any results
        for (auto& ActiveFD : PollFDs) {
          if (ActiveFD.revents == 0) {
            continue;
          }
          if (Result-- == 0) {
            break;
          }

          if (ActiveFD.revents & POLLIN) {
            // NOTE: For sockets, this is triggered on close, too. Pipes only report POLLHUP, however.
            CurrentFD = ActiveFD.fd;

            // Take the callback out of the map so that it may bind a new
            // handler for the same FD while it runs. The map entry itself
            // stays behind in its moved-from state.
            auto Callback = std::move(callbacks[ActiveFD.fd]);
            if (!Callback) {
              ERROR_AND_DIE_FMT("Data available for reading on FD {} but no read callback registered", ActiveFD.fd);
            }
            auto Ret = Callback(error::success);
            if (Ret == post_callback::repeat) {
              callbacks[ActiveFD.fd] = std::move(Callback);
            } else if (Ret == post_callback::drop) {
              // If no new callback was registered, drop the FD from the list and skip any remaining events.
              // The map entry always exists at this point (operator[] above),
              // so key presence says nothing; what matters is whether the
              // entry holds a handler. Like bind_handler, this relies on a
              // moved-from function object testing as empty.
              const auto Rebound = callbacks.find(ActiveFD.fd);
              if (Rebound == callbacks.end() || !Rebound->second) {
                QueuedEvents.push_back(Event {.FD = {.fd = ActiveFD.fd}, .Erase = true, .KeepIfBound = true});
                ActiveFD.revents = 0;
              }
            } else if (Ret == post_callback::stop_reactor) {
              is_stopped = true;
            }
            CurrentFD.reset();
          }
          if (ActiveFD.revents & (POLLHUP | POLLERR | POLLNVAL | POLLRDHUP)) {
            auto Callback = std::move(callbacks[ActiveFD.fd]);
            if (Callback) {
              is_stopped |= (Callback(error::eof) == post_callback::stop_reactor);
            }
            // Error or hangup, erase the socket from our list
            QueuedEvents.push_back(Event {.FD = {.fd = ActiveFD.fd}, .Erase = true});
          }

          ActiveFD.revents = 0;
        }

        if (is_stopped) {
          cleanup();
          return error::success;
        }

        update_fd_list();
        return error::success;
      }
    }
  }

  // Calls run_one() until it reports something other than success or a
  // callback stopped the reactor; all callbacks are dropped before returning.
  error run(std::optional<std::chrono::nanoseconds> Timeout = std::nullopt) {
    while (true) {
      auto Result = run_one(Timeout);
      if (Result != error::success || is_stopped) {
        cleanup();
        return Result;
      }
    }
  }

  // Registers Callback as the handler for FD. Only one handler may be pending
  // per FD at a time.
  void bind_handler(pollfd FD, fextl::move_only_function<post_callback(error)> Callback) {
    [[maybe_unused]] auto Previous = std::exchange(callbacks[FD.fd], std::move(Callback));
    assert(!Previous && "May not queue multiple async operations");

    // Add the FD to the poll list if it's not already contained
    if (CurrentFD != FD.fd && PollFDs.end() == std::find_if(PollFDs.begin(), PollFDs.end(), [&](auto& Prev) { return FD.fd == Prev.fd; })) {
      QueuedEvents.push_back(Event {.FD = FD, .Insert = true});
    }
  }

private:
  // Splits a duration into the seconds/nanoseconds pair expected by ppoll
  timespec to_timespec(std::chrono::nanoseconds Duration) {
    timespec Timespec {};
    auto Seconds = std::chrono::duration_cast<std::chrono::seconds>(Duration);
    Timespec.tv_sec = Seconds.count();
    Timespec.tv_nsec = std::chrono::duration_cast<std::chrono::nanoseconds>(Duration - Seconds).count();
    return Timespec;
  }

  // Applies all queued insertions/removals to PollFDs, in queue order
  void update_fd_list() {
    for (auto& Queued : QueuedEvents) {
      if (Queued.Erase) {
        // An erase queued because a callback was dropped is void if a handler
        // was bound to the FD in the meantime
        bool Rebound = false;
        if (Queued.KeepIfBound) {
          const auto Handler = callbacks.find(Queued.FD.fd);
          Rebound = Handler != callbacks.end() && static_cast<bool>(Handler->second);
        }

        if (!Rebound) {
          // Swap-and-pop; guarded because the FD may not be listed (anymore)
          auto Pos = std::find_if(PollFDs.begin(), PollFDs.end(), [&](auto& FD) { return FD.fd == Queued.FD.fd; });
          if (Pos != PollFDs.end()) {
            std::iter_swap(Pos, std::prev(PollFDs.end()));
            PollFDs.pop_back();
          }
          callbacks.erase(Queued.FD.fd);
        }
      }

      if (Queued.Insert) {
        PollFDs.push_back(Queued.FD);
      }
    }
    QueuedEvents.clear();
  }
};

/**
 * Corresponds to asio::mutable_buffer.
 */
struct mutable_buffer {
  // Bytes of this chunk
  std::span<std::byte> Data;
  // Next chunk of the chain, or nullptr for the last one. Not owned.
  mutable_buffer* Next = nullptr;

  // Optional FD to send/receive via ancillary buffer.
  // This may only be used with non-empty data, and there may only be up to one FD per buffer chain
  std::optional<int*> FD;

  // Total number of bytes in this chunk and all chunks chained after it
  size_t size() const {
    size_t Ret = 0;
    const mutable_buffer* Current = this;
    do {
      Ret += Current->Data.size_bytes();
      Current = Current->Next;
    } while (Current);

    if (Ret == 0) {
      assert(!FD);
    }
    return Ret;
  }

  // Clears the FD slot and returns the descriptor value it pointed at
  int consume_fd() {
    assert(FD);
    return **std::exchange(FD, std::nullopt);
  }

  /**
   * Advances the buffer by NumBytes: fully consumed leading chunks are
   * skipped and the first remaining chunk is shortened. Advancing past the
   * end leaves an empty last chunk. The FD slot of this object is kept.
   */
  mutable_buffer& operator+=(size_t NumBytes) {
    mutable_buffer* Current = this;
    while (Current->Next && NumBytes >= Current->Data.size_bytes()) {
      // Subtract the size of the chunk being skipped, not of the head chunk
      NumBytes -= Current->Data.size_bytes();
      Current = Current->Next;
      assert(Current->FD == std::nullopt);
    }
    // Become the chunk we stopped at, minus the bytes consumed from it
    auto PendingFD = std::exchange(FD, std::nullopt);
    *this = *Current;
    Data = Data.subspan(std::min(Data.size_bytes(), NumBytes));
    FD = PendingFD;
    return *this;
  }

  // Number of chunks in the chain, including this one
  size_t count_chunks() const {
    size_t Ret = 1;
    const mutable_buffer* Current = this;
    while (Current->Next) {
      Current = Current->Next;
      ++Ret;
    }
    return Ret;
  }
};

/**
 * Links the given buffers into one chain (each element's Next is pointed at
 * its successor) and returns a copy of the first element. The Next pointer of
 * the last element is left untouched. The chain refers to the elements of
 * Buffers, which therefore must stay alive while the result is in use.
 *
 * An empty span yields an empty buffer.
 */
inline mutable_buffer Chained(std::span<mutable_buffer> Buffers) {
  if (Buffers.empty()) {
    // Nothing to chain; indexing the first element would be out of bounds
    return {};
  }
  for (size_t i = 0; i + 1 < Buffers.size(); ++i) {
    Buffers[i].Next = &Buffers[i + 1];
  }
  return Buffers.front();
}

/**
 * Corresponds to asio::dynamic_vector_buffer.
 */
struct dynamic_vector_buffer {
  // Vector that receives the data. Held by reference, so it must outlive this wrapper.
  fextl::vector<std::byte>& Data;

  // Maximum number of bytes to grow to.
  // Defaults to the capacity the vector has when this wrapper is created.
  size_t max_size = Data.capacity();
};

/**
 * Asynchronously reads data from the given stream until MatchPredicate reports a match. The read
 * is queued to the stream's reactor and will progress whenever data is available.
 *
 * MatchPredicate must have the signature pair<Iter, bool>(Iter, Iter):
 * - The input iterators provide the range of new data bytes
 * - The returned boolean indicates if a match was found
 * - The returned iterator is the match location or the location at which to continue testing after the next read
 *
 * The read data will be appended to Buffers. Data past the match returned from the last read data will also be included.
 *
 * Corresponds to asio::async_read_until.
 */
template<typename AsyncReadStream, typename MatchPredicate, typename OnComplete>
requires std::is_invocable_r_v<void, OnComplete, error, size_t>
void async_read_until(AsyncReadStream& Stream, dynamic_vector_buffer Buffers, MatchPredicate Predicate, OnComplete UserCallback) {
  // Completion handler for each partial read. It carries the scan state by
  // value and passes a copy of itself to the stream for the next read.
  struct Callback {
    size_t BeginPos; // Offset at which the predicate continues scanning
    size_t EndPos;   // Offset one past the last byte received so far
    AsyncReadStream& Stream;
    dynamic_vector_buffer Buffers;
    MatchPredicate Predicate;
    OnComplete UserCallback;

    void operator()(error Err, size_t BytesRead, std::optional<int> FD) {
      if (Err != error::success) {
        UserCallback(Err, 0);
        return;
      }

      // Start with the predicate check to avoid fetching data unnecessarily
      EndPos += BytesRead;
      if (EndPos != BeginPos) {
        auto Begin = Buffers.Data.begin() + BeginPos;
        auto End = Buffers.Data.begin() + EndPos;
        auto [It, Found] = Predicate(Begin, End);
        BeginPos = It - Buffers.Data.begin();
        if (Found) {
          Buffers.Data.resize(EndPos); // Shrink down to size of data actually received
          UserCallback(error::success, BeginPos);
          return;
        }
      }

      // Fill the entire remaining capacity, or resize for a minimum of 512 bytes.
      // The subtraction is guarded so it cannot wrap around when the data
      // already reaches the limit.
      const size_t Limit = std::min(Buffers.Data.capacity(), Buffers.max_size);
      const size_t Remaining = Limit > EndPos ? Limit - EndPos : 0;
      const size_t BytesToRead = std::max<size_t>(Remaining, 512);

      // The buffer is about to be resized to EndPos + BytesToRead, so that is
      // the size to hold against the limit. The vector's current size still
      // includes the unused tail of the previous read.
      if (EndPos > Buffers.max_size || BytesToRead > Buffers.max_size - EndPos) {
        ERROR_AND_DIE_FMT("Out of buffer space");
      }

      Buffers.Data.resize(EndPos + BytesToRead);

      // Queue data read into the free tail of the buffer, behind the bytes
      // received so far.
      // On completion, this handler will check if enough data was received and will queue more reads if needed.
      Stream.async_read_some(mutable_buffer {std::span {Buffers.Data}.subspan(EndPos)}, *this);
    }
  };

  // Check existing data for a predicate match, then initiate async reading if necessary
  Callback {0, Buffers.Data.size(), Stream, Buffers, std::move(Predicate), std::move(UserCallback)}(error::success, 0, std::nullopt);
}

// Move-only callable taking (error, size_t, std::optional<int>) and returning nothing.
using read_callback = fextl::move_only_function<void(error, size_t, std::optional<int>)>;

/**
 * Synchronously reads fixed-length data from the given Stream.
 *
 * The length is inferred from the size of the output buffer(s).
 *
 * Corresponds to asio::read.
 */
// Stream must provide read_some(const mutable_buffer&, error&).
// Reads until Buffers is filled (and the requested FD, if any, was handled) or
// read_some reports an error. Returns the number of bytes read; ec holds the
// error of the failing read, or error::success.
template<typename AsyncReadStream>
std::size_t read(AsyncReadStream& Stream, mutable_buffer Buffers, error& ec) {
  size_t TotalBytesRead = 0;
  while (Buffers.size() != 0 || Buffers.FD) {
    const auto BytesRead = Stream.read_some(Buffers, ec);
    TotalBytesRead += BytesRead;

    // Bail out before looking at the FD slot: a failed read need not have
    // written it, and the check below would then mask the actual error.
    if (ec != error::success) {
      return TotalBytesRead;
    }

    if (Buffers.FD) {
      LOGMAN_THROW_A_FMT(**Buffers.FD != -1, "Receiver requested a file descriptor but none was sent");
      (void)Buffers.consume_fd();
    }
    Buffers += BytesRead;
  }
  ec = error::success;
  return TotalBytesRead;
}

/**
 * Synchronously writes fixed-length data to the given Stream.
 *
 * The length is inferred from the size of the input buffer(s).
 *
 * Corresponds to asio::write.
 */
// Stream must provide write_some(const mutable_buffer&, error&).
// Keeps writing until all of Buffers (and its FD, if any) was handed to the
// stream or write_some reports an error. Returns the number of bytes written;
// ec holds the error of the failing write, or error::success.
template<typename AsyncReadStream>
std::size_t write(AsyncReadStream& Stream, mutable_buffer Buffers, error& ec) {
  size_t Sent = 0;
  for (;;) {
    // Done once neither payload bytes nor a pending FD remain
    if (Buffers.size() == 0 && !Buffers.FD) {
      break;
    }

    const auto Chunk = Stream.write_some(Buffers, ec);
    Sent += Chunk;

    // The FD slot is cleared after the first write attempt
    if (Buffers.FD) {
      (void)Buffers.consume_fd();
    }
    Buffers += Chunk;

    if (ec != error::success) {
      return Sent;
    }
  }

  ec = error::success;
  return Sent;
}

/**
 * Owning RAII wrapper around a file descriptor.
 *
 * Corresponds to asio::posix::descriptor.
 */
struct posix_descriptor {
  poll_reactor* Reactor = nullptr;
  int FD = -1; // -1 means "no descriptor owned"

  posix_descriptor(poll_reactor& Reactor, int FD)
    : Reactor(&Reactor)
    , FD(FD) {}

  // Takes over the descriptor; Other is left without one
  posix_descriptor(posix_descriptor&& Other)
    : Reactor(Other.Reactor)
    , FD(std::exchange(Other.FD, -1)) {}

  // Closes the descriptor currently held (if any), then takes over Other's
  posix_descriptor& operator=(posix_descriptor&& Other) {
    if (&Other == this) {
      return *this;
    }
    // Release the old descriptor directly. Invoking the destructor on *this
    // would end the object's lifetime while it is still being used below.
    if (FD != -1) {
      ::close(FD);
    }
    Reactor = Other.Reactor;
    FD = std::exchange(Other.FD, -1);
    return *this;
  }

  // Closes the descriptor; the reactor is not touched
  ~posix_descriptor() {
    if (FD != -1) {
      ::close(FD);
    }
  }

  /**
   * Wait until there is data available to read on this object, then execute the given callback
   */
  template<typename Fn>
  requires std::is_invocable_r_v<post_callback, Fn, error>
  void async_wait(Fn Callback) {
    Reactor->bind_handler(
      pollfd {
        .fd = FD,
        .events = POLLIN,
        .revents = 0,
      },
      std::move(Callback));
  }
};

} // namespace fasio


================================================
FILE: Source/Common/AsyncNet.h
================================================
// SPDX-License-Identifier: MIT
/**
 * Socket wrappers for asynchronous programming with fasio
 */
#pragma once

#include <Common/Async.h>

#include <sys/socket.h>
#include <sys/un.h>

namespace fasio {

/**
 * Non-owning wrapper around a socket.
 *
 * Corresponds to asio::local::stream_protocol::socket.
 */
struct tcp_socket {
  poll_reactor* Reactor = nullptr; // Stays null for purely synchronous use
  int FD;

  // Constructor for synchronous and asynchronous operation
  tcp_socket(poll_reactor& Reactor_, int FD_)
    : Reactor(&Reactor_)
    , FD(FD_) {}

  // Constructor for purely synchronous operation
  tcp_socket(int FD_)
    : FD(FD_) {}

  /**
   * Queues an asynchronous operation that will run the completion callback
   * once at least one byte of data was received
   *
   * The callback receives the status, the number of bytes read and, if
   * Buffers requested one and the read succeeded, the value stored in the
   * FD slot (-1 if the peer did not send a descriptor).
   * Requires a socket constructed with a reactor.
   */
  template<typename OnComplete>
  requires std::is_invocable_r_v<void, OnComplete, error, size_t, std::optional<int>>
  void async_read_some(mutable_buffer Buffers, OnComplete UserCallback) {
    auto Callback = [Buffers, Socket = FD, UserCallback = std::move(UserCallback)](error ec) mutable {
      if (ec != error::success) {
        UserCallback(ec, 0, std::nullopt);
        return post_callback::drop;
      }

      auto BytesRead = read_some_from_fd(Buffers, ec, Socket);
      if (ec != error::success) {
        UserCallback(ec, BytesRead, std::nullopt);
      } else {
        UserCallback(ec, BytesRead, Buffers.FD ? std::optional {**Buffers.FD} : std::nullopt);
      }
      return post_callback::drop;
    };

    Reactor->bind_handler(
      pollfd {
        .fd = FD,
        .events = POLLIN | POLLPRI | POLLRDHUP,
        .revents = 0,
      },
      std::move(Callback));
  }

  /**
   * Blocks until at least one byte of data was received
   */
  size_t read_some(const mutable_buffer& Buffers, error& ec) {
    return read_some_from_fd(Buffers, ec, FD);
  }

  /**
   * Blocks until at least one byte of data was sent
   *
   * If Buffers carries an FD, it is passed along as SCM_RIGHTS ancillary data.
   * Returns the number of bytes sent, or 0 with ec set to generic_errno.
   */
  size_t write_some(const mutable_buffer& Buffers, error& ec) {
    // One iovec per chunk of the chain
    auto iov = (iovec*)alloca(sizeof(iovec) * Buffers.count_chunks());
    const auto NumIovs = static_cast<decltype(msghdr::msg_iovlen)>(fill_iovecs(Buffers, iov));
    msghdr msg {
      .msg_name = nullptr,
      .msg_namelen = 0,
      .msg_iov = iov,
      .msg_iovlen = NumIovs,
    };

    // Ancillary buffer with room for exactly one file descriptor
    constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int));
    alignas(cmsghdr) uint8_t AncBuf[CMSG_SIZE] {};

    if (Buffers.FD) {
      // Enable ancillary buffer
      msg.msg_control = AncBuf;
      msg.msg_controllen = CMSG_SIZE;

      // Now we need to setup the ancillary buffer data. We are only sending an FD
      cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
      cmsg->cmsg_len = CMSG_LEN(sizeof(int));
      cmsg->cmsg_level = SOL_SOCKET;
      cmsg->cmsg_type = SCM_RIGHTS;

      memcpy(CMSG_DATA(cmsg), Buffers.FD.value(), sizeof(int));
    }

    ssize_t Ret;
    do {
      Ret = ::sendmsg(FD, &msg, 0);
    } while (Ret < 0 && (errno == EINTR || errno == EAGAIN));
    if (Ret < 0) {
      ec = error::generic_errno;
      return 0;
    }
    ec = error::success;
    return Ret;
  }

private:
  // Writes one iovec per chunk of the chain into Iov, which must have room
  // for Buffers.count_chunks() entries. Returns the number of entries written.
  static size_t fill_iovecs(const mutable_buffer& Buffers, iovec* Iov) {
    size_t Count = 0;
    for (auto Buffer = &Buffers; Buffer; Buffer = Buffer->Next) {
      Iov[Count].iov_base = Buffer->Data.data();
      Iov[Count].iov_len = Buffer->Data.size_bytes();
      ++Count;
    }
    return Count;
  }

  // Receives into Buffers from FD. Returns the number of bytes read, or 0 with
  // ec set to eof (peer closed) or generic_errno. If Buffers requests a
  // descriptor, the FD slot receives it, or -1 if none came with the data.
  static size_t read_some_from_fd(const mutable_buffer& Buffers, error& ec, int FD) {
    // One iovec per chunk of the chain
    auto iov = (iovec*)alloca(sizeof(iovec) * Buffers.count_chunks());
    const auto NumIovs = static_cast<decltype(msghdr::msg_iovlen)>(fill_iovecs(Buffers, iov));
    msghdr msg {
      .msg_name = nullptr,
      .msg_namelen = 0,
      .msg_iov = iov,
      .msg_iovlen = NumIovs,
    };

    // If requested, set up a 4-byte ancillary buffer for receiving a file descriptor
    constexpr size_t CMSG_SIZE = CMSG_SPACE(sizeof(int));
    alignas(cmsghdr) uint8_t AncBuf[CMSG_SIZE] {};

    if (Buffers.FD) {
      // Enable ancillary buffer
      msg.msg_control = AncBuf;
      msg.msg_controllen = CMSG_SIZE;
    }

    ssize_t BytesRead;
    do {
      BytesRead = ::recvmsg(FD, &msg, 0);
    } while (BytesRead < 0 && (errno == EINTR || errno == EAGAIN));
    if (BytesRead < 0) {
      // Any negative result is a failure; it must never reach the unsigned
      // return value below
      ec = error::generic_errno;
      return 0;
    } else if (BytesRead == 0) {
      ec = error::eof;
      return 0;
    }

    if (Buffers.FD) {
      struct cmsghdr* cmsg = CMSG_FIRSTHDR(&msg);
      const bool HasFD = cmsg != nullptr && cmsg->cmsg_len == CMSG_LEN(sizeof(int)) && cmsg->cmsg_level == SOL_SOCKET &&
                         cmsg->cmsg_type == SCM_RIGHTS;
      if (HasFD) {
        memcpy(*Buffers.FD, CMSG_DATA(cmsg), sizeof(int));
      } else {
        // Not a failure since some data was read for the main message
        **Buffers.FD = -1;
      }
    }

    ec = error::success;
    return BytesRead;
  }
};

/**
 * Owning wrapper around a server socket that listens for connections after
 * creation. Clients can be accepted asynchronously using async_accept().
 *
 * Corresponds to asio::local::stream_protocol::acceptor.
 */
struct tcp_acceptor {
  // Reactor that async_accept() registers its poll handler with
  poll_reactor& Reactor;
  // Owned listening socket, or -1 once ownership has been moved away
  int FD;

  tcp_acceptor(tcp_acceptor&& other)
    : Reactor(other.Reactor)
    , FD(other.FD) {
    other.FD = -1;
  }

  ~tcp_acceptor() {
    if (FD != -1) {
      ::close(FD);
    }
  }

  /**
   * Takes over the listening socket of `other`.
   *
   * A socket already owned by this object is closed first so it isn't leaked.
   * The Reactor reference cannot be rebound and is left untouched.
   */
  tcp_acceptor& operator=(tcp_acceptor&& other) {
    if (this != &other) {
      if (FD != -1) {
        ::close(FD);
      }
      FD = std::exchange(other.FD, -1);
    }
    return *this;
  }

  /**
   * Creates an AF_UNIX stream socket, binds it to Name and starts listening.
   *
   * @param Reactor Reactor used later by async_accept()
   * @param abstract If true, Name is bound as an abstract socket name (leading NUL byte, no terminator);
   *                 otherwise it is bound as a null-terminated path
   * @param Name Socket name; names that don't fit in sun_path abort via ERROR_AND_DIE_FMT
   * @param MaxPending Backlog passed to listen()
   *
   * @return The acceptor, or an empty optional if socket(), bind() or listen() failed
   */
  static std::optional<tcp_acceptor> create(poll_reactor& Reactor, bool abstract, std::string_view Name, int MaxPending = SOMAXCONN) {
    // Create the initial unix socket
    int FD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
    if (FD == -1) {
      return {};
    }

    sockaddr_un addr {};
    addr.sun_family = AF_UNIX;

    // One byte of sun_path is always needed in addition to the name (terminator or leading NUL)
    if (Name.size() > sizeof(addr.sun_path) - 1) {
      ERROR_AND_DIE_FMT("Invalid FEXServer socket name: {}", Name);
    }

    auto NameEnd = addr.sun_path;
    if (!abstract) {
      // sun_path is null-terminated
      NameEnd = std::copy(Name.begin(), Name.end(), addr.sun_path);
      *NameEnd++ = 0;
    } else {
      // Abstract AF_UNIX sockets start with \0 but aren't null-terminated
      addr.sun_path[0] = 0;
      NameEnd = std::copy(Name.begin(), Name.end(), addr.sun_path + 1);
    }

    // Bind the socket to the path
    int Result = bind(FD, reinterpret_cast<sockaddr*>(&addr), sizeof(addr.sun_family) + (NameEnd - addr.sun_path));
    if (Result == -1) {
      ::close(FD);
      return {};
    }

    Result = ::listen(FD, MaxPending);
    if (Result == -1) {
      ::close(FD);
      return {};
    }

    return tcp_acceptor(Reactor, FD);
  }

  /**
   * Registers a POLLIN handler on the listening socket that accepts one pending connection and hands it
   * to OnAccept.
   *
   * OnAccept receives error::success with the new socket, or error::generic_errno with std::nullopt if
   * accept() failed. If the reactor reports an error for the listening socket, OnAccept is not invoked
   * and post_callback::drop is returned.
   */
  void async_accept(fextl::move_only_function<post_callback(error, std::optional<tcp_socket>)> OnAccept) {
    Reactor.bind_handler(
      {
        .fd = FD,
        .events = POLLIN,
        .revents = 0,
      },
      [ServerFD = FD, &Reactor = Reactor, OnAccept = std::move(OnAccept)](error ec) mutable {
        if (ec != error::success) {
          return post_callback::drop;
        }

        sockaddr_storage Addr {};
        // accept() treats this as a value-result argument: it must start out as the buffer size
        socklen_t AddrSize = sizeof(Addr);
        int NewFD;
        do {
          NewFD = ::accept(ServerFD, reinterpret_cast<sockaddr*>(&Addr), &AddrSize);
        } while (NewFD < 0 && (errno == EINTR || errno == EAGAIN));
        if (NewFD < 0) {
          return OnAccept(error::generic_errno, std::nullopt);
        }

        return OnAccept(error::success, tcp_socket {Reactor, NewFD});
      });
  }

private:
  // Wraps an already listening socket; only create() builds acceptors from raw descriptors
  tcp_acceptor(poll_reactor& Reactor_, int FD_)
    : Reactor(Reactor_)
    , FD(FD_) {}
};
static_assert(!std::is_copy_constructible_v<tcp_acceptor>);
static_assert(!std::is_copy_assignable_v<tcp_acceptor>);

} // namespace fasio


================================================
FILE: Source/Common/CMakeLists.txt
================================================
# Bundled option parser; presumably defines the cpp-optparse target linked below.
add_subdirectory(cpp-optparse/)

set(NAME Common)
# Sources compiled on every platform.
set(SRCS
  Config.cpp
  CPUInfo.cpp
  ArgumentLoader.cpp
  HostFeatures.cpp
  JSONPool.cpp
  SHMStats.cpp
  VolatileMetadata.cpp)

# These sources are excluded from MinGW builds.
if (NOT MINGW)
  list(APPEND SRCS
    FEXServerClient.cpp
    FileFormatCheck.cpp
    Linux/SBRKAllocations.cpp)
endif()

add_library(${NAME} STATIC ${SRCS})
target_link_libraries(${NAME} FEXCore_Base cpp-optparse tiny-json::tiny-json FEXHeaderUtils range-v3::range-v3)
# Gives the sources access to headers generated into the build tree.
target_include_directories(${NAME} PRIVATE ${CMAKE_BINARY_DIR}/generated)

# Hide symbols by default for both C and C++ objects, including inline functions.
set_target_properties(${NAME} PROPERTIES
  C_VISIBILITY_PRESET hidden
  CXX_VISIBILITY_PRESET hidden
  VISIBILITY_INLINES_HIDDEN TRUE)


================================================
FILE: Source/Common/CPUInfo.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXHeaderUtils/Filesystem.h>

#include <fmt/compile.h>
#include <fmt/format.h>

#include <cstddef>
#include <cstdint>
#ifdef _WIN32
#include <thread>
#else
#include <linux/limits.h>
#endif

namespace FEX::CPUInfo {
#ifndef _WIN32
// Counts CPUs by probing /sys/devices/system/cpu/cpuN for N = 1, 2, ... until a path doesn't exist.
// The count starts at 1, i.e. cpu0 is taken to be present without being probed.
uint32_t CalculateNumberOfCPUs() {
  constexpr auto parse_string = FMT_COMPILE("/sys/devices/system/cpu/cpu{}");
  constexpr auto max_parse_size = ::fmt::formatted_size(parse_string, UINT32_MAX);
  // One extra byte so the NUL terminator still fits when the path has its maximum length.
  char Tmp[max_parse_size + 1];
  // 32-bit counter: guarantees the formatted path never exceeds max_parse_size, which was computed for UINT32_MAX.
  uint32_t CPUs = 1;

  for (;; ++CPUs) {
    auto Size = fmt::format_to_n(Tmp, max_parse_size, parse_string, CPUs);
    Tmp[Size.size] = 0;
    if (!FHU::Filesystem::Exists(Tmp)) {
      break;
    }
  }

  return CPUs;
}
#else
// Windows variant: reports whatever the standard library considers the hardware concurrency.
uint32_t CalculateNumberOfCPUs() {
  // The reported value may be lower than the real core count if some cores are parked.
  const unsigned int Concurrency = std::thread::hardware_concurrency();
  return Concurrency;
}
#endif
} // namespace FEX::CPUInfo


================================================
FILE: Source/Common/CPUInfo.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>

namespace FEX::CPUInfo {
/**
 * @brief Calculate the number of CPUs in the system regardless of affinity mask.
 *
 * On non-Windows builds the count is derived from the `/sys/devices/system/cpu/cpuN` entries;
 * on Windows it is `std::thread::hardware_concurrency()`.
 *
 * @return The number of CPUs in the system.
 */
uint32_t CalculateNumberOfCPUs();
} // namespace FEX::CPUInfo


================================================
FILE: Source/Common/Config.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/ArgumentLoader.h"
#include "Common/Config.h"
#include "Common/JSONPool.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/SymlinkChecks.h>

#include <cstring>
#include <fmt/format.h>
#include <functional>
#ifndef _WIN32
#include <linux/limits.h>
#include <pwd.h>
#endif
#include <optional>
#include <utility>
#include <tiny-json.h>

#include <range/v3/view/split.hpp>
#include <range/v3/view/transform.hpp>

namespace FEX::Config {
namespace JSON {
  /**
   * Reads the JSON file at `Config` and calls `Func(name, value)` for every child of its top-level
   * "Config" object.
   *
   * A file that can't be loaded, or that has no "Config" object, is silently ignored. A file that fails
   * to parse is fatal. Iteration stops at the first entry that lacks a name or a value.
   */
  static void LoadJSonConfig(const fextl::string& Config, std::function<void(const char* Name, const char* ConfigSring)> Func) {
    fextl::vector<char> FileContents;
    const bool Loaded = FEXCore::FileLoading::LoadFile(FileContents, Config);
    if (!Loaded) {
      return;
    }

    FEX::JSON::JsonAllocator Allocator {};
    const json_t* Root = FEX::JSON::CreateJSON(FileContents, Allocator);
    if (Root == nullptr) {
      ERROR_AND_DIE_FMT("Failed to parse JSON from file '{}' - invalid JSON format", Config);
    }

    const json_t* Section = json_getProperty(Root, "Config");
    if (Section == nullptr) {
      // A configuration file without a Config section is valid, there is just nothing to load
      return;
    }

    const json_t* Entry = json_getChild(Section);
    while (Entry != nullptr) {
      const char* EntryName = json_getName(Entry);
      const char* EntryValue = json_getValue(Entry);

      if (EntryName == nullptr) {
        LogMan::Msg::EFmt("JSON file '{}': Couldn't get config name for an item", Config);
        return;
      }

      if (EntryValue == nullptr) {
        LogMan::Msg::EFmt("JSON file '{}': Couldn't get value for config item '{}'", Config, EntryName);
        return;
      }

      Func(EntryName, EntryValue);
      Entry = json_getSibling(Entry);
    }
  }
} // namespace JSON

// Lookup table pairing the stringified `json` argument of each OPT_BASE entry with its
// ConfigOption enumerator. The entries are produced by expanding OPT_BASE over ConfigValues.inl.
static constexpr std::pair<std::string_view, FEXCore::Config::ConfigOption> ConfigLookup[] {
#define OPT_BASE(type, group, enum, json, default) {#json, FEXCore::Config::ConfigOption::CONFIG_##enum},
#include <FEXCore/Config/ConfigValues.inl>
};

/**
 * Appends a "Config" JSON object holding every option stored in Layer to the buffer at JsonBuffer.
 *
 * String options are written once; string-array options are written once per element under the same
 * key. Any other stored type triggers an assertion message.
 *
 * @return Pointer just past the written JSON text
 */
static char* SaveLayerToJSON(char* JsonBuffer, const FEXCore::Config::Layer* Layer) {
  char* Cursor = json_objOpen(JsonBuffer, "Config");

  for (auto& [Option, Stored] : Layer->GetOptionMap()) {
    // Translate the enumerator back into its JSON key; stays empty if the table has no entry for it
    std::string_view JsonName {};
    for (auto& [CandidateName, CandidateOption] : ConfigLookup) {
      if (CandidateOption == Option) {
        JsonName = CandidateName;
        break;
      }
    }

    if (const auto* Single = std::get_if<fextl::string>(&Stored)) {
      Cursor = json_str(Cursor, JsonName.data(), Single->c_str());
    } else if (const auto* Array = std::get_if<FEXCore::Config::StringArrayType>(&Stored)) {
      for (auto& Element : *Array) {
        Cursor = json_str(Cursor, JsonName.data(), Element.c_str());
      }
    } else {
      LogMan::Msg::AFmt("Trying to store config with pre-converted type");
    }
  }

  return json_objClose(Cursor);
}

/**
 * Serializes Layer plus the HostLibs map (as a "ThunksDB" object of integer flags) to JSON and writes
 * the result to Filename, creating or truncating the file.
 *
 * The JSON text is built in a fixed 4096-byte stack buffer; the size assertion only runs after the
 * text has been generated.
 */
void SaveLayerToJSON(const fextl::string& Filename, const FEXCore::Config::Layer* Layer, const fextl::unordered_map<fextl::string, bool>& HostLibs) {
  char Buffer[4096];

  // Root object -> "Config" object -> "ThunksDB" object
  char* Cursor = json_objOpen(Buffer, nullptr);
  Cursor = SaveLayerToJSON(Cursor, Layer);

  Cursor = json_objOpen(Cursor, "ThunksDB");
  for (auto& Lib : HostLibs) {
    Cursor = json_int(Cursor, Lib.first.c_str(), Lib.second);
  }
  Cursor = json_objClose(Cursor);

  Cursor = json_objClose(Cursor);
  json_end(Cursor);

  LogMan::Throw::AFmt(Cursor <= std::end(Buffer), "Exceeded JSON buffer size");

  const auto Mode = FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE;
  auto Output = FEXCore::File::File(Filename.c_str(), Mode);
  if (!Output.IsValid()) {
    return;
  }

  Output.Write(Buffer, strlen(Buffer));
}

/**
 * Writes Layer to Filename while preserving the "ThunksDB" entries already stored in that file.
 *
 * If the existing file can't be loaded or parsed, or has no "ThunksDB" object, the file is written
 * with an empty ThunksDB.
 */
void SaveLayerToJSON(const fextl::string& Filename, const FEXCore::Config::Layer* Layer) {
  fextl::unordered_map<fextl::string, bool> PersistedLibs;

  // Collects the previously stored ThunksDB flags; bails out early when there is nothing to read
  const auto CollectExistingThunks = [&PersistedLibs, &Filename]() {
    fextl::vector<char> Contents;
    if (!FEXCore::FileLoading::LoadFile(Contents, Filename)) {
      return;
    }

    FEX::JSON::JsonAllocator Allocator {};
    const json_t* Root = FEX::JSON::CreateJSON(Contents, Allocator);
    if (Root == nullptr) {
      return;
    }

    const json_t* Thunks = json_getProperty(Root, "ThunksDB");
    if (Thunks == nullptr) {
      return;
    }

    const json_t* Entry = json_getChild(Thunks);
    while (Entry != nullptr) {
      PersistedLibs.emplace(json_getName(Entry), (json_getInteger(Entry) != 0));
      Entry = json_getSibling(Entry);
    }
  };
  CollectExistingThunks();

  SaveLayerToJSON(Filename, Layer, PersistedLibs);
}

// Application loaders
// Base class for JSON-backed layers: maps (name, value) pairs from a config file onto config options.
class OptionMapper : public FEXCore::Config::Layer {
public:
  explicit OptionMapper(FEXCore::Config::LayerType Layer);

protected:
  // Looks up ConfigName in ConfigLookup and applies ConfigString to the matching option.
  void MapNameToOption(const char* ConfigName, const char* ConfigString);
  // Remembers which file is being loaded so that MapNameToOption can name it in log messages.
  void SetCurrentConfigFile(const fextl::string& Filename) {
    CurrentConfigFile = Filename;
  }
  // Path of the config file currently being loaded (only used for diagnostics in this file).
  fextl::string CurrentConfigFile;
};

// Layer that loads options from a single JSON config file.
class MainLoader final : public OptionMapper {
public:
  // Uses the config file location reported by FEXCore for the given layer type.
  explicit MainLoader(FEXCore::Config::LayerType Type);
  // Loads ConfigFile into a LAYER_MAIN layer.
  explicit MainLoader(fextl::string ConfigFile);
  // Loads ConfigFile into a layer of the given type.
  explicit MainLoader(FEXCore::Config::LayerType Type, std::string_view ConfigFile);

  void Load() override;

private:
  // Path of the JSON file read by Load().
  fextl::string Config;
};

class AppLoader final : public OptionMapper {
public:
  explicit AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type);
  void Load();

private:
  fextl::string Config;
};

// Layer that takes option values from `FEX_*` environment variables.
class EnvLoader final : public FEXCore::Config::Layer {
public:
  // _envp is the environment array to scan; it may be null (Load() then relies on getenv only).
  explicit EnvLoader(char* const _envp[]);
  void Load() override;

private:
  // Non-owning pointer to the environment array handed to the constructor.
  char* const* envp;
};

// Forwards the layer type to the base Layer; no file is associated yet.
OptionMapper::OptionMapper(FEXCore::Config::LayerType Layer)
  : FEXCore::Config::Layer(Layer) {}

// Applies one (name, value) pair from a JSON config file to this layer.
// Names not present in ConfigLookup are reported with an info message and ignored.
void OptionMapper::MapNameToOption(const char* ConfigName, const char* ConfigString) {
  std::optional<FEXCore::Config::ConfigOption> KeyOptionValue;
  for (auto& it : ConfigLookup) {
    // string_view vs. C string comparison: compares contents, not pointers
    if (it.first != ConfigName) {
      continue;
    }

    KeyOptionValue = it.second;
    break;
  }

  if (!KeyOptionValue.has_value()) {
    LogMan::Msg::IFmt("Unknown configuration option '{}' in JSON config file '{}'", ConfigName, CurrentConfigFile);
    return;
  }

  // NOTE(review): these locals are presumably consumed by the JSONLOADER expansion of
  // ConfigOptions.inl below - confirm before renaming any of them.
  const auto KeyOption = *KeyOptionValue;
  const auto KeyName = std::string_view(ConfigName);
  const auto Value_View = std::string_view(ConfigString);
#define JSONLOADER
#include <FEXCore/Config/ConfigOptions.inl>
}

// Picks the config file location from FEXCore: the global one for LAYER_GLOBAL_MAIN, the user one otherwise.
MainLoader::MainLoader(FEXCore::Config::LayerType Type)
  : OptionMapper(Type)
  , Config {FEXCore::Config::GetConfigFileLocation(Type == FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN)} {}

// Loads an explicitly given file into a LAYER_MAIN layer.
MainLoader::MainLoader(fextl::string ConfigFile)
  : OptionMapper(FEXCore::Config::LayerType::LAYER_MAIN)
  , Config {std::move(ConfigFile)} {}


// Loads an explicitly given file into a layer of the requested type.
MainLoader::MainLoader(FEXCore::Config::LayerType Type, std::string_view ConfigFile)
  : OptionMapper(Type)
  , Config {ConfigFile} {}

// Parses the configured JSON file and applies each of its entries to this layer.
void MainLoader::Load() {
  SetCurrentConfigFile(Config);

  const auto ApplyEntry = [this](const char* Name, const char* ConfigString) {
    MapNameToOption(Name, ConfigString);
  };
  JSON::LoadJSonConfig(Config, ApplyEntry);
}

// Resolves the application config path for Filename (global or local depending on Type) and loads it.
AppLoader::AppLoader(const fextl::string& Filename, FEXCore::Config::LayerType Type)
  : OptionMapper(Type) {
  bool UseGlobalLocation = false;
  switch (Type) {
  case FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP:
  case FEXCore::Config::LayerType::LAYER_GLOBAL_APP: UseGlobalLocation = true; break;
  default: break;
  }
  Config = FEXCore::Config::GetApplicationConfig(Filename, UseGlobalLocation);

  // Load right away so that the meta layer can be reloaded afterwards
  Load();
}

// Parses the application's JSON config file and applies each of its entries to this layer.
void AppLoader::Load() {
  SetCurrentConfigFile(Config);

  const auto ApplyEntry = [this](const char* Name, const char* ConfigString) {
    MapNameToOption(Name, ConfigString);
  };
  JSON::LoadJSonConfig(Config, ApplyEntry);
}

// Stores the environment array for Load(); the layer type is always LAYER_ENVIRONMENT.
EnvLoader::EnvLoader(char* const _envp[])
  : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_ENVIRONMENT)
  , envp {_envp} {}

// Collects all `FEX_*` variables from envp and applies them to the matching config options.
// Options that aren't present in envp are additionally looked up with std::getenv().
void EnvLoader::Load() {
  using EnvMapType = fextl::unordered_map<std::string_view, fextl::string>;
  EnvMapType EnvMap;

  for (const char* const* pvar = envp; pvar && *pvar; pvar++) {
    std::string_view Var(*pvar);

    ///< All FEX environment variables start with `FEX_`
    if (!Var.starts_with("FEX_")) {
      continue;
    }

    // Entries have the form `NAME=VALUE`. The name can't contain '=' but the value can,
    // so the split has to happen at the first '='.
    size_t pos = Var.find('=');
    if (fextl::string::npos == pos) {
      continue;
    }

    std::string_view Key = Var.substr(0, pos);
    std::string_view Value_View {Var.substr(pos + 1)};
    std::optional<fextl::string> Value;

    // NOTE(review): the ENVLOADER expansion presumably reads Key/Value_View and may fill Value
    // with a rewritten string - confirm in ConfigOptions.inl.
#define ENVLOADER
#include <FEXCore/Config/ConfigOptions.inl>

    if (Value) {
      EnvMap.insert_or_assign(Key, std::move(*Value));
    } else {
      EnvMap.insert_or_assign(Key, Value_View);
    }
  }

  auto GetVar = [](const EnvMapType& EnvMap, std::string_view id) -> std::optional<std::string_view> {
    const auto EnvEntry = EnvMap.find(id);
    if (EnvEntry != EnvMap.end()) {
      return EnvEntry->second;
    }

    // If envp[] was empty, search using std::getenv()
    // `id` always comes from a string literal below, so id.data() is null-terminated.
    const char* vs = std::getenv(id.data());
    if (vs) {
      return vs;
    } else {
      return std::nullopt;
    }
  };

  std::optional<std::string_view> Value;

  // Walk all the environment options and corresponding config option.
#define OPT_BASE(type, group, enum, json, default) \
  Value = GetVar(EnvMap, "FEX_" #enum);            \
  if (Value.has_value()) Set(FEXCore::Config::ConfigOption::CONFIG_##enum, *Value);
#define OPT_STRARRAY(group, enum, json, default) \
  Value = GetVar(EnvMap, "FEX_" #enum);          \
  if (Value.has_value()) AppendStrArrayValue(FEXCore::Config::ConfigOption::CONFIG_##enum, *Value);

#include <FEXCore/Config/ConfigValues.inl>
}

// Builds the layer that reads the global main config file (LAYER_GLOBAL_MAIN).
fextl::unique_ptr<FEXCore::Config::Layer> CreateGlobalMainLayer() {
  return fextl::make_unique<MainLoader>(FEXCore::Config::LayerType::LAYER_GLOBAL_MAIN);
}

// Builds the main config layer: from *File when a file is given, from the default location otherwise.
fextl::unique_ptr<FEXCore::Config::Layer> CreateMainLayer(const fextl::string* File) {
  if (File == nullptr) {
    return fextl::make_unique<MainLoader>(FEXCore::Config::LayerType::LAYER_MAIN);
  }

  return fextl::make_unique<MainLoader>(*File);
}

// Builds a LAYER_USER_OVERRIDE layer that reads the JSON file at AppConfig.
fextl::unique_ptr<FEXCore::Config::Layer> CreateUserOverrideLayer(std::string_view AppConfig) {
  return fextl::make_unique<MainLoader>(FEXCore::Config::LayerType::LAYER_USER_OVERRIDE, AppConfig);
}

// Builds an application layer of the given type; the AppLoader loads its file during construction.
fextl::unique_ptr<FEXCore::Config::Layer> CreateAppLayer(const fextl::string& Filename, FEXCore::Config::LayerType Type) {
  return fextl::make_unique<AppLoader>(Filename, Type);
}

// Builds the layer that reads `FEX_*` options from the given environment array.
fextl::unique_ptr<FEXCore::Config::Layer> CreateEnvironmentLayer(char* const _envp[]) {
  return fextl::make_unique<EnvLoader>(_envp);
}

// Determines the on-disk path of the guest program.
//
// Program: the program argument as received (may be empty when executed through an FD).
// ExecFDInterp: true if FEX was invoked by binfmt_misc with an FD.
// ProgramFDFromEnv: execveat FD passed through to FEX, or -1 if there is none.
//
// Returns the absolute and, where applicable, symlink-resolved program path.
fextl::string RecoverGuestProgramFilename(fextl::string Program, bool ExecFDInterp, int ProgramFDFromEnv) {
  // If executed with a FEX FD then the Program argument might be empty.
  // In this case we need to scan the FD node to recover the application binary that exists on disk.
  // Only do this if the Program argument is empty, since we would prefer the application's expectation
  // of application name.
  if (ProgramFDFromEnv != -1 && Program.empty()) {
    // Get the `dev` node of the execveat fd string.
    Program = fextl::fmt::format("/dev/fd/{}", ProgramFDFromEnv);
  }

  // If we were provided a relative path then we need to canonicalize it to become absolute.
  // If the program name isn't resolved to an absolute path then glibc breaks inside its `_dl_get_origin` function.
  // This is because we rewrite `/proc/self/exe` to the absolute program path calculated in here.
  if (!Program.starts_with('/')) {
    char ExistsTempPath[PATH_MAX];
    char* RealPath = FHU::Filesystem::Absolute(Program.c_str(), ExistsTempPath);
    if (RealPath) {
      Program = RealPath;
    }
  }

  // If FEX was invoked through an FD path (either binfmt_misc or execveat) then we need to check the
  // Program to see if it is a symlink to find the real path.
  //
  // binfmt_misc: Arg[0] is actually the execve `pathname` argument or `/dev/fd/<FD>` path
  //   - `pathname` with execve (See Side Note)
  //   - FD path with execveat and FD doesn't have an existing file on the disk
  //
  // ProgramFDFromEnv: Arg[0] is Application provided data or `/dev/fd/<FD>` from above fix-up.
  //   - execveat was either passed no arguments (argv=NULL) or the first argument is an empty string (argv[0]="").
  //   - FD path with execveat and FD doesn't have an existing file on the disk
  //
  // Side Note:
  //  The `execve` syscall doesn't take an FD but binfmt_misc will give FEX an FD to execute still.
  //  Arg[0] will always contain the `pathname` argument provided to execve.
  //  It does not resolve symlinks, and it does not convert the path to absolute.
  //
  // Examples:
  //  - Regular execve. Application must exist on disk.
  //    execve binfmt_misc args layout:   `FEX <Path provided to execve pathname> <user provided argv[0]> <user provided argv[n]>...`
  //  - Regular execveat with FD. FD is backed by application on disk.
  //    execveat binfmt_misc args layout: `FEX <Path provided to execve pathname> <user provided argv[0]> <user provided argv[n]>...`
  //  - Regular execveat with FD. FD points to file on disk that has been deleted.
  //    execveat binfmt_misc args layout: `FEX /dev/fd/<FD> <user provided argv[0]> <user provided argv[n]>...`
#ifndef _WIN32
  if (ExecFDInterp || ProgramFDFromEnv != -1) {
    // Only in the case that FEX is executing an FD will the program argument potentially be a symlink.
    // This symlink will be in the style of `/dev/fd/<FD>`.
    //
    // If the argument /is/ a symlink then resolve its path to get the original application name.
    if (FHU::Symlinks::IsSymlink(Program)) {
      char Filename[PATH_MAX];
      auto SymlinkPath = FHU::Symlinks::ResolveSymlink(Program, Filename);
      if (SymlinkPath.starts_with('/')) {
        // This file was executed through an FD.
        // Remove the ` (deleted)` text if the file was deleted after the fact.
        // Otherwise just get the symlink without the deleted text.
        // (rfind yields npos when the text is absent, which makes substr keep the whole path.)
        return fextl::string {SymlinkPath.substr(0, SymlinkPath.rfind(" (deleted)"))};
      }
    }
  }
#endif

  return Program;
}

// Derives the guest program path (from Args[0]) and the executable name used to select
// application-specific configs. When launched through wine, the name of the program that wine
// runs is reported instead of wine's own name.
ApplicationNames GetApplicationNames(const fextl::vector<fextl::string>& Args, bool ExecFDInterp, int ProgramFDFromEnv) {
  // Nothing to work with if no arguments were passed
  if (Args.empty()) {
    return {};
  }

  fextl::string ResolvedPath = RecoverGuestProgramFilename(Args[0], ExecFDInterp, ProgramFDFromEnv);
  fextl::string ResolvedName {};

  bool SeenWine = false;
  size_t Index = 0;
  while (Index < Args.size()) {
    auto Candidate = FHU::Filesystem::GetFilename(Args[Index]);

    // The preloader is invoked as `wine-preloader <wine executable>` and maps the executable itself
    // instead of execve-ing it.
    const bool IsPreloader = Candidate == "wine-preloader" || Candidate == "wine64-preloader";
    // wine renames the active program later through `setprogname` or `prctl(PR_SET_NAME`, which is
    // too late for FEX, so the application name has to be taken from the arguments after it.
    const bool IsWineBinary = !IsPreloader && (Candidate == "wine" || Candidate == "wine64");

    if (!IsPreloader && !IsWineBinary) {
      if (SeenWine) {
        // Programs handed to wine may use '\' as path separator; keep only the last component
        const auto LastBackslash = Candidate.find_last_of('\\');
        if (LastBackslash != Candidate.npos) {
          Candidate = Candidate.substr(LastBackslash + 1);
        }
      }

      // First argument past any wine program names is the one we want
      ResolvedName = std::move(Candidate);
      break;
    }

    SeenWine = true;
    // The argument following the preloader is known to be wine (possibly under a custom name): skip it too
    Index += IsPreloader ? 2 : 1;
  }

  return ApplicationNames {std::move(ResolvedPath), std::move(ResolvedName)};
}

// Sets up the config directories, registers every configuration layer and loads them.
// Layers are added in this order: global main (skipped when portable), main, application layers
// (global variants skipped when portable), Steam application layers, the FEX_APP_CONFIG override
// file and finally the environment.
void LoadConfig(fextl::string ProgramName, char** const envp, const PortableInformation& PortableInfo) {
  const bool IsPortable = PortableInfo.IsPortable;
  FEX::Config::InitializeConfigs(PortableInfo);
  FEXCore::Config::Initialize();
  if (!IsPortable) {
    FEXCore::Config::AddLayer(CreateGlobalMainLayer());
  }
  FEXCore::Config::AddLayer(CreateMainLayer());

  if (!ProgramName.empty()) {
    if (!IsPortable) {
      FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_GLOBAL_APP));
    }
    FEXCore::Config::AddLayer(CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_LOCAL_APP));

    auto SteamID = getenv("SteamAppId");
    if (SteamID) {
      // If a SteamID exists then let's search for Steam application configs as well.
      // We want to key off both the SteamAppId number /and/ the executable since we may not want to thunk all binaries.
      fextl::string SteamAppName = fextl::fmt::format("Steam_{}_{}", SteamID, ProgramName);
      if (!IsPortable) {
        FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP));
      }
      FEXCore::Config::AddLayer(CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP));
    }
  }

  // Optional user-specified override file; only used if it exists
  const char* AppConfig = getenv("FEX_APP_CONFIG");
  if (AppConfig) {
    fextl::string AppConfigStr = AppConfig;
    // In portable mode a relative path is taken relative to the folder containing FEX
    if (IsPortable && FHU::Filesystem::IsRelative(AppConfig)) {
      AppConfigStr = PortableInfo.InterpreterPath + AppConfigStr;
    }

    if (FHU::Filesystem::Exists(AppConfigStr)) {
      FEXCore::Config::AddLayer(CreateUserOverrideLayer(AppConfigStr));
    }
  }

  FEXCore::Config::AddLayer(CreateEnvironmentLayer(envp));
  FEXCore::Config::Load();
}

#ifndef _WIN32
// Looks up the home directory of the effective user by scanning /etc/passwd.
// Returns an empty string if the file can't be read or no entry matches the effective uid.
fextl::string FindUserHomeThroughUID() {
  // `getpwuid` allocates memory, parse `/etc/passwd` manually.
  // Format is trivial: `<name>:<password hash>:<uid>:<gid>:<comment>:<home>:<shell>`

  fextl::vector<char> Data;
  if (!FEXCore::FileLoading::LoadFile(Data, "/etc/passwd")) {
    return {};
  }

  auto to_string_view = [](auto rng) {
    return std::string_view(&*rng.begin(), ranges::distance(rng));
  };

  const auto uid = geteuid();

  for (const auto entry : ranges::views::split(Data, '\n') | ranges::views::transform(to_string_view)) {
    const auto elements = ranges::views::split(entry, ':') | ranges::views::transform(to_string_view);
    // Reject bad entries.
    if (std::distance(elements.begin(), elements.end()) != 7) {
      continue;
    }

    // Field order: name(0) : password hash(1) : uid(2) : gid(3) : comment(4) : home(5) : shell(6)
    auto iter = elements.begin();
    ++iter; // name
    ++iter; // password hash
    // uid
    const auto uid_s = *iter;
    ++iter;
    ++iter; // gid
    ++iter; // comment
    // home
    const auto home_s = *iter;

    uint64_t element_uid;
    auto Results = std::from_chars(uid_s.begin(), uid_s.end(), element_uid, 10);

    // Error parsing (nothing consumed, or the value doesn't fit and element_uid was left unset).
    if (Results.ptr == uid_s.begin() || Results.ec != std::errc {}) {
      continue;
    }

    if (element_uid == uid) {
      return fextl::string(home_s);
    }
  }

  return {};
}

// Picks the directory to treat as the user's home, trying in order: $HOME, the home directory
// listed in /etc/passwd for the effective uid, $PWD and finally ".".
fextl::string GetHomeDirectory() {
  const auto Unusable = [](const char* Dir) {
    return Dir == nullptr || !FHU::Filesystem::Exists(Dir);
  };

  const char* Candidate = getenv("HOME");

  if (Unusable(Candidate)) {
    // Try to get the home directory from the uid
    auto PasswdHome = FindUserHomeThroughUID();
    if (!PasswdHome.empty() && FHU::Filesystem::Exists(PasswdHome)) {
      return PasswdHome;
    }

    // Next best guess is the PWD
    Candidate = getenv("PWD");
  }

  // Still doesn't exist? You get local
  if (Unusable(Candidate)) {
    Candidate = ".";
  }

  return Candidate;
}
#else
// Windows variant: uses WINEHOMEDIR (minus its NT path prefix), then LOCALAPPDATA, then ".".
fextl::string GetHomeDirectory() {
  const char* HomeDir = getenv("WINEHOMEDIR");
  if (HomeDir) {
    // Skip over the \??\ prefix in the NT path since we want a DOS path
    // NOTE(review): the four characters are skipped unconditionally; this assumes the value always
    // starts with that prefix and is at least four characters long - confirm.
    HomeDir += 4;
  };

  if (!HomeDir) {
    HomeDir = getenv("LOCALAPPDATA");
  }

  if (!HomeDir) {
    HomeDir = ".";
  }

  return HomeDir;
}
#endif

// Resolves the directory FEX data is stored in.
//
// Precedence: STEAM_COMPAT_DATA_PATH (only with FEX_STEAM_SUPPORT), the portable location, the
// global data directory (if Global), an existing legacy `$HOME/.fex-emu/`, FEX_APP_DATA_LOCATION,
// and finally `$XDG_DATA_HOME/fex-emu/` with `~/.local/share` as the XDG fallback.
fextl::string GetDataDirectory(bool Global, const PortableInformation& PortableInfo) {
#ifdef FEX_STEAM_SUPPORT
  const char* SteamDataPath = getenv("STEAM_COMPAT_DATA_PATH");
  if (SteamDataPath) {
    return fextl::fmt::format("{}/fex-emu/", SteamDataPath);
  }
#endif

  const char* DataOverride = getenv("FEX_APP_DATA_LOCATION");

  // Portable installs keep their data next to FEX, unless a non-global override was requested
  if (PortableInfo.IsPortable && (Global || !DataOverride)) {
    return fextl::fmt::format("{}/fex-emu/", PortableInfo.InterpreterPath);
  }

  if (Global) {
    return GLOBAL_DATA_DIRECTORY;
  }

  auto HomeDir = GetHomeDirectory();
  const char* DataXDG = getenv("XDG_DATA_HOME");
  const fextl::string LegacyDir = fextl::string {HomeDir} + "/.fex-emu/";

  // If $HOME/.fex-emu exists, use that
  // (this takes priority over FEX_APP_DATA_LOCATION, which is only consulted below)
  if (FHU::Filesystem::Exists(LegacyDir)) {
    return LegacyDir;
  }

  fextl::string DataDir {};
  if (DataOverride) {
    // Data override will override the complete directory
    DataDir = DataOverride;
  } else {
    // use ~/.local/share if XDG_DATA_HOME is unset
    DataDir = DataXDG ? DataXDG : fmt::format("{}/.local/share", HomeDir);
    DataDir += "/fex-emu/";
  }

  return DataDir;
}

// Resolves the directory FEX configuration files are stored in.
//
// Precedence: the portable location (portable + Global), FEX_APP_CONFIG_LOCATION (non-global only),
// STEAM_COMPAT_DATA_PATH (only with FEX_STEAM_SUPPORT), the global data directory (if Global), an
// existing legacy `$HOME/.fex-emu/`, and finally `$XDG_CONFIG_HOME/fex-emu/` with `~/.config` as
// the XDG fallback.
fextl::string GetConfigDirectory(bool Global, const PortableInformation& PortableInfo) {
  const char* ConfigOverride = getenv("FEX_APP_CONFIG_LOCATION");
  if (PortableInfo.IsPortable && Global) {
    return fextl::fmt::format("{}/fex-emu/", PortableInfo.InterpreterPath);
  } else if (ConfigOverride && !Global) {
    fextl::string AppConfigStr = ConfigOverride;
    // A relative override is prefixed with the folder containing FEX
    if (FHU::Filesystem::IsRelative(AppConfigStr)) {
      AppConfigStr = PortableInfo.InterpreterPath + AppConfigStr;
    }

    return AppConfigStr;
  }

#ifdef FEX_STEAM_SUPPORT
  const char* SteamDataPath = getenv("STEAM_COMPAT_DATA_PATH");
  if (SteamDataPath) {
    return fextl::fmt::format("{}/fex-emu/", SteamDataPath);
  }
#endif

  fextl::string ConfigDir;
  if (Global) {
    return GLOBAL_DATA_DIRECTORY;
  }

  auto HomeDir = GetHomeDirectory();
  const char* ConfigXDG = getenv("XDG_CONFIG_HOME");

  const fextl::string LegacyDir = fextl::string {HomeDir} + "/.fex-emu/";

  // If $HOME/.fex-emu exists, use that
  if (FHU::Filesystem::Exists(LegacyDir)) {
    return LegacyDir;
  }

  // NOTE(review): this branch can't be taken. Global is false here, and a non-global call with
  // ConfigOverride set has already returned at the top of the function.
  if (ConfigOverride) {
    // Config override will override the complete directory
    ConfigDir = ConfigOverride;
  } else {
    // use ~/.config if XDG_CONFIG_HOME is unset
    ConfigDir = ConfigXDG ? ConfigXDG : fmt::format("{}/.config", HomeDir);
    ConfigDir += "/fex-emu/";
  }


  return ConfigDir;
}

// Resolves the directory FEX caches are stored in.
//
// FEX_APP_CACHE_LOCATION is returned verbatim when set. Otherwise, on non-Windows builds:
// STEAM_COMPAT_SHADER_PATH (only with FEX_STEAM_SUPPORT), then `$XDG_CACHE_HOME/fex-emu/` with
// `<home>/.cache` as the XDG fallback. On Windows: `%LOCALAPPDATA%\fex-emu\` or `.\`.
fextl::string GetCacheDirectory() {
  const char* CacheOverride = getenv("FEX_APP_CACHE_LOCATION");
  if (CacheOverride) {
    return CacheOverride;
  }

#ifndef _WIN32
#ifdef FEX_STEAM_SUPPORT
  const char* SteamDataPath = getenv("STEAM_COMPAT_SHADER_PATH");
  if (SteamDataPath) {
    return fextl::fmt::format("{}/fex-emu/", SteamDataPath);
  }
#endif

  auto HomeDir = GetHomeDirectory();
  const char* CacheXDG = getenv("XDG_CACHE_HOME");
  return (CacheXDG ? fextl::string {CacheXDG} : (fextl::string {HomeDir} + "/.cache")) + "/fex-emu/";
#else
  const char* PrefixAppData = getenv("LOCALAPPDATA");
  return PrefixAppData ? (fextl::string {PrefixAppData} + "\\fex-emu\\") : fextl::string {".\\"};
#endif
}

// Path of `Config.json` inside the config directory. The name is appended without a separator,
// so this relies on GetConfigDirectory() returning a path that ends in one.
fextl::string GetConfigFileLocation(bool Global, const PortableInformation& PortableInfo) {
  return GetConfigDirectory(Global, PortableInfo) + "Config.json";
}

// Computes the data directory, config directory and config file location for both the user
// (Global == false) and the global (Global == true) scope and registers them with FEXCore.
void InitializeConfigs(const PortableInformation& PortableInfo) {
  constexpr bool Scopes[] = {false, true};

  for (const bool Global : Scopes) {
    FEXCore::Config::SetDataDirectory(GetDataDirectory(Global, PortableInfo), Global);
  }
  for (const bool Global : Scopes) {
    FEXCore::Config::SetConfigDirectory(GetConfigDirectory(Global, PortableInfo), Global);
  }
  for (const bool Global : Scopes) {
    FEXCore::Config::SetConfigFileLocation(GetConfigFileLocation(Global, PortableInfo), Global);
  }
}
} // namespace FEX::Config


================================================
FILE: Source/Common/Config.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>

#include <string_view>

namespace FEX::ArgLoader {
class ArgLoader;
}
/**
 * @brief This is a singleton for storing global configuration state
 */
namespace FEX::Config {
// A LAYER_MAIN layer whose Load() does nothing, i.e. it never reads options from anywhere itself.
class EmptyMapper : public FEXCore::Config::Layer {
public:
  explicit EmptyMapper()
    : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_MAIN) {}
  // Intentionally empty.
  void Load() override {}

protected:
};

// Writes Layer to the JSON file Filename, keeping the "ThunksDB" entries already stored in that file.
void SaveLayerToJSON(const fextl::string& Filename, const FEXCore::Config::Layer* Layer);
// Writes Layer to the JSON file Filename, storing HostLibs as the "ThunksDB" object.
void SaveLayerToJSON(const fextl::string& Filename, const FEXCore::Config::Layer* Layer, const fextl::unordered_map<fextl::string, bool>& HostLibs);

// Result of GetApplicationNames(); both members are empty if no arguments were provided.
struct ApplicationNames {
  // This is the full path to the program (if it exists).
  fextl::string ProgramPath;
  // This is the program executable name (if it exists).
  fextl::string ProgramName;
};

// Describes whether FEX runs as a portable install and where that install lives.
struct PortableInformation {
  // True if FEX runs in portable mode (global config layers are then skipped by LoadConfig).
  bool IsPortable;
  // Path of folder containing FEX (including / at the end)
  fextl::string InterpreterPath;
};

/**
 * @param Args The guest program arguments, starting with the program itself
 * @param ExecFDInterp If FEX was executed with binfmt_misc FD argument
 * @param ProgramFDFromEnv The execveat FD argument passed through FEX
 *
 * @return The application name and path structure
 */
ApplicationNames GetApplicationNames(const fextl::vector<fextl::string>& Args, bool ExecFDInterp, int ProgramFDFromEnv);

/**
 * @brief Loads the FEX and application configurations for the application that is getting ready to run.
 *
 * @param ProgramName Optional program name, if non-empty application specific configurations will be loaded
 * @param envp Optional `envp` passed to main(...)
 * @param PortableInfo Optional portable install description; when portable, the global layers are skipped
 */
void LoadConfig(fextl::string ProgramName = {}, char** const envp = nullptr, const PortableInformation& PortableInfo = {});

// Directory treated as the user's home directory.
fextl::string GetHomeDirectory();

// NOTE(review): no definition of this one-parameter overload is visible in Config.cpp; it is kept
// only for source compatibility - confirm whether it can be removed.
fextl::string GetDataDirectory(const PortableInformation& PortableInfo);
// Matches the definition in Config.cpp, which is the overload InitializeConfigs() calls.
fextl::string GetDataDirectory(bool Global, const PortableInformation& PortableInfo);
// Directory holding FEX configuration files for the user (Global == false) or global scope.
fextl::string GetConfigDirectory(bool Global, const PortableInformation& PortableInfo);
// Path of `Config.json` inside the directory returned by GetConfigDirectory().
fextl::string GetConfigFileLocation(bool Global, const PortableInformation& PortableInfo);
// Directory holding FEX caches.
fextl::string GetCacheDirectory();

// Registers the data/config directories and config file locations (user and global) with FEXCore.
void InitializeConfigs(const PortableInformation& PortableInfo);

/**
 * @brief Loads the global FEX config
 *
 * @return unique_ptr for that layer
 */
fextl::unique_ptr<FEXCore::Config::Layer> CreateGlobalMainLayer();

/**
 * @brief Loads the main application config
 *
 * @param File Optional override to load a specific config file in to the main layer
 * Shouldn't be commonly used
 *
 * @return unique_ptr for that layer
 */
fextl::unique_ptr<FEXCore::Config::Layer> CreateMainLayer(const fextl::string* File = nullptr);
fextl::unique_ptr<FEXCore::Config::Layer> CreateUserOverrideLayer(std::string_view AppConfig);

/**
 * @brief Create an application configuration loader
 *
 * @param Filename Application filename component
 * @param Global Load the global configuration or user accessible file
 *
 * @return unique_ptr for that layer
 */
fextl::unique_ptr<FEXCore::Config::Layer> CreateAppLayer(const fextl::string& Filename, FEXCore::Config::LayerType Type);

/**
 * @brief Create an environment configuration loader
 *
 * @param _envp[] The environment array from main
 *
 * @return unique_ptr for that layer
 */
fextl::unique_ptr<FEXCore::Config::Layer> CreateEnvironmentLayer(char* const _envp[]);
} // namespace FEX::Config


================================================
FILE: Source/Common/FDUtils.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/fmt.h>

#include <fcntl.h>
#include <linux/limits.h>
#include <unistd.h>

namespace FEX {

/**
 * @brief Resolves what a file descriptor refers to by reading the `/proc/self/fd/<fd>` symlink.
 *
 * @param fd The file descriptor to look up
 * @param SymlinkPath Destination buffer; readlinkat() is allowed to write up to PATH_MAX bytes to it
 *
 * @return The readlinkat() result: number of bytes stored in SymlinkPath, or -1 on failure.
 * readlinkat() does not append a terminating null byte, so the caller has to use the returned length.
 */
[[nodiscard]]
inline int get_fdpath(int fd, char* SymlinkPath) {
  const auto ProcEntry = fextl::fmt::format("/proc/self/fd/{}", fd);
  const auto BytesStored = readlinkat(AT_FDCWD, ProcEntry.c_str(), SymlinkPath, PATH_MAX);
  return BytesStored;
}

} // namespace FEX


================================================
FILE: Source/Common/FEXServerClient.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/AsyncNet.h"
#include "Common/Config.h"
#include "FDUtils.h"
#include "Common/FEXServerClient.h"

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <cstdlib>
#include <cstring>
#include <thread>

#include <fcntl.h>
#include <linux/limits.h>
#include <unistd.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <sys/un.h>
#include <sys/wait.h>

namespace FEXServerClient {
// Builds a log packet header of the given type, stamped with the calling process ID,
// thread ID and the current CLOCK_MONOTONIC time.
Logging::PacketHeader Logging::FillHeader(PacketTypes Type) {
  Logging::PacketHeader Header {};
  Header.PacketType = Type;
  Header.PID = ::getpid();
  Header.TID = FHU::Syscalls::gettid();
  clock_gettime(CLOCK_MONOTONIC, &Header.Timestamp);
  return Header;
}

int RequestPIDFDPacket(int ServerSocket, PacketType Type) {
  fasio::tcp_socket Socket {ServerSocket};
  FEXServerRequestPacket Req {
    .Header {
      .Type = Type,
    },
  };

  // Send request
  fasio::error ec;
  write(Socket, fasio::mutable_buffer {std::as_writable_bytes(std::span {&Req, 1})}, ec);
  if (ec != fasio::error::success) {
    return -1;
  }

  // Wait for success response and log FD
  FEXServerResultPacket Res {};
  fasio::mutable_buffer ResBuffer {std::as_writable_bytes(std::span {&Res, 1})};
  int NewFD = -1;
  ResBuffer.FD = &NewFD;
  auto BytesRead = Socket.read_some(ResBuffer, ec);
  if (ec != fasio::error::success || BytesRead != sizeof(Res) || Res.Header.Type != PacketType::TYPE_SUCCESS) {
    return -1;
  }

  return NewFD;
}

// Socket FD for the FEXServer connection. Assigned by SetupClient() and returned by GetServerFD();
// stays -1 until a connection has been established.
static int ServerFD {-1};

// Folder (with trailing slash) inside FEX's data directory that holds the FEXServer lock files.
fextl::string GetServerLockFolder() {
  fextl::string Folder = FEXCore::Config::GetDataDirectory();
  Folder += "Server/";
  return Folder;
}

// Full path of `Server.lock` inside the server lock folder.
fextl::string GetServerLockFile() {
  fextl::string LockFile = GetServerLockFolder();
  LockFile += "Server.lock";
  return LockFile;
}

// Full path of `RootFS.lock` inside the server lock folder.
fextl::string GetServerRootFSLockFile() {
  fextl::string LockFile = GetServerLockFolder();
  LockFile += "RootFS.lock";
  return LockFile;
}

/**
 * @brief Picks a temporary folder from the environment.
 *
 * @return The value of the first environment variable that is set, checked in the order listed
 * below, or `/tmp` if none of them is set.
 */
fextl::string GetTempFolder() {
  // Checked in priority order; the first one that is set wins.
  static constexpr const char* Candidates[] = {
    "XDG_RUNTIME_DIR", "TMPDIR", "TMP", "TEMP", "TEMPDIR",
  };

  for (const char* Name : Candidates) {
    if (const char* Value = getenv(Name)) {
      return Value;
    }
  }

  // Nothing in the environment, so fall back to `/tmp`.
  // Might not be ideal but there isn't much of a choice.
  return fextl::string {"/tmp"};
}

fextl::string GetServerMountFolder() {
  // The FEXServer mount directory has some tricky requirements.
  // - `/tmp/` should be avoided if possible.
  //   - systemd services use the `PrivateTmp` feature to give services their own tmp.
  //   - It is used as a fallback path /only/.
  // - It can't be `[$XDG_DATA_HOME,$HOME]/.fex-emu/`
  //   - That might live on a filesystem (sshfs) which can't handle mount points inside it.
  //
  // Directories it can be in:
  // - $XDG_RUNTIME_DIR if set
  //   - Is typically `/run/user/<UID>/`
  //   - The systemd `PrivateTmp` feature doesn't touch this.
  //   - If this path doesn't exist then `/tmp/` is the last resort.
  //   - pressure-vessel explicitly creates an internal XDG_RUNTIME_DIR inside its chroot.
  //     - This is okay since pressure-vessel rbinds the FEX rootfs from the host to `/run/pressure-vessel/interpreter-root`.
  fextl::string MountFolder = GetTempFolder();

  const bool InPressureVessel = FEXCore::Config::FindContainer() == "pressure-vessel";
  if (InPressureVessel) {
    // pressure-vessel is a chroot environment, so the mount point changes location.
    // By default it maps the host filesystem to `/run/host/`, so redirect there.
    // Once pressure-vessel is fully set up it sets the `FEX_ROOTFS` environment variable,
    // which FEX will pick up.
    MountFolder.insert(0, "/run/host/");
  }

  return MountFolder;
}

// Name of the FEXServer socket: the configured SERVERSOCKETPATH option if it is non-empty,
// otherwise `<uid>.FEXServer.Socket` for the calling user.
fextl::string GetServerSocketName() {
  FEX_CONFIG_OPT(ServerSocketPath, SERVERSOCKETPATH);
  if (!ServerSocketPath().empty()) {
    return ServerSocketPath;
  }
  return fextl::fmt::format("{}.FEXServer.Socket", ::getuid());
}

/**
 * @brief Computes the filesystem path of the FEXServer socket.
 *
 * Without FEX_STEAM_SUPPORT the SERVERSOCKETPATH option is consulted: a value starting with `/`
 * is returned as-is, any other value is placed inside the folder from GetTempFolder().
 * With FEX_STEAM_SUPPORT the option is ignored and the server lock folder is used instead.
 * When no name is configured the file is called `<uid>.FEXServer.Socket`.
 */
fextl::string GetServerSocketPath() {
  fextl::string ConfiguredName {};
#ifndef FEX_STEAM_SUPPORT
  FEX_CONFIG_OPT(ServerSocketPath, SERVERSOCKETPATH);

  ConfiguredName = ServerSocketPath();

  // An absolute path is used verbatim.
  if (ConfiguredName.starts_with("/")) {
    return ConfiguredName;
  }

  const auto Folder = GetTempFolder();
#else
  // Under Steam the FEXServer's socket is a game-specific directory.
  const auto Folder = GetServerLockFolder();
#endif

  if (!ConfiguredName.empty()) {
    return fextl::fmt::format("{}/{}", Folder, ConfiguredName);
  }
  return fextl::fmt::format("{}/{}.FEXServer.Socket", Folder, ::getuid());
}

// Returns the file-local ServerFD value; it is -1 unless SetupClient() stored a connected socket in it.
int GetServerFD() {
  return ServerFD;
}

/**
 * @brief Connects to a running FEXServer over an AF_UNIX stream socket.
 *
 * Unless built with FEX_STEAM_SUPPORT, the abstract socket named by GetServerSocketName() is tried
 * first. If that fails (or is compiled out) the filesystem socket at GetServerSocketPath() is tried
 * using the same socket FD.
 *
 * @param ConnectionOption With NoPrintConnectionError the expected "no server running" errors
 * (ECONNREFUSED, plus ENOENT for the path-based socket) are not logged; other errors still are.
 *
 * @return Connected socket FD, or -1 if the socket couldn't be created or neither address accepted
 * the connection.
 */
int ConnectToServer(ConnectionOption ConnectionOption) {
  int SocketFD {-1};
  size_t SizeOfAddr {};
  struct sockaddr_un addr {};
  size_t SizeOfSocketString {};

  // Create the initial unix socket
  SocketFD = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
  if (SocketFD == -1) {
    LogMan::Msg::EFmt("Couldn't open AF_UNIX socket {}", errno);
    return -1;
  }

  // Steam doesn't get to connect to global sockets.
#ifndef FEX_STEAM_SUPPORT
  auto ServerSocketName = GetServerSocketName();

  // AF_UNIX has a special feature for named socket paths.
  // If the name of the socket begins with `\0` then it is an "abstract" socket address.
  // The entirety of the name is used as a path to a socket that doesn't have any filesystem backing.
  addr.sun_family = AF_UNIX;
  // Clamped so that the copy below, which starts at sun_path + 1, stays inside sun_path.
  SizeOfSocketString = std::min(ServerSocketName.size() + 1, sizeof(addr.sun_path) - 1);
  addr.sun_path[0] = 0; // Abstract AF_UNIX sockets start with \0
  strncpy(addr.sun_path + 1, ServerSocketName.data(), SizeOfSocketString);
  // Include final null character.
  // NOTE(review): the length passed to connect() covers sun_path[0 .. SizeOfSocketString - 1], i.e. the
  // leading \0 plus the name. Presumably this matches the length the server binds with; confirm.
  SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString;

  if (connect(SocketFD, reinterpret_cast<struct sockaddr*>(&addr), SizeOfAddr) == -1) {
    if (ConnectionOption == ConnectionOption::Default || errno != ECONNREFUSED) {
      LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} {}", ServerSocketName, errno);
    }
  } else {
    return SocketFD;
  }
#endif

  // Try again with a path-based socket, since abstract sockets will fail if we have been
  // placed in a new netns as part of a sandbox.
  auto ServerSocketPath = GetServerSocketPath();

  addr.sun_family = AF_UNIX;
  // The path is truncated to fit sun_path. No terminator is copied; the address length below
  // tells connect() where the path ends.
  SizeOfSocketString = std::min(ServerSocketPath.size(), sizeof(addr.sun_path) - 1);
  strncpy(addr.sun_path, ServerSocketPath.data(), SizeOfSocketString);
  SizeOfAddr = sizeof(addr.sun_family) + SizeOfSocketString;
  if (connect(SocketFD, reinterpret_cast<struct sockaddr*>(&addr), SizeOfAddr) == -1) {
    if (ConnectionOption == ConnectionOption::Default || (errno != ECONNREFUSED && errno != ENOENT)) {
      LogMan::Msg::EFmt("Couldn't connect to FEXServer socket {} {}", ServerSocketPath, errno);
    }
  } else {
    return SocketFD;
  }

  // Neither address worked, so the socket is of no further use.
  close(SocketFD);
  return -1;
}

/**
 * @brief Connects to FEXServer (starting it if needed) and picks up the rootfs path from it.
 *
 * @param InterpreterPath Forwarded to ConnectToAndStartServer()
 *
 * @return false if no server connection could be established, true otherwise
 */
bool SetupClient(std::string_view InterpreterPath) {
  ServerFD = FEXServerClient::ConnectToAndStartServer(InterpreterPath);
  const bool Connected = ServerFD != -1;
  if (!Connected) {
    return false;
  }

  // If we were started in a container then the rootfs that it provided is used as-is.
  // In the pressure-vessel case this is a combination of our rootfs and the steam soldier runtime.
  const bool ContainerProvidesRootFS = FEXCore::Config::FindContainer() == "pressure-vessel";
  if (ContainerProvidesRootFS) {
    return true;
  }

  // Everything has passed, so the rootfs path can be updated with what the server reports.
  fextl::string RootFSPath = FEXServerClient::RequestRootFSPath(ServerFD);
  FEXCore::Config::Set(FEXCore::Config::CONFIG_ROOTFS, RootFSPath);
  return true;
}

/**
 * @brief Launches a FEXServer process and connects to it.
 *
 * A `FEXServer` binary located in the same directory as InterpreterPath takes priority; otherwise
 * `FEXServer` is resolved through PATH by execvp(). The server is told to report on a pipe
 * (`--wait_pipe`), and once it has reported the connection is attempted up to five times, one
 * second apart.
 *
 * SIGCHLD is ignored while the server is being started and the previous disposition is restored on
 * every return path of the parent.
 *
 * @param InterpreterPath Path whose directory component is searched for a local FEXServer
 * @param watch_fd If not -1, passed to the server as `--watch_fd <fd>`
 *
 * @return Socket FD for communicating with the server, or -1 on failure
 */
int StartServer(std::string_view InterpreterPath, int watch_fd) {
  int LocalServerFD {-1};
  // Couldn't connect to the server. Start one

  // Open some pipes for letting us know when the server is ready
  int fds[2] {};
  if (pipe2(fds, 0) != 0) {
    LogMan::Msg::EFmt("Couldn't open pipe");
    return -1;
  }

  // Extract directory from InterpreterPath
  fextl::string InterpreterDir {InterpreterPath};
  size_t LastSlash = InterpreterDir.rfind('/');
  if (LastSlash != fextl::string::npos) {
    InterpreterDir = InterpreterDir.substr(0, LastSlash);
  }

  fextl::string FEXServerPath = fextl::fmt::format("{}/FEXServer", InterpreterDir);
  // Check if a local FEXServer next to FEX exists
  // If it does then it takes priority over the installed one
  if (!FHU::Filesystem::Exists(FEXServerPath)) {
    FEXServerPath = "FEXServer";
  }

  // Set-up our SIGCHLD handler to ignore the signal.
  // This is early in the initialization stage so no handlers have been installed.
  //
  // We want to ignore the signal so that if FEXServer starts in daemon mode, it
  // doesn't leave a zombie process around waiting for something to get the result.
  //
  // Separate structs are used for the new and the previous action: sigaction() declares both
  // pointers `restrict`, so they must not alias.
  struct sigaction IgnoreAction {};
  IgnoreAction.sa_handler = SIG_IGN;
  struct sigaction PreviousAction {};
  const bool HavePreviousAction = sigaction(SIGCHLD, &IgnoreAction, &PreviousAction) == 0;

  // Restore the original SIGCHLD handler if it existed.
  const auto RestoreSIGCHLD = [&]() {
    if (HavePreviousAction) {
      sigaction(SIGCHLD, &PreviousAction, nullptr);
    }
  };

  pid_t pid = fork();
  if (pid == -1) {
    // No child exists, so nothing will ever report on the pipe or open the socket.
    LogMan::Msg::EFmt("Couldn't fork to launch FEXServer {}", errno);
    close(fds[0]);
    close(fds[1]);
    RestoreSIGCHLD();
    return -1;
  }

  if (pid == 0) {
    // Child
    close(fds[0]); // Close read end of pipe

    // Worst case: program, --wait_pipe, <fd>, --watch_fd, <fd>, nullptr
    const char* argv[6];

    auto pipe_string = fextl::fmt::format("{}", fds[1]);
    auto watch_fd_string = fextl::fmt::format("{}", watch_fd);
    size_t arg_count {};
    argv[arg_count++] = FEXServerPath.c_str();
    argv[arg_count++] = "--wait_pipe";
    argv[arg_count++] = pipe_string.c_str();

    if (watch_fd != -1) {
      argv[arg_count++] = "--watch_fd";
      argv[arg_count++] = watch_fd_string.c_str();
    }

    argv[arg_count++] = nullptr;

    if (execvp(argv[0], (char* const*)argv) == -1) {
      // Let the parent know that we couldn't execute for some reason
      uint64_t error {1};
      write(fds[1], &error, sizeof(error));

      // Give a hopefully helpful error message for users
      LogMan::Msg::EFmt("Couldn't execute: {}", argv[0]);
      LogMan::Msg::EFmt("This means the squashFS rootfs won't be mounted.");
      LogMan::Msg::EFmt("Expect errors!");
      // Destroy this fork
      exit(1);
    }

    FEX_UNREACHABLE;
  } else {
    // Parent
    // Wait for the child to exit so we can check if it is mounted or not
    close(fds[1]); // Close write end of the pipe

    // Wait for a message from FEXServer
    pollfd PollFD;
    PollFD.fd = fds[0];
    PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL;

    // Wait for a result on the pipe that isn't EINTR
    while (poll(&PollFD, 1, -1) == -1 && errno == EINTR)
      ;

    // Check if child signaled an error
    uint64_t error = 0;
    ssize_t bytes_read = read(fds[0], &error, sizeof(error));
    close(fds[0]);
    if (bytes_read > 0 && error != 0) {
      // The child failed to exec and is about to exit. Wait for it to terminate while SIGCHLD is
      // still ignored so it can't be left behind as a zombie, then put the old disposition back.
      while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR)
        ;
      RestoreSIGCHLD();
      return -1;
    }

    for (size_t i = 0; i < 5; ++i) {
      LocalServerFD = ConnectToServer(ConnectionOption::Default);

      if (LocalServerFD != -1) {
        break;
      }

      std::this_thread::sleep_for(std::chrono::seconds(1));
    }

    if (LocalServerFD == -1) {
      // Still couldn't connect to the socket.
      LogMan::Msg::EFmt("Couldn't connect to FEXServer socket after launching the process");
    }
  }

  RestoreSIGCHLD();

  return LocalServerFD;
}

// Connects to an already running FEXServer without logging the expected "no server" errors.
// Only if that fails is a new server started through StartServer().
int ConnectToAndStartServer(std::string_view InterpreterPath) {
  const int ExistingServerFD = ConnectToServer(ConnectionOption::NoPrintConnectionError);
  if (ExistingServerFD != -1) {
    return ExistingServerFD;
  }
  return StartServer(InterpreterPath);
}

/**
 * @name Packet request functions
 * @{ */
void RequestServerKill(int ServerSocket) {
  FEXServerRequestPacket Req {
    .Header {
      .Type = PacketType::TYPE_KILL,
    },
  };

  write(ServerSocket, &Req, sizeof(Req.BasicRequest));
}

// Issues a TYPE_GET_LOG_FD request through RequestPIDFDPacket() and returns its result
// (the received FD, or -1 on failure).
int RequestLogFD(int ServerSocket) {
  return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_LOG_FD);
}

/**
 * @brief Asks FEXServer for its rootfs mount path.
 *
 * @param ServerSocket Socket to the server
 *
 * @return The path carried by the TYPE_GET_ROOTFS_PATH response, or an empty string if the request
 * couldn't be written, the receive failed or came up short, or the response isn't a non-empty
 * rootfs path packet.
 */
fextl::string RequestRootFSPath(int ServerSocket) {
  FEXServerRequestPacket Req {
    .Header {
      .Type = PacketType::TYPE_GET_ROOTFS_PATH,
    },
  };

  if (write(ServerSocket, &Req, sizeof(Req.BasicRequest)) == -1) {
    return {};
  }

  // Wait for success response with data
  fextl::vector<char> Data(PATH_MAX + sizeof(FEXServerResultPacket));

  const ssize_t DataResult = recv(ServerSocket, Data.data(), Data.size(), 0);
  // Check the sign before comparing against sizeof(): a plain comparison would convert -1 to a huge
  // unsigned value and let a failed recv() pass the size check.
  if (DataResult < 0 || static_cast<size_t>(DataResult) < sizeof(FEXServerResultPacket)) {
    return {};
  }

  const auto* ResultPacket = reinterpret_cast<const FEXServerResultPacket*>(Data.data());
  if (ResultPacket->Header.Type != PacketType::TYPE_GET_ROOTFS_PATH || ResultPacket->MountPath.Length == 0) {
    return {};
  }

  // The path follows the fixed part of the packet. Bound the string by the bytes that were actually
  // received so that a response without a null terminator can't make us read past the data.
  const char* Mount = ResultPacket->MountPath.Mount;
  const size_t MountOffset = static_cast<size_t>(Mount - Data.data());
  const size_t MountBytes = static_cast<size_t>(DataResult) - MountOffset;
  return fextl::string(Mount, strnlen(Mount, MountBytes));
}

// Issues a TYPE_GET_PID_FD request through RequestPIDFDPacket() and returns its result
// (the received FD, or -1 on failure).
int RequestPIDFD(int ServerSocket) {
  return RequestPIDFDPacket(ServerSocket, PacketType::TYPE_GET_PID_FD);
}

void PopulateCodeCache(int ServerSocket, int ProgramFD, bool HasMultiblock) {
  fasio::error ec;
  fasio::tcp_socket Socket {ServerSocket};

  // Send request
  FEXServerRequestPacket Req {
    .Header {.Type = HasMultiblock ? PacketType::TYPE_POPULATE_CODE_CACHE : PacketType::TYPE_POPULATE_CODE_CACHE_NO_MULTIBLOCK}};

  fasio::mutable_buffer WriteBuffer {std::as_writable_bytes(std::span {&Req, 1})};
  WriteBuffer.FD = &ProgramFD;
  write(Socket, WriteBuffer, ec);
  if (ec != fasio::error::success) {
    return;
  }

  // Wait for success response to ensure FEXServer completed any pending cache generation.
  // The cache loading code handles missing caches gracefully, so we don't
  // actually care about the result here.
  FEXServerResultPacket Res {};
  fasio::mutable_buffer ResBuffer {std::as_writable_bytes(std::span {&Res, 1})};
  read(Socket, ResBuffer, ec);
}

int RequestCodeMapFD(int ServerSocket, int ProgramFD, bool HasMultiblock) {
  fasio::tcp_socket Socket {ServerSocket};
  FEXServerRequestPacket Req {
    .Header {
      .Type = HasMultiblock ? PacketType::TYPE_QUERY_CODE_MAP : PacketType::TYPE_QUERY_CODE_MAP_NO_MULTIBLOCK,
    },
  };

  // Send request
  fasio::error ec;
  {
    fasio::mutable_buffer WriteBuffer {std::as_writable_bytes(std::span {&Req, 1})};
    WriteBuffer.FD = &ProgramFD;
    write(Socket, WriteBuffer, ec);
    if (ec != fasio::error::success) {
      return -1;
    }
  }

  // Wait for success response and log FD
  FEXServerResultPacket Res {};
  fasio::mutable_buffer ResBuffer {std::as_writable_bytes(std::span {&Res, 1})};
  int NewFD = -1;
  ResBuffer.FD = &NewFD;
  read(Socket, ResBuffer, ec);
  if (ec != fasio::error::success || Res.Header.Type != PacketType::TYPE_SUCCESS) {
    return -1;
  }

  return NewFD;
}

/**  @} */

/**
 * @name FEX logging through FEXServer
 * @{ */

void MsgHandler(int FD, LogMan::DebugLevels Level, const char* Message) {
  size_t MsgLen = strlen(Message) + 1;

  Logging::PacketMsg Msg;
  Msg.Header = Logging::FillHeader(Logging::PacketTypes::TYPE_MSG);
  Msg.MessageLength = MsgLen;
  Msg.Level = Level;

  const iovec vec[2] = {
    {
      .iov_base = &Msg,
      .iov_len = sizeof(Msg),
    },
    {
      .iov_base = const_cast<char*>(Message),
      .iov_len = Msg.MessageLength,
    },
  };

  writev(FD, vec, 2);
}

// Forwards the message to MsgHandler() with the ASSERT debug level.
void AssertHandler(int FD, const char* Message) {
  MsgHandler(FD, LogMan::DebugLevels::ASSERT, Message);
}
/**  @} */
} // namespace FEXServerClient


================================================
FILE: Source/Common/FEXServerClient.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/string.h>

#include <cstddef>
#include <cstdint>
#include <ctime>
#include <string_view>

namespace LogMan {
enum DebugLevels : uint32_t;
}

namespace FEXServerClient {
// Type tag carried in the header of every request and result packet.
// The enumerators have implicit, sequential values, so their order determines what is sent.
// NOTE(review): presumably this has to stay in sync with the FEXServer side; confirm before reordering.
enum class PacketType {
  // Request and Result
  TYPE_KILL,
  TYPE_GET_LOG_FD,
  TYPE_GET_ROOTFS_PATH,
  TYPE_GET_PID_FD,
  TYPE_POPULATE_CODE_CACHE,
  TYPE_POPULATE_CODE_CACHE_NO_MULTIBLOCK,
  TYPE_QUERY_CODE_MAP,
  TYPE_QUERY_CODE_MAP_NO_MULTIBLOCK,

  // Result only
  TYPE_SUCCESS,
  TYPE_ERROR,
};

// Request packet sent from a client to FEXServer.
// Every variant starts with the same Header, so Header.Type can be read regardless of the variant.
union FEXServerRequestPacket {
  struct Header {
    PacketType Type;
  } Header;

  // Request that consists of the header only.
  struct {
    struct Header Header;
  } BasicRequest;
};

// Result packet sent from FEXServer back to a client.
// Every variant starts with the same Header, so Header.Type can be read regardless of the variant.
union FEXServerResultPacket {
  struct Header {
    PacketType Type;
  } Header;

  struct {
    struct Header Header;
    int32_t PID;
  } PID;

  struct {
    struct Header Header;
    size_t Length;
    // Zero-length array: marks where the path bytes begin, directly after the fixed part.
    char Mount[0];
  } MountPath;
};

constexpr size_t MAXIMUM_REQUEST_PACKET_SIZE = sizeof(FEXServerRequestPacket);

fextl::string GetServerLockFolder();
fextl::string GetServerLockFile();
fextl::string GetServerRootFSLockFile();
fextl::string GetTempFolder();
fextl::string GetServerMountFolder();
fextl::string GetServerSocketName();
fextl::string GetServerSocketPath();
int GetServerFD();

bool SetupClient(std::string_view InterpreterPath);

/**
 * @brief Start a FEXServer instance if possible
 *
 * @return socket FD for communicating with server
 */
int StartServer(std::string_view InterpreterPath, int watch_fd = -1);

/**
 * @brief Connect to and start a FEXServer instance if required
 *
 * @return socket FD for communicating with server
 */
int ConnectToAndStartServer(std::string_view InterpreterPath);

// Selects how ConnectToServer() reports connection failures.
enum class ConnectionOption {
  // Log every connection failure.
  Default,
  // Don't log the failures that are expected when no server is running.
  NoPrintConnectionError,
};
/**
 * @brief Connect to a FEXServer instance if it exists
 *
 * @return socket FD for communicating with server
 */
int ConnectToServer(ConnectionOption ConnectionOption = ConnectionOption::Default);

/**
 * @name Packet request functions
 * @{ */
/**
 * @brief Request the server to be killed
 *
 * @param ServerSocket - Socket to the server
 */
void RequestServerKill(int ServerSocket);

/**
 * @brief Request a FEXServer to give us a log FD to write in to
 *
 * @param ServerSocket - Socket to the server
 *
 * @return FD for logging in to
 */
int RequestLogFD(int ServerSocket);

fextl::string RequestRootFSPath(int ServerSocket);

/**
 * @brief Request a FEXServer to give us a pidfd of the process
 *
 * @param ServerSocket - Socket to the server
 *
 * @return FD for pidfd
 */
int RequestPIDFD(int ServerSocket);

/**
 * @brief Request FEXServer to populate the disk cache for the given executable
 *        and any libraries referenced in its code map
 *
 * @param ServerSocket - Socket to the server
 * @param ProgramFD - FD for program binary
 * @param HasMultiblock - true if multiblock is enabled (used for selecting code maps)
 */
void PopulateCodeCache(int ServerSocket, int ProgramFD, bool HasMultiblock);

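/**
 * @brief Request FEXServer to create a new code map for disk cache population
 *
 * @param ServerSocket - Socket to the server
 * @param ProgramFD - FD for program binary
 * @param HasMultiblock - true if multiblock is enabled (selects the multiblock or non-multiblock code map request)
 *
 * @return FD to write code map to, or -1 on failure
 */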
int RequestCodeMapFD(int ServerSocket, int ProgramFD, bool HasMultiblock);

/**  @} */

/**
 * @name FEX logging through FEXServer
 * @{ */
// Packet layout used for sending log messages over a log FD.
namespace Logging {
  enum class PacketTypes : uint32_t {
    TYPE_MSG,
  };

  // Common header of every logging packet; filled in by FillHeader().
  struct PacketHeader {
    struct timespec Timestamp {};
    PacketTypes PacketType {};
    int32_t PID {};
    int32_t TID {};
    uint32_t Pad {};
    // Zero-length array: marks where the bytes following the header begin.
    char Data[0];
  };

  // Header of a TYPE_MSG packet. MsgHandler() writes the message text right after this struct.
  struct PacketMsg {
    PacketHeader Header {};
    // Has no default initializer; MsgHandler() sets it to the text length including the null terminator.
    size_t MessageLength;
    uint32_t Level {};
    uint32_t Pad {};
  };

  // Guards the header size against accidental layout changes.
  static_assert(sizeof(PacketHeader) == 32, "Wrong size");

  // Returns a header of the given type stamped with the current PID, TID and CLOCK_MONOTONIC time.
  PacketHeader FillHeader(PacketTypes Type);
} // namespace Logging

void MsgHandler(int FD, LogMan::DebugLevels Level, const char* Message);
void AssertHandler(int FD, const char* Message);
/**  @} */
} // namespace FEXServerClient


================================================
FILE: Source/Common/FileFormatCheck.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/fextl/string.h>

#include <fcntl.h>
#include <stdint.h>
#include <sys/stat.h>
#include <unistd.h>

namespace FEX::FormatCheck {
/**
 * @brief Checks whether a file starts with a SquashFS header of version 4.0 or newer.
 *
 * @return true only if the file could be opened, a full header could be read, the magic matches
 * and the version check passes
 */
bool IsSquashFS(const fextl::string& Filename) {
  // Layout of the header found at the very start of the file.
  struct SquashFSHeader {
    uint32_t magic;
    uint32_t inode_count;
    uint32_t mtime;
    uint32_t block_size;
    uint32_t fragment_entry_count;
    uint16_t compression_id;
    uint16_t block_log;
    uint16_t flags;
    uint16_t id_count;
    uint16_t version_major;
    uint16_t version_minor;
    uint64_t More[8]; // More things that don't matter to us
  };

  constexpr uint32_t SQUASHFS_MAGIC = 0x73717368;
  constexpr uint32_t MINIMUM_VERSION = 0x00040000;

  const int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC);
  if (fd == -1) {
    return false;
  }

  SquashFSHeader Header {};
  const auto BytesRead = pread(fd, reinterpret_cast<char*>(&Header), sizeof(SquashFSHeader), 0);
  close(fd);

  // A short read means this can't be a valid archive.
  if (BytesRead != sizeof(SquashFSHeader)) {
    return false;
  }

  // Make sure the cookie matches
  if (Header.magic != SQUASHFS_MAGIC) {
    return false;
  }

  // Sanity check the version: major in the upper 16 bits, minor in the lower 16 bits.
  const uint32_t version = (uint32_t)Header.version_major << 16 | Header.version_minor;
  return version >= MINIMUM_VERSION;
}

/**
 * @brief Checks whether a file carries the EroFS v1 magic cookie at its fixed header offset.
 *
 * @return true only if the file could be opened, the cookie could be read and it matches
 */
bool IsEroFS(const fextl::string& Filename) {
  // v1 of EroFS has a 128byte header
  // This lives at a fixed offset inside of the first superblock of the file
  // Each superblock is 4096bytes
  //
  // Only the uint32_t at the start of that header matters here, which is the cookie
  struct EroFSHeader {
    uint32_t Magic;
    // Additional data after this if necessary in the future.
  };

  constexpr size_t HEADER_OFFSET = 1024;
  constexpr uint32_t COOKIE_MAGIC_V1 = 0xE0F5E1E2;

  const int fd = open(Filename.c_str(), O_RDONLY | O_CLOEXEC);
  if (fd == -1) {
    return false;
  }

  EroFSHeader Header {};
  const auto BytesRead = pread(fd, reinterpret_cast<char*>(&Header), sizeof(EroFSHeader), HEADER_OFFSET);
  close(fd);

  if (BytesRead != sizeof(EroFSHeader)) {
    return false;
  }

  return Header.Magic == COOKIE_MAGIC_V1;
}
} // namespace FEX::FormatCheck


================================================
FILE: Source/Common/FileFormatCheck.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/string.h>

namespace FEX::FormatCheck {
bool IsSquashFS(const fextl::string& Filename);
bool IsEroFS(const fextl::string& Filename);
} // namespace FEX::FormatCheck


================================================
FILE: Source/Common/FileMappingBaseAddress.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <span>

#include <elf.h>

namespace FEXCore {

/**
 * Infers the base virtual address from a file mapping (as described by parameters to a single
 * call to mmap()).
 *
 * Usually the base address can uniquely be inferred, but in edge cases multiple possible
 * candidates are returned.
 *
 * The file offset of any given mapping need not match its virtual address offset from the base
 * mapping (file offset = 0). Instead, this function searches the corresponding ELF program headers
 * for an entry that generated the given file mapping.
 */
inline fextl::vector<uint64_t>
InferMappingBaseAddress(std::span<const Elf64_Phdr> ProgramHeaders, uint64_t Addr, uint64_t Size, uint64_t FileOffset, int AccessFlags) {
  // Note: Size is not consulted by the matching below.
  // Returns one candidate base address per matching PT_LOAD header; empty if nothing matches.
  fextl::vector<uint64_t> Ret;
  for (auto& phdr : ProgramHeaders) {
    if (phdr.p_type != PT_LOAD) {
      // Skip headers that don't trigger memory mappings
      continue;
    }

    // The segment's R/W/X bits must be exactly the requested ones.
    // AccessFlags is compared against the ELF PF_* bits directly.
    if ((phdr.p_flags & (PF_X | PF_W | PF_R)) != (AccessFlags & (PF_X | PF_W | PF_R))) {
      continue;
    }

    // The mapped file offset must lie within the file-backed range of this segment.
    // The segment's file offset is moved back by the low 12 bits of its virtual address.
    auto SegmentStartOffset = phdr.p_offset - (phdr.p_vaddr & 0xfff);
    // NOTE(review): FEX_PAGE_MASK is defined elsewhere; what exactly the third condition compares
    // depends on that definition - confirm.
    if (FileOffset >= SegmentStartOffset && FileOffset < SegmentStartOffset + phdr.p_filesz &&
        (FileOffset & Utils::FEX_PAGE_MASK) == (phdr.p_offset & Utils::FEX_PAGE_MASK)) {
      // Compute VA offset relative to the base mapping
      // The first program header entry serves as the reference for the base mapping, whatever its type.
      Ret.push_back(Addr - (phdr.p_vaddr - (phdr.p_offset & 0xfff)) + (ProgramHeaders[0].p_vaddr - (ProgramHeaders[0].p_offset & 0xfff)) -
                    (FileOffset - SegmentStartOffset));
    }
  }

  return Ret;
}
} // namespace FEXCore


================================================
FILE: Source/Common/HostFeatures.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/CPUInfo.h"
#include "Common/HostFeatures.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/StringUtils.h>

#include <range/v3/view/split.hpp>
#include <range/v3/view/transform.hpp>

#ifdef ARCHITECTURE_x86_64
#include "Common/X86Features.h"
#endif

namespace FEX {

/**
 * @brief Fills Features->CPUMIDRs with one entry per CPU.
 *
 * The vector is resized to the CPU count from FEX::CPUInfo::CalculateNumberOfCPUs(). On arm64 each
 * entry is then read from the per-CPU `midr_el1` sysfs file; an entry whose file can't be read in
 * full or parsed keeps the value that resize() gave it. On other architectures only the resize happens.
 *
 * @param Features Host feature description to update
 */
void FillMIDRInformationViaLinux(FEXCore::HostFeatures* Features) {
  auto Cores = FEX::CPUInfo::CalculateNumberOfCPUs();
  Features->CPUMIDRs.resize(Cores);
#ifdef ARCHITECTURE_arm64
  for (size_t i = 0; i < Cores; ++i) {
    fextl::string MIDRPath = fextl::fmt::format("/sys/devices/system/cpu/cpu{}/regs/identification/midr_el1", i);
    std::array<char, 18> Data;
    // Needs to be a fixed size since depending on kernel it will try to read a full page of data and fail
    // Only read 18 bytes for a 64bit value prefixed with 0x
    if (FEXCore::FileLoading::LoadFileToBuffer(MIDRPath, Data) == Data.size()) {
      uint64_t MIDR {};
      // Skip the two-character `0x` prefix; from_chars() doesn't accept it.
      auto Results = std::from_chars(Data.data() + 2, Data.data() + Data.size(), MIDR, 16);
      if (Results.ec == std::errc()) {
        // Truncate to 32-bits, top 32-bits are all reserved in MIDR
        Features->CPUMIDRs[i] = static_cast<uint32_t>(MIDR);
      }
    }
  }
#endif
}

// Two variants of ReadSVEVectorLengthInBits():
// - arm64 without the VIXL simulator: executes a hand-encoded `rdvl x0, #8` and returns x0.
// - everything else: returns 0 to signal "unsupported".
// Note that the two variants differ in return type (uint64_t vs int).
#if defined(ARCHITECTURE_arm64) && !defined(VIXL_SIMULATOR)
// Naked function: the body consists of the asm block only, which leaves the result in x0 and returns itself.
__attribute__((naked)) static uint64_t ReadSVEVectorLengthInBits() {
  ///< Can't use rdvl instruction directly because compilers will complain that sve/sme is required.
  __asm(R"(
  .word 0x04bf5100 // rdvl x0, #8
  ret;
  )");
}
#else
[[maybe_unused]]
static int ReadSVEVectorLengthInBits() {
  // Return unsupported
  return 0;
}
#endif

#ifdef ARCHITECTURE_arm64
// Defines a static function `Get_<name>()` that reads system register `reg` with an `mrs`
// instruction and returns its 64-bit value.
#define GetSysReg(name, reg)                         \
  static uint64_t Get_##name() {                     \
    uint64_t Result {};                              \
    __asm("mrs %[Res], " #reg : [Res] "=r"(Result)); \
    return Result;                                   \
  }

// One reader per ID register that CPUFeaturesFromID consumes.
GetSysReg(ISAR0_EL1, ID_AA64ISAR0_EL1);
GetSysReg(PFR0_EL1, ID_AA64PFR0_EL1);
GetSysReg(PFR1_EL1, ID_AA64PFR1_EL1);
GetSysReg(MIDR_EL1, MIDR_EL1);
GetSysReg(ISAR1_EL1, ID_AA64ISAR1_EL1);
GetSysReg(MMFR0_EL1, ID_AA64MMFR0_EL1);
GetSysReg(MMFR2_EL1, ID_AA64MMFR2_EL1);
GetSysReg(ZFR0_EL1, s3_0_c0_c4_4); // Can't request by name
GetSysReg(MMFR1_EL1, ID_AA64MMFR1_EL1);
GetSysReg(ISAR2_EL1, ID_AA64ISAR2_EL1);
GetSysReg(DCZID_EL0, DCZID_EL0);

// CPUFeatures variant that is populated by reading the ID registers of the CPU it runs on.
class CPUFeaturesFromID final : public FEX::CPUFeatures {
public:
  CPUFeaturesFromID() {
    ISAR0.SetReg(Get_ISAR0_EL1());
    PFR0.SetReg(Get_PFR0_EL1());
    PFR1.SetReg(Get_PFR1_EL1());
    MIDR.SetReg(Get_MIDR_EL1());
    ISAR1.SetReg(Get_ISAR1_EL1());
    MMFR0.SetReg(Get_MMFR0_EL1());
    MMFR2.SetReg(Get_MMFR2_EL1());
    MMFR1.SetReg(Get_MMFR1_EL1());
    ISAR2.SetReg(Get_ISAR2_EL1());
    DCZID.SetReg(Get_DCZID_EL0());

    // PFR0 has to be populated before this check.
    if (PFR0.SupportsSVE()) {
      // Can only query if SVE is supported.
      ZFR0.SetReg(Get_ZFR0_EL1());
    }
    // Derive the feature flags from the registers set above.
    FillFeatureFlags();

    // Needs the feature flags from FillFeatureFlags(); the vector length is only read when SVE2 is reported.
    if (Supports(CPUFeatures::Feature::SVE2)) {
      SVEVL.SetReg(ReadSVEVectorLengthInBits());
    }
  }
};

// Builds a CPUFeaturesFromID and returns it by value as its FEX::CPUFeatures base.
FEX::CPUFeatures GetCPUFeaturesFromIDRegisters() {
  return CPUFeaturesFromID {};
}
#endif

/**
 * @brief CPUFeatures variant that is populated from a configuration string instead of the hardware.
 *
 * The string is a comma separated list of `key=value` pairs, where value is parsed as a hexadecimal
 * number by strtoull(). Recognized keys: isar0, isar1, isar2, pfr0, pfr1, midr, mmfr0, mmfr1, mmfr2,
 * zfr0, dczid, svevl. Entries without a key or value are skipped silently; unparsable values and
 * unknown keys are logged and skipped.
 */
class CPUFeaturesFromConfig final : public FEX::CPUFeatures {
public:
  CPUFeaturesFromConfig(std::string_view Config) {
    // Turns a sub-range produced by split back in to a view of the underlying characters.
    auto to_string_view = [](auto rng) {
      return std::string_view(&*rng.begin(), ranges::distance(rng));
    };

    for (auto Option : ranges::views::split(Config, ',') | ranges::views::transform(to_string_view)) {
      auto OptionData = ranges::views::split(Option, '=') | ranges::views::transform(to_string_view);
      auto OptionDataBegin = ranges::begin(OptionData);
      auto OptionDataEnd = ranges::end(OptionData);

      if (OptionDataBegin == OptionDataEnd) {
        continue;
      }

      auto Key = *OptionDataBegin;
      if (Key.empty()) {
        continue;
      }

      ++OptionDataBegin;
      if (OptionDataBegin == OptionDataEnd) {
        continue;
      }
      auto Value = *OptionDataBegin;

      // Value is a slice of Config and isn't null-terminated at its own end. strtoull() needs a
      // terminated string, so parse a copy; otherwise the last value of an unterminated Config
      // would make it read past the end of the view.
      const fextl::string ValueString {Value};
      char* str_end {};
      const uint64_t ValueHex = std::strtoull(ValueString.c_str(), &str_end, 16);

      if (str_end == ValueString.c_str()) {
        // Nothing was consumed, so this isn't a number.
        LogMan::Msg::EFmt("Couldn't parse '{}={}'\n", Key, Value);
        continue;
      }

      if (Key == "isar0") {
        ISAR0.SetReg(ValueHex);
      } else if (Key == "isar1") {
        ISAR1.SetReg(ValueHex);
      } else if (Key == "isar2") {
        ISAR2.SetReg(ValueHex);
      } else if (Key == "pfr0") {
        PFR0.SetReg(ValueHex);
      } else if (Key == "pfr1") {
        PFR1.SetReg(ValueHex);
      } else if (Key == "midr") {
        MIDR.SetReg(ValueHex);
      } else if (Key == "mmfr0") {
        MMFR0.SetReg(ValueHex);
      } else if (Key == "mmfr1") {
        MMFR1.SetReg(ValueHex);
      } else if (Key == "mmfr2") {
        MMFR2.SetReg(ValueHex);
      } else if (Key == "zfr0") {
        ZFR0.SetReg(ValueHex);
      } else if (Key == "dczid") {
        DCZID.SetReg(ValueHex);
      } else if (Key == "svevl") {
        SVEVL.SetReg(ValueHex);
      } else {
        LogMan::Msg::EFmt("Unknown Key: {}", Key);
      }
    }

    // Derive the feature flags from whatever registers were provided.
    FillFeatureFlags();
  }
};

// Builds a CPUFeaturesFromConfig from the given string and returns it by value as its
// FEX::CPUFeatures base.
FEX::CPUFeatures GetCPUFeaturesFromConfig(std::string_view Config) {
  return CPUFeaturesFromConfig {Config};
}

// CPUFeatures variant with every feature flag below Feature::MAX set, regardless of the hardware.
class CPUFeaturesAll final : public FEX::CPUFeatures {
public:
  CPUFeaturesAll() {
    // Special case: walk the whole enumeration and turn every flag on.
    const auto FeatureCount = FEXCore::ToUnderlying(FEX::CPUFeatures::Feature::MAX);
    for (uint32_t Index = 0; Index < FeatureCount; ++Index) {
      SetFeature(FEX::CPUFeatures::Feature {Index});
    }

    // Report unsupported for DCZVA
    DCZID.SetReg(0b1'0000);
  }
};

void FEX::CPUFeatures::FillFeatureFlags() {
  // ISAR0
  if (ISAR0.SupportsAES()) {
    SetFeature(Feature::AES);
  }
  if (ISAR0.SupportsPMULL()) {
    SetFeature(Feature::PMULL);
  }
  if (ISAR0.SupportsSHA1()) {
    SetFeature(Feature::SHA1);
  }
  if (ISAR0.SupportsSHA2()) {
    SetFeature(Feature::SHA2);
  }
  if (ISAR0.SupportsSHA512()) {
    SetFeature(Feature::SHA512);
  }
  if (ISAR0.SupportsCRC32()) {
    SetFeature(Feature::CRC32);
  }
  if (ISAR0.SupportsLSE()) {
    SetFeature(Feature::LSE);
  }
  if (ISAR0.SupportsLSE128()) {
    SetFeature(Feature::LSE128);
  }
  if (ISAR0.SupportsTME()) {
    SetFeature(Feature::TME);
  }
  if (ISAR0.SupportsRDM()) {
    SetFeature(Feature::RDM);
  }
  if (ISAR0.SupportsSHA3()) {
    SetFeature(Feature::SHA3);
  }
  if (ISAR0.SupportsSM3()) {
    SetFeature(Feature::SM3);
  }
  if (ISAR0.SupportsSM4()) {
    SetFeature(Feature::SM4);
  }
  if (ISAR0.SupportsDotProd()) {
    SetFeature(Feature::DotProd);
  }
  if (ISAR0.SupportsFlagM()) {
    SetFeature(Feature::FlagM);
  }
  if (ISAR0.SupportsFlagM2()) {
    SetFeature(Feature::FlagM2);
  }
  if (ISAR0.SupportsRNDR()) {
    SetFeature(Feature::RNDR);
  }

  // PFR0
  if (PFR0.SupportsFP()) {
    SetFeature(Feature::FP);
  }
  if (PFR0.SupportsHP()) {
    SetFeature(Feature::FP16);
  }
  if (PFR0.SupportsAdvSIMD()) {
    SetFeature(Feature::ASIMD);
  }
  if (PFR0.SupportsASIMDHP()) {
    SetFeature(Feature::ASIMD16);
  }
  if (PFR0.SupportsRAS()) {
    SetFeature(Feature::RAS);
  }
  if (PFR0.SupportsSVE()) {
    SetFeature(Feature::SVE);
  }
  if (PFR0.SupportsDIT()) {
    SetFeature(Feature::DIT);
  }
  if (PFR0.SupportsCSV2()) {
    SetFeature(Feature::CSV2);
  }
  if (PFR0.SupportsCSV3()) {
    SetFeature(Feature::CSV3);
  }

  // PFR1
  if (PFR1.SupportsBTI()) {
    SetFeature(Feature::BTI);
  }
  if (PFR1.SupportsSSBS()) {
    SetFeature(Feature::SSBS);
  }
  if (PFR1.SupportsSSBS()) {
    SetFeature(Feature::SSBS2);
  }
  if (PFR1.SupportsMTE()) {
    SetFeature(Feature::MTE);
  }
  if (PFR1.SupportsMTE2()) {
    SetFeature(Feature::MTE2);
  }
  if (PFR1.SupportsMTE3()) {
    SetFeature(Feature::MTE3);
  }
  if (PFR1.SupportsSME()) {
    SetFeature(Feature::SME);
  }
  if (PFR1.SupportsSME2()) {
    SetFeature(Feature::SME2);
  }

  // ISAR1
  if (ISAR1.SupportsDPB()) {
    SetFeature(Feature::DPB);
  }
  if (ISAR1.SupportsDPB2()) {
    SetFeature(Feature::DPB2);
  }
  if (ISAR1.SupportsJSCVT()) {
    SetFeature(Feature::JSCVT);
  }
  if (ISAR1.SupportsFCMA()) {
    SetFeature(Feature::FCMA);
  }
  if (ISAR1.SupportsLRCPC()) {
    SetFeature(Feature::LRCPC);
  }
  if (ISAR1.SupportsLRCPC2()) {
    SetFeature(Feature::LRCPC2);
  }
  if (ISAR1.SupportsLRCPC3()) {
    SetFeature(Feature::LRCPC3);
  }
  if (ISAR1.SupportsFRINTTS()) {
    SetFeature(Feature::FRINTTS);
  }
  if (ISAR1.SupportsSB()) {
    SetFeature(Feature::SB);
  }
  if (ISAR1.SupportsSPECRES()) {
    SetFeature(Feature::SPECRES);
  }
  if (ISAR1.SupportsSPECRES2()) {
    SetFeature(Feature::SPECRES2);
  }
  if (ISAR1.SupportsBF16()) {
    SetFeature(Feature::BF16);
  }
  if (ISAR1.SupportsSME_F64F64()) {
    SetFeature(Feature::SME_F64F64);
  }
  if (ISAR1.SupportsI8MM()) {
    SetFeature(Feature::I8MM);
  }
  if (ISAR1.SupportsXS()) {
    SetFeature(Feature::XS);
  }
  if (ISAR1.SupportsLS64()) {
    SetFeature(Feature::LS64);
  }
  if (ISAR1.SupportsLS64_V()) {
    SetFeature(Feature::LS64_V);
  }
  if (ISAR1.SupportsLS64_ACCDATA()) {
    SetFeature(Feature::LS64_ACCDATA);
  }

  // MMFR0
  if (MMFR0.SupportsECV()) {
    SetFeature(Feature::ECV);
  }

  // MMFR2
  if (MMFR2.SupportsLSE2()) {
    SetFeature(Feature::LSE2);
  }

  // ZFR0
  if (Supports(Feature::SVE)) {
    if (ZFR0.SupportsSVE2()) {
      SetFeature(Feature::SVE2);
    }
    if (ZFR0.SupportsSVE2_1()) {
      SetFeature(Feature::SVE2_1);
    }
    if (ZFR0.SupportsSVE_AES()) {
      SetFeature(Feature::SVE_AES);
    }
    if (ZFR0.SupportsSVE_PMULL128()) {
      SetFeature(Feature::SVE_PMULL128);
    }
    if (ZFR0.SupportsSVE_BitPerm()) {
      SetFeature(Feature::SVE_BitPerm);
    }
    if (ZFR0.SupportsSVE_BF16()) {
      SetFeature(Feature::SVE_BF16);
    }
    if (ZFR0.SupportsSVE_B16B16()) {
      SetFeature(Feature::SVE_B16B16);
    }
    if (ZFR0.SupportsSVE_SHA3()) {
      SetFeature(Feature::SVE_SHA3);
    }
    if (ZFR0.SupportsSVE_SM4()) {
      SetFeature(Feature::SVE_SM4);
    }
    if (ZFR0.SupportsSVE_I8MM()) {
      SetFeature(Feature::SVE_I8MM);
    }
    if (ZFR0.SupportsSVE_F32MM()) {
      SetFeature(Feature::SVE_F32MM);
    }
    if (ZFR0.SupportsSVE_F64MM()) {
      SetFeature(Feature::SVE_F64MM);
    }
  }

  // MMFR1
  if (MMFR1.SupportsAFP()) {
    SetFeature(Feature::AFP);
  }

  // ISAR2
  if (ISAR2.SupportsWFxt()) {
    SetFeature(Feature::WFxt);
  }
  if (ISAR2.SupportsRPRES()) {
    SetFeature(Feature::RPRES);
  }
  if (ISAR2.SupportsPACQARMA3()) {
    SetFeature(Feature::PACQARMA3);
  }
  if (ISAR2.SupportsMOPS()) {
    SetFeature(Feature::MOPS);
  }
  if (ISAR2.SupportsHBC()) {
    SetFeature(Feature::HBC);
  }
  if (ISAR2.SupportsCLRBHB()) {
    SetFeature(Feature::CLRBHB);
  }
  if (ISAR2.SupportsSYSREG128()) {
    SetFeature(Feature::SYSREG128);
  }
  if (ISAR2.SupportsSYSINSTR128()) {
    SetFeature(Feature::SYSINSTR128);
  }
  if (ISAR2.SupportsPRFMSLC()) {
    SetFeature(Feature::PRFMSLC);
  }
  if (ISAR2.SupportsRPRFM()) {
    SetFeature(Feature::RPRFM);
  }
  if (ISAR2.SupportsCSSC()) {
    SetFeature(Feature::CSSC);
  }
}

#ifdef ARCHITECTURE_arm64
// Reads the AArch64 FPCR system register and returns its low 32 bits.
static uint32_t GetFPCR() {
  uint64_t Result {};
  // `volatile` is required: an asm statement with outputs and no inputs is otherwise treated as a
  // pure computation, so the compiler may merge or reorder the reads that are issued before and
  // after SetFPCR() when probing which bits stick.
  __asm volatile("mrs %[Res], FPCR" : [Res] "=r"(Result));
  return Result;
}

// Writes `Value` to the AArch64 FPCR system register.
static void SetFPCR(uint64_t Value) {
  // No output operands, so this statement is already implicitly volatile; spelled out for clarity.
  __asm volatile("msr FPCR, %[Value]"
                 : /* no outputs */
                 : [Value] "r"(Value));
}

#endif

// Applies the user's HOSTFEATURES configuration on top of the detected feature set.
//
// Features      - Detected host features; modified in place.
// ForceSVEWidth - User-requested SVE register width in bits (0 when not forced).
//
// Does nothing when the HOSTFEATURES option is zero. For every feature with an
// ENABLE*/DISABLE* pair, requesting both at once trips an assertion.
static void OverrideFeatures(FEXCore::HostFeatures* Features, uint64_t ForceSVEWidth) {
  // Override features if the user has specifically called for it.
  FEX_CONFIG_OPT(HostFeatures, HOSTFEATURES);
  if (!HostFeatures()) {
    // Early exit if no features are overridden.
    return;
  }

// Result = (detected OR force-enabled) AND NOT force-disabled, written back to Features->FeatureName.
#define ENABLE_DISABLE_OPTION(FeatureName, name, enum_name)                                                                        \
  do {                                                                                                                             \
    const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0;                          \
    const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0;                            \
    LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive"); \
    const bool AlreadyEnabled = Features->FeatureName;                                                                             \
    const bool Result = (AlreadyEnabled | Enable##name) & !Disable##name;                                                          \
    Features->FeatureName = Result;                                                                                                \
  } while (0)

// Only declares the Enable<name>/Disable<name> booleans in the enclosing scope; the caller decides what to do with them.
#define GET_SINGLE_OPTION(name, enum_name)                                                              \
  const bool Disable##name = (HostFeatures() & FEXCore::Config::HostFeatures::DISABLE##enum_name) != 0; \
  const bool Enable##name = (HostFeatures() & FEXCore::Config::HostFeatures::ENABLE##enum_name) != 0;   \
  LogMan::Throw::AFmt(!(Disable##name && Enable##name), "Disabling and Enabling CPU feature (" #name ") is mutually exclusive");

  ENABLE_DISABLE_OPTION(SupportsAVX, AVX, AVX);
  ENABLE_DISABLE_OPTION(SupportsSVE128, SVE, SVE);
  ENABLE_DISABLE_OPTION(SupportsAFP, AFP, AFP);
  ENABLE_DISABLE_OPTION(SupportsRCPC, LRCPC, LRCPC);
  ENABLE_DISABLE_OPTION(SupportsTSOImm9, LRCPC2, LRCPC2);
  ENABLE_DISABLE_OPTION(SupportsCSSC, CSSC, CSSC);
  ENABLE_DISABLE_OPTION(SupportsPMULL_128Bit, PMULL128, PMULL128);
  ENABLE_DISABLE_OPTION(SupportsRAND, RNG, RNG);
  ENABLE_DISABLE_OPTION(SupportsCLZERO, CLZERO, CLZERO);
  ENABLE_DISABLE_OPTION(SupportsAtomics, Atomics, ATOMICS);
  ENABLE_DISABLE_OPTION(SupportsFCMA, FCMA, FCMA);
  ENABLE_DISABLE_OPTION(SupportsFlagM, FlagM, FLAGM);
  ENABLE_DISABLE_OPTION(SupportsFlagM2, FlagM2, FLAGM2);
  ENABLE_DISABLE_OPTION(SupportsFRINTTS, FRINTTS, FRINTTS);
  ENABLE_DISABLE_OPTION(SupportsRPRES, RPRES, RPRES);
  ENABLE_DISABLE_OPTION(SupportsSVEBitPerm, SVEBITPERM, SVEBITPERM);
  ENABLE_DISABLE_OPTION(SupportsPreserveAllABI, PRESERVEALLABI, PRESERVEALLABI);
  ENABLE_DISABLE_OPTION(SupportsWFXT, WFXT, WFXT);
  ENABLE_DISABLE_OPTION(Supports3DNow, 3DNOW, 3DNOW);
  ENABLE_DISABLE_OPTION(SupportsSSE4a, SSE4A, SSE4A);
  ENABLE_DISABLE_OPTION(SupportsMOPS, MOPS, MOPS);
  GET_SINGLE_OPTION(Crypto, CRYPTO);

#undef ENABLE_DISABLE_OPTION
#undef GET_SINGLE_OPTION

  // The crypto option is a group toggle: it forces all of the crypto-related flags together.
  if (EnableCrypto) {
    Features->SupportsAES = true;
    Features->SupportsCRC = true;
    Features->SupportsSHA = true;
    Features->SupportsPMULL_128Bit = true;
    Features->SupportsAES256 = true;
  } else if (DisableCrypto) {
    Features->SupportsAES = false;
    Features->SupportsCRC = false;
    Features->SupportsSHA = false;
    Features->SupportsPMULL_128Bit = false;
    Features->SupportsAES256 = false;
  }

  ///< Only force enable SVE256 if SVE is already enabled and ForceSVEWidth is set to >= 256.
  // SupportsSVE128 already reflects any ENABLESVE/DISABLESVE override applied above, so gating on it
  // keeps SVE256 from being reported on a configuration that has no SVE at all.
  Features->SupportsSVE256 = Features->SupportsSVE128 && ForceSVEWidth >= 256;
}

// Turns off host features that are known to misbehave on specific CPU implementations.
//
// HostFeatures - Feature set to adjust; its CPUMIDRs list is scanned for affected cores.
// MIDR         - MIDR value of the current core, used for the Oryon check on arm64 builds.
static void HandleErrata(FEXCore::HostFeatures* HostFeatures, uint64_t MIDR) {
  constexpr uint32_t Implementer_ARM = 0x41;
  constexpr uint32_t PartNum_V2 = 0xd4f;
  constexpr uint32_t PartNum_V3 = 0xd84;
  constexpr uint32_t PartNum_V3AE = 0xd83;
  constexpr uint32_t PartNum_X3 = 0xd4e;
  constexpr uint32_t PartNum_X4 = 0xd82;
  constexpr uint32_t PartNum_X925 = 0xd85;
  constexpr uint32_t PartNum_C1Ultra = 0xd8c;
  constexpr uint32_t PartNum_C1Premium = 0xd90;

  // Implementer lives in MIDR[31:24], part number in MIDR[15:4].
  const auto ExtractImplementer = [](uint32_t Value) -> uint32_t {
    return (Value >> 24) & 0xFF;
  };
  const auto ExtractPartNum = [](uint32_t Value) -> uint32_t {
    return (Value >> 4) & 0xFFF;
  };

#ifdef ARCHITECTURE_arm64
  constexpr uint32_t Implementer_QCOM = 0x51;
  constexpr uint32_t PartNum_Oryon1 = 0x001;

  const bool IsOryon1 = ExtractImplementer(MIDR) == Implementer_QCOM && ExtractPartNum(MIDR) == PartNum_Oryon1;
  if (IsOryon1) {
    // Work around an erratum in Qualcomm's Oryon.
    // While this CPU implements the RAND extension:
    // - The RNDR register works.
    // - The RNDRRS register will never read a random number. (Always return failure)
    // This is contrary to x86 RNG behaviour where it allows spurious failure with RDSEED, but guarantees eventual success.
    // This manifested itself on Linux when an x86 processor failed to guarantee forward progress and boot of services would infinite
    // loop. Just disable this extension if this CPU is detected.
    HostFeatures->SupportsRAND = false;
  }
#endif

  // The LDAPUR instruction suffers from significant performance issues on many ARM implementations. This is
  // listed in the official Cortex errata list as follows:
  //
  // 3877900
  // LDAPUR, LDAPURB, LDAPURH instructions have stricter memory ordering than required
  //
  // LDAPUR instructions execute with full Load-Acquire ordering instead of the relaxed ordering described
  // in the LDAPUR pseudocode. This might cause significant performance degradation in workloads that do
  // not require this stricter memory ordering. Note that this erratum only affects the unscaled versions of
  // LDAPUR (LDAPUR, LDAPURB, LDAPURH), and not LDAPR (LDAPR, LDAPRB, LDAPRH).
  //
  // The list of cores to disable its use on was taken from the following LLVM PR that accomplishes the same
  // thing: https://github.com/llvm/llvm-project/pull/124274
  for (uint32_t CoreIndex = 0; CoreIndex < HostFeatures->CPUMIDRs.size(); CoreIndex++) {
    const uint32_t CoreMIDR = HostFeatures->CPUMIDRs[CoreIndex];
    if (ExtractImplementer(CoreMIDR) != Implementer_ARM) {
      continue;
    }

    bool AffectedCore = false;
    switch (ExtractPartNum(CoreMIDR)) {
    case PartNum_V2:
    case PartNum_V3:
    case PartNum_V3AE:
    case PartNum_X3:
    case PartNum_X4:
    case PartNum_X925:
    case PartNum_C1Ultra:
    case PartNum_C1Premium: AffectedCore = true; break;
    default: break;
    }

    if (AffectedCore) {
      // A single affected core is enough to turn the feature off for the whole system.
      HostFeatures->SupportsTSOImm9 = false;
      break;
    }
  }
}

// Populates `HostFeatures` from the decoded ID-register view in `Features`.
//
// Features                    - Decoded CPU feature flags plus DCZID / SVE vector length.
// HostFeatures                - Output structure; fields are written in place.
// SupportsCacheMaintenanceOps - Copied verbatim into HostFeatures.
// CTR                         - Cache type register value; 0 selects the 64-byte fallbacks.
// MIDR                        - MIDR value handed to HandleErrata().
//
// Statement order matters: later sections (simulator, x86-64 host, errata, user overrides)
// intentionally overwrite values assigned by earlier ones.
void FetchHostFeatures(FEX::CPUFeatures& Features, FEXCore::HostFeatures& HostFeatures, bool SupportsCacheMaintenanceOps, uint64_t CTR,
                       uint64_t MIDR) {
  FEX_CONFIG_OPT(ForceSVEWidth, FORCESVEWIDTH);
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);

  HostFeatures.SupportsCacheMaintenanceOps = SupportsCacheMaintenanceOps;

  HostFeatures.SupportsAES = Features.Supports(CPUFeatures::Feature::AES);
  HostFeatures.SupportsCRC = Features.Supports(CPUFeatures::Feature::CRC32);
  // SHA support requires both the SHA1 and SHA2 feature flags.
  HostFeatures.SupportsSHA = Features.Supports(CPUFeatures::Feature::SHA1) && Features.Supports(CPUFeatures::Feature::SHA2);
  HostFeatures.SupportsAtomics = Features.Supports(CPUFeatures::Feature::LSE);
  HostFeatures.SupportsRAND = Features.Supports(CPUFeatures::Feature::RNDR);

  // Only supported when FEAT_AFP is supported
  HostFeatures.SupportsAFP = Features.Supports(CPUFeatures::Feature::AFP);
  HostFeatures.SupportsRCPC = Features.Supports(CPUFeatures::Feature::LRCPC);
  HostFeatures.SupportsTSOImm9 = Features.Supports(CPUFeatures::Feature::LRCPC2);
  HostFeatures.SupportsPMULL_128Bit = Features.Supports(CPUFeatures::Feature::PMULL);
  HostFeatures.SupportsCSSC = Features.Supports(CPUFeatures::Feature::CSSC);
  HostFeatures.SupportsFCMA = Features.Supports(CPUFeatures::Feature::FCMA);
  HostFeatures.SupportsFlagM = Features.Supports(CPUFeatures::Feature::FlagM);
  HostFeatures.SupportsFlagM2 = Features.Supports(CPUFeatures::Feature::FlagM2);
  HostFeatures.SupportsFRINTTS = Features.Supports(CPUFeatures::Feature::FRINTTS);
  HostFeatures.SupportsRPRES = Features.Supports(CPUFeatures::Feature::RPRES);
  HostFeatures.SupportsSVEBitPerm = Features.Supports(CPUFeatures::Feature::SVE_BitPerm);
  HostFeatures.SupportsECV = Features.Supports(CPUFeatures::Feature::ECV);
  HostFeatures.SupportsWFXT = Features.Supports(CPUFeatures::Feature::WFxt);

#ifdef VIXL_SIMULATOR
  // Hardcode enable SVE with 256-bit wide registers.
  // A non-zero ForceSVEWidth narrows this: each width is only reported when the forced value reaches it.
  HostFeatures.SupportsSVE128 = ForceSVEWidth() ? ForceSVEWidth() >= 128 : true;
  HostFeatures.SupportsSVE256 = ForceSVEWidth() ? ForceSVEWidth() >= 256 : true;
  HostFeatures.SupportsMOPS = true;

  // Simulator has a hardcoded ZVA size of 64-bytes.
  HostFeatures.SupportsCLZERO = true;
  HostFeatures.SupportsAES = true;
  HostFeatures.SupportsCRC = true;
  HostFeatures.SupportsAVX = true;
  HostFeatures.SupportsSHA = true;
  HostFeatures.SupportsPMULL_128Bit = true;
  HostFeatures.SupportsAES256 = true;

  // Simulator doesn't support these
  HostFeatures.SupportsRPRES = false;
  HostFeatures.SupportsAFP = false;
#else
  // SVE usage is keyed on the SVE2 feature flag; 256-bit additionally needs a reported vector length of at least 256 bits.
  HostFeatures.SupportsSVE128 = Features.Supports(CPUFeatures::Feature::SVE2);
  HostFeatures.SupportsSVE256 = Features.Supports(CPUFeatures::Feature::SVE2) && Features.GetSVEVectorLengthInBits() >= 256;
  HostFeatures.SupportsMOPS = Features.Supports(CPUFeatures::Feature::MOPS);

  // Check if we can support cacheline clears
  if (Features.GetDCZID().SupportsDCZVA()) {
    // If the DC ZVA size matches the emulated cache line size
    // This means we can use the instruction
    constexpr static uint64_t CACHELINE_SIZE = 64;
    HostFeatures.SupportsCLZERO = Features.GetDCZID().BlockSizeInBytes() == CACHELINE_SIZE;
  }
#endif

  // AVX is reported unconditionally at this point; AES256 then needs both AVX and AES.
  HostFeatures.SupportsAVX = true;
  HostFeatures.SupportsAES256 = HostFeatures.SupportsAVX && HostFeatures.SupportsAES;
  HostFeatures.SupportsPreserveAllABI = FEX_HAS_PRESERVE_ALL_ATTR;

  if (CTR) {
    // Line sizes are decoded as `4 << field`: the D-cache field sits in CTR[19:16], the I-cache field in CTR[3:0].
    HostFeatures.DCacheLineSize = 4 << ((CTR >> 16) & 0xF);
    HostFeatures.ICacheLineSize = 4 << (CTR & 0xF);
  } else {
    // No CTR value supplied: fall back to 64-byte lines.
    HostFeatures.DCacheLineSize = 64;
    HostFeatures.ICacheLineSize = 64;
  }

  if (!HostFeatures.SupportsAtomics) {
    WARN_ONCE_FMT("Host CPU doesn't support atomics. Expect bad performance");
  }

#ifdef _WIN32
  // Disable 3DNow! by default to better match the set of extensions exposed on modern CPUs.
  // This works around a bug that manifests in some games using native d3dx9 DLLs (most easily reproduced in WoW64 builds).
  // For example, Fallout: New Vegas and some old EA games will run with a blackscreen.
  HostFeatures.Supports3DNow = false;
#else
  HostFeatures.Supports3DNow = true;
#endif

#ifdef ARCHITECTURE_arm64
  // Test if this CPU supports float exception trapping by attempting to enable
  // On unsupported these bits are architecturally defined as RAZ/WI
  constexpr uint32_t ExceptionEnableTraps = (1U << 8) |  // Invalid Operation float exception trap enable
                                            (1U << 9) |  // Divide by zero float exception trap enable
                                            (1U << 10) | // Overflow float exception trap enable
                                            (1U << 11) | // Underflow float exception trap enable
                                            (1U << 12) | // Inexact float exception trap enable
                                            (1U << 15);  // Input Denormal float exception trap enable

  // Write the trap-enable bits, read FPCR back, and only report support when every requested bit stuck.
  uint32_t OriginalFPCR = GetFPCR();
  uint32_t FPCR = OriginalFPCR | ExceptionEnableTraps;
  SetFPCR(FPCR);
  FPCR = GetFPCR();
  HostFeatures.SupportsFloatExceptions = (FPCR & ExceptionEnableTraps) == ExceptionEnableTraps;

  // Set FPCR back to original just in case anything changed
  SetFPCR(OriginalFPCR);
#endif

#if defined(ARCHITECTURE_x86_64) && !defined(VIXL_SIMULATOR)
  // x86-64 host without the simulator: take the values from FEX::X86::Features instead,
  // overwriting what was derived from `Features` above.
  FEX::X86::Features Feature {};
  HostFeatures.SupportsAES = Feature.Feat_aes;
  HostFeatures.SupportsCRC = Feature.Feat_crc;
  HostFeatures.SupportsRAND = Feature.Feat_rand;
  HostFeatures.SupportsRCPC = true;
  HostFeatures.SupportsTSOImm9 = true;
  HostFeatures.SupportsAVX = Feature.Feat_avx;
  HostFeatures.SupportsSHA = Feature.Feat_sha;
  HostFeatures.SupportsPMULL_128Bit = Feature.Feat_pclmulqdq;
  HostFeatures.SupportsAES256 = Feature.Feat_aes;
  HostFeatures.SupportsCLZERO = Feature.Feat_clzero;

  HostFeatures.SupportsAFP = true;
  HostFeatures.SupportsFloatExceptions = true;
#endif

  // Errata handling runs before the user overrides, so an explicit override is applied last.
  HandleErrata(&HostFeatures, MIDR);
  OverrideFeatures(&HostFeatures, ForceSVEWidth());
}

// Convenience entry point: chooses a source for the CPU feature registers, reads CTR/MIDR on
// arm64 hosts, and returns the fully populated HostFeatures structure.
FEXCore::HostFeatures FetchHostFeatures() {
  FEX_CONFIG_OPT(CPUFeatureRegisters, CPUFEATUREREGISTERS);

  CPUFeatures Features {};
  if (CPUFeatureRegisters().empty()) {
    // No user-supplied register dump: fall back to what the build target can provide.
#ifdef ARCHITECTURE_x86_64
    Features = CPUFeaturesAll {};

    // Vixl simulator doesn't support AFP.
    Features.RemoveFeature(CPUFeatures::Feature::AFP);
    // Vixl simulator doesn't support RPRES.
    Features.RemoveFeature(CPUFeatures::Feature::RPRES);
#else
    Features = GetCPUFeaturesFromIDRegisters();
#endif
  } else {
    Features = GetCPUFeaturesFromConfig(CPUFeatureRegisters());
  }

  // Both stay zero on hosts where the registers can't be read.
  uint64_t CacheTypeReg = 0;
  uint64_t MainIDReg = 0;
#ifdef ARCHITECTURE_arm64
  // We need to get the CPU's cache line size
  // We expect sane targets that have correct cacheline sizes across clusters
  __asm volatile("mrs %[ctr], ctr_el0" : [ctr] "=r"(CacheTypeReg));
  __asm volatile("mrs %[midr], midr_el1" : [midr] "=r"(MainIDReg));
#endif

  FEXCore::HostFeatures Result = {};
  FillMIDRInformationViaLinux(&Result);
  FetchHostFeatures(Features, Result, true, CacheTypeReg, MainIDReg);

  Result.SupportsCPUIndexInTPIDRRO = false;
  return Result;
}
} // namespace FEX


================================================
FILE: Source/Common/HostFeatures.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/Utils/EnumUtils.h>

#include <cstddef>

namespace FEX {
class CPUFeatures {
public:
  class FeatureReg {
  public:
    void SetReg(uint64_t _Reg) {
      Reg = _Reg;
    }

    uint64_t Get() const {
      return Reg;
    }
  protected:
    // All feature flag fields are 4-bits.
    uint64_t GetField(uint64_t Offset) const {
      return (Reg >> Offset) & 0b1111;
    }
    uint64_t Reg {};
  };

  enum class Feature : uint32_t {
    // ISAR0
    AES,
    PMULL,
    SHA1,
    SHA2,
    SHA512,
    CRC32,
    LSE,
    LSE128,
    TME,
    RDM,
    SHA3,
    SM3,
    SM4,
    DotProd,
    FlagM,
    FlagM2,
    RNDR,
    // PFR0
    FP,
    FP16,
    ASIMD,
    ASIMD16,
    RAS,
    SVE,
    DIT,
    CSV2,
    CSV3,
    // PFR1
    BTI,
    SSBS,
    SSBS2,
    MTE,
    MTE2,
    MTE3,
    SME,
    SME2,
    // ISAR1
    DPB,
    DPB2,
    JSCVT,
    FCMA,
    LRCPC,
    LRCPC2,
    LRCPC3,
    FRINTTS,
    SB,
    SPECRES,
    SPECRES2,
    BF16,
    SME_F64F64,
    I8MM,
    XS,
    LS64,
    LS64_V,
    LS64_ACCDATA,
    // MMFR0
    ECV,
    // MMFR2
    LSE2,
    // ZFR0
    SVE2,
    SVE2_1,
    SVE_AES,
    SVE_PMULL128,
    SVE_BitPerm,
    SVE_BF16,
    SVE_B16B16,
    SVE_SHA3,
    SVE_SM4,
    SVE_I8MM,
    SVE_F32MM,
    SVE_F64MM,
    // MMFR1
    AFP,
    // ISAR2
    WFxt,
    RPRES,
    PACQARMA3,
    MOPS,
    HBC,
    CLRBHB,
    SYSREG128,
    SYSINSTR128,
    PRFMSLC,
    RPRFM,
    CSSC,
    // Max indicator
    MAX,
  };

  class DCZIDReg final : public FeatureReg {
  public:
    bool SupportsDCZVA() const {
      return (Reg & DCZID_DZP_MASK) == 0;
    }

    uint32_t BlockSizeInBytes() const {
      uint32_t DCZID_Log2 = Reg & DCZID_BS_MASK;
      return (1 << DCZID_Log2) * sizeof(uint32_t);
    }

  private:
    // Data Zero Prohibited flag
    // 0b0 = ZVA/GVA/GZVA permitted
    // 0b1 = ZVA/GVA/GZVA prohibited
    [[maybe_unused]] constexpr static uint32_t DCZID_DZP_MASK = 0b1'0000;
    // Log2 of the blocksize in 32-bit words
    [[maybe_unused]] constexpr static uint32_t DCZID_BS_MASK = 0b0'1111;
  };

  // This list is informed by Linux kernel's `Documentation/arch/arm64/cpu-feature-registers.rst`
  enum class FeatureRegType {
    ISAR0_EL1,
    PFR0_EL1,
    PFR1_EL1,
    MIDR_EL1,
    ISAR1_EL1,
    MMFR0_EL1,
    MMFR2_EL1,
    ZFR0_EL1,
    MMFR1_EL1,
    ISAR2_EL1,
  };

#define FIELD_FETCHER(feature, field, minimum_field) \
  bool Supports##feature() const {                   \
    return GetField(field) >= minimum_field;         \
  }

  class ISAR0Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(AES, AES, 0b0001);
    FIELD_FETCHER(PMULL, AES, 0b0010);

    FIELD_FETCHER(SHA1, SHA1, 0b0001);

    FIELD_FETCHER(SHA2, SHA2, 0b0001);
    FIELD_FETCHER(SHA512, SHA2, 0b0010);

    FIELD_FETCHER(CRC32, CRC32, 0b0001);

    FIELD_FETCHER(LSE, Atomic, 0b0010);
    FIELD_FETCHER(LSE128, Atomic, 0b0011);

    FIELD_FETCHER(TME, TME, 0b0001);

    FIELD_FETCHER(RDM, RDM, 0b0001);

    FIELD_FETCHER(SHA3, SHA3, 0b0001);

    FIELD_FETCHER(SM3, SM3, 0b0001);

    FIELD_FETCHER(SM4, SM4, 0b0001);

    FIELD_FETCHER(DotProd, DP, 0b0001);

    FIELD_FETCHER(FHM, FHM, 0b0001);

    FIELD_FETCHER(FlagM, TS, 0b0001);
    FIELD_FETCHER(FlagM2, TS, 0b0010);

    FIELD_FETCHER(TLBIOS, TLB, 0b0001);
    FIELD_FETCHER(TLBIRANGE, TLB, 0b0010);

    FIELD_FETCHER(RNDR, RNDR, 0b0001);

  private:
    enum Field {
      RES0 = 0 * 4,
      AES = 1 * 4,
      SHA1 = 2 * 4,
      SHA2 = 3 * 4,
      CRC32 = 4 * 4,
      Atomic = 5 * 4,
      TME = 6 * 4,
      RDM = 7 * 4,
      SHA3 = 8 * 4,
      SM3 = 9 * 4,
      SM4 = 10 * 4,
      DP = 11 * 4,
      FHM = 12 * 4,
      TS = 13 * 4,
      TLB = 14 * 4,
      RNDR = 15 * 4,
    };
  };

  class PFR0Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(AA64_EL0, EL0, 0b0001);
    FIELD_FETCHER(AA32_EL0, EL0, 0b0010);

    FIELD_FETCHER(AA64_EL1, EL1, 0b0001);
    FIELD_FETCHER(AA32_EL1, EL1, 0b0010);

    FIELD_FETCHER(AA64_EL2, EL2, 0b0001);
    FIELD_FETCHER(AA32_EL2, EL2, 0b0010);

    FIELD_FETCHER(AA64_EL3, EL3, 0b0001);
    FIELD_FETCHER(AA32_EL3, EL3, 0b0010);

    bool SupportsFP() const {
      return GetField(FP) != 0b1111;
    }
    FIELD_FETCHER(HP, FP, 0b0001);

    bool SupportsAdvSIMD() const {
      return GetField(AdvSIMD) != 0b1111;
    }
    FIELD_FETCHER(ASIMDHP, AdvSIMD, 0b0001);

    FIELD_FETCHER(GIC4_0, GIC, 0b0001);
    FIELD_FETCHER(GIC4_1, GIC, 0b0011);

    FIELD_FETCHER(RAS, RAS, 0b0001);
    FIELD_FETCHER(RAS1_1, RAS, 0b0010);
    FIELD_FETCHER(RAS2, RAS, 0b0011);

    FIELD_FETCHER(SVE, SVE, 0b0001);

    FIELD_FETCHER(SEL2, SEL2, 0b0001);

    uint64_t MPAM_Major() const {
      return GetField(MPAM);
    }

    FIELD_FETCHER(AMU1, AMU, 0b0001);
    FIELD_FETCHER(AMU1_1, AMU, 0b0010);

    FIELD_FETCHER(DIT, DIT, 0b0001);

    FIELD_FETCHER(RME, RME, 0b0001);

    FIELD_FETCHER(CSV2, CSV2, 0b0001);
    FIELD_FETCHER(CSV2_2, CSV2, 0b0010);
    FIELD_FETCHER(CSV2_3, CSV2, 0b0011);

    FIELD_FETCHER(CSV3, CSV3, 0b0001);

  private:
    enum Field {
      EL0 = 0 * 4,
      EL1 = 1 * 4,
      EL2 = 2 * 4,
      EL3 = 3 * 4,
      FP = 4 * 4,
      AdvSIMD = 5 * 4,
      GIC = 6 * 4,
      RAS = 7 * 4,
      SVE = 8 * 4,
      SEL2 = 9 * 4,
      MPAM = 10 * 4,
      AMU = 11 * 4,
      DIT = 12 * 4,
      RME = 13 * 4,
      CSV2 = 14 * 4,
      CSV3 = 15 * 4,
    };
  };

  class PFR1Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(BTI, BT, 0b0001);

    FIELD_FETCHER(SSBS, SSBS, 0b0001);
    FIELD_FETCHER(SSBS2, SSBS, 0b0010);

    FIELD_FETCHER(MTE, MTE, 0b0001);
    FIELD_FETCHER(MTE2, MTE, 0b0010);
    FIELD_FETCHER(MTE3, MTE, 0b0011);

    uint64_t RAS_Minor() const {
      return GetField(RAS_frac);
    }
    uint64_t MPAM_Minor() const {
      return GetField(MPAM_frac);
    }

    FIELD_FETCHER(SME, SME, 0b0001);
    FIELD_FETCHER(SME2, SME, 0b0010);

    FIELD_FETCHER(RNDR_trap, RNDR_trap, 0b0001);

    uint64_t CSV2_Minor() const {
      return GetField(CSV2_frac);
    }

    FIELD_FETCHER(NMI, NMI, 0b0001);

    uint64_t MTE_Minor() const {
      return GetField(MTE_frac);
    }

    FIELD_FETCHER(GCS, GCS, 0b0001);

    FIELD_FETCHER(THE, THE, 0b0001);

    FIELD_FETCHER(MTEX, MTEX, 0b0001);

    FIELD_FETCHER(DoubleFault2, DF2, 0b0001);

    FIELD_FETCHER(PFAR, PFAR, 0b0001);

  private:
    enum Field {
      BT = 0 * 4,
      SSBS = 1 * 4,
      MTE = 2 * 4,
      RAS_frac = 3 * 4,
      MPAM_frac = 4 * 4,
      RES0 = 5 * 4,
      SME = 6 * 4,
      RNDR_trap = 7 * 4,
      CSV2_frac = 8 * 4,
      NMI = 9 * 4,
      MTE_frac = 10 * 4,
      GCS = 11 * 4,
      THE = 12 * 4,
      MTEX = 13 * 4,
      DF2 = 14 * 4,
      PFAR = 15 * 4,
    };
  };

  class MIDRReg final : public FeatureReg {
  public:
    uint64_t GetRevision() const {
      return GetField(Revision);
    }
    uint64_t GetPartNum() const {
      return (Reg >> 4) & 0xFFF;
    }
    uint64_t GetArchitecture() const {
      return GetField(Architecture);
    }
    uint64_t GetVariant() const {
      return GetField(Variant);
    }
    uint64_t GetImplementer() const {
      return (Reg >> 24) & 0xFFFF;
    }

  private:
    enum Field {
      Revision = 0 * 4,
      // Partnum is 3 fields [15:4]
      Architecture = 4 * 4,
      Variant = 5 * 4,
      // Implementer is 2 fields [31:24]
      // Upper 32-bits is entirely reserved
    };
  };

  class ISAR1Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(DPB, DPB, 0b0001);
    FIELD_FETCHER(DPB2, DPB, 0b0010);

    // Ignoring APA and API

    FIELD_FETCHER(JSCVT, JSCVT, 0b0001);

    FIELD_FETCHER(FCMA, FCMA, 0b0001);

    FIELD_FETCHER(LRCPC, LRCPC, 0b0001);
    FIELD_FETCHER(LRCPC2, LRCPC, 0b0010);
    FIELD_FETCHER(LRCPC3, LRCPC, 0b0011);

    // Ignoring GPA and GPI

    FIELD_FETCHER(FRINTTS, FRINTTS, 0b0001);

    FIELD_FETCHER(SB, SB, 0b0001);

    FIELD_FETCHER(SPECRES, SPECRES, 0b0001);
    FIELD_FETCHER(SPECRES2, SPECRES, 0b0010);

    FIELD_FETCHER(BF16, BF16, 0b0001);
    FIELD_FETCHER(SME_F64F64, BF16, 0b0010);

    FIELD_FETCHER(DGH, DGH, 0b0001);

    FIELD_FETCHER(I8MM, I8MM, 0b0001);

    FIELD_FETCHER(XS, XS, 0b0001);

    FIELD_FETCHER(LS64, LS64, 0b0001);
    FIELD_FETCHER(LS64_V, LS64, 0b0010);
    FIELD_FETCHER(LS64_ACCDATA, LS64, 0b0011);

  private:
    enum Field {
      DPB = 0 * 4,
      APA = 1 * 4,
      API = 2 * 4,
      JSCVT = 3 * 4,
      FCMA = 4 * 4,
      LRCPC = 5 * 4,
      GPA = 6 * 4,
      GPI = 7 * 4,
      FRINTTS = 8 * 4,
      SB = 9 * 4,
      SPECRES = 10 * 4,
      BF16 = 11 * 4,
      DGH = 12 * 4,
      I8MM = 13 * 4,
      XS = 14 * 4,
      LS64 = 15 * 4,
    };
  };

  class MMFR0Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(ECV, ECV, 0b0010);

  private:
    enum Field {
      PARange = 0 * 4,
      ASIDBits = 1 * 4,
      BigEnd = 2 * 4,
      SNSMem = 3 * 4,
      BigEndEL0 = 4 * 4,
      TGran16 = 5 * 4,
      TGran64 = 6 * 4,
      TGran4 = 7 * 4,
      TGran16_2 = 8 * 4,
      TGran64_2 = 9 * 4,
      TGran4_2 = 10 * 4,
      ExS = 11 * 4,
      RES0 = 12 * 4,
      RES1 = 13 * 4,
      FGT = 14 * 4,
      ECV = 15 * 4,
    };
  };

  class MMFR2Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(LSE2, AT, 0b0001);

  private:
    enum Field {
      CnP = 0 * 4,
      UAO = 1 * 4,
      LSM = 2 * 4,
      IESB = 3 * 4,
      VARange = 4 * 4,
      CCIDX = 5 * 4,
      NV = 6 * 4,
      ST = 7 * 4,
      AT = 8 * 4,
      IDS = 9 * 4,
      FWB = 10 * 4,
      RES0 = 11 * 4,
      TTL = 12 * 4,
      BBM = 13 * 4,
      EVT = 14 * 4,
      E0PD = 15 * 4,
    };
  };

  class ZFR0Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(SVE2, SVEver, 0b0001);
    FIELD_FETCHER(SVE2_1, SVEver, 0b0010);

    FIELD_FETCHER(SVE_AES, AES, 0b0001);
    FIELD_FETCHER(SVE_PMULL128, AES, 0b0010);

    FIELD_FETCHER(SVE_BitPerm, BitPerm, 0b0001);

    FIELD_FETCHER(SVE_BF16, BF16, 0b0001);
    FIELD_FETCHER(SME_F64F64, BF16, 0b0010);

    FIELD_FETCHER(SVE_B16B16, B16B16, 0b0010);

    FIELD_FETCHER(SVE_SHA3, SHA3, 0b0001);

    FIELD_FETCHER(SVE_SM4, SM4, 0b0001);

    FIELD_FETCHER(SVE_I8MM, I8MM, 0b0001);

    FIELD_FETCHER(SVE_F32MM, F32MM, 0b0001);

    FIELD_FETCHER(SVE_F64MM, F64MM, 0b0001);

  private:
    enum Field {
      SVEver = 0 * 4,
      AES = 1 * 4,
      RES0 = 2 * 4,
      RES1 = 3 * 4,
      BitPerm = 4 * 4,
      BF16 = 5 * 4,
      B16B16 = 6 * 4,
      RES2 = 7 * 4,
      SHA3 = 8 * 4,
      RES3 = 9 * 4,
      SM4 = 10 * 4,
      I8MM = 11 * 4,
      RES4 = 12 * 4,
      F32MM = 13 * 4,
      F64MM = 14 * 4,
      RES5 = 15 * 4,
    };
  };

  class MMFR1Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(AFP, AFP, 0b0001);

  private:
    enum Field {
      HAFDBS = 0 * 4,
      VMIDBits = 1 * 4,
      VH = 2 * 4,
      HPDS = 3 * 4,
      LO = 4 * 4,
      PAN = 5 * 4,
      SpecSEI = 6 * 4,
      XNX = 7 * 4,
      TWED = 8 * 4,
      ETS = 9 * 4,
      HCX = 10 * 4,
      AFP = 11 * 4,
      nTLBPA = 12 * 4,
      TIDCP1 = 13 * 4,
      CMOW = 14 * 4,
      ECBHB = 15 * 4,
    };
  };

  class ISAR2Reg final : public FeatureReg {
  public:
    FIELD_FETCHER(WFxt, WFxt, 0b0010);

    FIELD_FETCHER(RPRES, RPRES, 0b0001);

    FIELD_FETCHER(PACQARMA3, GPA3, 0b0001);

    FIELD_FETCHER(MOPS, MOPS, 0b0001);

    FIELD_FETCHER(HBC, BC, 0b0001);

    uint64_t PAC_Minor() const {
      return GetField(PAC_frac);
    }

    FIELD_FETCHER(CLRBHB, CLRBHB, 0b0001);

    FIELD_FETCHER(SYSREG128, SYSREG_128, 0b0001);

    FIELD_FETCHER(SYSINSTR128, SYSINSTR_128, 0b0001);

    FIELD_FETCHER(PRFMSLC, PRFMSLC, 0b0001);

    FIELD_FETCHER(RPRFM, RPRFM, 0b0001);

    FIELD_FETCHER(CSSC, CSSC, 0b0001);

  private:
    enum Field {
      WFxt = 0 * 4,
      RPRES = 1 * 4,
      GPA3 = 2 * 4,
      APA3 = 3 * 4,
      MOPS = 4 * 4,
      BC = 5 * 4,
      PAC_frac = 6 * 4,
      CLRBHB = 7 * 4,
      SYSREG_128 = 8 * 4,
      SYSINSTR_128 = 9 * 4,
      PRFMSLC = 10 * 4,
      RES0 = 11 * 4,
      RPRFM = 12 * 4,
      CSSC = 13 * 4,
      RES1 = 14 * 4,
      ATS1A = 15 * 4,
    };
  };

  class SVEVLReg final : public FeatureReg {};
#undef FIELD_FETCHER


  ISAR0Reg ISAR0;
  PFR0Reg PFR0;
  PFR1Reg PFR1;
  MIDRReg MIDR;
  ISAR1Reg ISAR1;
  MMFR0Reg MMFR0;
  ZFR0Reg ZFR0;
  MMFR2Reg MMFR2;
  MMFR1Reg MMFR1;
  ISAR2Reg ISAR2;
  DCZIDReg DCZID;
  SVEVLReg SVEVL;

  static_assert(FEXCore::ToUnderlying(Feature::MAX) < 128);
  static_assert((FEXCore::ToUnderlying(Feature::MAX) / (sizeof(uint64_t) * 8)) == 1);

  // Returns true when the bit belonging to `feat` is set in FeatureBits.
  // The feature's underlying value selects a 64-bit word and a bit inside that word.
  bool Supports(Feature feat) const {
    constexpr size_t BitsPerWord = sizeof(uint64_t) * 8;
    const size_t FeatureIndex = FEXCore::ToUnderlying(feat);
    const size_t Word = FeatureIndex / BitsPerWord;
    const size_t Bit = FeatureIndex - (Word * BitsPerWord);
    const uint64_t Shifted = FeatureBits[Word] >> Bit;
    return (Shifted & 1) != 0;
  }

  // Clears the bit belonging to `feat` in FeatureBits; every other bit is left untouched.
  void RemoveFeature(Feature feat) {
    constexpr size_t BitsPerWord = sizeof(uint64_t) * 8;
    const size_t FeatureIndex = FEXCore::ToUnderlying(feat);
    const size_t Word = FeatureIndex / BitsPerWord;
    const size_t Bit = FeatureIndex - (Word * BitsPerWord);
    const uint64_t Mask = 1ULL << Bit;
    FeatureBits[Word] &= ~Mask;
  }

  // Read-only access to the DCZID member.
  const DCZIDReg& GetDCZID() const {
    return DCZID;
  }

  // Returns the value stored in the SVEVL member via its Get() accessor.
  // NOTE(review): the unit (bits) is taken from this function's name; Get() itself is not visible here.
  uint64_t GetSVEVectorLengthInBits() const {
    return SVEVL.Get();
  }

protected:
  void FillFeatureFlags();

  uint64_t FeatureBits[(FEXCore::ToUnderlying(Feature::MAX) / (sizeof(uint64_t) * 8)) + 1] {};

  // Sets the bit belonging to `feat` in FeatureBits; every other bit is left untouched.
  void SetFeature(Feature feat) {
    constexpr size_t BitsPerWord = sizeof(uint64_t) * 8;
    const size_t FeatureIndex = FEXCore::ToUnderlying(feat);
    const size_t Word = FeatureIndex / BitsPerWord;
    const size_t Bit = FeatureIndex - (Word * BitsPerWord);
    const uint64_t Mask = 1ULL << Bit;
    FeatureBits[Word] |= Mask;
  }
};

void FillMIDRInformationViaLinux(FEXCore::HostFeatures* Features);

void FetchHostFeatures(FEX::CPUFeatures& Features, FEXCore::HostFeatures& HostFeatures, bool SupportsCacheMaintenanceOps, uint64_t CTR,
                       uint64_t MIDR);
FEXCore::HostFeatures FetchHostFeatures();
FEX::CPUFeatures GetCPUFeaturesFromIDRegisters();
} // namespace FEX


================================================
FILE: Source/Common/JSONPool.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/JSONPool.h"

namespace FEX::JSON {
// Pool callback registered as `init`: appends a default-constructed json_t to the allocator's list
// and hands back its address. Previously created objects are left in place.
static json_t* PoolInit(jsonPool_t* Pool) {
  auto& Objects = static_cast<JsonAllocator*>(Pool)->json_objects;
  Objects.emplace_back();
  return &Objects.back();
}

// Pool callback registered as `alloc`: appends a default-constructed json_t to the allocator's list
// and hands back its address.
static json_t* PoolAlloc(jsonPool_t* Pool) {
  auto& Objects = static_cast<JsonAllocator*>(Pool)->json_objects;
  Objects.emplace_back();
  return &Objects.back();
}

// Wires the jsonPool_t base so that its `init` and `alloc` callbacks point at the two
// list-backed functions above. Members are named explicitly, so this does not depend on
// the declaration order inside jsonPool_t.
JsonAllocator::JsonAllocator()
  : jsonPool_t {
      .init = PoolInit,
      .alloc = PoolAlloc,
    } {}
} // namespace FEX::JSON


================================================
FILE: Source/Common/JSONPool.h
================================================
// SPDX-License-Identifier: MIT

#pragma once

#include <FEXCore/fextl/list.h>

#include <iterator>
#include <tiny-json.h>

namespace FEX::JSON {
// A jsonPool_t whose storage is a list of json_t objects.
struct JsonAllocator : jsonPool_t {
  // Every json_t created for this pool is stored here.
  fextl::list<json_t> json_objects;

  // Defined in JSONPool.cpp.
  JsonAllocator();
};

// Hands the container's character data to json_createWithPool, using `Allocator` as the pool.
// Returns nullptr without calling the parser when the container is empty; otherwise returns
// whatever json_createWithPool returns.
template<typename T>
const json_t* CreateJSON(T& Container, JsonAllocator& Allocator) {
  const bool HasData = !std::empty(Container);
  return HasData ? json_createWithPool(std::data(Container), &Allocator) : nullptr;
}
} // namespace FEX::JSON


================================================
FILE: Source/Common/Linux/SBRKAllocations.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>

#include <sys/mman.h>

namespace FEX::SBRKAllocations {
// Disables glibc's ability to allocate memory through the `sbrk` interface.
// This is run early in the lifecycle of FEX in order to make sure no 64-bit pointers can make it to the guest 32-bit application.
//
// A single PROT_NONE page is mapped at the current program break (aligned upward to page size). Once that page exists,
// any attempt to grow the break runs into it and fails with ENOMEM.
//
// glibc notices the sbrk failure and falls back to regular mmap based allocations, so memory can still be allocated.
//
// Returns the address of the guard page (to be passed to `ReenableSBRKAllocations`), or an all-ones pointer
// when sbrk was already unusable.
void* DisableSBRKAllocations() {
  void* INVALID_PTR = reinterpret_cast<void*>(~0ULL);
  // Get the starting sbrk pointer.
  void* StartingSBRK = sbrk(0);
  if (StartingSBRK == INVALID_PTR) {
    // If sbrk is already returning invalid pointers then nothing to do here.
    return INVALID_PTR;
  }

  // Now allocate the next page after the sbrk address to ensure it can't grow.
  // In most cases at the start of `main` this will already be page aligned, which means subsequent `sbrk`
  // calls won't allocate any memory through that.
  void* AlignedBRK = reinterpret_cast<void*>(FEXCore::AlignUp(reinterpret_cast<uintptr_t>(StartingSBRK), FEXCore::Utils::FEX_PAGE_SIZE));
  void* AfterBRK =
    ::mmap(AlignedBRK, FEXCore::Utils::FEX_PAGE_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE | MAP_NORESERVE, -1, 0);

  if (AfterBRK != INVALID_PTR && AfterBRK != AlignedBRK) {
    // A kernel that doesn't know MAP_FIXED_NOREPLACE treats the address as a hint and may place the page elsewhere.
    // A guard page anywhere but directly at the break is useless: the loop below would keep growing the heap and
    // `ReenableSBRKAllocations` would later unmap an address that was never ours. Treat it as a failed allocation.
    ::munmap(AfterBRK, FEXCore::Utils::FEX_PAGE_SIZE);
    AfterBRK = INVALID_PTR;
  }

  if (AfterBRK == INVALID_PTR) {
    // Couldn't allocate the page after the aligned brk? This should never happen.
    // FEXCore::LogMan isn't configured yet so we just need to print the message.
    fextl::fmt::print("Couldn't allocate page after SBRK.\n");
    FEX_TRAP_EXECUTION;
    return INVALID_PTR;
  }

  // Now that the page after sbrk is allocated, FEX needs to consume the remaining sbrk space.
  // This will be anywhere from zero to just under one page of bytes.
  // Start allocating from 1024 byte increments just to make any steps a bit faster,
  // halving the step each time sbrk refuses so that every last byte is consumed.
  intptr_t IncrementAmount = 1024;
  for (; IncrementAmount != 0; IncrementAmount >>= 1) {
    while (sbrk(IncrementAmount) != INVALID_PTR)
      ;
  }
  return AlignedBRK;
}

// Unmaps the single page at `Ptr`, which is expected to be the value returned by `DisableSBRKAllocations`.
// The all-ones pointer is the "nothing was mapped" sentinel and is ignored.
void ReenableSBRKAllocations(void* Ptr) {
  const bool NothingMapped = Ptr == reinterpret_cast<void*>(~0ULL);
  if (NothingMapped) {
    return;
  }

  munmap(Ptr, FEXCore::Utils::FEX_PAGE_SIZE);
}
} // namespace FEX::SBRKAllocations


================================================
FILE: Source/Common/Linux/SBRKAllocations.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

namespace FEX::SBRKAllocations {
// Disable allocations through glibc's sbrk allocation method.
// Returns a pointer at the end of the sbrk region.
void* DisableSBRKAllocations();

// Allow sbrk again. Pass in the pointer returned by `DisableSBRKAllocations`
void ReenableSBRKAllocations(void* Ptr);
} // namespace FEX::SBRKAllocations


================================================
FILE: Source/Common/SHMStats.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/SHMStats.h"
#include "git_version.h"

#include <FEXCore/Debug/InternalThreadState.h>

namespace FEX::SHMStats {
// Fills in the header at the start of the region pointed to by `Base` and derives the
// `Head`, `Stats` and `RemainingSlots` members from it. Does nothing when `Base` is null.
void StatAllocBase::SaveHeader(FEXCore::SHMStats::AppType AppType) {
  if (Base == nullptr) {
    return;
  }

  using HeaderType = FEXCore::SHMStats::ThreadStatsHeader;
  using StatsType = FEXCore::SHMStats::ThreadStats;

  Head = static_cast<HeaderType*>(Base);
  Head->Size.store(CurrentSize, std::memory_order_relaxed);
  Head->Version = FEXCore::SHMStats::STATS_VERSION;
  Head->app_type = AppType;
  Head->ThreadStatsSize = sizeof(StatsType);

  // Copies at most sizeof(fex_version) bytes; no terminator is appended by this call.
  const std::string_view GitString = GIT_DESCRIBE_STRING;
  const auto CopyLength = std::min(GitString.size(), sizeof(Head->fex_version));
  strncpy(Head->fex_version, GitString.data(), CopyLength);

  // The stat slots begin directly behind the header.
  Stats = reinterpret_cast<StatsType*>(static_cast<char*>(Base) + sizeof(HeaderType));

  RemainingSlots = TotalSlotsFromSize();
}

// Asks the frontend to double the region size. Returns false when the frontend reports the
// same size as before; otherwise records the new size (also in the header) and sets
// RemainingSlots to the number of slots gained.
bool StatAllocBase::AllocateMoreSlots() {
  const auto SlotsBefore = TotalSlotsFromSize();
  const uint32_t GrownSize = FrontendAllocateSlots(CurrentSize * 2);

  const bool Grew = GrownSize != CurrentSize;
  if (Grew) {
    CurrentSize = GrownSize;
    Head->Size.store(CurrentSize, std::memory_order_relaxed);
    RemainingSlots = TotalSlotsFromSize() - SlotsBefore;
  }

  return Grew;
}

// Claims the first slot whose TID is zero, zeroes it, tags it with `TID` and appends it to the
// singly-linked list rooted at the header.
//
// Returns the claimed slot, or nullptr when no slot is available: either the region could not be
// grown, or (defensively) the scan found no free slot even though RemainingSlots claimed one exists.
FEXCore::SHMStats::ThreadStats* StatAllocBase::AllocateSlot(uint32_t TID) {
  if (!RemainingSlots) {
    if (!AllocateMoreSlots()) {
      return nullptr;
    }
  }

  // Find a free slot
  store_memory_barrier();
  FEXCore::SHMStats::ThreadStats* AllocatedSlot {};
  const size_t SlotCount = TotalSlotsFromSize();
  for (size_t i = 0; i < SlotCount; ++i) {
    auto* Candidate = &Stats[i];
    if (Candidate->TID.load(std::memory_order_relaxed) == 0) {
      AllocatedSlot = Candidate;
      break;
    }
  }

  if (!AllocatedSlot) {
    // The bookkeeping and the slot contents disagree (or there are no slots at all).
    // Refuse the allocation rather than zeroing a slot that is still in use or dereferencing nullptr.
    return nullptr;
  }

  --RemainingSlots;

  // Slot might be reused, just zero it now.
  memset(AllocatedSlot, 0, sizeof(*AllocatedSlot));

  // TID != 0 means slot is allocated.
  AllocatedSlot->TID.store(TID, std::memory_order_relaxed);

  // Setup singly-linked list: an empty list gets a new head, otherwise the slot is chained behind the tail.
  if (Head->Head.load(std::memory_order_relaxed) == 0) {
    Head->Head.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed);
  } else {
    StatTail->Next.store(OffsetFromStat(AllocatedSlot), std::memory_order_relaxed);
  }

  // Update the tail.
  StatTail = AllocatedSlot;
  return AllocatedSlot;
}

// Releases a slot previously returned by AllocateSlot: marks it free (TID = 0) and unlinks it from
// the singly-linked list rooted at the header. A null pointer is ignored.
//
// The freed slot's own `Next` value is left as it was. Because of that, the predecessor must be
// found by following the live list from the head: scanning every slot by index can match a
// previously freed slot whose stale `Next` still names this slot, which would leave the real
// predecessor linked to a freed slot and make a freed slot the tail.
void StatAllocBase::DeallocateSlot(FEXCore::SHMStats::ThreadStats* AllocatedSlot) {
  if (!AllocatedSlot) {
    return;
  }

  // TID == 0 will signal the reader to ignore this slot & deallocate it!
  AllocatedSlot->TID.store(0, std::memory_order_relaxed);

  store_memory_barrier();

  const auto SlotOffset = OffsetFromStat(AllocatedSlot);
  const auto AllocatedSlotNext = AllocatedSlot->Next.load(std::memory_order_relaxed);

  const bool IsTail = AllocatedSlot == StatTail;

  // Update the linked list.
  if (Head->Head == SlotOffset) {
    Head->Head.store(AllocatedSlotNext, std::memory_order_relaxed);
    if (IsTail) {
      StatTail = nullptr;
    }
  } else {
    // Walk the list from the head. The walk is capped at the slot count so a damaged list cannot spin forever.
    const size_t SlotCount = TotalSlotsFromSize();
    auto CurrentOffset = Head->Head.load(std::memory_order_relaxed);
    for (size_t Visited = 0; CurrentOffset != 0 && Visited < SlotCount; ++Visited) {
      // Inverse of OffsetFromStat: offsets are relative to Base.
      auto* Slot = reinterpret_cast<FEXCore::SHMStats::ThreadStats*>(reinterpret_cast<uint64_t>(Base) + CurrentOffset);
      const auto NextSlotOffset = Slot->Next.load(std::memory_order_relaxed);

      if (NextSlotOffset == SlotOffset) {
        Slot->Next.store(AllocatedSlotNext, std::memory_order_relaxed);

        if (IsTail) {
          // This slot is now the tail.
          StatTail = Slot;
        }
        break;
      }

      CurrentOffset = NextSlotOffset;
    }
  }

  ++RemainingSlots;
}

} // namespace FEX::SHMStats


================================================
FILE: Source/Common/SHMStats.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Common|SHMStats
desc: Frontend profiler common code
$end_info$
*/
#pragma once
#include <FEXCore/Utils/SHMStats.h>

namespace FEXCore::Core {
struct InternalThreadState;
}

#ifdef ARCHITECTURE_arm64
// Orders earlier stores before later stores, both for the compiler ("memory" clobber)
// and for the CPU (`dmb ishst`).
static inline void store_memory_barrier() {
  asm volatile("dmb ishst;" ::: "memory");
}

#else
// x86 is strongly memory ordered with regular loadstores, so no barrier instruction is needed.
// The compiler is still free to reorder memory accesses across a call that does nothing, so an
// empty asm statement with a "memory" clobber is used to keep the program order. It emits no code.
static inline void store_memory_barrier() {
  asm volatile("" ::: "memory");
}
#endif

namespace FEX::SHMStats {
// Base class that manages thread-stat slots stored behind a header in the region at `Base`.
// The frontend supplies the storage and implements growing it through FrontendAllocateSlots.
class StatAllocBase {
public:
  virtual ~StatAllocBase() = default;

protected:
  FEXCore::SHMStats::ThreadStats* AllocateSlot(uint32_t TID);
  void DeallocateSlot(FEXCore::SHMStats::ThreadStats* AllocatedSlot);

  // Byte distance from `Base` to `Stat`.
  uint32_t OffsetFromStat(FEXCore::SHMStats::ThreadStats* Stat) const {
    const auto StatAddress = reinterpret_cast<uint64_t>(Stat);
    const auto BaseAddress = reinterpret_cast<uint64_t>(Base);
    return StatAddress - BaseAddress;
  }

  // Slot count for the current region size.
  uint32_t TotalSlotsFromSize() const {
    return TotalSlotsFromSize(CurrentSize);
  }

  // Slot count for a region of `Size` bytes: the bytes behind the header divided by the slot size, minus one.
  static uint32_t TotalSlotsFromSize(uint32_t Size) {
    const auto BytesAfterHeader = Size - sizeof(FEXCore::SHMStats::ThreadStatsHeader);
    return BytesAfterHeader / sizeof(FEXCore::SHMStats::ThreadStats) - 1;
  }

  // Index of the slot that starts `Offset` bytes from the beginning of the region.
  static uint32_t SlotIndexFromOffset(uint32_t Offset) {
    const auto BytesAfterHeader = Offset - sizeof(FEXCore::SHMStats::ThreadStatsHeader);
    return BytesAfterHeader / sizeof(FEXCore::SHMStats::ThreadStats);
  }

  void SaveHeader(FEXCore::SHMStats::AppType AppType);

  void* Base {};
  uint32_t CurrentSize {};
  FEXCore::SHMStats::ThreadStatsHeader* Head {};
  FEXCore::SHMStats::ThreadStats* Stats {};
  FEXCore::SHMStats::ThreadStats* StatTail {};
  uint32_t RemainingSlots {};

  // Limited to 4MB which should be a few hundred threads of tracking capability.
  // I (Sonicadvance1) wanted to reserve 128MB of VA space because it's cheap, but ran into a bug when running WINE.
  // WINE allocates [0x7fff'fe00'0000, 0x7fff'ffff'0000) which /consistently/ overlaps with FEX's sigaltstack.
  // This only occurs when this stat allocation size is large as the top-down allocation pushes the alt-stack further.
  // Additionally, only occurs on 48-bit VA systems, as mmap on lesser VA will fail regardless.
  // TODO: Bump allocation size up once FEXCore's allocator can first use the 128TB of blocked VA space on 48-bit systems.
  constexpr static uint32_t MAX_STATS_SIZE = 4 * 1024 * 1024;

private:
  // Implemented by the frontend. AllocateMoreSlots treats a return value equal to the current size as failure.
  virtual uint32_t FrontendAllocateSlots(uint32_t NewSize) = 0;
  bool AllocateMoreSlots();
};

} // namespace FEX::SHMStats


================================================
FILE: Source/Common/VolatileMetadata.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/VolatileMetadata.h"

#include <FEXCore/Utils/LogManager.h>

#include <cstdlib>

#include <range/v3/view/split.hpp>
#include <range/v3/view/transform.hpp>

namespace FEX::VolatileMetadata {
// Parses a list of per-module descriptors into a map keyed by module name.
//
// Descriptor format: `<module>;<address begin>-<address-end>,<more addresses>;<instruction offset to force TSO>:`
//  - Modules are separated by ':' and a module's sections by ';'.
//  - A module given by name only is recorded with ModuleTSODisabled = true.
//  - A non-empty address-range section clears ModuleTSODisabled and adds each `begin-end` pair (hexadecimal).
//  - A non-empty third section adds each comma separated hexadecimal offset to VolatileInstructions.
//
// NOTE: numbers are read with strtoull directly from the view's storage, which relies on the
// separators (or the terminator of the underlying string) to stop the conversion.
fextl::unordered_map<fextl::string, ExtendedVolatileMetadata> ParseExtendedVolatileMetadata(std::string_view ListOfDescriptors) {
  if (ListOfDescriptors.empty()) {
    return {};
  }

  fextl::unordered_map<fextl::string, ExtendedVolatileMetadata> ExtendedMetaData {};

  auto to_string_view = [](auto rng) {
    return std::string_view(&*rng.begin(), ranges::distance(rng));
  };
  for (auto module_config : ranges::views::split(ListOfDescriptors, ':') | ranges::views::transform(to_string_view)) {
    if (module_config.empty()) {
      continue;
    }

    auto sections = ranges::views::split(module_config, ';') | ranges::views::transform(to_string_view);
    auto section = ranges::begin(sections);
    const auto sections_end = ranges::end(sections);

    // Module name handling
    std::string_view section_str = *section;
    if (section_str.empty()) {
      continue;
    }

    // Until an address range section shows up, the whole module counts as TSO disabled.
    auto current_module = ExtendedMetaData
                            .insert_or_assign(fextl::string(section_str),
                                              ExtendedVolatileMetadata {
                                                .ModuleTSODisabled = true,
                                              })
                            .first;
    ++section;

    // Address range handling
    if (section != sections_end) {
      std::string_view section_str = *section;
      if (section_str.empty()) {
        continue;
      }

      current_module->second.ModuleTSODisabled = false;

      // Walk all the address ranges provided.
      for (auto tso_region_view : ranges::views::split(section_str, ',') | ranges::views::transform(to_string_view)) {
        if (tso_region_view.empty()) {
          continue;
        }

        const char* const region_begin = tso_region_view.data();
        const char* const region_end = region_begin + tso_region_view.size();

        char* str_end {};
        const uint64_t begin = std::strtoull(region_begin, &str_end, 16);
        LOGMAN_THROW_A_FMT(region_begin != str_end, "Couldn't parse begin {}", tso_region_view);

        // The begin address must be followed by a `-` that is still inside this region.
        // Check this BEFORE stepping over it: stepping first lets a region without a separator
        // pass the check and the end address would then be read from whatever text follows.
        const bool has_separator = region_begin != str_end && str_end < region_end && *str_end == '-';
        LOGMAN_THROW_A_FMT(has_separator, "Couldn't parse end {}", tso_region_view);
        if (!has_separator) {
          // Only reached if the assertion above is compiled out or its handler returns; drop the malformed region.
          continue;
        }

        // Skip `-` separator.
        const char* const end_begin = str_end + 1;
        const uint64_t end = std::strtoull(end_begin, &str_end, 16);
        LOGMAN_THROW_A_FMT(end_begin != str_end, "Couldn't parse end {}", tso_region_view);
        if (end_begin == str_end) {
          // Same reasoning as above: never record a range whose end was not actually parsed.
          continue;
        }

        current_module->second.VolatileValidRanges.Insert({begin, end});
      }

      ++section;
    }

    // Individual instruction handling
    if (section != sections_end) {
      std::string_view section_str = *section;
      if (section_str.empty()) {
        continue;
      }

      for (auto tso_region_view : ranges::views::split(section_str, ',') | ranges::views::transform(to_string_view)) {
        if (tso_region_view.empty()) {
          continue;
        }

        uint64_t offset {};
        char* str_end;
        offset = std::strtoull(tso_region_view.data(), &str_end, 16);
        LOGMAN_THROW_A_FMT(tso_region_view.data() != str_end, "Couldn't parse offset {}", tso_region_view);

        current_module->second.VolatileInstructions.insert(offset);
      }

      ++section;
    }

    // Anything after the third section is unexpected.
    LOGMAN_THROW_A_FMT(section == sections_end, "Expected ':' or end of input, got {}", *section);
  }

  return ExtendedMetaData;
}
} // namespace FEX::VolatileMetadata


================================================
FILE: Source/Common/VolatileMetadata.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/IntervalList.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>

#include <string_view>

namespace FEX::VolatileMetadata {
// Per-module volatile metadata produced by ParseExtendedVolatileMetadata.
struct ExtendedVolatileMetadata {
  // Address ranges given in the module's range section.
  FEXCore::IntervalList<uint64_t> VolatileValidRanges;
  // Offsets given in the module's instruction section.
  fextl::set<uint64_t> VolatileInstructions;
  // True when the module was listed by name only. Defaults to false so that a
  // default-initialized object never carries an indeterminate flag.
  bool ModuleTSODisabled {};
};

fextl::unordered_map<fextl::string, ExtendedVolatileMetadata> ParseExtendedVolatileMetadata(std::string_view ListOfDescriptors);

// Translates one module's extended metadata into absolute addresses for a mapping that starts at
// `Address` and ends at `EndAddress`, adding the results to the caller's containers.
//
//  - Instruction offsets are rebased onto `Address`. The first one that lands at or beyond
//    `EndAddress` is logged and ends the whole function.
//    NOTE(review): that early return also skips the range handling below — confirm this is intended.
//  - Ranges lying inside [FileOffset, FileOffsetEnd) are rebased by `Address - FileOffset`.
//  - When the module is flagged ModuleTSODisabled, the range list is replaced by the full module range.
inline void ApplyFEXExtendedVolatileMetadata(FEX::VolatileMetadata::ExtendedVolatileMetadata& ExtendedMetaData,
                                             fextl::set<uint64_t>& VolatileInstructions, FEXCore::IntervalList<uint64_t>& VolatileValidRanges,
                                             uint64_t Address, uint64_t EndAddress, uint64_t FileOffset = 0, uint64_t FileOffsetEnd = ~0ULL) {
  // Instruction offsets first.
  for (const auto InstOffset : ExtendedMetaData.VolatileInstructions) {
    const auto InstAddress = InstOffset + Address;
    if (InstAddress >= EndAddress) {
      LogMan::Msg::DFmt("Volatile instruction 0x{:x} couldn't fit in to module range [0x{:x}, 0x{:x}). Not adding anymore volatile "
                        "instructions. Inspect your config!",
                        InstAddress, Address, EndAddress);
      return;
    }

    VolatileInstructions.emplace(InstAddress);
  }

  // Then the address ranges that fall inside the requested file window.
  for (const auto Range : ExtendedMetaData.VolatileValidRanges) {
    const bool InsideWindow = Range.Offset >= FileOffset && Range.End < FileOffsetEnd;
    if (!InsideWindow) {
      continue;
    }

    VolatileValidRanges.Insert({Address + Range.Offset - FileOffset, Address + Range.End - FileOffset});
  }

  // A fully disabled module overrides everything gathered so far with the entire module range.
  if (ExtendedMetaData.ModuleTSODisabled) {
    VolatileValidRanges.Clear();
    VolatileValidRanges.Insert({Address, EndAddress});
  }
}

} // namespace FEX::VolatileMetadata


================================================
FILE: Source/Common/X86Features.h
================================================
#pragma once
#include <cstdint>

#ifdef ARCHITECTURE_x86_64
#include <cpuid.h>

namespace FEX::X86 {
// Snapshot of host x86 CPU features, gathered once through CPUID when the object is constructed.
// Every `Feat_*` member stays false unless the CPUID leaf that reports it is available.
class Features final {
public:
  Features() {
    const cpuid_data basic = cpuid(0);

    if (basic.eax >= 1) {
      const auto leaf_1 = cpuid(0x1);

      Feat_aes = HasBit(leaf_1.ecx, 25);
      // Same bit as Feat_sse4_2 below.
      Feat_crc = HasBit(leaf_1.ecx, 20);
      Feat_rand = HasBit(leaf_1.ecx, 30);
      Feat_pclmulqdq = HasBit(leaf_1.ecx, 1);
      Feat_avx = HasBit(leaf_1.ecx, 28);
      Feat_ssse3 = HasBit(leaf_1.ecx, 9);
      Feat_sse4_1 = HasBit(leaf_1.ecx, 19);
      Feat_sse4_2 = HasBit(leaf_1.ecx, 20);
      Feat_movbe = HasBit(leaf_1.ecx, 22);
      Feat_xsave = HasBit(leaf_1.ecx, 26);
    }

    if (basic.eax >= 7) {
      const auto leaf_7 = cpuid(0x7);

      Feat_fsgsbase = HasBit(leaf_7.ebx, 0);
      Feat_bmi1 = HasBit(leaf_7.ebx, 3);
      Feat_bmi2 = HasBit(leaf_7.ebx, 8);
      Feat_clwb = HasBit(leaf_7.ebx, 24);
      Feat_adx = HasBit(leaf_7.ebx, 19);
      Feat_clflopt = HasBit(leaf_7.ebx, 23);
      Feat_sha = HasBit(leaf_7.ebx, 29);
      Feat_vaes = HasBit(leaf_7.ecx, 9);
      Feat_rdpid = HasBit(leaf_7.ecx, 22);

      // These three require a leaf 1 bit AND a leaf 7 bit. The leaf 7 bit has to be reduced to a
      // bool before combining: `bool &= (reg & mask)` promotes the bool to the integer 1 and ANDs
      // it with a mask whose set bit is not bit 0, which always produces false.
      Feat_avx = Feat_avx && HasBit(leaf_7.ebx, 5);
      Feat_rand = Feat_rand && HasBit(leaf_7.ebx, 18);
      Feat_pclmulqdq = Feat_pclmulqdq && HasBit(leaf_7.ecx, 10);
    }

    const cpuid_data extended = cpuid(0x8000'0000U);
    if (extended.eax >= 0x8000'0001U) {
      const auto leaf_8000_0001 = cpuid(0x8000'0001U);
      // Both of the two top EDX bits must be set.
      Feat_3dnow = (leaf_8000_0001.edx >> 30) == 0b11;
      Feat_sse4a = HasBit(leaf_8000_0001.ecx, 6);
    }

    if (extended.eax >= 0x8000'0008U) {
      const auto leaf_8000_0008 = cpuid(0x8000'0008U);

      Feat_clzero = HasBit(leaf_8000_0008.ebx, 0);
    }
  }

  // Features.
  bool Feat_3dnow {};
  bool Feat_sse4a {};
  bool Feat_bmi1 {};
  bool Feat_bmi2 {};
  bool Feat_clwb {};
  bool Feat_aes {};
  bool Feat_crc {};
  bool Feat_rand {};
  bool Feat_sha {};
  bool Feat_pclmulqdq {};
  bool Feat_vaes {};
  bool Feat_clzero {};
  bool Feat_avx {};
  bool Feat_ssse3 {};
  bool Feat_sse4_1 {};
  bool Feat_sse4_2 {};
  bool Feat_movbe {};
  bool Feat_adx {};
  bool Feat_xsave {};
  bool Feat_rdpid {};
  bool Feat_clflopt {};
  bool Feat_fsgsbase {};

private:
  // Raw register values returned by one CPUID invocation.
  struct cpuid_data {
    uint32_t eax, ebx, ecx, edx;
  };

  // True when bit number `Bit` of `Register` is set.
  static constexpr bool HasBit(uint32_t Register, unsigned Bit) {
    return ((Register >> Bit) & 1U) != 0;
  }

  // Executes CPUID for `Function` with sub-leaf `Leaf` and returns the four result registers.
  cpuid_data cpuid(uint32_t Function, uint32_t Leaf = 0) {
    cpuid_data data;
    __cpuid_count(Function, Leaf, data.eax, data.ebx, data.ecx, data.edx);
    return data;
  }
};
} // namespace FEX::X86
#endif


================================================
FILE: Source/Steam/CMakeLists.txt
================================================
# Libraries linked into both executables defined in this file.
set(LIBS FEXCore Common CommonTools JemallocLibs)

# FEXCompatTool is built from CompatTool.cpp.
add_executable(FEXCompatTool CompatTool.cpp)

target_link_libraries(FEXCompatTool PRIVATE ${LIBS})

# Installed to "/", the same destination as the depot-root files at the bottom of this file.
install(TARGETS FEXCompatTool RUNTIME
  DESTINATION /
  COMPONENT Runtime)

# FEXServerManager is built from ServerManager.cpp.
add_executable(FEXServerManager ServerManager.cpp)

target_link_libraries(FEXServerManager PRIVATE ${LIBS})

# Unlike FEXCompatTool, this one is installed to "bin".
install(TARGETS FEXServerManager RUNTIME
  DESTINATION bin
  COMPONENT Runtime)

# Description json and VERSION files are installed into the depot root
# VERSIONS.txt is generated from VERSIONS.txt.in at configure time; the other files are installed as-is.
configure_file(VERSIONS.txt.in ${CMAKE_CURRENT_BINARY_DIR}/VERSIONS.txt)
install(FILES
    ${CMAKE_CURRENT_BINARY_DIR}/VERSIONS.txt
    ConfigTemplate.json
    emulator.json
    toolmanifest.vdf
  DESTINATION /
  COMPONENT Runtime)


================================================
FILE: Source/Steam/CompatTool.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|FEXCompatTool
desc: Used for launching games from Steam
$end_info$
*/

#include "PortabilityInfo.h"
#include "Common/Config.h"
#include "FEXCore/Utils/FileLoading.h"
#include "FEXCore/Utils/StringUtils.h"
#include "FEXHeaderUtils/Filesystem.h"

#include <stdlib.h>
#include <tiny-json.h>

// Instantiates `ConfigTemplate.json` (located next to the interpreter path) by substituting the
// `@FEX_COMPAT_TOOL@` and `@FEX_ROOTFS_PATH@` placeholders, and writes the result as `Config.json`
// into the config directory.
//
// Returns the config directory on success. Returns an empty string when the template is missing or
// unreadable, the directory can't be created, or the output file can't be opened.
fextl::string GenerateSteamConfigTemplate(const FEX::Config::PortableInformation& PortableInfo) {
  const auto TemplatePath = PortableInfo.InterpreterPath + "ConfigTemplate.json";
  if (!FHU::Filesystem::Exists(TemplatePath)) {
    return {};
  }

  fextl::string Contents;
  if (!FEXCore::FileLoading::LoadFile(Contents, TemplatePath)) {
    return {};
  }

  // Try and find a mount point. Candidates in order of preference:
  // $XDG_RUNTIME_DIR, /run/user/<euid> (if it exists), $XDG_CACHE_HOME, and finally ~/.cache.
  const auto FindMountPoint = []() -> fextl::string {
    if (const char* RuntimeDir = getenv("XDG_RUNTIME_DIR")) {
      return fextl::fmt::format("{}/fexrootfs/", RuntimeDir);
    }

    const auto UserDirectory = fextl::fmt::format("/run/user/{}", geteuid());
    if (FHU::Filesystem::Exists(UserDirectory)) {
      return fextl::fmt::format("{}/fexrootfs/", UserDirectory);
    }

    if (const char* CacheDir = getenv("XDG_CACHE_HOME")) {
      return fextl::fmt::format("{}/fexrootfs/", CacheDir);
    }

    // We tried really hard to find a mount path.
    return "~/.cache/fexrootfs/";
  };
  const fextl::string MountPoint = FindMountPoint();

  // Update the @FEX_COMPAT_TOOL@ config to point to the root of the depot.
  FEXCore::StringUtils::ReplaceAllInPlace(Contents, "@FEX_COMPAT_TOOL@", PortableInfo.InterpreterPath);

  // TODO: This path is getting phased out.
  FEXCore::StringUtils::ReplaceAllInPlace(Contents, "@FEX_ROOTFS_PATH@", MountPoint);

  // Save the json.
  const auto ConfigPath = FEX::Config::GetConfigDirectory(false, PortableInfo);
  const auto ConfigLocation = ConfigPath + "Config.json";
  if (!FHU::Filesystem::CreateDirectories(ConfigPath)) {
    return {};
  }

  constexpr auto OpenMode = FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE;
  auto File = FEXCore::File::File(ConfigLocation.c_str(), OpenMode);
  if (!File.IsValid()) {
    return {};
  }

  File.Write(Contents.data(), Contents.size());
  return ConfigPath;
}

// Builds `app_config.json` in the config directory from Steam-provided environment variables.
//
// Returns the path of the written file. Returns an empty string when the user already supplied
// FEX_APP_CONFIG, the config directory can't be created, or the file can't be opened.
fextl::string GenerateSteamAppConfig(const FEX::Config::PortableInformation& PortableInfo) {
  if (getenv("FEX_APP_CONFIG") != nullptr) {
    // If user supplied config then don't use Steam config.
    return {};
  }

  // Current supported Steam options.
  struct SteamOptions {
    bool TSO = true;
    bool Multiblock = true;
    bool Thunks_GL = false;
    bool Thunks_Vulkan = false;
    bool EnableLogging = false;
  };
  SteamOptions Options {};

  // Game overrides: each variable, when present, is parsed as a number and any non-zero value enables the option.
  const auto ApplyNumericOverride = [](const char* Name, bool& Option) {
    if (const char* Value = getenv(Name)) {
      Option = std::strtoull(Value, nullptr, 0) != 0;
    }
  };
  ApplyNumericOverride("STEAM_FEX_TSOENABLED", Options.TSO);
  ApplyNumericOverride("STEAM_FEX_MULTIBLOCK", Options.Multiblock);
  ApplyNumericOverride("STEAM_FEX_LOG", Options.EnableLogging);

  // UI overrides: the presence of a `<name>:1` token switches the option on; nothing here switches one off.
  if (const char* CompatConfig = getenv("STEAM_COMPAT_FEX_CONFIG")) {
    const std::string_view CompatView {CompatConfig};
    const auto EnableIfListed = [CompatView](std::string_view Token, bool& Option) {
      if (CompatView.find(Token) != CompatView.npos) {
        Option = true;
      }
    };
    EnableIfListed("TSOEnabled:1", Options.TSO);
    EnableIfListed("Multiblock:1", Options.Multiblock);
    EnableIfListed("ThunksDB_GL:1", Options.Thunks_GL);
    EnableIfListed("ThunksDB_Vulkan:1", Options.Thunks_Vulkan);
  }

  const auto AsFlag = [](bool Enabled) {
    return Enabled ? "1" : "0";
  };

  // Create the json.
  char Buffer[4096];
  char* Cursor = json_objOpen(Buffer, nullptr);

  Cursor = json_objOpen(Cursor, "Config");
  Cursor = json_str(Cursor, "TSOEnabled", AsFlag(Options.TSO));
  Cursor = json_str(Cursor, "Multiblock", AsFlag(Options.Multiblock));
  // SilentLog is the inverse of the logging switch.
  Cursor = json_str(Cursor, "SilentLog", AsFlag(!Options.EnableLogging));
  if (Options.EnableLogging) {
    Cursor = json_str(Cursor, "OutputLog", "server");
  }
  Cursor = json_objClose(Cursor);

  Cursor = json_objOpen(Cursor, "ThunksDB");
  Cursor = json_str(Cursor, "GL", AsFlag(Options.Thunks_GL));
  Cursor = json_str(Cursor, "Vulkan", AsFlag(Options.Thunks_Vulkan));
  Cursor = json_objClose(Cursor);

  Cursor = json_objClose(Cursor);
  json_end(Cursor);

  // Save the json.
  const auto ConfigPath = FEX::Config::GetConfigDirectory(false, PortableInfo);
  const auto ConfigLocation = ConfigPath + "app_config.json";
  if (!FHU::Filesystem::CreateDirectories(ConfigPath)) {
    return {};
  }

  constexpr auto OpenMode = FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE;
  auto File = FEXCore::File::File(ConfigLocation.c_str(), OpenMode);
  if (!File.IsValid()) {
    return {};
  }

  File.Write(Buffer, strlen(Buffer));
  return ConfigLocation;
}

int main(int argc, const char** argv) {
  const auto PortableInfo = FEX::ReadPortabilityInformation();

  const auto TemplateConfigPath = GenerateSteamConfigTemplate(PortableInfo);
  const auto AppConfigPath = GenerateSteamAppConfig(PortableInfo);

  if (!TemplateConfigPath.empty()) {
    setenv("FEX_APP_CONFIG_LOCATION", TemplateConfigPath.c_str(), true);
  }

  if (!AppConfigPath.empty()) {
    setenv("FEX_APP_CONFIG", AppConfigPath.c_str(), true);
  }

  const auto FEXInterpreterPath = PortableInfo.InterpreterPath + "usr/bin/FEX";

  // Due to no arguments for this application, just replace argv[0] and execve again.
  argv[0] = FEXInterpreterPath.c_str();
  execv(FEXInterpreterPath.c_str(), const_cast<char* const*>(argv));

  // Save errno as it can change after calling `perror`.
  const auto saved_errno = errno;

  perror(argv[0]);

  if (saved_errno == ENOENT) {
    return 127;
  }

  return 126;
}


================================================
FILE: Source/Steam/ConfigTemplate.json
================================================
{
    "Config": {
        "X87ReducedPrecision": "1",
        "RootFS": "@FEX_ROOTFS_PATH@/",
        "ThunkHostLibs": "@FEX_COMPAT_TOOL@/usr/lib/aarch64-linux-gnu/fex-emu/HostThunks",
        "ThunkGuestLibs": "@FEX_COMPAT_TOOL@/usr/share/fex-emu/GuestThunks",
        "ProfileStats": "1"
    }
}


================================================
FILE: Source/Steam/ServerManager.cpp
================================================
// SPDX-License-Identifier: MIT
#include "PortabilityInfo.h"
#include "Common/FEXServerClient.h"

#include <cstdio>
#include <errno.h>
#include <unistd.h>
#include <poll.h>

// Log sink: formats "<level> <message>\n" with a default (empty) text style on the level string,
// writes it to stderr and then calls fsync on stderr.
void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const auto LevelText = fmt::styled(LogMan::DebugLevelStr(Level), fmt::text_style {});
  const auto Line = fextl::fmt::format("{} {}\n", LevelText, Message);

  write(STDERR_FILENO, Line.c_str(), Line.size());
  fsync(STDERR_FILENO);
}

// Assertion sink: forwards the message to MsgHandler at the ASSERT level.
void AssertHandler(const char* Message) {
  MsgHandler(LogMan::ASSERT, Message);
}

// Reports successful startup on the descriptor that `original_stdout` points to, then closes it
// and stores -1 through the pointer so the caller can't use it again.
void SignalPVToContinue(int* original_stdout) {
  // Tell pressure-vessel that the startup was a success.
  static constexpr char ReadyMsg[] = "READY=1\n";
  const int ReadyFD = *original_stdout;
  // The trailing NUL of the array is not part of the message.
  write(ReadyFD, ReadyMsg, sizeof(ReadyMsg) - 1);

  // pressure-vessel is waiting for EOF on STDOUT from this process to ensure it can run FEX processes.
  close(ReadyFD);
  *original_stdout = -1;
}

// Both ends of a pipe. A value of -1 means "no descriptor".
struct PipesType {
  int read_pipe {-1};
  int write_pipe {-1};
};

// Creates a pipe and returns its two ends.
// On failure the error is reported with perror and both members keep their -1 default.
PipesType get_pipe() {
  // pipe() fills an array of two descriptors. Give it a real array instead of the address of a
  // single struct member, which would rely on the two members being laid out back to back.
  int fds[2] {-1, -1};
  if (pipe(fds) != 0) {
    perror("pipe");
    return {};
  }

  PipesType pipes {};
  pipes.read_pipe = fds[0];
  pipes.write_pipe = fds[1];
  return pipes;
}

// Entry point of the FEXServer manager used under pressure-vessel.
//
// Starts FEXServer, signals readiness on the original stdout, then blocks until stdin reaches EOF.
// Returns 126 if any setup step fails and 0 once stdin has been closed (or a read error occurs).
int main(int argc, const char** argv, char** const envp) {
  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);

  const auto PortableInfo = FEX::ReadPortabilityInformation();
  FEX::Config::LoadConfig({}, envp, PortableInfo);

  // Reload the meta layer
  FEXCore::Config::ReloadMetaLayer();

  // Move the ready-indicator pipe from stdout to some other fd,
  // and mark it so the FEXServer won't inherit it. Otherwise the FEXServer
  // will hold it open, preventing pressure-vessel from detecting that
  // we are ready.
  int original_stdout = fcntl(STDOUT_FILENO, F_DUPFD_CLOEXEC, /* minimum fd = */ 3);
  if (original_stdout < 0) {
    perror("F_DUPFD_CLOEXEC");
    return 126;
  }
  // Replace stdout with a copy of our original stderr.
  if (dup2(STDERR_FILENO, STDOUT_FILENO) != STDOUT_FILENO) {
    perror("dup2");
    return 126;
  }

  auto pipes = get_pipe();
  // Without a valid pipe there is nothing for the server to watch; bail out like the other setup failures.
  if (pipes.read_pipe < 0 || pipes.write_pipe < 0) {
    perror("pipe");
    return 126;
  }

  // Set the write side to close on exec, so only this process keeps it open.
  if (fcntl(pipes.write_pipe, F_SETFD, FD_CLOEXEC) == -1) {
    perror("F_SETFD");
    return 126;
  }

  // Give the read end of the pipe to FEXServer.
  auto ServerFD = FEXServerClient::StartServer(PortableInfo.InterpreterPath, pipes.read_pipe);

  if (ServerFD == -1) {
    perror("Couldn't start FEXServer");
    return 126;
  }

  // FEXServer is now running. Tell PV to continue.
  SignalPVToContinue(&original_stdout);

  // Don't need the read pipe anymore.
  close(pipes.read_pipe);
  pipes.read_pipe = -1;

  // Now that the server is started and watching our pipe, we can close the returned FD, as it'll stay open as long as the pipe is open.
  close(ServerFD);
  ServerFD = -1;

  // Do a blocking read, discarding any written data and wait for EOF.
  while (true) {
    char buf[4096];
    auto read_len = ::read(STDIN_FILENO, buf, sizeof(buf));
    if (read_len < 0) {
      if (errno == EINTR || errno == EAGAIN) {
        // Interrupted, try again.
        continue;
      } else {
        // Error on read.
        break;
      }
    } else if (read_len == 0) {
      // EOF
      break;
    }
  }

  // Terminating will clean-up.
  return 0;
}


================================================
FILE: Source/Steam/VERSIONS.txt.in
================================================
FEX describe	@GIT_DESCRIBE_STRING@
FEX hash	@GIT_HASH@


================================================
FILE: Source/Steam/emulator.json
================================================
{
  "emulator_v0": {
    "argv": "./usr/bin/FEX",
    "environment": { "FEX_PORTABLE": "1" },
    "container_argv": "./usr/bin/FEX",
    "container_environment": { "FEX_ROOTFS": "" },
    "main_argv": "./FEXCompatTool",
    "server_argv": "./usr/bin/FEXServerManager",
    "emulated_architectures": ["x86_64-linux-gnu", "i386-linux-gnu"],
    "required_architectures": ["aarch64-linux-gnu"],
    "required_libraries": ["libc.so.6", "libstdc++.so.6"]
  }
}


================================================
FILE: Source/Steam/toolmanifest.vdf
================================================
"manifest"
{
  "commandline" "/fex-compat-tool %verb% --"
  "filter_exclusive_priority" "2"
  "version" "2"
  "use_tool_subprocess_reaper" "1"
  "compatmanager_layer_name" "fex"
}


================================================
FILE: Source/Tools/CMakeLists.txt
================================================
# Static helper library used by the tools; added on every platform.
add_subdirectory(CommonTools)

# None of the tools below are added when building with MinGW.
if (NOT MINGW)
  if (BUILD_FEXCONFIG)
    # Try Qt6 first; QUIET suppresses the not-found message so Qt5 can be tried instead.
    find_package(Qt6 COMPONENTS Qml Quick Widgets QUIET)
    if (NOT Qt6_FOUND)
      # Qt5 is REQUIRED: configuration fails here if neither Qt version is available.
      find_package(Qt5 COMPONENTS Qml Quick Widgets REQUIRED)
    endif()

    add_subdirectory(FEXConfig/)
  endif()

  if (ENABLE_GDB_SYMBOLS)
    add_subdirectory(FEXGDBReader/)
  endif()

  # The RootFS fetcher is left out of builds with BUILD_STEAM_SUPPORT enabled.
  if (NOT BUILD_STEAM_SUPPORT)
    add_subdirectory(FEXRootFSFetcher/)
  endif()

  # Tools that are always built on non-MinGW targets.
  add_subdirectory(FEXGetConfig/)
  add_subdirectory(FEXServer/)
  add_subdirectory(FEXBash/)
  add_subdirectory(FEXOfflineCompiler/)
  add_subdirectory(CodeSizeValidation/)
  add_subdirectory(LinuxEmulation/)

  add_subdirectory(FEXInterpreter/)
  add_subdirectory(pidof/)
  # The test harness is only needed when tests are being built.
  if (BUILD_TESTING)
    add_subdirectory(TestHarnessRunner/)
  endif()
endif()


================================================
FILE: Source/Tools/CodeSizeValidation/CMakeLists.txt
================================================
# Libraries the CodeSizeValidation executable links against.
list(APPEND LIBS FEXCore Common CommonTools JemallocLibs)

# Single-source executable.
add_executable(CodeSizeValidation Main.cpp)
# Makes headers under the build tree's `generated` directory visible to this target only.
target_include_directories(CodeSizeValidation PRIVATE ${CMAKE_BINARY_DIR}/generated)

target_link_libraries(CodeSizeValidation PRIVATE ${LIBS} ${PTHREAD_LIB})


================================================
FILE: Source/Tools/CodeSizeValidation/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include "DummyHandlers.h"
#include "Common/HostFeatures.h"
#include "FEXCore/Core/Context.h"
#include "FEXCore/Debug/InternalThreadState.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/File.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/SignalScopeGuards.h>

#include <sys/stat.h>

namespace CodeSize {
/**
 * Compiles small pieces of guest code through a FEXCore context and records what the log
 * callbacks report about the generated host code.
 *
 * The statistics themselves are filled in by ParseMessage(), which the log handler calls for
 * every info-level message while a compile is running.
 */
class CodeSizeValidation final {
public:
  // Counters gathered for one compiled block.
  struct InstructionStats {
    uint64_t GuestCodeInstructions {};
    uint64_t HostCodeInstructions {};

    uint64_t HeaderSize {};
    uint64_t TailSize {};
  };

  // One entry per captured line of host disassembly.
  using CodeLines = fextl::vector<fextl::string>;

  // Pair-like aggregate: `first` holds the counters, `second` the disassembly lines.
  struct InstructionData {
    InstructionStats first;
    CodeLines second;
  };

  // Requests the staging region for guest code at a fixed low address.
  CodeSizeValidation() {
    constexpr uint64_t Code_start_page = 0x1'0000;
    void* const RequestedBase = reinterpret_cast<void*>(Code_start_page);

    CodeStart =
      FEXCore::Allocator::mmap(RequestedBase, MAX_CODE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (CodeStart == RequestedBase) {
      return;
    }

    // The mapping did not land where it was requested: report it and drop the region.
    LogMan::Msg::AFmt("Couldn't allocate test region!");
    FEXCore::Allocator::VirtualFree(CodeStart, MAX_CODE_SIZE);
    CodeStart = nullptr;
  }

  // Feeds one log message into the parser. Returns true when the message was not consumed.
  bool ParseMessage(const char* Message);

  /**
   * Copies `SizeBytes` bytes from `Data` into the staging region, compiles them and returns the
   * statistics collected during that compile.
   *
   * @param MaxInst Instruction limit passed to CompileRIPCount; -1 selects plain CompileRIP.
   */
  InstructionData CompileAndGetStats(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread, const void* Data,
                                     size_t SizeBytes, int32_t MaxInst = -1) {
    if (SizeBytes > MAX_CODE_SIZE) {
      LogMan::Msg::AFmt("x86 code too large!");
    }

    const auto GuestBase = reinterpret_cast<uint64_t>(CodeStart);

    {
      // Throw away anything previously compiled from the staging region.
      auto CodeInvalidationlk = FEXCore::GuardSignalDeferringSection(CTX->GetCodeInvalidationMutex(), Thread);
      CTX->InvalidateCodeBuffersCodeRange(GuestBase, MAX_CODE_SIZE);
      CTX->InvalidateThreadCachedCodeRange(Thread, GuestBase, MAX_CODE_SIZE);
    }

    ClearStats();
    memcpy(CodeStart, Data, SizeBytes);

    if (MaxInst != -1) {
      CTX->CompileRIPCount(Thread, GuestBase, MaxInst);
    } else {
      CTX->CompileRIP(Thread, GuestBase);
    }
    return CurrentStats;
  }

  // True while CalculateBaseStats() is running.
  bool InfoPrintingDisabled() const {
    return SetupInfoDisabled;
  }

  // Measures the per-block header length used to trim later captures.
  void CalculateBaseStats(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread);
private:
  // Resets the statistics being accumulated for the current block.
  void ClearStats() {
    CurrentStats = InstructionData {};
  }

  uint64_t CurrentRIPParse {};
  bool ConsumingDisassembly {};
  InstructionData CurrentStats {};

  // Number of leading disassembly lines to discard; -1 until it has been measured.
  ssize_t HeaderSize {-1};

  void* CodeStart {};
  constexpr static size_t MAX_CODE_SIZE = 512 * 1024 * 1024;

  bool SetupInfoDisabled {};
};

// Markers that CodeSizeValidation::ParseMessage searches for in info-level log messages.
// Message announcing the guest RIP of a block; a hexadecimal value follows the prefix.
constexpr std::string_view RIPMessage = "RIP: 0x";
// Message carrying the number of guest instructions in the block.
constexpr std::string_view GuestCodeMessage = "Guest Code instructions: ";
// Every message between these two markers is captured as one line of disassembly.
constexpr std::string_view DisassembleBeginMessage = "Disassemble Begin";
constexpr std::string_view DisassembleEndMessage = "Disassemble End";
// Messages containing this marker are swallowed without being recorded.
constexpr std::string_view BlowUpMsg = "Blow-up Amt: ";

// Strips address-dependent parts from one line of disassembly so output is stable between runs.
//
// Everything from " (addr" onwards is removed. For lines containing "adrp " or "adr ", the text
// from the first " #" onwards is removed as well. The result is a view into the input.
static std::string_view SanitizeDisassembly(std::string_view Message) {
  constexpr auto NotFound = std::string_view::npos;

  // Cut the resolved-address annotation, if present.
  if (const auto AddrPos = Message.find(" (addr"); AddrPos != NotFound) {
    Message.remove_suffix(Message.size() - AddrPos);
  }

  const bool HasPCRelativeOp = Message.find("adrp ") != NotFound || Message.find("adr ") != NotFound;
  if (!HasPCRelativeOp) {
    return Message;
  }

  // Cut the immediate operand as well.
  if (const auto ImmPos = Message.find(" #"); ImmPos != NotFound) {
    Message.remove_suffix(Message.size() - ImmPos);
  }
  return Message;
}

// Consumes one info-level log message and updates the statistics of the block being compiled.
//
// Returns false when the message was recognised and consumed (RIP announcement, guest instruction
// count, disassembly markers, disassembly lines, blow-up report), and true when the message is
// unrelated and may be handled by the caller.
bool CodeSizeValidation::ParseMessage(const char* Message) {
  // std::string_view doesn't have contains until c++23.
  const std::string_view MessageView {Message};

  if (const auto Pos = MessageView.find(RIPMessage); Pos != MessageView.npos) {
    // New RIP found. Parse the hexadecimal value that follows the marker, wherever the marker is.
    const std::string_view RIPView = MessageView.substr(Pos + RIPMessage.size());
    std::from_chars(RIPView.data(), RIPView.data() + RIPView.size(), CurrentRIPParse, 16);
    ClearStats();
    return false;
  }

  if (const auto Pos = MessageView.find(GuestCodeMessage); Pos != MessageView.npos) {
    const std::string_view CodeSizeView = MessageView.substr(Pos + GuestCodeMessage.size());
    std::from_chars(CodeSizeView.data(), CodeSizeView.data() + CodeSizeView.size(), CurrentStats.first.GuestCodeInstructions);
    return false;
  }

  if (MessageView.find(DisassembleBeginMessage) != MessageView.npos) {
    ConsumingDisassembly = true;
    // Just so the output isn't a mess.
    return false;
  }

  if (MessageView.find(DisassembleEndMessage) != MessageView.npos) {
    ConsumingDisassembly = false;
    // Just so the output isn't a mess.

    auto& Lines = CurrentStats.second;

    // Remove the header. Never erase more lines than were captured; erasing past end() is undefined.
    if (HeaderSize != -1) {
      const size_t HeaderLines = std::min(static_cast<size_t>(HeaderSize), Lines.size());
      Lines.erase(Lines.begin(), Lines.begin() + HeaderLines);
    }

    // Find the first `udf #0x420f` and remove everything from that point onward (the tail).
    const auto EraseBegin = std::find(Lines.begin(), Lines.end(), "udf #0x420f");
    Lines.erase(EraseBegin, Lines.end());

    CurrentStats.first.HostCodeInstructions = Lines.size();
    return false;
  }

  if (MessageView.find(BlowUpMsg) != MessageView.npos) {
    return false;
  }

  if (ConsumingDisassembly) {
    // Currently consuming disassembly. Each message is a single line of disassembly.
    CurrentStats.second.push_back(fextl::string(SanitizeDisassembly(MessageView)));
    return false;
  }

  return true;
}

// Measures how many disassembly lines precede the actual instruction implementation in a block.
//
// A single x86 NOP is compiled with an instruction limit of one. Because HeaderSize is still -1 at
// this point, ParseMessage keeps the leading lines and only drops everything from the first
// `udf #0x420f` onward, so the number of lines left is stored as the header length.
void CodeSizeValidation::CalculateBaseStats(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread) {
  // Flag that the baseline measurement is in progress (see InfoPrintingDisabled()).
  SetupInfoDisabled = true;

  // Known hardcoded instruction that generates a block of a particular size:
  // NOP will never generate any instructions of its own.
  constexpr static uint8_t NOP[] = {
    0x90,
  };

  // Shape of the disassembly reported for the NOP block:
  //   adr x0, #-0x4 (addr 0x7fffe9880054)    <- header
  //   str x0, [x28, #184]                    <- header
  //   udf #0x420f                            <- end-of-function marker
  //   ldr x0, pc+8 (addr 0x7fffe988006c)     <- tail from here on
  //   blr x0
  //   unallocated (Unallocated)
  //   udf #0x7fff
  //   unallocated (Unallocated)
  //   udf #0x0
  const auto Baseline = CompileAndGetStats(CTX, Thread, NOP, sizeof(NOP), 1);

  // Whatever survived trimming is header, since the NOP itself contributes nothing.
  HeaderSize = Baseline.second.size();

  SetupInfoDisabled = false;
}

// Instance used by the log handler; null until main() points it at its local CodeSizeValidation.
static CodeSizeValidation* Validation {};
} // namespace CodeSize

// Log sink: info-level messages are offered to the validator first; anything it does not consume
// is printed as "<level> <message>".
void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const char* LevelName = LogMan::DebugLevelStr(Level);

  if (Level == LogMan::INFO) {
    // Disassemble information is sent through the Info log level.
    const bool Unconsumed = CodeSize::Validation->ParseMessage(Message);

    // Consumed messages are never printed; the rest are hidden while info printing is disabled.
    if (!Unconsumed || CodeSize::Validation->InfoPrintingDisabled()) {
      return;
    }
  }

  fextl::fmt::print("{} {}\n", LevelName, Message);
}

// Assertion sink: prints the message with an "A" prefix and flushes all open output streams.
void AssertHandler(const char* Message) {
  fextl::fmt::print("A {}\n", Message);

  // A null stream argument asks fflush to flush every open output stream.
  fflush(nullptr);
}

// One test record as laid out in the test binary. Records are packed back to back: the next
// record starts at &Code[CodeSize].
struct TestInfo {
  // Instruction text, used as the test's name in log output and in the updated-counts JSON.
  char TestInst[128];
  // Host instruction count the test is expected to produce.
  int64_t ExpectedInstructionCount;
  // Number of bytes stored in Code.
  uint64_t CodeSize;
  // Passed to the compiler as the maximum number of guest instructions to compile.
  uint64_t x86InstCount;
  // Not read anywhere in this file.
  uint32_t Cookie;
  // Guest machine code for this test (CodeSize bytes).
  uint8_t Code[];
};

// Header found at the start of the test binary.
struct TestHeader {
  // 64 selects 64-bit mode; any other value selects 32-bit mode.
  uint64_t Bitness;
  // Number of TestInfo records in the file.
  uint64_t NumTests {};
  // Bitmasks of host features to force on / off, tested against the FEATURE_* bits in main().
  uint64_t EnabledHostFeatures;
  uint64_t DisabledHostFeatures;
  // Number of key/value pairs stored at the start of Data.
  uint64_t EnvironmentVariableCount;
  // Environment variables (pairs of null-terminated strings) followed by the TestInfo records.
  uint8_t Data[];
};

// Mapping of the test binary and its size in bytes; set by LoadTests().
static void* TestData;
static size_t TestDataSize;
// Views into the mapping: the header at its start and the first test record after the
// environment variables.
static const TestHeader* TestHeaderData {};
static const TestInfo* TestsStart {};
// NOTE(review): not referenced anywhere in the visible part of this file.
static fextl::vector<std::pair<std::string_view, std::string_view>> EnvironmentVariables {};

// Compiles every test from the loaded test binary, compares the host instruction counts with the
// expected values and optionally writes the observed results to a JSON file.
//
// UpdatedInstructionCountsPath: when non-null, path of the JSON file to (re)create with the
// observed disassembly and any changed instruction counts.
// Returns true when every test produced exactly the expected number of host instructions.
static bool TestInstructions(FEXCore::Context::Context* CTX, FEXCore::Core::InternalThreadState* Thread, const char* UpdatedInstructionCountsPath) {
  LogMan::Msg::IFmt("Compiling code");

  // Records are variable-length; the next one begins right after the current record's code bytes.
  const auto NextTest = [](const TestInfo* Test) {
    return reinterpret_cast<const TestInfo*>(&Test->Code[Test->CodeSize]);
  };

  // Tell FEXCore to compile all the instructions upfront.
  fextl::vector<CodeSize::CodeSizeValidation::InstructionData> Results(TestHeaderData->NumTests);

  const TestInfo* Test = TestsStart;
  for (auto& Result : Results) {
    LogMan::Msg::IFmt("Compiling instruction '{}'", Test->TestInst);
    Result = CodeSize::Validation->CompileAndGetStats(CTX, Thread, Test->Code, Test->CodeSize, Test->x86InstCount);
    Test = NextTest(Test);
  }

  // Check each result against its expectation.
  bool TestsPassed = true;
  Test = TestsStart;
  for (const auto& Result : Results) {
    const auto HostCount = Result.first.HostCodeInstructions;

    LogMan::Msg::IFmt("Testing instruction '{}': {} host instructions", Test->TestInst, HostCount);

    if (HostCount != Test->ExpectedInstructionCount) {
      // Show the code since the count of instructions changed to something we didn't expect.
      for (const auto& Line : Result.second) {
        LogMan::Msg::EFmt("\t{}", Line);
      }

      LogMan::Msg::EFmt("Fail: '{}': {} host instructions", Test->TestInst, HostCount);
      LogMan::Msg::EFmt("Fail: Test took {} instructions but we expected {} instructions!", HostCount, Test->ExpectedInstructionCount);

      // Fail the test if the instruction count has changed at all.
      TestsPassed = false;
    }

    Test = NextTest(Test);
  }

  if (!UpdatedInstructionCountsPath) {
    return TestsPassed;
  }

  // Remove any previous file before recreating it.
  unlink(UpdatedInstructionCountsPath);

  FEXCore::File::File FD(UpdatedInstructionCountsPath,
                         FEXCore::File::FileModes::WRITE | FEXCore::File::FileModes::CREATE | FEXCore::File::FileModes::TRUNCATE);

  if (!FD.IsValid()) {
    // If we couldn't open the file then early exit this.
    LogMan::Msg::EFmt("Couldn't open {} for updating instruction counts", UpdatedInstructionCountsPath);
    return TestsPassed;
  }

  FD.Write("{\n", 2);

  Test = TestsStart;
  for (const auto& Result : Results) {
    const auto HostCount = Result.first.HostCodeInstructions;
    const auto& Lines = Result.second;

    FD.Write(fextl::fmt::format("\t\"{}\": {{\n", Test->TestInst));

    // The count is only emitted for tests whose count differs from the expectation.
    if (HostCount != Test->ExpectedInstructionCount) {
      FD.Write(fextl::fmt::format("\t\t\"ExpectedInstructionCount\": {},\n", HostCount));
    }

    FD.Write(fextl::fmt::format("\t\t\"ExpectedArm64ASM\": [\n"));
    for (size_t LineIdx = 0; LineIdx < Lines.size(); ++LineIdx) {
      // Every array element except the last is followed by a comma.
      const bool IsLast = LineIdx + 1 == Lines.size();
      FD.Write(fextl::fmt::format("\t\t\t\"{}\"{}\n", Lines[LineIdx], IsLast ? "" : ","));
    }
    FD.Write(fextl::fmt::format("\t\t]\n"));

    FD.Write(fextl::fmt::format("\t}},\n"));

    Test = NextTest(Test);
  }

  // Print a null member so the trailing comma of the last test stays valid.
  FD.Write(fextl::fmt::format("\t\"\": \"\""));

  FD.Write("}\n", 2);

  return TestsPassed;
}

bool LoadTests(const char* Path) {
  int FD = open(Path, O_RDONLY | O_CLOEXEC);
  if (FD == -1) {
    return false;
  }

  struct stat buf;
  if (fstat(FD, &buf) == -1) {
    close(FD);
    return false;
  }

  TestDataSize = buf.st_size;
  TestData = FEXCore::Allocator::mmap(nullptr, TestDataSize, PROT_READ, MAP_PRIVATE, FD, 0);
  if (reinterpret_cast<uint64_t>(TestData) == ~0ULL) {
    close(FD);
    return false;
  }

  close(FD);

  TestHeaderData = reinterpret_cast<const TestHeader*>(TestData);

  // Need to walk past the environment variables to get to the actual tests.
  const uint8_t* Data = TestHeaderData->Data;
  for (size_t i = 0; i < TestHeaderData->EnvironmentVariableCount; ++i) {
    // Environment variables are a pair of null terminated strings.
    Data += strlen(reinterpret_cast<const char*>(Data)) + 1;
    Data += strlen(reinterpret_cast<const char*>(Data)) + 1;
  }
  TestsStart = reinterpret_cast<const TestInfo*>(Data);
  return true;
}

namespace {
// Table pairing an environment-variable name ("FEX_" followed by the option's enum name) with its
// ConfigOption value. One entry is generated per OPT_BASE expansion in ConfigValues.inl.
static const fextl::vector<std::pair<const char*, FEXCore::Config::ConfigOption>> EnvConfigLookup = {{
#define OPT_BASE(type, group, enum, json, default) {"FEX_" #enum, FEXCore::Config::ConfigOption::CONFIG_##enum},
#include <FEXCore/Config/ConfigValues.inl>
}};

// Config layer that applies the environment variables embedded in the loaded test binary.
// Claims to be a local application config layer
class TestEnvLoader final : public FEXCore::Config::Layer {
public:
  // Registers as LAYER_LOCAL_APP and loads the variables immediately.
  // TestHeaderData must already be set (i.e. LoadTests() succeeded) before construction.
  explicit TestEnvLoader()
    : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_LOCAL_APP) {
    Load();
  }

  // Reads every key/value pair stored after the test header and applies the ones that match an
  // entry of EnvConfigLookup.
  void Load() override {
    // Keys are views into the mapped test file; values are owned copies.
    fextl::unordered_map<std::string_view, std::string> EnvMap;
    const uint8_t* Data = TestHeaderData->Data;
    for (size_t i = 0; i < TestHeaderData->EnvironmentVariableCount; ++i) {
      // Environment variables are a pair of null terminated strings.
      const std::string_view Key = reinterpret_cast<const char*>(Data);
      Data += strlen(reinterpret_cast<const char*>(Data)) + 1;

      const std::string_view Value_View = reinterpret_cast<const char*>(Data);
      Data += strlen(reinterpret_cast<const char*>(Data)) + 1;
      std::optional<fextl::string> Value;

      // NOTE(review): the code expanded from ConfigOptions.inl is not visible here; presumably it
      // may fill in `Value` with an adjusted form of the raw string for some keys - confirm.
#define ENVLOADER
#include <FEXCore/Config/ConfigOptions.inl>

      // Prefer the value produced by the included code; otherwise keep the raw string.
      if (Value) {
        EnvMap.insert_or_assign(Key, *Value);
      } else {
        EnvMap.insert_or_assign(Key, Value_View);
      }
    }

    // Looks a variable up by name; returns nullopt when the test did not set it.
    auto GetVar = [&](const std::string_view id) -> std::optional<std::string_view> {
      const auto it = EnvMap.find(id);
      if (it == EnvMap.end()) {
        return std::nullopt;
      }

      return it->second;
    };

    for (auto& it : EnvConfigLookup) {
      if (auto Value = GetVar(it.first); Value) {
        // The include below expands to one `else if` per string-array option, which appends the
        // value instead of replacing it. All other options fall through to the final Set().
#define OPT_BASE(type, group, enum, json, default) // Nothing
#define OPT_STRARRAY(group, enum, json, default)                        \
  else if (it.second == FEXCore::Config::ConfigOption::CONFIG_##enum) { \
    AppendStrArrayValue(it.second, *Value);                             \
  }

        // Empty head so that the generated `else if` chain has something to attach to.
        if (false) {
        }
#include <FEXCore/Config/ConfigValues.inl>
        else {
          Set(it.second, *Value);
        }
      }
    }
  }

private:
  // NOTE(review): not referenced anywhere in the visible part of this class.
  fextl::vector<std::pair<std::string_view, std::string_view>> Env;
};

// Minimal syscall handler: this tool only compiles code, so every hook is a stub.
class SimpleSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators {
public:
  SimpleSyscallHandler() {
    // Just claim to be linux 64-bit for simplicity.
    OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX64;
  }

  // Syscalls are ignored and always report 0.
  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame*, FEXCore::HLE::SyscallArguments*) override {
    return 0;
  }

  // No-op implementations of the remaining SyscallHandler API.

  // Never reports an executable file section.
  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t) override {
    return std::nullopt;
  }

  // Answers every query with the same fixed range, regardless of the address asked about.
  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState*, uint64_t) override {
    return {0, UINT64_MAX, true};
  }
};
} // namespace

int main(int argc, char** argv, char** const envp) {
  FEXCore::Allocator::GLIBCScopedFault GLIBFaultScope;

  // Initialize early as the message handlers use it.
  CodeSize::CodeSizeValidation Validation {};
  CodeSize::Validation = &Validation;

  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);
  FEXCore::Config::Initialize();
  FEXCore::Config::Load();

  if (argc < 2) {
    LogMan::Msg::EFmt("Usage: {} <Test binary> [Changed instruction count.json]", argv[0]);
    return 1;
  }

  if (!LoadTests(argv[1])) {
    LogMan::Msg::EFmt("Couldn't load tests from {}", argv[1]);
    return 1;
  }

  FEXCore::Config::AddLayer(fextl::make_unique<TestEnvLoader>());
  FEXCore::Config::ReloadMetaLayer();

  // Setup configurations that this tool needs
  // Maximum one instruction.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_MAXINST, "1");
  // Enable block disassembly.
  FEXCore::Config::Set(
    FEXCore::Config::CONFIG_DISASSEMBLE,
    fextl::fmt::format("{}", static_cast<uint64_t>(FEXCore::Config::Disassemble::BLOCKS | FEXCore::Config::Disassemble::STATS)));
  // Choose bitness.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, TestHeaderData->Bitness == 64 ? "1" : "0");
  // Disable telemetry, it can affect instruction counts.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_DISABLETELEMETRY, "1");
  // Disable vixl simulator indirect calls as it can affect instruction counts.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_DISABLE_VIXL_INDIRECT_RUNTIME_CALLS, "1");

  FEXCore::Config::Set(FEXCore::Config::CONFIG_TSOENABLED, "0");

  // Host feature override. Only supports overriding SVE width.
  enum HostFeatures {
    FEATURE_SVE128 = (1U << 0),
    FEATURE_SVE256 = (1U << 1),
    FEATURE_CLZERO = (1U << 2),
    FEATURE_RNG = (1U << 3),
    FEATURE_FCMA = (1U << 4),
    FEATURE_CSSC = (1U << 5),
    FEATURE_AFP = (1U << 6),
    FEATURE_RPRES = (1U << 7),
    FEATURE_FLAGM = (1U << 8),
    FEATURE_FLAGM2 = (1U << 9),
    FEATURE_CRYPTO = (1U << 10),
    FEATURE_AES256 = (1U << 11),
    FEATURE_SVEBITPERM = (1U << 12),
    FEATURE_TSO = (1U << 13),
    FEATURE_LRCPC = (1U << 14),
    FEATURE_LRCPC2 = (1U << 15),
    FEATURE_FRINTTS = (1U << 16),
    FEATURE_MOPS = (1U << 17),
  };

  uint64_t SVEWidth = 0;
  uint64_t HostFeatureControl {};
  if (TestHeaderData->EnabledHostFeatures & FEATURE_SVE128) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLESVE);
    SVEWidth = 128;
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_SVE256) {
    SVEWidth = 256;
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_CLZERO) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLECLZERO);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_RNG) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLERNG);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_FCMA) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEFCMA);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_CSSC) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLECSSC);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_AFP) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEAFP);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_RPRES) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLERPRES);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_FLAGM) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEFLAGM);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_FLAGM2) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEFLAGM2);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_CRYPTO) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLECRYPTO);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_SVEBITPERM) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLESVEBITPERM);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_LRCPC2) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLELRCPC2);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_FRINTTS) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEFRINTTS);
  }
  if (TestHeaderData->EnabledHostFeatures & FEATURE_MOPS) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEMOPS);
  }

  if (TestHeaderData->EnabledHostFeatures & FEATURE_TSO) {
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "1");
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "1");
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "1");
  } else {
    // Override the TSO default setting, since TSO is not relevant for most tests
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "0");
  }

  // Always enable ARMv8.1 LSE atomics.
  HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEATOMICS);

  if (TestHeaderData->DisabledHostFeatures & FEATURE_SVE128) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVE);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_CLZERO) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLECLZERO);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_RNG) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLERNG);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_FCMA) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEFCMA);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_CSSC) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLECSSC);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_AFP) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEAFP);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_RPRES) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLERPRES);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_FLAGM) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEFLAGM);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_FLAGM2) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEFLAGM2);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_CRYPTO) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLECRYPTO);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_SVEBITPERM) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLESVEBITPERM);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_LRCPC2) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLELRCPC2);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_FRINTTS) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEFRINTTS);
  }
  if (TestHeaderData->DisabledHostFeatures & FEATURE_MOPS) {
    HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::DISABLEMOPS);
  }

  if (TestHeaderData->DisabledHostFeatures & FEATURE_TSO) {
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_TSOENABLED, "0");
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_VECTORTSOENABLED, "0");
    FEXCore::Config::Set(FEXCore::Config::ConfigOption::CONFIG_MEMCPYSETTSOENABLED, "0");
  }

  // Always enable preserve_all abi.
  HostFeatureControl |= static_cast<uint64_t>(FEXCore::Config::HostFeatures::ENABLEPRESERVEALLABI);

  FEXCore::Config::Set(FEXCore::Config::CONFIG_HOSTFEATURES, fextl::fmt::format("{}", HostFeatureControl));
  FEXCore::Config::Set(FEXCore::Config::CONFIG_FORCESVEWIDTH, fextl::fmt::format("{}", SVEWidth));

  // Create FEXCore context.
  fextl::unique_ptr<FEXCore::Context::Context> CTX;
  {
    auto HostFeatures = FEX::FetchHostFeatures();
    HostFeatures.IsInstCountCI = true;
    CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
  }

  auto SignalDelegation = FEX::DummyHandlers::CreateSignalDelegator();
  auto SyscallHandler = fextl::make_unique<SimpleSyscallHandler>();

  CTX->SetSignalDelegator(SignalDelegation.get());
  CTX->SetSyscallHandler(SyscallHandler.get());
  if (!CTX->InitCore()) {
    return -1;
  }
  auto ParentThread = CTX->CreateThread(0, 0);

  // GDT data
  FEXCore::Core::CPUState::gdt_segment gdt[32] {};

  {
    auto Frame = ParentThread->CurrentFrame;
    // GDT and LDT are tracked per thread.
    Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &gdt[0];
    // TODO: LDTs are currently unsupported, mirror them to GDT.
    Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &gdt[0];

    // Default code segment indexes match the numbers that the Linux kernel uses.
    Frame->State.cs_idx = FEXCore::Core::CPUState::DEFAULT_USER_CS << 3;
    auto GDT = FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx);
    FEXCore::Core::CPUState::SetGDTBase(GDT, 0);
    FEXCore::Core::CPUState::SetGDTLimit(GDT, 0xF'FFFFU);
    Frame->State.cs_cached =
      FEXCore::Core::CPUState::CalculateGDTBase(*FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx));

    if (TestHeaderData->Bitness == 64) {
      GDT->L = 1; // L = Long Mode = 64-bit
      GDT->D = 0; // D = Default Operand SIze = Reserved
    } else {
      GDT->L = 0; // L = Long Mode = 32-bit
      GDT->D = 1; // D = Default Operand Size = 32-bit
    }
  }

  // Calculate the base stats for instruction testing.
  CodeSize::Validation->CalculateBaseStats(CTX.get(), ParentThread);

  // Test all the instructions.
  auto Result = TestInstructions(CTX.get(), ParentThread, argc >= 2 ? argv[2] : nullptr) ? 0 : 1;
  CTX->DestroyThread(ParentThread);

  FEXCore::Allocator::VirtualFree(TestData, TestDataSize);
  return Result;
}


================================================
FILE: Source/Tools/CommonTools/CMakeLists.txt
================================================
# Sources built on every platform.
set(SRCS DummyHandlers.cpp)

# The ELF container helper is left out of MinGW builds.
if (NOT MINGW)
  list(APPEND SRCS Linux/Utils/ELFContainer.cpp)
endif()

add_library(CommonTools STATIC ${SRCS})
target_link_libraries(CommonTools FEXCore_Base FEXHeaderUtils)
# PUBLIC: targets linking CommonTools can include headers from this directory directly.
target_include_directories(CommonTools PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})


================================================
FILE: Source/Tools/CommonTools/CodeLoader.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <unistd.h>

namespace FEX {

/**
 * @brief Code loader class so the CPU backend can load code in a generic fashion
 *
 * This class is expected to have multiple different styles of code loaders.
 * Implementations must provide the stack size, the initial stack pointer and the entry RIP;
 * the remaining virtual queries have defaults that return empty/zero values.
 */
class CodeLoader {
public:
  /**
   * @brief Address/size pair returned by GetAuxv()
   */
  struct AuxvResult {
    uint64_t address;
    uint64_t size;
  };

  virtual ~CodeLoader() = default;

  /**
   * @brief CPU Core uses this to choose what the stack size should be for this code
   */
  virtual uint64_t StackSize() const = 0;

  /**
   * @brief Returns the initial stack pointer
   */
  virtual uint64_t GetStackPointer() const = 0;

  /**
   * @brief Function to return the guest RIP that the code should start out at
   */
  virtual uint64_t DefaultRIP() const = 0;

  /**
   * @brief Arguments to use for execve; the default implementation returns an empty list
   */
  virtual fextl::vector<const char*> GetExecveArguments() const {
    return {};
  }

  /**
   * @brief Location of the auxiliary vector; the default implementation returns a zeroed result
   */
  virtual AuxvResult GetAuxv() const {
    return {};
  }

  /**
   * @brief Base offset of the loaded code; the default implementation returns 0
   */
  virtual uint64_t GetBaseOffset() const {
    return 0;
  }

  /**
   * @brief Read-only access to the application arguments stored by the loader
   */
  const fextl::vector<fextl::string>& GetApplicationArguments() const {
    return ApplicationArgs;
  }

protected:
  // Filled in by derived loaders; exposed through GetApplicationArguments().
  fextl::vector<fextl::string> ApplicationArgs;
};

} // namespace FEX


================================================
FILE: Source/Tools/CommonTools/DummyHandlers.cpp
================================================
// SPDX-License-Identifier: MIT
#include "DummyHandlers.h"

namespace FEX::DummyHandlers {
thread_local FEXCore::Core::InternalThreadState* TLSThread;

void DummySignalDelegator::RegisterTLSState(FEXCore::Core::InternalThreadState* Thread) {
  TLSThread = Thread;
}

void DummySignalDelegator::UninstallTLSState(FEXCore::Core::InternalThreadState* Thread) {
  TLSThread = nullptr;
}

FEXCore::Core::InternalThreadState* DummySignalDelegator::GetTLSThread() {
  return TLSThread;
}

fextl::unique_ptr<FEXCore::HLE::SyscallHandler> CreateSyscallHandler() {
  return fextl::make_unique<DummySyscallHandler>();
}

fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> CreateSignalDelegator() {
  return fextl::make_unique<DummySignalDelegator>();
}
} // namespace FEX::DummyHandlers


================================================
FILE: Source/Tools/CommonTools/DummyHandlers.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/AllocatorHooks.h>

#include <FEXCore/fextl/memory.h>

namespace FEX::DummyHandlers {

/**
 * @brief SyscallHandler implementation whose operations all do nothing.
 */
class DummySyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators {
public:
  // Ignores the syscall entirely and reports zero.
  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame*, FEXCore::HLE::SyscallArguments*) override {
    return 0;
  }

  // No-op implementations of the remaining SyscallHandler API follow.

  // Never finds a section: always returns an empty optional.
  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t) override {
    return {};
  }

  // Returns the same fixed answer for every address.
  // NOTE(review): presumably {0, UINT64_MAX, true} means "the whole address space is executable" - confirm against ExecutableRangeInfo.
  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState*, uint64_t) override {
    return {0, UINT64_MAX, true};
  }
};

/**
 * @brief SignalDelegator implementation that only tracks a thread state pointer per thread.
 */
class DummySignalDelegator final : public FEXCore::SignalDelegator, public FEXCore::Allocator::FEXAllocOperators {
public:
  // Public accessor for the thread state returned by the private GetTLSThread().
  FEXCore::Core::InternalThreadState* GetBackingTLSThread() {
    return GetTLSThread();
  }

protected:
  void RegisterTLSState(FEXCore::Core::InternalThreadState* Thread);
  void UninstallTLSState(FEXCore::Core::InternalThreadState* Thread);

private:
  FEXCore::Core::InternalThreadState* GetTLSThread();
};

// Factory functions for the dummy handlers declared above.
fextl::unique_ptr<FEXCore::HLE::SyscallHandler> CreateSyscallHandler();
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> CreateSignalDelegator();
} // namespace FEX::DummyHandlers


================================================
FILE: Source/Tools/CommonTools/HarnessHelpers.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "CodeLoader.h"
#include "Common/Config.h"

#include <array>
#include <bitset>
#include <cassert>
#include <cstring>
#include <fcntl.h>

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/BitUtils.h>
#include <FEXHeaderUtils/Syscalls.h>
#include <unistd.h>

namespace FEX::HarnessHelper {
/**
 * @brief Compares selected registers of two CPU states.
 *
 * MatchMask is consumed from the lowest bit upwards, one bit per register, in this order:
 * RIP, every GPR, every XMM register, GS, FS. A register is only compared when its bit is set.
 * For XMM registers the first two 64-bit elements are compared, read from the AVX or the SSE
 * view of the register file depending on SupportsAVX.
 *
 * @param State1 First state.
 * @param State2 Second state.
 * @param MatchMask Bitmask selecting which registers take part in the comparison.
 * @param OutputGPRs When true, every mismatching pair is printed.
 * @param SupportsAVX Selects the AVX (true) or SSE (false) XMM storage.
 *
 * @return true when every selected register is equal in both states.
 */
inline bool CompareStates(const FEXCore::Core::CPUState& State1, const FEXCore::Core::CPUState& State2, uint64_t MatchMask, bool OutputGPRs,
                          bool SupportsAVX) {
  bool AllEqual = true;

  // Compares one pair of values, prints it when it differs (and printing is enabled),
  // then folds the outcome into the overall result.
  const auto Compare = [&AllEqual, OutputGPRs](const fextl::string& Label, uint64_t Lhs, uint64_t Rhs) {
    const bool Same = Lhs == Rhs;
    if (OutputGPRs && !Same) {
      const char* Relation = "!=";
      fextl::fmt::print("{}: 0x{:016x} {} 0x{:016x}\n", Label, Lhs, Relation, Rhs);
    }
    AllEqual &= Same;
  };

  // Pops the lowest bit of the mask and reports whether it was set.
  const auto TakeBit = [&MatchMask]() {
    const bool Selected = (MatchMask & 1) != 0;
    MatchMask >>= 1;
    return Selected;
  };

  // RIP
  if (TakeBit()) {
    Compare("RIP", State1.rip, State2.rip);
  }

  // GPRS
  for (unsigned Reg = 0; Reg < FEXCore::Core::CPUState::NUM_GPRS; ++Reg) {
    if (TakeBit()) {
      Compare(fextl::fmt::format("GPR{}", Reg), State1.gregs[Reg], State2.gregs[Reg]);
    }
  }

  // XMM
  for (size_t Reg = 0; Reg < FEXCore::Core::CPUState::NUM_XMMS; ++Reg) {
    if (!TakeBit()) {
      continue;
    }

    const uint64_t Low1 = SupportsAVX ? State1.xmm.avx.data[Reg][0] : State1.xmm.sse.data[Reg][0];
    const uint64_t Low2 = SupportsAVX ? State2.xmm.avx.data[Reg][0] : State2.xmm.sse.data[Reg][0];
    const uint64_t High1 = SupportsAVX ? State1.xmm.avx.data[Reg][1] : State1.xmm.sse.data[Reg][1];
    const uint64_t High2 = SupportsAVX ? State2.xmm.avx.data[Reg][1] : State2.xmm.sse.data[Reg][1];

    Compare(fextl::fmt::format("XMM0_{}", Reg), Low1, Low2);
    Compare(fextl::fmt::format("XMM1_{}", Reg), High1, High2);
  }

  // GS
  if (TakeBit()) {
    Compare("GS", State1.gs_cached, State2.gs_cached);
  }

  // FS
  if (TakeBit()) {
    Compare("FS", State1.fs_cached, State2.fs_cached);
  }

  return AllEqual;
}

/**
 * @brief Loads the binary configuration blob that accompanies a harness test and answers queries about it.
 *
 * The blob starts with a ConfigStructBase header. The header carries offset/count pairs that locate
 * the variable-sized sections (memory regions, expected register data, memory data, environment options)
 * inside the same blob.
 */
class ConfigLoader final {
public:
  /**
   * @brief Reads the config file and caches its header.
   *
   * If the file cannot be loaded, or is smaller than the fixed header, an error is logged and the
   * loader is left with an all-zero header, so every section count is zero and every query is a no-op.
   *
   * @param ConfigFilename Path of the binary config file.
   */
  void Init(const fextl::string& ConfigFilename) {
    if (!FEXCore::FileLoading::LoadFile(RawConfigFile, ConfigFilename) || RawConfigFile.size() < sizeof(ConfigStructBase)) {
      LogMan::Msg::EFmt("Couldn't load harness config '{}'", ConfigFilename);
      // Never read a header out of a missing or truncated buffer.
      RawConfigFile.clear();
      memset(&BaseConfig, 0, sizeof(BaseConfig));
      return;
    }

    memcpy(&BaseConfig, RawConfigFile.data(), sizeof(ConfigStructBase));
  }

  /**
   * @brief Returns the key/value pairs stored in the environment section.
   *
   * The returned views point into the loaded config buffer, so they are only usable while this
   * loader is alive and has not been re-initialised.
   */
  fextl::vector<std::pair<std::string_view, std::string_view>> GetEnvironmentOptions() {
    fextl::vector<std::pair<std::string_view, std::string_view>> Env {};

    uintptr_t DataOffset = BaseConfig.OptionEnvOptionOffset;
    for (unsigned i = 0; i < BaseConfig.OptionEnvOptionCount; ++i) {
      // Environment variables are null terminated strings
      std::string_view Key = RawConfigFile.data() + DataOffset;
      std::string_view Value = RawConfigFile.data() + DataOffset + Key.size() + 1;
      // Skip both strings and their terminators.
      DataOffset += Key.size() + Value.size() + 2;
      Env.emplace_back(Key, Value);
    }
    return Env;
  }

  /**
   * @brief Checks CPU state(s) against each other and against the expected register data in the config.
   *
   * When both states are given they are first compared with each other using the config's
   * match mask with the ignore mask removed. Afterwards every state that is non-null is compared
   * against each expected-register entry of the config.
   *
   * An entry whose register key is zero or selects a register outside of the known table is
   * reported and counted as a mismatch instead of indexing out of bounds.
   *
   * @param State1 First state, may be nullptr.
   * @param State2 Second state, may be nullptr.
   * @param SupportsAVX Selects the AVX (true) or SSE (false) XMM storage.
   *
   * @return true when every performed comparison matched.
   */
  bool CompareStates(const FEXCore::Core::CPUState* State1, const FEXCore::Core::CPUState* State2, bool SupportsAVX) {
    bool Matches = true;
    uint64_t MatchMask = BaseConfig.OptionMatch & ~BaseConfig.OptionIgnore;
    if (State1 && State2) {
      Matches &= FEX::HarnessHelper::CompareStates(*State1, *State2, MatchMask, ConfigDumpGPRs(), SupportsAVX);
    }

    if (BaseConfig.OptionRegDataCount == 0) {
      return Matches;
    }

    // Byte offset of every addressable register inside CPUState, indexed by the bit position of its register key.
    // Layout: RIP, 16 GPRs, 16 XMM registers, GS, FS, 8 MM registers.
    static constexpr std::array<uint64_t, 43> OffsetArrayAVX = {{
      offsetof(FEXCore::Core::CPUState, rip),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSI]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDI]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBP]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R8]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R9]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R10]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R11]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R12]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R13]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R14]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R15]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[0][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[1][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[2][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[3][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[4][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[5][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[6][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[7][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[8][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[9][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[10][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[11][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[12][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[13][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[14][0]),
      offsetof(FEXCore::Core::CPUState, xmm.avx.data[15][0]),
      offsetof(FEXCore::Core::CPUState, gs_cached),
      offsetof(FEXCore::Core::CPUState, fs_cached),
      offsetof(FEXCore::Core::CPUState, mm[0][0]),
      offsetof(FEXCore::Core::CPUState, mm[1][0]),
      offsetof(FEXCore::Core::CPUState, mm[2][0]),
      offsetof(FEXCore::Core::CPUState, mm[3][0]),
      offsetof(FEXCore::Core::CPUState, mm[4][0]),
      offsetof(FEXCore::Core::CPUState, mm[5][0]),
      offsetof(FEXCore::Core::CPUState, mm[6][0]),
      offsetof(FEXCore::Core::CPUState, mm[7][0]),
    }};
    // Same table, but the XMM entries point at the SSE view of the register file.
    static constexpr std::array<uint64_t, 43> OffsetArraySSE = {{
      offsetof(FEXCore::Core::CPUState, rip),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RAX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RCX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDX]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSI]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RDI]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RBP]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_RSP]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R8]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R9]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R10]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R11]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R12]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R13]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R14]),
      offsetof(FEXCore::Core::CPUState, gregs[FEXCore::X86State::REG_R15]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[0][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[1][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[2][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[3][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[4][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[5][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[6][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[7][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[8][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[9][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[10][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[11][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[12][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[13][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[14][0]),
      offsetof(FEXCore::Core::CPUState, xmm.sse.data[15][0]),
      offsetof(FEXCore::Core::CPUState, gs_cached),
      offsetof(FEXCore::Core::CPUState, fs_cached),
      offsetof(FEXCore::Core::CPUState, mm[0][0]),
      offsetof(FEXCore::Core::CPUState, mm[1][0]),
      offsetof(FEXCore::Core::CPUState, mm[2][0]),
      offsetof(FEXCore::Core::CPUState, mm[3][0]),
      offsetof(FEXCore::Core::CPUState, mm[4][0]),
      offsetof(FEXCore::Core::CPUState, mm[5][0]),
      offsetof(FEXCore::Core::CPUState, mm[6][0]),
      offsetof(FEXCore::Core::CPUState, mm[7][0]),
    }};
    static_assert(OffsetArrayAVX.size() == OffsetArraySSE.size(), "Both offset tables must describe the same registers");

    // Prints one comparison against the expected value when GPR dumping is enabled.
    const auto DumpGPRs = [this](const fextl::string& Name, uint64_t A, uint64_t B) {
      if (!ConfigDumpGPRs()) {
        return;
      }

      fextl::fmt::print("{}: 0x{:016x} {} 0x{:016x} (Expected)\n", Name, A, A == B ? "==" : "!=", B);
    };

    // Dumps the pair, then folds the comparison into the overall result.
    const auto CheckGPRs = [&Matches, DumpGPRs](const fextl::string& Name, uint64_t A, uint64_t B) {
      DumpGPRs(Name, A, B);
      Matches &= A == B;
    };

    uintptr_t DataOffset = BaseConfig.OptionRegDataOffset;
    for (unsigned i = 0; i < BaseConfig.OptionRegDataCount; ++i) {
      const RegDataStructBase* RegData = reinterpret_cast<const RegDataStructBase*>(RawConfigFile.data() + DataOffset);

      // Advance up front so that every way out of this iteration leaves the offset at the next entry.
      DataOffset += sizeof(RegDataStructBase) + RegData->RegDataCount * 8;

      // Copy out of the packed struct; packed fields can't be bound to references.
      const uint64_t RegKey = RegData->RegKey;
      [[maybe_unused]] std::bitset<64> RegFlags = RegKey;
      assert(RegFlags.count() == 1 && "Must set reg data explicitly per register");

      // The register key is a one-hot bitmask; the bit position selects the table entry.
      // Start with an out-of-range sentinel so a zero key is rejected below.
      size_t NameIndex = OffsetArrayAVX.size();
      if (RegKey != 0) {
        NameIndex = FEXCore::FindFirstSetBit(RegKey) - 1;
      }

      if (NameIndex >= OffsetArrayAVX.size()) {
        LogMan::Msg::EFmt("Harness config contains invalid register key 0x{:x}", RegKey);
        Matches = false;
        continue;
      }

      const uint64_t Offset = SupportsAVX ? OffsetArrayAVX[NameIndex] : OffsetArraySSE[NameIndex];
      // Only form a data pointer for states that were actually provided.
      const uint64_t* State1Data = State1 ? reinterpret_cast<const uint64_t*>(reinterpret_cast<const char*>(State1) + Offset) : nullptr;
      const uint64_t* State2Data = State2 ? reinterpret_cast<const uint64_t*>(reinterpret_cast<const char*>(State2) + Offset) : nullptr;

      for (size_t j = 0; j < RegData->RegDataCount; ++j) {
        fextl::string Name;
        if (NameIndex == 0) { // RIP
          Name = "RIP";
        } else if (NameIndex >= 1 && NameIndex < 17) {
          Name = fextl::fmt::format("GPR{}", NameIndex - 1);
        } else if (NameIndex >= 17 && NameIndex < 33) {
          Name = fextl::fmt::format("XMM[{}][{}]", NameIndex - 17, j);
        } else if (NameIndex == 33) {
          Name = "gs";
        } else if (NameIndex == 34) {
          Name = "fs";
        } else if (NameIndex >= 35 && NameIndex < 43) {
          Name = fextl::fmt::format("MM[{}][{}]", NameIndex - 35, j);
        }

        if (State1Data) {
          CheckGPRs(fextl::fmt::format("Core1: {}: ", Name), State1Data[j], RegData->RegValues[j]);
        }
        if (State2Data) {
          CheckGPRs(fextl::fmt::format("Core2: {}: ", Name), State2Data[j], RegData->RegValues[j]);
        }
      }
    }
    return Matches;
  }

  /**
   * @brief Returns the memory regions listed in the config, keyed by region address with the size as value.
   */
  fextl::map<uintptr_t, size_t> GetMemoryRegions() {
    fextl::map<uintptr_t, size_t> regions;

    uintptr_t DataOffset = BaseConfig.OptionMemoryRegionOffset;
    for (unsigned i = 0; i < BaseConfig.OptionMemoryRegionCount; ++i) {
      MemoryRegionBase* Region = reinterpret_cast<MemoryRegionBase*>(RawConfigFile.data() + DataOffset);
      regions[Region->Region] = Region->Size;

      DataOffset += sizeof(MemoryRegionBase);
    }

    return regions;
  }

  /**
   * @brief Copies every memory-data entry of the config to the address recorded in that entry.
   *
   * The destination addresses come straight from the config and are written to directly.
   * NOTE(review): presumably the caller must have mapped them beforehand - confirm against callers.
   */
  void LoadMemory() {
    uintptr_t DataOffset = BaseConfig.OptionMemDataOffset;
    for (unsigned i = 0; i < BaseConfig.OptionMemDataCount; ++i) {
      MemDataStructBase* MemData = reinterpret_cast<MemDataStructBase*>(RawConfigFile.data() + DataOffset);
      memcpy(reinterpret_cast<void*>(MemData->address), &MemData->data, MemData->length);
      // Entries are variable sized: fixed header followed by `length` payload bytes.
      DataOffset += sizeof(MemDataStructBase) + MemData->length;
    }
  }

  /**
   * @brief True when the config's mode field is 1.
   */
  bool Is64BitMode() const {
    return BaseConfig.OptionMode == 1;
  }

  /// Bit values of the config's host-feature field. Each RequiresXXX() accessor tests one of them.
  enum HostFeatures {
    FEATURE_ANY = 0,
    FEATURE_3DNOW = (1 << 0),
    FEATURE_SSE4A = (1 << 1),
    FEATURE_AVX = (1 << 2),
    FEATURE_RAND = (1 << 3),
    FEATURE_SHA = (1 << 4),
    FEATURE_CLZERO = (1 << 5),
    FEATURE_BMI1 = (1 << 6),
    FEATURE_BMI2 = (1 << 7),
    FEATURE_CLWB = (1 << 8),
    FEATURE_LINUX = (1 << 9),
    FEATURE_AES256 = (1 << 10),
    FEATURE_AFP = (1 << 11),
    FEATURE_SSSE3 = (1 << 12),
    FEATURE_SSE4_1 = (1 << 13),
    FEATURE_SSE4_2 = (1 << 14),
    FEATURE_AES = (1 << 15),
    FEATURE_PCLMUL = (1 << 16),
    FEATURE_MOVBE = (1 << 17),
    FEATURE_ADX = (1 << 18),
    FEATURE_XSAVE = (1 << 19),
    FEATURE_RDPID = (1 << 20),
    FEATURE_CLFLOPT = (1 << 21),
    FEATURE_FSGSBASE = (1 << 22),
    FEATURE_EMMI = (1 << 23),
  };

  // Each accessor below reports whether the matching HostFeatures bit is set in the config.
  bool Requires3DNow() const {
    return HasHostFeature(HostFeatures::FEATURE_3DNOW);
  }
  bool RequiresSSE4A() const {
    return HasHostFeature(HostFeatures::FEATURE_SSE4A);
  }
  bool RequiresAVX() const {
    return HasHostFeature(HostFeatures::FEATURE_AVX);
  }
  bool RequiresRAND() const {
    return HasHostFeature(HostFeatures::FEATURE_RAND);
  }
  bool RequiresSHA() const {
    return HasHostFeature(HostFeatures::FEATURE_SHA);
  }
  bool RequiresCLZERO() const {
    return HasHostFeature(HostFeatures::FEATURE_CLZERO);
  }
  bool RequiresBMI1() const {
    return HasHostFeature(HostFeatures::FEATURE_BMI1);
  }
  bool RequiresBMI2() const {
    return HasHostFeature(HostFeatures::FEATURE_BMI2);
  }
  bool RequiresCLWB() const {
    return HasHostFeature(HostFeatures::FEATURE_CLWB);
  }
  bool RequiresLinux() const {
    return HasHostFeature(HostFeatures::FEATURE_LINUX);
  }
  bool RequiresAES256() const {
    return HasHostFeature(HostFeatures::FEATURE_AES256);
  }
  bool RequiresAFP() const {
    return HasHostFeature(HostFeatures::FEATURE_AFP);
  }
  bool RequiresSSSE3() const {
    return HasHostFeature(HostFeatures::FEATURE_SSSE3);
  }
  bool RequiresSSE4_1() const {
    return HasHostFeature(HostFeatures::FEATURE_SSE4_1);
  }
  bool RequiresSSE4_2() const {
    return HasHostFeature(HostFeatures::FEATURE_SSE4_2);
  }
  bool RequiresAES() const {
    return HasHostFeature(HostFeatures::FEATURE_AES);
  }
  bool RequiresPCLMUL() const {
    return HasHostFeature(HostFeatures::FEATURE_PCLMUL);
  }
  bool RequiresMOVBE() const {
    return HasHostFeature(HostFeatures::FEATURE_MOVBE);
  }
  bool RequiresADX() const {
    return HasHostFeature(HostFeatures::FEATURE_ADX);
  }
  bool RequiresXSAVE() const {
    return HasHostFeature(HostFeatures::FEATURE_XSAVE);
  }
  bool RequiresRDPID() const {
    return HasHostFeature(HostFeatures::FEATURE_RDPID);
  }
  bool RequiresCLFLOPT() const {
    return HasHostFeature(HostFeatures::FEATURE_CLFLOPT);
  }
  bool RequiresFSGSBase() const {
    return HasHostFeature(HostFeatures::FEATURE_FSGSBASE);
  }
  bool RequiresEMMI() const {
    return HasHostFeature(HostFeatures::FEATURE_EMMI);
  }

private:
  FEX_CONFIG_OPT(ConfigDumpGPRs, DUMPGPRS);

  // Shared implementation of the RequiresXXX() accessors.
  bool HasHostFeature(HostFeatures Feature) const {
    return (BaseConfig.OptionHostFeatures & Feature) != 0;
  }

  // Fixed-size header found at the very start of the config blob.
  // The *Offset members are byte offsets into the blob, the *Count members are entry counts.
  struct ConfigStructBase {
    uint64_t OptionMatch;
    uint64_t OptionIgnore;
    uint64_t OptionStackSize;
    uint64_t OptionEntryPoint;
    uint32_t OptionABI;
    uint32_t OptionMode;
    uint32_t OptionHostFeatures;
    uint32_t OptionMemoryRegionOffset;
    uint32_t OptionMemoryRegionCount;
    uint32_t OptionRegDataOffset;
    uint32_t OptionRegDataCount;
    uint32_t OptionMemDataOffset;
    uint32_t OptionMemDataCount;
    uint32_t OptionEnvOptionOffset;
    uint32_t OptionEnvOptionCount;
    uint8_t AdditionalData[];
  } FEX_PACKED;

  // One entry of the memory-region section.
  struct MemoryRegionBase {
    uint64_t Region;
    uint64_t Size;
  } FEX_PACKED;

  // One entry of the register-data section: a one-hot register key followed by RegDataCount expected values.
  struct RegDataStructBase {
    uint32_t RegDataCount;
    uint64_t RegKey;
    uint64_t RegValues[];
  } FEX_PACKED;

  // One entry of the memory-data section: destination address followed by `length` payload bytes.
  struct MemDataStructBase {
    uint64_t address;
    uint32_t length;
    uint8_t data[];
  } FEX_PACKED;

  // Entire config file as loaded from disk.
  fextl::vector<char> RawConfigFile;
  // Copy of the header at the start of RawConfigFile.
  ConfigStructBase BaseConfig;
};

class HarnessCodeLoader final : public FEX::CodeLoader {
public:

  HarnessCodeLoader(const fextl::string& Filename, const fextl::string& ConfigFilename) {
    FEXCore::FileLoading::LoadFile(RawASMFile, Filename);

    Config.Init(ConfigFilename);
  }

  uint64_t StackSize() const override {
    const auto Page = sysconf(_SC_PAGESIZE);
    return Page > 0 ? Page : FEXCore::Utils::FEX_PAGE_SIZE;
  }

  uint64_t GetStackPointer() const override {
    LOGMAN_MSG_A_FMT("This should be unused.");
    FEX_UNREACHABLE;
  }

  uint64_t DefaultRIP() const override {
    return RIP;
  }

  bool MapMemory(const std::function<void*(uint64_t, size_t)>& DoMMap) {
    bool LimitedSize = true;
    auto AllocPageSize = sysconf(_SC_PAGESIZE);
    if (AllocPageSize <= 0) {
      AllocPageSize = FEXCore::Utils::FEX_PAGE_SIZE;
    }

    if (LimitedSize) {
      DoMMap(0xe000'0000, AllocPageSize * 10);

      // SIB8
      // We test [-128, -126] (Bottom)
      // We test [-8, 8] (Middle)
      // We test [120, 127] (Top)
      // Can fit in two pages
      DoMMap(0xe800'0000 - AllocPageSize, AllocPageSize * 2);
    } else {
      // This is scratch memory location and SIB8 location
      DoMMap(0xe000'0000, 0x1000'0000);
      // This is for large SIB 32bit displacement testing
      DoMMap(0x2'0000'0000, 0x1'0000'1000);
    }

    // Map in the memory region for the test file
#ifndef _WIN32
    size_t Length = FEXCore::AlignUp(RawASMFile.size(), FEXCore::Utils::FEX_PAGE_SIZE);
    auto ASMPtr = DoMMap(Code_start_page, Length);
#else
    // Special magic DOS area that starts at 0x1'0000
    auto ASMPtr = DoMMap(1, 0x110000 - 1);
#endif
    LOGMAN_THROW_A_FMT((uint64_t)ASMPtr == Code_start_page, "Couldn't allocate code at expected page: 0x{:x} != 0x{:x}", (uint64_t)ASMPtr,
                       Code_start_page);
    memcpy(ASMPtr, RawASMFile.data(), RawASMFile.size());
    RIP = Code_start_page;

    // Map the memory regions the test file asks for
    for (auto& [region, size] : Config.GetMemoryRegions()) {
      DoMMap(region, size);
    }

    if (!Config.Is64BitMode()) {
      // 32-bit gets a fixed page allocated for stack.
      DoMMap(STACK_OFFSET, StackSize());
    }

    LoadMemory();

    return true;
  }

  void LoadMemory() {
    // Memory base here starts at the start location we passed back with GetLayout()
    // This will write at [CODE_START_RANGE + 0, RawFile.size() )
    Config.LoadMemory();
  }

  fextl::vector<std::pair<std::string_view, std::string_view>> GetEnvironmentOptions() {
    return Config.GetEnvironmentOptions();
  }

  bool CompareStates(const FEXCore::Core::CPUState* State1, const FEXCore::Core::CPUState* State2, bool SupportsAVX) {
    return Config.CompareStates(State1, State2, SupportsAVX);
  }

  bool Is64BitMode() const {
    return Config.Is64BitMode();
  }
  bool Requires3DNow() const {
    return Config.Requires3DNow();
  }
  bool RequiresSSE4A() const {
    return Config.RequiresSSE4A();
  }
  bool RequiresAVX() const {
    return Config.RequiresAVX();
  }
  bool RequiresRAND() const {
    return Config.RequiresRAND();
  }
  bool RequiresSHA() const {
    return Config.RequiresSHA();
  }
  bool RequiresCLZERO() const {
    return Config.RequiresCLZERO();
  }
  bool RequiresBMI1() const {
    return Config.RequiresBMI1();
  }
  bool RequiresBMI2() const {
    return Config.RequiresBMI2();
  }
  bool RequiresCLWB() const {
    return Config.RequiresCLWB();
  }
  bool RequiresLinux() const {
    return Config.RequiresLinux();
  }
  bool RequiresAES256() const {
    return Config.RequiresAES256();
  }
  bool RequiresAFP() const {
    return Config.RequiresAFP();
  }
  bool RequiresSSSE3() const {
    return Config.RequiresSSSE3();
  }
  bool RequiresSSE4_1() const {
    return Config.RequiresSSE4_1();
  }
  bool RequiresSSE4_2() const {
    return Config.RequiresSSE4_2();
  }
  bool RequiresAES() const {
    return Config.RequiresAES();
  }
  bool RequiresPCLMUL() const {
    return Config.RequiresPCLMUL();
  }
  bool RequiresMOVBE() const {
    return Config.RequiresMOVBE();
  }
  bool RequiresADX() const {
    return Config.RequiresADX();
  }
  bool RequiresXSAVE() const {
    return Config.RequiresXSAVE();
  }
  bool RequiresRDPID() const {
    return Config.RequiresRDPID();
  }
  bool RequiresCLFLOPT() const {
    return Config.RequiresCLFLOPT();
  }
  bool RequiresFSGSBase() const {
    return Config.RequiresFSGSBase();
  }
  bool RequiresEMMI() const {
    return Config.RequiresEMMI();
  }

private:
  constexpr static uint64_t STACK_OFFSET = 0xc000'0000;
  // Zero is special case to know when we are done
  uint64_t Code_start_page = 0x1'0000;
  uint64_t RIP {};

  fextl::vector<char> RawASMFile;
  ConfigLoader Config;
};

} // namespace FEX::HarnessHelper


================================================
FILE: Source/Tools/CommonTools/Linux/Utils/ELFContainer.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|elf-parsing
desc: Loads and parses an elf to memory. Also handles some loading & logic.
$end_info$
*/

#include "Linux/Utils/ELFContainer.h"
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/SymlinkChecks.h>

#include <algorithm>
#include <cstring>
#include <elf.h>
#include <fcntl.h>
#include <memory>
#include <linux/limits.h>
#include <system_error>
#include <sys/stat.h>
#include <unistd.h>

namespace ELFLoader {

// Classifies a buffer that starts with a potential ELF header.
//
// Returns TYPE_NONE when the ELF magic is missing, TYPE_X86_32 / TYPE_X86_64 for 32-bit i386
// and 64-bit x86-64 ELF files, and TYPE_OTHER_ELF for every other ELF file.
static ELFContainer::ELFType CheckELFType(uint8_t* Data) {
  using ELFType = ELFContainer::ELFType;

  // The magic bytes occupy the first four identification bytes.
  constexpr uint8_t Magic[] = {ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3};
  for (size_t i = 0; i < sizeof(Magic); ++i) {
    if (Data[EI_MAG0 + i] != Magic[i]) {
      return ELFType::TYPE_NONE;
    }
  }

  switch (Data[EI_CLASS]) {
  case ELFCLASS32: {
    const auto* Header32 = reinterpret_cast<const Elf32_Ehdr*>(Data);
    if (Header32->e_machine == EM_386) {
      return ELFType::TYPE_X86_32;
    }
    break;
  }
  case ELFCLASS64: {
    const auto* Header64 = reinterpret_cast<const Elf64_Ehdr*>(Data);
    if (Header64->e_machine == EM_X86_64) {
      return ELFType::TYPE_X86_64;
    }
    break;
  }
  default: break;
  }

  // It is an ELF file, just not one for a supported x86 target.
  return ELFType::TYPE_OTHER_ELF;
}

// Determines the ELF type of the file at the given path.
// Returns TYPE_NONE when the file can't be opened.
ELFContainer::ELFType ELFContainer::GetELFType(const fextl::string& Filename) {
  // Open the file read-only; the descriptor only lives for the duration of this check.
  const int FD = open(Filename.c_str(), O_RDONLY | O_CLOEXEC);
  if (FD == -1) {
    return ELFType::TYPE_NONE;
  }

  const auto DetectedType = GetELFType(FD);
  close(FD);
  return DetectedType;
}

// Determines the ELF type of an already opened file descriptor.
//
// The FD may be tracked by the guest, so neither its file offset nor its status flags may change.
// dup() is no help here because both descriptors would share the same underlying state;
// fstat() and pread() are used instead since they leave the descriptor state alone.
ELFContainer::ELFType ELFContainer::GetELFType(int FD) {
  // Large enough for either ELF header flavour.
  constexpr size_t ELFHeaderSize = std::max(sizeof(Elf32_Ehdr), sizeof(Elf64_Ehdr));

  // Query the file size first.
  struct stat FileInfo {};
  if (fstat(FD, &FileInfo) == -1) {
    // Couldn't get size.
    return ELFType::TYPE_NONE;
  }

  if (FileInfo.st_size < ELFHeaderSize) {
    // Too small to hold an ELF header, so it is not a valid ELF.
    return ELFType::TYPE_NONE;
  }

  // Read the header with pread so the file offset is not adjusted.
  std::array<char, ELFHeaderSize> HeaderBytes;
  const auto BytesRead = pread(FD, HeaderBytes.data(), HeaderBytes.size(), 0);
  if (BytesRead != HeaderBytes.size()) {
    // Couldn't read
    LogMan::Msg::EFmt("Couldn't read potential ELF FD");
    return ELFType::TYPE_NONE;
  }

  return CheckELFType(reinterpret_cast<uint8_t*>(HeaderBytes.data()));
}

// Loads the given ELF file. If the file names an interpreter (PT_INTERP) and CustomInterpreter is false,
// the interpreter is loaded in its place: first from inside RootFS, then from the path as written in the ELF.
// If it names an interpreter and CustomInterpreter is true, the file stays loaded and GetDynamicLibs() is run.
//
// On any load failure `Loaded` is set to false and construction stops early.
ELFContainer::ELFContainer(const fextl::string& Filename, const fextl::string& RootFS, bool CustomInterpreter) {
  Loaded = true;
  if (!LoadELF(Filename)) {
    LogMan::Msg::EFmt("Couldn't Load ELF file");
    Loaded = false;
    return;
  }

  if (InterpreterHeader._64 && !CustomInterpreter) {
    // If we we are dynamic application then we have an interpreter program header
    // We need to load that ELF instead if it exists
    // We are no longer dynamic since we are executing the interpreter
    const uint64_t InterpreterOffset = Mode == MODE_32BIT ? InterpreterHeader._32->p_offset : InterpreterHeader._64->p_offset;

    // Copy the path out of RawFile. The LoadELF calls below replace RawFile's contents, so a pointer
    // into it would dangle if the first attempt reads a file but then rejects it.
    const fextl::string InterpreterPath(&RawFile.at(InterpreterOffset));

    fextl::string RootFSLink = RootFS + InterpreterPath;
    char SymlinkBuffer[PATH_MAX];
    while (FHU::Symlinks::IsSymlink(RootFSLink)) {
      // Do some special handling if the RootFS's linker is a symlink
      // Ubuntu's rootFS by default provides an absolute location symlink to the linker
      // Resolve this around back to the rootfs
      const auto SymlinkTarget = FHU::Symlinks::ResolveSymlink(RootFSLink, SymlinkBuffer);
      if (FHU::Filesystem::IsAbsolute(SymlinkTarget)) {
        RootFSLink = RootFS;
        RootFSLink += SymlinkTarget;
      } else {
        break;
      }
    }
    if (LoadELF(RootFSLink)) {
      // Found the interpreter in the rootfs
    } else if (!LoadELF(InterpreterPath)) {
      LogMan::Msg::EFmt("Failed to find guest ELF's interpter '{}'", InterpreterPath);
      LogMan::Msg::EFmt("Did you forget to set an x86 rootfs? Currently '{}'", RootFS);
      Loaded = false;
      return;
    }
  } else if (InterpreterHeader._64) {
    GetDynamicLibs();
  }


  CalculateMemoryLayouts();
  CalculateSymbols();
}

// Empties every container owned by this object.
// The lookup structures are cleared before the header lists, and the raw file contents are cleared last.
ELFContainer::~ELFContainer() {
  NecessaryLibs.clear();
  SymbolMapByAddress.clear();
  SymbolMap.clear();
  Symbols.clear();
  ProgramHeaders.clear();
  SectionHeaders.clear();
  // The header lists are filled with pointers into RawFile by the LoadELF_* functions.
  RawFile.clear();
}

// Reads the file at Filename into RawFile and parses it as a 32-bit or 64-bit ELF.
//
// Any previously parsed interpreter, section and program header information is discarded once the
// file has been read. Returns false when the file can't be read, is too small to carry the ELF
// identification bytes, lacks the ELF magic, has an unknown class, or is rejected by the
// class-specific loader.
bool ELFContainer::LoadELF(const fextl::string& Filename) {
  if (!FEXCore::FileLoading::LoadFile(RawFile, Filename)) {
    return false;
  }

  InterpreterHeader._64 = nullptr;

  SectionHeaders.clear();
  ProgramHeaders.clear();

  // An empty or tiny file has no identification bytes to inspect.
  if (RawFile.size() < EI_NIDENT) {
    LogMan::Msg::EFmt("ELF file too small to contain an ELF identifier");
    return false;
  }

  uint8_t* Ident = reinterpret_cast<uint8_t*>(RawFile.data());

  if (Ident[EI_MAG0] != ELFMAG0 || Ident[EI_MAG1] != ELFMAG1 || Ident[EI_MAG2] != ELFMAG2 || Ident[EI_MAG3] != ELFMAG3) {
    LogMan::Msg::EFmt("ELF missing magic cookie");
    return false;
  }

  if (Ident[EI_CLASS] == ELFCLASS32) {
    return LoadELF_32();
  } else if (Ident[EI_CLASS] == ELFCLASS64) {
    return LoadELF_64();
  }

  LogMan::Msg::EFmt("Unknown ELF type");
  return false;
}

// Parses the 32-bit ELF image currently held in RawFile.
//
// Copies the file header into `Header`, builds the section/program header views (pointers into
// RawFile), records the PT_INTERP string in `DynamicLinker` and sets the default BRK size.
// Returns false when the image is too small for a header or is not an x86 (EM_386) ELF.
bool ELFContainer::LoadELF_32() {
  Mode = MODE_32BIT;

  // The header is copied straight out of the file image; refuse images that cannot hold one.
  if (RawFile.size() < sizeof(Elf32_Ehdr)) {
    LogMan::Msg::EFmt("32bit ELF too small to contain an ELF header");
    return false;
  }

  memcpy(&Header, RawFile.data(), sizeof(Elf32_Ehdr));
  LOGMAN_THROW_A_FMT(Header._32.e_phentsize == sizeof(Elf32_Phdr), "PH Entry size wasn't correct size");
  LOGMAN_THROW_A_FMT(Header._32.e_shentsize == sizeof(Elf32_Shdr), "SH Entry size wasn't correct size");

  if (Header._32.e_machine != EM_386) {
    LogMan::Msg::DFmt("32bit ELF wasn't x86 based");
    return false;
  }

  SectionHeaders.resize(Header._32.e_shnum);
  ProgramHeaders.resize(Header._32.e_phnum);

  Elf32_Shdr* RawShdrs = reinterpret_cast<Elf32_Shdr*>(&RawFile.at(Header._32.e_shoff));
  Elf32_Phdr* RawPhdrs = reinterpret_cast<Elf32_Phdr*>(&RawFile.at(Header._32.e_phoff));

  for (uint32_t i = 0; i < Header._32.e_shnum; ++i) {
    SectionHeaders[i]._32 = &RawShdrs[i];
  }

  for (uint32_t i = 0; i < Header._32.e_phnum; ++i) {
    ProgramHeaders[i]._32 = &RawPhdrs[i];
    if (ProgramHeaders[i]._32->p_type == PT_INTERP) {
      // The segment contents are the interpreter path stored in the file.
      InterpreterHeader = ProgramHeaders[i];
      DynamicLinker = reinterpret_cast<const char*>(&RawFile.at(InterpreterHeader._32->p_offset));
    }
  }

  // Everything that is not a fixed-address executable is treated as dynamic.
  DynamicProgram = Header._32.e_type != ET_EXEC;

  // Default BRK size
  BRKSize = FEXCore::Utils::FEX_PAGE_SIZE;

  return true;
}

// Parses the 64-bit ELF image currently held in RawFile.
//
// Copies the file header into `Header`, builds the section/program header views (pointers into
// RawFile), records the PT_INTERP string in `DynamicLinker` and sets the default BRK size.
// Returns false when the image is too small for a header or is not an x86-64 ELF.
bool ELFContainer::LoadELF_64() {
  Mode = MODE_64BIT;

  // The header is copied straight out of the file image; refuse images that cannot hold one.
  if (RawFile.size() < sizeof(Elf64_Ehdr)) {
    LogMan::Msg::EFmt("64bit ELF too small to contain an ELF header");
    return false;
  }

  memcpy(&Header, RawFile.data(), sizeof(Elf64_Ehdr));
  LOGMAN_THROW_A_FMT(Header._64.e_phentsize == 56, "PH Entry size wasn't 56");
  LOGMAN_THROW_A_FMT(Header._64.e_shentsize == 64, "SH Entry size wasn't 64");

  if (Header._64.e_machine != EM_X86_64) {
    LogMan::Msg::DFmt("64bit ELF wasn't x86-64 based");
    return false;
  }

  SectionHeaders.resize(Header._64.e_shnum);
  ProgramHeaders.resize(Header._64.e_phnum);

  Elf64_Shdr* RawShdrs = reinterpret_cast<Elf64_Shdr*>(&RawFile.at(Header._64.e_shoff));
  Elf64_Phdr* RawPhdrs = reinterpret_cast<Elf64_Phdr*>(&RawFile.at(Header._64.e_phoff));

  for (uint32_t i = 0; i < Header._64.e_shnum; ++i) {
    SectionHeaders[i]._64 = &RawShdrs[i];
  }

  for (uint32_t i = 0; i < Header._64.e_phnum; ++i) {
    ProgramHeaders[i]._64 = &RawPhdrs[i];
    if (ProgramHeaders[i]._64->p_type == PT_INTERP) {
      // The segment contents are the interpreter path stored in the file.
      InterpreterHeader = ProgramHeaders[i];
      DynamicLinker = reinterpret_cast<const char*>(&RawFile.at(InterpreterHeader._64->p_offset));
    }
  }

  // Everything that is not a fixed-address executable is treated as dynamic.
  DynamicProgram = Header._64.e_type != ET_EXEC;

  // Default BRK size
  BRKSize = 0x1000'0000;

  return true;
}

// Hands the file-backed bytes of every PT_LOAD and PT_TLS segment to `Writer`.
//
// Writer is invoked as Writer(data, Offset + p_paddr, p_filesz), where `data` points at the
// segment's bytes inside the loaded file image.
void ELFContainer::WriteLoadableSections(MemoryWriter Writer, uint64_t Offset) {
  // Both segment kinds are copied the same way, so they share one predicate.
  const auto NeedsCopy = [](uint32_t SegmentType) {
    return SegmentType == PT_LOAD || SegmentType == PT_TLS;
  };

  if (Mode == MODE_32BIT) {
    for (const auto& Entry : ProgramHeaders) {
      const Elf32_Phdr* Segment = Entry._32;
      if (NeedsCopy(Segment->p_type)) {
        Writer(&RawFile.at(Segment->p_offset), Offset + Segment->p_paddr, Segment->p_filesz);
      }
    }
    return;
  }

  for (const auto& Entry : ProgramHeaders) {
    const Elf64_Phdr* Segment = Entry._64;
    if (NeedsCopy(Segment->p_type)) {
      Writer(&RawFile.at(Segment->p_offset), Offset + Segment->p_paddr, Segment->p_filesz);
    }
  }
}

// Looks a symbol up by its exact name; yields nullptr when no such symbol was recorded.
const ELFSymbol* ELFContainer::GetSymbol(const char* Name) {
  const auto Found = SymbolMap.find(Name);
  return Found != SymbolMap.end() ? Found->second : nullptr;
}
// Looks a symbol up by its exact start address; yields nullptr when none starts there.
const ELFSymbol* ELFContainer::GetSymbol(uint64_t Address) {
  const auto Found = SymbolMapByAddress.find(Address);
  return Found != SymbolMapByAddress.end() ? Found->second : nullptr;
}
// Finds the symbol whose extent covers `Address.first`; only the first element of the pair is used.
//
// The candidate is the last symbol starting at or before `Address.first`. It is rejected when its
// end (Address + Size) lies below the queried address.
// NOTE(review): when every symbol starts above `Address.first` the first symbol is used as the
// candidate and is returned, since only its end is compared. Confirm that this is intended.
const ELFSymbol* ELFContainer::GetSymbolInRange(RangeType Address) {
  // Without any symbols there is nothing to step back to.
  if (SymbolMapByAddress.empty()) {
    return nullptr;
  }

  // First symbol strictly above the query, then one step back unless already at the front.
  auto Candidate = SymbolMapByAddress.upper_bound(Address.first);
  if (Candidate != SymbolMapByAddress.begin()) {
    --Candidate;
  }

  const ELFSymbol* Match = Candidate->second;
  const bool EndsBeforeQuery = (Match->Address + Match->Size) < Address.first;
  return EndsBeforeQuery ? nullptr : Match;
}

// Derives the address span covered by the program headers, the BRK region that follows it and
// remembers the PT_TLS header when one is present.
void ELFContainer::CalculateMemoryLayouts() {
  uint64_t LowestAddr = ~0ULL;
  uint64_t HighestAddr = 0;

  if (Mode == MODE_32BIT) {
    for (const auto& Entry : ProgramHeaders) {
      Elf32_Phdr* Segment = Entry._32;
      // Segments without a memory size do not contribute to the span.
      if (Segment->p_memsz > 0) {
        const uint64_t Begin = Segment->p_paddr;
        LowestAddr = std::min(LowestAddr, Begin);
        HighestAddr = std::max(HighestAddr, Begin + Segment->p_memsz);
      }
      if (Segment->p_type == PT_TLS) {
        TLSHeader._32 = Segment;
      }
    }
  } else {
    for (const auto& Entry : ProgramHeaders) {
      Elf64_Phdr* Segment = Entry._64;

      // Many ELFs carry a .GNU_STACK program header that is empty and has a null address; it only
      // describes the stack's protection flags. Such empty entries have to be ignored, otherwise
      // the image would appear to start at address zero.
      if (Segment->p_memsz > 0) {
        const uint64_t Begin = Segment->p_paddr;
        const uint64_t End = Segment->p_paddr + Segment->p_memsz;
        LowestAddr = std::min(LowestAddr, Begin);
        HighestAddr = std::max(HighestAddr, End);
      }
      if (Segment->p_type == PT_TLS) {
        TLSHeader._64 = Segment;
      }
    }
  }

  // The BRK region starts on the page boundary after the last segment and spans BRKSize bytes.
  const uint64_t AlignedEnd = FEXCore::AlignUp(HighestAddr, FEXCore::Utils::FEX_PAGE_SIZE);
  BRKBase = AlignedEnd;

  MinPhysicalMemoryLocation = LowestAddr;
  MaxPhysicalMemoryLocation = AlignedEnd + BRKSize;
  PhysicalMemorySize = MaxPhysicalMemoryLocation - MinPhysicalMemoryLocation;
}

// Builds the symbol tables and the unwind entry list from the section headers.
//
// - Entries of the first SHT_SYMTAB and the first SHT_DYNSYM section are gathered into `Symbols`;
//   every kept symbol is indexed by name (`SymbolMap`) and by address (`SymbolMapByAddress`).
// - The section named ".eh_frame_hdr" is scanned and one value per FDE is stored in `UnwindEntries`.
//
// When neither symbol table exists the function logs and returns early, which also skips the
// .eh_frame_hdr scan. The 32-bit and 64-bit branches mirror each other and differ only in the
// ELF structure types and macros they use.
void ELFContainer::CalculateSymbols() {
  // Find the symbol table
  if (Mode == MODE_32BIT) {
    const Elf32_Shdr* SymTabHeader {nullptr};
    const Elf32_Shdr* StringTableHeader {nullptr};
    const char* StrTab {nullptr};

    const Elf32_Shdr* DynSymTabHeader {nullptr};
    const Elf32_Shdr* DynStringTableHeader {nullptr};
    const char* DynStrTab {nullptr};

    // Only the first section of each kind is used.
    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf32_Shdr* hdr = SectionHeaders.at(i)._32;
      if (hdr->sh_type == SHT_SYMTAB) {
        SymTabHeader = hdr;
        break;
      }
    }

    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf32_Shdr* hdr = SectionHeaders.at(i)._32;
      if (hdr->sh_type == SHT_DYNSYM) {
        DynSymTabHeader = hdr;
        break;
      }
    }

    if (!SymTabHeader && !DynSymTabHeader) {
      LogMan::Msg::IFmt("No Symbol table");
      return;
    }

    uint64_t NumSymTabSymbols = 0;
    uint64_t NumDynSymSymbols = 0;
    if (SymTabHeader) {
      LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong");
      LOGMAN_THROW_A_FMT(SymTabHeader->sh_entsize == sizeof(Elf32_Sym), "Entry size doesn't match symbol entry");

      // sh_link of a symbol table names the section holding its string table.
      StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._32;
      StrTab = &RawFile.at(StringTableHeader->sh_offset);
      NumSymTabSymbols = SymTabHeader->sh_size / SymTabHeader->sh_entsize;
    }

    if (DynSymTabHeader) {
      LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong");
      LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf32_Sym), "Entry size doesn't match symbol entry");

      DynStringTableHeader = SectionHeaders.at(DynSymTabHeader->sh_link)._32;
      DynStrTab = &RawFile.at(DynStringTableHeader->sh_offset);
      NumDynSymSymbols = DynSymTabHeader->sh_size / DynSymTabHeader->sh_entsize;
    }

    uint64_t NumSymbols = NumSymTabSymbols + NumDynSymSymbols;

    // One slot per table entry: .symtab entries first, .dynsym entries after them. Slots of
    // skipped entries are never written, so they keep whatever resize() initialized them to.
    Symbols.resize(NumSymbols);
    for (uint64_t i = 0; i < NumSymTabSymbols; ++i) {
      uint64_t offset = SymTabHeader->sh_offset + i * SymTabHeader->sh_entsize;
      const Elf32_Sym* Symbol = reinterpret_cast<const Elf32_Sym*>(&RawFile.at(offset));
      // Hidden symbols, symbols with a zero value and unnamed symbols are not recorded.
      if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) {
        const char* Name = &StrTab[Symbol->st_name];
        if (Name[0] != '\0') {
          ELFSymbol* DefinedSymbol = &Symbols.at(i);
          DefinedSymbol->FileOffset = offset;
          DefinedSymbol->Address = Symbol->st_value;
          DefinedSymbol->Size = Symbol->st_size;
          DefinedSymbol->Type = ELF32_ST_TYPE(Symbol->st_info);
          DefinedSymbol->Bind = ELF32_ST_BIND(Symbol->st_info);
          DefinedSymbol->Name = Name;
          DefinedSymbol->SectionIndex = Symbol->st_shndx;

          // A later symbol with the same name or address replaces the earlier map entry.
          SymbolMap[DefinedSymbol->Name] = DefinedSymbol;
          SymbolMapByAddress[DefinedSymbol->Address] = DefinedSymbol;
        }
      }
    }

    for (uint64_t i = 0; i < NumDynSymSymbols; ++i) {
      uint64_t offset = DynSymTabHeader->sh_offset + i * DynSymTabHeader->sh_entsize;
      const Elf32_Sym* Symbol = reinterpret_cast<const Elf32_Sym*>(&RawFile.at(offset));
      if (ELF32_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) {
        const char* Name = &DynStrTab[Symbol->st_name];
        if (Name[0] != '\0') {
          ELFSymbol* DefinedSymbol = &Symbols.at(NumSymTabSymbols + i);
          DefinedSymbol->FileOffset = offset;
          DefinedSymbol->Address = Symbol->st_value;
          DefinedSymbol->Size = Symbol->st_size;
          DefinedSymbol->Type = ELF32_ST_TYPE(Symbol->st_info);
          DefinedSymbol->Bind = ELF32_ST_BIND(Symbol->st_info);
          DefinedSymbol->Name = Name;
          DefinedSymbol->SectionIndex = Symbol->st_shndx;

          SymbolMap[DefinedSymbol->Name] = DefinedSymbol;
          SymbolMapByAddress[DefinedSymbol->Address] = DefinedSymbol;
        }
      }
    }

    // Section names live in the string table selected by e_shstrndx.
    const Elf32_Shdr* StrHeader = SectionHeaders.at(Header._32.e_shstrndx)._32;
    const char* SHStrings = &RawFile.at(StrHeader->sh_offset);
    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf32_Shdr* hdr = SectionHeaders.at(i)._32;
      if (strcmp(&SHStrings[hdr->sh_name], ".eh_frame_hdr") == 0) {
        auto eh_frame_hdr = &RawFile.at(hdr->sh_offset);
        // we only handle this specific unwind table encoding
        if (eh_frame_hdr[0] == 1 && eh_frame_hdr[1] == 0x1B && eh_frame_hdr[2] == 0x3 && eh_frame_hdr[3] == 0x3b) {
          // ptr enc : 4 bytes, signed, pcrel
          // fde count : 4 bytes udata
          // table enc : 4 bytes, signed, datarel
          // The count is read from byte offset 8 of the section; the table starts at offset 12.
          int fde_count = *(int*)(eh_frame_hdr + 8);
          UnwindEntries.clear();
          UnwindEntries.reserve(fde_count);

          struct entry {
            int32_t pc;
            int32_t fde;
          };

          entry* Table = (entry*)(eh_frame_hdr + 12);
          for (int f = 0; f < fde_count; f++) {
            // NOTE(review): the stored value is the table's pc field plus the section's file
            // offset (sh_offset); confirm this is the base consumers of AddUnwindEntries expect.
            uintptr_t Entry = (uintptr_t)(Table[f].pc + hdr->sh_offset);
            UnwindEntries.push_back(Entry);
          }
        }
        break;
      }
    }
  } else {
    const Elf64_Shdr* SymTabHeader {nullptr};
    const Elf64_Shdr* StringTableHeader {nullptr};
    const char* StrTab {nullptr};

    const Elf64_Shdr* DynSymTabHeader {nullptr};
    const Elf64_Shdr* DynStringTableHeader {nullptr};
    const char* DynStrTab {nullptr};

    // Only the first section of each kind is used.
    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf64_Shdr* hdr = SectionHeaders.at(i)._64;
      if (hdr->sh_type == SHT_SYMTAB) {
        SymTabHeader = hdr;
        break;
      }
    }

    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf64_Shdr* hdr = SectionHeaders.at(i)._64;
      if (hdr->sh_type == SHT_DYNSYM) {
        DynSymTabHeader = hdr;
        break;
      }
    }

    if (!SymTabHeader && !DynSymTabHeader) {
      LogMan::Msg::IFmt("No Symbol table");
      return;
    }

    uint64_t NumSymTabSymbols = 0;
    uint64_t NumDynSymSymbols = 0;
    if (SymTabHeader) {
      LOGMAN_THROW_A_FMT(SymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong");
      LOGMAN_THROW_A_FMT(SymTabHeader->sh_entsize == sizeof(Elf64_Sym), "Entry size doesn't match symbol entry");

      // sh_link of a symbol table names the section holding its string table.
      StringTableHeader = SectionHeaders.at(SymTabHeader->sh_link)._64;
      StrTab = &RawFile.at(StringTableHeader->sh_offset);
      NumSymTabSymbols = SymTabHeader->sh_size / SymTabHeader->sh_entsize;
    }

    if (DynSymTabHeader) {
      LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_link < SectionHeaders.size(), "Symbol table string table section is wrong");
      LOGMAN_THROW_A_FMT(DynSymTabHeader->sh_entsize == sizeof(Elf64_Sym), "Entry size doesn't match symbol entry");

      DynStringTableHeader = SectionHeaders.at(DynSymTabHeader->sh_link)._64;
      DynStrTab = &RawFile.at(DynStringTableHeader->sh_offset);
      NumDynSymSymbols = DynSymTabHeader->sh_size / DynSymTabHeader->sh_entsize;
    }

    uint64_t NumSymbols = NumSymTabSymbols + NumDynSymSymbols;

    // One slot per table entry: .symtab entries first, .dynsym entries after them. Slots of
    // skipped entries are never written, so they keep whatever resize() initialized them to.
    Symbols.resize(NumSymbols);
    for (uint64_t i = 0; i < NumSymTabSymbols; ++i) {
      uint64_t offset = SymTabHeader->sh_offset + i * SymTabHeader->sh_entsize;
      const Elf64_Sym* Symbol = reinterpret_cast<const Elf64_Sym*>(&RawFile.at(offset));
      // Hidden symbols, symbols with a zero value and unnamed symbols are not recorded.
      if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) {
        const char* Name = &StrTab[Symbol->st_name];
        if (Name[0] != '\0') {
          ELFSymbol* DefinedSymbol = &Symbols.at(i);
          DefinedSymbol->FileOffset = offset;
          DefinedSymbol->Address = Symbol->st_value;
          DefinedSymbol->Size = Symbol->st_size;
          DefinedSymbol->Type = ELF64_ST_TYPE(Symbol->st_info);
          DefinedSymbol->Bind = ELF64_ST_BIND(Symbol->st_info);
          DefinedSymbol->Name = Name;
          DefinedSymbol->SectionIndex = Symbol->st_shndx;

          // A later symbol with the same name or address replaces the earlier map entry.
          SymbolMap[DefinedSymbol->Name] = DefinedSymbol;
          SymbolMapByAddress[DefinedSymbol->Address] = DefinedSymbol;
        }
      }
    }

    for (uint64_t i = 0; i < NumDynSymSymbols; ++i) {
      uint64_t offset = DynSymTabHeader->sh_offset + i * DynSymTabHeader->sh_entsize;
      const Elf64_Sym* Symbol = reinterpret_cast<const Elf64_Sym*>(&RawFile.at(offset));
      if (ELF64_ST_VISIBILITY(Symbol->st_other) != STV_HIDDEN && Symbol->st_value != 0) {
        const char* Name = &DynStrTab[Symbol->st_name];
        if (Name[0] != '\0') {
          ELFSymbol* DefinedSymbol = &Symbols.at(NumSymTabSymbols + i);
          DefinedSymbol->FileOffset = offset;
          DefinedSymbol->Address = Symbol->st_value;
          DefinedSymbol->Size = Symbol->st_size;
          DefinedSymbol->Type = ELF64_ST_TYPE(Symbol->st_info);
          DefinedSymbol->Bind = ELF64_ST_BIND(Symbol->st_info);
          DefinedSymbol->Name = Name;
          DefinedSymbol->SectionIndex = Symbol->st_shndx;

          SymbolMap[DefinedSymbol->Name] = DefinedSymbol;
          SymbolMapByAddress[DefinedSymbol->Address] = DefinedSymbol;
        }
      }
    }

    // Section names live in the string table selected by e_shstrndx.
    const Elf64_Shdr* StrHeader = SectionHeaders.at(Header._64.e_shstrndx)._64;
    const char* SHStrings = &RawFile.at(StrHeader->sh_offset);
    for (uint32_t i = 0; i < SectionHeaders.size(); ++i) {
      const Elf64_Shdr* hdr = SectionHeaders.at(i)._64;
      if (strcmp(&SHStrings[hdr->sh_name], ".eh_frame_hdr") == 0) {
        auto eh_frame_hdr = &RawFile.at(hdr->sh_offset);
        // we only handle this specific unwind table encoding
        if (eh_frame_hdr[0] == 1 && eh_frame_hdr[1] == 0x1B && eh_frame_hdr[2] == 0x3 && eh_frame_hdr[3] == 0x3b) {
          // ptr enc : 4 bytes, signed, pcrel
          // fde count : 4 bytes udata
          // table enc : 4 bytes, signed, datarel
          // The count is read from byte offset 8 of the section; the table starts at offset 12.
          int fde_count = *(int*)(eh_frame_hdr + 8);
          UnwindEntries.clear();
          UnwindEntries.reserve(fde_count);

          struct entry {
            int32_t pc;
            int32_t fde;
          };

          entry* Table = (entry*)(eh_frame_hdr + 12);
          for (int f = 0; f < fde_count; f++) {
            // NOTE(review): the stored value is the table's pc field plus the section's file
            // offset (sh_offset); confirm this is the base consumers of AddUnwindEntries expect.
            uintptr_t Entry = (uintptr_t)(Table[f].pc + hdr->sh_offset);
            UnwindEntries.push_back(Entry);
          }
        }
        break;
      }
    }
  }
}

// Collects the names of all DT_NEEDED libraries into `NecessaryLibs`.
//
// Every SHT_DYNAMIC section is walked entry by entry until DT_NULL or the end of the section.
// The stored pointers reference the string table inside RawFile (selected through sh_link), so
// they are only valid while RawFile keeps its current contents.
void ELFContainer::GetDynamicLibs() {
  if (Mode == MODE_32BIT) {
    for (const auto& Section : SectionHeaders) {
      const Elf32_Shdr* hdr = Section._32;
      // A zero entry size would make the entry count below a division by zero.
      if (hdr->sh_type != SHT_DYNAMIC || hdr->sh_entsize == 0) {
        continue;
      }

      const Elf32_Shdr* StrHeader = SectionHeaders.at(hdr->sh_link)._32;
      const char* SHStrings = &RawFile.at(StrHeader->sh_offset);

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      // The bound has to be checked against the entry index `j`, not the section index.
      for (size_t j = 0; j < Entries; ++j) {
        const Elf32_Dyn* Dynamic = reinterpret_cast<const Elf32_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        if (Dynamic->d_tag == DT_NULL) {
          break;
        }
        if (Dynamic->d_tag == DT_NEEDED) {
          NecessaryLibs.emplace_back(&SHStrings[Dynamic->d_un.d_val]);
        }
      }
    }
  } else {
    for (const auto& Section : SectionHeaders) {
      const Elf64_Shdr* hdr = Section._64;
      // A zero entry size would make the entry count below a division by zero.
      if (hdr->sh_type != SHT_DYNAMIC || hdr->sh_entsize == 0) {
        continue;
      }

      const Elf64_Shdr* StrHeader = SectionHeaders.at(hdr->sh_link)._64;
      const char* SHStrings = &RawFile.at(StrHeader->sh_offset);

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      // The bound has to be checked against the entry index `j`, not the section index.
      for (size_t j = 0; j < Entries; ++j) {
        const Elf64_Dyn* Dynamic = reinterpret_cast<const Elf64_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        if (Dynamic->d_tag == DT_NULL) {
          break;
        }
        if (Dynamic->d_tag == DT_NEEDED) {
          NecessaryLibs.emplace_back(&SHStrings[Dynamic->d_un.d_val]);
        }
      }
    }
  }
}

// Passes every recorded symbol to `Adder`. Slots whose FileOffset is zero are skipped.
void ELFContainer::AddSymbols(SymbolAdder Adder) {
  for (size_t Index = 0; Index < Symbols.size(); ++Index) {
    ELFSymbol* Candidate = &Symbols[Index];
    if (!Candidate->FileOffset) {
      continue;
    }
    Adder(Candidate);
  }
}
// Passes every collected unwind entry to `Adder`, in the order they were recorded.
void ELFContainer::AddUnwindEntries(UnwindAdder Adder) {
  for (auto It = UnwindEntries.begin(); It != UnwindEntries.end(); ++It) {
    const uintptr_t Value = *It;
    Adder(Value);
  }
}

// Applies the relocations found in the SHT_RELA sections of a 64-bit ELF.
//
// ELFBase      - pointer in this process; each entry's r_offset is added to it to find the
//                64-bit slot that gets patched.
// GuestELFBase - value used as the base "B" for the relative relocation kinds.
// Getter       - callback resolving a symbol name to an ELFSymbol. Its second argument selects a
//                lookup mode (0, 2 and 3 are used here); the meaning of those values is defined by
//                the callback's provider and is not visible in this function.
//
// 32-bit ELFs are not processed at all and SHT_REL sections are only logged. Unresolved symbols
// are mostly patched with 0xDEADBEEFBAD0DADx marker values, one distinct marker per failure site.
void ELFContainer::FixupRelocations(void* ELFBase, uint64_t GuestELFBase, SymbolGetter Getter) {
  if (Mode == MODE_32BIT) {
  } else {
    const Elf64_Shdr* RelaHeader {nullptr};
    // NOTE(review): these are declared outside the section loop, so a RELA section with
    // sh_link == 0 keeps using the symbol/string table of an earlier RELA section.
    const Elf64_Shdr* DynSymHeader {nullptr};

    const Elf64_Shdr* StringTableHeader {nullptr};
    const char* StrTab {nullptr};

    for (size_t i = 0; i < SectionHeaders.size(); ++i) {
      const auto* hdr = SectionHeaders[i]._64;
      if (hdr->sh_type == SHT_REL) {
        LogMan::Msg::DFmt("Unhandled REL section");
      } else if (hdr->sh_type == SHT_RELA) {
        RelaHeader = hdr;

        if (RelaHeader->sh_info != 0) {
          LOGMAN_THROW_A_FMT(RelaHeader->sh_info < SectionHeaders.size(), "Rela header pointers to invalid GOT header");
        }

        if (RelaHeader->sh_link != 0) {
          LOGMAN_THROW_A_FMT(RelaHeader->sh_link < SectionHeaders.size(), "Rela header pointers to invalid dyndym header");
          // sh_link of the RELA section selects its symbol table, whose sh_link in turn selects
          // the string table holding the symbol names.
          DynSymHeader = SectionHeaders.at(RelaHeader->sh_link)._64;

          StringTableHeader = SectionHeaders.at(DynSymHeader->sh_link)._64;
          StrTab = &RawFile.at(StringTableHeader->sh_offset);
        }

        const size_t EntryCount = RelaHeader->sh_size / RelaHeader->sh_entsize;
        const auto* Entries = reinterpret_cast<const Elf64_Rela*>(&RawFile.at(RelaHeader->sh_offset));

        for (size_t j = 0; j < EntryCount; ++j) {
          const auto* Entry = &Entries[j];
          // r_info packs the symbol index in the upper 32 bits and the relocation type in the lower 32.
          const uint32_t Sym = Entry->r_info >> 32;
          const uint32_t Type = Entry->r_info & ~0U;
          const Elf64_Sym* EntrySymbol {nullptr};
          const char* EntrySymbolName {nullptr};
          // Symbol index 0 means the relocation does not reference a symbol.
          if (DynSymHeader && Sym != 0) {
            LOGMAN_THROW_A_FMT(DynSymHeader->sh_entsize == sizeof(Elf64_Sym), "Oops, entry size doesn't match");

            const uint64_t offset = DynSymHeader->sh_offset + Sym * DynSymHeader->sh_entsize;
            EntrySymbol = reinterpret_cast<const Elf64_Sym*>(&RawFile.at(offset));
            EntrySymbolName = &StrTab[EntrySymbol->st_name];
          }

          if (Type == R_X86_64_IRELATIVE) { // 37/0x25
            // Indirect (B + A)
            // Here the slot simply receives B + A, exactly like R_X86_64_RELATIVE below.
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            *Location = GuestELFBase + Entry->r_addend;
          } else if (Type == R_X86_64_64) {
            // S + A
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            if (EntrySymbol != nullptr) {
              auto ELFSym = Getter(EntrySymbolName, 0);
              if (ELFSym != nullptr) {
                *Location = ELFSym->Address + Entry->r_addend;
              } else {
                *Location = 0xDEADBEEFBAD0DAD2ULL;
              }
            } else {
              *Location = 0xDEADBEEFBAD0DAD2ULL;
            }
          } else if (Type == R_X86_64_RELATIVE) {
            // B + A
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            *Location = GuestELFBase + Entry->r_addend;
          } else if (Type == R_X86_64_GLOB_DAT) {
            // XXX: This is way wrong
            // S
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            if (EntrySymbol != nullptr) {
              // Lookup modes are tried in the order 2, 0, 3 until one yields a symbol.
              auto ELFSym = Getter(EntrySymbolName, 2); // Leave out Symbols from the main executable and only grab non-weak

              if (!ELFSym) {
                ELFSym = Getter(EntrySymbolName, 0);
              }
              if (!ELFSym) {
                ELFSym = Getter(EntrySymbolName, 3);
              }

              if (ELFSym != nullptr) {
                *Location = ELFSym->Address;
              } else {
                // XXX: This seems to be a loader edge case that if the symbol doesn't exist
                // and it is a weakly defined GLOB_DAT type then it is allowed to continue?
                // If we set Location to a value then apps crash
              }
            } else {
              *Location = 0xDEADBEEFBAD0DAD1ULL;
            }
          } else if (Type == R_X86_64_JUMP_SLOT) {
            // S
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            if (EntrySymbol != nullptr) {
              auto ELFSym = Getter(EntrySymbolName, 0);
              if (!ELFSym) { // XXX: Try again
                ELFSym = Getter(EntrySymbolName, 3);
              }

              if (ELFSym != nullptr) {
                *Location = ELFSym->Address;
              } else {
                // XXX: This seems to be a loader edge case that if the symbol doesn't exist
                // and it is a weakly defined GLOB_DAT type then it is allowed to continue?
                *Location = 0xDEADBEEFBAD0DAD5ULL;
              }
            } else {
              *Location = 0xDEADBEEFBAD0DAD4ULL;
            }
          } else if (Type == R_X86_64_DTPMOD64) {
            // XXX: This is supposed to be the ID of the module that the symbol comes from for TLS purposes?
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            *Location = 0;
          } else if (Type == R_X86_64_DTPOFF64) {
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            if (EntrySymbol != nullptr) {
              *Location = EntrySymbol->st_value + Entry->r_addend;
            } else {
              *Location = 0xDEADBEEFBAD0DAD6ULL;
            }
          } else if (Type == R_X86_64_TPOFF64) {
            uint64_t* Location = reinterpret_cast<uint64_t*>(reinterpret_cast<uintptr_t>(ELFBase) + Entry->r_offset);
            if (EntrySymbol != nullptr) {
              // XXX: This is supposed to be a symbol with a TLS offset?
              *Location = EntrySymbol->st_value + Entry->r_addend;
            } else {
              // If we set Location to a value then apps crash
              // *Location = 0xDEADBEEFBAD0DAD3ULL;
              // NOTE(review): TLSHeader._64 is dereferenced without a null check; it is only set
              // by CalculateMemoryLayouts when a PT_TLS header exists.
              LogMan::Msg::DFmt("TPOFF without Entry? {:x} + {:x} + {:x}", GuestELFBase, TLSHeader._64->p_paddr, Entry->r_addend);
              // Only the first branch can execute; the two alternatives are unreachable leftovers.
              if (1) {
                *Location = TLSHeader._64->p_paddr + Entry->r_addend;
              } else if (Entry->r_offset == 0x1e3dc8) {
                *Location = 0xDEADBEEFBAD0DAD8ULL;
              } else {
                *Location = Entry->r_addend - 0xb00'0;
              }
            }
          } else {
            LogMan::Msg::DFmt("Unknown relocation type: {}(0x{:x})", Type, Type);
          }
        }
      }
    }
  }
}

// Appends the addresses of the ELF's initialization functions to `Locations`.
//
// GuestELFBase - base value added to every address taken from the file.
// Locations    - output vector; entries are appended, existing content is left untouched.
//
// The DT_INIT address (if any) is added first, followed by every entry of each SHT_INIT_ARRAY
// section in file order.
void ELFContainer::GetInitLocations(uint64_t GuestELFBase, fextl::vector<uint64_t>* Locations) {
  if (Mode == MODE_32BIT) {
    // If INIT exists then add that first
    for (const auto& Section : SectionHeaders) {
      const Elf32_Shdr* hdr = Section._32;
      // A zero entry size would make the entry count below a division by zero.
      if (hdr->sh_type != SHT_DYNAMIC || hdr->sh_entsize == 0) {
        continue;
      }

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      // The bound has to be checked against the entry index `j`, not the section index.
      for (size_t j = 0; j < Entries; ++j) {
        const Elf32_Dyn* Dynamic = reinterpret_cast<const Elf32_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        if (Dynamic->d_tag == DT_NULL) {
          break;
        }
        if (Dynamic->d_tag == DT_INIT) {
          Locations->emplace_back(GuestELFBase + Dynamic->d_un.d_val);
        }
      }
    }

    // Fill init_array
    for (const auto& Section : SectionHeaders) {
      const Elf32_Shdr* hdr = Section._32;
      if (hdr->sh_type != SHT_INIT_ARRAY || hdr->sh_entsize == 0) {
        continue;
      }

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      for (size_t j = 0; j < Entries; ++j) {
        // Entries of a 32-bit init_array are 32-bit addresses; reading 64 bits would merge two
        // neighbouring entries and run past the end of the table on the last one.
        const uint32_t Target = *reinterpret_cast<const uint32_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        Locations->emplace_back(GuestELFBase + Target);
      }
    }
  } else {
    // If INIT exists then add that first
    for (const auto& Section : SectionHeaders) {
      const Elf64_Shdr* hdr = Section._64;
      // A zero entry size would make the entry count below a division by zero.
      if (hdr->sh_type != SHT_DYNAMIC || hdr->sh_entsize == 0) {
        continue;
      }

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      // The bound has to be checked against the entry index `j`, not the section index.
      for (size_t j = 0; j < Entries; ++j) {
        const Elf64_Dyn* Dynamic = reinterpret_cast<const Elf64_Dyn*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        if (Dynamic->d_tag == DT_NULL) {
          break;
        }
        if (Dynamic->d_tag == DT_INIT) {
          Locations->emplace_back(GuestELFBase + Dynamic->d_un.d_val);
        }
      }
    }

    // Fill init_array
    for (const auto& Section : SectionHeaders) {
      const Elf64_Shdr* hdr = Section._64;
      if (hdr->sh_type != SHT_INIT_ARRAY || hdr->sh_entsize == 0) {
        continue;
      }

      const size_t Entries = hdr->sh_size / hdr->sh_entsize;
      for (size_t j = 0; j < Entries; ++j) {
        const uint64_t Target = *reinterpret_cast<const uint64_t*>(&RawFile.at(hdr->sh_offset + j * hdr->sh_entsize));
        Locations->emplace_back(GuestELFBase + Target);
      }
    }
  }
}

} // namespace ELFLoader


================================================
FILE: Source/Tools/CommonTools/Linux/Utils/ELFContainer.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <elf.h>
#include <functional>
#include <stddef.h>
#include <utility>

// Add macros which are missing in some versions of <elf.h>
#ifndef ELF32_ST_VISIBILITY
#define ELF32_ST_VISIBILITY(o) ((o) & 0x3)
#endif

#ifndef ELF64_ST_VISIBILITY
#define ELF64_ST_VISIBILITY(o) ((o) & 0x3)
#endif

namespace ELFLoader {
// One symbol taken from an ELF symbol table (.symtab or .dynsym), as filled in by
// ELFContainer::CalculateSymbols.
struct ELFSymbol {
  // Offset of the symbol's table entry inside the ELF file; AddSymbols skips symbols where it is zero.
  uint64_t FileOffset;
  // Copied from st_value.
  uint64_t Address;
  // Copied from st_size.
  uint64_t Size;
  // Symbol type extracted from st_info (ELFxx_ST_TYPE).
  uint8_t Type;
  // Symbol binding extracted from st_info (ELFxx_ST_BIND).
  uint8_t Bind;
  // Copied from st_shndx.
  uint16_t SectionIndex;
  // Points into the string table of the owning container's file image; not owned by this struct.
  const char* Name;
};

class ELFContainer {
public:
  ELFContainer(const fextl::string& Filename, const fextl::string& RootFS, bool CustomInterpreter);
  ~ELFContainer();

  uint64_t GetEntryPoint() const {
    if (Mode == MODE_32BIT) {
      return Header._32.e_entry;
    } else {
      return Header._64.e_entry;
    }
  }

  struct MemoryLayout final {
    uint64_t MinPhysicalMemoryLocation;
    uint64_t MaxPhysicalMemoryLocation;
    uint64_t PhysicalMemorySize;
  };

  MemoryLayout GetLayout() const {
    return {MinPhysicalMemoryLocation, MaxPhysicalMemoryLocation, PhysicalMemorySize};
  }

  struct BRKInfo {
    uint64_t Base;
    uint64_t Size;
  };

  BRKInfo GetBRKInfo() const {
    return {BRKBase, BRKSize};
  }

  // Data, Physical, Size
  using MemoryWriter = std::function<void(void*, uint64_t, uint64_t)>;
  void WriteLoadableSections(MemoryWriter Writer, uint64_t Offset = 0);

  const ELFSymbol* GetSymbol(const char* Name);
  const ELFSymbol* GetSymbol(uint64_t Address);

  using RangeType = std::pair<uint64_t, uint64_t>;
  const ELFSymbol* GetSymbolInRange(RangeType Address);

  bool WasDynamic() const {
    return DynamicProgram;
  }
  bool HasDynamicLinker() const {
    return !DynamicLinker.empty();
  }
  bool WasLoaded() const {
    return Loaded;
  }
  fextl::string& InterpreterLocation() {
    return DynamicLinker;
  }

  const fextl::vector<const char*>* GetNecessaryLibs() const {
    return &NecessaryLibs;
  }

  using SymbolGetter = std::function<ELFSymbol*(const char*, uint8_t)>;
  void FixupRelocations(void* ELFBase, uint64_t GuestELFBase, SymbolGetter Getter);

  using SymbolAdder = std::function<void(ELFSymbol*)>;
  void AddSymbols(SymbolAdder Adder);

  using UnwindAdder = std::function<void(uintptr_t)>;
  void AddUnwindEntries(UnwindAdder Adder);

  void GetInitLocations(uint64_t GuestELFBase, fextl::vector<uint64_t>* Locations);


  bool HasTLS() const {
    return TLSHeader._64 != nullptr;
  }
  uint64_t GetTLSBase() const {
    if (GetMode() == ELFMode::MODE_64BIT) {
      return TLSHeader._64->p_vaddr;
    } else {
      return TLSHeader._32->p_vaddr;
    }
  }

  enum ELFMode {
    MODE_32BIT,
    MODE_64BIT,
  };

  ELFMode GetMode() const {
    return Mode;
  }
  size_t GetProgramHeaderCount() const {
    return ProgramHeaders.size();
  }

  enum ELFType {
    TYPE_NONE,
    TYPE_X86_64,
    TYPE_X86_32,
    TYPE_OTHER_ELF,
  };
  static ELFType GetELFType(const fextl::string& Filename);
  static ELFType GetELFType(int FD);
  static bool IsSupportedELF(const fextl::string& Filename) {
    ELFType Type = GetELFType(Filename);
    return Type == TYPE_X86_64 || Type == TYPE_X86_32;
  }

private:
  bool LoadELF(const fextl::string& Filename);
  bool LoadELF_32();
  bool LoadELF_64();
  void CalculateMemoryLayouts();
  void CalculateSymbols();
  void GetDynamicLibs();

  fextl::vector<char> RawFile;
  union {
    Elf32_Ehdr _32;
    Elf64_Ehdr _64;
  } Header;

  union SectionHeader {
    Elf32_Shdr* _32;
    Elf64_Shdr* _64;
  };

  union ProgramHeader {
    Elf32_Phdr* _32;
    Elf64_Phdr* _64;
  };

  ELFMode Mode;
  fextl::vector<SectionHeader> SectionHeaders;
  fextl::vector<ProgramHeader> ProgramHeaders;
  fextl::vector<ELFSymbol> Symbols;
  fextl::vector<uintptr_t> UnwindEntries;
  fextl::unordered_map<fextl::string, ELFSymbol*> SymbolMap;
  fextl::map<uint64_t, ELFSymbol*> SymbolMapByAddress;

  fextl::vector<const char*> NecessaryLibs;

  uint64_t MinPhysicalMemoryLocation {0};
  uint64_t MaxPhysicalMemoryLocation {0};
  uint64_t PhysicalMemorySize {0};

  uint64_t BRKBase {};
  uint64_t BRKSize {};
  ProgramHeader InterpreterHeader {};
  bool DynamicProgram {false};
  fextl::string DynamicLinker;
  ProgramHeader TLSHeader {};
  bool Loaded {false};
};

} // namespace ELFLoader


================================================
FILE: Source/Tools/CommonTools/Linux/Utils/ELFParser.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <elf.h>
#include <fcntl.h>
#include <optional>
#include <unistd.h>

#include "Linux/Utils/ELFContainer.h"

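/*
  Simple ELF parser: checks for the ELF magic, reads the ELF header
  and loads the program headers (32-bit structures are widened to
  their 64-bit equivalents).
  Also keeps the fd open until Closefd() is called or the parser is destroyed.
*/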

struct ELFParser {
  Elf64_Ehdr ehdr;
  fextl::vector<Elf64_Phdr> phdrs;
  std::optional<fextl::vector<Elf64_Shdr>> shdrs;
  ::ELFLoader::ELFContainer::ELFType type {::ELFLoader::ELFContainer::TYPE_NONE};

  fextl::string InterpreterElf;
  int fd {-1};

  bool ReadElf(int NewFD) {
    Closefd();
    static_assert(EI_CLASS == 4);

    fd = NewFD;
    type = ::ELFLoader::ELFContainer::TYPE_NONE;
    shdrs.reset();

    if (fd == -1) {
      // Likely just doesn't exist
      return false;
    }

    // Get file size
    off_t Size = lseek(fd, 0, SEEK_END);

    if (Size < 4) {
      // Likely invalid can't fit header
      return false;
    }

    // Reset to beginning
    if (lseek(fd, 0, SEEK_SET) == -1) {
      return false;
    }

    uint8_t header[5];
    if (pread(fd, header, sizeof(header), 0) == -1) {
      LogMan::Msg::EFmt("Failed to read elf header from '{}'", fd);
      return false;
    }

    if (header[0] != ELFMAG0 || header[1] != ELFMAG1 || header[2] != ELFMAG2 || header[3] != ELFMAG3) {
      LogMan::Msg::EFmt("Elf header from '{}' doesn't match ELF MAGIC", fd);
      return false;
    }

    type = ::ELFLoader::ELFContainer::TYPE_OTHER_ELF;

    if (header[EI_CLASS] == ELFCLASS32) {
      Elf32_Ehdr hdr32;
      if (pread(fd, &hdr32, sizeof(hdr32), 0) == -1) {
        LogMan::Msg::EFmt("Failed to read Ehdr32 from '{}'", fd);
        return false;
      }

      // do the sizes match up as expected?

      // check elf header
      if (hdr32.e_ehsize != sizeof(hdr32)) {
        LogMan::Msg::EFmt("Invalid e_ehsize32 from '{}'", fd);
        return false;
      }

      // check program header
      if (hdr32.e_phentsize != sizeof(Elf32_Phdr)) {
        LogMan::Msg::EFmt("Invalid e_phentsize32 from '{}'", fd);
        return false;
      }

      // Convert to 64 bit header
      for (int i = 0; i < EI_NIDENT; i++) {
        ehdr.e_ident[i] = hdr32.e_ident[i];
      }

#define COPY(name) ehdr.name = hdr32.name
      COPY(e_type);
      COPY(e_machine);
      COPY(e_version);
      COPY(e_entry);
      COPY(e_phoff);
      COPY(e_shoff);
      COPY(e_flags);
      COPY(e_ehsize);
      COPY(e_phentsize);
      COPY(e_phnum);
      COPY(e_shentsize);
      COPY(e_shnum);
      COPY(e_shstrndx);
#undef COPY

      if (ehdr.e_machine != EM_386) {
        LogMan::Msg::EFmt("Invalid e_machine from '{}'", fd);
        return false;
      }

      type = ::ELFLoader::ELFContainer::TYPE_X86_32;
    } else if (header[EI_CLASS] == ELFCLASS64) {
      if (pread(fd, &ehdr, sizeof(ehdr), 0) == -1) {
        LogMan::Msg::EFmt("Failed to read Ehdr64 from '{}'", fd);
        return false;
      }

      // do the sizes match up as expected?

      // check elf header
      if (ehdr.e_ehsize != sizeof(ehdr)) {
        LogMan::Msg::EFmt("Invalid e_ehsize64 from '{}'", fd);
        return false;
      }

      // check program header
      if (ehdr.e_phentsize != sizeof(Elf64_Phdr)) {
        LogMan::Msg::EFmt("Invalid e_phentsize64 from '{}'", fd);
        return false;
      }

      if (ehdr.e_machine != EM_X86_64) {
        LogMan::Msg::EFmt("Invalid e_machine64 from '{}'", fd);
        return false;
      }

      type = ::ELFLoader::ELFContainer::TYPE_X86_64;
    } else {
      // Unexpected elf type
      LogMan::Msg::EFmt("Unexpected elf type from '{}'", fd);
      return false;
    }

    // sanity check program header count
    if (ehdr.e_phnum < 1 || ehdr.e_phnum > 65536 / ehdr.e_phentsize) {
      LogMan::Msg::EFmt("Too many program headers '{}'", fd);
      return false;
    }

    // sanity check program header offset size.
    if (ehdr.e_phoff > Size || (ehdr.e_phentsize * ehdr.e_phnum) > (Size - ehdr.e_phoff)) {
      LogMan::Msg::EFmt("Program headers exceeds size of program");
      return false;
    }

    if (type == ::ELFLoader::ELFContainer::TYPE_X86_32) {
      fextl::vector<Elf32_Phdr> phdrs32(ehdr.e_phnum);

      if (pread(fd, phdrs32.data(), sizeof(Elf32_Phdr) * ehdr.e_phnum, ehdr.e_phoff) == -1) {
        LogMan::Msg::EFmt("Failed to read phdr32 from '{}'", fd);
        return false;
      }

      // Convert to 64 bit program headers
      phdrs.resize(ehdr.e_phnum);

      for (int i = 0; i < ehdr.e_phnum; i++) {
#define COPY(name) phdrs[i].name = phdrs32[i].name

        COPY(p_type);
        COPY(p_offset);
        COPY(p_vaddr);
        COPY(p_paddr);
        COPY(p_filesz);
        COPY(p_memsz);
        COPY(p_flags);
        COPY(p_align);

#undef COPY
      }
    } else {
      phdrs.resize(ehdr.e_phnum);

      if (pread(fd, phdrs.data(), sizeof(Elf64_Phdr) * ehdr.e_phnum, ehdr.e_phoff) == -1) {
        LogMan::Msg::EFmt("Failed to read phdr64 from '{}'", fd);
        return false;
      }
    }

    for (const auto& phdr : phdrs) {
      if (phdr.p_type == PT_INTERP) {
        InterpreterElf.resize(phdr.p_filesz);

        if (pread(fd, InterpreterElf.data(), phdr.p_filesz, phdr.p_offset) == -1) {
          LogMan::Msg::EFmt("Failed to read interpreter from '{}'", fd);
          return false;
        }
      }
    }

    return true;
  }

  ptrdiff_t FileToVA(off_t FileOffset) const {
    for (const auto& phdr : phdrs) {
      if (phdr.p_offset <= FileOffset && (phdr.p_offset + phdr.p_filesz) > FileOffset) {
        auto SectionFileOffset = FileOffset - phdr.p_offset;

        if (SectionFileOffset < phdr.p_memsz) {
          return SectionFileOffset + phdr.p_vaddr;
        }
      }
    }

    return {};
  }

  off_t VAToFile(ptrdiff_t VAOffset) const {
    for (const auto& phdr : phdrs) {
      if (phdr.p_vaddr <= VAOffset && (phdr.p_vaddr + phdr.p_memsz) > VAOffset) {
        auto SectionVAOffset = VAOffset - phdr.p_vaddr;

        if (SectionVAOffset < phdr.p_filesz) {
          return SectionVAOffset + phdr.p_offset;
        }
      }
    }

    return {};
  }

  /// Opens `file` read-only and hands the descriptor (or -1 if opening failed) to ReadElf(int).
  bool ReadElf(const fextl::string& file) {
    return ReadElf(::open(file.c_str(), O_RDONLY));
  }

  /**
   * Checks if DT_TEXTREL/DF_TEXTREL exist in the PT_DYNAMIC segment.
   *
   * These indicate that the ELF has relocations that cover to read-only code
   * pages. The dynamic loader will temporarily map these pages as writeable
   * to apply the relocations.
   */
  bool HasCodeRelocations() const {
    if (fd == -1) {
      return false;
    }

    auto phdr_it = std::ranges::find_if(phdrs, [](auto& phdr) { return phdr.p_type == PT_DYNAMIC; });
    if (phdr_it == phdrs.end()) {
      return false;
    }

    if (type == ::ELFLoader::ELFContainer::TYPE_X86_32) {
      return HasCodeRelocations<Elf32_Dyn>(*phdr_it);
    } else {
      return HasCodeRelocations<Elf64_Dyn>(*phdr_it);
    }
  }

  /**
   * Scans the dynamic entries of `phdr` (a PT_DYNAMIC segment) for DT_TEXTREL or a DT_FLAGS
   * entry with DF_TEXTREL set. Scanning stops at the first DT_NULL entry.
   *
   * @tparam Elf_Dyn Elf32_Dyn or Elf64_Dyn, matching the file's class.
   * @return true if a text relocation marker was found, false otherwise or if reading failed.
   */
  template<typename Elf_Dyn>
  bool HasCodeRelocations(const Elf64_Phdr& phdr) const {
    const size_t EntryCount = phdr.p_filesz / sizeof(Elf_Dyn);
    fextl::vector<Elf_Dyn> Entries(EntryCount);

    // Only read whole entries: p_filesz comes from the file and isn't guaranteed to be a multiple
    // of the entry size, so reading p_filesz bytes could write past the end of the buffer.
    // Entries not filled by a short read stay zeroed, i.e. DT_NULL, which ends the scan.
    if (pread(fd, Entries.data(), EntryCount * sizeof(Elf_Dyn), phdr.p_offset) == -1) {
      return false;
    }

    for (const auto& Entry : Entries) {
      if (Entry.d_tag == DT_NULL) {
        break;
      }
      if (Entry.d_tag == DT_TEXTREL) {
        return true;
      }
      if (Entry.d_tag == DT_FLAGS && (Entry.d_un.d_val & DF_TEXTREL)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Parses relocation sections (SHT_REL/SHT_RELA) and returns a map of
   * offsets to relocations that FEX's JIT must know about.
   *
   * Relocation types that ClassifyRelocation32/64 don't recognize are left out. Offsets are
   * truncated to 32 bits for use as map keys. Returns an empty map if no file is open or the
   * section headers can't be loaded.
   */
  fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> PopulateRelocations() {
    if (fd == -1 || !EnsureSectionHeadersLoaded()) {
      return {};
    }

    fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> Relocations;
    const bool Is32Bit = (type == ::ELFLoader::ELFContainer::TYPE_X86_32);

    // Fills `Entries` with as many whole records as fit into the section.
    // The count is derived from the in-memory record size instead of the file-supplied sh_entsize,
    // and exactly that many bytes are read, so a malformed sh_entsize/sh_size can't make pread()
    // write past the end of the buffer. Returns false if pread() failed.
    const auto ReadEntries = [this](auto& Entries, const Elf64_Shdr& shdr) -> bool {
      const size_t EntrySize = sizeof(Entries[0]);
      Entries.resize(shdr.sh_size / EntrySize);
      return pread(fd, Entries.data(), Entries.size() * EntrySize, shdr.sh_offset) != -1;
    };

    for (const auto& shdr : *shdrs) {
      // Sections without fixed-size entries can't be relocation tables
      if (shdr.sh_entsize == 0) {
        continue;
      }

      if (!Is32Bit) {
        if (shdr.sh_type == SHT_REL) {
          LOGMAN_THROW_A_FMT(false, "Unexpected relocation section type");
        } else if (shdr.sh_type == SHT_RELA) {
          fextl::vector<Elf64_Rela> Entries;
          if (!ReadEntries(Entries, shdr)) {
            LOGMAN_THROW_A_FMT(false, "Failed to read RELA section");
          }
          for (const auto& Entry : Entries) {
            auto RelocType = ClassifyRelocation64(ELF64_R_TYPE(Entry.r_info));
            if (RelocType) {
              Relocations.emplace(static_cast<uint32_t>(Entry.r_offset), *RelocType);
            }
          }
        }
      } else {
        if (shdr.sh_type == SHT_REL) {
          fextl::vector<Elf32_Rel> Entries;
          if (!ReadEntries(Entries, shdr)) {
            LOGMAN_THROW_A_FMT(false, "Failed to read REL section");
          }
          for (const auto& Entry : Entries) {
            auto RelocType = ClassifyRelocation32(ELF32_R_TYPE(Entry.r_info));
            if (RelocType) {
              Relocations.emplace(static_cast<uint32_t>(Entry.r_offset), *RelocType);
            }
          }
        } else if (shdr.sh_type == SHT_RELA) {
          fextl::vector<Elf32_Rela> Entries;
          if (!ReadEntries(Entries, shdr)) {
            LOGMAN_THROW_A_FMT(false, "Failed to read RELA section");
          }
          for (const auto& Entry : Entries) {
            auto RelocType = ClassifyRelocation32(ELF32_R_TYPE(Entry.r_info));
            if (RelocType) {
              Relocations.emplace(static_cast<uint32_t>(Entry.r_offset), *RelocType);
            }
          }
        }
      }
    }

    return Relocations;
  }

  /**
   * Returns underlying 32-bit relocation entries.
   * SHT_REL entries are implicitly converted to Elf32_Rela; their addend is taken from the
   * st_value of the referenced SHT_DYNSYM symbol, or 0 if the symbol index is out of range.
   *
   * Returns an empty vector if no file is open, the ELF isn't 32-bit x86, or the section
   * headers can't be loaded.
   */
  fextl::vector<Elf32_Rela> ReadRawRelocations32() {
    if (fd == -1 || type != ::ELFLoader::ELFContainer::TYPE_X86_32 || !EnsureSectionHeadersLoaded()) {
      return {};
    }

    // Fills `Entries` with as many whole records as fit into the section.
    // Both the record count and the number of bytes read are derived from the in-memory record
    // size, so a section size that isn't a multiple of it (or a bogus sh_entsize) can't make
    // pread() write past the end of the buffer. Returns false if pread() failed.
    const auto ReadEntries = [this](auto& Entries, const Elf64_Shdr& shdr) -> bool {
      const size_t EntrySize = sizeof(Entries[0]);
      Entries.resize(shdr.sh_size / EntrySize);
      return pread(fd, Entries.data(), Entries.size() * EntrySize, shdr.sh_offset) != -1;
    };

    // Load dynamic symbol table (find SHT_DYNSYM section)
    fextl::vector<Elf32_Sym> DynSyms;
    auto DynsymHeader = std::ranges::find_if(*shdrs, [](auto& shdr) { return shdr.sh_type == SHT_DYNSYM; });
    if (DynsymHeader != shdrs->end()) {
      if (!ReadEntries(DynSyms, *DynsymHeader)) {
        LOGMAN_MSG_A_FMT("Could not load DYNSYM section");
      }
    }

    fextl::vector<Elf32_Rela> Result;
    for (const auto& shdr : *shdrs) {
      // Sections without fixed-size entries can't be relocation tables
      if (shdr.sh_entsize == 0) {
        continue;
      }

      if (shdr.sh_type == SHT_REL) {
        fextl::vector<Elf32_Rel> Entries;
        if (!ReadEntries(Entries, shdr)) {
          LOGMAN_MSG_A_FMT("Could not load REL section");
        }
        for (const auto& Entry : Entries) {
          auto Sym = ELF32_R_SYM(Entry.r_info);
          int32_t Addend = (Sym < DynSyms.size()) ? static_cast<int32_t>(DynSyms[Sym].st_value) : 0;
          Result.push_back(Elf32_Rela {Entry.r_offset, Entry.r_info, Addend});
        }
      } else if (shdr.sh_type == SHT_RELA) {
        fextl::vector<Elf32_Rela> Entries;
        if (!ReadEntries(Entries, shdr)) {
          LOGMAN_MSG_A_FMT("Could not load RELA section");
        }
        Result.insert(Result.end(), Entries.begin(), Entries.end());
      }
    }

    return Result;
  }

  /// Closes the held file descriptor, if any, and marks the parser as having none.
  void Closefd() {
    if (fd == -1) {
      return;
    }

    close(fd);
    fd = -1;
  }

  /// Releases the file descriptor on destruction.
  // NOTE(review): copy operations are implicitly generated, so a copied parser would close the
  // same descriptor twice - confirm no caller copies this struct.
  ~ELFParser() {
    Closefd();
  }

private:
  /// Returns true if loading section headers succeeded
  bool EnsureSectionHeadersLoaded() {
    if (shdrs.has_value()) {
      return !shdrs->empty();
    }

    if (fd == -1 || ehdr.e_shoff == 0 || ehdr.e_shnum == 0) {
      shdrs.emplace();
      return false;
    }

    if (type == ::ELFLoader::ELFContainer::TYPE_X86_64) {
      shdrs.emplace(ehdr.e_shnum);
      if (pread(fd, shdrs->data(), sizeof(Elf64_Shdr) * ehdr.e_shnum, ehdr.e_shoff) == -1) {
        shdrs->clear();
        return false;
      }
    } else {
      fextl::vector<Elf32_Shdr> shdrs32(ehdr.e_shnum);
      if (pread(fd, shdrs32.data(), sizeof(Elf32_Shdr) * ehdr.e_shnum, ehdr.e_shoff) == -1) {
        shdrs.emplace();
        return false;
      }

      shdrs.emplace(ehdr.e_shnum);
      for (int i = 0; i < ehdr.e_shnum; i++) {
#define COPY(name) (*shdrs)[i].name = shdrs32[i].name
        COPY(sh_name);
        COPY(sh_type);
        COPY(sh_flags);
        COPY(sh_addr);
        COPY(sh_offset);
        COPY(sh_size);
        COPY(sh_link);
        COPY(sh_info);
        COPY(sh_addralign);
        COPY(sh_entsize);
#undef COPY
      }
    }

    return !shdrs->empty();
  }

  /// Maps an i386 relocation type to the relocation kind reported to the JIT.
  /// Returns std::nullopt for types that aren't tracked at all.
  static std::optional<FEXCore::GuestRelocationType> ClassifyRelocation32(uint32_t Type) {
    switch (Type) {
    case R_386_RELATIVE:
    case R_386_32: return FEXCore::GuestRelocationType::Rel32;
    case R_386_PC32:
    case R_386_TLS_TPOFF:
      // Currently not handled
      return FEXCore::GuestRelocationType::Skip;
    default: return std::nullopt;
    }
  }

  /// Maps an x86-64 relocation type to the relocation kind reported to the JIT.
  /// Returns std::nullopt for types that aren't tracked at all.
  static std::optional<FEXCore::GuestRelocationType> ClassifyRelocation64(uint32_t Type) {
    switch (Type) {
    case R_X86_64_RELATIVE:
    case R_X86_64_64: return FEXCore::GuestRelocationType::Rel64;
    case R_X86_64_32: return FEXCore::GuestRelocationType::Rel32;
    default: return std::nullopt;
    }
  }
};


================================================
FILE: Source/Tools/CommonTools/PortabilityInfo.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Common/Config.h"

namespace FEX {
// Returns the directory (including the trailing '/') that contains the running executable,
// or std::nullopt if `/proc/self/exe` can't be read.
static inline std::optional<fextl::string> GetSelfPath() {
  // `/proc/self/exe` is always a symlink to the absolute path of the running executable,
  // so resolving it yields the path FEX is executing from.
  char Buffer[PATH_MAX];
  const auto Length = readlink("/proc/self/exe", Buffer, PATH_MAX);
  if (Length == -1) {
    return std::nullopt;
  }

  // readlink() doesn't NUL-terminate, so the view is bounded by the returned length
  const std::string_view ExePath {Buffer, std::min<size_t>(PATH_MAX, Length)};

  // Keep everything up to and including the last '/'. Without any '/', npos + 1 wraps to 0
  // and the result is empty.
  const auto DirectoryLength = ExePath.find_last_of('/') + 1;
  return fextl::string {ExePath.substr(0, DirectoryLength)};
}

// Determines whether FEX runs in portable mode.
// Portable mode is reported only when the FEX_PORTABLE environment variable parses as a non-zero
// unsigned integer and the executable's directory can be determined; that directory is then
// returned alongside `true`. In every other case the result is {false, {}}.
static inline FEX::Config::PortableInformation ReadPortabilityInformation() {
  const char* const EnvValue = getenv("FEX_PORTABLE");
  if (EnvValue == nullptr) {
    return {false, {}};
  }

  const std::string_view EnvView {EnvValue};
  uint32_t Enabled {};
  const auto Parsed = std::from_chars(EnvView.data(), EnvView.data() + EnvView.size(), Enabled);

  const bool ParsedOk = Parsed.ec == std::errc {};
  if (!ParsedOk || Enabled == 0) {
    return {false, {}};
  }

  // Extract the absolute path from the FEX path
  if (auto SelfPath = GetSelfPath()) {
    return {true, *SelfPath};
  }

  return {false, {}};
}
} // namespace FEX


================================================
FILE: Source/Tools/FEXBash/CMakeLists.txt
================================================
# FEXBash: launcher executable built from the single source file FEXBash.cpp.
add_executable(FEXBash FEXBash.cpp)

# Link privately against FEXCore, the Common tools library and JemallocLibs.
target_link_libraries(FEXBash PRIVATE FEXCore Common JemallocLibs)

# Project helper; presumably enables linker garbage collection for the target
# (see Data/CMake/LinkerGC.cmake - TODO confirm).
LinkerGC(FEXBash)

# Install the binary into <prefix>/bin as part of the "Runtime" component.
install(TARGETS FEXBash RUNTIME
  DESTINATION bin
  COMPONENT Runtime)


================================================
FILE: Source/Tools/FEXBash/FEXBash.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|FEXBash
desc: Launches bash under FEX and passes arguments via -c to it
$end_info$
*/

#include <FEXCore/fextl/fmt.h>

#include <cerrno>
#include <cstring>
#include <filesystem>
#include <string>
#include <unistd.h>
#include <vector>

int main(int argc, char** argv, char** const envp) {
  // Skip argv[0].
  const int ArgCount = argc - 1;
  const bool EmptyArgs = ArgCount == 0;

  std::vector<const char*> Argv;
  // FEX will handle finding bash in the rootfs
  // Use /bin/sh for -c commands and /bin/bash for interactive mode
  const char* BashPath = EmptyArgs ? "/bin/bash" : "/bin/sh";

  std::string FEXPath = std::filesystem::path(argv[0]).parent_path().string() + "/FEX";

  // Check if a local FEX to FEXBash exists
  // If it does then it takes priority over the installed one
  if (!std::filesystem::exists(FEXPath)) {
    char FEXBashPath[PATH_MAX];
    auto Result = readlink("/proc/self/exe", FEXBashPath, PATH_MAX);
    if (Result != -1) {
      FEXPath = std::filesystem::path(&FEXBashPath[0], &FEXBashPath[Result]).parent_path().string() + "/FEX";
    }

    if (!std::filesystem::exists(FEXPath)) {
      fmt::print(stderr, "Could not locate FEX executable\n");
      std::abort();
    }
  }
  const char* FEXArgs[] = {
    FEXPath.c_str(),
    BashPath,
    "-c",
  };

  // Remove -c argument if arguments are empty
  // Lets us start an emulated bash instance
  const size_t FEXArgsCount = std::size(FEXArgs) - (EmptyArgs ? 1 : 0);

  Argv.resize(ArgCount + FEXArgsCount);

  // Pass in the FEX arguments
  for (size_t i = 0; i < FEXArgsCount; ++i) {
    Argv[i] = FEXArgs[i];
  }

  // Bring in passed in arguments
  for (size_t i = 0; i < ArgCount; ++i) {
    Argv[i + FEXArgsCount] = argv[i + 1];
  }

  // Set --norc when no arguments are passed so PS1 doesn't get overwritten
  const char* NoRC = "--norc";
  if (EmptyArgs) {
    Argv.emplace_back(NoRC);
  }

  Argv.emplace_back(nullptr);

  // Prepend `FEXBash>` to PS1 to be less confusing about running under emulation
  // In most cases PS1 isn't an environment variable, but instead a shell variable
  // But in case the user has set the PS1 environment variable then still prepend
  //
  // To get the shell variables as an environment variable then you can do `PS1=$PS1 FEXBash`
  std::vector<const char*> Envp {};
  char* PS1Env {};
  for (unsigned i = 0;; ++i) {
    if (envp[i] == nullptr) {
      break;
    }
    if (strstr(envp[i], "PS1=") == envp[i]) {
      PS1Env = envp[i];
    } else {
      Envp.emplace_back(envp[i]);
    }
  }

  std::string PS1 = "PS1=FEXBash-\\u@\\h:\\w> ";
  if (PS1Env) {
    PS1 += &PS1Env[strlen("PS1=")];
  }
  Envp.emplace_back(PS1.c_str());
  Envp.emplace_back(nullptr);

  return execve(Argv[0], const_cast<char* const*>(Argv.data()), const_cast<char* const*>(&Envp[0]));
}


================================================
FILE: Source/Tools/FEXConfig/CMakeLists.txt
================================================
# Let CMake run Qt's moc automatically for targets defined in this directory.
set(CMAKE_AUTOMOC ON)

# FEXConfig: configuration tool built from Main.cpp/Main.h.
add_executable(FEXConfig)
target_sources(FEXConfig PRIVATE Main.cpp Main.h)

target_link_libraries(FEXConfig PRIVATE Common JemallocDummy)
# Use the Qt6 resource file and libraries when Qt6 was found, otherwise fall back to the Qt5 ones.
if (Qt6_FOUND)
  qt_add_resources(QT_RESOURCES qml6.qrc)
  target_link_libraries(FEXConfig PRIVATE Qt6::Qml Qt6::Quick Qt6::Widgets)
else()
  qt_add_resources(QT_RESOURCES qml5.qrc)
  target_link_libraries(FEXConfig PRIVATE Qt5::Qml Qt5::Quick Qt5::Widgets)
endif()
# Compile the generated resource sources into the executable.
target_sources(FEXConfig PRIVATE ${QT_RESOURCES})

# Project helper; presumably enables linker garbage collection for the target
# (see Data/CMake/LinkerGC.cmake - TODO confirm).
LinkerGC(FEXConfig)

# Install the binary into <prefix>/bin as part of the "Runtime" component.
install(TARGETS FEXConfig RUNTIME
  DESTINATION bin
  COMPONENT Runtime)


================================================
FILE: Source/Tools/FEXConfig/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Main.h"

#include <Common/Async.h>
#include <Common/Config.h>
#include <Common/FileFormatCheck.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/Config/Config.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <QApplication>
#include <QMessageBox>
#include <QQmlApplicationEngine>
#include <QQuickWindow>

#include <sys/inotify.h>
#include <poll.h>

#include <charconv>
#include <cstdlib>
#include <filesystem>
#include <stdexcept>
#include <thread>
#include <utility>

namespace fextl {
// Converts a std::filesystem::path into a fextl::string by copying its narrow string
// representation (up to the first NUL character).
inline fextl::string string_from_path(const std::filesystem::path& Path) {
  const std::string Native = Path.string();
  return fextl::string(Native.c_str());
}
} // namespace fextl

// Configuration layer currently being edited; assigned by LoadDefaultSettings() and OpenFile().
static fextl::unique_ptr<FEXCore::Config::Layer> LoadedConfig {};
// Maps each config option to its (JSON name, type name) pair; filled in by ConfigInit().
static fextl::map<FEXCore::Config::ConfigOption, std::pair<std::string, std::string_view>> ConfigToNameLookup;
// Reverse lookup from JSON name to config option; filled in by ConfigInit().
static fextl::map<std::string, FEXCore::Config::ConfigOption> NameToConfigLookup;

#include "Common/JSONPool.h"
#include <FEXCore/Utils/FileLoading.h>

// Registers every library named in the "DB" object of ThunksDB.json with `HostLibsDB`,
// marking each as disabled. `Global` selects which config directory the file is read from.
// A missing file or a missing/non-object "DB" property is silently ignored; unparsable JSON
// is fatal.
static void LoadThunkDatabase(fextl::unordered_map<fextl::string, bool>& HostLibsDB, bool Global) {
  auto DatabasePath = FEXCore::Config::GetConfigDirectory(Global) + "ThunksDB.json";

  fextl::vector<char> Contents;
  const bool FileLoaded = FEXCore::FileLoading::LoadFile(Contents, DatabasePath);
  if (!FileLoaded) {
    return;
  }

  FEX::JSON::JsonAllocator Pool {};
  const json_t* Root = FEX::JSON::CreateJSON(Contents, Pool);
  if (Root == nullptr) {
    ERROR_AND_DIE_FMT("Failed to parse JSON from ThunkDB file '{}' - invalid JSON format", DatabasePath);
  }

  const json_t* DB = json_getProperty(Root, "DB");
  const bool IsObject = DB != nullptr && json_getType(DB) == JSON_OBJ;
  if (!IsObject) {
    return;
  }

  // Each child of "DB" is one library; only its name matters here
  const json_t* Library = json_getChild(DB);
  while (Library != nullptr) {
    HostLibsDB[json_getName(Library)] = false;
    Library = json_getSibling(Library);
  }
}

// Registers the role names exposed by the model and populates it from the loaded configuration.
ConfigModel::ConfigModel() {
  const QHash<int, QByteArray> RoleNames {
    {Qt::DisplayRole, "display"},
    {Qt::UserRole + 1, "optionType"},
    {Qt::UserRole + 2, "optionValue"},
  };
  setItemRoleNames(RoleNames);

  Reload();
}

// Rebuilds the model rows from LoadedConfig: one row per existing, non-list option, carrying
// the option's JSON name (display), type name (UserRole + 1) and string value (UserRole + 2).
void ConfigModel::Reload() {
  const auto& Options = LoadedConfig->GetOptionMap();

  beginResetModel();
  removeRows(0, rowCount());

  for (const auto& [Key, Value] : Options) {
    // String lists are omitted from the model since they require special handling
    const bool Skip = !LoadedConfig->OptionExists(Key) || std::holds_alternative<fextl::list<fextl::string>>(Value);
    if (Skip) {
      continue;
    }

    auto& [JsonName, TypeName] = ConfigToNameLookup.find(Key)->second;

    auto Row = new QStandardItem(QString::fromStdString(JsonName));
    Row->setData(TypeName.data(), Qt::UserRole + 1);
    Row->setData(QString::fromStdString(std::get<fextl::string>(Value).c_str()), Qt::UserRole + 2);
    appendRow(Row);
  }

  endResetModel();
}

// Returns whether the option with JSON name `Name` is set in the loaded configuration.
// Throws std::out_of_range (from the lookup) for unknown names. The bool parameter is unused.
bool ConfigModel::has(const QString& Name, bool) const {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  return LoadedConfig->OptionExists(Option);
}

// Removes the option with JSON name `Name` from the loaded configuration and refreshes the model.
// The option is expected to exist (checked by assert only).
void ConfigModel::erase(const QString& Name) {
  assert(has(Name, false));

  const auto Option = NameToConfigLookup.at(Name.toStdString());
  LoadedConfig->Erase(Option);

  Reload();
}

// Returns true if the option with JSON name `Name` is stored as "1".
// Throws std::runtime_error if the option has no value. The bool parameter is unused.
bool ConfigModel::getBool(const QString& Name, bool) const {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  auto Stored = LoadedConfig->Get(Option);

  const bool HasValue = Stored && *Stored;
  if (!HasValue) {
    throw std::runtime_error("Could not find setting");
  }

  return **Stored == "1";
}

// Stores `Value` as "1"/"0" for the option with JSON name `Name` and refreshes the model.
void ConfigModel::setBool(const QString& Name, bool Value) {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  LoadedConfig->Set(Option, Value ? "1" : "0");

  Reload();
}

// Stores `Value` for the option with JSON name `Name` and refreshes the model.
void ConfigModel::setString(const QString& Name, const QString& Value) {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  LoadedConfig->Set(Option, Value.toStdString());

  Reload();
}

// Replaces the string-array option with JSON name `Name` by `Values` (in order) and
// refreshes the model.
void ConfigModel::setStringList(const QString& Name, const QStringList& Values) {
  const auto& Option = NameToConfigLookup.at(Name.toStdString());

  // Start from scratch, then append the new entries one by one
  LoadedConfig->Erase(Option);
  for (auto& Entry : Values) {
    const std::string Converted = Entry.toStdString();
    LoadedConfig->AppendStrArrayValue(Option, Converted.c_str());
  }

  Reload();
}

// Stores the decimal representation of `Value` for the option with JSON name `Name` and
// refreshes the model.
void ConfigModel::setInt(const QString& Name, int Value) {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  LoadedConfig->Set(Option, std::to_string(Value));

  Reload();
}

// Returns the value of the option with JSON name `Name`, interpreted as UTF-8.
// Throws std::runtime_error if the option has no value. The bool parameter is unused.
QString ConfigModel::getString(const QString& Name, bool) const {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  auto Stored = LoadedConfig->Get(Option);

  const bool HasValue = Stored && *Stored;
  if (!HasValue) {
    throw std::runtime_error("Could not find setting");
  }

  return QString::fromUtf8((*Stored)->c_str());
}

// Returns all values of the string-array option with JSON name `Name`, or an empty list if the
// option has no values. The bool parameter is unused.
QStringList ConfigModel::getStringList(const QString& Name, bool) const {
  const auto Option = NameToConfigLookup.at(Name.toStdString());
  auto Stored = LoadedConfig->All(Option);

  QStringList Result;
  const bool HasValues = Stored && *Stored;
  if (HasValues) {
    for (auto& Entry : **Stored) {
      Result.append(Entry.c_str());
    }
  }
  return Result;
}

// Returns the value of the option with JSON name `Name` parsed as a decimal `int`.
// Throws std::runtime_error if the option has no value, or if the stored text is empty, contains
// anything besides the number, or doesn't fit into an `int`. The bool parameter is unused.
int ConfigModel::getInt(const QString& Name, bool) const {
  auto ret = LoadedConfig->Get(NameToConfigLookup.at(Name.toStdString()));
  if (!ret || !*ret) {
    throw std::runtime_error("Could not find setting");
  }

  // Use data()/size() rather than dereferencing end(), which isn't a valid element
  const auto& Text = **ret;
  const char* const First = Text.data();
  const char* const Last = First + Text.size();

  int value {};
  const auto res = std::from_chars(First, Last, value);

  // The error code must be checked as well: for an empty string or an out-of-range number
  // from_chars leaves `value` untouched, and res.ptr alone can still equal `Last`.
  if (res.ec != std::errc {} || res.ptr != Last) {
    throw std::runtime_error("Could not parse integer");
  }
  return value;
}

// Replaces LoadedConfig with a fresh layer in which every option is set to its default value.
static void LoadDefaultSettings() {
  LoadedConfig = fextl::make_unique<FEX::Config::EmptyMapper>();
  // The OPT_* macros below are expanded by ConfigValues.inl (presumably once per option) and
  // store each option's default as a string. String arrays get no default.
#define OPT_BASE(type, group, enum, json, default) LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default));
#define OPT_STR(group, enum, json, default) LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, default);
#define OPT_STRARRAY(group, enum, json, default) // Do nothing
#define OPT_STRENUM(group, enum, json, default) \
  LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default)));
#include <FEXCore/Config/ConfigValues.inl>

  // Erase unnamed options which shouldn't be set
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE);
}

// Fills the option <-> name lookup tables and makes sure the directory holding `ConfigFilename`
// as well as the RootFS directory exist. If a directory can't be created, an error dialog is
// shown and the process exits with EXIT_FAILURE.
static void ConfigInit(fextl::string ConfigFilename) {
#define OPT_BASE(type, group, enum, json, default)                                 \
  ConfigToNameLookup[FEXCore::Config::ConfigOption::CONFIG_##enum].first = #json;  \
  ConfigToNameLookup[FEXCore::Config::ConfigOption::CONFIG_##enum].second = #type; \
  NameToConfigLookup[#json] = FEXCore::Config::ConfigOption::CONFIG_##enum;
#include <FEXCore/Config/ConfigValues.inl>
#undef OPT_BASE

  // Ensure config and RootFS directories exist
  const std::filesystem::path ConfigDir = std::filesystem::absolute(ConfigFilename).parent_path();
  const std::filesystem::path RootFSDir = std::filesystem::absolute(FEXCore::Config::GetDataDirectory()) / "RootFS/";

  std::error_code Error {};
  for (const auto* Dir : {&ConfigDir, &RootFSDir}) {
    if (std::filesystem::create_directories(*Dir, Error)) {
      qInfo() << "Created folder" << Dir->c_str();
    }

    if (!Error) {
      continue;
    }

    QMessageBox Failure(QMessageBox::Critical, "Failed to create directory",
                        QString("Failed to create \"%1\" folder").arg(Dir->c_str()), QMessageBox::Ok);
    Failure.exec();
    std::exit(EXIT_FAILURE);
  }
}

// Collects the list of available libraries from the global and then the local thunk database.
HostLibsModel::HostLibsModel() {
  for (const bool Global : {true, false}) {
    LoadThunkDatabase(HostLibsDB, Global);
  }
}

// Resets every known library to disabled, applies the "ThunksDB" settings found in the JSON
// file at `Path` (if it can be loaded and parsed), and rebuilds the model rows.
void HostLibsModel::Reload(const fextl::string& Path) {
  for (auto& Entry : HostLibsDB) {
    Entry.second = false;
  }

  // Best effort: any failure while reading the file just leaves everything disabled
  const auto ApplyFileSettings = [&]() {
    fextl::vector<char> FileData;
    if (!FEXCore::FileLoading::LoadFile(FileData, Path)) {
      return;
    }

    FEX::JSON::JsonAllocator Pool {};
    const json_t* Root = FEX::JSON::CreateJSON(FileData, Pool);
    if (Root == nullptr) {
      return;
    }

    const json_t* ThunksDB = json_getProperty(Root, "ThunksDB");
    if (ThunksDB == nullptr) {
      return;
    }

    // Only libraries already present in the database are updated
    for (const json_t* Item = json_getChild(ThunksDB); Item != nullptr; Item = json_getSibling(Item)) {
      const auto Known = HostLibsDB.find(json_getName(Item));
      if (Known == HostLibsDB.end()) {
        continue;
      }
      Known->second = (json_getInteger(Item) != 0);
    }
  };
  ApplyFileSettings();

  beginResetModel();
  removeRows(0, rowCount());
  for (auto& Entry : HostLibsDB) {
    auto Row = new QStandardItem(QString::fromUtf8(Entry.first.c_str()));
    Row->setData(Entry.second, Qt::CheckStateRole);
    appendRow(Row);
  }
  endResetModel();
}

// Returns the base model's role names with Qt::CheckStateRole exposed as "checked".
QHash<int, QByteArray> HostLibsModel::roleNames() const {
  QHash<int, QByteArray> Roles = QStandardItemModel::roleNames();
  Roles.insert(Qt::CheckStateRole, QByteArray("checked"));
  return Roles;
}

bool HostLibsModel::setData(const QModelIndex& index, const QVariant& value, int role) {
  std::next(HostLibsDB.begin(), index.row())->second = value.toBool();
  return QStandardItemModel::setData(index, value, role);
}

// Starts watching the local and global RootFS folders for created/deleted entries (if inotify
// can be set up for at least one of them) and loads the initial list of RootFS entries.
RootFSModel::RootFSModel() {
  auto INotifyFD = inotify_init1(IN_NONBLOCK | IN_CLOEXEC);

  fextl::string RootFS = FEXCore::Config::GetDataDirectory(false) + "RootFS/";
  int LocalFolderWD = inotify_add_watch(INotifyFD, RootFS.c_str(), IN_CREATE | IN_DELETE);

  RootFS = FEXCore::Config::GetDataDirectory(true) + "RootFS/";
  int GlobalFolderWD = inotify_add_watch(INotifyFD, RootFS.c_str(), IN_CREATE | IN_DELETE);

  const bool CanWatch = INotifyFD != -1 && (LocalFolderWD != -1 || GlobalFolderWD != -1);
  if (CanWatch) {
    INotifyReactor.enable_async_stop();
    Thread = std::thread {&RootFSModel::INotifyThreadFunc, this, INotifyFD};
  } else {
    qWarning() << "Could not set up inotify. RootFS folder won't be monitored for changes.";

    // No thread will consume events from the descriptor, so don't leak it
    if (INotifyFD != -1) {
      close(INotifyFD);
    }
  }

  // Load initial data
  Reload();
}

// Stops the inotify reactor and waits for the watcher thread to finish.
RootFSModel::~RootFSModel() {
  // If inotify couldn't be set up, the thread was never started and the reactor was never armed
  // for an asynchronous stop. Joining a non-joinable std::thread throws std::system_error, which
  // would terminate the process when it escapes a destructor.
  if (!Thread.joinable()) {
    return;
  }

  INotifyReactor.stop_async();
  Thread.join();
}

void RootFSModel::Reload() {
  beginResetModel();
  removeRows(0, rowCount());

  std::vector<QString> NamedRootFS {};
  for (auto Global : {false, true}) {
    const fextl::string RootFS = FEXCore::Config::GetDataDirectory(Global) + "RootFS/";

    std::error_code ec;
    for (auto& it : std::filesystem::directory_iterator(RootFS, ec)) {
      std::string Path {};
      if (Global) {
        // If global then keep the full path.
        Path = it.path();
      } else {
        // If local then only use the filename.
        Path = it.path().filename();
      }

      if (it.is_directory()) {
        NamedRootFS.push_back(QString::fromStdString(Path));
      } else if (it.is_regular_file()) {
        // If it is a regular file then we need to check if it is a valid archive
        if (it.path().extension() == ".sqsh" && FEX::FormatCheck::IsSquashFS(fextl::string_from_path(it.path()))) {
          NamedRootFS.push_back(QString::fromStdString(Path));
        } else if (it.path().extension() == ".ero" && FEX::FormatCheck::IsEroFS(fextl::string_from_path(it.path()))) {
          NamedRootFS.push_back(QString::fromStdString(Path));
        }
      }
    }
  }

  std::sort(NamedRootFS.begin(), NamedRootFS.end(), [](const QString& a, const QString& b) { return QString::localeAwareCompare(a, b) < 0; });
  for (auto& Entry : NamedRootFS) {
    appendRow(new QStandardItem(Entry));
  }

  endResetModel();
}

// Returns whether the model contains a row whose text is exactly `Name`.
bool RootFSModel::hasItem(const QString& Name) const {
  const auto Matches = findItems(Name, Qt::MatchExactly);
  return !Matches.isEmpty();
}

// Returns a file URL for the "RootFS/" folder inside FEX's data directory.
QUrl RootFSModel::getBaseUrl() const {
  const QString DataDirectory = QString::fromStdString(FEXCore::Config::GetDataDirectory().c_str());
  return QUrl::fromLocalFile(DataDirectory + "RootFS/");
}

void RootFSModel::INotifyThreadFunc(int INotifyFD) {
  fasio::posix_descriptor INotify {INotifyReactor, INotifyFD};

  INotify.async_wait([this, INotifyFD](fasio::error ec) {
    // Spin through the events, we don't actually care what they are
    constexpr size_t DATA_SIZE = (16 * (sizeof(struct inotify_event) + NAME_MAX + 1));
    char buf[DATA_SIZE];
    while (read(INotifyFD, buf, DATA_SIZE) > 0)
      ;

    // Queue update to the data model
    QMetaObject::invokeMethod(this, "Reload");
    return fasio::post_callback::repeat;
  });

  INotifyReactor.run();
}

// Returns true on success
static bool OpenFile(fextl::string Filename) {
  std::error_code ec {};
  if (!std::filesystem::exists(Filename, ec)) {
    return false;
  }

  LoadedConfig = FEX::Config::CreateMainLayer(&Filename);
  LoadedConfig->Load();

  // Load default options and only overwrite only if the option didn't exist
#define OPT_BASE(type, group, enum, json, default)                                            \
  if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) {            \
    LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(default)); \
  }
#define OPT_STR(group, enum, json, default)                                        \
  if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) { \
    LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, default);      \
  }
#define OPT_STRARRAY(group, enum, json, default) // Do nothing
#define OPT_STRENUM(group, enum, json, default)                                                                      \
  if (!LoadedConfig->OptionExists(FEXCore::Config::ConfigOption::CONFIG_##enum)) {                                   \
    LoadedConfig->Set(FEXCore::Config::ConfigOption::CONFIG_##enum, std::to_string(FEXCore::ToUnderlying(default))); \
  }
#include <FEXCore/Config/ConfigValues.inl>

  // Erase unnamed options which shouldn't be set
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_INTERPRETER_INSTALLED);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_FILENAME);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_APP_CONFIG_NAME);
  LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_IS64BIT_MODE);

  return true;
}

// Sets up the UI: registers the three models as QML singletons, loads main.qml,
// initializes the window's file-related properties and connects its signals to this object.
// An empty ConfigFilename means no file was loaded and the window starts out dirty with defaults.
ConfigRuntime::ConfigRuntime(const QString& ConfigFilename) {
  HostLibs.Reload(ConfigFilename.toStdString().c_str());

  // The singletons must be registered before the QML file that imports them is loaded
  qmlRegisterSingletonInstance<ConfigModel>("FEX.ConfigModel", 1, 0, "ConfigModel", &ConfigModelInst);
  qmlRegisterSingletonInstance<HostLibsModel>("FEX.HostLibsModel", 1, 0, "HostLibsModel", &HostLibs);
  qmlRegisterSingletonInstance<RootFSModel>("FEX.RootFSModel", 1, 0, "RootFSModel", &RootFSList);
  Engine.load(QUrl("qrc:/main.qml"));

  Window = qobject_cast<QQuickWindow*>(Engine.rootObjects().first());
  if (ConfigFilename.isEmpty()) {
    // Nothing was loaded from disk: point at the default location and flag the state as unsaved defaults
    Window->setProperty("configFilename", QUrl::fromLocalFile(FEXCore::Config::GetConfigFileLocation().c_str()));
    Window->setProperty("configDirty", true);
    Window->setProperty("loadedDefaults", true);
  } else {
    Window->setProperty("configFilename", QUrl::fromLocalFile(ConfigFilename));
  }

  QObject::connect(Window, SIGNAL(selectedConfigFile(const QUrl&)), this, SLOT(onLoad(const QUrl&)));
  QObject::connect(Window, SIGNAL(triggeredSave(const QUrl&)), this, SLOT(onSave(const QUrl&)));
  QObject::connect(&ConfigModelInst, SIGNAL(modelReset()), Window, SLOT(refreshUI()));
}

// Slot: writes LoadedConfig together with the host library selections to the given file as JSON.
void ConfigRuntime::onSave(const QUrl& Filename) {
  // An empty RootFS is taken to mean that another Config layer sets it up,
  // so the empty value is dropped from the local configuration before saving
  const auto RootFS = LoadedConfig->Get(FEXCore::Config::ConfigOption::CONFIG_ROOTFS).value_or(nullptr);
  const bool RootFSIsEmpty = RootFS && RootFS->empty();
  if (RootFSIsEmpty) {
    LoadedConfig->Erase(FEXCore::Config::ConfigOption::CONFIG_ROOTFS);
  }

  const auto LocalPath = Filename.toLocalFile().toStdString();
  qInfo() << "Saving to" << LocalPath.c_str();
  FEX::Config::SaveLayerToJSON(LocalPath.c_str(), LoadedConfig.get(), HostLibs.HostLibsDB);
}

void ConfigRuntime::onLoad(const QUrl& Filename) {
  // TODO: Distinguish between "load" and "overlay".
  //       Currently, the new configuration is overlaid on top of the previous one.

  if (!OpenFile(Filename.toLocalFile().toStdString().c_str())) {
    // This basically never happens because OpenFile performs no actual syntax checks.
    // Treat as fatal since the UI state wouldn't be consistent after ignoring the error.
    QMessageBox err(QMessageBox::Critical, tr("Could not load config file"), tr("Failed to load \"%1\"").arg(Filename.toLocalFile()),
                    QMessageBox::Ok);
    err.exec();
    QApplication::exit();
    return;
  }

  ConfigModelInst.Reload();
  RootFSList.Reload();
  HostLibs.Reload(Filename.toLocalFile().toStdString().c_str());

  QMetaObject::invokeMethod(Window, "refreshUI");
}

int main(int Argc, char** Argv) {
  QApplication App(Argc, Argv);

  FEX::Config::InitializeConfigs(FEX::Config::PortableInformation {});
  fextl::string ConfigFilename = Argc > 1 ? Argv[1] : FEXCore::Config::GetConfigFileLocation();
  ConfigInit(ConfigFilename);

  qInfo() << "Opening" << ConfigFilename.c_str();
  if (!OpenFile(ConfigFilename)) {
    // Load defaults if not found
    ConfigFilename.clear();
    LoadDefaultSettings();
  }

  ConfigRuntime Runtime(ConfigFilename.c_str());
  App.setWindowIcon(QIcon(":/icon.png"));
  return App.exec();
}


================================================
FILE: Source/Tools/FEXConfig/Main.h
================================================
// SPDX-License-Identifier: MIT
#include <Common/Async.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>

#include <QStandardItemModel>
#include <QQmlApplicationEngine>

#include <thread>

class QQuickWindow;

// Item model exposed to QML as a singleton, offering by-name access to config options.
class ConfigModel : public QStandardItemModel {
  Q_OBJECT
  QML_ELEMENT
  QML_SINGLETON

public:
  ConfigModel();

  // Called after a config file has been loaded (see ConfigRuntime::onLoad)
  void Reload();

public slots:
  // Existence check and removal of an option, keyed by option name.
  // The trailing bool of the const accessors is named `unused`: main.qml passes its
  // refreshCache property there so that bindings re-evaluate whenever that property toggles.
  bool has(const QString&, bool unused) const;
  void erase(const QString&);

  // Typed getters, keyed by option name
  bool getBool(const QString&, bool unused) const;
  QString getString(const QString&, bool unused) const;
  QStringList getStringList(const QString&, bool unused) const;
  int getInt(const QString&, bool unused) const;

  // Typed setters, keyed by option name
  void setBool(const QString&, bool);
  void setString(const QString&, const QString&);
  void setStringList(const QString&, const QStringList&);
  void setInt(const QString&, int value);
};

// Item model exposed to QML as a singleton for the host library selection.
class HostLibsModel : public QStandardItemModel {
  Q_OBJECT
  QML_ELEMENT
  QML_SINGLETON

public:
  // Passed to FEX::Config::SaveLayerToJSON when saving.
  // Presumably maps a library name to whether it is enabled — TODO confirm.
  fextl::unordered_map<fextl::string, bool> HostLibsDB;

  HostLibsModel();

  // QStandardItemModel overrides
  QHash<int, QByteArray> roleNames() const override;

  bool setData(const QModelIndex&, const QVariant&, int role) override;

  // Refreshes the model for the given config file
  void Reload(const fextl::string& Filename);
};

// Item model exposed to QML as a singleton, listing the available RootFS entries.
class RootFSModel : public QStandardItemModel {
  Q_OBJECT
  QML_ELEMENT
  QML_SINGLETON

  // NOTE(review): presumably runs INotifyThreadFunc; the thread start is not visible here
  std::thread Thread;
  // Reactor that INotifyThreadFunc registers the inotify descriptor with and then runs
  fasio::poll_reactor INotifyReactor;

  // Waits for inotify events on INotifyFD and invokes Reload for each batch
  void INotifyThreadFunc(int INotifyFD);

public:
  RootFSModel();
  ~RootFSModel();

public slots:
  // Rebuilds the list of entries; also invoked by name from INotifyThreadFunc
  void Reload();

  // True if an entry with exactly this text exists in the model
  bool hasItem(const QString&) const;

  // Local-file URL of the "RootFS/" folder inside the data directory
  QUrl getBaseUrl() const;
};

// Owns the QML engine and the three models, and handles the load/save requests coming from the window.
class ConfigRuntime : public QObject {
  Q_OBJECT

  QQmlApplicationEngine Engine;
  // First root object of the loaded QML; set in the constructor
  QQuickWindow* Window = nullptr;
  // Model instances registered with QML as singletons
  RootFSModel RootFSList;
  ConfigModel ConfigModelInst;
  HostLibsModel HostLibs;

public:
  // An empty ConfigFilename indicates that no config file was loaded
  ConfigRuntime(const QString& ConfigFilename);

public slots:
  // Connected to the window's triggeredSave(url) signal
  void onSave(const QUrl&);
  // Connected to the window's selectedConfigFile(url) signal
  void onLoad(const QUrl&);
};


================================================
FILE: Source/Tools/FEXConfig/main.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick 2.15
import QtQuick.Controls 2.15
import QtQuick.Layouts 1.15

import FEX.ConfigModel 1.0
import FEX.HostLibsModel 1.0
import FEX.RootFSModel 1.0

// Qt 6 changed the API of the Dialogs module slightly.
// The differences are abstracted away in this import:
import "qrc:/dialogs"

ApplicationWindow {
    id: root

    visible: true
    width: 540
    height: 585
    minimumWidth: 500
    minimumHeight: 450
    title: configDirty ? qsTr("FEX configuration *") : qsTr("FEX configuration")

    property url configFilename

    property bool configDirty: false
    property bool loadedDefaults: false
    property bool closeConfirmed: false

    signal selectedConfigFile(name: url)
    signal triggeredSave(name: url)

    // Property used to force reloading any elements that read ConfigModel
    property bool refreshCache: false

    onConfigDirtyChanged: {
        // Only the transition to "clean" matters here
        if (configDirty) {
            return
        }

        // We either just saved or loaded a file
        loadedDefaults = false
    }

    // Flips refreshCache so that every binding reading it is re-evaluated.
    // The actual value carries no meaning; only the change matters.
    function refreshUI() {
        refreshCache = !refreshCache
    }

    // Turns a file URL into a plain path by dropping the scheme prefix and percent-decoding the rest.
    // Any other URL is returned unchanged in its string form.
    function urlToLocalFile(theurl: url): string {
        var text = theurl.toString()

        // Longest prefix first, so that "file://..." isn't handled as the bare "file:" form
        var prefixes = [ "file://", "file:" ]
        for (var i = 0; i < prefixes.length; ++i) {
            if (text.startsWith(prefixes[i])) {
                return decodeURIComponent(text.substring(prefixes[i].length))
            }
        }

        return text;
    }

    // File picker shared by "Open", "Save as" and the save-before-quit flow.
    FileDialog {
        id: openFileDialog

        // true while the dialog is used to choose a save target
        property bool isSaving: false

        // One-shot callback to run after the next accepted selection
        property var onNextAccept: null

        title: isSaving ? qsTr("Save FEX configuration") : qsTr("Open FEX configuration")
        nameFilters: [ qsTr("Config files(*.json)"), qsTr("All files(*)") ]

        selectExisting: !isSaving

        // Shared implementation of the two entry points below
        function promptAndThen(saving, callback) {
            isSaving = saving
            console.assert(!onNextAccept, "Tried to open dialog multiple times")
            onNextAccept = callback
            open()
        }

        // Prompts the user for an existing file and calls the callback on completion
        function loadAndThen(callback) {
            promptAndThen(false, callback)
        }

        // Prompts the user for a new or existing file and calls the callback on completion
        function saveAndThen(callback) {
            promptAndThen(true, callback)
        }

        onAccepted: {
            // Opening a file is announced to the C++ side; saving is left to the callback
            if (!isSaving) {
                root.selectedConfigFile(selectedFile)
            }
            configFilename = selectedFile
            configDirty = false

            var pendingCallback = onNextAccept
            if (pendingCallback) {
                pendingCallback()
                onNextAccept = null
            }
        }

        onRejected: onNextAccept = null
    }

    // Asks whether unsaved changes should be saved, discarded, or the close be cancelled.
    MessageDialog {
        id: confirmCloseDialog
        title: qsTr("Save changes")
        text: configFilename.toString() === "" ? qsTr("Save changes before quitting?") : qsTr("Save changes to %1 before quitting?").arg(urlToLocalFile(configFilename))
        buttons: buttonSave | buttonDiscard | buttonCancel

        onButtonClicked: (button) => {
            if (button === buttonSave) {
                if (configFilename.toString() === "") {
                    // Filename not yet set => trigger "Save As" dialog
                    openFileDialog.saveAndThen(() => {
                        save(configFilename)
                        root.close()
                    });
                } else {
                    save(configFilename)
                    root.close()
                }
            } else if (button === buttonDiscard) {
                // Remember the decision so the close handler lets the window go
                closeConfirmed = true
                root.close()
            }
            // Any other button (cancel) leaves the window open and untouched
        }
    }

    // Intercepts window close requests while there are unsaved changes.
    // The close goes through if the config is clean or the user already chose to discard;
    // otherwise it is vetoed and the confirmation dialog is shown.
    onClosing: (close) => {
        if (!configDirty || closeConfirmed) {
            return
        }

        close.accepted = false
        confirmCloseDialog.open()
    }

    // Saves the configuration to `filename`, falling back to configFilename when it is empty.
    // If neither is set, the "Save As" dialog is opened and the save is retried once a file is chosen.
    function save(filename: url) {
        var target = filename
        if (target.toString() === "") {
            target = configFilename
        }

        if (target.toString() !== "") {
            triggeredSave(target)
            configDirty = false
            return
        }

        // Filename not yet set => trigger "Save As" dialog
        openFileDialog.saveAndThen(() => {
            save(configFilename)
        });
    }

    // Application menu: a single "File" menu with open, save, save-as and quit actions
    menuBar: MenuBar {
        Menu {
            title: qsTr("&File")
            Action {
                text: qsTr("&Open...")
                shortcut: StandardKey.Open
                // TODO: Ask to discard pending changes first
                // The empty callback means nothing extra happens after the file was selected
                onTriggered: openFileDialog.loadAndThen(() => {})
            }
            Action {
                text: qsTr("&Save")
                shortcut: StandardKey.Save
                // An empty argument makes save() fall back to the current configFilename
                onTriggered: root.save("")
            }
            Action {
                text: qsTr("Save &as...")
                shortcut: StandardKey.SaveAs
                onTriggered: {
                    // The dialog updates configFilename on accept before running this callback
                    openFileDialog.saveAndThen(() => {
                        root.save(configFilename)
                    });
                }
            }
            MenuSeparator {}
            Action {
                text: qsTr("&Quit")
                shortcut: StandardKey.Quit
                // Goes through the onClosing handler, so unsaved changes are still prompted for
                onTriggered: close()
            }
        }
    }

    // Tab strip selecting the settings page; its currentIndex drives the StackLayout below
    header: TabBar {
        id: tabBar
        currentIndex: 0

        // Zero-based position of the "Advanced" tab among the buttons below
        readonly property int advancedIndex: 4

        TabButton {
            text: qsTr("General")
        }
        TabButton {
            text: qsTr("Emulation")
        }
        TabButton {
            text: qsTr("CPU")
        }
        TabButton {
            text: qsTr("Libraries")
        }
        TabButton {
            text: qsTr("Advanced")
        }
    }

    // CheckBox bound to the boolean option named by `config`.
    // With `invert` set, the box shows and stores the negation of the option.
    // An empty `config` or a missing option is displayed as partially checked.
    component ConfigCheckBox: CheckBox {
        property string config
        property string tooltip
        property bool invert: false

        ToolTip.visible: (visualFocus || hovered) && tooltip !== ""
        ToolTip.text: tooltip

        onToggled: {
            configDirty = true
            // Two booleans differ exactly when their exclusive-or is set
            ConfigModel.setBool(config, checked !== invert)
        }

        checkState: {
            if (config === "" || !ConfigModel.has(config, refreshCache)) {
                return Qt.PartiallyChecked
            }
            return (ConfigModel.getBool(config, refreshCache) !== invert) ? Qt.Checked : Qt.Unchecked
        }
    }

    // SpinBox bound to the integer option named by `config`.
    component ConfigSpinBox: SpinBox {
        property string config

        editable: true

        // Show a placeholder instead of a number while the option is absent from the config
        textFromValue: (val) => {
            if (valueFromConfig === "") {
                return qsTr("(not set)");
            }

            return val.toString()
        }

        onValueModified: {
            configDirty = true
            ConfigModel.setInt(config, value)
        }

        // Text form of the option: empty when the option does not exist, otherwise its integer value.
        // Reading refreshCache makes the binding re-evaluate on refreshUI().
        property string valueFromConfig: config === "" ? 0 : ConfigModel.has(config, refreshCache) ? ConfigModel.getInt(config, refreshCache).toString() : ""

        value: valueFromConfig
        from: 0
        to: 1 << 30
    }

    // TextField bound to the string option named by `config`.
    // The field is disabled and shows a placeholder text while the option does not exist.
    component ConfigTextField: TextField {
        property string config
        // True when `config` is non-empty and the option exists; re-evaluated on refreshUI()
        property bool hasData: config !== "" && ConfigModel.has(config, refreshCache)
        text: hasData ? ConfigModel.getString(config, refreshCache) : "(none set)"
        enabled: hasData

        // Only user edits are written back
        onTextEdited: {
            configDirty = true
            ConfigModel.setString(config, text)
        }
    }

    // Labelled, read-only text field for a path option, with a button that opens a picker dialog.
    // `text` is the label, `config` the option name, and `dialog` the picker to use
    // (a FileDialog by default; users of the component may supply a different dialog).
    component ConfigTextFieldForPath: RowLayout {
        property string text
        property string config

        property var dialog: FileDialog {}

        // NOTE(review): this id is not referenced anywhere within the component
        FileDialog { id: fileSelectorDialog }

        Label { text: parent.text }
        ConfigTextField {
            Layout.fillWidth: true
            config: parent.config
            readOnly: true
        }

        Button {
            icon.name: "search"
            onClicked: dialog.open()
        }

        // The handler is attached here rather than declaratively because `dialog` is a plain var property
        Component.onCompleted: {
            dialog.accepted.connect(() => {
                // File dialogs report selectedFile; any other dialog is read through selectedFolder
                var selectedPath = (dialog instanceof FileDialog ? dialog.selectedFile : dialog.selectedFolder)

                configDirty = true
                ConfigModel.setString(config, urlToLocalFile(selectedPath))
            })
        }
    }

    StackLayout {
        anchors.fill: parent

        currentIndex: tabBar.currentIndex

        // Scrollable settings page: child items declared by the user end up stacked in a padded Column
        component ScrollablePage: ScrollView {
            id: outer

            // Width taken up by the vertical scroll bar, or 0 while it is hidden
            readonly property var visibleScrollbarWidth: ScrollBar.vertical.visible ? ScrollBar.vertical.width : 0

            // Children given by the user will be moved into the inner Column
            default property alias content: inner.children

            // Lets users of the component override the spacing between the stacked items
            property alias itemSpacing: inner.spacing

            Column {
                id: inner

                spacing: 8
                padding: 8

                // This must be explicitly set via the id, since parent doesn't seem to be recognized within Column
                width: outer.width - outer.visibleScrollbarWidth
            }
        }

        // Environment settings
        ScrollablePage {
            GroupBox {
                id: rootfsGroupBox
                title: qsTr("RootFS:")
                width: parent.width - parent.padding * 2

                ColumnLayout {
                    width: rootfsGroupBox.width - rootfsGroupBox.padding * 2
                    ScrollView {
                        Layout.fillWidth: true
                        Layout.maximumHeight: 150
                        clip: true

                        Column {
                            id: rootfsList

                            property string selectedItem
                            property string explicitEntry

                            spacing: 4

                            // Seeds selectedItem from the config and repeats that on every UI refresh
                            Component.onCompleted: {
                                // `ref` is only forwarded as the second argument of the ConfigModel accessors
                                var initState = (ref) => {
                                    selectedItem = ConfigModel.has("RootFS", ref) ? ConfigModel.getString("RootFS", ref) : ""

                                    // RootFSModel only lists entries in the $FEX_HOME/RootFS/ folder.
                                    // If a custom path is selected, add it as a dedicated entry
                                    if (selectedItem !== "" && !RootFSModel.hasItem(selectedItem)) {
                                        explicitEntry = selectedItem

                                        // Make visible once needed.
                                        // Conversely, if the user selects something else after, keep the old option visible to allow easy undoing
                                        fallbackRootfsEntry.visible = true
                                    }
                                }

                                // Run once now, then again whenever refreshCache changes (i.e. on refreshUI())
                                initState(false)
                                root.refreshCacheChanged.connect(initState)
                            }

                            // Stores the chosen file or folder as the RootFS option and refreshes the UI.
                            // Paths below the RootFS base folder are stored relative to it.
                            function updateRootFS(fileOrFolder: url) {
                                configDirty = true

                                var rootfsDir = urlToLocalFile(RootFSModel.getBaseUrl())
                                var selection = urlToLocalFile(fileOrFolder)
                                var stored = selection.startsWith(rootfsDir) ? selection.substring(rootfsDir.length) : selection

                                ConfigModel.setString("RootFS", stored)
                                refreshUI()
                            }

                            // Radio button representing one RootFS choice; `name` is both its label and the stored value
                            component RootFSRadioDelegate: RadioButton {
                                property var name

                                text: name
                                // Checked while this entry matches the list's current selection
                                checked: rootfsList.selectedItem === name

                                // User selection is written straight to the config
                                onToggled: {
                                    configDirty = true;
                                    ConfigModel.setString("RootFS", name)
                                }
                            }

                            RootFSRadioDelegate {
                                id: fallbackRootfsEntry
                                visible: false
                                name: rootfsList.explicitEntry
                            }
                            Repeater {
                                model: RootFSModel
                                delegate: RootFSRadioDelegate { name: model.display }
                            }
                        }
                    }
                    RowLayout {
                        // Picker for a RootFS archive file; starts in the RootFS base folder
                        FileDialog {
                            id: addRootfsFileDialog
                            title: qsTr("Select RootFS file")
                            nameFilters: [ qsTr("SquashFS and EroFS (*.sqsh *.ero)"), qsTr("All files(*)") ]
                            currentFolder: RootFSModel.getBaseUrl()
                            // selectedFile is the property every other FileDialog use in this file reads
                            onAccepted: rootfsList.updateRootFS(selectedFile)
                        }

                        FolderDialog {
                            id: addRootfsFolderDialog
                            title: qsTr("Select RootFS folder")
                            currentFolder: RootFSModel.getBaseUrl()
                            onAccepted: rootfsList.updateRootFS(selectedFolder)
                        }

                        Button {
                            text: qsTr("Add archive")
                            icon.name: "document-open"
                            onClicked: addRootfsFileDialog.open()
                        }
                        Button {
                            text: qsTr("Add folder")
                            icon.name: "folder"
                            onClicked: addRootfsFolderDialog.open()
                        }
                    }
                }
            }

            GroupBox {
                title: qsTr("Logging:")
                width: parent.width - parent.padding * 2

                label: ConfigCheckBox {
                    id: loggingEnabledCheckBox
                    config: "SilentLog"
                    text: qsTr("Logging:")
                    invert: true
                }

                ColumnLayout {
                    enabled: loggingEnabledCheckBox.checked

                    anchors.left: parent ? parent.left : undefined
                    anchors.right: parent ? parent.right : undefined

                    RowLayout {
                        Label { text: qsTr("Log to:") }

                        // Selects the log destination stored in the "OutputLog" option
                        ComboBox {
                            id: loggingComboBox
                            // Current option value, or "" when the option does not exist
                            property string configValue: ConfigModel.has("OutputLog", refreshCache) ? ConfigModel.getString("OutputLog", refreshCache) : ""

                            // "server" and "stderr" map to the first two entries; any other non-empty value is shown as "File..."
                            currentIndex: configValue === "" ? -1 : configValue == "server" ? 0 : configValue == "stderr" ? 1 : 2

                            onActivated: {
                                configDirty = true
                                // Option values for the first two entries, in model order
                                var configNames = [ "server", "stderr" ]
                                if (currentIndex != -1 && currentIndex < 2) {
                                    ConfigModel.setString("OutputLog", configNames[currentIndex])
                                } else {
                                    // Set by text field below
                                }
                            }

                            model: ListModel {
                                ListElement { text: "FEXServer" }
                                ListElement { text: "stderr" }
                                ListElement { text: qsTr("File...") }
                            }
                        }

                        ConfigTextFieldForPath {
                            visible: loggingComboBox.currentIndex === 2
                            config: "OutputLog"
                        }
                    }
                }
            }
        }

        // Emulation settings
        ScrollablePage {
            RowLayout {
                Label { text: qsTr("SMC detection:") }
                // The "SMCChecks" option is stored as an integer that doubles as the index into the model below
                ComboBox {
                    // -1 (no selection) while the option does not exist
                    currentIndex: ConfigModel.has("SMCChecks", refreshCache) ? ConfigModel.getInt("SMCChecks", refreshCache) : -1

                    onActivated: {
                        configDirty = true
                        ConfigModel.setInt("SMCChecks", currentIndex)
                    }

                    model: ListModel {
                        ListElement { text: qsTr("None") }
                        ListElement { text: qsTr("MTrack") }
                        ListElement { text: qsTr("Full") }
                    }
                }
            }

            GroupBox {
                title: qsTr("Memory model:")
                width: parent.width - parent.padding * 2

                ColumnLayout {
                    anchors.left: parent ? parent.left : undefined
                    anchors.right: parent ? parent.right : undefined

                    // Exclusive group for the two memory-model radio buttons.
                    // tso1 ("Inaccurate") stores TSOEnabled = false, tso2 ("Accurate (TSO)") stores true.
                    ButtonGroup {
                        id: tsoButtonGroup
                        buttons: [tso1, tso2]
                        checkedButton: ConfigModel.getBool("TSOEnabled", refreshCache) ? tso2 : tso1

                        // Index of the button whose change is currently being applied, or -1 when idle.
                        // Used as a guard so the two handlers below don't process the same change at once.
                        property int pendingItemChange: -1

                        // Called from the radio buttons' onToggled handlers with their index (0 or 1)
                        function onClickedButton(index: int) {
                            pendingItemChange = index;

                            configDirty = true;

                            var newIndex = pendingItemChange
                            var TSOEnabled = newIndex === 1
                            ConfigModel.setBool("TSOEnabled", TSOEnabled)

                            pendingItemChange = -1;
                        }

                        onClicked: {
                            if (pendingItemChange !== -1) {
                                return;
                            }
                            // Only the two buttons that are part of this group can be checked
                            pendingItemChange = tso1.checked ? 0 : tso2.checked ? 1 : -1;
                            if (pendingItemChange === -1) {
                                // Undetermined state, leave as is
                                return;
                            }

                            var newIndex = pendingItemChange
                            var TSOEnabled = newIndex === 1
                            ConfigModel.setBool("TSOEnabled", TSOEnabled)

                            // Always release the guard again so later clicks are processed
                            pendingItemChange = -1;
                        }
                    }

                    ColumnLayout {
                        RadioButton {
                            id: tso1
                            text: qsTr("Inaccurate")
                            onToggled: tsoButtonGroup.onClickedButton(0)
                        }

                        ColumnLayout {
                            RadioButton {
                                id: tso2
                                text: qsTr("Accurate (TSO)")
                                onToggled: tsoButtonGroup.onClickedButton(1)
                            }

                            ColumnLayout {
                                visible: tso2.checked

                                ConfigCheckBox {
                                    text: qsTr("... for vector instructions")
                                    tooltip: qsTr("Controls TSO emulation on vector load/store instructions")
                                    config: "VectorTSOEnabled"
                                }
                                ConfigCheckBox {
                                    text: qsTr("... for memcpy instructions")
                                    tooltip: qsTr("Controls TSO emulation on memcpy/memset instructions")
                                    config: "MemcpySetTSOEnabled"
                                }
                                ConfigCheckBox {
                                    text: qsTr("... for unaligned half-barriers")
                                    tooltip: qsTr("Controls half-barrier TSO emulation on unaligned load/store instructions")
                                    config: "HalfBarrierTSOEnabled"
                                }
                            }
                        }
                    }

                    ConfigCheckBox {
                        topPadding: 4
                        text: qsTr("Enable non-tearing split-lock atomics")
                        config: "StrictInProcessSplitLocks"
                    }
                    ConfigCheckBox {
                        topPadding: 4
                        text: qsTr("Use PE volatile metadata for ARM64EC")
                        config: "VolatileMetadata"
                    }
                }
            }

            component EnvVarList: GroupBox {
                width: parent.width - parent.padding * 2

                property bool ofHost: false

                ColumnLayout {
                    anchors.left: parent ? parent.left : undefined
                    anchors.right: parent ? parent.right : undefined

                    spacing: 0

                    id: envGroup
                    property var values: ConfigModel.getStringList(ofHost ? "HostEnv" : "Env", refreshCache)

                    property int editedIndex: -1
                    // One row per existing variable: a clickable label that switches to an editor plus a remove button
                    Repeater {
                        model: parent.values
                        Layout.fillWidth: true

                        RowLayout {
                            // Only the row whose index equals envGroup.editedIndex is in edit mode
                            property bool isEditing: envGroup.editedIndex === index

                            ItemDelegate {
                                text: modelData;
                                visible: !parent.isEditing
                                // Clicking the entry puts this row into edit mode
                                onClicked: envGroup.editedIndex = index

                            }
                            TextField {
                                id: envVarEditTextField
                                visible: parent.isEditing;
                                text: modelData

                                onEditingFinished: {
                                    // Leave edit mode first; an unchanged text needs no config write
                                    envGroup.editedIndex = -1
                                    if (text === modelData) {
                                        return
                                    }

                                    var newValues = envGroup.values
                                    newValues[model.index] = text
                                    configDirty = true
                                    ConfigModel.setStringList(ofHost ? "HostEnv" : "Env", newValues)
                                }
                            }
                            Button {
                                visible: parent.isEditing
                                icon.name: "list-remove"
                                onClicked: {
                                    envGroup.editedIndex = -1
                                    // Rebuild the list without the entry shown in this row
                                    var newValues = []
                                    for (var i = 0; i < envGroup.values.length; ++i) {
                                        if (i != index) {
                                            newValues.push(envGroup.values[i])
                                        }
                                    }

                                    configDirty = true
                                    ConfigModel.setStringList(ofHost ? "HostEnv" : "Env", newValues)
                                }
                            }
                        }
                    }

                    // Input row for appending a new variable: type the entry and press Enter or the add button
                    RowLayout {
                        TextField {
                            id: envVarTextField
                            Layout.fillWidth: true

                            onAccepted: {
                                // Pressing Enter in an empty field must not append an empty entry
                                if (envVarTextField.text === "") {
                                    return
                                }

                                var newValues = envGroup.values
                                newValues.push(envVarTextField.text)
                                configDirty = true
                                ConfigModel.setStringList(ofHost ? "HostEnv" : "Env", newValues)
                                text = ""
                            }
                        }
                        Button {
                            icon.name : "list-add"
                            enabled: envVarTextField.text !== ""
                            // Emit the field's accepted signal so that its handler above runs
                            onClicked: envVarTextField.accepted()
                        }
                    }
                }
            }

            EnvVarList {
                title: qsTr("Guest environment variables:")
            }

            EnvVarList {
                title: qsTr("Host environment variables:")
                ofHost: true
            }
        }

        // CPU settings
        // Page shown for the "CPU" tab; each control is bound to the option named in its `config`
        ScrollablePage {
            ConfigCheckBox {
                text: qsTr("Multiblock")
                config: "Multiblock"
            }

            RowLayout {
                Layout.fillWidth: true

                Label { text: qsTr("Block size:") }
                // from/to repeat the values ConfigSpinBox already declares
                ConfigSpinBox {
                    config: "MaxInst"
                    from: 0
                    to: 1 << 30
                }
            }

            ConfigCheckBox {
                text: qsTr("Reduced x87 precision")
                config: "X87ReducedPrecision"
            }

            ConfigCheckBox {
                text: qsTr("Disable JIT optimization passes")
                config: "O0"
            }
        }

        // Libraries settings
        ScrollablePage {
            // Folder pickers for the "ThunkHostLibs" and "ThunkGuestLibs" options
            GroupBox {
                title: qsTr("Library forwarding:")
                width: parent.width - parent.padding * 2

                ColumnLayout {
                    anchors.left: parent ? parent.left : undefined
                    anchors.right: parent ? parent.right : undefined

                    id: libfwdConfig

                    // Folder containing the current config file, as a file:// URL;
                    // empty when the config path contains no slash. Used as the start folder of both dialogs.
                    property url configDir: (() => {
                        var configPath = urlToLocalFile(configFilename)
                        var slashIndex = configPath.lastIndexOf('/')
                        if (slashIndex === -1) {
                            return ""
                        }
                        return "file://" + configPath.substr(0, slashIndex)
                    })()

                    ConfigTextFieldForPath {
                        text: qsTr("Host library folder:")
                        config: "ThunkHostLibs"
                        dialog: FolderDialog {
                            title: qsTr("Select path for host libraries")
                            currentFolder: libfwdConfig.configDir
                        }
                    }
                    ConfigTextFieldForPath {
                        text: qsTr("Guest library folder:")
                        config: "ThunkGuestLibs"
                        dialog: FolderDialog {
                            title: qsTr("Select path for guest libraries")
                            currentFolder: libfwdConfig.configDir
                        }
                    }
                }
            }

            GroupBox {
                id: hostLibsGroupBox
                title: qsTr("Use host library for:")
                width: parent.width - parent.padding * 2

                ColumnLayout {
                    width: hostLibsGroupBox.width - hostLibsGroupBox.padding * 2
                    ScrollView {
                        Layout.fillWidth: true
                        Layout.maximumHeight: 200
                        clip: true

                        Column {
                            property string selectedItem
                            property string explicitEntry

                            spacing: 4

                            Component.onCompleted: {
                                // root.refreshCacheChanged.connect(initState)
                            }

                            Repeater {
                                model: HostLibsModel
                                delegate: CheckBox {
                                    text: model.display
                                    visible: text !== "fex_thunk_test" // Hide test library
                                    checked: (root.refreshCache, model.checked)
                                    onToggled: {
                                        configDirty = true
                                        model.checked = checked
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        // Advanced settings
        // NOTE: This is wrapped in a Loader that dynamically instantiates/destroys the page contents whenever the tab is selected.
        //       This avoids costly UI updates for its UI elements.
        // TODO: Options contained multiple times in JSON aren't listed (neither are they in old FEXConfig though)
        Loader { sourceComponent: tabBar.currentIndex === tabBar.advancedIndex ? advancedSettingsPage : null }
        // Page content for the advanced tab: one row per ConfigModel entry, consisting of the option name,
        // an editor matching the option's type, and a button that erases the option.
        Component {
            id: advancedSettingsPage
            ScrollablePage {
                itemSpacing: 0
                Frame {
                    width: parent.width - parent.padding * 2
                    id: frame
                    Column {
                        Repeater {
                            model: ConfigModel
                            // NOTE(review): `display` and `optionType` are not declared in this excerpt —
                            // presumably roles provided by ConfigModel; confirm against the model.
                            delegate: RowLayout {
                                width: frame.width - frame.padding * 2

                                Label {
                                    id: label
                                    text: display
                                }

                                // All three editors are instantiated for every row; only the one matching
                                // optionType is visible, and the hidden ones get an empty `config` name.
                                ConfigCheckBox {
                                    visible: optionType == "bool"
                                    config: visible ? label.text : ""
                                }

                                ConfigTextField {
                                    Layout.fillWidth: true
                                    visible: optionType == "fextl::string"
                                    config: visible ? label.text : ""
                                }

                                // Any type whose name starts with "int" or "uint" is edited with a spin box limited to 0 .. 2^30.
                                ConfigSpinBox {
                                    visible: optionType.startsWith("int") || optionType.startsWith("uint")
                                    config: visible ? label.text : ""
                                    from: 0
                                    to: 1 << 30
                                }

                                // Spacing
                                Item {
                                    Layout.fillWidth: true
                                }

                                // Erases the option named by this row's label from ConfigModel.
                                Button {
                                    icon.name: "list-remove"
                                    onClicked: {
                                        ConfigModel.erase(label.text)
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    // Status footer: a horizontal separator followed by a centered line that tells the user
    // either which file is being edited or that defaults were loaded.
    footer: Pane {
        anchors.left: parent.left
        anchors.right: parent.right

        padding: 0

        ColumnLayout {
            anchors.left: parent.left
            anchors.right: parent.right
            spacing: 0

            ToolSeparator {
                Layout.fillWidth: true
                orientation: Qt.Horizontal

                // Override padding from theme.
                // Some themes use verticalPadding, others topPadding/bottomPadding, so we set them all.
                verticalPadding: 0
                bottomPadding: 0
                topPadding: 0
            }

            Label {
                Layout.alignment: Qt.AlignHCenter
                // Disabled so the text is drawn in the theme's disabled style.
                enabled: false
                // loadedDefaults selects between the "not found" notice and the path of the edited file.
                text: loadedDefaults
                        ? qsTr("Config.json not found — loaded defaults")
                        : qsTr("Editing %1").arg(urlToLocalFile(configFilename))
            }
        }
    }
}


================================================
FILE: Source/Tools/FEXConfig/qml5.qrc
================================================
<RCC>
  <!-- Main UI file and icon, available at the resource root. -->
  <qresource prefix="/">
    <file>main.qml</file>
    <file>icon.png</file>
  </qresource>
  <!-- Dialog wrappers from the qt5/ directory, exposed under /dialogs without the qt5/ path component. -->
  <qresource prefix="/dialogs">
    <file alias="FileDialog.qml">qt5/FileDialog.qml</file>
    <file alias="FolderDialog.qml">qt5/FolderDialog.qml</file>
    <file alias="MessageDialog.qml">qt5/MessageDialog.qml</file>
  </qresource>
</RCC>


================================================
FILE: Source/Tools/FEXConfig/qml6.qrc
================================================
<RCC>
  <!-- Main UI file and icon, available at the resource root. -->
  <qresource prefix="/">
    <file>main.qml</file>
    <file>icon.png</file>
  </qresource>
  <!-- Dialog wrappers from the qt6/ directory, exposed under /dialogs without the qt6/ path component. -->
  <qresource prefix="/dialogs">
    <file alias="FileDialog.qml">qt6/FileDialog.qml</file>
    <file alias="FolderDialog.qml">qt6/FolderDialog.qml</file>
    <file alias="MessageDialog.qml">qt6/MessageDialog.qml</file>
  </qresource>
</RCC>


================================================
FILE: Source/Tools/FEXConfig/qt5/FileDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick.Dialogs 1.3 as FromQt

// Wrapper around the QtQuick.Dialogs 1.3 FileDialog that adds `selectedFile` and `currentFolder`
// properties on top of the wrapped dialog's own `fileUrl` and `folder`.
FromQt.FileDialog {
    // Set to the wrapped dialog's fileUrl each time the dialog is accepted.
    property url selectedFile
    // Folder the dialog starts in; forwarded to the wrapped dialog's `folder`.
    property url currentFolder

    folder: currentFolder
    onAccepted: selectedFile = fileUrl
}


================================================
FILE: Source/Tools/FEXConfig/qt5/FolderDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick.Dialogs 1.3 as FromQt

// Folder picker built on the QtQuick.Dialogs 1.3 FileDialog with folder selection turned on.
FromQt.FileDialog {
    // Folder the dialog starts in; forwarded to the wrapped dialog's `folder`.
    property url currentFolder
    // Set to the wrapped dialog's fileUrl each time the dialog is accepted.
    property url selectedFolder

    folder: currentFolder

    // Makes the file dialog select folders instead of files.
    selectFolder: true

    onAccepted: selectedFolder = fileUrl
}


================================================
FILE: Source/Tools/FEXConfig/qt5/MessageDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick 2.15
import QtQuick.Dialogs 1.3 as FromQt

// Message dialog wrapper: hosts a QtQuick.Dialogs 1.3 MessageDialog and reports the user's choice
// through a single buttonClicked(button) signal, using the buttonSave / buttonDiscard / buttonCancel constants.
Item {
    id: dialogParent

    // Forwarded straight to the wrapped dialog.
    property alias title: child.title
    property alias text: child.text

    // Button set shown by the wrapped dialog (bound to its standardButtons).
    property int buttons

    // Values passed to buttonClicked().
    readonly property int buttonCancel: FromQt.Dialog.Cancel
    readonly property int buttonDiscard: FromQt.Dialog.Discard
    readonly property int buttonSave: FromQt.Dialog.Save

    signal buttonClicked(button: int)

    // True between open() and the first reported result.
    property bool pendingResult: false

    function open() {
        // Workaround for QTBUG-91650, due to which signals may get emitted twice
        pendingResult = true
        child.open()
    }

    // Emits buttonClicked(button) for the first result after open() and ignores any repeat.
    function reportOnce(button) {
        if (!pendingResult) {
            return
        }
        dialogParent.buttonClicked(button)
        pendingResult = false
    }

    FromQt.MessageDialog {
        id: child

        standardButtons: buttons

        onAccepted: dialogParent.reportOnce(buttonSave)
        onDiscard: dialogParent.reportOnce(buttonDiscard)
        onRejected: dialogParent.reportOnce(buttonCancel)
    }
}


================================================
FILE: Source/Tools/FEXConfig/qt6/FileDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick.Dialogs as FromQt

// Wrapper around the Qt 6 FileDialog that adds `selectExisting` / `selectMultiple` flags and
// derives the dialog's fileMode from them.
FromQt.FileDialog {
    // true: pick an existing file (OpenFile); false: pick a file to save to (SaveFile).
    property bool selectExisting: true
    // true: allow picking several files (OpenFiles); takes precedence over selectExisting.
    property bool selectMultiple: false
    // NOTE(review): the enum is accessed through the unqualified name `FileDialog` although the module
    // is imported as `FromQt` — confirm this resolves as intended (possibly to this very component).
    fileMode: selectMultiple ? FileDialog.OpenFiles : selectExisting ? FileDialog.OpenFile : FileDialog.SaveFile
}


================================================
FILE: Source/Tools/FEXConfig/qt6/FolderDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick.Dialogs as FromQt

// Pass-through wrapper: exposes the Qt 6 FolderDialog unchanged under the local FolderDialog name.
FromQt.FolderDialog {
}


================================================
FILE: Source/Tools/FEXConfig/qt6/MessageDialog.qml
================================================
// SPDX-License-Identifier: MIT
import QtQuick.Dialogs as FromQt

// Wrapper around the Qt 6 MessageDialog that adds named constants for the Save, Discard and Cancel buttons.
// NOTE(review): the enum values are accessed through the unqualified name `MessageDialog` although the
// module is imported as `FromQt` — confirm this resolves as intended (possibly to this very component).
FromQt.MessageDialog {
    readonly property int buttonSave: MessageDialog.Save
    readonly property int buttonDiscard: MessageDialog.Discard
    readonly property int buttonCancel: MessageDialog.Cancel
}


================================================
FILE: Source/Tools/FEXGDBReader/CMakeLists.txt
================================================
# Shared library built from the single source file FEXGDBReader.cpp.
add_library(FEXGDBReader SHARED FEXGDBReader.cpp)

# Installs the library into the gdb/ subdirectory of the library install directory,
# as part of the Development component.
install(TARGETS FEXGDBReader RUNTIME
  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}/gdb
  COMPONENT Development)

# Adds <build dir>/generated to the include path.
target_include_directories(FEXGDBReader PRIVATE ${CMAKE_BINARY_DIR}/generated)

# We don't actually link, but this is a nice way to get the include dirs
target_link_libraries(FEXGDBReader PRIVATE Common)


================================================
FILE: Source/Tools/FEXGDBReader/FEXGDBReader.cpp
================================================
// SPDX-License-Identifier: MIT
#include <cstddef>
#include <cstdio>
#include <unordered_map>
#include <mutex>
#include <string>

#include <FEXCore/Debug/GDBReaderInterface.h>

GDB_DECLARE_GPL_COMPATIBLE_READER;

#define debugf(...)

extern "C" {
static enum gdb_status read_debug_info(struct gdb_reader_funcs* self, struct gdb_symbol_callbacks* cbs, void* memory, long memory_sz) {

  info_t* info = (info_t*)memory;
  blocks_t* blocks = (blocks_t*)(info->blocks_ofs + (long)memory);
  gdb_line_mapping* lines = (gdb_line_mapping*)(info->lines_ofs + (long)memory);
  debugf("info: %p\n", info);
  debugf("info: s %p\n", info->filename);
  debugf("info: s %s\n", info->filename);
  debugf("info: l %d\n", info->nlines);
  debugf("info: b %d\n", info->nblocks);

  struct gdb_object* object = cbs->object_open(cbs);
  struct gdb_symtab* symtab = cbs->symtab_open(cbs, object, info->filename);

  for (int i = 0; i < info->nblocks; i++) {
    debugf("info: %d\n", i);
    debugf("info: %lx\n", blocks[i].start);
    debugf("info: %lx\n", blocks[i].end);
    debugf("info: %s\n", blocks[i].name);
    cbs->block_open(cbs, symtab, NULL, blocks[i].start, blocks[i].end, blocks[i].name);
  }

  debugf("info: lines %d\n", info->nlines);
  debugf("info: lines %p\n", lines);

  for (int i = 0; i < info->nlines; i++) {
    debugf("info: line: %d\n", i);
    debugf("info: line pc: %lx\n", lines[i].pc);
    debugf("info: line file: %d\n", lines[i].line);
  }
  cbs->line_mapping_add(cbs, symtab, info->nlines, lines);

  // don't close here, symtab and object are cached
  cbs->symtab_close(cbs, symtab);
  cbs->object_close(cbs, object);
  return GDB_SUCCESS;
}

// Unwind callback: performs no unwinding and unconditionally reports GDB_SUCCESS.
enum gdb_status unwind_frame(struct gdb_reader_funcs* self, struct gdb_unwind_callbacks* cbs) {
  return GDB_SUCCESS;
}

// Frame-id callback: returns the same hard-coded frame id ({0x1234000, 0}) for every frame.
// NOTE(review): the values look like placeholders — confirm before relying on them.
struct gdb_frame_id get_frame_id(struct gdb_reader_funcs* self, struct gdb_unwind_callbacks* cbs) {
  struct gdb_frame_id frame = {0x1234000, 0};
  return frame;
}

// Destroy callback: nothing to release, the reader table below is a function-local static.
void destroy_reader(struct gdb_reader_funcs* self) {}

// Entry point of the reader: returns the callback table (interface version, no private data,
// followed by the four callbacks defined in this file). The same static table is returned on every call.
extern struct gdb_reader_funcs* gdb_init_reader(void) {
  static struct gdb_reader_funcs funcs = {GDB_READER_INTERFACE_VERSION, NULL, read_debug_info, unwind_frame, get_frame_id, destroy_reader};
  return &funcs;
}
}


================================================
FILE: Source/Tools/FEXGetConfig/CMakeLists.txt
================================================
# Command-line tool built from the single source file Main.cpp.
add_executable(FEXGetConfig Main.cpp)

# Libraries to link against; appended to whatever LIBS already holds in this scope.
list(APPEND LIBS Common JemallocDummy)

LinkerGC(FEXGetConfig)

# Installed into bin/ as part of the Runtime component.
install(TARGETS FEXGetConfig RUNTIME
  DESTINATION bin
  COMPONENT Runtime)

target_link_libraries(FEXGetConfig PRIVATE ${LIBS})

# Adds <build dir>/generated to the include path.
target_include_directories(FEXGetConfig PRIVATE ${CMAKE_BINARY_DIR}/generated)


================================================
FILE: Source/Tools/FEXGetConfig/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/cpp-optparse/OptionParser.h"
#include "Common/Config.h"
#include "Common/FEXServerClient.h"
#include "Common/HostFeatures.h"
#include "git_version.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/PrctlUtils.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <cstdio>
#include <filesystem>
#include <string>
#include <sys/prctl.h>

namespace {
// Host capabilities that the --tso-emulation-info report is built from.
// Every flag defaults to false.
struct TSOEmulationFacts {
  // Atomics-related flags.
  bool LSE {};
  bool LSE2 {};

  // Whether the process could be switched to the hardware TSO memory model.
  bool HardwareTSO {};

  // LRCPC feature levels.
  bool LRCPC1 {};
  bool LRCPC2 {};
  bool LRCPC3 {};
};

#ifdef ARCHITECTURE_arm64
// Probes whether this process can be switched to the hardware TSO memory model.
//
// Returns true only when the process currently runs with the default memory model and a switch to
// the TSO model succeeds; the default model is requested again before returning.
// Returns false when the query prctl fails, when the current model is not the default one,
// or when the switch to TSO is refused.
bool CheckForHardwareTSO() {
  const auto CurrentModel = prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0);

  // -1 means the query itself is not supported.
  if (CurrentModel == -1) {
    return false;
  }

  // Only probe when starting from the default model.
  if (CurrentModel != PR_SET_MEM_MODEL_DEFAULT) {
    return false;
  }

  const auto SwitchResult = prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0);
  if (SwitchResult != 0) {
    return false;
  }

  // Go back to the default model; the outcome of this call is not checked.
  prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_DEFAULT, 0, 0, 0);
  return true;
}

// Bit offsets of the ID-register fields examined by GetTSOEmulationFacts().
// Each value is used as a right-shift amount and the result is then masked with IDFIELDMASK.

// Fields of the value returned by GetISAR0().
enum ISAR0_FIELDS {
  LSE = 20,
};

// Fields of the value returned by GetISAR1().
enum ISAR1_FIELDS {
  LRCPC = 20,
};

// Fields of the value returned by GetMMFR2().
enum MMFR2_FIELDS {
  AT = 32,
};

// Mask selecting one 4-bit field after it has been shifted down to bit 0.
constexpr static uint32_t IDFIELDMASK = 0b1111;
// Returns the raw value of ID_AA64ISAR0_EL1, read with an `mrs` instruction.
uint64_t GetISAR0() {
  uint64_t Result {};
  asm("mrs %0, ID_AA64ISAR0_EL1;" : "=r"(Result));
  return Result;
}

// Returns the raw value of ID_AA64ISAR1_EL1, read with an `mrs` instruction.
uint64_t GetISAR1() {
  uint64_t Result {};
  asm("mrs %0, ID_AA64ISAR1_EL1;" : "=r"(Result));
  return Result;
}

// Returns the raw value of ID_AA64MMFR2_EL1, read with an `mrs` instruction.
uint64_t GetMMFR2() {
  uint64_t Result {};
  asm("mrs %0, ID_AA64MMFR2_EL1;" : "=r"(Result));
  return Result;
}

// Collects the host facts reported by --tso-emulation-info.
//
// Each ID-register flag is derived by extracting a 4-bit field and comparing it against a minimum
// level; HardwareTSO comes from actually probing the memory-model prctl.
TSOEmulationFacts GetTSOEmulationFacts() {
  // Extracts the 4-bit field that starts at bit `Shift` of `Register`.
  const auto ExtractField = [](uint64_t Register, uint32_t Shift) -> uint64_t {
    return (Register >> Shift) & IDFIELDMASK;
  };

  const uint64_t AtomicsLevel = ExtractField(GetISAR0(), ISAR0_FIELDS::LSE);
  const uint64_t LRCPCLevel = ExtractField(GetISAR1(), ISAR1_FIELDS::LRCPC);
  const uint64_t ATLevel = ExtractField(GetMMFR2(), MMFR2_FIELDS::AT);

  TSOEmulationFacts Facts {};
  Facts.LSE = AtomicsLevel >= 0b0010;
  Facts.LSE2 = ATLevel >= 0b0001;
  Facts.HardwareTSO = CheckForHardwareTSO();
  // The three LRCPC flags are increasing thresholds on the same field.
  Facts.LRCPC1 = LRCPCLevel >= 0b0001;
  Facts.LRCPC2 = LRCPCLevel >= 0b0010;
  Facts.LRCPC3 = LRCPCLevel >= 0b0011;
  return Facts;
}
#else
// Variant used when ARCHITECTURE_arm64 is not defined: nothing is probed and every flag is left at false.
TSOEmulationFacts GetTSOEmulationFacts() {
  return {};
}
#endif
} // namespace

int main(int argc, char** argv, char** envp) {
  FEX::Config::InitializeConfigs(FEX::Config::PortableInformation {});
  FEXCore::Config::Initialize();
  FEXCore::Config::AddLayer(FEX::Config::CreateGlobalMainLayer());
  FEXCore::Config::AddLayer(FEX::Config::CreateMainLayer());
  // No FEX arguments passed through command line
  FEXCore::Config::AddLayer(FEX::Config::CreateEnvironmentLayer(envp));

  // Load the arguments
  optparse::OptionParser Parser = optparse::OptionParser().description("Simple application to get a couple of FEX options");

  Parser.add_option("--install-prefix").action("store_true").help("Print the FEX install prefix");

  Parser.add_option("--app").help("Load an application profile for this application if it exists");

  Parser.add_option("--current-rootfs").action("store_true").help("Print the directory that contains the FEX rootfs. Mounted in the case of squashfs");

  Parser.add_option("--tso-emulation-info").action("store_true").help("Print how FEX is emulating the x86-TSO memory model.");

#ifdef ARCHITECTURE_arm64
  Parser.add_option("--identification-reg-info").action("store_true").help("Print identification registers");
#endif

  Parser.add_option("--version").action("store_true").help("Print the installed FEX-Emu version");

  optparse::Values Options = Parser.parse_args(argc, argv);

  if (Options.is_set_by_user("app")) {
    // Load the application config if one was provided
    const auto ProgramName = FHU::Filesystem::GetFilename(Options["app"]);
    FEXCore::Config::AddLayer(FEX::Config::CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_GLOBAL_APP));
    FEXCore::Config::AddLayer(FEX::Config::CreateAppLayer(ProgramName, FEXCore::Config::LayerType::LAYER_LOCAL_APP));

    auto SteamID = getenv("SteamAppId");
    if (SteamID) {
      // If a SteamID exists then let's search for Steam application configs as well.
      // We want to key off both the SteamAppId number /and/ the executable since we may not want to thunk all binaries.
      const auto SteamAppName = fextl::fmt::format("Steam_{}_{}", SteamID, ProgramName);
      FEXCore::Config::AddLayer(FEX::Config::CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_GLOBAL_STEAM_APP));
      FEXCore::Config::AddLayer(FEX::Config::CreateAppLayer(SteamAppName, FEXCore::Config::LayerType::LAYER_LOCAL_STEAM_APP));
    }
  }

  FEXCore::Config::Load();

  // Reload the meta layer
  FEXCore::Config::ReloadMetaLayer();

  if (Options.is_set_by_user("version")) {
    fprintf(stdout, GIT_DESCRIBE_STRING "\n");
  }

  if (Options.is_set_by_user("install_prefix")) {
    char SelfPath[PATH_MAX];
    auto Result = readlink("/proc/self/exe", SelfPath, PATH_MAX);
    if (Result == -1) {
      Result = 0;
    }
    auto InstallPrefix = std::filesystem::path(&SelfPath[0], &SelfPath[Result]).parent_path().parent_path().string();
    fprintf(stdout, "%s\n", InstallPrefix.c_str());
  }

  if (Options.is_set_by_user("current_rootfs")) {
    int ServerFD = FEXServerClient::ConnectToServer();
    if (ServerFD != -1) {
      auto RootFS = FEXServerClient::RequestRootFSPath(ServerFD);
      if (!RootFS.empty()) {
        fprintf(stdout, "%s\n", RootFS.c_str());
      }
    }
  }

  if (Options.is_set_by_user("tso_emulation_info")) {
    auto TSOFacts = GetTSOEmulationFacts();
    const char* GPRMemoryTSOEmulation {};
    const char* MemcpyMemoryTSOEmulation {};
    const char* VectorMemoryTSOEmulation {};
    const char* UnalignedMemoryLoadStoreTSOEmulation {};

    if (TSOFacts.HardwareTSO) {
      GPRMemoryTSOEmulation = "\e[32mHardware TSO\e[0m";
    } else if (TSOFacts.LRCPC3) {
      GPRMemoryTSOEmulation = "\e[32mLRCPC3\e[0m";
    } else if (TSOFacts.LRCPC2) {
      GPRMemoryTSOEmulation = "\e[32mLRCPC2\e[0m";
    } else if (TSOFacts.LRCPC1) {
      GPRMemoryTSOEmulation = "\e[32mLRCPC\e[0m";
    } else {
      GPRMemoryTSOEmulation = "\e[31mAtomics\e[0m";
    }

    // Memcpy only uses Hardware TSO, LRCPC, and Atomics.
    if (TSOFacts.HardwareTSO) {
      MemcpyMemoryTSOEmulation = "\e[32mHardware TSO\e[0m";
    } else if (TSOFacts.LRCPC1) {
      MemcpyMemoryTSOEmulation = "\e[32mLRCPC\e[0m";
    } else {
      MemcpyMemoryTSOEmulation = "\e[31mAtomics\e[0m";
    }

    if (TSOFacts.HardwareTSO) {
      VectorMemoryTSOEmulation = "\e[32mHardware TSO\e[0m";
    } else if (TSOFacts.LRCPC3) {
      VectorMemoryTSOEmulation = "\e[32mLRCPC3\e[0m";
    } else {
      VectorMemoryTSOEmulation = "\e[31mHalf-Barriers\e[0m";
    }

    if (TSOFacts.HardwareTSO) {
      UnalignedMemoryLoadStoreTSOEmulation = "\e[32mHardware TSO\e[0m";
    } else {
      UnalignedMemoryLoadStoreTSOEmulation = "\e[31mHalf-Barriers\e[0m";
    }

    fprintf(stdout, "Hardware Features:\n");
    fprintf(stdout, "\tMemory atomics emulation method:      %s\n", TSOFacts.LSE ? "\e[32mLSE\e[0m" : "\e[31mLL/SC\e[0m");
    fprintf(stdout, "\tUnaligned atomic memory granularity:  %s\n", TSOFacts.LSE2 ? "\e[32m16-byte\e[0m" : "\e[31mNatural alignment\e[0m");
    ///< TODO: Once TME is supported by hardware this can change.
    fprintf(stdout, "\tUnaligned memory loadstore emulation: %s\n", UnalignedMemoryLoadStoreTSOEmulation);
    fprintf(stdout, "\t16-Byte split-lock atomic emulation:  %s\n", TSOFacts.LSE ? "\e[31mTearing CAS loops\e[0m" : "\e[31mTearing LL/SC loops\e[0m");
    fprintf(stdout, "\t64-Byte split-lock atomic emulation:  %s\n", TSOFacts.LSE ? "\e[31mTearing CAS loops\e[0m" : "\e[31mTearing LL/SC loops\e[0m");
    fprintf(stdout, "\tGPR memory model emulation:           %s\n", GPRMemoryTSOEmulation);
    fprintf(stdout, "\tMemcpy memory model emulation:        %s\n", MemcpyMemoryTSOEmulation);
    fprintf(stdout, "\tVector memory model emulation:        %s\n", VectorMemoryTSOEmulation);

    FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
    FEX_CONFIG_OPT(MemcpySetTSOEnabled, MEMCPYSETTSOENABLED);
    FEX_CONFIG_OPT(VectorTSOEnabled, VECTORTSOENABLED);
    FEX_CONFIG_OPT(HalfBarrierTSOEnabled, HALFBARRIERTSOENABLED);
    FEX_CONFIG_OPT(StrictInProcessSplitLocks, STRICTINPROCESSSPLITLOCKS);
    fprintf(stderr, "Strict: %d\n", StrictInProcessSplitLocks());

    fprintf(stdout, "\nConfiguration:\n");
    fprintf(stdout, "\tTSO Emulation:                        %s\n", TSOEnabled() ? "Enabled" : "Disabled");
    fprintf(stdout, "\tMemcpy TSO Emulation:                 %s\n", TSOEnabled() && MemcpySetTSOEnabled() ? "Enabled" : "Disabled");
    fprintf(stdout, "\tVector TSO Emulation:                 %s\n", TSOEnabled() && VectorTSOEnabled() ? "Enabled" : "Disabled");
    fprintf(stdout, "\tHalf-barrier unaligned TSO emulation: %s\n", TSOEnabled() && HalfBarrierTSOEnabled() ? "Enabled" : "Disabled");
    fprintf(stdout, "\t16-Byte strict split-lock emulation:  %s\n", StrictInProcessSplitLocks() ? "In-process mutex" : "Tearing");
    fprintf(stdout, "\t64-Byte strict split-lock emulation:  %s\n", StrictInProcessSplitLocks() ? "In-process mutex" : "Tearing");
  }

#ifdef ARCHITECTURE_arm64
  if (Options.is_set_by_user("identification_reg_info")) {
    auto Features = FEX::GetCPUFeaturesFromIDRegisters();
    fextl::string features {};
    features += fmt::format("isar0=0x{:x},", Features.ISAR0.Get());
    features += fmt::format("isar1=0x{:x},", Features.ISAR1.Get());
    features += fmt::format("isar2=0x{:x},", Features.ISAR2.Get());
    features += fmt::format("pfr0=0x{:x},", Features.PFR0.Get());
    features += fmt::format("pfr1=0x{:x},", Features.PFR1.Get());
    features += fmt::format("midr=0x{:x},", Features.MIDR.Get());
    features += fmt::format("mmfr0=0x{:x},", Features.MMFR0.Get());
    features += fmt::format("mmfr1=0x{:x},", Features.MMFR1.Get());
    features += fmt::format("mmfr2=0x{:x},", Features.MMFR2.Get());
    features += fmt::format("zfr0=0x{:x},", Features.ZFR0.Get());
    features += fmt::format("dczid=0x{:x},", Features.DCZID.Get());
    features += fmt::format("svevl=0x{:x}", Features.SVEVL.Get());
    fprintf(stderr, "Features: '%s'\n", features.c_str());
  }
#endif

  return 0;
}


================================================
FILE: Source/Tools/FEXInterpreter/AOT/AOTGenerator.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/CPUInfo.h"

#include "ELFCodeLoader.h"
#include "Linux/Utils/ELFContainer.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/queue.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <cstddef>
#include <sys/resource.h>
#include <sys/sysinfo.h>
#include <thread>

namespace FEX::AOT {
// Ahead-of-time compiles the code reachable inside one loaded ELF section.
//
// The work happens in two phases:
//  1. Seed a set of probable code entry points from the ELF symbols, the unwind entries, and a
//     byte-wise scan of the section for `CALL rel32` targets and `endbr64` markers.
//  2. Compile every entry point on a pool of low-priority worker threads (one per CPU). Branch
//     targets discovered during compilation that fall inside the section and were not seen before
//     are queued for compilation as well.
//
// Sections that are not executable or are smaller than 16 bytes are ignored.
//
// CTX     - context used to create the per-worker threads and to compile.
// Section - section to process; Base/Size describe the address range [Base, Base + Size).
void AOTGenSection(FEXCore::Context::Context* CTX, ELFCodeLoader::LoadedSection& Section) {
  // Make sure this section is executable and big enough
  if (!Section.Executable || Section.Size < 16) {
    return;
  }

  const uint64_t SectionBase = Section.Base;
  const uint64_t SectionMaxAddress = Section.Base + Section.Size;

  // The section covers the half-open range [SectionBase, SectionMaxAddress): the end address itself
  // is the first byte after the section and must not be treated as code belonging to it.
  const auto IsInsideSection = [SectionBase, SectionMaxAddress](uint64_t Address) {
    return Address >= SectionBase && Address < SectionMaxAddress;
  };

  fextl::set<uintptr_t> InitialBranchTargets;

  // Load the ELF again with symbol parsing this time
  ELFLoader::ELFContainer container {Section.Filename, "", true};

  // Add symbols to the branch targets list
  container.AddSymbols([&](ELFLoader::ELFSymbol* sym) {
    auto Destination = sym->Address + Section.ElfBase;

    if (!IsInsideSection(Destination)) {
      return; // outside of current section, unlikely to be real code
    }

    InitialBranchTargets.insert(Destination);
  });

  LogMan::Msg::IFmt("Symbol seed: {}", InitialBranchTargets.size());

  // Add unwind entries to the branch target list
  container.AddUnwindEntries([&](uintptr_t Entry) {
    auto Destination = Entry + Section.ElfBase;

    if (!IsInsideSection(Destination)) {
      return; // outside of current section, unlikely to be real code
    }

    InitialBranchTargets.insert(Destination);
  });

  LogMan::Msg::IFmt("Symbol + Unwind seed: {}", InitialBranchTargets.size());

  // Scan the executable section and try to find function entries.
  // The last 16 bytes are not used as scan positions, so the multi-byte reads below stay inside the section.
  for (size_t Offset = 0; Offset < (Section.Size - 16); Offset++) {
    uint8_t* pCode = (uint8_t*)(Section.Base + Offset);

    // Possible CALL <disp32>
    if (*pCode == 0xE8) {
      // Assemble the little-endian displacement in unsigned arithmetic (no shift into the sign bit),
      // then sign-extend it to pointer width.
      const uint32_t RawDisplacement = uint32_t(pCode[1]) | (uint32_t(pCode[2]) << 8) | (uint32_t(pCode[3]) << 16) | (uint32_t(pCode[4]) << 24);
      uintptr_t Destination = static_cast<int32_t>(RawDisplacement);
      // The displacement is relative to the end of the 5-byte instruction.
      Destination += (uintptr_t)pCode + 5;

      if (!IsInsideSection(Destination)) {
        continue; // outside of current section, unlikely to be real code
      }

      // Only inspect the two bytes at the destination when both of them are inside the section.
      const bool HasTwoBytes = (SectionMaxAddress - Destination) >= 2;
      auto DestinationPtr = (uint8_t*)Destination;
      if (HasTwoBytes && DestinationPtr[0] == 0 && DestinationPtr[1] == 0) {
        continue; // add al, [rax], unlikely to be real code
      }

      InitialBranchTargets.insert(Destination);
    }

    // endbr64 marker marks an indirect branch destination
    if (pCode[0] == 0xf3 && pCode[1] == 0x0f && pCode[2] == 0x1e && pCode[3] == 0xfa) {
      InitialBranchTargets.insert((uintptr_t)pCode);
    }
  }

  // Addresses that have been queued at some point (compiled or still waiting); guarded by QueueMutex.
  fextl::set<uint64_t> Compiled;
  std::atomic<int> counter = 0;

  // Work queue of entry points waiting for compilation; guarded by QueueMutex.
  fextl::queue<uint64_t> BranchTargets;

  // Setup BranchTargets, Compiled sets from InitialBranchTargets
  Compiled.insert(InitialBranchTargets.begin(), InitialBranchTargets.end());
  for (auto BranchTarget : InitialBranchTargets) {
    BranchTargets.push(BranchTarget);
  }

  InitialBranchTargets.clear();

  std::mutex QueueMutex;
  fextl::vector<std::thread> ThreadPool;

  // This code is tricky to refactor so it doesn't allocate memory through glibc.
  FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc;
  const auto Cores = FEX::CPUInfo::CalculateNumberOfCPUs();
  for (int i = 0; i < Cores; i++) {
    std::thread thd([&BranchTargets, CTX, &counter, &Compiled, &QueueMutex, SectionMaxAddress, IsInsideSection]() {
      // Set the priority of the thread so it doesn't overwhelm the system when running in the background
      setpriority(PRIO_PROCESS, FHU::Syscalls::gettid(), 19);

      // Setup thread - Each compilation thread uses its own backing FEX thread
      auto Thread = CTX->CreateThread(0, 0);
      fextl::set<uint64_t> ExternalBranchesLocal;
      CTX->ConfigureAOTGen(Thread, &ExternalBranchesLocal, SectionMaxAddress);

      // A worker leaves as soon as it observes an empty queue. Targets discovered later are still
      // processed, because the worker that queues them keeps looping.
      for (;;) {
        uint64_t BranchTarget;

        // Get an entrypoint to process from the queue
        {
          std::lock_guard<std::mutex> Lock {QueueMutex};
          if (BranchTargets.empty()) {
            break; // no entrypoint to process - exit
          }

          BranchTarget = BranchTargets.front();
          BranchTargets.pop();
        }

        // Compile entrypoint
        counter++;
        CTX->CompileRIP(Thread, BranchTarget);

        // Are there more branches?
        if (!ExternalBranchesLocal.empty()) {
          // Add them to the "to process" list
          {
            std::lock_guard<std::mutex> Lock {QueueMutex};
            for (auto Destination : ExternalBranchesLocal) {
              if (!IsInsideSection(Destination)) {
                continue;
              }
              // insert() reports whether the address was new; only new addresses get queued.
              if (Compiled.insert(Destination).second) {
                BranchTargets.push(Destination);
              }
            }
          }
          ExternalBranchesLocal.clear();
        }
      }

      // All entrypoints processed, cleanup this thread
      CTX->DestroyThread(Thread);
      // This thread is now getting abandoned. Disable glibc allocator checking so glibc can safely cleanup its internal allocations.
      FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable();
    });

    // Add to the thread pool
    ThreadPool.push_back(std::move(thd));
  }

  // Make sure all threads are finished
  for (auto& Thread : ThreadPool) {
    Thread.join();
  }

  ThreadPool.clear();

  LogMan::Msg::IFmt("\nAll Done: {}", counter.load());
}
} // namespace FEX::AOT


================================================
FILE: Source/Tools/FEXInterpreter/AOT/AOTGenerator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "ELFCodeLoader.h"

namespace FEX::AOT {
// Ahead-of-time compiles the code found in `Section` using `CTX`.
// Implemented in AOTGenerator.cpp.
void AOTGenSection(FEXCore::Context::Context* CTX, ELFCodeLoader::LoadedSection& Section);
}


================================================
FILE: Source/Tools/FEXInterpreter/CMakeLists.txt
================================================
# Libraries that the FEX executable links against.
list(APPEND LIBS FEXCore Common JemallocLibs LinuxEmulation
  CommonTools ${PTHREAD_LIB} fmt::fmt)

# Extra compile definitions; only populated when ENABLE_VIXL_SIMULATOR is set.
set(DEFINES)
if (ENABLE_VIXL_SIMULATOR)
  list(APPEND DEFINES -DVIXL_SIMULATOR=1)
endif()

# The main FEX executable.
add_executable(FEX
  FEXInterpreter.cpp
  AOT/AOTGenerator.cpp)

target_compile_definitions(FEX PRIVATE ${DEFINES})

# Enable FEX APIs to be used by targets that use target_link_libraries on FEX
set_target_properties(FEX PROPERTIES
  ENABLE_EXPORTS 1
  C_VISIBILITY_PRESET hidden
  CXX_VISIBILITY_PRESET hidden
  VISIBILITY_INLINES_HIDDEN TRUE)

# Make headers placed in the build tree's `generated` directory visible to this target.
target_include_directories(FEX PRIVATE ${CMAKE_BINARY_DIR}/generated)

target_link_libraries(FEX PRIVATE ${LIBS})

target_compile_options(FEX PRIVATE ${FEX_TUNE_COMPILE_FLAGS})

# NOTE(review): LinkerGC is a project-provided helper (presumably from Data/CMake/LinkerGC.cmake) — confirm what flags it applies.
LinkerGC(FEX)

# Install the executable as part of the Runtime component.
install(TARGETS FEX RUNTIME
    DESTINATION bin
    COMPONENT Runtime)

# Create a copy of FEX with legacy names until phased out.
install(PROGRAMS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/FEX
  RENAME FEXInterpreter
  DESTINATION bin
  COMPONENT LegacyRuntime)

# binfmt_misc registration helpers. Only provided on arm64 hosts.
# Defines a `binfmt_misc` target that registers FEX as the handler for x86 and x86-64 ELF files and,
# when an `uninstall` target exists, hooks the matching unregistration into it.
if (ARCHITECTURE_arm64)
  if (NOT USE_LEGACY_BINFMTMISC)
    # Just restart the systemd service
    add_custom_target(binfmt_misc
      echo "Restarting systemd service now."
      COMMAND "service" "systemd-binfmt" "restart")
  else()
    # Check for conflicting binfmt before installing
    set(CONFLICTING_BINFMTS_32
      ${CMAKE_INSTALL_PREFIX}/share/binfmts/qemu-i386
      ${CMAKE_INSTALL_PREFIX}/share/binfmts/box86)
    set(CONFLICTING_BINFMTS_64
      ${CMAKE_INSTALL_PREFIX}/share/binfmts/qemu-x86_64
      ${CMAKE_INSTALL_PREFIX}/share/binfmts/box64)

    find_program(UPDATE_BINFMTS_PROGRAM update-binfmts)
    if (UPDATE_BINFMTS_PROGRAM)
      # Invoke the binary that find_program located rather than relying on PATH at build time;
      # find_program may locate it in a directory (e.g. an sbin directory) that is not on the invoking user's PATH.
      add_custom_target(binfmt_misc
        echo "Attempting to install FEX binfmt_misc now."
        COMMAND "${CMAKE_SOURCE_DIR}/Scripts/CheckBinfmtNotInstall.sh" ${CONFLICTING_BINFMTS_32}
        COMMAND "${CMAKE_SOURCE_DIR}/Scripts/CheckBinfmtNotInstall.sh" ${CONFLICTING_BINFMTS_64}
        COMMAND "${UPDATE_BINFMTS_PROGRAM}" "--importdir=${CMAKE_INSTALL_PREFIX}/share/binfmts/" "--import" "FEX-x86"
        COMMAND "${UPDATE_BINFMTS_PROGRAM}" "--importdir=${CMAKE_INSTALL_PREFIX}/share/binfmts/" "--import" "FEX-x86_64"
        COMMAND ${CMAKE_COMMAND} -E
        echo "FEX binfmt_misc installed")

      if(TARGET uninstall)
        # Unregistration is best-effort: failures are ignored so that uninstall still completes.
        add_custom_target(uninstall_binfmt_misc
          COMMAND "${UPDATE_BINFMTS_PROGRAM}" --unimport FEX-x86 || (exit 0)
          COMMAND "${UPDATE_BINFMTS_PROGRAM}" --unimport FEX-x86_64 || (exit 0))

        add_dependencies(uninstall uninstall_binfmt_misc)
      endif()
    else()
      # In the case of update-binfmts not being available (Arch for example) then we need to install manually
      add_custom_target(binfmt_misc
        COMMAND ${CMAKE_COMMAND} -E
          echo "Attempting to remove FEX misc prior to install. Ignore permission denied"
        COMMAND ${CMAKE_COMMAND} -E
          echo -1 > /proc/sys/fs/binfmt_misc/FEX-x86 || (exit 0)
        COMMAND ${CMAKE_COMMAND} -E
          echo -1 > /proc/sys/fs/binfmt_misc/FEX-x86_64 || (exit 0)
        COMMAND ${CMAKE_COMMAND} -E
          echo "Attempting to install FEX misc now."
        COMMAND ${CMAKE_COMMAND} -E
          echo
          ':FEX-x86:M:0:\\x7fELF\\x01\\x01\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x03\\x00:\\xff\\xff\\xff\\xff\\xff\\xfe\\xfe\\x00\\x00\\x00\\x00\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff\\xff:${CMAKE_INSTALL_PREFIX}/bin/FEX:POCF' > /proc/sys/fs/binfmt_misc/register
        COMMAND ${CMAKE_COMMAND} -E
          echo
          ':FEX-x86_64:M:0:\\x7fELF\\x02\\x01\\x01\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x02\\x00\\x3e\\x00:\\xff\\xff\\xff\\xff\\xff\\xfe\\xfe\\x00\\x00\\x00\\x00\\xff\\xff\\xff\\xff\\xff\\xfe\\xff\\xff\\xff:${CMAKE_INSTALL_PREFIX}/bin/FEX:POCF' > /proc/sys/fs/binfmt_misc/register
        COMMAND ${CMAKE_COMMAND} -E
          echo "binfmt_misc FEX installed")

      if(TARGET uninstall)
        # Writing -1 to a binfmt_misc entry removes it; failures are ignored.
        add_custom_target(uninstall_binfmt_misc
          COMMAND ${CMAKE_COMMAND} -E
            echo -1 > /proc/sys/fs/binfmt_misc/FEX-x86 || (exit 0)
          COMMAND ${CMAKE_COMMAND} -E
            echo -1 > /proc/sys/fs/binfmt_misc/FEX-x86_64 || (exit 0))

        add_dependencies(uninstall uninstall_binfmt_misc)
      endif()
    endif()
  endif()
endif()


================================================
FILE: Source/Tools/FEXInterpreter/ELFCodeLoader.h
================================================
// SPDX-License-Identifier: MIT

#pragma once

#include "ArchHelpers/UContext.h"
#include "CodeLoader.h"
#include "Common/FDUtils.h"
#include "FEXCore/Utils/Allocator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "VDSO_Emulation.h"
#include "Linux/Utils/ELFParser.h"

#include <cstring>

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Syscalls.h>
#include <FEXHeaderUtils/SymlinkChecks.h>

#include <elf.h>
#include <fcntl.h>
#include <fmt/format.h>
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/prctl.h>
#include <sys/random.h>
#include <linux/prctl.h>

// Page-granularity helpers used when computing ELF segment mappings.
// These hard-code a 4096-byte page (mask 4095).

// Rounds x down to the start of the page containing it.
#define PAGE_START(x) ((x) & ~(uintptr_t)(4095))
// Byte offset of x within its page.
#define PAGE_OFFSET(x) ((x) & 4095)
// Rounds x up to the next page boundary (x is unchanged if already page-aligned).
#define PAGE_ALIGN(x) (((x) + 4095) & ~(uintptr_t)(4095))

class ELFCodeLoader final : public FEX::CodeLoader {
  ELFParser MainElf {};
  ELFParser InterpElf {};

  bool ElfValid {false};
  bool ExecutableStack {false};
  bool ExecuteAll {false};
  bool HasStackHeader {false};
  uintptr_t MainElfBase {};
  uintptr_t InterpeterElfBase {};
  uintptr_t MainElfEntrypoint {};
  uintptr_t Entrypoint {};
  uintptr_t BrkStart {};
  uintptr_t StackPointer {};

  // Computes the size of the address range needed to map an ET_DYN type ELF file.
  // Must not be used for ET_EXEC ELF files, since those can have large holes in their virtual mappings.
  // Returns zero when the program headers contain no PT_LOAD entry.
  static size_t CalculateDYNELFSize(const fextl::vector<Elf64_Phdr>& headers) {
    size_t LowestPageStart = ~0ULL;
    size_t HighestEnd = 0;
    bool FoundLoadSegment = false;

    for (const auto& Phdr : headers) {
      // Only PT_LOAD segments contribute to the mapped range.
      if (Phdr.p_type == PT_LOAD) {
        FoundLoadSegment = true;

        // The lower bound is rounded down to its page; the upper bound is the raw in-memory end of the segment.
        const size_t SegmentPageStart = PAGE_START(Phdr.p_vaddr);
        const size_t SegmentEnd = Phdr.p_vaddr + Phdr.p_memsz;

        LowestPageStart = std::min(LowestPageStart, SegmentPageStart);
        HighestEnd = std::max(HighestEnd, SegmentEnd);
      }
    }

    if (FoundLoadSegment) {
      return FEXCore::AlignUp(HighestEnd - LowestPageStart, FEXCore::Utils::FEX_PAGE_SIZE);
    }

    // No load segments, so need to be safe and return zero.
    return 0;
  }

  // Maps the file-backed portion of one program header into guest memory.
  //   file:    Parsed ELF whose file descriptor backs the mapping.
  //   Base:    Load base that Header.p_vaddr is relative to.
  //   Header:  Program header describing the segment.
  //   prot:    Protection flags passed to the guest mmap.
  //   flags:   Mapping flags passed to the guest mmap.
  //   Handler: Interface performing the guest mmap.
  //   Thread:  Thread state forwarded to the mmap handler.
  // Returns false if the mapping failed. Returns true on success, including the case where there is nothing to map.
  // On success the mapping is appended to Sections, provided the path behind the file descriptor can be resolved.
  bool MapFile(const ELFParser& file, uintptr_t Base, const Elf64_Phdr& Header, int prot, int flags,
               FEX::HLE::SyscallMmapInterface* const Handler, FEXCore::Core::InternalThreadState* Thread) {

    // The mapping has to start on a page boundary, so both the address and the file offset are
    // moved back by the segment's offset within its page, and the size grows by the same amount.
    const auto InPageOffset = PAGE_OFFSET(Header.p_vaddr);
    const auto MapAddress = Base + PAGE_START(Header.p_vaddr);
    const auto FileOffset = Header.p_offset - InPageOffset;
    const auto MapSize = PAGE_ALIGN(Header.p_filesz + InPageOffset);

    if (MapSize == 0) {
      // PT_LOAD section without a file size
      // Will need to have a memory size that is not zero instead
      return true;
    }

    void* Mapping = Handler->GuestMmap(Thread, (void*)MapAddress, MapSize, prot, flags, file.fd, FileOffset);
    if (FEX::HLE::HasSyscallError(Mapping)) {
      // uhoh, something went wrong
      LogMan::Msg::EFmt("MapFile: Some elf mapping failed, {}, fd: {}\n", errno, file.fd);
      return false;
    }

    // Record the new mapping in Sections, tagged with the path that the file descriptor refers to.
    char PathBuffer[PATH_MAX];
    const auto PathLength = FEX::get_fdpath(file.fd, PathBuffer);
    if (PathLength != -1) {
      Sections.push_back({Base, (uintptr_t)Mapping, MapSize, (off_t)FileOffset, fextl::string(PathBuffer, PathLength), (prot & PROT_EXEC) != 0});
    }

    return true;
  }

  // Translates the PF_R/PF_W/PF_X bits of a program header into the matching PROT_* bits for mmap.
  // Bits other than those three are ignored.
  int MapFlags(const Elf64_Phdr& Header) {
    const auto SegmentFlags = Header.p_flags;

    const int Readable = (SegmentFlags & PF_R) ? PROT_READ : 0;
    const int Writable = (SegmentFlags & PF_W) ? PROT_WRITE : 0;
    const int Executable = (SegmentFlags & PF_X) ? PROT_EXEC : 0;

    return Readable | Writable | Executable;
  }

  // Maps every PT_LOAD segment of `Elf` into guest memory and optionally reserves the BRK region.
  //
  //   Elf:      Parsed ELF whose program headers get mapped.
  //   BrkBase:  Optional output. When non-null, a BRK_SIZE region is reserved with PROT_NONE and its base is written here.
  //             For non-ET_DYN files zero is written if that reservation failed.
  //   Handler:  Interface used for all guest mmap operations.
  //   Thread:   Thread state forwarded to the mmap handler.
  //   LoadHint: Address hint for the initial ET_DYN reservation; not used for other ELF types.
  //
  // Returns the load base on success. This is the address of the reservation for ET_DYN files and zero otherwise
  // (segments are then mapped at their p_vaddr). Returns an empty optional if any mapping fails.
  std::optional<uintptr_t> LoadElfFile(ELFParser& Elf, uintptr_t* BrkBase, FEX::HLE::SyscallMmapInterface* const Handler,
                                       FEXCore::Core::InternalThreadState* Thread, uint64_t LoadHint = 0) {

    uintptr_t LoadBase = 0;
    uintptr_t BrkLoadBase = 0;
    const bool DynELF = Elf.ehdr.e_type == ET_DYN;
    // Non-dynamic ELF files get their BRK mapped after the segments, once the highest used address is known.
    const bool NeedsLateBRKMap = BrkBase && !DynELF;

    if (DynELF) {
      // Allocate a base address plus BRK padding.
      // The whole range is reserved as PROT_NONE first; the segments are mapped over it with MAP_FIXED below.
      auto TotalSize = CalculateDYNELFSize(Elf.phdrs) + (BrkBase ? BRK_SIZE : 0);
      LoadBase =
        (uintptr_t)Handler->GuestMmap(Thread, reinterpret_cast<void*>(LoadHint), TotalSize, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      if (FEX::HLE::HasSyscallError(LoadBase)) {
        return {};
      }

      // fprintf(stderr, "elf %d: %lx-%lx\n", Elf.fd, LoadBase, LoadBase + TotalSize);
      if (BrkBase) {
        // The BRK region occupies the tail of the reservation.
        BrkLoadBase = LoadBase + (TotalSize - BRK_SIZE);
      }
    }

    for (const auto& Header : Elf.phdrs) {
      if (Header.p_type != PT_LOAD) {
        continue;
      }

      int MapProt = MapFlags(Header);
      int MapType = MAP_PRIVATE | MAP_DENYWRITE | MAP_FIXED;

      if (!MapFile(Elf, LoadBase, Header, MapProt, MapType, Handler, Thread)) {
        return {};
      }

      if (Header.p_memsz > Header.p_filesz) {
        // clear bss
        // BSSStart is the first byte past the file-backed data; the remainder of that page is zeroed by hand
        // and any further whole pages are provided by an anonymous mapping.
        auto BSSStart = LoadBase + Header.p_vaddr + Header.p_filesz;
        auto BSSPageStart = PAGE_ALIGN(BSSStart);
        auto BSSPageEnd = PAGE_ALIGN(LoadBase + Header.p_vaddr + Header.p_memsz);

        // Only clear padding bytes if the section is writable
        if (Header.p_flags & PF_W) {
          memset((void*)BSSStart, 0, BSSPageStart - BSSStart);
        }

        if (BSSPageStart != BSSPageEnd) {
          auto bss = Handler->GuestMmap(Thread, (void*)BSSPageStart, BSSPageEnd - BSSPageStart, MapProt, MapType | MAP_ANONYMOUS, -1, 0);
          if (FEX::HLE::HasSyscallError(bss)) {
            LogMan::Msg::EFmt("Failed to allocate BSS @ {}, {}\n", fmt::ptr(bss), errno);
            return {};
          }
        }
      }

      if (NeedsLateBRKMap) {
        // Keep track of highest address for BRK in the case of non-dynamic ELF files.
        auto memend = LoadBase + Header.p_vaddr + Header.p_memsz;

        // track elf_brk
        if (memend > BrkLoadBase) {
          BrkLoadBase = FEXCore::AlignUp(memend, FEXCore::Utils::FEX_PAGE_SIZE);
        }
      }
    }

    if (NeedsLateBRKMap) {
      // Map the BRK after ELF if possible.
      // The address is only passed as a hint here (no MAP_FIXED), so the result may end up elsewhere.
      BrkLoadBase =
        (uintptr_t)Handler->GuestMmap(Thread, reinterpret_cast<void*>(BrkLoadBase), BRK_SIZE, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
      if (FEX::HLE::HasSyscallError(BrkLoadBase)) {
        // This isn't a catastrophic failure. This just means the BRK conflicted with the ELF.
        BrkLoadBase = 0;
      }
    }

    if (BrkBase) {
      *BrkBase = BrkLoadBase;
    }

    return LoadBase;
  }

  // Fills `Data` with `DataSize` bytes of random data obtained from getrandom(2).
  // Returns true once the whole buffer has been filled in a single call.
  // Returns false if getrandom reports an error other than EINTR.
  static bool GetRandom(void* Data, size_t DataSize) {
    for (;;) {
      // Requests of fewer than 256 bytes are not interrupted by signals once the kernel's entropy pool is
      // initialized. While the call blocks waiting for that initialization it can still fail with EINTR,
      // which is not a real failure, so the request is simply retried.
      const ssize_t Result = getrandom(Data, DataSize, 0);
      if (Result == -1) {
        if (errno == EINTR) {
          continue;
        }
        return false;
      }

      // A short read restarts the request for the full buffer.
      if (static_cast<size_t>(Result) == DataSize) {
        return true;
      }
    }
  }

public:

  // Translates a guest path into the corresponding path inside the RootFS.
  //   File:   Guest path. Relative (or empty) paths are returned unchanged.
  //   RootFS: Path prefix of the RootFS.
  // Absolute paths are prefixed with RootFS. While the result is a symlink with an absolute target, the
  // target is re-rooted inside the RootFS and followed again. A symlink with a relative target ends the walk.
  // At most MaxSymlinkDepth links are followed, so a cycle of absolute symlinks cannot hang the loader;
  // in that case the last path visited is returned and opening it is left to fail in the caller.
  static fextl::string ResolveRootfsFile(const fextl::string& File, fextl::string RootFS) {
    // If the path is relative then just run that
    if (File.empty() || File[0] != '/') {
      return File;
    }

    fextl::string RootFSLink = RootFS + File;

    // Same bound the Linux kernel applies to the number of symlinks followed during path resolution.
    constexpr size_t MaxSymlinkDepth = 40;

    char Filename[PATH_MAX];
    for (size_t Depth = 0; Depth < MaxSymlinkDepth && FHU::Symlinks::IsSymlink(RootFSLink.c_str()); ++Depth) {
      // Do some special handling if the RootFS's linker is a symlink
      // Ubuntu's rootFS by default provides an absolute location symlink to the linker
      // Resolve this around back to the rootfs
      auto SymlinkPath = FHU::Symlinks::ResolveSymlink(RootFSLink.c_str(), Filename);
      if (!SymlinkPath.starts_with('/')) {
        break;
      }

      RootFSLink = RootFS;
      RootFSLink += SymlinkPath;
    }

    return RootFSLink;
  }

  // Describes one file-backed mapping created by MapFile.
  struct LoadedSection {
    // Load base that the owning ELF's virtual addresses are relative to.
    uintptr_t ElfBase;
    // Address at which the mapping was placed.
    uintptr_t Base;
    // Page-aligned size of the mapping in bytes.
    size_t Size;
    // Offset into the backing file at which the mapping starts.
    off_t Offs;
    // Path resolved from the ELF's file descriptor.
    fextl::string Filename;
    // True if the mapping was created with PROT_EXEC.
    bool Executable;
  };

  // Mappings recorded by MapFile; emptied by FreeSections.
  fextl::vector<LoadedSection> Sections;

  // Parses the main ELF (and its interpreter, if it requests one) and collects the guest's arguments and environment.
  //
  //   Filename:         Path of the program; used only when no executable FD is available.
  //   ProgramFDFromEnv: FD of the program to load, or -1 to fall back to AT_EXECFD from the auxiliary vector.
  //   RootFS:           RootFS prefix used when resolving the program and its interpreter.
  //   args:             Application arguments; copied into ApplicationArgs.
  //   ParsedArgs:       Loader arguments; copied into LoaderArgs.
  //   envp:             Optional null-terminated environment array to pass on to the guest.
  //   AdditionalEnvp:   Optional extra environment entries appended after envp.
  //   SkipInterpreter:  When true the ELF interpreter requested by the program is not parsed.
  //
  // Nothing is mapped here. If parsing fails the constructor returns early and ElfValid stays false.
  ELFCodeLoader(const fextl::string& Filename, int ProgramFDFromEnv, const fextl::string& RootFS, const fextl::vector<fextl::string>& args,
                const fextl::vector<fextl::string>& ParsedArgs, char** const envp = nullptr,
                FEXCore::Config::Value<FEXCore::Config::StringArrayType>* AdditionalEnvp = nullptr, bool SkipInterpreter = false) {
    ApplicationArgs = args;

    bool LoadedWithFD = false;
    int FD = getauxval(AT_EXECFD);

    if (ProgramFDFromEnv != -1) {
      // If we passed the execve FD to us then use that.
      FD = ProgramFDFromEnv;
    }

    // If we are provided an EXECFD then attempt to execute that first
    // This happens in the case of binfmt_misc usage
    if (FD != 0) {
      if (!MainElf.ReadElf(FD)) {
        return;
      }
      LoadedWithFD = true;
    } else {
      // Prefer the copy inside the RootFS, falling back to the path as given.
      if (!MainElf.ReadElf(ResolveRootfsFile(Filename, RootFS)) && !MainElf.ReadElf(Filename)) {
        return;
      }
    }

    // If we have loaded with EXECFD then we have binfmt_misc preserve argv[0] also set
    // This adds an additional argument to our argument list that we need to ignore
    // argv[0] = FEX
    // argv[1] = <Path to binary>
    // argv[2] = <original user typed path to binary>
    // If our kernel is v5.12 or higher then
    // We can check if this exists by checking auxv[AT_FLAGS] for AT_FLAGS_PRESERVE_ARGV0
    // Else we need to make an assumption that if we were loaded with FD that we have preserve enabled

    uint64_t AtFlags = getauxval(AT_FLAGS);
#ifndef AT_FLAGS_PRESERVE_ARGV0
#define AT_FLAGS_PRESERVE_ARGV0 1
#endif
    uint32_t HostKernel = FEX::HLE::SyscallHandler::CalculateHostKernelVersion();
    const bool KernelPreservedArgv0 =
      HostKernel >= FEX::HLE::SyscallHandler::KernelVersion(5, 12, 0) && (AtFlags & AT_FLAGS_PRESERVE_ARGV0);

    // Erase the initial argument from the list in this case.
    // Erasing begin() of an empty vector is undefined behaviour, so an empty argument list is left alone.
    if ((KernelPreservedArgv0 || LoadedWithFD) && !ApplicationArgs.empty()) {
      ApplicationArgs.erase(ApplicationArgs.begin());
    }

    // Append any additional arguments from config
    const auto& AdditionalArgs = AdditionalArguments.All();
    ApplicationArgs.insert(ApplicationArgs.end(), AdditionalArgs.begin(), AdditionalArgs.end());

    if (!MainElf.InterpreterElf.empty() && !SkipInterpreter) {
      if (!InterpElf.ReadElf(ResolveRootfsFile(MainElf.InterpreterElf, RootFS)) && !InterpElf.ReadElf(MainElf.InterpreterElf)) {
        return;
      }

      // An interpreter that itself requests an interpreter is rejected.
      if (!InterpElf.InterpreterElf.empty()) {
        return;
      }

      // The interpreter has to be of the same ELF type as the program.
      if (InterpElf.type != MainElf.type) {
        return;
      }
    }

    ElfValid = true;

    if (envp) {
      // If we had envp passed in then make sure to set it up on the guest
      for (char* const* Entry = envp; *Entry != nullptr; ++Entry) {
        EnvironmentVariables.emplace_back(*Entry);
      }
    }

    if (AdditionalEnvp) {
      const auto& EnvpList = AdditionalEnvp->All();
      EnvironmentVariables.insert(EnvironmentVariables.end(), EnvpList.begin(), EnvpList.end());
    }

    if (InjectLibSegFault()) {
      EnvironmentVariables.emplace_back("LD_PRELOAD=libSegFault.so");
    }

    // Calculate argument and envp backing sizes
    // Each string is counted together with its null terminator.
    for (const auto& Arg : ApplicationArgs) {
      ArgumentBackingSize += Arg.size() + 1;
    }
    for (const auto& EnvVar : EnvironmentVariables) {
      EnvironmentBackingSize += EnvVar.size() + 1;
    }

    for (const auto& Arg : ParsedArgs) {
      LoaderArgs.emplace_back(Arg.c_str());
    }
  }

  // Discards the recorded section list. Only the bookkeeping in Sections is cleared; nothing is unmapped here.
  void FreeSections() {
    Sections.clear();
  }

  // Size in bytes of the guest stack mapping, as given by the STACK_SIZE constant.
  uint64_t StackSize() const override {
    return STACK_SIZE;
  }
  // Returns the current value of StackPointer, which MapMemory assigns when it maps the guest stack.
  uint64_t GetStackPointer() const override {
    return StackPointer;
  }
  // Returns the guest entry point chosen by MapMemory: the interpreter's entry point when the main ELF
  // names an interpreter, otherwise the main ELF's own entry point.
  uint64_t DefaultRIP() const override {
    return Entrypoint;
  }

  // Auxiliary vector entry with 32-bit key and value fields.
  // NOTE(review): presumably the layout written out for 32-bit guests — confirm against the code that emits the auxv.
  struct auxv32_t {
    uint32_t key;
    uint32_t val;
  };

  // Auxiliary vector entry with 64-bit fields, as stored in AuxVariables.
  // `key` holds the AT_* entry type and `val` the value associated with it.
  struct auxv_t {
    uint64_t key;
    uint64_t val;
  };

  // Returns the file descriptor held by the main ELF's parser.
  int GetMainElfFD() const {
    return MainElf.fd;
  }

  // Public wrapper that maps the main ELF by forwarding all arguments to LoadElfFile with MainElf.
  // The return value is whatever LoadElfFile returns.
  std::optional<uintptr_t> LoadMainElfFile(uintptr_t* BrkBase, FEX::HLE::SyscallMmapInterface* const Handler,
                                           FEXCore::Core::InternalThreadState* Thread, uint64_t LoadHint = 0) {
    return LoadElfFile(MainElf, BrkBase, Handler, Thread, LoadHint);
  }

  bool MapMemory(FEX::HLE::SyscallMmapInterface* const Handler, FEXCore::Core::InternalThreadState* Thread) {
    for (const auto& Header : MainElf.phdrs) {
      if (Header.p_type == PT_GNU_STACK) {
        HasStackHeader = true;
        if (Header.p_flags & PF_X) {
          ExecutableStack = true;
        }
      }

      // We ignore LOPROC..HIPROC here, kernel has a platform specific hook about it
      // Both for the main and the interpreter elf
    }

    if (!HasStackHeader && !Is64BitMode()) {
      // 32-bit behavior
      ExecutableStack = true;
      ExecuteAll = true;
    }

    // Set the process personality here
    // Also, what about ADDR_LIMIT_3GB & co ?
    uint32_t Personality = personality(~0ULL);
    Personality |= ExecuteAll ? READ_IMPLIES_EXEC : 0;
    if (-1 == personality(Personality)) {
      LogMan::Msg::EFmt("Setting personality failed");
      return false;
    }

    if (Thread) {
      // Update the thread persona.
      auto ThreadObject = static_cast<FEX::HLE::ThreadStateObject*>(Thread->FrontendPtr);
      ThreadObject->persona = Personality;
    }

    // What about ASLR and such ?
    // ADDR_LIMIT_3GB STACK -> 0xc0000000 else -> 0xFFFFe000

    // map stack here, so that nothing gets mapped there
    // This works with both 64-bit and 32-bit. The mapper will only give us a function in the correct region
    //
    // MAP_GROWSDOWN is required here. The default stack pointer allocated by the kernel is mapped with it.
    // Some libraries (like libfmod) will have a PT_GNU_STACK with executable stack bit set
    // On dlopen glibc will check its current stack allocation permission bits (using internal expectations of allocation, not
    // /proc/self/maps) If stack hasn't been allocated as executable then it will proceed to mprotect the range with the executable bit set
    // Then it will mprotect the base stack page with `PROT_READ|PROT_WRITE|PROT_EXEC|PROT_GROWSDOWN`
    // If the original stack memory region wasn't allocated with MAP_GROWSDOWN then the mprotect with PROT_GROWSDOWN will fail with EINVAL
    //
    // This is still technically a memory leak if the stack grows, but since the primary thread's stack only gets destroyed on process
    // close, this is fine.

    // Stacks need to be allocated at the hint location just like on a real x86 system.
    // These are 128MB regions on both x86-64 and x86.
    //
    // These are required to be in the correct location taking up the appropriate 128MB of space, otherwise the wine preloader crashes FEX.
    // This is due to the wine-preloader hardcoding addresses [0x7FFFFE000000 - 0x7FFFFFFF0000) as a top-down
    // allocation region. They use mmap with MAP_FIXED, ignoring any previously mapped area at that location and overwriting it.
    // Wine-preloader is expecting to allocate 32MB out of the total 128MB stack space in this case. Leaving 96MB for the application.
    //
    // If FEX doesn't allocate the stack in this region (nullptr mmap hint) then later allocations that FEX does will /eventually/
    // end up inside of this address space that wine allocates. This usually ends up being a JIT CodeBuffer, which zeroes the memory and
    // faults with a SIGILL.
    //
    // On the upside, this more accurately emulates how the kernel allocates stack space for the application when hinting at the location.
    //
    void* StackPointerBase {};
    auto VASize = FEXCore::Allocator::DetermineVASize();
    uint64_t StackHint {};
    if (Is64BitMode()) {
      if (VASize > 47) {
        // If VA size is at least as large as minimum x86 specification, then set to max.
        VASize = 47;
      }

      // Calculate the highest point the stack could go.
      StackHint = (1ULL << VASize) - FULL_STACK_SIZE;
    } else {
      // Needs to be under the 4GB VA space.
      StackHint = 0x1'0000'0000ULL - FULL_STACK_SIZE;
    }

    auto PageSize = sysconf(_SC_PAGESIZE);
    PageSize = PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE;

    do {
      // Allocate the base of the full 128MB stack range.
      StackPointerBase = Handler->GuestMmap(Thread, reinterpret_cast<void*>(StackHint), FULL_STACK_SIZE, PROT_NONE,
                                            MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_GROWSDOWN | MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
      // Scan-downward until we fit.
      StackHint -= PageSize;
    } while (FEX::HLE::HasSyscallError(StackPointerBase) && static_cast<int64_t>(StackHint) > 0);

    if (FEX::HLE::HasSyscallError(StackPointerBase)) {
      LogMan::Msg::EFmt("Allocating stack failed");
      return false;
    }

    // Allocate with permissions the 8MB of regular stack size.
    StackPointer = reinterpret_cast<uintptr_t>(Handler->GuestMmap(
      Thread, reinterpret_cast<void*>(reinterpret_cast<uint64_t>(StackPointerBase) + FULL_STACK_SIZE - StackSize()), StackSize(),
      PROT_READ | PROT_WRITE | (ExecutableStack ? PROT_EXEC : 0), MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK | MAP_GROWSDOWN, -1, 0));

    if (FEX::HLE::HasSyscallError(StackPointer)) {
      LogMan::Msg::EFmt("Allocating stack failed");
      return false;
    }

    // Load the interpreter ELF first.
    // This allows the top-down allocation of the kernel to put this at the top of the VA space.
    // This matches behaviour of native execution more closely.
    //
    // eg:
    // 555555554000-555555558000 r--p 00000000 103:0a 1311400                   /usr/bin/ls
    // 555555558000-55555556c000 r-xp 00004000 103:0a 1311400                   /usr/bin/ls
    // 55555556c000-555555574000 r--p 00018000 103:0a 1311400                   /usr/bin/ls
    // 555555575000-555555577000 rw-p 00020000 103:0a 1311400                   /usr/bin/ls
    // 555555577000-555555578000 rw-p 00000000 00:00 0                          [heap]
    // 7ffff7fbb000-7ffff7fbd000 rw-p 00000000 00:00 0
    // 7ffff7fbd000-7ffff7fc1000 r--p 00000000 00:00 0                          [vvar]
    // 7ffff7fc1000-7ffff7fc3000 r-xp 00000000 00:00 0                          [vdso]
    // 7ffff7fc3000-7ffff7fc5000 r--p 00000000 103:0a 1316948                   /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7ffff7fc5000-7ffff7fef000 r-xp 00002000 103:0a 1316948                   /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7ffff7fef000-7ffff7ffa000 r--p 0002c000 103:0a 1316948                   /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7ffff7ffb000-7ffff7fff000 rw-p 00037000 103:0a 1316948                   /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7ffffffdd000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]
    // ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]
    //
    // ARM:
    // 55ccaf8b1000-55ccaf8b5000 r--p 00000000 00:2a 4                          /tmp/.FEXMount178532-oiFrTF/usr/bin/ls
    // 55ccaf8b5000-55ccaf8c9000 r-xp 00004000 00:2a 4                          /tmp/.FEXMount178532-oiFrTF/usr/bin/ls
    // 55ccaf8c9000-55ccaf8d1000 r--p 00018000 00:2a 4                          /tmp/.FEXMount178532-oiFrTF/usr/bin/ls
    // 55ccaf8d1000-55ccaf8d2000 ---p 00000000 00:00 0
    // 55ccaf8d2000-55ccaf8d4000 rw-p 00020000 00:2a 4                          /tmp/.FEXMount178532-oiFrTF/usr/bin/ls
    // 55ccaf8d4000-55ccb00d5000 rw-p 00000000 00:00 0
    // <... Snip of misc allocations ...>
    // 7fffff6c2000-7fffff6c4000 r--p 00000000 00:2a 22                         /tmp/.FEXMount178532-oiFrTF/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7fffff6c4000-7fffff6ee000 r-xp 00002000 00:2a 22                         /tmp/.FEXMount178532-oiFrTF/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7fffff6ee000-7fffff6f9000 r--p 0002c000 00:2a 22                         /tmp/.FEXMount178532-oiFrTF/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7fffff6f9000-7fffff6fa000 ---p 00000000 00:00 0
    // 7fffff6fa000-7fffff6fe000 rw-p 00037000 00:2a 22                         /tmp/.FEXMount178532-oiFrTF/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
    // 7fffff7fe000-7fffffffe000 rw-p 00000000 00:00 0
    // 7fffffffe000-7ffffffff000 r--p 00000000 08:82 7082611                    /usr/share/fex-emu/GuestThunks/libVDSO-guest.so
    // 7ffffffff000-800000000000 rw-p 00000000 00:00 0
    uint64_t ELFLoadHint = 0;

    if (!MainElf.InterpreterElf.empty()) {
      uint64_t InterpLoadBase = 0;
      if (auto elf = LoadElfFile(InterpElf, nullptr, Handler, Thread)) {
        InterpLoadBase = *elf;
      } else {
        LogMan::Msg::EFmt("Failed to load interpreter elf file");
        return false;
      }

      InterpeterElfBase = InterpLoadBase + InterpElf.phdrs.front().p_vaddr - InterpElf.phdrs.front().p_offset;
      Entrypoint = InterpLoadBase + InterpElf.ehdr.e_entry;

      // If the ELF has an interpreter and is dynamic then we should provide a address hint for loading.
      // The kernel calculates this `load_bias` by dividing the task size by three then multiplying by two.
      // It then also offsets by a random number for ASLR purposes.
      //
      // Random number that gets added to the base needs to be in the number of bits (multiplied by pages):
      // 64-bit: [28, 32] bits
      // 32-bit: [8, 16] bits
      // By default the /minimum/ number of bits is used here.
      constexpr uint64_t TASK_SIZE_64 = (1ULL << 47);
      constexpr uint64_t TASK_SIZE_32 = (1ULL << 32);
      if (Is64BitMode()) {
        // Ensure that if we are running on a 36-bit VA system, we don't try hinting that an ELF should
        // live way outside the VA space.
        uint64_t HostVASize = 1ULL << FEXCore::Allocator::DetermineVASize();
        ELFLoadHint = std::min(HostVASize, TASK_SIZE_64) / 3 * 2;
      } else {
        ELFLoadHint = TASK_SIZE_32 / 3 * 2;
      }
#define ASLR_LOAD
#ifdef ASLR_LOAD
      // Only enable ASLR randomization if the personality has it enabled.
      bool NoRandomize = (Personality & ADDR_NO_RANDOMIZE) == ADDR_NO_RANDOMIZE;

      if (!NoRandomize) {
        constexpr uint64_t ASLR_BITS_64 = 28;
        constexpr uint64_t ASLR_BITS_32 = 8;
        uint64_t ASLR_Offset {};
        if (!GetRandom(&ASLR_Offset, sizeof(ASLR_Offset))) {
          // getrandom failed for some reason.
          ASLR_Offset = 0;
          LogMan::Msg::EFmt("RNG failed. ASLR will not work.");
        }

        if (Is64BitMode()) {
          ASLR_Offset &= (1ULL << ASLR_BITS_64) - 1;
        } else {
          ASLR_Offset &= (1ULL << ASLR_BITS_32) - 1;
        }

        ASLR_Offset <<= FEXCore::Utils::FEX_PAGE_SHIFT;
        ELFLoadHint += ASLR_Offset;
      }
#endif
      // Align the mapping
      ELFLoadHint &= FEXCore::Utils::FEX_PAGE_MASK;
    }

    // load the main elf

    uintptr_t LoadBase = 0;

    if (auto elf = LoadElfFile(MainElf, &BrkStart, Handler, Thread, ELFLoadHint)) {
      LoadBase = *elf;
      if (MainElf.ehdr.e_type == ET_DYN) {
        BaseOffset = LoadBase;
      }
    } else {
      LogMan::Msg::EFmt("Failed to load elf file");
      return false;
    }

    if (BrkStart) {
      // BRK usually comes directly after where the ELF is loaded.
      // If a value was returned then we have mapped the entire `BRK_SIZE` and need to change protections.
      BrkStart =
        (uint64_t)Handler->GuestMmap(Thread, (void*)BrkStart, BRK_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
      if (FEX::HLE::HasSyscallError(BrkStart)) {
        LogMan::Msg::EFmt("Failed to allocate BRK @ {:x}, {}\n", BrkStart, errno);
        return false;
      }
    }

    MainElfBase = LoadBase + MainElf.phdrs.front().p_vaddr - MainElf.phdrs.front().p_offset;
    MainElfEntrypoint = LoadBase + MainElf.ehdr.e_entry;

    if (MainElf.InterpreterElf.empty()) {
      InterpeterElfBase = 0;
      Entrypoint = MainElfEntrypoint;
    }

    // All done

    // Setup AuxVars
    AuxVariables.emplace_back(auxv_t {11, getauxval(AT_UID)});            // AT_UID
    AuxVariables.emplace_back(auxv_t {12, getauxval(AT_EUID)});           // AT_EUID
    AuxVariables.emplace_back(auxv_t {13, getauxval(AT_GID)});            // AT_GID
    AuxVariables.emplace_back(auxv_t {14, getauxval(AT_EGID)});           // AT_EGID
    AuxVariables.emplace_back(auxv_t {17, getauxval(AT_CLKTCK)});         // AT_CLKTIK
    AuxVariables.emplace_back(auxv_t {6, FEXCore::Utils::FEX_PAGE_SIZE}); // AT_PAGESIZE
    AuxRandom = &AuxVariables.emplace_back(auxv_t {25, ~0ULL});           // AT_RANDOM
    AuxVariables.emplace_back(auxv_t {23, getauxval(AT_SECURE)});         // AT_SECURE
    AuxVariables.emplace_back(auxv_t {8, 0});                             // AT_FLAGS
    AuxVariables.emplace_back(auxv_t {5, MainElf.phdrs.size()});          // AT_PHNUM
    AuxVariables.emplace_back(auxv_t {16, HWCap});                        // AT_HWCAP
    AuxVariables.emplace_back(auxv_t {26, HWCap2});                       // AT_HWCAP2
    AuxVariables.emplace_back(auxv_t {51, CalculateSignalStackSize()});   // AT_MINSIGSTKSZ
    AuxPlatform = &AuxVariables.emplace_back(auxv_t {24, ~0ULL});         // AT_PLATFORM
    AuxExecFN = &AuxVariables.emplace_back(auxv_t {AT_EXECFN, ~0ULL});    // AT_EXECFN

    if (Is64BitMode()) {
      AuxVariables.emplace_back(auxv_t {4, 0x38}); // AT_PHENT
    } else {
      AuxVariables.emplace_back(auxv_t {4, 0x20}); // AT_PHENT

      auto VSyscallEntry = FEX::VDSO::GetVSyscallEntry(VDSOBase);
      if (!VSyscallEntry) [[unlikely]] {
        // If the VDSO thunk doesn't exist then we might not have a vsyscall entry.
        // Newer glibc requires vsyscall to exist now. So let's allocate a buffer and stick a vsyscall in to it.
        auto VSyscallPage =
          Handler->GuestMmap(Thread, nullptr, FEXCore::Utils::FEX_PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        constexpr static uint8_t VSyscallCode[] = {
          0xcd, 0x80, // int 0x80
          0xc3,       // ret
        };
        memcpy(VSyscallPage, VSyscallCode, sizeof(VSyscallCode));
        mprotect(VSyscallPage, FEXCore::Utils::FEX_PAGE_SIZE, PROT_READ);
        VSyscallEntry = reinterpret_cast<uint64_t>(VSyscallPage);
      }

      AuxVariables.emplace_back(auxv_t {32, VSyscallEntry}); // AT_SYSINFO - Entry point to syscall
    }

    if (VDSOBase) {
      AuxVariables.emplace_back(auxv_t {33, reinterpret_cast<uint64_t>(VDSOBase)}); // AT_SYSINFO_EHDR - Address of the start of VDSO
    }

    AuxVariables.emplace_back(auxv_t {3, MainElfBase + MainElf.ehdr.e_phoff}); // Program header
    AuxVariables.emplace_back(auxv_t {7, InterpeterElfBase});                  // AT_BASE - Interpreter address
    AuxVariables.emplace_back(auxv_t {9, MainElfEntrypoint});                  // AT_ENTRY

    AuxVariables.emplace_back(auxv_t {0, 0}); // Null ender

    SetupStack();

    return true;
  }

  // Closes the file descriptors held by the main ELF container and the interpreter ELF container.
  void CloseFDs() {
    MainElf.Closefd();
    InterpElf.Closefd();
  }

  // Stack-setup helper: writes argc, the argv/envp pointer tables, the auxv table and the
  // backing argument/environment strings into the block that starts at `StackPointer`.
  //
  // PointerType/AuxType/PointerSize select the guest ABI (pointer width and auxv entry layout).
  // AuxVOffset, ArgumentOffset and EnvpOffset are byte offsets from `StackPointer`.
  // On return, *AuxTabBase holds the address of the auxv table and *AuxTabSize its size in bytes.
  template<typename PointerType, typename AuxType, size_t PointerSize>
  static void SetupPointers(uintptr_t StackPointer, uint64_t AuxVOffset, uint64_t ArgumentOffset, uint64_t EnvpOffset,
                            const fextl::vector<fextl::string>& Args, const fextl::vector<fextl::string>& EnvironmentVariables,
                            const fextl::list<auxv_t>& AuxVariables, uint64_t* AuxTabBase, uint64_t* AuxTabSize) {
    const size_t ArgCount = Args.size();
    const size_t EnvCount = EnvironmentVariables.size();

    // Slot layout: [argc][argv[0..ArgCount)][null][envp[0..EnvCount)][null]
    const uintptr_t ArgvSlotsAddr = StackPointer + PointerSize;
    const uintptr_t ArgvTerminatorAddr = ArgvSlotsAddr + ArgCount * PointerSize;
    const uintptr_t EnvpSlotsAddr = ArgvTerminatorAddr + PointerSize;

    auto* const ArgcSlot = reinterpret_cast<PointerType*>(StackPointer);
    auto* const ArgvSlots = reinterpret_cast<PointerType*>(ArgvSlotsAddr);
    auto* const ArgvTerminator = reinterpret_cast<PointerType*>(ArgvTerminatorAddr);
    auto* const EnvpSlots = reinterpret_cast<PointerType*>(EnvpSlotsAddr);
    auto* const AuxTable = reinterpret_cast<AuxType*>(StackPointer + AuxVOffset);

    *ArgcSlot = ArgCount;
    *ArgvTerminator = 0;

    // With no environment variables the first envp slot is already the terminator.
    EnvpSlots[0] = 0;

    // Copies each string (plus a null terminator) back-to-back into the backing area at
    // `BackingOffset` and stores the guest address of each copy in the matching slot.
    const auto EmitStrings = [StackPointer](const fextl::vector<fextl::string>& Strings, PointerType* Slots, uint64_t BackingOffset) {
      auto* const HostBase = reinterpret_cast<uint8_t*>(StackPointer + BackingOffset);
      const PointerType GuestBase = static_cast<PointerType>(StackPointer + BackingOffset);

      uint64_t Cursor = 0;
      size_t Index = 0;
      for (const auto& String : Strings) {
        const size_t Length = String.size();
        Slots[Index] = GuestBase + Cursor;
        if (Length != 0) {
          memcpy(HostBase + Cursor, String.data(), Length);
        }
        HostBase[Cursor + Length] = 0;

        Cursor += Length + 1;
        ++Index;
      }
    };

    EmitStrings(Args, ArgvSlots, ArgumentOffset);
    EmitStrings(EnvironmentVariables, EnvpSlots, EnvpOffset);

    // envp is terminated by a null pointer
    EnvpSlots[EnvCount] = 0;

    // Convert the generic auxv entries into the guest's entry layout
    AuxType* AuxCursor = AuxTable;
    for (const auto& Entry : AuxVariables) {
      AuxCursor->key = Entry.key;
      AuxCursor->val = Entry.val;
      ++AuxCursor;
    }

    *AuxTabBase = reinterpret_cast<uint64_t>(AuxTable);
    *AuxTabSize = sizeof(AuxType) * AuxVariables.size();
  }

  // Fills `map` with the process's current memory-map description, read from /proc/self/stat.
  //
  // Populates start_code, end_code, start_stack, start_data, end_data, start_brk, arg_start,
  // arg_end, env_start and env_end from the stat fields, `brk` from sbrk(0), and clears the
  // auxv/exe_fd members.
  //
  // Returns true when all ten fields were parsed, false otherwise (read failure or unexpected format).
  static bool GetCurrentMap(struct prctl_mm_map& map) {

    // /proc/self/stat has 52 fields of at most 20 digits each (UINT64_MAX).
    // 52*20 = 1040, so 2048 is a conservative upper bound
    char stat_buffer[2048];
    ssize_t bytes_read = FEXCore::FileLoading::LoadFileToBuffer("/proc/self/stat", stat_buffer);

    // Ensure we don't read past the end into garbage data
    stat_buffer[std::clamp(bytes_read, 0L, static_cast<ssize_t>(sizeof(stat_buffer)) - 1)] = '\0';

    // Field 2 (comm) is the executable name wrapped in parentheses. The name may itself contain
    // whitespace or parentheses, so it can't be skipped with `%*s`. Every field after it is numeric
    // or a single state character, which makes the last ')' in the buffer the end of comm.
    const char* after_comm = strrchr(stat_buffer, ')');
    if (!after_comm) {
      return false;
    }
    ++after_comm;

    // See man proc_pid_stat
    int items_read = sscanf(after_comm,
                            " %*c %*d %*d "             // 3 to 5
                            "%*d %*d %*d %*u %*u "      // 6 to 10
                            "%*u %*u %*u %*u %*u "      // 11 to 15
                            "%*d %*d %*d %*d %*d "      // 16 to 20
                            "%*d %*u %*u %*d %*u "      // 21 to 25
                            "%llu %llu %llu %*u %*u "   // 26 to 30
                            "%*u %*u %*u %*u %*u "      // 31 to 35
                            "%*u %*u %*d %*d %*u "      // 36 to 40
                            "%*u %*u %*u %*d %llu "     // 41 to 45
                            "%llu %llu %llu %llu %llu " // 46 to 50
                            "%llu",                     // 51
                            &map.start_code, &map.end_code, &map.start_stack, &map.start_data, &map.end_data, &map.start_brk,
                            &map.arg_start, &map.arg_end, &map.env_start, &map.env_end);

    if (items_read != 10) {
      return false;
    }

    map.brk = reinterpret_cast<uint64_t>(sbrk(0));

    // The kernel will leave these values unchanged, see implementation in sys.c
    map.auxv = NULL;
    map.auxv_size = 0;
    map.exe_fd = -1;

    return true;
  }

  // Point the OS to our new stack's argument data
  void RemapArgumentData(uintptr_t NewArgStart, uint64_t ArgSize) {
    struct prctl_mm_map map {};
    if (GetCurrentMap(map)) {
      map.arg_start = NewArgStart;
      map.arg_end = NewArgStart + ArgSize;

      int r = prctl(PR_SET_MM, PR_SET_MM_MAP, &map, sizeof(map), 0L);
      if (r != 0) {
        LogMan::Msg::EFmt("Failed to remap /proc/pid/cmdline data (prctl failed: result {}, errno {})", r, errno);
      }
    } else {
      LogMan::Msg::EFmt("Failed to remap /proc/pid/cmdline data (GetCurrentMap failed)");
    }
  }

  // Sets up the initial stack data (argc, argv, envp, auxv and the strings they reference).
  //
  // Computes the size and offset of every region, moves StackPointer down by the total,
  // fills in the auxv entries that point into the stack (AT_PLATFORM, AT_RANDOM, AT_EXECFN),
  // and then hands off to SetupPointers to write the pointer tables and strings.
  // Finally tells the kernel where the new argument strings live.
  void SetupStack() {
    // Start from the far end of the stack region; the argument block is carved out below it.
    // NOTE(review): assumes StackPointer initially holds the base of the stack mapping - confirm.
    StackPointer += StackSize();
    // Set up our initial CPU state
    uint64_t SizeOfPointer = Is64BitMode() ? 8 : 4;

    uint64_t TotalArgumentMemSize {};

    TotalArgumentMemSize += SizeOfPointer;                               // Argument counter size
    TotalArgumentMemSize += SizeOfPointer * ApplicationArgs.size();      // Pointers to strings
    TotalArgumentMemSize += SizeOfPointer;                               // Padding for something
    TotalArgumentMemSize += SizeOfPointer * EnvironmentVariables.size(); // Argument location for envp
    TotalArgumentMemSize += SizeOfPointer;                               // envp nullptr ender

    // The auxv table follows the pointer tables; its entry size depends on the guest bitness.
    uint64_t AuxVOffset = TotalArgumentMemSize;
    if (SizeOfPointer == 8) {
      TotalArgumentMemSize += sizeof(auxv_t) * AuxVariables.size();
    } else {
      TotalArgumentMemSize += sizeof(auxv32_t) * AuxVariables.size();
    }

    // Backing storage for the argv strings
    ArgumentOffset = TotalArgumentMemSize;
    TotalArgumentMemSize += ArgumentBackingSize;

    // Backing storage for the envp strings
    uint64_t EnvpOffset = TotalArgumentMemSize;
    TotalArgumentMemSize += EnvironmentBackingSize;

    // Storage for the AT_PLATFORM string
    uint64_t PlatformNameLocation = TotalArgumentMemSize;
    TotalArgumentMemSize += platform_string_max_size;

    // Storage for the AT_EXECFN string (copy of argv[0] plus null terminator)
    uint64_t ExecFNLocation = TotalArgumentMemSize;
    TotalArgumentMemSize += ApplicationArgs[0].size() + 1;

    // Align the argument block to 16 bytes to keep the stack aligned
    TotalArgumentMemSize = FEXCore::AlignUp(TotalArgumentMemSize, 16);

    // Random number location
    uint64_t RandomNumberLocation = TotalArgumentMemSize;
    TotalArgumentMemSize += 16;

    // Offset the stack by how much memory we need
    StackPointer -= TotalArgumentMemSize;

    // Setup our AUXP values that need memory now that the stack is setup
    AuxPlatform->val = StackPointer + PlatformNameLocation;
    char* PlatformLoc = reinterpret_cast<char*>(AuxPlatform->val);
    // Zero the whole slot first so the shorter platform name is still null terminated.
    memset(PlatformLoc, 0, platform_string_max_size);
    if (Is64BitMode()) {
      strncpy(PlatformLoc, platform_name_x86_64.data(), platform_string_max_size);
    } else {
      strncpy(PlatformLoc, platform_name_i686.data(), platform_string_max_size);
    }

    // Random value is always 128bits
    AuxRandom->val = StackPointer + RandomNumberLocation;
    uint64_t* RandomLoc = reinterpret_cast<uint64_t*>(AuxRandom->val);
    uint64_t* HostRandom = reinterpret_cast<uint64_t*>(getauxval(AT_RANDOM));
    if (HostRandom) {
      // Pass through the host's random values
      RandomLoc[0] = HostRandom[0];
      RandomLoc[1] = HostRandom[1];
    } else {
      // Nothing provided from the kernel, generate our own random values.
      if (!GetRandom(&RandomLoc[0], sizeof(uint64_t) * 2)) {
        // getrandom failed for some reason.
        RandomLoc[0] = 0;
        RandomLoc[1] = 0;
        LogMan::Msg::EFmt("RNG failed. AT_RANDOM will not be random.");
      }
    }

    // Setup ExecFN aux
    AuxExecFN->val = StackPointer + ExecFNLocation;
    const auto InvocationName = reinterpret_cast<char*>(AuxExecFN->val);
    // Size includes the null terminator, so the copy is always terminated.
    strncpy(InvocationName, ApplicationArgs[0].c_str(), ApplicationArgs[0].size() + 1);

    // Stack layout, from StackPointer upwards (P = pointer size, 8 or 4):
    // [0, P):          Argument count
    // [P, ...):        Argument pointers, one per argument
    // [+P):            Null padding pointer (argv terminator)
    // [...):           envp pointers, one per environment variable
    // [+P):            Null pointer (envp terminator)
    // [AuxVOffset):    auxv table
    // [ArgumentOffset): Argument strings, null terminated, back to back
    // [EnvpOffset):    Environment strings, null terminated, back to back
    // [...):           AT_PLATFORM string
    // [...):           AT_EXECFN string
    // [...):           Padding up to a 16-byte boundary
    // [+16):           AT_RANDOM bytes

    if (SizeOfPointer == 8) {
      SetupPointers<uint64_t, auxv_t, 8>(StackPointer, AuxVOffset, ArgumentOffset, EnvpOffset, ApplicationArgs, EnvironmentVariables,
                                         AuxVariables, &AuxTabBase, &AuxTabSize);
    } else {
      SetupPointers<uint32_t, auxv32_t, 4>(StackPointer, AuxVOffset, ArgumentOffset, EnvpOffset, ApplicationArgs, EnvironmentVariables,
                                           AuxVariables, &AuxTabBase, &AuxTabSize);
    }

    // Point the kernel's view of the process arguments at the strings just written.
    RemapArgumentData(StackPointer + ArgumentOffset, ArgumentBackingSize);
#if defined(HAS_PROGRAM_INVOCATION_NAME) && HAS_PROGRAM_INVOCATION_NAME
    // Set the glibc invocation names to the process name.
    // Mesa uses this to determine application profiles.
    // Necessary when thunking is enabled otherwise mesa would only see FEX.

    std::string_view INV = std::string_view(InvocationName, ApplicationArgs[0].size());
    // The short name is everything after the last '/', or the whole name if there is none.
    auto short_name = InvocationName;
    auto iter = INV.rfind('/');
    if (iter != INV.npos) {
      short_name = &InvocationName[iter + 1];
    }

    program_invocation_name = InvocationName;
    program_invocation_short_name = short_name;
#endif
  }

  // Returns a copy of the LoaderArgs vector.
  fextl::vector<const char*> GetExecveArguments() const override {
    return LoaderArgs;
  }

  // Reports where the guest auxv table was written and how many bytes it occupies.
  // Both values are filled in by SetupPointers during stack setup.
  AuxvResult GetAuxv() const override {
    AuxvResult Table {};
    Table.address = AuxTabBase;
    Table.size = AuxTabSize;
    return Table;
  }

  // Returns BaseOffset. It is set to the main ELF's load base when that ELF is ET_DYN
  // and otherwise keeps its zero default.
  uint64_t GetBaseOffset() const override {
    return BaseOffset;
  }

  // Returns MainElfBase: the load base adjusted by the first program header's
  // p_vaddr - p_offset.
  uint64_t GetMainElfBase() const {
    return MainElfBase;
  }

  // True when the main ELF's type is TYPE_X86_64; false for every other container type.
  bool Is64BitMode() const {
    return MainElf.type == ::ELFLoader::ELFContainer::TYPE_X86_64;
  }

  // Describes the program break region: its start address paired with the fixed BRK_SIZE.
  ::ELFLoader::ELFContainer::BRKInfo GetBRKInfo() {
    const ::ELFLoader::ELFContainer::BRKInfo Info {BrkStart, BRK_SIZE};
    return Info;
  }

  // Returns the ElfValid flag.
  bool ELFWasLoaded() {
    return ElfValid;
  }

  // Records the VDSO base address. When non-null it is later published to the guest
  // as AT_SYSINFO_EHDR and used to look up the 32-bit vsyscall entry.
  void SetVDSOBase(void* Base) {
    VDSOBase = Base;
  }

  // Derives the guest's AT_HWCAP / AT_HWCAP2 values and AVX support from emulated CPUID results.
  void CalculateHWCaps(FEXCore::Context::Context* ctx) {
    // CPUID feature bits consulted below
    constexpr uint32_t CPUID_1_ECX_AVX = 1U << 28;
    constexpr uint32_t CPUID_7_EBX_FSGSBASE = 1U << 0;

    // HWCAP2 bit layout:
    // 0 - MONITOR/MWAIT available in CPL3
    // 1 - FSGSBASE instructions available in CPL3
    constexpr uint32_t HWCAP2_FSGSBASE = 1U << 1;

    const auto Leaf1 = ctx->RunCPUIDFunction(1, 0);
    const auto Leaf7 = ctx->RunCPUIDFunction(7, 0);

    // HWCAP is just CPUID function 0x1, the EDX result
    HWCap = Leaf1.edx;

    // FSGSBase is exposed if CPUID_7_ebx[0] is set.
    if (Leaf7.ebx & CPUID_7_EBX_FSGSBASE) {
      HWCap2 = HWCAP2_FSGSBASE;
    } else {
      HWCap2 = 0;
    }

    // We need to know if we support AVX for AT_MINSIGSTKSZ
    SupportsAVX = (Leaf1.ecx & CPUID_1_ECX_AVX) != 0;
  }

  // Computes the guest-stack space consumed when a signal frame is pushed (the value
  // reported through AT_MINSIGSTKSZ).
  //
  // This covers only the state FEX stores on the guest stack, not what it keeps on the host
  // stack, and needs to match what FEXCore's dispatcher does.
  //
  // The total is the combined, alignment-padded size of:
  // - ucontext_t
  // - xstate or _libc_fpstate depending on AVX support
  // - siginfo_t
  // using the 64-bit or 32-bit guest definitions as appropriate.
  uint64_t CalculateSignalStackSize() const {
    uint64_t Total {};

    // Adds one structure to the running total, then rounds the total up to that structure's alignment.
    const auto Append = [&Total](uint64_t Size, uint64_t Alignment) {
      Total = FEXCore::AlignUp(Total + Size, Alignment);
    };

    if (!Is64BitMode()) {
      Append(sizeof(FEXCore::x86::ucontext_t), alignof(FEXCore::x86::ucontext_t));
      if (SupportsAVX) {
        Append(sizeof(FEXCore::x86::xstate), alignof(FEXCore::x86::xstate));
      } else {
        Append(sizeof(FEXCore::x86::_libc_fpstate), alignof(FEXCore::x86::_libc_fpstate));
      }
      Append(sizeof(FEXCore::x86::siginfo_t), alignof(FEXCore::x86::siginfo_t));
      return Total;
    }

    Append(sizeof(FEXCore::x86_64::ucontext_t), alignof(FEXCore::x86_64::ucontext_t));
    if (SupportsAVX) {
      Append(sizeof(FEXCore::x86_64::xstate), alignof(FEXCore::x86_64::xstate));
    } else {
      Append(sizeof(FEXCore::x86_64::_libc_fpstate), alignof(FEXCore::x86_64::_libc_fpstate));
    }
    Append(sizeof(siginfo_t), alignof(siginfo_t));
    return Total;
  }

  constexpr static uint64_t BRK_SIZE = 8 * 1024 * 1024;
  constexpr static uint64_t STACK_SIZE = 8 * 1024 * 1024;
  constexpr static uint64_t FULL_STACK_SIZE = 128 * 1024 * 1024;

  fextl::vector<fextl::string> EnvironmentVariables;
  fextl::vector<const char*> LoaderArgs;

  fextl::list<auxv_t> AuxVariables;
  uint64_t AuxTabBase {}, AuxTabSize {};
  uint64_t ArgumentBackingSize {};
  uint64_t ArgumentOffset {};
  uint64_t EnvironmentBackingSize {};
  uint64_t BaseOffset {};
  void* VDSOBase {};
  uint64_t HWCap {};
  uint64_t HWCap2 {};
  bool SupportsAVX {};

  auxv_t* AuxRandom {};
  auxv_t* AuxPlatform {};
  auxv_t* AuxExecFN {};

  static constexpr std::string_view platform_name_x86_64 = "x86_64";
  static constexpr std::string_view platform_name_i686 = "i686";
  // Need to include null character.
  static constexpr size_t platform_string_max_size = std::max(platform_name_x86_64.size(), platform_name_i686.size()) + 1;

  FEX_CONFIG_OPT(AdditionalArguments, ADDITIONALARGUMENTS);
  FEX_CONFIG_OPT(InjectLibSegFault, INJECTLIBSEGFAULT);
};


================================================
FILE: Source/Tools/FEXInterpreter/FEXInterpreter.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|FEX
desc: Glues the ELF loader, FEXCore and LinuxSyscalls to launch an elf under fex
$end_info$
*/

#include "Common/ArgumentLoader.h"
#include "Common/FEXServerClient.h"
#include "Common/Config.h"
#include "Common/HostFeatures.h"
#include "Common/Linux/SBRKAllocations.h"
#include "PortabilityInfo.h"
#include "ELFCodeLoader.h"
#include "VDSO_Emulation.h"
#include "LinuxSyscalls/GdbServer.h"
#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Utils/Threads.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include "Linux/Utils/ELFContainer.h"
#include "Thunks.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Telemetry.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/PrctlUtils.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/StringArgumentParser.h>

#include <atomic>
#include <cerrno>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <elf.h>
#include <fcntl.h>
#include <mutex>
#include <queue>
#include <set>
#include <sys/auxv.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/select.h>
#include <system_error>
#include <thread>
#include <unistd.h>
#include <utility>

#include <sys/sysinfo.h>
#include <sys/signal.h>

namespace FEX::Logging {
static bool SilentLog {};
static int OutputFD {STDERR_FILENO};

// Set an empty style to disable coloring when FEXServer output is e.g. piped to a file
static bool DisableOutputColors {};

// Formats a log message as "<level> <message>\n" and writes it to OutputFD.
// Does nothing while logging is silenced. The level tag is styled unless colors are disabled.
void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  if (!SilentLog) {
    // An empty style leaves the level tag unstyled
    auto LevelStyle = fmt::text_style {};
    if (!DisableOutputColors) {
      LevelStyle = LogMan::DebugLevelStyle(Level);
    }

    const auto Line = fextl::fmt::format("{} {}\n", fmt::styled(LogMan::DebugLevelStr(Level), LevelStyle), Message);
    write(OutputFD, Line.c_str(), Line.size());
    fsync(OutputFD);
  }
}

// Assertion messages go through the regular message handler at the ASSERT level.
void AssertHandler(const char* Message) {
  MsgHandler(LogMan::ASSERT, Message);
}

namespace FEXServer {
  static int FEXServerFD {-1};

  // Forwards a log message to FEXServerClient using the FEXServerFD log descriptor.
  void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
    FEXServerClient::MsgHandler(FEXServerFD, Level, Message);
  }

  // Forwards an assertion message to FEXServerClient using the FEXServerFD log descriptor.
  void AssertHandler(const char* Message) {
    FEXServerClient::AssertHandler(FEXServerFD, Message);
  }
} // namespace FEXServer

// Configures log output from the SILENTLOG and OUTPUTLOG config options.
//
// - Silent logging uninstalls both the throw and message handlers.
// - "stderr" logs to a duplicate of the current stderr descriptor.
// - "server" requests a log FD from FEXServer and, on success, installs the FEXServer handlers.
// - Any other non-empty value is treated as a path and opened for writing.
// If the chosen descriptor can't be obtained, logging is silenced.
void Init() {
  // Note: the local `SilentLog` option shadows the namespace-level `SilentLog` flag in this function.
  FEX_CONFIG_OPT(SilentLog, SILENTLOG);
  FEX_CONFIG_OPT(OutputLog, OUTPUTLOG);
  FEX::Logging::SilentLog = SilentLog();

  if (SilentLog()) {
    LogMan::Throw::UnInstallHandler();
    LogMan::Msg::UnInstallHandler();
  } else {
    const auto& LogFile = OutputLog();
    // If stderr or stdout then we need to dup the FD
    // In some cases some applications will close stderr and stdout
    // then redirect the FD to either a log OR some cases just not use
    // stderr/stdout and the FD will be reused for regular FD ops.
    //
    // We want to maintain the original output location otherwise we
    // can run in to problems of writing to some file
    auto LogFD = OutputFD;
    if (LogFile == "stderr") {
      LogFD = dup(STDERR_FILENO);
    } else if (LogFile == "server") {
      Logging::FEXServer::FEXServerFD = FEXServerClient::RequestLogFD(FEXServerClient::GetServerFD());
      // If the request failed, the previously installed handlers and OutputFD stay in use.
      if (FEXServer::FEXServerFD != -1) {
        LogMan::Throw::InstallHandler(Logging::FEXServer::AssertHandler);
        LogMan::Msg::InstallHandler(Logging::FEXServer::MsgHandler);
      }
    } else if (!LogFile.empty()) {
      // Created with rw-r--r-- permissions if the file doesn't exist yet.
      constexpr int USER_PERMS = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
      LogFD = open(LogFile.c_str(), O_CREAT | O_CLOEXEC | O_WRONLY, USER_PERMS);
    }

    if (LogFD == -1) {
      // dup() or open() failed; this error is still reported through the current OutputFD.
      LogMan::Msg::EFmt("Couldn't open log file. Going Silent.");
      Logging::SilentLog = true;
    } else {
      OutputFD = LogFD;
    }
  }
  // Only emit styled output when the destination is a terminal.
  DisableOutputColors = !isatty(OutputFD);
}

} // namespace FEX::Logging

namespace FEX::Allocator {

// Reserves host address space that the guest must not be given and returns the reserved regions.
//
// 64-bit guests: sets up the 48-bit allocator (if the host has a 48th VA bit).
// 32-bit guests: reserves [0x1_0000_0000, 0x2_0000_0000) as a safety net for 32-bit address
// calculations that overflow into the 64-bit range.
fextl::vector<FEXCore::Allocator::MemoryRegion> InitMemoryRegions(bool Is64Bit) {
  const auto HostPageSize = sysconf(_SC_PAGESIZE);

  if (!Is64Bit) {
    constexpr uint64_t ReserveBegin = 0x1'0000'0000ULL;
    constexpr uint64_t ReserveEnd = ReserveBegin + ReserveBegin;
    return FEXCore::Allocator::StealMemoryRegion(ReserveBegin, ReserveEnd);
  }

  // Destroy the 48th bit if it exists.
  // Fall back to FEX's page size if the host page size couldn't be queried.
  const auto EffectivePageSize = HostPageSize > 0 ? HostPageSize : FEXCore::Utils::FEX_PAGE_SIZE;
  return FEXCore::Allocator::Setup48BitAllocatorIfExists(EffectivePageSize);
}

// Creates the guest memory allocator used for 32-bit guests.
//
// Returns an empty pointer for 64-bit guests. For 32-bit guests, installs the allocator hooks,
// creates the passthrough allocator, and then drains the host allocator's remaining reserves
// that sit above the 32-bit boundary.
fextl::unique_ptr<FEX::HLE::MemAllocator> InitAllocator(bool Is64Bit) {
  if (Is64Bit) {
    return {};
  }

  const auto PageSize = sysconf(_SC_PAGESIZE);

  // Setup our userspace allocator
  // Falls back to FEX's page size if the host page size couldn't be queried.
  FEXCore::Allocator::SetupHooks(PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE);
  auto Allocator = FEX::HLE::CreatePassthroughAllocator();

  // Now that the upper 32-bit address space is blocked for future allocations,
  // exhaust all of jemalloc's remaining internal allocations that it reserved before.
  // TODO: It's unclear how reliably this exhausts those reserves
  // TODO: This will likely consume one arena inside the 32-bit VA space.
  //   - (HdkR): I've noticed jemalloc consuming an 8MB arena commonly.
  FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc;
  void* data;
  // Allocations that land above 4GB are intentionally never freed; the loop stops at the first
  // pointer whose upper 32 bits are zero (which includes a null return from malloc).
  do {
    data = malloc(0x1);
  } while (reinterpret_cast<uintptr_t>(data) >> 32 != 0);
  // Only the final, low allocation is released.
  free(data);

  return Allocator;
}

// Tears down the allocator setup: clears the allocator hooks, then hands the previously
// reserved regions to ReclaimMemoryRegion.
void Shutdown(fextl::vector<FEXCore::Allocator::MemoryRegion>&& MemoryRegions) {
  FEXCore::Allocator::ClearHooks();
  FEXCore::Allocator::ReclaimMemoryRegion(MemoryRegions);
}
} // namespace FEX::Allocator

/**
 * @brief Resolves `#!` interpreter scripts to the interpreter that should be executed.
 *
 * Opens the file (from the rootfs first, then directly) and inspects its header. If it begins
 * with a shebang, `Filename` is replaced with the interpreter path and the interpreter plus its
 * arguments are inserted at the front of `args`. Files without a shebang are left untouched.
 *
 * @param Filename In: path of the file to inspect. Out: interpreter path if the file is a script.
 * @param RootFS Path prefix tried first when opening `Filename`.
 * @param args Argument list that receives the shebang arguments at its front.
 *
 * @return false if the file can't be opened or read, is too small to hold a shebang, or has a
 *         shebang line that names no interpreter. true otherwise.
 */
bool InterpreterHandler(fextl::string* Filename, const fextl::string& RootFS, fextl::vector<fextl::string>* args) {
  int FD {-1};

  // Attempt to open the filename from the rootfs first.
  FD = open(fextl::fmt::format("{}{}", RootFS, *Filename).c_str(), O_RDONLY | O_CLOEXEC);
  if (FD == -1) {
    // Failing that, attempt to open the filename directly.
    FD = open(Filename->c_str(), O_RDONLY | O_CLOEXEC);
    if (FD == -1) {
      return false;
    }
  }

  std::array<char, 257> Header;
  const auto ReadSize = pread(FD, Header.data(), Header.size(), 0);
  close(FD);

  // Is the file large enough for shebang.
  // This also rejects a failed read (-1), so it must come before ReadSize is used as a length.
  if (ReadSize <= 2) {
    return false;
  }

  const auto Data = std::span<char>(Header.data(), static_cast<size_t>(ReadSize));

  // Handle shebang files
  if (Data[0] == '#' && Data[1] == '!') {
    std::string_view InterpreterLine {Data.begin() + 2, // strip off "#!" prefix
                                      std::find(Data.begin(), Data.end(), '\n')};
    const auto ShebangArguments = FHU::ParseArgumentsFromString(InterpreterLine);
    if (ShebangArguments.empty()) {
      // "#!" followed by nothing usable: there is no interpreter to run.
      return false;
    }

    // Executable argument
    *Filename = ShebangArguments.at(0);

    // Insert all the arguments at the start
    args->insert(args->begin(), ShebangArguments.begin(), ShebangArguments.end());
  }
  return true;
}

/**
 * @brief Queries if FEX is installed as a binfmt_misc interpreter
 *
 * @param ExecutedWithFD If FEX was executed using a binfmt_misc FD handle from the kernel
 * @param Portable Portability information about FEX being run in portable mode
 *
 * @return true if the binfmt_misc handlers are installed and being used
 */
bool QueryInterpreterInstalled(bool ExecutedWithFD, const FEX::Config::PortableInformation& Portable) {
  // Portable mode never uses the binfmt interpreter, even if it's installed.
  if (Portable.IsPortable) {
    return false;
  }

  // Being executed from an FD only happens when the kernel launched FEX through binfmt_misc,
  // so the explicit handler check can be skipped.
  if (ExecutedWithFD) {
    return true;
  }

  // Otherwise both of FEX's binfmt_misc handlers must be present.
  const bool Has32BitHandler = access("/proc/sys/fs/binfmt_misc/FEX-x86", F_OK) == 0;
  if (!Has32BitHandler) {
    return false;
  }
  return access("/proc/sys/fs/binfmt_misc/FEX-x86_64", F_OK) == 0;
}

namespace FEX::Kernel {
namespace TSO {
  // Switches the process to the hardware TSO memory model when the kernel offers it and TSO
  // emulation is enabled in the config, then informs the context so it can skip emulating TSO
  // through atomics.
  void SetupTSOEmulation(FEXCore::Context::Context* CTX) {
    // Query the current memory model; -1 means the kernel doesn't support the interface.
    const auto CurrentModel = prctl(PR_GET_MEM_MODEL, 0, 0, 0, 0);
    if (CurrentModel == -1) {
      return;
    }

    FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);

    // Nothing to do if TSO emulation isn't even enabled.
    if (!TSOEnabled()) {
      return;
    }

    // Only attempt the switch when the process is still on the default model.
    if (CurrentModel != PR_SET_MEM_MODEL_DEFAULT) {
      return;
    }

    const auto SetResult = prctl(PR_SET_MEM_MODEL, PR_SET_MEM_MODEL_TSO, 0, 0, 0);
    if (SetResult == 0) {
      // TSO mode successfully enabled. Tell the context to disable TSO emulation through atomics.
      // This flag gets inherited on thread creation, so FEX only needs to set it at the start.
      CTX->SetHardwareTSOSupport(true);
    }
  }
} // namespace TSO

namespace CompatInput {
  // Enables or disables the kernel's compat input mode for this process, when the kernel
  // supports the interface.
  void SetupCompatInput(bool enable) {
    // A failing query means the interface is unsupported; leave everything alone.
    const bool Supported = prctl(PR_GET_COMPAT_INPUT, 0, 0, 0, 0) != -1;
    if (!Supported) {
      return;
    }

    const auto RequestedMode = enable ? PR_SET_COMPAT_INPUT_ENABLE : PR_SET_COMPAT_INPUT_DISABLE;
    prctl(PR_SET_COMPAT_INPUT, RequestedMode, 0, 0, 0);
  }
} // namespace CompatInput

namespace GCS {
  // Refuses to run with a shadow stack enabled, and otherwise locks the shadow stack status
  // so it can't be turned on later.
  void CheckForGCS() {
    uint64_t Status {};
    const bool KernelSupportsShadowStack = prctl(PR_GET_SHADOW_STACK_STATUS, &Status, 0, 0, 0) != -1;
    if (!KernelSupportsShadowStack) {
      return;
    }

    const bool ShadowStackActive = (Status & PR_SHADOW_STACK_ENABLE) != 0;
    if (ShadowStackActive) {
      // Welp.
      ERROR_AND_DIE_FMT("Shadow stack is enabled which FEX is incompatible with!");
    }

    // Disable if we've gotten this far, to ensure guest can't try.
    prctl(PR_LOCK_SHADOW_STACK_STATUS, ~0ULL, 0, 0, 0);
  }
} // namespace GCS

namespace UnalignedAtomic {
  // Requests kernel-side unaligned atomic handling. Emulation is always requested;
  // strict split locks and backpatching are added according to the config options.
  void SetupKernelUnalignedAtomics() {
#ifndef PR_ARM64_SET_UNALIGN_ATOMIC
#define PR_ARM64_SET_UNALIGN_ATOMIC 0x46455849
#define PR_ARM64_UNALIGN_ATOMIC_EMULATE (1UL << 0)
#define PR_ARM64_UNALIGN_ATOMIC_BACKPATCH (1UL << 1)
#define PR_ARM64_UNALIGN_ATOMIC_STRICT_SPLIT_LOCKS (1UL << 2)
#endif

    // Interfaces with downstream FEX kernel patches to control unaligned atomic handling
    FEX_CONFIG_OPT(StrictInProcessSplitLocks, STRICTINPROCESSSPLITLOCKS);
    FEX_CONFIG_OPT(KernelUnalignedAtomicBackpatching, KERNELUNALIGNEDATOMICBACKPATCHING);

    uint64_t RequestedFlags = PR_ARM64_UNALIGN_ATOMIC_EMULATE;
    if (StrictInProcessSplitLocks()) {
      RequestedFlags |= PR_ARM64_UNALIGN_ATOMIC_STRICT_SPLIT_LOCKS;
    }
    if (KernelUnalignedAtomicBackpatching()) {
      RequestedFlags |= PR_ARM64_UNALIGN_ATOMIC_BACKPATCH;
    }

    prctl(PR_ARM64_SET_UNALIGN_ATOMIC, RequestedFlags, 0, 0, 0);
  }
} // namespace UnalignedAtomic

// Applies the per-process kernel settings FEX relies on: hardware TSO, unaligned atomic
// handling, and the compat input mode matching the guest's bitness.
void Init(bool Is64Bit, FEXCore::Context::Context* CTX) {
  // Setup TSO hardware emulation immediately after initializing the context.
  TSO::SetupTSOEmulation(CTX);
  UnalignedAtomic::SetupKernelUnalignedAtomics();

  // 32-bit guests: tell the kernel we want the compat input syscalls even though we're a
  // 64 bit process.
  // 64-bit guests: our parent could be an instance running a 32 bit application, so compat
  // input has to be explicitly disabled.
  const bool WantCompatInput = !Is64Bit;
  CompatInput::SetupCompatInput(WantCompatInput);
}

} // namespace FEX::Kernel

/**
 * @brief Get an FD from an environment variable and then unset the environment variable.
 *
 * The value is parsed as a base-10 integer. If nothing can be parsed the result stays -1.
 * The variable is removed from the environment whenever it was present.
 *
 * @param Env The environment variable to extract the FD from.
 *
 * @return -1 if the variable didn't exist.
 */
static int StealFEXFDFromEnv(const char* Env) {
  const char* const RawValue = getenv(Env);
  if (RawValue == nullptr) {
    return -1;
  }

  int ParsedFD = -1;
  const std::string_view Text {RawValue};
  const char* const First = Text.data();
  const char* const Last = First + Text.size();
  // On a parse failure from_chars leaves ParsedFD untouched.
  std::from_chars(First, Last, ParsedFD, 10);

  unsetenv(Env);
  return ParsedFD;
}

// Entry point of the interpreter: loads configuration, connects to FEXServer, loads the guest ELF,
// wires up the emulation context (signals, syscalls, thunks, VDSO), runs the guest's main thread
// and finally tears everything down in reverse order.
//
// Returns the guest's status code on a normal run, 0 when no program was given, and a non-zero
// value when setup fails (see the individual early returns below).
int main(int argc, char** argv, char** const envp) {
  auto SBRKPointer = FEX::SBRKAllocations::DisableSBRKAllocations();
  FEXCore::Allocator::GLIBCScopedFault GLIBFaultScope;

  // A non-zero AT_EXECFD auxv entry means the program was handed to us as an already-open FD.
  const bool ExecutedWithFD = getauxval(AT_EXECFD) != 0;
  const auto PortableInfo = FEX::ReadPortabilityInformation();
  const bool InterpreterInstalled = QueryInterpreterInstalled(ExecutedWithFD, PortableInfo);

  // Both variables are removed from the environment once read; -1 means "not provided".
  int FEXFD {StealFEXFDFromEnv("FEX_EXECVEFD")};
  int FEXSeccompFD {StealFEXFDFromEnv("FEX_SECCOMPFD")};

  // Early init trivial handlers.
  LogMan::Throw::InstallHandler(FEX::Logging::AssertHandler);
  LogMan::Msg::InstallHandler(FEX::Logging::MsgHandler);

  auto ArgsLoader = fextl::make_unique<FEX::ArgLoader::ArgLoader>(argc, argv);
  auto Args = ArgsLoader->Get();
  auto ParsedArgs = ArgsLoader->GetParsedArgs();
  auto Program = FEX::Config::GetApplicationNames(Args, ExecutedWithFD, FEXFD);
  if (Program.ProgramPath.empty() && FEXFD == -1) {
    // Early exit if we weren't passed an argument
    return 0;
  }

  FEX::Kernel::GCS::CheckForGCS();

  FEX::Config::LoadConfig(Program.ProgramName, envp, PortableInfo);

  // Reload the meta layer
  FEXCore::Config::ReloadMetaLayer();
  FEXCore::Config::Set(FEXCore::Config::CONFIG_INTERPRETER_INSTALLED, InterpreterInstalled ? "1" : "0");
#ifdef VIXL_SIMULATOR
  // If running under the vixl simulator, ensure that indirect runtime calls are enabled.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_DISABLE_VIXL_INDIRECT_RUNTIME_CALLS, "0");
#endif

  if (FEXSeccompFD != -1) {
    // seccomp inheritance happens unconditionally.
    FEXCore::Config::Set(FEXCore::Config::CONFIG_NEEDSSECCOMP, "1");
  }

  // Early check for process stall
  // Doesn't use CONFIG_ROOTFS and we don't want it to spin up a squashfs instance
  FEX_CONFIG_OPT(StallProcess, STALLPROCESS);
  FEX_CONFIG_OPT(StartupSleep, STARTUPSLEEP);
  FEX_CONFIG_OPT(StartupSleepProcName, STARTUPSLEEPPROCNAME);
  if (StallProcess) {
    while (1) {
      // Stall this process out forever
      select(0, nullptr, nullptr, nullptr, nullptr);
    }
  }

  // Ensure FEXServer is setup before config options try to pull CONFIG_ROOTFS
  auto SelfPath = FEX::GetSelfPath();
  if (!FEXServerClient::SetupClient(SelfPath.value_or(argv[0]))) {
    LogMan::Msg::EFmt("FEXServerClient: Failure to setup client");
    return -1;
  }

  FEX_CONFIG_OPT(LDPath, ROOTFS);
  FEX_CONFIG_OPT(Environment, ENV);
  FEX_CONFIG_OPT(HostEnvironment, HOSTENV);

  FEX::Logging::Init();

  // Optional startup delay; applies to every program unless a specific process name was configured.
  if (StartupSleep() && (StartupSleepProcName().empty() || Program.ProgramName == StartupSleepProcName())) {
    LogMan::Msg::IFmt("[{}][{}] Sleeping for {} seconds", ::getpid(), Program.ProgramName, StartupSleep());
    std::this_thread::sleep_for(std::chrono::seconds(StartupSleep()));
  }

  FEXCore::Telemetry::Initialize();

  if (!LDPath().empty() && Program.ProgramPath.starts_with(LDPath())) {
    // From this point on, ProgramPath needs to not have the LDPath prefixed on to it.
    auto RootFSLength = LDPath().size();
    // NOTE(review): if ProgramPath is exactly equal to LDPath, `at(RootFSLength)` indexes one past
    // the last character and throws std::out_of_range — confirm whether that input can occur.
    if (Program.ProgramPath.at(RootFSLength) != '/') {
      // Ensure the modified path starts as an absolute path.
      // This edge case can occur when ROOTFS ends with '/' and passed a path like `<ROOTFS>usr/bin/true`.
      --RootFSLength;
    }

    Program.ProgramPath.erase(0, RootFSLength);
  }

  // May rewrite ProgramPath and Args (both are passed by pointer).
  bool ProgramExists = InterpreterHandler(&Program.ProgramPath, LDPath(), &Args);

  if (!ExecutedWithFD && FEXFD == -1 && !ProgramExists) {
    // Early exit if the program passed in doesn't exist
    // Will prevent a crash later
    fextl::fmt::print(stderr, "{}: command not found\n", Program.ProgramPath);
    return -ENOEXEC;
  }

  // An old kernel is only reported; execution continues regardless.
  uint32_t KernelVersion = FEX::HLE::SyscallHandler::CalculateHostKernelVersion();
  if (KernelVersion < FEX::HLE::SyscallHandler::KernelVersion(5, 15)) {
    LogMan::Msg::EFmt("FEX requires kernel 5.15 minimum. Expect problems.");
  }

  // Before we go any further, set all of our host environment variables that the config has provided
  for (auto& HostEnv : HostEnvironment.All()) {
    // We are going to keep these alive in memory.
    // No need to split the string with setenv
    putenv(HostEnv.data());
  }

  ELFCodeLoader Loader {Program.ProgramPath, FEXFD, LDPath(), Args, ParsedArgs, envp, &Environment};
  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, Loader.Is64BitMode() ? "1" : "0");

  if (!Loader.ELFWasLoaded()) {
    // Loader couldn't load this program for some reason
    fextl::fmt::print(stderr, "Invalid or Unsupported elf file.\n");
#ifdef ARCHITECTURE_arm64
    fextl::fmt::print(stderr, "This is likely due to a misconfigured x86-64 RootFS\n");
    fextl::fmt::print(stderr, "Current RootFS path set to '{}'\n", LDPath());
    if (LDPath().empty() || FHU::Filesystem::Exists(LDPath()) == false) {
      fextl::fmt::print(stderr, "RootFS path doesn't exist. This is required on AArch64 hosts\n");
      fextl::fmt::print(stderr, "Use FEXRootFSFetcher to download a RootFS\n");
    }
#endif
    return -ENOEXEC;
  }

  // Publish the application's filename and config name; how they are derived depends on how we were launched.
  if (ExecutedWithFD) {
    // Don't need to canonicalize Program.ProgramPath, Config loader will have resolved this already.
    FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_FILENAME, Program.ProgramPath);
    FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_CONFIG_NAME, Program.ProgramName);
  } else if (FEXFD != -1) {
    // Anonymous program.
    FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_FILENAME, "<Anonymous>");
    FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_CONFIG_NAME, "<Anonymous>");
  } else {
    {
      char ExistsTempPath[PATH_MAX];
      char* RealPath = realpath(Program.ProgramPath.c_str(), ExistsTempPath);
      if (RealPath) {
        FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_FILENAME, fextl::string(RealPath));
      } else {
        // Can happen when jumping in to pressure-vessel.
        // `/usr/lib/pressure-vessel/from-host/libexec/steam-runtime-tools-0/pv-adverb` can't get resolved.
        FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_FILENAME, Program.ProgramPath);
      }
    }
    FEXCore::Config::Set(FEXCore::Config::CONFIG_APP_CONFIG_NAME, Program.ProgramName);
  }

  // Setup Thread handlers, so FEXCore can create threads.
  auto StackTracker = FEX::LinuxEmulation::Threads::SetupThreadHandlers();

  auto MemoryRegions = FEX::Allocator::InitMemoryRegions(Loader.Is64BitMode());
  auto Allocator = FEX::Allocator::InitAllocator(Loader.Is64BitMode());

  FEXCore::Profiler::Init(Program.ProgramName, Program.ProgramPath);

  // HostFeatures is scoped to this block; only the AVX flag is needed afterwards.
  bool SupportsAVX {};
  fextl::unique_ptr<FEXCore::Context::Context> CTX;
  {
    auto HostFeatures = FEX::FetchHostFeatures();
    CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
    SupportsAVX = HostFeatures.SupportsAVX;
  }

  FEX::Kernel::Init(Loader.Is64BitMode(), CTX.get());

  auto SignalDelegation = FEX::HLE::CreateSignalDelegator(CTX.get(), Program.ProgramName, SupportsAVX);
  auto ThunkHandler = FEX::HLE::CreateThunkHandler();

  // Only the 32-bit handler takes ownership of the allocator created above.
  auto SyscallHandler = Loader.Is64BitMode() ?
                          FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get(), ThunkHandler.get()) :
                          FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), ThunkHandler.get(), std::move(Allocator));
  SyscallHandler->SetCodeLoader(&Loader);
  CTX->SetSignalDelegator(SignalDelegation.get());
  CTX->SetSyscallHandler(SyscallHandler.get());
  CTX->SetThunkHandler(ThunkHandler.get());

  if (FEXCore::Config::Get_ENABLECODECACHINGWIP()) {
    CTX->SetCodeMapWriter(fextl::make_unique<FEXCore::CodeMapWriter>(*SyscallHandler));
  }

  // The gdb server is only created when enabled in the config.
  FEX_CONFIG_OPT(GdbServer, GDBSERVER);
  fextl::unique_ptr<FEX::GdbServer> DebugServer;
  if (GdbServer) {
    DebugServer = fextl::make_unique<FEX::GdbServer>(CTX.get(), SignalDelegation.get(), SyscallHandler.get());
  }

  // Now that we have the syscall handler. Track some FDs that are FEX owned.
  if (FEX::Logging::OutputFD > 2) {
    SyscallHandler->FM.TrackFEXFD(FEX::Logging::OutputFD);
  }
  SyscallHandler->FM.TrackFEXFD(FEXServerClient::GetServerFD());
  if (FEX::Logging::FEXServer::FEXServerFD != -1) {
    SyscallHandler->FM.TrackFEXFD(FEX::Logging::FEXServer::FEXServerFD);
  }

  if (!CTX->InitCore()) {
    return 1;
  }

  // Create a thread without a RIP or stack pointer setup initially.
  auto ParentThread = SyscallHandler->TM.CreateThread(0, 0);
  SyscallHandler->TM.TrackThread(ParentThread);
  SignalDelegation->RegisterTLSState(ParentThread);
  ThunkHandler->RegisterTLSState(ParentThread);

  // Called unconditionally; FEXSeccompFD is -1 when no seccomp FD was inherited.
  SyscallHandler->DeserializeSeccompFD(ParentThread, FEXSeccompFD);

  // Load VDSO in to memory prior to mapping our ELFs.
  auto VDSOMapping = FEX::VDSO::LoadVDSOThunks(ParentThread->Thread, Loader.Is64BitMode(), SyscallHandler.get());

  // Pass in our VDSO thunks
  ThunkHandler->AppendThunkDefinitions(FEX::VDSO::GetVDSOThunkDefinitions(Loader.Is64BitMode()));
  SignalDelegation->SetVDSOSymbols();

  {
    Loader.SetVDSOBase(VDSOMapping.VDSOBase);
    Loader.CalculateHWCaps(CTX.get());

    if (!Loader.MapMemory(SyscallHandler.get(), ParentThread->Thread)) {
      // failed to map
      LogMan::Msg::EFmt("Failed to map {}-bit elf file.", Loader.Is64BitMode() ? 64 : 32);
      return -ENOEXEC;
    }
  }

  auto BRKInfo = Loader.GetBRKInfo();

  SyscallHandler->DefaultProgramBreak(BRKInfo.Base, BRKInfo.Size);

  // Request code cache generation
  if (FEXCore::Config::Get_ENABLECODECACHINGWIP()) {
    FEXServerClient::PopulateCodeCache(FEXServerClient::GetServerFD(), Loader.GetMainElfFD(), FEXCore::Config::Get_MULTIBLOCK());
  }

  // Pull RIP and stack pointer from loader and set the thread data to it.
  ParentThread->Thread->CurrentFrame->State.rip = Loader.DefaultRIP();
  ParentThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = Loader.GetStackPointer();

  // Close the loader FDs after everything has been parsed and mapped.
  Loader.CloseFDs();

  // Runs the guest's main thread; the teardown below executes once this call returns.
  CTX->ExecuteThread(ParentThread->Thread);

  DebugServer.reset();
  SyscallHandler->TM.Stop();

  // Capture the status before the thread object is destroyed below.
  auto ProgramStatus = ParentThread->StatusCode;

  FEX::VDSO::UnloadVDSOMapping(ParentThread->Thread, SyscallHandler.get(), VDSOMapping);

  SignalDelegation->UninstallTLSState(ParentThread);
  SyscallHandler->TM.DestroyThread(ParentThread);

  // NOTE(review): DebugServer was already reset right after ExecuteThread; this second reset is a no-op.
  DebugServer.reset();
  SyscallHandler.reset();
  SignalDelegation.reset();

  FEX::LinuxEmulation::Threads::Shutdown(std::move(StackTracker));

  Loader.FreeSections();

  FEXCore::Config::Shutdown();

  LogMan::Throw::UnInstallHandler();
  LogMan::Msg::UnInstallHandler();

  FEX::Allocator::Shutdown(std::move(MemoryRegions));

  // Allocator is now original system allocator
  FEXCore::Telemetry::Shutdown(Program.ProgramName);
  FEXCore::Profiler::Shutdown();

  FEX::SBRKAllocations::ReenableSBRKAllocations(SBRKPointer);

  return ProgramStatus;
}


================================================
FILE: Source/Tools/FEXOfflineCompiler/CMakeLists.txt
================================================
# FEXOfflineCompiler: command-line tool built from a single source file.
add_executable(FEXOfflineCompiler Main.cpp)

# All dependencies are PRIVATE: nothing links against this executable.
target_link_libraries(FEXOfflineCompiler PRIVATE
  Common
  CommonTools
  cpp-optparse
  FEXCore
  JemallocLibs
  LinuxEmulation
  ${PTHREAD_LIB}
  fmt::fmt)

# Project-provided helper (presumably enables linker garbage collection for the target).
LinkerGC(FEXOfflineCompiler)

# Install the binary into <prefix>/bin as part of the Runtime component.
install(TARGETS FEXOfflineCompiler RUNTIME
  DESTINATION bin
  COMPONENT Runtime)


================================================
FILE: Source/Tools/FEXOfflineCompiler/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include "../FEXInterpreter/ELFCodeLoader.h"
#include <DummyHandlers.h>
#include <PortabilityInfo.h>
#include <Thunks.h>

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/HostFeatures.h>

#include <Common/ArgumentLoader.h>
#include <Common/Config.h>
#include <Common/FEXServerClient.h>
#include <Common/HostFeatures.h>

#include <OptionParser.h>

#include <fmt/printf.h>

#include <fstream>
#include <optional>

// Minimal syscall handler used while generating code caches offline.
//
// Guest syscalls are ignored. The handler's real job is to remember where the binary gets mapped
// (via GuestMmap) so that addresses can later be translated back to file sections.
class AOTSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEX::HLE::SyscallMmapInterface {
public:
  AOTSyscallHandler(FEXCore::HLE::SyscallOSABI SyscallOSABI) {
    OSABI = SyscallOSABI;
  }

  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override {
    // Don't do anything
    return 0;
  }

  // Description of the file being compiled; returned as part of every section lookup.
  FEXCore::ExecutableFileInfo FileInfo;
  // Successful mappings, keyed by start offset relative to VAFileStart; the value is the mapping size.
  std::map<uint64_t, uint64_t> FileRanges;

  // Address returned by the first successful GuestMmap call; 0 until then.
  uintptr_t VAFileStart = 0;

  // Returns the recorded mapping that contains Address.
  // Asserts (via LOGMAN_THROW_A_FMT) if no recorded mapping covers the address.
  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t Address) override {
    // upper_bound finds the first range starting after Address, so the candidate is the one before it.
    auto It = FileRanges.upper_bound(Address - VAFileStart);
    LOGMAN_THROW_A_FMT(It != FileRanges.begin(), "Could not find associated file mapping");
    --It;
    LOGMAN_THROW_A_FMT(VAFileStart + It->first + It->second > Address, "Could not find associated file mapping for {:#x}", Address);
    return FEXCore::ExecutableFileSectionInfo {FileInfo, VAFileStart, VAFileStart + It->first, VAFileStart + It->first + It->second};
  }

  // Reports the entire address space as one range.
  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) override {
    return {0, UINT64_MAX, true};
  }

  // Maps memory on behalf of the loader and records the mapping in FileRanges.
  // Returns whatever mmap returned (MAP_FAILED on error, in which case nothing is recorded).
  void* GuestMmap(FEXCore::Core::InternalThreadState*, void* addr, size_t Size, int prot, int Flags, int fd, off_t offset) override {
    // Force writeable to allow applying relocations
    auto Ret = mmap(addr, Size, prot | PROT_WRITE, Flags, fd, offset);
    if (Ret == MAP_FAILED) {
      // Nothing was mapped: recording MAP_FAILED as a range would poison later section lookups.
      return Ret;
    }

    const auto MappedAddress = reinterpret_cast<uintptr_t>(Ret);
    if (VAFileStart == 0) {
      VAFileStart = MappedAddress;
    }
    FileRanges[MappedAddress - VAFileStart] = Size;
    return Ret;
  }

  // Plain munmap passthrough; FileRanges is not updated.
  uint64_t GuestMunmap(FEXCore::Core::InternalThreadState*, void* addr, uint64_t length) override {
    return munmap(addr, length);
  }
};

// Log sink: prints every message to stdout, prefixed with its debug level in brackets.
static void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const auto LevelName = LogMan::DebugLevelStr(Level);
  fmt::print("[{}] {}\n", LevelName, Message);
}

// Assertion sink: prints the assertion text to stdout with an "[A]" prefix.
static void AssertHandler(const char* Message) {
  fmt::print(stdout, "[A] {}\n", Message);
}

namespace FEXCore {
// Orders executable files by their file id only, which makes ExecutableFileInfo usable as an ordered-map key.
inline bool operator<(const ExecutableFileInfo& Lhs, const ExecutableFileInfo& Rhs) noexcept {
  const auto& LeftId = Lhs.FileId;
  const auto& RightId = Rhs.FileId;
  return LeftId < RightId;
}
} // namespace FEXCore

// Hash support for ExecutableFileInfo: the file id itself serves as the hash value.
template<>
struct std::hash<FEXCore::ExecutableFileInfo> {
  std::size_t operator()(const FEXCore::ExecutableFileInfo& Val) const noexcept {
    const std::size_t IdAsHash = Val.FileId;
    return IdAsHash;
  }
};

// Placeholder data to ensure the compile thread doesn't de-reference nullptr data
static FEXCore::Core::CPUState::gdt_segment gdt[32] {};

// Creates a thread on CTX that is only used for compiling code and gives it a minimal segment setup:
// the GDT and LDT both point at the placeholder table above, and the code segment is configured
// for either 64-bit or 32-bit code depending on Is64Bit.
static FEXCore::Core::InternalThreadState* SetupCompileThread(FEXCore::Context::Context& CTX, bool Is64Bit) {
  auto Thread = CTX.CreateThread(0, 0);

  auto Frame = Thread->CurrentFrame;
  // Both descriptor tables share the same zero-initialized placeholder array.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &gdt[0];
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &gdt[0];

  // Select the default user code segment and give its descriptor a zero base and 0xFFFFF limit.
  Frame->State.cs_idx = FEXCore::Core::CPUState::DEFAULT_USER_CS << 3;
  auto GDT = FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx);
  FEXCore::Core::CPUState::SetGDTBase(GDT, 0);
  FEXCore::Core::CPUState::SetGDTLimit(GDT, 0xFFFFFU);
  // Cache the base that was just written to the descriptor.
  Frame->State.cs_cached =
    FEXCore::Core::CPUState::CalculateGDTBase(*FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx));

  if (Is64Bit) {
    GDT->L = 1; // L = 1: long mode (64-bit code segment)
    GDT->D = 0; // D = default operand size; reserved (must be 0) when L is set
  } else {
    GDT->L = 0; // L = 0: not long mode (32-bit code segment)
    GDT->D = 1; // D = 1: default operand size is 32-bit
  }

  return Thread;
}

// Compiles the given blocks of a single binary and writes the resulting code cache to OutDir.
//
// Binary    - file to generate the cache for; its Relocations member is populated as a side effect.
// BlockList - block start addresses to compile, relative to the address the binary gets mapped at.
// OutDir    - output directory; concatenated directly with the file name, so it must end in '/'.
//
// The cache is first written to "<name>.new" and then renamed over the final name, so a partially
// written cache never appears under the final name.
//
// Returns filename of generated cache on success, std::nullopt on failure.
static std::optional<std::string> GenerateSingleCache(FEXCore::ExecutableFileInfo& Binary, fextl::set<uintptr_t> BlockList, std::string_view OutDir) {
  uint64_t CodeCacheConfigId = 0; // TODO: Make unique to active configuration

  ELFCodeLoader Loader(Binary.Filename.c_str(), -1, "", fextl::vector<fextl::string> {Binary.Filename.c_str()},
                       fextl::vector<fextl::string> {}, nullptr, nullptr, true /* skip interpreter */);
  if (!Loader.ELFWasLoaded()) {
    fmt::print("Invalid or unsupported ELF file.\n");
    return std::nullopt;
  }

  const bool Is64Bit = Loader.Is64BitMode();
  auto SyscallOSABI = Is64Bit ? FEXCore::HLE::SyscallOSABI::OS_LINUX64 : FEXCore::HLE::SyscallOSABI::OS_LINUX32;
  auto SyscallHandler = std::make_unique<AOTSyscallHandler>(SyscallOSABI);

  // Populate relocations from ELF file
  {
    ELFParser RelocParser;
    RelocParser.ReadElf(Binary.Filename);
    Binary.Relocations = RelocParser.PopulateRelocations();
    SyscallHandler->FileInfo.Relocations = Binary.Relocations;
  }

  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, Is64Bit ? "1" : "0");

  // Load HostFeatures
  auto HostFeatures = FEX::FetchHostFeatures();

  if (!std::filesystem::exists(Binary.Filename)) {
    fmt::print("File {} does not exist\n", Binary.Filename);
    // TODO: Pressure vessel hits this
    return /*EXIT_FAILURE*/ std::nullopt;
  }

  auto CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);

  Loader.CalculateHWCaps(CTX.get());

  auto SignalDelegation = std::make_unique<FEX::DummyHandlers::DummySignalDelegator>();
  CTX->SetSignalDelegator(SignalDelegation.get());
  CTX->SetSyscallHandler(SyscallHandler.get());
  auto ThunkHandler = FEX::HLE::CreateThunkHandler();
  CTX->SetThunkHandler(ThunkHandler.get());

  if (!CTX->InitCore()) {
    return std::nullopt;
  }

  if (!Is64Bit) {
    const auto PageSize = sysconf(_SC_PAGESIZE);
    // Block upper address space
    FEXCore::Allocator::SetupHooks(PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE);
  }

  auto Thread = SetupCompileThread(*CTX, Is64Bit);

  {
    auto ElfBase = Loader.LoadMainElfFile(nullptr, SyscallHandler.get(), Thread);
    if (!ElfBase.has_value()) {
      ERROR_AND_DIE_FMT("Failed to load ELF file {} ({})", Binary.Filename, Binary.FileId);
    }

    {
      ELFParser RelocParser;
      RelocParser.ReadElf(Binary.Filename);
      auto relocs32 = RelocParser.ReadRawRelocations32();

      for (auto& reloc : relocs32) {
        if (ELF32_R_TYPE(reloc.r_info) == R_386_RELATIVE) {
          // The FEX-relocation is applied on top of this during cache serialization, so this must be countered
          uint32_t val = *reinterpret_cast<uint32_t*>(SyscallHandler->VAFileStart + reloc.r_offset) + SyscallHandler->VAFileStart;
          memcpy(reinterpret_cast<uint32_t*>(SyscallHandler->VAFileStart + reloc.r_offset), &val, sizeof(val));
        } else if (ELF32_R_TYPE(reloc.r_info) == R_386_32) {
          // The FEX-relocation is applied on top of this during cache serialization, so this must be countered
          uint32_t* orig = reinterpret_cast<uint32_t*>(SyscallHandler->VAFileStart + reloc.r_offset);
          uint32_t val = *orig + reloc.r_addend + SyscallHandler->VAFileStart;
          memcpy(orig, &val, sizeof(val));
        }
      }
    }
  }

  CTX->GetCodeCache().InitiateCacheGeneration();

  {
    fmt::print(stderr, "Compiling code...\n");
    FEX_CONFIG_OPT(MaxInst, MAXINST);
    for (auto Addr : BlockList) {
      // Block addresses are file-relative; rebase them onto the actual mapping before use.
      if (!CTX->CheckIfBlockIsCacheable(*Thread, Addr + SyscallHandler->VAFileStart, MaxInst)) {
        continue;
      }

      CTX->CompileRIP(Thread, Addr + SyscallHandler->VAFileStart);
    }

    auto Filename = fmt::format("{}{}-{:016x}", OutDir, FEXCore::CodeMap::GetBaseFilename(Binary, false), CodeCacheConfigId);
    auto FilenameNew = Filename + ".new";
    // O_TRUNC: a stale ".new" file left behind by an interrupted run must not leak trailing bytes
    // into the freshly written cache.
    int fd = open(FilenameNew.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd == -1) {
      fmt::print("Could not open {} for writing\n", FilenameNew);
      return std::nullopt;
    }
    {
      auto Entry = SyscallHandler->LookupExecutableFileSection(Thread, SyscallHandler->VAFileStart).value();
      CTX->GetCodeCache().SaveData(*Thread, fd, Entry, 0 /* TODO: Use static base address information if available */);
    }
    // Close before renaming so the descriptor is not leaked if rename throws.
    close(fd);
    std::filesystem::rename(FilenameNew.c_str(), Filename.c_str());
    return Filename;
  }
}

// Command handler that parses the given code map and generates a code cache for the selected x86 binary.
// If no binary is selected explicitly, it is inferred from the code map ExecutableFileId block.
static int GenerateCache(int argc, const char** argv) {
  optparse::OptionParser Parser {};
  Parser.add_option("--outdir").set_default(FEX::Config::GetCacheDirectory() + "cache").help("Output directory for generated cache files");
  Parser.add_option("--fileid").help("Select binary to generate cache for");

  optparse::Values Options = Parser.parse_args(argc, argv);
  if (Parser.args().size() != 1) {
    Parser.print_usage();
    return 1;
  }
  const fextl::string CodeMapPath = Parser.args()[0];

  std::ifstream Codemap(CodeMapPath.c_str(), std::ios_base::binary);
  if (!Codemap) {
    fmt::print("Could not open {}\n", CodeMapPath);
    return 1;
  }

  FEXCore::ExecutableFileInfo ProgramName;
  std::map<FEXCore::ExecutableFileInfo, fextl::set<uintptr_t>> Data;
  {
    auto Parsed = FEXCore::CodeMap::ParseCodeMap(Codemap);

    // If an explicit file id is selected, use it.
    // Otherwise, fall back to an IsExecutable marker (or pick the first entry if there's only one)
    auto ExplicitFileId = strtoull(((fextl::string)Options.get("fileid")).data(), nullptr, 16);
    if (ExplicitFileId) {
      ProgramName.FileId = ExplicitFileId;
      ProgramName.Filename = Parsed.at(ExplicitFileId).Filename;
    }

    for (auto& [FileId, Contents] : Parsed) {
      if (!ExplicitFileId && (Contents.IsExecutable || Parsed.size() == 1)) {
        ProgramName.FileId = FileId;
        ProgramName.Filename = Contents.Filename;
      }
      Data.emplace(std::piecewise_construct, std::forward_as_tuple(nullptr, FileId, std::move(Contents.Filename)),
                   std::forward_as_tuple(std::move(Contents.Blocks)));
    }
  }
  if (!ProgramName.FileId) {
    fmt::print("Cannot generate cache from unsanitized code map {}", CodeMapPath);
    return 1;
  }

  for (auto& [File, Blocks] : Data) {
    if (!Blocks.empty()) {
      fmt::print("Parsed {} codemap entries for {} ({:016x})\n", Blocks.size(), File.Filename, File.FileId);
    } else {
      fmt::print("Found dependency {} ({:016x})\n", File.Filename, File.FileId);
    }
  }

  if (!Data.contains(ProgramName)) {
    throw std::runtime_error(fmt::format("Input code map {} did not contain {} ({:016x})", CodeMapPath, ProgramName.Filename, ProgramName.FileId));
  }

  fextl::string OutDir(Options.get("outdir"));
  if (!OutDir.ends_with('/')) {
    OutDir.push_back('/');
  }
  std::filesystem::create_directories(OutDir);

  const auto PortableInfo = FEX::ReadPortabilityInformation();
  char* envp[] = {nullptr};
  FEX::Config::LoadConfig("", envp, PortableInfo);

  auto NumBlocks = Data.at(ProgramName).size();
  auto GeneratedCache = GenerateSingleCache(ProgramName, Data.at(ProgramName), OutDir);
  if (GeneratedCache) {
    fmt::print("Successfully populated cache {} ({} blocks) via {}\n\n", GeneratedCache.value(), NumBlocks,
               std::filesystem::path {CodeMapPath}.filename().string());
  }
  return GeneratedCache ? 0 : 1;
}

// Entry point: dispatches to the requested sub-command.
//
// Currently only "generate" exists. Anything else (including running without arguments) prints
// the usage text and returns EXIT_FAILURE.
int main(int argc, char** argv) {
  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);

  // Guard against an empty argv so basename never receives a null pointer.
  const char* SelfName = (argc > 0 && argv[0]) ? basename(argv[0]) : "FEXOfflineCompiler";

  if (argc >= 2 && argv[1] == std::string_view {"generate"}) {
    // Hand the sub-command its own argv: drop the program name and replace the command word with
    // "<program> <command>" so usage output of the sub-command reads naturally.
    // This is only built once argc >= 2 is known, so Args is never empty when Args[0] is written.
    std::vector<const char*> Args {argv + 1, argv + argc};
    auto CommandName = std::string {SelfName} + " " + argv[1];
    Args[0] = CommandName.c_str();
    return GenerateCache(argc - 1, Args.data());
  }

  fmt::print("Usage: {} <command>\n\n", SelfName);
  fmt::print("Commands:\n");
  fmt::print("  generate\tTrigger cache generation from combined code map\n");
  return EXIT_FAILURE;
}


================================================
FILE: Source/Tools/FEXRootFSFetcher/CMakeLists.txt
================================================
# FEXRootFSFetcher: command-line tool built from Main.cpp and the xxHash-based file hashing helper.
add_executable(FEXRootFSFetcher Main.cpp XXFileHash.cpp)
# Libraries are collected in LIBS and linked at the end of this file.
list(APPEND LIBS FEXCore Common JemallocDummy xxHash::xxhash ${PTHREAD_LIB})

# Project-provided helper (presumably enables linker garbage collection for the target).
LinkerGC(FEXRootFSFetcher)

# Install the binary into <prefix>/bin as part of the Runtime component.
install(TARGETS FEXRootFSFetcher RUNTIME
  DESTINATION bin
  COMPONENT Runtime)

target_link_libraries(FEXRootFSFetcher PRIVATE ${LIBS})


================================================
FILE: Source/Tools/FEXRootFSFetcher/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include "Common/cpp-optparse/OptionParser.h"
#include "Common/JSONPool.h"
#include "XXFileHash.h"

#include "Common/Config.h"

#include <array>
#include <filesystem>
#include <fstream>
#include <functional>
#include <iostream>
#include <unistd.h>
#include <optional>
#include <span>
#include <sstream>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

#include <tiny-json.h>

namespace ArgOptions {
bool AssumeYes = false;
enum class CompressedImageOption {
  OPTION_ASK,
  OPTION_EXTRACT,
  OPTION_ASIS,
};

CompressedImageOption CompressedUsageOption {CompressedImageOption::OPTION_ASK};

enum class ListQueryOption {
  OPTION_ASK,
  OPTION_FIRST,
};

ListQueryOption DistroListOption {ListQueryOption::OPTION_ASK};

fextl::vector<fextl::string> RemainingArgs;

std::string DistroName {};
std::string DistroVersion {};

enum class UIOverrideOption {
  Default,
  TTY,
  Zenity,
};

UIOverrideOption UIOption {UIOverrideOption::Default};

void ParseArguments(int argc, char** argv) {
  optparse::OptionParser Parser = optparse::OptionParser().description("Tool for fetching RootFS from FEXServers").add_help_option(true);

  Parser.add_option("-y", "--assume-yes").action("store_true").help("Assume yes to prompts");

  Parser.add_option("-x", "--extract").action("store_true").help("Extract compressed image");

  Parser.add_option("-a", "--as-is").action("store_true").help("Use compressed image as-is");

  Parser.add_option("--distro-name").help("Which distro name to select");

  Parser.add_option("--distro-version").help("Which distro version to select");

  Parser.add_option("--distro-list-first").action("store_true").help("When presented the distro-list option, automatically select the first distro if there isn't an exact match.");

  Parser.add_option("--force-ui").choices({"default", "tty", "zenity"}).set_default("default").help("Override which UI to use for selection");

  optparse::Values Options = Parser.parse_args(argc, argv);

  if (Options.is_set_by_user("assume_yes")) {
    AssumeYes = Options.get("assume_yes");
  }

  if (Options.is_set_by_user("extract")) {
    CompressedUsageOption = CompressedImageOption::OPTION_EXTRACT;
  }

  if (Options.is_set_by_user("as_is")) {
    CompressedUsageOption = CompressedImageOption::OPTION_ASIS;
  }

  if (Options.is_set_by_user("distro_list_first")) {
    DistroListOption = ListQueryOption::OPTION_FIRST;
  }

  if (Options.is_set_by_user("distro_name")) {
    DistroName = Options["distro_name"];
  }

  if (Options.is_set_by_user("distro_version")) {
    DistroVersion = Options["distro_version"];
  }

  if (Options.is_set_by_user("force_ui")) {
    const auto& Option = Options["force_ui"];
    if (Option == "tty") {
      UIOption = UIOverrideOption::TTY;
    } else if (Option == "zenity") {
      UIOption = UIOverrideOption::Zenity;
    }
  }

  RemainingArgs = Parser.args();
}
} // namespace ArgOptions

// Helpers for running external programs (looked up through PATH via execvp) and collecting their result.
namespace Exec {
// Reaps child `pid`, retrying when the wait is interrupted by a signal.
// Returns the child's exit status reinterpreted as a signed 8-bit value (so an exit status of 255,
// which is what a failed exec produces through `_exit(-1)`, becomes -1), or -1 if the child did not
// exit normally.
static int32_t WaitForExitCode(pid_t pid) {
  int32_t Status {};
  while (waitpid(pid, &Status, 0) == -1 && errno == EINTR)
    ;
  if (WIFEXITED(Status)) {
    return (int8_t)WEXITSTATUS(Status);
  }
  return -1;
}

// Runs `path` with `args` (nullptr-terminated, args[0] is the program name) and waits for it.
// Returns the exit code as described for WaitForExitCode, or -1 if the child could not be created.
int32_t ExecAndWaitForResponse(const char* path, char* const* args) {
  pid_t pid = fork();
  if (pid == -1) {
    // waitpid(-1, ...) would wait for an arbitrary child, so bail out right away.
    return -1;
  }

  if (pid == 0) {
    execvp(path, args);
    _exit(-1);
  }

  return WaitForExitCode(pid);
}

// Same as ExecAndWaitForResponse, with optional redirection of the child's stdout/stderr.
//
// stdoutRedirect / stderrRedirect:
//   -2    leave the stream untouched (default)
//   -1    close the stream in the child
//   other file descriptor the stream is redirected to
int32_t ExecAndWaitForResponseRedirect(const char* path, char* const* args, int stdoutRedirect = -2, int stderrRedirect = -2) {
  pid_t pid = fork();
  if (pid == -1) {
    return -1;
  }

  if (pid == 0) {
    if (stdoutRedirect == -1) {
      close(STDOUT_FILENO);
    } else if (stdoutRedirect != -2 && stdoutRedirect != STDOUT_FILENO) {
      // dup2 closes the previous target itself; redirecting a stream onto itself needs no work.
      dup2(stdoutRedirect, STDOUT_FILENO);
    }

    if (stderrRedirect == -1) {
      close(STDERR_FILENO);
    } else if (stderrRedirect != -2 && stderrRedirect != STDERR_FILENO) {
      dup2(stderrRedirect, STDERR_FILENO);
    }

    execvp(path, args);
    _exit(-1);
  }

  return WaitForExitCode(pid);
}

// Runs `path` with `args` and returns everything the child wrote to stdout. The child's stderr is closed.
// Returns an empty string if the pipe or child could not be created, or if the child did not exit normally.
std::string ExecAndWaitForResponseText(const char* path, char* const* args) {
  int fd[2];
  if (pipe(fd) == -1) {
    return {};
  }

  pid_t pid = fork();
  if (pid == -1) {
    close(fd[0]);
    close(fd[1]);
    return {};
  }

  if (pid == 0) {
    close(fd[0]); // Close read side

    // Redirect stdout to pipe
    dup2(fd[1], STDOUT_FILENO);

    // Close stderr
    close(STDERR_FILENO);

    // We can now close the pipe since the duplications take care of the rest
    close(fd[1]);

    execvp(path, args);
    _exit(-1);
  }

  close(fd[1]); // Close write side

  char Buffer[1024] {};
  std::string Output {};

  // Read the pipe until it closes
  for (;;) {
    const ssize_t Size = read(fd[0], Buffer, sizeof(Buffer));
    if (Size > 0) {
      Output.append(Buffer, static_cast<size_t>(Size));
      continue;
    }
    if (Size == -1 && errno == EINTR) {
      continue;
    }
    // End of stream, or a read error: a -1 must never be used as a length.
    break;
  }
  close(fd[0]);

  int32_t Status {};
  while (waitpid(pid, &Status, 0) == -1 && errno == EINTR)
    ;
  if (WIFEXITED(Status)) {
    // Return what we've read
    return Output;
  }

  return {};
}
} // namespace Exec

// Probes which external helper programs are usable on the host. Init() runs every probe and
// records the outcome in the Has_* flags.
namespace WorkingAppsTester {
static bool Has_Curl {false};
static bool Has_Squashfuse {false};
static bool Has_Unsquashfs {false};
static bool Has_Zenity {false};

// EroFS specific
static bool Has_EroFSFuse {false};
static bool Has_EroFSFsck {false};

// Runs `Tool Flag` with stdout and stderr closed and reports whether the tool could be executed.
// Only a result of -1 (which is what a failed exec yields) counts as "not available";
// any other exit code is accepted.
static bool ToolRuns(const char* Tool, const char* Flag) {
  const std::array<const char*, 3> ExecveArgs = {
    Tool,
    Flag,
    nullptr,
  };

  int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data()), -1, -1);
  return Result != -1;
}

void CheckCurl() {
  // Check if curl exists on the host
  Has_Curl = ToolRuns("curl", "-V");
}

void CheckSquashfuse() {
  Has_Squashfuse = ToolRuns("squashfuse", "--help");
}

// unsquashfs is only considered usable when it runs AND lists zstd among its decompressors.
void CheckUnsquashfs() {
  const std::array<const char*, 3> ExecveArgs = {
    "unsquashfs",
    // since unsquashfs 4.7.1, -help-all is needed to list decompressors.
    // also works with older versions.
    "-help-all",
    nullptr,
  };

  Has_Unsquashfs = false;

  // Capture the help text (stdout and stderr) in an anonymous in-memory file.
  int fd = ::syscall(SYS_memfd_create, "stdout", 0);
  if (fd == -1) {
    // Without a capture file the zstd support can't be verified.
    return;
  }

  int32_t Result = Exec::ExecAndWaitForResponseRedirect(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data()), fd, fd);
  if (Result == -1) {
    close(fd);
    return;
  }

  // Seek back to the start
  lseek(fd, 0, SEEK_SET);

  // Unsquashfs needs to support zstd
  // Scan its output to find the zstd compressor
  FILE* fp = fdopen(fd, "r");
  if (!fp) {
    close(fd);
    return;
  }

  char* Line {nullptr};
  size_t Len {};

  bool ReadingDecompressors = false;
  bool SupportsZSTD = false;
  while (getline(&Line, &Len, fp) != -1) {
    if (!ReadingDecompressors) {
      if (strstr(Line, "Decompressors available")) {
        ReadingDecompressors = true;
      }
    } else {
      if (strstr(Line, "zstd")) {
        SupportsZSTD = true;
      }
    }
  }

  free(Line);
  // fclose also closes the underlying descriptor, so fd must not be closed a second time.
  fclose(fp);

  // Disable unsquashfs if it doesn't support ZSTD
  Has_Unsquashfs = SupportsZSTD;
}

void CheckZenity() {
  // Check if zenity exists on the host
  Has_Zenity = ToolRuns("zenity", "-h");
}

// EroFS specific tests
void CheckEroFSFuse() {
  Has_EroFSFuse = ToolRuns("erofsfuse", "--help");
}

void CheckEroFSFsck() {
  Has_EroFSFsck = ToolRuns("fsck.erofs", "-V");
}

// Runs all probes and fills in the Has_* flags.
void Init() {
  CheckCurl();
  CheckSquashfuse();
  CheckUnsquashfs();
  CheckZenity();
  CheckEroFSFuse();
  CheckEroFSFsck();
}
} // namespace WorkingAppsTester

namespace DistroQuery {
// Describes the Linux distribution the tool is running on, as far as it could be detected.
struct DistroInfo {
  // Distro identifier used for matching against the RootFS list (e.g. "ubuntu", "arch").
  std::string DistroName;
  // Release string (e.g. "21.10"). Empty when the distro is unknown.
  std::string DistroVersion;
  // Set when the detected distro is "arch"; consumers may then ignore the version when matching.
  bool RollingRelease;
  // True when none of the probed files yielded both a name and a version.
  bool Unknown;
};

// Removes one pair of matching surrounding quotes ("..." or '...') if present.
// Values without surrounding quotes, including empty ones, are returned unchanged.
static std::string StripQuotes(const std::string& Value) {
  if (Value.size() >= 2) {
    const char First = Value.front();
    if ((First == '"' || First == '\'') && Value.back() == First) {
      return Value.substr(1, Value.size() - 2);
    }
  }
  return Value;
}

// Splits a `KEY=VALUE` line at the first '='. The value keeps any further '=' characters
// and has surrounding quotes removed. Returns false for lines without '='.
static bool SplitKeyValue(const std::string& Line, std::string& Key, std::string& Value) {
  const auto Separator = Line.find('=');
  if (Separator == std::string::npos) {
    return false;
  }

  Key = Line.substr(0, Separator);
  Value = StripQuotes(Line.substr(Separator + 1));
  return true;
}

// Scans the KEY=VALUE file at `Path` for a distro name and a distro version.
//
// NameKey:        key holding the distro name.
// VersionKey:     key holding the distro version.
// AltVersionKey:  optional second key that is also accepted as the version (may be nullptr).
// LowercaseName:  lowercase the name before storing it.
//
// `Info` is only written when BOTH a name and a version were found, so a half-populated
// file never leaks partial data into the result. Returns true in that case.
static bool ReadNameAndVersion(const char* Path, const char* NameKey, const char* VersionKey, const char* AltVersionKey,
                               bool LowercaseName, DistroInfo& Info) {
  std::ifstream File(Path);
  if (!File.is_open()) {
    return false;
  }

  std::string Line, Key, Value, Name, Version;
  bool HaveName = false;
  bool HaveVersion = false;

  // Note: no eof() check inside the loop. getline() sets eofbit while successfully reading a
  // final line that lacks a trailing newline, and that line must still be processed.
  while (!(HaveName && HaveVersion) && std::getline(File, Line)) {
    if (!SplitKeyValue(Line, Key, Value)) {
      continue;
    }

    if (!HaveName && Key == NameKey) {
      if (LowercaseName) {
        std::transform(Value.begin(), Value.end(), Value.begin(), [](unsigned char c) { return std::tolower(c); });
      }
      Name = std::move(Value);
      HaveName = true;
    } else if (!HaveVersion && (Key == VersionKey || (AltVersionKey != nullptr && Key == AltVersionKey))) {
      // The first version-like key encountered wins.
      Version = std::move(Value);
      HaveVersion = true;
    }
  }

  if (!(HaveName && HaveVersion)) {
    return false;
  }

  Info.DistroName = std::move(Name);
  Info.DistroVersion = std::move(Version);
  Info.RollingRelease = Info.DistroName == "arch";
  Info.Unknown = false;
  return true;
}

// Detects the host distribution.
//
// Returns a DistroInfo with Unknown == false when a name and version were found, otherwise
// DistroName == "Unknown", an empty DistroVersion and Unknown == true.
DistroInfo GetDistroInfo() {
  // Detect these files in order
  //
  // /etc/lsb-release
  // eg:
  // DISTRIB_ID=Ubuntu
  // DISTRIB_RELEASE=21.10
  // DISTRIB_CODENAME=impish
  // DISTRIB_DESCRIPTION="Ubuntu 21.10"
  //
  // /etc/os-release
  // eg:
  // PRETTY_NAME="Ubuntu 21.10"
  // NAME="Ubuntu"
  // VERSION_ID="21.10"
  // VERSION="21.10 (Impish Indri)"
  // VERSION_CODENAME=impish
  // ID=ubuntu
  // ID_LIKE=debian
  // HOME_URL="https://www.ubuntu.com/"
  // SUPPORT_URL="https://help.ubuntu.com/"
  // BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
  // PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
  // UBUNTU_CODENAME=impish
  //
  // /etc/debian_version
  // eg:
  // 11.0
  //
  // uname -r
  // eg:
  // 5.13.0-22-generic
  DistroInfo Info {};

  // lsb-release capitalises the distro id ("Ubuntu"), so it is lowercased for matching.
  if (ReadNameAndVersion("/etc/lsb-release", "DISTRIB_ID", "DISTRIB_RELEASE", nullptr, true, Info)) {
    return Info;
  }

  // Ubuntu provides VERSION_ID, Arch provides IMAGE_VERSION.
  // Values may or may not be quoted; quotes are stripped only when present.
  if (ReadNameAndVersion("/etc/os-release", "ID", "VERSION_ID", "IMAGE_VERSION", false, Info)) {
    return Info;
  }

  {
    // debian_version only carries the version number on its first line.
    std::ifstream File("/etc/debian_version");
    std::string Version;
    if (File.is_open() && std::getline(File, Version)) {
      Info.DistroName = "debian";
      Info.DistroVersion = std::move(Version);
      Info.Unknown = false;
      return Info;
    }
  }

  Info.DistroName = "Unknown";
  Info.DistroVersion = {};
  Info.Unknown = true;
  return Info;
}
} // namespace DistroQuery

namespace WebFileFetcher {
struct FileTargets {
  // These two are for matching version checks
  std::string DistroMatch;
  std::string VersionMatch;

  // This is a human readable name
  std::string DistroName;

  // This is the URL
  fextl::string URL;

  // This is the hash of the file
  std::string Hash;

  // FileType
  enum class FileType {
    TYPE_UNKNOWN,
    TYPE_SQUASHFS,
    TYPE_EROFS,
  };
  FileType Type;
};

const static std::string DownloadURL = "https://rootfs.fex-emu.gg/RootFS_links.json";

std::string DownloadToString(const std::string& URL) {
  std::array<const char*, 3> ExecveArgs = {
    "curl",
    URL.c_str(),
    nullptr,
  };

  return Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data()));
}

bool DownloadToPath(const fextl::string& URL, const fextl::string& Path) {
  auto filename = URL.substr(URL.find_last_of('/') + 1);
  auto PathName = Path + filename;

  std::array<const char*, 5> ExecveArgs = {
    "curl", URL.c_str(), "-o", PathName.c_str(), nullptr,
  };

  return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data())) == 0;
}

bool DownloadToPathWithZenityProgress(const fextl::string& URL, const fextl::string& Path) {
  auto filename = URL.substr(URL.find_last_of('/') + 1);
  auto PathName = Path + filename;

  // -# for progress bar
  // -o for output file
  // -f for silent fail
  std::string CurlPipe = fmt::format("curl -C - -#f {} -o {} 2>&1", URL, PathName);
  const std::string StdBuf = "stdbuf -oL tr '\\r' '\\n'";
  const std::string SedBuf = "sed -u 's/[^0-9]*\\([0-9]*\\).*/\\1/'";
  // zenity --auto-close can't be used since `curl -C` for whatever reason prints 100% at the start.
  // Making zenity vanish immediately
  const std::string ZenityBuf = "zenity --time-remaining --progress --no-cancel --title 'Downloading'";
  std::string BigArgs = fmt::format("{} | {} | {} | {}", CurlPipe, StdBuf, SedBuf, ZenityBuf);
  std::array<const char*, 4> ExecveArgs = {
    "/bin/sh",
    "-c",
    BigArgs.c_str(),
    nullptr,
  };

  return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data())) == 0;
}

std::optional<std::vector<FileTargets>> GetRootFSLinks() {
  // Decode the filetargets
  std::string Data = DownloadToString(DownloadURL);

  if (Data.empty()) {
    return std::nullopt;
  }

  FEX::JSON::JsonAllocator Pool {};
  const json_t* json = FEX::JSON::CreateJSON(Data, Pool);

  if (!json) {
    fmt::print(stderr, "Failed to parse JSON from RootFSLinks file '{}' - invalid JSON format", Data);
    std::abort();
  }

  const json_t* RootList = json_getProperty(json, "v1");

  if (!RootList) {
    fprintf(stderr, "Couldn't get root list");
    return {};
  }

  std::vector<FileTargets> Targets;

  for (const json_t* RootItem = json_getChild(RootList); RootItem != nullptr; RootItem = json_getSibling(RootItem)) {

    FileTargets Target {};
    Target.DistroName = json_getName(RootItem);

    for (const json_t* DataItem = json_getChild(RootItem); DataItem != nullptr; DataItem = json_getSibling(DataItem)) {
      auto DataName = std::string_view {json_getName(DataItem)};

      if (DataName == "DistroMatch") {
        Target.DistroMatch = json_getValue(DataItem);
      } else if (DataName == "DistroVersion") {
        Target.VersionMatch = json_getValue(DataItem);
      } else if (DataName == "URL") {
        Target.URL = json_getValue(DataItem);
      } else if (DataName == "Hash") {
        Target.Hash = json_getValue(DataItem);
      } else if (DataName == "Type") {
        auto DataValue = std::string_view {json_getValue(DataItem)};
        if (DataValue == "squashfs") {
          Target.Type = FileTargets::FileType::TYPE_SQUASHFS;
        } else if (DataValue == "erofs") {
          Target.Type = FileTargets::FileType::TYPE_EROFS;
        } else {
          Target.Type = FileTargets::FileType::TYPE_UNKNOWN;
        }
      }
    }
    bool SupportsSquashFS = WorkingAppsTester::Has_Squashfuse || WorkingAppsTester::Has_Unsquashfs;
    bool SupportsEroFS = WorkingAppsTester::Has_EroFSFuse;
    if ((Target.Type == FileTargets::FileType::TYPE_SQUASHFS && SupportsSquashFS) ||
        (Target.Type == FileTargets::FileType::TYPE_EROFS && SupportsEroFS)) {
      // If we don't understand the type, then we can't use this.
      // Additionally if the type is erofs but the user doesn't have erofsfuse, then we can't use this
      Targets.emplace_back(Target);
    }
  }

  return Targets;
}
} // namespace WebFileFetcher

namespace Zenity {
// Converts the text captured from a `zenity --list` dialog into a row index.
//
// Returns -1 when the output is empty (zenity printed nothing), does not start with a number,
// or the number is negative / does not fit in int32_t. Like std::stoi, characters after the
// leading number are ignored; unlike std::stoi, this never throws.
static int32_t ParseSelectedIndex(const std::string& Output) {
  if (Output.empty()) {
    return -1;
  }

  const char* Begin = Output.c_str();
  char* End = nullptr;
  const long Value = std::strtol(Begin, &End, 10);
  if (End == Begin || Value < 0 || Value > INT32_MAX) {
    return -1;
  }
  return static_cast<int32_t>(Value);
}

// Shows a zenity yes/no dialog. Returns true only when zenity exits with status 0.
bool ExecWithQuestion(const fextl::string& Question) {
  fextl::string TextArg = "--text=" + Question;
  const char* Args[] = {
    "zenity",
    "--question",
    TextArg.c_str(),
    nullptr,
  };

  int32_t Result = Exec::ExecAndWaitForResponse(Args[0], const_cast<char* const*>(Args));
  // 0 on Yes, 1 on No
  return Result == 0;
}

// Shows a zenity info dialog with `Text`; the exit status is ignored.
void ExecWithInfo(const fextl::string& Text) {
  fextl::string TextArg = "--text=" + Text;
  const char* Args[] = {
    "zenity",
    "--info",
    TextArg.c_str(),
    nullptr,
  };

  Exec::ExecAndWaitForResponse(Args[0], const_cast<char* const*>(Args));
}

// Returns true without showing a dialog when ArgOptions::AssumeYes is set, otherwise asks.
bool AskForConfirmation(const fextl::string& Question) {
  return ArgOptions::AssumeYes || ExecWithQuestion(Question);
}

// Shows `Arguments` as a single-selection zenity list with a hidden index column.
// Returns the zero-based index of the chosen entry, or -1 when nothing usable was selected.
int32_t AskForConfirmationList(const fextl::string& Text, const std::span<const fextl::string> Arguments) {
  fextl::string TextArg = "--text=" + Text;

  std::vector<const char*> ExecveArgs = {
    "zenity", "--list", TextArg.c_str(), "--hide-header", "--column=Index", "--column=Text", "--hide-column=1",
  };

  // Build every index string BEFORE taking any c_str() pointer: growing the vector moves the
  // strings, which would invalidate pointers into small-string storage.
  std::vector<fextl::string> NumberArgs;
  NumberArgs.reserve(Arguments.size());
  for (size_t i = 0; i < Arguments.size(); ++i) {
    NumberArgs.emplace_back(std::to_string(i));
  }

  ExecveArgs.reserve(ExecveArgs.size() + Arguments.size() * 2 + 1);
  for (size_t i = 0; i < Arguments.size(); ++i) {
    ExecveArgs.emplace_back(NumberArgs[i].c_str());
    ExecveArgs.emplace_back(Arguments[i].c_str());
  }
  ExecveArgs.emplace_back(nullptr);

  const auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data()));

  const int32_t Index = ParseSelectedIndex(Result);
  if (Index < 0 || static_cast<size_t>(Index) >= Arguments.size()) {
    // Not one of the rows we offered.
    return -1;
  }
  return Index;
}

// Runs `zenity --list` with caller-supplied extra arguments (column definitions and row cells).
// Returns the number zenity printed for the selected row, or -1 when nothing usable was printed.
int32_t AskForComplexConfirmationList(const std::string& Text, const std::span<const std::string> Arguments) {
  std::string TextArg = "--text=" + Text;

  std::vector<const char*> ExecveArgs = {
    "zenity",
    "--list",
    TextArg.c_str(),
  };

  ExecveArgs.reserve(ExecveArgs.size() + Arguments.size() + 1);
  for (auto& Arg : Arguments) {
    ExecveArgs.emplace_back(Arg.c_str());
  }
  ExecveArgs.emplace_back(nullptr);

  const auto Result = Exec::ExecAndWaitForResponseText(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data()));
  return ParseSelectedIndex(Result);
}

// Picks a RootFS image for the detected distro.
//
// First offers every exact match (same distro name and, unless the distro is a rolling release,
// same version). If none is accepted, either returns 0 (OPTION_FIRST) or shows the full list.
// Returns an index into `Targets`, or -1 when the user made no selection.
int32_t AskForDistroSelection(DistroQuery::DistroInfo& Info, const std::span<const WebFileFetcher::FileTargets> Targets) {
  // Search for an exact match
  int32_t DistroIndex = -1;
  if (!Info.Unknown) {
    for (size_t i = 0; i < Targets.size(); ++i) {
      const auto& Target = Targets[i];

      bool ExactMatch = Target.DistroMatch == Info.DistroName && (Info.RollingRelease || Target.VersionMatch == Info.DistroVersion);
      if (ExactMatch) {
        fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName);
        // Goes through AskForConfirmation (not ExecWithQuestion directly) so that
        // ArgOptions::AssumeYes is honoured here exactly as in the TTY front end.
        if (AskForConfirmation(Question)) {
          DistroIndex = static_cast<int32_t>(i);
          break;
        }
      }
    }
  }

  if (DistroIndex != -1) {
    return DistroIndex;
  }

  if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) {
    // Return the first option if not an exact match.
    return 0;
  }

  std::vector<std::string> Args;
  Args.reserve(3 + Targets.size() * 2);

  Args.emplace_back("--column=Index");
  Args.emplace_back("--column=Distro");
  Args.emplace_back("--hide-column=1");
  for (size_t i = 0; i < Targets.size(); ++i) {
    const auto& Target = Targets[i];
    Args.emplace_back(std::to_string(i));
    Args.emplace_back(Target.DistroName);
  }

  std::string Text = "RootFS list selection";
  return AskForComplexConfirmationList(Text, Args);
}

// Decides whether `Target` needs to be downloaded when a file with the same name may already
// exist in the RootFS directory.
//
// Returns true when the download should proceed, false when it should be skipped (file is valid,
// user cancelled, or user declined a re-download).
bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) {
  fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/";
  auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1);
  auto PathName = RootFS + filename;
  uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16);

  std::error_code ec;
  if (std::filesystem::exists(PathName, ec)) {
    const std::array<const fextl::string, 2> Args {
      "Overwrite",
      "Validate",
    };
    fextl::string Text = filename + " already exists. What do you want to do?";
    int Result = AskForConfirmationList(Text, Args);
    if (Result == -1) {
      return false;
    }

    // The existing file is hashed for both choices: even with "Overwrite" there is no point in
    // downloading again when the file on disk is already the expected one.
    // A failed hash (empty optional) compares unequal and is treated as a mismatch.
    auto Res = XXFileHash::HashFile(PathName);
    if (Result == 0) {
      if (Res == ExpectedHash) {
        fextl::string Info = fextl::fmt::format("{} matches expected hash. Skipping download", filename);
        ExecWithInfo(Info);
        return false;
      }
    } else if (Result == 1) {
      if (Res != ExpectedHash) {
        return AskForConfirmation("RootFS doesn't match hash!\nDo you want to redownload?");
      } else {
        fextl::string Info = fextl::fmt::format("{} matches expected hash", filename);
        ExecWithInfo(Info);
        return false;
      }
    }
  }

  return true;
}

// Asks the user to confirm the selected image, makes sure the RootFS directory exists and
// downloads the image with a zenity progress dialog.
// Returns true when the download command reported success.
bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) {
  fextl::string Text = fextl::fmt::format("Selected Rootfs: {}\n", Target.DistroName);
  Text += fmt::format("\tURL: {}\n", Target.URL);
  Text += fmt::format("Are you sure that you want to download this image");

  if (AskForConfirmation(Text)) {
    fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/";
    std::error_code ec {};
    if (!std::filesystem::exists(RootFS, ec)) {
      // Doesn't exist, create the folder as a user convenience
      if (!std::filesystem::create_directories(RootFS, ec)) {
        // Well I guess we failed
        Text = fmt::format("Couldn't create {} path for storing RootFS", RootFS);
        ExecWithInfo(Text);
        return false;
      }
    }

    if (!WebFileFetcher::DownloadToPathWithZenityProgress(Target.URL, RootFS)) {
      return false;
    }

    return true;
  }
  return false;
}
} // namespace Zenity

namespace TTY {
// Asks a yes/no question on the terminal.
// Returns true immediately when ArgOptions::AssumeYes is set. Otherwise reads one
// whitespace-delimited word from stdin; anything that isn't a recognised "yes" counts as no.
bool AskForConfirmation(const fextl::string& Question) {
  if (ArgOptions::AssumeYes) {
    return true;
  }

  auto ToLowerInPlace = [](auto& Str) {
    std::transform(Str.begin(), Str.end(), Str.begin(), [](unsigned char c) { return std::tolower(c); });
  };

  std::cout << Question << std::endl;
  std::cout << "Response {y,yes,1} or {n,no,0}" << std::endl;
  std::string Response;
  std::cin >> Response;

  ToLowerInPlace(Response);
  if (Response == "y" || Response == "yes" || Response == "1") {
    return true;
  } else if (Response == "n" || Response == "no" || Response == "0") {
    return false;
  } else {
    std::cout << "Unknown response. Assuming no" << std::endl;
    return false;
  }
}

// Prints `Text` followed by a newline on stdout.
void ExecWithInfo(const fextl::string& Text) {
  std::cout << Text << std::endl;
}

// Prints `List` as a numbered menu (1-based, 0 = cancel) and reads the choice from stdin.
// Returns the zero-based index of the chosen entry, or -1 on cancel or invalid input.
int32_t AskForConfirmationList(const fextl::string& Text, std::span<const fextl::string> List) {
  fmt::print("{}\n", Text);
  fmt::print("Options:\n");
  fmt::print("\t0: Cancel\n");

  for (size_t i = 0; i < List.size(); ++i) {
    fmt::print("\t{}: {}\n", i + 1, List[i]);
  }

  fmt::print("\t\nResponse {{1-{}}} or 0 to cancel\n", List.size());
  fextl::string Response;
  std::cin >> Response;

  // strtol instead of std::stol: non-numeric or empty input (e.g. EOF on stdin) must land in the
  // "Unknown response" branch below rather than throw out of the tool.
  const char* Begin = Response.c_str();
  char* End = nullptr;
  const long Choice = std::strtol(Begin, &End, 0);
  const bool Parsed = End != Begin;

  if (Parsed && Choice == 0) {
    return -1;
  } else if (Parsed && Choice >= 1 && static_cast<unsigned long>(Choice) - 1 < List.size()) {
    return static_cast<int32_t>(Choice - 1);
  } else {
    std::cout << "Unknown response. Assuming cancel" << std::endl;
    return -1;
  }
}

// Picks a RootFS image for the detected distro.
//
// First offers every exact match (same distro name and, unless the distro is a rolling release,
// same version). If none is accepted, either returns 0 (OPTION_FIRST) or shows the full menu.
// Returns an index into `Targets`, or -1 when the user made no selection.
int32_t AskForDistroSelection(DistroQuery::DistroInfo& Info, const std::span<const WebFileFetcher::FileTargets> Targets) {
  // Search for an exact match
  int32_t DistroIndex = -1;
  if (!Info.Unknown) {
    for (size_t i = 0; i < Targets.size(); ++i) {
      const auto& Target = Targets[i];

      // Rolling releases have no meaningful version, so only the name has to match.
      // This mirrors the matching rule of the Zenity front end.
      bool ExactMatch = Target.DistroMatch == Info.DistroName && (Info.RollingRelease || Target.VersionMatch == Info.DistroVersion);
      if (ExactMatch) {
        fextl::string Question = fextl::fmt::format("Found exact match for distro '{}'. Do you want to select this image?", Target.DistroName);
        if (AskForConfirmation(Question)) {
          DistroIndex = static_cast<int32_t>(i);
          break;
        }
      }
    }
  }

  if (DistroIndex != -1) {
    return DistroIndex;
  }

  if (ArgOptions::DistroListOption == ArgOptions::ListQueryOption::OPTION_FIRST) {
    // Return the first option if not an exact match.
    return 0;
  }

  std::vector<fextl::string> Args;
  Args.reserve(Targets.size());
  for (size_t i = 0; i < Targets.size(); ++i) {
    const auto& Target = Targets[i];
    Args.emplace_back(Target.DistroName);
  }

  fextl::string Text = "RootFS list selection";
  return AskForConfirmationList(Text, Args);
}

// Decides whether `Target` needs to be downloaded when a file with the same name may already
// exist in the RootFS directory.
//
// Returns true when the download should proceed, false when it should be skipped (file is valid,
// user cancelled, or user declined a re-download).
bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) {
  fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/";
  auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1);
  auto PathName = RootFS + filename;
  uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16);

  std::error_code ec;
  if (std::filesystem::exists(PathName, ec)) {
    const std::array<fextl::string, 2> Args {
      "Overwrite",
      "Validate",
    };
    fextl::string Text = filename + " already exists. What do you want to do?";
    int Result = AskForConfirmationList(Text, Args);
    if (Result == -1) {
      return false;
    }

    // The existing file is hashed for both choices: even with "Overwrite" there is no point in
    // downloading again when the file on disk is already the expected one.
    // A failed hash (empty optional) compares unequal and is treated as a mismatch.
    fmt::print("Validating RootFS hash...\n");
    auto Res = XXFileHash::HashFile(PathName);
    if (Result == 0) {
      if (Res == ExpectedHash) {
        fmt::print("{} matches expected hash. Skipping downloading\n", filename);
        return false;
      }
    } else if (Result == 1) {
      if (Res != ExpectedHash) {
        fmt::print("RootFS doesn't match hash!\n");
        return AskForConfirmation("Do you want to redownload?");
      } else {
        fmt::print("{} matches expected hash\n", filename);
        return false;
      }
    }
  }

  return true;
}

// Asks the user to confirm the selected image, makes sure the RootFS directory exists and
// downloads the image with curl, offering a retry after every failed attempt.
// Returns true once a download attempt succeeded, false otherwise.
bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) {
  fmt::print("Selected Rootfs: {}\n", Target.DistroName);
  fmt::print("\tURL: {}\n", Target.URL);

  if (AskForConfirmation("Are you sure that you want to download this image")) {
    fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/";
    std::error_code ec {};
    if (!std::filesystem::exists(RootFS, ec)) {
      // Doesn't exist, create the folder as a user convenience
      if (!std::filesystem::create_directories(RootFS, ec)) {
        // Well I guess we failed
        fmt::print("Couldn't create {} path for storing RootFS\n", RootFS);
        return false;
      }
    }
    auto DoDownload = [&Target, &RootFS]() -> bool {
      if (!WebFileFetcher::DownloadToPath(Target.URL, RootFS)) {
        fmt::print("Couldn't download RootFS\n");
        return false;
      }

      return true;
    };

    while (DoDownload() == false) {
      if (AskForConfirmation("Curl RootFS download failed. Do you want to retry?")) {
        // Loop to retry
      } else {
        return false;
      }
    }

    // Got here then we passed
    return true;
  }
  return false;
}
} // namespace TTY

namespace {
// Front-end dispatch table. CheckTTY() points every entry at either the TTY:: or the Zenity::
// implementation; the free functions below simply forward to these. They are empty until
// CheckTTY() has run.
std::function<bool(const fextl::string& Question)> _AskForConfirmation;
std::function<void(const fextl::string& Text)> _ExecWithInfo;
std::function<int32_t(const fextl::string& Text, const std::span<const fextl::string> List)> _AskForConfirmationList;
std::function<int32_t(DistroQuery::DistroInfo& Info, const std::span<const WebFileFetcher::FileTargets> Targets)> _AskForDistroSelection;
std::function<bool(const WebFileFetcher::FileTargets& Target)> _ValidateCheckExists;
std::function<bool(const WebFileFetcher::FileTargets& Target)> _ValidateDownloadSelection;

// Chooses between the terminal and the zenity front end and fills the dispatch table.
//
// With the default UI option the terminal front end is used when stdout is a TTY; an explicit UI
// option overrides that. If zenity could not be executed the terminal front end is always used.
void CheckTTY() {
  bool IsTTY {};
  if (ArgOptions::UIOption == ArgOptions::UIOverrideOption::Default) {
    IsTTY = isatty(STDOUT_FILENO);
  } else {
    IsTTY = ArgOptions::UIOption == ArgOptions::UIOverrideOption::TTY;
  }

  if (!WorkingAppsTester::Has_Zenity) {
    // Force TTY if zenity isn't installed.
    if (ArgOptions::UIOption == ArgOptions::UIOverrideOption::Zenity) {
      fmt::print("Zenity isn't executable. Falling back to TTY mode\n");
    }
    IsTTY = true;
  }

  if (IsTTY) {
    _AskForConfirmation = TTY::AskForConfirmation;
    _ExecWithInfo = TTY::ExecWithInfo;
    _AskForConfirmationList = TTY::AskForConfirmationList;
    _AskForDistroSelection = TTY::AskForDistroSelection;
    _ValidateCheckExists = TTY::ValidateCheckExists;
    _ValidateDownloadSelection = TTY::ValidateDownloadSelection;
  } else {
    _AskForConfirmation = Zenity::AskForConfirmation;
    _ExecWithInfo = Zenity::ExecWithInfo;
    _AskForConfirmationList = Zenity::AskForConfirmationList;
    _AskForDistroSelection = Zenity::AskForDistroSelection;
    _ValidateCheckExists = Zenity::ValidateCheckExists;
    _ValidateDownloadSelection = Zenity::ValidateDownloadSelection;
  }
}

// Forwards to the selected front end's yes/no prompt.
bool AskForConfirmation(const fextl::string& Question) {
  return _AskForConfirmation(Question);
}

// Forwards to the selected front end's informational message.
void ExecWithInfo(const fextl::string& Text) {
  _ExecWithInfo(Text);
}

// Forwards to the selected front end's list prompt. Returns a zero-based index or -1.
int32_t AskForConfirmationList(const fextl::string& Text, const std::span<const fextl::string> Arguments) {
  return _AskForConfirmationList(Text, Arguments);
}

// Detects the host distro, applies command-line overrides and lets the selected front end pick
// an entry of `Targets`. Returns an index into `Targets` or -1.
int32_t AskForDistroSelection(const std::span<const WebFileFetcher::FileTargets> Targets) {
  auto Info = DistroQuery::GetDistroInfo();

  if (!ArgOptions::DistroName.empty()) {
    Info.DistroName = ArgOptions::DistroName;
    // An explicitly requested distro is by definition known. Without this the exact-match search
    // in the front ends is skipped whenever auto-detection failed, silently ignoring the override.
    Info.Unknown = false;
    // The rolling-release flag was derived from the auto-detected name; re-derive it for the
    // overriding name using the same rule as DistroQuery::GetDistroInfo.
    Info.RollingRelease = Info.DistroName == "arch";
  }
  if (!ArgOptions::DistroVersion.empty()) {
    Info.DistroVersion = ArgOptions::DistroVersion;
  }

  return _AskForDistroSelection(Info, Targets);
}

// Forwards to the selected front end. Returns true when the download should proceed.
bool ValidateCheckExists(const WebFileFetcher::FileTargets& Target) {
  return _ValidateCheckExists(Target);
}

// Forwards to the selected front end. Returns true when the image was downloaded.
bool ValidateDownloadSelection(const WebFileFetcher::FileTargets& Target) {
  return _ValidateDownloadSelection(Target);
}
} // namespace

namespace ConfigSetter {
// Persists `RootFS` as the CONFIG_ROOTFS option of the main configuration file:
// load the existing config layer, overwrite the one option, write the layer back as JSON.
void SetRootFSAsDefault(const fextl::string& RootFS) {
  fextl::string ConfigPath = FEXCore::Config::GetConfigFileLocation();

  auto MainLayer = FEX::Config::CreateMainLayer(&ConfigPath);
  // Load first so that every other option already stored in the file is preserved on save.
  MainLayer->Load();
  MainLayer->Set(FEXCore::Config::ConfigOption::CONFIG_ROOTFS, RootFS);

  FEX::Config::SaveLayerToJSON(ConfigPath, MainLayer.get());
}
} // namespace ConfigSetter

namespace UnSquash {
bool UnsquashRootFS(const fextl::string& Path, const fextl::string& RootFS, const fextl::string& FolderName) {
  auto TargetFolder = Path + FolderName;

  std::error_code ec;
  if (std::filesystem::exists(TargetFolder, ec)) {
    fextl::string Question = "Target folder \"" + FolderName + "\" already exists. Overwrite?";
    if (AskForConfirmation(Question)) {
      if (std::filesystem::remove_all(TargetFolder, ec) == ~0ULL) {
        ExecWithInfo("Couldn't remove previous directory. Won't extract.");
        return false;
      }
    } else {
      return false;
    }
  }

  const std::array<const char*, 6> ExecveArgs = {
    "unsquashfs", "-f", "-d", TargetFolder.c_str(), RootFS.c_str(), nullptr,
  };

  return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data())) == 0;
}

bool ExtractEroFS(const fextl::string& Path, const fextl::string& RootFS, const fextl::string& FolderName) {
  auto TargetFolder = Path + FolderName;

  std::error_code ec;
  if (std::filesystem::exists(TargetFolder, ec)) {
    fextl::string Question = "Target folder \"" + FolderName + "\" already exists. Overwrite?";
    if (AskForConfirmation(Question)) {
      if (std::filesystem::remove_all(TargetFolder, ec) == ~0ULL) {
        ExecWithInfo("Couldn't remove previous directory. Won't extract.");
        return false;
      }
    } else {
      return false;
    }
  }

  ExecWithInfo("Extracting Erofs. This might take a few minutes.");

  const auto ExtractOption = fmt::format("--extract={}", TargetFolder);
  const std::array<const char*, 4> ExecveArgs = {
    "fsck.erofs",
    ExtractOption.c_str(),
    RootFS.c_str(),
    nullptr,
  };

  return Exec::ExecAndWaitForResponse(ExecveArgs[0], const_cast<char* const*>(ExecveArgs.data())) == 0;
}
} // namespace UnSquash

// Entry point of the RootFS fetcher.
//
// Flow:
//   1. Load configuration, parse arguments, probe host tools, pick the UI front end.
//   2. If positional arguments remain: print the hash of the first one and exit.
//   3. Otherwise download the RootFS list, let the user pick an image, download and verify it,
//      optionally extract it and optionally store it as the default RootFS.
//
// Returns 0 on success or when the user backs out, -1 on the error paths below.
int main(int argc, char** argv, char** const envp) {
  FEX::Config::LoadConfig({}, envp);

  // Reload the meta layer
  FEXCore::Config::ReloadMetaLayer();

  ArgOptions::ParseArguments(argc, argv);

  WorkingAppsTester::Init();

  // Must run after Init(): the front-end choice depends on WorkingAppsTester::Has_Zenity.
  CheckTTY();

  // Hash-only mode: hash the first remaining argument and exit (0 even if hashing failed).
  if (ArgOptions::RemainingArgs.size()) {
    auto Res = XXFileHash::HashFile(ArgOptions::RemainingArgs[0]);
    if (Res.has_value()) {
      fmt::print("{} has hash: {:x}\n", ArgOptions::RemainingArgs[0], Res.value());
    } else {
      fmt::print("Couldn't generate hash for {}\n", ArgOptions::RemainingArgs[0]);
    }
    return 0;
  }

  // Check if curl exists on the host
  if (!WorkingAppsTester::Has_Curl) {
    ExecWithInfo("curl is required to use this tool. Please install curl before using.");
    return -1;
  }
  if (!WorkingAppsTester::Has_Squashfuse && !WorkingAppsTester::Has_Unsquashfs && !WorkingAppsTester::Has_EroFSFuse) {
    // We need at least one tool to mount or extract image files
    ExecWithInfo("squashfuse, unsquashfs, or erofsfuse is required to use this tool. Please install one before using.");
    return -1;
  }

  FEX_CONFIG_OPT(LDPath, ROOTFS);

  // Word the opening question depending on whether a usable RootFS is already configured.
  std::error_code ec;
  fextl::string Question {};
  if (LDPath().empty() || std::filesystem::exists(LDPath(), ec) == false) {
    Question = "RootFS not found. Do you want to try and download one?";
  } else {
    Question = "RootFS is already in use. Do you want to check the download list?";
  }

  if (AskForConfirmation(Question)) {
    auto TargetReturn = WebFileFetcher::GetRootFSLinks();
    if (!TargetReturn.has_value()) {
      ExecWithInfo("Couldn't download rootfs list from the server. Try again in a minute or report on the fex-emu issue tracker.");
      return -1;
    }

    auto Targets = TargetReturn.value();

    // An empty list also covers "no listed image type is usable with the installed tools".
    if (Targets.empty()) {
      ExecWithInfo("Couldn't parse rootfs definition URL.");
      return -1;
    }

    int32_t DistroIndex = AskForDistroSelection(Targets);
    if (DistroIndex != -1) {
      const auto& Target = Targets[DistroIndex];
      fextl::string RootFS = FEXCore::Config::GetDataDirectory() + "RootFS/";
      // Image file name = last path component of the URL.
      auto filename = Target.URL.substr(Target.URL.find_last_of('/') + 1);
      auto PathName = RootFS + filename;

      // ValidateCheckExists returns false when no download is wanted (existing file is fine or
      // the user declined); in that case continue with whatever is on disk.
      if (!ValidateCheckExists(Target)) {
        // Keep going
      } else {
        // Downloads the image and verifies its hash.
        // Returns {status, retryable}: {0, false} on success, {-1, true} on a hash mismatch
        // (worth retrying), {-1, false} on any other failure.
        auto ValidateDownload = [&Target, &PathName]() -> std::pair<int32_t, bool> {
          std::error_code ec;
          if (ValidateDownloadSelection(Target)) {
            uint64_t ExpectedHash = std::stoul(Target.Hash, nullptr, 16);

            if (std::filesystem::exists(PathName, ec)) {
              auto Res = XXFileHash::HashFile(PathName);
              // An empty optional (hashing failed) also compares unequal here.
              if (Res != ExpectedHash) {
                fextl::string Text = fextl::fmt::format("Couldn't hash the rootfs or hash didn't match\n");
                Text += fmt::format("Hash {:x} != Expected Hash {:x}\n", Res.value_or(0), ExpectedHash);
                ExecWithInfo(Text);
                return std::make_pair(-1, true);
              }
            } else {
              ExecWithInfo("Correctly downloaded RootFS but doesn't exist?");
              return std::make_pair(-1, false);
            }
          } else {
            ExecWithInfo("Couldn't download rootfs for some reason.");
            return std::make_pair(-1, false);
          }

          return std::make_pair(0, false);
        };

        // Keep retrying for as long as the failure is retryable and the user agrees.
        std::pair<int32_t, bool> Result {};
        while ((Result = ValidateDownload()).second == true && Result.first == -1) {

          if (AskForConfirmation("Do you want to try downloading the RootFS again?")) {
            // Continue the loop
          } else {
            // Didn't want to retry, just exit now
            return Result.first;
          }
        }

        // Early exit on other errors
        if (Result.first == -1 && Result.second == false) {
          return Result.first;
        }
      }

      // User-facing prompts for the "extract or use as-is" decision, one per tool situation.
      struct ExtractStrings {
        const char* ExtractOrAsIs;
        const char* AsIsSinceMounterNonFunctional;
        const char* AsIsSinceExtractorNonFunctional;
        const char* AsIsSinceNothingWorks;
      };

      // Starts from the command-line choice; only OPTION_ASK triggers the interactive prompts.
      ArgOptions::CompressedImageOption UseImageAs {ArgOptions::CompressedUsageOption};
      bool HasExtractor {};
      bool HasMounter {};
      std::function<bool(const fextl::string& Path, const fextl::string& RootFS, const fextl::string& FolderName)> ExtractHelper;
      // Only filled in for squashfs/erofs targets; GetRootFSLinks keeps no other types.
      ExtractStrings ExtractingStrings;
      if (Target.Type == WebFileFetcher::FileTargets::FileType::TYPE_SQUASHFS) {
        HasExtractor = WorkingAppsTester::Has_Unsquashfs;
        HasMounter = WorkingAppsTester::Has_Squashfuse;
        ExtractHelper = UnSquash::UnsquashRootFS;
        ExtractingStrings = {
          "Do you wish to extract the squashfs file or use it as-is?",
          "Squashfuse doesn't work. Do you wish to extract the squashfs file?",
          "Unsquashfs doesn't work. Do you want to use the squashfs file as-is?",
          "Unsquashfs and squashfuse isn't working. Leaving rootfs as-is",
        };
      } else if (Target.Type == WebFileFetcher::FileTargets::FileType::TYPE_EROFS) {
        HasExtractor = WorkingAppsTester::Has_EroFSFsck;
        HasMounter = WorkingAppsTester::Has_EroFSFuse;
        ExtractHelper = UnSquash::ExtractEroFS;
        ExtractingStrings = {
          "Do you wish to extract the erofs file or use it as-is?",
          "erofsfuse doesn't work. Do you wish to extract the erofs file?",
          "Extracting erofs doesn't work. Do you want to use the erofs file as-is?",
          "Extracting erofs and erofsfuse isn't working. Leaving rootfs as-is",
        };
      }

      int32_t Result {};
      std::vector<fextl::string> Args = {
        "Extract",
        "As-Is",
      };

      // Offer only the choices the installed tools can carry out. If the user cancels a prompt
      // UseImageAs stays OPTION_ASK and the image is neither extracted nor renamed.
      if (UseImageAs == ArgOptions::CompressedImageOption::OPTION_ASK) {
        if (HasExtractor) {
          if (HasMounter) {
            Result = AskForConfirmationList(ExtractingStrings.ExtractOrAsIs, Args);
            if (Result == 0) {
              UseImageAs = ArgOptions::CompressedImageOption::OPTION_EXTRACT;
            } else if (Result == 1) {
              UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS;
            }
          } else {
            // No mounter: drop "As-Is" so only "Extract" is offered.
            Args.pop_back();
            Result = AskForConfirmationList(ExtractingStrings.AsIsSinceMounterNonFunctional, Args);
            if (Result == 0) {
              UseImageAs = ArgOptions::CompressedImageOption::OPTION_EXTRACT;
            }
          }
        } else {
          if (HasMounter) {
            // No extractor: drop "Extract" so only "As-Is" is offered.
            Args.erase(Args.begin());
            Result = AskForConfirmationList(ExtractingStrings.AsIsSinceExtractorNonFunctional, Args);
            if (Result == 0) {
              // "Extract" was removed above, so index 0 now refers to "As-Is"
              UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS;
            }
          } else {
            Args.erase(Args.begin());
            ExecWithInfo(ExtractingStrings.AsIsSinceNothingWorks);
            UseImageAs = ArgOptions::CompressedImageOption::OPTION_ASIS;
          }
        }
      }

      if (UseImageAs == ArgOptions::CompressedImageOption::OPTION_EXTRACT) {
        // Extraction folder = image file name without its last extension.
        auto FolderName = filename.substr(0, filename.find_last_of('.'));
        if (ExtractHelper(RootFS, PathName, FolderName)) {
          // Remove the image file suffix since we extracted to that.
          filename = std::move(FolderName);
        }
      }

      // `filename` is now either the image file or, after a successful extraction, the folder.
      if (AskForConfirmation("Do you wish to set this RootFS as default?")) {
        ConfigSetter::SetRootFSAsDefault(filename);
        fextl::string Text = fextl::fmt::format("{} set as default RootFS\n", filename);
        ExecWithInfo(Text);
      }
    }
  }

  return 0;
}


================================================
FILE: Source/Tools/FEXRootFSFetcher/XXFileHash.cpp
================================================
// SPDX-License-Identifier: MIT
#include "XXFileHash.h"

#include <chrono>
#include <fcntl.h>
#include <fmt/format.h>
#include <unistd.h>
#include <vector>
#include <xxhash.h>

namespace XXFileHash {
// 32MB blocks
constexpr static size_t BLOCK_SIZE = 32 * 1024 * 1024;
/**
 * Computes the 64-bit XXH3 hash (seed 0) of a file's contents.
 *
 * The file is read sequentially in BLOCK_SIZE chunks and fed into a streaming
 * XXH3 state. Roughly once per second a progress percentage is printed.
 *
 * @param Filepath Path of the file to hash.
 * @return The hash on success, std::nullopt if the file could not be opened,
 *         sized, or read, or if the hashing state could not be set up.
 */
std::optional<uint64_t> HashFile(const fextl::string& Filepath) {
  int fd = open(Filepath.c_str(), O_RDONLY);
  if (fd == -1) {
    return std::nullopt;
  }

  XXH3_state_t* State {};
  // Shared cleanup for every failure path past this point.
  auto HadError = [fd, &State]() {
    close(fd);
    if (State) {
      XXH3_freeState(State);
    }
    return std::nullopt;
  };
  // Get file size
  off_t Size = lseek(fd, 0, SEEK_END);
  if (Size == -1) {
    return HadError();
  }

  // Reset to beginning
  if (lseek(fd, 0, SEEK_SET) == -1) {
    return HadError();
  }

  // Set up XXHash state
  State = XXH3_createState();
  const XXH64_hash_t Seed = 0;

  if (!State) {
    return HadError();
  }

  if (XXH3_64bits_reset_withSeed(State, Seed) == XXH_ERROR) {
    return HadError();
  }

  const double SizeD = Size;
  std::vector<char> Data(BLOCK_SIZE);
  off_t CurrentOffset = 0;
  auto Now = std::chrono::high_resolution_clock::now();

  // Let the kernel know that we will be reading linearly
  posix_fadvise(fd, 0, Size, POSIX_FADV_SEQUENTIAL);
  while (CurrentOffset < Size) {

    ssize_t Result = pread(fd, Data.data(), BLOCK_SIZE, CurrentOffset);
    if (Result == -1) {
      return HadError();
    }

    if (Result == 0) {
      // End of file before the size measured above: the file shrank while it was being hashed.
      // Without this check the offset never advances and the loop spins forever.
      return HadError();
    }

    if (XXH3_64bits_update(State, Data.data(), Result) == XXH_ERROR) {
      return HadError();
    }
    auto Cur = std::chrono::high_resolution_clock::now();
    auto Dur = Cur - Now;
    if (Dur >= std::chrono::seconds(1)) {
      // Fixed-point with two decimals; a bare precision would mean "two significant digits"
      // and could print values such as "1e+02".
      fmt::print("{:.2f}% hashed\n", (double)CurrentOffset / SizeD * 100.0);
      Now = Cur;
    }
    CurrentOffset += Result;
  }

  const XXH64_hash_t Hash = XXH3_64bits_digest(State);
  XXH3_freeState(State);

  close(fd);
  return Hash;
}
} // namespace XXFileHash


================================================
FILE: Source/Tools/FEXRootFSFetcher/XXFileHash.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>

#include <optional>

namespace XXFileHash {
// Hashes the contents of the file at Filepath.
// Returns std::nullopt when the file cannot be hashed.
std::optional<uint64_t> HashFile(const fextl::string& Filepath);
}


================================================
FILE: Source/Tools/FEXServer/ArgumentLoader.cpp
================================================
// SPDX-License-Identifier: MIT
#include "ArgumentLoader.h"
#include "Common/cpp-optparse/OptionParser.h"
#include "PipeScanner.h"
#include "ProcessPipe.h"

#include "git_version.h"

#include <fmt/format.h>

namespace FEXServer::Config {
static fextl::string Version = "FEX-Emu (" GIT_DESCRIBE_STRING ") ";

/**
 * Parses the FEXServer command line.
 *
 * Besides filling in the returned option struct, this forwards the
 * `--wait_pipe` descriptor to PipeScanner (when one was given) and the
 * `--watch_fd` descriptor to ProcessPipe.
 *
 * @param argc Argument count as passed to main.
 * @param argv Argument vector as passed to main.
 * @return The parsed server options.
 */
FEXServerOptions Load(int argc, char** argv) {
  optparse::OptionParser Parser = optparse::OptionParser().version(Version);

  Parser.add_option("-k", "--kill")
    .action("store_true")
    .set_default(false)
    .help("Shutdown an already active FEXServer");

  Parser.add_option("-f", "--foreground")
    .action("store_true")
    .set_default(false)
    .help("Run this FEXServer in the foreground");

  Parser.add_option("-p", "--persistent")
    .action("store")
    .type("int")
    .set_default(0)
    .set_optional_value(true)
    .metavar("n")
    .help("Make FEXServer persistent. Optional number of seconds");

  Parser.add_option("-w", "--wait")
    .action("store_true")
    .set_default(false)
    .help("Wait for the FEXServer to shutdown");
  Parser.add_option("--wait_pipe")
    .action("store")
    .type("int")
    .set_default(-1)
    .set_optional_value(true);
  Parser.add_option("--watch_fd")
    .action("store")
    .type("int")
    .set_default(-1)
    .set_optional_value(true)
    .help("Adds FD to watch list of active processes");

  Parser.add_option("-v").action("version").help("Version string");

  optparse::Values Options = Parser.parse_args(argc, argv);

  FEXServerOptions FEXOptions {};
  FEXOptions.Kill = Options.get("kill");
  FEXOptions.Wait = Options.get("wait");
  FEXOptions.Foreground = Options.get("foreground");
  // Waiting for another server implies staying in the foreground.
  if (FEXOptions.Wait) {
    FEXOptions.Foreground = true;
  }
  FEXOptions.PersistentTimeout = Options.get("persistent");

  // -1 is the option's default and means no wait pipe was handed to us.
  const int WaitPipe = Options.get("wait_pipe");
  if (WaitPipe != -1) {
    PipeScanner::SetWaitPipe(WaitPipe);
  }

  ProcessPipe::SetWatchFD(Options.get("watch_fd"));
  return FEXOptions;
}
} // namespace FEXServer::Config


================================================
FILE: Source/Tools/FEXServer/ArgumentLoader.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>

namespace FEXServer::Config {
// Options parsed from the FEXServer command line.
struct FEXServerOptions {
  // Ask an already running FEXServer to shut down (-k/--kill).
  bool Kill;
  // Stay in the foreground instead of deparenting (-f/--foreground).
  bool Foreground;
  // Wait for a running FEXServer to shut down (-w/--wait).
  bool Wait;
  // Value of -p/--persistent; the option's help text describes it as a number of seconds.
  uint32_t PersistentTimeout;
};

// Parses argc/argv into a FEXServerOptions.
FEXServerOptions Load(int argc, char** argv);
} // namespace FEXServer::Config


================================================
FILE: Source/Tools/FEXServer/CMakeLists.txt
================================================
# FEXServer executable and the sources it is built from.
add_executable(FEXServer
  Main.cpp
  ArgumentLoader.cpp
  Logger.cpp
  PipeScanner.cpp
  ProcessPipe.cpp
  SquashFS.cpp)

# Headers under the build tree's generated/ directory
# (presumably where git_version.h is produced - confirm).
target_include_directories(FEXServer PRIVATE ${CMAKE_BINARY_DIR}/generated)

target_link_libraries(FEXServer PRIVATE FEXCore Common CommonTools JemallocDummy ${PTHREAD_LIB})

# Project-provided helper; presumably enables linker garbage collection - confirm in LinkerGC.cmake.
LinkerGC(FEXServer)

# Install the binary into <prefix>/bin as part of the Runtime component.
install(TARGETS FEXServer RUNTIME
  DESTINATION bin
  COMPONENT Runtime)


================================================
FILE: Source/Tools/FEXServer/Logger.cpp
================================================
// SPDX-License-Identifier: MIT
#include <Common/Async.h>
#include <Common/FEXServerClient.h>

#include <thread>
#include <vector>

namespace Logging {
// Forward declaration of the handler that prints one client log message.
// HandleLogData below calls it once per TYPE_MSG packet.
void ClientMsgHandler(int FD, FEXServerClient::Logging::PacketMsg* const Msg, const char* MsgStr);
}

namespace Logger {
int LogClientQueuePipe[2];
std::thread LogThread;

void HandleLogData(int Socket) {
  std::vector<uint8_t> Data(1500);
  size_t CurrentRead {};
  while (true) {
    int Read = read(Socket, &Data.at(CurrentRead), Data.size() - CurrentRead);
    if (Read > 0) {
      CurrentRead += Read;
      if (CurrentRead == Data.size()) {
        Data.resize(Data.size() << 1);
      } else {
        // No more to read
        break;
      }
    } else if (Read == 0) {
      // Socket closed
      return;
    } else {
      if (errno == EWOULDBLOCK) {
        // no error
      } else {
        perror("read");
      }
      break;
    }
  }

  size_t CurrentOffset {};
  while (CurrentOffset < CurrentRead) {
    FEXServerClient::Logging::PacketHeader* Header = reinterpret_cast<FEXServerClient::Logging::PacketHeader*>(&Data[CurrentOffset]);
    if (Header->PacketType == FEXServerClient::Logging::PacketTypes::TYPE_MSG) {
      FEXServerClient::Logging::PacketMsg* Msg = reinterpret_cast<FEXServerClient::Logging::PacketMsg*>(&Data[CurrentOffset]);
      const char* MsgText = reinterpret_cast<const char*>(&Data[CurrentOffset + sizeof(FEXServerClient::Logging::PacketMsg)]);
      Logging::ClientMsgHandler(Socket, Msg, MsgText);

      CurrentOffset += sizeof(FEXServerClient::Logging::PacketMsg) + Msg->MessageLength;
    } else {
      CurrentOffset = CurrentRead;
    }
  }
}

/**
 * Body of the logging thread.
 *
 * Runs a reactor that waits on the read end of LogClientQueuePipe. Each
 * descriptor received over that pipe is registered as a client whose data is
 * handed to HandleLogData. Returns once the reactor stops.
 */
void LogThreadFunc() {
  fasio::poll_reactor Reactor;

  auto Pipe = fasio::posix_descriptor {Reactor, LogClientQueuePipe[0]};
  fextl::vector<fasio::posix_descriptor> Clients;

  // Wait for AppendLogFD to send file descriptors over LogClientQueuePipe.
  // When data becomes ready, we read the FD and register it to the reactor.
  Pipe.async_wait([&](fasio::error ec) {
    if (ec != fasio::error::success) {
      return fasio::post_callback::stop_reactor;
    }

    int ReceivedFD;
    const ssize_t BytesRead = read(Pipe.FD, &ReceivedFD, sizeof(ReceivedFD));
    if (BytesRead != static_cast<ssize_t>(sizeof(ReceivedFD))) {
      if (BytesRead == -1 && errno == EINTR) {
        // Interrupted before anything was read, wait for the next wakeup.
        return fasio::post_callback::repeat;
      }
      // EOF (write end closed by Shutdown) or a real error: ReceivedFD holds no valid
      // descriptor, so registering it would watch an arbitrary FD.
      return fasio::post_callback::stop_reactor;
    }

    // Register client and set up read callback
    Clients.emplace_back(Reactor, ReceivedFD);
    Clients.back().async_wait([&Clients, ReceivedFD](fasio::error ec) {
      if (ec != fasio::error::success) {
        // Swap-and-pop the matching entry so removal doesn't shift the other clients.
        auto Entry = std::find_if(Clients.begin(), Clients.end(), [=](auto& desc) { return desc.FD == ReceivedFD; });
        if (Entry != Clients.end()) {
          std::iter_swap(Entry, std::prev(Clients.end()));
          Clients.pop_back();
        }
        return fasio::post_callback::drop;
      }

      HandleLogData(ReceivedFD);
      return fasio::post_callback::repeat;
    });

    return fasio::post_callback::repeat;
  });

  Reactor.run();
}

/**
 * Creates the pipe used to hand client descriptors to the logging thread and
 * starts that thread.
 *
 * If the pipe cannot be created no thread is started, so LogThreadRunning()
 * keeps reporting false instead of the thread polling an unrelated descriptor.
 */
void StartLogThread() {
  if (pipe2(LogClientQueuePipe, 0) == -1) {
    perror("pipe2");
    return;
  }

  LogThread = std::thread(LogThreadFunc);
}

// Queues a client descriptor for the logging thread by writing its numeric value
// to the write end of LogClientQueuePipe. The result of write() is not checked.
void AppendLogFD(int FD) {
  write(LogClientQueuePipe[1], &FD, sizeof(FD));
}

// Reports whether the logging thread has been started and not yet joined.
bool LogThreadRunning() {
  return LogThread.joinable();
}

/**
 * Stops the logging thread, if one was started.
 *
 * Closes the write end of LogClientQueuePipe and then joins the thread.
 * When the thread was never started the pipe array still holds its
 * zero-initialised values, so nothing is closed in that case; closing it
 * would close descriptor 0.
 */
void Shutdown() {
  if (!LogThread.joinable()) {
    return;
  }

  close(LogClientQueuePipe[1]);
  LogThread.join();
}
} // namespace Logger


================================================
FILE: Source/Tools/FEXServer/Logger.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

namespace Logger {
// Hands a client log descriptor to the logging thread.
void AppendLogFD(int FD);
// Creates the queue pipe and starts the logging thread.
void StartLogThread();
// True while the logging thread is joinable.
bool LogThreadRunning();
// Closes the queue pipe's write end and joins the logging thread.
void Shutdown();
} // namespace Logger


================================================
FILE: Source/Tools/FEXServer/Main.cpp
================================================
// SPDX-License-Identifier: MIT
#include "ArgumentLoader.h"
#include "Logger.h"
#include "PipeScanner.h"
#include "PortabilityInfo.h"
#include "ProcessPipe.h"
#include "SquashFS.h"
#include "Common/ArgumentLoader.h"
#include "Common/Config.h"
#include "Common/FEXServerClient.h"

#include <fmt/color.h>

#include <chrono>
#include <ctime>
#include <dirent.h>
#include <fcntl.h>
#include <filesystem>
#include <iterator>
#include <mutex>
#include <optional>
#include <poll.h>
#include <sys/prctl.h>
#include <sys/signal.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <termios.h>
#include <thread>
#include <unistd.h>

static timespec StartTime {};

// Set an empty style to disable coloring when FEXServer output is e.g. piped to a file
static std::optional<fmt::text_style> DisableColors = isatty(STDOUT_FILENO) ? std::nullopt : std::optional {fmt::text_style {}};

namespace Logging {
// Writes one server log line to stdout: the level string (styled unless colors are disabled),
// a space, the message and a trailing newline.
void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const fmt::text_style LevelStyle = DisableColors.value_or(DebugLevelStyle(Level));
  const auto Line = fmt::format("{} {}\n", fmt::styled(LogMan::DebugLevelStr(Level), LevelStyle), Message);
  write(STDOUT_FILENO, Line.c_str(), Line.size());
}

// Assertion messages go through the regular message handler at ASSERT level.
void AssertHandler(const char* Message) {
  MsgHandler(LogMan::ASSERT, Message);
}

/**
 * Prints one log message received from a client to stderr.
 *
 * The line consists of the level string, "PID|TID", the time elapsed since
 * the first message this server printed (seconds.milliseconds), and the
 * message text. Each metadata part is styled unless colors are disabled.
 *
 * @param FD     Descriptor the message arrived on (unused here).
 * @param Msg    Packet carrying level, timestamp, PID and TID.
 * @param MsgStr NUL-terminated message text.
 */
void ClientMsgHandler(int FD, FEXServerClient::Logging::PacketMsg* const Msg, const char* MsgStr) {
  // The first message seen defines time zero for all later ones.
  if (!StartTime.tv_sec && !StartTime.tv_nsec) {
    StartTime = Msg->Header.Timestamp;
  }
  // Subtract the timestamps with a manual borrow from the seconds field.
  auto seconds = Msg->Header.Timestamp.tv_sec - StartTime.tv_sec - (Msg->Header.Timestamp.tv_nsec < StartTime.tv_nsec);
  auto nanos = (1'000'000'000 + Msg->Header.Timestamp.tv_nsec - StartTime.tv_nsec) % 1'000'000'000;

  // Format into a growable string: every value below originates from the client packet, and
  // together with the color escape sequences a fixed-size stack buffer has no guaranteed bound.
  std::string Metadata;
  auto Cursor = std::back_inserter(Metadata);
  Cursor = fmt::format_to(Cursor, DisableColors.value_or(LogMan::DebugLevelStyle(Msg->Level)), "{}", LogMan::DebugLevelStr(Msg->Level));
  Cursor = fmt::format_to(Cursor, DisableColors.value_or(fmt::fg(fmt::color::light_gray)), " {}|{} ", Msg->Header.PID, Msg->Header.TID);
  Cursor = fmt::format_to(Cursor, DisableColors.value_or(fmt::fg(fmt::color::gray)), "{}.{:03}", seconds, nanos / 1000000);

  auto Output = fmt::format("{} {}\n", Metadata, MsgStr);
  write(STDERR_FILENO, Output.c_str(), Output.size());
}
} // namespace Logging

namespace {
/**
 * Signal handler installed for SIGTERM and SIGINT.
 *
 * SIGINT requests a shutdown of the request loop and returns; any other
 * signal terminates the process immediately with exit status 1.
 */
void ActionHandler(int sig, siginfo_t* info, void* context) {
  // TODO: Fix this
  // NOTE(review): ProcessPipe::Shutdown() is not visible here; confirm it is async-signal-safe.
  if (sig == SIGINT) {
    // Someone trying to kill us. Shutdown.
    ProcessPipe::Shutdown();

    // Clear "^C" string that most terminals print when pressing Ctrl+C.
    // write() is async-signal-safe, stdio functions are not.
    write(STDERR_FILENO, "\r", 1);
    return;
  }
  _exit(1);
}

// Intentionally empty handler; SetupSignals installs it for SIGUSR1.
void ActionIgnore(int sig, siginfo_t* info, void* context) {}

/**
 * Installs the server's signal dispositions:
 * SIGTERM and SIGINT go to ActionHandler, SIGUSR1 goes to the empty
 * ActionIgnore handler, SIGPIPE is ignored and SIGCHLD is reset to default.
 */
void SetupSignals() {
  // Registers a three-argument (SA_SIGINFO) handler for one signal.
  const auto Install = [](int Signal, void (*Handler)(int, siginfo_t*, void*)) {
    struct sigaction Action {};
    Action.sa_sigaction = Handler;
    Action.sa_flags = SA_SIGINFO;
    sigaction(Signal, &Action, nullptr);
  };

  // Both termination requests share one handler.
  Install(SIGTERM, ActionHandler);
  Install(SIGINT, ActionHandler);

  // SIGUSR1 only exists to interrupt blocking syscalls.
  Install(SIGUSR1, ActionIgnore);

  // Pipe closure is detected explicitly, so the signal it could raise is ignored.
  signal(SIGPIPE, SIG_IGN);

  // SIGCHLD is likely SIG_IGN if FEX started the server. Child processes are waited for with
  // waitpid, and newer libfuse also requires SIGCHLD to not be ignored by child processes.
  signal(SIGCHLD, SIG_DFL);
}

/**
 * @brief Deparents itself by forking and terminating the parent process.
 *
 * Does nothing when INVOCATION_ID is set in the environment. If fork() fails
 * the server keeps running in the current process instead of exiting.
 */
void DeparentSelf() {
  auto SystemdEnv = getenv("INVOCATION_ID");
  if (SystemdEnv) {
    // If FEXServer was launched through systemd then don't deparent, otherwise systemd kills the entire server.
    return;
  }

  pid_t pid = fork();

  if (pid == -1) {
    // No child was created. Exiting here would silently leave no server running at all,
    // so carry on without deparenting.
    perror("fork");
    return;
  }

  if (pid != 0) {
    // Parent is leaving to force this process to deparent itself
    // This lets this process become the child of whatever the reaper parent is
    _exit(0);
  }
}
} // namespace

/**
 * FEXServer entry point.
 *
 * Depending on the parsed options this either
 *  - waits for an already running server to exit (--wait),
 *  - asks an already running server to shut down (--kill), or
 *  - becomes the server: takes the server lock, creates the sockets, optionally starts
 *    the log thread, initializes SquashFS and then services requests until shutdown.
 *
 * Returns 0 on success and for the --wait/--kill modes, -1 when server setup fails.
 */
int main(int argc, char** argv, char** const envp) {
  auto Options = FEXServer::Config::Load(argc, argv);

  SetupSignals();

  // Log handlers are only installed when running in the foreground.
  if (Options.Foreground) {
    LogMan::Throw::InstallHandler(Logging::AssertHandler);
    LogMan::Msg::InstallHandler(Logging::MsgHandler);
  }

  if (!Options.Foreground) {
    DeparentSelf();
  }

  FEX::Config::LoadConfig({}, envp, FEX::ReadPortabilityInformation());

  // Reload the meta layer
  FEXCore::Config::ReloadMetaLayer();

  if (Options.Wait) {
    int ServerPipe = FEXServerClient::ConnectToServer();
    if (ServerPipe != -1) {
      int FEXServerPID = FEXServerClient::RequestPIDFD(ServerPipe);
      close(ServerPipe);
      if (FEXServerPID != -1) {
        LogMan::Msg::IFmt("[FEXServer] Waiting for FEXServer to close");
        // We can't use waitid (P_PIDFD) here because the active FEXServer isn't a child of this process.
        // Use poll instead which will return once the pidfd closes.
        pollfd PollFD;
        PollFD.fd = FEXServerPID;
        PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL;

        // Wait for a result on the pipe that isn't EINTR
        while (poll(&PollFD, 1, -1) == -1 && errno == EINTR)
          ;

        LogMan::Msg::IFmt("[FEXServer] FEXServer shutdown");
      }
      PipeScanner::ClosePipes();
    }
    return 0;
  }

  if (Options.Kill) {
    int ServerPipe = FEXServerClient::ConnectToServer();

    if (ServerPipe != -1) {
      FEXServerClient::RequestServerKill(ServerPipe);
      LogMan::Msg::DFmt("[FEXServer] Sent kill packet");
      PipeScanner::ClosePipes();
    }
    return 0;
  }

  if (!ProcessPipe::InitializeServerPipe()) {
    // Someone else already owns the FEXServer pipe
    PipeScanner::ClosePipes();
    return -1;
  }

  // Steam doesn't get to connect to global sockets.
#ifndef FEX_STEAM_SUPPORT
  if (!ProcessPipe::InitializeServerSocket(true)) {
    // Couldn't create server socket for some reason
    PipeScanner::ClosePipes();
    return -1;
  }
#endif

  if (!ProcessPipe::InitializeServerSocket(false)) {
    // Couldn't create server socket for some reason
    PipeScanner::ClosePipes();
    return -1;
  }

  // Switch this process over to a new session id
  // Probably not required but allows this to become the process group leader of its session
  ::setsid();

  // Set process as a subreaper so subprocesses can't escape
  if (::prctl(PR_SET_CHILD_SUBREAPER, 1) == -1) [[unlikely]] {
    // If subreaper failed then squashfuse/erofsfuse can escape, which isn't fatal.
    LogMan::Msg::DFmt("[FEXServer] Couldn't set subreaper.");
  }

  bool EnableLoggingThread = Options.Foreground;
  // NOTE(review): the guard below is #ifndef, so the override applies to builds WITHOUT
  // FEX_STEAM_SUPPORT, while the comment inside describes the opposite. Confirm which is intended.
#ifndef FEX_STEAM_SUPPORT
  // If running with Steam support then always enable the logging thread.
  EnableLoggingThread = true;
#endif

  if (EnableLoggingThread) {
    // Only start a log thread if we are in the foreground.
    // Prevents FEX from trying to log to nothing.
    Logger::StartLogThread();
  }

  if (!SquashFS::InitializeSquashFS()) {
    LogMan::Msg::DFmt("[FEXServer] Couldn't mount squashfs");
    return -1;
  }

  // Close the pipes we found at the start
  // This will let FEX know we are ready
  PipeScanner::ClosePipes();

  // A persistent timeout of 0 (the option's default) is replaced by 1 here.
  ProcessPipe::SetConfiguration(Options.Foreground, Options.PersistentTimeout ?: 1);

  // Actually spin up the request thread.
  // Any applications that were waiting for the socket to accept will then go through here.
  ProcessPipe::WaitForRequests();

  SquashFS::UnmountRootFS();

  Logger::Shutdown();

  return 0;
}


================================================
FILE: Source/Tools/FEXServer/PipeScanner.cpp
================================================
// SPDX-License-Identifier: MIT
#include <dirent.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <vector>
#include <fcntl.h>

namespace PipeScanner {
// Descriptors registered through SetWaitPipe; ClosePipes closes and forgets them.
std::vector<int> IncomingPipes {};

/**
 * Registers FD as a pipe to be closed by ClosePipes and marks it close-on-exec.
 *
 * A descriptor whose flags cannot be queried (for example one that is not
 * open) is not registered: closing it later could hit an unrelated
 * descriptor that happened to reuse the same number in the meantime.
 */
void SetWaitPipe(int FD) {
  const int Flags = fcntl(FD, F_GETFD);
  if (Flags == -1) {
    return;
  }

  fcntl(FD, F_SETFD, Flags | FD_CLOEXEC);
  IncomingPipes.emplace_back(FD);
}

/**
 * Closes every registered pipe and empties the list, so calling this again is a no-op.
 */
void ClosePipes() {
  for (auto pipe : IncomingPipes) {
    close(pipe);
  }
  IncomingPipes.clear();
}
} // namespace PipeScanner


================================================
FILE: Source/Tools/FEXServer/PipeScanner.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
namespace PipeScanner {
// Marks FD close-on-exec and remembers it for ClosePipes.
void SetWaitPipe(int FD);
// Closes all remembered descriptors and clears the list.
void ClosePipes();
} // namespace PipeScanner


================================================
FILE: Source/Tools/FEXServer/ProcessPipe.cpp
================================================
// SPDX-License-Identifier: MIT
#include "FEXHeaderUtils/Syscalls.h"
#include "Logger.h"
#include "SquashFS.h"

#include <Common/AsyncNet.h>
#include <Common/Config.h>
#include <Common/FDUtils.h>
#include <Common/FEXServerClient.h>

#include <FEXCore/Core/CodeCache.h>
#include <FEXCore/HLE/SourcecodeResolver.h>

#include <fmt/ranges.h>

#include <atomic>
#include <cassert>
#include <fcntl.h>
#include <filesystem>
#include <fstream>
#include <poll.h>
#include <string>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <vector>

#include <xxhash.h>

namespace FEXCore {
// Orders executable file infos by FileId only, so they can be used as std::map keys.
// Two entries with the same FileId compare equivalent regardless of their other members.
inline bool operator<(const FEXCore::ExecutableFileInfo& a, const FEXCore::ExecutableFileInfo& b) noexcept {
  return a.FileId < b.FileId;
}
} // namespace FEXCore

// Hash support for ExecutableFileInfo: the FileId itself is used as the hash value.
template<>
struct std::hash<FEXCore::ExecutableFileInfo> {
  std::size_t operator()(const FEXCore::ExecutableFileInfo& Val) const noexcept {
    return Val.FileId;
  }
};

namespace ProcessPipe {
constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO;
int ServerLockFD {-1};
int WatchFD {-1};
std::optional<fasio::tcp_acceptor> ServerAcceptor;
std::optional<fasio::tcp_acceptor> ServerFSAcceptor;
int NumClients = 0;
time_t RequestTimeout {10};
bool Foreground {false};
std::vector<struct pollfd> PollFDs {};

// FD count watching
constexpr size_t static MAX_FD_DISTANCE = 32;
rlimit MaxFDs {};
std::atomic<size_t> NumFilesOpened {};

// Path to directory for unprocessed code maps dumped by FEX
static std::string NewCodeMapDirectory;

// Path to directory for processed code maps (suitable for cache generation)
static std::string ReadyCodeMapDirectory;

// Stores the descriptor given via --watch_fd (-1 when the option was not used).
void SetWatchFD(int FD) {
  WatchFD = FD;
}

// Returns the number of entries in /proc/self/fd/, i.e. the descriptors this process
// currently has open while the directory is being walked.
size_t GetNumFilesOpen() {
  size_t OpenCount = 0;
  for ([[maybe_unused]] const auto& Entry : std::filesystem::directory_iterator {std::filesystem::path {"/proc/self/fd/"}}) {
    ++OpenCount;
  }
  return OpenCount;
}

// Caches the RLIMIT_NOFILE limits in MaxFDs and the current number of open
// descriptors in NumFilesOpened. A getrlimit failure is only reported on stderr.
void GetMaxFDs() {
  const int Status = getrlimit(RLIMIT_NOFILE, &MaxFDs);
  if (Status != 0) {
    const int Error = errno;
    fprintf(stderr, "[FEXMountDaemon] getrlimit(RLIMIT_NOFILE) returned error %d %s\n", Error, strerror(Error));
  }

  // Count the entries of /proc/self/fd/ to learn how many files are open right now.
  NumFilesOpened = GetNumFilesOpen();
}

/**
 * Doubles the soft RLIMIT_NOFILE limit (capped at the hard limit) once the
 * number of open descriptors comes within MAX_FD_DISTANCE of it.
 *
 * When the soft limit already equals the hard limit a diagnostic is printed
 * and nothing is changed. On success the cached MaxFDs is updated.
 */
void CheckRaiseFDLimit() {
  // Expressed as an addition so that a soft limit smaller than MAX_FD_DISTANCE cannot wrap the
  // unsigned subtraction and make the limit look unreachable.
  if (NumFilesOpened.load() + MAX_FD_DISTANCE < MaxFDs.rlim_cur) {
    // No need to raise the limit.
    return;
  }

  // rlim_t is unsigned and its width is platform dependent, so widen explicitly for printing.
  const auto AsPrintable = [](rlim_t Value) { return static_cast<unsigned long long>(Value); };

  if (MaxFDs.rlim_cur == MaxFDs.rlim_max) {
    fprintf(stderr, "[FEXMountDaemon] Our open FD limit is already set to max and we are wanting to increase it\n");
    fprintf(stderr, "[FEXMountDaemon] FEXMountDaemon will now no longer be able to track new instances of FEX\n");
    fprintf(stderr, "[FEXMountDaemon] Current limit is %llu(hard %llu) FDs and we are at %zu\n", AsPrintable(MaxFDs.rlim_cur),
            AsPrintable(MaxFDs.rlim_max), GetNumFilesOpen());
    fprintf(stderr, "[FEXMountDaemon] Ask your administrator to raise your kernel's hard limit on open FDs\n");
    return;
  }

  rlimit NewLimit = MaxFDs;

  // Just multiply by two
  NewLimit.rlim_cur <<= 1;

  // Now limit to the hard max
  NewLimit.rlim_cur = std::min(NewLimit.rlim_cur, NewLimit.rlim_max);

  if (setrlimit(RLIMIT_NOFILE, &NewLimit) != 0) {
    fprintf(stderr, "[FEXMountDaemon] Couldn't raise FD limit to %llu even though our hard limit is %llu\n", AsPrintable(NewLimit.rlim_cur),
            AsPrintable(NewLimit.rlim_max));
  } else {
    // Set the new limit
    MaxFDs = NewLimit;
  }
}

// Tries to place a non-blocking whole-file lock of the given type (F_WRLCK / F_RDLCK) on FD.
// Returns false, with errno left as set by fcntl, if the lock could not be taken.
static bool TrySetServerLock(int FD, short LockType) {
  struct flock lk {
    .l_type = LockType,
    .l_whence = SEEK_SET,
    .l_start = 0,
    .l_len = 0,
  };
  return fcntl(FD, F_SETLK, &lk) != -1;
}

/**
 * Claims the FEXServer lock file so that only one server runs at a time.
 *
 * On success ServerLockFD refers to the lock file and holds a read lock on it.
 *
 * @return true if this process now owns the server lock, false if another
 *         process holds it or the lock file could not be set up.
 */
bool InitializeServerPipe() {
  auto ServerFolder = FEXServerClient::GetServerLockFolder();

  std::error_code ec {};
  if (!std::filesystem::exists(ServerFolder, ec)) {
    // Doesn't exist, create the folder as a user convenience.
    // Judge success by the error code: create_directories also returns false when another
    // process created the folder in the meantime, which is not a failure.
    std::filesystem::create_directories(ServerFolder, ec);
    if (ec) {
      LogMan::Msg::EFmt("Couldn't create server pipe folder at: {}", ServerFolder);
      return false;
    }
  }

  auto ServerLockPath = FEXServerClient::GetServerLockFile();

  // Now this is some tricky locking logic to ensure that we only ever have one server running
  // The logic is as follows:
  // - Try to make the lock file
  // - If Exists then check to see if it is a stale handle
  //   - Stale checking means opening the file that we know exists
  //   - Then we try getting a write lock
  //   - If we fail to get the write lock, then leave
  //   - Otherwise continue down the codepath and degrade to read lock
  // - Else try to acquire a write lock to ensure only one FEXServer exists
  //
  // - Once a write lock is acquired, downgrade it to a read lock
  //   - This ensures that future FEXServers won't race to create multiple read locks
  ServerLockFD = open(ServerLockPath.c_str(), O_RDWR | O_CREAT | O_CLOEXEC | O_EXCL, USER_PERMS);

  if (ServerLockFD == -1) {
    if (errno != EEXIST) {
      // Unhandled error.
      LogMan::Msg::EFmt("Unable to create FEXServer named lock file at: {} {} {}", ServerLockPath, errno, strerror(errno));
      return false;
    }

    // If the lock exists then it might be a stale connection.
    // Open it so the lock status can tell us whether another process is still alive.
    ServerLockFD = open(ServerLockPath.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS);
    if (ServerLockFD == -1) {
      // File couldn't get opened even though it existed?
      // Must have raced something here.
      return false;
    }
  }

  // Whether the lock file is fresh or pre-existing, owning it means holding the write lock.
  if (!TrySetServerLock(ServerLockFD, F_WRLCK)) {
    // Couldn't get a write lock, this means that another process already owns a lock on the file
    close(ServerLockFD);
    ServerLockFD = -1;
    return false;
  }

  // Now that a write lock is held, downgrade it to a read lock
  if (!TrySetServerLock(ServerLockFD, F_RDLCK)) {
    // This shouldn't occur
    LogMan::Msg::EFmt("Unable to downgrade a write lock to a read lock {} {} {}", ServerLockPath, errno, strerror(errno));
    close(ServerLockFD);
    ServerLockFD = -1;
    return false;
  }

  return true;
}

static fasio::poll_reactor Reactor;

void HandleSocketData(fasio::tcp_socket&);

/**
 * Creates one of the two server sockets and starts accepting clients on it.
 *
 * @param abstract true to use the name from GetServerSocketName(), false to use the
 *                 filesystem path from GetServerSocketPath() (any existing file at
 *                 that path is unlinked first).
 * @return true if the acceptor was created, false otherwise (the error is logged).
 *
 * On success the acceptor is stored in ServerAcceptor (abstract) or ServerFSAcceptor.
 */
bool InitializeServerSocket(bool abstract) {
  fextl::string ServerSocketName;
  if (abstract) {
    ServerSocketName = FEXServerClient::GetServerSocketName();
  } else {
    ServerSocketName = FEXServerClient::GetServerSocketPath();
    // Unlink the socket file if it exists
    // We are being asked to create a daemon, not error check
    // We don't care if this failed or not
    unlink(ServerSocketName.c_str());
  }
  auto Acceptor = fasio::tcp_acceptor::create(Reactor, abstract, ServerSocketName);
  if (!Acceptor) {
    LogMan::Msg::EFmt("Failed to create FEXServer socket: error {} ({})", errno, strerror(errno));
    return false;
  }

  // Called for every accept attempt; always re-arms itself with post_callback::repeat.
  Acceptor->async_accept([](fasio::error ec, std::optional<fasio::tcp_socket> Socket) {
    if (ec != fasio::error::success) {
      if (ec == fasio::error::generic_errno) {
        LogMan::Msg::EFmt("FEXServer failed to establish client connection: error {} ({})", errno, strerror(errno));
      }
      // Ignore error and wait for next connection
      return fasio::post_callback::repeat;
    }

    // Copy the descriptor out before the socket object is moved into the handler below.
    int FD = Socket->FD;
    ++NumClients;
    Reactor.bind_handler(
      pollfd {
        .fd = FD,
        .events = POLLIN | POLLPRI | POLLRDHUP,
        .revents = 0,
      },
      // The handler owns the socket for as long as it stays registered.
      [Socket = std::move(Socket).value()](fasio::error ec) mutable {
        if (ec != fasio::error::success) {
          // Error on the client socket: close it, drop the client count and unregister the handler.
          close(Socket.FD);
          --NumClients;
          return fasio::post_callback::drop;
        }
        HandleSocketData(Socket);
        // Wait for next data
        return fasio::post_callback::repeat;
      });

    // Wait for next connection
    return fasio::post_callback::repeat;
  });

  // Keep the acceptor alive in the global slot matching the socket kind.
  (abstract ? ServerAcceptor : ServerFSAcceptor) = std::move(Acceptor).value();
  return true;
}

// Sends a result packet whose header type is TYPE_ERROR to the client.
// The error code reported by write() is not inspected.
void SendEmptyErrorPacket(fasio::tcp_socket& Socket) {
  FEXServerClient::FEXServerResultPacket Res {
    .Header {
      .Type = FEXServerClient::PacketType::TYPE_ERROR,
    },
  };

  // View the packet struct as raw bytes for sending.
  fasio::mutable_buffer Data = {.Data = std::as_writable_bytes(std::span(&Res, 1))};
  fasio::error ec;
  write(Socket, Data, ec);
}

// Sends a result packet whose header type is TYPE_SUCCESS to the client, with FD attached
// to the buffer (presumably transferred alongside the packet bytes - confirm in fasio::write).
// The error code reported by write() is not inspected.
void SendFDSuccessPacket(fasio::tcp_socket& Socket, int FD) {
  FEXServerClient::FEXServerResultPacket Res {
    .Header {
      .Type = FEXServerClient::PacketType::TYPE_SUCCESS,
    },
  };

  // View the packet struct as raw bytes and reference the descriptor to send with it.
  fasio::mutable_buffer Data = {.Data = std::as_writable_bytes(std::span(&Res, 1)), .FD = &FD};
  fasio::error ec;
  write(Socket, Data, ec);
}

/**
 * Discovers any pending code maps, parses their contents into a runtime data structure, and deletes them.
 *
 * @param MainFileId    Main executable whose pending code maps should be imported.
 * @param HasMultiblock Selects the base filename variant (passed negated to GetBaseFilename).
 * @return For every file referenced by the imported code maps, the union of all blocks
 *         recorded for it across those code maps.
 */
static std::map<FEXCore::ExecutableFileInfo, fextl::set<uintptr_t>>
ImportPendingCodeMaps(const FEXCore::ExecutableFileInfo& MainFileId, bool HasMultiblock) {
  // Detect code maps by checking file name suffixes by counting up an index.
  // Code maps that are ready for reading must be non-empty and flock(FLOCK_EX) must succeed:
  // - If empty, we tried generating the cache before the client could even lock it
  // - If exclusively lockable, we know the client either closed or crashed
  std::vector<std::string> CodeMaps;
  for (int Index = 0; true; ++Index) {
    auto CodeMap = fmt::format("{}/{}.{}.bin", NewCodeMapDirectory, FEXCore::CodeMap::GetBaseFilename(MainFileId, !HasMultiblock), Index);
    auto FD = open(CodeMap.c_str(), O_RDONLY);
    if (FD == -1) {
      break;
    }

    // Acquire exclusive lock to ensure the client process is done writing data.
    // Also ensure the file is non-empty, otherwise we're racing the client in acquiring the initial lock.
    // A failed fstat is treated like an in-use file so the size is never read uninitialized.
    struct stat FileStats;
    if (fstat(FD, &FileStats) != 0 || FileStats.st_size == 0 || flock(FD, LOCK_EX | LOCK_NB) != 0) {
      fmt::print("Code map {} is still in use, skipping\n", CodeMap);
      // Still being written to by a client process, so skip this file
      // TODO: Rename from X.n.bin to X.0.bin (once the latter has been removed!) to ensure we'll catch it on next run
      close(FD);
      continue;
    } else {
      fmt::print("Found code map {}, queuing for merge\n", CodeMap);
    }
    close(FD);
    CodeMaps.push_back(CodeMap);
  }

  // Update merged code map
  std::map<FEXCore::ExecutableFileInfo, fextl::set<uintptr_t>> ImportedCodeMaps;
  if (!CodeMaps.empty()) {
    fmt::print("Found {} new code maps, updating reference code map\n", CodeMaps.size());

    for (auto& CodeMap : CodeMaps) {
      std::ifstream Incoming(CodeMap, std::ios_base::binary);
      auto NewBlocks = FEXCore::CodeMap::ParseCodeMap(Incoming);
      for (auto& [FileId, Contents] : NewBlocks) {
        // Several code maps usually reference the same file. emplace() leaves an existing entry
        // untouched, so create (or find) the entry first and then merge the blocks into it.
        // Otherwise every code map after the first would lose its blocks for that file.
        auto Entry = ImportedCodeMaps
                       .emplace(std::piecewise_construct, std::forward_as_tuple(nullptr, FileId, std::move(Contents.Filename)),
                                std::forward_as_tuple())
                       .first;
        Entry->second.merge(std::move(Contents.Blocks));
      }
    }
  }

  // Delete all imported code maps
  for (auto& CodeMapFile : CodeMaps) {
    std::filesystem::remove(CodeMapFile);
    // TODO: Rename any pending (not finalized) code maps to PROGRAMNAME.0.bin so it will be found on the next run
  }

  return ImportedCodeMaps;
}

/**
 * Serializes aggregated code map data for one file into a single code map
 * file that can be used for cache generation.
 *
 * @param File         File the blocks belong to.
 * @param OutputName   Path of the code map file to create.
 * @param Blocks       Block offsets to record for File.
 * @param IsMainFile   When true, File is recorded as the main executable followed by a
 *                     library-load entry per dependency; otherwise only File itself is
 *                     recorded as a library load.
 * @param Dependencies Range of (file, value) pairs; only the file part is used.
 */
static void WriteNewCodeMap(const FEXCore::ExecutableFileInfo& File, const std::string& OutputName, const fextl::set<uintptr_t>& Blocks,
                            bool IsMainFile, const auto& Dependencies) {
  fmt::print("Writing {} blocks to {}\n", Blocks.size(), OutputName);

  // Opener that creates the output file up front and hands its descriptor to the writer.
  struct CreatedFileOpener : FEXCore::CodeMapOpener {
    CreatedFileOpener(const std::string& Path)
      : FD {creat(Path.c_str(), 0644)} {}

    int OpenCodeMapFile() override {
      return FD;
    }

    int FD;
  };

  CreatedFileOpener Opener(OutputName);
  FEXCore::CodeMapWriter Writer(Opener, true);

  if (!IsMainFile) {
    // List only the library itself
    Writer.AppendLibraryLoad(File);
  } else {
    // List the main executable and all used libraries
    Writer.AppendSetMainExecutable(File);

    for (auto& [Library, Unused] : Dependencies) {
      Writer.AppendLibraryLoad(Library);
    }
  }

  for (auto& BlockOffset : Blocks) {
    Writer.AppendBlock(FEXCore::ExecutableFileSectionInfo {File, 0}, BlockOffset);
  }
}

// Flag attached to each dependency returned by AggregateCodeMaps,
// indicating whether its cache needs to be regenerated.
enum class NeedsCacheRefresh {
  No,
  Yes,
};

/**
 * Checks and processes new code maps generated by FEX for the given application.
 *
 * Processed code maps are merged into the reference code map and deleted afterwards.
 *
 * @param MainFileId    Identifies the main executable whose code maps are processed
 * @param HasMultiblock Selects between the multiblock and no-multiblock code map variants
 *
 * The returned map is a list of all dependencies of the main executables discovered,
 * associated with a flag to indicate need for cache regeneration.
 */
static std::map<FEXCore::ExecutableFileInfo, NeedsCacheRefresh> AggregateCodeMaps(const FEXCore::ExecutableFileInfo& MainFileId, bool HasMultiblock) {
  std::map<FEXCore::ExecutableFileInfo, NeedsCacheRefresh> Result;

  // Read all dependencies discovered in previous runs
  {
    auto MainFileCodeMapPath = fmt::format("{}/{}", ReadyCodeMapDirectory, FEXCore::CodeMap::GetBaseFilename(MainFileId, !HasMultiblock));
    std::ifstream MainFileCodeMap(MainFileCodeMapPath, std::ios_base::binary);
    for (auto& [FileId, Contents] : FEXCore::CodeMap::ParseCodeMap(MainFileCodeMap)) {
      Result.emplace(std::piecewise_construct, std::forward_as_tuple(nullptr, FileId, Contents.Filename),
                     std::forward_as_tuple(NeedsCacheRefresh::No));
    }
  }

  // Accumulate information from new code maps
  auto IncomingCodeMap = ImportPendingCodeMaps(MainFileId, HasMultiblock);
  for (auto& [File, _] : IncomingCodeMap) {
    Result.emplace(std::piecewise_construct, std::forward_as_tuple(nullptr, File.FileId, File.Filename),
                   std::forward_as_tuple(NeedsCacheRefresh::No));
  }

  // For each referenced library, add referenced offsets to that library's reference code map
  for (auto& [File, Blocks] : IncomingCodeMap) {
    const auto BinaryName = std::string {FEXCore::CodeMap::GetBaseFilename(File, !HasMultiblock)};
    auto OutputName = fmt::format("{}/{}", ReadyCodeMapDirectory, BinaryName);

    // Check if the new code maps add any new information to the previous code map
    if (auto ReferenceCodeMap = std::ifstream(OutputName, std::ios_base::binary)) {
      auto ParsedReference = FEXCore::CodeMap::ParseCodeMap(ReferenceCodeMap);

      // A reference file without an entry for this binary is treated like a missing one
      // (the code map gets rewritten below) instead of throwing out of the request handler.
      if (auto Previous = ParsedReference.find(File.FileId); Previous != ParsedReference.end()) {
        auto PreviousBlocks = std::move(Previous->second.Blocks);
        const auto NumPreviousBlocks = PreviousBlocks.size();

        // After the merge, Blocks holds the union of old and new blocks
        Blocks.merge(std::move(PreviousBlocks));
        if (Blocks.size() == NumPreviousBlocks) {
          // No new blocks => no need to regenerate the corresponding cache
          continue;
        }

        fmt::println("  Found {} new blocks ({} total) in code map {} for {}", Blocks.size() - NumPreviousBlocks, Blocks.size(), BinaryName,
                     File.Filename);
      }
    }

    // Update code map and queue for cache generation.
    // Only the main executable's code map lists the discovered dependencies.
    std::map<FEXCore::ExecutableFileInfo, NeedsCacheRefresh> Empty;
    WriteNewCodeMap(File, OutputName, Blocks, true, File.FileId == MainFileId.FileId ? Result : Empty);
    Result.at(File) = NeedsCacheRefresh::Yes;
  }

  return Result;
}

/**
 * Runs the given program as a child process and blocks until it has finished.
 *
 * @param path Program to execute (looked up via PATH, see execvp)
 * @param args Null-terminated argument vector, including argv[0]
 *
 * @return The child's exit status truncated to a signed 8-bit value, or -1 if the child
 *         could not be spawned, could not be waited for, or did not exit normally.
 *         A child that fails to exec exits with status -1 and hence also yields -1.
 */
int32_t EmbedSubprocess(const char* path, char* const* args) {
  const pid_t pid = fork();
  if (pid == -1) {
    // Without this check, waitpid(-1, ...) below would reap an arbitrary unrelated child
    return -1;
  }

  if (pid == 0) {
    execvp(path, args);
    // Only reached if exec failed
    _exit(-1);
  }

  int32_t Status {};
  pid_t Waited {};
  do {
    Waited = waitpid(pid, &Status, 0);
  } while (Waited == -1 && errno == EINTR);

  // Only trust Status if waitpid actually reported our child; otherwise it is still
  // zero-initialized and would be mistaken for a successful exit.
  if (Waited == pid && WIFEXITED(Status)) {
    return (int8_t)WEXITSTATUS(Status);
  }

  return -1;
}

/**
 * Launches `FEXOfflineCompiler generate <CodeMap>` as a child process and waits for it,
 * producing a code cache from the given code map.
 *
 * @return The value reported by EmbedSubprocess for the child
 */
static int RunOfflineCompiler(const char* CodeMap) {
  constexpr const char* Program = "FEXOfflineCompiler";
  const char* Argv[] = {Program, "generate", CodeMap, nullptr};
  return EmbedSubprocess(Program, const_cast<char* const*>(Argv));
}

/**
 * Reads one batch of data from a client socket and services every request packet in it.
 *
 * A file descriptor attached to the message (if any) is received into a local and is consumed
 * by the code-cache / code-map requests. The descriptor is closed before returning in all cases
 * once the read succeeded.
 *
 * An unknown packet type closes the client socket.
 */
void HandleSocketData(fasio::tcp_socket& Socket) {
  std::vector<uint8_t> Data(1500);

  // Get the current number of FDs of the process before we start handling sockets.
  GetMaxFDs();

  int inFD = -1;
  fasio::mutable_buffer buffer = {std::as_writable_bytes(std::span(Data)), nullptr, &inFD};

  {
    fasio::error ec;

    auto Read = Socket.read_some(buffer, ec);
    if (ec == fasio::error::success) {
      assert(Read >= sizeof(FEXServerClient::FEXServerRequestPacket));
      // Restrict the buffer to the bytes actually received
      buffer = {buffer.Data.subspan(0, Read)};
    } else if (ec == fasio::error::eof) {
      return;
    } else {
      perror("read");
      return;
    }
  }

  while (buffer.size() > 0) {
    // Decode the packet at the current read position. Each case advances `buffer` past the packet it
    // handled, so decoding from the start of the storage would process the first packet repeatedly.
    FEXServerClient::FEXServerRequestPacket* Req = reinterpret_cast<FEXServerClient::FEXServerRequestPacket*>(buffer.Data.data());
    switch (Req->Header.Type) {
    case FEXServerClient::PacketType::TYPE_KILL:
      Reactor.stop_async();
      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest);
      break;
    case FEXServerClient::PacketType::TYPE_GET_LOG_FD: {
      int fds[2] {};
      if (Logger::LogThreadRunning() && pipe2(fds, 0) == 0) {
        // 0 = Read
        // 1 = Write
        Logger::AppendLogFD(fds[0]);

        SendFDSuccessPacket(Socket, fds[1]);

        // Close the write side now, doesn't matter to us
        close(fds[1]);

        // Check if we need to increase the FD limit.
        ++NumFilesOpened;
        CheckRaiseFDLimit();
      } else {
        // Log thread isn't running (or no pipe could be created). Let FEX know it can't have one.
        // Without the pipe2 check, a failed pipe2 would register, send and close FD 0.
        SendEmptyErrorPacket(Socket);
      }

      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
      break;
    }
    case FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH: {
      const fextl::string& MountFolder = SquashFS::GetMountFolder();

      FEXServerClient::FEXServerResultPacket Res {
        .MountPath {
          .Header {
            .Type = FEXServerClient::PacketType::TYPE_GET_ROOTFS_PATH,
          },
          // Length includes the null terminator sent as the third chunk below
          .Length = MountFolder.size() + 1,
        },
      };

      char Null {};

      // Header, path characters and terminator are sent as one chained write
      fasio::mutable_buffer Data[] = {
        {.Data = std::as_writable_bytes(std::span(&Res, 1))},
        {.Data = std::as_writable_bytes(std::span(const_cast<fextl::string&>(MountFolder)))},
        {.Data = std::as_writable_bytes(std::span(&Null, 1))},
      };
      fasio::error ec;
      write(Socket, Chained(Data), ec);

      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::BasicRequest);
      break;
    }
    case FEXServerClient::PacketType::TYPE_GET_PID_FD: {
      int FD = FHU::Syscalls::pidfd_open(::getpid(), 0);

      if (FD < 0) {
        // Couldn't get PIDFD due to too old of kernel.
        // Return a pipe to track the same information.
        //
        int fds[2];
        if (pipe2(fds, O_CLOEXEC) == 0) {
          SendFDSuccessPacket(Socket, fds[0]);

          // Close the read side now, doesn't matter to us
          close(fds[0]);

          // Check if we need to increase the FD limit.
          ++NumFilesOpened;
          CheckRaiseFDLimit();

          // Write side will naturally close on process exit, letting the other process know we have exited.
        } else {
          // No pipe either: report the failure instead of sending an uninitialized FD.
          // NOTE(review): assumes clients handle an error reply for this request type - confirm.
          SendEmptyErrorPacket(Socket);
        }
      } else {
        SendFDSuccessPacket(Socket, FD);

        // Close the FD now since we've sent it
        close(FD);
      }

      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
      break;
    }

    case FEXServerClient::PacketType::TYPE_POPULATE_CODE_CACHE:
    case FEXServerClient::PacketType::TYPE_POPULATE_CODE_CACHE_NO_MULTIBLOCK: {
      // The binary is identified by the path of the FD attached to the request
      char Tmp[PATH_MAX];
      const int TmpLen = inFD != -1 ? FEX::get_fdpath(inFD, Tmp) : -1;
      if (TmpLen == -1) {
        // Client input: don't rely on an assert that disappears in release builds
        LogMan::Msg::EFmt("Code cache request without a resolvable FD argument");
        SendEmptyErrorPacket(Socket);
        buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
        if (inFD != -1) {
          close(inFD);
          inFD = -1;
        }
        break;
      }

      auto filename_hash = XXH3_64bits(Tmp, TmpLen);
      const bool HasMultiblock = (Req->Header.Type == FEXServerClient::PacketType::TYPE_POPULATE_CODE_CACHE);

      FEXCore::ExecutableFileInfo MainFileId = {nullptr, filename_hash, fextl::string(Tmp, TmpLen)};
      fmt::print("Requested {}cache generation for {}\n", HasMultiblock ? "" : "nomb-", MainFileId.Filename);

      auto GetCacheFilename = [](const FEXCore::ExecutableFileInfo& FileId) {
        return fmt::format("{}cache/{}-{:016x}", FEX::Config::GetCacheDirectory(), FEXCore::CodeMap::GetBaseFilename(FileId, false),
                           0 /* TODO: Use unique cache id */);
      };

      // Update code maps; any update necessitates an update of the corresponding cache
      auto Binaries = AggregateCodeMaps(MainFileId, HasMultiblock);

      // Check for other conditions that require a cache refresh even when the code map didn't change
      for (auto& [FileInfo, NeedsRefresh] : Binaries) {
        if (NeedsRefresh == NeedsCacheRefresh::Yes) {
          // Already queued for cache generation, no need for further checks
          continue;
        }

        // Trigger cache generation for this file if no cache exists or if the cache is older than the most recent update to its code map
        std::error_code ec;
        const auto BinaryName = FEXCore::CodeMap::GetBaseFilename(FileInfo, !HasMultiblock);
        const auto MergedCodeMapFilename = fmt::format("{}/{}", ReadyCodeMapDirectory, BinaryName);
        const auto LastCodeMapUpdate = std::filesystem::last_write_time(MergedCodeMapFilename, ec);
        if (std::filesystem::last_write_time(GetCacheFilename(FileInfo), ec) < LastCodeMapUpdate || ec) {
          fmt::println("  Scheduling update for {} cache for {}", ec ? "missing" : "outdated", BinaryName);
          NeedsRefresh = NeedsCacheRefresh::Yes;
        }
      }

      // Trigger offline-compile for each binary that needs it
      for (const auto& [File, NeedsRefresh] : Binaries) {
        if (NeedsRefresh != NeedsCacheRefresh::Yes) {
          continue;
        }

        const auto BinaryName = (std::string)FEXCore::CodeMap::GetBaseFilename(File, !HasMultiblock);
        fmt::println("Generating cache for {}", BinaryName);
        int Status = RunOfflineCompiler(fmt::format("{}/{}", ReadyCodeMapDirectory, BinaryName).c_str());
        if (Status != 0) {
          fmt::println("ERROR: Cache generation failed with status {}", Status);
        }
      }

      FEXServerClient::FEXServerResultPacket Res {
        .Header {
          .Type = FEXServerClient::PacketType::TYPE_SUCCESS,
        },
      };

      fasio::mutable_buffer Data = {.Data = std::as_writable_bytes(std::span(&Res, 1))};
      fasio::error ec;
      write(Socket, Data, ec);
      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
      close(inFD);
      inFD = -1;
      break;
    }

    case FEXServerClient::PacketType::TYPE_QUERY_CODE_MAP:
    case FEXServerClient::PacketType::TYPE_QUERY_CODE_MAP_NO_MULTIBLOCK: {
      // The binary is identified by the path of the FD attached to the request
      char Tmp[PATH_MAX];
      const int TmpLen = inFD != -1 ? FEX::get_fdpath(inFD, Tmp) : -1;
      if (TmpLen == -1) {
        // Client input: don't rely on an assert that disappears in release builds
        LogMan::Msg::EFmt("Code map request without a resolvable FD argument");
        SendEmptyErrorPacket(Socket);
        buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
        if (inFD != -1) {
          close(inFD);
          inFD = -1;
        }
        break;
      }

      std::filesystem::path BinaryPath = std::string_view(Tmp, TmpLen);
      // TODO: Move to common code
      const auto filename_hash = XXH3_64bits(Tmp, TmpLen);
      const bool HasMultiblock = (Req->Header.Type == FEXServerClient::PacketType::TYPE_QUERY_CODE_MAP);

      FEXServerClient::FEXServerResultPacket Res {
        .Header {
          .Type = FEXServerClient::PacketType::TYPE_SUCCESS,
        },
      };

      // Find first code map that doesn't exist yet
      int Index = 0;
      std::string Filename;
      do {
        Filename = fmt::format("{}/{}.{}.bin", NewCodeMapDirectory,
                               FEXCore::CodeMap::GetBaseFilename(
                                 FEXCore::ExecutableFileInfo {nullptr, filename_hash, (fextl::string)BinaryPath.string()}, !HasMultiblock),
                               Index++);
      } while (std::filesystem::exists(Filename));

      std::filesystem::create_directories(NewCodeMapDirectory);
      std::filesystem::create_directories(ReadyCodeMapDirectory);
      auto CodeMapFD = open(Filename.c_str(), O_CREAT | O_CLOEXEC | O_WRONLY, 0644);

      // The success packet carries the new code map FD only if the file could be opened
      fasio::mutable_buffer Data = {.Data = std::as_writable_bytes(std::span(&Res, 1)),
                                    .FD = (CodeMapFD != -1 ? std::optional {&CodeMapFD} : std::nullopt)};
      fasio::error ec;
      write(Socket, Data, ec);
      buffer += sizeof(FEXServerClient::FEXServerRequestPacket::Header);
      close(inFD);
      inFD = -1;
      if (CodeMapFD != -1) {
        close(CodeMapFD);
      }
      break;
    }

    // Invalid
    case FEXServerClient::PacketType::TYPE_ERROR:
    default:
      // Something sent us an invalid packet. Drop this client and continue
      LogMan::Msg::EFmt("Invalid FEXServer packet received: {:02x}", fmt::join(buffer.Data, ""));
      close(Socket.FD);
      // Don't leak an FD that was attached to the invalid message
      if (inFD != -1) {
        close(inFD);
      }
      return;
    }
  }

  if (inFD != -1) {
    LogMan::Msg::EFmt("Received unused FD argument");
    close(inFD);
  }
}

// Releases the server lock FD and both acceptors so that this instance stops being reachable.
void CloseConnections() {
  // Close the server pipe so new processes will know to spin up a new FEXServer.
  // This one is closing
  close(ServerLockFD);

  // Close the server socket so no more connections can be started
  ServerAcceptor.reset();
  ServerFSAcceptor.reset();
}

void WaitForRequests() {
  if (WatchFD != -1) {
    // Add a fake client.
    ++NumClients;
    Reactor.bind_handler(
      pollfd {
        .fd = WatchFD,
        .events = POLLPRI | POLLRDHUP,
        .revents = 0,
      },
      [InternalWatchFD = WatchFD](fasio::error ec) mutable {
        if (ec != fasio::error::success) {
          close(InternalWatchFD);
          --NumClients;
          return fasio::post_callback::drop;
        }
        // Wait for next data
        return fasio::post_callback::repeat;
      });
  }

  Reactor.enable_async_stop();

  while (true) {
    std::optional Timeout = std::chrono::seconds {RequestTimeout};
    if (Foreground || NumClients > 0) {
      Timeout.reset();
    }
    auto Result = Reactor.run_one(Timeout);
    if (Result != fasio::error::success || Reactor.stopped()) {
      Reactor.cleanup();
      break;
    }
  }

  LogMan::Msg::DFmt("[FEXServer] Shutting Down");

  CloseConnections();
}

void SetConfiguration(bool Foreground, uint32_t PersistentTimeout) {
  ProcessPipe::Foreground = Foreground;
  ProcessPipe::RequestTimeout = PersistentTimeout;

  NewCodeMapDirectory = FEX::Config::GetCacheDirectory() + "codemap/new";
  ReadyCodeMapDirectory = FEX::Config::GetCacheDirectory() + "codemap/ready";
}

// Asks the reactor to stop asynchronously; WaitForRequests leaves its loop once the reactor reports stopped.
void Shutdown() {
  Reactor.stop_async();
}
} // namespace ProcessPipe


================================================
FILE: Source/Tools/FEXServer/ProcessPipe.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>

// Public interface of the FEXServer request loop.
namespace ProcessPipe {
// NOTE(review): presumably sets up the server lock file/pipe and returns false on failure - confirm against the definition.
bool InitializeServerPipe();
// NOTE(review): presumably creates the listening socket; `abstract` looks like it selects an abstract socket address - confirm.
bool InitializeServerSocket(bool abstract);
// Blocks servicing requests until the reactor stops, fails or times out, then closes the server connections.
void WaitForRequests();
// Stores the foreground flag and idle timeout and derives the code map directories from the cache directory.
void SetConfiguration(bool Foreground, uint32_t PersistentTimeout);
// Requests an asynchronous stop of the request loop.
void Shutdown();
// NOTE(review): presumably stores the FD that WaitForRequests polls and counts as a client - confirm.
void SetWatchFD(int FD);
} // namespace ProcessPipe


================================================
FILE: Source/Tools/FEXServer/SquashFS.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Common/FEXServerClient.h"
#include "Common/FileFormatCheck.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/string.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <fcntl.h>
#include <filesystem>
#include <sys/poll.h>
#include <sys/stat.h>
#include <sys/wait.h>
#include <thread>

namespace SquashFS {

// Mode bits (rwx for user, group and other) passed to open() for the rootfs lock file
constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO;
// FD of the rootfs lock file; -1 while it isn't open
int ServerRootFSLockFD {-1};
// PID returned by fork() for the FUSE mount helper; zero until a mount was attempted
int FuseMountPID {};
// Configured rootfs path, replaced by the temporary mount point once an image gets mounted
fextl::string MountFolder {};

// Sends SIGINT to the recorded FUSE mount helper PID, if one was recorded.
void ShutdownImagePID() {
  if (!FuseMountPID) {
    // No mount helper was ever forked
    return;
  }

  FHU::Syscalls::tgkill(FuseMountPID, FuseMountPID, SIGINT);
}

/**
 * Opens (creating it if necessary) the rootfs lock file and takes an exclusive write lock on it.
 *
 * If the lock file already exists it may be left over from a server that is gone; in that case it
 * is reused if the write lock can be obtained, and its old contents are discarded.
 *
 * @return true if this process now holds the write lock (ServerRootFSLockFD is valid),
 *         false if the file couldn't be opened or another process holds a conflicting lock.
 */
bool InitializeSquashFSPipe() {
  auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile();

  // Tries to take a whole-file write lock on ServerRootFSLockFD.
  // On failure the FD is closed and reset, since something else owns the lock.
  const auto TakeWriteLock = []() -> bool {
    flock lk {
      .l_type = F_WRLCK,
      .l_whence = SEEK_SET,
      .l_start = 0,
      .l_len = 0,
    };

    if (fcntl(ServerRootFSLockFD, F_SETLK, &lk) != -1) {
      return true;
    }

    close(ServerRootFSLockFD);
    ServerRootFSLockFD = -1;
    return false;
  };

  ServerRootFSLockFD = open(RootFSLockFile.c_str(), O_CREAT | O_RDWR | O_TRUNC | O_EXCL | O_CLOEXEC, USER_PERMS);
  if (ServerRootFSLockFD != -1) {
    // Lock file was freshly created. Try to get a write lock
    return TakeWriteLock();
  }

  if (errno != EEXIST) {
    // Unhandled error.
    LogMan::Msg::EFmt("[FEXServer] Unable to create FEXServer RootFS lock file at: {} {} {}", RootFSLockFile, errno, strerror(errno));
    return false;
  }

  // If the lock file exists then it might be a stale connection.
  // Check the lock status to see if another process is still alive.
  ServerRootFSLockFD = open(RootFSLockFile.c_str(), O_RDWR | O_CLOEXEC, USER_PERMS);
  if (ServerRootFSLockFD == -1) {
    // File couldn't get opened even though it existed?
    // Must have raced something here.
    return false;
  }

  // Now that we have opened the file, try to get a write lock.
  if (!TakeWriteLock()) {
    // We couldn't get a write lock, this means that another process already owns a lock on the file
    return false;
  }

  // Write lock was gained on a stale file. Drop whatever the previous owner wrote so that a shorter
  // mount path written later doesn't keep trailing bytes of the old one.
  if (ftruncate(ServerRootFSLockFD, 0) == -1) {
    // Best effort: the lock itself is still held
    LogMan::Msg::EFmt("[FEXServer] Unable to truncate stale RootFS lock file at: {} {} {}", RootFSLockFile, errno, strerror(errno));
  }

  return true;
}

/**
 * Replaces the lock held on the rootfs lock file with a whole-file read lock.
 *
 * @return true on success; on failure the lock file FD is closed, reset to -1 and false is returned.
 */
bool DowngradeRootFSPipeToReadLock() {
  flock ReadLock {
    .l_type = F_RDLCK,
    .l_whence = SEEK_SET,
    .l_start = 0,
    .l_len = 0,
  };

  if (fcntl(ServerRootFSLockFD, F_SETLK, &ReadLock) != -1) {
    return true;
  }

  // This shouldn't occur
  LogMan::Msg::EFmt("[FEXServer] Unable to downgrade a rootfs write lock to a read lock {} {}", errno, strerror(errno));
  close(ServerRootFSLockFD);
  ServerRootFSLockFD = -1;
  return false;
}

/**
 * Mounts a rootfs image on a freshly created temporary directory using a FUSE helper.
 *
 * @param SquashFS Path of the image file to mount
 * @param EroFS    If true the image is mounted with `erofsfuse`, otherwise with `squashfuse`
 *
 * On success MountFolder names the mount point and the mount point is written to the rootfs lock file.
 *
 * @return false if the mount directory, the helper pipe or the helper process couldn't be set up,
 *         or if the helper couldn't be executed; true otherwise.
 */
bool MountRootFSImagePath(const fextl::string& SquashFS, bool EroFS) {
  pid_t ParentTID = ::getpid();
  MountFolder = fmt::format("{}/.FEXMount{}-XXXXXX", FEXServerClient::GetServerMountFolder(), ParentTID);
  char* MountFolderStr = MountFolder.data();

  // Make the temporary mount folder
  if (mkdtemp(MountFolderStr) == nullptr) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't create temporary mount name: {}", MountFolder);
    return false;
  }

  // Change the permissions
  if (chmod(MountFolderStr, 0777) != 0) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't change permissions on temporary mount: {}", MountFolder);
    rmdir(MountFolderStr);
    return false;
  }

  // Create local FDs so our internal forks can communicate.
  // Close-on-exec keeps the write end out of the long-running FUSE helper; the child can still
  // use it to report an exec failure, since the flag only takes effect on a successful exec.
  int fds[2];
  if (pipe2(fds, O_CLOEXEC) == -1) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't create pipe for mount helper: {} {}", errno, strerror(errno));
    rmdir(MountFolderStr);
    return false;
  }

  int pid = fork();
  if (pid == -1) {
    // Without this check, waitpid(-1, ...) below would wait on an arbitrary child
    LogMan::Msg::EFmt("[FEXServer] Couldn't fork mount helper: {} {}", errno, strerror(errno));
    close(fds[0]);
    close(fds[1]);
    rmdir(MountFolderStr);
    return false;
  }

  if (pid == 0) {
    // Child
    close(fds[0]); // Close read side
    const char* argv[4];
    argv[0] = EroFS ? "erofsfuse" : "squashfuse";
    argv[1] = SquashFS.c_str();
    argv[2] = MountFolder.c_str();
    argv[3] = nullptr;

    // Try and execute {erofsfuse, squashfuse} to mount our rootfs
    if (execvpe(argv[0], (char* const*)argv, environ) == -1) {
      // Give a hopefully helpful error message for users
      LogMan::Msg::EFmt("[FEXServer] '{}' Couldn't execute for some reason: {} {}\n", argv[0], errno, strerror(errno));
      LogMan::Msg::EFmt("[FEXServer] To mount squashfs rootfs files you need {} installed\n", argv[0]);
      LogMan::Msg::EFmt("[FEXServer] Check your FUSE setup.\n");

      // Let the parent know that we couldn't execute for some reason
      uint64_t error {1};
      write(fds[1], &error, sizeof(error));

      // End the child
      exit(1);
    }
  } else {
    FuseMountPID = pid;
    // Parent
    // Only the child writes to the pipe; drop our copy of the write end so it isn't leaked
    close(fds[1]);

    // Wait for the child to exit
    // This will happen with execvpe of squashmount or exit on failure
    while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR)
      ;

    // Check the child pipe for messages
    pollfd PollFD;
    PollFD.fd = fds[0];
    PollFD.events = POLLIN;

    int Result = poll(&PollFD, 1, 0);

    // Only readable data means failure; a bare hang-up (no data) means the helper was exec'd
    if (Result == 1 && PollFD.revents & POLLIN) {
      // Child couldn't execvpe for whatever reason
      // Remove the mount path and leave Just in case it was created
      rmdir(MountFolderStr);

      // Close the pipe now
      close(fds[0]);

      LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs\n");
      return false;
    }

    // Close the pipe now
    close(fds[0]);
  }

  // Write to the lock file where we are mounted
  write(ServerRootFSLockFD, MountFolder.c_str(), MountFolder.size());
  fdatasync(ServerRootFSLockFD);

  return true;
}

void UnmountRootFS() {
  FEX_CONFIG_OPT(LDPath, ROOTFS);
  if (!FEX::FormatCheck::IsSquashFS(LDPath()) && !FEX::FormatCheck::IsEroFS(LDPath())) {
    return;
  }

  SquashFS::ShutdownImagePID();

  // Handle final mount removal
  // fusermount for unmounting the mountpoint, then the {erfsfuse, squashfuse} will exit automatically
  int pid = fork();

  if (pid == 0) {
    const char* argv[5];
    argv[0] = "fusermount3";
    argv[1] = "-u";
    argv[2] = "-q";
    argv[3] = MountFolder.c_str();
    argv[4] = nullptr;

    if (execvp(argv[0], (char* const*)argv) == -1) {
      // Try again with `fusermount`
      argv[0] = "fusermount";
      if (execvp(argv[0], (char* const*)argv) == -1) {
        fprintf(stderr, "fusermount{3,} failed to execute. You may have an mount living at '%s' to clean up now\n", MountFolder.c_str());
        fprintf(stderr, "Try `%s %s %s %s`\n", argv[0], argv[1], argv[2], argv[3]);
        exit(1);
      }
    }
  } else {
    // Wait for fusermount to leave
    while (waitpid(pid, nullptr, 0) == -1 && errno == EINTR)
      ;

    // Remove the mount path
    rmdir(MountFolder.c_str());

    // Remove the rootfs lock file
    auto RootFSLockFile = FEXServerClient::GetServerRootFSLockFile();
    unlink(RootFSLockFile.c_str());
  }
}

/**
 * Prepares the configured rootfs: if it is an EroFS or SquashFS image, takes the rootfs lock,
 * mounts the image and downgrades the lock to a read lock.
 *
 * @return true if nothing had to be done or every step succeeded, false as soon as one step fails.
 */
bool InitializeSquashFS() {
  FEX_CONFIG_OPT(LDPath, ROOTFS);

  MountFolder = LDPath();

  // EroFS is probed first; SquashFS is only probed if that check fails
  const bool ImageIsEroFS = FEX::FormatCheck::IsEroFS(MountFolder);
  const bool ImageIsSquashFS = !ImageIsEroFS && FEX::FormatCheck::IsSquashFS(MountFolder);

  if (!ImageIsEroFS && !ImageIsSquashFS) {
    // Not a rootfs image, so there is nothing to mount
    return true;
  }

  if (!InitializeSquashFSPipe()) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't initialize SquashFSPipe");
    return false;
  }

  // Mount the image; this replaces MountFolder with the temporary mount point
  if (!MountRootFSImagePath(LDPath(), ImageIsEroFS)) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't mount squashfs path");
    return false;
  }

  if (!DowngradeRootFSPipeToReadLock()) {
    LogMan::Msg::EFmt("[FEXServer] Couldn't downgrade read lock");
    return false;
  }

  return true;
}

// Returns the current rootfs location: the configured path, or the temporary mount point once an image was mounted.
const fextl::string& GetMountFolder() {
  return MountFolder;
}
} // namespace SquashFS


================================================
FILE: Source/Tools/FEXServer/SquashFS.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/string.h>

// RootFS image (SquashFS/EroFS) mounting helpers used by FEXServer.
namespace SquashFS {
// Mounts the configured rootfs if it is a SquashFS/EroFS image; returns false if any setup step fails.
bool InitializeSquashFS();
// Unmounts a mounted rootfs image and removes the mount directory and rootfs lock file.
void UnmountRootFS();
// Returns the rootfs path, or the mount point once an image was mounted.
const fextl::string& GetMountFolder();
} // namespace SquashFS


================================================
FILE: Source/Tools/LinuxEmulation/ArchHelpers/MContext.cpp
================================================
// SPDX-License-Identifier: MIT
#include "ArchHelpers/MContext.h"

namespace FEX::ArchHelpers::Context {
#ifdef ARCHITECTURE_arm64
/**
 * Returns a human-readable name for the exception class (EC field, bits [31:26]) of an ESR value.
 *
 * @param ESR Raw exception syndrome register value
 * @return Description of the exception class, or "Reserved" for encodings not listed here
 */
std::string_view GetESRName(uint64_t ESR) {
  switch ((ESR & ESR1_EC) >> 26) {
  case 0b000'000: return "Unknown";
  case 0b000'001: return "Trapped WF*";
  case 0b000'011: return "Trapped MCR/MRC";
  case 0b000'100: return "Trapped MCRR/MRRC";
  case 0b000'101: return "Trapped MCR/MRC (coproc==0b1110)";
  case 0b000'110: return "Trapped LDC/STC";
  case 0b000'111: return "Trapped SME;SVE,ASIMD,FP";
  case 0b001'010: return "Trapped non-covered instruction";
  case 0b001'100: return "Trapped MRRC (coproc==0b1110)";
  case 0b001'101: return "Branch target exception";
  case 0b001'110: return "Illegal Execution State";
  case 0b010'001: return "AArch32 SVC";
  case 0b010'100: return "Trapped MSRR/MRRS/System instruction";
  case 0b010'101: return "AArch64 SVC";
  case 0b011'000: return "Trapped MSR/MRS/System instruction";
  case 0b011'001: return "Trapped SVE from ZEN";
  case 0b011'011: return "TSTART Exception";
  case 0b011'100: return "PAC Exception";
  case 0b011'101: return "Trapped SME from SMEN";
  case 0b100'000: return "Instruction abort";
  case 0b100'001: return "Instruction abort w/o change to exception level";
  case 0b100'010: return "PC Alignment fault";
  case 0b100'100: return "Data abort";
  case 0b100'101: return "Data abort w/o change to exception level";
  case 0b100'110: return "SP Alignment fault";
  case 0b100'111: return "Memory operation exception";
  case 0b101'000: return "AArch32 Trapped FP Exception";
  case 0b101'100: return "AArch64 Trapped FP Exception";
  case 0b101'101: return "GCS exception";
  case 0b101'111: return "SError exception";
  case 0b110'000: return "BP Exception";
  case 0b110'001: return "BP Exception w/o change to exception level";
  case 0b110'010: return "Software step Exception";
  case 0b110'011: return "Software step Exception w/o change to exception level";
  case 0b110'100: return "Watchpoint Exception";
  case 0b110'101: return "Watchpoint Exception w/o change to exception level";
  case 0b111'000: return "AArch32 BKPT";
  case 0b111'100: return "AArch64 BRK";
  case 0b111'101: return "Profiling Exception";
  default: return "Reserved";
  }
}
#endif
} // namespace FEX::ArchHelpers::Context


================================================
FILE: Source/Tools/LinuxEmulation/ArchHelpers/MContext.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "UContext.h"

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>

#include <signal.h>
#include <string.h>
#ifndef _WIN32
#include <ucontext.h>
#endif
#include <stdint.h>
#include <type_traits>

namespace FEX::ArchHelpers::Context {
#ifndef _WIN32

// Bit flags kept in the `Flags` member of the context backup structures.
enum ContextFlags : uint32_t {
  // NOTE(review): presumably set when the signal interrupted JIT code - confirm against the users of this flag
  CONTEXT_FLAG_INJIT = (1U << 0),
};

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
// Value stored in the StackCookie member of a context backup when assertions are enabled;
// checked on restore to detect a wrong or corrupted backup location.
constexpr uint64_t STACK_COOKIE_MAGIC = 0x4142434445464748ULL;
#endif

// Snapshot of host and guest state for x86-64 hosts (the arm64 counterpart is ArmContextBackup).
struct X86ContextBackup {
  // Host State
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // During debug builds, insert a cookie on the stack.
  // This is useful for validation that the stack is trying to be restored from the correct location.
  // During stack restore, we ensure this is set to the value we expect.
  // If given an incorrect stack location, or corrupted stack then this cookie will be wrong.
  uint64_t StackCookie;
#endif
  // RIP and RSP is stored in GPRs here
  uint64_t GPRs[23];
  FEXCore::x86_64::_libc_fpstate FPRState;
  // Signal mask saved from the ucontext (first 64 bits) so it can be restored
  uint64_t sa_mask;
  uint16_t InSyscallInfo;
  bool FaultToTopAndGeneratedException;

  // Guest state
  int Signal;
  // Combination of ContextFlags bits
  uint32_t Flags;
  uint64_t OriginalRIP;
  uint64_t FPStateLocation;
  uint64_t UContextLocation;
  uint64_t SigInfoLocation;
  FEXCore::Core::CPUState GuestState;
  // Size in bytes of the stack red zone on this host architecture
  static constexpr int RedZoneSize = 128;
};

// Snapshot of host and guest state for arm64 hosts; filled in by BackupContext.
struct ArmContextBackup {
  // Host State
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Set to STACK_COOKIE_MAGIC on backup and validated on restore
  uint64_t StackCookie;
#endif
  // x0-x30 as found in the signal's mcontext
  uint64_t GPRs[31];
  uint64_t PrevSP;
  uint64_t PrevPC;
  uint64_t PState;
  uint32_t FPSR;
  uint32_t FPCR;
  // Host vector registers copied from the FP/SIMD record of the signal frame
  __uint128_t FPRs[32];
  // Signal mask saved from the ucontext (first 64 bits) so it can be restored
  uint64_t sa_mask;
  uint16_t InSyscallInfo;
  bool FaultToTopAndGeneratedException;

  // Guest state
  int Signal;
  // Combination of ContextFlags bits
  uint32_t Flags;
  uint64_t OriginalRIP;
  uint64_t FPStateLocation;
  uint64_t UContextLocation;
  uint64_t SigInfoLocation;
  FEXCore::Core::CPUState GuestState;

  // Arm64 doesn't have a red zone
  static constexpr int RedZoneSize = 0;
};

// Reinterprets the opaque signal-handler context pointer as a ucontext_t.
static inline ucontext_t* GetUContext(void* ucontext) {
  return static_cast<ucontext_t*>(ucontext);
}

// Returns the machine context embedded in the ucontext_t behind the opaque signal-handler context pointer.
static inline mcontext_t* GetMContext(void* ucontext) {
  return &static_cast<ucontext_t*>(ucontext)->uc_mcontext;
}


#ifdef ARCHITECTURE_arm64

// Magic values identifying records inside the mcontext's __reserved area:
// FPR_MAGIC tags the FP/SIMD register record, ESR1_MAGIC tags the exception syndrome record.
constexpr uint32_t FPR_MAGIC = 0x46508001U;
constexpr uint32_t ESR1_MAGIC = 0x45535201U;

// Header that starts every record in the mcontext's __reserved area.
struct HostCTXHeader {
  // Identifies the record type (e.g. FPR_MAGIC, ESR1_MAGIC)
  uint32_t Magic;
  // Used by GetArmESR as the byte distance to the next record; a zero size ends the walk
  uint32_t Size;
};

// FP/SIMD register record (Head.Magic == FPR_MAGIC) as read from the start of __reserved.
struct HostFPRState {
  HostCTXHeader Head;
  uint32_t FPSR;
  uint32_t FPCR;
  __uint128_t FPRs[32];
};

// Exception syndrome record (Head.Magic == ESR1_MAGIC) found by walking the __reserved records.
struct HostESRState {
  HostCTXHeader Head;
  uint64_t ESR;
};

static inline uint64_t GetSp(void* ucontext) {
  return GetMContext(ucontext)->sp;
}

static inline uint64_t GetPc(void* ucontext) {
  return GetMContext(ucontext)->pc;
}

static inline uint64_t* GetArmPc(void* ucontext) {
  return reinterpret_cast<uint64_t*>(&GetMContext(ucontext)->pc);
}

static inline void SetSp(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->sp = val;
}

static inline void SetPc(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->pc = val;
}

static inline uint64_t GetState(void* ucontext) {
  return GetMContext(ucontext)->regs[28];
}

static inline void SetState(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->regs[28] = val;
}

static inline void SetFillSRASingleInst(void* ucontext, bool SingleInst) {
  GetMContext(ucontext)->regs[1] = SingleInst;
}

static inline uint64_t GetArmReg(void* ucontext, uint32_t id) {
  return GetMContext(ucontext)->regs[id];
}

static inline uint64_t GetArmPState(void* ucontext) {
  return GetMContext(ucontext)->pstate;
}

static inline uint64_t* GetArmGPRs(void* ucontext) {
  return reinterpret_cast<uint64_t*>(GetMContext(ucontext)->regs);
}

static inline void SetArmReg(void* ucontext, uint32_t id, uint64_t val) {
  GetMContext(ucontext)->regs[id] = val;
}

static inline __uint128_t GetArmFPR(void* ucontext, uint32_t id) {
  auto MContext = GetMContext(ucontext);
  HostFPRState* HostState = reinterpret_cast<HostFPRState*>(&MContext->__reserved[0]);
  LOGMAN_THROW_A_FMT(HostState->Head.Magic == FPR_MAGIC, "Wrong FPR Magic: 0x{:08x}", HostState->Head.Magic);

  return HostState->FPRs[id];
}

static inline __uint128_t* GetArmFPRs(void* ucontext) {
  auto MContext = GetMContext(ucontext);
  HostFPRState* HostState = reinterpret_cast<HostFPRState*>(&MContext->__reserved[0]);
  LOGMAN_THROW_A_FMT(HostState->Head.Magic == FPR_MAGIC, "Wrong FPR Magic: 0x{:08x}", HostState->Head.Magic);

  return &HostState->FPRs[0];
}

static inline uint64_t GetArmESR(void* ucontext) {
  auto MContext = GetMContext(ucontext);

  size_t i = 0;
  auto HostState = reinterpret_cast<HostCTXHeader*>(&MContext->__reserved[i]);
  do {
    if (HostState->Magic == ESR1_MAGIC) {
      auto ESR = reinterpret_cast<HostESRState*>(HostState);
      return ESR->ESR;
    }
    i += HostState->Size;
    HostState = reinterpret_cast<HostCTXHeader*>(&MContext->__reserved[i]);
  } while (HostState->Size != 0);

  return 0;
}

// EC - Exception Class field, bits [31:26] of the ESR
constexpr static uint64_t ESR1_EC = 0b111111U << 26;
// EC value for a data abort, already shifted into field position
constexpr static uint64_t ESR1_EC_DataAbort = 0b100100U << 26;

// Write-Not-Read flag
// When set - Abort is due to a write
constexpr static uint64_t ESR1_WNR = 1 << 6;

// DFSC - Data Fault Status Code
// Translation fault - No page mapped
// Permissions fault - Page mapped but with incorrect permission from access.
// NOTE(review): in the Arm ESR encoding the low two DFSC bits appear to give the translation table
// lookup level rather than an exception level, so the `_ELn` suffixes below may be misnomers - confirm.
constexpr static uint64_t ESR1_DataAbort_DFSC = 0b111111;
constexpr static uint64_t ESR1_DataAbort_TranslationFault_EL0 = 0b000111;
constexpr static uint64_t ESR1_DataAbort_PermissionFault_EL0 = 0b001111;
constexpr static uint64_t ESR1_DataAbort_Level = 0b11;
constexpr static uint64_t ESR1_DataAbort_Level_EL3 = 0b00;
constexpr static uint64_t ESR1_DataAbort_Level_EL2 = 0b01;
constexpr static uint64_t ESR1_DataAbort_Level_EL1 = 0b10;
constexpr static uint64_t ESR1_DataAbort_Level_EL0 = 0b11;

// Returns a human-readable name for the exception class encoded in ESR
std::string_view GetESRName(uint64_t ESR);

// Derives x86 page-fault error-code bits (X86_PF_USER / X86_PF_WRITE) from the ESR of a data abort.
// Asserts (when assertions are enabled) that the ESR's exception class is a data abort.
static inline uint32_t GetProtectFlags(void* ucontext) {
  const uint64_t ESR = GetArmESR(ucontext);
  LOGMAN_THROW_A_FMT((ESR & ESR1_EC) == ESR1_EC_DataAbort, "Unknown ESR1 EC type: 0x{:x} != 0x{:x}. Received '{}'", ESR & ESR1_EC,
                     ESR1_EC_DataAbort, GetESRName(ESR));

  const bool IsWriteFault = (ESR & ESR1_WNR) != 0;
  const bool IsUserFault = (ESR & ESR1_DataAbort_Level) == ESR1_DataAbort_Level_EL0;

  uint32_t ProtectFlags {};
  if (IsWriteFault) {
    // The faulting access was a store
    ProtectFlags |= FEXCore::X86State::X86_PF_WRITE;
  }

  if (IsUserFault) {
    // Reported as a user-mode fault
    ProtectFlags |= FEXCore::X86State::X86_PF_USER;
  }

  // x86 doesn't expose PF_PROT to user space, so permission and translation faults aren't told apart here.
  return ProtectFlags;
}

using ContextBackup = ArmContextBackup;

// Snapshots the host signal context (GPRs, SP, PC, PSTATE, FP/SIMD state and signal mask) into *Backup.
// Only ArmContextBackup is supported on this host; any other T ends in ERROR_AND_DIE_FMT.
template<typename T>
static inline void BackupContext(void* ucontext, T* Backup) {
  if constexpr (!std::is_same<T, ArmContextBackup>::value) {
    // This must be a runtime error
    ERROR_AND_DIE_FMT("Wrong context type");
  } else {
    auto HostUContext = GetUContext(ucontext);
    auto HostMContext = GetMContext(ucontext);

    // General purpose state: x0-x30, then SP, PC and PSTATE.
    memcpy(&Backup->GPRs[0], &HostMContext->regs[0], sizeof(uint64_t) * 31);
    Backup->PrevSP = ArchHelpers::Context::GetSp(ucontext);
    Backup->PrevPC = ArchHelpers::Context::GetPc(ucontext);
    Backup->PState = HostMContext->pstate;

    // The FP/SIMD record is expected to be the first entry of the reserved area.
    auto FPRRecord = reinterpret_cast<HostFPRState*>(&HostMContext->__reserved[0]);
    LOGMAN_THROW_A_FMT(FPRRecord->Head.Magic == FPR_MAGIC, "Wrong FPR Magic: 0x{:08x}", FPRRecord->Head.Magic);
    Backup->FPSR = FPRRecord->FPSR;
    Backup->FPCR = FPRRecord->FPCR;
    memcpy(&Backup->FPRs[0], &FPRRecord->FPRs[0], sizeof(__uint128_t) * 32);

    // Keep the first 64 bits of the signal mask so RestoreContext can put them back.
    memcpy(&Backup->sa_mask, &HostUContext->uc_sigmask, sizeof(uint64_t));

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    Backup->StackCookie = STACK_COOKIE_MAGIC;
#endif
  }
}

// Writes a snapshot taken by BackupContext back into the host signal context.
// Only ArmContextBackup is supported on this host; any other T ends in ERROR_AND_DIE_FMT.
template<typename T>
static inline void RestoreContext(void* ucontext, T* Backup) {
  if constexpr (!std::is_same<T, ArmContextBackup>::value) {
    // This must be a runtime error
    ERROR_AND_DIE_FMT("Wrong context type");
  } else {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(Backup->StackCookie == STACK_COOKIE_MAGIC, "Stack cookie didn't match! 0x{:x}", Backup->StackCookie);
#endif

    auto HostUContext = GetUContext(ucontext);
    auto HostMContext = GetMContext(ucontext);

    // FP/SIMD registers and their control/status words go back into the first reserved record.
    auto FPRRecord = reinterpret_cast<HostFPRState*>(&HostMContext->__reserved[0]);
    LOGMAN_THROW_A_FMT(FPRRecord->Head.Magic == FPR_MAGIC, "Wrong FPR Magic: 0x{:08x}", FPRRecord->Head.Magic);
    memcpy(&FPRRecord->FPRs[0], &Backup->FPRs[0], sizeof(__uint128_t) * 32);
    FPRRecord->FPCR = Backup->FPCR;
    FPRRecord->FPSR = Backup->FPSR;

    // Restore GPRs and other state
    HostMContext->pstate = Backup->PState;
    ArchHelpers::Context::SetPc(ucontext, Backup->PrevPC);
    ArchHelpers::Context::SetSp(ucontext, Backup->PrevSP);
    memcpy(&HostMContext->regs[0], &Backup->GPRs[0], sizeof(uint64_t) * 31);

    // Restore the signal mask now
    memcpy(&HostUContext->uc_sigmask, &Backup->sa_mask, sizeof(uint64_t));
  }
}

#endif

#ifdef ARCHITECTURE_x86_64

static inline uint64_t GetSp(void* ucontext) {
  return GetMContext(ucontext)->gregs[REG_RSP];
}

static inline uint64_t GetPc(void* ucontext) {
  return GetMContext(ucontext)->gregs[REG_RIP];
}

static inline void SetSp(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->gregs[REG_RSP] = val;
}

static inline void SetPc(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->gregs[REG_RIP] = val;
}

static inline uint64_t GetState(void* ucontext) {
  return GetMContext(ucontext)->gregs[REG_R14];
}

static inline void SetState(void* ucontext, uint64_t val) {
  GetMContext(ucontext)->gregs[REG_R14] = val;
}

// Stub for x86-64 hosts: both arguments are ignored and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline void SetFillSRASingleInst(void* ucontext, bool SingleInst) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

// Stub for x86-64 hosts: there is no Arm register file in this context, so both arguments are ignored
// and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline uint64_t GetArmReg(void* ucontext, uint32_t id) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

// Stub for x86-64 hosts: there is no Arm register file in this context, so all arguments are ignored
// and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline void SetArmReg(void* ucontext, uint32_t id, uint64_t val) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

// Stub for x86-64 hosts: both arguments are ignored and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline __uint128_t GetArmFPR(void* ucontext, uint32_t id) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

// Stub for x86-64 hosts: the argument is ignored and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline uint64_t GetArmPState(void* ucontext) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

// Stub for x86-64 hosts: the argument is ignored and the call reports a fatal error through ERROR_AND_DIE_FMT.
static inline uint64_t* GetArmGPRs(void* ucontext) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

static inline uint32_t GetProtectFlags(void* ucontext) {
  return GetMContext(ucontext)->gregs[REG_ERR];
}

using ContextBackup = X86ContextBackup;

// Snapshots the host signal context (general registers, FP state and signal mask) into *Backup.
// Only X86ContextBackup is supported on this host; any other T ends in ERROR_AND_DIE_FMT.
template<typename T>
static inline void BackupContext(void* ucontext, T* Backup) {
  if constexpr (!std::is_same<T, X86ContextBackup>::value) {
    // This must be a runtime error
    ERROR_AND_DIE_FMT("Wrong context type");
  } else {
    auto HostUContext = GetUContext(ucontext);
    auto HostMContext = GetMContext(ucontext);

    // General registers: as many bytes as the backup's GPRs member holds.
    memcpy(&Backup->GPRs[0], &HostMContext->gregs[0], sizeof(X86ContextBackup::GPRs));
    // FP state comes from the block that fpregs points at.
    memcpy(&Backup->FPRState, HostMContext->fpregs, sizeof(X86ContextBackup::FPRState));
    // XXX: Save 256bit and 512bit AVX register state

    // Keep the first 64 bits of the signal mask so RestoreContext can put them back.
    memcpy(&Backup->sa_mask, &HostUContext->uc_sigmask, sizeof(uint64_t));

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    Backup->StackCookie = STACK_COOKIE_MAGIC;
#endif
  }
}

// Writes a snapshot taken by BackupContext back into the host signal context.
// Only X86ContextBackup is supported on this host; any other T ends in ERROR_AND_DIE_FMT.
template<typename T>
static inline void RestoreContext(void* ucontext, T* Backup) {
  if constexpr (!std::is_same<T, X86ContextBackup>::value) {
    // This must be a runtime error
    ERROR_AND_DIE_FMT("Wrong context type");
  } else {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(Backup->StackCookie == STACK_COOKIE_MAGIC, "Stack cookie didn't match! 0x{:x}", Backup->StackCookie);
#endif

    auto HostUContext = GetUContext(ucontext);
    auto HostMContext = GetMContext(ucontext);

    // General registers first, then the FP state block that fpregs points at.
    memcpy(&HostMContext->gregs[0], &Backup->GPRs[0], sizeof(X86ContextBackup::GPRs));
    memcpy(HostMContext->fpregs, &Backup->FPRState, sizeof(X86ContextBackup::FPRState));

    // Restore the signal mask now
    memcpy(&HostUContext->uc_sigmask, &Backup->sa_mask, sizeof(uint64_t));
  }
}

#endif
#else

#endif
} // namespace FEX::ArchHelpers::Context


================================================
FILE: Source/Tools/LinuxEmulation/ArchHelpers/UContext.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <signal.h>

namespace FEXCore {
namespace x86_64 {
  // Bit flags for the uc_flags field of ucontext_t.
  ///< Has extended FP state
  constexpr uint64_t UC_FP_XSTATE = (1ULL << 0);
  ///< Set when kernel saves SS register from 64bit code
  constexpr uint64_t UC_SIGCONTEXT_SS = (1ULL << 1);
  ///< Set when kernel will strictly restore the SS
  constexpr uint64_t UC_STRICT_RESTORE_SS = (1ULL << 2);

  ///< Describes the signal stack
  // Packed 24-byte layout (checked by the static_assert below).
  struct FEX_PACKED stack_t {
    void* ss_sp;
    int32_t ss_flags;
    // Explicit 32 bits of padding so ss_size lands at offset 16 even though the struct is packed.
    uint32_t : 32;
    size_t ss_size;
  };
  static_assert(sizeof(FEXCore::x86_64::stack_t) == 24, "This needs to be the right size");

  /**
   * Describes the software specific bytes added at the end of the
   * fpstate to identify whether or not an extended context area is
   * present and what kind of extended features are present in said
   * context area.
   */
  struct FEX_PACKED fpx_sw_bytes {
    // Values compared against the magic fields that bracket an extended (xstate) frame.
    static constexpr uint32_t FP_XSTATE_MAGIC_1 = 0x46505853;
    static constexpr uint32_t FP_XSTATE_MAGIC_2 = 0x46505845;

    // Single-bit masks tested against the xfeatures member.
    enum FeatureFlag : uint32_t {
      FEATURE_FP = 1U << 0,
      FEATURE_SSE = 1U << 1,
      FEATURE_YMM = 1U << 2,
      FEATURE_BNDREGS = 1U << 3,
      FEATURE_BNDCSR = 1U << 4,
      FEATURE_OPMASK = 1U << 5,
      FEATURE_ZMM_Hi256 = 1U << 6,
      FEATURE_Hi16_ZMM = 1U << 7,
      FEATURE_PT_UNIMPL = 1U << 8,
      FEATURE_PKRU = 1U << 9,
      FEATURE_PASID = 1U << 10,
      FEATURE_RESERVED11 = 1U << 11,
      FEATURE_RESERVED12 = 1U << 12,
      FEATURE_RESERVED13 = 1U << 13,
      FEATURE_RESERVED14 = 1U << 14,
      FEATURE_LBR = 1U << 15,
      FEATURE_RESERVED16 = 1U << 16,
      FEATURE_XTILE_CFG = 1U << 17,
      FEATURE_XTILE_DATA = 1U << 18,
    };

    // True when magic1 marks the enclosing frame as an xstate frame.
    bool HasExtendedContext() const {
      return magic1 == FP_XSTATE_MAGIC_1;
    }

    // True when the FEATURE_YMM bit is set in xfeatures.
    bool HasYMMH() const {
      return (xfeatures & FEATURE_YMM) != 0;
    }

    // If magic1 is set to FP_XSTATE_MAGIC_1, then the encompassing
    // frame is an xstate frame. If 0, then it's a legacy frame.
    uint32_t magic1;

    // Total size of the fpstate area
    // - magic1 = 0                 -> sizeof(fpstate)
    // - magic1 = FP_XSTATE_MAGIC_1 -> sizeof(xstate) + extensions (if any)
    uint32_t extended_size;

    // Feature bitmask describing supported features.
    uint64_t xfeatures;

    // Actual XSAVE state size, based on above xfeatures
    uint32_t xstate_size;

    // Reserved data
    uint32_t padding[7];
  };
  // 4 + 4 + 8 + 4 + 7*4 bytes.
  static_assert(sizeof(fpx_sw_bytes) == 48);

  // 64-bit guest FP state block; packed to exactly 512 bytes (checked by the static_assert below).
  struct FEX_PACKED _libc_fpstate {
    // This is in FXSAVE format
    uint16_t fcw;
    uint16_t fsw;
    uint16_t ftw;
    uint16_t fop;
    uint64_t fip;
    uint64_t fdp;
    uint32_t mxcsr;
    uint32_t mxcsr_mask;
    __uint128_t _st[8];
    __uint128_t _xmm[16];
    uint32_t _res[12];

    // The final 48 bytes (twelve 32-bit words) of the block are used by Linux for
    // software purposes: they store info describing any existing XSAVE context data.
    fpx_sw_bytes sw_reserved;
  };
  static_assert(sizeof(FEXCore::x86_64::_libc_fpstate) == 512, "This needs to be the right size");

  // 64-byte header placed between the legacy fpstate and the extended components inside xstate.
  struct FEX_PACKED xstate_header {
    // Feature bitmask for the extended area.
    // NOTE(review): presumably uses the same bit assignments as fpx_sw_bytes::FeatureFlag - confirm.
    uint64_t xfeatures;
    uint64_t reserved1[2];
    uint64_t reserved2[5];
  };
  static_assert(sizeof(xstate_header) == 64);

  // Sixteen 128-bit slots (256 bytes).
  // NOTE(review): by name these are the upper halves of the YMM registers - confirm against users.
  struct FEX_PACKED ymmh_state {
    __uint128_t ymmh_space[16];
  };
  static_assert(sizeof(ymmh_state) == 256);

  // 8-byte trailer that closes an xstate frame.
  // NOTE(review): magic presumably holds fpx_sw_bytes::FP_XSTATE_MAGIC_2 - confirm against the writer.
  struct FEX_PACKED magic2 {
    uint32_t pad;
    uint32_t magic;
  };
  static_assert(sizeof(magic2) == sizeof(uint64_t));

  /**
   * Extended state that includes both the main fpstate
   * and the extended state.
   *
   * Layout: 512-byte fpstate, 64-byte header, 256-byte ymmh block
   * and the 8-byte trailing magic, for 840 bytes in total.
   */
  struct FEX_PACKED xstate {
    _libc_fpstate fpstate;
    xstate_header xstate_hdr;
    ymmh_state ymmh;
    magic2 magic2 {};
  };
  static_assert(sizeof(xstate) == 840);

  ///< The order of these must match the GNU ordering
  // Enumerator values run from 0 (FEX_REG_R8) to 22 (FEX_REG_CR2), which matches the
  // 23 entries of mcontext_t::gregs.
  enum ContextRegs {
    FEX_REG_R8 = 0,
    FEX_REG_R9,
    FEX_REG_R10,
    FEX_REG_R11,
    FEX_REG_R12,
    FEX_REG_R13,
    FEX_REG_R14,
    FEX_REG_R15,
    FEX_REG_RDI,
    FEX_REG_RSI,
    FEX_REG_RBP,
    FEX_REG_RBX,
    FEX_REG_RDX,
    FEX_REG_RAX,
    FEX_REG_RCX,
    FEX_REG_RSP,
    FEX_REG_RIP,
    FEX_REG_EFL,
    FEX_REG_CSGSFS,
    FEX_REG_ERR,
    FEX_REG_TRAPNO,
    FEX_REG_OLDMASK,
    FEX_REG_CR2,
  };
  // Guards against an enumerator being added or removed by accident.
  static_assert(FEX_REG_CR2 == 22, "Oops");

  // 64-bit guest machine context; packed to exactly 256 bytes (checked by the static_assert below).
  struct FEX_PACKED mcontext_t {
    // 23 slots, one per ContextRegs enumerator.
    uint64_t gregs[23];
    // Pointer to the FP state block that belongs to this context.
    FEXCore::x86_64::_libc_fpstate* fpregs;
    uint64_t __reserved[8];
  };
  static_assert(sizeof(FEXCore::x86_64::mcontext_t) == 256, "This needs to be the right size");

  // 128-byte signal set (sixteen 64-bit words); also reused by the 32-bit x86 ucontext_t.
  struct FEX_PACKED sigset_t {
    uint64_t val[16];
  };
  static_assert(sizeof(FEXCore::x86_64::sigset_t) == 128, "This needs to be the right size");

  // 64-bit guest user context; packed to 424 bytes with uc_mcontext at offset 40
  // (both checked by the static_asserts below).
  struct FEX_PACKED ucontext_t {
    // Combination of the UC_* flags declared at the top of this namespace.
    uint64_t uc_flags;
    FEXCore::x86_64::ucontext_t* uc_link;
    FEXCore::x86_64::stack_t uc_stack;
    FEXCore::x86_64::mcontext_t uc_mcontext;
    FEXCore::x86_64::sigset_t uc_sigmask;
  };
  static_assert(offsetof(FEXCore::x86_64::ucontext_t, uc_mcontext) == 40, "Needs to be correct");

  static_assert(sizeof(FEXCore::x86_64::ucontext_t) == 424, "This needs to be the right size");
} // namespace x86_64

namespace x86 {
  // Bit flags for the uc_flags field of ucontext_t.
  // Declared as a 64-bit constant although the 32-bit ucontext_t::uc_flags field is only 32 bits wide.
  ///< Has extended FP state
  constexpr uint64_t UC_FP_XSTATE = (1ULL << 0);

  ///< The order of these must match the GNU ordering
  // Enumerator values run from 0 (FEX_REG_GS) to 18 (FEX_REG_SS), which matches the
  // 19 entries of mcontext_t::gregs. The general registers keep their 64-bit style
  // FEX_REG_R* names here.
  enum ContextRegs {
    FEX_REG_GS = 0,
    FEX_REG_FS,
    FEX_REG_ES,
    FEX_REG_DS,
    FEX_REG_RDI,
    FEX_REG_RSI,
    FEX_REG_RBP,
    FEX_REG_RSP,
    FEX_REG_RBX,
    FEX_REG_RDX,
    FEX_REG_RCX,
    FEX_REG_RAX,
    FEX_REG_TRAPNO,
    FEX_REG_ERR,
    FEX_REG_EIP,
    FEX_REG_CS,
    FEX_REG_EFL,
    FEX_REG_UESP,
    FEX_REG_SS
  };
  // Guards against an enumerator being added or removed by accident.
  static_assert(FEX_REG_SS == 18, "Oops");

  // 32-bit guest signal value: either an integer or a guest pointer stored as a 32-bit integer.
  union sigval_t {
    int sival_int;
    uint32_t sival_ptr; // XXX: Should be compat_ptr<void>
  };

  // 32-bit guest siginfo_t; packed to exactly 128 bytes (checked by the static_assert below).
  // Converts to and from the host ::siginfo_t by copying the three header fields and then the
  // raw payload bytes; the individual payload members are not translated one by one.
  struct FEX_PACKED siginfo_t {
    int si_signo;
    int si_errno;
    int si_code;
    // Signal-specific payload. `pad` is the largest member and therefore spans the whole union.
    union {
      uint32_t pad[29];
      /* tgkill siginfo_t */
      struct {
        int32_t pid;
        int32_t uid;
      } _kill;
      /* SIGPOLL */
      struct {
        int32_t band;
        int32_t fd;
      } _poll;
      /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
      struct {
        uint32_t addr;
      } _sigfault;
      /* SIGCHLD */
      struct {
        int32_t pid;
        int32_t uid;
        int32_t status;
        int32_t utime;
        int32_t stime;
      } _sigchld;
      /* RT signals */
      struct {
        int32_t pid;
        int32_t uid;
        union {
          int32_t sival_int;
          uint32_t sival_ptr; // compat_ptr
        } sigval;
      } _rt;
      /* SIGALRM, SIGVTALRM */
      struct {
        int tid;
        int overrun;
        FEXCore::x86::sigval_t sigval;
      } _timer;
      /* SIGSYS */
      struct {
        uint32_t call_addr; // compat_ptr
        int32_t syscall;
        uint32_t arch;
      } _sigsys;
    } _sifields;

    // Overlay used to reach the host siginfo_t through a fixed, FEX-defined layout.
    union HostSigInfo_t {
      // This anonymous struct needs to match the host definition
      struct {
        uint32_t si_signo;
        uint32_t si_errno;
        uint32_t si_code;

        uint32_t __pad0;

        // Pad[28] is a union for all the sifields
        uint32_t _pad[28];
      } FEXDef;
      ::siginfo_t host {};
    };
    static_assert(sizeof(HostSigInfo_t) == 128, "This needs to be the right size");

    siginfo_t() = delete;

    // Guest -> host conversion.
    operator ::siginfo_t() const {
      // The definition of siginfo_t changes depending on the host environment
      // It is guaranteed to be 128 bytes and the kernel interface is the same for all of them
      // Since we only run on Linux
      HostSigInfo_t val {};

      val.FEXDef.si_signo = si_signo;
      val.FEXDef.si_errno = si_errno;
      val.FEXDef.si_code = si_code;

      // Host siginfo has a pad member that is set to zeros
      val.FEXDef.__pad0 = 0;

      // Copy over the union
      // The union is different sizes on 64-bit versus 32-bit
      memcpy(val.FEXDef._pad, _sifields.pad, std::min(sizeof(val.FEXDef._pad), sizeof(_sifields.pad)));

      return val.host;
    }

    // Host -> guest conversion.
    siginfo_t(::siginfo_t val) {
      HostSigInfo_t host;
      host.host = val;

      si_signo = host.FEXDef.si_signo;
      si_errno = host.FEXDef.si_errno;
      si_code = host.FEXDef.si_code;

      // The guest payload (29 words) is larger than the host payload view (28 words).
      // Zero it first so the word the copy below does not reach never holds indeterminate bytes.
      memset(_sifields.pad, 0, sizeof(_sifields.pad));

      // Copy over the union
      // The union is different sizes on 64-bit versus 32-bit
      memcpy(_sifields.pad, host.FEXDef._pad, std::min(sizeof(host.FEXDef._pad), sizeof(_sifields.pad)));
    }
    static_assert(offsetof(::siginfo_t, si_signo) == offsetof(HostSigInfo_t, FEXDef.si_signo), "si_signo in wrong location?");
    static_assert(offsetof(::siginfo_t, si_errno) == offsetof(HostSigInfo_t, FEXDef.si_errno), "si_errno in wrong location?");
    static_assert(offsetof(::siginfo_t, si_code) == offsetof(HostSigInfo_t, FEXDef.si_code), "si_code in wrong location?");
  };
  static_assert(sizeof(FEXCore::x86::siginfo_t) == 128, "This needs to be the right size");

  // 32-bit guest signal stack description; packed to 12 bytes (checked by the static_assert below).
  struct FEX_PACKED stack_t {
    uint32_t ss_sp; // XXX: should be compat_ptr<void>
    int ss_flags;
    uint32_t ss_size;
  };

  static_assert(sizeof(FEXCore::x86::stack_t) == 12, "This needs to be the right size");

  // 32-bit guest machine context; packed to 88 bytes (checked by the static_assert below).
  struct FEX_PACKED mcontext_t {
    // 19 slots, one per ContextRegs enumerator.
    uint32_t gregs[19];
    uint32_t fpregs; // XXX: should be compat_ptr<FEXCore::x86::_libc_fpstate>
    uint32_t oldmask;
    uint32_t cr2;
  };
  static_assert(sizeof(FEXCore::x86::mcontext_t) == 88, "This needs to be the right size");

  // One 10-byte x87 register slot: four 16-bit significand words followed by a 16-bit exponent word.
  struct _libc_fpreg {
    uint16_t significand[4];
    uint16_t exponent;
  };
  static_assert(sizeof(FEXCore::x86::_libc_fpreg) == 10, "This needs to be the right size");

  // Same layout on both x86 and x86_64, so the 64-bit definitions are reused through aliases.
  using fpx_sw_bytes = x86_64::fpx_sw_bytes;
  using xstate_header = x86_64::xstate_header;
  using ymmh_state = x86_64::ymmh_state;

  // Markers that distinguish a legacy fpstate from one carrying extended state.
  // NOTE(review): presumably matched against the upper 16 bits of _libc_fpstate::status - confirm against users.
  enum fpstate_magic {
    // Legacy fpstate
    MAGIC_FPU = 0xFFFF'0000,
    // Contains extended state information
    MAGIC_XFPSTATE = 0x0,
  };
  // 32-bit guest FP state block; packed to 624 bytes (checked by the static_assert below).
  // Starts with the legacy x87 fields and is followed by the extended FPU data.
  struct FEX_PACKED _libc_fpstate {
    uint32_t fcw;
    uint32_t fsw;
    uint32_t ftw;
    uint32_t fop;
    uint32_t cssel;
    uint32_t dataoff;
    uint32_t datasel;
    FEXCore::x86::_libc_fpreg _st[8];
    uint32_t status;

    // Extended FPU data
    uint32_t pad[6]; // Ignored FXSR data
    uint32_t mxcsr;
    uint32_t reserved;
    __uint128_t _st_pad[8];   // Ignored st data
    __uint128_t _xmm[8];      // First 8 XMM registers
    uint32_t pad2[44];        // Second 8 XMM registers plus padding
    fpx_sw_bytes sw_reserved; // extended state encoding
  };
  static_assert(sizeof(FEXCore::x86::_libc_fpstate) == 624, "This needs to be the right size");

  /**
   * Extended state that includes both the main fpstate
   * and the extended state.
   *
   * Layout: 624-byte fpstate, 64-byte header, 256-byte ymmh block
   * and the 8-byte trailing magic, for 952 bytes in total.
   */
  struct FEX_PACKED xstate {
    _libc_fpstate fpstate;
    xstate_header xstate_hdr;
    ymmh_state ymmh;
    x86_64::magic2 magic2 {};
  };
  static_assert(sizeof(xstate) == 952);

  // 32-bit guest user context; packed to 236 bytes (checked by the static_assert below).
  struct FEX_PACKED ucontext_t {
    uint32_t uc_flags;
    uint32_t uc_link; // XXX: should be a compat_ptr<FEXCore::x86::ucontext_t>
    FEXCore::x86::stack_t uc_stack;
    FEXCore::x86::mcontext_t uc_mcontext;
    FEXCore::x86_64::sigset_t uc_sigmask; // This matches across architectures
  };
  static_assert(sizeof(FEXCore::x86::ucontext_t) == 236, "This needs to be the right size");

  ///< Non-rt signal context.
  //
  // Needs to match the format expected from signal handlers without SA_SIGINFO set.
  // The first 19 members appear in the same order as the x86 ContextRegs enumerators;
  // the last three mirror the fields that follow gregs in x86::mcontext_t.
  struct sigcontext {
    uint32_t gs;
    uint32_t fs;
    uint32_t es;
    uint32_t ds;
    uint32_t di;
    uint32_t si;
    uint32_t bp;
    uint32_t sp;
    uint32_t bx;
    uint32_t dx;
    uint32_t cx;
    uint32_t ax;
    uint32_t trapno;
    uint32_t err;
    uint32_t ip;
    uint32_t cs;
    uint32_t flags;
    uint32_t sp_at_signal;
    uint32_t ss;

    uint32_t fpstate;
    uint32_t oldmask;
    uint32_t cr2;
  };
} // namespace x86
} // namespace FEXCore


================================================
FILE: Source/Tools/LinuxEmulation/ArchHelpers/WinContext.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#ifdef _WIN32
#include <FEXCore/Utils/LogManager.h>
#include <winnt.h>

// Accessors over a Windows CONTEXT record, with one set per host architecture.
namespace FEX::ArchHelpers::Context {
#ifdef ARCHITECTURE_arm64
// Returns the Sp field of the context.
static inline uint64_t GetSp(PCONTEXT Context) {
  return Context->Sp;
}

// Returns the Pc field of the context.
static inline uint64_t GetPc(PCONTEXT Context) {
  return Context->Pc;
}

// Overwrites the Sp field of the context.
static inline void SetSp(PCONTEXT Context, uint64_t val) {
  Context->Sp = val;
}

// Overwrites the Pc field of the context.
static inline void SetPc(PCONTEXT Context, uint64_t val) {
  Context->Pc = val;
}

// Returns X28, which this host uses as the "state" register.
static inline uint64_t GetState(PCONTEXT Context) {
  return Context->X28;
}

// Overwrites X28, which this host uses as the "state" register.
static inline void SetState(PCONTEXT Context, uint64_t val) {
  Context->X28 = val;
}

// Returns a pointer to the X register array inside the context.
static inline uint64_t* GetArmGPRs(PCONTEXT Context) {
  return Context->X;
}
#endif

#ifdef ARCHITECTURE_x86_64
// Returns the Rsp field of the context.
static inline uint64_t GetSp(PCONTEXT Context) {
  return Context->Rsp;
}

// Returns the Rip field of the context.
static inline uint64_t GetPc(PCONTEXT Context) {
  return Context->Rip;
}

// Overwrites the Rsp field of the context.
static inline void SetSp(PCONTEXT Context, uint64_t val) {
  Context->Rsp = val;
}

// Overwrites the Rip field of the context.
static inline void SetPc(PCONTEXT Context, uint64_t val) {
  Context->Rip = val;
}

// Returns R14, which this host uses as the "state" register.
static inline uint64_t GetState(PCONTEXT Context) {
  return Context->R14;
}

// Overwrites R14, which this host uses as the "state" register.
static inline void SetState(PCONTEXT Context, uint64_t val) {
  Context->R14 = val;
}

// Stub: an x86 context has no Arm registers, so this reports a fatal error through ERROR_AND_DIE_FMT.
static inline uint64_t* GetArmGPRs(PCONTEXT Context) {
  ERROR_AND_DIE_FMT("Not implemented for x86 host");
}

#endif

} // namespace FEX::ArchHelpers::Context

#endif


================================================
FILE: Source/Tools/LinuxEmulation/CMakeLists.txt
================================================
# TODO: why is this add_compile_options instead of target?
# Applies to every target created in this directory from here on, not only LinuxEmulation.
add_compile_options(-fno-operator-names)

# Static library holding the Linux emulation layer: common sources first, then the
# 32-bit (x32/) and 64-bit (x64/) guest specific syscall handlers, then the shared
# syscall implementations (Syscalls/).
add_library(LinuxEmulation STATIC
  VDSO_Emulation.cpp
  Thunks.cpp
  ArchHelpers/MContext.cpp
  GdbServer/Info.cpp
  LinuxSyscalls/GdbServer.cpp
  LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp
  LinuxSyscalls/FaultSafeUserMemAccess.cpp
  LinuxSyscalls/FileManagement.cpp
  LinuxSyscalls/LinuxAllocator.cpp
  LinuxSyscalls/Seccomp/SeccompEmulator.cpp
  LinuxSyscalls/Seccomp/BPFEmitter.cpp
  LinuxSyscalls/Seccomp/Dumper.cpp
  LinuxSyscalls/SignalDelegator.cpp
  LinuxSyscalls/Syscalls.cpp
  LinuxSyscalls/SyscallsSMCTracking.cpp
  LinuxSyscalls/SyscallsVMATracking.cpp
  LinuxSyscalls/ThreadManager.cpp
  LinuxSyscalls/SignalDelegator/GuestFramesManagement.cpp
  LinuxSyscalls/Utils/Threads.cpp
  LinuxSyscalls/x32/Syscalls.cpp
  LinuxSyscalls/x32/EPoll.cpp
  LinuxSyscalls/x32/FD.cpp
  LinuxSyscalls/x32/FS.cpp
  LinuxSyscalls/x32/Info.cpp
  LinuxSyscalls/x32/IO.cpp
  LinuxSyscalls/x32/Memory.cpp
  LinuxSyscalls/x32/Msg.cpp
  LinuxSyscalls/x32/NotImplemented.cpp
  LinuxSyscalls/x32/Semaphore.cpp
  LinuxSyscalls/x32/Sched.cpp
  LinuxSyscalls/x32/Signals.cpp
  LinuxSyscalls/x32/Socket.cpp
  LinuxSyscalls/x32/Stubs.cpp
  LinuxSyscalls/x32/Thread.cpp
  LinuxSyscalls/x32/Time.cpp
  LinuxSyscalls/x32/Timer.cpp
  LinuxSyscalls/x32/IoctlEmulation.cpp
  LinuxSyscalls/x64/EPoll.cpp
  LinuxSyscalls/x64/FD.cpp
  LinuxSyscalls/x64/Info.cpp
  LinuxSyscalls/x64/Memory.cpp
  LinuxSyscalls/x64/NotImplemented.cpp
  LinuxSyscalls/x64/Semaphore.cpp
  LinuxSyscalls/x64/Signals.cpp
  LinuxSyscalls/x64/Thread.cpp
  LinuxSyscalls/x64/Syscalls.cpp
  LinuxSyscalls/x64/Time.cpp
  LinuxSyscalls/Syscalls/EPoll.cpp
  LinuxSyscalls/Syscalls/FD.cpp
  LinuxSyscalls/Syscalls/FS.cpp
  LinuxSyscalls/Syscalls/Passthrough.cpp
  LinuxSyscalls/Syscalls/Info.cpp
  LinuxSyscalls/Syscalls/IO.cpp
  LinuxSyscalls/Syscalls/Memory.cpp
  LinuxSyscalls/Syscalls/Signals.cpp
  LinuxSyscalls/Syscalls/Thread.cpp
  LinuxSyscalls/Syscalls/Timer.cpp
  LinuxSyscalls/Syscalls/NotImplemented.cpp
  LinuxSyscalls/Syscalls/Stubs.cpp)

# Warnings for this library only; three of them are promoted to hard errors.
target_compile_options(LinuxEmulation PRIVATE
  -Wall
  -Werror=cast-qual
  -Werror=ignored-qualifiers
  -Werror=implicit-fallthrough

  -Wno-trigraphs
  -fwrapv)

# Hide symbols by default.
set_target_properties(LinuxEmulation PROPERTIES
  C_VISIBILITY_PRESET hidden
  CXX_VISIBILITY_PRESET hidden
  VISIBILITY_INLINES_HIDDEN TRUE)

# Include paths needed only to build the library itself.
target_include_directories(LinuxEmulation PRIVATE
  ${CMAKE_BINARY_DIR}/generated
  ${CMAKE_CURRENT_SOURCE_DIR}/
  ${PROJECT_SOURCE_DIR}/External/drm-headers/include/)

# Consumers get this directory on their include path.
target_include_directories(LinuxEmulation INTERFACE
  ${CMAKE_CURRENT_SOURCE_DIR}/)

target_link_libraries(LinuxEmulation PRIVATE
  Common
  CommonTools)

# FEXCore is propagated to anything that links against LinuxEmulation.
target_link_libraries(LinuxEmulation INTERFACE FEXCore)

# Flat list of (header, guest architecture) pairs consumed two entries at a time by the
# foreach() loop further down.
set(HEADERS_TO_VERIFY
  # These need to match structs to 32bit structs
  LinuxSyscalls/x32/Types.h          x86_32
  LinuxSyscalls/x32/Ioctl/asound.h   x86_32
  LinuxSyscalls/x32/Ioctl/drm.h      x86_32
  LinuxSyscalls/x32/Ioctl/streams.h  x86_32
  LinuxSyscalls/x32/Ioctl/usbdev.h   x86_32
  LinuxSyscalls/x32/Ioctl/input.h    x86_32
  LinuxSyscalls/x32/Ioctl/sockios.h  x86_32
  LinuxSyscalls/x32/Ioctl/joystick.h x86_32
  LinuxSyscalls/x32/Ioctl/v4l2.h     x86_32

  # This needs to match structs to 64bit structs
  LinuxSyscalls/x64/Types.h          x86_64)

# ARG_COUNT becomes the index of the last list element (length - 1), because
# foreach(... RANGE) treats its stop value as inclusive.
list(LENGTH HEADERS_TO_VERIFY ARG_COUNT)
math(EXPR ARG_COUNT "${ARG_COUNT}-1")

# Compiler arguments forwarded to the struct verifier script after its own options.
set(ARGS
  "-x" "c++"
  "-std=c++20"
  "-fno-operator-names"
  "-I${PROJECT_SOURCE_DIR}/External/drm-headers/include/")

# Global include directories
get_directory_property (INC_DIRS INCLUDE_DIRECTORIES)
list(TRANSFORM INC_DIRS PREPEND "-I")
list(APPEND ARGS ${INC_DIRS})

# FEXCore directories
get_target_property(INC_DIRS FEXCore INTERFACE_INCLUDE_DIRECTORIES)
list(TRANSFORM INC_DIRS PREPEND "-I")
list(APPEND ARGS ${INC_DIRS})

# LinuxEmulation's own interface directories
get_target_property(INC_DIRS LinuxEmulation INTERFACE_INCLUDE_DIRECTORIES)
list(TRANSFORM INC_DIRS PREPEND "-I")
list(APPEND ARGS ${INC_DIRS})

# Walk HEADERS_TO_VERIFY in steps of two: Index is the header, Index+1 its guest architecture.
# Each header gets four tests: a "-c1"-only run and a "-c1"/"-c2" run, each for x86_64 and aarch64.
foreach(Index RANGE 0 ${ARG_COUNT} 2)
  math(EXPR TEST_TYPE_INDEX "${Index}+1")

  list(GET HEADERS_TO_VERIFY ${Index} HEADER)
  list(GET HEADERS_TO_VERIFY ${TEST_TYPE_INDEX} TEST_TYPE)

  # The tests run from CMAKE_BINARY_DIR, so hand the script a path relative to it.
  file(RELATIVE_PATH REL_HEADER ${CMAKE_BINARY_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/${HEADER}")
  # NOTE(review): TEST_DESC is not set anywhere in this file - presumably inherited from a parent scope; confirm.
  set(TEST_NAME "${TEST_DESC}/Test_verify_${HEADER}")
  set(TEST_NAME_ARCH "${TEST_DESC}/Test_verify_arch_${HEADER}")

  add_test(NAME ${TEST_NAME}_x86_64
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/StructPackVerifier.py" "-c1" "x86_64" "${REL_HEADER}" ${ARGS})

  add_test(NAME ${TEST_NAME}_aarch64
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/StructPackVerifier.py" "-c1" "aarch64" "${REL_HEADER}" ${ARGS})

  add_test(NAME ${TEST_NAME_ARCH}_x86_64
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/StructPackVerifier.py" "-c1" "x86_64" "-c2" "${TEST_TYPE}" "${REL_HEADER}" ${ARGS})

  add_test(NAME ${TEST_NAME_ARCH}_aarch64
    WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/StructPackVerifier.py" "-c1" "aarch64" "-c2" "${TEST_TYPE}" "${REL_HEADER}" ${ARGS})

  # NOTE(review): the DEPENDS test property names other tests rather than files, so passing the
  # header path here may have no effect - confirm the intent.
  set_property(TEST ${TEST_NAME}_x86_64 APPEND PROPERTY DEPENDS "${HEADER}")
  set_property(TEST ${TEST_NAME}_aarch64 APPEND PROPERTY DEPENDS "${HEADER}")
  set_property(TEST ${TEST_NAME_ARCH}_x86_64 APPEND PROPERTY DEPENDS "${HEADER}")
  set_property(TEST ${TEST_NAME_ARCH}_aarch64 APPEND PROPERTY DEPENDS "${HEADER}")
endforeach()

# Convenience target that runs the tests registered above through ctest.
# ctest's -R option takes a regular expression, so the trailing `*` repeats the final `y`
# rather than acting as a glob; the pattern still matches every "Test_verify..." name.
add_custom_target(struct_verifier
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "Test_verify*")


================================================
FILE: Source/Tools/LinuxEmulation/GdbServer/Info.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|gdbserver
desc: Provides a gdb interface to the guest state
$end_info$
*/

#include "GdbServer/Info.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/StringUtils.h>

#include <array>
#include <string_view>

namespace FEX::GDB::Info {
// Name of each EFLAGS bit, indexed by bit position (0..21). Positions without a name hold an
// empty string, which BuildTargetXML skips when it emits the flag fields.
constexpr std::array<std::string_view, 22> FlagNames = {
  "CF", "", "PF", "", "AF", "", "ZF", "SF", "TF", "IF", "DF", "OF", "IOPL", "", "NT", "", "RF", "VM", "AC", "VIF", "VIP", "ID",
};

// Looks up the EFLAGS bit name for the given bit position. The returned reference points
// into the FlagNames table; the string is empty for positions without a name.
const std::string_view& GetFlagName(unsigned Bit) {
  // The table holds exactly one entry per supported bit position.
  LOGMAN_THROW_A_FMT(Bit < FlagNames.size(), "Bit position too large");
  const std::string_view& Name = FlagNames[Bit];
  return Name;
}

// Maps a FEXCore::X86State general purpose register index to its lowercase 64-bit name.
// Any value outside the sixteen listed registers hits FEX_UNREACHABLE.
std::string_view GetGRegName(unsigned Reg) {
  switch (Reg) {
  case FEXCore::X86State::REG_RAX: return "rax";
  case FEXCore::X86State::REG_RBX: return "rbx";
  case FEXCore::X86State::REG_RCX: return "rcx";
  case FEXCore::X86State::REG_RDX: return "rdx";
  case FEXCore::X86State::REG_RSP: return "rsp";
  case FEXCore::X86State::REG_RBP: return "rbp";
  case FEXCore::X86State::REG_RSI: return "rsi";
  case FEXCore::X86State::REG_RDI: return "rdi";
  case FEXCore::X86State::REG_R8: return "r8";
  case FEXCore::X86State::REG_R9: return "r9";
  case FEXCore::X86State::REG_R10: return "r10";
  case FEXCore::X86State::REG_R11: return "r11";
  case FEXCore::X86State::REG_R12: return "r12";
  case FEXCore::X86State::REG_R13: return "r13";
  case FEXCore::X86State::REG_R14: return "r14";
  case FEXCore::X86State::REG_R15: return "r15";
  default: FEX_UNREACHABLE;
  }
}

// Reads /proc/<PID>/task/<ThreadID>/comm and returns its trimmed contents.
// The result of LoadFile is not checked, so whatever ended up in the buffer is what gets trimmed.
fextl::string GetThreadName(uint32_t PID, uint32_t ThreadID) {
  const fextl::string CommPath = fextl::fmt::format("/proc/{}/task/{}/comm", PID, ThreadID);

  fextl::string RawName;
  FEXCore::FileLoading::LoadFile(RawName, CommPath);

  // Trim out the potential newline, breaks GDB if it exists.
  auto Trimmed = FEXCore::StringUtils::Trim(RawName);
  return Trimmed;
}

// Produces the osdata XML document. The process list is currently always empty.
fextl::string BuildOSXML() {
  fextl::ostringstream Document;

  // Prolog and document type.
  Document << "<?xml version='1.0'?>\n"
           << "<!DOCTYPE target SYSTEM \"osdata.dtd\">\n";

  // XXX: no process entries are emitted between the opening and closing tags.
  Document << "<osdata type=\"processes\">"
           << "</osdata>";

  Document << std::flush;
  return Document.str();
}

// Produces the GDB target description XML: architecture, OS ABI and three features
// (core, sse, avx) whose registers are listed in the order FEX reports them.
//
// @param Is64Bit Selects the <architecture> element (i386:x86-64 versus i386).
//                NOTE(review): the register list itself is identical in both modes and always
//                describes 64-bit GPRs - confirm this is intended for 32-bit guests.
fextl::string BuildTargetXML(bool Is64Bit) {
  // Vector and union type declarations, emitted verbatim at the start of both the SSE and the
  // AVX feature. The embedded indentation is part of the output.
  constexpr std::string_view VectorTypesXML =
    R"(<vector id="v4f" type="ieee_single" count="4"/>
        <vector id="v2d" type="ieee_double" count="2"/>
        <vector id="v16i8" type="int8" count="16"/>
        <vector id="v8i16" type="int16" count="8"/>
        <vector id="v4i32" type="int32" count="4"/>
        <vector id="v2i64" type="int64" count="2"/>
        <union id="vec128">
          <field name="v4_float" type="v4f"/>
          <field name="v2_double" type="v2d"/>
          <field name="v16_int8" type="v16i8"/>
          <field name="v8_int16" type="v8i16"/>
          <field name="v4_int32" type="v4i32"/>
          <field name="v2_int64" type="v2i64"/>
          <field name="uint128" type="uint128"/>
        </union>
        )";

  fextl::ostringstream xml;

  xml << "<?xml version='1.0'?>\n";
  xml << "<!DOCTYPE target SYSTEM 'gdb-target.dtd'>\n";
  xml << "<target>\n";
  if (Is64Bit) {
    xml << "<architecture>i386:x86-64</architecture>\n";
  } else {
    xml << "<architecture>i386</architecture>\n";
  }
  xml << "<osabi>GNU/Linux</osabi>\n";
  xml << "<feature name='org.gnu.gdb.i386.core'>\n";

  xml << "<flags id='fex_eflags' size='4'>\n";
  // flags register: one single-bit field per named EFLAGS bit, unnamed positions are skipped.
  for (unsigned Bit = 0; Bit < FlagNames.size(); Bit++) {
    const auto name = GDB::Info::GetFlagName(Bit);
    if (name.empty()) {
      continue;
    }
    xml << "\t<field name='" << name << "' start='" << Bit << "' end='" << Bit << "' />\n";
  }
  xml << "</flags>\n";

  // Emits one <reg> element; size is in bits.
  auto reg = [&xml](std::string_view name, std::string_view type, int size) {
    xml << "<reg name='" << name << "' bitsize='" << size << "' type='" << type << "' />" << std::endl;
  };

  // Register ordering.
  // We want to just memcpy our x86 state to gdb, so we tell it the ordering.

  // GPRs
  for (uint32_t i = 0; i < FEXCore::Core::CPUState::NUM_GPRS; i++) {
    reg(GDB::Info::GetGRegName(i), "int64", 64);
  }

  reg("rip", "code_ptr", 64);

  reg("eflags", "fex_eflags", 32);

  // Fake registers which GDB requires, but we don't support;
  // We stick them past the end of our cpu state.

  // non-userspace segment registers
  reg("cs", "int32", 32);
  reg("ss", "int32", 32);
  reg("ds", "int32", 32);
  reg("es", "int32", 32);

  reg("fs", "int32", 32);
  reg("gs", "int32", 32);

  // x87 stack
  for (int i = 0; i < 8; i++) {
    reg(fextl::fmt::format("st{}", i), "i387_ext", 80);
  }

  // x87 control
  reg("fctrl", "int32", 32);
  reg("fstat", "int32", 32);
  reg("ftag", "int32", 32);
  reg("fiseg", "int32", 32);
  reg("fioff", "int32", 32);
  reg("foseg", "int32", 32);
  reg("fooff", "int32", 32);
  reg("fop", "int32", 32);


  xml << "</feature>\n";
  xml << "<feature name='org.gnu.gdb.i386.sse'>\n";
  xml << VectorTypesXML;

  // SSE regs
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) {
    reg(fextl::fmt::format("xmm{}", i), "vec128", 128);
  }

  reg("mxcsr", "int", 32);

  xml << "</feature>\n";

  xml << "<feature name='org.gnu.gdb.i386.avx'>";
  xml << VectorTypesXML;
  // Upper 128-bit halves of the YMM registers. Formatted with fextl::fmt like every other
  // register name in this function.
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) {
    reg(fextl::fmt::format("ymm{}h", i), "vec128", 128);
  }
  xml << "</feature>\n";

  xml << "</target>";
  xml << std::flush;

  return xml.str();
}

} // namespace FEX::GDB::Info


================================================
FILE: Source/Tools/LinuxEmulation/GdbServer/Info.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|gdbserver
desc: Provides a gdb interface to the guest state
$end_info$
*/
#pragma once

#include <FEXCore/fextl/string.h>

#include <cstdint>
#include <string_view>

// Opaque forward declaration of the X86Reg enum (fixed underlying type uint32_t) so this
// header does not need to include the header that defines the enumerators.
// NOTE(review): confirm which declarations or includers of this header still rely on it.
namespace FEXCore::X86State {
enum X86Reg : uint32_t;
}

// Helper queries used to describe guest state to a gdb client.
namespace FEX::GDB::Info {
/**
 * @brief Returns the textual name of a bit position in the EFLAGS register.
 *
 * @param Bit Which bit of EFLAGS to query
 *
 * @return Reference to the name. Because this returns by reference, the referenced
 *         string_view has to outlive the call (presumably static storage; verify
 *         against the definition).
 */
const std::string_view& GetFlagName(unsigned Bit);

/**
 * @brief Returns the textual name of a GPR register.
 *
 * @param Reg Index of the register to fetch
 *
 * @return The register name, returned by value.
 */
std::string_view GetGRegName(unsigned Reg);

/**
 * @brief Fetches the name of a thread.
 *
 * @param PID The process id of the application
 * @param ThreadID The id of the thread inside that process
 *
 * @return The thread's name as an owned string.
 */
fextl::string GetThreadName(uint32_t PID, uint32_t ThreadID);

/**
 * @brief Builds the GDB-specific XML document that describes the OS.
 *
 * @return The XML text as an owned string.
 */
fextl::string BuildOSXML();

/**
 * @brief Builds the GDB-specific XML document that describes the target.
 *
 * @param Is64Bit Whether the described guest is 64-bit.
 *        NOTE(review): exactly what this switches in the generated XML should be
 *        confirmed against the definition.
 *
 * @return The XML text as an owned string.
 */
fextl::string BuildTargetXML(bool Is64Bit);
} // namespace FEX::GDB::Info


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Arm64/SyscallsEnum.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-arm64
$end_info$
*/
#pragma once

namespace FEX::HLE::Arm64 {
/// Enum containing all Arm64 linux syscalls for the host kernel
///
/// Layout of this table:
///  - Numbered entries: one enumerator per syscall, valued with its syscall number.
///  - SYSCALL_Arm64_MAX: a bound that is larger than every numbered entry.
///  - Unsupported entries: names that have no number on this host, all valued `~0`.
///
/// The enum has no fixed underlying type and is unscoped, so enumerators convert
/// implicitly to integers.
enum Syscalls_Arm64 {
  SYSCALL_Arm64_io_setup = 0,
  SYSCALL_Arm64_io_destroy = 1,
  SYSCALL_Arm64_io_submit = 2,
  SYSCALL_Arm64_io_cancel = 3,
  SYSCALL_Arm64_io_getevents = 4,
  SYSCALL_Arm64_setxattr = 5,
  SYSCALL_Arm64_lsetxattr = 6,
  SYSCALL_Arm64_fsetxattr = 7,
  SYSCALL_Arm64_getxattr = 8,
  SYSCALL_Arm64_lgetxattr = 9,
  SYSCALL_Arm64_fgetxattr = 10,
  SYSCALL_Arm64_listxattr = 11,
  SYSCALL_Arm64_llistxattr = 12,
  SYSCALL_Arm64_flistxattr = 13,
  SYSCALL_Arm64_removexattr = 14,
  SYSCALL_Arm64_lremovexattr = 15,
  SYSCALL_Arm64_fremovexattr = 16,
  SYSCALL_Arm64_getcwd = 17,
  SYSCALL_Arm64_lookup_dcookie = 18,
  SYSCALL_Arm64_eventfd2 = 19,
  SYSCALL_Arm64_epoll_create1 = 20,
  SYSCALL_Arm64_epoll_ctl = 21,
  SYSCALL_Arm64_epoll_pwait = 22,
  SYSCALL_Arm64_dup = 23,
  SYSCALL_Arm64_dup3 = 24,
  SYSCALL_Arm64_fcntl = 25,
  SYSCALL_Arm64_inotify_init1 = 26,
  SYSCALL_Arm64_inotify_add_watch = 27,
  SYSCALL_Arm64_inotify_rm_watch = 28,
  SYSCALL_Arm64_ioctl = 29,
  SYSCALL_Arm64_ioprio_set = 30,
  SYSCALL_Arm64_ioprio_get = 31,
  SYSCALL_Arm64_flock = 32,
  SYSCALL_Arm64_mknodat = 33,
  SYSCALL_Arm64_mkdirat = 34,
  SYSCALL_Arm64_unlinkat = 35,
  SYSCALL_Arm64_symlinkat = 36,
  SYSCALL_Arm64_linkat = 37,
  SYSCALL_Arm64_renameat = 38,
  SYSCALL_Arm64_umount2 = 39,
  SYSCALL_Arm64_mount = 40,
  SYSCALL_Arm64_pivot_root = 41,
  SYSCALL_Arm64_nfsservctl = 42,
  SYSCALL_Arm64_statfs = 43,
  SYSCALL_Arm64_fstatfs = 44,
  SYSCALL_Arm64_truncate = 45,
  SYSCALL_Arm64_ftruncate = 46,
  SYSCALL_Arm64_fallocate = 47,
  SYSCALL_Arm64_faccessat = 48,
  SYSCALL_Arm64_chdir = 49,
  SYSCALL_Arm64_fchdir = 50,
  SYSCALL_Arm64_chroot = 51,
  SYSCALL_Arm64_fchmod = 52,
  SYSCALL_Arm64_fchmodat = 53,
  SYSCALL_Arm64_fchownat = 54,
  SYSCALL_Arm64_fchown = 55,
  SYSCALL_Arm64_openat = 56,
  SYSCALL_Arm64_close = 57,
  SYSCALL_Arm64_vhangup = 58,
  SYSCALL_Arm64_pipe2 = 59,
  SYSCALL_Arm64_quotactl = 60,
  SYSCALL_Arm64_getdents64 = 61,
  SYSCALL_Arm64_lseek = 62,
  SYSCALL_Arm64_read = 63,
  SYSCALL_Arm64_write = 64,
  SYSCALL_Arm64_readv = 65,
  SYSCALL_Arm64_writev = 66,
  SYSCALL_Arm64_pread_64 = 67,
  SYSCALL_Arm64_pwrite_64 = 68,
  SYSCALL_Arm64_preadv = 69,
  SYSCALL_Arm64_pwritev = 70,
  SYSCALL_Arm64_sendfile = 71,
  SYSCALL_Arm64_pselect6 = 72,
  SYSCALL_Arm64_ppoll = 73,
  SYSCALL_Arm64_signalfd4 = 74,
  SYSCALL_Arm64_vmsplice = 75,
  SYSCALL_Arm64_splice = 76,
  SYSCALL_Arm64_tee = 77,
  SYSCALL_Arm64_readlinkat = 78,
  SYSCALL_Arm64_fstatat = 79,
  SYSCALL_Arm64_fstat = 80,
  SYSCALL_Arm64_sync = 81,
  SYSCALL_Arm64_fsync = 82,
  SYSCALL_Arm64_fdatasync = 83,
  // Both sync_file_range names are deliberately given the same value (84), so the two
  // enumerators compare equal and cannot both be used as distinct `case` labels.
  SYSCALL_Arm64_sync_file_range2 = 84,
  SYSCALL_Arm64_sync_file_range = 84,
  SYSCALL_Arm64_timerfd_create = 85,
  SYSCALL_Arm64_timerfd_settime = 86,
  SYSCALL_Arm64_timerfd_gettime = 87,
  SYSCALL_Arm64_utimensat = 88,
  SYSCALL_Arm64_acct = 89,
  SYSCALL_Arm64_capget = 90,
  SYSCALL_Arm64_capset = 91,
  SYSCALL_Arm64_personality = 92,
  SYSCALL_Arm64_exit = 93,
  SYSCALL_Arm64_exit_group = 94,
  SYSCALL_Arm64_waitid = 95,
  SYSCALL_Arm64_set_tid_address = 96,
  SYSCALL_Arm64_unshare = 97,
  SYSCALL_Arm64_futex = 98,
  SYSCALL_Arm64_set_robust_list = 99,
  SYSCALL_Arm64_get_robust_list = 100,
  SYSCALL_Arm64_nanosleep = 101,
  SYSCALL_Arm64_getitimer = 102,
  SYSCALL_Arm64_setitimer = 103,
  SYSCALL_Arm64_kexec_load = 104,
  SYSCALL_Arm64_init_module = 105,
  SYSCALL_Arm64_delete_module = 106,
  SYSCALL_Arm64_timer_create = 107,
  SYSCALL_Arm64_timer_gettime = 108,
  SYSCALL_Arm64_timer_getoverrun = 109,
  SYSCALL_Arm64_timer_settime = 110,
  SYSCALL_Arm64_timer_delete = 111,
  SYSCALL_Arm64_clock_settime = 112,
  SYSCALL_Arm64_clock_gettime = 113,
  SYSCALL_Arm64_clock_getres = 114,
  SYSCALL_Arm64_clock_nanosleep = 115,
  SYSCALL_Arm64_syslog = 116,
  SYSCALL_Arm64_ptrace = 117,
  SYSCALL_Arm64_sched_setparam = 118,
  SYSCALL_Arm64_sched_setscheduler = 119,
  SYSCALL_Arm64_sched_getscheduler = 120,
  SYSCALL_Arm64_sched_getparam = 121,
  SYSCALL_Arm64_sched_setaffinity = 122,
  SYSCALL_Arm64_sched_getaffinity = 123,
  SYSCALL_Arm64_sched_yield = 124,
  SYSCALL_Arm64_sched_get_priority_max = 125,
  SYSCALL_Arm64_sched_get_priority_min = 126,
  SYSCALL_Arm64_sched_rr_get_interval = 127,
  SYSCALL_Arm64_restart_syscall = 128,
  SYSCALL_Arm64_kill = 129,
  SYSCALL_Arm64_tkill = 130,
  SYSCALL_Arm64_tgkill = 131,
  SYSCALL_Arm64_sigaltstack = 132,
  SYSCALL_Arm64_rt_sigsuspend = 133,
  SYSCALL_Arm64_rt_sigaction = 134,
  SYSCALL_Arm64_rt_sigprocmask = 135,
  SYSCALL_Arm64_rt_sigpending = 136,
  SYSCALL_Arm64_rt_sigtimedwait = 137,
  SYSCALL_Arm64_rt_sigqueueinfo = 138,
  SYSCALL_Arm64_rt_sigreturn = 139,
  SYSCALL_Arm64_setpriority = 140,
  SYSCALL_Arm64_getpriority = 141,
  SYSCALL_Arm64_reboot = 142,
  SYSCALL_Arm64_setregid = 143,
  SYSCALL_Arm64_setgid = 144,
  SYSCALL_Arm64_setreuid = 145,
  SYSCALL_Arm64_setuid = 146,
  SYSCALL_Arm64_setresuid = 147,
  SYSCALL_Arm64_getresuid = 148,
  SYSCALL_Arm64_setresgid = 149,
  SYSCALL_Arm64_getresgid = 150,
  SYSCALL_Arm64_setfsuid = 151,
  SYSCALL_Arm64_setfsgid = 152,
  SYSCALL_Arm64_times = 153,
  SYSCALL_Arm64_setpgid = 154,
  SYSCALL_Arm64_getpgid = 155,
  SYSCALL_Arm64_getsid = 156,
  SYSCALL_Arm64_setsid = 157,
  SYSCALL_Arm64_getgroups = 158,
  SYSCALL_Arm64_setgroups = 159,
  SYSCALL_Arm64_uname = 160,
  SYSCALL_Arm64_sethostname = 161,
  SYSCALL_Arm64_setdomainname = 162,
  SYSCALL_Arm64_getrlimit = 163,
  SYSCALL_Arm64_setrlimit = 164,
  SYSCALL_Arm64_getrusage = 165,
  SYSCALL_Arm64_umask = 166,
  SYSCALL_Arm64_prctl = 167,
  SYSCALL_Arm64_getcpu = 168,
  SYSCALL_Arm64_gettimeofday = 169,
  SYSCALL_Arm64_settimeofday = 170,
  SYSCALL_Arm64_adjtimex = 171,
  SYSCALL_Arm64_getpid = 172,
  SYSCALL_Arm64_getppid = 173,
  SYSCALL_Arm64_getuid = 174,
  SYSCALL_Arm64_geteuid = 175,
  SYSCALL_Arm64_getgid = 176,
  SYSCALL_Arm64_getegid = 177,
  SYSCALL_Arm64_gettid = 178,
  SYSCALL_Arm64_sysinfo = 179,
  SYSCALL_Arm64_mq_open = 180,
  SYSCALL_Arm64_mq_unlink = 181,
  SYSCALL_Arm64_mq_timedsend = 182,
  SYSCALL_Arm64_mq_timedreceive = 183,
  SYSCALL_Arm64_mq_notify = 184,
  SYSCALL_Arm64_mq_getsetattr = 185,
  SYSCALL_Arm64_msgget = 186,
  SYSCALL_Arm64_msgctl = 187,
  SYSCALL_Arm64_msgrcv = 188,
  SYSCALL_Arm64_msgsnd = 189,
  SYSCALL_Arm64_semget = 190,
  SYSCALL_Arm64_semctl = 191,
  SYSCALL_Arm64_semtimedop = 192,
  SYSCALL_Arm64_semop = 193,
  SYSCALL_Arm64_shmget = 194,
  SYSCALL_Arm64_shmctl = 195,
  SYSCALL_Arm64_shmat = 196,
  SYSCALL_Arm64_shmdt = 197,
  SYSCALL_Arm64_socket = 198,
  SYSCALL_Arm64_socketpair = 199,
  SYSCALL_Arm64_bind = 200,
  SYSCALL_Arm64_listen = 201,
  SYSCALL_Arm64_accept = 202,
  SYSCALL_Arm64_connect = 203,
  SYSCALL_Arm64_getsockname = 204,
  SYSCALL_Arm64_getpeername = 205,
  SYSCALL_Arm64_sendto = 206,
  SYSCALL_Arm64_recvfrom = 207,
  SYSCALL_Arm64_setsockopt = 208,
  SYSCALL_Arm64_getsockopt = 209,
  SYSCALL_Arm64_shutdown = 210,
  SYSCALL_Arm64_sendmsg = 211,
  SYSCALL_Arm64_recvmsg = 212,
  SYSCALL_Arm64_readahead = 213,
  SYSCALL_Arm64_brk = 214,
  SYSCALL_Arm64_munmap = 215,
  SYSCALL_Arm64_mremap = 216,
  SYSCALL_Arm64_add_key = 217,
  SYSCALL_Arm64_request_key = 218,
  SYSCALL_Arm64_keyctl = 219,
  SYSCALL_Arm64_clone = 220,
  SYSCALL_Arm64_execve = 221,
  SYSCALL_Arm64_mmap = 222,
  SYSCALL_Arm64_fadvise64 = 223,
  SYSCALL_Arm64_swapon = 224,
  SYSCALL_Arm64_swapoff = 225,
  SYSCALL_Arm64_mprotect = 226,
  SYSCALL_Arm64_msync = 227,
  SYSCALL_Arm64_mlock = 228,
  SYSCALL_Arm64_munlock = 229,
  SYSCALL_Arm64_mlockall = 230,
  SYSCALL_Arm64_munlockall = 231,
  SYSCALL_Arm64_mincore = 232,
  SYSCALL_Arm64_madvise = 233,
  SYSCALL_Arm64_remap_file_pages = 234,
  SYSCALL_Arm64_mbind = 235,
  SYSCALL_Arm64_get_mempolicy = 236,
  SYSCALL_Arm64_set_mempolicy = 237,
  SYSCALL_Arm64_migrate_pages = 238,
  SYSCALL_Arm64_move_pages = 239,
  SYSCALL_Arm64_rt_tgsigqueueinfo = 240,
  SYSCALL_Arm64_perf_event_open = 241,
  SYSCALL_Arm64_accept4 = 242,
  SYSCALL_Arm64_recvmmsg = 243,
  // Values 244-259 have no enumerator in this table.
  SYSCALL_Arm64_wait4 = 260,
  SYSCALL_Arm64_prlimit_64 = 261,
  SYSCALL_Arm64_fanotify_init = 262,
  SYSCALL_Arm64_fanotify_mark = 263,
  SYSCALL_Arm64_name_to_handle_at = 264,
  SYSCALL_Arm64_open_by_handle_at = 265,
  SYSCALL_Arm64_clock_adjtime = 266,
  SYSCALL_Arm64_syncfs = 267,
  SYSCALL_Arm64_setns = 268,
  SYSCALL_Arm64_sendmmsg = 269,
  SYSCALL_Arm64_process_vm_readv = 270,
  SYSCALL_Arm64_process_vm_writev = 271,
  SYSCALL_Arm64_kcmp = 272,
  SYSCALL_Arm64_finit_module = 273,
  SYSCALL_Arm64_sched_setattr = 274,
  SYSCALL_Arm64_sched_getattr = 275,
  SYSCALL_Arm64_renameat2 = 276,
  SYSCALL_Arm64_seccomp = 277,
  SYSCALL_Arm64_getrandom = 278,
  SYSCALL_Arm64_memfd_create = 279,
  SYSCALL_Arm64_bpf = 280,
  SYSCALL_Arm64_execveat = 281,
  SYSCALL_Arm64_userfaultfd = 282,
  SYSCALL_Arm64_membarrier = 283,
  SYSCALL_Arm64_mlock2 = 284,
  SYSCALL_Arm64_copy_file_range = 285,
  SYSCALL_Arm64_preadv2 = 286,
  SYSCALL_Arm64_pwritev2 = 287,
  SYSCALL_Arm64_pkey_mprotect = 288,
  SYSCALL_Arm64_pkey_alloc = 289,
  SYSCALL_Arm64_pkey_free = 290,
  SYSCALL_Arm64_statx = 291,
  SYSCALL_Arm64_io_pgetevents = 292,
  SYSCALL_Arm64_rseq = 293,
  SYSCALL_Arm64_kexec_file_load = 294,
  // Values 295-402 have no enumerator in this table.
  SYSCALL_Arm64_clock_gettime64 = 403,
  SYSCALL_Arm64_clock_settime64 = 404,
  SYSCALL_Arm64_clock_adjtime64 = 405,
  SYSCALL_Arm64_clock_getres_time64 = 406,
  SYSCALL_Arm64_clock_nanosleep_time64 = 407,
  SYSCALL_Arm64_timer_gettime64 = 408,
  SYSCALL_Arm64_timer_settime64 = 409,
  SYSCALL_Arm64_timerfd_gettime64 = 410,
  SYSCALL_Arm64_timerfd_settime64 = 411,
  SYSCALL_Arm64_utimensat_time64 = 412,
  SYSCALL_Arm64_pselect6_time64 = 413,
  SYSCALL_Arm64_ppoll_time64 = 414,
  // Value 415 has no enumerator in this table.
  SYSCALL_Arm64_io_pgetevents_time64 = 416,
  SYSCALL_Arm64_recvmmsg_time64 = 417,
  SYSCALL_Arm64_mq_timedsend_time64 = 418,
  SYSCALL_Arm64_mq_timedreceive_time64 = 419,
  SYSCALL_Arm64_semtimedop_time64 = 420,
  SYSCALL_Arm64_rt_sigtimedwait_time64 = 421,
  SYSCALL_Arm64_futex_time64 = 422,
  SYSCALL_Arm64_sched_rr_get_interval_time64 = 423,
  SYSCALL_Arm64_pidfd_send_signal = 424,
  SYSCALL_Arm64_io_uring_setup = 425,
  SYSCALL_Arm64_io_uring_enter = 426,
  SYSCALL_Arm64_io_uring_register = 427,
  SYSCALL_Arm64_open_tree = 428,
  SYSCALL_Arm64_move_mount = 429,
  SYSCALL_Arm64_fsopen = 430,
  SYSCALL_Arm64_fsconfig = 431,
  SYSCALL_Arm64_fsmount = 432,
  SYSCALL_Arm64_fspick = 433,
  SYSCALL_Arm64_pidfd_open = 434,
  SYSCALL_Arm64_clone3 = 435,
  SYSCALL_Arm64_close_range = 436,
  SYSCALL_Arm64_openat2 = 437,
  SYSCALL_Arm64_pidfd_getfd = 438,
  SYSCALL_Arm64_faccessat2 = 439,
  SYSCALL_Arm64_process_madvise = 440,
  SYSCALL_Arm64_epoll_pwait2 = 441,
  SYSCALL_Arm64_mount_setattr = 442,
  SYSCALL_Arm64_quotactl_fd = 443,
  SYSCALL_Arm64_landlock_create_ruleset = 444,
  SYSCALL_Arm64_landlock_add_rule = 445,
  SYSCALL_Arm64_landlock_restrict_self = 446,
  SYSCALL_Arm64_memfd_secret = 447,
  SYSCALL_Arm64_process_mrelease = 448,
  SYSCALL_Arm64_futex_waitv = 449,
  SYSCALL_Arm64_set_mempolicy_home_node = 450,
  SYSCALL_Arm64_cachestat = 451,
  SYSCALL_Arm64_fchmodat2 = 452,
  SYSCALL_Arm64_map_shadow_stack = 453,
  SYSCALL_Arm64_futex_wake = 454,
  SYSCALL_Arm64_futex_wait = 455,
  SYSCALL_Arm64_futex_requeue = 456,
  SYSCALL_Arm64_statmount = 457,
  SYSCALL_Arm64_listmount = 458,
  SYSCALL_Arm64_lsm_get_self_attr = 459,
  SYSCALL_Arm64_lsm_set_self_attr = 460,
  SYSCALL_Arm64_lsm_list_modules = 461,
  SYSCALL_Arm64_mseal = 462,
  SYSCALL_Arm64_setxattrat = 463,
  SYSCALL_Arm64_getxattrat = 464,
  SYSCALL_Arm64_listxattrat = 465,
  SYSCALL_Arm64_removexattrat = 466,
  // Bound that is strictly greater than every numbered entry above (highest is 466).
  // NOTE(review): presumably used to size syscall tables — confirm against users.
  SYSCALL_Arm64_MAX = 512,

  // Unsupported syscalls on this host
  // Every name below is given the value `~0` (all bits set, i.e. -1 as an int), so these
  // enumerators all compare equal to each other and never collide with a numbered entry.
  SYSCALL_Arm64_fork = ~0,
  SYSCALL_Arm64_open = ~0,
  SYSCALL_Arm64_waitpid = ~0,
  SYSCALL_Arm64_creat = ~0,
  SYSCALL_Arm64_link = ~0,
  SYSCALL_Arm64_unlink = ~0,
  SYSCALL_Arm64_time = ~0,
  SYSCALL_Arm64_mknod = ~0,
  SYSCALL_Arm64_chmod = ~0,
  SYSCALL_Arm64_lchown = ~0,
  SYSCALL_Arm64_break = ~0,
  SYSCALL_Arm64_oldstat = ~0,
  SYSCALL_Arm64_umount = ~0,
  SYSCALL_Arm64_stime = ~0,
  SYSCALL_Arm64_alarm = ~0,
  SYSCALL_Arm64_oldfstat = ~0,
  SYSCALL_Arm64_pause = ~0,
  SYSCALL_Arm64_utime = ~0,
  SYSCALL_Arm64_stty = ~0,
  SYSCALL_Arm64_gtty = ~0,
  SYSCALL_Arm64_access = ~0,
  SYSCALL_Arm64_nice = ~0,
  SYSCALL_Arm64_ftime = ~0,
  SYSCALL_Arm64_rename = ~0,
  SYSCALL_Arm64_mkdir = ~0,
  SYSCALL_Arm64_rmdir = ~0,
  SYSCALL_Arm64_pipe = ~0,
  SYSCALL_Arm64_prof = ~0,
  SYSCALL_Arm64_signal = ~0,
  SYSCALL_Arm64_lock = ~0,
  SYSCALL_Arm64_mpx = ~0,
  SYSCALL_Arm64_ulimit = ~0,
  SYSCALL_Arm64_oldolduname = ~0,
  SYSCALL_Arm64_ustat = ~0,
  SYSCALL_Arm64_dup2 = ~0,
  SYSCALL_Arm64_getpgrp = ~0,
  SYSCALL_Arm64_sigaction = ~0,
  SYSCALL_Arm64_sgetmask = ~0,
  SYSCALL_Arm64_ssetmask = ~0,
  SYSCALL_Arm64_sigsuspend = ~0,
  SYSCALL_Arm64_sigpending = ~0,
  SYSCALL_Arm64_select = ~0,
  SYSCALL_Arm64_symlink = ~0,
  SYSCALL_Arm64_oldlstat = ~0,
  SYSCALL_Arm64_readlink = ~0,
  SYSCALL_Arm64_uselib = ~0,
  SYSCALL_Arm64_readdir = ~0,
  SYSCALL_Arm64_profil = ~0,
  SYSCALL_Arm64_ioperm = ~0,
  SYSCALL_Arm64_socketcall = ~0,
  SYSCALL_Arm64_stat = ~0,
  SYSCALL_Arm64_lstat = ~0,
  SYSCALL_Arm64_olduname = ~0,
  SYSCALL_Arm64_iopl = ~0,
  SYSCALL_Arm64_idle = ~0,
  SYSCALL_Arm64_vm86old = ~0,
  SYSCALL_Arm64_ipc = ~0,
  SYSCALL_Arm64_sigreturn = ~0,
  SYSCALL_Arm64_modify_ldt = ~0,
  SYSCALL_Arm64_sigprocmask = ~0,
  SYSCALL_Arm64_create_module = ~0,
  SYSCALL_Arm64_get_kernel_syms = ~0,
  SYSCALL_Arm64_bdflush = ~0,
  SYSCALL_Arm64_sysfs = ~0,
  SYSCALL_Arm64_afs_syscall = ~0,
  SYSCALL_Arm64__llseek = ~0,
  SYSCALL_Arm64_getdents = ~0,
  SYSCALL_Arm64__newselect = ~0,
  SYSCALL_Arm64__sysctl = ~0,
  SYSCALL_Arm64_vm86 = ~0,
  SYSCALL_Arm64_query_module = ~0,
  SYSCALL_Arm64_poll = ~0,
  SYSCALL_Arm64_chown = ~0,
  SYSCALL_Arm64_getpmsg = ~0,
  SYSCALL_Arm64_putpmsg = ~0,
  SYSCALL_Arm64_vfork = ~0,
  SYSCALL_Arm64_ugetrlimit = ~0,
  SYSCALL_Arm64_mmap2 = ~0,
  SYSCALL_Arm64_truncate64 = ~0,
  SYSCALL_Arm64_ftruncate64 = ~0,
  SYSCALL_Arm64_stat64 = ~0,
  SYSCALL_Arm64_lstat64 = ~0,
  SYSCALL_Arm64_fstat64 = ~0,
  SYSCALL_Arm64_lchown32 = ~0,
  SYSCALL_Arm64_getuid32 = ~0,
  SYSCALL_Arm64_getgid32 = ~0,
  SYSCALL_Arm64_geteuid32 = ~0,
  SYSCALL_Arm64_getegid32 = ~0,
  SYSCALL_Arm64_setreuid32 = ~0,
  SYSCALL_Arm64_setregid32 = ~0,
  SYSCALL_Arm64_getgroups32 = ~0,
  SYSCALL_Arm64_setgroups32 = ~0,
  SYSCALL_Arm64_fchown32 = ~0,
  SYSCALL_Arm64_setresuid32 = ~0,
  SYSCALL_Arm64_getresuid32 = ~0,
  SYSCALL_Arm64_setresgid32 = ~0,
  SYSCALL_Arm64_getresgid32 = ~0,
  SYSCALL_Arm64_chown32 = ~0,
  SYSCALL_Arm64_setuid32 = ~0,
  SYSCALL_Arm64_setgid32 = ~0,
  SYSCALL_Arm64_setfsuid32 = ~0,
  SYSCALL_Arm64_setfsgid32 = ~0,
  SYSCALL_Arm64_fcntl64 = ~0,
  SYSCALL_Arm64_sendfile64 = ~0,
  SYSCALL_Arm64_set_thread_area = ~0,
  SYSCALL_Arm64_get_thread_area = ~0,
  SYSCALL_Arm64_epoll_create = ~0,
  SYSCALL_Arm64_epoll_wait = ~0,
  SYSCALL_Arm64_statfs64 = ~0,
  SYSCALL_Arm64_fstatfs64 = ~0,
  SYSCALL_Arm64_utimes = ~0,
  SYSCALL_Arm64_fadvise64_64 = ~0,
  SYSCALL_Arm64_vserver = ~0,
  SYSCALL_Arm64_inotify_init = ~0,
  SYSCALL_Arm64_futimesat = ~0,
  SYSCALL_Arm64_fstatat_64 = ~0,
  SYSCALL_Arm64_signalfd = ~0,
  SYSCALL_Arm64_eventfd = ~0,
  SYSCALL_Arm64_arch_prctl = ~0,
  SYSCALL_Arm64_tuxcall = ~0,
  SYSCALL_Arm64_security = ~0,
  SYSCALL_Arm64_epoll_ctl_old = ~0,
  SYSCALL_Arm64_epoll_wait_old = ~0,
  SYSCALL_Arm64_newfstatat = ~0,
  SYSCALL_Arm64_uretprobe = ~0,
};
} // namespace FEX::HLE::Arm64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
desc: Emulated /proc/cpuinfo, version, osrelease, etc
$end_info$
*/

#include "CodeLoader.h"

#include "Common/CPUInfo.h"
#include "Common/FDUtils.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/EmulatedFiles/EmulatedFiles.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CPUID.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <git_version.h>

#include <cstring>
#include <fcntl.h>
#include <filesystem>
#include <ostream>
#include <stdio.h>
#include <system_error>
#include <unistd.h>
#include <utility>

namespace FEX::EmulatedFile {
/**
 * @brief Creates an anonymous in-memory temporary file and hands back its raw FD
 *
 * The syscalls being hooked expect raw FDs, so the backing file also has to be a raw FD.
 * The guest application is allowed to leave these FDs dangling.
 *
 * glibc's tmpfile is deliberately avoided: it creates a FILE which glibc tracks and tries to clean up on
 * application exit. For a 32-bit application that dangling FILE would have been allocated with the FEX allocator,
 * which has already been torn down by the time shutdown reaches it.
 *
 * A dangling raw FD is safe, since the kernel cleans it up on application close if the guest never closes it.
 *
 * @param pathname Name handed to memfd_create for the new file
 * @param flags Open flags requested by the guest; only O_CLOEXEC is inspected here
 *
 * @return The result of memfd_create: the new FD, or -1 on failure
 */
static int GenTmpFD(const char* pathname, int flags) {
  // Sealing is always permitted on the new FD; close-on-exec is only forwarded when the caller asked for it.
  const bool WantsCloseOnExec = (flags & O_CLOEXEC) != 0;
  const uint32_t MemFDFlags = WantsCloseOnExec ? (MFD_ALLOW_SEALING | MFD_CLOEXEC) : MFD_ALLOW_SEALING;

  return memfd_create(pathname, MemFDFlags);
}

// Locks down a tmpfd by applying every seal in a single fcntl call:
// no shrinking, no growing, no writes (current or future), and no further seal changes.
// The net effect is that the tmpfd becomes read-only.
static void SealTmpFD(int fd) {
  constexpr int AllSeals = F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_FUTURE_WRITE;

  const bool Sealed = fcntl(fd, F_ADD_SEALS, AllSeals) != -1;
  if (!Sealed) [[unlikely]] {
    // Not expected to happen, and not fatal if it does: report it and carry on.
    LogMan::Msg::EFmt("Couldn't seal tmpfd! {}", errno);
  }
}

fextl::string GenerateCPUInfo(FEXCore::Context::Context* ctx, uint32_t CPUCores) {
  fextl::ostringstream cpu_stream {};
  auto res_0 = ctx->RunCPUIDFunction(0, 0);
  auto res_1 = ctx->RunCPUIDFunction(1, 0);
  auto res_6 = ctx->RunCPUIDFunction(6, 0);
  auto res_7 = ctx->RunCPUIDFunction(7, 0);
  auto res_7_1 = ctx->RunCPUIDFunction(7, 1);
  auto res_d_1 = ctx->RunCPUIDFunction(0xD, 1);
  auto res_10 = ctx->RunCPUIDFunction(0x10, 0);

  auto res_8000_0001 = ctx->RunCPUIDFunction(0x8000'0001, 0);
  auto res_8000_0007 = ctx->RunCPUIDFunction(0x8000'0007, 0);
  auto res_8000_0008 = ctx->RunCPUIDFunction(0x8000'0008, 0);
  auto res_8000_000a = ctx->RunCPUIDFunction(0x8000'000a, 0);
  auto res_8000_001f = ctx->RunCPUIDFunction(0x8000'001f, 0);

  union VendorID {
    struct {
      uint32_t id;
      char Str[13];
    };
    struct {
      FEXCore::CPUID::FunctionResults cpuid;
      uint8_t null;
    };
  };

  union ModelName {
    struct {
      char Str[49];
    };
    struct {
      FEXCore::CPUID::FunctionResults cpuid_2;
      FEXCore::CPUID::FunctionResults cpuid_3;
      FEXCore::CPUID::FunctionResults cpuid_4;
      uint8_t null;
    };
  };

  union Info {
    FEXCore::CPUID::FunctionResults cpuid;
    struct {
      unsigned Stepping   : 4;
      unsigned Model      : 4;
      unsigned FamilyID   : 4;
      unsigned Type       : 4;
      unsigned ExModelID  : 4;
      unsigned ExFamilyID : 8;
      unsigned            : 4;
    };
  };

  VendorID vendorid {};
  vendorid.cpuid = {res_0.eax, res_0.ebx, res_0.edx, res_0.ecx};
  vendorid.null = 0;

  Info info {res_1};

  uint32_t Family = info.FamilyID + (info.FamilyID == 0xF ? info.ExFamilyID : 0);
  fextl::ostringstream flags_data {};
  // Generate the flags data up front
  // This is the same per core
  {
    auto add_flag_if = [&flags_data](bool flag, const char* name) {
      if (flag) {
        flags_data << name << " ";
      }
    };

    add_flag_if(res_1.edx & (1 << 0), "fpu");
    add_flag_if(res_1.edx & (1 << 1), "vme");
    add_flag_if(res_1.edx & (1 << 2), "de");
    add_flag_if(res_1.edx & (1 << 3), "pse");
    add_flag_if(res_1.edx & (1 << 4), "tsc");
    add_flag_if(res_1.edx & (1 << 5), "msr");
    add_flag_if(res_1.edx & (1 << 6), "pae");
    add_flag_if(res_1.edx & (1 << 7), "mce");
    add_flag_if(res_1.edx & (1 << 8), "cx8");
    add_flag_if(res_1.edx & (1 << 9), "apic");
    add_flag_if(res_1.edx & (1 << 11), "sep");
    add_flag_if(res_1.edx & (1 << 12), "mtrr");
    add_flag_if(res_1.edx & (1 << 13), "pge");
    add_flag_if(res_1.edx & (1 << 14), "mca");
    add_flag_if(res_1.edx & (1 << 15), "cmov");
    add_flag_if(res_1.edx & (1 << 16), "pat");
    add_flag_if(res_1.edx & (1 << 17), "pse36");
    add_flag_if(res_1.edx & (1 << 18), "pn");
    add_flag_if(res_1.edx & (1 << 19), "clflush");
    add_flag_if(res_1.edx & (1 << 21), "ds");   // XXX
    add_flag_if(res_1.edx & (1 << 22), "acpi"); // XXX
    add_flag_if(res_1.edx & (1 << 23), "mmx");
    add_flag_if(res_1.edx & (1 << 24), "fxsr");
    add_flag_if(res_1.edx & (1 << 25), "sse");
    add_flag_if(res_1.edx & (1 << 26), "sse2");
    add_flag_if(res_1.edx & (1 << 27), "ss");
    add_flag_if(res_1.edx & (1 << 28), "ht");
    add_flag_if(res_1.edx & (1 << 29), "tm");
    add_flag_if(res_1.edx & (1 << 30), "ia64");
    add_flag_if(res_1.edx & (1 << 31), "pbe");

    add_flag_if(res_8000_0001.edx & (1 << 11), "syscall");
    add_flag_if(res_8000_0001.edx & (1 << 19), "mp");
    add_flag_if(res_8000_0001.edx & (1 << 20), "nx");
    add_flag_if(res_8000_0001.edx & (1 << 22), "mmxext");
    add_flag_if(res_8000_0001.edx & (1 << 25), "fxsr_opt");
    add_flag_if(res_8000_0001.edx & (1 << 26), "pdpe1gb");
    add_flag_if(res_8000_0001.edx & (1 << 27), "rdtscp");
    add_flag_if(res_8000_0001.edx & (1 << 29), "lm");
    add_flag_if(res_8000_0001.edx & (1 << 31), "3dnow");
    add_flag_if(res_8000_0001.edx & (1 << 30), "3dnowext");

    add_flag_if(res_8000_0007.edx & (1 << 8), "constant_tsc");

    // We are not a uniprocessor running in SMP mode
    add_flag_if(false, "up");
    // Timer is always running
    add_flag_if(true, "art");
    // No Intel perfmon
    add_flag_if(false, "arch_perfmon");
    // No precise event based sampling
    add_flag_if(false, "pebs");
    // No branch trace store
    add_flag_if(false, "bts");

    add_flag_if(true, "rep_good");
    add_flag_if(res_8000_0007.edx & (1 << 12), "tm");

    // Always support long nop
    add_flag_if(true, "nopl");

    // Always expose topology information
    add_flag_if(true, "xtoplogy");

    // Atom/geode only?
    add_flag_if(false, "tsc_reliable");
    add_flag_if(res_8000_0007.edx & (1 << 8), "nonstop_tsc");

    // We always support CPUID
    add_flag_if(true, "cpuid");
    add_flag_if(Family > 0x16, "extd_apicid");
    add_flag_if(false, "amd_dcm"); // Never claim to be a multi node processor
    add_flag_if(res_8000_0007.edx & (1 << 11), "aperfmperf");

    // Need to check ARM documentation if we can support this?
    add_flag_if(false, "nonstop_tsc_s3");

    // We can calculate this flag on AArch64
    add_flag_if(true, "tsc_known_freq");

    add_flag_if(res_1.ecx & (1 << 0), "pni");
    add_flag_if(res_1.ecx & (1 << 1), "pclmulqdq");
    add_flag_if(res_1.ecx & (1 << 2), "dtes64");
    add_flag_if(res_1.ecx & (1 << 3), "monitor");
    add_flag_if(res_1.ecx & (1 << 4), "ds_cpl");
    add_flag_if(res_1.ecx & (1 << 5), "vmx");
    add_flag_if(res_1.ecx & (1 << 6), "smx");
    add_flag_if(res_1.ecx & (1 << 7), "est");
    add_flag_if(res_1.ecx & (1 << 8), "tm2");
    add_flag_if(res_1.ecx & (1 << 9), "ssse3");
    add_flag_if(res_1.ecx & (1 << 10), "cid");
    add_flag_if(res_1.ecx & (1 << 11), "sdbg");
    add_flag_if(res_1.ecx & (1 << 12), "fma");
    add_flag_if(res_1.ecx & (1 << 13), "cx16");
    add_flag_if(res_1.ecx & (1 << 14), "xptr");
    add_flag_if(res_1.ecx & (1 << 15), "pdcm");
    add_flag_if(res_1.ecx & (1 << 17), "pcid");
    add_flag_if(res_1.ecx & (1 << 18), "dca");
    add_flag_if(res_1.ecx & (1 << 19), "sse4_1");
    add_flag_if(res_1.ecx & (1 << 20), "sse4_2");
    add_flag_if(res_1.ecx & (1 << 21), "x2apic");
    add_flag_if(res_1.ecx & (1 << 22), "movbe");
    add_flag_if(res_1.ecx & (1 << 23), "popcnt");
    add_flag_if(res_1.ecx & (1 << 24), "tsc_deadline_timer");
    add_flag_if(res_1.ecx & (1 << 25), "aes");
    add_flag_if(res_1.ecx & (1 << 26), "xsave");
    add_flag_if(res_1.ecx & (1 << 27), "oxsave");
    add_flag_if(res_1.ecx & (1 << 28), "avx");
    add_flag_if(res_1.ecx & (1 << 29), "f16c");
    add_flag_if(res_1.ecx & (1 << 30), "rdrand");
    add_flag_if(res_1.ecx & (1 << 31), "hypervisor");

    add_flag_if(res_8000_0001.ecx & (1 << 0), "lahf_lm");
    add_flag_if(res_8000_0001.ecx & (1 << 1), "cmp_legacy");
    add_flag_if(res_8000_0001.ecx & (1 << 2), "svm");
    add_flag_if(res_8000_0001.ecx & (1 << 3), "extapic");
    add_flag_if(res_8000_0001.ecx & (1 << 4), "cr8_legacy");
    add_flag_if(res_8000_0001.ecx & (1 << 5), "abm");
    add_flag_if(res_8000_0001.ecx & (1 << 6), "sse4a");
    add_flag_if(res_8000_0001.ecx & (1 << 7), "misalignsse");
    add_flag_if(res_8000_0001.ecx & (1 << 8), "3dnowprefetch");
    add_flag_if(res_8000_0001.ecx & (1 << 9), "osvw");
    add_flag_if(res_8000_0001.ecx & (1 << 10), "ibs");
    add_flag_if(res_8000_0001.ecx & (1 << 11), "xop");
    add_flag_if(res_8000_0001.ecx & (1 << 12), "skinit");
    add_flag_if(res_8000_0001.ecx & (1 << 13), "wdt");
    add_flag_if(res_8000_0001.ecx & (1 << 15), "lwp");
    add_flag_if(res_8000_0001.ecx & (1 << 16), "fma4");
    add_flag_if(res_8000_0001.ecx & (1 << 17), "tce");
    add_flag_if(res_8000_0001.ecx & (1 << 19), "nodeid_msr");
    add_flag_if(res_8000_0001.ecx & (1 << 21), "tbm");
    add_flag_if(res_8000_0001.ecx & (1 << 22), "topoext");
    add_flag_if(res_8000_0001.ecx & (1 << 23), "perfctr_core");
    add_flag_if(res_8000_0001.ecx & (1 << 24), "perfctr_nb");
    add_flag_if(res_8000_0001.ecx & (1 << 26), "bpext");
    add_flag_if(res_8000_0001.ecx & (1 << 27), "ptsc");
    add_flag_if(res_8000_0001.ecx & (1 << 28), "perfctr_llc");
    add_flag_if(res_8000_0001.ecx & (1 << 29), "mwaitx");

    // We don't support ring 3 supporting mwait
    add_flag_if(false, "ring3mwait");
    // We don't support Intel CPUID fault support
    add_flag_if(false, "cpuid_fault");
    add_flag_if(res_8000_0007.edx & (1 << 9), "cpb");
    add_flag_if(res_6.ecx & (1 << 3), "epb");
    add_flag_if(res_10.ebx & (1 << 1), "cat_l3");
    add_flag_if(res_10.ebx & (1 << 2), "cat_l2");
    add_flag_if(false, "invpcid_single");
    add_flag_if(res_8000_0007.edx & (1 << 7), "hw_pstate");
    add_flag_if(res_8000_001f.eax & (1 << 0), "sme");

    // Kernel page table isolation.
    add_flag_if(false, "pti");

    // We don't support Intel's Protected Processor Inventory Number
    add_flag_if(false, "intel_ppin");

    add_flag_if(res_8000_0008.ebx & (1 << 6), "mba");
    add_flag_if(res_8000_001f.eax & (1 << 1), "sev");

    { // Speculative bug workarounds
      // We don't claim to have these bugs, so we don't need to claim these flags
      add_flag_if(res_7.edx & (1 << 31), "ssbd");
      add_flag_if(false, "ibrs");
      add_flag_if(false, "ibpb");

      add_flag_if(res_7.edx & (1 << 27), "stibp");

      add_flag_if(false, "ibrs_enhanced");
    }

    // We don't support Intel's TPR Shadow feature
    add_flag_if(false, "tpr_shadow");
    // Intel virtual NMI
    add_flag_if(false, "vnmi");
    // Intel FlexPriority
    add_flag_if(false, "flexpriority");
    // Intel Extended page table
    add_flag_if(false, "ept");
    // Intel virtual processor ID
    add_flag_if(false, "vpid");

    // Prefer VMMCall to VMCall
    add_flag_if(false, "vmmcall");
    // Intel extended page table access dirty bit
    add_flag_if(false, "ept_ad");
    add_flag_if(res_7.ebx & (1 << 0), "fsgsbase");
    add_flag_if(res_7.ebx & (1 << 1), "tsc_adjust");
    add_flag_if(res_7.ebx & (1 << 2), "sgx");
    add_flag_if(res_7.ebx & (1 << 3), "bmi1");
    add_flag_if(res_7.ebx & (1 << 4), "hle");
    add_flag_if(res_7.ebx & (1 << 5), "avx2");
    add_flag_if(res_7.ebx & (1 << 6), "fdp_excptn_only");
    add_flag_if(res_7.ebx & (1 << 7), "smep");
    add_flag_if(res_7.ebx & (1 << 8), "bmi2");
    add_flag_if(res_7.ebx & (1 << 9), "erms");
    add_flag_if(res_7.ebx & (1 << 10), "invpcid");
    add_flag_if(res_7.ebx & (1 << 11), "rtm");
    add_flag_if(res_7.ebx & (1 << 12), "rdt_m");
    add_flag_if(res_7.ebx & (1 << 13), "depc_fpu_cs_ds");
    add_flag_if(res_7.ebx & (1 << 14), "mpx");
    add_flag_if(res_7.ebx & (1 << 15), "rdt_a");
    add_flag_if(res_7.ebx & (1 << 16), "avx512f");
    add_flag_if(res_7.ebx & (1 << 17), "avx512dq");
    add_flag_if(res_7.ebx & (1 << 18), "rdseed");
    add_flag_if(res_7.ebx & (1 << 19), "adx");
    add_flag_if(res_7.ebx & (1 << 20), "smap");
    add_flag_if(res_7.ebx & (1 << 21), "avx512ifma");
    add_flag_if(res_7.ebx & (1 << 23), "clflushopt");
    add_flag_if(res_7.ebx & (1 << 24), "clwb");
    add_flag_if(res_7.ebx & (1 << 25), "intel_pt");
    add_flag_if(res_7.ebx & (1 << 26), "avx512pf");
    add_flag_if(res_7.ebx & (1 << 27), "avx512er");
    add_flag_if(res_7.ebx & (1 << 28), "avx512cd");
    add_flag_if(res_7.ebx & (1 << 29), "sha_ni");
    add_flag_if(res_7.ebx & (1 << 30), "avx512bw");
    add_flag_if(res_7.ebx & (1 << 31), "avx512vl");
    add_flag_if(res_d_1.eax & (1 << 0), "xsaveopt");
    add_flag_if(res_d_1.eax & (1 << 1), "xsavec");
    add_flag_if(res_d_1.eax & (1 << 2), "xgetbv1");
    add_flag_if(res_d_1.eax & (1 << 3), "xsaves");
    add_flag_if(res_d_1.eax & (1 << 4), "xfd");

    add_flag_if(res_7_1.eax & (1 << 5), "avx512_bf16");
    add_flag_if(res_8000_0008.ebx & (1 << 0), "clzero");
    add_flag_if(res_8000_0008.ebx & (1 << 1), "irperf");
    add_flag_if(res_8000_0008.ebx & (1 << 2), "xsaveerptr");

    // Intel digital thermal sensor
    add_flag_if(false, "dtherm");
    // Intel turbo boost
    add_flag_if(false, "ida");
    add_flag_if(res_6.eax & (1 << 2), "arat");
    // Power limit notification controls
    add_flag_if(false, "pln");
    // Intel package thermal status
    add_flag_if(false, "pts");

    // Intel Hardware P-state features
    add_flag_if(false, "hwp");
    add_flag_if(false, "hwp_notify");
    add_flag_if(false, "hwp_act_window");
    add_flag_if(false, "hwp_epp");
    add_flag_if(false, "hwp_pkg_req");

    add_flag_if(res_8000_000a.ebx & (1 << 0), "npt");
    add_flag_if(res_8000_000a.ebx & (1 << 1), "lbrv");
    add_flag_if(res_8000_000a.ebx & (1 << 2), "svm_lock");
    add_flag_if(res_8000_000a.ebx & (1 << 3), "nrip_save");
    add_flag_if(res_8000_000a.ebx & (1 << 4), "tsc_scale");
    add_flag_if(res_8000_000a.ebx & (1 << 5), "vmcb_clean");
    add_flag_if(res_8000_000a.ebx & (1 << 6), "flushbyasid");
    add_flag_if(res_8000_000a.ebx & (1 << 7), "decodeassists");
    add_flag_if(res_8000_000a.ebx & (1 << 10), "pausefilter");
    add_flag_if(res_8000_000a.ebx & (1 << 12), "pfthreshold");
    add_flag_if(res_8000_000a.ebx & (1 << 13), "avic");
    add_flag_if(res_8000_000a.ebx & (1 << 15), "v_vmsave_vmload");
    add_flag_if(res_8000_000a.ebx & (1 << 16), "vgif");

    add_flag_if(res_7.ecx & (1 << 1), "avx512vbmi");
    add_flag_if(res_7.ecx & (1 << 2), "umip");
    add_flag_if(res_7.ecx & (1 << 3), "pku");
    add_flag_if(res_7.ecx & (1 << 4), "ospke");
    add_flag_if(res_7.ecx & (1 << 5), "waitpkg");
    add_flag_if(res_7.ecx & (1 << 6), "avx512_vbmi2");
    add_flag_if(res_7.ecx & (1 << 8), "gfni");
    add_flag_if(res_7.ecx & (1 << 9), "vaes");
    add_flag_if(res_7.ecx & (1 << 10), "vpclmulqdq");
    add_flag_if(res_7.ecx & (1 << 11), "avx512_vnni");
    add_flag_if(res_7.ecx & (1 << 12), "avx512_bitalg");
    add_flag_if(res_7.ecx & (1 << 13), "tme");
    add_flag_if(res_7.ecx & (1 << 14), "avx512_vpopcntdq");
    add_flag_if(res_7.ecx & (1 << 16), "la57");
    add_flag_if(res_7.ecx & (1 << 22), "rdpid");
    add_flag_if(res_7.ecx & (1 << 24), "bus_lock_detect");
    add_flag_if(res_7.ecx & (1 << 25), "cldemote");
    add_flag_if(res_7.ecx & (1 << 27), "movdiri");
    add_flag_if(res_7.ecx & (1 << 28), "movdir64b");
    add_flag_if(res_7.ecx & (1 << 29), "enqcmd");
    add_flag_if(res_7.ecx & (1 << 30), "sqx_lc");

    add_flag_if(res_8000_0007.ebx & (1 << 0), "overflow_recov");
    add_flag_if(res_8000_0007.ebx & (1 << 1), "succor");
    add_flag_if(res_8000_0007.ebx & (1 << 3), "smca");

    add_flag_if(res_7.edx & (1 << 2), "avx512_4vnniw");
    add_flag_if(res_7.edx & (1 << 3), "avx512_4fmaps");
    add_flag_if(res_7.edx & (1 << 4), "fsrm");
    add_flag_if(res_7.edx & (1 << 8), "avx512_vp2intersect");
    add_flag_if(res_7.edx & (1 << 10), "md_clear");
    add_flag_if(res_7.edx & (1 << 14), "serialize");
    add_flag_if(res_7.edx & (1 << 18), "pconfig");
    add_flag_if(res_7.edx & (1 << 19), "arch_lbr");
    add_flag_if(res_7.edx & (1 << 20), "ibt");
    add_flag_if(res_7.edx & (1 << 22), "amx_bf16");
    add_flag_if(res_7.edx & (1 << 23), "avx512_fp16");
    add_flag_if(res_7.edx & (1 << 24), "amx_tile");
    add_flag_if(res_7.edx & (1 << 25), "amx_int8");
    add_flag_if(res_7.edx & (1 << 28), "flush_l1d");
    add_flag_if(res_7.edx & (1 << 29), "arch_capabilities");
  }

  // Get the cycle counter frequency from CPUID function 15h.
  auto res_15 = ctx->RunCPUIDFunction(0x15, 0);
  // Frequency is calculated in Hz, we need to convert it to megahertz since FEX is guaranteed to return >= 1Ghz.
  // x86 Bogomips is calculated as an equation based on the clock speed of the CPU (Or TSC) divided by 500k jiffies.
  // A `jiffie` is an internal metric for the kernel's `HZ` frequency which is usually between 100 and 1000.
  // Userspace can't query this HZ config option, so assume 1000Hz since that's common.
  // This gives a 1Ghz ARMv9.2 CPU a Bogomips of 2Ghz.
  constexpr double HzInMhz = 1000000.0;
  constexpr double HzInKhz = 1000.0;
  constexpr double BogomipsJiffyPrecision = 1'000.0;
  constexpr double BogoMipsDivisor = 500'000.0 / BogomipsJiffyPrecision;

  const double Frequency = 1.0 / (static_cast<double>(res_15.eax) / (static_cast<double>(res_15.ebx) * static_cast<double>(res_15.ecx)));
  const double FrequencyMhz = Frequency / HzInMhz;
  const double FrequencyKhz = Frequency / HzInKhz;
  const double Bogomips = FrequencyKhz / BogoMipsDivisor;
  // Generate the cycle counter frequency string in the format expected by cpuinfo.
  // ex: `4000.000`
  const auto FrequencyString = fextl::fmt::format("{:.3f}", FrequencyMhz);
  const auto BogomipsString = fextl::fmt::format("{:.2f}", Bogomips);

  for (int i = 0; i < CPUCores; ++i) {
    cpu_stream << "processor\t: " << i << std::endl; // Logical id
    cpu_stream << "vendor_id\t: " << vendorid.Str << std::endl;
    cpu_stream << "cpu family\t: " << Family << std::endl;
    cpu_stream << "model\t\t: " << (info.Model + (info.FamilyID >= 6 ? (info.ExModelID << 4) : 0)) << std::endl;
    ModelName modelname {};
    auto res_8000_0002 = ctx->RunCPUIDFunctionName(0x8000'0002, 0, i);
    auto res_8000_0003 = ctx->RunCPUIDFunctionName(0x8000'0003, 0, i);
    auto res_8000_0004 = ctx->RunCPUIDFunctionName(0x8000'0004, 0, i);
    modelname.cpuid_2 = res_8000_0002;
    modelname.cpuid_3 = res_8000_0003;
    modelname.cpuid_4 = res_8000_0004;
    modelname.null = 0;

    cpu_stream << "model name\t: " << modelname.Str << std::endl;
    cpu_stream << "stepping\t: " << info.Stepping << std::endl;
    cpu_stream << "microcode\t: 0x0" << std::endl;
    cpu_stream << "cpu MHz\t\t: " << FrequencyString << std::endl;
    cpu_stream << "cache size\t: 512 KB" << std::endl;
    cpu_stream << "physical id\t: 0" << std::endl;          // Socket id (always 0 for a single socket system)
    cpu_stream << "siblings\t: " << CPUCores << std::endl;  // Number of logical cores
    cpu_stream << "core id\t\t: " << i << std::endl;        // Physical id
    cpu_stream << "cpu cores\t: " << CPUCores << std::endl; // Number of physical cores
    cpu_stream << "apicid\t\t: " << i << std::endl;
    cpu_stream << "initial apicid\t: " << i << std::endl;
    cpu_stream << "fpu\t\t: " << (res_1.edx & (1 << 0) ? "yes" : "no") << std::endl;
    cpu_stream << "fpu_exception\t: " << (res_1.edx & (1 << 0) ? "yes" : "no") << std::endl;
    cpu_stream << "cpuid level\t: " << vendorid.id << std::endl;
    cpu_stream << "wp\t\t: yes" << std::endl;
    cpu_stream << "flags\t\t: " << flags_data.str() << std::endl;

    // We don't have any bugs, don't question it
    cpu_stream << "bugs\t\t: " << std::endl;
    cpu_stream << "bogomips\t: " << BogomipsString << std::endl;
    // These next four aren't necessarily correct
    cpu_stream << "TLB size\t: 2560 4K pages" << std::endl;
    cpu_stream << "clflush size\t: 64" << std::endl;
    cpu_stream << "cache_alignment\t : 64" << std::endl;

    // Cortex-A is 40 or 44 bits physical, and 48/52 virtual
    // Choose the lesser configuration
    cpu_stream << "address sizes\t: 40 bits physical, 48 bits virtual" << std::endl;

    // No power management but required to report
    cpu_stream << "power management: " << std::endl;

    cpu_stream << std::endl;
  }

  return cpu_stream.str();
}

// Registers the creator callbacks for every emulated file path.
// Each callback materializes the emulated contents into a temporary FD (GenTmpFD), rewinds it,
// seals it (SealTmpFD) and hands the FD back to the caller.
EmulatedFDManager::EmulatedFDManager(FEXCore::Context::Context* ctx)
  : CTX {ctx}
  , ThreadsConfig {FEX::CPUInfo::CalculateNumberOfCPUs()} {
  // Contents served for the cpu `online`/`present` files: "0" for a single CPU, otherwise the range "0-<N-1>".
  if (ThreadsConfig <= 1) {
    cpus_online = "0";
  } else {
    cpus_online = fextl::fmt::format("0-{}", ThreadsConfig - 1);
  }

  FDReadCreators["/proc/cpuinfo"] = [this](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags,
                                           mode_t mode) -> int32_t {
    // The cpuinfo text is generated lazily on first open.
    // call_once guarantees only one thread performs the generation even if several open the file concurrently.
    std::call_once(cpu_info_initialized, [this, ctx]() { cpu_info = GenerateCPUInfo(ctx, ThreadsConfig); });

    const int TmpFD = GenTmpFD(pathname, flags);
    write(TmpFD, cpu_info.data(), cpu_info.size());
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  };

  FDReadCreators["/proc/sys/kernel/osrelease"] = [](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags,
                                                    mode_t mode) -> int32_t {
    const int TmpFD = GenTmpFD(pathname, flags);
    const uint32_t KernelVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion();
    char Release[64] {};
    snprintf(Release, sizeof(Release), "%d.%d.%d\n", FEX::HLE::SyscallHandler::KernelMajor(KernelVersion),
             FEX::HLE::SyscallHandler::KernelMinor(KernelVersion), FEX::HLE::SyscallHandler::KernelPatch(KernelVersion));
    // The terminating null byte is written out as part of the file contents.
    write(TmpFD, Release, strlen(Release) + 1);
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  };

  FDReadCreators["/proc/version"] = [](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags,
                                       mode_t mode) -> int32_t {
    const int TmpFD = GenTmpFD(pathname, flags);
    // The trailing UTS version NEEDS to be in a format that `date -d` accepts.
    // Layout: Linux version <Release> (<Compile By>@<Compile Host>) (<Linux Compiler>) #<version> {SMP, PREEMPT, PREEMPT_RT} <UTS version>\n
    const char kernel_version[] = "Linux version %d.%d.%d (FEX@FEX) (clang) #" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__ "\n";
    const uint32_t KernelVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion();
    // Extra headroom over the template length for the three formatted version numbers.
    char Formatted[sizeof(kernel_version) + 64] {};
    snprintf(Formatted, sizeof(Formatted), kernel_version, FEX::HLE::SyscallHandler::KernelMajor(KernelVersion),
             FEX::HLE::SyscallHandler::KernelMinor(KernelVersion), FEX::HLE::SyscallHandler::KernelPatch(KernelVersion));
    // The terminating null byte is written out as part of the file contents.
    write(TmpFD, Formatted, strlen(Formatted) + 1);
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  };

  // Wine reads this to ensure TSC is trusted by the kernel. Otherwise it falls back to maximum clock speed of the CPU cores.
  // Without this, games like Horizon Zero Dawn would run their physics in slow-motion.
  FDReadCreators["/sys/devices/system/clocksource/clocksource0/current_clocksource"] =
    [](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t {
    const int TmpFD = GenTmpFD(pathname, flags);
    const char ClockSource[] = "tsc\n";
    // The terminating null byte is written out as part of the file contents.
    write(TmpFD, ClockSource, strlen(ClockSource) + 1);
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  };

  // Shared by the `online` and `present` files; both report the same CPU range.
  auto CPURangeCreator = [this](FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) -> int32_t {
    const int TmpFD = GenTmpFD(pathname, flags);
    write(TmpFD, cpus_online.data(), cpus_online.size());
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  };

  FDReadCreators["/sys/devices/system/cpu/online"] = CPURangeCreator;
  FDReadCreators["/sys/devices/system/cpu/present"] = CPURangeCreator;

  // The auxv file is reachable both through the explicit pid path and through `self`.
  const fextl::string PidAuxvPath = fextl::fmt::format("/proc/{}/auxv", getpid());
  FDReadCreators[PidAuxvPath] = &EmulatedFDManager::ProcAuxv;
  FDReadCreators["/proc/self/auxv"] = &EmulatedFDManager::ProcAuxv;
}

// Nothing beyond member destruction is required.
EmulatedFDManager::~EmulatedFDManager() = default;

// Opens an emulated file.
// Returns the value produced by the creator callback registered for `pathname`,
// or -1 when `pathname` is null or no creator is registered for it.
int32_t EmulatedFDManager::Open(const char* pathname, int flags, uint32_t mode) {
  // A null path can never name an emulated file.
  if (!pathname) {
    return -1;
  }

  const auto Entry = FDReadCreators.find(pathname);
  if (Entry == FDReadCreators.end()) {
    return -1;
  }

  return Entry->second(CTX, AT_FDCWD, pathname, flags, mode);
}

// Creator callback for the auxv files.
// Copies the auxv region reported by the code loader into a sealed temporary FD and returns it,
// or returns -1 when the code loader reports a base address of zero.
int32_t EmulatedFDManager::ProcAuxv(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode) {
  const auto [Base, Length] = FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv();
  if (Base != 0) {
    const int TmpFD = GenTmpFD(pathname, flags);
    write(TmpFD, (void*)Base, Length);
    lseek(TmpFD, 0, SEEK_SET);
    SealTmpFD(TmpFD);
    return TmpFD;
  }

  LogMan::Msg::DFmt("Failed to get Auxv stack address");
  return -1;
}
} // namespace FEX::EmulatedFile


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/EmulatedFiles/EmulatedFiles.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
$end_info$
*/

#pragma once
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/string.h>

#include <cstdint>
#include <functional>
#include <sys/types.h>

namespace FEXCore::Context {
class Context;
}

namespace FEX::EmulatedFile {
// Maps a fixed set of file paths to creator callbacks that produce a file descriptor
// holding emulated contents for that path.
class EmulatedFDManager {
public:
  EmulatedFDManager(FEXCore::Context::Context* ctx);
  ~EmulatedFDManager();
  // Invokes the creator registered for `pathname` and returns its result.
  // Returns -1 when `pathname` is null or has no registered creator.
  int32_t Open(const char* pathname, int flags, uint32_t mode);

private:
  // Context handed to every creator callback.
  FEXCore::Context::Context* CTX;
  // Text served for the cpu `online`/`present` files.
  fextl::string cpus_online {};
  // Guards the one-time, lazy generation of `cpu_info`.
  std::once_flag cpu_info_initialized {};
  // Cached cpuinfo text; filled on the first open of the cpuinfo file.
  fextl::string cpu_info {};
  // Signature shared by all creator callbacks; the return value is what Open() hands back.
  using FDReadStringFunc = std::function<int32_t(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode)>;
  // Emulated path -> creator callback.
  fextl::unordered_map<fextl::string, FDReadStringFunc> FDReadCreators;

  // Creator for the auxv files.
  static int32_t ProcAuxv(FEXCore::Context::Context* ctx, int32_t fd, const char* pathname, int32_t flags, mode_t mode);
  // Number of CPUs reported to the guest. Declared last, so it is initialized after all other members.
  const uint32_t ThreadsConfig;
};
} // namespace FEX::EmulatedFile


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/FaultSafeUserMemAccess.cpp
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/Syscalls.h"

namespace FEX::HLE::FaultSafeUserMemAccess {
#ifdef ARCHITECTURE_arm64
// Byte-wise copy of `Size` bytes from `Src` to `Dest`, written as a naked function in AArch64 assembly
// so that the one instruction that reads `Src` has a known, exported address (`CopyFromUser_FaultInst`).
// Arguments arrive in x0 (Dest), x1 (Src) and x2 (Size); w3 is used as the scratch byte.
// Returns 0 in x0 once the copy finishes (a zero-sized copy returns 0 immediately).
// NOTE(review): nothing in this function sets a return value for the faulting case; presumably the
// fault handler that consults IsFaultLocation() supplies it and resumes at the caller — confirm.
__attribute__((naked)) size_t CopyFromUser(void* Dest, const void* Src, size_t Size) {
  __asm volatile(R"(
  // Early exit if a memcpy of size zero.
  cbz x2, 2f;

  1:
  .globl CopyFromUser_FaultInst
  CopyFromUser_FaultInst:
    ldrb w3, [x1], 1; // <- This line can fault.
    strb w3, [x0], 1;
    sub x2, x2, 1;
    cbnz x2, 1b;
2:
    mov x0, 0;
    ret;
  )" ::
                   : "memory");
}

// Byte-wise copy of `Size` bytes from `Src` to `Dest`, written as a naked function in AArch64 assembly
// so that the one instruction that writes `Dest` has a known, exported address (`CopyToUser_FaultInst`).
// Arguments arrive in x0 (Dest), x1 (Src) and x2 (Size); w3 is used as the scratch byte.
// Returns 0 in x0 once the copy finishes (a zero-sized copy returns 0 immediately).
// NOTE(review): nothing in this function sets a return value for the faulting case; presumably the
// fault handler that consults IsFaultLocation() supplies it and resumes at the caller — confirm.
__attribute__((naked)) size_t CopyToUser(void* Dest, const void* Src, size_t Size) {
  __asm volatile(R"(
  // Early exit if a memcpy of size zero.
  cbz x2, 2f;

  1:
    ldrb w3, [x1], 1;
  .globl CopyToUser_FaultInst
  CopyToUser_FaultInst:
    strb w3, [x0], 1; // <- This line can fault.
    sub x2, x2, 1;
    cbnz x2, 1b;
2:
    mov x0, 0;
    ret;
  )" ::
                   : "memory");
}

// The `.globl` labels emitted inside the assembly above are declared here as C objects purely so
// their addresses can be taken; the declared type is never read through.
extern "C" uint64_t CopyFromUser_FaultInst;
// Address of the load instruction in CopyFromUser that is allowed to fault.
void* const CopyFromUser_FaultLocation = &CopyFromUser_FaultInst;

extern "C" uint64_t CopyToUser_FaultInst;
// Address of the store instruction in CopyToUser that is allowed to fault.
void* const CopyToUser_FaultLocation = &CopyToUser_FaultInst;

#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED && defined(ARCHITECTURE_arm64)
// Assertion-build helper: loads (and discards) every byte in [Src, Src + Size) one at a time.
// Arguments arrive in x0 (Src) and x1 (Size). The load has the exported address `UserReadable_FaultInst`.
// Returns 1 (true) in x0 once every byte has been read; a zero size returns 1 immediately.
// NOTE(review): the result for the faulting case is not produced here; presumably the fault handler
// that consults IsFaultLocation() supplies it — confirm.
__attribute__((naked)) bool VerifyIsReadableImpl(const void* Src, size_t Size) {
  __asm volatile(R"(
  // Early exit if size is zero.
  cbz x1, 2f;

  1:
  .globl UserReadable_FaultInst
  UserReadable_FaultInst:
  ldrb wzr, [x0], 1; // <- This line can fault.
  sub x1, x1, 1;
  cbnz x1, 1b;

  2:
  mov x0, 1;
  ret;
  )" ::
                   : "memory");
}

// Assertion-build helper: for every byte in [Src, Src + Size), reads the byte and stores the same
// value back, so the memory contents are left unchanged while the store exercises write access.
// Arguments arrive in x0 (Src) and x1 (Size); w2 is the scratch byte.
// Only the store carries an exported fault label (`UserWritable_FaultInst`); the preceding load does not,
// which is why callers check readability first.
// Returns 1 (true) in x0 once every byte has been written; a zero size returns 1 immediately.
// NOTE(review): the result for the faulting case is not produced here; presumably the fault handler
// that consults IsFaultLocation() supplies it — confirm.
__attribute__((naked)) bool VerifyIsOnlyWritable(void* Src, size_t Size) {
  __asm volatile(R"(
  // Early exit if size is zero.
  cbz x1, 2f;

  1:
  ldrb w2, [x0];
  .globl UserWritable_FaultInst
  UserWritable_FaultInst:
  strb w2, [x0], 1; // <- This line can fault.

  sub x1, x1, 1;
  cbnz x1, 1b;

  2:
  mov x0, 1;
  ret;
  )" ::
                   : "memory");
}

// Assertion-build helper: reads bytes starting at `Src` until either a zero byte has been read or
// `MaxSize` bytes have been consumed, whichever comes first.
// Arguments arrive in x0 (Src) and x1 (MaxSize); w2 holds the byte just read.
// The load has the exported address `UserStringReadable_FaultInst`.
// Returns 1 (true) in x0 when the scan ends without faulting; a MaxSize of zero returns 1 immediately.
// NOTE(review): the result for the faulting case is not produced here; presumably the fault handler
// that consults IsFaultLocation() supplies it — confirm.
__attribute__((naked)) bool VerifyIsStringReadableMaxSizeImpl(const char* Src, size_t MaxSize) {
  __asm volatile(R"(
  1:
  cbz x1, 2f;

  .globl UserStringReadable_FaultInst
  UserStringReadable_FaultInst:
  ldrb w2, [x0], 1; //< This line can fault.
  sub x1, x1, 1;
  cbnz x2, 1b;

  2:
  mov x0, 1;
  ret;
  )" ::
                   : "memory");
}

// Asserts that every byte in [Src, Src + Size) can be read.
void VerifyIsReadable(const void* Src, size_t Size) {
  const bool Readable = VerifyIsReadableImpl(Src, Size);
  LOGMAN_THROW_A_FMT(Readable, "EFAULT needs readable!");
}

// Asserts that the string at `Src` can be read up to and including its terminator.
// The scan length is effectively unbounded (all-ones limit).
void VerifyIsStringReadable(const char* Src) {
  const bool Readable = VerifyIsStringReadableMaxSizeImpl(Src, ~0ULL);
  LOGMAN_THROW_A_FMT(Readable, "EFAULT needs string readable!");
}

// Asserts that the string at `Src` can be read, scanning at most `MaxSize` bytes.
void VerifyIsStringReadableMaxSize(const char* Src, size_t MaxSize) {
  const bool Readable = VerifyIsStringReadableMaxSizeImpl(Src, MaxSize);
  LOGMAN_THROW_A_FMT(Readable, "EFAULT needs string readable!");
}

// Like VerifyIsReadable, but a null `Src` is accepted without any check.
void VerifyIsReadableOrNull(const void* Src, size_t Size) {
  if (Src != nullptr) {
    LOGMAN_THROW_A_FMT(VerifyIsReadableImpl(Src, Size), "EFAULT needs readable!");
  }
}

// Asserts that every byte in [Src, Src + Size) can be both read and written.
void VerifyIsWritable(void* Src, size_t Size) {
  // The write probe reads each byte before storing it back, so readability has to be established first.
  VerifyIsReadable(Src, Size);

  const bool Writable = VerifyIsOnlyWritable(Src, Size);
  LOGMAN_THROW_A_FMT(Writable, "EFAULT needs writable!");
}

// Like VerifyIsWritable, but a null `Src` is accepted without any check.
void VerifyIsWritableOrNull(void* Src, size_t Size) {
  if (Src != nullptr) {
    // The write probe reads each byte before storing it back, so readability has to be established first.
    VerifyIsReadable(Src, Size);
    LOGMAN_THROW_A_FMT(VerifyIsOnlyWritable(Src, Size), "EFAULT needs writable!");
  }
}

// The `.globl` labels emitted inside the assertion helpers are declared here as C objects purely so
// their addresses can be taken; the declared type is never read through.
extern "C" uint64_t UserReadable_FaultInst;
// Address of the load in VerifyIsReadableImpl that is allowed to fault.
void* const UserReadable_FaultLocation = &UserReadable_FaultInst;

extern "C" uint64_t UserWritable_FaultInst;
// Address of the store in VerifyIsOnlyWritable that is allowed to fault.
void* const UserWritable_FaultLocation = &UserWritable_FaultInst;

extern "C" uint64_t UserStringReadable_FaultInst;
// Address of the load in VerifyIsStringReadableMaxSizeImpl that is allowed to fault.
void* const UserStringReadable_FaultLocation = &UserStringReadable_FaultInst;
#endif

// Returns true when `PC` is the address of one of the labelled instructions in the fault-safe
// assembly routines of this file (the assertion helpers are only considered when they are compiled in).
bool IsFaultLocation(uint64_t PC) {
  const void* const FaultAddress = reinterpret_cast<void*>(PC);

  bool Matches = FaultAddress == CopyToUser_FaultLocation || FaultAddress == CopyFromUser_FaultLocation;
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED && defined(ARCHITECTURE_arm64)
  Matches = Matches || FaultAddress == UserReadable_FaultLocation || FaultAddress == UserWritable_FaultLocation ||
            FaultAddress == UserStringReadable_FaultLocation;
#endif
  return Matches;
}

#else
// Fallback for hosts without the fault-safe assembly implementation: a plain memcpy that cannot
// recover from a fault.
// Returns 0 on completion, matching the AArch64 implementation in this file (which ends with `mov x0, 0`).
// Returning `Size` instead would make the result depend on the copy length, and a caller comparing the
// result against an errno value such as EFAULT (14) would misread a successful 14-byte copy as a failure.
size_t CopyFromUser(void* Dest, const void* Src, size_t Size) {
  memcpy(Dest, Src, Size);
  return 0;
}

// Fallback for hosts without the fault-safe assembly implementation: a plain memcpy that cannot
// recover from a fault.
// Returns 0 on completion, matching the AArch64 implementation in this file (which ends with `mov x0, 0`).
// Returning `Size` instead would make the result depend on the copy length, and a caller comparing the
// result against an errno value such as EFAULT (14) would misread a successful 14-byte copy as a failure.
size_t CopyToUser(void* Dest, const void* Src, size_t Size) {
  memcpy(Dest, Src, Size);
  return 0;
}

// This build has no labelled fault instructions, so no program counter can ever match one.
bool IsFaultLocation([[maybe_unused]] uint64_t PC) {
  return false;
}
#endif
} // namespace FEX::HLE::FaultSafeUserMemAccess


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
desc: Rootfs overlay logic
$end_info$
*/

#include "Common/Config.h"
#include "Common/FDUtils.h"
#include "Common/JSONPool.h"

#include "FEXCore/Config/Config.h"
#include "LinuxSyscalls/FileManagement.h"
#include "LinuxSyscalls/EmulatedFiles/EmulatedFiles.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/SymlinkChecks.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <algorithm>
#include <errno.h>
#include <cstring>
#include <linux/openat2.h>
#include <fcntl.h>
#include <filesystem>
#include <optional>
#include <stdio.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/xattr.h>
#include <syscall.h>
#include <system_error>
#include <unistd.h>
#include <utility>

#include <tiny-json.h>

namespace FEX::HLE {
// Reports whether the absolute guest path `Filepath` exists when resolved relative to the RootFS directory fd.
bool FileManager::RootFSPathExists(const char* Filepath) const {
  LOGMAN_THROW_A_FMT(Filepath && Filepath[0] == '/', "Filepath needs to be absolute");

  // Drop the leading '/' so the lookup is relative to RootFSFD.
  const char* RelativePath = &Filepath[1];
  return FHU::Filesystem::ExistsAt(RootFSFD, RelativePath);
}

// Merges the entries of the global or user `ThunksDB.json` (selected by `Global`) into `ThunkDB`.
// A missing file is silently ignored, a file that is not valid JSON is fatal, and a file without an
// object-typed "DB" property contributes nothing. Every library listed replaces any entry of the
// same name already present in `ThunkDB`.
void FileManager::LoadThunkDatabase(fextl::unordered_map<fextl::string, ThunkDBObject>& ThunkDB, bool Global) {
  auto ThunkDBPath = FEXCore::Config::GetConfigDirectory(Global) + "ThunksDB.json";
  fextl::vector<char> FileData;
  if (!FEXCore::FileLoading::LoadFile(FileData, ThunkDBPath)) {
    // No database at this location; nothing to merge.
    return;
  }

  // Multi-arch debian distros have a fairly complex arrangement of filepaths.
  // These fractal out to the combination of library prefixes with arch suffixes.
  constexpr static std::array<std::string_view, 4> MultiarchLibPrefixes = {
    "/usr/lib",
    "/usr/local/lib",
    "/lib",
    "/usr/lib/pressure-vessel/overrides/lib",
  };

  // Distros without multi-arch support have a much simpler layout.
  // lib/ folders refer to 32-bit library folders.
  // lib64/ folders refer to 64-bit library folders.
  constexpr static std::array<std::string_view, 4> PlainLibPrefixes = {
    "/usr",
    "/usr/local",
    "", // root, the '/' is added when the prefix is joined with the arch directory.
    "/usr/lib/pressure-vessel/overrides",
  };

  // Now that the thunksDB file exists, check whether the rootfs uses the multi-arch layout.
  const bool RootFSIsMultiarch = RootFSPathExists("/usr/lib/x86_64-linux-gnu/") || RootFSPathExists("/usr/lib/i386-linux-gnu/");

  // Only the 32-bit or the 64-bit directories are needed, depending on the operating mode.
  const bool Is64Bit = Is64BitMode();
  std::string_view ArchDirectory;
  if (RootFSIsMultiarch) {
    ArchDirectory = Is64Bit ? "x86_64-linux-gnu" : "i386-linux-gnu";
  } else {
    ArchDirectory = Is64Bit ? "lib64" : "lib";
  }

  const auto& LibPrefixes = RootFSIsMultiarch ? MultiarchLibPrefixes : PlainLibPrefixes;
  fextl::vector<fextl::string> PathPrefixes {};
  for (const auto Prefix : LibPrefixes) {
    PathPrefixes.emplace_back(fextl::fmt::format("{}/{}", Prefix, ArchDirectory));
  }

  FEX::JSON::JsonAllocator Pool {};
  const json_t* json = FEX::JSON::CreateJSON(FileData, Pool);

  if (!json) {
    ERROR_AND_DIE_FMT("Failed to parse JSON from ThunkDB file '{}' - invalid JSON format", ThunkDBPath);
  }

  const json_t* DB = json_getProperty(json, "DB");
  if (!DB || JSON_OBJ != json_getType(DB)) {
    return;
  }

  auto HomeDirectory = FEX::Config::GetHomeDirectory();

  // Expands the first "@HOME@" and the first "@PREFIX_LIB@" in an overlay template and appends the
  // result to the library's overlay list. A template using "@PREFIX_LIB@" yields one overlay per
  // entry in PathPrefixes; any other template yields exactly one.
  const auto AddWithReplacement = [&HomeDirectory, &PathPrefixes](ThunkDBObject& Object, std::string_view Template) {
    constexpr std::string_view HomeToken = "@HOME@";
    constexpr std::string_view LibToken = "@PREFIX_LIB@";
    const auto HomePos = Template.find(HomeToken);
    const auto LibPos = Template.find(LibToken);

    // Visit the larger offset first so that replacing it does not shift the smaller one.
    // (npos is the largest possible value, so a missing token is simply visited first and skipped.)
    const fextl::string::size_type DescendingPositions[] = {
      std::max(HomePos, LibPos),
      std::min(HomePos, LibPos),
    };

    for (const auto& LibPrefix : PathPrefixes) {
      fextl::string Expanded(Template);
      for (const auto Pos : DescendingPositions) {
        if (Pos == fextl::string::npos) {
          continue;
        }

        if (Pos == HomePos) {
          Expanded.replace(Pos, HomeToken.size(), HomeDirectory);
        } else if (Pos == LibPos) {
          Expanded.replace(Pos, LibToken.size(), LibPrefix);
        }
      }
      Object.Overlays.emplace_back(std::move(Expanded));

      if (LibPos == fextl::string::npos) {
        // The library prefix wasn't used, so further prefixes would only produce duplicates.
        break;
      }
    }
  };

  // Applies `Fn` to the value of a text property, or to the value of each element of an array property.
  // Properties of any other type are ignored.
  const auto ForEachValue = [](const json_t* Property, auto&& Fn) {
    const jsonType_t PropertyType = json_getType(Property);
    if (PropertyType == JSON_TEXT) {
      Fn(json_getValue(Property));
    } else if (PropertyType == JSON_ARRAY) {
      for (const json_t* Element = json_getChild(Property); Element != nullptr; Element = json_getSibling(Element)) {
        Fn(json_getValue(Element));
      }
    }
  };

  for (const json_t* Library = json_getChild(DB); Library != nullptr; Library = json_getSibling(Library)) {
    // The property name is the user defined name for the library.
    const char* LibraryName = json_getName(Library);
    auto& Entry = ThunkDB.insert_or_assign(LibraryName, ThunkDBObject {}).first->second;

    // Walk the library's properties to collect its data.
    for (const json_t* LibraryItem = json_getChild(Library); LibraryItem != nullptr; LibraryItem = json_getSibling(LibraryItem)) {
      const std::string_view ItemName = json_getName(LibraryItem);

      if (ItemName == "Library") {
        // "Library": "libGL-guest.so"
        Entry.LibraryName = json_getValue(LibraryItem);
      } else if (ItemName == "Depends") {
        ForEachValue(LibraryItem, [&Entry](const char* Value) { Entry.Depends.emplace(Value); });
      } else if (ItemName == "Overlay") {
        ForEachValue(LibraryItem, [&Entry, &AddWithReplacement](const char* Value) { AddWithReplacement(Entry, Value); });
      }
    }
  }
}

// Sets up the file manager:
//  1. Collects every config file that may carry a "ThunksDB" property.
//  2. Opens the RootFS directory (when one is configured).
//  3. Loads the global and user thunk databases and applies the enable/disable switches from the configs.
//  4. Registers a RootFS overlay for every enabled thunk library and, recursively, for its dependencies.
//  5. Opens `/proc` and records its device id.
FileManager::FileManager(FEXCore::Context::Context* ctx)
  : EmuFD {ctx} {
  const auto& ThunkConfigFile = ThunkConfig();

  // We try to load ThunksDB from:
  // - FEX global config
  // - FEX user config
  // - Defined ThunksConfig option
  // - Steam AppConfig Global
  // - AppConfig Global
  // - Steam AppConfig Local
  // - AppConfig Local
  // - AppConfig override
  // This doesn't support the classic thunks interface.

  const auto& AppName = AppConfigName();
  fextl::vector<fextl::string> ConfigPaths {
    FEXCore::Config::GetConfigFileLocation(true),
    FEXCore::Config::GetConfigFileLocation(false),
    ThunkConfigFile,
  };

  auto SteamID = getenv("SteamAppId");
  if (SteamID) {
    // If a SteamID exists then let's search for Steam application configs as well.
    // We want to key off both the SteamAppId number /and/ the executable since we may not want to thunk all binaries.
    fextl::string SteamAppName = fextl::fmt::format("Steam_{}_{}", SteamID, AppName);

    // Steam application configs interleaved with non-steam for priority sorting.
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(SteamAppName, true));
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(AppName, true));
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(SteamAppName, false));
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(AppName, false));
  } else {
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(AppName, true));
    ConfigPaths.emplace_back(FEXCore::Config::GetApplicationConfig(AppName, false));
  }

  const char* AppConfig = getenv("FEX_APP_CONFIG");
  if (AppConfig) {
    ConfigPaths.emplace_back(AppConfig);
  }

  if (!LDPath().empty()) {
    RootFSFD = open(LDPath().c_str(), O_DIRECTORY | O_PATH | O_CLOEXEC);
    if (RootFSFD == -1) {
      // Fall back to resolving relative to the current working directory.
      RootFSFD = AT_FDCWD;
    } else {
      TrackFEXFD(RootFSFD);
    }
  }

  // The user database is loaded second, so its entries replace same-named global ones.
  fextl::unordered_map<fextl::string, ThunkDBObject> ThunkDB;
  LoadThunkDatabase(ThunkDB, true);
  LoadThunkDatabase(ThunkDB, false);

  // Later config files override the enable state set by earlier ones.
  for (const auto& Path : ConfigPaths) {
    fextl::vector<char> FileData;
    if (FEXCore::FileLoading::LoadFile(FileData, Path)) {
      FEX::JSON::JsonAllocator Pool {};

      // If a thunks DB property exists then we pull in data from the thunks database
      const json_t* json = FEX::JSON::CreateJSON(FileData, Pool);
      if (!json) {
        continue;
      }

      const json_t* ThunksDB = json_getProperty(json, "ThunksDB");
      if (!ThunksDB) {
        continue;
      }

      for (const json_t* Item = json_getChild(ThunksDB); Item != nullptr; Item = json_getSibling(Item)) {
        const char* LibraryName = json_getName(Item);
        bool LibraryEnabled = json_getInteger(Item) != 0;
        // Libraries that are not in the database are ignored.
        auto DBObject = ThunkDB.find(LibraryName);
        if (DBObject != ThunkDB.end()) {
          DBObject->second.Enabled = LibraryEnabled;
        }
      }
    }
  }

  // Now that we loaded the thunks object, walk through and ensure dependencies are enabled as well
  auto ThunkGuestPath = ThunkGuestLibs();
  while (ThunkGuestPath.ends_with('/')) {
    ThunkGuestPath.pop_back();
  }
  if (!Is64BitMode()) {
    // 32-bit guest libraries live in a sibling directory with a "_32" suffix.
    ThunkGuestPath += "_32";
  }
  for (const auto& DBObject : ThunkDB) {
    if (!DBObject.second.Enabled) {
      continue;
    }

    // Recursively add paths for this thunk library and its dependencies to ThunkOverlays.
    // Using a local struct for this is slightly less ugly than using self-capturing lambdas
    struct {
      decltype(FileManager::ThunkOverlays)& ThunkOverlays;
      decltype(ThunkDB)& DB;
      const fextl::string& ThunkGuestPath;
      bool Is64BitMode;

      // Maps every overlay path of `DBDepend` to its guest thunk library.
      void SetupOverlay(const ThunkDBObject& DBDepend) {
        auto ThunkPath = fextl::fmt::format("{}/{}", ThunkGuestPath, DBDepend.LibraryName);
        if (!FHU::Filesystem::Exists(ThunkPath)) {
          if (!Is64BitMode) {
            // Guest libraries not existing is expected since not all libraries are thunked on 32-bit
            return;
          }
          ERROR_AND_DIE_FMT("Requested thunking via guest library \"{}\" that does not exist", ThunkPath);
        }

        for (const auto& Overlay : DBDepend.Overlays) {
          // Direct full path in guest RootFS to our overlay file
          ThunkOverlays.emplace(Overlay, ThunkPath);
        }
      }

      // Enables every not-yet-enabled dependency in `Depends` and recurses into its own dependencies.
      void InsertDependencies(const fextl::unordered_set<fextl::string>& Depends) {
        for (const auto& Depend : Depends) {
          // Look the dependency up explicitly: a name without a database entry is a configuration
          // error, and a plain `at()` would fail without saying which name was at fault.
          auto DependEntry = DB.find(Depend);
          if (DependEntry == DB.end()) {
            ERROR_AND_DIE_FMT("ThunksDB dependency \"{}\" has no matching entry in the thunks database", Depend);
          }

          auto& DBDepend = DependEntry->second;
          if (DBDepend.Enabled) {
            // Already enabled; marking entries enabled before recursing also terminates dependency cycles.
            continue;
          }

          SetupOverlay(DBDepend);

          // Mark enabled and recurse into dependencies
          DBDepend.Enabled = true;
          InsertDependencies(DBDepend.Depends);
        }
      }
    } DBObjectHandler {ThunkOverlays, ThunkDB, ThunkGuestPath, Is64BitMode()};

    DBObjectHandler.SetupOverlay(DBObject.second);
    DBObjectHandler.InsertDependencies(DBObject.second.Depends);
  }

  if (false) {
    // Useful for debugging
    if (ThunkOverlays.size()) {
      LogMan::Msg::IFmt("Thunk Overlays:");
      for (const auto& [Overlay, ThunkPath] : ThunkOverlays) {
        LogMan::Msg::IFmt("\t{} -> {}", Overlay, ThunkPath);
      }
    }
  }

  // Keep an fd open for /proc, to bypass chroot-style sandboxes
  ProcFD = open("/proc", O_RDONLY | O_CLOEXEC);
  if (ProcFD != -1) {
    // Track the st_dev of /proc, to check for inode equality
    struct stat Buffer;
    auto Result = fstat(ProcFD, &Buffer);
    if (Result >= 0) {
      ProcFSDev = Buffer.st_dev;
    }
  } else {
    LogMan::Msg::EFmt("Couldn't open `/proc`. Is ProcFS mounted? FEX won't be able to track FD conflicts");
  }

  UpdatePID(::getpid());
}

// Releases the RootFS directory descriptor.
FileManager::~FileManager() {
  // The constructor stores AT_FDCWD (a negative sentinel, not a real descriptor) when the RootFS
  // could not be opened, so only hand genuine descriptors to close().
  if (RootFSFD >= 0) {
    close(RootFSFD);
  }
}

size_t FileManager::GetRootFSPrefixLen(const char* pathname, size_t len, bool AliasedOnly) const {
  if (len < 2 ||            // If no pathname or root
      pathname[0] != '/') { // If we are getting root
    return 0;
  }

  const auto& RootFSPath = LDPath();
  if (RootFSPath.empty()) { // If RootFS doesn't exist
    return 0;
  }

  auto RootFSLen = RootFSPath.length();
  if (RootFSPath.ends_with("/")) {
    RootFSLen -= 1;
  }

  if (RootFSLen > len) {
    return 0;
  }

  if (memcmp(pathname, RootFSPath.c_str(), RootFSLen) || (len > RootFSLen && pathname[RootFSLen] != '/')) {
    return 0; // If the path is not within the RootFS
  }

  if (AliasedOnly) {
    fextl::string Path(pathname, len); // Need to nul-terminate so copy

    struct stat HostStat {};
    struct stat RootFSStat {};
    if (lstat(Path.c_str(), &RootFSStat)) {
      LogMan::Msg::DFmt("GetRootFSPrefixLen: lstat on RootFS path failed: {}", std::string_view(pathname, len));
      return 0; // RootFS path does not exist?
    }
    if (lstat(Path.c_str() + RootFSLen, &HostStat)) {
      return 0; // Host path does not exist or not accessible
    }
    // Note: We do not check st_dev, since the RootFS might be
    // an overlayfs mount that changes it. This means there could
    // be false positives. However, since we check the size too,
    // this is highly unlikely (an overlaid file would need to
    // have the same exact size and coincidentally the same
    // inode number as on the host, which is implausible for things
    // like binaries and libraries).
    if (RootFSStat.st_size != HostStat.st_size || RootFSStat.st_ino != HostStat.st_ino || RootFSStat.st_mode != HostStat.st_mode) {
      return 0; // Host path is a different file
    }
  }

  return RootFSLen;
}

// Removes the RootFS prefix from `pathname` in place and returns the new length.
// Negative lengths and paths outside the RootFS are returned untouched. When the path is exactly
// the RootFS directory, it is either left as-is (`leaky`) or replaced with "/".
ssize_t FileManager::StripRootFSPrefix(char* pathname, ssize_t len, bool leaky) const {
  if (len < 0) {
    return len;
  }

  const size_t PrefixLen = GetRootFSPrefixLen(pathname, len, false);
  if (PrefixLen == 0) {
    return len;
  }

  const size_t Remaining = static_cast<size_t>(len) - PrefixLen;
  if (Remaining == 0) {
    if (!leaky) {
      ::strcpy(pathname, "/");
      return 1;
    }

    // Getting the root, without a trailing /. This is a hack pressure-vessel uses to get the FEX RootFS,
    // so we have to leak it here...
    LogMan::Msg::DFmt("Leaking RootFS path for pressure-vessel");
    return len;
  }

  // Shift the remainder to the front; the regions overlap, hence memmove.
  ::memmove(pathname, pathname + PrefixLen, Remaining);
  pathname[Remaining] = '\0';

  return static_cast<ssize_t>(Remaining);
}

// Maps a path located inside the RootFS to the path it would have with the
// RootFS prefix removed. Returns an empty string when GetRootFSPrefixLen()
// reports no prefix for `Path` (with `AliasedOnly` forwarded to it).
fextl::string FileManager::GetHostPath(fextl::string& Path, bool AliasedOnly) const {
  const auto PrefixLen = GetRootFSPrefixLen(Path.c_str(), Path.length(), AliasedOnly);
  if (PrefixLen == 0) {
    return {};
  }

  auto Stripped = Path.substr(PrefixLen);
  if (Stripped.empty()) {
    // Nothing left after the prefix: the path was the RootFS directory itself.
    Stripped = "/";
  }
  return Stripped;
}

// Builds the host path that `pathname` maps to: either a thunk overlay entry or
// the RootFS path concatenated with `pathname`.
//
// Returns an empty string when `pathname` is null, relative, exactly "/", or
// when no RootFS path is available (and no thunk overlay matched).
//
// With `FollowSymlink` set, absolute symlinks found at the resulting location
// are re-rooted inside the RootFS and followed. The number of hops is capped so
// that a cycle of absolute symlinks (e.g. /a -> /b -> /a) cannot hang the
// caller; when the cap is hit the last candidate is returned and the following
// host syscall reports the error.
fextl::string FileManager::GetEmulatedPath(const char* pathname, bool FollowSymlink) const {
  if (!pathname ||                  // If no pathname
      pathname[0] != '/' ||         // If relative
      strcmp(pathname, "/") == 0) { // If we are getting root
    return {};
  }

  auto thunkOverlay = ThunkOverlays.find(pathname);
  if (thunkOverlay != ThunkOverlays.end()) {
    return thunkOverlay->second;
  }

  const auto& RootFSPath = LDPath();
  if (RootFSPath.empty()) { // If RootFS doesn't exist
    return {};
  }

  fextl::string Path = RootFSPath + pathname;
  if (FollowSymlink) {
    // Linux path resolution gives up after 40 symlinks; use the same bound here.
    constexpr int MaxSymlinkHops = 40;

    char Filename[PATH_MAX];
    for (int Hop = 0; Hop < MaxSymlinkHops && FEX::HLE::IsSymlink(AT_FDCWD, Path.c_str()); ++Hop) {
      auto SymlinkSize = FEX::HLE::GetSymlink(AT_FDCWD, Path.c_str(), Filename, PATH_MAX - 1);
      if (SymlinkSize > 0 && Filename[0] == '/') {
        // Absolute target: interpret it relative to the RootFS and look again.
        Path = RootFSPath;
        Path += std::string_view(Filename, SymlinkSize);
      } else {
        // Relative target or readlink failure: leave resolution to the kernel.
        break;
      }
    }
  }
  return Path;
}

// Translates an absolute guest `pathname` into a (directory fd, path) pair that
// can be handed to the *at() family of syscalls.
//
// Results:
//  - {-1, nullptr} when there is nothing to emulate: null/relative pathname,
//    the root itself, a dirfd other than AT_FDCWD for a relative path, no
//    RootFS fd, or (when following symlinks) the initial entry does not exist
//    in the RootFS.
//  - {AT_FDCWD, overlay path} when `pathname` has a thunk overlay entry.
//  - {RootFSFD, path without the leading '/'} otherwise.
//
// The returned path may point into `pathname`, into a ThunkOverlays entry or
// into `TmpFilename`, so all of those must outlive the use of the result.
//
// With `FollowSymlink` set, absolute symlinks are chased inside the RootFS.
// The chase is capped so a cycle of absolute symlinks cannot spin forever;
// when the cap is reached the last candidate is returned as-is.
FileManager::EmulatedFDPathResult
FileManager::GetEmulatedFDPath(int dirfd, const char* pathname, bool FollowSymlink, FDPathTmpData& TmpFilename) const {
  constexpr auto NoEntry = EmulatedFDPathResult {-1, nullptr};

  // Linux path resolution gives up after 40 symlinks; use the same bound here.
  constexpr uint32_t MaxSymlinkHops = 40;

  if (!pathname) {
    // No pathname.
    return NoEntry;
  }

  if (pathname[0] == '/') {
    // If the path is absolute then dirfd is ignored.
    dirfd = AT_FDCWD;
  }

  if (pathname[0] != '/' || // If relative
      pathname[1] == 0 ||   // If we are getting root
      dirfd != AT_FDCWD) {  // If dirfd isn't special FDCWD
    return NoEntry;
  }

  auto thunkOverlay = ThunkOverlays.find(pathname);
  if (thunkOverlay != ThunkOverlays.end()) {
    return EmulatedFDPathResult {AT_FDCWD, thunkOverlay->second.c_str()};
  }

  if (RootFSFD == AT_FDCWD) {
    // If RootFS doesn't exist
    return NoEntry;
  }

  // Starting subpath is the pathname passed in.
  const char* SubPath = pathname;

  // Current index for the temporary path to use.
  uint32_t CurrentIndex {};

  // The two temporary paths.
  const std::array<char*, 2> TmpPaths = {
    TmpFilename[0],
    TmpFilename[1],
  };

  if (FollowSymlink) {
    // Check if the combination of RootFS FD and subpath with the front '/' stripped off is a symlink.
    bool HadAtLeastOne {};
    struct stat Buffer {};
    for (uint32_t Hop = 0; Hop < MaxSymlinkHops; ++Hop) {
      // We need to check if the filepath exists and is a symlink.
      // If the initial filepath doesn't exist then early exit.
      // If it did exist at some stage then trace it all the way to the final link.
      int Result = fstatat(RootFSFD, &SubPath[1], &Buffer, AT_SYMLINK_NOFOLLOW);
      if (Result != 0 && errno == ENOENT && !HadAtLeastOne) {
        // Initial file didn't exist at all
        return NoEntry;
      }

      const bool IsLink = Result == 0 && S_ISLNK(Buffer.st_mode);

      HadAtLeastOne = true;

      if (IsLink) {
        // Choose the current temporary working path.
        auto CurrentTmp = TmpPaths[CurrentIndex];

        // Get the symlink of RootFS FD + stripped subpath.
        auto SymlinkSize = FEX::HLE::GetSymlink(RootFSFD, &SubPath[1], CurrentTmp, PATH_MAX - 1);

        // This might be a /proc symlink into the RootFS, so strip it in that case.
        SymlinkSize = StripRootFSPrefix(CurrentTmp, SymlinkSize, false);

        if (SymlinkSize > 1 && CurrentTmp[0] == '/') {
          // If the symlink is absolute and not the root:
          // 1) Zero terminate it.
          // 2) Set the path as our current subpath.
          // 3) Switch to the next temporary index. (We don't want to overwrite the current one on the next loop iteration).
          // 4) Run the loop again.
          CurrentTmp[SymlinkSize] = 0;
          SubPath = CurrentTmp;
          CurrentIndex ^= 1;
        } else {
          // If the link target could not be read, is relative, or is the root:
          // 1) Break early, returning the previous found result.
          // 2) If first iteration then we return `pathname`.
          break;
        }
      } else {
        break;
      }
    }
  }

  // Return the pair of rootfs FD plus relative subpath by stripping off the front '/'
  return EmulatedFDPathResult {RootFSFD, &SubPath[1]};
}

///< True when `flags` carries AT_SYMLINK_NOFOLLOW and `Pathname` is one of the /proc exe links of this process.
bool FileManager::IsSelfNoFollow(const char* Pathname, int flags) const {
  // A following lookup, or a missing path, never qualifies.
  if ((flags & AT_SYMLINK_NOFOLLOW) == 0 || !Pathname) {
    return false;
  }

  // Spelling of the link that uses the numeric PID instead of "self".
  char PidExe[50];
  snprintf(PidExe, sizeof(PidExe), "/proc/%i/exe", CurrentPID);

  if (strcmp(Pathname, "/proc/self/exe") == 0) {
    return true;
  }
  if (strcmp(Pathname, "/proc/thread-self/exe") == 0) {
    return true;
  }
  return strcmp(Pathname, PidExe) == 0;
}

// Substitutes the result of Filename() for any of the /proc exe links of this
// process ("/proc/self/exe", "/proc/thread-self/exe", "/proc/<CurrentPID>/exe").
// Every other path is returned unchanged; a null path yields std::nullopt.
std::optional<std::string_view> FileManager::GetSelf(const char* Pathname) const {
  if (!Pathname) {
    return std::nullopt;
  }

  // Spelling of the link that uses the numeric PID instead of "self".
  char PidExe[50];
  snprintf(PidExe, sizeof(PidExe), "/proc/%i/exe", CurrentPID);

  const bool IsSelfExe =
    strcmp(Pathname, "/proc/self/exe") == 0 || strcmp(Pathname, "/proc/thread-self/exe") == 0 || strcmp(Pathname, PidExe) == 0;
  if (IsSelfExe) {
    return Filename();
  }

  return Pathname;
}

// Decides from the open(2) flags whether the emulated-path lookup should be
// bypassed: true when any of O_CREAT, O_WRONLY or O_APPEND is set.
static bool ShouldSkipOpenInEmu(int flags) {
  // Creating, write-only and append opens all skip the emulated lookup.
  constexpr int SkipMask = O_CREAT | O_WRONLY | O_APPEND;
  return (flags & SkipMask) != 0;
}

// Given an `fd` that was just opened with `flags`/`mode`, looks up the path it
// refers to and asks EmuFD to open that path. If EmuFD provides a descriptor,
// `fd` is atomically replaced with it.
//
// Returns true only when `fd` now refers to the EmuFD-provided file. Returns
// false when `fd` is negative, its path cannot be determined, EmuFD has nothing
// for the path, or the descriptor swap itself fails (in which case `fd` is left
// referring to the originally opened file).
bool FileManager::ReplaceEmuFd(int fd, int flags, uint32_t mode) {
  char Tmp[PATH_MAX + 1];

  if (fd < 0) {
    return false;
  }

  // Get the path of the file we just opened
  auto PathLength = FEX::get_fdpath(fd, Tmp);
  if (PathLength == -1) {
    return false;
  }
  Tmp[PathLength] = '\0';

  // And try to open via EmuFD
  auto EmuFd = EmuFD.Open(Tmp, flags, mode);
  if (EmuFd == -1) {
    return false;
  }

  // If we succeeded, swap out the fd.
  // dup2() always clears FD_CLOEXEC on the target descriptor, which would drop
  // the close-on-exec behaviour of an O_CLOEXEC open. dup3() lets us carry the
  // flag over to the replaced descriptor.
  const int Swapped = ::dup3(EmuFd, fd, flags & O_CLOEXEC);
  ::close(EmuFd);
  return Swapped != -1;
}

// open(2) handler. The path is first passed through GetSelf(); unless the flags
// make ShouldSkipOpenInEmu() return true, the open is attempted through
// GetEmulatedFDPath() before falling back to a plain host open, and the
// resulting descriptor is offered to ReplaceEmuFd().
//
// Returns the new descriptor, or -1 (as uint64_t) with errno set by the last
// failing call.
uint64_t FileManager::Open(const char* pathname, int flags, uint32_t mode) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  if (ShouldSkipOpenInEmu(flags)) {
    // No emulated lookup for this open: go straight to the host.
    const int DirectFD = ::open(HostPath, flags, mode);
    return DirectFD;
  }

  int fd = -1;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(AT_FDCWD, HostPath, false, TmpFilename);
  if (Emulated.FD != -1) {
    FEX::HLE::open_how how = {
      .flags = (uint64_t)flags,
      .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0,     // openat2() is stricter about this
      .resolve = (Emulated.FD == AT_FDCWD) ? 0u : RESOLVE_IN_ROOT,    // AT_FDCWD means it's a thunk and not via RootFS
    };
    fd = ::syscall(SYSCALL_DEF(openat2), Emulated.FD, Emulated.Path, &how, sizeof(how));

    // EXDEV means a magic symlink (/proc/foo) was involved; retry the access
    // without RESOLVE_IN_ROOT.
    const bool HitMagicLink = fd == -1 && errno == EXDEV;
    if (HitMagicLink) {
      fd = ::syscall(SYSCALL_DEF(openat), Emulated.FD, Emulated.Path, flags, mode);
    }
  }

  if (fd == -1) {
    // Nothing usable through the RootFS (probably nonexistent), so open directly.
    fd = ::open(HostPath, flags, mode);
  }

  ReplaceEmuFd(fd, flags, mode);
  return fd;
}

// close(2) handler. In builds with assertions enabled, closing a descriptor that
// is in the tracked FEX set is logged as an error and the descriptor is removed
// from the set before the real close happens.
uint64_t FileManager::Close(int fd) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  const bool IsTracked = CheckIfFDInTrackedSet(fd);
  if (IsTracked) {
    LogMan::Msg::EFmt("{} closing FEX FD {}", __func__, fd);
    RemoveFEXFD(fd);
  }
#endif

  const auto Result = ::close(fd);
  return Result;
}

// close_range(2) handler. In builds with assertions enabled, a range that
// overlaps the tracked FEX descriptors is logged and untracked first, unless the
// call only sets close-on-exec (CLOSE_RANGE_CLOEXEC).
uint64_t FileManager::CloseRange(unsigned int first, unsigned int last, unsigned int flags) {
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  const bool ActuallyCloses = (flags & CLOSE_RANGE_CLOEXEC) == 0;
  if (ActuallyCloses) {
    if (CheckIfFDRangeInTrackedSet(first, last)) {
      LogMan::Msg::EFmt("{} closing FEX FDs in range ({}, {})", __func__, first, last);
      RemoveFEXFDRange(first, last);
    }
  }
#endif

  const auto Result = ::syscall(SYSCALL_DEF(close_range), first, last, flags);
  return Result;
}

// stat(2) handler. `buf` is treated as a host `struct stat`. The lookup is tried
// through GetEmulatedFDPath() (following symlinks) first and falls back to a
// plain host stat() when that yields nothing or fails.
uint64_t FileManager::Stat(const char* pathname, void* buf) {
  auto* StatBuf = reinterpret_cast<struct stat*>(buf);

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  // stat() follows symlinks, so ask for them to be resolved.
  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(AT_FDCWD, HostPath, true, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::fstatat(Emulated.FD, Emulated.Path, StatBuf, 0);
    if (Result != -1) {
      return Result;
    }
  }

  return ::stat(HostPath, StatBuf);
}

uint64_t FileManager::Lstat(const char* pathname, void* buf) {
  auto NewPath = GetSelf(pathname);
  const char* SelfPath = NewPath ? NewPath->data() : nullptr;

  // lstat does not follow symlinks
  FDPathTmpData TmpFilename;
  auto Path = GetEmulatedFDPath(AT_FDCWD, SelfPath, false, TmpFilename);
  if (Path.FD != -1) {
    uint64_t Result = ::fstatat(Path.FD, Path.Path, reinterpret_cast<struct stat*>(buf), AT_SYMLINK_NOFOLLOW);
    if (Result != -1) {
      return Result;
    }
  }

  return ::lstat(pathname, reinterpret_cast<struct stat*>(buf));
}

// access(2) handler. Checks `mode` through GetEmulatedFDPath() (following
// symlinks) first and falls back to a plain host access() when that yields
// nothing or fails.
uint64_t FileManager::Access(const char* pathname, [[maybe_unused]] int mode) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  // access() follows symlinks, so ask for them to be resolved.
  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(AT_FDCWD, HostPath, true, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::faccessat(Emulated.FD, Emulated.Path, mode, 0);
    if (Result != -1) {
      return Result;
    }
  }

  return ::access(HostPath, mode);
}

// faccessat(2) handler (raw syscall form without flags). Tries the path through
// GetEmulatedFDPath() (following symlinks) first and falls back to the original
// dirfd/path pair when that yields nothing or fails.
uint64_t FileManager::FAccessat(int dirfd, const char* pathname, int mode) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfd, HostPath, true, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::syscall(SYSCALL_DEF(faccessat), Emulated.FD, Emulated.Path, mode);
    if (Result != -1) {
      return Result;
    }
  }

  return ::syscall(SYS_faccessat, dirfd, HostPath, mode);
}

// faccessat2(2) handler. Tries the path through GetEmulatedFDPath() first,
// following symlinks unless AT_SYMLINK_NOFOLLOW is set in `flags`, and falls
// back to the original dirfd/path pair when that yields nothing or fails.
uint64_t FileManager::FAccessat2(int dirfd, const char* pathname, int mode, int flags) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::syscall(SYSCALL_DEF(faccessat2), Emulated.FD, Emulated.Path, mode, flags);
    if (Result != -1) {
      return Result;
    }
  }

  return ::syscall(SYSCALL_DEF(faccessat2), dirfd, HostPath, mode, flags);
}

// readlink(2) handler.
//
// - A null `pathname` fails with EFAULT (it would otherwise be dereferenced by
//   the string comparisons below).
// - The /proc exe links of this process report Filename() instead of the real
//   link target; at most `bufsiz` bytes are written.
// - Everything else is read through GetEmulatedFDPath() first, then from the
//   host, and a RootFS prefix in the result is stripped.
//
// Returns the number of bytes placed in `buf`, or -1 (as uint64_t) with errno set.
uint64_t FileManager::Readlink(const char* pathname, char* buf, size_t bufsiz) {
  if (!pathname) {
    errno = EFAULT;
    return -1;
  }

  // calculate the non-self link to exe
  // Some executables do getpid, stat("/proc/$pid/exe")
  char PidSelfPath[50];
  snprintf(PidSelfPath, 50, "/proc/%i/exe", CurrentPID);

  if (strcmp(pathname, "/proc/self/exe") == 0 || strcmp(pathname, "/proc/thread-self/exe") == 0 || strcmp(pathname, PidSelfPath) == 0) {
    const auto& App = Filename();
    strncpy(buf, App.c_str(), bufsiz);
    return std::min(bufsiz, App.size());
  }

  FDPathTmpData TmpFilename;
  auto Path = GetEmulatedFDPath(AT_FDCWD, pathname, false, TmpFilename);
  uint64_t Result = -1;
  if (Path.FD != -1) {
    Result = ::readlinkat(Path.FD, Path.Path, buf, bufsiz);

    if (Result == -1 && errno == EINVAL) {
      // This means that the file wasn't a symlink
      // This is expected behaviour
      return -1;
    }
  }
  if (Result == -1) {
    Result = ::readlink(pathname, buf, bufsiz);
  }

  // We might have read a /proc/self/fd/* link. If so, strip the RootFS prefix from it.
  return StripRootFSPrefix(buf, Result, true);
}

// chmod(2) handler. Applies `mode` through GetEmulatedFDPath() first and falls
// back to a plain host chmod() when that yields nothing or fails.
uint64_t FileManager::Chmod(const char* pathname, mode_t mode) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(AT_FDCWD, HostPath, false, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::fchmodat(Emulated.FD, Emulated.Path, mode, 0);
    if (Result != -1) {
      return Result;
    }
  }

  return ::chmod(HostPath, mode);
}

// readlinkat(2) handler.
//
// The full path is reconstructed first (from `dirfd` when the pathname is
// relative or null and dirfd is not AT_FDCWD) so that the /proc exe links of
// this process can be recognised; those report Filename() instead of the real
// link target. Everything else is read through GetEmulatedFDPath() first, then
// from the host, and a RootFS prefix in the result is stripped.
//
// GetSelf() can't be used directly here since readlink{at,} returns EINVAL if
// the path isn't a symlink; the self link is always a symlink and isn't expected
// to fail.
//
// Returns the number of bytes placed in `buf`, or -1 (as uint64_t) with errno set.
uint64_t FileManager::Readlinkat(int dirfd, const char* pathname, char* buf, size_t bufsiz) {
  // calculate the non-self link to exe
  // Some executables do getpid, stat("/proc/$pid/exe")

  fextl::string Path {};
  if ((!pathname || pathname[0] != '/') && // If pathname exists then it must not be absolute
      dirfd != AT_FDCWD) {
    // Passed in a dirfd that isn't magic FDCWD
    // We need to get the path from the fd now
    char Tmp[PATH_MAX] = "";
    auto PathLength = FEX::get_fdpath(dirfd, Tmp);
    if (PathLength != -1) {
      Path = fextl::string(Tmp, PathLength);
    }

    if (pathname) {
      if (!Path.empty()) {
        // If the path returned empty then we don't need a separator
        Path += "/";
      }
      Path += pathname;
    }
  } else {
    // The early exits below must set errno themselves: no syscall has run yet,
    // so the caller would otherwise see whatever error happened to be left over.
    if (!pathname) {
      errno = EFAULT;
      return -1;
    }
    if (pathname[0] == '\0') {
      errno = ENOENT;
      return -1;
    }
    Path = pathname;
  }

  char PidSelfPath[50];
  snprintf(PidSelfPath, 50, "/proc/%i/exe", CurrentPID);

  if (Path == "/proc/self/exe" || Path == "/proc/thread-self/exe" || Path == PidSelfPath) {
    const auto& App = Filename();
    strncpy(buf, App.c_str(), bufsiz);
    return std::min(bufsiz, App.size());
  }

  FDPathTmpData TmpFilename;
  auto NewPath = GetEmulatedFDPath(dirfd, pathname, false, TmpFilename);
  uint64_t Result = -1;

  if (NewPath.FD != -1) {
    Result = ::readlinkat(NewPath.FD, NewPath.Path, buf, bufsiz);

    if (Result == -1 && errno == EINVAL) {
      // This means that the file wasn't a symlink
      // This is expected behaviour
      return -1;
    }
  }

  if (Result == -1) {
    Result = ::readlinkat(dirfd, pathname, buf, bufsiz);
  }

  // We might have read a /proc/self/fd/* link. If so, strip the RootFS prefix from it.
  return StripRootFSPrefix(buf, Result, true);
}

// openat(2) handler. The path is first passed through GetSelf(); unless the
// flags make ShouldSkipOpenInEmu() return true, the open is attempted through
// GetEmulatedFDPath() before falling back to the original dirfd/path pair, and
// the resulting descriptor is offered to ReplaceEmuFd().
//
// Returns the new descriptor, or -1 (as uint64_t) with errno set by the last
// failing call.
uint64_t FileManager::Openat([[maybe_unused]] int dirfs, const char* pathname, int flags, uint32_t mode) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  if (ShouldSkipOpenInEmu(flags)) {
    // No emulated lookup for this open: go straight to the host.
    const int32_t DirectFD = ::syscall(SYSCALL_DEF(openat), dirfs, HostPath, flags, mode);
    return DirectFD;
  }

  int32_t fd = -1;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfs, HostPath, false, TmpFilename);
  if (Emulated.FD != -1) {
    FEX::HLE::open_how how = {
      .flags = (uint64_t)flags,
      .mode = (flags & (O_CREAT | O_TMPFILE)) ? mode & 07777 : 0,     // openat2() is stricter about this
      .resolve = (Emulated.FD == AT_FDCWD) ? 0u : RESOLVE_IN_ROOT,    // AT_FDCWD means it's a thunk and not via RootFS
    };
    fd = ::syscall(SYSCALL_DEF(openat2), Emulated.FD, Emulated.Path, &how, sizeof(how));

    // EXDEV means a magic symlink (/proc/foo) was involved; retry the access
    // without RESOLVE_IN_ROOT.
    const bool HitMagicLink = fd == -1 && errno == EXDEV;
    if (HitMagicLink) {
      fd = ::syscall(SYSCALL_DEF(openat), Emulated.FD, Emulated.Path, flags, mode);
    }
  }

  if (fd == -1) {
    // Nothing usable through the RootFS (probably nonexistent), so open directly.
    fd = ::syscall(SYSCALL_DEF(openat), dirfs, HostPath, flags, mode);
  }

  ReplaceEmuFd(fd, flags, mode);
  return fd;
}

// openat2(2) handler. The path is first passed through GetSelf(); unless
// `how->flags` makes ShouldSkipOpenInEmu() return true, the open is attempted
// through GetEmulatedFDPath() before falling back to the original dirfd/path
// pair, and the resulting descriptor is offered to ReplaceEmuFd().
//
// The emulated attempt is skipped when the caller already requested
// RESOLVE_IN_ROOT.
//
// NOTE(review): `how` is written to (RESOLVE_IN_ROOT is set and then cleared
// again), so the caller-provided structure must be writable — confirm this
// holds for every caller.
//
// Returns the new descriptor, or -1 (as uint64_t) with errno set by the last
// failing call.
uint64_t FileManager::Openat2(int dirfs, const char* pathname, FEX::HLE::open_how* how, size_t usize) {
  auto NewPath = GetSelf(pathname);
  const char* SelfPath = NewPath ? NewPath->data() : nullptr;

  int32_t fd = -1;

  if (!ShouldSkipOpenInEmu(how->flags)) {
    FDPathTmpData TmpFilename;
    auto Path = GetEmulatedFDPath(dirfs, SelfPath, false, TmpFilename);
    if (Path.FD != -1 && !(how->resolve & RESOLVE_IN_ROOT)) {
      // AT_FDCWD means it's a thunk and not via RootFS
      if (Path.FD != AT_FDCWD) {
        how->resolve |= RESOLVE_IN_ROOT;
      }
      fd = ::syscall(SYSCALL_DEF(openat2), Path.FD, Path.Path, how, usize);
      // Restore the caller's resolve bits before anything else looks at `how`.
      // This plain store does not touch errno, so the EXDEV check below still
      // sees the error from the syscall above.
      how->resolve &= ~RESOLVE_IN_ROOT;
      if (fd == -1 && errno == EXDEV) {
        // This means a magic symlink (/proc/foo) was involved. In this case we
        // just punt and do the access without RESOLVE_IN_ROOT.
        fd = ::syscall(SYSCALL_DEF(openat2), Path.FD, Path.Path, how, usize);
      }
    }

    // Open through RootFS failed (probably nonexistent), so open directly.
    if (fd == -1) {
      fd = ::syscall(SYSCALL_DEF(openat2), dirfs, SelfPath, how, usize);
    }

    // Give EmuFD the chance to substitute its own file for the opened path.
    ReplaceEmuFd(fd, how->flags, how->mode);
  } else {
    // No emulated lookup for this open: go straight to the host.
    fd = ::syscall(SYSCALL_DEF(openat2), dirfs, SelfPath, how, usize);
  }

  return fd;
}

// statx(2) handler. A no-follow query on one of the /proc exe links of this
// process is answered with the real link's data. Otherwise the path is tried
// through GetEmulatedFDPath() first and falls back to the original dirfd/path
// pair when that yields nothing or fails.
uint64_t FileManager::Statx(int dirfd, const char* pathname, int flags, uint32_t mask, struct statx* statxbuf) {
  if (IsSelfNoFollow(pathname, flags)) {
    // Not following the self symlink means the caller wants data about the
    // symlink itself, so report the actual FEX symlink information.
    return FHU::Syscalls::statx(dirfd, pathname, flags, mask, statxbuf);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = FHU::Syscalls::statx(Emulated.FD, Emulated.Path, flags, mask, statxbuf);
    if (Result != -1) {
      return Result;
    }
  }

  return FHU::Syscalls::statx(dirfd, HostPath, flags, mask, statxbuf);
}

// mknod(2) handler. Creates the node through GetEmulatedFDPath() first and falls
// back to a plain host mknod() when that yields nothing or fails.
uint64_t FileManager::Mknod(const char* pathname, mode_t mode, dev_t dev) {
  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(AT_FDCWD, HostPath, false, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::mknodat(Emulated.FD, Emulated.Path, mode, dev);
    if (Result != -1) {
      return Result;
    }
  }

  return ::mknod(HostPath, mode, dev);
}

// statfs(2) handler. `buf` is treated as a host `struct statfs`. The path from
// GetEmulatedPath() is queried first; the original path is used when there is
// no emulated path or the first query fails.
uint64_t FileManager::Statfs(const char* path, void* buf) {
  auto* FSBuf = reinterpret_cast<struct statfs*>(buf);

  const auto Emulated = GetEmulatedPath(path);
  if (!Emulated.empty()) {
    const uint64_t Result = ::statfs(Emulated.c_str(), FSBuf);
    if (Result != -1) {
      return Result;
    }
  }

  return ::statfs(path, FSBuf);
}

// fstatat(2) handler. A no-follow query on one of the /proc exe links of this
// process is answered with the real link's data. Otherwise the path is tried
// through GetEmulatedFDPath() first and falls back to the original dirfd/path
// pair when that yields nothing or fails.
uint64_t FileManager::NewFSStatAt(int dirfd, const char* pathname, struct stat* buf, int flag) {
  if (IsSelfNoFollow(pathname, flag)) {
    // Same reasoning as in Statx: report the symlink itself.
    return ::fstatat(dirfd, pathname, buf, flag);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (flag & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::fstatat(Emulated.FD, Emulated.Path, buf, flag);
    if (Result != -1) {
      return Result;
    }
  }

  return ::fstatat(dirfd, HostPath, buf, flag);
}

// fstatat64 handler. A no-follow query on one of the /proc exe links of this
// process is answered with the real link's data. Otherwise the path is tried
// through GetEmulatedFDPath() first and falls back to the original dirfd/path
// pair when that yields nothing or fails.
uint64_t FileManager::NewFSStatAt64(int dirfd, const char* pathname, struct stat64* buf, int flag) {
  if (IsSelfNoFollow(pathname, flag)) {
    // Same reasoning as in Statx: report the symlink itself.
    return ::fstatat64(dirfd, pathname, buf, flag);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (flag & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dirfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = ::fstatat64(Emulated.FD, Emulated.Path, buf, flag);
    if (Result != -1) {
      return Result;
    }
  }

  return ::fstatat64(dirfd, HostPath, buf, flag);
}

// setxattr(2) handler. The path from GetEmulatedPath() (symlink-following
// variant) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::Setxattr(const char* path, const char* name, const void* value, size_t size, int flags) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, true);
  if (!Emulated.empty()) {
    const uint64_t Result = ::setxattr(Emulated.c_str(), name, value, size, flags);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::setxattr(HostPath, name, value, size, flags);
}

// lsetxattr(2) handler. The path from GetEmulatedPath() (without symlink
// following) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::LSetxattr(const char* path, const char* name, const void* value, size_t size, int flags) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, false);
  if (!Emulated.empty()) {
    const uint64_t Result = ::lsetxattr(Emulated.c_str(), name, value, size, flags);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::lsetxattr(HostPath, name, value, size, flags);
}

// getxattr(2) handler. The path from GetEmulatedPath() (symlink-following
// variant) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::Getxattr(const char* path, const char* name, void* value, size_t size) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, true);
  if (!Emulated.empty()) {
    const uint64_t Result = ::getxattr(Emulated.c_str(), name, value, size);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::getxattr(HostPath, name, value, size);
}

// lgetxattr(2) handler. The path from GetEmulatedPath() (without symlink
// following) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::LGetxattr(const char* path, const char* name, void* value, size_t size) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, false);
  if (!Emulated.empty()) {
    const uint64_t Result = ::lgetxattr(Emulated.c_str(), name, value, size);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::lgetxattr(HostPath, name, value, size);
}

// listxattr(2) handler. The path from GetEmulatedPath() (symlink-following
// variant) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::Listxattr(const char* path, char* list, size_t size) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, true);
  if (!Emulated.empty()) {
    const uint64_t Result = ::listxattr(Emulated.c_str(), list, size);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::listxattr(HostPath, list, size);
}

// llistxattr(2) handler. The path from GetEmulatedPath() (without symlink
// following) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::LListxattr(const char* path, char* list, size_t size) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, false);
  if (!Emulated.empty()) {
    const uint64_t Result = ::llistxattr(Emulated.c_str(), list, size);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::llistxattr(HostPath, list, size);
}

// removexattr(2) handler. The path from GetEmulatedPath() (symlink-following
// variant) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::Removexattr(const char* path, const char* name) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, true);
  if (!Emulated.empty()) {
    const uint64_t Result = ::removexattr(Emulated.c_str(), name);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::removexattr(HostPath, name);
}

// lremovexattr(2) handler. The path from GetEmulatedPath() (without symlink
// following) is tried first; only an ENOENT failure there falls through to the
// original path. Any other outcome of the first attempt is returned as-is.
uint64_t FileManager::LRemovexattr(const char* path, const char* name) {
  const auto Resolved = GetSelf(path);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const auto Emulated = GetEmulatedPath(HostPath, false);
  if (!Emulated.empty()) {
    const uint64_t Result = ::lremovexattr(Emulated.c_str(), name);
    const bool MissingThere = Result == -1 && errno == ENOENT;
    if (!MissingThere) {
      return Result;
    }
  }

  return ::lremovexattr(HostPath, name);
}

// setxattrat handler. A no-follow request on one of the /proc exe links of this
// process is forwarded untouched. Otherwise the path is tried through
// GetEmulatedFDPath() first and falls back to the original dfd/path pair when
// that yields nothing or fails.
uint64_t FileManager::SetxattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name, const xattr_args* uargs, size_t usize) {
  if (IsSelfNoFollow(pathname, at_flags)) {
    // Same reasoning as in Statx: operate on the symlink itself.
    return syscall(SYSCALL_DEF(setxattrat), dfd, pathname, at_flags, name, uargs, usize);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (at_flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = syscall(SYSCALL_DEF(setxattrat), Emulated.FD, Emulated.Path, at_flags, name, uargs, usize);
    if (Result != -1) {
      return Result;
    }
  }

  return syscall(SYSCALL_DEF(setxattrat), dfd, HostPath, at_flags, name, uargs, usize);
}

// getxattrat handler. A no-follow request on one of the /proc exe links of this
// process is forwarded untouched. Otherwise the path is tried through
// GetEmulatedFDPath() first and falls back to the original dfd/path pair when
// that yields nothing or fails.
uint64_t FileManager::GetxattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name, const xattr_args* uargs, size_t usize) {
  if (IsSelfNoFollow(pathname, at_flags)) {
    // Same reasoning as in Statx: operate on the symlink itself.
    return syscall(SYSCALL_DEF(getxattrat), dfd, pathname, at_flags, name, uargs, usize);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (at_flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = syscall(SYSCALL_DEF(getxattrat), Emulated.FD, Emulated.Path, at_flags, name, uargs, usize);
    if (Result != -1) {
      return Result;
    }
  }

  return syscall(SYSCALL_DEF(getxattrat), dfd, HostPath, at_flags, name, uargs, usize);
}

// listxattrat handler. A no-follow request on one of the /proc exe links of this
// process is forwarded untouched. Otherwise the path is tried through
// GetEmulatedFDPath() first and falls back to the original dfd/path pair when
// that yields nothing or fails.
uint64_t FileManager::ListxattrAt(int dfd, const char* pathname, uint32_t at_flags, char* list, size_t size) {
  if (IsSelfNoFollow(pathname, at_flags)) {
    // Same reasoning as in Statx: operate on the symlink itself.
    return syscall(SYSCALL_DEF(listxattrat), dfd, pathname, at_flags, list, size);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (at_flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = syscall(SYSCALL_DEF(listxattrat), Emulated.FD, Emulated.Path, at_flags, list, size);
    if (Result != -1) {
      return Result;
    }
  }

  return syscall(SYSCALL_DEF(listxattrat), dfd, HostPath, at_flags, list, size);
}

// removexattrat handler. A no-follow request on one of the /proc exe links of
// this process is forwarded untouched. Otherwise the path is tried through
// GetEmulatedFDPath() first and falls back to the original dfd/path pair when
// that yields nothing or fails.
uint64_t FileManager::RemovexattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name) {
  if (IsSelfNoFollow(pathname, at_flags)) {
    // Same reasoning as in Statx: operate on the symlink itself.
    return syscall(SYSCALL_DEF(removexattrat), dfd, pathname, at_flags, name);
  }

  const auto Resolved = GetSelf(pathname);
  const char* HostPath = Resolved ? Resolved->data() : nullptr;

  const bool FollowLinks = (at_flags & AT_SYMLINK_NOFOLLOW) == 0;

  FDPathTmpData TmpFilename;
  const auto Emulated = GetEmulatedFDPath(dfd, HostPath, FollowLinks, TmpFilename);
  if (Emulated.FD != -1) {
    const uint64_t Result = syscall(SYSCALL_DEF(removexattrat), Emulated.FD, Emulated.Path, at_flags, name);
    if (Result != -1) {
      return Result;
    }
  }

  return syscall(SYSCALL_DEF(removexattrat), dfd, HostPath, at_flags, name);
}

// Records `PID` as the current process ID and refreshes the cached inode numbers
// of the "self/fd/<N>" entries (looked up relative to ProcFD) for RootFSFD and
// ProcFD. These inodes are what IsProtectedFile() compares against.
//
// If the RootFSFD entry cannot be stat'ed both inodes are reset to 0; if only
// the ProcFD entry fails, just ProcFDInode is reset.
void FileManager::UpdatePID(uint32_t PID) {
  CurrentPID = PID;

  struct stat Info {};

  // Inode of /proc/self/fd/<RootFSFD>, tracked so the entry can be hidden.
  const auto RootFSEntry = fextl::fmt::format("self/fd/{}", RootFSFD);
  if (fstatat(ProcFD, RootFSEntry.c_str(), &Info, AT_SYMLINK_NOFOLLOW) < 0) {
    // Probably in a strict sandbox
    RootFSFDInode = 0;
    ProcFDInode = 0;
    return;
  }
  RootFSFDInode = Info.st_ino;

  // Same for the ProcFD itself.
  const auto ProcEntry = fextl::fmt::format("self/fd/{}", ProcFD);
  if (fstatat(ProcFD, ProcEntry.c_str(), &Info, AT_SYMLINK_NOFOLLOW) < 0) {
    ProcFDInode = 0;
    return;
  }
  ProcFDInode = Info.st_ino;
}

// Tells whether a directory entry with inode number `inode`, found in the
// directory open as `ParentDirFD`, should be hidden. That is the case when the
// inode equals one of the cached RootFS / proc / code map inodes and the parent
// directory's device is ProcFSDev.
bool FileManager::IsProtectedFile(int ParentDirFD, uint64_t inode) const {
  // Which protected descriptor (if any) does the inode belong to?
  const char* Match = nullptr;
  if (inode == RootFSFDInode) {
    Match = "RootFS";
  } else if (inode == ProcFDInode) {
    Match = "/proc";
  } else if (inode == CodeMapInode) {
    Match = "code map";
  }

  if (!Match) {
    return false;
  }

  // Inode numbers alone are not unique across filesystems, so also require the
  // parent directory to be on the ProcFSDev device.
  struct stat Buffer;
  if (fstat(ParentDirFD, &Buffer) < 0) {
    return false;
  }
  if (Buffer.st_dev != ProcFSDev) {
    return false;
  }

  LogMan::Msg::DFmt("Hiding directory entry for {} FD", Match);
  return true;
}

// Caches the inode number of the "self/fd/<FD>" entry (looked up relative to
// ProcFD) as CodeMapInode. Passing -1, or a failing lookup, resets it to 0.
void FileManager::SetProtectedCodeMapFD(int FD) {
  if (FD == -1) {
    CodeMapInode = 0;
    return;
  }

  const auto Entry = fextl::fmt::format("self/fd/{}", FD);
  struct stat Info {};
  if (fstatat(ProcFD, Entry.c_str(), &Info, AT_SYMLINK_NOFOLLOW) < 0) {
    CodeMapInode = 0;
    return;
  }

  CodeMapInode = Info.st_ino;
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/FileManagement.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
$end_info$
*/

#pragma once
#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/unordered_set.h>
#include <FEXCore/fextl/vector.h>

#include <array>
#include <cstddef>
#include <cstdint>
#include <fcntl.h>
#include <functional>
#include <mutex>
#include <linux/limits.h>
#include <optional>
#include <sys/stat.h>
#include <unistd.h>

#include "LinuxSyscalls/EmulatedFiles/EmulatedFiles.h"

namespace FEXCore::Context {
class Context;
}

namespace FEX::HLE {
[[maybe_unused]]
static bool IsSymlink(int FD, const char* Filename) {
  // Query the entry itself (no symlink following), relative to FD.
  // Any fstatat() failure is reported as "not a symlink".
  struct stat Info {};
  if (fstatat(FD, Filename, &Info, AT_SYMLINK_NOFOLLOW) != 0) {
    return false;
  }
  return S_ISLNK(Info.st_mode);
}

[[maybe_unused]]
static ssize_t GetSymlink(int FD, const char* Filename, char* ResultBuffer, size_t ResultBufferSize) {
  // Thin wrapper over readlinkat(): returns its result unchanged, so the buffer
  // is not NUL-terminated and -1 signals failure.
  const ssize_t Length = readlinkat(FD, Filename, ResultBuffer, ResultBufferSize);
  return Length;
}

struct open_how;

// Front-end for the guest's path- and FD-based file syscalls.
//
// Each public `uint64_t` method mirrors the Linux syscall of the same name and takes the guest's arguments.
// The implementations live in FileManagement.cpp; see there for how paths are redirected (RootFS, thunk
// overlays, emulated files).
//
// The object is neither default-constructible nor movable (and therefore not copyable).
class FileManager final {
public:
  FileManager() = delete;
  FileManager(FileManager&&) = delete;

  FileManager(FEXCore::Context::Context* ctx);
  ~FileManager();
  uint64_t Open(const char* pathname, int flags, uint32_t mode);
  uint64_t Close(int fd);
  uint64_t CloseRange(unsigned int first, unsigned int last, unsigned int flags);
  uint64_t Stat(const char* pathname, void* buf);
  uint64_t Lstat(const char* path, void* buf);
  uint64_t Access(const char* pathname, int mode);
  uint64_t FAccessat(int dirfd, const char* pathname, int mode);
  uint64_t FAccessat2(int dirfd, const char* pathname, int mode, int flags);
  uint64_t Readlink(const char* pathname, char* buf, size_t bufsiz);
  uint64_t Chmod(const char* pathname, mode_t mode);
  uint64_t Readlinkat(int dirfd, const char* pathname, char* buf, size_t bufsiz);
  uint64_t Openat(int dirfs, const char* pathname, int flags, uint32_t mode);
  uint64_t Openat2(int dirfs, const char* pathname, FEX::HLE::open_how* how, size_t usize);
  uint64_t Statx(int dirfd, const char* pathname, int flags, uint32_t mask, struct statx* statxbuf);
  uint64_t Mknod(const char* pathname, mode_t mode, dev_t dev);
  uint64_t NewFSStatAt(int dirfd, const char* pathname, struct stat* buf, int flag);
  uint64_t NewFSStatAt64(int dirfd, const char* pathname, struct stat64* buf, int flag);
  uint64_t Setxattr(const char* path, const char* name, const void* value, size_t size, int flags);
  uint64_t LSetxattr(const char* path, const char* name, const void* value, size_t size, int flags);
  uint64_t Getxattr(const char* path, const char* name, void* value, size_t size);
  uint64_t LGetxattr(const char* path, const char* name, void* value, size_t size);
  uint64_t Listxattr(const char* path, char* list, size_t size);
  uint64_t LListxattr(const char* path, char* list, size_t size);
  uint64_t Removexattr(const char* path, const char* name);
  uint64_t LRemovexattr(const char* path, const char* name);

  // Argument block used by the *xattrat entry points below.
  // NOTE(review): presumably mirrors the kernel's `struct xattr_args` layout - confirm against the uapi header.
  struct xattr_args {
    uint64_t value;
    uint32_t size;
    uint32_t flags;
  };

  uint64_t SetxattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name, const xattr_args* uargs, size_t usize);
  uint64_t GetxattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name, const xattr_args* uargs, size_t usize);
  uint64_t ListxattrAt(int dfd, const char* pathname, uint32_t at_flags, char* list, size_t size);
  uint64_t RemovexattrAt(int dfd, const char* pathname, uint32_t at_flags, const char* name);

  // vfs
  uint64_t Statfs(const char* path, void* buf);

  void UpdatePID(uint32_t PID);
  // Helper to detect FEX-internal files from their inode and parent directory FD.
  // This is useful to deal with Chromium/CEF, which closes any FDs reported in /proc/self/fd/.
  bool IsProtectedFile(int ParentDirFD, uint64_t inode) const;
  // Records the inode of the code map FD so IsProtectedFile can match it. Passing -1 clears the record.
  void SetProtectedCodeMapFD(int FD);

  fextl::string GetEmulatedPath(const char* pathname, bool FollowSymlink = false) const;
  fextl::string GetHostPath(fextl::string& Path, bool AliasedOnly) const;

  bool ReplaceEmuFd(int fd, int flags, uint32_t mode);

  // Bookkeeping of FEX-owned FDs. Only does real work when assertions are enabled; otherwise every call
  // below is a no-op and every query returns false.
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Adds FD to the tracked set.
  void TrackFEXFD(int FD) noexcept {
    std::lock_guard lk(FEXTrackingFDMutex);
    FEXTrackingFDs.emplace(FD);
  }

  // Removes FD from the tracked set (no effect if it was not tracked).
  void RemoveFEXFD(int FD) noexcept {
    std::lock_guard lk(FEXTrackingFDMutex);
    FEXTrackingFDs.erase(FD);
  }

  // Removes every tracked FD in [begin, end]. An `end` of -1 means "no upper bound".
  void RemoveFEXFDRange(int begin, int end) noexcept {
    std::lock_guard lk(FEXTrackingFDMutex);

    std::erase_if(FEXTrackingFDs, [begin, end](int FD) { return FD >= begin && (FD <= end || end == -1); });
  }

  // Returns true if FD is currently tracked.
  bool CheckIfFDInTrackedSet(int FD) const noexcept {
    std::lock_guard lk(FEXTrackingFDMutex);
    return FEXTrackingFDs.contains(FD);
  }

  // Returns true if any tracked FD lies in [begin, end]. An `end` of -1 means "no upper bound".
  bool CheckIfFDRangeInTrackedSet(int begin, int end) const noexcept {
    std::lock_guard lk(FEXTrackingFDMutex);
    // Just linear scan since the number of tracking FDs is low.
    for (auto it : FEXTrackingFDs) {
      if (it >= begin && (it <= end || end == -1)) {
        return true;
      }
    }
    return false;
  }

#else
  void TrackFEXFD(int FD) const noexcept {}
  bool CheckIfFDInTrackedSet(int FD) const noexcept {
    return false;
  }
  void RemoveFEXFD(int FD) const noexcept {}
  void RemoveFEXFDRange(int begin, int end) const noexcept {}
  bool CheckIfFDRangeInTrackedSet(int begin, int end) const noexcept {
    return false;
  }
#endif

private:
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Guards FEXTrackingFDs; mutable so the const query helpers can lock it.
  mutable std::mutex FEXTrackingFDMutex;
  fextl::set<int> FEXTrackingFDs;
#endif

  // Two PATH_MAX-sized scratch buffers that callers hand to GetEmulatedFDPath.
  using FDPathTmpData = std::array<char[PATH_MAX], 2>;
  struct EmulatedFDPathResult final {
    int FD;
    const char* Path;
  };
  EmulatedFDPathResult GetEmulatedFDPath(int dirfd, const char* pathname, bool FollowSymlink, FDPathTmpData& TmpFilename) const;

  std::optional<std::string_view> GetSelf(const char* Pathname) const;
  bool IsSelfNoFollow(const char* Pathname, int flags) const;

  bool RootFSPathExists(const char* Filepath) const;
  size_t GetRootFSPrefixLen(const char* pathname, size_t len, bool AliasedOnly) const;
  ssize_t StripRootFSPrefix(char* pathname, ssize_t len, bool leaky) const;

  // One entry of the thunk database as loaded by LoadThunkDatabase.
  struct ThunkDBObject {
    fextl::string LibraryName;
    fextl::unordered_set<fextl::string> Depends;
    fextl::vector<fextl::string> Overlays;
    bool Enabled {};
  };
  void LoadThunkDatabase(fextl::unordered_map<fextl::string, ThunkDBObject>& ThunkDB, bool Global);
  FEX::EmulatedFile::EmulatedFDManager EmuFD;

  // Uses the transparent comparator std::less<> so lookups do not need a fextl::string key.
  fextl::map<fextl::string, fextl::string, std::less<>> ThunkOverlays;

  FEX_CONFIG_OPT(Filename, APP_FILENAME);
  FEX_CONFIG_OPT(LDPath, ROOTFS);
  FEX_CONFIG_OPT(ThunkGuestLibs, THUNKGUESTLIBS);
  FEX_CONFIG_OPT(ThunkConfig, THUNKCONFIG);
  FEX_CONFIG_OPT(AppConfigName, APP_CONFIG_NAME);
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  uint32_t CurrentPID {};
  int RootFSFD {AT_FDCWD};
  int ProcFD {0};
  int64_t RootFSFDInode = 0;
  int64_t ProcFDInode = 0;
  // Inode recorded by SetProtectedCodeMapFD; 0 means "no code map FD registered".
  int64_t CodeMapInode = 0;
  // Device that IsProtectedFile compares a parent directory's st_dev against.
  // Value-initialised so the comparison never reads an indeterminate value if setup did not assign it.
  dev_t ProcFSDev {};
};
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|gdbserver
desc: Provides a gdb interface to the guest state
$end_info$
*/

#include "CodeLoader.h"
#include "GdbServer/Info.h"

#include <cstdlib>
#include <cstdio>
#include <iomanip>
#include <memory>
#include <optional>
#include <string_view>

#include <Common/FEXServerClient.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/StringUtils.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <atomic>
#include <cstring>
#ifndef _WIN32
#include <elf.h>
#include <netdb.h>
#include <sys/socket.h>
#endif
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <stddef.h>
#include <string_view>
#include <sys/stat.h>
#include <sys/un.h>
#include <sys/utsname.h>
#include <unistd.h>
#include <utility>

#include "LinuxSyscalls/GdbServer.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/ThreadManager.h"

namespace FEX {

#ifndef _WIN32
// Reports to the connected debugger that `Thread` stopped with `signal`.
// Does nothing when no debugger connection (CommsSocket) exists. Holds sendMutex for the whole call.
void GdbServer::Break(FEXCore::Core::InternalThreadState* Thread, int signal) {
  std::lock_guard lk(sendMutex);

  if (CommsSocket) {
    // The thread that is breaking becomes the current debugging thread.
    auto StateObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
    CurrentDebuggingThread = StateObject->ThreadInfo.TID.load();

    // Stop reply: "T<signal as two hex digits>thread:<tid in hex>;"
    SendPacket(*CommsSocket, fextl::fmt::format("T{:02x}thread:{:x};", signal, CurrentDebuggingThread));
  }
}

// Blocks the calling thread on ThreadBreakEvent.
// ThreadAction('c') signals this event via NotifyAll() when the debugger asks to continue.
void GdbServer::WaitForThreadWakeup() {
  // Wait for gdbserver to tell us to wake up
  ThreadBreakEvent.Wait();
}

// Closes the listening socket and then joins the server thread if it is still joinable.
// NOTE(review): assumes gdbServerThread is non-null here - confirm StartThread() always creates it.
GdbServer::~GdbServer() {
  CloseListenSocket();

  if (gdbServerThread->joinable()) {
    gdbServerThread->join(nullptr);
  }
}

// Constructs the server, hooks every host signal, and starts the server thread.
//
// All signals start out as "pass to guest". For a signal whose PassSignals entry has been cleared, the
// handler records the signal on the thread's GdbInfo, spills register state, notifies the debugger via
// Break() and then parks the thread until it is woken again.
GdbServer::GdbServer(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* SignalDelegation, FEX::HLE::SyscallHandler* const SyscallHandler)
  : CTX(ctx)
  , SyscallHandler {SyscallHandler}
  , SignalDelegation {SignalDelegation} {
  // Pass all signals by default
  std::fill(PassSignals.begin(), PassSignals.end(), true);

  // This is a total hack as there is currently no way to resume once hitting a segfault
  // But it's semi-useful for debugging.
  // NOTE(review): the range is inclusive of MAX_SIGNALS; PassSignals is assumed to have MAX_SIGNALS + 1 entries - confirm.
  for (uint32_t Signal = 0; Signal <= FEX::HLE::SignalDelegator::MAX_SIGNALS; ++Signal) {
    SignalDelegation->RegisterHostSignalHandler(
      Signal,
      [this](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) {
        if (PassSignals[Signal]) {
          // Pass signal to the guest
          return false;
        }

        auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
        // Engage GdbInfo with a fresh, value-initialised record before writing to it.
        // `GdbInfo = {}` would leave an optional *empty*, making the following `->` access undefined behaviour.
        ThreadObject->GdbInfo.emplace();
        ThreadObject->GdbInfo->Signal = Signal;

        this->SignalDelegation->SpillSRA(Thread, ucontext, Thread->CurrentFrame->InSyscallInfo);

        // Let GDB know that we have a signal
        this->Break(Thread, Signal);

        // Park here until the debugger resumes us, then drop the per-signal record again.
        WaitForThreadWakeup();
        ThreadObject->GdbInfo.reset();

        // Signal was consumed by the debugger path.
        return true;
      },
      true);
  }

  StartThread();
}

// Returns the 8-bit checksum of `packet`: the sum of all its bytes modulo 256, in the range [0, 255].
static int calculateChecksum(const fextl::string& packet) {
  unsigned int Sum = 0;
  for (size_t Index = 0; Index < packet.size(); ++Index) {
    Sum += static_cast<unsigned char>(packet[Index]);
  }
  // Only the low byte is significant.
  return static_cast<int>(Sum & 0xFFu);
}

// Decodes pairs of hex digits from `ss` into raw bytes until the next character equals `delm`
// or the stream runs out of data.
//
// `delm` is compared against the result of peek(), so passing -1 means "decode until end of stream".
// When `delm` is not -1, one character (the delimiter, if present) is consumed after decoding.
// A trailing, incomplete pair (a single hex digit) is discarded rather than decoded.
static fextl::string hexstring(fextl::istringstream& ss, int delm) {
  fextl::string ret;

  char hexString[3] = {0, 0, 0};
  for (;;) {
    const int Next = ss.peek();
    // Stop at the delimiter, and also at end-of-stream: without the EOF check a missing delimiter
    // would loop forever, appending a stale byte each iteration.
    if (Next == delm || Next == fextl::istringstream::traits_type::eof()) {
      break;
    }

    ss.read(hexString, 2);
    if (ss.gcount() != 2) {
      // Truncated pair: hexString would mix a new digit with a stale one.
      break;
    }

    int c = std::strtoul(hexString, nullptr, 16);
    ret.push_back((char)c);
  }

  if (delm != -1) {
    ss.get();
  }

  return ret;
}

// Hex-encodes `length` bytes starting at `data`: each byte is formatted with "{:02x}" and the results
// are concatenated with no separator, in memory order.
static fextl::string appendHex(const char* data, size_t length) {
  return fextl::fmt::format("{:02x}", fmt::join(data, data + length, ""));
}

// Hex-encodes `length` bytes starting at `data` as two lowercase hex digits per byte, in order.
static fextl::string encodeHex(const unsigned char* data, size_t length) {
  fextl::ostringstream Encoded;

  // Fill character and numeric base are sticky stream state; only the width resets after each insertion.
  Encoded << std::setfill('0') << std::hex;
  for (const unsigned char *Cursor = data, *End = data + length; Cursor != End; ++Cursor) {
    Encoded << std::setw(2) << static_cast<int>(*Cursor);
  }

  return Encoded.str();
}

// Convenience overload: hex-encodes the bytes of `str` by forwarding to the pointer/length overload.
static fextl::string encodeHex(std::string_view str) {
  return encodeHex(reinterpret_cast<const unsigned char*>(str.data()), str.size());
}

// Packet parser
// Takes the raw bytes of one received message and extracts a single packet from it.
// Un-escapes chars and validates the checksum.
// Returns the un-escaped packet body when the checksum matches. Returns an empty string when the checksum
// does not match or no packet terminator was found. Acknowledging the packet is left to the caller.
// Malformed framing (missing '$', unescaped '$', dangling escape, truncated checksum) is fatal.
fextl::string GdbServer::ReadPacket(const std::span<std::byte>& RawMessage) {
  fextl::string packet {};

  // The GDB "Remote Serial Protocol" was originally 7bit clean for use on serial ports.
  // Binary data is usually hex encoded. However some later extensions just put
  // raw 8bit binary data.

  // Packets are in the format
  // $<data>#<checksum>
  // where any $ or # in the packet body are escaped ('}' followed by the char XORed with 0x20)
  // The checksum is a single unsigned byte sum of the data, hex encoded.
  // It covers the bytes as transmitted (i.e. still escaped), so it is accumulated from the raw
  // input rather than from the un-escaped result.

  if (RawMessage.empty() || (char)RawMessage[0] != '$') {
    ERROR_AND_DIE_FMT("Expected GDB protocol messages to start with '$'");
  }

  // Running modulo-256 sum of every on-wire byte between '$' and '#'.
  unsigned char wire_checksum = 0;

  for (auto It = std::next(RawMessage.begin()); It != RawMessage.end(); ++It) {
    char c = (char)*It;
    switch (c) {
    case '$': // start of packet
      ERROR_AND_DIE_FMT("Unescaped control character");
      break;

    case '}': // escape char
    {
      if (std::next(It) == RawMessage.end()) {
        ERROR_AND_DIE_FMT("Missing character after escape indicator");
      }
      char escaped = (char)*++It;
      // Both the escape marker and the escaped byte were transmitted, so both count towards the checksum.
      wire_checksum += static_cast<unsigned char>(c);
      wire_checksum += static_cast<unsigned char>(escaped);
      packet.push_back(escaped ^ 0x20);
      break;
    }

    case '#': // end of packet
    {
      // Two checksum digits must follow the '#'.
      if (RawMessage.end() - It <= 2) {
        ERROR_AND_DIE_FMT("Missing checksum at end of packet");
      }

      char hexString[3] = {0, 0, 0};
      hexString[0] = (char)*++It;
      hexString[1] = (char)*++It;
      int expected_checksum = std::strtoul(hexString, nullptr, 16);

      if (wire_checksum == expected_checksum) {
        return packet;
      } else {
        LogMan::Msg::EFmt("Received Invalid Packet: ${}#{:02x}", packet, expected_checksum);
      }
      break;
    }

    default:
      wire_checksum += static_cast<unsigned char>(c);
      packet.push_back(c);
      break;
    }
  }

  return "";
}

// Returns `packet` with the characters '$', '#', '*' and '}' escaped for transmission:
// each is replaced by '}' followed by the original character XORed with 0x20.
// All other characters are copied through unchanged.
static fextl::string escapePacket(const fextl::string& packet) {
  fextl::string Escaped;
  Escaped.reserve(packet.size());

  for (const char Ch : packet) {
    const bool NeedsEscape = Ch == '$' || Ch == '#' || Ch == '*' || Ch == '}';
    if (NeedsEscape) {
      Escaped.push_back('}');
      Escaped.push_back(static_cast<char>(Ch ^ 0x20));
    } else {
      Escaped.push_back(Ch);
    }
  }

  return Escaped;
}

// Frames `packet` as "$<escaped body>#<checksum>" and writes it to `Socket`.
// The checksum is computed over the escaped body, i.e. over the bytes actually transmitted.
void GdbServer::SendPacket(fasio::tcp_socket& Socket, const fextl::string& packet) {
  const auto escaped = escapePacket(packet);
  auto str = fextl::fmt::format("${}#{:02x}", escaped, calculateChecksum(escaped));

  // Any write error is captured in `ec` and not inspected further.
  fasio::error ec;
  write(Socket, fasio::mutable_buffer {std::as_writable_bytes(std::span {str})}, ec);
}

// Sends a one-byte acknowledgement on `Socket`: '-' when `NACK` is true, '+' otherwise.
// Nothing is sent once NoAckMode is active. If a switch to no-ack mode is pending (SettingNoAckMode),
// it takes effect right after this acknowledgement has been sent.
void GdbServer::SendACK(fasio::tcp_socket& Socket, bool NACK) {
  if (NoAckMode) {
    return;
  }

  const char Reply = NACK ? '-' : '+';
  send(Socket.FD, &Reply, sizeof(Reply), 0);

  if (SettingNoAckMode) {
    // Latch the pending mode change.
    NoAckMode = true;
    SettingNoAckMode = false;
  }
}

// Looks up the thread state object whose TID equals `TID`.
// Falls back to the first entry of the thread list (the parent thread) when no thread matches.
const FEX::HLE::ThreadStateObject* GdbServer::FindThreadByTID(uint32_t TID) {
  auto ThreadList = SyscallHandler->TM.GetThreads();

  for (auto& Candidate : *ThreadList) {
    if (Candidate->ThreadInfo.TID == TID) {
      return Candidate;
    }
  }

  // Return parent thread if TID isn't found.
  return ThreadList->at(0);
}

// Builds the register snapshot that is reported to the debugger for `ThreadObject`.
// Fills general purpose registers, rip, eflags, the MM registers, the x87 control/status words and the
// vector registers. Fields of GDBContextDefinition not assigned here keep their value-initialised state.
GdbServer::GDBContextDefinition GdbServer::GenerateContextDefinition(const FEX::HLE::ThreadStateObject* ThreadObject) {
  GDBContextDefinition GDB {};
  FEXCore::Core::CPUState state {};

  // Copy the thread state.
  // NOTE(review): copies sizeof(CPUState) bytes from the start of CurrentFrame, which assumes the frame
  // begins with its CPUState - confirm against the frame layout.
  memcpy(&state, ThreadObject->Thread->CurrentFrame, sizeof(state));

  // Encode the GDB context definition
  memcpy(&GDB.gregs[0], &state.gregs[0], sizeof(GDB.gregs));
  GDB.rip = ThreadObject->Thread->CurrentFrame->State.rip;
  GDB.eflags = CTX->ReconstructCompactedEFLAGS(ThreadObject->Thread, false, nullptr, 0);

  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
    memcpy(&GDB.mm[i], &state.mm[i], sizeof(GDB.mm[i]));
  }

  GDB.fctrl = state.FCW;

  // Assemble the x87 status word from the individually stored flags:
  // TOP at bit 11, C0 at bit 8, C1 at bit 9, C2 at bit 10, C3 at bit 14, IE at bit 0.
  GDB.fstat = static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_TOP_LOC]) << 11;
  GDB.fstat |= static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_C0_LOC]) << 8;
  GDB.fstat |= static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_C1_LOC]) << 9;
  GDB.fstat |= static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_C2_LOC]) << 10;
  GDB.fstat |= static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_C3_LOC]) << 14;
  GDB.fstat |= static_cast<uint32_t>(state.flags[FEXCore::X86State::X87FLAG_IE_LOC]);

  __uint128_t XMM_Low[FEXCore::Core::CPUState::NUM_XMMS];
  __uint128_t YMM_High[FEXCore::Core::CPUState::NUM_XMMS];

  // Each vector register is delivered as a low and a high 128-bit half, stored back to back in GDB.xmm[i].
  // NOTE(review): index 2 as the start of the high half assumes 64-bit elements in GDB.xmm[i] - confirm.
  CTX->ReconstructXMMRegisters(ThreadObject->Thread, XMM_Low, YMM_High);
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
    memcpy(&GDB.xmm[i][0], &XMM_Low[i], sizeof(__uint128_t));
    memcpy(&GDB.xmm[i][2], &YMM_High[i], sizeof(__uint128_t));
  }

  return GDB;
}

// Rebuilds LibraryMapString, a <library-list> XML document derived from /proc/self/maps.
// Does nothing unless LibraryMapChanged is set; clears that flag once the document has been rebuilt.
//
// For every mapped file that is kept, one <library> element is emitted containing a <segment> element
// per mapping, carrying the mapping's start address.
void GdbServer::buildLibraryMap() {
  if (!LibraryMapChanged) {
    // No need to update
    return;
  }

  fextl::ostringstream xml;

  fextl::string MapsFile;
  FEXCore::FileLoading::LoadFile(MapsFile, "/proc/self/maps");
  fextl::istringstream MapsStream(MapsFile);

  fextl::string Line;

  struct FileData {
    uint64_t Begin;
  };

  // Keyed by path name; each value collects the start addresses of that file's mappings.
  fextl::map<fextl::string, fextl::vector<FileData>> SegmentMaps;

  // Line format: <begin>-<end> <perms> <offset> <dev> <inode> <pathname>
  // 7ff5dd6d2000-7ff5dd6d3000 rw-p 0000a000 103:0b 1881447                   /usr/lib/x86_64-linux-gnu/libnss_compat.so.2
  const fextl::string& RuntimeExecutable = Filename();
  while (std::getline(MapsStream, Line)) {
    auto ss = fextl::istringstream(Line);
    fextl::string Tmp;
    fextl::string Begin;
    fextl::string Name;
    std::getline(ss, Begin, '-');
    std::getline(ss, Tmp, ' '); // End address (unused)
    std::getline(ss, Tmp, ' '); // Permissions (unused)
    std::getline(ss, Tmp, ' '); // File offset (unused)
    std::getline(ss, Tmp, ' '); // Device major:minor (unused)
    std::getline(ss, Tmp, ' '); // Inode (unused)
    std::getline(ss, Name, '\n'); // Remainder: padding plus path name, trimmed below

    if (strstr(Name.c_str(), "aarch64") != nullptr) {
      // If the library comes from aarch64, just skip it
      // Reduces the amount of memory gdb fetches
      continue;
    }

    Name = FEXCore::StringUtils::Trim(Name);

    struct stat sb {};
    if (stat(Name.c_str(), &sb) != -1) {
      if (S_ISCHR(sb.st_mode)) {
        // Skip this special file type
        // Fixes GDB trying to read dri render nodes
        continue;
      }
    }

    // Skip empty entries, the entry from the process, and also anything like [heap]
    if (!Name.empty() && Name != RuntimeExecutable && Name[0] != '[') {
      FileData data {
        .Begin = std::strtoul(Begin.c_str(), nullptr, 16),
      };

      SegmentMaps[Name].emplace_back(data);
    }
  }

  // NOTE(review): path names are written into the attribute verbatim, without XML escaping.
  xml << "<library-list>\n";
  for (auto& Array : SegmentMaps) {
    xml << "\t<library name=\"" << Array.first << "\">\n";
    for (auto& Data : Array.second) {
      xml << "\t\t<segment address=\"0x" << std::hex << Data.Begin << "\"/>\n";
    }
    xml << "\t</library>\n";
  }

  xml << "</library-list>\n";

  LibraryMapString = xml.str();
  LibraryMapChanged = false;
}

// Binary data transfer handlers

// Handles the qXfer "exec-file" object: replies with the executable's filename for this process.
// `annex` is an optional hex-encoded PID; an empty annex means "this process".
// A PID other than our own is answered with "E00".
GdbServer::HandledPacketType GdbServer::XferCommandExecFile(const fextl::string& annex, int offset, int length) {
  const int OwnPID = getpid();

  int annex_pid = OwnPID;
  if (!annex.empty()) {
    fextl::istringstream PIDStream(annex);
    PIDStream >> std::hex >> annex_pid;
  }

  if (annex_pid != OwnPID) {
    return {"E00", HandledPacketType::TYPE_ACK};
  }

  return {EncodeXferString(Filename(), offset, length), HandledPacketType::TYPE_ACK};
}

// Handles the qXfer "features" object. Only the "target.xml" annex is supported; it is answered with the
// target description built for the current bitness. Any other annex is answered with "E00".
GdbServer::HandledPacketType GdbServer::XferCommandFeatures(const fextl::string& annex, int offset, int length) {
  if (annex != "target.xml") {
    return {"E00", HandledPacketType::TYPE_ACK};
  }

  const auto TargetXML = GDB::Info::BuildTargetXML(Is64BitMode());
  return {EncodeXferString(TargetXML, offset, length), HandledPacketType::TYPE_ACK};
}

// Handles the qXfer "threads" object.
// The <threads> XML document is regenerated (and cached in ThreadString) whenever a read starts at
// offset 0; reads at other offsets are served from the cached document.
GdbServer::HandledPacketType GdbServer::XferCommandThreads(const fextl::string& annex, int offset, int length) {
  const bool IsFirstChunk = offset == 0;
  if (IsFirstChunk) {
    auto ThreadList = SyscallHandler->TM.GetThreads();

    ThreadString.clear();
    fextl::ostringstream Xml;
    Xml << "<threads>\n";
    for (auto& Entry : *ThreadList) {
      // Thread id is in hex without 0x prefix
      const auto ThreadName = GDB::Info::GetThreadName(::getpid(), Entry->ThreadInfo.TID);
      Xml << "<thread id=\"" << std::hex << Entry->ThreadInfo.TID << "\"";
      // The name attribute is only emitted when a name is available.
      if (!ThreadName.empty()) {
        Xml << " name=\"" << ThreadName << "\"";
      }
      Xml << "/>\n";
    }

    Xml << "</threads>\n" << std::flush;
    ThreadString = Xml.str();
  }

  return {EncodeXferString(ThreadString, offset, length), HandledPacketType::TYPE_ACK};
}

// Handles the qXfer "osdata" object.
// The OS data document is rebuilt (and cached in OSDataString) whenever a read starts at offset 0.
GdbServer::HandledPacketType GdbServer::XferCommandOSData(const fextl::string& annex, int offset, int length) {
  const bool IsFirstChunk = offset == 0;
  if (IsFirstChunk) {
    OSDataString = GDB::Info::BuildOSXML();
  }

  return {EncodeXferString(OSDataString, offset, length), HandledPacketType::TYPE_ACK};
}

// Handles the qXfer "libraries" object by serving slices of LibraryMapString.
GdbServer::HandledPacketType GdbServer::XferCommandLibraries(const fextl::string& annex, int offset, int length) {
  const bool IsFirstChunk = offset == 0;
  if (IsFirstChunk) {
    // Attempt to rebuild when reading from zero
    // (buildLibraryMap itself returns early when nothing changed).
    buildLibraryMap();
  }

  return {EncodeXferString(LibraryMapString, offset, length), HandledPacketType::TYPE_ACK};
}

// Handles the qXfer "auxv" object by serving the guest's auxiliary vector.
// In 64-bit mode the vector is copied verbatim. In 32-bit mode every Elf32_auxv_t entry is widened to an
// Elf64_auxv_t before being served.
GdbServer::HandledPacketType GdbServer::XferCommandAuxv(const fextl::string& annex, int offset, int length) {
  const auto* CodeLoader = SyscallHandler->GetCodeLoader();
  const auto [auxv_ptr, auxv_size] = CodeLoader->GetAuxv();

  fextl::string Payload;
  if (!Is64BitMode()) {
    // We need to transcode from 32-bit auxv_t to 64-bit
    const size_t EntryCount = auxv_size / sizeof(Elf32_auxv_t);
    Payload.resize(EntryCount * sizeof(Elf64_auxv_t));

    const auto* Narrow = reinterpret_cast<const Elf32_auxv_t*>(auxv_ptr);
    for (size_t Index = 0; Index < EntryCount; ++Index) {
      Elf64_auxv_t Wide;
      Wide.a_type = Narrow[Index].a_type;
      Wide.a_un.a_val = Narrow[Index].a_un.a_val;
      memcpy(Payload.data() + Index * sizeof(Elf64_auxv_t), &Wide, sizeof(Elf64_auxv_t));
    }
  } else {
    Payload.resize(auxv_size);
    memcpy(Payload.data(), reinterpret_cast<void*>(auxv_ptr), Payload.size());
  }

  return {EncodeXferString(Payload, offset, length), HandledPacketType::TYPE_ACK};
}

// Parses a "qXfer:<object>:read:<annex>:<offset>,<length>" packet and dispatches it to the handler for
// <object>. Offset and length are hexadecimal.
//
// Returns "E00" when the packet is malformed or is not a read request, and an empty TYPE_UNKNOWN reply
// when the object is not supported.
GdbServer::HandledPacketType GdbServer::handleXfer(const fextl::string& packet) {
  fextl::string object;
  fextl::string rw;
  fextl::string annex;
  int offset;
  int length;

  // Parse Xfer message
  {
    fextl::istringstream Stream(packet);
    fextl::string Prefix;
    char Separator;

    std::getline(Stream, Prefix, ':');
    std::getline(Stream, object, ':');
    std::getline(Stream, rw, ':');
    std::getline(Stream, annex, ':');

    Stream >> std::hex >> offset;
    Stream.get(Separator);
    Stream >> std::hex >> length;

    // Bail on any errors
    // The stream state is checked first, so Separator is only inspected after a successful read.
    const bool WellFormed = !Stream.fail() && Stream.eof() && Prefix == "qXfer" && rw == "read" && Separator == ',';
    if (!WellFormed) {
      return {"E00", HandledPacketType::TYPE_ACK};
    }
  }

  // Specific object documentation: https://sourceware.org/gdb/current/onlinedocs/gdb.html/General-Query-Packets.html#qXfer-read
  //
  // Objects that are not handled here:
  //   btrace, btrace-conf, libraries-svr4, memory-map, sdata, siginfo:read, siginfo:write,
  //   traceframe-info, uib, fdpic
  if (object == "auxv") {
    return XferCommandAuxv(annex, offset, length);
  }
  if (object == "exec-file") {
    return XferCommandExecFile(annex, offset, length);
  }
  if (object == "features") {
    return XferCommandFeatures(annex, offset, length);
  }
  if (object == "libraries") {
    return XferCommandLibraries(annex, offset, length);
  }
  if (object == "threads") {
    return XferCommandThreads(annex, offset, length);
  }
  if (object == "osdata") {
    return XferCommandOSData(annex, offset, length);
  }

  return {"", HandledPacketType::TYPE_UNKNOWN};
}

// Determines how many of the `Size` bytes starting at `Address` lie inside the single /proc/self/maps
// entry that contains `Address`.
//
// Returns `Size` when the whole range fits into that entry, a smaller count when the range runs past the
// entry's end, and 0 when no entry contains `Address`. Adjacent entries are not merged.
static size_t CheckMemMapping(uint64_t Address, size_t Size) {
  const uint64_t AddressEnd = Address + Size;

  fextl::string MapsFile;
  FEXCore::FileLoading::LoadFile(MapsFile, "/proc/self/maps");
  fextl::istringstream MapsStream(MapsFile);

  for (fextl::string Line; std::getline(MapsStream, Line);) {
    // A final line that is not newline-terminated is not examined.
    if (MapsStream.eof()) {
      break;
    }

    uint64_t Begin, End;
    char r, w, x, p;
    const int Parsed = sscanf(Line.c_str(), "%lx-%lx %c%c%c%c", &Begin, &End, &r, &w, &x, &p);
    if (Parsed != 6) {
      continue;
    }

    // Entry covers [Begin, End); skip it unless it contains Address.
    if (Address < Begin || Address >= End) {
      continue;
    }

    const size_t Overrun = AddressEnd > End ? AddressEnd - End : 0;
    return Size - Overrun;
  }

  return 0;
}

// Builds the section offsets reply "Text=<x>;Data=<x>;Bss=<x>".
// All three fields carry the same value: the code loader's base offset, in hex.
GdbServer::HandledPacketType GdbServer::handleProgramOffsets() {
  const uint64_t Base = SyscallHandler->GetCodeLoader()->GetBaseOffset();
  return {fextl::fmt::format("Text={:x};Data={:x};Bss={:x}", Base, Base, Base), HandledPacketType::TYPE_ACK};
}

// Applies a run-control action requested by the debugger.
//   'c' - continue: resume threads, wake anything parked in WaitForThreadWakeup, wait until they run.
//   's' - step.
//   't' - stop.
// Any other action is answered with "E00".
// NOTE(review): `tid` is not used by any branch; every action goes through the thread manager as a whole.
GdbServer::HandledPacketType GdbServer::ThreadAction(char action, uint32_t tid) {
  switch (action) {
  case 'c': {
    SyscallHandler->TM.Run();
    ThreadBreakEvent.NotifyAll();
    SyscallHandler->TM.WaitForThreadsToRun();
    // Only the acknowledgement is sent; no reply payload.
    return {"", HandledPacketType::TYPE_ONLYACK};
  }
  case 's': {
    SyscallHandler->TM.Step();
    // Sends "OK" followed by a T05 stop reply here; the returned "OK" is then sent by the caller as well.
    // NOTE(review): the stop reply reports getpid() as the thread id rather than `tid` - confirm this is intended.
    SendPacketPair({"OK", HandledPacketType::TYPE_ACK});
    fextl::string str = fextl::fmt::format("T05thread:{:02x};", getpid());
    if (LibraryMapChanged) {
      // If libraries have changed then let gdb know
      str += "library:1;";
    }

    SendPacketPair({std::move(str), HandledPacketType::TYPE_ACK});
    return {"OK", HandledPacketType::TYPE_ACK};
  }
  case 't':
    // This thread isn't part of the thread pool
    SyscallHandler->TM.Stop();
    return {"OK", HandledPacketType::TYPE_ACK};
  default: return {"E00", HandledPacketType::TYPE_ACK};
  }
}

// Command handlers
// `!` packet: extended mode is acknowledged unconditionally; the packet content is not inspected.
GdbServer::HandledPacketType GdbServer::CommandEnableExtendedMode(const fextl::string& packet) {
  return HandledPacketType {"OK", HandledPacketType::TYPE_ACK};
}

// `?` packet: reports why the target stopped.
// Always answers with a `T00` stop reply naming the process id as the thread.
// Behaviour would differ in non-stop mode; Binja doesn't support an `S` response here.
GdbServer::HandledPacketType GdbServer::CommandQueryHalted(const fextl::string& packet) {
  return {fextl::fmt::format("T00thread:{:x};", getpid()), HandledPacketType::TYPE_ACK};
}

// `c` packet: continue execution through the shared thread-action path.
GdbServer::HandledPacketType GdbServer::CommandContinue(const fextl::string& packet) {
  // The legacy packet carries no thread id; 0 is forwarded.
  const uint32_t NoSpecificThread = 0;
  return ThreadAction('c', NoSpecificThread);
}

// `D` packet: detach. Threads are resumed and confirmed running before replying OK,
// so the guest is not left paused after the debugger goes away.
GdbServer::HandledPacketType GdbServer::CommandDetach(const fextl::string& packet) {
  auto& ThreadManager = SyscallHandler->TM;
  ThreadManager.Run();
  ThreadManager.WaitForThreadsToRun();
  return HandledPacketType {"OK", HandledPacketType::TYPE_ACK};
}

// `g` packet: dumps the register file of the currently selected debugging thread
// as one hex string. Field order: GPRs, rip, eflags, cs/ss/ds/es/fs/gs, the eight
// x87/MMX slots, fctrl, fstat, the dummy block, XMM registers, mxcsr.
// In 32-bit mode half the GPR/XMM count is emitted, with 4-byte GPRs and 16-byte vectors.
GdbServer::HandledPacketType GdbServer::CommandReadRegisters(const fextl::string& packet) {
  // The guest may be running while we try to read, so pause up front.
  SyscallHandler->TM.Pause();

  const FEX::HLE::ThreadStateObject* CurrentThread = FindThreadByTID(CurrentDebuggingThread);

  const bool Is64Bit = Is64BitMode();
  const size_t GPRCount = Is64Bit ? FEXCore::Core::CPUState::NUM_GPRS : FEXCore::Core::CPUState::NUM_GPRS / 2;
  const size_t GPRBytes = Is64Bit ? sizeof(uint64_t) : sizeof(uint32_t);
  const size_t XMMCount = Is64Bit ? FEXCore::Core::CPUState::NUM_XMMS : FEXCore::Core::CPUState::NUM_XMMS / 2;
  const size_t XMMBytes = Is64Bit ? sizeof(__uint128_t) * 2 : sizeof(__uint128_t);

  auto GDB = GenerateContextDefinition(CurrentThread);

  fextl::string Reply;
  // Hex-encodes `Size` raw bytes at `Data` and appends them to the reply.
  const auto Emit = [&](const void* Data, size_t Size) {
    Reply += appendHex(static_cast<const char*>(Data), Size);
  };

  for (size_t Index = 0; Index < GPRCount; ++Index) {
    Emit(&GDB.gregs[Index], GPRBytes);
  }
  Emit(&GDB.rip, GPRBytes);
  Emit(&GDB.eflags, sizeof(uint32_t));

  // Segment registers
  Emit(&GDB.cs, sizeof(uint32_t));
  Emit(&GDB.ss, sizeof(uint32_t));
  Emit(&GDB.ds, sizeof(uint32_t));
  Emit(&GDB.es, sizeof(uint32_t));
  Emit(&GDB.fs, sizeof(uint32_t));
  Emit(&GDB.gs, sizeof(uint32_t));

  for (auto& Slot : GDB.mm) {
    Emit(&Slot, sizeof(X80Float));
  }

  Emit(&GDB.fctrl, sizeof(uint32_t));
  Emit(&GDB.fstat, sizeof(uint32_t));
  Emit(&GDB.dummies, sizeof(GDB.dummies));

  for (size_t Index = 0; Index < XMMCount; ++Index) {
    Emit(&GDB.xmm[Index], XMMBytes);
  }

  Emit(&GDB.mxcsr, sizeof(uint32_t));

  return {std::move(Reply), HandledPacketType::TYPE_ACK};
}

// `H` packet: selects the thread used by subsequent operations.
//   Hc<tid> - thread for step/continue (deprecated in favour of vCont).
//   Hg<tid> - thread for "other" operations.
// Both variants are handled identically: the hex thread id is stored in
// CurrentDebuggingThread and the guest is paused. Other `H` packets are reported as unknown.
GdbServer::HandledPacketType GdbServer::CommandThreadOp(const fextl::string& packet) {
  const bool SelectsThread = packet.starts_with("Hc") || packet.starts_with("Hg");
  if (!SelectsThread) {
    return {"", HandledPacketType::TYPE_UNKNOWN};
  }

  // Skip the two-character prefix and parse the id that follows.
  fextl::istringstream Stream(packet);
  Stream.seekg(2);
  Stream >> std::hex >> CurrentDebuggingThread;

  // This must return quick otherwise IDA complains
  SyscallHandler->TM.Pause();
  return {"OK", HandledPacketType::TYPE_ACK};
}

// `k` packet: stops the guest threads and blocks until the thread manager reports idle.
// No reply packet is produced.
GdbServer::HandledPacketType GdbServer::CommandKill(const fextl::string& packet) {
  auto& ThreadManager = SyscallHandler->TM;
  ThreadManager.Stop();
  ThreadManager.WaitForIdle(); // Block until exit
  return HandledPacketType {"", HandledPacketType::TYPE_NONE};
}

// `m addr,length` / `M addr,length:XX...` packets: read or write guest-visible memory.
//
// Reads return up to `length` bytes hex-encoded; the range is clamped to the mapping
// that contains `addr`, so a short reply is possible.
// Writes are all-or-nothing: if the whole range is not inside the mapping that
// contains `addr`, nothing is written and E00 is returned.
// Malformed packets and unmapped addresses also yield E00.
GdbServer::HandledPacketType GdbServer::CommandMemory(const fextl::string& packet) {
  // Zero-initialized so a failed extraction never leaves indeterminate values behind.
  size_t addr {};
  size_t length {};
  fextl::string data;

  auto ss = fextl::istringstream(packet);
  const bool write = ss.get() == 'M';
  ss >> std::hex >> addr;
  ss.get(); // discard comma
  ss >> std::hex >> length;

  if (write) {
    ss.get();                 // discard colon
    data = hexstring(ss, -1); // grab data until end of file.
  }

  // validate packet
  if (ss.fail() || !ss.eof() || (write && (data.length() != length))) {
    return {"E00", HandledPacketType::TYPE_ACK};
  }

  const size_t Accessible = CheckMemMapping(addr, length);
  if (Accessible == 0) {
    return {"E00", HandledPacketType::TYPE_ACK};
  }

  void* ptr = reinterpret_cast<void*>(addr);

  if (write) {
    // A partial write must be reported as an error, and copying the full payload
    // would run past the end of the mapping. Refuse before touching memory.
    if (Accessible != length) {
      return {"E00", HandledPacketType::TYPE_ACK};
    }
    std::memcpy(ptr, data.data(), length);
    // TODO: invalidate any code
    return {"OK", HandledPacketType::TYPE_ACK};
  }

  return {encodeHex((unsigned char*)ptr, Accessible), HandledPacketType::TYPE_ACK};
}

// `p` packet: reads a single register of the currently selected debugging thread.
//
// The hex number in the packet is interpreted as a byte offset into
// GDBContextDefinition and mapped to the field (or array element) that covers it.
// Segment-register offsets reply with zero; any offset in the dummies range replies
// with dummies[0]. Unparseable or unknown offsets yield E00.
GdbServer::HandledPacketType GdbServer::CommandReadReg(const fextl::string& packet) {
  size_t addr {};
  auto ss = fextl::istringstream(packet);
  ss.get(); // Drop first letter
  ss >> std::hex >> addr;

  if (ss.fail()) {
    // No usable register number in the packet.
    return {"E00", HandledPacketType::TYPE_ACK};
  }

  const FEX::HLE::ThreadStateObject* CurrentThread = FindThreadByTID(CurrentDebuggingThread);
  auto GDB = GenerateContextDefinition(CurrentThread);

  // Hex-encodes `Size` bytes at `Data` into an ACK reply.
  const auto Reply = [&](const void* Data, size_t Size) -> HandledPacketType {
    return {encodeHex(static_cast<const unsigned char*>(Data), Size), HandledPacketType::TYPE_ACK};
  };

  if (addr >= offsetof(GDBContextDefinition, gregs[0]) && addr < offsetof(GDBContextDefinition, gregs[16])) {
    // Index relative to the start of the array, matching how the other ranges are handled.
    const auto GPRIndex = (addr - offsetof(GDBContextDefinition, gregs[0])) / sizeof(uint64_t);
    return Reply(&GDB.gregs[GPRIndex], sizeof(uint64_t));
  } else if (addr == offsetof(GDBContextDefinition, rip)) {
    return Reply(&GDB.rip, sizeof(uint64_t));
  } else if (addr == offsetof(GDBContextDefinition, eflags)) {
    return Reply(&GDB.eflags, sizeof(uint32_t));
  } else if (addr >= offsetof(GDBContextDefinition, cs) && addr < offsetof(GDBContextDefinition, mm[0])) {
    // Segment registers are reported as zero.
    uint32_t Empty {};
    return Reply(&Empty, sizeof(uint32_t));
  } else if (addr >= offsetof(GDBContextDefinition, mm[0]) && addr < offsetof(GDBContextDefinition, mm[8])) {
    const auto MMIndex = (addr - offsetof(GDBContextDefinition, mm[0])) / sizeof(X80Float);
    return Reply(&GDB.mm[MMIndex], sizeof(X80Float));
  } else if (addr == offsetof(GDBContextDefinition, fctrl)) {
    return Reply(&GDB.fctrl, sizeof(uint32_t));
  } else if (addr == offsetof(GDBContextDefinition, fstat)) {
    return Reply(&GDB.fstat, sizeof(uint32_t));
  } else if (addr >= offsetof(GDBContextDefinition, dummies[0]) && addr < offsetof(GDBContextDefinition, dummies[6])) {
    return Reply(&GDB.dummies[0], sizeof(uint32_t));
  } else if (addr >= offsetof(GDBContextDefinition, xmm[0][0]) && addr < offsetof(GDBContextDefinition, xmm[16][0])) {
    const auto XmmIndex = (addr - offsetof(GDBContextDefinition, xmm[0][0])) / FEXCore::Core::CPUState::XMM_AVX_REG_SIZE;
    return Reply(&GDB.xmm[XmmIndex], FEXCore::Core::CPUState::XMM_AVX_REG_SIZE);
  } else if (addr == offsetof(GDBContextDefinition, mxcsr)) {
    return Reply(&GDB.mxcsr, sizeof(uint32_t));
  }

  LogMan::Msg::EFmt("Unknown GDB register 0x{:x}", addr);
  return {"E00", HandledPacketType::TYPE_ACK};
}

// `q` / `Q` packets: general query and set handling.
//
// Dispatches on the packet prefix. Handled queries: QNonStop, qSupported, qAttached,
// qXfer, qOffsets, qTStatus, qfThreadInfo/qsThreadInfo, qThreadExtraInfo, qC,
// QStartNoAckMode, qSymbol, QPassSignals, and the lldb-specific qHostInfo and
// qGetWorkingDir. Anything else is reported as unknown.
GdbServer::HandledPacketType GdbServer::CommandQuery(const fextl::string& packet) {
  const auto match = [&](const char* str) -> bool {
    return packet.rfind(str, 0) == 0;
  };
  const auto MatchStr = [](const fextl::string& Str, const char* str) -> bool {
    return Str.rfind(str, 0) == 0;
  };

  // Splits `Str` into the elements separated by `deliminator`.
  const auto split = [](const fextl::string& Str, char deliminator) -> fextl::vector<fextl::string> {
    fextl::vector<fextl::string> Elements;
    fextl::istringstream Input(Str);
    for (fextl::string Element; std::getline(Input, Element, deliminator);) {
      Elements.emplace_back(Element);
    }
    return Elements;
  };

  if (match("QNonStop:")) {
    auto ss = fextl::istringstream(packet);
    // The matched prefix already includes the colon, so the value starts right after it.
    ss.seekg(strlen("QNonStop:"));
    ss >> NonStopMode;
    return {"OK", HandledPacketType::TYPE_ACK};
  }
  if (match("qSupported:")) {
    // eg: qSupported:multiprocess+;swbreak+;hwbreak+;qRelocInsn+;fork-events+;vfork-events+;exec-events+;vContSupported+;QThreadEvents+;no-resumed+;memory-tagging+;xmlRegisters=i386
    auto Features = split(packet.substr(strlen("qSupported:")), ';');

    // For feature documentation
    // https://sourceware.org/gdb/current/onlinedocs/gdb/General-Query-Packets.html#qSupported
    fextl::string SupportedFeatures {};

    // Required features
    SupportedFeatures += "PacketSize=32768;";
    SupportedFeatures += "xmlRegisters=i386;";

    SupportedFeatures += "qXfer:auxv:read+;";
    SupportedFeatures += "qXfer:exec-file:read+;";
    SupportedFeatures += "qXfer:features:read+;";
    SupportedFeatures += "qXfer:libraries:read+;";
    // Don't enable this feature. If enabled then gdb doesn't query for
    // memory-map updates post-launch. Resulting in the inability to
    // disassemble code from loaded libraries.
    // gdbserver running on a true host also doesn't use this feature.
    // It is likely used for embedded environments where you have a fixed
    // memory map.
    // SupportedFeatures += "qXfer:memory-map:read+;";
    SupportedFeatures += "qXfer:siginfo:read+;";
    SupportedFeatures += "qXfer:siginfo:write+;";
    SupportedFeatures += "qXfer:threads:read+;";
    SupportedFeatures += "QCatchSignals+;";
    SupportedFeatures += "QPassSignals+;";
    SupportedFeatures += "QNonStop+;";

    SupportedFeatures += "qXfer:osdata:read+;";
    SupportedFeatures += "QStartNoAckMode+;";

    // TODO: Support breakpoints
    // SupportedFeatures += "swbreak+;";
    // SupportedFeatures += "hwbreak+;";
    // SupportedFeatures += "BreakpointCommands+;";

    // TODO: If we want to support conditional breakpoints then we need to support single stepping.
    // SupportedFeatures += "ConditionalBreakpoints+;";

    // Echo back the client-offered features that are acknowledged.
    for (auto& Feature : Features) {
      if (MatchStr(Feature, "swbreak+")) {
        SupportedFeatures += "swbreak+;";
      }
      if (MatchStr(Feature, "hwbreak+")) {
        SupportedFeatures += "hwbreak+;";
      }
      if (MatchStr(Feature, "vContSupported+")) {
        SupportedFeatures += "vContSupported+;";
      }

      // Unsupported:
      //  multiprocess
      //  qRelocInsn
      //  fork-events
      //  vfork-events
      //  exec-events
      //  QThreadEvents
      //  no-resumed
      //  memory-tagging
    }
    return {std::move(SupportedFeatures), HandledPacketType::TYPE_ACK};
  }
  if (match("qAttached")) {
    // NOTE(review): the documented qAttached replies are `1`, `0` or an error; confirm this string is intended.
    return {"tnotrun:0", HandledPacketType::TYPE_ACK}; // We don't currently support launching executables from gdb.
  }
  if (match("qXfer")) {
    return handleXfer(packet);
  }
  if (match("qOffsets")) {
    return handleProgramOffsets();
  }
  if (match("qTStatus")) {
    // We don't support trace experiments
    return {"", HandledPacketType::TYPE_ACK};
  }
  if (match("qfThreadInfo")) {
    auto Threads = SyscallHandler->TM.GetThreads();

    // Reply is `m` followed by a comma separated list of hex thread ids.
    fextl::ostringstream ss;
    ss << "m";
    for (size_t i = 0; i < Threads->size(); ++i) {
      auto Thread = Threads->at(i);
      ss << std::hex << Thread->ThreadInfo.TID;
      if (i != (Threads->size() - 1)) {
        ss << ",";
      }
    }
    return {ss.str(), HandledPacketType::TYPE_ACK};
  }
  if (match("qsThreadInfo")) {
    // The whole list was sent in the qfThreadInfo reply; `l` marks the end.
    return {"l", HandledPacketType::TYPE_ACK};
  }
  if (match("qThreadExtraInfo")) {
    auto ss = fextl::istringstream(packet);
    ss.seekg(fextl::string("qThreadExtraInfo").size());
    ss.get(); // discard comma
    uint32_t ThreadID {};
    ss >> std::hex >> ThreadID;
    auto ThreadName = GDB::Info::GetThreadName(::getpid(), ThreadID);
    return {encodeHex((unsigned char*)ThreadName.data(), ThreadName.size()), HandledPacketType::TYPE_ACK};
  }
  if (match("qC")) {
    // Returns the current Thread ID. The protocol's reply form is `QC thread-id`.
    auto Threads = SyscallHandler->TM.GetThreads();
    fextl::ostringstream ss;
    ss << "QC" << std::hex << Threads->at(0)->ThreadInfo.TID;
    return {ss.str(), HandledPacketType::TYPE_ACK};
  }
  if (match("QStartNoAckMode")) {
    SettingNoAckMode = true;
    return {"OK", HandledPacketType::TYPE_ACK};
  }
  if (match("qSymbol")) {
    auto ss = fextl::istringstream(packet);
    ss.seekg(fextl::string("qSymbol").size());
    ss.get(); // discard colon
    fextl::string Symbol_Val, Symbol_name;
    std::getline(ss, Symbol_Val, ':');
    std::getline(ss, Symbol_name, ':');

    // Only the initial empty offer (`qSymbol::`) is accepted; no symbol lookups are requested.
    if (Symbol_Val.empty() && Symbol_name.empty()) {
      return {"OK", HandledPacketType::TYPE_ACK};
    } else {
      return {"", HandledPacketType::TYPE_UNKNOWN};
    }
  }

  if (match("QPassSignals")) {
    // First set all signals as unpassed
    std::fill(PassSignals.begin(), PassSignals.end(), false);

    // eg: QPassSignals:e;10;14;17;1a;1b;1c;21;24;25;2c;4c;97;
    auto ss = fextl::istringstream(packet);
    ss.seekg(fextl::string("QPassSignals").size());
    ss.get(); // discard colon

    // We now have a semi-colon deliminated list of signals to pass to the guest process
    for (fextl::string tmp; std::getline(ss, tmp, ';');) {
      // from_chars reports malformed tokens through an error code instead of throwing.
      uint32_t Signal {};
      const auto Parsed = std::from_chars(tmp.data(), tmp.data() + tmp.size(), Signal, 16);
      if (Parsed.ec != std::errc {}) {
        continue;
      }
      if (Signal < FEX::HLE::SignalDelegator::MAX_SIGNALS) {
        PassSignals[Signal] = true;
      }
    }

    return {"OK", HandledPacketType::TYPE_ACK};
  }

  // lldb specific queries
  if (match("qHostInfo")) {
    // Returns Key:Value pairs separated by ;
    // eg:
    // triple:7838365f36342d70632d6c696e75782d676e75;
    // ptrsize:8;
    // distribution_id:7562756e7475;
    // watchpoint_exceptions_received:after;
    // endian:little;
    // os_version:6.3.3;
    // os_build:362e332e332d3036303330332d67656e65726963;
    // os_kernel:2332303233303531373133333620534d5020505245454d50545f44594e414d494320576564204d61792031372031333a34353a3139205554432032303233;
    // hostname:7279616e682d545235303030;
    fextl::string HostFeatures {};

    // 64-bit always returned for the host environment.
    // qProcessInfo will return i386 or not.
    HostFeatures += fextl::fmt::format("triple:{};", encodeHex("x86_64-pc-linux-gnu"));
    HostFeatures += "ptrsize:8;";

    // Always little-endian.
    HostFeatures += "endian:little;";

    struct utsname buf {};
    if (uname(&buf) != -1) {
      uint32_t Major {};
      uint32_t Minor {};
      uint32_t Patch {};

      // Parse kernel version in the form of `<Major>.<Minor>.<Patch>[Optional Data]`
      const auto End = buf.release + sizeof(buf.release);
      auto Results = std::from_chars(buf.release, End, Major, 10);
      Results = std::from_chars(Results.ptr + 1, End, Minor, 10);
      Results = std::from_chars(Results.ptr + 1, End, Patch, 10);

      HostFeatures += fextl::fmt::format("os_version:{}.{}.{};", Major, Minor, Patch);

      // os_build returns the release untouched.
      HostFeatures += fextl::fmt::format("os_build:{};", encodeHex(buf.release));
      HostFeatures += fextl::fmt::format("os_kernel:{};", encodeHex(buf.version));
      HostFeatures += fextl::fmt::format("hostname:{};", encodeHex(buf.nodename));
    }

    // TODO: distribution_id should be fetched with `lsb_release -i`
    // TODO: watchpoint_exceptions_received is unsupported
    return {std::move(HostFeatures), HandledPacketType::TYPE_ACK};
  }
  if (match("qGetWorkingDir")) {
    char Tmp[PATH_MAX];
    if (getcwd(Tmp, PATH_MAX)) {
      return {encodeHex(Tmp), HandledPacketType::TYPE_ACK};
    }
    return {"E00", HandledPacketType::TYPE_ACK};
  }
  return {"", HandledPacketType::TYPE_UNKNOWN};
}

// `s` packet: single step through the shared thread-action path.
GdbServer::HandledPacketType GdbServer::CommandSingleStep(const fextl::string& packet) {
  // The legacy packet carries no thread id; 0 is forwarded.
  const uint32_t NoSpecificThread = 0;
  return ThreadAction('s', NoSpecificThread);
}

// `T` packet: every queried thread is reported as alive; the thread id in the packet is not checked.
GdbServer::HandledPacketType GdbServer::CommandQueryThreadAlive(const fextl::string& packet) {
  return HandledPacketType {"OK", HandledPacketType::TYPE_ACK};
}

// `vFile:` packets: host file I/O on behalf of the debugger.
//
// Supported operations: open, setfs, close, pread. Replies use the `F result[,errno]`
// form, with raw data appended after `;` for pread. Requests whose arguments cannot
// be parsed (or that carry a negative count/offset) are answered with `F-1,<EINVAL>`
// without performing any file operation. Unrecognised operations get an empty reply.
GdbServer::HandledPacketType GdbServer::HandlevFile(const fextl::string& packet) {
  const auto match = [&](const fextl::string& str) -> std::optional<fextl::istringstream> {
    if (packet.rfind(str, 0) == 0) {
      auto ss = fextl::istringstream(packet);
      ss.seekg(str.size());
      return ss;
    }
    return std::nullopt;
  };

  const auto F = [](int result) -> fextl::string {
    return fextl::fmt::format("F{:x}", result);
  };
  // Failure reply carrying the current errno.
  const auto F_error = []() -> fextl::string {
    return fextl::fmt::format("F-1,{:x}", errno);
  };
  const auto F_data = [](int result, const fextl::string& data) -> fextl::string {
    // Binary encoded data is raw appended to the end
    return fextl::fmt::format("F{:#x};", result) + data;
  };
  // Failure reply for a request with unusable arguments.
  const auto InvalidRequest = [&]() -> HandledPacketType {
    errno = EINVAL;
    return {F_error(), HandledPacketType::TYPE_ACK};
  };

  std::optional<fextl::istringstream> ss;
  if ((ss = match("vFile:open:"))) {
    int flags {};
    int mode {};

    fextl::string filename = hexstring(*ss, ',');
    *ss >> std::hex >> flags;
    ss->get(); // discard comma
    *ss >> std::hex >> mode;

    if (ss->fail()) {
      return InvalidRequest();
    }

    const int fd = open(filename.c_str(), flags, mode);
    if (fd < 0) {
      // Report why the open failed rather than a bare -1.
      return {F_error(), HandledPacketType::TYPE_ACK};
    }
    return {F(fd), HandledPacketType::TYPE_ACK};
  }
  if ((ss = match("vFile:setfs:"))) {
    int pid {};
    *ss >> pid;

    return {F(pid == 0 ? 0 : -1), HandledPacketType::TYPE_ACK}; // Only support the common filesystem
  }
  if ((ss = match("vFile:close:"))) {
    int fd {};
    *ss >> std::hex >> fd;

    // Never close a descriptor that wasn't actually named in the packet.
    if (ss->fail()) {
      return InvalidRequest();
    }

    if (close(fd) < 0) {
      return {F_error(), HandledPacketType::TYPE_ACK};
    }
    return {F(0), HandledPacketType::TYPE_ACK};
  }
  if ((ss = match("vFile:pread:"))) {
    int fd {};
    int count {};
    int offset {};

    *ss >> std::hex >> fd;
    ss->get(); // discard comma
    *ss >> std::hex >> count;
    ss->get(); // discard comma
    *ss >> std::hex >> offset;

    // A negative count would turn into an enormous buffer size below.
    if (ss->fail() || count < 0 || offset < 0) {
      return InvalidRequest();
    }

    fextl::string data(count, '\0');
    if (lseek(fd, offset, SEEK_SET) < 0) {
      return {F_error(), HandledPacketType::TYPE_ACK};
    }
    int ret = read(fd, data.data(), count);
    if (ret < 0) {
      return {F_error(), HandledPacketType::TYPE_ACK};
    }

    if (ret == 0) {
      // End of file
      return {F(0), HandledPacketType::TYPE_ACK};
    }

    data.resize(ret);
    return {F_data(ret, data), HandledPacketType::TYPE_ACK};
  }

  return {"", HandledPacketType::TYPE_ACK};
}

// `vCont` packets.
//   vCont?                 - lists the supported actions.
//   vCont;<action>[:<tid>] - runs the first action through ThreadAction; a malformed
//                            action or thread id yields E00.
// Any other `vCont` form gets an empty reply.
GdbServer::HandledPacketType GdbServer::HandlevCont(const fextl::string& packet) {
  if (packet.starts_with("vCont?")) {
    // We support continue, step and terminate
    // FIXME: We also claim to support continue with signal... because it's compulsory
    return {"vCont;c;t;s;r", HandledPacketType::TYPE_ACK};
  }

  if (packet.starts_with("vCont;")) {
    fextl::istringstream Stream(packet);
    Stream.seekg(strlen("vCont;"));

    const char action = Stream.get();

    // The thread id is optional and defaults to 0.
    int thread {};
    if (Stream.peek() == ':') {
      Stream.get();
      Stream >> std::hex >> thread;
    }

    if (Stream.fail()) {
      return {"E00", HandledPacketType::TYPE_ACK};
    }

    return ThreadAction(action, thread);
  }

  return {"", HandledPacketType::TYPE_ACK};
}

// `v` packets: routes multi-letter commands to their handlers.
// vCont and vFile have dedicated handlers; vKill sends SIGKILL to this process.
// Not implemented yet: vAttach, vCtrlC, vRun, vStopped.
GdbServer::HandledPacketType GdbServer::CommandMultiLetterV(const fextl::string& packet) {
  const auto IsCommand = [&packet](const char* Prefix) {
    return packet.starts_with(Prefix);
  };

  if (IsCommand("vCont")) {
    return HandlevCont(packet);
  }

  if (IsCommand("vFile")) {
    return HandlevFile(packet);
  }

  if (IsCommand("vKill")) {
    const auto Pid = ::getpid();
    tgkill(Pid, ::getpid(), SIGKILL);
  }

  return {"", HandledPacketType::TYPE_ACK};
}

// `z` / `Z` packets: breakpoint removal / insertion.
// The packet fields are parsed but nothing is done with them yet; the guest is
// paused and OK is returned regardless.
GdbServer::HandledPacketType GdbServer::CommandBreakpoint(const fextl::string& packet) {
  fextl::istringstream Stream(packet);

  // Don't do anything with set breakpoints yet
  [[maybe_unused]] const bool Set = Stream.get() == 'Z';

  // Two comma separated hex fields follow the command letter.
  uint64_t Addr;
  uint64_t Type;
  Stream >> std::hex >> Addr;
  Stream.get(); // discard comma
  Stream >> std::hex >> Type;

  SyscallHandler->TM.Pause();
  return {"OK", HandledPacketType::TYPE_ACK};
}

// Fallback for commands that are recognised but not implemented: replies as unknown.
GdbServer::HandledPacketType GdbServer::CommandUnknown(const fextl::string& packet) {
  return HandledPacketType {"", HandledPacketType::TYPE_UNKNOWN};
}

// Dispatches a decoded packet to its command handler based on the first character.
// Packet commands list: https://sourceware.org/gdb/current/onlinedocs/gdb.html/Packets.html#Packets
GdbServer::HandledPacketType GdbServer::ProcessPacket(const fextl::string& packet) {
  switch (packet[0]) {
  // ---- Implemented commands ----
  // $!  Enable extended mode. Args: <None>
  case '!': return CommandEnableExtendedMode(packet);
  // $?  Sent when the connection is first established to query why the target halted.
  case '?': return CommandQueryHalted(packet);
  // $c  Continue execution. Args: [addr]. Deprecated: see $vCont for multi-threaded support.
  case 'c': return CommandContinue(packet);
  // $D  Detach GDB from the remote system. Args: [;pid]
  case 'D': return CommandDetach(packet);
  // $g  Read general registers. Args: <None>
  case 'g': return CommandReadRegisters(packet);
  // $H  Set thread for subsequent operations. Args: op thread-id
  case 'H': return CommandThreadOp(packet);
  // $k  Kill process.
  case 'k': return CommandKill(packet);
  // $m  Read addressable memory. Args: addr,length
  // $M  Write addressable memory. Args: addr,length:XX...
  case 'm':
  case 'M': return CommandMemory(packet);
  // $p  Read the value of a register. Args: index
  case 'p': return CommandReadReg(packet);
  // $q  General query fetching. Args: Name params...
  // $Q  General query setting. Args: Name params...
  case 'q':
  case 'Q': return CommandQuery(packet);
  // $s  Single step. Args: [addr]
  case 's': return CommandSingleStep(packet);
  // $T  Find out if the thread is alive. Args: thread-id
  case 'T': return CommandQueryThreadAlive(packet);
  // $v<Operation>  Multi-letter command.
  case 'v': return CommandMultiLetterV(packet);
  // $z  Remove a type of breakpoint or watchpoint. Args: type,addr,kind
  // $Z  Insert a type of breakpoint or watchpoint. Args: type,addr,kind
  case 'z':
  case 'Z': return CommandBreakpoint(packet);

  // ---- Known to the protocol but not implemented ----
  // $A  Initialized argv[] array passed in to the program. Args: arglen,argnum,arg,...
  case 'A':
  // $b  Change the serial line speed to baud. Args: baud. Deprecated: behaviour isn't well-defined.
  case 'b':
  // $B  Set or clear a breakpoint at address. Args: addr,mode. Deprecated: use $Z and $z instead.
  case 'B':
  // $C  Continue with signal. Args: sig[;addr]. Deprecated: see $vCont.
  case 'C':
  // $d  Toggle debug flag. Args: <None>. Deprecated: use $q or $Q instead.
  case 'd':
  // $F  A reply from GDB to the `F` packet sent by the target (File-I/O protocol). Args: RC,EE,CF;XX
  case 'F':
  // $G  Write general registers. Args: XX...
  case 'G':
  // $i  Step the remote target by a single clock cycle. Args: [addr[,nnn]]
  case 'i':
  // $I  Signal, then cycle step. Args: <None>
  case 'I':
  // $r  Reset the entire system. Args: <None>. Deprecated: use $R instead.
  case 'r':
  // $R  Restart the program being debugged. Args: XX
  case 'R':
  // $S  Step with signal. Args: sig[;addr]. Deprecated: see $vCont.
  case 'S':
  // $t  Search backwards starting at address with pattern and mask. Args: addr:PP,MM
  case 't':
  // $X  Write data to memory. Args: addr,length:XX...
  case 'X': return CommandUnknown(packet);

  default: return {"", HandledPacketType::TYPE_UNKNOWN};
  }
}

// Transmits the acknowledgement and/or reply packet that a response type calls for.
// Sending is serialised through sendMutex.
void GdbServer::SendPacketPair(const HandledPacketType& response) {
  std::lock_guard lk(sendMutex);

  switch (response.TypeResponse) {
  case HandledPacketType::TYPE_ACK:
    // Positive acknowledgement followed by the reply.
    SendACK(*CommsSocket, false);
    SendPacket(*CommsSocket, response.Response);
    break;
  case HandledPacketType::TYPE_ONLYACK:
    SendACK(*CommsSocket, false);
    break;
  case HandledPacketType::TYPE_NACK:
    // Negative acknowledgement followed by the reply.
    SendACK(*CommsSocket, true);
    SendPacket(*CommsSocket, response.Response);
    break;
  case HandledPacketType::TYPE_ONLYNACK:
    SendACK(*CommsSocket, true);
    break;
  case HandledPacketType::TYPE_UNKNOWN:
    // An empty packet tells the client the command isn't supported.
    SendPacket(*CommsSocket, "");
    break;
  case HandledPacketType::TYPE_NONE:
    break;
  default:
    SendPacket(*CommsSocket, response.Response);
    break;
  }
}

// Completion predicate for the async read: decides whether [begin, end) holds one full message.
// Returns the iterator up to which data was examined/consumed and whether a message is complete.
//   '+', '-', 0x03 : single-byte messages.
//   '$'            : complete once `#` and its two checksum characters have arrived.
// Any other leading byte is treated as fatal.
std::pair<fextl::vector<std::byte>::iterator, bool>
GdbServer::MatchPacket(fextl::vector<std::byte>::iterator begin, fextl::vector<std::byte>::iterator end) {
  if (CommsBuffer.empty()) {
    return {begin, false};
  }

  const char Lead = static_cast<char>(CommsBuffer[0]);

  if (Lead == '+' || Lead == '-' || Lead == '\x03') {
    // No further data
    return {std::next(begin), true};
  }

  if (Lead == '$') {
    // Message format: $packet-data#checksum, where checksum is a single byte.
    const auto Terminator = std::find(begin, end, static_cast<std::byte>('#'));
    if (Terminator == end) {
      // No match; fetch more data
      return {end, false};
    }
    if (end - Terminator <= 2) {
      // Found '#' but missing the checksum bytes
      return {Terminator, false};
    }
    return {std::next(Terminator, 3), true};
  }

  ERROR_AND_DIE_FMT("Unexpected character at beginning of GDB packet: {}", CommsBuffer[0]);
}

// Completion handler for the async read: processes the message at the front of
// CommsBuffer, removes it from the buffer and re-arms the read.
// A read error or a zero-length message is fatal.
void GdbServer::HandlePacket(fasio::error ec, size_t BytesInMessage) {
  const bool ReadSucceeded = ec == fasio::error::success && BytesInMessage != 0;
  if (!ReadSucceeded) {
    ERROR_AND_DIE_FMT("Failed");
  }

  const char c = (char)CommsBuffer[0];
  if (c == '$') {
    // Regular command packet: decode, dispatch and reply.
    auto packet = ReadPacket(std::span {CommsBuffer}.subspan(0, BytesInMessage));
    auto response = ProcessPacket(packet);
    SendPacketPair(response);
    if (response.TypeResponse == HandledPacketType::TYPE_UNKNOWN) {
      LogMan::Msg::DFmt("Unknown packet {}", packet);
    }
  } else if (c == '+') {
    // ACK, do nothing.
  } else if (c == '-') {
    // NAK, Resend requested
    std::lock_guard lk(sendMutex);
    SendPacket(*CommsSocket, {});
  } else if (c == '\x03') {
    // ASCII ETX (Ctrl-C): interrupt request. Pause and report a T02 stop.
    SyscallHandler->TM.Pause();
    fextl::string StopReply = fextl::fmt::format("T02thread:{:02x};", getpid());
    if (LibraryMapChanged) {
      // If libraries have changed then let gdb know
      StopReply += "library:1;";
    }
    SendPacketPair({std::move(StopReply), HandledPacketType::TYPE_ACK});
  } else {
    LogMan::Msg::DFmt("GdbServer: Unexpected byte {} ({:02x})", c, c);
  }

  // Drop the bytes that made up this message.
  CommsBuffer.erase(CommsBuffer.begin(), std::next(CommsBuffer.begin(), BytesInMessage));

  // Wait for the next message.
  async_read_until(*CommsSocket, fasio::dynamic_vector_buffer {CommsBuffer}, std::bind_front(&GdbServer::MatchPacket, this),
                   std::bind_front(&GdbServer::HandlePacket, this));
}


// Entry point of the gdbserver thread.
// Opens the listen socket, accepts debugger connections, and runs the reactor's
// event loop until it returns; then closes the active connection (if any) and
// the listen socket. Returns immediately if the listen socket can't be opened.
void GdbServer::GdbServerLoop() {
  OpenListenSocket();
  if (!Acceptor) {
    // Couldn't open socket, just exit.
    return;
  }

  Acceptor->async_accept([this](fasio::error ec, std::optional<fasio::tcp_socket> Socket) {
    if (ec != fasio::error::success) {
      // Listen socket error or shutting down
      LogMan::Msg::EFmt("[GdbServer] gdbserver shutting down");
      // There may never have been a connection; only tear down a socket that exists.
      if (CommsSocket) {
        close(CommsSocket->FD);
        CommsSocket.reset();
      }
      // Repeat to wait for another connection
      return fasio::post_callback::repeat;
    }

    CommsSocket.emplace(*std::move(Socket));

    // Receive packet data
    async_read_until(*CommsSocket, fasio::dynamic_vector_buffer {CommsBuffer}, std::bind_front(&GdbServer::MatchPacket, this),
                     std::bind_front(&GdbServer::HandlePacket, this));

    // Repeat to catch disconnect events
    return fasio::post_callback::repeat;
  });

  CommsBuffer.reserve(1000);

  // Enter event loop
  Reactor.run();

  // Shut down
  std::lock_guard lk(sendMutex);
  if (CommsSocket) {
    close(CommsSocket->FD);
    CommsSocket.reset();
  }

  CloseListenSocket();
}
static void* ThreadHandler(void* Arg) {
  HLE::ThreadManager::SetThreadName("FEX:gdbserver");
  auto This = reinterpret_cast<FEX::GdbServer*>(Arg);
  This->GdbServerLoop();
  return nullptr;
}

// Spawns the gdbserver thread.
// All signals are masked around thread creation and the previous mask is restored
// afterwards (presumably so the new thread starts with every signal blocked).
void GdbServer::StartThread() {
  const uint64_t PreviousMask = HLE::ThreadManager::SetSignalMask(~0ULL);
  gdbServerThread = FEXCore::Threads::Thread::Create(ThreadHandler, this);
  HLE::ThreadManager::SetSignalMask(PreviousMask);
}

// Creates the listening AF_UNIX socket at `<temp>/FEX_gdbserver/<pid>-gdb`.
// On success `Acceptor` is set and connection instructions are logged; on
// failure an error is logged and `Acceptor` stays empty.
void GdbServer::OpenListenSocket() {
  const auto GdbUnixPath = fextl::fmt::format("{}/FEX_gdbserver/", FEXServerClient::GetTempFolder());
  if (FHU::Filesystem::CreateDirectory(GdbUnixPath) == FHU::Filesystem::CreateDirectoryResult::ERROR) {
    LogMan::Msg::EFmt("[GdbServer] Couldn't create gdbserver folder {}", GdbUnixPath);
    return;
  }

  GdbUnixSocketPath = fextl::fmt::format("{}{}-gdb", GdbUnixPath, ::getpid());

  // errno as it was right after the failed creation attempt, before unlink() can overwrite it.
  // Assumes the acceptor creation leaves errno set by the failing call - TODO confirm.
  int CreateErrno = 0;
  for (int attempt = 0; attempt < 2; ++attempt) {
    Acceptor = fasio::tcp_acceptor::create(Reactor, false, GdbUnixSocketPath, 1);
    if (Acceptor) {
      break;
    }
    CreateErrno = errno;

    // This can happen periodically with execve. unlink the path and try again.
    // The PID is reused but FEX likely started a gdbserver thread for the PID before execve.
    unlink(GdbUnixSocketPath.c_str());
  }

  if (!Acceptor) {
    LogMan::Msg::EFmt("[GdbServer] Couldn't bind AF_UNIX socket '{}': {} {}\n", GdbUnixSocketPath, CreateErrno, strerror(CreateErrno));
    return;
  }

  LogMan::Msg::IFmt("[GdbServer] Waiting for connection on {}", GdbUnixSocketPath);
  LogMan::Msg::IFmt("[GdbServer] gdb-multiarch -ex \"set debug remote 1\" -ex \"target extended-remote {}\"", GdbUnixSocketPath);
}

// Releases the acceptor and removes the socket path from the filesystem.
void GdbServer::CloseListenSocket() {
  Acceptor.reset();

  const char* SocketPath = GdbUnixSocketPath.c_str();
  unlink(SocketPath);
}

#endif
} // namespace FEX


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/GdbServer.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: glue|gdbserver
$end_info$
*/
#pragma once

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Utils/Event.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>

#include <Common/AsyncNet.h>

#include <atomic>
#include <mutex>
#include <stdint.h>

#include "LinuxSyscalls/SignalDelegator.h"

namespace FEX {

/**
 * @brief Server side of a gdb remote-debugging connection for the emulated process.
 *
 * Owns the listening socket, the communication socket, and the handlers for the
 * individual packet types that are declared below.
 */
class GdbServer {
public:
  GdbServer(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* SignalDelegation, FEX::HLE::SyscallHandler* const SyscallHandler);
  ~GdbServer();

  // Public for threading
  void GdbServerLoop();

  // Flags the cached library map as stale by setting the atomic LibraryMapChanged.
  void AlertLibrariesChanged() {
    LibraryMapChanged = true;
  }

private:
  void Break(FEXCore::Core::InternalThreadState* Thread, int signal);

  // Listening socket and server thread management.
  void OpenListenSocket();
  void CloseListenSocket();
  void StartThread();
  fextl::string ReadPacket(const std::span<std::byte>& stream);
  void SendPacket(fasio::tcp_socket&, const fextl::string& packet);

  void SendACK(fasio::tcp_socket&, bool NACK);

  Event ThreadBreakEvent {};
  void WaitForThreadWakeup();

  // Result of handling one packet: the response payload plus how it should be acknowledged.
  struct HandledPacketType {
    fextl::string Response {};
    enum ResponseType {
      TYPE_NONE,
      TYPE_UNKNOWN,
      TYPE_ACK,
      TYPE_NACK,
      TYPE_ONLYACK,
      TYPE_ONLYNACK,
    };
    // Value-initialized, i.e. TYPE_NONE unless a handler sets it.
    ResponseType TypeResponse {};
  };

  void SendPacketPair(const HandledPacketType& packetPair);
  HandledPacketType ProcessPacket(const fextl::string& packet);
  HandledPacketType handleProgramOffsets();

  HandledPacketType ThreadAction(char action, uint32_t tid);

  // Binary data transfer handlers
  // XFer function to correctly encode any reply
  //
  // Returns the slice of `data` starting at `offset` and at most `length` characters long:
  //  - "l"         when `offset` is exactly the end of `data` (nothing left to send)
  //  - "E34"       when `offset` is past the end of `data`
  //  - "m" + slice when more data remains after this slice
  //  - "l" + slice when this slice reaches the end of `data`
  // Note: `offset` and `length` are ints compared against size_t values, so they are
  // converted to unsigned for the comparisons.
  static fextl::string EncodeXferString(const fextl::string& data, int offset, int length) {
    if (offset == data.size()) {
      return "l";
    }
    if (offset >= data.size()) {
      return "E34"; // ERANGE
    }
    if ((data.size() - offset) > length) {
      return "m" + data.substr(offset, length);
    }
    return "l" + data.substr(offset);
  };

  // Per-object transfer handlers; each takes the annex plus the requested offset and length.
  HandledPacketType XferCommandExecFile(const fextl::string& annex, int offset, int length);
  HandledPacketType XferCommandFeatures(const fextl::string& annex, int offset, int length);
  HandledPacketType XferCommandThreads(const fextl::string& annex, int offset, int length);
  HandledPacketType XferCommandOSData(const fextl::string& annex, int offset, int length);
  HandledPacketType XferCommandLibraries(const fextl::string& annex, int offset, int length);
  HandledPacketType XferCommandAuxv(const fextl::string& annex, int offset, int length);
  HandledPacketType handleXfer(const fextl::string& packet);

  HandledPacketType HandlevFile(const fextl::string& packet);
  HandledPacketType HandlevCont(const fextl::string& packet);

  // Command handlers
  HandledPacketType CommandEnableExtendedMode(const fextl::string& packet);
  HandledPacketType CommandQueryHalted(const fextl::string& packet);
  HandledPacketType CommandContinue(const fextl::string& packet);
  HandledPacketType CommandDetach(const fextl::string& packet);
  HandledPacketType CommandReadRegisters(const fextl::string& packet);
  HandledPacketType CommandThreadOp(const fextl::string& packet);
  HandledPacketType CommandKill(const fextl::string& packet);
  HandledPacketType CommandMemory(const fextl::string& packet);
  HandledPacketType CommandReadReg(const fextl::string& packet);
  HandledPacketType CommandQuery(const fextl::string& packet);
  HandledPacketType CommandSingleStep(const fextl::string& packet);
  HandledPacketType CommandQueryThreadAlive(const fextl::string& packet);
  HandledPacketType CommandMultiLetterV(const fextl::string& packet);
  HandledPacketType CommandBreakpoint(const fextl::string& packet);
  HandledPacketType CommandUnknown(const fextl::string& packet);

  /**
   * @brief Returns the ThreadStateObject for the matching TID, or parent thread if TID isn't found
   *
   * @param TID Which TID to search for
   */
  const FEX::HLE::ThreadStateObject* FindThreadByTID(uint32_t TID);

  // Raw 10-byte storage for one x87 80-bit value.
  struct X80Float {
    uint8_t Data[10];
  };

  // Packed register block produced by GenerateContextDefinition.
  // NOTE(review): the field order presumably has to match the register description
  // that is advertised to gdb; confirm against the features XML before reordering.
  struct FEX_PACKED GDBContextDefinition {
    uint64_t gregs[FEXCore::Core::CPUState::NUM_GPRS];
    uint64_t rip;
    uint32_t eflags;
    uint32_t cs, ss, ds, es, fs, gs;
    X80Float mm[FEXCore::Core::CPUState::NUM_MMS];
    uint32_t fctrl;
    uint32_t fstat;
    uint32_t dummies[6];
    uint64_t xmm[FEXCore::Core::CPUState::NUM_XMMS][4];
    uint32_t mxcsr;
  };

  GDBContextDefinition GenerateContextDefinition(const FEX::HLE::ThreadStateObject* ThreadObject);

  FEXCore::Context::Context* CTX;
  FEX::HLE::SyscallHandler* const SyscallHandler;
  FEX::HLE::SignalDelegator* SignalDelegation;
  fextl::unique_ptr<FEXCore::Threads::Thread> gdbServerThread;
  fasio::poll_reactor Reactor;
  // Empty until OpenListenSocket succeeds; reset again by CloseListenSocket.
  std::optional<fasio::tcp_acceptor> Acceptor;
  std::optional<fasio::tcp_socket> CommsSocket;
  fextl::vector<std::byte> CommsBuffer;

  std::pair<fextl::vector<std::byte>::iterator, bool> MatchPacket(fextl::vector<std::byte>::iterator begin, fextl::vector<std::byte>::iterator end);
  void HandlePacket(fasio::error ec, size_t BytesInMessage);

  std::mutex sendMutex;
  bool SettingNoAckMode {false};
  bool NoAckMode {false};
  bool NonStopMode {false};
  fextl::string ThreadString {};
  fextl::string OSDataString {};
  void buildLibraryMap();
  // Starts out true; set again by AlertLibrariesChanged().
  std::atomic<bool> LibraryMapChanged = true;
  fextl::string LibraryMapString {};

  // Used to keep track of which signals to pass to the guest
  std::array<bool, FEX::HLE::SignalDelegator::MAX_SIGNALS + 1> PassSignals {};
  uint32_t CurrentDebuggingThread {};
  // Filesystem path of the listening socket; assigned in OpenListenSocket.
  fextl::string GdbUnixSocketPath {};
  FEX_CONFIG_OPT(Filename, APP_FILENAME);
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
};

} // namespace FEX


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.cpp
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/Syscalls.h"

#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXHeaderUtils/Syscalls.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>

#include <bitset>
#include <linux/mman.h>
#include <unistd.h>
#include <sys/user.h>
#include <sys/mman.h>
#include <sys/shm.h>

#ifndef MREMAP_DONTUNMAP
#define MREMAP_DONTUNMAP 4
#endif

namespace FEX::HLE {
/**
 * @brief Allocator that hands out mappings inside the low 4 GiB and tracks them in a page bitmap.
 *
 * Every page index below 0x10'0000 has one bit in MappedPages. The search for free
 * ranges runs either bottom-up or top-down depending on SearchDown.
 */
class MemAllocator32Bit final : public FEX::HLE::MemAllocator {
private:
  // First page index that may be handed out; pages below it are reserved.
  static constexpr uint64_t BASE_KEY = 16;
  // Page index of the top page of the 32-bit range; it is permanently marked as used.
  const uint64_t TOP_KEY = 0xFFFF'F000ULL >> FEXCore::Utils::FEX_PAGE_SHIFT;
  // Page index at the top of the lower 2 GiB, used as the start for MAP_32BIT requests.
  const uint64_t TOP_KEY32BIT = 0x7FFF'F000ULL >> FEXCore::Utils::FEX_PAGE_SHIFT;

public:
  MemAllocator32Bit() {
    // First 16 pages are taken by the Linux kernel
    SetUsedPages(0, 16);
    // Take the top page as well
    MappedPages.set(TOP_KEY);

    // Seed the scan cursors at whichever end of the range the search starts from.
    const uint64_t InitialKey = SearchDown ? TOP_KEY : BASE_KEY;
    LastScanLocation = InitialKey;
    LastKeyLocation = InitialKey;
    if (SearchDown) {
      LastKeyLocation32Bit = TOP_KEY32BIT;
    }
    FindPageRangePtr = SearchDown ? &MemAllocator32Bit::FindPageRange_TopDown : &MemAllocator32Bit::FindPageRange;
  }

  void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override;
  int Munmap(void* addr, size_t length) override;
  void* Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) override;
  uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) override;
  uint64_t Shmdt(const void* shmaddr) override;
  // Compile-time choice of the search direction for free ranges.
  static constexpr bool SearchDown = true;

  // Marks PagesLength pages starting at page index PageAddr as in use.
  // PageAddr is a page index (address already shifted), not a byte address.
  void SetUsedPages(uint64_t PageAddr, size_t PagesLength) {
    while (PagesLength-- != 0) {
      MappedPages.set(PageAddr++);
    }
  }

  // Marks PagesLength pages starting at page index PageAddr as free.
  // PageAddr is a page index (address already shifted), not a byte address.
  void SetFreePages(uint64_t PageAddr, size_t PagesLength) {
    while (PagesLength-- != 0) {
      MappedPages.reset(PageAddr++);
    }
  }

private:
  // Set that contains 4k mapped pages
  // This is the full 32bit memory range
  std::bitset<0x10'0000> MappedPages;
  // First page index of an attached shm segment -> its shmid.
  fextl::map<uint32_t, int> PageToShm {};
  uint64_t LastScanLocation {};
  uint64_t LastKeyLocation {};
  uint64_t LastKeyLocation32Bit {};
  std::mutex AllocMutex {};
  uint64_t FindPageRange(uint64_t Start, size_t Pages) const;
  uint64_t FindPageRange_TopDown(uint64_t Start, size_t Pages) const;
  using FindHandler = uint64_t (MemAllocator32Bit::*)(uint64_t Start, size_t Pages) const;
  // Points at the search routine selected in the constructor.
  FindHandler FindPageRangePtr {};
};

/**
 * @brief Bottom-up first-fit search for a run of free pages.
 *
 * @param Start Page index to begin scanning at
 * @param Pages Number of consecutive free pages required
 * @return Page index of the first page of the run, or 0 if no run fits below TOP_KEY
 */
uint64_t MemAllocator32Bit::FindPageRange(uint64_t Start, size_t Pages) const {
  for (uint64_t Candidate = Start; Candidate != TOP_KEY;) {
    // The run would extend past the top of the range; nothing further up can fit either.
    if ((Candidate + Pages) > TOP_KEY) {
      return 0;
    }

    // Count how many pages starting at Candidate are free, up to the requested amount.
    uint64_t FreeRun = 0;
    while (FreeRun < Pages && !MappedPages.test(Candidate + FreeRun)) {
      ++FreeRun;
    }

    if (FreeRun == Pages) {
      return Candidate;
    }

    // Resume just past the mapped page that ended the run.
    Candidate += FreeRun + 1;
  }

  return 0;
}

/**
 * @brief Top-down first-fit search for a run of free pages.
 *
 * Probes pages Start, Start - 1, ... downwards and stops once the cursor leaves
 * the [BASE_KEY, TOP_KEY] window.
 *
 * @param Start Page index to begin scanning at
 * @param Pages Number of consecutive free pages required
 * @return `Cursor - Pages` for the first cursor position with `Pages` free pages at
 *         and below it, or 0 if none was found
 *
 * NOTE(review): the probed pages are [Cursor - Pages + 1, Cursor] while the returned
 * index is Cursor - Pages, i.e. one page below the lowest probed page. Callers appear
 * to depend on this exact value, so it is preserved here; confirm before changing.
 */
uint64_t MemAllocator32Bit::FindPageRange_TopDown(uint64_t Start, size_t Pages) const {
  uint64_t Cursor = Start;

  while (Cursor >= BASE_KEY && Cursor <= TOP_KEY) {
    // Count how many pages at and below Cursor are free, up to the requested amount.
    uint64_t FreeRun = 0;
    while (FreeRun < Pages && !MappedPages.test(Cursor - FreeRun)) {
      ++FreeRun;
    }

    if (FreeRun == Pages) {
      return Cursor - FreeRun;
    }

    // Resume just below the mapped page that ended the run.
    Cursor -= FreeRun + 1;
  }

  return 0;
}

/**
 * @brief mmap that keeps the resulting mapping inside the tracked 32-bit range.
 *
 * With MAP_FIXED / MAP_FIXED_NOREPLACE the request is forwarded to the host at the
 * given address. Otherwise the address is ignored and a free range is chosen from
 * the page bitmap and mapped with MAP_FIXED_NOREPLACE.
 *
 * @return The mapped address on success, or a negated errno value cast to a pointer
 */
void* MemAllocator32Bit::Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) {
  std::scoped_lock<std::mutex> lk {AllocMutex};
  size_t PagesLength = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;

  uintptr_t Addr = reinterpret_cast<uintptr_t>(addr);
  uintptr_t PageAddr = Addr >> FEXCore::Utils::FEX_PAGE_SHIFT;

  // Define MAP_FIXED_NOREPLACE ourselves to ensure we always parse this flag
  constexpr int FEX_MAP_FIXED_NOREPLACE = 0x100000;
  bool Fixed = ((flags & MAP_FIXED) || (flags & FEX_MAP_FIXED_NOREPLACE));

  // Addr must be page aligned (length is rounded up to whole pages above)
  if (Addr & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return reinterpret_cast<void*>(-EINVAL);
  }

  // If we do have an fd then offset must be page aligned
  if (fd != -1 && offset & ~FEXCore::Utils::FEX_PAGE_MASK) {
    return reinterpret_cast<void*>(-EINVAL);
  }

  if (Addr + length > std::numeric_limits<uint32_t>::max()) {
    return reinterpret_cast<void*>(-EOVERFLOW);
  }

  // Check reserved range
  if (Fixed && PageAddr < 16) {
    return reinterpret_cast<void*>(-EINVAL);
  }

  if (!Fixed) {
    // If we aren't mapping fixed then ignore the address input
    Addr = 0;
    PageAddr = 0;
  }

  bool Map32Bit = flags & FEX::HLE::X86_64_MAP_32BIT;

  // Remove the MAP_32BIT flag if it exists now
  flags &= ~FEX::HLE::X86_64_MAP_32BIT;

  // Picks a free range from the bitmap and maps it; retries at other positions when
  // the host reports that the chosen range is already occupied.
  auto AllocateNoHint = [&]() -> void* {
    bool Wrapped = false;
    uint64_t BottomPage = Map32Bit && (LastScanLocation >= LastKeyLocation32Bit) ? LastKeyLocation32Bit : LastScanLocation;
restart: {
  // Linear range scan
  uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength);
  if (LowerPage == 0) {
    // Try again but this time from the start
    BottomPage = Map32Bit ? LastKeyLocation32Bit : LastKeyLocation;
    LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength);
  }

  uint64_t UpperPage = LowerPage + PagesLength;
  if (LowerPage == 0) {
    return reinterpret_cast<void*>(-ENOMEM);
  }
  {
    // Try and map the range
    void* MappedPtr =
      ::mmap(reinterpret_cast<void*>(LowerPage << FEXCore::Utils::FEX_PAGE_SHIFT), length, prot, flags | FEX_MAP_FIXED_NOREPLACE, fd, offset);

    if (MappedPtr == MAP_FAILED && errno != EEXIST) {
      return reinterpret_cast<void*>(-errno);
    } else if (MappedPtr == MAP_FAILED || MappedPtr >= reinterpret_cast<void*>(TOP_KEY << FEXCore::Utils::FEX_PAGE_SHIFT)) {
      // Handles the case where MAP_FIXED_NOREPLACE failed with MAP_FAILED
      // or if the host system's kernel isn't new enough then it returns the wrong pointer
      if (MappedPtr != MAP_FAILED && MappedPtr >= reinterpret_cast<void*>(TOP_KEY << FEXCore::Utils::FEX_PAGE_SHIFT)) {
        // Make sure to munmap this so we don't leak memory
        ::munmap(MappedPtr, length);
      }

      if (UpperPage == TOP_KEY) {
        BottomPage = BASE_KEY;
        Wrapped = true;
        goto restart;
      } else if (Wrapped && LowerPage >= LastScanLocation) {
        // We linear scanned the entire memory range. Give up
        return (void*)(uintptr_t)-errno;
      } else {
        // Try again
        if (SearchDown) {
          --BottomPage;
        } else {
          ++BottomPage;
        }
        goto restart;
      }
    } else {
      if (SearchDown) {
        LastScanLocation = LowerPage;
      } else {
        LastScanLocation = UpperPage;
      }
      SetUsedPages(LowerPage, PagesLength);
      return MappedPtr;
    }
  }
}
  };

  // Find a region that fits our address
  if (Addr == 0) {
    return AllocateNoHint();
  }

  void* MappedPtr = ::mmap(reinterpret_cast<void*>(PageAddr << FEXCore::Utils::FEX_PAGE_SHIFT),
                           PagesLength << FEXCore::Utils::FEX_PAGE_SHIFT, prot, flags, fd, offset);

  // A failed host mmap must be handled before the range check below: MAP_FAILED is
  // (void*)-1 and therefore compares above TOP_KEY. Checking it first reports the real
  // errno (EBADF, EACCES, ENOMEM, EEXIST, ...) instead of always -EEXIST, and avoids
  // calling munmap on MAP_FAILED.
  if (MappedPtr == MAP_FAILED) {
    return reinterpret_cast<void*>(-errno);
  }

  if (MappedPtr >= reinterpret_cast<void*>(TOP_KEY << FEXCore::Utils::FEX_PAGE_SHIFT) && (flags & FEX_MAP_FIXED_NOREPLACE)) {
    // Handles the case where MAP_FIXED_NOREPLACE isn't handled by the host system's
    // kernel and returns the wrong pointer
    // Make sure to munmap this so we don't leak memory
    ::munmap(MappedPtr, length);
    return reinterpret_cast<void*>(-EEXIST);
  }

  SetUsedPages(PageAddr, PagesLength);
  return MappedPtr;
}

/**
 * @brief Unmaps a range page by page and clears the matching bits in the page bitmap.
 *
 * @param addr   Page-aligned start address
 * @param length Page-aligned length in bytes
 * @return 0 on success (also for ranges starting in the reserved first 16 pages),
 *         -EINVAL for misaligned input, -EOVERFLOW if the range leaves the 32-bit
 *         space, or the negated errno of the first failing host munmap
 */
int MemAllocator32Bit::Munmap(void* addr, size_t length) {
  std::scoped_lock<std::mutex> lk {AllocMutex};

  const uintptr_t Addr = reinterpret_cast<uintptr_t>(addr);
  const uintptr_t FirstPage = Addr >> FEXCore::Utils::FEX_PAGE_SHIFT;
  const size_t PageCount = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
  const uintptr_t EndPage = FirstPage + PageCount;

  // Both Addr and length must be page aligned
  const bool AddrMisaligned = (Addr & ~FEXCore::Utils::FEX_PAGE_MASK) != 0;
  const bool LengthMisaligned = (length & ~FEXCore::Utils::FEX_PAGE_MASK) != 0;
  if (AddrMisaligned || LengthMisaligned) {
    return -EINVAL;
  }

  if (Addr + length > std::numeric_limits<uint32_t>::max()) {
    return -EOVERFLOW;
  }

  // Requests that start in the reserved range are reported as successful without doing anything
  if (FirstPage < 16) {
    return 0;
  }

  for (uintptr_t Page = FirstPage; Page != EndPage; ++Page) {
    // Always pass to munmap, it may be something allocated we aren't tracking
    void* const PagePtr = reinterpret_cast<void*>(Page << FEXCore::Utils::FEX_PAGE_SHIFT);
    if (::munmap(PagePtr, FEXCore::Utils::FEX_PAGE_SIZE) != 0) {
      return -errno;
    }

    // Clearing an already-clear bit is a no-op, so no test is needed first
    MappedPages.reset(Page);
  }

  return 0;
}

void* MemAllocator32Bit::Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) {
  size_t OldPagesLength = FEXCore::AlignUp(old_size, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
  size_t NewPagesLength = FEXCore::AlignUp(new_size, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;

  {
    std::scoped_lock<std::mutex> lk {AllocMutex};
    if (flags & MREMAP_FIXED) {
      void* MappedPtr = ::mremap(old_address, old_size, new_size, flags, new_address);

      if (MappedPtr != MAP_FAILED) {
        if (!(flags & MREMAP_DONTUNMAP)) {
          // Unmap the old location
          uintptr_t OldAddr = reinterpret_cast<uintptr_t>(old_address);
          SetFreePages(OldAddr >> FEXCore::Utils::FEX_PAGE_SHIFT, OldPagesLength);
        }

        // Map the new pages
        uintptr_t NewAddr = reinterpret_cast<uintptr_t>(MappedPtr);
        SetUsedPages(NewAddr >> FEXCore::Utils::FEX_PAGE_SHIFT, NewPagesLength);
      } else {
        return reinterpret_cast<void*>(-errno);
      }
    } else {
      uintptr_t OldAddr = reinterpret_cast<uintptr_t>(old_address);
      uintptr_t OldPageAddr = OldAddr >> FEXCore::Utils::FEX_PAGE_SHIFT;

      if (NewPagesLength < OldPagesLength) {
        void* MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE);

        if (MappedPtr != MAP_FAILED) {
          // Clear the pages that we just shrunk
          size_t NewPagesLength = FEXCore::AlignUp(new_size, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
          uintptr_t NewPageAddr = reinterpret_cast<uintptr_t>(MappedPtr) >> FEXCore::Utils::FEX_PAGE_SHIFT;
          SetFreePages(NewPageAddr + NewPagesLength, OldPagesLength - NewPagesLength);
          return MappedPtr;
        } else {
          return reinterpret_cast<void*>(-errno);
        }
      } else {
        // Scan the region forward from our first region's endd to see if it can be extended
        bool CanExtend {true};

        for (size_t i = OldPagesLength; i < NewPagesLength; ++i) {
          if (MappedPages[OldPageAddr + i]) {
            CanExtend = false;
            break;
          }
        }

        if (CanExtend) {
          void* MappedPtr = ::mremap(old_address, old_size, new_size, flags & ~MREMAP_MAYMOVE);

          if (MappedPtr != MAP_FAILED) {
            // Map the new pages
            size_t NewPagesLength = FEXCore::AlignUp(new_size, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
            uintptr_t NewAddr = reinterpret_cast<uintptr_t>(MappedPtr);
            SetUsedPages(NewAddr >> FEXCore::Utils::FEX_PAGE_SHIFT, NewPagesLength);
            return MappedPtr;
          } else if (!(flags & MREMAP_MAYMOVE)) {
            // We have one more chance if MAYMOVE is specified
            return reinterpret_cast<void*>(-errno);
          }
        }
      }
    }
  }

  // Flags can not contain MREMAP_FIXED at this point
  // Flags might contain MREMAP_MAYMOVE and/or MREMAP_DONTUNMAP
  // New Size is >= old size

  // First, try and allocate a region the size of the new size
  void* MappedPtr = this->Mmap(nullptr, new_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  std::scoped_lock<std::mutex> lk {AllocMutex};
  if (FEX::HLE::HasSyscallError(MappedPtr)) {
    // Couldn't find a region that fit our space
    return MappedPtr;
  }

  // Good news, we found a region
  // This will overwrite the previous mmap if it succeeds
  MappedPtr = ::mremap(old_address, old_size, new_size, flags | MREMAP_FIXED | MREMAP_MAYMOVE, MappedPtr);

  if (MappedPtr != MAP_FAILED) {
    if (!(flags & MREMAP_DONTUNMAP) && MappedPtr != old_address) {
      // If we have both MREMAP_DONTUNMAP not set and the new pointer is at a new location
      // Make sure to clear the old mapping
      uintptr_t OldAddr = reinterpret_cast<uintptr_t>(old_address);
      SetFreePages(OldAddr >> FEXCore::Utils::FEX_PAGE_SHIFT, OldPagesLength);
    }

    // Map the new pages
    size_t NewPagesLength = FEXCore::AlignUp(new_size, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
    uintptr_t NewAddr = reinterpret_cast<uintptr_t>(MappedPtr);
    SetUsedPages(NewAddr >> FEXCore::Utils::FEX_PAGE_SHIFT, NewPagesLength);
    return MappedPtr;
  }

  // Failed
  return reinterpret_cast<void*>(-errno);
}

/**
 * @brief shmat that keeps the attachment inside the tracked 32-bit range.
 *
 * With an explicit shmaddr the request is forwarded to the host and the resulting
 * range is recorded. With a null shmaddr a free range is chosen from the page
 * bitmap and the segment is attached there.
 *
 * @param ResultAddress Receives the attach address on success
 * @return 0 on success, otherwise a negated errno value
 */
uint64_t MemAllocator32Bit::Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) {
  std::scoped_lock<std::mutex> lk {AllocMutex};

  if (shmaddr != nullptr) {
    // shmaddr must be valid
    uint64_t Result = reinterpret_cast<uint64_t>(::shmat(shmid, shmaddr, shmflg));
    if (Result != -1) {
      uint32_t SmallRet = Result >> 32;
      if (!(SmallRet == 0 || SmallRet == ~0U)) {
        LOGMAN_MSG_A_FMT("Syscall returning something with data in the upper 32bits! BUG!");
        return -ENOMEM;
      }

      uintptr_t NewAddr = reinterpret_cast<uintptr_t>(Result);
      uintptr_t NewPageAddr = NewAddr >> FEXCore::Utils::FEX_PAGE_SHIFT;

      // Add to the map
      PageToShm[NewPageAddr] = shmid;

      *ResultAddress = Result;

      // We must get the shm size and track it
      struct shmid_ds buf {};

      if (shmctl(shmid, IPC_STAT, &buf) == 0) {
        // Map the new pages.
        // Round the segment size up to whole pages, matching the null-shmaddr path and
        // Shmdt; a plain shift would leave the last partial page untracked.
        size_t NewPagesLength = FEXCore::AlignUp(buf.shm_segsz, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
        SetUsedPages(NewPageAddr, NewPagesLength);
      }

      // Zero on working result
      Result = 0;
    } else {
      Result = -errno;
    }
    return Result;
  } else {
    // We must get the shm size and track it
    struct shmid_ds buf {};
    uint64_t PagesLength {};

    if (shmctl(shmid, IPC_STAT, &buf) == 0) {
      PagesLength = FEXCore::AlignUp(buf.shm_segsz, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
    } else {
      return -EINVAL;
    }

    bool Wrapped = false;
    uint64_t BottomPage = LastScanLocation;
restart: {
  // Linear range scan
  uint64_t LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength);
  if (LowerPage == 0) {
    // Try again but this time from the start
    BottomPage = LastKeyLocation;
    LowerPage = (this->*FindPageRangePtr)(BottomPage, PagesLength);
  }

  uint64_t UpperPage = LowerPage + PagesLength;
  if (LowerPage == 0) {
    return -ENOMEM;
  }
  {
    // Try and map the range
    void* MappedPtr = ::shmat(shmid, reinterpret_cast<const void*>(LowerPage << FEXCore::Utils::FEX_PAGE_SHIFT), shmflg);

    if (MappedPtr == MAP_FAILED) {
      if (errno != EINVAL) {
        // The host reports an unusable attach address as EINVAL. Any other error
        // (EACCES, EIDRM, ENOMEM, ...) will not go away by picking another address,
        // so report it instead of scanning further.
        return -errno;
      }

      if (UpperPage == TOP_KEY) {
        BottomPage = LastKeyLocation;
        Wrapped = true;
        goto restart;
      } else if (Wrapped && LowerPage >= LastScanLocation) {
        // We linear scanned the entire memory range. Give up
        return -errno;
      } else {
        // Try again, stepping in the direction the search runs in. Stepping upwards
        // while searching top-down would find the same range again on every retry.
        if (SearchDown) {
          --BottomPage;
        } else {
          BottomPage += PagesLength;
        }
        goto restart;
      }
    } else {
      if (SearchDown) {
        LastScanLocation = LowerPage;
      } else {
        LastScanLocation = UpperPage;
      }
      // Set the range as mapped
      SetUsedPages(LowerPage, PagesLength);

      *ResultAddress = reinterpret_cast<uint64_t>(MappedPtr);

      // Add to the map
      PageToShm[LowerPage] = shmid;

      // Zero on working result
      return 0;
    }
  }
}
  }
}
/**
 * @brief Detaches a shm segment previously attached through Shmat and releases its pages.
 *
 * @param shmaddr Attach address of the segment
 * @return -EINVAL if no attachment is recorded at shmaddr, otherwise the result
 *         produced by SYSCALL_ERRNO() for the host shmdt call
 */
uint64_t MemAllocator32Bit::Shmdt(const void* shmaddr) {
  std::scoped_lock<std::mutex> lk {AllocMutex};

  uint32_t AddrPage = reinterpret_cast<uint64_t>(shmaddr) >> FEXCore::Utils::FEX_PAGE_SHIFT;
  auto it = PageToShm.find(AddrPage);

  if (it == PageToShm.end()) {
    // Page wasn't mapped
    return -EINVAL;
  }

  // Query the segment size before detaching: once detached, the segment may no longer
  // exist (for example if it was already marked for removal).
  int shmid = it->second;
  struct shmid_ds buf {};
  const bool HaveSize = shmctl(shmid, IPC_STAT, &buf) == 0;
  if (!HaveSize) {
    LOGMAN_MSG_A_FMT("Failed to get shm size during shmdt");
  }

  uint64_t Result = ::shmdt(shmaddr);

  if (Result == 0) {
    // Only release the tracking once the host has actually detached the segment.
    // If shmdt failed the pages are still attached and must stay marked as used.
    if (HaveSize) {
      size_t PagesLength = FEXCore::AlignUp(buf.shm_segsz, FEXCore::Utils::FEX_PAGE_SIZE) >> FEXCore::Utils::FEX_PAGE_SHIFT;
      SetFreePages(AddrPage, PagesLength);
    }
    PageToShm.erase(it);
  }

  SYSCALL_ERRNO();
}

/**
 * @brief Allocator that forwards every request straight to the host syscalls.
 *
 * No tracking is done; results are only converted to the "negated errno" error
 * convention used by the MemAllocator interface.
 */
class MemAllocatorPassThrough final : public FEX::HLE::MemAllocator {
public:
  // Host mmap; a failure is returned as the negated errno cast to a pointer.
  void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) override {
    void* const Mapping = ::mmap(addr, length, prot, flags, fd, offset);
    if (Mapping == MAP_FAILED) {
      return reinterpret_cast<void*>(-errno);
    }
    return Mapping;
  }

  // Host munmap; the return value is produced by SYSCALL_ERRNO() from `Result`.
  int Munmap(void* addr, size_t length) override {
    uint64_t Result = static_cast<uint64_t>(::munmap(addr, length));
    SYSCALL_ERRNO();
  }

  // Host mremap; a failure is returned as the negated errno cast to a pointer.
  void* Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) override {
    void* const Mapping = ::mremap(old_address, old_size, new_size, flags, new_address);
    if (Mapping == MAP_FAILED) {
      return reinterpret_cast<void*>(-errno);
    }
    return Mapping;
  }

  // Host shmat; on success the attach address is written to ResultAddress and
  // `Result` becomes 0 before SYSCALL_ERRNO() produces the return value.
  uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) override {
    void* const Attached = ::shmat(shmid, shmaddr, shmflg);
    uint64_t Result = reinterpret_cast<uint64_t>(Attached);
    const bool Failed = Result == ~0ULL;
    if (!Failed) {
      *ResultAddress = Result;
      Result = 0;
    }
    SYSCALL_ERRNO();
  }

  // Host shmdt; the return value is produced by SYSCALL_ERRNO() from `Result`.
  uint64_t Shmdt(const void* shmaddr) override {
    uint64_t Result = ::shmdt(shmaddr);
    SYSCALL_ERRNO();
  }
};

// Builds the bitmap-tracking allocator and returns it through the MemAllocator interface.
fextl::unique_ptr<FEX::HLE::MemAllocator> Create32BitAllocator() {
  fextl::unique_ptr<FEX::HLE::MemAllocator> Allocator = fextl::make_unique<MemAllocator32Bit>();
  return Allocator;
}

// Builds the allocator that forwards directly to the host and returns it through the MemAllocator interface.
fextl::unique_ptr<FEX::HLE::MemAllocator> CreatePassthroughAllocator() {
  fextl::unique_ptr<FEX::HLE::MemAllocator> Allocator = fextl::make_unique<MemAllocatorPassThrough>();
  return Allocator;
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/LinuxAllocator.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/memory.h>

#include <cstdint>
#include <memory>

namespace FEX::HLE {
// Flag bit that the 32-bit allocator strips from the mmap flags and uses to start
// its search from the lower half of the address range.
constexpr uint32_t X86_64_MAP_32BIT = 0x40;

/**
 * @brief Interface for the memory-mapping operations that can be routed through an allocator.
 *
 * The implementations in LinuxAllocator.cpp report errors as negated errno values:
 * cast to a pointer for Mmap/Mremap and returned directly for the others.
 */
class MemAllocator {
public:
  virtual ~MemAllocator() = default;
  // Parameters mirror the host mmap call.
  virtual void* Mmap(void* addr, size_t length, int prot, int flags, int fd, off_t offset) = 0;
  // Parameters mirror the host munmap call.
  virtual int Munmap(void* addr, size_t length) = 0;
  // Parameters mirror the host mremap call.
  virtual void* Mremap(void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) = 0;
  // On success the attach address is stored in *ResultAddress and 0 is returned.
  virtual uint64_t Shmat(int shmid, const void* shmaddr, int shmflg, uint32_t* ResultAddress) = 0;
  // Parameter mirrors the host shmdt call.
  virtual uint64_t Shmdt(const void* shmaddr) = 0;
};

// Creates the allocator that tracks mappings in a page bitmap (MemAllocator32Bit).
fextl::unique_ptr<FEX::HLE::MemAllocator> Create32BitAllocator();
// Creates the allocator that forwards every call to the host (MemAllocatorPassThrough).
fextl::unique_ptr<FEX::HLE::MemAllocator> CreatePassthroughAllocator();
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Seccomp/BPFEmitter.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Seccomp/BPFEmitter.h"

#include <FEXCore/Utils/AllocatorHooks.h>
#include <FEXCore/Utils/LogManager.h>

#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

namespace FEX::HLE {

// The helper macros below are meant for function templates that have a
// `bool CalculateSize` template parameter and a local `OpSize` variable in scope.

// In size-calculation mode, adds 4 to OpSize instead of evaluating `x`
// (presumably the byte size of one emitted instruction); otherwise evaluates `x`.
#define EMIT_INST(x)               \
  do {                             \
    if constexpr (CalculateSize) { \
      OpSize += 4;                 \
    } else {                       \
      x;                           \
    }                              \
  } while (0)

// Returns from the enclosing function with an error: ~0ULL in size-calculation mode,
// otherwise `x`, which must be -EINVAL (enforced by the static_assert).
// Note: unlike the other macros this one is not wrapped in do/while.
#define RETURN_ERROR(x)                                                                \
  if constexpr (CalculateSize) {                                                       \
    return ~0ULL;                                                                      \
  } else {                                                                             \
    static_assert(x == -EINVAL, "Early return error evaluation only supports EINVAL"); \
    return x;                                                                          \
  }

// Returns from the enclosing function with success: the accumulated OpSize in
// size-calculation mode, otherwise 0.
#define RETURN_SUCCESS()           \
  do {                             \
    if constexpr (CalculateSize) { \
      return OpSize;               \
    } else {                       \
      return 0;                    \
    }                              \
  } while (0)

// Returns an -EINVAL error from the enclosing function (via RETURN_ERROR) when `cond` is false.
#define VALIDATE(cond)      \
  do {                      \
    if (!(cond)) {          \
      RETURN_ERROR(-EINVAL) \
    }                       \
  } while (0)

// Predicate type: true when Result is ~0ULL, the value RETURN_ERROR yields in size-calculation mode.
using SizeErrorCheck = decltype([](uint64_t Result) -> bool { return Result == ~0ULL; });
// Predicate type: true when Result is non-zero; RETURN_SUCCESS yields 0 in emission mode.
using EmissionErrorCheck = decltype([](uint64_t Result) { return Result != 0; });

// Register selection comes from function signature.
// REG_A: register the handlers in this file use for BPF_LD loads and ALU results.
constexpr auto REG_A = ARMEmitter::WReg::w0;
// REG_X: register the handlers use for the non-BPF_LD load class and as the non-immediate ALU operand.
constexpr auto REG_X = ARMEmitter::WReg::w1;
// REG_TMP: receives an immediate operand loaded from the constant pool.
constexpr auto REG_TMP = ARMEmitter::WReg::w2;
// REG_TMP2: holds the intermediate quotient of the BPF_MOD sequence.
constexpr auto REG_TMP2 = ARMEmitter::WReg::w3;
// REG_SECCOMP_DATA: base register for seccomp_data loads and scratch-memory accesses.
constexpr auto REG_SECCOMP_DATA = ARMEmitter::XReg::x4;

/**
 * @brief Handles one BPF load instruction.
 *
 * Only word-sized loads are accepted. Supported modes are immediate, absolute
 * (into seccomp_data), scratch memory and length; everything else is rejected.
 *
 * @tparam CalculateSize When true nothing is emitted and the required code size is returned
 * @return See RETURN_SUCCESS / RETURN_ERROR for the value in each mode
 */
template<bool CalculateSize>
uint64_t BPFEmitter::HandleLoad(uint32_t BPFIP, const sock_filter* Inst) {
  VALIDATE(BPF_SIZE(Inst->code) == BPF_W);
  [[maybe_unused]] size_t OpSize {};

  // BPF_LD writes REG_A, any other class handled here writes REG_X.
  auto Target = REG_X;
  if (BPF_CLASS(Inst->code) == BPF_LD) {
    Target = REG_A;
  }

  const auto Mode = BPF_MODE(Inst->code);

  if (Mode == BPF_IMM) {
    // The immediate lives in the constant pool; load it through its label.
    auto PoolEntry = ConstPool.try_emplace(Inst->k, ARMEmitter::ForwardLabel {});
    EMIT_INST(ldr(Target, &PoolEntry.first->second));
  } else if (Mode == BPF_ABS) {
    // ABS has some restrictions
    // - Must be 4-byte aligned
    // - Must be less than the size of seccomp_data
    const auto DataOffset = Inst->k;

    VALIDATE((DataOffset & 0b11) == 0);
    VALIDATE(DataOffset < sizeof(seccomp_data));

    EMIT_INST(ldr(Target, REG_SECCOMP_DATA, DataOffset));
  } else if (Mode == BPF_MEM) {
    // Must be smaller than scratch space size.
    VALIDATE(Inst->k < 16);

    EMIT_INST(ldr(Target, REG_SECCOMP_DATA, ARRAY_OFFSETOF(WorkingBuffer, ScratchMemory, Inst->k)));
  } else if (Mode == BPF_LEN) {
    // Just returns the length of seccomp_data.
    EMIT_INST(movz(Target, sizeof(seccomp_data)));
  } else {
    // BPF_IND, BPF_MSH and anything unknown are unsupported.
    RETURN_ERROR(-EINVAL);
  }

  RETURN_SUCCESS();
}

/**
 * @brief Handles one BPF store instruction (a register into scratch memory slot k).
 *
 * @tparam CalculateSize When true nothing is emitted and the required code size is returned
 * @return See RETURN_SUCCESS / RETURN_ERROR for the value in each mode
 */
template<bool CalculateSize>
uint64_t BPFEmitter::HandleStore(uint32_t BPFIP, const sock_filter* Inst) {
  VALIDATE(BPF_SIZE(Inst->code) == BPF_W);

  [[maybe_unused]] size_t OpSize {};

  // BPF_ST stores the accumulator (A); the other store class, BPF_STX, stores X.
  // The class must be compared against BPF_ST: a store instruction never has class
  // BPF_LD, so comparing against BPF_LD would always select REG_X.
  const auto SrcReg = BPF_CLASS(Inst->code) == BPF_ST ? REG_A : REG_X;
  // Must be smaller than scratch space size.
  VALIDATE(Inst->k < 16);

  EMIT_INST(str(SrcReg, REG_SECCOMP_DATA, ARRAY_OFFSETOF(WorkingBuffer, ScratchMemory, Inst->k)));

  RETURN_SUCCESS();
}

template<bool CalculateSize>
uint64_t BPFEmitter::HandleALU(uint32_t BPFIP, const sock_filter* Inst) {
  [[maybe_unused]] size_t OpSize {};
  const auto SrcType = BPF_SRC(Inst->code);
  const auto Op = BPF_OP(Inst->code);

  switch (Op) {
  case BPF_ADD:
  case BPF_SUB:
  case BPF_MUL:
  case BPF_DIV:
  case BPF_OR:
  case BPF_AND:
  case BPF_LSH:
  case BPF_RSH:
  case BPF_MOD:
  case BPF_XOR: {
    auto SrcReg = REG_X;
    if (SrcType == BPF_K) {
      SrcReg = REG_TMP;
      auto Const = ConstPool.try_emplace(Inst->k, ARMEmitter::ForwardLabel {});
      EMIT_INST(ldr(SrcReg, &Const.first->second));
    }

    switch (Op) {
    case BPF_ADD: EMIT_INST(add(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_SUB: EMIT_INST(sub(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_MUL: EMIT_INST(mul(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_DIV:
      // Specifically unsigned.
      EMIT_INST(udiv(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg));
      break;
    case BPF_OR: EMIT_INST(orr(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_AND: EMIT_INST(and_(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_LSH: EMIT_INST(lslv(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_RSH: EMIT_INST(lsrv(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    case BPF_MOD:
      // Specifically unsigned.
      EMIT_INST(udiv(ARMEmitter::Size::i32Bit, REG_TMP2, REG_A, SrcReg));
      EMIT_INST(msub(ARMEmitter::Size::i32Bit, REG_A, REG_TMP2, SrcReg, REG_A));
      break;
    case BPF_XOR: EMIT_INST(eor(ARMEmitter::Size::i32Bit, REG_A, REG_A, SrcReg)); break;
    default: RETURN_ERROR(-EINVAL);
    }

    break;
  }
  case BPF_NEG:
    // Only BPF_K supported on NEG.
    VALIDATE(SrcType == BPF_K);

    EMIT_INST(neg(ARMEmitter::Size::i32Bit, REG_A, REG_A));
    break;

  default: RETURN_ERROR(-EINVAL);
  }

  RETURN_SUCCESS();
}

template<bool CalculateSize>
uint64_t BPFEmitter::HandleJmp(uint32_t BPFIP, uint32_t NumInst, const sock_filter* Inst) {
  [[maybe_unused]] size_t OpSize {};
  const auto SrcType = BPF_SRC(Inst->code);
  const auto Op = BPF_OP(Inst->code);

  switch (Op) {
  case BPF_JA: {
    // Only BPF_K supported on JA.
    VALIDATE(SrcType == BPF_K);

    // BPF IP register is effectively only 32-bit. Treat k constant like a signed integer.
    // This allows it to jump anywhere in the program.
    // But! Loops are EXPLICITLY disallowed inside of BPF programs.
    // This is to prevent DOS style attacks through BPF programs.
    uint64_t Target = BPFIP + Inst->k + 1;
    // Must not jump past the end.
    VALIDATE(Target < NumInst);

    JumpLabelIterator TargetLabel {};

    if constexpr (!CalculateSize) {
      TargetLabel = JumpLabels.try_emplace(Target, ARMEmitter::ForwardLabel {}).first;
    }

    EMIT_INST((void)b(&TargetLabel->second));
    break;
  }
  case BPF_JEQ:
  case BPF_JGT:
  case BPF_JGE:
  case BPF_JSET: {
    auto CompareSrcReg = REG_X;
    if (SrcType == BPF_K) {
      CompareSrcReg = REG_TMP;
      auto Const = ConstPool.try_emplace(Inst->k, ARMEmitter::ForwardLabel {});
      EMIT_INST(ldr(CompareSrcReg, &Const.first->second));
    }
    uint32_t TargetTrue = BPFIP + Inst->jt + 1;
    uint32_t TargetFalse = BPFIP + Inst->jf + 1;

    // Must not jump past the end.
    VALIDATE(TargetTrue < NumInst && TargetFalse < NumInst);

    ARMEmitter::Condition CompareResultOp;
    if (Op == BPF_JEQ) {
      CompareResultOp = ARMEmitter::Condition::CC_EQ;
      EMIT_INST(cmp(ARMEmitter::Size::i32Bit, REG_A, CompareSrcReg));
    } else if (Op == BPF_JGT) {
      CompareResultOp = ARMEmitter::Condition::CC_HI;
      EMIT_INST(cmp(ARMEmitter::Size::i32Bit, REG_A, CompareSrcReg));
    } else if (Op == BPF_JGE) {
      CompareResultOp = ARMEmitter::Condition::CC_HS;
      EMIT_INST(cmp(ARMEmitter::Size::i32Bit, REG_A, CompareSrcReg));
    } else if (Op == BPF_JSET) {
      CompareResultOp = ARMEmitter::Condition::CC_NE;
      EMIT_INST(tst(ARMEmitter::Size::i32Bit, REG_A, CompareSrcReg));
    } else {
      RETURN_ERROR(-EINVAL);
    }

    JumpLabelIterator TargetTrueLabel {};
    JumpLabelIterator TargetFalseLabel {};

    if constexpr (!CalculateSize) {
      TargetTrueLabel = JumpLabels.try_emplace(TargetTrue, ARMEmitter::ForwardLabel {}).first;
      TargetFalseLabel = JumpLabels.try_emplace(TargetFalse, ARMEmitter::ForwardLabel {}).first;
    }

    EMIT_INST((void)b(CompareResultOp, &TargetTrueLabel->second));
    EMIT_INST((void)b(&TargetFalseLabel->second));
    break;
  }
  default: RETURN_ERROR(-EINVAL); // Unknown jump type
  }

  RETURN_SUCCESS();
}

// Emits (or, when CalculateSize is set, measures) the AArch64 code for one BPF_RET instruction.
//
// The filter's result is returned in w0. Depending on the encoding the value comes from
// the constant pool (BPF_K), from REG_X (BPF_X), or from the accumulator (BPF_A).
//
// Returns through RETURN_SUCCESS()/RETURN_ERROR(); unknown sources are reported as -EINVAL.
template<bool CalculateSize>
uint64_t BPFEmitter::HandleRet(uint32_t BPFIP, const sock_filter* Inst) {
  [[maybe_unused]] size_t OpSize {};
  const auto ValueSource = BPF_RVAL(Inst->code);

  if (ValueSource == BPF_K) {
    // Immediate return value: load it from the constant pool straight into w0.
    auto PoolEntry = ConstPool.try_emplace(Inst->k, ARMEmitter::ForwardLabel {});
    EMIT_INST(ldr(ARMEmitter::WReg::w0, &PoolEntry.first->second));
  } else if (ValueSource == BPF_X) {
    EMIT_INST(mov(ARMEmitter::WReg::w0, REG_X));
  } else if (ValueSource == BPF_A) {
    // w0 is already REG_A, nothing to move.
    static_assert(REG_A == ARMEmitter::WReg::w0, "This is expected to be the same");
  } else {
    RETURN_ERROR(-EINVAL);
  }

  EMIT_INST(ret());

  RETURN_SUCCESS();
}

// Emits (or, when CalculateSize is set, measures) the AArch64 code for one BPF_MISC instruction.
//
// Only the two register transfers are handled: TAX copies A into X, TXA copies X into A.
//
// Returns through RETURN_SUCCESS()/RETURN_ERROR(); any other operation is reported as -EINVAL.
template<bool CalculateSize>
uint64_t BPFEmitter::HandleMisc(uint32_t BPFIP, const sock_filter* Inst) {
  [[maybe_unused]] size_t OpSize {};
  const auto Transfer = BPF_MISCOP(Inst->code);

  if (Transfer == BPF_TAX) {
    EMIT_INST(mov(REG_X, REG_A));
  } else if (Transfer == BPF_TXA) {
    EMIT_INST(mov(REG_A, REG_X));
  } else {
    // Unsupported misc operation.
    RETURN_ERROR(-EINVAL);
  }

  RETURN_SUCCESS();
}

// Walks the whole BPF program once and dispatches every instruction to its class handler.
//
// The function is instantiated for two passes:
//   - CalculateSize == true:  nothing is emitted; the per-instruction sizes reported by the
//     handlers are summed, the constant-pool size (4 bytes per entry) is added, and the
//     total is returned. ConstPool and JumpLabels are cleared before returning.
//   - CalculateSize == false: code is emitted into the current buffer and jump labels are
//     bound as their target instruction is reached. Returns 0 on success.
//
// Pred is a default-constructible functor that decides whether a handler's return value is
// an error for the current pass. On the first error the handler's result is returned as-is.
//
// This function does not release the code buffer on error: the buffer belongs to the
// caller (JITFilter), which unmaps it when the emission pass fails. Unmapping it here as
// well would unmap the same range twice.
template<bool CalculateSize, class Pred>
uint64_t BPFEmitter::HandleEmission(uint32_t flags, const sock_fprog* prog) {
  constexpr Pred PredFunc;
  uint64_t CalculatedSize {};

  for (uint32_t i = 0; i < prog->len; ++i) {
    if constexpr (!CalculateSize) {
      // If an earlier jump targets this instruction, bind its label to the current position.
      auto jump_label = JumpLabels.find(i);
      if (jump_label != JumpLabels.end()) {
        (void)Bind(&jump_label->second);
      }
    }

    uint64_t Result {};

    const sock_filter* Inst = &prog->filter[i];
    const uint16_t Code = Inst->code;
    const uint16_t Class = BPF_CLASS(Code);
    switch (Class) {
    case BPF_LD:
    case BPF_LDX: {
      Result = HandleLoad<CalculateSize>(i, Inst);
      break;
    }
    case BPF_ST:
    case BPF_STX: {
      Result = HandleStore<CalculateSize>(i, Inst);
      break;
    }
    case BPF_ALU: {
      Result = HandleALU<CalculateSize>(i, Inst);
      break;
    }
    case BPF_JMP: {
      Result = HandleJmp<CalculateSize>(i, prog->len, Inst);
      break;
    }
    case BPF_RET: {
      Result = HandleRet<CalculateSize>(i, Inst);
      break;
    }
    case BPF_MISC: {
      Result = HandleMisc<CalculateSize>(i, Inst);
      break;
    }
    default:
      // We handle all instruction classes.
      FEX_UNREACHABLE;
    }

    if (PredFunc(Result)) {
      // Had error, early return. The caller is responsible for freeing the code buffer.
      return Result;
    }

    if constexpr (CalculateSize) {
      CalculatedSize += Result;
    }
  }

  if constexpr (CalculateSize) {
    // Add the constant pool size.
    CalculatedSize += ConstPool.size() * 4;

    // Size calculation could have added constants and jump labels. Erase them now.
    ConstPool.clear();
    JumpLabels.clear();

    return CalculatedSize;
  }

  return 0;
}

// Translates a classic BPF program into AArch64 code.
//
// The program is walked twice: first to validate it and compute the buffer size, then to
// emit the code followed by the constant pool. On success the buffer is made
// read+execute, GetFunc() returns its start and AllocationSize() its mapped size.
//
// Returns 0 on success, -EINVAL if the size pass rejects the program, -ENOMEM if the
// code buffer cannot be mapped, or the emission pass's error code.
uint64_t BPFEmitter::JITFilter(uint32_t flags, const sock_fprog* prog) {
  FuncSize = HandleEmission<true, SizeErrorCheck>(flags, prog);

  if (FuncSize == ~0ULL) {
    // Buffer size calculation found invalid code.
    return -EINVAL;
  }

  void* const Mapping = FEXCore::Allocator::mmap(nullptr, FuncSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (Mapping == MAP_FAILED) {
    // Never hand a failed mapping to the emitter; it would write through an invalid pointer.
    return -ENOMEM;
  }

  SetBuffer(static_cast<uint8_t*>(Mapping), FuncSize);

  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(GetBufferBase()), FuncSize);

  const auto CodeBegin = GetCursorAddress<uint8_t*>();

  uint64_t Result = HandleEmission<false, EmissionErrorCheck>(flags, prog);

  if (Result != 0) {
    // Had error, early return and free the memory.
    FEXCore::Allocator::munmap(GetBufferBase(), FuncSize);
    return Result;
  }

  const uint64_t CodeOnlySize = GetCursorAddress<uint8_t*>() - CodeBegin;

  // Emit the constant pool: bind each constant's label and store its 32-bit value.
  Align();
  for (auto& Const : ConstPool) {
    (void)Bind(&Const.second);
    dc32(Const.first);
  }

  ClearICache(CodeBegin, CodeOnlySize);
  ::mprotect(CodeBegin, AllocationSize(), PROT_READ | PROT_EXEC);
  Func = CodeBegin;

  if constexpr (false) {
    // Useful for debugging seccomp filters.
    LogMan::Msg::DFmt("JITFilter: disas 0x{:x},+{}", fmt::ptr(CodeBegin), CodeOnlySize);
  }

  ConstPool.clear();
  JumpLabels.clear();
  return 0;
}


} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Seccomp/BPFEmitter.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/
#pragma once

#include <CodeEmitter/Emitter.h>

#include <FEXCore/fextl/unordered_map.h>

#include <linux/filter.h>
#include <linux/seccomp.h>

struct sock_fprog;
struct sock_filter;

namespace FEX::HLE {
class BPFEmitter final : public ARMEmitter::Emitter {
public:
  struct WorkingBuffer {
    struct seccomp_data Data;
    uint32_t ScratchMemory[BPF_MEMWORDS]; // Defined as 16 words.
  };

  BPFEmitter() = default;

  uint64_t JITFilter(uint32_t flags, const sock_fprog* prog);
  void* GetFunc() const {
    return Func;
  }

  size_t AllocationSize() const {
    return FuncSize;
  }

private:
  template<bool CalculateSize>
  uint64_t HandleLoad(uint32_t BPFIP, const sock_filter* Inst);
  template<bool CalculateSize>
  uint64_t HandleStore(uint32_t BPFIP, const sock_filter* Inst);
  template<bool CalculateSize>
  uint64_t HandleALU(uint32_t BPFIP, const sock_filter* Inst);
  template<bool CalculateSize>
  uint64_t HandleJmp(uint32_t BPFIP, uint32_t NumInst, const sock_filter* Inst);
  template<bool CalculateSize>
  uint64_t HandleRet(uint32_t BPFIP, const sock_filter* Inst);
  template<bool CalculateSize>
  uint64_t HandleMisc(uint32_t BPFIP, const sock_filter* Inst);

  template<bool CalculateSize, class Pred>
  uint64_t HandleEmission(uint32_t flags, const sock_fprog* prog);

  fextl::unordered_map<uint32_t, ARMEmitter::ForwardLabel> JumpLabels;
  fextl::unordered_map<uint32_t, ARMEmitter::ForwardLabel> ConstPool;

  using JumpLabelIterator = decltype(JumpLabels)::iterator;

  void* Func {};
  size_t FuncSize {};
};


} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Seccomp/Dumper.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Seccomp/SeccompEmulator.h"

#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

namespace FEX::HLE {
void SeccompEmulator::DumpProgram(const sock_fprog* prog) {
  auto Parse_Class_LD = [](uint32_t BPFIP, const sock_filter* Inst) {
    auto DestName = [](const sock_filter* Inst) {
      if (BPF_CLASS(Inst->code) == BPF_LD) {
        return "A";
      } else {
        return "X";
      }
    };

    auto AccessSize = [](const sock_filter* Inst) {
      switch (BPF_SIZE(Inst->code)) {
      case BPF_W: return 32;
      case BPF_H: return 16;
      case BPF_B: return 8;
      case 0x18: /* BPF_DW */ return 64;
      }
      return 0;
    };

    auto ModeType = [](const sock_filter* Inst) {
      switch (BPF_MODE(Inst->code)) {
      case BPF_IMM: return "IMM";
      case BPF_ABS: return "ABS";
      case BPF_IND: return "IND";
      case BPF_MEM: return "MEM";
      case BPF_LEN: return "LEN";
      case BPF_MSH: return "MSH";
      }
      return "Unknown";
    };

    auto LoadName = [](const sock_filter* Inst) {
      using namespace std::string_view_literals;
      switch (BPF_MODE(Inst->code)) {
      case BPF_IMM: return fextl::fmt::format("#{}", Inst->k);
      case BPF_ABS: return fextl::fmt::format("seccomp_data + #{}", Inst->k);
      case BPF_IND: return fextl::fmt::format("Ind[X+#{}]", Inst->k);
      case BPF_MEM: return fextl::fmt::format("Mem[#{}]", Inst->k);
      case BPF_LEN: return fextl::fmt::format("len");
      case BPF_MSH: return fextl::fmt::format("msh");
      }
      return fextl::fmt::format("Unknown");
    };

    LogMan::Msg::IFmt("0x{:04x}: {} <- LD.{} {} {}", BPFIP, DestName(Inst), AccessSize(Inst), ModeType(Inst), LoadName(Inst));
  };

  auto Parse_Class_ST = [](uint32_t BPFIP, const sock_filter* Inst) {
    auto DestName = [](const sock_filter* Inst) {
      if (BPF_CLASS(Inst->code) == BPF_ST) {
        return "A";
      } else {
        return "X";
      }
    };

    LogMan::Msg::IFmt("0x{:04x}: Mem[{}] <- ST.{}", BPFIP, Inst->k, DestName(Inst));
  };

  auto Parse_Class_ALU = [](uint32_t BPFIP, const sock_filter* Inst) {
    auto GetOp = [](const sock_filter* Inst) {
      const auto Op = BPF_OP(Inst->code);

      switch (Op) {
      case BPF_ADD: return "ADD";
      case BPF_SUB: return "SUB";
      case BPF_MUL: return "MUL";
      case BPF_DIV: return "DIV";
      case BPF_OR: return "OR";
      case BPF_AND: return "AND";
      case BPF_LSH: return "LSH";
      case BPF_RSH: return "RSH";
      case BPF_MOD: return "MOD";
      case BPF_XOR: return "XOR";
      case BPF_NEG: return "NEG";
      default: return "Unknown";
      }
    };

    auto GetSrc = [](const sock_filter* Inst) {
      switch (BPF_SRC(Inst->code)) {
      case BPF_K: return fextl::fmt::format("0x{:x}", Inst->k);
      case BPF_X: return fextl::fmt::format("<X>");
      }
      return fextl::fmt::format("Unknown");
    };

    LogMan::Msg::IFmt("0x{:04x}: {} <A>, {}", BPFIP, GetOp(Inst), GetSrc(Inst));
  };

  auto Parse_Class_JMP = [](uint32_t BPFIP, const sock_filter* Inst) {
    auto GetOp = [](const sock_filter* Inst) {
      switch (BPF_OP(Inst->code)) {
      case BPF_JA: return "a";
      case BPF_JEQ: return "eq";
      case BPF_JGT: return "gt";
      case BPF_JGE: return "ge";
      case BPF_JSET: return "set";
      }
      return "Unknown";
    };

    auto GetSrc = [](const sock_filter* Inst) {
      switch (BPF_SRC(Inst->code)) {
      case BPF_K: return fextl::fmt::format("0x{:x}", Inst->k);
      case BPF_X: return fextl::fmt::format("<X>");
      }
      return fextl::fmt::format("Unknown");
    };

    LogMan::Msg::IFmt("0x{:04x}: JMP.{} {}, +{} (#0x{:x}), +{} (#0x{:x})", BPFIP, GetOp(Inst), GetSrc(Inst), Inst->jt, BPFIP + Inst->jt + 1,
                      Inst->jf, BPFIP + Inst->jf + 1);
  };

  auto Parse_Class_RET = [](uint32_t BPFIP, const sock_filter* Inst) {
    auto GetRetValue = [](const sock_filter* Inst) {
      switch (BPF_RVAL(Inst->code)) {
      case BPF_K: {
        uint32_t RetData = Inst->k & SECCOMP_RET_DATA;
        switch (Inst->k & SECCOMP_RET_ACTION_FULL) {
        case SECCOMP_RET_KILL_PROCESS: return fextl::fmt::format("KILL_PROCESS.{}", RetData);
        case SECCOMP_RET_KILL_THREAD: return fextl::fmt::format("KILL_THREAD.{}", RetData);
        case SECCOMP_RET_TRAP: return fextl::fmt::format("TRAP.{}", RetData);
        case SECCOMP_RET_ERRNO: return fextl::fmt::format("ERRNO.{}", RetData);
        case SECCOMP_RET_USER_NOTIF: return fextl::fmt::format("USER_NOTIF.{}", RetData);
        case SECCOMP_RET_TRACE: return fextl::fmt::format("TRACE.{}", RetData);
        case SECCOMP_RET_LOG: return fextl::fmt::format("LOG.{}", RetData);
        case SECCOMP_RET_ALLOW: return fextl::fmt::format("ALLOW.{}", RetData);
        default: break;
        }
        return fextl::fmt::format("<Unknown>.{}", RetData);
      }
      case BPF_X: return fextl::fmt::format("<X>");
      case BPF_A: return fextl::fmt::format("<A>");
      }

      return fextl::fmt::format("Unknown");
    };

    LogMan::Msg::IFmt("0x{:04x}: RET {}", BPFIP, GetRetValue(Inst));
  };

  auto Parse_Class_MISC = [](uint32_t BPFIP, const sock_filter* Inst) {
    const auto MiscOp = BPF_MISCOP(Inst->code);
    switch (MiscOp) {
    case BPF_TAX: LogMan::Msg::IFmt("0x{:04x}: TAX", BPFIP); break;
    case BPF_TXA: LogMan::Msg::IFmt("0x{:04x}: TXA", BPFIP); break;
    default: LogMan::Msg::IFmt("0x{:04x}: Misc: Unknown", BPFIP); break;
    };
  };

  LogMan::Msg::IFmt("BPF program: 0x{:x} instructions", prog->len);

  for (size_t i = 0; i < prog->len; ++i) {
    const sock_filter* Inst = &prog->filter[i];
    const uint16_t Code = Inst->code;
    const uint16_t Class = BPF_CLASS(Code);
    switch (Class) {
    case BPF_LD:
    case BPF_LDX: Parse_Class_LD(i, Inst); break;
    case BPF_ST:
    case BPF_STX: Parse_Class_ST(i, Inst); break;
    case BPF_ALU: Parse_Class_ALU(i, Inst); break;
    case BPF_JMP: Parse_Class_JMP(i, Inst); break;
    case BPF_RET: Parse_Class_RET(i, Inst); break;
    case BPF_MISC: Parse_Class_MISC(i, Inst); break;
    }
  }
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Seccomp/SeccompEmulator.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Seccomp/BPFEmitter.h"
#include "LinuxSyscalls/Seccomp/SeccompEmulator.h"

#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"

#include <CodeEmitter/Emitter.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <fcntl.h>
#include <linux/audit.h>
#include <linux/bpf_common.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <sys/prctl.h>

// seccomp
//
// global
// - kcmp                              - pass
// - mode_strict_support               - pass
// - mode_strict_cannot_call_prctl     - pass
// - no_new_privs_support              - pass
// - mode_filter_support               - pass
// - mode_filter_without_nnp           - pass
// - filter_size_limits                - pass
// - filter_chain_limits               - pass
// - mode_filter_cannot_move_to_strict - pass
// - mode_filter_get_seccomp           - pass
// - ALLOW_all                         - pass
// - empty_prog                        - pass
// - log_all                           - pass
// - unknown_ret_is_kill_inside        - pass
// - unknown_ret_is_kill_above_allow   - pass
// - KILL_all                          - pass
// - KILL_one                          - pass
// - KILL_one_arg_one                  - pass
// - KILL_one_arg_six                  - pass
// - KILL_thread                       - FAIL (unrelated to bpf)
// - KILL_process                      - FAIL (unrelated to bpf)
// - KILL_unknown                      - FAIL (unrelated to bpf)
// - arg_out_of_range                  - pass
// - ERRNO_valid                       - pass
// - ERRNO_zero                        - pass
// - ERRNO_capped                      - pass
// - ERRNO_order                       - pass
// - seccomp_syscall                   - pass
// - seccomp_syscall_mode_lock         - pass
// - detect_seccomp_filter_flags       - pass
// - TSYNC_first                       - pass
// - syscall_restart                   - FAIL (PTRACE)
// - filter_flag_log                   - pass
// - get_action_avail                  - FAIL (ptrace and user-notif)
// TSYNC
// - siblings_fail_prctl               - pass
// - two_siblings_with_ancestor        - FAIL (kill-thread not working quite right)
// - two_sibling_want_nnp              - pass
// - two_siblings_with_one_divergence  - pass
// - two_siblings_with_one_divergence_no_tid_in_err - pass
// - two_siblings_not_under_filter     - FAIL (kill-thread not working quite right)
// - two_siblings_with_no_filter       - FAIL (kill-thread not working quite right)
//
// user-notif stuff
// - get_metadata                      - SKIP (Needs root)
// - user_notification_basic           - FAIL (user-notif)
// - user_notification_with_tsync      - FAIL (user-notif)
// - user_notification_kill_in_middle  - FAIL (user-notif)
// - user_notification_signal          - FAIL (user-notif)
// - user_notification_closed_listener - FAIL (user-notif)
// - user_notification_child_pid_ns    - FAIL (user-notif)
// - user_notification_sibling_pid_ns  - FAIL (user-notif)
// - user_notification_fault_recv      - FAIL (user-notif)
// - seccomp_get_notif_sizes           - pass
// - user_notification_continue        - FAIL (user-notif)
// - user_notification_filter_empty    - FAIL (user-notif)
// - user_notification_filter_empty_threaded - FAIL (user-notif)
// - user_notification_addfd           - FAIL (user-notif)
// - user_notification_addfd_rlimit    - FAIL (user-notif)
// - user_notification_sync            - FAIL (user-notif)
// - user_notification_fifo            - FAIL (user-notif)
// - user_notification_wait_killable_pre_notification - FAIL (user-notif)
// - user_notification_wait_killable   - FAIL (user-notif)
// - user_notification_wait_killable_fatal - FAIL (user-notif)
//
// O_SUSPEND_SECCOMP
// - setoptions - FAIL (ptrace)
// - seize      - FAIL (ptrace)
// TRAP
// - dfl     - pass
// - ign     - pass
// - handler - pass
//
// precedence
// - allow_ok                     - pass
// - kill_is_highest              - pass
// - kill_is_highest_in_any_order - pass
// - trap_is_second               - pass
// - trap_is_second_in_any_order  - pass
// - errno_is_third               - pass
// - errno_is_third_in_any_order  - pass
// - trace_is_fourth              - pass
// - trace_is_fourth_in_any_order - pass
// - log_is_fifth                 - pass
// - log_is_fifth_in_any_order    - pass
//
// TRACE_poke
// - ptrace unsupported
// TRACE_syscall
// - ptrace unsupported

namespace FEX::HLE {
// Entry point for the seccomp() syscall: dispatches `Op` to the matching operation.
// Returns the operation's result, or -EINVAL when seccomp emulation is not enabled or
// the operation is not recognised.
uint64_t SeccompEmulator::Handle(FEXCore::Core::CpuStateFrame* Frame, uint32_t Op, uint32_t flags, void* arg) {
  if (!NeedsSeccomp) {
    // If seccomp isn't enabled then say so.
    return -EINVAL;
  }

  if (Op == SECCOMP_SET_MODE_STRICT) {
    return SetModeStrict(Frame, flags, arg);
  }

  if (Op == SECCOMP_SET_MODE_FILTER) {
    return SetModeFilter(Frame, flags, static_cast<const sock_fprog*>(arg));
  }

  if (Op == SECCOMP_GET_ACTION_AVAIL) {
    return GetActionAvail(flags, static_cast<const uint32_t*>(arg));
  }

  if (Op == SECCOMP_GET_NOTIF_SIZES) {
    return GetNotifSizes(flags, static_cast<struct seccomp_notif_sizes*>(arg));
  }

  // operation is unknown or is not supported by this kernel version or configuration.
  return -EINVAL;
}

// Equivalent to prctl(PR_GET_SECCOMP)
// Returns the calling thread's current seccomp mode, or -EINVAL when seccomp emulation
// is not enabled.
uint64_t SeccompEmulator::GetSeccomp(FEXCore::Core::CpuStateFrame* Frame) {
  if (NeedsSeccomp) {
    const auto ThreadState = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    return ThreadState->SeccompMode;
  }

  // If seccomp isn't enabled then say so.
  return -EINVAL;
}

// Makes `Child` share all of `Parent`'s seccomp filters and its seccomp mode.
// Every shared filter gets its reference count incremented.
void SeccompEmulator::InheritSeccompFilters(FEX::HLE::ThreadStateObject* Parent, FEX::HLE::ThreadStateObject* Child) {
  // Signals are masked and the filter mutex held for the whole copy.
  auto Guard = FEXCore::MaskSignalsAndLockMutex(FilterMutex);

  // The child refers to the very same filter objects as the parent.
  Child->Filters = Parent->Filters;

  // One additional owner for each filter that is now shared.
  for (const auto& Shared : Child->Filters) {
    std::atomic_ref<uint64_t>(Shared->RefCount).fetch_add(1);
  }

  // Copy the operating mode.
  Child->SeccompMode = Parent->SeccompMode;
}

// Drops `Thread`'s references to its seccomp filters. Filters whose reference count
// reaches zero are removed from the global list and their code mapping is released.
void SeccompEmulator::FreeSeccompFilters(FEX::HLE::ThreadStateObject* Thread) {
  // Signals are masked and the filter mutex held for the whole teardown.
  auto Guard = FEXCore::MaskSignalsAndLockMutex(FilterMutex);

  // Release this thread's reference on every filter, remembering whether any of them
  // just lost its last owner.
  bool AnyOrphaned = false;
  for (const auto& Entry : Thread->Filters) {
    const auto Previous = std::atomic_ref<uint64_t>(Entry->RefCount).fetch_sub(1);
    AnyOrphaned = AnyOrphaned || Previous == 1;
  }
  Thread->Filters.clear();

  if (!AnyOrphaned) {
    return;
  }

  // Garbage collect filters
  std::erase_if(Filters, [](auto& Candidate) {
    const bool Unreferenced = std::atomic_ref<uint64_t>(Candidate.RefCount).load(std::memory_order_relaxed) == 0;
    if (Unreferenced) {
      FEXCore::Allocator::munmap(reinterpret_cast<void*>(Candidate.Func), Candidate.MappedSize);
    }
    return Unreferenced;
  });
}

// Per-filter record of the serialized seccomp state.
// SerializeFilters writes one of these for each filter, immediately followed by
// CodeSize bytes of the filter's code; DeserializeFilters reads them back in the same order.
struct SerializedFilter {
  // Number of code bytes that follow this record (the filter's mapped size).
  size_t CodeSize;
  // Number of BPF instructions in the filter.
  uint32_t FilterInstructions;
  // The filter's logging flag.
  bool ShouldLog;
  // Start of the code bytes that follow the fixed-size part.
  char Code[];
};

// Leading record of the serialized seccomp state.
// SerializeFilters writes it first; NumberOfFilters filter records follow it.
struct SerializationHeader {
  // Number of SerializedFilter records that follow.
  size_t NumberOfFilters;
  // The thread's seccomp mode at serialization time.
  uint32_t SeccompMode;
  // Start of the filter records that follow the fixed-size part.
  SerializedFilter Filters[];
};

// Serializes the calling thread's seccomp mode and filters into a sealed memfd.
//
// Layout: a SerializationHeader, then for every filter a SerializedFilter record
// followed by that filter's code bytes.
//
// Returns:
//   - std::nullopt when the thread has seccomp disabled,
//   - -1 when the memfd could not be created or fully written,
//   - otherwise the file descriptor, positioned at the start of the data.
std::optional<int> SeccompEmulator::SerializeFilters(FEXCore::Core::CpuStateFrame* Frame) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
  if (Thread->SeccompMode == SECCOMP_MODE_DISABLED) {
    // Didn't have seccomp enabled.
    return std::nullopt;
  }

  int FD = memfd_create("seccomp_filters", MFD_ALLOW_SEALING);
  if (FD == -1) {
    // Couldn't create memfd
    LogMan::Msg::EFmt("Couldn't create seccomp filter FD!");
    return -1;
  }

  // Writes exactly `Size` bytes, continuing after short writes. A short write must not
  // be mistaken for success: the reader expects every record to be complete.
  const auto WriteAll = [FD](const void* Data, size_t Size) -> bool {
    auto Cursor = static_cast<const char*>(Data);
    while (Size != 0) {
      const ssize_t Written = write(FD, Cursor, Size);
      if (Written <= 0) {
        return false;
      }
      Cursor += Written;
      Size -= static_cast<size_t>(Written);
    }
    return true;
  };

  SerializationHeader Header {
    .NumberOfFilters = Thread->Filters.size(),
    .SeccompMode = Thread->SeccompMode,
  };

  if (!WriteAll(&Header, sizeof(Header))) {
    LogMan::Msg::EFmt("Couldn't write header!");
    close(FD);
    return -1;
  }

  for (auto& Filter : Thread->Filters) {
    SerializedFilter SFilter {
      .CodeSize = Filter->MappedSize,
      .FilterInstructions = Filter->FilterInstructions,
      .ShouldLog = Filter->ShouldLog,
    };

    if (!WriteAll(&SFilter, sizeof(SFilter))) {
      LogMan::Msg::EFmt("Couldn't write filter header!");
      close(FD);
      return -1;
    }

    // The filter's code follows its record directly.
    if (!WriteAll((const void*)Filter->Func, Filter->MappedSize)) {
      LogMan::Msg::EFmt("Couldn't write filter!");
      close(FD);
      return -1;
    }
  }

  // Reset FD to start.
  lseek(FD, 0, SEEK_SET);

  // Seal everything about this FD.
  if (fcntl(FD, F_ADD_SEALS, F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE | F_SEAL_FUTURE_WRITE) == -1) {
    LogMan::Msg::IFmt("Couldn't seal seccomp serialize FD. Nefarious code could modify");
  }

  return FD;
}

// Restores seccomp state for the calling thread from a descriptor produced by
// SerializeFilters.
//
// For every serialized filter a fresh mapping is created, filled with the filter's code,
// made read+execute, registered in the global filter list with a reference count of 1
// and attached to the thread. The thread's seccomp mode is set once all filters have
// been read. `FD` is closed on every path; on a read or allocation failure the function
// logs the problem and returns early.
void SeccompEmulator::DeserializeFilters(FEXCore::Core::CpuStateFrame* Frame, int FD) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  SerializationHeader Header;
  int Res = read(FD, &Header, sizeof(Header));
  if (Res == -1 || Res != sizeof(Header)) {
    LogMan::Msg::EFmt("Couldn't read Seccomp header!");
    close(FD);
    return;
  }

  for (size_t i = 0; i < Header.NumberOfFilters; ++i) {
    SerializedFilter SFilter;

    Res = read(FD, &SFilter, sizeof(SFilter));
    if (Res == -1 || Res != sizeof(SFilter)) {
      LogMan::Msg::EFmt("Couldn't read Seccomp Filter header!");
      close(FD);
      return;
    }
    auto Ptr = FEXCore::Allocator::mmap(nullptr, SFilter.CodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (Ptr == (void*)~0ULL) {
      LogMan::Msg::EFmt("Couldn't allocate ptr for filter!");
      close(FD);
      return;
    }

    Res = read(FD, Ptr, SFilter.CodeSize);
    if (Res == -1 || Res != SFilter.CodeSize) {
      LogMan::Msg::EFmt("Couldn't read Seccomp Filter code!");
      // The mapping has not been handed to the filter list yet; release it here so it
      // doesn't leak.
      FEXCore::Allocator::munmap(Ptr, SFilter.CodeSize);
      close(FD);
      return;
    }

    ::mprotect(Ptr, SFilter.CodeSize, PROT_READ | PROT_EXEC);

    FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(Ptr), SFilter.CodeSize);

    // Register the filter globally; the mapping is owned by the filter list from here on.
    auto& it =
      Filters.emplace_back(SeccompFilterInfo {(SeccompFilterFunc)Ptr, 1, SFilter.CodeSize, SFilter.FilterInstructions, SFilter.ShouldLog});
    TotalFilterInstructions += SFilter.FilterInstructions;

    // Append the filter to the thread.
    Thread->Filters.emplace_back(&it);
  }

  Thread->SeccompMode = Header.SeccompMode;
  close(FD);
}

// Runs every seccomp filter installed on the calling thread against the given syscall
// and applies the winning action.
//
// Frame - CPU state frame of the calling thread.
// JITPC - host PC used to reconstruct the guest RIP reported to the filters.
// Args  - Argument[0] is the syscall number, Argument[1..6] the syscall arguments.
//
// Returns {true, value} for SECCOMP_RET_ERRNO (negated, clamped errno) and
// SECCOMP_RET_TRACE (-ENOSYS); returns {false, 0} in every other case, including when
// no filter is installed.
SeccompEmulator::ExecuteFilterResult
SeccompEmulator::ExecuteFilter(FEXCore::Core::CpuStateFrame* Frame, uint64_t JITPC, FEXCore::HLE::SyscallArguments* Args) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  if (Thread->Filters.empty()) {
    // Seccomp not installed. Allow it.
    return {false, 0};
  }

  // Reconstruct the RIP from the JITPC.
  const uint64_t RIP = Thread->Thread->CTX->RestoreRIPFromHostPC(Frame->Thread, JITPC);

  const auto Arch = Is64BitMode() ? AUDIT_ARCH_X86_64 : AUDIT_ARCH_I386;
  bool ShouldLog {};
  uint32_t SeccompResult {};

  {
    BPFEmitter::WorkingBuffer Data {
      .Data =
        {
          .nr = static_cast<int32_t>(Args->Argument[0]),
          .arch = Arch,
          .instruction_pointer = RIP,
          .args =
            {
              Args->Argument[1],
              Args->Argument[2],
              Args->Argument[3],
              Args->Argument[4],
              Args->Argument[5],
              Args->Argument[6],
            },
        },
    };

    bool HasResult {};
    // seccomp filters are executed from latest added to oldest.
    for (auto it = Thread->Filters.rbegin(); it != Thread->Filters.rend(); ++it) {
      // Explicitly zero scratch memory.
      memset(&Data.ScratchMemory, 0, sizeof(Data.ScratchMemory));

      uint32_t CurrentResult = (*it)->Func(0, 0, 0, 0, &Data);

      if (!HasResult) {
        // The first filter's result is the starting point for the precedence comparison.
        SeccompResult = CurrentResult;
        ShouldLog = (*it)->ShouldLog;
        HasResult = true;
        continue;
      }

      // Compare the action bits as signed 16-bit values so KILL_PROCESS (0x8000) sorts lowest.
      const int16_t CurrentAction = (CurrentResult & SECCOMP_RET_ACTION_FULL) >> 16;
      const int16_t Action = (SeccompResult & SECCOMP_RET_ACTION_FULL) >> 16;

      // All actions are executed but the first highest precedence result is returned.
      // Precedence order from highest priority to lowest:
      //   - SECCOMP_RET_KILL_PROCESS (0x8000, -32768)
      //   - SECCOMP_RET_KILL_THREAD  (0x0000,      0)
      //   - SECCOMP_RET_TRAP         (0x0003,      3)
      //   - SECCOMP_RET_ERRNO        (0x0005,      5)
      //   - SECCOMP_RET_USER_NOTIF   (0x7fc0,  32704)
      //   - SECCOMP_RET_TRACE        (0x7ff0,  32752)
      //   - SECCOMP_RET_LOG          (0x7ffc,  32764)
      //   - SECCOMP_RET_ALLOW        (0x7fff,  32767)
      if (CurrentAction < Action) {
        SeccompResult = CurrentResult;
        ShouldLog = (*it)->ShouldLog;
      }
    }
  }

  const auto ActionMasked = SeccompResult & SECCOMP_RET_ACTION_FULL;
  const auto DataMasked = SeccompResult & SECCOMP_RET_DATA;

  // Logging rules
  // - Log if explicitly returning SECCOMP_RET_LOG
  // - Log if the filter enabled the logging flag and the action is something other than SECCOMP_RET_ALLOW.
  if ((ShouldLog && ActionMasked != SECCOMP_RET_ALLOW) || ActionMasked == SECCOMP_RET_LOG) {
    int Signal = 0;
    switch (ActionMasked) {
    case SECCOMP_RET_KILL_PROCESS:
    case SECCOMP_RET_KILL_THREAD: Signal = GetKillSignal(); break;
    case SECCOMP_RET_TRAP: Signal = SIGSYS; break;
    default: break;
    }

    // With real seccomp the logs go to dmesg. log through FEX since we can't use dmesg.
    // ex: `[13572.669277] audit: type=1326 audit(1715469332.533:62): auid=1000 uid=1000 gid=1000 ses=2 subj=unconfined pid=52546 comm="seccomp_bpf"
    // exe="/mnt/Work/Projects/work/linux/tools/testing/selftests/seccomp/seccomp_bpf" sig=0 arch=c000003e syscall=39 compat=0 ip=0x7d789352725d code=0x7ffc0000`
    timespec tp {};
    clock_gettime(CLOCK_MONOTONIC, &tp);
    LogMan::Msg::IFmt("audit: type={} audit({}.{:03}:{}): uid={} gid={} pid={} comm={} sig={} arch={:x} syscall={} ip=0x{:x} code=0x{:x}",
                      AUDIT_SECCOMP, tp.tv_sec, tp.tv_nsec / 1'000'000, AuditSerialIncrement(), ::getuid(), ::getgid(), ::getpid(),
                      Filename(), Signal, Arch, Args->Argument[0], RIP, SeccompResult);
  }

  switch (ActionMasked) {
  // Unknown actions behave like RET_KILL_PROCESS.
  default:
  case SECCOMP_RET_KILL_PROCESS: {
    const int KillSignal = GetKillSignal();
    // Ignores signal handler and sigmask
    uint64_t Mask = 1ULL << (KillSignal - 1);
    SignalDelegation->GuestSigProcMask(Thread, SIG_UNBLOCK, &Mask, nullptr);
    SignalDelegation->UninstallHostHandler(KillSignal);
    // Target this process only. A pid of 0 would deliver the signal to every process in
    // the caller's process group, not just the one that violated the filter.
    kill(::getpid(), KillSignal);
    break;
  }
  case SECCOMP_RET_KILL_THREAD: {
    // Ignores signal handler and sigmask
    // 64-bit shift, matching the mask width and the KILL_PROCESS path.
    uint64_t Mask = 1ULL << (SIGSYS - 1);
    SignalDelegation->GuestSigProcMask(Thread, SIG_UNBLOCK, &Mask, nullptr);
    SignalDelegation->UninstallHostHandler(SIGSYS);
    FHU::Syscalls::tgkill(::getpid(), ::gettid(), SIGSYS);
    break;
  }
  case SECCOMP_RET_TRAP: {
    // Queue a SIGSYS carrying the filter's data, the syscall and its call site.
    siginfo_t Info {
      .si_signo = SIGSYS,
      .si_errno = static_cast<int32_t>(DataMasked),
      .si_code = 1, // SYS_SECCOMP
    };

    Info.si_call_addr = reinterpret_cast<void*>(RIP);
    Info.si_syscall = Args->Argument[0];
    Info.si_arch = Arch;

    SignalDelegation->QueueSignal(::getpid(), ::gettid(), SIGSYS, &Info, true);
    break;
  }
  case SECCOMP_RET_ERRNO: {
    // errno return is clamped.
    return {true, -(std::min<uint64_t>(DataMasked, 4095))};
  }
  case SECCOMP_RET_TRACE: {
    // When no tracer attached, behave like RET_ERRNO returning ENOSYS.
    // TODO: Implement once FEX supports tracing.
    return {true, static_cast<uint64_t>(-ENOSYS)};
  }
  case SECCOMP_RET_USER_NOTIF:
  case SECCOMP_RET_LOG:
  case SECCOMP_RET_ALLOW: break;
  }

  return {false, 0};
}

// Equivalent to seccomp(SECCOMP_SET_MODE_STRICT, ...);
//
// Strict mode is emulated by installing a fixed BPF program through SetModeFilter(). The program only allows
// read, write, exit, exit_group and (rt_)sigreturn and answers SECCOMP_RET_KILL_PROCESS for everything else.
//
// Returns 0 on success or a negated errno:
//   -EINVAL: `flags` is non-zero, `arg` is non-null, or the thread is already in filter mode.
//   -EACCES: no_new_privs is not set (required because the strict filter is installed like a regular filter).
//   Any error SetModeFilter() reports while installing the strict program.
uint64_t SeccompEmulator::SetModeStrict(FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, const void* arg) {
  const auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  // Argument validation comes first so that bad arguments always report EINVAL, like the kernel does.
  if (flags != 0) {
    // The specified flags are invalid for the given operation.
    return -EINVAL;
  }

  if (arg != nullptr) {
    // The specified arg is invalid for the given operation.
    return -EINVAL;
  }

  if (Thread->SeccompMode == SECCOMP_MODE_FILTER) {
    // Filter mode cannot move to strict
    return -EINVAL;
  }

  if (::prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 0) {
    // The strict filter goes through the same installation path as SECCOMP_SET_MODE_FILTER, which requires no_new_privs.
    return -EACCES;
  }

#define syscall_nr (offsetof(struct seccomp_data, nr))
#define ALLOW_SYSCALL(name) \
  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, FEX::HLE::x64::SYSCALL_x64_##name, 0, 1), BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)
#define ALLOW_SYSCALL_x32(name) \
  BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, FEX::HLE::x32::SYSCALL_x86_##name, 0, 1), BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)

  constexpr static struct sock_filter strict_filter_x64[] = {
    // Load syscall number
    BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_nr),

    // Allow read, write, exit, exit_group, and sigreturn
    ALLOW_SYSCALL(read),
    ALLOW_SYSCALL(write),
    ALLOW_SYSCALL(exit),
    ALLOW_SYSCALL(exit_group),
    ALLOW_SYSCALL(rt_sigreturn),
    BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS),
  };

  constexpr static struct sock_filter strict_filter_x32[] = {
    // Load syscall number
    BPF_STMT(BPF_LD + BPF_W + BPF_ABS, syscall_nr),

    // Allow read, write, exit, exit_group, and sigreturn
    ALLOW_SYSCALL_x32(read),
    ALLOW_SYSCALL_x32(write),
    ALLOW_SYSCALL_x32(exit),
    ALLOW_SYSCALL_x32(exit_group),
    ALLOW_SYSCALL_x32(rt_sigreturn),
    ALLOW_SYSCALL_x32(sigreturn),
    BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_KILL_PROCESS),
  };

// The helper macros are only meaningful for the two tables above; don't leak them into the rest of the file.
#undef ALLOW_SYSCALL_x32
#undef ALLOW_SYSCALL
#undef syscall_nr

  const sock_fprog prog_x64 {
    .len = (unsigned short)(sizeof(strict_filter_x64) / sizeof(strict_filter_x64[0])),
    .filter = const_cast<struct sock_filter*>(strict_filter_x64),
  };

  const sock_fprog prog_x32 {
    .len = (unsigned short)(sizeof(strict_filter_x32) / sizeof(strict_filter_x32[0])),
    .filter = const_cast<struct sock_filter*>(strict_filter_x32),
  };

  const sock_fprog* prog = Is64BitMode() ? &prog_x64 : &prog_x32;
  const uint64_t Result = SetModeFilter(Frame, 0, prog);
  if (Result != 0) {
    // Installing the strict filter failed (e.g. instruction budget exhausted or JIT failure).
    // Report the failure instead of claiming the thread is now sandboxed.
    return Result;
  }

  // Only commit the strict-mode state once the filter is actually in place.
  // Strict mode kills with SIGKILL rather than the SIGSYS used by regular filters.
  CurrentKillSignal = SIGKILL;
  Thread->SeccompMode = SECCOMP_MODE_STRICT;

  return 0;
}

// Determines whether the calling thread's seccomp state can be synchronised onto all other threads.
//
// A thread is compatible when it is the caller, has seccomp disabled, or has the same seccomp mode and the same
// ordered list of filter entry points as the caller.
//
// Returns 0 when every thread is compatible, otherwise the TID of the first incompatible thread.
uint64_t SeccompEmulator::CanDoTSync(FEXCore::Core::CpuStateFrame* Frame) {
  auto Caller = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
  auto ThreadList = SyscallHandler->TM.GetThreads();

  for (auto& Candidate : *ThreadList) {
    // The caller itself and threads without seccomp enabled never block a tsync.
    if (Candidate == Caller || Candidate->SeccompMode == SECCOMP_MODE_DISABLED) {
      continue;
    }

    // Strict versus filter mode aren't tsync compatible, and neither are differing filter counts.
    bool Compatible = Candidate->SeccompMode == Caller->SeccompMode && Candidate->Filters.size() == Caller->Filters.size();

    // The filters must share entry points, in the same order.
    for (size_t Index = 0; Compatible && Index < Caller->Filters.size(); ++Index) {
      Compatible = Candidate->Filters[Index]->Func == Caller->Filters[Index]->Func;
    }

    if (!Compatible) {
      return Candidate->ThreadInfo.TID;
    }
  }

  // Everything matched. tsync compatible!
  return 0;
}

// Propagates the calling thread's filter list and seccomp mode to every other thread (SECCOMP_FILTER_FLAG_TSYNC).
//
// SetModeFilter() calls this with FilterMutex held, after CanDoTSync() has accepted every thread.
//
// Reference counting: a filter's RefCount is only incremented for threads that did not already hold that filter.
// Previously the thread's old list was discarded without releasing its references and then every filter was
// incremented again, which over-counted filters the thread already shared with the caller so they could never
// drop back to zero.
// NOTE(review): this assumes each entry in a thread's Filters list accounts for exactly one RefCount unit (as the
// initial count of 1 in SetModeFilter suggests) — confirm against InheritSeccompFilters/FreeSeccompFilters.
void SeccompEmulator::TSyncFilters(FEXCore::Core::CpuStateFrame* Frame) {
  auto ParentThread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
  auto Threads = SyscallHandler->TM.GetThreads();

  for (auto& Thread : *Threads) {
    if (Thread == ParentThread) {
      // Skip same thread.
      continue;
    }

    for (auto& Filter : ParentThread->Filters) {
      // Filter lists are short, so a linear membership scan is fine here.
      bool AlreadyReferenced = false;
      for (auto& Held : Thread->Filters) {
        if (Held == Filter) {
          AlreadyReferenced = true;
          break;
        }
      }

      if (!AlreadyReferenced) {
        // Newly shared filter: this thread becomes an additional owner.
        std::atomic_ref<uint64_t>(Filter->RefCount)++;
      }
    }

    // Copy-assignment replaces the previous contents; no explicit clear() is necessary.
    Thread->Filters = ParentThread->Filters;
    Thread->SeccompMode = ParentThread->SeccompMode;
  }
}

// Equivalent to seccomp(SECCOMP_SET_MODE_FILTER, ...);
//
// JIT-compiles the BPF program in `prog`, records it in the emulator-wide filter list and appends it to the
// calling thread. With SECCOMP_FILTER_FLAG_TSYNC the resulting filter list is also pushed to all other threads.
//
// Returns:
//   0 on success.
//   -EINVAL: unsupported flag combination, or the program length is zero or exceeds BPF_MAXINSNS.
//   -EFAULT: `prog` is null.
//   -EACCES: no_new_privs is not set.
//   -ENOMEM: the accumulated instruction count would exceed BPF_MAX_INSNS_PER_PATH.
//   -ESRCH or a positive TID: TSYNC was requested but a thread is not compatible (see CanDoTSync).
//   Otherwise whatever BPFEmitter::JITFilter reports.
uint64_t SeccompEmulator::SetModeFilter(FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, const sock_fprog* prog) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  // Order of checks in this function matter
  // 1) Check flags
  // 2) Check if program is invalid (pointer, then length)
  // 3) Check no_new_privs
  const uint32_t SUPPORTED_FLAGS = SECCOMP_FILTER_FLAG_TSYNC |      // 1U << 0
                                   SECCOMP_FILTER_FLAG_LOG |        // 1U << 1
                                   SECCOMP_FILTER_FLAG_SPEC_ALLOW | // 1U << 2
                                   // SECCOMP_FILTER_FLAG_NEW_LISTENER |      // 1U << 3
                                   SECCOMP_FILTER_FLAG_TSYNC_ESRCH | // 1U << 4
                                   0;

  const bool DoingTsync = flags & SECCOMP_FILTER_FLAG_TSYNC;

  if (flags & ~SUPPORTED_FLAGS) {
    // Unknown flags passed in.
    return -EINVAL;
  }

  if (DoingTsync && (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER) && !(flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH)) {
    /// If NEW_LISTENER and TSYNC are both used then TSYNC_ESRCH must also be set.
    /// Otherwise on error there would be no way to tell the difference between success and failure.
    /// (Currently unreachable because NEW_LISTENER is rejected above; kept for when it becomes supported.)
    return -EINVAL;
  }

  if (!prog) {
    return -EFAULT;
  }

  // The kernel validates the program length before it looks at no_new_privs, so an invalid program
  // reports EINVAL even when the caller would also fail the privilege check.
  if (prog->len > BPF_MAXINSNS || prog->len == 0) {
    // operation specified SECCOMP_SET_MODE_FILTER, but the filter program pointed to by args was not valid or the length of the filter
    // program was zero or exceeded BPF_MAXINSNS (4096) instructions.
    return -EINVAL;
  }

  if (::prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) == 0) {
    // The caller did not have the CAP_SYS_ADMIN capability in its user namespace, or had not set no_new_privs before using SECCOMP_SET_MODE_FILTER.
    return -EACCES;
  }

  // Don't interrupt me while I'm jitting.
  auto lk = FEXCore::MaskSignalsAndLockMutex(FilterMutex);

  // Each already-installed filter on this thread costs an extra fixed penalty on top of the raw instruction counts.
  const size_t TotalFinalInstructions = TotalFilterInstructions + prog->len + Thread->Filters.size() * BPF_MULTIFILTERPENALTY;
  if (TotalFinalInstructions > BPF_MAX_INSNS_PER_PATH) {
    return -ENOMEM;
  }

  if constexpr (false) {
    // Useful for debugging seccomp problems.
    DumpProgram(prog);
  }

  if (DoingTsync) {
    auto TSyncThread = CanDoTSync(Frame);
    if (TSyncThread != 0) {
      if (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH) {
        // This flag explicitly ensures that if TSYNC can't sync then it won't return a TID.
        return -ESRCH;
      } else {
        // Return the TID that caused a tsync problem.
        return TSyncThread;
      }
    }
  }

  BPFEmitter emit {};
  const bool LoggingEnabled = flags & SECCOMP_FILTER_FLAG_LOG;
  auto Result = emit.JITFilter(flags, prog);
  if (Result == 0) {
    // The creating thread owns the first reference to the new filter.
    auto& it = Filters.emplace_back(SeccompFilterInfo {(SeccompFilterFunc)emit.GetFunc(), 1, emit.AllocationSize(), prog->len, LoggingEnabled});
    TotalFilterInstructions += prog->len;

    // Append the filter to the thread.
    Thread->Filters.emplace_back(&it);
    Thread->SeccompMode = SECCOMP_MODE_FILTER;
    if (DoingTsync) {
      TSyncFilters(Frame);
    }
  }

  return Result;
}

// Equivalent to seccomp(SECCOMP_GET_ACTION_AVAIL, ...);
//
// Reports whether the filter return action pointed to by `action` is implemented by this emulator.
// Returns 0 when it is, -EINVAL for non-zero `flags`, -EFAULT for a null `action`,
// and -EOPNOTSUPP for every other action (including USER_NOTIF and TRACE).
uint64_t SeccompEmulator::GetActionAvail(uint32_t flags, const uint32_t* action) {
  // Unknown flags passed in
  if (flags != 0) {
    return -EINVAL;
  }

  // Invalid action
  if (action == nullptr) {
    return -EFAULT;
  }

  const uint32_t Requested = *action;
  const bool Supported = Requested == SECCOMP_RET_KILL_PROCESS || Requested == SECCOMP_RET_KILL_THREAD ||
                         Requested == SECCOMP_RET_TRAP || Requested == SECCOMP_RET_ERRNO || Requested == SECCOMP_RET_LOG ||
                         Requested == SECCOMP_RET_ALLOW;
  if (Supported) {
    return 0;
  }

  return -EOPNOTSUPP;
}

// Equivalent to seccomp(SECCOMP_GET_NOTIF_SIZES, ...);
//
// Writes the sizes of the seccomp user-notification structures into `sizes`.
// Returns 0 on success, -EINVAL for non-zero `flags`, and -EFAULT when `sizes` is null.
uint64_t SeccompEmulator::GetNotifSizes(uint32_t flags, struct seccomp_notif_sizes* sizes) {
  if (flags != 0) {
    // Unknown flags passed in
    return -EINVAL;
  }

  if (!sizes) {
    // Nowhere to store the result; report it like GetActionAvail does for a null pointer instead of faulting.
    return -EFAULT;
  }

  sizes->seccomp_notif = sizeof(struct seccomp_notif);
  sizes->seccomp_notif_resp = sizeof(struct seccomp_notif_resp);
  sizes->seccomp_data = sizeof(struct seccomp_data);

  return 0;
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Seccomp/SeccompEmulator.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/
#pragma once

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/Utils/SignalScopeGuards.h>

#include <atomic>
#include <csignal>
#include <cstddef>
#include <cstdint>
#include <optional>

struct sock_fprog;
struct seccomp_data;
struct seccomp_notif_sizes;

namespace FEXCore {

namespace Core {
  struct CpuStateFrame;
}

namespace HLE {
  struct SyscallArguments;
}

} // namespace FEXCore

namespace FEX::HLE {

class SignalDelegator;
class SyscallHandler;
struct ThreadStateObject;

// Signature of a JIT-compiled seccomp filter entry point.
// NOTE(review): the parameter names suggest BPF accumulator/index/scratch registers plus a pointer to the
// seccomp_data being filtered — confirm against the BPF emitter.
using SeccompFilterFunc = uint64_t (*)(uint32_t Acc, uint32_t Index, uint32_t Tmp, uint32_t Tmp2, void* Data);
// Bookkeeping for one installed seccomp filter.
// Field order matters: instances are created with positional aggregate initialisation.
struct SeccompFilterInfo final {
  // Entry point of the compiled filter.
  SeccompFilterFunc Func;
  // Number of owners of this filter; modified through std::atomic_ref.
  uint64_t RefCount;
  // Size of the allocation backing the compiled filter code.
  size_t MappedSize;
  // Number of BPF instructions in the original program.
  uint32_t FilterInstructions;
  // True when the filter was installed with SECCOMP_FILTER_FLAG_LOG.
  bool ShouldLog;
};

// Userspace emulation of the Linux seccomp facility for guest threads.
// Tracks every installed filter and implements the individual seccomp(2) operations.
class SeccompEmulator final {
public:
  // Stores the syscall handler and signal delegator; neither pointer is owned by this object.
  SeccompEmulator(FEX::HLE::SyscallHandler* SyscallHandler, FEX::HLE::SignalDelegator* SignalDelegation)
    : SyscallHandler {SyscallHandler}
    , SignalDelegation {SignalDelegation} {}

  // Entry point for the guest seccomp syscall.
  // NOTE(review): presumably dispatches on `Op` to the private Set*/Get* helpers below — definition not shown here.
  uint64_t Handle(FEXCore::Core::CpuStateFrame* Frame, uint32_t Op, uint32_t flags, void* arg);

  // Equivalent to prctl(PR_GET_SECCOMP)
  uint64_t GetSeccomp(FEXCore::Core::CpuStateFrame* Frame);

  // Thread lifetime hooks for filter ownership.
  // NOTE(review): presumably these take/release references on the filters shared between threads — confirm in the .cpp.
  void InheritSeccompFilters(FEX::HLE::ThreadStateObject* Parent, FEX::HLE::ThreadStateObject* Child);
  void FreeSeccompFilters(FEX::HLE::ThreadStateObject* Thread);

  // Outcome of running the installed filters against a syscall.
  struct ExecuteFilterResult {
    // True when the filter already decided the syscall's return value (e.g. SECCOMP_RET_ERRNO).
    bool EarlyReturn {};
    // Value to hand back to the guest when EarlyReturn is set.
    uint64_t Result;
  };
  ExecuteFilterResult ExecuteFilter(FEXCore::Core::CpuStateFrame* Frame, uint64_t JITPC, FEXCore::HLE::SyscallArguments* Args);
  // Signal used when a filter kills the process: SIGSYS by default, SIGKILL once strict mode was enabled.
  int GetKillSignal() const {
    return CurrentKillSignal;
  }

  // NOTE(review): presumably used to hand the installed filters to another process through a file descriptor
  // (e.g. across execve) — definitions not shown here, confirm.
  std::optional<int> SerializeFilters(FEXCore::Core::CpuStateFrame* Frame);
  void DeserializeFilters(FEXCore::Core::CpuStateFrame* Frame, int FD);

private:
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  FEX_CONFIG_OPT(NeedsSeccomp, NEEDSSECCOMP);
  FEX_CONFIG_OPT(Filename, APP_FILENAME);
  FEX::HLE::SyscallHandler* SyscallHandler;
  FEX::HLE::SignalDelegator* SignalDelegation;

  // Shared by all threads; switched to SIGKILL by SetModeStrict.
  int CurrentKillSignal {SIGSYS};

  // Equivalent to seccomp(SECCOMP_SET_MODE_STRICT, ...);
  uint64_t SetModeStrict(FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, const void* arg);
  // Equivalent to seccomp(SECCOMP_SET_MODE_FILTER, ...);
  uint64_t SetModeFilter(FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, const sock_fprog* prog);
  // Equivalent to seccomp(SECCOMP_GET_ACTION_AVAIL, ...);
  uint64_t GetActionAvail(uint32_t flags, const uint32_t* action);
  // Equivalent to seccomp(SECCOMP_GET_NOTIF_SIZES, ...);
  uint64_t GetNotifSizes(uint32_t flags, struct seccomp_notif_sizes* sizes);

  // 0 on TSync possible
  /// TID for the first thread that breaks tsync.
  uint64_t CanDoTSync(FEXCore::Core::CpuStateFrame* Frame);
  // Copies the calling thread's filters and seccomp mode onto every other thread.
  void TSyncFilters(FEXCore::Core::CpuStateFrame* Frame);

  // Debug helper that prints a BPF program.
  static void DumpProgram(const sock_fprog* prog);

  // Multiple filter instruction count penalty.
  // When multiple filters are installed there is a penalty per filter counted towards the maximum number of instructions.
  constexpr static size_t BPF_MULTIFILTERPENALTY = 4;
  // Maximum number of BPF instructions.
  constexpr static size_t BPF_MAX_INSNS_PER_PATH = 32768;
  // Running sum of the instruction counts of all filters installed through SetModeFilter.
  uint64_t TotalFilterInstructions {};

  // Held while installing filters (see SetModeFilter).
  FEXCore::ForkableUniqueMutex FilterMutex;
  // Owning storage for every installed filter; threads keep pointers to these elements.
  fextl::list<SeccompFilterInfo> Filters {};

  // Returns the current audit serial number and advances it for the next audit log line.
  uint64_t AuditSerialIncrement() {
    return AuditSerial.fetch_add(1);
  }
  std::atomic<uint64_t> AuditSerial {};
};
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator/GuestFramesManagement.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
desc: Handles host -> host and host -> guest signal routing, emulates procmask & co
$end_info$
*/

#include "LinuxSyscalls/SignalDelegator.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/FPState.h>
#include <FEXCore/Utils/MathUtils.h>

#include <csignal>
#include <cstddef>
#include <cstdint>
#include <cstring>

namespace FEX::HLE {
constexpr uint32_t X86_SA_RESTORER = 0x04000000;
// The distinct siginfo layouts that CalculateSigInfoLayout() can select.
enum class SigInfoLayout {
  // Fallback layout when nothing more specific applies.
  LAYOUT_KILL,
  // Selected for si_code == SI_TIMER.
  LAYOUT_TIMER,
  // Selected for SIGPOLL or si_code == SI_SIGIO.
  LAYOUT_POLL,
  // Fault layout (SIGSEGV/SIGBUS/SIGTRAP without the instruction-fetch error bit).
  LAYOUT_FAULT,
  // Fault layout where the address refers to RIP (SIGILL/SIGFPE, or instruction-fetch faults).
  LAYOUT_FAULT_RIP,
  // Selected for SIGCHLD.
  LAYOUT_CHLD,
  // Selected for the remaining negative si_codes.
  LAYOUT_RT,
  // Selected for SIGSYS.
  LAYOUT_SYS,
};

// Picks the siginfo layout for a signal from its number, si_code and (for faults) the x86 error code.
// Codes strictly between SI_USER and SI_KERNEL are classified by signal number; everything else is classified
// by si_code alone. LAYOUT_KILL is the fallback.
static SigInfoLayout CalculateSigInfoLayout(int Signal, int si_code, uint32_t err_code) {
  const bool SignalSpecificCode = si_code > SI_USER && si_code < SI_KERNEL;

  if (!SignalSpecificCode) {
    if (si_code == SI_TIMER) {
      return SigInfoLayout::LAYOUT_TIMER;
    }
    if (si_code == SI_SIGIO) {
      return SigInfoLayout::LAYOUT_POLL;
    }
    // Remaining negative si_codes use the RT layout.
    return si_code < 0 ? SigInfoLayout::LAYOUT_RT : SigInfoLayout::LAYOUT_KILL;
  }

  // For signals that are not considered RT.
  switch (Signal) {
  case SIGSEGV:
  case SIGBUS:
  case SIGTRAP:
    // Instruction-fetch faults report RIP as the address, everything else is a regular fault.
    return (err_code & FEXCore::X86State::X86_PF_INSTR) ? SigInfoLayout::LAYOUT_FAULT_RIP : SigInfoLayout::LAYOUT_FAULT;
  case SIGILL:
  case SIGFPE:
    // Fault layout but addr refers to RIP.
    return SigInfoLayout::LAYOUT_FAULT_RIP;
  case SIGCHLD: return SigInfoLayout::LAYOUT_CHLD;
  case SIGPOLL: return SigInfoLayout::LAYOUT_POLL;
  case SIGSYS: return SigInfoLayout::LAYOUT_SYS;
  default: return SigInfoLayout::LAYOUT_KILL;
  }
}

// Maps a host signal to the x86 trap number stored in the guest context.
// SIGSEGV with SEGV_MAPERR/SEGV_ACCERR becomes a page fault; anything else passes the signal number through.
static uint32_t ConvertSignalToTrapNo(int Signal, siginfo_t* HostSigInfo) {
  const bool IsPageFault = Signal == SIGSEGV && (HostSigInfo->si_code == SEGV_MAPERR || HostSigInfo->si_code == SEGV_ACCERR);
  if (IsPageFault) {
    // Protection fault
    return FEXCore::X86State::X86_TRAPNO_PF;
  }

  // Unknown mapping, fall back to old behaviour and just pass signal
  return Signal;
}

// Computes the x86 error code stored in the guest context for a host signal.
// Only SIGSEGV with SEGV_MAPERR/SEGV_ACCERR yields a value (taken from the host context); everything else is 0.
static uint32_t ConvertSignalToError(void* ucontext, int Signal, siginfo_t* HostSigInfo) {
  const bool IsPageFault = Signal == SIGSEGV && (HostSigInfo->si_code == SEGV_MAPERR || HostSigInfo->si_code == SEGV_ACCERR);
  if (!IsPageFault) {
    // Not a page fault issue
    return 0;
  }

  // Protection fault
  // Always a user fault for us
  return ArchHelpers::Context::GetProtectFlags(ucontext);
}

// Fills in the software-reserved bytes (and, with AVX, the xstate header and trailing magic) of a guest xstate
// area so the guest can tell which extended state the signal frame carries.
//
// xstate:         guest xstate structure to initialise; only the bookkeeping fields are written here.
// is_avx_enabled: when true the frame is marked as carrying extended (YMM) state of sizeof(T) bytes.
template<typename T>
static void SetXStateInfo(T* xstate, bool is_avx_enabled) {
  auto* fpstate = &xstate->fpstate;

  fpstate->sw_reserved.magic1 = is_avx_enabled ? FEXCore::x86_64::fpx_sw_bytes::FP_XSTATE_MAGIC_1 : 0;
  fpstate->sw_reserved.extended_size = is_avx_enabled ? sizeof(T) : 0;

  // Assign instead of OR-ing: callers such as SetupFrame_x64 hand in memory carved straight out of the guest stack
  // without clearing it, so accumulating into the old contents would leak stale bits into the feature mask.
  fpstate->sw_reserved.xfeatures = FEXCore::x86_64::fpx_sw_bytes::FEATURE_FP | FEXCore::x86_64::fpx_sw_bytes::FEATURE_SSE;
  if (is_avx_enabled) {
    fpstate->sw_reserved.xfeatures |= FEXCore::x86_64::fpx_sw_bytes::FEATURE_YMM;
  }

  fpstate->sw_reserved.xstate_size = fpstate->sw_reserved.extended_size;

  if (is_avx_enabled) {
    xstate->xstate_hdr.xfeatures = 0;
    xstate->magic2.magic = FEXCore::x86_64::fpx_sw_bytes::FP_XSTATE_MAGIC_2;
  }
}

// Reloads guest CPU state from the x86-64 guest signal frame referenced by `Context`.
//
// Nothing is touched unless the guest changed RIP in the saved ucontext or the frame belongs to a generated
// exception (Context->FaultToTopAndGeneratedException). In that case the host context is redirected to the
// dispatcher loop top and registers, CS/SS, EFLAGS, XMM/YMM and x87 state are reloaded from the frame.
void SignalDelegator::RestoreFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                                       FEXCore::Core::CpuStateFrame* Frame, void* ucontext) {
  auto* GuestUContext = reinterpret_cast<FEXCore::x86_64::ucontext_t*>(Context->UContextLocation);
  [[maybe_unused]] auto* GuestSigInfo = reinterpret_cast<siginfo_t*>(Context->SigInfoLocation);

  // If the guest modified the RIP then we need to take special precautions here; otherwise there is nothing to do.
  const bool RIPUnchanged = Context->OriginalRIP == GuestUContext->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP];
  if (RIPUnchanged && !Context->FaultToTopAndGeneratedException) {
    return;
  }

  auto& State = Frame->State;

  // Restore previous `InSyscallInfo` structure.
  Frame->InSyscallInfo = Context->InSyscallInfo;

  // Hack! Go back to the top of the dispatcher top
  // This is only safe inside the JIT rather than anything outside of it
  ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA);
  ArchHelpers::Context::SetFillSRASingleInst(ucontext, false);
  // Set our state register to point to our guest thread data
  ArchHelpers::Context::SetState(ucontext, reinterpret_cast<uint64_t>(Frame));

  State.rip = GuestUContext->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP];

  // Restore segments.
  // FS and GS are explicitly ignored here, as WRFSGSbase is used instead.
  // CS lives in the low 16 bits of the packed selector slot, SS in the top 16 bits.
  const auto PackedSelectors = GuestUContext->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CSGSFS];
  State.cs_idx = (PackedSelectors >> 0) & 0xffff;
  State.ss_idx = (PackedSelectors >> 48) & 0xffff;

  State.cs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.cs_idx));
  State.ss_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.ss_idx));

  // XXX: Full context setting
  CTX->SetFlagsFromCompactedEFLAGS(Thread, GuestUContext->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL]);

#define RESTORE_GPR(x) State.gregs[FEXCore::X86State::REG_##x] = GuestUContext->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x]
  RESTORE_GPR(R8);
  RESTORE_GPR(R9);
  RESTORE_GPR(R10);
  RESTORE_GPR(R11);
  RESTORE_GPR(R12);
  RESTORE_GPR(R13);
  RESTORE_GPR(R14);
  RESTORE_GPR(R15);
  RESTORE_GPR(RDI);
  RESTORE_GPR(RSI);
  RESTORE_GPR(RBP);
  RESTORE_GPR(RBX);
  RESTORE_GPR(RDX);
  RESTORE_GPR(RAX);
  RESTORE_GPR(RCX);
  RESTORE_GPR(RSP);
#undef RESTORE_GPR

  auto* xstate = reinterpret_cast<FEXCore::x86_64::xstate*>(GuestUContext->uc_mcontext.fpregs);
  auto* fpstate = &xstate->fpstate;

  // The upper YMM halves are only present in the frame when AVX is supported.
  if (!SupportsAVX) {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, nullptr);
  } else {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  }

  // FCW store default
  State.FCW = fpstate->fcw;
  State.AbridgedFTW = fpstate->ftw;

  // Deconstruct FSW
  const auto StatusWord = fpstate->fsw;
  State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = StatusWord & 1;
  State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (StatusWord >> 8) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (StatusWord >> 9) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (StatusWord >> 10) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (StatusWord >> 14) & 1;
  State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (StatusWord >> 11) & 0b111;

  // Copy float registers: frame slot N goes to mm[(N + TOP) % 8].
  const uint16_t Top = State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t Slot = 0; Slot < FEXCore::Core::CPUState::NUM_MMS; ++Slot) {
    const auto Target = (Slot + Top) % 8;
    memcpy(&State.mm[Target], &fpstate->_st[Slot], sizeof(State.mm[0]));
  }
}

// Reloads guest CPU state from the 32-bit (non-RT) guest signal frame referenced by `Context`.
//
// Nothing is touched unless the guest changed the saved instruction pointer or the frame belongs to a generated
// exception (Context->FaultToTopAndGeneratedException). In that case the host context is redirected to the
// dispatcher loop top and EFLAGS, all six segments, the general registers, XMM/YMM and x87 state are reloaded.
void SignalDelegator::RestoreFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                                        FEXCore::Core::CpuStateFrame* Frame, void* ucontext) {
  SigFrame_i32* GuestFrame = reinterpret_cast<SigFrame_i32*>(Context->UContextLocation);

  // If the guest modified the RIP then we need to take special precautions here; otherwise there is nothing to do.
  const bool IPUnchanged = Context->OriginalRIP == GuestFrame->sc.ip;
  if (IPUnchanged && !Context->FaultToTopAndGeneratedException) {
    return;
  }

  auto& State = Frame->State;

  // Restore previous `InSyscallInfo` structure.
  Frame->InSyscallInfo = Context->InSyscallInfo;

  // Hack! Go back to the top of the dispatcher top
  // This is only safe inside the JIT rather than anything outside of it
  ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA);
  ArchHelpers::Context::SetFillSRASingleInst(ucontext, false);
  // Set our state register to point to our guest thread data
  ArchHelpers::Context::SetState(ucontext, reinterpret_cast<uint64_t>(Frame));

  // XXX: Full context setting
  CTX->SetFlagsFromCompactedEFLAGS(Thread, GuestFrame->sc.flags);

  State.rip = GuestFrame->sc.ip;
  State.cs_idx = GuestFrame->sc.cs;
  State.ds_idx = GuestFrame->sc.ds;
  State.es_idx = GuestFrame->sc.es;
  State.fs_idx = GuestFrame->sc.fs;
  State.gs_idx = GuestFrame->sc.gs;
  State.ss_idx = GuestFrame->sc.ss;

  // Refresh the cached segment bases from the freshly restored selectors.
  State.cs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.cs_idx));
  State.ds_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.ds_idx));
  State.es_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.es_idx));
  State.fs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.fs_idx));
  State.gs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.gs_idx));
  State.ss_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.ss_idx));

#define RESTORE_GPR(x, y) State.gregs[FEXCore::X86State::REG_##x] = GuestFrame->sc.y
  RESTORE_GPR(RDI, di);
  RESTORE_GPR(RSI, si);
  RESTORE_GPR(RBP, bp);
  RESTORE_GPR(RBX, bx);
  RESTORE_GPR(RDX, dx);
  RESTORE_GPR(RAX, ax);
  RESTORE_GPR(RCX, cx);
  RESTORE_GPR(RSP, sp);
#undef RESTORE_GPR

  auto* xstate = reinterpret_cast<FEXCore::x86::xstate*>(GuestFrame->sc.fpstate);
  auto* fpstate = &xstate->fpstate;

  // Extended XMM state; the upper YMM halves are only present when AVX is supported.
  if (!SupportsAVX) {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, nullptr);
  } else {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  }

  // FCW store default
  State.FCW = fpstate->fcw;
  State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw);

  // Deconstruct FSW
  const auto StatusWord = fpstate->fsw;
  State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = StatusWord & 1;
  State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (StatusWord >> 8) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (StatusWord >> 9) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (StatusWord >> 10) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (StatusWord >> 14) & 1;
  State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (StatusWord >> 11) & 0b111;

  // Copy float registers: frame slot N goes to mm[(N + TOP) % 8].
  const uint16_t Top = State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t Slot = 0; Slot < FEXCore::Core::CPUState::NUM_MMS; ++Slot) {
    // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64
    const auto Target = (Slot + Top) % 8;
    memcpy(&State.mm[Target], &fpstate->_st[Slot], 10);
  }
}

// Reloads guest CPU state from the 32-bit RT guest signal frame referenced by `Context`.
//
// Nothing is touched unless the guest changed the saved EIP or the frame belongs to a generated exception
// (Context->FaultToTopAndGeneratedException). In that case the host context is redirected to the dispatcher
// loop top and EFLAGS, all six segments, the general registers, XMM/YMM and x87 state are reloaded.
void SignalDelegator::RestoreRTFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                                          FEXCore::Core::CpuStateFrame* Frame, void* ucontext) {
  RTSigFrame_i32* GuestFrame = reinterpret_cast<RTSigFrame_i32*>(Context->UContextLocation);

  // If the guest modified the RIP then we need to take special precautions here; otherwise there is nothing to do.
  const bool EIPUnchanged = Context->OriginalRIP == GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP];
  if (EIPUnchanged && !Context->FaultToTopAndGeneratedException) {
    return;
  }

  auto& State = Frame->State;

  // Restore previous `InSyscallInfo` structure.
  Frame->InSyscallInfo = Context->InSyscallInfo;

  // Hack! Go back to the top of the dispatcher top
  // This is only safe inside the JIT rather than anything outside of it
  ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA);
  ArchHelpers::Context::SetFillSRASingleInst(ucontext, false);
  // Set our state register to point to our guest thread data
  ArchHelpers::Context::SetState(ucontext, reinterpret_cast<uint64_t>(Frame));

  // XXX: Full context setting
  CTX->SetFlagsFromCompactedEFLAGS(Thread, GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL]);

  State.rip = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP];
  State.cs_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS];
  State.ds_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS];
  State.es_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES];
  State.fs_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS];
  State.gs_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS];
  State.ss_idx = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS];

  // Refresh the cached segment bases from the freshly restored selectors.
  State.cs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.cs_idx));
  State.ds_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.ds_idx));
  State.es_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.es_idx));
  State.fs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.fs_idx));
  State.gs_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.gs_idx));
  State.ss_cached = State.CalculateGDTBase(*State.GetSegmentFromIndex(State, State.ss_idx));

#define RESTORE_GPR(x) State.gregs[FEXCore::X86State::REG_##x] = GuestFrame->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x]
  RESTORE_GPR(RDI);
  RESTORE_GPR(RSI);
  RESTORE_GPR(RBP);
  RESTORE_GPR(RBX);
  RESTORE_GPR(RDX);
  RESTORE_GPR(RAX);
  RESTORE_GPR(RCX);
  RESTORE_GPR(RSP);
#undef RESTORE_GPR

  auto* xstate = reinterpret_cast<FEXCore::x86::xstate*>(GuestFrame->uc.uc_mcontext.fpregs);
  auto* fpstate = &xstate->fpstate;

  // Extended XMM state; the upper YMM halves are only present when AVX is supported.
  if (!SupportsAVX) {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, nullptr);
  } else {
    CTX->SetXMMRegistersFromState(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  }

  // FCW store default
  State.FCW = fpstate->fcw;
  State.AbridgedFTW = FEXCore::FPState::ConvertToAbridgedFTW(fpstate->ftw);

  // Deconstruct FSW
  const auto StatusWord = fpstate->fsw;
  State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = StatusWord & 1;
  State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (StatusWord >> 8) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (StatusWord >> 9) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (StatusWord >> 10) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (StatusWord >> 14) & 1;
  State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (StatusWord >> 11) & 0b111;

  // Copy float registers: frame slot N goes to mm[(N + TOP) % 8].
  const uint16_t Top = State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t Slot = 0; Slot < FEXCore::Core::CPUState::NUM_MMS; ++Slot) {
    // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64
    const auto Target = (Slot + Top) % 8;
    memcpy(&State.mm[Target], &fpstate->_st[Slot], 10);
  }
}

uint64_t SignalDelegator::SetupFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                                         FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                                         GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) {

  // Back up past the redzone, which is 128bytes
  // 32-bit doesn't have a redzone
  NewGuestSP -= 128;

  // On 64-bit the kernel sets up the siginfo_t and ucontext_t regardless of SA_SIGINFO set.
  // This allows the application to /always/ get the siginfo and ucontext even if it didn't set this flag.
  //
  // Signal frame layout on stack needs to be as follows
  // void* ReturnPointer
  // ucontext_t
  // siginfo_t
  // FP state
  // Host stack location
  NewGuestSP -= sizeof(uint64_t);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t));

  uint64_t HostStackLocation = NewGuestSP;

  if (SupportsAVX) {
    NewGuestSP -= sizeof(FEXCore::x86_64::xstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::xstate));
  } else {
    NewGuestSP -= sizeof(FEXCore::x86_64::_libc_fpstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::_libc_fpstate));
  }

  uint64_t FPStateLocation = NewGuestSP;

  NewGuestSP -= sizeof(siginfo_t);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(siginfo_t));
  uint64_t SigInfoLocation = NewGuestSP;

  NewGuestSP -= sizeof(FEXCore::x86_64::ucontext_t);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86_64::ucontext_t));
  uint64_t UContextLocation = NewGuestSP;

  ContextBackup->FPStateLocation = FPStateLocation;
  ContextBackup->UContextLocation = UContextLocation;
  ContextBackup->SigInfoLocation = SigInfoLocation;

  FEXCore::x86_64::ucontext_t* guest_uctx = reinterpret_cast<FEXCore::x86_64::ucontext_t*>(UContextLocation);
  siginfo_t* guest_siginfo = reinterpret_cast<siginfo_t*>(SigInfoLocation);
  // Store where the host context lives in the guest stack.
  *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup;

  // We have extended float information
  guest_uctx->uc_flags = FEXCore::x86_64::UC_FP_XSTATE | FEXCore::x86_64::UC_SIGCONTEXT_SS | FEXCore::x86_64::UC_STRICT_RESTORE_SS;

  // Pointer to where the fpreg memory is
  guest_uctx->uc_mcontext.fpregs = reinterpret_cast<FEXCore::x86_64::_libc_fpstate*>(FPStateLocation);
  auto* xstate = reinterpret_cast<FEXCore::x86_64::xstate*>(FPStateLocation);
  SetXStateInfo(xstate, SupportsAVX);

  guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_RIP] = ContextBackup->OriginalRIP;
  guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_EFL] = eflags;
  // This stores the CS/GS/FS selectors. It ALSO stores the SS selector in the top 16 bits...For some reason.
  // Despite the naming, the endianness is swapped from what you'd expect.
  guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CSGSFS] =
    ((uint64_t)Frame->State.ss_idx << 48) | ((uint64_t)Frame->State.fs_idx << 32) | ((uint64_t)Frame->State.gs_idx << 16) |
    ((uint64_t)Frame->State.cs_idx << 0);

  // aarch64 and x86_64 siginfo_t matches. We can just copy this over
  // SI_USER could also potentially have random data in it, needs to be bit perfect
  // For guest faults we don't have a real way to reconstruct state to a real guest RIP
  *guest_siginfo = *HostSigInfo;

  if (ContextBackup->FaultToTopAndGeneratedException) {
    guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo;
    guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code;

    // Overwrite si_code and si_addr
    guest_siginfo->si_code = Thread->CurrentFrame->SynchronousFaultData.si_code;
    guest_siginfo->si_addr = reinterpret_cast<void*>(ContextBackup->OriginalRIP);
    Signal = Frame->SynchronousFaultData.Signal;
  } else {
    guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, HostSigInfo);
    guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo);
  }
  guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_OLDMASK] = 0;
  guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_CR2] = 0;

#define COPY_REG(x) guest_uctx->uc_mcontext.gregs[FEXCore::x86_64::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x];
  COPY_REG(R8);
  COPY_REG(R9);
  COPY_REG(R10);
  COPY_REG(R11);
  COPY_REG(R12);
  COPY_REG(R13);
  COPY_REG(R14);
  COPY_REG(R15);
  COPY_REG(RDI);
  COPY_REG(RSI);
  COPY_REG(RBP);
  COPY_REG(RBX);
  COPY_REG(RDX);
  COPY_REG(RAX);
  COPY_REG(RCX);
  COPY_REG(RSP);
#undef COPY_REG

  auto* fpstate = &xstate->fpstate;

  // Copy float registers
  const uint16_t CurrentOffset = Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
    const auto modulo_i = (i + CurrentOffset) % 8;
    memcpy(&fpstate->_st[i], &Frame->State.mm[modulo_i], sizeof(Frame->State.mm[0]));
  }

  if (SupportsAVX) {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  } else {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, nullptr);
  }

  // FCW store default
  fpstate->fcw = Frame->State.FCW;
  fpstate->ftw = Frame->State.AbridgedFTW;

  // Reconstruct FSW
  fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14) | Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC];

  // Copy over signal stack information
  guest_uctx->uc_stack.ss_flags = GuestStack->ss_flags;
  guest_uctx->uc_stack.ss_sp = GuestStack->ss_sp;
  guest_uctx->uc_stack.ss_size = GuestStack->ss_size;

  // Apparently RAX is always set to zero in case of badly misbehaving C applications and variadics.
  Frame->State.gregs[FEXCore::X86State::REG_RAX] = 0;
  Frame->State.gregs[FEXCore::X86State::REG_RDI] = Signal;
  Frame->State.gregs[FEXCore::X86State::REG_RSI] = SigInfoLocation;
  Frame->State.gregs[FEXCore::X86State::REG_RDX] = UContextLocation;

  // Set up the new SP for stack handling
  // The host is required to provide us a restorer.
  // If the guest didn't provide a restorer then the application should fail with a SIGSEGV.
  // TODO: Emulate SIGSEGV when the guest doesn't provide a restorer.
  NewGuestSP -= 8;
  if (GuestAction->restorer) {
    *(uint64_t*)NewGuestSP = (uint64_t)GuestAction->restorer;
  } else {
    // XXX: Emulate SIGSEGV here
    // *(uint64_t*)NewGuestSP = SignalReturn;
  }

  return NewGuestSP;
}

// Builds the legacy (non-RT) 32-bit guest signal frame on the guest stack.
//
// Working downward from the incoming NewGuestSP the function reserves, in order:
//   1. an 8-byte slot that receives the ContextBackup pointer,
//   2. the FP state (x86::xstate when SupportsAVX, otherwise x86::_libc_fpstate),
//   3. the SigFrame_i32 itself.
// It then fills the frame from the current guest state, records the frame and FP state
// addresses in ContextBackup, and loads guest RAX/RDX/RCX for the handler.
//
// Thread:        thread whose XMM registers are reconstructed into the frame.
// ContextBackup: supplies OriginalRIP and FaultToTopAndGeneratedException; receives the frame locations.
// Frame:         guest CPU state that is read (GPRs, segments, x87) and partially rewritten (RAX/RDX/RCX).
// Signal:        signal number; replaced by SynchronousFaultData.Signal for generated synchronous faults.
// HostSigInfo, ucontext: host signal data, only used here to derive trapno/err.
// GuestAction:   guest sigaction; its restorer is used when X86_SA_RESTORER is set.
// GuestStack:    not referenced by this function.
// NewGuestSP:    top of the guest stack region the frame is built in.
// eflags:        value stored into sc.flags.
//
// Returns the address of the SigFrame_i32, which becomes the guest's new stack pointer.
uint64_t SignalDelegator::SetupFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                                          FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                                          GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) {

  // Fallback return trampoline, used only when the guest did not supply its own restorer.
  const uint64_t SignalReturn = reinterpret_cast<uint64_t>(VDSOPointers.VDSO_kernel_sigreturn);

  // Reserve the slot that holds the pointer back to the host-side ContextBackup.
  NewGuestSP -= sizeof(uint64_t);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t));

  uint64_t HostStackLocation = NewGuestSP;

  // Reserve the FP state area; its size depends on whether AVX state is carried.
  if (SupportsAVX) {
    NewGuestSP -= sizeof(FEXCore::x86::xstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate));
  } else {
    NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate));
  }

  uint64_t FPStateLocation = NewGuestSP;

  // Reserve the signal frame itself; this is the lowest address of the whole construct.
  NewGuestSP -= sizeof(SigFrame_i32);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(SigFrame_i32));
  uint64_t SigFrameLocation = NewGuestSP;

  // Remember where the pieces live. The siginfo location is zero because this frame type has no separate siginfo.
  ContextBackup->FPStateLocation = FPStateLocation;
  ContextBackup->UContextLocation = SigFrameLocation;
  ContextBackup->SigInfoLocation = 0;

  SigFrame_i32* guest_uctx = reinterpret_cast<SigFrame_i32*>(SigFrameLocation);
  // Store where the host context lives in the guest stack.
  *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup;

  // Pointer to where the fpreg memory is (truncated to the 32-bit field of the guest structure)
  guest_uctx->sc.fpstate = static_cast<uint32_t>(FPStateLocation);
  auto* xstate = reinterpret_cast<FEXCore::x86::xstate*>(FPStateLocation);
  SetXStateInfo(xstate, SupportsAVX);

  // Segment selectors
  guest_uctx->sc.cs = Frame->State.cs_idx;
  guest_uctx->sc.ds = Frame->State.ds_idx;
  guest_uctx->sc.es = Frame->State.es_idx;
  guest_uctx->sc.fs = Frame->State.fs_idx;
  guest_uctx->sc.gs = Frame->State.gs_idx;
  guest_uctx->sc.ss = Frame->State.ss_idx;

  // Synchronous faults generated by FEX carry their own trap data and signal number;
  // everything else is derived from the host signal.
  if (ContextBackup->FaultToTopAndGeneratedException) {
    guest_uctx->sc.trapno = Frame->SynchronousFaultData.TrapNo;
    guest_uctx->sc.err = Frame->SynchronousFaultData.err_code;
    Signal = Frame->SynchronousFaultData.Signal;
  } else {
    guest_uctx->sc.trapno = ConvertSignalToTrapNo(Signal, HostSigInfo);
    guest_uctx->sc.err = ConvertSignalToError(ucontext, Signal, HostSigInfo);
  }

  guest_uctx->sc.ip = ContextBackup->OriginalRIP;
  guest_uctx->sc.flags = eflags;
  guest_uctx->sc.sp_at_signal = 0;

  // Copy the eight 32-bit guest GPRs into their sigcontext fields.
#define COPY_REG(x, y) guest_uctx->sc.x = Frame->State.gregs[FEXCore::X86State::REG_##y];
  COPY_REG(di, RDI);
  COPY_REG(si, RSI);
  COPY_REG(bp, RBP);
  COPY_REG(bx, RBX);
  COPY_REG(dx, RDX);
  COPY_REG(ax, RAX);
  COPY_REG(cx, RCX);
  COPY_REG(sp, RSP);
#undef COPY_REG

  auto* fpstate = &xstate->fpstate;

  // Copy float registers
  // Slot i of the frame receives mm[(i + TOP) % 8], i.e. the copy is rotated by the x87 TOP value.
  const uint16_t CurrentOffset = Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
    const auto modulo_i = (i + CurrentOffset) % 8;
    // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64
    memcpy(&fpstate->_st[i], &Frame->State.mm[modulo_i], 10);
  }

  // Extended XMM state
  fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE;

  // Upper YMM halves are only written when the AVX xstate area was reserved above.
  if (SupportsAVX) {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  } else {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, nullptr);
  }

  // FCW store default
  fpstate->fcw = Frame->State.FCW;
  // Reconstruct FSW
  // Bit positions used: IE=0, C0=8, C1=9, C2=10, TOP=11.., C3=14.
  fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14) | Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC];
  // The frame stores the tag word converted from FEX's abridged form.
  fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW);

  // Curiously non-rt signals don't support altstack. So that state doesn't exist here.

  // Copy over the signal information.
  guest_uctx->Signal = Signal;

  // Retcode needs to be bit-exact for debuggers
  constexpr static uint8_t retcode[] = {
    0x58,                   // pop eax
    0xb8,                   // mov
    0x77, 0x00, 0x00, 0x00, // 32-bit sigreturn
    0xcd, 0x80,             // int 0x80
  };

  memcpy(guest_uctx->retcode, &retcode, sizeof(retcode));

  // 32-bit Guest can provide its own restorer or we need to provide our own.
  // On a real host this restorer will live in VDSO.
  const bool HasRestorer = (GuestAction->sa_flags & X86_SA_RESTORER) == X86_SA_RESTORER;
  if (HasRestorer) {
    guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer;
  } else {
    // pretcode is written before the range assertion; the assertion only reports a bad address.
    guest_uctx->pretcode = SignalReturn;
    LOGMAN_THROW_A_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB");
  }

  // Support regparm=3
  // The signal number goes in RAX; RDX and RCX are zeroed.
  Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal;
  Frame->State.gregs[FEXCore::X86State::REG_RDX] = 0;
  Frame->State.gregs[FEXCore::X86State::REG_RCX] = 0;

  return NewGuestSP;
}

// Builds the RT (SA_SIGINFO) 32-bit guest signal frame on the guest stack.
//
// Working downward from the incoming NewGuestSP the function reserves, in order:
//   1. an 8-byte slot that receives the ContextBackup pointer,
//   2. the FP state (x86::xstate when SupportsAVX, otherwise x86::_libc_fpstate),
//   3. the RTSigFrame_i32, which embeds both the guest siginfo and the guest ucontext.
// It then fills the frame from the current guest state and the host siginfo, records the
// frame and FP state addresses in ContextBackup, and loads guest RAX/RDX/RCX for the handler.
//
// Thread:        thread whose XMM registers are reconstructed into the frame.
// ContextBackup: supplies OriginalRIP and FaultToTopAndGeneratedException; receives the frame locations.
// Frame:         guest CPU state that is read (GPRs, segments, x87) and partially rewritten (RAX/RDX/RCX).
// Signal:        signal number; replaced by SynchronousFaultData.Signal for generated synchronous faults.
// HostSigInfo:   host siginfo that is translated field-by-field into the 32-bit guest siginfo.
// ucontext:      host context, only used here to derive the error code.
// GuestAction:   guest sigaction; its restorer is used when X86_SA_RESTORER is set.
// GuestStack:    guest alternate stack description, copied into uc_stack.
// NewGuestSP:    top of the guest stack region the frame is built in.
// eflags:        value stored into the EFL slot of the guest mcontext.
//
// Returns the address of the RTSigFrame_i32, which becomes the guest's new stack pointer.
uint64_t SignalDelegator::SetupRTFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                                            FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                                            GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags) {

  // Fallback return trampoline, used only when the guest did not supply its own restorer.
  const uint64_t SignalReturn = reinterpret_cast<uint64_t>(VDSOPointers.VDSO_kernel_rt_sigreturn);

  // Reserve the slot that holds the pointer back to the host-side ContextBackup.
  NewGuestSP -= sizeof(uint64_t);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(uint64_t));

  uint64_t HostStackLocation = NewGuestSP;

  // Reserve the FP state area; its size depends on whether AVX state is carried.
  if (SupportsAVX) {
    NewGuestSP -= sizeof(FEXCore::x86::xstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::xstate));
  } else {
    NewGuestSP -= sizeof(FEXCore::x86::_libc_fpstate);
    NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(FEXCore::x86::_libc_fpstate));
  }

  uint64_t FPStateLocation = NewGuestSP;

  // Reserve the signal frame itself; this is the lowest address of the whole construct.
  NewGuestSP -= sizeof(RTSigFrame_i32);
  NewGuestSP = FEXCore::AlignDown(NewGuestSP, alignof(RTSigFrame_i32));

  uint64_t SigFrameLocation = NewGuestSP;
  RTSigFrame_i32* guest_uctx = reinterpret_cast<RTSigFrame_i32*>(SigFrameLocation);
  // Store where the host context lives in the guest stack.
  *(uint64_t*)HostStackLocation = (uint64_t)ContextBackup;

  ContextBackup->FPStateLocation = FPStateLocation;
  ContextBackup->UContextLocation = SigFrameLocation;
  ContextBackup->SigInfoLocation = 0; // Part of frame.

  // We have extended float information
  guest_uctx->uc.uc_flags = FEXCore::x86::UC_FP_XSTATE;
  guest_uctx->uc.uc_link = 0;

  // Pointer to where the fpreg memory is (truncated to the 32-bit field of the guest structure)
  guest_uctx->uc.uc_mcontext.fpregs = static_cast<uint32_t>(FPStateLocation);
  auto* xstate = reinterpret_cast<FEXCore::x86::xstate*>(FPStateLocation);
  SetXStateInfo(xstate, SupportsAVX);

  // Segment selectors
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_CS] = Frame->State.cs_idx;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_DS] = Frame->State.ds_idx;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ES] = Frame->State.es_idx;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_FS] = Frame->State.fs_idx;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_GS] = Frame->State.gs_idx;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_SS] = Frame->State.ss_idx;

  // Synchronous faults generated by FEX carry their own trap data and signal number;
  // everything else is derived from the host signal.
  if (ContextBackup->FaultToTopAndGeneratedException) {
    guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = Frame->SynchronousFaultData.TrapNo;
    guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = Frame->SynchronousFaultData.err_code;
    Signal = Frame->SynchronousFaultData.Signal;
  } else {
    guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_TRAPNO] = ConvertSignalToTrapNo(Signal, HostSigInfo);
    // NOTE(review): this si_code store is overwritten unconditionally by the "Setup siginfo" section further down.
    guest_uctx->info.si_code = HostSigInfo->si_code;
    guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR] = ConvertSignalToError(ucontext, Signal, HostSigInfo);
  }

  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EIP] = ContextBackup->OriginalRIP;
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_EFL] = eflags;
  // UESP receives the same stack pointer value that COPY_REG(RSP) stores below.
  guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_UESP] = Frame->State.gregs[FEXCore::X86State::REG_RSP];
  guest_uctx->uc.uc_mcontext.cr2 = 0;

  // Copy the eight guest GPRs into their mcontext slots.
#define COPY_REG(x) guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_##x] = Frame->State.gregs[FEXCore::X86State::REG_##x];
  COPY_REG(RDI);
  COPY_REG(RSI);
  COPY_REG(RBP);
  COPY_REG(RBX);
  COPY_REG(RDX);
  COPY_REG(RAX);
  COPY_REG(RCX);
  COPY_REG(RSP);
#undef COPY_REG

  auto* fpstate = &xstate->fpstate;

  // Copy float registers
  // Slot i of the frame receives mm[(i + TOP) % 8], i.e. the copy is rotated by the x87 TOP value.
  const uint16_t CurrentOffset = Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC];
  for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
    const auto modulo_i = (i + CurrentOffset) % 8;
    // 32-bit st register size is only 10 bytes. Not padded to 16byte like x86-64
    memcpy(&fpstate->_st[i], &Frame->State.mm[modulo_i], 10);
  }

  // Extended XMM state
  fpstate->status = FEXCore::x86::fpstate_magic::MAGIC_XFPSTATE;

  // Upper YMM halves are only written when the AVX xstate area was reserved above.
  if (SupportsAVX) {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, xstate->ymmh.ymmh_space);
  } else {
    CTX->ReconstructXMMRegisters(Thread, fpstate->_xmm, nullptr);
  }

  // FCW store default
  fpstate->fcw = Frame->State.FCW;
  // Reconstruct FSW
  // Bit positions used: IE=0, C0=8, C1=9, C2=10, TOP=11.., C3=14.
  fpstate->fsw = (Frame->State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | (Frame->State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) |
                 (Frame->State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14) | Frame->State.flags[FEXCore::X86State::X87FLAG_IE_LOC];
  // The frame stores the tag word converted from FEX's abridged form.
  fpstate->ftw = FEXCore::FPState::ConvertFromAbridgedFTW(fpstate->fsw, Frame->State.mm, Frame->State.AbridgedFTW);

  // Copy over signal stack information
  guest_uctx->uc.uc_stack.ss_flags = GuestStack->ss_flags;
  guest_uctx->uc.uc_stack.ss_sp = static_cast<uint32_t>(reinterpret_cast<uint64_t>(GuestStack->ss_sp));
  guest_uctx->uc.uc_stack.ss_size = GuestStack->ss_size;

  // Setup siginfo
  // These three elements are in every siginfo
  // si_signo and si_errno always come from the host; si_code comes from the synchronous fault data when one was generated.
  guest_uctx->info.si_signo = HostSigInfo->si_signo;
  guest_uctx->info.si_errno = HostSigInfo->si_errno;
  if (ContextBackup->FaultToTopAndGeneratedException) {
    guest_uctx->info.si_code = Frame->SynchronousFaultData.si_code;
  } else {
    guest_uctx->info.si_code = HostSigInfo->si_code;
  }

  // Pick which union member of the guest siginfo has to be populated.
  const SigInfoLayout Layout =
    CalculateSigInfoLayout(Signal, guest_uctx->info.si_code, guest_uctx->uc.uc_mcontext.gregs[FEXCore::x86::FEX_REG_ERR]);

  // Only the fields belonging to the selected layout are written; the remaining union bytes are left untouched.
  switch (Layout) {
  case SigInfoLayout::LAYOUT_KILL:
    guest_uctx->info._sifields._kill.pid = HostSigInfo->si_pid;
    guest_uctx->info._sifields._kill.uid = HostSigInfo->si_uid;
    break;
  case SigInfoLayout::LAYOUT_TIMER:
    guest_uctx->info._sifields._timer.tid = HostSigInfo->si_timerid;
    guest_uctx->info._sifields._timer.overrun = HostSigInfo->si_overrun;
    guest_uctx->info._sifields._timer.sigval.sival_int = HostSigInfo->si_int;
    break;
  case SigInfoLayout::LAYOUT_POLL:
    guest_uctx->info._sifields._poll.band = HostSigInfo->si_band;
    guest_uctx->info._sifields._poll.fd = HostSigInfo->si_fd;
    break;
  case SigInfoLayout::LAYOUT_FAULT:
    // Macro expansion to get the si_addr
    // This is the address trying to be accessed, not the RIP
    guest_uctx->info._sifields._sigfault.addr = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(HostSigInfo->si_addr));
    break;
  case SigInfoLayout::LAYOUT_FAULT_RIP:
    // Macro expansion to get the si_addr
    // Can't really give a real result here. Pull from the context for now
    guest_uctx->info._sifields._sigfault.addr = ContextBackup->OriginalRIP;
    break;
  case SigInfoLayout::LAYOUT_CHLD:
    guest_uctx->info._sifields._sigchld.pid = HostSigInfo->si_pid;
    guest_uctx->info._sifields._sigchld.uid = HostSigInfo->si_uid;
    guest_uctx->info._sifields._sigchld.status = HostSigInfo->si_status;
    guest_uctx->info._sifields._sigchld.utime = HostSigInfo->si_utime;
    guest_uctx->info._sifields._sigchld.stime = HostSigInfo->si_stime;
    break;
  case SigInfoLayout::LAYOUT_RT:
    guest_uctx->info._sifields._rt.pid = HostSigInfo->si_pid;
    guest_uctx->info._sifields._rt.uid = HostSigInfo->si_uid;
    guest_uctx->info._sifields._rt.sigval.sival_int = HostSigInfo->si_int;
    break;
  case SigInfoLayout::LAYOUT_SYS:
    guest_uctx->info._sifields._sigsys.call_addr = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(HostSigInfo->si_call_addr));
    guest_uctx->info._sifields._sigsys.syscall = HostSigInfo->si_syscall;
    // We need to lie about the architecture here.
    // Otherwise we would expose incorrect information to the guest.
    constexpr uint32_t AUDIT_LE = 0x4000'0000U;
    constexpr uint32_t MACHINE_I386 = 3; // This matches the ELF definition.
    guest_uctx->info._sifields._sigsys.arch = AUDIT_LE | MACHINE_I386;
    break;
  }

  // Setup the guest stack context.
  // pinfo/puc are 32-bit guest pointers to the siginfo and ucontext embedded in this same frame.
  guest_uctx->Signal = Signal;
  guest_uctx->pinfo = (uint32_t)(uint64_t)&guest_uctx->info;
  guest_uctx->puc = (uint32_t)(uint64_t)&guest_uctx->uc;

  // Retcode needs to be bit-exact for debuggers
  constexpr static uint8_t rt_retcode[] = {
    0xb8,                   // mov
    0xad, 0x00, 0x00, 0x00, // 32-bit rt_sigreturn
    0xcd, 0x80,             // int 0x80
    0x0,                    // Pad
  };

  memcpy(guest_uctx->retcode, &rt_retcode, sizeof(rt_retcode));

  // 32-bit Guest can provide its own restorer or we need to provide our own.
  // On a real host this restorer will live in VDSO.
  const bool HasRestorer = (GuestAction->sa_flags & X86_SA_RESTORER) == X86_SA_RESTORER;
  if (HasRestorer) {
    guest_uctx->pretcode = (uint32_t)(uint64_t)GuestAction->restorer;
  } else {
    // pretcode is written before the range assertion; the assertion only reports a bad address.
    guest_uctx->pretcode = SignalReturn;
    LOGMAN_THROW_A_FMT(SignalReturn < 0x1'0000'0000ULL, "This needs to be below 4GB");
  }

  // Support regparm=3
  // RAX = signal number, RDX = guest pointer to siginfo, RCX = guest pointer to ucontext.
  Frame->State.gregs[FEXCore::X86State::REG_RAX] = Signal;
  Frame->State.gregs[FEXCore::X86State::REG_RDX] = guest_uctx->pinfo;
  Frame->State.gregs[FEXCore::X86State::REG_RCX] = guest_uctx->puc;

  return NewGuestSP;
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
desc: Handles host -> host and host -> guest signal routing, emulates procmask & co
$end_info$
*/

#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/FPState.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <atomic>
#include <cerrno>
#include <csignal>
#include <cstddef>
#include <cstring>
#include <functional>
#include <linux/futex.h>
#include <syscall.h>
#include <sys/mman.h>
#include <sys/signalfd.h>
#include <unistd.h>
#include <utility>

// For older build environments
#ifndef SS_AUTODISARM
#define SS_AUTODISARM (1U << 31)
#endif

namespace FEX::HLE {
#ifdef ARCHITECTURE_x86_64
// Naked helper (no compiler-generated prologue/epilogue) whose whole body is a single
// `syscall` instruction issued with 0xF in the "a" register.
// NOTE(review): 0xF is presumably __NR_rt_sigreturn on x86-64 Linux, making this a
// signal-return trampoline for x86-64 hosts — confirm against its users.
__attribute__((naked)) static void sigrestore() {
  __asm volatile("syscall;" ::"a"(0xF) : "memory");
}
#endif

// Minimum signal stack size, in bytes, used for the x86 guest.
// NOTE(review): presumably mirrors the x86 Linux MINSIGSTKSZ value rather than the host's — confirm at the use sites.
constexpr static uint32_t X86_MINSIGSTKSZ = 2048;

// Recovers the owning thread object from a host alternate signal stack description.
// The pointer is read from the 8 bytes located immediately below alt_stack.ss_sp.
static FEX::HLE::ThreadStateObject* GetThreadFromAltStack(const stack_t& alt_stack) {
  const auto StackBegin = reinterpret_cast<uint64_t>(alt_stack.ss_sp);
  const void* PointerSlot = reinterpret_cast<const void*>(StackBegin - 8);

  // memcpy instead of a direct dereference: copies the raw pointer bytes out of the slot.
  FEX::HLE::ThreadStateObject* Owner = nullptr;
  memcpy(&Owner, PointerSlot, sizeof(void*));
  return Owner;
}

// Host signal entry point: finds the thread object through the alt-stack recorded in the
// host ucontext and forwards the signal to that thread's delegator.
static void SignalHandlerThunk(int Signal, siginfo_t* Info, void* UContext) {
  const auto* HostContext = static_cast<const ucontext_t*>(UContext);
  auto* Owner = GetThreadFromAltStack(HostContext->uc_stack);

  // Accounts the time spent below to the thread's AccumulatedSignalTime counter.
  FEXCORE_PROFILE_ACCUMULATION(Owner->Thread, AccumulatedSignalTime);

  auto* Delegator = Owner->SignalInfo.Delegator;
  Delegator->HandleSignal(Owner, Signal, Info, UContext);
}

// Tests whether `Signal` is present in the guest signal mask.
//
// Signal 0 isn't real, so signal N is stored in bit N-1 of Set->Val.
// Returns 1 when the signal is a member and 0 otherwise. Signal numbers outside
// [1, bit-width of the mask] are reported as "not a member" instead of being used
// as a shift count, which would be undefined behaviour (negative or too-wide shift).
uint64_t SigIsMember(GuestSAMask* Set, int Signal) {
  constexpr int HighestSignal = static_cast<int>(sizeof(GuestSAMask::Val) * 8);
  if (Signal < 1 || Signal > HighestSignal) {
    return 0;
  }

  const int BitIndex = Signal - 1;
  return (Set->Val >> BitIndex) & 1;
}

// Computes the guest signal mask value with `Signal` added.
//
// Signal 0 isn't real, so signal N is stored in bit N-1 of Set->Val.
// The mask object itself is NOT modified; the caller receives the new value.
// Signal numbers outside [1, bit-width of the mask] leave the value unchanged instead
// of being used as a shift count, which would be undefined behaviour (negative or too-wide shift).
uint64_t SetSignal(GuestSAMask* Set, int Signal) {
  constexpr int HighestSignal = static_cast<int>(sizeof(GuestSAMask::Val) * 8);
  if (Signal < 1 || Signal > HighestSignal) {
    return Set->Val;
  }

  const int BitIndex = Signal - 1;
  return Set->Val | (1ULL << BitIndex);
}

/**
 * @name Signal frame setup
 * @{ */

// Routes a host signal: registered host handlers get the first chance, then the frontend
// handler, and only if nobody claims it is the signal treated as a guest signal.
void SignalDelegator::HandleSignal(FEX::HLE::ThreadStateObject* Thread, int Signal, void* Info, void* UContext) {
  // A signal arriving on a thread without a registered thread object is a programming error.
  if (Thread == nullptr) {
    LogMan::Msg::AFmt("Thread {} has received a signal and hasn't registered itself with the delegate! Programming error!",
                      FHU::Syscalls::gettid());
    return;
  }

  SignalHandler& Registered = HostHandlers[Signal];

  // Host handlers run in registration order; the first one that reports success ends processing.
  for (auto& HostCallback : Registered.Handlers) {
    if (HostCallback(Thread->Thread, Signal, Info, UContext)) {
      return;
    }
  }

  // Optional frontend handler, consulted only after every host handler declined.
  if (Registered.FrontendHandler) {
    if (Registered.FrontendHandler(Thread->Thread, Signal, Info, UContext)) {
      return;
    }
  }

  // Nobody on the host side wanted it, so deliver it to the guest.
  HandleGuestSignal(Thread, Signal, Info, UContext);
}

// Registers a host-side handler for `Signal`.
// The handler is stored via SetHostSignalHandler (ownership of Func is moved), after which
// the frontend is notified through FrontendRegisterHostSignalHandler with the same
// Signal/Required pair.
// NOTE(review): the precise meaning of `Required` is defined by those two callees — confirm there.
void SignalDelegator::RegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) {
  SetHostSignalHandler(Signal, std::move(Func), Required);
  FrontendRegisterHostSignalHandler(Signal, Required);
}

// Copies guest register state that currently lives in host registers (as captured in the
// host ucontext) back into the thread's CPU state frame. Only does work on arm64 builds.
//
// IgnoreMask: bit N set means host register N has already been spilled and must be skipped.
void SignalDelegator::SpillSRA(FEXCore::Core::InternalThreadState* Thread, void* ucontext, uint32_t IgnoreMask) {
#ifdef ARCHITECTURE_arm64
  // Translate the interrupted host PC back to a guest RIP first.
  const auto GuestRIP = CTX->RestoreRIPFromHostPC(Thread, ArchHelpers::Context::GetPc(ucontext));
  auto& GuestState = Thread->CurrentFrame->State;
  GuestState.rip = GuestRIP;

  // General purpose registers: guest register index -> mapped host register.
  for (size_t GuestIdx = 0; GuestIdx < Config.SRAGPRCount; ++GuestIdx) {
    const uint8_t HostReg = Config.SRAGPRMapping[GuestIdx];
    const bool AlreadySpilled = (IgnoreMask & (1U << HostReg)) != 0;
    if (!AlreadySpilled) {
      GuestState.gregs[GuestIdx] = ArchHelpers::Context::GetArmReg(ucontext, HostReg);
    }
  }

  // Vector registers: only the low 128 bits are copied in either mode.
  // TODO: This doesn't save the upper 128-bits of the 256-bit registers.
  // This needs to be implemented still.
  for (size_t GuestIdx = 0; GuestIdx < Config.SRAFPRCount; ++GuestIdx) {
    const auto HostFPR = ArchHelpers::Context::GetArmFPR(ucontext, Config.SRAFPRMapping[GuestIdx]);

    void* Destination {};
    if (SupportsAVX) {
      Destination = &GuestState.xmm.avx.data[GuestIdx][0];
    } else {
      Destination = &GuestState.xmm.sse.data[GuestIdx][0];
    }
    memcpy(Destination, &HostFPR, sizeof(__uint128_t));
  }

  // Rebuild EFLAGS from the host GPRs/PSTATE and write it back into the guest flag storage.
  auto* HostGPRs = ArchHelpers::Context::GetArmGPRs(ucontext);
  const auto HostPState = ArchHelpers::Context::GetArmPState(ucontext);
  const uint32_t CompactedFlags = CTX->ReconstructCompactedEFLAGS(Thread, true, HostGPRs, HostPState);
  CTX->SetFlagsFromCompactedEFLAGS(Thread, CompactedFlags);
#endif
}

// Saves the interrupted host context and the current guest CPU state into a ContextBackup
// placed on the host stack below the interrupted stack pointer, then moves the host SP in
// `ucontext` down to that backup.
//
// Also transfers (and clears) the thread's pending FaultToTopAndGeneratedException marker.
// Returns the freshly written backup.
ArchHelpers::Context::ContextBackup* SignalDelegator::StoreThreadState(FEXCore::Core::InternalThreadState* Thread, int Signal, void* ucontext) {
  using Backup = ArchHelpers::Context::ContextBackup;

  // A signal can land at any point of host execution, so carve out fresh stack space:
  // step over the red zone (does nothing on arm hosts), make room for the backup, align to 16 bytes.
  const uint64_t InterruptedSP = ArchHelpers::Context::GetSp(ucontext);
  uintptr_t BackupSP = InterruptedSP;
  BackupSP -= Backup::RedZoneSize;
  BackupSP -= sizeof(Backup);
  BackupSP = FEXCore::AlignDown(BackupSP, 16);

  auto* Saved = reinterpret_cast<Backup*>(BackupSP);
  ArchHelpers::Context::BackupContext(ucontext, Saved);

  // Remember which signal this backup belongs to.
  Saved->Signal = Signal;

  // Registers may live in the context or in host GPRs at this point, so snapshot the full guest state.
  memcpy(&Saved->GuestState, &Thread->CurrentFrame->State, sizeof(FEXCore::Core::CPUState));

  // Execution continues with the stack pointer sitting at the backup.
  ArchHelpers::Context::SetSp(ucontext, BackupSP);

  // Bookkeeping fields start cleared; frame setup fills them in later.
  Saved->Flags = 0;
  Saved->InSyscallInfo = 0;
  Saved->FPStateLocation = 0;
  Saved->UContextLocation = 0;
  Saved->SigInfoLocation = 0;

  // Hand the fault-to-top marker over to the backup and reset it on the thread.
  auto& FaultData = Thread->CurrentFrame->SynchronousFaultData;
  Saved->FaultToTopAndGeneratedException = FaultData.FaultToTopAndGeneratedException;
  FaultData.FaultToTopAndGeneratedException = false;

  return Saved;
}

// Undoes StoreThreadState: locates the ContextBackup, restores the host context and the
// guest CPU state from it, and — if a guest signal frame was built — lets the matching
// RestoreFrame_* routine apply the guest-visible frame contents.
//
// For TYPE_PAUSE the backup sits directly at the host SP. For sigreturn types the backup
// pointer is read from the guest stack, just above the signal frame and FP state.
void SignalDelegator::RestoreThreadState(FEXCore::Core::InternalThreadState* Thread, void* ucontext, RestoreType Type) {
  using Backup = ArchHelpers::Context::ContextBackup;

  uint64_t BackupAddress {};

  if (Type == RestoreType::TYPE_PAUSE) [[unlikely]] {
    BackupAddress = ArchHelpers::Context::GetSp(ucontext);
  } else {
    // The host stack location was stored on the guest stack when the frame was created,
    // so walk upward from the guest SP over every frame component to reach it.
    uint64_t Cursor = Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP];

    // Skip one component of `Size` bytes, then round up to the given alignment.
    const auto StepOver = [&Cursor](uint64_t Size, size_t Alignment) {
      Cursor += Size;
      Cursor = FEXCore::AlignUp(Cursor, Alignment);
    };

    if (Is64BitMode) {
      // Stack layout: return pointer (already consumed), ucontext_t, siginfo_t, FP state, host stack location.
      StepOver(sizeof(FEXCore::x86_64::ucontext_t), alignof(FEXCore::x86_64::ucontext_t));
      StepOver(sizeof(siginfo_t), alignof(siginfo_t));

      if (SupportsAVX) {
        StepOver(sizeof(FEXCore::x86_64::xstate), alignof(FEXCore::x86_64::xstate));
      } else {
        StepOver(sizeof(FEXCore::x86_64::_libc_fpstate), alignof(FEXCore::x86_64::_libc_fpstate));
      }
    } else {
      // Stack layout: SigFrame_i32 or RTSigFrame_i32, FP state, host stack location.
      if (Type == RestoreType::TYPE_NONREALTIME) {
        // The 4-byte pretcode /AND/ an ignored legacy argument are no longer below the frame.
        StepOver(sizeof(SigFrame_i32) - 8, alignof(SigFrame_i32));
      } else {
        // Only the 4-byte pretcode is gone.
        StepOver(sizeof(RTSigFrame_i32) - 4, alignof(RTSigFrame_i32));
      }

      if (SupportsAVX) {
        StepOver(sizeof(FEXCore::x86::xstate), alignof(FEXCore::x86::xstate));
      } else {
        StepOver(sizeof(FEXCore::x86::_libc_fpstate), alignof(FEXCore::x86::_libc_fpstate));
      }
    }

    BackupAddress = *reinterpret_cast<uint64_t*>(Cursor);
  }

  const uintptr_t BackupPointer = BackupAddress;
  auto* Saved = reinterpret_cast<Backup*>(BackupPointer);

  // Host side first, then the guest state snapshot.
  ArchHelpers::Context::RestoreContext(ucontext, Saved);
  memcpy(&Thread->CurrentFrame->State, &Saved->GuestState, sizeof(FEXCore::Core::CPUState));

  // No guest frame was ever built for this backup: nothing more to apply.
  if (!Saved->UContextLocation) {
    return;
  }

  auto Frame = Thread->CurrentFrame;

  // XXX: When Saved->Flags carries CONTEXT_FLAG_INJIT, SRA might need to be restored to values
  // from the context. That is unsupported since it needs real state reconstruction and could
  // otherwise result in tearing, so no action is taken for that flag.

  if (Is64BitMode) {
    RestoreFrame_x64(Thread, Saved, Frame, ucontext);
  } else if (Type == RestoreType::TYPE_NONREALTIME) {
    RestoreFrame_ia32(Thread, Saved, Frame, ucontext);
  } else {
    RestoreRTFrame_ia32(Thread, Saved, Frame, ucontext);
  }
}

// Redirects the interrupted host thread so that, on return from the host signal handler,
// it enters the guest's signal handler.
//
// Steps, in order:
//   1. back up host context + guest state (StoreThreadState) and bump the signal ref counter,
//   2. if the signal interrupted JIT code, spill statically allocated registers to the frame,
//   3. choose the guest stack (current stack or the guest altstack),
//   4. build the guest signal frame (x64, ia32 RT or ia32 legacy),
//   5. reset the guest state the handler starts with (RIP, RSP, flags, CS/SS, FPU),
//   6. point the host PC at the dispatcher entry that refills SRA.
//
// Thread:      thread receiving the signal.
// Signal:      host signal number.
// info:        host siginfo_t for the signal.
// ucontext:    host context; modified in place (SP, PC, state register).
// GuestAction: guest sigaction describing handler, flags and restorer.
// GuestStack:  guest alternate stack description.
//
// Always returns true.
bool SignalDelegator::HandleDispatcherGuestSignal(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext,
                                                  GuestSigAction* GuestAction, stack_t* GuestStack) {
  auto ContextBackup = StoreThreadState(Thread, Signal, ucontext);

  auto Frame = Thread->CurrentFrame;

  // Ref count our faults
  // We use this to track if it is safe to clear cache
  ++Thread->CurrentFrame->SignalHandlerRefCounter;

  uint64_t OldPC = ArchHelpers::Context::GetPc(ucontext);
  const bool WasInJIT = CTX->IsAddressInCodeBuffer(Thread, OldPC);

  // Spill the SRA regardless of signal handler type
  // We are going to be returning to the top of the dispatcher which will fill again
  // Otherwise we might load garbage
  if (WasInJIT) {
    uint32_t IgnoreMask {};
#ifdef ARCHITECTURE_arm64
    if (Frame->InSyscallInfo != 0) {
      // We are in a syscall, this means we are in a weird register state
      // We need to spill SRA but only some of it, since some values have already been spilled
      // Lower 16 bits tells us which registers are already spilled to the context
      // So we ignore spilling those ones
      IgnoreMask = Frame->InSyscallInfo & 0xFFFF;
    } else {
      // We must spill everything
      IgnoreMask = 0;
    }
#endif

    // We are in jit, SRA must be spilled
    SpillSRA(Thread, ucontext, IgnoreMask);

    ContextBackup->Flags |= ArchHelpers::Context::ContextFlags::CONTEXT_FLAG_INJIT;

    // We are leaving the syscall information behind. Make sure to store the previous state.
    ContextBackup->InSyscallInfo = Thread->CurrentFrame->InSyscallInfo;
    Thread->CurrentFrame->InSyscallInfo = 0;
  } else {
    if (!IsAddressInDispatcher(OldPC)) {
      // This is likely to cause issues but in some cases it isn't fatal
      // This can also happen if we have put a signal on hold, then we just reenabled the signal
      // So we are in the syscall handler
      // Only throw a log message in this case
      // The logging below is compiled out on purpose (`if constexpr (false)`).
      if constexpr (false) {
        // XXX: Messages in the signal handler can cause us to crash
        LogMan::Msg::EFmt("Signals in dispatcher have unsynchronized context");
      }
    }
  }

  // By default the frame is built on the stack the guest was already using.
  uint64_t OldGuestSP = Frame->State.gregs[FEXCore::X86State::REG_RSP];
  uint64_t NewGuestSP = OldGuestSP;

  // altstack is only used if the signal handler was setup with SA_ONSTACK
  if (GuestAction->sa_flags & SA_ONSTACK) {
    // Additionally the altstack is only used if it is enabled (SS_DISABLE flag is not set)
    if (!(GuestStack->ss_flags & SS_DISABLE)) {
      // If our guest is already inside of the alternative stack
      // Then that means we are hitting recursive signals and we need to walk back the stack correctly
      uint64_t AltStackBase = reinterpret_cast<uint64_t>(GuestStack->ss_sp);
      uint64_t AltStackEnd = AltStackBase + GuestStack->ss_size;
      // Both bounds are inclusive in this check.
      if (OldGuestSP >= AltStackBase && OldGuestSP <= AltStackEnd) {
        // We are already in the alt stack, the rest of the code will handle adjusting this
      } else {
        // Start at the top (highest address) of the altstack; frame setup grows downward from here.
        NewGuestSP = AltStackEnd;
      }
    }
  }

  // siginfo_t
  siginfo_t* HostSigInfo = reinterpret_cast<siginfo_t*>(info);

  // Capture the interrupted guest RIP and flags before the state is rewritten for the handler.
  ContextBackup->OriginalRIP = Thread->CurrentFrame->State.rip;
  uint32_t eflags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0);

  // Build the guest-visible frame; 32-bit guests get the RT layout only when SA_SIGINFO was requested.
  if (Is64BitMode) {
    NewGuestSP = SetupFrame_x64(Thread, ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags);
  } else {
    const bool SigInfoFrame = (GuestAction->sa_flags & SA_SIGINFO) == SA_SIGINFO;
    if (SigInfoFrame) {
      NewGuestSP = SetupRTFrame_ia32(Thread, ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags);
    } else {
      NewGuestSP = SetupFrame_ia32(Thread, ContextBackup, Frame, Signal, HostSigInfo, ucontext, GuestAction, GuestStack, NewGuestSP, eflags);
    }
  }

  // Guest execution resumes at the handler, on the freshly built frame.
  Frame->State.rip = reinterpret_cast<uint64_t>(GuestAction->sigaction_handler.sigaction);
  Frame->State.gregs[FEXCore::X86State::REG_RSP] = NewGuestSP;

  // Linux clears DF, RF, and TF flags on signal.
  // NOTE(review): DF is written as 1 while RF/TF are written as 0; the *_RAW_LOC slot presumably
  // uses an encoding in which 1 represents a cleared DF — confirm against the flag helpers.
  Frame->State.flags[FEXCore::X86State::RFLAG_DF_RAW_LOC] = 1;
  Frame->State.flags[FEXCore::X86State::RFLAG_RF_LOC] = 0;
  Frame->State.flags[FEXCore::X86State::RFLAG_TF_RAW_LOC] = 0;

  // Linux resets the CS and SS registers on signal handler.
  // This way signal handlers always go back to their original operating mode.
  // Doesn't matter for 32-bit processes as they can only be 32-bit, but does
  // matter for 64-bit processes as they could have potentially installed a 32-bit code segment.
  Frame->State.cs_idx = FEXCore::Core::CPUState::DEFAULT_USER_CS << 3;
  Frame->State.ss_idx = 0;
  // Refresh the cached segment bases so they match the selectors written above.
  Frame->State.cs_cached = Frame->State.CalculateGDTBase(*Frame->State.GetSegmentFromIndex(Frame->State, Frame->State.cs_idx));
  Frame->State.ss_cached = Frame->State.CalculateGDTBase(*Frame->State.GetSegmentFromIndex(Frame->State, Frame->State.ss_idx));

  // The guest starts its signal frame with a zero initialized FPU
  // Set that up now. Little bit costly but it's a requirement
  // This state will be restored on rt_sigreturn
  // The memset size is that of the whole xmm storage, addressed through its avx member.
  memset(Frame->State.xmm.avx.data, 0, sizeof(Frame->State.xmm));
  memset(Frame->State.mm, 0, sizeof(Frame->State.mm));
  Frame->State.FCW = 0x37F;
  Frame->State.AbridgedFTW = 0;

  // Set the new PC
  ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA);
  ArchHelpers::Context::SetFillSRASingleInst(ucontext, false);
  // Set our state register to point to our guest thread data
  ArchHelpers::Context::SetState(ucontext, reinterpret_cast<uint64_t>(Frame));

  return true;
}

// Host SIGILL handler for the faults raised at FEX's own return addresses.
//
// Recognises three program counters from Config:
//  - SignalHandlerReturnAddress / SignalHandlerReturnAddressRT: restore the thread state saved for a
//    (non-)realtime guest signal frame.
//  - PauseReturnInstruction: restore the thread state saved for a pause.
//
// Returns true when the SIGILL was one of the above and has been consumed, false otherwise.
bool SignalDelegator::HandleSIGILL(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) {
  const auto FaultPC = ArchHelpers::Context::GetPc(ucontext);

  const bool IsRTReturn = FaultPC == Config.SignalHandlerReturnAddressRT;
  if (IsRTReturn || FaultPC == Config.SignalHandlerReturnAddress) {
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
    RestoreThreadState(Thread, ucontext, IsRTReturn ? RestoreType::TYPE_REALTIME : RestoreType::TYPE_NONREALTIME);

    // One fewer signal handler in flight.
    // This counter is what tells us whether it is safe to clear cache.
    --Thread->CurrentFrame->SignalHandlerRefCounter;

    if (!ThreadObject->SignalInfo.DeferredSignalFrames.empty()) {
      // More deferred frames are waiting. The fault page was RW while this handler ran, so drop it
      // back to PROT_NONE to make FEX trampoline into the SIGSEGV deferred handler again.
      mprotect(reinterpret_cast<void*>(&Thread->InterruptFaultPage), sizeof(Thread->InterruptFaultPage), PROT_NONE);
    }
    return true;
  }

  if (FaultPC != Config.PauseReturnInstruction) {
    // Not one of our return addresses.
    return false;
  }

  RestoreThreadState(Thread, ucontext, RestoreType::TYPE_PAUSE);

  // One fewer signal handler in flight.
  // This counter is what tells us whether it is safe to clear cache.
  --Thread->CurrentFrame->SignalHandlerRefCounter;
  return true;
}

// Host handler for SIGNAL_FOR_PAUSE. Dispatches on the SignalEvent stored in the thread object by
// SignalThread():
//  - Pause: save the thread state and redirect the host PC to the pause handler.
//  - Stop: reset the host stack to the frame's ReturningStackLocation and redirect to the stop handler.
//  - Return / ReturnRT: restore the previously saved (non-)realtime thread state.
//
// Returns true if the event was one of the above (and resets SignalReason to Nothing), false otherwise.
bool SignalDelegator::HandleSignalPause(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) {
  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
  const SignalEvent Reason = ThreadObject->SignalReason.load();
  auto Frame = Thread->CurrentFrame;

  switch (Reason) {
  case SignalEvent::Pause: {
    // Save the thread state so it can be resumed later.
    StoreThreadState(Thread, Signal, ucontext);

    const bool InJIT = CTX->IsAddressInCodeBuffer(Thread, ArchHelpers::Context::GetPc(ucontext));
    if (!InJIT) {
      // Outside of JIT code, SRA is already spilled.
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), "Signals in dispatcher have unsynchronized "
                                                                                        "context");
#endif
      ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddress);
    } else {
      // Inside JIT code, SRA must be spilled first.
      ArchHelpers::Context::SetPc(ucontext, Config.ThreadPauseHandlerAddressSpillSRA);
    }

    // Point the state register at our guest thread data.
    ArchHelpers::Context::SetState(ucontext, reinterpret_cast<uint64_t>(Frame));

    // One more signal handler in flight.
    // This counter is what tells us whether it is safe to clear cache.
    ++Thread->CurrentFrame->SignalHandlerRefCounter;

    ThreadObject->SignalReason.store(SignalEvent::Nothing);
    return true;
  }

  case SignalEvent::Stop: {
    // The thread is stopping and nothing else matters any more.
    // Rewind the stack to where we entered the core so we can get out safely.
    ArchHelpers::Context::SetSp(ucontext, Frame->ReturningStackLocation);

    // Ref counting is irrelevant from here on.
    Thread->CurrentFrame->SignalHandlerRefCounter = 0;

    // Choose the new PC.
    const bool InJIT = CTX->IsAddressInCodeBuffer(Thread, ArchHelpers::Context::GetPc(ucontext));
    if (!InJIT) {
      // Outside of JIT code, SRA is already spilled.
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
      LOGMAN_THROW_A_FMT(!IsAddressInDispatcher(ArchHelpers::Context::GetPc(ucontext)), "Signals in dispatcher have unsynchronized "
                                                                                        "context");
#endif
      ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddress);
    } else {
      // Inside JIT code, SRA must be spilled first.
      ArchHelpers::Context::SetPc(ucontext, Config.ThreadStopHandlerAddressSpillSRA);
    }

    // Careful: if we were already paused (due to GDB) and are now immediately stopping (gdb kill),
    // the idle thread counter must not be decremented twice.
    if (ThreadObject->ThreadSleeping) {
      // A sleeping thread already had its idle counter decremented; re-increment to keep the logic balanced.
      FEX::HLE::_SyscallHandler->TM.IncrementIdleRefCount();
    }

    ThreadObject->SignalReason.store(SignalEvent::Nothing);
    return true;
  }

  case SignalEvent::Return:
  case SignalEvent::ReturnRT: {
    const auto Type = Reason == SignalEvent::ReturnRT ? RestoreType::TYPE_REALTIME : RestoreType::TYPE_NONREALTIME;
    RestoreThreadState(Thread, ucontext, Type);

    // One fewer signal handler in flight.
    // This counter is what tells us whether it is safe to clear cache.
    --Thread->CurrentFrame->SignalHandlerRefCounter;

    ThreadObject->SignalReason.store(SignalEvent::Nothing);
    return true;
  }

  default: break;
  }

  return false;
}

// Records Event as the thread's pending SignalReason and then sends SIGNAL_FOR_PAUSE to that thread
// so that HandleSignalPause() picks the event up.
void SignalDelegator::SignalThread(FEXCore::Core::InternalThreadState* Thread, SignalEvent Event) {
  auto Target = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);

  // The reason must be published before the signal is raised.
  Target->SignalReason.store(Event);

  const auto& Ids = Target->ThreadInfo;
  FHU::Syscalls::tgkill(Ids.PID, Ids.TID, SignalDelegator::SIGNAL_FOR_PAUSE);
}

/**  @} */

// Classifies a received signal as asynchronous (may be deferred) or synchronous.
//
// A signal whose si_code is <= SI_USER did not originate from the kernel and is always treated as
// asynchronous: SI_USER is 0 and negative si_code values come from user space (tgkill, sigqueue, ...),
// which can send even the nominally synchronous signal numbers.
// Kernel-originated SIGBUS, SIGFPE, SIGILL, SIGSEGV and SIGTRAP are synchronous; everything else is async.
static bool IsAsyncSignal(const siginfo_t* Info, int Signal) {
  if (Info->si_code <= SI_USER) {
    return true;
  }

  const bool IsSynchronous = Signal == SIGBUS || Signal == SIGFPE || Signal == SIGILL || Signal == SIGSEGV || Signal == SIGTRAP;
  return !IsSynchronous;
}

// Computes the signal mask to apply while the guest handler for Signal runs.
//
// Starts from the guest action's sa_mask, adds Signal itself unless the guest asked for SA_NODEFER,
// and finally strips every signal that is marked Required in HostHandlers.
uint64_t SignalDelegator::GetNewSigMask(int Signal) const {
  const auto& Action = HostHandlers[Signal].GuestAction;

  uint64_t Mask = Action.sa_mask.Val;

  // Without SA_NODEFER the signal being handled is blocked for the duration of its handler.
  if ((Action.sa_flags & SA_NODEFER) == 0) {
    Mask |= (1ULL << (Signal - 1));
  }

  // Collect the bits of all required signals; those must never be masked.
  uint64_t RequiredBits = 0;
  for (size_t Bit = 0; Bit < MAX_SIGNALS; ++Bit) {
    if (HostHandlers[Bit + 1].Required.load(std::memory_order_relaxed)) {
      RequiredBits |= (1ULL << Bit);
    }
  }

  return Mask & ~RequiredBits;
}

// Frontend SIGSEGV hook.
//
// - If FaultSafeUserMemAccess::TryHandleSafeFault() reports the fault as handled, FEX dies with an error.
// - On ARM64, a SEGV_ACCERR inside [JITGuardPage, JITGuardPage + FEX_PAGE_SIZE) is forwarded to
//   ManuallyLoadJumpBuf() with the thread's RestartJump and the interrupted registers; returns true.
//
// Returns false when the fault was not consumed here.
bool SignalDelegator::HandleFrontendSIGSEGV(FEXCore::Core::InternalThreadState* Thread, int Signal, void* Info, void* UContext) {
  auto FaultInfo = *static_cast<siginfo_t*>(Info);

  if (FaultSafeUserMemAccess::TryHandleSafeFault(Signal, FaultInfo, UContext)) {
    ERROR_AND_DIE_FMT("Received invalid data to syscall. Crashing now!");
  }

#ifdef ARCHITECTURE_arm64
  if (Signal == SIGSEGV && FaultInfo.si_code == SEGV_ACCERR) {
    void* const GuardBegin = reinterpret_cast<void*>(Thread->JITGuardPage);
    void* const GuardEnd = reinterpret_cast<void*>(Thread->JITGuardPage + FEXCore::Utils::FEX_PAGE_SIZE);

    if (FaultInfo.si_addr >= GuardBegin && FaultInfo.si_addr < GuardEnd) {
      FEXCore::UncheckedLongJump::ManuallyLoadJumpBuf(Thread->RestartJump, Thread->JITGuardOverflowArgument,
                                                      ArchHelpers::Context::GetArmGPRs(UContext), ArchHelpers::Context::GetArmFPRs(UContext),
                                                      ArchHelpers::Context::GetArmPc(UContext));
      return true;
    }
  }
#endif

  return false;
}

// Decides what to do with a signal that arrived on a guest thread.
//
// The stages, in order:
//  1. A SEGV_ACCERR on the thread's InterruptFaultPage is FEX's own "check for deferred signals" fault.
//     With no deferral section active, the top deferred frame (if any) replaces Signal/SigInfo and the
//     saved signal mask is written back to the host context. With a deferral section still active
//     (ARM64 only) the faulting instruction is skipped.
//  2. An asynchronous signal arriving while deferral is active is pushed onto DeferredSignalFrames,
//     the host context mask is replaced with GetNewSigMask(), and the fault page is set to PROT_NONE.
//  3. An asynchronous signal that is blocked in CurrentSignalMask is only recorded in PendingSignals.
//  4. Otherwise the guest disposition decides: SIG_IGN returns, a real handler is invoked through
//     Handler.GuestHandler, and SIG_DFL falls through to the previous host action.
//  5. The previous host action (OldAction) is called or emulated; for default terminate/coredump
//     behaviour the code map and telemetry are flushed before the signal is reset to SIG_DFL.
//
// ThreadObject: state object of the thread that received the signal.
// Signal:       host signal number.
// Info:         pointer to the host siginfo_t (copied locally).
// UContext:     pointer to the host ucontext_t; may be modified (PC and uc_sigmask).
void SignalDelegator::HandleGuestSignal(FEX::HLE::ThreadStateObject* ThreadObject, int Signal, void* Info, void* UContext) {
  auto Thread = ThreadObject->Thread;
  ucontext_t* _context = (ucontext_t*)UContext;
  // Local copy: stage 1 may overwrite it with the siginfo of a deferred signal.
  auto SigInfo = *static_cast<siginfo_t*>(Info);

  auto MustDeferSignal = (Thread->CurrentFrame->State.DeferredSignalRefCount.Load() != 0);
  if (Signal == SIGSEGV && SigInfo.si_code == SEGV_ACCERR && SigInfo.si_addr == reinterpret_cast<void*>(&Thread->InterruptFaultPage)) {
    if (!MustDeferSignal) {
      // We just reached the end of the outermost signal-deferring section and faulted to check for pending signals.
      // Pull a signal frame off the stack.

      mprotect(reinterpret_cast<void*>(&Thread->InterruptFaultPage), sizeof(Thread->InterruptFaultPage), PROT_READ | PROT_WRITE);

      if (ThreadObject->SignalInfo.DeferredSignalFrames.empty()) {
        // No signals to defer. Just set the fault page back to RW and continue execution.
        // This occurs as a minor race condition between the refcount decrement and the access to the fault page.
        return;
      }

      // Top is a reference into the vector: everything needed is copied out before pop_back().
      const auto& Top = ThreadObject->SignalInfo.DeferredSignalFrames.back();
      Signal = Top.Signal;
      SigInfo = Top.Info;
      // sig mask has been updated at the defer time, recover the original mask
      memcpy(&_context->uc_sigmask, &Top.SigMask, sizeof(uint64_t));
      ThreadObject->SignalInfo.DeferredSignalFrames.pop_back();

      // Until we re-protect the page to PROT_NONE, FEX will now *permanently* defer signals and /not/ check them.
      //
      // In order to return /back/ to a sane state, we wait for the rt_sigreturn to happen.
      // rt_sigreturn will check if there are any more deferred signals to handle
      // - If there are deferred signals
      //   - mprotect back to PROT_NONE
      //   - sigreturn will trampoline out to the previous fault address check, SIGSEGV and restart
      // - If there are *no* deferred signals
      //  - No need to mprotect, it is already RW
    } else {
#ifdef ARCHITECTURE_arm64
      // If RefCount != 0 then that means we hit an access with nested signal-deferring sections.
      // Increment the PC past the `str zr, [x1]` to continue code execution until we reach the outermost section.
      ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetPc(UContext) + 4);
      return;
#else
      // X86 should always be doing a refcount compare and branch since we can't guarantee instruction size.
      // ARM64 just always does the access to reduce branching overhead.
      ERROR_AND_DIE_FMT("X86 shouldn't hit this InterruptFaultPage");
#endif
    }
  } else if (IsAsyncSignal(&SigInfo, Signal) && MustDeferSignal) {
    // If the signal is asynchronous (as determined by si_code) and FEX is in a state of needing
    // to defer the signal, then add the signal to the thread's signal queue.
    LOGMAN_THROW_A_FMT(ThreadObject->SignalInfo.DeferredSignalFrames.size() != ThreadObject->SignalInfo.DeferredSignalFrames.capacity(),
                       "Deferred signals vector hit "
                       "capacity size. This will "
                       "likely crash! Asserting now!");

    // The host mask at the time of deferral is saved so it can be restored when the frame is popped.
    ThreadObject->SignalInfo.DeferredSignalFrames.emplace_back(ThreadStateObject::DeferredSignalState {
      .Info = SigInfo,
      .Signal = Signal,
      .SigMask = _context->uc_sigmask.__val[0],
    });

    uint64_t NewMask = GetNewSigMask(Signal);

    // Update our host signal mask so we don't hit race conditions with signals
    // This allows us to maintain the expected signal mask through the guest signal handling and then all the way back again
    memcpy(&_context->uc_sigmask, &NewMask, sizeof(uint64_t));

    // Now update the faulting page permissions so it will fault on write.
    mprotect(reinterpret_cast<void*>(&Thread->InterruptFaultPage), sizeof(Thread->InterruptFaultPage), PROT_NONE);

    // Postpone the remainder of signal handling logic until we process the SIGSEGV triggered by writing to InterruptFaultPage.
    return;
  }

  // Check for masked signals
  // Only asynchronous signals are held back; synchronous ones are delivered even if the guest masked them.
  if (ThreadObject->SignalInfo.CurrentSignalMask.Val & (1ULL << (Signal - 1)) && IsAsyncSignal(&SigInfo, Signal)) {
    // This signal is masked, must defer until the guest updates the signal mask.
    // Add it to the pending signal list
    ThreadObject->SignalInfo.PendingSignals |= 1ULL << (Signal - 1);
    return;
  }

  // Let the host take first stab at handling the signal
  SignalHandler& Handler = HostHandlers[Signal];

  // Remove the pending signal
  ThreadObject->SignalInfo.PendingSignals &= ~(1ULL << (Signal - 1));

  // We have an emulation thread pointer, we can now modify its state
  if (Handler.GuestAction.sigaction_handler.handler == SIG_DFL) {
    if (Handler.DefaultBehaviour == DEFAULT_TERM || Handler.DefaultBehaviour == DEFAULT_COREDUMP) {
      // Let the signal fall through to the unhandled path
      // This way the parent process can know it died correctly
    }
  } else if (Handler.GuestAction.sigaction_handler.handler == SIG_IGN) {
    return;
  } else {
    // The guest installed a real handler: hand the signal to the registered guest dispatcher.
    if (Handler.GuestHandler &&
        Handler.GuestHandler(Thread, Signal, &SigInfo, UContext, &Handler.GuestAction, &ThreadObject->SignalInfo.GuestAltStack)) {
      uint64_t NewMask = GetNewSigMask(Signal);

      // Update our host signal mask so we don't hit race conditions with signals
      // This allows us to maintain the expected signal mask through the guest signal handling and then all the way back again
      memcpy(&_context->uc_sigmask, &NewMask, sizeof(uint64_t));

      // We handled this signal, continue running
      return;
    }
    ERROR_AND_DIE_FMT("Unhandled guest exception");
  }

  // Unhandled crash
  // Call back in to the previous handler
  if (Handler.OldAction.sa_flags & SA_SIGINFO) {
    Handler.OldAction.sigaction(Signal, &SigInfo, UContext);
  } else if (Handler.OldAction.handler == SIG_IGN || (Handler.OldAction.handler == SIG_DFL && Handler.DefaultBehaviour == DEFAULT_IGNORE)) {
    // Do nothing
  } else if (Handler.OldAction.handler == SIG_DFL && (Handler.DefaultBehaviour == DEFAULT_COREDUMP || Handler.DefaultBehaviour == DEFAULT_TERM)) {
    CTX->FlushAndCloseCodeMap();

#ifndef FEX_DISABLE_TELEMETRY
    // In the case of signals that cause coredump or terminate, save telemetry early.
    // FEX is hard crashing at this point and won't hit regular shutdown routines.
    // Add the signal to the crash mask.
    FEXCORE_TELEMETRY_OR(TYPE_CRASH_MASK, (1ULL << Signal));
    if (Signal == SIGSEGV && reinterpret_cast<uint64_t>(SigInfo.si_addr) >= SyscallHandler::TASK_MAX_64BIT) {
      // Tried accessing invalid non-canonical x86-64 address.
      FEXCORE_TELEMETRY_SET(TYPE_UNHANDLED_NONCANONICAL_ADDRESS, 1);
    }
    SaveTelemetry();
#endif

    FEX::HLE::_SyscallHandler->TM.CleanupForExit();

    // Reassign back to DFL and crash
    signal(Signal, SIG_DFL);
    if (SigInfo.si_code != SI_KERNEL) {
      // If the signal wasn't sent by the kernel then we need to reraise it.
      // This is necessary since returning from this signal handler now might just continue executing.
      // eg: If sent from tgkill then the signal gets dropped and returns.
      FHU::Syscalls::tgkill(::getpid(), FHU::Syscalls::gettid(), Signal);
    }
  } else {
    // The previous host action was a plain one-argument handler.
    Handler.OldAction.handler(Signal);
  }
}

// Shuts telemetry down for this application so it gets saved.
// Does nothing when telemetry is compiled out or no application name was provided.
void SignalDelegator::SaveTelemetry() {
#ifndef FEX_DISABLE_TELEMETRY
  if (ApplicationName.empty()) {
    return;
  }

  FEXCore::Telemetry::Shutdown(ApplicationName);
#endif
}

bool SignalDelegator::InstallHostThunk(int Signal) {
  SignalHandler& SignalHandler = HostHandlers[Signal];
  // If the host thunk is already installed for this, just return
  if (SignalHandler.Installed) {
    return false;
  }

  // Default flags for us
  SignalHandler.HostAction.sa_flags = SA_SIGINFO | SA_ONSTACK;

  bool Result = UpdateHostThunk(Signal);

  SignalHandler.Installed = Result;
  return Result;
}

// (Re)builds the host kernel sigaction for Signal from the current guest action and installs it
// with rt_sigaction.
//
// - The handler is SignalHandlerThunk, unless the guest disposition is SIG_IGN or SIG_DFL and the
//   signal is not Required by FEX, in which case the host disposition mirrors the guest's.
// - SA_NOCLDSTOP, SA_NOCLDWAIT, SA_NODEFER and SA_RESTART are copied from the guest flags.
// - Required signals are removed from the host sa_mask; signals in the guest sa_mask are added.
// - OldAction is only captured when the handler has not been installed before.
//
// Returns true on success, false if rt_sigaction failed (after logging through AFmt).
bool SignalDelegator::UpdateHostThunk(int Signal) {
  SignalHandler& SignalHandler = HostHandlers[Signal];

  // Now install the thunk handler
  SignalHandler.HostAction.sigaction = SignalHandlerThunk;

  // Returns HostFlags with the bits selected by Flags replaced by the corresponding bits of GuestFlags.
  auto CheckAndAddFlags = [](uint64_t HostFlags, uint64_t GuestFlags, uint64_t Flags) {
    // If any of the flags don't match then update to the newest set
    if ((HostFlags ^ GuestFlags) & Flags) {
      // Remove all the flags from the host that we are testing for
      HostFlags &= ~Flags;
      // Copy over the guest flags being set
      HostFlags |= GuestFlags & Flags;
    }

    return HostFlags;
  };

  // Don't allow the guest to override flags for
  // SA_SIGINFO : Host always needs SA_SIGINFO
  // SA_ONSTACK : Host always needs the altstack
  // SA_RESETHAND : We don't support one shot handlers
  // SA_RESTORER : We always need our host side restorer on x86-64, Couldn't use guest restorer anyway
  SignalHandler.HostAction.sa_flags = CheckAndAddFlags(SignalHandler.HostAction.sa_flags, SignalHandler.GuestAction.sa_flags,
                                                       SA_NOCLDSTOP | SA_NOCLDWAIT | SA_NODEFER | SA_RESTART);

#ifdef ARCHITECTURE_x86_64
#define SA_RESTORER 0x04000000
  SignalHandler.HostAction.sa_flags |= SA_RESTORER;
  SignalHandler.HostAction.restorer = sigrestore;
#endif

  // Walk the signals we have that are required and make sure to remove it from the mask
  // This'll likely be SIGILL, SIGBUS, SIG63

  // If the guest has masked some signals then we need to also mask those signals
  // Bit (i - 1) of sa_mask corresponds to signal number i.
  for (size_t i = 1; i < HostHandlers.size(); ++i) {
    if (HostHandlers[i].Required.load(std::memory_order_relaxed)) {
      SignalHandler.HostAction.sa_mask &= ~(1ULL << (i - 1));
    } else if (SigIsMember(&SignalHandler.GuestAction.sa_mask, i)) {
      SignalHandler.HostAction.sa_mask |= (1ULL << (i - 1));
    }
  }

  // Check for SIG_IGN
  if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_IGN && HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) {
    // We are ignoring this signal on the guest
    // Which means we need to ignore it on the host as well
    SignalHandler.HostAction.handler = SIG_IGN;
  }

  // Check for SIG_DFL
  if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_DFL && HostHandlers[Signal].Required.load(std::memory_order_relaxed) == false) {
    // Default handler on guest and default handler on host
    // With coredump and terminate then expect fireworks, but that is what the guest wants
    SignalHandler.HostAction.handler = SIG_DFL;
  }

  // Only update the old action if we haven't ever been installed
  // The trailing 8 is the sigset size in bytes passed to the raw rt_sigaction syscall.
  const int Result =
    ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.HostAction, SignalHandler.Installed ? nullptr : &SignalHandler.OldAction, 8);
  if (Result < 0) {
    // Signal 32 and 33 are consumed by glibc. We don't handle this atm
    LogMan::Msg::AFmt("Failed to install host signal thunk for signal {}: {}", Signal, strerror(errno));
    return false;
  }

  return true;
}

// Puts the host action that was in place before FEX installed its thunk back for Signal.
// The result of the rt_sigaction call is not checked.
void SignalDelegator::UninstallHostHandler(int Signal) {
  auto* const Previous = &HostHandlers[Signal].OldAction;

  // Raw syscall; 8 is the sigset size in bytes.
  ::syscall(SYS_rt_sigaction, Signal, Previous, nullptr, 8);
}

// Queues Signal with the given siginfo to thread `tid` of thread group `tgid` via rt_tgsigqueueinfo.
//
// Around the queue operation two pieces of calling-thread/process state are temporarily relaxed:
//  - If the guest disposition is SIG_IGN and IgnoreMask is set, the host action is swapped to the
//    pre-FEX OldAction for the duration and FEX's HostAction is reinstated afterwards.
//  - If Signal is blocked in the calling thread's host signal mask, it is unblocked for the duration
//    and the original mask is restored afterwards.
//
// tgid/tid:   target thread group and thread.
// Signal:     signal number to queue.
// info:       siginfo to deliver with the signal.
// IgnoreMask: see above.
void SignalDelegator::QueueSignal(pid_t tgid, pid_t tid, int Signal, siginfo_t* info, bool IgnoreMask) {
  bool WasIgnored {};
  bool WasMasked {};
  SignalHandler& SignalHandler = HostHandlers[Signal];
  if (SignalHandler.GuestAction.sigaction_handler.handler == SIG_IGN && IgnoreMask) {
    ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.OldAction, nullptr, 8);
    WasIgnored = true;
  }

  // Get the current host signal mask.
  // With a null `set` the kernel ignores `how` and only reports the current mask.
  uint64_t OriginalMask {};
  const uint64_t SignalMask = 1ULL << (Signal - 1);
  ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, nullptr, &OriginalMask, 8);
  if (OriginalMask & SignalMask) {
    WasMasked = true;

    // Signal currently masked, unmask.
    // This has to be SIG_SETMASK: SIG_BLOCK can only add signals to the mask, so passing the reduced
    // set with SIG_BLOCK would leave the signal blocked.
    const uint64_t UnmaskedSet = OriginalMask & ~SignalMask;
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &UnmaskedSet, nullptr, 8);
  }

  ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, Signal, info);

  if (WasMasked) {
    // Mask again by restoring exactly the mask we started with.
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &OriginalMask, nullptr, 8);
  }

  if (WasIgnored) {
    // Ignore again
    ::syscall(SYS_rt_sigaction, Signal, &SignalHandler.HostAction, nullptr, 8);
  }
}

// Constructs the delegator and wires up all host-side signal handling.
//
// In order:
//  - Marks signal 0, SIGKILL and SIGSTOP as "installed" so no thunk is ever installed for them.
//  - Chooses the unaligned-access handler type from HalfBarrierTSOEnabled().
//  - Fills in the default behaviour (coredump / ignore) for the signals that don't default to terminate.
//  - Registers the frontend SIGILL handler for FEXCore::Assert::ForcedAssert.
//  - Registers required host handlers for SIGILL, SIGSEGV, (ARM64 only) SIGBUS and SIGNAL_FOR_PAUSE.
//  - Registers HandleDispatcherGuestSignal as the guest handler for every signal slot 0..MAX_SIGNALS.
//
// _CTX:            FEXCore context, stored in CTX.
// ApplicationName: stored for use by SaveTelemetry().
// SupportsAVX:     stored in the SupportsAVX member.
SignalDelegator::SignalDelegator(FEXCore::Context::Context* _CTX, const std::string_view ApplicationName, bool SupportsAVX)
  : CTX {_CTX}
  , ApplicationName {ApplicationName}
  , SupportsAVX {SupportsAVX} {
  // Signal zero isn't real
  HostHandlers[0].Installed = true;

  // We can't capture SIGKILL or SIGSTOP
  HostHandlers[SIGKILL].Installed = true;
  HostHandlers[SIGSTOP].Installed = true;

  if (HalfBarrierTSOEnabled()) {
    UnalignedHandlerType = FEXCore::ArchHelpers::Arm64::UnalignedHandlerType::HalfBarrier;
  } else {
    UnalignedHandlerType = FEXCore::ArchHelpers::Arm64::UnalignedHandlerType::NonAtomic;
  }

  // Most signals default to termination
  // These ones are slightly different
  static constexpr std::array<std::pair<int, SignalDelegator::DefaultBehaviourType>, 14> SignalDefaultBehaviours = {{
    {SIGQUIT, DEFAULT_COREDUMP},
    {SIGILL, DEFAULT_COREDUMP},
    {SIGTRAP, DEFAULT_COREDUMP},
    {SIGABRT, DEFAULT_COREDUMP},
    {SIGBUS, DEFAULT_COREDUMP},
    {SIGFPE, DEFAULT_COREDUMP},
    {SIGSEGV, DEFAULT_COREDUMP},
    {SIGCHLD, DEFAULT_IGNORE},
    {SIGCONT, DEFAULT_IGNORE},
    {SIGURG, DEFAULT_IGNORE},
    {SIGXCPU, DEFAULT_COREDUMP},
    {SIGXFSZ, DEFAULT_COREDUMP},
    {SIGSYS, DEFAULT_COREDUMP},
    {SIGWINCH, DEFAULT_IGNORE},
  }};

  for (const auto& [Signal, Behaviour] : SignalDefaultBehaviours) {
    HostHandlers[Signal].DefaultBehaviour = Behaviour;
  }

  // Register frontend SIGILL handler for forced assertion.
  RegisterFrontendHostSignalHandler(
    SIGILL,
    [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool {
      ucontext_t* _context = (ucontext_t*)ucontext;
      auto& mcontext = _context->uc_mcontext;
      uint64_t PC {};
#ifdef ARCHITECTURE_arm64
      PC = mcontext.pc;
#else
      PC = mcontext.gregs[REG_RIP];
#endif
      if (PC == reinterpret_cast<uint64_t>(&FEXCore::Assert::ForcedAssert)) {
        // This is a host side assert. Don't deliver this to the guest
        // We want to actually break here
        FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator->UninstallHostHandler(Signal);
        return true;
      }
      return false;
    },
    true);

  // The lambdas below are capture-less: each one looks the delegator up through the thread's state
  // object and forwards to the matching member function.
  const auto PauseHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool {
    return FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator->HandleSignalPause(Thread, Signal, info, ucontext);
  };

  const auto GuestSignalHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext,
                                     GuestSigAction* GuestAction, stack_t* GuestStack) -> bool {
    return FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator->HandleDispatcherGuestSignal(
      Thread, Signal, info, ucontext, GuestAction, GuestStack);
  };

  const auto SigillHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool {
    return FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator->HandleSIGILL(Thread, Signal, info, ucontext);
  };

  const auto SigsegvHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool {
    return FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator->HandleFrontendSIGSEGV(Thread, Signal,
                                                                                                                         info, ucontext);
  };

  // Register SIGILL signal handler.
  RegisterHostSignalHandler(SIGILL, SigillHandler, true);
  RegisterHostSignalHandler(SIGSEGV, SigsegvHandler, true);

#ifdef ARCHITECTURE_arm64
  // Register SIGBUS signal handler.
  // Only BUS_ADRALN faults raised from inside the JIT code buffer are handled here.
  const auto SigbusHandler = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* _info, void* ucontext) -> bool {
    const auto PC = ArchHelpers::Context::GetPc(ucontext);
    if (!Thread->CTX->IsAddressInCodeBuffer(Thread, PC)) {
      // Wasn't a sigbus in JIT code
      return false;
    }
    siginfo_t* info = reinterpret_cast<siginfo_t*>(_info);

    if (info->si_code != BUS_ADRALN) {
      // This only handles alignment problems
      return false;
    }

    FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1);
    const auto Delegator = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread)->SignalInfo.Delegator;
    const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, Delegator->GetUnalignedHandlerType(), PC,
                                                                           ArchHelpers::Context::GetArmGPRs(ucontext));
    // When Result holds a value the PC is moved by that amount; otherwise it is left unchanged
    // and the handler reports the fault as unhandled.
    ArchHelpers::Context::SetPc(ucontext, PC + Result.value_or(0));
    return Result.has_value();
  };

  RegisterHostSignalHandler(SIGBUS, SigbusHandler, true);
#endif
  // Register pause signal handler.
  RegisterHostSignalHandler(SignalDelegator::SIGNAL_FOR_PAUSE, PauseHandler, true);

  // Guest signal handlers.
  for (uint32_t Signal = 0; Signal <= SignalDelegator::MAX_SIGNALS; ++Signal) {
    RegisterHostSignalHandlerForGuest(Signal, GuestSignalHandler);
  }
}

// Restores the pre-FEX host action for every signal whose thunk FEX installed.
//
// Signal numbers run from 1 to MAX_SIGNALS inclusive (HostHandlers has a slot for each, plus the
// unused slot 0), so the loop must include MAX_SIGNALS itself. SIGKILL and SIGSTOP are skipped because
// they are only flagged as installed to keep FEX from ever touching them.
SignalDelegator::~SignalDelegator() {
  for (int i = 1; i <= MAX_SIGNALS; ++i) {
    if (i == SIGKILL || i == SIGSTOP || !HostHandlers[i].Installed) {
      continue;
    }
    // Raw syscall; 8 is the sigset size in bytes.
    ::syscall(SYS_rt_sigaction, i, &HostHandlers[i].OldAction, nullptr, 8);
    HostHandlers[i].Installed = false;
  }
}

// Sets up the per-thread signal state for the calling thread.
//
// - Registers the thread's TLS data with the allocator and points SignalInfo.Delegator at this object.
// - Maps a SIGSTKSZ * 16 byte alternative signal stack, stores the ThreadStateObject pointer in its
//   first 8 bytes, makes the first page read-only and registers the remainder with sigaltstack().
// - Captures the current host signal mask into SignalInfo.CurrentSignalMask.
// - Reserves space for deferred signal frames when the object has a FEXCore thread attached.
//
// Thread: state object of the calling thread.
void SignalDelegator::RegisterTLSState(FEX::HLE::ThreadStateObject* Thread) {
  FEXCore::Allocator::RegisterTLSData(Thread->Thread);

  Thread->SignalInfo.Delegator = this;

  // Set up our signal alternative stack
  // This is per thread rather than per signal
  Thread->SignalInfo.AltStackPtr = FEXCore::Allocator::mmap(nullptr, SIGSTKSZ * 16, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

  // Validate the mapping itself, before it is named, written to or offset.
  // Checking the offset ss_sp pointer instead could never fail, as base + 8 is never null.
  LOGMAN_THROW_A_FMT(Thread->SignalInfo.AltStackPtr != nullptr && Thread->SignalInfo.AltStackPtr != MAP_FAILED, "Couldn't allocate stack pointer");

  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(Thread->SignalInfo.AltStackPtr), SIGSTKSZ * 16);
  stack_t altstack {};
  // The first 8 bytes of the mapping hold the thread object pointer, so the stack proper starts after them.
  altstack.ss_sp = reinterpret_cast<void*>(reinterpret_cast<uint64_t>(Thread->SignalInfo.AltStackPtr) + 8);
  altstack.ss_size = SIGSTKSZ * 16 - 8;
  altstack.ss_flags = 0;

  // Copy the thread object to the start of the alt-stack
  memcpy(Thread->SignalInfo.AltStackPtr, &Thread, sizeof(void*));

  // Protect the first page of the alt-stack for overflow protection.
  // This has to happen after the memcpy above, which writes into that page.
  mprotect(Thread->SignalInfo.AltStackPtr, FEXCore::Utils::FEX_PAGE_SIZE, PROT_READ);

  // Register the alt stack
  const int Result = sigaltstack(&altstack, nullptr);
  if (Result == -1) {
    LogMan::Msg::EFmt("Failed to install alternative signal stack {}", strerror(errno));
  }

  // Get the current host signal mask
  ::syscall(SYS_rt_sigprocmask, 0, nullptr, &Thread->SignalInfo.CurrentSignalMask.Val, 8);

  if (Thread->Thread) {
    // Reserve a small amount of deferred signal frames. Usually the stack won't be utilized beyond
    // 1 or 2 signals but add a few more just in case.
    Thread->SignalInfo.DeferredSignalFrames.reserve(8);
  }
}

// Tears down the per-thread signal state created by RegisterTLSState():
// unmaps the alternative signal stack, disables sigaltstack for the calling thread and
// unregisters the thread's TLS data from the allocator.
void SignalDelegator::UninstallTLSState(FEX::HLE::ThreadStateObject* Thread) {
  auto& Info = Thread->SignalInfo;

  // Release the alt-stack mapping and forget the pointer.
  FEXCore::Allocator::munmap(Info.AltStackPtr, SIGSTKSZ * 16);
  Info.AltStackPtr = nullptr;

  // Tell the kernel this thread no longer has an alternative stack.
  stack_t Disabled {};
  Disabled.ss_flags = SS_DISABLE;
  if (sigaltstack(&Disabled, nullptr) == -1) {
    LogMan::Msg::EFmt("Failed to uninstall alternative signal stack {}", strerror(errno));
  }

  FEXCore::Allocator::UninstallTLSData(Thread->Thread);
}

// Records whether Signal is required by the host and makes sure the host thunk is installed.
// Signal dispositions are process-wide on Linux and several threads may call in here, so the
// update happens under HostDelegatorMutex.
void SignalDelegator::FrontendRegisterHostSignalHandler(int Signal, bool Required) {
  std::lock_guard Guard(HostDelegatorMutex);

  HostHandlers[Signal].Required.store(Required);
  InstallHostThunk(Signal);
}

// Frontend-handler counterpart of FrontendRegisterHostSignalHandler(): records whether Signal is
// required and makes sure the host thunk is installed.
// Signal dispositions are process-wide on Linux and several threads may call in here, so the
// update happens under HostDelegatorMutex.
void SignalDelegator::FrontendRegisterFrontendHostSignalHandler(int Signal, bool Required) {
  std::lock_guard Guard(HostDelegatorMutex);

  HostHandlers[Signal].Required.store(Required);
  InstallHostThunk(Signal);
}

// Stores Func as the callback HandleGuestSignal() invokes when the guest has a real handler
// installed for Signal. Signal is used directly as an index into HostHandlers.
void SignalDelegator::RegisterHostSignalHandlerForGuest(int Signal, FEX::HLE::HostSignalDelegatorFunctionForGuest Func) {
  std::lock_guard Guard(HostDelegatorMutex);

  auto& Slot = HostHandlers[Signal];
  Slot.GuestHandler = std::move(Func);
}

// Registers a frontend host handler for Signal in two steps: first the callback is stored via
// SetFrontendHostSignalHandler(), then the Required flag is recorded and the host thunk installed
// via FrontendRegisterFrontendHostSignalHandler().
void SignalDelegator::RegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) {
  SetFrontendHostSignalHandler(Signal, std::move(Func), Required);
  FrontendRegisterFrontendHostSignalHandler(Signal, Required);
}

// Guest-facing sigaction: optionally reports the current guest action for Signal and optionally
// installs a new one.
//
// Signal:    guest signal number; valid values are 1..MAX_SIGNALS.
// Action:    new guest action, or nullptr to leave the action unchanged.
// OldAction: receives the guest action in place before this call, or nullptr.
//
// Returns 0 on success, or -EINVAL (as an unsigned value) when Signal is out of range or when a new
// action is supplied for SIGKILL / SIGSTOP.
uint64_t SignalDelegator::RegisterGuestSignalHandler(int Signal, const GuestSigAction* Action, GuestSigAction* OldAction) {
  std::lock_guard lk(GuestDelegatorMutex);

  // Invalid signal specified.
  // Signal 0 and negative values are not real signals; Linux rejects them with EINVAL as well, and
  // letting them through would index HostHandlers with a bogus slot.
  if (Signal < 1 || Signal > MAX_SIGNALS) {
    return -EINVAL;
  }

  // If we have an old signal set then give it back
  if (OldAction) {
    *OldAction = HostHandlers[Signal].GuestAction;
  }

  // Now assign the new action
  if (Action) {
    // These signal dispositions can't be changed on Linux
    if (Signal == SIGKILL || Signal == SIGSTOP) {
      return -EINVAL;
    }

    HostHandlers[Signal].GuestAction = *Action;
    // Only attempt to install a new thunk handler if we were installing a new guest action.
    // InstallHostThunk() returns false when the thunk already exists, in which case the existing
    // host action is refreshed from the new guest action instead.
    if (!InstallHostThunk(Signal)) {
      UpdateHostThunk(Signal);
    }
  }

  return 0;
}

void SignalDelegator::CheckXIDHandler() {
  std::lock_guard lk(GuestDelegatorMutex);
  std::lock_guard lk2(HostDelegatorMutex);

  constexpr size_t SIGNAL_SETXID = 33;

  kernel_sigaction CurrentAction {};

  // Only update the old action if we haven't ever been installed
  const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, nullptr, &CurrentAction, 8);
  if (Result < 0) {
    LogMan::Msg::AFmt("Failed to get status of XID signal");
    return;
  }

  SignalHandler& HostHandler = HostHandlers[SIGNAL_SETXID];
  if (CurrentAction.handler != HostHandler.HostAction.handler) {
    // GLIBC overwrote our XID handler, reinstate our handler
    const int Result = ::syscall(SYS_rt_sigaction, SIGNAL_SETXID, &HostHandler.HostAction, nullptr, 8);
    if (Result < 0) {
      LogMan::Msg::AFmt("Failed to reinstate our XID signal handler {}", strerror(errno));
    }
  }
}

// Guest-facing sigaltstack: optionally reports the guest's current alternative signal stack and
// optionally installs a new one.
//
// Thread: state object of the calling guest thread.
// ss:     new alternative stack description, or nullptr to leave it unchanged.
// old_ss: receives the current alternative stack description, or nullptr.
//
// Returns 0 on success, or (as an unsigned value)
//   -EPERM  if a new stack is supplied while the guest is running on the current alt stack,
//   -EINVAL if ss->ss_flags contains anything other than SS_ONSTACK, SS_AUTODISARM or SS_DISABLE,
//   -ENOMEM if the new stack is smaller than X86_MINSIGSTKSZ.
uint64_t SignalDelegator::RegisterGuestSigAltStack(FEX::HLE::ThreadStateObject* Thread, const stack_t* ss, stack_t* old_ss) {
  bool UsingAltStack {};
  uint64_t AltStackBase = reinterpret_cast<uint64_t>(Thread->SignalInfo.GuestAltStack.ss_sp);
  uint64_t AltStackEnd = AltStackBase + Thread->SignalInfo.GuestAltStack.ss_size;
  uint64_t GuestSP = Thread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP];

  // The guest counts as "on the alt stack" when the stack is enabled and RSP lies inside it.
  if (!(Thread->SignalInfo.GuestAltStack.ss_flags & SS_DISABLE) && GuestSP >= AltStackBase && GuestSP <= AltStackEnd) {
    UsingAltStack = true;
  }

  // If we have an old signal set then give it back
  if (old_ss) {
    *old_ss = Thread->SignalInfo.GuestAltStack;

    if (UsingAltStack) {
      // We are currently operating on the alt stack
      // Let the guest know
      old_ss->ss_flags |= SS_ONSTACK;
    } else if (old_ss->ss_size == 0) {
      // No alternative stack is configured at all; report it as disabled.
      // A configured, enabled stack that simply isn't in use right now must NOT be reported as
      // SS_DISABLE: its stored flags (which already carry SS_DISABLE if the guest disabled it) are
      // returned as they are.
      old_ss->ss_flags |= SS_DISABLE;
    }
  }

  // Now assign the new action
  if (ss) {
    // If we tried setting the alt stack while we are using it then throw an error
    if (UsingAltStack) {
      return -EPERM;
    }

    // We need to check for invalid flags
    // The only flag that can be passed is SS_AUTODISARM and SS_DISABLE
    if ((ss->ss_flags & ~SS_ONSTACK) & // SS_ONSTACK is ignored
        ~(SS_AUTODISARM | SS_DISABLE)) {
      // A flag remained that isn't one of the supported ones?
      return -EINVAL;
    }

    if (ss->ss_flags & SS_DISABLE) {
      // If SS_DISABLE Is specified then the rest of the details are ignored
      Thread->SignalInfo.GuestAltStack = *ss;
      return 0;
    }

    // stack size needs to be at least X86_MINSIGSTKSZ
    if (ss->ss_size < X86_MINSIGSTKSZ) {
      return -ENOMEM;
    }

    Thread->SignalInfo.GuestAltStack = *ss;
  }

  return 0;
}

// Re-sends, via tgkill to the thread itself, every signal that is recorded as pending for the
// thread and is not blocked by its current guest signal mask. Bit N of the masks corresponds to
// signal number N + 1.
static void CheckForPendingSignals(const FEX::HLE::ThreadStateObject* Thread) {
  const auto& Info = Thread->SignalInfo;

  // Pending signals that are no longer masked.
  const uint64_t Deliverable = ~Info.CurrentSignalMask.Val & Info.PendingSignals;
  if (Deliverable == 0) {
    return;
  }

  for (int Signal = 1; Signal <= 64; ++Signal) {
    const uint64_t SignalBit = 1ULL << (Signal - 1);
    if ((Deliverable & SignalBit) == 0) {
      continue;
    }

    // Delivering the signal may transfer control away; we might never come back here.
    FHU::Syscalls::tgkill(Thread->ThreadInfo.PID, Thread->ThreadInfo.TID, Signal);
  }
}

/**
 * @brief Emulates the guest `rt_sigprocmask` operation.
 *
 * @param Thread Thread whose guest signal mask is read and/or updated.
 * @param how    SIG_BLOCK, SIG_UNBLOCK or SIG_SETMASK. Only inspected when `set` is non-null.
 * @param set    Optional new mask bits. May alias `oldset`.
 * @param oldset Optional destination for the mask that was in effect on entry.
 *
 * @return 0 on success, -EINVAL for an unknown `how`.
 */
uint64_t SignalDelegator::GuestSigProcMask(FEX::HLE::ThreadStateObject* Thread, int how, const uint64_t* set, uint64_t* oldset) {
  // `set` and `oldset` may refer to the same memory, so the incoming mask is captured first,
  // the new mask is applied second, and the captured value is handed back last.
  const auto MaskOnEntry = Thread->SignalInfo.CurrentSignalMask.Val;

  if (set != nullptr) {
    // SIGKILL and SIGSTOP are stripped from whatever the guest asks for.
    constexpr uint64_t KillStopBits = (1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1));
    constexpr uint64_t MaskableBits = ~KillStopBits;

    switch (how) {
    case SIG_BLOCK: Thread->SignalInfo.CurrentSignalMask.Val |= *set & MaskableBits; break;
    case SIG_UNBLOCK: Thread->SignalInfo.CurrentSignalMask.Val &= ~(*set & MaskableBits); break;
    case SIG_SETMASK: Thread->SignalInfo.CurrentSignalMask.Val = *set & MaskableBits; break;
    default: return -EINVAL;
    }

    // Build the mask that is really applied on the host. Signals the host requires stay
    // unblocked, which hides from the guest that not every requested bit is honoured.
    uint64_t HostMask = Thread->SignalInfo.CurrentSignalMask.Val;
    for (size_t Signal = 1; Signal <= MAX_SIGNALS; ++Signal) {
      const bool HostNeedsIt = HostHandlers[Signal].Required.load(std::memory_order_relaxed);
      if (HostNeedsIt) {
        HostMask &= ~(1ULL << (Signal - 1));
      }
    }

    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &HostMask, nullptr, 8);
  }

  if (oldset != nullptr) {
    *oldset = MaskOnEntry;
  }

  // Anything that just became unmasked and was pending needs to be delivered.
  CheckForPendingSignals(Thread);

  return 0;
}

/**
 * @brief Emulates the guest `rt_sigpending` operation.
 *
 * @param Thread     Thread whose recorded pending signals are reported.
 * @param set        Destination for the pending bitmask (bit N is signal N + 1).
 * @param sigsetsize Size of the guest's signal set; values above 8 bytes are rejected.
 *
 * @return 0 on success, -EINVAL when `sigsetsize` is too large.
 */
uint64_t SignalDelegator::GuestSigPending(FEX::HLE::ThreadStateObject* Thread, uint64_t* set, size_t sigsetsize) {
  if (sigsetsize > sizeof(uint64_t)) {
    return -EINVAL;
  }

  // Start from the signals tracked as pending for the thread.
  *set = Thread->SignalInfo.PendingSignals;

  sigset_t HostPending {};
  if (sigpending(&HostPending) != 0) {
    // The host query failed; report only the tracked signals.
    return 0;
  }

  // Fold the signals pending on the host into the same bit layout.
  uint64_t HostBits {};
  for (size_t Signal = 1; Signal <= MAX_SIGNALS; ++Signal) {
    if (sigismember(&HostPending, Signal)) {
      HostBits |= 1ULL << (Signal - 1);
    }
  }

  *set |= HostBits;
  return 0;
}

/**
 * @brief Emulates the guest `rt_sigsuspend` operation.
 *
 * Temporarily replaces the thread's emulated signal mask with `*set`, suspends on the host
 * with the equivalent host mask, then restores the previous emulated mask.
 *
 * @param Thread     Thread performing the suspend.
 * @param set        Guest signal mask to wait with (bit N is signal N + 1).
 * @param sigsetsize Size of the guest's signal set; values above 8 bytes are rejected.
 *
 * @return -EINVAL when `sigsetsize` is too large, otherwise the result of the host
 *         `sigsuspend` with a -1 return translated to the negated errno of that call.
 */
uint64_t SignalDelegator::GuestSigSuspend(FEX::HLE::ThreadStateObject* Thread, uint64_t* set, size_t sigsetsize) {
  if (sigsetsize > sizeof(uint64_t)) {
    return -EINVAL;
  }

  // Read the guest mask once so the emulated and host masks are built from the same value.
  const uint64_t GuestMask = *set;

  uint64_t IgnoredSignalsMask = ~((1ULL << (SIGKILL - 1)) | (1ULL << (SIGSTOP - 1)));

  // Backup the mask
  Thread->SignalInfo.PreviousSuspendMask = Thread->SignalInfo.CurrentSignalMask;
  // Set the new mask
  Thread->SignalInfo.CurrentSignalMask.Val = GuestMask & IgnoredSignalsMask;
  sigset_t HostSet {};

  sigemptyset(&HostSet);

  for (size_t i = 0; i < MAX_SIGNALS; ++i) {
    if (GuestMask & (1ULL << i)) {
      sigaddset(&HostSet, i + 1);
    }
  }

  // Additionally we must always listen to SIGNAL_FOR_PAUSE
  // This technically forces us in to a race but should be fine
  // SIGBUS and SIGILL can't happen so we don't need to listen for them
  // sigaddset(&HostSet, SIGNAL_FOR_PAUSE);

  uint64_t Result = sigsuspend(&HostSet);

  // Capture errno immediately: the pending-signal check below issues further syscalls and may
  // run signal handlers, either of which can overwrite errno before it is reported.
  const int SuspendErrno = errno;

  // Restore Previous signal mask we are emulating
  // XXX: Might be unsafe if the signal handler adjusted the thread's signal mask
  // But since we don't support the guest adjusting the mask through the context object
  // then this is safe-ish
  Thread->SignalInfo.CurrentSignalMask = Thread->SignalInfo.PreviousSuspendMask;

  CheckForPendingSignals(Thread);

  return Result == -1 ? -SuspendErrno : Result;
}

/**
 * @brief Emulates the guest `rt_sigtimedwait` operation by forwarding to the host syscall.
 *
 * @param set        Signals to wait for.
 * @param info       Optional destination for the information of the accepted signal.
 * @param timeout    Optional upper bound on the wait.
 * @param sigsetsize Size of the guest's signal set; values above 8 bytes are rejected here.
 *
 * @return The host syscall result, with a -1 return translated to the negated errno.
 */
uint64_t SignalDelegator::GuestSigTimedWait(uint64_t* set, siginfo_t* info, const struct timespec* timeout, size_t sigsetsize) {
  if (sigsetsize > sizeof(uint64_t)) {
    return -EINVAL;
  }

  // The raw rt_sigtimedwait syscall takes the signal set size as its fourth argument.
  // Leaving it out hands the kernel an indeterminate value for that argument.
  uint64_t Result = ::syscall(SYS_rt_sigtimedwait, set, info, timeout, sigsetsize);

  return Result == -1 ? -errno : Result;
}

/**
 * @brief Emulates the guest `signalfd` family by creating a host signalfd.
 *
 * @param fd         File descriptor argument forwarded to the host `signalfd`.
 * @param set        Guest signal mask to listen for (bit N is signal N + 1).
 * @param sigsetsize Size of the guest's signal set; values above 8 bytes are rejected.
 * @param flags      Flags forwarded to the host `signalfd`.
 *
 * @return The host `signalfd` result, with a -1 return translated to the negated errno.
 */
uint64_t SignalDelegator::GuestSignalFD(int fd, const uint64_t* set, size_t sigsetsize, int flags) {
  if (sigsetsize > sizeof(uint64_t)) {
    return -EINVAL;
  }

  sigset_t HostMask {};
  sigemptyset(&HostMask);

  for (size_t Signal = 1; Signal <= MAX_SIGNALS; ++Signal) {
    // Signals the host requires for itself are skipped for now.
    const bool HostInternal = HostHandlers[Signal].Required.load(std::memory_order_relaxed);
    if (!HostInternal && (*set & (1ULL << (Signal - 1)))) {
      sigaddset(&HostMask, Signal);
    }
  }

  // XXX: Barebones implementation, just enough for applications that listen for SIGCHLD.
  // Eventually this needs a dedicated listener thread that forwards the result. A thread is
  // required to avoid deadlocking when a thread signals itself while blocking on the FD.
  uint64_t Result = signalfd(fd, &HostMask, flags);
  if (Result == -1) {
    return -errno;
  }

  return Result;
}

// Factory: builds a SignalDelegator from the given arguments and returns ownership of it.
fextl::unique_ptr<FEX::HLE::SignalDelegator>
CreateSignalDelegator(FEXCore::Context::Context* CTX, const std::string_view ApplicationName, bool SupportsAVX) {
  using Delegator = FEX::HLE::SignalDelegator;
  auto Instance = fextl::make_unique<Delegator>(CTX, ApplicationName, SupportsAVX);
  return Instance;
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SignalDelegator.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
$end_info$
*/


#pragma once

#include "LinuxSyscalls/Types.h"
#include "ArchHelpers/MContext.h"
#include "VDSO_Emulation.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <mutex>

#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/Telemetry.h>

namespace FEXCore::Context {
class Context;
}
namespace FEXCore::Core {
struct InternalThreadState;
}
namespace FEX::HLE {
enum class SignalEvent : uint32_t;
struct ThreadStateObject;
} // namespace FEX::HLE

namespace FEX::HLE {
// Callback type for host-side signal handlers.
// Receives the thread, the signal number, and the `info` / `ucontext` pointers as opaque `void*`.
// NOTE(review): the bool result presumably reports whether the host handled the signal — confirm at the call site.
using HostSignalDelegatorFunction = std::function<bool(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext)>;
// Callback type for host-side handlers that act on behalf of a guest handler.
// Takes the same arguments as above plus the guest's sigaction and guest stack description.
using HostSignalDelegatorFunctionForGuest =
  std::function<bool(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext, GuestSigAction* GuestAction, stack_t* GuestStack)>;

// Linux-frontend signal delegator.
// Holds the per-signal handler table (`HostHandlers`), exposes the entry points used to emulate
// the guest's signal-related syscalls, and declares the helpers that build and restore guest
// signal frames.
class SignalDelegator final : public FEXCore::SignalDelegator, public FEXCore::Allocator::FEXAllocOperators {
public:
  // Number of signals tracked. Signal numbers are 1-based, so tables indexed by signal number
  // are sized MAX_SIGNALS + 1.
  constexpr static size_t MAX_SIGNALS {64};

  // Use the last signal just so we are less likely to ever conflict with something that the guest application is using
  // 64 is used internally by Valgrind
  constexpr static size_t SIGNAL_FOR_PAUSE {63};

  // NOTE(review): the two comment lines below read like a description of the handler callback
  // types rather than of the constructor; kept as found.
  // Returns true if the host handled the signal
  // Arguments are the same as sigaction handler
  SignalDelegator(FEXCore::Context::Context* _CTX, const std::string_view ApplicationName, bool SupportsAVX);
  ~SignalDelegator() override;

  // Called from the signal trampoline function.
  void HandleSignal(FEX::HLE::ThreadStateObject* Thread, int Signal, void* Info, void* UContext);

  // Per-thread setup / teardown hooks; their definitions are not in this header.
  void RegisterTLSState(FEX::HLE::ThreadStateObject* Thread);
  void UninstallTLSState(FEX::HLE::ThreadStateObject* Thread);

  /**
   * @brief Registers a signal handler for the host to handle a signal
   *
   * It's a process level signal handler so one must be careful
   */
  void RegisterHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required);

  /**
   * @brief Registers a signal handler for the host to handle a signal specifically for guest handling
   *
   * It's a process level signal handler so one must be careful
   */
  void RegisterHostSignalHandlerForGuest(int Signal, HostSignalDelegatorFunctionForGuest Func);
  void RegisterFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required);

  /**
   * @name These functions are all for Linux signal emulation
   * @{ */
  /**
   * @brief Allows the guest to register a signal handler that is run after the host attempts to resolve the handler first
   */
  uint64_t RegisterGuestSignalHandler(int Signal, const GuestSigAction* Action, struct GuestSigAction* OldAction);

  // Guest `sigaltstack` emulation. Returns 0 or a negated errno.
  uint64_t RegisterGuestSigAltStack(FEX::HLE::ThreadStateObject* Thread, const stack_t* ss, stack_t* old_ss);

  // Guest signal-mask syscalls. Masks are 64-bit bitfields where bit N is signal N + 1.
  // Each returns a syscall-style result (negated errno on failure).
  uint64_t GuestSigProcMask(FEX::HLE::ThreadStateObject* Thread, int how, const uint64_t* set, uint64_t* oldset);
  uint64_t GuestSigPending(FEX::HLE::ThreadStateObject* Thread, uint64_t* set, size_t sigsetsize);
  uint64_t GuestSigSuspend(FEX::HLE::ThreadStateObject* Thread, uint64_t* set, size_t sigsetsize);
  uint64_t GuestSigTimedWait(uint64_t* set, siginfo_t* info, const struct timespec* timeout, size_t sigsetsize);
  uint64_t GuestSignalFD(int fd, const uint64_t* set, size_t sigsetsize, int flags);
  /**  @} */

  /**
   * @brief Check to ensure the XID handler is still set to the FEX handler
   *
   * On a new thread GLIBC will set the XID handler underneath us.
   * After the first thread is created check this.
   */
  void CheckXIDHandler();

  void UninstallHostHandler(int Signal);
  void QueueSignal(pid_t tgid, pid_t tid, int Signal, siginfo_t* info, bool IgnoreMask);

  // Context pointer. NOTE(review): presumably the `_CTX` passed to the constructor — confirm in the .cpp.
  FEXCore::Context::Context* CTX;

  void SetVDSOSymbols() {
    // Get symbols from VDSO.
    VDSOPointers = FEX::VDSO::GetVDSOSymbols();
  }

  // Returns the VDSO_FEX_CallbackRET entry of the cached VDSO symbols as an integer address.
  uintptr_t GetThunkCallbackRET() const override {
    return reinterpret_cast<uintptr_t>(VDSOPointers.VDSO_FEX_CallbackRET);
  }

  // Calls one of the two signal-handler-return addresses stored in `Config`
  // (the RT variant when `RT` is true). Declared [[noreturn]]; the call is not expected to return.
  [[noreturn]]
  void HandleSignalHandlerReturn(bool RT) {
    using SignalHandlerReturnFunc = void (*)();

    SignalHandlerReturnFunc SignalHandlerReturn {};
    if (RT) {
      SignalHandlerReturn = reinterpret_cast<SignalHandlerReturnFunc>(Config.SignalHandlerReturnAddressRT);
    } else {
      SignalHandlerReturn = reinterpret_cast<SignalHandlerReturnFunc>(Config.SignalHandlerReturnAddress);
    }

    SignalHandlerReturn();
    FEX_UNREACHABLE;
  }

  /**
   * @brief Signals a thread with a specific core event.
   *
   * @param Thread Which thread to signal.
   * @param Event Which event to signal the event with.
   */
  void SignalThread(FEXCore::Core::InternalThreadState* Thread, SignalEvent Event);

  // Accessor for the configured unaligned-access handler type.
  FEXCore::ArchHelpers::Arm64::UnalignedHandlerType GetUnalignedHandlerType() const {
    return UnalignedHandlerType;
  }

  void SaveTelemetry();

  void SpillSRA(FEXCore::Core::InternalThreadState* Thread, void* ucontext, uint32_t IgnoreMask);

private:
  // Called from the thunk handler to handle the signal
  void HandleGuestSignal(FEX::HLE::ThreadStateObject* ThreadObject, int Signal, void* Info, void* UContext);
  bool HandleFrontendSIGSEGV(FEXCore::Core::InternalThreadState* Thread, int Signal, void* Info, void* UContext);

  /**
   * @brief Registers a signal handler for the host to handle a signal
   *
   * It's a process level signal handler so one must be careful
   */
  void FrontendRegisterHostSignalHandler(int Signal, bool Required);
  void FrontendRegisterFrontendHostSignalHandler(int Signal, bool Required);

  // Appends `Func` to the list of host callbacks for `Signal`.
  // NOTE(review): `Required` is not used by this inline body.
  void SetHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) {
    HostHandlers[Signal].Handlers.push_back(std::move(Func));
  }
  // Replaces the single frontend callback for `Signal`.
  // NOTE(review): `Required` is not used by this inline body.
  void SetFrontendHostSignalHandler(int Signal, HostSignalDelegatorFunction Func, bool Required) {
    HostHandlers[Signal].FrontendHandler = std::move(Func);
  }

  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  const fextl::string ApplicationName;
  FEX_CONFIG_OPT(HalfBarrierTSOEnabled, HALFBARRIERTSOENABLED);

  // Defaults to HalfBarrier; where it is changed is not visible in this header.
  FEXCore::ArchHelpers::Arm64::UnalignedHandlerType UnalignedHandlerType {FEXCore::ArchHelpers::Arm64::UnalignedHandlerType::HalfBarrier};

  enum DefaultBehaviourType {
    DEFAULT_TERM,
    // Core dump based signals are supposed to have a coredump appear
    // For FEX's behaviour we don't really care right now
    DEFAULT_COREDUMP = DEFAULT_TERM,
    DEFAULT_IGNORE,
  };

  // Sigaction structure handed to the raw rt_sigaction syscall.
  // NOTE(review): presumably laid out to match the kernel's own sigaction ABI — verify before reordering fields.
  struct kernel_sigaction {
    union {
      void (*handler)(int);
      void (*sigaction)(int, siginfo_t*, void*);
    };

    uint64_t sa_flags;

    void (*restorer)();
    uint64_t sa_mask;
  };

  // Everything tracked for one signal number.
  struct SignalHandler {
    std::atomic<bool> Installed {};
    // When set, the guest-mask emulation keeps this signal unblocked on the host.
    std::atomic<bool> Required {};
    kernel_sigaction HostAction {};
    kernel_sigaction OldAction {};
    FEX::HLE::HostSignalDelegatorFunctionForGuest GuestHandler {};
    GuestSigAction GuestAction {};
    DefaultBehaviourType DefaultBehaviour {DEFAULT_TERM};

    // Callbacks
    fextl::vector<HostSignalDelegatorFunction> Handlers {};
    HostSignalDelegatorFunction FrontendHandler {};
  };

  // Indexed directly by signal number (1..MAX_SIGNALS), hence the extra slot.
  std::array<SignalHandler, MAX_SIGNALS + 1> HostHandlers {};
  bool InstallHostThunk(int Signal);
  bool UpdateHostThunk(int Signal);

  // Cached VDSO entry points, filled in by SetVDSOSymbols().
  FEX::VDSO::VDSOEntrypoints VDSOPointers {};

  // True when `Address` lies in the half-open range [Config.DispatcherBegin, Config.DispatcherEnd).
  bool IsAddressInDispatcher(uint64_t Address) const {
    return Address >= Config.DispatcherBegin && Address < Config.DispatcherEnd;
  }

  /*
   * Signal frames on 32-bit architecture needs to match exactly how the kernel generates the frame.
   * This is because large parts of the signal frame definition is part of the UAPI.
   * This means that when FEX sets up the signal frame, it needs to match the UAPI stack setup.
   *
   * The two signal stack frame types below describe the two different 32-bit frame types.
   */

  // The 32-bit non-realtime signal frame.
  // This frame type is used when the guest signal is used without the `SA_SIGINFO` flag.
  struct SigFrame_i32 {
    uint32_t pretcode;                          ///< sigreturn return branch point.
    int32_t Signal;                             ///< The signal hit.
    FEXCore::x86::sigcontext sc;                ///< The signal context.
    FEXCore::x86::_libc_fpstate fpstate_unused; ///< Unused fpstate. Retained for backwards compatibility.
    uint32_t extramask[1];                      ///< Upper 32-bits of the signal mask. Lower 32-bits is in the sigcontext.
    char retcode[8];                            ///< Unused but needs to be filled. GDB seemingly uses as a debug marker.
    ///< FP state now follows after this.
  };

  // The 32-bit realtime signal frame.
  // This frame type is used when the guest signal is used with the `SA_SIGINFO` flag.
  struct RTSigFrame_i32 {
    uint32_t pretcode; ///< sigreturn return branch point.
    int32_t Signal;    ///< The signal hit.
    uint32_t pinfo;    ///< Pointer to siginfo_t
    uint32_t puc;      ///< Pointer to ucontext_t
    FEXCore::x86::siginfo_t info;
    FEXCore::x86::ucontext_t uc;
    char retcode[8]; ///< Unused but needs to be filled. GDB seemingly uses as a debug marker.
    ///< FP state now follows after this.
  };

  // Counterparts of the SetupFrame_* functions below: one restore routine per guest frame layout.
  void RestoreFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                        FEXCore::Core::CpuStateFrame* Frame, void* ucontext);
  void RestoreFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                         FEXCore::Core::CpuStateFrame* Frame, void* ucontext);
  void RestoreRTFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* Context,
                           FEXCore::Core::CpuStateFrame* Frame, void* ucontext);

  ///< Setup the signal frame for x64.
  uint64_t SetupFrame_x64(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                          FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                          GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags);

  ///< Setup the signal frame for a 32-bit signal without SA_SIGINFO.
  uint64_t SetupFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                           FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                           GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags);

  ///< Setup the signal frame for a 32-bit signal with SA_SIGINFO.
  uint64_t SetupRTFrame_ia32(FEXCore::Core::InternalThreadState* Thread, ArchHelpers::Context::ContextBackup* ContextBackup,
                             FEXCore::Core::CpuStateFrame* Frame, int Signal, siginfo_t* HostSigInfo, void* ucontext,
                             GuestSigAction* GuestAction, stack_t* GuestStack, uint64_t NewGuestSP, const uint32_t eflags);

  enum class RestoreType {
    TYPE_REALTIME,    ///< Signal restore type is from a `realtime` signal.
    TYPE_NONREALTIME, ///< Signal restore type is from a `non-realtime` signal.
    TYPE_PAUSE,       ///< Signal restore type is from a GDB pause event.
  };
  ArchHelpers::Context::ContextBackup* StoreThreadState(FEXCore::Core::InternalThreadState* Thread, int Signal, void* ucontext);
  void RestoreThreadState(FEXCore::Core::InternalThreadState* Thread, void* ucontext, RestoreType Type);
  bool HandleDispatcherGuestSignal(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext,
                                   GuestSigAction* GuestAction, stack_t* GuestStack);
  bool HandleSignalPause(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext);
  bool HandleSIGILL(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext);

  uint64_t GetNewSigMask(int Signal) const;

  // NOTE(review): presumably these guard host-handler and guest-handler registration respectively — confirm in the .cpp.
  std::mutex HostDelegatorMutex;
  std::mutex GuestDelegatorMutex;
  bool SupportsAVX;
};

// Factory for SignalDelegator; returns an owning pointer to the new instance.
fextl::unique_ptr<FEX::HLE::SignalDelegator>
CreateSignalDelegator(FEXCore::Context::Context* CTX, const std::string_view ApplicationName, bool SupportsAVX);
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/EPoll.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: LinuxSyscalls|syscalls-shared ~ Syscall implementations shared between x86 and x86-64
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <stdint.h>
#include <sys/epoll.h>

namespace FEX::HLE {
// Registers the epoll syscall implementation shared between the x86 and x86-64 guests.
// NOTE(review): REGISTER_SYSCALL_IMPL and SYSCALL_ERRNO are project macros defined elsewhere;
// SYSCALL_ERRNO presumably turns a failing `Result` into a negated errno return — confirm.
void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  // epoll_create: forwarded to the host libc call with the guest's `size` unchanged.
  REGISTER_SYSCALL_IMPL(epoll_create, [](FEXCore::Core::CpuStateFrame* Frame, int size) -> uint64_t {
    uint64_t Result = epoll_create(size);
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FD.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <FEXHeaderUtils/Syscalls.h>

#include <fcntl.h>
#include <stdint.h>
#include <sys/file.h>
#include <sys/eventfd.h>
#include <sys/inotify.h>
#include <sys/mman.h>
#include <sys/timerfd.h>
#include <poll.h>
#include <stddef.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <sys/eventfd.h>
#include <sys/syscall.h>

namespace FEX::HLE {
// Registers the file-descriptor related syscall implementations shared between guest ABIs.
//
// Three patterns appear below:
//  - calls forwarded straight to the host libc / raw syscall,
//  - calls routed through the syscall handler's file manager (`_SyscallHandler->FM`),
//  - calls whose guest `flags` are first translated with RemapFromX86Flags.
// NOTE(review): REGISTER_SYSCALL_IMPL and SYSCALL_ERRNO are project macros defined elsewhere;
// SYSCALL_ERRNO presumably turns a failing `Result` into a negated errno return — confirm.
void RegisterFD(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;
  // poll: the fd array is verified as writable before the host call, but only when it is non-empty.
  REGISTER_SYSCALL_IMPL(poll, [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, int timeout) -> uint64_t {
    if (nfds) {
      // fds is allowed to be garbage if nfds is zero.
      FaultSafeUserMemAccess::VerifyIsWritable(fds, sizeof(struct pollfd) * nfds);
    }
    uint64_t Result = ::poll(fds, nfds, timeout);
    SYSCALL_ERRNO();
  });

  // open: flags remapped, then handled by the file manager.
  REGISTER_SYSCALL_IMPL(open, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, int flags, uint32_t mode) -> uint64_t {
    flags = FEX::HLE::RemapFromX86Flags(flags);
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Open(pathname, flags, mode);
    SYSCALL_ERRNO();
  });

  // close: handled by the file manager.
  REGISTER_SYSCALL_IMPL(close, [](FEXCore::Core::CpuStateFrame* Frame, int fd) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Close(fd);
    SYSCALL_ERRNO();
  });

  // chown / lchown: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(chown, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t {
    uint64_t Result = ::chown(pathname, owner, group);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(lchown, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t {
    uint64_t Result = ::lchown(pathname, owner, group);
    SYSCALL_ERRNO();
  });

  // access: handled by the file manager.
  REGISTER_SYSCALL_IMPL(access, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, int mode) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Access(pathname, mode);
    SYSCALL_ERRNO();
  });

  // pipe: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(pipe, [](FEXCore::Core::CpuStateFrame* Frame, int pipefd[2]) -> uint64_t {
    uint64_t Result = ::pipe(pipefd);
    SYSCALL_ERRNO();
  });

  // dup3: flags remapped, then forwarded to the host.
  REGISTER_SYSCALL_IMPL(dup3, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd, int flags) -> uint64_t {
    flags = FEX::HLE::RemapFromX86Flags(flags);
    uint64_t Result = ::dup3(oldfd, newfd, flags);
    SYSCALL_ERRNO();
  });

  // inotify_init: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(inotify_init, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    uint64_t Result = ::inotify_init();
    SYSCALL_ERRNO();
  });

  // openat: flags remapped, then handled by the file manager.
  REGISTER_SYSCALL_IMPL(openat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfs, const char* pathname, int flags, uint32_t mode) -> uint64_t {
    flags = FEX::HLE::RemapFromX86Flags(flags);
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat(dirfs, pathname, flags, mode);
    SYSCALL_ERRNO();
  });

  // readlinkat / faccessat / faccessat2: handled by the file manager with arguments unchanged.
  REGISTER_SYSCALL_IMPL(readlinkat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, char* buf, size_t bufsiz) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlinkat(dirfd, pathname, buf, bufsiz);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(faccessat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int mode) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat(dirfd, pathname, mode);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(faccessat2, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int mode, int flags) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.FAccessat2(dirfd, pathname, mode, flags);
    SYSCALL_ERRNO();
  });

  // openat2: the guest's open_how is copied into a zero-initialised host copy, truncated to the
  // smaller of the guest-supplied size and sizeof(open_how), so the flags can be remapped without
  // touching guest memory. The truncated size is what gets passed on.
  REGISTER_SYSCALL_IMPL(
    openat2, [](FEXCore::Core::CpuStateFrame* Frame, int dirfs, const char* pathname, struct open_how* how, size_t usize) -> uint64_t {
      open_how HostHow {};
      size_t HostSize = std::min(sizeof(open_how), usize);
      memcpy(&HostHow, how, HostSize);

      HostHow.flags = FEX::HLE::RemapFromX86Flags(HostHow.flags);
      uint64_t Result = FEX::HLE::_SyscallHandler->FM.Openat2(dirfs, pathname, &HostHow, HostSize);
      SYSCALL_ERRNO();
    });

  // eventfd: implemented with the host eventfd2 syscall and a flags value of 0.
  REGISTER_SYSCALL_IMPL(eventfd, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t count) -> uint64_t {
    uint64_t Result = ::syscall(SYSCALL_DEF(eventfd2), count, 0);
    SYSCALL_ERRNO();
  });

  // pipe2: flags remapped, then forwarded to the host.
  REGISTER_SYSCALL_IMPL(pipe2, [](FEXCore::Core::CpuStateFrame* Frame, int pipefd[2], int flags) -> uint64_t {
    flags = FEX::HLE::RemapFromX86Flags(flags);
    uint64_t Result = ::pipe2(pipefd, flags);
    SYSCALL_ERRNO();
  });

  // statx: handled by the file manager.
  REGISTER_SYSCALL_IMPL(
    statx, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, int flags, uint32_t mask, struct statx* statxbuf) -> uint64_t {
      // Flags don't need remapped
      uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statx(dirfd, pathname, flags, mask, statxbuf);
      SYSCALL_ERRNO();
    });

  // close_range: handled by the file manager.
  REGISTER_SYSCALL_IMPL(close_range, [](FEXCore::Core::CpuStateFrame* Frame, unsigned int first, unsigned int last, unsigned int flags) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.CloseRange(first, last, flags);
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/FS.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <stddef.h>
#include <stdint.h>
#include <sys/mount.h>
#include <sys/swap.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/xattr.h>

namespace FEX::HLE {
// Registers the filesystem related syscall implementations shared between guest ABIs.
//
// Path-manipulation calls are either forwarded straight to the host libc or routed through the
// syscall handler's file manager (`_SyscallHandler->FM`). The *xattrat family is only given a
// real implementation when the host kernel is at least 6.13.0.
// NOTE(review): REGISTER_SYSCALL_IMPL and SYSCALL_ERRNO are project macros defined elsewhere;
// SYSCALL_ERRNO presumably turns a failing `Result` into a negated errno return — confirm.
void RegisterFS(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  // rename, mkdir, rmdir, link, unlink, symlink: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(rename, [](FEXCore::Core::CpuStateFrame* Frame, const char* oldpath, const char* newpath) -> uint64_t {
    uint64_t Result = ::rename(oldpath, newpath);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(mkdir, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t {
    uint64_t Result = ::mkdir(pathname, mode);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(rmdir, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname) -> uint64_t {
    uint64_t Result = ::rmdir(pathname);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(link, [](FEXCore::Core::CpuStateFrame* Frame, const char* oldpath, const char* newpath) -> uint64_t {
    uint64_t Result = ::link(oldpath, newpath);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(unlink, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname) -> uint64_t {
    uint64_t Result = ::unlink(pathname);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(symlink, [](FEXCore::Core::CpuStateFrame* Frame, const char* target, const char* linkpath) -> uint64_t {
    uint64_t Result = ::symlink(target, linkpath);
    SYSCALL_ERRNO();
  });

  // readlink: handled by the file manager.
  REGISTER_SYSCALL_IMPL(readlink, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* buf, size_t bufsiz) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Readlink(pathname, buf, bufsiz);
    SYSCALL_ERRNO();
  });

  // chmod: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(chmod, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t {
    uint64_t Result = ::chmod(pathname, mode);
    SYSCALL_ERRNO();
  });

  // mknod: handled by the file manager.
  REGISTER_SYSCALL_IMPL(mknod, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode, dev_t dev) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Mknod(pathname, mode, dev);
    SYSCALL_ERRNO();
  });

  // creat: forwarded directly to the host.
  REGISTER_SYSCALL_IMPL(creat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, mode_t mode) -> uint64_t {
    uint64_t Result = ::creat(pathname, mode);
    SYSCALL_ERRNO();
  });

  // Extended attributes addressed by path: every variant is handled by the file manager.
  REGISTER_SYSCALL_IMPL(
    setxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, const void* value, size_t size, int flags) -> uint64_t {
      uint64_t Result = FEX::HLE::_SyscallHandler->FM.Setxattr(path, name, value, size, flags);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL(
    lsetxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, const void* value, size_t size, int flags) -> uint64_t {
      uint64_t Result = FEX::HLE::_SyscallHandler->FM.LSetxattr(path, name, value, size, flags);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL(getxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, void* value, size_t size) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Getxattr(path, name, value, size);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(lgetxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name, void* value, size_t size) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.LGetxattr(path, name, value, size);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(listxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, char* list, size_t size) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Listxattr(path, list, size);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(llistxattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, char* list, size_t size) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.LListxattr(path, list, size);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(removexattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Removexattr(path, name);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(lremovexattr, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, const char* name) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.LRemovexattr(path, name);
    SYSCALL_ERRNO();
  });
  // The dirfd-relative xattr syscalls are gated on the host kernel version. On older hosts the
  // same syscall names are bound to UnimplementedSyscallSafe instead.
  if (Handler->IsHostKernelVersionAtLeast(6, 13, 0)) {
    REGISTER_SYSCALL_IMPL(setxattrat,
                          [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* pathname, uint32_t at_flags, const char* name,
                             const FileManager::xattr_args* uargs, size_t usize) -> uint64_t {
                            uint64_t Result = FEX::HLE::_SyscallHandler->FM.SetxattrAt(dfd, pathname, at_flags, name, uargs, usize);
                            SYSCALL_ERRNO();
                          });
    REGISTER_SYSCALL_IMPL(getxattrat,
                          [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* pathname, uint32_t at_flags, const char* name,
                             const FileManager::xattr_args* uargs, size_t usize) -> uint64_t {
                            uint64_t Result = FEX::HLE::_SyscallHandler->FM.GetxattrAt(dfd, pathname, at_flags, name, uargs, usize);
                            SYSCALL_ERRNO();
                          });

    REGISTER_SYSCALL_IMPL(
      listxattrat, [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* pathname, uint32_t at_flags, char* list, size_t size) -> uint64_t {
        uint64_t Result = FEX::HLE::_SyscallHandler->FM.ListxattrAt(dfd, pathname, at_flags, list, size);
        SYSCALL_ERRNO();
      });
    REGISTER_SYSCALL_IMPL(
      removexattrat, [](FEXCore::Core::CpuStateFrame* Frame, int dfd, const char* pathname, uint32_t at_flags, const char* name) -> uint64_t {
        uint64_t Result = FEX::HLE::_SyscallHandler->FM.RemovexattrAt(dfd, pathname, at_flags, name);
        SYSCALL_ERRNO();
      });
  } else {
    REGISTER_SYSCALL_IMPL(setxattrat, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(getxattrat, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(listxattrat, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(removexattrat, UnimplementedSyscallSafe);
  }
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/IO.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <linux/aio_abi.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace FEX::HLE {
// Registers the x86 port-I/O privilege syscalls. Neither handler touches the
// host; both simply report a permission failure to the guest.
void RegisterIO(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  // iopl: every requested privilege level is refused.
  REGISTER_SYSCALL_IMPL(iopl, [](FEXCore::Core::CpuStateFrame* Frame, int level) -> uint64_t {
    constexpr uint64_t PermissionDenied = static_cast<uint64_t>(-EPERM);
    return PermissionDenied;
  });

  // ioperm: port permission bits are not available on our architecture, so refuse as well.
  REGISTER_SYSCALL_IMPL(ioperm, [](FEXCore::Core::CpuStateFrame* Frame, unsigned long from, unsigned long num, int turn_on) -> uint64_t {
    constexpr uint64_t PermissionDenied = static_cast<uint64_t>(-EPERM);
    return PermissionDenied;
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Info.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstring>
#include <linux/kcmp.h>
#include <linux/seccomp.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <syslog.h>
#include <sys/random.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/utsname.h>
#include <sys/klog.h>
#include <sys/personality.h>
#include <sys/ptrace.h>
#include <unistd.h>

#include <git_version.h>

namespace FEX::HLE {
using cap_user_header_t = void*;
using cap_user_data_t = void*;

// Registers the informational syscalls shared between 32-bit and 64-bit guests:
// uname, personality, seccomp and ptrace.
void RegisterInfo(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  // uname: fills the guest's utsname with a mix of host values (nodename, domainname)
  // and emulated values (sysname, release, version, machine). Always returns 0.
  REGISTER_SYSCALL_IMPL(uname, [](FEXCore::Core::CpuStateFrame* Frame, struct utsname* buf) -> uint64_t {
    auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

    struct utsname Local {};
    if (::uname(&Local) == 0) {
      memcpy(buf->nodename, Local.nodename, sizeof(Local.nodename));
      static_assert(sizeof(Local.nodename) <= sizeof(buf->nodename));
      memcpy(buf->domainname, Local.domainname, sizeof(Local.domainname));
      static_assert(sizeof(Local.domainname) <= sizeof(buf->domainname));
    } else {
      strcpy(buf->nodename, "FEXCore");
      // Give domainname a defined, NUL-terminated value as well, so the guest never reads
      // whatever happened to be in its buffer. "(none)" is the Linux default domain name.
      strcpy(buf->domainname, "(none)");
      LogMan::Msg::EFmt("Couldn't determine host nodename. Defaulting to '{}'", buf->nodename);
    }
    strcpy(buf->sysname, "Linux");
    uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion();
    if (Thread->persona & UNAME26) {
      // Kernel version converts from 6.x.y to 2.6.60+x.
      GuestVersion = FEX::HLE::SyscallHandler::KernelVersion(2, 6, 60 + FEX::HLE::SyscallHandler::KernelMinor(GuestVersion));
    }
    snprintf(buf->release, sizeof(buf->release), "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion),
             FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion));

    // The version string is assembled at compile time; the static_assert guarantees the strcpy fits.
    const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__;
    strcpy(buf->version, version);
    static_assert(sizeof(version) <= sizeof(buf->version), "uname version define became too large!");
    if (Thread->persona & PER_LINUX32) {
      // Tell the guest that we are a 32bit kernel
      strcpy(buf->machine, "i686");
    } else {
      // Tell the guest that we are a 64bit kernel
      strcpy(buf->machine, "x86_64");
    }
    return 0;
  });

  // personality: the guest-visible persona is tracked per thread. The host is only told
  // about the bits it can accept; the guest always gets its previous persona back.
  REGISTER_SYSCALL_IMPL(personality, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t persona) -> uint64_t {
    auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

    if (persona == ~0U) {
      // Special case, only queries the persona.
      return Thread->persona;
    }

    // Mask off `PER_LINUX32` because AArch64 doesn't support it.
    uint32_t NewPersona = persona & ~PER_LINUX32;

    // This syscall can not physically fail with PER_LINUX32 masked off.
    // It also can not fail on a real x86 kernel.
    (void)::syscall(SYSCALL_DEF(personality), NewPersona);

    // Return the old persona while setting the new one.
    // The unmasked value is stored so later queries and uname still see PER_LINUX32.
    auto OldPersona = Thread->persona;
    Thread->persona = persona;
    return OldPersona;
  });

  // seccomp: fully delegated to the seccomp emulator owned by the syscall handler.
  REGISTER_SYSCALL_IMPL(seccomp, [](FEXCore::Core::CpuStateFrame* Frame, unsigned int operation, unsigned int flags, void* args) -> uint64_t {
    return FEX::HLE::_SyscallHandler->SeccompEmulator.Handle(Frame, operation, flags, args);
  });

  // ptrace: only a small allow-list of requests is forwarded to the host; everything else is -EPERM.
  REGISTER_SYSCALL_IMPL(
    ptrace, [](FEXCore::Core::CpuStateFrame* Frame, int /*enum __ptrace_request*/ request, pid_t pid, void* addr, void* data) -> uint64_t {
      uint64_t Result {};

      switch (request) {
      case PTRACE_PEEKTEXT:
      case PTRACE_PEEKDATA:
      case PTRACE_POKETEXT:
      case PTRACE_POKEDATA:
      case PTRACE_ATTACH:
      case PTRACE_DETACH:
        // Passthrough these requests. Allows Wine to run the Ubisoft launcher.
        Result = ::syscall(SYSCALL_DEF(ptrace), request, pid, addr, data);
        SYSCALL_ERRNO();
      default: break;
      }
      // We don't support this
      return -EPERM;
    });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Memory.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/IR/IR.h>

#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace FEX::HLE {
// Registers the memory-management syscalls that are shared between 32-bit and 64-bit guests.
void RegisterMemory(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  // brk: handled entirely by the syscall handler.
  REGISTER_SYSCALL_IMPL(brk, [](FEXCore::Core::CpuStateFrame* Frame, void* addr) -> uint64_t {
    uint64_t Result = FEX::HLE::_SyscallHandler->HandleBRK(Frame, addr);
    SYSCALL_ERRNO();
  });

  // madvise: forwarded to the host; the syscall handler is only informed when the host call succeeded.
  REGISTER_SYSCALL_IMPL(madvise, [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length, int32_t advice) -> uint64_t {
    uint64_t Result = ::madvise(addr, length, advice);

    const bool HostCallFailed = Result == static_cast<uint64_t>(-1);
    if (!HostCallFailed) {
      FEX::HLE::_SyscallHandler->TrackMadvise(Frame->Thread, reinterpret_cast<uintptr_t>(addr), length, advice);
    }
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/NotImplemented.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include <FEXCore/Utils/LogManager.h>
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <stdint.h>
#include <sys/epoll.h>

// Registers `name` with a handler that ignores its arguments, emits a debug log line
// naming the syscall, and returns -ENOSYS.
// Note: the expansion already ends with a semicolon.
#define REGISTER_SYSCALL_NOT_IMPL(name)                                             \
  REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { \
    LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name);                  \
    return -ENOSYS;                                                                 \
  });

// Registers `name` with a handler that ignores its arguments and returns -EPERM.
// Note: the expansion already ends with a semicolon.
#define REGISTER_SYSCALL_NO_PERM(name) REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EPERM; });

// Registers `name` with a handler that ignores its arguments and returns -EACCES.
// Note: the expansion already ends with a semicolon.
#define REGISTER_SYSCALL_NO_ACCESS(name) \
  REGISTER_SYSCALL_IMPL(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EACCES; });

namespace FEX::HLE {
// These syscalls are removed from, or not implemented in, the Linux kernel we present to the guest.

// Registers stub handlers for syscalls the guest must not (or cannot) use.
// Three flavours are registered, depending on the macro used:
//   - REGISTER_SYSCALL_NOT_IMPL:  logs a debug message and returns -ENOSYS.
//   - REGISTER_SYSCALL_NO_PERM:   returns -EPERM.
//   - REGISTER_SYSCALL_NO_ACCESS: returns -EACCES.
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) {
  // Deprecated/removed syscalls: report ENOSYS.
  REGISTER_SYSCALL_NOT_IMPL(ustat);
  REGISTER_SYSCALL_NOT_IMPL(sysfs);
  REGISTER_SYSCALL_NOT_IMPL(uselib);
  REGISTER_SYSCALL_NOT_IMPL(create_module);
  REGISTER_SYSCALL_NOT_IMPL(get_kernel_syms);
  REGISTER_SYSCALL_NOT_IMPL(query_module);
  REGISTER_SYSCALL_NOT_IMPL(nfsservctl); // Was removed in Linux 3.1
  REGISTER_SYSCALL_NOT_IMPL(getpmsg);
  REGISTER_SYSCALL_NOT_IMPL(putpmsg);
  REGISTER_SYSCALL_NOT_IMPL(afs_syscall);
  REGISTER_SYSCALL_NOT_IMPL(vserver);
  REGISTER_SYSCALL_NOT_IMPL(_sysctl); // Was removed in Linux 5.5

  // Syscalls the guest is never permitted to perform: report EPERM.
  // Every invocation is terminated explicitly so the list does not silently rely on the
  // semicolon that the macro expansion happens to carry.
  REGISTER_SYSCALL_NO_PERM(vhangup);
  REGISTER_SYSCALL_NO_PERM(reboot);
  REGISTER_SYSCALL_NO_PERM(sethostname);
  REGISTER_SYSCALL_NO_PERM(setdomainname);
  REGISTER_SYSCALL_NO_PERM(kexec_load);
  REGISTER_SYSCALL_NO_PERM(finit_module);
  REGISTER_SYSCALL_NO_PERM(bpf);
  REGISTER_SYSCALL_NO_PERM(lookup_dcookie);
  REGISTER_SYSCALL_NO_PERM(init_module);
  REGISTER_SYSCALL_NO_PERM(delete_module);
  REGISTER_SYSCALL_NO_PERM(quotactl);

  // perf_event_open reports EACCES rather than EPERM.
  REGISTER_SYSCALL_NO_ACCESS(perf_event_open);
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Passthrough.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: LinuxSyscalls|syscalls-shared ~ Syscall implementations shared between x86 and x86-64
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <stdint.h>
#include <sys/epoll.h>

namespace FEX::HLE {
#ifdef ARCHITECTURE_arm64
// AArch64 host: issues host syscall `syscall_num` with no arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough0(FEXCore::Core::CpuStateFrame* Frame) {
  // x8 carries the syscall number; x0 receives the result.
  register int SyscallNumReg asm("x8") = syscall_num;
  register uint64_t ResultReg asm("x0");
  __asm volatile("svc #0;" : "=r"(ResultReg) : "r"(SyscallNumReg) : "memory");
  return ResultReg;
}

// AArch64 host: issues host syscall `syscall_num` with one argument via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough1(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1) {
  // The argument is pinned to x0, which doubles as the result register; x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;" : "=r"(Reg0) : "r"(SyscallNumReg), "r"(Reg0) : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with two arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough2(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2) {
  // Arguments are pinned to x0-x1 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;" : "=r"(Reg0) : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1) : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with three arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough3(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3) {
  // Arguments are pinned to x0-x2 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register uint64_t Reg2 asm("x2") = arg3;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;" : "=r"(Reg0) : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1), "r"(Reg2) : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with four arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough4(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) {
  // Arguments are pinned to x0-x3 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register uint64_t Reg2 asm("x2") = arg3;
  register uint64_t Reg3 asm("x3") = arg4;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;" : "=r"(Reg0) : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1), "r"(Reg2), "r"(Reg3) : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with five arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough5(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) {
  // Arguments are pinned to x0-x4 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register uint64_t Reg2 asm("x2") = arg3;
  register uint64_t Reg3 asm("x3") = arg4;
  register uint64_t Reg4 asm("x4") = arg5;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;" : "=r"(Reg0) : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1), "r"(Reg2), "r"(Reg3), "r"(Reg4) : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with six arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough6(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
                             uint64_t arg6) {
  // Arguments are pinned to x0-x5 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register uint64_t Reg2 asm("x2") = arg3;
  register uint64_t Reg3 asm("x3") = arg4;
  register uint64_t Reg4 asm("x4") = arg5;
  register uint64_t Reg5 asm("x5") = arg6;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;"
                 : "=r"(Reg0)
                 : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1), "r"(Reg2), "r"(Reg3), "r"(Reg4), "r"(Reg5)
                 : "memory");
  return Reg0;
}

// AArch64 host: issues host syscall `syscall_num` with seven arguments via `svc #0`
// and hands back whatever the kernel leaves in x0, unmodified.
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough7(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
                             uint64_t arg6, uint64_t arg7) {
  // Arguments are pinned to x0-x6 (x0 doubles as the result register); x8 carries the syscall number.
  register uint64_t Reg0 asm("x0") = arg1;
  register uint64_t Reg1 asm("x1") = arg2;
  register uint64_t Reg2 asm("x2") = arg3;
  register uint64_t Reg3 asm("x3") = arg4;
  register uint64_t Reg4 asm("x4") = arg5;
  register uint64_t Reg5 asm("x5") = arg6;
  register uint64_t Reg6 asm("x6") = arg7;
  register int SyscallNumReg asm("x8") = syscall_num;
  __asm volatile("svc #0;"
                 : "=r"(Reg0)
                 : "r"(SyscallNumReg), "r"(Reg0), "r"(Reg1), "r"(Reg2), "r"(Reg3), "r"(Reg4), "r"(Reg5), "r"(Reg6)
                 : "memory");
  return Reg0;
}
#else
// Non-AArch64 host: forwards host syscall `syscall_num` with no arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough0(FEXCore::Core::CpuStateFrame* Frame) {
  uint64_t Result = ::syscall(syscall_num);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with one argument through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough1(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1) {
  uint64_t Result = ::syscall(syscall_num, arg1);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with two arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough2(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with three arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough3(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2, arg3);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with four arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough4(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2, arg3, arg4);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with five arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough5(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2, arg3, arg4, arg5);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with six arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough6(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
                             uint64_t arg6) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2, arg3, arg4, arg5, arg6);
  SYSCALL_ERRNO();
}

// Non-AArch64 host: forwards host syscall `syscall_num` with seven arguments through libc's
// ::syscall(). The return statement is supplied by SYSCALL_ERRNO(), which is defined
// elsewhere (presumably it converts a -1/errno failure into a negative errno — confirm
// against the macro definition).
template<int syscall_num>
requires (syscall_num != -1)
uint64_t SyscallPassthrough7(FEXCore::Core::CpuStateFrame* Frame, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
                             uint64_t arg6, uint64_t arg7) {
  uint64_t Result = ::syscall(syscall_num, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
  SYSCALL_ERRNO();
}
#endif

// Registers every syscall that is forwarded to the host kernel without argument translation.
// The numeric suffix of SyscallPassthroughN is the number of guest arguments forwarded, so it
// must be at least the number of arguments the syscall actually consumes; any argument beyond
// N never reaches the host.
// Syscalls that only exist on newer host kernels are registered conditionally and fall back
// to UnimplementedSyscallSafe on older hosts.
void RegisterCommon(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;
  REGISTER_SYSCALL_IMPL(read, SyscallPassthrough3<SYSCALL_DEF(read)>);
  REGISTER_SYSCALL_IMPL(write, SyscallPassthrough3<SYSCALL_DEF(write)>);
  REGISTER_SYSCALL_IMPL(lseek, SyscallPassthrough3<SYSCALL_DEF(lseek)>);
  REGISTER_SYSCALL_IMPL(sched_yield, SyscallPassthrough0<SYSCALL_DEF(sched_yield)>);
  REGISTER_SYSCALL_IMPL(msync, SyscallPassthrough3<SYSCALL_DEF(msync)>);
  REGISTER_SYSCALL_IMPL(mincore, SyscallPassthrough3<SYSCALL_DEF(mincore)>);
  REGISTER_SYSCALL_IMPL(shmget, SyscallPassthrough3<SYSCALL_DEF(shmget)>);
  REGISTER_SYSCALL_IMPL(shmctl, SyscallPassthrough3<SYSCALL_DEF(shmctl)>);
  REGISTER_SYSCALL_IMPL(getpid, SyscallPassthrough0<SYSCALL_DEF(getpid)>);
  REGISTER_SYSCALL_IMPL(socket, SyscallPassthrough3<SYSCALL_DEF(socket)>);
  REGISTER_SYSCALL_IMPL(connect, SyscallPassthrough3<SYSCALL_DEF(connect)>);
  REGISTER_SYSCALL_IMPL(sendto, SyscallPassthrough6<SYSCALL_DEF(sendto)>);
  REGISTER_SYSCALL_IMPL(recvfrom, SyscallPassthrough6<SYSCALL_DEF(recvfrom)>);
  REGISTER_SYSCALL_IMPL(shutdown, SyscallPassthrough2<SYSCALL_DEF(shutdown)>);
  REGISTER_SYSCALL_IMPL(bind, SyscallPassthrough3<SYSCALL_DEF(bind)>);
  REGISTER_SYSCALL_IMPL(listen, SyscallPassthrough2<SYSCALL_DEF(listen)>);
  REGISTER_SYSCALL_IMPL(getsockname, SyscallPassthrough3<SYSCALL_DEF(getsockname)>);
  REGISTER_SYSCALL_IMPL(getpeername, SyscallPassthrough3<SYSCALL_DEF(getpeername)>);
  REGISTER_SYSCALL_IMPL(socketpair, SyscallPassthrough4<SYSCALL_DEF(socketpair)>);
  REGISTER_SYSCALL_IMPL(kill, SyscallPassthrough2<SYSCALL_DEF(kill)>);
  REGISTER_SYSCALL_IMPL(semget, SyscallPassthrough3<SYSCALL_DEF(semget)>);
  REGISTER_SYSCALL_IMPL(msgget, SyscallPassthrough2<SYSCALL_DEF(msgget)>);
  REGISTER_SYSCALL_IMPL(msgsnd, SyscallPassthrough4<SYSCALL_DEF(msgsnd)>);
  REGISTER_SYSCALL_IMPL(msgrcv, SyscallPassthrough5<SYSCALL_DEF(msgrcv)>);
  REGISTER_SYSCALL_IMPL(msgctl, SyscallPassthrough3<SYSCALL_DEF(msgctl)>);
  REGISTER_SYSCALL_IMPL(flock, SyscallPassthrough2<SYSCALL_DEF(flock)>);
  REGISTER_SYSCALL_IMPL(fsync, SyscallPassthrough1<SYSCALL_DEF(fsync)>);
  REGISTER_SYSCALL_IMPL(fdatasync, SyscallPassthrough1<SYSCALL_DEF(fdatasync)>);
  REGISTER_SYSCALL_IMPL(truncate, SyscallPassthrough2<SYSCALL_DEF(truncate)>);
  REGISTER_SYSCALL_IMPL(getcwd, SyscallPassthrough2<SYSCALL_DEF(getcwd)>);
  REGISTER_SYSCALL_IMPL(chdir, SyscallPassthrough1<SYSCALL_DEF(chdir)>);
  REGISTER_SYSCALL_IMPL(fchdir, SyscallPassthrough1<SYSCALL_DEF(fchdir)>);
  REGISTER_SYSCALL_IMPL(fchmod, SyscallPassthrough2<SYSCALL_DEF(fchmod)>);
  REGISTER_SYSCALL_IMPL(fchown, SyscallPassthrough3<SYSCALL_DEF(fchown)>);
  REGISTER_SYSCALL_IMPL(umask, SyscallPassthrough1<SYSCALL_DEF(umask)>);
  REGISTER_SYSCALL_IMPL(getuid, SyscallPassthrough0<SYSCALL_DEF(getuid)>);
  REGISTER_SYSCALL_IMPL(syslog, SyscallPassthrough3<SYSCALL_DEF(syslog)>);
  REGISTER_SYSCALL_IMPL(getgid, SyscallPassthrough0<SYSCALL_DEF(getgid)>);
  REGISTER_SYSCALL_IMPL(setuid, SyscallPassthrough1<SYSCALL_DEF(setuid)>);
  REGISTER_SYSCALL_IMPL(setgid, SyscallPassthrough1<SYSCALL_DEF(setgid)>);
  REGISTER_SYSCALL_IMPL(geteuid, SyscallPassthrough0<SYSCALL_DEF(geteuid)>);
  REGISTER_SYSCALL_IMPL(getegid, SyscallPassthrough0<SYSCALL_DEF(getegid)>);
  REGISTER_SYSCALL_IMPL(setpgid, SyscallPassthrough2<SYSCALL_DEF(setpgid)>);
  REGISTER_SYSCALL_IMPL(getppid, SyscallPassthrough0<SYSCALL_DEF(getppid)>);
  REGISTER_SYSCALL_IMPL(setsid, SyscallPassthrough0<SYSCALL_DEF(setsid)>);
  REGISTER_SYSCALL_IMPL(setreuid, SyscallPassthrough2<SYSCALL_DEF(setreuid)>);
  REGISTER_SYSCALL_IMPL(setregid, SyscallPassthrough2<SYSCALL_DEF(setregid)>);
  REGISTER_SYSCALL_IMPL(getgroups, SyscallPassthrough2<SYSCALL_DEF(getgroups)>);
  REGISTER_SYSCALL_IMPL(setgroups, SyscallPassthrough2<SYSCALL_DEF(setgroups)>);
  REGISTER_SYSCALL_IMPL(setresuid, SyscallPassthrough3<SYSCALL_DEF(setresuid)>);
  REGISTER_SYSCALL_IMPL(getresuid, SyscallPassthrough3<SYSCALL_DEF(getresuid)>);
  REGISTER_SYSCALL_IMPL(setresgid, SyscallPassthrough3<SYSCALL_DEF(setresgid)>);
  REGISTER_SYSCALL_IMPL(getresgid, SyscallPassthrough3<SYSCALL_DEF(getresgid)>);
  REGISTER_SYSCALL_IMPL(getpgid, SyscallPassthrough1<SYSCALL_DEF(getpgid)>);
  REGISTER_SYSCALL_IMPL(setfsuid, SyscallPassthrough1<SYSCALL_DEF(setfsuid)>);
  REGISTER_SYSCALL_IMPL(setfsgid, SyscallPassthrough1<SYSCALL_DEF(setfsgid)>);
  REGISTER_SYSCALL_IMPL(getsid, SyscallPassthrough1<SYSCALL_DEF(getsid)>);
  REGISTER_SYSCALL_IMPL(capget, SyscallPassthrough2<SYSCALL_DEF(capget)>);
  REGISTER_SYSCALL_IMPL(capset, SyscallPassthrough2<SYSCALL_DEF(capset)>);
  REGISTER_SYSCALL_IMPL(getpriority, SyscallPassthrough2<SYSCALL_DEF(getpriority)>);
  REGISTER_SYSCALL_IMPL(setpriority, SyscallPassthrough3<SYSCALL_DEF(setpriority)>);
  REGISTER_SYSCALL_IMPL(sched_setparam, SyscallPassthrough2<SYSCALL_DEF(sched_setparam)>);
  REGISTER_SYSCALL_IMPL(sched_getparam, SyscallPassthrough2<SYSCALL_DEF(sched_getparam)>);
  REGISTER_SYSCALL_IMPL(sched_setscheduler, SyscallPassthrough3<SYSCALL_DEF(sched_setscheduler)>);
  REGISTER_SYSCALL_IMPL(sched_getscheduler, SyscallPassthrough1<SYSCALL_DEF(sched_getscheduler)>);
  REGISTER_SYSCALL_IMPL(sched_get_priority_max, SyscallPassthrough1<SYSCALL_DEF(sched_get_priority_max)>);
  REGISTER_SYSCALL_IMPL(sched_get_priority_min, SyscallPassthrough1<SYSCALL_DEF(sched_get_priority_min)>);
  REGISTER_SYSCALL_IMPL(mlock, SyscallPassthrough2<SYSCALL_DEF(mlock)>);
  REGISTER_SYSCALL_IMPL(munlock, SyscallPassthrough2<SYSCALL_DEF(munlock)>);
  REGISTER_SYSCALL_IMPL(pivot_root, SyscallPassthrough2<SYSCALL_DEF(pivot_root)>);
  REGISTER_SYSCALL_IMPL(chroot, SyscallPassthrough1<SYSCALL_DEF(chroot)>);
  REGISTER_SYSCALL_IMPL(sync, SyscallPassthrough0<SYSCALL_DEF(sync)>);
  REGISTER_SYSCALL_IMPL(acct, SyscallPassthrough1<SYSCALL_DEF(acct)>);
  REGISTER_SYSCALL_IMPL(mount, SyscallPassthrough5<SYSCALL_DEF(mount)>);
  REGISTER_SYSCALL_IMPL(umount2, SyscallPassthrough2<SYSCALL_DEF(umount2)>);
  REGISTER_SYSCALL_IMPL(swapon, SyscallPassthrough2<SYSCALL_DEF(swapon)>);
  REGISTER_SYSCALL_IMPL(swapoff, SyscallPassthrough1<SYSCALL_DEF(swapoff)>);
  REGISTER_SYSCALL_IMPL(gettid, SyscallPassthrough0<SYSCALL_DEF(gettid)>);
  REGISTER_SYSCALL_IMPL(fsetxattr, SyscallPassthrough5<SYSCALL_DEF(fsetxattr)>);
  REGISTER_SYSCALL_IMPL(fgetxattr, SyscallPassthrough4<SYSCALL_DEF(fgetxattr)>);
  REGISTER_SYSCALL_IMPL(flistxattr, SyscallPassthrough3<SYSCALL_DEF(flistxattr)>);
  REGISTER_SYSCALL_IMPL(fremovexattr, SyscallPassthrough2<SYSCALL_DEF(fremovexattr)>);
  REGISTER_SYSCALL_IMPL(tkill, SyscallPassthrough2<SYSCALL_DEF(tkill)>);
  REGISTER_SYSCALL_IMPL(sched_setaffinity, SyscallPassthrough3<SYSCALL_DEF(sched_setaffinity)>);
  REGISTER_SYSCALL_IMPL(sched_getaffinity, SyscallPassthrough3<SYSCALL_DEF(sched_getaffinity)>);
  REGISTER_SYSCALL_IMPL(io_setup, SyscallPassthrough2<SYSCALL_DEF(io_setup)>);
  REGISTER_SYSCALL_IMPL(io_destroy, SyscallPassthrough1<SYSCALL_DEF(io_destroy)>);
  REGISTER_SYSCALL_IMPL(io_submit, SyscallPassthrough3<SYSCALL_DEF(io_submit)>);
  REGISTER_SYSCALL_IMPL(io_cancel, SyscallPassthrough3<SYSCALL_DEF(io_cancel)>);
  REGISTER_SYSCALL_IMPL(remap_file_pages, SyscallPassthrough5<SYSCALL_DEF(remap_file_pages)>);
  REGISTER_SYSCALL_IMPL(timer_getoverrun, SyscallPassthrough1<SYSCALL_DEF(timer_getoverrun)>);
  REGISTER_SYSCALL_IMPL(timer_delete, SyscallPassthrough1<SYSCALL_DEF(timer_delete)>);
  REGISTER_SYSCALL_IMPL(tgkill, SyscallPassthrough3<SYSCALL_DEF(tgkill)>);
  REGISTER_SYSCALL_IMPL(mbind, SyscallPassthrough6<SYSCALL_DEF(mbind)>);
  REGISTER_SYSCALL_IMPL(set_mempolicy, SyscallPassthrough3<SYSCALL_DEF(set_mempolicy)>);
  REGISTER_SYSCALL_IMPL(get_mempolicy, SyscallPassthrough5<SYSCALL_DEF(get_mempolicy)>);
  REGISTER_SYSCALL_IMPL(mq_unlink, SyscallPassthrough1<SYSCALL_DEF(mq_unlink)>);
  REGISTER_SYSCALL_IMPL(add_key, SyscallPassthrough5<SYSCALL_DEF(add_key)>);
  REGISTER_SYSCALL_IMPL(request_key, SyscallPassthrough4<SYSCALL_DEF(request_key)>);
  REGISTER_SYSCALL_IMPL(keyctl, SyscallPassthrough5<SYSCALL_DEF(keyctl)>);
  // ioprio_set(which, who, ioprio) consumes three arguments; ioprio_get(which, who) consumes two.
  // Forwarding only two for ioprio_set would drop the priority value itself.
  REGISTER_SYSCALL_IMPL(ioprio_set, SyscallPassthrough3<SYSCALL_DEF(ioprio_set)>);
  REGISTER_SYSCALL_IMPL(ioprio_get, SyscallPassthrough2<SYSCALL_DEF(ioprio_get)>);
  REGISTER_SYSCALL_IMPL(inotify_add_watch, SyscallPassthrough3<SYSCALL_DEF(inotify_add_watch)>);
  REGISTER_SYSCALL_IMPL(inotify_rm_watch, SyscallPassthrough2<SYSCALL_DEF(inotify_rm_watch)>);
  REGISTER_SYSCALL_IMPL(migrate_pages, SyscallPassthrough4<SYSCALL_DEF(migrate_pages)>);
  REGISTER_SYSCALL_IMPL(mkdirat, SyscallPassthrough3<SYSCALL_DEF(mkdirat)>);
  REGISTER_SYSCALL_IMPL(mknodat, SyscallPassthrough4<SYSCALL_DEF(mknodat)>);
  REGISTER_SYSCALL_IMPL(fchownat, SyscallPassthrough5<SYSCALL_DEF(fchownat)>);
  REGISTER_SYSCALL_IMPL(unlinkat, SyscallPassthrough3<SYSCALL_DEF(unlinkat)>);
  REGISTER_SYSCALL_IMPL(renameat, SyscallPassthrough4<SYSCALL_DEF(renameat)>);
  REGISTER_SYSCALL_IMPL(linkat, SyscallPassthrough5<SYSCALL_DEF(linkat)>);
  REGISTER_SYSCALL_IMPL(symlinkat, SyscallPassthrough3<SYSCALL_DEF(symlinkat)>);
  REGISTER_SYSCALL_IMPL(fchmodat, SyscallPassthrough3<SYSCALL_DEF(fchmodat)>);
  REGISTER_SYSCALL_IMPL(unshare, SyscallPassthrough1<SYSCALL_DEF(unshare)>);
  REGISTER_SYSCALL_IMPL(splice, SyscallPassthrough6<SYSCALL_DEF(splice)>);
  REGISTER_SYSCALL_IMPL(tee, SyscallPassthrough4<SYSCALL_DEF(tee)>);
  REGISTER_SYSCALL_IMPL(move_pages, SyscallPassthrough6<SYSCALL_DEF(move_pages)>);
  REGISTER_SYSCALL_IMPL(timerfd_create, SyscallPassthrough2<SYSCALL_DEF(timerfd_create)>);
  REGISTER_SYSCALL_IMPL(accept4, SyscallPassthrough4<SYSCALL_DEF(accept4)>);
  REGISTER_SYSCALL_IMPL(eventfd2, SyscallPassthrough2<SYSCALL_DEF(eventfd2)>);
  REGISTER_SYSCALL_IMPL(epoll_create1, SyscallPassthrough1<SYSCALL_DEF(epoll_create1)>);
  REGISTER_SYSCALL_IMPL(inotify_init1, SyscallPassthrough1<SYSCALL_DEF(inotify_init1)>);
  REGISTER_SYSCALL_IMPL(fanotify_init, SyscallPassthrough2<SYSCALL_DEF(fanotify_init)>);
  REGISTER_SYSCALL_IMPL(fanotify_mark, SyscallPassthrough5<SYSCALL_DEF(fanotify_mark)>);
  REGISTER_SYSCALL_IMPL(prlimit_64, SyscallPassthrough4<SYSCALL_DEF(prlimit_64)>);
  REGISTER_SYSCALL_IMPL(name_to_handle_at, SyscallPassthrough5<SYSCALL_DEF(name_to_handle_at)>);
  REGISTER_SYSCALL_IMPL(open_by_handle_at, SyscallPassthrough3<SYSCALL_DEF(open_by_handle_at)>);
  REGISTER_SYSCALL_IMPL(syncfs, SyscallPassthrough1<SYSCALL_DEF(syncfs)>);
  REGISTER_SYSCALL_IMPL(setns, SyscallPassthrough2<SYSCALL_DEF(setns)>);
  REGISTER_SYSCALL_IMPL(getcpu, SyscallPassthrough3<SYSCALL_DEF(getcpu)>);
  REGISTER_SYSCALL_IMPL(kcmp, SyscallPassthrough5<SYSCALL_DEF(kcmp)>);
  REGISTER_SYSCALL_IMPL(sched_setattr, SyscallPassthrough3<SYSCALL_DEF(sched_setattr)>);
  REGISTER_SYSCALL_IMPL(sched_getattr, SyscallPassthrough4<SYSCALL_DEF(sched_getattr)>);
  REGISTER_SYSCALL_IMPL(renameat2, SyscallPassthrough5<SYSCALL_DEF(renameat2)>);
  REGISTER_SYSCALL_IMPL(getrandom, SyscallPassthrough3<SYSCALL_DEF(getrandom)>);
  REGISTER_SYSCALL_IMPL(memfd_create, SyscallPassthrough2<SYSCALL_DEF(memfd_create)>);
  REGISTER_SYSCALL_IMPL(membarrier, SyscallPassthrough2<SYSCALL_DEF(membarrier)>);
  REGISTER_SYSCALL_IMPL(mlock2, SyscallPassthrough3<SYSCALL_DEF(mlock2)>);
  REGISTER_SYSCALL_IMPL(copy_file_range, SyscallPassthrough6<SYSCALL_DEF(copy_file_range)>);
  REGISTER_SYSCALL_IMPL(pkey_mprotect, SyscallPassthrough4<SYSCALL_DEF(pkey_mprotect)>);
  REGISTER_SYSCALL_IMPL(pkey_alloc, SyscallPassthrough2<SYSCALL_DEF(pkey_alloc)>);
  REGISTER_SYSCALL_IMPL(pkey_free, SyscallPassthrough1<SYSCALL_DEF(pkey_free)>);
  // io_uring can't be emulated as it can pass `epoll_event` objects around.
  // These are 12-byte packed structs on x86/x86-64, but on other architectures are 16-byte.
  // This means the `data` member is at offset 4 on x86, but offset 8 on other architectures, corrupting the data.
  // The queue data is entirely user-controlled, so we can't rewrite data in any sane fashion.
  // This is visible with `node.js` as a hang.
  REGISTER_SYSCALL_IMPL(io_uring_setup, UnimplementedSyscallSafe);
  REGISTER_SYSCALL_IMPL(io_uring_enter, UnimplementedSyscallSafe);
  REGISTER_SYSCALL_IMPL(io_uring_register, UnimplementedSyscallSafe);
  REGISTER_SYSCALL_IMPL(open_tree, SyscallPassthrough3<SYSCALL_DEF(open_tree)>);
  REGISTER_SYSCALL_IMPL(move_mount, SyscallPassthrough5<SYSCALL_DEF(move_mount)>);
  REGISTER_SYSCALL_IMPL(fsopen, SyscallPassthrough3<SYSCALL_DEF(fsopen)>);
  REGISTER_SYSCALL_IMPL(fsconfig, SyscallPassthrough5<SYSCALL_DEF(fsconfig)>);
  REGISTER_SYSCALL_IMPL(fsmount, SyscallPassthrough3<SYSCALL_DEF(fsmount)>);
  REGISTER_SYSCALL_IMPL(fspick, SyscallPassthrough3<SYSCALL_DEF(fspick)>);
  REGISTER_SYSCALL_IMPL(pidfd_open, SyscallPassthrough2<SYSCALL_DEF(pidfd_open)>);
  REGISTER_SYSCALL_IMPL(pidfd_getfd, SyscallPassthrough3<SYSCALL_DEF(pidfd_getfd)>);
  REGISTER_SYSCALL_IMPL(mount_setattr, SyscallPassthrough5<SYSCALL_DEF(mount_setattr)>);
  REGISTER_SYSCALL_IMPL(quotactl_fd, SyscallPassthrough4<SYSCALL_DEF(quotactl_fd)>);
  REGISTER_SYSCALL_IMPL(landlock_create_ruleset, SyscallPassthrough3<SYSCALL_DEF(landlock_create_ruleset)>);
  REGISTER_SYSCALL_IMPL(landlock_add_rule, SyscallPassthrough4<SYSCALL_DEF(landlock_add_rule)>);
  REGISTER_SYSCALL_IMPL(landlock_restrict_self, SyscallPassthrough2<SYSCALL_DEF(landlock_restrict_self)>);
  REGISTER_SYSCALL_IMPL(memfd_secret, SyscallPassthrough1<SYSCALL_DEF(memfd_secret)>);
  REGISTER_SYSCALL_IMPL(process_mrelease, SyscallPassthrough2<SYSCALL_DEF(process_mrelease)>);

  // Everything below is gated on the host kernel version reported by the handler.
  if (Handler->IsHostKernelVersionAtLeast(5, 16, 0)) {
    REGISTER_SYSCALL_IMPL(futex_waitv, SyscallPassthrough5<SYSCALL_DEF(futex_waitv)>);
  } else {
    REGISTER_SYSCALL_IMPL(futex_waitv, UnimplementedSyscallSafe);
  }
  if (Handler->IsHostKernelVersionAtLeast(5, 17, 0)) {
    REGISTER_SYSCALL_IMPL(set_mempolicy_home_node, SyscallPassthrough4<SYSCALL_DEF(set_mempolicy_home_node)>);
  } else {
    REGISTER_SYSCALL_IMPL(set_mempolicy_home_node, UnimplementedSyscallSafe);
  }

  if (Handler->IsHostKernelVersionAtLeast(6, 8, 0)) {
    REGISTER_SYSCALL_IMPL(futex_wake, SyscallPassthrough4<SYSCALL_DEF(futex_wake)>);
    REGISTER_SYSCALL_IMPL(futex_wait, SyscallPassthrough6<SYSCALL_DEF(futex_wait)>);
    REGISTER_SYSCALL_IMPL(futex_requeue, SyscallPassthrough4<SYSCALL_DEF(futex_requeue)>);
    REGISTER_SYSCALL_IMPL(statmount, SyscallPassthrough4<SYSCALL_DEF(statmount)>);
    REGISTER_SYSCALL_IMPL(listmount, SyscallPassthrough4<SYSCALL_DEF(listmount)>);
    REGISTER_SYSCALL_IMPL(lsm_get_self_attr, SyscallPassthrough4<SYSCALL_DEF(lsm_get_self_attr)>);
    REGISTER_SYSCALL_IMPL(lsm_set_self_attr, SyscallPassthrough4<SYSCALL_DEF(lsm_set_self_attr)>);
    REGISTER_SYSCALL_IMPL(lsm_list_modules, SyscallPassthrough3<SYSCALL_DEF(lsm_list_modules)>);
  } else {
    REGISTER_SYSCALL_IMPL(futex_wake, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(futex_wait, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(futex_requeue, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(statmount, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(listmount, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(lsm_get_self_attr, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(lsm_set_self_attr, UnimplementedSyscallSafe);
    REGISTER_SYSCALL_IMPL(lsm_list_modules, UnimplementedSyscallSafe);
  }
  if (Handler->IsHostKernelVersionAtLeast(6, 10, 0)) {
    REGISTER_SYSCALL_IMPL(mseal, SyscallPassthrough3<SYSCALL_DEF(mseal)>);
  } else {
    REGISTER_SYSCALL_IMPL(mseal, UnimplementedSyscallSafe);
  }
}

namespace x64 {
  // Registers the passthrough syscall handlers for the x64 syscall table.
  //
  // RegisterCommon(Handler) is invoked first, then every syscall listed below is registered through
  // REGISTER_SYSCALL_IMPL_X64 with a SyscallPassthroughN<SYSCALL_DEF(name)> handler.
  // NOTE(review): the numeric suffix of SyscallPassthroughN appears to be the number of guest arguments
  // forwarded to the host syscall - confirm against the template definition before changing any entry.
  //
  // Handler: the syscall handler; used here to query the host kernel version for the conditionally
  //          registered syscalls at the end of the list.
  void RegisterPassthrough(FEX::HLE::SyscallHandler* Handler) {
    using namespace FEXCore::IR;
    RegisterCommon(Handler);
    REGISTER_SYSCALL_IMPL_X64(ftruncate, SyscallPassthrough2<SYSCALL_DEF(ftruncate)>);
    REGISTER_SYSCALL_IMPL_X64(ioctl, SyscallPassthrough3<SYSCALL_DEF(ioctl)>);
    REGISTER_SYSCALL_IMPL_X64(pread_64, SyscallPassthrough4<SYSCALL_DEF(pread_64)>);
    REGISTER_SYSCALL_IMPL_X64(pwrite_64, SyscallPassthrough4<SYSCALL_DEF(pwrite_64)>);
    REGISTER_SYSCALL_IMPL_X64(readv, SyscallPassthrough3<SYSCALL_DEF(readv)>);
    REGISTER_SYSCALL_IMPL_X64(writev, SyscallPassthrough3<SYSCALL_DEF(writev)>);
    REGISTER_SYSCALL_IMPL_X64(dup, SyscallPassthrough1<SYSCALL_DEF(dup)>);
    REGISTER_SYSCALL_IMPL_X64(nanosleep, SyscallPassthrough2<SYSCALL_DEF(nanosleep)>);
    REGISTER_SYSCALL_IMPL_X64(getitimer, SyscallPassthrough2<SYSCALL_DEF(getitimer)>);
    REGISTER_SYSCALL_IMPL_X64(setitimer, SyscallPassthrough3<SYSCALL_DEF(setitimer)>);
    REGISTER_SYSCALL_IMPL_X64(sendfile, SyscallPassthrough4<SYSCALL_DEF(sendfile)>);
    REGISTER_SYSCALL_IMPL_X64(accept, SyscallPassthrough3<SYSCALL_DEF(accept)>);
    REGISTER_SYSCALL_IMPL_X64(sendmsg, SyscallPassthrough3<SYSCALL_DEF(sendmsg)>);
    REGISTER_SYSCALL_IMPL_X64(recvmsg, SyscallPassthrough3<SYSCALL_DEF(recvmsg)>);
    REGISTER_SYSCALL_IMPL_X64(setsockopt, SyscallPassthrough5<SYSCALL_DEF(setsockopt)>);
    REGISTER_SYSCALL_IMPL_X64(getsockopt, SyscallPassthrough5<SYSCALL_DEF(getsockopt)>);
    REGISTER_SYSCALL_IMPL_X64(wait4, SyscallPassthrough4<SYSCALL_DEF(wait4)>);
    REGISTER_SYSCALL_IMPL_X64(semop, SyscallPassthrough3<SYSCALL_DEF(semop)>);
    REGISTER_SYSCALL_IMPL_X64(gettimeofday, SyscallPassthrough2<SYSCALL_DEF(gettimeofday)>);
    REGISTER_SYSCALL_IMPL_X64(getrlimit, SyscallPassthrough2<SYSCALL_DEF(getrlimit)>);
    REGISTER_SYSCALL_IMPL_X64(getrusage, SyscallPassthrough2<SYSCALL_DEF(getrusage)>);
    REGISTER_SYSCALL_IMPL_X64(sysinfo, SyscallPassthrough1<SYSCALL_DEF(sysinfo)>);
    REGISTER_SYSCALL_IMPL_X64(times, SyscallPassthrough1<SYSCALL_DEF(times)>);
    REGISTER_SYSCALL_IMPL_X64(rt_sigqueueinfo, SyscallPassthrough3<SYSCALL_DEF(rt_sigqueueinfo)>);
    REGISTER_SYSCALL_IMPL_X64(fstatfs, SyscallPassthrough2<SYSCALL_DEF(fstatfs)>);
    REGISTER_SYSCALL_IMPL_X64(sched_rr_get_interval, SyscallPassthrough2<SYSCALL_DEF(sched_rr_get_interval)>);
    REGISTER_SYSCALL_IMPL_X64(mlockall, SyscallPassthrough1<SYSCALL_DEF(mlockall)>);
    REGISTER_SYSCALL_IMPL_X64(munlockall, SyscallPassthrough0<SYSCALL_DEF(munlockall)>);
    REGISTER_SYSCALL_IMPL_X64(adjtimex, SyscallPassthrough1<SYSCALL_DEF(adjtimex)>);
    REGISTER_SYSCALL_IMPL_X64(setrlimit, SyscallPassthrough2<SYSCALL_DEF(setrlimit)>);
    REGISTER_SYSCALL_IMPL_X64(settimeofday, SyscallPassthrough2<SYSCALL_DEF(settimeofday)>);
    REGISTER_SYSCALL_IMPL_X64(readahead, SyscallPassthrough3<SYSCALL_DEF(readahead)>);
    REGISTER_SYSCALL_IMPL_X64(futex, SyscallPassthrough6<SYSCALL_DEF(futex)>);
    REGISTER_SYSCALL_IMPL_X64(io_getevents, SyscallPassthrough5<SYSCALL_DEF(io_getevents)>);
    REGISTER_SYSCALL_IMPL_X64(semtimedop, SyscallPassthrough4<SYSCALL_DEF(semtimedop)>);
    REGISTER_SYSCALL_IMPL_X64(timer_create, SyscallPassthrough3<SYSCALL_DEF(timer_create)>);
    REGISTER_SYSCALL_IMPL_X64(timer_settime, SyscallPassthrough4<SYSCALL_DEF(timer_settime)>);
    REGISTER_SYSCALL_IMPL_X64(timer_gettime, SyscallPassthrough2<SYSCALL_DEF(timer_gettime)>);
    REGISTER_SYSCALL_IMPL_X64(clock_settime, SyscallPassthrough2<SYSCALL_DEF(clock_settime)>);
    REGISTER_SYSCALL_IMPL_X64(clock_gettime, SyscallPassthrough2<SYSCALL_DEF(clock_gettime)>);
    REGISTER_SYSCALL_IMPL_X64(clock_getres, SyscallPassthrough2<SYSCALL_DEF(clock_getres)>);
    REGISTER_SYSCALL_IMPL_X64(clock_nanosleep, SyscallPassthrough4<SYSCALL_DEF(clock_nanosleep)>);
    REGISTER_SYSCALL_IMPL_X64(mq_open, SyscallPassthrough4<SYSCALL_DEF(mq_open)>);
    REGISTER_SYSCALL_IMPL_X64(mq_timedsend, SyscallPassthrough5<SYSCALL_DEF(mq_timedsend)>);
    REGISTER_SYSCALL_IMPL_X64(mq_timedreceive, SyscallPassthrough5<SYSCALL_DEF(mq_timedreceive)>);
    REGISTER_SYSCALL_IMPL_X64(mq_notify, SyscallPassthrough2<SYSCALL_DEF(mq_notify)>);
    REGISTER_SYSCALL_IMPL_X64(mq_getsetattr, SyscallPassthrough3<SYSCALL_DEF(mq_getsetattr)>);
    REGISTER_SYSCALL_IMPL_X64(waitid, SyscallPassthrough5<SYSCALL_DEF(waitid)>);
    REGISTER_SYSCALL_IMPL_X64(pselect6, SyscallPassthrough6<SYSCALL_DEF(pselect6)>);
    REGISTER_SYSCALL_IMPL_X64(ppoll, SyscallPassthrough5<SYSCALL_DEF(ppoll)>);
    REGISTER_SYSCALL_IMPL_X64(set_robust_list, SyscallPassthrough2<SYSCALL_DEF(set_robust_list)>);
    REGISTER_SYSCALL_IMPL_X64(get_robust_list, SyscallPassthrough3<SYSCALL_DEF(get_robust_list)>);
    REGISTER_SYSCALL_IMPL_X64(sync_file_range, SyscallPassthrough4<SYSCALL_DEF(sync_file_range)>);
    REGISTER_SYSCALL_IMPL_X64(vmsplice, SyscallPassthrough4<SYSCALL_DEF(vmsplice)>);
    REGISTER_SYSCALL_IMPL_X64(utimensat, SyscallPassthrough4<SYSCALL_DEF(utimensat)>);
    REGISTER_SYSCALL_IMPL_X64(fallocate, SyscallPassthrough4<SYSCALL_DEF(fallocate)>);
    REGISTER_SYSCALL_IMPL_X64(timerfd_settime, SyscallPassthrough4<SYSCALL_DEF(timerfd_settime)>);
    REGISTER_SYSCALL_IMPL_X64(timerfd_gettime, SyscallPassthrough2<SYSCALL_DEF(timerfd_gettime)>);
    REGISTER_SYSCALL_IMPL_X64(preadv, SyscallPassthrough5<SYSCALL_DEF(preadv)>);
    REGISTER_SYSCALL_IMPL_X64(pwritev, SyscallPassthrough5<SYSCALL_DEF(pwritev)>);
    REGISTER_SYSCALL_IMPL_X64(rt_tgsigqueueinfo, SyscallPassthrough4<SYSCALL_DEF(rt_tgsigqueueinfo)>);
    REGISTER_SYSCALL_IMPL_X64(recvmmsg, SyscallPassthrough5<SYSCALL_DEF(recvmmsg)>);
    REGISTER_SYSCALL_IMPL_X64(clock_adjtime, SyscallPassthrough2<SYSCALL_DEF(clock_adjtime)>);
    REGISTER_SYSCALL_IMPL_X64(sendmmsg, SyscallPassthrough4<SYSCALL_DEF(sendmmsg)>);
    REGISTER_SYSCALL_IMPL_X64(process_vm_readv, SyscallPassthrough6<SYSCALL_DEF(process_vm_readv)>);
    REGISTER_SYSCALL_IMPL_X64(process_vm_writev, SyscallPassthrough6<SYSCALL_DEF(process_vm_writev)>);
    REGISTER_SYSCALL_IMPL_X64(preadv2, SyscallPassthrough6<SYSCALL_DEF(preadv2)>);
    REGISTER_SYSCALL_IMPL_X64(pwritev2, SyscallPassthrough6<SYSCALL_DEF(pwritev2)>);
    REGISTER_SYSCALL_IMPL_X64(io_pgetevents, SyscallPassthrough6<SYSCALL_DEF(io_pgetevents)>);
    REGISTER_SYSCALL_IMPL_X64(pidfd_send_signal, SyscallPassthrough4<SYSCALL_DEF(pidfd_send_signal)>);
    REGISTER_SYSCALL_IMPL_X64(process_madvise, SyscallPassthrough5<SYSCALL_DEF(process_madvise)>);
    REGISTER_SYSCALL_IMPL_X64(fadvise64, SyscallPassthrough4<SYSCALL_DEF(fadvise64)>);
    // The remaining syscalls are only passed through when the host kernel version check succeeds;
    // otherwise they are registered with UnimplementedSyscallSafe.
    // cachestat: gated on host kernel >= 6.5.0.
    if (Handler->IsHostKernelVersionAtLeast(6, 5, 0)) {
      REGISTER_SYSCALL_IMPL_X64(cachestat, SyscallPassthrough4<SYSCALL_DEF(cachestat)>);
    } else {
      REGISTER_SYSCALL_IMPL_X64(cachestat, UnimplementedSyscallSafe);
    }
    // fchmodat2: gated on host kernel >= 6.6.0.
    if (Handler->IsHostKernelVersionAtLeast(6, 6, 0)) {
      REGISTER_SYSCALL_IMPL_X64(fchmodat2, SyscallPassthrough4<SYSCALL_DEF(fchmodat2)>);
    } else {
      REGISTER_SYSCALL_IMPL_X64(fchmodat2, UnimplementedSyscallSafe);
    }
  }
} // namespace x64

namespace x32 {
  // Registers the passthrough syscall handlers for the x32 syscall table.
  //
  // RegisterCommon(Handler) is invoked first. Each entry below then registers a guest syscall name
  // (first macro argument, e.g. getuid32 or clock_gettime64) with a SyscallPassthroughN handler that is
  // instantiated with SYSCALL_DEF of a differently named syscall (e.g. getuid, clock_gettime).
  // NOTE(review): this presumably maps the 32-bit guest's "*32" / "*64" / "*_time64" variants onto the
  // host syscall that already uses the wide types - confirm against SYSCALL_DEF and the host ABI.
  void RegisterPassthrough(FEX::HLE::SyscallHandler* Handler) {
    using namespace FEXCore::IR;
    RegisterCommon(Handler);
    // "*32" uid/gid variants.
    REGISTER_SYSCALL_IMPL_X32(getuid32, SyscallPassthrough0<SYSCALL_DEF(getuid)>);
    REGISTER_SYSCALL_IMPL_X32(getgid32, SyscallPassthrough0<SYSCALL_DEF(getgid)>);
    REGISTER_SYSCALL_IMPL_X32(geteuid32, SyscallPassthrough0<SYSCALL_DEF(geteuid)>);
    REGISTER_SYSCALL_IMPL_X32(getegid32, SyscallPassthrough0<SYSCALL_DEF(getegid)>);
    REGISTER_SYSCALL_IMPL_X32(setreuid32, SyscallPassthrough2<SYSCALL_DEF(setreuid)>);
    REGISTER_SYSCALL_IMPL_X32(setregid32, SyscallPassthrough2<SYSCALL_DEF(setregid)>);
    REGISTER_SYSCALL_IMPL_X32(getgroups32, SyscallPassthrough2<SYSCALL_DEF(getgroups)>);
    REGISTER_SYSCALL_IMPL_X32(setgroups32, SyscallPassthrough2<SYSCALL_DEF(setgroups)>);
    REGISTER_SYSCALL_IMPL_X32(fchown32, SyscallPassthrough3<SYSCALL_DEF(fchown)>);
    REGISTER_SYSCALL_IMPL_X32(setresuid32, SyscallPassthrough3<SYSCALL_DEF(setresuid)>);
    REGISTER_SYSCALL_IMPL_X32(getresuid32, SyscallPassthrough3<SYSCALL_DEF(getresuid)>);
    REGISTER_SYSCALL_IMPL_X32(setresgid32, SyscallPassthrough3<SYSCALL_DEF(setresgid)>);
    REGISTER_SYSCALL_IMPL_X32(getresgid32, SyscallPassthrough3<SYSCALL_DEF(getresgid)>);
    REGISTER_SYSCALL_IMPL_X32(setuid32, SyscallPassthrough1<SYSCALL_DEF(setuid)>);
    REGISTER_SYSCALL_IMPL_X32(setgid32, SyscallPassthrough1<SYSCALL_DEF(setgid)>);
    REGISTER_SYSCALL_IMPL_X32(setfsuid32, SyscallPassthrough1<SYSCALL_DEF(setfsuid)>);
    REGISTER_SYSCALL_IMPL_X32(setfsgid32, SyscallPassthrough1<SYSCALL_DEF(setfsgid)>);
    REGISTER_SYSCALL_IMPL_X32(sendfile64, SyscallPassthrough4<SYSCALL_DEF(sendfile)>);
    // "*64" / "*_time64" time-related variants.
    REGISTER_SYSCALL_IMPL_X32(clock_gettime64, SyscallPassthrough2<SYSCALL_DEF(clock_gettime)>);
    REGISTER_SYSCALL_IMPL_X32(clock_settime64, SyscallPassthrough2<SYSCALL_DEF(clock_settime)>);
    REGISTER_SYSCALL_IMPL_X32(clock_adjtime64, SyscallPassthrough2<SYSCALL_DEF(clock_adjtime)>);
    REGISTER_SYSCALL_IMPL_X32(clock_getres_time64, SyscallPassthrough2<SYSCALL_DEF(clock_getres)>);
    REGISTER_SYSCALL_IMPL_X32(clock_nanosleep_time64, SyscallPassthrough4<SYSCALL_DEF(clock_nanosleep)>);
    REGISTER_SYSCALL_IMPL_X32(timer_gettime64, SyscallPassthrough2<SYSCALL_DEF(timer_gettime)>);
    REGISTER_SYSCALL_IMPL_X32(timer_settime64, SyscallPassthrough4<SYSCALL_DEF(timer_settime)>);
    REGISTER_SYSCALL_IMPL_X32(timerfd_gettime64, SyscallPassthrough2<SYSCALL_DEF(timerfd_gettime)>);
    REGISTER_SYSCALL_IMPL_X32(timerfd_settime64, SyscallPassthrough4<SYSCALL_DEF(timerfd_settime)>);
    REGISTER_SYSCALL_IMPL_X32(utimensat_time64, SyscallPassthrough4<SYSCALL_DEF(utimensat)>);
    REGISTER_SYSCALL_IMPL_X32(ppoll_time64, SyscallPassthrough5<SYSCALL_DEF(ppoll)>);
    REGISTER_SYSCALL_IMPL_X32(io_pgetevents_time64, SyscallPassthrough6<SYSCALL_DEF(io_pgetevents)>);
    REGISTER_SYSCALL_IMPL_X32(mq_timedsend_time64, SyscallPassthrough5<SYSCALL_DEF(mq_timedsend)>);
    REGISTER_SYSCALL_IMPL_X32(mq_timedreceive_time64, SyscallPassthrough5<SYSCALL_DEF(mq_timedreceive)>);
    REGISTER_SYSCALL_IMPL_X32(semtimedop_time64, SyscallPassthrough4<SYSCALL_DEF(semtimedop)>);
    REGISTER_SYSCALL_IMPL_X32(futex_time64, SyscallPassthrough6<SYSCALL_DEF(futex)>);
    REGISTER_SYSCALL_IMPL_X32(sched_rr_get_interval_time64, SyscallPassthrough2<SYSCALL_DEF(sched_rr_get_interval)>);
  }
} // namespace x32
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Signals.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Syscalls/Thread.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/SignalDelegator.h>

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

// Forward declaration of GuestSigAction inside the top-level SignalDelegator namespace.
namespace SignalDelegator {
struct GuestSigAction;
}

namespace FEX::HLE {
// Registers the signal-related syscall handlers.
//
// The rt_sig* and signalfd* handlers forward to the matching Guest* entry point on the signal
// delegator owned by the global syscall handler. userfaultfd is deliberately reported as
// unimplemented.
void RegisterSignals(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL(rt_sigprocmask, [](FEXCore::Core::CpuStateFrame* Frame, int how, const uint64_t* set, uint64_t* oldset) -> uint64_t {
    // Resolve the delegator first, then the calling thread's state object, then forward.
    auto Delegator = FEX::HLE::_SyscallHandler->GetSignalDelegator();
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    return Delegator->GuestSigProcMask(ThreadObject, how, set, oldset);
  });

  REGISTER_SYSCALL_IMPL(rt_sigpending, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, size_t sigsetsize) -> uint64_t {
    auto Delegator = FEX::HLE::_SyscallHandler->GetSignalDelegator();
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    return Delegator->GuestSigPending(ThreadObject, set, sigsetsize);
  });

  REGISTER_SYSCALL_IMPL(rt_sigsuspend, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* unewset, size_t sigsetsize) -> uint64_t {
    auto Delegator = FEX::HLE::_SyscallHandler->GetSignalDelegator();
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    return Delegator->GuestSigSuspend(ThreadObject, unewset, sigsetsize);
  });

  REGISTER_SYSCALL_IMPL(userfaultfd, [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t {
    // userfaultfd stays disabled until it can be emulated properly.
    // Returning ENOSYS is acceptable because a kernel may be built without this syscall.
    return -ENOSYS;
    // Intentionally unreachable: the host passthrough kept for when emulation is ready.
    uint64_t Result = ::syscall(SYSCALL_DEF(userfaultfd), flags);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(signalfd, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const uint64_t* mask, size_t sigsetsize) -> uint64_t {
    // signalfd has no flags argument, so zero is passed for it.
    auto Delegator = FEX::HLE::_SyscallHandler->GetSignalDelegator();
    return Delegator->GuestSignalFD(fd, mask, sigsetsize, 0);
  });

  REGISTER_SYSCALL_IMPL(signalfd4, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const uint64_t* mask, size_t sigsetsize, int flags) -> uint64_t {
    auto Delegator = FEX::HLE::_SyscallHandler->GetSignalDelegator();
    return Delegator->GuestSignalFD(fd, mask, sigsetsize, flags);
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Stubs.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include <FEXCore/Utils/LogManager.h>

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <errno.h>
#include <stdint.h>
#include <sys/types.h>

// Body for a syscall handler that is only a stub.
// Reports "Syscall: <name> stub!" through ERROR_AND_DIE_FMT and then returns -ENOSYS from the
// enclosing function.
// NOTE(review): the return is presumably only reached if ERROR_AND_DIE_FMT itself returns - confirm.
#define SYSCALL_STUB(name)                         \
  do {                                             \
    ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); \
    return -ENOSYS;                                \
  } while (0)

// Forward declaration; the handlers below only take CpuStateFrame by pointer.
namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE {
// Registers handlers for syscalls that are not implemented.
//  - restart_syscall: uses SYSCALL_STUB, which reports a fatal "stub!" error.
//  - rseq: quietly returns -ENOSYS without logging.
void RegisterStubs(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL(restart_syscall, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { SYSCALL_STUB(restart_syscall); });

  REGISTER_SYSCALL_IMPL(rseq, [](FEXCore::Core::CpuStateFrame* Frame, struct rseq* rseq, uint32_t rseq_len, int flags, uint32_t sig) -> uint64_t {
    // We don't support this
    return -ENOSYS;
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "CodeLoader.h"

#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Syscalls/Thread.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/Thread.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Thread.h"
#include "LinuxSyscalls/Utils/Threads.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/Event.h>

#include <FEXHeaderUtils/Syscalls.h>

#include <grp.h>
#include <limits.h>
#include <linux/futex.h>
#include <linux/seccomp.h>
#include <linux/sched.h>
#include <stdint.h>
#include <sched.h>
#include <sys/personality.h>
#include <sys/poll.h>
#include <sys/prctl.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/wait.h>
#include <unistd.h>
#include <sys/fsuid.h>

// NOTE(review): presumably associates the printf-style format "%u" with idtype_t syscall arguments
// (e.g. for argument logging) - confirm against the ARG_TO_STR definition.
ARG_TO_STR(idtype_t, "%u")

namespace FEX::HLE {

// Start-up arguments and handshake state shared between CreateNewThread (the parent) and
// ThreadHandler (the newly created host thread).
// CreateNewThread allocates this on its own stack, so ThreadHandler must not touch it after
// signalling StartRunningResponse.
struct ExecutionThreadHandler {
  // Context whose ExecuteThread() the new host thread calls.
  FEXCore::Context::Context* CTX;
  // State object of the guest thread being started.
  FEX::HLE::ThreadStateObject* Thread;
  // Signalled by the new thread once its PID/TID and TLS state are set up; the parent waits on it.
  Event ThreadWaiting {};

  // Pause on thread start handling.
  // StartRunningCV: signalled by the parent to let the new thread proceed into guest execution.
  FEXCore::InterruptableConditionVariable StartRunningCV {};
  // StartRunningResponse: signalled by the new thread to acknowledge it has been released.
  FEXCore::InterruptableConditionVariable StartRunningResponse {};
};

// Entry point of the host thread created by CreateNewThread.
//
// Data points at an ExecutionThreadHandler. The function records the host PID/TID in the thread's
// state object, performs per-thread initialization, performs the start-up handshake with the parent,
// executes the guest thread, and finally uninstalls the TLS state and destroys the thread object.
// Always returns nullptr.
static void* ThreadHandler(void* Data) {
  ExecutionThreadHandler* Handler = reinterpret_cast<ExecutionThreadHandler*>(Data);
  // Copy out of Handler now: Handler must not be dereferenced once the parent has been released below.
  auto CTX = Handler->CTX;
  auto Thread = Handler->Thread;

  Thread->ThreadInfo.PID = ::getpid();
  Thread->ThreadInfo.TID = FHU::Syscalls::gettid();
  // ThreadStats may be null; only publish the TID when a stats object exists.
  if (Thread->Thread->ThreadStats) {
    Thread->Thread->ThreadStats->TID.store(Thread->ThreadInfo.TID, std::memory_order_relaxed);
  }

  FEXCore::Allocator::InitializeThread();

  FEX::HLE::_SyscallHandler->RegisterTLSState(Thread);

  // Now notify the thread that we are initialized
  Handler->ThreadWaiting.NotifyOne();

  // Block until the parent signals that the thread may start running.
  Handler->StartRunningCV.Wait();

  // Notify the parent thread that it can continue.
  // Handler is a stack object on the parent thread, and will be invalid after notification.
  Handler->StartRunningResponse.NotifyOne();

  // Only the local copies (CTX, Thread) are used from here on.
  CTX->ExecuteThread(Thread->Thread);
  FEX::HLE::_SyscallHandler->UninstallTLSState(Thread);
  FEX::HLE::_SyscallHandler->TM.DestroyThread(Thread);
  return nullptr;
}

// Creates a new guest thread and the host thread that executes it.
//
// CTX:   context handed to the new host thread for guest execution.
// Frame: CPU state frame of the calling guest thread; its state seeds the new thread.
// args:  clone arguments (flags, stack, tls, tid pointers, pidfd pointer) plus the clone type.
//
// The new thread's RAX is set to 0, its RSP is taken from the clone arguments and its TLS is set when
// CLONE_SETTLS is present. The function then starts the host thread, waits for it to initialize,
// applies the CLONE_PARENT_SETTID / CLONE_CHILD_SETTID / CLONE_CHILD_CLEARTID / CLONE_PIDFD side
// effects, tracks the thread, and finally releases it to run.
//
// Returns the state object of the newly created thread.
FEX::HLE::ThreadStateObject* CreateNewThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) {
  uint64_t flags = args->args.flags;
  auto NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, &Frame->State, args->args.parent_tid,
                                                              FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame));

  // The new thread observes a return value of zero from the clone syscall.
  NewThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RAX] = 0;
  if (args->Type == TYPE_CLONE3) {
    // stack pointer points to the lowest address to the stack
    // set RSP to stack + size
    NewThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = args->args.stack + args->args.stack_size;
  } else {
    NewThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = args->args.stack;
  }

  if (FEX::HLE::_SyscallHandler->Is64BitMode()) {
    if (flags & CLONE_SETTLS) {
      x64::SetThreadArea(NewThread->Thread->CurrentFrame, reinterpret_cast<void*>(args->args.tls));
    }
    // Set us to start just after the syscall instruction
    x64::AdjustRipForNewThread(NewThread->Thread->CurrentFrame);
  } else {
    if (flags & CLONE_SETTLS) {
      x32::SetThreadArea(NewThread->Thread->CurrentFrame, reinterpret_cast<void*>(args->args.tls));
    }
    x32::AdjustRipForNewThread(NewThread->Thread->CurrentFrame);
  }

  // Initialize a new thread for execution.
  // Arg lives on this stack frame; the new thread only uses it until it signals StartRunningResponse,
  // which this function waits for before returning.
  ExecutionThreadHandler Arg {
    .CTX = CTX,
    .Thread = NewThread,
  };
  NewThread->ExecutionThread = FEXCore::Threads::Thread::Create(ThreadHandler, &Arg);

  // Wait for the thread to have started.
  Arg.ThreadWaiting.Wait();

  if (FEX::HLE::_SyscallHandler->NeedXIDCheck()) {
    // The first time an application creates a thread, GLIBC installs their SETXID signal handler.
    // FEX needs to capture all signals and defer them to the guest.
    // Once FEX creates its first guest thread, overwrite the GLIBC SETXID handler *again* to ensure
    // FEX maintains control of the signal handler on this signal.
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->CheckXIDHandler();
    FEX::HLE::_SyscallHandler->DisableXIDCheck();
  }

  // Return the new threads TID
  uint64_t Result = NewThread->ThreadInfo.TID;

  // Sets the child TID to pointer in ParentTID
  if (flags & CLONE_PARENT_SETTID) {
    *reinterpret_cast<pid_t*>(args->args.parent_tid) = Result;
  }

  // Sets the child TID to the pointer in ChildTID
  if (flags & CLONE_CHILD_SETTID) {
    NewThread->ThreadInfo.set_child_tid = reinterpret_cast<int32_t*>(args->args.child_tid);
    *reinterpret_cast<pid_t*>(args->args.child_tid) = Result;
  }

  // When the thread exits, clear the child thread ID at ChildTID
  // Additionally wakeup a futex at that address
  // Address /may/ be changed with SET_TID_ADDRESS syscall
  if (flags & CLONE_CHILD_CLEARTID) {
    NewThread->ThreadInfo.clear_child_tid = reinterpret_cast<int32_t*>(args->args.child_tid);
  }

  // clone3 flag
  if (flags & CLONE_PIDFD) {
    // Use pidfd_open to emulate this flag
    const int pidfd = ::syscall(SYSCALL_DEF(pidfd_open), Result, 0);
    // Check the descriptor returned by pidfd_open, not the TID: a failed call returns -1 and must
    // not be stored into the guest's pidfd slot.
    if (pidfd == -1) {
      LogMan::Msg::EFmt("Couldn't get pidfd of TID {}\n", Result);
    } else {
      *reinterpret_cast<int*>(args->args.pidfd) = pidfd;
    }
  }

  FEX::HLE::_SyscallHandler->TM.TrackThread(NewThread);

  // Start running the thread
  Arg.StartRunningCV.NotifyOne();

  // Wait for the thread to start running.
  // After this point the new thread no longer touches Arg, so it is safe to leave this frame.
  Arg.StartRunningResponse.Wait();

  return NewThread;
}

// Continues guest execution on the current host thread after a clone.
//
// Thread:    state object of the thread this function runs guest code on.
// CTX:       context used to execute the guest thread.
// Frame:     CPU state frame of the cloning thread.
// CloneArgs: guest clone arguments, clone type and the signal mask to restore in the fork-like path.
//
// With CLONE_THREAD a new thread state object is created; without it the call is treated as a fork:
// the fork mutexes are unlocked and the saved signal mask is restored. In both paths RAX is set to 0
// and RSP is replaced when a stack was supplied. The function then refreshes TID/PID, registers the
// TLS state and executes the guest thread directly.
//
// Returns Thread->StatusCode once guest execution finishes.
uint64_t HandleNewClone(FEX::HLE::ThreadStateObject* Thread, FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame,
                        FEX::HLE::clone3_args* CloneArgs) {
  FEXCore::Allocator::InitializeThread();
  auto GuestArgs = &CloneArgs->args;
  uint64_t flags = GuestArgs->flags;
  auto NewThread = Thread;
  bool CreatedNewThreadObject {};

  if (flags & CLONE_THREAD) {
    // Overwrite thread
    NewThread = FEX::HLE::_SyscallHandler->TM.CreateThread(0, 0, &Frame->State, GuestArgs->parent_tid,
                                                           FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame));

    NewThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RAX] = 0;
    if (GuestArgs->stack == 0) {
      // Copies in the original thread's stack
    } else {
      NewThread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack;
    }

    // CLONE_PARENT_SETTID, CLONE_CHILD_SETTID, CLONE_CHILD_CLEARTID, CLONE_PIDFD will be handled by kernel
    // Call execution thread directly since we already are on the new thread
    CreatedNewThreadObject = true;
  } else {
    // If we don't have CLONE_THREAD then we are effectively a fork
    // Clear all the other threads that are being tracked
    // Frame->Thread is /ONLY/ safe to access when CLONE_THREAD flag is not set
    // Unlock the mutexes on both sides of the fork
    FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, true);

    // Restore the signal mask that was captured in CloneArgs.
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &CloneArgs->SignalMask, nullptr, sizeof(CloneArgs->SignalMask));

    Thread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RAX] = 0;
    if (GuestArgs->stack == 0) {
      // Copies in the original thread's stack
    } else {
      Thread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = GuestArgs->stack;
    }
  }

  if (CloneArgs->Type == TYPE_CLONE3) {
    // If we are coming from a clone3 handler then we need to adjust RSP.
    // NOTE(review): this adjusts Thread, not NewThread; in the CLONE_THREAD path those differ - confirm intent.
    Thread->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] += CloneArgs->args.stack_size;
  }

  if (FEX::HLE::_SyscallHandler->Is64BitMode()) {
    if (flags & CLONE_SETTLS) {
      x64::SetThreadArea(NewThread->Thread->CurrentFrame, reinterpret_cast<void*>(GuestArgs->tls));
    }
    // Set us to start just after the syscall instruction
    x64::AdjustRipForNewThread(NewThread->Thread->CurrentFrame);
  } else {
    if (flags & CLONE_SETTLS) {
      x32::SetThreadArea(NewThread->Thread->CurrentFrame, reinterpret_cast<void*>(GuestArgs->tls));
    }
    x32::AdjustRipForNewThread(NewThread->Thread->CurrentFrame);
  }

  // Depending on clone settings, our TID and PID could have changed
  Thread->ThreadInfo.TID = FHU::Syscalls::gettid();
  Thread->ThreadInfo.PID = ::getpid();
  FEX::HLE::_SyscallHandler->FM.UpdatePID(Thread->ThreadInfo.PID);

  // NOTE(review): tracking, TLS registration and execution below all use Thread even when a separate
  // NewThread object was created above - confirm this is intended.
  if (CreatedNewThreadObject) {
    FEX::HLE::_SyscallHandler->TM.TrackThread(Thread);
  }

  FEX::HLE::_SyscallHandler->RegisterTLSState(Thread);

  // Start executing the thread directly
  // Our host clone starts in a new stack space, so it can't return back to the JIT space
  CTX->ExecuteThread(Thread->Thread);

  FEX::HLE::_SyscallHandler->UninstallTLSState(Thread);

  // The rest of the context remains as is and the thread will continue executing
  return Thread->StatusCode;
}

// Performs the host `clone` syscall used by the fork emulation path.
//
// flags:       guest clone flags; only CLONE_FS and CLONE_FILES are forwarded to the host.
// exit_signal: OR'd into the flags word handed to the host syscall.
//
// Every other clone argument is passed as nullptr. Returns the raw syscall result.
static int CloneFork(uint32_t flags, uint64_t exit_signal) {
  constexpr uint32_t ForwardedFlags = CLONE_FS | CLONE_FILES;
  const uint64_t HostFlags = (flags & ForwardedFlags) | exit_signal;
  return ::syscall(SYSCALL_DEF(clone), HostFlags, nullptr, nullptr, nullptr, nullptr);
}

// Emulates fork/vfork-style clones for the guest.
//
// Thread: internal thread state of the caller; its FrontendPtr yields the ThreadStateObject in the child.
// Frame:  CPU state frame of the caller; updated in the child (RSP, TLS) when requested by the flags.
// args:   clone arguments (flags, stack, stack_size, tid pointers, tls, exit_signal) and the clone type.
//
// All signals are blocked and the fork mutexes are taken before the host clone; both sides unlock and
// restore the previous signal mask afterwards. CLONE_VFORK is emulated with a regular fork plus a pipe
// that the parent polls until the child's write end is gone.
//
// The child returns 0. The parent returns through SYSCALL_ERRNO() based on the clone result.
uint64_t ForkGuest(FEXCore::Core::InternalThreadState* Thread, FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) {
  const uint64_t flags = args->args.flags;
  auto stack = reinterpret_cast<const void*>(args->args.stack);
  const uint64_t stack_size = args->args.stack_size;
  auto parent_tid = reinterpret_cast<pid_t*>(args->args.parent_tid);
  auto child_tid = reinterpret_cast<pid_t*>(args->args.child_tid);
  auto tls = reinterpret_cast<void*>(args->args.tls);
  const uint64_t exit_signal = args->args.exit_signal;

  // Sanity check flags here.
  // Unsupported clone3 flags are only logged; the fork still proceeds.
  if (args->Type == TypeOfClone::TYPE_CLONE3) {
    constexpr uint64_t UnsupportedFlags = CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP | CLONE_NEWTIME;
    if (args->args.flags & UnsupportedFlags) {
      LogMan::Msg::EFmt("fork: Unsupported flags passed. {:#x}", args->args.flags & UnsupportedFlags);
    }
  }

  // Just before we fork, we lock all syscall mutexes so that both processes will end up with a locked mutex
  // Mask is used both as the new mask (all bits set) and as the output for the previous mask, so after
  // this call it holds the mask to restore on both sides of the fork.
  uint64_t Mask {~0ULL};
  ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, &Mask, sizeof(Mask));

  FEX::HLE::_SyscallHandler->LockBeforeFork(Frame->Thread);

  const bool IsVFork = flags & CLONE_VFORK;
  pid_t Result {};
  int VForkFDs[2];
  if (IsVFork) {
    // Use pipes as a mechanism for knowing when the child process is exiting.
    // FEX can't use `waitpid` for this since the child process may want to use it.
    // If we use `waitpid` then the kernel won't return the same data if asked again.
    // NOTE(review): the pipe2 result is not checked; on failure VForkFDs stays uninitialized - confirm acceptable.
    pipe2(VForkFDs, O_CLOEXEC);

    // XXX: We don't currently support a real `vfork` as it causes problems.
    // Currently behaves like a fork (with wait after the fact), which isn't correct. Need to find where the problem is
    Result = CloneFork(flags, exit_signal);

    if (Result == 0) {
      // Close the read end of the pipe.
      // Keep the write end open so the parent can poll it.
      close(VForkFDs[0]);
    } else {
      // Close the write end of the pipe.
      close(VForkFDs[1]);
    }
  } else {
    Result = CloneFork(flags, exit_signal);
  }
  const bool IsChild = Result == 0;

  if (IsChild) {
    auto ThreadObject = static_cast<FEX::HLE::ThreadStateObject*>(Thread->FrontendPtr);
    // Unlock the mutexes on both sides of the fork
    FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild);

    // Restore the signal mask that was active before the fork.
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask));

    // Child
    // update the internal TID
    ThreadObject->ThreadInfo.TID = FHU::Syscalls::gettid();
    ThreadObject->ThreadInfo.PID = ::getpid();
    FEX::HLE::_SyscallHandler->FM.UpdatePID(ThreadObject->ThreadInfo.PID);
    ThreadObject->ThreadInfo.clear_child_tid = nullptr;

    // only a  single thread running so no need to remove anything from the thread array

    // Handle child setup now
    if (stack != nullptr) {
      // use specified stack
      Frame->State.gregs[FEXCore::X86State::REG_RSP] = reinterpret_cast<uint64_t>(stack) + stack_size;
    } else {
      // In the case of fork and nullptr stack then the child uses the same stack space as the parent
      // Same virtual address, different addressspace
    }

    if (FEX::HLE::_SyscallHandler->Is64BitMode()) {
      if (flags & CLONE_SETTLS) {
        x64::SetThreadArea(Frame, tls);
      }
    } else {
      // 32bit TLS doesn't just set the fs register
      if (flags & CLONE_SETTLS) {
        x32::SetThreadArea(Frame, tls);
      }
    }

    // Sets the child TID to the pointer in ChildTID
    if (flags & CLONE_CHILD_SETTID) {
      ThreadObject->ThreadInfo.set_child_tid = child_tid;
      *child_tid = ThreadObject->ThreadInfo.TID;
    }

    // When the thread exits, clear the child thread ID at ChildTID
    // Additionally wakeup a futex at that address
    // Address /may/ be changed with SET_TID_ADDRESS syscall
    if (flags & CLONE_CHILD_CLEARTID) {
      ThreadObject->ThreadInfo.clear_child_tid = child_tid;
    }

    // the rest of the context remains as is, this thread will keep executing
    return 0;
  } else {
    // Parent (or clone failure): only publish the child TID when the clone succeeded.
    if (Result != -1) {
      if (flags & CLONE_PARENT_SETTID) {
        *parent_tid = Result;
      }
    }

    // Unlock the mutexes on both sides of the fork
    FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, IsChild);

    // Restore the signal mask that was active before the fork.
    ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &Mask, nullptr, sizeof(Mask));

    // VFork needs the parent to wait for the child to exit.
    if (IsVFork) {
      // Wait for the read end of the pipe to close.
      pollfd PollFD {};
      PollFD.fd = VForkFDs[0];
      PollFD.events = POLLIN | POLLOUT | POLLRDHUP | POLLERR | POLLHUP | POLLNVAL;

      // Mask all signals until the child process returns.
      sigset_t SignalMask {};
      sigfillset(&SignalMask);
      // Retry only when ppoll was interrupted (EINTR); any other outcome ends the wait.
      while (ppoll(&PollFD, 1, nullptr, &SignalMask) == -1 && errno == EINTR)
        ;

      // Close the read end now.
      close(VForkFDs[0]);
    }

    // Parent
    // NOTE(review): SYSCALL_ERRNO() presumably turns a -1 Result into -errno; errno could have been
    // modified by the calls made since the clone - confirm whether it should be captured earlier.
    SYSCALL_ERRNO();
  }
}

void RegisterThread(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  REGISTER_SYSCALL_IMPL(rt_sigreturn, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(true);
    FEX_UNREACHABLE;
  });

  REGISTER_SYSCALL_IMPL(fork, ([](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
                          FEX::HLE::clone3_args args {.Type = TypeOfClone::TYPE_CLONE2,
                                                      .args = {
                                                        .flags = 0,
                                                        .pidfd = 0,
                                                        .child_tid = 0,
                                                        .parent_tid = 0,
                                                        .exit_signal = SIGCHLD,
                                                        .stack = 0,
                                                        .stack_size = 0,
                                                        .tls = 0,
                                                        .set_tid = 0,
                                                        .set_tid_size = 0,
                                                        .cgroup = 0,
                                                      }};

                          return ForkGuest(Frame->Thread, Frame, &args);
                        }));

  REGISTER_SYSCALL_IMPL(vfork, ([](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
                          FEX::HLE::clone3_args args {.Type = TypeOfClone::TYPE_CLONE2,
                                                      .args = {
                                                        .flags = CLONE_VFORK,
                                                        .pidfd = 0,
                                                        .child_tid = 0,
                                                        .parent_tid = 0,
                                                        .exit_signal = SIGCHLD,
                                                        .stack = 0,
                                                        .stack_size = 0,
                                                        .tls = 0,
                                                        .set_tid = 0,
                                                        .set_tid_size = 0,
                                                        .cgroup = 0,
                                                      }};

                          return ForkGuest(Frame->Thread, Frame, &args);
                        }));

  REGISTER_SYSCALL_IMPL(getpgrp, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    uint64_t Result = ::getpgrp();
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(clone3, ([](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::kernel_clone3_args* cl_args, size_t size) -> uint64_t {
                          FEX::HLE::clone3_args args {};
                          args.Type = TypeOfClone::TYPE_CLONE3;
                          memcpy(&args.args, cl_args, std::min(sizeof(FEX::HLE::kernel_clone3_args), size));
                          return CloneHandler(Frame, &args);
                        }));

  REGISTER_SYSCALL_IMPL(exit, [](FEXCore::Core::CpuStateFrame* Frame, int status) -> uint64_t {
    // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread.
    // Since this thread is hard stopping, we can't track the TLS/DTV teardown in FEX's thread handling.
    FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable();
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

    if (ThreadObject->ThreadInfo.clear_child_tid) {
      auto Addr = std::atomic_ref<int32_t>(*ThreadObject->ThreadInfo.clear_child_tid);
      Addr.store(0);
      syscall(SYSCALL_DEF(futex), ThreadObject->ThreadInfo.clear_child_tid, FUTEX_WAKE, ~0ULL, 0, 0, 0);
    }

    ThreadObject->StatusCode = status;

    FEX::HLE::_SyscallHandler->UninstallTLSState(ThreadObject);

    if (ThreadObject->ExecutionThread) {
      // If this is a pthread based execution thread, then there is more work to be done.
      // Delegate final deletion and cleanup to the pthreads Thread management.
      FEX::LinuxEmulation::Threads::LongjumpDeallocateAndExit(ThreadObject, status);
    } else {
      FEX::HLE::_SyscallHandler->TM.DestroyThread(ThreadObject, true);
      FEX::LinuxEmulation::Threads::DeallocateStackObjectAndExit(nullptr, status);
    }
    // This will never be reached
    std::terminate();
  });

  REGISTER_SYSCALL_IMPL(prctl,
                        [](FEXCore::Core::CpuStateFrame* Frame, int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
                           unsigned long arg5) -> uint64_t {
                          uint64_t Result {};
#ifndef PR_GET_AUXV
#define PR_GET_AUXV 0x41555856
#endif
                          switch (option) {
                          case PR_SET_SECCOMP: {
                            uint32_t Operation {};
                            if (arg2 == SECCOMP_MODE_STRICT) Operation = SECCOMP_SET_MODE_STRICT;
                            if (arg2 == SECCOMP_MODE_FILTER) Operation = SECCOMP_SET_MODE_FILTER;

                            return FEX::HLE::_SyscallHandler->SeccompEmulator.Handle(Frame, Operation, 0, reinterpret_cast<void*>(arg3));
                          }
                          case PR_GET_SECCOMP: return FEX::HLE::_SyscallHandler->SeccompEmulator.GetSeccomp(Frame);
                          case PR_GET_AUXV: {
                            if (arg4 || arg5) {
                              return -EINVAL;
                            }

                            void* addr = reinterpret_cast<void*>(arg2);
                            size_t UserSize = reinterpret_cast<size_t>(arg3);

                            const auto auxv = FEX::HLE::_SyscallHandler->GetCodeLoader()->GetAuxv();
                            const auto auxvBase = auxv.address;
                            const auto auxvSize = auxv.size;
                            size_t MinSize = std::min(auxvSize, UserSize);

                            memcpy(addr, reinterpret_cast<void*>(auxvBase), MinSize);

                            // Returns the size of auxv without truncation.
                            return auxvSize;
                          }
                          default: Result = ::prctl(option, arg2, arg3, arg4, arg5); break;
                          }
                          SYSCALL_ERRNO();
                        });

  REGISTER_SYSCALL_IMPL(arch_prctl, [](FEXCore::Core::CpuStateFrame* Frame, int code, unsigned long addr) -> uint64_t {
    uint64_t Result {};
    switch (code) {
    case 0x1001: // ARCH_SET_GS
      if (addr >= SyscallHandler::TASK_MAX_64BIT) {
        // Ignore a non-canonical address
        return -EPERM;
      }
      Frame->State.gs_cached = addr;
      Result = 0;
      break;
    case 0x1002: // ARCH_SET_FS
      if (addr >= SyscallHandler::TASK_MAX_64BIT) {
        // Ignore a non-canonical address
        return -EPERM;
      }
      Frame->State.fs_cached = addr;
      Result = 0;
      break;
    case 0x1003: // ARCH_GET_FS
      *reinterpret_cast<uint64_t*>(addr) = Frame->State.fs_cached;
      Result = 0;
      break;
    case 0x1004: // ARCH_GET_GS
      *reinterpret_cast<uint64_t*>(addr) = Frame->State.gs_cached;
      Result = 0;
      break;
    case 0x3001:        // ARCH_CET_STATUS
      Result = -EINVAL; // We don't support CET, return EINVAL
      break;
    case 0x1011: // ARCH_GET_CPUID
      return 1;
      break;
    case 0x1012:      // ARCH_SET_CPUID
      return -ENODEV; // Claim we don't support faulting on CPUID
      break;
    default:
      LogMan::Msg::EFmt("Unknown prctl: 0x{:x}", code);
      Result = -EINVAL;
      break;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL(set_tid_address, [](FEXCore::Core::CpuStateFrame* Frame, int* tidptr) -> uint64_t {
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    ThreadObject->ThreadInfo.clear_child_tid = tidptr;
    return ThreadObject->ThreadInfo.TID;
  });

  REGISTER_SYSCALL_IMPL(exit_group, [](FEXCore::Core::CpuStateFrame* Frame, int status) -> uint64_t {
    Frame->Thread->CTX->FlushAndCloseCodeMap();

    // Save telemetry if we're exiting.
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->SaveTelemetry();
    FEX::HLE::_SyscallHandler->TM.CleanupForExit();

    syscall(SYSCALL_DEF(exit_group), status);
    // This will never be reached
    std::terminate();
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Thread.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#pragma once

#include <cstdint>
#include <sys/types.h>

namespace FEXCore::Context {
class Context;
}

namespace FEXCore::Core {
struct CpuStateFrame;
struct InternalThreadState;
} // namespace FEXCore::Core

namespace FEX::HLE {
struct clone3_args;
struct ThreadStateObject;

// Creates the thread object for a guest thread-style clone.
// CloneHandler (Syscalls.cpp) calls this on the CLONE_THREAD path and reads `ThreadInfo.TID` and
// `ExecutionThread` from the returned object.
FEX::HLE::ThreadStateObject* CreateNewThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args);

// Invoked from CloneBody (Syscalls.cpp) on the child side of a host clone/clone3.
// `Frame` is the copy of the parent's frame and `GuestArgs` the copy of the guest's clone arguments.
// CloneBody passes the returned value on as the exit status of the child.
uint64_t HandleNewClone(FEX::HLE::ThreadStateObject* Thread, FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame,
                        FEX::HLE::clone3_args* GuestArgs);

// Called by CloneHandler (Syscalls.cpp) when CLONE_THREAD is not set; the returned value is handed
// back to the guest as the syscall result.
uint64_t ForkGuest(FEXCore::Core::InternalThreadState* Thread, FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args);
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls/Timer.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-shared
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <FEXCore/IR/IR.h>

#include <stddef.h>
#include <stdint.h>
#include <signal.h>
#include <sys/time.h>
#include <time.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace FEX::HLE {
// Registers the timer-related syscalls that are shared between guest ABIs.
// Each handler forwards directly to the host libc wrapper and converts the result with SYSCALL_ERRNO().
void RegisterTimer(FEX::HLE::SyscallHandler* Handler) {
  // NOTE(review): nothing from FEXCore::IR is referenced by the visible code below;
  // presumably kept for the registration macro - TODO confirm.
  using namespace FEXCore::IR;

  // alarm(2): passthrough to the host.
  REGISTER_SYSCALL_IMPL(alarm, [](FEXCore::Core::CpuStateFrame* Frame, unsigned int seconds) -> uint64_t {
    uint64_t Result = ::alarm(seconds);
    SYSCALL_ERRNO();
  });

  // pause(2): passthrough to the host.
  REGISTER_SYSCALL_IMPL(pause, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    uint64_t Result = ::pause();
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
category: LinuxSyscalls ~ Linux syscall emulation, marshaling and passthrough
tags: LinuxSyscalls|common
desc: Glue logic, brk allocations
$end_info$
*/

#include "CodeLoader.h"

#include "FEXHeaderUtils/StringArgumentParser.h"
#include "Linux/Utils/ELFContainer.h"
#include "Linux/Utils/ELFParser.h"

#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Syscalls/Thread.h"
#include "LinuxSyscalls/Utils/Threads.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x64/Types.h"
#include "Thunks.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/FileLoading.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/sstream.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <algorithm>
#include <alloca.h>
#include <charconv>
#include <functional>
#include <linux/audit.h>
#include <linux/seccomp.h>
#include <memory>
#include <regex>
#include <sched.h>
#include <span>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string.h>
#include <signal.h>
#include <system_error>
#include <syscall.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <unistd.h>

namespace FEX::HLE {
class SignalDelegator;
SyscallHandler* _SyscallHandler {};

// Emulates the legacy getdents syscall on top of the host getdents64.
//
// The host fills `dirp` with linux_dirent_64 records, which are then rewritten in place into records of type T.
//
// Template parameters:
//   IncrementOffset - when true, d_off is replaced by a counter starting at 1 instead of the host's d_off.
//   T               - the guest-visible dirent layout.
// Parameters:
//   fd    - directory file descriptor.
//   dirp  - guest buffer, used both as the host output buffer and as the final destination.
//   count - size of `dirp` in bytes.
// Returns the number of bytes of translated records, -EOVERFLOW if an inode number does not fit in T::d_ino,
// or the host error via SYSCALL_ERRNO().
template<bool IncrementOffset, typename T>
uint64_t GetDentsEmulation(int fd, T* dirp, uint32_t count) {
  uint64_t Result = syscall(SYSCALL_DEF(getdents64), static_cast<uint64_t>(fd), dirp, static_cast<uint64_t>(count));

  // Now copy back in to the array we were given
  // (Result is unsigned, so the comparison below is against the all-ones value a failing syscall() produces.)
  if (Result != -1) {
    // If the outgoing d_ino is smaller than the incoming d_ino from the kernel
    // Then we need to check for overflow before writing any of the data back
    if constexpr (sizeof(decltype(FEX::HLE::x64::linux_dirent_64::d_ino)) > sizeof(decltype(T::d_ino))) {
      uint64_t TmpOffset = 0;
      while (TmpOffset < Result) {
        FEX::HLE::x64::linux_dirent_64* Tmp = (FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast<uint64_t>(dirp) + TmpOffset);
        // Round-trip the inode through the narrower type to detect truncation.
        decltype(T::d_ino) Result_d_ino = Tmp->d_ino;

        if (Result_d_ino != Tmp->d_ino) {
          // The resulting d_ino truncated, return error
          return -EOVERFLOW;
        }
        TmpOffset += Tmp->d_reclen;
      }
    }

    // Offset    - write cursor for translated (outgoing) records.
    // TmpOffset - read cursor over the host's linux_dirent_64 records.
    // OffsetIndex - synthetic d_off value used when IncrementOffset is set.
    uint64_t Offset = 0;
    uint64_t TmpOffset = 0;
    size_t OffsetIndex = 1;
    // With how the emulation occurs we will always return a smaller buffer than what was given to us.
    // We need to be careful with the in-place translation that occurs here, the data returning to the guest is guaranteed to be smaller
    // than the data returned by getdents64.
    // This means FEX is guaranteed to /never/ fill the full getdents buffer to the guest, but we may temporarily use it all.
    // NOTE(review): Outgoing and Tmp can alias the same bytes, and Tmp fields (d_reclen, d_type) are read after
    // Outgoing fields have been written. Correctness depends on the exact layouts of T and linux_dirent_64,
    // which are not visible here - verify before reordering any statement in this loop.
    while (TmpOffset < Result) {
      T* Outgoing = (T*)(reinterpret_cast<uint64_t>(dirp) + Offset);
      FEX::HLE::x64::linux_dirent_64* Tmp = (FEX::HLE::x64::linux_dirent_64*)(reinterpret_cast<uint64_t>(dirp) + TmpOffset);

      // A zero-length record would never advance the read cursor; stop translating.
      if (!Tmp->d_reclen) {
        break;
      }

      // Shrink the record by the header size difference, then round up to the alignment of the 64-bit d_ino.
      size_t NewRecLen = FEXCore::AlignUp(Tmp->d_reclen - (sizeof(std::remove_reference<decltype(*Tmp)>::type) - sizeof(*Outgoing)),
                                          alignof(decltype(Tmp->d_ino)));
      Outgoing->d_ino = Tmp->d_ino;

      // 32-bit getdents can't safely handle d_off
      // A safe way of emulating this is to just use an incrementing offset from 1
      Outgoing->d_off = IncrementOffset ? OffsetIndex : Tmp->d_off;
      size_t OffsetOfName = offsetof(std::remove_reference<decltype(*Tmp)>::type, d_name);
      Outgoing->d_reclen = NewRecLen;

      // Copies null character as well
      size_t NameLength = Tmp->d_reclen - OffsetOfName - 1;
      // memmove rather than memcpy: source and destination live in the same buffer.
      memmove(Outgoing->d_name, Tmp->d_name, NameLength);

      // Copy the hidden d_type flag
      // (stored in the last byte of the outgoing record)
      Outgoing->d_name[Outgoing->d_reclen - offsetof(T, d_name) - 1] = Tmp->d_type;

      TmpOffset += Tmp->d_reclen;

      // Protected entries are hidden from the guest: the write cursor is not advanced,
      // so the next record overwrites this one.
      if (FEX::HLE::_SyscallHandler->FM.IsProtectedFile(fd, Outgoing->d_ino)) {
        continue;
      }

      // Outgoing is 5 bytes smaller
      Offset += NewRecLen;

      ++OffsetIndex;
    }
    // Report the translated size, not the host size.
    Result = Offset;
  }
  SYSCALL_ERRNO();
}

// Explicit instantiation for the 64-bit guest getdents layout (host d_off preserved).
template uint64_t GetDentsEmulation<false>(int, FEX::HLE::x64::linux_dirent*, uint32_t);

// Explicit instantiation for the 32-bit guest getdents layout (synthetic incrementing d_off).
template uint64_t GetDentsEmulation<true>(int, FEX::HLE::x32::linux_dirent_32*, uint32_t);

// Extracts the interpreter path from the start of a script file.
//
// Data holds the leading bytes of the file. If they begin with "#!", the remainder of the first line is
// split into arguments and the first argument is taken as the interpreter. Absolute interpreter paths are
// prefixed with the RootFS path; relative ones are used as-is.
//
// Returns the interpreter path if it exists on disk, otherwise an empty string
// (not a shebang file, blank shebang line, or interpreter missing).
static fextl::string GetShebangInterpFile(std::span<char> Data) {
  // File isn't large enough to even contain a shebang.
  if (Data.size() <= 2) {
    return {};
  }

  // Only shebang files are handled.
  if (Data[0] != '#' || Data[1] != '!') {
    return {};
  }

  fextl::string InterpreterLine {Data.begin() + 2, // strip off "#!" prefix
                                 std::find(Data.begin(), Data.end(), '\n')};
  fextl::vector<std::string_view> ShebangArguments = FHU::ParseArgumentsFromString(InterpreterLine);

  // A shebang line without an interpreter (e.g. "#!\n" or "#!   \n") parses to nothing.
  // Indexing the first argument in that case would read out of bounds.
  if (ShebangArguments.empty() || ShebangArguments[0].empty()) {
    return {};
  }

  // Executable argument
  fextl::string ShebangProgram(ShebangArguments[0]);

  // If the filename is absolute then prepend the rootfs
  // If it is relative then don't append the rootfs
  if (ShebangProgram[0] == '/') {
    ShebangProgram = FEX::HLE::_SyscallHandler->RootFSPath() + ShebangProgram;
  }

  if (FHU::Filesystem::Exists(ShebangProgram)) {
    return ShebangProgram;
  }

  return {};
}

// Determines the shebang interpreter of the file behind FD.
//
// Returns the interpreter path, or an empty string if the file can't be read, isn't a shebang script,
// or its interpreter doesn't exist.
static fextl::string GetShebangInterpFD(int FD) {
  // We don't know the state of the FD coming in since this might be a guest tracked FD.
  // Need to be extra careful here not to adjust file offsets and status flags.
  //
  // Can't use dup since that makes the FD have the same file description backing both FDs.
  // pread is used because it reads at an explicit offset without moving the file position.

  // The maximum length of the shebang line is `#!` + 255 chars
  std::array<char, 257> Header;
  const auto ReadSize = pread(FD, Header.data(), Header.size(), 0);

  // pread returns -1 on failure. Converting that to the span's size would claim a gigantic buffer
  // and let the parser read uninitialized stack memory. An empty read can't contain a shebang either.
  if (ReadSize <= 0) {
    return {};
  }

  return GetShebangInterpFile(std::span<char>(Header.data(), static_cast<size_t>(ReadSize)));
}

// Looks up the shebang interpreter for the file at Filename.
// Yields an empty string when the file can't be opened or has no usable shebang.
static fextl::string GetShebangInterpFilename(const fextl::string& Filename) {
  const int ScriptFD = open(Filename.c_str(), O_RDONLY | O_CLOEXEC);

  if (ScriptFD != -1) {
    // The descriptor is only needed long enough to inspect the header.
    fextl::string Interpreter = GetShebangInterpFD(ScriptFD);
    close(ScriptFD);
    return Interpreter;
  }

  // Couldn't open the file, so there is nothing to inspect.
  return {};
}

// Shared implementation behind the guest's execve/execveat.
//
// Decides between two strategies:
//  - Pass the request straight to the host execveat (binfmt_misc-compatible x86 ELF, ELF of another
//    architecture, or a script whose interpreter is an ELF of another architecture).
//  - Re-execute `/proc/self/exe` with the code loader's arguments prepended, so the target keeps running under FEX.
//
// Parameters:
//   Frame    - CPU state frame of the calling guest thread.
//   pathname - guest supplied path (may be empty with AT_EMPTY_PATH).
//   argv     - guest argument vector, may be nullptr.
//   envp     - guest environment vector, may be nullptr.
//   Args     - dirfd and flags as given to execveat.
// Returns only on failure, with the negated errno (via SYSCALL_ERRNO) or -ENOENT / -ENOEXEC from the early checks.
uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* const* argv, char* const* envp, ExecveAtArgs Args) {
  auto SyscallHandler = FEX::HLE::_SyscallHandler;
  Frame->Thread->CTX->FlushAndCloseCodeMap();

  fextl::string Filename {};

  fextl::string RootFS = SyscallHandler->RootFSPath();
  ELFLoader::ELFContainer::ELFType Type {};
  ELFLoader::ELFContainer::ELFType InterpreterType {};

  // AT_EMPTY_PATH is only used if the pathname is empty.
  // NOTE(review): pathname is dereferenced without a null check - presumably callers guarantee it; confirm.
  const bool IsFDExec = (Args.flags & AT_EMPTY_PATH) && strlen(pathname) == 0;
  // These strings back pointers that get placed in EnvpArgs, so they must outlive the execveat calls below.
  fextl::string FDExecEnv;
  fextl::string FDSeccompEnv;

  fextl::string ShebangInterpreter {};

  if (IsFDExec) {
    Type = ELFLoader::ELFContainer::GetELFType(Args.dirfd);

    ShebangInterpreter = GetShebangInterpFD(Args.dirfd);
  } else {
    // For absolute paths, check the rootfs first (if available)
    if (pathname[0] == '/') {
      auto Path = SyscallHandler->FM.GetEmulatedPath(pathname, true);
      if (!Path.empty() && FHU::Filesystem::Exists(Path)) {
        Filename = std::move(Path);
      } else {
        Filename = pathname;
      }
    } else {
      Filename = pathname;
    }

    bool exists = FHU::Filesystem::Exists(Filename);
    if (!exists) {
      return -ENOENT;
    }

    int pid = getpid();

    char PidSelfPath[50];
    snprintf(PidSelfPath, 50, "/proc/%i/exe", pid);

    if (strcmp(pathname, "/proc/self/exe") == 0 || strcmp(pathname, "/proc/thread-self/exe") == 0 || strcmp(pathname, PidSelfPath) == 0) {
      // If the application is trying to execve `/proc/self/exe` or its variants,
      // then we need to redirect this path to the true application path.
      // This is because this path is a symlink to the executing application, which is always `FEX`.
      // ex: JRE and shapez.io do this self-execution.
      Filename = SyscallHandler->Filename();
    }

    Type = ELFLoader::ELFContainer::GetELFType(Filename);

    ShebangInterpreter = GetShebangInterpFilename(Filename);
  }

  const bool IsShebang = !ShebangInterpreter.empty();
  if (IsShebang) {
    InterpreterType = ELFLoader::ELFContainer::GetELFType(ShebangInterpreter);
  }

  if (!IsShebang && Type == ELFLoader::ELFContainer::ELFType::TYPE_NONE) {
    // If our interpreter doesn't support this file format AND ELF format is NONE then ENOEXEC
    // binfmt_misc could end up handling this case but we can't know that without parsing binfmt_misc ourselves
    // Return -ENOEXEC until proven otherwise
    return -ENOEXEC;
  }

  fextl::vector<const char*> EnvpArgs {};
  char* const* EnvpPtr = envp;
  bool FDExecCopy {};

  auto SeccompFD = SyscallHandler->SeccompEmulator.SerializeFilters(Frame);
  const auto HasSeccomp = SeccompFD.has_value() && *SeccompFD != -1;

  // Cleanup helpers, only relevant when execveat fails and returns to us.
  auto CloseSeccompFD = [&HasSeccomp, &SeccompFD]() {
    if (HasSeccomp) {
      close(*SeccompFD);
    }
  };

  // Only closes Args.dirfd when it was replaced by our own dup() below.
  auto CloseFDExecFD = [&FDExecCopy, &Args]() {
    if (FDExecCopy) {
      close(Args.dirfd);
    }
  };

  // If we don't have the interpreter installed we need to be extra careful for ENOEXEC
  // Reasoning is that if we try executing a file from FEXLoader then this process loses the ENOEXEC flag
  // Kernel does its own checks for file format support for this
  // We can only call execve directly if we both have an interpreter installed AND were ran with the interpreter
  // If the user ran FEX through FEXLoader then we must go down the emulated path
  uint64_t Result {};

  // In some cases the FD passed in to execveat needs to be copied.
  const bool NeedsFDCopy = [&]() {
    // No need for FD copy when not using FD.
    if (!IsFDExec) {
      return false;
    }

    // NOTE(review): 999.0.0 looks like a placeholder for "no released kernel has the fix yet",
    // which would make this early-out never trigger in practice - confirm.
    if (SyscallHandler->IsHostKernelVersionAtLeast(999, 0, 0)) {
      // Older kernel versions have a bug with the combination of binfmt_misc and anonymous file FDs that set CLOEXEC.
      return false;
    }

    int Flags = fcntl(Args.dirfd, F_GETFD);
    if (!(Flags & FD_CLOEXEC)) {
      // No need for FD copy if FD_CLOEXEC isn't set.
      return false;
    }

    return true;
  }();

  // If the FEX interpreter is installed then just execve the ELF file
  // This will stay inside of our emulated environment since binfmt_misc will capture it
  const bool IsBinfmtCompatible = SyscallHandler->IsInterpreterInstalled() && !NeedsFDCopy &&
                                  (Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_32 || Type == ELFLoader::ELFContainer::ELFType::TYPE_X86_64);

  // We are trying to execute an ELF of a different architecture
  // We can't know if we can support this without architecture specific checks and binfmt_misc parsing
  // Just execve it and let the kernel handle the process
  const bool IsOtherELF = Type == ELFLoader::ELFContainer::ELFType::TYPE_OTHER_ELF;

  // Need to copy over envp variables if we are appending data.
  // Only situation in which an envp copy needs to occur is if we are doing an FD execveat and binfmt_misc can't handle it.
  // Additional tasks that require envp copying in the future:
  // - seccomp inheritance
  // - FEXServer FD inheritance (unshare(CLONE_NEWNET))
  // - FD_CLOEXEC set on FD on anonymous file FD.
  const bool NeedsEnvpCopy = (IsFDExec && !(IsBinfmtCompatible || IsOtherELF)) || HasSeccomp || NeedsFDCopy;

  // We are trying to execute a shebang handled by a different architecture interpreter (e.g. /usr/bin/python from the host FS).
  // In this case we just defer to the kernel.
  const bool IsForeignShebang = (IsShebang && InterpreterType == ELFLoader::ELFContainer::ELFType::TYPE_OTHER_ELF);

  if (NeedsEnvpCopy) {
    if (envp) {
      auto OldEnvp = envp;
      while (*OldEnvp) {
        ///< Copy the pointers to our own vector of environment variables.
        EnvpArgs.emplace_back(*OldEnvp);
        ++OldEnvp;
      }
    }

    // NOTE(review): this branch is also entered for a path-based exec when only HasSeccomp forced the envp copy
    // and binfmt_misc isn't usable; FEX_EXECVEFD then carries whatever dirfd the guest passed - confirm that is intended.
    if (!IsBinfmtCompatible || NeedsFDCopy) {
      if (NeedsFDCopy) {
        // FEX needs the FD to live past execve when binfmt_misc isn't used,
        // so duplicate the FD if FD_CLOEXEC is set, which removes the FD_CLOEXEC flag.
        // NOTE(review): the dup() result is not checked for failure.
        Args.dirfd = dup(Args.dirfd);
        FDExecCopy = true;
      }

      // Remove AT_EMPTY_PATH flag now.
      // We need to emulate this flag with `FEX_EXECVEFD` environment variable.
      // If we passed this flag through to the real `execveat` then the target FD wouldn't get emulated by FEX.
      Args.flags &= ~AT_EMPTY_PATH;

      // Create the environment variable to pass the FD to our FEX.
      // Needs to stick around until execveat completes.
      FDExecEnv = fextl::fmt::format("FEX_EXECVEFD={}", Args.dirfd);

      // Insert the FD for FEX to track.
      EnvpArgs.emplace_back(FDExecEnv.data());
    }

    if (HasSeccomp) {
      // Create the environment variable to pass the FD to our FEX.
      // Needs to stick around until execveat completes.
      FDSeccompEnv = fextl::fmt::format("FEX_SECCOMPFD={}", *SeccompFD);

      // Insert the FD for FEX to track.
      EnvpArgs.emplace_back(FDSeccompEnv.data());
    }

    // Emplace nullptr at the end to stop
    EnvpArgs.emplace_back(nullptr);

    ///< Set the EnvpPtr to our copy.
    EnvpPtr = const_cast<char* const*>(EnvpArgs.data());
  }

  if (!IsFDExec && (IsForeignShebang || IsOtherELF || !IsBinfmtCompatible)) {
    // With a merged RootFS, the entire real filesystem is visible through the rootfs
    // prefix. If we are executing a non-emulated binary, we should do so through the host
    // path.

    auto Path = SyscallHandler->FM.GetHostPath(Filename, true);
    if (!Path.empty() && FHU::Filesystem::Exists(Path)) {
      Filename = std::move(Path);
    }
  }

  // Direct host execution. Control only comes back here if execveat failed.
  if (IsBinfmtCompatible || IsOtherELF || IsForeignShebang) {
    Result = ::syscall(SYS_execveat, Args.dirfd, Filename.c_str(), argv, EnvpPtr, Args.flags);
    CloseSeccompFD();
    CloseFDExecFD();
    SYSCALL_ERRNO();
  }

  // If we are executing an emulated interpreter shebang file through the loader,
  // we need to strip the RootFS prefix. The loader will pass this filename to the
  // interpreter as-is, which will access it using RootFS redirection.
  // Note that unlike above, the prefix is stripped unconditionally (AliasedOnly=false),
  // and the script path need not exist in the host.
  if (IsShebang) {
    auto Path = SyscallHandler->FM.GetHostPath(Filename, false);
    if (!Path.empty()) {
      Filename = std::move(Path);
    }
  }

  // We don't have an interpreter installed or we are executing a non-ELF executable
  // We now need to munge the arguments
  const char NullString[] = "";
  fextl::vector<const char*> ExecveArgs = SyscallHandler->GetCodeLoader()->GetExecveArguments();

  // NOTE(review): when argv is nullptr neither the target filename nor a terminating nullptr is appended here.
  // Verify that GetExecveArguments() already yields a terminated list, or that a null argv can't reach this point.
  if (argv) {
    // Overwrite the filename with the new one we are redirecting to
    ExecveArgs.emplace_back(Filename.c_str());

    auto OldArgv = argv;

    // It is valid to provide nullptr first argument.
    if (*OldArgv) {
      // Skip filename argument
      ++OldArgv;
      while (*OldArgv) {
        // Append the arguments together
        ExecveArgs.emplace_back(*OldArgv);
        ++OldArgv;
      }
    } else {
      // Linux kernel will stick an empty argument in to the argv list if none are provided.
      ExecveArgs.emplace_back(NullString);
    }

    // Emplace nullptr at the end to stop
    ExecveArgs.emplace_back(nullptr);
  }

  // Re-execute ourselves with the loader arguments followed by the guest's arguments.
  Result = ::syscall(SYS_execveat, Args.dirfd, "/proc/self/exe", const_cast<char* const*>(ExecveArgs.data()), EnvpPtr, Args.flags);
  CloseSeccompFD();
  CloseFDExecFD();

  SYSCALL_ERRNO();
}

// True when Flags and Mask share at least one set bit.
static bool AnyFlagsSet(uint64_t Flags, uint64_t Mask) {
  const uint64_t Overlap = Flags & Mask;
  return static_cast<bool>(Overlap);
}

// True when every bit of Mask is also set in Flags (trivially true for an empty Mask).
static bool AllFlagsSet(uint64_t Flags, uint64_t Mask) {
  // Bits requested by Mask that Flags is lacking.
  const uint64_t Missing = Mask & ~Flags;
  return Missing == 0;
}

// State handed from the parent to the child of a host clone/clone3.
// Filled in by Clone2Handler / Clone3Handler and consumed by CloneBody.
struct StackFrameData {
  // Thread state object of the thread that issued the clone.
  FEX::HLE::ThreadStateObject* Thread {};
  // Context of the calling thread.
  FEXCore::Context::Context* CTX {};
  // Byte copy of the parent's CPU state frame taken at clone time.
  FEXCore::Core::CpuStateFrame NewFrame {};
  // Copy of the guest's clone arguments, including the host stack (NewStack / StackSize) allocated for the child.
  FEX::HLE::clone3_args GuestArgs {};
};

// Layout that Clone3Handler places at the top of the child's freshly allocated host stack.
// NOTE(review): field order appears to be load-bearing for the clone3 stack trick
// (Clone3HandlerRet recovers `Data` from the stack position) - do not reorder without confirming.
struct StackFramePlusRet {
  // Clone3Handler stores the address of Clone3HandlerRet here.
  uint64_t Ret;
  // Payload consumed by CloneBody in the child.
  StackFrameData Data;
  // Not written by the visible code; presumably padding for stack alignment - TODO confirm.
  uint64_t Pad;
};

// Child-side body shared by the clone2 and clone3 host paths.
// Runs the new clone to completion, optionally releases the heap-allocated Data,
// then tears down the host stack and exits with the clone's result. Never returns.
[[noreturn]]
static void CloneBody(StackFrameData* Data, bool NeedsDataFree) {
  uint64_t ExitStatus = FEX::HLE::HandleNewClone(Data->Thread, Data->CTX, &Data->NewFrame, &Data->GuestArgs);

  // Fetch the stack pointer first: Data may be gone after the free below.
  auto HostStack = Data->GuestArgs.NewStack;

  if (NeedsDataFree) {
    FEXCore::Allocator::free(Data);
  }

  FEX::LinuxEmulation::Threads::DeallocateStackObjectAndExit(HostStack, ExitStatus);
  FEX_UNREACHABLE;
}

// Child-side entry point for the clone3 host path; its address is stored in StackFramePlusRet::Ret by Clone3Handler.
// Never returns.
[[noreturn]]
static void Clone3HandlerRet() {
  // alloca(0) is used to obtain a pointer at the current stack position.
  // NOTE(review): presumably this lands on the StackFrameData that Clone3Handler wrote directly after `Ret`
  // on the new stack - this depends on the exact stack layout on entry; confirm before changing.
  StackFrameData* Data = (StackFrameData*)alloca(0);
  // Data lives on the child's stack, so CloneBody must not free it.
  CloneBody(Data, false);
}

static int Clone2HandlerRet(void* arg) {
  StackFrameData* Data = (StackFrameData*)arg;
  CloneBody(Data, true);
}

// Clone3 flags
#ifndef CLONE_CLEAR_SIGHAND
#define CLONE_CLEAR_SIGHAND 0x100000000ULL
#endif
#ifndef CLONE_INTO_CGROUP
#define CLONE_INTO_CGROUP 0x200000000ULL
#endif
#ifndef CLONE_NEWTIME
// Overlaps CSIGNAL, can only be used with clone3 and not clone2
#define CLONE_NEWTIME 0x00000080ULL
#endif

// Debug helper: logs one info line per clone flag that is set in Flags.
// The numeric values are spelled out explicitly rather than taken from the system headers.
static void PrintFlags(uint64_t Flags) {
// FLAGPRINT(name, mask): log "\tFlag: name" if any bit of mask is set in Flags.
#define FLAGPRINT(x, y) \
  if (Flags & (y)) LogMan::Msg::IFmt("\tFlag: " #x)
  // CSIGNAL is a multi-bit mask (exit signal), so this prints whenever any of its bits are set.
  FLAGPRINT(CSIGNAL, 0x000000FF);
  FLAGPRINT(CLONE_VM, 0x00000100);
  FLAGPRINT(CLONE_FS, 0x00000200);
  FLAGPRINT(CLONE_FILES, 0x00000400);
  FLAGPRINT(CLONE_SIGHAND, 0x00000800);
  FLAGPRINT(CLONE_PTRACE, 0x00002000);
  FLAGPRINT(CLONE_VFORK, 0x00004000);
  FLAGPRINT(CLONE_PARENT, 0x00008000);
  FLAGPRINT(CLONE_THREAD, 0x00010000);
  FLAGPRINT(CLONE_NEWNS, 0x00020000);
  FLAGPRINT(CLONE_SYSVSEM, 0x00040000);
  FLAGPRINT(CLONE_SETTLS, 0x00080000);
  FLAGPRINT(CLONE_PARENT_SETTID, 0x00100000);
  FLAGPRINT(CLONE_CHILD_CLEARTID, 0x00200000);
  FLAGPRINT(CLONE_DETACHED, 0x00400000);
  FLAGPRINT(CLONE_UNTRACED, 0x00800000);
  FLAGPRINT(CLONE_CHILD_SETTID, 0x01000000);
  FLAGPRINT(CLONE_NEWCGROUP, 0x02000000);
  FLAGPRINT(CLONE_NEWUTS, 0x04000000);
  FLAGPRINT(CLONE_NEWIPC, 0x08000000);
  FLAGPRINT(CLONE_NEWUSER, 0x10000000);
  FLAGPRINT(CLONE_NEWPID, 0x20000000);
  FLAGPRINT(CLONE_NEWNET, 0x40000000);
  FLAGPRINT(CLONE_IO, 0x80000000);
  FLAGPRINT(CLONE_PIDFD, 0x00001000);
  // The clone3-only flags above bit 31 (CLONE_CLEAR_SIGHAND, CLONE_INTO_CGROUP) are not printed here.
#undef FLAGPRINT
};

// Performs a host clone() on behalf of a guest clone (clone2-style) request.
//
// A heap-allocated StackFrameData carries the calling thread, context, guest arguments and a copy of the
// parent's CPU frame over to the child, which starts in Clone2HandlerRet on the host stack described by
// args->NewStack / args->StackSize.
//
// Parameters:
//   Frame - CPU state frame of the calling guest thread.
//   args  - guest clone arguments; NewStack and StackSize must already be populated.
// Returns the child's TID in the parent, or a negated errno on failure. The child never returns from here.
static uint64_t Clone2Handler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) {
  StackFrameData* Data = (StackFrameData*)FEXCore::Allocator::malloc(sizeof(StackFrameData));
  if (!Data) {
    // Without the hand-off block the child can't be started.
    return -ENOMEM;
  }

  Data->Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
  Data->CTX = Frame->Thread->CTX;
  Data->GuestArgs = *args;

  // Create a copy of the parent frame
  memcpy(&Data->NewFrame, Frame, sizeof(FEXCore::Core::CpuStateFrame));

  // Remove flags that will break us
  constexpr uint64_t INVALID_FOR_HOST = CLONE_SETTLS;
  uint64_t Flags = (args->args.flags & ~INVALID_FOR_HOST) | args->args.exit_signal;
  uint64_t Result = ::clone(Clone2HandlerRet,                                    // To be called function
                            (void*)((uint64_t)args->NewStack + args->StackSize), // Stack
                            Flags,                                               // Flags
                            Data,                                                // Argument
                            (pid_t*)args->args.parent_tid,                       // parent_tid
                            0,                                                   // XXX: What is correct for this? tls
                            (pid_t*)args->args.child_tid);                       // child_tid

  // Only parent will get here

  // The child frees Data itself (CloneBody), but that only releases the parent's allocation when the
  // address space is shared. If the clone failed there is no child at all, and without CLONE_VM the child
  // works on its own copy - in both cases the parent's block would otherwise leak.
  const bool ChildSharesMemory = (Flags & CLONE_VM) != 0;
  if (Result == -1 || !ChildSharesMemory) {
    // Keep errno intact for SYSCALL_ERRNO below.
    const int SavedErrno = errno;
    FEXCore::Allocator::free(Data);
    errno = SavedErrno;
  }

  SYSCALL_ERRNO();
}

// Performs a host clone3 on behalf of a guest clone3 request.
//
// Instead of a heap allocation, the hand-off data (StackFramePlusRet) is written at the top of the child's
// host stack, and the stack size reported to the kernel is reduced so that region sits just above the
// child's initial stack pointer.
//
// Parameters:
//   Frame - CPU state frame of the calling guest thread.
//   args  - guest clone arguments; NewStack and StackSize must already be populated.
// Returns the raw clone3 result in the parent, or a negated errno on failure (via SYSCALL_ERRNO).
static uint64_t Clone3Handler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) {
  constexpr size_t Offset = sizeof(StackFramePlusRet);
  // Carve the hand-off block out of the highest addresses of the new stack.
  StackFramePlusRet* Data = (StackFramePlusRet*)(reinterpret_cast<uint64_t>(args->NewStack) + args->StackSize - Offset);
  // NOTE(review): presumably the child reaches Clone3HandlerRet by returning through this slot - confirm per host architecture.
  Data->Ret = (uint64_t)Clone3HandlerRet;
  Data->Data.Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
  Data->Data.CTX = Frame->Thread->CTX;
  Data->Data.GuestArgs = *args;

  // Build the host argument block from the guest's, substituting the host stack and dropping TLS.
  FEX::HLE::kernel_clone3_args HostArgs {};
  HostArgs.flags = args->args.flags;
  HostArgs.pidfd = args->args.pidfd;
  HostArgs.child_tid = args->args.child_tid;
  HostArgs.parent_tid = args->args.parent_tid;
  HostArgs.exit_signal = args->args.exit_signal;
  // Host stack is always created
  HostArgs.stack = reinterpret_cast<uint64_t>(args->NewStack);
  HostArgs.stack_size = args->StackSize - Offset; // Needs to be 16 byte aligned
  HostArgs.tls = 0;                               // XXX: What is correct for this?
  HostArgs.set_tid = args->args.set_tid;
  HostArgs.set_tid_size = args->args.set_tid_size;
  HostArgs.cgroup = args->args.cgroup;

  // Create a copy of the parent frame
  memcpy(&Data->Data.NewFrame, Frame, sizeof(FEXCore::Core::CpuStateFrame));
  uint64_t Result = ::syscall(SYSCALL_DEF(clone3), &HostArgs, sizeof(HostArgs));

  // Only parent will get here
  SYSCALL_ERRNO();
};

// Common entry point for the guest's clone and clone3 syscalls.
//
// Dispatches to one of three strategies:
//  - Host clone/clone3 passthrough (Clone2Handler / Clone3Handler) when flags are present that FEX doesn't
//    emulate itself and CLONE_THREAD is not set.
//  - ForkGuest for process-style clones without CLONE_THREAD.
//  - CreateNewThread for CLONE_THREAD clones.
//
// Parameters:
//   Frame - CPU state frame of the calling guest thread.
//   args  - guest clone arguments plus FEX bookkeeping (Type, NewStack, StackSize, SignalMask).
// Returns the new task's ID in the parent or a negated errno.
uint64_t CloneHandler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args) {
  uint64_t flags = args->args.flags;

  if (flags & CLONE_CLEAR_SIGHAND) {
    // CLONE_CLEAR_SIGHAND was added in kernel 5.5. FEX doesn't properly support this.
    // glibc started using this flag in 2.38 as an optimization for posix_spawn.
    // If clone returns EINVAL or ENOSYS then it will fallback to the non-optimized path.
    LogMan::Msg::IFmt("CLONE_CLEAR_SIGHAND passed to clone3. Returning EINVAL.");
    return -EINVAL;
  }

  // Returns true if the request must be forwarded to the host clone implementation.
  auto HasUnhandledFlags = [](FEX::HLE::clone3_args* args) -> bool {
    // CLONE_CLEAR_SIGHAND can't actually be seen here since it is rejected above.
    constexpr uint64_t UNHANDLED_FLAGS = CLONE_NEWNS |
                                         // CLONE_UNTRACED |
                                         CLONE_NEWCGROUP | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWUSER | CLONE_NEWPID | CLONE_NEWNET |
                                         CLONE_IO | CLONE_CLEAR_SIGHAND | CLONE_INTO_CGROUP;

    if ((args->args.flags & UNHANDLED_FLAGS) != 0) {
      // Basic unhandled flags
      return true;
    }

    if (args->args.set_tid_size > 0) {
      // set_tid isn't exposed through anything other than clone3
      return true;
    }

    if (args->Type == TypeOfClone::TYPE_CLONE3) {
      if (AnyFlagsSet(args->args.flags, CLONE_NEWTIME)) {
        // New time namespace overlaps with CSIGNAL, only available in clone3
        return true;
      }
    }

    // NOTE(review): the two branches below log an "Unsupported flags" message but still return false,
    // i.e. the request continues down the emulated path - confirm this is intentional.
    if (AnyFlagsSet(args->args.flags, CLONE_THREAD)) {
      if (!AllFlagsSet(args->args.flags, CLONE_SYSVSEM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)) {
        LogMan::Msg::IFmt("clone: CLONE_THREAD: Unsupported flags w/ CLONE_THREAD (Shared Resources), {:X}", args->args.flags);
        return false;
      }
    } else {
      if (AnyFlagsSet(args->args.flags, CLONE_SYSVSEM | CLONE_SIGHAND | CLONE_VM)) {
        // CLONE_VM is particularly nasty here
        // Memory regions at the point of clone(More similar to a fork) are shared
        LogMan::Msg::IFmt("clone: Unsupported flags w/o CLONE_THREAD (Shared Resources), {:X}", args->args.flags);
        return false;
      }
    }

    // We support everything here
    return false;
  };

  // If there are flags that can't be handled regularly then we need to hand off to the true clone handler
  if (HasUnhandledFlags(args)) {
    if (!AnyFlagsSet(flags, CLONE_THREAD)) {
      // Has an unsupported flag
      // Fall to a handler that can handle this case

      // Block all signals across the clone; the previous mask is written back into args->SignalMask
      // so it can be restored in the parent below.
      args->SignalMask = ~0ULL;
      ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->SignalMask, &args->SignalMask, sizeof(args->SignalMask));

      // Need to create a stack for the host thread.
      // LockBeforeFork grabs the allocator mutex to block allocations temporarily, so this must be allocated before
      // NOTE(review): the visible code never releases NewStack in the parent if the host clone fails - confirm ownership.
      args->StackSize = FEX::LinuxEmulation::Threads::STACK_SIZE;
      args->NewStack = FEX::LinuxEmulation::Threads::AllocateStackObject();

      FEX::HLE::_SyscallHandler->LockBeforeFork(Frame->Thread);

      uint64_t Result {};
      if (args->Type == TYPE_CLONE2) {
        Result = Clone2Handler(Frame, args);
      } else {
        Result = Clone3Handler(Frame, args);
      }

      // Non-zero covers both success (child ID) and failure (negated errno) in the parent.
      if (Result != 0) {
        // Parent
        // Unlock the mutexes on both sides of the fork
        FEX::HLE::_SyscallHandler->UnlockAfterFork(Frame->Thread, false);

        ::syscall(SYS_rt_sigprocmask, SIG_SETMASK, &args->SignalMask, nullptr, sizeof(args->SignalMask));
      }
      return Result;
    } else {
      LogMan::Msg::IFmt("Unsupported flag with CLONE_THREAD. This breaks TLS, falling down classic thread path");
      PrintFlags(flags);
    }
  }

  constexpr uint64_t TASK_MAX = (1ULL << 48); // 48-bits until we can query the host side VA sanely. AArch64 doesn't expose this in cpuinfo
  // Reject TLS pointers outside of the assumed guest address space.
  if (args->args.tls && args->args.tls >= TASK_MAX) {
    return -EPERM;
  }

  auto Thread = Frame->Thread;

  if (AnyFlagsSet(flags, CLONE_PTRACE)) {
    // Only logged; execution continues as if the flag wasn't set.
    PrintFlags(flags);
    LogMan::Msg::DFmt("clone: Ptrace* not supported");
  }

  if (!(flags & CLONE_THREAD)) {
    // CLONE_PARENT is ignored (Implied by CLONE_THREAD)
    return FEX::HLE::ForkGuest(Thread, Frame, args);
  } else {
    auto NewThread = FEX::HLE::CreateNewThread(Thread->CTX, Frame, args);

    // Return the new threads TID
    // (read before a potential DestroyThread below)
    uint64_t Result = NewThread->ThreadInfo.TID;

    if (flags & CLONE_VFORK) {
      // If VFORK is set then the calling process is suspended until the thread exits with execve or exit
      NewThread->ExecutionThread->join(nullptr);

      // Normally a thread cleans itself up on exit. But because we need to join, we are now responsible
      FEX::HLE::_SyscallHandler->TM.DestroyThread(NewThread);
    }

    SYSCALL_ERRNO();
  }
};

// Emulates the guest's brk syscall.
//
// The program break region starts at DataSpace. DataSpaceSize is the guest-visible size of the region and
// DataSpaceMappedSize is the page-aligned amount currently backed by a mapping.
//
// Parameters:
//   Frame - CPU state frame of the calling guest thread.
//   Addr  - requested new end of the break region, or nullptr to query the current one.
// Returns the program break after the call. On allocation failure the previous break is returned unchanged.
uint64_t SyscallHandler::HandleBRK(FEXCore::Core::CpuStateFrame* Frame, void* Addr) {
  std::lock_guard<std::mutex> lk(MMapMutex);

  if (Addr == nullptr) { // Just wants to get the location of the program break atm
    return DataSpace + DataSpaceSize;
  }

  // Allocating out data space
  const uint64_t NewEnd = reinterpret_cast<uint64_t>(Addr);
  if (NewEnd < DataSpace) {
    // Not allowed to move brk end below original start
    // Set the size to zero
    DataSpaceSize = 0;

    // Munmap the whole space.
    [[maybe_unused]] auto ok = GuestMunmap(Frame->Thread, reinterpret_cast<void*>(DataSpace), DataSpaceMappedSize);
    LOGMAN_THROW_A_FMT(ok != -1, "Munmap failed");
    DataSpaceMappedSize = 0;

    return DataSpace + DataSpaceSize;
  }

  const uint64_t NewSize = NewEnd - DataSpace;
  const uint64_t NewSizeAligned = FEXCore::AlignUp(NewSize, FEXCore::Utils::FEX_PAGE_SIZE);

  if (NewSizeAligned < DataSpaceMappedSize) {
    // If we are shrinking the brk then munmap the ranges
    // That way we gain the memory back and also give the application zero pages if it allocates again
    // DataSpaceMappedSize is always page aligned

    const uint64_t RemainingSize = DataSpaceMappedSize - NewSizeAligned;
    // We have pages we can unmap
    // `ok` is only consumed by the assertion macro, so mark it like the other munmap call sites do.
    [[maybe_unused]] auto ok = GuestMunmap(Frame->Thread, reinterpret_cast<void*>(DataSpace + NewSizeAligned), RemainingSize);
    LOGMAN_THROW_A_FMT(ok != -1, "Munmap failed");

    DataSpaceMappedSize = NewSizeAligned;
  } else if (NewSize > DataSpaceMappedSize) {
    // Growing past what is mapped: extend the mapping up to the next page boundary.
    const uint64_t AllocateNewSize = NewSizeAligned - DataSpaceMappedSize;
    if (!Is64BitMode() && (DataSpace + DataSpaceMappedSize + AllocateNewSize > 0x1'0000'0000ULL)) {
      // If we are 32bit and we tried going above the 32bit limit then out of memory
      return DataSpace + DataSpaceSize;
    }

    // MAP_FIXED_NOREPLACE: the extension must land directly behind the current mapping without
    // clobbering anything that already lives there.
    const uint64_t NewBRK = (uint64_t)GuestMmap(Frame->Thread, (void*)(DataSpace + DataSpaceMappedSize), AllocateNewSize,
                                                PROT_READ | PROT_WRITE, MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (FEX::HLE::HasSyscallError(NewBRK)) {
      // If we couldn't allocate a new region then out of memory
      return DataSpace + DataSpaceSize;
    }

    // Increase our BRK size
    DataSpaceMappedSize += AllocateNewSize;
  }

  DataSpaceSize = NewSize;
  return DataSpace + DataSpaceSize;
}

// Records where the guest program break starts and releases the region the frontend pre-mapped for it.
//
// Base: start address of the brk area.
// Size: number of bytes the frontend mapped at Base.
void SyscallHandler::DefaultProgramBreak(uint64_t Base, uint64_t Size) {
  DataSpace = Base;

  // The frontend hands over a full 8MB of SBRK space mapped PROT_READ | PROT_WRITE.
  // That guarantees some free room in front of brk, but the room does not need to stay reserved,
  // so drop the mapping right away and let other allocations land in the intersecting range.
  void* const BreakStart = reinterpret_cast<void*>(DataSpace);
  [[maybe_unused]] const auto UnmapResult = GuestMunmap(nullptr, BreakStart, Size);
  LOGMAN_THROW_A_FMT(UnmapResult != -1, "Munmap failed");

  // Nothing is mapped for brk anymore.
  DataSpaceMappedSize = 0;
}

// Constructs the syscall handler.
//
// _CTX:              context stored in CTX and handed to the thread manager and file manager.
// _SignalDelegation: signal delegator handed to the thread manager and seccomp emulator, and stored.
// ThunkHandler:      thunk handler stored for later use (the parameter shadows the member of the same name).
//
// Side effects visible here: publishes `this` through the global FEX::HLE::_SyscallHandler pointer and
// registers HandleSegfault as a host SIGSEGV handler.
SyscallHandler::SyscallHandler(FEXCore::Context::Context* _CTX, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler)
  : TM {_CTX, _SignalDelegation}
  , SeccompEmulator {this, _SignalDelegation}
  , FM {_CTX}
  , CTX {_CTX}
  , SignalDelegation {_SignalDelegation}
  , ThunkHandler {ThunkHandler} {
  FEX::HLE::_SyscallHandler = this;
  // The guest version is derived from the host version, so the host version must be computed first.
  HostKernelVersion = CalculateHostKernelVersion();
  GuestKernelVersion = CalculateGuestKernelVersion();
  Alloc32Handler = FEX::HLE::Create32BitAllocator();

  SignalDelegation->RegisterHostSignalHandler(SIGSEGV, HandleSegfault, true);

  // Parse the extended volatile metadata configuration string once at startup.
  ExtendedMetaData = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(FEXCore::Config::Get_EXTENDEDVOLATILEMETADATA()());
}

// Releases whatever part of the brk region is still mapped.
SyscallHandler::~SyscallHandler() {
  // munmap() rejects a zero length with EINVAL, so skip the call entirely when no brk pages are mapped
  // (which is the state DefaultProgramBreak leaves behind until the guest grows its break).
  if (DataSpaceMappedSize != 0) {
    FEXCore::Allocator::munmap(reinterpret_cast<void*>(DataSpace), DataSpaceMappedSize);
  }
}

// Queries the running host kernel release via uname() and packs it as
// (Major << 24) | (Minor << 16) | Patch.
//
// Returns 0 when uname() fails. Components that are missing or unparsable are reported as 0.
uint32_t SyscallHandler::CalculateHostKernelVersion() {
  struct utsname buf {};
  if (uname(&buf) == -1) {
    return 0;
  }

  // Major, Minor, Patch - in that order.
  uint32_t Components[3] {};
  // Largest value each component can hold without spilling into its neighbour in the packed form.
  constexpr uint32_t Limits[3] {0xFF, 0xFF, 0xFFFF};

  // Parse kernel version in the form of `<Major>.<Minor>.<Patch>[Optional Data]`
  const char* Cursor = buf.release;
  const char* const End = buf.release + sizeof(buf.release);
  for (size_t i = 0; i < 3; ++i) {
    uint32_t Parsed {};
    const auto Results = std::from_chars(Cursor, End, Parsed, 10);
    if (Results.ec != std::errc {}) {
      // Not a number (or out of range): leave this and any later component at zero.
      break;
    }
    Components[i] = std::min(Parsed, Limits[i]);

    // Only continue when the number is followed by the '.' separator, and never step past the buffer.
    if (Results.ptr == End || *Results.ptr != '.') {
      break;
    }
    Cursor = Results.ptr + 1;
  }

  return (Components[0] << 24) | (Components[1] << 16) | Components[2];
}

// Picks the kernel version reported to the guest: the host version, limited to the range FEX emulates.
uint32_t SyscallHandler::CalculateGuestKernelVersion() {
  // We currently only emulate a kernel between the ranges of Kernel 5.15.0 and 6.11.0
  const uint32_t OldestEmulated = KernelVersion(5, 15);
  const uint32_t NewestEmulated = KernelVersion(6, 11);
  return std::clamp(GetHostKernelVersion(), OldestEmulated, NewestEmulated);
}

// Generic syscall entry point.
//
// Args->Argument[0] carries the syscall number, Argument[1..6] carry the syscall arguments.
// The seccomp filter runs first and may short-circuit the call; otherwise the registered handler
// for the syscall number is invoked with as many arguments as its definition declares.
uint64_t SyscallHandler::HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) {
  // Our own return address, which will be inside the JIT; it is handed to the seccomp filter.
  const auto JITPC = reinterpret_cast<uint64_t>(__builtin_extract_return_addr(__builtin_return_address(0)));

  const auto Filtered = SeccompEmulator.ExecuteFilter(Frame, JITPC, Args);
  if (Filtered.EarlyReturn) {
    return Filtered.Result;
  }

  const auto& Arg = Args->Argument;
  if (Arg[0] >= Definitions.size()) {
    // Syscall number outside of the table.
    return -ENOSYS;
  }

  auto& Handler = Definitions[Arg[0]];
  uint64_t Ret {};
  switch (Handler.NumArgs) {
  case 0: Ret = Handler.Ptr0(Frame); break;
  case 1: Ret = Handler.Ptr1(Frame, Arg[1]); break;
  case 2: Ret = Handler.Ptr2(Frame, Arg[1], Arg[2]); break;
  case 3: Ret = Handler.Ptr3(Frame, Arg[1], Arg[2], Arg[3]); break;
  case 4: Ret = Handler.Ptr4(Frame, Arg[1], Arg[2], Arg[3], Arg[4]); break;
  case 5: Ret = Handler.Ptr5(Frame, Arg[1], Arg[2], Arg[3], Arg[4], Arg[5]); break;
  case 6: Ret = Handler.Ptr6(Frame, Arg[1], Arg[2], Arg[3], Arg[4], Arg[5], Arg[6]); break;
  // for missing syscalls: the handler receives the syscall number itself, and the result is not traced
  case 255: return Handler.Ptr1(Frame, Arg[0]);
  default:
    LOGMAN_MSG_A_FMT("Unhandled syscall: {}", Arg[0]);
    return -1;
  }
#ifdef DEBUG_STRACE
  Strace(Args, Ret);
#endif
  return Ret;
}

#ifdef DEBUG_STRACE
// Debug-only trace of a finished syscall: prints the arguments followed by the return value,
// using the per-syscall format string stored in its definition.
void SyscallHandler::Strace(FEXCore::HLE::SyscallArguments* Args, uint64_t Ret) {
  const auto& Arg = Args->Argument;
  auto& Entry = Definitions[Arg[0]];
  const char* const Format = Entry.StraceFmt.c_str();

  switch (Entry.NumArgs) {
  case 0: LogMan::Msg::DFmt(Format, Ret); break;
  case 1: LogMan::Msg::DFmt(Format, Arg[1], Ret); break;
  case 2: LogMan::Msg::DFmt(Format, Arg[1], Arg[2], Ret); break;
  case 3: LogMan::Msg::DFmt(Format, Arg[1], Arg[2], Arg[3], Ret); break;
  case 4: LogMan::Msg::DFmt(Format, Arg[1], Arg[2], Arg[3], Arg[4], Ret); break;
  case 5: LogMan::Msg::DFmt(Format, Arg[1], Arg[2], Arg[3], Arg[4], Arg[5], Ret); break;
  case 6: LogMan::Msg::DFmt(Format, Arg[1], Arg[2], Arg[3], Arg[4], Arg[5], Arg[6], Ret); break;
  // Any other argument count is not traced.
  default: break;
  }
}
#endif

// Handler for syscalls that have no implementation: reports the syscall number through
// ERROR_AND_DIE_FMT. The -ENOSYS return only matters if that macro ever returns.
uint64_t UnimplementedSyscall(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber) {
  ERROR_AND_DIE_FMT("Unhandled system call: {}", SyscallNumber);

  constexpr int NotImplemented = -ENOSYS;
  return NotImplemented;
}

// Handler for syscalls that have no implementation but should fail quietly:
// always reports -ENOSYS to the caller, ignoring both arguments.
uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber) {
  constexpr int NotImplemented = -ENOSYS;
  return NotImplemented;
}

// Takes the locks that must be held across a fork.
// Acquisition order: thread manager, then the thread's context, then the VMA tracking mutex.
// UnlockAfterFork is the counterpart that releases them.
void SyscallHandler::LockBeforeFork(FEXCore::Core::InternalThreadState* Thread) {
  TM.LockBeforeFork();
  Thread->CTX->LockBeforeFork(Thread);
  VMATracking.Mutex.lock();
}

// Releases the locks taken by LockBeforeFork, in the reverse order they were acquired.
//
// LiveThread: the thread that continues running after the fork.
// Child:      true when running in the forked child, false in the parent.
void SyscallHandler::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) {
  if (Child) {
    // Code maps are closed upon fork in the child
    FM.SetProtectedCodeMapFD(-1);

    // The child does not do a plain unlock; it steals and drops the active locks instead.
    VMATracking.Mutex.StealAndDropActiveLocks();
  } else {
    VMATracking.Mutex.unlock();
  }

  CTX->UnlockAfterFork(LiveThread, Child);

  // Clear all the other threads that are being tracked
  TM.UnlockAfterFork(LiveThread, Child);
}

// Registers the thread's TLS state with both the signal delegator and the thunk handler.
void SyscallHandler::RegisterTLSState(FEX::HLE::ThreadStateObject* Thread) {
  SignalDelegation->RegisterTLSState(Thread);
  ThunkHandler->RegisterTLSState(Thread);
}

// Removes the thread's TLS state from the signal delegator.
// NOTE(review): RegisterTLSState also registers with the thunk handler, but nothing is undone for it
// here - confirm that the thunk handler needs no matching teardown.
void SyscallHandler::UninstallTLSState(FEX::HLE::ThreadStateObject* Thread) {
  SignalDelegation->UninstallTLSState(Thread);
}

// True for the characters '0'-'9' and lowercase 'a'-'f'. Uppercase hex digits are rejected.
static bool isHEX(char c) {
  const bool IsDecimalDigit = '0' <= c && c <= '9';
  const bool IsLowerHexLetter = 'a' <= c && c <= 'f';
  return IsDecimalDigit || IsLowerHexLetter;
}

// Produces the source-code map (source file name, line mappings, symbol mappings) for a guest binary.
//
// GuestBinaryFile:   path of the guest ELF.
// GuestBinaryFileId: identifier used to name the cached files under `<data dir>/fexsrc`.
//
// Two cache files are maintained:
//   `<id>.src` - output of `objdump -SC` for the binary; regenerated when missing or older than the binary.
//   `<id>.idx` - serialized index; regenerated when missing, older than the source file, or unreadable.
//
// Returns the map, or an empty pointer when the binary is not an ELF, cannot be stat'ed, the cache
// directory cannot be created, objdump fails, or the cache files cannot be opened.
fextl::unique_ptr<FEXCore::HLE::SourcecodeMap> SyscallHandler::GenerateMap(std::string_view GuestBinaryFile, std::string_view GuestBinaryFileId) {
  // A string_view is not guaranteed to be NUL-terminated, so take an owning copy before using C APIs.
  const fextl::string GuestBinaryPath(GuestBinaryFile);

  ELFParser GuestELF;

  if (!GuestELF.ReadElf(GuestBinaryPath)) {
    LogMan::Msg::DFmt("GenerateMap: '{}' is not an elf file?", GuestBinaryFile);
    return {};
  }

  struct stat GuestBinaryFileStat {};

  if (stat(GuestBinaryPath.c_str(), &GuestBinaryFileStat) != 0) {
    LogMan::Msg::DFmt("GenerateMap: failed to stat '{}'", GuestBinaryFile);
    return {};
  }

  const auto FexSrcPath = fextl::fmt::format("{}/fexsrc", FEXCore::Config::GetDataDirectory());

  if (!FHU::Filesystem::CreateDirectories(FexSrcPath)) {
    LogMan::Msg::DFmt("GenerateMap: failed to create_directories '{}'", FexSrcPath);
    return {};
  }

  auto GuestSourceFile = fextl::fmt::format("{}/{}.src", FexSrcPath, GuestBinaryFileId);

  struct stat GuestSourceFileStat {};
  // Set when objdump was (re)run below; the stat data above is stale in that case.
  bool SourceRegenerated = false;

  if (stat(GuestSourceFile.c_str(), &GuestSourceFileStat) != 0 || GuestBinaryFileStat.st_mtime > GuestSourceFileStat.st_mtime) {
    LogMan::Msg::DFmt("GenerateMap: Generating source for '{}'", GuestBinaryFile);

    // Wraps an argument in single quotes for the shell, escaping embedded single quotes so that
    // a path can never terminate the quoting and inject extra shell syntax.
    const auto ShellQuote = [](std::string_view Arg) {
      fextl::string Quoted("'");
      for (const char c : Arg) {
        if (c == '\'') {
          Quoted += "'\\''";
        } else {
          Quoted.push_back(c);
        }
      }
      Quoted.push_back('\'');
      return Quoted;
    };

    auto command = fextl::fmt::format("x86_64-linux-gnu-objdump -SC {} > {}", ShellQuote(GuestBinaryFile), ShellQuote(GuestSourceFile));
    if (system(command.c_str()) != 0) {
      LogMan::Msg::DFmt("GenerateMap: '{}' failed", command);
      return {};
    }
    SourceRegenerated = true;
  }

  const auto GuestIndexFile = fextl::fmt::format("{}/{}.idx", FexSrcPath, GuestBinaryFileId);
  struct stat GuestIndexFileStat {};

  // A freshly generated source file always invalidates the index. Otherwise compare timestamps.
  const bool GenerateIndex = SourceRegenerated || stat(GuestIndexFile.c_str(), &GuestIndexFileStat) != 0 ||
                             GuestSourceFileStat.st_mtime > GuestIndexFileStat.st_mtime;

  constexpr char SrcHeaderString[] = "fexsrcindex0";
  constexpr size_t SrcHeaderSize = sizeof(SrcHeaderString) - 1;
  if (!GenerateIndex) {
    // Index file de-serialization
    LogMan::Msg::DFmt("GenerateMap: Reading index '{}'", GuestIndexFile);

    int FD = ::open(GuestIndexFile.c_str(), O_RDONLY | O_CLOEXEC);

    if (FD == -1) {
      LogMan::Msg::DFmt("GenerateMap: Failed to open '{}'", GuestIndexFile);
      goto DoGenerate;
    }

    // Reads exactly Size bytes. Anything less means the index is truncated or unreadable.
    const auto ReadExact = [FD](void* Dest, size_t Size) {
      return ::read(FD, Dest, Size) == static_cast<ssize_t>(Size);
    };

    //"fexsrcindex0"
    char filemagic[SrcHeaderSize] {};
    if (!ReadExact(filemagic, sizeof(filemagic)) || memcmp(filemagic, SrcHeaderString, sizeof(filemagic)) != 0) {
      // filemagic is not NUL-terminated, so it has to be logged with an explicit length.
      LogMan::Msg::DFmt("GenerateMap: '{}' has invalid magic '{}'", GuestIndexFile, std::string_view(filemagic, sizeof(filemagic)));
      close(FD);
      goto DoGenerate;
    }

    auto rv = fextl::make_unique<FEXCore::HLE::SourcecodeMap>();

    // Every serialized element occupies at least one byte, so no stored length can legitimately
    // exceed the size of the file. This keeps a corrupt length from triggering a huge allocation.
    const auto MaxLength = static_cast<uint64_t>(GuestIndexFileStat.st_size);

    // Reads a length-prefixed string.
    const auto ReadString = [&](auto& Str) {
      auto len = Str.size();
      if (!ReadExact(&len, sizeof(len)) || len > MaxLength) {
        return false;
      }
      Str.resize(len);
      return ReadExact(Str.data(), len);
    };

    // Reads the whole index body; returns false as soon as anything is short or implausible.
    const auto LoadIndex = [&]() -> bool {
      if (!ReadString(rv->SourceFile)) {
        return false;
      }

      {
        auto len = rv->SortedLineMappings.size();
        if (!ReadExact(&len, sizeof(len)) || len > MaxLength) {
          return false;
        }

        rv->SortedLineMappings.resize(len);

        for (auto& Mapping : rv->SortedLineMappings) {
          if (!ReadExact(&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)) ||
              !ReadExact(&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)) || !ReadExact(&Mapping.LineNumber, sizeof(Mapping.LineNumber))) {
            return false;
          }
        }
      }

      {
        auto len = rv->SortedSymbolMappings.size();
        if (!ReadExact(&len, sizeof(len)) || len > MaxLength) {
          return false;
        }

        rv->SortedSymbolMappings.resize(len);

        for (auto& Mapping : rv->SortedSymbolMappings) {
          if (!ReadExact(&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin)) ||
              !ReadExact(&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd)) || !ReadString(Mapping.Name)) {
            return false;
          }
        }
      }

      return true;
    };

    const bool Loaded = LoadIndex();
    close(FD);

    if (!Loaded) {
      LogMan::Msg::DFmt("GenerateMap: '{}' is truncated or corrupt", GuestIndexFile);
      goto DoGenerate;
    }

    LogMan::Msg::DFmt("GenerateMap: Finished reading index");
    return rv;
  } else {
// objdump output parsing,  index generation, index file serialization
DoGenerate:
    LogMan::Msg::DFmt("GenerateMap: Generating index for '{}'", GuestSourceFile);

    fextl::string SourceData;
    if (!FEXCore::FileLoading::LoadFile(SourceData, GuestSourceFile)) {
      LogMan::Msg::DFmt("GenerateMap: Failed to open '{}'", GuestSourceFile);
      return {};
    }
    fextl::istringstream Stream(SourceData);

    constexpr int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO;
    // O_TRUNC so that a stale or corrupt index is replaced. Appending would leave the old contents
    // at the start of the file, and those are what the reader above would pick up next time.
    int IndexStream = ::open(GuestIndexFile.c_str(), O_CREAT | O_WRONLY | O_TRUNC | O_CLOEXEC, USER_PERMS);

    if (IndexStream == -1) {
      LogMan::Msg::DFmt("GenerateMap: Failed to open '{}' for writing", GuestIndexFile);
      return {};
    }

    // Writes a buffer to the index file and remembers whether any write came up short.
    bool WriteFailed = false;
    const auto WriteAll = [&](const void* Src, size_t Size) {
      if (::write(IndexStream, Src, Size) != static_cast<ssize_t>(Size)) {
        WriteFailed = true;
      }
    };

    WriteAll(SrcHeaderString, SrcHeaderSize);

    // objdump parsing
    fextl::string Line;
    int LineNum = 0;

    // NOTE(review): this flag is set on the first empty line and never cleared afterwards, so from then
    // on every symbol header line closes the previous symbol. Confirm whether it should be reset.
    bool PreviousLineWasEmpty = false;

    uintptr_t LastSymbolOffset {};
    uintptr_t CurrentSymbolOffset {};
    fextl::string LastSymbolName;

    uintptr_t LastOffset {};
    uintptr_t CurrentOffset {};
    int LastOffsetLine {};

    auto rv = fextl::make_unique<FEXCore::HLE::SourcecodeMap>();

    rv->SourceFile = std::move(GuestSourceFile);

    // Closes the symbol that is currently open (if any) at CurrentSymbolOffset.
    auto EndSymbol = [&] {
      if (LastSymbolOffset) {
        rv->SortedSymbolMappings.push_back({LastSymbolOffset, CurrentSymbolOffset, LastSymbolName});

        // LogMan::Msg::DFmt("Ended Symbol {} - {:x}...{:x}", LastSymbolName, LastSymbolOffset, CurrentSymbolOffset);
      }
      LastSymbolOffset = {};
    };

    // Closes the line mapping that is currently open (if any) at CurrentOffset.
    auto EndLine = [&] {
      if (LastOffset) {
        rv->SortedLineMappings.push_back({LastOffset, CurrentOffset, LastOffsetLine});

        // LogMan::Msg::DFmt("Ended Line {} - {:x}...{:x}", LastOffsetLine, LastOffset, CurrentOffset);
      }
      LastOffset = {};
    };

    // <cctype> classification functions require a value representable as unsigned char.
    const auto IsSpace = [](char c) {
      return isspace(static_cast<unsigned char>(c)) != 0;
    };

    while (std::getline(Stream, Line)) {
      LineNum++;

      if (Line.empty()) {
        PreviousLineWasEmpty = true;
        continue;
      }

      // LogMan::Msg::DFmt("Line: '{}'", Line);

      if (isHEX(Line[0])) {
        // Symbol header: `<8 or 16 hex digits> <name>:`
        size_t offs = 1;
        for (; offs < Line.size() && !IsSpace(Line[offs]); offs++)
          ;

        if (offs == Line.size()) {
          continue;
        }
        if (offs != 8 && offs != 16) {
          continue;
        }

        auto VAOffset = std::strtoul(Line.substr(0, offs).c_str(), nullptr, 16);

        auto FileOffset = GuestELF.VAToFile(VAOffset);

        if (FileOffset == 0) {
          LogMan::Msg::EFmt("File Offset {:x} did not map to file?! {}", VAOffset, Line);
        }

        CurrentSymbolOffset = FileOffset;

        if (PreviousLineWasEmpty) {
          EndSymbol();
        }
        LastSymbolOffset = CurrentSymbolOffset;

        for (; offs < Line.size() && Line[offs] != '<'; offs++)
          ;

        if (offs == Line.size()) {
          continue;
        }

        offs++;

        // Drop the trailing `>:` from the symbol name.
        LastSymbolName = Line.substr(offs, Line.size() - 2 - offs);

        // LogMan::Msg::DFmt("Symbol {} @ {:x} -> Line {}", LastSymbolName, LastSymbolOffset, LineNum);
      } else if (IsSpace(Line[0])) {
        // Instruction line: `<whitespace><hex address>:\t<bytes>...`
        size_t offs = 1;
        for (; offs < Line.size() && IsSpace(Line[offs]); offs++)
          ;

        if (offs == Line.size()) {
          continue;
        }

        const size_t start = offs;

        for (; offs < Line.size() && Line[offs] != ':'; offs++)
          ;

        if (offs == Line.size()) {
          continue;
        }

        // offs + 1 is at most Line.size(), where operator[] yields the terminating NUL.
        if (Line[offs + 1] == '\t') {
          auto VAOffsetStr = Line.substr(start, offs - start);
          auto VAOffset = std::strtoul(VAOffsetStr.c_str(), nullptr, 16);
          auto FileOffset = GuestELF.VAToFile(VAOffset);
          if (FileOffset == 0) {
            LogMan::Msg::EFmt("File Offset {:x} did not map to file?! {}", VAOffset, Line);
          } else {
            if (LastOffset > FileOffset) {
              LogMan::Msg::EFmt("File Offset {:x} less than previous {:} ?!  {}", FileOffset, LastOffset, Line);
            }
            CurrentOffset = FileOffset;

            EndLine();

            LastOffset = CurrentOffset;
            LastOffsetLine = LineNum;
          }
        }
      }
      // something else -- keep going
    }

    // Close whatever is still open, assuming the final instruction is 4 bytes long.
    CurrentOffset = LastOffset + 4;
    CurrentSymbolOffset = CurrentOffset;

    EndSymbol();
    EndLine();

    // Index post processing - entries are sorted for faster lookups
    // NOTE(review): these comparators are only a valid ordering while the ranges are non-empty and
    // do not overlap - confirm that the parsed data always satisfies that.

    std::sort(rv->SortedLineMappings.begin(), rv->SortedLineMappings.end(),
              [](const auto& lhs, const auto& rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; });

    std::sort(rv->SortedSymbolMappings.begin(), rv->SortedSymbolMappings.end(),
              [](const auto& lhs, const auto& rhs) { return lhs.FileGuestEnd <= rhs.FileGuestBegin; });

    // Index serialization
    {
      auto len = rv->SourceFile.size();
      WriteAll(&len, sizeof(len));
      WriteAll(rv->SourceFile.c_str(), len);
    }

    {
      auto len = rv->SortedLineMappings.size();

      WriteAll(&len, sizeof(len));

      for (const auto& Mapping : rv->SortedLineMappings) {
        WriteAll(&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin));
        WriteAll(&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd));
        WriteAll(&Mapping.LineNumber, sizeof(Mapping.LineNumber));
      }
    }

    {
      auto len = rv->SortedSymbolMappings.size();

      WriteAll(&len, sizeof(len));

      for (const auto& Mapping : rv->SortedSymbolMappings) {
        WriteAll(&Mapping.FileGuestBegin, sizeof(Mapping.FileGuestBegin));
        WriteAll(&Mapping.FileGuestEnd, sizeof(Mapping.FileGuestEnd));

        {
          auto len = Mapping.Name.size();
          WriteAll(&len, sizeof(len));
          WriteAll(Mapping.Name.c_str(), len);
        }
      }
    }

    close(IndexStream);

    if (WriteFailed) {
      // The in-memory map is still complete, but the cache file is not: remove it so it is rebuilt next time.
      LogMan::Msg::DFmt("GenerateMap: Failed to write '{}'", GuestIndexFile);
      unlink(GuestIndexFile.c_str());
    }

    LogMan::Msg::DFmt("GenerateMap: Finished generating index");
    return rv;
  }
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Syscalls.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|common
desc: Glue logic, STRACE magic
$end_info$
*/

#pragma once

#include "Common/VolatileMetadata.h"
#include "LinuxSyscalls/FileManagement.h"
#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/ThreadManager.h"
#include "LinuxSyscalls/Seccomp/SeccompEmulator.h"
#include "LinuxSyscalls/SyscallsVMATracking.h"
#include "ArchHelpers/MContext.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Thunks.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/HLE/SourcecodeResolver.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/functional.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>

#include <mutex>
#include <shared_mutex>

#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <type_traits>
#include <list>
#ifdef ARCHITECTURE_x86_64
#define SYSCALL_ARCH_NAME x64
#elif ARCHITECTURE_arm64
#include "LinuxSyscalls/Arm64/SyscallsEnum.h"
#define SYSCALL_ARCH_NAME Arm64
#endif

#include "LinuxSyscalls/x64/SyscallsEnum.h"
#include "LinuxSyscalls/x32/SyscallsEnum.h"

// Token pasting helpers. CONCAT goes through CONCAT_ so that macro arguments are expanded before pasting.
#define CONCAT_(a, b) a##b
#define CONCAT(a, b) CONCAT_(a, b)
// Names the host-architecture syscall enumerator for `name`,
// e.g. with SYSCALL_ARCH_NAME defined as x64, SYSCALL_DEF(read) is (HLE::x64::SYSCALL_x64_read).
#define SYSCALL_DEF(name) (HLE::SYSCALL_ARCH_NAME::CONCAT(CONCAT(SYSCALL_, SYSCALL_ARCH_NAME), _##name))

// #define DEBUG_STRACE

namespace FEX {
class CodeLoader;
}

namespace FEXCore {
namespace Context {
  class Context;
}
namespace Core {
  struct CpuStateFrame;
}
} // namespace FEXCore

namespace FEX::HLE {

class SyscallHandler;
class SignalDelegator;
class ThunkHandler;

void RegisterEpoll(FEX::HLE::SyscallHandler* Handler);
void RegisterFD(FEX::HLE::SyscallHandler* Handler);
void RegisterFS(FEX::HLE::SyscallHandler* Handler);
void RegisterInfo(FEX::HLE::SyscallHandler* Handler);
void RegisterIO(FEX::HLE::SyscallHandler* Handler);
void RegisterMemory(FEX::HLE::SyscallHandler* Handler);
void RegisterSignals(FEX::HLE::SyscallHandler* Handler);
void RegisterThread(FEX::HLE::SyscallHandler* Handler);
void RegisterTimer(FEX::HLE::SyscallHandler* Handler);
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler);
void RegisterStubs(FEX::HLE::SyscallHandler* Handler);

uint64_t UnimplementedSyscall(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber);
uint64_t UnimplementedSyscallSafe(FEXCore::Core::CpuStateFrame* Frame, uint64_t SyscallNumber);

// Extra arguments that distinguish an execveat-style call from a plain execve.
struct ExecveAtArgs {
  int dirfd;
  int flags;

  // Arguments for a plain execve: resolve relative to the current working directory, with no flags.
  static ExecveAtArgs Empty() {
    ExecveAtArgs Defaults {};
    Defaults.dirfd = AT_FDCWD;
    Defaults.flags = 0;
    return Defaults;
  }
};

uint64_t ExecveHandler(FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* const* argv, char* const* envp, ExecveAtArgs Args);

// Interface for performing guest-visible memory mapping operations from code that does not
// know about the concrete syscall handler.
class SyscallMmapInterface {
public:
  // Polymorphic interface: give it a virtual destructor so destroying an implementation through
  // a pointer to this interface is well-defined.
  virtual ~SyscallMmapInterface() = default;

  // does a mmap as if done via a guest syscall
  virtual void* GuestMmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset) = 0;

  // does a guest munmap as if done via a guest syscall
  virtual uint64_t GuestMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) = 0;
};

// Linux syscall emulation front door.
//
// Owns the syscall dispatch table, the thread manager, the seccomp emulator, the file manager,
// VMA tracking and the guest program-break (brk) bookkeeping. The constructor is protected, so
// instances are created through derived classes.
class SyscallHandler : public FEXCore::HLE::SyscallHandler,
                       public SyscallMmapInterface,
                       FEXCore::HLE::SourcecodeResolver,
                       public FEXCore::CodeMapOpener,
                       public FEXCore::Allocator::FEXAllocOperators {
public:
  ThreadManager TM;
  FEX::HLE::SeccompEmulator SeccompEmulator;

  virtual ~SyscallHandler();

  // In the case that the syscall doesn't hit the optimized path then we still need to go here
  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) final override;

  // Sets the start of the guest brk region and unmaps the `Size` bytes mapped at `Base`.
  void DefaultProgramBreak(uint64_t Base, uint64_t Size);
  // Hands previously serialized seccomp filters to the emulator. An FD of -1 means "nothing to load".
  void DeserializeSeccompFD(FEX::HLE::ThreadStateObject* Thread, int FD) {
    if (FD == -1) {
      return;
    }
    SeccompEmulator.DeserializeFilters(Thread->Thread->CurrentFrame, FD);
  }

  // Handler signatures, one per argument count (0 to 6). Every argument is passed as a raw uint64_t.
  using SyscallPtrArg0 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame);
  using SyscallPtrArg1 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t);
  using SyscallPtrArg2 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t, uint64_t);
  using SyscallPtrArg3 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t, uint64_t, uint64_t);
  using SyscallPtrArg4 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t, uint64_t, uint64_t, uint64_t);
  using SyscallPtrArg5 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
  using SyscallPtrArg6 = uint64_t (*)(FEXCore::Core::CpuStateFrame* Frame, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);

  // One entry of the dispatch table. NumArgs selects which member of the union is the valid pointer;
  // the value 255 marks an unimplemented syscall whose handler receives the syscall number through Ptr1.
  struct SyscallFunctionDefinition {
    union {
      void* Ptr;
      SyscallPtrArg0 Ptr0;
      SyscallPtrArg1 Ptr1;
      SyscallPtrArg2 Ptr2;
      SyscallPtrArg3 Ptr3;
      SyscallPtrArg4 Ptr4;
      SyscallPtrArg5 Ptr5;
      SyscallPtrArg6 Ptr6;
    };
    uint8_t NumArgs;
#ifdef DEBUG_STRACE
    fextl::string StraceFmt;
#endif
  };

  // Bounds-checked lookup: uses `at()`, so the syscall number is range-checked against the table.
  const SyscallFunctionDefinition* GetDefinition(uint64_t Syscall) {
    return &Definitions.at(Syscall);
  }

  // Registration hook for 32-bit syscalls. The base implementation does nothing.
  virtual void RegisterSyscall_32(int SyscallNumber,
#ifdef DEBUG_STRACE
                                  const fextl::string& TraceFormatString,
#endif
                                  void* SyscallHandler, int ArgumentCount) {
  }

  // Registration hook for 64-bit syscalls. The base implementation does nothing.
  virtual void RegisterSyscall_64(int SyscallNumber,
#ifdef DEBUG_STRACE
                                  const fextl::string& TraceFormatString,
#endif
                                  void* SyscallHandler, int ArgumentCount) {
  }

  uint64_t HandleBRK(FEXCore::Core::CpuStateFrame* Frame, void* Addr);

  FEX::HLE::FileManager FM;
  FEX::CodeLoader* GetCodeLoader() const {
    return LocalLoader;
  }
  void SetCodeLoader(FEX::CodeLoader* Loader) {
    LocalLoader = Loader;
  }
  FEX::HLE::SignalDelegator* GetSignalDelegator() {
    return SignalDelegation;
  }

  FEX::HLE::ThunkHandler* GetThunkHandler() {
    return ThunkHandler;
  }

  FEX_CONFIG_OPT(IsInterpreterInstalled, INTERPRETER_INSTALLED);
  FEX_CONFIG_OPT(Filename, APP_FILENAME);
  FEX_CONFIG_OPT(RootFSPath, ROOTFS);
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  FEX_CONFIG_OPT(SMCChecks, SMCCHECKS);
  FEX_CONFIG_OPT(NeedsSeccomp, NEEDSSECCOMP);
  FEX_CONFIG_OPT(EnableCodeCaching, ENABLECODECACHINGWIP);

  // Kernel versions are handled in packed form, see KernelVersion() below.
  uint32_t GetHostKernelVersion() const {
    return HostKernelVersion;
  }
  uint32_t GetGuestKernelVersion() const {
    return GuestKernelVersion;
  }

  bool IsHostKernelVersionAtLeast(uint32_t Major, uint32_t Minor = 0, uint32_t Patch = 0) const {
    return GetHostKernelVersion() >= KernelVersion(Major, Minor, Patch);
  }

  static uint32_t CalculateHostKernelVersion();
  uint32_t CalculateGuestKernelVersion();

  // Packs a version as (Major << 24) | (Minor << 16) | Patch. The inputs are not range-checked.
  static uint32_t KernelVersion(uint32_t Major, uint32_t Minor = 0, uint32_t Patch = 0) {
    return (Major << 24) | (Minor << 16) | Patch;
  }

  // Accessors for the packed form: Major is the top 8 bits, Minor the next 8, Patch the low 16.
  static uint32_t KernelMajor(uint32_t Version) {
    return Version >> 24;
  }
  static uint32_t KernelMinor(uint32_t Version) {
    return (Version >> 16) & 0xFF;
  }
  static uint32_t KernelPatch(uint32_t Version) {
    return Version & 0xFFFF;
  }

  virtual FEX::HLE::MemAllocator* Get32BitAllocator() {
    return Alloc32Handler.get();
  }

  // does a mmap as if done via a guest syscall
  void* GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset);
  // Keep the interface overload (without Is64Bit) visible next to the overload declared above.
  using SyscallMmapInterface::GuestMmap;

  // does a guest munmap as if done via a guest syscall
  uint64_t GuestMunmap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length);
  // Keep the interface overload (without Is64Bit) visible next to the overload declared above.
  using SyscallMmapInterface::GuestMunmap;

  uint64_t GuestMremap(bool Is64Bit, FEXCore::Core::InternalThreadState*, void* old_address, size_t old_size, size_t new_size, int flags,
                       void* new_address);
  uint64_t GuestMprotect(FEXCore::Core::InternalThreadState*, void* addr, size_t len, int prot);
  uint64_t GuestShmat(bool Is64Bit, FEXCore::Core::InternalThreadState*, int shmid, const void* shmaddr, int shmflg);
  uint64_t GuestShmdt(bool Is64Bit, FEXCore::Core::InternalThreadState*, const void* shmaddr);

  ///// Memory Manager tracking /////
  struct LateApplyExtendedVolatileMetadata {
    fextl::set<uint64_t> VolatileInstructions {};
    FEXCore::IntervalList<uint64_t> VolatileValidRanges {};
  };
  std::optional<LateApplyExtendedVolatileMetadata> TrackMmap(FEXCore::Core::InternalThreadState* Thread, uint64_t addr, size_t length,
                                                             int prot, int flags, int fd, off_t offset,
                                                             std::optional<FEXCore::ExecutableFileSectionInfo>& CachedSection);
  void TrackMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length);
  void TrackMremap(FEXCore::Core::InternalThreadState* Thread, uint64_t OldAddress, size_t OldSize, size_t NewSize, int flags, uint64_t NewAddress);
  void TrackShmat(FEXCore::Core::InternalThreadState* Thread, int shmid, uint64_t shmaddr, int shmflg, uint64_t Length);
  uint64_t TrackShmdt(FEXCore::Core::InternalThreadState* Thread, uint64_t shmaddr);
  void TrackMprotect(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t len, int prot);
  void TrackMadvise(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int advice);

  // Invalidates guest code in [Base, Base + Length) unless SMC checks are configured off.
  void InvalidateCodeRangeIfNecessary(FEXCore::Core::InternalThreadState* Thread, uint64_t Base, uint64_t Length) {
    if (SMCChecks != FEXCore::Config::CONFIG_SMC_NONE) {
      TM.InvalidateGuestCodeRange(Thread, Base, Length);
    }
  }

  // Invalidation counterpart for mremap: a moved mapping invalidates the whole old range,
  // an in-place shrink invalidates only the tail that went away. Does nothing when SMC checks are off.
  void InvalidateCodeRangeIfNecessaryOnRemap(FEXCore::Core::InternalThreadState* Thread, uint64_t OldAddress, uint64_t NewAddress,
                                             size_t OldSize, size_t NewSize) {
    if (SMCChecks != FEXCore::Config::CONFIG_SMC_NONE) {
      if (OldAddress != NewAddress) {
        if (OldSize != 0) {
          // This also handles the MREMAP_DONTUNMAP case
          TM.InvalidateGuestCodeRange(Thread, OldAddress, OldSize);
        }
      } else {
        // If mapping shrunk, flush the unmapped region
        if (OldSize > NewSize) {
          TM.InvalidateGuestCodeRange(Thread, OldAddress + NewSize, OldSize - NewSize);
        }
      }
    }
  }


  ///// VMA (Virtual Memory Area) tracking /////
  static bool HandleSegfault(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext);
  void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override;
  void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override;
  std::optional<FEXCore::ExecutableFileSectionInfo>
  LookupExecutableFileSection(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) final override;

  int OpenCodeMapFile() override;

  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) override;

  ///// FORK tracking /////
  void LockBeforeFork(FEXCore::Core::InternalThreadState* Thread);
  void UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child);

  void RegisterTLSState(FEX::HLE::ThreadStateObject* Thread);
  void UninstallTLSState(FEX::HLE::ThreadStateObject* Thread);

  // This class is its own source code resolver (see the private SourcecodeResolver base and GenerateMap).
  SourcecodeResolver* GetSourcecodeResolver() override {
    return this;
  }

  void SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame) override {
    TM.SleepThread(CTX, Frame);
  }

  bool NeedXIDCheck() const {
    return NeedToCheckXID;
  }
  void DisableXIDCheck() {
    NeedToCheckXID = false;
  }

  constexpr static uint64_t TASK_MAX_64BIT = (1ULL << 48);
  constexpr static size_t MAX_LDT_ENTRIES = 8192;
  constexpr static size_t LDT_ENTRY_SIZE = sizeof(FEXCore::Core::CPUState::gdt_segment);

  VMATracking::VMATracking VMATracking;

  const uint64_t CodeCacheConfigId = 0; // TODO: Make unique to active configuration

  uint64_t read_ldt(FEXCore::Core::CpuStateFrame* Frame, void* ptr, unsigned long bytecount);
  uint64_t write_ldt(FEXCore::Core::CpuStateFrame* Frame, void* ptr, unsigned long bytecount, bool legacy);

protected:
  SyscallHandler(FEXCore::Context::Context* _CTX, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler);

  // Dispatch table indexed by syscall number. It is sized for the larger of the x64 and x86 tables,
  // and every slot starts out as UnimplementedSyscall with the "missing syscall" marker (NumArgs 255).
  fextl::vector<SyscallFunctionDefinition> Definitions {std::max<std::size_t>(FEX::HLE::x64::SYSCALL_x64_MAX, FEX::HLE::x32::SYSCALL_x86_MAX),
                                                        {
                                                          .Ptr = reinterpret_cast<void*>(&UnimplementedSyscall),
                                                          .NumArgs = 255,
                                                        }};
  std::mutex MMapMutex;

  // BRK management
  // DataSpace is the base address of the brk region, DataSpaceSize the current break offset from it,
  // and DataSpaceMappedSize the number of bytes currently mapped at the base.
  uint64_t DataSpace {};
  uint64_t DataSpaceSize {};
  uint64_t DataSpaceMappedSize {};

  // (Major << 24) | (Minor << 16) | Patch
  uint32_t HostKernelVersion {};
  uint32_t GuestKernelVersion {};

  FEXCore::Context::Context* CTX;

private:
  FEX::HLE::SignalDelegator* SignalDelegation;
  FEX::HLE::ThunkHandler* ThunkHandler;

  fextl::unordered_map<fextl::string, FEX::VolatileMetadata::ExtendedVolatileMetadata> ExtendedMetaData {};

  std::mutex FutexMutex;
  std::mutex SyscallMutex;
  FEX::CodeLoader* LocalLoader {};
  bool NeedToCheckXID {true};

#ifdef DEBUG_STRACE
  void Strace(FEXCore::HLE::SyscallArguments* Args, uint64_t Ret);
#endif
  fextl::unique_ptr<FEXCore::HLE::SourcecodeMap> GenerateMap(std::string_view GuestBinaryFile, std::string_view GuestBinaryFileId) override;

  fextl::unique_ptr<FEX::HLE::MemAllocator> Alloc32Handler {};
  std::atomic<uint64_t> AnonSharedId {1};
};

// Both macros expect a variable named `Result` in the enclosing scope and always return
// from the enclosing function.
//
// SYSCALL_ERRNO: a Result of -1 is reported as -errno, anything else is returned unchanged.
#define SYSCALL_ERRNO()              \
  do {                               \
    if (Result == -1) return -errno; \
    return Result;                   \
  } while (0)
// SYSCALL_ERRNO_NULL: same as above, but the failure value is 0 instead of -1.
#define SYSCALL_ERRNO_NULL()        \
  do {                              \
    if (Result == 0) return -errno; \
    return Result;                  \
  } while (0)

extern FEX::HLE::SyscallHandler* _SyscallHandler;

#ifdef DEBUG_STRACE
//////
/// Templates to map parameters to format string for syscalls
//////

// Primary template is only declared; each supported argument type provides a specialization
// with a `Format` member holding the fmt placeholder to use for that type.
template<typename T>
struct ArgToFmtString;

// Defines the ArgToFmtString specialization for type `tpy` with placeholder `str`.
#define ARG_TO_STR(tpy, str)                      \
  template<>                                      \
  struct FEX::HLE::ArgToFmtString<tpy> {          \
    inline static const char* const Format = str; \
  };

// Base types
ARG_TO_STR(int, "{}")
ARG_TO_STR(unsigned int, "{}")
ARG_TO_STR(long, "{}")
ARG_TO_STR(unsigned long, "{}")

// string types
ARG_TO_STR(char*, "{}")
ARG_TO_STR(const char*, "{}")

// Pointers
// Any other pointer type is printed in hexadecimal.
template<typename T>
struct ArgToFmtString<T*> {
  inline static const char* const Format = "{:x}";
};

// Use ArgToFmtString and variadic template to create a format string from an args list
// The per-argument placeholders are joined with ", ".
template<typename... Args>
fextl::string CollectArgsFmtString() {
  std::array<const char*, sizeof...(Args)> array = {ArgToFmtString<Args>::Format...};
  return fextl::fmt::format("{}", fmt::join(array, ", "));
}
#else
// Without DEBUG_STRACE the registration macro expands to nothing.
#define ARG_TO_STR(tpy, str)
#endif

// Three 64-bit fields: flags, mode and resolve.
// NOTE(review): presumably mirrors the kernel's `struct open_how` used by openat2 - confirm the
// layout against the kernel UAPI header before changing it.
struct open_how {
  uint64_t flags;
  uint64_t mode;
  uint64_t resolve;
};

// Raw clone3 argument block; every field is a 64-bit value.
// NOTE(review): field names and order look like the Linux UAPI `struct clone_args` - confirm against the kernel headers.
struct kernel_clone3_args {
  uint64_t flags;
  uint64_t pidfd;
  uint64_t child_tid;
  uint64_t parent_tid;
  uint64_t exit_signal;
  uint64_t stack;
  uint64_t stack_size;
  uint64_t tls;
  uint64_t set_tid;
  uint64_t set_tid_size;
  uint64_t cgroup;
};

// Discriminator stored in clone3_args::Type.
// NOTE(review): presumably records which guest syscall variant (clone vs. clone3) produced the request - confirm at the call sites.
enum TypeOfClone {
  TYPE_CLONE2,
  TYPE_CLONE3,
};

// Argument bundle handed to CloneHandler: the raw kernel_clone3_args plus additional bookkeeping fields.
struct clone3_args {
  // Which clone flavour this request represents
  TypeOfClone Type;
  // NOTE(review): presumably the signal mask to apply to the new thread - confirm in CloneHandler
  uint64_t SignalMask;

  // NOTE(review): StackSize/NewStack presumably describe a stack set up on behalf of the child - confirm in CloneHandler
  uint64_t StackSize;
  void* NewStack;

  // The raw clone3 argument block
  kernel_clone3_args args;
};

// Handles a clone request described by `args` for the thread owning `Frame`; defined outside of this header.
uint64_t CloneHandler(FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::clone3_args* args);

// Translates open-style flags from the x86-64 guest encoding to the host encoding.
// On an x86-64 host the value is returned untouched. On an arm64 host the four flags whose
// bit positions differ (O_DIRECT, O_LARGEFILE, O_DIRECTORY, O_NOFOLLOW) are moved to the
// host's bit positions; every other bit is passed through unchanged.
inline static int RemapFromX86Flags(int flags) {
#ifdef ARCHITECTURE_x86_64
  // Nothing to change here
#elif ARCHITECTURE_arm64
  struct FlagTranslation {
    int GuestBit;
    int HostBit;
  };

  // {x86-64 value, AArch64 value}
  constexpr FlagTranslation Translations[] = {
    {040000, 0200000},  // O_DIRECT
    {0100000, 0400000}, // O_LARGEFILE
    {0200000, 040000},  // O_DIRECTORY
    {0400000, 0100000}, // O_NOFOLLOW
  };

  // Collect the translated bits separately so that a freshly set host bit is never
  // mistaken for a guest bit by a later table entry.
  int HostBits {};
  for (const auto& Translation : Translations) {
    if ((flags & Translation.GuestBit) != 0) {
      flags &= ~Translation.GuestBit;
      HostBits |= Translation.HostBit;
    }
  }
  flags |= HostBits;
#else
#error Unknown flag remappings for this host platform
#endif
  return flags;
}

// Translates open-style flags from the host encoding to the x86-64 guest encoding.
// On an x86-64 host the value is returned untouched. On an arm64 host the four flags whose
// bit positions differ (O_DIRECT, O_LARGEFILE, O_DIRECTORY, O_NOFOLLOW) are moved to the
// x86-64 bit positions; every other bit is passed through unchanged.
inline static int RemapToX86Flags(int flags) {
#ifdef ARCHITECTURE_x86_64
  // Nothing to change here
#elif ARCHITECTURE_arm64
  struct FlagTranslation {
    int HostBit;
    int GuestBit;
  };

  // {AArch64 value, x86-64 value}
  constexpr FlagTranslation Translations[] = {
    {0200000, 040000},  // O_DIRECT
    {0400000, 0100000}, // O_LARGEFILE
    {040000, 0200000},  // O_DIRECTORY
    {0100000, 0400000}, // O_NOFOLLOW
  };

  // Collect the translated bits separately so that a freshly set guest bit is never
  // mistaken for a host bit by a later table entry.
  int GuestBits {};
  for (const auto& Translation : Translations) {
    if ((flags & Translation.HostBit) != 0) {
      flags &= ~Translation.HostBit;
      GuestBits |= Translation.GuestBit;
    }
  }
  flags |= GuestBits;
#else
#error Unknown flag remappings for this host platform
#endif
  return flags;
}

/**
 * @brief Checks raw syscall return for error
 *
 * This should only be used with raw syscall usage
 *
 * This should not be used with glibc wrapped syscall functions
 *   - This includes the glibc ::syscall(...) function
 *   - This is due to glibc already wrapping the return and setting errno
 *
 * This function should not be used with UAPI breaking syscall results
 * ioctl specifically will break this convention.
 *
 * @param Result The raw syscall return
 *
 * @return true if Result lies in the kernel's error range [-4095, -1] (as an unsigned 64-bit value), false otherwise
 */

[[maybe_unused]]
static bool HasSyscallError(uint64_t Result) {
  // MAX_ERRNO (4095) is part of the Linux Syscall ABI
  // Redefined here since it doesn't exist as a visible define in the UAPI headers
  constexpr uint64_t MAX_ERRNO = 4095;
  // Lowest unsigned value that still encodes an error: -MAX_ERRNO as a 64-bit two's complement number
  constexpr uint64_t LOWEST_ERROR_RESULT = 0xFFFF'FFFF'FFFF'F001ULL;
  static_assert(LOWEST_ERROR_RESULT == static_cast<uint64_t>(0) - MAX_ERRNO, "Error threshold must be exactly -MAX_ERRNO");
  // Raw syscalls are guaranteed to not return a valid result in the range of [-4095, -1]
  // In cases where FEX needs to use raw syscalls, this helper checks for this idiom
  return Result >= LOWEST_ERROR_RESULT;
}

[[maybe_unused]]
static bool HasSyscallError(const void* Result) {
  return HasSyscallError(reinterpret_cast<uintptr_t>(Result));
}

// Declaration only; the definition is not part of this header section.
// NOTE(review): presumably emulates getdents for guest dirent layout `T`, writing up to `count` bytes to `dirp`,
// with `IncrementOffset` selecting how the directory offset field is produced - confirm at the definition.
template<bool IncrementOffset, typename T>
uint64_t GetDentsEmulation(int fd, T* dirp, uint32_t count);

namespace FaultSafeUserMemAccess {
  // These are little helper functions for cases when FEX needs to copy data to or from the application in a robust fashion.
  // CopyFromUser and CopyToUser are memcpy routines that expect to safely SIGSEGV when reading or writing application memory respectively.
  // Returns zero if the memcpy completed. If the access faults at a registered fault location, TryHandleSafeFault (below)
  // makes the routine return EFAULT to its caller instead.
  [[nodiscard]]
  size_t CopyFromUser(void* Dest, const void* Src, size_t Size);
  [[nodiscard]]
  size_t CopyToUser(void* Dest, const void* Src, size_t Size);
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED && defined(ARCHITECTURE_arm64)
  // These helpers just check if the user pointer is readable and writable.
  // This is useful in an assert build that can be safely sprinkled through the syscall handler without overhead in release builds.
  // They are only declared here (out-of-line) for arm64 builds with assertions enabled.
  void VerifyIsReadable(const void* Src, size_t Size);
  void VerifyIsReadableOrNull(const void* Src, size_t Size);
  void VerifyIsWritable(void* Src, size_t Size);
  void VerifyIsWritableOrNull(void* Src, size_t Size);

  // Iterates a null-terminated string and checks if all bytes are readable
  void VerifyIsStringReadable(const char* Src);

  // Iterates a null-terminated string and checks if all bytes are readable. Up to MaxSize bytes are checked.
  void VerifyIsStringReadableMaxSize(const char* Src, size_t MaxSize);
#else
  // Fallback stubs for every other configuration.
  // The non-"OrNull" variants only reject a nullptr argument (terminating via ERROR_AND_DIE_FMT) and never touch the memory;
  // the "OrNull" variants do nothing at all.
  inline void VerifyIsReadable(const void* Src, size_t Size) {
    if (Src == nullptr) {
      ERROR_AND_DIE_FMT("Unexpected nullptr syscall argument");
    }
  }
  inline void VerifyIsReadableOrNull(const void* Src, size_t Size) {}
  inline void VerifyIsWritable(void* Src, size_t Size) {
    if (Src == nullptr) {
      ERROR_AND_DIE_FMT("Unexpected nullptr syscall argument");
    }
  }
  inline void VerifyIsWritableOrNull(void* Src, size_t Size) {}
  inline void VerifyIsStringReadable(const char* Src) {
    if (Src == nullptr) {
      ERROR_AND_DIE_FMT("Unexpected nullptr syscall argument");
    }
  }
  inline void VerifyIsStringReadableMaxSize(const char* Src, size_t MaxSize) {
    if (Src == nullptr) {
      ERROR_AND_DIE_FMT("Unexpected nullptr syscall argument");
    }
  }
#endif
  // Returns whether PC is one of the instruction addresses at which a fault is expected and recoverable.
  // NOTE(review): presumably these are the memory access instructions inside the copy/verify helpers - confirm at the definition.
  bool IsFaultLocation(uint64_t PC);

  // Signal-handler hook: recovers from a SIGSEGV raised at a known fault location.
  // Only SEGV_MAPERR and SEGV_ACCERR faults whose PC satisfies IsFaultLocation are handled.
  // Returns true if the context was patched and execution can resume, false if the signal is not ours.
  static inline bool TryHandleSafeFault(int Signal, const siginfo_t& SigInfo, void* UContext) {
    if (Signal == SIGSEGV && (SigInfo.si_code == SEGV_MAPERR || SigInfo.si_code == SEGV_ACCERR) &&
        FaultSafeUserMemAccess::IsFaultLocation(ArchHelpers::Context::GetPc(UContext))) {
      // Return from the subroutine, returning EFAULT.
      // Register 0 receives the return value and the PC is redirected to the address held in register 30.
      ArchHelpers::Context::SetArmReg(UContext, 0, EFAULT);
      ArchHelpers::Context::SetPc(UContext, ArchHelpers::Context::GetArmReg(UContext, 30));
      return true;
    }

    return false;
  }
} // namespace FaultSafeUserMemAccess


// Implements futimesat semantics on top of the utimensat syscall.
// `times` is either null (passed through as null) or points at two microsecond-resolution
// timestamps of guest type T, which get converted to nanosecond-resolution timespecs.
// Returns -EINVAL if any tv_usec is negative or not below one second, otherwise the
// utimensat result translated through SYSCALL_ERRNO().
template<typename T>
inline static uint64_t futimesat_compat(int dirfd, const char* pathname, const T times[2]) {
  FaultSafeUserMemAccess::VerifyIsReadableOrNull(times, sizeof(*times) * 2);

  timespec Converted[2] {};
  timespec* KernelTimes = nullptr;

  if (times != nullptr) {
    constexpr int64_t ONE_SECOND_AS_USEC = 1'000'000LL;

    const auto IsNegative = [&](size_t Index) {
      return times[Index].tv_usec < 0;
    };
    const auto IsTooLarge = [&](size_t Index) {
      return times[Index].tv_usec >= ONE_SECOND_AS_USEC;
    };

    // Incoming microsecond time must not be negative or be larger than one second.
    if (IsNegative(0) || IsNegative(1) || IsTooLarge(0) || IsTooLarge(1)) {
      return -EINVAL;
    }

    // Microseconds -> nanoseconds
    for (size_t Index = 0; Index < 2; ++Index) {
      Converted[Index].tv_sec = times[Index].tv_sec;
      Converted[Index].tv_nsec = 1000LL * times[Index].tv_usec;
    }
    KernelTimes = Converted;
  }

  // SYSCALL_ERRNO() reads the variable named `Result`.
  uint64_t Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, KernelTimes, 0);
  SYSCALL_ERRNO();
}

} // namespace FEX::HLE

// Registers syscall for both 32bit and 64bit
// Expects a variable named `Handler` to be in scope at the expansion site.
// `name` is token-pasted onto SYSCALL_x64_/SYSCALL_x86_ to select the syscall number and stringified for the registered name;
// `lambda` is the handler implementation and is passed to both registrations.
#define REGISTER_SYSCALL_IMPL(name, lambda)                                                      \
  do {                                                                                           \
    FEX::HLE::x64::RegisterSyscall(Handler, FEX::HLE::x64::SYSCALL_x64_##name, #name, (lambda)); \
    FEX::HLE::x32::RegisterSyscall(Handler, FEX::HLE::x32::SYSCALL_x86_##name, #name, (lambda)); \
  } while (false)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsSMCTracking.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
category: LinuxSyscalls ~ Linux syscall emulation, marshaling and passthrough
tags: LinuxSyscalls|common
desc: SMC/MMan Tracking
$end_info$
*/

#include <Common/Config.h>
#include "Common/FDUtils.h"
#include "Common/FEXServerClient.h"
#include "Common/FileMappingBaseAddress.h"

#include <filesystem>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/shm.h>

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXHeaderUtils/Filesystem.h>
#include <Linux/Utils/ELFParser.h>

namespace FEX::HLE {
// SMC interactions
// Attempts to resolve a SIGSEGV that was caused by FEX's own bookkeeping rather than by the guest.
// Two cases are handled:
//   1) The fault address lies inside the thread's call-ret stack allocation: the call-ret stack pointer is reset.
//   2) The fault address lies in a tracked, guest-writable VMA: the page was presumably write-protected for
//      SMC tracking, so the code for that page (and its shared mirrors) is invalidated and the page is made
//      writable again.
// Returns true if the fault was handled and execution may resume, false if it must be handled elsewhere.
bool SyscallHandler::HandleSegfault(FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) {
  const auto FaultAddress = (uintptr_t)((siginfo_t*)info)->si_addr;

  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
  auto CallRetStackInfo = ThreadObject->GetCallRetStackInfo();
  if (FaultAddress >= CallRetStackInfo.AllocationBase && FaultAddress < CallRetStackInfo.AllocationEnd) {
    // Reset REG_CALLRET_SP to the default location to allow for underflows/overflows
    ArchHelpers::Context::SetArmReg(ucontext, 25, CallRetStackInfo.DefaultLocation);
    return true;
  }

  {
    // Can't use the deferred signal lock in the SIGSEGV handler.
    auto lk = FEXCore::MaskSignalsAndLockMutex<std::shared_lock>(_SyscallHandler->VMATracking.Mutex);

    auto VMATracking = &_SyscallHandler->VMATracking;

    // If the write spans two pages, they will be flushed one at a time (generating two faults)
    auto Entry = VMATracking->FindVMAEntry(FaultAddress);

    // If an untracked address, or the mapping wasn't writable, it can't be handled here
    if (Entry == VMATracking->VMAs.end() || !Entry->second.Prot.Writable) {
      return false;
    }

    // Only the single page containing the fault is processed
    auto FaultBase = FEXCore::AlignDown(FaultAddress, FEXCore::Utils::FEX_PAGE_SIZE);

    // Passed to InvalidateGuestCodeRange to restore read/write access to the given region
    auto UnprotectRegionCallback = [](uintptr_t Start, uintptr_t Length) {
      auto rv = mprotect((void*)Start, Length, PROT_READ | PROT_WRITE);
      LogMan::Throw::AFmt(rv == 0, "mprotect({}, {}) failed", Start, Length);
    };

    if (Entry->second.Flags.Shared) {
      LOGMAN_THROW_A_FMT(Entry->second.Resource, "VMA tracking error");

      // Offset of the faulting page within the backing resource
      auto Offset = FaultBase - Entry->first + Entry->second.Offset;

      auto VMA = Entry->second.Resource->FirstVMA;
      LOGMAN_THROW_A_FMT(VMA, "VMA tracking error");

      // Flush all mirrors, remap the page writable as needed
      do {
        // Only VMAs of this resource that actually cover the faulting resource offset are affected
        if (VMA->Offset <= Offset && (VMA->Offset + VMA->Length) > Offset) {
          auto FaultBaseMirrored = Offset - VMA->Offset + VMA->Base;

          if (VMA->Prot.Writable) {
            _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, FaultBaseMirrored, FEXCore::Utils::FEX_PAGE_SIZE, UnprotectRegionCallback);
          } else {
            // Non-writable mirror: invalidate without changing its protection
            _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, FaultBaseMirrored, FEXCore::Utils::FEX_PAGE_SIZE);
          }
        }
      } while ((VMA = VMA->ResourceNextVMA));
    } else {
      _SyscallHandler->TM.InvalidateGuestCodeRange(Thread, FaultBase, FEXCore::Utils::FEX_PAGE_SIZE, UnprotectRegionCallback);
    }

    FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1);

    auto CTX = Thread->CTX;
    if (CTX->IsAddressInCodeBuffer(Thread, ArchHelpers::Context::GetPc(ucontext)) && !CTX->IsCurrentBlockSingleInst(Thread) &&
        CTX->IsAddressInCurrentBlock(Thread, FaultAddress & FEXCore::Utils::FEX_PAGE_MASK, FEXCore::Utils::FEX_PAGE_SIZE)) {
      // If we are not in a single-instruction block, and the SMC write address could intersect with the current block,
      // reconstruct the context and repeat the faulting instruction as a single-instruction block so any SMC it performs
      // is immediately picked up.
      ThreadObject->SignalInfo.Delegator->SpillSRA(Thread, ucontext, Thread->CurrentFrame->InSyscallInfo & 0xFFFF);

      // Adjust context to return to the dispatcher, reloading SRA from thread state
      const auto& Config = ThreadObject->SignalInfo.Delegator->GetConfig();
      ArchHelpers::Context::SetPc(ucontext, Config.AbsoluteLoopTopAddressFillSRA);
      ArchHelpers::Context::SetArmReg(ucontext, 1, 1); // Set ENTRY_FILL_SRA_SINGLE_INST_REG to force a single step
    }

    return true;
  }
}

// Write-protects every guest-writable tracked mapping that overlaps [Start, Start + Length), expanded to page
// boundaries, so that later writes to those pages raise SIGSEGV.
// Does nothing unless SMCChecks is set to CONFIG_SMC_MTRACK.
// For shared mappings every writable VMA of the same resource that mirrors the affected range is protected as well.
void SyscallHandler::MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) {
  const auto Base = Start & FEXCore::Utils::FEX_PAGE_MASK;
  const auto Top = FEXCore::AlignUp(Start + Length, FEXCore::Utils::FEX_PAGE_SIZE);

  {
    if (SMCChecks != FEXCore::Config::CONFIG_SMC_MTRACK) {
      return;
    }

    auto lk = FEXCore::GuardSignalDeferringSection<std::shared_lock>(VMATracking.Mutex, Thread);

    // Find the first mapping at or after the range ends, or ::end().
    // Top points to the address after the end of the range
    auto Mapping = VMATracking.VMAs.lower_bound(Top);

    // Walk backwards through the mappings until one ends at or before Base
    while (Mapping != VMATracking.VMAs.begin()) {
      Mapping--;

      const auto MapBase = Mapping->first;
      const auto MapTop = MapBase + Mapping->second.Length;

      if (MapTop <= Base) {
        // Mapping ends before the Range start, exit
        break;
      } else {
        // Intersection of this mapping with the requested range
        const auto ProtectBase = std::max(MapBase, Base);
        const auto ProtectSize = std::min(MapTop, Top) - ProtectBase;

        if (Mapping->second.Flags.Shared) {
          LOGMAN_THROW_A_FMT(Mapping->second.Resource, "VMA tracking error");

          // The same intersection expressed as an offset range within the backing resource
          const auto OffsetBase = ProtectBase - Mapping->first + Mapping->second.Offset;
          const auto OffsetTop = OffsetBase + ProtectSize;

          auto VMA = Mapping->second.Resource->FirstVMA;
          LOGMAN_THROW_A_FMT(VMA, "VMA tracking error");

          do {
            auto VMAOffsetBase = VMA->Offset;
            auto VMAOffsetTop = VMA->Offset + VMA->Length;
            auto VMABase = VMA->Base;

            // Only writable mirrors whose resource offsets overlap the protected offset range need protecting
            if (VMA->Prot.Writable && VMAOffsetBase < OffsetTop && VMAOffsetTop > OffsetBase) {

              const auto MirroredBase = std::max(VMAOffsetBase, OffsetBase);
              const auto MirroredSize = std::min(OffsetTop, VMAOffsetTop) - MirroredBase;

              // Translate the resource offset back into this mirror's address space
              // NOTE(review): the failure message prints MirroredBase (a resource offset), not the address passed to mprotect.
              auto rv = mprotect((void*)(MirroredBase - VMAOffsetBase + VMABase), MirroredSize, PROT_READ);
              LogMan::Throw::AFmt(rv == 0, "mprotect({}, {}) failed", MirroredBase, MirroredSize);
            }
          } while ((VMA = VMA->ResourceNextVMA));

        } else if (Mapping->second.Prot.Writable) {
          int rv = mprotect((void*)ProtectBase, ProtectSize, PROT_READ);

          LogMan::Throw::AFmt(rv == 0, "mprotect({}, {}) failed", ProtectBase, ProtectSize);
        }
      }
    }
  }
}

// Thin forwarder: hands the range [Start, Start + Length) to InvalidateCodeRangeIfNecessary for the given thread.
void SyscallHandler::InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) {
  InvalidateCodeRangeIfNecessary(Thread, Start, Length);
}

// Describes the section [Base, Base + Size) of the file mapped by `Resource`.
// The resource's first VMA base is passed along as the second initializer.
// The caller must ensure Resource.MappedFile and Resource.FirstVMA are non-null; both are dereferenced.
static FEXCore::ExecutableFileSectionInfo BuildSectionInfo(const VMATracking::MappedResource& Resource, uint64_t Base, uint64_t Size) {
  const uint64_t SectionEnd = Base + Size;
  return FEXCore::ExecutableFileSectionInfo {
    *Resource.MappedFile,
    Resource.FirstVMA->Base,
    Base,
    SectionEnd,
  };
}

// Looks up the tracked mapping containing GuestAddr and, if it is backed by a resource with an associated
// mapped file, returns the section info describing that mapping. Returns std::nullopt otherwise.
// The VMA tracking mutex is held (shared) for the duration of the lookup.
std::optional<FEXCore::ExecutableFileSectionInfo>
SyscallHandler::LookupExecutableFileSection(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) {
  auto lk = FEXCore::GuardSignalDeferringSection<std::shared_lock>(VMATracking.Mutex, Thread);

  const auto Found = VMATracking.FindVMAEntry(GuestAddr);
  if (Found == VMATracking.VMAs.end()) {
    // Address is not covered by any tracked mapping
    return std::nullopt;
  }

  const auto& Mapping = Found->second;
  const bool HasMappedFile = Mapping.Resource && Mapping.Resource->MappedFile;
  if (!HasMappedFile) {
    // Anonymous mapping, or a resource without file information
    return std::nullopt;
  }

  return BuildSectionInfo(*Mapping.Resource, Found->first, Mapping.Length);
}

// Reports the tracked mapping containing Address if guest code may execute from it.
// A mapping counts as executable when it is mapped executable, or when the thread's persona has
// READ_IMPLIES_EXEC set and the mapping is readable.
// Returns {base, length, writable} for such a mapping, or {0, 0, false} when there is none.
FEXCore::HLE::ExecutableRangeInfo SyscallHandler::QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) {
  auto lk = FEXCore::GuardSignalDeferringSection<std::shared_lock>(VMATracking.Mutex, Thread);
  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);

  auto Entry = VMATracking.FindVMAEntry(Address);
  if (Entry == VMATracking.VMAs.end()) {
    return {0, 0, false};
  }

  const bool ReadImpliesExec = (ThreadObject->persona & READ_IMPLIES_EXEC) != 0;
  const bool MayExecute = Entry->second.Prot.Executable || (ReadImpliesExec && Entry->second.Prot.Readable);
  if (!MayExecute) {
    return {0, 0, false};
  }

  return {Entry->first, Entry->second.Length, Entry->second.Prot.Writable};
}

// Result bundle returned by ReadELFHeaders.
// A value-initialized instance (empty containers, HasCodeRelocations == false) is what ReadELFHeaders returns for non-ELF input.
struct ReadELFHeadersResult {
  // Program headers taken from the parsed ELF
  fextl::vector<Elf64_Phdr> ProgramHeaders;
  // Relocations reported by the ELF parser
  // NOTE(review): the uint32_t key is presumably the relocation's offset/address within the file - confirm in ELFParser
  fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> Relocations;
  // Whether the parser reported relocations that affect code
  bool HasCodeRelocations;
};

// Parses the ELF headers of the file referred to by FD.
// If HeaderData is provided (non-null data pointer) it is used as a cheap pre-check: when it does not start
// with the ELF magic, an empty result is returned without touching FD.
// The parser is handed a dup() of FD; the caller's descriptor itself is not passed to it.
static ReadELFHeadersResult ReadELFHeaders(int FD, std::span<std::byte> HeaderData = {}) {
  const std::string_view Magic {ELFMAG};

  // Without mapped header bytes the file is read from FD unconditionally below.
  if (HeaderData.data() != nullptr) {
    const bool StartsWithMagic =
      HeaderData.size_bytes() >= Magic.size() && std::memcmp(Magic.data(), HeaderData.data(), Magic.size()) == 0;
    if (!StartsWithMagic) {
      // Not an ELF file
      return {};
    }
  }

  ELFParser Parser;
  Parser.ReadElf(dup(FD));

  auto Relocations = Parser.PopulateRelocations();
  if (const auto RelocationCount = Relocations.size(); RelocationCount != 0) {
    LogMan::Msg::IFmt("Loaded ELF with {} relocations", RelocationCount);
  }

  // Query before the program headers are moved out of the parser
  auto HasCodeRelocations = Parser.HasCodeRelocations();
  return ReadELFHeadersResult {
    std::move(Parser.phdrs),
    std::move(Relocations),
    HasCodeRelocations,
  };
}

// Loads the on-disk code cache belonging to `Section` (if one exists) into the thread's code cache.
// The cache file is looked up under "<cache directory>cache/<base filename>-<CodeCacheConfigId as 16 hex digits>".
// Every failure (missing file, fstat failure, mapping failure) is logged and results in an early return;
// the cache file descriptor is always closed before returning.
static void LoadCodeCache(FEXCore::Core::InternalThreadState& Thread, FEXCore::ExecutableFileSectionInfo& Section, uint64_t CodeCacheConfigId) {
  auto CacheFilename = fextl::fmt::format("{}cache/{}-{:016x}", FEX::Config::GetCacheDirectory(),
                                          FEXCore::CodeMap::GetBaseFilename(Section.FileInfo, false), CodeCacheConfigId);
  int CacheFD = open(CacheFilename.c_str(), O_RDONLY);
  if (CacheFD == -1) {
    LogMan::Msg::IFmt("Cache file does not exist: {}", CacheFilename);
    return;
  }

  struct stat buf;
  if (fstat(CacheFD, &buf) != 0) {
    LogMan::Msg::EFmt("Invalid cache file: {}", CacheFilename);
    close(CacheFD);
    return;
  }

  auto CacheFileSize = buf.st_size;
  auto MappedCache = FEXCore::Allocator::mmap(nullptr, CacheFileSize, PROT_READ, MAP_PRIVATE, CacheFD, 0);
  // mmap reports failure with MAP_FAILED, not with a null pointer.
  // This also covers empty cache files, since a zero-length mapping is rejected.
  if (MappedCache == MAP_FAILED) {
    LogMan::Msg::EFmt("Failed to map code cache into memory: {}", CacheFilename);
    close(CacheFD);
    return;
  }

  if (!Thread.CTX->GetCodeCache().LoadData(&Thread, (std::byte*)MappedCache, Section)) {
    // TODO: Delete this cache file
  }
  FEXCore::Allocator::munmap(MappedCache, CacheFileSize);
  close(CacheFD);
}

// Performs an mmap on behalf of the guest and updates FEX's VMA tracking.
// Mappings for 32-bit guests, or with X86_64_MAP_32BIT set, go through the 32-bit allocator; everything else uses ::mmap.
// Returns the mapped address on success, or a negative errno value encoded as a pointer on failure.
// After the tracking lock is dropped, code in the new range is invalidated if necessary, volatile metadata
// returned by TrackMmap is applied, and (if code caching is enabled) the code cache for the section is loaded.
void* SyscallHandler::GuestMmap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags,
                                int fd, off_t offset) {
  LOGMAN_THROW_A_FMT(Is64Bit || (length >> 32) == 0, "values must fit to 32 bits");

  uint64_t Result {};
  // Page-aligned length, used for code invalidation below
  size_t Size = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);
  std::optional<LateApplyExtendedVolatileMetadata> LateMetadata = std::nullopt;

  // Filled in by TrackMmap when a code cache should be loaded for the new mapping
  std::optional<FEXCore::ExecutableFileSectionInfo> CachedSection;

  {
    // NOTE: Frontend calls this with a nullptr Thread during initialization, but
    //       providing this code with a valid Thread object earlier would allow
    //       us to be more optimal by using GuardSignalDeferringSection instead
    auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(VMATracking.Mutex, Thread);

    bool Map32Bit = !Is64Bit || (flags & FEX::HLE::X86_64_MAP_32BIT);
    if (Map32Bit) {
      // The 32-bit allocator reports errors as raw negative errno values
      Result = (uint64_t)Get32BitAllocator()->Mmap((void*)addr, length, prot, flags, fd, offset);
      if (FEX::HLE::HasSyscallError(Result)) {
        return reinterpret_cast<void*>(Result);
      }
      LOGMAN_THROW_A_FMT(Is64Bit || (Result >> 32) == 0 || (Result >> 32) == 0xFFFFFFFF, "values must fit to 32 bits");
    } else {
      // libc mmap reports failure as MAP_FAILED (all bits set) plus errno
      Result = reinterpret_cast<uint64_t>(::mmap(reinterpret_cast<void*>(addr), length, prot, flags, fd, offset));
      if (Result == ~0ULL) {
        return reinterpret_cast<void*>(-errno);
      }
    }

    LateMetadata = TrackMmap(Thread, Result, length, prot, flags, fd, offset, CachedSection);
  }

  // Done outside of the VMA tracking lock
  InvalidateCodeRangeIfNecessary(Thread, Result, Size);

  if (LateMetadata) {
    auto CodeInvalidationlk = GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), Thread);
    CTX->AddForceTSOInformation(LateMetadata->VolatileValidRanges, std::move(LateMetadata->VolatileInstructions));
  }

  // NOTE(review): Thread is dereferenced here although the note above says it can be nullptr during initialization;
  // presumably CachedSection is never populated in that situation - confirm in TrackMmap.
  if (EnableCodeCaching && CachedSection) {
    LoadCodeCache(*Thread, *CachedSection, CodeCacheConfigId);
  }

  return reinterpret_cast<void*>(Result);
}

// Performs a munmap on behalf of the guest and updates FEX's VMA tracking.
// Addresses below 4 GiB are released through the 32-bit allocator, everything else through ::munmap.
// Returns the unmap result on success or a negative errno value on failure.
// After the tracking lock is dropped, code in the range is invalidated if necessary and, for a non-zero length,
// force-TSO information covering the range is removed.
uint64_t SyscallHandler::GuestMunmap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) {
  LOGMAN_THROW_A_FMT(Is64Bit || (reinterpret_cast<uintptr_t>(addr) >> 32) == 0, "values must fit to 32 bits: {}", fmt::ptr(addr));
  LOGMAN_THROW_A_FMT(Is64Bit || (length >> 32) == 0, "values must fit to 32 bits");

  uint64_t Result {};
  // Page-aligned length, used for code invalidation below
  uint64_t Size = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);

  {
    // Frontend calls this with nullptr Thread during initialization.
    // This is why `GuardSignalDeferringSectionWithFallback` is used here.
    // To be more optimal the frontend should provide this code with a valid Thread object earlier.
    auto lk = FEXCore::GuardSignalDeferringSectionWithFallback(VMATracking.Mutex, Thread);

    // The allocator is chosen by address, not by Is64Bit
    if (reinterpret_cast<uintptr_t>(addr) < 0x1'0000'0000ULL) {
      // The 32-bit allocator reports errors as raw negative errno values
      Result = Get32BitAllocator()->Munmap(addr, length);
      if (FEX::HLE::HasSyscallError(Result)) {
        return Result;
      }
    } else {
      // libc munmap reports failure as -1 plus errno
      Result = ::munmap(addr, length);
      if (Result == -1) {
        return -errno;
      }
    }
    TrackMunmap(Thread, addr, length);
  }
  // Done outside of the VMA tracking lock
  InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast<uint64_t>(addr), Size);

  if (length) {
    auto CodeInvalidationlk = GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), Thread);
    CTX->RemoveForceTSOInformation(reinterpret_cast<uint64_t>(addr), length);
  }

  return Result;
}

// Performs an mremap on behalf of the guest and updates FEX's VMA tracking.
// 64-bit guests use ::mremap, 32-bit guests go through the 32-bit allocator.
// Returns the new address on success or a negative errno value on failure.
// Code invalidation for the old/new ranges happens after the tracking lock has been released.
uint64_t SyscallHandler::GuestMremap(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, void* old_address, size_t old_size,
                                     size_t new_size, int flags, void* new_address) {
  const auto OldBase = reinterpret_cast<uint64_t>(old_address);
  uint64_t NewBase {};

  {
    auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);

    if (!Is64Bit) {
      // The 32-bit allocator reports errors as raw negative errno values
      NewBase = reinterpret_cast<uint64_t>(Get32BitAllocator()->Mremap(old_address, old_size, new_size, flags, new_address));
      if (FEX::HLE::HasSyscallError(NewBase)) {
        return NewBase;
      }
    } else {
      // libc mremap reports failure as MAP_FAILED (all bits set) plus errno
      NewBase = reinterpret_cast<uint64_t>(::mremap(old_address, old_size, new_size, flags, new_address));
      if (NewBase == -1) {
        return -errno;
      }
    }

    TrackMremap(Thread, OldBase, old_size, new_size, flags, NewBase);
  }

  InvalidateCodeRangeIfNecessaryOnRemap(Thread, OldBase, NewBase, old_size, new_size);
  return NewBase;
}

// Requests a code map file descriptor for the current application from FEXServer.
// The application binary is opened (RootFS path first, plain path second) and handed to the server.
// On success the returned descriptor is exclusively locked, registered as protected with the file manager,
// marked close-on-exec and returned. Returns -1 if the binary cannot be opened or the server yields no descriptor.
int SyscallHandler::OpenCodeMapFile() {
  // Query from FEXServer whether this is the first instance of this executable; if it is, also enable code dumping!
  FEX_CONFIG_OPT(RootFSPath, ROOTFS);
  FEX_CONFIG_OPT(Multiblock, MULTIBLOCK);
  auto ProgramName = FEXCore::Config::Get(FEXCore::Config::CONFIG_APP_FILENAME);
  LOGMAN_THROW_A_FMT(ProgramName && ProgramName.value()->c_str()[0] == '/', "");

  const char* ProgramPath = ProgramName.value()->c_str();

  // Check RootFS first, then the plain path
  const auto RootFSProgramPath = RootFSPath() + ProgramPath;
  int ProgramFD = open(RootFSProgramPath.c_str(), O_RDONLY);
  if (ProgramFD == -1) {
    ProgramFD = open(ProgramPath, O_RDONLY);
    if (ProgramFD == -1) {
      return -1;
    }
  }

  // The program descriptor is only needed for the request itself
  const int CodeMapFD = FEXServerClient::RequestCodeMapFD(FEXServerClient::GetServerFD(), ProgramFD, Multiblock);
  close(ProgramFD);
  if (CodeMapFD == -1) {
    return -1;
  }

  // Acquire exclusive lock to prevent FEXServer from processing this file eagerly
  [[maybe_unused]] const auto LockResult = flock(CodeMapFD, LOCK_EX);
  LOGMAN_THROW_A_FMT(LockResult == 0, "Could not lock code map");

  FM.SetProtectedCodeMapFD(CodeMapFD);

  // Ensure the file descriptor is closed on exec
  const auto DescriptorFlags = fcntl(CodeMapFD, F_GETFD);
  fcntl(CodeMapFD, F_SETFD, DescriptorFlags | FD_CLOEXEC);
  return CodeMapFD;
}

// Performs an mprotect on behalf of the guest and updates FEX's VMA tracking.
// Returns the mprotect result on success or a negative errno value on failure.
// After tracking is updated, code in the range is invalidated if necessary. If code caching is enabled and the
// range becomes executable and non-writable, a pending delayed code cache load for the backing file is triggered.
uint64_t SyscallHandler::GuestMprotect(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t len, int prot) {
  uint64_t Result {};

  {
    auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);
    // libc mprotect reports failure as -1 plus errno
    Result = ::mprotect(addr, len, prot);
    if (Result == -1) {
      return -errno;
    }

    TrackMprotect(Thread, addr, len, prot);
  }

  // Done outside of the VMA tracking lock
  InvalidateCodeRangeIfNecessary(Thread, reinterpret_cast<uint64_t>(addr), len);

  // Prepare for delayed code cache load after ld/Wine is done applying relocations.
  // Hooking into mprotect is a reliable heuristic that matches behavior of ld (for ELF) and Wine (for PE).
  // False-positives are avoided by setting RequiresDelayedCacheLoad in TrackMmap only for
  // binaries that we know will go through this path.
  fextl::vector<FEXCore::ExecutableFileSectionInfo> CachedSections;
  if (EnableCodeCaching && (prot & PROT_EXEC) && (prot & PROT_WRITE) == 0) {
    auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);

    // Only the mapping containing the start address is consulted
    auto VMAEntry = VMATracking.FindVMAEntry(reinterpret_cast<uint64_t>(addr));
    auto Resource = VMAEntry != VMATracking.VMAs.end() ? VMAEntry->second.Resource : nullptr;
    if (Resource && Resource->MappedFile && Resource->RequiresDelayedCacheLoad) {
      // Clear the flag so the delayed load is triggered at most once per resource
      Resource->RequiresDelayedCacheLoad = false;
      LogMan::Msg::IFmt("Triggering delayed cache load for {} after mprotect of {:#x}-{:#x}", Resource->MappedFile->Filename,
                        VMAEntry->first, VMAEntry->first + VMAEntry->second.Length);

      // Collect one section per VMA of the resource; loading happens after the lock is released
      for (auto VMA = Resource->FirstVMA; VMA; VMA = VMA->ResourceNextVMA) {
        CachedSections.push_back(BuildSectionInfo(*Resource, VMA->Base, VMA->Length));
      }
    }
  }

  // Trigger delayed cache load. This must be done separately since
  // LoadCodeCache will call interfaces that acquire the VMATracking mutex.
  for (auto& CachedSection : CachedSections) {
    LoadCodeCache(*Thread, CachedSection, CodeCacheConfigId);
  }

  return Result;
}

// Attaches a SysV shared memory segment on behalf of the guest and updates FEX's VMA tracking.
// 64-bit guests use ::shmat, 32-bit guests go through the 32-bit allocator.
// Returns the attach address on success or a negative errno value on failure.
// Code in the attached range is invalidated if necessary after the tracking lock has been released.
uint64_t SyscallHandler::GuestShmat(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, int shmid, const void* shmaddr, int shmflg) {
  uint64_t Result {};
  uint64_t Length {};

  {
    auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);
    if (Is64Bit) {
      // libc shmat reports failure as (void*)-1 plus errno
      Result = reinterpret_cast<uint64_t>(::shmat(shmid, shmaddr, shmflg));
      if (Result == -1) {
        return -errno;
      }
    } else {
      // The 32-bit allocator returns a status and hands back the attach address through Addr
      uint32_t Addr;
      Result = Get32BitAllocator()->Shmat(shmid, shmaddr, shmflg, &Addr);
      if (FEX::HLE::HasSyscallError(Result)) {
        return Result;
      }
      Result = Addr;
    }

    shmid_ds stat;

    // The segment size is needed to know how large the new mapping is
    // NOTE(review): a failing shmctl is only caught by the assertion; without assertions `stat` would be read uninitialized.
    auto res = shmctl(shmid, IPC_STAT, &stat);
    LOGMAN_THROW_A_FMT(res != -1, "shmctl IPC_STAT failed");

    Length = stat.shm_segsz;
    TrackShmat(Thread, shmid, Result, shmflg, Length);
  }

  // Done outside of the VMA tracking lock
  InvalidateCodeRangeIfNecessary(Thread, Result, Length);
  return Result;
}

// Detaches a SysV shared memory segment on behalf of the guest and updates FEX's VMA tracking.
// 64-bit guests use ::shmdt, 32-bit guests go through the 32-bit allocator.
// Returns the detach result on success or a negative errno value on failure.
// Code in the detached range (length as reported by TrackShmdt) is invalidated if necessary after the
// tracking lock has been released.
uint64_t SyscallHandler::GuestShmdt(bool Is64Bit, FEXCore::Core::InternalThreadState* Thread, const void* shmaddr) {
  const auto AttachAddress = reinterpret_cast<uintptr_t>(shmaddr);
  uint64_t DetachResult {};
  uint64_t DetachedLength {};

  {
    auto lk = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);

    if (!Is64Bit) {
      // The 32-bit allocator reports errors as raw negative errno values
      DetachResult = Get32BitAllocator()->Shmdt(shmaddr);
      if (FEX::HLE::HasSyscallError(DetachResult)) {
        return DetachResult;
      }
    } else {
      // libc shmdt reports failure as -1 plus errno
      DetachResult = ::shmdt(shmaddr);
      if (DetachResult == -1) {
        return -errno;
      }
    }

    DetachedLength = TrackShmdt(Thread, AttachAddress);
  }

  InvalidateCodeRangeIfNecessary(Thread, AttachAddress, DetachedLength);
  return DetachResult;
}

// MMan Tracking
std::optional<SyscallHandler::LateApplyExtendedVolatileMetadata>
SyscallHandler::TrackMmap(FEXCore::Core::InternalThreadState* Thread, uint64_t addr, size_t length, int prot, int flags, int fd,
                          off_t offset, std::optional<FEXCore::ExecutableFileSectionInfo>& CachedSection) {
  size_t Size = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);
  const auto ProtMapping = VMATracking::VMAProt::fromProt(prot);

  VMATracking::MappedResource* Resource = nullptr;

  std::optional<SyscallHandler::LateApplyExtendedVolatileMetadata> VolatileMetadata = std::nullopt;

  if (!(flags & MAP_ANONYMOUS)) {
    struct stat64 buf;
    fstat64(fd, &buf);

    const VMATracking::MRID mrid {buf.st_dev, buf.st_ino};

    char Tmp[PATH_MAX];
    auto PathLength = FEX::get_fdpath(fd, Tmp);

    auto [ResourceIt, ResourceEnd] = VMATracking.FindResources(mrid);
    bool Inserted = false;
    const bool MappedELFHeaderAgain = ResourceIt != ResourceEnd && offset == 0 && !ResourceIt->second.ProgramHeaders.empty();
    if (ResourceIt == ResourceEnd || MappedELFHeaderAgain) {
      // Create a new MappedResource for previously unseen file and for re-mappings of an ELF header
      ResourceIt = VMATracking.InsertMappedResource(mrid, VMATracking::MappedResource {nullptr, nullptr, 0, {}, {}});
      ResourceIt->second.Iterator = ResourceIt;
      Inserted = true;
    }
    Resource = &ResourceIt->second;

    // Only handle FDs that are backed by regular files that are executable
    if (PathLength != -1 && S_ISREG(buf.st_mode) && (buf.st_mode & S_IXUSR)) {
      // ELF files that are mapped multiple times get a separate MappedResource for each base virtual address
      if ((prot & PROT_READ) && Inserted) {
        Resource->MappedFile = fextl::make_unique<FEXCore::ExecutableFileInfo>();
        Resource->MappedFile->Filename = fextl::string(Tmp, PathLength);
        Resource->MappedFile->FileId = CTX->GetCodeCache().ComputeCodeMapId(Resource->MappedFile->Filename, fd);

        // Read ELF headers if applicable.
        // For performance, skip ELF checks if we're not mapping the file header
        bool CheckForElfFile = (offset == 0);
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
        CheckForElfFile = true;
#endif
        if (CheckForElfFile) {
          auto ELFResult = ReadELFHeaders(fd, std::span {reinterpret_cast<std::byte*>(addr), length});
          Resource->ProgramHeaders = std::move(ELFResult.ProgramHeaders);
          Resource->MappedFile->Relocations = std::move(ELFResult.Relocations);
          Resource->RequiresDelayedCacheLoad = ELFResult.HasCodeRelocations;

          // GuestRelocationType::Skip indicates to FEXOfflineCompiler that
          // any blocks covered by the relocation may not be cached.
          // At runtime, we can safely drop these relocations.
          for (auto it = Resource->MappedFile->Relocations.begin(); it != Resource->MappedFile->Relocations.end();) {
            if (it->second == FEXCore::GuestRelocationType::Skip) {
              it = Resource->MappedFile->Relocations.erase(it);
            } else {
              ++it;
            }
          }

          LOGMAN_THROW_A_FMT(Resource->ProgramHeaders.empty() || offset == 0, "Expected file offset 0 for the first mapping of an ELF "
                                                                              "file");
        }
      } else if (ResourceIt->second.ProgramHeaders.empty()) {
        // Not an ELF file, so we don't need to distinguish between different base addresses
      } else {
        // Mapped a non-header section of an ELF file.
        // Look up the corresponding MappedResource using the expected base address.

        ResourceIt = std::find_if(ResourceIt, ResourceEnd, [&](const VMATracking::MappedResource::ContainerType::value_type& ResourcePair) {
          auto& Resource = ResourcePair.second;
          auto ExpectedBases = FEXCore::InferMappingBaseAddress(
            Resource.ProgramHeaders, addr, Size, offset,
            (ProtMapping.Executable ? PF_X : 0) | (ProtMapping.Writable ? PF_W : 0) | (ProtMapping.Readable ? PF_R : 0));
          return std::ranges::find(ExpectedBases, Resource.FirstVMA->Base) != ExpectedBases.end();
        });
        if (ResourceIt == ResourceEnd) {
          // This isn't necessarily a fatal exception. It just means the ELF section isn't a part of the ELF Program headers.
          // Node.js hits this as it maps a section of itself that isn't a part of the program headers.
          LogMan::Msg::IFmt("Warning: Could not find base for file mapping at {:#x} (offset {:#x}): {}", addr, offset,
                            std::string_view(Tmp, PathLength));
        } else {
          Resource = &ResourceIt->second;
        }
      }

      if (Resource->MappedFile) {
        const fextl::string Filename = FHU::Filesystem::GetFilename(Resource->MappedFile->Filename);

        // We now have the filename and the offset in the filename getting mapped.
        // Check for extended volatile metadata.
        auto it = ExtendedMetaData.find(Filename);
        if (it != ExtendedMetaData.end()) {
          SyscallHandler::LateApplyExtendedVolatileMetadata LateMetadata;
          FEX::VolatileMetadata::ApplyFEXExtendedVolatileMetadata(
            it->second, LateMetadata.VolatileInstructions, LateMetadata.VolatileValidRanges, addr, addr + length, offset, offset + length);

          if (!LateMetadata.VolatileInstructions.empty() || !LateMetadata.VolatileValidRanges.Empty()) {
            VolatileMetadata.emplace(std::move(LateMetadata));
          }
        }
      }
    }
  } else if (flags & MAP_SHARED) {
    VMATracking::MRID mrid {VMATracking::SpecialDev::Anon, AnonSharedId++};

    auto [Iter, IterEnd] = VMATracking.FindResources(mrid);
    LOGMAN_THROW_A_FMT(Iter == IterEnd, "VMA tracking error");

    Iter = VMATracking.InsertMappedResource(mrid, VMATracking::MappedResource {nullptr, nullptr, 0, {}, {}});
    Resource = &Iter->second;
    Resource->Iterator = Iter;
  }

  VMATracking.TrackVMARange(CTX, Resource, addr, offset, Size, VMATracking::VMAFlags::fromFlags(flags), VMATracking::VMAProt::fromProt(prot));

  // Load code cache if present.
  // FEXServer was requested to generate library caches on program launch.
  if (EnableCodeCaching && Resource && Resource->MappedFile && VMATracking::VMAProt::fromProt(prot).Executable) {
    if (Thread) {
      if (!Resource->RequiresDelayedCacheLoad) {
        CachedSection.emplace(BuildSectionInfo(*Resource, addr, Size));
      } else {
        LogMan::Msg::IFmt("Delaying code cache load for {} until mprotect {:#x}-{:#x}", Resource->MappedFile->Filename, addr, addr + Size);
      }
    } else {
      // Cache can't be loaded with a thread; skip this for now
      LogMan::Msg::DFmt("Oops, tried caching without a thread: {}", Resource->MappedFile->Filename);
    }
  }

  return VolatileMetadata;
}

// Stops tracking the guest range released by a munmap.
// The requested length is rounded up to a whole number of FEX pages before the range is removed.
// `Thread` is unused here.
void SyscallHandler::TrackMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length) {
  const auto GuestBase = reinterpret_cast<uintptr_t>(addr);
  const uint64_t PageAlignedLength = FEXCore::AlignUp(length, FEXCore::Utils::FEX_PAGE_SIZE);

  VMATracking.DeleteVMARange(CTX, GuestBase, PageAlignedLength);
}

// Records the new protection of a guest range after an mprotect.
// The length is rounded up to a whole number of FEX pages and `prot` is translated into the tracker's VMAProt form.
// `Thread` is unused here.
void SyscallHandler::TrackMprotect(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t len, int prot) {
  const auto GuestBase = reinterpret_cast<uintptr_t>(addr);
  const uint64_t PageAlignedLength = FEXCore::AlignUp(len, FEXCore::Utils::FEX_PAGE_SIZE);
  const auto TrackedProt = VMATracking::VMAProt::fromProt(prot);

  VMATracking.ChangeProtectionFlags(GuestBase, PageAlignedLength, TrackedProt);
}

// Updates VMA tracking after an mremap.
//
// OldAddress/OldSize describe the source range, NewAddress/NewSize the resulting range; both sizes are
// rounded up to whole FEX pages. The new range inherits the resource, file offset, flags and protection
// of the VMA that contains OldAddress.
//
// - OldSize == 0: the existing (shared) mapping is mirrored at NewAddress; the old range stays tracked.
// - Otherwise: the old range is dropped from tracking unless MREMAP_DONTUNMAP is set, then the new range is tracked.
//
// `Thread` is unused here.
void SyscallHandler::TrackMremap(FEXCore::Core::InternalThreadState* Thread, uint64_t OldAddress, size_t OldSize, size_t NewSize, int flags,
                                 uint64_t NewAddress) {
  OldSize = FEXCore::AlignUp(OldSize, FEXCore::Utils::FEX_PAGE_SIZE);
  NewSize = FEXCore::AlignUp(NewSize, FEXCore::Utils::FEX_PAGE_SIZE);

  const auto OldVMA = VMATracking.FindVMAEntry(OldAddress);

  // Validate the lookup *before* reading through the iterator; FindVMAEntry returns end() when
  // no tracked VMA contains OldAddress.
  LOGMAN_THROW_A_FMT(OldVMA != VMATracking.VMAs.end(), "VMA Tracking corruption");

  const auto OldResource = OldVMA->second.Resource;
  // File/resource offset that corresponds to OldAddress (which may be in the middle of the VMA)
  const auto OldOffset = OldVMA->second.Offset + OldAddress - OldVMA->first;
  const auto OldFlags = OldVMA->second.Flags;
  const auto OldProt = OldVMA->second.Prot;

  if (OldSize == 0) {
    // Mirror existing mapping
    // must be a shared mapping
    LOGMAN_THROW_A_FMT(OldResource != nullptr, "VMA Tracking error");
    LOGMAN_THROW_A_FMT(OldFlags.Shared, "VMA Tracking error");
    VMATracking.TrackVMARange(CTX, OldResource, NewAddress, OldOffset, NewSize, OldFlags, OldProt);
  } else {

#ifndef MREMAP_DONTUNMAP
// MREMAP_DONTUNMAP is kernel 5.7+ and might not exist
#define MREMAP_DONTUNMAP 4
#endif
    if (!(flags & MREMAP_DONTUNMAP)) {
      // Preserve OldResource so it survives even if this removes its last VMA; it is re-attached below.
      VMATracking.DeleteVMARange(CTX, OldAddress, OldSize, OldResource);
    }

    // Track the new range with the properties (resource, offset, flags, protection) of the old mapping
    VMATracking.TrackVMARange(CTX, OldResource, NewAddress, OldOffset, NewSize, OldFlags, OldProt);
  }
}

// Tracks a SysV shared memory segment attached at `shmaddr`.
// Segments are keyed by {SpecialDev::SHM, shmid}; the MappedResource is created on first attach
// and reused by later attaches of the same shmid. `Thread` is unused here.
void SyscallHandler::TrackShmat(FEXCore::Core::InternalThreadState* Thread, int shmid, uint64_t shmaddr, int shmflg, uint64_t Length) {
  const VMATracking::MRID SegmentId {VMATracking::SpecialDev::SHM, static_cast<uint64_t>(shmid)};

  auto [ResourceIt, ResourceEnd] = VMATracking.FindResources(SegmentId);
  const bool AlreadyTracked = ResourceIt != ResourceEnd;
  if (!AlreadyTracked) {
    // First attach of this segment: register a resource that remembers the segment length
    ResourceIt = VMATracking.InsertMappedResource(SegmentId, VMATracking::MappedResource {nullptr, nullptr, Length, {}, {}});
    ResourceIt->second.Iterator = ResourceIt;
  }

  const auto SharedFlags = VMATracking::VMAFlags::fromFlags(MAP_SHARED);
  const auto SegmentProt = VMATracking::VMAProt::fromSHM(shmflg);
  VMATracking.TrackVMARange(CTX, &ResourceIt->second, shmaddr, 0, Length, SharedFlags, SegmentProt);
}

// Stops tracking the SysV shared memory segment attached at `shmaddr`.
// Returns whatever VMATracking::DeleteSHMRegion reports: the segment length, or 0 if no segment was found.
// `Thread` is unused here.
uint64_t SyscallHandler::TrackShmdt(FEXCore::Core::InternalThreadState* Thread, uint64_t shmaddr) {
  const auto AttachAddress = static_cast<uintptr_t>(shmaddr);
  return VMATracking.DeleteSHMRegion(CTX, AttachAddress);
}

// Placeholder for madvise tracking.
// Currently only page-aligns the size and takes the VMA tracking guard for the duration of the call;
// no tracking state is modified yet.
void SyscallHandler::TrackMadvise(FEXCore::Core::InternalThreadState* Thread, uintptr_t Base, uintptr_t Size, int advice) {
  Size = FEXCore::AlignUp(Size, FEXCore::Utils::FEX_PAGE_SIZE);

  // Guard is released when it goes out of scope at the end of the function
  auto TrackingGuard = FEXCore::GuardSignalDeferringSection(VMATracking.Mutex, Thread);
  // TODO
}

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
category: LinuxSyscalls ~ Linux syscall emulation, marshaling and passthrough
tags: LinuxSyscalls|common
desc: VMA Tracking
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include <sys/shm.h>

namespace FEX::HLE::VMATracking {
/// Helpers ///
// Converts mmap/mprotect style PROT_* bits into the tracker's VMAProt representation.
auto VMAProt::fromProt(int Prot) -> VMAProt {
  const bool CanRead = (Prot & PROT_READ) != 0;
  const bool CanWrite = (Prot & PROT_WRITE) != 0;
  const bool CanExecute = (Prot & PROT_EXEC) != 0;

  return VMAProt {
    .Readable = CanRead,
    .Writable = CanWrite,
    .Executable = CanExecute,
  };
}

// Converts shmat flags into the tracker's VMAProt representation.
// The result is always readable, writable unless SHM_RDONLY is set, and executable only if SHM_EXEC is set.
auto VMAProt::fromSHM(int SHMFlg) -> VMAProt {
  const bool ReadOnly = (SHMFlg & SHM_RDONLY) != 0;
  const bool WantsExec = (SHMFlg & SHM_EXEC) != 0;

  return VMAProt {
    .Readable = true,
    .Writable = !ReadOnly,
    .Executable = WantsExec,
  };
}

// Converts mmap MAP_* flags into the tracker's VMAFlags representation.
auto VMAFlags::fromFlags(int Flags) -> VMAFlags {
  // The MAP_SHARED bit test also matches MAP_SHARED_VALIDATE
  const bool IsShared = (Flags & MAP_SHARED) != 0;

  return VMAFlags {
    .Shared = IsShared,
  };
}

/// List Operations ///
// Sanity check (assert builds): a VMA must never be linked to itself in its resource list.
// Null pointers are accepted and ignored.
static inline void ListCheckVMALinks(const VMAEntry* VMA) {
  if (!VMA) {
    return;
  }

  LOGMAN_THROW_A_FMT(VMA->ResourceNextVMA != VMA, "VMA tracking error");
  LOGMAN_THROW_A_FMT(VMA->ResourcePrevVMA != VMA, "VMA tracking error");
}

// Unlinks a VMA from the intrusive list of its MappedResource.
// The VMA's own Prev/Next pointers are left untouched; only its neighbours and the list head are updated.
// Returns true if the resource's list is empty afterwards.
static bool ListRemove(VMAEntry* VMA) {
  LOGMAN_THROW_A_FMT(VMA->Resource != nullptr, "VMA tracking error");

  VMAEntry* const Prev = VMA->ResourcePrevVMA;
  VMAEntry* const Next = VMA->ResourceNextVMA;

  if (Prev) {
    // Predecessor skips over the removed entry
    LOGMAN_THROW_A_FMT(Prev->ResourceNextVMA == VMA, "VMA tracking error");
    Prev->ResourceNextVMA = Next;
  } else {
    // No predecessor: the entry must be the list head
    LOGMAN_THROW_A_FMT(VMA->Resource->FirstVMA == VMA, "VMA tracking error");
  }

  if (Next) {
    // Successor now points back at the removed entry's predecessor
    LOGMAN_THROW_A_FMT(Next->ResourcePrevVMA == VMA, "VMA tracking error");
    Next->ResourcePrevVMA = Prev;
  }

  if (VMA->Resource && VMA->Resource->FirstVMA == VMA) {
    // Removing the head: the successor (possibly null) becomes the new head
    LOGMAN_THROW_A_FMT(!Next || Next->ResourcePrevVMA == nullptr, "VMA tracking error");

    VMA->Resource->FirstVMA = Next;
  }

  ListCheckVMALinks(VMA);
  ListCheckVMALinks(Next);
  ListCheckVMALinks(Prev);

  const bool ListIsEmpty = VMA->Resource->FirstVMA == nullptr;
  return ListIsEmpty;
}

// Substitutes NewVMA for VMA in the intrusive list of their shared MappedResource.
// Preconditions: NewVMA->Resource, NewVMA->ResourcePrevVMA and NewVMA->ResourceNextVMA already mirror VMA's.
// Only the neighbours (or the list head) are re-pointed; VMA itself is not modified.
static void ListReplace(VMAEntry* VMA, VMAEntry* NewVMA) {
  LOGMAN_THROW_A_FMT(VMA->Resource != nullptr, "VMA tracking error");

  LOGMAN_THROW_A_FMT(VMA->Resource == NewVMA->Resource, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourcePrevVMA == VMA->ResourcePrevVMA, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourceNextVMA == VMA->ResourceNextVMA, "VMA tracking error");

  VMAEntry* const Prev = VMA->ResourcePrevVMA;
  VMAEntry* const Next = VMA->ResourceNextVMA;

  if (!Prev) {
    // Replacing the list head
    LOGMAN_THROW_A_FMT(VMA->Resource->FirstVMA == VMA, "VMA tracking error");
    VMA->Resource->FirstVMA = NewVMA;
  } else {
    LOGMAN_THROW_A_FMT(VMA->Resource->FirstVMA != VMA, "VMA tracking error");
    LOGMAN_THROW_A_FMT(Prev->ResourceNextVMA == VMA, "VMA tracking error");
    Prev->ResourceNextVMA = NewVMA;
  }

  if (Next) {
    LOGMAN_THROW_A_FMT(Next->ResourcePrevVMA == VMA, "VMA tracking error");
    Next->ResourcePrevVMA = NewVMA;
  }

  ListCheckVMALinks(VMA);
  ListCheckVMALinks(NewVMA);
  ListCheckVMALinks(Next);
  ListCheckVMALinks(Prev);
}

// Links NewVMA directly behind AfterVMA in the intrusive list of their shared MappedResource.
// Preconditions: NewVMA->Resource is set, NewVMA->ResourcePrevVMA == AfterVMA and
// NewVMA->ResourceNextVMA == AfterVMA's current successor.
static void ListInsertAfter(VMAEntry* AfterVMA, VMAEntry* NewVMA) {
  LOGMAN_THROW_A_FMT(NewVMA->Resource != nullptr, "VMA tracking error");

  LOGMAN_THROW_A_FMT(AfterVMA->Resource == NewVMA->Resource, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourcePrevVMA == AfterVMA, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourceNextVMA == AfterVMA->ResourceNextVMA, "VMA tracking error");

  // Re-point the old successor (if any) back at the new entry
  if (VMAEntry* const OldNext = AfterVMA->ResourceNextVMA; OldNext) {
    LOGMAN_THROW_A_FMT(OldNext->ResourcePrevVMA == AfterVMA, "VMA tracking error");
    OldNext->ResourcePrevVMA = NewVMA;
  }
  AfterVMA->ResourceNextVMA = NewVMA;

  ListCheckVMALinks(AfterVMA);
  ListCheckVMALinks(NewVMA);
  ListCheckVMALinks(AfterVMA->ResourceNextVMA);
  ListCheckVMALinks(AfterVMA->ResourcePrevVMA);
}

// Makes NewVMA the new head of Resource's intrusive VMA list.
// Preconditions: NewVMA->Resource == Resource, NewVMA->ResourcePrevVMA == nullptr and
// NewVMA->ResourceNextVMA == the current head.
static void ListPrepend(MappedResource* Resource, VMAEntry* NewVMA) {
  LOGMAN_THROW_A_FMT(Resource != nullptr, "VMA tracking error");

  LOGMAN_THROW_A_FMT(NewVMA->Resource == Resource, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourcePrevVMA == nullptr, "VMA tracking error");
  LOGMAN_THROW_A_FMT(NewVMA->ResourceNextVMA == Resource->FirstVMA, "VMA tracking error");

  // The previous head (if any) gains NewVMA as its predecessor
  if (VMAEntry* const OldHead = Resource->FirstVMA; OldHead) {
    LOGMAN_THROW_A_FMT(OldHead->ResourcePrevVMA == nullptr, "VMA tracking error");
    OldHead->ResourcePrevVMA = NewVMA;
  }

  Resource->FirstVMA = NewVMA;

  ListCheckVMALinks(NewVMA);
  ListCheckVMALinks(NewVMA->ResourceNextVMA);
  ListCheckVMALinks(NewVMA->ResourcePrevVMA);
}

/// VMA tracking ///

// Looks up the tracked VMA that contains GuestAddr.
// Returns an iterator to the entry whose [start, start + Length) range covers the address,
// or VMAs.end() if the address is not inside any tracked VMA.
VMATracking::VMACIterator VMATracking::FindVMAEntry(uint64_t GuestAddr) const {
  // First entry that starts strictly above the address; the candidate is the one right before it
  auto Candidate = VMAs.upper_bound(GuestAddr);
  if (Candidate == VMAs.begin()) {
    // Nothing starts at or below GuestAddr
    return VMAs.end();
  }
  --Candidate;

  const bool StartsAtOrBelow = Candidate->first <= GuestAddr;
  const bool EndsAbove = (Candidate->first + Candidate->second.Length) > GuestAddr;
  if (StartsAtOrBelow && EndsAbove) {
    return Candidate;
  }

  return VMAs.end();
}

// Tracks [Base, Base + Length) as a new VMA, replacing whatever was tracked in that range before.
// If MappedResource is non-null the new VMA is also linked into that resource's VMA list at the
// position determined by Base; the resource itself is preserved while the old range is deleted.
// The write lock on Mutex must be held by the caller (checked below).
void VMATracking::TrackVMARange(FEXCore::Context::Context* CTX, MappedResource* MappedResource, uintptr_t Base, uintptr_t Offset,
                                uintptr_t Length, VMAFlags Flags, VMAProt Prot) {
  Mutex.check_lock_owned_by_self_as_write();

  DeleteVMARange(CTX, Base, Length, MappedResource);

  // Locate the list neighbours of the new VMA inside the resource's list
  VMAEntry* ListPrev = nullptr;
  VMAEntry* ListNext = nullptr;
  if (MappedResource && MappedResource->FirstVMA) {
    VMAEntry* const Head = MappedResource->FirstVMA;
    if (Head->Base > Base) {
      // New VMA goes in front of the current head
      ListNext = Head;
    } else {
      ListPrev = Head;
      ListNext = Head->ResourceNextVMA;
    }
  }
  while (ListNext && ListNext->Base < Base) {
    ListPrev = ListNext;
    ListNext = ListNext->ResourceNextVMA;
  }

  auto [Iter, Inserted] = VMAs.emplace(Base, VMAEntry {MappedResource, ListPrev, ListNext, Base, Offset, Length, Flags, Prot});

  LOGMAN_THROW_A_FMT(Inserted == true, "VMA Tracking corruption");

  if (!MappedResource) {
    // VMAs without a resource are not part of any list
    return;
  }

  if (ListPrev) {
    ListInsertAfter(ListPrev, &Iter->second);
  } else {
    // Insert to the front of the linked list
    ListPrepend(MappedResource, &Iter->second);
  }
}

// Remove mappings in a range, possibly splitting them if needed and
// freeing their associated MappedResource unless it is equal to PreservedMappedResource
//
// [Base, Base + Length) is the range to stop tracking. VMAs that only partially overlap the range are
// trimmed and/or split so that the parts outside of the range stay tracked with their original
// resource, flags and protection. A MappedResource is erased from MappedResources once its last VMA
// has been removed, except for PreservedMappedResource.
// The write lock on Mutex must be held by the caller (checked below). CTX is not used in this function.
void VMATracking::DeleteVMARange(FEXCore::Context::Context* CTX, uintptr_t Base, uintptr_t Length, MappedResource* PreservedMappedResource) {
  Mutex.check_lock_owned_by_self_as_write();

  const auto Top = Base + Length;

  // find the first Mapping at or after the Range ends, or ::end()
  // Top is the address after the end
  auto CurrentIter = VMAs.lower_bound(Top);

  // Iterate backwards all mappings
  while (CurrentIter != VMAs.begin()) {
    CurrentIter--;

    const auto Current = &CurrentIter->second;
    const auto MapBase = Current->Base;
    const auto MapTop = MapBase + Current->Length;
    // Difference between resource offset and address; adding an address inside this VMA yields its offset
    const auto OffsetDiff = Current->Offset - MapBase;

    if (MapTop <= Base) {
      // Mapping ends before the Range start, exit
      break;
    } else {
      // Part of the mapping lies below the Range and must be kept
      const bool HasFirstPart = MapBase < Base;
      // Part of the mapping lies above the Range and must be kept
      const bool HasTrailingPart = MapTop > Top;

      // (1) HasFirstPart, !HasTrailingPart -> trim
      // (2) HasFirstPart, HasTrailingPart -> trim, insert trailing, list add after first part
      // (3) !HasFirstPart, !HasTrailing part -> list remove, erase
      // (4) !HasFirstPart, HasTrailing part -> insert trailing, list replace first part, erase

      if (HasFirstPart) {
        // Handle trim for (1) & (2)
        // The existing entry keeps its key (MapBase) and simply ends at Base now
        Current->Length = Base - MapBase;
      } else if (!HasTrailingPart) {
        // Handle all of (3)
        // Mapping is included or equal to Range, delete

        // If linked to a Mapped Resource, remove from linked list and possibly delete the Mapped Resource
        if (Current->Resource) {
          // ListRemove returns true when this was the last VMA of the resource
          if (ListRemove(Current) && Current->Resource != PreservedMappedResource) {
            MappedResources.erase(Current->Resource->Iterator);
          }
        }

        // returns next element, so -- is safe at loop
        CurrentIter = VMAs.erase(CurrentIter);
        continue; // we're done
      }

      // In case (4) the original entry is replaced by the trailing part and then erased
      const bool ReplaceAndErase = !HasFirstPart;

      if (HasTrailingPart) {
        // Handle insert of (2), (4)

        // insert trailing part, link it after Mapping
        auto NewOffset = OffsetDiff + Top;
        auto NewLength = MapTop - Top;

        // For (4) the trailing part takes over Current's list position (same Prev/Next);
        // for (2) it is placed directly behind the trimmed Current.
        auto [Iter, Inserted] = VMAs.emplace(Top, VMAEntry {Current->Resource, ReplaceAndErase ? Current->ResourcePrevVMA : Current,
                                                            Current->ResourceNextVMA, Top, NewOffset, NewLength, Current->Flags, Current->Prot});
        LOGMAN_THROW_A_FMT(Inserted == true, "VMA tracking error");
        auto TrailingPart = &Iter->second;
        if (Current->Resource) {
          if (ReplaceAndErase) {
            // Handle list replace of (4)
            ListReplace(Current, TrailingPart);
          } else {
            // Handle list insert (2)
            ListInsertAfter(Current, TrailingPart);
          }
        }
      }

      if (ReplaceAndErase) {
        // Handle erase of (4)
        // returns next element, so -- is safe at loop
        CurrentIter = VMAs.erase(CurrentIter);
      }
    }
  }
}

// Change flags of mappings in a range and split the mappings if needed
void VMATracking::ChangeProtectionFlags(uintptr_t Base, uintptr_t Length, VMAProt NewProt) {
  Mutex.check_lock_owned_by_self_as_write();

  // Handle 0 size as no-op like the kernel
  if (Length == 0) {
    return;
  }

  // This needs to handle multiple split-merge strategies:
  // 1) Exact overlap - No Split, no Merge. Only protection tracking changes.
  // 2) Exact base overlap - Single insert, can never fail.
  // 3) Insert in middle of VMA range. 1 or 2 inserts, can never fail.
  // 4) Partial overlapping merge. The most interesting strategy.
  //    - More information below about this one.

  auto Top = Base + Length;

  // find the first Mapping at or after the Range ends, or ::end()
  // Top is the address after the end
  auto MappingIter = VMAs.lower_bound(Top);

  // Iterate backwards all mappings
  while (MappingIter != VMAs.begin()) {
    MappingIter--;

    auto Current = &MappingIter->second;

    if (Current->Base <= Base || Current->Base + Current->Length < Top) {
      break;
    }

    const auto CurrentBase = Current->Base;
    const auto CurrentTop = CurrentBase + Current->Length;
    const auto CurrentFlags = Current->Flags;
    const auto CurrentProt = Current->Prot;

    ///< Resource mapping base.
    const auto OffsetDiff = Current->Offset - CurrentBase;

    // Merge strategy 4)
    // CurrentBase range doesn't fully overlap the starting range but does overlap the tail.
    // This is the most confusing strategy as it requires splitting the protect range itself.
    //
    // if the VMA has tail data after the protection range we must first deal with that:
    // 1) Split the tail data in to new VMA range with original protections. Must not fail.
    // 2) Adjust the overlapping VMA protections to the new protections and the truncated length
    // 3) Truncate the mprotecting length and top to be that untouched range. Next loop will continue inserting.
    // [ Incoming Ranges ]
    // CurrentVMA:                            [CurrentBase ====== CurrentTop)
    // CurrentMProtectRange: [Base =============== Top)**********************
    // [ Modified Ranges ]
    // New Tail Range:                                [TailBase === Tail Top)
    // CurrentVMA Modified Range:             [=======)
    // Remaining Tracking:   [Base ==== NewTop)
    //
    // Next loop iterations will decompose the remaining mprotects in to more merge strategies.

    // Steps:
    // 1) Split VMA if Top != CurrentTop
    // 2) Change [CurrentBase, Top) protections
    // 3) Change CurrentVMA length
    // 4) Adjust searching length for [Base, CurrentBase)
    const bool HasTailData = CurrentTop > Top;

    if (HasTailData) {
      // We now need to insert another VMA entry afterwards to ensure consistency.
      // This will have the original VMA's protection flags.

      // Make new VMA with new flags, insert for length of range
      auto NewOffset = OffsetDiff + CurrentBase;
      auto NewLength = CurrentTop - Top;

      auto [Iter, Inserted] = VMAs.emplace(Top, VMAEntry {.Resource = Current->Resource,
                                                          .ResourcePrevVMA = Current,
                                                          .ResourceNextVMA = Current->ResourceNextVMA,
                                                          .Base = Top,
                                                          .Offset = NewOffset,
                                                          .Length = NewLength,
                                                          .Flags = CurrentFlags,
                                                          .Prot = CurrentProt});

      if (!Inserted) {
        // We can't recover from this.
        // Shouldn't ever happen.
        ERROR_AND_DIE_FMT("{}:{}: VMA tracking error", __func__, __LINE__);
      }

      if (Current->Resource) {
        ListInsertAfter(Current, &Iter->second);
      }
    }

    // Change CurrentVMA's protections
    Current->Prot = NewProt;

    // Change CurrentVMA's length
    Current->Length = Top - CurrentBase;

    // Adjust the protection length we're searching for.
    // Next loop will pick up the next check.
    Length = CurrentBase - Base;
    Top = Base + Length;
  }

  auto Current = &MappingIter->second;
  const auto CurrentBase = Current->Base;
  const auto CurrentTop = CurrentBase + Current->Length;
  const auto CurrentFlags = Current->Flags;
  const auto CurrentProt = Current->Prot;

  ///< Resource mapping base.
  const auto OffsetDiff = Current->Offset - CurrentBase;
  if (CurrentTop <= Base) {
    // Mapping is below what we care about
    // [CurrentBase === CurrentTop)
    //                            [Base === Top)
  } else if (CurrentBase == Base && CurrentTop == Top) {
    // Merge strategy 1)
    // Exact encompassing, quite common.
    // [CurrentBase ======================== CurrentTop)
    // [Base ====================================== Top)
    Current->Prot = NewProt;
  } else if (CurrentBase == Base && CurrentTop > Top) {
    // Merge strategy 2)
    // [CurrentBase ======================== CurrentTop)
    // [Base =============== Top)***********************
    // VMA fully encompasses with matching base.
    // VMA needs to split.

    // Steps:
    // 1) Set new permissions for this VMA
    // 2) Trim VMA->Length to match [CurrentBase, CurrentBase+Length)
    // 2) Insert new node at [CurrentBase+Length, CurrentTop)

    // 1) Set new permissions
    Current->Prot = NewProt;

    // Trim end of original mapping
    // New length for Current VMA is Top - CurrentBase
    Current->Length = Top - CurrentBase;

    // Make new VMA with original protections, insert for remaining length
    auto NewOffset = OffsetDiff + Top;
    auto NewLength = CurrentTop - Top;

    auto [Iter, Inserted] = VMAs.emplace(Top, VMAEntry {.Resource = Current->Resource,
                                                        .ResourcePrevVMA = Current,
                                                        .ResourceNextVMA = Current->ResourceNextVMA,
                                                        .Base = Top,
                                                        .Offset = NewOffset,
                                                        .Length = NewLength,
                                                        .Flags = CurrentFlags,
                                                        .Prot = CurrentProt});

    if (!Inserted) [[unlikely]] {
      // We can't recover from this.
      // Shouldn't ever happen.
      ERROR_AND_DIE_FMT("{}:{}: VMA tracking error", __func__, __LINE__);
    }

    if (Current->Resource) {
      ListInsertAfter(Current, &Iter->second);
    }
  } else if (CurrentBase < Base && CurrentTop >= Top) {
    // Merge strategy 3)
    // VMA fully encompasses, VMA needs to split.
    // Explicitly VMA base doesn't match current base.
    // [CurrentBase ======================== CurrentTop)
    // ***************[Base =============== Top)********

    // Steps:
    // 1) Split the CurrentVMA
    // 2) Set new length of CurrentVMA
    // 3) If there is tail length still, Insert another new VMA with CurrentVMA data.

    const bool HasTailData = CurrentTop > Top;

    // Trim end of original mapping
    Current->Length = Base - CurrentBase;
    {
      // Make new VMA with new flags, insert for length of range
      auto NewOffset = OffsetDiff + Base;
      auto NewLength = Top - Base;

      auto [Iter, Inserted] = VMAs.emplace(Base, VMAEntry {.Resource = Current->Resource,
                                                           .ResourcePrevVMA = Current,
                                                           .ResourceNextVMA = Current->ResourceNextVMA,
                                                           .Base = Base,
                                                           .Offset = NewOffset,
                                                           .Length = NewLength,
                                                           .Flags = CurrentFlags,
                                                           .Prot = NewProt});

      if (!Inserted) [[unlikely]] {
        // We can't recover from this.
        // Shouldn't ever happen.
        ERROR_AND_DIE_FMT("{}:{}: VMA tracking error", __func__, __LINE__);
      }

      if (Current->Resource) {
        ListInsertAfter(Current, &Iter->second);
      }
    }

    if (HasTailData) {
      // We now need to insert another VMA entry afterwards to ensure consistency.
      // This will have the original VMA's protection flags.

      // Make new VMA with new flags, insert for length of range
      auto NewOffset = OffsetDiff + Top;
      auto NewLength = CurrentTop - Top;

      auto [Iter, Inserted] = VMAs.emplace(Top, VMAEntry {.Resource = Current->Resource,
                                                          .ResourcePrevVMA = Current,
                                                          .ResourceNextVMA = Current->ResourceNextVMA,
                                                          .Base = Top,
                                                          .Offset = NewOffset,
                                                          .Length = NewLength,
                                                          .Flags = CurrentFlags,
                                                          .Prot = CurrentProt});

      if (!Inserted) {
        // We can't recover from this.
        // Shouldn't ever happen.
        ERROR_AND_DIE_FMT("{}:{}: VMA tracking error", __func__, __LINE__);
      }

      if (Current->Resource) {
        ListInsertAfter(Current, &Iter->second);
      }
    }
  } else {
    ERROR_AND_DIE_FMT("Unexpected {} Merge strategy! [0x{:x}, 0x{:x}) Versus [0x{:x}, 0x{:x})\n", __func__, CurrentBase, CurrentTop, Base, Top);
  }
}

// Stops tracking the SysV shm attachment that starts at Base.
// This mirrors the peculiarities of the algorithm used by Linux ksys_shmdt (linux kernel 5.16, ipc/shm.c).
// Returns the length recorded for the shm resource, or 0 if no matching attachment was found.
// CTX is not used in this function.
uintptr_t VMATracking::DeleteSHMRegion(FEXCore::Context::Context* CTX, uintptr_t Base) {

  // Phase 1: starting with the first VMA at or after Base, look for an SHM-backed VMA
  // whose resource offset equals its distance from Base.
  auto Entry = VMAs.lower_bound(Base);
  while (Entry != VMAs.end()) {
    const VMAEntry& Candidate = Entry->second;
    LOGMAN_THROW_A_FMT(Candidate.Base >= Base, "VMA tracking corruption");

    const bool OffsetMatches = Candidate.Base - Base == Candidate.Offset;
    if (OffsetMatches && Candidate.Resource && Candidate.Resource->Iterator->first.dev == SpecialDev::SHM) {
      break;
    }
    ++Entry;
  }

  if (Entry == VMAs.end()) {
    return 0;
  }

  const auto Resource = Entry->second.Resource;
  const auto ShmLength = Resource->Iterator->second.Length;

  // Phase 2: erase this and any later VMAs of the same resource, for as long as the
  // inspected VMA still ends within ShmLength bytes of Base.
  for (;;) {
    if (Entry->second.Resource != Resource) {
      ++Entry;
    } else {
      // Drop the resource itself once its last VMA is unlinked
      if (ListRemove(&Entry->second)) {
        MappedResources.erase(Entry->second.Resource->Iterator);
      }
      Entry = VMAs.erase(Entry);
    }

    if (Entry == VMAs.end()) {
      break;
    }
    if ((Entry->second.Base + Entry->second.Length - Base) > ShmLength) {
      break;
    }
  }

  return ShmLength;
}
} // namespace FEX::HLE::VMATracking


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/SyscallsVMATracking.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>
#include <tuple>

#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/Utils/SignalScopeGuards.h>

#include <elf.h>

namespace FEX::HLE::VMATracking {
///// VMA (Virtual Memory Area) tracking /////

// Synthetic `dev` values for MRIDs that do not refer to a real device.
// Both constants are larger than any 32-bit value, so they cannot collide with a 32-bit kernel dev_t.
namespace SpecialDev {
  static constexpr uint64_t Anon = 0x1'0000'0000; // Anonymous shared mapping, id is incrementing allocation number
  static constexpr uint64_t SHM = 0x2'0000'0000;  // sys-v shm, id is shmid
}; // namespace SpecialDev

// Memory Resource ID
// Identifies the backing storage of a mapping, so shared mappings of the same storage can be recognised.
// When dev != SpecialDev::Anon, this is unique system wide.
struct MRID {
  uint64_t dev; // kernel dev_t is actually 32-bits, we use the extra bits to track SpecialDevs
  uint64_t id;

  // Strict weak ordering: lexicographic on (dev, id)
  bool operator<(const MRID& other) const {
    if (dev != other.dev) {
      return dev < other.dev;
    }
    return id < other.id;
  }
};

struct VMAEntry;

/**
 * Meta data associated to one system resource.
 *
 * Typically there is one instance of this type per ELF/PE file or special device.
 * However if an ELF/PE file is mapped multiple times at different base addresses,
 * there will be one separate MappedResource for each base address. The MRID
 * is the same in this case.
 *
 * Note: this struct is aggregate-initialized positionally by its users, so the
 * order of the members is part of its interface.
 */
struct MappedResource {
  // Container that owns all MappedResources; a multimap since several resources may share one MRID
  using ContainerType = fextl::multimap<MRID, MappedResource>;

  // Information about the mapped executable file; null if no file information is attached to this resource
  fextl::unique_ptr<FEXCore::ExecutableFileInfo> MappedFile;
  // Pointer to lowest memory range this file is mapped to
  // (head of the intrusive VMA list linked through VMAEntry::ResourcePrevVMA/ResourceNextVMA; null if empty)
  VMAEntry* FirstVMA;
  uint64_t Length; // 0 if not fixed size
  // Position of this resource inside its owning container; used to erase the resource
  ContainerType::iterator Iterator;

  // If set, loading of the code cache for this resource is deferred instead of done at map time
  bool RequiresDelayedCacheLoad = false;
  // ELF program headers of the mapped file; empty if the resource is not a (recognized) ELF file
  fextl::vector<Elf64_Phdr> ProgramHeaders;
};

// Tracked protection of a VMA.
// The individual flags and `All` share the same storage, so `All` gives access to the three flags as one value.
// NOTE(review): the exact bit position of each flag inside `All` depends on the compiler's bit-field layout.
union VMAProt {
  struct {
    bool Readable   : 1;
    bool Writable   : 1;
    bool Executable : 1;
  };
  uint8_t All : 3;

  // Builds a VMAProt from mmap/mprotect PROT_* bits
  static VMAProt fromProt(int Prot);
  // Builds a VMAProt from shmat flags (SHM_RDONLY / SHM_EXEC)
  static VMAProt fromSHM(int SHMFlg);
};

// Tracked mapping flags of a VMA.
struct VMAFlags {
  // Set if the mapping was created with the MAP_SHARED bit
  bool Shared : 1;

  // Builds a VMAFlags from mmap MAP_* flags
  static VMAFlags fromFlags(int Flags);
};

// One tracked virtual memory area (a contiguous guest address range with uniform properties).
// Note: this struct is aggregate-initialized positionally by its users, so the
// order of the members is part of its interface.
struct VMAEntry {
  // Resource backing this VMA; null if the VMA is not associated with a MappedResource
  MappedResource* Resource;

  // these are for intrusive linked list tracking, starting from Resource->FirstVMA and ordered by address
  VMAEntry* ResourcePrevVMA;
  VMAEntry* ResourceNextVMA;

  // Start address of the range (also the key of the entry in VMATracking::VMAs)
  uint64_t Base;
  // Offset into the backing resource that corresponds to Base
  uint64_t Offset;
  // Size of the range in bytes
  uint64_t Length;

  VMAFlags Flags;
  VMAProt Prot;
};

// Bookkeeping for tracked memory ranges (VMAs) and the MappedResources that back them.
// All accessors expect the caller to hold `Mutex` as described on each member.
struct VMATracking {
  // Held while reading/writing this struct
  FEXCore::ForkableSharedMutex Mutex;

  // Memory ranges indexed by page aligned starting address
  fextl::map<uint64_t, VMAEntry> VMAs;

  using VMACIterator = decltype(VMAs)::const_iterator;

  // Find a VMA entry associated with the memory address.
  // Used by `mremap` and SIGSEGV handler to find previously mapped ranges, and CodeCache to find cache entries.
  // - Mutex must be at least shared_locked before calling
  VMACIterator FindVMAEntry(uint64_t GuestAddr) const;

  // Adds a new VMA Range to be tracked, along with a `MappedResource` associated with that VMA range.
  // Primarily matches `mmap` semantics, but also used by `mremap`, and `shmat`, as they all can add new VMA ranges to be tracked.
  // - Mutex must be unique_locked before calling
  void TrackVMARange(FEXCore::Context::Context* Ctx, MappedResource* MappedResource, uintptr_t Base, uintptr_t Offset, uintptr_t Length,
                     VMAFlags Flags, VMAProt Prot);

  // Deletes a VMA range provided from tracking.
  // Matches `munmap` semantics, and `mremap` with `MREMAP_DONTUNMAP` flag set.
  // Deletes internal `MappedResource` that correlates with the range **unless** it matches `PreservedMappedResource`
  // - Mutex must be unique_locked before calling
  void DeleteVMARange(FEXCore::Context::Context* Ctx, uintptr_t Base, uintptr_t Length, MappedResource* PreservedMappedResource = nullptr);

  // Changes the protections tracking for the VMA range provided.
  // Matches `mprotect` semantics.
  // - Mutex must be unique_locked before calling
  void ChangeProtectionFlags(uintptr_t Base, uintptr_t Length, VMAProt Prot);

  // Deletes the SHM region mapped at Base from tracking.
  // Matches `shmdt` semantics.
  // - Mutex must be unique_locked before calling
  // Returns the Size of the Shm or 0 if not found
  uintptr_t DeleteSHMRegion(FEXCore::Context::Context* Ctx, uintptr_t Base);

  // Adds a new `MappedResource` to track.
  // Returns the iterator to the newly inserted (MRID, MappedResource) pair; `Resource` is moved into the container.
  // NOTE(review): no locking happens here - presumably Mutex must be unique_locked by the caller, confirm.
  inline auto InsertMappedResource(const MRID& mrid, MappedResource Resource) {
    return MappedResources.emplace(mrid, std::move(Resource));
  }

  // Returns an iterator pair spanning the range of all MappedResources matching the given MRID.
  // Typically there is only one associated resource, however sometimes the same file gets mapped
  // multiple times at different base addresses. In that case, each MappedResource will cover an
  // exclusive set of VMAEntries that refer to a consistent base mapping address.
  inline auto FindResources(const MRID& mrid) {
    return MappedResources.equal_range(mrid);
  }

private:
  // All tracked resources, keyed by MRID. Duplicated keys are allowed (see FindResources).
  MappedResource::ContainerType MappedResources;
};


} // namespace FEX::HLE::VMATracking


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.cpp
================================================
// SPDX-License-Identifier: MIT

#include "LinuxSyscalls/ThreadManager.h"

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Seccomp/SeccompEmulator.h"

#include <FEXHeaderUtils/Syscalls.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/fextl/fmt.h>

#include <sys/mman.h>
#include <sys/personality.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <git_version.h>

namespace FEX::HLE {

// Sets up the stats region through Initialize() (which returns early when ProfileStats is disabled)
// and then calls SaveHeader() with the application type matching the guest bitness.
ThreadManager::StatAlloc::StatAlloc() {
  Initialize();
  SaveHeader(Is64BitMode() ? FEXCore::SHMStats::AppType::LINUX_64 : FEXCore::SHMStats::AppType::LINUX_32);
}

// Creates the per-process stats shared-memory object ("fex-<pid>-stats") and maps its first page.
//
// A MAX_STATS_SIZE window of address space is reserved with PROT_NONE first; the shm object is then
// mapped MAP_FIXED at the start of that window so FrontendAllocateSlots() can grow it in place later.
// If reserving or mapping fails, Base is reset to nullptr. Does nothing when ProfileStats is disabled.
void ThreadManager::StatAlloc::Initialize() {
  if (!ProfileStats()) {
    return;
  }

  int fd = shm_open(fextl::fmt::format("fex-{}-stats", ::getpid()).c_str(), O_CREAT | O_TRUNC | O_RDWR, USER_PERMS);
  if (fd == -1) {
    return;
  }

  // Check the sysconf() result while it is still a signed long. Storing it into CurrentSize first
  // would let a -1 error return slip past the `> 0` test if CurrentSize is an unsigned type.
  const long HostPageSize = sysconf(_SC_PAGESIZE);
  CurrentSize = HostPageSize > 0 ? HostPageSize : FEXCore::Utils::FEX_PAGE_SIZE;

  if (ftruncate(fd, CurrentSize) == -1) {
    LogMan::Msg::EFmt("[StatAlloc] ftruncate failed");
    goto err;
  }

  // Reserve a region of MAX_STATS_SIZE so we can grow the allocation buffer.
  // Number of thread slots when ThreadStatsHeader == 64bytes and ThreadStats == 40bytes:
  // 1 page: 99 slots
  // 1 MB: 26211 slots
  // 128 MB: 3355440 slots
  Base = FEXCore::Allocator::mmap(nullptr, MAX_STATS_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
  if (Base == MAP_FAILED) {
    LogMan::Msg::EFmt("[StatAlloc] mmap base failed");
    Base = nullptr;
    goto err;
  }

  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(Base), MAX_STATS_SIZE);

  // Allocate a small working shared space for now, grow as necessary.
  {
    auto SharedBase = FEXCore::Allocator::mmap(Base, CurrentSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0);
    if (SharedBase == MAP_FAILED) {
      LogMan::Msg::EFmt("[StatAlloc] mmap shm failed");
      // Drop the whole reservation again so no half-initialized region is left behind.
      FEXCore::Allocator::munmap(Base, MAX_STATS_SIZE);
      Base = nullptr;
      goto err;
    }
  }

err:
  // The mapping (if any) stays valid after the descriptor is closed.
  close(fd);
}

// Grows the stats shared-memory object to `NewSize` bytes (clamped to MAX_STATS_SIZE) and remaps it
// in place at Base.
//
// Returns the size that is usable afterwards: the (clamped) NewSize on success, or the unchanged
// CurrentSize when the region is already at its maximum or any step of the resize fails.
uint32_t ThreadManager::StatAlloc::FrontendAllocateSlots(uint32_t NewSize) {
  if (CurrentSize == MAX_STATS_SIZE) {
    // Allocator has reached maximum slots. We can't allocate anymore.
    // New threads won't get stats.
    return CurrentSize;
  }
  NewSize = std::min(MAX_STATS_SIZE, NewSize);

  // When allocating more slots, open the fd without O_TRUNC | O_CREAT.
  int fd = shm_open(fextl::fmt::format("fex-{}-stats", ::getpid()).c_str(), O_RDWR, USER_PERMS);
  if (fd == -1) {
    return CurrentSize;
  }

  // Only report the larger size once both the resize and the remap have succeeded. Reporting NewSize
  // after a failure would make the caller treat still-unmapped memory as usable slots.
  uint32_t ResultSize = CurrentSize;

  if (ftruncate(fd, NewSize) == -1) {
    LogMan::Msg::EFmt("[StatAlloc] ftruncate more failed");
  } else if (FEXCore::Allocator::mmap(Base, NewSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, fd, 0) == MAP_FAILED) {
    LogMan::Msg::EFmt("[StatAlloc] allocate more mmap shm failed");
  } else {
    ResultSize = NewSize;
  }

  close(fd);
  return ResultSize;
}

// Serialized wrapper around StatAllocBase::AllocateSlot(): takes StatMutex for the duration of the call
// and returns whatever the base implementation returns for `TID`.
FEXCore::SHMStats::ThreadStats* ThreadManager::StatAlloc::AllocateSlot(uint32_t TID) {
  std::scoped_lock lk(StatMutex);
  return StatAllocBase::AllocateSlot(TID);
}

// Hands a previously allocated slot back to the base allocator while holding StatMutex.
// A null slot is accepted and ignored without taking the lock.
void ThreadManager::StatAlloc::DeallocateSlot(FEXCore::SHMStats::ThreadStats* AllocatedSlot) {
  if (AllocatedSlot != nullptr) {
    std::scoped_lock SlotLock(StatMutex);
    StatAllocBase::DeallocateSlot(AllocatedSlot);
  }
}

// Removes this process' named stats shm object ("fex-<pid>-stats"). The result of shm_unlink is ignored.
void ThreadManager::StatAlloc::CleanupForExit() {
  const auto ShmName = fextl::fmt::format("fex-{}-stats", ::getpid());
  shm_unlink(ShmName.c_str());
}

// Takes StatMutex ahead of a fork so the allocator state is not mid-update when the process is duplicated.
// Only done when stats profiling is enabled; the matching release is in UnlockAfterFork().
void ThreadManager::StatAlloc::LockBeforeFork() {
  if (ProfileStats()) {
    StatMutex.lock();
  }
}

// Counterpart of LockBeforeFork(); no-op when stats profiling is disabled.
//
// Parent (`Child == false`): simply releases StatMutex.
// Child: drops the inherited lock, throws away the stats region inherited from the parent, creates a
// fresh one for this process and allocates a new slot for `Thread`, the only thread alive after fork.
void ThreadManager::StatAlloc::UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child) {
  if (!ProfileStats()) {
    return;
  }

  if (!Child) {
    StatMutex.unlock();
    return;
  }

  StatMutex.StealAndDropActiveLocks();

  // shm_memory ownership is retained by the parent process, so the child must replace it with its own one.
  // Otherwise this process will keep reporting in the original parent thread's stats region.
  // Base stays nullptr when Initialize() failed in the parent; unmapping MAX_STATS_SIZE bytes starting
  // at address zero would then tear down whatever unrelated mappings happen to live in that range.
  if (Base) {
    FEXCore::Allocator::munmap(Base, MAX_STATS_SIZE);
  }
  Base = nullptr;
  CurrentSize = 0;
  Head = nullptr;
  Stats = nullptr;
  StatTail = nullptr;
  RemainingSlots = 0;

  // The old slot pointed into the region that was just discarded.
  Thread->ThreadStats = nullptr;

  Initialize();
  SaveHeader(Is64BitMode() ? FEXCore::SHMStats::AppType::LINUX_64 : FEXCore::SHMStats::AppType::LINUX_32);

  // Update this thread's ThreadStats object
  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromFEXCoreThread(Thread);
  ThreadObject->Thread->ThreadStats = AllocateSlot(ThreadObject->ThreadInfo.TID);
}

// Installs `Mask` as the calling thread's signal mask through a raw rt_sigprocmask(SIG_SETMASK) syscall.
// The same 64-bit buffer is passed as both the new-set and the old-set argument, so on return it
// holds the value that is handed back to the caller. The syscall result itself is not inspected.
uint64_t ThreadManager::SetSignalMask(uint64_t Mask) {
  uint64_t* const MaskBuffer = &Mask;
  ::syscall(SYSCALL_DEF(rt_sigprocmask), SIG_SETMASK, MaskBuffer, MaskBuffer, sizeof(Mask));
  return Mask;
}

// Names the calling thread via pthread_setname_np(). The return value is ignored, so a rejected
// name silently leaves the thread name unchanged.
void ThreadManager::SetThreadName(const char* name) {
  pthread_setname_np(pthread_self(), name);
}

// Size of the mapping that holds one thread's call-ret stack: the stack itself plus one guard page
// on each side (see CreateThread / HandleThreadDeletion).
constexpr size_t CALLRET_STACK_ALLOC_SIZE = FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE + 2 * FEXCore::Utils::FEX_PAGE_SIZE;

// Creates the frontend state object for a new guest thread, including its FEXCore thread.
//
// Must run on the host thread that will own the new guest thread: the TID is taken from gettid()
// of the caller.
//
// InitialRIP, StackPointer, NewThreadState: forwarded unchanged to CTX->CreateThread().
// ParentTID: recorded in ThreadInfo.parent_tid.
// InheritThread: when non-null, the GDT, LDT, seccomp filters and persona are copied from this thread.
//                Otherwise a default code segment is set up and the persona is queried from the host.
//
// Returns the newly allocated object. It is not added to `Threads` here (see TrackThread), but it
// already counts towards IdleWaitRefCount.
FEX::HLE::ThreadStateObject* ThreadManager::CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState,
                                                         uint64_t ParentTID, FEX::HLE::ThreadStateObject* InheritThread) {
  auto ThreadStateObject = new FEX::HLE::ThreadStateObject;

  ThreadStateObject->ThreadInfo.parent_tid = ParentTID;
  ThreadStateObject->ThreadInfo.PID = ::getpid();

  ThreadStateObject->ThreadInfo.TID = FHU::Syscalls::gettid();

  ThreadStateObject->Thread = CTX->CreateThread(InitialRIP, StackPointer, NewThreadState);
  auto Frame = ThreadStateObject->Thread->CurrentFrame;

  // Allocate the call-ret stack with guard pages on both sides
  // NOTE(review): the mmap result is not checked against MAP_FAILED before it is used below.
  auto AllocBase =
    reinterpret_cast<uint64_t>(FEXCore::Allocator::mmap(nullptr, CALLRET_STACK_ALLOC_SIZE, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));

  FEXCore::Allocator::VirtualName("FEXMem_CallRetStacks", reinterpret_cast<void*>(AllocBase), CALLRET_STACK_ALLOC_SIZE);

  // Disable HUGEPAGE on callret stacks.
  FEXCore::Allocator::VirtualTHPControl(reinterpret_cast<void*>(AllocBase), CALLRET_STACK_ALLOC_SIZE, FEXCore::Allocator::THPControl::Disable);

  // Set the base used for invalidation to the start past the guard pages
  ThreadStateObject->Thread->CallRetStackBase = reinterpret_cast<void*>(AllocBase + FEXCore::Utils::FEX_PAGE_SIZE);
  // Only the stack itself becomes accessible; the first and last page of the mapping stay PROT_NONE.
  ::mprotect(ThreadStateObject->Thread->CallRetStackBase, FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE, PROT_READ | PROT_WRITE);
  Frame->State.callret_sp = ThreadStateObject->GetCallRetStackInfo().DefaultLocation;

  // Back-pointer used by GetStateObjectFromCPUState / GetStateObjectFromFEXCoreThread.
  ThreadStateObject->Thread->FrontendPtr = ThreadStateObject;
  if (ProfileStats()) {
    ThreadStateObject->Thread->ThreadStats = Stat.AllocateSlot(ThreadStateObject->ThreadInfo.TID);
  }

  // GDT and LDT are tracked per thread.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &ThreadStateObject->gdt[0];
  // Mirror LDT to the GDT by default. Not technically correct, but fixes crashes in unittests.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &ThreadStateObject->gdt[0];

  if (InheritThread) {
    // If we are inheriting thread data then we inherit both the gdt and ldt arrays.
    // They are then forked from the parent thread.
    static_assert(sizeof(ThreadStateObject->gdt) == (8 * 32));
    memcpy(ThreadStateObject->gdt, InheritThread->gdt, sizeof(ThreadStateObject->gdt));
    if (InheritThread->ldt_entry_count) {
      // Private copy of the parent's LDT; released again in HandleThreadDeletion.
      // NOTE(review): the LDT segment array set above still points at the GDT after this copy;
      // confirm whether it is redirected to ldt_entries elsewhere.
      // NOTE(review): this mmap result is not checked against MAP_FAILED either.
      const auto new_ldt_size = InheritThread->ldt_entry_count * FEX::HLE::SyscallHandler::LDT_ENTRY_SIZE;
      ThreadStateObject->ldt_entries = reinterpret_cast<FEXCore::Core::CPUState::gdt_segment*>(
        FEXCore::Allocator::mmap(nullptr, new_ldt_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
      FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(ThreadStateObject->ldt_entries), new_ldt_size);

      ThreadStateObject->ldt_entry_count = InheritThread->ldt_entry_count;
      memcpy(ThreadStateObject->ldt_entries, InheritThread->ldt_entries, new_ldt_size);
    }
  } else {
    // Without any thread data to inherit, setup the default gdt.
    // Default code segment indexes match the numbers that the Linux kernel uses.
    Frame->State.cs_idx = FEXCore::Core::CPUState::DEFAULT_USER_CS << 3;
    auto GDT = FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx);
    FEXCore::Core::CPUState::SetGDTBase(GDT, 0);
    FEXCore::Core::CPUState::SetGDTLimit(GDT, 0xF'FFFFU);
    Frame->State.cs_cached =
      FEXCore::Core::CPUState::CalculateGDTBase(*FEXCore::Core::CPUState::GetSegmentFromIndex(Frame->State, Frame->State.cs_idx));

    if (Is64BitMode()) {
      GDT->L = 1; // L = Long Mode = 64-bit
      GDT->D = 0; // D = Default Operand Size = Reserved
    } else {
      GDT->L = 0; // L = Long Mode = 32-bit
      GDT->D = 1; // D = Default Operand Size = 32-bit
    }
  }

  if (InheritThread) {
    FEX::HLE::_SyscallHandler->SeccompEmulator.InheritSeccompFilters(InheritThread, ThreadStateObject);
    ThreadStateObject->persona = InheritThread->persona;
  } else {
    // personality(0xffffffff) queries the host's current persona without changing it.
    ThreadStateObject->persona = ::personality(0xffffffff);
  }

  // Balanced by the decrement in HandleThreadDeletion.
  ++IdleWaitRefCount;
  return ThreadStateObject;
}

// Untracks `Thread`, returns its stats slot and then tears the thread object down.
// When the last tracked thread goes away, the context's code map is flushed and closed first.
// `NeedsTLSUninstall` is forwarded to HandleThreadDeletion().
void ThreadManager::DestroyThread(FEX::HLE::ThreadStateObject* Thread, bool NeedsTLSUninstall) {
  {
    std::lock_guard CreationLock(ThreadCreationMutex);

    const auto Tracked = std::find(Threads.begin(), Threads.end(), Thread);
    LOGMAN_THROW_A_FMT(Tracked != Threads.end(), "Thread wasn't in Threads");
    Threads.erase(Tracked);

    const bool WasLastThread = Threads.empty();
    if (WasLastThread) {
      Thread->Thread->CTX->FlushAndCloseCodeMap();
    }
  }

  // The slot has to be released before HandleThreadDeletion() destroys the owning thread object.
  auto* const StatsSlot = Thread->Thread->ThreadStats;
  Stat.DeallocateSlot(StatsSlot);

  HandleThreadDeletion(Thread, NeedsTLSUninstall);
}

// Asks a single thread to stop by delivering SignalEvent::Stop through the signal delegator.
void ThreadManager::StopThread(FEX::HLE::ThreadStateObject* Thread) {
  SignalDelegation->SignalThread(Thread->Thread, SignalEvent::Stop);
}

// Releases everything owned by `Thread` and deletes it: host execution thread, optional TLS data,
// call-ret stack mapping, LDT copy, FEXCore thread and seccomp filters.
// Finally drops the thread from the idle ref-count and wakes anybody waiting on IdleWaitCV.
//
// NeedsTLSUninstall: when true, FEXCore TLS data is uninstalled as well; this is only legal when
//                    called from the thread that owns `Thread` (asserted in assertion builds).
void ThreadManager::HandleThreadDeletion(FEX::HLE::ThreadStateObject* Thread, bool NeedsTLSUninstall) {
  if (Thread->ExecutionThread) {
    if (Thread->ExecutionThread->joinable()) {
      Thread->ExecutionThread->join(nullptr);
    }

    // A thread tearing itself down is detached instead.
    if (Thread->ExecutionThread->IsSelf()) {
      Thread->ExecutionThread->detach();
    }
  }

  if (NeedsTLSUninstall) {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    // Sanity check. This can only be called from the owning thread.
    {
      const auto pid = ::getpid();
      const auto tid = FHU::Syscalls::gettid();
      LOGMAN_THROW_A_FMT(Thread->ThreadInfo.PID == pid && Thread->ThreadInfo.TID == tid, "Can't delete TLS data from a different thread!");
    }
#endif
    FEXCore::Allocator::UninstallTLSData(Thread->Thread);
  }

  // Free the call-ret stack
  // AllocationBase is the start of the whole mapping, i.e. it includes the leading guard page.
  FEXCore::Allocator::munmap(reinterpret_cast<void*>(Thread->GetCallRetStackInfo().AllocationBase), CALLRET_STACK_ALLOC_SIZE);

  // If the LDT segment exists then deallocate it.
  if (Thread->ldt_entry_count) {
    FEXCore::Allocator::munmap(Thread->ldt_entries, Thread->ldt_entry_count * FEX::HLE::SyscallHandler::LDT_ENTRY_SIZE);
  }

  CTX->DestroyThread(Thread->Thread);
  FEX::HLE::_SyscallHandler->SeccompEmulator.FreeSeccompFilters(Thread);

  delete Thread;
  // Balances the increment done in CreateThread.
  --IdleWaitRefCount;
  IdleWaitCV.notify_all();
}

void ThreadManager::NotifyPause() {
  // Tell all the threads that they should pause
  std::lock_guard lk(ThreadCreationMutex);
  for (auto& Thread : Threads) {
    SignalDelegation->SignalThread(Thread->Thread, SignalEvent::Pause);
  }
}

// Pauses all tracked threads and blocks until every one of them is idle.
// Must not be called from one of the tracked emulation threads (checked in assertion builds).
void ThreadManager::Pause() {
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // The caller would end up waiting for itself if it were a tracked thread.
  {
    const auto CallerPID = ::getpid();
    const auto CallerTID = FHU::Syscalls::gettid();
    std::lock_guard CreationLock(ThreadCreationMutex);
    for (auto* Tracked : Threads) {
      const bool IsCaller = Tracked->ThreadInfo.PID == CallerPID && Tracked->ThreadInfo.TID == CallerTID;
      LOGMAN_THROW_A_FMT(!IsCaller, "Can't put threads to sleep from inside "
                                    "emulation thread!");
    }
  }
#endif
  NotifyPause();
  WaitForIdle();
}

void ThreadManager::Run() {
  // Spin up all the threads
  std::lock_guard lk(ThreadCreationMutex);
  for (auto& Thread : Threads) {
    Thread->SignalReason.store(SignalEvent::Return);
  }
}

// Waits up to 1.5 seconds for all threads to become idle on their own. If they do not, every thread
// is explicitly asked to pause, and the function then blocks until the idle ref-count reaches zero.
void ThreadManager::WaitForIdleWithTimeout() {
  bool WentIdle {};
  {
    // IdleWaitMutex must be released again before NotifyPause()/WaitForIdle() run: WaitForIdle()
    // locks the same non-recursive std::mutex, which would otherwise be a self-deadlock.
    std::unique_lock<std::mutex> lk(IdleWaitMutex);
    WentIdle = IdleWaitCV.wait_for(lk, std::chrono::milliseconds(1500), [this] { return IdleWaitRefCount.load() == 0; });
  }

  if (!WentIdle) {
    // The wait failed, this will occur if we stepped in to a syscall
    // That's okay, we just need to pause the threads manually
    NotifyPause();
  }

  // We have sent every thread a pause signal
  // Now wait again because they /will/ be going to sleep
  WaitForIdle();
}

void ThreadManager::WaitForThreadsToRun() {
  size_t NumThreads {};
  {
    std::lock_guard lk(ThreadCreationMutex);
    NumThreads = Threads.size();
  }

  // Spin while waiting for the threads to start up
  std::unique_lock<std::mutex> lk(IdleWaitMutex);
  IdleWaitCV.wait(lk, [this, NumThreads] { return IdleWaitRefCount.load() >= NumThreads; });

  Running = true;
}

void ThreadManager::Step() {
  LogMan::Msg::AFmt("ThreadManager::Step currently not implemented");
  {
    std::lock_guard lk(ThreadCreationMutex);
    // Walk the threads and tell them to clear their caches
    // Useful when our block size is set to a large number and we need to step a single instruction
    for (auto& Thread : Threads) {
      CTX->ClearCodeCache(Thread->Thread, false);
    }
  }

  // TODO: Set to single step mode.
  Run();
  WaitForThreadsToRun();
  WaitForIdle();
  // TODO: Set back to full running mode.
}

// Sends a stop signal to the tracked threads.
//
// IgnoreCurrentThread: when true, the calling thread is skipped entirely (it is considered to be
//                      gone already). When false, the calling thread is signalled last so that all
//                      other threads are guaranteed to have received their stop signal first.
void ThreadManager::Stop(bool IgnoreCurrentThread) {
  pid_t tid = FHU::Syscalls::gettid();
  FEX::HLE::ThreadStateObject* CurrentThread {};

  // Tell all the threads that they should stop
  {
    std::lock_guard lk(ThreadCreationMutex);
    for (auto& Thread : Threads) {
      if (Thread->ThreadInfo.TID == tid) {
        if (!IgnoreCurrentThread) {
          // We need to save the current thread for last to ensure all threads receive their stop signals
          CurrentThread = Thread;
        }
        // Otherwise we are calling stop from a thread that is already gone, so no signal is sent
        // to it at all. Either way the current thread must not be signalled inside this loop.
        continue;
      }

      StopThread(Thread);
    }
  }

  // Stop the current thread now if we aren't ignoring it
  if (CurrentThread) {
    StopThread(CurrentThread);
  }
}

// Puts the calling guest thread to sleep until UnpauseThread() is called for it.
//
// While asleep the thread does not count towards IdleWaitRefCount, which lets WaitForIdle() callers
// make progress. IdleWaitCV is notified both when going to sleep and after waking up.
// `CTX` is unused; `Frame` identifies the calling thread. Must be called from the owning thread.
void ThreadManager::SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame) {
  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
  // Sanity check. This can only be called from the owning thread.
  {
    const auto pid = ::getpid();
    const auto tid = FHU::Syscalls::gettid();
    LOGMAN_THROW_A_FMT(ThreadObject->ThreadInfo.PID == pid && ThreadObject->ThreadInfo.TID == tid, "Can't put a thread to sleep from a "
                                                                                                   "different thread!");
  }
#endif

  --IdleWaitRefCount;
  IdleWaitCV.notify_all();

  ThreadObject->ThreadSleeping = true;

  // Go to sleep
  ThreadObject->ThreadPaused.Wait();

  ++IdleWaitRefCount;
  ThreadObject->ThreadSleeping = false;

  IdleWaitCV.notify_all();
}

// Wakes `Thread` if it is blocked in SleepThread() by notifying its ThreadPaused condition variable.
void ThreadManager::UnpauseThread(FEX::HLE::ThreadStateObject* Thread) {
  Thread->ThreadPaused.NotifyOne();
}

// Prepares for a fork by locking the stats allocator. Must be paired with UnlockAfterFork().
void ThreadManager::LockBeforeFork() {
  Stat.LockBeforeFork();
}

// Counterpart of LockBeforeFork().
//
// Parent (`Child == false`): only the stats allocator is unlocked.
// Child: the stats allocator is re-created, every thread object except `LiveThread` is destroyed
// (those threads do not exist in the child), `Threads` is reduced to the live thread, dead host
// stacks are cleaned up and the idle/creation synchronization state is reset for a single thread.
void ThreadManager::UnlockAfterFork(FEXCore::Core::InternalThreadState* LiveThread, bool Child) {
  Stat.UnlockAfterFork(LiveThread, Child);
  if (!Child) {
    return;
  }

  // This function is called after fork
  // We need to cleanup some of the thread data that is dead
  for (auto& DeadThread : Threads) {
    if (DeadThread->Thread == LiveThread) {
      // Stat.UnlockAfterFork() has already replaced the live thread's ThreadStats with a slot from
      // the child's own stats region (when stats are enabled), so it must not be cleared here.
      continue;
    }

    // The fork parent retains ownership of ThreadStats
    DeadThread->Thread->ThreadStats = nullptr;

    // Despite what google searches may suggest, glibc actually has special code to handle forks
    // with multiple active threads.
    // It cleans up the stacks of dead threads and marks them as terminated.
    // It also cleans up a bunch of internal mutexes.

    // FIXME: TLS is probably still alive. Investigate

    // Deconstructing the internal thread state should clean up most of the state.
    // But if anything on the now deleted stack is holding a reference to the heap, it will be leaked
    CTX->DestroyThread(DeadThread->Thread);
    delete DeadThread;

    // FIXME: Make sure nothing gets leaked via the heap. Ideas:
    //         * Make sure nothing is allocated on the heap without ref in InternalThreadState
    //         * Surround any code that heap allocates with a per-thread mutex.
    //           Before forking, the forking thread can lock all thread mutexes.
  }

  // Remove all threads but the live thread from Threads
  Threads.clear();

  auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(LiveThread->CurrentFrame);
  Threads.push_back(ThreadObject);

  // Clean up dead stacks
  FEXCore::Threads::Thread::CleanupAfterFork();

  // We now only have one thread.
  IdleWaitRefCount = 1;
  ThreadCreationMutex.StealAndDropActiveLocks();
}

// Blocks until the idle ref-count has dropped to zero, then clears the running flag
// while still holding IdleWaitMutex.
void ThreadManager::WaitForIdle() {
  std::unique_lock<std::mutex> IdleLock(IdleWaitMutex);
  while (IdleWaitRefCount.load() != 0) {
    IdleWaitCV.wait(IdleLock);
  }

  Running = false;
}

// Tears down every thread that is still tracked (without uninstalling TLS data) and empties the list.
ThreadManager::~ThreadManager() {
  std::lock_guard CreationLock(ThreadCreationMutex);

  for (auto* Remaining : Threads) {
    HandleThreadDeletion(Remaining, false);
  }
  Threads.clear();
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/ThreadManager.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|ThreadManager
desc: Frontend thread management
$end_info$
*/

#pragma once

#include "Common/SHMStats.h"

#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x32/IoctlEmulation.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/fextl/vector.h>
#include <FEXCore/Utils/InterruptableConditionVariable.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/TypeDefines.h>

#include <atomic>
#include <condition_variable>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <optional>
#include <sys/stat.h>

#include <bits/types/sigset_t.h>
#include <linux/seccomp.h>

namespace FEX::HLE {
class SignalDelegator;
class SyscallHandler;
struct SeccompFilterInfo;

// Reason attached to a signal that the frontend sends to one of its own threads
// (stored in ThreadStateObject::SignalReason).
enum class SignalEvent : uint32_t {
  Nothing, // If the guest uses our signal we need to know it was errant on our end
  Pause,   // Sent by ThreadManager::NotifyPause
  Stop,    // Sent by ThreadManager::StopThread
  Return,  // Stored by ThreadManager::Run
  ReturnRT,
};

// Frontend-side state of one guest thread. Created by ThreadManager::CreateThread and reachable from
// the FEXCore thread through InternalThreadState::FrontendPtr.
struct ThreadStateObject : public FEXCore::Allocator::FEXAllocOperators {
  // A signal whose delivery has been postponed, together with the data needed to deliver it later.
  struct DeferredSignalState {
    siginfo_t Info;
    int Signal;
    uint64_t SigMask;
  };

  // The FEXCore thread that belongs to this object.
  FEXCore::Core::InternalThreadState* Thread;

  // Thread identity and the guest-provided pointers that go with it.
  // NOTE(review): set_child_tid / clear_child_tid / robust_list_head presumably mirror the values
  // registered through clone / set_tid_address / set_robust_list - confirm against the syscall handlers.
  struct {
    uint32_t parent_tid;
    uint32_t PID;
    std::atomic<uint32_t> TID;
    int32_t* set_child_tid {0};
    int32_t* clear_child_tid {0};
    uint64_t robust_list_head {0};
  } ThreadInfo {};

  // Per-thread signal handling state.
  struct {
    SignalDelegator* Delegator {};

    void* AltStackPtr {};
    stack_t GuestAltStack {
      .ss_sp = nullptr,
      .ss_flags = SS_DISABLE, // By default the guest alt stack is disabled
      .ss_size = 0,
    };
    // This is the thread's current signal mask
    FEX::HLE::GuestSAMask CurrentSignalMask {};
    // The mask prior to a suspend
    FEX::HLE::GuestSAMask PreviousSuspendMask {};

    uint64_t PendingSignals {};

    // Queue of thread local signal frames that have been deferred.
    // Async signals aren't guaranteed to be delivered in any particular order, but FEX treats them as FILO.
    fextl::vector<DeferredSignalState> DeferredSignalFrames;
  } SignalInfo {};

  // Seccomp thread specific data.
  uint32_t SeccompMode {SECCOMP_MODE_DISABLED};
  fextl::vector<FEX::HLE::SeccompFilterInfo*> Filters {};

  // personality emulation.
  uint32_t persona {};

  // Host thread executing this guest thread; may be empty (ThreadManager checks before using it).
  FEXCore::Core::NonMovableUniquePtr<FEXCore::Threads::Thread> ExecutionThread;

  // Thread signaling information
  std::atomic<SignalEvent> SignalReason {SignalEvent::Nothing};

  // Thread pause handling
  std::atomic_bool ThreadSleeping {false};
  FEXCore::InterruptableConditionVariable ThreadPaused;

  // GDB signal information
  struct GdbInfoStruct {
    int Signal {};
  };
  std::optional<GdbInfoStruct> GdbInfo;

  int StatusCode {};

  // Addresses describing this thread's call-ret stack mapping.
  struct CallRetStackInfo {
    uint64_t AllocationBase;  // Start of the mapping, including the leading guard page
    uint64_t AllocationEnd;   // One past the end of the mapping, including the trailing guard page
    uint64_t DefaultLocation; // Initial call-ret stack pointer
  };

  // Derives the call-ret stack layout from Thread->CallRetStackBase, which points just past the
  // leading guard page.
  CallRetStackInfo GetCallRetStackInfo() {
    uint64_t Base = reinterpret_cast<uint64_t>(Thread->CallRetStackBase);
    // Leave some room from the base for the default location to allow for underflows without constant exceptions
    return {Base - FEXCore::Utils::FEX_PAGE_SIZE, Base + FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE + FEXCore::Utils::FEX_PAGE_SIZE,
            Base + FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE / 4};
  }

  // GDT and LDT tracking
  // ldt_entries is only allocated when ldt_entry_count is non-zero.
  FEXCore::Core::CPUState::gdt_segment gdt[32] {};
  size_t ldt_entry_count {};
  FEXCore::Core::CPUState::gdt_segment* ldt_entries {};

  // 32-bit FD cache for DRM handlers.
  fextl::unique_ptr<x32::DRMLRUCacheFDCache> DRMLRUCache {};
};

// Owns and coordinates the frontend's guest threads: creation and destruction, pause/run/stop
// signalling, idle tracking, fork handling and per-thread code invalidation.
class ThreadManager final {
public:

  ThreadManager(FEXCore::Context::Context* CTX, FEX::HLE::SignalDelegator* SignalDelegation)
    : CTX {CTX}
    , SignalDelegation {SignalDelegation} {}

  // Destroys every thread that is still tracked.
  ~ThreadManager();

  // Allocator for per-thread stats slots living in a named, per-process shared-memory object.
  class StatAlloc final : public FEX::SHMStats::StatAllocBase {
  public:
    StatAlloc();

    // Fork bracketing: lock before fork(), unlock (parent) or re-create the region (child) afterwards.
    void LockBeforeFork();
    void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child);

    // Unlinks the named shared-memory object of this process.
    void CleanupForExit();

    // Mutex-protected wrappers around the base class slot management.
    FEXCore::SHMStats::ThreadStats* AllocateSlot(uint32_t TID);
    void DeallocateSlot(FEXCore::SHMStats::ThreadStats* AllocatedSlot);

  private:
    // Creates and maps the shared-memory object; does nothing when ProfileStats is disabled.
    void Initialize();

    // Grows the shared-memory object; returns the size that is usable afterwards.
    uint32_t FrontendAllocateSlots(uint32_t NewSize) override;
    FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
    FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);

    // Mode passed to shm_open: read/write/execute for user, group and others.
    constexpr static int USER_PERMS = S_IRWXU | S_IRWXG | S_IRWXO;
    FEXCore::ForkableUniqueMutex StatMutex;
  };

  void CleanupForExit() {
    Stat.CleanupForExit();
  }

  /**
   * @brief Sets the calling thread's signal mask to the one provided
   *
   * @param Mask The new 64-bit signal mask to set
   *
   * @return The previous signal mask
   */
  static uint64_t SetSignalMask(uint64_t Mask);
  // Sets the name of the calling thread.
  static void SetThreadName(const char* name);

  ///< Returns the ThreadStateObject from a CpuStateFrame object.
  static inline FEX::HLE::ThreadStateObject* GetStateObjectFromCPUState(FEXCore::Core::CpuStateFrame* Frame) {
    return static_cast<FEX::HLE::ThreadStateObject*>(Frame->Thread->FrontendPtr);
  }

  ///< Returns the ThreadStateObject from an InternalThreadState object.
  static inline FEX::HLE::ThreadStateObject* GetStateObjectFromFEXCoreThread(FEXCore::Core::InternalThreadState* Thread) {
    return static_cast<FEX::HLE::ThreadStateObject*>(Thread->FrontendPtr);
  }

  // Creates a new thread object. The result is not tracked until TrackThread() is called for it.
  FEX::HLE::ThreadStateObject* CreateThread(uint64_t InitialRIP, uint64_t StackPointer, const FEXCore::Core::CPUState* NewThreadState = nullptr,
                                            uint64_t ParentTID = 0, FEX::HLE::ThreadStateObject* InheritThread = nullptr);
  // Adds `Thread` to the list of tracked threads.
  void TrackThread(FEX::HLE::ThreadStateObject* Thread) {
    std::lock_guard lk(ThreadCreationMutex);
    Threads.emplace_back(Thread);
  }

  // Untracks and deletes `Thread`; it must currently be tracked.
  void DestroyThread(FEX::HLE::ThreadStateObject* Thread, bool NeedsTLSUninstall = false);
  void StopThread(FEX::HLE::ThreadStateObject* Thread);
  void UnpauseThread(FEX::HLE::ThreadStateObject* Thread);

  void Pause();
  void Run();
  void Step();
  void Stop(bool IgnoreCurrentThread = false);

  void WaitForIdle();
  void WaitForIdleWithTimeout();
  void WaitForThreadsToRun();

  // Puts the calling thread to sleep until UnpauseThread() is called for it.
  void SleepThread(FEXCore::Context::Context* CTX, FEXCore::Core::CpuStateFrame* Frame);

  void LockBeforeFork();
  void UnlockAfterFork(FEXCore::Core::InternalThreadState* Thread, bool Child);

  void IncrementIdleRefCount() {
    ++IdleWaitRefCount;
  }

  // Invalidates cached code for the guest range [Start, Start + Length) in the shared code buffers
  // and in every tracked thread, holding ThreadCreationMutex and the code invalidation mutex.
  void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* CallingThread, uint64_t Start, uint64_t Length) {
    std::lock_guard lk(ThreadCreationMutex);

    // Potential deferred since Thread might not be valid.
    // Thread object isn't valid very early in frontend's initialization.
    // To be more optimal the frontend should provide this code with a valid Thread object earlier.
    auto CodeInvalidationlk = GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), CallingThread);
    CTX->InvalidateCodeBuffersCodeRange(Start, Length);
    for (auto& Thread : Threads) {
      CTX->InvalidateThreadCachedCodeRange(Thread->Thread, Start, Length);
    }
  }

  // Same as the overload above, and additionally invokes `after_callback(Start, Length)` while both
  // locks are still held.
  void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* CallingThread, uint64_t Start, uint64_t Length,
                                FEXCore::Context::CodeRangeInvalidationFn after_callback) {
    std::lock_guard lk(ThreadCreationMutex);

    // Potential deferred since Thread might not be valid.
    // Thread object isn't valid very early in frontend's initialization.
    // To be more optimal the frontend should provide this code with a valid Thread object earlier.
    auto CodeInvalidationlk = GuardSignalDeferringSectionWithFallback(CTX->GetCodeInvalidationMutex(), CallingThread);
    CTX->InvalidateCodeBuffersCodeRange(Start, Length);
    for (auto& Thread : Threads) {
      CTX->InvalidateThreadCachedCodeRange(Thread->Thread, Start, Length);
    }

    // Callback while holding the locks.
    after_callback(Start, Length);
  }

  // Returns a pointer to the tracked thread list. No lock is taken here, so the caller is
  // responsible for any synchronization against concurrent thread creation/destruction.
  const fextl::vector<FEX::HLE::ThreadStateObject*>* GetThreads() const {
    return &Threads;
  }

private:
  StatAlloc Stat;
  FEXCore::Context::Context* CTX;
  FEX::HLE::SignalDelegator* SignalDelegation;

  // Guards `Threads`.
  FEXCore::ForkableUniqueMutex ThreadCreationMutex;
  fextl::vector<FEX::HLE::ThreadStateObject*> Threads;

  // Thread idling support.
  // IdleWaitRefCount counts the threads that are currently not asleep; waiters block on IdleWaitCV.
  bool Running {};
  std::mutex IdleWaitMutex;
  std::condition_variable IdleWaitCV;
  std::atomic<uint32_t> IdleWaitRefCount {};

  // Frees all resources of `Thread` and deletes it; does not touch `Threads`.
  void HandleThreadDeletion(FEX::HLE::ThreadStateObject* Thread, bool NeedsTLSUninstall = false);
  // Sends the pause signal to every tracked thread without waiting for them.
  void NotifyPause();
  FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
};

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Types.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <algorithm>
#include <signal.h>
#include <sys/epoll.h>
#include <type_traits>

namespace FEX::HLE {
// Fixed-width stand-ins for kernel/libc handle types.
using key_serial_t = int32_t;
using kernel_timer_t = int32_t;
using mqd_t = int32_t;

// Fallback definitions, only used when the headers included so far have not defined these names.
// NOTE(review): the names and values look like the Linux SysV semaphore `semctl` command numbers -
// confirm against <linux/sem.h>.
#ifndef GETPID
#define GETPID 11
#endif

#ifndef GETVAL
#define GETVAL 12
#endif

#ifndef GETALL
#define GETALL 13
#endif

#ifndef GETNCNT
#define GETNCNT 14
#endif

#ifndef GETZCNT
#define GETZCNT 15
#endif

#ifndef SETVAL
#define SETVAL 16
#endif

#ifndef SETALL
#define SETALL 17
#endif

#ifndef SEM_STAT
#define SEM_STAT 18
#endif

#ifndef SEM_INFO
#define SEM_INFO 19
#endif

#ifndef SEM_STAT_ANY
#define SEM_STAT_ANY 20
#endif

struct FEX_PACKED epoll_event_x86 {
  uint32_t events;
  epoll_data_t data;

  epoll_event_x86() = delete;

  operator struct epoll_event() const {
    epoll_event event {};
    event.events = events;
    event.data = data;
    return event;
  }

  epoll_event_x86(struct epoll_event event) {
    events = event.events;
    data = event.data;
  }
};
static_assert(std::is_trivially_copyable_v<epoll_event_x86>);
static_assert(sizeof(epoll_event_x86) == 12);

// This directly matches the Linux `struct seminfo` structure.
// It is redefined here because the original definition has cyclic include dependencies,
// which causes severe compile errors on some platforms.
// The field order and widths must therefore stay in sync with the kernel's definition.
struct fex_seminfo {
  int32_t semmap;
  int32_t semmni;
  int32_t semmns;
  int32_t semmnu;
  int32_t semmsl;
  int32_t semopm;
  int32_t semume;
  int32_t semusz;
  int32_t semvmx;
  int32_t semaem;
};

// Guest signal mask stored as a single 64-bit value.
// NOTE(review): presumably one bit per signal number - confirm against the signal delegator.
struct FEX_PACKED GuestSAMask {
  uint64_t Val;
};

// Packed description of a guest signal action: handler, flags, restorer and mask.
// NOTE(review): presumably laid out to match the guest kernel's sigaction structure - confirm.
struct FEX_PACKED GuestSigAction {
  // The two handler forms share storage; which one is valid is not encoded in this union itself.
  union {
    void (*handler)(int);
    void (*sigaction)(int, siginfo_t*, void*);
  } sigaction_handler;

  uint64_t sa_flags;
  void (*restorer)(void);
  GuestSAMask sa_mask;
};

} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.cpp
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/Utils/Threads.h"
#include "LinuxSyscalls/Syscalls.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LongJump.h>
#include <FEXCore/Utils/Threads.h>

namespace FEX::LinuxEmulation::Threads {
// Returns a stack of STACK_SIZE bytes.
// The dead pool is walked once under its mutex: the first entry that is ready to be reaped is
// handed back for reuse, every further ready entry is unmapped, and entries that are not ready
// yet stay in the pool. If nothing could be reused a fresh anonymous mapping is created.
void* StackTracker::AllocateStackObject() {
  std::lock_guard lk {DeadStackPoolMutex};

  void* Reused = nullptr;

  auto Entry = DeadStackPool.begin();
  while (Entry != DeadStackPool.end()) {
    const bool CanReap = std::atomic_ref<bool>(Entry->ReadyToBeReaped).load();
    if (!CanReap) {
      // Not ready yet, leave it in the pool.
      ++Entry;
      continue;
    }

    if (Reused == nullptr) {
      // Keep the first reapable stack for the caller.
      Reused = Entry->Ptr;
    } else {
      FEXCore::Allocator::munmap(Entry->Ptr, Entry->Size);
    }
    Entry = DeadStackPool.erase(Entry);
  }

  if (Reused != nullptr) {
    return Reused;
  }

  void* Fresh =
    FEXCore::Allocator::mmap(nullptr, FEX::LinuxEmulation::Threads::STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(Fresh), FEX::LinuxEmulation::Threads::STACK_SIZE);
  return Fresh;
}

// Appends `Ptr` to the dead pool as a STACK_SIZE entry that is not yet ready to be reaped.
// Returns the address of the new entry's ReadyToBeReaped flag so the caller can set it later.
bool* StackTracker::AddStackToDeadPool(void* Ptr) {
  std::lock_guard lk {DeadStackPoolMutex};
  DeadStackPool.emplace_back(DeadStackPoolItem {Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE, false});
  return &DeadStackPool.back().ReadyToBeReaped;
}

void StackTracker::AddStackToLivePool(void* Ptr) {
  std::lock_guard lk {LiveStackPoolMutex};
  LiveStackPool.emplace_back(StackPoolItem {Ptr, FEX::LinuxEmulation::Threads::STACK_SIZE});
}

// Drops the first live pool entry whose base pointer equals `Ptr`.
// Does nothing when no entry matches. The memory itself is not unmapped here.
void StackTracker::RemoveStackFromLivePool(void* Ptr) {
  std::lock_guard lk {LiveStackPoolMutex};

  auto Entry = LiveStackPool.begin();
  while (Entry != LiveStackPool.end() && Entry->Ptr != Ptr) {
    ++Entry;
  }

  if (Entry != LiveStackPool.end()) {
    LiveStackPool.erase(Entry);
  }
}

void StackTracker::CleanupAfterFork_PThread() {
  // We don't need to pull the mutex here
  // After a fork we are the only thread running
  // Just need to make sure not to delete our own stack
  uintptr_t StackLocation = reinterpret_cast<uintptr_t>(alloca(0));

  auto ClearStackPool = [StackLocation](auto& StackPool) {
    for (auto it = StackPool.begin(); it != StackPool.end();) {
      auto& Item = *it;
      uintptr_t ItemStack = reinterpret_cast<uintptr_t>(Item.Ptr);
      if (ItemStack <= StackLocation && (ItemStack + Item.Size) > StackLocation) {
        // This is our stack item, skip it
        ++it;
      } else {
        // Untracked stack. Clean it up
        FEXCore::Allocator::munmap(Item.Ptr, Item.Size);
        it = StackPool.erase(it);
      }
    }
  };

  // Clear both dead stacks and live stacks
  ClearStackPool(DeadStackPool);
  ClearStackPool(LiveStackPool);

  LogMan::Throw::AFmt((DeadStackPool.size() + LiveStackPool.size()) <= 1, "After fork we should only have zero or one tracked stacks!");
}

// Unmaps every tracked stack, dead or live, and empties both pools.
// Both pool mutexes are held for the duration (dead pool mutex first).
void StackTracker::Shutdown() {
  std::lock_guard DeadGuard {DeadStackPoolMutex};
  std::lock_guard LiveGuard {LiveStackPoolMutex};

  const auto UnmapAndClear = [](auto& Pool) {
    for (auto& Entry : Pool) {
      FEXCore::Allocator::munmap(Entry.Ptr, Entry.Size);
    }
    Pool.clear();
  };

  UnmapAndClear(DeadStackPool);

  // We are in the shutdown phase, everything in the process is dead,
  // so stacks still considered live are released as well.
  UnmapAndClear(LiveStackPool);
}

void StackTracker::DeallocateStackObjectImmediately(void* Ptr) {
  if (Ptr) {
    RemoveStackFromLivePool(Ptr);
    auto ReadyToBeReaped = AddStackToDeadPool(Ptr);
    *ReadyToBeReaped = true;
  }
}

[[noreturn]]
void StackTracker::DeallocateStackObjectAndExit(void* Ptr, int Status) {
  if (Ptr) {
    RemoveStackFromLivePool(Ptr);
    auto ReadyToBeReaped = AddStackToDeadPool(Ptr);
    *ReadyToBeReaped = true;
  }

#ifdef ARCHITECTURE_arm64
  __asm volatile("mov x8, %[SyscallNum];"
                 "mov w0, %w[Result];"
                 "svc #0;" ::[SyscallNum] "i"(SYSCALL_DEF(exit)),
                 [Result] "r"(Status)
                 : "memory", "x0", "x8");
#else
  __asm volatile("mov %[Result], %%edi;"
                 "syscall;" ::"a"(SYSCALL_DEF(exit)),
                 [Result] "r"(Status)
                 : "memory", "rdi");
#endif
  FEX_UNREACHABLE;
}

#ifdef ARCHITECTURE_arm64
// arm64 implementation.
// Switches the stack pointer to `StackPivot`, calls `Func(Arg)` on that stack, then switches
// back to the original stack and returns to the caller.
// The original stack pointer and link register are stored on the new stack across the call.
// The function is naked: the assembly below is the entire function body.
__attribute__((naked)) void StackPivotAndCall(void* Arg, FEXCore::Threads::ThreadFunc Func, uint64_t StackPivot) {
  // x0: Arg
  // x1: Function to call
  // x2: StackPivot
  __asm volatile(R"(
    // Stack pivot.
    mov x3, sp;
    mov sp, x2;

    // Store stack storage location on to current stack
    stp x3, lr, [sp, -16]!;

    // x0 already has argument to pass.
    blr x1

    // Reload stack storage location
    ldp x2, lr, [sp], 16;

    // Stack pivot back
    mov sp, x2;

    ret;
    )" ::
                   : "memory");
}
#else
// x86-64 implementation.
// Switches the stack pointer to `StackPivot`, calls `Func(Arg)` on that stack, then switches
// back to the original stack and returns to the caller.
// The function is naked: the assembly below is the entire function body.
//
// Two 8-byte slots are pushed on the new stack before the call: the original stack pointer and
// the pivot value. Assuming `StackPivot` is 16-byte aligned, the second push keeps the stack
// 16-byte aligned at the call instruction. After the call the pivot slot is discarded first so
// that the final pop restores the original stack pointer, which points at our return address.
__attribute__((naked)) void StackPivotAndCall(void* Arg, FEXCore::Threads::ThreadFunc Func, uint64_t StackPivot) {
  // rdi: Arg
  // rsi: Function to call
  // rdx: StackPivot
  __asm volatile(R"(
    // Copy original stack in to RCX.
    movq %%rsp, %%rcx;

    // Stack pivot.
    movq %%rdx, %%rsp;

    // Store original stack on new stack
    pushq %%rcx;

    // Store stack pivot on new stack.
    pushq %%rdx;

    // rdi already contains function argument.
    callq *%%rsi;

    // Discard the stored stack pivot
    popq %%rdx;

    // Restore original stack
    popq %%rsp;

    ret;

    )" ::
                   : "memory");
}
#endif
namespace PThreads {
  void* InitializeThread(void* Ptr);

  // Thread implementation built on pthreads.
  // The pthread itself is created with a minimum sized stack; the user function is expected to be
  // run on a separately allocated stack from the StackTracker (see GetPivotStack).
  class PThread final : public FEXCore::Threads::Thread {
  public:
    // Allocates the pivot stack, registers it as live and starts the pthread running InitializeThread.
    PThread(StackTracker* STracker, FEXCore::Threads::ThreadFunc Func, void* Arg)
      : STracker {STracker}
      , UserFunc {Func}
      , UserArg {Arg} {
      Stack = STracker->AllocateStackObject();
      // pthreads allocates its dtv region behind our back and there is nothing we can do about it.
      FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator glibc;
      STracker->AddStackToLivePool(Stack);

      pthread_attr_t Attr {};
      pthread_attr_init(&Attr);
      // Allocate a minimum size stack through pthreads, then stack pivot to FEX's allocated stack.
      // This is required due to a race condition with pthread's DTV/TLS regions when a stack is reused before pthreads deletes that thread's
      // DTV/TLS regions.
      // This can be seen as a crash when running Steam fairly easily, but is very confusing when debugging.
      // The cause of this race condition is from glibc associating a DTV/TLS region with a stack region until the kernel clears the
      // `set_tid_address` address construct. If the stack is reused before the address is set to zero, then glibc won't initialize the new thread's
      // DTV/TLS region, resulting in TLS usage crashing.
      pthread_attr_setstacksize(&Attr, PTHREAD_STACK_MIN);
      pthread_create(&Thread, &Attr, InitializeThread, this);
      pthread_attr_destroy(&Attr);
    }

    // True only when the thread attributes can be queried and report a joinable detach state.
    bool joinable() override {
      pthread_attr_t Attr {};
      if (pthread_getattr_np(Thread, &Attr) != 0) {
        return false;
      }

      int DetachState {};
      const bool IsJoinable = pthread_attr_getdetachstate(&Attr, &DetachState) == 0 && DetachState == PTHREAD_CREATE_JOINABLE;
      pthread_attr_destroy(&Attr);
      return IsJoinable;
    }

    // Joins the thread, storing its return value in `ret`. Returns true on success.
    bool join(void** ret) override {
      const int Res = pthread_join(Thread, ret);
      return Res == 0;
    }

    // Detaches the thread. Returns true on success.
    bool detach() override {
      const int Res = pthread_detach(Thread);
      return Res == 0;
    }

    // True when called from the thread this object represents.
    bool IsSelf() override {
      return pthread_self() == Thread;
    }

    FEXCore::Threads::ThreadFunc GetUserFunc() const {
      return UserFunc;
    }

    void* GetUserArg() const {
      return UserArg;
    }

    // Base address of the stack allocated from the StackTracker for this thread.
    void* GetPivotStack() const {
      return Stack;
    }

    StackTracker* GetStackTracker() const {
      return STracker;
    }

    // Registers the jump buffer that LongJumpExit jumps to.
    void SetupLongJump(FEXCore::UncheckedLongJump::JumpBuf* exit_resolver) {
      _exit_resolver = exit_resolver;
    }

    // Records the exit status and thread object, then jumps to the registered jump buffer.
    [[noreturn]]
    void LongJumpExit(FEX::HLE::ThreadStateObject* ThreadObject, uint32_t Status) {
      this->ThreadObject = ThreadObject;
      this->Status = Status;
      FEXCore::UncheckedLongJump::LongJump(*_exit_resolver, 1);
      FEX_UNREACHABLE;
    }

    // Status recorded by LongJumpExit, zero otherwise.
    uint32_t GetStatus() const {
      return Status;
    }

    // Thread object recorded by LongJumpExit, null otherwise.
    FEX::HLE::ThreadStateObject* GetThreadObject() const {
      return ThreadObject;
    }

  private:
    pthread_t Thread;
    StackTracker* STracker;
    FEXCore::Threads::ThreadFunc UserFunc;
    void* UserArg;
    void* Stack {};

    // Use FEXCore's UncheckedLongJump to avoid fortification checks.
    // This avoids a false positive since glibc does not understand stack pivots.
    FEXCore::UncheckedLongJump::JumpBuf* _exit_resolver {};
    FEX::HLE::ThreadStateObject* ThreadObject {};
    uint32_t Status {};
  };

  // Start routine handed to pthread_create by the PThread constructor. `Ptr` is the PThread object.
  //
  // Runs the user function on the pivot stack through StackPivotAndCall. Execution continues past
  // the SetJump block in one of two ways:
  // - The user function returned normally (LongJumpExit stays false).
  // - PThread::LongJumpExit was called, which makes SetJump return non-zero (LongJumpExit is true).
  //
  // Returns the recorded status to pthreads only for a long jump exit with status zero.
  // Every other path ends the thread through DeallocateStackObjectAndExit and does not return.
  void* InitializeThread(void* Ptr) {
    void* StackBase {};
    StackTracker* STracker {};
    PThread* Thread {reinterpret_cast<PThread*>(Ptr)};
    // Fetched before the user function runs, both are needed again for teardown.
    StackBase = Thread->GetPivotStack();
    STracker = Thread->GetStackTracker();
    FEXCore::UncheckedLongJump::JumpBuf exit_resolver {};

    bool LongJumpExit {};

    if (FEXCore::UncheckedLongJump::SetJump(exit_resolver) == 0) {
      Thread->SetupLongJump(&exit_resolver);
      // Run the user function.
      // `Thread` object is dead after this function returns.
      // The pivot value is the address one past the end of the stack mapping.
      StackPivotAndCall(Thread->GetUserArg(), Thread->GetUserFunc(),
                        reinterpret_cast<uint64_t>(StackBase) + FEX::LinuxEmulation::Threads::STACK_SIZE);
    } else {
      LongJumpExit = true;
    }

    // NOTE(review): `Thread` is still dereferenced here and for the detach below, even on the path
    // where the comment above declares it dead - confirm the object's lifetime on a normal return.
    const auto Status = Thread->GetStatus();
    auto ThreadObject = Thread->GetThreadObject();
    // TLS/DTV teardown is something FEX can't control. Disable glibc checking when we leave a pthread.
    FEXCore::Allocator::YesIKnowImNotSupposedToUseTheGlibcAllocator::HardDisable();

    // Detach to ensure thread teardown occurs.
    Thread->detach();

    if (LongJumpExit) {
      // We have ownership of the thread object. Make sure to clean it up to prevent memory leaks.
      FEX::HLE::_SyscallHandler->TM.DestroyThread(ThreadObject, true);
      if (Status == 0) {
        // If status is zero then we can safely deallocate this thread's pivot stack (Which is no longer used).
        STracker->DeallocateStackObjectImmediately(StackBase);
        StackBase = nullptr;
      }
    }

    if (!LongJumpExit || Status != 0) {
      // If we didn't have a long jump exit (So not a pthread thread) OR status wasn't zero then we need to terminate locally.
      // There is an api limitation in glibc/pthreads where a function's return value is ignored and not passed to SYS_exit.
      // In order to match error condition thread exits, we must call SYS_exit ourselves in this case.
      //
      // This is a memory leak if this is a pthread based thread! We can't work around this.
      // - Leaks 128KB PTHREAD_STACK_MIN
      // - Leaks some glibc internal dtv tracking data.
      STracker->DeallocateStackObjectAndExit(StackBase, Status);
      FEX_UNREACHABLE;
    }

    // Give control back to pthreads.
    // This is required so glibc puts this thread's stack back in the stack cache, preventing a memory leak.
    // We can't use pthread_exit since that requires libgcc_s.so unwinder support which might not be available.
    // We are /expecting/ pthreads to return this status to the _exit syscall.
    return (void*)(uint64_t)Status;
  }

  // Non-owning pointer to the active StackTracker.
  // Assigned in SetupThreadHandlers (which returns the owning unique_ptr) and reset to null in Shutdown.
  StackTracker* STracker {};

  // Thread creation hook: builds a PThread bound to the active StackTracker that runs `Func(Arg)`.
  fextl::unique_ptr<FEXCore::Threads::Thread> CreateThread_PThread(FEXCore::Threads::ThreadFunc Func, void* Arg) {
    auto NewThread = fextl::make_unique<PThread>(STracker, Func, Arg);
    return NewThread;
  }

  void CleanupAfterFork_PThread() {
    STracker->CleanupAfterFork_PThread();
  }

}; // namespace PThreads

// Installs the pthread based thread handlers in FEXCore and creates the StackTracker.
// The returned unique_ptr owns the tracker; PThreads::STracker keeps a raw pointer to it.
fextl::unique_ptr<StackTracker> SetupThreadHandlers() {
  const FEXCore::Threads::Pointers Handlers = {
    .CreateThread = PThreads::CreateThread_PThread,
    .CleanupAfterFork = PThreads::CleanupAfterFork_PThread,
  };
  FEXCore::Threads::Thread::SetInternalPointers(Handlers);

  fextl::unique_ptr<StackTracker> Owner {new StackTracker()};
  PThreads::STracker = Owner.get();
  return Owner;
}

void* AllocateStackObject() {
  return PThreads::STracker->AllocateStackObject();
}

[[noreturn]]
void DeallocateStackObjectAndExit(void* Ptr, int Status) {
  PThreads::STracker->DeallocateStackObjectAndExit(Ptr, Status);
  FEX_UNREACHABLE;
}

// Exits the thread behind `ThreadObject` by long jumping through its PThread::LongJumpExit.
// The execution thread of `ThreadObject` is treated as a PThreads::PThread.
[[noreturn]]
void LongjumpDeallocateAndExit(FEX::HLE::ThreadStateObject* ThreadObject, int Status) {
  auto* const ExecutionPThread = reinterpret_cast<PThreads::PThread*>(ThreadObject->ExecutionThread.get());
  ExecutionPThread->LongJumpExit(ThreadObject, Status);
  FEX_UNREACHABLE;
}

// Returns the base of the pivot stack of `ThreadObject`, which is treated as a PThreads::PThread.
void* GetStackBase(FEXCore::Threads::Thread* ThreadObject) {
  const auto* const AsPThread = reinterpret_cast<PThreads::PThread*>(ThreadObject);
  return AsPThread->GetPivotStack();
}

// Takes ownership of the tracker, unmaps all stacks it still tracks, destroys it
// and clears the raw pointer kept in PThreads::STracker.
void Shutdown(fextl::unique_ptr<StackTracker> STracker) {
  STracker->Shutdown();
  STracker = nullptr;
  PThreads::STracker = nullptr;
}
} // namespace FEX::LinuxEmulation::Threads


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/Utils/Threads.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/deque.h>
#include <FEXCore/fextl/memory.h>

#include <cstddef>
#include <mutex>

namespace FEXCore::Threads {
class Thread;
}

namespace FEX::HLE {
struct ThreadStateObject;
}

namespace FEX::LinuxEmulation::Threads {
/**
 * @brief Size of the stack that this interface creates.
 */
constexpr size_t STACK_SIZE = 8 * 1024 * 1024;
// Stack pool handling
// One tracked stack mapping in the live pool.
struct StackPoolItem {
  void* Ptr;   // Base address of the mapping
  size_t Size; // Size of the mapping in bytes
};

// One tracked stack mapping in the dead pool.
struct DeadStackPoolItem {
  void* Ptr;            // Base address of the mapping
  size_t Size;          // Size of the mapping in bytes
  bool ReadyToBeReaped; // Once true the stack may be reused or unmapped by AllocateStackObject
};

// Tracks thread stacks in two pools, each guarded by its own mutex:
// - the live pool holds stacks that are in use,
// - the dead pool holds stacks that were given up and are waiting to be reused or unmapped.
class StackTracker final : public FEXCore::Allocator::FEXAllocOperators {
public:
  // Returns a stack of STACK_SIZE bytes, reusing a reapable dead stack when one exists.
  void* AllocateStackObject();
  // Adds `Ptr` to the dead pool (not yet reapable) and returns a pointer to its ReadyToBeReaped flag.
  bool* AddStackToDeadPool(void* Ptr);
  // Adds `Ptr` to the live pool.
  void AddStackToLivePool(void* Ptr);
  // Removes the entry for `Ptr` from the live pool, if present.
  void RemoveStackFromLivePool(void* Ptr);

  // Moves `Ptr` from the live pool to the dead pool and marks it reapable right away.
  void DeallocateStackObjectImmediately(void* Ptr);

  // Same as DeallocateStackObjectImmediately, then exits the calling thread with `Status`.
  [[noreturn]]
  void DeallocateStackObjectAndExit(void* Ptr, int Status);

  // Unmaps every tracked stack except the one the caller is running on. Takes no locks.
  void CleanupAfterFork_PThread();

  // Unmaps every tracked stack and clears both pools.
  void Shutdown();

private:
  std::mutex DeadStackPoolMutex {};
  std::mutex LiveStackPoolMutex {};

  fextl::deque<DeadStackPoolItem> DeadStackPool {};
  fextl::deque<StackPoolItem> LiveStackPool {};
};

/**
 * @brief Allocates a stack object from the internally managed stack pool.
 */
void* AllocateStackObject();

/**
 * @brief Deallocates a stack from the internally managed stack pool.
 *
 * Will not free the memory immediately, instead saving for reuse temporarily to solve race conditions on stack usage while stack tears down.
 *
 * @param Ptr The stack base from `AllocateStackObject`
 * @param Status The status to pass to the exit syscall.
 */
[[noreturn]]
void DeallocateStackObjectAndExit(void* Ptr, int Status);

/**
 * @brief Returns the base address of the pivot stack that belongs to the given thread.
 *
 * @param ThreadObject A thread created through the handlers registered by `SetupThreadHandlers`
 */
void* GetStackBase(FEXCore::Threads::Thread* ThreadObject);

/**
 * @brief Exits the thread behind `ThreadObject` by long jumping back in to its thread entry point.
 *
 * @param ThreadObject The thread state object whose execution thread should exit
 * @param Status The exit status recorded for the thread
 */
[[noreturn]]
void LongjumpDeallocateAndExit(FEX::HLE::ThreadStateObject* ThreadObject, int Status);

/**
 * @brief Registers thread creation handlers with FEXCore.
 */
fextl::unique_ptr<StackTracker> SetupThreadHandlers();

/**
 * @brief Cleans up any remaining stack objects in the pools.
 */
void Shutdown(fextl::unique_ptr<StackTracker> STracker);
} // namespace FEX::LinuxEmulation::Threads


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/EPoll.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: LinuxSyscalls|syscalls-x86-32 ~ x86-32 specific syscall implementations
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <cstdint>
#include <sys/epoll.h>
#include <syscall.h>
#include <time.h>
#include <unistd.h>

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::epoll_event32>, "%lx")
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::timespec32>, "%lx")

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Registers the 32-bit guest implementations of the epoll syscalls.
// Guest events use the `epoll_event32` layout, so every handler converts between that layout
// and the host `struct epoll_event` around the host syscall.
void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) {
  // epoll_wait is forwarded to the host as epoll_pwait with a null signal mask.
  REGISTER_SYSCALL_IMPL_X32(
    epoll_wait,
    [](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr<FEX::HLE::x32::epoll_event32> events, int maxevents, int timeout) -> uint64_t {
      // Host side buffer. A negative maxevents only results in an empty buffer, the bogus value itself is still passed to the kernel.
      fextl::vector<struct epoll_event> Events(std::max(0, maxevents));
      uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevents, timeout, nullptr, 8);

      if (Result != -1) {
        // Copy the returned events back in the guest layout.
        FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::x32::epoll_event32) * Result);
        for (size_t i = 0; i < Result; ++i) {
          events[i] = Events[i];
        }
      }
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(
    epoll_ctl, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, int op, int fd, compat_ptr<FEX::HLE::x32::epoll_event32> event) -> uint64_t {
      struct epoll_event Event;
      struct epoll_event* EventPtr {};
      // The event is optional, a null guest pointer is passed through as null.
      if (event) {
        FaultSafeUserMemAccess::VerifyIsReadable(event, sizeof(FEX::HLE::x32::epoll_event32));
        Event = *event;
        EventPtr = &Event;
      }
      uint64_t Result = ::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, EventPtr);

      if (Result != -1 && event) {
        FaultSafeUserMemAccess::VerifyIsWritable(event, sizeof(FEX::HLE::x32::epoll_event32));
        *event = Event;
      }
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(epoll_pwait,
                            [](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr<FEX::HLE::x32::epoll_event32> events, int maxevent,
                               int timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t {
                              fextl::vector<struct epoll_event> Events(std::max(0, maxevent));

                              uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, Events.data(), maxevent, timeout, sigmask, sigsetsize);

                              if (Result != -1) {
                                FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::x32::epoll_event32) * Result);
                                for (size_t i = 0; i < Result; ++i) {
                                  events[i] = Events[i];
                                }
                              }

                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(epoll_pwait2,
                            [](FEXCore::Core::CpuStateFrame* Frame, int epfd, compat_ptr<FEX::HLE::x32::epoll_event32> events, int maxevent,
                               compat_ptr<timespec32> timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t {
                              fextl::vector<struct epoll_event> Events(std::max(0, maxevent));

                              // The timeout is optional and uses the 32-bit timespec layout, convert it for the host.
                              struct timespec tp64 {};
                              struct timespec* timed_ptr {};
                              if (timeout) {
                                // Verify before reading guest memory, same as epoll_ctl does for its event.
                                FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(timespec32));
                                tp64 = *timeout;
                                timed_ptr = &tp64;
                              }

                              uint64_t Result =
                                ::syscall(SYSCALL_DEF(epoll_pwait2), epfd, Events.data(), maxevent, timed_ptr, sigmask, sigsetsize);

                              if (Result != -1) {
                                FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::x32::epoll_event32) * Result);
                                for (size_t i = 0; i < Result; ++i) {
                                  events[i] = Events[i];
                                }
                              }

                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FD.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/IoctlEmulation.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/SyscallsEnum.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <cstdint>
#include <fcntl.h>
#include <limits>
#include <poll.h>
#include <signal.h>
#include <stddef.h>
#include <string.h>
#include <sys/select.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/time.h>
#include <sys/timerfd.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <syscall.h>
#include <time.h>
#include <type_traits>
#include <unistd.h>

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::sigset_argpack32>, "%lx")

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Used to ensure no bogus values are passed into readv/writev family syscalls.
// This is mainly to sanitize vector sizing. It's fine for the bogus value
// itself to pass into the syscall, since the kernel will handle it.
// Clamps negative counts to zero and returns every other count unchanged.
static constexpr int SanitizeIOCount(int count) {
  return count < 0 ? 0 : count;
}

#ifdef ARCHITECTURE_x86_64
// Issues an ioctl through the `int $0x80` syscall entry, only built for x86-64 hosts.
// eax carries SYSCALL_x86_ioctl on entry and the result on return; ebx, ecx and edx carry fd, cmd and args.
// The frame argument is unused.
uint32_t ioctl_32(FEXCore::Core::CpuStateFrame*, int fd, uint32_t cmd, uint32_t args) {
  uint32_t Result {};
  __asm volatile("int $0x80;" : "=a"(Result) : "a"(SYSCALL_x86_ioctl), "b"(fd), "c"(cmd), "d"(args) : "memory");
  return Result;
}
#endif
// These are redefined to be their non-64bit tagged value on x86-64
constexpr int OP_GETLK64_32 = 12;
constexpr int OP_SETLK64_32 = 13;
constexpr int OP_SETLKW64_32 = 14;

auto fcntlHandler = [](FEXCore::Core::CpuStateFrame* Frame, int fd, int cmd, uint64_t arg) -> uint64_t {
  // fcntl64 struct directly matches the 64bit fcntl op
  // cmd just needs to be fixed up

  void* lock_arg = (void*)arg;
  struct flock tmp {};
  int old_cmd = cmd;

  switch (old_cmd) {
  case OP_GETLK64_32: {
    cmd = F_GETLK;
    lock_arg = (void*)&tmp;
    FaultSafeUserMemAccess::VerifyIsReadable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
    tmp = *reinterpret_cast<flock64_32*>(arg);
    break;
  }
  case OP_SETLK64_32: {
    cmd = F_SETLK;
    lock_arg = (void*)&tmp;
    FaultSafeUserMemAccess::VerifyIsReadable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
    tmp = *reinterpret_cast<flock64_32*>(arg);
    break;
  }
  case OP_SETLKW64_32: {
    cmd = F_SETLKW;
    lock_arg = (void*)&tmp;
    FaultSafeUserMemAccess::VerifyIsReadable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
    tmp = *reinterpret_cast<flock64_32*>(arg);
    break;
  }
  case F_OFD_SETLK:
  case F_OFD_GETLK:
  case F_OFD_SETLKW: {
    lock_arg = (void*)&tmp;
    FaultSafeUserMemAccess::VerifyIsReadable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
    tmp = *reinterpret_cast<flock64_32*>(arg);
    break;
  }
  case F_GETLK:
  case F_SETLK:
  case F_SETLKW: {
    lock_arg = (void*)&tmp;
    FaultSafeUserMemAccess::VerifyIsReadable(reinterpret_cast<void*>(arg), sizeof(flock_32));
    tmp = *reinterpret_cast<flock_32*>(arg);
    break;
  }

  case F_SETFL: lock_arg = reinterpret_cast<void*>(FEX::HLE::RemapFromX86Flags(arg)); break;
  // Everything else maps directly. Check `COMPAT_SYSCALL_DEFINE3(fcntl64, ...)` entrypoint in the kernel if this changes.
  default: break;
  }

  uint64_t Result = ::fcntl(fd, cmd, lock_arg);

  if (Result != -1) {
    switch (old_cmd) {
    case OP_GETLK64_32: {
      FaultSafeUserMemAccess::VerifyIsWritable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
      *reinterpret_cast<flock64_32*>(arg) = tmp;
      break;
    }
    case F_OFD_GETLK: {
      FaultSafeUserMemAccess::VerifyIsWritable(reinterpret_cast<void*>(arg), sizeof(flock64_32));
      *reinterpret_cast<flock64_32*>(arg) = tmp;
      break;
    }
    case F_GETLK: {
      FaultSafeUserMemAccess::VerifyIsWritable(reinterpret_cast<void*>(arg), sizeof(flock_32));
      *reinterpret_cast<flock_32*>(arg) = tmp;
      break;
    } break;
    case F_DUPFD:
    case F_DUPFD_CLOEXEC: FEX::HLE::x32::CheckAndAddFDDuplication(Frame, fd, Result); break;
    case F_GETFL: {
      Result = FEX::HLE::RemapToX86Flags(Result);
      break;
    }
    default: break;
    }
  }
  SYSCALL_ERRNO();
};

// fcntl entry point that rejects the 64-bit and open-file-description lock commands with -EINVAL
// and hands every other command to fcntlHandler.
auto fcntl32Handler = [](FEXCore::Core::CpuStateFrame* Frame, int fd, int cmd, uint64_t arg) -> uint64_t {
  // fcntl32 handler explicitly blocks these commands.
  const bool Is64BitLock = cmd == OP_GETLK64_32 || cmd == OP_SETLK64_32 || cmd == OP_SETLKW64_32;
  const bool IsOFDLock = cmd == F_OFD_GETLK || cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW;
  if (Is64BitLock || IsOFDLock) {
    return -EINVAL;
  }

  return fcntlHandler(Frame, fd, cmd, arg);
};

auto selectHandler = [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds,
                        struct timeval32* timeout) -> uint64_t {
  struct timeval tp64 {};
  if (timeout) {
    FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
    tp64 = *timeout;
  }

  fd_set Host_readfds;
  fd_set Host_writefds;
  fd_set Host_exceptfds;
  FD_ZERO(&Host_readfds);
  FD_ZERO(&Host_writefds);
  FD_ZERO(&Host_exceptfds);

  // Round up to the full 32bit word
  uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 4;

  if (readfds) {
    FaultSafeUserMemAccess::VerifyIsReadable(readfds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < NumWords; ++i) {
      uint32_t FD = readfds[i];
      int32_t Rem = nfds - (i * 32);
      for (int j = 0; j < 32 && j < Rem; ++j) {
        if ((FD >> j) & 1) {
          FD_SET(i * 32 + j, &Host_readfds);
        }
      }
    }
  }

  if (writefds) {
    FaultSafeUserMemAccess::VerifyIsReadable(writefds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < NumWords; ++i) {
      uint32_t FD = writefds[i];
      int32_t Rem = nfds - (i * 32);
      for (int j = 0; j < 32 && j < Rem; ++j) {
        if ((FD >> j) & 1) {
          FD_SET(i * 32 + j, &Host_writefds);
        }
      }
    }
  }

  if (exceptfds) {
    FaultSafeUserMemAccess::VerifyIsReadable(exceptfds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < NumWords; ++i) {
      uint32_t FD = exceptfds[i];
      int32_t Rem = nfds - (i * 32);
      for (int j = 0; j < 32 && j < Rem; ++j) {
        if ((FD >> j) & 1) {
          FD_SET(i * 32 + j, &Host_exceptfds);
        }
      }
    }
  }

  uint64_t Result = ::select(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr,
                             exceptfds ? &Host_exceptfds : nullptr, timeout ? &tp64 : nullptr);
  if (readfds) {
    FaultSafeUserMemAccess::VerifyIsWritable(readfds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < nfds; ++i) {
      if (FD_ISSET(i, &Host_readfds)) {
        readfds[i / 32] |= 1 << (i & 31);
      } else {
        readfds[i / 32] &= ~(1 << (i & 31));
      }
    }
  }

  if (writefds) {
    FaultSafeUserMemAccess::VerifyIsWritable(writefds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < nfds; ++i) {
      if (FD_ISSET(i, &Host_writefds)) {
        writefds[i / 32] |= 1 << (i & 31);
      } else {
        writefds[i / 32] &= ~(1 << (i & 31));
      }
    }
  }

  if (exceptfds) {
    FaultSafeUserMemAccess::VerifyIsWritable(exceptfds, sizeof(fd_set32) * NumWords);
    for (int i = 0; i < nfds; ++i) {
      if (FD_ISSET(i, &Host_exceptfds)) {
        exceptfds[i / 32] |= 1 << (i & 31);
      } else {
        exceptfds[i / 32] &= ~(1 << (i & 31));
      }
    }
  }

  if (timeout) {
    FaultSafeUserMemAccess::VerifyIsWritable(timeout, sizeof(*timeout));
    *timeout = tp64;
  }
  SYSCALL_ERRNO();
};

void RegisterFD(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X32(ppoll,
                            [](FEXCore::Core::CpuStateFrame* Frame, struct pollfd* fds, nfds_t nfds, timespec32* timeout_ts,
                               const uint64_t* sigmask, size_t sigsetsize) -> uint64_t {
                              // 32-bit ppoll. Only the timeout needs a layout conversion; `fds`, `sigmask` and
                              // `sigsetsize` are forwarded to the host syscall untouched.
                              struct timespec HostTimeout {};
                              struct timespec* HostTimeoutPtr = nullptr;
                              if (timeout_ts != nullptr) {
                                struct timespec32 GuestTimeout {};
                                if (FaultSafeUserMemAccess::CopyFromUser(&GuestTimeout, timeout_ts, sizeof(GuestTimeout)) == EFAULT) {
                                  return -EFAULT;
                                }

                                HostTimeout = GuestTimeout;
                                HostTimeoutPtr = &HostTimeout;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(ppoll), fds, nfds, HostTimeoutPtr, sigmask, sigsetsize);

                              if (timeout_ts != nullptr) {
                                // Hand the timeout back to the guest in its own 32-bit layout.
                                struct timespec32 GuestTimeout {};
                                GuestTimeout = HostTimeout;

                                // A failed write-back (e.g. the timeout lives in read-only memory) is deliberately
                                // ignored: the kernel carries on happily in that situation as well.
                                (void)FaultSafeUserMemAccess::CopyToUser(timeout_ts, &GuestTimeout, sizeof(GuestTimeout));
                              }

                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(
    _llseek, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t fd, uint32_t offset_high, uint32_t offset_low, loff_t* result, uint32_t whence) -> uint64_t {
      // The guest splits the 64-bit offset across two 32-bit arguments; glue them back together.
      const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;

      uint64_t Result = lseek(fd, Offset, whence);
      if (Result == -1) {
        SYSCALL_ERRNO();
      }

      // Success: the new position travels through `result`, and the syscall itself returns zero.
      FaultSafeUserMemAccess::VerifyIsWritable(result, sizeof(*result));
      *result = Result;
      return 0;
    });

  REGISTER_SYSCALL_IMPL_X32(readv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, int iovcnt) -> uint64_t {
    // Widen the guest's 32-bit iovec array into host iovecs before calling the host readv.
    // The element count used for verification and copying goes through SanitizeIOCount;
    // the host call still receives the guest's original count.
    const auto Count = SanitizeIOCount(iovcnt);
    FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * Count);

    fextl::vector<iovec> Host_iovec(iov, iov + Count);
    uint64_t Result = ::readv(fd, Host_iovec.data(), iovcnt);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(writev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, int iovcnt) -> uint64_t {
    // Widen the guest's 32-bit iovec array into host iovecs before calling the host writev.
    // The element count used for verification and copying goes through SanitizeIOCount;
    // the host call still receives the guest's original count.
    const auto Count = SanitizeIOCount(iovcnt);
    FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * Count);

    fextl::vector<iovec> Host_iovec(iov, iov + Count);
    uint64_t Result = ::writev(fd, Host_iovec.data(), iovcnt);
    SYSCALL_ERRNO();
  });

  // chown32: forwards path, owner and group unchanged to the host chown().
  REGISTER_SYSCALL_IMPL_X32(chown32, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t {
    uint64_t Result = ::chown(pathname, owner, group);
    SYSCALL_ERRNO();
  });

  // lchown32: forwards path, owner and group unchanged to the host lchown().
  REGISTER_SYSCALL_IMPL_X32(lchown32, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uid_t owner, gid_t group) -> uint64_t {
    uint64_t Result = ::lchown(pathname, owner, group);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(oldstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, oldstat32* buf) -> uint64_t {
    // Legacy stat: query through the file manager, then convert into the old 32-bit layout.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat);
    if (Result != -1) {
      // The guest structure has narrower inode / link-count fields; refuse values that cannot be represented.
      const bool InodeFits = host_stat.st_ino <= std::numeric_limits<decltype(buf->st_ino)>::max();
      const bool LinksFit = host_stat.st_nlink <= std::numeric_limits<decltype(buf->st_nlink)>::max();
      if (!InodeFits || !LinksFit) {
        return -EOVERFLOW;
      }

      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(oldfstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, oldstat32* buf) -> uint64_t {
    // Legacy fstat: query the host, then convert into the old 32-bit layout.
    struct stat host_stat;
    uint64_t Result = ::fstat(fd, &host_stat);
    if (Result != -1) {
      // The guest structure has narrower inode / link-count fields; refuse values that cannot be represented.
      const bool InodeFits = host_stat.st_ino <= std::numeric_limits<decltype(buf->st_ino)>::max();
      const bool LinksFit = host_stat.st_nlink <= std::numeric_limits<decltype(buf->st_nlink)>::max();
      if (!InodeFits || !LinksFit) {
        return -EOVERFLOW;
      }

      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(oldlstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, oldstat32* buf) -> uint64_t {
    // Legacy lstat: query through the file manager, then convert into the old 32-bit layout.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat);
    if (Result != -1) {
      // The guest structure has narrower inode / link-count fields; refuse values that cannot be represented.
      const bool InodeFits = host_stat.st_ino <= std::numeric_limits<decltype(buf->st_ino)>::max();
      const bool LinksFit = host_stat.st_nlink <= std::numeric_limits<decltype(buf->st_nlink)>::max();
      if (!InodeFits || !LinksFit) {
        return -EOVERFLOW;
      }

      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(stat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, stat32* buf) -> uint64_t {
    // stat via the file manager; the guest buffer is only written when the lookup succeeded.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(fstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, stat32* buf) -> uint64_t {
    // fstat on the host; the guest buffer is only written when the call succeeded.
    struct stat host_stat;
    uint64_t Result = ::fstat(fd, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(lstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, stat32* buf) -> uint64_t {
    // lstat via the file manager; the guest buffer is only written when the lookup succeeded.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(stat64, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, stat64_32* buf) -> uint64_t {
    // stat64 via the file manager; the result is converted into the guest's stat64 layout on success.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(lstat64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, stat64_32* buf) -> uint64_t {
    // lstat64 via the file manager; the result is converted into the guest's stat64 layout on success.
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(fstat64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, stat64_32* buf) -> uint64_t {
    // fstat64 on the host; the result is converted into the guest's stat64 layout on success.
    struct stat64 host_stat;
    uint64_t Result = ::fstat64(fd, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(statfs, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, statfs32_32* buf) -> uint64_t {
    // statfs via the file manager; the guest buffer is only written when the lookup succeeded.
    struct statfs host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(fstatfs, [](FEXCore::Core::CpuStateFrame* Frame, int fd, statfs32_32* buf) -> uint64_t {
    // fstatfs on the host; the guest buffer is only written when the call succeeded.
    struct statfs host_stat;
    uint64_t Result = ::fstatfs(fd, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(fstatfs64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, size_t sz, struct statfs64_32* buf) -> uint64_t {
    // The guest announces the size of its buffer; it is expected to be exactly one statfs64_32.
    LOGMAN_THROW_A_FMT(sz == sizeof(struct statfs64_32), "This needs to match");

    struct statfs64 host_stat;
    uint64_t Result = ::fstatfs64(fd, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(statfs64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, size_t sz, struct statfs64_32* buf) -> uint64_t {
    // The guest announces the size of its buffer; it is expected to be exactly one statfs64_32.
    LOGMAN_THROW_A_FMT(sz == sizeof(struct statfs64_32), "This needs to match");

    struct statfs host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, &host_stat);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  // The x86 32-bit fcntl syscall has a historical quirk: it uses the same handler as fcntl64.
  // This is in direct opposition to all other 32-bit architectures, which use the compat_fcntl handler.
  // The quirk goes back to the start of the Linux git history (2.6.12-rc2); finding out when it was
  // introduced before that point would be difficult.
  //
  // For reference, the compat_fcntl handler blocks a few commands:
  // - F_GETLK64
  // - F_SETLK64
  // - F_SETLKW64
  // - F_OFD_GETLK
  // - F_OFD_SETLK
  // - F_OFD_SETLKW

  REGISTER_SYSCALL_IMPL_X32(fcntl, fcntl32Handler);
  REGISTER_SYSCALL_IMPL_X32(fcntl64, fcntlHandler);

  // dup: duplicates the descriptor on the host. On success the old/new descriptor pair is handed to
  // CheckAndAddFDDuplication (defined elsewhere) before the new descriptor is returned.
  REGISTER_SYSCALL_IMPL_X32(dup, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd) -> uint64_t {
    uint64_t Result = ::dup(oldfd);
    if (Result != -1) {
      CheckAndAddFDDuplication(Frame, oldfd, Result);
    }
    SYSCALL_ERRNO();
  });

  // dup2: duplicates `oldfd` onto `newfd` on the host. On success the pair is handed to
  // CheckAndAddFDDuplication (defined elsewhere).
  REGISTER_SYSCALL_IMPL_X32(dup2, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd) -> uint64_t {
    uint64_t Result = ::dup2(oldfd, newfd);
    if (Result != -1) {
      CheckAndAddFDDuplication(Frame, oldfd, newfd);
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(
    preadv, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, uint32_t pos_high) -> uint64_t {
      // 32-bit preadv: widens the guest iovec array and reads at the 64-bit position that the guest
      // passes as two 32-bit halves.
      FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * SanitizeIOCount(iovcnt));
      fextl::vector<iovec> Host_iovec(iov, iov + SanitizeIOCount(iovcnt));

      // The raw host syscall takes (pos_l, pos_h) in units of `unsigned long`. On a 64-bit kernel the
      // position is `(pos_h << 64) | pos_l`, i.e. pos_h is effectively discarded, so the complete
      // 64-bit offset has to travel in pos_l. Forwarding the guest halves unchanged would silently
      // drop the upper 32 bits of the offset.
      const uint64_t Offset = (static_cast<uint64_t>(pos_high) << 32) | pos_low;

      uint64_t Result =
        ::syscall(SYSCALL_DEF(preadv), fd, Host_iovec.data(), static_cast<uint64_t>(iovcnt), Offset, static_cast<uint64_t>(0));
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(
    pwritev, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low, uint32_t pos_high) -> uint64_t {
      // 32-bit pwritev: widens the guest iovec array and writes at the 64-bit position that the guest
      // passes as two 32-bit halves.
      FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * SanitizeIOCount(iovcnt));
      fextl::vector<iovec> Host_iovec(iov, iov + SanitizeIOCount(iovcnt));

      // The raw host syscall takes (pos_l, pos_h) in units of `unsigned long`. On a 64-bit kernel the
      // position is `(pos_h << 64) | pos_l`, i.e. pos_h is effectively discarded, so the complete
      // 64-bit offset has to travel in pos_l. Forwarding the guest halves unchanged would silently
      // drop the upper 32 bits of the offset.
      const uint64_t Offset = (static_cast<uint64_t>(pos_high) << 32) | pos_low;

      uint64_t Result =
        ::syscall(SYSCALL_DEF(pwritev), fd, Host_iovec.data(), static_cast<uint64_t>(iovcnt), Offset, static_cast<uint64_t>(0));
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(process_vm_readv,
                            [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec32* local_iov, unsigned long liovcnt,
                               const struct iovec32* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t {
                              // Both descriptor arrays are 32-bit guest iovecs and must be widened for the host.
                              // The sanitized counts bound verification and copying; the host call receives the raw counts.
                              const auto LocalCount = SanitizeIOCount(liovcnt);
                              const auto RemoteCount = SanitizeIOCount(riovcnt);

                              FaultSafeUserMemAccess::VerifyIsReadable(local_iov, sizeof(struct iovec32) * LocalCount);
                              FaultSafeUserMemAccess::VerifyIsReadable(remote_iov, sizeof(struct iovec32) * RemoteCount);

                              fextl::vector<iovec> Host_local_iovec(local_iov, local_iov + LocalCount);
                              fextl::vector<iovec> Host_remote_iovec(remote_iov, remote_iov + RemoteCount);

                              uint64_t Result =
                                ::process_vm_readv(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(process_vm_writev,
                            [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, const struct iovec32* local_iov, unsigned long liovcnt,
                               const struct iovec32* remote_iov, unsigned long riovcnt, unsigned long flags) -> uint64_t {
                              // Both descriptor arrays are 32-bit guest iovecs and must be widened for the host.
                              // The sanitized counts bound verification and copying; the host call receives the raw counts.
                              const auto LocalCount = SanitizeIOCount(liovcnt);
                              const auto RemoteCount = SanitizeIOCount(riovcnt);

                              FaultSafeUserMemAccess::VerifyIsReadable(local_iov, sizeof(struct iovec32) * LocalCount);
                              FaultSafeUserMemAccess::VerifyIsReadable(remote_iov, sizeof(struct iovec32) * RemoteCount);

                              fextl::vector<iovec> Host_local_iovec(local_iov, local_iov + LocalCount);
                              fextl::vector<iovec> Host_remote_iovec(remote_iov, remote_iov + RemoteCount);

                              uint64_t Result =
                                ::process_vm_writev(pid, Host_local_iovec.data(), liovcnt, Host_remote_iovec.data(), riovcnt, flags);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(preadv2,
                            [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low,
                               uint32_t pos_high, int flags) -> uint64_t {
                              // 32-bit preadv2: widens the guest iovec array and reads at the 64-bit position that
                              // the guest passes as two 32-bit halves.
                              FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * SanitizeIOCount(iovcnt));
                              fextl::vector<iovec> Host_iovec(iov, iov + SanitizeIOCount(iovcnt));

                              // On a 64-bit kernel the position is `(pos_h << 64) | pos_l`, so pos_h is effectively
                              // discarded and the complete 64-bit offset has to travel in pos_l. Combining the halves
                              // also turns the guest's (0xFFFFFFFF, 0xFFFFFFFF) into the 64-bit -1 that selects
                              // "use the current file position".
                              const uint64_t Offset = (static_cast<uint64_t>(pos_high) << 32) | pos_low;

                              uint64_t Result = ::syscall(SYSCALL_DEF(preadv2), fd, Host_iovec.data(), static_cast<uint64_t>(iovcnt),
                                                          Offset, static_cast<uint64_t>(0), flags);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(pwritev2,
                            [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, uint32_t iovcnt, uint32_t pos_low,
                               uint32_t pos_high, int flags) -> uint64_t {
                              // 32-bit pwritev2: widens the guest iovec array and writes at the 64-bit position that
                              // the guest passes as two 32-bit halves.
                              FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * SanitizeIOCount(iovcnt));
                              fextl::vector<iovec> Host_iovec(iov, iov + SanitizeIOCount(iovcnt));

                              // On a 64-bit kernel the position is `(pos_h << 64) | pos_l`, so pos_h is effectively
                              // discarded and the complete 64-bit offset has to travel in pos_l. Combining the halves
                              // also turns the guest's (0xFFFFFFFF, 0xFFFFFFFF) into the 64-bit -1 that selects
                              // "use the current file position".
                              const uint64_t Offset = (static_cast<uint64_t>(pos_high) << 32) | pos_low;

                              uint64_t Result = ::syscall(SYSCALL_DEF(pwritev2), fd, Host_iovec.data(), static_cast<uint64_t>(iovcnt),
                                                          Offset, static_cast<uint64_t>(0), flags);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(fstatat_64, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, stat64_32* buf, int flag) -> uint64_t {
    // fstatat64 via the file manager; the result is converted into the guest's stat64 layout on success.
    struct stat64 host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt64(dirfd, pathname, &host_stat, flag);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    *buf = host_stat;
    SYSCALL_ERRNO();
  });

  // ioctl is handled by the dedicated 32-bit handler `ioctl32` (defined elsewhere).
  REGISTER_SYSCALL_IMPL_X32(ioctl, ioctl32);

  // getdents: delegated to GetDentsEmulation<true> (defined elsewhere), with the guest buffer
  // interpreted as an array of 32-bit linux_dirent records.
  REGISTER_SYSCALL_IMPL_X32(getdents, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t {
    return GetDentsEmulation<true>(fd, reinterpret_cast<FEX::HLE::x32::linux_dirent_32*>(dirp), count);
  });

  // getdents64: the host fills the guest buffer directly, then the records are post-processed in place:
  //  - every record's d_off is replaced with a small sequential counter
  //  - records that the file manager reports as protected are removed from the buffer
  REGISTER_SYSCALL_IMPL_X32(getdents64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t {
    uint64_t Result = ::syscall(SYSCALL_DEF(getdents64), static_cast<uint64_t>(fd), dirp, static_cast<uint64_t>(count));
    if (Result != -1) {
      // Walk each offset
      // if we are passing the full d_off to the 32bit application then it seems to break things?
      // `i` is the byte offset of the current record, `num` counts loop iterations (it also advances
      // for records that end up being removed).
      for (size_t i = 0, num = 0; i < Result; ++num) {
        linux_dirent_64* Incoming = (linux_dirent_64*)(reinterpret_cast<uint64_t>(dirp) + i);
        Incoming->d_off = num;
        if (FEX::HLE::_SyscallHandler->FM.IsProtectedFile(fd, Incoming->d_ino)) {
          // Drop this record: shrink the reported length and slide the remaining records down over it.
          // `Result - i` (after the subtraction) is exactly the number of bytes that follow the removed record.
          Result -= Incoming->d_reclen;
          memmove(Incoming, (linux_dirent_64*)(reinterpret_cast<uint64_t>(Incoming) + Incoming->d_reclen), Result - i);
          // Do not advance `i`: the next record now lives at the same offset.
          continue;
        }
        i += Incoming->d_reclen;
      }
    }
    SYSCALL_ERRNO();
  });

  // Legacy select: all arguments arrive packed in a single guest structure, which is unpacked
  // and forwarded to the shared selectHandler.
  REGISTER_SYSCALL_IMPL_X32(select, [](FEXCore::Core::CpuStateFrame* Frame, compat_select_args* arg) -> uint64_t {
    return selectHandler(Frame, arg->nfds, arg->readfds, arg->writefds, arg->exceptfds, arg->timeout);
  });

  // _newselect takes its arguments individually and maps onto selectHandler directly.
  REGISTER_SYSCALL_IMPL_X32(_newselect, selectHandler);

  REGISTER_SYSCALL_IMPL_X32(pselect6,
                            [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds,
                               timespec32* timeout, compat_ptr<sigset_argpack32> sigmaskpack) -> uint64_t {
                              // 32-bit pselect6. The guest's fd sets are arrays of 32-bit words and its timeout is a
                              // 32-bit timespec, so everything is converted to the host representation, the host
                              // pselect is invoked, and the results are converted back.
                              //
                              // nfds:        highest descriptor number + 1, as provided by the guest
                              // readfds/writefds/exceptfds: optional guest bit sets (may be null)
                              // timeout:     optional guest timeout (may be null)
                              // sigmaskpack: optional {sigset pointer, size} pair (may be null)

                              // A negative count is invalid; the kernel rejects it with EINVAL.
                              if (nfds < 0) {
                                return -EINVAL;
                              }
                              // The host fd_set only has room for FD_SETSIZE descriptors. Anything beyond that
                              // cannot be represented and must never be handed to FD_SET/FD_ISSET, which would
                              // access memory outside of the host sets.
                              if (nfds > FD_SETSIZE) {
                                nfds = FD_SETSIZE;
                              }

                              struct timespec tp64 {};
                              if (timeout) {
                                FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
                                tp64 = *timeout;
                              }

                              fd_set Host_readfds;
                              fd_set Host_writefds;
                              fd_set Host_exceptfds;
                              sigset_t HostSet {};
                              sigset_t* HostSetPtr {};

                              FD_ZERO(&Host_readfds);
                              FD_ZERO(&Host_writefds);
                              FD_ZERO(&Host_exceptfds);
                              sigemptyset(&HostSet);

                              // Number of 32-bit guest words needed to hold nfds bits (rounded up).
                              const uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 32;

                              // Copies the first nfds bits of a guest set into a host fd_set.
                              auto ImportSet = [nfds, NumWords](fd_set32* Guest, fd_set* Host) {
                                FaultSafeUserMemAccess::VerifyIsReadable(Guest, sizeof(fd_set32) * NumWords);
                                for (uint32_t Word = 0; Word < NumWords; ++Word) {
                                  const uint32_t Bits = Guest[Word];
                                  for (int Bit = 0; Bit < 32; ++Bit) {
                                    const int FD = static_cast<int>(Word * 32) + Bit;
                                    if (FD >= nfds) {
                                      break;
                                    }
                                    if ((Bits >> Bit) & 1) {
                                      FD_SET(FD, Host);
                                    }
                                  }
                                }
                              };

                              // Mirrors the first nfds bits of a host fd_set back into the guest set.
                              auto ExportSet = [nfds, NumWords](fd_set32* Guest, fd_set* Host) {
                                FaultSafeUserMemAccess::VerifyIsWritable(Guest, sizeof(fd_set32) * NumWords);
                                for (int FD = 0; FD < nfds; ++FD) {
                                  // Unsigned literal: shifting a signed 1 into bit 31 is best avoided.
                                  const uint32_t Mask = 1U << (FD & 31);
                                  if (FD_ISSET(FD, Host)) {
                                    Guest[FD / 32] |= Mask;
                                  } else {
                                    Guest[FD / 32] &= ~Mask;
                                  }
                                }
                              };

                              if (readfds) {
                                ImportSet(readfds, &Host_readfds);
                              }
                              if (writefds) {
                                ImportSet(writefds, &Host_writefds);
                              }
                              if (exceptfds) {
                                ImportSet(exceptfds, &Host_exceptfds);
                              }

                              FaultSafeUserMemAccess::VerifyIsReadableOrNull(sigmaskpack, sizeof(*sigmaskpack));
                              if (sigmaskpack && sigmaskpack->sigset) {
                                FaultSafeUserMemAccess::VerifyIsReadable(sigmaskpack->sigset, sizeof(*sigmaskpack->sigset));
                                uint64_t* sigmask = sigmaskpack->sigset;
                                size_t sigsetsize = sigmaskpack->size;

                                // Only a single 64-bit word is read from the guest, so never look at more than
                                // 64 bits regardless of the size the guest claims (a shift by >= 64 is undefined).
                                const uint64_t GuestMask = *sigmask;
                                const size_t NumBits = sigsetsize < 8 ? sigsetsize * 8 : 64;
                                for (size_t i = 0; i < NumBits; ++i) {
                                  if (GuestMask & (1ULL << i)) {
                                    sigaddset(&HostSet, i + 1);
                                  }
                                }
                                HostSetPtr = &HostSet;
                              }

                              // When the guest supplied no signal mask, pass a null mask so the current mask stays
                              // in effect for the duration of the call instead of being replaced by an empty one.
                              uint64_t Result = ::pselect(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr,
                                                          exceptfds ? &Host_exceptfds : nullptr, timeout ? &tp64 : nullptr, HostSetPtr);

                              if (readfds) {
                                ExportSet(readfds, &Host_readfds);
                              }
                              if (writefds) {
                                ExportSet(writefds, &Host_writefds);
                              }
                              if (exceptfds) {
                                ExportSet(exceptfds, &Host_exceptfds);
                              }

                              if (timeout) {
                                FaultSafeUserMemAccess::VerifyIsWritable(timeout, sizeof(*timeout));
                                *timeout = tp64;
                              }
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(
    fadvise64, [](FEXCore::Core::CpuStateFrame* Frame, int32_t fd, uint32_t offset_low, uint32_t offset_high, uint32_t len, int advice) -> uint64_t {
      // 32-bit fadvise64: the 64-bit file offset arrives split in two halves, the length is 32-bit.
      // Returns 0 on success or a negated errno value on failure.
      const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;

      // posix_fadvise64() does not follow the usual "-1 and errno" convention: it *returns* the
      // (positive) error number and leaves errno alone. Negate it here to produce the syscall
      // return convention the guest expects; going through errno would report failures as success.
      const int Error = ::posix_fadvise64(fd, Offset, len, advice);
      return static_cast<uint64_t>(-static_cast<int64_t>(Error));
    });

  REGISTER_SYSCALL_IMPL_X32(fadvise64_64,
                            [](FEXCore::Core::CpuStateFrame* Frame, int32_t fd, uint32_t offset_low, uint32_t offset_high, uint32_t len_low,
                               uint32_t len_high, int advice) -> uint64_t {
                              // 32-bit fadvise64_64: both the file offset and the length arrive split in two halves.
                              // Returns 0 on success or a negated errno value on failure.
                              const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;
                              const uint64_t Len = (static_cast<uint64_t>(len_high) << 32) | len_low;

                              // posix_fadvise64() does not follow the usual "-1 and errno" convention: it *returns*
                              // the (positive) error number and leaves errno alone. Negate it here to produce the
                              // syscall return convention the guest expects; going through errno would report
                              // failures as success.
                              const int Error = ::posix_fadvise64(fd, Offset, Len, advice);
                              return static_cast<uint64_t>(-static_cast<int64_t>(Error));
                            });

  REGISTER_SYSCALL_IMPL_X32(timerfd_settime,
                            [](FEXCore::Core::CpuStateFrame* Frame, int fd, int flags, const FEX::HLE::x32::old_itimerspec32* new_value,
                               FEX::HLE::x32::old_itimerspec32* old_value) -> uint64_t {
                              // 32-bit timerfd_settime: converts the guest's 32-bit itimerspec to the host layout,
                              // arms the timer, and (optionally) converts the previous setting back for the guest.
                              //
                              // new_value: timer setting to apply (read from guest memory)
                              // old_value: optional output for the previous setting (may be null)
                              struct itimerspec new_value_host {};
                              struct itimerspec old_value_host {};
                              struct itimerspec* old_value_host_p {};

                              // new_value is the structure that is actually read, so it is the one that has to be
                              // verified before it is dereferenced.
                              FaultSafeUserMemAccess::VerifyIsReadable(new_value, sizeof(*new_value));
                              new_value_host = *new_value;
                              if (old_value) {
                                // Probe the output buffer up front, before the timer is modified.
                                FaultSafeUserMemAccess::VerifyIsReadable(old_value, sizeof(*old_value));
                                old_value_host_p = &old_value_host;
                              }

                              // Flags don't need remapped
                              uint64_t Result = ::timerfd_settime(fd, flags, &new_value_host, old_value_host_p);

                              if (Result != -1 && old_value) {
                                FaultSafeUserMemAccess::VerifyIsWritable(old_value, sizeof(*old_value));
                                *old_value = old_value_host;
                              }
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(timerfd_gettime, [](FEXCore::Core::CpuStateFrame* Frame, int fd, FEX::HLE::x32::old_itimerspec32* curr_value) -> uint64_t {
    // Query the timer in the host layout, then convert into the guest's 32-bit itimerspec on success.
    struct itimerspec Host {};
    uint64_t Result = ::timerfd_gettime(fd, &Host);
    if (Result == -1) {
      SYSCALL_ERRNO();
    }

    FaultSafeUserMemAccess::VerifyIsWritable(curr_value, sizeof(*curr_value));
    *curr_value = Host;
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(pselect6_time64,
                            [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set32* readfds, fd_set32* writefds, fd_set32* exceptfds,
                               struct timespec* timeout, compat_ptr<sigset_argpack32> sigmaskpack) -> uint64_t {
                              // 32-bit pselect6 with a 64-bit timespec. The timeout is handed to the host as-is;
                              // the guest's fd sets are arrays of 32-bit words and are converted to host fd_sets
                              // before the call and back again afterwards.
                              //
                              // nfds:        highest descriptor number + 1, as provided by the guest
                              // readfds/writefds/exceptfds: optional guest bit sets (may be null)
                              // timeout:     optional timeout, passed through unchanged (may be null)
                              // sigmaskpack: optional {sigset pointer, size} pair (may be null)

                              // A negative count is invalid; the kernel rejects it with EINVAL.
                              if (nfds < 0) {
                                return -EINVAL;
                              }
                              // The host fd_set only has room for FD_SETSIZE descriptors. Anything beyond that
                              // cannot be represented and must never be handed to FD_SET/FD_ISSET, which would
                              // access memory outside of the host sets.
                              if (nfds > FD_SETSIZE) {
                                nfds = FD_SETSIZE;
                              }

                              fd_set Host_readfds;
                              fd_set Host_writefds;
                              fd_set Host_exceptfds;
                              sigset_t HostSet {};
                              sigset_t* HostSetPtr {};

                              FD_ZERO(&Host_readfds);
                              FD_ZERO(&Host_writefds);
                              FD_ZERO(&Host_exceptfds);
                              sigemptyset(&HostSet);

                              // Number of 32-bit guest words needed to hold nfds bits (rounded up).
                              const uint32_t NumWords = FEXCore::AlignUp(nfds, 32) / 32;

                              // Copies the first nfds bits of a guest set into a host fd_set.
                              auto ImportSet = [nfds, NumWords](fd_set32* Guest, fd_set* Host) {
                                FaultSafeUserMemAccess::VerifyIsReadable(Guest, sizeof(fd_set32) * NumWords);
                                for (uint32_t Word = 0; Word < NumWords; ++Word) {
                                  const uint32_t Bits = Guest[Word];
                                  for (int Bit = 0; Bit < 32; ++Bit) {
                                    const int FD = static_cast<int>(Word * 32) + Bit;
                                    if (FD >= nfds) {
                                      break;
                                    }
                                    if ((Bits >> Bit) & 1) {
                                      FD_SET(FD, Host);
                                    }
                                  }
                                }
                              };

                              // Mirrors the first nfds bits of a host fd_set back into the guest set.
                              auto ExportSet = [nfds, NumWords](fd_set32* Guest, fd_set* Host) {
                                FaultSafeUserMemAccess::VerifyIsWritable(Guest, sizeof(fd_set32) * NumWords);
                                for (int FD = 0; FD < nfds; ++FD) {
                                  // Unsigned literal: shifting a signed 1 into bit 31 is best avoided.
                                  const uint32_t Mask = 1U << (FD & 31);
                                  if (FD_ISSET(FD, Host)) {
                                    Guest[FD / 32] |= Mask;
                                  } else {
                                    Guest[FD / 32] &= ~Mask;
                                  }
                                }
                              };

                              if (readfds) {
                                ImportSet(readfds, &Host_readfds);
                              }
                              if (writefds) {
                                ImportSet(writefds, &Host_writefds);
                              }
                              if (exceptfds) {
                                ImportSet(exceptfds, &Host_exceptfds);
                              }

                              FaultSafeUserMemAccess::VerifyIsReadableOrNull(sigmaskpack, sizeof(*sigmaskpack));
                              if (sigmaskpack && sigmaskpack->sigset) {
                                FaultSafeUserMemAccess::VerifyIsReadable(sigmaskpack->sigset, sizeof(*sigmaskpack->sigset));
                                uint64_t* sigmask = sigmaskpack->sigset;
                                size_t sigsetsize = sigmaskpack->size;

                                // Only a single 64-bit word is read from the guest, so never look at more than
                                // 64 bits regardless of the size the guest claims (a shift by >= 64 is undefined).
                                const uint64_t GuestMask = *sigmask;
                                const size_t NumBits = sigsetsize < 8 ? sigsetsize * 8 : 64;
                                for (size_t i = 0; i < NumBits; ++i) {
                                  if (GuestMask & (1ULL << i)) {
                                    sigaddset(&HostSet, i + 1);
                                  }
                                }
                                HostSetPtr = &HostSet;
                              }

                              // When the guest supplied no signal mask, pass a null mask so the current mask stays
                              // in effect for the duration of the call instead of being replaced by an empty one.
                              uint64_t Result = ::pselect(nfds, readfds ? &Host_readfds : nullptr, writefds ? &Host_writefds : nullptr,
                                                          exceptfds ? &Host_exceptfds : nullptr, timeout, HostSetPtr);

                              if (readfds) {
                                ExportSet(readfds, &Host_readfds);
                              }
                              if (writefds) {
                                ExportSet(writefds, &Host_writefds);
                              }
                              if (exceptfds) {
                                ExportSet(exceptfds, &Host_exceptfds);
                              }

                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(sendfile, [](FEXCore::Core::CpuStateFrame* Frame, int out_fd, int in_fd, compat_off_t* offset, size_t count) -> uint64_t {
    // 32-bit sendfile: the guest's offset is a 32-bit off_t, so it is widened for the host call and
    // narrowed again afterwards.
    //
    // offset: optional in/out file position (may be null, in which case the file's own position is used)
    off_t Local {};
    off_t* Local_p {};
    if (offset) {
      FaultSafeUserMemAccess::VerifyIsReadable(offset, sizeof(*offset));
      Local = *offset;
      Local_p = &Local;
    }

    uint64_t Result = ::sendfile(out_fd, in_fd, Local_p, count);

    // sendfile() updates the offset to point just past the last byte that was read. The guest relies on
    // that to continue a transfer, so the new position has to be stored back into guest memory.
    if (Result != -1 && offset) {
      FaultSafeUserMemAccess::VerifyIsWritable(offset, sizeof(*offset));
      *offset = static_cast<compat_off_t>(Local);
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(
    pread_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t {
      // Rebuild the 64-bit file offset from the two 32-bit halves supplied by the guest.
      const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;

      uint64_t Result = ::pread64(fd, buf, count, Offset);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(
    pwrite_64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* buf, uint32_t count, uint32_t offset_low, uint32_t offset_high) -> uint64_t {
      // Rebuild the 64-bit file offset from the two 32-bit halves supplied by the guest.
      const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;

      uint64_t Result = ::pwrite64(fd, buf, count, Offset);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(
    readahead, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint64_t offset_high, size_t count) -> uint64_t {
      // Rebuild the 64-bit file offset from the low/high halves supplied by the guest.
      const uint64_t Offset = (offset_high << 32) | offset_low;

      uint64_t Result = ::readahead(fd, Offset, count);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(sync_file_range,
                            [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint32_t offset_high, uint32_t len_low,
                               uint32_t len_high, unsigned int flags) -> uint64_t {
                              // Offset and length each arrive as two 32-bit halves; the flags are forwarded
                              // without any remapping.
                              const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;
                              const uint64_t Len = (static_cast<uint64_t>(len_high) << 32) | len_low;

                              uint64_t Result = ::syscall(SYSCALL_DEF(sync_file_range), fd, Offset, Len, flags);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(fallocate,
                            [](FEXCore::Core::CpuStateFrame* Frame, int fd, int mode, uint32_t offset_low, uint32_t offset_high,
                               uint32_t len_low, uint32_t len_high) -> uint64_t {
                              // Offset and length each arrive as two 32-bit halves; reassemble both before
                              // calling the host.
                              const uint64_t Offset = (static_cast<uint64_t>(offset_high) << 32) | offset_low;
                              const uint64_t Len = (static_cast<uint64_t>(len_high) << 32) | len_low;

                              uint64_t Result = ::fallocate(fd, mode, Offset, Len);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(
    vmsplice, [](FEXCore::Core::CpuStateFrame* Frame, int fd, const struct iovec32* iov, unsigned long nr_segs, unsigned int flags) -> uint64_t {
      // 32-bit vmsplice: widens the guest's iovec array into host iovecs and forwards the call.
      FaultSafeUserMemAccess::VerifyIsReadable(iov, sizeof(struct iovec32) * SanitizeIOCount(nr_segs));

      // Copy exactly as many elements as were verified above. Building the vector from the raw guest
      // count would read guest memory that was never checked (and could request an enormous allocation).
      // The raw count is still what the host call receives, matching the other iovec based handlers.
      fextl::vector<iovec> Host_iovec(iov, iov + SanitizeIOCount(nr_segs));
      uint64_t Result = ::vmsplice(fd, Host_iovec.data(), nr_segs, flags);
      SYSCALL_ERRNO();
    });

  // ftruncate: the guest's 32-bit length is widened to a 64-bit value before it is passed to the raw host syscall.
  REGISTER_SYSCALL_IMPL_X32(ftruncate, [](FEXCore::Core::CpuStateFrame* Frame, int fd, compat_off_t length) -> uint64_t {
    uint64_t Result = ::syscall(SYSCALL_DEF(ftruncate), fd, static_cast<int64_t>(length));
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/FS.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"

#include <stddef.h>
#include <stdint.h>
#include <sys/mount.h>
#include <unistd.h>

namespace FEX::HLE::x32 {
// Registers the 32-bit specific filesystem syscall handlers (umount, truncate64, ftruncate64, sigprocmask).
void RegisterFS(FEX::HLE::SyscallHandler* Handler) {
  // umount: forwarded unchanged to the host umount().
  REGISTER_SYSCALL_IMPL_X32(umount, [](FEXCore::Core::CpuStateFrame* Frame, const char* target) -> uint64_t {
    uint64_t Result = ::umount(target);
    SYSCALL_ERRNO();
  });

  // truncate64: the 64-bit length arrives as two 32-bit halves which are joined before calling the host truncate().
  REGISTER_SYSCALL_IMPL_X32(
    truncate64, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, uint32_t offset_low, uint32_t offset_high) -> uint64_t {
      const uint64_t NewLength = (static_cast<uint64_t>(offset_high) << 32) | offset_low;
      uint64_t Result = ::truncate(path, NewLength);
      SYSCALL_ERRNO();
    });

  // ftruncate64: same low/high reassembly as truncate64, operating on a file descriptor.
  REGISTER_SYSCALL_IMPL_X32(ftruncate64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t offset_low, uint32_t offset_high) -> uint64_t {
    const uint64_t NewLength = (static_cast<uint64_t>(offset_high) << 32) | offset_low;
    uint64_t Result = ::ftruncate(fd, NewLength);
    SYSCALL_ERRNO();
  });

  // sigprocmask: handed to the signal delegator for the calling thread. sigsetsize is accepted but not consulted here.
  REGISTER_SYSCALL_IMPL_X32(
    sigprocmask, [](FEXCore::Core::CpuStateFrame* Frame, int how, const uint64_t* set, uint64_t* oldset, size_t sigsetsize) -> uint64_t {
      return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(
        FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), how, set, oldset);
    });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IO.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include <linux/aio_abi.h>
#include <stdint.h>
#include <syscall.h>
#include <unistd.h>

namespace FEX::HLE::x32 {
// Registers the 32-bit AIO event syscalls. Both handlers translate the optional guest timespec32 timeout into a
// host timespec before issuing the raw host syscall.
void RegisterIO(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X32(io_getevents,
                            [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event* events,
                               struct timespec32* timeout) -> uint64_t {
                              struct timespec HostTimeout {};
                              struct timespec* HostTimeoutPtr = nullptr;
                              if (timeout != nullptr) {
                                // Only touch guest memory when a timeout was actually supplied.
                                FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
                                HostTimeout = *timeout;
                                HostTimeoutPtr = &HostTimeout;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(io_getevents), ctx_id, min_nr, nr, events, HostTimeoutPtr);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(io_pgetevents,
                            [](FEXCore::Core::CpuStateFrame* Frame, aio_context_t ctx_id, long min_nr, long nr, struct io_event* events,
                               struct timespec32* timeout, const struct io_sigset* usig) -> uint64_t {
                              struct timespec HostTimeout {};
                              struct timespec* HostTimeoutPtr = nullptr;
                              if (timeout != nullptr) {
                                // Only touch guest memory when a timeout was actually supplied.
                                FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
                                HostTimeout = *timeout;
                                HostTimeoutPtr = &HostTimeout;
                              }

                              // usig is passed through to the host syscall untouched.
                              uint64_t Result = ::syscall(SYSCALL_DEF(io_pgetevents), ctx_id, min_nr, nr, events, HostTimeoutPtr, usig);
                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Info.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include <algorithm>
#include <asm/posix_types.h>
#include <limits>
#include <linux/utsname.h>
#include <stdint.h>
#include <sys/resource.h>
#include <sys/sysinfo.h>
#include <sys/utsname.h>

#include <git_version.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::rlimit32<true>>, "%lx")
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::rlimit32<false>>, "%lx")

namespace FEX::HLE::x32 {
// Guest-side layout written by the 32-bit sysinfo handler: every field is narrowed to a fixed-width
// 16/32-bit type, and the whole structure must occupy exactly 64 bytes (checked below).
struct sysinfo32 {
  int32_t uptime;
  uint32_t loads[3];
  uint32_t totalram;
  uint32_t freeram;
  uint32_t sharedram;
  uint32_t bufferram;
  uint32_t totalswap;
  uint32_t freeswap;
  // 16-bit field; natural alignment of the following uint32_t leaves two bytes of padding after it.
  uint16_t procs;
  uint32_t totalhigh;
  uint32_t freehigh;
  // Size in bytes of the unit the memory fields above are expressed in.
  uint32_t mem_unit;
  // Trailing padding that brings the structure up to 64 bytes.
  char _pad[8];
};

// Guards against accidental layout changes of the guest-visible structure.
static_assert(sizeof(sysinfo32) == 64, "Needs to be 64bytes");

// Registers the 32-bit specific "info" syscalls: oldolduname, olduname, getrlimit, ugetrlimit, setrlimit,
// sysinfo, getrusage and map_shadow_stack.
void RegisterInfo(FEX::HLE::SyscallHandler* Handler) {
  // oldolduname: zeroes the guest buffer and then copies at most __OLD_UTS_LEN bytes into each field.
  REGISTER_SYSCALL_IMPL_X32(oldolduname, [](FEXCore::Core::CpuStateFrame* Frame, struct oldold_utsname* buf) -> uint64_t {
    struct utsname Local {};

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));

    memset(buf, 0, sizeof(*buf));
    if (::uname(&Local) == 0) {
      // The node name is the only field taken from the host.
      memcpy(buf->nodename, Local.nodename, __OLD_UTS_LEN);
    } else {
      strncpy(buf->nodename, "FEXCore", __OLD_UTS_LEN);
      LogMan::Msg::EFmt("Couldn't determine host nodename. Defaulting to '{}'", buf->nodename);
    }
    strncpy(buf->sysname, "Linux", __OLD_UTS_LEN);
    // Report the emulated guest kernel version rather than the host's.
    uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion();
    snprintf(buf->release, __OLD_UTS_LEN, "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion),
             FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion));

    const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__;
    strncpy(buf->version, version, __OLD_UTS_LEN);
    // Tell the guest that we are a 64bit kernel
    strncpy(buf->machine, "x86_64", __OLD_UTS_LEN);
    return 0;
  });

  // olduname: same as oldolduname but for the old_utsname layout, copying at most __NEW_UTS_LEN bytes per field.
  REGISTER_SYSCALL_IMPL_X32(olduname, [](FEXCore::Core::CpuStateFrame* Frame, struct old_utsname* buf) -> uint64_t {
    struct utsname Local {};

    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));

    memset(buf, 0, sizeof(*buf));
    if (::uname(&Local) == 0) {
      // The node name is the only field taken from the host.
      memcpy(buf->nodename, Local.nodename, __NEW_UTS_LEN);
    } else {
      strncpy(buf->nodename, "FEXCore", __NEW_UTS_LEN);
      LogMan::Msg::EFmt("Couldn't determine host nodename. Defaulting to '{}'", buf->nodename);
    }
    strncpy(buf->sysname, "Linux", __NEW_UTS_LEN);
    // Report the emulated guest kernel version rather than the host's.
    uint32_t GuestVersion = FEX::HLE::_SyscallHandler->GetGuestKernelVersion();
    snprintf(buf->release, __NEW_UTS_LEN, "%d.%d.%d", FEX::HLE::SyscallHandler::KernelMajor(GuestVersion),
             FEX::HLE::SyscallHandler::KernelMinor(GuestVersion), FEX::HLE::SyscallHandler::KernelPatch(GuestVersion));

    const char version[] = "#" GIT_DESCRIBE_STRING " SMP " __DATE__ " " __TIME__;
    strncpy(buf->version, version, __NEW_UTS_LEN);
    // Tell the guest that we are a 64bit kernel
    strncpy(buf->machine, "x86_64", __NEW_UTS_LEN);
    return 0;
  });

  // getrlimit: queries the host limit and converts it to the guest rlimit32<true> layout.
  // The guest buffer is only touched when the host call succeeded, matching sysinfo/getrusage below.
  REGISTER_SYSCALL_IMPL_X32(
    getrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, compat_ptr<FEX::HLE::x32::rlimit32<true>> rlim) -> uint64_t {
      struct rlimit rlim64 {};
      uint64_t Result = ::getrlimit(resource, &rlim64);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(rlim, sizeof(*rlim));
        *rlim = rlim64;
      }
      SYSCALL_ERRNO();
    });

  // ugetrlimit: as getrlimit, but converting to the rlimit32<false> layout.
  REGISTER_SYSCALL_IMPL_X32(
    ugetrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, compat_ptr<FEX::HLE::x32::rlimit32<false>> rlim) -> uint64_t {
      struct rlimit rlim64 {};
      uint64_t Result = ::getrlimit(resource, &rlim64);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(rlim, sizeof(*rlim));
        *rlim = rlim64;
      }
      SYSCALL_ERRNO();
    });

  // setrlimit: converts the guest rlimit32<false> into a host rlimit and applies it.
  REGISTER_SYSCALL_IMPL_X32(
    setrlimit, [](FEXCore::Core::CpuStateFrame* Frame, int resource, const compat_ptr<FEX::HLE::x32::rlimit32<false>> rlim) -> uint64_t {
      struct rlimit rlim64 {};
      FaultSafeUserMemAccess::VerifyIsReadable(rlim, sizeof(*rlim));
      rlim64 = *rlim;
      uint64_t Result = ::setrlimit(resource, &rlim64);
      SYSCALL_ERRNO();
    });

  // Helper macros for the sysinfo handler. They expand against the handler's `info`, `Host` and `ShiftAmount` locals.
  // They are defined outside of the REGISTER_SYSCALL_IMPL_X32 argument list because preprocessing directives inside
  // a macro's arguments have undefined behaviour, and they are removed again directly after the registration.
  //
  // SYSINFO_COPY_CLAMPED: copies a field, saturating at the maximum value of the guest field's type.
#define SYSINFO_COPY_CLAMPED(x) \
  info->x = static_cast<decltype(info->x)>(std::min(Host.x, static_cast<decltype(Host.x)>(std::numeric_limits<decltype(info->x)>::max())))
  // SYSINFO_COPY_SHIFTED: copies a memory-size field after scaling it down by ShiftAmount bits.
#define SYSINFO_COPY_SHIFTED(x) info->x = static_cast<decltype(info->x)>(Host.x >> ShiftAmount)

  // sysinfo: narrows the host sysinfo into the 64-byte sysinfo32 layout.
  REGISTER_SYSCALL_IMPL_X32(sysinfo, [](FEXCore::Core::CpuStateFrame* Frame, struct sysinfo32* info) -> uint64_t {
    struct sysinfo Host {};
    uint64_t Result = ::sysinfo(&Host);
    if (Result != -1) {
      FaultSafeUserMemAccess::VerifyIsWritable(info, sizeof(*info));
      SYSINFO_COPY_CLAMPED(uptime);
      SYSINFO_COPY_CLAMPED(procs);

      info->loads[0] = std::min(Host.loads[0], static_cast<unsigned long>(std::numeric_limits<uint32_t>::max()));
      info->loads[1] = std::min(Host.loads[1], static_cast<unsigned long>(std::numeric_limits<uint32_t>::max()));
      info->loads[2] = std::min(Host.loads[2], static_cast<unsigned long>(std::numeric_limits<uint32_t>::max()));

      // If any result can't fit in to a uint32_t then we need to shift the mem_unit and all the members
      // Set the mem_unit to the pagesize
      uint32_t ShiftAmount {};
      if ((Host.totalram >> 32) != 0 || (Host.totalswap >> 32) != 0) {
        // Double mem_unit until it reaches the page size, remembering how many bits the sizes must drop by.
        while (Host.mem_unit < FEXCore::Utils::FEX_PAGE_SIZE) {
          Host.mem_unit <<= 1;
          ++ShiftAmount;
        }
      }

      SYSINFO_COPY_SHIFTED(totalram);
      SYSINFO_COPY_SHIFTED(freeram);
      SYSINFO_COPY_SHIFTED(sharedram);
      SYSINFO_COPY_SHIFTED(bufferram);
      SYSINFO_COPY_SHIFTED(totalswap);
      SYSINFO_COPY_SHIFTED(freeswap);
      SYSINFO_COPY_SHIFTED(totalhigh);
      SYSINFO_COPY_SHIFTED(freehigh);
      // mem_unit is copied last so the guest sees the (possibly scaled) unit that matches the shifted sizes.
      SYSINFO_COPY_CLAMPED(mem_unit);
    }
    SYSCALL_ERRNO();
  });

#undef SYSINFO_COPY_SHIFTED
#undef SYSINFO_COPY_CLAMPED

  // getrusage: queries the host via the raw syscall and converts the result into the guest rusage_32 layout on success.
  REGISTER_SYSCALL_IMPL_X32(getrusage, [](FEXCore::Core::CpuStateFrame* Frame, int who, rusage_32* usage) -> uint64_t {
    struct rusage usage64 {};
    uint64_t Result = ::syscall(SYSCALL_DEF(getrusage), who, &usage64);
    if (Result != -1) {
      FaultSafeUserMemAccess::VerifyIsWritable(usage, sizeof(*usage));
      *usage = usage64;
    }
    SYSCALL_ERRNO();
  });

  // map_shadow_stack: only given a real handler when the host kernel is at least 6.8.0; otherwise it is registered
  // as an unimplemented syscall.
  if (Handler->IsHostKernelVersionAtLeast(6, 8, 0)) {
    REGISTER_SYSCALL_IMPL_X32(map_shadow_stack, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t addr, uint64_t size, uint32_t flags) -> uint64_t {
      // Claim that shadow stack isn't supported.
      return -EOPNOTSUPP;
    });
  } else {
    REGISTER_SYSCALL_IMPL_X32(map_shadow_stack, UnimplementedSyscallSafe);
  }
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/HelperDefines.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

// Token-pasting helpers (despite the name, these do not produce strings):
// STRINGY(x, y) macro-expands both arguments and then concatenates them into one token via STRINGY2.
#define STRINGY2(x, y) x##y
#define STRINGY(x, y) STRINGY2(x, y)

// Stringification helpers: STRINGY1(x) macro-expands x (e.g. __LINE__) through the intermediate
// STRINGY12 before STRINGY11 turns the result into a string literal.
#define STRINGY12(x) STRINGY11(x)
#define STRINGY11(x) #x
#define STRINGY1(x) STRINGY12(x)

#ifndef _BASIC_META
// Use this for the super basic ioctl passthrough path, where the ioctl definition matches upstream.
// Declares a uint8_t array typedef named `_<x><line>` whose extent is the ioctl number `x`.
// The typedef is annotated with "fex-match" and with an "ioctl-alias-x86_32-_<x><line>" tag that
// repeats the typedef's own name; the __LINE__ suffix keeps each generated name unique per source line.
// Only defined if the includer has not already provided its own _BASIC_META.
#define _BASIC_META(x)                   \
  __attribute__((annotate("fex-match"))) \
  __attribute__((annotate("ioctl-alias-x86_32-_" #x STRINGY1(__LINE__)))) typedef uint8_t STRINGY(_##x, __LINE__)[x];
#endif

#ifndef _BASIC_META_VAR
// This is similar to _BASIC_META except that it allows you to pass variadic arguments to the original ioctl definition:
// `x` is a function-like ioctl macro and the typedef's extent is `x(args)`.
// The generated typedef name and annotation tag are still built from `x` and __LINE__ only.
#define _BASIC_META_VAR(x, args...)      \
  __attribute__((annotate("fex-match"))) \
  __attribute__((annotate("ioctl-alias-x86_32-_" #x STRINGY1(__LINE__)))) typedef uint8_t STRINGY(_##x, __LINE__)[x(args)];
#endif

#ifndef _CUSTOM_META
// IOCTL doesn't match across architecture
// Generates a FEX_<name> version of the ioctl with custom ioctl definition
// eg: _CUSTOM_META(DRM_IOCTL_AMDGPU_GEM_METADATA, DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, FEX::HLE::x32::AMDGPU::fex_drm_amdgpu_gem_metadata));
// Allows you to effectively pass in the original ioctl definition with custom type replacing the upstream type
//
// Expands to three declarations:
//   - `_meta_<name>`: uint8_t array typedef whose extent is the upstream ioctl number `name`
//   - `_<name>`:      uint8_t array typedef whose extent is `ioctl_num`, annotated as an alias of `_meta_<name>`
//   - `FEX_<name>`:   constexpr uint32_t holding `ioctl_num`
#define _CUSTOM_META(name, ioctl_num)                                                              \
  typedef uint8_t _meta_##name[name];                                                              \
  __attribute__((annotate("ioctl-alias-x86_32-_meta_" #name))) typedef uint8_t _##name[ioctl_num]; \
  constexpr static uint32_t FEX_##name = ioctl_num;
#endif

#ifndef _CUSTOM_META_OFFSET
// Same as _CUSTOM_META but allows you to define multiple types from an offset
// Required to have an ioctl covering a range which some ioctls do
//
// Unlike _CUSTOM_META, the `_meta_<name>` typedef is sized by `ioctl_num + offset` as well (not by `name`),
// so `name` only serves as the identifier suffix here. `ioctl_num` and `offset` are pasted into the sum
// without extra parentheses, so callers should pass expressions that are safe next to a `+`.
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)                                                        \
  typedef uint8_t _meta_##name[ioctl_num + offset];                                                         \
  __attribute__((annotate("ioctl-alias-x86_32-_meta_" #name))) typedef uint8_t _##name[ioctl_num + offset]; \
  constexpr static uint32_t FEX_##name = ioctl_num + offset;
#endif


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl
================================================
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_CREATE) // AMDGPU ioctl list; _BASIC_META bakes __LINE__ into generated names, so keep one entry per line.
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_MMAP)
_BASIC_META(DRM_IOCTL_AMDGPU_CTX)
_BASIC_META(DRM_IOCTL_AMDGPU_BO_LIST)
_BASIC_META(DRM_IOCTL_AMDGPU_CS)
_BASIC_META(DRM_IOCTL_AMDGPU_INFO)
_CUSTOM_META(DRM_IOCTL_AMDGPU_GEM_METADATA, DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, FEX::HLE::x32::AMDGPU::fex_drm_amdgpu_gem_metadata)) // Only entry using a FEX-side struct in place of the upstream type.
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_VA)
_BASIC_META(DRM_IOCTL_AMDGPU_WAIT_CS)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_OP)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_USERPTR)
_BASIC_META(DRM_IOCTL_AMDGPU_WAIT_FENCES)
_BASIC_META(DRM_IOCTL_AMDGPU_VM)
_BASIC_META(DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE)
_BASIC_META(DRM_IOCTL_AMDGPU_SCHED)
_BASIC_META(DRM_IOCTL_AMDGPU_USERQ)
_BASIC_META(DRM_IOCTL_AMDGPU_USERQ_SIGNAL)
_BASIC_META(DRM_IOCTL_AMDGPU_USERQ_WAIT)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/asahi_drm.inl
================================================
_BASIC_META(DRM_IOCTL_ASAHI_GET_PARAMS) // Asahi ioctl list; every entry uses the basic passthrough macro (one entry per line, names embed __LINE__).
_BASIC_META(DRM_IOCTL_ASAHI_GET_TIME)
_BASIC_META(DRM_IOCTL_ASAHI_VM_CREATE)
_BASIC_META(DRM_IOCTL_ASAHI_VM_DESTROY)
_BASIC_META(DRM_IOCTL_ASAHI_VM_BIND)
_BASIC_META(DRM_IOCTL_ASAHI_GEM_CREATE)
_BASIC_META(DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET)
_BASIC_META(DRM_IOCTL_ASAHI_GEM_BIND_OBJECT)
_BASIC_META(DRM_IOCTL_ASAHI_QUEUE_CREATE)
_BASIC_META(DRM_IOCTL_ASAHI_QUEUE_DESTROY)
_BASIC_META(DRM_IOCTL_ASAHI_SUBMIT)

================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/asound.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <sound/asound.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x32 {

// Declarations describing the ALSA (sound/asound.h) ioctls for the 32-bit guest.
namespace asound {
// Fallback definitions for ioctl numbers that the included <sound/asound.h> may not provide
// (presumably older kernel headers - confirm). Each is only defined when missing.
#ifndef SNDRV_TIMER_IOCTL_TREAD_OLD
#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
#endif

#ifndef SNDRV_PCM_IOCTL_USER_PVERSION
#define SNDRV_PCM_IOCTL_USER_PVERSION _IOW('A', 0x04, int)
#endif

#ifndef SNDRV_TIMER_IOCTL_TREAD64
#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
#endif

// Expands the _BASIC_META list of ioctls into typedefs inside this namespace.
#include "LinuxSyscalls/x32/Ioctl/asound.inl"
} // namespace asound
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/asound.inl
================================================
_BASIC_META(SNDRV_HWDEP_IOCTL_PVERSION) // hwdep ioctls. Generated names embed __LINE__, so keep one entry per line.
_BASIC_META(SNDRV_HWDEP_IOCTL_INFO)
_BASIC_META(SNDRV_HWDEP_IOCTL_DSP_STATUS)
//_BASIC_META(SNDRV_HWDEP_IOCTL_DSP_LOAD)

_BASIC_META(SNDRV_PCM_IOCTL_PVERSION) // PCM ioctls. "XXX:" entries are disabled here - presumably not plain passthrough; confirm.
_BASIC_META(SNDRV_PCM_IOCTL_INFO)
_BASIC_META(SNDRV_PCM_IOCTL_TSTAMP)
_BASIC_META(SNDRV_PCM_IOCTL_TTSTAMP)
_BASIC_META(SNDRV_PCM_IOCTL_USER_PVERSION)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_HW_REFINE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_HW_PARAMS)
_BASIC_META(SNDRV_PCM_IOCTL_HW_FREE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_SW_PARAMS)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_STATUS)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_DELAY)
_BASIC_META(SNDRV_PCM_IOCTL_HWSYNC)
// XXX: _BASIC_META(__SNDRV_PCM_IOCTL_SYNC_PTR)
// XXX: _BASIC_META(__SNDRV_PCM_IOCTL_SYNC_PTR64)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_SYNC_PTR)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_STATUS_EXT)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_CHANNEL_INFO)
_BASIC_META(SNDRV_PCM_IOCTL_PREPARE)
_BASIC_META(SNDRV_PCM_IOCTL_RESET)
_BASIC_META(SNDRV_PCM_IOCTL_START)
_BASIC_META(SNDRV_PCM_IOCTL_DROP)
_BASIC_META(SNDRV_PCM_IOCTL_DRAIN)
_BASIC_META(SNDRV_PCM_IOCTL_PAUSE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_REWIND)
_BASIC_META(SNDRV_PCM_IOCTL_RESUME)
_BASIC_META(SNDRV_PCM_IOCTL_XRUN)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_FORWARD)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_WRITEI_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_READI_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_WRITEN_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_READN_FRAMES)
_BASIC_META(SNDRV_PCM_IOCTL_LINK)
_BASIC_META(SNDRV_PCM_IOCTL_UNLINK)

_BASIC_META(SNDRV_RAWMIDI_IOCTL_PVERSION) // rawmidi ioctls.
_BASIC_META(SNDRV_RAWMIDI_IOCTL_INFO)
// XXX: _BASIC_META(SNDRV_RAWMIDI_IOCTL_PARAMS)
// XXX: _BASIC_META(SNDRV_RAWMIDI_IOCTL_STATUS)
_BASIC_META(SNDRV_RAWMIDI_IOCTL_DROP)
_BASIC_META(SNDRV_RAWMIDI_IOCTL_DRAIN)

_BASIC_META(SNDRV_TIMER_IOCTL_PVERSION) // timer ioctls.
_BASIC_META(SNDRV_TIMER_IOCTL_NEXT_DEVICE)
_BASIC_META(SNDRV_TIMER_IOCTL_TREAD_OLD)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GINFO)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GPARAMS)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GSTATUS)
_BASIC_META(SNDRV_TIMER_IOCTL_SELECT)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_INFO)
_BASIC_META(SNDRV_TIMER_IOCTL_PARAMS)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_STATUS)
_BASIC_META(SNDRV_TIMER_IOCTL_START)
_BASIC_META(SNDRV_TIMER_IOCTL_STOP)
_BASIC_META(SNDRV_TIMER_IOCTL_CONTINUE)
_BASIC_META(SNDRV_TIMER_IOCTL_PAUSE)
_BASIC_META(SNDRV_TIMER_IOCTL_TREAD64)

_BASIC_META(SNDRV_CTL_IOCTL_PVERSION) // control ioctls.
_BASIC_META(SNDRV_CTL_IOCTL_CARD_INFO)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_LIST)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_INFO)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_READ)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_WRITE)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_LOCK)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_UNLOCK)
_BASIC_META(SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_ADD)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_REPLACE)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_REMOVE)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_READ)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_WRITE)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_COMMAND)
_BASIC_META(SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_HWDEP_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_POWER)
_BASIC_META(SNDRV_CTL_IOCTL_POWER_STATE)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/drm.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
extern "C" {
// drm headers use a `__user` define that has an address_space attribute. This allows their tooling to see unsafe user-space accesses.
// Define this to nothing so we don't need to modify those headers.
#define __user
#include "fex-drm/drm.h"
#include "fex-drm/drm_mode.h"
#include "fex-drm/i915_drm.h"
#include "fex-drm/amdgpu_drm.h"
#include "fex-drm/asahi_drm.h"
#include "fex-drm/lima_drm.h"
#include "fex-drm/panfrost_drm.h"
#include "fex-drm/msm_drm.h"
#include "fex-drm/nouveau_drm.h"
#include "fex-drm/nova_drm.h"
#include "fex-drm/radeon_drm.h"
#include "fex-drm/vc4_drm.h"
#include "fex-drm/v3d_drm.h"
#include "fex-drm/panthor_drm.h"
#include "fex-drm/pvr_drm.h"
#include "fex-drm/virtgpu_drm.h"
#include "fex-drm/xe_drm.h"
}
#include <sys/ioctl.h>

// Field-copy shorthands used by the conversion helpers below; both expect a variable named `val` in scope.
// CPYT ("copy to"):   writes this object's member x into val.x.
// CPYF ("copy from"): reads val.x into this object's member x.
#define CPYT(x) val.x = x
#define CPYF(x) x = val.x
namespace FEX::HLE::x32 {

namespace DRM {
  // 32-bit x86 guest view of drm_version: the name/date/desc buffers are held as compat_ptr<char>.
  struct FEX_ANNOTATE("alias-x86_32-drm_version") FEX_ANNOTATE("fex-match") fex_drm_version {
    int version_major;      /**< Major version */
    int version_minor;      /**< Minor version */
    int version_patchlevel; /**< Patch level */
    uint32_t name_len;      /**< Length of name buffer */
    compat_ptr<char> name;  /**< Name of driver */
    uint32_t date_len;      /**< Length of date buffer */
    compat_ptr<char> date;  /**< User-space buffer to hold date */
    uint32_t desc_len;      /**< Length of desc buffer */
    compat_ptr<char> desc;  /**< User-space buffer to hold desc */

    fex_drm_version() = delete;

    // Produces the host drm_version from the guest fields.
    operator drm_version() const {
      drm_version Host {};
      Host.version_major = version_major;
      Host.version_minor = version_minor;
      Host.version_patchlevel = version_patchlevel;
      Host.name_len = name_len;
      Host.name = name;
      Host.date_len = date_len;
      Host.date = date;
      Host.desc_len = desc_len;
      Host.desc = desc;
      return Host;
    }

    // Builds the guest structure from a host drm_version; pointer members go through auto_compat_ptr.
    fex_drm_version(struct drm_version Host)
      : name {auto_compat_ptr {Host.name}}
      , date {auto_compat_ptr {Host.date}}
      , desc {auto_compat_ptr {Host.desc}} {
      version_major = Host.version_major;
      version_minor = Host.version_minor;
      version_patchlevel = Host.version_patchlevel;
      name_len = Host.name_len;
      date_len = Host.date_len;
      desc_len = Host.desc_len;
    }
  };

  // 32-bit x86 guest view of drm_unique.
  struct FEX_ANNOTATE("alias-x86_32-drm_unique") FEX_ANNOTATE("fex-match") fex_drm_unique {
    compat_size_t unique_len;
    compat_ptr<char> unique;

    fex_drm_unique() = delete;

    // Produces the host drm_unique from the guest fields.
    operator drm_unique() const {
      drm_unique Host {};
      Host.unique_len = unique_len;
      Host.unique = unique;
      return Host;
    }

    // Builds the guest structure from a host drm_unique.
    fex_drm_unique(struct drm_unique Host)
      : unique {auto_compat_ptr {Host.unique}} {
      unique_len = Host.unique_len;
    }
  };

  // 32-bit x86 guest view of drm_map.
  struct FEX_ANNOTATE("alias-x86_32-drm_map") FEX_ANNOTATE("fex-match") fex_drm_map {
    uint32_t offset;
    uint32_t size;
    enum drm_map_type type;
    enum drm_map_flags flags;
    compat_ptr<void> handle;
    int32_t mtrr;

    fex_drm_map() = delete;

    // Produces the host drm_map from the guest fields.
    operator drm_map() const {
      drm_map Host {};
      Host.offset = offset;
      Host.size = size;
      Host.type = type;
      Host.flags = flags;
      Host.handle = handle;
      Host.mtrr = mtrr;
      return Host;
    }

    // Builds the guest structure from a host drm_map.
    fex_drm_map(struct drm_map Host)
      : handle {auto_compat_ptr {Host.handle}} {
      offset = Host.offset;
      size = Host.size;
      type = Host.type;
      flags = Host.flags;
      mtrr = Host.mtrr;
    }
  };

  // 32-bit x86 guest view of drm_client; all members are plain 32-bit integers.
  struct FEX_ANNOTATE("alias-x86_32-drm_client") FEX_ANNOTATE("fex-match") fex_drm_client {
    int32_t idx;
    int32_t auth;
    uint32_t pid;
    uint32_t uid;
    uint32_t magic;
    uint32_t iocs;

    fex_drm_client() = delete;

    // Produces the host drm_client from the guest fields.
    operator drm_client() const {
      drm_client Host {};
      Host.idx = idx;
      Host.auth = auth;
      Host.pid = pid;
      Host.uid = uid;
      Host.magic = magic;
      Host.iocs = iocs;
      return Host;
    }

    // Builds the guest structure from a host drm_client.
    fex_drm_client(struct drm_client Host) {
      idx = Host.idx;
      auth = Host.auth;
      pid = Host.pid;
      uid = Host.uid;
      magic = Host.magic;
      iocs = Host.iocs;
    }
  };

  // 32-bit x86 guest view of drm_stats: a count followed by 15 value/type pairs.
  struct FEX_ANNOTATE("alias-x86_32-drm_stats") FEX_ANNOTATE("fex-match") fex_drm_stats {
    uint32_t count;
    struct {
      uint32_t value;
      enum drm_stat_type type;
    } data[15];

    fex_drm_stats() = delete;

    // Produces the host drm_stats, copying every one of the 15 entries.
    operator drm_stats() const {
      drm_stats Host {};
      Host.count = count;
      for (size_t Idx = 0; Idx < 15; ++Idx) {
        Host.data[Idx].value = data[Idx].value;
        Host.data[Idx].type = data[Idx].type;
      }
      return Host;
    }

    // Builds the guest structure from a host drm_stats, copying every one of the 15 entries.
    fex_drm_stats(struct drm_stats Host) {
      count = Host.count;
      for (size_t Idx = 0; Idx < 15; ++Idx) {
        data[Idx].value = Host.data[Idx].value;
        data[Idx].type = Host.data[Idx].type;
      }
    }
  };

  // 32-bit x86 guest view of drm_buf_desc.
  struct FEX_ANNOTATE("alias-x86_32-drm_buf_desc") FEX_ANNOTATE("fex-match") fex_drm_buf_desc {
    int32_t count;
    int32_t size;
    int32_t low_mark;
    int32_t high_mark;
    enum { _DRM_PAGE_ALIGN = 0x01, _DRM_AGP_BUFFER = 0x02, _DRM_SG_BUFFER = 0x04, _DRM_FB_BUFFER = 0x08, _DRM_PCI_BUFFER_RO = 0x10 } flags;
    uint32_t agp_start;

    fex_drm_buf_desc() = delete;

    // Produces the host drm_buf_desc from the guest fields.
    operator drm_buf_desc() const {
      drm_buf_desc Host {};
      Host.count = count;
      Host.size = size;
      Host.low_mark = low_mark;
      Host.high_mark = high_mark;
      // The two flags members are distinct unnamed enum types, so the value is transferred bytewise.
      memcpy(&Host.flags, &flags, sizeof(Host.flags));
      Host.agp_start = agp_start;
      return Host;
    }

    // Builds the guest structure from a host drm_buf_desc.
    fex_drm_buf_desc(struct drm_buf_desc Host) {
      count = Host.count;
      size = Host.size;
      low_mark = Host.low_mark;
      high_mark = Host.high_mark;
      // The two flags members are distinct unnamed enum types, so the value is transferred bytewise.
      memcpy(&flags, &Host.flags, sizeof(Host.flags));
      agp_start = Host.agp_start;
    }
  };

  // 32-bit x86 guest view of drm_buf_info.
  struct FEX_ANNOTATE("alias-x86_32-drm_buf_info") FEX_ANNOTATE("fex-match") fex_drm_buf_info {
    int32_t count;
    compat_ptr<struct drm_buf_desc> list;

    fex_drm_buf_info() = delete;

    // Produces the host drm_buf_info from the guest fields.
    operator drm_buf_info() const {
      drm_buf_info Host {};
      Host.count = count;
      Host.list = list;
      return Host;
    }

    // Builds the guest structure from a host drm_buf_info.
    fex_drm_buf_info(struct drm_buf_info Host)
      : list {auto_compat_ptr {Host.list}} {
      count = Host.count;
    }
  };

  // 32-bit x86 guest view of drm_buf_pub.
  struct FEX_ANNOTATE("alias-x86_32-drm_buf_pub") FEX_ANNOTATE("fex-match") fex_drm_buf_pub {
    int32_t idx;
    int32_t total;
    int32_t used;
    compat_ptr<void> address;

    fex_drm_buf_pub() = delete;

    // Produces the host drm_buf_pub from the guest fields.
    operator drm_buf_pub() const {
      drm_buf_pub Host {};
      Host.idx = idx;
      Host.total = total;
      Host.used = used;
      Host.address = address;
      return Host;
    }

    // Builds the guest structure from a host drm_buf_pub.
    fex_drm_buf_pub(struct drm_buf_pub Host)
      : address {auto_compat_ptr {Host.address}} {
      idx = Host.idx;
      total = Host.total;
      used = Host.used;
    }
  };

  // 32-bit x86 guest view of drm_buf_map. The second member is spelled `virt` when compiled as C++
  // (where `virtual` is a keyword) and `virtual` otherwise.
  struct FEX_ANNOTATE("alias-x86_32-drm_buf_map") FEX_ANNOTATE("fex-match") fex_drm_buf_map {
    int32_t count;
#ifdef __cplusplus
    compat_ptr<void> virt;
#else
    compat_ptr<void> virtual;
#endif
    compat_ptr<drm_buf_pub> list;

    fex_drm_buf_map() = delete;

    // Produces the host drm_buf_map from the guest fields.
    operator drm_buf_map() const {
      drm_buf_map Host {};
      Host.count = count;
#ifdef __cplusplus
      Host.virt = virt;
#else
      Host.virtual = virtual;
#endif
      Host.list = list;
      return Host;
    }

    // Builds the guest structure from a host drm_buf_map.
    fex_drm_buf_map(struct drm_buf_map Host)
#ifdef __cplusplus
      : virt {auto_compat_ptr {Host.virt}}
#else
      : virtual {auto_compat_ptr {Host.virtual}}
#endif
      , list {auto_compat_ptr {Host.list}} {
      count = Host.count;
    }
  };

  // 32-bit x86 guest view of drm_buf_free.
  struct FEX_ANNOTATE("alias-x86_32-drm_buf_free") FEX_ANNOTATE("fex-match") fex_drm_buf_free {
    int32_t count;
    compat_ptr<int> list;

    fex_drm_buf_free() = delete;

    // Produces the host drm_buf_free from the guest fields.
    operator drm_buf_free() const {
      drm_buf_free Host {};
      Host.count = count;
      Host.list = list;
      return Host;
    }

    // Builds the guest structure from a host drm_buf_free.
    fex_drm_buf_free(struct drm_buf_free Host)
      : list {auto_compat_ptr {Host.list}} {
      count = Host.count;
    }
  };

  // 32-bit x86 guest view of drm_ctx_priv_map.
  struct FEX_ANNOTATE("alias-x86_32-drm_ctx_priv_map") FEX_ANNOTATE("fex-match") fex_drm_ctx_priv_map {
    uint32_t ctx_id;
    compat_ptr<void> handle;

    fex_drm_ctx_priv_map() = delete;

    // Produces the host drm_ctx_priv_map from the guest fields.
    operator drm_ctx_priv_map() const {
      drm_ctx_priv_map Host {};
      Host.ctx_id = ctx_id;
      Host.handle = handle;
      return Host;
    }

    // Builds the guest structure from a host drm_ctx_priv_map.
    fex_drm_ctx_priv_map(struct drm_ctx_priv_map Host)
      : handle {auto_compat_ptr {Host.handle}} {
      ctx_id = Host.ctx_id;
    }
  };

  // 32-bit x86 guest view of drm_ctx_res: a count plus a pointer to an array of drm_ctx entries.
  struct FEX_ANNOTATE("alias-x86_32-drm_ctx_res") FEX_ANNOTATE("fex-match") fex_drm_ctx_res {
    int32_t count;
    compat_ptr<struct drm_ctx> contexts;

    fex_drm_ctx_res() = delete;

    // Produces the host drm_ctx_res from the guest fields.
    operator drm_ctx_res() const {
      drm_ctx_res val {};
      CPYT(count);
      // The contexts pointer has to be forwarded as well; without it the host structure would always
      // carry a null pointer regardless of what the guest supplied.
      CPYT(contexts);
      return val;
    }

    // Builds the guest structure from a host drm_ctx_res.
    fex_drm_ctx_res(struct drm_ctx_res val)
      : contexts {auto_compat_ptr {val.contexts}} {
      CPYF(count);
    }
  };

  // 32-bit x86 guest view of drm_dma; the four index/size arrays are held as compat_ptr<int32_t>.
  struct FEX_ANNOTATE("alias-x86_32-drm_dma") FEX_ANNOTATE("fex-match") fex_drm_dma {
    int32_t context;
    int32_t send_count;
    compat_ptr<int32_t> send_indices;
    compat_ptr<int32_t> send_sizes;
    enum drm_dma_flags flags;
    int32_t request_count;
    int32_t request_size;
    compat_ptr<int32_t> request_indices;
    compat_ptr<int32_t> request_sizes;
    int32_t granted_count;

    fex_drm_dma() = delete;

    // Produces the host drm_dma from the guest fields.
    operator drm_dma() const {
      drm_dma Host {};
      Host.context = context;
      Host.send_count = send_count;
      Host.send_indices = send_indices;
      Host.send_sizes = send_sizes;
      Host.flags = flags;
      Host.request_count = request_count;
      Host.request_size = request_size;
      Host.request_indices = request_indices;
      Host.request_sizes = request_sizes;
      Host.granted_count = granted_count;
      return Host;
    }

    // Builds the guest structure from a host drm_dma.
    fex_drm_dma(struct drm_dma Host)
      : send_indices {auto_compat_ptr {Host.send_indices}}
      , send_sizes {auto_compat_ptr {Host.send_sizes}}
      , request_indices {auto_compat_ptr {Host.request_indices}}
      , request_sizes {auto_compat_ptr {Host.request_sizes}} {
      context = Host.context;
      send_count = Host.send_count;
      flags = Host.flags;
      request_count = Host.request_count;
      request_size = Host.request_size;
      granted_count = Host.granted_count;
    }
  };

  // 32-bit x86 guest view of drm_scatter_gather.
  struct FEX_ANNOTATE("alias-x86_32-drm_scatter_gather") FEX_ANNOTATE("fex-match") fex_drm_scatter_gather {
    uint32_t size;
    uint32_t handle;

    fex_drm_scatter_gather() = delete;

    // Produces the host drm_scatter_gather from the guest fields.
    operator drm_scatter_gather() const {
      drm_scatter_gather Host {};
      Host.size = size;
      Host.handle = handle;
      return Host;
    }

    // Builds the guest structure from a host drm_scatter_gather.
    fex_drm_scatter_gather(struct drm_scatter_gather Host) {
      size = Host.size;
      handle = Host.handle;
    }
  };

  // 32-bit x86 guest view of drm_wait_vblank_request.
  struct FEX_ANNOTATE("alias-x86_32-drm_wait_vblank_request") FEX_ANNOTATE("fex-match") fex_drm_wait_vblank_request {
    enum drm_vblank_seq_type type;
    uint32_t sequence;
    uint32_t signal;

    fex_drm_wait_vblank_request() = delete;

    // Produces the host drm_wait_vblank_request from the guest fields.
    operator drm_wait_vblank_request() const {
      drm_wait_vblank_request Host {};
      Host.type = type;
      Host.sequence = sequence;
      Host.signal = signal;
      return Host;
    }

    // Builds the guest structure from a host drm_wait_vblank_request.
    fex_drm_wait_vblank_request(struct drm_wait_vblank_request Host) {
      type = Host.type;
      sequence = Host.sequence;
      signal = Host.signal;
    }
  };

  // 32-bit x86 guest view of drm_wait_vblank_reply; the timestamp is carried as two int32_t values.
  struct FEX_ANNOTATE("alias-x86_32-drm_wait_vblank_reply") FEX_ANNOTATE("fex-match") fex_drm_wait_vblank_reply {
    enum drm_vblank_seq_type type;
    uint32_t sequence;
    int32_t tval_sec;
    int32_t tval_usec;

    fex_drm_wait_vblank_reply() = delete;

    // Produces the host drm_wait_vblank_reply from the guest fields.
    operator drm_wait_vblank_reply() const {
      drm_wait_vblank_reply Host {};
      Host.type = type;
      Host.sequence = sequence;
      Host.tval_sec = tval_sec;
      Host.tval_usec = tval_usec;
      return Host;
    }

    // Builds the guest structure from a host drm_wait_vblank_reply.
    fex_drm_wait_vblank_reply(struct drm_wait_vblank_reply Host) {
      type = Host.type;
      sequence = Host.sequence;
      tval_sec = Host.tval_sec;
      tval_usec = Host.tval_usec;
    }
  };

  // 32-bit x86 guest view of the drm_wait_vblank union: the same storage is interpreted either as the
  // request or as the reply structure declared above. It cannot be default-constructed.
  union FEX_ANNOTATE("alias-x86_32-drm_wait_vblank") FEX_ANNOTATE("fex-match") fex_drm_wait_vblank {
    fex_drm_wait_vblank_request request;
    fex_drm_wait_vblank_reply reply;

    fex_drm_wait_vblank() = delete;
  };

  // Packed 32-bit x86 guest view of drm_update_draw; the 64-bit data member uses compat_uint64_t.
  struct FEX_ANNOTATE("alias-x86_32-drm_update_draw") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_update_draw {
    drm_drawable_t handle;
    uint32_t type;
    uint32_t num;
    compat_uint64_t data;

    fex_drm_update_draw() = delete;

    // Produces the host drm_update_draw from the guest fields.
    operator drm_update_draw() const {
      drm_update_draw Host {};
      Host.handle = handle;
      Host.type = type;
      Host.num = num;
      Host.data = data;
      return Host;
    }

    // Builds the guest structure from a host drm_update_draw.
    fex_drm_update_draw(struct drm_update_draw Host) {
      handle = Host.handle;
      type = Host.type;
      num = Host.num;
      data = Host.data;
    }
  };

  // Packed 32-bit x86 guest view of drm_mode_get_plane_res.
  struct FEX_ANNOTATE("alias-x86_32-drm_mode_get_plane_res") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_mode_get_plane_res {
    compat_uint64_t plane_id_ptr;
    uint32_t count_planes;
    fex_drm_mode_get_plane_res() = delete;

    // Produces the host drm_mode_get_plane_res from the guest fields.
    operator drm_mode_get_plane_res() const {
      drm_mode_get_plane_res Host {};
      Host.plane_id_ptr = plane_id_ptr;
      Host.count_planes = count_planes;
      return Host;
    }

    // Builds the guest structure from a host drm_mode_get_plane_res.
    fex_drm_mode_get_plane_res(struct drm_mode_get_plane_res Host) {
      plane_id_ptr = Host.plane_id_ptr;
      count_planes = Host.count_planes;
    }
  };

  // Packed 32-bit x86 guest view of drm_mode_fb_cmd2, including its four per-plane arrays.
  struct FEX_ANNOTATE("alias-x86_32-drm_mode_fb_cmd2") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_mode_fb_cmd2 {
    uint32_t fb_id;
    uint32_t width;
    uint32_t height;
    uint32_t pixel_format;
    uint32_t flags;

    uint32_t handles[4];
    uint32_t pitches[4];
    uint32_t offsets[4];
    compat_uint64_t modifier[4];
    fex_drm_mode_fb_cmd2() = delete;

    // Produces the host drm_mode_fb_cmd2, copying the scalar members and all four plane slots.
    operator drm_mode_fb_cmd2() const {
      drm_mode_fb_cmd2 Host {};
      Host.fb_id = fb_id;
      Host.width = width;
      Host.height = height;
      Host.pixel_format = pixel_format;
      Host.flags = flags;
      for (int Plane = 0; Plane < 4; ++Plane) {
        Host.handles[Plane] = handles[Plane];
        Host.pitches[Plane] = pitches[Plane];
        Host.offsets[Plane] = offsets[Plane];
        Host.modifier[Plane] = modifier[Plane];
      }
      return Host;
    }

    // Builds the guest structure from a host drm_mode_fb_cmd2.
    fex_drm_mode_fb_cmd2(struct drm_mode_fb_cmd2 Host) {
      fb_id = Host.fb_id;
      width = Host.width;
      height = Host.height;
      pixel_format = Host.pixel_format;
      flags = Host.flags;
      for (int Plane = 0; Plane < 4; ++Plane) {
        handles[Plane] = Host.handles[Plane];
        pitches[Plane] = Host.pitches[Plane];
        offsets[Plane] = Host.offsets[Plane];
        modifier[Plane] = Host.modifier[Plane];
      }
    }
  };

  // Packed 32-bit x86 guest view of drm_mode_obj_get_properties.
  struct FEX_ANNOTATE("alias-x86_32-drm_mode_obj_get_properties") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_mode_obj_get_properties {
    compat_uint64_t props_ptr;
    compat_uint64_t prop_values_ptr;
    uint32_t count_props;
    uint32_t obj_id;
    uint32_t obj_type;

    fex_drm_mode_obj_get_properties() = delete;

    // Produces the host drm_mode_obj_get_properties from the guest fields.
    operator drm_mode_obj_get_properties() const {
      drm_mode_obj_get_properties Host {};
      Host.props_ptr = props_ptr;
      Host.prop_values_ptr = prop_values_ptr;
      Host.count_props = count_props;
      Host.obj_id = obj_id;
      Host.obj_type = obj_type;
      return Host;
    }

    // Builds the guest structure from a host drm_mode_obj_get_properties.
    fex_drm_mode_obj_get_properties(struct drm_mode_obj_get_properties Host) {
      props_ptr = Host.props_ptr;
      prop_values_ptr = Host.prop_values_ptr;
      count_props = Host.count_props;
      obj_id = Host.obj_id;
      obj_type = Host.obj_type;
    }
  };

  // Packed 32-bit x86 guest view of drm_mode_obj_set_property.
  struct FEX_ANNOTATE("alias-x86_32-drm_mode_obj_set_property") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_mode_obj_set_property {
    compat_uint64_t value;
    uint32_t prop_id;
    uint32_t obj_id;
    uint32_t obj_type;

    fex_drm_mode_obj_set_property() = delete;

    // Produces the host drm_mode_obj_set_property from the guest fields.
    operator drm_mode_obj_set_property() const {
      drm_mode_obj_set_property Host {};
      Host.value = value;
      Host.prop_id = prop_id;
      Host.obj_id = obj_id;
      Host.obj_type = obj_type;
      return Host;
    }

    // Builds the guest structure from a host drm_mode_obj_set_property.
    fex_drm_mode_obj_set_property(struct drm_mode_obj_set_property Host) {
      value = Host.value;
      prop_id = Host.prop_id;
      obj_id = Host.obj_id;
      obj_type = Host.obj_type;
    }
  };

} // namespace DRM

namespace AMDGPU {
  // Guest-side counterpart of the host `drm_amdgpu_gem_metadata` struct.
  // Scalar fields go through CPYT/CPYF; the 64-entry `data.data` array is copied
  // wholesale with memcpy in both directions.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_amdgpu_gem_metadata") FEX_ANNOTATE("fex-match") fex_drm_amdgpu_gem_metadata {
    __u32 handle;
    __u32 op;
    struct {
      compat_uint64_t flags;
      compat_uint64_t tiling_info;
      __u32 data_size_bytes;
      __u32 data[64];
    } data;

    fex_drm_amdgpu_gem_metadata() = delete;
    // Guest -> host: starts from a value-initialized host struct.
    operator drm_amdgpu_gem_metadata() const {
      drm_amdgpu_gem_metadata val {};
      CPYT(handle);
      CPYT(op);
      CPYT(data.flags);
      CPYT(data.tiling_info);
      CPYT(data.data_size_bytes);
      // The copy length is the size of the guest-side array.
      memcpy(val.data.data, data.data, sizeof(data.data));
      return val;
    }

    // Host -> guest.
    fex_drm_amdgpu_gem_metadata(struct drm_amdgpu_gem_metadata val) {
      CPYF(handle);
      CPYF(op);
      CPYF(data.flags);
      CPYF(data.tiling_info);
      CPYF(data.data_size_bytes);
      // The copy length is the size of the guest-side array.
      memcpy(data.data, val.data.data, sizeof(data.data));
    }
  };
} // namespace AMDGPU

namespace RADEON {
  // Guest-side counterpart of the host `drm_radeon_gem_create` struct.
  // Default construction is deleted; conversion to and from the host struct is
  // provided by the conversion operator and the converting constructor.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_gem_create") FEX_ANNOTATE("fex-match") fex_drm_radeon_gem_create {
    compat_uint64_t size;
    compat_uint64_t alignment;
    __u32 handle;
    __u32 initial_domain;
    __u32 flags;

    fex_drm_radeon_gem_create() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_radeon_gem_create() const {
      drm_radeon_gem_create val {};
      CPYT(size);
      CPYT(alignment);
      CPYT(handle);
      CPYT(initial_domain);
      CPYT(flags);
      return val;
    }

    // Host -> guest: fills every member from the host struct.
    fex_drm_radeon_gem_create(struct drm_radeon_gem_create val) {
      CPYF(size);
      CPYF(alignment);
      CPYF(handle);
      CPYF(initial_domain);
      CPYF(flags);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_init_t` struct.
  // `func` is transferred with memcpy rather than CPYT/CPYF; every other field goes
  // through the copy macros.
  // NOTE(review): the memcpy is presumably needed because the host `func` has a
  // different (enum) type than the guest's uint32_t - confirm against the host header.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_PACKED FEX_ANNOTATE("alias-x86_32-drm_radeon_init") FEX_ANNOTATE("fex-match") fex_drm_radeon_init_t {
    uint32_t func;

    compat_ulong_t sarea_priv_offset;
    int32_t is_pci;
    int32_t cp_mode;
    int32_t gart_size;
    int32_t ring_size;
    int32_t usec_timeout;

    uint32_t fb_bpp;
    uint32_t front_offset, front_pitch;
    uint32_t back_offset, back_pitch;
    uint32_t depth_bpp;
    uint32_t depth_offset, depth_pitch;

    compat_ulong_t fb_offset;
    compat_ulong_t mmio_offset;
    compat_ulong_t ring_offset;
    compat_ulong_t ring_rptr_offset;
    compat_ulong_t buffers_offset;
    compat_ulong_t gart_textures_offset;

    fex_drm_radeon_init_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_radeon_init_t() const {
      drm_radeon_init_t val {};
      // Copies sizeof(val.func) bytes from the guest `func` into the host field.
      memcpy(&val.func, &func, sizeof(val.func));
      CPYT(sarea_priv_offset);
      CPYT(is_pci);
      CPYT(cp_mode);
      CPYT(gart_size);
      CPYT(ring_size);
      CPYT(usec_timeout);
      CPYT(fb_bpp);
      CPYT(front_offset);
      CPYT(front_pitch);
      CPYT(back_offset);
      CPYT(back_pitch);
      CPYT(depth_bpp);
      CPYT(depth_offset);
      CPYT(depth_pitch);
      CPYT(fb_offset);
      CPYT(mmio_offset);
      CPYT(ring_offset);
      CPYT(ring_rptr_offset);
      CPYT(buffers_offset);
      CPYT(gart_textures_offset);
      return val;
    }

    // Host -> guest: fills every member from the host struct.
    fex_drm_radeon_init_t(drm_radeon_init_t val) {
      // Copies sizeof(val.func) bytes from the host field into the guest `func`.
      memcpy(&func, &val.func, sizeof(val.func));
      CPYF(sarea_priv_offset);
      CPYF(is_pci);
      CPYF(cp_mode);
      CPYF(gart_size);
      CPYF(ring_size);
      CPYF(usec_timeout);
      CPYF(fb_bpp);
      CPYF(front_offset);
      CPYF(front_pitch);
      CPYF(back_offset);
      CPYF(back_pitch);
      CPYF(depth_bpp);
      CPYF(depth_offset);
      CPYF(depth_pitch);
      CPYF(fb_offset);
      CPYF(mmio_offset);
      CPYF(ring_offset);
      CPYF(ring_rptr_offset);
      CPYF(buffers_offset);
      CPYF(gart_textures_offset);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_clear_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list via `auto_compat_ptr`; scalar fields use
  // the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_clear") FEX_ANNOTATE("fex-match") fex_drm_radeon_clear_t {
    uint32_t flags;
    uint32_t clear_color;
    uint32_t clear_depth;
    uint32_t color_mask;
    uint32_t depth_mask;
    compat_ptr<drm_radeon_clear_rect_t> depth_boxes;

    fex_drm_radeon_clear_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_radeon_clear_t() const {
      drm_radeon_clear_t val {};
      CPYT(flags);
      CPYT(clear_color);
      CPYT(clear_depth);
      CPYT(color_mask);
      CPYT(depth_mask);
      CPYT(depth_boxes);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, scalars in the body.
    fex_drm_radeon_clear_t(drm_radeon_clear_t val)
      : depth_boxes {auto_compat_ptr {val.depth_boxes}} {
      CPYF(flags);
      CPYF(clear_color);
      CPYF(clear_depth);
      CPYF(color_mask);
      CPYF(depth_mask);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_stipple_t` struct; its only
  // member is a pointer held as a `compat_ptr`.
  // NOTE(review): CPYT, compat_ptr and auto_compat_ptr are defined outside this view;
  // their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_stipple") FEX_ANNOTATE("fex-match") fex_drm_radeon_stipple_t {
    compat_ptr<uint32_t> mask;

    fex_drm_radeon_stipple_t() = delete;

    // Guest -> host: starts from a value-initialized host struct.
    operator drm_radeon_stipple_t() const {
      drm_radeon_stipple_t val {};
      CPYT(mask);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list.
    fex_drm_radeon_stipple_t(drm_radeon_stipple_t val)
      : mask {auto_compat_ptr {val.mask}} {}
  };

  // Guest-side counterpart of the host `drm_radeon_texture_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_texture") FEX_ANNOTATE("fex-match") fex_drm_radeon_texture_t {
    uint32_t offset;
    int32_t pitch;
    int32_t format;
    int32_t width;
    int32_t height;
    compat_ptr<drm_radeon_tex_image_t> image;

    fex_drm_radeon_texture_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_radeon_texture_t() const {
      drm_radeon_texture_t val {};
      CPYT(offset);
      CPYT(pitch);
      CPYT(format);
      CPYT(width);
      CPYT(height);
      CPYT(image);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, scalars in the body.
    fex_drm_radeon_texture_t(drm_radeon_texture_t val)
      : image {auto_compat_ptr {val.image}} {
      CPYF(offset);
      CPYF(pitch);
      CPYF(format);
      CPYF(width);
      CPYF(height);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_vertex2_t` struct.
  // Pointer members are held as `compat_ptr` and are initialized in the constructor's
  // member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_vertex2") FEX_ANNOTATE("fex-match") fex_drm_radeon_vertex2_t {
    int32_t idx;
    int32_t discard;
    int32_t nr_states;
    compat_ptr<drm_radeon_state_t> state;
    int32_t nr_prims;
    compat_ptr<drm_radeon_prim_t> prim;

    fex_drm_radeon_vertex2_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_vertex2_t() const {
      // Value-initialize like the other conversions in this file so that any bytes
      // not covered by the field copies below (e.g. alignment padding in the host
      // layout) are zero instead of indeterminate stack contents.
      drm_radeon_vertex2_t val {};
      CPYT(idx);
      CPYT(discard);
      CPYT(nr_states);
      CPYT(state);
      CPYT(nr_prims);
      CPYT(prim);
      return val;
    }

    // Host -> guest: pointers are converted in the initializer list, scalars in the body.
    fex_drm_radeon_vertex2_t(drm_radeon_vertex2_t val)
      : state {auto_compat_ptr {val.state}}
      , prim {auto_compat_ptr {val.prim}} {
      CPYF(idx);
      CPYF(discard);
      CPYF(nr_states);
      CPYF(nr_prims);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_cmd_buffer_t` struct.
  // Pointer members are held as `compat_ptr` and are initialized in the constructor's
  // member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_cmd_buffer") FEX_ANNOTATE("fex-match") fex_drm_radeon_cmd_buffer_t {
    int32_t bufsz;
    compat_ptr<char> buf;
    int32_t nbox;
    compat_ptr<drm_clip_rect> boxes;

    fex_drm_radeon_cmd_buffer_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_cmd_buffer_t() const {
      // Value-initialize like the other conversions in this file so that any bytes
      // not covered by the field copies below (e.g. alignment padding in the host
      // layout) are zero instead of indeterminate stack contents.
      drm_radeon_cmd_buffer_t val {};
      CPYT(bufsz);
      CPYT(buf);
      CPYT(nbox);
      CPYT(boxes);
      return val;
    }

    // Host -> guest: pointers are converted in the initializer list, scalars in the body.
    fex_drm_radeon_cmd_buffer_t(drm_radeon_cmd_buffer_t val)
      : buf {auto_compat_ptr {val.buf}}
      , boxes {auto_compat_ptr {val.boxes}} {
      CPYF(bufsz);
      CPYF(nbox);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_getparam_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; the scalar field uses the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_getparam") FEX_ANNOTATE("fex-match") fex_drm_radeon_getparam_t {
    int32_t param;
    compat_ptr<void> value;

    fex_drm_radeon_getparam_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_getparam_t() const {
      // Value-initialize like the other conversions in this file so that any bytes
      // not covered by the field copies below (e.g. alignment padding in the host
      // layout) are zero instead of indeterminate stack contents.
      drm_radeon_getparam_t val {};
      CPYT(param);
      CPYT(value);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, the scalar in the body.
    fex_drm_radeon_getparam_t(drm_radeon_getparam_t val)
      : value {auto_compat_ptr {val.value}} {
      CPYF(param);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_mem_alloc_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_mem_alloc") FEX_ANNOTATE("fex-match") fex_drm_radeon_mem_alloc_t {
    int32_t region;
    int32_t alignment;
    int32_t size;
    compat_ptr<int32_t> region_offset;

    fex_drm_radeon_mem_alloc_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_mem_alloc_t() const {
      // Value-initialize like the other conversions in this file so that any bytes
      // not covered by the field copies below (e.g. alignment padding in the host
      // layout) are zero instead of indeterminate stack contents.
      drm_radeon_mem_alloc_t val {};
      CPYT(region);
      CPYT(alignment);
      CPYT(size);
      CPYT(region_offset);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, scalars in the body.
    fex_drm_radeon_mem_alloc_t(drm_radeon_mem_alloc_t val)
      : region_offset {auto_compat_ptr {val.region_offset}} {
      CPYF(region);
      CPYF(alignment);
      CPYF(size);
    }
  };

  // Guest-side counterpart of the host `drm_radeon_irq_emit_t` struct; its only
  // member is a pointer held as a `compat_ptr`.
  // NOTE(review): CPYT, compat_ptr and auto_compat_ptr are defined outside this view;
  // their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_irq_emit") FEX_ANNOTATE("fex-match") fex_drm_radeon_irq_emit_t {
    compat_ptr<int32_t> irq_seq;

    fex_drm_radeon_irq_emit_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_irq_emit_t() const {
      // Value-initialize for consistency with the other conversions in this file,
      // so the host struct never starts out with indeterminate contents.
      drm_radeon_irq_emit_t val {};
      CPYT(irq_seq);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list.
    fex_drm_radeon_irq_emit_t(drm_radeon_irq_emit_t val)
      : irq_seq {auto_compat_ptr {val.irq_seq}} {}
  };

  // Guest-side counterpart of the host `drm_radeon_setparam_t` struct.
  // Default construction is deleted; conversion to and from the host struct is
  // provided by the conversion operator and the converting constructor.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_radeon_setparam") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_radeon_setparam_t {
    uint32_t param;
    compat_int64_t value;

    fex_drm_radeon_setparam_t() = delete;

    // Guest -> host conversion.
    operator drm_radeon_setparam_t() const {
      // Value-initialize like the other conversions in this file so that any bytes
      // not covered by the field copies below (e.g. alignment padding in the host
      // layout) are zero instead of indeterminate stack contents.
      drm_radeon_setparam_t val {};
      CPYT(param);
      CPYT(value);
      return val;
    }

    // Host -> guest: fills every member from the host struct.
    fex_drm_radeon_setparam_t(drm_radeon_setparam_t val) {
      CPYF(param);
      CPYF(value);
    }
  };

} // namespace RADEON

namespace MSM {
  // Guest-side counterpart of the host `drm_msm_timespec` struct.
  // Unlike its siblings this type keeps a (private) default constructor and exposes
  // host -> guest conversion through the static `FromHost` factory rather than a
  // converting constructor.
  // NOTE(review): CPYT is defined outside this view; presumably it copies a member
  // into `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_msm_timespec") FEX_ANNOTATE("fex-match") fex_drm_msm_timespec {
    compat_int64_t tv_sec;
    compat_int64_t tv_nsec;

    // Guest -> host: starts from a value-initialized host struct and fills both fields.
    operator drm_msm_timespec() const {
      drm_msm_timespec val {};
      CPYT(tv_sec);
      CPYT(tv_nsec);
      return val;
    }

    // Host -> guest: both members are assigned explicitly, so the default-constructed
    // temporary is fully populated before it is returned.
    static fex_drm_msm_timespec FromHost(struct drm_msm_timespec val) {
      fex_drm_msm_timespec ret;
      ret.tv_sec = val.tv_sec;
      ret.tv_nsec = val.tv_nsec;
      return ret;
    }

  private:
    // Private so that outside code has to go through FromHost().
    fex_drm_msm_timespec() = default;
  };

  // Guest-side counterpart of the host `drm_msm_wait_fence` struct.
  // The embedded timeout is itself a guest-layout type (`fex_drm_msm_timespec`), so
  // the constructor builds it with `fex_drm_msm_timespec::FromHost`.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_msm_wait_fence") FEX_ANNOTATE("fex-match") FEX_PACKED fex_drm_msm_wait_fence {
    uint32_t fence;
    uint32_t flags;
    struct fex_drm_msm_timespec timeout;
    uint32_t queueid;

    fex_drm_msm_wait_fence() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_msm_wait_fence() const {
      drm_msm_wait_fence val {};
      CPYT(fence);
      CPYT(flags);
      CPYT(timeout);
      CPYT(queueid);
      return val;
    }

    // Host -> guest: the timeout is converted in the initializer list, scalars in the body.
    fex_drm_msm_wait_fence(struct drm_msm_wait_fence val)
      : timeout {fex_drm_msm_timespec::FromHost(val.timeout)} {
      CPYF(fence);
      CPYF(flags);
      CPYF(queueid);
    }
  };

} // namespace MSM

namespace I915 {

  // Guest-side counterpart of the host `drm_i915_batchbuffer_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_i915_batchbuffer") FEX_ANNOTATE("fex-match") fex_drm_i915_batchbuffer_t {
    int32_t start;
    int32_t used;
    int32_t DR1;
    int32_t DR4;
    int32_t num_cliprects;
    compat_ptr<struct drm_clip_rect> cliprects;

    fex_drm_i915_batchbuffer_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_i915_batchbuffer_t() const {
      drm_i915_batchbuffer_t val {};
      CPYT(start);
      CPYT(used);
      CPYT(DR1);
      CPYT(DR4);
      CPYT(num_cliprects);
      CPYT(cliprects);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, scalars in the body.
    fex_drm_i915_batchbuffer_t(drm_i915_batchbuffer_t val)
      : cliprects {auto_compat_ptr {val.cliprects}} {
      CPYF(start);
      CPYF(used);
      CPYF(DR1);
      CPYF(DR4);
      CPYF(num_cliprects);
    }
  };

  // Guest-side counterpart of the host `drm_i915_irq_emit_t` struct; its only
  // member is a pointer held as a `compat_ptr`.
  // NOTE(review): CPYT, compat_ptr and auto_compat_ptr are defined outside this view;
  // their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_i915_irq_emit") FEX_ANNOTATE("fex-match") fex_drm_i915_irq_emit_t {
    compat_ptr<int> irq_seq;

    fex_drm_i915_irq_emit_t() = delete;

    // Guest -> host: starts from a value-initialized host struct.
    operator drm_i915_irq_emit_t() const {
      drm_i915_irq_emit_t val {};
      CPYT(irq_seq);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list.
    fex_drm_i915_irq_emit_t(drm_i915_irq_emit_t val)
      : irq_seq {auto_compat_ptr {val.irq_seq}} {}
  };

  // Guest-side counterpart of the host `drm_i915_getparam_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; the scalar field uses the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_i915_getparam") FEX_ANNOTATE("fex-match") fex_drm_i915_getparam_t {
    int32_t param;
    compat_ptr<int> value;
    fex_drm_i915_getparam_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills both fields.
    operator drm_i915_getparam_t() const {
      drm_i915_getparam_t val {};
      CPYT(param);
      CPYT(value);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, the scalar in the body.
    fex_drm_i915_getparam_t(drm_i915_getparam_t val)
      : value {auto_compat_ptr {val.value}} {
      CPYF(param);
    }
  };

  // Guest-side counterpart of the host `drm_i915_mem_alloc_t` struct.
  // The pointer member is held as a `compat_ptr` and is initialized in the
  // constructor's member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-drm_i915_mem_alloc") FEX_ANNOTATE("fex-match") fex_drm_i915_mem_alloc_t {
    int32_t region;
    int32_t alignment;
    int32_t size;
    compat_ptr<int> region_offset;
    fex_drm_i915_mem_alloc_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_i915_mem_alloc_t() const {
      drm_i915_mem_alloc_t val {};
      CPYT(region);
      CPYT(alignment);
      CPYT(size);
      CPYT(region_offset);
      return val;
    }

    // Host -> guest: the pointer is converted in the initializer list, scalars in the body.
    fex_drm_i915_mem_alloc_t(drm_i915_mem_alloc_t val)
      : region_offset {auto_compat_ptr {val.region_offset}} {
      CPYF(region);
      CPYF(alignment);
      CPYF(size);
    }
  };

  // Guest-side counterpart of the host `drm_i915_cmdbuffer_t` struct.
  // Pointer members are held as `compat_ptr` and are initialized in the constructor's
  // member-initializer list; scalar fields use the copy macros.
  // NOTE(review): CPYT/CPYF, compat_ptr and auto_compat_ptr are defined outside this
  // view; their exact conversion semantics should be confirmed there.
  struct FEX_ANNOTATE("alias-x86_32-_drm_i915_cmdbuffer") FEX_ANNOTATE("fex-match") fex_drm_i915_cmdbuffer_t {
    compat_ptr<char> buf;
    int32_t sz;
    int32_t DR1;
    int32_t DR4;
    int32_t num_cliprects;
    compat_ptr<struct drm_clip_rect> cliprects;

    fex_drm_i915_cmdbuffer_t() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_i915_cmdbuffer_t() const {
      drm_i915_cmdbuffer_t val {};
      CPYT(buf);
      CPYT(sz);
      CPYT(DR1);
      CPYT(DR4);
      CPYT(num_cliprects);
      CPYT(cliprects);
      return val;
    }

    // Host -> guest: pointers are converted in the initializer list, scalars in the body.
    fex_drm_i915_cmdbuffer_t(drm_i915_cmdbuffer_t val)
      : buf {auto_compat_ptr {val.buf}}
      , cliprects {auto_compat_ptr {val.cliprects}} {
      CPYF(sz);
      CPYF(DR1);
      CPYF(DR4);
      CPYF(num_cliprects);
    }
  };

// Fallback for DRM headers that do not provide DRM_IOCTL_I915_GEM_MMAP_OFFSET
// (older DRM headers lack it): when the ioctl macro is missing, declare the
// argument struct locally and define the ioctl number on top of the
// DRM_I915_GEM_MMAP_GTT command index.
#ifndef DRM_IOCTL_I915_GEM_MMAP_OFFSET
  struct drm_i915_gem_mmap_offset {
    uint32_t handle;
    uint32_t pad;
    compat_uint64_t offset;
    compat_uint64_t flags;
    compat_uint64_t extensions;
  };

// The struct is named by its fully qualified path because the macro may be expanded
// outside this namespace.
#define DRM_IOCTL_I915_GEM_MMAP_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, FEX::HLE::x32::I915::drm_i915_gem_mmap_offset)
#endif
} // namespace I915

namespace VC4 {
  // Guest-side counterpart of the host `drm_vc4_perfmon_get_values` struct.
  // Default construction is deleted; conversion to and from the host struct is
  // provided by the conversion operator and the converting constructor.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_vc4_perfmon_get_values") FEX_ANNOTATE("fex-match") fex_drm_vc4_perfmon_get_values {
    uint32_t id;
    compat_uint64_t values_ptr;

    fex_drm_vc4_perfmon_get_values() = delete;

    // Guest -> host: starts from a value-initialized host struct and fills both fields.
    operator drm_vc4_perfmon_get_values() const {
      drm_vc4_perfmon_get_values val {};
      CPYT(id);
      CPYT(values_ptr);
      return val;
    }
    // Host -> guest: fills both members from the host struct.
    fex_drm_vc4_perfmon_get_values(drm_vc4_perfmon_get_values val) {
      CPYF(id);
      CPYF(values_ptr);
    }
  };

} // namespace VC4

namespace V3D {
  // Guest-side counterpart of the host `drm_v3d_submit_csd` struct.
  // Because the kernel struct has grown over time (see the member notes below), the
  // ioctl size supplied by the guest may be smaller than this struct. The
  // SafeConvertToGuest/SafeConvertToHost helpers therefore take the ioctl size and
  // never touch more than min(IoctlSize, sizeof(fex_drm_v3d_submit_csd)) bytes of the
  // guest buffer.
  // NOTE(review): CPYT/CPYF are defined outside this view; presumably CPYT copies a
  // member into `val` and CPYF copies it back out of `val` - confirm.
  struct FEX_ANNOTATE("alias-x86_32-drm_v3d_submit_csd") FEX_ANNOTATE("fex-match") fex_drm_v3d_submit_csd {
    uint32_t cfg[7];
    uint32_t coef[4];

    compat_uint64_t bo_handles;

    uint32_t bo_handle_count;

    uint32_t in_sync;

    uint32_t out_sync;

    /**
     * @name This member was added in Linux 5.15
     * Commit: 26a4dc29b74a137f45665089f6d3d633fcc9b662
     *
     * As far as I can tell this is an ABI break. Probably safe since this likely would have been padded to 8 bytes.
     * Still pretty sketchy.
     * @{ */

    uint32_t perfmon_id;
    /**  @} */

    /**
     * @name These members were added in Linux 5.17
     * Commit: bb3425efdcd99f2b4e608e850226f7107b2f993e
     * This added additional members to `drm_v3d_submit_cl` and `drm_v3d_submit_tfu` as well.
     *
     * As far as I can tell this is an ABI break for the `submit_tfu` and `submit_csd` structs.
     * `submit_cl` is safe because it already had a flags member.
     *
     * We just need to eat the fact that if the userspace isn't compiled against Linux 5.17 headers
     * that copying this member may cause faults that we can't capture currently.
     * @{ */

    compat_uint64_t extensions;

    uint32_t flags;

    uint32_t pad;
    /**  @} */

    fex_drm_v3d_submit_csd() = default;

    // Guest -> host: starts from a value-initialized host struct and fills every field.
    operator drm_v3d_submit_csd() const {
      drm_v3d_submit_csd val {};
      memcpy(val.cfg, cfg, sizeof(cfg));
      memcpy(val.coef, coef, sizeof(coef));
      CPYT(bo_handles);
      CPYT(bo_handle_count);
      CPYT(in_sync);
      CPYT(out_sync);
      CPYT(perfmon_id);
      CPYT(extensions);
      CPYT(flags);
      CPYT(pad);
      return val;
    }

    // Writes the guest representation of `Src` into `Result`.
    //   Result    - destination guest buffer; at least min(IoctlSize, sizeof(*this)) bytes are written.
    //   Src       - host struct to convert.
    //   IoctlSize - size of the guest's ioctl argument in bytes.
    static void SafeConvertToGuest(fex_drm_v3d_submit_csd* Result, drm_v3d_submit_csd Src, size_t IoctlSize) {
      // We need to be more careful since this API changes over time
      fex_drm_v3d_submit_csd Tmp = Src;
      // Clamp the copy to the size of the temporary: an ioctl size larger than this
      // struct must not read past `Tmp` on the stack (nor write extra bytes to the guest).
      // This mirrors the clamp in SafeConvertToHost.
      memcpy(Result, &Tmp, std::min(IoctlSize, sizeof(fex_drm_v3d_submit_csd)));
    }

    // Reads a guest struct of `IoctlSize` bytes from `Src` and returns the host representation.
    // Members that lie beyond `IoctlSize` are left zero.
    static drm_v3d_submit_csd SafeConvertToHost(fex_drm_v3d_submit_csd* Src, size_t IoctlSize) {
      // We need to be more careful since this API changes over time
      drm_v3d_submit_csd Result {};

      // Copy the incoming variable over with memcpy
      // This way if it is smaller than expected we will zero the remaining struct
      fex_drm_v3d_submit_csd Tmp {};
      memcpy(&Tmp, Src, std::min(IoctlSize, sizeof(fex_drm_v3d_submit_csd)));

      memcpy(Result.cfg, Tmp.cfg, sizeof(cfg));
      memcpy(Result.coef, Tmp.coef, sizeof(coef));
      Result.bo_handles = Tmp.bo_handles;
      Result.bo_handle_count = Tmp.bo_handle_count;
      Result.in_sync = Tmp.in_sync;
      Result.out_sync = Tmp.out_sync;
      Result.perfmon_id = Tmp.perfmon_id;
      Result.extensions = Tmp.extensions;
      Result.flags = Tmp.flags;
      Result.pad = Tmp.pad;

      return Result;
    }

    // Host -> guest: fills every member from the host struct.
    fex_drm_v3d_submit_csd(drm_v3d_submit_csd val) {
      memcpy(cfg, val.cfg, sizeof(cfg));
      memcpy(coef, val.coef, sizeof(coef));
      CPYF(bo_handles);
      CPYF(bo_handle_count);
      CPYF(in_sync);
      CPYF(out_sync);
      CPYF(perfmon_id);
      CPYF(extensions);
      CPYF(flags);
      CPYF(pad);
    }
  };

} // namespace V3D

#include "LinuxSyscalls/x32/Ioctl/drm.inl"
#include "LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/asahi_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/msm_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/i915_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/lima_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/panfrost_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/nouveau_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/nova_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/radeon_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/vc4_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/v3d_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/panthor_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/pvr_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/xe_drm.inl"
} // namespace FEX::HLE::x32
#undef CPYT
#undef CPYF


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/drm.inl
================================================
// Table of core DRM ioctls, one macro invocation per ioctl.
// _BASIC_META entries pass only the ioctl name; _CUSTOM_META entries additionally pass
// an ioctl number built with DRM_IOR/DRM_IOW/DRM_IOWR from a
// FEX::HLE::x32::DRM::fex_* struct type.
// NOTE(review): _BASIC_META and _CUSTOM_META are defined outside this file; what each
// entry expands to depends on the includer - confirm there.
_CUSTOM_META(DRM_IOCTL_VERSION, DRM_IOWR(0x00, FEX::HLE::x32::DRM::fex_drm_version))
_CUSTOM_META(DRM_IOCTL_GET_UNIQUE, DRM_IOWR(0x01, FEX::HLE::x32::DRM::fex_drm_unique))
_BASIC_META(DRM_IOCTL_GET_MAGIC)
_BASIC_META(DRM_IOCTL_IRQ_BUSID)
_CUSTOM_META(DRM_IOCTL_GET_MAP, DRM_IOWR(0x04, FEX::HLE::x32::DRM::fex_drm_map))
_CUSTOM_META(DRM_IOCTL_GET_CLIENT, DRM_IOWR(0x05, FEX::HLE::x32::DRM::fex_drm_client))
_CUSTOM_META(DRM_IOCTL_GET_STATS, DRM_IOR(0x06, FEX::HLE::x32::DRM::fex_drm_stats))
_BASIC_META(DRM_IOCTL_SET_VERSION)
_BASIC_META(DRM_IOCTL_MODESET_CTL)
_BASIC_META(DRM_IOCTL_GEM_CLOSE)
_BASIC_META(DRM_IOCTL_GEM_FLINK)
_BASIC_META(DRM_IOCTL_GEM_OPEN)
_BASIC_META(DRM_IOCTL_GET_CAP)
_BASIC_META(DRM_IOCTL_SET_CLIENT_CAP)

_CUSTOM_META(DRM_IOCTL_SET_UNIQUE, DRM_IOW(0x10, FEX::HLE::x32::DRM::fex_drm_unique))
_BASIC_META(DRM_IOCTL_AUTH_MAGIC)
_BASIC_META(DRM_IOCTL_BLOCK)
_BASIC_META(DRM_IOCTL_UNBLOCK)
_BASIC_META(DRM_IOCTL_CONTROL)
_CUSTOM_META(DRM_IOCTL_ADD_MAP, DRM_IOWR(0x15, FEX::HLE::x32::DRM::fex_drm_map))
_CUSTOM_META(DRM_IOCTL_ADD_BUFS, DRM_IOWR(0x16, FEX::HLE::x32::DRM::fex_drm_buf_desc))
_CUSTOM_META(DRM_IOCTL_MARK_BUFS, DRM_IOW(0x17, FEX::HLE::x32::DRM::fex_drm_buf_desc))
_CUSTOM_META(DRM_IOCTL_INFO_BUFS, DRM_IOWR(0x18, FEX::HLE::x32::DRM::fex_drm_buf_info))
_CUSTOM_META(DRM_IOCTL_MAP_BUFS, DRM_IOWR(0x19, FEX::HLE::x32::DRM::fex_drm_buf_map))
_CUSTOM_META(DRM_IOCTL_FREE_BUFS, DRM_IOW(0x1a, FEX::HLE::x32::DRM::fex_drm_buf_free))

_CUSTOM_META(DRM_IOCTL_RM_MAP, DRM_IOW(0x1b, FEX::HLE::x32::DRM::fex_drm_map))

_CUSTOM_META(DRM_IOCTL_SET_SAREA_CTX, DRM_IOW(0x1c, FEX::HLE::x32::DRM::fex_drm_ctx_priv_map))
_CUSTOM_META(DRM_IOCTL_GET_SAREA_CTX, DRM_IOWR(0x1d, FEX::HLE::x32::DRM::fex_drm_ctx_priv_map))

_BASIC_META(DRM_IOCTL_SET_MASTER)
_BASIC_META(DRM_IOCTL_DROP_MASTER)

_BASIC_META(DRM_IOCTL_ADD_CTX)
_BASIC_META(DRM_IOCTL_RM_CTX)
_BASIC_META(DRM_IOCTL_MOD_CTX)
_BASIC_META(DRM_IOCTL_GET_CTX)
_BASIC_META(DRM_IOCTL_SWITCH_CTX)
_BASIC_META(DRM_IOCTL_NEW_CTX)
_CUSTOM_META(DRM_IOCTL_RES_CTX, DRM_IOWR(0x26, FEX::HLE::x32::DRM::fex_drm_ctx_res))
_BASIC_META(DRM_IOCTL_ADD_DRAW)
_BASIC_META(DRM_IOCTL_RM_DRAW)
_CUSTOM_META(DRM_IOCTL_DMA, DRM_IOWR(0x29, FEX::HLE::x32::DRM::fex_drm_dma))
_BASIC_META(DRM_IOCTL_LOCK)
_BASIC_META(DRM_IOCTL_UNLOCK)
_BASIC_META(DRM_IOCTL_FINISH)

_BASIC_META(DRM_IOCTL_PRIME_HANDLE_TO_FD)
_BASIC_META(DRM_IOCTL_PRIME_FD_TO_HANDLE)

_BASIC_META(DRM_IOCTL_AGP_ACQUIRE)
_BASIC_META(DRM_IOCTL_AGP_RELEASE)
// The remaining AGP ioctls are intentionally left disabled (marked XXX).
// XXX: _BASIC_META(DRM_IOCTL_AGP_ENABLE)
// XXX: _BASIC_META(DRM_IOCTL_AGP_INFO)
// XXX: _BASIC_META(DRM_IOCTL_AGP_ALLOC)
// XXX: _BASIC_META(DRM_IOCTL_AGP_FREE)
// XXX: _BASIC_META(DRM_IOCTL_AGP_BIND)
// XXX: _BASIC_META(DRM_IOCTL_AGP_UNBIND)

_CUSTOM_META(DRM_IOCTL_SG_ALLOC, DRM_IOWR(0x38, FEX::HLE::x32::DRM::fex_drm_scatter_gather))
_CUSTOM_META(DRM_IOCTL_SG_FREE, DRM_IOW(0x39, FEX::HLE::x32::DRM::fex_drm_scatter_gather))

_CUSTOM_META(DRM_IOCTL_WAIT_VBLANK, DRM_IOWR(0x3a, FEX::HLE::x32::DRM::fex_drm_wait_vblank))

_BASIC_META(DRM_IOCTL_CRTC_GET_SEQUENCE)
_BASIC_META(DRM_IOCTL_CRTC_QUEUE_SEQUENCE)

_CUSTOM_META(DRM_IOCTL_UPDATE_DRAW, DRM_IOW(0x3f, FEX::HLE::x32::DRM::fex_drm_update_draw))

_BASIC_META(DRM_IOCTL_MODE_GETRESOURCES)
_BASIC_META(DRM_IOCTL_MODE_GETCRTC)
_BASIC_META(DRM_IOCTL_MODE_SETCRTC)
_BASIC_META(DRM_IOCTL_MODE_CURSOR)
_BASIC_META(DRM_IOCTL_MODE_GETGAMMA)
_BASIC_META(DRM_IOCTL_MODE_SETGAMMA)
_BASIC_META(DRM_IOCTL_MODE_GETENCODER)
_BASIC_META(DRM_IOCTL_MODE_GETCONNECTOR)
_BASIC_META(DRM_IOCTL_MODE_ATTACHMODE)
_BASIC_META(DRM_IOCTL_MODE_DETACHMODE)

_BASIC_META(DRM_IOCTL_MODE_GETPROPERTY)
_BASIC_META(DRM_IOCTL_MODE_SETPROPERTY)
_BASIC_META(DRM_IOCTL_MODE_GETPROPBLOB)
_BASIC_META(DRM_IOCTL_MODE_GETFB)
_BASIC_META(DRM_IOCTL_MODE_ADDFB)
_BASIC_META(DRM_IOCTL_MODE_RMFB)
_BASIC_META(DRM_IOCTL_MODE_PAGE_FLIP)
_BASIC_META(DRM_IOCTL_MODE_DIRTYFB)

_BASIC_META(DRM_IOCTL_MODE_CREATE_DUMB)
_BASIC_META(DRM_IOCTL_MODE_MAP_DUMB)
_BASIC_META(DRM_IOCTL_MODE_DESTROY_DUMB)
_CUSTOM_META(DRM_IOCTL_MODE_GETPLANERESOURCES, DRM_IOWR(0xB5, FEX::HLE::x32::DRM::fex_drm_mode_get_plane_res))
_BASIC_META(DRM_IOCTL_MODE_GETPLANE)
_BASIC_META(DRM_IOCTL_MODE_SETPLANE)
_CUSTOM_META(DRM_IOCTL_MODE_ADDFB2, DRM_IOWR(0xB8, FEX::HLE::x32::DRM::fex_drm_mode_fb_cmd2))
_CUSTOM_META(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, DRM_IOWR(0xB9, FEX::HLE::x32::DRM::fex_drm_mode_obj_get_properties))
_CUSTOM_META(DRM_IOCTL_MODE_OBJ_SETPROPERTY, DRM_IOWR(0xBA, FEX::HLE::x32::DRM::fex_drm_mode_obj_set_property))
_BASIC_META(DRM_IOCTL_MODE_CURSOR2)
_BASIC_META(DRM_IOCTL_MODE_ATOMIC)
_BASIC_META(DRM_IOCTL_MODE_CREATEPROPBLOB)
_BASIC_META(DRM_IOCTL_MODE_DESTROYPROPBLOB)

_BASIC_META(DRM_IOCTL_SYNCOBJ_CREATE)
_BASIC_META(DRM_IOCTL_SYNCOBJ_DESTROY)
_BASIC_META(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD)
_BASIC_META(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE)
_BASIC_META(DRM_IOCTL_SYNCOBJ_WAIT)
_BASIC_META(DRM_IOCTL_SYNCOBJ_RESET)
_BASIC_META(DRM_IOCTL_SYNCOBJ_SIGNAL)

_BASIC_META(DRM_IOCTL_MODE_CREATE_LEASE)
_BASIC_META(DRM_IOCTL_MODE_LIST_LESSEES)
_BASIC_META(DRM_IOCTL_MODE_GET_LEASE)
_BASIC_META(DRM_IOCTL_MODE_REVOKE_LEASE)

_BASIC_META(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT)
_BASIC_META(DRM_IOCTL_SYNCOBJ_QUERY)
_BASIC_META(DRM_IOCTL_SYNCOBJ_TRANSFER)
_BASIC_META(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL)

_CUSTOM_META(DRM_IOCTL_MODE_GETFB2, DRM_IOWR(0xCE, FEX::HLE::x32::DRM::fex_drm_mode_fb_cmd2))
_BASIC_META(DRM_IOCTL_SYNCOBJ_EVENTFD)
_BASIC_META(DRM_IOCTL_MODE_CLOSEFB)
_BASIC_META(DRM_IOCTL_SET_CLIENT_NAME)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/ext_fs.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/blktrace_api.h>
#include <linux/fs.h>
#include <linux/fiemap.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x32 {

// Block-device and filesystem ioctls. The entries live in ext_fs.inl and are expanded
// inside this namespace.
// NOTE(review): the macros used by the .inl come from HelperDefines.h (not visible here).
namespace ext_fs {
#include "LinuxSyscalls/x32/Ioctl/ext_fs.inl"
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/ext_fs.inl
================================================
// Table of block-device (BLK*), file (FI*) and filesystem (FS_IOC*) ioctls, one
// _BASIC_META invocation per ioctl.
// NOTE(review): _BASIC_META is defined outside this file; what each entry expands to
// depends on the includer - confirm there.

// Block-device ioctls.
_BASIC_META(BLKROSET)
_BASIC_META(BLKROGET)
_BASIC_META(BLKRRPART)
_BASIC_META(BLKGETSIZE)
_BASIC_META(BLKFLSBUF)
_BASIC_META(BLKRASET)
_BASIC_META(BLKRAGET)
_BASIC_META(BLKFRASET)
_BASIC_META(BLKFRAGET)
_BASIC_META(BLKSECTSET)
_BASIC_META(BLKSECTGET)
_BASIC_META(BLKSSZGET)

_BASIC_META(BLKBSZGET)
_BASIC_META(BLKBSZSET)
_BASIC_META(BLKGETSIZE64)
_BASIC_META(BLKTRACESETUP)
_BASIC_META(BLKTRACESTART)
_BASIC_META(BLKTRACESTOP)
_BASIC_META(BLKTRACETEARDOWN)
_BASIC_META(BLKDISCARD)
_BASIC_META(BLKIOMIN)
_BASIC_META(BLKIOOPT)
_BASIC_META(BLKALIGNOFF)
_BASIC_META(BLKPBSZGET)
_BASIC_META(BLKDISCARDZEROES)
_BASIC_META(BLKSECDISCARD)
_BASIC_META(BLKROTATIONAL)
_BASIC_META(BLKZEROOUT)

// FI* file ioctls.
_BASIC_META(FIBMAP)
_BASIC_META(FIGETBSZ)
_BASIC_META(FIFREEZE)
_BASIC_META(FITHAW)
_BASIC_META(FITRIM)
_BASIC_META(FICLONE)
_BASIC_META(FICLONERANGE)
_BASIC_META(FIDEDUPERANGE)

// FS_IOC* filesystem ioctls, including the FS_IOC32_* variants.
_BASIC_META(FS_IOC_GETFLAGS)
_BASIC_META(FS_IOC_SETFLAGS)
_BASIC_META(FS_IOC_GETVERSION)
_BASIC_META(FS_IOC_SETVERSION)
_BASIC_META(FS_IOC_FIEMAP)
_BASIC_META(FS_IOC32_GETFLAGS)
_BASIC_META(FS_IOC32_SETFLAGS)
_BASIC_META(FS_IOC32_GETVERSION)
_BASIC_META(FS_IOC32_SETVERSION)
_BASIC_META(FS_IOC_FSGETXATTR)
_BASIC_META(FS_IOC_FSSETXATTR)
_BASIC_META(FS_IOC_GETFSLABEL)
_BASIC_META(FS_IOC_SETFSLABEL)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/f2fs.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <sys/ioctl.h>

namespace FEX::HLE::x32 {
// F2FS ioctl numbers and the table (f2fs.inl) that consumes them.
namespace f2fs {
  // There are no userspace definitions for these ioctls,
  // so everything must be defined here.
  constexpr uint32_t F2FS_IOCTL_MAGIC = 0xf5;
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, uint32_t)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
//#define F2FS_IOC_DEFRAGMENT         _IOWR(F2FS_IOCTL_MAGIC, 8,    \
//                                          struct f2fs_defragment)
//#define F2FS_IOC_MOVE_RANGE         _IOWR(F2FS_IOCTL_MAGIC, 9,    \
//                                          struct f2fs_move_range)
//#define F2FS_IOC_FLUSH_DEVICE       _IOW(F2FS_IOCTL_MAGIC, 10,    \
//                                          struct f2fs_flush_device)
//#define F2FS_IOC_GARBAGE_COLLECT_RANGE    _IOW(F2FS_IOCTL_MAGIC, 11,    \
//                                          struct f2fs_gc_range)
#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, uint32_t)
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, uint32_t)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, uint32_t)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, uint64_t)
#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, uint64_t)
#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 18, uint64_t)
#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 19, uint64_t)
//#define F2FS_IOC_SEC_TRIM_FILE            _IOW(F2FS_IOCTL_MAGIC, 20,    \
//                                          struct f2fs_sectrim_range)
#include "LinuxSyscalls/x32/Ioctl/f2fs.inl"
} // namespace f2fs
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/f2fs.inl
================================================
// Table of F2FS ioctls, one _BASIC_META invocation per ioctl. The commented-out
// _CUSTOM_META entries are placeholders (XXX) that are not enabled.
// NOTE(review): _BASIC_META is defined outside this file; what each entry expands to
// depends on the includer - confirm there.
_BASIC_META(F2FS_IOC_START_ATOMIC_WRITE)
_BASIC_META(F2FS_IOC_COMMIT_ATOMIC_WRITE)
_BASIC_META(F2FS_IOC_START_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_RELEASE_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_ABORT_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_GARBAGE_COLLECT)
_BASIC_META(F2FS_IOC_WRITE_CHECKPOINT)
//_CUSTOM_META(F2FS_IOC_DEFRAGMENT, XXX)
//_CUSTOM_META(F2FS_IOC_MOVE_RANGE, XXX)
//_CUSTOM_META(F2FS_IOC_FLUSH_DEVICE, XXX)
//_CUSTOM_META(F2FS_IOC_GARBAGE_COLLECT_RANGE, XXX)
_BASIC_META(F2FS_IOC_GET_FEATURES)
_BASIC_META(F2FS_IOC_SET_PIN_FILE)
_BASIC_META(F2FS_IOC_GET_PIN_FILE)
_BASIC_META(F2FS_IOC_PRECACHE_EXTENTS)
_BASIC_META(F2FS_IOC_RESIZE_FS)
_BASIC_META(F2FS_IOC_GET_COMPRESS_BLOCKS)
_BASIC_META(F2FS_IOC_RELEASE_COMPRESS_BLOCKS)
_BASIC_META(F2FS_IOC_RESERVE_COMPRESS_BLOCKS)
//_CUSTOM_META(F2FS_IOC_SEC_TRIM_FILE, XXX)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/i915_drm.inl
================================================
// Table of i915 DRM ioctls, one macro invocation per ioctl.
// _BASIC_META entries pass only the ioctl name; _CUSTOM_META entries additionally pass
// an explicit ioctl number, either built from a FEX::HLE::x32::I915::fex_* struct or
// for an ioctl that shares its command index with another entry.
// NOTE(review): _BASIC_META and _CUSTOM_META are defined outside this file; what each
// entry expands to depends on the includer - confirm there.
_BASIC_META(DRM_IOCTL_I915_INIT)
_BASIC_META(DRM_IOCTL_I915_FLUSH)
_BASIC_META(DRM_IOCTL_I915_FLIP)
_CUSTOM_META(DRM_IOCTL_I915_BATCHBUFFER, DRM_IOW(DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, FEX::HLE::x32::I915::fex_drm_i915_batchbuffer_t))
_CUSTOM_META(DRM_IOCTL_I915_IRQ_EMIT, DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, FEX::HLE::x32::I915::fex_drm_i915_irq_emit_t))
_BASIC_META(DRM_IOCTL_I915_IRQ_WAIT)
_CUSTOM_META(DRM_IOCTL_I915_GETPARAM, DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, FEX::HLE::x32::I915::fex_drm_i915_getparam_t))
_BASIC_META(DRM_IOCTL_I915_SETPARAM)
_CUSTOM_META(DRM_IOCTL_I915_ALLOC, DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, FEX::HLE::x32::I915::fex_drm_i915_mem_alloc_t))
_BASIC_META(DRM_IOCTL_I915_FREE)
_BASIC_META(DRM_IOCTL_I915_INIT_HEAP)
_CUSTOM_META(DRM_IOCTL_I915_CMDBUFFER, DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, FEX::HLE::x32::I915::fex_drm_i915_cmdbuffer_t))
_BASIC_META(DRM_IOCTL_I915_DESTROY_HEAP)
_BASIC_META(DRM_IOCTL_I915_SET_VBLANK_PIPE)
_BASIC_META(DRM_IOCTL_I915_GET_VBLANK_PIPE)
_BASIC_META(DRM_IOCTL_I915_VBLANK_SWAP)
_BASIC_META(DRM_IOCTL_I915_HWS_ADDR)
_BASIC_META(DRM_IOCTL_I915_GEM_INIT)
_BASIC_META(DRM_IOCTL_I915_GEM_EXECBUFFER)
_BASIC_META(DRM_IOCTL_I915_GEM_EXECBUFFER2)
// DRM_IOCTL_I915_GEM_EXECBUFFER2_WR overlaps DRM_IOCTL_I915_GEM_EXECBUFFER2
_CUSTOM_META(DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2))
_BASIC_META(DRM_IOCTL_I915_GEM_PIN)
_BASIC_META(DRM_IOCTL_I915_GEM_UNPIN)
_BASIC_META(DRM_IOCTL_I915_GEM_BUSY)
_BASIC_META(DRM_IOCTL_I915_GEM_SET_CACHING)
_BASIC_META(DRM_IOCTL_I915_GEM_GET_CACHING)
_BASIC_META(DRM_IOCTL_I915_GEM_THROTTLE)
_BASIC_META(DRM_IOCTL_I915_GEM_ENTERVT)
_BASIC_META(DRM_IOCTL_I915_GEM_LEAVEVT)
_BASIC_META(DRM_IOCTL_I915_GEM_CREATE)
_BASIC_META(DRM_IOCTL_I915_GEM_CREATE_EXT)
_BASIC_META(DRM_IOCTL_I915_GEM_PREAD)
_BASIC_META(DRM_IOCTL_I915_GEM_PWRITE)
_BASIC_META(DRM_IOCTL_I915_GEM_MMAP)
_BASIC_META(DRM_IOCTL_I915_GEM_MMAP_GTT)
// DRM_IOCTL_I915_GEM_MMAP_OFFSET overlaps DRM_IOCTL_I915_GEM_MMAP_GTT
// This entry is only emitted when DRM_IOCTL_I915_GEM_MMAP_OFFSET is not defined at
// the point of inclusion.
#ifndef DRM_IOCTL_I915_GEM_MMAP_OFFSET
_CUSTOM_META(DRM_IOCTL_I915_GEM_MMAP_OFFSET, DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_offset))
#endif
_BASIC_META(DRM_IOCTL_I915_GEM_SET_DOMAIN)
_BASIC_META(DRM_IOCTL_I915_GEM_SW_FINISH)
_BASIC_META(DRM_IOCTL_I915_GEM_SET_TILING)
_BASIC_META(DRM_IOCTL_I915_GEM_GET_TILING)
_BASIC_META(DRM_IOCTL_I915_GEM_GET_APERTURE)
_BASIC_META(DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID)
_BASIC_META(DRM_IOCTL_I915_GEM_MADVISE)
_BASIC_META(DRM_IOCTL_I915_OVERLAY_PUT_IMAGE)
_BASIC_META(DRM_IOCTL_I915_OVERLAY_ATTRS)
_BASIC_META(DRM_IOCTL_I915_SET_SPRITE_COLORKEY)
_BASIC_META(DRM_IOCTL_I915_GET_SPRITE_COLORKEY)
_BASIC_META(DRM_IOCTL_I915_GEM_WAIT)
_BASIC_META(DRM_IOCTL_I915_GEM_CONTEXT_CREATE)
// DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT overlaps DRM_IOCTL_I915_GEM_CONTEXT_CREATE
_CUSTOM_META(DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create_ext))
_BASIC_META(DRM_IOCTL_I915_GEM_CONTEXT_DESTROY)
_BASIC_META(DRM_IOCTL_I915_REG_READ)
_BASIC_META(DRM_IOCTL_I915_GET_RESET_STATS)
_BASIC_META(DRM_IOCTL_I915_GEM_USERPTR)
_BASIC_META(DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM)
_BASIC_META(DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM)
_BASIC_META(DRM_IOCTL_I915_PERF_OPEN)
_BASIC_META(DRM_IOCTL_I915_PERF_ADD_CONFIG)
_BASIC_META(DRM_IOCTL_I915_PERF_REMOVE_CONFIG)
_BASIC_META(DRM_IOCTL_I915_QUERY)
_BASIC_META(DRM_IOCTL_I915_GEM_VM_CREATE)
_BASIC_META(DRM_IOCTL_I915_GEM_VM_DESTROY)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/input.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/input.h>
#include <sys/ioctl.h>

// evdev ioctl metadata: every entry of input.inl is expanded inside
// FEX::HLE::x32::input using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::input {
#include "LinuxSyscalls/x32/Ioctl/input.inl"
} // namespace FEX::HLE::x32::input


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/input.inl
================================================
// X-macro list of evdev (<linux/input.h>) ioctl requests.
// Nothing is defined here: each line is expanded by whichever _BASIC_META /
// _BASIC_META_VAR definition is in effect where this file is included.
// NOTE(review): the helper macros are defined outside this file; the _VAR form is
// presumably expanded as NAME(args...) for the function-like request macros -- confirm
// in HelperDefines.h and at the include sites.

// Fixed-size requests.
_BASIC_META(EVIOCGVERSION)
_BASIC_META(EVIOCGID)
_BASIC_META(EVIOCGREP)
_BASIC_META(EVIOCSREP)
_BASIC_META(EVIOCGKEYCODE)
_BASIC_META(EVIOCGKEYCODE_V2)
_BASIC_META(EVIOCSKEYCODE)
_BASIC_META(EVIOCSKEYCODE_V2)
// Requests whose macro takes a buffer length; every one is listed with length 0.
_BASIC_META_VAR(EVIOCGNAME, 0)
_BASIC_META_VAR(EVIOCGPHYS, 0)
_BASIC_META_VAR(EVIOCGUNIQ, 0)
_BASIC_META_VAR(EVIOCGPROP, 0)
_BASIC_META_VAR(EVIOCGMTSLOTS, 0)
_BASIC_META_VAR(EVIOCGKEY, 0)
_BASIC_META_VAR(EVIOCGLED, 0)
_BASIC_META_VAR(EVIOCGSND, 0)
_BASIC_META_VAR(EVIOCGSW, 0)
// EVIOCGBIT: one entry per first argument 0x00-0x1F, again with length 0.
_BASIC_META_VAR(EVIOCGBIT, 0x00, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x01, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x02, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x03, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x04, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x05, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x06, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x07, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x08, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x09, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0A, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0B, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0C, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0D, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0E, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0F, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x10, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x11, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x12, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x13, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x14, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x15, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x16, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x17, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x18, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x19, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1A, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1B, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1C, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1D, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1E, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1F, 0)
// EVIOCGABS: one entry per argument 0x00-0x2F.
// NOTE(review): the kernel's ABS_MAX is 0x3f, so codes 0x30-0x3F (the ABS_MT_* range)
// are not listed here or in the EVIOCSABS group below -- confirm this is intentional.
_BASIC_META_VAR(EVIOCGABS, 0x00)
_BASIC_META_VAR(EVIOCGABS, 0x01)
_BASIC_META_VAR(EVIOCGABS, 0x02)
_BASIC_META_VAR(EVIOCGABS, 0x03)
_BASIC_META_VAR(EVIOCGABS, 0x04)
_BASIC_META_VAR(EVIOCGABS, 0x05)
_BASIC_META_VAR(EVIOCGABS, 0x06)
_BASIC_META_VAR(EVIOCGABS, 0x07)
_BASIC_META_VAR(EVIOCGABS, 0x08)
_BASIC_META_VAR(EVIOCGABS, 0x09)
_BASIC_META_VAR(EVIOCGABS, 0x0A)
_BASIC_META_VAR(EVIOCGABS, 0x0B)
_BASIC_META_VAR(EVIOCGABS, 0x0C)
_BASIC_META_VAR(EVIOCGABS, 0x0D)
_BASIC_META_VAR(EVIOCGABS, 0x0E)
_BASIC_META_VAR(EVIOCGABS, 0x0F)
_BASIC_META_VAR(EVIOCGABS, 0x10)
_BASIC_META_VAR(EVIOCGABS, 0x11)
_BASIC_META_VAR(EVIOCGABS, 0x12)
_BASIC_META_VAR(EVIOCGABS, 0x13)
_BASIC_META_VAR(EVIOCGABS, 0x14)
_BASIC_META_VAR(EVIOCGABS, 0x15)
_BASIC_META_VAR(EVIOCGABS, 0x16)
_BASIC_META_VAR(EVIOCGABS, 0x17)
_BASIC_META_VAR(EVIOCGABS, 0x18)
_BASIC_META_VAR(EVIOCGABS, 0x19)
_BASIC_META_VAR(EVIOCGABS, 0x1A)
_BASIC_META_VAR(EVIOCGABS, 0x1B)
_BASIC_META_VAR(EVIOCGABS, 0x1C)
_BASIC_META_VAR(EVIOCGABS, 0x1D)
_BASIC_META_VAR(EVIOCGABS, 0x1E)
_BASIC_META_VAR(EVIOCGABS, 0x1F)
_BASIC_META_VAR(EVIOCGABS, 0x20)
_BASIC_META_VAR(EVIOCGABS, 0x21)
_BASIC_META_VAR(EVIOCGABS, 0x22)
_BASIC_META_VAR(EVIOCGABS, 0x23)
_BASIC_META_VAR(EVIOCGABS, 0x24)
_BASIC_META_VAR(EVIOCGABS, 0x25)
_BASIC_META_VAR(EVIOCGABS, 0x26)
_BASIC_META_VAR(EVIOCGABS, 0x27)
_BASIC_META_VAR(EVIOCGABS, 0x28)
_BASIC_META_VAR(EVIOCGABS, 0x29)
_BASIC_META_VAR(EVIOCGABS, 0x2A)
_BASIC_META_VAR(EVIOCGABS, 0x2B)
_BASIC_META_VAR(EVIOCGABS, 0x2C)
_BASIC_META_VAR(EVIOCGABS, 0x2D)
_BASIC_META_VAR(EVIOCGABS, 0x2E)
_BASIC_META_VAR(EVIOCGABS, 0x2F)
// EVIOCSABS: one entry per argument 0x00-0x2F, mirroring the EVIOCGABS group.
_BASIC_META_VAR(EVIOCSABS, 0x00)
_BASIC_META_VAR(EVIOCSABS, 0x01)
_BASIC_META_VAR(EVIOCSABS, 0x02)
_BASIC_META_VAR(EVIOCSABS, 0x03)
_BASIC_META_VAR(EVIOCSABS, 0x04)
_BASIC_META_VAR(EVIOCSABS, 0x05)
_BASIC_META_VAR(EVIOCSABS, 0x06)
_BASIC_META_VAR(EVIOCSABS, 0x07)
_BASIC_META_VAR(EVIOCSABS, 0x08)
_BASIC_META_VAR(EVIOCSABS, 0x09)
_BASIC_META_VAR(EVIOCSABS, 0x0A)
_BASIC_META_VAR(EVIOCSABS, 0x0B)
_BASIC_META_VAR(EVIOCSABS, 0x0C)
_BASIC_META_VAR(EVIOCSABS, 0x0D)
_BASIC_META_VAR(EVIOCSABS, 0x0E)
_BASIC_META_VAR(EVIOCSABS, 0x0F)
_BASIC_META_VAR(EVIOCSABS, 0x10)
_BASIC_META_VAR(EVIOCSABS, 0x11)
_BASIC_META_VAR(EVIOCSABS, 0x12)
_BASIC_META_VAR(EVIOCSABS, 0x13)
_BASIC_META_VAR(EVIOCSABS, 0x14)
_BASIC_META_VAR(EVIOCSABS, 0x15)
_BASIC_META_VAR(EVIOCSABS, 0x16)
_BASIC_META_VAR(EVIOCSABS, 0x17)
_BASIC_META_VAR(EVIOCSABS, 0x18)
_BASIC_META_VAR(EVIOCSABS, 0x19)
_BASIC_META_VAR(EVIOCSABS, 0x1A)
_BASIC_META_VAR(EVIOCSABS, 0x1B)
_BASIC_META_VAR(EVIOCSABS, 0x1C)
_BASIC_META_VAR(EVIOCSABS, 0x1D)
_BASIC_META_VAR(EVIOCSABS, 0x1E)
_BASIC_META_VAR(EVIOCSABS, 0x1F)
_BASIC_META_VAR(EVIOCSABS, 0x20)
_BASIC_META_VAR(EVIOCSABS, 0x21)
_BASIC_META_VAR(EVIOCSABS, 0x22)
_BASIC_META_VAR(EVIOCSABS, 0x23)
_BASIC_META_VAR(EVIOCSABS, 0x24)
_BASIC_META_VAR(EVIOCSABS, 0x25)
_BASIC_META_VAR(EVIOCSABS, 0x26)
_BASIC_META_VAR(EVIOCSABS, 0x27)
_BASIC_META_VAR(EVIOCSABS, 0x28)
_BASIC_META_VAR(EVIOCSABS, 0x29)
_BASIC_META_VAR(EVIOCSABS, 0x2A)
_BASIC_META_VAR(EVIOCSABS, 0x2B)
_BASIC_META_VAR(EVIOCSABS, 0x2C)
_BASIC_META_VAR(EVIOCSABS, 0x2D)
_BASIC_META_VAR(EVIOCSABS, 0x2E)
_BASIC_META_VAR(EVIOCSABS, 0x2F)
// Remaining fixed-size requests. EVIOCSFF is deliberately left disabled (marked XXX).
// XXX: _BASIC_META(EVIOCSFF)
_BASIC_META(EVIOCRMFF)
_BASIC_META(EVIOCGEFFECTS)
_BASIC_META(EVIOCGRAB)
_BASIC_META(EVIOCREVOKE)
_BASIC_META(EVIOCGMASK)
_BASIC_META(EVIOCSMASK)
_BASIC_META(EVIOCSCLOCKID)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/joystick.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/joystick.h>
#include <sys/ioctl.h>

// Joystick ioctl metadata: every entry of joystick.inl is expanded inside
// FEX::HLE::x32::joystick using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::joystick {
#include "LinuxSyscalls/x32/Ioctl/joystick.inl"
} // namespace FEX::HLE::x32::joystick


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/joystick.inl
================================================
// X-macro list of joystick (<linux/joystick.h>) ioctl requests.
// Each line is expanded by whichever _BASIC_META / _BASIC_META_VAR definition is in
// effect where this file is included.
_BASIC_META(JSIOCGVERSION)
_BASIC_META(JSIOCGAXES)
_BASIC_META(JSIOCGBUTTONS)
// JSIOCGNAME takes a buffer length; it is listed with length 0.
_BASIC_META_VAR(JSIOCGNAME, 0)
_BASIC_META(JSIOCSCORR)
_BASIC_META(JSIOCGCORR)
_BASIC_META(JSIOCSAXMAP)
_BASIC_META(JSIOCGAXMAP)
_BASIC_META(JSIOCSBTNMAP)
_BASIC_META(JSIOCGBTNMAP)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/lima_drm.inl
================================================
// X-macro list of Lima DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_LIMA_GET_PARAM)
_BASIC_META(DRM_IOCTL_LIMA_GEM_CREATE)
_BASIC_META(DRM_IOCTL_LIMA_GEM_INFO)
_BASIC_META(DRM_IOCTL_LIMA_GEM_SUBMIT)
_BASIC_META(DRM_IOCTL_LIMA_GEM_WAIT)
_BASIC_META(DRM_IOCTL_LIMA_CTX_CREATE)
_BASIC_META(DRM_IOCTL_LIMA_CTX_FREE)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/msdos_fs.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/msdos_fs.h>
#include <sys/ioctl.h>

// FAT/VFAT ioctl metadata: every entry of msdos_fs.inl is expanded inside
// FEX::HLE::x32::msdos_fs using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::msdos_fs {
#include "LinuxSyscalls/x32/Ioctl/msdos_fs.inl"
} // namespace FEX::HLE::x32::msdos_fs


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/msdos_fs.inl
================================================
// X-macro list of FAT/VFAT (<linux/msdos_fs.h>) ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included.
_BASIC_META(VFAT_IOCTL_READDIR_BOTH)
_BASIC_META(VFAT_IOCTL_READDIR_SHORT)
_BASIC_META(FAT_IOCTL_GET_ATTRIBUTES)
_BASIC_META(FAT_IOCTL_SET_ATTRIBUTES)
_BASIC_META(FAT_IOCTL_GET_VOLUME_ID)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/msm_drm.inl
================================================
// X-macro list of MSM DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META / _CUSTOM_META definition is in
// effect where this file is included.
_BASIC_META(DRM_IOCTL_MSM_GET_PARAM)
_BASIC_META(DRM_IOCTL_MSM_SET_PARAM)
_BASIC_META(DRM_IOCTL_MSM_GEM_NEW)
_BASIC_META(DRM_IOCTL_MSM_GEM_INFO)
_BASIC_META(DRM_IOCTL_MSM_GEM_CPU_PREP)
_BASIC_META(DRM_IOCTL_MSM_GEM_CPU_FINI)
_BASIC_META(DRM_IOCTL_MSM_GEM_SUBMIT)
// WAIT_FENCE is given an explicit request number built from FEX's own
// fex_drm_msm_wait_fence type rather than the system header's definition.
// NOTE(review): presumably because the 32-bit guest layout differs -- confirm.
_CUSTOM_META(DRM_IOCTL_MSM_WAIT_FENCE, DRM_IOW (DRM_COMMAND_BASE + DRM_MSM_WAIT_FENCE, FEX::HLE::x32::MSM::fex_drm_msm_wait_fence))
_BASIC_META(DRM_IOCTL_MSM_GEM_MADVISE)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_NEW)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_QUERY)
_BASIC_META(DRM_IOCTL_MSM_VM_BIND)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/nouveau_drm.inl
================================================
// X-macro list of Nouveau DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_NOUVEAU_GETPARAM)
_BASIC_META(DRM_IOCTL_NOUVEAU_CHANNEL_ALLOC)
_BASIC_META(DRM_IOCTL_NOUVEAU_CHANNEL_FREE)
_BASIC_META(DRM_IOCTL_NOUVEAU_SVM_INIT)
_BASIC_META(DRM_IOCTL_NOUVEAU_SVM_BIND)
_BASIC_META(DRM_IOCTL_NOUVEAU_GEM_NEW)
_BASIC_META(DRM_IOCTL_NOUVEAU_GEM_PUSHBUF)
_BASIC_META(DRM_IOCTL_NOUVEAU_GEM_CPU_PREP)
_BASIC_META(DRM_IOCTL_NOUVEAU_GEM_CPU_FINI)
_BASIC_META(DRM_IOCTL_NOUVEAU_GEM_INFO)
_BASIC_META(DRM_IOCTL_NOUVEAU_VM_INIT)
_BASIC_META(DRM_IOCTL_NOUVEAU_VM_BIND)
_BASIC_META(DRM_IOCTL_NOUVEAU_EXEC)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/nova_drm.inl
================================================
// X-macro list of Nova DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_NOVA_GETPARAM)
_BASIC_META(DRM_IOCTL_NOVA_GEM_CREATE)
_BASIC_META(DRM_IOCTL_NOVA_GEM_INFO)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/panfrost_drm.inl
================================================
// X-macro list of Panfrost DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_PANFROST_SUBMIT)
_BASIC_META(DRM_IOCTL_PANFROST_WAIT_BO)
_BASIC_META(DRM_IOCTL_PANFROST_CREATE_BO)
_BASIC_META(DRM_IOCTL_PANFROST_MMAP_BO)
_BASIC_META(DRM_IOCTL_PANFROST_GET_PARAM)
_BASIC_META(DRM_IOCTL_PANFROST_GET_BO_OFFSET)
_BASIC_META(DRM_IOCTL_PANFROST_MADVISE)
_BASIC_META(DRM_IOCTL_PANFROST_PERFCNT_ENABLE)
_BASIC_META(DRM_IOCTL_PANFROST_PERFCNT_DUMP)
_BASIC_META(DRM_IOCTL_PANFROST_SET_LABEL_BO)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/panthor_drm.inl
================================================
// X-macro list of Panthor DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_PANTHOR_DEV_QUERY)
_BASIC_META(DRM_IOCTL_PANTHOR_VM_CREATE)
_BASIC_META(DRM_IOCTL_PANTHOR_VM_DESTROY)
_BASIC_META(DRM_IOCTL_PANTHOR_VM_BIND)
_BASIC_META(DRM_IOCTL_PANTHOR_VM_GET_STATE)
_BASIC_META(DRM_IOCTL_PANTHOR_BO_CREATE)
_BASIC_META(DRM_IOCTL_PANTHOR_BO_MMAP_OFFSET)
_BASIC_META(DRM_IOCTL_PANTHOR_GROUP_CREATE)
_BASIC_META(DRM_IOCTL_PANTHOR_GROUP_DESTROY)
_BASIC_META(DRM_IOCTL_PANTHOR_GROUP_SUBMIT)
_BASIC_META(DRM_IOCTL_PANTHOR_GROUP_GET_STATE)
_BASIC_META(DRM_IOCTL_PANTHOR_TILER_HEAP_CREATE)
_BASIC_META(DRM_IOCTL_PANTHOR_TILER_HEAP_DESTROY)
_BASIC_META(DRM_IOCTL_PANTHOR_BO_SET_LABEL)
_BASIC_META(DRM_IOCTL_PANTHOR_SET_USER_MMIO_OFFSET)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/pvr_drm.inl
================================================
// X-macro list of PowerVR (PVR) DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_PVR_DEV_QUERY)
_BASIC_META(DRM_IOCTL_PVR_CREATE_BO)
_BASIC_META(DRM_IOCTL_PVR_GET_BO_MMAP_OFFSET)
_BASIC_META(DRM_IOCTL_PVR_CREATE_VM_CONTEXT)
_BASIC_META(DRM_IOCTL_PVR_DESTROY_VM_CONTEXT)
_BASIC_META(DRM_IOCTL_PVR_VM_MAP)
_BASIC_META(DRM_IOCTL_PVR_VM_UNMAP)
_BASIC_META(DRM_IOCTL_PVR_CREATE_CONTEXT)
_BASIC_META(DRM_IOCTL_PVR_DESTROY_CONTEXT)
_BASIC_META(DRM_IOCTL_PVR_CREATE_FREE_LIST)
_BASIC_META(DRM_IOCTL_PVR_DESTROY_FREE_LIST)
_BASIC_META(DRM_IOCTL_PVR_CREATE_HWRT_DATASET)
_BASIC_META(DRM_IOCTL_PVR_DESTROY_HWRT_DATASET)
_BASIC_META(DRM_IOCTL_PVR_SUBMIT_JOBS)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/radeon_drm.inl
================================================
// X-macro list of Radeon DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META / _CUSTOM_META definition is in
// effect where this file is included.
// _CUSTOM_META entries spell out the request number with a FEX::HLE::x32::RADEON::fex_*
// payload type instead of relying on the system header's definition.
// NOTE(review): presumably those are the requests whose 32-bit guest struct layout
// differs from the host's -- confirm against the fex_* type definitions.

// Legacy (pre-GEM) command set.
_CUSTOM_META(DRM_IOCTL_RADEON_CP_INIT, DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CP_INIT, FEX::HLE::x32::RADEON::fex_drm_radeon_init_t))
_BASIC_META(DRM_IOCTL_RADEON_CP_START)
_BASIC_META(DRM_IOCTL_RADEON_CP_STOP)
_BASIC_META(DRM_IOCTL_RADEON_CP_RESET)
_BASIC_META(DRM_IOCTL_RADEON_CP_IDLE)
_BASIC_META(DRM_IOCTL_RADEON_RESET)
_BASIC_META(DRM_IOCTL_RADEON_FULLSCREEN)
_BASIC_META(DRM_IOCTL_RADEON_SWAP)
_CUSTOM_META(DRM_IOCTL_RADEON_CLEAR, DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CLEAR, FEX::HLE::x32::RADEON::fex_drm_radeon_clear_t))
_BASIC_META(DRM_IOCTL_RADEON_VERTEX)
_BASIC_META(DRM_IOCTL_RADEON_INDICES)
_CUSTOM_META(DRM_IOCTL_RADEON_STIPPLE, DRM_IOW( DRM_COMMAND_BASE + DRM_RADEON_STIPPLE, FEX::HLE::x32::RADEON::fex_drm_radeon_stipple_t))
_BASIC_META(DRM_IOCTL_RADEON_INDIRECT)
_CUSTOM_META(DRM_IOCTL_RADEON_TEXTURE, DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_TEXTURE, FEX::HLE::x32::RADEON::fex_drm_radeon_texture_t))
_CUSTOM_META(DRM_IOCTL_RADEON_VERTEX2, DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_VERTEX2, FEX::HLE::x32::RADEON::fex_drm_radeon_vertex2_t))
_CUSTOM_META(DRM_IOCTL_RADEON_CMDBUF, DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_CMDBUF, FEX::HLE::x32::RADEON::fex_drm_radeon_cmd_buffer_t))
_CUSTOM_META(DRM_IOCTL_RADEON_GETPARAM, DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GETPARAM, FEX::HLE::x32::RADEON::fex_drm_radeon_getparam_t))
_BASIC_META(DRM_IOCTL_RADEON_FLIP)
_CUSTOM_META(DRM_IOCTL_RADEON_ALLOC, DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_ALLOC, FEX::HLE::x32::RADEON::fex_drm_radeon_mem_alloc_t))
_BASIC_META(DRM_IOCTL_RADEON_FREE)
_BASIC_META(DRM_IOCTL_RADEON_INIT_HEAP)
_CUSTOM_META(DRM_IOCTL_RADEON_IRQ_EMIT, DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_IRQ_EMIT, FEX::HLE::x32::RADEON::fex_drm_radeon_irq_emit_t))
_BASIC_META(DRM_IOCTL_RADEON_IRQ_WAIT)
_BASIC_META(DRM_IOCTL_RADEON_CP_RESUME)
_CUSTOM_META(DRM_IOCTL_RADEON_SETPARAM, DRM_IOW(DRM_COMMAND_BASE + DRM_RADEON_SETPARAM, FEX::HLE::x32::RADEON::fex_drm_radeon_setparam_t))
_BASIC_META(DRM_IOCTL_RADEON_SURF_ALLOC)
_BASIC_META(DRM_IOCTL_RADEON_SURF_FREE)

// GEM / command-submission requests.
_BASIC_META(DRM_IOCTL_RADEON_GEM_INFO)
_CUSTOM_META(DRM_IOCTL_RADEON_GEM_CREATE, DRM_IOWR(DRM_COMMAND_BASE + DRM_RADEON_GEM_CREATE, FEX::HLE::x32::RADEON::fex_drm_radeon_gem_create))
_BASIC_META(DRM_IOCTL_RADEON_GEM_MMAP)
_BASIC_META(DRM_IOCTL_RADEON_GEM_PREAD)
_BASIC_META(DRM_IOCTL_RADEON_GEM_PWRITE)
_BASIC_META(DRM_IOCTL_RADEON_GEM_SET_DOMAIN)
_BASIC_META(DRM_IOCTL_RADEON_GEM_WAIT_IDLE)
_BASIC_META(DRM_IOCTL_RADEON_CS)
_BASIC_META(DRM_IOCTL_RADEON_INFO)
_BASIC_META(DRM_IOCTL_RADEON_GEM_SET_TILING)
_BASIC_META(DRM_IOCTL_RADEON_GEM_GET_TILING)
_BASIC_META(DRM_IOCTL_RADEON_GEM_BUSY)
_BASIC_META(DRM_IOCTL_RADEON_GEM_VA)
_BASIC_META(DRM_IOCTL_RADEON_GEM_OP)
_BASIC_META(DRM_IOCTL_RADEON_GEM_USERPTR)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/sockios.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/sockios.h>
#include <sys/ioctl.h>

// Socket ioctl metadata: every entry of sockios.inl is expanded inside
// FEX::HLE::x32::sockios using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::sockios {
// Fallback value, used only when the headers included above do not define SIOCGSKNS.
#if !defined(SIOCGSKNS)
#define SIOCGSKNS 0x894C
#endif
#include "LinuxSyscalls/x32/Ioctl/sockios.inl"
} // namespace FEX::HLE::x32::sockios


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/sockios.inl
================================================
// X-macro list of socket (<linux/sockios.h>) ioctl requests.
// Each line is expanded by whichever _BASIC_META / _CUSTOM_META_OFFSET definition is
// in effect where this file is included.

// Fallback values for the *_OLD timestamp requests, used only when the system headers
// do not define them.
#ifndef SIOCGSTAMP_OLD
#define SIOCGSTAMP_OLD 0x8906
#endif
_BASIC_META(SIOCGSTAMP_OLD)
#ifndef SIOCGSTAMPNS_OLD
#define SIOCGSTAMPNS_OLD 0x8907
#endif
_BASIC_META(SIOCGSTAMPNS_OLD)
_BASIC_META(SIOCADDRT)
_BASIC_META(SIOCDELRT)
_BASIC_META(SIOCRTMSG)
_BASIC_META(SIOCGIFNAME)
_BASIC_META(SIOCSIFLINK)
_BASIC_META(SIOCGIFCONF)
_BASIC_META(SIOCGIFFLAGS)
_BASIC_META(SIOCSIFFLAGS)
_BASIC_META(SIOCGIFADDR)
_BASIC_META(SIOCSIFADDR)
_BASIC_META(SIOCGIFDSTADDR)
_BASIC_META(SIOCSIFDSTADDR)
_BASIC_META(SIOCGIFBRDADDR)
_BASIC_META(SIOCSIFBRDADDR)
_BASIC_META(SIOCGIFNETMASK)
_BASIC_META(SIOCSIFNETMASK)
_BASIC_META(SIOCGIFMETRIC)
_BASIC_META(SIOCSIFMETRIC)
_BASIC_META(SIOCGIFMEM)
_BASIC_META(SIOCSIFMEM)
_BASIC_META(SIOCGIFMTU)
_BASIC_META(SIOCSIFMTU)
_BASIC_META(SIOCSIFNAME)
_BASIC_META(SIOCSIFHWADDR)
_BASIC_META(SIOCGIFENCAP)
_BASIC_META(SIOCSIFENCAP)
_BASIC_META(SIOCGIFHWADDR)
_BASIC_META(SIOCGIFSLAVE)
_BASIC_META(SIOCSIFSLAVE)
_BASIC_META(SIOCADDMULTI)
_BASIC_META(SIOCDELMULTI)
_BASIC_META(SIOCGIFINDEX)
_BASIC_META(SIOCSIFPFLAGS)
_BASIC_META(SIOCGIFPFLAGS)
_BASIC_META(SIOCDIFADDR)
_BASIC_META(SIOCSIFHWBROADCAST)
_BASIC_META(SIOCGIFCOUNT)
_BASIC_META(SIOCGIFBR)
_BASIC_META(SIOCSIFBR)
_BASIC_META(SIOCGIFTXQLEN)
_BASIC_META(SIOCSIFTXQLEN)
_BASIC_META(SIOCETHTOOL)
_BASIC_META(SIOCGMIIPHY)
_BASIC_META(SIOCGMIIREG)
_BASIC_META(SIOCSMIIREG)
_BASIC_META(SIOCWANDEV)
_BASIC_META(SIOCOUTQNSD)
_BASIC_META(SIOCGSKNS)
_BASIC_META(SIOCDARP)
_BASIC_META(SIOCGARP)
_BASIC_META(SIOCSARP)
_BASIC_META(SIOCDRARP)
_BASIC_META(SIOCGRARP)
_BASIC_META(SIOCSRARP)
_BASIC_META(SIOCGIFMAP)
_BASIC_META(SIOCSIFMAP)
_BASIC_META(SIOCADDDLCI)
_BASIC_META(SIOCDELDLCI)
_BASIC_META(SIOCGIFVLAN)
_BASIC_META(SIOCSIFVLAN)
_BASIC_META(SIOCBONDENSLAVE)
_BASIC_META(SIOCBONDRELEASE)
_BASIC_META(SIOCBONDSETHWADDR)
_BASIC_META(SIOCBONDSLAVEINFOQUERY)
_BASIC_META(SIOCBONDINFOQUERY)
_BASIC_META(SIOCBONDCHANGEACTIVE)
_BASIC_META(SIOCBRADDBR)
_BASIC_META(SIOCBRDELBR)
_BASIC_META(SIOCBRADDIF)
_BASIC_META(SIOCBRDELIF)
_BASIC_META(SIOCSHWTSTAMP)
_BASIC_META(SIOCGHWTSTAMP)

// Device-private range: sixteen entries, each naming SIOCDEVPRIVATE plus an offset 0x0-0xF.
// NOTE(review): the arguments read as (entry name, base request, offset); how they are
// combined depends on the _CUSTOM_META_OFFSET definition, which is not in this file.
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE,   SIOCDEVPRIVATE, 0x0)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_1, SIOCDEVPRIVATE, 0x1)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_2, SIOCDEVPRIVATE, 0x2)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_3, SIOCDEVPRIVATE, 0x3)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_4, SIOCDEVPRIVATE, 0x4)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_5, SIOCDEVPRIVATE, 0x5)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_6, SIOCDEVPRIVATE, 0x6)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_7, SIOCDEVPRIVATE, 0x7)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_8, SIOCDEVPRIVATE, 0x8)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_9, SIOCDEVPRIVATE, 0x9)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_A, SIOCDEVPRIVATE, 0xA)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_B, SIOCDEVPRIVATE, 0xB)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_C, SIOCDEVPRIVATE, 0xC)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_D, SIOCDEVPRIVATE, 0xD)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_E, SIOCDEVPRIVATE, 0xE)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_F, SIOCDEVPRIVATE, 0xF)

// Protocol-private range: the same sixteen-entry pattern over SIOCPROTOPRIVATE.
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE,   SIOCPROTOPRIVATE, 0x0)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_1, SIOCPROTOPRIVATE, 0x1)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_2, SIOCPROTOPRIVATE, 0x2)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_3, SIOCPROTOPRIVATE, 0x3)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_4, SIOCPROTOPRIVATE, 0x4)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_5, SIOCPROTOPRIVATE, 0x5)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_6, SIOCPROTOPRIVATE, 0x6)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_7, SIOCPROTOPRIVATE, 0x7)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_8, SIOCPROTOPRIVATE, 0x8)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_9, SIOCPROTOPRIVATE, 0x9)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_A, SIOCPROTOPRIVATE, 0xA)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_B, SIOCPROTOPRIVATE, 0xB)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_C, SIOCPROTOPRIVATE, 0xC)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_D, SIOCPROTOPRIVATE, 0xD)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_E, SIOCPROTOPRIVATE, 0xE)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_F, SIOCPROTOPRIVATE, 0xF)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/streams.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <sys/ioctl.h>

// Terminal/file ioctl metadata: every entry of streams.inl is expanded inside
// FEX::HLE::x32::streams using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::streams {
// Fallback value, used only when the headers included above do not define TIOCGPTPEER.
#if !defined(TIOCGPTPEER)
#define TIOCGPTPEER _IO('T', 0x41)
#endif
#include "LinuxSyscalls/x32/Ioctl/streams.inl"
} // namespace FEX::HLE::x32::streams


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/streams.inl
================================================
// X-macro list of terminal and generic file-descriptor ioctl requests
// (the 0x54xx range from the kernel's asm-generic/ioctls.h).
// Each line is expanded by whichever _BASIC_META definition is in effect where this
// file is included. Every request must appear exactly once: get/set pairs such as
// TIOCGRS485/TIOCSRS485 and FIONCLEX/FIOCLEX are distinct request numbers.
_BASIC_META(TCGETS)
_BASIC_META(TCSETS)
_BASIC_META(TCSETSW)
_BASIC_META(TCSETSF)
_BASIC_META(TCGETA)
_BASIC_META(TCSETA)
_BASIC_META(TCSETAW)
_BASIC_META(TCSETAF)
_BASIC_META(TCSBRK)
_BASIC_META(TCXONC)
_BASIC_META(TCFLSH)
_BASIC_META(TIOCEXCL)
_BASIC_META(TIOCNXCL)
_BASIC_META(TIOCSCTTY)
_BASIC_META(TIOCGPGRP)
_BASIC_META(TIOCSPGRP)
_BASIC_META(TIOCOUTQ)
_BASIC_META(TIOCSTI)
_BASIC_META(TIOCGWINSZ)
_BASIC_META(TIOCSWINSZ)
_BASIC_META(TIOCMGET)
_BASIC_META(TIOCMBIS)
_BASIC_META(TIOCMBIC)
_BASIC_META(TIOCMSET)
_BASIC_META(TIOCGSOFTCAR)
_BASIC_META(TIOCSSOFTCAR)
_BASIC_META(TIOCINQ)
_BASIC_META(TIOCLINUX)
_BASIC_META(TIOCCONS)
_BASIC_META(TIOCGSERIAL)
_BASIC_META(TIOCSSERIAL)
_BASIC_META(TIOCPKT)
_BASIC_META(FIONBIO)
_BASIC_META(TIOCNOTTY)
_BASIC_META(TIOCSETD)
_BASIC_META(TIOCGETD)
_BASIC_META(TCSBRKP)
_BASIC_META(TIOCSBRK)
_BASIC_META(TIOCCBRK)
_BASIC_META(TIOCGSID)
// The termios2 requests are left disabled.
//_BASIC_META(TCGETS2)
//_BASIC_META(TCSETS2)
//_BASIC_META(TCSETSW2)
//_BASIC_META(TCSETSF2)
// RS-485 configuration: get, then set.
_BASIC_META(TIOCGRS485)
_BASIC_META(TIOCSRS485)
_BASIC_META(TIOCGPTN)
_BASIC_META(TIOCSPTLCK)
_BASIC_META(TIOCGDEV)
_BASIC_META(TCGETX)
_BASIC_META(TCSETX)
_BASIC_META(TCSETXF)
_BASIC_META(TCSETXW)
_BASIC_META(TIOCSIG)
_BASIC_META(TIOCVHANGUP)
_BASIC_META(TIOCGPKT)
_BASIC_META(TIOCGPTLCK)
_BASIC_META(TIOCGEXCL)
_BASIC_META(TIOCGPTPEER)
// The ISO 7816 requests are left disabled.
//_BASIC_META(TIOCGISO7816)
//_BASIC_META(TIOCSISO7816)
// Close-on-exec flag: clear (FIONCLEX), then set (FIOCLEX).
_BASIC_META(FIONCLEX)
_BASIC_META(FIOCLEX)
_BASIC_META(FIOASYNC)
_BASIC_META(TIOCSERCONFIG)
_BASIC_META(TIOCSERGWILD)
_BASIC_META(TIOCSERSWILD)
_BASIC_META(TIOCGLCKTRMIOS)
_BASIC_META(TIOCSLCKTRMIOS)
_BASIC_META(TIOCSERGSTRUCT)
_BASIC_META(TIOCSERGETLSR)
_BASIC_META(TIOCSERGETMULTI)
_BASIC_META(TIOCSERSETMULTI)
_BASIC_META(TIOCMIWAIT)
_BASIC_META(TIOCGICOUNT)
_BASIC_META(FIOQSIZE)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/usbdev.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/usbdevice_fs.h>
#include <sys/ioctl.h>

// usbdevfs ioctl metadata: every entry of usbdev.inl is expanded inside
// FEX::HLE::x32::usbdev using whichever _BASIC_META* helpers are currently defined.
namespace FEX::HLE::x32::usbdev {
// Fallback definitions, each used only when <linux/usbdevice_fs.h> does not already
// provide the request.
#if !defined(USBDEVFS_GET_SPEED)
#define USBDEVFS_GET_SPEED _IO('U', 31)
#endif

#if !defined(USBDEVFS_CONNINFO_EX)
#define USBDEVFS_CONNINFO_EX(len) _IOC(_IOC_READ, 'U', 32, len)
#endif

#if !defined(USBDEVFS_FORBID_SUSPEND)
#define USBDEVFS_FORBID_SUSPEND _IO('U', 33)
#endif

#if !defined(USBDEVFS_ALLOW_SUSPEND)
#define USBDEVFS_ALLOW_SUSPEND _IO('U', 34)
#endif

#if !defined(USBDEVFS_WAIT_FOR_RESUME)
#define USBDEVFS_WAIT_FOR_RESUME _IO('U', 35)
#endif

#include "LinuxSyscalls/x32/Ioctl/usbdev.inl"
} // namespace FEX::HLE::x32::usbdev


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/usbdev.inl
================================================
// X-macro list of usbdevfs (<linux/usbdevice_fs.h>) ioctl requests.
// Each line is expanded by whichever _BASIC_META / _BASIC_META_VAR definition is in
// effect where this file is included.
// Entries marked "XXX:" and the commented-out *32 variants are currently disabled.
// NOTE(review): presumably these need pointer-size translation that is not implemented
// yet -- confirm before enabling any of them.
// XXX: _BASIC_META(USBDEVFS_CONTROL)
// _BASIC_META(USBDEVFS_CONTROL32)
// XXX: _BASIC_META(USBDEVFS_BULK)
// _BASIC_META(USBDEVFS_BULK32)
_BASIC_META(USBDEVFS_RESETEP)
_BASIC_META(USBDEVFS_SETINTERFACE)
_BASIC_META(USBDEVFS_SETCONFIGURATION)
_BASIC_META(USBDEVFS_GETDRIVER)
// XXX: _BASIC_META(USBDEVFS_SUBMITURB)
// _BASIC_META(USBDEVFS_SUBMITURB32)
_BASIC_META(USBDEVFS_DISCARDURB)
// XXX: _BASIC_META(USBDEVFS_REAPURB)
// _BASIC_META(USBDEVFS_REAPURB32)
// XXX: _BASIC_META(USBDEVFS_REAPURBNDELAY)
// Unlike the other *32 variants, this one is enabled.
_BASIC_META(USBDEVFS_REAPURBNDELAY32)
// XXX: _BASIC_META(USBDEVFS_DISCSIGNAL)
// _BASIC_META(USBDEVFS_DISCSIGNAL32)
_BASIC_META(USBDEVFS_CLAIMINTERFACE)
_BASIC_META(USBDEVFS_RELEASEINTERFACE)
_BASIC_META(USBDEVFS_CONNECTINFO)
// XXX: _BASIC_META(USBDEVFS_IOCTL)
//_BASIC_META(USBDEVFS_IOCTL32)
_BASIC_META(USBDEVFS_HUB_PORTINFO)
_BASIC_META(USBDEVFS_RESET)
_BASIC_META(USBDEVFS_CLEAR_HALT)
_BASIC_META(USBDEVFS_RELEASE_PORT)
_BASIC_META(USBDEVFS_GET_CAPABILITIES)
_BASIC_META(USBDEVFS_DISCONNECT_CLAIM)
_BASIC_META(USBDEVFS_ALLOC_STREAMS)
_BASIC_META(USBDEVFS_FREE_STREAMS)
_BASIC_META(USBDEVFS_DROP_PRIVILEGES)
_BASIC_META(USBDEVFS_GET_SPEED)
// USBDEVFS_CONNINFO_EX takes a length; it is listed with length 0.
_BASIC_META_VAR(USBDEVFS_CONNINFO_EX, 0)
_BASIC_META(USBDEVFS_FORBID_SUSPEND)
_BASIC_META(USBDEVFS_ALLOW_SUSPEND)
_BASIC_META(USBDEVFS_WAIT_FOR_RESUME)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/v3d_drm.inl
================================================
// X-macro list of V3D DRM ioctl requests.
// Each line is expanded by whichever _BASIC_META / _CUSTOM_META definition is in
// effect where this file is included; the including header is not part of this file.
_BASIC_META(DRM_IOCTL_V3D_SUBMIT_CL)
_BASIC_META(DRM_IOCTL_V3D_WAIT_BO)
_BASIC_META(DRM_IOCTL_V3D_CREATE_BO)
_BASIC_META(DRM_IOCTL_V3D_MMAP_BO)
_BASIC_META(DRM_IOCTL_V3D_GET_PARAM)
_BASIC_META(DRM_IOCTL_V3D_GET_BO_OFFSET)
_BASIC_META(DRM_IOCTL_V3D_SUBMIT_TFU)
// SUBMIT_CSD is given an explicit request number built from FEX's own
// fex_drm_v3d_submit_csd type rather than the system header's definition.
_CUSTOM_META(DRM_IOCTL_V3D_SUBMIT_CSD, DRM_IOW(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CSD, FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd)) // XXX: This will still be incorrect on x86-64
_BASIC_META(DRM_IOCTL_V3D_PERFMON_CREATE)
_BASIC_META(DRM_IOCTL_V3D_PERFMON_DESTROY)
_BASIC_META(DRM_IOCTL_V3D_PERFMON_GET_VALUES)
_BASIC_META(DRM_IOCTL_V3D_SUBMIT_CPU)
_BASIC_META(DRM_IOCTL_V3D_PERFMON_GET_COUNTER)
_BASIC_META(DRM_IOCTL_V3D_PERFMON_SET_GLOBAL)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/v4l2.h
================================================
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"
#include "LinuxSyscalls/x32/Types.h"

#include <cstdint>
#include <linux/videodev2.h>
#include <sys/ioctl.h>

// Field-copy shorthands for the struct conversion routines in this header.
// Both expect an object named `val` to be in scope at the point of use.
// CPYT(x): assign the enclosing object's member x to val.x.
#define CPYT(x) val.x = x
// CPYF(x): assign val.x to the enclosing object's member x.
#define CPYF(x) x = val.x

extern "C" {
// Upstream definitions that changed over time.
// These are local copies of V4L2 structures, declared under an `upstream_` prefix so
// they do not collide with the types from <linux/videodev2.h>.
// NOTE(review): presumably kept so the layout used here does not depend on the version
// of the installed kernel headers -- confirm against the code that uses them.

// Local copy of the create-buffers argument layout; embeds the system v4l2_format.
struct upstream_v4l2_create_buffers {
  uint32_t index;
  uint32_t count;
  uint32_t memory;
  struct v4l2_format format;
  uint32_t capabilities;
  uint32_t flags;
  uint32_t max_num_buffers;
  uint32_t reserved[5];
};

// Local copy of the remove-buffers argument layout (16 32-bit words in total).
struct upstream_v4l2_remove_buffers {
  uint32_t index;
  uint32_t count;
  uint32_t type;
  uint32_t reserved[13];
};
}

namespace FEX::HLE::x32 {
namespace V4l2 {

  // Guest (x86-32) view of v4l2_window: `clips` and `bitmap` are held as compat_uptr_t values
  // instead of host pointers. No conversion to or from the host struct is defined here; the
  // type is only used as the `win` member of fex_v4l2_format's union.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_window") FEX_ANNOTATE("fex-match") fex_v4l2_window {
    struct v4l2_rect w;
    uint32_t field;
    uint32_t chromakey;
    compat_uptr_t clips;
    uint32_t clipcount;
    compat_uptr_t bitmap;
    uint8_t global_alpha;

    // Defaulted (not deleted) so the type can sit inside fex_v4l2_format's union.
    fex_v4l2_window() = default;
  };

  // Guest (x86-32) view of v4l2_format.
  //
  // Converts to and from the host v4l2_format by copying the union member selected by `type`.
  // The two overlay buffer types carry a v4l2_window, which holds pointers; that payload is
  // never copied by these conversions (V4l2Handler forwards overlay requests without using
  // the converted struct).
  struct FEX_ANNOTATE("alias-x86_32-v4l2_format") FEX_ANNOTATE("fex-match") fex_v4l2_format {
    uint32_t type;
    union {
      struct v4l2_pix_format pix;
      struct v4l2_pix_format_mplane pix_mp;
      // Just a valid place holder for struct verifier.
      fex_v4l2_window win;
      struct v4l2_vbi_format vbi;
      struct v4l2_sliced_vbi_format sliced;
      struct v4l2_sdr_format sdr;
      struct v4l2_meta_format meta;
      __u8 raw_data[200];
    } fmt;

    fex_v4l2_format() = delete;

    // Guest -> host. The result starts zero-initialised, so bytes that are not copied stay zero.
    operator v4l2_format() const {
      v4l2_format val {};
      CPYT(type);

      switch (type) {
      case V4L2_BUF_TYPE_VIDEO_CAPTURE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_pix_format)); break;
      case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_pix_format_mplane)); break;
      case V4L2_BUF_TYPE_VBI_CAPTURE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_vbi_format)); break;
      case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_sliced_vbi_format)); break;
      case V4L2_BUF_TYPE_SDR_CAPTURE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_sdr_format)); break;
      case V4L2_BUF_TYPE_META_CAPTURE: memcpy(&val.fmt, &fmt, sizeof(struct v4l2_meta_format)); break;
      // Both overlay types use v4l2_window; a raw byte copy would misplace its pointer fields.
      case V4L2_BUF_TYPE_VIDEO_OVERLAY:
      case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: break;
      default: memcpy(&val.fmt, &fmt, 200); break;
      }

      return val;
    }

    // Host -> guest.
    fex_v4l2_format(v4l2_format val) {
      CPYF(type);

      // Most cases below copy fewer than the 200 bytes of `fmt` (the overlay cases copy none).
      // Zero the union first so that assigning this object into guest memory never writes
      // indeterminate bytes there.
      memset(&fmt, 0, sizeof(fmt));

      switch (type) {
      case V4L2_BUF_TYPE_VIDEO_CAPTURE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_pix_format)); break;
      case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_pix_format_mplane)); break;
      case V4L2_BUF_TYPE_VBI_CAPTURE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_vbi_format)); break;
      case V4L2_BUF_TYPE_SLICED_VBI_CAPTURE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_sliced_vbi_format)); break;
      case V4L2_BUF_TYPE_SDR_CAPTURE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_sdr_format)); break;
      case V4L2_BUF_TYPE_META_CAPTURE: memcpy(&fmt, &val.fmt, sizeof(struct v4l2_meta_format)); break;
      // Both overlay types use v4l2_window; a raw byte copy would misplace its pointer fields.
      case V4L2_BUF_TYPE_VIDEO_OVERLAY:
      case V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY: break;
      default: memcpy(&fmt, &val.fmt, 200); break;
      }
    }
  };

  // Guest (x86-32) view of v4l2_buffer. The timestamp is a timeval32 and the `m` union holds
  // 32-bit members, so the struct is converted field by field rather than copied raw.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_buffer") FEX_ANNOTATE("fex-match") fex_v4l2_buffer {
    uint32_t index;
    uint32_t type;
    uint32_t bytesused;
    uint32_t flags;
    uint32_t field;
    struct timeval32 timestamp;
    struct v4l2_timecode timecode;
    uint32_t sequence;
    uint32_t memory;

    union {
      uint32_t offset;
      compat_ptr<void> userptr;
      compat_ptr<struct v4l2_plane> planes;
      int32_t fd;
    } m;
    uint32_t length;
    uint32_t reserved2;
    union {
      int32_t request_fd;
      uint32_t reserved;
    };

    fex_v4l2_buffer() = delete;

    // Guest -> host. Starts from a zeroed host struct; the `m` union is carried over through
    // its 32-bit `offset` member only.
    operator v4l2_buffer() const {
      v4l2_buffer Host {};
      Host.index = index;
      Host.type = type;
      Host.bytesused = bytesused;
      Host.flags = flags;
      Host.field = field;
      Host.timestamp = timestamp;
      Host.timecode = timecode;
      Host.sequence = sequence;
      Host.memory = memory;
      Host.length = length;
      Host.reserved2 = reserved2;
      Host.m.offset = m.offset;
      Host.request_fd = request_fd;
      return Host;
    }

    // Host -> guest. `timestamp` and `m` are set in the initializer list, everything else is
    // assigned in the body.
    fex_v4l2_buffer(v4l2_buffer val)
      : timestamp {val.timestamp}
      , m {.offset = val.m.offset} {
      index = val.index;
      type = val.type;
      bytesused = val.bytesused;
      flags = val.flags;
      field = val.field;
      timecode = val.timecode;
      sequence = val.sequence;
      memory = val.memory;
      length = val.length;
      reserved2 = val.reserved2;
      request_fd = val.request_fd;
    }
  };

  // Guest (x86-32) view of v4l2_framebuffer; `base` is a compat_ptr instead of a host pointer.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_framebuffer") FEX_ANNOTATE("fex-match") fex_v4l2_framebuffer {
    uint32_t capability;
    uint32_t flags;
    compat_ptr<void> base;
    struct {
      uint32_t width;
      uint32_t height;
      uint32_t pixelformat;
      uint32_t field;
      uint32_t bytesperline;
      uint32_t sizeimage;
      uint32_t colorspace;
      uint32_t priv;
    } fmt;

    fex_v4l2_framebuffer() = delete;

    // Guest -> host. `fmt` is copied as raw bytes, sized by the guest-side member.
    operator v4l2_framebuffer() const {
      v4l2_framebuffer Host {};
      Host.capability = capability;
      Host.flags = flags;
      Host.base = base;
      memcpy(&Host.fmt, &fmt, sizeof(fmt));
      return Host;
    }

    // Host -> guest. The host pointer is wrapped through auto_compat_ptr.
    fex_v4l2_framebuffer(v4l2_framebuffer val)
      : base {auto_compat_ptr {val.base}} {
      capability = val.capability;
      flags = val.flags;
      memcpy(&fmt, &val.fmt, sizeof(fmt));
    }
  };

  // Guest (x86-32) view of v4l2_standard; `id` is stored as compat_uint64_t.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_standard") FEX_ANNOTATE("fex-match") fex_v4l2_standard {
    uint32_t index;
    compat_uint64_t id;
    uint8_t name[24];
    struct v4l2_fract frameperiod;
    uint32_t framelines;
    uint32_t reserved[4];

    fex_v4l2_standard() = delete;

    // Guest -> host, one field at a time; the arrays are copied bytewise.
    operator v4l2_standard() const {
      v4l2_standard Host {};
      Host.index = index;
      Host.id = id;
      memcpy(Host.name, name, sizeof(name));
      Host.frameperiod = frameperiod;
      Host.framelines = framelines;
      memcpy(Host.reserved, reserved, sizeof(reserved));
      return Host;
    }

    // Host -> guest, mirror image of the conversion operator.
    fex_v4l2_standard(v4l2_standard val) {
      index = val.index;
      id = val.id;
      memcpy(name, val.name, sizeof(name));
      frameperiod = val.frameperiod;
      framelines = val.framelines;
      memcpy(reserved, val.reserved, sizeof(reserved));
    }
  };

  // Guest (x86-32) view of v4l2_input; `std` is stored as compat_uint64_t.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_input") FEX_ANNOTATE("fex-match") fex_v4l2_input {
    uint32_t index;
    uint8_t name[32];
    uint32_t type;
    uint32_t audioset;
    uint32_t tuner;
    compat_uint64_t std;
    uint32_t status;
    uint32_t capabilities;
    uint32_t reserved[3];

    fex_v4l2_input() = delete;

    // Guest -> host, one field at a time; the arrays are copied bytewise.
    operator v4l2_input() const {
      v4l2_input Host {};
      Host.index = index;
      memcpy(Host.name, name, sizeof(name));
      Host.type = type;
      Host.audioset = audioset;
      Host.tuner = tuner;
      Host.std = std;
      Host.status = status;
      Host.capabilities = capabilities;
      memcpy(Host.reserved, reserved, sizeof(reserved));
      return Host;
    }

    // Host -> guest, mirror image of the conversion operator.
    fex_v4l2_input(v4l2_input val) {
      index = val.index;
      memcpy(name, val.name, sizeof(name));
      type = val.type;
      audioset = val.audioset;
      tuner = val.tuner;
      std = val.std;
      status = val.status;
      capabilities = val.capabilities;
      memcpy(reserved, val.reserved, sizeof(reserved));
    }
  };

  // Guest (x86-32) view of v4l2_edid; the `edid` data pointer is a compat_ptr.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_edid") FEX_ANNOTATE("fex-match") fex_v4l2_edid {
    uint32_t pad;
    uint32_t start_block;
    uint32_t blocks;
    uint32_t reserved[5];
    compat_ptr<uint8_t> edid;

    fex_v4l2_edid() = delete;

    // Guest -> host. Only the pointer value is translated; the EDID bytes are not copied.
    operator v4l2_edid() const {
      v4l2_edid Host {};
      Host.pad = pad;
      Host.start_block = start_block;
      Host.blocks = blocks;
      memcpy(Host.reserved, reserved, sizeof(reserved));
      Host.edid = edid;
      return Host;
    }

    // Host -> guest. The host pointer is wrapped through auto_compat_ptr.
    fex_v4l2_edid(v4l2_edid val)
      : edid {auto_compat_ptr {val.edid}} {
      pad = val.pad;
      start_block = val.start_block;
      blocks = val.blocks;
      memcpy(reserved, val.reserved, sizeof(reserved));
    }
  };

  // Guest (x86-32) view of v4l2_ext_controls; the `controls` array pointer is a compat_ptr.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_ext_controls") FEX_ANNOTATE("fex-match") fex_v4l2_ext_controls {
    union {
      uint32_t ctrl_class;
      uint32_t which;
    };
    uint32_t count;
    uint32_t error_idx;
    int32_t request_fd;
    uint32_t reserved[1];
    compat_ptr<struct v4l2_ext_control> controls;

    fex_v4l2_ext_controls() = delete;

    // Guest -> host. The leading union is carried over through `which`; only the pointer
    // value of `controls` is translated, the pointed-to entries are not touched.
    operator v4l2_ext_controls() const {
      v4l2_ext_controls Host {};
      Host.which = which;
      Host.count = count;
      Host.error_idx = error_idx;
      Host.request_fd = request_fd;
      Host.reserved[0] = reserved[0];
      Host.controls = controls;
      return Host;
    }

    // Host -> guest. The host pointer is wrapped through auto_compat_ptr.
    fex_v4l2_ext_controls(v4l2_ext_controls val)
      : controls {auto_compat_ptr {val.controls}} {
      which = val.which;
      count = val.count;
      error_idx = val.error_idx;
      request_fd = val.request_fd;
      reserved[0] = val.reserved[0];
    }
  };

  // Guest (x86-32) view of v4l2_event_ctrl; the value union uses compat_int64_t for its
  // 64-bit member.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_event_ctrl") FEX_ANNOTATE("fex-match") fex_v4l2_event_ctrl {
    uint32_t changes;
    uint32_t type;
    union {
      int32_t value;
      compat_int64_t value64;
    };
    uint32_t flags;
    int32_t minimum;
    int32_t maximum;
    int32_t step;
    int32_t default_value;

    // Defaulted (not deleted): this type is a member of fex_v4l2_event's union.
    fex_v4l2_event_ctrl() = default;

    // Guest -> host. The value union is carried over through `value64`.
    operator v4l2_event_ctrl() const {
      v4l2_event_ctrl Host {};
      Host.changes = changes;
      Host.type = type;
      Host.value64 = value64;
      Host.flags = flags;
      Host.minimum = minimum;
      Host.maximum = maximum;
      Host.step = step;
      Host.default_value = default_value;
      return Host;
    }

    // Host -> guest, mirror image of the conversion operator.
    fex_v4l2_event_ctrl(v4l2_event_ctrl val) {
      changes = val.changes;
      type = val.type;
      value64 = val.value64;
      flags = val.flags;
      minimum = val.minimum;
      maximum = val.maximum;
      step = val.step;
      default_value = val.default_value;
    }
  };

  // Guest (x86-32) view of v4l2_event.
  //
  // The payload union is converted member-wise according to `type` (the ctrl payload goes
  // through fex_v4l2_event_ctrl); unknown event types are copied as 64 raw bytes. The timestamp
  // is a timespec32 on the guest side.
  struct FEX_ANNOTATE("alias-x86_32-v4l2_event") FEX_ANNOTATE("fex-match") fex_v4l2_event {
    uint32_t type;
    union {
      struct v4l2_event_vsync vsync;
      fex_v4l2_event_ctrl ctrl;
      struct v4l2_event_frame_sync frame_sync;
      struct v4l2_event_src_change src_change;
      struct v4l2_event_motion_det motion_det;
      uint8_t data[64];
    } u;
    uint32_t pending;
    uint32_t sequence;
    timespec32 timestamp;
    uint32_t id;
    uint32_t reserved[8];

    fex_v4l2_event() = delete;

    // Guest -> host. The result starts zero-initialised, so payload bytes beyond the selected
    // member stay zero.
    operator v4l2_event() const {
      v4l2_event val {};
      CPYT(type);
      switch (type) {
      case V4L2_EVENT_VSYNC: CPYT(u.vsync); break;
      case V4L2_EVENT_CTRL: CPYT(u.ctrl); break;
      case V4L2_EVENT_FRAME_SYNC: CPYT(u.frame_sync); break;
      case V4L2_EVENT_SOURCE_CHANGE: CPYT(u.src_change); break;
      case V4L2_EVENT_MOTION_DET: CPYT(u.motion_det); break;
      default: memcpy(&val.u.data, &u.data, 64); break;
      }
      CPYT(pending);
      CPYT(sequence);
      CPYT(timestamp);
      CPYT(id);
      memcpy(&val.reserved, &reserved, sizeof(uint32_t) * 8);
      return val;
    }

    // Host -> guest.
    fex_v4l2_event(v4l2_event val) {
      CPYF(type);

      // The typed cases below fill only part of the 64-byte payload. Zero it first so that
      // assigning this object into guest memory never writes indeterminate bytes there.
      memset(&u, 0, sizeof(u));

      switch (type) {
      case V4L2_EVENT_VSYNC: CPYF(u.vsync); break;
      case V4L2_EVENT_CTRL: CPYF(u.ctrl); break;
      case V4L2_EVENT_FRAME_SYNC: CPYF(u.frame_sync); break;
      case V4L2_EVENT_SOURCE_CHANGE: CPYF(u.src_change); break;
      case V4L2_EVENT_MOTION_DET: CPYF(u.motion_det); break;
      default: memcpy(&u.data, &val.u.data, 64); break;
      }
      CPYF(pending);
      CPYF(sequence);
      CPYF(timestamp);
      CPYF(id);
      memcpy(&reserved, &val.reserved, sizeof(uint32_t) * 8);
    }
  };

  // Guest (x86-32) view of upstream_v4l2_create_buffers; differs from the host struct through
  // the embedded fex_v4l2_format.
  struct FEX_ANNOTATE("alias-x86_32-upstream_v4l2_create_buffers") FEX_ANNOTATE("fex-match") fex_v4l2_create_buffers {
    uint32_t index;
    uint32_t count;
    uint32_t memory;
    fex_v4l2_format format;
    uint32_t capabilities;
    uint32_t flags;
    uint32_t max_num_buffers;
    uint32_t reserved[5];

    fex_v4l2_create_buffers() = delete;

    // Guest -> host. `format` goes through fex_v4l2_format's own conversion operator.
    operator upstream_v4l2_create_buffers() const {
      upstream_v4l2_create_buffers Host {};
      Host.index = index;
      Host.count = count;
      Host.memory = memory;
      Host.format = format;
      Host.capabilities = capabilities;
      Host.flags = flags;
      Host.max_num_buffers = max_num_buffers;
      memcpy(Host.reserved, reserved, sizeof(reserved));
      return Host;
    }

    // Host -> guest. `format` has no default constructor, so it is built in the initializer list.
    fex_v4l2_create_buffers(upstream_v4l2_create_buffers val)
      : format {val.format} {
      index = val.index;
      count = val.count;
      memory = val.memory;
      capabilities = val.capabilities;
      flags = val.flags;
      max_num_buffers = val.max_num_buffers;
      memcpy(reserved, val.reserved, sizeof(reserved));
    }
  };

#include "LinuxSyscalls/x32/Ioctl/v4l2.inl"
} // namespace V4l2
} // namespace FEX::HLE::x32
#undef CPYT
#undef CPYF


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/v4l2.inl
================================================
// X-macro list of the V4L2 ioctls.
//
// This list is included twice:
//  - by v4l2.h, inside namespace FEX::HLE::x32::V4l2, with the macro definitions from
//    HelperDefines.h (not shown here - NOTE(review): presumably that is where the FEX_VIDIOC_*
//    names used by V4l2Handler come from; confirm);
//  - by V4l2Handler in IoctlEmulation.cpp, where _BASIC_META(x) becomes `case _IOC_NR(x):` and
//    _CUSTOM_META expands to nothing, so every _BASIC_META entry is forwarded to the host
//    unchanged while the _CUSTOM_META entries are handled by hand-written cases.
//
// _CUSTOM_META entries spell out the ioctl number using the guest-layout struct.

// Fallback for system headers that do not define VIDIOC_REMOVE_BUFS.
#ifndef VIDIOC_REMOVE_BUFS
#define VIDIOC_REMOVE_BUFS _IOWR('V', 104, struct upstream_v4l2_remove_buffers)
#endif

_BASIC_META(VIDIOC_QUERYCAP)
_BASIC_META(VIDIOC_ENUM_FMT)
_CUSTOM_META(VIDIOC_G_FMT, _IOWR('V', 4, FEX::HLE::x32::V4l2::fex_v4l2_format))
_CUSTOM_META(VIDIOC_S_FMT, _IOWR('V', 5, FEX::HLE::x32::V4l2::fex_v4l2_format))
_BASIC_META(VIDIOC_REQBUFS)
_CUSTOM_META(VIDIOC_QUERYBUF, _IOWR('V',  9, FEX::HLE::x32::V4l2::fex_v4l2_buffer))
_CUSTOM_META(VIDIOC_G_FBUF, _IOR('V', 10, FEX::HLE::x32::V4l2::fex_v4l2_framebuffer))
_CUSTOM_META(VIDIOC_S_FBUF, _IOW('V', 11, FEX::HLE::x32::V4l2::fex_v4l2_framebuffer))
_BASIC_META(VIDIOC_OVERLAY)
_CUSTOM_META(VIDIOC_QBUF, _IOWR('V', 15, FEX::HLE::x32::V4l2::fex_v4l2_buffer))
_BASIC_META(VIDIOC_EXPBUF)
_CUSTOM_META(VIDIOC_DQBUF, _IOWR('V', 17, FEX::HLE::x32::V4l2::fex_v4l2_buffer))
_BASIC_META(VIDIOC_STREAMON)
_BASIC_META(VIDIOC_STREAMOFF)
_BASIC_META(VIDIOC_G_PARM)
_BASIC_META(VIDIOC_S_PARM)
_BASIC_META(VIDIOC_G_STD)
_BASIC_META(VIDIOC_S_STD)
_CUSTOM_META(VIDIOC_ENUMSTD, _IOWR('V', 25, FEX::HLE::x32::V4l2::fex_v4l2_standard))
_CUSTOM_META(VIDIOC_ENUMINPUT, _IOWR('V', 26, FEX::HLE::x32::V4l2::fex_v4l2_input))
_BASIC_META(VIDIOC_G_CTRL)
_BASIC_META(VIDIOC_S_CTRL)
_BASIC_META(VIDIOC_G_TUNER)
_BASIC_META(VIDIOC_S_TUNER)
_BASIC_META(VIDIOC_G_AUDIO)
_BASIC_META(VIDIOC_S_AUDIO)
_BASIC_META(VIDIOC_QUERYCTRL)
_BASIC_META(VIDIOC_QUERYMENU)
_BASIC_META(VIDIOC_G_INPUT)
_BASIC_META(VIDIOC_S_INPUT)
_CUSTOM_META(VIDIOC_G_EDID, _IOWR('V', 40, FEX::HLE::x32::V4l2::fex_v4l2_edid))
_CUSTOM_META(VIDIOC_S_EDID, _IOWR('V', 41, FEX::HLE::x32::V4l2::fex_v4l2_edid))
_BASIC_META(VIDIOC_G_OUTPUT)
_BASIC_META(VIDIOC_S_OUTPUT)
_BASIC_META(VIDIOC_ENUMOUTPUT)
_BASIC_META(VIDIOC_G_AUDOUT)
_BASIC_META(VIDIOC_S_AUDOUT)
_BASIC_META(VIDIOC_G_MODULATOR)
_BASIC_META(VIDIOC_S_MODULATOR)
_BASIC_META(VIDIOC_G_FREQUENCY)
_BASIC_META(VIDIOC_S_FREQUENCY)
_BASIC_META(VIDIOC_CROPCAP)
_BASIC_META(VIDIOC_G_CROP)
_BASIC_META(VIDIOC_S_CROP)
_BASIC_META(VIDIOC_G_JPEGCOMP)
_BASIC_META(VIDIOC_S_JPEGCOMP)
_BASIC_META(VIDIOC_QUERYSTD)
_CUSTOM_META(VIDIOC_TRY_FMT, _IOWR('V', 64, FEX::HLE::x32::V4l2::fex_v4l2_format))
_BASIC_META(VIDIOC_ENUMAUDIO)
_BASIC_META(VIDIOC_ENUMAUDOUT)
_BASIC_META(VIDIOC_G_PRIORITY)
_BASIC_META(VIDIOC_S_PRIORITY)
_BASIC_META(VIDIOC_G_SLICED_VBI_CAP)
_BASIC_META(VIDIOC_LOG_STATUS)
_CUSTOM_META(VIDIOC_G_EXT_CTRLS, _IOWR('V', 71, FEX::HLE::x32::V4l2::fex_v4l2_ext_controls))
_CUSTOM_META(VIDIOC_S_EXT_CTRLS, _IOWR('V', 72, FEX::HLE::x32::V4l2::fex_v4l2_ext_controls))
_CUSTOM_META(VIDIOC_TRY_EXT_CTRLS, _IOWR('V', 73, FEX::HLE::x32::V4l2::fex_v4l2_ext_controls))
_BASIC_META(VIDIOC_ENUM_FRAMESIZES)
_BASIC_META(VIDIOC_ENUM_FRAMEINTERVALS)
_BASIC_META(VIDIOC_G_ENC_INDEX)
_BASIC_META(VIDIOC_ENCODER_CMD)
_BASIC_META(VIDIOC_TRY_ENCODER_CMD)
_BASIC_META(VIDIOC_DBG_S_REGISTER)
_BASIC_META(VIDIOC_DBG_G_REGISTER)
_BASIC_META(VIDIOC_S_HW_FREQ_SEEK)
_BASIC_META(VIDIOC_S_DV_TIMINGS)
_BASIC_META(VIDIOC_G_DV_TIMINGS)
_CUSTOM_META(VIDIOC_DQEVENT,  _IOR('V', 89, FEX::HLE::x32::V4l2::fex_v4l2_event))
_BASIC_META(VIDIOC_SUBSCRIBE_EVENT)
_BASIC_META(VIDIOC_UNSUBSCRIBE_EVENT)
_CUSTOM_META(VIDIOC_CREATE_BUFS, _IOWR('V', 92, FEX::HLE::x32::V4l2::fex_v4l2_create_buffers))
_CUSTOM_META(VIDIOC_PREPARE_BUF, _IOWR('V', 93, FEX::HLE::x32::V4l2::fex_v4l2_buffer))
_BASIC_META(VIDIOC_G_SELECTION)
_BASIC_META(VIDIOC_S_SELECTION)
_BASIC_META(VIDIOC_DECODER_CMD)
_BASIC_META(VIDIOC_TRY_DECODER_CMD)
_BASIC_META(VIDIOC_ENUM_DV_TIMINGS)
_BASIC_META(VIDIOC_QUERY_DV_TIMINGS)
_BASIC_META(VIDIOC_DV_TIMINGS_CAP)
_BASIC_META(VIDIOC_ENUM_FREQ_BANDS)
_BASIC_META(VIDIOC_DBG_G_CHIP_INFO)
_BASIC_META(VIDIOC_QUERY_EXT_CTRL)
_BASIC_META(VIDIOC_REMOVE_BUFS)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/vc4_drm.inl
================================================
// X-macro list of the VC4 DRM ioctls.
// Every entry is a _BASIC_META/_CUSTOM_META invocation, so the including file must define those
// macros before pulling this list in; the list itself declares nothing.
// NOTE(review): the includer of this list is not visible from here - presumably it is consumed the
// same way as the other Ioctl/*.inl lists; confirm.
_BASIC_META(DRM_IOCTL_VC4_SUBMIT_CL)
_BASIC_META(DRM_IOCTL_VC4_WAIT_SEQNO)
_BASIC_META(DRM_IOCTL_VC4_WAIT_BO)
_BASIC_META(DRM_IOCTL_VC4_CREATE_BO)
_BASIC_META(DRM_IOCTL_VC4_MMAP_BO)
_BASIC_META(DRM_IOCTL_VC4_CREATE_SHADER_BO)
_BASIC_META(DRM_IOCTL_VC4_GET_HANG_STATE)
_BASIC_META(DRM_IOCTL_VC4_GET_PARAM)
_BASIC_META(DRM_IOCTL_VC4_SET_TILING)
_BASIC_META(DRM_IOCTL_VC4_GET_TILING)
_BASIC_META(DRM_IOCTL_VC4_LABEL_BO)
_BASIC_META(DRM_IOCTL_VC4_GEM_MADVISE)
_BASIC_META(DRM_IOCTL_VC4_PERFMON_CREATE)
_BASIC_META(DRM_IOCTL_VC4_PERFMON_DESTROY)
// The only entry that names a FEX-side struct (fex_drm_vc4_perfmon_get_values) instead of the host one.
_CUSTOM_META(DRM_IOCTL_VC4_PERFMON_GET_VALUES, DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_PERFMON_GET_VALUES, FEX::HLE::x32::VC4::fex_drm_vc4_perfmon_get_values)) // XXX: This will still be incorrect on x86-64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/virtio_drm.inl
================================================
// X-macro list of the virtio-gpu DRM ioctls. All entries are _BASIC_META, i.e. none of them
// names a FEX-side replacement struct.
// The including file must define _BASIC_META before pulling this list in.
// NOTE(review): the includer of this list is not visible from here; confirm how it is consumed.
_BASIC_META(DRM_IOCTL_VIRTGPU_MAP)
_BASIC_META(DRM_IOCTL_VIRTGPU_EXECBUFFER)
_BASIC_META(DRM_IOCTL_VIRTGPU_GETPARAM)
_BASIC_META(DRM_IOCTL_VIRTGPU_RESOURCE_CREATE)
_BASIC_META(DRM_IOCTL_VIRTGPU_RESOURCE_INFO)
_BASIC_META(DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST)
_BASIC_META(DRM_IOCTL_VIRTGPU_TRANSFER_TO_HOST)
_BASIC_META(DRM_IOCTL_VIRTGPU_WAIT)
_BASIC_META(DRM_IOCTL_VIRTGPU_GET_CAPS)
_BASIC_META(DRM_IOCTL_VIRTGPU_RESOURCE_CREATE_BLOB)
_BASIC_META(DRM_IOCTL_VIRTGPU_CONTEXT_INIT)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/wireless.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x32/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/wireless.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x32 {
// Holds whatever the wireless ioctl list expands to under the _BASIC_META/_CUSTOM_META
// definitions active at this point (HelperDefines.h is included above; its macro bodies are
// not shown in this file).
namespace wireless {
#include "LinuxSyscalls/x32/Ioctl/wireless.inl"
}

} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/wireless.inl
================================================
// X-macro list of the wireless-extension socket ioctls from <linux/wireless.h>.
// All entries are _BASIC_META, i.e. none of them names a FEX-side replacement struct.
// wireless.h includes this list inside namespace FEX::HLE::x32::wireless.
_BASIC_META(SIOCSIWCOMMIT)
_BASIC_META(SIOCGIWNAME)
_BASIC_META(SIOCSIWNWID)
_BASIC_META(SIOCGIWNWID)
_BASIC_META(SIOCSIWFREQ)
_BASIC_META(SIOCGIWFREQ)
_BASIC_META(SIOCSIWMODE)
_BASIC_META(SIOCGIWMODE)
_BASIC_META(SIOCSIWSENS)
_BASIC_META(SIOCGIWSENS)
_BASIC_META(SIOCSIWRANGE)
_BASIC_META(SIOCGIWRANGE)
_BASIC_META(SIOCSIWPRIV)
_BASIC_META(SIOCGIWPRIV)
_BASIC_META(SIOCSIWSTATS)
_BASIC_META(SIOCGIWSTATS)
_BASIC_META(SIOCSIWSPY)
_BASIC_META(SIOCGIWSPY)
_BASIC_META(SIOCSIWTHRSPY)
_BASIC_META(SIOCGIWTHRSPY)
_BASIC_META(SIOCSIWAP)
_BASIC_META(SIOCGIWAP)
_BASIC_META(SIOCGIWAPLIST)
_BASIC_META(SIOCSIWSCAN)
_BASIC_META(SIOCGIWSCAN)
_BASIC_META(SIOCSIWESSID)
_BASIC_META(SIOCGIWESSID)
_BASIC_META(SIOCSIWNICKN)
_BASIC_META(SIOCGIWNICKN)
_BASIC_META(SIOCSIWRATE)
_BASIC_META(SIOCGIWRATE)
_BASIC_META(SIOCSIWRTS)
_BASIC_META(SIOCGIWRTS)
_BASIC_META(SIOCSIWFRAG)
_BASIC_META(SIOCGIWFRAG)
_BASIC_META(SIOCSIWTXPOW)
_BASIC_META(SIOCGIWTXPOW)
_BASIC_META(SIOCSIWRETRY)
_BASIC_META(SIOCGIWRETRY)
_BASIC_META(SIOCSIWENCODE)
_BASIC_META(SIOCGIWENCODE)
_BASIC_META(SIOCSIWPOWER)
_BASIC_META(SIOCGIWPOWER)
_BASIC_META(SIOCSIWGENIE)
_BASIC_META(SIOCGIWGENIE)
_BASIC_META(SIOCSIWMLME)
_BASIC_META(SIOCSIWAUTH)
_BASIC_META(SIOCGIWAUTH)
_BASIC_META(SIOCSIWENCODEEXT)
_BASIC_META(SIOCGIWENCODEEXT)
_BASIC_META(SIOCSIWPMKSA)

// Only the first and last private ioctl numbers are listed, not the values in between.
_BASIC_META(SIOCIWFIRSTPRIV)
_BASIC_META(SIOCIWLASTPRIV)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctl/xe_drm.inl
================================================
// X-macro list of the Xe DRM ioctls. All entries are _BASIC_META, i.e. none of them names a
// FEX-side replacement struct.
// The including file must define _BASIC_META before pulling this list in.
// NOTE(review): the includer of this list is not visible from here; confirm how it is consumed.
_BASIC_META(DRM_IOCTL_XE_DEVICE_QUERY)
_BASIC_META(DRM_IOCTL_XE_GEM_CREATE)
_BASIC_META(DRM_IOCTL_XE_GEM_MMAP_OFFSET)
_BASIC_META(DRM_IOCTL_XE_VM_CREATE)
_BASIC_META(DRM_IOCTL_XE_VM_DESTROY)
_BASIC_META(DRM_IOCTL_XE_VM_BIND)
_BASIC_META(DRM_IOCTL_XE_EXEC_QUEUE_CREATE)
_BASIC_META(DRM_IOCTL_XE_EXEC_QUEUE_DESTROY)
_BASIC_META(DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY)
_BASIC_META(DRM_IOCTL_XE_EXEC)
_BASIC_META(DRM_IOCTL_XE_WAIT_USER_FENCE)
_BASIC_META(DRM_IOCTL_XE_OBSERVATION)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Ioctl/drm.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Ioctl/asound.h"
#include "LinuxSyscalls/x32/Ioctl/drm.h"
#include "LinuxSyscalls/x32/Ioctl/usbdev.h"
#include "LinuxSyscalls/x32/Ioctl/streams.h"
#include "LinuxSyscalls/x32/Ioctl/sockios.h"
#include "LinuxSyscalls/x32/Ioctl/input.h"
#include "LinuxSyscalls/x32/Ioctl/joystick.h"
#include "LinuxSyscalls/x32/Ioctl/wireless.h"
#include "LinuxSyscalls/x32/Ioctl/v4l2.h"
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/map.h>
#include <FEXCore/fextl/vector.h>

#include <cstdint>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace FEX::HLE::x32 {
// Logs an ioctl that none of the handlers in this file recognised.
//
// Type: short label of the subsystem whose handler rejected the ioctl (e.g. "V4L2", "AMDGPU").
// fd, cmd, args: the guest's ioctl arguments as passed to the handler.
//
// The command is also printed split into its direction, type, number and size fields.
static void UnhandledIoctl(const char* Type, int fd, uint32_t cmd, uint32_t args) {
  LogMan::Msg::EFmt("@@@@@@@@@@@@@@@@@@@@@@@@@");
  LogMan::Msg::EFmt("Unhandled {} ioctl({}, 0x{:08x}, 0x{:08x})", Type, fd, cmd, args);
  LogMan::Msg::EFmt("\tDir  : 0x{:x}", _IOC_DIR(cmd));
  LogMan::Msg::EFmt("\tType : 0x{:x}", _IOC_TYPE(cmd));
  LogMan::Msg::EFmt("\tNR   : 0x{:x}", _IOC_NR(cmd));
  LogMan::Msg::EFmt("\tSIZE : 0x{:x}", _IOC_SIZE(cmd));
  // NOTE(review): the closing banner goes through AFmt while every other line uses EFmt -
  // presumably AFmt is the assertion-level logger; confirm this is intentional.
  LogMan::Msg::AFmt("@@@@@@@@@@@@@@@@@@@@@@@@@");
}

namespace BasicHandler {
  // Forwards the guest's ioctl to the host exactly as given: same fd, same command, and the
  // 32-bit argument passed through as an integer.
  // Frame is not referenced directly in this body.
  uint32_t BasicHandler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    uint64_t Result = ::ioctl(fd, cmd, args);
    // SYSCALL_ERRNO() is defined elsewhere; as it is the last statement of a non-void function it
    // must produce the return value (presumably -errno on failure, Result otherwise - confirm).
    SYSCALL_ERRNO();
  }
} // namespace BasicHandler

namespace V4l2 {
  // Forwards the guest's ioctl to the host exactly as given.
  static uint32_t ForwardUnmodified(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    uint64_t Result = ::ioctl(fd, cmd, args);
    SYSCALL_ERRNO();
  }

  // True for the two buffer types that V4l2Handler passes through without conversion.
  static bool IsOverlayBufferType(uint32_t Type) {
    return Type == V4L2_BUF_TYPE_VIDEO_OVERLAY || Type == V4L2_BUF_TYPE_VIDEO_OUTPUT_OVERLAY;
  }

  // Runs HostCmd on an already-converted host struct. Unless the ioctl returned -1, the host
  // struct is converted back into the guest's struct before the result is returned.
  template<typename GuestType, typename HostType>
  static uint32_t IssueAndWriteBack(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t HostCmd, GuestType* Guest, HostType& Host) {
    uint64_t Result = ::ioctl(fd, HostCmd, &Host);
    if (Result != -1) {
      *Guest = Host;
    }
    SYSCALL_ERRNO();
  }

  // Treats `args` as a pointer to a GuestType, converts it to HostType and runs HostCmd on it.
  template<typename GuestType, typename HostType>
  static uint32_t ConvertAndIssue(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t HostCmd, uint32_t args) {
    auto* Guest = reinterpret_cast<GuestType*>(args);
    HostType Host = *Guest;
    return IssueAndWriteBack(Frame, fd, HostCmd, Guest, Host);
  }

  // Shared body of VIDIOC_S_FMT and VIDIOC_TRY_FMT: overlay buffer types are forwarded
  // unmodified with the guest's command, everything else goes through the converted struct.
  static uint32_t ConvertAndIssueFormat(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t HostCmd, uint32_t args) {
    auto* Guest = reinterpret_cast<fex_v4l2_format*>(args);
    v4l2_format Host = *Guest;
    if (IsOverlayBufferType(Host.type)) {
      return ForwardUnmodified(Frame, fd, cmd, args);
    }
    return IssueAndWriteBack(Frame, fd, HostCmd, Guest, Host);
  }

  // Entry point for V4L2 ioctls coming from the 32-bit guest.
  //
  // Dispatches on the ioctl number. Commands with a guest-specific struct are converted to the
  // host struct, issued with the host command, and converted back on success. Every command
  // listed as _BASIC_META in v4l2.inl is forwarded unchanged. Anything else is logged through
  // UnhandledIoctl and answered with -EPERM.
  uint32_t V4l2Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_VIDIOC_G_FMT): {
      // Only the buffer type is handed to the host; the rest of the host struct starts out zeroed.
      auto* Guest = reinterpret_cast<fex_v4l2_format*>(args);
      v4l2_format Host {.type = Guest->type};
      if (IsOverlayBufferType(Host.type)) {
        return ForwardUnmodified(Frame, fd, cmd, args);
      }
      return IssueAndWriteBack(Frame, fd, VIDIOC_G_FMT, Guest, Host);
    }
    case _IOC_NR(FEX_VIDIOC_S_FMT): return ConvertAndIssueFormat(Frame, fd, cmd, VIDIOC_S_FMT, args);
    case _IOC_NR(FEX_VIDIOC_TRY_FMT): return ConvertAndIssueFormat(Frame, fd, cmd, VIDIOC_TRY_FMT, args);

    case _IOC_NR(FEX_VIDIOC_QUERYBUF): return ConvertAndIssue<fex_v4l2_buffer, v4l2_buffer>(Frame, fd, VIDIOC_QUERYBUF, args);
    case _IOC_NR(FEX_VIDIOC_QBUF): return ConvertAndIssue<fex_v4l2_buffer, v4l2_buffer>(Frame, fd, VIDIOC_QBUF, args);
    case _IOC_NR(FEX_VIDIOC_DQBUF): return ConvertAndIssue<fex_v4l2_buffer, v4l2_buffer>(Frame, fd, VIDIOC_DQBUF, args);
    case _IOC_NR(FEX_VIDIOC_PREPARE_BUF): return ConvertAndIssue<fex_v4l2_buffer, v4l2_buffer>(Frame, fd, VIDIOC_PREPARE_BUF, args);

    case _IOC_NR(FEX_VIDIOC_G_FBUF): return ConvertAndIssue<fex_v4l2_framebuffer, v4l2_framebuffer>(Frame, fd, VIDIOC_G_FBUF, args);
    case _IOC_NR(FEX_VIDIOC_S_FBUF): return ConvertAndIssue<fex_v4l2_framebuffer, v4l2_framebuffer>(Frame, fd, VIDIOC_S_FBUF, args);

    case _IOC_NR(FEX_VIDIOC_ENUMSTD): return ConvertAndIssue<fex_v4l2_standard, v4l2_standard>(Frame, fd, VIDIOC_ENUMSTD, args);
    case _IOC_NR(FEX_VIDIOC_ENUMINPUT): return ConvertAndIssue<fex_v4l2_input, v4l2_input>(Frame, fd, VIDIOC_ENUMINPUT, args);

    case _IOC_NR(FEX_VIDIOC_G_EDID): return ConvertAndIssue<fex_v4l2_edid, v4l2_edid>(Frame, fd, VIDIOC_G_EDID, args);
    case _IOC_NR(FEX_VIDIOC_S_EDID): return ConvertAndIssue<fex_v4l2_edid, v4l2_edid>(Frame, fd, VIDIOC_S_EDID, args);

    case _IOC_NR(FEX_VIDIOC_G_EXT_CTRLS):
      return ConvertAndIssue<fex_v4l2_ext_controls, v4l2_ext_controls>(Frame, fd, VIDIOC_G_EXT_CTRLS, args);
    case _IOC_NR(FEX_VIDIOC_S_EXT_CTRLS):
      return ConvertAndIssue<fex_v4l2_ext_controls, v4l2_ext_controls>(Frame, fd, VIDIOC_S_EXT_CTRLS, args);
    case _IOC_NR(FEX_VIDIOC_TRY_EXT_CTRLS):
      return ConvertAndIssue<fex_v4l2_ext_controls, v4l2_ext_controls>(Frame, fd, VIDIOC_TRY_EXT_CTRLS, args);

    case _IOC_NR(FEX_VIDIOC_DQEVENT): return ConvertAndIssue<fex_v4l2_event, v4l2_event>(Frame, fd, VIDIOC_DQEVENT, args);
    case _IOC_NR(FEX_VIDIOC_CREATE_BUFS):
      return ConvertAndIssue<fex_v4l2_create_buffers, upstream_v4l2_create_buffers>(Frame, fd, VIDIOC_CREATE_BUFS, args);

      // Every _BASIC_META entry of the list becomes a case label that lands on the pass-through
      // below; _CUSTOM_META entries expand to nothing because they are handled above.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
#include "LinuxSyscalls/x32/Ioctl/v4l2.inl"
      return ForwardUnmodified(Frame, fd, cmd, args);
    default:
      UnhandledIoctl("V4L2", fd, cmd, args);
      return -EPERM;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }
} // namespace V4l2

namespace DRM {
  uint32_t AddAndRunHandler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args);
  void AssignDeviceTypeToFD(FEXCore::Core::CpuStateFrame* Frame, int fd, const drm_version& Version);

  // Looks up the DRM handler recorded for FD in the calling thread's DRMLRUCache, creating the
  // cache first if this thread does not have one yet.
  DRMLRUCacheFDCache::HandlerType FindHandler(FEXCore::Core::CpuStateFrame* Frame, int32_t FD) {
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    auto& Cache = ThreadObject->DRMLRUCache;
    if (!Cache) {
      Cache = fextl::make_unique<DRMLRUCacheFDCache>();
    }
    return Cache->FindHandler(FD);
  }

  // Hands the ioctl to the calling thread's DRMLRUCache via AddAndRunMapHandler, creating the
  // cache first if this thread does not have one yet. Returns whatever that call returns.
  uint32_t AddAndRunHandler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    auto& Cache = ThreadObject->DRMLRUCache;
    if (!Cache) {
      Cache = fextl::make_unique<DRMLRUCacheFDCache>();
    }

    return Cache->AddAndRunMapHandler(Frame, fd, cmd, args);
  }

  // Tells the calling thread's DRMLRUCache that NewFD is a duplicate of fd (DuplicateFD),
  // creating the cache first if this thread does not have one yet.
  void CheckAndAddFDDuplication(FEXCore::Core::CpuStateFrame* Frame, int fd, int NewFD) {
    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    auto& Cache = ThreadObject->DRMLRUCache;
    if (!Cache) {
      Cache = fextl::make_unique<DRMLRUCacheFDCache>();
    }
    Cache->DuplicateFD(fd, NewFD);
  }

  // Entry point for AMDGPU DRM ioctls coming from the 32-bit guest.
  //
  // GEM_METADATA is converted to the host struct, issued, and converted back on success. Every
  // command listed as _BASIC_META in amdgpu_drm.inl is forwarded unchanged. Anything else is
  // logged through UnhandledIoctl and answered with -EPERM.
  uint32_t AMDGPU_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_DRM_IOCTL_AMDGPU_GEM_METADATA): {
      auto* GuestMetadata = reinterpret_cast<AMDGPU::fex_drm_amdgpu_gem_metadata*>(args);
      drm_amdgpu_gem_metadata HostMetadata = *GuestMetadata;
      uint64_t Result = ioctl(fd, DRM_IOCTL_AMDGPU_GEM_METADATA, &HostMetadata);
      // Leave the guest struct untouched when the host ioctl failed.
      if (Result != -1) {
        *GuestMetadata = HostMetadata;
      }
      SYSCALL_ERRNO();
      break;
    }
      // Every _BASIC_META entry of the list becomes a case label that lands on the pass-through
      // block below; _CUSTOM_META entries expand to nothing because they are handled above.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
    // DRM
#include "LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default: {
      UnhandledIoctl("AMDGPU", fd, cmd, args);
      return -EPERM;
    }
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles radeon driver ioctls issued by a 32-bit guest.
  // Ioctls whose argument struct differs between guest and host go through
  // RADEON_CONVERTED; the ones listed in radeon_drm.inl are forwarded unchanged.
  // Unknown ioctl numbers are reported and rejected with -EPERM.
  uint32_t RADEON_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
// Emits one `case` that converts the guest struct to the host type, issues the host
// ioctl and, unless the ioctl failed, converts the host struct back into guest memory.
#define RADEON_CONVERTED(request, type)                                      \
  case _IOC_NR(FEX_##request): {                                             \
    RADEON::fex_##type* Guest = reinterpret_cast<RADEON::fex_##type*>(args); \
    type Host = *Guest;                                                      \
    uint64_t Result = ioctl(fd, request, &Host);                             \
    if (Result != -1) {                                                      \
      *Guest = Host;                                                         \
    }                                                                        \
    SYSCALL_ERRNO();                                                         \
    break;                                                                   \
  }

    switch (_IOC_NR(cmd)) {
      RADEON_CONVERTED(DRM_IOCTL_RADEON_CP_INIT, drm_radeon_init_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_CLEAR, drm_radeon_clear_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_STIPPLE, drm_radeon_stipple_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_TEXTURE, drm_radeon_texture_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_VERTEX2, drm_radeon_vertex2_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_CMDBUF, drm_radeon_cmd_buffer_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_GETPARAM, drm_radeon_getparam_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_ALLOC, drm_radeon_mem_alloc_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_IRQ_EMIT, drm_radeon_irq_emit_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_SETPARAM, drm_radeon_setparam_t)
      RADEON_CONVERTED(DRM_IOCTL_RADEON_GEM_CREATE, drm_radeon_gem_create)

      // Every _BASIC_META entry of the included list becomes a `case` label that
      // falls into the passthrough block right after the include.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/radeon_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("RADEON", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef RADEON_CONVERTED
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles msm driver ioctls issued by a 32-bit guest.
  // WAIT_FENCE needs a guest<->host struct conversion; the ioctls listed in msm_drm.inl
  // are forwarded unchanged. Unknown ioctl numbers are reported and rejected with -EPERM.
  uint32_t MSM_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_DRM_IOCTL_MSM_WAIT_FENCE): {
      // Convert to the host struct, run the host ioctl, write back only if it did not fail.
      MSM::fex_drm_msm_wait_fence* val = reinterpret_cast<MSM::fex_drm_msm_wait_fence*>(args);
      drm_msm_wait_fence Host_val = *val;
      uint64_t Result = ::ioctl(fd, DRM_IOCTL_MSM_WAIT_FENCE, &Host_val);
      if (Result != -1) {
        *val = Host_val;
      }
      SYSCALL_ERRNO();
      break;
    }

    // Each _BASIC_META entry of the included list becomes a `case` label; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
    // DRM
#include "LinuxSyscalls/x32/Ioctl/msm_drm.inl"
      {
        // Shared body of the generated case labels: plain passthrough.
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("MSM", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles nouveau driver ioctls issued by a 32-bit guest.
  // No struct conversion is done: the ioctls listed in nouveau_drm.inl and the NVIF ioctl
  // are forwarded to the host unchanged. Unknown ioctl numbers are rejected with -EPERM.
  uint32_t Nouveau_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
      // Each _BASIC_META entry of the included list becomes a `case` label; those labels
      // fall through into the NVIF case below and share its passthrough body.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/nouveau_drm.inl"
    // Let's hope NVIF is arch agnostic.
    case DRM_COMMAND_BASE + DRM_NOUVEAU_NVIF: {
      uint64_t Result = ::ioctl(fd, cmd, args);
      SYSCALL_ERRNO();
      break;
    }
    default:
      UnhandledIoctl("Nouveau", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles i915 driver ioctls issued by a 32-bit guest.
  // Ioctls named in the SIMPLE(...) list get a guest<->host struct conversion; the ioctls
  // listed in i915_drm.inl are forwarded unchanged. Unknown ioctl numbers are rejected with -EPERM.
  uint32_t I915_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
// SIMPLE(enum, type) emits one `case` for FEX_<enum>: convert the guest I915::fex_<type>
// into the host <type>, issue the host ioctl <enum>, and copy the host struct back into
// guest memory only when the ioctl did not return -1.
#define SIMPLE(enum, type)                                               \
  case _IOC_NR(FEX_##enum): {                                            \
    I915::fex_##type* guest = reinterpret_cast<I915::fex_##type*>(args); \
    type host = *guest;                                                  \
    uint64_t Result = ::ioctl(fd, enum, &host);                          \
    if (Result != -1) {                                                  \
      *guest = host;                                                     \
    }                                                                    \
    SYSCALL_ERRNO();                                                     \
    break;                                                               \
  }


    switch (_IOC_NR(cmd)) {
      SIMPLE(DRM_IOCTL_I915_BATCHBUFFER, drm_i915_batchbuffer_t)
      SIMPLE(DRM_IOCTL_I915_IRQ_EMIT, drm_i915_irq_emit_t)
      SIMPLE(DRM_IOCTL_I915_GETPARAM, drm_i915_getparam_t)
      SIMPLE(DRM_IOCTL_I915_ALLOC, drm_i915_mem_alloc_t)
      SIMPLE(DRM_IOCTL_I915_CMDBUFFER, drm_i915_cmdbuffer_t)

      // Each _BASIC_META entry of the included list becomes a `case` label that falls
      // into the passthrough block after the include.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/i915_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("I915", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef SIMPLE
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles panfrost driver ioctls issued by a 32-bit guest.
  // No struct conversion is done: ioctls listed in panfrost_drm.inl are forwarded to the
  // host unchanged, anything else is reported and rejected with -EPERM.
  uint32_t Panfrost_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
      // Each _BASIC_META entry of the included list becomes a `case` label that falls
      // into the passthrough block after the include; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/panfrost_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("Panfrost", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles lima driver ioctls issued by a 32-bit guest.
  // No struct conversion is done: ioctls listed in lima_drm.inl are forwarded to the
  // host unchanged, anything else is reported and rejected with -EPERM.
  uint32_t Lima_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
      // Each _BASIC_META entry of the included list becomes a `case` label that falls
      // into the passthrough block after the include; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/lima_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("Lima", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles vc4 driver ioctls issued by a 32-bit guest.
  // PERFMON_GET_VALUES needs a guest<->host struct conversion; the ioctls listed in
  // vc4_drm.inl are forwarded unchanged. Unknown ioctl numbers are rejected with -EPERM.
  uint32_t VC4_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_DRM_IOCTL_VC4_PERFMON_GET_VALUES): {
      // Convert to the host struct, run the host ioctl, write back only if it did not fail.
      FEX::HLE::x32::VC4::fex_drm_vc4_perfmon_get_values* val = reinterpret_cast<FEX::HLE::x32::VC4::fex_drm_vc4_perfmon_get_values*>(args);
      drm_vc4_perfmon_get_values Host_val = *val;
      uint64_t Result = ::ioctl(fd, DRM_IOCTL_VC4_PERFMON_GET_VALUES, &Host_val);
      if (Result != -1) {
        *val = Host_val;
      }
      SYSCALL_ERRNO();
      break;
    }

    // Each _BASIC_META entry of the included list becomes a `case` label; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
    // DRM
#include "LinuxSyscalls/x32/Ioctl/vc4_drm.inl"
      {
        // Shared body of the generated case labels: plain passthrough.
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("VC4", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles v3d driver ioctls issued by a 32-bit guest.
  // SUBMIT_CSD is converted with the size-aware SafeConvertToHost/SafeConvertToGuest helpers;
  // the ioctls listed in v3d_drm.inl are forwarded unchanged. Unknown ioctl numbers are
  // rejected with -EPERM.
  uint32_t V3D_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_DRM_IOCTL_V3D_SUBMIT_CSD): {
      // The size encoded in the guest's ioctl number (_IOC_SIZE(cmd)) is handed to both
      // conversion helpers.
      // NOTE(review): presumably this copes with differently sized struct revisions — confirm in the helpers.
      FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd* val = reinterpret_cast<FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd*>(args);
      drm_v3d_submit_csd Host_val = FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToHost(val, _IOC_SIZE(cmd));
      uint64_t Result = ::ioctl(fd, DRM_IOCTL_V3D_SUBMIT_CSD, &Host_val);
      if (Result != -1) {
        FEX::HLE::x32::V3D::fex_drm_v3d_submit_csd::SafeConvertToGuest(val, Host_val, _IOC_SIZE(cmd));
      }
      SYSCALL_ERRNO();
      break;
    }

    // Each _BASIC_META entry of the included list becomes a `case` label; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
    // DRM
#include "LinuxSyscalls/x32/Ioctl/v3d_drm.inl"
      {
        // Shared body of the generated case labels: plain passthrough.
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("V3D", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Handles virtio_gpu driver ioctls issued by a 32-bit guest.
  // No struct conversion is done: ioctls listed in virtio_drm.inl are forwarded to the
  // host unchanged, anything else is reported and rejected with -EPERM.
  uint32_t Virtio_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    switch (_IOC_NR(cmd)) {
      // Each _BASIC_META entry of the included list becomes a `case` label that falls
      // into the passthrough block after the include; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
      // DRM
#include "LinuxSyscalls/x32/Ioctl/virtio_drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }
    default:
      UnhandledIoctl("Virtio", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET
    return -EPERM;
  }

  // Passthrough handler used for DRM drivers that have no dedicated handler:
  // forwards cmd/args to the host ioctl untouched.
  // SYSCALL_ERRNO() (defined outside this function) supplies the return statement and
  // reads the local named `Result`, so that name must not change.
  uint32_t Default_Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
    // Default handler assumes everything is correct and doesn't need to do any work.
    uint64_t Result = ::ioctl(fd, cmd, args);
    SYSCALL_ERRNO();
  }

  // Registers the driver-specific ioctl handler for `fd` in the calling thread's DRM cache,
  // selected by the driver name reported in `Version` (result of DRM_IOCTL_VERSION).
  // Does nothing when no name buffer was supplied. Drivers without a dedicated handler
  // get Default_Handler (plain passthrough).
  void AssignDeviceTypeToFD(FEXCore::Core::CpuStateFrame* Frame, int fd, const drm_version& Version) {
    if (Version.name) {
      auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
      if (!Thread->DRMLRUCache) {
        Thread->DRMLRUCache = fextl::make_unique<DRMLRUCacheFDCache>();
      }

      // The name is described by pointer + length; nothing guarantees a trailing NUL,
      // so it must only ever be accessed through this bounded view.
      const std::string_view Name(Version.name, Version.name_len);
      if (Name == "amdgpu") {
        Thread->DRMLRUCache->SetFDHandler(fd, AMDGPU_Handler);
      } else if (Name == "radeon") {
        Thread->DRMLRUCache->SetFDHandler(fd, RADEON_Handler);
      } else if (Name == "msm") {
        Thread->DRMLRUCache->SetFDHandler(fd, MSM_Handler);
      } else if (Name == "nouveau") {
        Thread->DRMLRUCache->SetFDHandler(fd, Nouveau_Handler);
      } else if (Name == "i915") {
        Thread->DRMLRUCache->SetFDHandler(fd, I915_Handler);
      } else if (Name == "panfrost") {
        Thread->DRMLRUCache->SetFDHandler(fd, Panfrost_Handler);
      } else if (Name == "lima") {
        Thread->DRMLRUCache->SetFDHandler(fd, Lima_Handler);
      } else if (Name == "vc4") {
        Thread->DRMLRUCache->SetFDHandler(fd, VC4_Handler);
      } else if (Name == "v3d") {
        Thread->DRMLRUCache->SetFDHandler(fd, V3D_Handler);
      } else if (Name == "virtio_gpu") {
        Thread->DRMLRUCache->SetFDHandler(fd, Virtio_Handler);
      } else {
        // Known safe drm drivers.
        if (!(Name == "asahi" || Name == "panthor" || Name == "xe")) {
          // Log the bounded view rather than the raw char*: formatting Version.name as a
          // C string would read past the reported name when the buffer is not NUL-terminated.
          LogMan::Msg::IFmt("Unknown DRM device: '{}'. Using default passthrough", Name);
        }
        Thread->DRMLRUCache->SetFDHandler(fd, Default_Handler);
      }
    }
  }

  // Entry point for every DRM-type ioctl issued by a 32-bit guest.
  //  - Core DRM ioctls whose argument layout differs between guest and host are converted
  //    (DRM_IOCTL_VERSION, the SIMPLE(...) list, DRM_IOCTL_WAIT_VBLANK).
  //  - Core DRM ioctls listed in drm.inl are forwarded unchanged.
  //  - Driver-private ioctl numbers (DRM_COMMAND_BASE .. DRM_COMMAND_END - 1) are routed to
  //    the per-fd driver handler found through the calling thread's cache.
  // Anything else is reported and rejected with -EPERM.
  uint32_t Handler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
// SIMPLE(enum, type) emits one `case` for FEX_<enum>: convert the guest DRM::fex_<type> into
// the host <type>, issue the host ioctl <enum>, and copy the host struct back into guest
// memory only when the ioctl did not return -1.
#define SIMPLE(enum, type)                                             \
  case _IOC_NR(FEX_##enum): {                                          \
    DRM::fex_##type* guest = reinterpret_cast<DRM::fex_##type*>(args); \
    type host = *guest;                                                \
    uint64_t Result = ::ioctl(fd, enum, &host);                        \
    if (Result != -1) {                                                \
      *guest = host;                                                   \
    }                                                                  \
    SYSCALL_ERRNO();                                                   \
    break;                                                             \
  }

    switch (_IOC_NR(cmd)) {
    case _IOC_NR(FEX_DRM_IOCTL_VERSION): {
      fex_drm_version* version = reinterpret_cast<fex_drm_version*>(args);
      drm_version Host_Version = *version;
      uint64_t Result = ::ioctl(fd, DRM_IOCTL_VERSION, &Host_Version);
      if (Result != -1) {
        *version = Host_Version;
        // A successful version query tells us the driver name, so record which
        // driver handler this fd needs.
        AssignDeviceTypeToFD(Frame, fd, Host_Version);
      }
      SYSCALL_ERRNO();
      break;
    }

      SIMPLE(DRM_IOCTL_GET_UNIQUE, drm_unique)
      SIMPLE(DRM_IOCTL_GET_CLIENT, drm_client)
      SIMPLE(DRM_IOCTL_GET_STATS, drm_stats)
      SIMPLE(DRM_IOCTL_SET_UNIQUE, drm_unique)

      SIMPLE(DRM_IOCTL_ADD_MAP, drm_map)
      SIMPLE(DRM_IOCTL_ADD_BUFS, drm_buf_desc)
      SIMPLE(DRM_IOCTL_MARK_BUFS, drm_buf_desc)
      SIMPLE(DRM_IOCTL_INFO_BUFS, drm_buf_info)
      SIMPLE(DRM_IOCTL_MAP_BUFS, drm_buf_map)
      SIMPLE(DRM_IOCTL_FREE_BUFS, drm_buf_free)
      SIMPLE(DRM_IOCTL_RM_MAP, drm_map)
      SIMPLE(DRM_IOCTL_SET_SAREA_CTX, drm_ctx_priv_map)
      SIMPLE(DRM_IOCTL_GET_SAREA_CTX, drm_ctx_priv_map)

      SIMPLE(DRM_IOCTL_RES_CTX, drm_ctx_res)
      SIMPLE(DRM_IOCTL_DMA, drm_dma)
      SIMPLE(DRM_IOCTL_SG_ALLOC, drm_scatter_gather)
      SIMPLE(DRM_IOCTL_SG_FREE, drm_scatter_gather)
      SIMPLE(DRM_IOCTL_UPDATE_DRAW, drm_update_draw)
      SIMPLE(DRM_IOCTL_MODE_GETPLANERESOURCES, drm_mode_get_plane_res)
      SIMPLE(DRM_IOCTL_MODE_ADDFB2, drm_mode_fb_cmd2)
      SIMPLE(DRM_IOCTL_MODE_OBJ_GETPROPERTIES, drm_mode_obj_get_properties)
      SIMPLE(DRM_IOCTL_MODE_OBJ_SETPROPERTY, drm_mode_obj_set_property)
      SIMPLE(DRM_IOCTL_MODE_GETFB2, drm_mode_fb_cmd2)

    case _IOC_NR(FEX_DRM_IOCTL_WAIT_VBLANK): {
      // The argument is a request/reply union: only the request member is read from the
      // guest and only the reply member is written back.
      fex_drm_wait_vblank* guest = reinterpret_cast<fex_drm_wait_vblank*>(args);
      drm_wait_vblank Host {};
      Host.request = guest->request;
      // `Host` has the host layout, so the host ioctl number must be used (as every other
      // converted case does). The guest-sized FEX_ number encodes the guest struct size,
      // which would make the kernel transfer only part of the host-layout reply.
      uint64_t Result = ::ioctl(fd, DRM_IOCTL_WAIT_VBLANK, &Host);
      if (Result != -1) {
        guest->reply = Host.reply;
      }
      SYSCALL_ERRNO();
      break;
    }
    // Passthrough
    // Each _BASIC_META entry of the included list becomes a `case` label that falls into
    // the passthrough block after the include; _CUSTOM_META entries vanish.
#define _BASIC_META(x) case _IOC_NR(x):
#define _BASIC_META_VAR(x, args...) case _IOC_NR(x):
#define _CUSTOM_META(name, ioctl_num)
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)
    // DRM
#include "LinuxSyscalls/x32/Ioctl/drm.inl"
      {
        uint64_t Result = ::ioctl(fd, cmd, args);
        SYSCALL_ERRNO();
        break;
      }

    case DRM_COMMAND_BASE ...(DRM_COMMAND_END - 1): {
      // This is the space of the DRM device commands
      // FindHandler never yields null for an fd it does not know: it falls back to the
      // handler stored in the cache's sentinel slot.
      auto it = FindHandler(Frame, fd);
      return it(Frame, fd, cmd, args);
      break;
    }
    default:
      UnhandledIoctl("DRM", fd, cmd, args);
      return -EPERM;
      break;
    }
#undef SIMPLE
#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET

    return -EPERM;
  }
} // namespace DRM

// Builds an empty cache.
// Every slot - not just the trailing sentinel - is initialised to {-1, AddAndRunHandler}.
// The array has no default member initialisers, so leaving the first LRUSize slots
// untouched would make FindHandler compare against (and possibly return) indeterminate
// values. With this fill, a lookup that happens to match an unused slot simply takes the
// slow map/query path instead of calling a garbage pointer.
DRMLRUCacheFDCache::DRMLRUCacheFDCache() {
  for (auto& Entry : LRUCache) {
    Entry = LRUObject {-1, DRM::AddAndRunHandler};
  }
  // LRUCache[LRUSize] is the sentinel: AddToFront never overwrites it, so it always
  // holds the fallback handler returned on a cache miss.
}

// Records (or replaces) the handler associated with `FD` in the fd->handler map.
// The LRU array is not touched here.
void DRMLRUCacheFDCache::SetFDHandler(uint32_t FD, HandlerType Handler) {
  FDToHandler.insert_or_assign(static_cast<int32_t>(FD), Handler);
}

void DRMLRUCacheFDCache::DuplicateFD(int fd, int NewFD) {
  auto it = FDToHandler.find(fd);
  if (it != FDToHandler.end()) {
    FDToHandler[NewFD] = it->second;
  }
}

// Searches the LRU slots for `FD`.
//  - Hit in slot 0: the handler is returned as-is.
//  - Hit in a later slot with a non-null handler: the entry is pushed to the front, then returned.
//  - Otherwise: the sentinel slot's handler is returned.
DRMLRUCacheFDCache::HandlerType DRMLRUCacheFDCache::FindHandler(int32_t FD) {
  for (size_t Slot = 0; Slot < LRUSize; ++Slot) {
    if (LRUCache[Slot].FD != FD) {
      continue;
    }

    // Take a copy: AddToFront shifts the array and would invalidate a reference.
    const HandlerType Found = LRUCache[Slot].Handler;
    if (Slot == 0) {
      return Found;
    }

    if (Found) {
      AddToFront(FD, Found);
      return Found;
    }

    // Only the first matching slot is considered.
    break;
  }

  return LRUCache[LRUSize].Handler;
}

// Inserts {FD, Handler} at slot 0.
// Slots 0..LRUSize-2 each move one position towards the back, the previous occupant of
// slot LRUSize-1 is dropped, and the sentinel at index LRUSize is never written.
void DRMLRUCacheFDCache::AddToFront(int32_t FD, HandlerType Handler) {
  // Walk from the back so every element is read before it is overwritten.
  for (size_t Slot = LRUSize - 1; Slot > 0; --Slot) {
    LRUCache[Slot] = LRUCache[Slot - 1];
  }
  LRUCache[0] = LRUObject {FD, Handler};
}

// Slow path taken when `fd` is not in the LRU slots.
// First consults the fd->handler map; if the fd is unknown there as well, asks the
// kernel for the driver name (DRM_IOCTL_VERSION), registers the matching handler and
// runs the ioctl through it. Returns -EPERM when no handler could be determined.
uint32_t DRMLRUCacheFDCache::AddAndRunMapHandler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args) {
  if (const auto Known = FDToHandler.find(fd); Known != FDToHandler.end()) {
    // Already registered: promote it into the LRU and dispatch.
    AddToFront(fd, Known->second);
    return Known->second(Frame, fd, cmd, args);
  }

  // Not registered yet - query the driver name into a stack buffer.
  constexpr size_t NameBufferSize = 128;
  drm_version Host_Version {};
  Host_Version.name_len = NameBufferSize;
  Host_Version.name = static_cast<char*>(alloca(NameBufferSize));
  uint64_t Result = ioctl(fd, DRM_IOCTL_VERSION, &Host_Version);

  if (Result != -1) {
    // Registers the handler in the map only; the LRU picks it up on a later ioctl.
    DRM::AssignDeviceTypeToFD(Frame, fd, Host_Version);
  }

  const auto Assigned = FDToHandler.find(fd);
  if (Assigned == FDToHandler.end()) {
    // We don't understand this DRM ioctl
    return -EPERM;
  }

  Result = Assigned->second(Frame, fd, cmd, args);
  SYSCALL_ERRNO();
}

// Dispatch table indexed by the ioctl "type" field (_IOC_TYPE), one slot per possible value.
// It is built at compile time by a consteval lambda:
//   1. every slot starts out as BasicHandler,
//   2. every type value used by an entry of v4l2.inl is pointed at V4l2Handler,
//   3. every type value used by an entry of the DRM lists is pointed at DRM::Handler.
// Later assignments overwrite earlier ones for the same type value.
std::array<DRMLRUCacheFDCache::HandlerType, 1U << _IOC_TYPEBITS> Handlers = []() consteval {
  using namespace DRM;
  using namespace sockios;
  using namespace V4l2;
  // Local table with the same name as the global, so the macros below write into this copy.
  std::array<DRMLRUCacheFDCache::HandlerType, 1U << _IOC_TYPEBITS> Handlers {};

  ///< Default fill handlers with BasicHandler.
  for (auto& Handler : Handlers) {
    Handler = FEX::HLE::x32::BasicHandler::BasicHandler;
  }

  // Each list entry expands to an assignment into the slot selected by the entry's ioctl type.
#define _BASIC_META(x) Handlers[_IOC_TYPE(x)] = FEX::HLE::x32::V4l2::V4l2Handler;
#define _BASIC_META_VAR(x, args...) Handlers[_IOC_TYPE(x(args))] = FEX::HLE::x32::V4l2::V4l2Handler;
#define _CUSTOM_META(name, ioctl_num) Handlers[_IOC_TYPE(FEX_##name)] = FEX::HLE::x32::V4l2::V4l2Handler;
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) Handlers[_IOC_TYPE(FEX_##name)] = FEX::HLE::x32::V4l2::V4l2Handler;
  // V4L2
#include "LinuxSyscalls/x32/Ioctl/v4l2.inl"

#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET

  // Same scheme for the DRM core list and every driver-specific list, all routed to DRM::Handler.
#define _BASIC_META(x) Handlers[_IOC_TYPE(x)] = FEX::HLE::x32::DRM::Handler;
#define _BASIC_META_VAR(x, args...) Handlers[_IOC_TYPE(x(args))] = FEX::HLE::x32::DRM::Handler;
#define _CUSTOM_META(name, ioctl_num) Handlers[_IOC_TYPE(FEX_##name)] = FEX::HLE::x32::DRM::Handler;
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset) Handlers[_IOC_TYPE(FEX_##name)] = FEX::HLE::x32::DRM::Handler;
  // DRM
#include "LinuxSyscalls/x32/Ioctl/drm.inl"

#include "LinuxSyscalls/x32/Ioctl/amdgpu_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/msm_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/i915_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/lima_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/panfrost_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/nouveau_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/radeon_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/vc4_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/v3d_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/virtio_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/panthor_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/pvr_drm.inl"
#include "LinuxSyscalls/x32/Ioctl/xe_drm.inl"

#undef _BASIC_META
#undef _BASIC_META_VAR
#undef _CUSTOM_META
#undef _CUSTOM_META_OFFSET

  return Handlers;
}();

// 32-bit ioctl entry point: picks the handler registered for the request's type
// field and forwards all arguments to it unchanged.
uint32_t ioctl32(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t request, uint32_t args) {
  const auto TypeIndex = _IOC_TYPE(request);
  const DRMLRUCacheFDCache::HandlerType Dispatch = Handlers[TypeIndex];
  return Dispatch(Frame, fd, request, args);
}

// Public wrapper around DRM::CheckAndAddFDDuplication: if `fd` has a DRM handler
// registered in the calling thread's cache, `NewFD` gets the same handler.
void CheckAndAddFDDuplication(FEXCore::Core::CpuStateFrame* Frame, int fd, int NewFD) {
  DRM::CheckAndAddFDDuplication(Frame, fd, NewFD);
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/IoctlEmulation.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/fextl/map.h>

#include <cstdint>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Maps DRM file descriptors to the ioctl handler of the driver behind them.
// Lookups first scan a tiny fixed-size LRU array; the complete fd->handler mapping
// lives in a map that is consulted on an LRU miss.
class DRMLRUCacheFDCache final {
public:
  // Signature shared by all ioctl handlers: (frame, fd, ioctl command, 32-bit guest argument).
  using HandlerType = uint32_t (*)(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args);
  DRMLRUCacheFDCache();
  // Registers/replaces the handler for FD in the map.
  void SetFDHandler(uint32_t FD, HandlerType Handler);
  // Copies the handler registered for fd (if any) to NewFD.
  void DuplicateFD(int fd, int NewFD);
  // Returns the cached handler for FD, or the fallback handler stored in the sentinel slot.
  HandlerType FindHandler(int32_t FD);
  // Slow path: resolve the handler through the map (querying the driver if necessary) and run it.
  uint32_t AddAndRunMapHandler(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t cmd, uint32_t args);

protected:
  // Number of real LRU slots; one extra sentinel slot follows them.
  constexpr static size_t LRUSize = 3;
  // Pushes {FD, Handler} into slot 0, shifting the other real slots back by one.
  void AddToFront(int32_t FD, HandlerType Handler);

  struct LRUObject {
    int32_t FD;
    HandlerType Handler;
  };
  // With four elements total (3 + 1) then this is a single cacheline in size
  // Index LRUSize is the sentinel holding the fallback handler.
  LRUObject LRUCache[LRUSize + 1];

  // Complete fd -> handler mapping backing the LRU slots.
  fextl::map<int32_t, HandlerType> FDToHandler;
};

uint32_t ioctl32(FEXCore::Core::CpuStateFrame* Frame, int fd, uint32_t request, uint32_t args);
void CheckAndAddFDDuplication(FEXCore::Core::CpuStateFrame* Frame, int fd, int NewFD);
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Ioctls.inl
================================================
// Associates DRM_IOCTL_VERSION with the x32 DRM ioctl handler.
// NOTE(review): IOCTL() is defined by whichever file includes this list - confirm its expansion there.
IOCTL(DRM_IOCTL_VERSION, FEX::HLE::x32::DRM::Handler)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Memory.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/MathUtils.h>

#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <system_error>
#include <filesystem>

namespace FEX::HLE::x32 {

// Registers the 32-bit guest implementations of the memory-management syscalls.
// Most of them forward to the Guest* helpers on the global syscall handler, passing
// `false` as the first argument where such a flag exists.
// NOTE(review): that flag presumably selects 64-bit behaviour - confirm against the helper signatures.
void RegisterMemory(FEX::HLE::SyscallHandler* Handler) {
  // Argument block of the legacy mmap syscall: the guest passes one pointer to this
  // struct instead of six separate arguments.
  struct old_mmap_struct {
    uint32_t addr;
    uint32_t len;
    uint32_t prot;
    uint32_t flags;
    uint32_t fd;
    uint32_t offset;
  };
  REGISTER_SYSCALL_IMPL_X32(mmap, [](FEXCore::Core::CpuStateFrame* Frame, const old_mmap_struct* arg) -> uint64_t {
    return reinterpret_cast<uint64_t>(FEX::HLE::_SyscallHandler->GuestMmap(false, Frame->Thread, reinterpret_cast<void*>(arg->addr),
                                                                           arg->len, arg->prot, arg->flags, arg->fd, arg->offset));
  });

  // mmap2 takes its offset in 0x1000-byte units; it is widened to 64 bits before scaling
  // so large offsets do not overflow 32 bits.
  REGISTER_SYSCALL_IMPL_X32(
    mmap2, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t addr, uint32_t length, int prot, int flags, int fd, uint32_t pgoffset) -> uint64_t {
      return reinterpret_cast<uint64_t>(FEX::HLE::_SyscallHandler->GuestMmap(false, Frame->Thread, reinterpret_cast<void*>(addr), length,
                                                                             prot, flags, fd, (uint64_t)pgoffset * 0x1000));
    });

  REGISTER_SYSCALL_IMPL_X32(munmap, [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestMunmap(Frame->Thread, addr, length);
  });

  REGISTER_SYSCALL_IMPL_X32(mprotect, [](FEXCore::Core::CpuStateFrame* Frame, void* addr, uint32_t len, int prot) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestMprotect(Frame->Thread, addr, len, prot);
  });

  REGISTER_SYSCALL_IMPL_X32(
    mremap, [](FEXCore::Core::CpuStateFrame* Frame, void* old_address, size_t old_size, size_t new_size, int flags, void* new_address) -> uint64_t {
      return FEX::HLE::_SyscallHandler->GuestMremap(false, Frame->Thread, old_address, old_size, new_size, flags, new_address);
    });

  // mlockall/munlockall are emulated with ranged calls covering 0x1'0000 up to the 4 GiB
  // boundary instead of calling the host's mlockall/munlockall.
  // NOTE(review): `flags` holds mlockall() MCL_* bits but is passed straight to mlock2(),
  // which takes MLOCK_* flags - confirm whether a translation is intended here.
  REGISTER_SYSCALL_IMPL_X32(mlockall, [](FEXCore::Core::CpuStateFrame* Frame, int flags) -> uint64_t {
    uint64_t Result = ::syscall(SYSCALL_DEF(mlock2), reinterpret_cast<void*>(0x1'0000), 0x1'0000'0000ULL - 0x1'0000, flags);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(munlockall, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    uint64_t Result = ::munlock(reinterpret_cast<void*>(0x1'0000), 0x1'0000'0000ULL - 0x1'0000);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(shmat, [](FEXCore::Core::CpuStateFrame* Frame, int shmid, const void* shmaddr, int shmflg) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestShmat(false, Frame->Thread, shmid, shmaddr, shmflg);
  });

  REGISTER_SYSCALL_IMPL_X32(shmdt, [](FEXCore::Core::CpuStateFrame* Frame, const void* shmaddr) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestShmdt(false, Frame->Thread, shmaddr);
  });
}

} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Msg.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <stdint.h>
#include <syscall.h>
#include <time.h>
#include <unistd.h>

// Associates the "%lx" format string with the compat_ptr argument types used by the syscalls below.
// NOTE(review): ARG_TO_STR is defined elsewhere; presumably used when formatting syscall arguments for logging - confirm.
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::mq_attr32>, "%lx")
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::sigevent32>, "%lx")

namespace FEX::HLE::x32 {
// Registers the 32-bit guest implementations of the POSIX message queue syscalls.
// Each implementation converts 32-bit guest structures (timespec32, mq_attr32, sigevent32)
// into their host counterparts, issues the host syscall, and converts results back
// where the syscall produces output.
void RegisterMsg(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X32(mq_timedsend,
                            [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const char* msg_ptr, size_t msg_len,
                               unsigned int msg_prio, const struct timespec32* abs_timeout) -> uint64_t {
                              // The timeout is optional; only convert it when the guest supplied one.
                              struct timespec tp64 {};
                              struct timespec* timed_ptr {};
                              if (abs_timeout) {
                                FaultSafeUserMemAccess::VerifyIsReadable(abs_timeout, sizeof(*abs_timeout));
                                tp64 = *abs_timeout;
                                timed_ptr = &tp64;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedsend), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(mq_timedreceive,
                            [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, char* msg_ptr, size_t msg_len,
                               unsigned int* msg_prio, const struct timespec32* abs_timeout) -> uint64_t {
                              // The timeout is optional; only convert it when the guest supplied one.
                              struct timespec tp64 {};
                              struct timespec* timed_ptr {};
                              if (abs_timeout) {
                                FaultSafeUserMemAccess::VerifyIsReadable(abs_timeout, sizeof(*abs_timeout));
                                tp64 = *abs_timeout;
                                timed_ptr = &tp64;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(mq_timedreceive), mqdes, msg_ptr, msg_len, msg_prio, timed_ptr);
                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X32(
    mq_open, [](FEXCore::Core::CpuStateFrame* Frame, const char* name, int oflag, mode_t mode, compat_ptr<FEX::HLE::x32::mq_attr32> attr) -> uint64_t {
      mq_attr HostAttr {};
      mq_attr* HostAttr_p {};
      if ((oflag & O_CREAT) && attr) {
        FaultSafeUserMemAccess::VerifyIsReadable(attr, sizeof(*attr));
        // attr is optional unless O_CREAT is set
        // Then attr can be valid or nullptr
        HostAttr = *attr;
        HostAttr_p = &HostAttr;
      }
      uint64_t Result = ::syscall(SYSCALL_DEF(mq_open), name, oflag, mode, HostAttr_p);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(
    mq_notify, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, const compat_ptr<FEX::HLE::x32::sigevent32> sevp) -> uint64_t {
      // A null sevp is a valid request (it removes an existing notification registration),
      // so it must reach the host as nullptr instead of being dereferenced.
      sigevent Host {};
      sigevent* Host_p {};
      if (sevp) {
        FaultSafeUserMemAccess::VerifyIsReadable(sevp, sizeof(*sevp));
        Host = *sevp;
        Host_p = &Host;
      }
      uint64_t Result = ::syscall(SYSCALL_DEF(mq_notify), mqdes, Host_p);
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X32(mq_getsetattr,
                            [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::mqd_t mqdes, compat_ptr<FEX::HLE::x32::mq_attr32> newattr,
                               compat_ptr<FEX::HLE::x32::mq_attr32> oldattr) -> uint64_t {
                              mq_attr HostNew {};
                              mq_attr* HostNew_p {};

                              mq_attr HostOld {};
                              mq_attr* HostOld_p {};

                              if (newattr) {
                                FaultSafeUserMemAccess::VerifyIsReadable(newattr, sizeof(*newattr));
                                HostNew = *newattr;
                                HostNew_p = &HostNew;
                              }

                              if (oldattr) {
                                HostOld_p = &HostOld;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(mq_getsetattr), mqdes, HostNew_p, HostOld_p);

                              // Only publish the old attributes when the syscall succeeded; on failure
                              // (-1) HostOld was never filled in and the guest buffer must stay untouched.
                              if (Result != -1 && oldattr) {
                                FaultSafeUserMemAccess::VerifyIsWritable(oldattr, sizeof(*oldattr));
                                *oldattr = HostOld;
                              }

                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/NotImplemented.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/x32/Syscalls.h"
#include <FEXCore/Utils/LogManager.h>

#include <errno.h>
#include <stdint.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Registers a handler for syscall `name` that logs a debug message naming the syscall
// and returns -ENOSYS without doing any work.
#define REGISTER_SYSCALL_NOT_IMPL_X32(name)                                             \
  REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { \
    LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name);                      \
    return -ENOSYS;                                                                     \
  });
// Registers a handler for syscall `name` that unconditionally returns -EPERM (no logging).
#define REGISTER_SYSCALL_NO_PERM_X32(name) \
  REGISTER_SYSCALL_IMPL_X32(name, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -EPERM; });

// Registers stub handlers for syscalls that are removed/not implemented in the Linux kernel we present.
// The first group returns -ENOSYS (and logs); the second group returns -EPERM.
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) {
  // Stubs that report -ENOSYS
  REGISTER_SYSCALL_NOT_IMPL_X32(break);
  REGISTER_SYSCALL_NOT_IMPL_X32(stty);
  REGISTER_SYSCALL_NOT_IMPL_X32(gtty);
  REGISTER_SYSCALL_NOT_IMPL_X32(prof);
  REGISTER_SYSCALL_NOT_IMPL_X32(ftime);
  REGISTER_SYSCALL_NOT_IMPL_X32(mpx);
  REGISTER_SYSCALL_NOT_IMPL_X32(lock);
  REGISTER_SYSCALL_NOT_IMPL_X32(ulimit);
  REGISTER_SYSCALL_NOT_IMPL_X32(profil);
  REGISTER_SYSCALL_NOT_IMPL_X32(idle);

  // Stubs that report -EPERM
  REGISTER_SYSCALL_NO_PERM_X32(stime);
  REGISTER_SYSCALL_NO_PERM_X32(bdflush);
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Sched.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include <stdint.h>
#include <sched.h>
#include <time.h>
#include <unistd.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Registers the 32-bit scheduler syscalls that need structure conversion.
void RegisterSched(FEX::HLE::SyscallHandler* Handler) {
  // sched_rr_get_interval: the host fills a 64-bit timespec which is then converted into the
  // guest's 32-bit timespec. The guest buffer is only written when the host call succeeded, so a
  // failing call leaves guest memory untouched and nothing runs between the host call and
  // SYSCALL_ERRNO() on the error path.
  REGISTER_SYSCALL_IMPL_X32(sched_rr_get_interval, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, struct timespec32* tp) -> uint64_t {
    struct timespec tp64 {};
    uint64_t Result = ::sched_rr_get_interval(pid, tp ? &tp64 : nullptr);
    if (Result != -1 && tp) {
      FaultSafeUserMemAccess::VerifyIsWritable(tp, sizeof(*tp));
      *tp = tp64;
    }
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Semaphore.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <cstdint>
#include <errno.h>
#include <limits>
#include <string.h>
#include <sys/msg.h>
#include <sys/shm.h>
#include <time.h>
#include <type_traits>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Define the IPC ops
// Operation selectors carried in the low 16 bits of the `call` argument of the multiplexed
// `ipc` syscall; `_ipc` switches on these values.
enum IPCOp {
  // Semaphore operations
  OP_SEMOP = 1,
  OP_SEMGET = 2,
  OP_SEMCTL = 3,
  OP_SEMTIMEDOP = 4,
  // Message queue operations
  OP_MSGSND = 11,
  OP_MSGRCV = 12,
  OP_MSGGET = 13,
  OP_MSGCTL = 14,
  // Shared memory operations
  OP_SHMAT = 21,
  OP_SHMDT = 22,
  OP_SHMGET = 23,
  OP_SHMCTL = 24,
};

// Guest-side message buffer used by the msgsnd/msgrcv paths of `_ipc`: a compat-sized message
// type followed by the message payload.
struct msgbuf_32 {
  compat_long_t mtype; // Message type
  char mtext[1];       // First byte of the payload; callers copy more than one byte starting here
};

// Guest-side semctl argument union. Which member is meaningful depends on the semctl command.
union semun_32 {
  int32_t val;                          // Value for SETVAL
  compat_ptr<semid_ds_32> buf32;        // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  compat_ptr<semid_ds_64> buf64;        // struct semid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  uint32_t array;                       // uint16_t array for GETALL, SETALL
  compat_ptr<struct fex_seminfo> __buf; // struct seminfo * - Buffer for IPC_INFO
};

// Overlay used by the msgctl path of `_ipc` to view the raw 32-bit `ptr` argument as a typed
// guest pointer. `_ipc` stores the raw value through `val` and then reads one of the pointer members.
union msgun_32 {
  int32_t val;                      // Raw 32-bit argument value
  compat_ptr<msqid_ds_32> buf32;    // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  compat_ptr<msqid_ds_64> buf64;    // struct msgid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  uint32_t array;                   // Not referenced by the msgctl path in `_ipc`
  compat_ptr<struct msginfo> __buf; // struct msginfo * - Buffer for IPC_INFO, MSG_INFO
};

// Overlay used by the shmctl path of `_ipc` to view the raw 32-bit `ptr` argument as a typed
// guest pointer. `_ipc` stores the raw value through `val` and then reads one of the pointer members.
union shmun_32 {
  int32_t val;                           // Raw 32-bit argument value
  compat_ptr<shmid_ds_32> buf32;         // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  compat_ptr<shmid_ds_64> buf64;         // struct shmid_ds* - Buffer ptr for IPC_STAT, IPC_SET
  uint32_t array;                        // Not referenced by the shmctl path in `_ipc`
  compat_ptr<struct shminfo_32> __buf32; // struct shminfo * - Buffer for IPC_INFO
  compat_ptr<struct shminfo_64> __buf64; // struct shminfo * - Buffer for IPC_INFO

  compat_ptr<struct shm_info_32> __buf_info_32; // struct shm_info * - Buffer for SHM_INFO
};

// semctl argument union expressed with native-width pointers.
// NOTE(review): not referenced by the handlers visible in this file section — confirm it is still needed.
union semun {
  int val;                   /* value for SETVAL */
  struct semid_ds_32* buf;   /* buffer for IPC_STAT & IPC_SET */
  unsigned short* array;     /* array for GETALL & SETALL */
  struct fex_seminfo* __buf; /* buffer for IPC_INFO */
  void* __pad;
};

// Implements the multiplexed 32-bit `ipc` syscall.
//
// `call` carries the operation (see IPCOp) in its low 16 bits and an interface version in its
// high 16 bits. The meaning of `first`, `second`, `third`, `ptr` and `fifth` depends on the
// operation; each case forwards them to the matching host syscall, converting 32-bit guest
// structures to and from the host layout where needed.
//
// Returns -EINVAL for an unhandled *ctl command (after asserting) and for version 1 of SHMAT,
// -ENOSYS for an unknown operation, the GuestShmat/GuestShmdt result for SHMAT/SHMDT, and
// otherwise whatever SYSCALL_ERRNO() produces from the host result.
uint64_t _ipc(FEXCore::Core::CpuStateFrame* Frame, uint32_t call, uint32_t first, uint32_t second, uint32_t third, uint32_t ptr, uint32_t fifth) {
  uint64_t Result {};

  // Split the multiplexer argument: high half is the interface version, low half the operation.
  const int Version = call >> 16;
  call &= 0xffff;

  switch (static_cast<IPCOp>(call)) {
  case OP_SEMOP: {
    // first = semid, ptr = sembuf array, second = number of operations
    Result = ::syscall(SYSCALL_DEF(semop), first, reinterpret_cast<struct sembuf*>(ptr), second);
    break;
  }
  case OP_SEMGET: {
    Result = ::syscall(SYSCALL_DEF(semget), first, second, third);
    break;
  }
  case OP_SEMCTL: {
    uint32_t semid = first;
    uint32_t semnum = second;
    // Upper 16bits used for a different flag?
    int32_t cmd = third & 0xFF;
    // `ptr` is the guest address of the semun union, not the union value itself.
    auto_compat_ptr<semun_32> semun(ptr);
    // Bit 0x100 of the command selects the 64-bit guest structure layout.
    bool IPC64 = third & 0x100;
    switch (cmd) {
    case IPC_SET: {
      struct semid64_ds buf {};
      if (IPC64) {
        FaultSafeUserMemAccess::VerifyIsReadable(semun->buf64, sizeof(*semun->buf64));
        buf = *semun->buf64;
      } else {
        FaultSafeUserMemAccess::VerifyIsReadable(semun->buf32, sizeof(*semun->buf32));
        buf = *semun->buf32;
      }
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf64, sizeof(*semun->buf64));
          *semun->buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf32, sizeof(*semun->buf32));
          *semun->buf32 = buf;
        }
      }
      break;
    }
    case SEM_STAT:
    case SEM_STAT_ANY:
    case IPC_STAT: {
      struct semid64_ds buf {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf64, sizeof(*semun->buf64));
          *semun->buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf32, sizeof(*semun->buf32));
          *semun->buf32 = buf;
        }
      }
      break;
    }
    case SEM_INFO:
    case IPC_INFO: {
      struct fex_seminfo si {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(semun->__buf, sizeof(*semun->__buf));
        memcpy(semun->__buf, &si, sizeof(si));
      }
      break;
    }
    case GETALL:
    case SETALL: {
      // ptr is just a int32_t* in this case
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array);
      break;
    }
    case SETVAL: {
      // ptr is just a int32_t in this case
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val);
      break;
    }
    case IPC_RMID:
    case GETPID:
    case GETNCNT:
    case GETZCNT:
    case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd); break;
    default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL;
    }
    break;
  }
  case OP_SEMTIMEDOP: {
    // The optional timeout arrives in `fifth` as a 32-bit timespec and is widened for the host.
    timespec32* timeout = reinterpret_cast<timespec32*>(fifth);
    struct timespec tp64 {};
    struct timespec* timed_ptr {};
    if (timeout) {
      FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
      tp64 = *timeout;
      timed_ptr = &tp64;
    }

    Result = ::syscall(SYSCALL_DEF(semtimedop), first, reinterpret_cast<struct sembuf*>(ptr), second, timed_ptr);
    break;
  }
  case OP_MSGSND: {
    // Requires a temporary buffer
    // (the host message header is wider than the guest's, so the message is rebuilt host-side)
    fextl::vector<uint8_t> Tmp(second + sizeof(size_t));
    struct msgbuf* TmpMsg = reinterpret_cast<struct msgbuf*>(Tmp.data());
    msgbuf_32* src = reinterpret_cast<msgbuf_32*>(ptr);
    FaultSafeUserMemAccess::VerifyIsReadable(src, sizeof(*src));
    FaultSafeUserMemAccess::VerifyIsReadable(src->mtext, second);
    TmpMsg->mtype = src->mtype;
    memcpy(TmpMsg->mtext, src->mtext, second);

    Result = ::syscall(SYSCALL_DEF(msgsnd), first, TmpMsg, second, third);
    break;
  }
  case OP_MSGRCV: {
    // Receive into a host-layout temporary, then copy type and payload into the guest buffer.
    fextl::vector<uint8_t> Tmp(second + sizeof(size_t));
    struct msgbuf* TmpMsg = reinterpret_cast<struct msgbuf*>(Tmp.data());

    if (Version != 0) {
      // Newer interface: `ptr` is the message buffer and `fifth` the requested message type.
      Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, fifth, third);
      if (Result != -1) {
        msgbuf_32* src = reinterpret_cast<msgbuf_32*>(ptr);
        FaultSafeUserMemAccess::VerifyIsWritable(src, sizeof(*src));
        FaultSafeUserMemAccess::VerifyIsWritable(src->mtext, Result);
        src->mtype = TmpMsg->mtype;
        memcpy(src->mtext, TmpMsg->mtext, Result);
      }

    } else {
      // Version 0: `ptr` points at a small struct that bundles the message buffer pointer
      // together with the requested message type.
      struct compat_ipc_kludge {
        compat_uptr_t msgp;
        compat_long_t msgtyp;
      };
      compat_ipc_kludge* ipck = reinterpret_cast<compat_ipc_kludge*>(ptr);
      FaultSafeUserMemAccess::VerifyIsReadable(ipck, sizeof(*ipck));
      Result = ::syscall(SYSCALL_DEF(msgrcv), first, TmpMsg, second, ipck->msgtyp, third);
      if (Result != -1) {
        msgbuf_32* src = reinterpret_cast<msgbuf_32*>(ipck->msgp);
        FaultSafeUserMemAccess::VerifyIsWritable(src, sizeof(*src));
        FaultSafeUserMemAccess::VerifyIsWritable(src->mtext, Result);
        // The received type belongs in the guest message buffer; the kludge struct is input only.
        src->mtype = TmpMsg->mtype;
        memcpy(src->mtext, TmpMsg->mtext, Result);
      }
    }

    break;
  }
  case OP_MSGGET: {
    Result = ::syscall(SYSCALL_DEF(msgget), first, second);
    break;
  }
  case OP_MSGCTL: {
    uint32_t msqid = first;
    int32_t cmd = second & 0xFF;
    // Here `ptr` is the buffer address itself; load it into the union to get typed guest pointers.
    msgun_32 msgun {};
    msgun.val = ptr;
    bool IPC64 = second & 0x100;
    switch (cmd) {
    case IPC_SET: {
      struct msqid64_ds buf {};
      if (IPC64) {
        FaultSafeUserMemAccess::VerifyIsReadable(msgun.buf64, sizeof(*msgun.buf64));
        buf = *msgun.buf64;
      } else {
        FaultSafeUserMemAccess::VerifyIsReadable(msgun.buf32, sizeof(*msgun.buf32));
        buf = *msgun.buf32;
      }
      Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf);
      break;
    }
    case MSG_STAT:
    case MSG_STAT_ANY:
    case IPC_STAT: {
      struct msqid64_ds buf {};
      Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(msgun.buf64, sizeof(*msgun.buf64));
          *msgun.buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(msgun.buf32, sizeof(*msgun.buf32));
          *msgun.buf32 = buf;
        }
      }
      break;
    }
    case MSG_INFO:
    case IPC_INFO: {
      struct msginfo mi {};
      Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, reinterpret_cast<struct msqid_ds*>(&mi));
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(msgun.__buf, sizeof(mi));
        memcpy(msgun.__buf, &mi, sizeof(mi));
      }
      break;
    }
    case IPC_RMID: Result = ::syscall(SYSCALL_DEF(msgctl), msqid, cmd, nullptr); break;
    default: LOGMAN_MSG_A_FMT("Unhandled msgctl cmd: {}", cmd); return -EINVAL;
    }
    break;
  }
  case OP_SHMAT: {
    if (Version == 1) {
      // shmat explicitly doesn't support version 1.
      return -EINVAL;
    }
    // On success the attach address is stored through the guest pointer in `third`.
    // NOTE(review): the address is also returned as the syscall result — confirm that is intended.
    auto Result = FEX::HLE::_SyscallHandler->GuestShmat(false, Frame->Thread, first, reinterpret_cast<const void*>(ptr), second);
    if (!FEX::HLE::HasSyscallError(Result)) {
      *reinterpret_cast<uint32_t*>(third) = Result;
    }
    return Result;
  }
  case OP_SHMDT: {
    return FEX::HLE::_SyscallHandler->GuestShmdt(false, Frame->Thread, reinterpret_cast<const void*>(ptr));
  }
  case OP_SHMGET: {
    Result = ::shmget(first, second, third);
    break;
  }
  case OP_SHMCTL: {
    int32_t shmid = first;
    int32_t shmcmd = second;
    int32_t cmd = shmcmd & 0xFF;
    bool IPC64 = shmcmd & 0x100;
    // Here `ptr` is the buffer address itself; load it into the union to get typed guest pointers.
    shmun_32 shmun {};
    shmun.val = reinterpret_cast<uint32_t>(ptr);

    switch (cmd) {
    case IPC_SET: {
      struct shmid64_ds buf {};
      if (IPC64) {
        FaultSafeUserMemAccess::VerifyIsReadable(shmun.buf64, sizeof(*shmun.buf64));
        buf = *shmun.buf64;
      } else {
        FaultSafeUserMemAccess::VerifyIsReadable(shmun.buf32, sizeof(*shmun.buf32));
        buf = *shmun.buf32;
      }
      Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, &buf);
      // IPC_SET sets the internal data structure that the kernel uses
      // No need to writeback
      break;
    }
    case SHM_STAT:
    case SHM_STAT_ANY:
    case IPC_STAT: {
      struct shmid64_ds buf {};
      Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(shmun.buf64, sizeof(*shmun.buf64));
          *shmun.buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(shmun.buf32, sizeof(*shmun.buf32));
          *shmun.buf32 = buf;
        }
      }
      break;
    }
    case IPC_INFO: {
      struct shminfo si {};
      Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, reinterpret_cast<struct shmid_ds*>(&si));
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(shmun.__buf64, sizeof(*shmun.__buf64));
          *shmun.__buf64 = si;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(shmun.__buf32, sizeof(*shmun.__buf32));
          *shmun.__buf32 = si;
        }
      }
      break;
    }
    case SHM_INFO: {
      struct shm_info si {};
      Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, reinterpret_cast<struct shmid_ds*>(&si));
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(shmun.__buf_info_32, sizeof(*shmun.__buf_info_32));
        // SHM_INFO doesn't follow IPC64 behaviour
        *shmun.__buf_info_32 = si;
      }
      break;
    }
    case SHM_LOCK: Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, nullptr); break;
    case SHM_UNLOCK: Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, nullptr); break;
    case IPC_RMID: Result = ::syscall(SYSCALL_DEF(shmctl), shmid, cmd, nullptr); break;

    default: LOGMAN_MSG_A_FMT("Unhandled shmctl cmd: {}", cmd); return -EINVAL;
    }
    break;
  }

  default: return -ENOSYS;
  }
  SYSCALL_ERRNO();
}
// Registers the 32-bit SysV IPC entry points: the multiplexed `ipc` syscall (handled by `_ipc`)
// and the direct `semctl` syscall.
void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X32(ipc, _ipc);

  // semctl: dispatches on `cmd`, converting guest semid_ds / seminfo structures to and from the
  // host layout around the host semctl call. Unhandled commands assert and return -EINVAL.
  //
  // NOTE(review): IPC64 is derived from bit 0x100 of `cmd`, but the switch below uses the unmasked
  // `cmd` (and the unmasked value is what gets passed to the host). If none of the case constants
  // has bit 0x100 set, IPC64 is always false whenever a case matches, which makes the buf64
  // branches unreachable. The `ipc` multiplexer masks its command with 0xFF first — confirm which
  // behaviour is intended here.
  // NOTE(review): `semun` is dereferenced as a pointer to the union; confirm that matches how the
  // guest passes the fourth argument for this syscall.
  REGISTER_SYSCALL_IMPL_X32(semctl, [](FEXCore::Core::CpuStateFrame* Frame, int semid, int semnum, int cmd, semun_32* semun) -> uint64_t {
    uint64_t Result {};
    bool IPC64 = cmd & 0x100;

    switch (cmd) {
    case IPC_SET: {
      // Read the guest structure, hand the host copy to the kernel, then write it back on success.
      struct semid64_ds buf {};
      if (IPC64) {
        FaultSafeUserMemAccess::VerifyIsReadable(semun->buf64, sizeof(*semun->buf64));
        buf = *semun->buf64;
      } else {
        FaultSafeUserMemAccess::VerifyIsReadable(semun->buf32, sizeof(*semun->buf32));
        buf = *semun->buf32;
      }
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf64, sizeof(*semun->buf64));
          *semun->buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf32, sizeof(*semun->buf32));
          *semun->buf32 = buf;
        }
      }
      break;
    }
    case SEM_STAT:
    case SEM_STAT_ANY:
    case IPC_STAT: {
      // Query into a host structure and convert it into the guest buffer on success.
      struct semid64_ds buf {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      if (Result != -1) {
        if (IPC64) {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf64, sizeof(*semun->buf64));
          *semun->buf64 = buf;
        } else {
          FaultSafeUserMemAccess::VerifyIsWritable(semun->buf32, sizeof(*semun->buf32));
          *semun->buf32 = buf;
        }
      }
      break;
    }
    case SEM_INFO:
    case IPC_INFO: {
      // fex_seminfo is copied to the guest byte-for-byte.
      struct fex_seminfo si {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(semun->__buf, sizeof(*semun->__buf));
        memcpy(semun->__buf, &si, sizeof(si));
      }
      break;
    }
    case GETALL:
    case SETALL: {
      // ptr is just a int32_t* in this case
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->array);
      break;
    }
    case SETVAL: {
      // ptr is just a int32_t in this case
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun->val);
      break;
    }
    case IPC_RMID:
    case GETPID:
    case GETNCNT:
    case GETZCNT:
    case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); break;
    default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL;
    }
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Signals.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "ArchHelpers/UContext.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include <FEXCore/Core/SignalDelegator.h>
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

#include <time.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEXCore::x86::siginfo_t>, "%lx")

namespace FEX::HLE::x32 {
// Translates a host siginfo_t into the 32-bit x86 guest layout.
//
// The signo/errno/code header is always copied. The payload copied afterwards depends on si_code
// (timer-generated signals) or, failing that, on the signal number. Signals without a dedicated
// case only get the header and an error is logged.
void CopySigInfo(FEXCore::x86::siginfo_t* Info, const siginfo_t& Host) {
  // Header fields shared by every siginfo variant
  Info->si_code = Host.si_code;
  Info->si_errno = Host.si_errno;
  Info->si_signo = Host.si_signo;

  // Timer-generated signals carry timer id, overrun count and value regardless of signal number
  if (Info->si_code == SI_TIMER) {
    Info->_sifields._timer.sigval.sival_int = Host.si_value.sival_int;
    Info->_sifields._timer.overrun = Host.si_overrun;
    Info->_sifields._timer.tid = Host.si_timerid;
    return;
  }

  // Everything else is interpreted by signal number
  switch (Info->si_signo) {
  case SIGBUS:
  case SIGSEGV: {
    // This is the address that was being accessed, not the RIP; truncated to the guest's 32 bits
    const uintptr_t FaultAddress = reinterpret_cast<uintptr_t>(Host.si_addr);
    Info->_sifields._sigfault.addr = static_cast<uint32_t>(FaultAddress);
    break;
  }
  case SIGILL:
  case SIGFPE: {
    // The field would be the RIP of the faulting instruction, which can't be provided here.
    // Claim RIP 0 for now.
    Info->_sifields._sigfault.addr = 0;
    break;
  }
  case SIGCHLD: {
    Info->_sifields._sigchld.pid = Host.si_pid;
    Info->_sifields._sigchld.uid = Host.si_uid;
    Info->_sifields._sigchld.status = Host.si_status;
    Info->_sifields._sigchld.utime = Host.si_utime;
    Info->_sifields._sigchld.stime = Host.si_stime;
    break;
  }
  case SIGVTALRM:
  case SIGALRM: {
    Info->_sifields._timer.tid = Host.si_timerid;
    Info->_sifields._timer.overrun = Host.si_overrun;
    Info->_sifields._timer.sigval.sival_int = Host.si_int;
    break;
  }
  default: {
    LogMan::Msg::EFmt("Unhandled siginfo_t for sigtimedwait: {}", Info->si_signo);
    break;
  }
  }
}

// Registers the 32-bit signal syscalls. Handlers either delegate to the SignalDelegator (mask and
// handler management, waiting) or convert the guest's 32-bit structures and forward to the host.
void RegisterSignals(FEX::HLE::SyscallHandler* Handler) {

  // Only gets the lower 32-bits of the signal mask
  REGISTER_SYSCALL_IMPL_X32(sgetmask, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    uint64_t Set {};
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), 0, nullptr, &Set);
    return Set & ~0U;
  });

  // Only controls the lower 32-bits of the signal mask
  // Blocks the upper 32-bits
  REGISTER_SYSCALL_IMPL_X32(ssetmask, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t New) -> uint64_t {
    uint64_t Set {};
    uint64_t NewSet = (~0ULL << 32) | New;
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigProcMask(FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame),
                                                                      SIG_SETMASK, &NewSet, &Set);
    return Set & ~0U;
  });

  // Only masks the lower 32-bits of the signal mask
  // The upper 32-bits are still active (unmasked) and can signal the program
  REGISTER_SYSCALL_IMPL_X32(sigsuspend, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t Mask) -> uint64_t {
    uint64_t Mask64 = Mask;
    return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigSuspend(FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), &Mask64, 8);
  });

  // sigpending: queries the full 64-bit pending set and stores its low 32 bits in the guest's old-style set.
  REGISTER_SYSCALL_IMPL_X32(sigpending, [](FEXCore::Core::CpuStateFrame* Frame, compat_old_sigset_t* set) -> uint64_t {
    uint64_t HostSet {};
    uint64_t Result =
      FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigPending(FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), &HostSet, 8);
    if (Result == 0) {
      // This old interface only returns the lower signals
      FaultSafeUserMemAccess::VerifyIsWritable(set, sizeof(*set));
      *set = HostSet & ~0U;
    }
    return Result;
  });

  // signal: installs `handler` with otherwise zero-initialized action fields and returns the
  // previous handler truncated to 32 bits.
  REGISTER_SYSCALL_IMPL_X32(signal, [](FEXCore::Core::CpuStateFrame* Frame, int signum, uint32_t handler) -> uint64_t {
    GuestSigAction newact {};
    GuestSigAction oldact {};
    newact.sigaction_handler.handler = reinterpret_cast<decltype(newact.sigaction_handler.handler)>(handler);
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, &newact, &oldact);
    return static_cast<uint32_t>(reinterpret_cast<uint64_t>(oldact.sigaction_handler.handler));
  });

  // sigaction (old-style structure): both pointers are optional. The previous action is written
  // back only when registration returned 0.
  REGISTER_SYSCALL_IMPL_X32(
    sigaction, [](FEXCore::Core::CpuStateFrame* Frame, int signum, const OldGuestSigAction_32* act, OldGuestSigAction_32* oldact) -> uint64_t {
      GuestSigAction* act64_p {};
      GuestSigAction* old64_p {};

      GuestSigAction act64 {};
      if (act) {
        FaultSafeUserMemAccess::VerifyIsReadable(act, sizeof(*act));
        act64 = *act;
        act64_p = &act64;
      }
      GuestSigAction old64 {};

      if (oldact) {
        old64_p = &old64;
      }

      uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p);
      if (Result == 0 && oldact) {
        FaultSafeUserMemAccess::VerifyIsWritable(oldact, sizeof(*oldact));
        *oldact = old64;
      }

      return Result;
    });

  // rt_sigaction: same flow as sigaction with the rt structure layout; only an 8-byte sigset is accepted.
  REGISTER_SYSCALL_IMPL_X32(
    rt_sigaction,
    [](FEXCore::Core::CpuStateFrame* Frame, int signum, const GuestSigAction_32* act, GuestSigAction_32* oldact, size_t sigsetsize) -> uint64_t {
      if (sigsetsize != 8) {
        return -EINVAL;
      }

      GuestSigAction* act64_p {};
      GuestSigAction* old64_p {};

      GuestSigAction act64 {};
      if (act) {
        FaultSafeUserMemAccess::VerifyIsReadable(act, sizeof(*act));
        act64 = *act;
        act64_p = &act64;
      }
      GuestSigAction old64 {};

      if (oldact) {
        old64_p = &old64;
      }

      uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act64_p, old64_p);
      if (Result == 0 && oldact) {
        FaultSafeUserMemAccess::VerifyIsWritable(oldact, sizeof(*oldact));
        *oldact = old64;
      }

      return Result;
    });

  // rt_sigtimedwait: widens the optional 32-bit timeout, waits through the delegator and converts
  // the resulting siginfo into the guest layout.
  REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait,
                            [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, compat_ptr<FEXCore::x86::siginfo_t> info,
                               const struct timespec32* timeout, size_t sigsetsize) -> uint64_t {
                              struct timespec* timeout_ptr {};
                              struct timespec tp64 {};
                              if (timeout) {
                                FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
                                tp64 = *timeout;
                                timeout_ptr = &tp64;
                              }

                              siginfo_t HostInfo {};
                              uint64_t Result =
                                FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout_ptr, sigsetsize);
                              // `info` is optional, and HostInfo only holds a signal when the wait succeeded.
                              if (info && !FEX::HLE::HasSyscallError(Result)) {
                                FaultSafeUserMemAccess::VerifyIsWritable(info, sizeof(*info));
                                // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t
                                CopySigInfo(info, HostInfo);
                              }
                              return Result;
                            });


  // rt_sigtimedwait_time64: the timeout already uses the 64-bit layout and is passed through as-is.
  REGISTER_SYSCALL_IMPL_X32(rt_sigtimedwait_time64,
                            [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, compat_ptr<FEXCore::x86::siginfo_t> info,
                               const struct timespec* timeout, size_t sigsetsize) -> uint64_t {
                              siginfo_t HostInfo {};
                              uint64_t Result =
                                FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, &HostInfo, timeout, sigsetsize);
                              // `info` is optional, and HostInfo only holds a signal when the wait succeeded.
                              if (info && !FEX::HLE::HasSyscallError(Result)) {
                                FaultSafeUserMemAccess::VerifyIsWritable(info, sizeof(*info));
                                // We need to translate the 64-bit siginfo_t to 32-bit siginfo_t
                                CopySigInfo(info, HostInfo);
                              }
                              return Result;
                            });

  // pidfd_send_signal: converts the optional guest siginfo to the host layout before forwarding.
  REGISTER_SYSCALL_IMPL_X32(
    pidfd_send_signal,
    [](FEXCore::Core::CpuStateFrame* Frame, int pidfd, int sig, compat_ptr<FEXCore::x86::siginfo_t> info, unsigned int flags) -> uint64_t {
      siginfo_t* InfoHost_ptr {};
      siginfo_t InfoHost {};
      if (info) {
        FaultSafeUserMemAccess::VerifyIsReadable(info, sizeof(*info));
        InfoHost = *info;
        InfoHost_ptr = &InfoHost;
      }

      uint64_t Result = ::syscall(SYSCALL_DEF(pidfd_send_signal), pidfd, sig, InfoHost_ptr, flags);
      SYSCALL_ERRNO();
    });

  // rt_sigqueueinfo: converts the optional guest siginfo to the host layout before forwarding.
  REGISTER_SYSCALL_IMPL_X32(
    rt_sigqueueinfo, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int sig, compat_ptr<FEXCore::x86::siginfo_t> info) -> uint64_t {
      siginfo_t info64 {};
      siginfo_t* info64_p {};

      if (info) {
        FaultSafeUserMemAccess::VerifyIsReadable(info, sizeof(*info));
        info64 = *info;
        info64_p = &info64;
      }

      uint64_t Result = ::syscall(SYSCALL_DEF(rt_sigqueueinfo), pid, sig, info64_p);
      SYSCALL_ERRNO();
    });

  // rt_tgsigqueueinfo: converts the optional guest siginfo to the host layout before forwarding.
  REGISTER_SYSCALL_IMPL_X32(
    rt_tgsigqueueinfo, [](FEXCore::Core::CpuStateFrame* Frame, pid_t tgid, pid_t tid, int sig, compat_ptr<FEXCore::x86::siginfo_t> info) -> uint64_t {
      siginfo_t info64 {};
      siginfo_t* info64_p {};

      if (info) {
        FaultSafeUserMemAccess::VerifyIsReadable(info, sizeof(*info));
        info64 = *info;
        info64_p = &info64;
      }

      uint64_t Result = ::syscall(SYSCALL_DEF(rt_tgsigqueueinfo), tgid, tid, sig, info64_p);
      SYSCALL_ERRNO();
    });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Socket.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/vector.h>

#include <alloca.h>
#include <cstdint>
#include <cstring>
#include <memory>
#include <stddef.h>
#include <sys/socket.h>
#include <unistd.h>

ARG_TO_STR(FEX::HLE::x32::auto_compat_ptr<FEX::HLE::x32::mmsghdr_32>, "%lx")
ARG_TO_STR(FEX::HLE::x32::auto_compat_ptr<void>, "%lx")
ARG_TO_STR(FEX::HLE::x32::auto_compat_ptr<uint32_t>, "%lx")

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {

// Some sockopt defines for older build environments
#ifndef SO_RCVTIMEO_OLD
#define SO_RCVTIMEO_OLD 20
#endif
#ifndef SO_SNDTIMEO_OLD
#define SO_SNDTIMEO_OLD 21
#endif
#ifndef SO_TIMESTAMP_OLD
#define SO_TIMESTAMP_OLD 29
#endif
#ifndef SO_TIMESTAMPNS_OLD
#define SO_TIMESTAMPNS_OLD 35
#endif
#ifndef SO_TIMESTAMPING_OLD
#define SO_TIMESTAMPING_OLD 37
#endif
#ifndef SO_MEMINFO
#define SO_MEMINFO 55
#endif
#ifndef SO_INCOMING_NAPI_ID
#define SO_INCOMING_NAPI_ID 56
#endif
#ifndef SO_PEERGROUPS
#define SO_PEERGROUPS 59
#endif
#ifndef SO_ZEROCOPY
#define SO_ZEROCOPY 60
#endif
#ifndef SO_TXTIME
#define SO_TXTIME 61
#endif
#ifndef SO_BINDTOIFINDEX
#define SO_BINDTOIFINDEX 62
#endif
#ifndef SO_TIMESTAMP_NEW
#define SO_TIMESTAMP_NEW 63
#endif
#ifndef SO_TIMESTAMPNS_NEW
#define SO_TIMESTAMPNS_NEW 64
#endif
#ifndef SO_TIMESTAMPING_NEW
#define SO_TIMESTAMPING_NEW 65
#endif
#ifndef SO_RCVTIMEO_NEW
#define SO_RCVTIMEO_NEW 66
#endif
#ifndef SO_SNDTIMEO_NEW
#define SO_SNDTIMEO_NEW 67
#endif
#ifndef SO_DETACH_REUSEPORT_BPF
#define SO_DETACH_REUSEPORT_BPF 68
#endif
#ifndef SO_PREFER_BUSY_POLL
#define SO_PREFER_BUSY_POLL 69
#endif
#ifndef SO_BUSY_POLL_BUDGET
#define SO_BUSY_POLL_BUDGET 70
#endif
#ifndef SO_NETNS_COOKIE
#define SO_NETNS_COOKIE 71
#endif
#ifndef SO_BUF_LOCK
#define SO_BUF_LOCK 72
#endif
#ifndef SO_RESERVE_MEM
#define SO_RESERVE_MEM 73
#endif
#ifndef SO_TXREHASH
#define SO_TXREHASH 74
#endif
#ifndef SO_RCVMARK
#define SO_RCVMARK 75
#endif
#ifndef SO_PASSPIDFD
#define SO_PASSPIDFD 76
#endif
#ifndef SO_PEERPIDFD
#define SO_PEERPIDFD 77
#endif

// Numeric operation selectors for socket operations, one per socket-related call.
// NOTE(review): presumably the `call` values of the multiplexed 32-bit `socketcall` syscall —
// the dispatcher is outside this section, confirm there.
enum SockOp {
  OP_SOCKET = 1,
  OP_BIND = 2,
  OP_CONNECT = 3,
  OP_LISTEN = 4,
  OP_ACCEPT = 5,
  OP_GETSOCKNAME = 6,
  OP_GETPEERNAME = 7,
  OP_SOCKETPAIR = 8,
  OP_SEND = 9,
  OP_RECV = 10,
  OP_SENDTO = 11,
  OP_RECVFROM = 12,
  OP_SHUTDOWN = 13,
  OP_SETSOCKOPT = 14,
  OP_GETSOCKOPT = 15,
  OP_SENDMSG = 16,
  OP_RECVMSG = 17,
  OP_ACCEPT4 = 18,
  OP_RECVMMSG = 19,
  OP_SENDMMSG = 20,
};

static uint64_t SendMsg(int sockfd, const struct msghdr32* msg, int flags) {
  struct msghdr HostHeader {};
  fextl::vector<iovec> Host_iovec(msg->msg_iovlen);
  for (size_t i = 0; i < msg->msg_iovlen; ++i) {
    Host_iovec[i] = msg->msg_iov[i];
  }

  HostHeader.msg_name = msg->msg_name;
  HostHeader.msg_namelen = msg->msg_namelen;

  HostHeader.msg_iov = Host_iovec.data();
  HostHeader.msg_iovlen = msg->msg_iovlen;

  HostHeader.msg_control = alloca(msg->msg_controllen * 2);
  HostHeader.msg_controllen = msg->msg_controllen;

  HostHeader.msg_flags = msg->msg_flags;
  if (HostHeader.msg_controllen) {
    void* CurrentGuestPtr = msg->msg_control;
    struct cmsghdr* CurrentHost = reinterpret_cast<struct cmsghdr*>(HostHeader.msg_control);

    for (cmsghdr32* msghdr_guest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr); CurrentGuestPtr != 0;
         msghdr_guest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr)) {

      CurrentHost->cmsg_level = msghdr_guest->cmsg_level;
      CurrentHost->cmsg_type = msghdr_guest->cmsg_type;

      if (msghdr_guest->cmsg_len) {
        size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32));
        CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease;
        HostHeader.msg_controllen += SizeIncrease;
        memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32));
      }

      // Go to next host
      CurrentHost = CMSG_NXTHDR(&HostHeader, CurrentHost);

      // Go to next msg
      if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) {
        CurrentGuestPtr = nullptr;
      } else {
        CurrentGuestPtr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(CurrentGuestPtr) + msghdr_guest->cmsg_len);
        CurrentGuestPtr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(CurrentGuestPtr) + 3) & ~3ULL);
        if (CurrentGuestPtr >= reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(static_cast<void*>(msg->msg_control)) + msg->msg_controllen)) {
          CurrentGuestPtr = nullptr;
        }
      }
    }
  }

  uint64_t Result = ::sendmsg(sockfd, &HostHeader, flags);
  SYSCALL_ERRNO();
}

static uint64_t RecvMsg(int sockfd, struct msghdr32* msg, int flags) {
  struct msghdr HostHeader {};
  fextl::vector<iovec> Host_iovec(msg->msg_iovlen);
  for (size_t i = 0; i < msg->msg_iovlen; ++i) {
    Host_iovec[i] = msg->msg_iov[i];
  }

  HostHeader.msg_name = msg->msg_name;
  HostHeader.msg_namelen = msg->msg_namelen;

  HostHeader.msg_iov = Host_iovec.data();
  HostHeader.msg_iovlen = msg->msg_iovlen;

  HostHeader.msg_control = alloca(msg->msg_controllen * 2);
  HostHeader.msg_controllen = msg->msg_controllen * 2;

  HostHeader.msg_flags = msg->msg_flags;

  uint64_t Result = ::recvmsg(sockfd, &HostHeader, flags);
  if (Result != -1) {
    for (size_t i = 0; i < msg->msg_iovlen; ++i) {
      msg->msg_iov[i] = Host_iovec[i];
    }

    msg->msg_namelen = HostHeader.msg_namelen;
    msg->msg_controllen = HostHeader.msg_controllen;
    msg->msg_flags = HostHeader.msg_flags;
    if (HostHeader.msg_controllen) {
      // Host and guest cmsg data structures aren't compatible.
      // Copy them over now
      void* CurrentGuestPtr = msg->msg_control;
      for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(&HostHeader); cmsg != nullptr; cmsg = CMSG_NXTHDR(&HostHeader, cmsg)) {
        cmsghdr32* CurrentGuest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr);

        // Copy over the header first
        // cmsg_len needs to be adjusted by the size of the header between host and guest
        // Host is 16 bytes, guest is 12 bytes
        CurrentGuest->cmsg_level = cmsg->cmsg_level;
        CurrentGuest->cmsg_type = cmsg->cmsg_type;

        // Now copy over the data
        if (cmsg->cmsg_len) {
          size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32));
          CurrentGuest->cmsg_len = cmsg->cmsg_len - SizeIncrease;

          // Controllen size also changes
          msg->msg_controllen -= SizeIncrease;

          memcpy(CurrentGuest->cmsg_data, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof(struct cmsghdr));
          CurrentGuestPtr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(CurrentGuestPtr) + CurrentGuest->cmsg_len);
          CurrentGuestPtr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(CurrentGuestPtr) + 3) & ~3ULL);
        }
      }
    }
  }
  SYSCALL_ERRNO();
}

// Fills a host msghdr from a 32-bit guest msghdr32, using caller-owned backing storage.
//
// iovec:            shared host iovec storage. The guest entries are appended and Host->msg_iov
//                   points at the first appended entry.
// Host:             host header to fill.
// Guest:            guest header to read.
// ControlLen:       shared host control buffer storage. It must already be large enough; it is
//                   never resized here.
// ControlLenOffset: in/out cursor into ControlLen. This message gets a slot of twice the guest
//                   msg_controllen and the cursor is advanced past it.
//
// NOTE: appending to `iovec` may reallocate it. Callers that convert several headers against the
// same vector must reserve the total capacity up front, otherwise the msg_iov pointers stored in
// previously converted headers are invalidated.
void ConvertHeaderToHost(fextl::vector<iovec>& iovec, struct msghdr* Host, const struct msghdr32* Guest, fextl::vector<uint8_t>& ControlLen,
                         size_t& ControlLenOffset) {
  const size_t CurrentIOVecSize = iovec.size();
  iovec.resize(CurrentIOVecSize + Guest->msg_iovlen);
  for (size_t i = 0; i < Guest->msg_iovlen; ++i) {
    iovec[CurrentIOVecSize + i] = Guest->msg_iov[i];
  }

  Host->msg_name = Guest->msg_name;
  Host->msg_namelen = Guest->msg_namelen;

  // Pointer arithmetic on data() instead of operator[]: the index equals size() when this message
  // has no iovecs, which operator[] must not be used with.
  Host->msg_iov = iovec.data() + CurrentIOVecSize;
  Host->msg_iovlen = Guest->msg_iovlen;

  // Same reasoning: the offset equals size() when this (and every following) message has no control data.
  Host->msg_control = ControlLen.data() + ControlLenOffset;
  Host->msg_controllen = Guest->msg_controllen * 2;
  ControlLenOffset += Host->msg_controllen;

  Host->msg_flags = Guest->msg_flags;
}

void ConvertHeaderToGuest(struct msghdr32* Guest, struct msghdr* Host) {
  for (size_t i = 0; i < Guest->msg_iovlen; ++i) {
    Guest->msg_iov[i] = Host->msg_iov[i];
  }

  Guest->msg_namelen = Host->msg_namelen;
  Guest->msg_controllen = Host->msg_controllen;
  Guest->msg_flags = Host->msg_flags;

  if (Host->msg_controllen) {
    // Host and guest cmsg data structures aren't compatible.
    // Copy them over now
    void* CurrentGuestPtr = Guest->msg_control;
    for (struct cmsghdr* cmsg = CMSG_FIRSTHDR(Host); cmsg != nullptr; cmsg = CMSG_NXTHDR(Host, cmsg)) {
      cmsghdr32* CurrentGuest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr);

      // Copy over the header first
      // cmsg_len needs to be adjusted by the size of the header between host and guest
      // Host is 16 bytes, guest is 12 bytes
      CurrentGuest->cmsg_level = cmsg->cmsg_level;
      CurrentGuest->cmsg_type = cmsg->cmsg_type;

      // Now copy over the data
      if (cmsg->cmsg_len) {
        size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32));
        CurrentGuest->cmsg_len = cmsg->cmsg_len - SizeIncrease;

        // Controllen size also changes
        Guest->msg_controllen -= SizeIncrease;

        memcpy(CurrentGuest->cmsg_data, CMSG_DATA(cmsg), cmsg->cmsg_len - sizeof(struct cmsghdr));
        CurrentGuestPtr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(CurrentGuestPtr) + CurrentGuest->cmsg_len);
        CurrentGuestPtr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(CurrentGuestPtr) + 3) & ~3ULL);
      }
    }
  }
}

// Emulates the 32-bit guest recvmmsg on top of the host recvmmsg().
//
// All guest headers are converted into host headers that share one iovec vector and one control
// buffer, the host call is made, and the headers of the messages that were received are converted
// back into the guest array together with their msg_len.
//
// timeout_ts is passed to the host call unchanged.
// Returns the host recvmmsg() result, or a negative errno on failure.
static uint64_t RecvMMsg(int sockfd, auto_compat_ptr<mmsghdr_32> msgvec, uint32_t vlen, int flags, struct timespec* timeout_ts) {
  fextl::vector<iovec> Host_iovec;
  fextl::vector<struct mmsghdr> HostMHeader(vlen);
  fextl::vector<uint8_t> control_len;

  size_t total_control_len {};
  size_t total_iovec_count {};
  for (size_t i = 0; i < vlen; ++i) {
    auto Guest = &msgvec[i].msg_hdr;
    total_control_len += Guest->msg_controllen * 2;
    total_iovec_count += Guest->msg_iovlen;
  }
  control_len.resize(total_control_len);

  // ConvertHeaderToHost appends to Host_iovec for every message and stores pointers into it in the
  // host headers. Reserving the final size first guarantees those appends never reallocate, so the
  // pointers stored for earlier messages stay valid.
  Host_iovec.reserve(total_iovec_count);

  size_t CurrentControlLen {};
  for (size_t i = 0; i < vlen; ++i) {
    ConvertHeaderToHost(Host_iovec, &HostMHeader[i].msg_hdr, &msgvec[i].msg_hdr, control_len, CurrentControlLen);
    HostMHeader[i].msg_len = msgvec[i].msg_len;
  }
  uint64_t Result = ::recvmmsg(sockfd, HostMHeader.data(), vlen, flags, timeout_ts);
  if (Result != -1) {
    // Only the first Result entries were filled in by the host.
    for (size_t i = 0; i < Result; ++i) {
      ConvertHeaderToGuest(&msgvec[i].msg_hdr, &HostMHeader[i].msg_hdr);
      msgvec[i].msg_len = HostMHeader[i].msg_len;
    }
  }
  SYSCALL_ERRNO();
}

static uint64_t SendMMsg(int sockfd, auto_compat_ptr<mmsghdr_32> msgvec, uint32_t vlen, int flags) {
  fextl::vector<iovec> Host_iovec;
  fextl::vector<struct mmsghdr> HostMmsg(vlen);

  // Walk the iovec and convert them
  // Calculate controllen at the same time
  size_t Controllen_size {};
  for (size_t i = 0; i < vlen; ++i) {
    msghdr32& guest = msgvec[i].msg_hdr;

    Controllen_size += guest.msg_controllen * 2;
    for (size_t j = 0; j < guest.msg_iovlen; ++j) {
      iovec guest_iov = guest.msg_iov[j];
      Host_iovec.emplace_back(guest_iov);
    }
  }

  fextl::vector<uint8_t> Controllen(Controllen_size);

  size_t current_iov {};
  size_t current_controllen_offset {};
  for (size_t i = 0; i < vlen; ++i) {
    msghdr32& guest = msgvec[i].msg_hdr;
    struct msghdr& msg = HostMmsg[i].msg_hdr;
    msg.msg_name = guest.msg_name;
    msg.msg_namelen = guest.msg_namelen;

    msg.msg_iov = &Host_iovec.at(current_iov);
    msg.msg_iovlen = guest.msg_iovlen;
    current_iov += msg.msg_iovlen;

    if (guest.msg_controllen) {
      msg.msg_control = &Controllen.at(current_controllen_offset);
      current_controllen_offset += guest.msg_controllen * 2;
    }
    msg.msg_controllen = guest.msg_controllen;

    msg.msg_flags = guest.msg_flags;

    if (msg.msg_controllen) {
      void* CurrentGuestPtr = guest.msg_control;
      struct cmsghdr* CurrentHost = reinterpret_cast<struct cmsghdr*>(msg.msg_control);

      for (cmsghdr32* msghdr_guest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr); CurrentGuestPtr != 0;
           msghdr_guest = reinterpret_cast<cmsghdr32*>(CurrentGuestPtr)) {

        CurrentHost->cmsg_level = msghdr_guest->cmsg_level;
        CurrentHost->cmsg_type = msghdr_guest->cmsg_type;

        if (msghdr_guest->cmsg_len) {
          size_t SizeIncrease = (CMSG_LEN(0) - sizeof(cmsghdr32));
          CurrentHost->cmsg_len = msghdr_guest->cmsg_len + SizeIncrease;
          msg.msg_controllen += SizeIncrease;
          memcpy(CMSG_DATA(CurrentHost), msghdr_guest->cmsg_data, msghdr_guest->cmsg_len - sizeof(cmsghdr32));
        }

        // Go to next host
        CurrentHost = CMSG_NXTHDR(&msg, CurrentHost);

        // Go to next msg
        if (msghdr_guest->cmsg_len < sizeof(cmsghdr32)) {
          CurrentGuestPtr = nullptr;
        } else {
          CurrentGuestPtr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(CurrentGuestPtr) + msghdr_guest->cmsg_len);
          CurrentGuestPtr = reinterpret_cast<void*>((reinterpret_cast<uintptr_t>(CurrentGuestPtr) + 3) & ~3ULL);
          if (CurrentGuestPtr >= reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(static_cast<void*>(guest.msg_control)) + guest.msg_controllen)) {
            CurrentGuestPtr = nullptr;
          }
        }
      }
    }

    HostMmsg[i].msg_len = msgvec[i].msg_len;
  }

  uint64_t Result = ::sendmmsg(sockfd, HostMmsg.data(), vlen, flags);

  if (Result != -1) {
    // Update guest msglen
    for (size_t i = 0; i < Result; ++i) {
      msgvec[i].msg_len = HostMmsg[i].msg_len;
    }
  }
  SYSCALL_ERRNO();
}

static uint64_t SetSockOpt(int sockfd, int level, int optname, auto_compat_ptr<void> optval, int optlen) {
  uint64_t Result {};

  if (level == SOL_SOCKET) {
    switch (optname) {
    case SO_ATTACH_FILTER:
    case SO_ATTACH_REUSEPORT_CBPF: {
      struct sock_fprog32 {
        uint16_t len;
        uint32_t filter;
      };
      struct sock_fprog64 {
        uint16_t len;
        uint64_t filter;
      };

      if (optlen != sizeof(sock_fprog32)) {
        return -EINVAL;
      }

      sock_fprog32* prog = reinterpret_cast<sock_fprog32*>(optval.Ptr);
      sock_fprog64 prog64 {};
      prog64.len = prog->len;
      prog64.filter = prog->filter;

      Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, &prog64, sizeof(sock_fprog64));
      break;
    }
    case SO_RCVTIMEO_OLD: {
      // _OLD uses old_timeval32. Needs to be converted
      struct timeval tv64 = *reinterpret_cast<timeval32*>(optval.Ptr);
      Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, SO_RCVTIMEO_NEW, &tv64, sizeof(tv64));
      break;
    }
    case SO_SNDTIMEO_OLD: {
      // _OLD uses old_timeval32. Needs to be converted
      struct timeval tv64 = *reinterpret_cast<timeval32*>(optval.Ptr);
      Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, SO_SNDTIMEO_NEW, &tv64, sizeof(tv64));
      break;
    }
    // Each optname as a reminder which setting has been manually checked
    case SO_DEBUG:
    case SO_REUSEADDR:
    case SO_TYPE:
    case SO_ERROR:
    case SO_DONTROUTE:
    case SO_BROADCAST:
    case SO_SNDBUF:
    case SO_RCVBUF:
    case SO_SNDBUFFORCE:
    case SO_RCVBUFFORCE:
    case SO_KEEPALIVE:
    case SO_OOBINLINE:
    case SO_NO_CHECK:
    case SO_PRIORITY:
    case SO_LINGER:
    case SO_BSDCOMPAT:
    case SO_REUSEPORT:
    /**
     * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc}
     * @{ */
    case SO_PASSCRED:
    case SO_PEERCRED:
    case SO_RCVLOWAT:
    case SO_SNDLOWAT:
    /**  @} */
    case SO_SECURITY_AUTHENTICATION:
    case SO_SECURITY_ENCRYPTION_TRANSPORT:
    case SO_SECURITY_ENCRYPTION_NETWORK:
    case SO_DETACH_FILTER:
    case SO_PEERNAME:
    case SO_TIMESTAMP_OLD: // Returns int32_t boolean
    case SO_ACCEPTCONN:
    case SO_PEERSEC:
    // Gap 32, 33
    case SO_PASSSEC:
    case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean
    case SO_MARK:
    case SO_TIMESTAMPING_OLD: // Returns so_timestamping
    case SO_PROTOCOL:
    case SO_DOMAIN:
    case SO_RXQ_OVFL:
    case SO_WIFI_STATUS:
    case SO_PEEK_OFF:
    case SO_NOFCS:
    case SO_LOCK_FILTER:
    case SO_SELECT_ERR_QUEUE:
    case SO_BUSY_POLL:
    case SO_MAX_PACING_RATE:
    case SO_BPF_EXTENSIONS:
    case SO_INCOMING_CPU:
    case SO_ATTACH_BPF:
    case SO_ATTACH_REUSEPORT_EBPF:
    case SO_CNX_ADVICE:
    // Gap 54 (SCM_TIMESTAMPING_OPT_STATS)
    case SO_MEMINFO:
    case SO_INCOMING_NAPI_ID:
    case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit
    // Gap 58 (SCM_TIMESTAMPING_PKTINFO)
    case SO_PEERGROUPS:
    case SO_ZEROCOPY:
    case SO_TXTIME:
    case SO_BINDTOIFINDEX:
    case SO_TIMESTAMP_NEW:
    case SO_TIMESTAMPNS_NEW:
    case SO_TIMESTAMPING_NEW:
    case SO_RCVTIMEO_NEW:
    case SO_SNDTIMEO_NEW:
    case SO_DETACH_REUSEPORT_BPF:
    case SO_PREFER_BUSY_POLL:
    case SO_BUSY_POLL_BUDGET:
    case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit
    case SO_BUF_LOCK:
    case SO_RESERVE_MEM:
    case SO_TXREHASH:
    case SO_RCVMARK:
    case SO_PASSPIDFD:
    case SO_PEERPIDFD:
    default: Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, reinterpret_cast<const void*>(optval.Ptr), optlen); break;
    }
  } else {
    Result = ::syscall(SYSCALL_DEF(setsockopt), sockfd, level, optname, reinterpret_cast<const void*>(optval.Ptr), optlen);
  }

  SYSCALL_ERRNO();
}

static uint64_t GetSockOpt(int sockfd, int level, int optname, auto_compat_ptr<void> optval, auto_compat_ptr<socklen_t> optlen) {
  uint64_t Result {};
  if (level == SOL_SOCKET) {
    switch (optname) {
    case SO_RCVTIMEO_OLD: {
      // _OLD uses old_timeval32. Needs to be converted
      struct timeval tv64 {};
      Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, SO_RCVTIMEO_NEW, &tv64, sizeof(tv64));
      *reinterpret_cast<timeval32*>(optval.Ptr) = tv64;
      break;
    }
    case SO_SNDTIMEO_OLD: {
      // _OLD uses old_timeval32. Needs to be converted
      struct timeval tv64 {};
      Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, SO_SNDTIMEO_NEW, &tv64, sizeof(tv64));
      *reinterpret_cast<timeval32*>(optval.Ptr) = tv64;
      break;
    }
    // Each optname as a reminder which setting has been manually checked
    case SO_DEBUG:
    case SO_REUSEADDR:
    case SO_TYPE:
    case SO_ERROR:
    case SO_DONTROUTE:
    case SO_BROADCAST:
    case SO_SNDBUF:
    case SO_RCVBUF:
    case SO_SNDBUFFORCE:
    case SO_RCVBUFFORCE:
    case SO_KEEPALIVE:
    case SO_OOBINLINE:
    case SO_NO_CHECK:
    case SO_PRIORITY:
    case SO_LINGER:
    case SO_BSDCOMPAT:
    case SO_REUSEPORT:
    /**
     * @name These end up differing between {x86,arm} and {powerpc, alpha, sparc, mips, parisc}
     * @{ */
    case SO_PASSCRED:
    case SO_PEERCRED:
    case SO_RCVLOWAT:
    case SO_SNDLOWAT:
    /**  @} */
    case SO_SECURITY_AUTHENTICATION:
    case SO_SECURITY_ENCRYPTION_TRANSPORT:
    case SO_SECURITY_ENCRYPTION_NETWORK:
    case SO_ATTACH_FILTER: // Renamed to SO_GET_FILTER on get. Same between 32-bit and 64-bit
    case SO_DETACH_FILTER:
    case SO_PEERNAME:
    case SO_TIMESTAMP_OLD: // Returns int32_t boolean
    case SO_ACCEPTCONN:
    case SO_PEERSEC:
    // Gap 32, 33
    case SO_PASSSEC:
    case SO_TIMESTAMPNS_OLD: // Returns int32_t boolean
    case SO_MARK:
    case SO_TIMESTAMPING_OLD: // Returns so_timestamping
    case SO_PROTOCOL:
    case SO_DOMAIN:
    case SO_RXQ_OVFL:
    case SO_WIFI_STATUS:
    case SO_PEEK_OFF:
    case SO_NOFCS:
    case SO_LOCK_FILTER:
    case SO_SELECT_ERR_QUEUE:
    case SO_BUSY_POLL:
    case SO_MAX_PACING_RATE:
    case SO_BPF_EXTENSIONS:
    case SO_INCOMING_CPU:
    case SO_ATTACH_BPF:
    case SO_ATTACH_REUSEPORT_CBPF: // Doesn't do anything in get
    case SO_ATTACH_REUSEPORT_EBPF:
    case SO_CNX_ADVICE:
    // Gap 54 (SCM_TIMESTAMPING_OPT_STATS)
    case SO_MEMINFO:
    case SO_INCOMING_NAPI_ID:
    case SO_COOKIE: // Cookie always returns 64-bit even on 32-bit
    // Gap 58 (SCM_TIMESTAMPING_PKTINFO)
    case SO_PEERGROUPS:
    case SO_ZEROCOPY:
    case SO_TXTIME:
    case SO_BINDTOIFINDEX:
    case SO_TIMESTAMP_NEW:
    case SO_TIMESTAMPNS_NEW:
    case SO_TIMESTAMPING_NEW:
    case SO_RCVTIMEO_NEW:
    case SO_SNDTIMEO_NEW:
    case SO_DETACH_REUSEPORT_BPF:
    case SO_PREFER_BUSY_POLL:
    case SO_BUSY_POLL_BUDGET:
    case SO_NETNS_COOKIE: // Cookie always returns 64-bit even on 32-bit
    case SO_BUF_LOCK:
    case SO_RESERVE_MEM:
    default: Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen); break;
    }
  } else {
    Result = ::syscall(SYSCALL_DEF(getsockopt), sockfd, level, optname, optval, optlen);
  }
  SYSCALL_ERRNO();
}

// Registers the 32-bit specific socket syscalls with the syscall handler.
//
// - socketcall: multiplexed entry point. `call` is one of the SockOp values and `Arguments` points
//   at the guest's array of 32-bit arguments for that operation. Simple operations are forwarded
//   directly to the host; operations that need structure conversion go through the helpers above.
//   Unknown operations fail with -EINVAL.
// - sendmsg, sendmmsg, recvmsg, recvmmsg, recvmmsg_time64, setsockopt, getsockopt: direct entry
//   points for the same conversion helpers.
void RegisterSocket(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X32(socketcall, [](FEXCore::Core::CpuStateFrame* Frame, uint32_t call, uint32_t* Arguments) -> uint64_t {
    uint64_t Result {};

    switch (call) {
    case OP_SOCKET: {
      Result = ::socket(Arguments[0], Arguments[1], Arguments[2]);
      break;
    }
    case OP_BIND: {
      Result = ::bind(Arguments[0], reinterpret_cast<const struct sockaddr*>(Arguments[1]), Arguments[2]);
      break;
    }
    case OP_CONNECT: {
      Result = ::connect(Arguments[0], reinterpret_cast<const struct sockaddr*>(Arguments[1]), Arguments[2]);
      break;
    }
    case OP_LISTEN: {
      Result = ::listen(Arguments[0], Arguments[1]);
      break;
    }
    case OP_ACCEPT: {
      Result = ::accept(Arguments[0], reinterpret_cast<struct sockaddr*>(Arguments[1]), reinterpret_cast<socklen_t*>(Arguments[2]));
      break;
    }
    case OP_GETSOCKNAME: {
      Result = ::getsockname(Arguments[0], reinterpret_cast<struct sockaddr*>(Arguments[1]), reinterpret_cast<socklen_t*>(Arguments[2]));
      break;
    }
    case OP_GETPEERNAME: {
      Result = ::getpeername(Arguments[0], reinterpret_cast<struct sockaddr*>(Arguments[1]), reinterpret_cast<socklen_t*>(Arguments[2]));
      break;
    }
    case OP_SOCKETPAIR: {
      Result = ::socketpair(Arguments[0], Arguments[1], Arguments[2], reinterpret_cast<int32_t*>(Arguments[3]));
      break;
    }
    case OP_SEND: {
      Result = ::send(Arguments[0], reinterpret_cast<const void*>(Arguments[1]), Arguments[2], Arguments[3]);
      break;
    }
    case OP_RECV: {
      Result = ::recv(Arguments[0], reinterpret_cast<void*>(Arguments[1]), Arguments[2], Arguments[3]);
      break;
    }
    case OP_SENDTO: {
      // The address length is a plain integer, not a pointer.
      Result = ::sendto(Arguments[0], reinterpret_cast<const void*>(Arguments[1]), Arguments[2], Arguments[3],
                        reinterpret_cast<struct sockaddr*>(Arguments[4]), static_cast<socklen_t>(Arguments[5]));
      break;
    }
    case OP_RECVFROM: {
      Result = ::recvfrom(Arguments[0], reinterpret_cast<void*>(Arguments[1]), Arguments[2], Arguments[3],
                          reinterpret_cast<struct sockaddr*>(Arguments[4]), reinterpret_cast<socklen_t*>(Arguments[5]));
      break;
    }
    case OP_SHUTDOWN: {
      Result = ::shutdown(Arguments[0], Arguments[1]);
      break;
    }
    // The helpers below already return the final value (result or negative errno).
    case OP_SETSOCKOPT: {
      return SetSockOpt(Arguments[0], Arguments[1], Arguments[2], Arguments[3], static_cast<socklen_t>(Arguments[4]));
    }
    case OP_GETSOCKOPT: {
      return GetSockOpt(Arguments[0], Arguments[1], Arguments[2], reinterpret_cast<void*>(Arguments[3]),
                        reinterpret_cast<socklen_t*>(Arguments[4]));
    }
    case OP_SENDMSG: {
      return SendMsg(Arguments[0], reinterpret_cast<const struct msghdr32*>(Arguments[1]), Arguments[2]);
    }
    case OP_RECVMSG: {
      return RecvMsg(Arguments[0], reinterpret_cast<struct msghdr32*>(Arguments[1]), Arguments[2]);
    }
    case OP_ACCEPT4: {
      // Host libc call: must go through SYSCALL_ERRNO below so a failure is reported as -errno and not as -1.
      Result = ::accept4(Arguments[0], reinterpret_cast<struct sockaddr*>(Arguments[1]), reinterpret_cast<socklen_t*>(Arguments[2]), Arguments[3]);
      break;
    }
    case OP_RECVMMSG: {
      // The guest timeout is a timespec32: widen it for the host call and write it back afterwards.
      timespec32* timeout_ts = reinterpret_cast<timespec32*>(Arguments[4]);
      struct timespec tp64 {};
      struct timespec* timed_ptr {};
      if (timeout_ts) {
        tp64 = *timeout_ts;
        timed_ptr = &tp64;
      }

      uint64_t Result = RecvMMsg(Arguments[0], Arguments[1], Arguments[2], Arguments[3], timed_ptr);

      if (timeout_ts) {
        *timeout_ts = tp64;
      }

      return Result;
    }
    case OP_SENDMMSG: {
      return SendMMsg(Arguments[0], reinterpret_cast<mmsghdr_32*>(Arguments[1]), Arguments[2], Arguments[3]);
    }
    default:
      LOGMAN_MSG_A_FMT("Unsupported socketcall op: {}", call);
      // Don't fall through and report success for an operation that was never performed.
      return -EINVAL;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X32(sendmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, const struct msghdr32* msg, int flags) -> uint64_t {
    return SendMsg(sockfd, msg, flags);
  });

  REGISTER_SYSCALL_IMPL_X32(sendmmsg,
                            [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, auto_compat_ptr<mmsghdr_32> msgvec, uint32_t vlen,
                               int flags) -> uint64_t { return SendMMsg(sockfd, msgvec, vlen, flags); });

  REGISTER_SYSCALL_IMPL_X32(recvmmsg,
                            [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, auto_compat_ptr<mmsghdr_32> msgvec, uint32_t vlen,
                               int flags, timespec32* timeout_ts) -> uint64_t {
                              // The guest timeout is a timespec32: widen it for the host call and write it back afterwards.
                              struct timespec tp64 {};
                              struct timespec* timed_ptr {};
                              if (timeout_ts) {
                                tp64 = *timeout_ts;
                                timed_ptr = &tp64;
                              }

                              uint64_t Result = RecvMMsg(sockfd, msgvec, vlen, flags, timed_ptr);

                              if (timeout_ts) {
                                *timeout_ts = tp64;
                              }

                              return Result;
                            });

  // The time64 variant already passes a 64-bit timespec, so it is handed to the host as-is.
  REGISTER_SYSCALL_IMPL_X32(recvmmsg_time64,
                            [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, auto_compat_ptr<mmsghdr_32> msgvec, uint32_t vlen, int flags,
                               struct timespec* timeout_ts) -> uint64_t { return RecvMMsg(sockfd, msgvec, vlen, flags, timeout_ts); });

  REGISTER_SYSCALL_IMPL_X32(recvmsg, [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, struct msghdr32* msg, int flags) -> uint64_t {
    return RecvMsg(sockfd, msg, flags);
  });

  REGISTER_SYSCALL_IMPL_X32(setsockopt,
                            [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, auto_compat_ptr<void> optval,
                               socklen_t optlen) -> uint64_t { return SetSockOpt(sockfd, level, optname, optval, optlen); });

  REGISTER_SYSCALL_IMPL_X32(getsockopt,
                            [](FEXCore::Core::CpuStateFrame* Frame, int sockfd, int level, int optname, auto_compat_ptr<void> optval,
                               auto_compat_ptr<socklen_t> optlen) -> uint64_t { return GetSockOpt(sockfd, level, optname, optval, optlen); });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Stubs.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include <FEXCore/Utils/LogManager.h>

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"

#include <errno.h>
#include <stdint.h>
#include <sys/types.h>

// Body for a syscall that is registered but not implemented.
// Reports the stringified `name` through ERROR_AND_DIE_FMT, then returns -ENOSYS from the
// enclosing function. `name` is only used to build the message, so it must name the syscall the
// stub is registered for.
#define SYSCALL_STUB(name)                         \
  do {                                             \
    ERROR_AND_DIE_FMT("Syscall: " #name " stub!"); \
    return -ENOSYS;                                \
  } while (0)

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Registers the 32-bit only syscalls that have no implementation.
// modify_ldt and readdir report themselves through SYSCALL_STUB; vm86old and vm86 quietly
// return -ENOSYS.
void RegisterStubs(FEX::HLE::SyscallHandler* Handler) {
  // The stub name is what ends up in the error message, so it has to match the registered syscall.
  REGISTER_SYSCALL_IMPL_X32(modify_ldt, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { SYSCALL_STUB(modify_ldt); });

  REGISTER_SYSCALL_IMPL_X32(readdir, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { SYSCALL_STUB(readdir); });

  REGISTER_SYSCALL_IMPL_X32(vm86old, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -ENOSYS; });

  REGISTER_SYSCALL_IMPL_X32(vm86, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t { return -ENOSYS; });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/IoctlEmulation.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/SyscallsEnum.h"

#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/memory.h>

#include <bitset>
#include <cerrno>
#include <cstdint>
#include <limits>
#include <mutex>
#include <sys/mman.h>
#include <sys/shm.h>
#include <utility>

namespace FEX::HLE::x32 {
// Forward declarations of the 32-bit specific registration functions.
// Each one is called from x32SyscallHandler::RegisterSyscallHandlers() below.
void RegisterEpoll(FEX::HLE::SyscallHandler* Handler);
void RegisterFD(FEX::HLE::SyscallHandler* Handler);
void RegisterFS(FEX::HLE::SyscallHandler* Handler);
void RegisterInfo(FEX::HLE::SyscallHandler* Handler);
void RegisterIO(FEX::HLE::SyscallHandler* Handler);
void RegisterMemory(FEX::HLE::SyscallHandler* Handler);
void RegisterMsg(FEX::HLE::SyscallHandler* Handler);
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler);
void RegisterSched(FEX::HLE::SyscallHandler* Handler);
void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler);
void RegisterSignals(FEX::HLE::SyscallHandler* Handler);
void RegisterSocket(FEX::HLE::SyscallHandler* Handler);
void RegisterStubs(FEX::HLE::SyscallHandler* Handler);
void RegisterThread(FEX::HLE::SyscallHandler* Handler);
void RegisterTime(FEX::HLE::SyscallHandler* Handler);
void RegisterTimer(FEX::HLE::SyscallHandler* Handler);
void RegisterPassthrough(FEX::HLE::SyscallHandler* Handler);

// Constructs the 32-bit syscall handler.
// ctx, _SignalDelegation and ThunkHandler are forwarded to the SyscallHandler base class and the
// handler takes ownership of Allocator. The OS ABI is then set to OS_LINUX32 and all syscall
// handlers are registered.
x32SyscallHandler::x32SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation,
                                     FEX::HLE::ThunkHandler* ThunkHandler, fextl::unique_ptr<MemAllocator> Allocator)
  : SyscallHandler {ctx, _SignalDelegation, ThunkHandler}
  , AllocHandler {std::move(Allocator)} {
  OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX32;
  RegisterSyscallHandlers();
}

// Registers every syscall handler for the 32-bit guest: first the registration functions from
// FEX::HLE, then the 32-bit specific ones from FEX::HLE::x32.
// When PRINT_MISSING_SYSCALLS is enabled, every entry of SyscallNames whose definition still
// points at UnimplementedSyscall after registration is logged.
void x32SyscallHandler::RegisterSyscallHandlers() {
  FEX::HLE::RegisterEpoll(this);
  FEX::HLE::RegisterFD(this);
  FEX::HLE::RegisterFS(this);
  FEX::HLE::RegisterInfo(this);
  FEX::HLE::RegisterIO(this);
  FEX::HLE::RegisterMemory(this);
  FEX::HLE::RegisterSignals(this);
  FEX::HLE::RegisterThread(this);
  FEX::HLE::RegisterTimer(this);
  FEX::HLE::RegisterNotImplemented(this);
  FEX::HLE::RegisterStubs(this);

  // 32bit specific
  FEX::HLE::x32::RegisterEpoll(this);
  FEX::HLE::x32::RegisterFD(this);
  FEX::HLE::x32::RegisterFS(this);
  FEX::HLE::x32::RegisterInfo(this);
  FEX::HLE::x32::RegisterIO(this);
  FEX::HLE::x32::RegisterMemory(this);
  FEX::HLE::x32::RegisterMsg(this);
  FEX::HLE::x32::RegisterNotImplemented(this);
  FEX::HLE::x32::RegisterSched(this);
  FEX::HLE::x32::RegisterSemaphore(this);
  FEX::HLE::x32::RegisterSignals(this);
  FEX::HLE::x32::RegisterSocket(this);
  FEX::HLE::x32::RegisterStubs(this);
  FEX::HLE::x32::RegisterThread(this);
  FEX::HLE::x32::RegisterTime(this);
  FEX::HLE::x32::RegisterTimer(this);
  FEX::HLE::x32::RegisterPassthrough(this);

#if PRINT_MISSING_SYSCALLS
  // Anything still pointing at the default handler was not claimed by any registration above.
  for (auto& Syscall : SyscallNames) {
    if (Definitions[Syscall.first].Ptr == reinterpret_cast<void*>(&UnimplementedSyscall)) {
      LogMan::Msg::DFmt("Unimplemented syscall: {}: {}", Syscall.first, Syscall.second);
    }
  }
#endif
}

// Factory for the 32-bit syscall handler.
// Forwards all arguments to the x32SyscallHandler constructor (Allocator is moved in) and returns
// the new handler as a pointer to the SyscallHandler base class.
fextl::unique_ptr<FEX::HLE::SyscallHandler> CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation,
                                                          FEX::HLE::ThunkHandler* ThunkHandler, fextl::unique_ptr<MemAllocator> Allocator) {
  return fextl::make_unique<x32SyscallHandler>(ctx, _SignalDelegation, ThunkHandler, std::move(Allocator));
}

} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Syscalls.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#pragma once

#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>

#include "LinuxSyscalls/Syscalls.h"

#include <memory>
#include <stddef.h>
#include <stdint.h>
#include <sys/types.h>

namespace FEXCore {
namespace Context {
  class Context;
}
namespace Core {
  struct CpuStateFrame;
}
} // namespace FEXCore

namespace FEX::HLE {
class SignalDelegator;
class ThunkHandler;
} // namespace FEX::HLE

namespace FEX::HLE::x32 {

// Syscall handler for 32-bit guests.
// Owns the memory allocator it is constructed with and stores the registered 32-bit syscall
// definitions in the Definitions table.
class x32SyscallHandler final : public FEX::HLE::SyscallHandler {
public:
  // Forwards ctx, _SignalDelegation and ThunkHandler to the base class and takes ownership of Allocator.
  x32SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler,
                    fextl::unique_ptr<MemAllocator> Allocator);

  // Returns the allocator owned by this handler. The handler keeps ownership.
  FEX::HLE::MemAllocator* GetAllocator() {
    return AllocHandler.get();
  }
  // Same allocator as GetAllocator().
  FEX::HLE::MemAllocator* Get32BitAllocator() override {
    return GetAllocator();
  }

  // Forwards to the base class implementation with `false` as its leading flag.
  // NOTE(review): the flag presumably selects the 64-bit path — confirm against SyscallHandler::GuestMmap.
  void* GuestMmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset) override {
    return FEX::HLE::SyscallHandler::GuestMmap(false, Thread, addr, length, prot, flags, fd, offset);
  }
  // Forwards to the base class implementation with `false` as its leading flag (see GuestMmap).
  uint64_t GuestMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) override {
    return FEX::HLE::SyscallHandler::GuestMunmap(false, Thread, addr, length);
  }

  // Stores a handler in the definition slot for SyscallNumber.
  //   SyscallNumber:     index into Definitions, looked up with .at().
  //   TraceFormatString: strace format for the syscall (only with DEBUG_STRACE).
  //   SyscallHandler:    type-erased pointer to the handler function.
  //   ArgumentCount:     number of arguments the handler takes.
  // With assertions enabled, registering a slot that no longer holds UnimplementedSyscall asserts.
  void RegisterSyscall_32(int SyscallNumber,
#ifdef DEBUG_STRACE
                          const fextl::string& TraceFormatString,
#endif
                          void* SyscallHandler, int ArgumentCount) override {
    auto& Def = Definitions.at(SyscallNumber);
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(Def.Ptr == reinterpret_cast<void*>(&UnimplementedSyscall), "Oops overwriting sysall problem, {}", SyscallNumber);
#endif
    Def.Ptr = SyscallHandler;
    Def.NumArgs = ArgumentCount;
#ifdef DEBUG_STRACE
    Def.StraceFmt = TraceFormatString;
#endif
  }

private:
  // Runs all registration functions; called once from the constructor.
  void RegisterSyscallHandlers();
  // Allocator handed in at construction time.
  fextl::unique_ptr<MemAllocator> AllocHandler {};
};

fextl::unique_ptr<FEX::HLE::SyscallHandler> CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation,
                                                          FEX::HLE::ThunkHandler* ThunkHandler, fextl::unique_ptr<MemAllocator> Allocator);
//////
// REGISTER_SYSCALL_IMPL_X32 implementation
// Given a syscall name and a lambda, this generates an strace format string, extracts the number of arguments,
// and registers the lambda as a syscall handler
//////

// RegisterSyscall base overload
// Deduces the return type and argument pack from the function pointer that is passed in.
// Lambdas do not match this overload directly: they are objects with an operator(), not functions.
// The handler is registered type-erased, together with the number of arguments following the frame pointer.
// In DEBUG_STRACE builds a "<Name>(<per-argument formats>) = {}" trace string is registered as well.
template<typename R, typename... Args>
void RegisterSyscall(SyscallHandler* Handler, int SyscallNumber, const char* Name, R (*fn)(FEXCore::Core::CpuStateFrame* Frame, Args...)) {
  void* const ErasedHandler = reinterpret_cast<void*>(fn);
  const int ArgumentCount = sizeof...(Args);
#ifdef DEBUG_STRACE
  const auto TraceFormatString = fextl::string(Name) + "(" + CollectArgsFmtString<Args...>() + ") = {}";
  Handler->RegisterSyscall_32(SyscallNumber, TraceFormatString, ErasedHandler, ArgumentCount);
#else
  Handler->RegisterSyscall_32(SyscallNumber, ErasedHandler, ArgumentCount);
#endif
}

// Generic RegisterSyscall overload for lambdas
// A non-capturing lambda is convertible to a function pointer, but that conversion is not considered
// while deducing the template arguments of the base overload.
// Unary plus forces the conversion here, after which the call resolves to the base overload.
template<class F>
void RegisterSyscall(SyscallHandler* _Handler, int num, const char* name, F f) {
  auto FunctionPointer = +f;
  RegisterSyscall(_Handler, num, name, FunctionPointer);
}

} // namespace FEX::HLE::x32

// Registers a syscall handler for 32-bit guests only.
// Expands to a call to FEX::HLE::x32::RegisterSyscall using:
//   - `Handler`: NOT a macro argument; a variable of that name must be in scope at the expansion site
//   - `x32::SYSCALL_x86_<name>`: the syscall number, looked up relative to the expansion site's namespace
//   - `#name`: the stringified syscall name
//   - `lambda`: the handler; wrap it in parentheses if it contains top-level commas
// The do { } while (false) wrapper makes the expansion behave as a single statement.
#define REGISTER_SYSCALL_IMPL_X32(name, lambda)                                      \
  do {                                                                               \
    FEX::HLE::x32::RegisterSyscall(Handler, x32::SYSCALL_x86_##name, #name, lambda); \
  } while (false)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/SyscallsEnum.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/
#pragma once

namespace FEX::HLE::x32 {
///< Enum containing all 32bit x86 linux syscalls for the guest kernel version
// Each enumerator is named SYSCALL_x86_<name> so that REGISTER_SYSCALL_IMPL_X32(<name>, ...) can token-paste it.
// Entries marked "No entrypoint. -ENOSYS" have a number but no implementation behind it.
// Numbers with no enumerator at all (222-223, 251, 285, 387-392, 415) are called out where the gap occurs.
enum Syscalls_x86 {
  SYSCALL_x86_restart_syscall = 0,
  SYSCALL_x86_exit = 1,
  SYSCALL_x86_fork = 2,
  SYSCALL_x86_read = 3,
  SYSCALL_x86_write = 4,
  SYSCALL_x86_open = 5,
  SYSCALL_x86_close = 6,
  SYSCALL_x86_waitpid = 7,
  SYSCALL_x86_creat = 8,
  SYSCALL_x86_link = 9,
  SYSCALL_x86_unlink = 10,
  SYSCALL_x86_execve = 11,
  SYSCALL_x86_chdir = 12,
  SYSCALL_x86_time = 13,
  SYSCALL_x86_mknod = 14,
  SYSCALL_x86_chmod = 15,
  SYSCALL_x86_lchown = 16,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_break = 17,
  SYSCALL_x86_oldstat = 18,
  SYSCALL_x86_lseek = 19,
  SYSCALL_x86_getpid = 20,
  SYSCALL_x86_mount = 21,
  SYSCALL_x86_umount = 22,
  SYSCALL_x86_setuid = 23,
  SYSCALL_x86_getuid = 24,
  SYSCALL_x86_stime = 25,
  SYSCALL_x86_ptrace = 26,
  SYSCALL_x86_alarm = 27,
  SYSCALL_x86_oldfstat = 28,
  SYSCALL_x86_pause = 29,
  SYSCALL_x86_utime = 30,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_stty = 31,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_gtty = 32,
  SYSCALL_x86_access = 33,
  SYSCALL_x86_nice = 34,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_ftime = 35,
  SYSCALL_x86_sync = 36,
  SYSCALL_x86_kill = 37,
  SYSCALL_x86_rename = 38,
  SYSCALL_x86_mkdir = 39,
  SYSCALL_x86_rmdir = 40,
  SYSCALL_x86_dup = 41,
  SYSCALL_x86_pipe = 42,
  SYSCALL_x86_times = 43,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_prof = 44,
  SYSCALL_x86_brk = 45,
  SYSCALL_x86_setgid = 46,
  SYSCALL_x86_getgid = 47,
  SYSCALL_x86_signal = 48,
  SYSCALL_x86_geteuid = 49,
  SYSCALL_x86_getegid = 50,
  SYSCALL_x86_acct = 51,
  SYSCALL_x86_umount2 = 52,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_lock = 53,
  SYSCALL_x86_ioctl = 54,
  SYSCALL_x86_fcntl = 55,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_mpx = 56,
  SYSCALL_x86_setpgid = 57,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_ulimit = 58,
  SYSCALL_x86_oldolduname = 59,
  SYSCALL_x86_umask = 60,
  SYSCALL_x86_chroot = 61,
  SYSCALL_x86_ustat = 62,
  SYSCALL_x86_dup2 = 63,
  SYSCALL_x86_getppid = 64,
  SYSCALL_x86_getpgrp = 65,
  SYSCALL_x86_setsid = 66,
  SYSCALL_x86_sigaction = 67,
  SYSCALL_x86_sgetmask = 68,
  SYSCALL_x86_ssetmask = 69,
  SYSCALL_x86_setreuid = 70,
  SYSCALL_x86_setregid = 71,
  SYSCALL_x86_sigsuspend = 72,
  SYSCALL_x86_sigpending = 73,
  SYSCALL_x86_sethostname = 74,
  SYSCALL_x86_setrlimit = 75,
  SYSCALL_x86_getrlimit = 76,
  SYSCALL_x86_getrusage = 77,
  SYSCALL_x86_gettimeofday = 78,
  SYSCALL_x86_settimeofday = 79,
  SYSCALL_x86_getgroups = 80,
  SYSCALL_x86_setgroups = 81,
  SYSCALL_x86_select = 82,
  SYSCALL_x86_symlink = 83,
  SYSCALL_x86_oldlstat = 84,
  SYSCALL_x86_readlink = 85,
  SYSCALL_x86_uselib = 86,
  SYSCALL_x86_swapon = 87,
  SYSCALL_x86_reboot = 88,
  SYSCALL_x86_readdir = 89,
  SYSCALL_x86_mmap = 90,
  SYSCALL_x86_munmap = 91,
  SYSCALL_x86_truncate = 92,
  SYSCALL_x86_ftruncate = 93,
  SYSCALL_x86_fchmod = 94,
  SYSCALL_x86_fchown = 95,
  SYSCALL_x86_getpriority = 96,
  SYSCALL_x86_setpriority = 97,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_profil = 98,
  SYSCALL_x86_statfs = 99,
  SYSCALL_x86_fstatfs = 100,
  SYSCALL_x86_ioperm = 101,
  SYSCALL_x86_socketcall = 102,
  SYSCALL_x86_syslog = 103,
  SYSCALL_x86_setitimer = 104,
  SYSCALL_x86_getitimer = 105,
  SYSCALL_x86_stat = 106,
  SYSCALL_x86_lstat = 107,
  SYSCALL_x86_fstat = 108,
  SYSCALL_x86_olduname = 109,
  SYSCALL_x86_iopl = 110,
  SYSCALL_x86_vhangup = 111,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_idle = 112,
  SYSCALL_x86_vm86old = 113,
  SYSCALL_x86_wait4 = 114,
  SYSCALL_x86_swapoff = 115,
  SYSCALL_x86_sysinfo = 116,
  SYSCALL_x86_ipc = 117,
  SYSCALL_x86_fsync = 118,
  SYSCALL_x86_sigreturn = 119,
  SYSCALL_x86_clone = 120,
  SYSCALL_x86_setdomainname = 121,
  SYSCALL_x86_uname = 122,
  SYSCALL_x86_modify_ldt = 123,
  SYSCALL_x86_adjtimex = 124,
  SYSCALL_x86_mprotect = 125,
  SYSCALL_x86_sigprocmask = 126,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_create_module = 127,
  SYSCALL_x86_init_module = 128,
  SYSCALL_x86_delete_module = 129,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_get_kernel_syms = 130,
  SYSCALL_x86_quotactl = 131,
  SYSCALL_x86_getpgid = 132,
  SYSCALL_x86_fchdir = 133,
  SYSCALL_x86_bdflush = 134,
  SYSCALL_x86_sysfs = 135,
  SYSCALL_x86_personality = 136,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_afs_syscall = 137,
  SYSCALL_x86_setfsuid = 138,
  SYSCALL_x86_setfsgid = 139,
  SYSCALL_x86__llseek = 140,
  SYSCALL_x86_getdents = 141,
  SYSCALL_x86__newselect = 142,
  SYSCALL_x86_flock = 143,
  SYSCALL_x86_msync = 144,
  SYSCALL_x86_readv = 145,
  SYSCALL_x86_writev = 146,
  SYSCALL_x86_getsid = 147,
  SYSCALL_x86_fdatasync = 148,
  SYSCALL_x86__sysctl = 149,
  SYSCALL_x86_mlock = 150,
  SYSCALL_x86_munlock = 151,
  SYSCALL_x86_mlockall = 152,
  SYSCALL_x86_munlockall = 153,
  SYSCALL_x86_sched_setparam = 154,
  SYSCALL_x86_sched_getparam = 155,
  SYSCALL_x86_sched_setscheduler = 156,
  SYSCALL_x86_sched_getscheduler = 157,
  SYSCALL_x86_sched_yield = 158,
  SYSCALL_x86_sched_get_priority_max = 159,
  SYSCALL_x86_sched_get_priority_min = 160,
  SYSCALL_x86_sched_rr_get_interval = 161,
  SYSCALL_x86_nanosleep = 162,
  SYSCALL_x86_mremap = 163,
  SYSCALL_x86_setresuid = 164,
  SYSCALL_x86_getresuid = 165,
  SYSCALL_x86_vm86 = 166,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_query_module = 167,
  SYSCALL_x86_poll = 168,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_nfsservctl = 169,
  SYSCALL_x86_setresgid = 170,
  SYSCALL_x86_getresgid = 171,
  SYSCALL_x86_prctl = 172,
  SYSCALL_x86_rt_sigreturn = 173,
  SYSCALL_x86_rt_sigaction = 174,
  SYSCALL_x86_rt_sigprocmask = 175,
  SYSCALL_x86_rt_sigpending = 176,
  SYSCALL_x86_rt_sigtimedwait = 177,
  SYSCALL_x86_rt_sigqueueinfo = 178,
  SYSCALL_x86_rt_sigsuspend = 179,
  SYSCALL_x86_pread_64 = 180,
  SYSCALL_x86_pwrite_64 = 181,
  SYSCALL_x86_chown = 182,
  SYSCALL_x86_getcwd = 183,
  SYSCALL_x86_capget = 184,
  SYSCALL_x86_capset = 185,
  SYSCALL_x86_sigaltstack = 186,
  SYSCALL_x86_sendfile = 187,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_getpmsg = 188,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_putpmsg = 189,
  SYSCALL_x86_vfork = 190,
  SYSCALL_x86_ugetrlimit = 191,
  SYSCALL_x86_mmap2 = 192,
  SYSCALL_x86_truncate64 = 193,
  SYSCALL_x86_ftruncate64 = 194,
  SYSCALL_x86_stat64 = 195,
  SYSCALL_x86_lstat64 = 196,
  SYSCALL_x86_fstat64 = 197,
  SYSCALL_x86_lchown32 = 198,
  SYSCALL_x86_getuid32 = 199,
  SYSCALL_x86_getgid32 = 200,
  SYSCALL_x86_geteuid32 = 201,
  SYSCALL_x86_getegid32 = 202,
  SYSCALL_x86_setreuid32 = 203,
  SYSCALL_x86_setregid32 = 204,
  SYSCALL_x86_getgroups32 = 205,
  SYSCALL_x86_setgroups32 = 206,
  SYSCALL_x86_fchown32 = 207,
  SYSCALL_x86_setresuid32 = 208,
  SYSCALL_x86_getresuid32 = 209,
  SYSCALL_x86_setresgid32 = 210,
  SYSCALL_x86_getresgid32 = 211,
  SYSCALL_x86_chown32 = 212,
  SYSCALL_x86_setuid32 = 213,
  SYSCALL_x86_setgid32 = 214,
  SYSCALL_x86_setfsuid32 = 215,
  SYSCALL_x86_setfsgid32 = 216,
  SYSCALL_x86_pivot_root = 217,
  SYSCALL_x86_mincore = 218,
  SYSCALL_x86_madvise = 219,
  SYSCALL_x86_getdents64 = 220,
  SYSCALL_x86_fcntl64 = 221,
  // 222 and 223 have no enumerator
  SYSCALL_x86_gettid = 224,
  SYSCALL_x86_readahead = 225,
  SYSCALL_x86_setxattr = 226,
  SYSCALL_x86_lsetxattr = 227,
  SYSCALL_x86_fsetxattr = 228,
  SYSCALL_x86_getxattr = 229,
  SYSCALL_x86_lgetxattr = 230,
  SYSCALL_x86_fgetxattr = 231,
  SYSCALL_x86_listxattr = 232,
  SYSCALL_x86_llistxattr = 233,
  SYSCALL_x86_flistxattr = 234,
  SYSCALL_x86_removexattr = 235,
  SYSCALL_x86_lremovexattr = 236,
  SYSCALL_x86_fremovexattr = 237,
  SYSCALL_x86_tkill = 238,
  SYSCALL_x86_sendfile64 = 239,
  SYSCALL_x86_futex = 240,
  SYSCALL_x86_sched_setaffinity = 241,
  SYSCALL_x86_sched_getaffinity = 242,
  SYSCALL_x86_set_thread_area = 243,
  SYSCALL_x86_get_thread_area = 244,
  SYSCALL_x86_io_setup = 245,
  SYSCALL_x86_io_destroy = 246,
  SYSCALL_x86_io_getevents = 247,
  SYSCALL_x86_io_submit = 248,
  SYSCALL_x86_io_cancel = 249,
  SYSCALL_x86_fadvise64 = 250,
  // 251 has no enumerator
  SYSCALL_x86_exit_group = 252,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_lookup_dcookie = 253,
  SYSCALL_x86_epoll_create = 254,
  SYSCALL_x86_epoll_ctl = 255,
  SYSCALL_x86_epoll_wait = 256,
  SYSCALL_x86_remap_file_pages = 257,
  SYSCALL_x86_set_tid_address = 258,
  SYSCALL_x86_timer_create = 259,
  SYSCALL_x86_timer_settime = 260,
  SYSCALL_x86_timer_gettime = 261,
  SYSCALL_x86_timer_getoverrun = 262,
  SYSCALL_x86_timer_delete = 263,
  SYSCALL_x86_clock_settime = 264,
  SYSCALL_x86_clock_gettime = 265,
  SYSCALL_x86_clock_getres = 266,
  SYSCALL_x86_clock_nanosleep = 267,
  SYSCALL_x86_statfs64 = 268,
  SYSCALL_x86_fstatfs64 = 269,
  SYSCALL_x86_tgkill = 270,
  SYSCALL_x86_utimes = 271,
  SYSCALL_x86_fadvise64_64 = 272,
  // No entrypoint. -ENOSYS
  SYSCALL_x86_vserver = 273,
  SYSCALL_x86_mbind = 274,
  SYSCALL_x86_get_mempolicy = 275,
  SYSCALL_x86_set_mempolicy = 276,
  SYSCALL_x86_mq_open = 277,
  SYSCALL_x86_mq_unlink = 278,
  SYSCALL_x86_mq_timedsend = 279,
  SYSCALL_x86_mq_timedreceive = 280,
  SYSCALL_x86_mq_notify = 281,
  SYSCALL_x86_mq_getsetattr = 282,
  SYSCALL_x86_kexec_load = 283,
  SYSCALL_x86_waitid = 284,
  // 285 has no enumerator
  SYSCALL_x86_add_key = 286,
  SYSCALL_x86_request_key = 287,
  SYSCALL_x86_keyctl = 288,
  SYSCALL_x86_ioprio_set = 289,
  SYSCALL_x86_ioprio_get = 290,
  SYSCALL_x86_inotify_init = 291,
  SYSCALL_x86_inotify_add_watch = 292,
  SYSCALL_x86_inotify_rm_watch = 293,
  SYSCALL_x86_migrate_pages = 294,
  SYSCALL_x86_openat = 295,
  SYSCALL_x86_mkdirat = 296,
  SYSCALL_x86_mknodat = 297,
  SYSCALL_x86_fchownat = 298,
  SYSCALL_x86_futimesat = 299,
  SYSCALL_x86_fstatat_64 = 300,
  SYSCALL_x86_unlinkat = 301,
  SYSCALL_x86_renameat = 302,
  SYSCALL_x86_linkat = 303,
  SYSCALL_x86_symlinkat = 304,
  SYSCALL_x86_readlinkat = 305,
  SYSCALL_x86_fchmodat = 306,
  SYSCALL_x86_faccessat = 307,
  SYSCALL_x86_pselect6 = 308,
  SYSCALL_x86_ppoll = 309,
  SYSCALL_x86_unshare = 310,
  SYSCALL_x86_set_robust_list = 311,
  SYSCALL_x86_get_robust_list = 312,
  SYSCALL_x86_splice = 313,
  SYSCALL_x86_sync_file_range = 314,
  SYSCALL_x86_tee = 315,
  SYSCALL_x86_vmsplice = 316,
  SYSCALL_x86_move_pages = 317,
  SYSCALL_x86_getcpu = 318,
  SYSCALL_x86_epoll_pwait = 319,
  SYSCALL_x86_utimensat = 320,
  SYSCALL_x86_signalfd = 321,
  SYSCALL_x86_timerfd_create = 322,
  SYSCALL_x86_eventfd = 323,
  SYSCALL_x86_fallocate = 324,
  SYSCALL_x86_timerfd_settime = 325,
  SYSCALL_x86_timerfd_gettime = 326,
  SYSCALL_x86_signalfd4 = 327,
  SYSCALL_x86_eventfd2 = 328,
  SYSCALL_x86_epoll_create1 = 329,
  SYSCALL_x86_dup3 = 330,
  SYSCALL_x86_pipe2 = 331,
  SYSCALL_x86_inotify_init1 = 332,
  SYSCALL_x86_preadv = 333,
  SYSCALL_x86_pwritev = 334,
  SYSCALL_x86_rt_tgsigqueueinfo = 335,
  SYSCALL_x86_perf_event_open = 336,
  SYSCALL_x86_recvmmsg = 337,
  SYSCALL_x86_fanotify_init = 338,
  SYSCALL_x86_fanotify_mark = 339,
  SYSCALL_x86_prlimit_64 = 340,
  SYSCALL_x86_name_to_handle_at = 341,
  SYSCALL_x86_open_by_handle_at = 342,
  SYSCALL_x86_clock_adjtime = 343,
  SYSCALL_x86_syncfs = 344,
  SYSCALL_x86_sendmmsg = 345,
  SYSCALL_x86_setns = 346,
  SYSCALL_x86_process_vm_readv = 347,
  SYSCALL_x86_process_vm_writev = 348,
  SYSCALL_x86_kcmp = 349,
  SYSCALL_x86_finit_module = 350,
  SYSCALL_x86_sched_setattr = 351,
  SYSCALL_x86_sched_getattr = 352,
  SYSCALL_x86_renameat2 = 353,
  SYSCALL_x86_seccomp = 354,
  SYSCALL_x86_getrandom = 355,
  SYSCALL_x86_memfd_create = 356,
  SYSCALL_x86_bpf = 357,
  SYSCALL_x86_execveat = 358,
  SYSCALL_x86_socket = 359,
  SYSCALL_x86_socketpair = 360,
  SYSCALL_x86_bind = 361,
  SYSCALL_x86_connect = 362,
  SYSCALL_x86_listen = 363,
  SYSCALL_x86_accept4 = 364,
  SYSCALL_x86_getsockopt = 365,
  SYSCALL_x86_setsockopt = 366,
  SYSCALL_x86_getsockname = 367,
  SYSCALL_x86_getpeername = 368,
  SYSCALL_x86_sendto = 369,
  SYSCALL_x86_sendmsg = 370,
  SYSCALL_x86_recvfrom = 371,
  SYSCALL_x86_recvmsg = 372,
  SYSCALL_x86_shutdown = 373,
  SYSCALL_x86_userfaultfd = 374,
  SYSCALL_x86_membarrier = 375,
  SYSCALL_x86_mlock2 = 376,
  SYSCALL_x86_copy_file_range = 377,
  SYSCALL_x86_preadv2 = 378,
  SYSCALL_x86_pwritev2 = 379,
  SYSCALL_x86_pkey_mprotect = 380,
  SYSCALL_x86_pkey_alloc = 381,
  SYSCALL_x86_pkey_free = 382,
  SYSCALL_x86_statx = 383,
  SYSCALL_x86_arch_prctl = 384,
  SYSCALL_x86_io_pgetevents = 385,
  SYSCALL_x86_rseq = 386,
  // 387 through 392 have no enumerator
  SYSCALL_x86_semget = 393,
  SYSCALL_x86_semctl = 394,
  SYSCALL_x86_shmget = 395,
  SYSCALL_x86_shmctl = 396,
  SYSCALL_x86_shmat = 397,
  SYSCALL_x86_shmdt = 398,
  SYSCALL_x86_msgget = 399,
  SYSCALL_x86_msgsnd = 400,
  SYSCALL_x86_msgrcv = 401,
  SYSCALL_x86_msgctl = 402,
  SYSCALL_x86_clock_gettime64 = 403,
  SYSCALL_x86_clock_settime64 = 404,
  SYSCALL_x86_clock_adjtime64 = 405,
  SYSCALL_x86_clock_getres_time64 = 406,
  SYSCALL_x86_clock_nanosleep_time64 = 407,
  SYSCALL_x86_timer_gettime64 = 408,
  SYSCALL_x86_timer_settime64 = 409,
  SYSCALL_x86_timerfd_gettime64 = 410,
  SYSCALL_x86_timerfd_settime64 = 411,
  SYSCALL_x86_utimensat_time64 = 412,
  SYSCALL_x86_pselect6_time64 = 413,
  SYSCALL_x86_ppoll_time64 = 414,
  // 415 has no enumerator
  SYSCALL_x86_io_pgetevents_time64 = 416,
  SYSCALL_x86_recvmmsg_time64 = 417,
  SYSCALL_x86_mq_timedsend_time64 = 418,
  SYSCALL_x86_mq_timedreceive_time64 = 419,
  SYSCALL_x86_semtimedop_time64 = 420,
  SYSCALL_x86_rt_sigtimedwait_time64 = 421,
  SYSCALL_x86_futex_time64 = 422,
  SYSCALL_x86_sched_rr_get_interval_time64 = 423,
  SYSCALL_x86_pidfd_send_signal = 424,
  SYSCALL_x86_io_uring_setup = 425,
  SYSCALL_x86_io_uring_enter = 426,
  SYSCALL_x86_io_uring_register = 427,
  SYSCALL_x86_open_tree = 428,
  SYSCALL_x86_move_mount = 429,
  SYSCALL_x86_fsopen = 430,
  SYSCALL_x86_fsconfig = 431,
  SYSCALL_x86_fsmount = 432,
  SYSCALL_x86_fspick = 433,
  SYSCALL_x86_pidfd_open = 434,
  SYSCALL_x86_clone3 = 435,
  SYSCALL_x86_close_range = 436,
  SYSCALL_x86_openat2 = 437,
  SYSCALL_x86_pidfd_getfd = 438,
  SYSCALL_x86_faccessat2 = 439,
  SYSCALL_x86_process_madvise = 440,
  SYSCALL_x86_epoll_pwait2 = 441,
  SYSCALL_x86_mount_setattr = 442,
  SYSCALL_x86_quotactl_fd = 443,
  SYSCALL_x86_landlock_create_ruleset = 444,
  SYSCALL_x86_landlock_add_rule = 445,
  SYSCALL_x86_landlock_restrict_self = 446,
  SYSCALL_x86_memfd_secret = 447,
  SYSCALL_x86_process_mrelease = 448,
  SYSCALL_x86_futex_waitv = 449,
  SYSCALL_x86_set_mempolicy_home_node = 450,
  SYSCALL_x86_cachestat = 451,
  SYSCALL_x86_fchmodat2 = 452,
  SYSCALL_x86_map_shadow_stack = 453,
  SYSCALL_x86_futex_wake = 454,
  SYSCALL_x86_futex_wait = 455,
  SYSCALL_x86_futex_requeue = 456,
  SYSCALL_x86_statmount = 457,
  SYSCALL_x86_listmount = 458,
  SYSCALL_x86_lsm_get_self_attr = 459,
  SYSCALL_x86_lsm_set_self_attr = 460,
  SYSCALL_x86_lsm_list_modules = 461,
  SYSCALL_x86_mseal = 462,
  SYSCALL_x86_setxattrat = 463,
  SYSCALL_x86_getxattrat = 464,
  SYSCALL_x86_listxattrat = 465,
  SYSCALL_x86_removexattrat = 466,
  // Not a syscall: upper bound that leaves headroom above the highest number defined above.
  // NOTE(review): presumably used to size the syscall definition table - confirm at the use site.
  SYSCALL_x86_MAX = 512,
};
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/SyscallsNames.inl
================================================
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

{ 0, "restart_syscall" },
{ 1, "exit" },
{ 2, "fork" },
{ 3, "read" },
{ 4, "write" },
{ 5, "open" },
{ 6, "close" },
{ 7, "waitpid" },
{ 8, "creat" },
{ 9, "link" },
{ 10, "unlink" },
{ 11, "execve" },
{ 12, "chdir" },
{ 13, "time" },
{ 14, "mknod" },
{ 15, "chmod" },
{ 16, "lchown" },
{ 17, "break" },
{ 18, "oldstat" },
{ 19, "lseek" },
{ 20, "getpid" },
{ 21, "mount" },
{ 22, "umount" },
{ 23, "setuid" },
{ 24, "getuid" },
{ 25, "stime" },
{ 26, "ptrace" },
{ 27, "alarm" },
{ 28, "oldfstat" },
{ 29, "pause" },
{ 30, "utime" },
{ 31, "stty" },
{ 32, "gtty" },
{ 33, "access" },
{ 34, "nice" },
{ 35, "ftime" },
{ 36, "sync" },
{ 37, "kill" },
{ 38, "rename" },
{ 39, "mkdir" },
{ 40, "rmdir" },
{ 41, "dup" },
{ 42, "pipe" },
{ 43, "times" },
{ 44, "prof" },
{ 45, "brk" },
{ 46, "setgid" },
{ 47, "getgid" },
{ 48, "signal" },
{ 49, "geteuid" },
{ 50, "getegid" },
{ 51, "acct" },
{ 52, "umount2" },
{ 53, "lock" },
{ 54, "ioctl" },
{ 55, "fcntl" },
{ 56, "mpx" },
{ 57, "setpgid" },
{ 58, "ulimit" },
{ 59, "oldolduname" },
{ 60, "umask" },
{ 61, "chroot" },
{ 62, "ustat" },
{ 63, "dup2" },
{ 64, "getppid" },
{ 65, "getpgrp" },
{ 66, "setsid" },
{ 67, "sigaction" },
{ 68, "sgetmask" },
{ 69, "ssetmask" },
{ 70, "setreuid" },
{ 71, "setregid" },
{ 72, "sigsuspend" },
{ 73, "sigpending" },
{ 74, "sethostname" },
{ 75, "setrlimit" },
{ 76, "getrlimit" },
{ 77, "getrusage" },
{ 78, "gettimeofday" },
{ 79, "settimeofday" },
{ 80, "getgroups" },
{ 81, "setgroups" },
{ 82, "select" },
{ 83, "symlink" },
{ 84, "oldlstat" },
{ 85, "readlink" },
{ 86, "uselib" },
{ 87, "swapon" },
{ 88, "reboot" },
{ 89, "readdir" },
{ 90, "mmap" },
{ 91, "munmap" },
{ 92, "truncate" },
{ 93, "ftruncate" },
{ 94, "fchmod" },
{ 95, "fchown" },
{ 96, "getpriority" },
{ 97, "setpriority" },
{ 98, "profil" },
{ 99, "statfs" },
{ 100, "fstatfs" },
{ 101, "ioperm" },
{ 102, "socketcall" },
{ 103, "syslog" },
{ 104, "setitimer" },
{ 105, "getitimer" },
{ 106, "stat" },
{ 107, "lstat" },
{ 108, "fstat" },
{ 109, "olduname" },
{ 110, "iopl" },
{ 111, "vhangup" },
{ 112, "idle" },
{ 113, "vm86old" },
{ 114, "wait4" },
{ 115, "swapoff" },
{ 116, "sysinfo" },
{ 117, "ipc" },
{ 118, "fsync" },
{ 119, "sigreturn" },
{ 120, "clone" },
{ 121, "setdomainname" },
{ 122, "uname" },
{ 123, "modify_ldt" },
{ 124, "adjtimex" },
{ 125, "mprotect" },
{ 126, "sigprocmask" },
{ 127, "create_module" },
{ 128, "init_module" },
{ 129, "delete_module" },
{ 130, "get_kernel_syms" },
{ 131, "quotactl" },
{ 132, "getpgid" },
{ 133, "fchdir" },
{ 134, "bdflush" },
{ 135, "sysfs" },
{ 136, "personality" },
{ 137, "afs_syscall" },
{ 138, "setfsuid" },
{ 139, "setfsgid" },
{ 140, "_llseek" },
{ 141, "getdents" },
{ 142, "_newselect" },
{ 143, "flock" },
{ 144, "msync" },
{ 145, "readv" },
{ 146, "writev" },
{ 147, "getsid" },
{ 148, "fdatasync" },
{ 149, "_sysctl" },
{ 150, "mlock" },
{ 151, "munlock" },
{ 152, "mlockall" },
{ 153, "munlockall" },
{ 154, "sched_setparam" },
{ 155, "sched_getparam" },
{ 156, "sched_setscheduler" },
{ 157, "sched_getscheduler" },
{ 158, "sched_yield" },
{ 159, "sched_get_priority_max" },
{ 160, "sched_get_priority_min" },
{ 161, "sched_rr_get_interval" },
{ 162, "nanosleep" },
{ 163, "mremap" },
{ 164, "setresuid" },
{ 165, "getresuid" },
{ 166, "vm86" },
{ 167, "query_module" },
{ 168, "poll" },
{ 169, "nfsservctl" },
{ 170, "setresgid" },
{ 171, "getresgid" },
{ 172, "prctl" },
{ 173, "rt_sigreturn" },
{ 174, "rt_sigaction" },
{ 175, "rt_sigprocmask" },
{ 176, "rt_sigpending" },
{ 177, "rt_sigtimedwait" },
{ 178, "rt_sigqueueinfo" },
{ 179, "rt_sigsuspend" },
{ 180, "pread64" },
{ 181, "pwrite64" },
{ 182, "chown" },
{ 183, "getcwd" },
{ 184, "capget" },
{ 185, "capset" },
{ 186, "sigaltstack" },
{ 187, "sendfile" },
{ 188, "getpmsg" },
{ 189, "putpmsg" },
{ 190, "vfork" },
{ 191, "ugetrlimit" },
{ 192, "mmap2" },
{ 193, "truncate64" },
{ 194, "ftruncate64" },
{ 195, "stat64" },
{ 196, "lstat64" },
{ 197, "fstat64" },
{ 198, "lchown32" },
{ 199, "getuid32" },
{ 200, "getgid32" },
{ 201, "geteuid32" },
{ 202, "getegid32" },
{ 203, "setreuid32" },
{ 204, "setregid32" },
{ 205, "getgroups32" },
{ 206, "setgroups32" },
{ 207, "fchown32" },
{ 208, "setresuid32" },
{ 209, "getresuid32" },
{ 210, "setresgid32" },
{ 211, "getresgid32" },
{ 212, "chown32" },
{ 213, "setuid32" },
{ 214, "setgid32" },
{ 215, "setfsuid32" },
{ 216, "setfsgid32" },
{ 217, "pivot_root" },
{ 218, "mincore" },
{ 219, "madvise" },
{ 220, "getdents64" },
{ 221, "fcntl64" },
{ 224, "gettid" },
{ 225, "readahead" },
{ 226, "setxattr" },
{ 227, "lsetxattr" },
{ 228, "fsetxattr" },
{ 229, "getxattr" },
{ 230, "lgetxattr" },
{ 231, "fgetxattr" },
{ 232, "listxattr" },
{ 233, "llistxattr" },
{ 234, "flistxattr" },
{ 235, "removexattr" },
{ 236, "lremovexattr" },
{ 237, "fremovexattr" },
{ 238, "tkill" },
{ 239, "sendfile64" },
{ 240, "futex" },
{ 241, "sched_setaffinity" },
{ 242, "sched_getaffinity" },
{ 243, "set_thread_area" },
{ 244, "get_thread_area" },
{ 245, "io_setup" },
{ 246, "io_destroy" },
{ 247, "io_getevents" },
{ 248, "io_submit" },
{ 249, "io_cancel" },
{ 250, "fadvise64" },
{ 252, "exit_group" },
{ 253, "lookup_dcookie" },
{ 254, "epoll_create" },
{ 255, "epoll_ctl" },
{ 256, "epoll_wait" },
{ 257, "remap_file_pages" },
{ 258, "set_tid_address" },
{ 259, "timer_create" },
{ 260, "timer_settime" },
{ 261, "timer_gettime" },
{ 262, "timer_getoverrun" },
{ 263, "timer_delete" },
{ 264, "clock_settime" },
{ 265, "clock_gettime" },
{ 266, "clock_getres" },
{ 267, "clock_nanosleep" },
{ 268, "statfs64" },
{ 269, "fstatfs64" },
{ 270, "tgkill" },
{ 271, "utimes" },
{ 272, "fadvise64_64" },
{ 273, "vserver" },
{ 274, "mbind" },
{ 275, "get_mempolicy" },
{ 276, "set_mempolicy" },
{ 277, "mq_open" },
{ 278, "mq_unlink" },
{ 279, "mq_timedsend" },
{ 280, "mq_timedreceive" },
{ 281, "mq_notify" },
{ 282, "mq_getsetattr" },
{ 283, "kexec_load" },
{ 284, "waitid" },
{ 286, "add_key" },
{ 287, "request_key" },
{ 288, "keyctl" },
{ 289, "ioprio_set" },
{ 290, "ioprio_get" },
{ 291, "inotify_init" },
{ 292, "inotify_add_watch" },
{ 293, "inotify_rm_watch" },
{ 294, "migrate_pages" },
{ 295, "openat" },
{ 296, "mkdirat" },
{ 297, "mknodat" },
{ 298, "fchownat" },
{ 299, "futimesat" },
{ 300, "fstatat64" },
{ 301, "unlinkat" },
{ 302, "renameat" },
{ 303, "linkat" },
{ 304, "symlinkat" },
{ 305, "readlinkat" },
{ 306, "fchmodat" },
{ 307, "faccessat" },
{ 308, "pselect6" },
{ 309, "ppoll" },
{ 310, "unshare" },
{ 311, "set_robust_list" },
{ 312, "get_robust_list" },
{ 313, "splice" },
{ 314, "sync_file_range" },
{ 315, "tee" },
{ 316, "vmsplice" },
{ 317, "move_pages" },
{ 318, "getcpu" },
{ 319, "epoll_pwait" },
{ 320, "utimensat" },
{ 321, "signalfd" },
{ 322, "timerfd_create" },
{ 323, "eventfd" },
{ 324, "fallocate" },
{ 325, "timerfd_settime" },
{ 326, "timerfd_gettime" },
{ 327, "signalfd4" },
{ 328, "eventfd2" },
{ 329, "epoll_create1" },
{ 330, "dup3" },
{ 331, "pipe2" },
{ 332, "inotify_init1" },
{ 333, "preadv" },
{ 334, "pwritev" },
{ 335, "rt_tgsigqueueinfo" },
{ 336, "perf_event_open" },
{ 337, "recvmmsg" },
{ 338, "fanotify_init" },
{ 339, "fanotify_mark" },
{ 340, "prlimit64" },
{ 341, "name_to_handle_at" },
{ 342, "open_by_handle_at" },
{ 343, "clock_adjtime" },
{ 344, "syncfs" },
{ 345, "sendmmsg" },
{ 346, "setns" },
{ 347, "process_vm_readv" },
{ 348, "process_vm_writev" },
{ 349, "kcmp" },
{ 350, "finit_module" },
{ 351, "sched_setattr" },
{ 352, "sched_getattr" },
{ 353, "renameat2" },
{ 354, "seccomp" },
{ 355, "getrandom" },
{ 356, "memfd_create" },
{ 357, "bpf" },
{ 358, "execveat" },
{ 359, "socket" },
{ 360, "socketpair" },
{ 361, "bind" },
{ 362, "connect" },
{ 363, "listen" },
{ 364, "accept4" },
{ 365, "getsockopt" },
{ 366, "setsockopt" },
{ 367, "getsockname" },
{ 368, "getpeername" },
{ 369, "sendto" },
{ 370, "sendmsg" },
{ 371, "recvfrom" },
{ 372, "recvmsg" },
{ 373, "shutdown" },
{ 374, "userfaultfd" },
{ 375, "membarrier" },
{ 376, "mlock2" },
{ 377, "copy_file_range" },
{ 378, "preadv2" },
{ 379, "pwritev2" },
{ 380, "pkey_mprotect" },
{ 381, "pkey_alloc" },
{ 382, "pkey_free" },
{ 383, "statx" },
{ 384, "arch_prctl" },
{ 385, "io_pgetevents" },
{ 386, "rseq" },
{ 393, "semget" },
{ 394, "semctl" },
{ 395, "shmget" },
{ 396, "shmctl" },
{ 397, "shmat" },
{ 398, "shmdt" },
{ 399, "msgget" },
{ 400, "msgsnd" },
{ 401, "msgrcv" },
{ 402, "msgctl" },
{ 403, "clock_gettime64" },
{ 404, "clock_settime64" },
{ 405, "clock_adjtime64" },
{ 406, "clock_getres_time64" },
{ 407, "clock_nanosleep_time64" },
{ 408, "timer_gettime64" },
{ 409, "timer_settime64" },
{ 410, "timerfd_gettime64" },
{ 411, "timerfd_settime64" },
{ 412, "utimensat_time64" },
{ 413, "pselect6_time64" },
{ 414, "ppoll_time64" },
{ 416, "io_pgetevents_time64" },
{ 417, "recvmmsg_time64" },
{ 418, "mq_timedsend_time64" },
{ 419, "mq_timedreceive_time64" },
{ 420, "semtimedop_time64" },
{ 421, "rt_sigtimedwait_time64" },
{ 422, "futex_time64" },
{ 423, "sched_rr_get_interval_time64" },
{ 424, "pidfd_send_signal" },
{ 425, "io_uring_setup" },
{ 426, "io_uring_enter" },
{ 427, "io_uring_register" },
{ 428, "open_tree" },
{ 429, "move_mount" },
{ 430, "fsopen" },
{ 431, "fsconfig" },
{ 432, "fsmount" },
{ 433, "fspick" },
{ 434, "pidfd_open" },
{ 435, "clone3" },
// Entries below mirror the remaining Syscalls_x86 enumerators (436-466) in SyscallsEnum.h.
{ 436, "close_range" },
{ 437, "openat2" },
{ 438, "pidfd_getfd" },
{ 439, "faccessat2" },
{ 440, "process_madvise" },
{ 441, "epoll_pwait2" },
{ 442, "mount_setattr" },
{ 443, "quotactl_fd" },
{ 444, "landlock_create_ruleset" },
{ 445, "landlock_add_rule" },
{ 446, "landlock_restrict_self" },
{ 447, "memfd_secret" },
{ 448, "process_mrelease" },
{ 449, "futex_waitv" },
{ 450, "set_mempolicy_home_node" },
{ 451, "cachestat" },
{ 452, "fchmodat2" },
{ 453, "map_shadow_stack" },
{ 454, "futex_wake" },
{ 455, "futex_wait" },
{ 456, "futex_requeue" },
{ 457, "statmount" },
{ 458, "listmount" },
{ 459, "lsm_get_self_attr" },
{ 460, "lsm_set_self_attr" },
{ 461, "lsm_list_modules" },
{ 462, "mseal" },
{ 463, "setxattrat" },
{ 464, "getxattrat" },
{ 465, "listxattrat" },
{ 466, "removexattrat" },


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "ArchHelpers/UContext.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Thread.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/fextl/vector.h>

#include <errno.h>
#include <grp.h>
#include <linux/futex.h>
#include <sched.h>
#include <signal.h>
#include <sys/fsuid.h>
#include <sys/resource.h>
#include <sys/wait.h>
#include <syscall.h>
#include <time.h>
#include <unistd.h>

// Associates the "%x" format with the 32-bit compat pointer types used by handlers in this file.
// NOTE(review): ARG_TO_STR is defined elsewhere; presumably it feeds the DEBUG_STRACE argument formatting - confirm.
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::stack_t32>, "%x")
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEXCore::x86::siginfo_t>, "%x")

namespace FEX::HLE::x32 {
// The kernel only gives 32-bit userspace 3 TLS segments
// Depending on whether the host kernel is 32-bit or 64-bit, the TLS index assigned is different
//
// Host kernel x86_64, valid TLS entries: 12,13,14
// Host kernel x86, valid TLS entries: 6,7,8
// Since we are claiming to be a 64-bit kernel, use the 64-bit range
//
// 6/12 = glibc
// 7/13 = wine fs
// 8/14 = etc
//
// TLS_NextEntry is the first GDT index in the TLS range; TLS_MaxEntry is one past the last (exclusive bound).
constexpr uint32_t TLS_NextEntry = 12;
constexpr uint32_t TLS_MaxEntry = TLS_NextEntry + 3;

// Installs a guest TLS descriptor into the calling thread's emulated GDT (set_thread_area semantics).
//
//  - Frame: CPU state of the calling guest thread; its GDT entry and cached segment bases are updated.
//  - tls:   guest pointer to an x32::user_desc describing the segment.
//
// If `entry_number` is -1 the first free slot in [TLS_NextEntry, TLS_MaxEntry) is chosen and written
// back to the guest structure. An explicitly requested slot must lie inside the same range.
//
// Returns 0 on success, -ESRCH when no free TLS slot exists, -EINVAL when the requested slot is
// outside the TLS range.
uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls) {
  struct x32::user_desc* u_info = reinterpret_cast<struct x32::user_desc*>(tls);
  FaultSafeUserMemAccess::VerifyIsReadable(u_info, sizeof(*u_info));

  // Sentinel meaning "pick a free slot for me".
  constexpr uint32_t AllocateEntry = static_cast<uint32_t>(-1);

  // Read the index once: the structure lives in guest memory and may change under us,
  // so the validated value (not a later re-read) must be what indexes the GDT.
  uint32_t Entry = static_cast<uint32_t>(u_info->entry_number);

  if (Entry == AllocateEntry) {
    for (uint32_t i = TLS_NextEntry; i < TLS_MaxEntry; ++i) {
      auto Candidate = &Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT][i];
      if (Frame->State.CalculateGDTLimit(*Candidate) == 0) {
        // If the limit is zero then it isn't present with our setup
        Entry = i;
        break;
      }
    }

    if (Entry == AllocateEntry) {
      // Couldn't find a slot. Return empty handed
      return -ESRCH;
    }

    // The chosen slot is reported back through the guest structure, so that field must be writable too.
    FaultSafeUserMemAccess::VerifyIsWritable(&u_info->entry_number, sizeof(u_info->entry_number));
    u_info->entry_number = Entry;
  } else if (Entry < TLS_NextEntry || Entry >= TLS_MaxEntry) {
    // A guest-chosen index outside the TLS range would write outside the TLS slots of the GDT.
    return -EINVAL;
  }

  // Now we need to update the thread's GDT to handle this change
  auto GDT = &Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT][Entry];
  Frame->State.SetGDTBase(GDT, u_info->base_addr);
  Frame->State.SetGDTLimit(GDT, 0xF'FFFFU);

  // With the segment register optimization we need to check all of the segment registers and update.
  // A selector's descriptor index is the selector value with its low three bits dropped.
  const auto GetEntry = [](auto value) {
    return value >> 3;
  };
  const auto NewBase = Frame->State.CalculateGDTBase(*GDT);
  if (GetEntry(Frame->State.cs_idx) == Entry) {
    Frame->State.cs_cached = NewBase;
  }
  if (GetEntry(Frame->State.ds_idx) == Entry) {
    Frame->State.ds_cached = NewBase;
  }
  if (GetEntry(Frame->State.es_idx) == Entry) {
    Frame->State.es_cached = NewBase;
  }
  if (GetEntry(Frame->State.fs_idx) == Entry) {
    Frame->State.fs_cached = NewBase;
  }
  if (GetEntry(Frame->State.gs_idx) == Entry) {
    Frame->State.gs_cached = NewBase;
  }
  if (GetEntry(Frame->State.ss_idx) == Entry) {
    Frame->State.ss_cached = NewBase;
  }
  return 0;
}

// Advances the guest instruction pointer stored in `Frame` by two bytes.
// NOTE(review): two bytes presumably matches the length of the instruction that entered the syscall,
// so a new thread resumes after it - confirm with the callers.
void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame) {
  auto& GuestState = Frame->State;
  GuestState.rip = GuestState.rip + 2;
}

// Registers the 32-bit guest implementations of the thread/process related syscalls
// (sigreturn, clone, wait*, thread-area, robust-list, futex, sigaltstack and execve*).
//
// Every handler is a capture-less lambda receiving the guest CPU frame followed by the raw guest
// arguments, and returns the syscall result as a uint64_t (negative errno values on failure).
// Handlers that dereference guest pointers probe them through FaultSafeUserMemAccess first, and
// 32-bit guest structures are converted to their host equivalents around the host call.
void RegisterThread(FEX::HLE::SyscallHandler* Handler) {
  // Hands control back to the signal delegator; this handler is not expected to return.
  REGISTER_SYSCALL_IMPL_X32(sigreturn, [](FEXCore::Core::CpuStateFrame* Frame) -> uint64_t {
    FEX::HLE::_SyscallHandler->GetSignalDelegator()->HandleSignalHandlerReturn(false);
    FEX_UNREACHABLE;
  });

  // Legacy clone: repackages the positional arguments into a clone3_args block for CloneHandler.
  REGISTER_SYSCALL_IMPL_X32(
    clone, ([](FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, void* stack, pid_t* parent_tid, void* tls, pid_t* child_tid) -> uint64_t {
      // This is slightly different EFAULT behaviour: if child_tid or parent_tid is invalid then the kernel just doesn't write to the
      // pointer. We still need to be EFAULT safe though.
      if ((flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) && child_tid) {
        FaultSafeUserMemAccess::VerifyIsWritable(child_tid, sizeof(*child_tid));
      }

      if ((flags & CLONE_PARENT_SETTID) && parent_tid) {
        FaultSafeUserMemAccess::VerifyIsWritable(parent_tid, sizeof(*parent_tid));
      }


      FEX::HLE::clone3_args args {.Type = TypeOfClone::TYPE_CLONE2,
                                  .args = {
                                    .flags = flags & ~CSIGNAL,                       // This no longer contains CSIGNAL
                                    .pidfd = reinterpret_cast<uint64_t>(parent_tid), // For clone, pidfd is duplicated here
                                    .child_tid = reinterpret_cast<uint64_t>(child_tid),
                                    .parent_tid = reinterpret_cast<uint64_t>(parent_tid),
                                    .exit_signal = flags & CSIGNAL,
                                    .stack = reinterpret_cast<uint64_t>(stack),
                                    .stack_size = 0, // This syscall isn't able to see the stack size
                                    .tls = reinterpret_cast<uint64_t>(tls),
                                    .set_tid = 0, // This syscall isn't able to select TIDs
                                    .set_tid_size = 0,
                                    .cgroup = 0, // This syscall can't select cgroups
                                  }};
      return CloneHandler(Frame, &args);
    }));

  // waitpid: the int32_t status word has the same layout on host and guest, so it is passed straight through.
  REGISTER_SYSCALL_IMPL_X32(waitpid, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int32_t* status, int32_t options) -> uint64_t {
    uint64_t Result = ::waitpid(pid, status, options);
    FaultSafeUserMemAccess::VerifyIsWritableOrNull(status, sizeof(*status));
    SYSCALL_ERRNO();
  });

  // nice: forwarded unchanged to the host.
  REGISTER_SYSCALL_IMPL_X32(nice, [](FEXCore::Core::CpuStateFrame* Frame, int inc) -> uint64_t {
    uint64_t Result = ::nice(inc);
    SYSCALL_ERRNO();
  });

  // set_thread_area: all of the work happens in SetThreadArea.
  REGISTER_SYSCALL_IMPL_X32(
    set_thread_area, [](FEXCore::Core::CpuStateFrame* Frame, struct user_desc* u_info) -> uint64_t { return SetThreadArea(Frame, u_info); });

  // get_thread_area: reports the emulated GDT entry selected by u_info->entry_number back to the guest.
  REGISTER_SYSCALL_IMPL_X32(get_thread_area, [](FEXCore::Core::CpuStateFrame* Frame, struct user_desc* u_info) -> uint64_t {
    // entry_number is read out of guest memory below, so probe the descriptor first rather than
    // dereferencing an unchecked guest pointer (SetThreadArea does the same before its first read).
    FaultSafeUserMemAccess::VerifyIsReadable(u_info, sizeof(*u_info));

    // Index to fetch comes from the user_desc
    uint32_t Entry = u_info->entry_number;
    // NOTE(review): SetThreadArea's allocation loop treats TLS_MaxEntry as exclusive (i < TLS_MaxEntry)
    // while this check still accepts Entry == TLS_MaxEntry - confirm which bound is intended.
    if (Entry < TLS_NextEntry || Entry > TLS_MaxEntry) {
      return -EINVAL;
    }

    FaultSafeUserMemAccess::VerifyIsWritable(u_info, sizeof(*u_info));

    const auto& GDT = &Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT][Entry];

    // Start from an all-zero descriptor (this also clears entry_number).
    memset(u_info, 0, sizeof(*u_info));

    // FEX only stores base instead of the full GDT
    u_info->base_addr = Frame->State.CalculateGDTBase(*GDT);

    // Fill the rest of the structure with expected data (even if wrong at the moment)
    if (u_info->base_addr) {
      u_info->limit = 0xF'FFFF;
      u_info->seg_32bit = 1;
      u_info->limit_in_pages = 1;
      u_info->useable = 1;
    } else {
      u_info->read_exec_only = 1;
      u_info->seg_not_present = 1;
    }
    return 0;
  });

  // set_robust_list: only records the guest's list head on the thread object.
  REGISTER_SYSCALL_IMPL_X32(set_robust_list, [](FEXCore::Core::CpuStateFrame* Frame, struct robust_list_head* head, size_t len) -> uint64_t {
    if (len != 12) {
      // Return invalid if the passed in length doesn't match what's expected.
      return -EINVAL;
    }

    auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
    // Retain the robust list head but don't give it to the kernel
    // The kernel would break if it tried parsing a 32bit robust list from a 64bit process
    ThreadObject->ThreadInfo.robust_list_head = reinterpret_cast<uint64_t>(head);
    return 0;
  });

  // get_robust_list: returns the head recorded by set_robust_list for the calling thread.
  // The pid argument is not consulted.
  REGISTER_SYSCALL_IMPL_X32(
    get_robust_list, [](FEXCore::Core::CpuStateFrame* Frame, int pid, struct robust_list_head** head, uint32_t* len_ptr) -> uint64_t {
      // The guest's pointer slot is 32 bits wide, hence sizeof(uint32_t) rather than sizeof(*head).
      FaultSafeUserMemAccess::VerifyIsWritable(head, sizeof(uint32_t));
      FaultSafeUserMemAccess::VerifyIsWritable(len_ptr, sizeof(*len_ptr));

      auto ThreadObject = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);
      // Give the robust list back to the application
      // Steam specifically checks to make sure the robust list is set
      *(uint32_t*)head = (uint32_t)ThreadObject->ThreadInfo.robust_list_head;
      *len_ptr = 12;
      return 0;
    });

  // futex: widens the 32-bit timeout for the commands that interpret it as a timespec.
  REGISTER_SYSCALL_IMPL_X32(
    futex, [](FEXCore::Core::CpuStateFrame* Frame, int* uaddr, int futex_op, int val, const timespec32* timeout, int* uaddr2, uint32_t val3) -> uint64_t {
      const void* timeout_ptr = (const void*)timeout;
      struct timespec tp64 {};
      int cmd = futex_op & FUTEX_CMD_MASK;
      if (timeout && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) {
        FaultSafeUserMemAccess::VerifyIsReadable(timeout, sizeof(*timeout));
        // timeout argument is only handled as timespec in these cases
        // Otherwise just an integer
        tp64 = *timeout;
        timeout_ptr = &tp64;
      }

      uint64_t Result = syscall(SYSCALL_DEF(futex), uaddr, futex_op, val, timeout_ptr, uaddr2, val3);
      SYSCALL_ERRNO();
    });

  // sigaltstack: converts both stack descriptors to host stack_t, lets the signal delegator
  // register the guest alternate stack, and copies the previous descriptor back on success.
  REGISTER_SYSCALL_IMPL_X32(
    sigaltstack, [](FEXCore::Core::CpuStateFrame* Frame, const compat_ptr<stack_t32> ss, compat_ptr<stack_t32> old_ss) -> uint64_t {
      stack_t ss64 {};
      stack_t old64 {};

      // These stay null when the guest passed a null pointer for the matching argument.
      stack_t* ss64_ptr {};
      stack_t* old64_ptr {};

      if (ss) {
        FaultSafeUserMemAccess::VerifyIsReadable(ss, sizeof(*ss));
        ss64 = *ss;
        ss64_ptr = &ss64;
      }

      if (old_ss) {
        FaultSafeUserMemAccess::VerifyIsReadable(old_ss, sizeof(*old_ss));
        old64 = *old_ss;
        old64_ptr = &old64;
      }
      uint64_t Result = FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(
        FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), ss64_ptr, old64_ptr);

      if (Result == 0 && old_ss) {
        FaultSafeUserMemAccess::VerifyIsWritable(old_ss, sizeof(*old_ss));
        *old_ss = old64;
      }
      return Result;
    });

  // launch a new process under fex
  // currently does not propagate argv[0] correctly
  REGISTER_SYSCALL_IMPL_X32(execve, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, uint32_t* argv, uint32_t* envp) -> uint64_t {
    fextl::vector<const char*> Args;
    fextl::vector<const char*> Envp;

    // argv/envp are zero-terminated arrays of 32-bit guest addresses; widen each entry to a host pointer.
    if (argv) {
      for (int i = 0; argv[i]; i++) {
        Args.push_back(reinterpret_cast<const char*>(static_cast<uintptr_t>(argv[i])));
      }

      Args.push_back(nullptr);
    }

    if (envp) {
      for (int i = 0; envp[i]; i++) {
        Envp.push_back(reinterpret_cast<const char*>(static_cast<uintptr_t>(envp[i])));
      }
      Envp.push_back(nullptr);
    }

    // A null guest array is forwarded as null instead of as an empty vector.
    auto* const* ArgsPtr = argv ? const_cast<char* const*>(Args.data()) : nullptr;
    auto* const* EnvpPtr = envp ? const_cast<char* const*>(Envp.data()) : nullptr;

    FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty();

    return FEX::HLE::ExecveHandler(Frame, pathname, ArgsPtr, EnvpPtr, AtArgs);
  });

  // execveat: same pointer widening as execve, with dirfd/flags carried in ExecveAtArgs.
  REGISTER_SYSCALL_IMPL_X32(
    execveat, ([](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, uint32_t* argv, uint32_t* envp, int flags) -> uint64_t {
      fextl::vector<const char*> Args;
      fextl::vector<const char*> Envp;

      if (argv) {
        for (int i = 0; argv[i]; i++) {
          Args.push_back(reinterpret_cast<const char*>(static_cast<uintptr_t>(argv[i])));
        }

        Args.push_back(nullptr);
      }

      if (envp) {
        for (int i = 0; envp[i]; i++) {
          Envp.push_back(reinterpret_cast<const char*>(static_cast<uintptr_t>(envp[i])));
        }
        Envp.push_back(nullptr);
      }

      FEX::HLE::ExecveAtArgs AtArgs {
        .dirfd = dirfd,
        .flags = flags,
      };

      auto* const* ArgsPtr = argv ? const_cast<char* const*>(Args.data()) : nullptr;
      auto* const* EnvpPtr = envp ? const_cast<char* const*>(Envp.data()) : nullptr;
      return FEX::HLE::ExecveHandler(Frame, pathname, ArgsPtr, EnvpPtr, AtArgs);
    }));

  // wait4: rusage is round-tripped through the host struct; wstatus is passed through untouched.
  REGISTER_SYSCALL_IMPL_X32(wait4, [](FEXCore::Core::CpuStateFrame* Frame, pid_t pid, int* wstatus, int options, struct rusage_32* rusage) -> uint64_t {
    struct rusage usage64 {};
    struct rusage* usage64_p {};

    if (rusage) {
      FaultSafeUserMemAccess::VerifyIsReadable(rusage, sizeof(*rusage));
      usage64 = *rusage;
      usage64_p = &usage64;
    }
    uint64_t Result = ::wait4(pid, wstatus, options, usage64_p);
    if (rusage) {
      FaultSafeUserMemAccess::VerifyIsWritable(rusage, sizeof(*rusage));
      *rusage = usage64;
    }
    SYSCALL_ERRNO();
  });

  // waitid: uses the raw syscall (which takes the extra rusage argument) and converts both
  // siginfo and rusage back to their guest layouts only when the call succeeded.
  REGISTER_SYSCALL_IMPL_X32(waitid,
                            [](FEXCore::Core::CpuStateFrame* Frame, int which, pid_t upid, compat_ptr<FEXCore::x86::siginfo_t> info,
                               int options, struct rusage_32* rusage) -> uint64_t {
                              struct rusage usage64 {};
                              struct rusage* usage64_p {};

                              siginfo_t info64 {};
                              siginfo_t* info64_p {};

                              if (rusage) {
                                FaultSafeUserMemAccess::VerifyIsReadable(rusage, sizeof(*rusage));
                                usage64 = *rusage;
                                usage64_p = &usage64;
                              }

                              if (info) {
                                info64_p = &info64;
                              }

                              uint64_t Result = ::syscall(SYSCALL_DEF(waitid), which, upid, info64_p, options, usage64_p);

                              if (Result != -1) {
                                if (rusage) {
                                  FaultSafeUserMemAccess::VerifyIsWritable(rusage, sizeof(*rusage));
                                  *rusage = usage64;
                                }

                                if (info) {
                                  FaultSafeUserMemAccess::VerifyIsWritable(info, sizeof(*info));
                                  *info = info64;
                                }
                              }

                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Thread.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#pragma once
#include <stdint.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// We must define this ourselves since it doesn't exist on non-x86 platforms
// Descriptor block exchanged with the guest by set_thread_area/get_thread_area.
// Three 32-bit words followed by seven flag bits packed into one 32-bit bitfield unit.
struct user_desc {
  // GDT slot index. SetThreadArea treats -1 (all bits set) as "pick a free slot" and writes the chosen index back.
  uint32_t entry_number;
  // Segment base address; SetThreadArea stores it as the base of the selected GDT entry.
  uint32_t base_addr;
  // Segment limit; get_thread_area reports 0xF'FFFF for entries with a non-zero base.
  uint32_t limit;
  // Flag bits below: get_thread_area sets seg_32bit/limit_in_pages/useable for an entry with a
  // non-zero base, and read_exec_only/seg_not_present otherwise.
  uint32_t seg_32bit       : 1;
  uint32_t contents        : 2;
  uint32_t read_exec_only  : 1;
  uint32_t limit_in_pages  : 1;
  uint32_t seg_not_present : 1;
  uint32_t useable         : 1;
};

// Installs the TLS descriptor pointed to by `tls` (interpreted as an x32::user_desc) into the
// thread's emulated GDT and refreshes any cached segment base that selects that entry.
// Returns 0 on success, or -ESRCH when a free slot was requested but none is available.
uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls);
// Advances the guest instruction pointer stored in `Frame` by two bytes.
void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame);
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Time.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <stdint.h>
#include <syscall.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/timex.h>
#include <time.h>
#include <unistd.h>
#include <utime.h>

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::timespec32>, "%lx")
ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::timex32>, "%lx")

struct timespec;
namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x32 {
// Registers the 32-bit guest implementations of the time related syscalls.
//
// The guest uses 32-bit time structures (timespec32, timeval32, timex32, ...). Each handler
// converts guest input into the host structure, performs the host call, and converts any output
// back. Guest pointers are probed through FaultSafeUserMemAccess before they are dereferenced.
// Handlers finish with SYSCALL_ERRNO(), which produces the return value from the local `Result`.
void RegisterTime(FEX::HLE::SyscallHandler* Handler) {

  // time: the host value is always fetched; it is additionally stored to tloc when non-null.
  REGISTER_SYSCALL_IMPL_X32(time, [](FEXCore::Core::CpuStateFrame* Frame, FEX::HLE::x32::old_time32_t* tloc) -> uint64_t {
    time_t Host {};
    uint64_t Result = ::time(&Host);

    if (tloc) {
      FaultSafeUserMemAccess::VerifyIsWritable(tloc, sizeof(*tloc));
      // On 32-bit this truncates
      *tloc = (FEX::HLE::x32::old_time32_t)Host;
    }

    SYSCALL_ERRNO();
  });

  // times: host tms is converted into the guest compat_tms when buf is non-null.
  REGISTER_SYSCALL_IMPL_X32(times, [](FEXCore::Core::CpuStateFrame* Frame, struct FEX::HLE::x32::compat_tms* buf) -> uint64_t {
    struct tms Host {};
    uint64_t Result = ::times(&Host);
    if (buf) {
      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      *buf = Host;
    }
    SYSCALL_ERRNO();
  });

  // utime: a null `times` is forwarded as null to the host.
  REGISTER_SYSCALL_IMPL_X32(utime, [](FEXCore::Core::CpuStateFrame* Frame, char* filename, const FEX::HLE::x32::old_utimbuf32* times) -> uint64_t {
    struct utimbuf Host {};
    struct utimbuf* Host_p {};
    if (times) {
      FaultSafeUserMemAccess::VerifyIsReadable(times, sizeof(*times));
      Host = *times;
      Host_p = &Host;
    }
    uint64_t Result = ::utime(filename, Host_p);
    SYSCALL_ERRNO();
  });

  // gettimeofday: tz is passed through as-is; tv is narrowed to the guest layout afterwards.
  REGISTER_SYSCALL_IMPL_X32(gettimeofday, [](FEXCore::Core::CpuStateFrame* Frame, timeval32* tv, struct timezone* tz) -> uint64_t {
    struct timeval tv64 {};
    struct timeval* tv_ptr {};
    if (tv) {
      tv_ptr = &tv64;
    }

    uint64_t Result = ::gettimeofday(tv_ptr, tz);

    if (tv) {
      FaultSafeUserMemAccess::VerifyIsWritable(tv, sizeof(*tv));
      *tv = tv64;
    }
    SYSCALL_ERRNO();
  });

  // settimeofday: tv is widened for the host; tz is passed through as-is.
  REGISTER_SYSCALL_IMPL_X32(settimeofday, [](FEXCore::Core::CpuStateFrame* Frame, const timeval32* tv, const struct timezone* tz) -> uint64_t {
    struct timeval tv64 {};
    struct timeval* tv_ptr {};
    if (tv) {
      FaultSafeUserMemAccess::VerifyIsReadable(tv, sizeof(*tv));
      tv64 = *tv;
      tv_ptr = &tv64;
    }

    const uint64_t Result = ::settimeofday(tv_ptr, tz);
    SYSCALL_ERRNO();
  });

  // nanosleep: rem is seeded from the guest's current contents so the unconditional write-back
  // below leaves it unchanged whenever the host call does not update it.
  REGISTER_SYSCALL_IMPL_X32(nanosleep, [](FEXCore::Core::CpuStateFrame* Frame, const timespec32* req, timespec32* rem) -> uint64_t {
    struct timespec rem64 {};
    struct timespec* rem64_ptr {};

    if (rem) {
      FaultSafeUserMemAccess::VerifyIsReadable(rem, sizeof(*rem));
      rem64 = *rem;
      rem64_ptr = &rem64;
    }

    uint64_t Result = 0;
    if (req) {
      FaultSafeUserMemAccess::VerifyIsReadable(req, sizeof(*req));
      const struct timespec req64 = *req;
      Result = ::nanosleep(&req64, rem64_ptr);
    } else {
      Result = ::nanosleep(nullptr, rem64_ptr);
    }

    if (rem) {
      FaultSafeUserMemAccess::VerifyIsWritable(rem, sizeof(*rem));
      *rem = rem64;
    }
    SYSCALL_ERRNO();
  });

  // clock_gettime: the host value is narrowed into tp when tp is non-null.
  REGISTER_SYSCALL_IMPL_X32(clock_gettime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec32* tp) -> uint64_t {
    struct timespec tp64 {};
    uint64_t Result = ::clock_gettime(clk_id, &tp64);
    if (tp) {
      FaultSafeUserMemAccess::VerifyIsWritable(tp, sizeof(*tp));
      *tp = tp64;
    }
    SYSCALL_ERRNO();
  });

  // clock_getres: same shape as clock_gettime.
  REGISTER_SYSCALL_IMPL_X32(clock_getres, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, timespec32* tp) -> uint64_t {
    struct timespec tp64 {};
    uint64_t Result = ::clock_getres(clk_id, &tp64);
    if (tp) {
      FaultSafeUserMemAccess::VerifyIsWritable(tp, sizeof(*tp));
      *tp = tp64;
    }
    SYSCALL_ERRNO();
  });

  // clock_nanosleep: both timespecs are widened; `remain` is only written back for relative sleeps.
  REGISTER_SYSCALL_IMPL_X32(
    clock_nanosleep, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, int flags, const timespec32* request, timespec32* remain) -> uint64_t {
      struct timespec req64 {};
      struct timespec* req64_ptr {};

      struct timespec rem64 {};
      struct timespec* rem64_ptr {};

      if (request) {
        FaultSafeUserMemAccess::VerifyIsReadable(request, sizeof(*request));
        req64 = *request;
        req64_ptr = &req64;
      }

      if (remain) {
        FaultSafeUserMemAccess::VerifyIsReadable(remain, sizeof(*remain));
        rem64 = *remain;
        rem64_ptr = &rem64;
      }

      // Can't use glibc helper here since it does additional validation and data munging that breaks games.
      uint64_t Result = ::syscall(SYSCALL_DEF(clock_nanosleep), clockid, flags, req64_ptr, rem64_ptr);

      if (remain && (flags & TIMER_ABSTIME) == 0) {
        FaultSafeUserMemAccess::VerifyIsWritable(remain, sizeof(*remain));
        // Remain is completely ignored if TIMER_ABSTIME is set.
        *remain = rem64;
      }
      SYSCALL_ERRNO();
    });

  // clock_settime: a null tp is rejected up front with -EFAULT.
  REGISTER_SYSCALL_IMPL_X32(clock_settime, [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, const timespec32* tp) -> uint64_t {
    if (!tp) {
      // clock_settime is required to pass a timespec.
      return -EFAULT;
    }

    uint64_t Result = 0;
    FaultSafeUserMemAccess::VerifyIsReadable(tp, sizeof(*tp));
    const struct timespec tp64 = *tp;
    Result = ::clock_settime(clockid, &tp64);
    SYSCALL_ERRNO();
  });

  // futimesat: delegated to the shared compat helper, instantiated for the 32-bit timeval.
  REGISTER_SYSCALL_IMPL_X32(futimesat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const timeval32 times[2]) -> uint64_t {
    return FEX::HLE::futimesat_compat<timeval32>(dirfd, pathname, times);
  });

  // utimensat: `times`, when non-null, is an array of two timespec32 entries.
  REGISTER_SYSCALL_IMPL_X32(
    utimensat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const compat_ptr<timespec32> times, int flags) -> uint64_t {
      uint64_t Result = 0;
      if (times) {
        // Probe exactly the two timespec32 elements read below (this used to name timeval32,
        // which happens to have the same size but is not the type being accessed).
        FaultSafeUserMemAccess::VerifyIsReadable(times, sizeof(timespec32) * 2);
        timespec times64[2] {};
        times64[0] = times[0];
        times64[1] = times[1];
        Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, times64, flags);
      } else {
        Result = ::syscall(SYSCALL_DEF(utimensat), dirfd, pathname, nullptr, flags);
      }
      SYSCALL_ERRNO();
    });

  // utimes: `times`, when non-null, is an array of two timeval32 entries.
  REGISTER_SYSCALL_IMPL_X32(utimes, [](FEXCore::Core::CpuStateFrame* Frame, const char* filename, const timeval32 times[2]) -> uint64_t {
    uint64_t Result = 0;
    if (times) {
      FaultSafeUserMemAccess::VerifyIsReadable(times, sizeof(timeval32) * 2);
      struct timeval times64[2] {};
      times64[0] = times[0];
      times64[1] = times[1];
      Result = ::utimes(filename, times64);
    } else {
      Result = ::utimes(filename, nullptr);
    }
    SYSCALL_ERRNO();
  });

  // adjtimex: buf is both input and output; it is only written back when the host call succeeded.
  REGISTER_SYSCALL_IMPL_X32(adjtimex, [](FEXCore::Core::CpuStateFrame* Frame, compat_ptr<FEX::HLE::x32::timex32> buf) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsReadable(buf, sizeof(*buf));
    struct timex Host {};
    Host = *buf;
    uint64_t Result = ::adjtimex(&Host);
    if (Result != -1) {
      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      *buf = Host;
    }
    SYSCALL_ERRNO();
  });

  // clock_adjtime: same round trip as adjtimex, for an explicit clock id.
  REGISTER_SYSCALL_IMPL_X32(clock_adjtime,
                            [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clk_id, compat_ptr<FEX::HLE::x32::timex32> buf) -> uint64_t {
                              FaultSafeUserMemAccess::VerifyIsReadable(buf, sizeof(*buf));
                              struct timex Host {};
                              Host = *buf;
                              uint64_t Result = ::clock_adjtime(clk_id, &Host);
                              if (Result != -1) {
                                FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
                                *buf = Host;
                              }
                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Timer.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <stdint.h>
#include <syscall.h>
#include <sys/time.h>
#include <unistd.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

ARG_TO_STR(FEX::HLE::x32::compat_ptr<FEX::HLE::x32::sigevent32>, "%lx")

namespace FEX::HLE::x32 {
// Registers the 32-bit guest implementations of the POSIX timer and interval-timer syscalls.
//
// Guest structures (old_itimerspec32, itimerval32, sigevent32) are converted to their host
// equivalents around each host call, and guest pointers are probed through FaultSafeUserMemAccess
// before they are dereferenced. Handlers finish with SYSCALL_ERRNO(), which produces the return
// value from the local `Result`.
void RegisterTimer(FEX::HLE::SyscallHandler* Handler) {
  // timer_settime: new_value is mandatory; old_value is optional and only written back on success.
  REGISTER_SYSCALL_IMPL_X32(timer_settime,
                            [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, int flags,
                               const FEX::HLE::x32::old_itimerspec32* new_value, FEX::HLE::x32::old_itimerspec32* old_value) -> uint64_t {
                              itimerspec new_value_host {};
                              itimerspec old_value_host {};
                              itimerspec* old_value_host_p {};

                              FaultSafeUserMemAccess::VerifyIsReadable(new_value, sizeof(*new_value));
                              new_value_host = *new_value;
                              if (old_value) {
                                old_value_host_p = &old_value_host;
                              }
                              uint64_t Result = ::syscall(SYSCALL_DEF(timer_settime), timerid, flags, &new_value_host, old_value_host_p);
                              if (Result != -1 && old_value) {
                                FaultSafeUserMemAccess::VerifyIsWritable(old_value, sizeof(*old_value));
                                *old_value = old_value_host;
                              }
                              SYSCALL_ERRNO();
                            });

  // timer_gettime: queries the host timer and narrows the result into the guest structure.
  REGISTER_SYSCALL_IMPL_X32(
    timer_gettime, [](FEXCore::Core::CpuStateFrame* Frame, kernel_timer_t timerid, FEX::HLE::x32::old_itimerspec32* curr_value) -> uint64_t {
      itimerspec curr_value_host {};
      // The kernel needs the ADDRESS of the output buffer. Passing the struct by value through the
      // variadic syscall() wrapper means the local copied back to the guest below is never filled in.
      uint64_t Result = ::syscall(SYSCALL_DEF(timer_gettime), timerid, &curr_value_host);
      // Only publish a value when the query succeeded, matching timer_settime above.
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(curr_value, sizeof(*curr_value));
        *curr_value = curr_value_host;
      }
      SYSCALL_ERRNO();
    });

  // getitimer: a null curr_value is forwarded as null to the host.
  REGISTER_SYSCALL_IMPL_X32(getitimer, [](FEXCore::Core::CpuStateFrame* Frame, int which, FEX::HLE::x32::itimerval32* curr_value) -> uint64_t {
    itimerval val {};
    itimerval* val_p {};
    if (curr_value) {
      val_p = &val;
    }
    uint64_t Result = ::getitimer(which, val_p);
    if (curr_value) {
      FaultSafeUserMemAccess::VerifyIsWritable(curr_value, sizeof(*curr_value));
      *curr_value = val;
    }
    SYSCALL_ERRNO();
  });

  // setitimer: both pointers are optional; each is forwarded as null when the guest passed null.
  REGISTER_SYSCALL_IMPL_X32(setitimer,
                            [](FEXCore::Core::CpuStateFrame* Frame, int which, const FEX::HLE::x32::itimerval32* new_value,
                               FEX::HLE::x32::itimerval32* old_value) -> uint64_t {
                              itimerval val {};
                              itimerval old {};
                              itimerval* val_p {};
                              itimerval* old_p {};

                              if (new_value) {
                                FaultSafeUserMemAccess::VerifyIsReadable(new_value, sizeof(*new_value));
                                val = *new_value;
                                val_p = &val;
                              }

                              if (old_value) {
                                old_p = &old;
                              }

                              uint64_t Result = ::setitimer(which, val_p, old_p);

                              if (old_value) {
                                FaultSafeUserMemAccess::VerifyIsWritable(old_value, sizeof(*old_value));
                                *old_value = old;
                              }
                              SYSCALL_ERRNO();
                            });

  // timer_create: converts the guest sigevent32 (when supplied) and creates the host timer.
  // timerid is handed to the host call unchanged.
  REGISTER_SYSCALL_IMPL_X32(
    timer_create,
    [](FEXCore::Core::CpuStateFrame* Frame, clockid_t clockid, compat_ptr<FEX::HLE::x32::sigevent32> sevp, kernel_timer_t* timerid) -> uint64_t {
      uint64_t Result = 0;
      if (sevp) {
        FaultSafeUserMemAccess::VerifyIsReadable(sevp, sizeof(*sevp));
        sigevent Host = *sevp;
        Result = ::syscall(SYSCALL_DEF(timer_create), clockid, &Host, timerid);
      } else {
        // A null sevp is a valid request for the default notification (see timer_create(2)), so
        // forward it as null instead of dereferencing it.
        Result = ::syscall(SYSCALL_DEF(timer_create), clockid, nullptr, timerid);
      }
      SYSCALL_ERRNO();
    });
}
} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x32/Types.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-32
$end_info$
*/

#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include <cstddef>
#include <linux/types.h>
#include <asm/ipcbuf.h>
#include <asm/msgbuf.h>
#include <asm/sembuf.h>
#include <asm/shmbuf.h>
#include <cstdint>
#include <cstring>
#include <fcntl.h>
#include <limits>
#include <linux/mqueue.h>
#include <signal.h>
#include <sys/resource.h>
#include <sys/shm.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/times.h>
#include <sys/timex.h>
#include <sys/uio.h>
#include <time.h>
#include <type_traits>
#include <utime.h>

#include "LinuxSyscalls/Types.h"

namespace FEX::HLE::x32 {

// Basic types to make tracking easier
// Fixed-width aliases used when describing 32-bit guest structures, so field widths are spelled
// out explicitly instead of depending on the host's native types.
using compat_ulong_t = uint32_t;
using compat_long_t = int32_t;
using compat_uptr_t = uint32_t;
using compat_size_t = uint32_t;
using compat_off_t = uint32_t;
using compat_pid_t = int32_t;
// The following identifiers are only 16 bits wide in these guest structures.
using compat_dev_t = uint16_t;
using compat_ino_t = uint32_t;
using compat_mode_t = uint16_t;
using compat_nlink_t = uint16_t;
using compat_uid_t = uint16_t;
using compat_gid_t = uint16_t;
using compat_old_sigset_t = uint32_t;
using old_time32_t = int32_t;
using compat_clock_t = int32_t;
// One 32-bit word of a guest fd_set.
using fd_set32 = uint32_t;

// Can't use using with aligned attributes, clang doesn't honour it
// 64-bit integers declared through FEX_ALIGNED(4).
// NOTE(review): presumably this lowers their alignment to 4 bytes to match the 32-bit guest ABI - confirm in CompilerDefs.h.
typedef FEX_ALIGNED(4) uint64_t compat_uint64_t;
typedef FEX_ALIGNED(4) int64_t compat_int64_t;
typedef FEX_ALIGNED(4) int64_t compat_loff_t;

/**
 * A guest pointer stored as a 32-bit address.
 *
 * The object is exactly four bytes and trivially copyable (see the static_asserts below), so it
 * can be embedded directly in guest structure definitions. Construction is restricted to derived
 * helpers through the protected FromAddress factory; every accessor reinterprets the stored
 * address as a host pointer.
 *
 * NOTE(review): the `typename = std::enable_if<...>` defaults below name the enable_if
 * specialization itself rather than its `::type`, so they never remove an overload. They are
 * left untouched here because tightening them could reject conversions that existing callers use.
 */
template<typename T>
class compat_ptr {
protected:
  // Builds a compat_ptr holding the given 32-bit address.
  static compat_ptr FromAddress(uint32_t In) {
    compat_ptr<T> ret;
    ret.Ptr = In;
    return ret;
  }

  compat_ptr() = default;

public:
  // Dereferences the guest address as a T.
  template<typename T2 = T, typename = std::enable_if<!std::is_same<T2, void>::value, T2>>
  T2& operator*() const {
    return *Interpret();
  }

  // Member access through the guest address. Const-qualified like every other accessor so it is
  // also usable through a const compat_ptr; the pointee itself is not made const.
  T* operator->() const {
    return Interpret();
  }

  // In the case of non-void type, we can index the pointer
  template<typename T2 = T, typename = std::enable_if<!std::is_same<T2, void>::value, T2>>
  T2& operator[](size_t idx) const {
    return *reinterpret_cast<T2*>(Ptr + sizeof(T2) * idx);
  }

  // In the case of void type, we need to trivially convert
  template<typename T2 = T, typename = std::enable_if<std::is_same<T2, void>::value, T2>>
  operator T2*() const {
    return reinterpret_cast<T2*>(Ptr);
  }

  // Implicit conversion to the equivalent host pointer.
  operator T*() const {
    return Interpret();
  }

  // True when the stored address is non-zero.
  explicit operator bool() const noexcept {
    return !!Ptr;
  }

  // The stored address widened to a host-sized integer.
  explicit operator uintptr_t() const {
    return Ptr;
  }

  // Raw 32-bit guest address.
  uint32_t Ptr;

private:
  // Reinterprets the stored address as a host pointer to T.
  T* Interpret() const {
    return reinterpret_cast<T*>(Ptr);
  }
};
static_assert(std::is_trivially_copyable_v<compat_ptr<void>>);
static_assert(sizeof(compat_ptr<void>) == 4);

/**
 * Adapter that produces a compat_ptr from either a raw 32-bit address or a native pointer.
 *
 * The converting constructors live on this derived type because adding them to compat_ptr itself
 * would trigger clang's -Wpacked-non-pod warnings.
 */
template<typename T>
class auto_compat_ptr : public compat_ptr<T> {
  using Base = compat_ptr<T>;

public:
  // Wraps a raw guest address.
  auto_compat_ptr(uint32_t In)
    : Base {Base::FromAddress(In)} {}

  // Wraps a native pointer; the address is narrowed to 32 bits.
  auto_compat_ptr(T* In)
    : auto_compat_ptr {static_cast<uint32_t>(reinterpret_cast<uintptr_t>(In))} {}
};

// Lets `auto_compat_ptr {ptr}` deduce T from a native pointer argument.
template<typename T>
auto_compat_ptr(T*) -> auto_compat_ptr<T>;

/**
 * @name timespec32
 *
 * timespec as laid out for the 32-bit guest: two 32-bit fields, eight bytes in total.
 * Provides conversion operators to and from the host version.
 * @{ */

struct FEX_ANNOTATE("alias-x86_32-timespec") FEX_ANNOTATE("fex-match") timespec32 {
  int32_t tv_sec;
  int32_t tv_nsec;

  timespec32() = default;

  // Narrows a host timespec into the 32-bit fields.
  timespec32(const struct timespec& spec)
    : tv_sec(static_cast<int32_t>(spec.tv_sec))
    , tv_nsec(static_cast<int32_t>(spec.tv_nsec)) {}

  // Widens into a zero-initialised host timespec.
  operator timespec() const {
    timespec Host {};
    Host.tv_sec = tv_sec;
    Host.tv_nsec = tv_nsec;
    return Host;
  }
};

static_assert(std::is_trivially_copyable_v<timespec32>);
static_assert(sizeof(timespec32) == 8);
/**  @} */

/**
 * @name timeval32
 *
 * timeval as laid out for the 32-bit guest: two 32-bit fields, eight bytes in total.
 * Provides conversion operators to and from the host version.
 * @{ */

struct FEX_ANNOTATE("alias-x86_32-timeval") FEX_ANNOTATE("fex-match") timeval32 {
  int32_t tv_sec;
  int32_t tv_usec;

  // Only constructible from a host timeval (or by copy).
  timeval32() = delete;

  // Narrows a host timeval into the 32-bit fields.
  timeval32(const struct timeval& spec)
    : tv_sec(static_cast<int32_t>(spec.tv_sec))
    , tv_usec(static_cast<int32_t>(spec.tv_usec)) {}

  // Widens into a zero-initialised host timeval.
  operator timeval() const {
    timeval Host {};
    Host.tv_sec = tv_sec;
    Host.tv_usec = tv_usec;
    return Host;
  }
};
/**  @} */

static_assert(std::is_trivially_copyable_v<timeval32>);
static_assert(sizeof(timeval32) == 8);

/**
 * @name itimerval32
 *
 * itimerval as laid out for the 32-bit guest: two timeval32 members, sixteen bytes in total.
 * Provides conversion operators to and from the host version.
 * @{ */

struct FEX_ANNOTATE("alias-x86_32-itimerval") FEX_ANNOTATE("fex-match") itimerval32 {
  FEX::HLE::x32::timeval32 it_interval;
  FEX::HLE::x32::timeval32 it_value;

  // Only constructible from a host itimerval (or by copy).
  itimerval32() = delete;

  // Narrows each host timeval through timeval32's converting constructor.
  itimerval32(const struct itimerval& spec)
    : it_interval {spec.it_interval}
    , it_value {spec.it_value} {}

  // Widens each member through timeval32's conversion operator.
  operator itimerval() const {
    itimerval Host {};
    Host.it_interval = static_cast<timeval>(it_interval);
    Host.it_value = static_cast<timeval>(it_value);
    return Host;
  }
};
/**  @} */

static_assert(std::is_trivially_copyable_v<itimerval32>);
static_assert(sizeof(itimerval32) == 16);

/**
 * @name iovec32
 *
 * iovec as laid out for the 32-bit guest: a 32-bit base address and a 32-bit length.
 * Provides conversion operators to and from the host version.
 * @{ */

struct FEX_ANNOTATE("alias-x86_32-iovec") FEX_ANNOTATE("fex-match") iovec32 {
  uint32_t iov_base;
  uint32_t iov_len;

  // Only constructible from a host iovec (or by copy).
  iovec32() = delete;

  // Narrows a host iovec; both the address and the length are reduced to 32 bits.
  iovec32(const struct iovec& vec)
    : iov_base(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(vec.iov_base)))
    , iov_len(static_cast<uint32_t>(vec.iov_len)) {}

  // Widens into a zero-initialised host iovec.
  operator iovec() const {
    iovec Host {};
    Host.iov_len = iov_len;
    Host.iov_base = reinterpret_cast<void*>(iov_base);
    return Host;
  }
};

static_assert(std::is_trivially_copyable_v<iovec32>);
static_assert(sizeof(iovec32) == 8);
/**  @} */

// Control-message header as laid out for the 32-bit guest: a 12-byte fixed header
// (checked below) followed by the variable-length payload.
struct FEX_ANNOTATE("alias-x86_32-cmsghdr") FEX_ANNOTATE("fex-match") cmsghdr32 {
  uint32_t cmsg_len;
  int32_t cmsg_level;
  int32_t cmsg_type;
  // Flexible array member: the payload begins directly after the header and adds nothing to sizeof.
  char cmsg_data[];
};

static_assert(std::is_trivially_copyable_v<cmsghdr32>);
static_assert(sizeof(cmsghdr32) == 12);

// Message header as laid out for the 32-bit guest. Every pointer member is a 4-byte compat_ptr
// and the length members are 32 bits wide, for 28 bytes in total (checked below).
struct FEX_ANNOTATE("alias-x86_32-msghdr") FEX_ANNOTATE("fex-match") msghdr32 {
  // Address buffer and its length.
  compat_ptr<void> msg_name;
  socklen_t msg_namelen;

  // Guest iovec32 array and its element count.
  compat_ptr<iovec32> msg_iov;
  compat_size_t msg_iovlen;

  // Control-message buffer and its length in bytes.
  compat_ptr<void> msg_control;
  compat_size_t msg_controllen;
  int32_t msg_flags;
};

static_assert(std::is_trivially_copyable_v<msghdr32>);
static_assert(sizeof(msghdr32) == 28);

// Multi-message entry as laid out for the 32-bit guest: a msghdr32 followed by a 32-bit
// length field, 32 bytes in total (checked below).
struct FEX_ANNOTATE("alias-x86_32-mmsghdr") FEX_ANNOTATE("fex-match") mmsghdr_32 {
  msghdr32 msg_hdr;
  uint32_t msg_len;
};

static_assert(std::is_trivially_copyable_v<mmsghdr_32>);
static_assert(sizeof(mmsghdr_32) == 32);

// Guest-side signal stack descriptor, annotated as an alias of the x86_32 `stack_t`.
struct FEX_ANNOTATE("alias-x86_32-stack_t") FEX_ANNOTATE("fex-match") stack_t32 {
  compat_ptr<void> ss_sp;
  int32_t ss_flags;
  compat_size_t ss_size;

  stack_t32() = delete;

  // Widens the descriptor into the host `stack_t`.
  operator stack_t() const {
    stack_t Host {};
    Host.ss_size = ss_size;
    Host.ss_flags = ss_flags;
    Host.ss_sp = ss_sp;
    return Host;
  }

  // Narrows a host `stack_t` into the 32-bit layout.
  stack_t32(const stack_t& ss)
    : ss_sp {auto_compat_ptr {ss.ss_sp}}
    , ss_flags(ss.ss_flags)
    , ss_size(ss.ss_size) {}
};

static_assert(std::is_trivially_copyable_v<stack_t32>);
static_assert(sizeof(stack_t32) == 12);

struct
  // This does not match the glibc implementation of stat
  // Matches the definition of `struct compat_stat` in `arch/x86/include/asm/compat.h`
  FEX_ANNOTATE("fex-match") oldstat32 {
  uint16_t st_dev;
  uint16_t st_ino;
  uint16_t st_mode;
  uint16_t st_nlink;

  uint16_t st_uid;
  uint16_t st_gid;
  uint16_t st_rdev;

  uint32_t st_size;
  uint32_t st_atime_;
  uint32_t st_mtime_;
  uint32_t st_ctime_;

  oldstat32() = delete;

  // Fills the legacy record from a host `struct stat`; each value is truncated to the
  // width of the destination field.
  oldstat32(const struct stat& host) {
    // Squeezes a device number into 16 bits: everything above the 20-bit minor field is
    // moved up to bit 8 and ORed with the minor bits before truncation.
    const auto FoldDevice = [](dev_t Device) {
      const uint32_t MinorWidth = 20;
      const uint32_t MinorMask = (1U << MinorWidth) - 1;
      return static_cast<uint16_t>(((Device >> MinorWidth) << 8) | (Device & MinorMask));
    };

    st_dev = FoldDevice(host.st_dev);
    st_rdev = FoldDevice(host.st_rdev);

    st_ino = host.st_ino;
    st_mode = host.st_mode;
    st_nlink = host.st_nlink;

    st_uid = host.st_uid;
    st_gid = host.st_gid;

    st_size = host.st_size;

    // Only whole seconds are carried over; this layout has no nanosecond fields.
    st_atime_ = host.st_atim.tv_sec;
    st_mtime_ = host.st_mtime;
    st_ctime_ = host.st_ctime;
  }
};
static_assert(std::is_trivially_copyable_v<oldstat32>);
static_assert(sizeof(oldstat32) == 32);

struct
  // This does not match the glibc implementation of stat
  // Matches the definition of `struct compat_stat` in `arch/x86/include/asm/compat.h`
  FEX_ANNOTATE("fex-match") stat32 {
  compat_dev_t st_dev;
  uint16_t __pad1;
  compat_ino_t st_ino;
  compat_mode_t st_mode;
  compat_nlink_t st_nlink;

  compat_uid_t st_uid;
  compat_gid_t st_gid;
  compat_dev_t st_rdev;

  uint16_t __pad2;
  uint32_t st_size;
  uint32_t st_blksize;
  uint32_t st_blocks; /* Number 512-byte blocks allocated. */
  uint32_t st_atime_;
  uint32_t fex_st_atime_nsec;
  uint32_t st_mtime_;
  uint32_t fex_st_mtime_nsec;
  uint32_t st_ctime_;
  uint32_t fex_st_ctime_nsec;
  uint32_t __unused4;
  uint32_t __unused5;

  stat32() = delete;

  // Fills the record from a host `struct stat`; values are truncated to the guest field
  // widths and all padding/unused words are cleared.
  stat32(const struct stat& host) {
    // Identity and ownership.
    st_dev = host.st_dev;
    st_ino = host.st_ino;
    st_mode = host.st_mode;
    st_nlink = host.st_nlink;
    st_uid = host.st_uid;
    st_gid = host.st_gid;
    st_rdev = host.st_rdev;

    // Size information.
    st_size = host.st_size;
    st_blksize = host.st_blksize;
    st_blocks = host.st_blocks;

    // Timestamps: seconds plus nanoseconds for access, modify and change.
    st_atime_ = host.st_atim.tv_sec;
    fex_st_atime_nsec = host.st_atim.tv_nsec;
    st_mtime_ = host.st_mtime;
    fex_st_mtime_nsec = host.st_mtim.tv_nsec;
    st_ctime_ = host.st_ctime;
    fex_st_ctime_nsec = host.st_ctim.tv_nsec;

    // Reserved fields.
    __unused5 = 0;
    __unused4 = 0;
    __pad2 = 0;
    __pad1 = 0;
  }
};
static_assert(std::is_trivially_copyable_v<stat32>);
static_assert(sizeof(stat32) == 64);

struct
  // This does not match the glibc implementation of stat
  // Matches the definition of `struct stat64` in `x86_64-linux-gnu/asm/stat.h`
  FEX_ANNOTATE("fex-match") FEX_PACKED stat64_32 {
  compat_uint64_t st_dev;
  uint8_t __pad0[4];
  // Truncated copy of the inode number; the full value lives in `st_ino` at the end.
  uint32_t __st_ino;

  uint32_t st_mode;
  uint32_t st_nlink;

  uint32_t st_uid;
  uint32_t st_gid;

  compat_uint64_t st_rdev;
  uint8_t __pad3[4];
  compat_int64_t st_size;
  uint32_t st_blksize;
  compat_uint64_t st_blocks; /* Number 512-byte blocks allocated. */
  uint32_t st_atime_;
  uint32_t fex_st_atime_nsec;
  uint32_t st_mtime_;
  uint32_t fex_st_mtime_nsec;
  uint32_t st_ctime_;
  uint32_t fex_st_ctime_nsec;
  compat_uint64_t st_ino;

  stat64_32() = delete;

  // Fills the record from a host `struct stat`. The explicit padding bytes are cleared so
  // the object never carries indeterminate bytes (stat32 clears its padding as well).
  stat64_32(const struct stat& host) {
#define COPY(x) x = host.x
    COPY(st_dev);
    COPY(st_ino);
    COPY(st_nlink);

    COPY(st_mode);
    COPY(st_uid);
    COPY(st_gid);

    COPY(st_rdev);
    COPY(st_size);
    COPY(st_blksize);
    COPY(st_blocks);

    __st_ino = host.st_ino;

    st_atime_ = host.st_atim.tv_sec;
    fex_st_atime_nsec = host.st_atim.tv_nsec;

    st_mtime_ = host.st_mtime;
    fex_st_mtime_nsec = host.st_mtim.tv_nsec;

    st_ctime_ = host.st_ctime;
    fex_st_ctime_nsec = host.st_ctim.tv_nsec;
#undef COPY
    __pad0[0] = __pad0[1] = __pad0[2] = __pad0[3] = 0;
    __pad3[0] = __pad3[1] = __pad3[2] = __pad3[3] = 0;
  }

#ifndef stat64
  // Same conversion for hosts where `struct stat64` is a distinct type.
  stat64_32(const struct stat64& host) {
#define COPY(x) x = host.x
    COPY(st_dev);
    COPY(st_ino);
    COPY(st_nlink);

    COPY(st_mode);
    COPY(st_uid);
    COPY(st_gid);

    COPY(st_rdev);
    COPY(st_size);
    COPY(st_blksize);
    COPY(st_blocks);

    __st_ino = host.st_ino;

    st_atime_ = host.st_atim.tv_sec;
    fex_st_atime_nsec = host.st_atim.tv_nsec;

    st_mtime_ = host.st_mtime;
    fex_st_mtime_nsec = host.st_mtim.tv_nsec;

    st_ctime_ = host.st_ctime;
    fex_st_ctime_nsec = host.st_ctim.tv_nsec;
#undef COPY
    __pad0[0] = __pad0[1] = __pad0[2] = __pad0[3] = 0;
    __pad3[0] = __pad3[1] = __pad3[2] = __pad3[3] = 0;
  }
#endif
};
static_assert(std::is_trivially_copyable_v<stat64_32>);
static_assert(sizeof(stat64_32) == 96);

// Guest-side `statfs64`, annotated as an alias of the x86_32 type. Packed with 4-byte
// alignment so the 64-bit counters sit directly after the two leading 32-bit words.
struct FEX_PACKED FEX_ALIGNED(4) FEX_ANNOTATE("alias-x86_32-statfs64") FEX_ANNOTATE("fex-match") statfs64_32 {
  uint32_t f_type;
  uint32_t f_bsize;
  compat_uint64_t f_blocks;
  compat_uint64_t f_bfree;
  compat_uint64_t f_bavail;
  compat_uint64_t f_files;
  compat_uint64_t f_ffree;
  __kernel_fsid_t f_fsid;
  uint32_t f_namelen;
  uint32_t f_frsize;
  uint32_t f_flags;
  uint32_t pad[4];

  statfs64_32() = delete;

  // Fills the record from a host `struct statfs`. The spare words are cleared so the
  // object never carries indeterminate bytes.
  statfs64_32(const struct statfs& host) {
#define COPY(x) x = host.x
    COPY(f_type);
    COPY(f_bsize);
    COPY(f_blocks);
    COPY(f_bfree);
    COPY(f_bavail);
    COPY(f_files);
    COPY(f_ffree);
    COPY(f_namelen);
    COPY(f_frsize);
    COPY(f_flags);

    // The host fsid type differs from __kernel_fsid_t, so copy it bytewise.
    memcpy(&f_fsid, &host.f_fsid, sizeof(f_fsid));
#undef COPY
    pad[0] = pad[1] = pad[2] = pad[3] = 0;
  }

#ifndef statfs64
  // Same conversion for hosts where `struct statfs64` is a distinct type.
  statfs64_32(const struct statfs64& host) {
#define COPY(x) x = host.x
    COPY(f_type);
    COPY(f_bsize);
    COPY(f_blocks);
    COPY(f_bfree);
    COPY(f_bavail);
    COPY(f_files);
    COPY(f_ffree);
    COPY(f_namelen);
    COPY(f_frsize);
    COPY(f_flags);

    memcpy(&f_fsid, &host.f_fsid, sizeof(f_fsid));
#undef COPY
    pad[0] = pad[1] = pad[2] = pad[3] = 0;
  }
#endif
};
static_assert(std::is_trivially_copyable_v<statfs64_32>);
static_assert(sizeof(statfs64_32) == 84);

// Guest-side `statfs`, annotated as an alias of the x86_32 type. Every counter is a
// 32-bit value, so larger host values are truncated on conversion.
struct FEX_ANNOTATE("alias-x86_32-statfs") FEX_ANNOTATE("fex-match") statfs32_32 {
  int32_t f_type;
  int32_t f_bsize;
  int32_t f_blocks;
  int32_t f_bfree;
  int32_t f_bavail;
  int32_t f_files;
  int32_t f_ffree;
  __kernel_fsid_t f_fsid;
  int32_t f_namelen;
  int32_t f_frsize;
  int32_t f_flags;
  int32_t pad[4];

  statfs32_32() = delete;

  // Fills the record from a host `struct statfs`. The spare words are cleared so the
  // object never carries indeterminate bytes.
  statfs32_32(const struct statfs& host) {
#define COPY(x) x = host.x
    COPY(f_type);
    COPY(f_bsize);
    COPY(f_blocks);
    COPY(f_bfree);
    COPY(f_bavail);
    COPY(f_files);
    COPY(f_ffree);
    COPY(f_namelen);
    COPY(f_frsize);
    COPY(f_flags);

    // The host fsid type differs from __kernel_fsid_t, so copy it bytewise.
    memcpy(&f_fsid, &host.f_fsid, sizeof(f_fsid));
#undef COPY
    pad[0] = pad[1] = pad[2] = pad[3] = 0;
  }

#ifndef statfs64
  // Same conversion for hosts where `struct statfs64` is a distinct type.
  // Taken by const reference, like every other converting constructor in this file,
  // to avoid copying the whole host struct.
  statfs32_32(const struct statfs64& host) {
#define COPY(x) x = host.x
    COPY(f_type);
    COPY(f_bsize);
    COPY(f_blocks);
    COPY(f_bfree);
    COPY(f_bavail);
    COPY(f_files);
    COPY(f_ffree);
    COPY(f_namelen);
    COPY(f_frsize);
    COPY(f_flags);

    memcpy(&f_fsid, &host.f_fsid, sizeof(f_fsid));
#undef COPY
    pad[0] = pad[1] = pad[2] = pad[3] = 0;
  }
#endif
};
static_assert(std::is_trivially_copyable_v<statfs32_32>);
static_assert(sizeof(statfs32_32) == 64);

// Guest-side `flock` with 32-bit offsets, annotated as an alias of the x86_32 type.
struct FEX_ANNOTATE("alias-x86_32-flock") FEX_ANNOTATE("fex-match") flock_32 {
  int16_t l_type;
  int16_t l_whence;
  int32_t l_start;
  int32_t l_len;
  int32_t l_pid;

  flock_32() = delete;

  // Narrows a host `flock`; start and length are truncated to 32 bits.
  flock_32(const struct flock& host)
    : l_type(host.l_type)
    , l_whence(host.l_whence)
    , l_start(static_cast<int32_t>(host.l_start))
    , l_len(static_cast<int32_t>(host.l_len))
    , l_pid(host.l_pid) {}

  // Widens this record into a host `flock`.
  operator struct flock() const {
    struct flock Host {};
    Host.l_pid = l_pid;
    Host.l_len = l_len;
    Host.l_start = l_start;
    Host.l_whence = l_whence;
    Host.l_type = l_type;
    return Host;
  }
};

static_assert(std::is_trivially_copyable_v<flock_32>);
static_assert(sizeof(flock_32) == 16);

// glibc doesn't pack flock64 while the kernel does
// This does not match glibc flock64 definition
// Matches the definition of `struct compat_flock64` in `arch/x86/include/asm/compat.h`
struct FEX_ANNOTATE("fex-match") FEX_PACKED flock64_32 {
  int16_t l_type;
  int16_t l_whence;
  compat_loff_t l_start;
  compat_loff_t l_len;
  compat_pid_t l_pid;

  flock64_32() = delete;

  // Converts a host `flock` into the packed guest layout.
  flock64_32(const struct flock& host)
    : l_type(host.l_type)
    , l_whence(host.l_whence)
    , l_start(host.l_start)
    , l_len(host.l_len)
    , l_pid(host.l_pid) {}

  // Converts this record back into a host `flock`.
  operator struct flock() const {
    struct flock Host {};
    Host.l_pid = l_pid;
    Host.l_len = l_len;
    Host.l_start = l_start;
    Host.l_whence = l_whence;
    Host.l_type = l_type;
    return Host;
  }
};
static_assert(std::is_trivially_copyable_v<flock64_32>);
static_assert(sizeof(flock64_32) == 24);

// There is no public definition of this struct
// Matches the definition of `struct linux_dirent` in fs/readdir.c
// Layout only: a 24-byte fixed header followed by the entry name.
struct FEX_ANNOTATE("fex-match") linux_dirent {
  compat_uint64_t d_ino;
  compat_int64_t d_off;
  // Length of this record.
  uint16_t d_reclen;
  // Explicit padding so the fixed header is 24 bytes.
  uint8_t _pad[6];
  // Flexible array member holding the name.
  char d_name[];
};
static_assert(std::is_trivially_copyable_v<linux_dirent>);
static_assert(sizeof(linux_dirent) == 24);

// There is no public definition of this struct
// Matches the definition of `struct compat_linux_dirent` in fs/readdir.c
// Layout only: inode and offset use the 32-bit `compat_ulong_t`.
struct FEX_ANNOTATE("fex-match") linux_dirent_32 {
  compat_ulong_t d_ino;
  compat_ulong_t d_off;
  // Length of this record.
  uint16_t d_reclen;
  // Declared with one element; the name extends past the declared struct.
  char d_name[1];
  /* Has hidden null character and d_type */
};
static_assert(std::is_trivially_copyable_v<linux_dirent_32>);
static_assert(sizeof(linux_dirent_32) == 12);

// There is no public definition of this struct
// Matches the definition of `struct linux_dirent64` in include/linux/dirent.h
// Layout only: unlike the other dirent variants here, d_type is an explicit field.
struct FEX_ANNOTATE("fex-match") linux_dirent_64 {
  compat_uint64_t d_ino;
  compat_uint64_t d_off;
  // Length of this record.
  uint16_t d_reclen;
  uint8_t d_type;
  // Explicit padding so the fixed header is 24 bytes.
  uint8_t _pad[5];
  // Flexible array member holding the name.
  char d_name[];
};
static_assert(std::is_trivially_copyable_v<linux_dirent_64>);
static_assert(sizeof(linux_dirent_64) == 24);

// There is no public definition of this struct
// Matches `struct compat_sigset_argpack`
// Layout only: a 32-bit pointer to a 64-bit signal mask plus the size of that mask.
struct FEX_ANNOTATE("fex-match") sigset_argpack32 {
  compat_ptr<uint64_t> sigset;
  compat_size_t size;
};

static_assert(std::is_trivially_copyable_v<sigset_argpack32>);
static_assert(sizeof(sigset_argpack32) == 8);

// Guest-side `rusage`, annotated as an alias of the x86_32 type.
// Two 32-bit timevals are followed by fourteen `compat_long_t` counters. Each counter is
// wrapped in an anonymous union with a `__<name>_word` twin of the same type, so both
// names refer to the same storage.
struct FEX_ANNOTATE("alias-x86_32-rusage") FEX_ANNOTATE("fex-match") rusage_32 {
  timeval32 ru_utime;
  timeval32 ru_stime;
  union {
    compat_long_t ru_maxrss;
    compat_long_t __ru_maxrss_word;
  };
  union {
    compat_long_t ru_ixrss;
    compat_long_t __ru_ixrss_word;
  };
  union {
    compat_long_t ru_idrss;
    compat_long_t __ru_idrss_word;
  };
  union {
    compat_long_t ru_isrss;
    compat_long_t __ru_isrss_word;
  };
  union {
    compat_long_t ru_minflt;
    compat_long_t __ru_minflt_word;
  };
  union {
    compat_long_t ru_majflt;
    compat_long_t __ru_majflt_word;
  };
  union {
    compat_long_t ru_nswap;
    compat_long_t __ru_nswap_word;
  };
  union {
    compat_long_t ru_inblock;
    compat_long_t __ru_inblock_word;
  };
  union {
    compat_long_t ru_oublock;
    compat_long_t __ru_oublock_word;
  };
  union {
    compat_long_t ru_msgsnd;
    compat_long_t __ru_msgsnd_word;
  };
  union {
    compat_long_t ru_msgrcv;
    compat_long_t __ru_msgrcv_word;
  };
  union {
    compat_long_t ru_nsignals;
    compat_long_t __ru_nsignals_word;
  };
  union {
    compat_long_t ru_nvcsw;
    compat_long_t __ru_nvcsw_word;
  };
  union {
    compat_long_t ru_nivcsw;
    compat_long_t __ru_nivcsw_word;
  };

  rusage_32() = delete;
  // Narrows a host `rusage`. The timevals go through timeval32's converting constructor;
  // the counters are plain assignments into the narrower guest type.
  rusage_32(const struct rusage& usage)
    : ru_utime {usage.ru_utime}
    , ru_stime {usage.ru_stime} {
    // These only truncate
    ru_maxrss = usage.ru_maxrss;
    ru_ixrss = usage.ru_ixrss;
    ru_idrss = usage.ru_idrss;
    ru_isrss = usage.ru_isrss;
    ru_minflt = usage.ru_minflt;
    ru_majflt = usage.ru_majflt;
    ru_nswap = usage.ru_nswap;
    ru_inblock = usage.ru_inblock;
    ru_oublock = usage.ru_oublock;
    ru_msgsnd = usage.ru_msgsnd;
    ru_msgrcv = usage.ru_msgrcv;
    ru_nsignals = usage.ru_nsignals;
    ru_nvcsw = usage.ru_nvcsw;
    ru_nivcsw = usage.ru_nivcsw;
  }

  // Widens this record into a host `rusage`, field by field.
  operator struct rusage() const {
    struct rusage usage {};
    usage.ru_utime = ru_utime;
    usage.ru_stime = ru_stime;
    usage.ru_maxrss = ru_maxrss;
    usage.ru_ixrss = ru_ixrss;
    usage.ru_idrss = ru_idrss;
    usage.ru_isrss = ru_isrss;
    usage.ru_minflt = ru_minflt;
    usage.ru_majflt = ru_majflt;
    usage.ru_nswap = ru_nswap;
    usage.ru_inblock = ru_inblock;
    usage.ru_oublock = ru_oublock;
    usage.ru_msgsnd = ru_msgsnd;
    usage.ru_msgrcv = ru_msgrcv;
    usage.ru_nsignals = ru_nsignals;
    usage.ru_nvcsw = ru_nvcsw;
    usage.ru_nivcsw = ru_nivcsw;

    return usage;
  }
};
static_assert(std::is_trivially_copyable_v<rusage_32>);
static_assert(sizeof(rusage_32) == 72);

// Guest-side signal action whose mask is a single 32-bit word.
struct FEX_PACKED FEX_ANNOTATE("fex-match") OldGuestSigAction_32 {
  FEX::HLE::x32::compat_ptr<void> handler_32;
  uint32_t sa_mask;
  uint32_t sa_flags;
  FEX::HLE::x32::compat_ptr<void> restorer_32;

  OldGuestSigAction_32() = delete;

  // Expands this record into the common GuestSigAction representation.
  operator FEX::HLE::GuestSigAction() const {
    FEX::HLE::GuestSigAction Result {};
    using HandlerType = decltype(Result.sigaction_handler.handler);
    using RestorerType = decltype(Result.restorer);

    Result.sa_mask.Val = sa_mask;
    Result.sa_flags = sa_flags;
    Result.restorer = reinterpret_cast<RestorerType>(restorer_32.Ptr);
    Result.sigaction_handler.handler = reinterpret_cast<HandlerType>(handler_32.Ptr);
    return Result;
  }

  // Narrows a GuestSigAction; flags and mask are truncated to 32 bits.
  OldGuestSigAction_32(const FEX::HLE::GuestSigAction& action)
    : handler_32 {auto_compat_ptr {reinterpret_cast<void*>(action.sigaction_handler.handler)}}
    , sa_mask(action.sa_mask.Val)
    , sa_flags(action.sa_flags)
    , restorer_32 {auto_compat_ptr {reinterpret_cast<void*>(action.restorer)}} {}
};

static_assert(std::is_trivially_copyable_v<OldGuestSigAction_32>);
static_assert(sizeof(OldGuestSigAction_32) == 16);

// This definition isn't public
// This is for rt_sigaction
// Matches the definition for `struct compat_sigaction` in `include/linux/compat.h`
struct FEX_PACKED FEX_ANNOTATE("fex-match") GuestSigAction_32 {
  FEX::HLE::x32::compat_ptr<void> handler_32;

  uint32_t sa_flags;
  FEX::HLE::x32::compat_ptr<void> restorer_32;
  FEX::HLE::GuestSAMask sa_mask;

  GuestSigAction_32() = delete;

  // Expands this record into the common GuestSigAction representation.
  operator FEX::HLE::GuestSigAction() const {
    FEX::HLE::GuestSigAction Result {};
    using HandlerType = decltype(Result.sigaction_handler.handler);
    using RestorerType = decltype(Result.restorer);

    Result.sa_mask = sa_mask;
    Result.sa_flags = sa_flags;
    Result.restorer = reinterpret_cast<RestorerType>(restorer_32.Ptr);
    Result.sigaction_handler.handler = reinterpret_cast<HandlerType>(handler_32.Ptr);
    return Result;
  }

  // Narrows a GuestSigAction; the mask is copied whole, the flags are truncated to 32 bits.
  GuestSigAction_32(const FEX::HLE::GuestSigAction& action)
    : handler_32 {auto_compat_ptr {reinterpret_cast<void*>(action.sigaction_handler.handler)}}
    , sa_flags(action.sa_flags)
    , restorer_32 {auto_compat_ptr {reinterpret_cast<void*>(action.restorer)}} {
    sa_mask = action.sa_mask;
  }
};

static_assert(std::is_trivially_copyable_v<GuestSigAction_32>);
static_assert(sizeof(GuestSigAction_32) == 20);

// Guest-side `tms`, annotated as an alias of the x86_32 type; all four tick counters
// use `compat_clock_t`.
struct FEX_ANNOTATE("alias-x86_32-tms") FEX_ANNOTATE("fex-match") compat_tms {
  compat_clock_t tms_utime;
  compat_clock_t tms_stime;
  compat_clock_t tms_cutime;
  compat_clock_t tms_cstime;

  compat_tms() = delete;

  // Widens this record into a host `tms`.
  operator tms() const {
    tms Host {};
    Host.tms_cstime = tms_cstime;
    Host.tms_cutime = tms_cutime;
    Host.tms_stime = tms_stime;
    Host.tms_utime = tms_utime;
    return Host;
  }

  // Narrows a host `tms` into the guest layout.
  compat_tms(const struct tms& val)
    : tms_utime(val.tms_utime)
    , tms_stime(val.tms_stime)
    , tms_cutime(val.tms_cutime)
    , tms_cstime(val.tms_cstime) {}
};

static_assert(std::is_trivially_copyable_v<compat_tms>);
static_assert(sizeof(compat_tms) == 16);

// Guest-side `utimbuf`, annotated as an alias of the x86_32 type; both timestamps use
// `old_time32_t`.
struct FEX_ANNOTATE("alias-x86_32-utimbuf") FEX_ANNOTATE("fex-match") old_utimbuf32 {
  old_time32_t actime;
  old_time32_t modtime;

  old_utimbuf32() = delete;

  // Widens this record into a host `utimbuf`.
  operator utimbuf() const {
    utimbuf Host {};
    Host.modtime = modtime;
    Host.actime = actime;
    return Host;
  }

  // Narrows a host `utimbuf` into the guest layout.
  old_utimbuf32(const struct utimbuf& val)
    : actime(val.actime)
    , modtime(val.modtime) {}
};

static_assert(std::is_trivially_copyable_v<old_utimbuf32>);
static_assert(sizeof(old_utimbuf32) == 8);

// Guest-side `itimerspec`, annotated as an alias of the x86_32 type; built from two
// timespec32 values.
struct FEX_ANNOTATE("alias-x86_32-itimerspec") FEX_ANNOTATE("fex-match") old_itimerspec32 {
  timespec32 it_interval;
  timespec32 it_value;

  old_itimerspec32() = delete;

  // Widens this record into a host `itimerspec`; each half relies on timespec32's own
  // conversion to the host type.
  operator itimerspec() const {
    itimerspec Host {};
    Host.it_value = it_value;
    Host.it_interval = it_interval;
    return Host;
  }

  // Narrows a host `itimerspec` via timespec32's converting constructor.
  old_itimerspec32(const struct itimerspec& val)
    : it_interval(val.it_interval)
    , it_value(val.it_value) {}
};

static_assert(std::is_trivially_copyable_v<old_itimerspec32>);
static_assert(sizeof(old_itimerspec32) == 16);

// Guest-side `rlimit` with 32-bit limits. `Signed` selects the saturation ceiling used
// when narrowing a host value: 0x7FFF'FFFF when true, 0xFFFF'FFFF when false.
template<bool Signed>
struct FEX_ANNOTATE("alias-x86_32-rlimit") FEX_ANNOTATE("fex-match") rlimit32 {
  uint32_t rlim_cur;
  uint32_t rlim_max;
  rlimit32() = delete;

  // Widens this record into a host `rlimit`. An all-ones 32-bit limit becomes an
  // all-ones host limit; every other value is carried over unchanged.
  operator rlimit() const {
    static_assert(Signed == false, "Signed variant doesn't exist");
    rlimit val {};
    val.rlim_cur = (rlim_cur == ~0U) ? ~0UL : rlim_cur;
    val.rlim_max = (rlim_max == ~0U) ? ~0UL : rlim_max;
    return val;
  }

  // Narrows a host `rlimit`, saturating each limit at the ceiling for this variant.
  rlimit32(const struct rlimit& val) {
    constexpr uint32_t Limit = Signed ? 0x7FFF'FFFF : 0xFFFF'FFFF;
    rlim_cur = (val.rlim_cur > Limit) ? Limit : static_cast<uint32_t>(val.rlim_cur);
    rlim_max = (val.rlim_max > Limit) ? Limit : static_cast<uint32_t>(val.rlim_max);
  }
};

static_assert(std::is_trivially_copyable_v<rlimit32<true>>);
static_assert(sizeof(rlimit32<true>) == 8);

// Guest-side `timex`, annotated as an alias of the x86_32 type.
// Long-typed members use `compat_long_t` and the embedded time uses `timeval32`.
// Eleven unnamed 32-bit bit-fields after `tai` reserve trailing space, bringing the
// struct to 128 bytes.
struct FEX_ANNOTATE("alias-x86_32-timex") FEX_ANNOTATE("fex-match") timex32 {
  uint32_t modes;
  compat_long_t offset;
  compat_long_t freq;
  compat_long_t maxerror;
  compat_long_t esterror;
  int32_t status;
  compat_long_t constant;
  compat_long_t precision;
  compat_long_t tolerance;
  timeval32 time;
  compat_long_t tick;
  compat_long_t ppsfreq;
  compat_long_t jitter;
  int32_t shift;
  compat_long_t stabil;
  compat_long_t jitcnt;
  compat_long_t calcnt;
  compat_long_t errcnt;
  compat_long_t stbcnt;

  int32_t tai;

  // Padding
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;
  int32_t : 32;

  timex32() = delete;

  // Widens this record into a host `timex`, copying every named field; `time` relies on
  // timeval32's conversion to the host type.
  operator timex() const {
    timex val {};
    val.modes = modes;
    val.offset = offset;
    val.freq = freq;
    val.maxerror = maxerror;
    val.esterror = esterror;
    val.status = status;
    val.constant = constant;
    val.precision = precision;
    val.tolerance = tolerance;
    val.time = time;
    val.tick = tick;
    val.ppsfreq = ppsfreq;
    val.jitter = jitter;
    val.shift = shift;
    val.stabil = stabil;
    val.jitcnt = jitcnt;
    val.calcnt = calcnt;
    val.errcnt = errcnt;
    val.stbcnt = stbcnt;
    val.tai = tai;
    return val;
  }

  // Narrows a host `timex`. `time` is built through timeval32's converting constructor;
  // the remaining fields are plain assignments into the narrower guest types.
  timex32(const struct timex& val)
    : time {val.time} {
    modes = val.modes;
    offset = val.offset;
    freq = val.freq;
    maxerror = val.maxerror;
    esterror = val.esterror;
    status = val.status;
    constant = val.constant;
    precision = val.precision;
    tolerance = val.tolerance;
    tick = val.tick;
    ppsfreq = val.ppsfreq;
    jitter = val.jitter;
    shift = val.shift;
    stabil = val.stabil;
    jitcnt = val.jitcnt;
    calcnt = val.calcnt;
    errcnt = val.errcnt;
    stbcnt = val.stbcnt;
    tai = val.tai;
  }
};

static_assert(std::is_trivially_copyable_v<timex32>);
static_assert(sizeof(timex32) == 128);

// Guest-side `sigval`, annotated as an alias of the x86_32 type: an int or a 32-bit
// pointer sharing the same four bytes.
union FEX_ANNOTATE("alias-x86_32-sigval") FEX_ANNOTATE("fex-match") sigval32 {
  int sival_int;
  compat_ptr<void> sival_ptr;

  sigval32() = delete;

  // Widens through the pointer member.
  operator sigval() const {
    sigval Host {};
    Host.sival_ptr = sival_ptr;
    return Host;
  }

  // Narrows a host `sigval` through its pointer member.
  sigval32(sigval val)
    : sival_ptr {auto_compat_ptr {val.sival_ptr}} {}
};

static_assert(std::is_trivially_copyable_v<sigval32>);
static_assert(sizeof(sigval32) == 4);

// Total size in bytes that the guest sigevent record is sized against.
constexpr size_t FEX_SIGEV_MAX_SIZE = 64;
// Number of int32_t words left for the trailing union once the two int32_t fields and
// the sigval32 have been subtracted from the total.
constexpr size_t FEX_SIGEV_PAD_SIZE = (FEX_SIGEV_MAX_SIZE - (sizeof(int32_t) * 2 + sizeof(sigval32))) / sizeof(int32_t);

struct FEX_ANNOTATE("fex-match") sigevent32 {
  FEX::HLE::x32::sigval32 sigev_value;
  int sigev_signo;
  int sigev_notify;
  union {
    int _pad[FEX_SIGEV_PAD_SIZE];
    int _tid;
    struct {
      uint32_t _function;
      uint32_t _attribute;
    } _sigev_thread;
  } _sigev_un;

  sigevent32() = delete;

// For older build environments
#ifndef sigev_notify_thread_id
#define sigev_notify_thread_id _sigev_un._tid
#endif

  operator sigevent() const {
    sigevent val {};
    val.sigev_value = sigev_value;
    val.sigev_signo = sigev_signo;
    val.sigev_notify = sigev_notify;

    if (sigev_notify == SIGEV_THREAD_ID) {
      val.sigev_notify_thread_id = _sigev_un._tid;
    } else if (sigev_notify == SIGEV_THREAD) {
      val.sigev_notify_function = reinterpret_cast<void (*)(sigval)>(_sigev_un._sigev_thread._function);
      val.sigev_notify_attributes = reinterpret_cast<pthread_attr_t*>(_sigev_un._sigev_thread._attribute);
    }
    return val;
  }

  sigevent32(const sigevent& val)
    : sigev_value {val.sigev_value} {
    sigev_signo = val.sigev_signo;
    sigev_notify = val.sigev_notify;

    if (sigev_notify == SIGEV_THREAD_ID) {
      _sigev_un._tid = val.sigev_notify_thread_id;
    } else if (sigev_notify == SIGEV_THREAD) {
      _sigev_un._sigev_thread._function = static_cast<uint32_t>(reinterpret_cast<uint64_t>(val.sigev_notify_function));
      _sigev_un._sigev_thread._attribute = static_cast<uint32_t>(reinterpret_cast<uint64_t>(val.sigev_notify_attributes));
    }
  }
};

static_assert(std::is_trivially_copyable_v<sigval32>);
static_assert(sizeof(sigval32) == 4);

// Guest-side `mq_attr`, annotated as an alias of the x86_32 type: four `compat_long_t`
// attributes followed by four reserved words.
struct FEX_ANNOTATE("alias-x86_32-mq_attr") FEX_ANNOTATE("fex-match") mq_attr32 {
  compat_long_t mq_flags;
  compat_long_t mq_maxmsg;
  compat_long_t mq_msgsize;
  compat_long_t mq_curmsgs;
  compat_long_t __pad[4];
  mq_attr32() = delete;

  // Widens this record into a host `mq_attr`; the reserved words are not carried over.
  operator mq_attr() const {
    struct mq_attr val {};
    val.mq_flags = mq_flags;
    val.mq_maxmsg = mq_maxmsg;
    val.mq_msgsize = mq_msgsize;
    val.mq_curmsgs = mq_curmsgs;
    return val;
  }

  // Narrows a host `mq_attr`. The reserved words are zero-initialised so the record
  // never carries indeterminate bytes.
  mq_attr32(const struct mq_attr& val)
    : __pad {} {
    mq_flags = val.mq_flags;
    mq_maxmsg = val.mq_maxmsg;
    mq_msgsize = val.mq_msgsize;
    mq_curmsgs = val.mq_curmsgs;
  }
};

static_assert(std::is_trivially_copyable_v<mq_attr32>);
static_assert(sizeof(mq_attr32) == 32);

// Guest-side `epoll_data_t`, annotated as an alias of the x86_32 type.
// The 64-bit member is the widest, so the other members overlay only part of its storage.
union FEX_ANNOTATE("alias-x86_32-epoll_data_t") FEX_ANNOTATE("fex-match") epoll_data32 {
  compat_ptr<void> ptr;
  int fd;
  uint32_t u32;
  compat_uint64_t u64;
};

// Guest-side `epoll_event`, annotated as an alias of the x86_32 type: packed so the
// 64-bit data union follows the 32-bit event mask directly (12 bytes in total).
struct FEX_PACKED FEX_ANNOTATE("alias-x86_32-epoll_event") FEX_ANNOTATE("fex-match") epoll_event32 {
  uint32_t events;
  epoll_data32 data;

  epoll_event32() = delete;

  // Widens this record into a host `epoll_event`, carrying the full 64-bit payload.
  operator struct epoll_event() const {
    epoll_event event {};
    event.events = events;
    event.data.u64 = data.u64;
    return event;
  }

  // Narrows a host `epoll_event`.
  // The member initialiser only fills the 32-bit `ptr` member of the union, so the
  // complete 64-bit payload is stored afterwards. Without that store the upper half of
  // `data.u64` would be indeterminate and the value would not round-trip.
  epoll_event32(const struct epoll_event& event)
    : data {auto_compat_ptr<void> {static_cast<uint32_t>(event.data.u64)}} {
    events = event.events;
    data.u64 = event.data.u64;
  }
};
static_assert(std::is_trivially_copyable_v<epoll_event32>);
static_assert(sizeof(epoll_event32) == 12);

// IPC permission record with 16-bit ids, mode and sequence number.
struct ipc_perm_32 {
  uint32_t key;
  uint16_t uid;
  uint16_t gid;
  uint16_t cuid;
  uint16_t cgid;
  uint16_t mode;
  uint16_t seq;

  ipc_perm_32() = delete;

  // Widens this record into a host `ipc64_perm`.
  operator struct ipc64_perm() const {
    struct ipc64_perm Host {};
    Host.seq = seq;
    Host.mode = mode;
    Host.cgid = cgid;
    Host.cuid = cuid;
    Host.gid = gid;
    Host.uid = uid;
    Host.key = key;
    return Host;
  }

  // Narrows a host `ipc64_perm`; ids and mode are truncated to 16 bits.
  ipc_perm_32(const struct ipc64_perm& perm)
    : key(static_cast<uint32_t>(perm.key))
    , uid(static_cast<uint16_t>(perm.uid))
    , gid(static_cast<uint16_t>(perm.gid))
    , cuid(static_cast<uint16_t>(perm.cuid))
    , cgid(static_cast<uint16_t>(perm.cgid))
    , mode(static_cast<uint16_t>(perm.mode))
    , seq(static_cast<uint16_t>(perm.seq)) {}
};

static_assert(std::is_trivially_copyable_v<ipc_perm_32>);
static_assert(sizeof(ipc_perm_32) == 16);

struct ipc_perm_64 {
  uint32_t key;
  uint32_t uid;
  uint32_t gid;
  uint32_t cuid;
  uint32_t cgid;
  uint16_t mode;
  uint16_t _pad1;
  uint16_t seq;
  uint16_t _pad2;
  compat_ulong_t _pad[2];

  ipc_perm_64() = delete;

  operator struct ipc64_perm() const {
    struct ipc64_perm perm {};
    perm.key = key;
    perm.uid = uid;
    perm.gid = gid;
    perm.cuid = cuid;
    perm.cgid = cgid;
    perm.mode = mode;
    perm.seq = seq;
    return perm;
  }

  ipc_perm_64(const struct ipc64_perm& perm) {
    key = perm.key;
    uid = perm.uid;
    gid = perm.gid;
    cuid = perm.cuid;
    cgid = perm.cgid;
    mode = perm.mode;
    seq = perm.seq;
    _pad1 = _pad2 = 0;
  }
};

static_assert(std::is_trivially_copyable_v<ipc_perm_64>);
static_assert(sizeof(ipc_perm_64) == 36);

// Shared-memory segment descriptor built on ipc_perm_32, with 32-bit timestamps and
// 16-bit pid/attach counters.
struct shmid_ds_32 {
  ipc_perm_32 shm_perm;
  int32_t shm_segsz;
  int32_t shm_atime;
  int32_t shm_dtime;
  int32_t shm_ctime;
  uint16_t shm_cpid;
  uint16_t shm_lpid;
  uint16_t shm_nattch;
  uint16_t shm_unused;
  uint32_t shm_unused2;
  uint32_t shm_unused3;

  shmid_ds_32() = delete;

  // Widens this record into a host `shmid64_ds`; the unused fields are not carried over.
  operator struct shmid64_ds() const {
    struct shmid64_ds Host {};
    Host.shm_perm = shm_perm;

    Host.shm_segsz = shm_segsz;

    Host.shm_atime = shm_atime;
    Host.shm_dtime = shm_dtime;
    Host.shm_ctime = shm_ctime;

    Host.shm_cpid = shm_cpid;
    Host.shm_lpid = shm_lpid;
    Host.shm_nattch = shm_nattch;
    return Host;
  }

  // Narrows a host `shmid64_ds`; values are truncated to the guest field widths and the
  // unused fields are cleared.
  shmid_ds_32(const struct shmid64_ds& buf)
    : shm_perm {buf.shm_perm}
    , shm_segsz(buf.shm_segsz)
    , shm_atime(buf.shm_atime)
    , shm_dtime(buf.shm_dtime)
    , shm_ctime(buf.shm_ctime)
    , shm_cpid(buf.shm_cpid)
    , shm_lpid(buf.shm_lpid)
    , shm_nattch(buf.shm_nattch)
    , shm_unused(0)
    , shm_unused2(0)
    , shm_unused3(0) {}
};

static_assert(std::is_trivially_copyable_v<shmid_ds_32>);
static_assert(sizeof(shmid_ds_32) == 48);

struct shmid_ds_64 {
  ipc_perm_64 shm_perm;
  compat_size_t shm_segsz;
  compat_ulong_t shm_atime;
  compat_ulong_t shm_atime_high;
  compat_ulong_t shm_dtime;
  compat_ulong_t shm_dtime_high;
  compat_ulong_t shm_ctime;
  compat_ulong_t shm_ctime_high;
  int32_t shm_cpid;
  int32_t shm_lpid;
  compat_ulong_t shm_nattch;
  compat_ulong_t shm_unused4;
  compat_ulong_t shm_unused5;

  shmid_ds_64() = delete;

  operator struct shmid64_ds() const {
    struct shmid64_ds buf {};
    buf.shm_perm = shm_perm;

    buf.shm_segsz = shm_segsz;
    buf.shm_atime = shm_atime_high;
    buf.shm_atime <<= 32;
    buf.shm_atime |= shm_atime;

    buf.shm_dtime = shm_dtime_high;
    buf.shm_dtime <<= 32;
    buf.shm_dtime |= shm_dtime;

    buf.shm_ctime = shm_ctime_high;
    buf.shm_ctime <<= 32;
    buf.shm_ctime |= shm_ctime;

    buf.shm_cpid = shm_cpid;
    buf.shm_lpid = shm_lpid;
    buf.shm_nattch = shm_nattch;
    return buf;
  }

  shmid_ds_64(const struct shmid64_ds& buf)
    : shm_perm {buf.shm_perm} {
    shm_segsz = buf.shm_segsz;
    shm_atime = buf.shm_atime;
    shm_atime_high = buf.shm_atime >> 32;
    shm_dtime = buf.shm_dtime;
    shm_dtime_high = buf.shm_dtime >> 32;
    shm_ctime = buf.shm_ctime;
    shm_ctime_high = buf.shm_ctime >> 32;
    shm_cpid = buf.shm_cpid;
    shm_lpid = buf.shm_lpid;
    shm_nattch = buf.shm_nattch;
    shm_unused4 = shm_unused5 = 0;
  }
};

static_assert(std::is_trivially_copyable_v<shmid_ds_64>);
static_assert(sizeof(shmid_ds_64) == 84);

// Semaphore set descriptor built on ipc_perm_32 with 32-bit timestamps.
struct semid_ds_32 {
  struct ipc_perm_32 sem_perm;
  int32_t sem_otime;
  int32_t sem_ctime;
  uint32_t sem_base;
  uint32_t sem_pending;
  uint32_t sem_pending_last;
  uint32_t undo;
  uint16_t sem_nsems;
  uint16_t _pad;

  semid_ds_32() = delete;

  // Widens this record into a host `semid64_ds`.
  operator struct semid64_ds() const {
    struct semid64_ds Host {};
    Host.sem_perm = sem_perm;

    Host.sem_nsems = sem_nsems;
    Host.sem_ctime = sem_ctime;
    Host.sem_otime = sem_otime;

    // sem_base, sem_pending, sem_pending_last, undo doesn't exist in the definition
    // Kernel doesn't return anything in them
    return Host;
  }

  // Narrows a host `semid64_ds`; fields without a host counterpart and the padding are cleared.
  semid_ds_32(const struct semid64_ds& buf)
    : sem_perm {buf.sem_perm}
    , sem_otime(buf.sem_otime)
    , sem_ctime(buf.sem_ctime)
    , sem_base(0)
    , sem_pending(0)
    , sem_pending_last(0)
    , undo(0)
    , sem_nsems(buf.sem_nsems)
    , _pad(0) {}
};

static_assert(std::is_trivially_copyable_v<semid_ds_32>);
static_assert(sizeof(semid_ds_32) == 44);

// Semaphore set descriptor built on ipc_perm_64. Each timestamp is stored as a low word
// followed by a `_high` word; two reserved words close the record.
struct semid_ds_64 {
  struct ipc_perm_64 sem_perm;
  uint32_t sem_otime;
  uint32_t sem_otime_high;
  uint32_t sem_ctime;
  uint32_t sem_ctime_high;
  uint32_t sem_nsems;
  uint32_t _pad[2];

  semid_ds_64() = delete;

  // Widens this record into a host `semid64_ds`, recombining the split timestamps.
  operator struct semid64_ds() const {
    struct semid64_ds buf {};
    buf.sem_perm = sem_perm;

    buf.sem_otime = sem_otime_high;
    buf.sem_otime <<= 32;
    buf.sem_otime |= sem_otime;
    buf.sem_ctime = sem_ctime_high;
    buf.sem_ctime <<= 32;
    buf.sem_ctime |= sem_ctime;
    buf.sem_nsems = sem_nsems;

    // sem_base, sem_pending, sem_pending_last, undo doesn't exist in the definition
    // Kernel doesn't return anything in them
    return buf;
  }

  // Narrows a host `semid64_ds`: timestamps are split into low and high words. The
  // reserved words are cleared (as semid_ds_32 does for its padding) so the record
  // never carries indeterminate bytes.
  semid_ds_64(const struct semid64_ds& buf)
    : sem_perm {buf.sem_perm} {
    sem_otime = buf.sem_otime;
    sem_otime_high = buf.sem_otime >> 32;
    sem_ctime = buf.sem_ctime;
    sem_ctime_high = buf.sem_ctime >> 32;
    sem_nsems = buf.sem_nsems;
    _pad[0] = _pad[1] = 0;
  }
};

static_assert(std::is_trivially_copyable_v<semid_ds_64>);
static_assert(sizeof(semid_ds_64) == 64);

// Message queue descriptor built on ipc_perm_32. Byte counts exist twice: as 16-bit
// fields (msg_cbytes, msg_qbytes) and as 32-bit `msg_l*` fields.
struct msqid_ds_32 {
  struct ipc_perm_32 msg_perm;
  compat_uptr_t msg_first;
  compat_uptr_t msg_last;
  uint32_t msg_stime;
  uint32_t msg_rtime;
  uint32_t msg_ctime;
  uint32_t msg_lcbytes;
  uint32_t msg_lqbytes;
  uint16_t msg_cbytes;
  uint16_t msg_qnum;
  uint16_t msg_qbytes;
  uint16_t msg_lspid;
  uint16_t msg_lrpid;

  msqid_ds_32() = delete;
  // Widens this record into a host `msqid64_ds`, using the 16-bit count fields.
  operator struct msqid64_ds() const {
    struct msqid64_ds val {};
    // msg_first and msg_last are unused and untouched
    val.msg_perm = msg_perm;
    val.msg_stime = msg_stime;
    val.msg_rtime = msg_rtime;
    val.msg_ctime = msg_ctime;

    val.msg_cbytes = msg_cbytes;
    val.msg_qnum = msg_qnum;
    val.msg_qbytes = msg_qbytes;
    val.msg_lspid = msg_lspid;
    val.msg_lrpid = msg_lrpid;
    return val;
  }

  // Narrows a host `msqid64_ds`.
  // The 16-bit counters saturate at 0xFFFF, while the 32-bit `msg_l*` fields receive the
  // host value truncated to 32 bits.
  // Previously the msg_cbytes clamp was applied twice and msg_qbytes was forced to zero,
  // so the queue size limit was never reported through the 16-bit field.
  msqid_ds_32(const struct msqid64_ds& buf)
    : msg_perm {buf.msg_perm} {
    // Saturates a host counter to the 16-bit range.
    const auto SaturateU16 = [](uint64_t Value) -> uint16_t {
      constexpr uint64_t Max = std::numeric_limits<uint16_t>::max();
      return static_cast<uint16_t>(Value > Max ? Max : Value);
    };

    msg_stime = buf.msg_stime;
    msg_rtime = buf.msg_rtime;
    msg_ctime = buf.msg_ctime;

    msg_cbytes = SaturateU16(buf.msg_cbytes);
    msg_lcbytes = buf.msg_cbytes;

    msg_qnum = SaturateU16(buf.msg_qnum);

    msg_qbytes = SaturateU16(buf.msg_qbytes);
    msg_lqbytes = buf.msg_qbytes;

    msg_lspid = buf.msg_lspid;
    msg_lrpid = buf.msg_lrpid;

    // msg_first and msg_last have no host counterpart; clear them.
    msg_first = msg_last = 0;
  }
};
static_assert(std::is_trivially_copyable_v<msqid_ds_32>);
static_assert(sizeof(msqid_ds_32) == 56);

// Guest-visible 32-bit layout of `msqid64_ds`.
//
// Each timestamp is split into a low and a high 32-bit half; all counters are 32 bits wide.
struct msqid_ds_64 {
  struct ipc_perm_64 msg_perm;
  uint32_t msg_stime;
  uint32_t msg_stime_high;
  uint32_t msg_rtime;
  uint32_t msg_rtime_high;
  uint32_t msg_ctime;
  uint32_t msg_ctime_high;
  uint32_t msg_cbytes;
  uint32_t msg_qnum;
  uint32_t msg_qbytes;
  uint32_t msg_lspid;
  uint32_t msg_lrpid;
  uint32_t _pad[2];

  msqid_ds_64() = delete;

  // Converts the guest structure into the host `msqid64_ds`, recombining the split timestamps.
  operator struct msqid64_ds() const {
    struct msqid64_ds val {};
    val.msg_perm = msg_perm;

    // Each timestamp: place the high half, shift it up, then OR in the low half.
    val.msg_stime = msg_stime_high;
    val.msg_stime <<= 32;
    val.msg_stime |= msg_stime;

    val.msg_rtime = msg_rtime_high;
    val.msg_rtime <<= 32;
    val.msg_rtime |= msg_rtime;

    val.msg_ctime = msg_ctime_high;
    val.msg_ctime <<= 32;
    val.msg_ctime |= msg_ctime;

    val.msg_cbytes = msg_cbytes;
    val.msg_qnum = msg_qnum;
    val.msg_qbytes = msg_qbytes;
    val.msg_lspid = msg_lspid;
    val.msg_lrpid = msg_lrpid;
    return val;
  }

  // Builds the guest structure from the host `msqid64_ds`, splitting each timestamp into two halves.
  msqid_ds_64(const struct msqid64_ds& buf)
    : msg_perm {buf.msg_perm} {
    msg_stime = buf.msg_stime;
    msg_stime_high = buf.msg_stime >> 32;
    msg_rtime = buf.msg_rtime;
    msg_rtime_high = buf.msg_rtime >> 32;
    msg_ctime = buf.msg_ctime;
    msg_ctime_high = buf.msg_ctime >> 32;
    msg_cbytes = buf.msg_cbytes;
    msg_qnum = buf.msg_qnum;
    msg_qbytes = buf.msg_qbytes;
    msg_lspid = buf.msg_lspid;
    msg_lrpid = buf.msg_lrpid;
    // Zero the trailing padding so the guest never sees indeterminate bytes.
    _pad[0] = _pad[1] = 0;
  }
};

static_assert(std::is_trivially_copyable_v<msqid_ds_64>);
static_assert(sizeof(msqid_ds_64) == 88);

// Guest layout of `shminfo` in which every limit is a 32-bit value.
struct FEX_ANNOTATE("fex-match") shminfo_32 {
  uint32_t shmmax;
  uint32_t shmmin;
  uint32_t shmmni;
  uint32_t shmseg;
  uint32_t shmall;

  shminfo_32() = delete;

  // Produces the host `shminfo` carrying the same five limits; any other host fields stay zeroed.
  operator struct shminfo() const {
    struct shminfo Host {};
    Host.shmmax = shmmax;
    Host.shmmin = shmmin;
    Host.shmmni = shmmni;
    Host.shmseg = shmseg;
    Host.shmall = shmall;
    return Host;
  }

  // Captures the five limits from a host `shminfo`, converting each to 32 bits.
  shminfo_32(const struct shminfo& si)
    : shmmax(si.shmmax)
    , shmmin(si.shmmin)
    , shmmni(si.shmmni)
    , shmseg(si.shmseg)
    , shmall(si.shmall) {}
};

static_assert(std::is_trivially_copyable_v<shminfo_32>);
static_assert(sizeof(shminfo_32) == 20);

// Guest layout of `shminfo64`: five limits followed by four unused words.
struct FEX_ANNOTATE("alias-x86_32-shminfo64") FEX_ANNOTATE("fex-match") shminfo_64 {
  compat_ulong_t shmmax;
  compat_ulong_t shmmin;
  compat_ulong_t shmmni;
  compat_ulong_t shmseg;
  compat_ulong_t shmall;
  compat_ulong_t __unused1;
  compat_ulong_t __unused2;
  compat_ulong_t __unused3;
  compat_ulong_t __unused4;

  shminfo_64() = delete;

  // Produces the host `shminfo` carrying the same five limits; the unused words are not transferred.
  operator struct shminfo() const {
    struct shminfo Host {};
    Host.shmmax = shmmax;
    Host.shmmin = shmmin;
    Host.shmmni = shmmni;
    Host.shmseg = shmseg;
    Host.shmall = shmall;
    return Host;
  }

  // Captures the five limits from a host `shminfo` and zeroes the unused words.
  shminfo_64(const struct shminfo& si)
    : shmmax(si.shmmax)
    , shmmin(si.shmmin)
    , shmmni(si.shmmni)
    , shmseg(si.shmseg)
    , shmall(si.shmall)
    , __unused1(0)
    , __unused2(0)
    , __unused3(0)
    , __unused4(0) {}
};

static_assert(std::is_trivially_copyable_v<shminfo_64>);
static_assert(sizeof(shminfo_64) == 36);

// Guest layout of `shm_info`: one int plus five 32-bit counters.
// Only the host-to-guest direction is provided; there is no conversion back to the host type.
struct FEX_ANNOTATE("alias-x86_32-shm_info") FEX_ANNOTATE("fex-match") shm_info_32 {
  int used_ids;
  uint32_t shm_tot;
  uint32_t shm_rss;
  uint32_t shm_swp;
  uint32_t swap_attempts;
  uint32_t swap_successes;

  shm_info_32() = delete;

  // Captures every field from a host `shm_info`, converting the counters to 32 bits.
  shm_info_32(const struct shm_info& si)
    : used_ids(si.used_ids)
    , shm_tot(si.shm_tot)
    , shm_rss(si.shm_rss)
    , shm_swp(si.shm_swp)
    , swap_attempts(si.swap_attempts)
    , swap_successes(si.swap_successes) {}
};

static_assert(std::is_trivially_copyable_v<shm_info_32>);
static_assert(sizeof(shm_info_32) == 24);

// Guest-side structure bundling the five select() parameters (descriptor count, three fd sets, timeout).
// The static_assert following this struct pins its size to 20 bytes.
// NOTE(review): presumably consumed by a legacy 32-bit select entry point that receives all of its
// arguments through a single pointer — confirm against the caller.
struct FEX_ANNOTATE("fex-match") compat_select_args {
  int nfds;
  compat_ptr<fd_set32> readfds;
  compat_ptr<fd_set32> writefds;
  compat_ptr<fd_set32> exceptfds;
  compat_ptr<struct timeval32> timeout;

  // Default construction is disabled.
  compat_select_args() = delete;
};

static_assert(std::is_trivially_copyable_v<compat_select_args>);
static_assert(sizeof(compat_select_args) == 20);

} // namespace FEX::HLE::x32


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/EPoll.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: LinuxSyscalls|syscalls-x86-64 ~ x86-64 specific syscall implementations
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/Types.h"

#include <FEXCore/fextl/vector.h>

#include <algorithm>
#include <cstdint>
#include <stddef.h>
#include <sys/epoll.h>
#include <syscall.h>
#include <unistd.h>

struct timespec;
namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x64 {
// Registers the x86-64 epoll syscall handlers.
//
// Every handler performs the host syscall with the host `struct epoll_event` and converts to and from the
// guest's `FEX::HLE::epoll_event_x86` representation around it.
void RegisterEpoll(FEX::HLE::SyscallHandler* Handler) {
  // epoll_wait is forwarded to the host's epoll_pwait with no signal mask.
  REGISTER_SYSCALL_IMPL_X64(
    epoll_wait, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevents, int timeout) -> uint64_t {
      // Host-layout scratch buffer; a negative count yields an empty buffer.
      fextl::vector<struct epoll_event> HostEvents(std::max(0, maxevents));
      uint64_t Result = ::syscall(SYSCALL_DEF(epoll_pwait), epfd, HostEvents.data(), maxevents, timeout, nullptr, 8);

      if (Result != -1) {
        // On success Result is the number of events reported; convert exactly that many for the guest.
        FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::epoll_event_x86) * Result);
        std::copy_n(HostEvents.begin(), Result, events);
      }
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X64(epoll_ctl, [](FEXCore::Core::CpuStateFrame* Frame, int epfd, int op, int fd, FEX::HLE::epoll_event_x86* event) -> uint64_t {
    struct epoll_event HostEvent;
    struct epoll_event* HostEventPtr = nullptr;
    if (event != nullptr) {
      // Convert the guest event into the host layout; a null guest pointer is passed through as null.
      FaultSafeUserMemAccess::VerifyIsReadable(event, sizeof(FEX::HLE::epoll_event_x86));
      HostEvent = *event;
      HostEventPtr = &HostEvent;
    }

    uint64_t Result = ::syscall(SYSCALL_DEF(epoll_ctl), epfd, op, fd, HostEventPtr);

    if (event != nullptr && Result != -1) {
      // On success the host event is converted back into the guest structure.
      FaultSafeUserMemAccess::VerifyIsWritable(event, sizeof(FEX::HLE::epoll_event_x86));
      *event = HostEvent;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(epoll_pwait,
                            [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevent, int timeout,
                               const uint64_t* sigmask, size_t sigsetsize) -> uint64_t {
                              // Host-layout scratch buffer; a negative count yields an empty buffer.
                              fextl::vector<struct epoll_event> HostEvents(std::max(0, maxevent));

                              uint64_t Result =
                                ::syscall(SYSCALL_DEF(epoll_pwait), epfd, HostEvents.data(), maxevent, timeout, sigmask, sigsetsize);

                              if (Result != -1) {
                                // Convert the reported events into the guest buffer.
                                FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::epoll_event_x86) * Result);
                                std::copy_n(HostEvents.begin(), Result, events);
                              }

                              SYSCALL_ERRNO();
                            });

  REGISTER_SYSCALL_IMPL_X64(epoll_pwait2,
                            [](FEXCore::Core::CpuStateFrame* Frame, int epfd, FEX::HLE::epoll_event_x86* events, int maxevent,
                               timespec* timeout, const uint64_t* sigmask, size_t sigsetsize) -> uint64_t {
                              // Host-layout scratch buffer; a negative count yields an empty buffer.
                              fextl::vector<struct epoll_event> HostEvents(std::max(0, maxevent));

                              uint64_t Result =
                                ::syscall(SYSCALL_DEF(epoll_pwait2), epfd, HostEvents.data(), maxevent, timeout, sigmask, sigsetsize);

                              if (Result != -1) {
                                // Convert the reported events into the guest buffer.
                                FaultSafeUserMemAccess::VerifyIsWritable(events, sizeof(FEX::HLE::epoll_event_x86) * Result);
                                std::copy_n(HostEvents.begin(), Result, events);
                              }

                              SYSCALL_ERRNO();
                            });
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/FD.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/FileManagement.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/Types.h"

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Utils/MathUtils.h>

#include <fcntl.h>
#include <poll.h>
#include <stdint.h>
#include <sys/select.h>
#include <sys/stat.h>
#include <sys/statfs.h>
#include <sys/time.h>
#include <sys/uio.h>
#include <sys/sendfile.h>
#include <sys/timerfd.h>
#include <syscall.h>
#include <time.h>
#include <unistd.h>

namespace FEX::HLE::x64 {
// Registers the x86-64 specific file-descriptor syscall handlers (select, fcntl, the stat family,
// getdents/getdents64, dup2 and statfs).
void RegisterFD(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X64(
    select, [](FEXCore::Core::CpuStateFrame* Frame, int nfds, fd_set* readfds, fd_set* writefds, fd_set* exceptfds, struct timeval* timeout) -> uint64_t {
      // An fd_set is a bitmap with one bit per descriptor, so `nfds` descriptors occupy
      // ceil(nfds / 64) 64-bit words. A non-positive nfds covers no bytes at all.
      const size_t FDSetBytes = nfds > 0 ? ((static_cast<size_t>(nfds) + 63) / 64) * sizeof(uint64_t) : 0;

      ///< All FD arrays need to be writable
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(readfds, FDSetBytes);
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(writefds, FDSetBytes);
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(exceptfds, FDSetBytes);
      FaultSafeUserMemAccess::VerifyIsReadableOrNull(timeout, sizeof(*timeout));
      ///< timeout doesn't actually need to be writable, this is a quirk of glibc. Kernel just doesn't update timeout if not possible.
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(timeout, sizeof(*timeout));
      uint64_t Result = ::select(nfds, readfds, writefds, exceptfds, timeout);
      SYSCALL_ERRNO();
    });

  // fcntl: the file-status flags are remapped between the x86 and host encodings for
  // F_GETFL and F_SETFL; every other command is passed through unchanged.
  REGISTER_SYSCALL_IMPL_X64(fcntl, [](FEXCore::Core::CpuStateFrame* Frame, int fd, int cmd, uint64_t arg) -> uint64_t {
    uint64_t Result {};
    switch (cmd) {
    case F_GETFL:
      Result = ::fcntl(fd, cmd, arg);
      if (Result != -1) {
        // Only remap a successful result; an error value must reach SYSCALL_ERRNO untouched.
        Result = FEX::HLE::RemapToX86Flags(Result);
      }
      break;
    case F_SETFL: Result = ::fcntl(fd, cmd, FEX::HLE::RemapFromX86Flags(arg)); break;
    default: Result = ::fcntl(fd, cmd, arg); break;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(
    futimesat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, const struct timeval times[2]) -> uint64_t {
      return FEX::HLE::futimesat_compat<timeval>(dirfd, pathname, times);
    });

  // stat family: the host `struct stat` is filled first and converted into the guest
  // `guest_stat` layout only when the call succeeded.
  REGISTER_SYSCALL_IMPL_X64(stat, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, FEX::HLE::x64::guest_stat* buf) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsStringReadableMaxSize(pathname, PATH_MAX);
    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Stat(pathname, &host_stat);
    if (Result != -1) {
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(fstat, [](FEXCore::Core::CpuStateFrame* Frame, int fd, FEX::HLE::x64::guest_stat* buf) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    struct stat host_stat;
    uint64_t Result = ::fstat(fd, &host_stat);
    if (Result != -1) {
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(lstat, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, FEX::HLE::x64::guest_stat* buf) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsStringReadableMaxSize(path, PATH_MAX);
    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    struct stat host_stat;
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Lstat(path, &host_stat);
    if (Result != -1) {
      *buf = host_stat;
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(
    newfstatat, [](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, FEX::HLE::x64::guest_stat* buf, int flag) -> uint64_t {
      FaultSafeUserMemAccess::VerifyIsStringReadableMaxSize(pathname, PATH_MAX);
      FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
      struct stat host_stat;
      uint64_t Result = FEX::HLE::_SyscallHandler->FM.NewFSStatAt(dirfd, pathname, &host_stat, flag);
      if (Result != -1) {
        *buf = host_stat;
      }
      SYSCALL_ERRNO();
    });

  REGISTER_SYSCALL_IMPL_X64(getdents, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t {
    return GetDentsEmulation<false>(fd, reinterpret_cast<FEX::HLE::x64::linux_dirent*>(dirp), count);
  });

  REGISTER_SYSCALL_IMPL_X64(getdents64, [](FEXCore::Core::CpuStateFrame* Frame, int fd, void* dirp, uint32_t count) -> uint64_t {
    uint64_t Result = ::syscall(SYSCALL_DEF(getdents64), static_cast<uint64_t>(fd), dirp, static_cast<uint64_t>(count));
    if (Result != -1) {
      // Check for and hide the RootFS FD
      // `i` is a byte offset into the returned records and `Result` is the number of valid bytes.
      for (size_t i = 0; i < Result;) {
        linux_dirent_64* Incoming = (linux_dirent_64*)(reinterpret_cast<uint64_t>(dirp) + i);
        if (FEX::HLE::_SyscallHandler->FM.IsProtectedFile(fd, Incoming->d_ino)) {
          // Drop this record: shrink the byte count, then slide the remaining records down over it.
          // `i` is not advanced because a different record now lives at the same offset.
          Result -= Incoming->d_reclen;
          memmove(Incoming, (linux_dirent_64*)(reinterpret_cast<uint64_t>(Incoming) + Incoming->d_reclen), Result - i);
          continue;
        }
        i += Incoming->d_reclen;
      }
    }
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(dup2, [](FEXCore::Core::CpuStateFrame* Frame, int oldfd, int newfd) -> uint64_t {
    uint64_t Result = ::dup2(oldfd, newfd);
    SYSCALL_ERRNO();
  });

  REGISTER_SYSCALL_IMPL_X64(statfs, [](FEXCore::Core::CpuStateFrame* Frame, const char* path, struct statfs* buf) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsStringReadableMaxSize(path, PATH_MAX);
    FaultSafeUserMemAccess::VerifyIsWritable(buf, sizeof(*buf));
    uint64_t Result = FEX::HLE::_SyscallHandler->FM.Statfs(path, buf);
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Info.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include <FEXCore/Core/Context.h>

#include <cstring>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <sys/sysinfo.h>

namespace FEX::HLE::x64 {
// Registers the handler for `map_shadow_stack`, selected by the host kernel version.
void RegisterInfo(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  const bool HostIsAtLeast_6_6 = Handler->IsHostKernelVersionAtLeast(6, 6, 0);

  if (!HostIsAtLeast_6_6) {
    // Hosts older than 6.6.0 get the generic unimplemented-syscall handler.
    REGISTER_SYSCALL_IMPL_X64(map_shadow_stack, UnimplementedSyscallSafe);
  } else {
    REGISTER_SYSCALL_IMPL_X64(map_shadow_stack, [](FEXCore::Core::CpuStateFrame* Frame, uint64_t addr, uint64_t size, uint32_t flags) -> uint64_t {
      // Claim that shadow stack isn't supported.
      return -EOPNOTSUPP;
    });
  }
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/HelperDefines.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

// Token-pasting helpers: STRINGY(x, y) concatenates x and y into a single token. The extra level of
// indirection lets arguments such as __LINE__ be macro-expanded before they are pasted.
#define STRINGY2(x, y) x##y
#define STRINGY(x, y) STRINGY2(x, y)

// Stringification helpers: STRINGY1(x) produces a string literal of x after x has been macro-expanded
// (STRINGY1 -> STRINGY12 -> STRINGY11, where only the last step applies the # operator).
#define STRINGY12(x) STRINGY11(x)
#define STRINGY11(x) #x
#define STRINGY1(x) STRINGY12(x)

#ifndef _BASIC_META
// Declares a uint8_t array typedef whose length is the ioctl number `x` and whose name is `_<x><line>`,
// where <line> is the __LINE__ of the invocation. The typedef carries two annotations: "fex-match" and
// "ioctl-alias-x86_64-_<x><line>".
// Use this for the super basic ioctl passthrough path, where the definition matches upstream.
// Only defined when the includer has not already provided its own _BASIC_META.
#define _BASIC_META(x)                   \
  __attribute__((annotate("fex-match"))) \
  __attribute__((annotate("ioctl-alias-x86_64-_" #x STRINGY1(__LINE__)))) typedef uint8_t STRINGY(_##x, __LINE__)[x];
#endif

#ifndef _BASIC_META_VAR
// This is similar to _BASIC_META except that it allows you to pass variadic arguments to the original ioctl definition:
// `x` is invoked as `x(args)` to compute the array length. The typedef name and annotations are formed
// exactly as in _BASIC_META (from `x` and the __LINE__ of the invocation).
#define _BASIC_META_VAR(x, args...)      \
  __attribute__((annotate("fex-match"))) \
  __attribute__((annotate("ioctl-alias-x86_64-_" #x STRINGY1(__LINE__)))) typedef uint8_t STRINGY(_##x, __LINE__)[x(args)];
#endif

#ifndef _CUSTOM_META
// For ioctls whose definition doesn't match across architectures.
// Generates a FEX_<name> version of the ioctl with a custom ioctl definition.
// eg: _CUSTOM_META(DRM_IOCTL_AMDGPU_GEM_METADATA, DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDGPU_GEM_METADATA, FEX::HLE::x64::AMDGPU::fex_drm_amdgpu_gem_metadata));
// Allows you to effectively pass in the original ioctl definition with a custom type replacing the upstream type.
//
// Emits three declarations:
//   _meta_<name> - uint8_t array typedef sized by the existing definition `name`
//   _<name>      - uint8_t array typedef sized by `ioctl_num`, annotated "ioctl-alias-x86_64-_meta_<name>"
//   FEX_<name>   - constexpr uint32_t holding `ioctl_num`
#define _CUSTOM_META(name, ioctl_num)                                                              \
  typedef uint8_t _meta_##name[name];                                                              \
  __attribute__((annotate("ioctl-alias-x86_64-_meta_" #name))) typedef uint8_t _##name[ioctl_num]; \
  constexpr static uint32_t FEX_##name = ioctl_num;
#endif

#ifndef _CUSTOM_META_MATCH
// Variant of _CUSTOM_META that also generates a FEX_<name> version of the ioctl from a custom definition,
// with two differences visible in the expansion below:
//   - _meta_<name> is sized by `ioctl_num` rather than by the existing definition `name`
//   - _<name> is annotated "fex-match" instead of carrying an "ioctl-alias-x86_64-..." annotation
// eg: _CUSTOM_META_MATCH(SOME_IOCTL, _IOWR(type, nr, custom_struct))
// NOTE(review): presumably intended for custom definitions that need no alias to an upstream type — confirm against users.
#define _CUSTOM_META_MATCH(name, ioctl_num)                                  \
  typedef uint8_t _meta_##name[ioctl_num];                                   \
  __attribute__((annotate("fex-match"))) typedef uint8_t _##name[ioctl_num]; \
  constexpr static uint32_t FEX_##name = ioctl_num;
#endif

#ifndef _CUSTOM_META_OFFSET
// Same as _CUSTOM_META but allows you to define multiple types from an offset.
// Required for ioctls that cover a range of numbers, which some do.
// All three generated declarations use `ioctl_num + offset`; unlike _CUSTOM_META, _meta_<name> is also
// sized by that sum rather than by the existing definition `name`.
#define _CUSTOM_META_OFFSET(name, ioctl_num, offset)                                                        \
  typedef uint8_t _meta_##name[ioctl_num + offset];                                                         \
  __attribute__((annotate("ioctl-alias-x86_64-_meta_" #name))) typedef uint8_t _##name[ioctl_num + offset]; \
  constexpr static uint32_t FEX_##name = ioctl_num + offset;
#endif


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/amdgpu_drm.inl
================================================
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_CREATE) // AMDGPU DRM ioctl list: one annotated typedef per ioctl. _BASIC_META (HelperDefines.h) embeds __LINE__ in each generated name.
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_MMAP)
_BASIC_META(DRM_IOCTL_AMDGPU_CTX)
_BASIC_META(DRM_IOCTL_AMDGPU_BO_LIST)
_BASIC_META(DRM_IOCTL_AMDGPU_CS)
_BASIC_META(DRM_IOCTL_AMDGPU_INFO)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_METADATA)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_WAIT_IDLE)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_VA)
_BASIC_META(DRM_IOCTL_AMDGPU_WAIT_CS)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_OP)
_BASIC_META(DRM_IOCTL_AMDGPU_GEM_USERPTR)
_BASIC_META(DRM_IOCTL_AMDGPU_WAIT_FENCES)
_BASIC_META(DRM_IOCTL_AMDGPU_VM)
_BASIC_META(DRM_IOCTL_AMDGPU_FENCE_TO_HANDLE)
_BASIC_META(DRM_IOCTL_AMDGPU_SCHED)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/asound.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <sound/asound.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {

// ALSA (sound/asound.h) ioctl metadata. The typedefs themselves are generated by the entries in asound.inl.
namespace asound {
// Fallback definitions, used only when the included headers have not already defined these macros.
// NOTE(review): presumably covers kernel header versions that lack them — confirm.
#ifndef SNDRV_TIMER_IOCTL_TREAD_OLD
#define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int)
#endif

#ifndef SNDRV_TIMER_IOCTL_TREAD64
#define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int)
#endif

#include "LinuxSyscalls/x64/Ioctl/asound.inl"
} // namespace asound
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/asound.inl
================================================
_BASIC_META(SNDRV_HWDEP_IOCTL_PVERSION) // ALSA ioctl list: one annotated typedef per ioctl. _BASIC_META (HelperDefines.h) embeds __LINE__ in each generated name.
_BASIC_META(SNDRV_HWDEP_IOCTL_INFO)
_BASIC_META(SNDRV_HWDEP_IOCTL_DSP_STATUS)
//_BASIC_META(SNDRV_HWDEP_IOCTL_DSP_LOAD)
// Entries that are commented out (including the "XXX:" ones below) generate no typedef; the reason is not recorded in this file.
_BASIC_META(SNDRV_PCM_IOCTL_PVERSION)
_BASIC_META(SNDRV_PCM_IOCTL_INFO)
_BASIC_META(SNDRV_PCM_IOCTL_TSTAMP)
_BASIC_META(SNDRV_PCM_IOCTL_TTSTAMP)
_BASIC_META(SNDRV_PCM_IOCTL_USER_PVERSION)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_HW_REFINE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_HW_PARAMS)
_BASIC_META(SNDRV_PCM_IOCTL_HW_FREE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_SW_PARAMS)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_STATUS)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_DELAY)
_BASIC_META(SNDRV_PCM_IOCTL_HWSYNC)
// XXX: _BASIC_META(__SNDRV_PCM_IOCTL_SYNC_PTR)
// XXX: _BASIC_META(__SNDRV_PCM_IOCTL_SYNC_PTR64)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_SYNC_PTR)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_STATUS_EXT)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_CHANNEL_INFO)
_BASIC_META(SNDRV_PCM_IOCTL_PREPARE)
_BASIC_META(SNDRV_PCM_IOCTL_RESET)
_BASIC_META(SNDRV_PCM_IOCTL_START)
_BASIC_META(SNDRV_PCM_IOCTL_DROP)
_BASIC_META(SNDRV_PCM_IOCTL_DRAIN)
_BASIC_META(SNDRV_PCM_IOCTL_PAUSE)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_REWIND)
_BASIC_META(SNDRV_PCM_IOCTL_RESUME)
_BASIC_META(SNDRV_PCM_IOCTL_XRUN)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_FORWARD)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_WRITEI_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_READI_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_WRITEN_FRAMES)
// XXX: _BASIC_META(SNDRV_PCM_IOCTL_READN_FRAMES)
_BASIC_META(SNDRV_PCM_IOCTL_LINK)
_BASIC_META(SNDRV_PCM_IOCTL_UNLINK)

_BASIC_META(SNDRV_RAWMIDI_IOCTL_PVERSION)
_BASIC_META(SNDRV_RAWMIDI_IOCTL_INFO)
// XXX: _BASIC_META(SNDRV_RAWMIDI_IOCTL_PARAMS)
// XXX: _BASIC_META(SNDRV_RAWMIDI_IOCTL_STATUS)
_BASIC_META(SNDRV_RAWMIDI_IOCTL_DROP)
_BASIC_META(SNDRV_RAWMIDI_IOCTL_DRAIN)

_BASIC_META(SNDRV_TIMER_IOCTL_PVERSION)
_BASIC_META(SNDRV_TIMER_IOCTL_NEXT_DEVICE)
_BASIC_META(SNDRV_TIMER_IOCTL_TREAD_OLD)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GINFO)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GPARAMS)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_GSTATUS)
_BASIC_META(SNDRV_TIMER_IOCTL_SELECT)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_INFO)
_BASIC_META(SNDRV_TIMER_IOCTL_PARAMS)
// XXX: _BASIC_META(SNDRV_TIMER_IOCTL_STATUS)
_BASIC_META(SNDRV_TIMER_IOCTL_START)
_BASIC_META(SNDRV_TIMER_IOCTL_STOP)
_BASIC_META(SNDRV_TIMER_IOCTL_CONTINUE)
_BASIC_META(SNDRV_TIMER_IOCTL_PAUSE)
_BASIC_META(SNDRV_TIMER_IOCTL_TREAD64)

_BASIC_META(SNDRV_CTL_IOCTL_PVERSION)
_BASIC_META(SNDRV_CTL_IOCTL_CARD_INFO)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_LIST)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_INFO)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_READ)
// XXX: _BASIC_META(SNDRV_CTL_IOCTL_ELEM_WRITE)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_LOCK)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_UNLOCK)
_BASIC_META(SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_ADD)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_REPLACE)
_BASIC_META(SNDRV_CTL_IOCTL_ELEM_REMOVE)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_READ)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_WRITE)
_BASIC_META(SNDRV_CTL_IOCTL_TLV_COMMAND)
_BASIC_META(SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_HWDEP_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_INFO)
_BASIC_META(SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE)
_BASIC_META(SNDRV_CTL_IOCTL_POWER)
_BASIC_META(SNDRV_CTL_IOCTL_POWER_STATE)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/drm.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/CompilerDefs.h>

#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
extern "C" {
#include "fex-drm/drm.h"
#include "fex-drm/drm_mode.h"
#include "fex-drm/i915_drm.h"
#include "fex-drm/amdgpu_drm.h"
#include "fex-drm/lima_drm.h"
#include "fex-drm/panfrost_drm.h"
#include "fex-drm/msm_drm.h"
#include "fex-drm/nouveau_drm.h"
#include "fex-drm/vc4_drm.h"
#include "fex-drm/v3d_drm.h"
#include "fex-drm/virtgpu_drm.h"
}
#include <sys/ioctl.h>

// Member-copy shorthands, available only while the .inl files below are being included:
//   CPYT(x) copies member `x` of the enclosing scope into `val.x`
//   CPYF(x) copies `val.x` into member `x` of the enclosing scope
// Both expect a variable named `val` to be in scope at the point of use.
#define CPYT(x) val.x = x
#define CPYF(x) x = val.x
namespace FEX::HLE::x64 {

// Each .inl contributes the ioctl metadata for one DRM driver/interface.
#include "LinuxSyscalls/x64/Ioctl/drm.inl"
#include "LinuxSyscalls/x64/Ioctl/amdgpu_drm.inl"
#include "LinuxSyscalls/x64/Ioctl/msm_drm.inl"

} // namespace FEX::HLE::x64
// Keep the shorthands from leaking into files that include this header.
#undef CPYT
#undef CPYF


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/drm.inl
================================================
_CUSTOM_META(DRM_IOCTL_VERSION, DRM_IOWR(0x00, struct drm_version)) // Custom definition: also emits FEX_DRM_IOCTL_VERSION (see _CUSTOM_META in HelperDefines.h).
_BASIC_META(DRM_IOCTL_GET_UNIQUE) // Remaining entries: one annotated typedef per ioctl; _BASIC_META embeds __LINE__ in each generated name.
_BASIC_META(DRM_IOCTL_GET_MAGIC)
_BASIC_META(DRM_IOCTL_IRQ_BUSID)
_BASIC_META(DRM_IOCTL_GET_MAP)
_BASIC_META(DRM_IOCTL_GET_CLIENT)
_BASIC_META(DRM_IOCTL_GET_STATS)
_BASIC_META(DRM_IOCTL_SET_VERSION)
_BASIC_META(DRM_IOCTL_MODESET_CTL)
_BASIC_META(DRM_IOCTL_GEM_CLOSE)
_BASIC_META(DRM_IOCTL_GEM_FLINK)
_BASIC_META(DRM_IOCTL_GEM_OPEN)
_BASIC_META(DRM_IOCTL_GET_CAP)
_BASIC_META(DRM_IOCTL_SET_CLIENT_CAP)

_BASIC_META(DRM_IOCTL_SET_UNIQUE)
_BASIC_META(DRM_IOCTL_AUTH_MAGIC)
_BASIC_META(DRM_IOCTL_BLOCK)
_BASIC_META(DRM_IOCTL_UNBLOCK)
_BASIC_META(DRM_IOCTL_CONTROL)
_BASIC_META(DRM_IOCTL_ADD_MAP)
_BASIC_META(DRM_IOCTL_ADD_BUFS)
_BASIC_META(DRM_IOCTL_MARK_BUFS)
_BASIC_META(DRM_IOCTL_INFO_BUFS)
_BASIC_META(DRM_IOCTL_MAP_BUFS)
_BASIC_META(DRM_IOCTL_FREE_BUFS)

_BASIC_META(DRM_IOCTL_RM_MAP)

_BASIC_META(DRM_IOCTL_SET_SAREA_CTX)
_BASIC_META(DRM_IOCTL_GET_SAREA_CTX)

_BASIC_META(DRM_IOCTL_SET_MASTER)
_BASIC_META(DRM_IOCTL_DROP_MASTER)

_BASIC_META(DRM_IOCTL_ADD_CTX)
_BASIC_META(DRM_IOCTL_RM_CTX)
_BASIC_META(DRM_IOCTL_MOD_CTX)
_BASIC_META(DRM_IOCTL_GET_CTX)
_BASIC_META(DRM_IOCTL_SWITCH_CTX)
_BASIC_META(DRM_IOCTL_NEW_CTX)
_BASIC_META(DRM_IOCTL_RES_CTX)
_BASIC_META(DRM_IOCTL_ADD_DRAW)
_BASIC_META(DRM_IOCTL_RM_DRAW)
_BASIC_META(DRM_IOCTL_DMA)
_BASIC_META(DRM_IOCTL_LOCK)
_BASIC_META(DRM_IOCTL_UNLOCK)
_BASIC_META(DRM_IOCTL_FINISH)

_BASIC_META(DRM_IOCTL_PRIME_HANDLE_TO_FD)
_BASIC_META(DRM_IOCTL_PRIME_FD_TO_HANDLE)

_BASIC_META(DRM_IOCTL_AGP_ACQUIRE)
_BASIC_META(DRM_IOCTL_AGP_RELEASE)
// XXX: _BASIC_META(DRM_IOCTL_AGP_ENABLE)
// XXX: _BASIC_META(DRM_IOCTL_AGP_INFO)
// XXX: _BASIC_META(DRM_IOCTL_AGP_ALLOC)
// XXX: _BASIC_META(DRM_IOCTL_AGP_FREE)
// XXX: _BASIC_META(DRM_IOCTL_AGP_BIND)
// XXX: _BASIC_META(DRM_IOCTL_AGP_UNBIND)
// The "XXX:" entries above are commented out and generate no typedef; the reason is not recorded in this file.
_BASIC_META(DRM_IOCTL_SG_ALLOC)
_BASIC_META(DRM_IOCTL_SG_FREE)

_BASIC_META(DRM_IOCTL_WAIT_VBLANK)

_BASIC_META(DRM_IOCTL_CRTC_GET_SEQUENCE)
_BASIC_META(DRM_IOCTL_CRTC_QUEUE_SEQUENCE)

_BASIC_META(DRM_IOCTL_UPDATE_DRAW)

_BASIC_META(DRM_IOCTL_MODE_GETRESOURCES)
_BASIC_META(DRM_IOCTL_MODE_GETCRTC)
_BASIC_META(DRM_IOCTL_MODE_SETCRTC)
_BASIC_META(DRM_IOCTL_MODE_CURSOR)
_BASIC_META(DRM_IOCTL_MODE_GETGAMMA)
_BASIC_META(DRM_IOCTL_MODE_SETGAMMA)
_BASIC_META(DRM_IOCTL_MODE_GETENCODER)
_BASIC_META(DRM_IOCTL_MODE_GETCONNECTOR)
_BASIC_META(DRM_IOCTL_MODE_ATTACHMODE)
_BASIC_META(DRM_IOCTL_MODE_DETACHMODE)

_BASIC_META(DRM_IOCTL_MODE_GETPROPERTY)
_BASIC_META(DRM_IOCTL_MODE_SETPROPERTY)
_BASIC_META(DRM_IOCTL_MODE_GETPROPBLOB)
_BASIC_META(DRM_IOCTL_MODE_GETFB)
_BASIC_META(DRM_IOCTL_MODE_ADDFB)
_BASIC_META(DRM_IOCTL_MODE_RMFB)
_BASIC_META(DRM_IOCTL_MODE_PAGE_FLIP)
_BASIC_META(DRM_IOCTL_MODE_DIRTYFB)

_BASIC_META(DRM_IOCTL_MODE_CREATE_DUMB)
_BASIC_META(DRM_IOCTL_MODE_MAP_DUMB)
_BASIC_META(DRM_IOCTL_MODE_DESTROY_DUMB)
_BASIC_META(DRM_IOCTL_MODE_GETPLANERESOURCES)
_BASIC_META(DRM_IOCTL_MODE_GETPLANE)
_BASIC_META(DRM_IOCTL_MODE_SETPLANE)
_BASIC_META(DRM_IOCTL_MODE_ADDFB2)
_BASIC_META(DRM_IOCTL_MODE_OBJ_GETPROPERTIES)
_BASIC_META(DRM_IOCTL_MODE_OBJ_SETPROPERTY)
_BASIC_META(DRM_IOCTL_MODE_CURSOR2)
_BASIC_META(DRM_IOCTL_MODE_ATOMIC)
_BASIC_META(DRM_IOCTL_MODE_CREATEPROPBLOB)
_BASIC_META(DRM_IOCTL_MODE_DESTROYPROPBLOB)

_BASIC_META(DRM_IOCTL_SYNCOBJ_CREATE)
_BASIC_META(DRM_IOCTL_SYNCOBJ_DESTROY)
_BASIC_META(DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD)
_BASIC_META(DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE)
_BASIC_META(DRM_IOCTL_SYNCOBJ_WAIT)
_BASIC_META(DRM_IOCTL_SYNCOBJ_RESET)
_BASIC_META(DRM_IOCTL_SYNCOBJ_SIGNAL)

_BASIC_META(DRM_IOCTL_MODE_CREATE_LEASE)
_BASIC_META(DRM_IOCTL_MODE_LIST_LESSEES)
_BASIC_META(DRM_IOCTL_MODE_GET_LEASE)
_BASIC_META(DRM_IOCTL_MODE_REVOKE_LEASE)

_BASIC_META(DRM_IOCTL_SYNCOBJ_TIMELINE_WAIT)
_BASIC_META(DRM_IOCTL_SYNCOBJ_QUERY)
_BASIC_META(DRM_IOCTL_SYNCOBJ_TRANSFER)
_BASIC_META(DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL)

_BASIC_META(DRM_IOCTL_MODE_GETFB2)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/ext_fs.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/blktrace_api.h>
#include <linux/fs.h>
#include <linux/fiemap.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {

// Block-device (BLK*), file (FI*) and filesystem (FS_IOC*) ioctl metadata; the typedefs are generated by
// the entries in ext_fs.inl.
namespace ext_fs {
#include "LinuxSyscalls/x64/Ioctl/ext_fs.inl"
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/ext_fs.inl
================================================
_BASIC_META(BLKROSET) // Block/file/filesystem ioctl list: one annotated typedef per ioctl. _BASIC_META (HelperDefines.h) embeds __LINE__ in each generated name.
_BASIC_META(BLKROGET)
_BASIC_META(BLKRRPART)
_BASIC_META(BLKGETSIZE)
_BASIC_META(BLKFLSBUF)
_BASIC_META(BLKRASET)
_BASIC_META(BLKRAGET)
_BASIC_META(BLKFRASET)
_BASIC_META(BLKFRAGET)
_BASIC_META(BLKSECTSET)
_BASIC_META(BLKSECTGET)
_BASIC_META(BLKSSZGET)

_BASIC_META(BLKBSZGET)
_BASIC_META(BLKBSZSET)
_BASIC_META(BLKGETSIZE64)
_BASIC_META(BLKTRACESETUP)
_BASIC_META(BLKTRACESTART)
_BASIC_META(BLKTRACESTOP)
_BASIC_META(BLKTRACETEARDOWN)
_BASIC_META(BLKDISCARD)
_BASIC_META(BLKIOMIN)
_BASIC_META(BLKIOOPT)
_BASIC_META(BLKALIGNOFF)
_BASIC_META(BLKPBSZGET)
_BASIC_META(BLKDISCARDZEROES)
_BASIC_META(BLKSECDISCARD)
_BASIC_META(BLKROTATIONAL)
_BASIC_META(BLKZEROOUT)

_BASIC_META(FIBMAP)
_BASIC_META(FIGETBSZ)
_BASIC_META(FIFREEZE)
_BASIC_META(FITHAW)
_BASIC_META(FITRIM)
_BASIC_META(FICLONE)
_BASIC_META(FICLONERANGE)
_BASIC_META(FIDEDUPERANGE)

_BASIC_META(FS_IOC_GETFLAGS)
_BASIC_META(FS_IOC_SETFLAGS)
_BASIC_META(FS_IOC_GETVERSION)
_BASIC_META(FS_IOC_SETVERSION)
_BASIC_META(FS_IOC_FIEMAP)
_BASIC_META(FS_IOC32_GETFLAGS)
_BASIC_META(FS_IOC32_SETFLAGS)
_BASIC_META(FS_IOC32_GETVERSION)
_BASIC_META(FS_IOC32_SETVERSION)
_BASIC_META(FS_IOC_FSGETXATTR)
_BASIC_META(FS_IOC_FSSETXATTR)
_BASIC_META(FS_IOC_GETFSLABEL)
_BASIC_META(FS_IOC_SETFSLABEL)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/f2fs.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {
// F2FS ioctl metadata. The ioctl numbers are defined here and the typedefs are generated by the entries in f2fs.inl.
namespace f2fs {
  // There are no userspace definitions for these.
  // We must define everything ourselves.
  // Ioctls whose argument is a struct are left commented out below (and in f2fs.inl); only the ones
  // taking no argument or a fixed-width integer are defined.
  constexpr uint32_t F2FS_IOCTL_MAGIC = 0xf5;
#define F2FS_IOC_START_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 1)
#define F2FS_IOC_COMMIT_ATOMIC_WRITE _IO(F2FS_IOCTL_MAGIC, 2)
#define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3)
#define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4)
#define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5)
#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, uint32_t)
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
//#define F2FS_IOC_DEFRAGMENT         _IOWR(F2FS_IOCTL_MAGIC, 8,    \
//                                          struct f2fs_defragment)
//#define F2FS_IOC_MOVE_RANGE         _IOWR(F2FS_IOCTL_MAGIC, 9,    \
//                                          struct f2fs_move_range)
//#define F2FS_IOC_FLUSH_DEVICE       _IOW(F2FS_IOCTL_MAGIC, 10,    \
//                                          struct f2fs_flush_device)
//#define F2FS_IOC_GARBAGE_COLLECT_RANGE    _IOW(F2FS_IOCTL_MAGIC, 11,    \
//                                          struct f2fs_gc_range)
#define F2FS_IOC_GET_FEATURES _IOR(F2FS_IOCTL_MAGIC, 12, uint32_t)
#define F2FS_IOC_SET_PIN_FILE _IOW(F2FS_IOCTL_MAGIC, 13, uint32_t)
#define F2FS_IOC_GET_PIN_FILE _IOR(F2FS_IOCTL_MAGIC, 14, uint32_t)
#define F2FS_IOC_PRECACHE_EXTENTS _IO(F2FS_IOCTL_MAGIC, 15)
#define F2FS_IOC_RESIZE_FS _IOW(F2FS_IOCTL_MAGIC, 16, uint64_t)
#define F2FS_IOC_GET_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 17, uint64_t)
#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 18, uint64_t)
#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS _IOR(F2FS_IOCTL_MAGIC, 19, uint64_t)
//#define F2FS_IOC_SEC_TRIM_FILE            _IOW(F2FS_IOCTL_MAGIC, 20,    \
//                                          struct f2fs_sectrim_range)
#include "LinuxSyscalls/x64/Ioctl/f2fs.inl"
} // namespace f2fs
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/f2fs.inl
================================================
// X-macro list of the F2FS ioctl requests defined in f2fs.h.
// This file has no include guard on purpose: it is meant to be #included after the
// _BASIC_META/_CUSTOM_META helpers have been defined (HelperDefines.h, not shown here).
// Commented-out entries correspond to the struct-argument requests that f2fs.h also leaves disabled.
_BASIC_META(F2FS_IOC_START_ATOMIC_WRITE)
_BASIC_META(F2FS_IOC_COMMIT_ATOMIC_WRITE)
_BASIC_META(F2FS_IOC_START_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_RELEASE_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_ABORT_VOLATILE_WRITE)
_BASIC_META(F2FS_IOC_GARBAGE_COLLECT)
_BASIC_META(F2FS_IOC_WRITE_CHECKPOINT)
//_CUSTOM_META(F2FS_IOC_DEFRAGMENT, XXX)
//_CUSTOM_META(F2FS_IOC_MOVE_RANGE, XXX)
//_CUSTOM_META(F2FS_IOC_FLUSH_DEVICE, XXX)
//_CUSTOM_META(F2FS_IOC_GARBAGE_COLLECT_RANGE, XXX)
_BASIC_META(F2FS_IOC_GET_FEATURES)
_BASIC_META(F2FS_IOC_SET_PIN_FILE)
_BASIC_META(F2FS_IOC_GET_PIN_FILE)
_BASIC_META(F2FS_IOC_PRECACHE_EXTENTS)
_BASIC_META(F2FS_IOC_RESIZE_FS)
_BASIC_META(F2FS_IOC_GET_COMPRESS_BLOCKS)
_BASIC_META(F2FS_IOC_RELEASE_COMPRESS_BLOCKS)
_BASIC_META(F2FS_IOC_RESERVE_COMPRESS_BLOCKS)
//_CUSTOM_META(F2FS_IOC_SEC_TRIM_FILE, XXX)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/input.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/input.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {
// Expands the evdev ioctl list from input.inl inside its own namespace, using the request
// macros from <linux/input.h> and the _BASIC_META* helpers from HelperDefines.h.
namespace input {
#include "LinuxSyscalls/x64/Ioctl/input.inl"
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/input.inl
================================================
// X-macro list of evdev ioctls (<linux/input.h>); meant to be #included from input.h.
// Fixed-size requests: the request macro is an object-like constant.
_BASIC_META(EVIOCGVERSION)
_BASIC_META(EVIOCGID)
_BASIC_META(EVIOCGREP)
_BASIC_META(EVIOCSREP)
_BASIC_META(EVIOCGKEYCODE)
_BASIC_META(EVIOCGKEYCODE_V2)
_BASIC_META(EVIOCSKEYCODE)
_BASIC_META(EVIOCSKEYCODE_V2)
// Length-parameterised requests: these are function-like macros taking a buffer length.
// The trailing 0 is presumably forwarded as that length so only the request's base
// encoding is described — TODO confirm against _BASIC_META_VAR in HelperDefines.h.
_BASIC_META_VAR(EVIOCGNAME, 0)
_BASIC_META_VAR(EVIOCGPHYS, 0)
_BASIC_META_VAR(EVIOCGUNIQ, 0)
_BASIC_META_VAR(EVIOCGPROP, 0)
_BASIC_META_VAR(EVIOCGMTSLOTS, 0)
_BASIC_META_VAR(EVIOCGKEY, 0)
_BASIC_META_VAR(EVIOCGLED, 0)
_BASIC_META_VAR(EVIOCGSND, 0)
_BASIC_META_VAR(EVIOCGSW, 0)
// EVIOCGBIT(ev, len): one entry per event-type index 0x00..0x1F, each with length 0.
_BASIC_META_VAR(EVIOCGBIT, 0x00, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x01, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x02, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x03, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x04, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x05, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x06, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x07, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x08, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x09, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0A, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0B, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0C, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0D, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0E, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x0F, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x10, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x11, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x12, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x13, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x14, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x15, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x16, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x17, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x18, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x19, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1A, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1B, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1C, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1D, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1E, 0)
_BASIC_META_VAR(EVIOCGBIT, 0x1F, 0)
// EVIOCGABS(abs): one entry per absolute-axis index 0x00..0x2F.
// NOTE(review): axis indices above 0x2F are not listed here — confirm whether that is intentional.
_BASIC_META_VAR(EVIOCGABS, 0x00)
_BASIC_META_VAR(EVIOCGABS, 0x01)
_BASIC_META_VAR(EVIOCGABS, 0x02)
_BASIC_META_VAR(EVIOCGABS, 0x03)
_BASIC_META_VAR(EVIOCGABS, 0x04)
_BASIC_META_VAR(EVIOCGABS, 0x05)
_BASIC_META_VAR(EVIOCGABS, 0x06)
_BASIC_META_VAR(EVIOCGABS, 0x07)
_BASIC_META_VAR(EVIOCGABS, 0x08)
_BASIC_META_VAR(EVIOCGABS, 0x09)
_BASIC_META_VAR(EVIOCGABS, 0x0A)
_BASIC_META_VAR(EVIOCGABS, 0x0B)
_BASIC_META_VAR(EVIOCGABS, 0x0C)
_BASIC_META_VAR(EVIOCGABS, 0x0D)
_BASIC_META_VAR(EVIOCGABS, 0x0E)
_BASIC_META_VAR(EVIOCGABS, 0x0F)
_BASIC_META_VAR(EVIOCGABS, 0x10)
_BASIC_META_VAR(EVIOCGABS, 0x11)
_BASIC_META_VAR(EVIOCGABS, 0x12)
_BASIC_META_VAR(EVIOCGABS, 0x13)
_BASIC_META_VAR(EVIOCGABS, 0x14)
_BASIC_META_VAR(EVIOCGABS, 0x15)
_BASIC_META_VAR(EVIOCGABS, 0x16)
_BASIC_META_VAR(EVIOCGABS, 0x17)
_BASIC_META_VAR(EVIOCGABS, 0x18)
_BASIC_META_VAR(EVIOCGABS, 0x19)
_BASIC_META_VAR(EVIOCGABS, 0x1A)
_BASIC_META_VAR(EVIOCGABS, 0x1B)
_BASIC_META_VAR(EVIOCGABS, 0x1C)
_BASIC_META_VAR(EVIOCGABS, 0x1D)
_BASIC_META_VAR(EVIOCGABS, 0x1E)
_BASIC_META_VAR(EVIOCGABS, 0x1F)
_BASIC_META_VAR(EVIOCGABS, 0x20)
_BASIC_META_VAR(EVIOCGABS, 0x21)
_BASIC_META_VAR(EVIOCGABS, 0x22)
_BASIC_META_VAR(EVIOCGABS, 0x23)
_BASIC_META_VAR(EVIOCGABS, 0x24)
_BASIC_META_VAR(EVIOCGABS, 0x25)
_BASIC_META_VAR(EVIOCGABS, 0x26)
_BASIC_META_VAR(EVIOCGABS, 0x27)
_BASIC_META_VAR(EVIOCGABS, 0x28)
_BASIC_META_VAR(EVIOCGABS, 0x29)
_BASIC_META_VAR(EVIOCGABS, 0x2A)
_BASIC_META_VAR(EVIOCGABS, 0x2B)
_BASIC_META_VAR(EVIOCGABS, 0x2C)
_BASIC_META_VAR(EVIOCGABS, 0x2D)
_BASIC_META_VAR(EVIOCGABS, 0x2E)
_BASIC_META_VAR(EVIOCGABS, 0x2F)
// EVIOCSABS(abs): the "set" counterpart of the EVIOCGABS list, covering the same indices 0x00..0x2F.
_BASIC_META_VAR(EVIOCSABS, 0x00)
_BASIC_META_VAR(EVIOCSABS, 0x01)
_BASIC_META_VAR(EVIOCSABS, 0x02)
_BASIC_META_VAR(EVIOCSABS, 0x03)
_BASIC_META_VAR(EVIOCSABS, 0x04)
_BASIC_META_VAR(EVIOCSABS, 0x05)
_BASIC_META_VAR(EVIOCSABS, 0x06)
_BASIC_META_VAR(EVIOCSABS, 0x07)
_BASIC_META_VAR(EVIOCSABS, 0x08)
_BASIC_META_VAR(EVIOCSABS, 0x09)
_BASIC_META_VAR(EVIOCSABS, 0x0A)
_BASIC_META_VAR(EVIOCSABS, 0x0B)
_BASIC_META_VAR(EVIOCSABS, 0x0C)
_BASIC_META_VAR(EVIOCSABS, 0x0D)
_BASIC_META_VAR(EVIOCSABS, 0x0E)
_BASIC_META_VAR(EVIOCSABS, 0x0F)
_BASIC_META_VAR(EVIOCSABS, 0x10)
_BASIC_META_VAR(EVIOCSABS, 0x11)
_BASIC_META_VAR(EVIOCSABS, 0x12)
_BASIC_META_VAR(EVIOCSABS, 0x13)
_BASIC_META_VAR(EVIOCSABS, 0x14)
_BASIC_META_VAR(EVIOCSABS, 0x15)
_BASIC_META_VAR(EVIOCSABS, 0x16)
_BASIC_META_VAR(EVIOCSABS, 0x17)
_BASIC_META_VAR(EVIOCSABS, 0x18)
_BASIC_META_VAR(EVIOCSABS, 0x19)
_BASIC_META_VAR(EVIOCSABS, 0x1A)
_BASIC_META_VAR(EVIOCSABS, 0x1B)
_BASIC_META_VAR(EVIOCSABS, 0x1C)
_BASIC_META_VAR(EVIOCSABS, 0x1D)
_BASIC_META_VAR(EVIOCSABS, 0x1E)
_BASIC_META_VAR(EVIOCSABS, 0x1F)
_BASIC_META_VAR(EVIOCSABS, 0x20)
_BASIC_META_VAR(EVIOCSABS, 0x21)
_BASIC_META_VAR(EVIOCSABS, 0x22)
_BASIC_META_VAR(EVIOCSABS, 0x23)
_BASIC_META_VAR(EVIOCSABS, 0x24)
_BASIC_META_VAR(EVIOCSABS, 0x25)
_BASIC_META_VAR(EVIOCSABS, 0x26)
_BASIC_META_VAR(EVIOCSABS, 0x27)
_BASIC_META_VAR(EVIOCSABS, 0x28)
_BASIC_META_VAR(EVIOCSABS, 0x29)
_BASIC_META_VAR(EVIOCSABS, 0x2A)
_BASIC_META_VAR(EVIOCSABS, 0x2B)
_BASIC_META_VAR(EVIOCSABS, 0x2C)
_BASIC_META_VAR(EVIOCSABS, 0x2D)
_BASIC_META_VAR(EVIOCSABS, 0x2E)
_BASIC_META_VAR(EVIOCSABS, 0x2F)
// Force-feedback, grab/revoke, event-mask and clock requests.
// EVIOCSFF is deliberately left out of the list (marked XXX by the original author).
// XXX: _BASIC_META(EVIOCSFF)
_BASIC_META(EVIOCRMFF)
_BASIC_META(EVIOCGEFFECTS)
_BASIC_META(EVIOCGRAB)
_BASIC_META(EVIOCREVOKE)
_BASIC_META(EVIOCGMASK)
_BASIC_META(EVIOCSMASK)
_BASIC_META(EVIOCSCLOCKID)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/joystick.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/joystick.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {

// Expands the joystick ioctl list from joystick.inl inside its own namespace, using the
// request macros from <linux/joystick.h> and the _BASIC_META* helpers from HelperDefines.h.
namespace joystick {
#include "LinuxSyscalls/x64/Ioctl/joystick.inl"
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/joystick.inl
================================================
// X-macro list of joystick ioctls (<linux/joystick.h>); meant to be #included from joystick.h.
_BASIC_META(JSIOCGVERSION)
_BASIC_META(JSIOCGAXES)
_BASIC_META(JSIOCGBUTTONS)
// JSIOCGNAME(len) is a function-like macro; 0 is supplied as the length argument.
_BASIC_META_VAR(JSIOCGNAME, 0)
_BASIC_META(JSIOCSCORR)
_BASIC_META(JSIOCGCORR)
_BASIC_META(JSIOCSAXMAP)
_BASIC_META(JSIOCGAXMAP)
_BASIC_META(JSIOCSBTNMAP)
_BASIC_META(JSIOCGBTNMAP)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/msdos_fs.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/msdos_fs.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {

// Expands the FAT/VFAT ioctl list from msdos_fs.inl inside its own namespace, using the
// request macros from <linux/msdos_fs.h> and the _BASIC_META* helpers from HelperDefines.h.
namespace msdos_fs {
#include "LinuxSyscalls/x64/Ioctl/msdos_fs.inl"
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/msdos_fs.inl
================================================
// X-macro list of FAT/VFAT ioctls (<linux/msdos_fs.h>); meant to be #included from msdos_fs.h.
_BASIC_META(VFAT_IOCTL_READDIR_BOTH)
_BASIC_META(VFAT_IOCTL_READDIR_SHORT)
_BASIC_META(FAT_IOCTL_GET_ATTRIBUTES)
_BASIC_META(FAT_IOCTL_SET_ATTRIBUTES)
_BASIC_META(FAT_IOCTL_GET_VOLUME_ID)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/msm_drm.inl
================================================
// X-macro list of MSM DRM ioctls.
// The including header is not shown alongside this list — presumably it supplies the
// DRM_IOCTL_MSM_* request macros and the _BASIC_META helper before including this file.
_BASIC_META(DRM_IOCTL_MSM_GET_PARAM)
_BASIC_META(DRM_IOCTL_MSM_SET_PARAM)
_BASIC_META(DRM_IOCTL_MSM_GEM_NEW)
_BASIC_META(DRM_IOCTL_MSM_GEM_INFO)
_BASIC_META(DRM_IOCTL_MSM_GEM_CPU_PREP)
_BASIC_META(DRM_IOCTL_MSM_GEM_CPU_FINI)
_BASIC_META(DRM_IOCTL_MSM_GEM_SUBMIT)
_BASIC_META(DRM_IOCTL_MSM_WAIT_FENCE)
_BASIC_META(DRM_IOCTL_MSM_GEM_MADVISE)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_NEW)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_CLOSE)
_BASIC_META(DRM_IOCTL_MSM_SUBMITQUEUE_QUERY)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/sockios.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/if.h>
#include <linux/sockios.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {
// Expands the socket ioctl list from sockios.inl inside its own namespace, using the request
// macros from <linux/sockios.h>, struct definitions from <linux/if.h>, and the
// _BASIC_META*/_CUSTOM_META* helpers from HelperDefines.h.
namespace sockios {
#include "LinuxSyscalls/x64/Ioctl/sockios.inl"
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/sockios.inl
================================================
// X-macro list of socket ioctls (<linux/sockios.h>); meant to be #included from sockios.h.

// Fallback for headers that do not provide the *_OLD timestamp request names.
#ifndef SIOCGSTAMP_OLD
#define SIOCGSTAMP_OLD 0x8906
#endif
_BASIC_META(SIOCGSTAMP_OLD)
#ifndef SIOCGSTAMPNS_OLD
#define SIOCGSTAMPNS_OLD 0x8907
#endif
_BASIC_META(SIOCGSTAMPNS_OLD)
_BASIC_META(SIOCADDRT)
_BASIC_META(SIOCDELRT)
_BASIC_META(SIOCRTMSG)
_BASIC_META(SIOCGIFNAME)
_BASIC_META(SIOCSIFLINK)
// SIOCGIFCONF is re-encoded with sizeof(struct ifconf) as its size field, keeping the
// direction, type and number of the original request.
// NOTE(review): the trailing "should hit failure" remark suggests this entry is expected to
// mismatch some check in the helper macros — confirm against HelperDefines.h.
_CUSTOM_META(SIOCGIFCONF,
  _IOC(
    _IOC_DIR(SIOCGIFCONF),
    _IOC_TYPE(SIOCGIFCONF),
    _IOC_NR(SIOCGIFCONF),
    sizeof(struct ifconf))) // This should hit failure
_BASIC_META(SIOCGIFFLAGS)
_BASIC_META(SIOCSIFFLAGS)
_BASIC_META(SIOCGIFADDR)
_BASIC_META(SIOCSIFADDR)
_BASIC_META(SIOCGIFDSTADDR)
_BASIC_META(SIOCSIFDSTADDR)
_BASIC_META(SIOCGIFBRDADDR)
_BASIC_META(SIOCSIFBRDADDR)
_BASIC_META(SIOCGIFNETMASK)
_BASIC_META(SIOCSIFNETMASK)
_BASIC_META(SIOCGIFMETRIC)
_BASIC_META(SIOCSIFMETRIC)
_BASIC_META(SIOCGIFMEM)
_BASIC_META(SIOCSIFMEM)
_BASIC_META(SIOCGIFMTU)
_BASIC_META(SIOCSIFMTU)
_BASIC_META(SIOCSIFNAME)
_BASIC_META(SIOCSIFHWADDR)
_BASIC_META(SIOCGIFENCAP)
_BASIC_META(SIOCSIFENCAP)
_BASIC_META(SIOCGIFHWADDR)
_BASIC_META(SIOCGIFSLAVE)
_BASIC_META(SIOCSIFSLAVE)
_BASIC_META(SIOCADDMULTI)
_BASIC_META(SIOCDELMULTI)
_BASIC_META(SIOCGIFINDEX)
_BASIC_META(SIOCSIFPFLAGS)
_BASIC_META(SIOCGIFPFLAGS)
_BASIC_META(SIOCDIFADDR)
_BASIC_META(SIOCSIFHWBROADCAST)
_BASIC_META(SIOCGIFCOUNT)
_BASIC_META(SIOCGIFBR)
_BASIC_META(SIOCSIFBR)
_BASIC_META(SIOCGIFTXQLEN)
_BASIC_META(SIOCSIFTXQLEN)
_BASIC_META(SIOCETHTOOL)
_BASIC_META(SIOCGMIIPHY)
_BASIC_META(SIOCGMIIREG)
_BASIC_META(SIOCSMIIREG)
_BASIC_META(SIOCWANDEV)
_BASIC_META(SIOCOUTQNSD)
_BASIC_META(SIOCGSKNS)
_BASIC_META(SIOCDARP)
_BASIC_META(SIOCGARP)
_BASIC_META(SIOCSARP)
_BASIC_META(SIOCDRARP)
_BASIC_META(SIOCGRARP)
_BASIC_META(SIOCSRARP)
_BASIC_META(SIOCGIFMAP)
_BASIC_META(SIOCSIFMAP)
_BASIC_META(SIOCADDDLCI)
_BASIC_META(SIOCDELDLCI)
_BASIC_META(SIOCGIFVLAN)
_BASIC_META(SIOCSIFVLAN)
_BASIC_META(SIOCBONDENSLAVE)
_BASIC_META(SIOCBONDRELEASE)
_BASIC_META(SIOCBONDSETHWADDR)
_BASIC_META(SIOCBONDSLAVEINFOQUERY)
_BASIC_META(SIOCBONDINFOQUERY)
_BASIC_META(SIOCBONDCHANGEACTIVE)
_BASIC_META(SIOCBRADDBR)
_BASIC_META(SIOCBRDELBR)
_BASIC_META(SIOCBRADDIF)
_BASIC_META(SIOCBRDELIF)
_BASIC_META(SIOCSHWTSTAMP)
_BASIC_META(SIOCGHWTSTAMP)

// Device-private request range: sixteen entries named SIOCDEVPRIVATE[_N], each described as
// the SIOCDEVPRIVATE base plus an offset 0x0..0xF (offset handling lives in _CUSTOM_META_OFFSET).
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE,   SIOCDEVPRIVATE, 0x0)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_1, SIOCDEVPRIVATE, 0x1)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_2, SIOCDEVPRIVATE, 0x2)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_3, SIOCDEVPRIVATE, 0x3)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_4, SIOCDEVPRIVATE, 0x4)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_5, SIOCDEVPRIVATE, 0x5)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_6, SIOCDEVPRIVATE, 0x6)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_7, SIOCDEVPRIVATE, 0x7)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_8, SIOCDEVPRIVATE, 0x8)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_9, SIOCDEVPRIVATE, 0x9)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_A, SIOCDEVPRIVATE, 0xA)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_B, SIOCDEVPRIVATE, 0xB)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_C, SIOCDEVPRIVATE, 0xC)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_D, SIOCDEVPRIVATE, 0xD)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_E, SIOCDEVPRIVATE, 0xE)
_CUSTOM_META_OFFSET(SIOCDEVPRIVATE_F, SIOCDEVPRIVATE, 0xF)

// Protocol-private request range: sixteen entries named SIOCPROTOPRIVATE[_N], each described
// as the SIOCPROTOPRIVATE base plus an offset 0x0..0xF.
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE,   SIOCPROTOPRIVATE, 0x0)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_1, SIOCPROTOPRIVATE, 0x1)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_2, SIOCPROTOPRIVATE, 0x2)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_3, SIOCPROTOPRIVATE, 0x3)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_4, SIOCPROTOPRIVATE, 0x4)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_5, SIOCPROTOPRIVATE, 0x5)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_6, SIOCPROTOPRIVATE, 0x6)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_7, SIOCPROTOPRIVATE, 0x7)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_8, SIOCPROTOPRIVATE, 0x8)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_9, SIOCPROTOPRIVATE, 0x9)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_A, SIOCPROTOPRIVATE, 0xA)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_B, SIOCPROTOPRIVATE, 0xB)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_C, SIOCPROTOPRIVATE, 0xC)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_D, SIOCPROTOPRIVATE, 0xD)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_E, SIOCPROTOPRIVATE, 0xE)
_CUSTOM_META_OFFSET(SIOCPROTOPRIVATE_F, SIOCPROTOPRIVATE, 0xF)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/wireless.h
================================================
// SPDX-License-Identifier: MIT
#include "LinuxSyscalls/x64/Types.h"
#include "LinuxSyscalls/x64/Ioctl/HelperDefines.h"

#include <cstdint>
#include <linux/wireless.h>
#include <sys/ioctl.h>

namespace FEX::HLE::x64 {
// Expands the wireless-extensions ioctl list from wireless.inl inside its own namespace, using
// the request macros from <linux/wireless.h> and the _BASIC_META helper from HelperDefines.h.
namespace wireless {
#include "LinuxSyscalls/x64/Ioctl/wireless.inl"
}

} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Ioctl/wireless.inl
================================================
// X-macro list of wireless-extensions ioctls (<linux/wireless.h>); meant to be #included
// from wireless.h. Entries are listed as set (SIOCS*) / get (SIOCG*) pairs where both exist.
_BASIC_META(SIOCSIWCOMMIT)
_BASIC_META(SIOCGIWNAME)
_BASIC_META(SIOCSIWNWID)
_BASIC_META(SIOCGIWNWID)
_BASIC_META(SIOCSIWFREQ)
_BASIC_META(SIOCGIWFREQ)
_BASIC_META(SIOCSIWMODE)
_BASIC_META(SIOCGIWMODE)
_BASIC_META(SIOCSIWSENS)
_BASIC_META(SIOCGIWSENS)
_BASIC_META(SIOCSIWRANGE)
_BASIC_META(SIOCGIWRANGE)
_BASIC_META(SIOCSIWPRIV)
_BASIC_META(SIOCGIWPRIV)
_BASIC_META(SIOCSIWSTATS)
_BASIC_META(SIOCGIWSTATS)
_BASIC_META(SIOCSIWSPY)
_BASIC_META(SIOCGIWSPY)
_BASIC_META(SIOCSIWTHRSPY)
_BASIC_META(SIOCGIWTHRSPY)
_BASIC_META(SIOCSIWAP)
_BASIC_META(SIOCGIWAP)
_BASIC_META(SIOCGIWAPLIST)
_BASIC_META(SIOCSIWSCAN)
_BASIC_META(SIOCGIWSCAN)
_BASIC_META(SIOCSIWESSID)
_BASIC_META(SIOCGIWESSID)
_BASIC_META(SIOCSIWNICKN)
_BASIC_META(SIOCGIWNICKN)
_BASIC_META(SIOCSIWRATE)
_BASIC_META(SIOCGIWRATE)
_BASIC_META(SIOCSIWRTS)
_BASIC_META(SIOCGIWRTS)
_BASIC_META(SIOCSIWFRAG)
_BASIC_META(SIOCGIWFRAG)
_BASIC_META(SIOCSIWTXPOW)
_BASIC_META(SIOCGIWTXPOW)
_BASIC_META(SIOCSIWRETRY)
_BASIC_META(SIOCGIWRETRY)
_BASIC_META(SIOCSIWENCODE)
_BASIC_META(SIOCGIWENCODE)
_BASIC_META(SIOCSIWPOWER)
_BASIC_META(SIOCGIWPOWER)
_BASIC_META(SIOCSIWGENIE)
_BASIC_META(SIOCGIWGENIE)
_BASIC_META(SIOCSIWMLME)
_BASIC_META(SIOCSIWAUTH)
_BASIC_META(SIOCGIWAUTH)
_BASIC_META(SIOCSIWENCODEEXT)
_BASIC_META(SIOCGIWENCODEEXT)
_BASIC_META(SIOCSIWPMKSA)

// First and last request numbers of the driver-private wireless range.
_BASIC_META(SIOCIWFIRSTPRIV)
_BASIC_META(SIOCIWLASTPRIV)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Memory.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include <FEXCore/Core/Context.h>
#include <FEXCore/Debug/InternalThreadState.h>

#include <FEXCore/IR/IR.h>

#include <sys/mman.h>
#include <sys/shm.h>
#include <unistd.h>

#include <FEXCore/Core/Context.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/MathUtils.h>

namespace FEX::HLE::x64 {

// Installs the x86-64 memory-management syscalls (mmap, munmap, mremap, mprotect, shmat, shmdt).
// Every handler is a capture-less lambda that forwards its arguments, together with the calling
// thread taken from the CPU frame, to the matching Guest* method on the global syscall handler.
// Handler: the syscall table owner the handlers are registered on (consumed by the macro).
void RegisterMemory(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;

  const auto MmapImpl = [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length, int prot, int flags, int fd,
                           off_t offset) -> uint64_t {
    // GuestMmap hands back a pointer; the syscall ABI wants it as an integer.
    void* const Mapping = FEX::HLE::_SyscallHandler->GuestMmap(Frame->Thread, addr, length, prot, flags, fd, offset);
    return (uint64_t)Mapping;
  };

  const auto MunmapImpl = [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t length) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestMunmap(Frame->Thread, addr, length);
  };

  const auto MremapImpl = [](FEXCore::Core::CpuStateFrame* Frame, void* old_address, size_t old_size, size_t new_size, int flags,
                             void* new_address) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestMremap(true, Frame->Thread, old_address, old_size, new_size, flags, new_address);
  };

  const auto MprotectImpl = [](FEXCore::Core::CpuStateFrame* Frame, void* addr, size_t len, int prot) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestMprotect(Frame->Thread, addr, len, prot);
  };

  const auto ShmatImpl = [](FEXCore::Core::CpuStateFrame* Frame, int shmid, const void* shmaddr, int shmflg) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestShmat(true, Frame->Thread, shmid, shmaddr, shmflg);
  };

  const auto ShmdtImpl = [](FEXCore::Core::CpuStateFrame* Frame, const void* shmaddr) -> uint64_t {
    return FEX::HLE::_SyscallHandler->GuestShmdt(true, Frame->Thread, shmaddr);
  };

  // Registration order matches the original listing; each syscall has its own table slot.
  REGISTER_SYSCALL_IMPL_X64(mmap, MmapImpl);
  REGISTER_SYSCALL_IMPL_X64(munmap, MunmapImpl);
  REGISTER_SYSCALL_IMPL_X64(mremap, MremapImpl);
  REGISTER_SYSCALL_IMPL_X64(mprotect, MprotectImpl);
  REGISTER_SYSCALL_IMPL_X64(shmat, ShmatImpl);
  REGISTER_SYSCALL_IMPL_X64(shmdt, ShmdtImpl);
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/NotImplemented.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include <FEXCore/Utils/LogManager.h>
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <errno.h>
#include <stdint.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x64 {
// Stub that logs a debug message naming the syscall and then fails with -ENOSYS.
#define REGISTER_SYSCALL_NOT_IMPL_X64(name)                                       \
  REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame*) -> uint64_t { \
    LogMan::Msg::DFmt("Using deprecated/removed syscall: " #name);                \
    return -ENOSYS;                                                               \
  })

// Stub that fails with -ENOSYS without logging anything.
#define REGISTER_SYSCALL_NOT_IMPL_SAFE_X64(name) \
  REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame*) -> uint64_t { return -ENOSYS; })

// Stub that fails with -EPERM without logging anything.
#define REGISTER_SYSCALL_NO_PERM_X64(name) \
  REGISTER_SYSCALL_IMPL_X64(name, [](FEXCore::Core::CpuStateFrame*) -> uint64_t { return -EPERM; })

// Registers stubs for x86-64 syscalls that are removed or not implemented in the Linux kernel
// FEX presents to the guest.
// Handler: the syscall table owner the stubs are registered on (consumed by the macros).
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler) {
  // Logged -ENOSYS stubs.
  REGISTER_SYSCALL_NOT_IMPL_X64(tuxcall);
  REGISTER_SYSCALL_NOT_IMPL_X64(security);
  REGISTER_SYSCALL_NOT_IMPL_X64(set_thread_area);
  REGISTER_SYSCALL_NOT_IMPL_X64(get_thread_area);
  REGISTER_SYSCALL_NOT_IMPL_X64(epoll_ctl_old);
  REGISTER_SYSCALL_NOT_IMPL_X64(epoll_wait_old);

  // Refused outright with -EPERM.
  REGISTER_SYSCALL_NO_PERM_X64(kexec_file_load);

  // Silent -ENOSYS stub.
  REGISTER_SYSCALL_NOT_IMPL_SAFE_X64(uretprobe);
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Semaphore.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/Types.h"

#include <FEXHeaderUtils/Syscalls.h>

#include <linux/sem.h>
#include <stddef.h>
#include <stdint.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

ARG_TO_STR(FEX::HLE::x64::semun, "%lx")

namespace FEX::HLE::x64 {
// Installs the x86-64 semctl handler.
// Handler: the syscall table owner the handler is registered on (consumed by the macro).
//
// The handler dispatches on `cmd` because the fourth argument is a union whose active member
// depends on the command. Commands that exchange a structure with the kernel go through a
// host-side copy so the kernel never touches guest memory directly; guest memory is only
// accessed after the matching FaultSafeUserMemAccess check.
void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler) {
  REGISTER_SYSCALL_IMPL_X64(semctl, [](FEXCore::Core::CpuStateFrame* Frame, int semid, int semnum, int cmd, FEX::HLE::x64::semun semun) -> uint64_t {
    uint64_t Result {};
    switch (cmd) {
    case IPC_SET: {
      // IPC_SET is input-only: the kernel reads the structure and never writes it back.
      // Copy it in, but do not store anything to the guest buffer afterwards, so a guest that
      // passes a read-only structure is not hit by a spurious write.
      struct semid64_ds buf {};
      FaultSafeUserMemAccess::VerifyIsReadable(semun.buf, sizeof(*semun.buf));
      buf = *semun.buf;
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      break;
    }
    case SEM_STAT:
    case SEM_STAT_ANY:
    case IPC_STAT: {
      // Output-only: let the kernel fill a host copy, then publish it to the guest on success.
      struct semid64_ds buf {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &buf);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(semun.buf, sizeof(*semun.buf));
        *semun.buf = buf;
      }
      break;
    }
    case SEM_INFO:
    case IPC_INFO: {
      // Output-only: same pattern as above, with the seminfo layout.
      struct fex_seminfo si {};
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, &si);
      if (Result != -1) {
        FaultSafeUserMemAccess::VerifyIsWritable(semun.__buf, sizeof(si));
        memcpy(semun.__buf, &si, sizeof(si));
      }
      break;
    }
    case GETALL:
    case SETALL: {
      // The guest array pointer is forwarded untouched; the kernel accesses it directly.
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.array);
      break;
    }
    case SETVAL: {
      // The argument is a plain integer value, not a pointer.
      Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun.val);
      break;
    }
    // These commands forward the union by value, exactly as the guest supplied it.
    case IPC_RMID:
    case GETPID:
    case GETNCNT:
    case GETZCNT:
    case GETVAL: Result = ::syscall(SYSCALL_DEF(semctl), semid, semnum, cmd, semun); break;
    default: LOGMAN_MSG_A_FMT("Unhandled semctl cmd: {}", cmd); return -EINVAL;
    }
    // Converts the host's -1/errno convention into the value returned to the guest.
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Signals.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Syscalls/Thread.h"

#include "LinuxSyscalls/x64/Syscalls.h"

#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/SignalDelegator.h>

#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>

namespace FEX::HLE::x64 {
// Installs the x86-64 signal syscalls that need guest-specific handling.
// Handler: the syscall table owner the handlers are registered on (consumed by the macro).
void RegisterSignals(FEX::HLE::SyscallHandler* Handler) {
  // rt_sigaction: rejects any signal-set size other than 8 bytes, validates the optional
  // in/out action structures, then hands the request to the signal delegator.
  REGISTER_SYSCALL_IMPL_X64(
    rt_sigaction, [](FEXCore::Core::CpuStateFrame* Frame, int signum, const GuestSigAction* act, GuestSigAction* oldact, size_t sigsetsize) -> uint64_t {
      if (sigsetsize != 8) {
        return -EINVAL;
      }
      FaultSafeUserMemAccess::VerifyIsReadableOrNull(act, sizeof(GuestSigAction));
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(oldact, sizeof(GuestSigAction));

      return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSignalHandler(signum, act, oldact);
    });

  // rt_sigtimedwait: validates the guest pointers, then hands the request to the signal delegator.
  REGISTER_SYSCALL_IMPL_X64(
    rt_sigtimedwait,
    [](FEXCore::Core::CpuStateFrame* Frame, uint64_t* set, siginfo_t* info, const struct timespec* timeout, size_t sigsetsize) -> uint64_t {
      // Check the signal mask object itself. The previous sizeof(sigsetsize) measured the
      // size_t variable and only matched the mask size by coincidence.
      FaultSafeUserMemAccess::VerifyIsReadable(set, sizeof(*set));
      FaultSafeUserMemAccess::VerifyIsWritableOrNull(info, sizeof(siginfo_t));
      FaultSafeUserMemAccess::VerifyIsReadableOrNull(timeout, sizeof(timespec));
      return FEX::HLE::_SyscallHandler->GetSignalDelegator()->GuestSigTimedWait(set, info, timeout, sigsetsize);
    });
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/SyscallsEnum.h"

#include <FEXCore/HLE/SyscallHandler.h>

#include <algorithm>
#include <errno.h>
#include <stdint.h>

namespace FEX::HLE::x64 {
// Registration entry points for the x86-64-specific handler groups.
// Each one is implemented in its own translation unit (for example RegisterMemory in
// x64/Memory.cpp and RegisterSignals in x64/Signals.cpp) and is declared here rather than
// in a shared header.
void RegisterEpoll(FEX::HLE::SyscallHandler* Handler);
void RegisterFD(FEX::HLE::SyscallHandler* Handler);
void RegisterInfo(FEX::HLE::SyscallHandler* Handler);
void RegisterMemory(FEX::HLE::SyscallHandler* Handler);
void RegisterSemaphore(FEX::HLE::SyscallHandler* Handler);
void RegisterSignals(FEX::HLE::SyscallHandler* Handler);
void RegisterThread(FEX::HLE::SyscallHandler* Handler);
void RegisterTime(FEX::HLE::SyscallHandler* Handler);
void RegisterNotImplemented(FEX::HLE::SyscallHandler* Handler);
void RegisterPassthrough(FEX::HLE::SyscallHandler* Handler);

// Builds the 64-bit guest syscall handler.
// Forwards all three collaborators to the common SyscallHandler base, tags the handler as
// the 64-bit Linux ABI, and populates the syscall table before the constructor returns.
x64SyscallHandler::x64SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler)
  : SyscallHandler {ctx, _SignalDelegation, ThunkHandler} {
  OSABI = FEXCore::HLE::SyscallOSABI::OS_LINUX64;

  RegisterSyscallHandlers();
}

// Populates the x86-64 syscall table.
// Runs the registration groups shared between guest ABIs first, then the 64-bit specific
// groups, and finally fills the unused syscall-number gap with a handler that returns -ENOSYS.
void x64SyscallHandler::RegisterSyscallHandlers() {
  // Shared between guest ABIs
  FEX::HLE::RegisterEpoll(this);
  FEX::HLE::RegisterFD(this);
  FEX::HLE::RegisterFS(this);
  FEX::HLE::RegisterInfo(this);
  FEX::HLE::RegisterIO(this);
  FEX::HLE::RegisterMemory(this);
  FEX::HLE::RegisterSignals(this);
  FEX::HLE::RegisterThread(this);
  FEX::HLE::RegisterTimer(this);
  FEX::HLE::RegisterNotImplemented(this);
  FEX::HLE::RegisterStubs(this);

  // 64bit specific
  FEX::HLE::x64::RegisterEpoll(this);
  FEX::HLE::x64::RegisterFD(this);
  FEX::HLE::x64::RegisterInfo(this);
  FEX::HLE::x64::RegisterMemory(this);
  FEX::HLE::x64::RegisterSemaphore(this);
  FEX::HLE::x64::RegisterSignals(this);
  FEX::HLE::x64::RegisterThread(this);
  FEX::HLE::x64::RegisterTime(this);
  FEX::HLE::x64::RegisterNotImplemented(this);
  FEX::HLE::x64::RegisterPassthrough(this);

  // x86-64 has a gap of syscall numbers in the range [335, 424) where none are assigned.
  // These numbers are defined to return -ENOSYS, which lets x86-64 start using the common
  // syscall numbers from 424 onwards.
  // Fill the gap with a real handler so that FEX doesn't treat these slots as unregistered.
  // NOTE(review): the fill runs after every registration above, so anything registered for a
  // number inside this range is replaced here — confirm SYSCALL_GAP_BEGIN still matches the
  // first unassigned number in SyscallsEnum.h.
  constexpr int SYSCALL_GAP_BEGIN = 335;
  constexpr int SYSCALL_GAP_END = 424;

  // UnimplementedSyscall is the sentinel that marks an empty slot (RegisterSyscall_64 asserts
  // on it and the PRINT_MISSING_SYSCALLS scan below reports it), so storing it here would
  // leave the gap effectively unregistered. Use a zero-argument handler with the same shape
  // as the other "not implemented" stubs instead.
  const SyscallFunctionDefinition InvalidSyscall {
    .Ptr = reinterpret_cast<void*>(+[](FEXCore::Core::CpuStateFrame*) -> uint64_t { return -ENOSYS; }),
    .NumArgs = 0,
#ifdef DEBUG_STRACE
    .StraceFmt = "Invalid",
#endif
  };
  std::fill(Definitions.begin() + SYSCALL_GAP_BEGIN, Definitions.begin() + SYSCALL_GAP_END, InvalidSyscall);

#if PRINT_MISSING_SYSCALLS
  // Debug aid: report every named syscall whose slot still holds the empty-slot sentinel.
  for (auto& Syscall : SyscallNames) {
    if (Definitions[Syscall.first].Ptr == reinterpret_cast<void*>(&UnimplementedSyscall)) {
      LogMan::Msg::DFmt("Unimplemented syscall: {}", Syscall.second);
    }
  }
#endif
}

// Factory for the 64-bit guest syscall handler.
// Constructs an x64SyscallHandler from the given collaborators and returns it through the
// base-class pointer type, so callers do not need the concrete class.
fextl::unique_ptr<FEX::HLE::SyscallHandler>
CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler) {
  return fextl::make_unique<x64SyscallHandler>(ctx, _SignalDelegation, ThunkHandler);
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Syscalls.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#pragma once

#include "LinuxSyscalls/FileManagement.h"
#include "LinuxSyscalls/Syscalls.h"

#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>

#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>

namespace FEX::HLE {
class SignalDelegator;
class SyscallHandler;
class ThunkHandler;
} // namespace FEX::HLE

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEX::HLE::x64 {
// Syscall handler for 64-bit x86 guests.
// Supplies the 64-bit flavour of the memory-mapping entry points and owns registration of
// handlers into the inherited syscall table.
class x64SyscallHandler final : public FEX::HLE::SyscallHandler {
public:
  x64SyscallHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler);

  // Forwards to the shared implementation with its leading flag set to true.
  // NOTE(review): the flag presumably selects the 64-bit guest path — confirm in the base class.
  void* GuestMmap(FEXCore::Core::InternalThreadState* Thread, void* addr, size_t length, int prot, int flags, int fd, off_t offset) override {
    return FEX::HLE::SyscallHandler::GuestMmap(true, Thread, addr, length, prot, flags, fd, offset);
  }

  // Forwards to the shared implementation with its leading flag set to true (see GuestMmap).
  uint64_t GuestMunmap(FEXCore::Core::InternalThreadState* Thread, void* addr, uint64_t length) override {
    return FEX::HLE::SyscallHandler::GuestMunmap(true, Thread, addr, length);
  }

  // Stores a handler in the table slot for SyscallNumber.
  // SyscallNumber: index into Definitions; bounds-checked via at().
  // TraceFormatString: strace format for the call, only present in DEBUG_STRACE builds.
  // SyscallHandler: type-erased pointer to the handler function.
  // ArgumentCount: number of syscall arguments the handler takes.
  // With assertions enabled, registering over a slot that is already populated is reported.
  void RegisterSyscall_64(int SyscallNumber,
#ifdef DEBUG_STRACE
                          const fextl::string& TraceFormatString,
#endif
                          void* SyscallHandler, int ArgumentCount) override {
    auto& Slot = Definitions.at(SyscallNumber);
#if defined(ASSERTIONS_ENABLED) && ASSERTIONS_ENABLED
    LOGMAN_THROW_A_FMT(Slot.Ptr == reinterpret_cast<void*>(&UnimplementedSyscall), "Oops overwriting sysall problem, {}", SyscallNumber);
#endif
    Slot.NumArgs = ArgumentCount;
    Slot.Ptr = SyscallHandler;
#ifdef DEBUG_STRACE
    Slot.StraceFmt = TraceFormatString;
#endif
  }

private:
  // Fills the syscall table; invoked once from the constructor.
  void RegisterSyscallHandlers();
};

// Factory for the 64-bit guest syscall handler; returns it as the base-class handler type.
fextl::unique_ptr<FEX::HLE::SyscallHandler>
CreateHandler(FEXCore::Context::Context* ctx, FEX::HLE::SignalDelegator* _SignalDelegation, FEX::HLE::ThunkHandler* ThunkHandler);

//////
// REGISTER_SYSCALL_IMPL implementation
// Given a syscall name and a lambda, this generates an strace format string, extracts the
// number of arguments, and registers the lambda as the syscall handler
//////

// RegisterSyscall base
// Deduces the return type and the argument pack from the function pointer passed in.
// Does not work with lambdas directly, because they are objects with operator(), not functions.
//
// Handler:       table owner the syscall is registered on.
// SyscallNumber: slot to register into.
// Name:          syscall name, used only to build the strace format in DEBUG_STRACE builds.
// fn:            handler; its first parameter is always the CPU frame, the rest are the
//                syscall arguments, whose count becomes the registered argument count.
template<typename R, typename... Args>
void RegisterSyscall(SyscallHandler* Handler, int SyscallNumber, const char* Name, R (*fn)(FEXCore::Core::CpuStateFrame* Frame, Args...)) {
  // The table stores handlers type-erased, together with their argument count.
  void* const ErasedHandler = reinterpret_cast<void*>(fn);
  constexpr int ArgumentCount = sizeof...(Args);

#ifdef DEBUG_STRACE
  // "<name>(<per-argument format>) = {}"
  const auto TraceFormatString = fextl::string(Name) + "(" + CollectArgsFmtString<Args...>() + ") = {}";
  Handler->RegisterSyscall_64(SyscallNumber, TraceFormatString, ErasedHandler, ArgumentCount);
#else
  Handler->RegisterSyscall_64(SyscallNumber, ErasedHandler, ArgumentCount);
#endif
}

// Generic RegisterSyscall for lambdas
// A non-capturing lambda can convert to a function pointer, but that conversion is not
// considered while deducing the template arguments of the function-pointer overload.
// This glue forces the conversion with unary plus and then calls that overload.
template<class F>
void RegisterSyscall(SyscallHandler* _Handler, int num, const char* name, F f) {
  auto* const FunctionPointer = +f;
  RegisterSyscall(_Handler, num, name, FunctionPointer);
}

} // namespace FEX::HLE::x64

// Registers syscall for 64bit only
// name:   bare syscall name; pasted onto SYSCALL_x64_ to pick the table slot and stringified
//         for the strace name.
// lambda: non-capturing lambda (or function pointer) implementing the syscall.
// Expects a variable named `Handler` to be in scope at the expansion site, and expects
// `x64::SYSCALL_x64_<name>` to resolve there, i.e. the macro must be used from inside FEX::HLE.
// Wrapped in do/while(false) so an invocation followed by ';' forms a single statement.
#define REGISTER_SYSCALL_IMPL_X64(name, lambda)                                        \
  do {                                                                                 \
    FEX::HLE::x64::RegisterSyscall(Handler, x64::SYSCALL_x64_##name, #name, (lambda)); \
  } while (false)


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/SyscallsEnum.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/
#pragma once

namespace FEX::HLE::x64 {
/// Enum containing all x86-64 Linux syscalls for the guest kernel version.
///
/// Every enumerator carries an explicit value, so entries can be reordered without changing any number.
/// Layout of the table as written below:
///   - 0..335 and 424..466 are assigned; there are no enumerators for 336..423.
///   - Entries preceded by "No entrypoint. -ENOSYS" are numbered but have no kernel entrypoint.
///   - SYSCALL_x64_MAX (512) is larger than every numbered syscall.
///     NOTE(review): presumably used as the size of the syscall dispatch table - confirm at the use site.
///   - The trailing group of enumerators all share the value ~0 (see the comment above that group).
/// The REGISTER_SYSCALL_IMPL_X64 macro forms `x64::SYSCALL_x64_<name>` by token pasting, so the suffix of
/// each enumerator is the name used when registering a handler.
enum Syscalls_x64 {
  SYSCALL_x64_read = 0,
  SYSCALL_x64_write = 1,
  SYSCALL_x64_open = 2,
  SYSCALL_x64_close = 3,
  SYSCALL_x64_stat = 4,
  SYSCALL_x64_fstat = 5,
  SYSCALL_x64_lstat = 6,
  SYSCALL_x64_poll = 7,
  SYSCALL_x64_lseek = 8,
  SYSCALL_x64_mmap = 9,
  SYSCALL_x64_mprotect = 10,
  SYSCALL_x64_munmap = 11,
  SYSCALL_x64_brk = 12,
  SYSCALL_x64_rt_sigaction = 13,
  SYSCALL_x64_rt_sigprocmask = 14,
  SYSCALL_x64_rt_sigreturn = 15,
  SYSCALL_x64_ioctl = 16,
  SYSCALL_x64_pread_64 = 17,
  SYSCALL_x64_pwrite_64 = 18,
  SYSCALL_x64_readv = 19,
  SYSCALL_x64_writev = 20,
  SYSCALL_x64_access = 21,
  SYSCALL_x64_pipe = 22,
  SYSCALL_x64_select = 23,
  SYSCALL_x64_sched_yield = 24,
  SYSCALL_x64_mremap = 25,
  SYSCALL_x64_msync = 26,
  SYSCALL_x64_mincore = 27,
  SYSCALL_x64_madvise = 28,
  SYSCALL_x64_shmget = 29,
  SYSCALL_x64_shmat = 30,
  SYSCALL_x64_shmctl = 31,
  SYSCALL_x64_dup = 32,
  SYSCALL_x64_dup2 = 33,
  SYSCALL_x64_pause = 34,
  SYSCALL_x64_nanosleep = 35,
  SYSCALL_x64_getitimer = 36,
  SYSCALL_x64_alarm = 37,
  SYSCALL_x64_setitimer = 38,
  SYSCALL_x64_getpid = 39,
  SYSCALL_x64_sendfile = 40,
  SYSCALL_x64_socket = 41,
  SYSCALL_x64_connect = 42,
  SYSCALL_x64_accept = 43,
  SYSCALL_x64_sendto = 44,
  SYSCALL_x64_recvfrom = 45,
  SYSCALL_x64_sendmsg = 46,
  SYSCALL_x64_recvmsg = 47,
  SYSCALL_x64_shutdown = 48,
  SYSCALL_x64_bind = 49,
  SYSCALL_x64_listen = 50,
  SYSCALL_x64_getsockname = 51,
  SYSCALL_x64_getpeername = 52,
  SYSCALL_x64_socketpair = 53,
  SYSCALL_x64_setsockopt = 54,
  SYSCALL_x64_getsockopt = 55,
  SYSCALL_x64_clone = 56,
  SYSCALL_x64_fork = 57,
  SYSCALL_x64_vfork = 58,
  SYSCALL_x64_execve = 59,
  SYSCALL_x64_exit = 60,
  SYSCALL_x64_wait4 = 61,
  SYSCALL_x64_kill = 62,
  SYSCALL_x64_uname = 63,
  SYSCALL_x64_semget = 64,
  SYSCALL_x64_semop = 65,
  SYSCALL_x64_semctl = 66,
  SYSCALL_x64_shmdt = 67,
  SYSCALL_x64_msgget = 68,
  SYSCALL_x64_msgsnd = 69,
  SYSCALL_x64_msgrcv = 70,
  SYSCALL_x64_msgctl = 71,
  SYSCALL_x64_fcntl = 72,
  SYSCALL_x64_flock = 73,
  SYSCALL_x64_fsync = 74,
  SYSCALL_x64_fdatasync = 75,
  SYSCALL_x64_truncate = 76,
  SYSCALL_x64_ftruncate = 77,
  SYSCALL_x64_getdents = 78,
  SYSCALL_x64_getcwd = 79,
  SYSCALL_x64_chdir = 80,
  SYSCALL_x64_fchdir = 81,
  SYSCALL_x64_rename = 82,
  SYSCALL_x64_mkdir = 83,
  SYSCALL_x64_rmdir = 84,
  SYSCALL_x64_creat = 85,
  SYSCALL_x64_link = 86,
  SYSCALL_x64_unlink = 87,
  SYSCALL_x64_symlink = 88,
  SYSCALL_x64_readlink = 89,
  SYSCALL_x64_chmod = 90,
  SYSCALL_x64_fchmod = 91,
  SYSCALL_x64_chown = 92,
  SYSCALL_x64_fchown = 93,
  SYSCALL_x64_lchown = 94,
  SYSCALL_x64_umask = 95,
  SYSCALL_x64_gettimeofday = 96,
  SYSCALL_x64_getrlimit = 97,
  SYSCALL_x64_getrusage = 98,
  SYSCALL_x64_sysinfo = 99,
  SYSCALL_x64_times = 100,
  SYSCALL_x64_ptrace = 101,
  SYSCALL_x64_getuid = 102,
  SYSCALL_x64_syslog = 103,
  SYSCALL_x64_getgid = 104,
  SYSCALL_x64_setuid = 105,
  SYSCALL_x64_setgid = 106,
  SYSCALL_x64_geteuid = 107,
  SYSCALL_x64_getegid = 108,
  SYSCALL_x64_setpgid = 109,
  SYSCALL_x64_getppid = 110,
  SYSCALL_x64_getpgrp = 111,
  SYSCALL_x64_setsid = 112,
  SYSCALL_x64_setreuid = 113,
  SYSCALL_x64_setregid = 114,
  SYSCALL_x64_getgroups = 115,
  SYSCALL_x64_setgroups = 116,
  SYSCALL_x64_setresuid = 117,
  SYSCALL_x64_getresuid = 118,
  SYSCALL_x64_setresgid = 119,
  SYSCALL_x64_getresgid = 120,
  SYSCALL_x64_getpgid = 121,
  SYSCALL_x64_setfsuid = 122,
  SYSCALL_x64_setfsgid = 123,
  SYSCALL_x64_getsid = 124,
  SYSCALL_x64_capget = 125,
  SYSCALL_x64_capset = 126,
  SYSCALL_x64_rt_sigpending = 127,
  SYSCALL_x64_rt_sigtimedwait = 128,
  SYSCALL_x64_rt_sigqueueinfo = 129,
  SYSCALL_x64_rt_sigsuspend = 130,
  SYSCALL_x64_sigaltstack = 131,
  SYSCALL_x64_utime = 132,
  SYSCALL_x64_mknod = 133,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_uselib = 134,
  SYSCALL_x64_personality = 135,
  SYSCALL_x64_ustat = 136,
  SYSCALL_x64_statfs = 137,
  SYSCALL_x64_fstatfs = 138,
  SYSCALL_x64_sysfs = 139,
  SYSCALL_x64_getpriority = 140,
  SYSCALL_x64_setpriority = 141,
  SYSCALL_x64_sched_setparam = 142,
  SYSCALL_x64_sched_getparam = 143,
  SYSCALL_x64_sched_setscheduler = 144,
  SYSCALL_x64_sched_getscheduler = 145,
  SYSCALL_x64_sched_get_priority_max = 146,
  SYSCALL_x64_sched_get_priority_min = 147,
  SYSCALL_x64_sched_rr_get_interval = 148,
  SYSCALL_x64_mlock = 149,
  SYSCALL_x64_munlock = 150,
  SYSCALL_x64_mlockall = 151,
  SYSCALL_x64_munlockall = 152,
  SYSCALL_x64_vhangup = 153,
  SYSCALL_x64_modify_ldt = 154,
  SYSCALL_x64_pivot_root = 155,
  SYSCALL_x64__sysctl = 156,
  SYSCALL_x64_prctl = 157,
  SYSCALL_x64_arch_prctl = 158,
  SYSCALL_x64_adjtimex = 159,
  SYSCALL_x64_setrlimit = 160,
  SYSCALL_x64_chroot = 161,
  SYSCALL_x64_sync = 162,
  SYSCALL_x64_acct = 163,
  SYSCALL_x64_settimeofday = 164,
  SYSCALL_x64_mount = 165,
  SYSCALL_x64_umount2 = 166,
  SYSCALL_x64_swapon = 167,
  SYSCALL_x64_swapoff = 168,
  SYSCALL_x64_reboot = 169,
  SYSCALL_x64_sethostname = 170,
  SYSCALL_x64_setdomainname = 171,
  SYSCALL_x64_iopl = 172,
  SYSCALL_x64_ioperm = 173,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_create_module = 174,
  SYSCALL_x64_init_module = 175,
  SYSCALL_x64_delete_module = 176,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_get_kernel_syms = 177,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_query_module = 178,
  SYSCALL_x64_quotactl = 179,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_nfsservctl = 180,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_getpmsg = 181,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_putpmsg = 182,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_afs_syscall = 183,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_tuxcall = 184,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_security = 185,
  SYSCALL_x64_gettid = 186,
  SYSCALL_x64_readahead = 187,
  SYSCALL_x64_setxattr = 188,
  SYSCALL_x64_lsetxattr = 189,
  SYSCALL_x64_fsetxattr = 190,
  SYSCALL_x64_getxattr = 191,
  SYSCALL_x64_lgetxattr = 192,
  SYSCALL_x64_fgetxattr = 193,
  SYSCALL_x64_listxattr = 194,
  SYSCALL_x64_llistxattr = 195,
  SYSCALL_x64_flistxattr = 196,
  SYSCALL_x64_removexattr = 197,
  SYSCALL_x64_lremovexattr = 198,
  SYSCALL_x64_fremovexattr = 199,
  SYSCALL_x64_tkill = 200,
  SYSCALL_x64_time = 201,
  SYSCALL_x64_futex = 202,
  SYSCALL_x64_sched_setaffinity = 203,
  SYSCALL_x64_sched_getaffinity = 204,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_set_thread_area = 205,
  SYSCALL_x64_io_setup = 206,
  SYSCALL_x64_io_destroy = 207,
  SYSCALL_x64_io_getevents = 208,
  SYSCALL_x64_io_submit = 209,
  SYSCALL_x64_io_cancel = 210,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_get_thread_area = 211,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_lookup_dcookie = 212,
  SYSCALL_x64_epoll_create = 213,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_epoll_ctl_old = 214,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_epoll_wait_old = 215,
  SYSCALL_x64_remap_file_pages = 216,
  SYSCALL_x64_getdents64 = 217,
  SYSCALL_x64_set_tid_address = 218,
  SYSCALL_x64_restart_syscall = 219,
  SYSCALL_x64_semtimedop = 220,
  SYSCALL_x64_fadvise64 = 221,
  SYSCALL_x64_timer_create = 222,
  SYSCALL_x64_timer_settime = 223,
  SYSCALL_x64_timer_gettime = 224,
  SYSCALL_x64_timer_getoverrun = 225,
  SYSCALL_x64_timer_delete = 226,
  SYSCALL_x64_clock_settime = 227,
  SYSCALL_x64_clock_gettime = 228,
  SYSCALL_x64_clock_getres = 229,
  SYSCALL_x64_clock_nanosleep = 230,
  SYSCALL_x64_exit_group = 231,
  SYSCALL_x64_epoll_wait = 232,
  SYSCALL_x64_epoll_ctl = 233,
  SYSCALL_x64_tgkill = 234,
  SYSCALL_x64_utimes = 235,
  // No entrypoint. -ENOSYS
  SYSCALL_x64_vserver = 236,
  SYSCALL_x64_mbind = 237,
  SYSCALL_x64_set_mempolicy = 238,
  SYSCALL_x64_get_mempolicy = 239,
  SYSCALL_x64_mq_open = 240,
  SYSCALL_x64_mq_unlink = 241,
  SYSCALL_x64_mq_timedsend = 242,
  SYSCALL_x64_mq_timedreceive = 243,
  SYSCALL_x64_mq_notify = 244,
  SYSCALL_x64_mq_getsetattr = 245,
  SYSCALL_x64_kexec_load = 246,
  SYSCALL_x64_waitid = 247,
  SYSCALL_x64_add_key = 248,
  SYSCALL_x64_request_key = 249,
  SYSCALL_x64_keyctl = 250,
  SYSCALL_x64_ioprio_set = 251,
  SYSCALL_x64_ioprio_get = 252,
  SYSCALL_x64_inotify_init = 253,
  SYSCALL_x64_inotify_add_watch = 254,
  SYSCALL_x64_inotify_rm_watch = 255,
  SYSCALL_x64_migrate_pages = 256,
  SYSCALL_x64_openat = 257,
  SYSCALL_x64_mkdirat = 258,
  SYSCALL_x64_mknodat = 259,
  SYSCALL_x64_fchownat = 260,
  SYSCALL_x64_futimesat = 261,
  SYSCALL_x64_newfstatat = 262,
  SYSCALL_x64_unlinkat = 263,
  SYSCALL_x64_renameat = 264,
  SYSCALL_x64_linkat = 265,
  SYSCALL_x64_symlinkat = 266,
  SYSCALL_x64_readlinkat = 267,
  SYSCALL_x64_fchmodat = 268,
  SYSCALL_x64_faccessat = 269,
  SYSCALL_x64_pselect6 = 270,
  SYSCALL_x64_ppoll = 271,
  SYSCALL_x64_unshare = 272,
  SYSCALL_x64_set_robust_list = 273,
  SYSCALL_x64_get_robust_list = 274,
  SYSCALL_x64_splice = 275,
  SYSCALL_x64_tee = 276,
  SYSCALL_x64_sync_file_range = 277,
  SYSCALL_x64_vmsplice = 278,
  SYSCALL_x64_move_pages = 279,
  SYSCALL_x64_utimensat = 280,
  SYSCALL_x64_epoll_pwait = 281,
  SYSCALL_x64_signalfd = 282,
  SYSCALL_x64_timerfd_create = 283,
  SYSCALL_x64_eventfd = 284,
  SYSCALL_x64_fallocate = 285,
  SYSCALL_x64_timerfd_settime = 286,
  SYSCALL_x64_timerfd_gettime = 287,
  SYSCALL_x64_accept4 = 288,
  SYSCALL_x64_signalfd4 = 289,
  SYSCALL_x64_eventfd2 = 290,
  SYSCALL_x64_epoll_create1 = 291,
  SYSCALL_x64_dup3 = 292,
  SYSCALL_x64_pipe2 = 293,
  SYSCALL_x64_inotify_init1 = 294,
  SYSCALL_x64_preadv = 295,
  SYSCALL_x64_pwritev = 296,
  SYSCALL_x64_rt_tgsigqueueinfo = 297,
  SYSCALL_x64_perf_event_open = 298,
  SYSCALL_x64_recvmmsg = 299,
  SYSCALL_x64_fanotify_init = 300,
  SYSCALL_x64_fanotify_mark = 301,
  SYSCALL_x64_prlimit_64 = 302,
  SYSCALL_x64_name_to_handle_at = 303,
  SYSCALL_x64_open_by_handle_at = 304,
  SYSCALL_x64_clock_adjtime = 305,
  SYSCALL_x64_syncfs = 306,
  SYSCALL_x64_sendmmsg = 307,
  SYSCALL_x64_setns = 308,
  SYSCALL_x64_getcpu = 309,
  SYSCALL_x64_process_vm_readv = 310,
  SYSCALL_x64_process_vm_writev = 311,
  SYSCALL_x64_kcmp = 312,
  SYSCALL_x64_finit_module = 313,
  SYSCALL_x64_sched_setattr = 314,
  SYSCALL_x64_sched_getattr = 315,
  SYSCALL_x64_renameat2 = 316,
  SYSCALL_x64_seccomp = 317,
  SYSCALL_x64_getrandom = 318,
  SYSCALL_x64_memfd_create = 319,
  SYSCALL_x64_kexec_file_load = 320,
  SYSCALL_x64_bpf = 321,
  SYSCALL_x64_execveat = 322,
  SYSCALL_x64_userfaultfd = 323,
  SYSCALL_x64_membarrier = 324,
  SYSCALL_x64_mlock2 = 325,
  SYSCALL_x64_copy_file_range = 326,
  SYSCALL_x64_preadv2 = 327,
  SYSCALL_x64_pwritev2 = 328,
  SYSCALL_x64_pkey_mprotect = 329,
  SYSCALL_x64_pkey_alloc = 330,
  SYSCALL_x64_pkey_free = 331,
  SYSCALL_x64_statx = 332,
  SYSCALL_x64_io_pgetevents = 333,
  SYSCALL_x64_rseq = 334,
  SYSCALL_x64_uretprobe = 335,
  // Numbering jumps from 335 to 424 here; no enumerators exist for 336..423.
  SYSCALL_x64_pidfd_send_signal = 424,
  SYSCALL_x64_io_uring_setup = 425,
  SYSCALL_x64_io_uring_enter = 426,
  SYSCALL_x64_io_uring_register = 427,
  SYSCALL_x64_open_tree = 428,
  SYSCALL_x64_move_mount = 429,
  SYSCALL_x64_fsopen = 430,
  SYSCALL_x64_fsconfig = 431,
  SYSCALL_x64_fsmount = 432,
  SYSCALL_x64_fspick = 433,
  SYSCALL_x64_pidfd_open = 434,
  SYSCALL_x64_clone3 = 435,
  SYSCALL_x64_close_range = 436,
  SYSCALL_x64_openat2 = 437,
  SYSCALL_x64_pidfd_getfd = 438,
  SYSCALL_x64_faccessat2 = 439,
  SYSCALL_x64_process_madvise = 440,
  SYSCALL_x64_epoll_pwait2 = 441,
  SYSCALL_x64_mount_setattr = 442,
  SYSCALL_x64_quotactl_fd = 443,
  SYSCALL_x64_landlock_create_ruleset = 444,
  SYSCALL_x64_landlock_add_rule = 445,
  SYSCALL_x64_landlock_restrict_self = 446,
  SYSCALL_x64_memfd_secret = 447,
  SYSCALL_x64_process_mrelease = 448,
  SYSCALL_x64_futex_waitv = 449,
  SYSCALL_x64_set_mempolicy_home_node = 450,
  SYSCALL_x64_cachestat = 451,
  SYSCALL_x64_fchmodat2 = 452,
  SYSCALL_x64_map_shadow_stack = 453,
  SYSCALL_x64_futex_wake = 454,
  SYSCALL_x64_futex_wait = 455,
  SYSCALL_x64_futex_requeue = 456,
  SYSCALL_x64_statmount = 457,
  SYSCALL_x64_listmount = 458,
  SYSCALL_x64_lsm_get_self_attr = 459,
  SYSCALL_x64_lsm_set_self_attr = 460,
  SYSCALL_x64_lsm_list_modules = 461,
  SYSCALL_x64_mseal = 462,
  SYSCALL_x64_setxattrat = 463,
  SYSCALL_x64_getxattrat = 464,
  SYSCALL_x64_listxattrat = 465,
  SYSCALL_x64_removexattrat = 466,
  // Upper bound: larger than every numbered syscall above.
  SYSCALL_x64_MAX = 512,

  // Syscall names that have no number in the x86-64 table above.
  // All of them alias the same value, ~0 (all bits set, i.e. -1 as an int), so they can never match a
  // numbered entry and cannot be told apart from one another by value.
  // NOTE(review): presumably these exist only on the 32-bit ABI and are declared here so code shared
  // between both ABIs can still name them - confirm against the users of these enumerators.
  SYSCALL_x64_waitpid = ~0,
  SYSCALL_x64_break = ~0,
  SYSCALL_x64_oldstat = ~0,
  SYSCALL_x64_umount = ~0,
  SYSCALL_x64_stime = ~0,
  SYSCALL_x64_oldfstat = ~0,
  SYSCALL_x64_stty = ~0,
  SYSCALL_x64_gtty = ~0,
  SYSCALL_x64_nice = ~0,
  SYSCALL_x64_ftime = ~0,
  SYSCALL_x64_prof = ~0,
  SYSCALL_x64_signal = ~0,
  SYSCALL_x64_lock = ~0,
  SYSCALL_x64_mpx = ~0,
  SYSCALL_x64_ulimit = ~0,
  SYSCALL_x64_oldolduname = ~0,
  SYSCALL_x64_sigaction = ~0,
  SYSCALL_x64_sgetmask = ~0,
  SYSCALL_x64_ssetmask = ~0,
  SYSCALL_x64_sigsuspend = ~0,
  SYSCALL_x64_sigpending = ~0,
  SYSCALL_x64_oldlstat = ~0,
  SYSCALL_x64_readdir = ~0,
  SYSCALL_x64_profil = ~0,
  SYSCALL_x64_socketcall = ~0,
  SYSCALL_x64_olduname = ~0,
  SYSCALL_x64_idle = ~0,
  SYSCALL_x64_vm86old = ~0,
  SYSCALL_x64_ipc = ~0,
  SYSCALL_x64_sigreturn = ~0,
  SYSCALL_x64_sigprocmask = ~0,
  SYSCALL_x64_bdflush = ~0,
  SYSCALL_x64__llseek = ~0,
  SYSCALL_x64__newselect = ~0,
  SYSCALL_x64_vm86 = ~0,
  SYSCALL_x64_ugetrlimit = ~0,
  SYSCALL_x64_mmap2 = ~0,
  SYSCALL_x64_truncate64 = ~0,
  SYSCALL_x64_ftruncate64 = ~0,
  SYSCALL_x64_stat64 = ~0,
  SYSCALL_x64_lstat64 = ~0,
  SYSCALL_x64_fstat64 = ~0,
  SYSCALL_x64_lchown32 = ~0,
  SYSCALL_x64_getuid32 = ~0,
  SYSCALL_x64_getgid32 = ~0,
  SYSCALL_x64_geteuid32 = ~0,
  SYSCALL_x64_getegid32 = ~0,
  SYSCALL_x64_setreuid32 = ~0,
  SYSCALL_x64_setregid32 = ~0,
  SYSCALL_x64_getgroups32 = ~0,
  SYSCALL_x64_setgroups32 = ~0,
  SYSCALL_x64_fchown32 = ~0,
  SYSCALL_x64_setresuid32 = ~0,
  SYSCALL_x64_getresuid32 = ~0,
  SYSCALL_x64_setresgid32 = ~0,
  SYSCALL_x64_getresgid32 = ~0,
  SYSCALL_x64_chown32 = ~0,
  SYSCALL_x64_setuid32 = ~0,
  SYSCALL_x64_setgid32 = ~0,
  SYSCALL_x64_setfsuid32 = ~0,
  SYSCALL_x64_setfsgid32 = ~0,
  SYSCALL_x64_fcntl64 = ~0,
  SYSCALL_x64_sendfile64 = ~0,
  SYSCALL_x64_statfs64 = ~0,
  SYSCALL_x64_fstatfs64 = ~0,
  SYSCALL_x64_fadvise64_64 = ~0,
  SYSCALL_x64_fstatat_64 = ~0,
  SYSCALL_x64_clock_gettime64 = ~0,
  SYSCALL_x64_clock_settime64 = ~0,
  SYSCALL_x64_clock_adjtime64 = ~0,
  SYSCALL_x64_clock_getres_time64 = ~0,
  SYSCALL_x64_clock_nanosleep_time64 = ~0,
  SYSCALL_x64_timer_gettime64 = ~0,
  SYSCALL_x64_timer_settime64 = ~0,
  SYSCALL_x64_timerfd_gettime64 = ~0,
  SYSCALL_x64_timerfd_settime64 = ~0,
  SYSCALL_x64_utimensat_time64 = ~0,
  SYSCALL_x64_pselect6_time64 = ~0,
  SYSCALL_x64_ppoll_time64 = ~0,
  SYSCALL_x64_io_pgetevents_time64 = ~0,
  SYSCALL_x64_recvmmsg_time64 = ~0,
  SYSCALL_x64_mq_timedsend_time64 = ~0,
  SYSCALL_x64_mq_timedreceive_time64 = ~0,
  SYSCALL_x64_semtimedop_time64 = ~0,
  SYSCALL_x64_rt_sigtimedwait_time64 = ~0,
  SYSCALL_x64_futex_time64 = ~0,
  SYSCALL_x64_sched_rr_get_interval_time64 = ~0,
};
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/SyscallsNames.inl
================================================
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

{ 0, "read"},
{ 1, "write"},
{ 2, "open"},
{ 3, "close"},
{ 4, "stat"},
{ 5, "fstat"},
{ 6, "lstat"},
{ 7, "poll"},
{ 8, "lseek"},
{ 9, "mmap"},
{ 10, "mprotect"},
{ 11, "munmap"},
{ 12, "brk"},
{ 13, "rt_sigaction"},
{ 14, "rt_sigprocmask"},
{ 15, "rt_sigreturn"},
{ 16, "ioctl"},
{ 17, "pread64"},
{ 18, "pwrite64"},
{ 19, "readv"},
{ 20, "writev"},
{ 21, "access"},
{ 22, "pipe"},
{ 23, "select"},
{ 24, "sched_yield"},
{ 25, "mremap"},
{ 26, "msync"},
{ 27, "mincore"},
{ 28, "madvise"},
{ 29, "shmget"},
{ 30, "shmat"},
{ 31, "shmctl"},
{ 32, "dup"},
{ 33, "dup2"},
{ 34, "pause"},
{ 35, "nanosleep"},
{ 36, "getitimer"},
{ 37, "alarm"},
{ 38, "setitimer"},
{ 39, "getpid"},
{ 40, "sendfile"},
{ 41, "socket"},
{ 42, "connect"},
{ 43, "accept"},
{ 44, "sendto"},
{ 45, "recvfrom"},
{ 46, "sendmsg"},
{ 47, "recvmsg"},
{ 48, "shutdown"},
{ 49, "bind"},
{ 50, "listen"},
{ 51, "getsockname"},
{ 52, "getpeername"},
{ 53, "socketpair"},
{ 54, "setsockopt"},
{ 55, "getsockopt"},
{ 56, "clone"},
{ 57, "fork"},
{ 58, "vfork"},
{ 59, "execve"},
{ 60, "exit"},
{ 61, "wait4"},
{ 62, "kill"},
{ 63, "uname"},
{ 64, "semget"},
{ 65, "semop"},
{ 66, "semctl"},
{ 67, "shmdt"},
{ 68, "msgget"},
{ 69, "msgsnd"},
{ 70, "msgrcv"},
{ 71, "msgctl"},
{ 72, "fcntl"},
{ 73, "flock"},
{ 74, "fsync"},
{ 75, "fdatasync"},
{ 76, "truncate"},
{ 77, "ftruncate"},
{ 78, "getdents"},
{ 79, "getcwd"},
{ 80, "chdir"},
{ 81, "fchdir"},
{ 82, "rename"},
{ 83, "mkdir"},
{ 84, "rmdir"},
{ 85, "creat"},
{ 86, "link"},
{ 87, "unlink"},
{ 88, "symlink"},
{ 89, "readlink"},
{ 90, "chmod"},
{ 91, "fchmod"},
{ 92, "chown"},
{ 93, "fchown"},
{ 94, "lchown"},
{ 95, "umask"},
{ 96, "gettimeofday"},
{ 97, "getrlimit"},
{ 98, "getrusage"},
{ 99, "sysinfo"},
{ 100, "times"},
{ 101, "ptrace"},
{ 102, "getuid"},
{ 103, "syslog"},
{ 104, "getgid"},
{ 105, "setuid"},
{ 106, "setgid"},
{ 107, "geteuid"},
{ 108, "getegid"},
{ 109, "setpgid"},
{ 110, "getppid"},
{ 111, "getpgrp"},
{ 112, "setsid"},
{ 113, "setreuid"},
{ 114, "setregid"},
{ 115, "getgroups"},
{ 116, "setgroups"},
{ 117, "setresuid"},
{ 118, "getresuid"},
{ 119, "setresgid"},
{ 120, "getresgid"},
{ 121, "getpgid"},
{ 122, "setfsuid"},
{ 123, "setfsgid"},
{ 124, "getsid"},
{ 125, "capget"},
{ 126, "capset"},
{ 127, "rt_sigpending"},
{ 128, "rt_sigtimedwait"},
{ 129, "rt_sigqueueinfo"},
{ 130, "rt_sigsuspend"},
{ 131, "sigaltstack"},
{ 132, "utime"},
{ 133, "mknod"},
{ 134, "uselib"},
{ 135, "personality"},
{ 136, "ustat"},
{ 137, "statfs"},
{ 138, "fstatfs"},
{ 139, "sysfs"},
{ 140, "getpriority"},
{ 141, "setpriority"},
{ 142, "sched_setparam"},
{ 143, "sched_getparam"},
{ 144, "sched_setscheduler"},
{ 145, "sched_getscheduler"},
{ 146, "sched_get_priority_max"},
{ 147, "sched_get_priority_min"},
{ 148, "sched_rr_get_interval"},
{ 149, "mlock"},
{ 150, "munlock"},
{ 151, "mlockall"},
{ 152, "munlockall"},
{ 153, "vhangup"},
{ 154, "modify_ldt"},
{ 155, "pivot_root"},
{ 156, "_sysctl"},
{ 157, "prctl"},
{ 158, "arch_prctl"},
{ 159, "adjtimex"},
{ 160, "setrlimit"},
{ 161, "chroot"},
{ 162, "sync"},
{ 163, "acct"},
{ 164, "settimeofday"},
{ 165, "mount"},
{ 166, "umount2"},
{ 167, "swapon"},
{ 168, "swapoff"},
{ 169, "reboot"},
{ 170, "sethostname"},
{ 171, "setdomainname"},
{ 172, "iopl"},
{ 173, "ioperm"},
{ 174, "create_module"},
{ 175, "init_module"},
{ 176, "delete_module"},
{ 177, "get_kernel_syms"},
{ 178, "query_module"},
{ 179, "quotactl"},
{ 180, "nfsservctl"},
{ 181, "getpmsg"},
{ 182, "putpmsg"},
{ 183, "afs_syscall"},
{ 184, "tuxcall"},
{ 185, "security"},
{ 186, "gettid"},
{ 187, "readahead"},
{ 188, "setxattr"},
{ 189, "lsetxattr"},
{ 190, "fsetxattr"},
{ 191, "getxattr"},
{ 192, "lgetxattr"},
{ 193, "fgetxattr"},
{ 194, "listxattr"},
{ 195, "llistxattr"},
{ 196, "flistxattr"},
{ 197, "removexattr"},
{ 198, "lremovexattr"},
{ 199, "fremovexattr"},
{ 200, "tkill"},
{ 201, "time"},
{ 202, "futex"},
{ 203, "sched_setaffinity"},
{ 204, "sched_getaffinity"},
{ 205, "set_thread_area"},
{ 206, "io_setup"},
{ 207, "io_destroy"},
{ 208, "io_getevents"},
{ 209, "io_submit"},
{ 210, "io_cancel"},
{ 211, "get_thread_area"},
{ 212, "lookup_dcookie"},
{ 213, "epoll_create"},
{ 214, "epoll_ctl_old"},
{ 215, "epoll_wait_old"},
{ 216, "remap_file_pages"},
{ 217, "getdents64"},
{ 218, "set_tid_address"},
{ 219, "restart_syscall"},
{ 220, "semtimedop"},
{ 221, "fadvise64"},
{ 222, "timer_create"},
{ 223, "timer_settime"},
{ 224, "timer_gettime"},
{ 225, "timer_getoverrun"},
{ 226, "timer_delete"},
{ 227, "clock_settime"},
{ 228, "clock_gettime"},
{ 229, "clock_getres"},
{ 230, "clock_nanosleep"},
{ 231, "exit_group"},
{ 232, "epoll_wait"},
{ 233, "epoll_ctl"},
{ 234, "tgkill"},
{ 235, "utimes"},
{ 236, "vserver"},
{ 237, "mbind"},
{ 238, "set_mempolicy"},
{ 239, "get_mempolicy"},
{ 240, "mq_open"},
{ 241, "mq_unlink"},
{ 242, "mq_timedsend"},
{ 243, "mq_timedreceive"},
{ 244, "mq_notify"},
{ 245, "mq_getsetattr"},
{ 246, "kexec_load"},
{ 247, "waitid"},
{ 248, "add_key"},
{ 249, "request_key"},
{ 250, "keyctl"},
{ 251, "ioprio_set"},
{ 252, "ioprio_get"},
{ 253, "inotify_init"},
{ 254, "inotify_add_watch"},
{ 255, "inotify_rm_watch"},
{ 256, "migrate_pages"},
{ 257, "openat"},
{ 258, "mkdirat"},
{ 259, "mknodat"},
{ 260, "fchownat"},
{ 261, "futimesat"},
{ 262, "newfstatat"},
{ 263, "unlinkat"},
{ 264, "renameat"},
{ 265, "linkat"},
{ 266, "symlinkat"},
{ 267, "readlinkat"},
{ 268, "fchmodat"},
{ 269, "faccessat"},
{ 270, "pselect6"},
{ 271, "ppoll"},
{ 272, "unshare"},
{ 273, "set_robust_list"},
{ 274, "get_robust_list"},
{ 275, "splice"},
{ 276, "tee"},
{ 277, "sync_file_range"},
{ 278, "vmsplice"},
{ 279, "move_pages"},
{ 280, "utimensat"},
{ 281, "epoll_pwait"},
{ 282, "signalfd"},
{ 283, "timerfd_create"},
{ 284, "eventfd"},
{ 285, "fallocate"},
{ 286, "timerfd_settime"},
{ 287, "timerfd_gettime"},
{ 288, "accept4"},
{ 289, "signalfd4"},
{ 290, "eventfd2"},
{ 291, "epoll_create1"},
{ 292, "dup3"},
{ 293, "pipe2"},
{ 294, "inotify_init1"},
{ 295, "preadv"},
{ 296, "pwritev"},
{ 297, "rt_tgsigqueueinfo"},
{ 298, "perf_event_open"},
{ 299, "recvmmsg"},
{ 300, "fanotify_init"},
{ 301, "fanotify_mark"},
{ 302, "prlimit64"},
{ 303, "name_to_handle_at"},
{ 304, "open_by_handle_at"},
{ 305, "clock_adjtime"},
{ 306, "syncfs"},
{ 307, "sendmmsg"},
{ 308, "setns"},
{ 309, "getcpu"},
{ 310, "process_vm_readv"},
{ 311, "process_vm_writev"},
{ 312, "kcmp"},
{ 313, "finit_module"},
{ 314, "sched_setattr"},
{ 315, "sched_getattr"},
{ 316, "renameat2"},
{ 317, "seccomp"},
{ 318, "getrandom"},
{ 319, "memfd_create"},
{ 320, "kexec_file_load"},
{ 321, "bpf"},
{ 322, "execveat"},
{ 323, "userfaultfd"},
{ 324, "membarrier"},
{ 325, "mlock2"},
{ 326, "copy_file_range"},
{ 327, "preadv2"},
{ 328, "pwritev2"},
{ 329, "pkey_mprotect"},
{ 330, "pkey_alloc"},
{ 331, "pkey_free"},
{ 332, "statx"},
{ 333, "io_pgetevents"},
{ 334, "rseq"},

================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/SignalDelegator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/x64/Thread.h"

#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/fextl/vector.h>

#include <sched.h>
#include <signal.h>
#include <stddef.h>
#include <syscall.h>
#include <stdint.h>
#include <unistd.h>

namespace FEX::HLE {
// modify_ldt read operation: copies the calling thread's LDT into guest memory.
//
// Frame     - CPU state frame of the calling guest thread; used to look up its thread state object.
// ptr       - guest destination buffer.
// bytecount - size of the guest buffer in bytes; clamped to MAX_LDT_ENTRIES * LDT_ENTRY_SIZE.
//
// Returns 0 when the thread has no LDT, -EFAULT when the guest buffer can't be written, otherwise the
// clamped bytecount (entries actually copied plus the zero-filled tail).
uint64_t SyscallHandler::read_ldt(FEXCore::Core::CpuStateFrame* Frame, void* ptr, unsigned long bytecount) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  if (!Thread->ldt_entries) {
    return 0;
  }

  bytecount = std::min(bytecount, MAX_LDT_ENTRIES * LDT_ENTRY_SIZE);
  const auto EntriesToCopySize = std::min(bytecount, Thread->ldt_entry_count * LDT_ENTRY_SIZE);

  // The fault-safe copy helpers are checked against EFAULT, matching write_ldt in this file.
  // Comparing the result against the requested size would treat a successful copy as a failure.
  if (FaultSafeUserMemAccess::CopyToUser(ptr, Thread->ldt_entries, EntriesToCopySize) == EFAULT) {
    return -EFAULT;
  }

  // Quirk that if the number of bytes that the user is asking for is larger than the amount we have, then zero the remaining memory.
  // This means the guest can't ever know the actual size of the LDT.
  // The tail is written from a fixed zero block in chunks, so the host stack usage doesn't scale with the
  // guest-controlled bytecount.
  static constexpr uint8_t ZeroChunk[4096] {};
  auto* Destination = reinterpret_cast<uint8_t*>(ptr) + EntriesToCopySize;
  size_t RemainingSize = bytecount - EntriesToCopySize;
  while (RemainingSize != 0) {
    const size_t ChunkSize = std::min<size_t>(RemainingSize, sizeof(ZeroChunk));
    if (FaultSafeUserMemAccess::CopyToUser(Destination, ZeroChunk, ChunkSize) == EFAULT) {
      return -EFAULT;
    }
    Destination += ChunkSize;
    RemainingSize -= ChunkSize;
  }

  // Return the combined size of ldt entries and zero initialized range.
  // I don't make the rules, it's just the weirdness that the kernel does.
  return bytecount;
}

// modify_ldt "read default LDT" operation.
//
// Frame     - unused.
// ptr       - guest destination buffer.
// bytecount - size of the guest buffer in bytes; at most 128 bytes are written.
//
// Returns the number of zero bytes written (bytecount clamped to 128), or -EFAULT when the guest buffer
// can't be written.
static uint64_t read_default_ldt(FEXCore::Core::CpuStateFrame* Frame, void* ptr, unsigned long bytecount) {
  // This is some weird old legacy thing. Just returns zeroes up to 128-bytes.
  uint8_t Data[128] {};
  bytecount = std::min<uint64_t>(bytecount, sizeof(Data));

  // The fault-safe copy helpers are checked against EFAULT, matching write_ldt in this file.
  // Comparing the result against the requested size would treat a successful copy as a failure.
  if (FaultSafeUserMemAccess::CopyToUser(ptr, Data, bytecount) == EFAULT) {
    return -EFAULT;
  }

  return bytecount;
}

// modify_ldt write operation: installs, replaces or clears a single LDT entry for the calling thread.
//
// Frame     - CPU state frame of the calling guest thread; its LDT segment-array pointer is updated.
// ptr       - guest pointer to a 16-byte user_desc structure describing the entry.
// bytecount - must be exactly sizeof(user_desc) (16 bytes).
// legacy    - true for the legacy write function, which rejects conforming segments, uses a looser
//             "empty" test and always clears the AVL bit.
//
// Returns 0 on success, -EINVAL for a malformed or unsupported descriptor, -EFAULT when the descriptor
// can't be read from guest memory and -ENOMEM when the replacement table can't be allocated.
uint64_t SyscallHandler::write_ldt(FEXCore::Core::CpuStateFrame* Frame, void* ptr, unsigned long bytecount, bool legacy) {
  auto Thread = FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame);

  // Guest-visible descriptor layout passed in by the guest.
  struct user_desc_x64 {
    uint32_t entry_number;
    uint32_t base_addr;
    uint32_t limit;
    uint32_t seg_32bit       : 1;
    uint32_t contents        : 2;
    uint32_t read_exec_only  : 1;
    uint32_t limit_in_pages  : 1;
    uint32_t seg_not_present : 1;
    uint32_t useable         : 1;
    uint32_t lm              : 1;
  };
  static_assert(sizeof(user_desc_x64) == 16);

  // Value of the `contents` bitfield that selects a conforming segment.
  constexpr static uint32_t MODIFY_LDT_CONTENTS_CONFORMING = 3;

  if (bytecount != sizeof(user_desc_x64)) {
    // Can only write a single ldt. Reject smaller and larger values.
    return -EINVAL;
  }

  user_desc_x64 ldt_info {};
  FEXCore::Core::CPUState::gdt_segment ldt {};

  if (FaultSafeUserMemAccess::CopyFromUser(&ldt_info, ptr, sizeof(ldt_info)) == EFAULT) {
    // Reject if we can't read it.
    return -EFAULT;
  }

  // Valid indices are [0, MAX_LDT_ENTRIES). Accepting index MAX_LDT_ENTRIES itself would grow the table
  // to MAX_LDT_ENTRIES + 1 entries, one more than read_ldt's clamp allows for.
  if (ldt_info.entry_number >= MAX_LDT_ENTRIES) {
    return -EINVAL;
  }

  if (ldt_info.contents == MODIFY_LDT_CONTENTS_CONFORMING) {
    // Conforming is mostly ignored.
    // Legacy doesn't support it at all. Good.
    if (legacy) {
      return -EINVAL;
    }
    // Non-legacy only accepts it if `seg_not_present` is set.
    if (ldt_info.seg_not_present == 0) {
      return -EINVAL;
    }
  }

  // Decides whether the request is a "clear this entry" operation.
  auto is_empty = [](user_desc_x64 ldt_info, bool legacy) {
    // Legacy empty is trivial.
    const bool legacy_empty = legacy && ldt_info.base_addr == 0 && ldt_info.limit == 0;
    if (legacy_empty) {
      return true;
    }

    // Non-legacy is a bit more work.
    return ldt_info.base_addr == 0 && ldt_info.limit == 0 && ldt_info.contents == 0 && ldt_info.read_exec_only == 1 &&
           ldt_info.limit_in_pages == 0 && ldt_info.seg_not_present == 1 && ldt_info.useable == 0;
  };

  // Translates the guest user_desc into the segment descriptor layout stored in the LDT.
  auto fill_ldt = [](FEXCore::Core::CPUState::gdt_segment& segment, user_desc_x64 ldt_info) {
    FEXCore::Core::CPUState::SetGDTBase(&segment, ldt_info.base_addr);
    FEXCore::Core::CPUState::SetGDTLimit(&segment, ldt_info.limit);

    // Additional flags
    // Type: bit [11:8]
    // - bit[8]  - Accessed
    // - bit[9]  - Readable
    // - bit[10] - Conforming
    // - bit[11]
    //   - 1 - Code
    //   - 0 - Data
    segment.Type = ((ldt_info.read_exec_only ^ 1) << 1) | // Readable
                   (ldt_info.contents << 2) |             // Code/Data+Conforming
                   1;                                     // Accessed
    // S: bit [12]
    // - 0 (System descriptor)
    // - 1 (User descriptor)
    segment.S = 1;
    // DPL: bit[14:13]
    segment.DPL = 3;
    // P: Present
    segment.P = ldt_info.seg_not_present ^ 1;
    // AVL: Available to software
    segment.AVL = ldt_info.useable;
    // L: Long-mode
    // This doesn't allow setting 64-bit segments!
    segment.L = 0;
    // D: Default operand size
    // - 0: 16-bit operand size
    // - 1: 32-bit operand size
    segment.D = ldt_info.seg_32bit;
    // G: Granularity
    segment.G = ldt_info.limit_in_pages;
  };

  if (is_empty(ldt_info, legacy)) {
    // If the ldt_info is considered empty then this is a zeroing operation.
    // Just use the zero ldt.
  } else {
    // This syscall only allows installing 32-bit segments. If `seg_32bit` isn't set then
    // it assumes a 16-bit segment!
    if (!ldt_info.seg_32bit) {
      return -EINVAL;
    }

    fill_ldt(ldt, ldt_info);

    if (legacy) {
      // Legacy always zeros this.
      ldt.AVL = 0;
    }
  }

  // Need to be careful with ldt replacement here to ensure it is atomically visible.
  // A complete replacement table is built first and only then published through the pointers below.
  auto old_ldt = Thread->ldt_entries;
  auto old_ldt_entries = Thread->ldt_entry_count;

  const auto new_ldt_count = std::max<size_t>(old_ldt_entries, ldt_info.entry_number + 1);
  const auto new_ldt_size = new_ldt_count * LDT_ENTRY_SIZE;

  void* const new_ldt_mapping =
    FEXCore::Allocator::mmap(nullptr, new_ldt_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (new_ldt_mapping == MAP_FAILED) {
    // Without this check the MAP_FAILED sentinel would be written through and published as the LDT.
    // The existing table is left untouched.
    return -ENOMEM;
  }

  const auto new_ldt_entries = reinterpret_cast<FEXCore::Core::CPUState::gdt_segment*>(new_ldt_mapping);

  FEXCore::Allocator::VirtualName("FEXMem_Misc", reinterpret_cast<void*>(new_ldt_entries), new_ldt_size);

  if (old_ldt) {
    // Copy old entries if they existed.
    memcpy(new_ldt_entries, old_ldt, old_ldt_entries * LDT_ENTRY_SIZE);
  }

  // Set new LDT.
  new_ldt_entries[ldt_info.entry_number] = ldt;

  // Set new LDT pointer.
  Thread->ldt_entries = new_ldt_entries;
  Thread->ldt_entry_count = new_ldt_count;

  // Give the new LDT to CPUState.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = new_ldt_entries;

  // The old table is only released after the new one has been published.
  if (old_ldt) {
    FEXCore::Allocator::munmap(old_ldt, old_ldt_entries * LDT_ENTRY_SIZE);
  }

  return 0;
}

} // namespace FEX::HLE

namespace FEX::HLE::x64 {
// Stores the supplied TLS pointer as the cached FS base of the given CPU state frame.
// Always returns 0.
uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls) {
  const auto GuestTLSBase = reinterpret_cast<uint64_t>(tls);
  Frame->State.fs_cached = GuestTLSBase;
  return 0;
}

// Advances the frame's RIP by two bytes.
// NOTE(review): presumably this steps over the 2-byte `syscall` instruction so a newly created thread
// resumes after it - confirm against the callers.
void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame) {
  constexpr int RipAdvanceBytes = 2;
  Frame->State.rip += RipAdvanceBytes;
}

// Values of the `func` argument handled by the modify_ldt syscall handler registered in this file.
enum Modify_ldt_func : int32_t {
  LDT_READ = 0x00,         // Dispatches to read_ldt.
  LDT_WRITE_LEGACY = 0x01, // Dispatches to write_ldt with legacy = true.
  LDT_READ_DEFAULT = 0x02, // Dispatches to read_default_ldt.
  LDT_WRITE = 0x11,        // Dispatches to write_ldt with legacy = false.
};

// Registers the x86-64 implementations of the thread/process related syscalls handled in this file:
// modify_ldt, clone, sigaltstack, execve and execveat.
void RegisterThread(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;
  // modify_ldt: dispatch on `func` to the matching LDT read/write helper.
  // Unknown function codes report -ENOSYS.
  REGISTER_SYSCALL_IMPL_X64(modify_ldt, [](FEXCore::Core::CpuStateFrame* Frame, int func, void* ptr, unsigned long bytecount) -> uint64_t {
    switch (func) {
    case Modify_ldt_func::LDT_READ: return FEX::HLE::_SyscallHandler->read_ldt(Frame, ptr, bytecount);
    case Modify_ldt_func::LDT_WRITE_LEGACY: return FEX::HLE::_SyscallHandler->write_ldt(Frame, ptr, bytecount, true);
    case Modify_ldt_func::LDT_READ_DEFAULT: return read_default_ldt(Frame, ptr, bytecount);
    case Modify_ldt_func::LDT_WRITE: return FEX::HLE::_SyscallHandler->write_ldt(Frame, ptr, bytecount, false);
    default: return -ENOSYS;
    }
  });

  // clone: repackage the legacy clone arguments into a clone3_args structure and hand it to CloneHandler.
  REGISTER_SYSCALL_IMPL_X64(
    clone, ([](FEXCore::Core::CpuStateFrame* Frame, uint32_t flags, void* stack, pid_t* parent_tid, pid_t* child_tid, void* tls) -> uint64_t {
      // This is slightly different EFAULT behaviour: if child_tid or parent_tid is invalid then the kernel just doesn't write to the
      // pointer. We still need to be EFAULT safe, though.
      if ((flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)) && child_tid) {
        FaultSafeUserMemAccess::VerifyIsWritable(child_tid, sizeof(*child_tid));
      }

      if ((flags & CLONE_PARENT_SETTID) && parent_tid) {
        FaultSafeUserMemAccess::VerifyIsWritable(parent_tid, sizeof(*parent_tid));
      }

      // The exit signal lives in the CSIGNAL bits of `flags`; it is split out into its own field below.
      FEX::HLE::clone3_args args {
        .Type = TypeOfClone::TYPE_CLONE2,
        .args =
          {

            .flags = flags & ~CSIGNAL, // This no longer contains CSIGNAL
            .pidfd = 0,                // For clone, pidfd is duplicated here
            .child_tid = reinterpret_cast<uint64_t>(child_tid),
            .parent_tid = reinterpret_cast<uint64_t>(parent_tid),
            .exit_signal = flags & CSIGNAL,
            .stack = reinterpret_cast<uint64_t>(stack),
            .stack_size = 0, // This syscall isn't able to see the stack size
            .tls = reinterpret_cast<uint64_t>(tls),
            .set_tid = 0, // This syscall isn't able to select TIDs
            .set_tid_size = 0,
            .cgroup = 0, // This syscall can't select cgroups
          },
      };
      return CloneHandler(Frame, &args);
    }));

  // sigaltstack: validate both (optional) user pointers, then forward to the signal delegator
  // for the thread that owns `Frame`.
  REGISTER_SYSCALL_IMPL_X64(sigaltstack, [](FEXCore::Core::CpuStateFrame* Frame, const stack_t* ss, stack_t* old_ss) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsReadableOrNull(ss, sizeof(*ss));
    FaultSafeUserMemAccess::VerifyIsWritableOrNull(old_ss, sizeof(*old_ss));
    return FEX::HLE::_SyscallHandler->GetSignalDelegator()->RegisterGuestSigAltStack(
      FEX::HLE::ThreadManager::GetStateObjectFromCPUState(Frame), ss, old_ss);
  });

  // launch a new process under fex
  // currently does not propagate argv[0] correctly
  REGISTER_SYSCALL_IMPL_X64(execve, [](FEXCore::Core::CpuStateFrame* Frame, const char* pathname, char* const argv[], char* const envp[]) -> uint64_t {
    fextl::vector<const char*> Args;
    fextl::vector<const char*> Envp;

    // Copy the null-terminated guest argv into a local vector, keeping the terminator.
    if (argv) {
      for (int i = 0; argv[i]; i++) {
        Args.push_back(argv[i]);
      }

      Args.push_back(nullptr);
    }

    // Same for the environment.
    if (envp) {
      for (int i = 0; envp[i]; i++) {
        Envp.push_back(envp[i]);
      }

      Envp.push_back(nullptr);
    }

    // A null argv/envp from the guest is forwarded as null rather than as an empty list.
    auto* const* ArgsPtr = argv ? const_cast<char* const*>(Args.data()) : nullptr;
    auto* const* EnvpPtr = envp ? const_cast<char* const*>(Envp.data()) : nullptr;

    FEX::HLE::ExecveAtArgs AtArgs = FEX::HLE::ExecveAtArgs::Empty();

    return FEX::HLE::ExecveHandler(Frame, pathname, ArgsPtr, EnvpPtr, AtArgs);
  });

  // execveat: identical to execve above except that dirfd and flags are passed through in ExecveAtArgs.
  REGISTER_SYSCALL_IMPL_X64(execveat, ([](FEXCore::Core::CpuStateFrame* Frame, int dirfd, const char* pathname, char* const argv[],
                                          char* const envp[], int flags) -> uint64_t {
                              fextl::vector<const char*> Args;
                              fextl::vector<const char*> Envp;

                              // Copy the null-terminated guest argv into a local vector, keeping the terminator.
                              if (argv) {
                                for (int i = 0; argv[i]; i++) {
                                  Args.push_back(argv[i]);
                                }

                                Args.push_back(nullptr);
                              }

                              // Same for the environment.
                              if (envp) {
                                for (int i = 0; envp[i]; i++) {
                                  Envp.push_back(envp[i]);
                                }

                                Envp.push_back(nullptr);
                              }

                              FEX::HLE::ExecveAtArgs AtArgs {
                                .dirfd = dirfd,
                                .flags = flags,
                              };

                              // A null argv/envp from the guest is forwarded as null rather than as an empty list.
                              auto* const* ArgsPtr = argv ? const_cast<char* const*>(Args.data()) : nullptr;
                              auto* const* EnvpPtr = envp ? const_cast<char* const*>(Envp.data()) : nullptr;
                              return FEX::HLE::ExecveHandler(Frame, pathname, ArgsPtr, EnvpPtr, AtArgs);
                            }));
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Thread.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#pragma once
#include <stdint.h>

namespace FEXCore::Core {
struct CpuStateFrame;
}

namespace FEX::HLE::x64 {
// Stores `tls` in the frame's cached FS value and returns 0.
uint64_t SetThreadArea(FEXCore::Core::CpuStateFrame* Frame, void* tls);
// Advances the guest RIP stored in `Frame` by two bytes.
void AdjustRipForNewThread(FEXCore::Core::CpuStateFrame* Frame);
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Time.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/Types.h"
#include "LinuxSyscalls/x64/Syscalls.h"

#include <stddef.h>
#include <stdint.h>
#include <time.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/times.h>
#include <sys/timex.h>
#include <unistd.h>
#include <utime.h>

namespace FEX::HLE::x64 {
// Registers the x86-64 implementations of time, utime and utimes.
// Each handler validates the guest pointers first and then calls the host libc function of the same name.
// NOTE(review): SYSCALL_ERRNO() is defined elsewhere; it presumably turns the local `Result` (and errno on
// failure) into the handler's return value, which is why every handler names its local `Result`.
void RegisterTime(FEX::HLE::SyscallHandler* Handler) {
  using namespace FEXCore::IR;
  // time: `tloc` may be null; when non-null it must be writable.
  REGISTER_SYSCALL_IMPL_X64(time, [](FEXCore::Core::CpuStateFrame* Frame, time_t* tloc) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsWritableOrNull(tloc, sizeof(time_t));
    uint64_t Result = ::time(tloc);
    SYSCALL_ERRNO();
  });

  // utime: `filename` must be a readable string; `times` may be null.
  REGISTER_SYSCALL_IMPL_X64(utime, [](FEXCore::Core::CpuStateFrame* Frame, const char* filename, const struct utimbuf* times) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsStringReadable(filename);
    FaultSafeUserMemAccess::VerifyIsReadableOrNull(times, sizeof(utimbuf));
    uint64_t Result = ::utime(filename, times);
    SYSCALL_ERRNO();
  });

  // utimes: `times`, when non-null, points at an array of two timevals.
  REGISTER_SYSCALL_IMPL_X64(utimes, [](FEXCore::Core::CpuStateFrame* Frame, const char* filename, const struct timeval times[2]) -> uint64_t {
    FaultSafeUserMemAccess::VerifyIsStringReadable(filename);
    FaultSafeUserMemAccess::VerifyIsReadableOrNull(times, sizeof(timeval) * 2);
    uint64_t Result = ::utimes(filename, times);
    SYSCALL_ERRNO();
  });
}
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/LinuxSyscalls/x64/Types.h
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: LinuxSyscalls|syscalls-x86-64
$end_info$
*/

#pragma once

#include "LinuxSyscalls/Types.h"
#include <FEXCore/Utils/CompilerDefs.h>

#include <linux/types.h>
#include <asm/ipcbuf.h>
#include <asm/posix_types.h>
#include <asm/sembuf.h>
#include <cstdint>
#include <sys/stat.h>
#include <type_traits>

namespace FEX::HLE::x64 {
// Fixed-width aliases used by the x86-64 guest structure definitions in this namespace.
// Signed 64-bit time value.
using kernel_old_time_t = int64_t;
// Unsigned 64-bit "long" as seen by the x86-64 guest.
using kernel_ulong_t = uint64_t;
// Alias of the host's time_t.
using __time_t = time_t;

struct ipc_perm_64 {
  uint32_t key;
  uint32_t uid;
  uint32_t gid;
  uint32_t cuid;
  uint32_t cgid;
  uint16_t mode;
  uint16_t _pad1;
  uint16_t seq;
  uint16_t _pad2;
  kernel_ulong_t _pad[2];

  ipc_perm_64() = delete;

  operator struct ipc64_perm() const {
    struct ipc64_perm perm {};
    perm.key = key;
    perm.uid = uid;
    perm.gid = gid;
    perm.cuid = cuid;
    perm.cgid = cgid;
    perm.mode = mode;
    perm.seq = seq;
    return perm;
  }

  ipc_perm_64(struct ipc64_perm perm) {
    key = perm.key;
    uid = perm.uid;
    gid = perm.gid;
    cuid = perm.cuid;
    cgid = perm.cgid;
    mode = perm.mode;
    seq = perm.seq;
    _pad1 = _pad2 = 0;
  }
};

static_assert(std::is_trivially_copyable_v<ipc_perm_64>);
static_assert(sizeof(ipc_perm_64) == 48);

// Guest (x86-64) representation of `struct semid64_ds`.
// Matches the definition in x86/include/uapi/asm/sembuf.h; the static_asserts below pin it to
// 104 bytes and require it to stay trivially copyable.
struct FEX_ANNOTATE("alias-x86_64-semid64_ds") FEX_ANNOTATE("fex-match") semid_ds_64 {
  FEX::HLE::x64::ipc_perm_64 sem_perm;
  time_t sem_otime;
  uint64_t __unused1;
  time_t sem_ctime;
  uint64_t __unused2;
  uint64_t sem_nsems;
  uint64_t __unused3;
  uint64_t __unused4;

  semid_ds_64() = delete;

  // Guest -> host conversion. Host fields without a guest counterpart stay value-initialized (zero).
  operator struct semid64_ds() const {
    struct semid64_ds buf {};
    buf.sem_perm = sem_perm;

    buf.sem_otime = sem_otime;
    buf.sem_ctime = sem_ctime;
    buf.sem_nsems = sem_nsems;
    return buf;
  }

  // Host -> guest conversion.
  semid_ds_64(struct semid64_ds buf)
    : sem_perm {buf.sem_perm} {
    sem_otime = buf.sem_otime;
    sem_ctime = buf.sem_ctime;
    sem_nsems = buf.sem_nsems;
    // Reserved fields are copied to guest memory along with the rest of the structure; zero them
    // so the guest never observes indeterminate host data.
    __unused1 = __unused2 = __unused3 = __unused4 = 0;
  }
};

static_assert(std::is_trivially_copyable_v<FEX::HLE::x64::semid_ds_64>);
static_assert(sizeof(FEX::HLE::x64::semid_ds_64) == 104);

// Guest (x86-64) view of the semctl argument union. Every member is at most pointer sized;
// the static_asserts below pin the union to 8 bytes.
union semun {
  // Plain integer value.
  int val;
  // Pointer to a guest semid_ds_64 buffer.
  FEX::HLE::x64::semid_ds_64* buf;
  // Pointer to an array of unsigned shorts.
  unsigned short* array;
  // Pointer to a fex_seminfo structure (declared elsewhere).
  struct fex_seminfo* __buf;
  // Generic pointer-sized member.
  void* __pad;
};

static_assert(std::is_trivially_copyable_v<FEX::HLE::x64::semun>);
static_assert(sizeof(FEX::HLE::x64::semun) == 8);

// Guest (x86-64) representation of `struct stat`, convertible to and from the host `struct stat`.
// The static_asserts below pin it to 144 bytes and require it to stay trivially copyable.
struct FEX_ANNOTATE("fex-match") FEX_PACKED guest_stat {
  uint64_t st_dev;
  uint64_t st_ino;
  uint64_t st_nlink;

  unsigned int st_mode;
  unsigned int st_uid;
  unsigned int st_gid;
  unsigned int __pad0;
  uint64_t st_rdev;
  int64_t st_size;
  int64_t st_blksize;
  int64_t st_blocks; /* Number 512-byte blocks allocated. */

  // Timestamps are stored as separate second / nanosecond words.
  uint64_t st_atime_;
  uint64_t fex_st_atime_nsec;
  uint64_t st_mtime_;
  uint64_t fex_st_mtime_nsec;
  uint64_t st_ctime_;
  uint64_t fex_st_ctime_nsec;
  int64_t unused[3];

  guest_stat() = delete;

  // Guest -> host conversion. Host fields without a guest counterpart stay value-initialized (zero).
  operator struct stat() const {
    struct stat val {};
#define COPY(x) val.x = x
    COPY(st_dev);
    COPY(st_ino);
    COPY(st_nlink);

    COPY(st_mode);
    COPY(st_uid);
    COPY(st_gid);

    COPY(st_rdev);
    COPY(st_size);
    COPY(st_blksize);
    COPY(st_blocks);

    val.st_atim.tv_sec = st_atime_;
    val.st_atim.tv_nsec = fex_st_atime_nsec;

    val.st_mtim.tv_sec = st_mtime_;
    val.st_mtim.tv_nsec = fex_st_mtime_nsec;

    val.st_ctim.tv_sec = st_ctime_;
    val.st_ctim.tv_nsec = fex_st_ctime_nsec;
#undef COPY
    return val;
  }

  // Host -> guest conversion.
  guest_stat(struct stat val) {
#define COPY(x) x = val.x
    COPY(st_dev);
    COPY(st_ino);
    COPY(st_nlink);

    COPY(st_mode);
    COPY(st_uid);
    COPY(st_gid);

    COPY(st_rdev);
    COPY(st_size);
    COPY(st_blksize);
    COPY(st_blocks);

    // Read every timestamp through the timespec members so the three pairs are handled uniformly.
    st_atime_ = val.st_atim.tv_sec;
    fex_st_atime_nsec = val.st_atim.tv_nsec;

    st_mtime_ = val.st_mtim.tv_sec;
    fex_st_mtime_nsec = val.st_mtim.tv_nsec;

    st_ctime_ = val.st_ctim.tv_sec;
    fex_st_ctime_nsec = val.st_ctim.tv_nsec;
#undef COPY
    __pad0 = 0;
    // The reserved tail is copied to guest memory along with the rest of the structure; zero it
    // so the guest never observes indeterminate host data.
    unused[0] = unused[1] = unused[2] = 0;
  }
};

// Original definition in `arch/x86/include/uapi/asm/stat.h` for future excavation
static_assert(std::is_trivially_copyable_v<FEX::HLE::x64::guest_stat>);
static_assert(sizeof(FEX::HLE::x64::guest_stat) == 144);

// There is no public definition of this struct
// Matches the definition of `struct linux_dirent` in fs/readdir.c
// The static_asserts below pin every field offset and the overall size (24 bytes including
// trailing padding).
struct FEX_ANNOTATE("fex-match") linux_dirent {
  uint64_t d_ino;
  uint64_t d_off;
  uint16_t d_reclen;
  // First byte of the entry name; declared with one element, the name continues past it.
  char d_name[1];
  /* Has hidden null character and d_type */
};
static_assert(std::is_trivially_copyable_v<linux_dirent>);
static_assert(offsetof(linux_dirent, d_ino) == 0);
static_assert(offsetof(linux_dirent, d_off) == 8);
static_assert(offsetof(linux_dirent, d_reclen) == 16);
static_assert(offsetof(linux_dirent, d_name) == 18);
static_assert(sizeof(linux_dirent) == 24);

// There is no public definition of this struct
// Matches the definition of `struct linux_dirent64` in include/linux/dirent.h
// Packed so the fixed header is exactly 19 bytes (see the static_assert below); the name follows
// as a flexible array member.
struct FEX_ANNOTATE("fex-match") FEX_PACKED linux_dirent_64 {
  uint64_t d_ino;
  uint64_t d_off;
  uint16_t d_reclen;
  uint8_t d_type;
  // Entry name, stored directly after the header.
  char d_name[];
};
static_assert(std::is_trivially_copyable_v<linux_dirent_64>);
static_assert(sizeof(linux_dirent_64) == 19);
} // namespace FEX::HLE::x64


================================================
FILE: Source/Tools/LinuxEmulation/Thunks.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
meta: glue|thunks ~ FEXCore side of thunks: Registration, Lookup
tags: glue|thunks
$end_info$
*/

#include "Thunks.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/ThreadManager.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/Thunks.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/fextl/set.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/unordered_map.h>

#include <cstdint>
#include <dlfcn.h>

#include <malloc.h>
#include <mutex>
#include <shared_mutex>
#include <stdint.h>
#include <utility>

#ifdef ENABLE_JEMALLOC_GLIBC
extern "C" {
// jemalloc defines nothrow on its internal C function signatures.
#define JEMALLOC_NOTHROW __attribute__((nothrow))
// Forward declare jemalloc functions because we can't include the headers from the glibc jemalloc project.
// This is because we can't simultaneously set up include paths for both of our internal jemalloc modules.
FEX_DEFAULT_VISIBILITY JEMALLOC_NOTHROW extern int glibc_je_is_known_allocation(void* ptr);
}
#endif

// Machine-code template for host->guest trampolines. It is never called directly: the bytes between
// __start_HostToGuestTrampolineTemplate and __stop_HostToGuestTrampolineTemplate are copied for each
// trampoline instance. The template ends with four zeroed 64-bit words that hold the per-instance
// TrampolineInstanceInfo.
//
// Both variants do the same thing: place the address of the embedded TrampolineInstanceInfo in
// r11/x11 and branch to the address stored in its first 64-bit word.
static __attribute__((aligned(16), naked, section("HostToGuestTrampolineTemplate"))) void HostToGuestTrampolineTemplate() {
#if defined(ARCHITECTURE_x86_64)
  // r11 <- address of the instance info, then jump through its first word.
  asm("lea 0f(%rip), %r11 \n"
      "jmpq *0f(%rip) \n"
      ".align 8 \n"
      "0: \n"
      ".quad 0, 0, 0, 0 \n" // TrampolineInstanceInfo
  );
#elif defined(ARCHITECTURE_arm64)
  asm(
    // x11 is part of the custom ABI and needs to point to the TrampolineInstanceInfo.
    "ldr x16, 0f \n"
    "adr x11, 0f \n"
    "br x16 \n"
    // Manually align to the next 8-byte boundary
    // NOTE: GCC over-aligns to a full page when using .align directives on ARM (last tested on GCC 11.2)
    "nop \n"
    "0: \n"
    ".quad 0, 0, 0, 0 \n" // TrampolineInstanceInfo
  );
#else
#error Unsupported host architecture
#endif
}

extern char __start_HostToGuestTrampolineTemplate[];
extern char __stop_HostToGuestTrampolineTemplate[];

namespace FEX::HLE {

static thread_local FEX::HLE::ThreadStateObject* ThreadObject {};

// Per-trampoline data stored in the four 64-bit words at the end of HostToGuestTrampolineTemplate.
// The field order is significant: the trampoline code branches through the first word.
struct TrampolineInstanceInfo {
  // Address the trampoline code jumps to (first word of the instance data).
  void* HostPacker;
  // Address of ThunkHandler_impl::CallCallback, written when the trampoline is created.
  uintptr_t CallCallback;
  // Guest address of the argument unpacker.
  uintptr_t GuestUnpacker;
  // Guest address of the function to call.
  uintptr_t GuestTarget;
};

// Opaque type pointing to an instance of HostToGuestTrampolineTemplate and its
// embedded TrampolineInstanceInfo
struct HostToGuestTrampolinePtr;

// Returns the TrampolineInstanceInfo embedded in the trampoline copy at `Trampoline`.
// The info occupies the final sizeof(TrampolineInstanceInfo) bytes of the template.
static TrampolineInstanceInfo& GetInstanceInfo(HostToGuestTrampolinePtr* Trampoline) {
  const auto TemplateSize = __stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate;
  auto* const InfoAddress = reinterpret_cast<char*>(Trampoline) + (TemplateSize - sizeof(TrampolineInstanceInfo));
  return *reinterpret_cast<TrampolineInstanceInfo*>(InfoAddress);
}

// Key identifying a host trampoline: the pair of guest unpacker and guest target addresses.
struct GuestcallInfo {
  uintptr_t GuestUnpacker;
  uintptr_t GuestTarget;

  // Member-wise equality; both addresses must match.
  bool operator==(const GuestcallInfo&) const noexcept = default;
};

// Hash functor for GuestcallInfo keys.
struct GuestcallInfoHash {
  size_t operator()(const GuestcallInfo& x) const noexcept {
    // Only the target address feeds the hash since it is generally unique on its own.
    // Keys that collide are still told apart by GuestcallInfo::operator== inside the map.
    const std::hash<uintptr_t> TargetHasher {};
    return TargetHasher(x.GuestTarget);
  }
};

// Forward declarations of the built-in thunk entry points. Each one receives a pointer to a
// packed argument structure (names ending in RV also carry the return value in that structure).
// They are referenced by the default contents of ThunkHandler_impl::Thunks and defined further below.
namespace ThunkFunctions {
  void LoadLib(void* ArgsV);
  void IsLibLoaded(void* ArgsRV);
  void IsHostHeapAllocation(void* ArgsRV);
  void LinkAddressToGuestFunction(void* argsv);
  void AllocateHostTrampolineForGuestFunction(void* ArgsRV);
} // namespace ThunkFunctions

// Concrete thunk handler: keeps the table of known thunks (keyed by SHA256 sum), the set of loaded
// thunk libraries, and the cache/pool of host->guest trampolines.
struct ThunkHandler_impl final : public FEX::HLE::ThunkHandler {
  // Taken shared for lookups and exclusively when Libs, Thunks or the trampoline cache are modified
  // by LoadLib / MakeHostTrampolineForGuestFunction.
  std::shared_mutex ThunksMutex;

  // Can't be a string_view. We need to keep a copy of the library name in-case string_view pointer goes away.
  // Ideally we track when a library has been unloaded and remove it from this set before the memory backing goes away.
  fextl::set<fextl::string> Libs;

  // Cache of already created trampolines, keyed by (GuestUnpacker, GuestTarget).
  fextl::unordered_map<GuestcallInfo, HostToGuestTrampolinePtr*, GuestcallInfoHash> GuestcallToHostTrampoline;

  // Bump allocator state for trampoline instances: next free byte and bytes remaining in the current mapping.
  // The pointer is only meaningful once the available count is non-zero.
  uint8_t* HostTrampolineInstanceDataPtr;
  size_t HostTrampolineInstanceDataAvailable = 0;

  /*
      Set arg0/1 to arg regs, use CTX::HandleCallback to handle the callback
  */
  // Dies if the calling host thread has no registered guest thread state.
  // 64-bit guests receive the arguments in RDI/RSI, 32-bit guests in RCX/RDX (after checking that
  // arg1 fits in 32 bits).
  static void CallCallback(void* callback, void* arg0, void* arg1) {
    if (!ThreadObject) {
      ERROR_AND_DIE_FMT("Thunked library attempted to invoke guest callback asynchronously");
    }

    auto CTX = static_cast<FEXCore::Context::Context*>(ThreadObject->Thread->CTX);
    auto ThunkHandler = reinterpret_cast<ThunkHandler_impl*>(FEX::HLE::_SyscallHandler->GetThunkHandler());

    if (ThunkHandler->Is64BitMode()) {
      ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDI] = (uintptr_t)arg0;
      ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSI] = (uintptr_t)arg1;
    } else {
      if ((reinterpret_cast<uintptr_t>(arg1) >> 32) != 0) {
        ERROR_AND_DIE_FMT("Tried to call guest function with arguments packed to a 64-bit address");
      }
      ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RCX] = (uintptr_t)arg0;
      ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RDX] = (uintptr_t)arg1;
    }

    CTX->HandleCallback(ThreadObject->Thread, (uintptr_t)callback);
  }

  // Returns the thunk registered for `sha256`, or nullptr when there is none.
  FEXCore::ThunkedFunction* LookupThunk(const FEXCore::IR::SHA256Sum& sha256) override {

    std::shared_lock lk(ThunksMutex);

    auto it = Thunks.find(sha256);

    if (it != Thunks.end()) {
      return it->second;
    } else {
      return nullptr;
    }
  }

  // Remembers the guest thread state for the calling host thread (stored in a thread_local).
  void RegisterTLSState(FEX::HLE::ThreadStateObject* _ThreadObject) override {
    ThreadObject = _ThreadObject;
  }

  // Adds frontend-provided thunks. emplace() keeps an existing entry if the sum is already present.
  // NOTE(review): this does not take ThunksMutex — presumably only called before guest threads run; confirm.
  void AppendThunkDefinitions(std::span<const FEXCore::IR::ThunkDefinition> Definitions) override {
    for (auto& Definition : Definitions) {
      Thunks.emplace(Definition.Sum, Definition.ThunkFunction);
    }
  }

  // Loads the host thunk library for `Name` and registers its exports (defined out of line).
  void LoadLib(std::string_view Name);

private:
  // Bits in a SHA256 sum are already randomly distributed, so truncation yields a suitable hash function
  struct TruncatingSHA256Hash {
    size_t operator()(const FEXCore::IR::SHA256Sum& SHA256Sum) const noexcept {
      return (const size_t&)SHA256Sum;
    }
  };

  // Thunk table, pre-populated with the built-in thunk functions.
  fextl::unordered_map<FEXCore::IR::SHA256Sum, FEXCore::ThunkedFunction*, TruncatingSHA256Hash> Thunks = {
    {// sha256(fex:loadlib)
     {0x27, 0x7e, 0xb7, 0x69, 0x5b, 0xe9, 0xab, 0x12, 0x6e, 0xf7, 0x85, 0x9d, 0x4b, 0xc9, 0xa2, 0x44,
      0x46, 0xcf, 0xbd, 0xb5, 0x87, 0x43, 0xef, 0x28, 0xa2, 0x65, 0xba, 0xfc, 0x89, 0x0f, 0x77, 0x80},
     &ThunkFunctions::LoadLib},
    {// sha256(fex:is_lib_loaded)
     {0xee, 0x57, 0xba, 0x0c, 0x5f, 0x6e, 0xef, 0x2a, 0x8c, 0xb5, 0x19, 0x81, 0xc9, 0x23, 0xe6, 0x51,
      0xae, 0x65, 0x02, 0x8f, 0x2b, 0x5d, 0x59, 0x90, 0x6a, 0x7e, 0xe2, 0xe7, 0x1c, 0x33, 0x8a, 0xff},
     &ThunkFunctions::IsLibLoaded},
    {// sha256(fex:is_host_heap_allocation)
     {0xf5, 0x77, 0x68, 0x43, 0xbb, 0x6b, 0x28, 0x18, 0x40, 0xb0, 0xdb, 0x8a, 0x66, 0xfb, 0x0e, 0x2d,
      0x98, 0xc2, 0xad, 0xe2, 0x5a, 0x18, 0x5a, 0x37, 0x2e, 0x13, 0xc9, 0xe7, 0xb9, 0x8c, 0xa9, 0x3e},
     &ThunkFunctions::IsHostHeapAllocation},
    {// sha256(fex:link_address_to_function)
     {0xe6, 0xa8, 0xec, 0x1c, 0x7b, 0x74, 0x35, 0x27, 0xe9, 0x4f, 0x5b, 0x6e, 0x2d, 0xc9, 0xa0, 0x27,
      0xd6, 0x1f, 0x2b, 0x87, 0x8f, 0x2d, 0x35, 0x50, 0xea, 0x16, 0xb8, 0xc4, 0x5e, 0x42, 0xfd, 0x77},
     &ThunkFunctions::LinkAddressToGuestFunction},
    {// sha256(fex:allocate_host_trampoline_for_guest_function)
     {0x9b, 0xb2, 0xf4, 0xb4, 0x83, 0x7d, 0x28, 0x93, 0x40, 0xcb, 0xf4, 0x7a, 0x0b, 0x47, 0x85, 0x87,
      0xf9, 0xbc, 0xb5, 0x27, 0xca, 0xa6, 0x93, 0xa5, 0xc0, 0x73, 0x27, 0x24, 0xae, 0xc8, 0xb8, 0x5a},
     &ThunkFunctions::AllocateHostTrampolineForGuestFunction},
  };

  // Configuration: guest bitness and the directory containing the host thunk libraries.
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  FEX_CONFIG_OPT(ThunkHostLibsPath, THUNKHOSTLIBS);
};

void ThunkHandler_impl::LoadLib(std::string_view Name) {
  auto SOName = ThunkHostLibsPath();
  while (SOName.ends_with('/')) {
    SOName.pop_back();
  }
  SOName = fmt::format("{}{}/{}-host.so", SOName, (Is64BitMode() ? "" : "_32"), Name);

  LogMan::Msg::DFmt("LoadLib: {} -> {}", Name, SOName);

  auto Handle = dlopen(SOName.c_str(), RTLD_LOCAL | RTLD_NOW);
  if (!Handle) {
    ERROR_AND_DIE_FMT("LoadLib: Failed to dlopen thunk library {}: {}", SOName, dlerror());
  }

  // Library names often include dashes, which may not be used in C++ identifiers.
  // They are replaced with underscores hence.
  auto InitSym = "fexthunks_exports_" + fextl::string {Name};
  std::replace(InitSym.begin(), InitSym.end(), '-', '_');

  struct ExportEntry {
    uint8_t* sha256;
    FEXCore::ThunkedFunction* Fn;
  };

  ExportEntry* (*InitFN)();
  (void*&)InitFN = dlsym(Handle, InitSym.c_str());
  if (!InitFN) {
    ERROR_AND_DIE_FMT("LoadLib: Failed to find export {}", InitSym);
  }

  auto Exports = InitFN();
  if (!Exports) {
    ERROR_AND_DIE_FMT("LoadLib: Failed to initialize thunk library {}. "
                      "Check if the corresponding host library is installed "
                      "or disable thunking of this library.",
                      Name);
  }

  {
    std::lock_guard lk(ThunksMutex);

    Libs.insert(fextl::string {Name});

    int i;
    for (i = 0; Exports[i].sha256; i++) {
      Thunks[*reinterpret_cast<FEXCore::IR::SHA256Sum*>(Exports[i].sha256)] = Exports[i].Fn;
    }

    LogMan::Msg::DFmt("Loaded {} syms", i);
  }
}

/**
 * Generates a host-callable trampoline to call guest functions via the host ABI.
 *
 * This trampoline uses the same calling convention as the given HostPacker. Trampolines
 * are cached, so it's safe to call this function repeatedly on the same arguments without
 * leaking memory.
 *
 * Invoking the returned trampoline has the effect of:
 * - packing the arguments (using the HostPacker identified by its SHA256)
 * - performing a host->guest transition
 * - unpacking the arguments via GuestUnpacker
 * - calling the function at GuestTarget
 *
 * The primary use case of this is ensuring that guest function pointers ("callbacks")
 * passed to thunked APIs can safely be called by the native host library.
 *
 * Returns a pointer to the generated host trampoline and its TrampolineInstanceInfo.
 *
 * If HostPacker is zero, the trampoline will be partially initialized and needs to be
 * finalized with a call to FinalizeHostTrampolineForGuestFunction. A typical use case
 * is to allocate the trampoline for a given GuestTarget/GuestUnpacker on the guest-side,
 * and provide the HostPacker host-side.
 */
// Returns the host trampoline for (GuestUnpacker, GuestTarget), creating it on first use.
//
// Lookup is attempted under a shared lock first; on a miss the exclusive lock is taken and the
// lookup repeated before a new trampoline is carved out of the executable trampoline pool.
// Failure to grow the pool is fatal.
FEX_DEFAULT_VISIBILITY HostToGuestTrampolinePtr*
MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  LOGMAN_THROW_A_FMT(GuestTarget, "Tried to create host-trampoline to null pointer guest function");

  const auto ThunkHandler = reinterpret_cast<ThunkHandler_impl*>(FEX::HLE::_SyscallHandler->GetThunkHandler());

  const GuestcallInfo gci = {GuestUnpacker, GuestTarget};

  // Try first with shared_lock
  {
    std::shared_lock lk(ThunkHandler->ThunksMutex);

    auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci);
    if (found != ThunkHandler->GuestcallToHostTrampoline.end()) {
      return found->second;
    }
  }

  std::lock_guard lk(ThunkHandler->ThunksMutex);

  // Retry lookup with full lock before making a new trampoline to avoid double trampolines
  {
    auto found = ThunkHandler->GuestcallToHostTrampoline.find(gci);
    if (found != ThunkHandler->GuestcallToHostTrampoline.end()) {
      return found->second;
    }
  }

  LogMan::Msg::DFmt("Thunks: Adding host trampoline for guest function {:#x} via unpacker {:#x}", GuestTarget, GuestUnpacker);

  // Section bounds are ordered, so the difference is non-negative; keep it unsigned to match the pool counter.
  const auto HostToGuestTrampolineSize = static_cast<size_t>(__stop_HostToGuestTrampolineTemplate - __start_HostToGuestTrampolineTemplate);

  if (ThunkHandler->HostTrampolineInstanceDataAvailable < HostToGuestTrampolineSize) {
    // Pool exhausted (or never allocated): map a fresh executable region.
    constexpr size_t allocation_step = 16 * 1024;
    void* const NewPool = mmap(nullptr, allocation_step, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    // Check the mapping unconditionally and before touching the handler state: writing through
    // MAP_FAILED below would crash, and an assertion-style check may not be active in every build
    // (NOTE(review): confirm how LOGMAN_THROW_A_FMT behaves with assertions disabled).
    if (NewPool == MAP_FAILED) {
      ERROR_AND_DIE_FMT("Failed to mmap HostTrampolineInstanceDataPtr");
    }

    ThunkHandler->HostTrampolineInstanceDataPtr = static_cast<uint8_t*>(NewPool);
    ThunkHandler->HostTrampolineInstanceDataAvailable = allocation_step;
  }

  // Carve the next trampoline out of the pool and stamp the template into it.
  auto HostTrampoline = reinterpret_cast<HostToGuestTrampolinePtr*>(ThunkHandler->HostTrampolineInstanceDataPtr);
  ThunkHandler->HostTrampolineInstanceDataAvailable -= HostToGuestTrampolineSize;
  ThunkHandler->HostTrampolineInstanceDataPtr += HostToGuestTrampolineSize;
  memcpy(HostTrampoline, (void*)&HostToGuestTrampolineTemplate, HostToGuestTrampolineSize);

  // Fill in the per-instance data at the end of the copied template.
  GetInstanceInfo(HostTrampoline) = TrampolineInstanceInfo {
    .HostPacker = HostPacker, .CallCallback = (uintptr_t)&ThunkHandler_impl::CallCallback, .GuestUnpacker = GuestUnpacker, .GuestTarget = GuestTarget};

  ThunkHandler->GuestcallToHostTrampoline[gci] = HostTrampoline;
  return HostTrampoline;
}

// Supplies the host packer for a trampoline that was created without one.
// A null trampoline is ignored, and a trampoline that already has a host packer is left untouched.
FEX_DEFAULT_VISIBILITY void FinalizeHostTrampolineForGuestFunction(HostToGuestTrampolinePtr* TrampolineAddress, void* HostPacker) {
  if (!TrampolineAddress) {
    return;
  }

  auto& Info = GetInstanceInfo(TrampolineAddress);

  // Sanity check: every trampoline created by this file carries the address of CallCallback.
  LOGMAN_THROW_A_FMT(Info.CallCallback == (uintptr_t)&ThunkHandler_impl::CallCallback, "Invalid trampoline at {} passed to {}",
                     fmt::ptr(TrampolineAddress), __FUNCTION__);

  if (Info.HostPacker) {
    // Already finalized; keep the existing packer.
    return;
  }

  LogMan::Msg::DFmt("Thunks: Finalizing trampoline at {} with host packer {}", fmt::ptr(TrampolineAddress), fmt::ptr(HostPacker));
  Info.HostPacker = HostPacker;
}

namespace ThunkFunctions {
  // Thunk entry point: loads the thunk library named by the packed argument structure.
  void LoadLib(void* ArgsV) {
    struct LoadlibArgs {
      const char* Name;
    };

    const char* LibName = reinterpret_cast<LoadlibArgs*>(ArgsV)->Name;
    auto* Handler = reinterpret_cast<ThunkHandler_impl*>(FEX::HLE::_SyscallHandler->GetThunkHandler());
    Handler->LoadLib(LibName);
  }

  // Thunk entry point: writes to `rv` whether the library `Name` is in the set of loaded thunk libraries.
  void IsLibLoaded(void* ArgsRV) {
    struct ArgsRV_t {
      const char* Name;
      bool rv;
    };

    auto* Packed = reinterpret_cast<ArgsRV_t*>(ArgsRV);
    auto* Handler = reinterpret_cast<ThunkHandler_impl*>(FEX::HLE::_SyscallHandler->GetThunkHandler());

    // Shared lock: this only reads the library set.
    std::shared_lock lk(Handler->ThunksMutex);
    Packed->rv = Handler->Libs.contains(Packed->Name);
  }

  /**
   * Thunk entry point: reports whether a pointer belongs to the host heap.
   *
   * Thunked APIs that may receive either guest or host heap pointers use
   * this to tell the two apart. Only available when built with
   * ENABLE_JEMALLOC_GLIBC; otherwise calling it is fatal.
   */
  void IsHostHeapAllocation(void* ArgsRV) {
#ifdef ENABLE_JEMALLOC_GLIBC
    struct ArgsRV_t {
      void* ptr;
      bool rv;
    };

    auto& Packed = *reinterpret_cast<ArgsRV_t*>(ArgsRV);
    Packed.rv = glibc_je_is_known_allocation(Packed.ptr);
#else
    // Thunks usage without jemalloc isn't supported
    ERROR_AND_DIE_FMT("Unsupported: Thunks querying for host heap allocation information");
#endif
  }

  /**
   * Thunk entry point: asks the Core to redirect calls made to
   * `original_callee` to the guest function at `target_addr`. The original
   * callee address reaches the target as an implicit argument in r11.
   *
   * For 32-bit the implicit argument is stored in the lower 32-bits of mm0.
   *
   * This lets host function pointers returned from thunked APIs be called
   * safely by the guest.
   */
  void LinkAddressToGuestFunction(void* argsv) {
    struct args_t {
      uintptr_t original_callee;
      uintptr_t target_addr; // Guest function to call when branching to original_callee
    };

    const auto& Link = *reinterpret_cast<args_t*>(argsv);
    auto* Context = static_cast<FEXCore::Context::Context*>(ThreadObject->Thread->CTX);
    Context->AddThunkTrampolineIRHandler(Link.original_callee, Link.target_addr);
  }

  /**
   * Thunk entry point: guest-side half of trampoline creation.
   *
   * Creates (or finds) the trampoline for GuestTarget/GuestUnpacker without
   * a host packer and returns its address in `rv`. The host must afterwards
   * call FinalizeHostTrampolineForGuestFunction to provide the packer and
   * make the trampoline usable.
   *
   * Together the two steps are equivalent to a single host-side call to
   * MakeHostTrampolineForGuestFunction; the split exists for cases where the
   * host lacks the information to create the trampoline by itself.
   */
  void AllocateHostTrampolineForGuestFunction(void* ArgsRV) {
    struct ArgsRV_t {
      uintptr_t GuestUnpacker;
      uintptr_t GuestTarget;
      uintptr_t rv; // Pointer to host trampoline + TrampolineInstanceInfo
    };

    auto& Packed = *reinterpret_cast<ArgsRV_t*>(ArgsRV);
    auto* const Trampoline = MakeHostTrampolineForGuestFunction(nullptr, Packed.GuestTarget, Packed.GuestUnpacker);
    Packed.rv = (uintptr_t)Trampoline;
  }
} // namespace ThunkFunctions

// Returns the guest stack pointer (RSP) of the guest thread registered for the calling host thread.
// Fatal if the calling thread has no registered guest thread state.
FEX_DEFAULT_VISIBILITY void* GetGuestStack() {
  if (ThreadObject == nullptr) {
    ERROR_AND_DIE_FMT("Thunked library attempted to query guest stack pointer asynchronously");
  }

  const auto GuestRSP = ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP];
  return (void*)(uintptr_t)GuestRSP;
}

// Sets the guest stack pointer (RSP) of the guest thread registered for the calling host thread.
// Fatal if the calling thread has no registered guest thread state, or if `NewAddress` does not
// fit in 32 bits (the function rejects such addresses as invalid for a 32-bit guest).
FEX_DEFAULT_VISIBILITY void MoveGuestStack(uintptr_t NewAddress) {
  if (!ThreadObject) {
    // This diagnostic previously said "query", copied from GetGuestStack; this function moves the stack.
    ERROR_AND_DIE_FMT("Thunked library attempted to move guest stack pointer asynchronously");
  }

  if (NewAddress >> 32) {
    ERROR_AND_DIE_FMT("Tried to set stack pointer for 32-bit guest to a 64-bit address");
  }

  ThreadObject->Thread->CurrentFrame->State.gregs[FEXCore::X86State::REG_RSP] = NewAddress;
}

// Factory for the concrete thunk handler, returned through the ThunkHandler interface.
fextl::unique_ptr<ThunkHandler> CreateThunkHandler() {
  fextl::unique_ptr<ThunkHandler> Handler = fextl::make_unique<ThunkHandler_impl>();
  return Handler;
}
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/Thunks.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/Thunks.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/IR/IR.h>

#include <span>

namespace FEX::HLE {
struct ThreadStateObject;

// Frontend-side extension of FEXCore::ThunkHandler. Instances are obtained from CreateThunkHandler().
class ThunkHandler : public FEXCore::ThunkHandler {
public:
  /**
   * @brief Registers the guest thread state to use for the calling thread.
   *
   * @param ThreadObject The thread state object to associate with the calling thread.
   */
  virtual void RegisterTLSState(FEX::HLE::ThreadStateObject* ThreadObject) = 0;
  /**
   * @brief Allows the frontend to register its own thunk handlers independent of what is controlled in the backend.
   *
   * @param Definitions A span of thunk definitions that the frontend controls
   */
  virtual void AppendThunkDefinitions(std::span<const FEXCore::IR::ThunkDefinition> Definitions) = 0;
};
fextl::unique_ptr<ThunkHandler> CreateThunkHandler();
} // namespace FEX::HLE


================================================
FILE: Source/Tools/LinuxEmulation/VDSO_Emulation.cpp
================================================
// SPDX-License-Identifier: MIT
#include "VDSO_Emulation.h"

#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Types.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/IR/IR.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/map.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <array>
#include <dlfcn.h>
#include <elf.h>
#include <fcntl.h>
#include <filesystem>
#include <sys/auxv.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <unistd.h>

namespace FEX::VDSO {
// Guest-side entry points discovered in (or generated for) the guest VDSO.
// Value-initialised, so every pointer starts out null.
VDSOEntrypoints VDSOPointers {};
namespace VDSOHandlers {
  // Function-pointer types mirroring the host functions that the VDSO symbols stand in for.
  using TimeType = decltype(::time)*;
  using GetTimeOfDayType = decltype(::gettimeofday)*;
  using ClockGetTimeType = decltype(::clock_gettime)*;
  using ClockGetResType = decltype(::clock_getres)*;
  using GetCPUType = decltype(FHU::Syscalls::getcpu)*;
  // (buffer, len, flags, opaque_state, opaque_len) -> bytes written or negative error.
  using GetRandomType = ssize_t (*)(void*, size_t, uint32_t, void*, size_t);

  // Host VDSO entry points. Each one is assigned by LoadHostVDSO only when the
  // corresponding symbol was found in the host VDSO image.
  TimeType TimePtr;
  GetTimeOfDayType GetTimeOfDayPtr;
  ClockGetTimeType ClockGetTimePtr;
  ClockGetResType ClockGetResPtr;
  GetCPUType GetCPUPtr;
  GetRandomType GetRandomPtr;
} // namespace VDSOHandlers

// Signature shared by every thunk handler below: a single pointer to a packed
// block holding the arguments followed by the return-value slot.
using HandlerPtr = void (*)(void*);
namespace x64 {
  // Converts a libc-style result into the kernel return convention: an all-ones
  // value (-1) is replaced by the negated errno, anything else is passed through.
  static uint64_t SyscallRet(uint64_t Result) {
    const bool Failed = Result == static_cast<uint64_t>(-1);
    return Failed ? static_cast<uint64_t>(-errno) : Result;
  }
  // glibc handlers
  namespace glibc {
    // Fallback handlers that service each call through the host libc (or a direct
    // syscall). Every handler receives a pointer to a packed block that holds the
    // guest arguments followed by the `rv` slot the result is written to.

    // time(): forwards the guest pointer directly to the host.
    static void time(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        time_t* a_0;
        uint64_t rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const uint64_t HostResult = ::time(Packed->a_0);
      Packed->rv = SyscallRet(HostResult);
    }

    // gettimeofday(): forwards both guest pointers directly to the host.
    static void gettimeofday(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        struct timeval* tv;
        struct timezone* tz;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = ::gettimeofday(Packed->tv, Packed->tz);
      Packed->rv = SyscallRet(HostResult);
    }

    // clock_gettime(): forwards clock id and timespec pointer to the host.
    static void clock_gettime(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        struct timespec* tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = ::clock_gettime(Packed->clk_id, Packed->tp);
      Packed->rv = SyscallRet(HostResult);
    }

    // clock_getres(): forwards clock id and timespec pointer to the host.
    static void clock_getres(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        struct timespec* tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = ::clock_getres(Packed->clk_id, Packed->tp);
      Packed->rv = SyscallRet(HostResult);
    }

    // getcpu(): issued through the FHU syscall wrapper.
    static void getcpu(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        uint32_t* cpu;
        uint32_t* node;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = FHU::Syscalls::getcpu(Packed->cpu, Packed->node);
      Packed->rv = SyscallRet(HostResult);
    }

    // getrandom(): answers the parameter query locally, otherwise issues the raw syscall.
    static void getrandom(void* ArgsRV) {
      struct vgetrandom_opaque_params {
        uint32_t size_of_opaque_state;
        uint32_t mmap_prot;
        uint32_t mmap_flags;
        uint32_t reserved[13];
      };
      static_assert(sizeof(vgetrandom_opaque_params) == sizeof(uint32_t[16]));

      struct __attribute__((packed)) ArgsRV_t {
        void* buffer;
        size_t len;
        uint32_t flags;
        vgetrandom_opaque_params* opaque_state;
        size_t opaque_len;
        ssize_t rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const bool IsParameterQuery = Packed->buffer == nullptr && Packed->len == 0 && Packed->flags == 0 && Packed->opaque_len == ~0ULL;
      if (IsParameterQuery) [[unlikely]] {
        // The caller is asking how to allocate the opaque state rather than for random bytes.
        // This path is backed by the plain syscall, so the state is never used; report valid
        // but minimal requirements. glibc will still allocate a page for it.
        auto* Params = Packed->opaque_state;
        Params->size_of_opaque_state = FEXCore::Utils::FEX_PAGE_SIZE;
        Params->mmap_prot = PROT_NONE;
        Params->mmap_flags = MAP_NORESERVE | MAP_ANONYMOUS | MAP_PRIVATE;
        Packed->rv = 0;
        return;
      }

      const int HostResult = ::syscall(SYS_getrandom, Packed->buffer, Packed->len, Packed->flags);
      Packed->rv = SyscallRet(HostResult);
    }
  } // namespace glibc

  namespace VDSO {
    // Handlers that call straight into the host VDSO entry points held in VDSOHandlers.
    // The value returned by the entry point is stored into `rv` unmodified.

    static void time(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        time_t* a_0;
        uint64_t rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::TimePtr(Packed->a_0);
    }

    static void gettimeofday(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        struct timeval* tv;
        struct timezone* tz;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::GetTimeOfDayPtr(Packed->tv, Packed->tz);
    }

    static void clock_gettime(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        struct timespec* tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::ClockGetTimePtr(Packed->clk_id, Packed->tp);
    }

    static void clock_getres(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        struct timespec* tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::ClockGetResPtr(Packed->clk_id, Packed->tp);
    }

    static void getcpu(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        uint32_t* cpu;
        uint32_t* node;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::GetCPUPtr(Packed->cpu, Packed->node);
    }

    static void getrandom(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        void* buffer;
        size_t len;
        uint32_t flags;
        void* opaque_state;
        size_t opaque_len;
        ssize_t rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::GetRandomPtr(Packed->buffer, Packed->len, Packed->flags, Packed->opaque_state, Packed->opaque_len);
    }
  } // namespace VDSO

  // Currently selected x86-64 handlers. They start out as the glibc-backed versions;
  // LoadHostVDSO replaces an entry with its VDSO-backed counterpart when the matching
  // host VDSO symbol is found.
  HandlerPtr Handler_time = FEX::VDSO::x64::glibc::time;
  HandlerPtr Handler_gettimeofday = FEX::VDSO::x64::glibc::gettimeofday;
  HandlerPtr Handler_clock_gettime = FEX::VDSO::x64::glibc::clock_gettime;
  HandlerPtr Handler_clock_getres = FEX::VDSO::x64::glibc::clock_getres;
  HandlerPtr Handler_getcpu = FEX::VDSO::x64::glibc::getcpu;
  HandlerPtr Handler_getrandom = FEX::VDSO::x64::glibc::getrandom;
} // namespace x64
namespace x32 {
  namespace glibc {
    // Maps a libc-style -1 failure onto the negated errno; other values pass through.
    static int SyscallRet(int Result) {
      return Result == -1 ? -errno : Result;
    }

    // Fallback handlers for 32-bit guests, serviced through the host libc. Results are
    // produced in host-sized structures and then copied into the guest's 32-bit layouts.

    static void time(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<FEX::HLE::x32::old_time32_t> a_0;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      time_t HostTime {};
      const int HostResult = ::time(&HostTime);
      Packed->rv = SyscallRet(HostResult);

      // Only write back on success and when the guest supplied a destination.
      if (HostResult != -1 && Packed->a_0) {
        *Packed->a_0 = HostTime;
      }
    }

    static void gettimeofday(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<FEX::HLE::x32::timeval32> tv;
        HLE::x32::compat_ptr<struct timezone> tz;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      // Pass a host buffer only if the guest asked for the time value at all.
      struct timeval HostTv {};
      struct timeval* HostTvPtr = Packed->tv ? &HostTv : nullptr;

      const int HostResult = ::gettimeofday(HostTvPtr, Packed->tz);
      Packed->rv = SyscallRet(HostResult);

      if (HostResult != -1 && Packed->tv) {
        *Packed->tv = HostTv;
      }
    }

    static void clock_gettime(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<HLE::x32::timespec32> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      struct timespec HostTs {};
      const int HostResult = ::clock_gettime(Packed->clk_id, &HostTs);
      Packed->rv = SyscallRet(HostResult);

      if (HostResult != -1 && Packed->tp) {
        *Packed->tp = HostTs;
      }
    }

    // The 64-bit-time variant hands the guest pointer to the host call directly.
    static void clock_gettime64(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<struct timespec> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = ::clock_gettime(Packed->clk_id, Packed->tp);
      Packed->rv = SyscallRet(HostResult);
    }

    static void clock_getres(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<HLE::x32::timespec32> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      struct timespec HostTs {};
      const int HostResult = ::clock_getres(Packed->clk_id, &HostTs);
      Packed->rv = SyscallRet(HostResult);

      if (HostResult != -1 && Packed->tp) {
        *Packed->tp = HostTs;
      }
    }

    static void getcpu(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<uint32_t> cpu;
        HLE::x32::compat_ptr<uint32_t> node;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      const int HostResult = FHU::Syscalls::getcpu(Packed->cpu, Packed->node);
      Packed->rv = SyscallRet(HostResult);
    }
  } // namespace glibc

  namespace VDSO {
    // True when Result lies in the top 4095 values of the 64-bit range, i.e. when it
    // is a negative error code in [-4095, -1] after sign extension.
    static bool SyscallErr(uint64_t Result) {
      return Result >= static_cast<uint64_t>(-4095);
    }

    // Handlers for 32-bit guests that call the host VDSO entry points in VDSOHandlers.
    // Results are produced in host-sized structures and copied to the guest layouts
    // only when the call did not report an error.

    static void time(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<FEX::HLE::x32::old_time32_t> a_0;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      time_t HostTime {};
      const uint64_t HostResult = VDSOHandlers::TimePtr(&HostTime);
      Packed->rv = HostResult;

      if (!SyscallErr(HostResult) && Packed->a_0) {
        *Packed->a_0 = HostTime;
      }
    }

    static void gettimeofday(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<FEX::HLE::x32::timeval32> tv;
        HLE::x32::compat_ptr<struct timezone> tz;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      // Pass a host buffer only if the guest asked for the time value at all.
      struct timeval HostTv {};
      struct timeval* HostTvPtr = Packed->tv ? &HostTv : nullptr;

      const uint64_t HostResult = VDSOHandlers::GetTimeOfDayPtr(HostTvPtr, Packed->tz);
      Packed->rv = HostResult;

      if (!SyscallErr(HostResult) && Packed->tv) {
        *Packed->tv = HostTv;
      }
    }

    static void clock_gettime(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<HLE::x32::timespec32> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      struct timespec HostTs {};
      const uint64_t HostResult = VDSOHandlers::ClockGetTimePtr(Packed->clk_id, &HostTs);
      Packed->rv = HostResult;

      if (!SyscallErr(HostResult) && Packed->tp) {
        *Packed->tp = HostTs;
      }
    }

    // The 64-bit-time variant hands the guest pointer to the VDSO entry point directly.
    static void clock_gettime64(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<struct timespec> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::ClockGetTimePtr(Packed->clk_id, Packed->tp);
    }

    static void clock_getres(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        clockid_t clk_id;
        HLE::x32::compat_ptr<HLE::x32::timespec32> tp;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      struct timespec HostTs {};
      const uint64_t HostResult = VDSOHandlers::ClockGetResPtr(Packed->clk_id, &HostTs);
      Packed->rv = HostResult;

      if (!SyscallErr(HostResult) && Packed->tp) {
        *Packed->tp = HostTs;
      }
    }

    static void getcpu(void* ArgsRV) {
      struct __attribute__((packed)) ArgsRV_t {
        HLE::x32::compat_ptr<uint32_t> cpu;
        HLE::x32::compat_ptr<uint32_t> node;
        int rv;
      };
      auto* Packed = static_cast<ArgsRV_t*>(ArgsRV);

      Packed->rv = VDSOHandlers::GetCPUPtr(Packed->cpu, Packed->node);
    }
  } // namespace VDSO

  // Currently selected 32-bit handlers. They start out as the glibc-backed versions;
  // LoadHostVDSO replaces an entry with its VDSO-backed counterpart when the matching
  // host VDSO symbol is found. There is no getrandom entry for 32-bit.
  HandlerPtr Handler_time = FEX::VDSO::x32::glibc::time;
  HandlerPtr Handler_gettimeofday = FEX::VDSO::x32::glibc::gettimeofday;
  HandlerPtr Handler_clock_gettime = FEX::VDSO::x32::glibc::clock_gettime;
  HandlerPtr Handler_clock_gettime64 = FEX::VDSO::x32::glibc::clock_gettime64;
  HandlerPtr Handler_clock_getres = FEX::VDSO::x32::glibc::clock_getres;
  HandlerPtr Handler_getcpu = FEX::VDSO::x32::glibc::getcpu;
} // namespace x32

// Symbol lookup table built from an in-memory ELF64 image (the host VDSO).
// The stored names are views into that image, so the image must stay mapped
// for as long as the parser is used.
class VDSOParser final {
public:
  // Scans the image at HeaderBase and records its dynamic symbols.
  VDSOParser(const uint8_t* HeaderBase);

  // Returns the address recorded for Name, or nullptr when the name is unknown.
  void* FindSymbol(std::string_view Name) const {
    const auto Entry = Symbols.find(Name);
    return Entry != Symbols.end() ? Entry->second : nullptr;
  }
private:
  fextl::map<std::string_view, void*> Symbols;
};

// Minimal ELF64 walker: locates the dynamic symbol section and an allocated string
// table section, then records every dynamic symbol with a non-zero st_info as
// name -> (HeaderBase + st_value). On failure a debug message is logged and the
// table is left empty.
VDSOParser::VDSOParser(const uint8_t* HeaderBase) {
  const auto* ElfHeader = reinterpret_cast<const Elf64_Ehdr*>(HeaderBase);
  const auto* Sections = reinterpret_cast<const Elf64_Shdr*>(HeaderBase + ElfHeader->e_shoff);
  const size_t SectionCount = ElfHeader->e_shnum;

  // Walk the section headers until both sections of interest have been seen.
  const Elf64_Shdr* SymbolSection = nullptr;
  const Elf64_Shdr* StringSection = nullptr;
  for (size_t Index = 0; Index < SectionCount && !(SymbolSection && StringSection); ++Index) {
    const Elf64_Shdr& Section = Sections[Index];

    if (Section.sh_type == SHT_DYNSYM) {
      SymbolSection = &Section;
    }

    // Only string tables that have a load address are considered.
    if (Section.sh_type == SHT_STRTAB && Section.sh_addr) {
      StringSection = &Section;
    }
  }

  if (SymbolSection == nullptr || StringSection == nullptr) {
    LogMan::Msg::DFmt("Couldn't parse host VDSO symbols. Falling back to glibc implementations.");
    return;
  }

  const char* StringTable = reinterpret_cast<const char*>(HeaderBase + StringSection->sh_offset);
  const auto SymbolCount = SymbolSection->sh_size / SymbolSection->sh_entsize;

  for (size_t Index = 0; Index < SymbolCount; ++Index) {
    const auto EntryOffset = SymbolSection->sh_offset + (Index * SymbolSection->sh_entsize);
    const auto* Entry = reinterpret_cast<const Elf64_Sym*>(HeaderBase + EntryOffset);

    if (Entry->st_info == 0) {
      continue;
    }

    const char* SymbolName = &StringTable[Entry->st_name];
    const uint8_t* SymbolAddress = HeaderBase + Entry->st_value;
    Symbols[SymbolName] = const_cast<void*>(static_cast<const void*>(SymbolAddress));
  }
}

void LoadHostVDSO() {
  // Linux gives the VDSO ELF header base in the auxv value AT_SYSINFO_EHDR.
  auto VDSOHeader = ::getauxval(AT_SYSINFO_EHDR);

  if (!VDSOHeader) {
    // We couldn't load VDSO, fallback to C implementations. Which will still be faster than emulated libc versions.
    LogMan::Msg::IFmt("linux-vdso implementation falling back to libc. Consider enabling VDSO in your kernel.");
    return;
  }

  auto VDSO = VDSOParser(reinterpret_cast<const uint8_t*>(VDSOHeader));

  auto SymbolPtr = VDSO.FindSymbol("__kernel_time");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_time");
  }
  if (SymbolPtr) {
    VDSOHandlers::TimePtr = reinterpret_cast<VDSOHandlers::TimeType>(SymbolPtr);
    x64::Handler_time = x64::VDSO::time;
    x32::Handler_time = x32::VDSO::time;
  }

  SymbolPtr = VDSO.FindSymbol("__kernel_gettimeofday");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_gettimeofday");
  }

  if (SymbolPtr) {
    VDSOHandlers::GetTimeOfDayPtr = reinterpret_cast<VDSOHandlers::GetTimeOfDayType>(SymbolPtr);
    x64::Handler_gettimeofday = x64::VDSO::gettimeofday;
    x32::Handler_gettimeofday = x32::VDSO::gettimeofday;
  }

  SymbolPtr = VDSO.FindSymbol("__kernel_clock_gettime");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_clock_gettime");
  }

  if (SymbolPtr) {
    VDSOHandlers::ClockGetTimePtr = reinterpret_cast<VDSOHandlers::ClockGetTimeType>(SymbolPtr);
    x64::Handler_clock_gettime = x64::VDSO::clock_gettime;
    x32::Handler_clock_gettime = x32::VDSO::clock_gettime;
    x32::Handler_clock_gettime64 = x32::VDSO::clock_gettime64;
  }

  SymbolPtr = VDSO.FindSymbol("__kernel_clock_getres");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_clock_getres");
  }

  if (SymbolPtr) {
    VDSOHandlers::ClockGetResPtr = reinterpret_cast<VDSOHandlers::ClockGetResType>(SymbolPtr);
    x64::Handler_clock_getres = x64::VDSO::clock_getres;
    x32::Handler_clock_getres = x32::VDSO::clock_getres;
  }

  SymbolPtr = VDSO.FindSymbol("__kernel_getcpu");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_getcpu");
  }

  if (SymbolPtr) {
    VDSOHandlers::GetCPUPtr = reinterpret_cast<VDSOHandlers::GetCPUType>(SymbolPtr);
    x64::Handler_getcpu = x64::VDSO::getcpu;
    x32::Handler_getcpu = x32::VDSO::getcpu;
  }

  SymbolPtr = VDSO.FindSymbol("__kernel_getrandom");
  if (!SymbolPtr) {
    SymbolPtr = VDSO.FindSymbol("__vdso_getrandom");
  }

  if (SymbolPtr) {
    VDSOHandlers::GetRandomPtr = reinterpret_cast<VDSOHandlers::GetRandomType>(SymbolPtr);
    x64::Handler_getrandom = x64::VDSO::getrandom;
    // 32-bit doesn't have getrandom vdso
  }
}

// Thunk table for the VDSO entry points. Each entry pairs the SHA-256 of the thunk
// name (see the per-entry comment) with a handler pointer that starts out null and
// is filled in by LoadVDSOThunks.
//
// The order is significant: LoadVDSOThunks assigns handlers by index
// (0 time, 1 gettimeofday, 2 clock_gettime, 3 clock_gettime64, 4 clock_getres,
// 5 getcpu, 6 getrandom), and GetVDSOThunkDefinitions drops the last entry
// (getrandom) for 32-bit guests.
static std::array<FEXCore::IR::ThunkDefinition, 7> VDSODefinitions = {{
  {
    // sha256(libVDSO:time)
    {0x37, 0x63, 0x46, 0xb0, 0x79, 0x06, 0x5f, 0x9d, 0x00, 0xb6, 0x8d, 0xfd, 0x9e, 0x4a, 0x62, 0xcd,
     0x1e, 0x6c, 0xcc, 0x22, 0xcd, 0xb2, 0xc0, 0x17, 0x7d, 0x42, 0x6a, 0x40, 0xd1, 0xeb, 0xfa, 0xe0},
    nullptr,
  },
  {
    // sha256(libVDSO:gettimeofday)
    {0x77, 0x2a, 0xde, 0x1c, 0x13, 0x2d, 0xe9, 0x48, 0xaf, 0xe0, 0xba, 0xcc, 0x6a, 0x89, 0xff, 0xca,
     0x4a, 0xdc, 0xd5, 0x63, 0x2c, 0xc5, 0x62, 0x8b, 0x5d, 0xde, 0x0b, 0x15, 0x35, 0xc6, 0xc7, 0x14},
    nullptr,
  },
  {
    // sha256(libVDSO:clock_gettime)
    {0x3c, 0x96, 0x9b, 0x2d, 0xc3, 0xad, 0x2b, 0x3b, 0x9c, 0x4e, 0x4d, 0xca, 0x1c, 0xe8, 0x18, 0x4a,
     0x12, 0x8a, 0xe4, 0xc1, 0x56, 0x92, 0x73, 0xce, 0x65, 0x85, 0x5f, 0x65, 0x7e, 0x94, 0x26, 0xbe},
    nullptr,
  },

  {
    // sha256(libVDSO:clock_gettime64)
    {0xba, 0xe9, 0x6d, 0x30, 0xc0, 0x68, 0xc6, 0xd7, 0x59, 0x04, 0xf7, 0x10, 0x06, 0x72, 0x88, 0xfd,
     0x4c, 0x57, 0x0f, 0x31, 0xa5, 0xea, 0xa9, 0xb9, 0xd3, 0x8d, 0x03, 0x81, 0x50, 0x16, 0x22, 0x71},
    nullptr,
  },

  {
    // sha256(libVDSO:clock_getres)
    {0xe4, 0xa1, 0xf6, 0x23, 0x35, 0xae, 0xb7, 0xb6, 0xb0, 0x37, 0xc5, 0xc3, 0xa3, 0xfd, 0xbf, 0xa2,
     0xa1, 0xc8, 0x95, 0x78, 0xe5, 0x76, 0x86, 0xdb, 0x3e, 0x6c, 0x54, 0xd5, 0x02, 0x60, 0xd8, 0x6d},
    nullptr,
  },
  {
    // sha256(libVDSO:getcpu)
    {0x39, 0x83, 0x39, 0x36, 0x0f, 0x68, 0xd6, 0xfc, 0xc2, 0x3a, 0x97, 0x11, 0x85, 0x09, 0xc7, 0x25,
     0xbb, 0x50, 0x49, 0x55, 0x6b, 0x0c, 0x9f, 0x50, 0x37, 0xf5, 0x9d, 0xb0, 0x38, 0x58, 0x57, 0x12},
    nullptr,
  },
  {
    // sha256(libVDSO:getrandom)
    {0xf8, 0x03, 0xe2, 0x70, 0xe3, 0xf1, 0xbb, 0xc1, 0x7d, 0xa7, 0x8b, 0xb3, 0x1f, 0x3e, 0xbd, 0xc6,
     0x8a, 0x50, 0xd3, 0x4a, 0x1f, 0xb3, 0x4b, 0x7e, 0x32, 0xcb, 0x1e, 0x18, 0x3b, 0x7c, 0xeb, 0x4b},
    nullptr,
  },
}};

// Scans the guest VDSO image mapped at VDSOBase and records the addresses of
// __kernel_sigreturn, __kernel_rt_sigreturn and __fex_callback_ret in VDSOPointers.
//
// The image is ignored unless it carries the ELF magic and matches the class and
// machine selected by Is64Bit. Both ".dynsym" and ".symtab" are consulted, in that
// order, so a hit in the later table overwrites one from the earlier table.
template<bool Is64Bit>
void LoadGuestVDSOSymbols(char* VDSOBase) {
  using EhdrType = std::conditional_t<Is64Bit, Elf64_Ehdr, Elf32_Ehdr>;
  using ShdrType = std::conditional_t<Is64Bit, Elf64_Shdr, Elf32_Shdr>;
  using SymType = std::conditional_t<Is64Bit, Elf64_Sym, Elf32_Sym>;
  constexpr auto ExpectedClass = Is64Bit ? ELFCLASS64 : ELFCLASS32;
  constexpr auto ExpectedMachine = Is64Bit ? EM_X86_64 : EM_386;

  const auto* Elf = reinterpret_cast<const EhdrType*>(VDSOBase);

  const bool HasElfMagic = Elf->e_ident[EI_MAG0] == ELFMAG0 && Elf->e_ident[EI_MAG1] == ELFMAG1 && Elf->e_ident[EI_MAG2] == ELFMAG2 &&
                           Elf->e_ident[EI_MAG3] == ELFMAG3;
  if (!HasElfMagic) {
    return;
  }

  const bool MatchesGuest = Elf->e_ident[EI_CLASS] == ExpectedClass && Elf->e_machine == ExpectedMachine;
  if (!MatchesGuest) {
    return;
  }

  // Section headers plus the string table that holds the section names.
  const auto* Sections = reinterpret_cast<const ShdrType*>(VDSOBase + Elf->e_shoff);
  const auto* SectionNameHeader = &Sections[Elf->e_shstrndx];
  const char* SectionNames = VDSOBase + SectionNameHeader->sh_offset;

  struct SymbolTypes {
    const char* name;
    int sh_type;
  };

  constexpr std::array<SymbolTypes, 2> symbol_table_names = {{{".dynsym", SHT_DYNSYM}, {".symtab", SHT_SYMTAB}}};

  for (const auto& Wanted : symbol_table_names) {
    // Find the first section with the wanted type and name.
    const ShdrType* SymbolSection = nullptr;
    for (size_t Index = 0; Index < Elf->e_shnum && !SymbolSection; ++Index) {
      const auto& Candidate = Sections[Index];
      if (Candidate.sh_type == Wanted.sh_type && strcmp(&SectionNames[Candidate.sh_name], Wanted.name) == 0) {
        SymbolSection = &Candidate;
      }
    }

    if (!SymbolSection) {
      // This image doesn't carry that table.
      continue;
    }

    // sh_link of a symbol table names the string table its symbols refer to.
    const ShdrType* StringSection = &Sections[SymbolSection->sh_link];
    const char* Strings = VDSOBase + StringSection->sh_offset;
    const size_t SymbolCount = SymbolSection->sh_size / SymbolSection->sh_entsize;

    for (size_t Index = 0; Index < SymbolCount; ++Index) {
      const uint64_t EntryOffset = SymbolSection->sh_offset + Index * SymbolSection->sh_entsize;
      const auto* Entry = reinterpret_cast<const SymType*>(VDSOBase + EntryOffset);

      // Skip hidden symbols and symbols without an address.
      if (ELF32_ST_VISIBILITY(Entry->st_other) == STV_HIDDEN || Entry->st_value == 0) {
        continue;
      }

      const char* SymbolName = &Strings[Entry->st_name];
      if (SymbolName[0] == '\0') {
        continue;
      }

      if (strcmp(SymbolName, "__kernel_sigreturn") == 0) {
        VDSOPointers.VDSO_kernel_sigreturn = VDSOBase + Entry->st_value;
      } else if (strcmp(SymbolName, "__kernel_rt_sigreturn") == 0) {
        VDSOPointers.VDSO_kernel_rt_sigreturn = VDSOBase + Entry->st_value;
      } else if (strcmp(SymbolName, "__fex_callback_ret") == 0) {
        VDSOPointers.VDSO_FEX_CallbackRET = VDSOBase + Entry->st_value;
      }
    }
  }
}

// Emits guest-executable stubs for every entry point in VDSOPointers that the guest
// VDSO did not provide:
//   - 32-bit guests only: __kernel_sigreturn and __kernel_rt_sigreturn trampolines.
//   - all guests: the FEX CALLBACKRET stub.
//
// One page is mapped through Handler->GuestMmap (below 4GB for 32-bit guests), the
// missing stubs are copied in, and the page is then switched to read+execute. The
// mapping is recorded in Mapping->X86GeneratedCodePtr/X86GeneratedCodeSize so that
// UnloadVDSOMapping can release it. If no page can be mapped the function returns
// without providing the stubs.
//
// @param Thread  Thread passed through to the guest mmap/munmap/mprotect calls.
// @param Is64Bit True for an x86-64 guest, false for a 32-bit guest.
// @param Mapping Receives the location and size of the generated-code page.
// @param Handler Syscall handler used to create and protect the guest mapping.
void LoadFEXGeneratedCode(FEXCore::Core::InternalThreadState* Thread, bool Is64Bit, VDSOMapping* Mapping, FEX::HLE::SyscallHandler* const Handler) {
  // Nothing to generate if every stub this guest needs is already present.
  // The sigreturn trampolines are only ever generated for 32-bit guests (see below),
  // so they are only required when the guest is 32-bit; a 64-bit guest is complete
  // as soon as the callback-return stub exists.
  const bool HasCallbackRet = VDSOPointers.VDSO_FEX_CallbackRET != nullptr;
  const bool HasSigreturns = VDSOPointers.VDSO_kernel_sigreturn && VDSOPointers.VDSO_kernel_rt_sigreturn;
  if (HasCallbackRet && (Is64Bit || HasSigreturns)) {
    return;
  }

  // Hardcoded to one page for now
  auto PageSize = sysconf(_SC_PAGESIZE);
  PageSize = PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE;
  Mapping->X86GeneratedCodeSize = PageSize;

  if (Is64Bit) {
    // 64bit mode can have its code anywhere
    auto Result =
      Handler->GuestMmap(Is64Bit, Thread, nullptr, Mapping->X86GeneratedCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (!FEX::HLE::HasSyscallError(Result)) {
      Mapping->X86GeneratedCodePtr = Result;
    }
  } else {
    // First address that is no longer reachable by a 32-bit guest.
    constexpr uintptr_t LOCATION_MAX = 0x1'0000'0000;

    // We need to have the sigret handler in the lower 32bits of memory space
    // Scan top down and try to allocate a location
    for (size_t Location = 0xFFFF'E000; Location != 0x0; Location -= PageSize) {
      auto Ptr = Handler->GuestMmap(Is64Bit, Thread, reinterpret_cast<void*>(Location), PageSize, PROT_READ | PROT_WRITE,
                                    MAP_FIXED_NOREPLACE | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

      if (!FEX::HLE::HasSyscallError(Ptr) && reinterpret_cast<uintptr_t>(Ptr) >= LOCATION_MAX) {
        // Failed to map in the lower 32bits
        // Try again
        // Can happen in the case that host kernel ignores MAP_FIXED_NOREPLACE
        Handler->GuestMunmap(Thread, Ptr, PageSize);
        continue;
      }

      if (!FEX::HLE::HasSyscallError(Ptr)) {
        Mapping->X86GeneratedCodePtr = Ptr;
        break;
      }
    }
  }

  // Can't do anything about this
  // Here's hoping the application doesn't use signals
  if (!Mapping->X86GeneratedCodePtr) {
    return;
  }

  FEXCore::Allocator::VirtualName("FEXMem_Misc", Mapping->X86GeneratedCodePtr, Mapping->X86GeneratedCodeSize);

  // Byte offset of the next free location inside the generated-code page.
  size_t CurrentCodeOffset {};

  if (!Is64Bit) {
    // Signal return handlers need to be bit-exact to what the Linux kernel provides in VDSO.
    // GDB and unwinding libraries key off of these instructions to understand if the stack frame is a signal frame or not.
    // These two code sections match exactly what libSegFault expects.
    //
    // Typically these handlers are provided by the 32-bit VDSO thunk library, but that isn't available in all cases.
    // Falling back to this generated code segment still allows a backtrace to work, just might not show
    // the symbol as VDSO since there is no ELF to parse.
    constexpr std::array<uint8_t, 9> sigreturn_32_code = {
      0x58,                         // pop eax
      0xb8, 0x77, 0x00, 0x00, 0x00, // mov eax, 0x77
      0xcd, 0x80,                   // int 0x80
      0x90,                         // nop
    };

    constexpr std::array<uint8_t, 7> rt_sigreturn_32_code = {
      0xb8, 0xad, 0x00, 0x00, 0x00, // mov eax, 0xad
      0xcd, 0x80,                   // int 0x80
    };

    if (!VDSOPointers.VDSO_kernel_sigreturn) {
      VDSOPointers.VDSO_kernel_sigreturn = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Mapping->X86GeneratedCodePtr) + CurrentCodeOffset);
      memcpy(VDSOPointers.VDSO_kernel_sigreturn, sigreturn_32_code.data(), sigreturn_32_code.size());
      CurrentCodeOffset += sigreturn_32_code.size();
    }

    if (!VDSOPointers.VDSO_kernel_rt_sigreturn) {
      VDSOPointers.VDSO_kernel_rt_sigreturn =
        reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Mapping->X86GeneratedCodePtr) + CurrentCodeOffset);
      memcpy(VDSOPointers.VDSO_kernel_rt_sigreturn, rt_sigreturn_32_code.data(), rt_sigreturn_32_code.size());
      CurrentCodeOffset += rt_sigreturn_32_code.size();
    }
  }

  if (!VDSOPointers.VDSO_FEX_CallbackRET) {
    constexpr std::array<uint8_t, 2> CallbackRetCode = {
      0x0F, 0x3E, // CALLBACKRET FEX Instruction
    };

    VDSOPointers.VDSO_FEX_CallbackRET = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Mapping->X86GeneratedCodePtr) + CurrentCodeOffset);
    memcpy(VDSOPointers.VDSO_FEX_CallbackRET, CallbackRetCode.data(), CallbackRetCode.size());
    CurrentCodeOffset += CallbackRetCode.size();
  }

  // The page was mapped writable to fill it in; drop write and grant execute for the guest.
  Handler->GuestMprotect(Thread, Mapping->X86GeneratedCodePtr, Mapping->X86GeneratedCodeSize, PROT_READ | PROT_EXEC);
}

// Releases the guest mappings described by Mapping: first the VDSO image, then the
// generated-code page. A region whose base pointer is null is skipped.
void UnloadVDSOMapping(FEXCore::Core::InternalThreadState* Thread, FEX::HLE::SyscallHandler* const Handler, const VDSOMapping& Mapping) {
  const auto ReleaseIfMapped = [&](void* Base, size_t Size) {
    if (Base) {
      Handler->GuestMunmap(Thread, Base, Size);
    }
  };

  ReleaseIfMapped(Mapping.VDSOBase, Mapping.VDSOSize);
  ReleaseIfMapped(Mapping.X86GeneratedCodePtr, Mapping.X86GeneratedCodeSize);
}

// Sets up VDSO emulation for a guest:
//   1. Tries to open and map "<ThunkGuestLibs>[_32]/libVDSO-guest.so" as high as possible
//      in the guest address space, then resolves host VDSO functions (LoadHostVDSO) and
//      guest entry points (LoadGuestVDSOSymbols).
//   2. Generates any guest stubs that are still missing (LoadFEXGeneratedCode).
//   3. Fills the handler pointers of VDSODefinitions for the selected bitness.
//
// Returns the mappings that were created. An empty mapping is returned when the library
// could be opened but is too small or could not be mapped.
// NOTE(review): both early `return {}` paths exit before LoadFEXGeneratedCode runs and
// before VDSODefinitions is populated, unlike the "library not found" case which falls
// through to both — confirm that this asymmetry is intended.
VDSOMapping LoadVDSOThunks(FEXCore::Core::InternalThreadState* Thread, bool Is64Bit, FEX::HLE::SyscallHandler* const Handler) {
  VDSOMapping Mapping {};
  FEX_CONFIG_OPT(ThunkGuestLibs, THUNKGUESTLIBS);
  fextl::string ThunkGuestPath = ThunkGuestLibs();
  // Strip trailing slashes so the "_32" suffix attaches to the directory name itself.
  while (ThunkGuestPath.ends_with('/')) {
    ThunkGuestPath.pop_back();
  }
  ThunkGuestPath = fextl::fmt::format("{}{}/libVDSO-guest.so", ThunkGuestPath, Is64Bit ? "" : "_32");
  // Load VDSO if we can
  int VDSOFD = ::open(ThunkGuestPath.c_str(), O_RDONLY);

  if (VDSOFD != -1) {
    // Get file size
    // NOTE(review): an lseek failure (-1) would be stored as a very large size_t here.
    Mapping.VDSOSize = lseek(VDSOFD, 0, SEEK_END);

    // Only continue if the file is at least large enough to hold an ELF header.
    if (Mapping.VDSOSize >= std::min(sizeof(Elf32_Ehdr), sizeof(Elf64_Ehdr))) {
      // Reset to beginning
      lseek(VDSOFD, 0, SEEK_SET);
      Mapping.VDSOSize = FEXCore::AlignUp(Mapping.VDSOSize, FEXCore::Utils::FEX_PAGE_SIZE);

      auto VASize = FEXCore::Allocator::DetermineVASize();
      uint64_t VDSOHint {};
      if (Is64Bit) {
        if (VASize > 47) {
          // If VA size is at least as large as minimum x86 specification, then set to max.
          VASize = 47;
        }

        // Calculate the highest point the vdso could go.
        VDSOHint = (1ULL << VASize) - Mapping.VDSOSize;
      } else {
        // Highest placement that still ends at the 4GB boundary.
        VDSOHint = 0x1'0000'0000ULL - Mapping.VDSOSize;
      }

      auto PageSize = sysconf(_SC_PAGESIZE);
      PageSize = PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE;

      // Scan top down and try to allocate a location
      void* VDSOPointerBase {};
      do {
        VDSOPointerBase = Handler->GuestMmap(Is64Bit, Thread, reinterpret_cast<void*>(VDSOHint), Mapping.VDSOSize, PROT_READ | PROT_EXEC,
                                             MAP_FIXED_NOREPLACE | MAP_SHARED, VDSOFD, 0);
        // Scan-downward until we fit.
        VDSOHint -= PageSize;
      } while (FEX::HLE::HasSyscallError(VDSOPointerBase) && static_cast<int64_t>(VDSOHint) > 0);

      if (FEX::HLE::HasSyscallError(VDSOPointerBase)) {
        LogMan::Msg::EFmt("Couldn't Map VDSO");
        close(VDSOFD);
        return {};
      }

      Mapping.VDSOBase = VDSOPointerBase;

      // Since we found our VDSO thunk library, find our host VDSO function implementations.
      LoadHostVDSO();
    }
    // The descriptor is closed here; the mapping (if any) is used from now on.
    close(VDSOFD);

    // The file was too small to be mapped.
    if (!Mapping.VDSOBase) {
      return {};
    }

    if (Is64Bit) {
      LoadGuestVDSOSymbols<true>(reinterpret_cast<char*>(Mapping.VDSOBase));
    } else {
      LoadGuestVDSOSymbols<false>(reinterpret_cast<char*>(Mapping.VDSOBase));
    }
  }

  // If VDSO couldn't find sigreturn then FEX needs to provide unique implementations.
  LoadFEXGeneratedCode(Thread, Is64Bit, &Mapping, Handler);

  if (Is64Bit) {
    // Set the Thunk definition pointers for x86-64
    // The clock_gettime64 slot (index 3) reuses the clock_gettime handler here.
    VDSODefinitions[0].ThunkFunction = FEX::VDSO::x64::Handler_time;
    VDSODefinitions[1].ThunkFunction = FEX::VDSO::x64::Handler_gettimeofday;
    VDSODefinitions[2].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime;
    VDSODefinitions[3].ThunkFunction = FEX::VDSO::x64::Handler_clock_gettime;
    VDSODefinitions[4].ThunkFunction = FEX::VDSO::x64::Handler_clock_getres;
    VDSODefinitions[5].ThunkFunction = FEX::VDSO::x64::Handler_getcpu;
    VDSODefinitions[6].ThunkFunction = FEX::VDSO::x64::Handler_getrandom;
  } else {
    // Set the Thunk definition pointers for x86
    VDSODefinitions[0].ThunkFunction = FEX::VDSO::x32::Handler_time;
    VDSODefinitions[1].ThunkFunction = FEX::VDSO::x32::Handler_gettimeofday;
    VDSODefinitions[2].ThunkFunction = FEX::VDSO::x32::Handler_clock_gettime;
    VDSODefinitions[3].ThunkFunction = FEX::VDSO::x32::Handler_clock_gettime64;
    VDSODefinitions[4].ThunkFunction = FEX::VDSO::x32::Handler_clock_getres;
    VDSODefinitions[5].ThunkFunction = FEX::VDSO::x32::Handler_getcpu;
    // getrandom doesn't exist on 32-bit, so leave VDSODefinitions[6] unfilled
  }

  return Mapping;
}

// Returns the absolute address of the entry point recorded in the 32-bit ELF header
// at VDSOBase (base address + e_entry). Returns 0 when VDSOBase is null or the header
// records no entry point.
uint64_t GetVSyscallEntry(const void* VDSOBase) {
  if (VDSOBase == nullptr) {
    return 0;
  }

  // The image is interpreted as a 32-bit ELF regardless of how it was loaded.
  const auto* Elf = static_cast<const Elf32_Ehdr*>(VDSOBase);
  const uint64_t EntryOffset = Elf->e_entry;

  return EntryOffset != 0 ? reinterpret_cast<uint64_t>(VDSOBase) + EntryOffset : 0;
}

// Returns a view over the VDSO thunk table. 64-bit guests see every entry; 32-bit
// guests see all but the last one (getrandom), which has no 32-bit handler.
const std::span<FEXCore::IR::ThunkDefinition> GetVDSOThunkDefinitions(bool Is64Bit) {
  const size_t SkippedAtEnd = Is64Bit ? 0 : 1;
  const size_t VisibleCount = VDSODefinitions.size() - SkippedAtEnd;
  return std::span<FEXCore::IR::ThunkDefinition>(VDSODefinitions.data(), VisibleCount);
}

// Read-only access to the guest entry points recorded in the file-level VDSOPointers
// (filled in by LoadGuestVDSOSymbols and/or LoadFEXGeneratedCode).
const VDSOEntrypoints& GetVDSOSymbols() {
  return VDSOPointers;
}
} // namespace FEX::VDSO


================================================
FILE: Source/Tools/LinuxEmulation/VDSO_Emulation.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/IR/IR.h>

#include <cstddef>
#include <cstdint>
#include <span>

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEX::HLE {
class SyscallHandler;
}

namespace FEX::VDSO {
// Describes the guest memory regions created by LoadVDSOThunks so that
// UnloadVDSOMapping can release them. A null pointer means "not mapped".
struct VDSOMapping {
  // Base address and size of the mapped guest VDSO image.
  void* VDSOBase {};
  size_t VDSOSize {};
  // Base address and size of the page holding FEX-generated guest code.
  void* X86GeneratedCodePtr {};
  size_t X86GeneratedCodeSize {};
};

// Guest addresses of the entry points FEX needs from the guest VDSO.
// The members have no default initialisers; instances should be value-initialised.
struct VDSOEntrypoints {
  // Address of the guest `__kernel_sigreturn` symbol or its generated replacement.
  void* VDSO_kernel_sigreturn;
  // Address of the guest `__kernel_rt_sigreturn` symbol or its generated replacement.
  void* VDSO_kernel_rt_sigreturn;
  // Address of the guest `__fex_callback_ret` symbol or its generated replacement.
  void* VDSO_FEX_CallbackRET;
};
// Maps the guest VDSO thunk library (when available), generates any missing guest
// stubs and selects the thunk handlers for the given bitness. Returns the mappings made.
VDSOMapping LoadVDSOThunks(FEXCore::Core::InternalThreadState* Thread, bool Is64Bit, FEX::HLE::SyscallHandler* const Handler);
// Releases the mappings previously returned by LoadVDSOThunks.
void UnloadVDSOMapping(FEXCore::Core::InternalThreadState* Thread, FEX::HLE::SyscallHandler* const Handler, const VDSOMapping& Mapping);

// Returns VDSOBase + ELF entry point, or 0 if VDSOBase is null or no entry point is recorded.
uint64_t GetVSyscallEntry(const void* VDSOBase);

// Returns the VDSO thunk definitions that apply to the given bitness.
const std::span<FEXCore::IR::ThunkDefinition> GetVDSOThunkDefinitions(bool Is64Bit);
// Returns the guest entry points discovered or generated so far.
const VDSOEntrypoints& GetVDSOSymbols();
} // namespace FEX::VDSO


================================================
FILE: Source/Tools/TestHarnessRunner/CMakeLists.txt
================================================
# Libraries that every build of the test harness runner links against.
list(APPEND LIBS FEXCore Common JemallocLibs ${PTHREAD_LIB})

set(SRCS TestHarnessRunner.cpp)
# The host runner source and the Linux emulation libraries are left out of MinGW builds.
if (NOT MINGW)
  list(APPEND SRCS TestHarnessRunner/HostRunner.cpp)
  list(APPEND LIBS LinuxEmulation CommonTools)
endif()

add_executable(TestHarnessRunner ${SRCS})

# Lets the sources detect vixl-simulator builds via the VIXL_SIMULATOR preprocessor define.
if (ENABLE_VIXL_SIMULATOR)
  target_compile_definitions(TestHarnessRunner PRIVATE "-DVIXL_SIMULATOR=1")
endif()

# Make headers from the build tree's generated/ directory visible to the runner.
target_include_directories(TestHarnessRunner PRIVATE ${CMAKE_BINARY_DIR}/generated)

target_link_libraries(TestHarnessRunner PRIVATE ${LIBS})


================================================
FILE: Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.cpp
================================================
// SPDX-License-Identifier: MIT
#include "ArchHelpers/UContext.h"
#include "LinuxSyscalls/SignalDelegator.h"
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/fextl/list.h>
#include <FEXCore/fextl/unordered_map.h>
#include <FEXCore/fextl/unordered_set.h>
#include <FEXCore/Utils/LogManager.h>

#ifdef ARCHITECTURE_x86_64
#include "Common/X86Features.h"
#include <asm/ldt.h>
#include <sys/syscall.h>
#endif
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <ucontext.h>

#include <signal.h>

#ifdef ARCHITECTURE_x86_64
// Thin wrapper around the raw modify_ldt syscall. The byte count passed to the kernel is
// always the size of one user_desc.
static inline int modify_ldt(int func, void* ldt) {
  constexpr auto DescriptorBytes = sizeof(struct user_desc);
  const auto Result = ::syscall(SYS_modify_ldt, func, ldt, DescriptorBytes);
  return static_cast<int>(Result);
}

// Naked trampoline that transfers control from the harness to the test code at BranchTarget.
//
// Entry sequence:
//  - Pushes rbx, rbp and r12-r15, then (only when SupportsFSGSBase is non-zero) the current FS and GS bases,
//    and finally the SupportsFSGSBase flag itself so the exit path knows what to restore.
//  - Stores the resulting stack pointer through ReturningStackLocation.
//  - Zeroes the general purpose registers, resets the x87 unit with finit and loads 0x202 into the flags.
//  - CodeSegment == 0: clears rdx and rsp and jumps to BranchTarget.
//  - CodeSegment != 0: builds a {32-bit offset, 16-bit selector} far pointer on the stack, executes the hlt at
//    the GetCodeSegmentEntryLocation label and then far-jumps through that pointer.
//
// Exit sequence (the ThreadStopHandlerAddress label):
//  - Pops the saved flag, restores the GS and FS bases if they were saved, pops the saved registers and
//    returns to Dispatcher's caller. Nothing in this function branches to the label; x86HostRunner::HandleSIGSEGV
//    redirects RIP and RSP to it from the signal handler.
__attribute__((naked)) void Dispatcher(uintptr_t BranchTarget, void* ReturningStackLocation, int CodeSegment, int SupportsFSGSBase) {
  // BranchTarget: rdi
  // ReturningStackLocation: rsi
  // CodeSegment: rdx
  // SupportsFSGSBase: rcx
  __asm volatile(R"(
  .intel_syntax noprefix;
    // x86-64 ABI has the stack aligned when /call/ happens
    // Which means the destination has a misaligned stack at that point
    push rbx;
    push rbp;
    push r12;
    push r13;
    push r14;
    push r15;

    test ecx, ecx;
    je 1f;
    rdfsbase rbx;
    push rbx;
    rdgsbase rbx;
    push rbx;
    1:

    push rcx;

    // Save this stack pointer so we can cleanly shutdown the emulation with a long jump
    // regardless of where we were in the stack
    mov [rsi], rsp;

    // Clear all state going in to the branch target.
    // Only remaining state, rdi, rdx, rsp
    mov rax, 0;
    mov rbx, 0;
    mov rcx, 0;
    mov rbp, 0;
    mov rsi, 0;
    mov r8, 0;
    mov r9, 0;
    mov r10, 0;
    mov r11, 0;
    mov r12, 0;
    mov r13, 0;
    mov r14, 0;
    mov r15, 0;
    finit;

    cmp rdx, 0;
    jnz .32_bit;

    .64_bit:
      // Set flags to x86 reset state (0x202: IF=1, reserved bit 1=1).
      push 0x202;
      popfq;
      mov rdx, 0;
      mov rsp, 0;

      // Tail-call
      jmp rdi;

    .32_bit:
      // Far call needs to go through a gate
      // This is setup just like the following packing
      // {
      //  uint32_t RIP;
      //  uint16_t CodeSegment;
      // }
      sub rsp, 16
      mov [rsp], edi;
      mov [rsp+4], dx

      // Set flags to x86 reset state (0x202: IF=1, reserved bit 1=1).
      push 0x202;
      popfq;
      mov rdx, 0;

      GetCodeSegmentEntryLocation:
      hlt;

      jmp fword ptr [rsp];

    ThreadStopHandlerAddress:

    pop rcx
    test ecx, ecx;
    je 1f;
    pop rbx;
    wrgsbase rbx;
    pop rbx;
    wrfsbase rbx;
    1:

    pop r15;
    pop r14;
    pop r13;
    pop r12;
    pop rbp;
    pop rbx;

    ret;

  .att_syntax prefix;
  )" ::
                   : "memory", "cc");
}

// Both symbols are labels defined inside Dispatcher's inline assembly; only their addresses are meaningful.
// Address of the hlt executed right before the far jump into 32-bit code.
extern "C" void* GetCodeSegmentEntryLocation;
uintptr_t GetCodeSegmentEntryLocationPtr = (uintptr_t)&GetCodeSegmentEntryLocation;
// Address of Dispatcher's exit sequence; the SIGSEGV handler points RIP here to stop the test.
extern "C" void* ThreadStopHandlerAddress;
uintptr_t ThreadStopHandlerAddressPtr = (uintptr_t)&ThreadStopHandlerAddress;

class x86HostRunner final {
public:
  x86HostRunner() {
    Setup32BitCodeSegment();
  }

  bool HandleSIGSEGV(FEXCore::Core::CPUState* OutState, int Signal, void* info, void* ucontext) {
    ucontext_t* _context = (ucontext_t*)ucontext;
    mcontext_t* _mcontext = &_context->uc_mcontext;

    // Check our current instruction that we just executed to ensure it was an HLT
    uint8_t* Inst {};

    Inst = reinterpret_cast<uint8_t*>(_mcontext->gregs[REG_RIP]);
    if (!Is64BitMode()) {
      if (_mcontext->gregs[REG_RIP] == ::GetCodeSegmentEntryLocationPtr) {
        // Backup the CSGSFS register
        GlobalCodeSegmentEntry = _mcontext->gregs[REG_CSGSFS];
        // Skip past this hlt and keep running
        _mcontext->gregs[REG_RIP] += 1;
        return true;
      }
    }
    constexpr uint8_t HLT = 0xF4;
    if (Inst[0] != HLT) {
      return false;
    }

    // Store our host state in to the guest for testing against
    OutState->gregs[FEXCore::X86State::REG_RAX] = _mcontext->gregs[REG_RAX];
    OutState->gregs[FEXCore::X86State::REG_RBX] = _mcontext->gregs[REG_RBX];
    OutState->gregs[FEXCore::X86State::REG_RCX] = _mcontext->gregs[REG_RCX];
    OutState->gregs[FEXCore::X86State::REG_RDX] = _mcontext->gregs[REG_RDX];
    OutState->gregs[FEXCore::X86State::REG_RBP] = _mcontext->gregs[REG_RBP];
    OutState->gregs[FEXCore::X86State::REG_RSI] = _mcontext->gregs[REG_RSI];
    OutState->gregs[FEXCore::X86State::REG_RDI] = _mcontext->gregs[REG_RDI];
    OutState->gregs[FEXCore::X86State::REG_RSP] = _mcontext->gregs[REG_RSP];
    OutState->gregs[FEXCore::X86State::REG_R8] = _mcontext->gregs[REG_R8];
    OutState->gregs[FEXCore::X86State::REG_R9] = _mcontext->gregs[REG_R9];
    OutState->gregs[FEXCore::X86State::REG_R10] = _mcontext->gregs[REG_R10];
    OutState->gregs[FEXCore::X86State::REG_R11] = _mcontext->gregs[REG_R11];
    OutState->gregs[FEXCore::X86State::REG_R12] = _mcontext->gregs[REG_R12];
    OutState->gregs[FEXCore::X86State::REG_R13] = _mcontext->gregs[REG_R13];
    OutState->gregs[FEXCore::X86State::REG_R14] = _mcontext->gregs[REG_R14];
    OutState->gregs[FEXCore::X86State::REG_R15] = _mcontext->gregs[REG_R15];
    OutState->rip = _mcontext->gregs[REG_RIP];

    for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
      memcpy(&OutState->xmm.avx.data[i], &_mcontext->fpregs->_xmm[i], sizeof(_mcontext->fpregs->_xmm[0]));
    }
    const auto* xstate = reinterpret_cast<FEXCore::x86_64::xstate*>(_mcontext->fpregs);
    const auto* reserved = &xstate->fpstate.sw_reserved;
    if (reserved->HasExtendedContext() && reserved->HasYMMH()) {
      for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; i++) {
        memcpy(&OutState->xmm.avx.data[i][2], &xstate->ymmh.ymmh_space[i], sizeof(xstate->ymmh.ymmh_space[0]));
      }
    }

    const uint16_t CurrentOffset = (_mcontext->fpregs->swd >> 11) & 7;
    for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_MMS; ++i) {
      memcpy(&OutState->mm[(i + CurrentOffset) % 8], &_mcontext->fpregs->_st[i], sizeof(_mcontext->fpregs->_st[0]));
    }

    // Our thread is stopping
    // We don't care about anything at this point
    // Set the stack to our starting location when we entered the JIT and get out safely
    _mcontext->gregs[REG_RSP] = ReturningStackLocation;

    // Set the new PC
    _mcontext->gregs[REG_RIP] = ::ThreadStopHandlerAddressPtr;

    if (!Is64BitMode()) {
      // Unset code segment so we can jump back in to 64-bit mode
      _mcontext->gregs[REG_CSGSFS] = GlobalCodeSegmentEntry;
    }

    return true;
  }

  void Dispatch(uint64_t InitialRip) {
    FEX::X86::Features Feature {};
    Dispatcher(InitialRip, &ReturningStackLocation, CodeSegmentEntry, Feature.Feat_fsgsbase);
  }

private:
  FEX_CONFIG_OPT(Is64BitMode, IS64BIT_MODE);
  uint64_t GlobalCodeSegmentEntry {};
  int CodeSegmentEntry {};
  uint64_t ReturningStackLocation;

  uint32_t MakeSelector(int Segment, bool LDT) const {
    // Selector Index, Table Indicator (1 = LDT, 0 = GDT), CPL (3 = userland)
    return (Segment << 3) | ((uint32_t)LDT << 2) | 3;
  };

  void Setup32BitCodeSegment() {
    if (Is64BitMode()) {
      return;
    }

    struct user_desc ldt {};
    ldt.entry_number = 1;
    // This is where HarnessCodeLoader loads code to
    ldt.base_addr = 0;
    ldt.limit = ~0U;   // No limit
    ldt.seg_32bit = 1; // 32-bit
    ldt.contents = MODIFY_LDT_CONTENTS_CODE;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 1;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    ldt.lm = 0; // Not-64-bit
    int Res = modify_ldt(0x11, &ldt);
    if (Res == -1) {
      LogMan::Msg::EFmt("Couldn't load 32-bit LDT");
      return;
    }

    CodeSegmentEntry = MakeSelector(ldt.entry_number, 1);

    // Make the data segment follow directly after the code segment
    // Overlapping region makes it read/write
    ldt.entry_number = 2;
    // This is where HarnessCodeLoader loads code to
    ldt.base_addr = 0;
    ldt.limit = ~0U;   // No limit
    ldt.seg_32bit = 1; // 32-bit
    ldt.contents = MODIFY_LDT_CONTENTS_DATA;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 1;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    ldt.lm = 0; // Not-64-bit
    Res = modify_ldt(0x11, &ldt);
    if (Res == -1) {
      LogMan::Msg::EFmt("Couldn't load 32-bit LDT");
      return;
    }

    // Stack entry overlapping data
    ldt.entry_number = 3;
    // This is where HarnessCodeLoader loads code to
    ldt.base_addr = 0;
    ldt.limit = ~0U;   // No limit
    ldt.seg_32bit = 1; // 32-bit
    ldt.contents = MODIFY_LDT_CONTENTS_STACK;
    ldt.read_exec_only = 0;
    ldt.limit_in_pages = 1;
    ldt.seg_not_present = 0;
    ldt.useable = 1;
    ldt.lm = 0; // Not-64-bit
    Res = modify_ldt(0x11, &ldt);
    if (Res == -1) {
      LogMan::Msg::EFmt("Couldn't load 32-bit LDT");
      return;
    }
  }
};

// Runs the test at InitialRip directly on the host CPU; the register state at the terminating HLT is
// written to OutputState by the SIGSEGV handler registered here.
void RunAsHost(fextl::unique_ptr<FEX::HLE::SignalDelegator>& SignalDelegation, uintptr_t InitialRip, FEXCore::Core::CPUState* OutputState) {
  x86HostRunner HostRunner;

  // Forward every host SIGSEGV to the runner, which decides whether it marks the end of the test.
  auto SegvCallback = [&HostRunner, OutputState](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) -> bool {
    return HostRunner.HandleSIGSEGV(OutputState, Signal, info, ucontext);
  };
  SignalDelegation->RegisterHostSignalHandler(SIGSEGV, SegvCallback, true);

  HostRunner.Dispatch(InitialRip);
}
#else
// Fallback for hosts that are not x86-64: native execution is unavailable, so this only raises an assert message.
void RunAsHost(fextl::unique_ptr<FEX::HLE::SignalDelegator>& SignalDelegation, uintptr_t InitialRip, FEXCore::Core::CPUState* OutputState) {
  LOGMAN_MSG_A_FMT("RunAsHost doesn't exist for this host");
}
#endif


================================================
FILE: Source/Tools/TestHarnessRunner/TestHarnessRunner/HostRunner.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/fextl/memory.h>

namespace FEXCore::CPU {
class CPUBackend;
}
namespace FEXCore::Context {
class Context;
}
namespace FEXCore::Core {
struct InternalThreadState;
struct CPUState;
} // namespace FEXCore::Core

namespace FEX::HLE {
class SignalDelegator;
}

// Executes the test code at InitialRip natively on the host and stores the resulting CPU state in OutputState.
// Only implemented for x86-64 hosts; other hosts get an asserting stub.
void RunAsHost(fextl::unique_ptr<FEX::HLE::SignalDelegator>& SignalDelegation, uintptr_t InitialRip, FEXCore::Core::CPUState* OutputState);


================================================
FILE: Source/Tools/TestHarnessRunner/TestHarnessRunner.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|TestHarnessRunner
desc: Used to run Assembly tests
$end_info$
*/

#ifdef _WIN32
#include "DummyHandlers.h"
#include "ArchHelpers/WinContext.h"
#else
#include "LinuxSyscalls/LinuxAllocator.h"
#include "LinuxSyscalls/Syscalls.h"
#include "LinuxSyscalls/x32/Syscalls.h"
#include "LinuxSyscalls/x64/Syscalls.h"
#include "LinuxSyscalls/SignalDelegator.h"
#endif

#include "Common/HostFeatures.h"
#include "Common/Linux/SBRKAllocations.h"
#include "HarnessHelpers.h"
#include "TestHarnessRunner/HostRunner.h"

#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/fextl/memory.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include <FEXHeaderUtils/Filesystem.h>

#include <csetjmp>
#include <cstdint>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <sys/types.h>
#include <utility>

#ifdef ARCHITECTURE_x86_64
#include "Common/X86Features.h"
#endif

// Log sink: prints the level string followed by the message, one message per line.
void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const auto LevelPrefix = LogMan::DebugLevelStr(Level);
  fextl::fmt::print("{} {}\n", LevelPrefix, Message);
}

// Assert sink: prints the message with an "A" prefix and then flushes all output streams.
void AssertHandler(const char* Message) {
  fextl::fmt::print("A {}\n", Message);

  // Passing nullptr flushes every open output stream.
  ::fflush(nullptr);
}

namespace {
// Lookup table pairing each "FEX_<OPTION>" environment-style name with its ConfigOption enumerator.
// The entries are generated by expanding OPT_BASE over ConfigValues.inl.
static const fextl::vector<std::pair<const char*, FEXCore::Config::ConfigOption>> EnvConfigLookup = {{
#define OPT_BASE(type, group, enum, json, default) {"FEX_" #enum, FEXCore::Config::ConfigOption::CONFIG_##enum},
#include <FEXCore/Config/ConfigValues.inl>
}};

// Claims to be a local application config layer
// Feeds the key/value pairs supplied by the test harness config into FEX's config system.
class TestEnvLoader final : public FEXCore::Config::Layer {
public:
  // Takes ownership of the key/value pairs and loads them immediately.
  explicit TestEnvLoader(fextl::vector<std::pair<std::string_view, std::string_view>> _Env)
    : FEXCore::Config::Layer(FEXCore::Config::LayerType::LAYER_LOCAL_APP)
    , Env {std::move(_Env)} {
    Load();
  }

  // Builds a name -> value map from Env, then applies every entry whose name appears in EnvConfigLookup.
  void Load() override {
    fextl::unordered_map<std::string_view, std::string> EnvMap;
    for (auto& Option : Env) {
      std::string_view Key = Option.first;
      std::string_view Value_View = Option.second;
      std::optional<fextl::string> Value;

      // NOTE(review): the ENVLOADER expansion of ConfigOptions.inl presumably reads Key / Value_View and may
      // fill in Value with an adjusted string — confirm against the .inl before renaming these locals.
#define ENVLOADER
#include <FEXCore/Config/ConfigOptions.inl>

      // Prefer the value produced by the include above, otherwise keep the raw string.
      if (Value) {
        EnvMap.insert_or_assign(Key, *Value);
      } else {
        EnvMap.insert_or_assign(Key, Value_View);
      }
    }

    // Returns the stored value for id, or nullopt when the harness config did not set it.
    auto GetVar = [&](const std::string_view id) -> std::optional<std::string_view> {
      const auto it = EnvMap.find(id);
      if (it == EnvMap.end()) {
        return std::nullopt;
      }

      return it->second;
    };

    // Only names known to EnvConfigLookup are forwarded to the layer.
    for (auto& it : EnvConfigLookup) {
      if (auto Value = GetVar(it.first); Value) {
        Set(it.second, *Value);
      }
    }
  }

private:
  // Raw key/value pairs as handed over by the harness loader.
  fextl::vector<std::pair<std::string_view, std::string_view>> Env;
};
} // namespace

namespace LongJumpHandler {
static jmp_buf LongJump {};
static bool DidFault {};

#ifndef _WIN32
// Installs the frontend SIGSEGV handler that ends a test run.
// A fault on a guest HLT long-jumps back to the setjmp in main; any other fault is recorded in DidFault
// and left unhandled.
void RegisterLongJumpHandler(FEX::HLE::SignalDelegator* Handler) {
  auto OnSegv = [](FEXCore::Core::InternalThreadState* Thread, int Signal, void* info, void* ucontext) {
    constexpr uint8_t HLT = 0xF4;
    const auto* GuestInst = reinterpret_cast<uint8_t*>(Thread->CurrentFrame->State.rip);

    if (GuestInst[0] == HLT) {
      // Expected end of the test; does not return.
      longjmp(LongJumpHandler::LongJump, 1);
    }

    DidFault = true;
    return false;
  };

  Handler->RegisterFrontendHostSignalHandler(SIGSEGV, OnSegv, true);
}
#else
FEX::DummyHandlers::DummySignalDelegator* Handler;

// Landing pad for the vectored exception handler, which points the PC here so the long jump
// back to the setjmp in main happens outside of exception dispatch.
static void LongJumpHandler() {
  longjmp(LongJump, 1);
}

// Windows counterpart of the Linux signal handlers.
//  - Misaligned accesses inside the JIT code buffer are passed to the unaligned-access handler.
//  - An access violation on a guest HLT ends the test by redirecting the PC to LongJumpHandler.
//  - Everything else is logged and passed on to the next handler.
LONG WINAPI VectoredExceptionHandler(struct _EXCEPTION_POINTERS* ExceptionInfo) {
  auto Thread = Handler->GetBackingTLSThread();
  const PCONTEXT Context = ExceptionInfo->ContextRecord;
  const auto* Record = ExceptionInfo->ExceptionRecord;

  switch (Record->ExceptionCode) {
  case STATUS_DATATYPE_MISALIGNMENT: {
    const auto FaultPC = FEX::ArchHelpers::Context::GetPc(Context);
    if (!Thread->CTX->IsAddressInCodeBuffer(Thread, FaultPC)) {
      // Wasn't a sigbus in JIT code
      return EXCEPTION_CONTINUE_SEARCH;
    }

    // On success the result holds how far to advance the PC.
    const auto Advance = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(true, FaultPC, FEX::ArchHelpers::Context::GetArmGPRs(Context));
    FEX::ArchHelpers::Context::SetPc(Context, FaultPC + Advance.value_or(0));
    if (Advance) {
      return EXCEPTION_CONTINUE_EXECUTION;
    }
    return EXCEPTION_CONTINUE_SEARCH;
  }
  case STATUS_ACCESS_VIOLATION: {
    constexpr uint8_t HLT = 0xF4;
    const auto* GuestInst = reinterpret_cast<uint8_t*>(Thread->CurrentFrame->State.rip);
    if (GuestInst[0] != HLT) {
      DidFault = true;
      return EXCEPTION_CONTINUE_SEARCH;
    }

    // Resume in the long jump landing pad instead of the faulting code.
    FEX::ArchHelpers::Context::SetPc(Context, reinterpret_cast<uint64_t>(LongJumpHandler));
    return EXCEPTION_CONTINUE_EXECUTION;
  }
  default: break;
  }

  // Unrecognised exception: dump the record for debugging.
  printf("!Fault!\n");
  printf("\tExceptionCode: 0x%lx\n", Record->ExceptionCode);
  printf("\tExceptionFlags: 0x%lx\n", Record->ExceptionFlags);
  printf("\tExceptionRecord: 0x%p\n", Record->ExceptionRecord);
  printf("\tExceptionAddress: 0x%p\n", Record->ExceptionAddress);
  printf("\tNumberParameters: 0x%lx\n", Record->NumberParameters);

  return EXCEPTION_CONTINUE_SEARCH;
}

// Windows variant: installs VectoredExceptionHandler and remembers the delegator it queries for the current thread.
void RegisterLongJumpHandler(FEX::DummyHandlers::DummySignalDelegator* Handler) {
  // Install VEH handler.
  AddVectoredExceptionHandler(0, VectoredExceptionHandler);

  // The parameter shadows the namespace-level pointer, hence the qualified name.
  LongJumpHandler::Handler = Handler;
}
#endif
} // namespace LongJumpHandler

// Entry point of the assembly test runner.
//
// Usage: TestHarnessRunner <test binary> <test config>
//
// Loads the test, runs it either through FEX or (on x86-64 hosts outside the vixl simulator) natively,
// then compares the resulting CPU state against the expectations in the config.
//
// Returns 0 when the test passed or was skipped because a required feature is unavailable,
// -1 on bad arguments or a failed comparison, 1 when core/thread setup fails and -ENOEXEC when
// the test image could not be mapped.
int main(int argc, char** argv, char** const envp) {
#ifndef _WIN32
  auto SBRKPointer = FEX::SBRKAllocations::DisableSBRKAllocations();
#endif
  FEXCore::Allocator::GLIBCScopedFault GLIBFaultScope;
  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);

  FEX::Config::InitializeConfigs(FEX::Config::PortableInformation {});
  FEXCore::Config::Initialize();
  FEXCore::Config::AddLayer(FEX::Config::CreateEnvironmentLayer(envp));
  FEXCore::Config::Load();

  if (argc < 3) {
    LogMan::Msg::EFmt("Not enough arguments");
    return -1;
  }

  auto Filename = argv[1];
  auto ConfigFile = argv[2];

  if (!FHU::Filesystem::Exists(Filename)) {
    LogMan::Msg::EFmt("File {} does not exist", Filename);
    return -1;
  }

  if (!FHU::Filesystem::Exists(ConfigFile)) {
    LogMan::Msg::EFmt("File {} does not exist", ConfigFile);
    return -1;
  }

  FEX::HarnessHelper::HarnessCodeLoader Loader {Filename, ConfigFile};

  // Adds in environment options from the test harness config
  FEXCore::Config::AddLayer(fextl::make_unique<TestEnvLoader>(Loader.GetEnvironmentOptions()));
  FEXCore::Config::ReloadMetaLayer();

  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, Loader.Is64BitMode() ? "1" : "0");
#ifdef VIXL_SIMULATOR
  // If running under the vixl simulator, ensure that indirect runtime calls are enabled.
  FEXCore::Config::Set(FEXCore::Config::CONFIG_DISABLE_VIXL_INDIRECT_RUNTIME_CALLS, "0");
#endif

#ifndef _WIN32
  fextl::unique_ptr<FEX::HLE::MemAllocator> Allocator;

  if (!Loader.Is64BitMode()) {
    // Setup our userspace allocator
    const auto PageSize = sysconf(_SC_PAGESIZE);
    FEXCore::Allocator::SetupHooks(PageSize > 0 ? PageSize : FEXCore::Utils::FEX_PAGE_SIZE);
    Allocator = FEX::HLE::CreatePassthroughAllocator();
  }
#endif

  bool SupportsAVX = false;
  FEXCore::Core::CPUState State;

  auto HostFeatures = FEX::FetchHostFeatures();
  auto CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);

#ifndef _WIN32
  auto SignalDelegation = FEX::HLE::CreateSignalDelegator(CTX.get(), {}, HostFeatures.SupportsAVX);
#else
  // Enable exit on HLT while Wine's longjump is broken.
  //
  // Once they fix longjump, we can remove this.
  CTX->EnableExitOnHLT();
  auto SignalDelegation = FEX::WindowsHandlers::CreateSignalDelegator();
#endif

  // Skip any tests that the host doesn't support features for
  SupportsAVX = HostFeatures.SupportsAVX;

  bool TestUnsupported = (!SupportsAVX && Loader.RequiresAVX()) || (!HostFeatures.SupportsRAND && Loader.RequiresRAND()) ||
                         (!HostFeatures.SupportsSHA && Loader.RequiresSHA()) || (!HostFeatures.SupportsCLZERO && Loader.RequiresCLZERO()) ||
                         (!HostFeatures.SupportsAES256 && Loader.RequiresAES256()) || (!HostFeatures.SupportsAFP && Loader.RequiresAFP());


  bool IsHostRunner = false;
#if !defined(VIXL_SIMULATOR) && defined(ARCHITECTURE_x86_64)
  IsHostRunner = true;
  ///< Features that are only unsupported when running using the HostRunner and the CI machine doesn't support the feature getting tested.
  FEX::X86::Features Feature {};
  const bool Supports3DNow = Feature.Feat_3dnow;
  const bool SupportsSSE4A = Feature.Feat_sse4a;
  const bool SupportsBMI1 = Feature.Feat_bmi1;
  const bool SupportsBMI2 = Feature.Feat_bmi2;
  const bool SupportsCLWB = Feature.Feat_clwb;
  const bool SupportsSSSE3 = Feature.Feat_ssse3;
  const bool SupportsSSE4_1 = Feature.Feat_sse4_1;
  const bool SupportsSSE4_2 = Feature.Feat_sse4_2;
  const bool SupportsAES = Feature.Feat_aes;
  const bool SupportsPCLMUL = Feature.Feat_pclmulqdq;
  const bool SupportsMOVBE = Feature.Feat_movbe;
  const bool SupportsADX = Feature.Feat_adx;
  const bool SupportsXSAVE = Feature.Feat_xsave;
  const bool SupportsRDPID = Feature.Feat_rdpid;
  const bool SupportsCLFLOPT = Feature.Feat_clflopt;
  const bool SupportsFSGSBase = Feature.Feat_fsgsbase;

  TestUnsupported |=
    (!Supports3DNow && Loader.Requires3DNow()) || (!SupportsSSE4A && Loader.RequiresSSE4A()) || (!SupportsBMI1 && Loader.RequiresBMI1()) ||
    (!SupportsBMI2 && Loader.RequiresBMI2()) || (!SupportsCLWB && Loader.RequiresCLWB()) || (!SupportsSSSE3 && Loader.RequiresSSSE3()) ||
    (!SupportsSSE4_1 && Loader.RequiresSSE4_1()) || (!SupportsSSE4_2 && Loader.RequiresSSE4_2()) ||
    (!SupportsAES && Loader.RequiresAES()) || (!SupportsPCLMUL && Loader.RequiresPCLMUL()) || (!SupportsMOVBE && Loader.RequiresMOVBE()) ||
    (!SupportsADX && Loader.RequiresADX()) || (!SupportsXSAVE && Loader.RequiresXSAVE()) || (!SupportsRDPID && Loader.RequiresRDPID()) ||
    (!SupportsCLFLOPT && Loader.RequiresCLFLOPT()) || (!SupportsFSGSBase && Loader.RequiresFSGSBase()) || Loader.RequiresEMMI();
#endif

#ifdef _WIN32
  TestUnsupported |= Loader.RequiresLinux();
#endif

  // An unsupported test counts as a pass so CI doesn't fail on hosts lacking the feature.
  if (TestUnsupported) {
    return 0;
  }

#ifndef _WIN32
  auto SyscallHandler = Loader.Is64BitMode() ? FEX::HLE::x64::CreateHandler(CTX.get(), SignalDelegation.get(), nullptr) :
                                               FEX::HLE::x32::CreateHandler(CTX.get(), SignalDelegation.get(), nullptr, std::move(Allocator));

  // Maps guest memory at a fixed address through the syscall handler.
  auto DoMmap = [&](uint64_t Address, size_t Size) -> void* {
    void* Result = SyscallHandler->GuestMmap(nullptr, (void*)Address, Size, PROT_READ | PROT_WRITE | PROT_EXEC,
                                             MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
    LOGMAN_THROW_A_FMT(Result == reinterpret_cast<void*>(Address), "Map Memory mmap failed");
    return Result;
  };

#else
  auto SyscallHandler = FEX::WindowsHandlers::CreateSyscallHandler();

  // Must carry the same name as the non-Windows lambda above: the shared code below passes `DoMmap` to the loader.
  auto DoMmap = [](uint64_t Address, size_t Size) -> void* {
    void* Result = FEXCore::Allocator::VirtualAlloc(reinterpret_cast<void*>(Address), Size, true);
    LOGMAN_THROW_A_FMT(Result == reinterpret_cast<void*>(Address), "Map Memory mmap failed");
    return Result;
  };
#endif

  CTX->SetSignalDelegator(SignalDelegation.get());
  CTX->SetSyscallHandler(SyscallHandler.get());

  if (!CTX->InitCore()) {
    return 1;
  }

  if (!IsHostRunner) {
    LongJumpHandler::RegisterLongJumpHandler(SignalDelegation.get());

    // Run through FEX
    if (!Loader.MapMemory(DoMmap)) {
      // failed to map
      LogMan::Msg::EFmt("Failed to map {}-bit elf file.", Loader.Is64BitMode() ? 64 : 32);
      return -ENOEXEC;
    }

    auto ParentThread = SyscallHandler->TM.CreateThread(Loader.DefaultRIP(), 0);

    // Bail out before the thread object is handed to anything that would use it.
    if (!ParentThread) {
      return 1;
    }

    SyscallHandler->TM.TrackThread(ParentThread);
    SignalDelegation->RegisterTLSState(ParentThread);

    // The guest's terminating HLT long-jumps back here with a non-zero value.
    int LongJumpVal = setjmp(LongJumpHandler::LongJump);
    if (!LongJumpVal) {
      CTX->ExecuteThread(ParentThread->Thread);
    }

    // Just re-use compare state. It also checks against the expected values in config.
    memcpy(&State, &ParentThread->Thread->CurrentFrame->State, sizeof(State));

    __uint128_t XMM_Low[FEXCore::Core::CPUState::NUM_XMMS];
    if (SupportsAVX) {
      ///< Reconstruct the XMM registers even if they are in split view, then remerge them.
      __uint128_t YMM_High[FEXCore::Core::CPUState::NUM_XMMS];
      CTX->ReconstructXMMRegisters(ParentThread->Thread, XMM_Low, YMM_High);
      for (size_t i = 0; i < FEXCore::Core::CPUState::NUM_XMMS; ++i) {
        memcpy(&State.xmm.avx.data[i][0], &XMM_Low[i], sizeof(__uint128_t));
        memcpy(&State.xmm.avx.data[i][2], &YMM_High[i], sizeof(__uint128_t));
      }
    } else {
      CTX->ReconstructXMMRegisters(ParentThread->Thread, reinterpret_cast<__uint128_t*>(State.xmm.sse.data), nullptr);
    }

    SignalDelegation->UninstallTLSState(ParentThread);
    FEX::HLE::_SyscallHandler->TM.DestroyThread(ParentThread, true);
  }
#ifndef _WIN32
  else {
    // Run as host
    SupportsAVX = true;
    auto ParentThread = SyscallHandler->TM.CreateThread(Loader.DefaultRIP(), 0);
    SyscallHandler->TM.TrackThread(ParentThread);
    SignalDelegation->RegisterTLSState(ParentThread);

    // Native runs map the test image with a plain host mmap instead of going through the syscall handler.
    auto DoMmap = [&](uint64_t Address, size_t Size) -> void* {
      void* Result = mmap((void*)Address, Size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
      LOGMAN_THROW_A_FMT(Result == reinterpret_cast<void*>(Address), "Map Memory mmap failed");
      return Result;
    };
    if (!Loader.MapMemory(DoMmap)) {
      // failed to map
      LogMan::Msg::EFmt("Failed to map {}-bit elf file.", Loader.Is64BitMode() ? 64 : 32);
      return -ENOEXEC;
    }

    RunAsHost(SignalDelegation, Loader.DefaultRIP(), &State);
    SignalDelegation->UninstallTLSState(ParentThread);
    FEX::HLE::_SyscallHandler->TM.DestroyThread(ParentThread, true);
  }
#endif

  SyscallHandler.reset();

  // A fault on anything other than the terminating HLT fails the test regardless of the final state.
  bool Passed = !LongJumpHandler::DidFault && Loader.CompareStates(&State, nullptr, SupportsAVX);

  LogMan::Msg::IFmt("Faulted? {}", LongJumpHandler::DidFault ? "Yes" : "No");
  LogMan::Msg::IFmt("Passed? {}", Passed ? "Yes" : "No");


  SignalDelegation.reset();

  FEXCore::Config::Shutdown();

  LogMan::Throw::UnInstallHandler();
  LogMan::Msg::UnInstallHandler();

#ifndef _WIN32
  FEXCore::Allocator::ClearHooks();

  FEX::SBRKAllocations::ReenableSBRKAllocations(SBRKPointer);
#endif

  return Passed ? 0 : -1;
}


================================================
FILE: Source/Tools/pidof/CMakeLists.txt
================================================
# FEXpidof: standalone pidof-style tool built from a single source file.
add_executable(FEXpidof pidof.cpp)

target_link_libraries(FEXpidof PRIVATE
  cpp-optparse
  JemallocDummy
  fmt::fmt
  range-v3::range-v3)

# Apply the project's LinkerGC helper to this target.
LinkerGC(FEXpidof)

# Installed to bin/ as part of the Runtime component.
install(TARGETS FEXpidof RUNTIME
  DESTINATION bin
  COMPONENT Runtime)


================================================
FILE: Source/Tools/pidof/pidof.cpp
================================================
// SPDX-License-Identifier: MIT
#include "OptionParser.h"

#include <FEXHeaderUtils/Filesystem.h>

#include <charconv>
#include <cstring>
#include <filesystem>
#include <fmt/format.h>
#include <fstream>
#include <string>
#include <unordered_set>

#include <range/v3/view/split.hpp>
#include <range/v3/view/transform.hpp>

namespace Config {

// Command line state filled in by LoadOptions().
bool SingleShot {};         // -s
bool SkipZombie {true};     // cleared by -z
bool DoNotDisplay {};       // -q
bool AllFEX {};             // set when the literal program name "FEX" is requested
std::string Separator {" "}; // -d
std::unordered_set<int64_t> OmitPids;   // -o, comma separated and repeatable
std::unordered_set<std::string> Programs; // positional arguments

// Parses the command line into the Config globals.
//
// Options: -s (single shot), -q (quiet), -z (don't skip zombies), -d <separator>, -o <pid[,pid...]> (repeatable).
// Every positional argument is recorded as a program name to search for; the name "FEX" additionally sets AllFEX.
void LoadOptions(int argc, char** argv) {
  optparse::OptionParser Parser {};

  Parser.add_option("-s").help("Single shot - Only returns one pid").action("store_true").set_default(SingleShot);

  Parser.add_option("-q")
    .help("Do not display matched PIDs to stdout. Simply exit with status of true or false if a PID was found")
    .action("store_true")
    .set_default(DoNotDisplay);

  Parser.add_option("-z").help("Try to detect zombie processes - Usually zombie processes are skipped").action("store_false").set_default(SkipZombie);

  Parser.add_option("-d").help("Use a different separator if more than one pid is show - Default is space").set_default(Separator);

  Parser.add_option("-o").help("Ignore processes with matched pids").action("append");

  optparse::Values Options = Parser.parse_args(argc, argv);

  SingleShot = Options.get("s");
  DoNotDisplay = Options.get("q");
  SkipZombie = Options.get("z");
  Separator = Options["d"];

  // Turns one piece of the split range back into a view over the original characters.
  auto to_string_view = [](auto rng) {
    return std::string_view(&*rng.begin(), ranges::distance(rng));
  };

  for (const auto& Omit : Options.all("o")) {
    for (auto pid_str : ranges::views::split(Omit, ',') | ranges::views::transform(to_string_view)) {
      int64_t pid {};
      auto ConvResult = std::from_chars(pid_str.data(), pid_str.data() + pid_str.size(), pid, 10);

      // Skip anything that did not convert cleanly. from_chars leaves `pid` untouched on every error,
      // including result_out_of_range, so only a success may reach the set.
      if (ConvResult.ec != std::errc {}) {
        continue;
      }

      OmitPids.emplace(pid);
    }
  }

  for (const auto& Program : Parser.args()) {
    if (Program == "FEX") {
      AllFEX = true;
    }
    Programs.emplace(Program);
  }
}
} // namespace Config

bool FindWineFEXApplication(int64_t PID, std::string_view exe, const std::vector<std::string_view>& Args) {
  // Walk the arguments and see if anything contains wine.
  bool FoundWine = false;

  if (exe.find("wine") != exe.npos) {
    FoundWine = true;
  }

  if (!FoundWine) {
    for (auto Arg : Args) {
      if (Arg.find("wine") != Arg.npos) {
        FoundWine = true;
        break;
      }
    }
  }

  if (!FoundWine) {
    return false;
  }

  // Wine was found, scan the mapped files to see if anything mapped "libarm64ecfex.dll" or "libwow64fex.dll"

  std::error_code ec {};
  auto dir_iter = std::filesystem::directory_iterator(fmt::format("/proc/{}/map_files", PID), ec);
  // If error reading symlink then skip.
  if (ec) {
    return false;
  }

  for (const auto& Entry : dir_iter) {
    // If not a symlink then skip.
    if (!Entry.is_symlink()) {
      continue;
    }

    const auto symlink_path = std::filesystem::read_symlink(Entry.path(), ec);
    // If error reading symlink then skip.
    if (ec) {
      continue;
    }

    const auto filename = symlink_path.filename().string();
    if (filename.find("arm64ecfex.dll") != filename.npos || filename.find("wow64fex.dll") != filename.npos) {
      return true;
    }
  }

  return false;
}

// Snapshot of one /proc/<pid> entry, as collected by IteratePids().
struct PIDInfo {
  // Numeric process id parsed from the directory name.
  int64_t pid;
  // Raw contents of /proc/<pid>/cmdline (arguments separated by NUL bytes).
  std::string cmdline;
  // Target of the /proc/<pid>/exe symlink with any " (deleted)" suffix removed.
  std::string exe_link;
  // Character parsed from the "State:" line of /proc/<pid>/status; '\0' if none was found.
  char State;
};

std::vector<PIDInfo> PIDs;

// Walks /proc and appends one PIDInfo to the global PIDs list for every process whose
// cmdline, exe link and status file could all be read.
static void IteratePids() {
  for (const auto& ProcEntry : std::filesystem::directory_iterator("/proc/")) {
    // Processes are directories; ignore everything else.
    if (!ProcEntry.is_directory()) {
      continue;
    }

    const auto& ProcDir = ProcEntry.path();
    const auto CMDLinePath = ProcDir / "cmdline";
    const auto StatusPath = ProcDir / "status";
    const auto ExePath = ProcDir / "exe";

    // Entries without a cmdline file are not processes.
    std::error_code Error;
    if (!std::filesystem::exists(CMDLinePath, Error) || Error) {
      continue;
    }

    // The directory name must be numeric; `self` and friends are rejected here.
    const auto DirName = ProcDir.filename().string();
    int64_t ProcessID;
    const auto Parsed = std::from_chars(DirName.data(), DirName.data() + DirName.size(), ProcessID, 10);
    if (Parsed.ec == std::errc::invalid_argument) {
      continue;
    }

    // Slurp the command line; an unreadable or empty one disqualifies the entry.
    std::string CommandLine;
    {
      std::ifstream CmdFile(CMDLinePath, std::ios_base::in | std::ios_base::binary);
      if (!CmdFile.is_open()) {
        continue;
      }

      std::ostringstream Buffer;
      Buffer << CmdFile.rdbuf();
      CommandLine = Buffer.str();
    }
    if (CommandLine.empty()) {
      continue;
    }

    // Resolve the executable link, skipping the process when it can't be read.
    std::string ExeLink = std::filesystem::read_symlink(ExePath, Error);
    if (Error) {
      continue;
    }

    // Strip the " (deleted)" marker if present.
    if (const auto DeletedPos = ExeLink.find(" (deleted)"); DeletedPos != std::string::npos) {
      ExeLink.erase(DeletedPos);
    }

    // Pull the state character out of the status file.
    char ProcessState {};
    {
      std::ifstream StatusFile(StatusPath, std::ios_base::in | std::ios_base::binary);
      if (!StatusFile.is_open()) {
        continue;
      }

      for (std::string StatusLine; std::getline(StatusFile, StatusLine);) {
        if (StatusFile.eof()) {
          break;
        }

        const bool MentionsState = StatusLine.find("State") != StatusLine.npos;
        if (MentionsState && sscanf(StatusLine.c_str(), "State: %c", &ProcessState) == 1) {
          break;
        }
      }
    }

    PIDs.emplace_back(PIDInfo {
      .pid = ProcessID,
      .cmdline = std::move(CommandLine),
      .exe_link = std::move(ExeLink),
      .State = ProcessState,
    });
  }
}

int main(int argc, char** argv) {
  Config::LoadOptions(argc, argv);

  IteratePids();

  std::unordered_set<int64_t> MatchedPIDs;
  for (const auto& pid : PIDs) {
    if (pid.State == 'Z' && Config::SkipZombie) {
      continue;
    }
    if (Config::OmitPids.contains(pid.pid)) {
      continue;
    }

    std::vector<std::string_view> Args;
    const char* arg = pid.cmdline.data();

    while (arg[0]) {
      Args.emplace_back(arg);
      arg += strlen(arg) + 1;
    }

    struct ProgramPair {
      std::string_view ProgramPath;
      std::string_view ProgramFilename;
    };

    auto FindEmulatedWineArgument = [](int32_t BeginningArg, const std::vector<std::string_view>& Args, bool Wine) -> ProgramPair {
      std::string_view ProgramName = Args[BeginningArg];

      for (size_t i = BeginningArg; i < Args.size(); ++i) {
        auto CurrentProgramName = FHU::Filesystem::GetFilename(Args[i]);

        if (CurrentProgramName == "wine-preloader" || CurrentProgramName == "wine64-preloader") {
          // Wine preloader is required to be in the format of `wine-preloader <wine executable>`
          // The preloader doesn't execve the executable, instead maps it directly itself
          // Skip the next argument since we know it is wine (potentially with custom wine executable name)
          ++i;
          Wine = true;
        } else if (CurrentProgramName == "wine" || CurrentProgramName == "wine64") {
          // Next argument, this isn't the program we want
          //
          // If we are running wine or wine64 then we should check the next argument for the application name instead.
          // wine will change the active program name with `setprogname` or `prctl(PR_SET_NAME`.
          // Since FEX needs this data far earlier than libraries we need a different check.
          Wine = true;
        } else {
          if (Wine == true) {
            // If this was path separated with '\' then we need to check that.
            auto WinSeparator = CurrentProgramName.find_last_of('\\');
            if (WinSeparator != CurrentProgramName.npos) {
              // Used windows separators
              CurrentProgramName = CurrentProgramName.substr(WinSeparator + 1);
            }

            return {
              .ProgramPath = Args[i],
              .ProgramFilename = CurrentProgramName,
            };
          }
          break;
        }
      }

      auto ProgramFilename = ProgramName;
      auto Separator = ProgramName.find_last_of('/');
      if (Separator != ProgramName.npos) {
        // Used windows separators
        ProgramFilename = ProgramFilename.substr(Separator + 1);
      }

      return {
        .ProgramPath = ProgramName,
        .ProgramFilename = ProgramFilename,
      };
    };

    int32_t ProgramArg = -1;
    if (pid.exe_link.ends_with("FEX")) {
      // Skip the first argument if it contains `FEX`, otherwise the application name begins at 0.
      ProgramArg = Args[0].ends_with("FEX") ? 1 : 0;
    }

    // If matching all "FEX" instances then add to the matched list.
    if (ProgramArg != -1 && Config::AllFEX) {
      MatchedPIDs.emplace(pid.pid);
      continue;
    }

    bool IsWine = false;
    // If we still haven't found a FEX path then this might be an arm64ec FEX application.
    // The only way to know for sure is the walk the mapped files of the process and check if FEX is mapped.
    if (FindWineFEXApplication(pid.pid, pid.exe_link, Args)) {
      // Search from the start.
      ProgramArg = 0;
      IsWine = true;
    }

    if (ProgramArg == -1 || ProgramArg >= Args.size()) {
      continue;
    }

    // If matching all "FEX" instances then add arm64ec/wow64 FEX to the matched list.
    if (ProgramArg != -1 && Config::AllFEX) {
      MatchedPIDs.emplace(pid.pid);
      continue;
    }

    ProgramPair Arg = FindEmulatedWineArgument(ProgramArg, Args, IsWine);
    bool Matched = false;
    for (const auto& CompareProgram : Config::Programs) {
      auto CompareProgramFilename = std::filesystem::path(CompareProgram).filename();
      if (CompareProgram == Arg.ProgramFilename || CompareProgram == Arg.ProgramPath || CompareProgramFilename == Arg.ProgramFilename) {
        MatchedPIDs.emplace(pid.pid);
        Matched = true;
        break;
      }
    }

    if (Matched && Config::SingleShot) {
      break;
    }
  }

  if (!MatchedPIDs.empty() && !Config::DoNotDisplay) {
    bool first = true;
    for (const auto& MatchedPID : MatchedPIDs) {
      if (first) {
        fmt::print("{}", MatchedPID);
        first = false;
      } else {
        fmt::print("{}{}", Config::Separator, MatchedPID);
      }
    }
    fmt::print("\n");
  }

  return MatchedPIDs.empty() ? 1 : 0;
}


================================================
FILE: Source/Windows/ARM64EC/BTInterface.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <windef.h>
#include <ntstatus.h>
#include <winternl.h>

// Declarations of the binary-translation (BT) module entry points, using C linkage and the STDMETHODCALLTYPE
// calling convention.
// NOTE(review): presumably these are the symbols exported through libarm64ecfex.def and invoked by the OS/wine
// ARM64EC support code — confirm against the .def file. Parameters named `Unk*` have no known meaning here.
// Several notification callbacks take an `After` flag together with a `Status` value.
extern "C" {
// Process and thread lifetime.
NTSTATUS STDMETHODCALLTYPE ProcessInit();
void STDMETHODCALLTYPE ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status);
NTSTATUS STDMETHODCALLTYPE ThreadInit();
NTSTATUS STDMETHODCALLTYPE ThreadTerm(HANDLE Thread, LONG ExitCode);
NTSTATUS STDMETHODCALLTYPE ResetToConsistentState(EXCEPTION_RECORD* Exception, CONTEXT* GuestContext, ARM64_NT_CONTEXT* NativeContext);
// Virtual memory and section mapping notifications.
void STDMETHODCALLTYPE NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot, BOOL After, NTSTATUS Status);
void STDMETHODCALLTYPE NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType, BOOL After, NTSTATUS Status);
void STDMETHODCALLTYPE NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt, BOOL After, NTSTATUS Status);
NTSTATUS STDMETHODCALLTYPE NotifyMapViewOfSection(void* Unk1, void* Address, void* Unk2, SIZE_T Size, ULONG AllocType, ULONG Prot);
void STDMETHODCALLTYPE NotifyUnmapViewOfSection(void* Address, BOOL After, NTSTATUS Status);
// Code invalidation and dirty-memory notifications.
void STDMETHODCALLTYPE FlushInstructionCacheHeavy(const void* Address, SIZE_T Size);
void STDMETHODCALLTYPE BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size);
void STDMETHODCALLTYPE BTCpu64NotifyMemoryDirty(void* Address, SIZE_T Size);
void STDMETHODCALLTYPE BTCpu64NotifyReadFile(HANDLE Handle, void* Address, SIZE_T Size, BOOL After, NTSTATUS Status);
// CPU feature and processor information queries.
BOOLEAN STDMETHODCALLTYPE BTCpu64IsProcessorFeaturePresent(UINT Feature);
void STDMETHODCALLTYPE UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info);
}


================================================
FILE: Source/Windows/ARM64EC/CMakeLists.txt
================================================
# Builds the arm64ecfex shared library from the module sources, its assembly helpers, the export definition
# file and the object files of the FEXCore_object target.
add_library(arm64ecfex SHARED
  Module.cpp
  Module.S
  libarm64ecfex.def
  $<TARGET_OBJECTS:FEXCore_object>)

# patch_library_wine is defined elsewhere in the project; NOTE(review): presumably it post-processes the
# library for use under wine — confirm.
patch_library_wine(arm64ecfex)

# Header search paths: the Windows-specific include directory plus the Source and Source/Windows roots.
target_include_directories(arm64ecfex PRIVATE
  "${CMAKE_SOURCE_DIR}/Source/Windows/include/"
  "${CMAKE_SOURCE_DIR}/Source/"
  "${CMAKE_SOURCE_DIR}/Source/Windows/")

target_link_libraries(arm64ecfex PRIVATE
  FEXCore_Base
  Common
  CommonTools
  CommonWindows
  CommonWindowsRuntime
  ntdll_ex)

# Link statically without the default startup files and libraries; libc++, libc++abi and libunwind are named
# explicitly, followed by the library at LIBGCC_PATH.
target_link_options(arm64ecfex PRIVATE -static -nostdlib -nostartfiles -nodefaultlibs -lc++ -lc++abi -lunwind)
target_link_libraries(arm64ecfex PRIVATE ${LIBGCC_PATH})
# Install the runtime artifact into the library directory as part of the Runtime component.
install(TARGETS arm64ecfex RUNTIME
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
  COMPONENT Runtime)


================================================
FILE: Source/Windows/ARM64EC/Module.S
================================================
.text
.balign 16

  // __os_arm64x_x64_jump in ARM64EC docs
  // Expects target code address in x9
  // Pushes lr as an x86-style return address, then shares the target check with RetToEntryThunk.
.globl DispatchJump
DispatchJump:
  str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr.
  b check_target_ec

  // __os_arm64x_dispatch_ret in ARM64EC docs
  // Expects target code address in lr
  // Moves the target into x9 and falls through into check_target_ec.
.globl RetToEntryThunk
RetToEntryThunk:
  mov x9, lr

  // Shared by DispatchJump and RetToEntryThunk. Expects the target code address in x9; clobbers x16 and x17.
  // Looks up the bit for the target's 4KiB page in the EC code bitmap: when the bit is set control leaves through
  // ExitFunctionEC, otherwise the JIT is entered.
check_target_ec:
  // Check if target is in fact x86 code
  ldr x16, [x18, #0x60] // TEB->PEB
  ldr x16, [x16, #0x368] // PEB->EcCodeBitMap
  lsr x17, x9, #15 // Byte offset of the 64-bit bitmap word: (address >> 18) * 8, low bits masked off below
  and x17, x17, #0x1fffffffffff8
  ldr x16, [x16, x17]
  lsr x17, x9, #12 // Page number; the register shift below only uses its low 6 bits
  lsr x16, x16, x17 // Move the page's bit down to bit 0
  tbnz x16, #0, ExitFunctionEC
  b enter_jit

  // __os_arm64x_dispatch_call_no_redirect in ARM64EC docs
  // Expects target code address in x9, and to be called using a 'blr x16' instruction.
  // Pushes lr as an x86-style return address and falls through into enter_jit without checking the target.
.globl ExitToX64
ExitToX64:
  str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr.

  // Marks the thread as being in simulation and tail-calls the dispatcher entry stored in the CPU area.
  // Expects the target code address in x9; clobbers x16 and x17.
enter_jit:
  ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo
  mov w16, #1
  strb w16, [x17, #0x0] // ChpeV2CpuAreaInfo->InSimulation
  ldr x16, [x17, #0x40] // ChpeV2CpuAreaInfo->EmulatorData[2] - DispatcherLoopTopEnterEC
  br x16 // DispatcherLoopTopEnterEC(RIP:x9, CPUArea:x17)

  // Invoked by KiUserEmulationDispatcher after e.g. an NtContinue to x86 code
  // Switches to the emulator stack, synchronises the thread state from the CPU area's x64 context by calling
  // SyncThreadContext, then tail-calls the dispatcher entry that refills the statically allocated registers.
.global BeginSimulation
BeginSimulation:
  ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo
  ldr x16, [x17, #0x8] // ChpeV2CpuAreaInfo->EmulatorStackBase
  mov sp, x16
  ldr x0, [x17, #0x18] // ChpeV2CpuAreaInfo->ContextAmd64
  bl "#SyncThreadContext"
  ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo (reloaded, x17 is not preserved across the call)
  ldr x16, [x17, #0x48] // ChpeV2CpuAreaInfo->EmulatorData[3] - DispatcherLoopTopEnterECFillSRA
  // The single-instruction flag is passed in x10 (see the call signature below). x10 is caller-saved and holds an
  // arbitrary value after the call above, so it must be the register that is zeroed here.
  mov x10, #0 // Zero ENTRY_FILL_SRA_SINGLE_INST_REG to avoid single step
  br x16 // DispatcherLoopTopEnterECFillSRA(SingleInst:x10, CPUArea:x17)

  // Called into by FEXCore
  // Expects the target code address in x9
  // Leaves simulation: restores the FPCR bits native code expects, clears InSimulation, honours a pending suspend
  // request and finally branches to the native target. Clobbers x4, x16, x17, x23 and, on the call paths, lr.
.global ExitFunctionEC
ExitFunctionEC:
  // Clear the AFP NEP and AH bits in FPCR as native code won't expect their behaviour.
  mrs x17, fpcr
  and x17, x17, #~6 // NEP + AH
  msr fpcr, x17
  ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo
  strb wzr, [x17, #0x0] // ChpeV2CpuAreaInfo->InSimulation
  ldr x17, [x17, #0x20] // ChpeV2CpuAreaInfo->SuspendDoorbell
  ldr w17, [x17]
  cbz w17, no_suspend // Doorbell value is zero: no suspend pending
.global ExitFunctionSuspendPoint
ExitFunctionSuspendPoint:
  brk #0xCAFE
  // Will resume here
no_suspend:
  // Either return to an exit thunk (return to ARM64EC function) or call an entry thunk (call to ARM64EC function).
  // It is assumed that a 'blr x16' instruction is only ever used to call into x86 code from an exit thunk, and that all
  // exported ARM64EC functions have a 4-byte offset to their entry thunk immediately before their first instruction.
  mov x17, x9
  mov w16, #0x200
  movk w16, #0xd63f, lsl 16 // blr x16
  ldursw x23, [x17, #-0x4] // Load either the entry thunk offset or the calling instruction.
  cmp w23, w16
  beq ret_sp_aligned // Preceded by 'blr x16': this is a return into an exit thunk, branch to the target as-is

  and x23, x23, #-0x4 // Drop the low two bits of the stored offset
  add x17, x17, x23 // Resolve entry thunk address.

  mov x4, sp
  tbz x4, #3, ret_sp_misaligned // sp is 16-byte aligned with the return address still pushed: see ret_sp_misaligned
  ldr lr, [x4], #0x8 // Pop the return address into lr.
  mov sp, x4

ret_sp_aligned:
  br x17

ret_sp_misaligned:
  // In the case of the x64 caller leaving sp only 8-byte aligned, leave the return address on the stack to keep 16-byte
  // alignment and have the callee return to an x86 ret instruction. FEX can then return to the actual caller keeping
  // the misaligned RSP.
  adrp lr, X64ReturnInstr
  ldr lr, [lr, #:lo12:X64ReturnInstr]
  br x17

  // Makes a wrapper for calling a system call directly, skipping the usual ntdll thunks
  //   Name       - symbol to define for the wrapper
  //   WineIdName - variable holding the syscall number to use when WineSyscallDispatcher is non-null
  //   WindowsId  - immediate used with 'svc' when WineSyscallDispatcher is null
  // When the dispatcher is present, the original return address is handed over in x9 and the syscall number in x8
  // before calling it through x16.
  // HASH exists so that a literal '#' can be emitted from inside the macro body.
  // NOTE: no comments are placed inside the macro body, as a '//' comment would swallow the line continuations.
#define HASH #
#define DIRECT_SYSCALL_WRAPPER(Name, WineIdName, WindowsId) \
  .global Name; \
  Name:; \
    adrp x16, WineSyscallDispatcher; \
    ldr x16, [x16, HASH:lo12:WineSyscallDispatcher]; \
    cbz x16, 1f; \
    mov x9, x30; \
    adrp x8, WineIdName; \
    ldr x8, [x8, HASH:lo12:WineIdName]; \
    blr x16; \
    ret; \
  1:; \
    svc HASH WindowsId; \
    ret

  // Allows for continuing from a full native context, as the NTDLL NtContinue export takes in an x64 context with EC and
  // the conversion to that loses the ARM64EC ABI-disallowed registers that FEX uses.
DIRECT_SYSCALL_WRAPPER("#NtContinueNative", WineNtContinueSyscallId, 0x43)

  // Both of these are wrapped as FEX needs them to setup its call checker at startup time and their NTDLL thunks could
  // already be patched by then (and because the call checker isn't installed, their patched x86 versions would be invoked
  // when called by FEX).
DIRECT_SYSCALL_WRAPPER("#NtAllocateVirtualMemoryNative", WineNtAllocateVirtualMemorySyscallId, 0x18)
DIRECT_SYSCALL_WRAPPER("#NtProtectVirtualMemoryNative", WineNtProtectVirtualMemorySyscallId, 0x50)

  // A replacement for the standard ARM64EC call checker that ignores any FFS patches and always redirects to a function's
  // native implementation. As the only library FEX calls into is NTDLL, this is done using a LUT generated at init time.
  // Expects the FFS address in x11, exit thunk address in x10 (unused) and its own address in x9.
  // The address to call is returned in x11; it is left untouched when the input lies outside of the LUT or has no
  // redirection entry. Clobbers x9, x16 and x17.
.global "CheckCall"
"CheckCall":
  adrp x9, NtDllBase
  ldr x9, [x9, #:lo12:NtDllBase]
  subs x16, x11, x9 // x16 = offset of the target from the NTDLL base
  b.lo end // Target is below NTDLL
  adrp x17, NtDllRedirectionLUTSize
  ldr w17, [x17, #:lo12:NtDllRedirectionLUTSize] // 32-bit variable, a 64-bit load would also read the bytes after it
  cmp x16, x17
  b.hs end // Valid LUT indices are [0, size), so an offset equal to the size is already out of range
  adrp x17, NtDllRedirectionLUT
  ldr x17, [x17, #:lo12:NtDllRedirectionLUT]
  ldr w16, [x17, x16, lsl #2]
  cbz w16, end // No redirection recorded for this offset, keep the original target instead of jumping to the image base
  add x11, x16, x9
end:
  ret

  // Expects target address in x0, and the SP to set in x1
  // Installs the new stack pointer and branches to the target without linking, so control does not return here.
.global "#JumpSetStack"
"#JumpSetStack":
  mov sp, x1
  br x0


================================================
FILE: Source/Windows/ARM64EC/Module.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|ARM64EC
desc: Implements the ARM64EC BT module API using FEXCore
$end_info$
*/

#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/SHMStats.h>
#include <FEXCore/Utils/EnumOperators.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/FPState.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/SignalScopeGuards.h>

#include "Common/CallRetStack.h"
#include "Common/JITGuardPage.h"
#include "Common/Config.h"
#include "Common/Exception.h"
#include "Common/ImageTracker.h"
#include "Common/InvalidationTracker.h"
#include "Common/OvercommitTracker.h"
#include "Common/TSOHandlerConfig.h"
#include "Common/CPUFeatures.h"
#include "Common/Logging.h"
#include "Common/Module.h"
#include "Common/CRT/CRT.h"
#include "Common/PortabilityInfo.h"
#include "Common/Handle.h"
#include "DummyHandlers.h"
#include "BTInterface.h"
#include "Windows/Common/SHMStats.h"

#include <cstdint>
#include <cstdio>
#include <type_traits>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <utility>
#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <winnt.h>
#include <wine/debug.h>

namespace Exception {
class ECSyscallHandler;
}

// Symbols shared between this file and Module.S. Everything here has C linkage so that the assembly can refer to
// the variables by their unmangled names.
extern "C" {
// Its address is used as this module's own image base (see PatchCallChecker).
extern IMAGE_DOS_HEADER __ImageBase; // Provided by the linker

// Code labels defined in Module.S; only their addresses are meaningful.
extern void* ExitFunctionEC;
extern void* CheckCall;
extern void* ExitFunctionSuspendPoint;

// Loaded into lr by the ret_sp_misaligned path in Module.S.
void* X64ReturnInstr; // See Module.S
// Base address of ntdll, set by InitSyscalls and read by CheckCall in Module.S.
uintptr_t NtDllBase;

// Exports on ARM64EC point to x64 fast forward sequences to allow for redirecting to the JIT if functions are
// hotpatched. This LUT maps from their addresses (relative to NtDllBase) to the relative addresses of the native
// code exports. NtDllRedirectionLUTSize is the number of entries in the LUT.
uint32_t* NtDllRedirectionLUT;
uint32_t NtDllRedirectionLUTSize;

// Wine doesn't support issuing direct system calls with SVC, and unlike Windows it doesn't have a 'stable' syscall number for NtContinue
// WineSyscallDispatcher stays null when not running under wine; the IDs are filled in by ParseWineSyscallNumbers.
void* WineSyscallDispatcher;
uint64_t WineNtContinueSyscallId;
uint64_t WineNtAllocateVirtualMemorySyscallId;
uint64_t WineNtProtectVirtualMemorySyscallId;

// Direct syscall wrappers generated by DIRECT_SYSCALL_WRAPPER in Module.S.
NTSTATUS NtContinueNative(ARM64_NT_CONTEXT* NativeContext, BOOLEAN Alert);
NTSTATUS NtAllocateVirtualMemoryNative(HANDLE, PVOID*, ULONG_PTR, SIZE_T*, ULONG, ULONG);
NTSTATUS NtProtectVirtualMemoryNative(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG*);

// Sets the stack pointer to SP and jumps to PC (see "#JumpSetStack" in Module.S).
[[noreturn]]
void JumpSetStack(uintptr_t PC, uintptr_t SP);
}

// Non-owning view over the CHPE_V2_CPU_AREA_INFO of a thread, located through a pointer stored at a fixed offset
// inside the thread's TEB. The first four EmulatorData slots are reinterpreted as FEX-specific fields.
struct ThreadCPUArea {
  // Offset of the CPU area pointer inside the TEB.
  static constexpr size_t TEBCPUAreaOffset = 0x1788;
  CHPE_V2_CPU_AREA_INFO* Area;

  // Reads the CPU area pointer out of the given TEB; the TEB must be valid as it is dereferenced here.
  explicit ThreadCPUArea(_TEB* TEB) {
    const auto SlotAddress = reinterpret_cast<uintptr_t>(TEB) + TEBCPUAreaOffset;
    Area = *reinterpret_cast<CHPE_V2_CPU_AREA_INFO**>(SlotAddress);
  }

  uint64_t& EmulatorStackLimit() const {
    auto& Limit = Area->EmulatorStackLimit;
    return Limit;
  }

  uint64_t& EmulatorStackBase() const {
    auto& Base = Area->EmulatorStackBase;
    return Base;
  }

  ARM64EC_NT_CONTEXT& ContextAmd64() const {
    auto* const Context = Area->ContextAmd64;
    return *Context;
  }

  // EmulatorData[0]: pointer to the thread's CPU state frame.
  FEXCore::Core::CpuStateFrame*& StateFrame() const {
    auto& Slot = Area->EmulatorData[0];
    return reinterpret_cast<FEXCore::Core::CpuStateFrame*&>(Slot);
  }

  // EmulatorData[1]: pointer to the thread's internal thread state.
  FEXCore::Core::InternalThreadState*& ThreadState() const {
    auto& Slot = Area->EmulatorData[1];
    return reinterpret_cast<FEXCore::Core::InternalThreadState*&>(Slot);
  }

  // EmulatorData[2]: address of the dispatcher entry used when entering the JIT.
  uint64_t& DispatcherLoopTopEnterEC() const {
    auto& Slot = Area->EmulatorData[2];
    return reinterpret_cast<uint64_t&>(Slot);
  }

  // EmulatorData[3]: address of the dispatcher entry that also refills the statically allocated registers.
  uint64_t& DispatcherLoopTopEnterECFillSRA() const {
    auto& Slot = Area->EmulatorData[3];
    return reinterpret_cast<uint64_t&>(Slot);
  }
};

// Frontend-owned per-thread data, reached through InternalThreadState::FrontendPtr (see GetFrontendThreadData).
struct FrontendThreadData {
  // NOTE(review): presumably set while the thread is inside a locked read of RWX memory — confirm with the users.
  bool InLockedRWXRead = false;
};

namespace {
// Module-wide state. All of these start out empty/null; where they are initialised is not visible in this part
// of the file.
fextl::unique_ptr<FEXCore::Context::Context> CTX;
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> SignalDelegator;
fextl::unique_ptr<Exception::ECSyscallHandler> SyscallHandler;
fextl::unique_ptr<FEX::Windows::StatAlloc> StatAllocHandler;
std::optional<FEX::Windows::InvalidationTracker> InvalidationTracker;
std::optional<FEX::Windows::CPUFeatures> CPUFeatures;
std::optional<FEX::Windows::OvercommitTracker> OvercommitTracker;
std::optional<FEX::Windows::ImageTracker> ImageTracker;

// Recursive, so it may be re-acquired by the thread that already holds it.
std::recursive_mutex ThreadCreationMutex;
// Map of TIDs to their FEX thread state, `ThreadCreationMutex` must be locked when accessing
std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*> Threads;

// Queries the TEB of `Thread` and returns the query status together with a view of that thread's CPU area.
// NOTE(review): the CPU area is built from the queried TEB regardless of the status; when the query fails the TEB
// address is uninitialised and is still dereferenced by the ThreadCPUArea constructor.
std::pair<NTSTATUS, ThreadCPUArea> GetThreadCPUArea(HANDLE Thread) {
  THREAD_BASIC_INFORMATION BasicInfo;
  const NTSTATUS Status = NtQueryInformationThread(Thread, ThreadBasicInformation, &BasicInfo, sizeof(BasicInfo), nullptr);
  auto* const ThreadTEB = reinterpret_cast<_TEB*>(BasicInfo.TebBaseAddress);
  return {Status, ThreadCPUArea {ThreadTEB}};
}

// Returns a view of the calling thread's CPU area.
ThreadCPUArea GetCPUArea() {
  auto* const CurrentTEB = NtCurrentTeb();
  return ThreadCPUArea {CurrentTEB};
}

// Returns the frontend data attached to `Thread` through its FrontendPtr member.
FrontendThreadData* GetFrontendThreadData(FEXCore::Core::InternalThreadState* Thread) {
  auto* const Opaque = Thread->FrontendPtr;
  return static_cast<FrontendThreadData*>(Opaque);
}

// Returns whether `Address` lies within [EmulatorStackLimit, EmulatorStackBase] of the given CPU area, both ends
// inclusive.
bool IsEmulatorStackAddress(const ThreadCPUArea CPUArea, uint64_t Address) {
  if (Address > CPUArea.EmulatorStackBase()) {
    return false;
  }
  return CPUArea.EmulatorStackLimit() <= Address;
}

// Returns whether `Address` lies within the half-open range [DispatcherBegin, DispatcherEnd) reported by the
// signal delegator's configuration.
bool IsDispatcherAddress(uint64_t Address) {
  const auto& DelegatorConfig = SignalDelegator->GetConfig();
  if (Address < DelegatorConfig.DispatcherBegin) {
    return false;
  }
  return Address < DelegatorConfig.DispatcherEnd;
}


// Builds the redirection LUT used by CheckCall (Module.S) from the ARM64EC redirection metadata of NTDLL.
//
// The LUT is indexed by a redirection entry's Source (an offset from NtDllBase) and holds the matching
// Destination. It is sized to cover the largest Source value; slots without an entry stay zero.
// NtDllBase must already be set. When the metadata is missing or empty, or the allocation fails, the LUT is left
// unset with a size of zero so that nothing ever indexes into it.
void FillNtDllLUTs(HMODULE NtDll) {
  ULONG Size;
  const auto* LoadConfig =
    reinterpret_cast<_IMAGE_LOAD_CONFIG_DIRECTORY64*>(RtlImageDirectoryEntryToData(NtDll, true, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, &Size));
  if (!LoadConfig || !LoadConfig->CHPEMetadataPointer) {
    return;
  }

  const auto* CHPEMetadata = reinterpret_cast<IMAGE_ARM64EC_METADATA*>(LoadConfig->CHPEMetadataPointer);
  const auto* RedirectionTableBegin = reinterpret_cast<IMAGE_ARM64EC_REDIRECTION_ENTRY*>(NtDllBase + CHPEMetadata->RedirectionMetadata);
  const auto* RedirectionTableEnd = RedirectionTableBegin + CHPEMetadata->RedirectionMetadataCount;
  if (RedirectionTableBegin == RedirectionTableEnd) {
    // Nothing to redirect; looking at the last entry of an empty table would be undefined behaviour.
    return;
  }

  // Don't rely on the table being sorted by Source, find the largest one explicitly.
  uint32_t MaxSource = 0;
  for (auto It = RedirectionTableBegin; It != RedirectionTableEnd; It++) {
    if (It->Source > MaxSource) {
      MaxSource = It->Source;
    }
  }

  const uint32_t EntryCount = MaxSource + 1;
  SIZE_T AllocSize = static_cast<SIZE_T>(EntryCount) * sizeof(uint32_t);
  void* LUT = nullptr;
  const NTSTATUS Status = NtAllocateVirtualMemoryNative(NtCurrentProcess(), &LUT, 0, &AllocSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
  if (Status < 0 || !LUT) {
    // Allocation failed, writing through the LUT pointer would fault.
    return;
  }

  NtDllRedirectionLUT = static_cast<uint32_t*>(LUT);
  for (auto It = RedirectionTableBegin; It != RedirectionTableEnd; It++) {
    NtDllRedirectionLUT[It->Source] = It->Destination;
  }

  // Only publish the size once the table behind it is fully populated.
  NtDllRedirectionLUTSize = EntryCount;
}

// Writes `Data` to the address `Module + RVA`, temporarily making the destination writable.
//
// An RVA of zero means "not present" and is ignored. The original page protection is restored afterwards.
// If the destination cannot be made writable the write is skipped rather than faulting.
template<typename T>
void WriteModuleRVA(HMODULE Module, LONG RVA, T Data) {
  if (!RVA) {
    return;
  }

  void* Address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Module) + RVA);
  void* ProtAddress = Address;
  SIZE_T ProtSize = sizeof(T);
  ULONG OldProt {};
  if (NtProtectVirtualMemoryNative(NtCurrentProcess(), &ProtAddress, &ProtSize, PAGE_READWRITE, &OldProt) < 0) {
    // OldProt is not meaningful and the destination may still be read-only.
    return;
  }

  *reinterpret_cast<T*>(Address) = Data;

  // The old-protection output is mandatory: passing a null pointer makes the call fail, which would leave the
  // page writable. Use a scratch variable for the value that is of no interest here.
  ULONG Unused {};
  NtProtectVirtualMemoryNative(NtCurrentProcess(), &ProtAddress, &ProtSize, OldProt, &Unused);
}

void PatchCallChecker() {
  // See the comment for CheckCall in Module.S for why this is necessary
  const auto Module = reinterpret_cast<HMODULE>(&__ImageBase);
  ULONG Size;
  const auto* LoadConfig =
    reinterpret_cast<_IMAGE_LOAD_CONFIG_DIRECTORY64*>(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, &Size));
  const auto* CHPEMetadata = reinterpret_cast<IMAGE_ARM64EC_METADATA*>(LoadConfig->CHPEMetadataPointer);
  WriteModuleRVA(Module, CHPEMetadata->__os_arm64x_dispatch_call, &CheckCall);
  WriteModuleRVA(Module, CHPEMetadata->__os_arm64x_dispatch_icall, &CheckCall);
  WriteModuleRVA(Module, CHPEMetadata->__os_arm64x_dispatch_icall_cfg, &CheckCall);
}

// Fills in the syscall numbers necessary to call *Native variants of syscalls from FEX under wine.
//
// Windows/Wine orders syscalls in memory by their ID, so the ID of a syscall equals the number of syscall exports
// located at a lower RVA. Note that all functions starting with Nt besides NtGetTickCount are treated as syscalls.
// Counting instead of building and sorting a table avoids any fixed-capacity buffer, which matters as no syscalls
// (and therefore no allocations) can be used at this stage. IDs of syscalls that aren't exported are left untouched.
void ParseWineSyscallNumbers(HMODULE NtDll) {
  ULONG Size;
  const auto* Exports = reinterpret_cast<IMAGE_EXPORT_DIRECTORY*>(RtlImageDirectoryEntryToData(NtDll, true, IMAGE_DIRECTORY_ENTRY_EXPORT, &Size));
  if (!Exports) {
    return;
  }

  const auto* NameTable = reinterpret_cast<uint32_t*>(NtDllBase + Exports->AddressOfNames);
  const auto* FunctionTable = reinterpret_cast<uint32_t*>(NtDllBase + Exports->AddressOfFunctions);
  const auto* OrdinalTable = reinterpret_cast<uint16_t*>(NtDllBase + Exports->AddressOfNameOrdinals);

  const auto IsSyscallExport = [](const char* Name) {
    return Name[0] == 'N' && Name[1] == 't' && strcmp(Name, "NtGetTickCount") != 0;
  };

  struct SyscallTarget {
    const char* Name;    // Export to look for
    uint64_t* Id;        // Where to store the resolved syscall ID
    uint32_t RVA;        // RVA of the export, valid when Found is set
    uint32_t LowerCount; // Number of syscall exports located below RVA
    bool Found;
  };

  SyscallTarget Targets[] = {
    {"NtContinue", &WineNtContinueSyscallId, 0, 0, false},
    {"NtAllocateVirtualMemory", &WineNtAllocateVirtualMemorySyscallId, 0, 0, false},
    {"NtProtectVirtualMemory", &WineNtProtectVirtualMemorySyscallId, 0, 0, false},
  };

  // First pass: locate the syscalls which we need to manually issue.
  for (uint32_t Idx = 0; Idx < Exports->NumberOfNames; Idx++) {
    const char* Name = reinterpret_cast<const char*>(NtDllBase + NameTable[Idx]);
    for (auto& Target : Targets) {
      if (!Target.Found && strcmp(Name, Target.Name) == 0) {
        Target.RVA = FunctionTable[OrdinalTable[Idx]];
        Target.Found = true;
      }
    }
  }

  // Second pass: the ID of each target is its rank among all syscall exports when ordered by RVA.
  for (uint32_t Idx = 0; Idx < Exports->NumberOfNames; Idx++) {
    const char* Name = reinterpret_cast<const char*>(NtDllBase + NameTable[Idx]);
    if (!IsSyscallExport(Name)) {
      continue;
    }

    const uint32_t RVA = FunctionTable[OrdinalTable[Idx]];
    for (auto& Target : Targets) {
      if (Target.Found && RVA < Target.RVA) {
        ++Target.LowerCount;
      }
    }
  }

  for (const auto& Target : Targets) {
    if (Target.Found) {
      *Target.Id = Target.LowerCount;
    }
  }
}

// Syscall thunks may have been patched before FEX has loaded, the default call checker installed by ntdll into FEX will
// try to invoke the JIT when calling such patched syscalls but this obviously doesn't work before FEX is initialised.
// This function parses ntdll and sets up a custom call checker to prevent this, as such it must avoid using any syscall
// thunks itself.
void InitSyscalls() {
  // The ntdll exports called by GetModuleHandle/GetProcAddress aren't known to be patched before JIT init by any current
  // software so are safe to call, but if that changes the loader structures in the PEB could be parsed manually.
  const auto NtDll = GetModuleHandle("ntdll.dll");
  NtDllBase = reinterpret_cast<uintptr_t>(NtDll);

  // The dispatcher export only exists under wine; it holds a pointer to the actual dispatcher.
  if (const auto DispatcherSlot = reinterpret_cast<void**>(GetProcAddress(NtDll, "__wine_syscall_dispatcher")); DispatcherSlot) {
    WineSyscallDispatcher = *DispatcherSlot;
    ParseWineSyscallNumbers(NtDll);
  }

  // The LUT has to be in place before the call checker that consumes it is installed.
  FillNtDllLUTs(NtDll);
  PatchCallChecker();
}

void HandleImageMap(uint64_t Address, bool MainImage = false) {
  fextl::string ModulePath = FEX::Windows::GetSectionFilePath(Address);
  fextl::string ModuleName = fextl::string {FEX::Windows::BaseName(ModulePath)};
  InvalidationTracker->HandleImageMap(ModuleName, Address);
  ImageTracker->HandleImageMap(ModulePath, Address, MainImage);
}

void HandleImageUnmap(uint64_t Address, uint64_t Size) {
  ImageTracker->HandleImageUnmap(Address, Size);
}
} // namespace

namespace Exception {
// Configuration consulted by HandleUnalignedAccess; empty until initialised elsewhere.
static std::optional<FEX::Windows::TSOHandlerConfig> HandlerConfig;
// Address that RethrowGuestException redirects the faulting context's PC to.
static uintptr_t KiUserExceptionDispatcher;

// Data placed on the guest stack for KiUserExceptionDispatcher (see RethrowGuestException, which fills in Context
// and Rec and points SP at the start of this structure). The field order and sizes form a binary layout and must
// not be changed.
struct alignas(16) KiUserExceptionDispatcherStackLayout {
  ARM64_NT_CONTEXT Context;
  uint64_t Pad[4]; // Only present on newer Windows versions, likely for SVE.
  EXCEPTION_RECORD Rec;
  uint64_t Align;
  uint64_t Redzone[2];
};

// Attempts to handle an unaligned access fault at Context.Pc through the Arm64 helper of the same name.
// On success the PC is advanced by the amount the helper reports and true is returned; otherwise the context is
// left untouched and false is returned. The per-thread SIGBUS counter is incremented either way.
static bool HandleUnalignedAccess(const ThreadCPUArea CPUArea, ARM64_NT_CONTEXT& Context, bool IsJIT) {
  auto Thread = CPUArea.ThreadState();
  FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1);

  const auto PCAdvance =
    FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(Thread, HandlerConfig->GetUnalignedHandlerType(), Context.Pc, &Context.X0, IsJIT);
  if (!PCAdvance.has_value()) {
    return false;
  }

  Context.Pc += *PCAdvance;
  return true;
}

// Copies the parts of an x64 `Context` selected by its ContextFlags into the FEX CPU state of `Thread`.
// Each group (integer, control, segments, floating point) is only loaded when all bits of the respective
// CONTEXT_* flag are set.
static void LoadStateFromECContext(FEXCore::Core::InternalThreadState* Thread, CONTEXT& Context) {
  auto& State = Thread->CurrentFrame->State;
  auto& Regs = State.gregs;

  // True when every bit of `Mask` is present in the context flags.
  const auto HasAll = [&Context](auto Mask) {
    return (Context.ContextFlags & Mask) == Mask;
  };

  if (HasAll(CONTEXT_INTEGER)) {
    // General register state
    Regs[FEXCore::X86State::REG_RAX] = Context.Rax;
    Regs[FEXCore::X86State::REG_RCX] = Context.Rcx;
    Regs[FEXCore::X86State::REG_RDX] = Context.Rdx;
    Regs[FEXCore::X86State::REG_RBX] = Context.Rbx;

    Regs[FEXCore::X86State::REG_RSI] = Context.Rsi;
    Regs[FEXCore::X86State::REG_RDI] = Context.Rdi;
    Regs[FEXCore::X86State::REG_R8] = Context.R8;
    Regs[FEXCore::X86State::REG_R9] = Context.R9;
    Regs[FEXCore::X86State::REG_R10] = Context.R10;
    Regs[FEXCore::X86State::REG_R11] = Context.R11;
    Regs[FEXCore::X86State::REG_R12] = Context.R12;
    Regs[FEXCore::X86State::REG_R13] = Context.R13;
    Regs[FEXCore::X86State::REG_R14] = Context.R14;
    Regs[FEXCore::X86State::REG_R15] = Context.R15;
  }

  if (HasAll(CONTEXT_CONTROL)) {
    // Instruction pointer, stack/frame pointers and flags
    State.rip = Context.Rip;
    Regs[FEXCore::X86State::REG_RSP] = Context.Rsp;
    Regs[FEXCore::X86State::REG_RBP] = Context.Rbp;
    CTX->SetFlagsFromCompactedEFLAGS(Thread, Context.EFlags);
  }

  if (HasAll(CONTEXT_SEGMENTS)) {
    // Segment selectors, truncated to 16 bits
    State.es_idx = Context.SegEs & 0xffff;
    State.cs_idx = Context.SegCs & 0xffff;
    State.ss_idx = Context.SegSs & 0xffff;
    State.ds_idx = Context.SegDs & 0xffff;
    State.fs_idx = Context.SegFs & 0xffff;
    State.gs_idx = Context.SegGs & 0xffff;

    // The TEB is the only populated GDT entry by default
    const auto TEB = reinterpret_cast<uint64_t>(NtCurrentTeb());
    auto GDT = State.GetSegmentFromIndex(State, (Context.SegGs & 0xffff));
    State.SetGDTBase(GDT, TEB);
    State.SetGDTLimit(GDT, 0xF'FFFFU);

    // GS is based at the TEB, every other cached segment base is zero.
    State.gs_cached = TEB;
    State.fs_cached = 0;
    State.es_cached = 0;
    State.cs_cached = 0;
    State.ss_cached = 0;
    State.ds_cached = 0;
  }

  if (HasAll(CONTEXT_FLOATING_POINT)) {
    // Floating-point register state
    // The upper YMM halves are only available when the context carries extended state; otherwise none are passed.
    const __uint128_t* YmmHigh = nullptr;
    if (HasAll(CONTEXT_XSTATE)) {
      const auto* Ymm = RtlLocateExtendedFeature(reinterpret_cast<CONTEXT_EX*>(&Context + 1), XSTATE_AVX, nullptr);
      YmmHigh = reinterpret_cast<const __uint128_t*>(Ymm);
    }
    CTX->SetXMMRegistersFromState(Thread, reinterpret_cast<const __uint128_t*>(Context.FltSave.XmmRegisters), YmmHigh);

    memcpy(State.mm, Context.FltSave.FloatRegisters, sizeof(State.mm));

    // Unpack the x87 control and status words.
    const auto& Flt = Context.FltSave;
    State.FCW = Flt.ControlWord;
    State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = Flt.StatusWord & 1;
    State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (Flt.StatusWord >> 8) & 1;
    State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (Flt.StatusWord >> 9) & 1;
    State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (Flt.StatusWord >> 10) & 1;
    State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (Flt.StatusWord >> 14) & 1;
    State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (Flt.StatusWord >> 11) & 0b111;
    State.AbridgedFTW = Flt.TagWord;
  }
}

// Rebuilds the guest CPU state of `Thread` from a host `Context` captured while executing JIT code: the guest RIP
// is recovered from the host PC, and the statically allocated GPRs, FPRs and flags are written back to the state.
static void ReconstructThreadState(FEXCore::Core::InternalThreadState* Thread, ARM64_NT_CONTEXT& Context) {
  const auto& DelegatorConfig = SignalDelegator->GetConfig();
  auto& GuestState = Thread->CurrentFrame->State;

  GuestState.rip = CTX->RestoreRIPFromHostPC(Thread, Context.Pc);

  // Spill all SRA GPRs
  for (size_t Reg = 0; Reg < DelegatorConfig.SRAGPRCount; ++Reg) {
    const auto HostReg = DelegatorConfig.SRAGPRMapping[Reg];
    GuestState.gregs[Reg] = Context.X[HostReg];
  }

  // Spill all SRA FPRs
  for (size_t Reg = 0; Reg < DelegatorConfig.SRAFPRCount; ++Reg) {
    const auto HostReg = DelegatorConfig.SRAFPRMapping[Reg];
    memcpy(GuestState.xmm.sse.data[Reg], &Context.V[HostReg], sizeof(__uint128_t));
  }

  // Spill EFlags
  const uint32_t CompactedFlags = CTX->ReconstructCompactedEFLAGS(Thread, true, Context.X, Context.Cpsr);
  CTX->SetFlagsFromCompactedEFLAGS(Thread, CompactedFlags);
}

// Reconstructs an x64 context from the input thread's state, packed into a regular ARM64 context following the ARM64EC register mapping
// FPCR and FPSR are stored into the result unchanged. The returned context starts zero-initialised, so anything
// not written below is zero.
static ARM64_NT_CONTEXT StoreStateToPackedECContext(FEXCore::Core::InternalThreadState* Thread, uint32_t FPCR, uint32_t FPSR) {
  ARM64_NT_CONTEXT ECContext {};

  ECContext.ContextFlags = CONTEXT_ARM64_FULL;
  if (CPUFeatures->IsFeaturePresent(PF_AVX2_INSTRUCTIONS_AVAILABLE)) {
    // This is a FEX extension and requires corresponding wine-side patches to be of use, however it is harmless to set
    // even if those patches are not used.
    ECContext.ContextFlags |= CONTEXT_ARM64_FEX_YMMSTATE;
  }

  auto& State = Thread->CurrentFrame->State;

  // x64 general purpose registers and the ARM64 registers they are packed into.
  ECContext.X8 = State.gregs[FEXCore::X86State::REG_RAX];
  ECContext.X0 = State.gregs[FEXCore::X86State::REG_RCX];
  ECContext.X1 = State.gregs[FEXCore::X86State::REG_RDX];
  ECContext.X27 = State.gregs[FEXCore::X86State::REG_RBX];
  ECContext.Sp = State.gregs[FEXCore::X86State::REG_RSP];
  ECContext.Fp = State.gregs[FEXCore::X86State::REG_RBP];
  ECContext.X25 = State.gregs[FEXCore::X86State::REG_RSI];
  ECContext.X26 = State.gregs[FEXCore::X86State::REG_RDI];
  ECContext.X2 = State.gregs[FEXCore::X86State::REG_R8];
  ECContext.X3 = State.gregs[FEXCore::X86State::REG_R9];
  ECContext.X4 = State.gregs[FEXCore::X86State::REG_R10];
  ECContext.X5 = State.gregs[FEXCore::X86State::REG_R11];
  ECContext.X19 = State.gregs[FEXCore::X86State::REG_R12];
  ECContext.X20 = State.gregs[FEXCore::X86State::REG_R13];
  ECContext.X21 = State.gregs[FEXCore::X86State::REG_R14];
  ECContext.X22 = State.gregs[FEXCore::X86State::REG_R15];

  ECContext.Pc = State.rip;

  // The vector state is written to V[0] onwards, with a second output area starting at V[16].
  CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(&ECContext.V[0]), reinterpret_cast<__uint128_t*>(&ECContext.V[16]));

  // Each State.mm register contributes its low 64 bits to a dedicated GPR; the low 16 bits of the upper halves of
  // four registers at a time are packed together into X16 (mm0-mm3) and X17 (mm4-mm7).
  ECContext.Lr = State.mm[0][0];
  ECContext.X6 = State.mm[1][0];
  ECContext.X7 = State.mm[2][0];
  ECContext.X9 = State.mm[3][0];
  ECContext.X16 = (State.mm[3][1] & 0xffff) << 48 | (State.mm[2][1] & 0xffff) << 32 | (State.mm[1][1] & 0xffff) << 16 | (State.mm[0][1] & 0xffff);
  ECContext.X10 = State.mm[4][0];
  ECContext.X11 = State.mm[5][0];
  ECContext.X12 = State.mm[6][0];
  ECContext.X15 = State.mm[7][0];
  ECContext.X17 = (State.mm[7][1] & 0xffff) << 48 | (State.mm[6][1] & 0xffff) << 32 | (State.mm[5][1] & 0xffff) << 16 | (State.mm[4][1] & 0xffff);

  // Zero all disallowed registers
  ECContext.X13 = 0;
  ECContext.X14 = 0;
  ECContext.X18 = 0;
  ECContext.X23 = 0;
  ECContext.X24 = 0;
  ECContext.X28 = 0;

  // NZCV+SS will be converted into EFlags by ntdll, the rest are lost during exception handling.
  // See HandleGuestException
  // TF is placed in bit 21, OF in bit 28, CF in bit 29, ZF in bit 30 and SF in bit 31 of Cpsr.
  uint32_t EFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0);
  ECContext.Cpsr = 0;
  ECContext.Cpsr |= (EFlags & (1U << FEXCore::X86State::RFLAG_TF_RAW_LOC)) ? (1U << 21) : 0;
  ECContext.Cpsr |= (EFlags & (1U << FEXCore::X86State::RFLAG_OF_RAW_LOC)) ? (1U << 28) : 0;
  ECContext.Cpsr |= (EFlags & (1U << FEXCore::X86State::RFLAG_CF_RAW_LOC)) ? (1U << 29) : 0;
  ECContext.Cpsr |= (EFlags & (1U << FEXCore::X86State::RFLAG_ZF_RAW_LOC)) ? (1U << 30) : 0;
  ECContext.Cpsr |= (EFlags & (1U << FEXCore::X86State::RFLAG_SF_RAW_LOC)) ? (1U << 31) : 0;

  ECContext.Fpcr = FPCR;
  ECContext.Fpsr = FPSR;

  return ECContext;
}

// Redirects an exception taken in JIT/dispatcher code so that it is delivered to the guest.
// The current guest state is packed into an EC context and, together with the exception record, written to a
// KiUserExceptionDispatcherStackLayout placed on the guest stack. `Context` is then rewritten so that resuming it
// enters KiUserExceptionDispatcher with SP pointing at that layout.
static void RethrowGuestException(const EXCEPTION_RECORD& Rec, ARM64_NT_CONTEXT& Context) {
  const auto& Config = SignalDelegator->GetConfig();
  auto* Thread = GetCPUArea().ThreadState();
  auto& Fault = Thread->CurrentFrame->SynchronousFaultData;
  // The guest RSP is read from the host register it is statically mapped to in the faulting native context.
  uint64_t GuestSp = Context.X[Config.SRAGPRMapping[static_cast<size_t>(FEXCore::X86State::REG_RSP)]];
  // Reserve room for one dispatcher stack layout directly below the 64-byte aligned-down guest stack pointer.
  auto* Args = reinterpret_cast<KiUserExceptionDispatcherStackLayout*>(FEXCore::AlignDown(GuestSp, 64)) - 1;

  LogMan::Msg::DFmt("Reconstructing context");
  // Thread state is only rebuilt from the native context when the fault PC is not a dispatcher address.
  if (!IsDispatcherAddress(Context.Pc)) {
    ReconstructThreadState(Thread, Context);
  }
  Args->Context = StoreStateToPackedECContext(Thread, Context.Fpcr, Context.Fpsr);
  LogMan::Msg::DFmt("pc: {:X} rip: {:X}", Context.Pc, Args->Context.Pc);

  // X64 Windows always clears TF, DF and AF when handling an exception, restoring after.
  // Current ARM64EC windows can only restore NZCV+SS when returning from an exception and other flags are left untouched from the handler context.
  // TODO: Can extend wine to support this by mapping the remaining EFlags into reserved cpsr members.
  // Only TF is cleared from the JIT state here; the packed context stored above was captured before this change.
  uint32_t EFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0);
  EFlags &= ~(1 << FEXCore::X86State::RFLAG_TF_RAW_LOC);
  CTX->SetFlagsFromCompactedEFLAGS(Thread, EFlags);

  // Build the guest-visible exception record from the host record and the synchronous fault data.
  Args->Rec = FEX::Windows::HandleGuestException(Fault, Rec, Args->Context.Pc, Args->Context.X8);
  if (Args->Rec.ExceptionCode == EXCEPTION_SINGLE_STEP) {
    Args->Context.Cpsr &= ~(1 << 21); // PSTATE.SS
  } else if (Args->Rec.ExceptionCode == EXCEPTION_BREAKPOINT) {
    // INT3 will set RIP to the instruction following it, undo this (any edge cases with multibyte instructions that trigger breakpoints are bugs present in Windows also)
    Args->Context.Pc -= 1;
  }

  // Resume in the exception dispatcher with the stack pointing at the layout written above.
  Context.Sp = reinterpret_cast<uint64_t>(Args);
  Context.Pc = KiUserExceptionDispatcher;
}

// Syscall handler used by the ARM64EC frontend. Guest syscalls are not emulated here; they are handed to ntdll by
// raising STATUS_EMULATION_SYSCALL. The remaining overrides forward to the process-wide trackers.
class ECSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators {
public:
  ECSyscallHandler() {
    OSABI = FEXCore::HLE::SyscallOSABI::OS_GENERIC;
  }

  // Packs the current JIT state into a native context and jumps to KiUserExceptionDispatcher with it.
  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override {
    ProcessPendingCrossProcessEmulatorWork();

    // Manually raise an exception with the current JIT state packed into a native context, ntdll handles this and
    // reenters the JIT (see dlls/ntdll/signal_arm64ec.c in wine).
    uint64_t HostFpcr, HostFpsr;
    __asm volatile("mrs %[fpcr], fpcr" : [fpcr] "=r"(HostFpcr));
    __asm volatile("mrs %[fpsr], fpsr" : [fpsr] "=r"(HostFpsr));

    auto* Thread = GetCPUArea().ThreadState();
    const auto Fpcr32 = static_cast<uint32_t>(HostFpcr);
    const auto Fpsr32 = static_cast<uint32_t>(HostFpsr);

    KiUserExceptionDispatcherStackLayout DispatchArgs {.Context = StoreStateToPackedECContext(Thread, Fpcr32, Fpsr32),
                                                       .Rec = {.ExceptionCode = STATUS_EMULATION_SYSCALL}};

    // PC is expected to hold the return address after the thunk, so skip over the INT 2E/SYSCALL instruction.
    DispatchArgs.Context.Pc += 2;

    const auto DispatchStack = reinterpret_cast<uintptr_t>(&DispatchArgs);
    JumpSetStack(KiUserExceptionDispatcher, DispatchStack);
  }

  // Resolves the executable file section containing Address through the image tracker.
  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t Address) override {
    return ImageTracker->LookupExecutableFileSection(Address);
  }

  // Asks the invalidation tracker to reprotect RWX intervals within [Start, Start + Length).
  void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {
    InvalidationTracker->ReprotectRWXIntervals(Start, Length);
  }

  // Invalidates the given guest code range through the invalidation tracker.
  void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {
    InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
  }

  // Registers a range with the overcommit tracker.
  void MarkOvercommitRange(uint64_t Start, uint64_t Length) override {
    OvercommitTracker->MarkRange(Start, Length);
  }

  // Removes a range from the overcommit tracker.
  void UnmarkOvercommitRange(uint64_t Start, uint64_t Length) override {
    OvercommitTracker->UnmarkRange(Start, Length);
  }

  // Queries the invalidation tracker for the executable range containing Address.
  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) override {
    return InvalidationTracker->QueryExecutableRange(Address);
  }

  // Drains pending cross-process emulator work before compilation starts.
  void PreCompile() override {
    ProcessPendingCrossProcessEmulatorWork();
  }
};
} // namespace Exception

// Loads the supplied x64 context into the current thread's JIT state, after patching in the EFlags bits that an
// ARM64EC context cannot carry.
extern "C" void SyncThreadContext(CONTEXT* Context) {
  // All other EFlags bits are lost when converting to/from an ARM64EC context, so merge them in from the current JIT state.
  // This is advisable over dropping their values as thread suspend/resume uses this function, and that can happen at any point in guest code.
  static constexpr uint32_t ECValidEFlagsMask {(1U << FEXCore::X86State::RFLAG_OF_RAW_LOC) | (1U << FEXCore::X86State::RFLAG_CF_RAW_LOC) |
                                               (1U << FEXCore::X86State::RFLAG_ZF_RAW_LOC) | (1U << FEXCore::X86State::RFLAG_SF_RAW_LOC) |
                                               (1U << FEXCore::X86State::RFLAG_TF_RAW_LOC)};

  ProcessPendingCrossProcessEmulatorWork();
  auto* Thread = GetCPUArea().ThreadState();

  const uint32_t JITFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0);
  const uint32_t FromContext = Context->EFlags & ECValidEFlagsMask;
  const uint32_t FromJIT = JITFlags & ~ECValidEFlagsMask;
  Context->EFlags = FromContext | FromJIT;

  Exception::LoadStateFromECContext(Thread, *Context);
}

// Process-wide initialisation entry point (exported from the DLL).
// Loads configuration, creates the FEXCore context and the process-global trackers, registers the main image and
// ntdll, resolves the exception dispatcher address and applies optional startup settings.
// Always returns STATUS_SUCCESS.
NTSTATUS ProcessInit() {
  InitSyscalls();

  FEX::Windows::InitCRTProcess();
  // Configuration is loaded keyed on the executable's base name.
  const auto ExecutableName = FEX::Windows::BaseName(FEX::Windows::GetExecutableFilePath());
  FEX::Config::LoadConfig(fextl::string {ExecutableName}, _environ, FEX::ReadPortabilityInformation());
  FEXCore::Config::ReloadMetaLayer();
  FEX::Windows::Logging::Init();

  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, "1");

  FEXCore::Profiler::Init("", "");

  SignalDelegator = fextl::make_unique<FEX::DummyHandlers::DummySignalDelegator>();
  SyscallHandler = fextl::make_unique<Exception::ECSyscallHandler>();

  // Running under wine is detected by the presence of the wine_get_version export in ntdll.
  const auto NtDll = GetModuleHandle("ntdll.dll");
  const bool IsWine = !!GetProcAddress(NtDll, "wine_get_version");
  OvercommitTracker.emplace(IsWine);

  {
    auto HostFeatures = FEX::Windows::CPUFeatures::FetchHostFeatures(IsWine);
    CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
  }

  CTX->SetSignalDelegator(SignalDelegator.get());
  CTX->SetSyscallHandler(SyscallHandler.get());
  CTX->InitCore();
  // The trackers below are constructed from the context, so they are created only after InitCore().
  Exception::HandlerConfig.emplace(*CTX);
  InvalidationTracker.emplace(*CTX, Threads);
  ImageTracker.emplace(*CTX, false);

  // Register the main executable image and ntdll with the image handling code.
  auto MainModule = reinterpret_cast<__TEB*>(NtCurrentTeb())->Peb->ImageBaseAddress;
  HandleImageMap(reinterpret_cast<uint64_t>(MainModule), true);

  HandleImageMap(NtDllBase);

  CPUFeatures.emplace(*CTX);

  // Allocate one page holding a single 0xc3 byte, and report it to the invalidation tracker as PAGE_EXECUTE_READ.
  // NOTE(review): the VirtualAlloc result is not checked before it is written to.
  X64ReturnInstr = ::VirtualAlloc(nullptr, FEXCore::Utils::FEX_PAGE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
  InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast<uint64_t>(X64ReturnInstr), FEXCore::Utils::FEX_PAGE_SIZE,
                                                          PAGE_EXECUTE_READ);
  *reinterpret_cast<uint8_t*>(X64ReturnInstr) = 0xc3;

  // The exported KiUserExceptionDispatcher address is converted to an offset from NtDllBase, translated through
  // NtDllRedirectionLUT, and rebased onto NtDllBase again.
  const uintptr_t KiUserExceptionDispatcherFFS = reinterpret_cast<uintptr_t>(GetProcAddress(NtDll, "KiUserExceptionDispatcher"));
  Exception::KiUserExceptionDispatcher = NtDllRedirectionLUT[KiUserExceptionDispatcherFFS - NtDllBase] + NtDllBase;

  // When TSO is enabled in the config, request ProcessFexHardwareTso from the OS; hardware TSO support is only
  // turned on in the context if that request succeeds.
  FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
  if (TSOEnabled()) {
    BOOL Enable = TRUE;
    NTSTATUS Status = NtSetInformationProcess(NtCurrentProcess(), ProcessFexHardwareTso, &Enable, sizeof(Enable));
    if (Status == STATUS_SUCCESS) {
      CTX->SetHardwareTSOSupport(true);
    }
  }

  FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
  FEX_CONFIG_OPT(StartupSleep, STARTUPSLEEP);
  FEX_CONFIG_OPT(StartupSleepProcName, STARTUPSLEEPPROCNAME);

  // The stats allocator is only created under wine and when profile stats are enabled.
  if (IsWine && ProfileStats()) {
    StatAllocHandler = fextl::make_unique<FEX::Windows::StatAlloc>(FEXCore::SHMStats::AppType::WIN_ARM64EC);
  }

  // Optional startup delay, applied to every process or only to the one whose name matches StartupSleepProcName.
  if (StartupSleep() && (StartupSleepProcName().empty() || ExecutableName == StartupSleepProcName())) {
    LogMan::Msg::IFmt("[{}][{}] Sleeping for {} seconds", GetCurrentProcessId(), ExecutableName, StartupSleep());
    std::this_thread::sleep_for(std::chrono::seconds(StartupSleep()));
  }

  return STATUS_SUCCESS;
}

// Process termination notification. Intentionally a no-op: all arguments are ignored.
void ProcessTerm(HANDLE Handle, BOOL After, NTSTATUS Status) {}

// Scope guard that forces the current thread's InSyscallCallback flag to true for its lifetime and puts back the
// previously observed value when it goes out of scope.
class ScopedCallbackDisable {
public:
  ScopedCallbackDisable() {
    const auto CurrentArea = GetCPUArea();
    SavedValue = CurrentArea.Area->InSyscallCallback;
    CurrentArea.Area->InSyscallCallback = true;
  }

  ~ScopedCallbackDisable() {
    const auto CurrentArea = GetCPUArea();
    CurrentArea.Area->InSyscallCallback = SavedValue;
  }

private:
  // Value of InSyscallCallback captured at construction.
  bool SavedValue;
};

// Triage for an exception raised while this thread has JIT state.
// Tries, in order: the ExitFunctionEC suspend point, access-violation handlers (call/ret stack, JIT guard page,
// self-modifying code), unaligned access fixups, the JIT suspend trap, and finally rethrowing onto the guest stack.
// Returns true if exception dispatch should be halted and the execution context restored to NativeContext
// Returns false if the exception should continue through normal dispatch. GuestContext is not used here.
bool ResetToConsistentStateImpl(const ThreadCPUArea CPUArea, EXCEPTION_RECORD* Exception, CONTEXT* GuestContext, ARM64_NT_CONTEXT* NativeContext) {
  auto Thread = CPUArea.ThreadState();
  FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedSignalTime);
  LogMan::Msg::DFmt("Exception: Code: {:X} Address: {:X}", Exception->ExceptionCode, reinterpret_cast<uintptr_t>(Exception->ExceptionAddress));

  if (NativeContext->Pc == reinterpret_cast<uint64_t>(&ExitFunctionSuspendPoint)) {
    // A suspend interrupt can occur in ExitFunctionEC before InSimulation is unset and set SuspendDoorbell. If this
    // occurs then it is still our duty to cooperatively suspend with an appropriate context. To support this, after
    // unsetting InSimulation a brk #0xCAFE instruction will be raised that we can handle here.
    NativeContext->Pc += 4; // Skip over the brk instruction when we resume
    *CPUArea.Area->SuspendDoorbell = 0;
    return true;
  }

  if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
    // ExceptionInformation[1] is used as the faulting address for access violations.
    const auto FaultAddress = static_cast<uint64_t>(Exception->ExceptionInformation[1]);

    if (FEX::Windows::CallRetStack::HandleAccessViolation(Thread, FaultAddress, NativeContext->X17)) {
      return true;
    }

    if (FEX::Windows::JITGuardPage::HandleJITGuardPage(Thread, reinterpret_cast<void*>(FaultAddress), NativeContext->X,
                                                       reinterpret_cast<__uint128_t*>(NativeContext->V), &NativeContext->Pc)) {
      return true;
    }

    // The lock is held for the remainder of this access-violation branch.
    std::scoped_lock Lock(ThreadCreationMutex);
    if (InvalidationTracker && InvalidationTracker->HandleRWXAccessViolation(Thread, NativeContext->Pc, FaultAddress)) {
      FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1);
      if (CTX->IsAddressInCodeBuffer(Thread, NativeContext->Pc) && !CTX->IsCurrentBlockSingleInst(CPUArea.ThreadState()) &&
          CTX->IsAddressInCurrentBlock(Thread, FaultAddress & FEXCore::Utils::FEX_PAGE_MASK, FEXCore::Utils::FEX_PAGE_SIZE)) {
        // If we are not patching ourself (single inst block case) and potentially patching the current block, this is inline SMC. Reconstruct the current context (before the SMC write) then single step the write to reduce it to regular SMC.
        Exception::ReconstructThreadState(Thread, *NativeContext);
        LogMan::Msg::DFmt("Handled inline self-modifying code: pc: {:X} rip: {:X} fault: {:X}", NativeContext->Pc,
                          Thread->CurrentFrame->State.rip, FaultAddress);
        // Re-enter through the dispatcher on the emulator stack rather than resuming the faulting JIT code.
        NativeContext->Pc = CPUArea.DispatcherLoopTopEnterECFillSRA();
        NativeContext->Sp = CPUArea.EmulatorStackBase();
        NativeContext->X11 = 1;                                        // Set ENTRY_FILL_SRA_SINGLE_INST_REG to force a single step
        NativeContext->X17 = reinterpret_cast<uint64_t>(CPUArea.Area); // Set EC_ENTRY_CPUAREA_REG
      } else {
        LogMan::Msg::DFmt("Handled self-modifying code: pc: {:X} fault: {:X}", NativeContext->Pc, FaultAddress);
      }

      return true;
    }
  }

  bool IsJIT = CTX->IsAddressInCodeBuffer(Thread, NativeContext->Pc);
  if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Exception::HandleUnalignedAccess(CPUArea, *NativeContext, IsJIT)) {
    LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", NativeContext->Pc);
    return true;
  }

  // Faults outside both the JIT code buffer and the dispatcher are left to normal exception dispatch.
  if (!IsJIT && !IsDispatcherAddress(NativeContext->Pc)) {
    LogMan::Msg::DFmt("Passing through exception");
    return false;
  }

  // The JIT (in CompileBlock) emits code to check the suspend doorbell at the start of every block, and run the following instruction if it is set:
  static constexpr uint32_t SuspendTrapMagic {0xD4395FC0}; // brk #0xCAFE
  if (Exception->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION && *reinterpret_cast<uint32_t*>(NativeContext->Pc) == SuspendTrapMagic) {
    // Replace the native context with the packed guest context, then mark the thread as no longer in simulation
    // and clear the doorbell.
    Exception::ReconstructThreadState(Thread, *NativeContext);
    *NativeContext = Exception::StoreStateToPackedECContext(Thread, NativeContext->Fpcr, NativeContext->Fpsr);
    LogMan::Msg::DFmt("Suspending: RIP: {:X} SP: {:X}", NativeContext->Pc, NativeContext->Sp);
    CPUArea.Area->InSimulation = 0;
    *CPUArea.Area->SuspendDoorbell = 0;
    return true;
  }

  // The handler's own frame address tells which stack the exception was delivered on; rethrowing is only done
  // when that is the emulator stack.
  if (IsEmulatorStackAddress(CPUArea, reinterpret_cast<uint64_t>(__builtin_frame_address(0)))) {
    Exception::RethrowGuestException(*Exception, *NativeContext);
    LogMan::Msg::DFmt("Rethrowing onto guest stack: {:X}", NativeContext->Sp);
    return true;
  } else {
    LogMan::Msg::EFmt("Unexpected exception in JIT code on guest stack");
    return false;
  }
}

// Exception entry point (exported from the DLL). Gives the overcommit tracker first refusal on access violations,
// then runs the per-thread triage in ResetToConsistentStateImpl. Whenever a handler reports the fault as handled,
// execution is resumed from NativeContext via NtContinueNative; otherwise the simulation flags are cleared and
// STATUS_SUCCESS is returned.
NTSTATUS ResetToConsistentState(EXCEPTION_RECORD* Exception, CONTEXT* GuestContext, ARM64_NT_CONTEXT* NativeContext) {
  if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
    const auto FaultAddress = static_cast<uint64_t>(Exception->ExceptionInformation[1]);

    if (OvercommitTracker) {
      bool HandledOvercommit = false;
      {
        // Callbacks stay disabled only while the tracker is running.
        ScopedCallbackDisable guard;
        HandledOvercommit = OvercommitTracker->HandleAccessViolation(FaultAddress);
      }

      if (HandledOvercommit) {
        NtContinueNative(NativeContext, false);
      }
    }
  }

  // Threads without JIT state have nothing further to reset.
  const auto CPUArea = GetCPUArea();
  if (!CPUArea.ThreadState()) {
    return STATUS_SUCCESS;
  }

  bool ShouldContinue = false;
  {
    ScopedCallbackDisable guard;
    ShouldContinue = ResetToConsistentStateImpl(CPUArea, Exception, GuestContext, NativeContext);
  }

  if (ShouldContinue) {
    NtContinueNative(NativeContext, false);
  }

  CPUArea.Area->InSimulation = false;
  CPUArea.Area->InSyscallCallback = false;
  return STATUS_SUCCESS;
}

void NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot, BOOL After, NTSTATUS Status) {
  if (!InvalidationTracker || !GetCPUArea().ThreadState()) {
    return;
  }

  if (!After) {
    ThreadCreationMutex.lock();
  } else {
    // MEM_RESET(_UNDO) ignores the passed permissions
    if (!Status && !(Type & (MEM_RESET | MEM_RESET_UNDO))) {
      InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast<uint64_t>(Address), static_cast<uint64_t>(Size), Prot);
    }
    ThreadCreationMutex.unlock();
  }
}

void NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType, BOOL After, NTSTATUS Status) {
  if (!InvalidationTracker || !GetCPUArea().ThreadState()) {
    return;
  }

  if (!After) {
    ThreadCreationMutex.lock();
  } else {
    if (!Status) {
      InvalidationTracker->InvalidateAlignedInterval(reinterpret_cast<uint64_t>(Address), static_cast<uint64_t>(Size), true);
    }
    ThreadCreationMutex.unlock();
  }
}

void NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt, BOOL After, NTSTATUS Status) {
  if (!InvalidationTracker || !GetCPUArea().ThreadState()) {
    return;
  }

  if (!After) {
    ThreadCreationMutex.lock();
  } else {
    if (!Status) {
      InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast<uint64_t>(Address), static_cast<uint64_t>(Size), NewProt);
    }
    ThreadCreationMutex.unlock();
  }
}

// Section mapping notification. Once the process and thread are initialised, the mapped address is passed to
// HandleImageMap under ThreadCreationMutex. Always returns STATUS_SUCCESS; only Address is used.
NTSTATUS NotifyMapViewOfSection(void* Unk1, void* Address, void* Unk2, SIZE_T Size, ULONG AllocType, ULONG Prot) {
  const bool Tracking = InvalidationTracker && GetCPUArea().ThreadState();
  if (Tracking) {
    const auto MappedBase = reinterpret_cast<uint64_t>(Address);
    std::scoped_lock Lock(ThreadCreationMutex);
    HandleImageMap(MappedBase);
  }

  return STATUS_SUCCESS;
}

// Section unmapping notification, called once before (After == false) and once after (After == true) the operation.
// The "before" call takes ThreadCreationMutex and invalidates the section containing Address; the "after" call
// only releases the mutex. Status is not inspected.
void NotifyUnmapViewOfSection(void* Address, BOOL After, NTSTATUS Status) {
  const bool Tracking = InvalidationTracker && GetCPUArea().ThreadState();
  if (!Tracking) {
    return;
  }

  if (After) {
    ThreadCreationMutex.unlock();
    return;
  }

  ThreadCreationMutex.lock();
  const auto SectionBase = reinterpret_cast<uint64_t>(Address);
  auto [Start, Size] = InvalidationTracker->InvalidateContainingSection(SectionBase, true);
  // A zero size means no section was invalidated, so there is no image to unmap.
  if (Size) {
    HandleImageUnmap(Start, Size);
  }
}

// Invalidates translated code covering [Address, Address + Size) under ThreadCreationMutex.
// Does nothing until the invalidation tracker and the calling thread's state exist.
void FlushInstructionCacheHeavy(const void* Address, SIZE_T Size) {
  const bool Tracking = InvalidationTracker && GetCPUArea().ThreadState();
  if (!Tracking) {
    return;
  }

  const auto Base = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);

  std::scoped_lock Lock(ThreadCreationMutex);
  InvalidationTracker->InvalidateAlignedInterval(Base, Length, false);
}

// Invalidates translated code covering [Address, Address + Size) under ThreadCreationMutex.
// Does nothing until the invalidation tracker and the calling thread's state exist.
void BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size) {
  const bool Tracking = InvalidationTracker && GetCPUArea().ThreadState();
  if (!Tracking) {
    return;
  }

  const auto Base = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);

  std::scoped_lock Lock(ThreadCreationMutex);
  InvalidationTracker->InvalidateAlignedInterval(Base, Length, false);
}

// Dirty-memory notification: invalidates translated code covering [Address, Address + Size) under
// ThreadCreationMutex. Does nothing until the invalidation tracker and the calling thread's state exist.
void BTCpu64NotifyMemoryDirty(void* Address, SIZE_T Size) {
  const bool Tracking = InvalidationTracker && GetCPUArea().ThreadState();
  if (!Tracking) {
    return;
  }

  const auto Base = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);

  std::scoped_lock Lock(ThreadCreationMutex);
  InvalidationTracker->InvalidateAlignedInterval(Base, Length, false);
}

// File read notification, called once before (After == false) and once after (After == true) the read.
// Before the read, ThreadCreationMutex and the code invalidation mutex are taken; they stay held across the read
// only if BeginUntrackedWriteLocked reports that the destination buffer needs it, which is remembered in the
// per-thread InLockedRWXRead flag. After the read, the locks are dropped if that flag is set.
void BTCpu64NotifyReadFile(HANDLE Handle, void* Address, SIZE_T Size, BOOL After, NTSTATUS Status) {
  auto* ThreadState = GetCPUArea().ThreadState();
  if (!InvalidationTracker || !ThreadState) {
    return;
  }

  auto& InLockedRWXRead = GetFrontendThreadData(ThreadState)->InLockedRWXRead;

  if (After) {
    if (!InLockedRWXRead) {
      return;
    }

    InLockedRWXRead = false;
    CTX->GetCodeInvalidationMutex().unlock();
    ThreadCreationMutex.unlock();
    return;
  }

  ThreadCreationMutex.lock();
  CTX->GetCodeInvalidationMutex().lock();

  const auto Base = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);
  if (InvalidationTracker->BeginUntrackedWriteLocked(Base, Length)) {
    // Keep both locks until the matching "after" notification.
    InLockedRWXRead = true;
    return;
  }

  CTX->GetCodeInvalidationMutex().unlock();
  ThreadCreationMutex.unlock();
}

// Per-thread initialisation entry point (exported from the DLL).
// Under ThreadCreationMutex: allocates the emulator stack, creates the FEXCore thread, installs a default
// segment table, publishes the dispatcher entry points and thread state through the CPU area, loads the initial
// x64 context and registers the thread in `Threads`.
// Always returns STATUS_SUCCESS.
NTSTATUS ThreadInit() {
  std::scoped_lock Lock(ThreadCreationMutex);
  FEX::Windows::InitCRTThread();
  const auto CPUArea = GetCPUArea();

  // NOTE(review): the VirtualAlloc result is not checked; a failed allocation would leave a null stack range.
  static constexpr size_t EmulatorStackSize = 0x40000;
  const uint64_t EmulatorStack = reinterpret_cast<uint64_t>(::VirtualAlloc(nullptr, EmulatorStackSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE));
  CPUArea.EmulatorStackLimit() = EmulatorStack;
  CPUArea.EmulatorStackBase() = EmulatorStack + EmulatorStackSize;

  auto* Thread = CTX->CreateThread(0, 0);

  // Default segment setup.
  auto Frame = Thread->CurrentFrame;
  // Value-initialise the table so that descriptors (and descriptor fields) that are not explicitly configured
  // below start out zeroed instead of holding indeterminate heap contents.
  auto NewSegments = new FEXCore::Core::CPUState::gdt_segment[32]();

  // Setup initial code-segment GDT
  auto& GDT = NewSegments[FEXCore::Core::CPUState::DEFAULT_USER_CS];
  FEXCore::Core::CPUState::SetGDTBase(&GDT, 0);
  FEXCore::Core::CPUState::SetGDTLimit(&GDT, 0xF'FFFFU);
  GDT.L = 1; // L = Long Mode = 64-bit
  GDT.D = 0; // D = Default Operand SIze = Reserved

  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &NewSegments[0];
  // TODO: LDTs are currently unsupported, mirror them to GDT.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &NewSegments[0];

  Frame->State.cs_idx = FEXCore::Core::CPUState::DEFAULT_USER_CS << 3;
  Frame->State.cs_cached = FEXCore::Core::CPUState::CalculateGDTBase(GDT);

  FEX::Windows::CallRetStack::InitializeThread(Thread);
  Thread->CurrentFrame->Pointers.ExitFunctionEC = reinterpret_cast<uintptr_t>(&ExitFunctionEC);
  CPUArea.StateFrame() = Thread->CurrentFrame;

  // Mirror the dispatcher entry points into the CPU area.
  uint64_t EnterEC = Thread->CurrentFrame->Pointers.DispatcherLoopTopEnterEC;
  CPUArea.DispatcherLoopTopEnterEC() = EnterEC;

  uint64_t EnterECFillSRA = Thread->CurrentFrame->Pointers.DispatcherLoopTopEnterECFillSRA;
  CPUArea.DispatcherLoopTopEnterECFillSRA() = EnterECFillSRA;

  // Initial x64 context for the thread, loaded into the JIT state immediately afterwards.
  CPUArea.ContextAmd64() = {.ContextFlags = CONTEXT_CONTROL | CONTEXT_SEGMENTS | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT,
                            .AMD64_SegCs = (FEXCore::Core::CPUState::DEFAULT_USER_CS << 3) | 3,
                            .AMD64_SegDs = 0x2b,
                            .AMD64_SegEs = 0x2b,
                            .AMD64_SegFs = 0x53,
                            .AMD64_SegGs = 0x2b,
                            .AMD64_SegSs = 0x2b,
                            .AMD64_EFlags = 0x202,
                            .AMD64_MxCsr = 0x1f80,
                            .AMD64_MxCsr_copy = 0x1f80,
                            .AMD64_ControlWord = 0x27f};
  Exception::LoadStateFromECContext(Thread, CPUArea.ContextAmd64().AMD64_Context);

  Thread->FrontendPtr = new FrontendThreadData();

  {
    auto ThreadTID = GetCurrentThreadId();
    Threads.emplace(ThreadTID, Thread);
    if (StatAllocHandler) {
      Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID);
    }
  }

  // The thread state is published last; other entry points in this file use ThreadState() as the
  // "thread is initialised" check.
  CPUArea.ThreadState() = Thread;
  CPUArea.Area->SuspendDoorbell = reinterpret_cast<ULONG*>(&Thread->CurrentFrame->SuspendDoorbell);
  return STATUS_SUCCESS;
}

// Per-thread teardown entry point (exported from the DLL).
// Validates that the handle allows termination, suspends the target first when it is not the calling thread, and
// then releases everything ThreadInit created for it: the `Threads` entry, stats slot, frontend data, segment
// table, call/ret stack, FEXCore thread and emulator stack.
// Returns STATUS_ACCESS_DENIED if the handle lacks THREAD_TERMINATE, the failing status of the thread queries,
// or STATUS_SUCCESS (also when the thread was already torn down). ExitCode is not used.
NTSTATUS ThreadTerm(HANDLE Thread, LONG ExitCode) {
  if (!FEX::Windows::ValidateHandleAccess(Thread, THREAD_TERMINATE)) {
    return STATUS_ACCESS_DENIED;
  }

  auto ThreadDup = FEX::Windows::DupHandle(Thread, THREAD_QUERY_INFORMATION | THREAD_SUSPEND_RESUME);

  THREAD_BASIC_INFORMATION Info;
  if (auto Err = NtQueryInformationThread(*ThreadDup, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) {
    return Err;
  }

  const auto ThreadTID = reinterpret_cast<uint64_t>(Info.ClientId.UniqueThread);
  const bool Self = ThreadTID == GetCurrentThreadId();
  if (!Self) {
    // ContextFlags is an input that selects which parts of the context get written, so it must not be left
    // uninitialised. Only the synchronising side effect of the call is needed here, so request the minimum.
    CONTEXT TmpContext {};
    TmpContext.ContextFlags = CONTEXT_CONTROL;
    // If we are terminating a thread that isn't ourselves, try to suspend it first so we know internal JIT locks aren't being held.
    NtSuspendThread(*ThreadDup, NULL);
    // This will wait for the thread to be suspended
    NtGetContextThread(*ThreadDup, &TmpContext);
  }

  const auto [Err, CPUArea] = GetThreadCPUArea(*ThreadDup);
  if (Err) {
    return Err;
  }

  {
    std::scoped_lock Lock(ThreadCreationMutex);
    auto it = Threads.find(ThreadTID);
    if (it == Threads.end()) {
      // Thread already terminated
      return STATUS_SUCCESS;
    }

    Threads.erase(it);
    if (StatAllocHandler) {
      StatAllocHandler->DeallocateSlot(CPUArea.ThreadState()->ThreadStats);
    }
  }
  auto ThreadState = CPUArea.ThreadState();

  delete GetFrontendThreadData(ThreadState);

  // GDT and LDT are mirrored, only free one.
  delete[] ThreadState->CurrentFrame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT];

  FEX::Windows::CallRetStack::DestroyThread(ThreadState);
  CTX->DestroyThread(ThreadState);
  ::VirtualFree(reinterpret_cast<void*>(CPUArea.EmulatorStackLimit()), 0, MEM_RELEASE);
  // CRT per-thread state can only be torn down from the thread that owns it.
  if (Self) {
    FEX::Windows::DeinitCRTThread();
  }

  return STATUS_SUCCESS;
}

// Reports whether the given PF_* processor feature is present, as a Windows BOOLEAN.
BOOLEAN BTCpu64IsProcessorFeaturePresent(UINT Feature) {
  if (CPUFeatures->IsFeaturePresent(Feature)) {
    return TRUE;
  }
  return FALSE;
}

// Fills `Info` with the emulated CPU's details by forwarding to the process-wide CPUFeatures instance.
void UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info) {
  CPUFeatures->UpdateInformation(Info);
}


================================================
FILE: Source/Windows/ARM64EC/libarm64ecfex.def
================================================
; Export table for libarm64ecfex.dll.
; Entries marked DATA are exported as data symbols; the remaining entries are exported as functions.
LIBRARY libarm64ecfex.dll

EXPORTS
  BTCpu64FlushInstructionCache
  BTCpu64IsProcessorFeaturePresent
  BTCpu64NotifyMemoryDirty
  BTCpu64NotifyReadFile
  DispatchJump DATA
  RetToEntryThunk DATA
  ExitToX64 DATA
  BeginSimulation DATA
  FlushInstructionCacheHeavy
  NotifyMapViewOfSection
  NotifyMemoryAlloc
  NotifyMemoryFree
  NotifyMemoryProtect
  NotifyUnmapViewOfSection
  ProcessInit
  ProcessTerm
  ResetToConsistentState
  ThreadInit
  ThreadTerm
  UpdateProcessorInformation


================================================
FILE: Source/Windows/CMakeLists.txt
================================================
# Builds the import library lib<name>_ex.a from Defs/<name>.def using dlltool and exposes it as the imported
# shared library target <name>_ex.
#   name - base name of the .def file (without extension) under ${CMAKE_CURRENT_SOURCE_DIR}/Defs
function(build_implib name)
  set(name_ex ${name}_ex)
  set(def_file ${CMAKE_CURRENT_SOURCE_DIR}/Defs/${name}.def)
  add_custom_target(${name_ex}lib ALL DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/lib${name_ex}.a)
  # DEPENDS on the .def file so that editing the export list regenerates the import library.
  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib${name_ex}.a
    COMMAND ${CMAKE_DLLTOOL} -d ${def_file} -k -l lib${name_ex}.a
    DEPENDS ${def_file}
    COMMENT "Building lib${name_ex}.a")

  add_library(${name_ex} SHARED IMPORTED)
  set_property(TARGET ${name_ex} PROPERTY IMPORTED_IMPLIB ${CMAKE_CURRENT_BINARY_DIR}/lib${name_ex}.a)
  add_dependencies(${name_ex} ${name_ex}lib)
endfunction()

# Post-build step for `target`: overwrites 32 bytes at byte offset 64 (seek=2 blocks of bs=32) of the built file
# with the first 32 bytes of Source/Windows/wine_builtin.bin, without truncating the file (conv=notrunc).
function(patch_library_wine target)
  add_custom_command(TARGET ${target} POST_BUILD
    COMMAND dd bs=32 count=1 seek=2 conv=notrunc if=${CMAKE_SOURCE_DIR}/Source/Windows/wine_builtin.bin of=$<TARGET_FILE:${target}>)
endfunction()

# Ask the C++ compiler for the path of its libgcc and store it in LIBGCC_PATH.
# NOTE(review): ${CMAKE_CXX_FLAGS} is expanded unquoted here; confirm that multi-flag values are split as intended.
execute_process(COMMAND ${CMAKE_CXX_COMPILER} ${CMAKE_CXX_FLAGS} -print-libgcc-file-name
  OUTPUT_VARIABLE LIBGCC_PATH
  OUTPUT_STRIP_TRAILING_WHITESPACE)

# Import libraries generated from Defs/ntdll.def and Defs/wow64.def.
build_implib(ntdll)
build_implib(wow64)

add_subdirectory(Common)

# Exactly one frontend is built, selected by the target architecture.
if (ARCHITECTURE_arm64ec)
  add_subdirectory(ARM64EC)
elseif (ARCHITECTURE_arm64)
  add_subdirectory(WOW64)
endif()


================================================
FILE: Source/Windows/Common/CMakeLists.txt
================================================
# CommonWindowsRuntime: static library seeded with LoadConfig.S; the CRT and WinAPI subdirectories are added
# right after it (presumably contributing further sources to this target - verify in those directories).
add_library(CommonWindowsRuntime STATIC LoadConfig.S)
add_subdirectory(CRT)
add_subdirectory(WinAPI)

target_link_libraries(CommonWindowsRuntime FEXCore_Base JemallocLibs)
target_compile_options(CommonWindowsRuntime PRIVATE -Wno-inconsistent-dllimport)
target_include_directories(CommonWindowsRuntime PRIVATE "${CMAKE_SOURCE_DIR}/Source/Windows/include/")

# CommonWindows: static library with the helpers shared by the Windows frontends.
add_library(CommonWindows STATIC
  CPUFeatures.cpp
  SHMStats.cpp
  InvalidationTracker.cpp
  ImageTracker.cpp
  Logging.cpp)

target_link_libraries(CommonWindows FEXCore_Base)
target_include_directories(CommonWindows PRIVATE "${CMAKE_SOURCE_DIR}/Source/Windows/include/")


================================================
FILE: Source/Windows/Common/CPUFeatures.cpp
================================================
// SPDX-License-Identifier: MIT

#include "Common/CPUInfo.h"

#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/HostFeatures.h>
#include <FEXCore/fextl/fmt.h>

#include <windows.h>

#include "CPUFeatures.h"

namespace {

// Opens the registry key describing logical processor `Idx` for reading.
// Returns the opened key, or nullptr if it could not be opened.
HKEY OpenProcessorKey(uint32_t Idx) {
  const auto SubKey = fextl::fmt::format("Hardware\\Description\\System\\CentralProcessor\\{}", Idx);

  HKEY Handle;
  const auto Result = RegOpenKeyExA(HKEY_LOCAL_MACHINE, SubKey.c_str(), 0, KEY_READ, &Handle);
  return Result == ERROR_SUCCESS ? Handle : nullptr;
}

// Reads the value `Name` under `Key` into a 64-bit integer.
// The result of the registry query is not inspected; the zero-initialised buffer is returned as-is.
uint64_t ReadRegU64(HKEY Key, const char* Name) {
  uint64_t Result = 0;
  DWORD ByteCount = sizeof(Result);
  RegGetValueA(Key, nullptr, Name, 0, nullptr, &Result, &ByteCount);
  return Result;
}

} // namespace

namespace FEX::Windows {
// FEX::CPUFeatures implementation whose feature registers are populated from the "CP xxxx" values stored
// under a processor's registry key.
class CPUFeaturesFromRegistry final : public FEX::CPUFeatures {
public:
  explicit CPUFeaturesFromRegistry(HKEY Key) {
    const auto Read = [Key](const char* ValueName) {
      return ReadRegU64(Key, ValueName);
    };

    ISAR0.SetReg(Read("CP 4030"));
    PFR0.SetReg(Read("CP 4020"));
    PFR1.SetReg(Read("CP 4021"));
    ISAR1.SetReg(Read("CP 4031"));
    MMFR0.SetReg(Read("CP 4038"));
    MMFR2.SetReg(Read("CP 403A"));
    ZFR0.SetReg(Read("CP 4024"));
    MMFR1.SetReg(Read("CP 4039"));
    ISAR2.SetReg(Read("CP 4032"));

    // Must run after every register above has been populated.
    FillFeatureFlags();
  }
};

// Builds the host feature description from the per-processor registry keys.
// Feature registers, CTR and MIDR are taken from processor 0; the MIDR of every processor that has a key is
// also recorded. Terminates if the key for processor 0 cannot be opened.
FEXCore::HostFeatures CPUFeatures::FetchHostFeatures(bool IsWine) {
  HKEY ProcessorKey = OpenProcessorKey(0);
  if (!ProcessorKey) {
    ERROR_AND_DIE_FMT("Couldn't detect CPU features");
  }

  CPUFeaturesFromRegistry Features(ProcessorKey);

  uint64_t CTR = ReadRegU64(ProcessorKey, "CP 5801");
  uint64_t MIDR = ReadRegU64(ProcessorKey, "CP 4000");

  FEXCore::HostFeatures Result = {};

  // Walk the processor keys until one fails to open, closing each after its MIDR has been recorded.
  uint32_t Idx = 0;
  while (ProcessorKey) {
    // Truncate to 32-bits, top 32-bits are all reserved in MIDR
    const auto CoreMIDR = static_cast<uint32_t>(ReadRegU64(ProcessorKey, "CP 4000"));
    Result.CPUMIDRs.push_back(CoreMIDR);
    RegCloseKey(ProcessorKey);

    ProcessorKey = OpenProcessorKey(++Idx);
  }

  FEX::FetchHostFeatures(Features, Result, !IsWine, CTR, MIDR);

  // Force-disable SVE until wine/windows gain support for SVE context save/restore
  Result.SupportsSVE128 = false;
  Result.SupportsSVE256 = false;

  Result.SupportsCPUIndexInTPIDRRO = !IsWine;
  return Result;
}

// Derives the Windows-visible CPU description (architecture, feature bits, level and revision) from the
// CPUID results reported by the emulated CPU.
CPUFeatures::CPUFeatures(FEXCore::Context::Context& CTX) {
#ifdef ARCHITECTURE_arm64ec
  // Report as a 64-bit host for ARM64EC.
  CpuInfo.ProcessorArchitecture = PROCESSOR_ARCHITECTURE_AMD64;
#else
  // Report as a 32-bit host for WoW64.
  CpuInfo.ProcessorArchitecture = PROCESSOR_ARCHITECTURE_INTEL;
#endif

  const auto TestBit = [](uint32_t Reg, uint32_t Bit) -> bool {
    return ((Reg >> Bit) & 1U) != 0;
  };

  // Baseline FEX feature-set
  CpuInfo.ProcessorFeatureBits = CPU_FEATURE_VME | CPU_FEATURE_TSC | CPU_FEATURE_CMOV | CPU_FEATURE_PGE | CPU_FEATURE_PSE | CPU_FEATURE_MTRR |
                                 CPU_FEATURE_CX8 | CPU_FEATURE_MMX | CPU_FEATURE_X86 | CPU_FEATURE_PAT | CPU_FEATURE_FXSR | CPU_FEATURE_SEP |
                                 CPU_FEATURE_SSE | CPU_FEATURE_3DNOW | CPU_FEATURE_SSE2 | CPU_FEATURE_SSE3 | CPU_FEATURE_CX128 |
                                 CPU_FEATURE_NX | CPU_FEATURE_SSSE3 | CPU_FEATURE_SSE41 | CPU_FEATURE_PAE | CPU_FEATURE_DAZ;

  // Features that require specific host CPU support
  const auto Leaf01 = CTX.RunCPUIDFunction(0x01, 0);
  if (TestBit(Leaf01.ecx, 20)) {
    CpuInfo.ProcessorFeatureBits |= CPU_FEATURE_SSE42;
  }
  if (TestBit(Leaf01.ecx, 27)) {
    CpuInfo.ProcessorFeatureBits |= CPU_FEATURE_XSAVE;
  }
  if (TestBit(Leaf01.ecx, 28)) {
    CpuInfo.ProcessorFeatureBits |= CPU_FEATURE_AVX;
  }

  const auto Leaf07 = CTX.RunCPUIDFunction(0x07, 0);
  if (TestBit(Leaf07.ebx, 5)) {
    CpuInfo.ProcessorFeatureBits |= CPU_FEATURE_AVX2;
  }

  // Split CPUID.01h:EAX into its family/model/stepping fields.
  const auto Signature = Leaf01.eax;
  const auto BaseFamily = (Signature >> 8) & 0xf;
  const auto ExtendedFamily = (Signature >> 20) & 0xff;
  const auto ExtendedModel = (Signature >> 16) & 0xf;
  const auto BaseModel = (Signature >> 4) & 0xf;
  const auto Stepping = Signature & 0xf;

  CpuInfo.ProcessorLevel = BaseFamily + ExtendedFamily; // Family
  // Revision layout: extended model in bits 15:12, model in bits 11:8, stepping in bits 3:0.
  CpuInfo.ProcessorRevision = (ExtendedModel << 12) | (BaseModel << 8) | Stepping;
}

bool CPUFeatures::IsFeaturePresent(uint32_t Feature) {
  switch (Feature) {
  case PF_FLOATING_POINT_PRECISION_ERRATA: return FALSE;
  case PF_FLOATING_POINT_EMULATED: return FALSE;
  case PF_COMPARE_EXCHANGE_DOUBLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX8);
  case PF_MMX_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_MMX);
  case PF_XMMI_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE);
  case PF_3DNOW_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_3DNOW);
  case PF_RDTSC_INSTRUCTION_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_TSC);
  case PF_PAE_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_PAE);
  case PF_XMMI64_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE2);
  case PF_SSE3_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE3);
  case PF_SSSE3_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSSE3);
  case PF_XSAVE_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_XSAVE);
  case PF_COMPARE_EXCHANGE128: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_CX128);
  case PF_SSE_DAZ_MODE_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_DAZ);
  case PF_NX_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_NX);
  case PF_SECOND_LEVEL_ADDRESS_TRANSLATION: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_2NDLEV);
  case PF_VIRT_FIRMWARE_ENABLED: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_VIRT);
  case PF_RDWRFSGSBASE_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_RDFS);
  case PF_FASTFAIL_AVAILABLE: return TRUE;
  case PF_SSE4_1_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE41);
  case PF_SSE4_2_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_SSE42);
  case PF_AVX_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX);
  case PF_AVX2_INSTRUCTIONS_AVAILABLE: return !!(CpuInfo.ProcessorFeatureBits & CPU_FEATURE_AVX2);
  default: return false;
  }
}

/**
 * Copies the cached processor description into the caller-provided structure.
 * Only the four fields assigned below are written; any other member of `Info` is left as it was.
 */
void CPUFeatures::UpdateInformation(SYSTEM_CPU_INFORMATION* Info) {
  const SYSTEM_CPU_INFORMATION& Cached = CpuInfo;
  SYSTEM_CPU_INFORMATION& Out = *Info;

  Out.ProcessorArchitecture = Cached.ProcessorArchitecture;
  Out.ProcessorLevel = Cached.ProcessorLevel;
  Out.ProcessorRevision = Cached.ProcessorRevision;
  Out.ProcessorFeatureBits = Cached.ProcessorFeatureBits;
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/CPUFeatures.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <windef.h>
#include <winternl.h>

#include <Common/HostFeatures.h>

namespace FEXCore::Context {
class Context;
}

/**
 * @brief Maps CPUID results to Windows CPU info structures
 */
namespace FEX::Windows {
class CPUFeatures {
public:
  /**
   * @brief Produces the FEXCore host feature description
   *
   * NOTE(review): the implementation is not visible from this header; `IsWine` presumably
   * selects Wine-specific detection behaviour - confirm against CPUFeatures.cpp.
   */
  static FEXCore::HostFeatures FetchHostFeatures(bool IsWine);

  /**
   * @brief Constructs the feature cache for the given context
   *
   * NOTE(review): presumably fills `CpuInfo` from the CPUID data exposed by `CTX` - confirm
   * against the constructor definition.
   */
  CPUFeatures(FEXCore::Context::Context& CTX);

  /**
   * @brief If the given PF_* feature is supported
   */
  bool IsFeaturePresent(uint32_t Feature);

  /**
   * @brief Fills in `Info` according to the detected CPU features
   */
  void UpdateInformation(SYSTEM_CPU_INFORMATION* Info);

private:
  // Cached CPU description; its ProcessorFeatureBits are tested by IsFeaturePresent() and the
  // architecture/level/revision/feature fields are copied out by UpdateInformation().
  SYSTEM_CPU_INFORMATION CpuInfo {};
};
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/CRT/Alloc.cpp
================================================
// SPDX-License-Identifier: MIT
#define _SECIMP
#define _CRTIMP
#include <cstdint>
#include "../Priv.h"
#include <rpmalloc/rpmalloc.h>

// C allocator entry points for the freestanding Windows runtime. Every function below simply
// forwards to the matching rpmalloc routine, so memory obtained from any of them is released
// through ::rpfree.

// Allocates an array of NumOfElements * SizeOfElements bytes via rpcalloc.
void* calloc(size_t NumOfElements, size_t SizeOfElements) {
  return ::rpcalloc(NumOfElements, SizeOfElements);
}

// Releases a block previously returned by one of the allocators in this file.
void free(void* Memory) {
  ::rpfree(Memory);
}

// Allocates Size bytes via rpmalloc.
void* malloc(size_t Size) {
  return ::rpmalloc(Size);
}

// Resizes a block via rprealloc.
void* realloc(void* Memory, size_t NewSize) {
  return ::rprealloc(Memory, NewSize);
}

// MSVC-style aligned allocation. Note the argument order is swapped when forwarding:
// _aligned_malloc takes (Size, Alignment) while rpaligned_alloc is called with (Alignment, Size).
DLLEXPORT_FUNC(void*, _aligned_malloc, (size_t Size, size_t Alignment)) {
  return ::rpaligned_alloc(Alignment, Size);
}

// Aligned blocks are released through the same rpfree path as ordinary ones.
DLLEXPORT_FUNC(void, _aligned_free, (void* Memory)) {
  ::rpfree(Memory);
}


================================================
FILE: Source/Windows/Common/CRT/CMakeLists.txt
================================================
target_sources(CommonWindowsRuntime PRIVATE Alloc.cpp IO.cpp Math.cpp String.cpp Misc.cpp CRT.cpp)
add_subdirectory(Musl)


================================================
FILE: Source/Windows/Common/CRT/CRT.cpp
================================================
// SPDX-License-Identifier: MIT
#include <iterator>
#include <windef.h>
#include <winternl.h>
#include <wine/debug.h>
#include <rpmalloc/rpmalloc.h>
#include "CRT.h"

extern "C" {
// Marker pointers placed into named `.CRT$...` sections. The init routines in this file treat
// each begin/end pair as the bounds of an array of function pointers and call every non-null
// entry found between them.
// NOTE(review): this relies on the linker laying out the `.CRT$` subsections sorted by suffix -
// confirm for the toolchain in use.

// FEXA..FEXZ: FEX-specific initializers, walked by InitCRTProcess before the other ranges.
__attribute__((section(".CRT$FEXA"))) void (*FEXA)() = nullptr;
__attribute__((section(".CRT$FEXZ"))) void (*FEXZ)() = nullptr;
// XIA..XIZ and XCA..XCZ: initializer ranges walked (in that order) by InitCRTProcess.
__attribute__((section(".CRT$XIA"))) void (*XIA)() = nullptr;
__attribute__((section(".CRT$XIZ"))) void (*XIZ)() = nullptr;
__attribute__((section(".CRT$XCA"))) void (*XCA)() = nullptr;
__attribute__((section(".CRT$XCZ"))) void (*XCZ)() = nullptr;
// XDA..XDZ: declared here but not walked by any code visible in this file.
__attribute__((section(".CRT$XDA"))) void (*XDA)() = nullptr;
__attribute__((section(".CRT$XDZ"))) void (*XDZ)() = nullptr;
// XLA..XLZ: callbacks invoked with a DLL_* reason code on process attach and thread attach/detach.
// NOTE(review): XLZ is emitted into `.CRT$XZA` rather than `.CRT$XLZ` as its name suggests, so
// the walked range would also cover any subsection sorting between XL* and XZA - confirm this
// is intentional.
__attribute__((section(".CRT$XLA"))) void (*XLA)(HINSTANCE, DWORD, LPVOID*) = nullptr;
__attribute__((section(".CRT$XZA"))) void (*XLZ)(HINSTANCE, DWORD, LPVOID*) = nullptr;

// NOTE(review): presumably the TLS index slot referenced by compiler-generated TLS accesses - confirm.
uint64_t _tls_index;
// GNU-style constructor/destructor tables. InitCRTProcess reads __CTOR_LIST__ starting at
// index 1 up to the first null entry; __DTOR_LIST__ is not used in this file.
extern void (*__CTOR_LIST__[])();
extern void (*__DTOR_LIST__[])();

// DLL entry point: disables per-thread attach/detach callouts for this module and reports
// success. `Reason` and `Reserved` are ignored.
BOOL DllMainCRTStartup(HMODULE Handle, DWORD Reason, LPVOID Reserved) {
  LdrDisableThreadCalloutsForDll(Handle);
  return true;
}
}
namespace {
/**
 * Calls every non-null callable in the range [Begin, End), in iteration order.
 * Each call receives its own copy of `Args`.
 */
template<typename TFuncIt, typename... TArgs>
void RunFuncArray(TFuncIt Begin, TFuncIt End, TArgs... Args) {
  TFuncIt Cursor = Begin;
  while (Cursor != End) {
    auto Entry = *Cursor;
    if (Entry != nullptr) {
      Entry(Args...);
    }
    Cursor++;
  }
}
} // namespace

namespace FEX::Windows {
/**
 * Process-wide runtime start-up: brings up the allocator, then runs every initializer table
 * (FEX-specific, GNU constructors in reverse order, XI*, XC*) and finally the XL* callbacks
 * with DLL_PROCESS_ATTACH.
 */
void InitCRTProcess() {
  rpmalloc_initialize(nullptr);

  // Entry 0 of __CTOR_LIST__ is skipped; the table is scanned up to its first null entry.
  auto CtorFirst = &__CTOR_LIST__[1];
  auto CtorLast = CtorFirst;
  for (; *CtorLast != nullptr; ++CtorLast) {}

  RunFuncArray(&FEXA, &FEXZ);
  RunFuncArray(std::make_reverse_iterator(CtorLast), std::make_reverse_iterator(CtorFirst));
  RunFuncArray(&XIA, &XIZ);
  RunFuncArray(&XCA, &XCZ);
  RunFuncArray(&XLA, &XLZ, nullptr, DLL_PROCESS_ATTACH, nullptr);
}

/**
 * Per-thread start-up: sets up the allocator's thread state before any XL* callback runs.
 */
void InitCRTThread() {
  rpmalloc_thread_initialize();
  RunFuncArray(&XLA, &XLZ, nullptr, DLL_THREAD_ATTACH, nullptr);
}

/**
 * Per-thread teardown: XL* callbacks run first, the allocator's thread state is released last.
 */
void DeinitCRTThread() {
  RunFuncArray(&XLA, &XLZ, nullptr, DLL_THREAD_DETACH, nullptr);
  rpmalloc_thread_finalize();
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/CRT/CRT.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

namespace FEX::Windows {
// Process-wide CRT start-up: initializes the allocator and runs the static initializer tables.
void InitCRTProcess();
// Per-thread CRT start-up: initializes allocator thread state and runs thread-attach callbacks.
void InitCRTThread();
// Per-thread CRT teardown: runs thread-detach callbacks and releases allocator thread state.
void DeinitCRTThread();
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/CRT/IO.cpp
================================================
// SPDX-License-Identifier: MIT
#define _FILE_DEFINED
struct FILE;
#define _SECIMP
#define _CRTIMP

#include <memory>
#include <vector>
#include <mutex>
#include <algorithm>
#include <cstdlib>
#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <io.h>
#include <ctype.h>
#include <wchar.h>
#include <windef.h>
#include <winternl.h>
#include <winbase.h>
#include <winerror.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <handleapi.h>
#include <fileapi.h>
#include <errhandlingapi.h>
#include <wine/debug.h>
#include "../Priv.h"

// Minimal file object shared by the io.h and stdio.h shims in this file: a Win32 handle plus
// the descriptor (slot index in OpenFileTable) it is registered under.
struct FILE {
  // Underlying Win32 handle; closed by the destructor.
  HANDLE Handle {INVALID_HANDLE_VALUE};
  // Descriptor of this file; assigned by AllocateFile(), -1 until then.
  int FileHandle {-1};
  // When set, _write()/fwrite() move the file pointer to end-of-file before every write.
  bool Append {false};

  FILE(HANDLE Handle, int FileHandle, bool Append)
    : Handle {Handle}
    , FileHandle {FileHandle}
    , Append {Append} {}

  // Closes the handle unconditionally; the object is owned by a unique_ptr in OpenFileTable,
  // so this runs when the slot is reset.
  ~FILE() {
    CloseHandle(Handle);
  }
};

namespace {
// Guards every access to OpenFileTable.
std::mutex FileTableLock;
// Descriptor table: the index is the file descriptor, a null entry is a free (closed) slot.
std::vector<std::unique_ptr<FILE>> OpenFileTable;


// Stores `Value` in errno and yields the conventional -1 failure result, so callers can
// write `return ErrnoReturn(EINVAL);`.
int ErrnoReturn(int Value) {
  return (errno = Value, -1);
}

// Translates the access-mode part of an io.h open flag into Win32 desired-access rights.
//
// _O_RDONLY is defined as 0, so it cannot be detected with a bit test: it is the mode that
// remains when neither _O_WRONLY nor _O_RDWR is set. (Testing `OpenFlag & _O_RDONLY` is always
// false and made read-only opens request no access rights at all.)
DWORD OpenFlagToAccess(int OpenFlag) {
  if (OpenFlag & _O_WRONLY) {
    return GENERIC_WRITE;
  }
  if (OpenFlag & _O_RDWR) {
    return GENERIC_READ | GENERIC_WRITE;
  }
  // Neither write bit set: read-only.
  return GENERIC_READ;
}

// Translates the creation-related io.h open flags into a Win32 creation disposition.
//
//   _O_CREAT | _O_EXCL  -> CREATE_NEW        (fail if the file exists, even if _O_TRUNC is set)
//   _O_CREAT | _O_TRUNC -> CREATE_ALWAYS
//   _O_CREAT            -> OPEN_ALWAYS
//   _O_TRUNC            -> TRUNCATE_EXISTING
//   otherwise           -> OPEN_EXISTING
//
// Exclusivity is checked before truncation so that a request such as mode "wx"
// (_O_CREAT | _O_TRUNC | _O_EXCL) never overwrites an existing file.
DWORD OpenFlagToCreation(int OpenFlag) {
  const bool Create = (OpenFlag & _O_CREAT) != 0;
  const bool Exclusive = (OpenFlag & _O_EXCL) != 0;
  const bool Truncate = (OpenFlag & _O_TRUNC) != 0;

  if (Create) {
    if (Exclusive) {
      return CREATE_NEW;
    }
    return Truncate ? CREATE_ALWAYS : OPEN_ALWAYS;
  }
  // _O_EXCL without _O_CREAT has no effect.
  return Truncate ? TRUNCATE_EXISTING : OPEN_EXISTING;
}

// Registers `File` in the descriptor table and returns its descriptor.
// The first free (null) slot is reused; if there is none the table grows by one entry.
// The chosen index is also stored in the file object itself.
int AllocateFile(std::unique_ptr<FILE>&& File) {
  std::scoped_lock Guard {FileTableLock};

  auto Slot = std::find_if(OpenFileTable.begin(), OpenFileTable.end(), [](const std::unique_ptr<FILE>& Entry) { return Entry == nullptr; });
  if (Slot != OpenFileTable.end()) {
    *Slot = std::move(File);
  } else {
    Slot = OpenFileTable.emplace(OpenFileTable.end(), std::move(File));
  }

  const size_t Index = std::distance(OpenFileTable.begin(), Slot);
  // Descriptors are plain ints; an index that does not fit is unrecoverable.
  if (Index >= std::numeric_limits<int>::max()) {
    std::terminate();
  }

  const int Descriptor = static_cast<int>(Index);
  (*Slot)->FileHandle = Descriptor;
  return (*Slot)->FileHandle;
}

// Looks up the file object registered under `FileHandle`.
// Returns nullptr when the descriptor is negative, beyond the end of the table, or refers to
// a slot that has been closed. The bounds check keeps a bad descriptor from indexing outside
// the vector.
FILE* GetFile(int FileHandle) {
  std::scoped_lock Lock {FileTableLock};
  if (FileHandle < 0 || static_cast<size_t>(FileHandle) >= OpenFileTable.size()) {
    return nullptr;
  }
  return OpenFileTable[FileHandle].get();
}

// Closes the file registered under `FileHandle` by destroying its table entry (which closes the
// Win32 handle) and leaves the slot free for reuse.
// Descriptors outside the table are ignored instead of writing out of bounds.
void RemoveFile(int FileHandle) {
  std::scoped_lock Lock {FileTableLock};
  if (FileHandle < 0 || static_cast<size_t>(FileHandle) >= OpenFileTable.size()) {
    return;
  }
  OpenFileTable[FileHandle].reset();
}

// Maps a stdio seek origin (SEEK_SET/SEEK_CUR/SEEK_END) onto the matching Win32 move method.
// Any other origin hits UNIMPLEMENTED().
DWORD OriginToMoveMethod(int Origin) {
  if (Origin == SEEK_SET) {
    return FILE_BEGIN;
  }
  if (Origin == SEEK_CUR) {
    return FILE_CURRENT;
  }
  if (Origin == SEEK_END) {
    return FILE_END;
  }
  UNIMPLEMENTED();
}
} // namespace

// io.h File Operations
DLLEXPORT_FUNC(int, _wsopen, (const wchar_t* Filename, int OpenFlag, int ShareFlag, ...)) {
  DWORD Attrs = 0;
  if (OpenFlag & _O_CREAT) {
    va_list VA;
    int PermMode;
    va_start(VA, ShareFlag);
    PermMode = va_arg(VA, int);
    va_end(VA);
    if (!(PermMode & _S_IWRITE)) {
      Attrs = FILE_ATTRIBUTE_READONLY;
    }
  }
  auto access = OpenFlagToAccess(OpenFlag);
  ULONG sharing = FILE_SHARE_READ;
  if (access == GENERIC_WRITE) {
    sharing |= FILE_SHARE_WRITE;
  }

  if (ShareFlag == _SH_DENYRW) {
    sharing = 0;
  } else if (ShareFlag == _SH_DENYWR) {
    sharing &= ~FILE_SHARE_WRITE;
  } else if (ShareFlag == _SH_DENYRD) {
    sharing &= ~FILE_SHARE_READ;
  }

  HANDLE Handle = CreateFileW(Filename, access, sharing, nullptr, OpenFlagToCreation(OpenFlag), Attrs, nullptr);
  if (Handle != INVALID_HANDLE_VALUE) {
    return AllocateFile(std::make_unique<FILE>(Handle, -1, OpenFlag & _O_APPEND));
  }

  if (GetLastError() == ERROR_FILE_EXISTS) {
    return ErrnoReturn(EEXIST);
  }
  if (GetLastError() == ERROR_FILE_NOT_FOUND) {
    return ErrnoReturn(ENOENT);
  }
  if (GetLastError() == ERROR_ACCESS_DENIED) {
    return ErrnoReturn(EACCES);
  }
  return ErrnoReturn(ENOENT);
}

// Wide-character open without sharing restrictions (_SH_DENYNO). The optional permission mode
// is only fetched and forwarded when _O_CREAT is requested.
DLLEXPORT_FUNC(int, _wopen, (const wchar_t* Filename, int OpenFlag, ...)) {
  if (!(OpenFlag & _O_CREAT)) {
    return _wsopen(Filename, OpenFlag, _SH_DENYNO);
  }

  va_list Args;
  va_start(Args, OpenFlag);
  const int PermMode = va_arg(Args, int);
  va_end(Args);
  return _wsopen(Filename, OpenFlag, _SH_DENYNO, PermMode);
}

DLLEXPORT_FUNC(int, _sopen, (const char* Filename, int OpenFlag, int ShareFlag, ...)) {
  UNICODE_STRING FilenameW;
  if (!RtlCreateUnicodeStringFromAsciiz(&FilenameW, Filename)) {
    return ErrnoReturn(EINVAL);
  }
  int ret = 0;
  if (OpenFlag & _O_CREAT) {
    va_list VA;
    int PermMode;
    va_start(VA, ShareFlag);
    PermMode = va_arg(VA, int);
    va_end(VA);
    ret = _wopen(FilenameW.Buffer, OpenFlag, ShareFlag, PermMode);
  } else {
    ret = _wopen(FilenameW.Buffer, OpenFlag, ShareFlag);
  }
  RtlFreeUnicodeString(&FilenameW);
  return ret;
}

// Narrow-character open without sharing restrictions (_SH_DENYNO). The optional permission mode
// is only fetched and forwarded when _O_CREAT is requested.
DLLEXPORT_FUNC(int, _open, (const char* Filename, int OpenFlag, ...)) {
  if (!(OpenFlag & _O_CREAT)) {
    return _sopen(Filename, OpenFlag, _SH_DENYNO);
  }

  va_list Args;
  va_start(Args, OpenFlag);
  const int PermMode = va_arg(Args, int);
  va_end(Args);
  return _sopen(Filename, OpenFlag, _SH_DENYNO, PermMode);
}

// POSIX-named alias of _open; forwards the optional permission mode when _O_CREAT is set.
DLLEXPORT_FUNC(int, open, (const char* Filename, int OpenFlag, ...)) {
  if (!(OpenFlag & _O_CREAT)) {
    return _open(Filename, OpenFlag);
  }

  va_list Args;
  va_start(Args, OpenFlag);
  const int PermMode = va_arg(Args, int);
  va_end(Args);
  return _open(Filename, OpenFlag, PermMode);
}

DLLEXPORT_FUNC(int, _close, (int FileHandle)) {
  RemoveFile(FileHandle);
  return 0;
}

int close(int FileHandle) {
  return _close(FileHandle);
}

int64_t _lseeki64(int FileHandle, int64_t Offset, int Origin) {
  LARGE_INTEGER Res;
  SetFilePointerEx(GetFile(FileHandle)->Handle, LARGE_INTEGER {.QuadPart = Offset}, &Res, OriginToMoveMethod(Origin));
  return Res.QuadPart;
}

DLLEXPORT_FUNC(long, _lseek, (int FileHandle, long Offset, int Origin)) {
  LARGE_INTEGER Res;
  SetFilePointerEx(GetFile(FileHandle)->Handle, LARGE_INTEGER {.QuadPart = Offset}, &Res, OriginToMoveMethod(Origin));
  return Res.QuadPart;
}

long lseek(int FileHandle, long Offset, int Origin) {
  return _lseek(FileHandle, Offset, Origin);
}

int64_t _telli64(int FileHandle) {
  LARGE_INTEGER Res;
  SetFilePointerEx(GetFile(FileHandle)->Handle, LARGE_INTEGER {}, &Res, FILE_CURRENT);
  return Res.QuadPart;
}

DLLEXPORT_FUNC(int, _read, (int FileHandle, void* DstBuf, unsigned int MaxCharCount)) {
  DWORD Read;
  ReadFile(GetFile(FileHandle)->Handle, DstBuf, MaxCharCount, &Read, nullptr);
  return static_cast<int>(Read);
}

int read(int FileHandle, void* DstBuf, unsigned int MaxCharCount) {
  return _read(FileHandle, DstBuf, MaxCharCount);
}

DLLEXPORT_FUNC(int, _write, (int FileHandle, const void* Buf, unsigned int MaxCharCount)) {
  DWORD Written;
  FILE* File = GetFile(FileHandle);
  if (File->Append) {
    SetFilePointerEx(File->Handle, LARGE_INTEGER {}, nullptr, FILE_END);
  }
  WriteFile(File->Handle, Buf, MaxCharCount, &Written, nullptr);
  return static_cast<int>(Written);
}

int write(int FileHandle, const void* Buf, unsigned int MaxCharCount) {
  return _write(FileHandle, Buf, MaxCharCount);
}

// Always reports "not a terminal"; the descriptor is not inspected.
DLLEXPORT_FUNC(int, _isatty, (int _FileHandle)) {
  return 0;
}

// Not provided by this runtime; calling it hits UNIMPLEMENTED().
DLLEXPORT_FUNC(intptr_t, _get_osfhandle, (int _FileHandle)) {
  UNIMPLEMENTED();
}

namespace {
// Converts an fopen-style mode string into io.h open flags.
//   contains 'a'        -> read/write, create, append
//   contains 'r'        -> read-only, or read/write when '+' is also present
//   anything else       -> read/write, create, truncate
//   'x' additionally requests exclusive creation.
// `StrchrFunc` is the character-search routine matching the string's character type.
template<typename TStr, typename TChar, TStr (*StrchrFunc)(TStr, TChar)>
int ModeToOpenFlag(TStr Mode) {
  const auto Has = [Mode](TChar Ch) { return StrchrFunc(Mode, Ch) != nullptr; };

  int Base;
  if (Has('a')) {
    Base = _O_RDWR | _O_CREAT | _O_APPEND;
  } else if (Has('r')) {
    Base = Has('+') ? _O_RDWR : _O_RDONLY;
  } else {
    Base = _O_RDWR | _O_CREAT | _O_TRUNC;
  }

  return Has('x') ? (Base | _O_EXCL) : Base;
}
} // namespace

// stdio.h File Operations
// Wide-character fopen: derives the open flags from `Mode`, opens the file with read and write
// permission bits, and returns the file object registered for the new descriptor
// (nullptr when the open fails).
DLLEXPORT_FUNC(FILE*, _wfopen, (const wchar_t* __restrict__ Filename, const wchar_t* __restrict__ Mode)) {
  const int Flags = ModeToOpenFlag<const wchar_t*, wchar_t, &wcschr>(Mode);
  const int Descriptor = _wopen(Filename, Flags, _S_IWRITE | _S_IREAD);
  return Descriptor == -1 ? nullptr : GetFile(Descriptor);
}

// Narrow-character fopen: derives the open flags from `Mode`, opens the file with read and
// write permission bits, and returns the file object registered for the new descriptor
// (nullptr when the open fails).
FILE* fopen(const char* __restrict__ Filename, const char* __restrict__ Mode) {
  const int Flags = ModeToOpenFlag<const char*, int, &strchr>(Mode);
  const int Descriptor = _open(Filename, Flags, _S_IWRITE | _S_IREAD);
  return Descriptor == -1 ? nullptr : GetFile(Descriptor);
}

// Not provided by this runtime; calling it hits UNIMPLEMENTED().
FILE* fdopen(int _FileHandle, const char* _Mode) {
  UNIMPLEMENTED();
}

// Closes a stream by releasing the table slot recorded in the file object.
// Always returns 0.
int fclose(FILE* File) {
  const int Descriptor = File->FileHandle;
  RemoveFile(Descriptor);
  return 0;
}

// Moves the stream's file pointer by `Offset` relative to `Origin`.
// Returns 0 on success, or -1 with errno = EINVAL when the seek fails (a failed seek is no
// longer reported as success).
DLLEXPORT_FUNC(int, _fseeki64, (FILE * File, _off64_t Offset, int Origin)) {
  if (!SetFilePointerEx(File->Handle, LARGE_INTEGER {.QuadPart = Offset}, nullptr, OriginToMoveMethod(Origin))) {
    return ErrnoReturn(EINVAL);
  }
  return 0;
}

// `long` flavour of _fseeki64.
int fseek(FILE* File, long Offset, int Origin) {
  return _fseeki64(File, Offset, Origin);
}

// Returns the stream's current file pointer position (a zero-distance seek from the current
// position), or -1 with errno = EINVAL when the query fails. The result is never read
// uninitialized.
DLLEXPORT_FUNC(_off64_t, _ftelli64, (FILE * File)) {
  LARGE_INTEGER Res {};
  if (!SetFilePointerEx(File->Handle, LARGE_INTEGER {}, &Res, FILE_CURRENT)) {
    return ErrnoReturn(EINVAL);
  }
  return Res.QuadPart;
}

// `long` flavour of _ftelli64; the position is truncated to `long`.
long ftell(FILE* File) {
  return static_cast<long>(_ftelli64(File));
}

// Reads up to `Count` elements of `ElementSize` bytes each from the stream into `DstBuf`.
// Returns the number of COMPLETE ELEMENTS read, as the C standard requires - not the byte
// count - so callers comparing the result against `Count` behave correctly for element sizes
// other than 1. A zero element size or count reads nothing and returns 0.
// NOTE(review): the byte total is passed to ReadFile as a 32-bit count; requests of 4 GiB or
// more are truncated.
size_t fread(void* __restrict__ DstBuf, size_t ElementSize, size_t Count, FILE* __restrict__ File) {
  if (ElementSize == 0 || Count == 0) {
    return 0;
  }

  DWORD Read = 0;
  ReadFile(File->Handle, DstBuf, ElementSize * Count, &Read, nullptr);
  // A trailing partial element does not count.
  return static_cast<size_t>(Read) / ElementSize;
}

// Writes `Count` elements of `Size` bytes each from `Str` to the stream. Streams opened for
// appending are positioned at end-of-file first.
// Returns the number of COMPLETE ELEMENTS written, as the C standard requires - not the byte
// count. A zero element size or count writes nothing and returns 0.
// NOTE(review): the byte total is passed to WriteFile as a 32-bit count; requests of 4 GiB or
// more are truncated.
size_t fwrite(const void* __restrict__ Str, size_t Size, size_t Count, FILE* __restrict__ File) {
  if (Size == 0 || Count == 0) {
    return 0;
  }

  DWORD Written = 0;
  if (File->Append) {
    SetFilePointerEx(File->Handle, LARGE_INTEGER {}, nullptr, FILE_END);
  }
  WriteFile(File->Handle, Str, Size * Count, &Written, nullptr);
  // A trailing partial element does not count.
  return static_cast<size_t>(Written) / Size;
}

// The remaining stdio entry points below are declared so that code referencing them links,
// but none of them is implemented: every body is just UNIMPLEMENTED().
// NOTE(review): UNIMPLEMENTED() comes from ../Priv.h and presumably does not return - confirm.

// Stream buffering control - not implemented.
void setbuf(FILE* __restrict__ _File, char* __restrict__ _Buffer) {
  UNIMPLEMENTED();
}

// Stream flush - not implemented.
int fflush(FILE* _File) {
  UNIMPLEMENTED();
}

// Formatted output to a stream - not implemented.
int fprintf(FILE* __restrict__, const char* __restrict__, ...) {
  UNIMPLEMENTED();
}

// Formatted output to a stream (va_list form) - not implemented.
int vfprintf(FILE* __restrict__, const char* __restrict__, va_list) {
  UNIMPLEMENTED();
}

// Character push-back - not implemented.
int ungetc(int _Ch, FILE* _File) {
  UNIMPLEMENTED();
}

// Wide character input - not implemented.
wint_t fgetwc(FILE* _File) {
  UNIMPLEMENTED();
}

// Wide character output - not implemented.
wint_t fputwc(wchar_t _Ch, FILE* _File) {
  UNIMPLEMENTED();
}

// Character output - not implemented.
int fputc(int _Ch, FILE* _File) {
  UNIMPLEMENTED();
}

// String output - not implemented.
int fputs(const char* __restrict__ _Str, FILE* __restrict__ _File) {
  UNIMPLEMENTED();
}

// Character input - not implemented.
int getc(FILE* _File) {
  UNIMPLEMENTED();
}

// Stream locking - not implemented.
void _lock_file(FILE* _File) {
  UNIMPLEMENTED();
}

// Wide character push-back - not implemented.
wint_t ungetwc(wint_t _Ch, FILE* _File) {
  UNIMPLEMENTED();
}

// Stream unlocking - not implemented.
void _unlock_file(FILE* _File) {
  UNIMPLEMENTED();
}

// Standard stream lookup: this runtime has no stdin/stdout/stderr objects, so every index
// yields nullptr.
DLLEXPORT_FUNC(FILE*, __acrt_iob_func, (unsigned index)) {
  return nullptr;
}

// Stream-to-descriptor lookup - not implemented; calling it hits UNIMPLEMENTED().
DLLEXPORT_FUNC(int, _fileno, (FILE * _File)) {
  UNIMPLEMENTED();
}

int access(const char* Path, int AccessMode) {
  UNICODE_STRING PathW;
  if (!RtlCreateUnicodeStringFromAsciiz(&PathW, Path)) {
    return ErrnoReturn(EINVAL);
  }

  UNICODE_STRING NTPath;
  bool Success = RtlDosPathNameToNtPathName_U(PathW.Buffer, &NTPath, nullptr, nullptr);
  RtlFreeUnicodeString(&PathW);
  if (!Success) {
    return ErrnoReturn(EINVAL);
  }

  OBJECT_ATTRIBUTES ObjAttributes;
  InitializeObjectAttributes(&ObjAttributes, &NTPath, OBJ_CASE_INSENSITIVE, nullptr, nullptr);

  FILE_BASIC_INFORMATION Info;
  Success = !NtQueryAttributesFile(&ObjAttributes, &Info);
  RtlFreeUnicodeString(&NTPath);

  if (!Success) {
    return ErrnoReturn(ENOENT);
  }

  if ((AccessMode & W_OK) && (Info.FileAttributes & FILE_ATTRIBUTE_READONLY)) {
    return ErrnoReturn(EACCES);
  }

  return 0;
}

// File renaming - not implemented; calling it hits UNIMPLEMENTED().
int rename(const char* _OldFilename, const char* _NewFilename) {
  UNIMPLEMENTED();
}


================================================
FILE: Source/Windows/Common/CRT/Math.cpp
================================================
// SPDX-License-Identifier: MIT
#define _SECIMP
#define _CRTIMP
#include <cstdlib>
#include <cstdint>
#include <cmath>

// long double math entry points. Each one narrows its argument(s) to double, evaluates the
// double-precision routine, and widens the result back to long double.

long double tanl(long double X) {
  const double Narrowed = static_cast<double>(X);
  return tan(Narrowed);
}

long double sinl(long double X) {
  const double Narrowed = static_cast<double>(X);
  return sin(Narrowed);
}

long double cosl(long double X) {
  const double Narrowed = static_cast<double>(X);
  return cos(Narrowed);
}

long double exp2l(long double N) {
  const double Narrowed = static_cast<double>(N);
  return exp2(Narrowed);
}

long double log2l(long double N) {
  const double Narrowed = static_cast<double>(N);
  return log2(Narrowed);
}

// Arguments are forwarded positionally: the first parameter is atan2's first argument.
long double atan2l(long double X, long double Y) {
  const double First = static_cast<double>(X);
  const double Second = static_cast<double>(Y);
  return atan2(First, Second);
}


================================================
FILE: Source/Windows/Common/CRT/Misc.cpp
================================================
// SPDX-License-Identifier: MIT
#define _SECIMP
#define _CRTIMP
#include <cstdlib>
#include <cstdint>
#include <unistd.h>
#include <wchar.h>
#include <windef.h>
#include <winternl.h>
#include <winbase.h>
#include "../Priv.h"

namespace {
// Narrow-character copy of the process environment: consecutive NUL-terminated "NAME=value"
// strings, ended by an empty string. Allocated and filled by InitEnv().
char* Env;
// Null-terminated array of pointers to the individual strings inside `Env`.
char** EnvArray;
} // namespace

namespace {
// Builds the narrow-character environment (`Env`) and its pointer table (`EnvArray`) from the
// wide-character environment block referenced by the process parameters in the PEB.
// The whole conversion runs with the PEB lock held. Allocation results are not checked.
void InitEnv() {
  RtlAcquirePebLock();
  auto ProcessParams = reinterpret_cast<RTL_USER_PROCESS_PARAMETERS64*>(NtCurrentTeb()->ProcessEnvironmentBlock->ProcessParameters);
  wchar_t* EnvW = reinterpret_cast<wchar_t*>(ProcessParams->Environment);
  // Size of the wide block in bytes. Starts at 4 for the two terminating null wchars and grows
  // by 2 per scanned character (assumes a 2-byte wchar_t).
  DWORD SizeW = 4;
  // The PEB environment is terminated by two null wchars.
  for (wchar_t* It = EnvW; It[0] != 0 || It[1] != 0; It++, SizeW += 2)
    ;
  // Convert the whole block, terminators included, in one go.
  DWORD Size;
  RtlUnicodeToMultiByteSize(&Size, EnvW, SizeW);
  Env = reinterpret_cast<char*>(RtlAllocateHeap(GetProcessHeap(), 0, Size + 1));
  RtlUnicodeToMultiByteN(Env, Size + 1, nullptr, EnvW, SizeW);

  // First pass: count the strings (the block ends at the first empty string).
  size_t EnvCount = 0;
  char* It = Env;
  while (*It) {
    EnvCount++;
    It += strlen(It) + 1;
  }

  // One extra slot for the terminating null pointer.
  EnvArray = reinterpret_cast<char**>(RtlAllocateHeap(GetProcessHeap(), 0, (EnvCount + 1) * sizeof(char*)));

  // Second pass: record where each string starts.
  It = Env;
  for (size_t i = 0; i < EnvCount; i++) {
    EnvArray[i] = It;
    It += strlen(It) + 1;
  }
  EnvArray[EnvCount] = nullptr;

  RtlReleasePebLock();
}

// Registers InitEnv in the `.CRT$FEXB` section.
// NOTE(review): presumably this places it inside the FEXA..FEXZ range that the CRT start-up
// code walks, so it runs during process initialization - confirm against CRT.cpp and the
// linker's section ordering.
__attribute__((used, section(".CRT$FEXB"))) void (*_InitEnv)(void) = InitEnv;
} // namespace

// Returns the address of the environment pointer table built by InitEnv().
char*** __p__environ() {
  return &EnvArray;
}

// Looks up `VarName` in the narrow environment block and returns a pointer to the text after
// the '=' of the first entry whose name matches exactly (case-sensitive), or nullptr when no
// entry matches. The returned pointer refers into the shared `Env` block.
char* getenv(const char* VarName) {
  const size_t NameLen = strlen(VarName);

  for (char* Entry = Env; *Entry; Entry += strlen(Entry) + 1) {
    char* Separator = strchr(Entry, '=');
    if (!Separator) {
      continue;
    }
    // The name must span exactly the characters before the '='.
    if (static_cast<size_t>(Separator - Entry) != NameLen) {
      continue;
    }
    if (strncmp(Entry, VarName, NameLen) == 0) {
      return Separator + 1;
    }
  }

  return nullptr;
}

// Accepts and discards the handler: nothing is recorded, so the function passed in is never
// called by this runtime. Always reports success.
int atexit(void (*)(void)) {
  return 0;
}

// `abort` may be defined as a macro by an included header; it is temporarily undefined so the
// function itself can be defined here. Not implemented: calling it hits UNIMPLEMENTED().
#pragma push_macro("abort")
#undef abort
void abort(void) {
  UNIMPLEMENTED();
}
#pragma pop_macro("abort")

// Returns the current Win32 process id, converted to int.
int getpid(void) {
  return static_cast<int>(GetCurrentProcessId());
}

// Process exit - not implemented; calling it hits UNIMPLEMENTED().
void exit(int _Code) {
  UNIMPLEMENTED();
}

// Assertion failure handler - not implemented; calling it hits UNIMPLEMENTED().
DLLEXPORT_FUNC(void, _assert, (const char* message, const char* file, unsigned line)) {
  UNIMPLEMENTED();
}

// CRT thread creation - not implemented; calling it hits UNIMPLEMENTED().
DLLEXPORT_FUNC(
  uintptr_t, _beginthreadex,
  (void* security, unsigned stack_size, unsigned(__stdcall* start_address)(void*), void* arglist, unsigned initflag, unsigned* thrdaddr)) {
  UNIMPLEMENTED();
}

// Error-table size accessor - not implemented; calling it hits UNIMPLEMENTED().
DLLEXPORT_FUNC(int*, __sys_nerr, (void)) {
  UNIMPLEMENTED();
}


================================================
FILE: Source/Windows/Common/CRT/Musl/CMakeLists.txt
================================================
target_sources(CommonWindowsRuntime PRIVATE exp2.c log2_data.c remainder.c strtoimax.c strtoull.c exp_data.c fmod.c log2.c isnan.c remquo.c strtoll.c strtoumax.c __math_uflow.c __math_oflow.c __math_xflow.c __math_invalid.c __math_divzero.c)


================================================
FILE: Source/Windows/Common/CRT/Musl/__math_divzero.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include "libm.h"

/* Returns the result of dividing +1.0 (sign == 0) or -1.0 (sign != 0) by zero.
   The numerator is passed through fp_barrier (from libm.h) before the division.
   NOTE(review): presumably so the division is performed at run time and raises the
   divide-by-zero exception - confirm against libm.h.  */
double __math_divzero(uint32_t sign) {
  return fp_barrier(sign ? -1.0 : 1.0) / 0.0;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/__math_invalid.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include "libm.h"

/* Returns (x - x) / (x - x): for a finite x this is 0.0 / 0.0, i.e. a NaN produced by an
   invalid operation; for a NaN x the NaN propagates.  */
double __math_invalid(double x) {
  return (x - x) / (x - x);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/__math_oflow.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include "libm.h"

/* Overflow result helper: delegates to __math_xflow with the large constant 0x1p769, whose
   square exceeds the double range. `sign` selects the sign of the result.  */
double __math_oflow(uint32_t sign) {
  return __math_xflow(sign, 0x1p769);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/__math_uflow.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include "libm.h"

/* Underflow result helper: delegates to __math_xflow with the tiny constant 0x1p-767, whose
   square is below the double range. `sign` selects the sign of the result.  */
double __math_uflow(uint32_t sign) {
  return __math_xflow(sign, 0x1p-767);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/__math_xflow.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include "libm.h"

/* Shared overflow/underflow helper: returns (sign ? -y : y) * y evaluated as a double.
   One factor goes through fp_barrier and the product through eval_as_double (both from
   libm.h).
   NOTE(review): presumably to force the multiplication to happen at run time in double
   precision so the floating-point exception is raised - confirm against libm.h.  */
double __math_xflow(uint32_t sign, double y) {
  return eval_as_double(fp_barrier(sign ? -y : y) * y);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/exp2.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp_data.h"

#define N (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T __exp_data.tab
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]

/* Handle cases that may overflow or underflow when computing the result that
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflown into the sign bit so that needs to be adjusted before using it as
   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.  */
/* Parameters: tmp is the polynomial/tail term, sbits the raw bits of scale, ki the bits of
   the shifted argument; only bit 31 of ki is inspected to tell the two cases apart.  */
static inline double specialcase(double_t tmp, uint64_t sbits, uint64_t ki) {
  double_t scale, y;

  if ((ki & 0x80000000) == 0) {
    /* k > 0, the exponent of scale might have overflowed by 1.  */
    /* Halve scale (exponent - 1) and compensate with the final multiplication by 2.  */
    sbits -= 1ull << 52;
    scale = asdouble(sbits);
    y = 2 * (scale + scale * tmp);
    return eval_as_double(y);
  }
  /* k < 0, need special care in the subnormal range.  */
  /* Raise the exponent by 1022 here; the result is scaled back by 0x1p-1022 at the end.  */
  sbits += 1022ull << 52;
  scale = asdouble(sbits);
  y = scale + scale * tmp;
  if (y < 1.0) {
    /* Round y to the right precision before scaling it into the subnormal
       range to avoid double rounding that can cause 0.5+E/2 ulp error where
       E is the worst-case ulp error outside the subnormal range.  So this
       is only useful if the goal is better than 1 ulp worst-case error.  */
    double_t hi, lo;
    lo = scale - y + scale * tmp;
    hi = 1.0 + y;
    lo = 1.0 - hi + y + lo;
    y = eval_as_double(hi + lo) - 1.0;
    /* Avoid -0.0 with downward rounding.  */
    if (WANT_ROUNDING && y == 0.0) {
      y = 0.0;
    }
    /* The underflow exception needs to be signaled explicitly.  */
    fp_force_eval(fp_barrier(0x1p-1022) * 0x1p-1022);
  }
  y = 0x1p-1022 * y;
  return eval_as_double(y);
}

/* Top 12 bits of a double (sign and exponent bits).  */
/* Obtained by shifting the 64-bit representation right by 52, i.e. dropping the mantissa.  */
static inline uint32_t top12(double x) {
  return asuint64(x) >> 52;
}

/* Computes 2^x for a double x.
   The argument is split as x = k/N + r (N = 2^EXP_TABLE_BITS); 2^(k/N) comes from the
   __exp_data table and 2^r from a polynomial in r.  Tiny, huge, infinite and NaN arguments
   are handled up front; results that may overflow or fall into the subnormal range are
   finished by specialcase().  */
double exp2(double x) {
  uint32_t abstop;
  uint64_t ki, idx, top, sbits;
  double_t kd, r, r2, scale, tail, tmp;

  /* Exponent bits of x with the sign removed.  */
  abstop = top12(x) & 0x7ff;
  /* Single unsigned comparison: true when |x| is below 2^-54 (the subtraction wraps around)
     or at least 512.  */
  if (predict_false(abstop - top12(0x1p-54) >= top12(512.0) - top12(0x1p-54))) {
    if (abstop - top12(0x1p-54) >= 0x80000000) {
      /* Avoid spurious underflow for tiny x.  */
      /* Note: 0 is common input.  */
      return WANT_ROUNDING ? 1.0 + x : 1.0;
    }
    if (abstop >= top12(1024.0)) {
      if (asuint64(x) == asuint64(-INFINITY)) {
        return 0.0;
      }
      /* +inf and NaN: 1.0 + x yields +inf or propagates the NaN.  */
      if (abstop >= top12(INFINITY)) {
        return 1.0 + x;
      }
      /* Finite |x| >= 1024: positive overflows, x <= -1075 underflows.  */
      if (!(asuint64(x) >> 63)) {
        return __math_oflow(0);
      } else if (asuint64(x) >= asuint64(-1075.0)) {
        return __math_uflow(0);
      }
    }
    if (2 * asuint64(x) > 2 * asuint64(928.0)) {
      /* Large x is special cased below.  */
      abstop = 0;
    }
  }

  /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */
  /* x = k/N + r, with int k and r in [-1/2N, 1/2N].  */
  kd = eval_as_double(x + Shift);
  ki = asuint64(kd); /* k.  */
  kd -= Shift;       /* k/N for int k.  */
  r = x - kd;
  /* 2^(k/N) ~= scale * (1 + tail).  */
  idx = 2 * (ki % N);
  top = ki << (52 - EXP_TABLE_BITS);
  tail = asdouble(T[idx]);
  /* This is only a valid scale when -1023*N < k < 1024*N.  */
  sbits = T[idx + 1] + top;
  /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
  /* Evaluation is optimized assuming superscalar pipelined execution.  */
  r2 = r * r;
  /* Without fma the worst case error is 0.5/N ulp larger.  */
  /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
  tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
  if (predict_false(abstop == 0)) {
    return specialcase(tmp, sbits, ki);
  }
  scale = asdouble(sbits);
  /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
     is no spurious underflow here even without fma.  */
  return eval_as_double(scale + scale * tmp);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/exp_data.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#include "exp_data.h"

#define N (1 << EXP_TABLE_BITS)

const struct exp_data __exp_data = {
  // N/ln2
  .invln2N = 0x1.71547652b82fep0 * N,
  // -ln2/N
  .negln2hiN = -0x1.62e42fefa0000p-8,
  .negln2loN = -0x1.cf79abc9e3b3ap-47,
// Used for rounding when !TOINT_INTRINSICS
#if EXP_USE_TOINT_NARROW
  .shift = 0x1800000000.8p0,
#else
  .shift = 0x1.8p52,
#endif
  // exp polynomial coefficients.
  .poly =
    {
      // abs error: 1.555*2^-66
      // ulp error: 0.509 (0.511 without fma)
      // if |x| < ln2/256+eps
      // abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
      // abs error if |x| < ln2/128: 1.7145*2^-56
      0x1.ffffffffffdbdp-2,
      0x1.555555555543cp-3,
      0x1.55555cf172b91p-5,
      0x1.1111167a4d017p-7,
    },
  .exp2_shift = 0x1.8p52 / N,
  // exp2 polynomial coefficients.
  .exp2_poly =
    {
      // abs error: 1.2195*2^-65
      // ulp error: 0.507 (0.511 without fma)
      // if |x| < 1/256
      // abs error if |x| < 1/128: 1.9941*2^-56
      0x1.62e42fefa39efp-1,
      0x1.ebfbdff82c424p-3,
      0x1.c6b08d70cf4b5p-5,
      0x1.3b2abd24650ccp-7,
      0x1.5d7e09b4e3a84p-10,
    },
  // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
  // tab[2*k] = asuint64(T[k])
  // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
  .tab =
    {
      0x0,
      0x3ff0000000000000,
      0x3c9b3b4f1a88bf6e,
      0x3feff63da9fb3335,
      0xbc7160139cd8dc5d,
      0x3fefec9a3e778061,
      0xbc905e7a108766d1,
      0x3fefe315e86e7f85,
      0x3c8cd2523567f613,
      0x3fefd9b0d3158574,
      0xbc8bce8023f98efa,
      0x3fefd06b29ddf6de,
      0x3c60f74e61e6c861,
      0x3fefc74518759bc8,
      0x3c90a3e45b33d399,
      0x3fefbe3ecac6f383,
      0x3c979aa65d837b6d,
      0x3fefb5586cf9890f,
      0x3c8eb51a92fdeffc,
      0x3fefac922b7247f7,
      0x3c3ebe3d702f9cd1,
      0x3fefa3ec32d3d1a2,
      0xbc6a033489906e0b,
      0x3fef9b66affed31b,
      0xbc9556522a2fbd0e,
      0x3fef9301d0125b51,
      0xbc5080ef8c4eea55,
      0x3fef8abdc06c31cc,
      0xbc91c923b9d5f416,
      0x3fef829aaea92de0,
      0x3c80d3e3e95c55af,
      0x3fef7a98c8a58e51,
      0xbc801b15eaa59348,
      0x3fef72b83c7d517b,
      0xbc8f1ff055de323d,
      0x3fef6af9388c8dea,
      0x3c8b898c3f1353bf,
      0x3fef635beb6fcb75,
      0xbc96d99c7611eb26,
      0x3fef5be084045cd4,
      0x3c9aecf73e3a2f60,
      0x3fef54873168b9aa,
      0xbc8fe782cb86389d,
      0x3fef4d5022fcd91d,
      0x3c8a6f4144a6c38d,
      0x3fef463b88628cd6,
      0x3c807a05b0e4047d,
      0x3fef3f49917ddc96,
      0x3c968efde3a8a894,
      0x3fef387a6e756238,
      0x3c875e18f274487d,
      0x3fef31ce4fb2a63f,
      0x3c80472b981fe7f2,
      0x3fef2b4565e27cdd,
      0xbc96b87b3f71085e,
      0x3fef24dfe1f56381,
      0x3c82f7e16d09ab31,
      0x3fef1e9df51fdee1,
      0xbc3d219b1a6fbffa,
      0x3fef187fd0dad990,
      0x3c8b3782720c0ab4,
      0x3fef1285a6e4030b,
      0x3c6e149289cecb8f,
      0x3fef0cafa93e2f56,
      0x3c834d754db0abb6,
      0x3fef06fe0a31b715,
      0x3c864201e2ac744c,
      0x3fef0170fc4cd831,
      0x3c8fdd395dd3f84a,
      0x3feefc08b26416ff,
      0xbc86a3803b8e5b04,
      0x3feef6c55f929ff1,
      0xbc924aedcc4b5068,
      0x3feef1a7373aa9cb,
      0xbc9907f81b512d8e,
      0x3feeecae6d05d866,
      0xbc71d1e83e9436d2,
      0x3feee7db34e59ff7,
      0xbc991919b3ce1b15,
      0x3feee32dc313a8e5,
      0x3c859f48a72a4c6d,
      0x3feedea64c123422,
      0xbc9312607a28698a,
      0x3feeda4504ac801c,
      0xbc58a78f4817895b,
      0x3feed60a21f72e2a,
      0xbc7c2c9b67499a1b,
      0x3feed1f5d950a897,
      0x3c4363ed60c2ac11,
      0x3feece086061892d,
      0x3c9666093b0664ef,
      0x3feeca41ed1d0057,
      0x3c6ecce1daa10379,
      0x3feec6a2b5c13cd0,
      0x3c93ff8e3f0f1230,
      0x3feec32af0d7d3de,
      0x3c7690cebb7aafb0,
      0x3feebfdad5362a27,
      0x3c931dbdeb54e077,
      0x3feebcb299fddd0d,
      0xbc8f94340071a38e,
      0x3feeb9b2769d2ca7,
      0xbc87deccdc93a349,
      0x3feeb6daa2cf6642,
      0xbc78dec6bd0f385f,
      0x3feeb42b569d4f82,
      0xbc861246ec7b5cf6,
      0x3feeb1a4ca5d920f,
      0x3c93350518fdd78e,
      0x3feeaf4736b527da,
      0x3c7b98b72f8a9b05,
      0x3feead12d497c7fd,
      0x3c9063e1e21c5409,
      0x3feeab07dd485429,
      0x3c34c7855019c6ea,
      0x3feea9268a5946b7,
      0x3c9432e62b64c035,
      0x3feea76f15ad2148,
      0xbc8ce44a6199769f,
      0x3feea5e1b976dc09,
      0xbc8c33c53bef4da8,
      0x3feea47eb03a5585,
      0xbc845378892be9ae,
      0x3feea34634ccc320,
      0xbc93cedd78565858,
      0x3feea23882552225,
      0x3c5710aa807e1964,
      0x3feea155d44ca973,
      0xbc93b3efbf5e2228,
      0x3feea09e667f3bcd,
      0xbc6a12ad8734b982,
      0x3feea012750bdabf,
      0xbc6367efb86da9ee,
      0x3fee9fb23c651a2f,
      0xbc80dc3d54e08851,
      0x3fee9f7df9519484,
      0xbc781f647e5a3ecf,
      0x3fee9f75e8ec5f74,
      0xbc86ee4ac08b7db0,
      0x3fee9f9a48a58174,
      0xbc8619321e55e68a,
      0x3fee9feb564267c9,
      0x3c909ccb5e09d4d3,
      0x3feea0694fde5d3f,
      0xbc7b32dcb94da51d,
      0x3feea11473eb0187,
      0x3c94ecfd5467c06b,
      0x3feea1ed0130c132,
      0x3c65ebe1abd66c55,
      0x3feea2f336cf4e62,
      0xbc88a1c52fb3cf42,
      0x3feea427543e1a12,
      0xbc9369b6f13b3734,
      0x3feea589994cce13,
      0xbc805e843a19ff1e,
      0x3feea71a4623c7ad,
      0xbc94d450d872576e,
      0x3feea8d99b4492ed,
      0x3c90ad675b0e8a00,
      0x3feeaac7d98a6699,
      0x3c8db72fc1f0eab4,
      0x3feeace5422aa0db,
      0xbc65b6609cc5e7ff,
      0x3feeaf3216b5448c,
      0x3c7bf68359f35f44,
      0x3feeb1ae99157736,
      0xbc93091fa71e3d83,
      0x3feeb45b0b91ffc6,
      0xbc5da9b88b6c1e29,
      0x3feeb737b0cdc5e5,
      0xbc6c23f97c90b959,
      0x3feeba44cbc8520f,
      0xbc92434322f4f9aa,
      0x3feebd829fde4e50,
      0xbc85ca6cd7668e4b,
      0x3feec0f170ca07ba,
      0x3c71affc2b91ce27,
      0x3feec49182a3f090,
      0x3c6dd235e10a73bb,
      0x3feec86319e32323,
      0xbc87c50422622263,
      0x3feecc667b5de565,
      0x3c8b1c86e3e231d5,
      0x3feed09bec4a2d33,
      0xbc91bbd1d3bcbb15,
      0x3feed503b23e255d,
      0x3c90cc319cee31d2,
      0x3feed99e1330b358,
      0x3c8469846e735ab3,
      0x3feede6b5579fdbf,
      0xbc82dfcd978e9db4,
      0x3feee36bbfd3f37a,
      0x3c8c1a7792cb3387,
      0x3feee89f995ad3ad,
      0xbc907b8f4ad1d9fa,
      0x3feeee07298db666,
      0xbc55c3d956dcaeba,
      0x3feef3a2b84f15fb,
      0xbc90a40e3da6f640,
      0x3feef9728de5593a,
      0xbc68d6f438ad9334,
      0x3feeff76f2fb5e47,
      0xbc91eee26b588a35,
      0x3fef05b030a1064a,
      0x3c74ffd70a5fddcd,
      0x3fef0c1e904bc1d2,
      0xbc91bdfbfa9298ac,
      0x3fef12c25bd71e09,
      0x3c736eae30af0cb3,
      0x3fef199bdd85529c,
      0x3c8ee3325c9ffd94,
      0x3fef20ab5fffd07a,
      0x3c84e08fd10959ac,
      0x3fef27f12e57d14b,
      0x3c63cdaf384e1a67,
      0x3fef2f6d9406e7b5,
      0x3c676b2c6c921968,
      0x3fef3720dcef9069,
      0xbc808a1883ccb5d2,
      0x3fef3f0b555dc3fa,
      0xbc8fad5d3ffffa6f,
      0x3fef472d4a07897c,
      0xbc900dae3875a949,
      0x3fef4f87080d89f2,
      0x3c74a385a63d07a7,
      0x3fef5818dcfba487,
      0xbc82919e2040220f,
      0x3fef60e316c98398,
      0x3c8e5a50d5c192ac,
      0x3fef69e603db3285,
      0x3c843a59ac016b4b,
      0x3fef7321f301b460,
      0xbc82d52107b43e1f,
      0x3fef7c97337b9b5f,
      0xbc892ab93b470dc9,
      0x3fef864614f5a129,
      0x3c74b604603a88d3,
      0x3fef902ee78b3ff6,
      0x3c83c5ec519d7271,
      0x3fef9a51fbc74c83,
      0xbc8ff7128fd391f0,
      0x3fefa4afa2a490da,
      0xbc8dae98e223747d,
      0x3fefaf482d8e67f1,
      0x3c8ec3bc41aa2008,
      0x3fefba1bee615a27,
      0x3c842b94c3a9eb32,
      0x3fefc52b376bba97,
      0x3c8a64a931d185ee,
      0x3fefd0765b6e4540,
      0xbc8e37bae43be3ed,
      0x3fefdbfdad9cbe14,
      0x3c77893b4d91cd9d,
      0x3fefe7c1819e90d8,
      0x3c5305c14160cc89,
      0x3feff3c22b8f71f1,
    },
};


================================================
FILE: Source/Windows/Common/CRT/Musl/exp_data.h
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#ifndef _EXP_DATA_H
#define _EXP_DATA_H

#include <stdint.h>

#define EXP_TABLE_BITS 7
#define EXP_POLY_ORDER 5
#define EXP_USE_TOINT_NARROW 0
#define EXP2_POLY_ORDER 5
/* Layout of the constant block __exp_data.  Only the declaration is given
   here (extern), so the initializer must come from another translation unit.
   NOTE(review): presumably consumed by the exp()/exp2() implementations --
   the users are outside this header; confirm before changing the layout.  */
extern const struct exp_data {
  double invln2N;
  double shift;
  double negln2hiN;
  double negln2loN;
  double poly[4]; /* Last four coefficients.  */
  double exp2_shift;
  double exp2_poly[EXP2_POLY_ORDER];
  /* 2 * 2^EXP_TABLE_BITS raw 64-bit words (256 with EXP_TABLE_BITS == 7).  */
  uint64_t tab[2 * (1 << EXP_TABLE_BITS)];
} __exp_data;

#endif


================================================
FILE: Source/Windows/Common/CRT/Musl/fmod.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include <math.h>
#include <stdint.h>

/* Floating-point remainder of x / y, computed with integer arithmetic on the
   IEEE-754 bit patterns (shift-and-subtract long division on the mantissas).
   The result carries the sign bit of x.  Returns NaN when y is zero or NaN,
   or when the exponent field of x is 0x7ff (x infinite or NaN).  */
double fmod(double x, double y) {
  union {
    double f;
    uint64_t i;
  } ux = {x}, uy = {y};
  /* Biased 11-bit exponent fields and the sign bit of x.  */
  int ex = ux.i >> 52 & 0x7ff;
  int ey = uy.i >> 52 & 0x7ff;
  int sx = ux.i >> 63;
  uint64_t i;

  /* In the following, uxi should be ux.i, but then gcc wrongly adds */
  /* float load/store to inner loops, ruining performance and code size. */
  uint64_t uxi = ux.i;

  /* y == +-0 (all bits but the sign are clear), y is NaN, or x is inf/NaN:
     (x * y) / (x * y) evaluates to NaN in each of these cases.  */
  if (uy.i << 1 == 0 || isnan(y) || ex == 0x7ff) {
    return (x * y) / (x * y);
  }
  /* Shifting out the sign bit compares magnitudes: |x| <= |y|.  */
  if (uxi << 1 <= uy.i << 1) {
    if (uxi << 1 == uy.i << 1) {
      /* |x| == |y|: zero with the sign of x.  */
      return 0 * x;
    }
    return x;
  }

  /* normalize x and y */
  if (!ex) {
    /* Subnormal x: count leading zeros of the mantissa by decrementing ex,
       then shift so the leading 1 lands in bit 52.  */
    for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1)
      ;
    uxi <<= -ex + 1;
  } else {
    /* Normal x: keep the 52 mantissa bits and make the implicit 1 explicit.  */
    uxi &= -1ULL >> 12;
    uxi |= 1ULL << 52;
  }
  if (!ey) {
    for (i = uy.i << 12; i >> 63 == 0; ey--, i <<= 1)
      ;
    uy.i <<= -ey + 1;
  } else {
    uy.i &= -1ULL >> 12;
    uy.i |= 1ULL << 52;
  }

  /* x mod y */
  /* One subtract-if-possible step per exponent difference; a negative
     difference shows up as bit 63 set in the unsigned result.  */
  for (; ex > ey; ex--) {
    i = uxi - uy.i;
    if (i >> 63 == 0) {
      if (i == 0) {
        return 0 * x;
      }
      uxi = i;
    }
    uxi <<= 1;
  }
  /* Final step at equal exponents (no shift afterwards).  */
  i = uxi - uy.i;
  if (i >> 63 == 0) {
    if (i == 0) {
      return 0 * x;
    }
    uxi = i;
  }
  /* Renormalize: bring the leading 1 back up to bit 52.  */
  for (; uxi >> 52 == 0; uxi <<= 1, ex--)
    ;

  /* scale result */
  if (ex > 0) {
    /* Normal result: drop the explicit leading 1 and insert the exponent.  */
    uxi -= 1ULL << 52;
    uxi |= (uint64_t)ex << 52;
  } else {
    /* Subnormal result: shift the mantissa down instead.  */
    uxi >>= -ex + 1;
  }
  uxi |= (uint64_t)sx << 63;
  ux.i = uxi;
  return ux.f;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/isnan.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

/* Reinterprets the object representation of a double as an unsigned 64-bit
   integer by writing one union member and reading the other.  */
static unsigned long long __DOUBLE_BITS(double __f) {
  union {
    unsigned long long raw;
    double value;
  } pun;

  pun.value = __f;
  return pun.raw;
}


/* Returns non-zero when x is a NaN: with the sign bit masked off, a NaN's
   bit pattern is strictly greater than that of infinity (exponent field all
   ones, mantissa zero).  */
int __isnan(double x) {
  const unsigned long long sign_cleared = __DOUBLE_BITS(x) & (-1ULL >> 1);
  const unsigned long long infinity_bits = 0x7ffULL << 52;

  return sign_cleared > infinity_bits;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/libm.h
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#ifndef _LIBM_H
#define _LIBM_H

#include <stdint.h>
#include <float.h>
#include <math.h>

#define hidden

/* union ldshape overlays a long double with integer fields so its sign,
   exponent and mantissa can be accessed directly.  The layout is selected
   from the <float.h> characteristics of long double and the byte order.
   NOTE(review): nothing visibly included by this header is guaranteed to
   define __BYTE_ORDER / __LITTLE_ENDIAN / __BIG_ENDIAN.  If they are all
   undefined the preprocessor evaluates each as 0, every comparison is true,
   and the first matching (little-endian) branch wins -- confirm that is
   acceptable for every target this is built for.  */
/* long double has the same format as double: no ldshape is needed.  */
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
/* 64-bit mantissa, little endian: mantissa first, then sign+exponent.  */
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
  long double f;
  struct {
    uint64_t m;
    uint16_t se;
  } i;
};
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
/* This is the m68k variant of 80-bit long double, and this definition only works
 * on archs where the alignment requirement of uint64_t is <= 4. */
union ldshape {
  long double f;
  struct {
    uint16_t se;
    uint16_t pad;
    uint64_t m;
  } i;
};
/* 113-bit mantissa, little endian.  i splits the value into fields, i2
   exposes it as two 64-bit halves.  */
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __LITTLE_ENDIAN
union ldshape {
  long double f;
  struct {
    uint64_t lo;
    uint32_t mid;
    uint16_t top;
    uint16_t se;
  } i;
  struct {
    uint64_t lo;
    uint64_t hi;
  } i2;
};
/* 113-bit mantissa, big endian: same fields in the opposite order.  */
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 && __BYTE_ORDER == __BIG_ENDIAN
union ldshape {
  long double f;
  struct {
    uint16_t se;
    uint16_t top;
    uint32_t mid;
    uint64_t lo;
  } i;
  struct {
    uint64_t hi;
    uint64_t lo;
  } i2;
};
#else
#error Unsupported long double representation
#endif

/* Support non-nearest rounding mode.  */
#define WANT_ROUNDING 1
/* Support signaling NaNs.  */
#define WANT_SNAN 0

#if WANT_SNAN
#error SNaN is unsupported
#else
#define issignalingf_inline(x) 0
#define issignaling_inline(x) 0
#endif

#ifndef TOINT_INTRINSICS
#define TOINT_INTRINSICS 0
#endif

#if TOINT_INTRINSICS
/* Round x to nearest int in all rounding modes, ties have to be rounded
   consistently with converttoint so the results match.  If the result
   would be outside of [-2^31, 2^31-1] then the semantics is unspecified.  */
static double_t roundtoint(double_t);

/* Convert x to nearest int in all rounding modes, ties have to be rounded
   consistently with roundtoint.  If the result is not representable in an
   int32_t then the semantics is unspecified.  */
static int32_t converttoint(double_t);
#endif

/* Helps static branch prediction so hot path can be better optimized.
   Both hints collapse their argument to 0/1 with !! before handing it to
   __builtin_expect.  That builtin takes a long, which is only 32 bits wide on
   LLP64 targets such as Windows, so forwarding a 64-bit operand unchanged
   could truncate a non-zero condition to zero.  The non-GNU fallbacks
   normalize the same way so the macros yield identical values everywhere.  */
#ifdef __GNUC__
#define predict_true(x) __builtin_expect(!!(x), 1)
#define predict_false(x) __builtin_expect(!!(x), 0)
#else
#define predict_true(x) (!!(x))
#define predict_false(x) (!!(x))
#endif

/* Evaluate an expression as the specified type. With standard excess
   precision handling a type cast or assignment is enough (with
   -ffloat-store an assignment is required, in old compilers argument
   passing and return statement may not drop excess precision).  */

/* Returns x after storing it into a float object, so the value is narrowed
   to float by an assignment rather than relying on the return alone.  */
static inline float eval_as_float(float x) {
  float y = x;
  return y;
}

/* Returns x after storing it into a double object, so the value is narrowed
   to double by an assignment rather than relying on the return alone.  */
static inline double eval_as_double(double x) {
  double y = x;
  return y;
}

/* fp_barrier returns its input, but limits code transformations
   as if it had a side-effect (e.g. observable io) and returned
   an arbitrary value.  */

/* Generic float barrier.  The #ifndef guard plus the self-referential
   #define let an earlier definition of fp_barrierf suppress this version.  */
#ifndef fp_barrierf
#define fp_barrierf fp_barrierf
static inline float fp_barrierf(float x) {
  /* Store to and reload from a volatile object.  */
  volatile float y = x;
  return y;
}
#endif

/* Generic double barrier.  The #ifndef guard plus the self-referential
   #define let an earlier definition of fp_barrier suppress this version.  */
#ifndef fp_barrier
#define fp_barrier fp_barrier
static inline double fp_barrier(double x) {
  /* Store to and reload from a volatile object.  */
  volatile double y = x;
  return y;
}
#endif

/* Generic long double barrier.  The #ifndef guard plus the self-referential
   #define let an earlier definition of fp_barrierl suppress this version.  */
#ifndef fp_barrierl
#define fp_barrierl fp_barrierl
static inline long double fp_barrierl(long double x) {
  /* Store to and reload from a volatile object.  */
  volatile long double y = x;
  return y;
}
#endif

/* fp_force_eval ensures that the input value is computed when that's
   otherwise unused.  To prevent the constant folding of the input
   expression, an additional fp_barrier may be needed or a compilation
   mode that does so (e.g. -frounding-math in gcc). Then it can be
   used to evaluate an expression for its fenv side-effects only.   */

/* Generic float version: the value is written to a volatile object and then
   discarded.  Skipped when fp_force_evalf is already defined.  */
#ifndef fp_force_evalf
#define fp_force_evalf fp_force_evalf
static inline void fp_force_evalf(float x) {
  volatile float y;
  y = x;
}
#endif

/* Generic double version: the value is written to a volatile object and then
   discarded.  Skipped when fp_force_eval is already defined.  */
#ifndef fp_force_eval
#define fp_force_eval fp_force_eval
static inline void fp_force_eval(double x) {
  volatile double y;
  y = x;
}
#endif

/* Generic long double version: the value is written to a volatile object and
   then discarded.  Skipped when fp_force_evall is already defined.  */
#ifndef fp_force_evall
#define fp_force_evall fp_force_evall
static inline void fp_force_evall(long double x) {
  volatile long double y;
  y = x;
}
#endif

/* Dispatches to fp_force_evalf / fp_force_eval / fp_force_evall according to
   sizeof(x): float-sized, double-sized, anything else.  Wrapped in
   do { } while (0) so it behaves as a single statement.  */
#define FORCE_EVAL(x)                         \
  do {                                        \
    if (sizeof(x) == sizeof(float)) {         \
      fp_force_evalf(x);                      \
    } else if (sizeof(x) == sizeof(double)) { \
      fp_force_eval(x);                       \
    } else {                                  \
      fp_force_evall(x);                      \
    }                                         \
  } while (0)

/* Bit-cast helpers.  Each builds an anonymous union compound literal
   initialized through its first member and reads back the other member, so
   the bits are reinterpreted rather than numerically converted.  */

/* float -> uint32_t bit pattern.  */
#define asuint(f) \
  ((union {       \
    float _f;     \
    uint32_t _i;  \
  }) {f})         \
    ._i
/* uint32_t bit pattern -> float.  */
#define asfloat(i) \
  ((union {        \
    uint32_t _i;   \
    float _f;      \
  }) {i})          \
    ._f
/* double -> uint64_t bit pattern.  */
#define asuint64(f) \
  ((union {         \
    double _f;      \
    uint64_t _i;    \
  }) {f})           \
    ._i
/* uint64_t bit pattern -> double.  */
#define asdouble(i) \
  ((union {         \
    uint64_t _i;    \
    double _f;      \
  }) {i})           \
    ._f

/* Word-level accessors built on the bit-cast macros.  "High" is bits 63..32
   of the double's bit pattern, "low" is bits 31..0.  */

/* Splits double d into its high and low 32-bit words.  */
#define EXTRACT_WORDS(hi, lo, d) \
  do {                           \
    uint64_t __u = asuint64(d);  \
    (hi) = __u >> 32;            \
    (lo) = (uint32_t)__u;        \
  } while (0)

/* Stores the high 32-bit word of double d into hi.  */
#define GET_HIGH_WORD(hi, d)  \
  do {                        \
    (hi) = asuint64(d) >> 32; \
  } while (0)

/* Stores the low 32-bit word of double d into lo.  */
#define GET_LOW_WORD(lo, d)       \
  do {                            \
    (lo) = (uint32_t)asuint64(d); \
  } while (0)

/* Assembles double d from a high and a low 32-bit word.  */
#define INSERT_WORDS(d, hi, lo)                              \
  do {                                                       \
    (d) = asdouble(((uint64_t)(hi) << 32) | (uint32_t)(lo)); \
  } while (0)

/* Replaces the high word of d, keeping its current low word.  */
#define SET_HIGH_WORD(d, hi) INSERT_WORDS(d, hi, (uint32_t)asuint64(d))

/* Replaces the low word of d, keeping its current high word.  */
#define SET_LOW_WORD(d, lo) INSERT_WORDS(d, asuint64(d) >> 32, lo)

/* Stores the 32-bit pattern of float d into w.  */
#define GET_FLOAT_WORD(w, d) \
  do {                       \
    (w) = asuint(d);         \
  } while (0)

/* Sets float d from the 32-bit pattern w.  */
#define SET_FLOAT_WORD(d, w) \
  do {                       \
    (d) = asfloat(w);        \
  } while (0)

hidden int __rem_pio2_large(double*, double*, int, int, int);

hidden int __rem_pio2(double, double*);
hidden double __sin(double, double, int);
hidden double __cos(double, double);
hidden double __tan(double, double, int);
hidden double __expo2(double, double);

hidden int __rem_pio2f(float, double*);
hidden float __sindf(double);
hidden float __cosdf(double);
hidden float __tandf(double, int);
hidden float __expo2f(float, float);

hidden int __rem_pio2l(long double, long double*);
hidden long double __sinl(long double, long double, int);
hidden long double __cosl(long double, long double);
hidden long double __tanl(long double, long double, int);

hidden long double __polevll(long double, const long double*, int);
hidden long double __p1evll(long double, const long double*, int);

extern int __signgam;
hidden double __lgamma_r(double, int*);
hidden float __lgammaf_r(float, int*);

/* error handling functions */
hidden float __math_xflowf(uint32_t, float);
hidden float __math_uflowf(uint32_t);
hidden float __math_oflowf(uint32_t);
hidden float __math_divzerof(uint32_t);
hidden float __math_invalidf(float);
hidden double __math_xflow(uint32_t, double);
hidden double __math_uflow(uint32_t);
hidden double __math_oflow(uint32_t);
hidden double __math_divzero(uint32_t);
hidden double __math_invalid(double);
#if LDBL_MANT_DIG != DBL_MANT_DIG
hidden long double __math_invalidl(long double);
#endif

#endif


================================================
FILE: Source/Windows/Common/CRT/Musl/log2.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "log2_data.h"

#define T __log2_data.tab
#define T2 __log2_data.tab2
#define B __log2_data.poly1
#define A __log2_data.poly
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
#define N (1 << LOG2_TABLE_BITS)
#define OFF 0x3fe6000000000000

/* Top 16 bits of a double.  */
/* Extracts the uppermost 16 bits (sign, exponent and the first four mantissa
   bits) of the bit pattern of x.  */
static inline uint32_t top16(double x) {
  const uint64_t pattern = asuint64(x);
  return pattern >> 48;
}

/* Base-2 logarithm of x.
   - Inputs whose bit pattern lies in [LO, HI) around 1.0 are evaluated with
     the dedicated polynomial B (__log2_data.poly1) on r = x - 1.
   - +-0 returns __math_divzero(1), +inf returns x, negative or NaN inputs
     return __math_invalid(x); positive subnormals are rescaled by 2^52.
   - Everything else is reduced to x = 2^k * z, with a table lookup for
     1/c and log2(c) followed by the polynomial A (__log2_data.poly).  */
double log2(double x) {
  double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
  uint64_t ix, iz, tmp;
  uint32_t top;
  int k, i;

  ix = asuint64(x);
  top = top16(x);
#define LO asuint64(1.0 - 0x1.5b51p-5)
#define HI asuint64(1.0 + 0x1.6ab2p-5)
  /* Unsigned wrap-around turns this into the range check LO <= ix < HI.  */
  if (predict_false(ix - LO < HI - LO)) {
    /* Handle close to 1.0 inputs separately.  */
    /* Fix sign of zero with downward rounding when x==1.  */
    if (WANT_ROUNDING && predict_false(ix == asuint64(1.0))) {
      return 0;
    }
    r = x - 1.0;
#if __FP_FAST_FMA
    hi = r * InvLn2hi;
    lo = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -hi);
#else
    double_t rhi, rlo;
    /* Split r into a high part (low 32 bits cleared) and the remainder.  */
    rhi = asdouble(asuint64(r) & -1ULL << 32);
    rlo = r - rhi;
    hi = rhi * InvLn2hi;
    lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
    r2 = r * r; /* rounding error: 0x1p-62.  */
    r4 = r2 * r2;
    /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma).  */
    p = r2 * (B[0] + r * B[1]);
    y = hi + p;
    lo += hi - y + p;
    lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
    y += lo;
    return eval_as_double(y);
  }
  /* Unsigned wrap-around again: true unless 0x0010 <= top < 0x7ff0.  */
  if (predict_false(top - 0x0010 >= 0x7ff0 - 0x0010)) {
    /* x < 0x1p-1022 or inf or nan.  */
    if (ix * 2 == 0) {
      return __math_divzero(1);
    }
    if (ix == asuint64(INFINITY)) { /* log(inf) == inf.  */
      return x;
    }
    /* Sign bit set, or exponent field all ones (NaN; +inf was handled).  */
    if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) {
      return __math_invalid(x);
    }
    /* x is subnormal, normalize it.  */
    ix = asuint64(x * 0x1p52);
    ix -= 52ULL << 52;
  }

  /* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
     The range is split into N subintervals.
     The ith subinterval contains z and c is near its center.  */
  tmp = ix - OFF;
  i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
  k = (int64_t)tmp >> 52; /* arithmetic shift */
  iz = ix - (tmp & 0xfffULL << 52);
  invc = T[i].invc;
  logc = T[i].logc;
  z = asdouble(iz);
  kd = (double_t)k;

  /* log2(x) = log2(z/c) + log2(c) + k.  */
  /* r ~= z/c - 1, |r| < 1/(2*N).  */
#if __FP_FAST_FMA
  /* rounding error: 0x1p-55/N.  */
  r = __builtin_fma(z, invc, -1.0);
  t1 = r * InvLn2hi;
  t2 = r * InvLn2lo + __builtin_fma(r, InvLn2hi, -t1);
#else
  double_t rhi, rlo;
  /* rounding error: 0x1p-55/N + 0x1p-65.  */
  r = (z - T2[i].chi - T2[i].clo) * invc;
  rhi = asdouble(asuint64(r) & -1ULL << 32);
  rlo = r - rhi;
  t1 = rhi * InvLn2hi;
  t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif

  /* hi + lo = r/ln2 + log2(c) + k.  */
  t3 = kd + logc;
  hi = t3 + t1;
  lo = t3 - hi + t1 + t2;

  /* log2(r+1) = r/ln2 + r^2*poly(r).  */
  /* Evaluation is optimized assuming superscalar pipelined execution.  */
  r2 = r * r; /* rounding error: 0x1p-54/N^2.  */
  r4 = r2 * r2;
  /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
     ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma).  */
  p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
  y = lo + r2 * p + hi;
  return eval_as_double(y);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/log2_data.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#include "log2_data.h"

#define N (1 << LOG2_TABLE_BITS)

const struct log2_data __log2_data = {
  // First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
  .invln2hi = 0x1.7154765200000p+0,
  .invln2lo = 0x1.705fc2eefa200p-33,
  .poly1 =
    {
      // relative error: 0x1.2fad8188p-63
      // in -0x1.5b51p-5 0x1.6ab2p-5
      -0x1.71547652b82fep-1,
      0x1.ec709dc3a03f7p-2,
      -0x1.71547652b7c3fp-2,
      0x1.2776c50f05be4p-2,
      -0x1.ec709dd768fe5p-3,
      0x1.a61761ec4e736p-3,
      -0x1.7153fbc64a79bp-3,
      0x1.484d154f01b4ap-3,
      -0x1.289e4a72c383cp-3,
      0x1.0b32f285aee66p-3,
    },
  .poly =
    {
      // relative error: 0x1.a72c2bf8p-58
      // abs error: 0x1.67a552c8p-66
      // in -0x1.f45p-8 0x1.f45p-8
      -0x1.71547652b8339p-1,
      0x1.ec709dc3a04bep-2,
      -0x1.7154764702ffbp-2,
      0x1.2776c50034c48p-2,
      -0x1.ec7b328ea92bcp-3,
      0x1.a6225e117f92ep-3,
    },
  /* Algorithm:

          x = 2^k z
          log2(x) = k + log2(c) + log2(z/c)
          log2(z/c) = poly(z/c - 1)

  where z is in [0x1.6p-1; 0x1.6p0) which is split into N subintervals and z
  falls into the ith one, then table entries are computed as

          tab[i].invc = 1/c
          tab[i].logc = (double)log2(c)
          tab2[i].chi = (double)c
          tab2[i].clo = (double)(c - (double)c)

  where c is near the center of the subinterval and is chosen by trying +-2^29
  floating point invc candidates around 1/center and selecting one for which

          1) the rounding error in 0x1.8p10 + logc is 0,
          2) the rounding error in z - chi - clo is < 0x1p-64 and
          3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).

  Note: 1) ensures that k + logc can be computed without rounding error, 2)
  ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
  single rounding error when there is no fast fma for z*invc - 1, 3) ensures
  that logc + poly(z/c - 1) has small error, however near x == 1 when
  |log2(x)| < 0x1p-4, this is not enough so that is special cased.  */
  .tab =
    {
      {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
      {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
      {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2}, {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
      {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2}, {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
      {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2}, {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
      {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2}, {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
      {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2}, {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
      {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2}, {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
      {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2}, {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
      {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2}, {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
      {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3}, {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
      {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3}, {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
      {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3}, {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
      {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3}, {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
      {0x1.19453847f2200p+0, -0x1.162595afdc000p-3}, {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
      {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4}, {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
      {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4}, {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
      {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4}, {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
      {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5}, {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
      {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6}, {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
      {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},  {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
      {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},  {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
      {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},  {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
      {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},  {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
      {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},  {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
      {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},  {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
      {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},  {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
      {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},  {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
      {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},  {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
      {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},  {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
      {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},  {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
      {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},  {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
    },
#if !__FP_FAST_FMA
  .tab2 =
    {
      {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},  {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
      {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},  {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
      {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},  {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
      {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},  {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
      {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55}, {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
      {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},  {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
      {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56}, {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
      {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},  {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
      {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57}, {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
      {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55}, {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
      {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55}, {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
      {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},    {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
      {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},  {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
      {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56}, {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
      {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58}, {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
      {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},  {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
      {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},   {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
      {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55}, {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
      {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55}, {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
      {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},  {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
      {0x1.0200004292367p+0, 0x1.b7ff365324681p-54},  {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
      {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58}, {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
      {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55}, {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
      {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},  {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
      {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55}, {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
      {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56}, {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
      {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},  {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
      {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},  {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
      {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},  {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
      {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55}, {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
      {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},  {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
      {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},  {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
    },
#endif
};


================================================
FILE: Source/Windows/Common/CRT/Musl/log2_data.h
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright (c) 2018, Arm Limited.

#ifndef _LOG2_DATA_H
#define _LOG2_DATA_H

#define LOG2_TABLE_BITS 6
#define LOG2_POLY_ORDER 7
#define LOG2_POLY1_ORDER 11
/* Layout of the constant block __log2_data used by log2().  Only declared
   here; the initializer is in log2_data.c.  */
extern const struct log2_data {
  /* Two-part constant (high and low halves).  */
  double invln2hi;
  double invln2lo;
  /* Polynomial coefficients: LOG2_POLY_ORDER - 1 == 6 entries.  */
  double poly[LOG2_POLY_ORDER - 1];
  /* Polynomial coefficients: LOG2_POLY1_ORDER - 1 == 10 entries.  */
  double poly1[LOG2_POLY1_ORDER - 1];
  /* One {invc, logc} pair per subinterval: 2^LOG2_TABLE_BITS == 64 entries.  */
  struct {
    double invc, logc;
  } tab[1 << LOG2_TABLE_BITS];
#if !__FP_FAST_FMA
  /* Only present when no fast fma is available: {chi, clo} pair per
     subinterval.  */
  struct {
    double chi, clo;
  } tab2[1 << LOG2_TABLE_BITS];
#endif
} __log2_data;

#endif


================================================
FILE: Source/Windows/Common/CRT/Musl/remainder.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include <math.h>

double remainder(double x, double y) {
  int q;
  return remquo(x, y, &q);
}


================================================
FILE: Source/Windows/Common/CRT/Musl/remquo.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2020 Rich Felker, et al.

#include <math.h>
#include <stdint.h>

/* Remainder of x / y with the quotient rounded to nearest (ties resolved via
   the parity of the quotient, see the q % 2 test below), computed with
   integer arithmetic on the IEEE-754 bit patterns.
   *quo receives the low 31 bits of the quotient magnitude, negated when x
   and y have different signs; it is set to 0 on every early return.
   Returns NaN when y is zero or NaN, or when x is infinite or NaN.  */
double remquo(double x, double y, int* quo) {
  union {
    double f;
    uint64_t i;
  } ux = {x}, uy = {y};
  /* Biased 11-bit exponent fields and sign bits of both operands.  */
  int ex = ux.i >> 52 & 0x7ff;
  int ey = uy.i >> 52 & 0x7ff;
  int sx = ux.i >> 63;
  int sy = uy.i >> 63;
  uint32_t q;
  uint64_t i;
  uint64_t uxi = ux.i;

  *quo = 0;
  /* y == +-0, y is NaN, or x is inf/NaN: the expression evaluates to NaN.  */
  if (uy.i << 1 == 0 || isnan(y) || ex == 0x7ff) {
    return (x * y) / (x * y);
  }
  /* x == +-0: the result is x itself.  */
  if (ux.i << 1 == 0) {
    return x;
  }

  /* normalize x and y */
  if (!ex) {
    /* Subnormal x: count leading zeros by decrementing ex, then shift so the
       leading 1 lands in bit 52.  */
    for (i = uxi << 12; i >> 63 == 0; ex--, i <<= 1)
      ;
    uxi <<= -ex + 1;
  } else {
    /* Normal x: keep the mantissa and make the implicit 1 explicit.  */
    uxi &= -1ULL >> 12;
    uxi |= 1ULL << 52;
  }
  if (!ey) {
    for (i = uy.i << 12; i >> 63 == 0; ey--, i <<= 1)
      ;
    uy.i <<= -ey + 1;
  } else {
    uy.i &= -1ULL >> 12;
    uy.i |= 1ULL << 52;
  }

  q = 0;
  if (ex < ey) {
    /* Only when the exponents differ by exactly one can the rounding step at
       the end still apply; otherwise x is returned unchanged.  */
    if (ex + 1 == ey) {
      goto end;
    }
    return x;
  }

  /* x mod y */
  /* Shift-and-subtract long division, accumulating quotient bits in q.  */
  for (; ex > ey; ex--) {
    i = uxi - uy.i;
    if (i >> 63 == 0) {
      uxi = i;
      q++;
    }
    uxi <<= 1;
    q <<= 1;
  }
  i = uxi - uy.i;
  if (i >> 63 == 0) {
    uxi = i;
    q++;
  }
  if (uxi == 0) {
    /* Exact division: choose an exponent for which the shift below simply
       leaves the zero mantissa at zero.  */
    ex = -60;
  } else {
    /* Renormalize: bring the leading 1 back up to bit 52.  */
    for (; uxi >> 52 == 0; uxi <<= 1, ex--)
      ;
  }
end:
  /* scale result and decide between |x| and |x|-|y| */
  if (ex > 0) {
    uxi -= 1ULL << 52;
    uxi |= (uint64_t)ex << 52;
  } else {
    uxi >>= -ex + 1;
  }
  ux.i = uxi;
  x = ux.f;
  /* From here on x and y are magnitudes; signs are reapplied at the end.  */
  if (sy) {
    y = -y;
  }
  /* Round the quotient up when the remainder is more than half of y, or
     exactly half and the quotient is odd.  */
  if (ex == ey || (ex + 1 == ey && (2 * x > y || (2 * x == y && q % 2)))) {
    x -= y;
    q++;
  }
  q &= 0x7fffffff;
  *quo = sx ^ sy ? -(int)q : (int)q;
  return sx ? -x : x;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/strtoimax.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2011 Rich Felker, et al.
// NOTE: From an older musl release that avoids stdio usage

#include <inttypes.h>
#include <errno.h>
#include <ctype.h>

/* Converts the initial part of s1 to an intmax_t in the given base.
 *
 * s1:   string to parse; leading whitespace and one optional sign are accepted.
 * p:    if non-NULL, receives a pointer to the first unparsed character, or
 *       s1 itself when no conversion could be performed.
 * base: 0 (auto-detect) or 2..36, interpreted by strtoumax().
 *
 * Returns the converted value.  On overflow returns INTMAX_MAX / INTMAX_MIN
 * and sets errno to ERANGE.  When no conversion is performed returns 0.
 */
intmax_t strtoimax(const char* s1, char** p, int base) {
  const unsigned char* s = (const unsigned char*)s1;
  char* end;
  int sign = 0;
  uintmax_t x;

  /* Initial whitespace */
  for (; isspace(*s); s++)
    ;

  /* Optional sign */
  if (*s == '-') {
    sign = *s++;
  } else if (*s == '+') {
    s++;
  }

  /* strtoumax() skips whitespace and accepts a sign of its own, so a second
     sign or whitespace following the sign consumed above ("--5", "- 5")
     would otherwise be parsed as a number.  Treat it as "no conversion".  */
  if (*s == '-' || *s == '+' || isspace(*s)) {
    if (p) {
      *p = (char*)s1;
    }
    errno = EINVAL;
    return 0;
  }

  x = strtoumax((const char*)s, &end, base);
  /* strtoumax() reports "no conversion" by handing back the pointer it was
     given, which is already past the whitespace/sign skipped here.  Callers
     must see the original string start in that case.  */
  if (end == (const char*)s) {
    end = (char*)s1;
  }
  if (p) {
    *p = end;
  }

  if (x > INTMAX_MAX) {
    /* A negated magnitude of exactly 2^63 is INTMAX_MIN and not an error.  */
    if (!sign || -x != INTMAX_MIN) {
      errno = ERANGE;
    }
    return sign ? INTMAX_MIN : INTMAX_MAX;
  }
  return sign ? -x : x;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/strtoll.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2011 Rich Felker, et al.
// NOTE: From an older musl release that avoids stdio usage

#include <stdlib.h>
#include <inttypes.h>
#include <errno.h>
#include <limits.h>

/* Parses a long long by delegating to strtoimax() and clamping the result to
   [LLONG_MIN, LLONG_MAX]; errno is set to ERANGE whenever clamping occurs.  */
long long strtoll(const char* s, char** p, int base) {
  const intmax_t parsed = strtoimax(s, p, base);

  if (parsed < LLONG_MIN) {
    errno = ERANGE;
    return LLONG_MIN;
  }
  if (parsed > LLONG_MAX) {
    errno = ERANGE;
    return LLONG_MAX;
  }
  return parsed;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/strtoull.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2011 Rich Felker, et al.
// NOTE: From an older musl release that avoids stdio usage

#include <stdlib.h>
#include <inttypes.h>
#include <errno.h>
#include <limits.h>

/* Parses an unsigned long long by delegating to strtoumax(); a result above
   ULLONG_MAX is clamped and reported through errno = ERANGE.  */
unsigned long long strtoull(const char* s, char** p, int base) {
  const uintmax_t parsed = strtoumax(s, p, base);

  if (parsed <= ULLONG_MAX) {
    return parsed;
  }
  errno = ERANGE;
  return ULLONG_MAX;
}


================================================
FILE: Source/Windows/Common/CRT/Musl/strtoumax.c
================================================
// SPDX-License-Identifier: MIT
// SPDX-FileCopyrightText: Copyright © 2005-2011 Rich Felker, et al.
// NOTE: From an older musl release that avoids stdio usage

#include <inttypes.h>
#include <stdlib.h>
#include <errno.h>
#include <ctype.h>
#include <stdio.h>

/* Lookup table for digit values, indexed by unsigned character code.
   '0'-'9' map to 0-9, 'A'-'Z' and 'a'-'z' map to 10-35; every other entry is
   -1, which becomes 255 as an unsigned char and therefore compares >= any
   valid base (2..36), i.e. "not a digit".  */
static const unsigned char digits[] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  -1, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};

/* Converts the initial part of s1 to a uintmax_t in the given base.
 *
 * s1:   string to parse; leading whitespace and one optional sign are accepted.
 * p:    if non-NULL, receives a pointer to the first unparsed character, or
 *       s1 when nothing could be parsed.
 * base: 0 selects 16 for a "0x"/"0X" prefix, 8 for a leading '0', else 10;
 *       otherwise must be in 2..36.
 *
 * Returns the value (negated in unsigned arithmetic after a '-').  An invalid
 * base or a missing first digit returns 0 with errno = EINVAL; overflow
 * consumes the remaining digits and returns UINTMAX_MAX with errno = ERANGE.
 */
uintmax_t strtoumax(const char* s1, char** p, int base) {
  const unsigned char* s = (const unsigned char*)s1;
  size_t x1, z1;
  uintmax_t x, z = 0;
  int sign = 0;
  int shift;

  /* Give the stores through p somewhere harmless to land when the caller
     does not want the end pointer.  */
  if (!p) {
    p = (char**)&s1;
  }

  /* Initial whitespace */
  for (; isspace(*s); s++)
    ;

  /* Optional sign */
  if (*s == '-') {
    sign = *s++;
  } else if (*s == '+') {
    s++;
  }

  /* Default base 8, 10, or 16 depending on prefix */
  if (base == 0) {
    if (s[0] == '0') {
      if ((s[1] | 32) == 'x') {
        base = 16;
      } else {
        base = 8;
      }
    } else {
      base = 10;
    }
  }

  /* Reject bases outside 2..36 and inputs whose first character is not a
     digit of the chosen base.  */
  if ((unsigned)base - 2 > 36 - 2 || digits[*s] >= base) {
    *p = (char*)s1;
    errno = EINVAL;
    return 0;
  }

  /* Main loops. Only use big types if we have to. */
  if (base == 10) {
    for (x1 = 0; isdigit(*s) && x1 <= SIZE_MAX / 10 - 10; s++) {
      x1 = 10 * x1 + *s - '0';
    }
    for (x = x1; isdigit(*s) && x <= UINTMAX_MAX / 10 - 10; s++) {
      x = 10 * x + *s - '0';
    }
    if (isdigit(*s)) {
      /* At most one more digit can fit; check it exactly.  */
      if (isdigit(s[1]) || 10 * x > UINTMAX_MAX - (*s - '0')) {
        goto overflow;
      }
      x = 10 * x + *s - '0';
    }
  } else if (!(base & (base - 1))) {
    /* Power-of-two bases only (2, 4, 8, 16, 32): base & (base - 1) is zero
       exactly when a single bit is set.  The previous test, base & base / 2,
       was also zero for bases such as 5, 9 or 36 and sent them down this
       shift-based path as if they were base 32.  */
    if (base == 16) {
      /* Skip a "0x" prefix only when a hex digit actually follows it.  */
      if (s[0] == '0' && (s[1] | 32) == 'x' && digits[s[2]] < 16) {
        s += 2;
      }
      shift = 4;
      z1 = SIZE_MAX / 16;
      z = UINTMAX_MAX / 16;
    } else if (base == 8) {
      shift = 3;
      z1 = SIZE_MAX / 8;
      z = UINTMAX_MAX / 8;
    } else if (base == 2) {
      shift = 1;
      z1 = SIZE_MAX / 2;
      z = UINTMAX_MAX / 2;
    } else if (base == 4) {
      shift = 2;
      z1 = SIZE_MAX / 4;
      z = UINTMAX_MAX / 4;
    } else /* base == 32 */ {
      shift = 5;
      z1 = SIZE_MAX / 32;
      z = UINTMAX_MAX / 32;
    }
    for (x1 = 0; digits[*s] < base && x1 <= z1; s++) {
      x1 = (x1 << shift) + digits[*s];
    }
    for (x = x1; digits[*s] < base && x <= z; s++) {
      x = (x << shift) + digits[*s];
    }
    /* A remaining digit means another shift would lose high bits.  */
    if (digits[*s] < base) {
      goto overflow;
    }
  } else {
    /* Generic bases: multiply-and-add with explicit overflow limits.  */
    z1 = SIZE_MAX / base - base;
    for (x1 = 0; digits[*s] < base && x1 <= z1; s++) {
      x1 = x1 * base + digits[*s];
    }
    if (digits[*s] < base) {
      z = UINTMAX_MAX / base - base;
    }
    for (x = x1; digits[*s] < base && x <= z; s++) {
      x = x * base + digits[*s];
    }
    if (digits[*s] < base) {
      /* At most one more digit can fit; check it exactly.  */
      if (digits[s[1]] < base || x * base > UINTMAX_MAX - digits[*s]) {
        goto overflow;
      }
      x = x * base + digits[*s];
    }
  }

  *p = (char*)s;
  return sign ? -x : x;

overflow:
  /* Consume the rest of the digit run so *p points past the whole number.  */
  for (; digits[*s] < base; s++)
    ;
  *p = (char*)s;
  errno = ERANGE;
  return UINTMAX_MAX;
}


================================================
FILE: Source/Windows/Common/CRT/String.cpp
================================================
// SPDX-License-Identifier: MIT
#define _SECIMP
#define _CRTIMP


#define vsprintf __ignore__vsprintf
#include <cstdint>
#include <cstdlib>
#include <cstdio>
#include <cerrno>
#include <unistd.h>
#include <ctype.h>
#include <locale.h>
#include <wchar.h>
#include <time.h>
#include "../Priv.h"
#undef vsprintf

extern "C" int __cdecl vsprintf(char* __restrict__ _Dest, const char* __restrict__ _Format, va_list _Args) __MINGW_ATTRIB_DEPRECATED_SEC_WARN;

static unsigned short CTypeData[256];
static char Locale[2] = "C";

DLLEXPORT_FUNC(char*, _strdup, (const char* Src)) {
  size_t Len = strlen(Src) + 1;
  char* Dst = reinterpret_cast<char*>(malloc(Len));
  memcpy(Dst, Src, Len);
  return Dst;
}

// POSIX-named alias: forwards directly to _strdup() and returns its result.
char* strdup(const char* Src) {
  return _strdup(Src);
}

// The functions from here down to __stdio_common_vsscanf are CRT entry points that this file defines
// but does not implement: each body only expands UNIMPLEMENTED(), which is defined outside this file
// (NOTE(review): presumably in ../Priv.h and presumably fatal — confirm).

// strtof: not implemented.
float strtof(const char* __restrict__, char** __restrict__) {
  UNIMPLEMENTED();
}

// strtod: not implemented.
double strtod(const char* __restrict__, char** __restrict__) {
  UNIMPLEMENTED();
}

// _strtod_l: not implemented.
DLLEXPORT_FUNC(double, _strtod_l, (const char* __restrict__ _Str, char** __restrict__ _EndPtr, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// wcstoll: not implemented.
long long wcstoll(const wchar_t* __restrict__ nptr, wchar_t** __restrict__ endptr, int base) {
  UNIMPLEMENTED();
}

// wcstoull: not implemented.
unsigned long long wcstoull(const wchar_t* __restrict__ nptr, wchar_t** __restrict__ endptr, int base) {
  UNIMPLEMENTED();
}

// atoll: not implemented.
long long atoll(const char*) {
  UNIMPLEMENTED();
}

// strtold: not implemented.
long double strtold(const char* __restrict__, char** __restrict__) {
  UNIMPLEMENTED();
}

// wcstod: not implemented.
double wcstod(const wchar_t* __restrict__ _Str, wchar_t** __restrict__ _EndPtr) {
  UNIMPLEMENTED();
}

// wcstold: not implemented.
long double wcstold(const wchar_t* __restrict__, wchar_t** __restrict__) {
  UNIMPLEMENTED();
}

// wcstof: not implemented.
float wcstof(const wchar_t* __restrict__ nptr, wchar_t** __restrict__ endptr) {
  UNIMPLEMENTED();
}

// _strtoi64_l: not implemented.
DLLEXPORT_FUNC(__int64, _strtoi64_l, (const char* _String, char** _EndPtr, int _Radix, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _strtoui64_l: not implemented.
DLLEXPORT_FUNC(unsigned __int64, _strtoui64_l, (const char* _String, char** _EndPtr, int _Radix, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// __stdio_common_vsscanf: not implemented.
int __stdio_common_vsscanf(unsigned __int64 options, const char* input, size_t length, const char* format, _locale_t locale, va_list valist) {
  UNIMPLEMENTED();
}

// Formats into the wide buffer `str` by forwarding to _vsnwprintf().
// The `options` and `locale` arguments are ignored.
int __stdio_common_vswprintf(unsigned __int64 options, wchar_t* str, size_t len, const wchar_t* format, _locale_t locale, va_list valist) {
  return _vsnwprintf(str, len, format, valist);
}

// MinGW-named entry point for bounded wide formatting; a pass-through to _vsnwprintf().
int __mingw_vsnwprintf(wchar_t* __restrict__ Dest, size_t Count, const wchar_t* __restrict__ Format, va_list Args) {
  return _vsnwprintf(Dest, Count, Format, Args);
}

// MinGW-named entry point for unbounded narrow formatting; a pass-through to vsprintf().
int __mingw_vsprintf(char* __restrict__ Dest, const char* __restrict__ Format, va_list Args) {
  return vsprintf(Dest, Format, Args);
}

// _strftime_l: not implemented; the body only expands UNIMPLEMENTED() (defined outside this file).
DLLEXPORT_FUNC(size_t, _strftime_l,
               (char* __restrict__ Buf, size_t Max_size, const char* __restrict__ Format, const struct tm* __restrict__ Tm, _locale_t Locale)) {
  UNIMPLEMENTED();
}

// C99-style vsnprintf built on top of _vsnprintf().
//
// Writes at most `Count` bytes (including the terminating NUL) to `Dest` and returns the number of
// characters the full output needs, excluding the NUL. A null `Dest` or zero `Count` only measures.
//
// _vsnprintf() differs from C99 in two ways that are papered over here:
//  - it returns a negative value on truncation instead of the required length, and
//  - it does not NUL-terminate when the output fills the buffer exactly.
int vsnprintf(char* __restrict__ Dest, size_t Count, const char* __restrict__ Format, va_list Args) {
  // The arguments may have to be walked twice (format, then measure); a va_list must not be reused
  // after it has been consumed, so keep an untouched copy for the measuring pass.
  __builtin_va_list MeasureArgs;
  __builtin_va_copy(MeasureArgs, Args);

  int ret = -1;
  if (Dest && Count != 0) {
    ret = _vsnprintf(Dest, Count, Format, Args);
    if (ret < 0 || static_cast<size_t>(ret) >= Count) {
      // Truncated, or exactly full with no room left for the terminator.
      Dest[Count - 1] = '\0';
    }
  }

  if (ret < 0) {
    // Either nothing was written or the output was truncated: compute the required length.
    ret = _vsnprintf(nullptr, 0, Format, MeasureArgs);
  }

  __builtin_va_end(MeasureArgs);
  return ret;
}

// Variadic front end for vsnprintf(): packages the trailing arguments and forwards them,
// returning whatever vsnprintf() returns.
int snprintf(char* stream, size_t n, const char* format, ...) {
  __builtin_va_list VarArgs;
  __builtin_va_start(VarArgs, format);
  const int Written = vsnprintf(stream, n, format, VarArgs);
  __builtin_va_end(VarArgs);
  return Written;
}

// Ignores both arguments and always returns the file-level `Locale` buffer, which holds "C".
char* setlocale(int _Category, const char* _Locale) {
  return Locale;
}

// Ignores `_Flag` and always returns 0.
int _configthreadlocale(int _Flag) {
  return 0;
}

// Ignores both arguments and always returns nullptr; no locale object is created.
DLLEXPORT_FUNC(_locale_t, _create_locale, (int _Category, const char* _Locale)) {
  return nullptr;
}

// localeconv: not implemented; the body only expands UNIMPLEMENTED() (defined outside this file).
DLLEXPORT_FUNC(struct lconv*, localeconv, (void)) {
  UNIMPLEMENTED();
}

// No-op: ignores `_Locale` (the visible _create_locale above never returns an object to release).
DLLEXPORT_FUNC(void, _free_locale, (_locale_t _Locale)) {}

// btowc: not implemented.
wint_t btowc(int) {
  UNIMPLEMENTED();
}

// mbsrtowcs: not implemented.
size_t mbsrtowcs(wchar_t* __restrict__ _Dest, const char** __restrict__ _PSrc, size_t _Count,
                 mbstate_t* __restrict__ _State) __MINGW_ATTRIB_DEPRECATED_SEC_WARN {
  UNIMPLEMENTED();
}

// mbrtowc: not implemented.
size_t mbrtowc(wchar_t* __restrict__ _DstCh, const char* __restrict__ _SrcCh, size_t _SizeInBytes, mbstate_t* __restrict__ _State) {
  UNIMPLEMENTED();
}

// Converts one narrow character to a wide character, treating every byte as a single-byte character.
// `Locale` is ignored.
//
// Returns 0 when `SrcCh` is null, `SrcSizeInBytes` is 0, or `*SrcCh` is the NUL character;
// otherwise returns 1 (the number of bytes consumed). `DstCh` may be null, in which case nothing is stored.
DLLEXPORT_FUNC(int, _mbtowc_l, (wchar_t* __restrict__ DstCh, const char* __restrict__ SrcCh, size_t SrcSizeInBytes, _locale_t Locale)) {
  if (!SrcCh || SrcSizeInBytes == 0) {
    return 0;
  }

  // Go through unsigned char so bytes >= 0x80 are zero-extended instead of sign-extended into wchar_t.
  const auto Byte = static_cast<unsigned char>(*SrcCh);
  if (DstCh) {
    *DstCh = static_cast<wchar_t>(Byte);
  }

  // The mbtowc contract is to return 0 for the NUL character.
  return Byte != 0 ? 1 : 0;
}

// Apart from __pctype_func, every function from here to the end of the file is a CRT entry point that
// is defined but not implemented: each body only expands UNIMPLEMENTED(), which is defined outside this
// file (NOTE(review): presumably in ../Priv.h and presumably fatal — confirm).

// mbrlen: not implemented.
size_t mbrlen(const char* __restrict__ _Ch, size_t _SizeInBytes, mbstate_t* __restrict__ _State) {
  UNIMPLEMENTED();
}

// wcrtomb: not implemented.
size_t wcrtomb(char* __restrict__ _Dest, wchar_t _Source, mbstate_t* __restrict__ _State) {
  UNIMPLEMENTED();
}

// wcrtomb_s: not implemented.
DLLEXPORT_FUNC(errno_t, wcrtomb_s, (size_t* _Retval, char* _Dst, size_t _SizeInBytes, wchar_t _Ch, mbstate_t* _State)) {
  UNIMPLEMENTED();
}

// wctob: not implemented.
int wctob(wint_t _WCh) {
  UNIMPLEMENTED();
}

// strerror_s: not implemented.
DLLEXPORT_FUNC(errno_t, strerror_s, (char* _Buf, size_t _SizeInBytes, int _ErrNum)) {
  UNIMPLEMENTED();
}

// _isctype: not implemented.
DLLEXPORT_FUNC(int, _isctype, (int _C, int _Type)) {
  UNIMPLEMENTED();
}

// Returns the file-level CTypeData table (256 entries). Nothing in the visible part of this file
// writes to it, so it holds its static zero-initialized contents unless populated elsewhere.
DLLEXPORT_FUNC(const unsigned short*, __pctype_func, (void)) {
  return CTypeData;
}

// _isctype_l: not implemented.
DLLEXPORT_FUNC(int, _isctype_l, (int _C, int _Type, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _strcoll_l: not implemented.
DLLEXPORT_FUNC(int, _strcoll_l, (const char* _Str1, const char* _Str2, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _strxfrm_l: not implemented.
DLLEXPORT_FUNC(size_t, _strxfrm_l, (char* __restrict__ _Dst, const char* __restrict__ _Src, size_t _MaxCount, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _wcscoll_l: not implemented.
DLLEXPORT_FUNC(int, _wcscoll_l, (const wchar_t* _Str1, const wchar_t* _Str2, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _wcsxfrm_l: not implemented.
DLLEXPORT_FUNC(size_t, _wcsxfrm_l, (wchar_t* __restrict__ _Dst, const wchar_t* __restrict__ _Src, size_t _MaxCount, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswalpha_l: not implemented.
DLLEXPORT_FUNC(int, _iswalpha_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswupper_l: not implemented.
DLLEXPORT_FUNC(int, _iswupper_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswlower_l: not implemented.
DLLEXPORT_FUNC(int, _iswlower_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswdigit_l: not implemented.
DLLEXPORT_FUNC(int, _iswdigit_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswxdigit_l: not implemented.
DLLEXPORT_FUNC(int, _iswxdigit_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswspace_l: not implemented.
DLLEXPORT_FUNC(int, _iswspace_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswpunct_l: not implemented.
DLLEXPORT_FUNC(int, _iswpunct_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswalnum_l: not implemented.
DLLEXPORT_FUNC(int, _iswalnum_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswprint_l: not implemented.
DLLEXPORT_FUNC(int, _iswprint_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswgraph_l: not implemented.
DLLEXPORT_FUNC(int, _iswgraph_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _iswcntrl_l: not implemented.
DLLEXPORT_FUNC(int, _iswcntrl_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _towupper_l: not implemented.
DLLEXPORT_FUNC(wint_t, _towupper_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _towlower_l: not implemented.
DLLEXPORT_FUNC(wint_t, _towlower_l, (wint_t _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _toupper_l: not implemented.
DLLEXPORT_FUNC(int, _toupper_l, (int _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// _tolower_l: not implemented.
DLLEXPORT_FUNC(int, _tolower_l, (int _C, _locale_t _Locale)) {
  UNIMPLEMENTED();
}

// ___mb_cur_max_func: not implemented.
DLLEXPORT_FUNC(int, ___mb_cur_max_func, (void)) {
  UNIMPLEMENTED();
}


================================================
FILE: Source/Windows/Common/CallRetStack.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/Context.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Debug/InternalThreadState.h>

namespace FEX::Windows::CallRetStack {
// Address layout of a thread's call-ret stack allocation, as computed by GetInfoThread().
struct CallRetStackInfo {
  uint64_t AllocationBase;  // Start of the reservation: one page below Thread->CallRetStackBase
  uint64_t AllocationEnd;   // One past the reservation: one page above the end of the stack
  uint64_t DefaultLocation; // Value the call-ret stack pointer is initialized and reset to
};

// Derives the allocation bounds and default stack pointer from Thread->CallRetStackBase.
// Marked inline because this header defines the function and may be included from several
// translation units.
inline CallRetStackInfo GetInfoThread(FEXCore::Core::InternalThreadState* Thread) {
  uint64_t Base = reinterpret_cast<uint64_t>(Thread->CallRetStackBase);
  // Leave some room from the base for the default location to allow for underflows without constant exceptions
  return {Base - FEXCore::Utils::FEX_PAGE_SIZE, Base + FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE + FEXCore::Utils::FEX_PAGE_SIZE,
          Base + FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE / 4};
}

// Allocates the thread's call-ret stack and points the frame's callret_sp at its default location.
// Dies if the memory cannot be reserved or committed: continuing would leave callret_sp pointing at
// unmapped memory and make HandleAccessViolation() claim unrelated low-address faults.
inline void InitializeThread(FEXCore::Core::InternalThreadState* Thread) {
  const auto StackSize = FEXCore::Core::InternalThreadState::CALLRET_STACK_SIZE;

  // Reserve the call-ret stack with an inaccessible guard page on both sides
  const void* CallRetStackAlloc = ::VirtualAlloc(nullptr, StackSize + 2 * FEXCore::Utils::FEX_PAGE_SIZE, MEM_RESERVE, PAGE_NOACCESS);
  if (!CallRetStackAlloc) {
    ERROR_AND_DIE_FMT("Failed to reserve call-ret stack");
  }

  // Only the middle of the reservation is committed; the first and last page stay PAGE_NOACCESS
  Thread->CallRetStackBase = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(CallRetStackAlloc) + FEXCore::Utils::FEX_PAGE_SIZE);
  if (!::VirtualAlloc(Thread->CallRetStackBase, StackSize, MEM_COMMIT, PAGE_READWRITE)) {
    ERROR_AND_DIE_FMT("Failed to commit call-ret stack");
  }

  Thread->CurrentFrame->State.callret_sp = GetInfoThread(Thread).DefaultLocation;
}

// Releases the whole reservation made by InitializeThread(). Does nothing if the thread has no
// call-ret stack, since the computed allocation base would be meaningless.
inline void DestroyThread(FEXCore::Core::InternalThreadState* Thread) {
  if (!Thread->CallRetStackBase) {
    return;
  }

  auto CallRetStackInfo = GetInfoThread(Thread);
  ::VirtualFree(reinterpret_cast<void*>(CallRetStackInfo.AllocationBase), 0, MEM_RELEASE);
}

// If `Address` lies anywhere inside the thread's call-ret stack reservation (guard pages included),
// resets `CallRetSPReg` to the default location and returns true. Returns false, leaving
// `CallRetSPReg` untouched, for any other address.
inline bool HandleAccessViolation(FEXCore::Core::InternalThreadState* Thread, uint64_t Address, uint64_t& CallRetSPReg) {
  auto CallRetStackInfo = GetInfoThread(Thread);
  if (Address >= CallRetStackInfo.AllocationBase && Address < CallRetStackInfo.AllocationEnd) {
    LogMan::Msg::DFmt("Call-ret stack inbalance: {:X}", Address);
    CallRetSPReg = CallRetStackInfo.DefaultLocation;
    return true;
  }
  return false;
}
} // namespace FEX::Windows::CallRetStack


================================================
FILE: Source/Windows/Common/Exception.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Debug/InternalThreadState.h>

namespace FEX::Windows {
// Builds the guest-visible EXCEPTION_RECORD for an exception taken while running guest code.
//
// Fault: the frame's synchronous fault data. If FaultToTopAndGeneratedException is set, the flag is
//        cleared and Signal/TrapNo/err_code are translated into a Windows exception.
// Src:   the exception record to start from.
// Rip:   guest instruction pointer; passed by reference and advanced by 3 for the `int 0x2d` case.
// Rax:   guest RAX, reported as the first exception parameter for `int 0x2d`.
//
// Returns a copy of Src with ExceptionAddress set to Rip when no fault was generated; otherwise the
// translated record. Combinations that are not recognized are logged and reported as
// EXCEPTION_ILLEGAL_INSTRUCTION.
template<typename TReg>
static inline EXCEPTION_RECORD
HandleGuestException(FEXCore::Core::CpuStateFrame::SynchronousFaultDataStruct& Fault, const EXCEPTION_RECORD& Src, TReg& Rip, TReg Rax) {
  EXCEPTION_RECORD Dst = Src;
  Dst.ExceptionAddress = reinterpret_cast<void*>(Rip);

  if (!Fault.FaultToTopAndGeneratedException) {
    return Dst;
  }
  // Consume the pending fault so it is only translated once.
  Fault.FaultToTopAndGeneratedException = false;

  Dst.ExceptionFlags = 0;
  Dst.NumberParameters = 0;

  switch (Fault.Signal) {
  case FEXCore::Core::FAULT_SIGILL: Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION; return Dst;
  case FEXCore::Core::FAULT_SIGTRAP:
    switch (Fault.TrapNo) {
    case FEXCore::X86State::X86_TRAPNO_DB: Dst.ExceptionCode = EXCEPTION_SINGLE_STEP; return Dst;
    case FEXCore::X86State::X86_TRAPNO_BP:
      // The breakpoint is reported one byte before Rip.
      Dst.ExceptionAddress = reinterpret_cast<void*>(Rip - 1);
      Dst.ExceptionCode = EXCEPTION_BREAKPOINT;
      Dst.NumberParameters = 1;
      Dst.ExceptionInformation[0] = 0;
      return Dst;
    default: LogMan::Msg::EFmt("Unknown SIGTRAP trap: {}", Fault.TrapNo); break;
    }
    break;
  case FEXCore::Core::FAULT_SIGSEGV:
    switch (Fault.TrapNo) {
    case FEXCore::X86State::X86_TRAPNO_GP:
      // An error code whose low three bits are 0b010 is treated as a software interrupt,
      // with the vector number in the remaining bits.
      if ((Fault.err_code & 0b111) == 0b010) {
        switch (Fault.err_code >> 3) {
        case 0x2d:
          Rip += 3;
          Dst.ExceptionCode = EXCEPTION_BREAKPOINT;
          Dst.ExceptionAddress = reinterpret_cast<void*>(Rip);
          Dst.NumberParameters = 1;
          Dst.ExceptionInformation[0] = Rax; // RAX
          // Note that ExceptionAddress doesn't equal the reported context RIP here, this discrepancy expected and not having it can trigger anti-debug logic.
          return Dst;
        default: LogMan::Msg::EFmt("Unknown interrupt: 0x{:X}", Fault.err_code >> 3); break;
        }
      } else {
        Dst.ExceptionCode = EXCEPTION_PRIV_INSTRUCTION;
        return Dst;
      }
      break;
    case FEXCore::X86State::X86_TRAPNO_OF: Dst.ExceptionCode = EXCEPTION_INT_OVERFLOW; return Dst;
    case FEXCore::X86State::X86_TRAPNO_PF:
      // A page-fault raised by an explicit break in JIT code is always an execute fault.
      // Set the code explicitly rather than inheriting whatever host exception carried the break.
      Dst.ExceptionCode = EXCEPTION_ACCESS_VIOLATION;
      Dst.NumberParameters = 2;
      Dst.ExceptionInformation[0] = EXCEPTION_EXECUTE_FAULT;
      Dst.ExceptionInformation[1] = Rip;
      return Dst;
    default: LogMan::Msg::EFmt("Unknown SIGSEGV trap: {}", Fault.TrapNo); break;
    }
    break;
  default: LogMan::Msg::EFmt("Unknown signal type: {}", Fault.Signal); break;
  }

  // Default to SIGILL
  Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION;
  return Dst;
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/Handle.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <winternl.h>

namespace FEX::Windows {
class ScopedHandle final {
public:
  ScopedHandle() = default;

  explicit ScopedHandle(HANDLE Handle)
    : Handle(Handle) {}

  // Move-only type
  ScopedHandle(const ScopedHandle&) = delete;
  ScopedHandle& operator=(ScopedHandle&) = delete;
  ScopedHandle(ScopedHandle&& rhs)
    : Handle(rhs.Handle) {
    rhs.Handle = INVALID_HANDLE_VALUE;
  }

  ~ScopedHandle() {
    if (Handle != INVALID_HANDLE_VALUE) {
      NtClose(Handle);
    }
  }

  const HANDLE& operator*() const {
    return Handle;
  }

  HANDLE& operator*() {
    return Handle;
  }

  operator bool() const {
    return Handle != INVALID_HANDLE_VALUE;
  }

private:
  HANDLE Handle {INVALID_HANDLE_VALUE};
};


// Reports whether `Handle` was granted every access right in `Access`.
// Returns false if the handle's basic information cannot be queried (any non-zero status).
inline bool ValidateHandleAccess(HANDLE Handle, ACCESS_MASK Access) {
  OBJECT_BASIC_INFORMATION BasicInfo;
  const auto QueryStatus = NtQueryObject(Handle, ObjectBasicInformation, &BasicInfo, sizeof(BasicInfo), nullptr);
  if (QueryStatus != 0) {
    return false;
  }

  // Requested rights that are absent from the granted mask.
  const ACCESS_MASK MissingRights = Access & ~BasicInfo.GrantedAccess;
  return MissingRights == 0;
}

// Duplicates `Handle` within the current process, requesting the `Access` rights.
// Returns an owning ScopedHandle on success and an empty one (tests false) on failure.
inline ScopedHandle DupHandle(HANDLE Handle, ACCESS_MASK Access) {
  HANDLE Duplicated = INVALID_HANDLE_VALUE;
  const NTSTATUS Status = NtDuplicateObject(NtCurrentProcess(), Handle, NtCurrentProcess(), &Duplicated, Access, 0, 0);
  if (!NT_SUCCESS(Status)) {
    // Don't trust the out-parameter after a failure: an implementation may have overwritten it
    // (e.g. with null), which would otherwise be wrapped as if it were a valid handle.
    return ScopedHandle {};
  }
  return ScopedHandle {Duplicated};
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/ImageTracker.cpp
================================================
// SPDX-License-Identifier: MIT

#include <array>
#include <mutex>
#include <filesystem>
#include <optional>
#include <string_view>
#include <cctype>

#include <FEXCore/Core/Context.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SourcecodeResolver.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/fextl/string.h>
#include <FEXCore/fextl/vector.h>
#include "Common/Config.h"

#include <fcntl.h>
#include <io.h>
#include <winternl.h>
#include <xxhash.h>

#include "Handle.h"
#include "Module.h"
#include "Priv.h"
#include "ImageTracker.h"

namespace FEX::Windows {
// Returns a copy of `String` with each byte passed through std::tolower.
static fextl::string ToLower(std::string_view String) {
  fextl::string Lowered;
  Lowered.reserve(String.size());
  // Go through unsigned char, as std::tolower requires a value representable as unsigned char.
  for (const unsigned char Ch : String) {
    Lowered.push_back(static_cast<char>(std::tolower(Ch)));
  }
  return Lowered;
}

// Computes a code-map file ID: the XXH3 64-bit hash of the lower-cased file name, XORed with a
// 64-bit key holding SizeOfImage in the high half and TimeDateStamp in the low half.
FEXCore::CodeMapFileId ComputeCodeMapId(std::string_view FileName, uint32_t TimeDateStamp, uint32_t SizeOfImage) {
  const fextl::string Normalized = ToLower(FileName);
  const uint64_t ImageKey = (static_cast<uint64_t>(SizeOfImage) << 32) | TimeDateStamp;
  return XXH3_64bits(Normalized.data(), Normalized.size()) ^ ImageKey;
}

// Reads the volatile metadata referenced by a mapped image's load-config directory.
//
// VolatileInstructions: receives `Address + Rva` for every entry of the volatile access table.
// VolatileValidRanges:  receives [Address + Rva, Address + Rva + Size) for every entry of the range table.
// Module / Address:     the mapped image (the caller passes the same base for both).
// EndAddress:           one past the end of the mapped image; used to bounds-check the tables.
// Nt:                   not used by the body.
//
// Returns without touching the outputs if the image has no load config, the load config is too
// small to contain VolatileMetadataPointer, or any referenced structure fails the bounds checks.
static void LoadImageVolatileMetadata(fextl::set<uint64_t>& VolatileInstructions, FEXCore::IntervalList<uint64_t>& VolatileValidRanges,
                                      HMODULE Module, ArchImageNtHeaders* Nt, uint64_t Address, uint64_t EndAddress) {
  ULONG Size;

  const auto* LoadConfig =
    reinterpret_cast<ArchImageLoadConfigDirectory*>(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, &Size));
  // LoadConfig->Size is the size the image declares for its load config structure; older layouts
  // end before the VolatileMetadataPointer field.
  if (!LoadConfig || LoadConfig->Size <= offsetof(ArchImageLoadConfigDirectory, VolatileMetadataPointer)) {
    return;
  }

  // VolatileMetadataPointer is used as an absolute address (not an RVA): it is compared against the
  // mapped range directly and cast to a pointer below.
  if (LoadConfig->VolatileMetadataPointer < Address || LoadConfig->VolatileMetadataPointer + sizeof(IMAGE_VOLATILE_METADATA) >= EndAddress) {
    return;
  }

  // The two tables, by contrast, are located by RVAs relative to the image base.
  const auto* VolatileMetadata = reinterpret_cast<IMAGE_VOLATILE_METADATA*>(LoadConfig->VolatileMetadataPointer);
  if (!VolatileMetadata || Address + VolatileMetadata->VolatileAccessTable + VolatileMetadata->VolatileAccessTableSize >= EndAddress ||
      Address + VolatileMetadata->VolatileInfoRangeTable + VolatileMetadata->VolatileInfoRangeTableSize >= EndAddress) {
    return;
  }

  // Table sizes are in bytes; a trailing partial entry is dropped by the integer division.
  const auto* VolatileAccessTableBegin = reinterpret_cast<IMAGE_VOLATILE_RVA_METADATA*>(Address + VolatileMetadata->VolatileAccessTable);
  const auto* VolatileAccessTableEnd =
    VolatileAccessTableBegin + (VolatileMetadata->VolatileAccessTableSize / sizeof(IMAGE_VOLATILE_RVA_METADATA));
  for (auto It = VolatileAccessTableBegin; It != VolatileAccessTableEnd; It++) {
    VolatileInstructions.emplace(Address + It->Rva);
  }

  const auto* VolatileInfoRangeTableBegin = reinterpret_cast<IMAGE_VOLATILE_RANGE_METADATA*>(Address + VolatileMetadata->VolatileInfoRangeTable);
  const auto* VolatileInfoRangeTableEnd =
    VolatileInfoRangeTableBegin + (VolatileMetadata->VolatileInfoRangeTableSize / sizeof(IMAGE_VOLATILE_RANGE_METADATA));
  for (auto It = VolatileInfoRangeTableBegin; It != VolatileInfoRangeTableEnd; It++) {
    VolatileValidRanges.Insert({Address + It->Rva, Address + It->Rva + It->Size});
  }
}

// Parses the base-relocation directory of the image mapped at `Address`.
//
// Returns a map from the RVA of each relocated location to its relocation kind (Rel32 for
// IMAGE_REL_BASED_HIGHLOW, Rel64 for IMAGE_REL_BASED_DIR64). IMAGE_REL_BASED_ABSOLUTE padding
// entries are skipped; any other relocation type is fatal. Returns an empty map when the image has
// no relocation directory. `Nt` is not used by the body.
//
// The directory comes from the guest image, so the walk is kept inside the directory bounds and
// stops at the first malformed block instead of reading past it.
static fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> LoadImageRelocations(ArchImageNtHeaders* Nt, uint64_t Address) {
  const auto Module = reinterpret_cast<HMODULE>(Address);
  ULONG Size = 0;

  const auto RelocationBlocksBegin =
    reinterpret_cast<uint64_t>(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_BASERELOC, &Size));
  if (!RelocationBlocksBegin || Size < sizeof(IMAGE_BASE_RELOCATION)) {
    return {};
  }

  fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> Result;
  const uint64_t DirectoryEnd = RelocationBlocksBegin + Size;
  const uint64_t RelocationBlocksEnd = DirectoryEnd - sizeof(IMAGE_BASE_RELOCATION);
  for (uint64_t CurrentRelocation = RelocationBlocksBegin; CurrentRelocation < RelocationBlocksEnd;) {
    const auto* Block = reinterpret_cast<IMAGE_BASE_RELOCATION*>(CurrentRelocation);
    // SizeOfBlock includes the IMAGE_BASE_RELOCATION header. Zero terminates the list; anything
    // smaller than the header is malformed and would desynchronize the walk.
    if (Block->SizeOfBlock < sizeof(IMAGE_BASE_RELOCATION)) {
      break;
    }

    // Never let a block extend beyond the directory itself.
    const uint64_t DeclaredBlockEnd = CurrentRelocation + Block->SizeOfBlock;
    const uint64_t BlockEnd = DeclaredBlockEnd < DirectoryEnd ? DeclaredBlockEnd : DirectoryEnd;
    CurrentRelocation += sizeof(IMAGE_BASE_RELOCATION);

    // Each entry is 16 bits: the type in the top 4 bits, the page offset in the low 12.
    // Only read entries that fit entirely inside the block.
    for (; CurrentRelocation + sizeof(uint16_t) <= BlockEnd; CurrentRelocation += sizeof(uint16_t)) {
      auto PackedRelocation = *reinterpret_cast<uint16_t*>(CurrentRelocation);
      uint32_t RelocatedRVA = Block->VirtualAddress + (PackedRelocation & 0xfff);
      uint8_t Type = PackedRelocation >> 12;

      switch (Type) {
      case IMAGE_REL_BASED_ABSOLUTE: break;
      case IMAGE_REL_BASED_HIGHLOW: Result[RelocatedRVA] = FEXCore::GuestRelocationType::Rel32; break;
      case IMAGE_REL_BASED_DIR64: Result[RelocatedRVA] = FEXCore::GuestRelocationType::Rel64; break;
      default: ERROR_AND_DIE_FMT("Unhandled relocation");
      }
    }

    // Resume at the block boundary even if the block size was odd.
    CurrentRelocation = BlockEnd;
  }

  return Result;
}

// Stores the context reference and the cache-generation flag, and parses the extended volatile
// metadata configuration value once into ExtendedMetaData.
ImageTracker::ImageTracker(FEXCore::Context::Context& CTX, bool IsGeneratingCache)
  : CTX {CTX}
  , ExtendedMetaData {FEX::VolatileMetadata::ParseExtendedVolatileMetadata(ExtendedVolatileMetadataConfig())}
  , IsGeneratingCache {IsGeneratingCache} {}

// Builds the per-image record for an image mapped at `Address`.
// The file ID is derived from the path's base name plus the header's TimeDateStamp and SizeOfImage;
// the section info spans the whole image, [Address, Address + SizeOfImage).
// SectionInfo is initialized from the `Info` member, which is declared before it in the struct and
// therefore already constructed at that point.
ImageTracker::MappedImageInfo::MappedImageInfo(std::string_view Path, uint64_t Address, ArchImageNtHeaders* Nt,
                                               fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> Relocations)
  : Info {.FileId = ComputeCodeMapId(BaseName(Path), Nt->FileHeader.TimeDateStamp, Nt->OptionalHeader.SizeOfImage),
          .Filename = ToLower(Path), // Normalize path case as Windows paths are case-insensitive
          .Relocations = std::move(Relocations)}
  , SectionInfo {.FileInfo = Info, .FileStartVA = Address, .BeginVA = Address, .EndVA = Address + Nt->OptionalHeader.SizeOfImage} {}

// Registers the PE image mapped at `Address` and returns its section info.
//
// Path:      path of the image; its base name is used for logging and extended-metadata lookup.
// Address:   base address of the mapped image.
// MainImage: when true (and code caching is enabled while not generating a cache), also sets up the
//            code-map writer and loads the AOT cache images.
//
// If an image is already tracked at `Address`, its existing section info is returned and nothing
// else happens. The context's code invalidation mutex is held for the whole call.
FEXCore::ExecutableFileSectionInfo ImageTracker::HandleImageMap(std::string_view Path, uint64_t Address, bool MainImage) {
  std::scoped_lock Lock(CTX.GetCodeInvalidationMutex());
  const fextl::string ModuleName {BaseName(Path)};
  const auto Module = reinterpret_cast<HMODULE>(Address);
  // NOTE(review): the result is dereferenced below without a null check — confirm that
  // RtlImageNtHeader cannot fail for images reaching this point.
  auto* Nt = reinterpret_cast<ArchImageNtHeaders*>(RtlImageNtHeader(Module));
  MappedImageInfo* ImageInfo = nullptr;
  {
    // Relocations are only parsed when generating a cache; they are gathered before taking ImagesLock.
    auto Relocations = [&]() {
      if (IsGeneratingCache) {
        return LoadImageRelocations(Nt, Address);
      }
      return fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> {};
    }();
    std::unique_lock Lk {ImagesLock};
    auto [It, Inserted] = MappedImages.emplace(std::piecewise_construct, std::forward_as_tuple(Address),
                                               std::forward_as_tuple(Path, Address, Nt, std::move(Relocations)));

    if (!Inserted) {
      return It->second.SectionInfo;
    }

    // The pointer outlives this scope's ImagesLock; HandleImageUnmap (the only visible eraser) takes
    // the code invalidation mutex, which this function still holds.
    ImageInfo = &It->second;
  }

  auto ID = FEXCore::CodeMap::GetBaseFilename(ImageInfo->Info, false);
  LogMan::Msg::DFmt("Load module {} ({}): {:X}", ModuleName, ID, Address);

  if (FEXCore::Config::Get_ENABLECODECACHINGWIP() && !IsGeneratingCache) {
    if (MainImage) {
      // Name the code map after the image ID and the current system time, under <cache dir>codemap\new\.
      LARGE_INTEGER Time;
      NtQuerySystemTime(&Time);
      const auto CodeMapDir = fmt::format("{}codemap\\new\\", FEX::Config::GetCacheDirectory());
      std::error_code ec;
      if (!std::filesystem::exists(CodeMapDir, ec)) {
        std::filesystem::create_directories(CodeMapDir, ec);
      }
      // Only install a writer if the directory exists or could be created.
      if (!ec) {
        ActiveCodeMapPath = fmt::format("{}{}.{}.bin", CodeMapDir, ID, Time.QuadPart);

        auto Writer = fextl::make_unique<FEXCore::CodeMapWriter>(*this, false);
        Writer->AppendSetMainExecutable(ImageInfo->Info);
        CTX.SetCodeMapWriter(std::move(Writer));
      }
      LoadAOTImages(*ImageInfo);
    }

    // If a cache file was loaded for this image's ID, hand its data to the code cache.
    auto AOTImage = AOTImages.find(ID);
    if (AOTImage != AOTImages.end()) {
      CTX.GetCodeCache().LoadData(nullptr, AOTImage->second.Data, ImageInfo->SectionInfo);
    }
  }

  // Collect volatile metadata from the image itself, then apply any extended metadata configured
  // for this module name.
  uint64_t EndAddress = Address + Nt->OptionalHeader.SizeOfImage;
  fextl::set<uint64_t> VolatileInstructions {};
  FEXCore::IntervalList<uint64_t> VolatileValidRanges {};
  LoadImageVolatileMetadata(VolatileInstructions, VolatileValidRanges, Module, Nt, Address, EndAddress);
  if (auto It = ExtendedMetaData.find(ModuleName); It != ExtendedMetaData.end()) {
    FEX::VolatileMetadata::ApplyFEXExtendedVolatileMetadata(It->second, VolatileInstructions, VolatileValidRanges, Address, EndAddress);
  }

  if (!VolatileInstructions.empty() || !VolatileValidRanges.Empty()) {
    // The logged count covers only the instruction entries, not the ranges.
    LogMan::Msg::DFmt("Loaded volatile metadata for {:X}: {} entries", Address, VolatileInstructions.size());
    CTX.AddForceTSOInformation(VolatileValidRanges, std::move(VolatileInstructions));
  }

  return ImageInfo->SectionInfo;
}

// Drops the forced-TSO information for [Address, Address + Size) and forgets the image tracked at
// `Address`, if any. The code invalidation mutex is taken first and held for the whole call.
void ImageTracker::HandleImageUnmap(uint64_t Address, uint64_t Size) {
  std::scoped_lock InvalidationGuard(CTX.GetCodeInvalidationMutex());
  CTX.RemoveForceTSOInformation(Address, Size);

  std::unique_lock ImagesGuard {ImagesLock};
  if (const auto Tracked = MappedImages.find(Address); Tracked != MappedImages.end()) {
    MappedImages.erase(Tracked);
  }
}

std::optional<FEXCore::ExecutableFileSectionInfo> ImageTracker::LookupExecutableFileSection(uint64_t Address) {
  std::shared_lock Lk {ImagesLock};
  auto It = MappedImages.upper_bound(Address);
  if (It == MappedImages.begin() || std::prev(It)->second.SectionInfo.EndVA <= Address) {
    return {};
  }
  return std::prev(It)->second.SectionInfo;
}

// Opens (creating and truncating) the active code-map file for appending, denying shared read and
// write access. Returns the file descriptor from _sopen(), or -1 if no code-map path has been set.
int ImageTracker::OpenCodeMapFile() {
  if (!ActiveCodeMapPath.empty()) {
    const int OpenFlags = O_CREAT | O_TRUNC | O_WRONLY | O_APPEND;
    return _sopen(ActiveCodeMapPath.c_str(), OpenFlags, _SH_DENYRW, 0644);
  }
  return -1;
}

// Maps every cache file found in the given image's cache directory into memory and records it in
// AOTImages, keyed by file name. Returns silently if the directory cannot be opened; files that fail
// to open or map are skipped. The mapped views are not unmapped here.
void ImageTracker::LoadAOTImages(MappedImageInfo& ImageInfo) {
  const auto AnsiPath =
    fmt::format("\\??\\{}cache\\{}", FEX::Config::GetCacheDirectory(), FEXCore::CodeMap::GetBaseFilename(ImageInfo.Info, false));

  // Iterate over all files in the given executable's cache directory, mapping them into memory for future use.
  // Each cache file name matches the unique ID (as returned by FEXCore::CodeMap::GetBaseFilename) of the image it corresponds to.
  ScopedUnicodeString NtPath(AnsiPath.c_str());

  OBJECT_ATTRIBUTES DirAttr;
  InitializeObjectAttributes(&DirAttr, &*NtPath, OBJ_CASE_INSENSITIVE, NULL, NULL);

  ScopedHandle DirHandle;
  IO_STATUS_BLOCK IOSB;
  if (!NT_SUCCESS(NtOpenFile(&*DirHandle, FILE_LIST_DIRECTORY | SYNCHRONIZE, &DirAttr, &IOSB, FILE_SHARE_READ | FILE_SHARE_WRITE,
                             FILE_DIRECTORY_FILE | FILE_SYNCHRONOUS_IO_NONALERT))) {
    return;
  }

  // The buffer is reinterpreted as FILE_BOTH_DIRECTORY_INFORMATION records, which contain 64-bit
  // fields and must be 8-byte aligned; a plain byte array carries no such guarantee.
  alignas(alignof(LARGE_INTEGER)) std::array<uint8_t, 0x1000> DirBuffer;
  bool FirstScan = true;
  // Fetches the next batch of directory entries; the scan is restarted only on the first call.
  auto QueryDir = [&]() {
    NTSTATUS Status = NtQueryDirectoryFile(*DirHandle, nullptr, nullptr, nullptr, &IOSB, DirBuffer.data(), DirBuffer.size(),
                                           FileBothDirectoryInformation, FALSE, nullptr, FirstScan);
    if (FirstScan) {
      FirstScan = false;
    }
    return NT_SUCCESS(Status);
  };

  while (QueryDir()) {
    auto* Info = reinterpret_cast<PFILE_BOTH_DIRECTORY_INFORMATION>(DirBuffer.data());

    // Walk the records of this batch, chained by NextEntryOffset.
    while (true) {
      // FileNameLength is in bytes and the name is not NUL-terminated, so build a counted string.
      UNICODE_STRING CurrentFileName;
      CurrentFileName.Buffer = Info->FileName;
      CurrentFileName.Length = static_cast<USHORT>(Info->FileNameLength);
      CurrentFileName.MaximumLength = CurrentFileName.Length;

      // Skip subdirectories as well as the "." and ".." entries (2 and 4 bytes of UTF-16).
      bool Skip = (Info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) || (Info->FileNameLength == 2 && Info->FileName[0] == '.') ||
                  (Info->FileNameLength == 4 && Info->FileName[0] == '.' && Info->FileName[1] == '.');

      if (!Skip) {
        // Open relative to the directory handle.
        OBJECT_ATTRIBUTES FileAttr;
        InitializeObjectAttributes(&FileAttr, &CurrentFileName, OBJ_CASE_INSENSITIVE, *DirHandle, nullptr);

        ScopedHandle FileHandle;
        if (NT_SUCCESS(NtOpenFile(&*FileHandle, GENERIC_READ | SYNCHRONIZE, &FileAttr, &IOSB, FILE_SHARE_READ, FILE_SYNCHRONOUS_IO_NONALERT))) {
          ScopedHandle SectionHandle;
          if (NT_SUCCESS(NtCreateSection(&*SectionHandle, SECTION_MAP_EXECUTE | SECTION_MAP_READ, nullptr, nullptr, PAGE_EXECUTE_READ,
                                         SEC_COMMIT, *FileHandle))) {
            void* LoadAddress = nullptr;
            SIZE_T MappedSize = 0;
            if (NT_SUCCESS(NtMapViewOfSection(*SectionHandle, NtCurrentProcess(), &LoadAddress, 0, 0, nullptr, &MappedSize, ViewUnmap,
                                              MEM_RESERVE | MEM_TOP_DOWN, PAGE_EXECUTE_READ))) {
              // Convert the UTF-16 file name to a multi-byte string to use as the map key.
              fextl::string UniqueId;
              ULONG AnsiLength = 0;
              RtlUnicodeToMultiByteSize(&AnsiLength, Info->FileName, Info->FileNameLength);
              UniqueId.resize(AnsiLength);
              RtlUnicodeToMultiByteN(UniqueId.data(), AnsiLength, NULL, Info->FileName, Info->FileNameLength);

              AOTImages[UniqueId] = {.Data = static_cast<std::byte*>(LoadAddress)};
              LogMan::Msg::IFmt("Loaded cache: {}", UniqueId);
            }
          }
        }
      }

      if (Info->NextEntryOffset == 0) {
        break;
      }
      Info = reinterpret_cast<PFILE_BOTH_DIRECTORY_INFORMATION>(reinterpret_cast<uint8_t*>(Info) + Info->NextEntryOffset);
    }
  }
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/ImageTracker.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <cstdint>
#include <mutex>
#include <map>
#include <shared_mutex>
#include <string_view>

#include <FEXCore/Utils/CompilerDefs.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Core/CodeCache.h>

#include "Common/VolatileMetadata.h"
#include "Module.h"

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEXCore::Context {
class Context;
}

namespace FEX::Windows {
// PE header and load-config types for the images this build handles:
// the 64-bit variants when ARCHITECTURE_arm64ec is defined, the 32-bit variants otherwise.
#ifdef ARCHITECTURE_arm64ec
using ArchImageNtHeaders = IMAGE_NT_HEADERS64;
using ArchImageLoadConfigDirectory = _IMAGE_LOAD_CONFIG_DIRECTORY64;
#else
using ArchImageNtHeaders = IMAGE_NT_HEADERS32;
using ArchImageLoadConfigDirectory = _IMAGE_LOAD_CONFIG_DIRECTORY32;
#endif

// Computes the code-map file ID for an image from its file name and the TimeDateStamp and
// SizeOfImage values of its PE headers.
FEXCore::CodeMapFileId ComputeCodeMapId(std::string_view FileName, uint32_t TimeDateStamp, uint32_t SizeOfImage);

/**
 * @brief Tracks mapped PE code images and handles their volatile metadata
 *
 * Also implements FEXCore::CodeMapOpener by overriding OpenCodeMapFile().
 */
class ImageTracker : public FEXCore::CodeMapOpener {
public:
  ImageTracker(FEXCore::Context::Context& CTX, bool IsGeneratingCache);

  // Starts tracking the image mapped at Address and returns its section info.
  FEXCore::ExecutableFileSectionInfo HandleImageMap(std::string_view Path, uint64_t Address, bool MainImage);

  // Stops tracking the image mapped at Address.
  void HandleImageUnmap(uint64_t Address, uint64_t Size);

  // Looks up the tracked image containing Address, if any.
  // NOTE(review): std::optional is used here but <optional> is not included directly by this header;
  // it currently relies on a transitive include.
  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(uint64_t Address);

  int OpenCodeMapFile() override;

private:
  // Per-image record stored in MappedImages.
  struct MappedImageInfo {
    FEXCore::ExecutableFileInfo Info;
    FEXCore::ExecutableFileSectionInfo SectionInfo;

    MappedImageInfo(std::string_view Path, uint64_t Address, ArchImageNtHeaders* Nt,
                    fextl::robin_map<uint32_t, FEXCore::GuestRelocationType> Relocations);
  };

  // A cache file mapped into memory.
  struct AOTImageInfo {
    std::byte* Data; // Start of the mapped view
  };

  void LoadAOTImages(MappedImageInfo& Info);

  FEXCore::Context::Context& CTX;

  FEX_CONFIG_OPT(ExtendedVolatileMetadataConfig, EXTENDEDVOLATILEMETADATA);
  // Parsed form of ExtendedVolatileMetadataConfig, looked up by module name.
  fextl::unordered_map<fextl::string, FEX::VolatileMetadata::ExtendedVolatileMetadata> ExtendedMetaData;

  // Guards MappedImages.
  std::shared_mutex ImagesLock;
  // Tracked images keyed by base address.
  std::map<uint64_t, MappedImageInfo> MappedImages;
  // Mapped cache files keyed by file name.
  std::map<fextl::string, AOTImageInfo> AOTImages;

  // Path of the code-map file opened by OpenCodeMapFile(); empty until one is chosen.
  std::string ActiveCodeMapPath;
  bool IsGeneratingCache;
};

} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/InvalidationTracker.cpp
================================================
// SPDX-License-Identifier: MIT

#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/SignalScopeGuards.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include "InvalidationTracker.h"
#include <windef.h>
#include <winternl.h>

namespace FEX::Windows {
// Records the context and thread map, caches whether SMC detection is disabled by configuration,
// and seeds the interval tracking by reporting the protection of every committed region that
// VirtualQuery() enumerates, starting from address 0.
InvalidationTracker::InvalidationTracker(FEXCore::Context::Context& CTX, const std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*>& Threads)
  : CTX {CTX}
  , Threads {Threads} {
  FEX_CONFIG_OPT(SMCChecks, SMCCHECKS);
  SMCDetectionDisabled = (SMCChecks == FEXCore::Config::CONFIG_SMC_NONE);

  MEMORY_BASIC_INFORMATION Region;
  // Walk region by region until VirtualQuery() reports failure.
  for (uint64_t Cursor = 0; VirtualQuery(reinterpret_cast<LPCVOID>(Cursor), &Region, sizeof(Region));) {
    const uint64_t RegionBase = reinterpret_cast<uint64_t>(Region.BaseAddress);
    if (Region.State == MEM_COMMIT) {
      HandleMemoryProtectionNotification(RegionBase, Region.RegionSize, Region.Protect);
    }

    Cursor = RegionBase + Region.RegionSize;
  }
}

void InvalidationTracker::HandleMemoryProtectionNotification(uint64_t Address, uint64_t Size, ULONG Prot) {
  const auto AlignedBase = Address & FEXCore::Utils::FEX_PAGE_MASK;
  const auto AlignedSize = (Address - AlignedBase + Size + FEXCore::Utils::FEX_PAGE_SIZE - 1) & FEXCore::Utils::FEX_PAGE_MASK;

  const bool NeedsInvalidate = [&]() {
    std::unique_lock Lock(IntervalsLock);

    FEXCore::IntervalList<uint64_t>::Interval ProtInterval {AlignedBase, AlignedBase + AlignedSize};
    if (Prot & (PAGE_EXECUTE | PAGE_EXECUTE_READ | PAGE_EXECUTE_READWRITE | PAGE_EXECUTE_WRITECOPY)) {
      XIntervals.Insert(ProtInterval);
      if (Prot & (PAGE_EXECUTE_WRITECOPY | PAGE_EXECUTE_READWRITE)) {
        LogMan::Msg::DFmt("Add SMC interval: {:X} - {:X}", AlignedBase, AlignedBase + AlignedSize);
        RWXIntervals.Insert(ProtInterval);
      }
      return true;
    } else if (XIntervals.Intersect(ProtInterval)) {
      XIntervals.Remove(ProtInterval);
      RWXIntervals.Remove(ProtInterval);
      return true;
    }

    return false;
  }();

  if (NeedsInvalidate) {
    // IntervalsLock cannot be held during invalidation
    InvalidateIntervalInternal(AlignedBase, AlignedSize);
  }
}

// Registers the executable sections of a freshly mapped PE image located at `Address`.
//
// Every section flagged IMAGE_SCN_MEM_EXECUTE is added to XIntervals; sections that are also
// IMAGE_SCN_MEM_WRITE are added to RWXIntervals. If `Name` is one of the known mono runtime DLLs
// and the MonoHacks option is enabled, mono backpatcher detection is armed (requires Multiblock
// and MaxInst >= 500).
void InvalidationTracker::HandleImageMap(std::string_view Name, uint64_t Address) {
  auto* Nt = RtlImageNtHeader(reinterpret_cast<HMODULE>(Address));
  if (!Nt) {
    // The mapping does not carry a valid NT header, so there are no sections to walk.
    // Bail out instead of dereferencing a null header below.
    return;
  }

  auto* SectionsBegin = IMAGE_FIRST_SECTION(Nt);
  auto* SectionsEnd = SectionsBegin + Nt->FileHeader.NumberOfSections;
  uint64_t LastExecutableSectionEnd = 0;

  for (auto* Section = SectionsBegin; Section != SectionsEnd; Section++) {
    if (Section->Characteristics & IMAGE_SCN_MEM_EXECUTE) {
      std::unique_lock Lock(IntervalsLock);

      uint64_t SectionBase = Address + Section->VirtualAddress;
      uint64_t SectionEnd = SectionBase + Section->Misc.VirtualSize;
      XIntervals.Insert({SectionBase, SectionEnd});
      // Remember the highest executable address so the mono range check has an upper bound.
      LastExecutableSectionEnd = std::max(LastExecutableSectionEnd, SectionEnd);
      if (Section->Characteristics & IMAGE_SCN_MEM_WRITE) {
        LogMan::Msg::DFmt("Add image SMC interval: {:X} - {:X}", SectionBase, SectionEnd);
        RWXIntervals.Insert({SectionBase, SectionEnd});
      }
    }
  }

  FEX_CONFIG_OPT(MonoHacks, MONOHACKS);
  if (MonoHacks && (Name == "mono-2.0-bdwgc.dll" || Name == "mono.dll")) {
    FEX_CONFIG_OPT(MaxInst, MAXINST);
    FEX_CONFIG_OPT(Multiblock, MULTIBLOCK);
    if (Multiblock && MaxInst() >= 500) {
      // Require these settings to ensure we can safely hook all SMC sites in a single block
      CTX.MarkMonoDetected();
      MonoBackpatcherDetectionPending = true;
      MonoBase = Address;
      MonoEnd = LastExecutableSectionEnd;
    } else {
      LogMan::Msg::IFmt("Not applying mono hacks, Multiblock with MaxInst >= 500 required");
    }
  }
}

// Invalidates translated code for the whole allocation that contains `Address`.
//
// The allocation extent is discovered by querying consecutive regions that share the same
// AllocationBase. When `Free` is set the range is also dropped from both interval lists.
// Returns the allocation base and size, or {Address, 0} if the initial query fails.
InvalidationTracker::InvalidateContainingSectionResult InvalidationTracker::InvalidateContainingSection(uint64_t Address, bool Free) {
  MEMORY_BASIC_INFORMATION Region;
  const auto FirstStatus =
    NtQueryVirtualMemory(NtCurrentProcess(), reinterpret_cast<void*>(Address), MemoryBasicInformation, &Region, sizeof(Region), nullptr);
  if (FirstStatus) {
    return {Address, 0};
  }

  const auto AllocBase = reinterpret_cast<uint64_t>(Region.AllocationBase);
  auto AllocSize = reinterpret_cast<uint64_t>(Region.BaseAddress) + Region.RegionSize - AllocBase;

  // Keep extending while the next region still belongs to the same allocation.
  for (;;) {
    void* NextRegion = reinterpret_cast<void*>(AllocBase + AllocSize);
    if (NtQueryVirtualMemory(NtCurrentProcess(), NextRegion, MemoryBasicInformation, &Region, sizeof(Region), nullptr)) {
      break;
    }
    if (reinterpret_cast<uint64_t>(Region.AllocationBase) != AllocBase) {
      break;
    }
    AllocSize += Region.RegionSize;
  }

  InvalidateIntervalInternal(AllocBase, AllocSize);

  if (Free) {
    std::unique_lock Lock(IntervalsLock);
    XIntervals.Remove({AllocBase, AllocBase + AllocSize});
    RWXIntervals.Remove({AllocBase, AllocBase + AllocSize});
  }

  return {AllocBase, AllocSize};
}

// Invalidates translated code for [Address, Address + Size) after expanding it to page granularity.
// When `Free` is set the same range is also removed from both interval lists.
void InvalidationTracker::InvalidateAlignedInterval(uint64_t Address, uint64_t Size, bool Free) {
  // Match the Windows behaviour when passed a NULL base address: cover the maximum possible size.
  const uint64_t EffectiveSize = Address ? Size : std::numeric_limits<uint64_t>::max();

  const auto PageBase = Address & FEXCore::Utils::FEX_PAGE_MASK;
  const auto RoundedSize = (Address - PageBase + EffectiveSize + FEXCore::Utils::FEX_PAGE_SIZE - 1) & FEXCore::Utils::FEX_PAGE_MASK;
  // The rounding above wraps for huge sizes, so never go below the requested size.
  const auto PageSpan = std::max(EffectiveSize, RoundedSize);

  InvalidateIntervalInternal(PageBase, PageSpan);

  if (!Free) {
    return;
  }

  std::unique_lock Lock(IntervalsLock);
  XIntervals.Remove({PageBase, PageBase + PageSpan});
  RWXIntervals.Remove({PageBase, PageBase + PageSpan});
}

// Re-applies PAGE_EXECUTE_READ to every tracked RWX interval overlapping [Address, Address + Size).
// Forwards to ProtectRWXIntervalsInternal with ForWriteLocked = false; the result is discarded.
void InvalidationTracker::ReprotectRWXIntervals(uint64_t Address, uint64_t Size) {
  ProtectRWXIntervalsInternal(Address, Size, false);
}

// Handles an access violation at `FaultAddress`.
//
// If the address lies inside a tracked RWX interval, the containing page's translated code is
// invalidated, the page is made PAGE_EXECUTE_READWRITE, mono backpatcher detection is given a
// chance to run, and true is returned. Otherwise nothing happens and false is returned.
bool InvalidationTracker::HandleRWXAccessViolation(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPc, uint64_t FaultAddress) {
  bool FaultInTrackedRWX;
  {
    std::shared_lock Lock(IntervalsLock);
    FaultInTrackedRWX = RWXIntervals.Query(FaultAddress).Enclosed;
  }

  if (!FaultInTrackedRWX) {
    return false;
  }

  // IntervalsLock cannot be held during invalidation
  {
    std::scoped_lock Lock(CTX.GetCodeInvalidationMutex());

    const uint64_t FaultPage = FaultAddress & FEXCore::Utils::FEX_PAGE_MASK;
    InvalidateIntervalInternalLocked(FaultPage, FEXCore::Utils::FEX_PAGE_SIZE);

    // Invalidate, then unprotect the faulting page with the compilation lock held to ensure that any racing invalidations are not dropped.
    void* ProtectBase = reinterpret_cast<void*>(FaultAddress);
    SIZE_T ProtectSize = 1;
    ULONG PreviousProt;
    NtProtectVirtualMemory(NtCurrentProcess(), &ProtectBase, &ProtectSize, PAGE_EXECUTE_READWRITE, &PreviousProt);
  }

  DetectMonoBackpatcherBlock(Thread, HostPc);
  return true;
}

// Prepares [Address, Address + Size) for a write that will not go through fault-based tracking.
// Forwards to ProtectRWXIntervalsInternal with ForWriteLocked = true and returns its result
// (true when at least one tracked RWX interval was hit).
// NOTE: per the ForWriteLocked contract, the caller must already hold CodeInvalidationMutex.
bool InvalidationTracker::BeginUntrackedWriteLocked(uint64_t Address, uint64_t Size) {
  return ProtectRWXIntervalsInternal(Address, Size, true);
}

// Describes the executable range that contains `Address`.
//
// Returns a default-constructed result when the address is not in a tracked executable interval.
// When it lies inside a tracked RWX interval, that interval is returned with the final field set
// to true. Otherwise the executable interval is returned with the final field false, truncated at
// the start of the RWX interval reported by the RWX query when that query's Size is non-zero and
// smaller than the executable query's Size.
FEXCore::HLE::ExecutableRangeInfo InvalidationTracker::QueryExecutableRange(uint64_t Address) {
  std::shared_lock Lock(IntervalsLock);

  const auto Exec = XIntervals.Query(Address);
  if (!Exec.Enclosed) {
    return {};
  }

  const auto Writable = RWXIntervals.Query(Address);
  if (Writable.Enclosed) {
    return {Writable.Interval.Offset, Writable.Interval.End - Writable.Interval.Offset, true};
  }

  // Pick where the non-writable executable range stops.
  const bool WritableStartsFirst = Writable.Size && Writable.Size < Exec.Size;
  const auto RangeEnd = WritableStartsFirst ? Writable.Interval.Offset : Exec.Interval.End;
  return {Exec.Interval.Offset, RangeEnd - Exec.Interval.Offset, false};
}

// Checks whether the write fault that occurred at `HostPc` came from mono's backpatcher and, if so,
// marks the containing guest block, disables SMC detection and invalidates the block's page.
// Does nothing unless HandleImageMap armed detection (MonoBackpatcherDetectionPending).
void InvalidationTracker::DetectMonoBackpatcherBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPc) {
  if (!MonoBackpatcherDetectionPending) {
    return;
  }

  // Only faults raised from within JIT code can be mapped back to a guest RIP.
  if (!CTX.IsAddressInCodeBuffer(Thread, HostPc)) {
    return;
  }

  // The guest instruction must live inside the mono image's executable sections.
  uint64_t RIP = CTX.RestoreRIPFromHostPC(Thread, HostPc);
  if (!RIP || RIP < MonoBase || RIP >= MonoEnd) {
    return;
  }

  // Accept the instruction only if its first or second byte is 0x87 (XChgOp).
  // NOTE(review): the second-byte check presumably allows for a one-byte prefix — confirm.
  static constexpr uint8_t XChgOp = 0x87;
  if (*reinterpret_cast<uint8_t*>(RIP) != XChgOp && *reinterpret_cast<uint8_t*>(RIP + 1) != XChgOp) {
    return;
  }

  uint64_t BlockEntry = CTX.GetGuestBlockEntry(Thread);
  LogMan::Msg::DFmt("Detected mono backpatcher at: {:X}", BlockEntry);
  // DisableSMCDetection takes IntervalsLock itself, so it is called before the code lock is taken.
  DisableSMCDetection();
  {
    std::scoped_lock CodeLock(CTX.GetCodeInvalidationMutex());
    CTX.MarkMonoBackpatcherBlock(BlockEntry);
  }
  // Drop any existing translation of the block's page.
  InvalidateAlignedInterval(BlockEntry, FEXCore::Utils::FEX_PAGE_SIZE, false);
}

void InvalidationTracker::DisableSMCDetection() {
  std::unique_lock Lock(IntervalsLock);
  SMCDetectionDisabled = true;
  uint64_t Address = 0;

  // Reprotect all RWX intervals as RWX
  FEXCore::IntervalList<uint64_t>::QueryResult Query;
  do {
    Query = RWXIntervals.Query(Address);
    if (Query.Enclosed) {
      void* TmpAddress = reinterpret_cast<void*>(Address);
      SIZE_T TmpSize = static_cast<SIZE_T>(Query.Size);
      ULONG TmpProt;
      NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_EXECUTE_READWRITE, &TmpProt);
    }
    Address += Query.Size;
  } while (Query.Size);
}

// Invalidates translated code for [Address, Address + Size): acquires the context's
// CodeInvalidationMutex for the duration of the call and defers to the *Locked variant.
void InvalidationTracker::InvalidateIntervalInternal(uint64_t Address, uint64_t Size) {
  std::scoped_lock CodeLock(CTX.GetCodeInvalidationMutex());
  InvalidateIntervalInternalLocked(Address, Size);
}

void InvalidationTracker::InvalidateIntervalInternalLocked(uint64_t Address, uint64_t Size) {
  // NOTE: This assumes CodeInvalidationMutex is locked by the caller
  CTX.InvalidateCodeBuffersCodeRange(Address, Size);
  for (auto Thread : Threads) {
    CTX.InvalidateThreadCachedCodeRange(Thread.second, Address, Size);
  }
}

// Applies a new protection to every tracked RWX interval overlapping [Address, Address + Size).
//
// ForWriteLocked = true  -> pages become PAGE_EXECUTE_READWRITE and code is invalidated first.
// ForWriteLocked = false -> pages become PAGE_EXECUTE_READ.
// Returns true if at least one RWX interval was hit; always false once SMC detection is disabled.
bool InvalidationTracker::ProtectRWXIntervalsInternal(uint64_t Address, uint64_t Size, bool ForWriteLocked) {
  const auto End = Address + Size;
  std::shared_lock Lock(IntervalsLock);

  if (SMCDetectionDisabled) {
    return false;
  }

  bool HitRWXInterval = false;
  do {
    // `Address` is advanced through the range; each query is made at the current position.
    const auto Query = RWXIntervals.Query(Address);
    if (Query.Enclosed) {
      if (!HitRWXInterval) {
        if (ForWriteLocked) {
          // If we are protecting as writable, then the entire range must be invalidated before any protections are
          // applied and the invalidation mutex must be locked throughout.
          // Do this lazily only when an RWX region is actually hit.
          // NOTE: This assumes CodeInvalidationMutex is locked by the caller
          // NOTE(review): `Address` may already have advanced past the original start here while `Size` is still
          // the original length, so the invalidated range can extend beyond `End` — confirm this is intended.
          InvalidateIntervalInternalLocked(Address, Size);
        }
        HitRWXInterval = true;
      }
      // Clamp the protected span so it never extends past the requested end.
      void* TmpAddress = reinterpret_cast<void*>(Address);
      SIZE_T TmpSize = static_cast<SIZE_T>(std::min(End, Address + Query.Size) - Address);
      ULONG TmpProt;
      NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, ForWriteLocked ? PAGE_EXECUTE_READWRITE : PAGE_EXECUTE_READ, &TmpProt);
    } else if (!Query.Size) {
      // No more regions past `Address` in the interval list
      break;
    }

    // Step over the span the query described (either the interval remainder or the gap before the next one).
    Address += Query.Size;
  } while (Address < End);

  return HitRWXInterval;
}

} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/InvalidationTracker.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/IntervalList.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <mutex>
#include <shared_mutex>
#include <unordered_map>
#include <string_view>

namespace FEXCore::Core {
struct InternalThreadState;
}

namespace FEXCore::Context {
class Context;
}

namespace FEX::Windows {
/**
 * @brief Handles SMC and regular code invalidation
 *
 * Keeps two interval lists (executable memory and writable+executable memory), both guarded by
 * IntervalsLock, and invalidates translated code through the owning Context when those ranges change.
 */
class InvalidationTracker {
public:
  // Stores references to the context and the thread map; both must outlive the tracker.
  InvalidationTracker(FEXCore::Context::Context& CTX, const std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*>& Threads);
  // Updates the interval lists for a protection change and invalidates code when executable memory is affected.
  void HandleMemoryProtectionNotification(uint64_t Address, uint64_t Size, ULONG Prot);
  // Registers the executable sections of an image mapped at Address; Name is used for mono detection.
  void HandleImageMap(std::string_view Name, uint64_t Address);
  // Extent of the allocation that InvalidateContainingSection operated on.
  struct InvalidateContainingSectionResult {
    uint64_t SectionStart;
    uint64_t SectionSize;
  };
  // Invalidates the whole allocation containing Address; also untracks it when Free is true.
  InvalidateContainingSectionResult InvalidateContainingSection(uint64_t Address, bool Free);
  // Invalidates the page-aligned range covering [Address, Address + Size); also untracks it when Free is true.
  void InvalidateAlignedInterval(uint64_t Address, uint64_t Size, bool Free);
  // Protects tracked RWX intervals in the range as PAGE_EXECUTE_READ.
  void ReprotectRWXIntervals(uint64_t Address, uint64_t Size);
  // Returns true if the fault hit a tracked RWX interval and was handled.
  bool HandleRWXAccessViolation(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC, uint64_t FaultAddress);

  // Unprotects any RWX intervals in the input interval and invalidates code
  // NOTE: CodeInvalidationMutex must be locked when calling this, and if true is returned, kept locked until the write ends.
  bool BeginUntrackedWriteLocked(uint64_t Address, uint64_t Size);

  // Describes the tracked executable range containing Address (empty result if none).
  FEXCore::HLE::ExecutableRangeInfo QueryExecutableRange(uint64_t Address);

private:
  // Inspects the faulting guest instruction to detect mono's backpatcher once detection is pending.
  void DetectMonoBackpatcherBlock(FEXCore::Core::InternalThreadState* Thread, uint64_t HostPC);
  // Sets SMCDetectionDisabled and reprotects all tracked RWX intervals as PAGE_EXECUTE_READWRITE.
  void DisableSMCDetection();
  // Takes CodeInvalidationMutex and invalidates the range.
  void InvalidateIntervalInternal(uint64_t Address, uint64_t Size);
  // NOTE: This assumes CodeInvalidationMutex is locked by the caller
  void InvalidateIntervalInternalLocked(uint64_t Address, uint64_t Size);

  // NOTE: If ForWriteLocked is true then this assumes CodeInvalidationMutex is locked by the caller,
  // and any code in the range will be invalidated before protection as RWX, otherwise protects as RX if false.
  bool ProtectRWXIntervalsInternal(uint64_t Address, uint64_t Size, bool ForWriteLocked);

  FEXCore::IntervalList<uint64_t> XIntervals;   // Tracked executable memory
  FEXCore::IntervalList<uint64_t> RWXIntervals; // Tracked writable + executable memory
  std::shared_mutex IntervalsLock;              // Guards both interval lists
  FEXCore::Context::Context& CTX;
  const std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*>& Threads;
  bool SMCDetectionDisabled {false}; // Protected by IntervalsLock

  // Set by HandleImageMap when a mono runtime image is mapped and the mono hacks apply.
  bool MonoBackpatcherDetectionPending {false};
  // Guest address range [MonoBase, MonoEnd) checked by DetectMonoBackpatcherBlock.
  uint64_t MonoBase {0};
  uint64_t MonoEnd {0};
};
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/JITGuardPage.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/Utils/LongJump.h>

namespace FEX::Windows::JITGuardPage {
// If `Address` falls inside the thread's JIT guard page, rewrites the supplied register state so
// execution resumes at the thread's RestartJump buffer and reports the fault as handled.
// Returns false (leaving the register state untouched) for any other address.
static inline bool HandleJITGuardPage(FEXCore::Core::InternalThreadState* Thread, void* Address, uint64_t* GPRs, __uint128_t* FPRs, uint64_t* PC) {
  void* const GuardBegin = reinterpret_cast<void*>(Thread->JITGuardPage);
  void* const GuardEnd = reinterpret_cast<void*>(Thread->JITGuardPage + FEXCore::Utils::FEX_PAGE_SIZE);

  const bool HitGuardPage = Address >= GuardBegin && Address < GuardEnd;
  if (!HitGuardPage) {
    return false;
  }

  FEXCore::UncheckedLongJump::ManuallyLoadJumpBuf(Thread->RestartJump, Thread->JITGuardOverflowArgument, GPRs, FPRs, PC);
  return true;
}

} // namespace FEX::Windows::JITGuardPage


================================================
FILE: Source/Windows/Common/LoadConfig.S
================================================
/**
 * This file has no copyright assigned and is placed in the Public Domain.
 * This file is part of the mingw-w64 runtime package.
 * No warranty is given; refer to the file DISCLAIMER.PD within this package.
 */

#define PTR .8byte
#define ALIGN 16
#define EXPORT_SYM(x) .globl x; x:
#define SYM(x) x

.text
.balign ALIGN
#ifdef __arm64ec__
/*
Calls to this are synthesized by the linker when calling into import libraries,
this is referred to as an 'Adjustor Thunk' in ARM64EC documentation.
*/
EXPORT_SYM(__icall_helper_arm64ec)
.seh_proc "__icall_helper_arm64ec"
  /* Prologue: save the frame pointer and link register and establish a frame. */
  stp  fp,   lr, [sp, #-16]!
.seh_save_fplr_x 16
  mov  fp,   sp
.seh_set_fp
.seh_endprologue
  /* Load the function pointer stored in the __os_arm64x_check_icall slot and call it. */
  adrp x16, __os_arm64x_check_icall
  ldr  x16, [x16, #:lo12:__os_arm64x_check_icall]
  blr  x16
.seh_startepilogue
  /* Epilogue: restore fp/lr before leaving. */
  ldp  fp,  lr,  [sp], #16
.seh_save_fplr_x 16
.seh_endepilogue
  /* Tail-branch to the address held in x11 after the checker returns. */
  br   x11
.seh_endproc
#endif

/* No-op routine: returns immediately. Used as the initial value of __guard_check_icall_fptr. */
SYM(__guard_check_icall_dummy):
  ret

/*
Pointer-sized slots placed in the .00cfg section. Where two EXPORT_SYM labels precede a single
PTR, both symbols name the same slot.
*/
.section  .00cfg, "dr"
.balign ALIGN
#ifdef __arm64ec__
/*
These symbols are updated at runtime by the dynamic linker to point to emulator
helper routines.
*/
EXPORT_SYM(__os_arm64x_dispatch_call_no_redirect)
  PTR 0
EXPORT_SYM(__os_arm64x_dispatch_ret)
  PTR 0
EXPORT_SYM(__os_arm64x_check_icall)
EXPORT_SYM(__os_arm64x_dispatch_icall)
  PTR 0
EXPORT_SYM(__os_arm64x_check_call)
EXPORT_SYM(__os_arm64x_dispatch_call)
  PTR 0
EXPORT_SYM(__os_arm64x_check_icall_cfg)
EXPORT_SYM(__os_arm64x_check_dispatch_cfg)
  PTR 0
EXPORT_SYM(__os_arm64x_rdtsc)
EXPORT_SYM(__os_arm64x_get_x64_information)
  PTR 0
EXPORT_SYM(__os_arm64x_set_x64_information)
EXPORT_SYM(__os_arm64x_cpuidex)
  PTR 0
EXPORT_SYM(__os_arm64x_x64_jump)
EXPORT_SYM(__os_arm64x_dispatch_fptr)
  PTR 0
EXPORT_SYM(__os_arm64x_helper0)
  PTR 0
EXPORT_SYM(__os_arm64x_helper1)
  PTR 0
EXPORT_SYM(__os_arm64x_helper2)
  PTR 0
EXPORT_SYM(__os_arm64x_helper3)
  PTR 0
EXPORT_SYM(__os_arm64x_helper4)
  PTR 0
EXPORT_SYM(__os_arm64x_helper5)
  PTR 0
EXPORT_SYM(__os_arm64x_helper6)
  PTR 0
EXPORT_SYM(__os_arm64x_helper7)
  PTR 0
EXPORT_SYM(__os_arm64x_helper8)
  PTR 0
#endif
/* Guard check function pointer; initially points at the no-op dummy routine. */
EXPORT_SYM(__guard_check_icall_fptr)
  PTR SYM(__guard_check_icall_dummy)

#ifdef __arm64ec__
/*
This structure is read at runtime by the dynamic linker on ARM64EC to configure
metadata necessary for EC code to interface with x86_64 code.
*/
.section  .rdata,"dr"
.balign ALIGN
/*
Table of 32-bit fields. Entries written with @IMGREL are image-relative references to the named
symbol; the *_count / *_size entries are emitted as plain symbol values. The trailing comment on
each line names the field; where it differs from the symbol used, the two names label the same
slot in the .00cfg table.
*/
EXPORT_SYM(__chpe_metadata)
  .4byte 1 /* Version */
  .4byte __hybrid_code_map@IMGREL /* CodeMap */
  .4byte __hybrid_code_map_count /* CodeMapCount */
  .4byte __x64_code_ranges_to_entry_points@IMGREL /* CodeRangesToEntryPoints */
  .4byte __arm64x_redirection_metadata@IMGREL /* RedirectionMetadata */
  .4byte __os_arm64x_dispatch_call_no_redirect@IMGREL /* __os_arm64x_dispatch_call_no_redirect */
  .4byte __os_arm64x_dispatch_ret@IMGREL /* __os_arm64x_dispatch_ret */
  .4byte __os_arm64x_check_call@IMGREL /* __os_arm64x_dispatch_call */
  .4byte __os_arm64x_check_icall@IMGREL /* __os_arm64x_dispatch_icall */
  .4byte __os_arm64x_check_icall_cfg@IMGREL /* __os_arm64x_dispatch_icall_cfg */
  .4byte __arm64x_native_entrypoint@IMGREL /* AlternateEntryPoint */
  .4byte __hybrid_auxiliary_iat@IMGREL /* AuxiliaryIAT */
  .4byte __x64_code_ranges_to_entry_points_count /* CodeRangesToEntryPointsCount */
  .4byte __arm64x_redirection_metadata_count /* RedirectionMetadataCount */
  .4byte __os_arm64x_get_x64_information@IMGREL /* GetX64InformationFunctionPointer */
  .4byte __os_arm64x_set_x64_information@IMGREL /* SetX64InformationFunctionPointer */
  .4byte __arm64x_extra_rfe_table@IMGREL /* ExtraRFETable */
  .4byte __arm64x_extra_rfe_table_size /* ExtraRFETableSize */
  .4byte __os_arm64x_x64_jump@IMGREL /* __os_arm64x_dispatch_fptr */
  .4byte __hybrid_auxiliary_iat_copy@IMGREL /* AuxiliaryIATCopy */
  /* The following members are undocumented */
  .4byte __os_arm64x_helper0@IMGREL
  .4byte __os_arm64x_helper1@IMGREL
  .4byte __os_arm64x_helper2@IMGREL
  .4byte __os_arm64x_helper3@IMGREL
  .4byte __os_arm64x_helper4@IMGREL
  .4byte __os_arm64x_helper5@IMGREL
  .4byte __os_arm64x_helper6@IMGREL
  .4byte __os_arm64x_helper7@IMGREL
  .4byte __os_arm64x_helper8@IMGREL
#endif

.section  .rdata,"dr"
.globl  SYM(_load_config_used)
.balign ALIGN
/*
Load configuration table. The first field is the table's own size, computed from the end label
below. Most fields are zero; the non-zero ones reference the guard check pointer, the __guard_*
symbols and, on ARM64EC, the CHPE metadata block.
*/
SYM(_load_config_used):
  .4byte  SYM(_load_config_used__end) - SYM(_load_config_used) /* Size */
  .4byte  0 /* TimeDateStamp */
  .2byte  0 /* MajorVersion */
  .2byte  0 /* MinorVersion */
  .4byte  0 /* GlobalFlagsClear */
  .4byte  0 /* GlobalFlagsSet */
  .4byte  0 /* CriticalSectionDefaultTimeout */
  PTR  0 /* DeCommitFreeBlockThreshold */
  PTR  0 /* DeCommitTotalFreeThreshold */
  PTR  0 /* LockPrefixTable */
  PTR  0 /* MaximumAllocationSize */
  PTR  0 /* VirtualMemoryThreshold */
  PTR  0 /* ProcessAffinityMask */
  .4byte  0 /* ProcessHeapFlags */
  .2byte  0 /* CSDVersion */
  .2byte  0 /* DependentLoadFlags */
  PTR  0 /* EditList */
  PTR  0 /* SecurityCookie */
  PTR  0 /* SEHandlerTable */
  PTR  0 /* SEHandlerCount */
  PTR  SYM(__guard_check_icall_fptr) /* GuardCFCheckFunction */
  PTR  0 /* GuardCFCheckDispatch */
  PTR  SYM(__guard_fids_table) /* GuardCFFunctionTable */
  PTR  SYM(__guard_fids_count) /* GuardCFFunctionCount */
  .4byte  SYM(__guard_flags) /* GuardFlags */
  .2byte  0 /* CodeIntegrity_Flags */
  .2byte  0 /* CodeIntegrity_Catalog */
  .4byte  0 /* CodeIntegrity_CatalogOffset */
  .4byte  0 /* CodeIntegrity_Reserved */
  PTR  SYM(__guard_iat_table) /* GuardAddressTakenIatEntryTable */
  PTR  SYM(__guard_iat_count) /* GuardAddressTakenIatEntryCount */
  PTR  SYM(__guard_longjmp_table) /* GuardLongJumpTargetTable */
  PTR  SYM(__guard_longjmp_count) /* GuardLongJumpTargetCount */
  PTR  0 /* DynamicValueRelocTable */
#ifdef __arm64ec__
  PTR  SYM(__chpe_metadata) /* CHPEMetadataPointer */
#endif
  PTR  0 /* GuardRFFailureRoutine */
  PTR  0 /* GuardRFFailureRoutineFunctionPointer */
  .4byte  0 /* DynamicValueRelocTableOffset */
  .2byte  0 /* DynamicValueRelocTableSection */
  .2byte  0 /* Reserved2 */
  PTR  0 /* GuardRFVerifyStackPointerFunctionPointer */
  .4byte  0 /* HotPatchTableOffset */
  .4byte  0 /* Reserved3 */
  PTR  0 /* EnclaveConfigurationPointer */
  PTR  0 /* VolatileMetadataPointer */
  PTR SYM(__guard_eh_cont_table) /* GuardEHContinuationTable */
  PTR SYM(__guard_eh_cont_count) /* GuardEHContinuationCount */
  PTR  0 /* GuardXFGCheckFunctionPointer */
  PTR  0 /* GuardXFGDispatchFunctionPointer */
  PTR  0 /* GuardXFGTableDispatchFunctionPointer */
  PTR  0 /* CastGuardOsDeterminedFailureMode */

SYM(_load_config_used__end):


================================================
FILE: Source/Windows/Common/Logging.cpp
================================================
// SPDX-License-Identifier: MIT
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/LogManager.h>

#include <cstdio>
#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <winnt.h>

namespace {
// Output sink resolved by Init(): Wine's debug output routine when available ...
void (*WineDbgOut)(const char* Message);
// ... otherwise a log file opened by Init() (may remain null if it could not be opened).
FILE* LogFile;

// Writes one already-formatted line to whichever sink is active.
// When `Flush` is set and the sink is the log file, the stream is flushed so the line reaches
// disk even if the process terminates right afterwards.
void WriteLogOutput(const char* Text, size_t Length, bool Flush) {
  if (WineDbgOut) {
    WineDbgOut(Text);
  } else if (LogFile) {
    fwrite(Text, 1, Length, LogFile);
    if (Flush) {
      fflush(LogFile);
    }
  }
}

// Log handler: prefixes the message with its level string and the current thread id (hex).
static void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const auto Output = fextl::fmt::format("{} {:X} {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message);
  WriteLogOutput(Output.c_str(), Output.size(), false);
}

// Assertion handler: prefixes the message with "A".
// The file sink is flushed here because an assertion is typically followed by abnormal
// termination, which would otherwise discard whatever is still sitting in the stdio buffer.
static void AssertHandler(const char* Message) {
  const auto Output = fextl::fmt::format("A {}\n", Message);
  WriteLogOutput(Output.c_str(), Output.size(), true);
}
} // namespace

namespace FEX::Windows::Logging {
// Installs the FEX log and assert handlers unless the SilentLog option is set.
//
// Output goes to ntdll's __wine_dbg_output export when it exists; otherwise to
// %LOCALAPPDATA%\fex-<pid>.log opened in append mode. If neither sink is available the handlers
// are still installed and simply drop their messages.
void Init() {
  FEX_CONFIG_OPT(SilentLog, SILENTLOG);
  if (SilentLog()) {
    return;
  }

  WineDbgOut = reinterpret_cast<decltype(WineDbgOut)>(GetProcAddress(GetModuleHandleA("ntdll.dll"), "__wine_dbg_output"));
  if (!WineDbgOut) {
    // getenv returns a null pointer when the variable is unset; a null C string must not be
    // handed to the formatter, so skip the file sink entirely in that case.
    if (const char* LocalAppData = getenv("LOCALAPPDATA")) {
      const auto Path = fextl::fmt::format("{}\\fex-{}.log", LocalAppData, GetCurrentProcessId());
      LogFile = fopen(Path.c_str(), "a");
    }
  }
  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);
}
} // namespace FEX::Windows::Logging


================================================
FILE: Source/Windows/Common/Logging.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

namespace FEX::Windows::Logging {
void Init();
} // namespace FEX::Windows::Logging


================================================
FILE: Source/Windows/Common/Module.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <array>
#include <FEXCore/fextl/string.h>
#include <winternl.h>

namespace FEX::Windows {
// Returns the full path reported by LdrGetDllFullName for a null module argument, converted to an
// ANSI string. Returns an empty string if either the lookup or the conversion fails.
inline fextl::string GetExecutableFilePath() {
  std::array<WCHAR, PATH_MAX> Buf;
  UNICODE_STRING PathW {.Length = 0, .MaximumLength = Buf.size() * sizeof(WCHAR), .Buffer = Buf.data()};

  if (LdrGetDllFullName(nullptr, &PathW)) {
    return {};
  }

  // The conversion allocates PathA.Buffer only on success; on failure the descriptor must not be
  // read or freed, so bail out before touching it.
  STRING PathA {};
  if (RtlUnicodeStringToAnsiString(&PathA, &PathW, TRUE)) {
    return {};
  }

  fextl::string Path(PathA.Buffer);
  RtlFreeAnsiString(&PathA);

  return Path;
}

// Returns the file name backing the mapping that contains `Address`, as reported by
// NtQueryVirtualMemory(MemoryMappedFilenameInformation) and converted to an ANSI string.
// Returns an empty string if either the query or the conversion fails.
inline fextl::string GetSectionFilePath(uint64_t Address) {
  // The query writes the UNICODE_STRING header followed by its characters, so reserve room for both.
  struct {
    MEMORY_SECTION_NAME Info;
    std::array<WCHAR, PATH_MAX> PathW;
  } Buffer;

  if (NtQueryVirtualMemory(NtCurrentProcess(), reinterpret_cast<void*>(Address), MemoryMappedFilenameInformation, &Buffer, sizeof(Buffer), NULL)) {
    return {};
  }

  // The conversion allocates PathA.Buffer only on success; on failure the descriptor must not be
  // read or freed, so bail out before touching it.
  STRING PathA {};
  if (RtlUnicodeStringToAnsiString(&PathA, &Buffer.Info.SectionFileName, TRUE)) {
    return {};
  }

  fextl::string Path(PathA.Buffer);
  RtlFreeAnsiString(&PathA);

  return Path;
}

// Returns the portion of `Path` after the last backslash, or all of `Path` when it contains none.
inline std::string_view BaseName(std::string_view Path) {
  const auto LastSeparator = Path.rfind('\\');
  if (LastSeparator == std::string_view::npos) {
    return Path;
  }

  Path.remove_prefix(LastSeparator + 1);
  return Path;
}
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/OvercommitTracker.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Utils/IntervalList.h>
#include <thread>
#include <shared_mutex>


namespace FEX::Windows {
/**
 * @brief Emulates memory overcommit of reserved regions with exceptions
 *
 * Ranges registered with MarkRange are committed lazily from HandleAccessViolation when a fault
 * lands inside them.
 */
class OvercommitTracker {
private:
  bool IsWine;
  FEXCore::IntervalList<uint64_t> OvercommitIntervals;
  std::shared_mutex OvercommitIntervalsMutex; // Guards OvercommitIntervals

public:
  OvercommitTracker(bool IsWine)
    : IsWine {IsWine} {}

  // Starts tracking [Start, Start + Length) as lazily committed.
  void MarkRange(uint64_t Start, uint64_t Length) {
    std::unique_lock Lock {OvercommitIntervalsMutex};
    OvercommitIntervals.Insert({Start, Start + Length});
  }

  // Stops tracking [Start, Start + Length).
  void UnmarkRange(uint64_t Start, uint64_t Length) {
    std::unique_lock Lock {OvercommitIntervalsMutex};
    OvercommitIntervals.Remove({Start, Start + Length});
  }

  // Commits memory for a fault inside a tracked range.
  // Returns true when the fault address was inside a tracked range and a commit was issued,
  // false when the address is untracked or the containing region could not be queried.
  bool HandleAccessViolation(uint64_t FaultAddress) {
    std::shared_lock Lock {OvercommitIntervalsMutex};
    auto Query = OvercommitIntervals.Query(FaultAddress);

    if (Query.Enclosed) {
      if (IsWine) {
        // Commit from the allocation base up to the end of the region containing the fault.
        MEMORY_BASIC_INFORMATION Info;
        if (NtQueryVirtualMemory(NtCurrentProcess(), reinterpret_cast<void*>(FaultAddress), MemoryBasicInformation, &Info, sizeof(Info), nullptr)) {
          // Info is not filled in on failure; using it would commit an arbitrary range.
          return false;
        }
        const auto CommitSize = reinterpret_cast<SIZE_T>(Info.BaseAddress) + Info.RegionSize - reinterpret_cast<SIZE_T>(Info.AllocationBase);
        VirtualAlloc(reinterpret_cast<void*>(Info.AllocationBase), CommitSize, MEM_COMMIT, PAGE_READWRITE);
      } else {
        // Commit at most MaxFaultCommitSize bytes starting at the faulting page.
        static constexpr size_t MaxFaultCommitSize = 1024 * 64;
        const auto AlignedFaultAddress = reinterpret_cast<void*>(FaultAddress & FEXCore::Utils::FEX_PAGE_MASK);
        VirtualAlloc(AlignedFaultAddress, std::min(Query.Size, MaxFaultCommitSize), MEM_COMMIT, PAGE_READWRITE);
      }
      return true;
    }
    return false;
  }
};
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/PortabilityInfo.h
================================================
// SPDX-License-Identifier: MIT
#pragma once
#include "Common/Config.h"

namespace FEX {
// Reads the portable-mode configuration from the environment.
//
// Portable mode is reported only when FEX_PORTABLE parses (via strtol, base auto-detected) to a
// non-zero value AND LOCALAPPDATA is set; the LOCALAPPDATA value is returned as the second field.
// In every other case a "not portable" result is returned.
static inline FEX::Config::PortableInformation ReadPortabilityInformation() {
  const FEX::Config::PortableInformation BadResult {false, {}};
  const char* PortableConfig = getenv("FEX_PORTABLE");
  if (!PortableConfig || strtol(PortableConfig, nullptr, 0) == 0) {
    return BadResult;
  }

  // getenv yields a null pointer for an unset variable; never forward that as a path.
  const char* LocalAppData = getenv("LOCALAPPDATA");
  if (!LocalAppData) {
    return BadResult;
  }

  return {true, LocalAppData};
}
} // namespace FEX


================================================
FILE: Source/Windows/Common/Priv.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <exception>
#include <winternl.h>

// Returns the pointer from NtCurrentTeb() reinterpreted as the __TEB layout.
static inline __TEB* GetCurrentTEB() {
  return reinterpret_cast<__TEB*>(NtCurrentTeb());
}

// Returns the Peb pointer stored in the current thread's TEB.
static inline __PEB* GetCurrentPEB() {
  return GetCurrentTEB()->Peb;
}

// Converts an NTSTATUS into a boolean result.
// A zero status yields true. Any other status is translated with RtlNtStatusToDosError, stored
// in the current TEB's LastErrorValue, and yields false.
static inline bool WinAPIReturn(NTSTATUS Status) {
  const bool Succeeded = (Status == 0);
  if (!Succeeded) {
    GetCurrentTEB()->LastErrorValue = RtlNtStatusToDosError(Status);
  }
  return Succeeded;
}

// Returns a UNICODE_STRING descriptor initialised from `String` via RtlInitUnicodeString.
static inline UNICODE_STRING InitUnicodeString(const wchar_t* String) {
  UNICODE_STRING StringDesc;
  RtlInitUnicodeString(&StringDesc, String);
  return StringDesc;
}

// Returns a STRING descriptor initialised from `String` via RtlInitAnsiString.
static inline STRING InitAnsiString(const char* String) {
  STRING StringDesc;
  RtlInitAnsiString(&StringDesc, String);
  return StringDesc;
}

class ScopedUnicodeString {
private:
  UNICODE_STRING Str {};
public:
  ScopedUnicodeString() = default;

  ScopedUnicodeString(const char* AStr) {
    RtlCreateUnicodeStringFromAsciiz(&Str, AStr);
  }

  ~ScopedUnicodeString() {
    RtlFreeUnicodeString(&Str);
  }

  UNICODE_STRING* operator->() {
    return &Str;
  }

  UNICODE_STRING& operator*() {
    return Str;
  }
};


// Marks an unimplemented code path: requests process termination with exit status 0, then issues
// __fastfail(0) in case execution continues past the termination request.
#define UNIMPLEMENTED()                        \
  do {                                         \
    NtTerminateProcess(NtCurrentProcess(), 0); \
    __fastfail(0);                             \
  } while (0)

// Declares `Name` with C linkage, defines the function pointers `__imp_<Name>` and
// `__imp_aux_<Name>` initialised to it, and then opens the function's definition; the function
// body must follow the macro invocation.
#define DLLEXPORT_FUNC(Ret, Name, Args) \
  extern "C" Ret Name Args;             \
  Ret(*__imp_##Name) Args = Name;       \
  Ret(*__imp_aux_##Name) Args = Name;   \
  Ret Name Args


================================================
FILE: Source/Windows/Common/SHMStats.cpp
================================================
// SPDX-License-Identifier: MIT
#include "Windows/Common/SHMStats.h"

#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/TypeDefines.h>

#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <winnt.h>
#include <wine/debug.h>

namespace FEX::Windows {
// Issues a raw Linux system call (number 172 placed in x8, `svc #0`) and returns to the caller
// with whatever the kernel left in the return register.
// NOTE(review): 172 is presumably the AArch64 getpid syscall number, as the function name implies — confirm.
// The function is naked, so the body consists solely of the assembly below (including its own `ret`).
__attribute__((naked)) uint64_t linux_getpid() {
  asm volatile(R"(
  mov x8, 172;
  svc #0;
  ret;
  )" ::
                 : "r0", "r8");
}

uint32_t StatAlloc::FrontendAllocateSlots(uint32_t NewSize) {
  if (CurrentSize == MAX_STATS_SIZE || !UsingNTQueryPath) {
    LogMan::Msg::DFmt("Ran out of slots. Can't allocate more");
    return CurrentSize;
  }

  MEMORY_FEX_STATS_SHM_INFORMATION Info {
    .shm_base = nullptr,
    .map_size = std::min(CurrentSize * 2, MAX_STATS_SIZE),
    .max_size = MAX_STATS_SIZE,
  };
  size_t Length {};
  auto Result = NtQueryVirtualMemory(NtCurrentProcess(), nullptr, MemoryFexStatsShm, &Info, sizeof(Info), &Length);
  if (!Result) {
    CurrentSize = Info.map_size;
  }

  return CurrentSize;
}

// Sets up the shared-memory stats region.
//
// First tries the NtQueryVirtualMemory(MemoryFexStatsShm) path; if that succeeds the returned
// mapping is used and can later be grown by FrontendAllocateSlots. Otherwise falls back to
// creating /dev/shm/fex-<pid>-stats and mapping it at MAX_STATS_SIZE. If any step of the
// fallback fails the constructor returns without writing the stats header.
StatAlloc::StatAlloc(FEXCore::SHMStats::AppType AppType) {
  // Try wine+fex magic path.

  {
    MEMORY_FEX_STATS_SHM_INFORMATION Info {
      .shm_base = nullptr,
      .map_size = FEXCore::Utils::FEX_PAGE_SIZE,
      .max_size = MAX_STATS_SIZE,
    };
    size_t Length {};
    auto Result = NtQueryVirtualMemory(NtCurrentProcess(), nullptr, MemoryFexStatsShm, &Info, sizeof(Info), &Length);
    if (!Result) {
      UsingNTQueryPath = true;
      CurrentSize = Info.map_size;
      Base = Info.shm_base;
      SaveHeader(AppType);
      return;
    }
  }
  CurrentSize = MAX_STATS_SIZE;

  auto handle = CreateFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str(), GENERIC_READ | GENERIC_WRITE,
                           FILE_SHARE_READ, nullptr, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, nullptr);
  if (handle == INVALID_HANDLE_VALUE) {
    // Nothing to back the section with.
    return;
  }

  // Create the section mapping for the file handle for the full size.
  HANDLE SectionMapping;
  LARGE_INTEGER SectionSize {{MAX_STATS_SIZE}};
  auto Result = NtCreateSection(&SectionMapping, SECTION_EXTEND_SIZE | SECTION_MAP_READ | SECTION_MAP_WRITE, nullptr, &SectionSize,
                                PAGE_READWRITE, SEC_COMMIT, handle);

  // Section mapping is used from now on; the file handle is not needed whether or not creation succeeded.
  CloseHandle(handle);
  if (Result != 0) {
    return;
  }

  // Now actually map the view of the section.
  Base = 0;
  size_t FullSize = MAX_STATS_SIZE;
  Result = NtMapViewOfSection(SectionMapping, NtCurrentProcess(), &Base, 0, 0, nullptr, &FullSize, ViewUnmap, MEM_RESERVE | MEM_TOP_DOWN,
                              PAGE_READWRITE);

  // The handle is not stored anywhere, so release it here in both outcomes; a mapped view keeps
  // the section alive on its own.
  CloseHandle(SectionMapping);
  if (Result != 0) {
    return;
  }

  // Once WINE supports NtExtendSection and SECTION_EXTEND_SIZE correctly then we can map/commit a single page, map the full MAX_STATS_SIZE
  // view as reserved, and extend the view using NtExtendSection.
  SaveHeader(AppType);
}
// Deletes this process's /dev/shm/fex-<pid>-stats file. The call is made unconditionally and its
// result is not checked.
StatAlloc::~StatAlloc() {
  DeleteFile(fextl::fmt::format("/dev/shm/fex-{}-stats", linux_getpid()).c_str());
}

} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/SHMStats.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include "Common/SHMStats.h"

namespace FEX::Windows {
// Windows frontend implementation of the shared-memory stats allocator.
class StatAlloc final : public FEX::SHMStats::StatAllocBase {
public:
  StatAlloc(FEXCore::SHMStats::AppType AppType);
  virtual ~StatAlloc();

  // Reserves a stats slot for thread `TID` by deferring to the base allocator.
  FEXCore::SHMStats::ThreadStats* AllocateSlot(uint32_t TID) {
    auto* Slot = StatAllocBase::AllocateSlot(TID);
    return Slot;
  }

  // Releases a slot previously returned by AllocateSlot; a null slot is ignored.
  void DeallocateSlot(FEXCore::SHMStats::ThreadStats* AllocatedSlot) {
    if (AllocatedSlot) {
      StatAllocBase::DeallocateSlot(AllocatedSlot);
    }
  }

private:
  // Called by the base allocator to grow the backing mapping.
  uint32_t FrontendAllocateSlots(uint32_t NewSize) override;
  // True when the mapping came from the NtQueryVirtualMemory path.
  bool UsingNTQueryPath {};
};

} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/TSOHandlerConfig.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <FEXCore/Core/Context.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>

namespace FEX::Windows {
// Applies the TSO-related configuration options at construction time and remembers which
// unaligned-access handler type was selected.
class TSOHandlerConfig final {
public:
  TSOHandlerConfig(FEXCore::Context::Context& CTX) {
    using HandlerType = FEXCore::ArchHelpers::Arm64::UnalignedHandlerType;
    UnalignedHandlerType = HalfBarrierTSOEnabled() ? HandlerType::HalfBarrier : HandlerType::NonAtomic;

    // Ask for hardware TSO and tell the context if the request was accepted.
    if (TSOEnabled()) {
      BOOL Enable = TRUE;
      const NTSTATUS Status = NtSetInformationProcess(NtCurrentProcess(), ProcessFexHardwareTso, &Enable, sizeof(Enable));
      if (Status == STATUS_SUCCESS) {
        CTX.SetHardwareTSOSupport(true);
      }
    }

    // Emulation is always requested; the other two bits follow their config options.
    const uint64_t StrictFlag = StrictInProcessSplitLocks() ? FEX_UNALIGN_ATOMIC_STRICT_SPLIT_LOCKS : 0;
    const uint64_t BackpatchFlag = KernelUnalignedAtomicBackpatching() ? FEX_UNALIGN_ATOMIC_BACKPATCH : 0;
    uint64_t Flags = StrictFlag | BackpatchFlag | FEX_UNALIGN_ATOMIC_EMULATE;

    const auto UnalignStatus = NtSetInformationProcess(NtCurrentProcess(), ProcessFexUnalignAtomic, &Flags, sizeof(Flags));
    if (UnalignStatus == STATUS_SUCCESS) {
      LogMan::Msg::IFmt("FEX: Kernel unaligned atomics enabled!");
    }
  }

  // Handler type chosen from the HalfBarrierTSOEnabled option.
  FEXCore::ArchHelpers::Arm64::UnalignedHandlerType GetUnalignedHandlerType() const {
    return UnalignedHandlerType;
  }

private:
  FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
  FEX_CONFIG_OPT(HalfBarrierTSOEnabled, HALFBARRIERTSOENABLED);
  FEX_CONFIG_OPT(StrictInProcessSplitLocks, STRICTINPROCESSSPLITLOCKS);
  FEX_CONFIG_OPT(KernelUnalignedAtomicBackpatching, KERNELUNALIGNEDATOMICBACKPATCHING);

  FEXCore::ArchHelpers::Arm64::UnalignedHandlerType UnalignedHandlerType {FEXCore::ArchHelpers::Arm64::UnalignedHandlerType::HalfBarrier};
};
} // namespace FEX::Windows


================================================
FILE: Source/Windows/Common/WinAPI/Alloc.cpp
================================================
// SPDX-License-Identifier: MIT
#define NTDDI_VERSION 0x0A000005
#define WINAPI
#define WINBASEAPI

#include <cstdlib>
#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <winternl.h>
#include <windows.h>
#include <processenv.h>
#include "../Priv.h"

#ifndef _M_ARM64EC
namespace {
// Cached result of the SystemEmulationBasicInformation query; read by MakeWOW64AddressReqs().
SYSTEM_BASIC_INFORMATION BasicInfo;

// Fills BasicInfo. The NTSTATUS result of the query is not checked.
void InitBasicInfo() {
  NtQuerySystemInformation(SystemEmulationBasicInformation, &BasicInfo, sizeof(BasicInfo), nullptr);
}

// Function pointer emitted into the ".CRT$FEXH" section and marked `used` so it is not discarded.
// NOTE(review): presumably the runtime walks this section at startup to run InitBasicInfo — confirm.
__attribute__((used, section(".CRT$FEXH"))) void (*_InitBasicInfo)(void) = InitBasicInfo;

// Builds the address requirements attached to address-less allocations: only LowestStartingAddress
// is set, derived from the cached BasicInfo; every other field stays zero-initialized.
MEM_ADDRESS_REQUIREMENTS MakeWOW64AddressReqs() {
  // Mask that rounds a value down to a multiple of the allocation granularity.
  const auto GranuleMask = ~(BasicInfo.AllocationGranularity - 1);

  // NOTE(review): this rounds HighestUserAddress *down*, so the final granule below it stays
  // eligible — confirm that is intended.
  MEM_ADDRESS_REQUIREMENTS Requirements {};
  Requirements.LowestStartingAddress = reinterpret_cast<void*>(BasicInfo.HighestUserAddress & GranuleMask);
  return Requirements;
}
} // namespace
#endif

DLLEXPORT_FUNC(void*, VirtualAlloc, (void* lpAddress, SIZE_T dwSize, DWORD flAllocationType, DWORD flProtect)) {
  NTSTATUS Status;
#ifndef _M_ARM64EC
  if (!lpAddress) {
    // Add address requirements for WOW64 to limit allocations to outside the 32-bit user address space
    MEM_EXTENDED_PARAMETER ExtParam {};
    MEM_ADDRESS_REQUIREMENTS AddrReq = MakeWOW64AddressReqs();

    ExtParam.Type = MemExtendedParameterAddressRequirements;
    ExtParam.Pointer = &AddrReq;

    Status = NtAllocateVirtualMemoryEx(NtCurrentProcess(), &lpAddress, &dwSize, flAllocationType, flProtect, &ExtParam, 1);
  } else {
#endif
    Status = NtAllocateVirtualMemory(NtCurrentProcess(), &lpAddress, 0, &dwSize, flAllocationType, flProtect);
#ifndef _M_ARM64EC
  }
#endif
  if (Status) {
    SetLastError(RtlNtStatusToDosError(Status));
    return nullptr;
  }
  return lpAddress;
}

// VirtualQuery shim: queries MemoryBasicInformation for lpAddress in the current process.
// Returns the size reported by NtQueryVirtualMemory, or 0 after setting the last error.
DLLEXPORT_FUNC(SIZE_T, VirtualQuery, (LPCVOID lpAddress, PMEMORY_BASIC_INFORMATION lpBuffer, SIZE_T dwLength)) {
  SIZE_T BytesReturned;
  void* const OutBuffer = reinterpret_cast<void*>(lpBuffer);
  const NTSTATUS Result = NtQueryVirtualMemory(NtCurrentProcess(), lpAddress, MemoryBasicInformation, OutBuffer, dwLength, &BytesReturned);
  if (Result == 0) {
    return BytesReturned;
  }

  SetLastError(RtlNtStatusToDosError(Result));
  return 0;
}

// VirtualProtect shim: forwards to NtProtectVirtualMemory on the current process and converts the
// resulting NTSTATUS through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, VirtualProtect, (void* lpAddress, SIZE_T dwSize, DWORD flNewProtect, PDWORD lpflOldProtect)) {
  return WinAPIReturn(NtProtectVirtualMemory(NtCurrentProcess(), &lpAddress, &dwSize, flNewProtect, lpflOldProtect));
}

// VirtualAlloc2 shim on top of NtAllocateVirtualMemoryEx. A null Process is replaced with the
// current process. Returns the base address, or nullptr after setting the last error.
DLLEXPORT_FUNC(void*, VirtualAlloc2,
               (HANDLE Process, void* BaseAddress, SIZE_T Size, ULONG AllocationType, ULONG PageProtection,
                MEM_EXTENDED_PARAMETER* ExtendedParameters, ULONG ParameterCount)) {
  NTSTATUS Result;
#ifndef _M_ARM64EC
  if (BaseAddress == nullptr) {
    // No caller-chosen address: append WOW64 address requirements to a stack copy of the caller's
    // parameters so the allocation is limited to outside the 32-bit user address space.
    const ULONG CombinedCount = ParameterCount + 1;
    auto* Combined = reinterpret_cast<MEM_EXTENDED_PARAMETER*>(alloca(CombinedCount * sizeof(MEM_EXTENDED_PARAMETER)));
    if (ExtendedParameters && ParameterCount > 0) {
      memcpy(Combined, ExtendedParameters, ParameterCount * sizeof(MEM_EXTENDED_PARAMETER));
    }

    MEM_ADDRESS_REQUIREMENTS Requirements = MakeWOW64AddressReqs();

    MEM_EXTENDED_PARAMETER& Extra = Combined[ParameterCount];
    Extra.Type = MemExtendedParameterAddressRequirements;
    Extra.Pointer = &Requirements;

    Result = NtAllocateVirtualMemoryEx(Process ? Process : NtCurrentProcess(), &BaseAddress, &Size, AllocationType, PageProtection,
                                       Combined, CombinedCount);
  } else
#endif
  {
    Result = NtAllocateVirtualMemoryEx(Process ? Process : NtCurrentProcess(), &BaseAddress, &Size, AllocationType, PageProtection,
                                       ExtendedParameters, ParameterCount);
  }

  if (Result == 0) {
    return BaseAddress;
  }

  SetLastError(RtlNtStatusToDosError(Result));
  return nullptr;
}

// VirtualFree shim: forwards to NtFreeVirtualMemory on the current process and converts the
// resulting NTSTATUS through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, VirtualFree, (void* lpAddress, SIZE_T dwSize, DWORD dwFreeType)) {
  return WinAPIReturn(NtFreeVirtualMemory(NtCurrentProcess(), &lpAddress, &dwSize, dwFreeType));
}

// FlushInstructionCache shim: forwards to NtFlushInstructionCache (dropping the const qualifier the
// NT call does not accept) and converts the NTSTATUS through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, FlushInstructionCache, (HANDLE hProcess, const void* lpBaseAddress, SIZE_T dwSize)) {
  return WinAPIReturn(NtFlushInstructionCache(hProcess, const_cast<void*>(lpBaseAddress), dwSize));
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, FlsAlloc, (PFLS_CALLBACK_FUNCTION lpCallback)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(void*, FlsGetValue, (DWORD dwFlsIndex)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FlsSetValue, (DWORD dwFlsIndex, void* lpFlsData)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FlsFree, (DWORD dwFlsIndex)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(HLOCAL, LocalFree, (HLOCAL hMem)) {
  UNIMPLEMENTED();
}

// TlsAlloc shim: claims one bit in the PEB TLS bitmap under the PEB lock and clears the calling
// thread's slot for it. Returns the slot index, or the all-ones value when no bit was found.
DLLEXPORT_FUNC(DWORD, TlsAlloc, ()) {
  constexpr DWORD NoSlot = static_cast<DWORD>(-1);

  RtlAcquirePebLock();

  // Cannot use expansion slots or FLS here, as they would be freed before FEX can cleanup.
  const DWORD Index = RtlFindClearBitsAndSet(GetCurrentPEB()->TlsBitmap, 1, 1);
  const bool Claimed = Index != NoSlot;
  if (Claimed) {
    GetCurrentTEB()->TlsSlots[Index] = nullptr;
  }

  RtlReleasePebLock();
  return Index;
}

// TlsGetValue shim: reads the calling thread's TEB TlsSlots entry directly.
// The index is not range-checked here.
DLLEXPORT_FUNC(void*, TlsGetValue, (DWORD dwTlsIndex)) {
  return GetCurrentTEB()->TlsSlots[dwTlsIndex];
}

// TlsSetValue shim: writes the calling thread's TEB TlsSlots entry directly and always reports
// success. The index is not range-checked here.
DLLEXPORT_FUNC(WINBOOL, TlsSetValue, (DWORD dwTlsIndex, void* lpTlsValue)) {
  GetCurrentTEB()->TlsSlots[dwTlsIndex] = lpTlsValue;
  return true;
}

// TlsFree shim: under the PEB lock, clears the slot's bit in the PEB TLS bitmap and issues a
// ThreadZeroTlsCell request for the index. The result of that request becomes the return value.
DLLEXPORT_FUNC(WINBOOL, TlsFree, (DWORD dwTlsIndex)) {
  RtlAcquirePebLock();

  // The bit is cleared unconditionally; there is no check that the slot was allocated.
  RtlClearBits(GetCurrentPEB()->TlsBitmap, dwTlsIndex, 1);
  NTSTATUS Status = NtSetInformationThread(NtCurrentThread(), ThreadZeroTlsCell, &dwTlsIndex, sizeof(dwTlsIndex));
  RtlReleasePebLock();
  return WinAPIReturn(Status);
}


================================================
FILE: Source/Windows/Common/WinAPI/CMakeLists.txt
================================================
# Win32 API shim sources added privately to the CommonWindowsRuntime target.
target_sources(CommonWindowsRuntime PRIVATE Alloc.cpp Sync.cpp IO.cpp Misc.cpp)


================================================
FILE: Source/Windows/Common/WinAPI/IO.cpp
================================================
// SPDX-License-Identifier: MIT
#define NTDDI_VERSION 0x0A000005
#define WINAPI
#define WINBASEAPI

#include <cstdlib>
#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <winternl.h>
#include <windows.h>
#include <processenv.h>
#include <wine/debug.h>
#include "../Priv.h"

namespace {
// Translates a Win32 CreateFile creation disposition into the NT create disposition.
// Any value outside the five handled ones reaches UNIMPLEMENTED().
ULONG CreateDispositionToNT(DWORD Disposition) {
  switch (Disposition) {
  // Dispositions that require or tolerate an existing file.
  case OPEN_EXISTING:
    return FILE_OPEN;
  case OPEN_ALWAYS:
    return FILE_OPEN_IF;
  case TRUNCATE_EXISTING:
    return FILE_OVERWRITE;

  // Dispositions that create a file.
  case CREATE_NEW:
    return FILE_CREATE;
  case CREATE_ALWAYS:
    return FILE_OVERWRITE_IF;

  default:
    UNIMPLEMENTED();
  }
}

// Translates the FILE_FLAG_* bits of a Win32 flags-and-attributes value into NT FILE_* option bits.
// Bits without a mapping below are ignored.
ULONG OpenFlagsToNT(DWORD Flags) {
  ULONG Options = 0;
  if (Flags & FILE_FLAG_BACKUP_SEMANTICS) {
    Options |= FILE_OPEN_FOR_BACKUP_INTENT;
  }
  if (Flags & FILE_FLAG_DELETE_ON_CLOSE) {
    Options |= FILE_DELETE_ON_CLOSE;
  }
  if (Flags & FILE_FLAG_NO_BUFFERING) {
    Options |= FILE_NO_INTERMEDIATE_BUFFERING;
  }
  if (Flags & FILE_FLAG_RANDOM_ACCESS) {
    Options |= FILE_RANDOM_ACCESS;
  }
  if (Flags & FILE_FLAG_SEQUENTIAL_SCAN) {
    Options |= FILE_SEQUENTIAL_ONLY;
  }
  if (Flags & FILE_FLAG_WRITE_THROUGH) {
    Options |= FILE_WRITE_THROUGH;
  }
  return Options;
}

// Translates a Win32 FILE_INFO_BY_HANDLE_CLASS into the NT FILE_INFORMATION_CLASS.
// Only the basic and standard classes are handled; anything else reaches UNIMPLEMENTED().
FILE_INFORMATION_CLASS FileInfoClassToNT(FILE_INFO_BY_HANDLE_CLASS InformationClass) {
  switch (InformationClass) {
  case FileStandardInfo:
    return FileStandardInformation;
  case FileBasicInfo:
    return FileBasicInformation;
  default:
    UNIMPLEMENTED();
  }
}
} // namespace

// Narrow-string variant: converts the path with ScopedUnicodeString and delegates to DeleteFileW.
DLLEXPORT_FUNC(BOOL, DeleteFileA, (LPCSTR lpFileName)) {
  ScopedUnicodeString FileName {lpFileName};
  return DeleteFileW(FileName->Buffer);
}

// DeleteFileW shim: opens the file with FILE_DELETE_ON_CLOSE and closes the handle again.
// Returns false with ERROR_PATH_NOT_FOUND when the DOS path cannot be converted to an NT path;
// otherwise the result comes from WinAPIReturn on the last NT status.
DLLEXPORT_FUNC(BOOL, DeleteFileW, (LPCWSTR lpFileName)) {
  UNICODE_STRING PathW;
  RtlInitUnicodeString(&PathW, lpFileName);

  ScopedUnicodeString NTPath;
  if (!RtlDosPathNameToNtPathName_U(PathW.Buffer, &*NTPath, nullptr, nullptr)) {
    SetLastError(ERROR_PATH_NOT_FOUND);
    return false;
  }

  OBJECT_ATTRIBUTES ObjAttributes;
  InitializeObjectAttributes(&ObjAttributes, &*NTPath, OBJ_CASE_INSENSITIVE, nullptr, nullptr);

  HANDLE Handle;
  IO_STATUS_BLOCK IOSB;

  // Open (never create) a non-directory file with DELETE access and delete-on-close semantics.
  NTSTATUS Status =
    NtCreateFile(&Handle, SYNCHRONIZE | DELETE, &ObjAttributes, &IOSB, nullptr, 0, FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
                 FILE_OPEN, FILE_DELETE_ON_CLOSE | FILE_NON_DIRECTORY_FILE, nullptr, 0);
  // Only close the handle when the open succeeded; the close status then becomes the result.
  if (WinAPIReturn(Status)) {
    Status = NtClose(Handle);
  }

  return WinAPIReturn(Status);
}

// Narrow-string variant: converts the path with ScopedUnicodeString and delegates to CreateFileW
// with all other arguments unchanged.
DLLEXPORT_FUNC(HANDLE, CreateFileA,
               (LPCSTR lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes,
                DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile)) {

  ScopedUnicodeString FileName {lpFileName};
  return CreateFileW(FileName->Buffer, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes,
                     hTemplateFile);
}

// CreateFileW shim on top of NtCreateFile.
//
// The DOS path is converted to an NT path; failure yields INVALID_HANDLE_VALUE with
// ERROR_PATH_NOT_FOUND. GENERIC_READ and SYNCHRONIZE are always added to the requested access and
// the file is opened for synchronous, non-alertable I/O. lpSecurityAttributes and hTemplateFile are
// ignored. Returns the new handle, or INVALID_HANDLE_VALUE when WinAPIReturn reports failure.
DLLEXPORT_FUNC(HANDLE, CreateFileW,
               (LPCWSTR lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes,
                DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile)) {
  UNICODE_STRING PathW;
  RtlInitUnicodeString(&PathW, lpFileName);

  ScopedUnicodeString NTPath;
  if (!RtlDosPathNameToNtPathName_U(PathW.Buffer, &*NTPath, nullptr, nullptr)) {
    SetLastError(ERROR_PATH_NOT_FOUND);
    return INVALID_HANDLE_VALUE;
  }

  OBJECT_ATTRIBUTES ObjAttributes;
  InitializeObjectAttributes(&ObjAttributes, &*NTPath, OBJ_CASE_INSENSITIVE, nullptr, nullptr);

  // dwFlagsAndAttributes carries two things: FILE_ATTRIBUTE_* bits, which belong in NtCreateFile's
  // FileAttributes argument, and FILE_FLAG_* bits, which translate to CreateOptions. The mask is
  // the value of FILE_ATTRIBUTE_VALID_FLAGS, spelled out to avoid depending on that macro.
  constexpr DWORD AttributeMask = 0x00007fb7;
  const ULONG FileAttributes = dwFlagsAndAttributes & AttributeMask;
  const ULONG CreateOptions = OpenFlagsToNT(dwFlagsAndAttributes) | FILE_SYNCHRONOUS_IO_NONALERT;

  HANDLE Handle;
  IO_STATUS_BLOCK IOSB;
  NTSTATUS Status = NtCreateFile(&Handle, dwDesiredAccess | GENERIC_READ | SYNCHRONIZE, &ObjAttributes, &IOSB, nullptr, FileAttributes,
                                 dwShareMode, CreateDispositionToNT(dwCreationDisposition), CreateOptions, nullptr, 0);
  return WinAPIReturn(Status) ? Handle : INVALID_HANDLE_VALUE;
}

// WriteFile shim: synchronous write through NtWriteFile at the handle's current position.
// Overlapped I/O is not supported (reaches UNIMPLEMENTED()). When lpNumberOfBytesWritten is
// non-null it receives the byte count from the I/O status block.
DLLEXPORT_FUNC(WINBOOL, WriteFile,
               (HANDLE hFile, const void* lpBuffer, DWORD nNumberOfBytesToWrite, LPDWORD lpNumberOfBytesWritten, LPOVERLAPPED lpOverlapped)) {
  // Zero-initialized so a call that fails before the status block is filled reports 0 bytes
  // instead of reading indeterminate memory.
  IO_STATUS_BLOCK IOSB {};
  if (lpOverlapped) {
    UNIMPLEMENTED();
  }
  NTSTATUS Status = NtWriteFile(hFile, nullptr, nullptr, nullptr, &IOSB, lpBuffer, nNumberOfBytesToWrite, nullptr, nullptr);
  if (lpNumberOfBytesWritten) {
    *lpNumberOfBytesWritten = static_cast<DWORD>(IOSB.Information);
  }
  return WinAPIReturn(Status);
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(HANDLE, GetStdHandle, (DWORD nStdHandle)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, WriteConsoleA,
               (HANDLE hConsoleOutput, CONST void* lpBuffer, DWORD nNumberOfCharsToWrite, LPDWORD lpNumberOfCharsWritten, void* lpReserved)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, WriteConsoleW,
               (HANDLE hConsoleOutput, CONST void* lpBuffer, DWORD nNumberOfCharsToWrite, LPDWORD lpNumberOfCharsWritten, void* lpReserved)) {
  UNIMPLEMENTED();
}

// SetFilePointerEx shim built on the NT file position/standard information classes.
//
// Queries the current position and the end-of-file offset, computes the new position from
// dwMoveMethod (FILE_BEGIN / FILE_CURRENT / FILE_END; anything else reaches UNIMPLEMENTED()),
// stores it, and optionally reports it through lpNewFilePointer. Any failing NT call is turned
// into the return value through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, SetFilePointerEx, (HANDLE hFile, LARGE_INTEGER liDistanceToMove, PLARGE_INTEGER lpNewFilePointer, DWORD dwMoveMethod)) {
  IO_STATUS_BLOCK IOSB;
  FILE_POSITION_INFORMATION PositionInfo;
  if (NTSTATUS Status = NtQueryInformationFile(hFile, &IOSB, &PositionInfo, sizeof(PositionInfo), FilePositionInformation); Status) {
    return WinAPIReturn(Status);
  }
  FILE_STANDARD_INFORMATION StandardInfo;
  if (NTSTATUS Status = NtQueryInformationFile(hFile, &IOSB, &StandardInfo, sizeof(StandardInfo), FileStandardInformation); Status) {
    return WinAPIReturn(Status);
  }

  switch (dwMoveMethod) {
  case FILE_BEGIN: PositionInfo.CurrentByteOffset = liDistanceToMove; break;
  case FILE_CURRENT: PositionInfo.CurrentByteOffset.QuadPart += liDistanceToMove.QuadPart; break;
  // The distance is relative to the end of the file (typically zero or negative); it must not be
  // dropped, otherwise every FILE_END seek lands exactly on EOF.
  case FILE_END: PositionInfo.CurrentByteOffset.QuadPart = StandardInfo.EndOfFile.QuadPart + liDistanceToMove.QuadPart; break;
  default: UNIMPLEMENTED();
  }
  if (NTSTATUS Status = NtSetInformationFile(hFile, &IOSB, &PositionInfo, sizeof(PositionInfo), FilePositionInformation); Status) {
    return WinAPIReturn(Status);
  }
  if (lpNewFilePointer) {
    *lpNewFilePointer = PositionInfo.CurrentByteOffset;
  }
  return true;
}

// ReadFile shim: synchronous read through NtReadFile at the handle's current position.
// Overlapped I/O is not supported (reaches UNIMPLEMENTED()). When lpNumberOfBytesRead is non-null
// it receives the byte count from the I/O status block.
DLLEXPORT_FUNC(WINBOOL, ReadFile,
               (HANDLE hFile, void* lpBuffer, DWORD nNumberOfBytesToRead, LPDWORD lpNumberOfBytesRead, LPOVERLAPPED lpOverlapped)) {
  // Zero-initialized so a call that fails before the status block is filled reports 0 bytes
  // instead of reading indeterminate memory.
  IO_STATUS_BLOCK IOSB {};
  if (lpOverlapped) {
    UNIMPLEMENTED();
  }
  NTSTATUS Status = NtReadFile(hFile, nullptr, nullptr, nullptr, &IOSB, lpBuffer, nNumberOfBytesToRead, nullptr, nullptr);
  if (lpNumberOfBytesRead) {
    *lpNumberOfBytesRead = static_cast<DWORD>(IOSB.Information);
  }
  return WinAPIReturn(Status);
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FlushFileBuffers, (HANDLE hFile)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetFinalPathNameByHandleA, (HANDLE hFile, LPSTR lpszFilePath, DWORD cchFilePath, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetFinalPathNameByHandleW, (HANDLE hFile, LPWSTR lpszFilePath, DWORD cchFilePath, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, CreateHardLinkA, (LPCSTR lpFileName, LPCSTR lpExistingFileName, LPSECURITY_ATTRIBUTES lpSecurityAttributes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, CreateHardLinkW, (LPCWSTR lpFileName, LPCWSTR lpExistingFileName, LPSECURITY_ATTRIBUTES lpSecurityAttributes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, CreateDirectoryA, (LPCSTR lpPathName, LPSECURITY_ATTRIBUTES lpSecurityAttributes)) {
  UNIMPLEMENTED();
}

// CreateDirectoryW shim: creates a new directory (FILE_CREATE, so an existing one is an error)
// through NtCreateFile. lpSecurityAttributes is ignored.
// Returns false with ERROR_PATH_NOT_FOUND when the DOS path cannot be converted to an NT path;
// otherwise the result is WinAPIReturn of the create status.
DLLEXPORT_FUNC(WINBOOL, CreateDirectoryW, (LPCWSTR lpPathName, LPSECURITY_ATTRIBUTES lpSecurityAttributes)) {
  UNICODE_STRING PathW;
  RtlInitUnicodeString(&PathW, lpPathName);

  ScopedUnicodeString NTPath;
  if (!RtlDosPathNameToNtPathName_U(PathW.Buffer, &*NTPath, nullptr, nullptr)) {
    SetLastError(ERROR_PATH_NOT_FOUND);
    return false;
  }

  OBJECT_ATTRIBUTES ObjAttributes;
  InitializeObjectAttributes(&ObjAttributes, &*NTPath, OBJ_CASE_INSENSITIVE, nullptr, nullptr);

  HANDLE Handle;
  IO_STATUS_BLOCK IOSB;
  NTSTATUS Status = NtCreateFile(&Handle, GENERIC_READ | SYNCHRONIZE, &ObjAttributes, &IOSB, nullptr, FILE_ATTRIBUTE_NORMAL,
                                 FILE_SHARE_READ, FILE_CREATE, FILE_DIRECTORY_FILE | FILE_SYNCHRONOUS_IO_NONALERT, nullptr, 0);

  const auto Created = WinAPIReturn(Status);
  if (Created) {
    // The caller only wants the directory to exist; release the handle NtCreateFile returned so it
    // is not leaked on every successful call.
    NtClose(Handle);
  }
  return Created;
}

// GetFileInformationByHandle shim assembled from the basic and standard info classes.
// Volume serial number and file index are reported as zero. Returns false as soon as either
// underlying query fails.
DLLEXPORT_FUNC(WINBOOL, GetFileInformationByHandle, (HANDLE hFile, LPBY_HANDLE_FILE_INFORMATION lpFileInformation)) {
  FILE_BASIC_INFO Basic;
  FILE_STANDARD_INFO Standard;
  if (!GetFileInformationByHandleEx(hFile, FileBasicInfo, &Basic, sizeof(Basic))) {
    return false;
  }
  if (!GetFileInformationByHandleEx(hFile, FileStandardInfo, &Standard, sizeof(Standard))) {
    return false;
  }

  // Splits a 64-bit timestamp into the low/high halves of a FILETIME.
  const auto ToFileTime = [](const LARGE_INTEGER& Value) {
    return FILETIME {static_cast<DWORD>(Value.LowPart), static_cast<DWORD>(Value.HighPart)};
  };

  BY_HANDLE_FILE_INFORMATION Result {};
  Result.dwFileAttributes = Basic.FileAttributes;
  Result.ftCreationTime = ToFileTime(Basic.CreationTime);
  Result.ftLastAccessTime = ToFileTime(Basic.LastAccessTime);
  Result.ftLastWriteTime = ToFileTime(Basic.LastWriteTime);
  Result.dwVolumeSerialNumber = 0;
  Result.nFileSizeHigh = static_cast<DWORD>(Standard.EndOfFile.HighPart);
  Result.nFileSizeLow = static_cast<DWORD>(Standard.EndOfFile.LowPart);
  Result.nNumberOfLinks = Standard.NumberOfLinks;
  Result.nFileIndexHigh = 0;
  Result.nFileIndexLow = 0;

  *lpFileInformation = Result;
  return true;
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetFileInformationByHandle,
               (HANDLE hFile, FILE_INFO_BY_HANDLE_CLASS FileInformationClass, void* lpFileInformation, DWORD dwBufferSize)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetEndOfFile, (HANDLE hFile)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetFileAttributesA, (LPCSTR lpFileName)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetFileAttributesW, (LPCWSTR lpFileName)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetFileAttributesA, (LPCSTR lpFileName, DWORD dwFileAttributes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetFileAttributesW, (LPCWSTR lpFileName, DWORD dwFileAttributes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetFileTime,
               (HANDLE hFile, CONST FILETIME* lpCreationTime, CONST FILETIME* lpLastAccessTime, CONST FILETIME* lpLastWriteTime)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, MoveFileExA, (LPCSTR lpExistingFileName, LPCSTR lpNewFileName, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, MoveFileExW, (LPCWSTR lpExistingFileName, LPCWSTR lpNewFileName, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, GetDiskFreeSpaceExA,
               (LPCSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailableToCaller, PULARGE_INTEGER lpTotalNumberOfBytes,
                PULARGE_INTEGER lpTotalNumberOfFreeBytes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, GetDiskFreeSpaceExW,
               (LPCWSTR lpDirectoryName, PULARGE_INTEGER lpFreeBytesAvailableToCaller, PULARGE_INTEGER lpTotalNumberOfBytes,
                PULARGE_INTEGER lpTotalNumberOfFreeBytes)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetTempPathA, (DWORD nBufferLength, LPSTR lpBuffer)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetTempPathW, (DWORD nBufferLength, LPWSTR lpBuffer)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(BOOLEAN, CreateSymbolicLinkA, (LPCSTR lpSymlinkFileName, LPCSTR lpTargetFileName, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(BOOLEAN, CreateSymbolicLinkW, (LPCWSTR lpSymlinkFileName, LPCWSTR lpTargetFileName, DWORD dwFlags)) {
  UNIMPLEMENTED();
}

// Always reports true; no state is consulted.
DLLEXPORT_FUNC(WINBOOL, AreFileApisANSI, ()) {
  return true;
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FindNextFileA, (HANDLE hFindFile, LPWIN32_FIND_DATAA lpFindFileData)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FindNextFileW, (HANDLE hFindFile, LPWIN32_FIND_DATAW lpFindFileData)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, FindClose, (HANDLE hFindFile)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(HANDLE, FindFirstFileA, (LPCSTR lpFileName, LPWIN32_FIND_DATAA lpFindFileData)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(HANDLE, FindFirstFileW, (LPCWSTR lpFileName, LPWIN32_FIND_DATAW lpFindFileData)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, DeviceIoControl,
               (HANDLE hDevice, DWORD dwIoControlCode, void* lpInBuffer, DWORD nInBufferSize, void* lpOutBuffer, DWORD nOutBufferSize,
                LPDWORD lpBytesReturned, LPOVERLAPPED lpOverlapped)) {
  UNIMPLEMENTED();
}

// GetFileInformationByHandleEx shim: maps the Win32 info class with FileInfoClassToNT (which only
// handles the basic and standard classes) and forwards to NtQueryInformationFile. The NTSTATUS is
// converted through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, GetFileInformationByHandleEx,
               (HANDLE hFile, FILE_INFO_BY_HANDLE_CLASS FileInformationClass, void* lpFileInformation, DWORD dwBufferSize)) {
  IO_STATUS_BLOCK IOSB;
  return WinAPIReturn(NtQueryInformationFile(hFile, &IOSB, lpFileInformation, dwBufferSize, FileInfoClassToNT(FileInformationClass)));
}

// GetSystemTimeAsFileTime shim: reads the time with NtQuerySystemTime and stores its low and high
// halves into the caller's FILETIME. The query's status is not checked.
DLLEXPORT_FUNC(void, GetSystemTimeAsFileTime, (LPFILETIME lpSystemTimeAsFileTime)) {
  LARGE_INTEGER Now;
  NtQuerySystemTime(&Now);

  FILETIME& Out = *lpSystemTimeAsFileTime;
  Out.dwLowDateTime = Now.LowPart;
  Out.dwHighDateTime = Now.HighPart;
}

// Delegates to GetSystemTimeAsFileTime; no separate higher-precision time source is used here.
DLLEXPORT_FUNC(void, GetSystemTimePreciseAsFileTime, (LPFILETIME lpSystemTimeAsFileTime)) {
  GetSystemTimeAsFileTime(lpSystemTimeAsFileTime);
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetCurrentDirectoryA, (LPCSTR lpPathName)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(WINBOOL, SetCurrentDirectoryW, (LPCWSTR lpPathName)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetCurrentDirectoryA, (DWORD nBufferLength, LPSTR lpBuffer)) {
  UNIMPLEMENTED();
}

// GetCurrentDirectoryW shim over RtlGetCurrentDirectory_U. The buffer length is scaled from
// characters to bytes on the way in and the result is scaled back to characters on the way out.
// NOTE(review): presumably RtlGetCurrentDirectory_U works in byte counts — confirm.
DLLEXPORT_FUNC(DWORD, GetCurrentDirectoryW, (DWORD nBufferLength, LPWSTR lpBuffer)) {
  return RtlGetCurrentDirectory_U(nBufferLength * sizeof(wchar_t), lpBuffer) / sizeof(wchar_t);
}


================================================
FILE: Source/Windows/Common/WinAPI/Misc.cpp
================================================
// SPDX-License-Identifier: MIT
#define NTDDI_VERSION 0x0A000005
#define WINAPI
#define WINBASEAPI

#include <cstdlib>
#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <winternl.h>
#include <windows.h>
#include <processenv.h>
#include "../Priv.h"

// Narrow-string variant: converts the name with ScopedUnicodeString and delegates to GetModuleHandleW.
DLLEXPORT_FUNC(HMODULE, GetModuleHandleA, (LPCSTR lpModuleName)) {
  ScopedUnicodeString ModuleName {lpModuleName};
  return GetModuleHandleW(ModuleName->Buffer);
}

// GetModuleHandleW shim over LdrGetDllHandle.
// Returns the module handle, or nullptr after storing the translated error via SetLastError.
DLLEXPORT_FUNC(HMODULE, GetModuleHandleW, (LPCWSTR lpModuleName)) {
  UNICODE_STRING Name = InitUnicodeString(lpModuleName);
  HMODULE Module = nullptr;

  const NTSTATUS Result = LdrGetDllHandle(nullptr, 0, &Name, &Module);
  if (Result == 0) {
    return Module;
  }

  SetLastError(RtlNtStatusToDosError(Result));
  return nullptr;
}

// GetProcAddress shim over LdrGetProcedureAddress; the lookup is always by name (ordinal 0 is passed).
// Returns the procedure address, or nullptr after storing the translated error via SetLastError.
DLLEXPORT_FUNC(FARPROC, GetProcAddress, (HMODULE hModule, LPCSTR lpProcName)) {
  STRING Name = InitAnsiString(lpProcName);
  void* Address = nullptr;

  const NTSTATUS Result = LdrGetProcedureAddress(hModule, &Name, 0, &Address);
  if (Result == 0) {
    return reinterpret_cast<FARPROC>(Address);
  }

  SetLastError(RtlNtStatusToDosError(Result));
  return nullptr;
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(void, RaiseException, (DWORD dwExceptionCode, DWORD dwExceptionFlags, DWORD nNumberOfArguments, CONST ULONG_PTR* lpArguments)) {
  UNIMPLEMENTED();
}

// CloseHandle shim: forwards to NtClose and converts the NTSTATUS through WinAPIReturn.
DLLEXPORT_FUNC(WINBOOL, CloseHandle, (HANDLE hObject)) {
  return WinAPIReturn(NtClose(hObject));
}

// Forwards to RtlQueryPerformanceCounter and returns its result unchanged.
DLLEXPORT_FUNC(WINBOOL, QueryPerformanceCounter, (LARGE_INTEGER * lpPerformanceCount)) {
  return RtlQueryPerformanceCounter(lpPerformanceCount);
}

// Forwards to RtlQueryPerformanceFrequency and returns its result unchanged.
DLLEXPORT_FUNC(WINBOOL, QueryPerformanceFrequency, (LARGE_INTEGER * lpFrequency)) {
  return RtlQueryPerformanceFrequency(lpFrequency);
}

// GetSystemInfo shim filled from SystemBasicInformation. The architecture is always reported as
// ARM64 and the processor type/level/revision as zero. If the query fails, *lpSystemInfo is left
// untouched.
DLLEXPORT_FUNC(void, GetSystemInfo, (LPSYSTEM_INFO lpSystemInfo)) {
  SYSTEM_BASIC_INFORMATION Basic;
  const auto QueryStatus = NtQuerySystemInformation(SystemBasicInformation, &Basic, sizeof(Basic), nullptr);
  if (QueryStatus != 0) {
    return;
  }

  void* const LowestAddress = reinterpret_cast<void*>(Basic.LowestUserAddress);
  void* const HighestAddress = reinterpret_cast<void*>(Basic.HighestUserAddress);

  *lpSystemInfo = SYSTEM_INFO {
    .wProcessorArchitecture = PROCESSOR_ARCHITECTURE_ARM64,
    .dwPageSize = Basic.PhysicalPageSize,
    .lpMinimumApplicationAddress = LowestAddress,
    .lpMaximumApplicationAddress = HighestAddress,
    .dwActiveProcessorMask = Basic.ActiveProcessors,
    .dwNumberOfProcessors = static_cast<BYTE>(Basic.NumberOfProcessors),
    .dwProcessorType = 0,
    .dwAllocationGranularity = Basic.AllocationGranularity,
    .wProcessorLevel = 0,
    .wProcessorRevision = 0,
  };
}

// MultiByteToWideChar shim over the Rtl multi-byte conversion helpers. CodePage and dwFlags are
// ignored. A cbMultiByte of -1 means "NUL-terminated, include the terminator". With cchWideChar == 0
// only the required size is computed. Returns the result expressed in wide characters.
DLLEXPORT_FUNC(int, MultiByteToWideChar,
               (UINT CodePage, DWORD dwFlags, LPCCH lpMultiByteStr, int cbMultiByte, LPWSTR lpWideCharStr, int cchWideChar)) {
  DWORD SourceBytes;
  if (cbMultiByte == -1) {
    SourceBytes = strlen(lpMultiByteStr) + 1;
  } else {
    SourceBytes = cbMultiByte;
  }

  DWORD ResultBytes;
  if (cchWideChar != 0) {
    RtlMultiByteToUnicodeN(lpWideCharStr, cchWideChar * sizeof(wchar_t), &ResultBytes, lpMultiByteStr, SourceBytes);
  } else {
    RtlMultiByteToUnicodeSize(&ResultBytes, lpMultiByteStr, SourceBytes);
  }

  // The Rtl helpers report bytes; callers expect a character count.
  return static_cast<int>(ResultBytes / sizeof(wchar_t));
}

// WideCharToMultiByte shim over the Rtl Unicode conversion helpers. CodePage, dwFlags and
// lpDefaultChar are ignored. A cchWideChar of -1 means "NUL-terminated, include the terminator".
// With cbMultiByte == 0 only the required size is computed. *lpUsedDefaultChar, when provided, is
// always set to false. Returns the result in bytes.
DLLEXPORT_FUNC(int, WideCharToMultiByte,
               (UINT CodePage, DWORD dwFlags, LPCWCH lpWideCharStr, int cchWideChar, LPSTR lpMultiByteStr, int cbMultiByte,
                LPCCH lpDefaultChar, LPBOOL lpUsedDefaultChar)) {
  size_t SourceChars;
  if (cchWideChar == -1) {
    SourceChars = wcslen(lpWideCharStr) + 1;
  } else {
    SourceChars = cchWideChar;
  }
  // The Rtl helpers take the source length in bytes.
  DWORD SourceBytes = static_cast<DWORD>(SourceChars * sizeof(wchar_t));

  DWORD ResultBytes;
  if (cbMultiByte != 0) {
    RtlUnicodeToMultiByteN(lpMultiByteStr, cbMultiByte, &ResultBytes, lpWideCharStr, SourceBytes);
  } else {
    RtlUnicodeToMultiByteSize(&ResultBytes, const_cast<wchar_t*>(lpWideCharStr), SourceBytes);
  }

  if (lpUsedDefaultChar != nullptr) {
    *lpUsedDefaultChar = false;
  }
  return static_cast<int>(ResultBytes);
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, FormatMessageA,
               (DWORD dwFlags, const void* lpSource, DWORD dwMessageId, DWORD dwLanguageId, LPSTR lpBuffer, DWORD nSize, va_list* Arguments)) {
  UNIMPLEMENTED();
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, FormatMessageW,
               (DWORD dwFlags, const void* lpSource, DWORD dwMessageId, DWORD dwLanguageId, LPWSTR lpBuffer, DWORD nSize, va_list* Arguments)) {
  UNIMPLEMENTED();
}

// Reads the calling thread's last-error value straight from its TEB.
DLLEXPORT_FUNC(DWORD, GetLastError, ()) {
  return GetCurrentTEB()->LastErrorValue;
}

// Stores the calling thread's last-error value straight into its TEB.
DLLEXPORT_FUNC(void, SetLastError, (DWORD dwErrCode)) {
  GetCurrentTEB()->LastErrorValue = dwErrCode;
}

// RegOpenKeyExA shim limited to HKEY_LOCAL_MACHINE; any other root reaches UNIMPLEMENTED().
// Opens "\Registry\Machine" first, then opens lpSubKey relative to it, and returns the DOS error
// code translated from the NT status of the step that finished last.
DLLEXPORT_FUNC(LONG, RegOpenKeyExA, (HKEY hKey, LPCSTR lpSubKey, DWORD ulOptions, REGSAM samDesired, PHKEY phkResult)) {
  if (hKey != HKEY_LOCAL_MACHINE) {
    UNIMPLEMENTED();
  }

  // Step 1: open the root key that the predefined handle stands for.
  ScopedUnicodeString RootKey {"\\Registry\\Machine"};
  OBJECT_ATTRIBUTES ObjAttributes;
  InitializeObjectAttributes(&ObjAttributes, &*RootKey, OBJ_CASE_INSENSITIVE, nullptr, nullptr);
  HKEY HKLM;
  NTSTATUS Status = NtOpenKeyEx(reinterpret_cast<HANDLE*>(&HKLM), MAXIMUM_ALLOWED, &ObjAttributes, 0);
  if (Status) {
    return RtlNtStatusToDosError(Status);
  }

  // Step 2: open the requested subkey relative to the root, then drop the temporary root handle
  // regardless of the outcome.
  ScopedUnicodeString SubKey {lpSubKey};
  InitializeObjectAttributes(&ObjAttributes, &*SubKey, OBJ_CASE_INSENSITIVE, reinterpret_cast<HANDLE>(HKLM), nullptr);
  Status = NtOpenKeyEx(reinterpret_cast<HANDLE*>(phkResult), samDesired, &ObjAttributes, ulOptions);
  NtClose(HKLM);
  return RtlNtStatusToDosError(Status);
}

// RegGetValueA shim over NtQueryValueKey. Only the form without a subkey and without flags is
// supported; anything else reaches UNIMPLEMENTED(). The value is fetched into a fixed 512-byte
// stack buffer. Returns ERROR_SUCCESS, ERROR_MORE_DATA when the caller's buffer is too small
// (with the needed size stored in *pcbData), or the DOS error translated from the query status.
DLLEXPORT_FUNC(LONG, RegGetValueA, (HKEY hKey, LPCSTR lpSubKey, LPCSTR lpValue, DWORD dwFlags, LPDWORD pdwType, PVOID pvData, LPDWORD pcbData)) {
  if (lpSubKey || dwFlags) {
    UNIMPLEMENTED();
  }

  ScopedUnicodeString ValueName {lpValue};

  // The partial-information header is followed by the value bytes, so reserve room for both.
  union {
    KEY_VALUE_PARTIAL_INFORMATION Info;
    uint8_t Buf[512];
  } Storage;
  ULONG ReturnedSize;
  const NTSTATUS QueryStatus = NtQueryValueKey(hKey, &*ValueName, KeyValuePartialInformation, &Storage.Info, sizeof(Storage), &ReturnedSize);
  if (QueryStatus != 0) {
    return RtlNtStatusToDosError(QueryStatus);
  }

  const auto& Value = Storage.Info;

  if (pdwType != nullptr) {
    *pdwType = Value.Type;
  }

  if (pvData != nullptr) {
    const bool TooSmall = *pcbData < Value.DataLength;
    if (TooSmall) {
      *pcbData = Value.DataLength;
      return ERROR_MORE_DATA;
    }

    memcpy(pvData, &Value.Data, Value.DataLength);
  }

  if (pcbData != nullptr) {
    *pcbData = Value.DataLength;
  }

  return ERROR_SUCCESS;
}

// RegCloseKey shim: closes the key with NtClose and returns the translated DOS error code.
DLLEXPORT_FUNC(LONG, RegCloseKey, (HKEY hKey)) {
  return RtlNtStatusToDosError(NtClose(hKey));
}

// Stub: not implemented; the body only invokes UNIMPLEMENTED().
DLLEXPORT_FUNC(DWORD, GetActiveProcessorCount, (WORD group)) {
  UNIMPLEMENTED();
}


================================================
FILE: Source/Windows/Common/WinAPI/Sync.cpp
================================================
// SPDX-License-Identifier: MIT
#define NTDDI_VERSION 0x0A000005
#define WINAPI
#define WINBASEAPI

#include <cstdlib>
#include <cstdio>
#include <cstdint>
#include <cerrno>
#include <ntstatus.h>
#include <winternl.h>
#include <windows.h>
#include <processenv.h>
#include "../Priv.h"

// WaitOnAddress shim over RtlWaitOnAddress. INFINITE is passed on as a null timeout; any other
// value becomes a relative timeout. Returns true only when the wait reports STATUS_SUCCESS.
WINBOOL WaitOnAddress(volatile void* Address, void* CompareAddress, SIZE_T AddressSize, DWORD dwMilliseconds) {
  // A negative value indicates a relative time measured in 100ns intervals.
  LARGE_INTEGER RelativeTimeout;
  RelativeTimeout.QuadPart = static_cast<ULONGLONG>(dwMilliseconds) * -10000;

  LARGE_INTEGER* TimeoutArg = &RelativeTimeout;
  if (dwMilliseconds == INFINITE) {
    TimeoutArg = nullptr;
  }

  const auto WaitStatus = RtlWaitOnAddress(const_cast<void*>(Address), CompareAddress, AddressSize, TimeoutArg);
  return WaitStatus == STATUS_SUCCESS;
}

// Forwards to RtlWakeAddressAll.
void WakeByAddressAll(PVOID Address) {
  RtlWakeAddressAll(Address);
}

// Forwards to RtlWakeAddressSingle.
void WINAPI WakeByAddressSingle(PVOID Address) {
  RtlWakeAddressSingle(Address);
}

// Forwards to RtlInitializeSRWLock.
DLLEXPORT_FUNC(void, InitializeSRWLock, (PSRWLOCK SRWLock)) {
  RtlInitializeSRWLock(SRWLock);
}

// Forwards to RtlAcquireSRWLockExclusive.
void AcquireSRWLockExclusive(PSRWLOCK SRWLock) {
  RtlAcquireSRWLockExclusive(SRWLock);
}

// Forwards to RtlReleaseSRWLockExclusive.
void ReleaseSRWLockExclusive(PSRWLOCK SRWLock) {
  RtlReleaseSRWLockExclusive(SRWLock);
}

// Forwards to RtlAcquireSRWLockShared.
void AcquireSRWLockShared(PSRWLOCK SRWLock) {
  RtlAcquireSRWLockShared(SRWLock);
}

// Thin wrapper: forwards SRWLock to RtlReleaseSRWLockShared.
void ReleaseSRWLockShared(PSRWLOCK SRWLock) {
  RtlReleaseSRWLockShared(SRWLock);
}

// Thin wrapper: returns the result of RtlTryAcquireSRWLockShared(SRWLock) unchanged.
DLLEXPORT_FUNC(BOOLEAN, TryAcquireSRWLockShared, (PSRWLOCK SRWLock)) {
  return RtlTryAcquireSRWLockShared(SRWLock);
}

// Thin wrapper: returns the result of RtlTryAcquireSRWLockExclusive(SRWLock) unchanged.
DLLEXPORT_FUNC(BOOLEAN, TryAcquireSRWLockExclusive, (PSRWLOCK SRWLock)) {
  return RtlTryAcquireSRWLockExclusive(SRWLock);
}

// Thin wrapper: forwards lpCriticalSection to RtlInitializeCriticalSection.
// Anything the Rtl call returns is ignored.
DLLEXPORT_FUNC(void, InitializeCriticalSection, (LPCRITICAL_SECTION lpCriticalSection)) {
  RtlInitializeCriticalSection(lpCriticalSection);
}

// Thin wrapper: forwards lpCriticalSection to RtlEnterCriticalSection.
// Anything the Rtl call returns is ignored.
DLLEXPORT_FUNC(void, EnterCriticalSection, (LPCRITICAL_SECTION lpCriticalSection)) {
  RtlEnterCriticalSection(lpCriticalSection);
}

// Thin wrapper: forwards lpCriticalSection to RtlLeaveCriticalSection.
// Anything the Rtl call returns is ignored.
DLLEXPORT_FUNC(void, LeaveCriticalSection, (LPCRITICAL_SECTION lpCriticalSection)) {
  RtlLeaveCriticalSection(lpCriticalSection);
}

// Thin wrapper: returns the result of RtlTryEnterCriticalSection(lpCriticalSection) as a WINBOOL.
DLLEXPORT_FUNC(WINBOOL, TryEnterCriticalSection, (LPCRITICAL_SECTION lpCriticalSection)) {
  return RtlTryEnterCriticalSection(lpCriticalSection);
}

// Thin wrapper: forwards lpCriticalSection to RtlDeleteCriticalSection.
// Anything the Rtl call returns is ignored.
DLLEXPORT_FUNC(void, DeleteCriticalSection, (LPCRITICAL_SECTION lpCriticalSection)) {
  RtlDeleteCriticalSection(lpCriticalSection);
}

// Thin wrapper: forwards ConditionVariable to RtlInitializeConditionVariable.
DLLEXPORT_FUNC(void, InitializeConditionVariable, (PCONDITION_VARIABLE ConditionVariable)) {
  RtlInitializeConditionVariable(ConditionVariable);
}

// Thin wrapper: forwards ConditionVariable to RtlWakeConditionVariable.
DLLEXPORT_FUNC(void, WakeConditionVariable, (PCONDITION_VARIABLE ConditionVariable)) {
  RtlWakeConditionVariable(ConditionVariable);
}

// Thin wrapper: forwards ConditionVariable to RtlWakeAllConditionVariable.
DLLEXPORT_FUNC(void, WakeAllConditionVariable, (PCONDITION_VARIABLE ConditionVariable)) {
  RtlWakeAllConditionVariable(ConditionVariable);
}

// Sleeps on ConditionVariable while releasing SRWLock, by forwarding to RtlSleepConditionVariableSRW.
//
// dwMilliseconds == INFINITE is forwarded as a null timeout; any other value is converted to a
// relative timeout. Flags is passed through untouched.
//
// Returns nonzero only when the Rtl call reports STATUS_SUCCESS, and zero otherwise (for example
// on STATUS_TIMEOUT). The NTSTATUS must not be returned directly: STATUS_SUCCESS is 0, so a raw
// return would report success as FALSE and a timeout as TRUE.
DLLEXPORT_FUNC(WINBOOL, SleepConditionVariableSRW, (PCONDITION_VARIABLE ConditionVariable, PSRWLOCK SRWLock, DWORD dwMilliseconds, ULONG Flags)) {
  LARGE_INTEGER Time;
  // A negative value indicates a relative time measured in 100ns intervals.
  Time.QuadPart = static_cast<ULONGLONG>(dwMilliseconds) * -10000;
  return RtlSleepConditionVariableSRW(ConditionVariable, SRWLock, dwMilliseconds == INFINITE ? nullptr : &Time, Flags) == STATUS_SUCCESS;
}

// Runs InitFn through RtlRunOnceExecuteOnce and returns the logical negation of its result,
// so a zero result from the Rtl call becomes a nonzero WINBOOL.
DLLEXPORT_FUNC(WINBOOL, InitOnceExecuteOnce, (PINIT_ONCE InitOnce, PINIT_ONCE_FN InitFn, void* Parameter, void** Context)) {
  // The Rtl entry point takes its own callback typedef; reinterpret the Win32 one to match.
  const auto RtlInitRoutine = reinterpret_cast<PRTL_RUN_ONCE_INIT_FN>(InitFn);
  return !RtlRunOnceExecuteOnce(InitOnce, RtlInitRoutine, Parameter, Context);
}

// Stub: no implementation is provided; the body only expands UNIMPLEMENTED().
// NOTE(review): UNIMPLEMENTED is defined outside this view - presumably it reports/aborts; confirm.
DLLEXPORT_FUNC(DWORD, WaitForSingleObjectEx, (HANDLE hHandle, DWORD dwMilliseconds, WINBOOL bAlertable)) {
  UNIMPLEMENTED();
}

// Returns the ProcessHeap field of the structure returned by GetCurrentPEB().
DLLEXPORT_FUNC(HANDLE, GetProcessHeap, ()) {
  return GetCurrentPEB()->ProcessHeap;
}

// Returns ClientId.UniqueProcess from the structure returned by GetCurrentTEB(), narrowed to a DWORD.
DLLEXPORT_FUNC(DWORD, GetCurrentProcessId, ()) {
  // The field is pointer-typed, so go through uintptr_t before narrowing.
  const auto RawProcessId = reinterpret_cast<uintptr_t>(GetCurrentTEB()->ClientId.UniqueProcess);
  return static_cast<DWORD>(RawProcessId);
}

// Returns ClientId.UniqueThread from the structure returned by GetCurrentTEB(), narrowed to a DWORD.
DLLEXPORT_FUNC(DWORD, GetCurrentThreadId, ()) {
  // The field is pointer-typed, so go through uintptr_t before narrowing.
  const auto RawThreadId = reinterpret_cast<uintptr_t>(GetCurrentTEB()->ClientId.UniqueThread);
  return static_cast<DWORD>(RawThreadId);
}

// Stub: no implementation is provided; the body only expands UNIMPLEMENTED().
// NOTE(review): UNIMPLEMENTED is defined outside this view - presumably it reports/aborts; confirm.
DLLEXPORT_FUNC(DWORD, GetThreadId, (HANDLE Thread)) {
  UNIMPLEMENTED();
}

// Returns whatever NtCurrentProcess() yields.
// NOTE(review): presumably the current-process pseudo-handle - confirm against its definition.
DLLEXPORT_FUNC(HANDLE, GetCurrentProcess, ()) {
  return NtCurrentProcess();
}

// Returns whatever NtCurrentThread() yields.
// NOTE(review): presumably the current-thread pseudo-handle - confirm against its definition.
DLLEXPORT_FUNC(HANDLE, GetCurrentThread, ()) {
  return NtCurrentThread();
}

// Thin wrapper: returns the result of NtGetCurrentProcessorNumber() as a DWORD.
DLLEXPORT_FUNC(DWORD, GetCurrentProcessorNumber, ()) {
  return NtGetCurrentProcessorNumber();
}

// Stub: no implementation is provided; the body only expands UNIMPLEMENTED().
// NOTE(review): UNIMPLEMENTED is defined outside this view - presumably it reports/aborts; confirm.
DLLEXPORT_FUNC(WINBOOL, SwitchToThread, ()) {
  UNIMPLEMENTED();
}

// Delays the calling thread by passing a relative interval derived from dwMilliseconds to
// NtDelayExecution with its first argument (alertable) set to false. The call's status is ignored.
// NOTE(review): INFINITE is not special-cased here, unlike the other timeout helpers in this
// file; it is converted like any other value (roughly a 49.7 day delay). Confirm this is intended.
DLLEXPORT_FUNC(void, Sleep, (DWORD dwMilliseconds)) {
  // Negative QuadPart denotes a relative time in 100ns units (10000 units per millisecond).
  LARGE_INTEGER RelativeDelay;
  RelativeDelay.QuadPart = -static_cast<LONGLONG>(dwMilliseconds) * 10000;
  NtDelayExecution(false, &RelativeDelay);
}


================================================
FILE: Source/Windows/Defs/ntdll.def
================================================
; generated from wine/dlls/ntdll/ntdll.spec with ordinals removed and RtlIsEcCode added

LIBRARY ntdll.dll

EXPORTS
  A_SHAFinal
  A_SHAInit
  A_SHAUpdate
  ApiSetQueryApiSetPresence
  ApiSetQueryApiSetPresenceEx
  CsrAllocateCaptureBuffer PRIVATE
  CsrAllocateCapturePointer PRIVATE
  CsrAllocateMessagePointer PRIVATE
  CsrCaptureMessageBuffer PRIVATE
  CsrCaptureMessageString PRIVATE
  CsrCaptureTimeout PRIVATE
  CsrClientCallServer PRIVATE
  CsrClientConnectToServer PRIVATE
  CsrClientMaxMessage PRIVATE
  CsrClientSendMessage PRIVATE
  CsrClientThreadConnect PRIVATE
  CsrFreeCaptureBuffer PRIVATE
  CsrIdentifyAlertableThread PRIVATE
  CsrNewThread PRIVATE
  CsrProbeForRead PRIVATE
  CsrProbeForWrite PRIVATE
  CsrSetPriorityClass PRIVATE
  CsrpProcessCallbackRequest PRIVATE
  DbgBreakPoint
  DbgPrint
  DbgPrintEx
  DbgPrompt PRIVATE
  DbgUiConnectToDbg
  DbgUiContinue
  DbgUiConvertStateChangeStructure
  DbgUiDebugActiveProcess
  DbgUiGetThreadDebugObject
  DbgUiIssueRemoteBreakin
  DbgUiRemoteBreakin
  DbgUiSetThreadDebugObject
  DbgUiStopDebugging
  DbgUiWaitStateChange
  DbgUserBreakPoint
  EtwEventActivityIdControl
  EtwEventEnabled
  EtwEventProviderEnabled
  EtwEventRegister
  EtwEventSetInformation
  EtwEventUnregister
  EtwEventWrite
  EtwEventWriteString
  EtwEventWriteTransfer
  EtwGetTraceEnableFlags
  EtwGetTraceEnableLevel
  EtwGetTraceLoggerHandle
  EtwLogTraceEvent
  EtwRegisterTraceGuidsA
  EtwRegisterTraceGuidsW
  EtwTraceMessage
  EtwTraceMessageVa
  EtwUnregisterTraceGuids
  KiRaiseUserExceptionDispatcher
  KiUserApcDispatcher
  KiUserCallbackDispatcher
  KiUserCallbackDispatcherReturn
  KiUserExceptionDispatcher
  LdrAccessResource
  LdrAddDllDirectory
  LdrAddRefDll
  LdrDisableThreadCalloutsForDll
  LdrEnumResources PRIVATE
  LdrEnumerateLoadedModules
  LdrFindEntryForAddress
  LdrFindResourceDirectory_U
  LdrFindResource_U
  LdrFlushAlternateResourceModules PRIVATE
  LdrGetDllDirectory
  LdrGetDllFullName
  LdrGetDllHandle
  LdrGetDllHandleEx
  LdrGetDllPath
  LdrGetProcedureAddress
  LdrInitShimEngineDynamic PRIVATE
  LdrInitializeThunk
  LdrLoadAlternateResourceModule PRIVATE
  LdrLoadDll
  LdrLockLoaderLock
  LdrProcessRelocationBlock
  LdrQueryImageFileExecutionOptions
  LdrQueryProcessModuleInformation
  LdrRegisterDllNotification
  LdrRemoveDllDirectory
  LdrResolveDelayLoadedAPI
  LdrSetAppCompatDllRedirectionCallback PRIVATE
  LdrSetDefaultDllDirectories
  LdrSetDllDirectory
  LdrSetDllManifestProber PRIVATE
  LdrShutdownProcess
  LdrShutdownThread
  LdrSystemDllInitBlock DATA
  LdrUnloadAlternateResourceModule PRIVATE
  LdrUnloadDll
  LdrUnlockLoaderLock
  LdrUnregisterDllNotification
  LdrVerifyImageMatchesChecksum PRIVATE
  MD4Final
  MD4Init
  MD4Update
  MD5Final
  MD5Init
  MD5Update
  NlsAnsiCodePage DATA
  NlsMbCodePageTag DATA
  NlsMbOemCodePageTag DATA
  NtAcceptConnectPort
  NtAccessCheck
  NtAccessCheckAndAuditAlarm
  NtAddAtom
  NtAdjustGroupsToken
  NtAdjustPrivilegesToken
  NtAlertResumeThread
  NtAlertThread
  NtAlertThreadByThreadId
  NtAllocateLocallyUniqueId
  NtAllocateUuids
  NtAllocateVirtualMemory
  NtAllocateVirtualMemoryEx
  NtAreMappedFilesTheSame
  NtAssignProcessToJobObject
  NtCallbackReturn
  NtCancelIoFile
  NtCancelIoFileEx
  NtCancelSynchronousIoFile
  NtCancelTimer
  NtClearEvent
  NtClose
  NtCommitTransaction
  NtCompareObjects
  NtCompleteConnectPort
  NtConnectPort
  NtContinue
  NtCreateDebugObject
  NtCreateDirectoryObject
  NtCreateEvent
  NtCreateFile
  NtCreateIoCompletion
  NtCreateJobObject
  NtCreateKey
  NtCreateKeyTransacted
  NtCreateKeyedEvent
  NtCreateLowBoxToken
  NtCreateMailslotFile
  NtCreateMutant
  NtCreateNamedPipeFile
  NtCreatePagingFile
  NtCreatePort
  NtCreateSection
  NtCreateSemaphore
  NtCreateSymbolicLinkObject
  NtCreateThread
  NtCreateThreadEx
  NtCreateTimer
  NtCreateToken
  NtCreateTransaction
  NtCreateUserProcess
  NtDebugActiveProcess
  NtDebugContinue
  NtDelayExecution
  NtDeleteAtom
  NtDeleteFile
  NtDeleteKey
  NtDeleteValueKey
  NtDeviceIoControlFile
  NtDisplayString
  NtDuplicateObject
  NtDuplicateToken
  NtEnumerateKey
  NtEnumerateValueKey
  NtFilterToken
  NtFindAtom
  NtFlushBuffersFile
  NtFlushInstructionCache
  NtFlushKey
  NtFlushProcessWriteBuffers
  NtFlushVirtualMemory
  NtFreeVirtualMemory
  NtFsControlFile
  NtGetContextThread
  NtGetCurrentProcessorNumber
  NtGetNextThread
  NtGetNlsSectionPtr
  NtGetTickCount
  NtGetWriteWatch
  NtImpersonateAnonymousToken
  NtInitializeNlsFiles
  NtInitiatePowerAction
  NtIsProcessInJob
  NtListenPort
  NtLoadDriver
  NtLoadKey2
  NtLoadKey
  NtLoadKeyEx
  NtLockFile
  NtLockVirtualMemory
  NtMakeTemporaryObject
  NtMapViewOfSection
  NtMapViewOfSectionEx
  NtNotifyChangeDirectoryFile
  NtNotifyChangeKey
  NtNotifyChangeMultipleKeys
  NtOpenDirectoryObject
  NtOpenEvent
  NtOpenFile
  NtOpenIoCompletion
  NtOpenJobObject
  NtOpenKey
  NtOpenKeyEx
  NtOpenKeyTransacted
  NtOpenKeyTransactedEx
  NtOpenKeyedEvent
  NtOpenMutant
  NtOpenProcess
  NtOpenProcessToken
  NtOpenProcessTokenEx
  NtOpenSection
  NtOpenSemaphore
  NtOpenSymbolicLinkObject
  NtOpenThread
  NtOpenThreadToken
  NtOpenThreadTokenEx
  NtOpenTimer
  NtPowerInformation
  NtPrivilegeCheck
  NtProtectVirtualMemory
  NtPulseEvent
  NtQueryAttributesFile
  NtQueryDefaultLocale
  NtQueryDefaultUILanguage
  NtQueryDirectoryFile
  NtQueryDirectoryObject
  NtQueryEaFile
  NtQueryEvent
  NtQueryFullAttributesFile
  NtQueryInformationAtom
  NtQueryInformationFile
  NtQueryInformationJobObject
  NtQueryInformationProcess
  NtQueryInformationThread
  NtQueryInformationToken
  NtQueryInstallUILanguage
  NtQueryIoCompletion
  NtQueryKey
  NtQueryLicenseValue
  NtQueryMultipleValueKey
  NtQueryMutant
  NtQueryObject
  NtQueryPerformanceCounter
  NtQuerySection
  NtQuerySecurityObject
  NtQuerySemaphore
  NtQuerySymbolicLinkObject
  NtQuerySystemEnvironmentValue
  NtQuerySystemEnvironmentValueEx
  NtQuerySystemInformation
  NtQuerySystemInformationEx
  NtQuerySystemTime
  NtQueryTimer
  NtQueryTimerResolution
  NtQueryValueKey
  NtQueryVirtualMemory
  NtQueryVolumeInformationFile
  NtQueueApcThread
  NtRaiseException
  NtRaiseHardError
  NtReadFile
  NtReadFileScatter
  NtReadVirtualMemory
  NtRegisterThreadTerminatePort
  NtReleaseKeyedEvent
  NtReleaseMutant
  NtReleaseSemaphore
  NtRemoveIoCompletion
  NtRemoveIoCompletionEx
  NtRemoveProcessDebug
  NtRenameKey
  NtReplaceKey
  NtReplyWaitReceivePort
  NtRequestWaitReplyPort
  NtResetEvent
  NtResetWriteWatch
  NtRestoreKey
  NtResumeProcess
  NtResumeThread
  NtRollbackTransaction
  NtSaveKey
  NtSecureConnectPort
  NtSetContextThread
  NtSetDebugFilterState
  NtSetDefaultLocale
  NtSetDefaultUILanguage
  NtSetEaFile
  NtSetEvent
  NtSetInformationDebugObject
  NtSetInformationFile
  NtSetInformationJobObject
  NtSetInformationKey
  NtSetInformationObject
  NtSetInformationProcess
  NtSetInformationThread
  NtSetInformationToken
  NtSetInformationVirtualMemory
  NtSetIntervalProfile
  NtSetIoCompletion
  NtSetLdtEntries
  NtSetSecurityObject
  NtSetSystemInformation
  NtSetSystemTime
  NtSetThreadExecutionState
  NtSetTimer
  NtSetTimerResolution
  NtSetValueKey
  NtSetVolumeInformationFile
  NtShutdownSystem
  NtSignalAndWaitForSingleObject
  NtSuspendProcess
  NtSuspendThread
  NtSystemDebugControl
  NtTerminateJobObject
  NtTerminateProcess
  NtTerminateThread
  NtTestAlert
  NtTraceControl
  NtUnloadDriver
  NtUnloadKey
  NtUnlockFile
  NtUnlockVirtualMemory
  NtUnmapViewOfSection
  NtUnmapViewOfSectionEx
  NtWaitForAlertByThreadId
  NtWaitForDebugEvent
  NtWaitForKeyedEvent
  NtWaitForMultipleObjects
  NtWaitForSingleObject
  NtWriteFile
  NtWriteFileGather
  NtWriteVirtualMemory
  NtYieldExecution
  PfxFindPrefix PRIVATE
  PfxInitialize PRIVATE
  PfxInsertPrefix PRIVATE
  PfxRemovePrefix PRIVATE
  ProcessPendingCrossProcessEmulatorWork
  RtlAbortRXact PRIVATE
  RtlAbsoluteToSelfRelativeSD
  RtlAcquirePebLock
  RtlAcquireResourceExclusive
  RtlAcquireResourceShared
  RtlAcquireSRWLockExclusive
  RtlAcquireSRWLockShared
  RtlActivateActivationContext
  RtlActivateActivationContextEx
  RtlActivateActivationContextUnsafeFast PRIVATE
  RtlAddAccessAllowedAce
  RtlAddAccessAllowedAceEx
  RtlAddAccessAllowedObjectAce
  RtlAddAccessDeniedAce
  RtlAddAccessDeniedAceEx
  RtlAddAccessDeniedObjectAce
  RtlAddAce
  RtlAddActionToRXact PRIVATE
  RtlAddAtomToAtomTable
  RtlAddAttributeActionToRXact PRIVATE
  RtlAddAuditAccessAce
  RtlAddAuditAccessAceEx
  RtlAddAuditAccessObjectAce
  RtlAddFunctionTable
  RtlAddGrowableFunctionTable
  RtlAddMandatoryAce
  RtlAddProcessTrustLabelAce
  RtlAddRefActivationContext
  RtlAddVectoredContinueHandler
  RtlAddVectoredExceptionHandler
  RtlAddressInSectionTable
  RtlAdjustPrivilege
  RtlAllocateAndInitializeSid
  RtlAllocateHandle
  RtlAllocateHeap
  RtlAnsiCharToUnicodeChar
  RtlAnsiStringToUnicodeSize
  RtlAnsiStringToUnicodeString
  RtlAppendAsciizToString
  RtlAppendStringToString
  RtlAppendUnicodeStringToString
  RtlAppendUnicodeToString
  RtlApplyRXact PRIVATE
  RtlApplyRXactNoFlush PRIVATE
  RtlAreAllAccessesGranted
  RtlAreAnyAccessesGranted
  RtlAreBitsClear
  RtlAreBitsSet
  RtlAssert
  RtlCaptureContext
  RtlCaptureStackBackTrace
  RtlCharToInteger
  RtlCheckRegistryKey
  RtlClearAllBits
  RtlClearBits
  RtlClosePropertySet PRIVATE
  RtlCompactHeap
  RtlCompareMemory
  RtlCompareMemoryUlong
  RtlCompareString
  RtlCompareUnicodeString
  RtlCompareUnicodeStrings
  RtlCompressBuffer
  RtlComputeCrc32
  RtlConsoleMultiByteToUnicodeN PRIVATE
  RtlConvertExclusiveToShared PRIVATE
  RtlConvertSharedToExclusive PRIVATE
  RtlConvertSidToUnicodeString
  RtlConvertToAutoInheritSecurityObject
  RtlConvertUiListToApiList PRIVATE
  RtlCopyContext
  RtlCopyExtendedContext
  RtlCopyLuid
  RtlCopyLuidAndAttributesArray
  RtlCopyMemory
  RtlCopySecurityDescriptor
  RtlCopySid
  RtlCopySidAndAttributesArray PRIVATE
  RtlCopyString
  RtlCopyUnicodeString
  RtlCreateAcl
  RtlCreateActivationContext
  RtlCreateAndSetSD PRIVATE
  RtlCreateAtomTable
  RtlCreateEnvironment
  RtlCreateHeap
  RtlCreateProcessParameters
  RtlCreateProcessParametersEx
  RtlCreatePropertySet PRIVATE
  RtlCreateQueryDebugBuffer
  RtlCreateRegistryKey
  RtlCreateSecurityDescriptor
  RtlCreateTagHeap PRIVATE
  RtlCreateTimer
  RtlCreateTimerQueue
  RtlCreateUnicodeString
  RtlCreateUnicodeStringFromAsciiz
  RtlCreateUserProcess
  RtlCreateUserSecurityObject PRIVATE
  RtlCreateUserStack
  RtlCreateUserThread
  RtlCustomCPToUnicodeN
  RtlCutoverTimeToSystemTime PRIVATE
  RtlDeNormalizeProcessParams
  RtlDeactivateActivationContext
  RtlDeactivateActivationContextUnsafeFast PRIVATE
  RtlDebugPrintTimes PRIVATE
  RtlDecodePointer
  RtlDecodeSystemPointer
  RtlDecompressBuffer
  RtlDecompressFragment
  RtlDefaultNpAcl
  RtlDelete PRIVATE
  RtlDeleteAce
  RtlDeleteAtomFromAtomTable
  RtlDeleteCriticalSection
  RtlDeleteGrowableFunctionTable
  RtlDeleteElementGenericTable PRIVATE
  RtlDeleteElementGenericTableAvl PRIVATE
  RtlDeleteFunctionTable
  RtlDeleteNoSplay PRIVATE
  RtlDeleteOwnersRanges PRIVATE
  RtlDeleteRange PRIVATE
  RtlDeleteRegistryValue
  RtlDeleteResource
  RtlDeleteSecurityObject
  RtlDeleteTimer
  RtlDeleteTimerQueueEx
  RtlDeregisterWait
  RtlDeregisterWaitEx
  RtlDestroyAtomTable
  RtlDestroyEnvironment
  RtlDestroyHandleTable
  RtlDestroyHeap
  RtlDestroyProcessParameters
  RtlDestroyQueryDebugBuffer
  RtlDetermineDosPathNameType_U
  RtlDllShutdownInProgress
  RtlDoesFileExists_U
  RtlDosPathNameToNtPathName_U
  RtlDosPathNameToNtPathName_U_WithStatus
  RtlDosPathNameToRelativeNtPathName_U
  RtlDosPathNameToRelativeNtPathName_U_WithStatus
  RtlDosSearchPath_U
  RtlDowncaseUnicodeChar
  RtlDowncaseUnicodeString
  RtlDumpResource
  RtlDuplicateUnicodeString
  RtlEmptyAtomTable
  RtlEncodePointer
  RtlEncodeSystemPointer
  RtlEnterCriticalSection
  RtlEnumProcessHeaps PRIVATE
  RtlEnumerateGenericTable PRIVATE
  RtlEnumerateGenericTableWithoutSplaying
  RtlEnumerateProperties PRIVATE
  RtlEqualComputerName
  RtlEqualDomainName
  RtlEqualLuid
  RtlEqualPrefixSid
  RtlEqualSid
  RtlEqualString
  RtlEqualUnicodeString
  RtlEraseUnicodeString
  RtlExitUserProcess
  RtlExitUserThread
  RtlExpandEnvironmentStrings
  RtlExpandEnvironmentStrings_U
  RtlExtendHeap PRIVATE
  RtlFillMemory
  RtlFillMemoryUlong
  RtlFinalReleaseOutOfProcessMemoryStream PRIVATE
  RtlFindActivationContextSectionGuid
  RtlFindActivationContextSectionString
  RtlFindCharInUnicodeString
  RtlFindClearBits
  RtlFindClearBitsAndSet
  RtlFindClearRuns
  RtlFindExportedRoutineByName
  RtlFindLastBackwardRunClear
  RtlFindLastBackwardRunSet
  RtlFindLeastSignificantBit
  RtlFindLongestRunClear
  RtlFindLongestRunSet
  RtlFindMessage
  RtlFindMostSignificantBit
  RtlFindNextForwardRunClear
  RtlFindNextForwardRunSet
  RtlFindRange PRIVATE
  RtlFindSetBits
  RtlFindSetBitsAndClear
  RtlFindSetRuns
  RtlFirstEntrySList
  RtlFirstFreeAce
  RtlFlsAlloc
  RtlFlsFree
  RtlFlsGetValue
  RtlFlsSetValue
  RtlFlushPropertySet PRIVATE
  RtlFormatCurrentUserKeyPath
  RtlFormatMessage
  RtlFormatMessageEx
  RtlFreeActivationContextStack
  RtlFreeAnsiString
  RtlFreeHandle
  RtlFreeHeap
  RtlFreeOemString
  RtlFreeSid
  RtlFreeThreadActivationContextStack
  RtlFreeUnicodeString
  RtlFreeUserStack
  RtlGUIDFromString
  RtlGenerate8dot3Name PRIVATE
  RtlGetAce
  RtlGetActiveActivationContext
  RtlGetCallersAddress
  RtlGetCompressionWorkSpaceSize
  RtlGetControlSecurityDescriptor
  RtlGetCurrentDirectory_U
  RtlGetCurrentPeb
  RtlGetCurrentProcessorNumberEx
  RtlGetCurrentTransaction
  RtlGetDaclSecurityDescriptor
  RtlGetElementGenericTable
  RtlGetEnabledExtendedFeatures
  RtlGetExePath
  RtlGetExtendedContextLength
  RtlGetExtendedContextLength2
  RtlGetExtendedFeaturesMask
  RtlGetFrame
  RtlGetFullPathName_U
  RtlGetGroupSecurityDescriptor
  RtlGetLastNtStatus
  RtlGetLastWin32Error
  RtlGetLocaleFileMappingAddress
  RtlGetLongestNtPathLength
  RtlGetNativeSystemInformation
  RtlGetNtGlobalFlags
  RtlGetNtProductType
  RtlGetNtVersionNumbers
  RtlGetOwnerSecurityDescriptor
  RtlGetProductInfo
  RtlGetProcessHeaps
  RtlGetProcessPreferredUILanguages
  RtlGetSaclSecurityDescriptor
  RtlGetSearchPath
  RtlGetSystemPreferredUILanguages
  RtlGetSystemTimePrecise
  RtlGetThreadErrorMode
  RtlGetThreadPreferredUILanguages
  RtlGetUnloadEventTrace
  RtlGetUnloadEventTraceEx
  RtlGetUserInfoHeap
  RtlGetUserPreferredUILanguages
  RtlGetVersion
  RtlGrowFunctionTable
  RtlGuidToPropertySetName PRIVATE
  RtlHashUnicodeString
  RtlIdentifierAuthoritySid
  RtlIdnToAscii
  RtlIdnToNameprepUnicode
  RtlIdnToUnicode
  RtlImageDirectoryEntryToData
  RtlImageNtHeader
  RtlImageRvaToSection
  RtlImageRvaToVa
  RtlImpersonateSelf
  RtlInitAnsiString
  RtlInitAnsiStringEx
  RtlInitCodePageTable
  RtlInitNlsTables
  RtlInitString
  RtlInitUnicodeString
  RtlInitUnicodeStringEx
  RtlInitializeBitMap
  RtlInitializeConditionVariable
  RtlInitializeContext PRIVATE
  RtlInitializeCriticalSection
  RtlInitializeCriticalSectionAndSpinCount
  RtlInitializeCriticalSectionEx
  RtlInitializeExtendedContext
  RtlInitializeExtendedContext2
  RtlInitializeGenericTable
  RtlInitializeGenericTableAvl
  RtlInitializeHandleTable
  RtlInitializeRXact PRIVATE
  RtlInitializeResource
  RtlInitializeSListHead
  RtlInitializeSRWLock
  RtlInitializeSid
  RtlInsertElementGenericTable PRIVATE
  RtlInsertElementGenericTableAvl
  RtlInstallFunctionTableCallback
  RtlInt64ToUnicodeString
  RtlIntegerToChar
  RtlIntegerToUnicodeString
  RtlInterlockedFlushSList
  RtlInterlockedPopEntrySList
  RtlInterlockedPushEntrySList
  RtlInterlockedPushListSList
  RtlInterlockedPushListSListEx
  RtlIpv4AddressToStringA
  RtlIpv4AddressToStringExA
  RtlIpv4AddressToStringExW
  RtlIpv4AddressToStringW
  RtlIpv4StringToAddressA
  RtlIpv4StringToAddressExA
  RtlIpv4StringToAddressExW
  RtlIpv4StringToAddressW
  RtlIpv6AddressToStringA
  RtlIpv6AddressToStringExA
  RtlIpv6AddressToStringExW
  RtlIpv6AddressToStringW
  RtlIpv6StringToAddressA
  RtlIpv6StringToAddressExA
  RtlIpv6StringToAddressExW
  RtlIpv6StringToAddressW
  RtlIsActivationContextActive
  RtlIsCriticalSectionLocked
  RtlIsCriticalSectionLockedByThread
  RtlIsCurrentProcess
  RtlIsCurrentThread
  RtlIsDosDeviceName_U
  RtlIsEcCode
  RtlIsGenericTableEmpty PRIVATE
  RtlIsNameLegalDOS8Dot3
  RtlIsNormalizedString
  RtlIsProcessorFeaturePresent
  RtlIsTextUnicode
  RtlIsValidHandle
  RtlIsValidIndexHandle
  RtlIsValidLocaleName
  RtlLargeIntegerToChar
  RtlLcidToLocaleName
  RtlLeaveCriticalSection
  RtlLengthRequiredSid
  RtlLengthSecurityDescriptor
  RtlLengthSid
  RtlLocalTimeToSystemTime
  RtlLocaleNameToLcid
  RtlLocateExtendedFeature
  RtlLocateExtendedFeature2
  RtlLocateLegacyContext
  RtlLockHeap
  RtlLookupAtomInAtomTable
  RtlLookupElementGenericTable
  RtlLookupFunctionEntry
  RtlLookupFunctionTable
  RtlMakeSelfRelativeSD
  RtlMapGenericMask
  RtlMoveMemory
  RtlMultiByteToUnicodeN
  RtlMultiByteToUnicodeSize
  RtlNewInstanceSecurityObject PRIVATE
  RtlNewSecurityGrantedAccess PRIVATE
  RtlNewSecurityObject
  RtlNewSecurityObjectEx
  RtlNewSecurityObjectWithMultipleInheritance
  RtlNormalizeProcessParams
  RtlNormalizeString
  RtlNtStatusToDosError
  RtlNtStatusToDosErrorNoTeb
  RtlNumberGenericTableElements
  RtlNumberOfClearBits
  RtlNumberOfSetBits
  RtlOemStringToUnicodeSize
  RtlOemStringToUnicodeString
  RtlOemToUnicodeN
  RtlOpenCrossProcessEmulatorWorkConnection
  RtlOpenCurrentUser
  RtlPcToFileHeader
  RtlPinAtomInAtomTable
  RtlPopFrame
  RtlPrefixString
  RtlPrefixUnicodeString
  RtlProcessFlsData
  RtlPropertySetNameToGuid PRIVATE
  RtlProtectHeap PRIVATE
  RtlPushFrame
  RtlQueryActivationContextApplicationSettings
  RtlQueryAtomInAtomTable
  RtlQueryDepthSList
  RtlQueryDynamicTimeZoneInformation
  RtlQueryEnvironmentVariable_U
  RtlQueryEnvironmentVariable
  RtlQueryHeapInformation
  RtlQueryInformationAcl
  RtlQueryInformationActivationContext
  RtlQueryInformationActiveActivationContext PRIVATE
  RtlQueryInterfaceMemoryStream PRIVATE
  RtlQueryPackageIdentity
  RtlQueryPerformanceCounter
  RtlQueryPerformanceFrequency
  RtlQueryProcessBackTraceInformation PRIVATE
  RtlQueryProcessDebugInformation
  RtlQueryProcessHeapInformation PRIVATE
  RtlQueryProcessLockInformation PRIVATE
  RtlQueryProcessPlaceholderCompatibilityMode
  RtlQueryProperties PRIVATE
  RtlQueryPropertyNames PRIVATE
  RtlQueryPropertySet PRIVATE
  RtlQueryRegistryValues
  RtlQueryRegistryValuesEx
  RtlQuerySecurityObject PRIVATE
  RtlQueryTagHeap PRIVATE
  RtlQueryTimeZoneInformation
  RtlQueryUnbiasedInterruptTime
  RtlQueueApcWow64Thread PRIVATE
  RtlQueueWorkItem
  RtlRaiseException
  RtlRaiseStatus
  RtlRandom
  RtlRandomEx
  RtlReAllocateHeap
  RtlReadMemoryStream PRIVATE
  RtlReadOutOfProcessMemoryStream PRIVATE
  RtlRealPredecessor PRIVATE
  RtlRealSuccessor PRIVATE
  RtlRegisterSecureMemoryCacheCallback PRIVATE
  RtlRegisterWait
  RtlReleaseActivationContext
  RtlReleaseMemoryStream PRIVATE
  RtlReleasePath
  RtlReleasePebLock
  RtlReleaseRelativeName
  RtlReleaseResource
  RtlReleaseSRWLockExclusive
  RtlReleaseSRWLockShared
  RtlRemoteCall PRIVATE
  RtlRemoveVectoredContinueHandler
  RtlRemoveVectoredExceptionHandler
  RtlResetRtlTranslations
  RtlRestoreContext
  RtlRestoreLastWin32Error
  RtlRevertMemoryStream PRIVATE
  RtlRunDecodeUnicodeString PRIVATE
  RtlRunEncodeUnicodeString PRIVATE
  RtlRunOnceBeginInitialize
  RtlRunOnceComplete
  RtlRunOnceExecuteOnce
  RtlRunOnceInitialize
  RtlSecondsSince1970ToTime
  RtlSecondsSince1980ToTime
  RtlSelfRelativeToAbsoluteSD
  RtlSetAllBits
  RtlSetBits
  RtlSetControlSecurityDescriptor
  RtlSetCriticalSectionSpinCount
  RtlSetCurrentDirectory_U
  RtlSetCurrentEnvironment
  RtlSetCurrentTransaction
  RtlSetDaclSecurityDescriptor
  RtlSetEnvironmentVariable
  RtlSetExtendedFeaturesMask
  RtlSetGroupSecurityDescriptor
  RtlSetHeapInformation
  RtlSetInformationAcl PRIVATE
  RtlSetIoCompletionCallback
  RtlSetLastWin32Error
  RtlSetLastWin32ErrorAndNtStatusFromNtStatus
  RtlSetOwnerSecurityDescriptor
  RtlSetProcessPreferredUILanguages
  RtlSetProperties PRIVATE
  RtlSetPropertyClassId PRIVATE
  RtlSetPropertyNames PRIVATE
  RtlSetPropertySetClassId PRIVATE
  RtlSetSaclSecurityDescriptor
  RtlSetSearchPathMode
  RtlSetSecurityObject PRIVATE
  RtlSetThreadErrorMode
  RtlSetThreadPreferredUILanguages
  RtlSetTimeZoneInformation
  RtlSetUnhandledExceptionFilter
  RtlSetUnicodeCallouts PRIVATE
  RtlSetUserFlagsHeap
  RtlSetUserValueHeap
  RtlSizeHeap
  RtlSleepConditionVariableCS
  RtlSleepConditionVariableSRW
  RtlSplay PRIVATE
  RtlStartRXact PRIVATE
  RtlStringFromGUID
  RtlSubAuthorityCountSid
  RtlSubAuthoritySid
  RtlSubtreePredecessor PRIVATE
  RtlSubtreeSuccessor PRIVATE
  RtlSystemTimeToLocalTime
  RtlTimeFieldsToTime
  RtlTimeToElapsedTimeFields
  RtlTimeToSecondsSince1970
  RtlTimeToSecondsSince1980
  RtlTimeToTimeFields
  RtlTryAcquireSRWLockExclusive
  RtlTryAcquireSRWLockShared
  RtlTryEnterCriticalSection
  RtlUTF8ToUnicodeN
  RtlUnicodeStringToAnsiSize
  RtlUnicodeStringToAnsiString
  RtlUnicodeStringToCountedOemString PRIVATE
  RtlUnicodeStringToInteger
  RtlUnicodeStringToOemSize
  RtlUnicodeStringToOemString
  RtlUnicodeToCustomCPN
  RtlUnicodeToMultiByteN
  RtlUnicodeToMultiByteSize
  RtlUnicodeToOemN
  RtlUnicodeToUTF8N
  RtlUniform
  RtlUnlockHeap
  RtlUnwind
  RtlUnwindEx
  RtlUpcaseUnicodeChar
  RtlUpcaseUnicodeString
  RtlUpcaseUnicodeStringToAnsiString
  RtlUpcaseUnicodeStringToCountedOemString
  RtlUpcaseUnicodeStringToOemString
  RtlUpcaseUnicodeToCustomCPN
  RtlUpcaseUnicodeToMultiByteN
  RtlUpcaseUnicodeToOemN
  RtlUpdateTimer
  RtlUpperChar
  RtlUpperString
  RtlUsageHeap PRIVATE
  RtlUserThreadStart
  RtlValidAcl
  RtlValidRelativeSecurityDescriptor
  RtlValidSecurityDescriptor
  RtlValidSid
  RtlValidateHeap
  RtlValidateProcessHeaps PRIVATE
  RtlVerifyVersionInfo
  RtlVirtualUnwind
  RtlVirtualUnwind2
  RtlWaitOnAddress
  RtlWakeAddressAll
  RtlWakeAddressSingle
  RtlWakeAllConditionVariable
  RtlWakeConditionVariable
  RtlWalkFrameChain
  RtlWalkHeap
  RtlWow64EnableFsRedirection
  RtlWow64EnableFsRedirectionEx
  RtlWow64GetCpuAreaInfo
  RtlWow64GetCurrentCpuArea
  RtlWow64GetCurrentMachine
  RtlWow64GetProcessMachines
  RtlWow64GetSharedInfoProcess
  RtlWow64GetThreadContext
  RtlWow64GetThreadSelectorEntry
  RtlWow64IsWowGuestMachineSupported
  RtlWow64PopAllCrossProcessWorkFromWorkList
  RtlWow64PopCrossProcessWorkFromFreeList
  RtlWow64PushCrossProcessWorkOntoFreeList
  RtlWow64PushCrossProcessWorkOntoWorkList
  RtlWow64RequestCrossProcessHeavyFlush
  RtlWow64SetThreadContext
  RtlWow64SuspendThread
  RtlWriteMemoryStream PRIVATE
  RtlWriteRegistryValue
  RtlZeroHeap PRIVATE
  RtlZeroMemory
  RtlZombifyActivationContext
  RtlpNtCreateKey
  RtlpNtEnumerateSubKey
  RtlpNtMakeTemporaryKey
  RtlpNtOpenKey
  RtlpNtQueryValueKey
  RtlpNtSetValueKey
  RtlpUnWaitCriticalSection
  RtlpWaitForCriticalSection
  RtlxAnsiStringToUnicodeSize
  RtlxOemStringToUnicodeSize
  RtlxUnicodeStringToAnsiSize
  RtlxUnicodeStringToOemSize
  TpAllocCleanupGroup
  TpAllocIoCompletion
  TpAllocPool
  TpAllocTimer
  TpAllocWait
  TpAllocWork
  TpCallbackLeaveCriticalSectionOnCompletion
  TpCallbackMayRunLong
  TpCallbackReleaseMutexOnCompletion
  TpCallbackReleaseSemaphoreOnCompletion
  TpCallbackSetEventOnCompletion
  TpCallbackUnloadDllOnCompletion
  TpCancelAsyncIoOperation
  TpDisassociateCallback
  TpIsTimerSet
  TpPostWork
  TpQueryPoolStackInformation
  TpReleaseCleanupGroup
  TpReleaseCleanupGroupMembers
  TpReleaseIoCompletion
  TpReleasePool
  TpReleaseTimer
  TpReleaseWait
  TpReleaseWork
  TpSetPoolMaxThreads
  TpSetPoolMinThreads
  TpSetPoolStackInformation
  TpSetTimer
  TpSetWait
  TpSimpleTryPost
  TpStartAsyncIoOperation
  TpWaitForIoCompletion
  TpWaitForTimer
  TpWaitForWait
  TpWaitForWork
  VerSetConditionMask
  WinSqmEndSession
  WinSqmIncrementDWORD
  WinSqmIsOptedIn
  WinSqmSetDWORD
  WinSqmSetIfMaxDWORD
  WinSqmStartSession
  ZwAcceptConnectPort
  ZwAccessCheck
  ZwAccessCheckAndAuditAlarm
  ZwAddAtom
  ZwAdjustGroupsToken
  ZwAdjustPrivilegesToken
  ZwAlertResumeThread
  ZwAlertThread
  ZwAlertThreadByThreadId
  ZwAllocateLocallyUniqueId
  ZwAllocateUuids
  ZwAllocateVirtualMemory
  ZwAllocateVirtualMemoryEx
  ZwAreMappedFilesTheSame
  ZwAssignProcessToJobObject
  ZwCancelIoFile
  ZwCancelIoFileEx
  ZwCancelSynchronousIoFile
  ZwCancelTimer
  ZwClearEvent
  ZwClose
  ZwCompareObjects
  ZwCompleteConnectPort
  ZwConnectPort
  ZwContinue
  ZwCreateDirectoryObject
  ZwCreateEvent
  ZwCreateFile
  ZwCreateIoCompletion
  ZwCreateJobObject
  ZwCreateKey
  ZwCreateKeyTransacted
  ZwCreateKeyedEvent
  ZwCreateLowBoxToken
  ZwCreateMailslotFile
  ZwCreateMutant
  ZwCreateNamedPipeFile
  ZwCreatePagingFile
  ZwCreatePort
  ZwCreateSection
  ZwCreateSemaphore
  ZwCreateSymbolicLinkObject
  ZwCreateThread
  ZwCreateThreadEx
  ZwCreateTimer
  ZwCreateToken
  ZwCreateUserProcess
  ZwDebugActiveProcess
  ZwDebugContinue
  ZwDelayExecution
  ZwDeleteAtom
  ZwDeleteFile
  ZwDeleteKey
  ZwDeleteValueKey
  ZwDeviceIoControlFile
  ZwDisplayString
  ZwDuplicateObject
  ZwDuplicateToken
  ZwEnumerateKey
  ZwEnumerateValueKey
  ZwFilterToken
  ZwFindAtom
  ZwFlushBuffersFile
  ZwFlushInstructionCache
  ZwFlushKey
  ZwFlushProcessWriteBuffers
  ZwFlushVirtualMemory
  ZwFreeVirtualMemory
  ZwFsControlFile
  ZwGetContextThread
  ZwGetCurrentProcessorNumber
  ZwGetNlsSectionPtr
  ZwGetTickCount
  ZwGetWriteWatch
  ZwImpersonateAnonymousToken
  ZwInitializeNlsFiles
  ZwInitiatePowerAction
  ZwIsProcessInJob
  ZwListenPort
  ZwLoadDriver
  ZwLoadKey2
  ZwLoadKey
  ZwLockFile
  ZwLockVirtualMemory
  ZwMakeTemporaryObject
  ZwMapViewOfSection
  ZwMapViewOfSectionEx
  ZwNotifyChangeDirectoryFile
  ZwNotifyChangeKey
  ZwNotifyChangeMultipleKeys
  ZwOpenDirectoryObject
  ZwOpenEvent
  ZwOpenFile
  ZwOpenIoCompletion
  ZwOpenJobObject
  ZwOpenKey
  ZwOpenKeyEx
  ZwOpenKeyTransacted
  ZwOpenKeyTransactedEx
  ZwOpenKeyedEvent
  ZwOpenMutant
  ZwOpenProcess
  ZwOpenProcessToken
  ZwOpenProcessTokenEx
  ZwOpenSection
  ZwOpenSemaphore
  ZwOpenSymbolicLinkObject
  ZwOpenThread
  ZwOpenThreadToken
  ZwOpenThreadTokenEx
  ZwOpenTimer
  ZwPowerInformation
  ZwPrivilegeCheck
  ZwProtectVirtualMemory
  ZwPulseEvent
  ZwQueryAttributesFile
  ZwQueryDefaultLocale
  ZwQueryDefaultUILanguage
  ZwQueryDirectoryFile
  ZwQueryDirectoryObject
  ZwQueryEaFile
  ZwQueryEvent
  ZwQueryFullAttributesFile
  ZwQueryInformationAtom
  ZwQueryInformationFile
  ZwQueryInformationJobObject
  ZwQueryInformationProcess
  ZwQueryInformationThread
  ZwQueryInformationToken
  ZwQueryInstallUILanguage
  ZwQueryIoCompletion
  ZwQueryKey
  ZwQueryLicenseValue
  ZwQueryMultipleValueKey
  ZwQueryMutant
  ZwQueryObject
  ZwQueryPerformanceCounter
  ZwQuerySection
  ZwQuerySecurityObject
  ZwQuerySemaphore
  ZwQuerySymbolicLinkObject
  ZwQuerySystemEnvironmentValue
  ZwQuerySystemEnvironmentValueEx
  ZwQuerySystemInformation
  ZwQuerySystemInformationEx
  ZwQuerySystemTime
  ZwQueryTimer
  ZwQueryTimerResolution
  ZwQueryValueKey
  ZwQueryVirtualMemory
  ZwQueryVolumeInformationFile
  ZwQueueApcThread
  ZwRaiseException
  ZwRaiseHardError
  ZwReadFile
  ZwReadFileScatter
  ZwReadVirtualMemory
  ZwRegisterThreadTerminatePort
  ZwReleaseKeyedEvent
  ZwReleaseMutant
  ZwReleaseSemaphore
  ZwRemoveIoCompletion
  ZwRemoveIoCompletionEx
  ZwRemoveProcessDebug
  ZwRenameKey
  ZwReplaceKey
  ZwReplyWaitReceivePort
  ZwRequestWaitReplyPort
  ZwResetEvent
  ZwResetWriteWatch
  ZwRestoreKey
  ZwResumeProcess
  ZwResumeThread
  ZwSaveKey
  ZwSecureConnectPort
  ZwSetContextThread
  ZwSetDebugFilterState
  ZwSetDefaultLocale
  ZwSetDefaultUILanguage
  ZwSetEaFile
  ZwSetEvent
  ZwSetInformationDebugObject
  ZwSetInformationFile
  ZwSetInformationJobObject
  ZwSetInformationKey
  ZwSetInformationObject
  ZwSetInformationProcess
  ZwSetInformationThread
  ZwSetInformationToken
  ZwSetInformationVirtualMemory
  ZwSetIntervalProfile
  ZwSetIoCompletion
  ZwSetLdtEntries
  ZwSetSecurityObject
  ZwSetSystemInformation
  ZwSetSystemTime
  ZwSetThreadExecutionState
  ZwSetTimer
  ZwSetTimerResolution
  ZwSetValueKey
  ZwSetVolumeInformationFile
  ZwShutdownSystem
  ZwSignalAndWaitForSingleObject
  ZwSuspendProcess
  ZwSuspendThread
  ZwSystemDebugControl
  ZwTerminateJobObject
  ZwTerminateProcess
  ZwTerminateThread
  ZwTestAlert
  ZwTraceControl
  ZwUnloadDriver
  ZwUnloadKey
  ZwUnlockFile
  ZwUnlockVirtualMemory
  ZwUnmapViewOfSection
  ZwUnmapViewOfSectionEx
  ZwWaitForAlertByThreadId
  ZwWaitForDebugEvent
  ZwWaitForKeyedEvent
  ZwWaitForMultipleObjects
  ZwWaitForSingleObject
  ZwWriteFile
  ZwWriteFileGather
  ZwWriteVirtualMemory
  ZwYieldExecution
  __C_specific_handler
  __chkstk
  __isascii
  __iscsym
  __iscsymf
  __toascii
  _atoi64
  _errno
  _fltused PRIVATE
  _i64toa
  _i64toa_s
  _i64tow
  _i64tow_s
  _itoa
  _itoa_s
  _itow
  _itow_s
  _lfind
  _local_unwind
  _ltoa
  _ltoa_s
  _ltow
  _ltow_s
  _makepath_s
  _memccpy
  _memicmp
  _setjmpex
  _snprintf
  _snprintf_s
  _snwprintf
  _snwprintf_s
  _splitpath
  _splitpath_s
  _strcmpi
  _stricmp
  _strlwr
  _strlwr_s
  _strnicmp
  _strupr
  _strupr_s
  _swprintf
  _tolower
  _toupper
  _ui64toa
  _ui64toa_s
  _ui64tow
  _ui64tow_s
  _ultoa
  _ultoa_s
  _ultow
  _ultow_s
  _vscprintf
  _vscwprintf
  _vsnprintf
  _vsnprintf_s
  _vsnwprintf
  _vsnwprintf_s
  _vswprintf
  _wcsicmp
  _wcslwr
  _wcslwr_s
  _wcsnicmp
  _wcstoi64
  _wcstoui64
  _wcsupr
  _wcsupr_s
  _wmakepath_s
  _wsplitpath_s
  _wtoi
  _wtoi64
  _wtol
  abs
  atan
  atan2
  atoi
  atol
  bsearch
  bsearch_s
  ceil
  cos
  fabs
  floor
  isalnum
  isalpha
  iscntrl
  isdigit
  isgraph
  islower
  isprint
  ispunct
  isspace
  isupper
  iswalnum
  iswalpha
  iswascii
  iswctype
  iswdigit
  iswgraph
  iswlower
  iswprint
  iswspace
  iswxdigit
  isxdigit
  labs
  log
  longjmp
  mbstowcs
  memchr
  memcmp
  memcpy
  memcpy_s
  memmove
  memmove_s
  memset
  pow
  qsort
  qsort_s
  sin
  sprintf
  sprintf_s
  sqrt
  sscanf
  strcat
  strcat_s
  strchr
  strcmp
  strcpy
  strcpy_s
  strcspn
  strlen
  strncat
  strncat_s
  strncmp
  strncpy
  strncpy_s
  strnlen
  strpbrk
  strrchr
  strspn
  strstr
  strtok_s
  strtol
  strtoul
  swprintf
  swprintf_s
  tan
  tolower
  toupper
  towlower
  towupper
  vDbgPrintEx
  vDbgPrintExWithPrefix
  vsprintf
  vsprintf_s
  vswprintf_s
  wcscat
  wcscat_s
  wcschr
  wcscmp
  wcscpy
  wcscpy_s
  wcscspn
  wcslen
  wcsncat
  wcsncat_s
  wcsncmp
  wcsncpy
  wcsncpy_s
  wcsnlen
  wcspbrk
  wcsrchr
  wcsspn
  wcsstr
  wcstok
  wcstok_s
  wcstol
  wcstombs
  wcstoul
  wine_server_call
  wine_server_fd_to_handle
  wine_server_handle_to_fd
  __wine_unix_spawnvp
  __wine_ctrl_routine
  __wine_syscall_dispatcher DATA PRIVATE
  __wine_unix_call_dispatcher DATA PRIVATE
  __wine_unixlib_handle DATA PRIVATE
  __wine_set_unix_env
  __wine_dbg_write
  __wine_dbg_get_channel_flags
  __wine_dbg_header
  __wine_dbg_output
  __wine_dbg_strdup
  __wine_dbg_ftrace
  wine_get_version
  wine_get_build_id
  wine_get_host_version
  wine_nt_to_unix_file_name
  wine_unix_to_nt_file_name
  __wine_needs_override_large_address_aware


================================================
FILE: Source/Windows/Defs/wow64.def
================================================
; File generated automatically from wine/dlls/wow64/wow64.spec; do not edit!
; To generate: winebuild --def -E wine/dlls/wow64/wow64.spec > wow64.def

LIBRARY wow64.dll

EXPORTS
  Wow64AllocThreadHeap @1 PRIVATE
  Wow64AllocateHeap @2 PRIVATE
  Wow64AllocateTemp @3
  Wow64ApcRoutine @4
  Wow64CheckIfNXEnabled @5 PRIVATE
  Wow64EmulateAtlThunk @6 PRIVATE
  Wow64FreeHeap @7 PRIVATE
  Wow64FreeThreadHeap @8 PRIVATE
  Wow64GetWow64ImageOption @9 PRIVATE
  Wow64IsControlFlowGuardEnforced @10 PRIVATE
  Wow64IsStackExtentsCheckEnforced @11 PRIVATE
  Wow64KiUserCallbackDispatcher @12
  Wow64LdrpInitialize @13
  Wow64LogPrint @14 PRIVATE
  Wow64NotifyUnsimulateComplete @15 PRIVATE
  Wow64PassExceptionToGuest @16
  Wow64PrepareForDebuggerAttach @17 PRIVATE
  Wow64PrepareForException @18
  Wow64ProcessPendingCrossProcessItems @19
  Wow64RaiseException @20
  Wow64ShallowThunkAllocObjectAttributes32TO64_FNC @21 PRIVATE
  Wow64ShallowThunkAllocSecurityQualityOfService32TO64_FNC @22 PRIVATE
  Wow64ShallowThunkSIZE_T32TO64 @23 PRIVATE
  Wow64ShallowThunkSIZE_T64TO32 @24 PRIVATE
  Wow64SuspendLocalThread @25 PRIVATE
  Wow64SystemServiceEx @26
  Wow64ValidateUserCallTarget @27 PRIVATE
  Wow64ValidateUserCallTargetFilter @28 PRIVATE


================================================
FILE: Source/Windows/WOW64/BTInterface.h
================================================
// SPDX-License-Identifier: MIT
#pragma once

#include <windef.h>
#include <ntstatus.h>
#include <winternl.h>

// Declarations of the WOW64 BT module API entry points.
// They are declared with C linkage so the symbol names are not C++-mangled.
// NOTE(review): parameters named `Unknown`/`Unk1`/`Unk2` have no meaning documented in this header.
extern "C" {
void STDMETHODCALLTYPE BTCpuProcessInit();
void STDMETHODCALLTYPE BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status);
void STDMETHODCALLTYPE BTCpuThreadInit();
void STDMETHODCALLTYPE BTCpuThreadTerm(HANDLE Thread, LONG ExitCode);
void STDMETHODCALLTYPE* BTCpuGetBopCode();
void STDMETHODCALLTYPE* __wine_get_unix_opcode();
NTSTATUS STDMETHODCALLTYPE BTCpuGetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context);
NTSTATUS STDMETHODCALLTYPE BTCpuSetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context);
void STDMETHODCALLTYPE BTCpuSimulate();
NTSTATUS STDMETHODCALLTYPE BTCpuSuspendLocalThread(HANDLE Thread, ULONG* Count);
NTSTATUS STDMETHODCALLTYPE BTCpuResetToConsistentState(EXCEPTION_POINTERS* Ptrs);
void STDMETHODCALLTYPE BTCpuFlushInstructionCache2(const void* Address, SIZE_T Size);
void STDMETHODCALLTYPE BTCpuFlushInstructionCacheHeavy(const void* Address, SIZE_T Size);
// The BTCpuNotify* declarations below that take `After`/`Status` presumably get called around the
// corresponding operation (before and after) — TODO confirm against the caller.
void STDMETHODCALLTYPE BTCpuNotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot, BOOL After, ULONG Status);
void STDMETHODCALLTYPE BTCpuNotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt, BOOL After, ULONG Status);
void STDMETHODCALLTYPE BTCpuNotifyMemoryDirty(void* Address, SIZE_T Size);
void STDMETHODCALLTYPE BTCpuNotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType, BOOL After, ULONG Status);
NTSTATUS STDMETHODCALLTYPE BTCpuNotifyMapViewOfSection(void* Unk1, void* Address, void* Unk2, SIZE_T Size, ULONG AllocType, ULONG Prot);
void STDMETHODCALLTYPE BTCpuNotifyUnmapViewOfSection(void* Address, BOOL After, ULONG Status);
void STDMETHODCALLTYPE BTCpuNotifyReadFile(HANDLE Handle, void* Address, SIZE_T Size, BOOL After, NTSTATUS Status);
BOOLEAN STDMETHODCALLTYPE BTCpuIsProcessorFeaturePresent(UINT Feature);
void STDMETHODCALLTYPE BTCpuUpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info);
}


================================================
FILE: Source/Windows/WOW64/CMakeLists.txt
================================================
# Builds the wow64fex shared library from Module.cpp, the libwow64fex.def module-definition
# file and the object files of the FEXCore_object target.
add_library(wow64fex SHARED
  Module.cpp
  libwow64fex.def
  $<TARGET_OBJECTS:FEXCore_object>)

# patch_library_wine is a project-defined helper (not defined in this file).
patch_library_wine(wow64fex)

target_include_directories(wow64fex PRIVATE
  "${CMAKE_SOURCE_DIR}/Source/Windows/include/"
  "${CMAKE_SOURCE_DIR}/Source/Windows/"
  "${CMAKE_SOURCE_DIR}/Source/")

target_link_libraries(wow64fex PRIVATE
  FEXCore_Base
  Common
  CommonTools
  CommonWindows
  CommonWindowsRuntime
  wow64_ex
  ntdll_ex)

# Link statically without the default startup files and default libraries; the C++ runtime
# (libc++, libc++abi) and libunwind are therefore named explicitly, followed by ${LIBGCC_PATH}.
target_link_options(wow64fex PRIVATE -static -nostdlib -nostartfiles -nodefaultlibs -lc++ -lc++abi -lunwind)
target_link_libraries(wow64fex PRIVATE ${LIBGCC_PATH})
# Install the built library as part of the Runtime component.
install(TARGETS wow64fex RUNTIME
  DESTINATION ${CMAKE_INSTALL_LIBDIR}
  COMPONENT Runtime)


================================================
FILE: Source/Windows/WOW64/Module.cpp
================================================
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|WOW64
desc: Implements the WOW64 BT module API using FEXCore
$end_info$
*/

// Thanks to André Zwing, whose ideas from https://github.com/AndreRH/hangover this code is based upon

#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/Profiler.h>
#include <FEXCore/Utils/SHMStats.h>
#include <FEXCore/Utils/EnumOperators.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/FPState.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/TypeDefines.h>
#include <FEXCore/Utils/SignalScopeGuards.h>

#include "Common/CallRetStack.h"
#include "Common/JITGuardPage.h"
#include "Common/Config.h"
#include "Common/Exception.h"
#include "Common/TSOHandlerConfig.h"
#include "Common/ImageTracker.h"
#include "Common/InvalidationTracker.h"
#include "Common/OvercommitTracker.h"
#include "Common/CPUFeatures.h"
#include "Common/Logging.h"
#include "Common/Module.h"
#include "Common/CRT/CRT.h"
#include "Common/PortabilityInfo.h"
#include "Common/Handle.h"
#include "DummyHandlers.h"
#include "BTInterface.h"
#include "Windows/Common/SHMStats.h"

#include <cstdint>
#include <type_traits>
#include <atomic>
#include <mutex>
#include <utility>
#include <unordered_map>
#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <wine/debug.h>
#include <wine/unixlib.h>

// Bit flags stored in the per-thread control word (see TLS::ControlWord)
namespace ControlBits {
// When this is unset, a thread can be safely interrupted and have its context recovered
// IMPORTANT: This can only safely be written by the owning thread
static constexpr uint32_t IN_JIT {1U << 0};

// JIT entry polls this bit until it is unset, at which point IN_JIT will be set
static constexpr uint32_t PAUSED {1U << 1};

// When this is set, the CPU context stored in the CPU area has not yet been flushed to the FEX TLS
static constexpr uint32_t WOW_CPU_AREA_DIRTY {1U << 2};
}; // namespace ControlBits

// Thin accessor over a thread's TEB that exposes the TLS slots used by this module as typed references.
// It does not own the TEB; every accessor reinterprets the slot storage in place.
struct TLS {
  // Indices into TEB->TlsSlots used by this module, counted down from WOW64_TLS_MAX_NUMBER
  enum class Slot : size_t {
    ENTRY_CONTEXT = WOW64_TLS_MAX_NUMBER - 1,
    CONTROL_WORD = WOW64_TLS_MAX_NUMBER - 2,
    THREAD_STATE = WOW64_TLS_MAX_NUMBER - 3,
    CACHED_CALLRET_SP = WOW64_TLS_MAX_NUMBER - 4,
  };

  _TEB* TEB;

  explicit TLS(_TEB* TEB)
    : TEB(TEB) {}

  // Returns the WOW64INFO structure pointed to by the WOW64_TLS_WOW64INFO slot
  WOW64INFO& Wow64Info() const {
    return *reinterpret_cast<WOW64INFO*>(TEB->TlsSlots[WOW64_TLS_WOW64INFO]);
  }

  // Returns the control word (a combination of ControlBits flags) viewed as an atomic living inside the slot itself
  std::atomic<uint32_t>& ControlWord() const {
    // TODO: Change this when libc++ gains std::atomic_ref support
    return reinterpret_cast<std::atomic<uint32_t>&>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::CONTROL_WORD)]);
  }

  // Returns a reference to the slot holding the thread's entry CONTEXT pointer
  CONTEXT*& EntryContext() const {
    return reinterpret_cast<CONTEXT*&>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::ENTRY_CONTEXT)]);
  }

  // Returns a reference to the slot holding the thread's FEX thread state pointer
  FEXCore::Core::InternalThreadState*& ThreadState() const {
    return reinterpret_cast<FEXCore::Core::InternalThreadState*&>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::THREAD_STATE)]);
  }

  // This is used to work around user callback handling (see Wow64KiUserCallbackDispatcher in wine) unbalancing the
  // call-ret stack since user callbacks are returned from using a syscall that we can't really intercept.
  uint64_t& CachedCallRetSp() const {
    return reinterpret_cast<uint64_t&>(TEB->TlsSlots[FEXCore::ToUnderlying(Slot::CACHED_CALLRET_SP)]);
  }
};

// Frontend-owned per-thread data, stored in InternalThreadState::FrontendPtr (see GetFrontendThreadData)
struct FrontendThreadData {
  // NOTE(review): this flag's readers/writers are outside the reviewed part of the file — confirm its meaning there
  bool InLockedRWXRead {};
};

class WowSyscallHandler;

namespace {
// Guest-visible addresses of the syscall/unixcall bridge instructions; populated in BTCpuProcessInit
namespace BridgeInstrs {
  // These are directly jumped to by the guest to make system calls
  void* Syscall {};
  void* UnixCall {};
} // namespace BridgeInstrs

fextl::unique_ptr<FEXCore::Context::Context> CTX;
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> SignalDelegator;
fextl::unique_ptr<WowSyscallHandler> SyscallHandler;
fextl::unique_ptr<FEX::Windows::StatAlloc> StatAllocHandler;

std::optional<FEX::Windows::InvalidationTracker> InvalidationTracker;
std::optional<FEX::Windows::CPUFeatures> CPUFeatures;
std::optional<FEX::Windows::OvercommitTracker> OvercommitTracker;
std::optional<FEX::Windows::ImageTracker> ImageTracker;

std::mutex ThreadCreationMutex;
// Map of TIDs to their FEX thread state, `ThreadCreationMutex` must be locked when accessing
std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*> Threads;

decltype(__wine_unix_call_dispatcher) WineUnixCall;

// Queries the TEB address of `Thread` and wraps it in a TLS accessor.
// Returns the NTSTATUS of the query together with the accessor. The accessor is only meaningful when the
// status is zero; on failure it wraps a null TEB and must not be dereferenced.
std::pair<NTSTATUS, TLS> GetThreadTLS(HANDLE Thread) {
  // Value-initialise so a failed query yields a null TEB instead of reading an indeterminate pointer value
  THREAD_BASIC_INFORMATION Info {};
  const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr);
  return {Err, TLS {reinterpret_cast<_TEB*>(Info.TebBaseAddress)}};
}

// Builds a TLS accessor for the calling thread's own TEB.
TLS GetTLS() {
  auto* const CurrentTEB = NtCurrentTeb();
  return TLS {CurrentTEB};
}

// Recovers the frontend's per-thread data from the opaque FrontendPtr stored on the FEX thread state.
FrontendThreadData* GetFrontendThreadData(FEXCore::Core::InternalThreadState* Thread) {
  auto* Opaque = Thread->FrontendPtr;
  return static_cast<FrontendThreadData*>(Opaque);
}

uint64_t GetWowTEB(void* TEB) {
  static constexpr size_t WowTEBOffsetMemberOffset {0x180c};
  return static_cast<uint64_t>(
    *reinterpret_cast<LONG*>(reinterpret_cast<uintptr_t>(TEB) + WowTEBOffsetMemberOffset) + reinterpret_cast<uint64_t>(TEB));
}

// Returns true when `Address` lies in the half-open range [DispatcherBegin, DispatcherEnd) reported by the
// signal delegator's configuration.
bool IsDispatcherAddress(uint64_t Address) {
  const auto& DelegatorConfig = SignalDelegator->GetConfig();
  if (Address < DelegatorConfig.DispatcherBegin) {
    return false;
  }
  return Address < DelegatorConfig.DispatcherEnd;
}

// Returns true when `Address` is inside the dispatcher or inside the calling thread's code buffer.
bool IsAddressInJit(uint64_t Address) {
  if (!IsDispatcherAddress(Address)) {
    // Only consult the current thread's state when the dispatcher range did not match
    auto* CurrentThread = GetTLS().ThreadState();
    return CurrentThread->CTX->IsAddressInCodeBuffer(CurrentThread, Address);
  }

  return true;
}

void HandleImageMap(uint64_t Address, bool MainImage = false) {
  fextl::string ModulePath = FEX::Windows::GetSectionFilePath(Address);
  fextl::string ModuleName = fextl::string {FEX::Windows::BaseName(ModulePath)};
  InvalidationTracker->HandleImageMap(ModuleName, Address);
  ImageTracker->HandleImageMap(ModulePath, Address, MainImage);
}

// Forwards an image unmap of [Address, Address + Size) to the image tracker.
// Unlike HandleImageMap, the invalidation tracker is not notified here.
void HandleImageUnmap(uint64_t Address, uint64_t Size) {
  ImageTracker->HandleImageUnmap(Address, Size);
}
} // namespace

namespace Context {
// Loads the guest CPU state of `Thread` from a 32-bit WOW64 context.
// `WowTEB` is installed as the base of the segment selected by Context->SegFs and as the cached FS base.
// The extended registers area of `Context` is read unconditionally as an XSAVE_FORMAT.
void LoadStateFromWowContext(FEXCore::Core::InternalThreadState* Thread, uint64_t WowTEB, WOW64_CONTEXT* Context) {
  auto& State = Thread->CurrentFrame->State;

  // General register state

  State.gregs[FEXCore::X86State::REG_RAX] = Context->Eax;
  State.gregs[FEXCore::X86State::REG_RBX] = Context->Ebx;
  State.gregs[FEXCore::X86State::REG_RCX] = Context->Ecx;
  State.gregs[FEXCore::X86State::REG_RDX] = Context->Edx;
  State.gregs[FEXCore::X86State::REG_RSI] = Context->Esi;
  State.gregs[FEXCore::X86State::REG_RDI] = Context->Edi;
  State.gregs[FEXCore::X86State::REG_RBP] = Context->Ebp;
  State.gregs[FEXCore::X86State::REG_RSP] = Context->Esp;

  State.rip = Context->Eip;
  CTX->SetFlagsFromCompactedEFLAGS(Thread, Context->EFlags);

  // Segment selectors are 16 bits wide; discard the upper half of each context field
  State.es_idx = Context->SegEs & 0xffff;
  State.cs_idx = Context->SegCs & 0xffff;
  State.ss_idx = Context->SegSs & 0xffff;
  State.ds_idx = Context->SegDs & 0xffff;
  State.fs_idx = Context->SegFs & 0xffff;
  State.gs_idx = Context->SegGs & 0xffff;

  // The TEB is the only populated GDT entry by default
  auto GDT = State.GetSegmentFromIndex(State, (Context->SegFs & 0xffff));
  State.SetGDTBase(GDT, WowTEB);
  State.SetGDTLimit(GDT, 0xF'FFFFU);
  State.fs_cached = WowTEB;
  // The remaining cached segment bases handled here are reset to zero (gs_cached is left untouched)
  State.es_cached = 0;
  State.cs_cached = 0;
  State.ss_cached = 0;
  State.ds_cached = 0;

  // Floating-point register state
  const auto* XSave = reinterpret_cast<XSAVE_FORMAT*>(Context->ExtendedRegisters);

  CTX->SetXMMRegistersFromState(Thread, reinterpret_cast<const __uint128_t*>(XSave->XmmRegisters), nullptr);
  memcpy(State.mm, XSave->FloatRegisters, sizeof(State.mm));

  State.FCW = XSave->ControlWord;
  // Unpack the x87 status word: IE is bit 0, C0/C1/C2 are bits 8/9/10, TOP is bits 11-13 and C3 is bit 14
  State.flags[FEXCore::X86State::X87FLAG_IE_LOC] = XSave->StatusWord & 1;
  State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (XSave->StatusWord >> 8) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (XSave->StatusWord >> 9) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (XSave->StatusWord >> 10) & 1;
  State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (XSave->StatusWord >> 14) & 1;
  State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (XSave->StatusWord >> 11) & 0b111;
  State.AbridgedFTW = XSave->TagWord;
}

// Writes the guest CPU state of `Thread` into a 32-bit WOW64 context.
// Both the XSAVE_FORMAT in Context->ExtendedRegisters and the legacy Context->FloatSave area are filled.
// Fields of the XSAVE_FORMAT that are not assigned here (e.g. MxCsr, ErrorOffset) keep whatever value the caller
// put in `Context`, and the legacy area's error/data fields are copied from them.
void StoreWowContextFromState(FEXCore::Core::InternalThreadState* Thread, WOW64_CONTEXT* Context) {
  auto& State = Thread->CurrentFrame->State;

  // General register state

  Context->Eax = State.gregs[FEXCore::X86State::REG_RAX];
  Context->Ebx = State.gregs[FEXCore::X86State::REG_RBX];
  Context->Ecx = State.gregs[FEXCore::X86State::REG_RCX];
  Context->Edx = State.gregs[FEXCore::X86State::REG_RDX];
  Context->Esi = State.gregs[FEXCore::X86State::REG_RSI];
  Context->Edi = State.gregs[FEXCore::X86State::REG_RDI];
  Context->Ebp = State.gregs[FEXCore::X86State::REG_RBP];
  Context->Esp = State.gregs[FEXCore::X86State::REG_RSP];

  Context->Eip = State.rip;
  Context->EFlags = CTX->ReconstructCompactedEFLAGS(Thread, false, nullptr, 0);

  Context->SegEs = State.es_idx;
  Context->SegCs = State.cs_idx;
  Context->SegSs = State.ss_idx;
  Context->SegDs = State.ds_idx;
  Context->SegFs = State.fs_idx;
  Context->SegGs = State.gs_idx;

  // Floating-point register state

  auto* XSave = reinterpret_cast<XSAVE_FORMAT*>(Context->ExtendedRegisters);

  CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(XSave->XmmRegisters), nullptr);
  memcpy(XSave->FloatRegisters, State.mm, sizeof(State.mm));

  XSave->ControlWord = State.FCW;
  // Repack the x87 status word: IE at bit 0, C0/C1/C2 at bits 8/9/10, TOP at bits 11-13 and C3 at bit 14
  XSave->StatusWord = (State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] << 11) | (State.flags[FEXCore::X86State::X87FLAG_C0_LOC] << 8) |
                      (State.flags[FEXCore::X86State::X87FLAG_C1_LOC] << 9) | (State.flags[FEXCore::X86State::X87FLAG_C2_LOC] << 10) |
                      (State.flags[FEXCore::X86State::X87FLAG_C3_LOC] << 14) | State.flags[FEXCore::X86State::X87FLAG_IE_LOC];
  XSave->TagWord = State.AbridgedFTW;

  // Mirror the x87 state into the legacy FloatSave area, which takes the tag word in its non-abridged form
  Context->FloatSave.ControlWord = XSave->ControlWord;
  Context->FloatSave.StatusWord = XSave->StatusWord;
  Context->FloatSave.TagWord = FEXCore::FPState::ConvertFromAbridgedFTW(XSave->StatusWord, State.mm, XSave->TagWord);
  Context->FloatSave.ErrorOffset = XSave->ErrorOffset;
  // The error opcode is packed into the upper 16 bits of the error selector field
  Context->FloatSave.ErrorSelector = XSave->ErrorSelector | (XSave->ErrorOpcode << 16);
  Context->FloatSave.DataOffset = XSave->DataOffset;
  Context->FloatSave.DataSelector = XSave->DataSelector;
  Context->FloatSave.Cr0NpxState = XSave->StatusWord | 0xffff0000;
}

// Serialises the FEX thread state of `Thread` into a temporary WOW64 context and installs it with
// RtlWow64SetThreadContext. Returns the TEB lookup's status if that lookup fails, otherwise the status of the
// context update.
NTSTATUS FlushThreadStateContext(HANDLE Thread) {
  const auto [Status, ThreadTLS] = GetThreadTLS(Thread);
  if (Status) {
    return Status;
  }

  WOW64_CONTEXT Scratch {.ContextFlags = WOW64_CONTEXT_FULL | WOW64_CONTEXT_EXTENDED_REGISTERS};

  auto* State = ThreadTLS.ThreadState();
  Context::StoreWowContextFromState(State, &Scratch);
  return RtlWow64SetThreadContext(Thread, &Scratch);
}

// Rebuilds the in-memory guest state of the thread described by `TLS` from a host CONTEXT:
// the guest RIP is recovered from the host PC, and the SRA GPRs, SRA FPRs and EFLAGS are copied out of the host
// registers using the mappings in the signal delegator configuration.
void ReconstructThreadState(TLS TLS, CONTEXT* Context) {
  const auto& Config = SignalDelegator->GetConfig();
  auto* Thread = TLS.ThreadState();
  auto& State = Thread->CurrentFrame->State;

  State.rip = CTX->RestoreRIPFromHostPC(Thread, Context->Pc);

  // Spill all SRA GPRs
  for (size_t i = 0; i < Config.SRAGPRCount; i++) {
    State.gregs[i] = Context->X[Config.SRAGPRMapping[i]];
  }

  // Spill all SRA FPRs
  for (size_t i = 0; i < Config.SRAFPRCount; i++) {
    memcpy(State.xmm.sse.data[i], &Context->V[Config.SRAFPRMapping[i]], sizeof(__uint128_t));
  }

  // Spill EFlags
  // The flags are first reconstructed from the host registers/CPSR, then written back into the thread state
  uint32_t EFlags = CTX->ReconstructCompactedEFLAGS(Thread, true, Context->X, Context->Cpsr);
  CTX->SetFlagsFromCompactedEFLAGS(Thread, EFlags);
}

// Produces a WOW64 context for the thread described by `TLS` from a host CONTEXT.
// Unless the host PC is inside the dispatcher, the thread state is first rebuilt from the host registers.
// NOTE(review): presumably the in-memory state is already up to date while in the dispatcher — confirm.
WOW64_CONTEXT ReconstructWowContext(TLS TLS, CONTEXT* Context) {
  if (!IsDispatcherAddress(Context->Pc)) {
    ReconstructThreadState(TLS, Context);
  }

  WOW64_CONTEXT WowContext {
    .ContextFlags = WOW64_CONTEXT_ALL,
  };

  // Seed the extended area before it is filled in; StoreWowContextFromState overwrites ControlWord with the
  // thread's FCW but does not assign MxCsr, so the MxCsr value set here is the one that is returned
  auto* XSave = reinterpret_cast<XSAVE_FORMAT*>(WowContext.ExtendedRegisters);
  XSave->ControlWord = 0x27f;
  XSave->MxCsr = 0x1f80;

  Context::StoreWowContextFromState(TLS.ThreadState(), &WowContext);
  return WowContext;
}

static std::optional<FEX::Windows::TSOHandlerConfig> HandlerConfig;

// Attempts to handle an unaligned access fault raised at Context->Pc.
// Faults outside the thread's code buffer are rejected. Otherwise the Arm64 helper is invoked; when it reports
// a value, that value is added to Context->Pc and the fault counts as handled.
bool HandleUnalignedAccess(TLS TLS, CONTEXT* Context) {
  auto State = TLS.ThreadState();
  const bool InCodeBuffer = State->CTX->IsAddressInCodeBuffer(State, Context->Pc);
  if (!InCodeBuffer) {
    return false;
  }

  const auto PcAdjustment =
    FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(State, HandlerConfig->GetUnalignedHandlerType(), Context->Pc, &Context->X0);
  if (!PcAdjustment.has_value()) {
    // Nothing was handled; the PC is left unchanged
    return false;
  }

  Context->Pc += PcAdjustment.value();
  return true;
}

// Marks the thread as executing in the JIT: waits for PAUSED to clear, then atomically sets IN_JIT and clears
// WOW_CPU_AREA_DIRTY. If the dirty bit was set beforehand, the WOW64 CPU area is loaded into the thread state.
void LockJITContext(TLS TLS) {
  uint32_t Expected = TLS.ControlWord().load(), New;

  // Spin until PAUSED is unset, setting IN_JIT when that occurs
  // (the exchange can only succeed against a control word value that has PAUSED clear)
  do {
    Expected = Expected & ~ControlBits::PAUSED;
    New = (Expected | ControlBits::IN_JIT) & ~ControlBits::WOW_CPU_AREA_DIRTY;
  } while (!TLS.ControlWord().compare_exchange_weak(Expected, New, std::memory_order::relaxed));
  std::atomic_signal_fence(std::memory_order::seq_cst);

  // If the CPU area is dirty, flush it to the JIT context before reentry
  // (`Expected` still holds the control word value from before the successful exchange)
  if (Expected & ControlBits::WOW_CPU_AREA_DIRTY) {
    WOW64_CONTEXT* WowContext;
    RtlWow64GetCurrentCpuArea(nullptr, reinterpret_cast<void**>(&WowContext), nullptr);
    Context::LoadStateFromWowContext(TLS.ThreadState(), GetWowTEB(NtCurrentTeb()), WowContext);
  }
}

// Clears IN_JIT in the thread's control word, leaving the other bits untouched.
// The signal fence is issued before the bit is cleared.
void UnlockJITContext(TLS TLS) {
  std::atomic_signal_fence(std::memory_order::seq_cst);
  TLS.ControlWord().fetch_and(~ControlBits::IN_JIT, std::memory_order::relaxed);
}

// Scope guard that calls LockJITContext on construction and UnlockJITContext on destruction.
// NOTE(review): the class is copyable, and a copy would call UnlockJITContext a second time on destruction.
class ScopedJITContextLock {
private:
  TLS TLSData;

public:
  ScopedJITContextLock(TLS TLSData)
    : TLSData {TLSData} {
    LockJITContext(TLSData);
  }

  ~ScopedJITContextLock() {
    UnlockJITContext(TLSData);
  }
};

// Handles a fault on the thread's InterruptFaultPage.
// Returns false if `FaultAddress` is any other address. Otherwise the page is made read-write again, the guest
// state is reconstructed from `Context`, the JIT context lock is released and re-acquired, and `Context` is
// redirected to the dispatcher entry that reloads SRA registers from the thread state.
bool HandleSuspendInterrupt(TLS TLS, CONTEXT* Context, uint64_t FaultAddress) {
  if (FaultAddress != reinterpret_cast<uint64_t>(&TLS.ThreadState()->InterruptFaultPage)) {
    return false;
  }

  // Restore read-write access to the fault page; the NTSTATUS result is not checked
  void* TmpAddress = reinterpret_cast<void*>(FaultAddress);
  SIZE_T TmpSize = FEXCore::Utils::FEX_PAGE_SIZE;
  ULONG TmpProt;
  NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_READWRITE, &TmpProt);

  // Since interrupts only happen at the start of blocks, the reconstructed state should be entirely accurate
  ReconstructThreadState(TLS, Context);

  // Yield to the suspender
  // (LockJITContext spins for as long as PAUSED is set in the control word)
  UnlockJITContext(TLS);
  LockJITContext(TLS);

  // Adjust context to return to the dispatcher, reloading SRA from thread state
  const auto& Config = SignalDelegator->GetConfig();
  Context->Pc = Config.AbsoluteLoopTopAddressFillSRA;
  Context->X1 = 0; // Set ENTRY_FILL_SRA_SINGLE_INST_REG
  return true;
}
} // namespace Context

// Calls a 2-argument function `Func` setting the parent unwind frame information to the given SP and PC
// Naked trampoline: x0/x1 (Arg0/Arg1) are passed through untouched to the function in x2 (Func).
// Sequence:
//   1. Push x3/x4 (Sp, Pc) as a 16-byte pair and describe it to the unwinder with .seh_pushframe.
//   2. Push the frame pointer and link register (x29/x30) with a matching .seh_save_fplr_x record.
//   3. Call Func via `blr x2`; its return value is left in x0.
//   4. Reload x29/x30 and pop both 16-byte pairs at once (post-index of 0x20) before returning.
__attribute__((naked)) extern "C" uint64_t SEHFrameTrampoline2Args(void* Arg0, void* Arg1, void* Func, uint64_t Sp, uint64_t Pc) {
  asm(".seh_proc SEHFrameTrampoline2Args;"
      "stp x3, x4, [sp, #-0x10]!;"
      ".seh_pushframe;"
      "stp x29, x30, [sp, #-0x10]!;"
      ".seh_save_fplr_x 16;"
      ".seh_endprologue;"
      "blr x2;"
      "ldp x29, x30, [sp], 0x20;"
      "ret;"
      ".seh_endproc;");
}

// Syscall handler installed into the FEXCore context for WOW64.
// Guest syscalls and unix calls arrive here when the guest executes one of the BridgeInstrs addresses; the
// remaining overrides forward code-range and overcommit bookkeeping to the file-level trackers.
class WowSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators {
public:
  WowSyscallHandler() {
    OSABI = FEXCore::HLE::SyscallOSABI::OS_GENERIC;
  }

  // Dispatches a guest bridge call based on which bridge instruction RIP points at.
  // The JIT context lock is dropped around the host call and re-taken afterwards.
  // `Args` is unused; arguments are read from the 32-bit guest stack instead. Always returns 0.
  static uint64_t HandleSyscallImpl(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) {
    const uint64_t ReturnRIP = *(uint32_t*)(Frame->State.gregs[FEXCore::X86State::REG_RSP]); // Return address from the stack
    uint64_t ReturnRSP = Frame->State.gregs[FEXCore::X86State::REG_RSP] + 4;                 // Stack pointer after popping return address
    uint64_t ReturnRAX = 0;

    if (Frame->State.rip == (uint64_t)BridgeInstrs::UnixCall) {
      // Unix call arguments sit on the guest stack directly above the return address
      struct StackLayout {
        unixlib_handle_t Handle;
        UINT32 ID;
        ULONG32 Args;
      }* StackArgs = reinterpret_cast<StackLayout*>(ReturnRSP);

      // The arguments are popped from the guest stack as part of returning
      ReturnRSP += sizeof(StackLayout);

      const auto TLS = GetTLS();
      Context::UnlockJITContext(TLS);
      ReturnRAX = static_cast<uint64_t>(WineUnixCall(StackArgs->Handle, StackArgs->ID, ULongToPtr(StackArgs->Args)));
      Context::LockJITContext(TLS);
    } else if (Frame->State.rip == (uint64_t)BridgeInstrs::Syscall) {
      // The syscall number is passed in EAX
      const uint64_t EntryRAX = Frame->State.gregs[FEXCore::X86State::REG_RAX];

      const auto TLS = GetTLS();
      Context::UnlockJITContext(TLS);
      Wow64ProcessPendingCrossProcessItems();
      ReturnRAX = static_cast<uint64_t>(Wow64SystemServiceEx(static_cast<UINT>(EntryRAX), reinterpret_cast<UINT*>(ReturnRSP + 4)));
      Context::LockJITContext(TLS);
    }
    // Return to the syscall caller only if RIP still points at a bridge instruction. If a new context has been
    // set in the meantime (RIP changed), use it directly and leave the state untouched
    if (Frame->State.rip == (uint64_t)BridgeInstrs::Syscall || Frame->State.rip == (uint64_t)BridgeInstrs::UnixCall) {
      Frame->State.gregs[FEXCore::X86State::REG_RAX] = ReturnRAX;
      Frame->State.gregs[FEXCore::X86State::REG_RSP] = ReturnRSP;
      Frame->State.rip = ReturnRIP;
    }

    // NORETURNEDRESULT causes this result to be ignored since we restore all registers back from memory after a syscall anyway
    return 0;
  }

  // Runs HandleSyscallImpl through SEHFrameTrampoline2Args, preserving the thread's entry context pointer
  // across the call.
  uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override {
    const auto TLS = GetTLS();
    // Stash the context pointer on the stack, as Simulate can be called from this syscall handler which would overwrite it
    CONTEXT* EntryContext = TLS.EntryContext();
    // Call the syscall handler with unwind information pointing to Simulate as its caller
    uint64_t Ret = SEHFrameTrampoline2Args(reinterpret_cast<void*>(Frame), reinterpret_cast<void*>(Args),
                                           reinterpret_cast<void*>(&HandleSyscallImpl), EntryContext->Sp, EntryContext->Pc);
    TLS.EntryContext() = EntryContext;
    return Ret;
  }

  // The overrides below forward to the file-level trackers; the `Thread` argument is not used by any of them.

  std::optional<FEXCore::ExecutableFileSectionInfo> LookupExecutableFileSection(FEXCore::Core::InternalThreadState*, uint64_t Address) override {
    return ImageTracker->LookupExecutableFileSection(Address);
  }

  void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {
    InvalidationTracker->ReprotectRWXIntervals(Start, Length);
  }

  void InvalidateGuestCodeRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {
    InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
  }

  void MarkOvercommitRange(uint64_t Start, uint64_t Length) override {
    OvercommitTracker->MarkRange(Start, Length);
  }

  void UnmarkOvercommitRange(uint64_t Start, uint64_t Length) override {
    OvercommitTracker->UnmarkRange(Start, Length);
  }

  FEXCore::HLE::ExecutableRangeInfo QueryGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Address) override {
    return InvalidationTracker->QueryExecutableRange(Address);
  }

  // Processes any pending cross-process work items via wow64 when this hook is invoked
  void PreCompile() override {
    Wow64ProcessPendingCrossProcessItems();
  }
};

// Process-wide initialisation of the BT module: loads configuration and logging, creates the FEXCore context
// and the trackers, registers the main image and the x86 ntdll, allocates the guest bridge instructions and
// applies the startup options (hardware TSO, profiling stats, startup sleep).
void BTCpuProcessInit() {
  FEX::Windows::InitCRTProcess();
  const auto ExecutableName = FEX::Windows::BaseName(FEX::Windows::GetExecutableFilePath());
  FEX::Config::LoadConfig(fextl::string {ExecutableName}, _environ, FEX::ReadPortabilityInformation());
  FEXCore::Config::ReloadMetaLayer();
  FEX::Windows::Logging::Init();

  FEXCore::Config::Set(FEXCore::Config::CONFIG_INTERPRETER_INSTALLED, "0");
  FEXCore::Config::Set(FEXCore::Config::CONFIG_IS64BIT_MODE, "0");

  FEXCore::Profiler::Init("", "");

  SignalDelegator = fextl::make_unique<FEX::DummyHandlers::DummySignalDelegator>();
  SyscallHandler = fextl::make_unique<WowSyscallHandler>();
  const auto NtDll = GetModuleHandle("ntdll.dll");
  // Running under Wine is detected by the presence of the wine_get_version export in ntdll
  const bool IsWine = !!GetProcAddress(NtDll, "wine_get_version");
  OvercommitTracker.emplace(IsWine);

  {
    auto HostFeatures = FEX::Windows::CPUFeatures::FetchHostFeatures(IsWine);
    // AVX is unsupported for WOW64
    HostFeatures.SupportsAVX = false;
    CTX = FEXCore::Context::Context::CreateNewContext(HostFeatures);
  }

  CTX->SetSignalDelegator(SignalDelegator.get());
  CTX->SetSyscallHandler(SyscallHandler.get());
  CTX->InitCore();
  Context::HandlerConfig.emplace(*CTX);
  InvalidationTracker.emplace(*CTX, Threads);
  ImageTracker.emplace(*CTX, false);

  // Register the images that are already mapped at this point: the main executable and the x86 ntdll
  auto MainModule = reinterpret_cast<__TEB*>(NtCurrentTeb())->Peb->ImageBaseAddress;
  HandleImageMap(reinterpret_cast<uint64_t>(MainModule), true);

  // NOTE(review): the GetProcAddress result is dereferenced without a null check
  auto NtDllX86 = reinterpret_cast<SYSTEM_DLL_INIT_BLOCK*>(GetProcAddress(NtDll, "LdrSystemDllInitBlock"))->ntdll_handle;
  HandleImageMap(NtDllX86);

  CPUFeatures.emplace(*CTX);

  // Allocate the syscall/unixcall trampolines in the lower 2GB of the address space
  // NOTE(review): the allocation status is not checked before `Addr` is written to
  SIZE_T Size = 4;
  void* Addr = nullptr;
  NtAllocateVirtualMemory(NtCurrentProcess(), &Addr, (1U << 31) - 1, &Size, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);
  InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast<uint64_t>(Addr), Size, PAGE_EXECUTE);
  // Two identical 2-byte sequences are stored back to back; Syscall is the first and UnixCall the second
  *reinterpret_cast<uint32_t*>(Addr) = 0x2ecd2ecd;
  BridgeInstrs::Syscall = Addr;
  BridgeInstrs::UnixCall = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(Addr) + 2);

  // The export holds the dispatcher pointer, so it is read through; WineUnixCall is left unset if it is absent
  const auto Sym = GetProcAddress(NtDll, "__wine_unix_call_dispatcher");
  if (Sym) {
    WineUnixCall = *reinterpret_cast<decltype(WineUnixCall)*>(Sym);
  }

  // wow64.dll will only initialise the cross-process queue if this is set
  GetTLS().Wow64Info().CpuFlags = WOW64_CPUFLAGS_SOFTWARE;

  // Request hardware TSO from the host when configured; FEXCore is only told about it if the request succeeds
  FEX_CONFIG_OPT(TSOEnabled, TSOENABLED);
  if (TSOEnabled()) {
    BOOL Enable = TRUE;
    NTSTATUS Status = NtSetInformationProcess(NtCurrentProcess(), ProcessFexHardwareTso, &Enable, sizeof(Enable));
    if (Status == STATUS_SUCCESS) {
      CTX->SetHardwareTSOSupport(true);
    }
  }

  FEX_CONFIG_OPT(ProfileStats, PROFILESTATS);
  FEX_CONFIG_OPT(StartupSleep, STARTUPSLEEP);
  FEX_CONFIG_OPT(StartupSleepProcName, STARTUPSLEEPPROCNAME);

  // Stats allocation is only enabled under Wine
  if (IsWine && ProfileStats()) {
    StatAllocHandler = fextl::make_unique<FEX::Windows::StatAlloc>(FEXCore::SHMStats::AppType::WIN_WOW64);
  }

  // Optional startup delay, applied to every process or only to the one named by StartupSleepProcName
  if (StartupSleep() && (StartupSleepProcName().empty() || ExecutableName == StartupSleepProcName())) {
    LogMan::Msg::IFmt("[{}][{}] Sleeping for {} seconds", GetCurrentProcessId(), ExecutableName, StartupSleep());
    std::this_thread::sleep_for(std::chrono::seconds(StartupSleep()));
  }
}

// Process termination notification; intentionally a no-op in this implementation.
void BTCpuProcessTerm(HANDLE Handle, BOOL After, ULONG Status) {}

// Per-thread initialisation: creates the FEX thread state for the calling thread, sets up its default segment
// tables, publishes it through the TLS slots and registers it in `Threads`.
// The whole function runs with ThreadCreationMutex held.
void BTCpuThreadInit() {
  // GDT index of the default 32-bit code segment; the selector stored in cs_idx is this index shifted left by 3
  static constexpr size_t DefaultWow64CS {4};
  std::scoped_lock Lock(ThreadCreationMutex);
  FEX::Windows::InitCRTThread();
  auto* Thread = CTX->CreateThread(0, 0);

  // Default segment setup.
  // The zero-initialised table is owned by the thread and released with delete[] in BTCpuThreadTerm
  auto Frame = Thread->CurrentFrame;
  auto NewSegments = new FEXCore::Core::CPUState::gdt_segment[32]();

  // Setup initial code-segment GDT
  auto& GDT = NewSegments[DefaultWow64CS];
  FEXCore::Core::CPUState::SetGDTBase(&GDT, 0);
  FEXCore::Core::CPUState::SetGDTLimit(&GDT, 0xF'FFFFU);
  GDT.L = 0; // L = Long Mode, cleared because this is a 32-bit code segment
  GDT.D = 1; // D = Default Operand Size = 32-bit

  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT] = &NewSegments[0];
  // TODO: LDTs are currently unsupported, mirror them to GDT.
  Frame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_LDT] = &NewSegments[0];

  Frame->State.cs_idx = DefaultWow64CS << 3;
  Frame->State.cs_cached = FEXCore::Core::CPUState::CalculateGDTBase(GDT);

  FEX::Windows::CallRetStack::InitializeThread(Thread);

  // Publish the thread state and mark the CPU area dirty so the first LockJITContext loads it into the state
  const auto TLS = GetTLS();
  TLS.ThreadState() = Thread;
  TLS.ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed);

  Thread->FrontendPtr = new FrontendThreadData();

  auto ThreadTID = GetCurrentThreadId();
  Threads.emplace(ThreadTID, Thread);
  if (StatAllocHandler) {
    Thread->ThreadStats = StatAllocHandler->AllocateSlot(ThreadTID);
  }
}

// Releases the emulation state owned by the thread behind `Thread`: removes it from `Threads`, frees its stats slot,
// frontend data and segment table, and destroys its FEXCore thread state. Returns silently when the handle lacks
// THREAD_TERMINATE access, when the thread cannot be queried, or when it is not (or no longer) registered.
void BTCpuThreadTerm(HANDLE Thread, LONG ExitCode) {
  if (!FEX::Windows::ValidateHandleAccess(Thread, THREAD_TERMINATE)) {
    return;
  }

  auto ThreadDup = FEX::Windows::DupHandle(Thread, THREAD_QUERY_INFORMATION | THREAD_SUSPEND_RESUME);

  THREAD_BASIC_INFORMATION BasicInfo;
  const auto QueryStatus = NtQueryInformationThread(*ThreadDup, ThreadBasicInformation, &BasicInfo, sizeof(BasicInfo), nullptr);
  if (QueryStatus) {
    return;
  }

  const auto TargetTID = reinterpret_cast<uint64_t>(BasicInfo.ClientId.UniqueThread);
  const bool IsSelf = TargetTID == GetCurrentThreadId();
  if (!IsSelf) {
    // If we are tearing down a thread that isn't ourselves, try to suspend it first so we know internal JIT locks
    // aren't being held.
    RtlWow64SuspendThread(*ThreadDup, NULL);
  }

  auto [TLSStatus, TLS] = GetThreadTLS(*ThreadDup);
  if (TLSStatus) {
    return;
  }

  {
    std::scoped_lock Lock(ThreadCreationMutex);
    auto Entry = Threads.find(TargetTID);
    if (Entry == Threads.end()) {
      // Thread already terminated
      return;
    }

    Threads.erase(Entry);
    if (StatAllocHandler) {
      StatAllocHandler->DeallocateSlot(TLS.ThreadState()->ThreadStats);
    }
  }

  // From here on the thread is no longer reachable through `Threads`; free everything BTCpuThreadInit allocated.
  auto State = TLS.ThreadState();

  delete GetFrontendThreadData(State);

  // GDT and LDT are mirrored, only free one.
  delete[] State->CurrentFrame->State.segment_arrays[FEXCore::Core::CPUState::SEGMENT_ARRAY_INDEX_GDT];

  FEX::Windows::CallRetStack::DestroyThread(State);
  CTX->DestroyThread(State);
  if (IsSelf) {
    FEX::Windows::DeinitCRTThread();
  }
}

// Returns a pointer to BridgeInstrs::Syscall.
void* BTCpuGetBopCode() {
  void* const BopCode = BridgeInstrs::Syscall;
  return BopCode;
}

// Returns a pointer to BridgeInstrs::UnixCall.
void* __wine_get_unix_opcode() {
  void* const UnixOpcode = BridgeInstrs::UnixCall;
  return UnixOpcode;
}

// Reads the 32-bit context of `Thread` into `Context`.
// Returns STATUS_ACCESS_DENIED if the handle lacks THREAD_GET_CONTEXT access, otherwise the first failing status
// from the TLS lookup or the state flush, or the result of RtlWow64GetThreadContext.
// `Process` and `Unknown` are not used.
NTSTATUS BTCpuGetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context) {
  if (!FEX::Windows::ValidateHandleAccess(Thread, THREAD_GET_CONTEXT)) {
    return STATUS_ACCESS_DENIED;
  }

  auto ThreadDup = FEX::Windows::DupHandle(Thread, THREAD_QUERY_INFORMATION | THREAD_GET_CONTEXT | THREAD_SET_CONTEXT);

  auto [Status, TLS] = GetThreadTLS(*ThreadDup);
  if (Status) {
    return Status;
  }

  // Hold the JIT context lock while the thread state is flushed and then read back.
  Context::ScopedJITContextLock Lk {TLS};

  Status = Context::FlushThreadStateContext(*ThreadDup);
  if (Status) {
    return Status;
  }

  return RtlWow64GetThreadContext(*ThreadDup, Context);
}

// Applies `Context` to `Thread`: the input is merged into the thread's WOW64 CPU area, the resulting full context
// is read back, and that full context is loaded into the JIT thread state.
// Returns STATUS_ACCESS_DENIED if the handle lacks THREAD_SET_CONTEXT access, the first failing status from any
// intermediate step, or STATUS_SUCCESS. `Process` and `Unknown` are not used.
NTSTATUS BTCpuSetContext(HANDLE Thread, HANDLE Process, void* Unknown, WOW64_CONTEXT* Context) {
  if (!FEX::Windows::ValidateHandleAccess(Thread, THREAD_SET_CONTEXT)) {
    return STATUS_ACCESS_DENIED;
  }

  auto ThreadDup = FEX::Windows::DupHandle(Thread, THREAD_QUERY_INFORMATION | THREAD_GET_CONTEXT | THREAD_SET_CONTEXT);

  auto [Status, TLS] = GetThreadTLS(*ThreadDup);
  if (Status) {
    return Status;
  }

  // Back up the input context in case we've been passed the CPU area (the flush below would wipe it out otherwise)
  WOW64_CONTEXT MergedContext = *Context;

  Context::ScopedJITContextLock Lk {TLS};

  Status = Context::FlushThreadStateContext(*ThreadDup);
  if (Status) {
    return Status;
  }

  // Merge the input context into the CPU area then pass the full context into the JIT
  Status = RtlWow64SetThreadContext(*ThreadDup, &MergedContext);
  if (Status) {
    return Status;
  }

  MergedContext.ContextFlags = WOW64_CONTEXT_FULL | WOW64_CONTEXT_EXTENDED_REGISTERS;

  Status = RtlWow64GetThreadContext(*ThreadDup, &MergedContext);
  if (Status) {
    return Status;
  }

  // Only when called with the current-thread handle value: restore callret_sp from the value cached in TLS, if any.
  if (Thread == GetCurrentThread()) {
    if (TLS.CachedCallRetSp()) {
      TLS.ThreadState()->CurrentFrame->State.callret_sp = TLS.CachedCallRetSp();
    }
  }

  Context::LoadStateFromWowContext(TLS.ThreadState(), GetWowTEB(TLS.TEB), &MergedContext);
  return STATUS_SUCCESS;
}

// .seh_pushframe doesn't restore the frame pointer, so if when unwinding from RtlCaptureContext an operation is used
// that sets SP from FP, the unwound SP value will be incorrect. Wrap RtlCaptureContext so the correct FP is immediately
// restored from the stack to prevent this.
//
// Stack frame built by the prologue (low to high addresses):
//   [sp,        sp + 0x10)    saved x29/x30 pair
//   [sp + 0x10, sp + 0x3a0)   0x390-byte buffer filled by RtlCaptureContext (presumably sizeof(CONTEXT) - TODO confirm)
// The same buffer pointer is then handed to BTCpuSimulateImpl as its CONTEXT* argument.
__attribute__((naked)) void BTCpuSimulate() {
  asm(".seh_proc BTCpuSimulate;"
      // Reserve the context buffer.
      "sub sp, sp, #0x390;"
      ".seh_stackalloc 0x390;"
      // Push the frame pointer / link register pair below the buffer.
      "stp x29, x30, [sp, #-0x10]!;"
      ".seh_save_fplr_x 16;"
      ".seh_endprologue;"
      // x0 = address of the context buffer (just above the saved pair).
      "add x0, sp, #0x10;"
      "bl RtlCaptureContext;"
      // Recompute the buffer address for the second call instead of relying on x0 surviving the first.
      "add x0, sp, #0x10;"
      "bl BTCpuSimulateImpl;"
      // Epilogue: pop x29/x30 and release the buffer.
      "ldp x29, x30, [sp], 0x10;"
      "add sp, sp, #0x390;"
      "ret;"
      ".seh_endproc;");
}

// C-linkage body of BTCpuSimulate, called from its assembly wrapper with the freshly captured host context.
// Stores the entry context and the current callret_sp in TLS, then runs the calling thread under the JIT
// context lock.
extern "C" void BTCpuSimulateImpl(CONTEXT* entry_context) {
  const auto TLS = GetTLS();
  auto ThreadState = TLS.ThreadState();

  // Kept in TLS so the exception path can later hand the host a context captured before JIT entry.
  TLS.EntryContext() = entry_context;
  TLS.CachedCallRetSp() = ThreadState->CurrentFrame->State.callret_sp;

  Context::ScopedJITContextLock Lk {TLS};
  CTX->ExecuteThread(ThreadState);
}

// Suspends `Thread` (a thread of this process) so that, once suspended, its WOW64 CPU area holds its guest state.
//
// - Returns STATUS_ACCESS_DENIED if the handle lacks THREAD_SUSPEND_RESUME access.
// - Self-suspension: marks the CPU area dirty (flushing the thread state into it if it wasn't already dirty) and
//   suspends directly.
// - A thread that is not yet registered in `Threads` is suspended without any special handling.
// - Otherwise CONTROL_PAUSED is set, the target is interrupted out of the JIT if necessary, suspended on the host,
//   its CPU area is marked dirty/flushed, and CONTROL_PAUSED is cleared again.
//
// `Count` is forwarded to NtSuspendThread. Returns the first failing NTSTATUS, or the NtSuspendThread result.
NTSTATUS BTCpuSuspendLocalThread(HANDLE Thread, ULONG* Count) {
  if (!FEX::Windows::ValidateHandleAccess(Thread, THREAD_SUSPEND_RESUME)) {
    return STATUS_ACCESS_DENIED;
  }

  auto ThreadDup = FEX::Windows::DupHandle(Thread, THREAD_QUERY_INFORMATION | THREAD_SUSPEND_RESUME | THREAD_GET_CONTEXT | THREAD_SET_CONTEXT);
  THREAD_BASIC_INFORMATION Info;
  if (NTSTATUS Err = NtQueryInformationThread(*ThreadDup, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) {
    return Err;
  }

  const auto ThreadTID = reinterpret_cast<uint64_t>(Info.ClientId.UniqueThread);
  if (ThreadTID == GetCurrentThreadId()) {
    LogMan::Msg::DFmt("Suspending self");
    // Mark the CPU area as dirty, to force the JIT context to be restored from it on entry as it may be changed using
    // SetThreadContext (which doesn't use the BTCpu API)
    if (!(GetTLS().ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed) & ControlBits::WOW_CPU_AREA_DIRTY)) {
      if (NTSTATUS Err = Context::FlushThreadStateContext(*ThreadDup); Err) {
        return Err;
      }
    }

    return NtSuspendThread(*ThreadDup, Count);
  }

  LogMan::Msg::DFmt("Suspending thread: {:X}", ThreadTID);

  auto [Err, TLS] = GetThreadTLS(*ThreadDup);
  if (Err) {
    return Err;
  }

  std::scoped_lock Lock(ThreadCreationMutex);

  // If the thread hasn't yet been initialized, suspend it without special handling as it won't yet have entered the JIT
  if (!Threads.contains(ThreadTID)) {
    LogMan::Msg::DFmt("Thread suspended: {:X}", ThreadTID);
    return NtSuspendThread(*ThreadDup, Count);
  }

  // If CONTROL_IN_JIT is unset at this point, then it can never be set (and thus the JIT cannot be reentered) as
  // CONTROL_PAUSED has been set, as such, while this may redundantly request interrupts in rare cases it will never
  // miss them
  if (TLS.ControlWord().fetch_or(ControlBits::PAUSED, std::memory_order::relaxed) & ControlBits::IN_JIT) {
    LogMan::Msg::DFmt("Thread {:X} is in JIT, polling for interrupt", ThreadTID);

    // Make the thread's interrupt fault page read-only; the resulting access violation is handled in
    // BTCpuResetToConsistentStateImpl via Context::HandleSuspendInterrupt.
    ULONG TmpProt;
    void* TmpAddress = &TLS.ThreadState()->InterruptFaultPage;
    SIZE_T TmpSize = FEXCore::Utils::FEX_PAGE_SIZE;
    NtProtectVirtualMemory(NtCurrentProcess(), &TmpAddress, &TmpSize, PAGE_READONLY, &TmpProt);
  }

  // Spin until the JIT is interrupted
  while (TLS.ControlWord().load() & ControlBits::IN_JIT)
    ;

  // The JIT has now been interrupted and the context stored in the thread's CPU area is up-to-date
  if (Err = NtSuspendThread(*ThreadDup, Count); Err) {
    TLS.ControlWord().fetch_and(~ControlBits::PAUSED, std::memory_order::relaxed);
    return Err;
  }

  CONTEXT TmpContext {
    .ContextFlags = CONTEXT_INTEGER,
  };

  // NtSuspendThread may return before the thread is actually suspended, so a sync operation like NtGetContextThread
  // needs to be called to ensure it is before we unset CONTROL_PAUSED
  std::ignore = NtGetContextThread(*ThreadDup, &TmpContext);

  // Mark the CPU area as dirty, to force the JIT context to be restored from it on entry as it may be changed using
  // SetThreadContext (which doesn't use the BTCpu API)
  if (!(TLS.ControlWord().fetch_or(ControlBits::WOW_CPU_AREA_DIRTY, std::memory_order::relaxed) & ControlBits::WOW_CPU_AREA_DIRTY)) {
    if (Err = Context::FlushThreadStateContext(*ThreadDup); Err) {
      // Don't leave CONTROL_PAUSED set on failure (matching the NtSuspendThread failure path above), otherwise the
      // thread could never re-enter the JIT if it is resumed later. The host-side suspension itself is not undone
      // here.
      TLS.ControlWord().fetch_and(~ControlBits::PAUSED, std::memory_order::relaxed);
      return Err;
    }
  }

  LogMan::Msg::DFmt("Thread suspended: {:X}", ThreadTID);

  // Now the thread is suspended on the host, unset CONTROL_PAUSED so that NtResumeThread will
  // continue execution in the JIT
  TLS.ControlWord().fetch_and(~ControlBits::PAUSED, std::memory_order::relaxed);

  return Err;
}

// Returns true if exception dispatch should be halted and the execution context restored to Ptrs->Context
//
// Handling order:
//   1. Access violations are offered in turn to the call/ret stack handler, the overcommit tracker, the suspend
//      interrupt handler, the JIT guard-page handler and finally the self-modifying-code tracker; the first one
//      that claims the fault ends processing with `true`.
//   2. Anything else that did not fault inside JIT code (or has no thread state) is left alone (`false`).
//   3. Misaligned accesses inside the JIT are given to Context::HandleUnalignedAccess.
//   4. Remaining faults inside the JIT are turned into a guest exception: the WOW64 context is reconstructed and
//      stored with BTCpuSetContext, and the host context is replaced with the one captured at JIT entry.
bool BTCpuResetToConsistentStateImpl(EXCEPTION_POINTERS* Ptrs) {
  auto* Context = Ptrs->ContextRecord;
  auto* Exception = Ptrs->ExceptionRecord;
  auto TLS = GetTLS();
  auto Thread = TLS.ThreadState();
  FEXCORE_PROFILE_ACCUMULATION(Thread, AccumulatedSignalTime);

  if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
    // ExceptionInformation[1] is used as the faulting address for all of the handlers below.
    const auto FaultAddress = static_cast<uint64_t>(Exception->ExceptionInformation[1]);

    if (FEX::Windows::CallRetStack::HandleAccessViolation(Thread, FaultAddress, Context->X25)) {
      return true;
    }

    if (OvercommitTracker && OvercommitTracker->HandleAccessViolation(FaultAddress)) {
      return true;
    }

    if (Context::HandleSuspendInterrupt(TLS, Context, FaultAddress)) {
      LogMan::Msg::DFmt("Resumed from suspend");
      return true;
    }

    if (FEX::Windows::JITGuardPage::HandleJITGuardPage(Thread, reinterpret_cast<void*>(FaultAddress), Context->X,
                                                       reinterpret_cast<__uint128_t*>(Context->V), &Context->Pc)) {
      return true;
    }

    if (Thread) {
      std::scoped_lock Lock(ThreadCreationMutex);
      // NOTE(review): the SMC counter is incremented before it is known whether the tracker claims the fault.
      FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSMCCount, 1);
      if (InvalidationTracker->HandleRWXAccessViolation(Thread, Context->Pc, FaultAddress)) {
        // Inline SMC: the fault came from JIT code, in a multi-instruction block, and hit a page the current block
        // covers. In that case execution restarts through the dispatcher instead of resuming in place.
        if (CTX->IsAddressInCodeBuffer(Thread, Context->Pc) && !CTX->IsCurrentBlockSingleInst(Thread) &&
            CTX->IsAddressInCurrentBlock(Thread, FaultAddress & FEXCore::Utils::FEX_PAGE_MASK, FEXCore::Utils::FEX_PAGE_SIZE)) {
          Context::ReconstructThreadState(TLS, Context);
          LogMan::Msg::DFmt("Handled inline self-modifying code: pc: {:X} rip: {:X} fault: {:X}", Context->Pc,
                            Thread->CurrentFrame->State.rip, FaultAddress);

          // Adjust context to return to the dispatcher, reloading SRA from thread state
          const auto& Config = SignalDelegator->GetConfig();
          Context->Pc = Config.AbsoluteLoopTopAddressFillSRA;
          Context->X1 = 1; // Set ENTRY_FILL_SRA_SINGLE_INST_REG to force a single step
        } else {
          LogMan::Msg::DFmt("Handled self-modifying code: pc: {:X} fault: {:X}", Context->Pc, FaultAddress);
        }
        return true;
      }
    }
  }

  // Faults outside of JIT code, or on threads without emulation state, are not ours to translate.
  if (!Thread || !IsAddressInJit(Context->Pc)) {
    return false;
  }

  FEXCORE_PROFILE_INSTANT_INCREMENT(Thread, AccumulatedSIGBUSCount, 1);
  if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Context::HandleUnalignedAccess(TLS, Context)) {
    LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", Context->Pc);
    return true;
  }

  LogMan::Msg::DFmt("Reconstructing context");

  WOW64_CONTEXT WowContext = Context::ReconstructWowContext(TLS, Context);
  LogMan::Msg::DFmt("pc: {:X} eip: {:X}", Context->Pc, WowContext.Eip);

  // Rewrite the host exception record in place into the exception to report for the guest.
  auto& Fault = Thread->CurrentFrame->SynchronousFaultData;
  *Exception = FEX::Windows::HandleGuestException(Fault, *Exception, WowContext.Eip, WowContext.Eax);
  if (Exception->ExceptionCode == EXCEPTION_SINGLE_STEP) {
    // Clear the trap flag in the context that gets stored for a single-step exception.
    WowContext.EFlags &= ~(1 << FEXCore::X86State::RFLAG_TF_RAW_LOC);
  }
  // wow64.dll will handle adjusting PC in the dispatched context after a breakpoint

  BTCpuSetContext(GetCurrentThread(), GetCurrentProcess(), nullptr, &WowContext);
  Context::UnlockJITContext(TLS);

  // Replace the host context with one captured before JIT entry so host code can unwind
  memcpy(Context, TLS.EntryContext(), sizeof(*Context));

  return false;
}

// Exported exception hook. When the implementation reports that the fault was fully handled, execution is
// continued from the (possibly modified) context record via NtContinue; otherwise STATUS_SUCCESS is returned to
// the caller.
NTSTATUS BTCpuResetToConsistentState(EXCEPTION_POINTERS* Ptrs) {
  const bool ResumeFromContext = BTCpuResetToConsistentStateImpl(Ptrs);
  if (ResumeFromContext) {
    NtContinue(Ptrs->ContextRecord, FALSE);
  }

  return STATUS_SUCCESS;
}

// Invalidates the interval [Address, Address + Size) in the invalidation tracker while holding
// ThreadCreationMutex. The final `false` argument is passed through to InvalidateAlignedInterval unchanged.
void BTCpuFlushInstructionCache2(const void* Address, SIZE_T Size) {
  std::scoped_lock Lock(ThreadCreationMutex);
  const auto Start = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);
  InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
}

// Same handling as BTCpuFlushInstructionCache2: invalidates [Address, Address + Size) in the invalidation tracker
// under ThreadCreationMutex.
void BTCpuFlushInstructionCacheHeavy(const void* Address, SIZE_T Size) {
  std::scoped_lock Lock(ThreadCreationMutex);
  const auto Start = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);
  InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
}

// Notification that [Address, Address + Size) was modified: invalidates that interval in the invalidation tracker
// under ThreadCreationMutex.
void BTCpuNotifyMemoryDirty(void* Address, SIZE_T Size) {
  std::scoped_lock Lock(ThreadCreationMutex);
  const auto Start = reinterpret_cast<uint64_t>(Address);
  const auto Length = static_cast<uint64_t>(Size);
  InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
}

// Paired before/after notification around a memory allocation. The "before" call (After == FALSE) takes
// ThreadCreationMutex; the matching "after" call forwards the new protection to the invalidation tracker when the
// allocation succeeded, then releases the mutex.
void BTCpuNotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot, BOOL After, ULONG Status) {
  if (!After) {
    ThreadCreationMutex.lock();
    return;
  }

  // MEM_RESET(_UNDO) ignores the passed permissions
  const bool IsResetRequest = (Type & (MEM_RESET | MEM_RESET_UNDO)) != 0;
  if (Status == 0 && !IsResetRequest) {
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->HandleMemoryProtectionNotification(Start, Length, Prot);
  }
  ThreadCreationMutex.unlock();
}

// Paired before/after notification around a protection change. The "before" call (After == FALSE) takes
// ThreadCreationMutex; the matching "after" call reports the new protection to the invalidation tracker when the
// operation succeeded, then releases the mutex.
void BTCpuNotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt, BOOL After, ULONG Status) {
  if (!After) {
    ThreadCreationMutex.lock();
    return;
  }

  if (Status == 0) {
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->HandleMemoryProtectionNotification(Start, Length, NewProt);
  }
  ThreadCreationMutex.unlock();
}

// Paired before/after notification around a memory free. The "before" call (After == FALSE) takes
// ThreadCreationMutex; the matching "after" call invalidates the freed interval (passing `true` as the last
// InvalidateAlignedInterval argument) when the free succeeded, then releases the mutex.
void BTCpuNotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType, BOOL After, ULONG Status) {
  if (!After) {
    ThreadCreationMutex.lock();
    return;
  }

  if (Status == 0) {
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->InvalidateAlignedInterval(Start, Length, true);
  }
  ThreadCreationMutex.unlock();
}

// Notification that a section view was mapped at `Address`: hands the address to HandleImageMap under
// ThreadCreationMutex. All other arguments are ignored; always returns STATUS_SUCCESS.
NTSTATUS BTCpuNotifyMapViewOfSection(void* Unk1, void* Address, void* Unk2, SIZE_T Size, ULONG AllocType, ULONG Prot) {
  const auto ViewBase = reinterpret_cast<uint64_t>(Address);

  std::scoped_lock Lock(ThreadCreationMutex);
  HandleImageMap(ViewBase);
  return STATUS_SUCCESS;
}

// Paired before/after notification around unmapping a section view. The "before" call (After == FALSE) takes
// ThreadCreationMutex, invalidates the section containing `Address` and, if a non-empty section was found, reports
// it to HandleImageUnmap. The "after" call only releases the mutex. `Status` is not inspected.
void BTCpuNotifyUnmapViewOfSection(void* Address, BOOL After, ULONG Status) {
  if (After) {
    ThreadCreationMutex.unlock();
    return;
  }

  ThreadCreationMutex.lock();
  auto [SectionStart, SectionSize] = InvalidationTracker->InvalidateContainingSection(reinterpret_cast<uint64_t>(Address), true);
  if (SectionSize) {
    HandleImageUnmap(SectionStart, SectionSize);
  }
}

// Paired before/after notification around a file read into [Address, Address + Size).
//
// Before: takes ThreadCreationMutex and the code invalidation mutex and asks the invalidation tracker to begin an
// untracked write. If it reports true, both mutexes stay held and the per-thread InLockedRWXRead flag is set;
// otherwise both are released again immediately.
// After: if the flag is set, clears it and releases both mutexes.
void BTCpuNotifyReadFile(HANDLE Handle, void* Address, SIZE_T Size, BOOL After, NTSTATUS Status) {
  auto* FrontendData = GetFrontendThreadData(GetTLS().ThreadState());
  auto& HoldingLocks = FrontendData->InLockedRWXRead;

  if (After) {
    if (HoldingLocks) {
      HoldingLocks = false;
      CTX->GetCodeInvalidationMutex().unlock();
      ThreadCreationMutex.unlock();
    }
    return;
  }

  ThreadCreationMutex.lock();
  CTX->GetCodeInvalidationMutex().lock();
  if (InvalidationTracker->BeginUntrackedWriteLocked(reinterpret_cast<uint64_t>(Address), static_cast<uint64_t>(Size))) {
    // Keep both mutexes until the matching "after" notification.
    HoldingLocks = true;
    return;
  }

  CTX->GetCodeInvalidationMutex().unlock();
  ThreadCreationMutex.unlock();
}

// Reports whether processor feature `Feature` is present, as answered by CPUFeatures. Returns TRUE or FALSE.
BOOLEAN WINAPI BTCpuIsProcessorFeaturePresent(UINT Feature) {
  if (CPUFeatures->IsFeaturePresent(Feature)) {
    return TRUE;
  }
  return FALSE;
}

// Lets CPUFeatures update the caller-provided processor information structure in place.
void BTCpuUpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info) {
  auto& Features = *CPUFeatures;
  Features.UpdateInformation(Info);
}


================================================
FILE: Source/Windows/WOW64/libwow64fex.def
================================================
; Module-definition file for libwow64fex.dll.
; Lists the entry points exported by name: the BTCpu* functions plus __wine_get_unix_opcode.
LIBRARY libwow64fex.dll

EXPORTS
  BTCpuFlushInstructionCache2
  BTCpuFlushInstructionCacheHeavy
  BTCpuGetBopCode
  BTCpuGetContext
  BTCpuIsProcessorFeaturePresent
  BTCpuNotifyMemoryAlloc
  BTCpuNotifyMemoryProtect
  BTCpuNotifyMemoryDirty
  BTCpuNotifyMemoryFree
  BTCpuNotifyMapViewOfSection
  BTCpuNotifyUnmapViewOfSection
  BTCpuNotifyReadFile
  BTCpuProcessInit
  BTCpuResetToConsistentState
  BTCpuSetContext
  BTCpuSimulate
  BTCpuSuspendLocalThread
  BTCpuThreadInit
  BTCpuProcessTerm
  BTCpuThreadTerm
  BTCpuUpdateProcessorInformation
  __wine_get_unix_opcode


================================================
FILE: Source/Windows/include/wine/debug.h
================================================
// SPDX-License-Identifier: LGPL-2.1-or-later
// SPDX-FileCopyrightText: Copyright 1999 Patrik Stridvall

#pragma once

#include <windef.h>

#ifdef __cplusplus
extern "C" {
#endif

// Declaration only; the implementation is not part of this header.
// Presumably writes `str` to Wine's debug output - confirm against the Wine sources.
int __cdecl __wine_dbg_output(const char* str);

#ifdef __cplusplus
}
#endif


================================================
FILE: Source/Windows/include/wine/unixlib.h
================================================
// SPDX-License-Identifier: LGPL-2.1-or-later
// SPDX-FileCopyrightText: Copyright (C) 2021 Alexandre Julliard

#pragma once

#include <winternl.h>

#ifdef __cplusplus
extern "C" {
#endif

// 64-bit handle type passed as the first argument of a unix call.
typedef UINT64 unixlib_handle_t;

// Pointer to the dispatcher that performs unix calls; defined outside this header.
extern NTSTATUS(WINAPI* __wine_unix_call_dispatcher)(unixlib_handle_t, unsigned int, void*);

// Forwards a unix call to __wine_unix_call_dispatcher and returns its status unchanged.
static inline NTSTATUS __wine_unix_call(unixlib_handle_t handle, unsigned int code, void* args) {
  const NTSTATUS status = __wine_unix_call_dispatcher(handle, code, args);
  return status;
}

#ifdef __cplusplus
}
#endif


================================================
FILE: Source/Windows/include/winnt.h
================================================
// SPDX-License-Identifier: LGPL-2.1-or-later
// SPDX-FileCopyrightText: Copyright (C) the Wine project

#pragma once

#include_next <winnt.h>

#ifdef __cplusplus
extern "C" {
#endif

// Local, underscore-prefixed definition of the code-integrity record embedded in the load config directory.
// Used as the type of `CodeIntegrity` in _IMAGE_LOAD_CONFIG_DIRECTORY64 below.
typedef struct ___IMAGE_LOAD_CONFIG_CODE_INTEGRITY {
  WORD Flags;
  WORD Catalog;
  DWORD CatalogOffset;
  DWORD Reserved;
} __IMAGE_LOAD_CONFIG_CODE_INTEGRITY, *__PIMAGE_LOAD_CONFIG_CODE_INTEGRITY;

// Local, underscore-prefixed definition of the 64-bit load configuration directory.
// Pointer-like fields are ULONGLONG. The trailing /* xxx */ comments give the hexadecimal byte offset of the field
// they annotate.
typedef struct __IMAGE_LOAD_CONFIG_DIRECTORY64 {
  DWORD Size; /* 000 */
  DWORD TimeDateStamp;
  WORD MajorVersion;
  WORD MinorVersion;
  DWORD GlobalFlagsClear;
  DWORD GlobalFlagsSet; /* 010 */
  DWORD CriticalSectionDefaultTimeout;
  ULONGLONG DeCommitFreeBlockThreshold;
  ULONGLONG DeCommitTotalFreeThreshold; /* 020 */
  ULONGLONG LockPrefixTable;
  ULONGLONG MaximumAllocationSize; /* 030 */
  ULONGLONG VirtualMemoryThreshold;
  ULONGLONG ProcessAffinityMask; /* 040 */
  DWORD ProcessHeapFlags;
  WORD CSDVersion;
  WORD DependentLoadFlags;
  ULONGLONG EditList; /* 050 */
  ULONGLONG SecurityCookie;
  ULONGLONG SEHandlerTable; /* 060 */
  ULONGLONG SEHandlerCount;
  ULONGLONG GuardCFCheckFunctionPointer; /* 070 */
  ULONGLONG GuardCFDispatchFunctionPointer;
  ULONGLONG GuardCFFunctionTable; /* 080 */
  ULONGLONG GuardCFFunctionCount;
  DWORD GuardFlags; /* 090 */
  __IMAGE_LOAD_CONFIG_CODE_INTEGRITY CodeIntegrity;
  ULONGLONG GuardAddressTakenIatEntryTable; /* 0a0 */
  ULONGLONG GuardAddressTakenIatEntryCount;
  ULONGLONG GuardLongJumpTargetTable; /* 0b0 */
  ULONGLONG GuardLongJumpTargetCount;
  ULONGLONG DynamicValueRelocTable; /* 0c0 */
  ULONGLONG CHPEMetadataPointer;
  ULONGLONG GuardRFFailureRoutine; /* 0d0 */
  ULONGLONG GuardRFFailureRoutineFunctionPointer;
  DWORD DynamicValueRelocTableOffset; /* 0e0 */
  WORD DynamicValueRelocTableSection;
  WORD Reserved2;
  ULONGLONG GuardRFVerifyStackPointerFunctionPointer;
  DWORD HotPatchTableOffset; /* 0f0 */
  DWORD Reserved3;
  ULONGLONG EnclaveConfigurationPointer;
  ULONGLONG VolatileMetadataPointer; /* 100 */
  ULONGLONG GuardEHContinuationTable;
  ULONGLONG GuardEHContinuationCount; /* 110 */
  ULONGLONG GuardXFGCheckFunctionPointer;
  ULONGLONG GuardXFGDispatchFunctionPointer; /* 120 */
  ULONGLONG GuardXFGTableDispatchFunctionPointer;
  ULONGLONG CastGuardOsDeterminedFailureMode; /* 130 */
  ULONGLONG GuardMemcpyFunctionPointer;
} _IMAGE_LOAD_CONFIG_DIRECTORY64, *_PIMAGE_LOAD_CONFIG_DIRECTORY64;

// Local, underscore-prefixed definition of the 32-bit load configuration directory.
// Pointer-like fields are DWORD. The trailing /* xxx */ comments give the hexadecimal byte offset of the field they
// annotate.
typedef struct __IMAGE_LOAD_CONFIG_DIRECTORY32 {
  DWORD Size; /* 000 */
  DWORD TimeDateStamp;
  WORD MajorVersion;
  WORD MinorVersion;
  DWORD GlobalFlagsClear;
  DWORD GlobalFlagsSet; /* 010 */
  DWORD CriticalSectionDefaultTimeout;
  DWORD DeCommitFreeBlockThreshold;
  DWORD DeCommitTotalFreeThreshold;
  DWORD LockPrefixTable; /* 020 */
  DWORD MaximumAllocationSize;
  DWORD VirtualMemoryThreshold;
  DWORD ProcessHeapFlags;
  DWORD ProcessAffinityMask; /* 030 */
  WORD CSDVersion;
  WORD DependentLoadFlags;
  DWORD EditList;
  DWORD SecurityCookie;
  DWORD SEHandlerTable; /* 040 */
  DWORD SEHandlerCount;
  DWORD GuardCFCheckFunctionPointer;
  DWORD GuardCFDispatchFunctionPointer;
  DWORD GuardCFFunctionTable; /* 050 */
  DWORD GuardCFFunctionCount;
  DWORD GuardFlags;
  // NOTE(review): this uses the unprefixed IMAGE_LOAD_CONFIG_CODE_INTEGRITY (which must come from the system
  // headers), while the 64-bit variant uses the local __IMAGE_LOAD_CONFIG_CODE_INTEGRITY - confirm whether the
  // difference is intentional.
  IMAGE_LOAD_CONFIG_CODE_INTEGRITY CodeIntegrity;
  DWORD GuardAddressTakenIatEntryTable;
  DWORD GuardAddressTakenIatEntryCount;
  DWORD GuardLongJumpTargetTable; /* 070 */
  DWORD GuardLongJumpTargetCount;
  DWORD DynamicValueRelocTable;
  DWORD CHPEMetadataPointer;
  DWORD GuardRFFailureRoutine; /* 080 */
  DWORD GuardRFFailureRoutineFunctionPointer;
  DWORD DynamicValueRelocTableOffset;
  WORD DynamicValueRelocTableSection;
  WORD Reserved2;
  DWORD GuardRFVerifyStackPointerFunctionPointer; /* 090 */
  DWORD HotPatchTableOffset;
  DWORD Reserved3;
  DWORD EnclaveConfigurationPointer;
  DWORD VolatileMetadataPointer; /* 0a0 */
  DWORD GuardEHContinuationTable;
  DWORD GuardEHContinuationCount;
  DWORD GuardXFGCheckFunctionPointer;
  DWORD GuardXFGDispatchFunctionPointer; /* 0b0 */
  DWORD GuardXFGTableDispatchFunctionPointer;
  DWORD CastGuardOsDeterminedFailureMode;
  DWORD GuardMemcpyFunctionPointer;
} _IMAGE_LOAD_CONFIG_DIRECTORY32, *_PIMAGE_LOAD_CONFIG_DIRECTORY32;

// One CHPE range entry: a start offset and a length.
// The first word can be read either whole (StartOffset) or as a 1-bit NativeCode flag followed by 31 address bits.
typedef struct _IMAGE_CHPE_RANGE_ENTRY {
  union {
    ULONG StartOffset;
    struct {
      ULONG NativeCode  : 1;
      ULONG AddressBits : 31;
    } DUMMYSTRUCTNAME;
  } DUMMYUNIONNAME;
  ULONG Length;
} IMAGE_CHPE_RANGE_ENTRY, *PIMAGE_CHPE_RANGE_ENTRY;

// ARM64EC image metadata record. Every field is a 32-bit ULONG.
// NOTE(review): the non-count fields are presumably RVAs into the image - confirm against the consumers.
typedef struct _IMAGE_ARM64EC_METADATA {
  ULONG Version;
  ULONG CodeMap;
  ULONG CodeMapCount;
  ULONG CodeRangesToEntryPoints;
  ULONG RedirectionMetadata;
  ULONG __os_arm64x_dispatch_call_no_redirect;
  ULONG __os_arm64x_dispatch_ret;
  ULONG __os_arm64x_dispatch_call;
  ULONG __os_arm64x_dispatch_icall;
  ULONG __os_arm64x_dispatch_icall_cfg;
  ULONG AlternateEntryPoint;
  ULONG AuxiliaryIAT;
  ULONG CodeRangesToEntryPointsCount;
  ULONG RedirectionMetadataCount;
  ULONG GetX64InformationFunctionPointer;
  ULONG SetX64InformationFunctionPointer;
  ULONG ExtraRFETable;
  ULONG ExtraRFETableSize;
  ULONG __os_arm64x_dispatch_fptr;
  ULONG AuxiliaryIATCopy;
  ULONG __os_arm64x_helper0;
  ULONG __os_arm64x_helper1;
  ULONG __os_arm64x_helper2;
  ULONG __os_arm64x_helper3;
  ULONG __os_arm64x_helper4;
  ULONG __os_arm64x_helper5;
  ULONG __os_arm64x_helper6;
  ULONG __os_arm64x_helper7;
  ULONG __os_arm64x_helper8;
} IMAGE_ARM64EC_METADATA;

// One ARM64EC redirection record: a source/destination pair of 32-bit values.
typedef struct _IMAGE_ARM64EC_REDIRECTION_ENTRY {
  ULONG Source;
  ULONG Destination;
} IMAGE_ARM64EC_REDIRECTION_ENTRY;

// Associates a code range, given by start and end RVA, with an entry point.
typedef struct _IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT {
  ULONG StartRva;
  ULONG EndRva;
  ULONG EntryPoint;
} IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT;

// Describes one chunk by a signed offset and an unsigned length; used as the building block of CONTEXT_EX.
typedef struct _CONTEXT_CHUNK {
  LONG Offset;
  ULONG Length;
} CONTEXT_CHUNK, *PCONTEXT_CHUNK;

// Extended-context header made of three chunk descriptors (All, Legacy, XState).
// On 64-bit builds an extra 8-byte `align` member follows the descriptors.
typedef struct _CONTEXT_EX {
  CONTEXT_CHUNK All;
  CONTEXT_CHUNK Legacy;
  CONTEXT_CHUNK XState;
#ifdef _WIN64
  ULONG64 align;
#endif
} CONTEXT_EX, *PCONTEXT_EX;

// From process hacker
// Header of an image's volatile metadata: its own size and version, followed by location/size pairs for the
// volatile access table and the volatile info range table.
typedef struct _IMAGE_VOLATILE_METADATA {
  ULONG Size;
  ULONG Version;
  ULONG VolatileAccessTable;
  ULONG VolatileAccessTableSize;
  ULONG VolatileInfoRangeTable;
  ULONG VolatileInfoRangeTableSize;
} IMAGE_VOLATILE_METADATA, *PIMAGE_VOLATILE_METADATA;

// A single RVA.
// NOTE(review): presumably the element type of IMAGE_VOLATILE_METADATA::VolatileAccessTable - confirm.
typedef struct _IMAGE_VOLATILE_RVA_METADATA {
  ULONG Rva;
} IMAGE_VOLATILE_RVA_METADATA, *PIMAGE_VOLATILE_RVA_METADATA;

// An RVA plus a size, describing one range.
// NOTE(review): presumably the element type of IMAGE_VOLATILE_METADATA::VolatileInfoRangeTable - confirm.
typedef struct _IMAGE_VOLATILE_RANGE_METADATA {
  ULONG Rva;
  ULONG Size;
} IMAGE_VOLATILE_RANGE_METADATA, *PIMAGE_VOLATILE_RANGE_METADATA;

// Declaration of the ntdll run-once helper; only the prototype is provided here.
NTSYSAPI DWORD WINAPI RtlRunOnceExecuteOnce(PRTL_RUN_ONCE, PRTL_RUN_ONCE_INIT_FN, PVOID, PVOID*);

// This is a FEX extension, and requires corresponding wine patches
// Context flag built from CONTEXT_ARM64 plus bit 0x40.
#define CONTEXT_ARM64_FEX_YMMSTATE (CONTEXT_ARM64 | 0x00000040)

#ifdef __cplusplus
}
#endif


================================================
FILE: Source/Windows/include/winternl.h
================================================
// SPDX-License-Identifier: LGPL-2.1-or-later
// SPDX-FileCopyrightText: Copyright (C) the Wine project

#pragma once

#include_next <winternl.h>
#include <winnt.h>

#ifdef __cplusplus
extern "C" {
#endif

// File sharing mode constants.
#define SH_COMPAT 0x00 /* Compatibility */
#define SH_DENYRW 0x10 /* Deny read/write */
#define SH_DENYWR 0x20 /* Deny write */
#define SH_DENYRD 0x30 /* Deny read */
#define SH_DENYNO 0x40 /* Deny nothing */

// Underscore-prefixed aliases for the constants above.
#define _SH_COMPAT SH_COMPAT
#define _SH_DENYRW SH_DENYRW
#define _SH_DENYWR SH_DENYWR
#define _SH_DENYRD SH_DENYRD
#define _SH_DENYNO SH_DENYNO

// Pseudo-handle constants: ~0 is (HANDLE)-1 for the current process, ~1 is (HANDLE)-2 for the current thread.
#define NtCurrentProcess() ((HANDLE) ~(ULONG_PTR)0)
#define NtCurrentThread() ((HANDLE) ~(ULONG_PTR)1)

// WOW64 TLS slot numbers.
#define WOW64_TLS_WOW64INFO 10
#define WOW64_TLS_MAX_NUMBER 19

// WOW64 CPU flag value.
#define WOW64_CPUFLAGS_SOFTWARE 0x02

// NTSTATUS code named for emulation syscalls.
#define STATUS_EMULATION_SYSCALL ((NTSTATUS)0x40000039)

#ifdef ARCHITECTURE_arm64ec
// CPU area info record, only defined for ARM64EC builds.
// The trailing /* xxx */ comments give the hexadecimal byte offset of each field.
typedef struct _CHPE_V2_CPU_AREA_INFO {
  BOOLEAN InSimulation;             /* 000 */
  BOOLEAN InSyscallCallback;        /* 001 */
  ULONG64 EmulatorStackBase;        /* 008 */
  ULONG64 EmulatorStackLimit;       /* 010 */
  ARM64EC_NT_CONTEXT* ContextAmd64; /* 018 */
  ULONG* SuspendDoorbell;           /* 020 */
  ULONG64 LoadingModuleModflag;     /* 028 */
  void* EmulatorData[4];            /* 030 */
  ULONG64 EmulatorDataInline;       /* 050 */
} CHPE_V2_CPU_AREA_INFO, *PCHPE_V2_CPU_AREA_INFO;
#endif

// System DLL init block: a version, some fields of unknown meaning, and a set of 64-bit values named after ntdll
// entry points and data. All of these are stored as ULONG64 rather than as typed pointers.
typedef struct {
  ULONG version;
  ULONG unknown1[3];
  ULONG64 unknown2;
  ULONG64 pLdrInitializeThunk;
  ULONG64 pKiUserExceptionDispatcher;
  ULONG64 pKiUserApcDispatcher;
  ULONG64 pKiUserCallbackDispatcher;
  ULONG64 pRtlUserThreadStart;
  ULONG64 pRtlpQueryProcessDebugInformationRemote;
  ULONG64 ntdll_handle;
  ULONG64 pLdrSystemDllInitBlock;
  ULONG64 pRtlpFreezeTimeBias;
} SYSTEM_DLL_INIT_BLOCK;

// Fixed-layout string descriptor whose Buffer is stored as a 64-bit integer instead of a native pointer.
typedef struct _UNICODE_STRING64 {
  USHORT Length;
  USHORT MaximumLength;
  ULONG64 Buffer;
} UNICODE_STRING64;

// Current-directory record: a path string plus a handle, both in their fixed 64-bit representations.
typedef struct _CURDIR64 {
  UNICODE_STRING64 DosPath;
  ULONG64 Handle;
} CURDIR64;

// Per-drive current-directory record using the 64-bit string layout.
typedef struct RTL_DRIVE_LETTER_CURDIR64 {
  USHORT Flags;
  USHORT Length;
  ULONG TimeStamp;
  UNICODE_STRING64 DosPath;
} RTL_DRIVE_LETTER_CURDIR64;

// Process parameters block in a fixed 64-bit layout: handles and pointers are ULONG64 and strings are
// UNICODE_STRING64. This is the type that __PEB::ProcessParameters points to.
typedef struct _RTL_USER_PROCESS_PARAMETERS64 {
  ULONG AllocationSize;
  ULONG Size;
  ULONG Flags;
  ULONG DebugFlags;
  ULONG64 ConsoleHandle;
  ULONG ConsoleFlags;
  ULONG64 hStdInput;
  ULONG64 hStdOutput;
  ULONG64 hStdError;
  CURDIR64 CurrentDirectory;
  UNICODE_STRING64 DllPath;
  UNICODE_STRING64 ImagePathName;
  UNICODE_STRING64 CommandLine;
  ULONG64 Environment;
  ULONG dwX;
  ULONG dwY;
  ULONG dwXSize;
  ULONG dwYSize;
  ULONG dwXCountChars;
  ULONG dwYCountChars;
  ULONG dwFillAttribute;
  ULONG dwFlags;
  ULONG wShowWindow;
  UNICODE_STRING64 WindowTitle;
  UNICODE_STRING64 Desktop;
  UNICODE_STRING64 ShellInfo;
  UNICODE_STRING64 RuntimeInfo;
  RTL_DRIVE_LETTER_CURDIR64 DLCurrentDirectory[0x20];
  ULONG64 EnvironmentSize;
  ULONG64 EnvironmentVersion;
  ULONG64 PackageDependencyData;
  ULONG ProcessGroupId;
  ULONG LoaderThreads;
} RTL_USER_PROCESS_PARAMETERS64;
// Bitmap descriptor: a bit count plus a pointer to the backing storage.
typedef struct tagRTL_BITMAP {
  ULONG SizeOfBitMap; /* Number of bits in the bitmap */
  PULONG Buffer;      /* Bitmap data, assumed sized to a DWORD boundary */
} RTL_BITMAP, *PRTL_BITMAP;

// Pointer-to-const variant of PRTL_BITMAP.
typedef const RTL_BITMAP* PCRTL_BITMAP;


typedef struct __PEB {                    /* win32/win64 */
  BOOLEAN InheritedAddressSpace;          /* 000/000 */
  BOOLEAN ReadImageFileExecOptions;       /* 001/001 */
  BOOLEAN BeingDebugged;                  /* 002/002 */
  UCHAR ImageUsedLargePages          : 1; /* 003/003 */
  UCHAR IsProtectedProcess           : 1;
  UCHAR IsImageDynamicallyRelocated  : 1;
  UCHAR SkipPatchingUser32Forwarders : 1;
  UCHAR IsPackagedProcess            : 1;
  UCHAR IsAppContainer               : 1;
  UCHAR IsProtectedProcessLight      : 1;
  UCHAR IsLongPathAwareProcess       : 1;
  HANDLE Mutant;                                    /* 004/008 */
  HMODULE ImageBaseAddress;                         /* 008/010 */
  PPEB_LDR_DATA LdrData;                            /* 00c/018 */
  RTL_USER_PROCESS_PARAMETERS64* ProcessParameters; /* 010/020 */
  PVOID SubSystemData;                              /* 014/028 */
  HANDLE ProcessHeap;                               /* 018/030 */
  PRTL_CRITICAL_SECTION FastPebLock;                /* 01c/038 */
  PVOID AtlThunkSListPtr;                           /* 020/040 */
  PVOID IFEOKey;                                    /* 024/048 */
  ULONG ProcessInJob               : 1;             /* 028/050 */
  ULONG ProcessInitializing        : 1;
  ULONG ProcessUsingVEH            : 1;
  ULONG ProcessUsingVCH            : 1;
  ULONG ProcessUsingFTH            : 1;
  ULONG ProcessPreviouslyThrottled : 1;
  ULONG ProcessCurrentlyThrottled  : 1;
  ULONG ProcessImagesHotPatched    : 1;
  ULONG ReservedBits0              : 24;
  void* KernelCallbackTable;             /* 02c/058 */
  ULONG Reserved;                        /* 030/060 */
  ULONG AtlThunkSListPtr32;              /* 034/064 */
  PVOID ApiSetMap;                       /* 038/068 */
  ULONG TlsExpansionCounter;             /* 03c/070 */
  PRTL_BITMAP TlsBitmap;                 /* 040/078 */
  ULONG TlsBitmapBits[2];                /* 044/080 */
  PVOID ReadOnlySharedMemoryBase;        /* 04c/088 */
  PVOID SharedData;                      /* 050/090 */
  PVOID* ReadOnlyStaticServerData;       /* 054/098 */
  PVOID AnsiCodePageData;                /* 058/0a0 */
  PVOID OemCodePageData;                 /* 05c/0a8 */
  PVOID UnicodeCaseTableData;            /* 060/0b0 */
  ULONG NumberOfProcessors;              /* 064/0b8 */
  ULONG NtGlobalFlag;                    /* 068/0bc */
  LARGE_INTEGER CriticalSectionTimeout;  /* 070/0c0 */
  SIZE_T HeapSegmentReserve;             /* 078/0c8 */
  SIZE_T HeapSegmentCommit;              /* 07c/0d0 */
  SIZE_T HeapDeCommitTotalFreeThreshold; /* 080/0d8 */
  SIZE_T HeapDeCommitFreeBlockThreshold; /* 084/0e0 */
  ULONG NumberOfHeaps;                   /* 088/0e8 */
  ULONG MaximumNumberOfHeaps;            /* 08c/0ec */
  PVOID* ProcessHeaps;                   /* 090/0f0 */
  PVOID GdiSharedHandleTable;            /* 094/0f8 */
  PVOID ProcessStarterHelper;            /* 098/100 */
  PVOID GdiDCAttributeList;              /* 09c/108 */
  PVOID LoaderLock;                      /* 0a0/110 */
  ULONG OSMajorVersion;                  /* 0a4/118 */
  ULONG OSMinorVersion;                  /* 0a8/11c */
  ULONG OSBuildNumber;                   /* 0ac/120 */
  ULONG OSPlatformId;                    /* 0b0/124 */
  ULONG ImageSubSystem;                  /* 0b4/128 */
  ULONG ImageSubSystemMajorVersion;      /* 0b8/12c */
  ULONG ImageSubSystemMinorVersion;      /* 0bc/130 */
  KAFFINITY ActiveProcessAffinityMask;   /* 0c0/138 */
#ifdef _WIN64
  ULONG GdiHandleBuffer[60]; /*    /140 */
#else
  ULONG GdiHandleBuffer[34]; /* 0c4/    */
#endif
  PVOID PostProcessInitRoutine;      /* 14c/230 */
  PRTL_BITMAP TlsExpansionBitmap;    /* 150/238 */
  ULONG TlsExpansionBitmapBits[32];  /* 154/240 */
  ULONG SessionId;                   /* 1d4/2c0 */
  ULARGE_INTEGER AppCompatFlags;     /* 1d8/2c8 */
  ULARGE_INTEGER AppCompatFlagsUser; /* 1e0/2d0 */
  PVOID ShimData;                    /* 1e8/2d8 */
  PVOID AppCompatInfo;               /* 1ec/2e0 */
  UNICODE_STRING64 CSDVersion;       /* 1f0/2e8 */
  PVOID ActivationContextData;       /* 1f8/2f8 */
  PVOID ProcessAssemblyStorageMap;   /* 1fc/300 */
  PVOID SystemDefaultActivationData; /* 200/308 */
  PVOID SystemAssemblyStorageMap;    /* 204/310 */
  SIZE_T MinimumStackCommit;         /* 208/318 */
  PVOID* FlsCallback;                /* 20c/320 */
  LIST_ENTRY FlsListHead;            /* 210/328 */
  PRTL_BITMAP FlsBitmap;             /* 218/338 */
  ULONG FlsBitmapBits[4];            /* 21c/340 */
  ULONG FlsHighIndex;                /* 22c/350 */
  PVOID WerRegistrationData;         /* 230/358 */
  PVOID WerShipAssertPtr;            /* 234/360 */
  PVOID EcCodeBitMap;                /* 238/368 */
  PVOID pImageHeaderHash;            /* 23c/370 */
  ULONG HeapTracingEnabled      : 1; /* 240/378 */
  ULONG CritSecTracingEnabled   : 1;
  ULONG LibLoaderTracingEnabled : 1;
  ULONG SpareTracingBits        : 29;
  ULONGLONG CsrServerReadOnlySharedMemoryBase;  /* 248/380 */
  ULONG TppWorkerpListLock;                     /* 250/388 */
  LIST_ENTRY TppWorkerpList;                    /* 254/390 */
  PVOID WaitOnAddressHashTable[0x80];           /* 25c/3a0 */
  PVOID TelemetryCoverageHeader;                /* 45c/7a0 */
  ULONG CloudFileFlags;                         /* 460/7a8 */
  ULONG CloudFileDiagFlags;                     /* 464/7ac */
  CHAR PlaceholderCompatibilityMode;            /* 468/7b0 */
  CHAR PlaceholderCompatibilityModeReserved[7]; /* 469/7b1 */
  PVOID LeapSecondData;                         /* 470/7b8 */
  ULONG LeapSecondFlags;                        /* 474/7c0 */
  ULONG NtGlobalFlag2;                          /* 478/7c4 */
} __PEB, *__PPEB;

/* One frame of an activation-context stack. Frames are chained through
 * `Previous`; ACTIVATION_CONTEXT_STACK::ActiveFrame points at a frame of
 * this type. Member order is part of the binary layout - do not reorder. */
typedef struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME {
  /* Link to another frame of the same type */
  struct _RTL_ACTIVATION_CONTEXT_STACK_FRAME* Previous;
  /* Only referenced through a pointer in this declaration */
  struct _ACTIVATION_CONTEXT* ActivationContext;
  ULONG Flags;
} RTL_ACTIVATION_CONTEXT_STACK_FRAME, *PRTL_ACTIVATION_CONTEXT_STACK_FRAME;

/* Per-thread activation-context stack header. It is embedded by value in the
 * TEB (member ActivationContextStack); the TEB offset comments around that
 * member imply a size of 0x18 bytes on win32 and 0x28 bytes on win64, so the
 * member order and types here must stay as they are. */
typedef struct _ACTIVATION_CONTEXT_STACK {
  RTL_ACTIVATION_CONTEXT_STACK_FRAME* ActiveFrame;
  LIST_ENTRY FrameListCache;
  ULONG Flags;
  ULONG NextCookieSequenceNumber;
  ULONG_PTR StackId;
} ACTIVATION_CONTEXT_STACK, *PACTIVATION_CONTEXT_STACK;
/* Fixed-size batch buffer embedded by value in the TEB (member GdiTebBatch).
 * The TEB offset comments imply a size of 0x4e0 bytes on win32 and 0x4e8
 * bytes on win64, which matches the members below - do not resize. */
typedef struct _GDI_TEB_BATCH {
  ULONG Offset;
  HANDLE HDC;
  ULONG Buffer[0x136];
} GDI_TEB_BATCH;
/* Thread Environment Block layout for both 32-bit and 64-bit targets.
 *
 * The trailing comment on every member is its byte offset, written as
 * win32/win64. A blank half (e.g. "/    /0190") means the member only exists
 * in that form for the other bitness; those members are selected with
 * `#ifdef _WIN64`.
 *
 * This is a binary layout: members must not be added, removed, resized or
 * reordered without re-deriving every following offset. */
typedef struct __TEB {                          /* win32/win64 */
  NT_TIB Tib;                                   /* 000/0000 */
  PVOID EnvironmentPointer;                     /* 01c/0038 */
  CLIENT_ID ClientId;                           /* 020/0040 */
  PVOID ActiveRpcHandle;                        /* 028/0050 */
  PVOID ThreadLocalStoragePointer;              /* 02c/0058 */
  __PPEB Peb;                                   /* 030/0060 */
  ULONG LastErrorValue;                         /* 034/0068 */
  ULONG CountOfOwnedCriticalSections;           /* 038/006c */
  PVOID CsrClientThread;                        /* 03c/0070 */
  PVOID Win32ThreadInfo;                        /* 040/0078 */
  ULONG User32Reserved[26];                     /* 044/0080 */
  ULONG UserReserved[5];                        /* 0ac/00e8 */
  PVOID WOW32Reserved;                          /* 0c0/0100 */
  ULONG CurrentLocale;                          /* 0c4/0108 */
  ULONG FpSoftwareStatusRegister;               /* 0c8/010c */
  PVOID ReservedForDebuggerInstrumentation[16]; /* 0cc/0110 */
  /* The element count differs per bitness so that the following member lands
   * on the documented offset (174/0280) in both layouts. */
#ifdef _WIN64
  PVOID SystemReserved1[30]; /*    /0190 */
#else
  PVOID SystemReserved1[26]; /* 10c/     used for krnl386 private data in Wine */
#endif
  char PlaceholderCompatibilityMode;                       /* 174/0280 */
  BOOLEAN PlaceholderHydrationAlwaysExplicit;              /* 175/0281 */
  char PlaceholderReserved[10];                            /* 176/0282 */
  DWORD ProxiedProcessId;                                  /* 180/028c */
  ACTIVATION_CONTEXT_STACK ActivationContextStack;         /* 184/0290 */
  UCHAR WorkingOnBehalfOfTicket[8];                        /* 19c/02b8 */
  LONG ExceptionCode;                                      /* 1a4/02c0 */
  ACTIVATION_CONTEXT_STACK* ActivationContextStackPointer; /* 1a8/02c8 */
  ULONG_PTR InstrumentationCallbackSp;                     /* 1ac/02d0 */
  ULONG_PTR InstrumentationCallbackPreviousPc;             /* 1b0/02d8 */
  ULONG_PTR InstrumentationCallbackPreviousSp;             /* 1b4/02e0 */
  /* Same members in a different order per bitness; UnalignedLoadStoreExceptions
   * is only declared for the 64-bit layout. */
#ifdef _WIN64
  ULONG TxFsContext;                       /*    /02e8 */
  BOOLEAN InstrumentationCallbackDisabled; /*    /02ec */
  BOOLEAN UnalignedLoadStoreExceptions;    /*    /02ed */
#else
  BOOLEAN InstrumentationCallbackDisabled; /* 1b8/     */
  BYTE SpareBytes1[23];                    /* 1b9/     */
  ULONG TxFsContext;                       /* 1d0/     */
#endif
  GDI_TEB_BATCH GdiTebBatch;          /* 1d4/02f0 used for ntdll private data in Wine */
  CLIENT_ID RealClientId;             /* 6b4/07d8 */
  HANDLE GdiCachedProcessHandle;      /* 6bc/07e8 */
  ULONG GdiClientPID;                 /* 6c0/07f0 */
  ULONG GdiClientTID;                 /* 6c4/07f4 */
  PVOID GdiThreadLocaleInfo;          /* 6c8/07f8 */
  ULONG_PTR Win32ClientInfo[62];      /* 6cc/0800 used for user32 private data in Wine */
  PVOID glDispatchTable[233];         /* 7c4/09f0 */
  PVOID glReserved1[29];              /* b68/1138 */
  PVOID glReserved2;                  /* bdc/1220 */
  PVOID glSectionInfo;                /* be0/1228 */
  PVOID glSection;                    /* be4/1230 */
  PVOID glTable;                      /* be8/1238 */
  PVOID glCurrentRC;                  /* bec/1240 */
  PVOID glContext;                    /* bf0/1248 */
  ULONG LastStatusValue;              /* bf4/1250 */
  UNICODE_STRING StaticUnicodeString; /* bf8/1258 */
  WCHAR StaticUnicodeBuffer[261];     /* c00/1268 */
  PVOID DeallocationStack;            /* e0c/1478 */
  PVOID TlsSlots[64];                 /* e10/1480 */
  LIST_ENTRY TlsLinks;                /* f10/1680 */
  PVOID Vdm;                          /* f18/1690 */
  PVOID ReservedForNtRpc;             /* f1c/1698 */
  PVOID DbgSsReserved[2];             /* f20/16a0 */
  ULONG HardErrorMode;                /* f28/16b0 */
#ifdef _WIN64
  PVOID Instrumentation[11]; /*    /16b8 */
#else
  PVOID Instrumentation[9]; /* f2c/ */
#endif
  GUID ActivityId;                   /* f50/1710 */
  PVOID SubProcessTag;               /* f60/1720 */
  PVOID PerflibData;                 /* f64/1728 */
  PVOID EtwTraceData;                /* f68/1730 */
  PVOID WinSockData;                 /* f6c/1738 */
  ULONG GdiBatchCount;               /* f70/1740 */
  ULONG IdealProcessorValue;         /* f74/1744 */
  ULONG GuaranteedStackBytes;        /* f78/1748 */
  PVOID ReservedForPerf;             /* f7c/1750 */
  PVOID ReservedForOle;              /* f80/1758 */
  ULONG WaitingOnLoaderLock;         /* f84/1760 */
  PVOID SavedPriorityState;          /* f88/1768 */
  ULONG_PTR ReservedForCodeCoverage; /* f8c/1770 */
  PVOID ThreadPoolData;              /* f90/1778 */
  PVOID* TlsExpansionSlots;          /* f94/1780 */
  /* 64-bit only: two extra pointer slots. On arm64ec builds the first slot is
   * additionally exposed as ChpeV2CpuAreaInfo through the union. */
#ifdef _WIN64
  union {
    PVOID DeallocationBStore; /*    /1788 */
#ifdef ARCHITECTURE_arm64ec
    CHPE_V2_CPU_AREA_INFO* ChpeV2CpuAreaInfo; /*    /1788 */
#endif
  } DUMMYUNIONNAME;
  PVOID BStoreLimit; /*    /1790 */
#endif
  ULONG MuiGeneration;            /* f98/1798 */
  ULONG IsImpersonating;          /* f9c/179c */
  PVOID NlsCache;                 /* fa0/17a0 */
  PVOID ShimData;                 /* fa4/17a8 */
  ULONG HeapVirtualAffinity;      /* fa8/17b0 */
  PVOID CurrentTransactionHandle; /* fac/17b8 */
  PVOID ActiveFrame;              /* fb0/17c0 */
  PVOID FlsSlots;                 /* fb4/17c8 */
  PVOID PreferredLanguages;       /* fb8/17d0 */
  PVOID UserPrefLanguages;        /* fbc/17d8 */
  PVOID MergedPrefLanguages;      /* fc0/17e0 */
  ULONG MuiImpersonation;         /* fc4/17e8 */
  USHORT CrossTebFlags;           /* fc8/17ec */
  USHORT SameTebFlags;            /* fca/17ee */
  PVOID TxnScopeEnterCallback;    /* fcc/17f0 */
  PVOID TxnScopeExitCallback;     /* fd0/17f8 */
  PVOID TxnScopeContext;          /* fd4/1800 */
  ULONG LockCount;                /* fd8/1808 */
  LONG WowTebOffset;              /* fdc/180c */
  PVOID ResourceRetValue;         /* fe0/1810 */
  PVOID ReservedForWdf;           /* fe4/1818 */
  ULONGLONG ReservedForCrt;       /* fe8/1820 */
  GUID EffectiveContainerId;      /* ff0/1828 */
} __TEB, *__PTEB;

/* WOW64 process information block. All members are fixed-width, so the layout
 * is the same regardless of the pointer size of the code reading it.
 * NOTE(review): presumably mirrors the Windows/Wine definition - confirm
 * before changing any member. */
typedef struct _WOW64INFO {
  ULONG NativeSystemPageSize;
  ULONG CpuFlags;
  ULONG Wow64ExecuteFlags;
  /* Purpose not known; kept to preserve the offsets of the members below */
  ULONG unknown;
  ULONGLONG SectionHandle;
  ULONGLONG CrossProcessWorkList;
  USHORT NativeMachineType;
  USHORT EmulatedMachineType;
} WOW64INFO;

/* Basic per-thread information record: exit status, TEB address, client id,
 * affinity and priorities.
 * NOTE(review): presumably the output buffer layout for a thread information
 * query - confirm against the caller. Member order is part of the layout. */
typedef struct _THREAD_BASIC_INFORMATION {
  NTSTATUS ExitStatus;
  PVOID TebBaseAddress;
  CLIENT_ID ClientId;
  ULONG_PTR AffinityMask;
  LONG Priority;
  LONG BasePriority;
} THREAD_BASIC_INFORMATION, *PTHREAD_BASIC_INFORMATION;

/* System Information Class 0x01 */

/* CPU description record. ProcessorFeatureBits is presumably a combination of
 * the CPU_FEATURE_* values defined further down in this header - TODO confirm.
 * Member order and widths are part of the layout. */
typedef struct _SYSTEM_CPU_INFORMATION {
  USHORT ProcessorArchitecture;
  USHORT ProcessorLevel;
  USHORT ProcessorRevision;
  USHORT MaximumProcessors;
  ULONG ProcessorFeatureBits;
} SYSTEM_CPU_INFORMATION, *PSYSTEM_CPU_INFORMATION;

/* Inheritance disposition passed to NtMapViewOfSection (see its prototype in
 * this header). The numeric values are explicit and must not change. */
typedef enum _SECTION_INHERIT {
  ViewShare = 1,
  ViewUnmap = 2,
} SECTION_INHERIT;

/* definitions of bits in the Feature set for the x86 processors */
/* NOTE(review): VME, PGE and PSE are not single-bit values (0x05, 0x14, 0x24)
 * and therefore overlap with other flags; this looks inherited from the
 * upstream definition - confirm before "fixing" them. */
#define CPU_FEATURE_VME 0x00000005    /* Virtual 86 Mode Extensions */
#define CPU_FEATURE_TSC 0x00000002    /* Time Stamp Counter available */
#define CPU_FEATURE_CMOV 0x00000008   /* Conditional Move instruction */
#define CPU_FEATURE_PGE 0x00000014    /* Page table Entry Global bit */
#define CPU_FEATURE_PSE 0x00000024    /* Page Size Extension */
#define CPU_FEATURE_MTRR 0x00000040   /* Memory Type Range Registers */
#define CPU_FEATURE_CX8 0x00000080    /* Compare and eXchange 8 byte instr. */
#define CPU_FEATURE_MMX 0x00000100    /* Multi Media eXtensions */
#define CPU_FEATURE_X86 0x00000200    /* seems to be always ON, on the '86 */
#define CPU_FEATURE_PAT 0x00000400    /* Page Attribute Table */
#define CPU_FEATURE_FXSR 0x00000800   /* FXSAVE and FXRSTOR instructions */
#define CPU_FEATURE_SEP 0x00001000    /* SYSENTER and SYSEXIT instructions */
#define CPU_FEATURE_SSE 0x00002000    /* SSE extensions (ext. MMX) */
#define CPU_FEATURE_3DNOW 0x00004000  /* 3DNOW instructions available */
#define CPU_FEATURE_SSE2 0x00010000   /* SSE2 extensions (XMMI64) */
#define CPU_FEATURE_DS 0x00020000     /* Debug Store */
#define CPU_FEATURE_HTT 0x00040000    /* Hyper Threading Technology */
#define CPU_FEATURE_SSE3 0x00080000   /* SSE3 extensions */
#define CPU_FEATURE_CX128 0x00100000  /* cmpxchg16b instruction */
#define CPU_FEATURE_XSAVE 0x00800000  /* XSAVE instructions */
#define CPU_FEATURE_2NDLEV 0x04000000 /* Second-level address translation */
#define CPU_FEATURE_VIRT 0x08000000   /* Virtualization support */
#define CPU_FEATURE_RDFS 0x10000000   /* RDFSBASE etc. instructions */
#define CPU_FEATURE_NX 0x20000000     /* Data execution prevention */

/* FIXME: following values are made up, actual flags are unknown */
#define CPU_FEATURE_SSSE3 0x00008000 /* SSSE3 instructions */
#define CPU_FEATURE_SSE41 0x01000000 /* SSE41 instructions */
#define CPU_FEATURE_SSE42 0x02000000 /* SSE42 instructions */
#define CPU_FEATURE_AVX 0x40000000   /* AVX instructions */
#define CPU_FEATURE_AVX2 0x80000000  /* AVX2 instructions */
/* presumably Physical Address Extension - TODO confirm */
#define CPU_FEATURE_PAE 0x00200000
/* presumably Denormals-Are-Zero support - TODO confirm */
#define CPU_FEATURE_DAZ 0x00400000

/* Selector passed to NtQueryVirtualMemory (see its prototype in this header).
 *
 * The first group relies on implicit numbering starting at 0, so enumerators
 * must only ever be appended to it, never inserted or reordered. The Wine
 * private classes start at 1000 and are only visible when __WINESRC__ is
 * defined; the FEX private class is pinned to 2000 independently of them. */
typedef enum _MEMORY_INFORMATION_CLASS {
  MemoryBasicInformation,
  MemoryWorkingSetInformation,
  MemoryMappedFilenameInformation,
  MemoryRegionInformation,
  MemoryWorkingSetExInformation,
  MemorySharedCommitInformation,
  MemoryImageInformation,
  MemoryRegionInformationEx,
  MemoryPrivilegedBasicInformation,
  MemoryEnclaveImageInformation,
  MemoryBasicInformationCapped,
  MemoryPhysicalContiguityInformation,
  MemoryBadInformation,
  MemoryBadInformationAllProcesses,
#ifdef __WINESRC__
  MemoryWineUnixFuncs = 1000,
  MemoryWineUnixWow64Funcs,
#endif
  /* presumably answered with a MEMORY_FEX_STATS_SHM_INFORMATION - TODO confirm */
  MemoryFexStatsShm = 2000,
} MEMORY_INFORMATION_CLASS;

/* Extra information-class values expressed as casts, so they can be passed
 * where the corresponding enum type is expected without being enumerators
 * of it. */
#define SystemEmulationBasicInformation (SYSTEM_INFORMATION_CLASS)62

/* FEX-specific process information classes */
#define ProcessFexHardwareTso (PROCESSINFOCLASS)2000
#define ProcessFexUnalignAtomic (PROCESSINFOCLASS)2001

// These match the prctl flag values
/* Bit flags; presumably combined and used with ProcessFexUnalignAtomic -
 * TODO confirm against the caller. */
#define FEX_UNALIGN_ATOMIC_EMULATE (1ULL << 0)
#define FEX_UNALIGN_ATOMIC_BACKPATCH (1ULL << 1)
#define FEX_UNALIGN_ATOMIC_STRICT_SPLIT_LOCKS (1ULL << 2)

/* Selector passed to NtQueryValueKey (see its prototype in this header).
 * Values are implicit and start at 0, so the enumerator order is significant. */
typedef enum _KEY_VALUE_INFORMATION_CLASS {
  KeyValueBasicInformation,
  KeyValueFullInformation,
  KeyValuePartialInformation,
  KeyValueFullInformationAlign64,
  KeyValuePartialInformationAlign64,
  KeyValueLayerInformation,
} KEY_VALUE_INFORMATION_CLASS;

/* Header of a registry value record followed by its payload.
 * `Data` is declared with one element only; presumably the real payload is
 * DataLength bytes long and extends past the end of the struct, so callers
 * need a buffer larger than sizeof(KEY_VALUE_PARTIAL_INFORMATION) - TODO
 * confirm against the users of this type. */
typedef struct _KEY_VALUE_PARTIAL_INFORMATION {
  ULONG TitleIndex;
  ULONG Type;
  ULONG DataLength;
  UCHAR Data[1];
} KEY_VALUE_PARTIAL_INFORMATION, *PKEY_VALUE_PARTIAL_INFORMATION;

/* Describes a shared-memory region: a base pointer plus two sizes.
 * NOTE(review): presumably the result record for the MemoryFexStatsShm
 * information class, with map_size <= max_size - confirm against the
 * producer. */
typedef struct _MEMORY_FEX_STATS_SHM_INFORMATION {
  void* shm_base;
  DWORD map_size;
  DWORD max_size;
} MEMORY_FEX_STATS_SHM_INFORMATION, *PMEMORY_FEX_STATS_SHM_INFORMATION;

/* Wrapper around a single UNICODE_STRING holding a section's file name.
 * NOTE(review): presumably the result record for the
 * MemoryMappedFilenameInformation class - confirm against the caller. */
typedef struct _MEMORY_SECTION_NAME {
  UNICODE_STRING SectionFileName;
} MEMORY_SECTION_NAME, *PMEMORY_SECTION_NAME;

/* Prototypes only - none of these routines is defined in this header.
 * The list covers the Dbg*, Ldr*, Nt*, Rtl* and Wow64* entry points
 * (presumably resolved against ntdll at link time - TODO confirm).
 * All use the WINAPI calling convention except the variadic DbgPrint, which
 * uses WINAPIV. */
NTSTATUS WINAPIV DbgPrint(LPCSTR fmt, ...);
NTSTATUS WINAPI LdrDisableThreadCalloutsForDll(HMODULE);
NTSTATUS WINAPI LdrGetDllFullName(HMODULE, UNICODE_STRING*);
NTSTATUS WINAPI LdrGetDllHandle(LPCWSTR, ULONG, const UNICODE_STRING*, HMODULE*);
NTSTATUS WINAPI LdrGetProcedureAddress(HMODULE, const ANSI_STRING*, ULONG, void**);
NTSTATUS WINAPI NtAllocateVirtualMemoryEx(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MEM_EXTENDED_PARAMETER*, ULONG);
NTSTATUS WINAPI NtAllocateVirtualMemory(HANDLE, PVOID*, ULONG_PTR, SIZE_T*, ULONG, ULONG);
NTSTATUS WINAPI NtContinue(PCONTEXT, BOOLEAN);
NTSTATUS WINAPI NtCreateSection(HANDLE*, ACCESS_MASK, const OBJECT_ATTRIBUTES*, const LARGE_INTEGER*, ULONG, ULONG, HANDLE);
NTSTATUS WINAPI NtDelayExecution(BOOLEAN, const LARGE_INTEGER*);
NTSTATUS WINAPI NtDuplicateObject(HANDLE, HANDLE, HANDLE, PHANDLE, ACCESS_MASK, ULONG, ULONG);
NTSTATUS WINAPI NtFlushInstructionCache(HANDLE, LPCVOID, SIZE_T);
NTSTATUS WINAPI NtFreeVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG);
NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*);
ULONG WINAPI NtGetCurrentProcessorNumber(void);
NTSYSAPI NTSTATUS WINAPI NtMapViewOfSection(HANDLE, HANDLE, PVOID*, ULONG_PTR, SIZE_T, const LARGE_INTEGER*, SIZE_T*, SECTION_INHERIT, ULONG, ULONG);
NTSTATUS WINAPI NtOpenKeyEx(PHANDLE, ACCESS_MASK, const OBJECT_ATTRIBUTES*, ULONG);
NTSTATUS WINAPI NtProtectVirtualMemory(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG*);
NTSTATUS WINAPI NtQueryAttributesFile(const OBJECT_ATTRIBUTES*, FILE_BASIC_INFORMATION*);
NTSTATUS WINAPI NtQueryDirectoryFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID, PIO_STATUS_BLOCK, PVOID, ULONG, FILE_INFORMATION_CLASS,
                                     BOOLEAN, PUNICODE_STRING, BOOLEAN);
NTSTATUS WINAPI NtQueryValueKey(HANDLE, const UNICODE_STRING*, KEY_VALUE_INFORMATION_CLASS, void*, DWORD, DWORD*);
NTSTATUS WINAPI NtQueryVirtualMemory(HANDLE, LPCVOID, MEMORY_INFORMATION_CLASS, PVOID, SIZE_T, SIZE_T*);
NTSTATUS WINAPI NtReadFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID, PIO_STATUS_BLOCK, PVOID, ULONG, PLARGE_INTEGER, PULONG);
NTSTATUS WINAPI NtSetContextThread(HANDLE, const CONTEXT*);
NTSTATUS WINAPI NtSuspendThread(HANDLE, PULONG);
NTSTATUS WINAPI NtTerminateProcess(HANDLE, LONG);
NTSTATUS WINAPI NtWriteFile(HANDLE, HANDLE, PIO_APC_ROUTINE, PVOID, PIO_STATUS_BLOCK, const void*, ULONG, PLARGE_INTEGER, PULONG);
/* NOTE(review): declared with an empty parameter list rather than (void) like
 * the other zero-argument routines here; equivalent in C++, unprototyped in
 * older C dialects. */
void WINAPI ProcessPendingCrossProcessEmulatorWork();
void WINAPI RtlAcquirePebLock(void);
void WINAPI RtlAcquireSRWLockExclusive(RTL_SRWLOCK*);
void WINAPI RtlClearBits(PRTL_BITMAP, ULONG, ULONG);
NTSTATUS WINAPI RtlDeleteCriticalSection(RTL_CRITICAL_SECTION*);
NTSTATUS WINAPI RtlEnterCriticalSection(RTL_CRITICAL_SECTION*);
ULONG WINAPI RtlFindClearBitsAndSet(PRTL_BITMAP, ULONG, ULONG);
ULONG WINAPI RtlGetCurrentDirectory_U(ULONG, LPWSTR);
PIMAGE_NT_HEADERS WINAPI RtlImageNtHeader(HMODULE);
PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE, BOOL, WORD, ULONG*);
void WINAPI RtlInitializeConditionVariable(RTL_CONDITION_VARIABLE*);
NTSTATUS WINAPI RtlInitializeCriticalSection(RTL_CRITICAL_SECTION*);
void WINAPI RtlInitializeSRWLock(RTL_SRWLOCK*);
NTSTATUS WINAPI RtlLeaveCriticalSection(RTL_CRITICAL_SECTION*);
void* WINAPI RtlLocateExtendedFeature(CONTEXT_EX*, ULONG, ULONG*);
NTSTATUS WINAPI RtlMultiByteToUnicodeN(LPWSTR, DWORD, LPDWORD, LPCSTR, DWORD);
NTSTATUS WINAPI RtlMultiByteToUnicodeSize(DWORD*, LPCSTR, ULONG);
BOOL WINAPI RtlQueryPerformanceCounter(LARGE_INTEGER*);
BOOL WINAPI RtlQueryPerformanceFrequency(LARGE_INTEGER*);
void WINAPI RtlReleasePebLock(void);
void WINAPI RtlReleaseSRWLockExclusive(RTL_SRWLOCK*);
NTSTATUS WINAPI RtlSleepConditionVariableSRW(RTL_CONDITION_VARIABLE*, RTL_SRWLOCK*, const LARGE_INTEGER*, ULONG);
BOOLEAN WINAPI RtlTryAcquireSRWLockExclusive(RTL_SRWLOCK*);
BOOL WINAPI RtlTryEnterCriticalSection(RTL_CRITICAL_SECTION*);
NTSTATUS WINAPI RtlUnicodeToMultiByteN(LPSTR, DWORD, LPDWORD, LPCWSTR, DWORD);
void WINAPI RtlWakeAllConditionVariable(RTL_CONDITION_VARIABLE*);
void WINAPI RtlWakeConditionVariable(RTL_CONDITION_VARIABLE*);
NTSTATUS WINAPI RtlWow64GetCurrentCpuArea(USHORT*, void**, void**);
NTSTATUS WINAPI RtlWow64GetThreadContext(HANDLE, WOW64_CONTEXT*);
NTSTATUS WINAPI RtlWow64SetThreadContext(HANDLE, const WOW64_CONTEXT*);
void WINAPI Wow64ProcessPendingCrossProcessItems(void);
NTSTATUS WINAPI Wow64SystemServiceEx(UINT, UINT*);
NTSTATUS WINAPI RtlWow64SuspendThread(HANDLE, ULONG*);
void WINAPI RtlAcquireSRWLockShared(RTL_SRWLOCK*);
void WINAPI RtlReleaseSRWLockShared(RTL_SRWLOCK*);
BOOLEAN WINAPI RtlTryAcquireSRWLockShared(RTL_SRWLOCK*);
void WINAPI RtlWakeAddressAll(const void*);
NTSTATUS WINAPI RtlWaitOnAddress(const void*, const void*, SIZE_T, const LARGE_INTEGER*);
void WINAPI RtlWakeAddressSingle(const void*);

#ifdef __cplusplus
}
#endif


================================================
FILE: ThunkLibs/Generator/CMakeLists.txt
================================================
# Build script for the thunk generator: a static library (thunkgenlib) holding
# the analysis/generation code and a thin executable (thunkgen) on top of it.
find_package(Clang REQUIRED CONFIG)
find_package(OpenSSL REQUIRED COMPONENTS Crypto)

# Query clang's global resource directory for system include directories
# (skipped when CLANG_RESOURCE_DIR was already provided, e.g. on the command line)
if (NOT CLANG_RESOURCE_DIR)
  find_program(CLANG_EXEC_PATH clang REQUIRED)

  execute_process(COMMAND ${CLANG_EXEC_PATH} -print-resource-dir
    OUTPUT_VARIABLE CLANG_RESOURCE_DIR
    OUTPUT_STRIP_TRAILING_WHITESPACE)
endif()

add_library(thunkgenlib STATIC analysis.cpp data_layout.cpp gen.cpp)
# INTERFACE: only targets linking against thunkgenlib get this include path
target_include_directories(thunkgenlib INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
# SYSTEM: Clang's headers are treated as system headers for the library and its consumers
target_include_directories(thunkgenlib SYSTEM PUBLIC ${CLANG_INCLUDE_DIRS})
target_link_libraries(thunkgenlib PUBLIC clang-cpp LLVM)
target_link_libraries(thunkgenlib PRIVATE OpenSSL::Crypto)
target_link_libraries(thunkgenlib PRIVATE fmt::fmt)
# INTERFACE: the macro is defined for consumers of thunkgenlib, not for the
# library's own sources
target_compile_definitions(thunkgenlib INTERFACE -DCLANG_RESOURCE_DIR="${CLANG_RESOURCE_DIR}")

# Clang's libtooling won't compile with libstdc++'s debug mode
# (PUBLIC so the macro is also undefined for everything linking thunkgenlib)
target_compile_options(thunkgenlib PUBLIC "-U_GLIBCXX_DEBUG")

add_executable(thunkgen main.cpp)
target_link_libraries(thunkgen PRIVATE thunkgenlib)


================================================
FILE: ThunkLibs/Generator/analysis.cpp
================================================
#include "analysis.h"
#include "diagnostics.h"

#include <clang/AST/RecursiveASTVisitor.h>
#include <clang/Basic/Version.h>
#include <clang/Frontend/CompilerInstance.h>

#include <fmt/format.h>

// Annotations that apply to a whole namespace, as collected by
// GetNamespaceAnnotations from the primary fex_gen_config template.
struct NamespaceAnnotations {
  // Value of the "version" member's integer-literal initializer, if present
  std::optional<unsigned> version;
  // Value of the "load_host_endpoint_via" member's string-literal initializer, if present
  std::optional<std::string> load_host_endpoint_via;
  // Set when the template derives from fexgen::generate_guest_symtable
  bool generate_guest_symtable = false;
  // Set when the template derives from fexgen::indirect_guest_calls
  bool indirect_guest_calls = false;
};

// Returns the source location of the i-th template argument as written in the
// given class template specialization. The accessors needed to reach the
// written argument list differ between Clang releases, hence the version
// switch.
static clang::SourceLocation GetTemplateArgLocation(clang::ClassTemplateSpecializationDecl* decl, unsigned i) {
#if CLANG_VERSION_MAJOR >= 19
  const auto* written_args = decl->getTemplateArgsAsWritten();
  const auto& written_arg = written_args->getTemplateArgs()[i];
  return written_arg.getLocation();
#else
  const auto specialization_loc = decl->getTypeAsWritten()->getTypeLoc().getAs<clang::TemplateSpecializationTypeLoc>();
  return specialization_loc.getArgLoc(i).getLocation();
#endif
}

// Collects the namespace-wide annotations attached to the given record
// (the templated declaration of a fex_gen_config primary template).
//
// Annotations come from two places:
//  - base classes named fexgen::generate_guest_symtable / fexgen::indirect_guest_calls
//  - data members named "load_host_endpoint_via" (string literal initializer)
//    and "version" (integer literal initializer)
//
// Returns default annotations if the record is only declared, not defined.
// Throws (via ErrorReporter) on unknown base classes or members, on a
// load_host_endpoint_via initializer that is not a string literal, and on a
// version member without an integer literal initializer.
static NamespaceAnnotations GetNamespaceAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) {
  if (!decl->hasDefinition()) {
    return {};
  }

  ErrorReporter report_error {context};
  NamespaceAnnotations ret;

  for (const clang::CXXBaseSpecifier& base : decl->bases()) {
    auto annotation = base.getType().getAsString();
    if (annotation == "fexgen::generate_guest_symtable") {
      ret.generate_guest_symtable = true;
    } else if (annotation == "fexgen::indirect_guest_calls") {
      ret.indirect_guest_calls = true;
    } else {
      throw report_error(base.getSourceRange().getBegin(), "Unknown namespace annotation");
    }
  }

  // FieldDecl::getInClassInitializer() returns null for members declared
  // without a default member initializer, so the null check must happen
  // before stripping casts rather than after.
  const auto get_initializer = [](const clang::FieldDecl* field) -> const clang::Expr* {
    const clang::Expr* initializer = field->getInClassInitializer();
    return initializer ? initializer->IgnoreCasts() : nullptr;
  };

  for (const clang::FieldDecl* field : decl->fields()) {
    auto name = field->getNameAsString();
    if (name == "load_host_endpoint_via") {
      const clang::Expr* loader_function_expr = get_initializer(field);
      auto loader_function_str = llvm::dyn_cast_or_null<clang::StringLiteral>(loader_function_expr);
      if (loader_function_expr && !loader_function_str) {
        throw report_error(loader_function_expr->getBeginLoc(), "Must initialize load_host_endpoint_via with a string");
      }
      // A member without initializer leaves the annotation unset
      if (loader_function_str) {
        ret.load_host_endpoint_via = loader_function_str->getString();
      }
    } else if (name == "version") {
      const clang::Expr* initializer = get_initializer(field);
      auto version_literal = llvm::dyn_cast_or_null<clang::IntegerLiteral>(initializer);
      if (!initializer || !version_literal) {
        throw report_error(field->getBeginLoc(), "No version given (expected integral typed member, e.g. \"int version = 5;\")");
      }
      ret.version = version_literal->getValue().getZExtValue();
    } else {
      throw report_error(field->getBeginLoc(), "Unknown namespace annotation");
    }
  }

  return ret;
}

// How callbacks of an annotated function are handled.
// GetAnnotations selects Stub when the fexgen::callback_stub annotation is
// present; otherwise Default is kept.
enum class CallbackStrategy {
  Default,
  Stub,
};

// Per-function annotations, as collected by GetAnnotations from a
// fex_gen_config specialization.
struct Annotations {
  // Set by the fexgen::custom_host_impl base class
  bool custom_host_impl = false;
  // Set by the fexgen::custom_guest_entrypoint base class
  bool custom_guest_entrypoint = false;

  // Set by the fexgen::returns_guest_pointer base class
  bool returns_guest_pointer = false;

  // Underlying type of a member type alias named "uniform_va_type", if present
  std::optional<clang::QualType> uniform_va_type;

  // Switched to Stub by the fexgen::callback_stub base class
  CallbackStrategy callback_strategy = CallbackStrategy::Default;
};

// Collects the per-function annotations attached to a fex_gen_config
// specialization.
//
// Base classes select boolean annotations and the callback strategy; a member
// type alias named "uniform_va_type" provides the uniform variadic argument
// type. Any other base class, any data member and any other type alias is
// reported as an error by throwing the diagnostic built by ErrorReporter.
static Annotations GetAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) {
  ErrorReporter report_error {context};
  Annotations result;

  for (const auto& base_spec : decl->bases()) {
    const auto tag = base_spec.getType().getAsString();

    if (tag == "fexgen::returns_guest_pointer") {
      result.returns_guest_pointer = true;
      continue;
    }
    if (tag == "fexgen::custom_host_impl") {
      result.custom_host_impl = true;
      continue;
    }
    if (tag == "fexgen::callback_stub") {
      result.callback_strategy = CallbackStrategy::Stub;
      continue;
    }
    if (tag == "fexgen::custom_guest_entrypoint") {
      result.custom_guest_entrypoint = true;
      continue;
    }

    throw report_error(base_spec.getSourceRange().getBegin(), "Unknown annotation");
  }

  for (const auto& member : decl->getPrimaryContext()->decls()) {
    // Data members are never valid annotations here
    if (auto rejected_field = llvm::dyn_cast_or_null<clang::FieldDecl>(member)) {
      throw report_error(rejected_field->getBeginLoc(), "Unknown field annotation");
    }

    // Everything that is neither a field nor a type alias is ignored
    auto alias = llvm::dyn_cast_or_null<clang::TypedefNameDecl>(member);
    if (!alias) {
      continue;
    }

    const auto alias_name = alias->getNameAsString();
    if (alias_name != "uniform_va_type") {
      throw report_error(alias->getBeginLoc(), "Unknown type alias annotation");
    }
    result.uniform_va_type = alias->getUnderlyingType();
  }

  return result;
}

void AnalysisAction::ExecuteAction() {
  clang::ASTFrontendAction::ExecuteAction();

  // Post-processing happens here rather than in an overridden EndSourceFileAction implementation.
  // We can't move the logic to the latter since this code might still raise errors, but
  // clang's diagnostics engine is already shut down by the time EndSourceFileAction is called.

  auto& context = getCompilerInstance().getASTContext();
  if (context.getDiagnostics().hasErrorOccurred()) {
    return;
  }
  decl_contexts.front() = context.getTranslationUnitDecl();

  try {
    ParseInterface(context);
    CoverReferencedTypes(context);
    OnAnalysisComplete(context);
  } catch (ClangDiagnosticAsException& exception) {
    exception.Report(context.getDiagnostics());
  }
}

// Looks up `symbol_name` directly in `decl_context` (without loading external
// declarations) and returns it as a class template.
//
// Returns nullptr if nothing by that name exists or if the single match is
// not a class template. Throws std::runtime_error if the name is ambiguous.
static clang::ClassTemplateDecl* FindClassTemplateDeclByName(clang::DeclContext& decl_context, std::string_view symbol_name) {
  auto& ast = decl_context.getParentASTContext();
  const auto lookup_name = ast.DeclarationNames.getIdentifier(&ast.Idents.get(symbol_name));
  const auto matches = decl_context.noload_lookup(lookup_name);

  if (matches.empty()) {
    return nullptr;
  }

  const bool has_multiple_matches = std::next(matches.begin()) != matches.end();
  if (has_multiple_matches) {
    throw std::runtime_error("Found multiple matches to symbol " + std::string {symbol_name});
  }

  return llvm::dyn_cast<clang::ClassTemplateDecl>(*matches.begin());
}

// Per-type annotations, as collected by GetTypeAnnotations from a
// fex_gen_type specialization.
struct TypeAnnotations {
  // Set by the fexgen::opaque_type base class
  bool is_opaque = false;
  // Set by the fexgen::assume_compatible_data_layout base class
  bool assumed_compatible = false;
  // Set by the fexgen::emit_layout_wrappers base class
  bool emit_layout_wrappers = false;
};

// Collects the annotations attached to a fex_gen_type specialization by
// inspecting its base classes.
//
// A specialization without a definition yields default annotations. Unknown
// base classes are reported by throwing the diagnostic built by ErrorReporter.
static TypeAnnotations GetTypeAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) {
  TypeAnnotations result;
  if (!decl->hasDefinition()) {
    return result;
  }

  ErrorReporter report_error {context};

  for (const auto& base_spec : decl->bases()) {
    const auto tag = base_spec.getType().getAsString();

    if (tag == "fexgen::opaque_type") {
      result.is_opaque = true;
      continue;
    }
    if (tag == "fexgen::assume_compatible_data_layout") {
      result.assumed_compatible = true;
      continue;
    }
    if (tag == "fexgen::emit_layout_wrappers") {
      result.emit_layout_wrappers = true;
      continue;
    }

    throw report_error(base_spec.getSourceRange().getBegin(), "Unknown type annotation");
  }

  return result;
}

// Collects the annotations attached to a fex_gen_param specialization by
// inspecting its base classes.
//
// A specialization without a definition yields default annotations. Unknown
// base classes are reported by throwing the diagnostic built by ErrorReporter.
static ParameterAnnotations GetParameterAnnotations(clang::ASTContext& context, clang::CXXRecordDecl* decl) {
  ParameterAnnotations result;
  if (!decl->hasDefinition()) {
    return result;
  }

  ErrorReporter report_error {context};

  for (const auto& base_spec : decl->bases()) {
    const auto tag = base_spec.getType().getAsString();

    if (tag == "fexgen::ptr_passthrough") {
      result.is_passthrough = true;
      continue;
    }
    if (tag == "fexgen::assume_compatible_data_layout") {
      result.assume_compatible = true;
      continue;
    }

    throw report_error(base_spec.getSourceRange().getBegin(), "Unknown parameter annotation");
  }

  return result;
}

void AnalysisAction::ParseInterface(clang::ASTContext& context) {
  ErrorReporter report_error {context};

  const std::unordered_map<unsigned, ParameterAnnotations> no_param_annotations {};

  // TODO: Assert fex_gen_type is not declared at non-global namespaces
  if (auto template_decl = FindClassTemplateDeclByName(*context.getTranslationUnitDecl(), "fex_gen_type")) {
    for (auto* decl : template_decl->specializations()) {
      const auto& template_args = decl->getTemplateArgs();
      assert(template_args.size() == 1);

      // NOTE: Function types that are equivalent but use differently
      //       named types (e.g. GLuint/GLenum) are represented by
      //       different Type instances. The canonical type they refer
      //       to is unique, however.
      clang::QualType type = context.getCanonicalType(template_args[0].getAsType());
      type = type->getLocallyUnqualifiedSingleStepDesugaredType();

      const auto annotations = GetTypeAnnotations(context, decl);
      if (type->isFunctionPointerType() || type->isFunctionType()) {
        if (decl->getNumBases()) {
          throw report_error(decl->getBeginLoc(), "Function pointer types cannot be annotated");
        }
        thunked_funcptrs[type.getAsString()] = std::pair {type.getTypePtr(), no_param_annotations};
      } else {
        RepackedType repack_info = {.assumed_compatible = annotations.is_opaque || annotations.assumed_compatible,
                                    .pointers_only = annotations.is_opaque && !annotations.assumed_compatible,
                                    .emit_layout_wrappers = annotations.emit_layout_wrappers};
        [[maybe_unused]] auto [it, inserted] = types.emplace(context.getCanonicalType(type.getTypePtr()), repack_info);
        assert(inserted);
      }
    }
  }

  // Process function parameter annotations
  std::unordered_map<const clang::FunctionDecl*, std::unordered_map<unsigned, ParameterAnnotations>> param_annotations;
  for (auto& decl_context : decl_contexts) {
    if (auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_param")) {
      for (auto* decl : template_decl->specializations()) {
        const auto& template_args = decl->getTemplateArgs();
        assert(template_args.size() == 3);

        auto function = llvm::dyn_cast<clang::FunctionDecl>(template_args[0].getAsDecl());
        auto param_idx = template_args[1].getAsIntegral().getSExtValue();
        clang::QualType type = context.getCanonicalType(template_args[2].getAsType());
        type = type->getLocallyUnqualifiedSingleStepDesugaredType();

        if (param_idx >= function->getNumParams() || param_idx < -1) {
          throw report_error(GetTemplateArgLocation(decl, 1), "Out-of-bounds parameter index passed to fex_gen_param");
        }

        auto expected_type = param_idx == -1 ? function->getReturnType() : function->getParamDecl(param_idx)->getType();

        if (!type->isVoidType() && !context.hasSameType(type, expected_type)) {
          auto loc = param_idx == -1 ? function->getReturnTypeSourceRange().getBegin() :
                                       function->getParamDecl(param_idx)->getTypeSourceInfo()->getTypeLoc().getBeginLoc();
          throw report_error(GetTemplateArgLocation(decl, 2), "Type passed to fex_gen_param doesn't match the function signature")
            .addNote(report_error(loc, "Expected this type instead"));
        }

        param_annotations[function][param_idx] = GetParameterAnnotations(context, decl);
      }
    }
  }

  // Process declarations and specializations of fex_gen_config,
  // i.e. the function descriptions of the thunked API
  for (auto& decl_context : decl_contexts) {
    if (const auto template_decl = FindClassTemplateDeclByName(*decl_context, "fex_gen_config")) {
      // Gather general information about symbols in this namespace
      const auto annotations = GetNamespaceAnnotations(context, template_decl->getTemplatedDecl());

      auto namespace_decl = llvm::dyn_cast<clang::NamespaceDecl>(decl_context);
      namespaces.push_back(
        {namespace_decl, namespace_decl ? namespace_decl->getNameAsString() : "", annotations.load_host_endpoint_via.value_or(""),
         annotations.generate_guest_symtable, annotations.indirect_guest_calls});
      const auto namespace_idx = namespaces.size() - 1;
      const NamespaceInfo& namespace_info = namespaces.back();

      if (annotations.version) {
        if (namespace_decl) {
          throw report_error(template_decl->getBeginLoc(), "Library version must be defined in the global namespace");
        }
        lib_version = annotations.version;
      }

      // Process specializations of template fex_gen_config
      // First, perform some validation and process member annotations
      // In a second iteration, process the actual function API
      for (auto* decl : template_decl->specializations()) {
        if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) {
          throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n");
        }

        const auto& template_args = decl->getTemplateArgs();
        assert(template_args.size() == 1);

        const auto template_arg_loc = GetTemplateArgLocation(decl, 0);

        if (llvm::isa<clang::FunctionDecl>(template_args[0].getAsDecl())) {
          // Process later
        } else if (auto annotated_member = llvm::dyn_cast<clang::FieldDecl>(template_args[0].getAsDecl())) {
          if (decl->getNumBases() != 1 || decl->bases_begin()->getType().getAsString() != "fexgen::custom_repack") {
            throw report_error(template_arg_loc, "Unsupported member annotation(s)");
          }

          // Get or add parent type to list of structure types
#if CLANG_VERSION_MAJOR >= 22
          auto parent_qt = context.getTagType(clang::ElaboratedTypeKeyword::None, std::nullopt, annotated_member->getParent(), false);
          auto repack_info_it = types.emplace(context.getCanonicalType(parent_qt).getTypePtr(), RepackedType {}).first;
#else
          auto repack_info_it = types.emplace(context.getCanonicalType(annotated_member->getParent()->getTypeForDecl()), RepackedType {}).first;
#endif
          if (repack_info_it->second.assumed_compatible) {
            throw report_error(template_arg_loc, "May not annotate members of opaque types");
          }
          // Add member to its list of members
          repack_info_it->second.custom_repacked_members.insert(annotated_member->getNameAsString());
        } else {
          throw report_error(template_arg_loc, "Cannot annotate this kind of symbol");
        }
      }

      // Process API functions
      for (auto* decl : template_decl->specializations()) {
        if (decl->getSpecializationKind() == clang::TSK_ExplicitInstantiationDefinition) {
          throw report_error(decl->getBeginLoc(), "fex_gen_config may not be partially specialized\n");
        }

        const auto& template_args = decl->getTemplateArgs();
        assert(template_args.size() == 1);

        const auto template_arg_loc = GetTemplateArgLocation(decl, 0);

        if (auto emitted_function = llvm::dyn_cast<clang::FunctionDecl>(template_args[0].getAsDecl())) {
          auto return_type = emitted_function->getReturnType();

          const auto annotations = GetAnnotations(context, decl);
          if (return_type->isFunctionPointerType() && !annotations.returns_guest_pointer) {
            throw report_error(template_arg_loc, "Function pointer return types require explicit annotation\n");
          }

          // TODO: Use the types as written in the signature instead?
          ThunkedFunction data;
          data.function_name = emitted_function->getName().str();
          data.return_type = return_type;
          data.is_variadic = emitted_function->isVariadic();

          data.decl = emitted_function;

          data.custom_host_impl = annotations.custom_host_impl;

          data.param_annotations = param_annotations[emitted_function];

          const int retval_index = -1;
          for (int param_idx = retval_index; param_idx < (int)emitted_function->param_size(); ++param_idx) {
            auto param_type =
              param_idx == retval_index ? emitted_function->getReturnType() : emitted_function->getParamDecl(param_idx)->getType();
            auto param_loc = param_idx == retval_index ? emitted_function->getReturnTypeSourceRange().getBegin() :
                                                         emitted_function->getParamDecl(param_idx)->getBeginLoc();

            if (param_idx != retval_index) {
              data.param_types.push_back(param_type);
            } else if (param_type->isVoidType()) {
              continue;
            }

            if (data.param_annotations[param_idx].is_passthrough && !data.custom_host_impl) {
              throw report_error(param_loc, "Passthrough annotation requires custom host implementation");
            }
            // Skip pointers-to-structs passed through to the host in guest_layout.
            // This avoids pulling in member types that can't be processed.
            if (data.param_annotations[param_idx].is_passthrough && param_type->isPointerType() &&
                param_type->getPointeeType()->isStructureType()) {
              continue;
            }

            auto check_struct_type = [&](const clang::Type* type) {
              if (type->isIncompleteType()) {
                throw report_error(type->getAsTagDecl()->getBeginLoc(), "Unannotated pointer with incomplete struct type; consider using "
                                                                        "an opaque_type annotation")
                  .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note))
                  .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note));
              }

              for (auto* member : type->getAsStructureType()->getDecl()->fields()) {
                auto annotated_type = types.find(type->getCanonicalTypeUnqualified().getTypePtr());
                if (annotated_type == types.end() || !annotated_type->second.UsesCustomRepackFor(member)) {
                  /*if (!member->getType()->isPointerType())*/ {
                    // TODO: Perform more elaborate validation for non-pointers to ensure ABI compatibility
                    continue;
                  }

                  throw report_error(member->getBeginLoc(), "Unannotated pointer member")
                    .addNote(report_error(param_loc, "in struct type", clang::DiagnosticsEngine::Note))
                    .addNote(report_error(template_arg_loc, "used in annotation here", clang::DiagnosticsEngine::Note));
                }
              }
            };

            if (param_type->isFunctionPointerType()) {
              if (param_idx == retval_index) {
                // TODO: We already rely on this in a few places...
                // TODO: Revisit now that we support ptr_passthrough for return values
                //                                throw report_error(template_arg_loc, "Support for returning function pointers is not implemented");
                continue;
              }
              auto funcptr = emitted_function->getParamDecl(param_idx)->getFunctionType()->getAs<clang::FunctionProtoType>();
              ThunkedCallback callback;
              callback.return_type = funcptr->getReturnType();
              for (auto& cb_param : funcptr->getParamTypes()) {
                callback.param_types.push_back(cb_param);
              }
              callback.is_stub = annotations.callback_strategy == CallbackStrategy::Stub;
              callback.is_variadic = funcptr->isVariadic();

              data.callbacks.emplace(param_idx, callback);
              if (!callback.is_stub && !data.custom_host_impl) {
                thunked_funcptrs[emitted_function->getNameAsString() + "_cb" + std::to_string(param_idx)] =
                  std::pair {context.getCanonicalType(funcptr), no_param_annotations};
              }

              if (data.callbacks.size() != 1) {
                throw report_error(template_arg_loc, "Support for more than one callback is untested");
              }
              if (funcptr->isVariadic() && !callback.is_stub) {
                throw report_error(template_arg_loc, "Variadic callbacks are not supported");
              }

              // Force treatment as passthrough-pointer
              data.param_annotations[param_idx].is_passthrough = true;
            } else if (param_type->isBuiltinType()) {
              // NOTE: Intentionally not using getCanonicalType here since that would turn e.g. size_t into platform-specific types
              // TODO: Still, we may want to de-duplicate some of these...
              types.emplace(param_type.getTypePtr(), RepackedType {});
            } else if (param_type->isEnumeralType()) {
              types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType {});
            } else if (param_type->isStructureType() && !(types.contains(context.getCanonicalType(param_type.getTypePtr())) &&
                                                          LookupType(context, param_type.getTypePtr()).assumed_compatible)) {
              check_struct_type(param_type.getTypePtr());
              types.emplace(context.getCanonicalType(param_type.getTypePtr()), RepackedType {});
            } else if (param_type->isPointerType()) {
              auto pointee_type = param_type->getPointeeType();

              if (pointee_type->isIntegerType()) {
                // Add builtin pointee type to type list
                if (!pointee_type->isEnumeralType()) {
                  types.emplace(pointee_type.getTypePtr(), RepackedType {});
                } else {
                  types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType {});
                }
              }

              if (data.param_annotations[param_idx].assume_compatible) {
                // Nothing to do
              } else if (types.contains(context.getCanonicalType(pointee_type.getTypePtr())) &&
                         LookupType(context, pointee_type.getTypePtr()).assumed_compatible) {
                // Parameter points to a type that is assumed compatible
                data.param_annotations[param_idx].assume_compatible = true;
              } else if (pointee_type->isStructureType()) {
                // Unannotated pointer to unannotated structure.
                // Append the structure type to the type list for checking data layout compatibility.
                check_struct_type(pointee_type.getTypePtr());
                types.emplace(context.getCanonicalType(pointee_type.getTypePtr()), RepackedType {});
              } else if (data.param_annotations[param_idx].is_passthrough) {
                // Nothing to do
              } else {
                // Assume this parameter type is unsupported.
                // Since not all of our libraries are adapted for this yet, so
                // an error is only thrown for a curated set of functions.
                // TODO: At least detect and reject pointers-to-pointers on 32-bit
                if (emitted_function->getNameAsString().starts_with("gl") && pointee_type->isPointerType()) {
                  throw report_error(param_loc, "Unsupported parameter type")
                    .addNote(report_error(emitted_function->getNameInfo().getLoc(), "in function", clang::DiagnosticsEngine::Note))
                    .addNote(report_error(template_arg_loc, "used in definition here", clang::DiagnosticsEngine::Note));
                }
              }
            } else {
              // TODO: For non-pointer parameters, perform more elaborate validation to ensure ABI compatibility
            }
          }

          thunked_api.push_back(ThunkedAPIFunction {(const FunctionParams&)data, data.function_name, data.return_type,
                                                    namespace_info.host_loader.empty() ? "dlsym_default" : namespace_info.host_loader,
                                                    data.is_variadic || annotations.custom_guest_entrypoint, data.is_variadic, std::nullopt});
          if (namespace_info.generate_guest_symtable) {
            thunked_api.back().symtable_namespace = namespace_idx;
          }

          if (data.is_variadic) {
            if (!annotations.uniform_va_type) {
              throw report_error(decl->getBeginLoc(), "Variadic functions must be annotated with parameter type using uniform_va_type");
            }

            // Convert variadic argument list into a count + pointer pair
            data.param_types.push_back(context.getSizeType());
            data.param_types.push_back(context.getPointerType(*annotations.uniform_va_type));
            types.emplace(context.getSizeType().getTypePtr(), RepackedType {});
            if (!annotations.uniform_va_type.value()->isVoidPointerType()) {
              types.emplace(annotations.uniform_va_type->getTypePtr(), RepackedType {});
            }
          }

          if (data.is_variadic) {
            // This function is thunked through an "_internal" symbol since its signature
            // is different from the one in the native host/guest libraries.
            data.function_name = data.function_name + "_internal";
            if (data.custom_host_impl) {
              throw report_error(decl->getBeginLoc(), "Custom host impl requested but this is implied by the function signature already");
            }
            data.custom_host_impl = true;
          }

          // For indirect calls, register the function signature as a function pointer type
          if (namespace_info.indirect_guest_calls) {
            thunked_funcptrs[emitted_function->getNameAsString()] =
              std::pair {context.getCanonicalType(emitted_function->getFunctionType()), data.param_annotations};
          }

          thunks.push_back(std::move(data));
        }
      }
    }
  }
}

/**
 * Extends the `types` map with the types referenced by members of the struct
 * types already registered in it, repeating until no new type is added.
 *
 * Array and pointer layers are stripped from each member type before it is
 * considered. Only struct types, non-void builtin types, and enum types are
 * added; everything else is skipped.
 *
 * Throws std::runtime_error if
 * - a struct contains a member whose own (canonical) type is registered as
 *   pointers_only, i.e. the type is used by value rather than through a pointer
 * - a struct has a union-typed member that is neither registered in `types`
 *   nor marked for custom repacking
 */
void AnalysisAction::CoverReferencedTypes(clang::ASTContext& context) {
  // Add common fixed-size integer types explicitly
  for (unsigned size : {8, 32, 64}) {
    types.emplace(context.getIntTypeForBitwidth(size, false).getTypePtr(), RepackedType {});
    types.emplace(context.getIntTypeForBitwidth(size, true).getTypePtr(), RepackedType {});
  }

  // Repeat until no more children are appended
  for (bool changed = true; std::exchange(changed, false);) {
    // The successor iterator is computed up-front and re-seated after every
    // successful insertion below: `types` is an unordered_map, so an insertion
    // may rehash and invalidate previously obtained iterators. References to
    // elements (type, type_repack_info) stay valid across a rehash.
    for (auto next_type_it = types.begin(), type_it = next_type_it; type_it != types.end(); type_it = next_type_it) {
      ++next_type_it;
      const auto& [type, type_repack_info] = *type_it;
      if (!type->isStructureType()) {
        continue;
      }

      if (type_repack_info.assumed_compatible) {
        // If assumed compatible, we don't need the member definitions
        continue;
      }

      for (auto* member : type->getAsStructureType()->getDecl()->fields()) {
        auto member_type = member->getType().getTypePtr();
        // Custom-repacked pointers to structs are not followed
        if (type_repack_info.UsesCustomRepackFor(member) && member_type->isPointerType() && member_type->getPointeeType()->isStructureType()) {
          continue;
        }

        // Strip array layers first, then pointer layers, to reach the underlying type
        while (member_type->isArrayType()) {
          member_type = member_type->getArrayElementTypeNoTypeQual();
        }
        while (member_type->isPointerType()) {
          member_type = member_type->getPointeeType().getTypePtr();
        }

        // Builtin types are keyed by their non-canonical type pointer; everything else by the canonical type
        if (!member_type->isBuiltinType()) {
          member_type = context.getCanonicalType(member_type);
        }
        if (types.contains(member_type) && types.at(member_type).pointers_only) {
          // If the stripped type equals the member's own type, no pointer/array layer was removed,
          // i.e. the pointers-only type is embedded by value
          if (member_type == context.getCanonicalType(member->getType().getTypePtr())) {
            throw std::runtime_error(
              fmt::format("\"{}\" references opaque type \"{}\" via non-pointer member \"{}\"", clang::QualType {type, 0}.getAsString(),
                          clang::QualType {member_type, 0}.getAsString(), member->getNameAsString()));
          }
          continue;
        }
        if (member_type->isUnionType() && !types.contains(member_type) && !type_repack_info.UsesCustomRepackFor(member)) {
          throw std::runtime_error(fmt::format("\"{}\" has unannotated member \"{}\" of union type \"{}\"", clang::QualType {type, 0}.getAsString(),
                                               member->getNameAsString(), clang::QualType {member_type, 0}.getAsString()));
        }

        // Only structs, non-void builtins, and enums are tracked
        if (!member_type->isStructureType() && !(member_type->isBuiltinType() && !member_type->isVoidType()) && !member_type->isEnumeralType()) {
          continue;
        }

        auto [new_type_it, inserted] = types.emplace(member_type, RepackedType {});
        if (inserted) {
          // Request another full pass and continue iteration from a known-valid iterator
          changed = true;
          next_type_it = new_type_it;
        }
      }
    }
  }
}

// AST visitor that records every namespace containing a class template named "fex_gen_config".
class ASTVisitor : public clang::RecursiveASTVisitor<ASTVisitor> {
  // Output list owned by the caller; matching namespaces are appended to it
  std::vector<clang::DeclContext*>& decl_contexts;

public:
  ASTVisitor(std::vector<clang::DeclContext*>& decl_contexts_)
    : decl_contexts(decl_contexts_) {}

  /**
   * Matches "template<auto> struct fex_gen_config { ... }".
   *
   * Only declarations whose enclosing context is a namespace are recorded.
   * Always returns true so that the traversal continues.
   */
  bool VisitClassTemplateDecl(clang::ClassTemplateDecl* decl) {
    if (decl->getName() == "fex_gen_config") {
      auto* enclosing_context = decl->getDeclContext();
      if (llvm::isa<clang::NamespaceDecl>(enclosing_context)) {
        decl_contexts.push_back(enclosing_context);
      }
    }

    return true;
  }
};

// AST consumer that runs ASTVisitor over the whole translation unit once parsing has finished.
class ASTConsumer : public clang::ASTConsumer {
  // Output list shared with the visitor; owned by the caller
  std::vector<clang::DeclContext*>& decl_contexts;

public:
  ASTConsumer(std::vector<clang::DeclContext*>& decl_contexts_)
    : decl_contexts(decl_contexts_) {}

  // Walks the translation unit and collects contexts that declare fex_gen_config
  void HandleTranslationUnit(clang::ASTContext& context) override {
    ASTVisitor visitor {decl_contexts};
    visitor.TraverseDecl(context.getTranslationUnitDecl());
  }
};

// Creates the consumer that fills decl_contexts; the compiler instance and input file name are unused.
std::unique_ptr<clang::ASTConsumer> AnalysisAction::CreateASTConsumer(clang::CompilerInstance&, clang::StringRef) {
  std::unique_ptr<clang::ASTConsumer> consumer = std::make_unique<ASTConsumer>(decl_contexts);
  return consumer;
}


================================================
FILE: ThunkLibs/Generator/analysis.h
================================================
#pragma once

#include <clang/Basic/FileEntry.h>
#include <clang/Frontend/FrontendAction.h>

#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

// Common base for function-like entities: the list of parameter types in declaration order.
struct FunctionParams {
  std::vector<clang::QualType> param_types;
};

// Describes a function-pointer parameter (callback) of a thunked function.
struct ThunkedCallback : FunctionParams {
  clang::QualType return_type;

  bool is_stub = false; // Callback will be replaced by a stub that calls std::abort
  bool is_variadic = false; // True if the callback's prototype is variadic
};

// Per-parameter annotation flags; both default to false.
struct ParameterAnnotations {
  bool is_passthrough = false;
  bool assume_compatible = false;

  // Member-wise equality
  bool operator==(const ParameterAnnotations&) const = default;
};

/**
 * Guest<->Host transition point.
 *
 * These are normally used to translate the public API of the guest to host
 * function calls (ThunkedAPIFunction), but a thunk library may also define
 * internal thunks that don't correspond to any function in the implemented
 * API.
 */
struct ThunkedFunction : FunctionParams {
  std::string function_name;
  clang::QualType return_type;

  // If true, param_types contains an extra size_t and the valist for marshalling through an internal function
  bool is_variadic = false;

  // If true, the unpacking function will call a custom fexfn_impl function
  // to be provided manually instead of calling the host library function
  // directly.
  // This is implied e.g. for thunks generated for variadic functions
  bool custom_host_impl = false;

  std::string GetOriginalFunctionName() const {
    const std::string suffix = "_internal";
    assert(function_name.length() > suffix.size());
    assert((std::string_view {&*function_name.end() - suffix.size(), suffix.size()} == suffix));
    return function_name.substr(0, function_name.size() - suffix.size());
  }

  // Maps parameter index to ThunkedCallback
  std::unordered_map<unsigned, ThunkedCallback> callbacks;

  // Maps parameter index to ParameterAnnotations
  // TODO: Use index -1 for the return value?
  std::unordered_map<unsigned, ParameterAnnotations> param_annotations;

  clang::FunctionDecl* decl;
};

/**
 * Function that is part of the API of the thunked library.
 *
 * For each of these, there is:
 * - A publicly visible guest entrypoint (usually auto-generated but may be manually defined)
 * - A pointer to the native host library function loaded through dlsym (or a user-provided function specified via host_loader)
 * - A ThunkedFunction with the same function_name (possibly suffixed with _internal)
 *
 * Note: custom_guest_impl and is_variadic have no default initializers, so
 * they must be set explicitly when constructing an instance.
 */
struct ThunkedAPIFunction : FunctionParams {
  std::string function_name;

  clang::QualType return_type;

  // name of the function to load the native host symbol with
  std::string host_loader;

  // If true, no guest-side implementation of this function will be autogenerated
  bool custom_guest_impl;

  // True if the API function is variadic
  bool is_variadic;

  // Index of the symbol table to store this export in (see guest_symtables).
  // If empty, a library export is created, otherwise the function is entered into a function pointer array
  std::optional<std::size_t> symtable_namespace;
};

// Information gathered for one declaration context that contains a fex_gen_config template.
struct NamespaceInfo {
  // Namespace declaration, or null when the context is not a namespace (i.e. the global scope)
  clang::DeclContext* context;

  // Namespace name; empty for the global scope
  std::string name;

  // Function to load native host library functions with.
  // This function must be defined manually with the signature "void* func(void*, const char*)"
  std::string host_loader;

  // If true, functions annotated in this namespace are entered into a guest symbol table
  // (see ThunkedAPIFunction::symtable_namespace)
  bool generate_guest_symtable;

  // If true, the signature of each function in this namespace is additionally
  // registered as a thunked function pointer type
  bool indirect_guest_calls;
};

/**
 * Frontend action that gathers the thunk interface description
 * (functions, function pointer types, referenced data types, namespaces)
 * from fex_gen_config annotations.
 *
 * Derived classes receive the results through OnAnalysisComplete().
 */
class AnalysisAction : public clang::ASTFrontendAction {
public:
  AnalysisAction() {
    decl_contexts.push_back(nullptr); // global namespace (replaced by getTranslationUnitDecl later)
  }

  void ExecuteAction() override;

  std::unique_ptr<clang::ASTConsumer> CreateASTConsumer(clang::CompilerInstance&, clang::StringRef /*file*/) override;

  // Per-type repacking information stored in the `types` map
  struct RepackedType {
    bool assumed_compatible = false;         // opaque_type or assume_compatible_data_layout
    // Defaults to the value assumed_compatible has at construction time
    bool pointers_only = assumed_compatible; // if true, only pointers to this type may be used

    // If true, emit guest_layout/host_layout definitions even if the type is non-repackable
    bool emit_layout_wrappers = false;

    // Set of members (identified by their field name) with custom repacking
    std::unordered_set<std::string> custom_repacked_members;

    // Returns true if the given member (looked up by its field name) is custom-repacked
    bool UsesCustomRepackFor(const clang::FieldDecl* member) const {
      return custom_repacked_members.contains(member->getNameAsString());
    }
    // Same as above, taking the field name directly
    bool UsesCustomRepackFor(const std::string& member_name) const {
      return custom_repacked_members.contains(member_name);
    }
  };

protected:
  // Build the internal API representation by processing fex_gen_config and other annotated entities
  void ParseInterface(clang::ASTContext&);

  // Recursively extend the type set to include types of struct members
  void CoverReferencedTypes(clang::ASTContext&);

  // Called from ExecuteAction() after parsing is complete
  virtual void OnAnalysisComplete(clang::ASTContext&) {};

  // Declaration contexts to scan for fex_gen_config; the first entry is the global-namespace placeholder
  std::vector<clang::DeclContext*> decl_contexts;

  std::vector<ThunkedFunction> thunks;
  std::vector<ThunkedAPIFunction> thunked_api;

  // Set of function types for which to generate Guest->Host thunking trampolines.
  // The map key is a unique identifier that must be consistent between guest/host processing passes.
  // The map value is a pair of the function pointer's clang::Type and the mapping of parameter annotations
  std::unordered_map<std::string, std::pair<const clang::Type*, std::unordered_map<unsigned, ParameterAnnotations>>> thunked_funcptrs;

  // Types referenced by the interface, with their repacking information
  std::unordered_map<const clang::Type*, RepackedType> types;
  // Library version, if one was annotated
  std::optional<unsigned> lib_version;
  std::vector<NamespaceInfo> namespaces;

  // Looks up repacking information by canonical type.
  // Throws std::out_of_range (from unordered_map::at) if the canonical type is not registered.
  RepackedType& LookupType(clang::ASTContext& context, const clang::Type* type) {
    return types.at(context.getCanonicalType(type));
  }
};

/**
 * Returns a printable name for the given type.
 *
 * - Builtin types are printed as-is, without canonicalization.
 * - Tag types without a name and without a typedef name are replaced by a
 *   placeholder derived from the basename and line number of their declaration.
 * - All other types are canonicalized and printed with a leading
 *   "struct ", "class ", "union ", or "enum " keyword removed.
 */
inline std::string get_type_name(const clang::ASTContext& context, const clang::Type* type) {
  if (type->isBuiltinType()) {
    // Skip canonicalization
    return clang::QualType {type, 0}.getAsString();
  }

  if (auto decl = type->getAsTagDecl()) {
    // Replace unnamed types with a placeholder. This will fail to compile if referenced
    // anywhere in generated code, but at least it will point to a useful location.
    //
    // A notable exception are C-style struct declarations like "typedef struct (unnamed) { ... } MyStruct;".
    // A typedef name is associated with these for linking purposes, so
    // getAsString() will produce a usable identifier.
    // TODO: Consider turning this into a hard error instead of replacing the name
    if (!decl->getDeclName() && !decl->getTypedefNameForAnonDecl()) {
      auto loc = context.getSourceManager().getPresumedLoc(decl->getLocation());
      std::string filename = loc.getFilename();
      // Strip the directory part, if any. A filename without '/' is used as-is
      // (previously, substr(npos) would have thrown std::out_of_range).
      if (const auto last_slash = filename.rfind('/'); last_slash != std::string::npos) {
        filename.erase(0, last_slash + 1);
      }
      std::replace(filename.begin(), filename.end(), '.', '_');
      return "unnamed_type_" + filename + "_" + std::to_string(loc.getLine());
    }
  }

  auto type_name = clang::QualType {context.getCanonicalType(type), 0}.getAsString();
  if (type_name.starts_with("struct ")) {
    type_name = type_name.substr(7);
  }
  if (type_name.starts_with("class ") || type_name.starts_with("union ")) {
    type_name = type_name.substr(6);
  }
  if (type_name.starts_with("enum ")) {
    type_name = type_name.substr(5);
  }
  return type_name;
}

// Builds the name of the fixed-size integer type with the given signedness
// and bit width, e.g. (false, 32) -> "uint32_t".
inline std::string get_fixed_size_int_name(bool is_signed, int size) {
  std::string name = is_signed ? "int" : "uint";
  name += std::to_string(size);
  name += "_t";
  return name;
}

// Same as above, with the signedness taken from the given clang type.
inline std::string get_fixed_size_int_name(const clang::Type* type, int size) {
  const bool is_signed = type->isSignedIntegerType();
  return get_fixed_size_int_name(is_signed, size);
}

// Same as above, with the bit width queried from the AST context.
inline std::string get_fixed_size_int_name(const clang::Type* type, const clang::ASTContext& context) {
  const auto size_bits = context.getTypeSize(type);
  return get_fixed_size_int_name(type, size_bits);
}


================================================
FILE: ThunkLibs/Generator/data_layout.cpp
================================================
#include "analysis.h"
#include "data_layout.h"
#include "interface.h"

#include <fmt/format.h>

#include <openssl/sha.h>

constexpr bool enable_debug_output = false;

// Visitor for gathering data layout information that can be passed across libclang invocations
class AnalyzeDataLayoutAction : public AnalysisAction {
  // Output: overwritten with the computed layout in OnAnalysisComplete()
  ABI& type_abi;

  // Computes the layout of all analyzed types and registers thunked function pointer types
  void OnAnalysisComplete(clang::ASTContext&) override;

public:
  // Stores a reference to the given ABI object, which must outlive this action
  AnalyzeDataLayoutAction(ABI&);
};

// Binds the ABI object that receives the analysis results.
AnalyzeDataLayoutAction::AnalyzeDataLayoutAction(ABI& abi)
  : type_abi(abi) {}

/**
 * Computes size, alignment, and member layout for the given set of types.
 *
 * The result is keyed by clang::Type pointer. Struct and enum types are keyed
 * by their canonical type; builtin types are keyed by the type as given.
 *
 * Throws std::runtime_error if
 * - a type is registered twice
 * - a type that is not assumed compatible is incomplete
 * - a struct has a multi-dimensional constant array member
 * - the size of a struct-typed member disagrees with the recorded size of its type
 */
std::unordered_map<const clang::Type*, TypeInfo>
ComputeDataLayout(const clang::ASTContext& context, const std::unordered_map<const clang::Type*, AnalysisAction::RepackedType>& types) {
  std::unordered_map<const clang::Type*, TypeInfo> layout;

  // First, add all types directly used in function signatures of the library API to the meta set
  for (const auto& [type, type_repack_info] : types) {
    if (type_repack_info.assumed_compatible) {
      // Types assumed compatible get an empty TypeInfo; no layout is computed for them
      auto [_, inserted] = layout.insert(std::pair {context.getCanonicalType(type), TypeInfo {}});
      if (!inserted) {
        throw std::runtime_error(
          "Failed to gather type metadata: Opaque type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered");
      }
      continue;
    }

    if (type->isIncompleteType()) {
      throw std::runtime_error(
        "Cannot compute data layout of incomplete type \"" + clang::QualType {type, 0}.getAsString() + "\". Did you forget any annotations?");
    }

    if (type->isStructureType()) {
      // Members are filled in by the second loop below
      StructInfo info;
      info.size_bits = context.getTypeSize(type);
      info.alignment_bits = context.getTypeAlign(type);

      auto [_, inserted] = layout.insert(std::pair {context.getCanonicalType(type), info});
      if (!inserted) {
        throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered");
      }
    } else if (type->isBuiltinType() || type->isEnumeralType()) {
      SimpleTypeInfo info;
      info.size_bits = context.getTypeSize(type);
      info.alignment_bits = context.getTypeAlign(type);

      // NOTE: Non-enum types are intentionally not canonicalized since that would turn e.g. size_t into platform-specific types
      auto [_, inserted] = layout.insert(std::pair {type->isEnumeralType() ? context.getCanonicalType(type) : type, info});
      if (!inserted) {
        throw std::runtime_error("Failed to gather type metadata: Type \"" + clang::QualType {type, 0}.getAsString() + "\" already registered");
      }
    }
  }

  // Then, add information about members
  for (const auto& [type, type_repack_info] : types) {
    if (!type->isStructureType() || type_repack_info.assumed_compatible) {
      continue;
    }

    auto& info = *layout.at(context.getCanonicalType(type)).get_if_struct();

    for (auto* field : type->getAsStructureType()->getDecl()->fields()) {
      auto field_type = field->getType().getTypePtr();
      std::optional<uint64_t> array_size;
      // For constant-size arrays, record the element count and continue with the element type
      if (auto array_type = llvm::dyn_cast<clang::ConstantArrayType>(field->getType())) {
        array_size = array_type->getSize().getZExtValue();
        field_type = array_type->getElementType().getTypePtr();
        if (llvm::isa<clang::ConstantArrayType>(field_type)) {
          throw std::runtime_error("Unsupported multi-dimensional array member \"" + field->getNameAsString() + "\" in type \"" +
                                   clang::QualType {type, 0}.getAsString() + "\"");
        }
      }

      StructInfo::MemberInfo member_info {
        .size_bits = context.getTypeSize(field->getType()), // Total size even for arrays
        .offset_bits = context.getFieldOffset(field),
        .type_name = get_type_name(context, field_type),
        .member_name = field->getNameAsString(),
        .array_size = array_size,
        .is_function_pointer = field_type->isFunctionPointerType(),
        .is_integral = field->getType()->isIntegerType(),
        .is_signed_integer = field->getType()->isSignedIntegerType(),
      };

      // TODO: Process types in dependency-order. Currently we skip this
      //       check if we haven't processed the member type already,
      //       which is only safe since this is a consistency check
      if (field_type->isStructureType() && layout.contains(context.getCanonicalType(field_type))) {
        // Assert for self-consistency
        auto field_meta = layout.at(context.getCanonicalType(field_type));
        // Throws std::out_of_range if the member's type is in the layout but missing from the input type set
        (void)types.at(context.getCanonicalType(field_type));
        if (auto field_info = field_meta.get_if_simple_or_struct()) {
          // NOTE(review): array_size may be 0 for a zero-length array member, which would
          // make this a division by zero - confirm whether such members can reach this point
          if (field_info->size_bits != member_info.size_bits / member_info.array_size.value_or(1)) {
            throw std::runtime_error("Inconsistent type size detected");
          }
        }
      }

      // Add built-in types, even if referenced through a pointer
      for (auto* inner_field_type = field_type; inner_field_type; inner_field_type = inner_field_type->getPointeeType().getTypePtrOrNull()) {
        if (inner_field_type->isBuiltinType() || inner_field_type->isEnumeralType()) {
          // The analysis pass doesn't explicitly register built-in types, so add them manually here
          SimpleTypeInfo info {
            .size_bits = context.getTypeSize(inner_field_type),
            .alignment_bits = context.getTypeAlign(inner_field_type),
          };
          // Enum types are keyed by canonical type, matching the first loop above
          if (!inner_field_type->isBuiltinType()) {
            inner_field_type = context.getCanonicalType(inner_field_type);
          }
          [[maybe_unused]] auto [prev, inserted] = layout.insert(std::pair {inner_field_type, info});
          //                    if (!inserted && prev->second != TypeInfo { info }) {
          //                        // TODO: Throw error since consistency check failed
          //                    }
        }
      }

      info.members.push_back(std::move(member_info));
    }
  }

  // Optional dump of the computed layout (sizes and offsets printed in bytes)
  if (enable_debug_output) {
    for (const auto& [type, info] : layout) {
      auto basic_info = info.get_if_simple_or_struct();
      if (!basic_info) {
        continue;
      }

      fprintf(stderr, "  Host entry %s: %lu (%lu)\n", clang::QualType {type, 0}.getAsString().c_str(), basic_info->size_bits / 8,
              basic_info->alignment_bits / 8);

      if (auto struct_info = info.get_if_struct()) {
        for (const auto& member : struct_info->members) {
          fprintf(stderr, "    Offset %lu-%lu: %s %s%s\n", member.offset_bits / 8, (member.offset_bits + member.size_bits - 1) / 8,
                  member.type_name.c_str(), member.member_name.c_str(),
                  member.array_size ? fmt::format("[{}]", member.array_size.value()).c_str() : "");
        }
      }
    }
  }

  return layout;
}

/**
 * Converts a layout keyed by clang::Type pointers into one keyed by type name.
 *
 * Integral struct members are renamed to fixed-size integer type names, and
 * those fixed-size types are registered as well. Integer types are
 * additionally registered under their fixed-size name.
 *
 * Throws std::runtime_error if a non-integer type name is registered twice
 * with differing type information.
 */
ABI GetStableLayout(const clang::ASTContext& context, const std::unordered_map<const clang::Type*, TypeInfo>& data_layout) {
  ABI stable_layout;

  // Iterate by value: the member type names of the local copy are rewritten below
  for (auto [type, type_info] : data_layout) {
    auto type_name = get_type_name(context, type);
    if (auto struct_info = type_info.get_if_struct()) {
      for (auto& member : struct_info->members) {
        if (member.is_integral) {
          // Map member types to fixed-size integers
          auto alt_type_name = get_fixed_size_int_name(member.is_signed_integer, member.size_bits);
          auto alt_type_info = SimpleTypeInfo {
            .size_bits = member.size_bits,
            .alignment_bits = context.getTypeAlign(context.getIntTypeForBitwidth(member.size_bits, member.is_signed_integer)),
          };
          stable_layout.insert(std::pair {alt_type_name, alt_type_info});
          member.type_name = std::move(alt_type_name);
        }
      }
    }

    const bool is_integer = type->isIntegerType();

    // Insert a copy rather than moving: type_info is still read below, both for the
    // fixed-size alias and for the duplicate check
    auto [it, inserted] = stable_layout.insert(std::pair {type_name, type_info});
    if (is_integer) {
      auto alt_type_name = get_fixed_size_int_name(type, context);
      stable_layout.insert(std::pair {std::move(alt_type_name), type_info});
    }

    // The integer test comes first so that `it` is only dereferenced when the second
    // insert above did not run (that insert may invalidate `it` if ABI is a hash container)
    if (!inserted && !is_integer && it->second != type_info) {
      throw std::runtime_error("Duplicate type information: Tried to re-register type \"" + type_name + "\"");
    }
  }

  // Pointer size in bytes
  stable_layout.pointer_size = context.getTypeSize(context.getUIntPtrType()) / 8;

  return stable_layout;
}

static std::array<uint8_t, 32> GetSha256(const std::string& function_name) {
  std::array<uint8_t, 32> sha256;
  SHA256(reinterpret_cast<const unsigned char*>(function_name.data()), function_name.size(), sha256.data());
  return sha256;
};

// Returns a name for `type` in which builtin integers are spelled as uintN_t.
//
// - Builtin integer types become "uint<bits>_t".
// - Pointers to builtin integers wider than 8 bits become "uint<bits>_t*".
// - Everything else is returned as printed by clang.
std::string GetTypeNameWithFixedSizeIntegers(clang::ASTContext& context, clang::QualType type) {
  const auto is_builtin_integer = [](clang::QualType candidate) {
    return candidate->isBuiltinType() && candidate->isIntegerType();
  };

  if (is_builtin_integer(type)) {
    return fmt::format("uint{}_t", context.getTypeSize(type));
  }

  if (type->isPointerType()) {
    const auto pointee = type->getPointeeType();
    if (is_builtin_integer(pointee)) {
      // TODO: Also apply this path to char-like types
      const auto pointee_bits = context.getTypeSize(pointee);
      if (pointee_bits > 8) {
        return fmt::format("uint{}_t*", pointee_bits);
      }
    }
  }

  return type.getAsString();
}

// Records the stable data layout of all analyzed types in type_abi and adds
// one FuncPtrInfo entry per thunked function pointer type.
void AnalyzeDataLayoutAction::OnAnalysisComplete(clang::ASTContext& context) {
  type_abi = GetStableLayout(context, ComputeDataLayout(context, types));

  // Register functions that must be guest-callable through host function pointers
  for (auto& funcptr_entry : thunked_funcptrs) {
    auto& funcptr_id = funcptr_entry.first;
    auto& [type, param_annotations] = funcptr_entry.second;

    const auto* prototype = type->getAs<clang::FunctionProtoType>();
    const std::string mangled_name = clang::QualType {type, 0}.getAsString();

    // The hash is derived from the prefixed function type string
    FuncPtrInfo info = {GetSha256("fexcallback_" + mangled_name)};

    // TODO: Also apply GetTypeNameWithFixedSizeIntegers here
    info.result = prototype->getReturnType().getAsString();

    for (const auto& param_type : prototype->getParamTypes()) {
      info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, param_type));
    }

    type_abi.thunked_funcptrs[funcptr_id] = std::move(info);
  }
}

// Determines how compatible the host data layout of `type` is with the guest ABI.
//
// Parameters:
//   context     - AST context that `type` belongs to
//   type        - type to check
//   host_abi    - host layout information indexed by clang::Type
//   type_compat - cache of results; also used to detect recursive type references.
//                 The result for `type` is stored here before returning.
//
// Returns Full if the layouts match, Repackable if they differ but can be converted
// automatically, and None otherwise.
//
// Throws std::runtime_error if `type` is reached recursively while it is still being
// evaluated, or if an opaque (pointers-only) type is queried by value. Lookups via
// at() throw std::out_of_range if the guest or host ABI has no entry for a needed type.
TypeCompatibility DataLayoutCompareAction::GetTypeCompatibility(const clang::ASTContext& context, const clang::Type* type,
                                                                const std::unordered_map<const clang::Type*, TypeInfo> host_abi,
                                                                std::unordered_map<const clang::Type*, TypeCompatibility>& type_compat) {
  assert(type->isCanonicalUnqualified() || type->isBuiltinType() || type->isEnumeralType());

  {
    // Reserve a slot to be filled later. The placeholder value is used
    // to detect infinite recursions.
    constexpr auto placeholder_compat = TypeCompatibility {100};
    auto [existing_compat_it, is_new_type] = type_compat.emplace(type, placeholder_compat);
    if (!is_new_type) {
      if (existing_compat_it->second == placeholder_compat) {
        throw std::runtime_error("Found recursive reference to type \"" + clang::QualType {type, 0}.getAsString() + "\"");
      }

      // Already computed earlier
      return existing_compat_it->second;
    }
  }

  // Types annotated as compatible are trusted without inspecting their layout
  if (types.contains(type) && types.at(type).assumed_compatible) {
    if (types.at(type).pointers_only && !type->isPointerType()) {
      throw std::runtime_error(
        "Tried to dereference opaque type \"" + clang::QualType {type, 0}.getAsString() + "\" when querying data layout compatibility");
    }
    type_compat.at(type) = TypeCompatibility::Full;
    return TypeCompatibility::Full;
  }

  auto type_name = get_type_name(context, type);
  // Look up the same type name in the guest map,
  // unless it's an integer (which is mapped to fixed-size uintX_t types)
  auto guest_info = guest_abi.at(!type->isIntegerType() ? std::move(type_name) : get_fixed_size_int_name(type, context));
  auto& host_info = host_abi.at(type->isBuiltinType() ? type : context.getCanonicalType(type));

  const bool is_32bit = (guest_abi.pointer_size == 4);

  // Assume full compatibility, then downgrade as needed
  auto compat = TypeCompatibility::Full;

  if (guest_info != host_info) {
    // Non-matching data layout... downgrade to Repackable
    // TODO: Even for non-structs, this only works if the types are reasonably similar (e.g. uint32_t -> uint64_t)
    compat = TypeCompatibility::Repackable;
  }

  auto guest_struct_info = guest_info.get_if_struct();
  auto host_struct_info = host_info.get_if_struct();
  if (guest_struct_info && (!host_struct_info || guest_struct_info->members.size() != host_struct_info->members.size())) {
    // Either the host side isn't a struct at all (so there is nothing to match members against),
    // or members are missing from either the guest or host layout
    // NOTE: If the members are merely named differently, this will be caught in the else-if below
    compat = TypeCompatibility::None;
  } else if (guest_struct_info) {
    std::vector<TypeCompatibility> member_compat;
    for (std::size_t member_idx = 0; member_idx < guest_struct_info->members.size(); ++member_idx) {
      // Look up the corresponding member in the host struct definition.
      // The members may be listed in a different order, so we can't
      // directly use member_idx for this
      auto* host_member_field = [&]() -> clang::FieldDecl* {
        auto struct_decl = type->getAsStructureType()->getDecl();
        auto it = std::find_if(struct_decl->field_begin(), struct_decl->field_end(),
                               [&](auto* field) { return field->getName() == guest_struct_info->members.at(member_idx).member_name; });
        if (it == struct_decl->field_end()) {
          return nullptr;
        }
        return *it;
      }();
      if (!host_member_field) {
        // No corresponding host struct member
        // TODO: Also detect host members that are missing from the guest struct
        member_compat.push_back(TypeCompatibility::None);
        break;
      }

      auto host_member_type = context.getCanonicalType(host_member_field->getType().getTypePtr());
      if (auto array_type = llvm::dyn_cast<clang::ConstantArrayType>(host_member_type)) {
        // Compare array element type only. The array size is already considered by the layout information of the containing struct.
        host_member_type = context.getCanonicalType(array_type->getElementType().getTypePtr());
      }

      if (types.at(type).UsesCustomRepackFor(host_member_field)) {
        // User-provided repacking code takes care of this member
        member_compat.push_back(TypeCompatibility::Repackable);
        continue;
      } else if (host_member_type->isPointerType()) {
        // Automatic repacking of pointers to non-compatible types is only possible if:
        // * Pointee is fully compatible, or
        // * Pointer member is annotated
        auto host_member_pointee_type = context.getCanonicalType(host_member_type->getPointeeType().getTypePtr());
        if (types.contains(host_member_pointee_type) && types.at(host_member_pointee_type).assumed_compatible) {
          // Pointee doesn't need repacking, but pointer needs extending on 32-bit
          member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full);
        } else if (host_member_pointee_type->isPointerType()) {
          // This is a nested pointer, e.g. void**

          if (is_32bit) {
            // Nested pointers can't be repacked on 32-bit
            member_compat.push_back(TypeCompatibility::None);
          } else if (types.contains(host_member_pointee_type->getPointeeType().getTypePtr()) &&
                     types.at(host_member_pointee_type->getPointeeType().getTypePtr()).assumed_compatible) {
            // Pointers to opaque types are fine
            member_compat.push_back(TypeCompatibility::Full);
          } else {
            // Check the innermost type's compatibility on 64-bit
            auto pointee_pointee_type = host_member_pointee_type->getPointeeType().getTypePtr();
            // TODO: Not sure how to handle void here. Probably should require an annotation instead of "just working"
            auto pointee_pointee_compat = pointee_pointee_type->isVoidType() ?
                                            TypeCompatibility::Full :
                                            GetTypeCompatibility(context, pointee_pointee_type, host_abi, type_compat);
            if (pointee_pointee_compat == TypeCompatibility::Full) {
              member_compat.push_back(TypeCompatibility::Full);
            } else {
              member_compat.push_back(TypeCompatibility::None);
            }
          }
        } else if (!host_member_pointee_type->isVoidType() &&
                   (host_member_pointee_type->isBuiltinType() || host_member_pointee_type->isEnumeralType())) {
          // TODO: What are good heuristics for this?
          // size_t should yield TypeCompatibility::Repackable
          // inconsistent types should probably default to TypeCompatibility::None
          // For now, just always assume compatible... (will degrade to Repackable below)
          member_compat.push_back(TypeCompatibility::Full);
        } else if (!host_member_pointee_type->isVoidType() &&
                   (host_member_pointee_type->isStructureType() || types.contains(host_member_pointee_type))) {
          auto pointee_compat = GetTypeCompatibility(context, host_member_pointee_type, host_abi, type_compat);
          if (pointee_compat == TypeCompatibility::Full) {
            // Pointee is fully compatible, so automatic repacking only requires converting the pointers themselves
            member_compat.push_back(is_32bit ? TypeCompatibility::Repackable : TypeCompatibility::Full);
          } else {
            // If the pointee is incompatible (even if repackable), automatic repacking isn't possible
            member_compat.push_back(TypeCompatibility::None);
          }
        } else if (!is_32bit && host_member_pointee_type->isVoidType()) {
          // TODO: Not sure how to handle void here. Probably should require an annotation instead of "just working"
          member_compat.push_back(TypeCompatibility::Full);
        } else {
          member_compat.push_back(TypeCompatibility::None);
        }
        continue;
      }

      if (guest_abi.at(guest_struct_info->members[member_idx].type_name).get_if_struct()) {
        // The value is unused, but the lookup throws if the member type has no host layout entry
        auto host_type_info = host_abi.at(host_member_type);
        member_compat.push_back(GetTypeCompatibility(context, host_member_type, host_abi, type_compat));
      } else {
        // Member was checked for size/alignment above already
      }
    }

    if (std::all_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::Full; })) {
      // TypeCompatibility::Full or ::Repackable
    } else if (std::none_of(member_compat.begin(), member_compat.end(), [](auto compat) { return compat == TypeCompatibility::None; })) {
      // Downgrade to Repackable
      compat = TypeCompatibility::Repackable;
    } else {
      // Downgrade to None
      compat = TypeCompatibility::None;
    }
  }

  // Replace the placeholder with the final result
  type_compat.at(type) = compat;
  return compat;
}

// Returns a copy of the guest ABI's function pointer info registered under funcptr_id.
// The lookup uses at(), so an unknown id results in std::out_of_range.
FuncPtrInfo DataLayoutCompareAction::LookupGuestFuncPtrInfo(const char* funcptr_id) {
  const auto& guest_funcptrs = guest_abi.thunked_funcptrs;
  return guest_funcptrs.at(funcptr_id);
}

// Initializes the factory's `abi` member from the given ABI description.
// NOTE(review): whether the member holds a reference or a copy depends on its
// declaration, which is not visible here — confirm lifetime requirements.
DataLayoutCompareActionFactory::DataLayoutCompareActionFactory(const ABI& abi)
  : abi(abi) {}

DataLayoutCompareActionFactory::~DataLayoutCompareActionFactory() = default;

// Creates a new DataLayoutCompareAction constructed from this factory's ABI.
std::unique_ptr<clang::FrontendAction> DataLayoutCompareActionFactory::create() {
  return std::make_unique<DataLayoutCompareAction>(abi);
}

// Allocates an empty ABI object owned by this factory.
AnalyzeDataLayoutActionFactory::AnalyzeDataLayoutActionFactory()
  : abi(std::make_unique<ABI>()) {}

AnalyzeDataLayoutActionFactory::~AnalyzeDataLayoutActionFactory() = default;

// Creates a new AnalyzeDataLayoutAction that is handed the factory-owned ABI object.
std::unique_ptr<clang::FrontendAction> AnalyzeDataLayoutActionFactory::create() {
  return std::make_unique<AnalyzeDataLayoutAction>(*abi);
}


================================================
FILE: ThunkLibs/Generator/data_layout.h
================================================
#pragma once

#include "analysis.h"

#include <clang/Frontend/FrontendAction.h>

#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>
#include <variant>
#include <vector>

// Size and alignment of a type, both measured in bits.
struct SimpleTypeInfo {
  uint64_t size_bits;
  uint64_t alignment_bits;

  // Two infos are equal if both size and alignment agree.
  bool operator==(const SimpleTypeInfo& other) const {
    if (size_bits != other.size_bits) {
      return false;
    }
    return alignment_bits == other.alignment_bits;
  }
};

struct StructInfo : SimpleTypeInfo {
  struct MemberInfo {
    uint64_t size_bits; // size of this member. For arrays, total size of all elements
    uint64_t offset_bits;
    std::string type_name;
    std::string member_name;
    std::optional<uint64_t> array_size;
    bool is_function_pointer;
    bool is_integral;
    bool is_signed_integer;

    bool operator==(const MemberInfo& other) const {
      return size_bits == other.size_bits && offset_bits == other.offset_bits &&
             // The type name may differ for integral types if all other parameters are equal
             (type_name == other.type_name || (is_integral && other.is_integral)) && member_name == other.member_name &&
             array_size == other.array_size && is_function_pointer == other.is_function_pointer && is_integral == other.is_integral;
    }
  };

  std::vector<MemberInfo> members;

  bool operator==(const StructInfo& other) const {
    return (const SimpleTypeInfo&)*this == (const SimpleTypeInfo&)other &&
           std::equal(members.begin(), members.end(), other.members.begin(), other.members.end());
  }
};

struct TypeInfo : std::variant<std::monostate, SimpleTypeInfo, StructInfo> {
  using Parent = std::variant<std::monostate, SimpleTypeInfo, StructInfo>;

  TypeInfo() = default;
  TypeInfo(const SimpleTypeInfo& info)
    : Parent(info) {}
  TypeInfo(const StructInfo& info)
    : Parent(info) {}

  // Opaque declaration with no full definition.
  // Pointers to these can still be passed along ABI boundaries assuming
  // implementation details are only ever accessed on one side.
  bool is_opaque() const {
    return std::holds_alternative<std::monostate>(*this);
  }

  const StructInfo* get_if_struct() const {
    return std::get_if<StructInfo>(this);
  }

  StructInfo* get_if_struct() {
    return std::get_if<StructInfo>(this);
  }

  const SimpleTypeInfo* get_if_simple_or_struct() const {
    auto as_struct = std::get_if<StructInfo>(this);
    if (as_struct) {
      return as_struct;
    }
    return std::get_if<SimpleTypeInfo>(this);
  }
};

// Describes a function pointer type that is thunked across the guest/host boundary.
struct FuncPtrInfo {
  // 32-byte SHA-256 digest identifying the function pointer type
  std::array<uint8_t, 32> sha256;
  // Name of the return type
  std::string result;
  // Names of the parameter types, in declaration order
  std::vector<std::string> args;
};

// ABI description: a map from type name to layout information, plus data
// about thunked function pointers and the pointer size.
struct ABI : std::unordered_map<std::string, TypeInfo> {
  // Function pointer information, keyed by function pointer id
  std::unordered_map<std::string, FuncPtrInfo> thunked_funcptrs;
  int pointer_size; // in bytes
};

// Computes layout information for the given types, indexed by clang::Type.
// NOTE(review): the exact set of types included in the result is determined by
// the implementation, which is not fully visible here — confirm before relying on it.
std::unordered_map<const clang::Type*, TypeInfo>
ComputeDataLayout(const clang::ASTContext& context, const std::unordered_map<const clang::Type*, AnalysisAction::RepackedType>& types);

// Convert the output of ComputeDataLayout to a format that isn't tied to a libclang session.
// As a consequence, type information is indexed by type name instead of clang::Type.
// Throws std::runtime_error if conflicting information is registered for the same non-integer type name.
ABI GetStableLayout(const clang::ASTContext& context, const std::unordered_map<const clang::Type*, TypeInfo>& data_layout);

/**
 * Returns the name of the given type, but replaces builtin integer types
 * with fixed-size equivalents.
 *
 * The replacement is always spelled as an unsigned type (uintN_t), regardless
 * of the signedness of the original type. Pointers to builtin integers are
 * only rewritten if the pointee is wider than 8 bits.
 *
 * Examples (assuming a 32-bit int and a 64-bit long long):
 * - int -> uint32_t
 * - unsigned long long* -> uint64_t*
 * - MyStruct -> MyStruct (no change)
 */
std::string GetTypeNameWithFixedSizeIntegers(clang::ASTContext&, clang::QualType);

// Result of comparing a type's data layout between guest and host.
// NOTE: GetTypeCompatibility temporarily stores the out-of-range value 100 as a
// placeholder while a type is being evaluated.
enum class TypeCompatibility {
  Full,       // Type has matching data layout across architectures
  Repackable, // Type has different data layout but can be repacked automatically
  None,       // Type has different data layout and cannot be repacked automatically
};

// Analysis action that compares host data layouts against a given guest ABI.
class DataLayoutCompareAction : public AnalysisAction {
public:
  // Only a reference to guest_abi is stored, so the referenced ABI must outlive this action.
  DataLayoutCompareAction(const ABI& guest_abi)
    : guest_abi(guest_abi) {}

  // Returns the compatibility of the given type's host layout (looked up in host_abi)
  // with the guest ABI. Results are cached in type_compat.
  // NOTE(review): host_abi is taken by value, so the map is copied on every call
  // (including recursive ones) — consider whether a const reference was intended.
  TypeCompatibility GetTypeCompatibility(const clang::ASTContext&, const clang::Type*,
                                         const std::unordered_map<const clang::Type*, TypeInfo> host_abi,
                                         std::unordered_map<const clang::Type*, TypeCompatibility>& type_compat);

  // Returns the guest ABI's function pointer information registered under funcptr_id.
  FuncPtrInfo LookupGuestFuncPtrInfo(const char* funcptr_id);

protected:
  const ABI& guest_abi;
};


================================================
FILE: ThunkLibs/Generator/diagnostics.h
================================================
#pragma once

#include <clang/AST/ASTContext.h>
#include <clang/Basic/Diagnostic.h>
#include <clang/Basic/SourceLocation.h>

#include <utility>
#include <vector>

struct ClangDiagnosticAsException {
  std::pair<clang::SourceLocation, unsigned> diagnostic;

  std::vector<ClangDiagnosticAsException> notes;

  // List of callbacks that add an argument to a clang::DiagnosticBuilder
  std::vector<std::function<void(clang::DiagnosticBuilder&)>> args;

  ClangDiagnosticAsException& AddString(std::string str) {
    args.push_back([arg = std::move(str)](clang::DiagnosticBuilder& db) { db.AddString(arg); });
    return *this;
  }

  ClangDiagnosticAsException& AddTaggedVal(clang::QualType type) {
    args.push_back([val = type](clang::DiagnosticBuilder& db) {
      db.AddTaggedVal(reinterpret_cast<uintptr_t>(val.getAsOpaquePtr()), clang::DiagnosticsEngine::ak_qualtype);
    });
    return *this;
  }

  ClangDiagnosticAsException& addNote(ClangDiagnosticAsException diagnostic) {
    notes.push_back(std::move(diagnostic));
    return *this;
  }

  void Report(clang::DiagnosticsEngine& diagnostics) const {
    {
      auto builder = diagnostics.Report(diagnostic.first, diagnostic.second);
      for (auto& arg_appender : args) {
        arg_appender(builder);
      }
    }
    for (auto& note : notes) {
      note.Report(diagnostics);
    }
  }
};

// Helper class to build a custom DiagID from the given message and store it in a throwable object
// Helper that registers a custom diagnostic ID for a message and wraps it,
// together with a source location, in a throwable ClangDiagnosticAsException.
struct ErrorReporter {
  clang::ASTContext& context;

  template<std::size_t N>
  [[nodiscard]]
  ClangDiagnosticAsException
  operator()(clang::SourceLocation loc, const char (&message)[N], clang::DiagnosticsEngine::Level level = clang::DiagnosticsEngine::Error) {
    auto& diagnostics_engine = context.getDiagnostics();
    const auto diag_id = diagnostics_engine.getCustomDiagID(level, message);
    return ClangDiagnosticAsException {std::pair(loc, diag_id)};
  }
};


================================================
FILE: ThunkLibs/Generator/gen.cpp
================================================
#include "analysis.h"
#include "data_layout.h"
#include "diagnostics.h"
#include "interface.h"
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Basic/DiagnosticOptions.h>

#include <fstream>
#include <numeric>
#include <iostream>
#include <string_view>
#include <unordered_map>
#include <variant>

#include <fmt/format.h>
#include <fmt/ostream.h>
#include <fmt/ranges.h>

#include <openssl/sha.h>

// Action that generates thunk library helper code once analysis has completed.
class GenerateThunkLibsAction : public DataLayoutCompareAction {
public:
  // NOTE: libname and the output filenames are stored by reference (see members below),
  // so the referenced objects must outlive this action.
  GenerateThunkLibsAction(const std::string& libname, const OutputFilenames&, const ABI& abi);

private:
  // Generate helper code for thunk libraries and write them to the output file
  void OnAnalysisComplete(clang::ASTContext&) override;

  // Emit guest_layout/host_layout wrappers for types passed across architecture boundaries
  void EmitLayoutWrappers(clang::ASTContext&, std::ofstream&, std::unordered_map<const clang::Type*, TypeCompatibility>& type_compat);

  // Library name exactly as passed to the constructor
  const std::string& libfilename;
  std::string libname; // sanitized filename, usable as part of emitted function names
  const OutputFilenames& output_filenames;
};

// Stores the given names and derives the sanitized library name by replacing
// every '-' in libname_ with '_'.
GenerateThunkLibsAction::GenerateThunkLibsAction(const std::string& libname_, const OutputFilenames& output_filenames_, const ABI& abi)
  : DataLayoutCompareAction(abi)
  , libfilename(libname_)
  , libname(libname_)
  , output_filenames(output_filenames_) {
  for (std::size_t pos = 0; pos < libname.size(); ++pos) {
    if (libname[pos] != '-') {
      continue;
    }
    libname[pos] = '_';
  }
}

// Calls format_arg(idx) for every parameter index of `params` and joins the
// resulting strings with ", ".
template<typename Fn>
static std::string format_function_args(const FunctionParams& params, Fn&& format_arg) {
  std::string joined;
  const std::size_t num_params = params.param_types.size();
  for (std::size_t idx = 0; idx != num_params; ++idx) {
    if (idx != 0) {
      joined += ", ";
    }
    joined += std::forward<Fn>(format_arg)(idx);
  }
  return joined;
}

// Sorting routine that also works when `compare` only describes a partial order.
//
// std::sort requires that any two different elements A and B of the input
// range compare either A<B or B<A. Dependency relations violate this: A and B
// might not depend on each other while both depend on a third element C.
// BubbleSort then ensures C precedes both A and B in the sorted range, while
// the relative order of A and B is left undetermined. As a result, iterating
// over the sorted range visits each dependency before any of its dependees.
template<std::forward_iterator It>
void BubbleSort(It begin, It end, std::relation<std::iter_value_t<It>, std::iter_value_t<It>> auto compare) {
  // Repeat full passes until one completes without swapping anything
  for (bool swapped_any = true; swapped_any;) {
    swapped_any = false;
    for (auto current = begin; current != end; ++current) {
      auto candidate = std::next(current);
      while (candidate != end) {
        if (compare(*candidate, *current)) {
          std::swap(*current, *candidate);
          swapped_any = true;
          // The element at `current` changed, so rescan everything after it
          candidate = std::next(current);
        } else {
          ++candidate;
        }
      }
    }
  }
}

// Ordering predicate: A < B if struct B has a member of type A that requires A to be
// completely defined (i.e. the member is not a pointer). Array members are
// treated like their element type, and the check recurses into member structs of B.
struct compare_by_struct_dependency {
  clang::ASTContext& context;

  // Convenience overload for (type, repack info) pairs; only the types are compared.
  bool operator()(const std::pair<const clang::Type*, GenerateThunkLibsAction::RepackedType>& a,
                  const std::pair<const clang::Type*, GenerateThunkLibsAction::RepackedType>& b) const {
    const clang::Type* lhs_type = a.first;
    const clang::Type* rhs_type = b.first;
    return (*this)(lhs_type, rhs_type);
  }

  // Throws std::runtime_error if either type is an array.
  bool operator()(const clang::Type* a, const clang::Type* b) const {
    if (llvm::isa<clang::ConstantArrayType>(b)) {
      throw std::runtime_error("Cannot have \"b\" be an array");
    }

    const auto* container = b->getAsStructureType();
    if (container == nullptr) {
      // Not a struct => no dependency
      return false;
    }

    if (a->isArrayType()) {
      throw std::runtime_error("Cannot have \"a\" be an array");
    }

    for (auto* field : container->getDecl()->fields()) {
      auto field_type = field->getType().getTypePtr();

      // Pointers don't need the definition to be available
      if (field_type->isPointerType()) {
        continue;
      }

      // Peel off any array type layers from the member
      while (auto as_array = llvm::dyn_cast<clang::ConstantArrayType>(field_type)) {
        field_type = as_array->getArrayElementTypeNoTypeQual();
      }

      // Either a direct dependency, or a transitive one through the member's own type
      if (context.hasSameType(a, field_type) || (*this)(a, field_type)) {
        return true;
      }
    }

    // No dependency found
    return false;
  }
};

// Writes guest_layout/host_layout specializations, conversion functions, custom
// repacking hooks and has_compatible_data_layout specializations for the analyzed
// types to `file`.
//
// Parameters:
//   context     - AST context the analyzed types belong to
//   file        - output stream receiving the generated C++ code
//   type_compat - compatibility of each type; entries for types annotated as
//                 assumed-compatible are overwritten with Full here
//
// Builtin types and types whose name starts with "unnamed_" are skipped.
void GenerateThunkLibsAction::EmitLayoutWrappers(clang::ASTContext& context, std::ofstream& file,
                                                 std::unordered_map<const clang::Type*, TypeCompatibility>& type_compat) {
  // Sort struct types by dependency so that repacking code is emitted in an order that compiles fine
  std::vector<std::pair<const clang::Type*, RepackedType>> types {this->types.begin(), this->types.end()};
  BubbleSort(types.begin(), types.end(), compare_by_struct_dependency {context});

  for (const auto& [type, type_repack_info] : types) {
    auto struct_name = get_type_name(context, type);

    // Opaque types don't need layout definitions
    if (type_repack_info.assumed_compatible && type_repack_info.pointers_only && struct_name != "void") {
      // Pointers to opaque types are only marked layout-compatible if the guest isn't 32-bit
      if (guest_abi.pointer_size != 4) {
        fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}*> = true;\n", struct_name);
      }
      continue;
    } else if (type_repack_info.assumed_compatible) {
      // TODO: Handle more cleanly
      type_compat[type] = TypeCompatibility::Full;
    }

    // These must be handled later since they are not canonicalized and hence must be de-duplicated first
    if (type->isBuiltinType()) {
      continue;
    }

    // TODO: Instead, map these names back to *some* type that's named?
    if (struct_name.starts_with("unnamed_")) {
      continue;
    }

    // Enums are represented on the guest side by a fixed-size integer of the guest's enum size
    if (type->isEnumeralType()) {
      fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name);
      fmt::print(file, "  using type = {}int{}_t;\n", type->isUnsignedIntegerOrEnumerationType() ? "u" : "",
                 guest_abi.at(struct_name).get_if_simple_or_struct()->size_bits);
      fmt::print(file, "  type data;\n");
      fmt::print(file, "}};\n");
      continue;
    }

    if (type_compat.at(type) == TypeCompatibility::None && !type_repack_info.emit_layout_wrappers) {
      // Disallow use of layout wrappers for this type by specializing without a definition
      fmt::print(file, "template<>\nstruct guest_layout<{}>;\n", struct_name);
      fmt::print(file, "template<>\nstruct host_layout<{}>;\n", struct_name);
      fmt::print(file, "guest_layout<{}>& to_guest(const host_layout<{}>&) = delete;\n", struct_name, struct_name);
      continue;
    }

    // Guest layout definition
    // NOTE: uint64_t has lower alignment requirements on 32-bit than on 64-bit, so we require tightly packed structs
    // TODO: Now we must emit padding bytes explicitly, though!
    fmt::print(file, "template<>\nstruct __attribute__((packed)) guest_layout<{}> {{\n", struct_name);
    if (type_compat.at(type) == TypeCompatibility::Full) {
      fmt::print(file, "  using type = {};\n", struct_name);
    } else {
      // Rebuild the struct from the guest's member list, wrapping each member in guest_layout<>
      fmt::print(file, "  struct type {{\n");
      for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) {
        fmt::print(file, "    guest_layout<{}{}> {};\n", member.type_name,
                   member.array_size ? fmt::format("[{}]", member.array_size.value()) : "", member.member_name);
      }
      fmt::print(file, "  }};\n");
    }
    fmt::print(file, "  type data;\n");
    fmt::print(file, "}};\n");

    // Const-qualified variant that derives from the non-const guest layout
    fmt::print(file, "template<>\nstruct guest_layout<const {}> : guest_layout<{}> {{\n", struct_name, struct_name);
    fmt::print(file, "  guest_layout& operator=(const guest_layout<{}>& other) {{ memcpy(this, &other, sizeof(other)); return *this; }}\n",
               struct_name);
    fmt::print(file, "}};\n");

    // Host layout definition
    fmt::print(file, "template<>\n");
    fmt::print(file, "struct host_layout<{}> {{\n", struct_name);
    fmt::print(file, "  using type = {};\n", struct_name);
    fmt::print(file, "  type data;\n");
    fmt::print(file, "\n");
    // Guest->host layout conversion (constructor taking the guest layout)
    fmt::print(file, "  host_layout(const guest_layout<{}>& from) :\n", struct_name);
    if (type_compat.at(type) == TypeCompatibility::Full) {
      fmt::print(file, "    data {{ from.data }} {{\n");
    } else {
      // Conversion needs struct repacking.
      // Wrapping each member in `host_layout<>` ensures this is done recursively.
      fmt::print(file, "    data {{\n");
      // With skip_arrays == true, only non-array members are emitted (as initializers);
      // with skip_arrays == false, only array members are emitted (as element-wise copy loops).
      auto map_field = [&file](clang::FieldDecl* member, bool skip_arrays) {
        auto decl_name = member->getNameAsString();
        auto type_name = member->getType().getAsString();
        auto array_type = llvm::dyn_cast<clang::ConstantArrayType>(member->getType());
        if (!array_type && skip_arrays) {
          if (member->getType()->isFunctionPointerType()) {
            // Function pointers must be handled manually, so zero them out by default
            fmt::print(file, "      .{} {{ }},\n", decl_name);
          } else {
            fmt::print(file, "      .{} = host_layout<{}> {{ from.data.{} }}.data,\n", decl_name, type_name, decl_name);
          }
        } else if (array_type && !skip_arrays) {
          // Copy element-wise in the constructor body
          fmt::print(file, "      for (size_t i = 0; i < {}; ++i) {{\n", array_type->getSize().getZExtValue());
          fmt::print(file, "        data.{}[i] = host_layout<{}> {{ from.data.{} }}.data[i];\n", decl_name, type_name, decl_name);
          fmt::print(file, "      }}\n");
        }
      };
      // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body
      for (auto* member : type->getAsStructureType()->getDecl()->fields()) {
        if (!type_repack_info.UsesCustomRepackFor(member)) {
          map_field(member, true);
        } else {
          // Leave field uninitialized
        }
      }
      fmt::print(file, "    }} {{\n");
      for (auto* member : type->getAsStructureType()->getDecl()->fields()) {
        if (!type_repack_info.UsesCustomRepackFor(member)) {
          map_field(member, false);
        } else {
          // Leave field uninitialized
        }
      }
    }
    fmt::print(file, "  }}\n");
    fmt::print(file, "}};\n\n");

    // Host->guest layout conversion (to_guest function)
    fmt::print(file, "inline guest_layout<{}> to_guest(const host_layout<{}>& from) {{\n", struct_name, struct_name);
    if (type_compat.at(type) == TypeCompatibility::Full) {
      fmt::print(file, "  guest_layout<{}> ret;\n", struct_name);
      fmt::print(file, "  static_assert(sizeof(from) == sizeof(ret));\n");
      fmt::print(file, "  memcpy(&ret, &from, sizeof(from));\n");
    } else {
      // Conversion needs struct repacking.
      // Wrapping each member in `to_guest(to_host_layout(...))` ensures this is done recursively.
      fmt::print(file, "  guest_layout<{}> ret {{ .data {{\n", struct_name);
      // Same two-pass scheme as above, but driven by the guest's member list:
      // non-array members first (skip_arrays == true), array members afterwards.
      auto map_field2 = [&file](const StructInfo::MemberInfo& member, bool skip_arrays) {
        auto& decl_name = member.member_name;
        auto& array_size = member.array_size;
        if (!array_size && skip_arrays) {
          if (member.is_function_pointer) {
            // Function pointers must be handled manually, so zero them out by default
            fmt::print(file, "    .{} {{ }},\n", decl_name);
          } else {
            fmt::print(file, "    .{} = to_guest(to_host_layout(from.data.{})),\n", decl_name, decl_name);
          }
        } else if (array_size && !skip_arrays) {
          // Copy element-wise after the initializer
          fmt::print(file, "    for (size_t i = 0; i < {}; ++i) {{\n", array_size.value());
          fmt::print(file, "      ret.data.{}.data[i] = to_guest(to_host_layout(from.data.{}[i]));\n", decl_name, decl_name);
          fmt::print(file, "    }}\n");
        }
      };

      // Prefer initialization via the constructor's initializer list if possible (to detect unintended narrowing), otherwise initialize in the body
      for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) {
        if (!type_repack_info.UsesCustomRepackFor(member.member_name)) {
          map_field2(member, true);
        } else {
          // Leave field uninitialized
        }
      }
      fmt::print(file, "  }} }};\n");
      for (auto& member : guest_abi.at(struct_name).get_if_struct()->members) {
        if (!type_repack_info.UsesCustomRepackFor(member.member_name)) {
          map_field2(member, false);
        } else {
          // Leave field uninitialized
        }
      }
    }
    fmt::print(file, "  return ret;\n");
    fmt::print(file, "}}\n\n");

    // Emit the custom repacking hooks. Without custom-repacked members these are no-op
    // definitions; otherwise the user-provided repacking functions are forward-declared
    // and wrapped.
    if (type_repack_info.custom_repacked_members.empty()) {
      fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name);
      fmt::print(file, "}}\n");
      fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, const host_layout<{}>& from) {{\n", struct_name, struct_name);
      fmt::print(file, "  return false;\n");
      fmt::print(file, "}}\n");
    } else {
      fmt::print(file, "void fex_custom_repack_entry(host_layout<{}>& into, const guest_layout<{}>& from);\n", struct_name, struct_name);
      fmt::print(file, "bool fex_custom_repack_exit(guest_layout<{}>& into, const host_layout<{}>& from);\n\n", struct_name, struct_name);

      fmt::print(file, "void fex_apply_custom_repacking_entry(host_layout<{}>& source, const guest_layout<{}>& from) {{\n", struct_name, struct_name);
      fmt::print(file, "  fex_custom_repack_entry(source, from);\n");
      fmt::print(file, "}}\n");

      fmt::print(file, "bool fex_apply_custom_repacking_exit(guest_layout<{}>& into, const host_layout<{}>& from) {{\n", struct_name, struct_name);
      fmt::print(file, "  return fex_custom_repack_exit(into, from);\n");
      fmt::print(file, "}}\n");
    }

    fmt::print(file, "template<> inline constexpr bool has_compatible_data_layout<{}> = {};\n", struct_name,
               (type_compat.at(type) == TypeCompatibility::Full));
  }
}

// Writes the generated guest-side and/or host-side thunk sources.
//
// Which outputs are produced depends on which of output_filenames.guest and
// output_filenames.host is non-empty:
//  - Guest file: MAKE_THUNK / MAKE_CALLBACK_THUNK invocations, the
//    fexfn_pack_* packing functions, aliased public exports, and the
//    FOREACH_*SYMBOL enumerator macros.
//  - Host file: layout wrappers, fexldr_type_* / fexldr_ptr_* declarations,
//    callback stubs, packed argument structs, the fexfn_unpack_* unpacking
//    functions, the exports[] table, and the fexldr_init_<libname> loader.
//
// Throws the error produced by report_error if a thunk has a pointer
// parameter whose pointee layout is neither compatible nor repackable.
void GenerateThunkLibsAction::OnAnalysisComplete(clang::ASTContext& context) {
  ErrorReporter report_error {context};

  // Compute data layout differences between host and guest
  auto type_compat = [&]() {
    std::unordered_map<const clang::Type*, TypeCompatibility> ret;
    const auto host_abi = ComputeDataLayout(context, types);
    for (const auto& [type, type_repack_info] : types) {
      if (type_repack_info.emit_layout_wrappers) {
        // Assume incompatible, since this annotation is set when
        // compatibility checks would otherwise fail (e.g. due to
        // circular references)
        ret.emplace(type, TypeCompatibility::None);
      } else if (!type_repack_info.pointers_only) {
        // Types marked pointers_only are not evaluated directly here
        GetTypeCompatibility(context, type, host_abi, ret);
      }
    }
    return ret;
  }();

  // Formats a C++ declaration of a variable called `name` with the given type.
  // Function pointer types need the name embedded inside the type string.
  static auto format_decl = [](clang::QualType type, const std::string_view& name) {
    // Peel off all pointer levels to find out whether this ultimately points to a function
    clang::QualType innermostPointee = type;
    while (innermostPointee->isPointerType()) {
      innermostPointee = innermostPointee->getPointeeType();
    }
    if (innermostPointee->isFunctionType()) {
      // Function pointer declarations (e.g. void (**callback)()) require
      // the variable name to be prefixed *and* suffixed.

      auto signature = type.getAsString();

      // Search for strings like (*), (**), or (*****). Insert the
      // variable name before the closing parenthesis
      auto needle = signature.begin();
      for (; needle != signature.end(); ++needle) {
        // Need at least 3 remaining characters for the shortest match "(*)"
        if (signature.end() - needle < 3 || std::string_view {&*needle, 2} != "(*") {
          continue;
        }
        // Skip over the run of asterisks following the opening parenthesis
        while (*++needle == '*') {}
        if (*needle == ')') {
          // needle now points at the closing parenthesis, i.e. the insertion point
          break;
        }
      }
      if (needle == signature.end()) {
        // It's *probably* a typedef, so this should be safe after all
        return fmt::format("{} {}", signature, name);
      } else {
        signature.insert(needle, name.begin(), name.end());
        return signature;
      }
    } else {
      return type.getAsString() + " " + std::string(name);
    }
  };

  // Formats the parameter list as comma-separated declarations named a_0, a_1, ...
  auto format_function_params = [](const FunctionParams& params) {
    std::string ret;
    for (std::size_t idx = 0; idx < params.param_types.size(); ++idx) {
      auto& type = params.param_types[idx];
      ret += format_decl(type, fmt::format("a_{}", idx)) + ", ";
    }
    // drop trailing ", "
    ret.resize(ret.size() > 2 ? ret.size() - 2 : 0);
    return ret;
  };

  // Returns the SHA256 digest of the function name, optionally prefixed by "<libname>:"
  auto get_sha256 = [this](const std::string& function_name, bool include_libname) {
    std::string sha256_message = (include_libname ? libname + ":" : "") + function_name;
    std::vector<unsigned char> sha256(SHA256_DIGEST_LENGTH);
    SHA256(reinterpret_cast<const unsigned char*>(sha256_message.data()), sha256_message.size(), sha256.data());
    return sha256;
  };

  // Builds the identifier used for the callback passed as parameter `param_index` of `function_name`
  auto get_callback_name = [](std::string_view function_name, unsigned param_index) -> std::string {
    return fmt::format("{}CBFN{}", function_name, param_index);
  };

  // Files used guest-side
  if (!output_filenames.guest.empty()) {
    std::ofstream file(output_filenames.guest);

    // Guest->Host transition points for API functions
    file << "extern \"C\" {\n";
    for (auto& thunk : thunks) {
      const auto& function_name = thunk.function_name;
      auto sha256 = get_sha256(function_name, true);
      fmt::print(file, "MAKE_THUNK({}, {}, \"{:#02x}\")\n", libname, function_name, fmt::join(sha256, ", "));
    }
    file << "}\n";

    // Guest->Host transition points for invoking runtime host-function pointers based on their signature
    std::vector<std::vector<unsigned char>> sha256s;
    for (auto type_it = thunked_funcptrs.begin(); type_it != thunked_funcptrs.end(); ++type_it) {
      auto* type = type_it->second.first;
      std::string funcptr_signature = clang::QualType {type, 0}.getAsString();

      // The hash is derived from the signature string only (no library name prefix)
      auto cb_sha256 = get_sha256("fexcallback_" + funcptr_signature, false);
      auto it = std::find(sha256s.begin(), sha256s.end(), cb_sha256);
      if (it != sha256s.end()) {
        // TODO: Avoid this ugly way of avoiding duplicates
        continue;
      } else {
        sha256s.push_back(cb_sha256);
      }

      // Thunk used for guest-side calls to host function pointers
      file << "  // " << funcptr_signature << "\n";
      // The callback thunk is numbered by its position within thunked_funcptrs
      auto funcptr_idx = std::distance(thunked_funcptrs.begin(), type_it);
      fmt::print(file, "  MAKE_CALLBACK_THUNK(callback_{}, {}, \"{:#02x}\");\n", funcptr_idx, funcptr_signature, fmt::join(cb_sha256, ", "));
    }

    // Thunks-internal packing functions
    file << "extern \"C\" {\n";
    for (auto& data : thunks) {
      const auto& function_name = data.function_name;
      bool is_void = data.return_type->isVoidType();
      file << "FEX_PACKFN_LINKAGE auto fexfn_pack_" << function_name << "(";
      for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
        auto& type = data.param_types[idx];
        file << (idx == 0 ? "" : ", ") << format_decl(type, fmt::format("a_{}", idx));
      }
      // Using trailing return type as it makes handling function pointer returns much easier
      file << ") -> " << data.return_type.getAsString() << " {\n";
      // The packed struct holds all parameters followed by the return value (if any)
      file << "  struct __attribute__((packed)) {\n";
      for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
        auto& type = data.param_types[idx];
        file << "    " << format_decl(type.getUnqualifiedType(), fmt::format("a_{}", idx)) << ";\n";
      }
      if (!is_void) {
        file << "    " << format_decl(data.return_type, "rv") << ";\n";
      } else if (data.param_types.size() == 0) {
        // Avoid "empty struct has size 0 in C, size 1 in C++" warning
        file << "    char force_nonempty;\n";
      }
      file << "  } args;\n";

      // Emit one assignment per parameter to fill the packed struct
      for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
        auto cb = data.callbacks.find(idx);

        file << "  args.a_" << idx << " = ";
        if (cb == data.callbacks.end() || cb->second.is_stub) {
          // Non-callback parameters and stubbed callbacks are copied as-is
          file << "a_" << idx << ";\n";
        } else {
          // Before passing guest function pointers to the host, wrap them in a host-callable trampoline
          fmt::print(file, "AllocateHostTrampolineForGuestFunction(a_{});\n", idx);
        }
      }
      file << "  fexthunks_" << libname << "_" << function_name << "(&args);\n";
      if (!is_void) {
        file << "  return args.rv;\n";
      }
      file << "}\n";
    }
    file << "}\n";

    // Publicly exports equivalent to symbols exported from the native guest library
    file << "extern \"C\" {\n";
    for (auto& data : thunked_api) {
      // Functions with a custom guest implementation get no generated alias
      if (data.custom_guest_impl) {
        continue;
      }

      const auto& function_name = data.function_name;

      file << "__attribute__((alias(\"fexfn_pack_" << function_name << "\"))) auto " << function_name << "(";
      for (std::size_t idx = 0; idx < data.param_types.size(); ++idx) {
        auto& type = data.param_types[idx];
        file << (idx == 0 ? "" : ", ") << format_decl(type, "a_" + std::to_string(idx));
      }
      file << ") -> " << data.return_type.getAsString() << ";\n";
    }
    file << "}\n";

    // Symbol enumerators
    for (std::size_t namespace_idx = 0; namespace_idx < namespaces.size(); ++namespace_idx) {
      const auto& ns = namespaces[namespace_idx];
      // Macro is named FOREACH_<ns>_SYMBOL, or FOREACH_SYMBOL if the namespace name is empty
      file << "#define FOREACH_" << ns.name << (ns.name.empty() ? "" : "_") << "SYMBOL(EXPAND) \\\n";
      for (auto& symbol : thunked_api) {
        // Symbols without an explicit symtable_namespace are listed under namespace index 0
        if (symbol.symtable_namespace.value_or(0) == namespace_idx) {
          file << "  EXPAND(" << symbol.function_name << ", \"TODO\") \\\n";
        }
      }
      // Blank line terminates the backslash-continued macro definition
      file << "\n";
    }
  }

  // Files used host-side
  if (!output_filenames.host.empty()) {
    std::ofstream file(output_filenames.host);

    EmitLayoutWrappers(context, file, type_compat);

    // Forward declarations for symbols loaded from the native host library
    for (auto& import : thunked_api) {
      const auto& function_name = import.function_name;
      const char* variadic_ellipsis = import.is_variadic ? ", ..." : "";
      file << "using fexldr_type_" << libname << "_" << function_name << " = auto (" << format_function_params(import) << variadic_ellipsis
           << ") -> " << import.return_type.getAsString() << ";\n";
      file << "static fexldr_type_" << libname << "_" << function_name << " *fexldr_ptr_" << libname << "_" << function_name << ";\n";
    }

    file << "extern \"C\" {\n";
    for (auto& thunk : thunks) {
      const auto& function_name = thunk.function_name;

      // Generate stub callbacks
      for (auto& [cb_idx, cb] : thunk.callbacks) {
        if (cb.is_stub) {
          const char* variadic_ellipsis = cb.is_variadic ? ", ..." : "";
          auto cb_function_name = "fexfn_unpack_" + get_callback_name(function_name, cb_idx) + "_stub";
          file << "[[noreturn]] static " << cb.return_type.getAsString() << " " << cb_function_name << "(" << format_function_params(cb)
               << variadic_ellipsis << ") {\n";
          file << "  fprintf(stderr, \"FATAL: Attempted to invoke callback stub for " << function_name << "\\n\");\n";
          file << "  std::abort();\n";
          file << "}\n";
        }
      }

      // Returns the type name to use inside guest_layout<>: builtin integers (and pointers to them)
      // are mapped to fixed-size integer names using the sizes recorded in guest_abi
      auto get_guest_type_name = [this](clang::QualType type) {
        if (type->isBuiltinType() && type->isIntegerType()) {
          auto size = guest_abi.at(type.getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits;
          return get_fixed_size_int_name(type.getTypePtr(), size);
        } else if (type->isPointerType() && type->getPointeeType()->isBuiltinType() && type->getPointeeType()->isIntegerType() &&
                   !type->getPointeeType()->isVoidType()) {
          auto size = guest_abi.at(type->getPointeeType().getUnqualifiedType().getAsString()).get_if_simple_or_struct()->size_bits;
          return fmt::format("{}{}*", type->getPointeeType().isConstQualified() ? "const " : "",
                             get_fixed_size_int_name(type->getPointeeType().getTypePtr(), size));
        } else {
          return type.getUnqualifiedType().getAsString();
        }
      };

      // Forward declarations for user-provided implementations
      if (thunk.custom_host_impl) {
        file << "static auto fexfn_impl_" << libname << "_" << function_name << "(";
        for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) {
          auto& type = thunk.param_types[idx];

          file << (idx == 0 ? "" : ", ");

          if (thunk.param_annotations[idx].is_passthrough) {
            // Passthrough parameters are handed to the implementation as guest_layout<T>
            fmt::print(file, "guest_layout<{}> a_{}", get_guest_type_name(type), idx);
          } else {
            fmt::print(file, "{}", format_decl(type, fmt::format("a_{}", idx)));
          }
        }
        // Using trailing return type as it makes handling function pointer returns much easier
        // Annotation index -1 is used for the return value
        bool is_passthrough_ret = thunk.param_annotations[-1].is_passthrough;
        fmt::print(file, ") -> {}{}{};\n", is_passthrough_ret ? "guest_layout<" : "", thunk.return_type.getAsString(),
                   is_passthrough_ret ? ">" : "");
      }

      // Check data layout compatibility of parameter types
      // TODO: Also check non-struct/non-pointer types
      // TODO: Also check return type
      for (size_t param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) {
        const auto& param_type = thunk.param_types[param_idx];
        // Only pointer-to-struct parameters are checked
        if (!param_type->isPointerType() || !param_type->getPointeeType()->isStructureType()) {
          continue;
        }
        if (!thunk.param_annotations[param_idx].is_passthrough) {
          auto type = param_type->getPointeeType();
          if (!types.at(context.getCanonicalType(type.getTypePtr())).assumed_compatible &&
              type_compat.at(context.getCanonicalType(type.getTypePtr())) == TypeCompatibility::None) {
            // TODO: Factor in "assume_compatible_layout" annotations here
            //       That annotation should cause the type to be treated as TypeCompatibility::Full
            throw report_error(thunk.decl->getLocation(), "Unsupported parameter type %0").AddTaggedVal(param_type);
          }
        }
      }

      // Packed argument structs used in fexfn_unpack_*
      auto GeneratePackedArgs = [&](const auto& function_name, const ThunkedFunction& thunk) -> std::string {
        std::string struct_name = "fexfn_packed_args_" + libname + "_" + function_name;
        file << "struct __attribute__((packed)) " << struct_name << " {\n";

        // Every member is wrapped in guest_layout<>
        for (std::size_t idx = 0; idx < thunk.param_types.size(); ++idx) {
          fmt::print(file, "  guest_layout<{}> a_{};\n", get_guest_type_name(thunk.param_types[idx]), idx);
        }
        if (!thunk.return_type->isVoidType()) {
          fmt::print(file, "  guest_layout<{}> rv;\n", get_guest_type_name(thunk.return_type));
        } else if (thunk.param_types.size() == 0) {
          // Avoid "empty struct has size 0 in C, size 1 in C++" warning
          file << "    char force_nonempty;\n";
        }
        file << "};\n";
        return struct_name;
      };
      auto struct_name = GeneratePackedArgs(function_name, thunk);

      // Unpacking functions
      // Call the symbol loaded from the host library unless a custom host implementation is provided
      auto function_to_call = "fexldr_ptr_" + libname + "_" + function_name;
      if (thunk.custom_host_impl) {
        function_to_call = "fexfn_impl_" + libname + "_" + function_name;
      }

      // Returns the type name with top-level qualifiers removed and, for pointers, "const" removed from the pointee
      auto get_type_name_with_nonconst_pointee = [&](clang::QualType type) {
        type = type.getLocalUnqualifiedType();
        if (type->isPointerType()) {
          // Strip away "const" from pointee type
          type = context.getPointerType(type->getPointeeType().getLocalUnqualifiedType());
        }
        return get_type_name(context, type.getTypePtr());
      };


      file << "static void fexfn_unpack_" << libname << "_" << function_name << "(" << struct_name << "* args) {\n";

      // Emit a local a_<idx> wrapper for each parameter that needs one
      for (unsigned param_idx = 0; param_idx != thunk.param_types.size(); ++param_idx) {
        // Stubbed callbacks are replaced by the generated stub function at the call site
        if (thunk.callbacks.contains(param_idx) && thunk.callbacks.at(param_idx).is_stub) {
          continue;
        }

        auto& param_type = thunk.param_types[param_idx];
        // A pointer parameter is assumed compatible if annotated as such, or if it points to
        // a struct (or to a pointer to a struct) whose registered type is marked assumed_compatible
        const bool is_assumed_compatible =
          param_type->isPointerType() &&
          (thunk.param_annotations[param_idx].assume_compatible ||
           ((param_type->getPointeeType()->isStructureType() ||
             (param_type->getPointeeType()->isPointerType() && param_type->getPointeeType()->getPointeeType()->isStructureType())) &&
            (types.contains(context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr())) &&
             LookupType(context, context.getCanonicalType(param_type->getPointeeType()->getLocallyUnqualifiedSingleStepDesugaredType().getTypePtr()))
               .assumed_compatible)));

        std::optional<TypeCompatibility> pointee_compat;
        if (param_type->isPointerType()) {
          // Get TypeCompatibility from existing entry, or register TypeCompatibility::None if no entry exists
          // TODO: Currently needs TypeCompatibility::Full workaround...
          // NOTE: As written, a missing entry is registered as TypeCompatibility::Full (see TODO above)
          pointee_compat =
            type_compat.emplace(context.getCanonicalType(param_type->getPointeeType().getTypePtr()), TypeCompatibility::Full).first->second;
        }

        if (thunk.param_annotations[param_idx].is_passthrough) {
          // args are passed directly to function, no need to use `unpacked` wrappers
          continue;
        }

        // Layout repacking happens here
        if (!param_type->isPointerType() || (is_assumed_compatible || pointee_compat == TypeCompatibility::Full) ||
            param_type->getPointeeType()->isBuiltinType() /* TODO: handle size_t. Actually, properly check for data layout compatibility */) {
          // Fully compatible
          fmt::print(file, "  host_layout<{}> a_{} {{ args->a_{} }};\n", get_type_name(context, param_type.getTypePtr()), param_idx, param_idx);
        } else if (pointee_compat == TypeCompatibility::Repackable) {
          // TODO: Require opt-in for this to be emitted since it's single-element only; otherwise, pointers-to-arrays arguments will cause stack trampling
          fmt::print(file, "  auto a_{} = make_repack_wrapper<{}>(args->a_{});\n", param_idx,
                     get_type_name_with_nonconst_pointee(param_type), param_idx);
        } else {
          throw report_error(thunk.decl->getLocation(), "Cannot generate unpacking function for function %0 with unannotated pointer "
                                                        "parameter %1")
            .AddString(function_name)
            .AddTaggedVal(param_type);
        }
      }

      // Emit the call itself; the result (if any) is stored to args->rv
      if (!thunk.return_type->isVoidType()) {
        fmt::print(file, "  args->rv = ");
        // Function pointer and passthrough return values are stored without the to_guest/to_host_layout conversion
        if (!thunk.return_type->isFunctionPointerType() && !thunk.param_annotations[-1].is_passthrough) {
          fmt::print(file, "to_guest(to_host_layout<{}>(", thunk.return_type.getAsString());
        }
      }
      fmt::print(file, "{}(", function_to_call);
      {
        // Produces the argument expression for the parameter at index idx
        auto format_param = [&](std::size_t idx) {
          auto cb = thunk.callbacks.find(idx);
          if (cb != thunk.callbacks.end() && cb->second.is_stub) {
            return "fexfn_unpack_" + get_callback_name(function_name, cb->first) + "_stub";
          } else if (cb != thunk.callbacks.end()) {
            auto arg_name = fmt::format("args->a_{}", idx); // Use parameter directly
            // Use comma operator to inject a function call before returning the argument
            // TODO: Avoid casting away the guest_layout
            if (thunk.custom_host_impl) {
              return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), {})", arg_name, arg_name);
            } else {
              return fmt::format("(FinalizeHostTrampolineForGuestFunction({}), ({})(uint64_t {{ {}.data }}))", arg_name,
                                 get_type_name(context, thunk.param_types[idx].getTypePtr()), arg_name);
            }
          } else if (thunk.param_annotations[idx].is_passthrough) {
            // Pass raw guest_layout<T*>
            return fmt::format("args->a_{}", idx);
          } else {
            // Unwrap host_layout/repack_wrapper layer
            return fmt::format("unwrap_host(a_{})", idx);
          }
        };

        fmt::print(file, "{}", format_function_args(thunk, format_param));
      }
      // Close the to_guest(to_host_layout<...>( wrapper opened above; the condition must mirror the one used there
      if (!thunk.return_type->isVoidType() && !thunk.return_type->isFunctionPointerType() && !thunk.param_annotations[-1].is_passthrough) {
        fmt::print(file, "))");
      }
      fmt::print(file, ");\n");

      file << "}\n";
    }
    file << "}\n";

    // Endpoints for Guest->Host invocation of API functions
    file << "static ExportEntry exports[] = {\n";
    for (auto& thunk : thunks) {
      const auto& function_name = thunk.function_name;
      // Same hash input as used for MAKE_THUNK in the guest file
      auto sha256 = get_sha256(function_name, true);
      fmt::print(file, "  {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&fexfn_unpack_{}_{}}}, // {}:{}\n", fmt::join(sha256, "\\x"), libname,
                 function_name, libname, function_name);
    }

    // Endpoints for Guest->Host invocation of runtime host-function pointers
    // NOTE: The function parameters may differ slightly between guest and host,
    //       e.g. due to differing sizes or due to data layout differences.
    //       Hence, two separate parameter lists are managed here.
    for (auto& host_funcptr_entry : thunked_funcptrs) {
      auto& [type, param_annotations] = host_funcptr_entry.second;
      auto func_type = type->getAs<clang::FunctionProtoType>();
      FuncPtrInfo info = {};

      // TODO: Use GetTypeNameWithFixedSizeIntegers
      info.result = func_type->getReturnType().getAsString();

      // NOTE: In guest contexts, integer types must be mapped to
      //       fixed-size equivalents. Since this is a host context, this
      //       isn't strictly necessary here, but it makes matching up
      //       guest_layout/host_layout constructors easier.
      for (auto arg : func_type->getParamTypes()) {
        info.args.push_back(GetTypeNameWithFixedSizeIntegers(context, arg));
      }

      // Build one ParameterAnnotations initializer per entry, starting at index -1
      // (used for the return value) followed by each parameter
      std::string annotations;
      for (int param_idx = -1; param_idx < (int)info.args.size(); ++param_idx) {
        if (param_idx != -1) {
          annotations += ", ";
        }

        annotations += "ParameterAnnotations {";
        if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).is_passthrough) {
          annotations += ".is_passthrough=true,";
        }
        if (param_annotations.contains(param_idx) && param_annotations.at(param_idx).assume_compatible) {
          annotations += ".assume_compatible=true,";
        }
        annotations += "}";
      }
      auto guest_info = LookupGuestFuncPtrInfo(host_funcptr_entry.first.c_str());
      // TODO: Consider differences in guest/host return types
      fmt::print(file, "  {{(uint8_t*)\"\\x{:02x}\", (void(*)(void *))&GuestWrapperForHostFunction<{}({}){}{}>::Call<{}>}}, // {}\n",
                 fmt::join(guest_info.sha256, "\\x"), guest_info.result, fmt::join(info.args, ", "), guest_info.args.empty() ? "" : ", ",
                 fmt::join(guest_info.args, ", "), annotations, host_funcptr_entry.first);
    }

    // The exports table is terminated by a null entry
    file << "  { nullptr, nullptr }\n";
    file << "};\n";

    // Symbol lookup from native host library
    file << "static void* fexldr_ptr_" << libname << "_so;\n";
    file << "extern \"C\" bool fexldr_init_" << libname << "() {\n";

    // The library filename is "<libfilename>.so", with ".<lib_version>" appended if a version is set
    std::string version_suffix;
    if (lib_version) {
      version_suffix = '.' + std::to_string(*lib_version);
    }
    const std::string library_filename = libfilename + ".so" + version_suffix;

    // Load the host library in the global symbol namespace.
    // This follows how these libraries get loaded in a non-emulated environment,
    // Either by directly linking to the library or a loader (In OpenGL or Vulkan) putting everything in the global namespace.
    file << "  fexldr_ptr_" << libname << "_so = dlopen(\"" << library_filename << "\", RTLD_GLOBAL | RTLD_LAZY);\n";

    file << "  if (!fexldr_ptr_" << libname << "_so) { return false; }\n\n";
    // Each function pointer is resolved through the loader function named by import.host_loader
    for (auto& import : thunked_api) {
      fmt::print(file, "  (void*&)fexldr_ptr_{}_{} = {}(fexldr_ptr_{}_so, \"{}\");\n", libname, import.function_name, import.host_loader,
                 libname, import.function_name);
    }
    file << "  return true;\n";
    file << "}\n";
  }
}

// Runs a GenerateThunkLibsAction over the given compiler invocation.
//
// Sets up a clang::CompilerInstance (invocation, file manager, diagnostics,
// source manager), executes the action, and clears the file manager's stat
// cache afterwards. The preprocessor branches select the clang API variant
// matching LLVM_VERSION_MAJOR.
//
// Returns false if no diagnostics engine could be set up; otherwise returns
// the result of ExecuteAction.
bool GenerateThunkLibsActionFactory::runInvocation(std::shared_ptr<clang::CompilerInvocation> Invocation, clang::FileManager* Files,
                                                   std::shared_ptr<clang::PCHContainerOperations> PCHContainerOps,
                                                   clang::DiagnosticConsumer* DiagConsumer) {
  // From LLVM 21 on, the invocation is passed to the constructor instead of a setter
#if LLVM_VERSION_MAJOR >= 21
  clang::CompilerInstance instance(std::move(Invocation), std::move(PCHContainerOps));
#else
  clang::CompilerInstance instance(std::move(PCHContainerOps));
  instance.setInvocation(std::move(Invocation));
#endif
  instance.setFileManager(Files);

  GenerateThunkLibsAction generator_action(libname, output_filenames, abi);

  // Attach the caller-provided diagnostic consumer (ownership is not transferred)
#if LLVM_VERSION_MAJOR >= 22
  auto diagnostics =
    clang::CompilerInstance::createDiagnostics(instance.getVirtualFileSystem(), instance.getDiagnosticOpts(), DiagConsumer, false);
  instance.setDiagnostics(std::move(diagnostics));
#elif LLVM_VERSION_MAJOR >= 20
  instance.createDiagnostics(instance.getVirtualFileSystem(), DiagConsumer, false);
#else
  instance.createDiagnostics(DiagConsumer, false);
#endif
  if (!instance.hasDiagnostics()) {
    // Nothing can be reported without a diagnostics engine, so give up
    return false;
  }

#if LLVM_VERSION_MAJOR >= 22
  instance.createSourceManager();
#else
  instance.createSourceManager(*Files);
#endif

  const bool action_succeeded = instance.ExecuteAction(generator_action);

  // Clear the stat cache regardless of whether the action succeeded
  Files->clearStatCache();
  return action_succeeded;
}


================================================
FILE: ThunkLibs/Generator/interface.h
================================================
#include <clang/Tooling/Tooling.h>

#include <optional>
#include <string>

// Paths of the files to generate. An empty string means the corresponding
// output is not generated.
struct OutputFilenames {
  // Output path for the host-side file (set when the generator target is "-host")
  std::string host;
  // Output path for the guest-side file (set when the generator target is "-guest")
  std::string guest;
};

class AnalyzeDataLayoutActionFactory : public clang::tooling::FrontendActionFactory {
  std::unique_ptr<struct ABI> abi;

public:
  AnalyzeDataLayoutActionFactory();
  ~AnalyzeDataLayoutActionFactory();

  std::unique_ptr<clang::FrontendAction> create() override;

  const ABI& GetDataLayout() {
    return *abi;
  }

  std::unique_ptr<ABI> TakeDataLayout() {
    return std::move(abi);
  }
};

class DataLayoutCompareActionFactory : public clang::tooling::FrontendActionFactory {
  const ABI& abi;

public:
  DataLayoutCompareActionFactory(const ABI&);
  ~DataLayoutCompareActionFactory();

  std::unique_ptr<clang::FrontendAction> create() override;
};

// Tool action that runs the thunk library generator on a compiler invocation.
//
// Stores the library name, the output file paths and a reference to an ABI,
// and forwards them to the generator action in runInvocation().
// The ABI is held by reference, so it must outlive this object.
class GenerateThunkLibsActionFactory : public clang::tooling::ToolAction {
public:
  GenerateThunkLibsActionFactory(std::string_view libname_, OutputFilenames output_filenames_, const ABI& abi_)
    // std::string_view is a cheap non-owning view: copy its contents into the
    // owned string directly (std::move on it would have no effect)
    : libname(libname_)
    , output_filenames(std::move(output_filenames_))
    , abi(abi_) {}

  bool runInvocation(std::shared_ptr<clang::CompilerInvocation> Invocation, clang::FileManager* Files,
                     std::shared_ptr<clang::PCHContainerOperations> PCHContainerOps, clang::DiagnosticConsumer* DiagConsumer) override;

private:
  std::string libname;
  OutputFilenames output_filenames;
  const ABI& abi;
};


================================================
FILE: ThunkLibs/Generator/main.cpp
================================================
#include "clang/Tooling/Tooling.h"
#include "clang/Tooling/CompilationDatabase.h"

#include "llvm/Support/Signals.h"

#include <iostream>
#include <optional>
#include <string>

#include "interface.h"

using namespace clang::tooling;

// Prints the command line synopsis to stderr.
//
// The listed positional arguments match what main() reads before the "--"
// separator: input filename, library name, generator target ("-host" or
// "-guest"), output filename, and the x86 rootfs path.
void print_usage(const char* program_name) {
  std::cerr << "Usage: " << program_name << " <filename> <libname> <gen_target> <output_filename> <x86_rootfs> -- <clang_flags>\n";
}

// Entry point of the thunk generator.
//
// Expected command line:
//   <filename> <libname> <-host|-guest> <output_filename> <x86_rootfs> [-for-32bit-guest] -- <clang_flags>
//
// Two clang tool passes are run over <filename>:
//  1. A guest pass (x86-64 or i686 target) that analyses the guest data layout.
//  2. The generator pass for the requested target, which receives that layout.
//
// Returns EXIT_FAILURE on malformed arguments, otherwise the result of the
// generator pass.
int main(int argc, char* const argv[]) {
  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);

  if (argc < 6) {
    print_usage(argv[0]);
    return EXIT_FAILURE;
  }

  // Parse compile flags after "--" (this updates argc to the index of the "--" separator)
  std::string error;
  auto compile_db = FixedCompilationDatabase::loadFromCommandLine(argc, argv, error);
  if (!compile_db) {
    print_usage(argv[0]);
    std::cerr << "\nError: " << error << "\n";
    return EXIT_FAILURE;
  }

  // Process arguments before the "--" separator:
  // program name + 5 mandatory arguments + 1 optional flag
  if (argc != 6 && argc != 7) {
    print_usage(argv[0]);
    return EXIT_FAILURE;
  }

  char* const* arg = argv + 1;
  const auto filename = *arg++;
  const std::string libname = *arg++;
  const std::string target_abi = *arg++;
  const std::string output_filename = *arg++;
  const std::string x86_rootfs = *arg++;

  // The only optional argument is the 32-bit guest switch. Reject anything
  // else instead of silently generating code for a 64-bit guest.
  const bool has_optional_flag = (argc == 7);
  if (has_optional_flag && *arg != std::string_view {"-for-32bit-guest"}) {
    print_usage(argv[0]);
    std::cerr << "\nError: Unrecognized generator option \"" << *arg << "\"\n";
    return EXIT_FAILURE;
  }
  const bool is_32bit_guest = has_optional_flag;
  const char* const guest_triple = is_32bit_guest ? "i686-linux-gnu" : "x86_64-linux-gnu";

  OutputFilenames output_filenames;
  if (target_abi == "-host") {
    output_filenames.host = output_filename;
  } else if (target_abi == "-guest") {
    output_filenames.guest = output_filename;
  } else {
    std::cerr << "Unrecognized generator target ABI \"" << target_abi << "\"\n";
    return EXIT_FAILURE;
  }

  ClangTool Tool(*compile_db, {filename});
  if (CLANG_RESOURCE_DIR[0] != 0) {
    auto set_resource_directory = [](const clang::tooling::CommandLineArguments& Args, clang::StringRef) {
      clang::tooling::CommandLineArguments AdjustedArgs = Args;
      AdjustedArgs.push_back(std::string {"-resource-dir="} + CLANG_RESOURCE_DIR);
      return AdjustedArgs;
    };
    Tool.appendArgumentsAdjuster(set_resource_directory);
  }

  // Copy the tool before the target-specific adjusters are added, so that the
  // guest analysis pass and the generator pass only share the resource directory setup
  ClangTool GuestTool = Tool;

  // Appends the sysroot/include flags for the x86 development rootfs.
  // A rootfs of "/" means "no dedicated rootfs" and leaves the arguments untouched.
  auto append_x86_rootfs_includes = [&x86_rootfs](clang::tooling::CommandLineArguments& Args) {
    if (x86_rootfs == "/") {
      return;
    }

    Args.push_back("--sysroot");
    Args.push_back(x86_rootfs);

    // The dev rootfs is only really needed for the standard library.
    // Other libraries generally don't have platform specific headers.
    Args.push_back("-idirafter");
    Args.push_back("/usr/include/");
  };

  // Analyse data layout for guest ABI
  GuestTool.appendArgumentsAdjuster([&](const clang::tooling::CommandLineArguments& Args, clang::StringRef) {
    clang::tooling::CommandLineArguments AdjustedArgs = Args;
    if (is_32bit_guest) {
      AdjustedArgs.push_back("-m32");
      AdjustedArgs.push_back("-DIS_32BIT_THUNK");
    }
    AdjustedArgs.push_back("-DGUEST_THUNK_LIBRARY");
    AdjustedArgs.push_back(std::string {"--target="} + guest_triple);
    AdjustedArgs.push_back("-isystem");
    AdjustedArgs.push_back(std::string {"/usr/"} + guest_triple + "/include/");

    append_x86_rootfs_includes(AdjustedArgs);

    return AdjustedArgs;
  });
  auto data_layout_analysis_factory = std::make_unique<AnalyzeDataLayoutActionFactory>();
  // NOTE(review): the result of the analysis pass is intentionally left
  // unchecked to preserve existing behavior; confirm whether a failed guest
  // pass should abort generation.
  GuestTool.run(data_layout_analysis_factory.get());
  auto& data_layout = data_layout_analysis_factory->GetDataLayout();

  // Run generator for target ABI
  Tool.appendArgumentsAdjuster([&](const clang::tooling::CommandLineArguments& Args, clang::StringRef) {
    clang::tooling::CommandLineArguments AdjustedArgs = Args;
    AdjustedArgs.push_back("-DIS_HOST_THUNKGEN_PASS");
    if (target_abi == "-guest") {
      append_x86_rootfs_includes(AdjustedArgs);
    }

    return AdjustedArgs;
  });
  // The temporary factory lives until the end of the full expression, i.e. for the whole run
  return Tool.run(std::make_unique<GenerateThunkLibsActionFactory>(libname, std::move(output_filenames), data_layout).get());
}


================================================
FILE: ThunkLibs/GuestLibs/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(guest-thunks)
include(${FEX_PROJECT_SOURCE_DIR}/Data/CMake/version_to_variables.cmake)

option(ENABLE_CLANG_THUNKS "Enable building thunks with clang" FALSE)

# Link with lld when the thunks are built with clang
if (ENABLE_CLANG_THUNKS)
  set(LD_OVERRIDE "-fuse-ld=lld")
  add_link_options(${LD_OVERRIDE})
endif()

# The x86 development rootfs is forwarded to the thunk generator by generate() below
if (NOT X86_DEV_ROOTFS)
  message(FATAL_ERROR "X86_DEV_ROOTFS must be set(use \"/\" to ignore)")
endif()

# Route compiler invocations through ccache when it is installed
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  message(STATUS "CCache enabled for guest thunks")
  set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
endif()

if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
  # We've been included using ExternalProject_add, so set up the actual thunk libraries to be cross-compiled
  # (GENERATOR_EXE and BITNESS are not set in this branch and must be provided from outside)
  set(CMAKE_CXX_STANDARD 20)

  # This gets passed in from the main cmake project
  set(DATA_DIRECTORY "" CACHE PATH "Global data directory (override)")
  if (NOT DATA_DIRECTORY)
    set(DATA_DIRECTORY "${CMAKE_INSTALL_PREFIX}/share/fex-emu")
  endif()

  set(TARGET_TYPE SHARED)
  set(GENERATE_GUEST_INSTALL_TARGETS TRUE)

  # uninstall target
  if(NOT TARGET uninstall)
    configure_file("${FEX_PROJECT_SOURCE_DIR}/Data/CMake/cmake_uninstall.cmake.in"
      "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cmake_uninstall.cmake"
      IMMEDIATE @ONLY)

    add_custom_target(uninstall
      COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cmake_uninstall.cmake)
  endif()
else()
  # We've been included using add_subdirectory, so set up targets for IDE integration using the host toolchain
  # Object libraries are used here and no install rules are generated
  set(GENERATOR_EXE thunkgen)
  set(TARGET_TYPE OBJECT)
  set(GENERATE_GUEST_INSTALL_TARGETS FALSE)
  set(BITNESS 64)
endif()

# Syntax: generate(libxyz libxyz-interface.cpp)
# This defines a target and a custom command:
# - custom command: Main build step that runs the thunk generator on the given interface definition
# - libxyz-guest-deps: Interface target to read include directories from which are passed to libclang when parsing the interface definition
#
# Reads the variables GENERATOR_EXE, BITNESS and X86_DEV_ROOTFS from the calling scope.
# The list of generated files is returned to the caller in GEN_<NAME>.
function(generate NAME SOURCE_FILE)
  # Interface target for the user to add include directories
  add_library(${NAME}-guest-deps INTERFACE)
  target_include_directories(${NAME}-guest-deps INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/../include")
  if (BITNESS EQUAL 32)
    target_compile_definitions(${NAME}-guest-deps INTERFACE IS_32BIT_THUNK)
  endif()
  # Shorthand for the include directories added after calling this function.
  # This is not evaluated directly, hence directories added after return are still picked up
  set(prop "$<TARGET_PROPERTY:${NAME}-guest-deps,INTERFACE_INCLUDE_DIRECTORIES>")
  set(compile_prop "$<TARGET_PROPERTY:${NAME}-guest-deps,INTERFACE_COMPILE_DEFINITIONS>")

  # Run thunk generator for each of the given output files
  set(OUTFOLDER "${CMAKE_CURRENT_BINARY_DIR}/gen")
  set(OUTFILE "${OUTFOLDER}/thunkgen_guest_${NAME}.inl")

  file(MAKE_DIRECTORY "${OUTFOLDER}")

  # BITNESS_FLAGS is passed to the generator itself (before "--"),
  # BITNESS_FLAGS2 is passed on to clang (after "--")
  if (BITNESS EQUAL 32)
    set(BITNESS_FLAGS "-for-32bit-guest")
    set(BITNESS_FLAGS2 "-m32" "--target=i686-linux-gnu" "-isystem" "/usr/i686-linux-gnu/include/")
  else()
    set(BITNESS_FLAGS "")
    set(BITNESS_FLAGS2 "--target=x86_64-linux-gnu" "-isystem" "/usr/x86_64-linux-gnu/include/")
  endif()

  # Generator arguments: <interface file> <libname> -guest <output file> <x86 rootfs> [-for-32bit-guest] -- <clang flags>
  add_custom_command(
    OUTPUT "${OUTFILE}"
    DEPENDS "${GENERATOR_EXE}"
    DEPENDS "${SOURCE_FILE}"
    COMMAND "${GENERATOR_EXE}" "${SOURCE_FILE}" "${NAME}" "-guest" "${OUTFILE}" "${X86_DEV_ROOTFS}" ${BITNESS_FLAGS} -- -std=c++20 ${BITNESS_FLAGS2}
      # Expand compile definitions to space-separated list of -D parameters
      "$<$<BOOL:${compile_prop}>:;-D$<JOIN:${compile_prop},;-D>>"
      # Expand include directories to space-separated list of -isystem parameters
      "$<$<BOOL:${prop}>:;-isystem$<JOIN:${prop},;-isystem>>"
    VERBATIM
    COMMAND_EXPAND_LISTS)

  # Hand the generated file back to the caller, where add_guest_lib picks it up as a source
  list(APPEND OUTPUTS "${OUTFILE}")
  set(GEN_${NAME} ${OUTPUTS} PARENT_SCOPE)
endfunction()

# Syntax: add_guest_lib(xyz "libxyz.so.N")
# Defines the target xyz-guest, built from ../libxyz/libxyz_Guest.cpp (or
# ../libxyz/Guest.cpp as a fallback) plus the files produced by generate(libxyz ...).
# SONAME is the name the resulting library reports to the dynamic linker.
#
# Reads the variables TARGET_TYPE, BITNESS, GENERATE_GUEST_INSTALL_TARGETS and
# DATA_DIRECTORY from the calling scope.
function(add_guest_lib NAME SONAME)
  set(SOURCE_FILE ../lib${NAME}/lib${NAME}_Guest.cpp)
  get_filename_component(SOURCE_FILE_ABS "${SOURCE_FILE}" ABSOLUTE)

  # Optional linker scripts, one per guest bitness.
  # The absolute paths computed here are the single source of truth for the
  # existence check, the linker option and the link dependency below.
  set(SOURCE_LDS_FILE ../lib${NAME}/lib${NAME}_Guest.lds)
  get_filename_component(SOURCE_LDS_FILE_ABS "${SOURCE_LDS_FILE}" ABSOLUTE)

  set(SOURCE_LDS_32_FILE ../lib${NAME}/lib${NAME}_Guest_32.lds)
  get_filename_component(SOURCE_LDS_32_FILE_ABS "${SOURCE_LDS_32_FILE}" ABSOLUTE)

  if (NOT EXISTS "${SOURCE_FILE_ABS}")
    set(SOURCE_FILE ../lib${NAME}/Guest.cpp)
    get_filename_component(SOURCE_FILE_ABS "${SOURCE_FILE}" ABSOLUTE)
    if (NOT EXISTS "${SOURCE_FILE_ABS}")
      message (FATAL_ERROR "Thunk source file for Guest lib ${NAME} doesn't exist!")
    endif()
  endif()

  add_library(${NAME}-guest ${TARGET_TYPE} ${SOURCE_FILE} ${GEN_lib${NAME}})
  target_include_directories(${NAME}-guest PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/gen/")
  target_compile_definitions(${NAME}-guest PRIVATE GUEST_THUNK_LIBRARY)
  target_link_libraries(${NAME}-guest PRIVATE lib${NAME}-guest-deps)

  ## Make signed overflow well defined 2's complement overflow
  target_compile_options(${NAME}-guest PRIVATE -fwrapv)
  if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
    ## Compile for SSE2
    ## Compile with fpmath=sse to remove x87 usage
    target_compile_options(${NAME}-guest PRIVATE -msse2 -mfpmath=sse)
  endif()

  if (BITNESS EQUAL 32)
    # Makes the GOT/PLT lookups slightly less painful
    target_compile_options(${NAME}-guest PRIVATE -fno-plt -fno-stack-protector)
    target_link_options(${NAME}-guest PRIVATE "LINKER:-z,now" "LINKER:-z,relro" "LINKER:-z,notext")
  endif()

  # Add linker script if set
  if (BITNESS EQUAL 64 AND EXISTS "${SOURCE_LDS_FILE_ABS}")
    target_link_options(${NAME}-guest PRIVATE "-T" "${SOURCE_LDS_FILE_ABS}")
    set_property(TARGET ${NAME}-guest APPEND PROPERTY LINK_DEPENDS "${SOURCE_LDS_FILE_ABS}")
  endif()

  if (BITNESS EQUAL 32 AND EXISTS "${SOURCE_LDS_32_FILE_ABS}")
    target_link_options(${NAME}-guest PRIVATE "-T" "${SOURCE_LDS_32_FILE_ABS}")
    set_property(TARGET ${NAME}-guest APPEND PROPERTY LINK_DEPENDS "${SOURCE_LDS_32_FILE_ABS}")
  endif()

  # We need to override the soname for the linker.
  # Our guest thunk libraries are named `lib<Thunk>-guest`.
  # Once we override the loaded name, the guest is free to dlopen again by SONAME rather than filepath.
  # eg:
  # dlopen("libGL.so.1", RTLD_GLOBAL | RTLD_NOW); -> We override this `libGL.so.1` to `libGL-guest.so`
  # Later on in the program, it can do:
  # dlopen("libGL.so.1", RTLD_GLOBAL | RTLD_NOLOAD);
  # This second dlopen will only check to see if the previous load has made the library resident
  # Searching for SONAME in the process.
  #
  # Additionally, VDSO can only be opened by SONAME.
  # This means it will only ever open the handle with `dlopen("linux-vdso.so.1", RTLD_GLOBAL | RTLD_NOLOAD);
  # Note that this doesn't have a lib prefix, and also since it doesn't exist on the filesystem, it can never
  # Actually load from a path.
  target_link_options(${NAME}-guest PRIVATE "LINKER:-soname,${SONAME}")
  set_target_properties(${NAME}-guest PROPERTIES NO_SONAME ON)

  # 64-bit and 32-bit guest libraries are installed into separate directories
  if (GENERATE_GUEST_INSTALL_TARGETS)
    if (BITNESS EQUAL 64)
      install(TARGETS ${NAME}-guest DESTINATION ${DATA_DIRECTORY}/GuestThunks/)
    else()
      install(TARGETS ${NAME}-guest DESTINATION ${DATA_DIRECTORY}/GuestThunks_32/)
    endif()
  endif()
endfunction()

# These thunks only support 64-bit
# Extra include directories are attached to the lib<name>-guest-deps interface
# target created by generate(); they are read lazily through generator expressions.
if (BITNESS EQUAL 64)
  #add_guest_lib(fex_malloc_loader)
  #target_link_libraries(fex_malloc_loader-guest PRIVATE dl)

  #generate(libfex_malloc)
  #add_guest_lib(fex_malloc)

  generate(libasound ${CMAKE_CURRENT_SOURCE_DIR}/../libasound/libasound_interface.cpp)
  add_guest_lib(asound "libasound.so.2")

  # disabled for now, headers are platform specific
  # find_package(SDL2 REQUIRED)
  # generate(libSDL2)
  # add_guest_lib(SDL2)
  # target_include_directories(SDL2-guest PRIVATE ${SDL2_INCLUDE_DIRS})
  # target_link_libraries(SDL2-guest PRIVATE GL)
  # target_link_libraries(SDL2-guest PRIVATE dl)

  # Vulkan headers come from the bundled Vulkan-Headers checkout
  generate(libvulkan ${CMAKE_CURRENT_SOURCE_DIR}/../libvulkan/libvulkan_interface.cpp)
  target_include_directories(libvulkan-guest-deps INTERFACE ${FEX_PROJECT_SOURCE_DIR}/External/Vulkan-Headers/include/)
  add_guest_lib(vulkan "libvulkan.so.1")

  generate(libdrm ${CMAKE_CURRENT_SOURCE_DIR}/../libdrm/libdrm_interface.cpp)
  target_include_directories(libdrm-guest-deps INTERFACE /usr/include/drm/)
  target_include_directories(libdrm-guest-deps INTERFACE /usr/include/libdrm/)
  add_guest_lib(drm "libdrm.so.2")
endif()

# The following thunks are built for every guest bitness
generate(libwayland-client ${CMAKE_CURRENT_SOURCE_DIR}/../libwayland-client/libwayland-client_interface.cpp)
add_guest_lib(wayland-client "libwayland-client.so.0.20.0")
target_include_directories(libwayland-client-guest-deps INTERFACE /usr/include/wayland)

# The VDSO thunk is linked without the standard library and with 4096-byte max page size
generate(libVDSO ${CMAKE_CURRENT_SOURCE_DIR}/../libVDSO/libVDSO_interface.cpp)
add_guest_lib(VDSO "linux-vdso.so.1")
# Can't use a stack protector because otherwise cross-compiling fails
# Not necessary anyway because it only trampolines
target_compile_options(VDSO-guest PRIVATE "-fno-stack-protector")
target_link_options(VDSO-guest PRIVATE "-nostdlib" "LINKER:--no-undefined" "LINKER:-z,max-page-size=4096" "LINKER:--hash-style=both")

if (BITNESS EQUAL 32)
  # 32-bit entrypoint points to __kernel_vsyscall and needs to exist
  target_link_options(VDSO-guest PRIVATE "LINKER:-e,__kernel_vsyscall")
  # 32-bit VDSO needs to have PIC disabled.
  # Otherwise GCC/Clang generates GOT prologues on the functions that corrupt vsyscall.
  # Correct:
  # 00000350 <__kernel_vsyscall>:
  #  350:   cd 80                   int    0x80
  #  352:   c3                      ret
  #  353:   0f 0b                   ud2
  # Incorrect:
  # 0000032a <__kernel_vsyscall>:
  #  32a:   e8 0b 00 00 00          call   33a <__x86.get_pc_thunk.ax>
  #  32f:   05 79 03 00 00          add    eax,0x379
  #  334:   cd 80                   int    0x80
  #  336:   c3                      ret
  #  337:   90                      nop
  #  338:   0f 0b                   ud2
  target_compile_options(VDSO-guest PRIVATE "-fno-pic")
endif()

# Thunk library used by the FEXLinuxTests suite; only built when those tests are enabled
if (BUILD_FEX_LINUX_TESTS)
  generate(libfex_thunk_test ${CMAKE_CURRENT_SOURCE_DIR}/../libfex_thunk_test/libfex_thunk_test_interface.cpp)
  add_guest_lib(fex_thunk_test "libfex_thunk_test.so")
endif()

generate(libGL ${CMAKE_CURRENT_SOURCE_DIR}/../libGL/libGL_interface.cpp)
add_guest_lib(GL "libGL.so.1")

# The EGL guest thunk links against the GL guest thunk
generate(libEGL ${CMAKE_CURRENT_SOURCE_DIR}/../libEGL/libEGL_interface.cpp)
add_guest_lib(EGL "libEGL.so.1")
target_link_libraries(EGL-guest PRIVATE GL-guest)

# libGL must pull in libX11.so, so generate a placeholder libX11.so to link against
# The placeholder carries the SONAME libX11.so.6 (CMake's own SONAME handling is disabled)
add_library(PlaceholderX11 SHARED ../libX11/libX11_NativeGuest.cpp)
target_link_options(PlaceholderX11 PRIVATE "LINKER:-soname,libX11.so.6")
set_target_properties(PlaceholderX11 PROPERTIES NO_SONAME ON)
target_link_libraries(GL-guest PRIVATE PlaceholderX11)


================================================
FILE: ThunkLibs/HostLibs/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(host-thunks)
include(${FEX_PROJECT_SOURCE_DIR}/Data/CMake/version_to_variables.cmake)

# Host thunk libraries are compiled as C++20
set(CMAKE_CXX_STANDARD 20)
option(ENABLE_CLANG_THUNKS "Enable building thunks with clang" FALSE)

# Link with lld when the thunks are built with clang
if (ENABLE_CLANG_THUNKS)
  set(LD_OVERRIDE "-fuse-ld=lld")
  add_link_options(${LD_OVERRIDE})
endif()

# Syntax: generate(libxyz libxyz-interface.cpp <guest_bitness>)
# This defines two targets and a custom command:
# - custom command: Main build step that runs the thunk generator on the given interface definition
# - libxyz-<guest_bitness>-interface: Target for IDE integration (making sure libxyz-interface.cpp shows up as a source file in the project tree)
# - libxyz-<guest_bitness>-deps: Interface target to read include directories from which are passed to libclang when parsing the interface definition
#
# Reads the variable X86_DEV_ROOTFS from the calling scope.
# The list of generated files is returned to the caller in GEN_<NAME>.
function(generate NAME SOURCE_FILE GUEST_BITNESS)
  # Interface target for the user to add include directories
  add_library(${NAME}-${GUEST_BITNESS}-deps INTERFACE)
  target_include_directories(${NAME}-${GUEST_BITNESS}-deps INTERFACE "${CMAKE_CURRENT_SOURCE_DIR}/../include")
  if (GUEST_BITNESS EQUAL 32)
    target_compile_definitions(${NAME}-${GUEST_BITNESS}-deps INTERFACE IS_32BIT_THUNK)
  endif()
  # Shorthand for the include directories added after calling this function.
  # This is not evaluated directly, hence directories added after return are still picked up
  set(prop "$<TARGET_PROPERTY:${NAME}-${GUEST_BITNESS}-deps,INTERFACE_INCLUDE_DIRECTORIES>")
  set(compile_prop "$<TARGET_PROPERTY:${NAME}-${GUEST_BITNESS}-deps,INTERFACE_COMPILE_DEFINITIONS>")
  # Additionally tell the generator which host architecture we are building for
  if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
    list(APPEND compile_prop ARCHITECTURE_x86_64=1)
  elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    list(APPEND compile_prop ARCHITECTURE_arm64=1)
  endif()

  # Target for IDE integration
  add_library(${NAME}-${GUEST_BITNESS}-interface EXCLUDE_FROM_ALL ${SOURCE_FILE})
  target_link_libraries(${NAME}-${GUEST_BITNESS}-interface PRIVATE ${NAME}-${GUEST_BITNESS}-deps)

  # Run thunk generator for each of the given output files
  # (outputs for different guest bitnesses go to separate folders)
  set(OUTFOLDER "${CMAKE_CURRENT_BINARY_DIR}/gen_${GUEST_BITNESS}")
  set(OUTFILE "${OUTFOLDER}/thunkgen_host_${NAME}.inl")

  file(MAKE_DIRECTORY "${OUTFOLDER}")

  # Optional generator switch selecting the 32-bit guest data layout
  set(BITNESS_FLAGS "")
  if (GUEST_BITNESS EQUAL 32)
    set(BITNESS_FLAGS "-for-32bit-guest")
  endif()

  # Generator arguments: <interface file> <libname> -host <output file> <x86 rootfs> [-for-32bit-guest] -- <clang flags>
  add_custom_command(
    OUTPUT "${OUTFILE}"
    DEPENDS "${SOURCE_FILE}"
    DEPENDS thunkgen
    COMMAND thunkgen "${SOURCE_FILE}" "${NAME}" "-host" "${OUTFILE}" "${X86_DEV_ROOTFS}" ${BITNESS_FLAGS} -- -std=c++20
      # Expand compile definitions to space-separated list of -D parameters
      "$<$<BOOL:${compile_prop}>:;-D$<JOIN:${compile_prop},;-D>>"
      # Expand include directories to space-separated list of -isystem parameters
      "$<$<BOOL:${prop}>:;-isystem$<JOIN:${prop},;-isystem>>"
    VERBATIM
    COMMAND_EXPAND_LISTS)

  # Hand the generated file back to the caller, where add_host_lib picks it up as a source
  list(APPEND OUTPUTS "${OUTFILE}")
  set(GEN_${NAME} ${OUTPUTS} PARENT_SCOPE)
endfunction()

# Syntax: add_host_lib(xyz <guest_bitness>)
# Defines the shared library target xyz-host-<guest_bitness>, built from
# ../libxyz/libxyz_Host.cpp (or ../libxyz/Host.cpp as a fallback) plus the
# files produced by generate(libxyz ... <guest_bitness>).
function(add_host_lib NAME GUEST_BITNESS)
  set(HOST_TARGET ${NAME}-host-${GUEST_BITNESS})

  # Locate the hand-written host source, trying the prefixed name first
  set(SOURCE_FILE ../lib${NAME}/lib${NAME}_Host.cpp)
  get_filename_component(SOURCE_FILE_ABS "${SOURCE_FILE}" ABSOLUTE)
  if (NOT EXISTS "${SOURCE_FILE_ABS}")
    set(SOURCE_FILE ../lib${NAME}/Host.cpp)
    get_filename_component(SOURCE_FILE_ABS "${SOURCE_FILE}" ABSOLUTE)
    if (NOT EXISTS "${SOURCE_FILE_ABS}")
      message (FATAL_ERROR "Thunk source file for Host lib ${NAME} doesn't exist!")
    endif()
  endif()

  add_library(${HOST_TARGET} SHARED ${SOURCE_FILE} ${GEN_lib${NAME}})

  # All bitness variants share the same file name, so each variant gets its own output directory
  set_target_properties(${HOST_TARGET} PROPERTIES
    OUTPUT_NAME "${NAME}-host"
    LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/HostLibs_${GUEST_BITNESS}")

  target_include_directories(${HOST_TARGET} PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/gen_${GUEST_BITNESS}/")
  target_link_libraries(${HOST_TARGET} PRIVATE
    dl
    lib${NAME}-${GUEST_BITNESS}-deps
    FEX)

  ## Make signed overflow well defined 2's complement overflow
  target_compile_options(${HOST_TARGET} PRIVATE -fwrapv)

  if (NOT ENABLE_ASAN)
    # generated files forward-declare functions that need to be implemented manually, so pass --no-undefined to make sure errors are detected at compile-time rather than runtime
    # NOTE: ASan is not compatible with --no-undefined, see https://github.com/google/sanitizers/issues/380 for details
    target_link_options(${HOST_TARGET} PRIVATE "LINKER:--no-undefined")
  endif()

  # Host thunks for 32-bit guests are installed next to, not on top of, the 64-bit ones
  if (GUEST_BITNESS EQUAL 32)
    set(INSTALL_SUBDIR "HostThunks_32")
  else()
    set(INSTALL_SUBDIR "HostThunks")
  endif()
  install(TARGETS ${HOST_TARGET} COMPONENT Runtime DESTINATION ${HOSTLIBS_DATA_DIRECTORY}/${INSTALL_SUBDIR}/)
endfunction()

# Host thunks that are only built for 64-bit guests
set(BITNESS_LIST "64")
foreach(GUEST_BITNESS IN LISTS BITNESS_LIST)
  #add_host_lib(fex_malloc_symbols ${GUEST_BITNESS})

  #generate(libfex_malloc)
  #add_host_lib(fex_malloc ${GUEST_BITNESS})

  generate(libasound ${CMAKE_CURRENT_SOURCE_DIR}/../libasound/libasound_interface.cpp ${GUEST_BITNESS})
  add_host_lib(asound ${GUEST_BITNESS})

  # disabled for now, headers are platform specific
  # find_package(SDL2 REQUIRED)
  # generate(libSDL2)
  # add_host_lib(SDL2 ${GUEST_BITNESS})
  # target_include_directories(SDL2-host PRIVATE ${SDL2_INCLUDE_DIRS})

  generate(libdrm ${CMAKE_CURRENT_SOURCE_DIR}/../libdrm/libdrm_interface.cpp ${GUEST_BITNESS})
  target_include_directories(libdrm-${GUEST_BITNESS}-deps INTERFACE /usr/include/drm/)
  target_include_directories(libdrm-${GUEST_BITNESS}-deps INTERFACE /usr/include/libdrm/)
  add_host_lib(drm ${GUEST_BITNESS})
endforeach()

# Host thunks that are built once per guest bitness (32-bit and 64-bit guests)
set(BITNESS_LIST "32;64")
foreach(GUEST_BITNESS IN LISTS BITNESS_LIST)
  if (BUILD_FEX_LINUX_TESTS)
    generate(libfex_thunk_test ${CMAKE_CURRENT_SOURCE_DIR}/../libfex_thunk_test/libfex_thunk_test_interface.cpp ${GUEST_BITNESS})
    add_host_lib(fex_thunk_test ${GUEST_BITNESS})
  endif()

  # Vulkan headers come from the bundled Vulkan-Headers checkout
  generate(libvulkan ${CMAKE_CURRENT_SOURCE_DIR}/../libvulkan/libvulkan_interface.cpp ${GUEST_BITNESS})
  target_include_directories(libvulkan-${GUEST_BITNESS}-deps INTERFACE ${FEX_PROJECT_SOURCE_DIR}/External/Vulkan-Headers/include/)
  add_host_lib(vulkan ${GUEST_BITNESS})

  generate(libwayland-client ${CMAKE_CURRENT_SOURCE_DIR}/../libwayland-client/libwayland-client_interface.cpp ${GUEST_BITNESS})
  add_host_lib(wayland-client ${GUEST_BITNESS})
  target_include_directories(libwayland-client-${GUEST_BITNESS}-deps INTERFACE /usr/include/wayland)

  generate(libEGL ${CMAKE_CURRENT_SOURCE_DIR}/../libEGL/libEGL_interface.cpp ${GUEST_BITNESS})
  add_host_lib(EGL ${GUEST_BITNESS})

  generate(libGL ${CMAKE_CURRENT_SOURCE_DIR}/../libGL/libGL_interface.cpp ${GUEST_BITNESS})
  add_host_lib(GL ${GUEST_BITNESS})

  # The GL host thunk links against the system OpenGL library
  find_package(OpenGL REQUIRED)
  target_link_libraries(GL-host-${GUEST_BITNESS} PRIVATE OpenGL::GL)
endforeach()

# Native library that goes with the fex_thunk_test thunks; installed as part of the TestLibraries component
if (BUILD_FEX_LINUX_TESTS)
  add_library(fex_thunk_test SHARED ../libfex_thunk_test/lib.cpp)
  install(TARGETS fex_thunk_test LIBRARY DESTINATION lib COMPONENT TestLibraries)
endif()


================================================
FILE: ThunkLibs/README.md
================================================
# FEX Library Thunking (Thunklibs)
FEX supports special guest libraries that call out to host code for speed and compatibility.

We support both guest->host thunks, as well as host->guest callbacks

## Building and using
The thunked libraries can be built via the `guest-libs` and `host-libs` targets of the main FEX project. The outputs are in `$BUILDDIR/Guest` and `$BUILDDIR/Host`

After that, a guest rootfs is needed with the guest-libs installed. Typically this is done with symlinks that replace the native guest libraries. eg 
```
# Unlink original guest lib
unlink $ROOTFS/lib/x86_64-linux-gnu/libX11.so.6
# Make it point to thunked version
ln -s $BUILDDIR/Guest/libX11-guest.so $ROOTFS/lib/x86_64-linux-gnu/libX11.so.6
```

Finally, FEX needs to be told where to look for the matching host libraries with `-t /Host/Libs/Path`. eg
```
FEX_THUNKHOSTLIBS=$BUILDDIR/Host FEX /PATH/TO/ELF
```

We currently don't have any unit tests for the guest libraries, only for OP_THUNK.

## Implementation outline
There are several parts that make this possible. This is a rough outline.

In FEX
- Opcode 0xF 0x3F (IR::OP_THUNK) is used for the Guest -> Host transition. Register RDI (arg0 in the 64-bit guest calling convention) is passed as arg0 in host. Thunks are identified by a string in the form `library:function` that directly follows the Guest opcode.
- `Context::HandleCallback` does the Host -> Guest transition, and returns when the Guest function returns.
- A special thunk, `fex:loadlib` is used to load and initialize a matching host lib. For more details, look in `ThunkHandler_impl::LoadLib`
- `ThunkHandler_impl::CallCallback` is provided to the host libs, so they can call callbacks. It prepares guest arguments and uses `Context::HandleCallback` 

ThunkLibs, Library loading
- In Guest code, when a thunking library is loaded it has a constructor that calls the `fex:loadlib` thunk, with the library name and callback unpackers, if any.
- In FEX, a matching host library is loaded using dlopen, `fexthunks_exports_$libname(CallCallbackPtr, GuestUnpackers)` is called to initialize the host library.
- In Host code, the real host library is loaded using dlopen and dlsym (see ldr generation)

ThunkLibs, Guest -> Host
- In Guest code (guest packer), a packer takes care of packing the arguments & return value into a struct in Guest stack. The packer is usually exported as a symbol from the Guest library.
- In Guest code (guest thunk), a thunk does the Guest -> Host transition via OP_THUNK, and passes the struct pointer as an argument
- FEX handles OP_THUNK and looks up the Host function from the opcode argument
- In Host code (host unpacker), an unpacker takes the arguments from the struct, and calls a function pointer with the implementation of that function. It also stores the return value, if any, to the struct.
- In Host code (host unpacker), the unpacker returns, and we do an implicit Host -> Guest transition
- In Guest code (guest packer), the return value is loaded from the struct and returned, if needed

ThunkLibs, Host -> Guest. This is only possible while handling a Guest -> Host call (ie, callbacks). 
- In Host code (host packer), a packer packs the arguments & return value to a struct in Host stack.
- In Host code (host packer), `ThunkHandler_impl::CallCallback` is called with the Guest unpacker, and Guest function as arguments
- In Guest code (guest unpacker), the arguments are unpacked, the Guest function is called, and the return value is stored to the struct
- In Guest code (guest unpacker), the unpacker returns and we do an implicit Guest -> Host transition
- In host code (host packer), the return value is loaded from the struct and returned, if needed

Boilerplate code is automated using a dedicated code generator tool, which parses a C++ source file (`libX_interface.cpp`) that specializes
a templated `fex_gen_config` struct for each thunked function. The generator will pull all required function signatures from the original
library's header files and emit the appropriate boilerplate (guest->host thunks, argument packers/unpackers, host library loader, ...).

In most cases, an empty `fex_gen_config` specialization is sufficient, but if needed the generator behavior can be customized on a
function-by-function basis using an annotation-syntax: Binary properties are toggled by inheriting from a fixed set of tag types
(e.g. `fexgen::custom_host_impl`), whereas complicated properties are customized by defining struct members/aliases with a magic name
detected by the generator (e.g. `using uniform_va_type = char`).

For each thunked library, the generator outputs the following files:
- `thunks.inl`: Guest -> Host transition functions that use 0xF 0x3F
- `function_packs.inl`: Guest argument packers / rv handling, private to the SO. These are used to solve symbol resolution issues with glxGetProc*, etc.
- `function_packs_public.inl`: Guest argument packers / rv handling, exported from the SO. These are identical to the function_packs, but exported from the SO
- `function_unpacks.inl`: Host argument unpackers / rv handling
- `ldr.inl`: Host loader that dlopens/dlsyms the "real" host library for the implementation functions.
- `ldr_ptrs.inl`: Host loader pointer declarations, used by ldr and function_unpacks
- `tab_function_unpacks.inl`: Host function unpackers list, passed to FEX after Host library init so it can resolve the Guest Thunks to Host functions


## Adding a new library

There are two kinds of libs, simpler ones with no callbacks, and complex ones with callbacks. You can see how `libX11` is implemented for a callbacks example, and `libasound` for a non-callbacks example.

Getting started
- Create `libName/libName_interface.cpp` and customize the `fex_gen_config` template for each thunked function. See some existing lib for details.
- Create `libName/libName_Guest.cpp` and `libName/libName_Host.cpp`. Copy & rename from some existing lib is the way to go.
- Edit `GuestLibs/CMakeLists.txt` and `HostLibs/CMakeLists.txt` to add the new targets, similar to how other libs are done.

Now the host and the guest libs should be built as part of `guest-libs` and `host-libs`


================================================
FILE: ThunkLibs/include/common/GeneratorInterface.h
================================================
#pragma once

// Annotation tag types understood by the thunk generator.
//
// Interface definition files attach these tags to their fex_gen_config
// specializations to customize how code is generated for a function, type,
// member, or parameter. The types carry no behavior of their own; their effect
// is implemented entirely by the generator.
namespace fexgen {
// Function annotations.
// NOTE(review): the exact effect of each of these tags is defined by the
// generator and is not visible in this header.
struct returns_guest_pointer {};
struct custom_host_impl {};
struct custom_guest_entrypoint {};

// Library-wide annotations.
// NOTE(review): semantics are defined by the generator, see above.
struct generate_guest_symtable {};
struct indirect_guest_calls {};

struct callback_annotation_base {
  // Prevent annotating multiple callback strategies
  bool prevent_multiple;
};
struct callback_stub : callback_annotation_base {};

// Member annotation to mark members handled by custom repacking. This enables
// automatic struct repacking of structs with non-trivial members (pointers,
// unions, ...). Repacking logic is auto-generated as usual, with the
// difference that an external function is called to manually repack the
// annotated members.
//
// Two functions must be implemented for the parent struct type:
// * fex_custom_repack_entry, called after automatic repacking of the other members
// * fex_custom_repack_exit, called on exit but before automatic exit-repacking
//     of the other members. Non-trivial implementations must perform host->guest
//     repacking manually and return the boolean value true.
//
// If multiple members of the same struct are annotated as custom_repack,
// they must be handled in the same fex_custom_repack_entry/exit functions.
struct custom_repack {};

// Type annotation to indicate that guest_layout/host_layout definitions should
// be emitted even if the type is non-repackable. Pointer members will be
// copied (or zero-extended) without regard for the referred data.
struct emit_layout_wrappers {};

// Common base of the mutually exclusive type annotations below. The data
// member makes inheriting from more than one of them ambiguous.
struct type_annotation_base {
  bool prevent_multiple;
};

// Pointers to types annotated with this will be passed through without change
struct opaque_type : type_annotation_base {};

// Function parameter annotation.
// Pointers are passed through to host (extending to 64-bit if needed) without modifying the pointee.
// The type passed to Host will be guest_layout<pointee_type>*.
struct ptr_passthrough {};

// Type / Function parameter annotation.
// Assume objects of the given type are compatible across architectures,
// even if the generator can't automatically prove this. For pointers, this refers to the pointee type.
// NOTE: In contrast to opaque_type, this allows for non-pointer members with the annotated type to be repacked automatically.
struct assume_compatible_data_layout : type_annotation_base {};

} // namespace fexgen


================================================
FILE: ThunkLibs/include/common/Guest.h
================================================
#pragma once
#include <stdint.h>
#include <type_traits>

#include "PackedArguments.h"

// Calling convention used for thunk entry points.
// With 8-byte pointers the default calling convention is used (the macro
// expands to nothing); otherwise thunks use fastcall, spelled in the form
// each compiler accepts.
#if __SIZEOF_POINTER__ == 8
#define THUNK_ABI
#else
#ifdef __clang__
#define THUNK_ABI __fastcall
#else
#define THUNK_ABI __attribute__((fastcall))
#endif
#endif

// Variable template that maps a callback signature to the thunk function
// invoking callbacks of that signature. Only the explicit specializations
// emitted by MAKE_CALLBACK_THUNK provide a value.
template<typename signature>
THUNK_ABI const int (*fexthunks_invoke_callback)(void*);

// MAKE_THUNK(lib, name, hash):
//   Declares the hidden function fexthunks_<lib>_<name>(void* args) and defines
//   its body in top-level assembly: the two opcode bytes 0x0F 0x3F followed
//   directly by the bytes given in `hash` (a string of comma-separated byte values).
//
// MAKE_CALLBACK_THUNK(name, signature, hash):
//   Same for fexthunks_<name>, and additionally specializes
//   fexthunks_invoke_callback<signature> to point at that function.
#ifndef ARCHITECTURE_arm64
#define MAKE_THUNK(lib, name, hash)                                                                    \
  extern "C" __attribute__((visibility("hidden"))) THUNK_ABI int fexthunks_##lib##_##name(void* args); \
  asm(".text\nfexthunks_" #lib "_" #name ":\n.byte 0xF, 0x3F\n.byte " hash);

#define MAKE_CALLBACK_THUNK(name, signature, hash)                                             \
  extern "C" __attribute__((visibility("hidden"))) THUNK_ABI int fexthunks_##name(void* args); \
  asm(".text\nfexthunks_" #name ":\n.byte 0xF, 0x3F\n.byte " hash);                            \
  template<>                                                                                   \
  THUNK_ABI inline constexpr int (*fexthunks_invoke_callback<signature>)(void*) = fexthunks_##name;

#else
// We're compiling for IDE integration, so provide a dummy-implementation that just calls an undefined function.
// The name of that function serves as an error message if this library somehow gets loaded at runtime.
// The `hash` argument is unused in this configuration.
extern "C" void BROKEN_INSTALL___TRIED_LOADING_AARCH64_BUILD_OF_GUEST_THUNK();
#define MAKE_THUNK(lib, name, hash)                                \
  extern "C" int fexthunks_##lib##_##name(void* args) {            \
    BROKEN_INSTALL___TRIED_LOADING_AARCH64_BUILD_OF_GUEST_THUNK(); \
    return 0;                                                      \
  }
#define MAKE_CALLBACK_THUNK(name, signature, hash) \
  extern "C" int fexthunks_##name(void* args);     \
  template<>                                       \
  inline constexpr int (*fexthunks_invoke_callback<signature>)(void*) = fexthunks_##name;
#endif

// Linkage specifier for generated fexfn_pack_ symbols.
// These symbols should be hidden by default, but clang does not support
// aliasing to static functions. On clang they are therefore emitted as regular
// non-static functions; on other compilers they are static.
#if defined(__clang__)
#define FEX_PACKFN_LINKAGE
#else
#define FEX_PACKFN_LINKAGE static
#endif

// Argument block passed by address to fexthunks_fex_loadlib (see LOAD_LIB_BASE).
struct LoadlibArgs {
  const char* Name;         // Stringified library name supplied by LOAD_LIB_BASE
  uintptr_t CallbackThunks; // Left zero-initialized by LOAD_LIB_BASE
};

// Thunks for the helper functions provided under the `fex` library prefix.
// Each invocation declares fexthunks_fex_<name>; the string literal lists 32
// byte values that MAKE_THUNK emits after the two-byte prefix.
// NOTE(review): the 32 bytes are presumably a SHA-256 identifying the thunk on
// the host side — confirm against the thunk generator.
MAKE_THUNK(fex, loadlib,
           "0x27, 0x7e, 0xb7, 0x69, 0x5b, 0xe9, 0xab, 0x12, 0x6e, 0xf7, 0x85, 0x9d, 0x4b, 0xc9, 0xa2, 0x44, 0x46, 0xcf, 0xbd, 0xb5, 0x87, "
           "0x43, 0xef, 0x28, 0xa2, 0x65, 0xba, 0xfc, 0x89, 0x0f, 0x77, 0x80")
MAKE_THUNK(fex, is_lib_loaded,
           "0xee, 0x57, 0xba, 0x0c, 0x5f, 0x6e, 0xef, 0x2a, 0x8c, 0xb5, 0x19, 0x81, 0xc9, 0x23, 0xe6, 0x51, 0xae, 0x65, 0x02, 0x8f, 0x2b, "
           "0x5d, 0x59, 0x90, 0x6a, 0x7e, 0xe2, 0xe7, 0x1c, 0x33, 0x8a, 0xff")
MAKE_THUNK(fex, is_host_heap_allocation,
           "0xf5, 0x77, 0x68, 0x43, 0xbb, 0x6b, 0x28, 0x18, 0x40, 0xb0, 0xdb, 0x8a, 0x66, 0xfb, 0x0e, 0x2d, 0x98, 0xc2, 0xad, 0xe2, 0x5a, "
           "0x18, 0x5a, 0x37, 0x2e, 0x13, 0xc9, 0xe7, 0xb9, 0x8c, 0xa9, 0x3e")
MAKE_THUNK(fex, link_address_to_function,
           "0xe6, 0xa8, 0xec, 0x1c, 0x7b, 0x74, 0x35, 0x27, 0xe9, 0x4f, 0x5b, 0x6e, 0x2d, 0xc9, 0xa0, 0x27, 0xd6, 0x1f, 0x2b, 0x87, 0x8f, "
           "0x2d, 0x35, 0x50, 0xea, 0x16, 0xb8, 0xc4, 0x5e, 0x42, 0xfd, 0x77")
MAKE_THUNK(fex, allocate_host_trampoline_for_guest_function,
           "0x9b, 0xb2, 0xf4, 0xb4, 0x83, 0x7d, 0x28, 0x93, 0x40, 0xcb, 0xf4, 0x7a, 0x0b, 0x47, 0x85, 0x87, 0xf9, 0xbc, 0xb5, 0x27, 0xca, "
           "0xa6, 0x93, 0xa5, 0xc0, 0x73, 0x27, 0x24, 0xae, 0xc8, 0xb8, 0x5a")

// Defines a static constructor function `loadlib` that passes the stringified
// `name` to fexthunks_fex_loadlib and afterwards calls `init_fn` (cast to
// `void (*)()`) if it is non-null. The CallbackThunks member of the argument
// block is left zero-initialized.
#define LOAD_LIB_BASE(name, init_fn)                   \
  __attribute__((constructor)) static void loadlib() { \
    LoadlibArgs args = {#name};                        \
    fexthunks_fex_loadlib(&args);                      \
    if ((init_fn)) ((void (*)())init_fn)();            \
  }

// LOAD_LIB registers the library without an init function;
// LOAD_LIB_INIT additionally runs `init_fn` after registration.
#define LOAD_LIB(name) LOAD_LIB_BASE(name, nullptr)
#define LOAD_LIB_INIT(name, init_fn) LOAD_LIB_BASE(name, init_fn)

// Asks the host (through the link_address_to_function thunk) to associate the
// address `addr` with the function at `target`.
// Both values are widened to 64 bits before being handed over.
inline void LinkAddressToFunction(uintptr_t addr, uintptr_t target) {
  struct {
    uint64_t original_callee; // Address being linked
    uint64_t target_addr;     // Function to call when branching to original_callee
  } request {};

  request.original_callee = addr;
  request.target_addr = target;

  fexthunks_fex_link_address_to_function(&request);
}

// Queries the is_lib_loaded thunk for the library called `libname` and returns
// the flag written back into the argument block (false if it is left untouched).
inline bool IsLibLoaded(const char* libname) {
  struct query_t {
    const char* Name;
    bool rv;
  };

  query_t query {};
  query.Name = libname;

  fexthunks_fex_is_lib_loaded(&query);
  return query.rv;
}

// Helper template that packs the given arguments and invokes a thunk at the
// address stored in the `r11` guest register (64-bit) or the `mm0` guest
// register (32-bit). The signature of the thunk must be specified at
// compile-time via the Thunk template parameter.
// Other than reading the thunk address from a register, this is equivalent to
// the fexfn_pack_* functions generated for global API functions.
//
// The host address is appended to the packed arguments as a trailing uint64_t.
// For non-void Result, the value found in the `rv` member after the thunk
// returns is handed back to the caller.
template<auto Thunk, typename Result, typename... Args>
inline Result CallHostFunction(Args... args) {
#ifndef ARCHITECTURE_arm64
#if __SIZEOF_POINTER__ == 8
  // This magic incantation of using a register variable with an empty asm block is necessary for correct operation!
  // If we only use inline asm that sets a variable then the compiler will reorder the function
  // prologue to be BEFORE our inline asm. Which makes sense in hindsight, but for anything with 8+ arguments this
  // will clobber our r11 register before we save the data that is inside of it.

  // First we need to declare the r11 register variable
  register uintptr_t host_addr asm("r11");

  // We then create an empty *volatile* asm block saying that it is assigning the register variable.
  // Yes, it is already set coming in to this function due to custom ABI.
  // This gets both GCC and Clang to understand that the variable is set, seemingly at the start of the function.
  // So its own internal live-range tracking extends its beginning range to the start of the function.
  //
  // To verify this in the future, search for `mov     r11` in binaryninja, and ensure that all uses inside of `CallHostFunction`
  // don't have intersecting ranges.
  //
  // Note that this issue is more likely to occur when clang is used to compile thunks, since its optimizer is more aggressive at using R11.
  // This magic incantation also works in that instance so this is about the best we can do without adding a new attribute to clang for
  // modifying the ABI.
  asm volatile("" : "=r"(host_addr));
#else
  // Use mm0 to pass in host_addr (chosen to avoid conflicts with vectorcall).
  // Note this register overlaps the x87 st(0) register (used to return float values),
  // so applications that expect this register to be preserved could run into problems.
  uintptr_t host_addr;
  asm volatile("movd %%mm0, %0" : "=r"(host_addr));
#endif
#else
  // IDE-integration build (see MAKE_THUNK): there is no register to read from.
  uintptr_t host_addr = 0;
#endif

  PackedArguments<Result, Args..., uint64_t> packed_args = {
    args..., host_addr
    // Return value not explicitly initialized since an initializer would fail to compile for the void case
  };

  Thunk(reinterpret_cast<void*>(&packed_args));

  if constexpr (!std::is_void_v<Result>) {
    return packed_args.rv;
  }
}

// Convenience wrapper that returns the function pointer to a CallHostFunction
// instantiation matching the function signature of `host_func`
template<typename Result, typename... Args>
static auto GetCallerForHostFunction(Result (*host_func)(Args...)) -> Result (*)(Args...) {
  return &CallHostFunction<fexthunks_invoke_callback<Result(Args...)>, Result, Args...>;
}

// Makes `host_func` safely callable from guest code by linking its address to
// the matching CallHostFunction instantiation.
template<typename Result, typename... Args>
inline void MakeHostFunctionGuestCallable(THUNK_ABI Result (*host_func)(Args...)) {
  const auto caller_address = reinterpret_cast<uintptr_t>(GetCallerForHostFunction(host_func));
  const auto host_address = reinterpret_cast<uintptr_t>(host_func);
  LinkAddressToFunction(host_address, caller_address);
}

// Requests a host trampoline for the guest function `GuestTarget`, to be
// unpacked through `GuestUnpacker`. A null `GuestTarget` yields a null result
// without contacting the host. Otherwise the value written to `rv` by the
// allocate_host_trampoline_for_guest_function thunk is returned.
template<typename Target>
inline Target* AllocateHostTrampolineForGuestFunction(void THUNK_ABI (*GuestUnpacker)(uintptr_t, void*), Target* GuestTarget) {
  if (GuestTarget == nullptr) {
    return nullptr;
  }

  // All fields are 64-bit regardless of the guest pointer size
  struct request_t {
    uint64_t GuestUnpacker;
    uint64_t GuestTarget;
    uint64_t rv;
  };

  request_t request {};
  request.GuestUnpacker = reinterpret_cast<uintptr_t>(GuestUnpacker);
  request.GuestTarget = reinterpret_cast<uintptr_t>(GuestTarget);

  fexthunks_fex_allocate_host_trampoline_for_guest_function(static_cast<void*>(&request));

  return reinterpret_cast<Target*>(request.rv);
}

template<typename F>
struct CallbackUnpack;

template<typename Result, typename... Args>
struct CallbackUnpack<Result(Args...)> {
  static void THUNK_ABI Unpack(uintptr_t cb, void* argsv) {
    using fn_t = Result(Args...);
    auto callback = reinterpret_cast<fn_t*>(cb);
    auto args = reinterpret_cast<PackedArguments<Result, Args...>*>(argsv);
    Invoke(callback, *args);
  }
};

template<typename Result, typename... Args>
struct CallbackUnpack<Result (*)(Args...)> : CallbackUnpack<Result(Args...)> {};

template<typename Target>
inline Target* AllocateHostTrampolineForGuestFunction(Target* GuestTarget) {
  return AllocateHostTrampolineForGuestFunction(CallbackUnpack<Target*>::Unpack, GuestTarget);
}

// Passes `ptr` to the is_host_heap_allocation thunk and returns the flag
// written back into the argument block (false if it is left untouched).
inline bool IsHostHeapAllocation(void* ptr) {
  struct query_t {
    void* ptr;
    bool rv;
  };

  query_t query {};
  query.ptr = ptr;

  fexthunks_fex_is_host_heap_allocation(&query);
  return query.rv;
}


================================================
FILE: ThunkLibs/include/common/Host.h
================================================
/*
$info$
category: thunklibs ~ These are generated + glue logic 1:1 thunks unless noted otherwise
$end_info$
*/

#pragma once
#include <array>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <dlfcn.h>
#include <optional>

#include "PackedArguments.h"

// Import FEX::HLE functions for use in host thunk libraries.
//
// Note these are statically linked into the FEX executable. The linker hence
// doesn't know about them when linking thunk libraries. This issue is avoided
// by declaring the functions as weak symbols.
//
// HostToGuestTrampolinePtr is only forward-declared; this header handles it
// exclusively through pointers.
namespace FEX::HLE {
struct HostToGuestTrampolinePtr;

__attribute__((weak)) HostToGuestTrampolinePtr* MakeHostTrampolineForGuestFunction(void* HostPacker, uintptr_t GuestTarget, uintptr_t GuestUnpacker);

__attribute__((weak)) HostToGuestTrampolinePtr* FinalizeHostTrampolineForGuestFunction(HostToGuestTrampolinePtr*, void* HostPacker);

__attribute__((weak)) void* GetGuestStack();

__attribute__((weak)) void MoveGuestStack(uintptr_t NewAddress);
} // namespace FEX::HLE

// Exposes the result type and the (single) parameter type of a pointer to a
// unary function. Other types have no definition.
template<typename Fn>
struct function_traits;

template<typename Ret, typename Param>
struct function_traits<Ret (*)(Param)> {
  typedef Ret result_t;
  typedef Param arg_t;
};

// Type-erased adapter around the unary function `Fn`: accepts the argument as
// void*, casts it to Fn's actual parameter type and forwards the call,
// returning whatever Fn returns.
template<auto Fn>
static typename function_traits<decltype(Fn)>::result_t fexfn_type_erased_unpack(void* argsv) {
  using traits_t = function_traits<decltype(Fn)>;
  auto typed_args = reinterpret_cast<typename traits_t::arg_t>(argsv);
  return Fn(typed_args);
}

// Element type of the `exports` table returned by fexthunks_exports_<name>()
// (see EXPORTS).
// NOTE(review): sha256 presumably points at the hash identifying a thunk and
// fn at the host function handling it — confirm against the thunk generator.
struct ExportEntry {
  uint8_t* sha256;
  void (*fn)(void*);
};

// Function type taking a callback address and two opaque argument pointers.
using fex_call_callback_t = void(uintptr_t callback, void* arg0, void* arg1);

// Defines the C-linkage entry point fexthunks_exports_<name>(), which returns
// the `exports` table, or nullptr if fexldr_init_<name>() evaluates to false.
// Both `exports` and fexldr_init_<name> must be visible where the macro is used.
#define EXPORTS(name)                       \
  extern "C" {                              \
  ExportEntry* fexthunks_exports_##name() { \
    if (!fexldr_init_##name()) {            \
      return nullptr;                       \
    }                                       \
    return exports;                         \
  }                                         \
  }

// Defines a static constructor function `loadlib` that calls `init_fn`.
#define LOAD_LIB_INIT(init_fn)                         \
  __attribute__((constructor)) static void loadlib() { \
    init_fn();                                         \
  }

// Data read by CallbackUnpack<...>::CallGuestPtr to forward a host call to
// guest code: CallCallback is invoked with GuestUnpacker, GuestTarget and a
// pointer to the packed arguments.
// NOTE(review): HostPacker is not read anywhere in this header; presumably it
// holds the address of the CallGuestPtr instantiation — confirm against FEX::HLE.
struct GuestcallInfo {
  uintptr_t HostPacker;
  void (*CallCallback)(uintptr_t GuestUnpacker, uintptr_t GuestTarget, void* argsrv);
  uintptr_t GuestUnpacker;
  uintptr_t GuestTarget;
};

// Helper macro for reading an internal argument passed through a dedicated
// host register: `r11` on x86-64, `x11` on arm64. This macro must be placed
// at the very beginning of the function it is used in.
// The macro is not defined for any other architecture.
#if defined(ARCHITECTURE_x86_64)
#define LOAD_INTERNAL_GUESTPTR_VIA_CUSTOM_ABI(target_variable) asm volatile("mov %%r11, %0" : "=r"(target_variable))
#elif defined(ARCHITECTURE_arm64)
#define LOAD_INTERNAL_GUESTPTR_VIA_CUSTOM_ABI(target_variable) asm volatile("mov %0, x11" : "=r"(target_variable))
#endif

// Per-parameter (or return value) annotation flags. Both default to false.
struct ParameterAnnotations {
  // Hand the guest_layout value through unchanged (see Projection)
  bool is_passthrough {false};
  // Treat the data layout as compatible without checking (see IsCompatible)
  bool assume_compatible {false};
};

// True if T has the same data layout on guest and host.
// Generator emits specializations for this for each type that has compatible layout.
// The primary template covers integral, enum and floating-point types.
template<typename T>
inline constexpr bool has_compatible_data_layout =
  std::is_integral_v<T> || std::is_enum_v<T> ||
  std::is_floating_point_v<T>
#ifndef IS_32BIT_THUNK
  // If none of the previous predicates matched, the thunk generator did *not* emit a specialization for T.
  // This should not happen on 64-bit with the currently thunked libraries, since their types
  // * either have fully consistent data layout across 64-bit architectures.
  // * or use custom repacking, in which case has_compatible_data_layout isn't used
  //
  // Throwing a fake exception here will trigger a build failure.
  || (throw "Instantiated on a type that was expected to be compatible", true)
#endif
  ;

// The following specializations only exist for 64-bit thunks.
#ifndef IS_32BIT_THUNK
// Pointers have the same size, hence data layout compatibility only depends on the pointee type
// (with cv-qualifiers of the pointee stripped)
template<typename T>
inline constexpr bool has_compatible_data_layout<T*> = has_compatible_data_layout<std::remove_cv_t<T>>;
// A const pointer is as compatible as the corresponding non-const pointer
template<typename T>
inline constexpr bool has_compatible_data_layout<T* const> = has_compatible_data_layout<std::remove_cv_t<T>*>;

// void* and void** are assumed to be compatible to simplify handling of libraries that use them ubiquitously
template<>
inline constexpr bool has_compatible_data_layout<void*> = true;
template<>
inline constexpr bool has_compatible_data_layout<const void*> = true;
template<>
inline constexpr bool has_compatible_data_layout<void**> = true;
template<>
inline constexpr bool has_compatible_data_layout<const void**> = true;
#endif

// Placeholder type to indicate the given data is in guest-layout.
// This primary template stores a T directly in a packed struct. Classes,
// unions, enums and void are rejected with a static_assert.
template<typename T>
struct __attribute__((packed)) guest_layout {
  static_assert(!std::is_class_v<T>, "No guest layout defined for this non-opaque struct type. This may be a bug in the thunk generator.");
  static_assert(!std::is_union_v<T>, "No guest layout defined for this non-opaque union type. This may be a bug in the thunk generator.");
  static_assert(!std::is_enum_v<T>, "No guest layout defined for this enum type. This is a bug in the thunk generator.");
  static_assert(!std::is_void_v<T>, "Attempted to get guest layout of void. Missing annotation for void pointer?");

  static_assert(std::is_fundamental_v<T> || has_compatible_data_layout<T>, "Default guest_layout may not be used for non-compatible data");

  // Ill-formed for pointer types, which must use the pointer specializations instead
  using type = std::enable_if_t<!std::is_pointer_v<T>, T>;
  type data;

  // Stores a host value; the packed struct remains usable as an aggregate
  guest_layout& operator=(const T from) {
    data = from;
    return *this;
  }
};

// Guest layout of a fixed-size array: N packed guest_layout elements.
// Arrays of pointers are rejected since `type` is ill-formed for pointer T.
template<typename T, std::size_t N>
struct __attribute__((packed)) guest_layout<T[N]> {
  using type = std::enable_if_t<!std::is_pointer_v<T>, T>;
  std::array<guest_layout<type>, N> data;
};

// Guest layout of a pointer: the address is stored as an integer of the
// guest's pointer width (32 bits for 32-bit thunks, 64 bits otherwise).
template<typename T>
struct guest_layout<T*> {
#ifdef IS_32BIT_THUNK
  using type = uint32_t;
#else
  using type = uint64_t;
#endif
  type data;

  // Allow implicit conversion for function pointers, since they disallow use of host_layout
  guest_layout& operator=(const T* from) requires (std::is_function_v<T>)
  {
    // TODO: Assert upper 32 bits are zero
    data = reinterpret_cast<uintptr_t>(from);
    return *this;
  }

  // Reinterprets the stored address as a pointer to the pointee's guest layout
  guest_layout<T>* get_pointer() {
    return reinterpret_cast<guest_layout<T>*>(uintptr_t {data});
  }

  const guest_layout<T>* get_pointer() const {
    return reinterpret_cast<const guest_layout<T>*>(uintptr_t {data});
  }

  // Reinterprets the stored address as a host T* without any repacking
  T* force_get_host_pointer() {
    return reinterpret_cast<T*>(uintptr_t {data});
  }

  const T* force_get_host_pointer() const {
    return reinterpret_cast<const T*>(uintptr_t {data});
  }
};

// Guest layout of a const pointer. Same storage as guest_layout<T*>, but
// without the force_get_host_pointer accessors.
template<typename T>
struct guest_layout<T* const> {
#ifdef IS_32BIT_THUNK
  using type = uint32_t;
#else
  using type = uint64_t;
#endif
  type data;

  // Allow implicit conversion for function pointers, since they disallow use of host_layout
  guest_layout& operator=(const T* from) requires (std::is_function_v<T>)
  {
    // TODO: Assert upper 32 bits are zero
    data = reinterpret_cast<uintptr_t>(from);
    return *this;
  }

  // Reinterprets the stored address as a pointer to the pointee's guest layout
  guest_layout<T>* get_pointer() {
    return reinterpret_cast<guest_layout<T>*>(uintptr_t {data});
  }

  const guest_layout<T>* get_pointer() const {
    return reinterpret_cast<const guest_layout<T>*>(uintptr_t {data});
  }
};

template<typename T>
struct host_layout;

// Host-side representation of a T constructed from its guest layout.
// This generic implementation handles non-class, non-union, non-void types.
template<typename T>
struct host_layout {
  static_assert(!std::is_class_v<T>, "No host_layout specialization generated for struct/class type");
  static_assert(!std::is_union_v<T>, "No host_layout specialization generated for union type");
  static_assert(!std::is_void_v<T>, "Attempted to get host layout of void. Missing annotation for void pointer?");

  // TODO: This generic implementation shouldn't be needed. Instead, auto-specialize host_layout for all types used as members.

  T data;

  // Non-enum types: copy the guest value as-is
  explicit host_layout(const guest_layout<T>& from) requires (!std::is_enum_v<T>)
    : data {from.data} {
    // NOTE: This is not strictly necessary since differently sized types may
    //       be used across architectures. It's important that the host type
    //       can represent all guest values without loss, however.
    static_assert(sizeof(data) == sizeof(from));
  }

  // Enum types: explicitly cast the guest value to the enum type
  explicit host_layout(const guest_layout<T>& from) requires (std::is_enum_v<T>)
    : data {static_cast<T>(from.data)} {}

  // Allow conversion of integral types of smaller or equal size and same sign
  // to each other. Zero-extension is applied if needed.
  // Notably, this is useful for handling "long"/"long long" on 64-bit, as well
  // as uint8_t/char.
  template<typename U>
  explicit host_layout(const guest_layout<U>& from)
    requires (std::is_integral_v<U> && sizeof(U) <= sizeof(T) && std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>)
    : data {static_cast<T>(from.data)} {}
};

// Views an existing host object as its host_layout wrapper without copying.
// Only valid when host_layout<T> stores a plain T, which is checked at compile time.
template<typename T>
const host_layout<T>& to_host_layout(const T& t) {
  using wrapped_t = host_layout<T>;
  static_assert(std::is_same_v<decltype(wrapped_t::data), T>);
  const auto& wrapped = reinterpret_cast<const wrapped_t&>(t);
  return wrapped;
}

// Host layout of a fixed-size array, converted from the guest layout one
// element at a time.
template<typename T, size_t N>
struct host_layout<T[N]> {
  std::array<T, N> data;

  explicit host_layout(const guest_layout<T[N]>& from) {
    size_t index = 0;
    for (const auto& guest_element : from.data) {
      data[index] = host_layout<T> {guest_element}.data;
      ++index;
    }
  }
};

// True exactly for long, unsigned long, long long and unsigned long long
// (cv-qualified variants do not match).
template<typename T>
constexpr bool is_long_or_longlong =
  std::disjunction_v<std::is_same<T, long>, std::is_same<T, unsigned long>, std::is_same<T, long long>, std::is_same<T, unsigned long long>>;

// Host layout of a pointer to non-function data: a plain host pointer obtained
// by converting the guest-sized address. The pointee is not repacked here.
template<typename T>
struct host_layout<T*> {
  T* data;

  static_assert(!std::is_function_v<T>, "Function types must be handled separately");

  // Assume underlying data is compatible and just convert the guest-sized pointer to 64-bit
  explicit host_layout(const guest_layout<T*>& from)
    : data {(T*)(uintptr_t)from.data} {}

  // Leaves `data` uninitialized
  host_layout() = default;

  // Allow conversion of pointers to 64-bit integer types to "(un)signed long (long)*".
  // This is useful for handling "long"/"long long" on 64-bit, which are distinct types
  // but have equal data layout.
  template<typename U>
  explicit host_layout(const guest_layout<U*>& from)
    requires (is_long_or_longlong<std::remove_cv_t<T>> && std::is_integral_v<U> && std::is_convertible_v<T, U> &&
              std::is_signed_v<T> == std::is_signed_v<U>
#if __clang_major__ >= 16
              // Old clang versions don't support using sizeof on incomplete types when evaluating requires()
              && sizeof(T) == sizeof(U)
#endif
                )
    : data {(T*)(uintptr_t)from.data} {
  }

  // Allow conversion of pointers to 8-bit integer types to "char*".
  // This is useful since "char"/"signed char"/"unsigned char"/"int8_t"/"uint8_t"
  // may all be distinct types but have equal data layout
  template<typename U>
  explicit host_layout(const guest_layout<U*>& from)
    requires (std::is_same_v<std::remove_cv_t<T>, char> && std::is_integral_v<U> && std::is_convertible_v<T, U> && sizeof(U) == 1)
    : data {(T*)(uintptr_t)from.data} {}

  // Allow conversion of pointers to 32-bit integer types to "wchar_t*".
  template<typename U>
  explicit host_layout(const guest_layout<U*>& from) requires (
    std::is_same_v<std::remove_cv_t<T>, wchar_t> && std::is_integral_v<U> && std::is_convertible_v<T, U> && sizeof(U) == sizeof(wchar_t))
    : data {(T*)(uintptr_t)from.data} {}
};

// Host layout of a const pointer to non-function data. Only the direct
// conversion from the matching guest_layout is provided.
template<typename T>
struct host_layout<T* const> {
  T* data;

  static_assert(!std::is_function_v<T>, "Function types must be handled separately");

  // Assume underlying data is compatible and just convert the guest-sized pointer to 64-bit
  explicit host_layout(const guest_layout<T* const>& from)
    : data {(T*)(uintptr_t)from.data} {}
};

// Wrapper around host_layout that repacks from a guest_layout on construction
// and exit-repacks on scope exit (if needed). The wrapper manages the storage
// needed for repacked data itself.
// This also implicitly converts to a pointer of the wrapped host type, since
// this conversion is required at all call sites anyway.
//
// T is the host pointer type, GuestT the type used for the guest argument.
// fex_apply_custom_repacking_entry/exit are not declared in this header and
// must be visible at the point of instantiation.
template<typename T, typename GuestT>
struct repack_wrapper {
  static_assert(std::is_pointer_v<T>);

  // Strip "const" from pointee type in host_layout storage
  using PointeeT = std::remove_cv_t<std::remove_pointer_t<T>>;

  // Repacked host copy of the pointee; stays empty if the guest pointer is null
  std::optional<host_layout<PointeeT>> data;
  // Guest argument this wrapper was created from; written back to on destruction
  guest_layout<GuestT>& orig_arg;

  repack_wrapper(guest_layout<GuestT>& orig_arg_)
    : orig_arg(orig_arg_) {
    if (orig_arg.get_pointer()) {
      // Entry repacking: convert the guest pointee into host layout
      data = {*orig_arg_.get_pointer()};

      if constexpr (!std::is_enum_v<T>) {
        // Custom entry repacking is only applied to class pointees that are
        // not known to be layout-compatible
        constexpr bool is_compatible = has_compatible_data_layout<T> && std::is_same_v<T, GuestT>;
        if constexpr (!is_compatible && std::is_class_v<std::remove_pointer_t<T>>) {
          fex_apply_custom_repacking_entry(*data, *orig_arg_.get_pointer());
        }
      }
    }
  }

  ~repack_wrapper() {
    // TODO: Properly detect opaque types
    if constexpr (std::is_class_v<std::remove_pointer_t<T>> && requires(guest_layout<T> t, decltype(data) h) {
                    t.get_pointer();
                    (bool)h;
                    *data;
                  }) {
      if (data) {
        // NOTE: It's assumed that the native host library didn't modify any
        //       const-pointees, so we skip automatic exit repacking for them.
        //       However, *custom* repacking must still be applied since it
        //       might have unrelated side effects (such as deallocation of
        //       memory reserved on entry)
        // Automatic exit repacking only runs if the custom hook returned false
        if (!fex_apply_custom_repacking_exit(*orig_arg.get_pointer(), *data)) {
          if constexpr (!std::is_const_v<std::remove_pointer_t<T>>) { // Skip exit-repacking for const pointees
            if constexpr (!(has_compatible_data_layout<T> && std::is_same_v<T, GuestT>)) {
              *orig_arg.get_pointer() = to_guest(*data); // TODO: Only if annotated as out-parameter
            }
          }
        }
      }
    }
  }

  // Pointer to the repacked host data, or nullptr if the guest pointer was null.
  // The static_asserts ensure host_layout<PointeeT> has the same size and
  // alignment as PointeeT.
  operator PointeeT*() {
    static_assert(sizeof(PointeeT) == sizeof(host_layout<PointeeT>));
    static_assert(alignof(PointeeT) == alignof(host_layout<PointeeT>));
    return data ? &data.value().data : nullptr;
  }
};

// Factory for repack_wrapper that deduces the guest type from `orig_arg`;
// only the host type T has to be spelled out by the caller.
template<typename T, typename GuestT>
static repack_wrapper<T, GuestT> make_repack_wrapper(guest_layout<GuestT>& orig_arg) {
  return repack_wrapper<T, GuestT>(orig_arg);
}

// Gives access to the host value stored in a host_layout.
template<typename T>
T& unwrap_host(host_layout<T>& val) {
  T& host_value = val.data;
  return host_value;
}

// Gives access to the host pointer managed by a repack_wrapper
// (via its pointer conversion operator).
template<typename T, typename T2>
T* unwrap_host(repack_wrapper<T*, T2>& val) {
  return static_cast<T*>(val);
}

// Proxy object returned by to_guest(). It refers to a host_layout<T> and
// converts implicitly to the guest_layout types listed below, so the target
// guest type is selected by the context the proxy is used in.
// The proxy only stores a reference; it must not outlive the host_layout.
template<typename T>
struct host_to_guest_convertible {
  const host_layout<T>& from;

  // Conversion from host to guest layout for non-pointers
  operator guest_layout<T>() const requires (!std::is_pointer_v<T>)
  {
    if constexpr (std::is_enum_v<T>) {
      // enums are represented by fixed-size integers in guest_layout, so explicitly cast them
      return guest_layout<T> {static_cast<std::underlying_type_t<T>>(from.data)};
    } else {
      guest_layout<T> ret {.data = from.data};
      return ret;
    }
  }

  // Conversion from host to guest layout for pointers: stores the address as
  // a guest-sized integer
  operator guest_layout<T>() const requires (std::is_pointer_v<T>)
  {
    // TODO: Assert upper 32 bits are zero
    guest_layout<T> ret;
    ret.data = reinterpret_cast<uintptr_t>(from.data);
    return ret;
  }

  // Checked with #ifdef like every other use of IS_32BIT_THUNK in this header
#ifdef IS_32BIT_THUNK
  // Allow size_t -> uint32_t conversions, since they are so common on 32-bit
  operator guest_layout<uint32_t>() const requires (std::is_same_v<T, size_t>)
  {
    return {static_cast<uint32_t>(from.data)};
  }

  // libGL also needs to allow long->int conversions for return values...
  operator guest_layout<int32_t>() const requires (std::is_same_v<T, long>)
  {
    return {static_cast<int32_t>(from.data)};
  }
#endif

  // Make guest_layout of "long long" and "long" interoperable, since they are
  // the same type as far as data layout is concerned.
  operator guest_layout<const unsigned long long*>() const requires (std::is_same_v<T, const unsigned long*>)
  {
    return (guest_layout<const unsigned long long*>)reinterpret_cast<const host_to_guest_convertible<const unsigned long long*>&>(*this);
  }

  // Make guest_layout of "char" and "uint8_t" interoperable
  operator guest_layout<const uint8_t*>() const requires (std::is_same_v<T, const char*>)
  {
    return (guest_layout<const uint8_t*>)reinterpret_cast<const host_to_guest_convertible<const uint8_t*>&>(*this);
  }

  operator guest_layout<uint8_t*>() const requires (std::is_same_v<T, char*>)
  {
    return (guest_layout<uint8_t*>)reinterpret_cast<const host_to_guest_convertible<uint8_t*>&>(*this);
  }

  // Make guest_layout of "wchar_t" and "uint32_t" interoperable
  operator guest_layout<uint32_t*>() const requires (std::is_same_v<T, wchar_t*>)
  {
    return (guest_layout<uint32_t*>)reinterpret_cast<const host_to_guest_convertible<uint32_t*>&>(*this);
  }

  // The wchar_t/uint32_t conversion above relies on this
  static_assert(sizeof(wchar_t) == 4);

  // Allow conversion of integral types of same size and sign to each other.
  // This is useful for handling "long"/"long long" on 64-bit, as well as uint8_t/char.
  template<typename U>
  operator guest_layout<U>() const
    requires (std::is_integral_v<U> && sizeof(U) == sizeof(T) && std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>)
  {
    // Cast to the destination type U (casting to T would be a no-op, since
    // from.data already has type T)
    return guest_layout<U> {.data {static_cast<U>(from.data)}};
  }
};

// Wraps `from` in a conversion proxy that can be turned into a guest_layout.
// The proxy refers to `from`, so it must be consumed while `from` is alive.
template<typename T>
inline host_to_guest_convertible<T> to_guest(const host_layout<T>& from) {
  return host_to_guest_convertible<T> {from};
}

template<typename>
struct CallbackUnpack;

// Returns whether T can be treated as layout-compatible for a parameter with
// the given annotation: either the annotation says so, or T itself is
// compatible, or T is a pointer whose (cv-stripped) pointee is compatible.
//
// The if-constexpr/else chain is significant: when assume_compatible is set,
// has_compatible_data_layout<T> is never instantiated, so the build-failure
// trap in its primary template cannot fire.
template<typename T, ParameterAnnotations Annotation>
constexpr bool IsCompatible() {
  if constexpr (Annotation.assume_compatible) {
    return true;
  } else if constexpr (has_compatible_data_layout<T>) {
    return true;
  } else {
    if constexpr (std::is_pointer_v<T>) {
      return has_compatible_data_layout<std::remove_cv_t<std::remove_pointer_t<T>>>;
    } else {
      return false;
    }
  }
}

// Aggregate holding a host_layout<T> that implicitly converts to the wrapped
// host value. Projection returns it instead of the bare value.
template<typename T>
struct decaying_host_layout {
  host_layout<T> data;
  // Returns a copy of the wrapped host value
  operator T() {
    return data.data;
  }
};

// Turns a guest argument into the object that is passed to the host function.
// The return type depends on the branch taken:
// * passthrough parameters: a copy of the guest_layout itself
// * non-pointers, and compatible pointers whose guest and host types match:
//   a decaying_host_layout<HostT>
// * everything else: a repack_wrapper
template<ParameterAnnotations Annotation, typename HostT, typename T>
auto Projection(guest_layout<T>& data) {
  if constexpr (Annotation.is_passthrough) {
    return data;
  } else if constexpr ((IsCompatible<T, Annotation>() && std::is_same_v<T, HostT>) || !std::is_pointer_v<T>) {
    // Instead of using host_layout<HostT> { data }.data, return a wrapper object.
    // This ensures that temporary lifetime extension can kick in at call-site.
    return decaying_host_layout<HostT> {.data {data}};
  } else {
    // This argument requires temporary storage for repacked data
    // *and* it needs to call custom repack functions (if any)
    return make_repack_wrapper<HostT>(data);
  }
}

#ifdef IS_32BIT_THUNK
/**
 * Bump allocator that places objects on the guest stack from host code.
 *
 * On construction the current guest stack position is recorded. Every call
 * to New() moves the guest stack further down to make room for one object.
 * On destruction the recorded position is restored.
 */
class GuestStackBumpAllocator final {
  uintptr_t Top = reinterpret_cast<uintptr_t>(FEX::HLE::GetGuestStack());
  uintptr_t Next = Top;

public:
  ~GuestStackBumpAllocator() {
    FEX::HLE::MoveGuestStack(Top);
  }

  /**
   * Constructs a T (brace-initialized from `args`) below the current guest
   * stack position, aligned down to alignof(T), and returns a pointer to it.
   */
  template<typename T, typename... Args>
  T* New(Args&&... args) {
    constexpr uintptr_t AlignmentMask = ~uintptr_t {alignof(T) - 1};
    Next = (Next - sizeof(T)) & AlignmentMask;
    FEX::HLE::MoveGuestStack(Next);

    void* Storage = reinterpret_cast<void*>(Next);
    return new (Storage) T {std::forward<Args>(args)...};
  }
};
#endif

// Host-side entry point used when host code calls a guest function of type
// Result(Args...).
template<typename Result, typename... Args>
struct CallbackUnpack<Result(Args...)> {
  // Packs the host arguments into guest layout and forwards them to the guest
  // through GuestcallInfo::CallCallback. For non-void Result, the `rv` member
  // of the packed arguments is returned afterwards.
  static Result CallGuestPtr(Args... args) {
    // Must come first: the GuestcallInfo pointer arrives in a dedicated register
    GuestcallInfo* guestcall;
    LOAD_INTERNAL_GUESTPTR_VIA_CUSTOM_ABI(guestcall);

#ifndef IS_32BIT_THUNK
    PackedArguments<Result, guest_layout<Args>...> packed_args = {to_guest(to_host_layout(args))...};
#else
    // For 32-bit thunks the packed arguments are placed on the guest stack;
    // the allocator restores the guest stack position when it goes out of scope
    GuestStackBumpAllocator GuestStack;
    auto& packed_args = *GuestStack.New<PackedArguments<Result, guest_layout<Args>...>>(to_guest(to_host_layout(args))...);
#endif
    guestcall->CallCallback(guestcall->GuestUnpacker, guestcall->GuestTarget, &packed_args);

    if constexpr (!std::is_void_v<Result>) {
      return packed_args.rv;
    }
  }
};

template<bool Cond, typename T, typename GuestT>
using as_guest_layout_if = std::conditional_t<Cond, guest_layout<GuestT>, T>;

// Adapter that lets guest code call a host function of type Result(Args...).
// GuestArgs are the argument types as seen by the guest.
template<typename, typename...>
struct GuestWrapperForHostFunction;

template<typename Result, typename... Args, typename... GuestArgs>
struct GuestWrapperForHostFunction<Result(Args...), GuestArgs...> {
  // Host functions called from Guest
  // NOTE: GuestArgs typically matches up with Args, however there may be exceptions (e.g. size_t)
  //
  // `argsv` points to the guest's packed arguments: one guest_layout per
  // argument followed by a trailing uintptr_t holding the address to call.
  // RetAnnotations/Annotations describe the return value and each parameter.
  template<ParameterAnnotations RetAnnotations, ParameterAnnotations... Annotations>
  static void Call(void* argsv) {
    static_assert(sizeof...(Annotations) == sizeof...(Args));
    static_assert(sizeof...(GuestArgs) == sizeof...(Args));

    auto args =
      reinterpret_cast<PackedArguments<as_guest_layout_if<!std::is_void_v<Result>, Result, Result>, guest_layout<GuestArgs>..., uintptr_t>*>(argsv);
    // The target address is the member following the last regular argument.
    // Members are accessed by name, so each supported index is spelled out.
    constexpr auto CBIndex = sizeof...(GuestArgs);
    uintptr_t cb;
    static_assert(CBIndex <= 18 || CBIndex == 23);
    if constexpr (CBIndex == 0) {
      cb = args->a0;
    } else if constexpr (CBIndex == 1) {
      cb = args->a1;
    } else if constexpr (CBIndex == 2) {
      cb = args->a2;
    } else if constexpr (CBIndex == 3) {
      cb = args->a3;
    } else if constexpr (CBIndex == 4) {
      cb = args->a4;
    } else if constexpr (CBIndex == 5) {
      cb = args->a5;
    } else if constexpr (CBIndex == 6) {
      cb = args->a6;
    } else if constexpr (CBIndex == 7) {
      cb = args->a7;
    } else if constexpr (CBIndex == 8) {
      cb = args->a8;
    } else if constexpr (CBIndex == 9) {
      cb = args->a9;
    } else if constexpr (CBIndex == 10) {
      cb = args->a10;
    } else if constexpr (CBIndex == 11) {
      cb = args->a11;
    } else if constexpr (CBIndex == 12) {
      cb = args->a12;
    } else if constexpr (CBIndex == 13) {
      cb = args->a13;
    } else if constexpr (CBIndex == 14) {
      cb = args->a14;
    } else if constexpr (CBIndex == 15) {
      cb = args->a15;
    } else if constexpr (CBIndex == 16) {
      cb = args->a16;
    } else if constexpr (CBIndex == 17) {
      cb = args->a17;
    } else if constexpr (CBIndex == 18) {
      cb = args->a18;
    } else if constexpr (CBIndex == 23) {
      cb = args->a23;
    }

    // This is almost the same type as "Result func(Args..., uintptr_t)", but
    // individual types annotated as passthrough are wrapped in guest_layout<>
    auto callback = reinterpret_cast<as_guest_layout_if<RetAnnotations.is_passthrough, Result, Result> (*)(
      as_guest_layout_if<Annotations.is_passthrough, Args, GuestArgs>..., uintptr_t)>(cb);

    // Converts each guest argument via Projection, calls the host function and
    // (unless the return value is void or passthrough) converts the result
    // back to guest layout
    auto f = [&callback](guest_layout<GuestArgs>... args, uintptr_t target) {
      // Fold over each of Annotations, Args, and args. This will match up the elements in triplets.
      if constexpr (std::is_void_v<Result>) {
        callback(Projection<Annotations, Args>(args)..., target);
      } else if constexpr (!RetAnnotations.is_passthrough) {
        return (guest_layout<Result>)to_guest(to_host_layout(callback(Projection<Annotations, Args>(args)..., target)));
      } else {
        return callback(Projection<Annotations, Args>(args)..., target);
      }
    };
    Invoke(f, *args);
  }
};

// Creates a host trampoline for the guest function at `GuestTarget` (unpacked
// through `GuestUnpacker`) and stores it in `*Func`.
template<typename FuncType>
void MakeHostTrampolineForGuestFunctionAt(uintptr_t GuestTarget, uintptr_t GuestUnpacker, FuncType** Func) {
  void* HostPacker = reinterpret_cast<void*>(&CallbackUnpack<FuncType>::CallGuestPtr);
  auto* Trampoline = FEX::HLE::MakeHostTrampolineForGuestFunction(HostPacker, GuestTarget, GuestUnpacker);
  *Func = reinterpret_cast<FuncType*>(Trampoline);
}

// Finalizes a preallocated trampoline given as a raw function pointer: the pointer is reinterpreted as a
// FEX::HLE::HostToGuestTrampolinePtr* and paired with CallbackUnpack<F>::CallGuestPtr as the host packer.
template<typename F>
void FinalizeHostTrampolineForGuestFunction(F* PreallocatedTrampolineForGuestFunction) {
  auto Trampoline = (FEX::HLE::HostToGuestTrampolinePtr*)PreallocatedTrampolineForGuestFunction;
  auto HostPacker = (void*)&CallbackUnpack<F>::CallGuestPtr;
  FEX::HLE::FinalizeHostTrampolineForGuestFunction(Trampoline, HostPacker);
}

// Overload for trampolines held in a guest_layout<F*>: the stored `data` value is reinterpreted as a
// FEX::HLE::HostToGuestTrampolinePtr* and paired with CallbackUnpack<F>::CallGuestPtr as the host packer.
template<typename F>
void FinalizeHostTrampolineForGuestFunction(guest_layout<F*> PreallocatedTrampolineForGuestFunction) {
  auto Trampoline = (FEX::HLE::HostToGuestTrampolinePtr*)PreallocatedTrampolineForGuestFunction.data;
  auto HostPacker = (void*)&CallbackUnpack<F>::CallGuestPtr;
  FEX::HLE::FinalizeHostTrampolineForGuestFunction(Trampoline, HostPacker);
}

// When the thunk host_loader is the default, FEX needs to use dlsym with RTLD_DEFAULT.
// If FEX queried the symbol object directly, it wouldn't follow symbol overriding rules.
//
// A common use case is LD_PRELOAD with a library that defines some symbols, which programs and
// libraries will then pick up. Example: MangoHud overrides GLX and EGL symbols.
//
// The `handle` argument is intentionally ignored; the lookup always goes through RTLD_DEFAULT.
inline void* dlsym_default(void* handle, const char* symbol) {
  static_cast<void>(handle);
  void* const address = dlsym(RTLD_DEFAULT, symbol);
  return address;
}


================================================
FILE: ThunkLibs/include/common/PackedArguments.h
================================================
#pragma once

#include <cstdint>
#include <type_traits>
#include <utility>

// Primary template; declared only. Every usable arity is supplied by an explicit or partial specialization.
template<class Result, class... Args>
struct __attribute__((packed)) PackedArguments;

// Packed record for a call with no arguments and a non-void result: only the result slot `rv`.
template<class Ret>
struct __attribute__((packed)) PackedArguments<Ret> {
  Ret rv;
};
// Packed record for a call with 1 argument and a non-void result: a0, then the result slot `rv`.
template<class Ret, class T0>
struct __attribute__((packed)) PackedArguments<Ret, T0> {
  T0 a0;
  Ret rv;
};
// Packed record for a call with 2 arguments and a non-void result: a0..a1 in order, then `rv`.
template<class Ret, class T0, class T1>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1> {
  T0 a0;
  T1 a1;
  Ret rv;
};
// Packed record for a call with 3 arguments and a non-void result: a0..a2 in order, then `rv`.
template<class Ret, class T0, class T1, class T2>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2> {
  T0 a0;
  T1 a1;
  T2 a2;
  Ret rv;
};
// Packed record for a call with 4 arguments and a non-void result: a0..a3 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  Ret rv;
};
// Packed record for a call with 5 arguments and a non-void result: a0..a4 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  Ret rv;
};
// Packed record for a call with 6 arguments and a non-void result: a0..a5 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  Ret rv;
};
// Packed record for a call with 7 arguments and a non-void result: a0..a6 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  Ret rv;
};
// Packed record for a call with 8 arguments and a non-void result: a0..a7 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  Ret rv;
};
// Packed record for a call with 9 arguments and a non-void result: a0..a8 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  Ret rv;
};
// Packed record for a call with 10 arguments and a non-void result: a0..a9 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  Ret rv;
};
// Packed record for a call with 11 arguments and a non-void result: a0..a10 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  Ret rv;
};
// Packed record for a call with 12 arguments and a non-void result: a0..a11 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  Ret rv;
};
// Packed record for a call with 13 arguments and a non-void result: a0..a12 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  Ret rv;
};
// Packed record for a call with 14 arguments and a non-void result: a0..a13 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  Ret rv;
};
// Packed record for a call with 15 arguments and a non-void result: a0..a14 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14>
struct __attribute__((packed)) PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  Ret rv;
};

// Packed record for a call with 23 arguments and a non-void result: a0..a22 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15, class T16, class T17, class T18, class T19,
         class T20, class T21, class T22>
struct __attribute__((packed))
PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
  T16 a16;
  T17 a17;
  T18 a18;
  T19 a19;
  T20 a20;
  T21 a21;
  T22 a22;
  Ret rv;
};

// Packed record for a call with 24 arguments and a non-void result: a0..a23 in order, then `rv`.
template<class Ret, class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15, class T16, class T17, class T18, class T19,
         class T20, class T21, class T22, class T23>
struct __attribute__((packed))
PackedArguments<Ret, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
  T16 a16;
  T17 a17;
  T18 a18;
  T19 a19;
  T20 a20;
  T21 a21;
  T22 a22;
  T23 a23;
  Ret rv;
};

// Packed record for a call with no arguments and no result: nothing is stored.
template<>
struct __attribute__((packed)) PackedArguments<void> {
};
// Packed record for a call with 1 argument and no result: only a0 is stored.
template<class T0>
struct __attribute__((packed)) PackedArguments<void, T0> {
  T0 a0;
};
// Packed record for a call with 2 arguments and no result: a0..a1 in order.
template<class T0, class T1>
struct __attribute__((packed)) PackedArguments<void, T0, T1> {
  T0 a0;
  T1 a1;
};
// Packed record for a call with 3 arguments and no result: a0..a2 in order.
template<class T0, class T1, class T2>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2> {
  T0 a0;
  T1 a1;
  T2 a2;
};
// Packed record for a call with 4 arguments and no result: a0..a3 in order.
template<class T0, class T1, class T2, class T3>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
};
// Packed record for a call with 5 arguments and no result: a0..a4 in order.
template<class T0, class T1, class T2, class T3, class T4>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
};
// Packed record for a call with 6 arguments and no result: a0..a5 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
};
// Packed record for a call with 7 arguments and no result: a0..a6 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
};
// Packed record for a call with 8 arguments and no result: a0..a7 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
};
// Packed record for a call with 9 arguments and no result: a0..a8 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
};
// Packed record for a call with 10 arguments and no result: a0..a9 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
};
// Packed record for a call with 11 arguments and no result: a0..a10 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
};
// Packed record for a call with 12 arguments and no result: a0..a11 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
};
// Packed record for a call with 13 arguments and no result: a0..a12 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
};
// Packed record for a call with 14 arguments and no result: a0..a13 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
};
// Packed record for a call with 15 arguments and no result: a0..a14 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
};
// Packed record for a call with 16 arguments and no result: a0..a15 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
};
// Packed record for a call with 17 arguments and no result: a0..a16 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15, class T16>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
  T16 a16;
};
// Packed record for a call with 18 arguments and no result: a0..a17 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15, class T16, class T17>
struct __attribute__((packed)) PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
  T16 a16;
  T17 a17;
};
// Packed record for a call with 19 arguments and no result: a0..a18 in order.
template<class T0, class T1, class T2, class T3, class T4, class T5, class T6, class T7, class T8, class T9,
         class T10, class T11, class T12, class T13, class T14, class T15, class T16, class T17, class T18>
struct __attribute__((packed))
PackedArguments<void, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18> {
  T0 a0;
  T1 a1;
  T2 a2;
  T3 a3;
  T4 a4;
  T5 a5;
  T6 a6;
  T7 a7;
  T8 a8;
  T9 a9;
  T10 a10;
  T11 a11;
  T12 a12;
  T13 a13;
  T14 a14;
  T15 a15;
  T16 a16;
  T17 a17;
  T18 a18;
};

// Tag type that lets the result of a function call be assigned to a variable even when that result is void.
//
// Usage: `x = (f(), Regularize {})`.
// If f() is non-void, the overloaded comma operator below is selected and yields its left operand unchanged.
// If f() is void, the overload cannot apply, so the built-in comma operator runs and yields the Regularize object.
struct Regularize {};

template<class T>
T&& operator,(T&& value, Regularize) {
  // Perfect-forward the left operand; the tag on the right is discarded.
  return static_cast<T&&>(value);
}

// Calls `func` with the argument fields of `args` (a0, a1, ...) in order and, when `Result` is not void,
// writes the returned value to `args.rv`.
//
// Accepted arities are 0 through 19, 23 and 24; anything else is rejected at compile time. A matching
// PackedArguments specialization must also exist for the given Result/Args combination.
// The 23-argument case is handled so that the 23-argument PackedArguments specialization is usable here too.
template<typename Result, typename... Args, typename Func>
void Invoke(Func&& func, PackedArguments<Result, Args...>& args) requires (std::is_invocable_r_v<Result, Func, Args...>)
{
  constexpr auto NumArgs = sizeof...(Args);
  static_assert(NumArgs <= 19 || NumArgs == 23 || NumArgs == 24);

  // For void results, `rv` is a dummy Regularize object so that the assignments below stay well-formed.
  std::conditional_t<std::is_void_v<Result>, Regularize, Result> rv;
  if constexpr (NumArgs == 0) {
    rv = (func(), Regularize {});
  } else if constexpr (NumArgs == 1) {
    rv = (func(args.a0), Regularize {});
  } else if constexpr (NumArgs == 2) {
    rv = (func(args.a0, args.a1), Regularize {});
  } else if constexpr (NumArgs == 3) {
    rv = (func(args.a0, args.a1, args.a2), Regularize {});
  } else if constexpr (NumArgs == 4) {
    rv = (func(args.a0, args.a1, args.a2, args.a3), Regularize {});
  } else if constexpr (NumArgs == 5) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4), Regularize {});
  } else if constexpr (NumArgs == 6) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5), Regularize {});
  } else if constexpr (NumArgs == 7) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6), Regularize {});
  } else if constexpr (NumArgs == 8) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7), Regularize {});
  } else if constexpr (NumArgs == 9) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8), Regularize {});
  } else if constexpr (NumArgs == 10) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9), Regularize {});
  } else if constexpr (NumArgs == 11) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10), Regularize {});
  } else if constexpr (NumArgs == 12) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11), Regularize {});
  } else if constexpr (NumArgs == 13) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12),
          Regularize {});
  } else if constexpr (NumArgs == 14) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12, args.a13),
          Regularize {});
  } else if constexpr (NumArgs == 15) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14),
          Regularize {});
  } else if constexpr (NumArgs == 16) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15),
          Regularize {});
  } else if constexpr (NumArgs == 17) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15, args.a16),
          Regularize {});
  } else if constexpr (NumArgs == 18) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15, args.a16, args.a17),
          Regularize {});
  } else if constexpr (NumArgs == 19) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15, args.a16, args.a17, args.a18),
          Regularize {});
  } else if constexpr (NumArgs == 23) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15, args.a16, args.a17, args.a18, args.a19, args.a20, args.a21, args.a22),
          Regularize {});
  } else if constexpr (NumArgs == 24) {
    rv = (func(args.a0, args.a1, args.a2, args.a3, args.a4, args.a5, args.a6, args.a7, args.a8, args.a9, args.a10, args.a11, args.a12,
               args.a13, args.a14, args.a15, args.a16, args.a17, args.a18, args.a19, args.a20, args.a21, args.a22, args.a23),
          Regularize {});
  }

  // Only non-void calls have a result slot to fill in.
  if constexpr (!std::is_void_v<Result>) {
    args.rv = rv;
  }
}


================================================
FILE: ThunkLibs/include/common/X11Manager.h
================================================
#include "Host.h"

#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <dlfcn.h>
#include <mutex>
#include <unordered_map>

#include <X11/Xlib.h>
#include <xcb/xcb.h>

// Integer types sized like the guest's `long` and `size_t`.
// With IS_32BIT_THUNK defined both are 32 bits wide; otherwise they alias the host's own types.
// NOTE(review): the 32-bit guest_size_t is signed (int32_t) although size_t is unsigned, so widening it to a
// host size_t sign-extends values with the top bit set — confirm whether this is intended.
#ifdef IS_32BIT_THUNK
using guest_long = int32_t;
using guest_size_t = int32_t;
#else
using guest_long = long;
using guest_size_t = size_t;
#endif

/**
 * Guest X11 displays and xcb connections can't be used by the host, so
 * instead an intermediary object is created and mapped to the original
 * guest display/connection.
 */
struct X11Manager {
  std::mutex mutex;

  // Maps guest connection to intermediary host connection
  std::unordered_map<xcb_connection_t*, xcb_connection_t*> connections;

  xcb_connection_t* GuestToHostConnection(xcb_connection_t* GuestConnection) {
    std::unique_lock lock(mutex);
    auto [it, inserted] = connections.emplace(GuestConnection, nullptr);
    if (inserted) {
      // NOTE: There's no easy way to query the display name from the guest, so just connect to the default display.
      static void* libxcb = dlopen("libxcb.so.1", RTLD_LAZY);
      static auto ptr_xcb_connect = (decltype(&xcb_connect))dlsym(libxcb, "xcb_connect");
      static auto ptr_xcb_connection_has_error = (decltype(&xcb_connection_has_error))dlsym(libxcb, "xcb_connection_has_error");
      it->second = ptr_xcb_connect(nullptr, nullptr);
      if (ptr_xcb_connection_has_error(it->second)) {
        fprintf(stderr, "ERROR: Could not open xcb connection\n");
        std::abort();
      }
    }
    return it->second;
  }

  // Maps guest display to intermediary host display
  std::unordered_map<_XDisplay*, _XDisplay*> displays;

  _XDisplay* GuestToHostDisplay(_XDisplay* GuestDisplay) {
    // Flush event queue to make effects of the guest-side connection visible
    GuestXSync(GuestDisplay, 0);

    std::unique_lock lock(mutex);
    auto [it, inserted] = displays.emplace(GuestDisplay, nullptr);
    if (inserted) {
      auto host_display = HostXOpenDisplay(GuestXDisplayString(GuestDisplay));
      fprintf(stderr, "Opening host-side X11 display: %p -> %p\n", GuestDisplay, host_display);
      if (!host_display) {
        fprintf(stderr, "ERROR: Could not open X display\n");
        std::abort();
      } else {
        it->second = host_display;
      }
    }
    return it->second;
  }

  guest_layout<_XDisplay*> HostToGuestDisplay(const _XDisplay* from) {
    if (from == nullptr) {
      return {.data = 0};
    }

    std::unique_lock lock(mutex);
    for (auto& [guest, host] : displays) {
      if (host == from) {
        guest_layout<_XDisplay*> ret;
        ret.data = reinterpret_cast<uintptr_t>(guest);
        return ret;
      }
    }

    fprintf(stderr, "ERROR: Could not map host display %p back to guest\n", from);
    std::abort();
  }

  static void* GetLibX11() {
    static void* libx11 = dlopen("libX11.so.6", RTLD_LAZY);
    return libx11;
  }

  static int HostXFree(void* Ptr) {
    static auto func = reinterpret_cast<decltype(&XFree)>(dlsym(GetLibX11(), "XFree"));
    return func(Ptr);
  }

  static int HostXFlush(Display* Dis) {
    static auto func = reinterpret_cast<decltype(&XFlush)>(dlsym(GetLibX11(), "XFlush"));
    return func(Dis);
  }

  static Display* HostXOpenDisplay(const char* Name) {
    static auto func = reinterpret_cast<decltype(&XOpenDisplay)>(dlsym(GetLibX11(), "XOpenDisplay"));
    return func(Name);
  }

  static XVisualInfo* HostXGetVisualInfo(Display* a, long b, XVisualInfo* c, int* d) {
    static auto func = reinterpret_cast<decltype(&XGetVisualInfo)>(dlsym(GetLibX11(), "XGetVisualInfo"));
    return func(a, b, c, d);
  }

  // NOTE: Struct pointers are replaced by void* to avoid involving data layout conversion here.
  int (*GuestXSync)(void*, int) = nullptr;
  void* (*GuestXGetVisualInfo)(void*, guest_long, void*, int*) = nullptr;

  // XDisplayString internally just reads data from _XDisplay's internal struct definition.
  // This breaks when data layout is different, so allow reading from a guest context instead.
  char* (*GuestXDisplayString)(void*) = nullptr;
};


================================================
FILE: ThunkLibs/libEGL/libEGL_Guest.cpp
================================================
/*
$info$
tags: thunklibs|EGL
desc: Depends on glXGetProcAddress thunk
$end_info$
*/

#include <GL/glx.h>
#include <EGL/egl.h>

#include <stdio.h>
#include <cstring>

#include "common/Guest.h"

#include "thunkgen_guest_libEGL.inl"

typedef void voidFunc();


extern "C" {
// Resolves an EGL entry point by delegating to glXGetProcAddress.
voidFunc* eglGetProcAddress(const char* procname) {
  // TODO: Fix this HACK
  const auto* glx_name = (const GLubyte*)procname;
  return glXGetProcAddress(glx_name);
}
}

LOAD_LIB(libEGL)


================================================
FILE: ThunkLibs/libEGL/libEGL_Host.cpp
================================================
/*
$info$
tags: thunklibs|EGL
$end_info$
*/

#include <cstdio>
#include <dlfcn.h>

#include <EGL/egl.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "thunkgen_host_libEGL.inl"

EXPORTS(libEGL)


================================================
FILE: ThunkLibs/libEGL/libEGL_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <EGL/egl.h>

// Per-symbol configuration record, keyed on a non-type template argument (the functions listed below).
// The primary template carries a default `version` of 1; the specializations in this file are empty.
// NOTE(review): presumably consumed by the thunk generator rather than by runtime code — confirm.
template<auto>
struct fex_gen_config {
  unsigned version = 1;
};

// Per-parameter annotation record.
// Template arguments: function, parameter index, parameter type [optional, defaults to void].
// The primary template is empty; specializations attach annotations by deriving from a base type.
template<auto, int, typename = void>
struct fex_gen_param {};

// EGL entry points configured without any extra annotations (all specializations are empty).
template<>
struct fex_gen_config<eglBindAPI> {};
template<>
struct fex_gen_config<eglChooseConfig> {};
template<>
struct fex_gen_config<eglDestroyContext> {};
template<>
struct fex_gen_config<eglDestroySurface> {};
template<>
struct fex_gen_config<eglInitialize> {};
template<>
struct fex_gen_config<eglMakeCurrent> {};
template<>
struct fex_gen_config<eglQuerySurface> {};
template<>
struct fex_gen_config<eglSurfaceAttrib> {};
template<>
struct fex_gen_config<eglSwapBuffers> {};
template<>
struct fex_gen_config<eglTerminate> {};
template<>
struct fex_gen_config<eglGetError> {};
template<>
struct fex_gen_config<eglCreateContext> {};
template<>
struct fex_gen_config<eglCreateWindowSurface> {};
template<>
struct fex_gen_config<eglGetCurrentContext> {};
template<>
struct fex_gen_config<eglGetCurrentDisplay> {};
template<>
struct fex_gen_config<eglGetCurrentSurface> {};

// EGLNativeDisplayType is a pointer to opaque data (wl_display/(X)Display/...),
// so parameter 0 of eglGetDisplay is annotated with fexgen::assume_compatible_data_layout.
template<>
struct fex_gen_config<eglGetDisplay> {};
template<>
struct fex_gen_param<eglGetDisplay, 0, EGLNativeDisplayType> : fexgen::assume_compatible_data_layout {};


================================================
FILE: ThunkLibs/libGL/glcorearb.h
================================================
#ifndef __gl_glcorearb_h_
#define __gl_glcorearb_h_ 1

#ifdef __cplusplus
extern "C" {
#endif

/*
** Copyright (c) 2013-2018 The Khronos Group Inc.
**
** Permission is hereby granted, free of charge, to any person obtaining a
** copy of this software and/or associated documentation files (the
** "Materials"), to deal in the Materials without restriction, including
** without limitation the rights to use, copy, modify, merge, publish,
** distribute, sublicense, and/or sell copies of the Materials, and to
** permit persons to whom the Materials are furnished to do so, subject to
** the following conditions:
**
** The above copyright notice and this permission notice shall be included
** in all copies or substantial portions of the Materials.
**
** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
*/
/*
** This header is generated from the Khronos OpenGL / OpenGL ES XML
** API Registry. The current version of the Registry, generator scripts
** used to make the header, and the header can be found at
**   https://github.com/KhronosGroup/OpenGL-Registry
*/

#if defined(_WIN32) && !defined(APIENTRY) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__)
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN 1
#endif
#include <windows.h>
#endif

#ifndef APIENTRY
#define APIENTRY
#endif
#ifndef APIENTRYP
#define APIENTRYP APIENTRY*
#endif
#ifndef GLAPI
#define GLAPI extern
#endif

/* glcorearb.h is for use with OpenGL core profile implementations.
** It should be placed in the same directory as gl.h and
** included as <GL/glcorearb.h>.
**
** glcorearb.h includes only APIs in the latest OpenGL core profile
** implementation together with APIs in newer ARB extensions which
** can be supported by the core profile. It does not, and never will
** include functionality removed from the core profile, such as
** fixed-function vertex and fragment processing.
**
** Do not #include both <GL/glcorearb.h> and either of <GL/gl.h> or
** <GL/glext.h> in the same source file.
*/

/* Generated C header for:
 * API: gl
 * Profile: core
 * Versions considered: .*
 * Versions emitted: .*
 * Default extensions included: glcore
 * Additional extensions included: _nomatch_^
 * Extensions removed: _nomatch_^
 */

#ifndef GL_VERSION_1_0
#define GL_VERSION_1_0 1
typedef void GLvoid;
typedef unsigned int GLenum;
#include <KHR/khrplatform.h>
typedef khronos_float_t GLfloat;
typedef int GLint;
typedef int GLsizei;
typedef unsigned int GLbitfield;
typedef double GLdouble;
typedef unsigned int GLuint;
typedef unsigned char GLboolean;
typedef khronos_uint8_t GLubyte;
#define GL_DEPTH_BUFFER_BIT 0x00000100
#define GL_STENCIL_BUFFER_BIT 0x00000400
#define GL_COLOR_BUFFER_BIT 0x00004000
#define GL_FALSE 0
#define GL_TRUE 1
#define GL_POINTS 0x0000
#define GL_LINES 0x0001
#define GL_LINE_LOOP 0x0002
#define GL_LINE_STRIP 0x0003
#define GL_TRIANGLES 0x0004
#define GL_TRIANGLE_STRIP 0x0005
#define GL_TRIANGLE_FAN 0x0006
#define GL_QUADS 0x0007
#define GL_NEVER 0x0200
#define GL_LESS 0x0201
#define GL_EQUAL 0x0202
#define GL_LEQUAL 0x0203
#define GL_GREATER 0x0204
#define GL_NOTEQUAL 0x0205
#define GL_GEQUAL 0x0206
#define GL_ALWAYS 0x0207
#define GL_ZERO 0
#define GL_ONE 1
#define GL_SRC_COLOR 0x0300
#define GL_ONE_MINUS_SRC_COLOR 0x0301
#define GL_SRC_ALPHA 0x0302
#define GL_ONE_MINUS_SRC_ALPHA 0x0303
#define GL_DST_ALPHA 0x0304
#define GL_ONE_MINUS_DST_ALPHA 0x0305
#define GL_DST_COLOR 0x0306
#define GL_ONE_MINUS_DST_COLOR 0x0307
#define GL_SRC_ALPHA_SATURATE 0x0308
#define GL_NONE 0
#define GL_FRONT_LEFT 0x0400
#define GL_FRONT_RIGHT 0x0401
#define GL_BACK_LEFT 0x0402
#define GL_BACK_RIGHT 0x0403
#define GL_FRONT 0x0404
#define GL_BACK 0x0405
#define GL_LEFT 0x0406
#define GL_RIGHT 0x0407
#define GL_FRONT_AND_BACK 0x0408
#define GL_NO_ERROR 0
#define GL_INVALID_ENUM 0x0500
#define GL_INVALID_VALUE 0x0501
#define GL_INVALID_OPERATION 0x0502
#define GL_OUT_OF_MEMORY 0x0505
#define GL_CW 0x0900
#define GL_CCW 0x0901
#define GL_POINT_SIZE 0x0B11
#define GL_POINT_SIZE_RANGE 0x0B12
#define GL_POINT_SIZE_GRANULARITY 0x0B13
#define GL_LINE_SMOOTH 0x0B20
#define GL_LINE_WIDTH 0x0B21
#define GL_LINE_WIDTH_RANGE 0x0B22
#define GL_LINE_WIDTH_GRANULARITY 0x0B23
#define GL_POLYGON_MODE 0x0B40
#define GL_POLYGON_SMOOTH 0x0B41
#define GL_CULL_FACE 0x0B44
#define GL_CULL_FACE_MODE 0x0B45
#define GL_FRONT_FACE 0x0B46
#define GL_DEPTH_RANGE 0x0B70
#define GL_DEPTH_TEST 0x0B71
#define GL_DEPTH_WRITEMASK 0x0B72
#define GL_DEPTH_CLEAR_VALUE 0x0B73
#define GL_DEPTH_FUNC 0x0B74
#define GL_STENCIL_TEST 0x0B90
#define GL_STENCIL_CLEAR_VALUE 0x0B91
#define GL_STENCIL_FUNC 0x0B92
#define GL_STENCIL_VALUE_MASK 0x0B93
#define GL_STENCIL_FAIL 0x0B94
#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95
#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96
#define GL_STENCIL_REF 0x0B97
#define GL_STENCIL_WRITEMASK 0x0B98
#define GL_VIEWPORT 0x0BA2
#define GL_DITHER 0x0BD0
#define GL_BLEND_DST 0x0BE0
#define GL_BLEND_SRC 0x0BE1
#define GL_BLEND 0x0BE2
#define GL_LOGIC_OP_MODE 0x0BF0
#define GL_DRAW_BUFFER 0x0C01
#define GL_READ_BUFFER 0x0C02
#define GL_SCISSOR_BOX 0x0C10
#define GL_SCISSOR_TEST 0x0C11
#define GL_COLOR_CLEAR_VALUE 0x0C22
#define GL_COLOR_WRITEMASK 0x0C23
#define GL_DOUBLEBUFFER 0x0C32
#define GL_STEREO 0x0C33
#define GL_LINE_SMOOTH_HINT 0x0C52
#define GL_POLYGON_SMOOTH_HINT 0x0C53
#define GL_UNPACK_SWAP_BYTES 0x0CF0
#define GL_UNPACK_LSB_FIRST 0x0CF1
#define GL_UNPACK_ROW_LENGTH 0x0CF2
#define GL_UNPACK_SKIP_ROWS 0x0CF3
#define GL_UNPACK_SKIP_PIXELS 0x0CF4
#define GL_UNPACK_ALIGNMENT 0x0CF5
#define GL_PACK_SWAP_BYTES 0x0D00
#define GL_PACK_LSB_FIRST 0x0D01
#define GL_PACK_ROW_LENGTH 0x0D02
#define GL_PACK_SKIP_ROWS 0x0D03
#define GL_PACK_SKIP_PIXELS 0x0D04
#define GL_PACK_ALIGNMENT 0x0D05
#define GL_MAX_TEXTURE_SIZE 0x0D33
#define GL_MAX_VIEWPORT_DIMS 0x0D3A
#define GL_SUBPIXEL_BITS 0x0D50
#define GL_TEXTURE_1D 0x0DE0
#define GL_TEXTURE_2D 0x0DE1
#define GL_TEXTURE_WIDTH 0x1000
#define GL_TEXTURE_HEIGHT 0x1001
#define GL_TEXTURE_BORDER_COLOR 0x1004
#define GL_DONT_CARE 0x1100
#define GL_FASTEST 0x1101
#define GL_NICEST 0x1102
#define GL_BYTE 0x1400
#define GL_UNSIGNED_BYTE 0x1401
#define GL_SHORT 0x1402
#define GL_UNSIGNED_SHORT 0x1403
#define GL_INT 0x1404
#define GL_UNSIGNED_INT 0x1405
#define GL_FLOAT 0x1406
#define GL_STACK_OVERFLOW 0x0503
#define GL_STACK_UNDERFLOW 0x0504
#define GL_CLEAR 0x1500
#define GL_AND 0x1501
#define GL_AND_REVERSE 0x1502
#define GL_COPY 0x1503
#define GL_AND_INVERTED 0x1504
#define GL_NOOP 0x1505
#define GL_XOR 0x1506
#define GL_OR 0x1507
#define GL_NOR 0x1508
#define GL_EQUIV 0x1509
#define GL_INVERT 0x150A
#define GL_OR_REVERSE 0x150B
#define GL_COPY_INVERTED 0x150C
#define GL_OR_INVERTED 0x150D
#define GL_NAND 0x150E
#define GL_SET 0x150F
#define GL_TEXTURE 0x1702
#define GL_COLOR 0x1800
#define GL_DEPTH 0x1801
#define GL_STENCIL 0x1802
#define GL_STENCIL_INDEX 0x1901
#define GL_DEPTH_COMPONENT 0x1902
#define GL_RED 0x1903
#define GL_GREEN 0x1904
#define GL_BLUE 0x1905
#define GL_ALPHA 0x1906
#define GL_RGB 0x1907
#define GL_RGBA 0x1908
#define GL_POINT 0x1B00
#define GL_LINE 0x1B01
#define GL_FILL 0x1B02
#define GL_KEEP 0x1E00
#define GL_REPLACE 0x1E01
#define GL_INCR 0x1E02
#define GL_DECR 0x1E03
#define GL_VENDOR 0x1F00
#define GL_RENDERER 0x1F01
#define GL_VERSION 0x1F02
#define GL_EXTENSIONS 0x1F03
#define GL_NEAREST 0x2600
#define GL_LINEAR 0x2601
#define GL_NEAREST_MIPMAP_NEAREST 0x2700
#define GL_LINEAR_MIPMAP_NEAREST 0x2701
#define GL_NEAREST_MIPMAP_LINEAR 0x2702
#define GL_LINEAR_MIPMAP_LINEAR 0x2703
#define GL_TEXTURE_MAG_FILTER 0x2800
#define GL_TEXTURE_MIN_FILTER 0x2801
#define GL_TEXTURE_WRAP_S 0x2802
#define GL_TEXTURE_WRAP_T 0x2803
#define GL_REPEAT 0x2901
typedef void(APIENTRYP PFNGLCULLFACEPROC)(GLenum mode);
typedef void(APIENTRYP PFNGLFRONTFACEPROC)(GLenum mode);
typedef void(APIENTRYP PFNGLHINTPROC)(GLenum target, GLenum mode);
typedef void(APIENTRYP PFNGLLINEWIDTHPROC)(GLfloat width);
typedef void(APIENTRYP PFNGLPOINTSIZEPROC)(GLfloat size);
typedef void(APIENTRYP PFNGLPOLYGONMODEPROC)(GLenum face, GLenum mode);
typedef void(APIENTRYP PFNGLSCISSORPROC)(GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXPARAMETERFPROC)(GLenum target, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLTEXPARAMETERFVPROC)(GLenum target, GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLTEXPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLTEXPARAMETERIVPROC)(GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXIMAGE1DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format,
                                            GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXIMAGE2DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border,
                                            GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLDRAWBUFFERPROC)(GLenum buf);
typedef void(APIENTRYP PFNGLCLEARPROC)(GLbitfield mask);
typedef void(APIENTRYP PFNGLCLEARCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
typedef void(APIENTRYP PFNGLCLEARSTENCILPROC)(GLint s);
typedef void(APIENTRYP PFNGLCLEARDEPTHPROC)(GLdouble depth);
typedef void(APIENTRYP PFNGLSTENCILMASKPROC)(GLuint mask);
typedef void(APIENTRYP PFNGLCOLORMASKPROC)(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha);
typedef void(APIENTRYP PFNGLDEPTHMASKPROC)(GLboolean flag);
typedef void(APIENTRYP PFNGLDISABLEPROC)(GLenum cap);
typedef void(APIENTRYP PFNGLENABLEPROC)(GLenum cap);
typedef void(APIENTRYP PFNGLFINISHPROC)(void);
typedef void(APIENTRYP PFNGLFLUSHPROC)(void);
typedef void(APIENTRYP PFNGLBLENDFUNCPROC)(GLenum sfactor, GLenum dfactor);
typedef void(APIENTRYP PFNGLLOGICOPPROC)(GLenum opcode);
typedef void(APIENTRYP PFNGLSTENCILFUNCPROC)(GLenum func, GLint ref, GLuint mask);
typedef void(APIENTRYP PFNGLSTENCILOPPROC)(GLenum fail, GLenum zfail, GLenum zpass);
typedef void(APIENTRYP PFNGLDEPTHFUNCPROC)(GLenum func);
typedef void(APIENTRYP PFNGLPIXELSTOREFPROC)(GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLPIXELSTOREIPROC)(GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLREADBUFFERPROC)(GLenum src);
typedef void(APIENTRYP PFNGLREADPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void* pixels);
typedef void(APIENTRYP PFNGLGETBOOLEANVPROC)(GLenum pname, GLboolean* data);
typedef void(APIENTRYP PFNGLGETDOUBLEVPROC)(GLenum pname, GLdouble* data);
typedef GLenum(APIENTRYP PFNGLGETERRORPROC)(void);
typedef void(APIENTRYP PFNGLGETFLOATVPROC)(GLenum pname, GLfloat* data);
typedef void(APIENTRYP PFNGLGETINTEGERVPROC)(GLenum pname, GLint* data);
typedef const GLubyte*(APIENTRYP PFNGLGETSTRINGPROC)(GLenum name);
typedef void(APIENTRYP PFNGLGETTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
typedef void(APIENTRYP PFNGLGETTEXPARAMETERFVPROC)(GLenum target, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXLEVELPARAMETERFVPROC)(GLenum target, GLint level, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXLEVELPARAMETERIVPROC)(GLenum target, GLint level, GLenum pname, GLint* params);
typedef GLboolean(APIENTRYP PFNGLISENABLEDPROC)(GLenum cap);
typedef void(APIENTRYP PFNGLDEPTHRANGEPROC)(GLdouble n, GLdouble f);
typedef void(APIENTRYP PFNGLVIEWPORTPROC)(GLint x, GLint y, GLsizei width, GLsizei height);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glCullFace(GLenum mode);
GLAPI void APIENTRY glFrontFace(GLenum mode);
GLAPI void APIENTRY glHint(GLenum target, GLenum mode);
GLAPI void APIENTRY glLineWidth(GLfloat width);
GLAPI void APIENTRY glPointSize(GLfloat size);
GLAPI void APIENTRY glPolygonMode(GLenum face, GLenum mode);
GLAPI void APIENTRY glScissor(GLint x, GLint y, GLsizei width, GLsizei height);
GLAPI void APIENTRY glTexParameterf(GLenum target, GLenum pname, GLfloat param);
GLAPI void APIENTRY glTexParameterfv(GLenum target, GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glTexParameteri(GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glTexParameteriv(GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glTexImage1D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border, GLenum format, GLenum type,
                                 const void* pixels);
GLAPI void APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border,
                                 GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glDrawBuffer(GLenum buf);
GLAPI void APIENTRY glClear(GLbitfield mask);
GLAPI void APIENTRY glClearColor(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
GLAPI void APIENTRY glClearStencil(GLint s);
GLAPI void APIENTRY glClearDepth(GLdouble depth);
GLAPI void APIENTRY glStencilMask(GLuint mask);
GLAPI void APIENTRY glColorMask(GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha);
GLAPI void APIENTRY glDepthMask(GLboolean flag);
GLAPI void APIENTRY glDisable(GLenum cap);
GLAPI void APIENTRY glEnable(GLenum cap);
GLAPI void APIENTRY glFinish(void);
GLAPI void APIENTRY glFlush(void);
GLAPI void APIENTRY glBlendFunc(GLenum sfactor, GLenum dfactor);
GLAPI void APIENTRY glLogicOp(GLenum opcode);
GLAPI void APIENTRY glStencilFunc(GLenum func, GLint ref, GLuint mask);
GLAPI void APIENTRY glStencilOp(GLenum fail, GLenum zfail, GLenum zpass);
GLAPI void APIENTRY glDepthFunc(GLenum func);
GLAPI void APIENTRY glPixelStoref(GLenum pname, GLfloat param);
GLAPI void APIENTRY glPixelStorei(GLenum pname, GLint param);
GLAPI void APIENTRY glReadBuffer(GLenum src);
GLAPI void APIENTRY glReadPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void* pixels);
GLAPI void APIENTRY glGetBooleanv(GLenum pname, GLboolean* data);
GLAPI void APIENTRY glGetDoublev(GLenum pname, GLdouble* data);
GLAPI GLenum APIENTRY glGetError(void);
GLAPI void APIENTRY glGetFloatv(GLenum pname, GLfloat* data);
GLAPI void APIENTRY glGetIntegerv(GLenum pname, GLint* data);
GLAPI const GLubyte* APIENTRY glGetString(GLenum name);
GLAPI void APIENTRY glGetTexImage(GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
GLAPI void APIENTRY glGetTexParameterfv(GLenum target, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTexParameteriv(GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTexLevelParameterfv(GLenum target, GLint level, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTexLevelParameteriv(GLenum target, GLint level, GLenum pname, GLint* params);
GLAPI GLboolean APIENTRY glIsEnabled(GLenum cap);
GLAPI void APIENTRY glDepthRange(GLdouble n, GLdouble f);
GLAPI void APIENTRY glViewport(GLint x, GLint y, GLsizei width, GLsizei height);
#endif
#endif /* GL_VERSION_1_0 */

#ifndef GL_VERSION_1_1
/*
 * GL_VERSION_1_1 section.
 *
 * Everything between the #ifndef above and the matching #endif is skipped if
 * GL_VERSION_1_1 is already defined, so these declarations are emitted at most
 * once per translation unit. The section contributes, in order:
 *   - two scalar typedefs (GLclampf, GLclampd),
 *   - numeric enumerant tokens,
 *   - PFNGL...PROC function-pointer typedefs (always declared),
 *   - the matching function prototypes (only under GL_GLEXT_PROTOTYPES).
 *
 * APIENTRYP, APIENTRY, GLAPI, khronos_float_t and the GLenum/GLint/GLsizei/...
 * parameter types are not defined in this section; they must already be
 * declared by the time this point is reached.
 */
#define GL_VERSION_1_1 1
/* GLclampf aliases khronos_float_t; GLclampd is a plain double. */
typedef khronos_float_t GLclampf;
typedef double GLclampd;
/* Enumerant tokens. Values are fixed numeric constants and are not in ascending order. */
#define GL_COLOR_LOGIC_OP 0x0BF2
#define GL_POLYGON_OFFSET_UNITS 0x2A00
#define GL_POLYGON_OFFSET_POINT 0x2A01
#define GL_POLYGON_OFFSET_LINE 0x2A02
#define GL_POLYGON_OFFSET_FILL 0x8037
#define GL_POLYGON_OFFSET_FACTOR 0x8038
#define GL_TEXTURE_BINDING_1D 0x8068
#define GL_TEXTURE_BINDING_2D 0x8069
#define GL_TEXTURE_INTERNAL_FORMAT 0x1003
#define GL_TEXTURE_RED_SIZE 0x805C
#define GL_TEXTURE_GREEN_SIZE 0x805D
#define GL_TEXTURE_BLUE_SIZE 0x805E
#define GL_TEXTURE_ALPHA_SIZE 0x805F
#define GL_DOUBLE 0x140A
#define GL_PROXY_TEXTURE_1D 0x8063
#define GL_PROXY_TEXTURE_2D 0x8064
/* Sized colour-format tokens; GL_RGB4 through GL_RGBA16 occupy the contiguous range 0x804F..0x805B. */
#define GL_R3_G3_B2 0x2A10
#define GL_RGB4 0x804F
#define GL_RGB5 0x8050
#define GL_RGB8 0x8051
#define GL_RGB10 0x8052
#define GL_RGB12 0x8053
#define GL_RGB16 0x8054
#define GL_RGBA2 0x8055
#define GL_RGBA4 0x8056
#define GL_RGB5_A1 0x8057
#define GL_RGBA8 0x8058
#define GL_RGB10_A2 0x8059
#define GL_RGBA12 0x805A
#define GL_RGBA16 0x805B
#define GL_VERTEX_ARRAY 0x8074
/*
 * Function-pointer types, one per entry point. Each PFNGL<NAME>PROC has the
 * same return type and parameter list as the gl<Name> prototype declared in
 * the GL_GLEXT_PROTOTYPES block below.
 */
typedef void(APIENTRYP PFNGLDRAWARRAYSPROC)(GLenum mode, GLint first, GLsizei count);
typedef void(APIENTRYP PFNGLDRAWELEMENTSPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices);
typedef void(APIENTRYP PFNGLGETPOINTERVPROC)(GLenum pname, void** params);
typedef void(APIENTRYP PFNGLPOLYGONOFFSETPROC)(GLfloat factor, GLfloat units);
typedef void(APIENTRYP PFNGLCOPYTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
typedef void(APIENTRYP PFNGLCOPYTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width,
                                                GLsizei height, GLint border);
typedef void(APIENTRYP PFNGLCOPYTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
typedef void(APIENTRYP PFNGLCOPYTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y,
                                                   GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type,
                                               const void* pixels);
typedef void(APIENTRYP PFNGLTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                               GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLBINDTEXTUREPROC)(GLenum target, GLuint texture);
typedef void(APIENTRYP PFNGLDELETETEXTURESPROC)(GLsizei n, const GLuint* textures);
typedef void(APIENTRYP PFNGLGENTEXTURESPROC)(GLsizei n, GLuint* textures);
typedef GLboolean(APIENTRYP PFNGLISTEXTUREPROC)(GLuint texture);
/*
 * Direct prototypes are opt-in: they are only declared when the including code
 * defines GL_GLEXT_PROTOTYPES. Without it, only the pointer types above exist.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count);
GLAPI void APIENTRY glDrawElements(GLenum mode, GLsizei count, GLenum type, const void* indices);
GLAPI void APIENTRY glGetPointerv(GLenum pname, void** params);
GLAPI void APIENTRY glPolygonOffset(GLfloat factor, GLfloat units);
GLAPI void APIENTRY glCopyTexImage1D(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLint border);
GLAPI void APIENTRY glCopyTexImage2D(GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height,
                                     GLint border);
GLAPI void APIENTRY glCopyTexSubImage1D(GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
GLAPI void APIENTRY glCopyTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height);
GLAPI void APIENTRY glTexSubImage1D(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format,
                                    GLenum type, const void* pixels);
GLAPI void APIENTRY glBindTexture(GLenum target, GLuint texture);
GLAPI void APIENTRY glDeleteTextures(GLsizei n, const GLuint* textures);
GLAPI void APIENTRY glGenTextures(GLsizei n, GLuint* textures);
GLAPI GLboolean APIENTRY glIsTexture(GLuint texture);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_1_1 */

#ifndef GL_VERSION_1_2
/*
 * GL_VERSION_1_2 section.
 *
 * Guarded by GL_VERSION_1_2 so it is emitted at most once. It adds enumerant
 * tokens, four PFNGL...PROC function-pointer typedefs, and (only under
 * GL_GLEXT_PROTOTYPES) the four matching prototypes. Every entry point in this
 * section takes a z/depth-related argument or an explicit index range.
 *
 * APIENTRYP, APIENTRY, GLAPI and the GL scalar types used in the signatures
 * are declared outside this section.
 */
#define GL_VERSION_1_2 1
/* Packed pixel-type tokens (component bit widths are spelled out in the names). */
#define GL_UNSIGNED_BYTE_3_3_2 0x8032
#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033
#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034
#define GL_UNSIGNED_INT_8_8_8_8 0x8035
#define GL_UNSIGNED_INT_10_10_10_2 0x8036
/* 3D-texture and image-height/skip tokens; contiguous range 0x806A..0x8073. */
#define GL_TEXTURE_BINDING_3D 0x806A
#define GL_PACK_SKIP_IMAGES 0x806B
#define GL_PACK_IMAGE_HEIGHT 0x806C
#define GL_UNPACK_SKIP_IMAGES 0x806D
#define GL_UNPACK_IMAGE_HEIGHT 0x806E
#define GL_TEXTURE_3D 0x806F
#define GL_PROXY_TEXTURE_3D 0x8070
#define GL_TEXTURE_DEPTH 0x8071
#define GL_TEXTURE_WRAP_R 0x8072
#define GL_MAX_3D_TEXTURE_SIZE 0x8073
/* More packed pixel-type tokens, including the _REV variants; contiguous range 0x8362..0x8368. */
#define GL_UNSIGNED_BYTE_2_3_3_REV 0x8362
#define GL_UNSIGNED_SHORT_5_6_5 0x8363
#define GL_UNSIGNED_SHORT_5_6_5_REV 0x8364
#define GL_UNSIGNED_SHORT_4_4_4_4_REV 0x8365
#define GL_UNSIGNED_SHORT_1_5_5_5_REV 0x8366
#define GL_UNSIGNED_INT_8_8_8_8_REV 0x8367
#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368
#define GL_BGR 0x80E0
#define GL_BGRA 0x80E1
#define GL_MAX_ELEMENTS_VERTICES 0x80E8
#define GL_MAX_ELEMENTS_INDICES 0x80E9
#define GL_CLAMP_TO_EDGE 0x812F
#define GL_TEXTURE_MIN_LOD 0x813A
#define GL_TEXTURE_MAX_LOD 0x813B
#define GL_TEXTURE_BASE_LEVEL 0x813C
#define GL_TEXTURE_MAX_LEVEL 0x813D
/*
 * The four GL_SMOOTH_* tokens below reuse the numeric values of the
 * GL_POINT_SIZE_RANGE / GL_POINT_SIZE_GRANULARITY / GL_LINE_WIDTH_RANGE /
 * GL_LINE_WIDTH_GRANULARITY tokens defined earlier in this file
 * (0x0B12, 0x0B13, 0x0B22, 0x0B23), so each pair is interchangeable as a value.
 */
#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12
#define GL_SMOOTH_POINT_SIZE_GRANULARITY 0x0B13
#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22
#define GL_SMOOTH_LINE_WIDTH_GRANULARITY 0x0B23
#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E
/* Function-pointer types; signatures match the prototypes in the GL_GLEXT_PROTOTYPES block below. */
typedef void(APIENTRYP PFNGLDRAWRANGEELEMENTSPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void* indices);
typedef void(APIENTRYP PFNGLTEXIMAGE3DPROC)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth,
                                            GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                               GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOPYTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x,
                                                   GLint y, GLsizei width, GLsizei height);
/* Direct prototypes are declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawRangeElements(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void* indices);
GLAPI void APIENTRY glTexImage3D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth,
                                 GLint border, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTexSubImage3D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height,
                                    GLsizei depth, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCopyTexSubImage3D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y,
                                        GLsizei width, GLsizei height);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_1_2 */

#ifndef GL_VERSION_1_3
/*
 * GL_VERSION_1_3 section.
 *
 * Guarded by GL_VERSION_1_3 so it is emitted at most once. It adds the
 * GL_TEXTURE0..GL_TEXTURE31 unit tokens, multisample, cube-map and
 * compressed-texture tokens, nine PFNGL...PROC function-pointer typedefs, and
 * (only under GL_GLEXT_PROTOTYPES) the nine matching prototypes.
 *
 * APIENTRYP, APIENTRY, GLAPI and the GL scalar types used in the signatures
 * are declared outside this section.
 */
#define GL_VERSION_1_3 1
/*
 * Texture-unit tokens. The 32 values are consecutive (0x84C0..0x84DF), so for
 * the tokens defined here GL_TEXTUREi == GL_TEXTURE0 + i.
 */
#define GL_TEXTURE0 0x84C0
#define GL_TEXTURE1 0x84C1
#define GL_TEXTURE2 0x84C2
#define GL_TEXTURE3 0x84C3
#define GL_TEXTURE4 0x84C4
#define GL_TEXTURE5 0x84C5
#define GL_TEXTURE6 0x84C6
#define GL_TEXTURE7 0x84C7
#define GL_TEXTURE8 0x84C8
#define GL_TEXTURE9 0x84C9
#define GL_TEXTURE10 0x84CA
#define GL_TEXTURE11 0x84CB
#define GL_TEXTURE12 0x84CC
#define GL_TEXTURE13 0x84CD
#define GL_TEXTURE14 0x84CE
#define GL_TEXTURE15 0x84CF
#define GL_TEXTURE16 0x84D0
#define GL_TEXTURE17 0x84D1
#define GL_TEXTURE18 0x84D2
#define GL_TEXTURE19 0x84D3
#define GL_TEXTURE20 0x84D4
#define GL_TEXTURE21 0x84D5
#define GL_TEXTURE22 0x84D6
#define GL_TEXTURE23 0x84D7
#define GL_TEXTURE24 0x84D8
#define GL_TEXTURE25 0x84D9
#define GL_TEXTURE26 0x84DA
#define GL_TEXTURE27 0x84DB
#define GL_TEXTURE28 0x84DC
#define GL_TEXTURE29 0x84DD
#define GL_TEXTURE30 0x84DE
#define GL_TEXTURE31 0x84DF
#define GL_ACTIVE_TEXTURE 0x84E0
/* Multisample / sample-coverage tokens. */
#define GL_MULTISAMPLE 0x809D
#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E
#define GL_SAMPLE_ALPHA_TO_ONE 0x809F
#define GL_SAMPLE_COVERAGE 0x80A0
#define GL_SAMPLE_BUFFERS 0x80A8
#define GL_SAMPLES 0x80A9
#define GL_SAMPLE_COVERAGE_VALUE 0x80AA
#define GL_SAMPLE_COVERAGE_INVERT 0x80AB
/*
 * Cube-map tokens. The six face tokens are consecutive (0x8515..0x851A) in the
 * order +X, -X, +Y, -Y, +Z, -Z, so a face can be addressed as
 * GL_TEXTURE_CUBE_MAP_POSITIVE_X + n for n in 0..5.
 */
#define GL_TEXTURE_CUBE_MAP 0x8513
#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514
#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515
#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516
#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517
#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518
#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519
#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A
#define GL_PROXY_TEXTURE_CUBE_MAP 0x851B
#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C
/* Compressed-texture tokens. */
#define GL_COMPRESSED_RGB 0x84ED
#define GL_COMPRESSED_RGBA 0x84EE
#define GL_TEXTURE_COMPRESSION_HINT 0x84EF
#define GL_TEXTURE_COMPRESSED_IMAGE_SIZE 0x86A0
#define GL_TEXTURE_COMPRESSED 0x86A1
#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2
#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3
#define GL_CLAMP_TO_BORDER 0x812D
/*
 * Function-pointer types; signatures match the prototypes in the
 * GL_GLEXT_PROTOTYPES block below. Note the declaration order of the
 * compressed-image entry points is 3D, 2D, 1D.
 */
typedef void(APIENTRYP PFNGLACTIVETEXTUREPROC)(GLenum texture);
typedef void(APIENTRYP PFNGLSAMPLECOVERAGEPROC)(GLfloat value, GLboolean invert);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height,
                                                      GLsizei depth, GLint border, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXIMAGE2DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height,
                                                      GLint border, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXIMAGE1DPROC)(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border,
                                                      GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                         GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width,
                                                         GLsizei height, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE1DPROC)(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format,
                                                         GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint level, void* img);
/* Direct prototypes are declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glActiveTexture(GLenum texture);
GLAPI void APIENTRY glSampleCoverage(GLfloat value, GLboolean invert);
GLAPI void APIENTRY glCompressedTexImage3D(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth,
                                           GLint border, GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTexImage2D(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border,
                                           GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTexImage1D(GLenum target, GLint level, GLenum internalformat, GLsizei width, GLint border,
                                           GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTexSubImage3D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                              GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTexSubImage2D(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                              GLenum format, GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTexSubImage1D(GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format, GLsizei imageSize,
                                              const void* data);
GLAPI void APIENTRY glGetCompressedTexImage(GLenum target, GLint level, void* img);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_1_3 */

#ifndef GL_VERSION_1_4
/*
 * GL_VERSION_1_4 section.
 *
 * Guarded by GL_VERSION_1_4 so it is emitted at most once. It adds blend,
 * depth-format, LOD-bias, stencil-wrap and texture-compare tokens, nine
 * PFNGL...PROC function-pointer typedefs, and (only under GL_GLEXT_PROTOTYPES)
 * the nine matching prototypes.
 *
 * APIENTRYP, APIENTRY, GLAPI and the GL scalar types used in the signatures
 * are declared outside this section.
 */
#define GL_VERSION_1_4 1
/* Separate RGB/alpha blend-factor query tokens; contiguous range 0x80C8..0x80CB (DST before SRC). */
#define GL_BLEND_DST_RGB 0x80C8
#define GL_BLEND_SRC_RGB 0x80C9
#define GL_BLEND_DST_ALPHA 0x80CA
#define GL_BLEND_SRC_ALPHA 0x80CB
#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128
#define GL_DEPTH_COMPONENT16 0x81A5
#define GL_DEPTH_COMPONENT24 0x81A6
#define GL_DEPTH_COMPONENT32 0x81A7
#define GL_MIRRORED_REPEAT 0x8370
#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD
#define GL_TEXTURE_LOD_BIAS 0x8501
#define GL_INCR_WRAP 0x8507
#define GL_DECR_WRAP 0x8508
#define GL_TEXTURE_DEPTH_SIZE 0x884A
#define GL_TEXTURE_COMPARE_MODE 0x884C
#define GL_TEXTURE_COMPARE_FUNC 0x884D
/*
 * Blend colour / blend equation tokens. Together these cover 0x8001..0x800B,
 * but they are NOT listed in numeric order (e.g. GL_FUNC_REVERSE_SUBTRACT,
 * 0x800B, precedes GL_FUNC_SUBTRACT, 0x800A); do not infer values from position.
 */
#define GL_BLEND_COLOR 0x8005
#define GL_BLEND_EQUATION 0x8009
#define GL_CONSTANT_COLOR 0x8001
#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002
#define GL_CONSTANT_ALPHA 0x8003
#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004
#define GL_FUNC_ADD 0x8006
#define GL_FUNC_REVERSE_SUBTRACT 0x800B
#define GL_FUNC_SUBTRACT 0x800A
#define GL_MIN 0x8007
#define GL_MAX 0x8008
/* Function-pointer types; signatures match the prototypes in the GL_GLEXT_PROTOTYPES block below. */
typedef void(APIENTRYP PFNGLBLENDFUNCSEPARATEPROC)(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSPROC)(GLenum mode, const GLint* first, const GLsizei* count, GLsizei drawcount);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSPROC)(GLenum mode, const GLsizei* count, GLenum type, const void* const* indices, GLsizei drawcount);
typedef void(APIENTRYP PFNGLPOINTPARAMETERFPROC)(GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLPOINTPARAMETERFVPROC)(GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLPOINTPARAMETERIPROC)(GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLPOINTPARAMETERIVPROC)(GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLBLENDCOLORPROC)(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
typedef void(APIENTRYP PFNGLBLENDEQUATIONPROC)(GLenum mode);
/* Direct prototypes are declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBlendFuncSeparate(GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha);
GLAPI void APIENTRY glMultiDrawArrays(GLenum mode, const GLint* first, const GLsizei* count, GLsizei drawcount);
GLAPI void APIENTRY glMultiDrawElements(GLenum mode, const GLsizei* count, GLenum type, const void* const* indices, GLsizei drawcount);
GLAPI void APIENTRY glPointParameterf(GLenum pname, GLfloat param);
GLAPI void APIENTRY glPointParameterfv(GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glPointParameteri(GLenum pname, GLint param);
GLAPI void APIENTRY glPointParameteriv(GLenum pname, const GLint* params);
GLAPI void APIENTRY glBlendColor(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha);
GLAPI void APIENTRY glBlendEquation(GLenum mode);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_1_4 */

#ifndef GL_VERSION_1_5
/*
 * GL_VERSION_1_5 section.
 *
 * Guarded by GL_VERSION_1_5 so it is emitted at most once. It adds the
 * GLsizeiptr / GLintptr typedefs, buffer-object and query-object tokens,
 * nineteen PFNGL...PROC function-pointer typedefs, and (only under
 * GL_GLEXT_PROTOTYPES) the nineteen matching prototypes.
 *
 * APIENTRYP, APIENTRY, GLAPI, khronos_ssize_t, khronos_intptr_t and the other
 * GL scalar types used in the signatures are declared outside this section.
 */
#define GL_VERSION_1_5 1
/*
 * Size and offset types used by the buffer entry points below. They alias
 * khronos_ssize_t and khronos_intptr_t respectively; their actual width is
 * decided wherever those khronos_* types are defined, not here.
 */
typedef khronos_ssize_t GLsizeiptr;
typedef khronos_intptr_t GLintptr;
#define GL_BUFFER_SIZE 0x8764
#define GL_BUFFER_USAGE 0x8765
/* Query-object tokens. */
#define GL_QUERY_COUNTER_BITS 0x8864
#define GL_CURRENT_QUERY 0x8865
#define GL_QUERY_RESULT 0x8866
#define GL_QUERY_RESULT_AVAILABLE 0x8867
/* Buffer binding-point and binding-query tokens. */
#define GL_ARRAY_BUFFER 0x8892
#define GL_ELEMENT_ARRAY_BUFFER 0x8893
#define GL_ARRAY_BUFFER_BINDING 0x8894
#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895
#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F
/* Access-mode and mapping-state tokens; contiguous range 0x88B8..0x88BD. */
#define GL_READ_ONLY 0x88B8
#define GL_WRITE_ONLY 0x88B9
#define GL_READ_WRITE 0x88BA
#define GL_BUFFER_ACCESS 0x88BB
#define GL_BUFFER_MAPPED 0x88BC
#define GL_BUFFER_MAP_POINTER 0x88BD
/*
 * Usage tokens: {STREAM, STATIC, DYNAMIC} x {DRAW, READ, COPY}. Each group of
 * three is consecutive, but the groups start at 0x88E0, 0x88E4 and 0x88E8, so
 * 0x88E3 and 0x88E7 are not assigned here.
 */
#define GL_STREAM_DRAW 0x88E0
#define GL_STREAM_READ 0x88E1
#define GL_STREAM_COPY 0x88E2
#define GL_STATIC_DRAW 0x88E4
#define GL_STATIC_READ 0x88E5
#define GL_STATIC_COPY 0x88E6
#define GL_DYNAMIC_DRAW 0x88E8
#define GL_DYNAMIC_READ 0x88E9
#define GL_DYNAMIC_COPY 0x88EA
#define GL_SAMPLES_PASSED 0x8914
#define GL_SRC1_ALPHA 0x8589
/*
 * Function-pointer types; signatures match the prototypes in the
 * GL_GLEXT_PROTOTYPES block below. Query entry points come first, then the
 * buffer entry points.
 */
typedef void(APIENTRYP PFNGLGENQUERIESPROC)(GLsizei n, GLuint* ids);
typedef void(APIENTRYP PFNGLDELETEQUERIESPROC)(GLsizei n, const GLuint* ids);
typedef GLboolean(APIENTRYP PFNGLISQUERYPROC)(GLuint id);
typedef void(APIENTRYP PFNGLBEGINQUERYPROC)(GLenum target, GLuint id);
typedef void(APIENTRYP PFNGLENDQUERYPROC)(GLenum target);
typedef void(APIENTRYP PFNGLGETQUERYIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETQUERYOBJECTIVPROC)(GLuint id, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETQUERYOBJECTUIVPROC)(GLuint id, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLBINDBUFFERPROC)(GLenum target, GLuint buffer);
typedef void(APIENTRYP PFNGLDELETEBUFFERSPROC)(GLsizei n, const GLuint* buffers);
typedef void(APIENTRYP PFNGLGENBUFFERSPROC)(GLsizei n, GLuint* buffers);
typedef GLboolean(APIENTRYP PFNGLISBUFFERPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLBUFFERDATAPROC)(GLenum target, GLsizeiptr size, const void* data, GLenum usage);
typedef void(APIENTRYP PFNGLBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, const void* data);
typedef void(APIENTRYP PFNGLGETBUFFERSUBDATAPROC)(GLenum target, GLintptr offset, GLsizeiptr size, void* data);
typedef void*(APIENTRYP PFNGLMAPBUFFERPROC)(GLenum target, GLenum access);
typedef GLboolean(APIENTRYP PFNGLUNMAPBUFFERPROC)(GLenum target);
typedef void(APIENTRYP PFNGLGETBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETBUFFERPOINTERVPROC)(GLenum target, GLenum pname, void** params);
/* Direct prototypes are declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glGenQueries(GLsizei n, GLuint* ids);
GLAPI void APIENTRY glDeleteQueries(GLsizei n, const GLuint* ids);
GLAPI GLboolean APIENTRY glIsQuery(GLuint id);
GLAPI void APIENTRY glBeginQuery(GLenum target, GLuint id);
GLAPI void APIENTRY glEndQuery(GLenum target);
GLAPI void APIENTRY glGetQueryiv(GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetQueryObjectiv(GLuint id, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetQueryObjectuiv(GLuint id, GLenum pname, GLuint* params);
GLAPI void APIENTRY glBindBuffer(GLenum target, GLuint buffer);
GLAPI void APIENTRY glDeleteBuffers(GLsizei n, const GLuint* buffers);
GLAPI void APIENTRY glGenBuffers(GLsizei n, GLuint* buffers);
GLAPI GLboolean APIENTRY glIsBuffer(GLuint buffer);
GLAPI void APIENTRY glBufferData(GLenum target, GLsizeiptr size, const void* data, GLenum usage);
GLAPI void APIENTRY glBufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, const void* data);
GLAPI void APIENTRY glGetBufferSubData(GLenum target, GLintptr offset, GLsizeiptr size, void* data);
GLAPI void* APIENTRY glMapBuffer(GLenum target, GLenum access);
GLAPI GLboolean APIENTRY glUnmapBuffer(GLenum target);
GLAPI void APIENTRY glGetBufferParameteriv(GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetBufferPointerv(GLenum target, GLenum pname, void** params);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_1_5 */

#ifndef GL_VERSION_2_0
#define GL_VERSION_2_0 1
typedef char GLchar;
typedef khronos_int16_t GLshort;
typedef khronos_int8_t GLbyte;
typedef khronos_uint16_t GLushort;
#define GL_BLEND_EQUATION_RGB 0x8009
#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622
#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623
#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624
#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625
#define GL_CURRENT_VERTEX_ATTRIB 0x8626
#define GL_VERTEX_PROGRAM_POINT_SIZE 0x8642
#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645
#define GL_STENCIL_BACK_FUNC 0x8800
#define GL_STENCIL_BACK_FAIL 0x8801
#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802
#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803
#define GL_MAX_DRAW_BUFFERS 0x8824
#define GL_DRAW_BUFFER0 0x8825
#define GL_DRAW_BUFFER1 0x8826
#define GL_DRAW_BUFFER2 0x8827
#define GL_DRAW_BUFFER3 0x8828
#define GL_DRAW_BUFFER4 0x8829
#define GL_DRAW_BUFFER5 0x882A
#define GL_DRAW_BUFFER6 0x882B
#define GL_DRAW_BUFFER7 0x882C
#define GL_DRAW_BUFFER8 0x882D
#define GL_DRAW_BUFFER9 0x882E
#define GL_DRAW_BUFFER10 0x882F
#define GL_DRAW_BUFFER11 0x8830
#define GL_DRAW_BUFFER12 0x8831
#define GL_DRAW_BUFFER13 0x8832
#define GL_DRAW_BUFFER14 0x8833
#define GL_DRAW_BUFFER15 0x8834
#define GL_BLEND_EQUATION_ALPHA 0x883D
#define GL_MAX_VERTEX_ATTRIBS 0x8869
#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A
#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872
#define GL_FRAGMENT_SHADER 0x8B30
#define GL_VERTEX_SHADER 0x8B31
#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49
#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A
#define GL_MAX_VARYING_FLOATS 0x8B4B
#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C
#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D
#define GL_SHADER_TYPE 0x8B4F
#define GL_FLOAT_VEC2 0x8B50
#define GL_FLOAT_VEC3 0x8B51
#define GL_FLOAT_VEC4 0x8B52
#define GL_INT_VEC2 0x8B53
#define GL_INT_VEC3 0x8B54
#define GL_INT_VEC4 0x8B55
#define GL_BOOL 0x8B56
#define GL_BOOL_VEC2 0x8B57
#define GL_BOOL_VEC3 0x8B58
#define GL_BOOL_VEC4 0x8B59
#define GL_FLOAT_MAT2 0x8B5A
#define GL_FLOAT_MAT3 0x8B5B
#define GL_FLOAT_MAT4 0x8B5C
#define GL_SAMPLER_1D 0x8B5D
#define GL_SAMPLER_2D 0x8B5E
#define GL_SAMPLER_3D 0x8B5F
#define GL_SAMPLER_CUBE 0x8B60
#define GL_SAMPLER_1D_SHADOW 0x8B61
#define GL_SAMPLER_2D_SHADOW 0x8B62
#define GL_DELETE_STATUS 0x8B80
#define GL_COMPILE_STATUS 0x8B81
#define GL_LINK_STATUS 0x8B82
#define GL_VALIDATE_STATUS 0x8B83
#define GL_INFO_LOG_LENGTH 0x8B84
#define GL_ATTACHED_SHADERS 0x8B85
#define GL_ACTIVE_UNIFORMS 0x8B86
#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87
#define GL_SHADER_SOURCE_LENGTH 0x8B88
#define GL_ACTIVE_ATTRIBUTES 0x8B89
#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A
#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B
#define GL_SHADING_LANGUAGE_VERSION 0x8B8C
#define GL_CURRENT_PROGRAM 0x8B8D
#define GL_POINT_SPRITE_COORD_ORIGIN 0x8CA0
#define GL_LOWER_LEFT 0x8CA1
#define GL_UPPER_LEFT 0x8CA2
#define GL_STENCIL_BACK_REF 0x8CA3
#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4
#define GL_STENCIL_BACK_WRITEMASK 0x8CA5
typedef void(APIENTRYP PFNGLBLENDEQUATIONSEPARATEPROC)(GLenum modeRGB, GLenum modeAlpha);
typedef void(APIENTRYP PFNGLDRAWBUFFERSPROC)(GLsizei n, const GLenum* bufs);
typedef void(APIENTRYP PFNGLSTENCILOPSEPARATEPROC)(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
typedef void(APIENTRYP PFNGLSTENCILFUNCSEPARATEPROC)(GLenum face, GLenum func, GLint ref, GLuint mask);
typedef void(APIENTRYP PFNGLSTENCILMASKSEPARATEPROC)(GLenum face, GLuint mask);
typedef void(APIENTRYP PFNGLATTACHSHADERPROC)(GLuint program, GLuint shader);
typedef void(APIENTRYP PFNGLBINDATTRIBLOCATIONPROC)(GLuint program, GLuint index, const GLchar* name);
typedef void(APIENTRYP PFNGLCOMPILESHADERPROC)(GLuint shader);
typedef GLuint(APIENTRYP PFNGLCREATEPROGRAMPROC)(void);
typedef GLuint(APIENTRYP PFNGLCREATESHADERPROC)(GLenum type);
typedef void(APIENTRYP PFNGLDELETEPROGRAMPROC)(GLuint program);
typedef void(APIENTRYP PFNGLDELETESHADERPROC)(GLuint shader);
typedef void(APIENTRYP PFNGLDETACHSHADERPROC)(GLuint program, GLuint shader);
typedef void(APIENTRYP PFNGLDISABLEVERTEXATTRIBARRAYPROC)(GLuint index);
typedef void(APIENTRYP PFNGLENABLEVERTEXATTRIBARRAYPROC)(GLuint index);
typedef void(APIENTRYP PFNGLGETACTIVEATTRIBPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLint* size, GLenum* type,
                                                 GLchar* name);
typedef void(APIENTRYP PFNGLGETACTIVEUNIFORMPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLint* size, GLenum* type,
                                                  GLchar* name);
typedef void(APIENTRYP PFNGLGETATTACHEDSHADERSPROC)(GLuint program, GLsizei maxCount, GLsizei* count, GLuint* shaders);
typedef GLint(APIENTRYP PFNGLGETATTRIBLOCATIONPROC)(GLuint program, const GLchar* name);
typedef void(APIENTRYP PFNGLGETPROGRAMIVPROC)(GLuint program, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETPROGRAMINFOLOGPROC)(GLuint program, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
typedef void(APIENTRYP PFNGLGETSHADERIVPROC)(GLuint shader, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETSHADERINFOLOGPROC)(GLuint shader, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
typedef void(APIENTRYP PFNGLGETSHADERSOURCEPROC)(GLuint shader, GLsizei bufSize, GLsizei* length, GLchar* source);
typedef GLint(APIENTRYP PFNGLGETUNIFORMLOCATIONPROC)(GLuint program, const GLchar* name);
typedef void(APIENTRYP PFNGLGETUNIFORMFVPROC)(GLuint program, GLint location, GLfloat* params);
typedef void(APIENTRYP PFNGLGETUNIFORMIVPROC)(GLuint program, GLint location, GLint* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBDVPROC)(GLuint index, GLenum pname, GLdouble* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBFVPROC)(GLuint index, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBIVPROC)(GLuint index, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBPOINTERVPROC)(GLuint index, GLenum pname, void** pointer);
typedef GLboolean(APIENTRYP PFNGLISPROGRAMPROC)(GLuint program);
typedef GLboolean(APIENTRYP PFNGLISSHADERPROC)(GLuint shader);
typedef void(APIENTRYP PFNGLLINKPROGRAMPROC)(GLuint program);
typedef void(APIENTRYP PFNGLSHADERSOURCEPROC)(GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length);
typedef void(APIENTRYP PFNGLUSEPROGRAMPROC)(GLuint program);
typedef void(APIENTRYP PFNGLUNIFORM1FPROC)(GLint location, GLfloat v0);
typedef void(APIENTRYP PFNGLUNIFORM2FPROC)(GLint location, GLfloat v0, GLfloat v1);
typedef void(APIENTRYP PFNGLUNIFORM3FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
typedef void(APIENTRYP PFNGLUNIFORM4FPROC)(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
typedef void(APIENTRYP PFNGLUNIFORM1IPROC)(GLint location, GLint v0);
typedef void(APIENTRYP PFNGLUNIFORM2IPROC)(GLint location, GLint v0, GLint v1);
typedef void(APIENTRYP PFNGLUNIFORM3IPROC)(GLint location, GLint v0, GLint v1, GLint v2);
typedef void(APIENTRYP PFNGLUNIFORM4IPROC)(GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
typedef void(APIENTRYP PFNGLUNIFORM1FVPROC)(GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORM2FVPROC)(GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORM3FVPROC)(GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORM4FVPROC)(GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORM1IVPROC)(GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLUNIFORM2IVPROC)(GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLUNIFORM3IVPROC)(GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLUNIFORM4IVPROC)(GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLVALIDATEPROGRAMPROC)(GLuint program);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1DPROC)(GLuint index, GLdouble x);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1FPROC)(GLuint index, GLfloat x);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1FVPROC)(GLuint index, const GLfloat* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1SPROC)(GLuint index, GLshort x);
typedef void(APIENTRYP PFNGLVERTEXATTRIB1SVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2DPROC)(GLuint index, GLdouble x, GLdouble y);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2FPROC)(GLuint index, GLfloat x, GLfloat y);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2FVPROC)(GLuint index, const GLfloat* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2SPROC)(GLuint index, GLshort x, GLshort y);
typedef void(APIENTRYP PFNGLVERTEXATTRIB2SVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3FVPROC)(GLuint index, const GLfloat* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3SPROC)(GLuint index, GLshort x, GLshort y, GLshort z);
typedef void(APIENTRYP PFNGLVERTEXATTRIB3SVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NBVPROC)(GLuint index, const GLbyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NIVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NSVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NUBPROC)(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NUBVPROC)(GLuint index, const GLubyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NUIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4NUSVPROC)(GLuint index, const GLushort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4BVPROC)(GLuint index, const GLbyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4FPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4FVPROC)(GLuint index, const GLfloat* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4IVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4SPROC)(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4SVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4UBVPROC)(GLuint index, const GLubyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4UIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIB4USVPROC)(GLuint index, const GLushort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBPOINTERPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride,
                                                     const void* pointer);
/*
 * Direct prototypes for the entry points whose PFNGL...PROC pointer types are
 * declared just above, in the same order. They are only declared when the
 * includer defines GL_GLEXT_PROTOTYPES; without it, only the function-pointer
 * typedefs are available from this section.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBlendEquationSeparate(GLenum modeRGB, GLenum modeAlpha);
GLAPI void APIENTRY glDrawBuffers(GLsizei n, const GLenum* bufs);
GLAPI void APIENTRY glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
GLAPI void APIENTRY glStencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask);
GLAPI void APIENTRY glStencilMaskSeparate(GLenum face, GLuint mask);
GLAPI void APIENTRY glAttachShader(GLuint program, GLuint shader);
GLAPI void APIENTRY glBindAttribLocation(GLuint program, GLuint index, const GLchar* name);
GLAPI void APIENTRY glCompileShader(GLuint shader);
GLAPI GLuint APIENTRY glCreateProgram(void);
GLAPI GLuint APIENTRY glCreateShader(GLenum type);
GLAPI void APIENTRY glDeleteProgram(GLuint program);
GLAPI void APIENTRY glDeleteShader(GLuint shader);
GLAPI void APIENTRY glDetachShader(GLuint program, GLuint shader);
GLAPI void APIENTRY glDisableVertexAttribArray(GLuint index);
GLAPI void APIENTRY glEnableVertexAttribArray(GLuint index);
GLAPI void APIENTRY glGetActiveAttrib(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLint* size, GLenum* type, GLchar* name);
GLAPI void APIENTRY glGetActiveUniform(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLint* size, GLenum* type, GLchar* name);
GLAPI void APIENTRY glGetAttachedShaders(GLuint program, GLsizei maxCount, GLsizei* count, GLuint* shaders);
GLAPI GLint APIENTRY glGetAttribLocation(GLuint program, const GLchar* name);
GLAPI void APIENTRY glGetProgramiv(GLuint program, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetProgramInfoLog(GLuint program, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
GLAPI void APIENTRY glGetShaderiv(GLuint shader, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetShaderInfoLog(GLuint shader, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
GLAPI void APIENTRY glGetShaderSource(GLuint shader, GLsizei bufSize, GLsizei* length, GLchar* source);
GLAPI GLint APIENTRY glGetUniformLocation(GLuint program, const GLchar* name);
GLAPI void APIENTRY glGetUniformfv(GLuint program, GLint location, GLfloat* params);
GLAPI void APIENTRY glGetUniformiv(GLuint program, GLint location, GLint* params);
GLAPI void APIENTRY glGetVertexAttribdv(GLuint index, GLenum pname, GLdouble* params);
GLAPI void APIENTRY glGetVertexAttribfv(GLuint index, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetVertexAttribiv(GLuint index, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetVertexAttribPointerv(GLuint index, GLenum pname, void** pointer);
GLAPI GLboolean APIENTRY glIsProgram(GLuint program);
GLAPI GLboolean APIENTRY glIsShader(GLuint shader);
GLAPI void APIENTRY glLinkProgram(GLuint program);
GLAPI void APIENTRY glShaderSource(GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length);
GLAPI void APIENTRY glUseProgram(GLuint program);
GLAPI void APIENTRY glUniform1f(GLint location, GLfloat v0);
GLAPI void APIENTRY glUniform2f(GLint location, GLfloat v0, GLfloat v1);
GLAPI void APIENTRY glUniform3f(GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
GLAPI void APIENTRY glUniform4f(GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
GLAPI void APIENTRY glUniform1i(GLint location, GLint v0);
GLAPI void APIENTRY glUniform2i(GLint location, GLint v0, GLint v1);
GLAPI void APIENTRY glUniform3i(GLint location, GLint v0, GLint v1, GLint v2);
GLAPI void APIENTRY glUniform4i(GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
GLAPI void APIENTRY glUniform1fv(GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glUniform2fv(GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glUniform3fv(GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glUniform4fv(GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glUniform1iv(GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glUniform2iv(GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glUniform3iv(GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glUniform4iv(GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glUniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glValidateProgram(GLuint program);
GLAPI void APIENTRY glVertexAttrib1d(GLuint index, GLdouble x);
GLAPI void APIENTRY glVertexAttrib1dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttrib1f(GLuint index, GLfloat x);
GLAPI void APIENTRY glVertexAttrib1fv(GLuint index, const GLfloat* v);
GLAPI void APIENTRY glVertexAttrib1s(GLuint index, GLshort x);
GLAPI void APIENTRY glVertexAttrib1sv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttrib2d(GLuint index, GLdouble x, GLdouble y);
GLAPI void APIENTRY glVertexAttrib2dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttrib2f(GLuint index, GLfloat x, GLfloat y);
GLAPI void APIENTRY glVertexAttrib2fv(GLuint index, const GLfloat* v);
GLAPI void APIENTRY glVertexAttrib2s(GLuint index, GLshort x, GLshort y);
GLAPI void APIENTRY glVertexAttrib2sv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttrib3d(GLuint index, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glVertexAttrib3dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttrib3f(GLuint index, GLfloat x, GLfloat y, GLfloat z);
GLAPI void APIENTRY glVertexAttrib3fv(GLuint index, const GLfloat* v);
GLAPI void APIENTRY glVertexAttrib3s(GLuint index, GLshort x, GLshort y, GLshort z);
GLAPI void APIENTRY glVertexAttrib3sv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttrib4Nbv(GLuint index, const GLbyte* v);
GLAPI void APIENTRY glVertexAttrib4Niv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttrib4Nsv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttrib4Nub(GLuint index, GLubyte x, GLubyte y, GLubyte z, GLubyte w);
GLAPI void APIENTRY glVertexAttrib4Nubv(GLuint index, const GLubyte* v);
GLAPI void APIENTRY glVertexAttrib4Nuiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttrib4Nusv(GLuint index, const GLushort* v);
GLAPI void APIENTRY glVertexAttrib4bv(GLuint index, const GLbyte* v);
GLAPI void APIENTRY glVertexAttrib4d(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
GLAPI void APIENTRY glVertexAttrib4dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttrib4f(GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
GLAPI void APIENTRY glVertexAttrib4fv(GLuint index, const GLfloat* v);
GLAPI void APIENTRY glVertexAttrib4iv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttrib4s(GLuint index, GLshort x, GLshort y, GLshort z, GLshort w);
GLAPI void APIENTRY glVertexAttrib4sv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttrib4ubv(GLuint index, const GLubyte* v);
GLAPI void APIENTRY glVertexAttrib4uiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttrib4usv(GLuint index, const GLushort* v);
GLAPI void APIENTRY glVertexAttribPointer(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void* pointer);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_2_0 */

/*
 * GL_VERSION_2_1 section.
 *
 * Guarded by GL_VERSION_2_1: the whole section is skipped when that macro is
 * already defined; otherwise it is defined to 1 here, so later code can test
 * it to learn whether these declarations are present.
 *
 * The section contributes enum constants, six function-pointer typedefs and,
 * optionally, the matching direct prototypes.
 */
#ifndef GL_VERSION_2_1
#define GL_VERSION_2_1 1
/* Pixel pack/unpack buffer enums. Note the values are not contiguous: 0x88EE
 * is not defined here, GL_PIXEL_UNPACK_BUFFER_BINDING is 0x88EF.
 * NOTE(review): verify against the Khronos registry before "fixing" the gap. */
#define GL_PIXEL_PACK_BUFFER 0x88EB
#define GL_PIXEL_UNPACK_BUFFER 0x88EC
#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED
#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF
/* Non-square float matrix type enums (consecutive values 0x8B65..0x8B6A). */
#define GL_FLOAT_MAT2x3 0x8B65
#define GL_FLOAT_MAT2x4 0x8B66
#define GL_FLOAT_MAT3x2 0x8B67
#define GL_FLOAT_MAT3x4 0x8B68
#define GL_FLOAT_MAT4x2 0x8B69
#define GL_FLOAT_MAT4x3 0x8B6A
/* GL_SRGB* and GL_COMPRESSED_SRGB* enums. */
#define GL_SRGB 0x8C40
#define GL_SRGB8 0x8C41
#define GL_SRGB_ALPHA 0x8C42
#define GL_SRGB8_ALPHA8 0x8C43
#define GL_COMPRESSED_SRGB 0x8C48
#define GL_COMPRESSED_SRGB_ALPHA 0x8C49
/* Function-pointer types for the glUniformMatrix{2x3,3x2,2x4,4x2,3x4,4x3}fv
 * entry points. All six share one signature:
 * (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value),
 * returning void. APIENTRYP and the GL* types are defined outside this section. */
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4X2FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3X4FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4X3FVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
/* Direct prototypes for the same six entry points, in the same order as the
 * typedefs; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glUniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glUniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_2_1 */

#ifndef GL_VERSION_3_0
#define GL_VERSION_3_0 1
typedef khronos_uint16_t GLhalf;
#define GL_COMPARE_REF_TO_TEXTURE 0x884E
#define GL_CLIP_DISTANCE0 0x3000
#define GL_CLIP_DISTANCE1 0x3001
#define GL_CLIP_DISTANCE2 0x3002
#define GL_CLIP_DISTANCE3 0x3003
#define GL_CLIP_DISTANCE4 0x3004
#define GL_CLIP_DISTANCE5 0x3005
#define GL_CLIP_DISTANCE6 0x3006
#define GL_CLIP_DISTANCE7 0x3007
#define GL_MAX_CLIP_DISTANCES 0x0D32
#define GL_MAJOR_VERSION 0x821B
#define GL_MINOR_VERSION 0x821C
#define GL_NUM_EXTENSIONS 0x821D
#define GL_CONTEXT_FLAGS 0x821E
#define GL_COMPRESSED_RED 0x8225
#define GL_COMPRESSED_RG 0x8226
#define GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT 0x00000001
#define GL_RGBA32F 0x8814
#define GL_RGB32F 0x8815
#define GL_RGBA16F 0x881A
#define GL_RGB16F 0x881B
#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD
#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF
#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904
#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905
#define GL_CLAMP_READ_COLOR 0x891C
#define GL_FIXED_ONLY 0x891D
/* Same value (0x8B4B) as GL_MAX_VARYING_FLOATS in the GL_VERSION_2_0 section,
 * so the two names are interchangeable as enum values. */
#define GL_MAX_VARYING_COMPONENTS 0x8B4B
#define GL_TEXTURE_1D_ARRAY 0x8C18
#define GL_PROXY_TEXTURE_1D_ARRAY 0x8C19
#define GL_TEXTURE_2D_ARRAY 0x8C1A
#define GL_PROXY_TEXTURE_2D_ARRAY 0x8C1B
#define GL_TEXTURE_BINDING_1D_ARRAY 0x8C1C
#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D
#define GL_R11F_G11F_B10F 0x8C3A
#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B
#define GL_RGB9_E5 0x8C3D
#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E
#define GL_TEXTURE_SHARED_SIZE 0x8C3F
#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76
#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F
#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80
#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83
#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84
#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85
#define GL_PRIMITIVES_GENERATED 0x8C87
#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88
#define GL_RASTERIZER_DISCARD 0x8C89
#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A
#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B
#define GL_INTERLEAVED_ATTRIBS 0x8C8C
#define GL_SEPARATE_ATTRIBS 0x8C8D
#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E
#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F
#define GL_RGBA32UI 0x8D70
#define GL_RGB32UI 0x8D71
#define GL_RGBA16UI 0x8D76
#define GL_RGB16UI 0x8D77
#define GL_RGBA8UI 0x8D7C
#define GL_RGB8UI 0x8D7D
#define GL_RGBA32I 0x8D82
#define GL_RGB32I 0x8D83
#define GL_RGBA16I 0x8D88
#define GL_RGB16I 0x8D89
#define GL_RGBA8I 0x8D8E
#define GL_RGB8I 0x8D8F
#define GL_RED_INTEGER 0x8D94
#define GL_GREEN_INTEGER 0x8D95
#define GL_BLUE_INTEGER 0x8D96
#define GL_RGB_INTEGER 0x8D98
#define GL_RGBA_INTEGER 0x8D99
#define GL_BGR_INTEGER 0x8D9A
#define GL_BGRA_INTEGER 0x8D9B
#define GL_SAMPLER_1D_ARRAY 0x8DC0
#define GL_SAMPLER_2D_ARRAY 0x8DC1
#define GL_SAMPLER_1D_ARRAY_SHADOW 0x8DC3
#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4
#define GL_SAMPLER_CUBE_SHADOW 0x8DC5
#define GL_UNSIGNED_INT_VEC2 0x8DC6
#define GL_UNSIGNED_INT_VEC3 0x8DC7
#define GL_UNSIGNED_INT_VEC4 0x8DC8
#define GL_INT_SAMPLER_1D 0x8DC9
#define GL_INT_SAMPLER_2D 0x8DCA
#define GL_INT_SAMPLER_3D 0x8DCB
#define GL_INT_SAMPLER_CUBE 0x8DCC
#define GL_INT_SAMPLER_1D_ARRAY 0x8DCE
#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF
#define GL_UNSIGNED_INT_SAMPLER_1D 0x8DD1
#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2
#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3
#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4
#define GL_UNSIGNED_INT_SAMPLER_1D_ARRAY 0x8DD6
#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7
#define GL_QUERY_WAIT 0x8E13
#define GL_QUERY_NO_WAIT 0x8E14
#define GL_QUERY_BY_REGION_WAIT 0x8E15
#define GL_QUERY_BY_REGION_NO_WAIT 0x8E16
#define GL_BUFFER_ACCESS_FLAGS 0x911F
#define GL_BUFFER_MAP_LENGTH 0x9120
#define GL_BUFFER_MAP_OFFSET 0x9121
#define GL_DEPTH_COMPONENT32F 0x8CAC
#define GL_DEPTH32F_STENCIL8 0x8CAD
#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD
#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506
#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210
#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211
#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212
#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213
#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214
#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215
#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216
#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217
#define GL_FRAMEBUFFER_DEFAULT 0x8218
#define GL_FRAMEBUFFER_UNDEFINED 0x8219
#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A
#define GL_MAX_RENDERBUFFER_SIZE 0x84E8
#define GL_DEPTH_STENCIL 0x84F9
#define GL_UNSIGNED_INT_24_8 0x84FA
#define GL_DEPTH24_STENCIL8 0x88F0
#define GL_TEXTURE_STENCIL_SIZE 0x88F1
#define GL_TEXTURE_RED_TYPE 0x8C10
#define GL_TEXTURE_GREEN_TYPE 0x8C11
#define GL_TEXTURE_BLUE_TYPE 0x8C12
#define GL_TEXTURE_ALPHA_TYPE 0x8C13
#define GL_TEXTURE_DEPTH_TYPE 0x8C16
#define GL_UNSIGNED_NORMALIZED 0x8C17
/* GL_FRAMEBUFFER_BINDING and GL_DRAW_FRAMEBUFFER_BINDING are two names for the
 * same enum value (0x8CA6); the duplicate value is not a typo in this file. */
#define GL_FRAMEBUFFER_BINDING 0x8CA6
#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6
#define GL_RENDERBUFFER_BINDING 0x8CA7
#define GL_READ_FRAMEBUFFER 0x8CA8
#define GL_DRAW_FRAMEBUFFER 0x8CA9
#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA
#define GL_RENDERBUFFER_SAMPLES 0x8CAB
#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0
#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1
#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2
#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3
#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4
#define GL_FRAMEBUFFER_COMPLETE 0x8CD5
#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6
#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7
#define GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER 0x8CDB
#define GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER 0x8CDC
#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD
#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF
#define GL_COLOR_ATTACHMENT0 0x8CE0
#define GL_COLOR_ATTACHMENT1 0x8CE1
#define GL_COLOR_ATTACHMENT2 0x8CE2
#define GL_COLOR_ATTACHMENT3 0x8CE3
#define GL_COLOR_ATTACHMENT4 0x8CE4
#define GL_COLOR_ATTACHMENT5 0x8CE5
#define GL_COLOR_ATTACHMENT6 0x8CE6
#define GL_COLOR_ATTACHMENT7 0x8CE7
#define GL_COLOR_ATTACHMENT8 0x8CE8
#define GL_COLOR_ATTACHMENT9 0x8CE9
#define GL_COLOR_ATTACHMENT10 0x8CEA
#define GL_COLOR_ATTACHMENT11 0x8CEB
#define GL_COLOR_ATTACHMENT12 0x8CEC
#define GL_COLOR_ATTACHMENT13 0x8CED
#define GL_COLOR_ATTACHMENT14 0x8CEE
#define GL_COLOR_ATTACHMENT15 0x8CEF
#define GL_COLOR_ATTACHMENT16 0x8CF0
#define GL_COLOR_ATTACHMENT17 0x8CF1
#define GL_COLOR_ATTACHMENT18 0x8CF2
#define GL_COLOR_ATTACHMENT19 0x8CF3
#define GL_COLOR_ATTACHMENT20 0x8CF4
#define GL_COLOR_ATTACHMENT21 0x8CF5
#define GL_COLOR_ATTACHMENT22 0x8CF6
#define GL_COLOR_ATTACHMENT23 0x8CF7
#define GL_COLOR_ATTACHMENT24 0x8CF8
#define GL_COLOR_ATTACHMENT25 0x8CF9
#define GL_COLOR_ATTACHMENT26 0x8CFA
#define GL_COLOR_ATTACHMENT27 0x8CFB
#define GL_COLOR_ATTACHMENT28 0x8CFC
#define GL_COLOR_ATTACHMENT29 0x8CFD
#define GL_COLOR_ATTACHMENT30 0x8CFE
#define GL_COLOR_ATTACHMENT31 0x8CFF
#define GL_DEPTH_ATTACHMENT 0x8D00
#define GL_STENCIL_ATTACHMENT 0x8D20
#define GL_FRAMEBUFFER 0x8D40
#define GL_RENDERBUFFER 0x8D41
#define GL_RENDERBUFFER_WIDTH 0x8D42
#define GL_RENDERBUFFER_HEIGHT 0x8D43
#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44
#define GL_STENCIL_INDEX1 0x8D46
#define GL_STENCIL_INDEX4 0x8D47
#define GL_STENCIL_INDEX8 0x8D48
#define GL_STENCIL_INDEX16 0x8D49
#define GL_RENDERBUFFER_RED_SIZE 0x8D50
#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51
#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52
#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53
#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54
#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55
#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56
#define GL_MAX_SAMPLES 0x8D57
#define GL_FRAMEBUFFER_SRGB 0x8DB9
#define GL_HALF_FLOAT 0x140B
#define GL_MAP_READ_BIT 0x0001
#define GL_MAP_WRITE_BIT 0x0002
#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004
#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008
#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010
#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020
#define GL_COMPRESSED_RED_RGTC1 0x8DBB
#define GL_COMPRESSED_SIGNED_RED_RGTC1 0x8DBC
#define GL_COMPRESSED_RG_RGTC2 0x8DBD
#define GL_COMPRESSED_SIGNED_RG_RGTC2 0x8DBE
#define GL_RG 0x8227
#define GL_RG_INTEGER 0x8228
#define GL_R8 0x8229
#define GL_R16 0x822A
#define GL_RG8 0x822B
#define GL_RG16 0x822C
#define GL_R16F 0x822D
#define GL_R32F 0x822E
#define GL_RG16F 0x822F
#define GL_RG32F 0x8230
#define GL_R8I 0x8231
#define GL_R8UI 0x8232
#define GL_R16I 0x8233
#define GL_R16UI 0x8234
#define GL_R32I 0x8235
#define GL_R32UI 0x8236
#define GL_RG8I 0x8237
#define GL_RG8UI 0x8238
#define GL_RG16I 0x8239
#define GL_RG16UI 0x823A
#define GL_RG32I 0x823B
#define GL_RG32UI 0x823C
#define GL_VERTEX_ARRAY_BINDING 0x85B5
typedef void(APIENTRYP PFNGLCOLORMASKIPROC)(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a);
typedef void(APIENTRYP PFNGLGETBOOLEANI_VPROC)(GLenum target, GLuint index, GLboolean* data);
typedef void(APIENTRYP PFNGLGETINTEGERI_VPROC)(GLenum target, GLuint index, GLint* data);
typedef void(APIENTRYP PFNGLENABLEIPROC)(GLenum target, GLuint index);
typedef void(APIENTRYP PFNGLDISABLEIPROC)(GLenum target, GLuint index);
typedef GLboolean(APIENTRYP PFNGLISENABLEDIPROC)(GLenum target, GLuint index);
typedef void(APIENTRYP PFNGLBEGINTRANSFORMFEEDBACKPROC)(GLenum primitiveMode);
typedef void(APIENTRYP PFNGLENDTRANSFORMFEEDBACKPROC)(void);
typedef void(APIENTRYP PFNGLBINDBUFFERRANGEPROC)(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLBINDBUFFERBASEPROC)(GLenum target, GLuint index, GLuint buffer);
typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKVARYINGSPROC)(GLuint program, GLsizei count, const GLchar* const* varyings, GLenum bufferMode);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKVARYINGPROC)(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLsizei* size,
                                                             GLenum* type, GLchar* name);
typedef void(APIENTRYP PFNGLCLAMPCOLORPROC)(GLenum target, GLenum clamp);
typedef void(APIENTRYP PFNGLBEGINCONDITIONALRENDERPROC)(GLuint id, GLenum mode);
typedef void(APIENTRYP PFNGLENDCONDITIONALRENDERPROC)(void);
typedef void(APIENTRYP PFNGLVERTEXATTRIBIPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void* pointer);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBIIVPROC)(GLuint index, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBIUIVPROC)(GLuint index, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI1IPROC)(GLuint index, GLint x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI2IPROC)(GLuint index, GLint x, GLint y);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI3IPROC)(GLuint index, GLint x, GLint y, GLint z);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4IPROC)(GLuint index, GLint x, GLint y, GLint z, GLint w);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI1UIPROC)(GLuint index, GLuint x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI2UIPROC)(GLuint index, GLuint x, GLuint y);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI3UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4UIPROC)(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI1IVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI2IVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI3IVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4IVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI1UIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI2UIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI3UIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4UIVPROC)(GLuint index, const GLuint* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4BVPROC)(GLuint index, const GLbyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4SVPROC)(GLuint index, const GLshort* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4UBVPROC)(GLuint index, const GLubyte* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBI4USVPROC)(GLuint index, const GLushort* v);
typedef void(APIENTRYP PFNGLGETUNIFORMUIVPROC)(GLuint program, GLint location, GLuint* params);
typedef void(APIENTRYP PFNGLBINDFRAGDATALOCATIONPROC)(GLuint program, GLuint color, const GLchar* name);
typedef GLint(APIENTRYP PFNGLGETFRAGDATALOCATIONPROC)(GLuint program, const GLchar* name);
typedef void(APIENTRYP PFNGLUNIFORM1UIPROC)(GLint location, GLuint v0);
typedef void(APIENTRYP PFNGLUNIFORM2UIPROC)(GLint location, GLuint v0, GLuint v1);
typedef void(APIENTRYP PFNGLUNIFORM3UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2);
typedef void(APIENTRYP PFNGLUNIFORM4UIPROC)(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
typedef void(APIENTRYP PFNGLUNIFORM1UIVPROC)(GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLUNIFORM2UIVPROC)(GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLUNIFORM3UIVPROC)(GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLUNIFORM4UIVPROC)(GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, const GLuint* params);
typedef void(APIENTRYP PFNGLGETTEXPARAMETERIIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXPARAMETERIUIVPROC)(GLenum target, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLCLEARBUFFERIVPROC)(GLenum buffer, GLint drawbuffer, const GLint* value);
typedef void(APIENTRYP PFNGLCLEARBUFFERUIVPROC)(GLenum buffer, GLint drawbuffer, const GLuint* value);
typedef void(APIENTRYP PFNGLCLEARBUFFERFVPROC)(GLenum buffer, GLint drawbuffer, const GLfloat* value);
typedef void(APIENTRYP PFNGLCLEARBUFFERFIPROC)(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
typedef const GLubyte*(APIENTRYP PFNGLGETSTRINGIPROC)(GLenum name, GLuint index);
typedef GLboolean(APIENTRYP PFNGLISRENDERBUFFERPROC)(GLuint renderbuffer);
typedef void(APIENTRYP PFNGLBINDRENDERBUFFERPROC)(GLenum target, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLDELETERENDERBUFFERSPROC)(GLsizei n, const GLuint* renderbuffers);
typedef void(APIENTRYP PFNGLGENRENDERBUFFERSPROC)(GLsizei n, GLuint* renderbuffers);
typedef void(APIENTRYP PFNGLRENDERBUFFERSTORAGEPROC)(GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef GLboolean(APIENTRYP PFNGLISFRAMEBUFFERPROC)(GLuint framebuffer);
typedef void(APIENTRYP PFNGLBINDFRAMEBUFFERPROC)(GLenum target, GLuint framebuffer);
typedef void(APIENTRYP PFNGLDELETEFRAMEBUFFERSPROC)(GLsizei n, const GLuint* framebuffers);
typedef void(APIENTRYP PFNGLGENFRAMEBUFFERSPROC)(GLsizei n, GLuint* framebuffers);
typedef GLenum(APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSPROC)(GLenum target);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTURE1DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DPROC)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
typedef void(APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFERPROC)(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLenum target, GLenum attachment, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGENERATEMIPMAPPROC)(GLenum target);
typedef void(APIENTRYP PFNGLBLITFRAMEBUFFERPROC)(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1,
                                                 GLint dstY1, GLbitfield mask, GLenum filter);
typedef void(APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void*(APIENTRYP PFNGLMAPBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access);
typedef void(APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEPROC)(GLenum target, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLBINDVERTEXARRAYPROC)(GLuint array);
typedef void(APIENTRYP PFNGLDELETEVERTEXARRAYSPROC)(GLsizei n, const GLuint* arrays);
typedef void(APIENTRYP PFNGLGENVERTEXARRAYSPROC)(GLsizei n, GLuint* arrays);
typedef GLboolean(APIENTRYP PFNGLISVERTEXARRAYPROC)(GLuint array);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glColorMaski(GLuint index, GLboolean r, GLboolean g, GLboolean b, GLboolean a);
GLAPI void APIENTRY glGetBooleani_v(GLenum target, GLuint index, GLboolean* data);
GLAPI void APIENTRY glGetIntegeri_v(GLenum target, GLuint index, GLint* data);
GLAPI void APIENTRY glEnablei(GLenum target, GLuint index);
GLAPI void APIENTRY glDisablei(GLenum target, GLuint index);
GLAPI GLboolean APIENTRY glIsEnabledi(GLenum target, GLuint index);
GLAPI void APIENTRY glBeginTransformFeedback(GLenum primitiveMode);
GLAPI void APIENTRY glEndTransformFeedback(void);
GLAPI void APIENTRY glBindBufferRange(GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
GLAPI void APIENTRY glBindBufferBase(GLenum target, GLuint index, GLuint buffer);
GLAPI void APIENTRY glTransformFeedbackVaryings(GLuint program, GLsizei count, const GLchar* const* varyings, GLenum bufferMode);
GLAPI void APIENTRY glGetTransformFeedbackVarying(GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLsizei* size,
                                                  GLenum* type, GLchar* name);
GLAPI void APIENTRY glClampColor(GLenum target, GLenum clamp);
GLAPI void APIENTRY glBeginConditionalRender(GLuint id, GLenum mode);
GLAPI void APIENTRY glEndConditionalRender(void);
GLAPI void APIENTRY glVertexAttribIPointer(GLuint index, GLint size, GLenum type, GLsizei stride, const void* pointer);
GLAPI void APIENTRY glGetVertexAttribIiv(GLuint index, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetVertexAttribIuiv(GLuint index, GLenum pname, GLuint* params);
GLAPI void APIENTRY glVertexAttribI1i(GLuint index, GLint x);
GLAPI void APIENTRY glVertexAttribI2i(GLuint index, GLint x, GLint y);
GLAPI void APIENTRY glVertexAttribI3i(GLuint index, GLint x, GLint y, GLint z);
GLAPI void APIENTRY glVertexAttribI4i(GLuint index, GLint x, GLint y, GLint z, GLint w);
GLAPI void APIENTRY glVertexAttribI1ui(GLuint index, GLuint x);
GLAPI void APIENTRY glVertexAttribI2ui(GLuint index, GLuint x, GLuint y);
GLAPI void APIENTRY glVertexAttribI3ui(GLuint index, GLuint x, GLuint y, GLuint z);
GLAPI void APIENTRY glVertexAttribI4ui(GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
GLAPI void APIENTRY glVertexAttribI1iv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttribI2iv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttribI3iv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttribI4iv(GLuint index, const GLint* v);
GLAPI void APIENTRY glVertexAttribI1uiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttribI2uiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttribI3uiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttribI4uiv(GLuint index, const GLuint* v);
GLAPI void APIENTRY glVertexAttribI4bv(GLuint index, const GLbyte* v);
GLAPI void APIENTRY glVertexAttribI4sv(GLuint index, const GLshort* v);
GLAPI void APIENTRY glVertexAttribI4ubv(GLuint index, const GLubyte* v);
GLAPI void APIENTRY glVertexAttribI4usv(GLuint index, const GLushort* v);
GLAPI void APIENTRY glGetUniformuiv(GLuint program, GLint location, GLuint* params);
GLAPI void APIENTRY glBindFragDataLocation(GLuint program, GLuint color, const GLchar* name);
GLAPI GLint APIENTRY glGetFragDataLocation(GLuint program, const GLchar* name);
GLAPI void APIENTRY glUniform1ui(GLint location, GLuint v0);
GLAPI void APIENTRY glUniform2ui(GLint location, GLuint v0, GLuint v1);
GLAPI void APIENTRY glUniform3ui(GLint location, GLuint v0, GLuint v1, GLuint v2);
GLAPI void APIENTRY glUniform4ui(GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
GLAPI void APIENTRY glUniform1uiv(GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glUniform2uiv(GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glUniform3uiv(GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glUniform4uiv(GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glTexParameterIiv(GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glTexParameterIuiv(GLenum target, GLenum pname, const GLuint* params);
GLAPI void APIENTRY glGetTexParameterIiv(GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTexParameterIuiv(GLenum target, GLenum pname, GLuint* params);
GLAPI void APIENTRY glClearBufferiv(GLenum buffer, GLint drawbuffer, const GLint* value);
GLAPI void APIENTRY glClearBufferuiv(GLenum buffer, GLint drawbuffer, const GLuint* value);
GLAPI void APIENTRY glClearBufferfv(GLenum buffer, GLint drawbuffer, const GLfloat* value);
GLAPI void APIENTRY glClearBufferfi(GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
GLAPI const GLubyte* APIENTRY glGetStringi(GLenum name, GLuint index);
GLAPI GLboolean APIENTRY glIsRenderbuffer(GLuint renderbuffer);
GLAPI void APIENTRY glBindRenderbuffer(GLenum target, GLuint renderbuffer);
GLAPI void APIENTRY glDeleteRenderbuffers(GLsizei n, const GLuint* renderbuffers);
GLAPI void APIENTRY glGenRenderbuffers(GLsizei n, GLuint* renderbuffers);
GLAPI void APIENTRY glRenderbufferStorage(GLenum target, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glGetRenderbufferParameteriv(GLenum target, GLenum pname, GLint* params);
GLAPI GLboolean APIENTRY glIsFramebuffer(GLuint framebuffer);
GLAPI void APIENTRY glBindFramebuffer(GLenum target, GLuint framebuffer);
GLAPI void APIENTRY glDeleteFramebuffers(GLsizei n, const GLuint* framebuffers);
GLAPI void APIENTRY glGenFramebuffers(GLsizei n, GLuint* framebuffers);
GLAPI GLenum APIENTRY glCheckFramebufferStatus(GLenum target);
GLAPI void APIENTRY glFramebufferTexture1D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glFramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glFramebufferTexture3D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset);
GLAPI void APIENTRY glFramebufferRenderbuffer(GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
GLAPI void APIENTRY glGetFramebufferAttachmentParameteriv(GLenum target, GLenum attachment, GLenum pname, GLint* params);
GLAPI void APIENTRY glGenerateMipmap(GLenum target);
GLAPI void APIENTRY glBlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1,
                                      GLint dstY1, GLbitfield mask, GLenum filter);
GLAPI void APIENTRY glRenderbufferStorageMultisample(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glFramebufferTextureLayer(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
GLAPI void* APIENTRY glMapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access);
GLAPI void APIENTRY glFlushMappedBufferRange(GLenum target, GLintptr offset, GLsizeiptr length);
GLAPI void APIENTRY glBindVertexArray(GLuint array);
GLAPI void APIENTRY glDeleteVertexArrays(GLsizei n, const GLuint* arrays);
GLAPI void APIENTRY glGenVertexArrays(GLsizei n, GLuint* arrays);
GLAPI GLboolean APIENTRY glIsVertexArray(GLuint array);
#endif
#endif /* GL_VERSION_3_0 */

#ifndef GL_VERSION_3_1
/*
 * GL_VERSION_3_1 section.
 *
 * Everything up to the matching #endif is skipped when GL_VERSION_3_1 is
 * already defined; the macro is defined to 1 immediately below, so the
 * section is processed at most once per translation unit. It contains:
 *   - enumerant macros,
 *   - PFNGL...PROC function-pointer typedefs (one per entry point), and
 *   - GLAPI prototypes, declared only when GL_GLEXT_PROTOTYPES is defined.
 */
#define GL_VERSION_3_1 1
/* Sampler tokens for rectangle and buffer textures. */
#define GL_SAMPLER_2D_RECT 0x8B63
#define GL_SAMPLER_2D_RECT_SHADOW 0x8B64
#define GL_SAMPLER_BUFFER 0x8DC2
#define GL_INT_SAMPLER_2D_RECT 0x8DCD
#define GL_INT_SAMPLER_BUFFER 0x8DD0
#define GL_UNSIGNED_INT_SAMPLER_2D_RECT 0x8DD5
#define GL_UNSIGNED_INT_SAMPLER_BUFFER 0x8DD8
/* Texture-buffer tokens. */
#define GL_TEXTURE_BUFFER 0x8C2A
#define GL_MAX_TEXTURE_BUFFER_SIZE 0x8C2B
#define GL_TEXTURE_BINDING_BUFFER 0x8C2C
#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING 0x8C2D
/* Rectangle-texture tokens. */
#define GL_TEXTURE_RECTANGLE 0x84F5
#define GL_TEXTURE_BINDING_RECTANGLE 0x84F6
#define GL_PROXY_TEXTURE_RECTANGLE 0x84F7
#define GL_MAX_RECTANGLE_TEXTURE_SIZE 0x84F8
/* *_SNORM format tokens (8- and 16-bit, one to four components). */
#define GL_R8_SNORM 0x8F94
#define GL_RG8_SNORM 0x8F95
#define GL_RGB8_SNORM 0x8F96
#define GL_RGBA8_SNORM 0x8F97
#define GL_R16_SNORM 0x8F98
#define GL_RG16_SNORM 0x8F99
#define GL_RGB16_SNORM 0x8F9A
#define GL_RGBA16_SNORM 0x8F9B
#define GL_SIGNED_NORMALIZED 0x8F9C
/* Primitive-restart tokens; see glPrimitiveRestartIndex below. */
#define GL_PRIMITIVE_RESTART 0x8F9D
#define GL_PRIMITIVE_RESTART_INDEX 0x8F9E
/* Buffer targets named for copy operations; see glCopyBufferSubData below. */
#define GL_COPY_READ_BUFFER 0x8F36
#define GL_COPY_WRITE_BUFFER 0x8F37
/* Uniform-buffer and uniform-block tokens. */
#define GL_UNIFORM_BUFFER 0x8A11
#define GL_UNIFORM_BUFFER_BINDING 0x8A28
#define GL_UNIFORM_BUFFER_START 0x8A29
#define GL_UNIFORM_BUFFER_SIZE 0x8A2A
#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B
#define GL_MAX_GEOMETRY_UNIFORM_BLOCKS 0x8A2C
#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D
#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E
#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F
#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30
#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31
#define GL_MAX_COMBINED_GEOMETRY_UNIFORM_COMPONENTS 0x8A32
#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33
#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34
#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35
#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36
#define GL_UNIFORM_TYPE 0x8A37
#define GL_UNIFORM_SIZE 0x8A38
#define GL_UNIFORM_NAME_LENGTH 0x8A39
#define GL_UNIFORM_BLOCK_INDEX 0x8A3A
#define GL_UNIFORM_OFFSET 0x8A3B
#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C
#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D
#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E
#define GL_UNIFORM_BLOCK_BINDING 0x8A3F
#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40
#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41
#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42
#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43
#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44
#define GL_UNIFORM_BLOCK_REFERENCED_BY_GEOMETRY_SHADER 0x8A45
#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46
/*
 * All-ones 32-bit value with an explicit unsigned suffix.
 * NOTE(review): presumably the "not found" sentinel for the GLuint indices
 * produced by glGetUniformBlockIndex / glGetUniformIndices -- confirm against
 * the OpenGL specification.
 */
#define GL_INVALID_INDEX 0xFFFFFFFFu
/*
 * Function-pointer typedefs. Each PFNGL<NAME>PROC has the same return type
 * and parameter list as the gl<Name> prototype declared further down.
 */
typedef void(APIENTRYP PFNGLDRAWARRAYSINSTANCEDPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei instancecount);
typedef void(APIENTRYP PFNGLTEXBUFFERPROC)(GLenum target, GLenum internalformat, GLuint buffer);
typedef void(APIENTRYP PFNGLPRIMITIVERESTARTINDEXPROC)(GLuint index);
typedef void(APIENTRYP PFNGLCOPYBUFFERSUBDATAPROC)(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset,
                                                   GLsizeiptr size);
typedef void(APIENTRYP PFNGLGETUNIFORMINDICESPROC)(GLuint program, GLsizei uniformCount, const GLchar* const* uniformNames, GLuint* uniformIndices);
typedef void(APIENTRYP PFNGLGETACTIVEUNIFORMSIVPROC)(GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETACTIVEUNIFORMNAMEPROC)(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformName);
typedef GLuint(APIENTRYP PFNGLGETUNIFORMBLOCKINDEXPROC)(GLuint program, const GLchar* uniformBlockName);
typedef void(APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKIVPROC)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC)(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length,
                                                           GLchar* uniformBlockName);
typedef void(APIENTRYP PFNGLUNIFORMBLOCKBINDINGPROC)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding);
/*
 * Direct prototypes are opt-in: they are declared only when the including
 * code defines GL_GLEXT_PROTOTYPES. Otherwise only the typedefs above are
 * available.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawArraysInstanced(GLenum mode, GLint first, GLsizei count, GLsizei instancecount);
GLAPI void APIENTRY glDrawElementsInstanced(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei instancecount);
GLAPI void APIENTRY glTexBuffer(GLenum target, GLenum internalformat, GLuint buffer);
GLAPI void APIENTRY glPrimitiveRestartIndex(GLuint index);
GLAPI void APIENTRY glCopyBufferSubData(GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
GLAPI void APIENTRY glGetUniformIndices(GLuint program, GLsizei uniformCount, const GLchar* const* uniformNames, GLuint* uniformIndices);
GLAPI void APIENTRY glGetActiveUniformsiv(GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetActiveUniformName(GLuint program, GLuint uniformIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformName);
GLAPI GLuint APIENTRY glGetUniformBlockIndex(GLuint program, const GLchar* uniformBlockName);
GLAPI void APIENTRY glGetActiveUniformBlockiv(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetActiveUniformBlockName(GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformBlockName);
GLAPI void APIENTRY glUniformBlockBinding(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_3_1 */

#ifndef GL_VERSION_3_2
/*
 * GL_VERSION_3_2 section.
 *
 * Skipped when GL_VERSION_3_2 is already defined; the macro is defined to 1
 * immediately below. Besides enumerant macros, PFNGL...PROC typedefs and the
 * optional prototypes, this section also introduces three scalar/handle
 * types used by the declarations that follow.
 */
#define GL_VERSION_3_2 1
/* GLsync is a pointer to the incomplete struct __GLsync, i.e. an opaque handle. */
typedef struct __GLsync* GLsync;
/* 64-bit integer aliases built on the khronos_* fixed-width types. */
typedef khronos_uint64_t GLuint64;
typedef khronos_int64_t GLint64;
/* Context profile bits. */
#define GL_CONTEXT_CORE_PROFILE_BIT 0x00000001
#define GL_CONTEXT_COMPATIBILITY_PROFILE_BIT 0x00000002
/* Adjacency primitive tokens. */
#define GL_LINES_ADJACENCY 0x000A
#define GL_LINE_STRIP_ADJACENCY 0x000B
#define GL_TRIANGLES_ADJACENCY 0x000C
#define GL_TRIANGLE_STRIP_ADJACENCY 0x000D
#define GL_PROGRAM_POINT_SIZE 0x8642
/* Geometry-shader and layered-framebuffer tokens. */
#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS 0x8C29
#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED 0x8DA7
#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS 0x8DA8
#define GL_GEOMETRY_SHADER 0x8DD9
#define GL_GEOMETRY_VERTICES_OUT 0x8916
#define GL_GEOMETRY_INPUT_TYPE 0x8917
#define GL_GEOMETRY_OUTPUT_TYPE 0x8918
#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS 0x8DDF
#define GL_MAX_GEOMETRY_OUTPUT_VERTICES 0x8DE0
#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS 0x8DE1
#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122
#define GL_MAX_GEOMETRY_INPUT_COMPONENTS 0x9123
#define GL_MAX_GEOMETRY_OUTPUT_COMPONENTS 0x9124
#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125
#define GL_CONTEXT_PROFILE_MASK 0x9126
#define GL_DEPTH_CLAMP 0x864F
/* Provoking-vertex tokens; see glProvokingVertex below. */
#define GL_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION 0x8E4C
#define GL_FIRST_VERTEX_CONVENTION 0x8E4D
#define GL_LAST_VERTEX_CONVENTION 0x8E4E
#define GL_PROVOKING_VERTEX 0x8E4F
#define GL_TEXTURE_CUBE_MAP_SEAMLESS 0x884F
/* Sync-object tokens; see the gl*Sync entry points below. */
#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111
#define GL_OBJECT_TYPE 0x9112
#define GL_SYNC_CONDITION 0x9113
#define GL_SYNC_STATUS 0x9114
#define GL_SYNC_FLAGS 0x9115
#define GL_SYNC_FENCE 0x9116
#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117
#define GL_UNSIGNALED 0x9118
#define GL_SIGNALED 0x9119
#define GL_ALREADY_SIGNALED 0x911A
#define GL_TIMEOUT_EXPIRED 0x911B
#define GL_CONDITION_SATISFIED 0x911C
#define GL_WAIT_FAILED 0x911D
/*
 * All-ones 64-bit literal (note the `ull` suffix). It is wider than the other
 * tokens in this section; the `timeout` parameters of glClientWaitSync and
 * glWaitSync below are GLuint64.
 */
#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull
#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001
/* Multisample-texture and sample-mask tokens. */
#define GL_SAMPLE_POSITION 0x8E50
#define GL_SAMPLE_MASK 0x8E51
#define GL_SAMPLE_MASK_VALUE 0x8E52
#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59
#define GL_TEXTURE_2D_MULTISAMPLE 0x9100
#define GL_PROXY_TEXTURE_2D_MULTISAMPLE 0x9101
#define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102
#define GL_PROXY_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9103
#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104
#define GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY 0x9105
#define GL_TEXTURE_SAMPLES 0x9106
#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107
#define GL_SAMPLER_2D_MULTISAMPLE 0x9108
#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109
#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A
#define GL_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910B
#define GL_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910C
#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE_ARRAY 0x910D
#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E
#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F
#define GL_MAX_INTEGER_SAMPLES 0x9110
/*
 * Function-pointer typedefs. Each PFNGL<NAME>PROC has the same return type
 * and parameter list as the gl<Name> prototype declared further down.
 */
typedef void(APIENTRYP PFNGLDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices, GLint basevertex);
typedef void(APIENTRYP PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC)(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type,
                                                             const void* indices, GLint basevertex);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices,
                                                                 GLsizei instancecount, GLint basevertex);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSBASEVERTEXPROC)(GLenum mode, const GLsizei* count, GLenum type, const void* const* indices,
                                                             GLsizei drawcount, const GLint* basevertex);
typedef void(APIENTRYP PFNGLPROVOKINGVERTEXPROC)(GLenum mode);
typedef GLsync(APIENTRYP PFNGLFENCESYNCPROC)(GLenum condition, GLbitfield flags);
typedef GLboolean(APIENTRYP PFNGLISSYNCPROC)(GLsync sync);
typedef void(APIENTRYP PFNGLDELETESYNCPROC)(GLsync sync);
typedef GLenum(APIENTRYP PFNGLCLIENTWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout);
typedef void(APIENTRYP PFNGLWAITSYNCPROC)(GLsync sync, GLbitfield flags, GLuint64 timeout);
typedef void(APIENTRYP PFNGLGETINTEGER64VPROC)(GLenum pname, GLint64* data);
typedef void(APIENTRYP PFNGLGETSYNCIVPROC)(GLsync sync, GLenum pname, GLsizei count, GLsizei* length, GLint* values);
typedef void(APIENTRYP PFNGLGETINTEGER64I_VPROC)(GLenum target, GLuint index, GLint64* data);
typedef void(APIENTRYP PFNGLGETBUFFERPARAMETERI64VPROC)(GLenum target, GLenum pname, GLint64* params);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTUREPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLTEXIMAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                                       GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXIMAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                                       GLsizei depth, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLGETMULTISAMPLEFVPROC)(GLenum pname, GLuint index, GLfloat* val);
typedef void(APIENTRYP PFNGLSAMPLEMASKIPROC)(GLuint maskNumber, GLbitfield mask);
/*
 * Direct prototypes are opt-in: they are declared only when the including
 * code defines GL_GLEXT_PROTOTYPES. Otherwise only the typedefs above are
 * available.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawElementsBaseVertex(GLenum mode, GLsizei count, GLenum type, const void* indices, GLint basevertex);
GLAPI void APIENTRY glDrawRangeElementsBaseVertex(GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void* indices,
                                                  GLint basevertex);
GLAPI void APIENTRY glDrawElementsInstancedBaseVertex(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei instancecount,
                                                      GLint basevertex);
GLAPI void APIENTRY glMultiDrawElementsBaseVertex(GLenum mode, const GLsizei* count, GLenum type, const void* const* indices,
                                                  GLsizei drawcount, const GLint* basevertex);
GLAPI void APIENTRY glProvokingVertex(GLenum mode);
GLAPI GLsync APIENTRY glFenceSync(GLenum condition, GLbitfield flags);
GLAPI GLboolean APIENTRY glIsSync(GLsync sync);
GLAPI void APIENTRY glDeleteSync(GLsync sync);
GLAPI GLenum APIENTRY glClientWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout);
GLAPI void APIENTRY glWaitSync(GLsync sync, GLbitfield flags, GLuint64 timeout);
GLAPI void APIENTRY glGetInteger64v(GLenum pname, GLint64* data);
GLAPI void APIENTRY glGetSynciv(GLsync sync, GLenum pname, GLsizei count, GLsizei* length, GLint* values);
GLAPI void APIENTRY glGetInteger64i_v(GLenum target, GLuint index, GLint64* data);
GLAPI void APIENTRY glGetBufferParameteri64v(GLenum target, GLenum pname, GLint64* params);
GLAPI void APIENTRY glFramebufferTexture(GLenum target, GLenum attachment, GLuint texture, GLint level);
GLAPI void APIENTRY glTexImage2DMultisample(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                            GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTexImage3DMultisample(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                            GLsizei depth, GLboolean fixedsamplelocations);
GLAPI void APIENTRY glGetMultisamplefv(GLenum pname, GLuint index, GLfloat* val);
GLAPI void APIENTRY glSampleMaski(GLuint maskNumber, GLbitfield mask);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_3_2 */

#ifndef GL_VERSION_3_3
/*
 * GL_VERSION_3_3 section.
 *
 * Skipped when GL_VERSION_3_3 is already defined; the macro is defined to 1
 * immediately below. Contains enumerant macros, PFNGL...PROC function-pointer
 * typedefs, and prototypes that are declared only when GL_GLEXT_PROTOTYPES
 * is defined.
 */
#define GL_VERSION_3_3 1
#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE
/* SRC1 / dual-source tokens. */
#define GL_SRC1_COLOR 0x88F9
#define GL_ONE_MINUS_SRC1_COLOR 0x88FA
#define GL_ONE_MINUS_SRC1_ALPHA 0x88FB
#define GL_MAX_DUAL_SOURCE_DRAW_BUFFERS 0x88FC
#define GL_ANY_SAMPLES_PASSED 0x8C2F
#define GL_SAMPLER_BINDING 0x8919
#define GL_RGB10_A2UI 0x906F
/* Texture swizzle tokens. */
#define GL_TEXTURE_SWIZZLE_R 0x8E42
#define GL_TEXTURE_SWIZZLE_G 0x8E43
#define GL_TEXTURE_SWIZZLE_B 0x8E44
#define GL_TEXTURE_SWIZZLE_A 0x8E45
#define GL_TEXTURE_SWIZZLE_RGBA 0x8E46
/* Timer tokens; see glQueryCounter and the 64-bit glGetQueryObject* variants below. */
#define GL_TIME_ELAPSED 0x88BF
#define GL_TIMESTAMP 0x8E28
#define GL_INT_2_10_10_10_REV 0x8D9F
/*
 * Function-pointer typedefs. Each PFNGL<NAME>PROC has the same return type
 * and parameter list as the gl<Name> prototype declared further down.
 */
typedef void(APIENTRYP PFNGLBINDFRAGDATALOCATIONINDEXEDPROC)(GLuint program, GLuint colorNumber, GLuint index, const GLchar* name);
typedef GLint(APIENTRYP PFNGLGETFRAGDATAINDEXPROC)(GLuint program, const GLchar* name);
/* Sampler-object entry points. */
typedef void(APIENTRYP PFNGLGENSAMPLERSPROC)(GLsizei count, GLuint* samplers);
typedef void(APIENTRYP PFNGLDELETESAMPLERSPROC)(GLsizei count, const GLuint* samplers);
typedef GLboolean(APIENTRYP PFNGLISSAMPLERPROC)(GLuint sampler);
typedef void(APIENTRYP PFNGLBINDSAMPLERPROC)(GLuint unit, GLuint sampler);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERIPROC)(GLuint sampler, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, const GLint* param);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERFPROC)(GLuint sampler, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, const GLfloat* param);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, const GLint* param);
typedef void(APIENTRYP PFNGLSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, const GLuint* param);
typedef void(APIENTRYP PFNGLGETSAMPLERPARAMETERIVPROC)(GLuint sampler, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETSAMPLERPARAMETERIIVPROC)(GLuint sampler, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETSAMPLERPARAMETERFVPROC)(GLuint sampler, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETSAMPLERPARAMETERIUIVPROC)(GLuint sampler, GLenum pname, GLuint* params);
/* Query entry points; the two getters write 64-bit results (GLint64 / GLuint64). */
typedef void(APIENTRYP PFNGLQUERYCOUNTERPROC)(GLuint id, GLenum target);
typedef void(APIENTRYP PFNGLGETQUERYOBJECTI64VPROC)(GLuint id, GLenum pname, GLint64* params);
typedef void(APIENTRYP PFNGLGETQUERYOBJECTUI64VPROC)(GLuint id, GLenum pname, GLuint64* params);
typedef void(APIENTRYP PFNGLVERTEXATTRIBDIVISORPROC)(GLuint index, GLuint divisor);
/*
 * glVertexAttribP{1,2,3,4}ui[v]: every variant takes a single GLuint (or a
 * pointer to GLuint) regardless of the component count in the name.
 * NOTE(review): presumably `value` is a packed encoding selected by `type`
 * -- confirm against the OpenGL specification.
 */
typedef void(APIENTRYP PFNGLVERTEXATTRIBP1UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP1UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP2UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP2UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP3UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP3UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP4UIPROC)(GLuint index, GLenum type, GLboolean normalized, GLuint value);
typedef void(APIENTRYP PFNGLVERTEXATTRIBP4UIVPROC)(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
/*
 * Direct prototypes are opt-in: they are declared only when the including
 * code defines GL_GLEXT_PROTOTYPES. Otherwise only the typedefs above are
 * available.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBindFragDataLocationIndexed(GLuint program, GLuint colorNumber, GLuint index, const GLchar* name);
GLAPI GLint APIENTRY glGetFragDataIndex(GLuint program, const GLchar* name);
GLAPI void APIENTRY glGenSamplers(GLsizei count, GLuint* samplers);
GLAPI void APIENTRY glDeleteSamplers(GLsizei count, const GLuint* samplers);
GLAPI GLboolean APIENTRY glIsSampler(GLuint sampler);
GLAPI void APIENTRY glBindSampler(GLuint unit, GLuint sampler);
GLAPI void APIENTRY glSamplerParameteri(GLuint sampler, GLenum pname, GLint param);
GLAPI void APIENTRY glSamplerParameteriv(GLuint sampler, GLenum pname, const GLint* param);
GLAPI void APIENTRY glSamplerParameterf(GLuint sampler, GLenum pname, GLfloat param);
GLAPI void APIENTRY glSamplerParameterfv(GLuint sampler, GLenum pname, const GLfloat* param);
GLAPI void APIENTRY glSamplerParameterIiv(GLuint sampler, GLenum pname, const GLint* param);
GLAPI void APIENTRY glSamplerParameterIuiv(GLuint sampler, GLenum pname, const GLuint* param);
GLAPI void APIENTRY glGetSamplerParameteriv(GLuint sampler, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint* params);
GLAPI void APIENTRY glQueryCounter(GLuint id, GLenum target);
GLAPI void APIENTRY glGetQueryObjecti64v(GLuint id, GLenum pname, GLint64* params);
GLAPI void APIENTRY glGetQueryObjectui64v(GLuint id, GLenum pname, GLuint64* params);
GLAPI void APIENTRY glVertexAttribDivisor(GLuint index, GLuint divisor);
GLAPI void APIENTRY glVertexAttribP1ui(GLuint index, GLenum type, GLboolean normalized, GLuint value);
GLAPI void APIENTRY glVertexAttribP1uiv(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
GLAPI void APIENTRY glVertexAttribP2ui(GLuint index, GLenum type, GLboolean normalized, GLuint value);
GLAPI void APIENTRY glVertexAttribP2uiv(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
GLAPI void APIENTRY glVertexAttribP3ui(GLuint index, GLenum type, GLboolean normalized, GLuint value);
GLAPI void APIENTRY glVertexAttribP3uiv(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
GLAPI void APIENTRY glVertexAttribP4ui(GLuint index, GLenum type, GLboolean normalized, GLuint value);
GLAPI void APIENTRY glVertexAttribP4uiv(GLuint index, GLenum type, GLboolean normalized, const GLuint* value);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_3_3 */

#ifndef GL_VERSION_4_0
#define GL_VERSION_4_0 1
#define GL_SAMPLE_SHADING 0x8C36
#define GL_MIN_SAMPLE_SHADING_VALUE 0x8C37
#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5E
#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5F
#define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009
#define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY 0x900A
#define GL_PROXY_TEXTURE_CUBE_MAP_ARRAY 0x900B
#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C
#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D
#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E
#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F
#define GL_DRAW_INDIRECT_BUFFER 0x8F3F
#define GL_DRAW_INDIRECT_BUFFER_BINDING 0x8F43
#define GL_GEOMETRY_SHADER_INVOCATIONS 0x887F
#define GL_MAX_GEOMETRY_SHADER_INVOCATIONS 0x8E5A
#define GL_MIN_FRAGMENT_INTERPOLATION_OFFSET 0x8E5B
#define GL_MAX_FRAGMENT_INTERPOLATION_OFFSET 0x8E5C
#define GL_FRAGMENT_INTERPOLATION_OFFSET_BITS 0x8E5D
#define GL_MAX_VERTEX_STREAMS 0x8E71
#define GL_DOUBLE_VEC2 0x8FFC
#define GL_DOUBLE_VEC3 0x8FFD
#define GL_DOUBLE_VEC4 0x8FFE
#define GL_DOUBLE_MAT2 0x8F46
#define GL_DOUBLE_MAT3 0x8F47
#define GL_DOUBLE_MAT4 0x8F48
#define GL_DOUBLE_MAT2x3 0x8F49
#define GL_DOUBLE_MAT2x4 0x8F4A
#define GL_DOUBLE_MAT3x2 0x8F4B
#define GL_DOUBLE_MAT3x4 0x8F4C
#define GL_DOUBLE_MAT4x2 0x8F4D
#define GL_DOUBLE_MAT4x3 0x8F4E
#define GL_ACTIVE_SUBROUTINES 0x8DE5
#define GL_ACTIVE_SUBROUTINE_UNIFORMS 0x8DE6
#define GL_ACTIVE_SUBROUTINE_UNIFORM_LOCATIONS 0x8E47
#define GL_ACTIVE_SUBROUTINE_MAX_LENGTH 0x8E48
#define GL_ACTIVE_SUBROUTINE_UNIFORM_MAX_LENGTH 0x8E49
#define GL_MAX_SUBROUTINES 0x8DE7
#define GL_MAX_SUBROUTINE_UNIFORM_LOCATIONS 0x8DE8
#define GL_NUM_COMPATIBLE_SUBROUTINES 0x8E4A
#define GL_COMPATIBLE_SUBROUTINES 0x8E4B
#define GL_PATCHES 0x000E
#define GL_PATCH_VERTICES 0x8E72
#define GL_PATCH_DEFAULT_INNER_LEVEL 0x8E73
#define GL_PATCH_DEFAULT_OUTER_LEVEL 0x8E74
#define GL_TESS_CONTROL_OUTPUT_VERTICES 0x8E75
#define GL_TESS_GEN_MODE 0x8E76
#define GL_TESS_GEN_SPACING 0x8E77
#define GL_TESS_GEN_VERTEX_ORDER 0x8E78
#define GL_TESS_GEN_POINT_MODE 0x8E79
#define GL_ISOLINES 0x8E7A
#define GL_FRACTIONAL_ODD 0x8E7B
#define GL_FRACTIONAL_EVEN 0x8E7C
#define GL_MAX_PATCH_VERTICES 0x8E7D
#define GL_MAX_TESS_GEN_LEVEL 0x8E7E
#define GL_MAX_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E7F
#define GL_MAX_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E80
#define GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS 0x8E81
#define GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS 0x8E82
#define GL_MAX_TESS_CONTROL_OUTPUT_COMPONENTS 0x8E83
#define GL_MAX_TESS_PATCH_COMPONENTS 0x8E84
#define GL_MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 0x8E85
#define GL_MAX_TESS_EVALUATION_OUTPUT_COMPONENTS 0x8E86
#define GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS 0x8E89
#define GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS 0x8E8A
#define GL_MAX_TESS_CONTROL_INPUT_COMPONENTS 0x886C
#define GL_MAX_TESS_EVALUATION_INPUT_COMPONENTS 0x886D
#define GL_MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E1E
#define GL_MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E1F
#define GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_CONTROL_SHADER 0x84F0
#define GL_UNIFORM_BLOCK_REFERENCED_BY_TESS_EVALUATION_SHADER 0x84F1
#define GL_TESS_EVALUATION_SHADER 0x8E87
#define GL_TESS_CONTROL_SHADER 0x8E88
#define GL_TRANSFORM_FEEDBACK 0x8E22
#define GL_TRANSFORM_FEEDBACK_BUFFER_PAUSED 0x8E23
#define GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE 0x8E24
#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25
#define GL_MAX_TRANSFORM_FEEDBACK_BUFFERS 0x8E70
/*
 * Function-pointer typedefs for the entry points of the GL_VERSION_4_0 block
 * (the guard is closed by the GL_VERSION_4_0 #endif further down).
 * Each PFN<NAME>PROC type mirrors, parameter for parameter, the gl<Name>
 * prototype declared in the GL_GLEXT_PROTOTYPES section that follows this run.
 * NOTE(review): APIENTRYP is defined outside this view; presumably it expands
 * to the calling-convention macro plus `*` -- confirm at its definition.
 */

/* Sample shading and per-draw-buffer (`buf`) blend equation / blend function. */
typedef void(APIENTRYP PFNGLMINSAMPLESHADINGPROC)(GLfloat value);
typedef void(APIENTRYP PFNGLBLENDEQUATIONIPROC)(GLuint buf, GLenum mode);
typedef void(APIENTRYP PFNGLBLENDEQUATIONSEPARATEIPROC)(GLuint buf, GLenum modeRGB, GLenum modeAlpha);
typedef void(APIENTRYP PFNGLBLENDFUNCIPROC)(GLuint buf, GLenum src, GLenum dst);
typedef void(APIENTRYP PFNGLBLENDFUNCSEPARATEIPROC)(GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha);
/* Indirect draws: the draw parameters are passed through the `indirect` pointer. */
typedef void(APIENTRYP PFNGLDRAWARRAYSINDIRECTPROC)(GLenum mode, const void* indirect);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINDIRECTPROC)(GLenum mode, GLenum type, const void* indirect);
/* GLdouble-valued uniform setters: by-value (1d..4d), array (1dv..4dv) and matrix forms, plus the getter. */
typedef void(APIENTRYP PFNGLUNIFORM1DPROC)(GLint location, GLdouble x);
typedef void(APIENTRYP PFNGLUNIFORM2DPROC)(GLint location, GLdouble x, GLdouble y);
typedef void(APIENTRYP PFNGLUNIFORM3DPROC)(GLint location, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLUNIFORM4DPROC)(GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
typedef void(APIENTRYP PFNGLUNIFORM1DVPROC)(GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORM2DVPROC)(GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORM3DVPROC)(GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORM4DVPROC)(GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2X3DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX2X4DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3X2DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX3X4DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4X2DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLUNIFORMMATRIX4X3DVPROC)(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLGETUNIFORMDVPROC)(GLuint program, GLint location, GLdouble* params);
/* Shader subroutine lookup, introspection and selection; all take a `shadertype` stage selector. */
typedef GLint(APIENTRYP PFNGLGETSUBROUTINEUNIFORMLOCATIONPROC)(GLuint program, GLenum shadertype, const GLchar* name);
typedef GLuint(APIENTRYP PFNGLGETSUBROUTINEINDEXPROC)(GLuint program, GLenum shadertype, const GLchar* name);
typedef void(APIENTRYP PFNGLGETACTIVESUBROUTINEUNIFORMIVPROC)(GLuint program, GLenum shadertype, GLuint index, GLenum pname, GLint* values);
typedef void(APIENTRYP PFNGLGETACTIVESUBROUTINEUNIFORMNAMEPROC)(GLuint program, GLenum shadertype, GLuint index, GLsizei bufSize,
                                                                GLsizei* length, GLchar* name);
typedef void(APIENTRYP PFNGLGETACTIVESUBROUTINENAMEPROC)(GLuint program, GLenum shadertype, GLuint index, GLsizei bufSize, GLsizei* length,
                                                         GLchar* name);
typedef void(APIENTRYP PFNGLUNIFORMSUBROUTINESUIVPROC)(GLenum shadertype, GLsizei count, const GLuint* indices);
typedef void(APIENTRYP PFNGLGETUNIFORMSUBROUTINEUIVPROC)(GLenum shadertype, GLint location, GLuint* params);
typedef void(APIENTRYP PFNGLGETPROGRAMSTAGEIVPROC)(GLuint program, GLenum shadertype, GLenum pname, GLint* values);
/* Patch parameters: one integer-valued and one float-array-valued setter. */
typedef void(APIENTRYP PFNGLPATCHPARAMETERIPROC)(GLenum pname, GLint value);
typedef void(APIENTRYP PFNGLPATCHPARAMETERFVPROC)(GLenum pname, const GLfloat* values);
/* Transform feedback objects: bind/delete/gen/is, pause/resume, and draws sourced from an object `id`. */
typedef void(APIENTRYP PFNGLBINDTRANSFORMFEEDBACKPROC)(GLenum target, GLuint id);
typedef void(APIENTRYP PFNGLDELETETRANSFORMFEEDBACKSPROC)(GLsizei n, const GLuint* ids);
typedef void(APIENTRYP PFNGLGENTRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint* ids);
typedef GLboolean(APIENTRYP PFNGLISTRANSFORMFEEDBACKPROC)(GLuint id);
typedef void(APIENTRYP PFNGLPAUSETRANSFORMFEEDBACKPROC)(void);
typedef void(APIENTRYP PFNGLRESUMETRANSFORMFEEDBACKPROC)(void);
typedef void(APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKPROC)(GLenum mode, GLuint id);
typedef void(APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMPROC)(GLenum mode, GLuint id, GLuint stream);
/* Indexed query variants: same shape as begin/end/get query, with an extra `index` argument. */
typedef void(APIENTRYP PFNGLBEGINQUERYINDEXEDPROC)(GLenum target, GLuint index, GLuint id);
typedef void(APIENTRYP PFNGLENDQUERYINDEXEDPROC)(GLenum target, GLuint index);
typedef void(APIENTRYP PFNGLGETQUERYINDEXEDIVPROC)(GLenum target, GLuint index, GLenum pname, GLint* params);
/*
 * Direct prototypes for the GL_VERSION_4_0 entry points. They are only
 * declared when the includer defines GL_GLEXT_PROTOTYPES; otherwise only the
 * PFN...PROC function-pointer typedefs above are available.
 * Each prototype has the same return type and parameter list as its typedef.
 * NOTE(review): GLAPI and APIENTRY are defined outside this view; presumably
 * they supply linkage and calling convention -- confirm at their definitions.
 */
#ifdef GL_GLEXT_PROTOTYPES
/* Sample shading and per-draw-buffer blend state. */
GLAPI void APIENTRY glMinSampleShading(GLfloat value);
GLAPI void APIENTRY glBlendEquationi(GLuint buf, GLenum mode);
GLAPI void APIENTRY glBlendEquationSeparatei(GLuint buf, GLenum modeRGB, GLenum modeAlpha);
GLAPI void APIENTRY glBlendFunci(GLuint buf, GLenum src, GLenum dst);
GLAPI void APIENTRY glBlendFuncSeparatei(GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha);
/* Indirect draws. */
GLAPI void APIENTRY glDrawArraysIndirect(GLenum mode, const void* indirect);
GLAPI void APIENTRY glDrawElementsIndirect(GLenum mode, GLenum type, const void* indirect);
/* GLdouble-valued uniform setters and getter. */
GLAPI void APIENTRY glUniform1d(GLint location, GLdouble x);
GLAPI void APIENTRY glUniform2d(GLint location, GLdouble x, GLdouble y);
GLAPI void APIENTRY glUniform3d(GLint location, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glUniform4d(GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
GLAPI void APIENTRY glUniform1dv(GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glUniform2dv(GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glUniform3dv(GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glUniform4dv(GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glUniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glGetUniformdv(GLuint program, GLint location, GLdouble* params);
/* Shader subroutine lookup, introspection and selection. */
GLAPI GLint APIENTRY glGetSubroutineUniformLocation(GLuint program, GLenum shadertype, const GLchar* name);
GLAPI GLuint APIENTRY glGetSubroutineIndex(GLuint program, GLenum shadertype, const GLchar* name);
GLAPI void APIENTRY glGetActiveSubroutineUniformiv(GLuint program, GLenum shadertype, GLuint index, GLenum pname, GLint* values);
GLAPI void APIENTRY glGetActiveSubroutineUniformName(GLuint program, GLenum shadertype, GLuint index, GLsizei bufSize, GLsizei* length,
                                                     GLchar* name);
GLAPI void APIENTRY glGetActiveSubroutineName(GLuint program, GLenum shadertype, GLuint index, GLsizei bufSize, GLsizei* length, GLchar* name);
GLAPI void APIENTRY glUniformSubroutinesuiv(GLenum shadertype, GLsizei count, const GLuint* indices);
GLAPI void APIENTRY glGetUniformSubroutineuiv(GLenum shadertype, GLint location, GLuint* params);
GLAPI void APIENTRY glGetProgramStageiv(GLuint program, GLenum shadertype, GLenum pname, GLint* values);
/* Patch parameters. */
GLAPI void APIENTRY glPatchParameteri(GLenum pname, GLint value);
GLAPI void APIENTRY glPatchParameterfv(GLenum pname, const GLfloat* values);
/* Transform feedback objects. */
GLAPI void APIENTRY glBindTransformFeedback(GLenum target, GLuint id);
GLAPI void APIENTRY glDeleteTransformFeedbacks(GLsizei n, const GLuint* ids);
GLAPI void APIENTRY glGenTransformFeedbacks(GLsizei n, GLuint* ids);
GLAPI GLboolean APIENTRY glIsTransformFeedback(GLuint id);
GLAPI void APIENTRY glPauseTransformFeedback(void);
GLAPI void APIENTRY glResumeTransformFeedback(void);
GLAPI void APIENTRY glDrawTransformFeedback(GLenum mode, GLuint id);
GLAPI void APIENTRY glDrawTransformFeedbackStream(GLenum mode, GLuint id, GLuint stream);
/* Indexed queries. */
GLAPI void APIENTRY glBeginQueryIndexed(GLenum target, GLuint index, GLuint id);
GLAPI void APIENTRY glEndQueryIndexed(GLenum target, GLuint index);
GLAPI void APIENTRY glGetQueryIndexediv(GLenum target, GLuint index, GLenum pname, GLint* params);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_4_0 */

/*
 * GL_VERSION_4_1 API block.
 *
 * Layout, in order:
 *   1. enum constants (#define GL_...),
 *   2. PFN...PROC function-pointer typedefs, one per entry point,
 *   3. matching GLAPI prototypes, declared only under GL_GLEXT_PROTOTYPES.
 *
 * The whole block is skipped if GL_VERSION_4_1 is already defined, so a second
 * inclusion (or another header that already provided these declarations and
 * defined the macro) does not redeclare anything.
 */
#ifndef GL_VERSION_4_1
#define GL_VERSION_4_1 1
/* Fixed-point type, read-back format queries, shader precision qualifiers and shader-binary queries. */
#define GL_FIXED 0x140C
#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A
#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B
#define GL_LOW_FLOAT 0x8DF0
#define GL_MEDIUM_FLOAT 0x8DF1
#define GL_HIGH_FLOAT 0x8DF2
#define GL_LOW_INT 0x8DF3
#define GL_MEDIUM_INT 0x8DF4
#define GL_HIGH_INT 0x8DF5
#define GL_SHADER_COMPILER 0x8DFA
#define GL_SHADER_BINARY_FORMATS 0x8DF8
#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9
#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB
#define GL_MAX_VARYING_VECTORS 0x8DFC
#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD
#define GL_RGB565 0x8D62
/* Program binary retrieval. */
#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257
#define GL_PROGRAM_BINARY_LENGTH 0x8741
#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE
#define GL_PROGRAM_BINARY_FORMATS 0x87FF
/*
 * Shader-stage bit flags: each value has exactly one bit set, and
 * GL_ALL_SHADER_BITS has all 32 bits set.
 * NOTE(review): presumably these are OR-ed into the GLbitfield `stages`
 * argument of glUseProgramStages below -- confirm against the GL specification.
 */
#define GL_VERTEX_SHADER_BIT 0x00000001
#define GL_FRAGMENT_SHADER_BIT 0x00000002
#define GL_GEOMETRY_SHADER_BIT 0x00000004
#define GL_TESS_CONTROL_SHADER_BIT 0x00000008
#define GL_TESS_EVALUATION_SHADER_BIT 0x00000010
#define GL_ALL_SHADER_BITS 0xFFFFFFFF
/* Separable programs and program pipeline state. */
#define GL_PROGRAM_SEPARABLE 0x8258
#define GL_ACTIVE_PROGRAM 0x8259
#define GL_PROGRAM_PIPELINE_BINDING 0x825A
/* Viewport-array limits and provoking-vertex conventions. */
#define GL_MAX_VIEWPORTS 0x825B
#define GL_VIEWPORT_SUBPIXEL_BITS 0x825C
#define GL_VIEWPORT_BOUNDS_RANGE 0x825D
#define GL_LAYER_PROVOKING_VERTEX 0x825E
#define GL_VIEWPORT_INDEX_PROVOKING_VERTEX 0x825F
#define GL_UNDEFINED_VERTEX 0x8260
/*
 * Function-pointer typedefs. Each PFN<NAME>PROC has the same return type and
 * parameter list as the gl<Name> prototype in the GL_GLEXT_PROTOTYPES section
 * at the end of this block.
 */
/* Shader compiler control, shader binaries, precision queries and float depth setters. */
typedef void(APIENTRYP PFNGLRELEASESHADERCOMPILERPROC)(void);
typedef void(APIENTRYP PFNGLSHADERBINARYPROC)(GLsizei count, const GLuint* shaders, GLenum binaryformat, const void* binary, GLsizei length);
typedef void(APIENTRYP PFNGLGETSHADERPRECISIONFORMATPROC)(GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision);
typedef void(APIENTRYP PFNGLDEPTHRANGEFPROC)(GLfloat n, GLfloat f);
typedef void(APIENTRYP PFNGLCLEARDEPTHFPROC)(GLfloat d);
/* Program binary get/load and integer program parameters. */
typedef void(APIENTRYP PFNGLGETPROGRAMBINARYPROC)(GLuint program, GLsizei bufSize, GLsizei* length, GLenum* binaryFormat, void* binary);
typedef void(APIENTRYP PFNGLPROGRAMBINARYPROC)(GLuint program, GLenum binaryFormat, const void* binary, GLsizei length);
typedef void(APIENTRYP PFNGLPROGRAMPARAMETERIPROC)(GLuint program, GLenum pname, GLint value);
/* Program pipeline objects: stage attachment, creation from source strings, bind/delete/gen/is and queries. */
typedef void(APIENTRYP PFNGLUSEPROGRAMSTAGESPROC)(GLuint pipeline, GLbitfield stages, GLuint program);
typedef void(APIENTRYP PFNGLACTIVESHADERPROGRAMPROC)(GLuint pipeline, GLuint program);
typedef GLuint(APIENTRYP PFNGLCREATESHADERPROGRAMVPROC)(GLenum type, GLsizei count, const GLchar* const* strings);
typedef void(APIENTRYP PFNGLBINDPROGRAMPIPELINEPROC)(GLuint pipeline);
typedef void(APIENTRYP PFNGLDELETEPROGRAMPIPELINESPROC)(GLsizei n, const GLuint* pipelines);
typedef void(APIENTRYP PFNGLGENPROGRAMPIPELINESPROC)(GLsizei n, GLuint* pipelines);
typedef GLboolean(APIENTRYP PFNGLISPROGRAMPIPELINEPROC)(GLuint pipeline);
typedef void(APIENTRYP PFNGLGETPROGRAMPIPELINEIVPROC)(GLuint pipeline, GLenum pname, GLint* params);
/*
 * glProgramUniform*: uniform setters that take the target `program` as an
 * explicit first argument. For each component count 1..4 there are GLint (i),
 * GLfloat (f), GLdouble (d) and GLuint (ui) forms, by value and as counted
 * arrays (v suffix).
 */
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1IPROC)(GLuint program, GLint location, GLint v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1IVPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1FPROC)(GLuint program, GLint location, GLfloat v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1DPROC)(GLuint program, GLint location, GLdouble v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1DVPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UIPROC)(GLuint program, GLint location, GLuint v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2IPROC)(GLuint program, GLint location, GLint v0, GLint v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2IVPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2DPROC)(GLuint program, GLint location, GLdouble v0, GLdouble v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2DVPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3IPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3IVPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3DPROC)(GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3DVPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4IPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4IVPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4DPROC)(GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2, GLdouble v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4DVPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
/* glProgramUniformMatrix*: square and non-square matrix setters, GLfloat (fv) first, then GLdouble (dv). */
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3DVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                           const GLdouble* value);
/* Pipeline validation and info-log retrieval. */
typedef void(APIENTRYP PFNGLVALIDATEPROGRAMPIPELINEPROC)(GLuint pipeline);
typedef void(APIENTRYP PFNGLGETPROGRAMPIPELINEINFOLOGPROC)(GLuint pipeline, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
/* glVertexAttribL*: GLdouble-valued vertex attribute setters, pointer setup and getter. */
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1DPROC)(GLuint index, GLdouble x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2DPROC)(GLuint index, GLdouble x, GLdouble y);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4DPROC)(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4DVPROC)(GLuint index, const GLdouble* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBLPOINTERPROC)(GLuint index, GLint size, GLenum type, GLsizei stride, const void* pointer);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBLDVPROC)(GLuint index, GLenum pname, GLdouble* params);
/* Viewport, scissor and depth-range setters: array forms take (first, count), indexed forms take a single index. */
typedef void(APIENTRYP PFNGLVIEWPORTARRAYVPROC)(GLuint first, GLsizei count, const GLfloat* v);
typedef void(APIENTRYP PFNGLVIEWPORTINDEXEDFPROC)(GLuint index, GLfloat x, GLfloat y, GLfloat w, GLfloat h);
typedef void(APIENTRYP PFNGLVIEWPORTINDEXEDFVPROC)(GLuint index, const GLfloat* v);
typedef void(APIENTRYP PFNGLSCISSORARRAYVPROC)(GLuint first, GLsizei count, const GLint* v);
typedef void(APIENTRYP PFNGLSCISSORINDEXEDPROC)(GLuint index, GLint left, GLint bottom, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLSCISSORINDEXEDVPROC)(GLuint index, const GLint* v);
typedef void(APIENTRYP PFNGLDEPTHRANGEARRAYVPROC)(GLuint first, GLsizei count, const GLdouble* v);
typedef void(APIENTRYP PFNGLDEPTHRANGEINDEXEDPROC)(GLuint index, GLdouble n, GLdouble f);
/* Indexed state getters returning GLfloat / GLdouble data. */
typedef void(APIENTRYP PFNGLGETFLOATI_VPROC)(GLenum target, GLuint index, GLfloat* data);
typedef void(APIENTRYP PFNGLGETDOUBLEI_VPROC)(GLenum target, GLuint index, GLdouble* data);
/*
 * Direct prototypes, in the same order as the typedefs above. Declared only
 * when the includer defines GL_GLEXT_PROTOTYPES.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glReleaseShaderCompiler(void);
GLAPI void APIENTRY glShaderBinary(GLsizei count, const GLuint* shaders, GLenum binaryformat, const void* binary, GLsizei length);
GLAPI void APIENTRY glGetShaderPrecisionFormat(GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision);
GLAPI void APIENTRY glDepthRangef(GLfloat n, GLfloat f);
GLAPI void APIENTRY glClearDepthf(GLfloat d);
GLAPI void APIENTRY glGetProgramBinary(GLuint program, GLsizei bufSize, GLsizei* length, GLenum* binaryFormat, void* binary);
GLAPI void APIENTRY glProgramBinary(GLuint program, GLenum binaryFormat, const void* binary, GLsizei length);
GLAPI void APIENTRY glProgramParameteri(GLuint program, GLenum pname, GLint value);
GLAPI void APIENTRY glUseProgramStages(GLuint pipeline, GLbitfield stages, GLuint program);
GLAPI void APIENTRY glActiveShaderProgram(GLuint pipeline, GLuint program);
GLAPI GLuint APIENTRY glCreateShaderProgramv(GLenum type, GLsizei count, const GLchar* const* strings);
GLAPI void APIENTRY glBindProgramPipeline(GLuint pipeline);
GLAPI void APIENTRY glDeleteProgramPipelines(GLsizei n, const GLuint* pipelines);
GLAPI void APIENTRY glGenProgramPipelines(GLsizei n, GLuint* pipelines);
GLAPI GLboolean APIENTRY glIsProgramPipeline(GLuint pipeline);
GLAPI void APIENTRY glGetProgramPipelineiv(GLuint pipeline, GLenum pname, GLint* params);
GLAPI void APIENTRY glProgramUniform1i(GLuint program, GLint location, GLint v0);
GLAPI void APIENTRY glProgramUniform1iv(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform1f(GLuint program, GLint location, GLfloat v0);
GLAPI void APIENTRY glProgramUniform1fv(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform1d(GLuint program, GLint location, GLdouble v0);
GLAPI void APIENTRY glProgramUniform1dv(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform1ui(GLuint program, GLint location, GLuint v0);
GLAPI void APIENTRY glProgramUniform1uiv(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform2i(GLuint program, GLint location, GLint v0, GLint v1);
GLAPI void APIENTRY glProgramUniform2iv(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform2f(GLuint program, GLint location, GLfloat v0, GLfloat v1);
GLAPI void APIENTRY glProgramUniform2fv(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform2d(GLuint program, GLint location, GLdouble v0, GLdouble v1);
GLAPI void APIENTRY glProgramUniform2dv(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform2ui(GLuint program, GLint location, GLuint v0, GLuint v1);
GLAPI void APIENTRY glProgramUniform2uiv(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform3i(GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
GLAPI void APIENTRY glProgramUniform3iv(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform3f(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
GLAPI void APIENTRY glProgramUniform3fv(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform3d(GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2);
GLAPI void APIENTRY glProgramUniform3dv(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform3ui(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
GLAPI void APIENTRY glProgramUniform3uiv(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform4i(GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
GLAPI void APIENTRY glProgramUniform4iv(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform4f(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
GLAPI void APIENTRY glProgramUniform4fv(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform4d(GLuint program, GLint location, GLdouble v0, GLdouble v1, GLdouble v2, GLdouble v3);
GLAPI void APIENTRY glProgramUniform4dv(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform4ui(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
GLAPI void APIENTRY glProgramUniform4uiv(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glValidateProgramPipeline(GLuint pipeline);
GLAPI void APIENTRY glGetProgramPipelineInfoLog(GLuint pipeline, GLsizei bufSize, GLsizei* length, GLchar* infoLog);
GLAPI void APIENTRY glVertexAttribL1d(GLuint index, GLdouble x);
GLAPI void APIENTRY glVertexAttribL2d(GLuint index, GLdouble x, GLdouble y);
GLAPI void APIENTRY glVertexAttribL3d(GLuint index, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glVertexAttribL4d(GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
GLAPI void APIENTRY glVertexAttribL1dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttribL2dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttribL3dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttribL4dv(GLuint index, const GLdouble* v);
GLAPI void APIENTRY glVertexAttribLPointer(GLuint index, GLint size, GLenum type, GLsizei stride, const void* pointer);
GLAPI void APIENTRY glGetVertexAttribLdv(GLuint index, GLenum pname, GLdouble* params);
GLAPI void APIENTRY glViewportArrayv(GLuint first, GLsizei count, const GLfloat* v);
GLAPI void APIENTRY glViewportIndexedf(GLuint index, GLfloat x, GLfloat y, GLfloat w, GLfloat h);
GLAPI void APIENTRY glViewportIndexedfv(GLuint index, const GLfloat* v);
GLAPI void APIENTRY glScissorArrayv(GLuint first, GLsizei count, const GLint* v);
GLAPI void APIENTRY glScissorIndexed(GLuint index, GLint left, GLint bottom, GLsizei width, GLsizei height);
GLAPI void APIENTRY glScissorIndexedv(GLuint index, const GLint* v);
GLAPI void APIENTRY glDepthRangeArrayv(GLuint first, GLsizei count, const GLdouble* v);
GLAPI void APIENTRY glDepthRangeIndexed(GLuint index, GLdouble n, GLdouble f);
GLAPI void APIENTRY glGetFloati_v(GLenum target, GLuint index, GLfloat* data);
GLAPI void APIENTRY glGetDoublei_v(GLenum target, GLuint index, GLdouble* data);
#endif /* GL_GLEXT_PROTOTYPES */
#endif /* GL_VERSION_4_1 */

/*
 * GL_VERSION_4_2 section: enumerant tokens, entry-point pointer typedefs and
 * (optionally) direct prototypes. The whole section is skipped when
 * GL_VERSION_4_2 is already defined, so its contents are processed only once.
 */
#ifndef GL_VERSION_4_2
#define GL_VERSION_4_2 1
/* Copy-buffer binding and transform-feedback state tokens. */
#define GL_COPY_READ_BUFFER_BINDING 0x8F36
#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37
#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24
#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23
/* Compressed-block dimensions for the unpack and pack directions (0x9127-0x912E). */
#define GL_UNPACK_COMPRESSED_BLOCK_WIDTH 0x9127
#define GL_UNPACK_COMPRESSED_BLOCK_HEIGHT 0x9128
#define GL_UNPACK_COMPRESSED_BLOCK_DEPTH 0x9129
#define GL_UNPACK_COMPRESSED_BLOCK_SIZE 0x912A
#define GL_PACK_COMPRESSED_BLOCK_WIDTH 0x912B
#define GL_PACK_COMPRESSED_BLOCK_HEIGHT 0x912C
#define GL_PACK_COMPRESSED_BLOCK_DEPTH 0x912D
#define GL_PACK_COMPRESSED_BLOCK_SIZE 0x912E
#define GL_NUM_SAMPLE_COUNTS 0x9380
#define GL_MIN_MAP_BUFFER_ALIGNMENT 0x90BC
/*
 * Atomic counter tokens (0x92C0-0x92DC). The values are contiguous, but the
 * listing is not strictly ascending: GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS
 * (0x92DC) appears before 0x92D9-0x92DB.
 */
#define GL_ATOMIC_COUNTER_BUFFER 0x92C0
#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1
#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2
#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3
#define GL_ATOMIC_COUNTER_BUFFER_DATA_SIZE 0x92C4
#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTERS 0x92C5
#define GL_ATOMIC_COUNTER_BUFFER_ACTIVE_ATOMIC_COUNTER_INDICES 0x92C6
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_VERTEX_SHADER 0x92C7
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_CONTROL_SHADER 0x92C8
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TESS_EVALUATION_SHADER 0x92C9
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_GEOMETRY_SHADER 0x92CA
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_FRAGMENT_SHADER 0x92CB
#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC
#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD
#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE
#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF
#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0
#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1
#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2
#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3
#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4
#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS 0x92D5
#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6
#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7
#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8
#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC
#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9
#define GL_UNIFORM_ATOMIC_COUNTER_BUFFER_INDEX 0x92DA
#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
/*
 * Barrier bit flags; each is a single bit except GL_ALL_BARRIER_BITS, which
 * sets all 32 bits. Bit 0x00000010 is not defined in this section.
 * Presumably these are OR-ed into the GLbitfield taken by glMemoryBarrier
 * (declared below) - confirm against the GL specification.
 */
#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
#define GL_UNIFORM_BARRIER_BIT 0x00000004
#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
#define GL_COMMAND_BARRIER_BIT 0x00000040
#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
#define GL_ALL_BARRIER_BITS 0xFFFFFFFF
/* Image unit limits and image-binding state tokens. */
#define GL_MAX_IMAGE_UNITS 0x8F38
#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39
#define GL_IMAGE_BINDING_NAME 0x8F3A
#define GL_IMAGE_BINDING_LEVEL 0x8F3B
#define GL_IMAGE_BINDING_LAYERED 0x8F3C
#define GL_IMAGE_BINDING_LAYER 0x8F3D
#define GL_IMAGE_BINDING_ACCESS 0x8F3E
/* Image type tokens: unprefixed (0x904C-0x9056), INT (0x9057-0x9061), UNSIGNED_INT (0x9062-0x906C). */
#define GL_IMAGE_1D 0x904C
#define GL_IMAGE_2D 0x904D
#define GL_IMAGE_3D 0x904E
#define GL_IMAGE_2D_RECT 0x904F
#define GL_IMAGE_CUBE 0x9050
#define GL_IMAGE_BUFFER 0x9051
#define GL_IMAGE_1D_ARRAY 0x9052
#define GL_IMAGE_2D_ARRAY 0x9053
#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
#define GL_IMAGE_2D_MULTISAMPLE 0x9055
#define GL_IMAGE_2D_MULTISAMPLE_ARRAY 0x9056
#define GL_INT_IMAGE_1D 0x9057
#define GL_INT_IMAGE_2D 0x9058
#define GL_INT_IMAGE_3D 0x9059
#define GL_INT_IMAGE_2D_RECT 0x905A
#define GL_INT_IMAGE_CUBE 0x905B
#define GL_INT_IMAGE_BUFFER 0x905C
#define GL_INT_IMAGE_1D_ARRAY 0x905D
#define GL_INT_IMAGE_2D_ARRAY 0x905E
#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
#define GL_INT_IMAGE_2D_MULTISAMPLE 0x9060
#define GL_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x9061
#define GL_UNSIGNED_INT_IMAGE_1D 0x9062
#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
#define GL_UNSIGNED_INT_IMAGE_2D_RECT 0x9065
#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
#define GL_UNSIGNED_INT_IMAGE_1D_ARRAY 0x9068
#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE 0x906B
#define GL_UNSIGNED_INT_IMAGE_2D_MULTISAMPLE_ARRAY 0x906C
/* Remaining image tokens: sample limit, binding format, format compatibility, per-stage uniform limits. */
#define GL_MAX_IMAGE_SAMPLES 0x906D
#define GL_IMAGE_BINDING_FORMAT 0x906E
#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
/* BPTC compressed format tokens. */
#define GL_COMPRESSED_RGBA_BPTC_UNORM 0x8E8C
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT 0x8E8F
#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
/*
 * Function-pointer typedefs (PFN...PROC), one per entry point prototyped in
 * the GL_GLEXT_PROTOTYPES block below, with matching parameter lists.
 */
typedef void(APIENTRYP PFNGLDRAWARRAYSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLint first, GLsizei count, GLsizei instancecount, GLuint baseinstance);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEINSTANCEPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices,
                                                                   GLsizei instancecount, GLuint baseinstance);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXBASEINSTANCEPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices,
                                                                             GLsizei instancecount, GLint basevertex, GLuint baseinstance);
typedef void(APIENTRYP PFNGLGETINTERNALFORMATIVPROC)(GLenum target, GLenum internalformat, GLenum pname, GLsizei count, GLint* params);
typedef void(APIENTRYP PFNGLGETACTIVEATOMICCOUNTERBUFFERIVPROC)(GLuint program, GLuint bufferIndex, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access,
                                                  GLenum format);
typedef void(APIENTRYP PFNGLMEMORYBARRIERPROC)(GLbitfield barriers);
typedef void(APIENTRYP PFNGLTEXSTORAGE1DPROC)(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
typedef void(APIENTRYP PFNGLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKINSTANCEDPROC)(GLenum mode, GLuint id, GLsizei instancecount);
typedef void(APIENTRYP PFNGLDRAWTRANSFORMFEEDBACKSTREAMINSTANCEDPROC)(GLenum mode, GLuint id, GLuint stream, GLsizei instancecount);
/* Direct prototypes; declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawArraysInstancedBaseInstance(GLenum mode, GLint first, GLsizei count, GLsizei instancecount, GLuint baseinstance);
GLAPI void APIENTRY glDrawElementsInstancedBaseInstance(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei instancecount,
                                                        GLuint baseinstance);
GLAPI void APIENTRY glDrawElementsInstancedBaseVertexBaseInstance(GLenum mode, GLsizei count, GLenum type, const void* indices,
                                                                  GLsizei instancecount, GLint basevertex, GLuint baseinstance);
GLAPI void APIENTRY glGetInternalformativ(GLenum target, GLenum internalformat, GLenum pname, GLsizei count, GLint* params);
GLAPI void APIENTRY glGetActiveAtomicCounterBufferiv(GLuint program, GLuint bufferIndex, GLenum pname, GLint* params);
GLAPI void APIENTRY glBindImageTexture(GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
GLAPI void APIENTRY glMemoryBarrier(GLbitfield barriers);
GLAPI void APIENTRY glTexStorage1D(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
GLAPI void APIENTRY glTexStorage2D(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glTexStorage3D(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
GLAPI void APIENTRY glDrawTransformFeedbackInstanced(GLenum mode, GLuint id, GLsizei instancecount);
GLAPI void APIENTRY glDrawTransformFeedbackStreamInstanced(GLenum mode, GLuint id, GLuint stream, GLsizei instancecount);
#endif
#endif /* GL_VERSION_4_2 */

/*
 * GL_VERSION_4_3 section: the GLDEBUGPROC callback type, enumerant tokens,
 * entry-point pointer typedefs and (optionally) direct prototypes. The whole
 * section is skipped when GL_VERSION_4_3 is already defined.
 */
#ifndef GL_VERSION_4_3
#define GL_VERSION_4_3 1
/*
 * Callback signature used by glDebugMessageCallback / PFNGLDEBUGMESSAGECALLBACKPROC
 * (declared further down). Note it is spelled with `APIENTRY*` directly rather
 * than the APIENTRYP macro used by the PFN...PROC typedefs.
 */
typedef void(APIENTRY* GLDEBUGPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message,
                                    const void* userParam);
#define GL_NUM_SHADING_LANGUAGE_VERSIONS 0x82E9
#define GL_VERTEX_ATTRIB_ARRAY_LONG 0x874E
/* ETC2 / EAC compressed format tokens (0x9270-0x9279; listed with 0x9274-0x9279 first). */
#define GL_COMPRESSED_RGB8_ETC2 0x9274
#define GL_COMPRESSED_SRGB8_ETC2 0x9275
#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276
#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277
#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278
#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279
#define GL_COMPRESSED_R11_EAC 0x9270
#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271
#define GL_COMPRESSED_RG11_EAC 0x9272
#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273
#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
#define GL_MAX_ELEMENT_INDEX 0x8D6B
/* Compute shader tokens: shader type, limits, work-group queries, indirect dispatch buffer. */
#define GL_COMPUTE_SHADER 0x91B9
#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
#define GL_UNIFORM_BLOCK_REFERENCED_BY_COMPUTE_SHADER 0x90EC
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_COMPUTE_SHADER 0x90ED
#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
#define GL_COMPUTE_SHADER_BIT 0x00000020
/* Debug output tokens: state, message sources, message types, limits and severities. */
#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
#define GL_DEBUG_CALLBACK_FUNCTION 0x8244
#define GL_DEBUG_CALLBACK_USER_PARAM 0x8245
#define GL_DEBUG_SOURCE_API 0x8246
#define GL_DEBUG_SOURCE_WINDOW_SYSTEM 0x8247
#define GL_DEBUG_SOURCE_SHADER_COMPILER 0x8248
#define GL_DEBUG_SOURCE_THIRD_PARTY 0x8249
#define GL_DEBUG_SOURCE_APPLICATION 0x824A
#define GL_DEBUG_SOURCE_OTHER 0x824B
#define GL_DEBUG_TYPE_ERROR 0x824C
#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824D
#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824E
#define GL_DEBUG_TYPE_PORTABILITY 0x824F
#define GL_DEBUG_TYPE_PERFORMANCE 0x8250
#define GL_DEBUG_TYPE_OTHER 0x8251
#define GL_MAX_DEBUG_MESSAGE_LENGTH 0x9143
#define GL_MAX_DEBUG_LOGGED_MESSAGES 0x9144
#define GL_DEBUG_LOGGED_MESSAGES 0x9145
#define GL_DEBUG_SEVERITY_HIGH 0x9146
#define GL_DEBUG_SEVERITY_MEDIUM 0x9147
#define GL_DEBUG_SEVERITY_LOW 0x9148
#define GL_DEBUG_TYPE_MARKER 0x8268
#define GL_DEBUG_TYPE_PUSH_GROUP 0x8269
#define GL_DEBUG_TYPE_POP_GROUP 0x826A
#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B
#define GL_MAX_DEBUG_GROUP_STACK_DEPTH 0x826C
#define GL_DEBUG_GROUP_STACK_DEPTH 0x826D
/*
 * Object-kind tokens and the label length limit. 0x82E5 and 0x82E7 are not
 * defined in this section. Presumably these are the `identifier` values for
 * glObjectLabel / glGetObjectLabel below - confirm against the GL specification.
 */
#define GL_BUFFER 0x82E0
#define GL_SHADER 0x82E1
#define GL_PROGRAM 0x82E2
#define GL_QUERY 0x82E3
#define GL_PROGRAM_PIPELINE 0x82E4
#define GL_SAMPLER 0x82E6
#define GL_MAX_LABEL_LENGTH 0x82E8
#define GL_DEBUG_OUTPUT 0x92E0
#define GL_CONTEXT_FLAG_DEBUG_BIT 0x00000002
#define GL_MAX_UNIFORM_LOCATIONS 0x826E
/* Framebuffer default-parameter tokens and framebuffer size limits (0x9310-0x9318). */
#define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310
#define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311
#define GL_FRAMEBUFFER_DEFAULT_LAYERS 0x9312
#define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313
#define GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS 0x9314
#define GL_MAX_FRAMEBUFFER_WIDTH 0x9315
#define GL_MAX_FRAMEBUFFER_HEIGHT 0x9316
#define GL_MAX_FRAMEBUFFER_LAYERS 0x9317
#define GL_MAX_FRAMEBUFFER_SAMPLES 0x9318
/*
 * Internal-format tokens (0x826F-0x82D3). The run is mostly contiguous; the
 * values 0x8299, 0x82AB and 0x82B0 are not defined in this section.
 */
#define GL_INTERNALFORMAT_SUPPORTED 0x826F
#define GL_INTERNALFORMAT_PREFERRED 0x8270
#define GL_INTERNALFORMAT_RED_SIZE 0x8271
#define GL_INTERNALFORMAT_GREEN_SIZE 0x8272
#define GL_INTERNALFORMAT_BLUE_SIZE 0x8273
#define GL_INTERNALFORMAT_ALPHA_SIZE 0x8274
#define GL_INTERNALFORMAT_DEPTH_SIZE 0x8275
#define GL_INTERNALFORMAT_STENCIL_SIZE 0x8276
#define GL_INTERNALFORMAT_SHARED_SIZE 0x8277
#define GL_INTERNALFORMAT_RED_TYPE 0x8278
#define GL_INTERNALFORMAT_GREEN_TYPE 0x8279
#define GL_INTERNALFORMAT_BLUE_TYPE 0x827A
#define GL_INTERNALFORMAT_ALPHA_TYPE 0x827B
#define GL_INTERNALFORMAT_DEPTH_TYPE 0x827C
#define GL_INTERNALFORMAT_STENCIL_TYPE 0x827D
#define GL_MAX_WIDTH 0x827E
#define GL_MAX_HEIGHT 0x827F
#define GL_MAX_DEPTH 0x8280
#define GL_MAX_LAYERS 0x8281
#define GL_MAX_COMBINED_DIMENSIONS 0x8282
#define GL_COLOR_COMPONENTS 0x8283
#define GL_DEPTH_COMPONENTS 0x8284
#define GL_STENCIL_COMPONENTS 0x8285
#define GL_COLOR_RENDERABLE 0x8286
#define GL_DEPTH_RENDERABLE 0x8287
#define GL_STENCIL_RENDERABLE 0x8288
#define GL_FRAMEBUFFER_RENDERABLE 0x8289
#define GL_FRAMEBUFFER_RENDERABLE_LAYERED 0x828A
#define GL_FRAMEBUFFER_BLEND 0x828B
#define GL_READ_PIXELS 0x828C
#define GL_READ_PIXELS_FORMAT 0x828D
#define GL_READ_PIXELS_TYPE 0x828E
#define GL_TEXTURE_IMAGE_FORMAT 0x828F
#define GL_TEXTURE_IMAGE_TYPE 0x8290
#define GL_GET_TEXTURE_IMAGE_FORMAT 0x8291
#define GL_GET_TEXTURE_IMAGE_TYPE 0x8292
#define GL_MIPMAP 0x8293
#define GL_MANUAL_GENERATE_MIPMAP 0x8294
#define GL_AUTO_GENERATE_MIPMAP 0x8295
#define GL_COLOR_ENCODING 0x8296
#define GL_SRGB_READ 0x8297
#define GL_SRGB_WRITE 0x8298
#define GL_FILTER 0x829A
#define GL_VERTEX_TEXTURE 0x829B
#define GL_TESS_CONTROL_TEXTURE 0x829C
#define GL_TESS_EVALUATION_TEXTURE 0x829D
#define GL_GEOMETRY_TEXTURE 0x829E
#define GL_FRAGMENT_TEXTURE 0x829F
#define GL_COMPUTE_TEXTURE 0x82A0
#define GL_TEXTURE_SHADOW 0x82A1
#define GL_TEXTURE_GATHER 0x82A2
#define GL_TEXTURE_GATHER_SHADOW 0x82A3
#define GL_SHADER_IMAGE_LOAD 0x82A4
#define GL_SHADER_IMAGE_STORE 0x82A5
#define GL_SHADER_IMAGE_ATOMIC 0x82A6
#define GL_IMAGE_TEXEL_SIZE 0x82A7
#define GL_IMAGE_COMPATIBILITY_CLASS 0x82A8
#define GL_IMAGE_PIXEL_FORMAT 0x82A9
#define GL_IMAGE_PIXEL_TYPE 0x82AA
#define GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_TEST 0x82AC
#define GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_TEST 0x82AD
#define GL_SIMULTANEOUS_TEXTURE_AND_DEPTH_WRITE 0x82AE
#define GL_SIMULTANEOUS_TEXTURE_AND_STENCIL_WRITE 0x82AF
#define GL_TEXTURE_COMPRESSED_BLOCK_WIDTH 0x82B1
#define GL_TEXTURE_COMPRESSED_BLOCK_HEIGHT 0x82B2
#define GL_TEXTURE_COMPRESSED_BLOCK_SIZE 0x82B3
#define GL_CLEAR_BUFFER 0x82B4
#define GL_TEXTURE_VIEW 0x82B5
#define GL_VIEW_COMPATIBILITY_CLASS 0x82B6
#define GL_FULL_SUPPORT 0x82B7
#define GL_CAVEAT_SUPPORT 0x82B8
#define GL_IMAGE_CLASS_4_X_32 0x82B9
#define GL_IMAGE_CLASS_2_X_32 0x82BA
#define GL_IMAGE_CLASS_1_X_32 0x82BB
#define GL_IMAGE_CLASS_4_X_16 0x82BC
#define GL_IMAGE_CLASS_2_X_16 0x82BD
#define GL_IMAGE_CLASS_1_X_16 0x82BE
#define GL_IMAGE_CLASS_4_X_8 0x82BF
#define GL_IMAGE_CLASS_2_X_8 0x82C0
#define GL_IMAGE_CLASS_1_X_8 0x82C1
#define GL_IMAGE_CLASS_11_11_10 0x82C2
#define GL_IMAGE_CLASS_10_10_10_2 0x82C3
#define GL_VIEW_CLASS_128_BITS 0x82C4
#define GL_VIEW_CLASS_96_BITS 0x82C5
#define GL_VIEW_CLASS_64_BITS 0x82C6
#define GL_VIEW_CLASS_48_BITS 0x82C7
#define GL_VIEW_CLASS_32_BITS 0x82C8
#define GL_VIEW_CLASS_24_BITS 0x82C9
#define GL_VIEW_CLASS_16_BITS 0x82CA
#define GL_VIEW_CLASS_8_BITS 0x82CB
#define GL_VIEW_CLASS_S3TC_DXT1_RGB 0x82CC
#define GL_VIEW_CLASS_S3TC_DXT1_RGBA 0x82CD
#define GL_VIEW_CLASS_S3TC_DXT3_RGBA 0x82CE
#define GL_VIEW_CLASS_S3TC_DXT5_RGBA 0x82CF
#define GL_VIEW_CLASS_RGTC1_RED 0x82D0
#define GL_VIEW_CLASS_RGTC2_RG 0x82D1
#define GL_VIEW_CLASS_BPTC_UNORM 0x82D2
#define GL_VIEW_CLASS_BPTC_FLOAT 0x82D3
/*
 * Program-interface and resource-property tokens (0x92E1-0x930F).
 * GL_IS_PER_PATCH (0x92E7) is listed last, out of numeric order.
 */
#define GL_UNIFORM 0x92E1
#define GL_UNIFORM_BLOCK 0x92E2
#define GL_PROGRAM_INPUT 0x92E3
#define GL_PROGRAM_OUTPUT 0x92E4
#define GL_BUFFER_VARIABLE 0x92E5
#define GL_SHADER_STORAGE_BLOCK 0x92E6
#define GL_VERTEX_SUBROUTINE 0x92E8
#define GL_TESS_CONTROL_SUBROUTINE 0x92E9
#define GL_TESS_EVALUATION_SUBROUTINE 0x92EA
#define GL_GEOMETRY_SUBROUTINE 0x92EB
#define GL_FRAGMENT_SUBROUTINE 0x92EC
#define GL_COMPUTE_SUBROUTINE 0x92ED
#define GL_VERTEX_SUBROUTINE_UNIFORM 0x92EE
#define GL_TESS_CONTROL_SUBROUTINE_UNIFORM 0x92EF
#define GL_TESS_EVALUATION_SUBROUTINE_UNIFORM 0x92F0
#define GL_GEOMETRY_SUBROUTINE_UNIFORM 0x92F1
#define GL_FRAGMENT_SUBROUTINE_UNIFORM 0x92F2
#define GL_COMPUTE_SUBROUTINE_UNIFORM 0x92F3
#define GL_TRANSFORM_FEEDBACK_VARYING 0x92F4
#define GL_ACTIVE_RESOURCES 0x92F5
#define GL_MAX_NAME_LENGTH 0x92F6
#define GL_MAX_NUM_ACTIVE_VARIABLES 0x92F7
#define GL_MAX_NUM_COMPATIBLE_SUBROUTINES 0x92F8
#define GL_NAME_LENGTH 0x92F9
#define GL_TYPE 0x92FA
#define GL_ARRAY_SIZE 0x92FB
#define GL_OFFSET 0x92FC
#define GL_BLOCK_INDEX 0x92FD
#define GL_ARRAY_STRIDE 0x92FE
#define GL_MATRIX_STRIDE 0x92FF
#define GL_IS_ROW_MAJOR 0x9300
#define GL_ATOMIC_COUNTER_BUFFER_INDEX 0x9301
#define GL_BUFFER_BINDING 0x9302
#define GL_BUFFER_DATA_SIZE 0x9303
#define GL_NUM_ACTIVE_VARIABLES 0x9304
#define GL_ACTIVE_VARIABLES 0x9305
#define GL_REFERENCED_BY_VERTEX_SHADER 0x9306
#define GL_REFERENCED_BY_TESS_CONTROL_SHADER 0x9307
#define GL_REFERENCED_BY_TESS_EVALUATION_SHADER 0x9308
#define GL_REFERENCED_BY_GEOMETRY_SHADER 0x9309
#define GL_REFERENCED_BY_FRAGMENT_SHADER 0x930A
#define GL_REFERENCED_BY_COMPUTE_SHADER 0x930B
#define GL_TOP_LEVEL_ARRAY_SIZE 0x930C
#define GL_TOP_LEVEL_ARRAY_STRIDE 0x930D
#define GL_LOCATION 0x930E
#define GL_LOCATION_INDEX 0x930F
#define GL_IS_PER_PATCH 0x92E7
/* Shader storage buffer tokens (0x90D2-0x90DF) and the associated barrier bit. */
#define GL_SHADER_STORAGE_BUFFER 0x90D2
#define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3
#define GL_SHADER_STORAGE_BUFFER_START 0x90D4
#define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5
#define GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS 0x90D6
#define GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS 0x90D7
#define GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS 0x90D8
#define GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS 0x90D9
#define GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS 0x90DA
#define GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS 0x90DB
#define GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS 0x90DC
#define GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 0x90DD
#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE
#define GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT 0x90DF
#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000
/*
 * Same numeric value (0x8F39) as GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS
 * in the GL_VERSION_4_2 section; the two names are interchangeable as constants.
 */
#define GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES 0x8F39
#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA
/* Texture buffer offset/size tokens. */
#define GL_TEXTURE_BUFFER_OFFSET 0x919D
#define GL_TEXTURE_BUFFER_SIZE 0x919E
#define GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT 0x919F
/* Texture view state tokens. */
#define GL_TEXTURE_VIEW_MIN_LEVEL 0x82DB
#define GL_TEXTURE_VIEW_NUM_LEVELS 0x82DC
#define GL_TEXTURE_VIEW_MIN_LAYER 0x82DD
#define GL_TEXTURE_VIEW_NUM_LAYERS 0x82DE
#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF
/* Vertex attribute binding tokens. */
#define GL_VERTEX_ATTRIB_BINDING 0x82D4
#define GL_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D5
#define GL_VERTEX_BINDING_DIVISOR 0x82D6
#define GL_VERTEX_BINDING_OFFSET 0x82D7
#define GL_VERTEX_BINDING_STRIDE 0x82D8
#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9
#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA
#define GL_VERTEX_BINDING_BUFFER 0x8F4F
/*
 * Function-pointer typedefs (PFN...PROC), one per entry point prototyped in
 * the GL_GLEXT_PROTOTYPES block below, with matching parameter lists. Most
 * return void; the resource index / location lookups and the debug message
 * log getter return GLuint or GLint.
 */
typedef void(APIENTRYP PFNGLCLEARBUFFERDATAPROC)(GLenum target, GLenum internalformat, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARBUFFERSUBDATAPROC)(GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format,
                                                    GLenum type, const void* data);
typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNGLCOPYIMAGESUBDATAPROC)(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ,
                                                  GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ,
                                                  GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
typedef void(APIENTRYP PFNGLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETINTERNALFORMATI64VPROC)(GLenum target, GLenum internalformat, GLenum pname, GLsizei count, GLint64* params);
typedef void(APIENTRYP PFNGLINVALIDATETEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                       GLsizei width, GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNGLINVALIDATETEXIMAGEPROC)(GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLINVALIDATEBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLINVALIDATEBUFFERDATAPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLINVALIDATEFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments);
typedef void(APIENTRYP PFNGLINVALIDATESUBFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x,
                                                          GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTPROC)(GLenum mode, const void* indirect, GLsizei drawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTPROC)(GLenum mode, GLenum type, const void* indirect, GLsizei drawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLGETPROGRAMINTERFACEIVPROC)(GLuint program, GLenum programInterface, GLenum pname, GLint* params);
typedef GLuint(APIENTRYP PFNGLGETPROGRAMRESOURCEINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCENAMEPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize,
                                                        GLsizei* length, GLchar* name);
typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCEIVPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount,
                                                      const GLenum* props, GLsizei count, GLsizei* length, GLint* params);
typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef GLint(APIENTRYP PFNGLGETPROGRAMRESOURCELOCATIONINDEXPROC)(GLuint program, GLenum programInterface, const GLchar* name);
typedef void(APIENTRYP PFNGLSHADERSTORAGEBLOCKBINDINGPROC)(GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding);
typedef void(APIENTRYP PFNGLTEXBUFFERRANGEPROC)(GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLTEXSTORAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width,
                                                         GLsizei height, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXSTORAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width,
                                                         GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTUREVIEWPROC)(GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel,
                                             GLuint numlevels, GLuint minlayer, GLuint numlayers);
typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERPROC)(GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXATTRIBFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBIFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBLFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXATTRIBBINDINGPROC)(GLuint attribindex, GLuint bindingindex);
typedef void(APIENTRYP PFNGLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLuint divisor);
typedef void(APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids,
                                                     GLboolean enabled);
typedef void(APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf);
typedef void(APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC)(GLDEBUGPROC callback, const void* userParam);
typedef GLuint(APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC)(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids,
                                                      GLenum* severities, GLsizei* lengths, GLchar* messageLog);
typedef void(APIENTRYP PFNGLPUSHDEBUGGROUPPROC)(GLenum source, GLuint id, GLsizei length, const GLchar* message);
typedef void(APIENTRYP PFNGLPOPDEBUGGROUPPROC)(void);
typedef void(APIENTRYP PFNGLOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei length, const GLchar* label);
typedef void(APIENTRYP PFNGLGETOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei* length, GLchar* label);
typedef void(APIENTRYP PFNGLOBJECTPTRLABELPROC)(const void* ptr, GLsizei length, const GLchar* label);
typedef void(APIENTRYP PFNGLGETOBJECTPTRLABELPROC)(const void* ptr, GLsizei bufSize, GLsizei* length, GLchar* label);
/* Direct prototypes; declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glClearBufferData(GLenum target, GLenum internalformat, GLenum format, GLenum type, const void* data);
GLAPI void APIENTRY glClearBufferSubData(GLenum target, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format, GLenum type,
                                         const void* data);
GLAPI void APIENTRY glDispatchCompute(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
GLAPI void APIENTRY glDispatchComputeIndirect(GLintptr indirect);
GLAPI void APIENTRY glCopyImageSubData(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName,
                                       GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth,
                                       GLsizei srcHeight, GLsizei srcDepth);
GLAPI void APIENTRY glFramebufferParameteri(GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glGetFramebufferParameteriv(GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetInternalformati64v(GLenum target, GLenum internalformat, GLenum pname, GLsizei count, GLint64* params);
GLAPI void APIENTRY glInvalidateTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                            GLsizei height, GLsizei depth);
GLAPI void APIENTRY glInvalidateTexImage(GLuint texture, GLint level);
GLAPI void APIENTRY glInvalidateBufferSubData(GLuint buffer, GLintptr offset, GLsizeiptr length);
GLAPI void APIENTRY glInvalidateBufferData(GLuint buffer);
GLAPI void APIENTRY glInvalidateFramebuffer(GLenum target, GLsizei numAttachments, const GLenum* attachments);
GLAPI void APIENTRY glInvalidateSubFramebuffer(GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y,
                                               GLsizei width, GLsizei height);
GLAPI void APIENTRY glMultiDrawArraysIndirect(GLenum mode, const void* indirect, GLsizei drawcount, GLsizei stride);
GLAPI void APIENTRY glMultiDrawElementsIndirect(GLenum mode, GLenum type, const void* indirect, GLsizei drawcount, GLsizei stride);
GLAPI void APIENTRY glGetProgramInterfaceiv(GLuint program, GLenum programInterface, GLenum pname, GLint* params);
GLAPI GLuint APIENTRY glGetProgramResourceIndex(GLuint program, GLenum programInterface, const GLchar* name);
GLAPI void APIENTRY glGetProgramResourceName(GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei* length, GLchar* name);
GLAPI void APIENTRY glGetProgramResourceiv(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum* props,
                                           GLsizei count, GLsizei* length, GLint* params);
GLAPI GLint APIENTRY glGetProgramResourceLocation(GLuint program, GLenum programInterface, const GLchar* name);
GLAPI GLint APIENTRY glGetProgramResourceLocationIndex(GLuint program, GLenum programInterface, const GLchar* name);
GLAPI void APIENTRY glShaderStorageBlockBinding(GLuint program, GLuint storageBlockIndex, GLuint storageBlockBinding);
GLAPI void APIENTRY glTexBufferRange(GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
GLAPI void APIENTRY glTexStorage2DMultisample(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                              GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTexStorage3DMultisample(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                              GLsizei depth, GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTextureView(GLuint texture, GLenum target, GLuint origtexture, GLenum internalformat, GLuint minlevel,
                                  GLuint numlevels, GLuint minlayer, GLuint numlayers);
GLAPI void APIENTRY glBindVertexBuffer(GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
GLAPI void APIENTRY glVertexAttribFormat(GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
GLAPI void APIENTRY glVertexAttribIFormat(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexAttribLFormat(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexAttribBinding(GLuint attribindex, GLuint bindingindex);
GLAPI void APIENTRY glVertexBindingDivisor(GLuint bindingindex, GLuint divisor);
GLAPI void APIENTRY glDebugMessageControl(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids, GLboolean enabled);
GLAPI void APIENTRY glDebugMessageInsert(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf);
GLAPI void APIENTRY glDebugMessageCallback(GLDEBUGPROC callback, const void* userParam);
GLAPI GLuint APIENTRY glGetDebugMessageLog(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids, GLenum* severities,
                                           GLsizei* lengths, GLchar* messageLog);
GLAPI void APIENTRY glPushDebugGroup(GLenum source, GLuint id, GLsizei length, const GLchar* message);
GLAPI void APIENTRY glPopDebugGroup(void);
GLAPI void APIENTRY glObjectLabel(GLenum identifier, GLuint name, GLsizei length, const GLchar* label);
GLAPI void APIENTRY glGetObjectLabel(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei* length, GLchar* label);
GLAPI void APIENTRY glObjectPtrLabel(const void* ptr, GLsizei length, const GLchar* label);
GLAPI void APIENTRY glGetObjectPtrLabel(const void* ptr, GLsizei bufSize, GLsizei* length, GLchar* label);
#endif
#endif /* GL_VERSION_4_3 */

#ifndef GL_VERSION_4_4
#define GL_VERSION_4_4 1
#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5
#define GL_PRIMITIVE_RESTART_FOR_PATCHES_SUPPORTED 0x8221
#define GL_TEXTURE_BUFFER_BINDING 0x8C2A
#define GL_MAP_PERSISTENT_BIT 0x0040
#define GL_MAP_COHERENT_BIT 0x0080
#define GL_DYNAMIC_STORAGE_BIT 0x0100
#define GL_CLIENT_STORAGE_BIT 0x0200
#define GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT 0x00004000
#define GL_BUFFER_IMMUTABLE_STORAGE 0x821F
#define GL_BUFFER_STORAGE_FLAGS 0x8220
#define GL_CLEAR_TEXTURE 0x9365
#define GL_LOCATION_COMPONENT 0x934A
#define GL_TRANSFORM_FEEDBACK_BUFFER_INDEX 0x934B
#define GL_TRANSFORM_FEEDBACK_BUFFER_STRIDE 0x934C
#define GL_QUERY_BUFFER 0x9192
#define GL_QUERY_BUFFER_BARRIER_BIT 0x00008000
#define GL_QUERY_BUFFER_BINDING 0x9193
#define GL_QUERY_RESULT_NO_WAIT 0x9194
#define GL_MIRROR_CLAMP_TO_EDGE 0x8743
typedef void(APIENTRYP PFNGLBUFFERSTORAGEPROC)(GLenum target, GLsizeiptr size, const void* data, GLbitfield flags);
typedef void(APIENTRYP PFNGLCLEARTEXIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARTEXSUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                  GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLBINDBUFFERSBASEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers);
typedef void(APIENTRYP PFNGLBINDBUFFERSRANGEPROC)(GLenum target, GLuint first, GLsizei count, const GLuint* buffers,
                                                  const GLintptr* offsets, const GLsizeiptr* sizes);
typedef void(APIENTRYP PFNGLBINDTEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures);
typedef void(APIENTRYP PFNGLBINDSAMPLERSPROC)(GLuint first, GLsizei count, const GLuint* samplers);
typedef void(APIENTRYP PFNGLBINDIMAGETEXTURESPROC)(GLuint first, GLsizei count, const GLuint* textures);
typedef void(APIENTRYP PFNGLBINDVERTEXBUFFERSPROC)(GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets,
                                                   const GLsizei* strides);
/* Direct prototypes for the GL_VERSION_4_4 entry points. They are only declared when the
 * including translation unit defines GL_GLEXT_PROTOTYPES; otherwise only the PFN...PROC
 * function-pointer types above are available. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBufferStorage(GLenum target, GLsizeiptr size, const void* data, GLbitfield flags);
GLAPI void APIENTRY glClearTexImage(GLuint texture, GLint level, GLenum format, GLenum type, const void* data);
GLAPI void APIENTRY glClearTexSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                       GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* data);
GLAPI void APIENTRY glBindBuffersBase(GLenum target, GLuint first, GLsizei count, const GLuint* buffers);
GLAPI void APIENTRY glBindBuffersRange(GLenum target, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets,
                                       const GLsizeiptr* sizes);
GLAPI void APIENTRY glBindTextures(GLuint first, GLsizei count, const GLuint* textures);
GLAPI void APIENTRY glBindSamplers(GLuint first, GLsizei count, const GLuint* samplers);
GLAPI void APIENTRY glBindImageTextures(GLuint first, GLsizei count, const GLuint* textures);
GLAPI void APIENTRY glBindVertexBuffers(GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets, const GLsizei* strides);
#endif
#endif /* GL_VERSION_4_4 */

/*
 * GL_VERSION_4_5 interface block.
 *
 * The whole block is skipped if GL_VERSION_4_5 is already defined (for example by another
 * GL header included earlier); otherwise GL_VERSION_4_5 is defined to 1 so client code can
 * test for the presence of these declarations.
 *
 * Layout:
 *   1. enumerant tokens (#define GL_*),
 *   2. PFN...PROC function-pointer typedefs, one per entry point,
 *   3. matching gl* prototypes, declared only when GL_GLEXT_PROTOTYPES is defined.
 *
 * Sections 2 and 3 list the same entry points in the same order with identical signatures.
 */
#ifndef GL_VERSION_4_5
#define GL_VERSION_4_5 1
/* Enumerant tokens. */
#define GL_CONTEXT_LOST 0x0507
#define GL_NEGATIVE_ONE_TO_ONE 0x935E
#define GL_ZERO_TO_ONE 0x935F
#define GL_CLIP_ORIGIN 0x935C
#define GL_CLIP_DEPTH_MODE 0x935D
#define GL_QUERY_WAIT_INVERTED 0x8E17
#define GL_QUERY_NO_WAIT_INVERTED 0x8E18
#define GL_QUERY_BY_REGION_WAIT_INVERTED 0x8E19
#define GL_QUERY_BY_REGION_NO_WAIT_INVERTED 0x8E1A
#define GL_MAX_CULL_DISTANCES 0x82F9
#define GL_MAX_COMBINED_CLIP_AND_CULL_DISTANCES 0x82FA
#define GL_TEXTURE_TARGET 0x1006
#define GL_QUERY_TARGET 0x82EA
#define GL_GUILTY_CONTEXT_RESET 0x8253
#define GL_INNOCENT_CONTEXT_RESET 0x8254
#define GL_UNKNOWN_CONTEXT_RESET 0x8255
#define GL_RESET_NOTIFICATION_STRATEGY 0x8256
#define GL_LOSE_CONTEXT_ON_RESET 0x8252
#define GL_NO_RESET_NOTIFICATION 0x8261
#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004
#define GL_CONTEXT_RELEASE_BEHAVIOR 0x82FB
#define GL_CONTEXT_RELEASE_BEHAVIOR_FLUSH 0x82FC
/* Function-pointer types. */
typedef void(APIENTRYP PFNGLCLIPCONTROLPROC)(GLenum origin, GLenum depth);
/* Transform feedback objects (addressed by the `xfb` object name). */
typedef void(APIENTRYP PFNGLCREATETRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint* ids);
typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERBASEPROC)(GLuint xfb, GLuint index, GLuint buffer);
typedef void(APIENTRYP PFNGLTRANSFORMFEEDBACKBUFFERRANGEPROC)(GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKIVPROC)(GLuint xfb, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint* param);
typedef void(APIENTRYP PFNGLGETTRANSFORMFEEDBACKI64_VPROC)(GLuint xfb, GLenum pname, GLuint index, GLint64* param);
/* Buffer objects (addressed by the `buffer` object name rather than a binding target). */
typedef void(APIENTRYP PFNGLCREATEBUFFERSPROC)(GLsizei n, GLuint* buffers);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSTORAGEPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags);
typedef void(APIENTRYP PFNGLNAMEDBUFFERDATAPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data);
typedef void(APIENTRYP PFNGLCOPYNAMEDBUFFERSUBDATAPROC)(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset,
                                                        GLsizeiptr size);
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERDATAPROC)(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size,
                                                         GLenum format, GLenum type, const void* data);
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERPROC)(GLuint buffer, GLenum access);
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
typedef GLboolean(APIENTRYP PFNGLUNMAPNAMEDBUFFERPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVPROC)(GLuint buffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERI64VPROC)(GLuint buffer, GLenum pname, GLint64* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVPROC)(GLuint buffer, GLenum pname, void** params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data);
/* Framebuffer objects (addressed by the `framebuffer` object name). */
typedef void(APIENTRYP PFNGLCREATEFRAMEBUFFERSPROC)(GLsizei n, GLuint* framebuffers);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFERPROC)(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIPROC)(GLuint framebuffer, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYERPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERPROC)(GLuint framebuffer, GLenum buf);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERDRAWBUFFERSPROC)(GLuint framebuffer, GLsizei n, const GLenum* bufs);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERREADBUFFERPROC)(GLuint framebuffer, GLenum src);
typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments);
typedef void(APIENTRYP PFNGLINVALIDATENAMEDFRAMEBUFFERSUBDATAPROC)(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments,
                                                                   GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERUIVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFVPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat* value);
typedef void(APIENTRYP PFNGLCLEARNAMEDFRAMEBUFFERFIPROC)(GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
typedef void(APIENTRYP PFNGLBLITNAMEDFRAMEBUFFERPROC)(GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1,
                                                      GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask,
                                                      GLenum filter);
typedef GLenum(APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSPROC)(GLuint framebuffer, GLenum target);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVPROC)(GLuint framebuffer, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVPROC)(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params);
/* Renderbuffer objects (addressed by the `renderbuffer` object name). */
typedef void(APIENTRYP PFNGLCREATERENDERBUFFERSPROC)(GLsizei n, GLuint* renderbuffers);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEPROC)(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEPROC)(GLuint renderbuffer, GLsizei samples, GLenum internalformat,
                                                                     GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVPROC)(GLuint renderbuffer, GLenum pname, GLint* params);
/* Texture objects (addressed by the `texture` object name; only glCreateTextures takes a target). */
typedef void(APIENTRYP PFNGLCREATETEXTURESPROC)(GLenum target, GLsizei n, GLuint* textures);
typedef void(APIENTRYP PFNGLTEXTUREBUFFERPROC)(GLuint texture, GLenum internalformat, GLuint buffer);
typedef void(APIENTRYP PFNGLTEXTUREBUFFERRANGEPROC)(GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE1DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DPROC)(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height,
                                                  GLsizei depth);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width,
                                                             GLsizei height, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEPROC)(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width,
                                                             GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type,
                                                   const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                                   GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                   GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format,
                                                             GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width,
                                                             GLsizei height, GLenum format, GLsizei imageSize, const void* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                             GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize,
                                                             const void* data);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DPROC)(GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y,
                                                       GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x,
                                                       GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFPROC)(GLuint texture, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, const GLfloat* param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIPROC)(GLuint texture, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, const GLuint* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, const GLint* param);
typedef void(APIENTRYP PFNGLGENERATETEXTUREMIPMAPPROC)(GLuint texture);
typedef void(APIENTRYP PFNGLBINDTEXTUREUNITPROC)(GLuint unit, GLuint texture);
typedef void(APIENTRYP PFNGLGETTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEPROC)(GLuint texture, GLint level, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVPROC)(GLuint texture, GLint level, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVPROC)(GLuint texture, GLint level, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERFVPROC)(GLuint texture, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIIVPROC)(GLuint texture, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVPROC)(GLuint texture, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIVPROC)(GLuint texture, GLenum pname, GLint* params);
/* Vertex array objects (addressed by the `vaobj` object name). */
typedef void(APIENTRYP PFNGLCREATEVERTEXARRAYSPROC)(GLsizei n, GLuint* arrays);
typedef void(APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLVERTEXARRAYELEMENTBUFFERPROC)(GLuint vaobj, GLuint buffer);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERPROC)(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBUFFERSPROC)(GLuint vaobj, GLuint first, GLsizei count, const GLuint* buffers,
                                                          const GLintptr* offsets, const GLsizei* strides);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBBINDINGPROC)(GLuint vaobj, GLuint attribindex, GLuint bindingindex);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized,
                                                         GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBIFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYATTRIBLFORMATPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYBINDINGDIVISORPROC)(GLuint vaobj, GLuint bindingindex, GLuint divisor);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYIVPROC)(GLuint vaobj, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXEDIVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINDEXED64IVPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint64* param);
/* Sampler, program-pipeline and query object creation, plus query results written at an
 * `offset` into a `buffer` object (no client pointer parameter). */
typedef void(APIENTRYP PFNGLCREATESAMPLERSPROC)(GLsizei n, GLuint* samplers);
typedef void(APIENTRYP PFNGLCREATEPROGRAMPIPELINESPROC)(GLsizei n, GLuint* pipelines);
typedef void(APIENTRYP PFNGLCREATEQUERIESPROC)(GLenum target, GLsizei n, GLuint* ids);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUI64VPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLGETQUERYBUFFEROBJECTUIVPROC)(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
typedef void(APIENTRYP PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers);
/* Sub-region texture readback; both take a bufSize immediately before the output pointer. */
typedef void(APIENTRYP PFNGLGETTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                    GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTURESUBIMAGEPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                              GLsizei width, GLsizei height, GLsizei depth, GLsizei bufSize, void* pixels);
/* Reset-status query and the glGetn* / glReadnPixels getters; each getter takes a bufSize
 * immediately before the output pointer. */
typedef GLenum(APIENTRYP PFNGLGETGRAPHICSRESETSTATUSPROC)(void);
typedef void(APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEPROC)(GLenum target, GLint lod, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETNTEXIMAGEPROC)(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
typedef void(APIENTRYP PFNGLGETNUNIFORMDVPROC)(GLuint program, GLint location, GLsizei bufSize, GLdouble* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint* params);
typedef void(APIENTRYP PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize,
                                             void* data);
typedef void(APIENTRYP PFNGLTEXTUREBARRIERPROC)(void);
/* Direct prototypes, declared only when the includer defines GL_GLEXT_PROTOTYPES.
 * Same entry points, order and signatures as the PFN...PROC typedefs above. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glClipControl(GLenum origin, GLenum depth);
GLAPI void APIENTRY glCreateTransformFeedbacks(GLsizei n, GLuint* ids);
GLAPI void APIENTRY glTransformFeedbackBufferBase(GLuint xfb, GLuint index, GLuint buffer);
GLAPI void APIENTRY glTransformFeedbackBufferRange(GLuint xfb, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size);
GLAPI void APIENTRY glGetTransformFeedbackiv(GLuint xfb, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetTransformFeedbacki_v(GLuint xfb, GLenum pname, GLuint index, GLint* param);
GLAPI void APIENTRY glGetTransformFeedbacki64_v(GLuint xfb, GLenum pname, GLuint index, GLint64* param);
GLAPI void APIENTRY glCreateBuffers(GLsizei n, GLuint* buffers);
GLAPI void APIENTRY glNamedBufferStorage(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags);
GLAPI void APIENTRY glNamedBufferData(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage);
GLAPI void APIENTRY glNamedBufferSubData(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data);
GLAPI void APIENTRY glCopyNamedBufferSubData(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
GLAPI void APIENTRY glClearNamedBufferData(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data);
GLAPI void APIENTRY glClearNamedBufferSubData(GLuint buffer, GLenum internalformat, GLintptr offset, GLsizeiptr size, GLenum format,
                                              GLenum type, const void* data);
GLAPI void* APIENTRY glMapNamedBuffer(GLuint buffer, GLenum access);
GLAPI void* APIENTRY glMapNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
GLAPI GLboolean APIENTRY glUnmapNamedBuffer(GLuint buffer);
GLAPI void APIENTRY glFlushMappedNamedBufferRange(GLuint buffer, GLintptr offset, GLsizeiptr length);
GLAPI void APIENTRY glGetNamedBufferParameteriv(GLuint buffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetNamedBufferParameteri64v(GLuint buffer, GLenum pname, GLint64* params);
GLAPI void APIENTRY glGetNamedBufferPointerv(GLuint buffer, GLenum pname, void** params);
GLAPI void APIENTRY glGetNamedBufferSubData(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data);
GLAPI void APIENTRY glCreateFramebuffers(GLsizei n, GLuint* framebuffers);
GLAPI void APIENTRY glNamedFramebufferRenderbuffer(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
GLAPI void APIENTRY glNamedFramebufferParameteri(GLuint framebuffer, GLenum pname, GLint param);
GLAPI void APIENTRY glNamedFramebufferTexture(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
GLAPI void APIENTRY glNamedFramebufferTextureLayer(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
GLAPI void APIENTRY glNamedFramebufferDrawBuffer(GLuint framebuffer, GLenum buf);
GLAPI void APIENTRY glNamedFramebufferDrawBuffers(GLuint framebuffer, GLsizei n, const GLenum* bufs);
GLAPI void APIENTRY glNamedFramebufferReadBuffer(GLuint framebuffer, GLenum src);
GLAPI void APIENTRY glInvalidateNamedFramebufferData(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments);
GLAPI void APIENTRY glInvalidateNamedFramebufferSubData(GLuint framebuffer, GLsizei numAttachments, const GLenum* attachments, GLint x,
                                                        GLint y, GLsizei width, GLsizei height);
GLAPI void APIENTRY glClearNamedFramebufferiv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLint* value);
GLAPI void APIENTRY glClearNamedFramebufferuiv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLuint* value);
GLAPI void APIENTRY glClearNamedFramebufferfv(GLuint framebuffer, GLenum buffer, GLint drawbuffer, const GLfloat* value);
GLAPI void APIENTRY glClearNamedFramebufferfi(GLuint framebuffer, GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil);
GLAPI void APIENTRY glBlitNamedFramebuffer(GLuint readFramebuffer, GLuint drawFramebuffer, GLint srcX0, GLint srcY0, GLint srcX1,
                                           GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
GLAPI GLenum APIENTRY glCheckNamedFramebufferStatus(GLuint framebuffer, GLenum target);
GLAPI void APIENTRY glGetNamedFramebufferParameteriv(GLuint framebuffer, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetNamedFramebufferAttachmentParameteriv(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params);
GLAPI void APIENTRY glCreateRenderbuffers(GLsizei n, GLuint* renderbuffers);
GLAPI void APIENTRY glNamedRenderbufferStorage(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glNamedRenderbufferStorageMultisample(GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glGetNamedRenderbufferParameteriv(GLuint renderbuffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glCreateTextures(GLenum target, GLsizei n, GLuint* textures);
GLAPI void APIENTRY glTextureBuffer(GLuint texture, GLenum internalformat, GLuint buffer);
GLAPI void APIENTRY glTextureBufferRange(GLuint texture, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
GLAPI void APIENTRY glTextureStorage1D(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width);
GLAPI void APIENTRY glTextureStorage2D(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glTextureStorage3D(GLuint texture, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
GLAPI void APIENTRY glTextureStorage2DMultisample(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                                  GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTextureStorage3DMultisample(GLuint texture, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height,
                                                  GLsizei depth, GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                        GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                        GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, GLsizei width, GLenum format,
                                                  GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height,
                                                  GLenum format, GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                  GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void* data);
GLAPI void APIENTRY glCopyTextureSubImage1D(GLuint texture, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
GLAPI void APIENTRY glCopyTextureSubImage2D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width,
                                            GLsizei height);
GLAPI void APIENTRY glCopyTextureSubImage3D(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y,
                                            GLsizei width, GLsizei height);
GLAPI void APIENTRY glTextureParameterf(GLuint texture, GLenum pname, GLfloat param);
GLAPI void APIENTRY glTextureParameterfv(GLuint texture, GLenum pname, const GLfloat* param);
GLAPI void APIENTRY glTextureParameteri(GLuint texture, GLenum pname, GLint param);
GLAPI void APIENTRY glTextureParameterIiv(GLuint texture, GLenum pname, const GLint* params);
GLAPI void APIENTRY glTextureParameterIuiv(GLuint texture, GLenum pname, const GLuint* params);
GLAPI void APIENTRY glTextureParameteriv(GLuint texture, GLenum pname, const GLint* param);
GLAPI void APIENTRY glGenerateTextureMipmap(GLuint texture);
GLAPI void APIENTRY glBindTextureUnit(GLuint unit, GLuint texture);
GLAPI void APIENTRY glGetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
GLAPI void APIENTRY glGetCompressedTextureImage(GLuint texture, GLint level, GLsizei bufSize, void* pixels);
GLAPI void APIENTRY glGetTextureLevelParameterfv(GLuint texture, GLint level, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTextureLevelParameteriv(GLuint texture, GLint level, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTextureParameterfv(GLuint texture, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTextureParameterIiv(GLuint texture, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTextureParameterIuiv(GLuint texture, GLenum pname, GLuint* params);
GLAPI void APIENTRY glGetTextureParameteriv(GLuint texture, GLenum pname, GLint* params);
GLAPI void APIENTRY glCreateVertexArrays(GLsizei n, GLuint* arrays);
GLAPI void APIENTRY glDisableVertexArrayAttrib(GLuint vaobj, GLuint index);
GLAPI void APIENTRY glEnableVertexArrayAttrib(GLuint vaobj, GLuint index);
GLAPI void APIENTRY glVertexArrayElementBuffer(GLuint vaobj, GLuint buffer);
GLAPI void APIENTRY glVertexArrayVertexBuffer(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
GLAPI void APIENTRY glVertexArrayVertexBuffers(GLuint vaobj, GLuint first, GLsizei count, const GLuint* buffers, const GLintptr* offsets,
                                               const GLsizei* strides);
GLAPI void APIENTRY glVertexArrayAttribBinding(GLuint vaobj, GLuint attribindex, GLuint bindingindex);
GLAPI void APIENTRY glVertexArrayAttribFormat(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayAttribIFormat(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayAttribLFormat(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayBindingDivisor(GLuint vaobj, GLuint bindingindex, GLuint divisor);
GLAPI void APIENTRY glGetVertexArrayiv(GLuint vaobj, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetVertexArrayIndexediv(GLuint vaobj, GLuint index, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetVertexArrayIndexed64iv(GLuint vaobj, GLuint index, GLenum pname, GLint64* param);
GLAPI void APIENTRY glCreateSamplers(GLsizei n, GLuint* samplers);
GLAPI void APIENTRY glCreateProgramPipelines(GLsizei n, GLuint* pipelines);
GLAPI void APIENTRY glCreateQueries(GLenum target, GLsizei n, GLuint* ids);
GLAPI void APIENTRY glGetQueryBufferObjecti64v(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
GLAPI void APIENTRY glGetQueryBufferObjectiv(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
GLAPI void APIENTRY glGetQueryBufferObjectui64v(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
GLAPI void APIENTRY glGetQueryBufferObjectuiv(GLuint id, GLuint buffer, GLenum pname, GLintptr offset);
GLAPI void APIENTRY glMemoryBarrierByRegion(GLbitfield barriers);
GLAPI void APIENTRY glGetTextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                         GLsizei height, GLsizei depth, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
GLAPI void APIENTRY glGetCompressedTextureSubImage(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                                   GLsizei height, GLsizei depth, GLsizei bufSize, void* pixels);
GLAPI GLenum APIENTRY glGetGraphicsResetStatus(void);
GLAPI void APIENTRY glGetnCompressedTexImage(GLenum target, GLint lod, GLsizei bufSize, void* pixels);
GLAPI void APIENTRY glGetnTexImage(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* pixels);
GLAPI void APIENTRY glGetnUniformdv(GLuint program, GLint location, GLsizei bufSize, GLdouble* params);
GLAPI void APIENTRY glGetnUniformfv(GLuint program, GLint location, GLsizei bufSize, GLfloat* params);
GLAPI void APIENTRY glGetnUniformiv(GLuint program, GLint location, GLsizei bufSize, GLint* params);
GLAPI void APIENTRY glGetnUniformuiv(GLuint program, GLint location, GLsizei bufSize, GLuint* params);
GLAPI void APIENTRY glReadnPixels(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void* data);
GLAPI void APIENTRY glTextureBarrier(void);
#endif
#endif /* GL_VERSION_4_5 */

/*
 * GL_VERSION_4_6 interface block.
 *
 * Skipped entirely if GL_VERSION_4_6 is already defined; otherwise defines it to 1 and
 * declares the enumerant tokens, four PFN...PROC function-pointer types, and (only when
 * GL_GLEXT_PROTOTYPES is defined) the four matching gl* prototypes.
 */
#ifndef GL_VERSION_4_6
#define GL_VERSION_4_6 1
/* Enumerant tokens. */
#define GL_SHADER_BINARY_FORMAT_SPIR_V 0x9551
#define GL_SPIR_V_BINARY 0x9552
#define GL_PARAMETER_BUFFER 0x80EE
#define GL_PARAMETER_BUFFER_BINDING 0x80EF
#define GL_CONTEXT_FLAG_NO_ERROR_BIT 0x00000008
#define GL_VERTICES_SUBMITTED 0x82EE
#define GL_PRIMITIVES_SUBMITTED 0x82EF
#define GL_VERTEX_SHADER_INVOCATIONS 0x82F0
#define GL_TESS_CONTROL_SHADER_PATCHES 0x82F1
#define GL_TESS_EVALUATION_SHADER_INVOCATIONS 0x82F2
#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED 0x82F3
#define GL_FRAGMENT_SHADER_INVOCATIONS 0x82F4
#define GL_COMPUTE_SHADER_INVOCATIONS 0x82F5
#define GL_CLIPPING_INPUT_PRIMITIVES 0x82F6
#define GL_CLIPPING_OUTPUT_PRIMITIVES 0x82F7
#define GL_POLYGON_OFFSET_CLAMP 0x8E1B
#define GL_SPIR_V_EXTENSIONS 0x9553
#define GL_NUM_SPIR_V_EXTENSIONS 0x9554
#define GL_TEXTURE_MAX_ANISOTROPY 0x84FE
#define GL_MAX_TEXTURE_MAX_ANISOTROPY 0x84FF
#define GL_TRANSFORM_FEEDBACK_OVERFLOW 0x82EC
#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW 0x82ED
/* Function-pointer types. Note that in the two *IndirectCount entry points `drawcount`
 * is a GLintptr while `maxdrawcount` and `stride` are GLsizei. */
typedef void(APIENTRYP PFNGLSPECIALIZESHADERPROC)(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants,
                                                  const GLuint* pConstantIndex, const GLuint* pConstantValue);
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTPROC)(GLenum mode, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount,
                                                              GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTPROC)(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount,
                                                                GLsizei maxdrawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLPOLYGONOFFSETCLAMPPROC)(GLfloat factor, GLfloat units, GLfloat clamp);
/* Direct prototypes, declared only when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glSpecializeShader(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants,
                                       const GLuint* pConstantIndex, const GLuint* pConstantValue);
GLAPI void APIENTRY glMultiDrawArraysIndirectCount(GLenum mode, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
GLAPI void APIENTRY glMultiDrawElementsIndirectCount(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount,
                                                     GLsizei maxdrawcount, GLsizei stride);
GLAPI void APIENTRY glPolygonOffsetClamp(GLfloat factor, GLfloat units, GLfloat clamp);
#endif
#endif /* GL_VERSION_4_6 */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_ES2_compatibility
#define GL_ARB_ES2_compatibility 1
#endif /* GL_ARB_ES2_compatibility */

#ifndef GL_ARB_ES3_1_compatibility
#define GL_ARB_ES3_1_compatibility 1
#endif /* GL_ARB_ES3_1_compatibility */

/*
 * GL_ARB_ES3_2_compatibility: three enum tokens plus one command,
 * glPrimitiveBoundingBoxARB, which takes eight GLfloat values
 * (min X/Y/Z/W followed by max X/Y/Z/W).
 */
#ifndef GL_ARB_ES3_2_compatibility
#define GL_ARB_ES3_2_compatibility 1
#define GL_PRIMITIVE_BOUNDING_BOX_ARB 0x92BE
#define GL_MULTISAMPLE_LINE_WIDTH_RANGE_ARB 0x9381
#define GL_MULTISAMPLE_LINE_WIDTH_GRANULARITY_ARB 0x9382
typedef void(APIENTRYP PFNGLPRIMITIVEBOUNDINGBOXARBPROC)(GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY,
                                                         GLfloat maxZ, GLfloat maxW);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glPrimitiveBoundingBoxARB(GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY,
                                              GLfloat maxZ, GLfloat maxW);
#endif
#endif /* GL_ARB_ES3_2_compatibility */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_ES3_compatibility
#define GL_ARB_ES3_compatibility 1
#endif /* GL_ARB_ES3_compatibility */

#ifndef GL_ARB_arrays_of_arrays
#define GL_ARB_arrays_of_arrays 1
#endif /* GL_ARB_arrays_of_arrays */

#ifndef GL_ARB_base_instance
#define GL_ARB_base_instance 1
#endif /* GL_ARB_base_instance */

/*
 * GL_ARB_bindless_texture: one enum token, the GLuint64EXT typedef, and sixteen
 * commands (typedefs always; prototypes only under GL_GLEXT_PROTOTYPES).
 *
 * Handles are passed and returned as GLuint64, while the three
 * VertexAttribL*ui64* commands use GLuint64EXT.
 */
#ifndef GL_ARB_bindless_texture
#define GL_ARB_bindless_texture 1
/* GLuint64EXT is introduced inside this guard, so it is not declared by this
 * block when GL_ARB_bindless_texture was already defined by the includer. */
typedef khronos_uint64_t GLuint64EXT;
#define GL_UNSIGNED_INT64_ARB 0x140F
/* Handle creation and residency control for textures and images. */
typedef GLuint64(APIENTRYP PFNGLGETTEXTUREHANDLEARBPROC)(GLuint texture);
typedef GLuint64(APIENTRYP PFNGLGETTEXTURESAMPLERHANDLEARBPROC)(GLuint texture, GLuint sampler);
typedef void(APIENTRYP PFNGLMAKETEXTUREHANDLERESIDENTARBPROC)(GLuint64 handle);
typedef void(APIENTRYP PFNGLMAKETEXTUREHANDLENONRESIDENTARBPROC)(GLuint64 handle);
typedef GLuint64(APIENTRYP PFNGLGETIMAGEHANDLEARBPROC)(GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format);
typedef void(APIENTRYP PFNGLMAKEIMAGEHANDLERESIDENTARBPROC)(GLuint64 handle, GLenum access);
typedef void(APIENTRYP PFNGLMAKEIMAGEHANDLENONRESIDENTARBPROC)(GLuint64 handle);
/* Uniform setters taking handles, for the current program and for an explicit program. */
typedef void(APIENTRYP PFNGLUNIFORMHANDLEUI64ARBPROC)(GLint location, GLuint64 value);
typedef void(APIENTRYP PFNGLUNIFORMHANDLEUI64VARBPROC)(GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64ARBPROC)(GLuint program, GLint location, GLuint64 value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* values);
/* Residency queries. */
typedef GLboolean(APIENTRYP PFNGLISTEXTUREHANDLERESIDENTARBPROC)(GLuint64 handle);
typedef GLboolean(APIENTRYP PFNGLISIMAGEHANDLERESIDENTARBPROC)(GLuint64 handle);
/* 64-bit vertex attribute set/get; these are the GLuint64EXT users. */
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1UI64ARBPROC)(GLuint index, GLuint64EXT x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1UI64VARBPROC)(GLuint index, const GLuint64EXT* v);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBLUI64VARBPROC)(GLuint index, GLenum pname, GLuint64EXT* params);
/* Prototypes mirror the typedefs above in the same order. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI GLuint64 APIENTRY glGetTextureHandleARB(GLuint texture);
GLAPI GLuint64 APIENTRY glGetTextureSamplerHandleARB(GLuint texture, GLuint sampler);
GLAPI void APIENTRY glMakeTextureHandleResidentARB(GLuint64 handle);
GLAPI void APIENTRY glMakeTextureHandleNonResidentARB(GLuint64 handle);
GLAPI GLuint64 APIENTRY glGetImageHandleARB(GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format);
GLAPI void APIENTRY glMakeImageHandleResidentARB(GLuint64 handle, GLenum access);
GLAPI void APIENTRY glMakeImageHandleNonResidentARB(GLuint64 handle);
GLAPI void APIENTRY glUniformHandleui64ARB(GLint location, GLuint64 value);
GLAPI void APIENTRY glUniformHandleui64vARB(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glProgramUniformHandleui64ARB(GLuint program, GLint location, GLuint64 value);
GLAPI void APIENTRY glProgramUniformHandleui64vARB(GLuint program, GLint location, GLsizei count, const GLuint64* values);
GLAPI GLboolean APIENTRY glIsTextureHandleResidentARB(GLuint64 handle);
GLAPI GLboolean APIENTRY glIsImageHandleResidentARB(GLuint64 handle);
GLAPI void APIENTRY glVertexAttribL1ui64ARB(GLuint index, GLuint64EXT x);
GLAPI void APIENTRY glVertexAttribL1ui64vARB(GLuint index, const GLuint64EXT* v);
GLAPI void APIENTRY glGetVertexAttribLui64vARB(GLuint index, GLenum pname, GLuint64EXT* params);
#endif
#endif /* GL_ARB_bindless_texture */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_blend_func_extended
#define GL_ARB_blend_func_extended 1
#endif /* GL_ARB_blend_func_extended */

#ifndef GL_ARB_buffer_storage
#define GL_ARB_buffer_storage 1
#endif /* GL_ARB_buffer_storage */

/*
 * GL_ARB_cl_event: two enum tokens and one command that returns a GLsync
 * from a context/event pair.
 */
#ifndef GL_ARB_cl_event
#define GL_ARB_cl_event 1
/* Forward declarations only: the command below needs just pointers to these
 * structs, so their definitions are not required here.
 * NOTE(review): presumably these are OpenCL's internal context/event structs -- confirm. */
struct _cl_context;
struct _cl_event;
#define GL_SYNC_CL_EVENT_ARB 0x8240
#define GL_SYNC_CL_EVENT_COMPLETE_ARB 0x8241
typedef GLsync(APIENTRYP PFNGLCREATESYNCFROMCLEVENTARBPROC)(struct _cl_context* context, struct _cl_event* event, GLbitfield flags);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI GLsync APIENTRY glCreateSyncFromCLeventARB(struct _cl_context* context, struct _cl_event* event, GLbitfield flags);
#endif
#endif /* GL_ARB_cl_event */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_clear_buffer_object
#define GL_ARB_clear_buffer_object 1
#endif /* GL_ARB_clear_buffer_object */

#ifndef GL_ARB_clear_texture
#define GL_ARB_clear_texture 1
#endif /* GL_ARB_clear_texture */

#ifndef GL_ARB_clip_control
#define GL_ARB_clip_control 1
#endif /* GL_ARB_clip_control */

#ifndef GL_ARB_compressed_texture_pixel_storage
#define GL_ARB_compressed_texture_pixel_storage 1
#endif /* GL_ARB_compressed_texture_pixel_storage */

#ifndef GL_ARB_compute_shader
#define GL_ARB_compute_shader 1
#endif /* GL_ARB_compute_shader */

/*
 * GL_ARB_compute_variable_group_size: four limit tokens and one dispatch
 * command that takes both the group counts and the group sizes as three
 * GLuint values each (x, y, z).
 */
#ifndef GL_ARB_compute_variable_group_size
#define GL_ARB_compute_variable_group_size 1
#define GL_MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB 0x9344
#define GL_MAX_COMPUTE_FIXED_GROUP_INVOCATIONS_ARB 0x90EB
#define GL_MAX_COMPUTE_VARIABLE_GROUP_SIZE_ARB 0x9345
#define GL_MAX_COMPUTE_FIXED_GROUP_SIZE_ARB 0x91BF
typedef void(APIENTRYP PFNGLDISPATCHCOMPUTEGROUPSIZEARBPROC)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z,
                                                             GLuint group_size_x, GLuint group_size_y, GLuint group_size_z);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z, GLuint group_size_x,
                                                  GLuint group_size_y, GLuint group_size_z);
#endif
#endif /* GL_ARB_compute_variable_group_size */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_conditional_render_inverted
#define GL_ARB_conditional_render_inverted 1
#endif /* GL_ARB_conditional_render_inverted */

#ifndef GL_ARB_conservative_depth
#define GL_ARB_conservative_depth 1
#endif /* GL_ARB_conservative_depth */

#ifndef GL_ARB_copy_buffer
#define GL_ARB_copy_buffer 1
#endif /* GL_ARB_copy_buffer */

#ifndef GL_ARB_copy_image
#define GL_ARB_copy_image 1
#endif /* GL_ARB_copy_image */

#ifndef GL_ARB_cull_distance
#define GL_ARB_cull_distance 1
#endif /* GL_ARB_cull_distance */

/*
 * GL_ARB_debug_output: the GLDEBUGPROCARB callback type, enum tokens for
 * message source/type/severity and related state, and four commands
 * (control, insert, callback registration, log retrieval).
 */
#ifndef GL_ARB_debug_output
#define GL_ARB_debug_output 1
/* Callback signature accepted by glDebugMessageCallbackARB. Note it is spelled
 * with APIENTRY* directly rather than the APIENTRYP macro used by the PFN typedefs. */
typedef void(APIENTRY* GLDEBUGPROCARB)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message,
                                       const void* userParam);
#define GL_DEBUG_OUTPUT_SYNCHRONOUS_ARB 0x8242
#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH_ARB 0x8243
#define GL_DEBUG_CALLBACK_FUNCTION_ARB 0x8244
#define GL_DEBUG_CALLBACK_USER_PARAM_ARB 0x8245
/* Message sources. */
#define GL_DEBUG_SOURCE_API_ARB 0x8246
#define GL_DEBUG_SOURCE_WINDOW_SYSTEM_ARB 0x8247
#define GL_DEBUG_SOURCE_SHADER_COMPILER_ARB 0x8248
#define GL_DEBUG_SOURCE_THIRD_PARTY_ARB 0x8249
#define GL_DEBUG_SOURCE_APPLICATION_ARB 0x824A
#define GL_DEBUG_SOURCE_OTHER_ARB 0x824B
/* Message types. */
#define GL_DEBUG_TYPE_ERROR_ARB 0x824C
#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR_ARB 0x824D
#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR_ARB 0x824E
#define GL_DEBUG_TYPE_PORTABILITY_ARB 0x824F
#define GL_DEBUG_TYPE_PERFORMANCE_ARB 0x8250
#define GL_DEBUG_TYPE_OTHER_ARB 0x8251
/* Log limits/state and message severities. */
#define GL_MAX_DEBUG_MESSAGE_LENGTH_ARB 0x9143
#define GL_MAX_DEBUG_LOGGED_MESSAGES_ARB 0x9144
#define GL_DEBUG_LOGGED_MESSAGES_ARB 0x9145
#define GL_DEBUG_SEVERITY_HIGH_ARB 0x9146
#define GL_DEBUG_SEVERITY_MEDIUM_ARB 0x9147
#define GL_DEBUG_SEVERITY_LOW_ARB 0x9148
typedef void(APIENTRYP PFNGLDEBUGMESSAGECONTROLARBPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids,
                                                        GLboolean enabled);
typedef void(APIENTRYP PFNGLDEBUGMESSAGEINSERTARBPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf);
typedef void(APIENTRYP PFNGLDEBUGMESSAGECALLBACKARBPROC)(GLDEBUGPROCARB callback, const void* userParam);
/* The only command in this block with a non-void result (GLuint). */
typedef GLuint(APIENTRYP PFNGLGETDEBUGMESSAGELOGARBPROC)(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids,
                                                         GLenum* severities, GLsizei* lengths, GLchar* messageLog);
/* Prototypes mirror the typedefs above in the same order. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDebugMessageControlARB(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint* ids, GLboolean enabled);
GLAPI void APIENTRY glDebugMessageInsertARB(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* buf);
GLAPI void APIENTRY glDebugMessageCallbackARB(GLDEBUGPROCARB callback, const void* userParam);
GLAPI GLuint APIENTRY glGetDebugMessageLogARB(GLuint count, GLsizei bufSize, GLenum* sources, GLenum* types, GLuint* ids,
                                              GLenum* severities, GLsizei* lengths, GLchar* messageLog);
#endif
#endif /* GL_ARB_debug_output */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_depth_buffer_float
#define GL_ARB_depth_buffer_float 1
#endif /* GL_ARB_depth_buffer_float */

#ifndef GL_ARB_depth_clamp
#define GL_ARB_depth_clamp 1
#endif /* GL_ARB_depth_clamp */

#ifndef GL_ARB_derivative_control
#define GL_ARB_derivative_control 1
#endif /* GL_ARB_derivative_control */

#ifndef GL_ARB_direct_state_access
#define GL_ARB_direct_state_access 1
#endif /* GL_ARB_direct_state_access */

/*
 * GL_ARB_draw_buffers_blend: no enum tokens, four commands. Each takes a
 * leading GLuint buf argument followed by blend equation or blend function enums.
 */
#ifndef GL_ARB_draw_buffers_blend
#define GL_ARB_draw_buffers_blend 1
typedef void(APIENTRYP PFNGLBLENDEQUATIONIARBPROC)(GLuint buf, GLenum mode);
typedef void(APIENTRYP PFNGLBLENDEQUATIONSEPARATEIARBPROC)(GLuint buf, GLenum modeRGB, GLenum modeAlpha);
typedef void(APIENTRYP PFNGLBLENDFUNCIARBPROC)(GLuint buf, GLenum src, GLenum dst);
typedef void(APIENTRYP PFNGLBLENDFUNCSEPARATEIARBPROC)(GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha);
/* Prototypes are only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBlendEquationiARB(GLuint buf, GLenum mode);
GLAPI void APIENTRY glBlendEquationSeparateiARB(GLuint buf, GLenum modeRGB, GLenum modeAlpha);
GLAPI void APIENTRY glBlendFunciARB(GLuint buf, GLenum src, GLenum dst);
GLAPI void APIENTRY glBlendFuncSeparateiARB(GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha);
#endif
#endif /* GL_ARB_draw_buffers_blend */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_draw_elements_base_vertex
#define GL_ARB_draw_elements_base_vertex 1
#endif /* GL_ARB_draw_elements_base_vertex */

#ifndef GL_ARB_draw_indirect
#define GL_ARB_draw_indirect 1
#endif /* GL_ARB_draw_indirect */

/*
 * GL_ARB_draw_instanced: no enum tokens, two draw commands. Both take a
 * trailing GLsizei primcount in addition to the usual draw arguments.
 */
#ifndef GL_ARB_draw_instanced
#define GL_ARB_draw_instanced 1
typedef void(APIENTRYP PFNGLDRAWARRAYSINSTANCEDARBPROC)(GLenum mode, GLint first, GLsizei count, GLsizei primcount);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDARBPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei primcount);
/* Prototypes are only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawArraysInstancedARB(GLenum mode, GLint first, GLsizei count, GLsizei primcount);
GLAPI void APIENTRY glDrawElementsInstancedARB(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei primcount);
#endif
#endif /* GL_ARB_draw_instanced */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_enhanced_layouts
#define GL_ARB_enhanced_layouts 1
#endif /* GL_ARB_enhanced_layouts */

#ifndef GL_ARB_explicit_attrib_location
#define GL_ARB_explicit_attrib_location 1
#endif /* GL_ARB_explicit_attrib_location */

#ifndef GL_ARB_explicit_uniform_location
#define GL_ARB_explicit_uniform_location 1
#endif /* GL_ARB_explicit_uniform_location */

#ifndef GL_ARB_fragment_coord_conventions
#define GL_ARB_fragment_coord_conventions 1
#endif /* GL_ARB_fragment_coord_conventions */

#ifndef GL_ARB_fragment_layer_viewport
#define GL_ARB_fragment_layer_viewport 1
#endif /* GL_ARB_fragment_layer_viewport */

#ifndef GL_ARB_fragment_shader_interlock
#define GL_ARB_fragment_shader_interlock 1
#endif /* GL_ARB_fragment_shader_interlock */

#ifndef GL_ARB_framebuffer_no_attachments
#define GL_ARB_framebuffer_no_attachments 1
#endif /* GL_ARB_framebuffer_no_attachments */

#ifndef GL_ARB_framebuffer_object
#define GL_ARB_framebuffer_object 1
#endif /* GL_ARB_framebuffer_object */

#ifndef GL_ARB_framebuffer_sRGB
#define GL_ARB_framebuffer_sRGB 1
#endif /* GL_ARB_framebuffer_sRGB */

/*
 * GL_ARB_geometry_shader4: adjacency primitive tokens (0x000A-0x000D),
 * geometry-shader and layered-framebuffer tokens, and four commands
 * (one program parameter setter, three framebuffer texture attach variants).
 */
#ifndef GL_ARB_geometry_shader4
#define GL_ARB_geometry_shader4 1
#define GL_LINES_ADJACENCY_ARB 0x000A
#define GL_LINE_STRIP_ADJACENCY_ARB 0x000B
#define GL_TRIANGLES_ADJACENCY_ARB 0x000C
#define GL_TRIANGLE_STRIP_ADJACENCY_ARB 0x000D
#define GL_PROGRAM_POINT_SIZE_ARB 0x8642
#define GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS_ARB 0x8C29
#define GL_FRAMEBUFFER_ATTACHMENT_LAYERED_ARB 0x8DA7
#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS_ARB 0x8DA8
#define GL_FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB 0x8DA9
#define GL_GEOMETRY_SHADER_ARB 0x8DD9
#define GL_GEOMETRY_VERTICES_OUT_ARB 0x8DDA
#define GL_GEOMETRY_INPUT_TYPE_ARB 0x8DDB
#define GL_GEOMETRY_OUTPUT_TYPE_ARB 0x8DDC
#define GL_MAX_GEOMETRY_VARYING_COMPONENTS_ARB 0x8DDD
#define GL_MAX_VERTEX_VARYING_COMPONENTS_ARB 0x8DDE
#define GL_MAX_GEOMETRY_UNIFORM_COMPONENTS_ARB 0x8DDF
#define GL_MAX_GEOMETRY_OUTPUT_VERTICES_ARB 0x8DE0
#define GL_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS_ARB 0x8DE1
typedef void(APIENTRYP PFNGLPROGRAMPARAMETERIARBPROC)(GLuint program, GLenum pname, GLint value);
/* The three attach variants differ only in the trailing argument: none, GLint layer, or GLenum face. */
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTUREARBPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTURELAYERARBPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTUREFACEARBPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face);
/* Prototypes mirror the typedefs above in the same order. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glProgramParameteriARB(GLuint program, GLenum pname, GLint value);
GLAPI void APIENTRY glFramebufferTextureARB(GLenum target, GLenum attachment, GLuint texture, GLint level);
GLAPI void APIENTRY glFramebufferTextureLayerARB(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer);
GLAPI void APIENTRY glFramebufferTextureFaceARB(GLenum target, GLenum attachment, GLuint texture, GLint level, GLenum face);
#endif
#endif /* GL_ARB_geometry_shader4 */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_get_program_binary
#define GL_ARB_get_program_binary 1
#endif /* GL_ARB_get_program_binary */

#ifndef GL_ARB_get_texture_sub_image
#define GL_ARB_get_texture_sub_image 1
#endif /* GL_ARB_get_texture_sub_image */

/*
 * GL_ARB_gl_spirv: two enum tokens and one command, glSpecializeShaderARB.
 * The token values (0x9551, 0x9552) and the command signature are the same as
 * the unsuffixed GL_SHADER_BINARY_FORMAT_SPIR_V / GL_SPIR_V_BINARY /
 * glSpecializeShader declared in the GL_VERSION_4_6 section of this file.
 */
#ifndef GL_ARB_gl_spirv
#define GL_ARB_gl_spirv 1
#define GL_SHADER_BINARY_FORMAT_SPIR_V_ARB 0x9551
#define GL_SPIR_V_BINARY_ARB 0x9552
typedef void(APIENTRYP PFNGLSPECIALIZESHADERARBPROC)(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants,
                                                     const GLuint* pConstantIndex, const GLuint* pConstantValue);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glSpecializeShaderARB(GLuint shader, const GLchar* pEntryPoint, GLuint numSpecializationConstants,
                                          const GLuint* pConstantIndex, const GLuint* pConstantValue);
#endif
#endif /* GL_ARB_gl_spirv */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_gpu_shader5
#define GL_ARB_gpu_shader5 1
#endif /* GL_ARB_gpu_shader5 */

#ifndef GL_ARB_gpu_shader_fp64
#define GL_ARB_gpu_shader_fp64 1
#endif /* GL_ARB_gpu_shader_fp64 */

/*
 * GL_ARB_gpu_shader_int64: seven type tokens and 36 commands for 64-bit
 * integer uniforms. Commands come in a regular grid:
 *   - Uniform{1..4}{i64,ui64}[v]ARB        operate on a location only
 *   - ProgramUniform{1..4}{i64,ui64}[v]ARB add a leading GLuint program
 *   - Get[n]Uniform{i64,ui64}vARB          read back into a caller buffer
 * Signed variants use GLint64, unsigned variants use GLuint64.
 */
#ifndef GL_ARB_gpu_shader_int64
#define GL_ARB_gpu_shader_int64 1
#define GL_INT64_ARB 0x140E
#define GL_INT64_VEC2_ARB 0x8FE9
#define GL_INT64_VEC3_ARB 0x8FEA
#define GL_INT64_VEC4_ARB 0x8FEB
#define GL_UNSIGNED_INT64_VEC2_ARB 0x8FF5
#define GL_UNSIGNED_INT64_VEC3_ARB 0x8FF6
#define GL_UNSIGNED_INT64_VEC4_ARB 0x8FF7
/* Uniform setters: signed scalar-argument and array forms. */
typedef void(APIENTRYP PFNGLUNIFORM1I64ARBPROC)(GLint location, GLint64 x);
typedef void(APIENTRYP PFNGLUNIFORM2I64ARBPROC)(GLint location, GLint64 x, GLint64 y);
typedef void(APIENTRYP PFNGLUNIFORM3I64ARBPROC)(GLint location, GLint64 x, GLint64 y, GLint64 z);
typedef void(APIENTRYP PFNGLUNIFORM4I64ARBPROC)(GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
typedef void(APIENTRYP PFNGLUNIFORM1I64VARBPROC)(GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLUNIFORM2I64VARBPROC)(GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLUNIFORM3I64VARBPROC)(GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLUNIFORM4I64VARBPROC)(GLint location, GLsizei count, const GLint64* value);
/* Uniform setters: unsigned scalar-argument and array forms. */
typedef void(APIENTRYP PFNGLUNIFORM1UI64ARBPROC)(GLint location, GLuint64 x);
typedef void(APIENTRYP PFNGLUNIFORM2UI64ARBPROC)(GLint location, GLuint64 x, GLuint64 y);
typedef void(APIENTRYP PFNGLUNIFORM3UI64ARBPROC)(GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
typedef void(APIENTRYP PFNGLUNIFORM4UI64ARBPROC)(GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
typedef void(APIENTRYP PFNGLUNIFORM1UI64VARBPROC)(GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLUNIFORM2UI64VARBPROC)(GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLUNIFORM3UI64VARBPROC)(GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLUNIFORM4UI64VARBPROC)(GLint location, GLsizei count, const GLuint64* value);
/* Getters; the "n" variants additionally take the caller's buffer size. */
typedef void(APIENTRYP PFNGLGETUNIFORMI64VARBPROC)(GLuint program, GLint location, GLint64* params);
typedef void(APIENTRYP PFNGLGETUNIFORMUI64VARBPROC)(GLuint program, GLint location, GLuint64* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMI64VARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLint64* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMUI64VARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint64* params);
/* ProgramUniform setters: same grid as above with an explicit program argument. */
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1I64ARBPROC)(GLuint program, GLint location, GLint64 x);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2I64ARBPROC)(GLuint program, GLint location, GLint64 x, GLint64 y);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3I64ARBPROC)(GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4I64ARBPROC)(GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1I64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2I64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3I64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4I64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UI64ARBPROC)(GLuint program, GLint location, GLuint64 x);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UI64ARBPROC)(GLuint program, GLint location, GLuint64 x, GLuint64 y);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UI64ARBPROC)(GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UI64ARBPROC)(GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UI64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UI64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UI64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UI64VARBPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* value);
/* Prototypes mirror the typedefs above in the same order. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glUniform1i64ARB(GLint location, GLint64 x);
GLAPI void APIENTRY glUniform2i64ARB(GLint location, GLint64 x, GLint64 y);
GLAPI void APIENTRY glUniform3i64ARB(GLint location, GLint64 x, GLint64 y, GLint64 z);
GLAPI void APIENTRY glUniform4i64ARB(GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
GLAPI void APIENTRY glUniform1i64vARB(GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glUniform2i64vARB(GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glUniform3i64vARB(GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glUniform4i64vARB(GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glUniform1ui64ARB(GLint location, GLuint64 x);
GLAPI void APIENTRY glUniform2ui64ARB(GLint location, GLuint64 x, GLuint64 y);
GLAPI void APIENTRY glUniform3ui64ARB(GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
GLAPI void APIENTRY glUniform4ui64ARB(GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
GLAPI void APIENTRY glUniform1ui64vARB(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glUniform2ui64vARB(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glUniform3ui64vARB(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glUniform4ui64vARB(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glGetUniformi64vARB(GLuint program, GLint location, GLint64* params);
GLAPI void APIENTRY glGetUniformui64vARB(GLuint program, GLint location, GLuint64* params);
GLAPI void APIENTRY glGetnUniformi64vARB(GLuint program, GLint location, GLsizei bufSize, GLint64* params);
GLAPI void APIENTRY glGetnUniformui64vARB(GLuint program, GLint location, GLsizei bufSize, GLuint64* params);
GLAPI void APIENTRY glProgramUniform1i64ARB(GLuint program, GLint location, GLint64 x);
GLAPI void APIENTRY glProgramUniform2i64ARB(GLuint program, GLint location, GLint64 x, GLint64 y);
GLAPI void APIENTRY glProgramUniform3i64ARB(GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z);
GLAPI void APIENTRY glProgramUniform4i64ARB(GLuint program, GLint location, GLint64 x, GLint64 y, GLint64 z, GLint64 w);
GLAPI void APIENTRY glProgramUniform1i64vARB(GLuint program, GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glProgramUniform2i64vARB(GLuint program, GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glProgramUniform3i64vARB(GLuint program, GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glProgramUniform4i64vARB(GLuint program, GLint location, GLsizei count, const GLint64* value);
GLAPI void APIENTRY glProgramUniform1ui64ARB(GLuint program, GLint location, GLuint64 x);
GLAPI void APIENTRY glProgramUniform2ui64ARB(GLuint program, GLint location, GLuint64 x, GLuint64 y);
GLAPI void APIENTRY glProgramUniform3ui64ARB(GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z);
GLAPI void APIENTRY glProgramUniform4ui64ARB(GLuint program, GLint location, GLuint64 x, GLuint64 y, GLuint64 z, GLuint64 w);
GLAPI void APIENTRY glProgramUniform1ui64vARB(GLuint program, GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glProgramUniform2ui64vARB(GLuint program, GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glProgramUniform3ui64vARB(GLuint program, GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glProgramUniform4ui64vARB(GLuint program, GLint location, GLsizei count, const GLuint64* value);
#endif
#endif /* GL_ARB_gpu_shader_int64 */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_half_float_vertex
#define GL_ARB_half_float_vertex 1
#endif /* GL_ARB_half_float_vertex */

#ifndef GL_ARB_imaging
#define GL_ARB_imaging 1
#endif /* GL_ARB_imaging */

/*
 * GL_ARB_indirect_parameters: two enum tokens and two multi-draw commands.
 * Token values (0x80EE, 0x80EF) and command signatures are the same as the
 * unsuffixed GL_PARAMETER_BUFFER* / glMultiDraw*IndirectCount declarations in
 * the GL_VERSION_4_6 section of this file.
 */
#ifndef GL_ARB_indirect_parameters
#define GL_ARB_indirect_parameters 1
#define GL_PARAMETER_BUFFER_ARB 0x80EE
#define GL_PARAMETER_BUFFER_BINDING_ARB 0x80EF
/* NOTE(review): drawcount is typed GLintptr rather than GLsizei; presumably a byte
 * offset into the buffer bound to GL_PARAMETER_BUFFER_ARB -- confirm against the extension spec. */
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTCOUNTARBPROC)(GLenum mode, const void* indirect, GLintptr drawcount,
                                                                 GLsizei maxdrawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTCOUNTARBPROC)(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount,
                                                                   GLsizei maxdrawcount, GLsizei stride);
/* Prototypes are only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMultiDrawArraysIndirectCountARB(GLenum mode, const void* indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
GLAPI void APIENTRY glMultiDrawElementsIndirectCountARB(GLenum mode, GLenum type, const void* indirect, GLintptr drawcount,
                                                        GLsizei maxdrawcount, GLsizei stride);
#endif
#endif /* GL_ARB_indirect_parameters */

/*
 * GL_ARB_instanced_arrays: one enum token and one command that takes a
 * vertex attribute index and a divisor.
 */
#ifndef GL_ARB_instanced_arrays
#define GL_ARB_instanced_arrays 1
#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ARB 0x88FE
typedef void(APIENTRYP PFNGLVERTEXATTRIBDIVISORARBPROC)(GLuint index, GLuint divisor);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glVertexAttribDivisorARB(GLuint index, GLuint divisor);
#endif
#endif /* GL_ARB_instanced_arrays */

/* Presence marker only: defines the extension macro to 1 (unless already
 * defined); no tokens or commands are declared in this block. */
#ifndef GL_ARB_internalformat_query
#define GL_ARB_internalformat_query 1
#endif /* GL_ARB_internalformat_query */

/*
 * GL_ARB_internalformat_query2: enum tokens only, no commands.
 * Only GL_SRGB_DECODE_ARB carries the _ARB suffix; the GL_VIEW_CLASS_* tokens
 * (contiguous values 0x9383-0x9395) are declared without a suffix.
 */
#ifndef GL_ARB_internalformat_query2
#define GL_ARB_internalformat_query2 1
#define GL_SRGB_DECODE_ARB 0x8299
#define GL_VIEW_CLASS_EAC_R11 0x9383
#define GL_VIEW_CLASS_EAC_RG11 0x9384
#define GL_VIEW_CLASS_ETC2_RGB 0x9385
#define GL_VIEW_CLASS_ETC2_RGBA 0x9386
#define GL_VIEW_CLASS_ETC2_EAC_RGBA 0x9387
#define GL_VIEW_CLASS_ASTC_4x4_RGBA 0x9388
#define GL_VIEW_CLASS_ASTC_5x4_RGBA 0x9389
#define GL_VIEW_CLASS_ASTC_5x5_RGBA 0x938A
#define GL_VIEW_CLASS_ASTC_6x5_RGBA 0x938B
#define GL_VIEW_CLASS_ASTC_6x6_RGBA 0x938C
#define GL_VIEW_CLASS_ASTC_8x5_RGBA 0x938D
#define GL_VIEW_CLASS_ASTC_8x6_RGBA 0x938E
#define GL_VIEW_CLASS_ASTC_8x8_RGBA 0x938F
#define GL_VIEW_CLASS_ASTC_10x5_RGBA 0x9390
#define GL_VIEW_CLASS_ASTC_10x6_RGBA 0x9391
#define GL_VIEW_CLASS_ASTC_10x8_RGBA 0x9392
#define GL_VIEW_CLASS_ASTC_10x10_RGBA 0x9393
#define GL_VIEW_CLASS_ASTC_12x10_RGBA 0x9394
#define GL_VIEW_CLASS_ASTC_12x12_RGBA 0x9395
#endif /* GL_ARB_internalformat_query2 */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_invalidate_subdata
#define GL_ARB_invalidate_subdata 1
#endif /* GL_ARB_invalidate_subdata */

#ifndef GL_ARB_map_buffer_alignment
#define GL_ARB_map_buffer_alignment 1
#endif /* GL_ARB_map_buffer_alignment */

#ifndef GL_ARB_map_buffer_range
#define GL_ARB_map_buffer_range 1
#endif /* GL_ARB_map_buffer_range */

#ifndef GL_ARB_multi_bind
#define GL_ARB_multi_bind 1
#endif /* GL_ARB_multi_bind */

#ifndef GL_ARB_multi_draw_indirect
#define GL_ARB_multi_draw_indirect 1
#endif /* GL_ARB_multi_draw_indirect */

#ifndef GL_ARB_occlusion_query2
#define GL_ARB_occlusion_query2 1
#endif /* GL_ARB_occlusion_query2 */

/*
 * GL_ARB_parallel_shader_compile: two enum tokens and one command taking a
 * single GLuint count.
 */
#ifndef GL_ARB_parallel_shader_compile
#define GL_ARB_parallel_shader_compile 1
#define GL_MAX_SHADER_COMPILER_THREADS_ARB 0x91B0
#define GL_COMPLETION_STATUS_ARB 0x91B1
typedef void(APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSARBPROC)(GLuint count);
/* Prototype is only declared when the includer opts in via GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMaxShaderCompilerThreadsARB(GLuint count);
#endif
#endif /* GL_ARB_parallel_shader_compile */

/*
 * GL_ARB_pipeline_statistics_query: enum tokens only, no commands.
 * The ten values (0x82EE-0x82F7) are identical to the unsuffixed tokens of the
 * same names in the GL_VERSION_4_6 section of this file.
 */
#ifndef GL_ARB_pipeline_statistics_query
#define GL_ARB_pipeline_statistics_query 1
#define GL_VERTICES_SUBMITTED_ARB 0x82EE
#define GL_PRIMITIVES_SUBMITTED_ARB 0x82EF
#define GL_VERTEX_SHADER_INVOCATIONS_ARB 0x82F0
#define GL_TESS_CONTROL_SHADER_PATCHES_ARB 0x82F1
#define GL_TESS_EVALUATION_SHADER_INVOCATIONS_ARB 0x82F2
#define GL_GEOMETRY_SHADER_PRIMITIVES_EMITTED_ARB 0x82F3
#define GL_FRAGMENT_SHADER_INVOCATIONS_ARB 0x82F4
#define GL_COMPUTE_SHADER_INVOCATIONS_ARB 0x82F5
#define GL_CLIPPING_INPUT_PRIMITIVES_ARB 0x82F6
#define GL_CLIPPING_OUTPUT_PRIMITIVES_ARB 0x82F7
#endif /* GL_ARB_pipeline_statistics_query */

/*
 * GL_ARB_pixel_buffer_object: enum tokens only, no commands.
 * NOTE(review): the values skip 0x88EE (unpack binding is 0x88EF); this is
 * believed to match the Khronos registry -- confirm before "fixing" it.
 */
#ifndef GL_ARB_pixel_buffer_object
#define GL_ARB_pixel_buffer_object 1
#define GL_PIXEL_PACK_BUFFER_ARB 0x88EB
#define GL_PIXEL_UNPACK_BUFFER_ARB 0x88EC
#define GL_PIXEL_PACK_BUFFER_BINDING_ARB 0x88ED
#define GL_PIXEL_UNPACK_BUFFER_BINDING_ARB 0x88EF
#endif /* GL_ARB_pixel_buffer_object */

/* Presence markers only: each guard below defines its extension macro to 1
 * (unless already defined) and declares no tokens or commands here. */
#ifndef GL_ARB_polygon_offset_clamp
#define GL_ARB_polygon_offset_clamp 1
#endif /* GL_ARB_polygon_offset_clamp */

#ifndef GL_ARB_post_depth_coverage
#define GL_ARB_post_depth_coverage 1
#endif /* GL_ARB_post_depth_coverage */

#ifndef GL_ARB_program_interface_query
#define GL_ARB_program_interface_query 1
#endif /* GL_ARB_program_interface_query */

#ifndef GL_ARB_provoking_vertex
#define GL_ARB_provoking_vertex 1
#endif /* GL_ARB_provoking_vertex */

#ifndef GL_ARB_query_buffer_object
#define GL_ARB_query_buffer_object 1
#endif /* GL_ARB_query_buffer_object */

#ifndef GL_ARB_robust_buffer_access_behavior
#define GL_ARB_robust_buffer_access_behavior 1
#endif /* GL_ARB_robust_buffer_access_behavior */

/*
 * GL_ARB_robustness: one context flag bit, six reset-related enum tokens, and
 * eight commands: a reset-status query plus size-checked ("n") read-back
 * commands that each take a GLsizei bufSize ahead of the output pointer.
 */
#ifndef GL_ARB_robustness
#define GL_ARB_robustness 1
/* Bit flag (note the 32-bit mask spelling), unlike the enum values that follow. */
#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB 0x00000004
#define GL_LOSE_CONTEXT_ON_RESET_ARB 0x8252
#define GL_GUILTY_CONTEXT_RESET_ARB 0x8253
#define GL_INNOCENT_CONTEXT_RESET_ARB 0x8254
#define GL_UNKNOWN_CONTEXT_RESET_ARB 0x8255
#define GL_RESET_NOTIFICATION_STRATEGY_ARB 0x8256
/* Not contiguous with the 0x8252-0x8256 run above. */
#define GL_NO_RESET_NOTIFICATION_ARB 0x8261
typedef GLenum(APIENTRYP PFNGLGETGRAPHICSRESETSTATUSARBPROC)(void);
typedef void(APIENTRYP PFNGLGETNTEXIMAGEARBPROC)(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* img);
typedef void(APIENTRYP PFNGLREADNPIXELSARBPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type,
                                                GLsizei bufSize, void* data);
typedef void(APIENTRYP PFNGLGETNCOMPRESSEDTEXIMAGEARBPROC)(GLenum target, GLint lod, GLsizei bufSize, void* img);
/* Uniform read-back, one variant per element type (float, int, uint, double). */
typedef void(APIENTRYP PFNGLGETNUNIFORMFVARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMIVARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLint* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMUIVARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint* params);
typedef void(APIENTRYP PFNGLGETNUNIFORMDVARBPROC)(GLuint program, GLint location, GLsizei bufSize, GLdouble* params);
/* Prototypes mirror the typedefs above in the same order. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI GLenum APIENTRY glGetGraphicsResetStatusARB(void);
GLAPI void APIENTRY glGetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type, GLsizei bufSize, void* img);
GLAPI void APIENTRY glReadnPixelsARB(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void* data);
GLAPI void APIENTRY glGetnCompressedTexImageARB(GLenum target, GLint lod, GLsizei bufSize, void* img);
GLAPI void APIENTRY glGetnUniformfvARB(GLuint program, GLint location, GLsizei bufSize, GLfloat* params);
GLAPI void APIENTRY glGetnUniformivARB(GLuint program, GLint location, GLsizei bufSize, GLint* params);
GLAPI void APIENTRY glGetnUniformuivARB(GLuint program, GLint location, GLsizei bufSize, GLuint* params);
GLAPI void APIENTRY glGetnUniformdvARB(GLuint program, GLint location, GLsizei bufSize, GLdouble* params);
#endif
#endif /* GL_ARB_robustness */

/* GL_ARB_robustness_isolation: only the extension-name macro is defined; this guard declares no tokens or entry points. */
#ifndef GL_ARB_robustness_isolation
#define GL_ARB_robustness_isolation 1
#endif /* GL_ARB_robustness_isolation */

/*
 * GL_ARB_sample_locations
 *
 * Declares eight enum tokens and three entry points. The block is skipped
 * entirely when GL_ARB_sample_locations is already defined. The PFN...PROC
 * function-pointer typedefs are always emitted; the direct GLAPI prototypes
 * are emitted only when GL_GLEXT_PROTOTYPES is defined.
 */
#ifndef GL_ARB_sample_locations
#define GL_ARB_sample_locations 1
#define GL_SAMPLE_LOCATION_SUBPIXEL_BITS_ARB 0x933D
#define GL_SAMPLE_LOCATION_PIXEL_GRID_WIDTH_ARB 0x933E
#define GL_SAMPLE_LOCATION_PIXEL_GRID_HEIGHT_ARB 0x933F
#define GL_PROGRAMMABLE_SAMPLE_LOCATION_TABLE_SIZE_ARB 0x9340
/* Note: this token's value lies outside the 0x933D-0x9343 range used by the others. */
#define GL_SAMPLE_LOCATION_ARB 0x8E50
#define GL_PROGRAMMABLE_SAMPLE_LOCATION_ARB 0x9341
#define GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS_ARB 0x9342
#define GL_FRAMEBUFFER_SAMPLE_LOCATION_PIXEL_GRID_ARB 0x9343
/* Function-pointer types, one per entry point, with signatures matching the prototypes below. */
typedef void(APIENTRYP PFNGLFRAMEBUFFERSAMPLELOCATIONSFVARBPROC)(GLenum target, GLuint start, GLsizei count, const GLfloat* v);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVARBPROC)(GLuint framebuffer, GLuint start, GLsizei count, const GLfloat* v);
typedef void(APIENTRYP PFNGLEVALUATEDEPTHVALUESARBPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFramebufferSampleLocationsfvARB(GLenum target, GLuint start, GLsizei count, const GLfloat* v);
GLAPI void APIENTRY glNamedFramebufferSampleLocationsfvARB(GLuint framebuffer, GLuint start, GLsizei count, const GLfloat* v);
GLAPI void APIENTRY glEvaluateDepthValuesARB(void);
#endif
#endif /* GL_ARB_sample_locations */

/*
 * GL_ARB_sample_shading
 *
 * Two enum tokens and one entry point (glMinSampleShadingARB). The prototype
 * is only declared when GL_GLEXT_PROTOTYPES is defined; the function-pointer
 * typedef is always available.
 */
#ifndef GL_ARB_sample_shading
#define GL_ARB_sample_shading 1
#define GL_SAMPLE_SHADING_ARB 0x8C36
#define GL_MIN_SAMPLE_SHADING_VALUE_ARB 0x8C37
typedef void(APIENTRYP PFNGLMINSAMPLESHADINGARBPROC)(GLfloat value);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMinSampleShadingARB(GLfloat value);
#endif
#endif /* GL_ARB_sample_shading */

/*
 * Name-only extension guards: GL_ARB_sampler_objects through
 * GL_ARB_shading_language_420pack (20 extensions).
 *
 * Each guard below only defines its extension-name macro to 1 (unless it is
 * already defined). None of them declares enum tokens, function-pointer
 * typedefs or prototypes inside its guard in this header.
 */
#ifndef GL_ARB_sampler_objects
#define GL_ARB_sampler_objects 1
#endif /* GL_ARB_sampler_objects */

#ifndef GL_ARB_seamless_cube_map
#define GL_ARB_seamless_cube_map 1
#endif /* GL_ARB_seamless_cube_map */

#ifndef GL_ARB_seamless_cubemap_per_texture
#define GL_ARB_seamless_cubemap_per_texture 1
#endif /* GL_ARB_seamless_cubemap_per_texture */

#ifndef GL_ARB_separate_shader_objects
#define GL_ARB_separate_shader_objects 1
#endif /* GL_ARB_separate_shader_objects */

#ifndef GL_ARB_shader_atomic_counter_ops
#define GL_ARB_shader_atomic_counter_ops 1
#endif /* GL_ARB_shader_atomic_counter_ops */

#ifndef GL_ARB_shader_atomic_counters
#define GL_ARB_shader_atomic_counters 1
#endif /* GL_ARB_shader_atomic_counters */

#ifndef GL_ARB_shader_ballot
#define GL_ARB_shader_ballot 1
#endif /* GL_ARB_shader_ballot */

#ifndef GL_ARB_shader_bit_encoding
#define GL_ARB_shader_bit_encoding 1
#endif /* GL_ARB_shader_bit_encoding */

#ifndef GL_ARB_shader_clock
#define GL_ARB_shader_clock 1
#endif /* GL_ARB_shader_clock */

#ifndef GL_ARB_shader_draw_parameters
#define GL_ARB_shader_draw_parameters 1
#endif /* GL_ARB_shader_draw_parameters */

#ifndef GL_ARB_shader_group_vote
#define GL_ARB_shader_group_vote 1
#endif /* GL_ARB_shader_group_vote */

#ifndef GL_ARB_shader_image_load_store
#define GL_ARB_shader_image_load_store 1
#endif /* GL_ARB_shader_image_load_store */

#ifndef GL_ARB_shader_image_size
#define GL_ARB_shader_image_size 1
#endif /* GL_ARB_shader_image_size */

#ifndef GL_ARB_shader_precision
#define GL_ARB_shader_precision 1
#endif /* GL_ARB_shader_precision */

#ifndef GL_ARB_shader_stencil_export
#define GL_ARB_shader_stencil_export 1
#endif /* GL_ARB_shader_stencil_export */

#ifndef GL_ARB_shader_storage_buffer_object
#define GL_ARB_shader_storage_buffer_object 1
#endif /* GL_ARB_shader_storage_buffer_object */

#ifndef GL_ARB_shader_subroutine
#define GL_ARB_shader_subroutine 1
#endif /* GL_ARB_shader_subroutine */

#ifndef GL_ARB_shader_texture_image_samples
#define GL_ARB_shader_texture_image_samples 1
#endif /* GL_ARB_shader_texture_image_samples */

#ifndef GL_ARB_shader_viewport_layer_array
#define GL_ARB_shader_viewport_layer_array 1
#endif /* GL_ARB_shader_viewport_layer_array */

#ifndef GL_ARB_shading_language_420pack
#define GL_ARB_shading_language_420pack 1
#endif /* GL_ARB_shading_language_420pack */

/*
 * GL_ARB_shading_language_include
 *
 * Three enum tokens and six entry points operating on "named strings".
 * Function-pointer typedefs are unconditional; prototypes require
 * GL_GLEXT_PROTOTYPES.
 *
 * Note on the signatures as declared here: the name/string length parameters
 * (namelen, stringlen) are GLint, while the buffer size passed to
 * glGetNamedStringARB (bufSize) is GLsizei.
 */
#ifndef GL_ARB_shading_language_include
#define GL_ARB_shading_language_include 1
#define GL_SHADER_INCLUDE_ARB 0x8DAE
#define GL_NAMED_STRING_LENGTH_ARB 0x8DE9
#define GL_NAMED_STRING_TYPE_ARB 0x8DEA
typedef void(APIENTRYP PFNGLNAMEDSTRINGARBPROC)(GLenum type, GLint namelen, const GLchar* name, GLint stringlen, const GLchar* string);
typedef void(APIENTRYP PFNGLDELETENAMEDSTRINGARBPROC)(GLint namelen, const GLchar* name);
typedef void(APIENTRYP PFNGLCOMPILESHADERINCLUDEARBPROC)(GLuint shader, GLsizei count, const GLchar* const* path, const GLint* length);
typedef GLboolean(APIENTRYP PFNGLISNAMEDSTRINGARBPROC)(GLint namelen, const GLchar* name);
typedef void(APIENTRYP PFNGLGETNAMEDSTRINGARBPROC)(GLint namelen, const GLchar* name, GLsizei bufSize, GLint* stringlen, GLchar* string);
typedef void(APIENTRYP PFNGLGETNAMEDSTRINGIVARBPROC)(GLint namelen, const GLchar* name, GLenum pname, GLint* params);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glNamedStringARB(GLenum type, GLint namelen, const GLchar* name, GLint stringlen, const GLchar* string);
GLAPI void APIENTRY glDeleteNamedStringARB(GLint namelen, const GLchar* name);
GLAPI void APIENTRY glCompileShaderIncludeARB(GLuint shader, GLsizei count, const GLchar* const* path, const GLint* length);
GLAPI GLboolean APIENTRY glIsNamedStringARB(GLint namelen, const GLchar* name);
GLAPI void APIENTRY glGetNamedStringARB(GLint namelen, const GLchar* name, GLsizei bufSize, GLint* stringlen, GLchar* string);
GLAPI void APIENTRY glGetNamedStringivARB(GLint namelen, const GLchar* name, GLenum pname, GLint* params);
#endif
#endif /* GL_ARB_shading_language_include */

/* GL_ARB_shading_language_packing: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_ARB_shading_language_packing
#define GL_ARB_shading_language_packing 1
#endif /* GL_ARB_shading_language_packing */

/*
 * GL_ARB_sparse_buffer
 *
 * Two enum tokens and three page-commitment entry points. Function-pointer
 * typedefs are unconditional; prototypes require GL_GLEXT_PROTOTYPES.
 *
 * Note: one of the three entry points declared under this ARB guard carries
 * an EXT suffix (glNamedBufferPageCommitmentEXT); its signature is identical
 * to glNamedBufferPageCommitmentARB.
 */
#ifndef GL_ARB_sparse_buffer
#define GL_ARB_sparse_buffer 1
#define GL_SPARSE_STORAGE_BIT_ARB 0x0400
#define GL_SPARSE_BUFFER_PAGE_SIZE_ARB 0x82F8
typedef void(APIENTRYP PFNGLBUFFERPAGECOMMITMENTARBPROC)(GLenum target, GLintptr offset, GLsizeiptr size, GLboolean commit);
typedef void(APIENTRYP PFNGLNAMEDBUFFERPAGECOMMITMENTEXTPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit);
typedef void(APIENTRYP PFNGLNAMEDBUFFERPAGECOMMITMENTARBPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBufferPageCommitmentARB(GLenum target, GLintptr offset, GLsizeiptr size, GLboolean commit);
GLAPI void APIENTRY glNamedBufferPageCommitmentEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit);
GLAPI void APIENTRY glNamedBufferPageCommitmentARB(GLuint buffer, GLintptr offset, GLsizeiptr size, GLboolean commit);
#endif
#endif /* GL_ARB_sparse_buffer */

/*
 * GL_ARB_sparse_texture
 *
 * Eleven enum tokens and one entry point (glTexPageCommitmentARB). The
 * function-pointer typedef is unconditional; the prototype requires
 * GL_GLEXT_PROTOTYPES.
 *
 * The tokens are not listed in numeric order: values span 0x9195-0x919A and
 * 0x91A6-0x91AA.
 */
#ifndef GL_ARB_sparse_texture
#define GL_ARB_sparse_texture 1
#define GL_TEXTURE_SPARSE_ARB 0x91A6
#define GL_VIRTUAL_PAGE_SIZE_INDEX_ARB 0x91A7
#define GL_NUM_SPARSE_LEVELS_ARB 0x91AA
#define GL_NUM_VIRTUAL_PAGE_SIZES_ARB 0x91A8
#define GL_VIRTUAL_PAGE_SIZE_X_ARB 0x9195
#define GL_VIRTUAL_PAGE_SIZE_Y_ARB 0x9196
#define GL_VIRTUAL_PAGE_SIZE_Z_ARB 0x9197
#define GL_MAX_SPARSE_TEXTURE_SIZE_ARB 0x9198
#define GL_MAX_SPARSE_3D_TEXTURE_SIZE_ARB 0x9199
#define GL_MAX_SPARSE_ARRAY_TEXTURE_LAYERS_ARB 0x919A
#define GL_SPARSE_TEXTURE_FULL_ARRAY_CUBE_MIPMAPS_ARB 0x91A9
typedef void(APIENTRYP PFNGLTEXPAGECOMMITMENTARBPROC)(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                      GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glTexPageCommitmentARB(GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                           GLsizei height, GLsizei depth, GLboolean commit);
#endif
#endif /* GL_ARB_sparse_texture */

/*
 * Name-only extension guards: GL_ARB_sparse_texture2 through
 * GL_ARB_texture_barrier (7 extensions).
 *
 * Each guard only defines its extension-name macro to 1 (unless already
 * defined); no tokens or entry points are declared inside these guards.
 */
#ifndef GL_ARB_sparse_texture2
#define GL_ARB_sparse_texture2 1
#endif /* GL_ARB_sparse_texture2 */

#ifndef GL_ARB_sparse_texture_clamp
#define GL_ARB_sparse_texture_clamp 1
#endif /* GL_ARB_sparse_texture_clamp */

#ifndef GL_ARB_spirv_extensions
#define GL_ARB_spirv_extensions 1
#endif /* GL_ARB_spirv_extensions */

#ifndef GL_ARB_stencil_texturing
#define GL_ARB_stencil_texturing 1
#endif /* GL_ARB_stencil_texturing */

#ifndef GL_ARB_sync
#define GL_ARB_sync 1
#endif /* GL_ARB_sync */

#ifndef GL_ARB_tessellation_shader
#define GL_ARB_tessellation_shader 1
#endif /* GL_ARB_tessellation_shader */

#ifndef GL_ARB_texture_barrier
#define GL_ARB_texture_barrier 1
#endif /* GL_ARB_texture_barrier */

/* GL_ARB_texture_border_clamp: defines a single enum token and no entry points. */
#ifndef GL_ARB_texture_border_clamp
#define GL_ARB_texture_border_clamp 1
#define GL_CLAMP_TO_BORDER_ARB 0x812D
#endif /* GL_ARB_texture_border_clamp */

/*
 * GL_ARB_texture_buffer_object
 *
 * Five enum tokens with consecutive values 0x8C2A-0x8C2E and one entry point
 * (glTexBufferARB). The function-pointer typedef is unconditional; the
 * prototype requires GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_ARB_texture_buffer_object
#define GL_ARB_texture_buffer_object 1
#define GL_TEXTURE_BUFFER_ARB 0x8C2A
#define GL_MAX_TEXTURE_BUFFER_SIZE_ARB 0x8C2B
#define GL_TEXTURE_BINDING_BUFFER_ARB 0x8C2C
#define GL_TEXTURE_BUFFER_DATA_STORE_BINDING_ARB 0x8C2D
#define GL_TEXTURE_BUFFER_FORMAT_ARB 0x8C2E
typedef void(APIENTRYP PFNGLTEXBUFFERARBPROC)(GLenum target, GLenum internalformat, GLuint buffer);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glTexBufferARB(GLenum target, GLenum internalformat, GLuint buffer);
#endif
#endif /* GL_ARB_texture_buffer_object */

/*
 * Name-only extension guards: GL_ARB_texture_buffer_object_rgb32 and
 * GL_ARB_texture_buffer_range. Only the extension-name macros are defined;
 * no tokens or entry points are declared inside these guards.
 */
#ifndef GL_ARB_texture_buffer_object_rgb32
#define GL_ARB_texture_buffer_object_rgb32 1
#endif /* GL_ARB_texture_buffer_object_rgb32 */

#ifndef GL_ARB_texture_buffer_range
#define GL_ARB_texture_buffer_range 1
#endif /* GL_ARB_texture_buffer_range */

/* GL_ARB_texture_compression_bptc: four enum tokens with consecutive values 0x8E8C-0x8E8F; no entry points. */
#ifndef GL_ARB_texture_compression_bptc
#define GL_ARB_texture_compression_bptc 1
#define GL_COMPRESSED_RGBA_BPTC_UNORM_ARB 0x8E8C
#define GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM_ARB 0x8E8D
#define GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT_ARB 0x8E8E
#define GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT_ARB 0x8E8F
#endif /* GL_ARB_texture_compression_bptc */

/* GL_ARB_texture_compression_rgtc: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_ARB_texture_compression_rgtc
#define GL_ARB_texture_compression_rgtc 1
#endif /* GL_ARB_texture_compression_rgtc */

/* GL_ARB_texture_cube_map_array: seven enum tokens with consecutive values 0x9009-0x900F; no entry points. */
#ifndef GL_ARB_texture_cube_map_array
#define GL_ARB_texture_cube_map_array 1
#define GL_TEXTURE_CUBE_MAP_ARRAY_ARB 0x9009
#define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY_ARB 0x900A
#define GL_PROXY_TEXTURE_CUBE_MAP_ARRAY_ARB 0x900B
#define GL_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900C
#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW_ARB 0x900D
#define GL_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900E
#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY_ARB 0x900F
#endif /* GL_ARB_texture_cube_map_array */

/* GL_ARB_texture_filter_anisotropic: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_ARB_texture_filter_anisotropic
#define GL_ARB_texture_filter_anisotropic 1
#endif /* GL_ARB_texture_filter_anisotropic */

/* GL_ARB_texture_filter_minmax: two enum tokens (0x9366, 0x9367); no entry points. */
#ifndef GL_ARB_texture_filter_minmax
#define GL_ARB_texture_filter_minmax 1
#define GL_TEXTURE_REDUCTION_MODE_ARB 0x9366
#define GL_WEIGHTED_AVERAGE_ARB 0x9367
#endif /* GL_ARB_texture_filter_minmax */

/* GL_ARB_texture_gather: three enum tokens; no entry points. */
#ifndef GL_ARB_texture_gather
#define GL_ARB_texture_gather 1
#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB 0x8E5E
#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB 0x8E5F
#define GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB 0x8F9F
#endif /* GL_ARB_texture_gather */

/* GL_ARB_texture_mirror_clamp_to_edge: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_ARB_texture_mirror_clamp_to_edge
#define GL_ARB_texture_mirror_clamp_to_edge 1
#endif /* GL_ARB_texture_mirror_clamp_to_edge */

/* GL_ARB_texture_mirrored_repeat: defines a single enum token and no entry points. */
#ifndef GL_ARB_texture_mirrored_repeat
#define GL_ARB_texture_mirrored_repeat 1
#define GL_MIRRORED_REPEAT_ARB 0x8370
#endif /* GL_ARB_texture_mirrored_repeat */

/*
 * Name-only extension guards: GL_ARB_texture_multisample through
 * GL_ARB_transform_feedback_instanced (15 extensions).
 *
 * Each guard only defines its extension-name macro to 1 (unless already
 * defined); no tokens or entry points are declared inside these guards.
 */
#ifndef GL_ARB_texture_multisample
#define GL_ARB_texture_multisample 1
#endif /* GL_ARB_texture_multisample */

#ifndef GL_ARB_texture_non_power_of_two
#define GL_ARB_texture_non_power_of_two 1
#endif /* GL_ARB_texture_non_power_of_two */

#ifndef GL_ARB_texture_query_levels
#define GL_ARB_texture_query_levels 1
#endif /* GL_ARB_texture_query_levels */

#ifndef GL_ARB_texture_query_lod
#define GL_ARB_texture_query_lod 1
#endif /* GL_ARB_texture_query_lod */

#ifndef GL_ARB_texture_rg
#define GL_ARB_texture_rg 1
#endif /* GL_ARB_texture_rg */

#ifndef GL_ARB_texture_rgb10_a2ui
#define GL_ARB_texture_rgb10_a2ui 1
#endif /* GL_ARB_texture_rgb10_a2ui */

#ifndef GL_ARB_texture_stencil8
#define GL_ARB_texture_stencil8 1
#endif /* GL_ARB_texture_stencil8 */

#ifndef GL_ARB_texture_storage
#define GL_ARB_texture_storage 1
#endif /* GL_ARB_texture_storage */

#ifndef GL_ARB_texture_storage_multisample
#define GL_ARB_texture_storage_multisample 1
#endif /* GL_ARB_texture_storage_multisample */

#ifndef GL_ARB_texture_swizzle
#define GL_ARB_texture_swizzle 1
#endif /* GL_ARB_texture_swizzle */

#ifndef GL_ARB_texture_view
#define GL_ARB_texture_view 1
#endif /* GL_ARB_texture_view */

#ifndef GL_ARB_timer_query
#define GL_ARB_timer_query 1
#endif /* GL_ARB_timer_query */

#ifndef GL_ARB_transform_feedback2
#define GL_ARB_transform_feedback2 1
#endif /* GL_ARB_transform_feedback2 */

#ifndef GL_ARB_transform_feedback3
#define GL_ARB_transform_feedback3 1
#endif /* GL_ARB_transform_feedback3 */

#ifndef GL_ARB_transform_feedback_instanced
#define GL_ARB_transform_feedback_instanced 1
#endif /* GL_ARB_transform_feedback_instanced */

/* GL_ARB_transform_feedback_overflow_query: two enum tokens (0x82EC, 0x82ED); no entry points. */
#ifndef GL_ARB_transform_feedback_overflow_query
#define GL_ARB_transform_feedback_overflow_query 1
#define GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB 0x82EC
#define GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB 0x82ED
#endif /* GL_ARB_transform_feedback_overflow_query */

/*
 * Name-only extension guards: GL_ARB_uniform_buffer_object through
 * GL_ARB_vertex_type_2_10_10_10_rev (7 extensions).
 *
 * Each guard only defines its extension-name macro to 1 (unless already
 * defined); no tokens or entry points are declared inside these guards.
 */
#ifndef GL_ARB_uniform_buffer_object
#define GL_ARB_uniform_buffer_object 1
#endif /* GL_ARB_uniform_buffer_object */

#ifndef GL_ARB_vertex_array_bgra
#define GL_ARB_vertex_array_bgra 1
#endif /* GL_ARB_vertex_array_bgra */

#ifndef GL_ARB_vertex_array_object
#define GL_ARB_vertex_array_object 1
#endif /* GL_ARB_vertex_array_object */

#ifndef GL_ARB_vertex_attrib_64bit
#define GL_ARB_vertex_attrib_64bit 1
#endif /* GL_ARB_vertex_attrib_64bit */

#ifndef GL_ARB_vertex_attrib_binding
#define GL_ARB_vertex_attrib_binding 1
#endif /* GL_ARB_vertex_attrib_binding */

#ifndef GL_ARB_vertex_type_10f_11f_11f_rev
#define GL_ARB_vertex_type_10f_11f_11f_rev 1
#endif /* GL_ARB_vertex_type_10f_11f_11f_rev */

#ifndef GL_ARB_vertex_type_2_10_10_10_rev
#define GL_ARB_vertex_type_2_10_10_10_rev 1
#endif /* GL_ARB_vertex_type_2_10_10_10_rev */

/*
 * GL_ARB_viewport_array
 *
 * No enum tokens are defined in this guard. The only declarations are two
 * depth-range entry points, and both carry an NV suffix even though the
 * guard is for an ARB extension. Function-pointer typedefs are
 * unconditional; prototypes require GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_ARB_viewport_array
#define GL_ARB_viewport_array 1
typedef void(APIENTRYP PFNGLDEPTHRANGEARRAYDVNVPROC)(GLuint first, GLsizei count, const GLdouble* v);
typedef void(APIENTRYP PFNGLDEPTHRANGEINDEXEDDNVPROC)(GLuint index, GLdouble n, GLdouble f);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDepthRangeArraydvNV(GLuint first, GLsizei count, const GLdouble* v);
GLAPI void APIENTRY glDepthRangeIndexeddNV(GLuint index, GLdouble n, GLdouble f);
#endif
#endif /* GL_ARB_viewport_array */

/*
 * GL_KHR_blend_equation_advanced
 *
 * Fifteen enum tokens and one entry point (glBlendBarrierKHR). The
 * function-pointer typedef is unconditional; the prototype requires
 * GL_GLEXT_PROTOTYPES.
 *
 * The token values are not contiguous: 0x929D and 0x929F are not defined
 * here, and the HSL tokens start at 0x92AD.
 */
#ifndef GL_KHR_blend_equation_advanced
#define GL_KHR_blend_equation_advanced 1
#define GL_MULTIPLY_KHR 0x9294
#define GL_SCREEN_KHR 0x9295
#define GL_OVERLAY_KHR 0x9296
#define GL_DARKEN_KHR 0x9297
#define GL_LIGHTEN_KHR 0x9298
#define GL_COLORDODGE_KHR 0x9299
#define GL_COLORBURN_KHR 0x929A
#define GL_HARDLIGHT_KHR 0x929B
#define GL_SOFTLIGHT_KHR 0x929C
#define GL_DIFFERENCE_KHR 0x929E
#define GL_EXCLUSION_KHR 0x92A0
#define GL_HSL_HUE_KHR 0x92AD
#define GL_HSL_SATURATION_KHR 0x92AE
#define GL_HSL_COLOR_KHR 0x92AF
#define GL_HSL_LUMINOSITY_KHR 0x92B0
typedef void(APIENTRYP PFNGLBLENDBARRIERKHRPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBlendBarrierKHR(void);
#endif
#endif /* GL_KHR_blend_equation_advanced */

/* GL_KHR_blend_equation_advanced_coherent: defines a single enum token and no entry points. */
#ifndef GL_KHR_blend_equation_advanced_coherent
#define GL_KHR_blend_equation_advanced_coherent 1
#define GL_BLEND_ADVANCED_COHERENT_KHR 0x9285
#endif /* GL_KHR_blend_equation_advanced_coherent */

/*
 * Name-only extension guards: GL_KHR_context_flush_control and GL_KHR_debug.
 * Only the extension-name macros are defined; no tokens or entry points are
 * declared inside these guards.
 */
#ifndef GL_KHR_context_flush_control
#define GL_KHR_context_flush_control 1
#endif /* GL_KHR_context_flush_control */

#ifndef GL_KHR_debug
#define GL_KHR_debug 1
#endif /* GL_KHR_debug */

/* GL_KHR_no_error: defines a single token, a one-bit mask value (0x00000008); no entry points. */
#ifndef GL_KHR_no_error
#define GL_KHR_no_error 1
#define GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR 0x00000008
#endif /* GL_KHR_no_error */

/*
 * GL_KHR_parallel_shader_compile
 *
 * Two enum tokens and one entry point (glMaxShaderCompilerThreadsKHR). The
 * function-pointer typedef is unconditional; the prototype requires
 * GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_KHR_parallel_shader_compile
#define GL_KHR_parallel_shader_compile 1
#define GL_MAX_SHADER_COMPILER_THREADS_KHR 0x91B0
#define GL_COMPLETION_STATUS_KHR 0x91B1
typedef void(APIENTRYP PFNGLMAXSHADERCOMPILERTHREADSKHRPROC)(GLuint count);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMaxShaderCompilerThreadsKHR(GLuint count);
#endif
#endif /* GL_KHR_parallel_shader_compile */

/* GL_KHR_robust_buffer_access_behavior: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_KHR_robust_buffer_access_behavior
#define GL_KHR_robust_buffer_access_behavior 1
#endif /* GL_KHR_robust_buffer_access_behavior */

/*
 * GL_KHR_robustness: defines a single enum token and no entry points.
 * Note that the token name (GL_CONTEXT_ROBUST_ACCESS) has no _KHR suffix,
 * unlike the tokens in the neighbouring KHR guards.
 */
#ifndef GL_KHR_robustness
#define GL_KHR_robustness 1
#define GL_CONTEXT_ROBUST_ACCESS 0x90F3
#endif /* GL_KHR_robustness */

/*
 * GL_KHR_shader_subgroup
 *
 * Twelve tokens and no entry points: four enum values (0x9532-0x9535)
 * followed by eight GL_SUBGROUP_FEATURE_*_BIT_KHR values, each a distinct
 * single bit from 0x00000001 to 0x00000080.
 */
#ifndef GL_KHR_shader_subgroup
#define GL_KHR_shader_subgroup 1
#define GL_SUBGROUP_SIZE_KHR 0x9532
#define GL_SUBGROUP_SUPPORTED_STAGES_KHR 0x9533
#define GL_SUBGROUP_SUPPORTED_FEATURES_KHR 0x9534
#define GL_SUBGROUP_QUAD_ALL_STAGES_KHR 0x9535
/* Single-bit feature flags (bits 0 through 7). */
#define GL_SUBGROUP_FEATURE_BASIC_BIT_KHR 0x00000001
#define GL_SUBGROUP_FEATURE_VOTE_BIT_KHR 0x00000002
#define GL_SUBGROUP_FEATURE_ARITHMETIC_BIT_KHR 0x00000004
#define GL_SUBGROUP_FEATURE_BALLOT_BIT_KHR 0x00000008
#define GL_SUBGROUP_FEATURE_SHUFFLE_BIT_KHR 0x00000010
#define GL_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT_KHR 0x00000020
#define GL_SUBGROUP_FEATURE_CLUSTERED_BIT_KHR 0x00000040
#define GL_SUBGROUP_FEATURE_QUAD_BIT_KHR 0x00000080
#endif /* GL_KHR_shader_subgroup */

/*
 * GL_KHR_texture_compression_astc_hdr
 *
 * Twenty-eight compressed-format tokens and no entry points:
 *   - 14 GL_COMPRESSED_RGBA_ASTC_<W>x<H>_KHR tokens, 0x93B0-0x93BD
 *   - 14 GL_COMPRESSED_SRGB8_ALPHA8_ASTC_<W>x<H>_KHR tokens, 0x93D0-0x93DD
 * Both groups list the same <W>x<H> suffixes in the same order, so each
 * SRGB8_ALPHA8 value equals the corresponding RGBA value plus 0x20.
 */
#ifndef GL_KHR_texture_compression_astc_hdr
#define GL_KHR_texture_compression_astc_hdr 1
#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0
#define GL_COMPRESSED_RGBA_ASTC_5x4_KHR 0x93B1
#define GL_COMPRESSED_RGBA_ASTC_5x5_KHR 0x93B2
#define GL_COMPRESSED_RGBA_ASTC_6x5_KHR 0x93B3
#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4
#define GL_COMPRESSED_RGBA_ASTC_8x5_KHR 0x93B5
#define GL_COMPRESSED_RGBA_ASTC_8x6_KHR 0x93B6
#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7
#define GL_COMPRESSED_RGBA_ASTC_10x5_KHR 0x93B8
#define GL_COMPRESSED_RGBA_ASTC_10x6_KHR 0x93B9
#define GL_COMPRESSED_RGBA_ASTC_10x8_KHR 0x93BA
#define GL_COMPRESSED_RGBA_ASTC_10x10_KHR 0x93BB
#define GL_COMPRESSED_RGBA_ASTC_12x10_KHR 0x93BC
#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR 0x93D0
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR 0x93D1
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR 0x93D2
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR 0x93D3
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR 0x93D4
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR 0x93D5
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR 0x93D6
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR 0x93D7
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR 0x93D8
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR 0x93D9
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR 0x93DA
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR 0x93DB
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR 0x93DC
#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR 0x93DD
#endif /* GL_KHR_texture_compression_astc_hdr */

/*
 * Name-only extension guards: GL_KHR_texture_compression_astc_ldr and
 * GL_KHR_texture_compression_astc_sliced_3d. Only the extension-name macros
 * are defined; no tokens or entry points are declared inside these guards.
 */
#ifndef GL_KHR_texture_compression_astc_ldr
#define GL_KHR_texture_compression_astc_ldr 1
#endif /* GL_KHR_texture_compression_astc_ldr */

#ifndef GL_KHR_texture_compression_astc_sliced_3d
#define GL_KHR_texture_compression_astc_sliced_3d 1
#endif /* GL_KHR_texture_compression_astc_sliced_3d */

/*
 * GL_AMD_framebuffer_multisample_advanced
 *
 * Six enum tokens with consecutive values 0x91B2-0x91B7 and two entry
 * points. The two entry points differ only in their first parameter
 * (GLenum target vs. GLuint renderbuffer); the remaining parameters are
 * identical. Function-pointer typedefs are unconditional; prototypes require
 * GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_AMD_framebuffer_multisample_advanced
#define GL_AMD_framebuffer_multisample_advanced 1
#define GL_RENDERBUFFER_STORAGE_SAMPLES_AMD 0x91B2
#define GL_MAX_COLOR_FRAMEBUFFER_SAMPLES_AMD 0x91B3
#define GL_MAX_COLOR_FRAMEBUFFER_STORAGE_SAMPLES_AMD 0x91B4
#define GL_MAX_DEPTH_STENCIL_FRAMEBUFFER_SAMPLES_AMD 0x91B5
#define GL_NUM_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B6
#define GL_SUPPORTED_MULTISAMPLE_MODES_AMD 0x91B7
typedef void(APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)(GLenum target, GLsizei samples, GLsizei storageSamples,
                                                                           GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEADVANCEDAMDPROC)(
  GLuint renderbuffer, GLsizei samples, GLsizei storageSamples, GLenum internalformat, GLsizei width, GLsizei height);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glRenderbufferStorageMultisampleAdvancedAMD(GLenum target, GLsizei samples, GLsizei storageSamples,
                                                                GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleAdvancedAMD(GLuint renderbuffer, GLsizei samples, GLsizei storageSamples,
                                                                     GLenum internalformat, GLsizei width, GLsizei height);
#endif
#endif /* GL_AMD_framebuffer_multisample_advanced */

/*
 * GL_AMD_performance_monitor
 *
 * Seven enum tokens with consecutive values 0x8BC0-0x8BC6 and eleven entry
 * points. Function-pointer typedefs are unconditional; prototypes require
 * GL_GLEXT_PROTOTYPES.
 *
 * Signature notes, as declared here:
 *   - glDeletePerfMonitorsAMD takes a non-const GLuint* monitors, the same
 *     parameter type as glGenPerfMonitorsAMD.
 *   - glSelectPerfMonitorCountersAMD takes a non-const GLuint* counterList.
 *   - glGetPerfMonitorCounterInfoAMD returns its result through an untyped
 *     void* data pointer.
 */
#ifndef GL_AMD_performance_monitor
#define GL_AMD_performance_monitor 1
#define GL_COUNTER_TYPE_AMD 0x8BC0
#define GL_COUNTER_RANGE_AMD 0x8BC1
#define GL_UNSIGNED_INT64_AMD 0x8BC2
#define GL_PERCENTAGE_AMD 0x8BC3
#define GL_PERFMON_RESULT_AVAILABLE_AMD 0x8BC4
#define GL_PERFMON_RESULT_SIZE_AMD 0x8BC5
#define GL_PERFMON_RESULT_AMD 0x8BC6
typedef void(APIENTRYP PFNGLGETPERFMONITORGROUPSAMDPROC)(GLint* numGroups, GLsizei groupsSize, GLuint* groups);
typedef void(APIENTRYP PFNGLGETPERFMONITORCOUNTERSAMDPROC)(GLuint group, GLint* numCounters, GLint* maxActiveCounters, GLsizei counterSize,
                                                           GLuint* counters);
typedef void(APIENTRYP PFNGLGETPERFMONITORGROUPSTRINGAMDPROC)(GLuint group, GLsizei bufSize, GLsizei* length, GLchar* groupString);
typedef void(APIENTRYP PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC)(GLuint group, GLuint counter, GLsizei bufSize, GLsizei* length,
                                                                GLchar* counterString);
typedef void(APIENTRYP PFNGLGETPERFMONITORCOUNTERINFOAMDPROC)(GLuint group, GLuint counter, GLenum pname, void* data);
typedef void(APIENTRYP PFNGLGENPERFMONITORSAMDPROC)(GLsizei n, GLuint* monitors);
typedef void(APIENTRYP PFNGLDELETEPERFMONITORSAMDPROC)(GLsizei n, GLuint* monitors);
typedef void(APIENTRYP PFNGLSELECTPERFMONITORCOUNTERSAMDPROC)(GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint* counterList);
typedef void(APIENTRYP PFNGLBEGINPERFMONITORAMDPROC)(GLuint monitor);
typedef void(APIENTRYP PFNGLENDPERFMONITORAMDPROC)(GLuint monitor);
typedef void(APIENTRYP PFNGLGETPERFMONITORCOUNTERDATAAMDPROC)(GLuint monitor, GLenum pname, GLsizei dataSize, GLuint* data, GLint* bytesWritten);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glGetPerfMonitorGroupsAMD(GLint* numGroups, GLsizei groupsSize, GLuint* groups);
GLAPI void APIENTRY glGetPerfMonitorCountersAMD(GLuint group, GLint* numCounters, GLint* maxActiveCounters, GLsizei counterSize, GLuint* counters);
GLAPI void APIENTRY glGetPerfMonitorGroupStringAMD(GLuint group, GLsizei bufSize, GLsizei* length, GLchar* groupString);
GLAPI void APIENTRY glGetPerfMonitorCounterStringAMD(GLuint group, GLuint counter, GLsizei bufSize, GLsizei* length, GLchar* counterString);
GLAPI void APIENTRY glGetPerfMonitorCounterInfoAMD(GLuint group, GLuint counter, GLenum pname, void* data);
GLAPI void APIENTRY glGenPerfMonitorsAMD(GLsizei n, GLuint* monitors);
GLAPI void APIENTRY glDeletePerfMonitorsAMD(GLsizei n, GLuint* monitors);
GLAPI void APIENTRY glSelectPerfMonitorCountersAMD(GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint* counterList);
GLAPI void APIENTRY glBeginPerfMonitorAMD(GLuint monitor);
GLAPI void APIENTRY glEndPerfMonitorAMD(GLuint monitor);
GLAPI void APIENTRY glGetPerfMonitorCounterDataAMD(GLuint monitor, GLenum pname, GLsizei dataSize, GLuint* data, GLint* bytesWritten);
#endif
#endif /* GL_AMD_performance_monitor */

/* GL_APPLE_rgb_422: four enum tokens; no entry points. */
#ifndef GL_APPLE_rgb_422
#define GL_APPLE_rgb_422 1
#define GL_RGB_422_APPLE 0x8A1F
#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA
#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB
#define GL_RGB_RAW_422_APPLE 0x8A51
#endif /* GL_APPLE_rgb_422 */

/*
 * GL_EXT_EGL_image_storage
 *
 * No enum tokens. Declares the GLeglImageOES handle type as an untyped
 * pointer (void*) plus two entry points that take such a handle and a
 * const GLint* attrib_list. Function-pointer typedefs are unconditional;
 * prototypes require GL_GLEXT_PROTOTYPES.
 *
 * NOTE(review): GLeglImageOES is only protected by this extension's guard,
 * not by a guard of its own. If another included header also typedefs
 * GLeglImageOES, confirm the two declarations agree.
 */
#ifndef GL_EXT_EGL_image_storage
#define GL_EXT_EGL_image_storage 1
typedef void* GLeglImageOES;
typedef void(APIENTRYP PFNGLEGLIMAGETARGETTEXSTORAGEEXTPROC)(GLenum target, GLeglImageOES image, const GLint* attrib_list);
typedef void(APIENTRYP PFNGLEGLIMAGETARGETTEXTURESTORAGEEXTPROC)(GLuint texture, GLeglImageOES image, const GLint* attrib_list);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glEGLImageTargetTexStorageEXT(GLenum target, GLeglImageOES image, const GLint* attrib_list);
GLAPI void APIENTRY glEGLImageTargetTextureStorageEXT(GLuint texture, GLeglImageOES image, const GLint* attrib_list);
#endif
#endif /* GL_EXT_EGL_image_storage */

/* GL_EXT_EGL_sync: only the extension-name macro is defined; no tokens or entry points are declared in this guard. */
#ifndef GL_EXT_EGL_sync
#define GL_EXT_EGL_sync 1
#endif /* GL_EXT_EGL_sync */

/*
 * GL_EXT_debug_label
 *
 * Six GL_*_OBJECT_EXT enum tokens and two entry points (glLabelObjectEXT,
 * glGetObjectLabelEXT), both of which take a GLenum type and a GLuint
 * object. Function-pointer typedefs are unconditional; prototypes require
 * GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_EXT_debug_label
#define GL_EXT_debug_label 1
#define GL_PROGRAM_PIPELINE_OBJECT_EXT 0x8A4F
#define GL_PROGRAM_OBJECT_EXT 0x8B40
#define GL_SHADER_OBJECT_EXT 0x8B48
#define GL_BUFFER_OBJECT_EXT 0x9151
#define GL_QUERY_OBJECT_EXT 0x9153
#define GL_VERTEX_ARRAY_OBJECT_EXT 0x9154
typedef void(APIENTRYP PFNGLLABELOBJECTEXTPROC)(GLenum type, GLuint object, GLsizei length, const GLchar* label);
typedef void(APIENTRYP PFNGLGETOBJECTLABELEXTPROC)(GLenum type, GLuint object, GLsizei bufSize, GLsizei* length, GLchar* label);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glLabelObjectEXT(GLenum type, GLuint object, GLsizei length, const GLchar* label);
GLAPI void APIENTRY glGetObjectLabelEXT(GLenum type, GLuint object, GLsizei bufSize, GLsizei* length, GLchar* label);
#endif
#endif /* GL_EXT_debug_label */

/*
 * GL_EXT_debug_marker
 *
 * No enum tokens. Declares three entry points: glInsertEventMarkerEXT and
 * glPushGroupMarkerEXT share the signature (GLsizei length, const GLchar*
 * marker); glPopGroupMarkerEXT takes no arguments. Function-pointer typedefs
 * are unconditional; prototypes require GL_GLEXT_PROTOTYPES.
 */
#ifndef GL_EXT_debug_marker
#define GL_EXT_debug_marker 1
typedef void(APIENTRYP PFNGLINSERTEVENTMARKEREXTPROC)(GLsizei length, const GLchar* marker);
typedef void(APIENTRYP PFNGLPUSHGROUPMARKEREXTPROC)(GLsizei length, const GLchar* marker);
typedef void(APIENTRYP PFNGLPOPGROUPMARKEREXTPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glInsertEventMarkerEXT(GLsizei length, const GLchar* marker);
GLAPI void APIENTRY glPushGroupMarkerEXT(GLsizei length, const GLchar* marker);
GLAPI void APIENTRY glPopGroupMarkerEXT(void);
#endif
#endif /* GL_EXT_debug_marker */

#ifndef GL_EXT_direct_state_access
#define GL_EXT_direct_state_access 1
#define GL_PROGRAM_MATRIX_EXT 0x8E2D
#define GL_TRANSPOSE_PROGRAM_MATRIX_EXT 0x8E2E
#define GL_PROGRAM_MATRIX_STACK_DEPTH_EXT 0x8E2F
typedef void(APIENTRYP PFNGLMATRIXLOADFEXTPROC)(GLenum mode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXLOADDEXTPROC)(GLenum mode, const GLdouble* m);
typedef void(APIENTRYP PFNGLMATRIXMULTFEXTPROC)(GLenum mode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXMULTDEXTPROC)(GLenum mode, const GLdouble* m);
typedef void(APIENTRYP PFNGLMATRIXLOADIDENTITYEXTPROC)(GLenum mode);
typedef void(APIENTRYP PFNGLMATRIXROTATEFEXTPROC)(GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
typedef void(APIENTRYP PFNGLMATRIXROTATEDEXTPROC)(GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLMATRIXSCALEFEXTPROC)(GLenum mode, GLfloat x, GLfloat y, GLfloat z);
typedef void(APIENTRYP PFNGLMATRIXSCALEDEXTPROC)(GLenum mode, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLMATRIXTRANSLATEFEXTPROC)(GLenum mode, GLfloat x, GLfloat y, GLfloat z);
typedef void(APIENTRYP PFNGLMATRIXTRANSLATEDEXTPROC)(GLenum mode, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLMATRIXFRUSTUMEXTPROC)(GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear,
                                                  GLdouble zFar);
typedef void(APIENTRYP PFNGLMATRIXORTHOEXTPROC)(GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear,
                                                GLdouble zFar);
typedef void(APIENTRYP PFNGLMATRIXPOPEXTPROC)(GLenum mode);
typedef void(APIENTRYP PFNGLMATRIXPUSHEXTPROC)(GLenum mode);
typedef void(APIENTRYP PFNGLCLIENTATTRIBDEFAULTEXTPROC)(GLbitfield mask);
typedef void(APIENTRYP PFNGLPUSHCLIENTATTRIBDEFAULTEXTPROC)(GLbitfield mask);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERFVEXTPROC)(GLuint texture, GLenum target, GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXTUREIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                   GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTUREIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                   GLsizei height, GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                      GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                      GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOPYTEXTUREIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                                       GLsizei width, GLint border);
typedef void(APIENTRYP PFNGLCOPYTEXTUREIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                                       GLsizei width, GLsizei height, GLint border);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint x, GLint y,
                                                          GLsizei width);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x,
                                                          GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETTEXTUREIMAGEEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERFVEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERFVEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETTEXTURELEVELPARAMETERIVEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLTEXTUREIMAGE3DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                   GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLTEXTURESUBIMAGE3DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                      GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                      GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOPYTEXTURESUBIMAGE3DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                          GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLBINDMULTITEXTUREEXTPROC)(GLenum texunit, GLenum target, GLuint texture);
typedef void(APIENTRYP PFNGLMULTITEXCOORDPOINTEREXTPROC)(GLenum texunit, GLint size, GLenum type, GLsizei stride, const void* pointer);
typedef void(APIENTRYP PFNGLMULTITEXENVFEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLMULTITEXENVFVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLMULTITEXENVIEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLMULTITEXENVIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLMULTITEXGENDEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLdouble param);
typedef void(APIENTRYP PFNGLMULTITEXGENDVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, const GLdouble* params);
typedef void(APIENTRYP PFNGLMULTITEXGENFEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLMULTITEXGENFVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLMULTITEXGENIEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLMULTITEXGENIVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLGETMULTITEXENVFVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETMULTITEXENVIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETMULTITEXGENDVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLdouble* params);
typedef void(APIENTRYP PFNGLGETMULTITEXGENFVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETMULTITEXGENIVEXTPROC)(GLenum texunit, GLenum coord, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERIEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERFEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLfloat param);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERFVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLfloat* params);
typedef void(APIENTRYP PFNGLMULTITEXIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                    GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLMULTITEXIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                    GLsizei height, GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLMULTITEXSUBIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                       GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLMULTITEXSUBIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                       GLsizei width, GLsizei height, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOPYMULTITEXIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                                        GLsizei width, GLint border);
typedef void(APIENTRYP PFNGLCOPYMULTITEXIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                                        GLsizei width, GLsizei height, GLint border);
typedef void(APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint x, GLint y,
                                                           GLsizei width);
typedef void(APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                           GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETMULTITEXIMAGEEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
typedef void(APIENTRYP PFNGLGETMULTITEXPARAMETERFVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETMULTITEXPARAMETERIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETMULTITEXLEVELPARAMETERFVEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum pname, GLfloat* params);
typedef void(APIENTRYP PFNGLGETMULTITEXLEVELPARAMETERIVEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLMULTITEXIMAGE3DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width,
                                                    GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLMULTITEXSUBIMAGE3DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                       GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                       GLenum type, const void* pixels);
typedef void(APIENTRYP PFNGLCOPYMULTITEXSUBIMAGE3DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                           GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLENABLECLIENTSTATEINDEXEDEXTPROC)(GLenum array, GLuint index);
typedef void(APIENTRYP PFNGLDISABLECLIENTSTATEINDEXEDEXTPROC)(GLenum array, GLuint index);
typedef void(APIENTRYP PFNGLGETFLOATINDEXEDVEXTPROC)(GLenum target, GLuint index, GLfloat* data);
typedef void(APIENTRYP PFNGLGETDOUBLEINDEXEDVEXTPROC)(GLenum target, GLuint index, GLdouble* data);
typedef void(APIENTRYP PFNGLGETPOINTERINDEXEDVEXTPROC)(GLenum target, GLuint index, void** data);
typedef void(APIENTRYP PFNGLENABLEINDEXEDEXTPROC)(GLenum target, GLuint index);
typedef void(APIENTRYP PFNGLDISABLEINDEXEDEXTPROC)(GLenum target, GLuint index);
typedef GLboolean(APIENTRYP PFNGLISENABLEDINDEXEDEXTPROC)(GLenum target, GLuint index);
typedef void(APIENTRYP PFNGLGETINTEGERINDEXEDVEXTPROC)(GLenum target, GLuint index, GLint* data);
typedef void(APIENTRYP PFNGLGETBOOLEANINDEXEDVEXTPROC)(GLenum target, GLuint index, GLboolean* data);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE3DEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                             GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                             GLsizei height, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTUREIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLenum internalformat,
                                                             GLsizei width, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE3DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                                GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                                GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE2DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                                GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDTEXTURESUBIMAGE1DEXTPROC)(GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                                GLenum format, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDTEXTUREIMAGEEXTPROC)(GLuint texture, GLenum target, GLint lod, void* img);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE3DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                              GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                              GLsizei height, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLenum internalformat,
                                                              GLsizei width, GLint border, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE3DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                                 GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                                 GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE2DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                                 GLsizei width, GLsizei height, GLenum format, GLsizei imageSize,
                                                                 const void* bits);
typedef void(APIENTRYP PFNGLCOMPRESSEDMULTITEXSUBIMAGE1DEXTPROC)(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                                 GLenum format, GLsizei imageSize, const void* bits);
typedef void(APIENTRYP PFNGLGETCOMPRESSEDMULTITEXIMAGEEXTPROC)(GLenum texunit, GLenum target, GLint lod, void* img);
typedef void(APIENTRYP PFNGLMATRIXLOADTRANSPOSEFEXTPROC)(GLenum mode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXLOADTRANSPOSEDEXTPROC)(GLenum mode, const GLdouble* m);
typedef void(APIENTRYP PFNGLMATRIXMULTTRANSPOSEFEXTPROC)(GLenum mode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXMULTTRANSPOSEDEXTPROC)(GLenum mode, const GLdouble* m);
typedef void(APIENTRYP PFNGLNAMEDBUFFERDATAEXTPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSUBDATAEXTPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data);
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFEREXTPROC)(GLuint buffer, GLenum access);
typedef GLboolean(APIENTRYP PFNGLUNMAPNAMEDBUFFEREXTPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERIVEXTPROC)(GLuint buffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPOINTERVEXTPROC)(GLuint buffer, GLenum pname, void** params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERSUBDATAEXTPROC)(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1FEXTPROC)(GLuint program, GLint location, GLfloat v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2FEXTPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3FEXTPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4FEXTPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1IEXTPROC)(GLuint program, GLint location, GLint v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2IEXTPROC)(GLuint program, GLint location, GLint v0, GLint v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3IEXTPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4IEXTPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1FVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2FVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3FVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4FVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1IVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2IVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3IVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4IVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3FVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLfloat* value);
typedef void(APIENTRYP PFNGLTEXTUREBUFFEREXTPROC)(GLuint texture, GLenum target, GLenum internalformat, GLuint buffer);
typedef void(APIENTRYP PFNGLMULTITEXBUFFEREXTPROC)(GLenum texunit, GLenum target, GLenum internalformat, GLuint buffer);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLTEXTUREPARAMETERIUIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, const GLuint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETTEXTUREPARAMETERIUIVEXTPROC)(GLuint texture, GLenum target, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERIIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
typedef void(APIENTRYP PFNGLMULTITEXPARAMETERIUIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, const GLuint* params);
typedef void(APIENTRYP PFNGLGETMULTITEXPARAMETERIIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETMULTITEXPARAMETERIUIVEXTPROC)(GLenum texunit, GLenum target, GLenum pname, GLuint* params);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UIEXTPROC)(GLuint program, GLint location, GLuint v0);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UIEXTPROC)(GLuint program, GLint location, GLuint v0, GLuint v1);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UIEXTPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UIEXTPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UIVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UIVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UIVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UIVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLuint* value);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERS4FVEXTPROC)(GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat* params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4IEXTPROC)(GLuint program, GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4IVEXTPROC)(GLuint program, GLenum target, GLuint index, const GLint* params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERSI4IVEXTPROC)(GLuint program, GLenum target, GLuint index, GLsizei count, const GLint* params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIEXTPROC)(GLuint program, GLenum target, GLuint index, GLuint x, GLuint y,
                                                                   GLuint z, GLuint w);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERI4UIVEXTPROC)(GLuint program, GLenum target, GLuint index, const GLuint* params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETERSI4UIVEXTPROC)(GLuint program, GLenum target, GLuint index, GLsizei count,
                                                                     const GLuint* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERIIVEXTPROC)(GLuint program, GLenum target, GLuint index, GLint* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERIUIVEXTPROC)(GLuint program, GLenum target, GLuint index, GLuint* params);
typedef void(APIENTRYP PFNGLENABLECLIENTSTATEIEXTPROC)(GLenum array, GLuint index);
typedef void(APIENTRYP PFNGLDISABLECLIENTSTATEIEXTPROC)(GLenum array, GLuint index);
typedef void(APIENTRYP PFNGLGETFLOATI_VEXTPROC)(GLenum pname, GLuint index, GLfloat* params);
typedef void(APIENTRYP PFNGLGETDOUBLEI_VEXTPROC)(GLenum pname, GLuint index, GLdouble* params);
typedef void(APIENTRYP PFNGLGETPOINTERI_VEXTPROC)(GLenum pname, GLuint index, void** params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMSTRINGEXTPROC)(GLuint program, GLenum target, GLenum format, GLsizei len, const void* string);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4DEXTPROC)(GLuint program, GLenum target, GLuint index, GLdouble x, GLdouble y,
                                                                 GLdouble z, GLdouble w);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4DVEXTPROC)(GLuint program, GLenum target, GLuint index, const GLdouble* params);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4FEXTPROC)(GLuint program, GLenum target, GLuint index, GLfloat x, GLfloat y,
                                                                 GLfloat z, GLfloat w);
typedef void(APIENTRYP PFNGLNAMEDPROGRAMLOCALPARAMETER4FVEXTPROC)(GLuint program, GLenum target, GLuint index, const GLfloat* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERDVEXTPROC)(GLuint program, GLenum target, GLuint index, GLdouble* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMLOCALPARAMETERFVEXTPROC)(GLuint program, GLenum target, GLuint index, GLfloat* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMIVEXTPROC)(GLuint program, GLenum target, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGETNAMEDPROGRAMSTRINGEXTPROC)(GLuint program, GLenum target, GLenum pname, void* string);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEEXTPROC)(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLGETNAMEDRENDERBUFFERPARAMETERIVEXTPROC)(GLuint renderbuffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC)(GLuint renderbuffer, GLsizei samples, GLenum internalformat,
                                                                        GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLNAMEDRENDERBUFFERSTORAGEMULTISAMPLECOVERAGEEXTPROC)(
  GLuint renderbuffer, GLsizei coverageSamples, GLsizei colorSamples, GLenum internalformat, GLsizei width, GLsizei height);
typedef GLenum(APIENTRYP PFNGLCHECKNAMEDFRAMEBUFFERSTATUSEXTPROC)(GLuint framebuffer, GLenum target);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE1DEXTPROC)(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE2DEXTPROC)(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURE3DEXTPROC)(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture,
                                                              GLint level, GLint zoffset);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERRENDERBUFFEREXTPROC)(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget,
                                                                 GLuint renderbuffer);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC)(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLGENERATETEXTUREMIPMAPEXTPROC)(GLuint texture, GLenum target);
typedef void(APIENTRYP PFNGLGENERATEMULTITEXMIPMAPEXTPROC)(GLenum texunit, GLenum target);
typedef void(APIENTRYP PFNGLFRAMEBUFFERDRAWBUFFEREXTPROC)(GLuint framebuffer, GLenum mode);
typedef void(APIENTRYP PFNGLFRAMEBUFFERDRAWBUFFERSEXTPROC)(GLuint framebuffer, GLsizei n, const GLenum* bufs);
typedef void(APIENTRYP PFNGLFRAMEBUFFERREADBUFFEREXTPROC)(GLuint framebuffer, GLenum mode);
typedef void(APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVEXTPROC)(GLuint framebuffer, GLenum pname, GLint* params);
typedef void(APIENTRYP PFNGLNAMEDCOPYBUFFERSUBDATAEXTPROC)(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset,
                                                           GLsizeiptr size);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREEXTPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTURELAYEREXTPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERTEXTUREFACEEXTPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLenum face);
typedef void(APIENTRYP PFNGLTEXTURERENDERBUFFEREXTPROC)(GLuint texture, GLenum target, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLMULTITEXRENDERBUFFEREXTPROC)(GLenum texunit, GLenum target, GLuint renderbuffer);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYCOLOROFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
/* Function-pointer types for EXT vertex-array entry points; every one takes a
 * GLuint vaobj as its first argument.
 *   - *OFFSETEXTPROC types additionally take a GLuint buffer and end with
 *     (GLsizei stride, GLintptr offset).
 *   - ENABLE/DISABLE types take either a GLenum array or a GLuint index.
 *   - GET* types take a non-const pointer as last argument (presumably an
 *     output location -- confirm against the extension spec).
 * All of them return void. */
typedef void(APIENTRYP PFNGLVERTEXARRAYEDGEFLAGOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYINDEXOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYNORMALOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYTEXCOORDOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYMULTITEXCOORDOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLenum texunit, GLint size, GLenum type,
                                                                   GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYFOGCOORDOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYSECONDARYCOLOROFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride,
                                                                    GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type,
                                                                  GLboolean normalized, GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBIOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type,
                                                                   GLsizei stride, GLintptr offset);
typedef void(APIENTRYP PFNGLENABLEVERTEXARRAYEXTPROC)(GLuint vaobj, GLenum array);
typedef void(APIENTRYP PFNGLDISABLEVERTEXARRAYEXTPROC)(GLuint vaobj, GLenum array);
typedef void(APIENTRYP PFNGLENABLEVERTEXARRAYATTRIBEXTPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLDISABLEVERTEXARRAYATTRIBEXTPROC)(GLuint vaobj, GLuint index);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINTEGERVEXTPROC)(GLuint vaobj, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYPOINTERVEXTPROC)(GLuint vaobj, GLenum pname, void** param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYINTEGERI_VEXTPROC)(GLuint vaobj, GLuint index, GLenum pname, GLint* param);
typedef void(APIENTRYP PFNGLGETVERTEXARRAYPOINTERI_VEXTPROC)(GLuint vaobj, GLuint index, GLenum pname, void** param);
/* Function-pointer types for entry points addressed by a GLuint buffer or a
 * GLuint framebuffer as first argument.
 * PFNGLMAPNAMEDBUFFERRANGEEXTPROC is the only one in this group that returns a
 * value (void*); the rest return void. */
typedef void*(APIENTRYP PFNGLMAPNAMEDBUFFERRANGEEXTPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
typedef void(APIENTRYP PFNGLFLUSHMAPPEDNAMEDBUFFERRANGEEXTPROC)(GLuint buffer, GLintptr offset, GLsizeiptr length);
typedef void(APIENTRYP PFNGLNAMEDBUFFERSTORAGEEXTPROC)(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags);
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERDATAEXTPROC)(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data);
/* NOTE(review): offset is typed GLsizeiptr here, whereas the other offset
 * parameters in this group are GLintptr. Left as-is; confirm against the
 * upstream registry before changing. */
typedef void(APIENTRYP PFNGLCLEARNAMEDBUFFERSUBDATAEXTPROC)(GLuint buffer, GLenum internalformat, GLsizeiptr offset, GLsizeiptr size,
                                                            GLenum format, GLenum type, const void* data);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERPARAMETERIEXTPROC)(GLuint framebuffer, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLGETNAMEDFRAMEBUFFERPARAMETERIVEXTPROC)(GLuint framebuffer, GLenum pname, GLint* params);
/* Function-pointer types for the double-precision program-uniform entry
 * points. All take (GLuint program, GLint location, ...) and return void.
 *   - 1D..4D:        one to four GLdouble components passed by value.
 *   - 1DV..4DV:      (GLsizei count, const GLdouble* value).
 *   - MATRIX*DV:     (GLsizei count, GLboolean transpose, const GLdouble* value). */
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1DEXTPROC)(GLuint program, GLint location, GLdouble x);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2DEXTPROC)(GLuint program, GLint location, GLdouble x, GLdouble y);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3DEXTPROC)(GLuint program, GLint location, GLdouble x, GLdouble y, GLdouble z);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4DEXTPROC)(GLuint program, GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1DVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2DVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3DVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4DVEXTPROC)(GLuint program, GLint location, GLsizei count, const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                            const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X3DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2X4DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X2DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3X4DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X2DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4X3DVEXTPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose,
                                                              const GLdouble* value);
/* Function-pointer types for texture entry points that take an explicit
 * (GLuint texture, GLenum target) pair as their first two arguments:
 * one buffer-range binding type, the 1D/2D/3D storage types, and the two
 * multisample storage types (which end with GLboolean fixedsamplelocations).
 * All return void. */
typedef void(APIENTRYP PFNGLTEXTUREBUFFERRANGEEXTPROC)(GLuint texture, GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset,
                                                       GLsizeiptr size);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC)(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC)(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width,
                                                     GLsizei height);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC)(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width,
                                                     GLsizei height, GLsizei depth);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE2DMULTISAMPLEEXTPROC)(GLuint texture, GLenum target, GLsizei samples, GLenum internalformat,
                                                                GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
typedef void(APIENTRYP PFNGLTEXTURESTORAGE3DMULTISAMPLEEXTPROC)(GLuint texture, GLenum target, GLsizei samples, GLenum internalformat,
                                                                GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
/* Function-pointer types for vertex-array binding/format entry points keyed by
 * GLuint vaobj (buffer binding, attribute formats, attribute-to-binding
 * mapping, divisors, and the L-variant attribute offset), plus one texture
 * page-commitment type that is keyed by GLuint texture instead.
 * All return void. */
typedef void(APIENTRYP PFNGLVERTEXARRAYBINDVERTEXBUFFEREXTPROC)(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBFORMATEXTPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type,
                                                                  GLboolean normalized, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBIFORMATEXTPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBLFORMATEXTPROC)(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBBINDINGEXTPROC)(GLuint vaobj, GLuint attribindex, GLuint bindingindex);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXBINDINGDIVISOREXTPROC)(GLuint vaobj, GLuint bindingindex, GLuint divisor);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBLOFFSETEXTPROC)(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type,
                                                                   GLsizei stride, GLintptr offset);
/* Keyed by texture name rather than vaobj; the trailing GLboolean commit is
 * the only boolean argument. */
typedef void(APIENTRYP PFNGLTEXTUREPAGECOMMITMENTEXTPROC)(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                          GLsizei width, GLsizei height, GLsizei depth, GLboolean commit);
typedef void(APIENTRYP PFNGLVERTEXARRAYVERTEXATTRIBDIVISOREXTPROC)(GLuint vaobj, GLuint index, GLuint divisor);
#ifdef GL_GLEXT_PROTOTYPES
/* Prototypes for the matrix entry points that take an explicit GLenum mode as
 * their first argument (f/d suffixed pairs differ only in GLfloat vs GLdouble
 * operands), followed by the two client-attrib entry points taking a
 * GLbitfield mask. Only declared when GL_GLEXT_PROTOTYPES is defined (see the
 * enclosing #ifdef). */
GLAPI void APIENTRY glMatrixLoadfEXT(GLenum mode, const GLfloat* m);
GLAPI void APIENTRY glMatrixLoaddEXT(GLenum mode, const GLdouble* m);
GLAPI void APIENTRY glMatrixMultfEXT(GLenum mode, const GLfloat* m);
GLAPI void APIENTRY glMatrixMultdEXT(GLenum mode, const GLdouble* m);
GLAPI void APIENTRY glMatrixLoadIdentityEXT(GLenum mode);
GLAPI void APIENTRY glMatrixRotatefEXT(GLenum mode, GLfloat angle, GLfloat x, GLfloat y, GLfloat z);
GLAPI void APIENTRY glMatrixRotatedEXT(GLenum mode, GLdouble angle, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glMatrixScalefEXT(GLenum mode, GLfloat x, GLfloat y, GLfloat z);
GLAPI void APIENTRY glMatrixScaledEXT(GLenum mode, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glMatrixTranslatefEXT(GLenum mode, GLfloat x, GLfloat y, GLfloat z);
GLAPI void APIENTRY glMatrixTranslatedEXT(GLenum mode, GLdouble x, GLdouble y, GLdouble z);
/* Frustum/Ortho exist only in a GLdouble form in this list. */
GLAPI void APIENTRY glMatrixFrustumEXT(GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
GLAPI void APIENTRY glMatrixOrthoEXT(GLenum mode, GLdouble left, GLdouble right, GLdouble bottom, GLdouble top, GLdouble zNear, GLdouble zFar);
GLAPI void APIENTRY glMatrixPopEXT(GLenum mode);
GLAPI void APIENTRY glMatrixPushEXT(GLenum mode);
GLAPI void APIENTRY glClientAttribDefaultEXT(GLbitfield mask);
GLAPI void APIENTRY glPushClientAttribDefaultEXT(GLbitfield mask);
/* Prototypes for texture entry points addressed by an explicit
 * (GLuint texture, GLenum target) pair: parameter set/get, image and sub-image
 * upload, copy-from-framebuffer variants, and image/level-parameter queries.
 * All return void. Only declared when GL_GLEXT_PROTOTYPES is defined.
 * Note that glTextureImage{1,2,3}DEXT declare internalformat as GLint while
 * glCopyTextureImage{1,2}DEXT declare it as GLenum. */
GLAPI void APIENTRY glTextureParameterfEXT(GLuint texture, GLenum target, GLenum pname, GLfloat param);
GLAPI void APIENTRY glTextureParameterfvEXT(GLuint texture, GLenum target, GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glTextureParameteriEXT(GLuint texture, GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glTextureParameterivEXT(GLuint texture, GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glTextureImage1DEXT(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border,
                                        GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureImage2DEXT(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height,
                                        GLint border, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureSubImage1DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format,
                                           GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureSubImage2DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width,
                                           GLsizei height, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCopyTextureImage1DEXT(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                            GLsizei width, GLint border);
GLAPI void APIENTRY glCopyTextureImage2DEXT(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                            GLsizei width, GLsizei height, GLint border);
GLAPI void APIENTRY glCopyTextureSubImage1DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
GLAPI void APIENTRY glCopyTextureSubImage2DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y,
                                               GLsizei width, GLsizei height);
GLAPI void APIENTRY glGetTextureImageEXT(GLuint texture, GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
GLAPI void APIENTRY glGetTextureParameterfvEXT(GLuint texture, GLenum target, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTextureParameterivEXT(GLuint texture, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTextureLevelParameterfvEXT(GLuint texture, GLenum target, GLint level, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetTextureLevelParameterivEXT(GLuint texture, GLenum target, GLint level, GLenum pname, GLint* params);
GLAPI void APIENTRY glTextureImage3DEXT(GLuint texture, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height,
                                        GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glTextureSubImage3DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                           GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCopyTextureSubImage3DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                               GLint x, GLint y, GLsizei width, GLsizei height);
/* Prototypes for the "MultiTex" entry points, which take a GLenum texunit as
 * their first argument instead of a texture name. The set parallels the
 * glTexture*EXT prototypes (parameter, image, sub-image, copy, and query
 * forms) and adds bind, tex-coord pointer, TexEnv and TexGen variants.
 * All return void. Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glBindMultiTextureEXT(GLenum texunit, GLenum target, GLuint texture);
GLAPI void APIENTRY glMultiTexCoordPointerEXT(GLenum texunit, GLint size, GLenum type, GLsizei stride, const void* pointer);
GLAPI void APIENTRY glMultiTexEnvfEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat param);
GLAPI void APIENTRY glMultiTexEnvfvEXT(GLenum texunit, GLenum target, GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glMultiTexEnviEXT(GLenum texunit, GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glMultiTexEnvivEXT(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
/* TexGen variants take a GLenum coord where the TexEnv variants take target. */
GLAPI void APIENTRY glMultiTexGendEXT(GLenum texunit, GLenum coord, GLenum pname, GLdouble param);
GLAPI void APIENTRY glMultiTexGendvEXT(GLenum texunit, GLenum coord, GLenum pname, const GLdouble* params);
GLAPI void APIENTRY glMultiTexGenfEXT(GLenum texunit, GLenum coord, GLenum pname, GLfloat param);
GLAPI void APIENTRY glMultiTexGenfvEXT(GLenum texunit, GLenum coord, GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glMultiTexGeniEXT(GLenum texunit, GLenum coord, GLenum pname, GLint param);
GLAPI void APIENTRY glMultiTexGenivEXT(GLenum texunit, GLenum coord, GLenum pname, const GLint* params);
GLAPI void APIENTRY glGetMultiTexEnvfvEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetMultiTexEnvivEXT(GLenum texunit, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetMultiTexGendvEXT(GLenum texunit, GLenum coord, GLenum pname, GLdouble* params);
GLAPI void APIENTRY glGetMultiTexGenfvEXT(GLenum texunit, GLenum coord, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetMultiTexGenivEXT(GLenum texunit, GLenum coord, GLenum pname, GLint* params);
GLAPI void APIENTRY glMultiTexParameteriEXT(GLenum texunit, GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glMultiTexParameterivEXT(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glMultiTexParameterfEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat param);
GLAPI void APIENTRY glMultiTexParameterfvEXT(GLenum texunit, GLenum target, GLenum pname, const GLfloat* params);
GLAPI void APIENTRY glMultiTexImage1DEXT(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLint border,
                                         GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glMultiTexImage2DEXT(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height,
                                         GLint border, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glMultiTexSubImage1DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width, GLenum format,
                                            GLenum type, const void* pixels);
GLAPI void APIENTRY glMultiTexSubImage2DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width,
                                            GLsizei height, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCopyMultiTexImage1DEXT(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                             GLsizei width, GLint border);
GLAPI void APIENTRY glCopyMultiTexImage2DEXT(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLint x, GLint y,
                                             GLsizei width, GLsizei height, GLint border);
GLAPI void APIENTRY glCopyMultiTexSubImage1DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint x, GLint y, GLsizei width);
GLAPI void APIENTRY glCopyMultiTexSubImage2DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y,
                                                GLsizei width, GLsizei height);
GLAPI void APIENTRY glGetMultiTexImageEXT(GLenum texunit, GLenum target, GLint level, GLenum format, GLenum type, void* pixels);
GLAPI void APIENTRY glGetMultiTexParameterfvEXT(GLenum texunit, GLenum target, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetMultiTexParameterivEXT(GLenum texunit, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetMultiTexLevelParameterfvEXT(GLenum texunit, GLenum target, GLint level, GLenum pname, GLfloat* params);
GLAPI void APIENTRY glGetMultiTexLevelParameterivEXT(GLenum texunit, GLenum target, GLint level, GLenum pname, GLint* params);
GLAPI void APIENTRY glMultiTexImage3DEXT(GLenum texunit, GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height,
                                         GLsizei depth, GLint border, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glMultiTexSubImage3DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                            GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void* pixels);
GLAPI void APIENTRY glCopyMultiTexSubImage3DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset,
                                                GLint x, GLint y, GLsizei width, GLsizei height);
/* Prototypes for the "Indexed" entry points: each takes a GLenum selector
 * (array or target) followed by a GLuint index. glIsEnabledIndexedEXT is the
 * only one returning a value (GLboolean); the glGet*IndexedvEXT forms take a
 * non-const data pointer (presumably an output -- confirm against the spec).
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glEnableClientStateIndexedEXT(GLenum array, GLuint index);
GLAPI void APIENTRY glDisableClientStateIndexedEXT(GLenum array, GLuint index);
GLAPI void APIENTRY glGetFloatIndexedvEXT(GLenum target, GLuint index, GLfloat* data);
GLAPI void APIENTRY glGetDoubleIndexedvEXT(GLenum target, GLuint index, GLdouble* data);
GLAPI void APIENTRY glGetPointerIndexedvEXT(GLenum target, GLuint index, void** data);
GLAPI void APIENTRY glEnableIndexedEXT(GLenum target, GLuint index);
GLAPI void APIENTRY glDisableIndexedEXT(GLenum target, GLuint index);
GLAPI GLboolean APIENTRY glIsEnabledIndexedEXT(GLenum target, GLuint index);
GLAPI void APIENTRY glGetIntegerIndexedvEXT(GLenum target, GLuint index, GLint* data);
GLAPI void APIENTRY glGetBooleanIndexedvEXT(GLenum target, GLuint index, GLboolean* data);
/* Prototypes for compressed-image entry points, in two parallel sets:
 *   - glCompressedTexture*EXT / glGetCompressedTextureImageEXT take
 *     (GLuint texture, GLenum target, ...);
 *   - glCompressedMultiTex*EXT / glGetCompressedMultiTexImageEXT take
 *     (GLenum texunit, GLenum target, ...).
 * Upload forms end with (GLsizei imageSize, const void* bits); the Get forms
 * take (GLint lod, void* img). All return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glCompressedTextureImage3DEXT(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                  GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedTextureImage2DEXT(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                  GLsizei height, GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedTextureImage1DEXT(GLuint texture, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                  GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedTextureSubImage3DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                     GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                     GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedTextureSubImage2DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                     GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedTextureSubImage1DEXT(GLuint texture, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                     GLenum format, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glGetCompressedTextureImageEXT(GLuint texture, GLenum target, GLint lod, void* img);
GLAPI void APIENTRY glCompressedMultiTexImage3DEXT(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                   GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedMultiTexImage2DEXT(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                   GLsizei height, GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedMultiTexImage1DEXT(GLenum texunit, GLenum target, GLint level, GLenum internalformat, GLsizei width,
                                                   GLint border, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedMultiTexSubImage3DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                      GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format,
                                                      GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedMultiTexSubImage2DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLint yoffset,
                                                      GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glCompressedMultiTexSubImage1DEXT(GLenum texunit, GLenum target, GLint level, GLint xoffset, GLsizei width,
                                                      GLenum format, GLsizei imageSize, const void* bits);
GLAPI void APIENTRY glGetCompressedMultiTexImageEXT(GLenum texunit, GLenum target, GLint lod, void* img);
/* Prototypes for the transpose load/mult matrix entry points; same
 * (GLenum mode, const T* m) shape as glMatrixLoad{f,d}EXT / glMatrixMult{f,d}EXT,
 * with T being GLfloat for the f suffix and GLdouble for the d suffix.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glMatrixLoadTransposefEXT(GLenum mode, const GLfloat* m);
GLAPI void APIENTRY glMatrixLoadTransposedEXT(GLenum mode, const GLdouble* m);
GLAPI void APIENTRY glMatrixMultTransposefEXT(GLenum mode, const GLfloat* m);
GLAPI void APIENTRY glMatrixMultTransposedEXT(GLenum mode, const GLdouble* m);
/* Prototypes for buffer entry points addressed by a GLuint buffer first
 * argument. glMapNamedBufferEXT returns void* and glUnmapNamedBufferEXT
 * returns GLboolean; the others return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glNamedBufferDataEXT(GLuint buffer, GLsizeiptr size, const void* data, GLenum usage);
GLAPI void APIENTRY glNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, const void* data);
GLAPI void* APIENTRY glMapNamedBufferEXT(GLuint buffer, GLenum access);
GLAPI GLboolean APIENTRY glUnmapNamedBufferEXT(GLuint buffer);
GLAPI void APIENTRY glGetNamedBufferParameterivEXT(GLuint buffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetNamedBufferPointervEXT(GLuint buffer, GLenum pname, void** params);
GLAPI void APIENTRY glGetNamedBufferSubDataEXT(GLuint buffer, GLintptr offset, GLsizeiptr size, void* data);
/* Prototypes for the GLfloat / GLint program-uniform entry points. All take
 * (GLuint program, GLint location, ...) and return void.
 *   - {1..4}{f,i}:   one to four components passed by value (v0..v3).
 *   - {1..4}{f,i}v:  (GLsizei count, const T* value).
 *   - Matrix*fv:     (GLsizei count, GLboolean transpose, const GLfloat* value).
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glProgramUniform1fEXT(GLuint program, GLint location, GLfloat v0);
GLAPI void APIENTRY glProgramUniform2fEXT(GLuint program, GLint location, GLfloat v0, GLfloat v1);
GLAPI void APIENTRY glProgramUniform3fEXT(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
GLAPI void APIENTRY glProgramUniform4fEXT(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
GLAPI void APIENTRY glProgramUniform1iEXT(GLuint program, GLint location, GLint v0);
GLAPI void APIENTRY glProgramUniform2iEXT(GLuint program, GLint location, GLint v0, GLint v1);
GLAPI void APIENTRY glProgramUniform3iEXT(GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
GLAPI void APIENTRY glProgramUniform4iEXT(GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
GLAPI void APIENTRY glProgramUniform1fvEXT(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform2fvEXT(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform3fvEXT(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform4fvEXT(GLuint program, GLint location, GLsizei count, const GLfloat* value);
GLAPI void APIENTRY glProgramUniform1ivEXT(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform2ivEXT(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform3ivEXT(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniform4ivEXT(GLuint program, GLint location, GLsizei count, const GLint* value);
GLAPI void APIENTRY glProgramUniformMatrix2fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix2x3fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3x2fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix2x4fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4x2fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix3x4fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
GLAPI void APIENTRY glProgramUniformMatrix4x3fvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat* value);
/* Prototypes for the texture-buffer attach entry points and the ParameterI
 * set/get entry points (GLint and GLuint array forms), each provided in a
 * texture-name form (GLuint texture first) and a texunit form (GLenum texunit
 * first). All return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glTextureBufferEXT(GLuint texture, GLenum target, GLenum internalformat, GLuint buffer);
GLAPI void APIENTRY glMultiTexBufferEXT(GLenum texunit, GLenum target, GLenum internalformat, GLuint buffer);
GLAPI void APIENTRY glTextureParameterIivEXT(GLuint texture, GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glTextureParameterIuivEXT(GLuint texture, GLenum target, GLenum pname, const GLuint* params);
GLAPI void APIENTRY glGetTextureParameterIivEXT(GLuint texture, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetTextureParameterIuivEXT(GLuint texture, GLenum target, GLenum pname, GLuint* params);
GLAPI void APIENTRY glMultiTexParameterIivEXT(GLenum texunit, GLenum target, GLenum pname, const GLint* params);
GLAPI void APIENTRY glMultiTexParameterIuivEXT(GLenum texunit, GLenum target, GLenum pname, const GLuint* params);
GLAPI void APIENTRY glGetMultiTexParameterIivEXT(GLenum texunit, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetMultiTexParameterIuivEXT(GLenum texunit, GLenum target, GLenum pname, GLuint* params);
/* Prototypes for the GLuint program-uniform entry points: by-value forms with
 * one to four components (v0..v3) and array forms taking
 * (GLsizei count, const GLuint* value). All take (GLuint program,
 * GLint location, ...) and return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glProgramUniform1uiEXT(GLuint program, GLint location, GLuint v0);
GLAPI void APIENTRY glProgramUniform2uiEXT(GLuint program, GLint location, GLuint v0, GLuint v1);
GLAPI void APIENTRY glProgramUniform3uiEXT(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
GLAPI void APIENTRY glProgramUniform4uiEXT(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
GLAPI void APIENTRY glProgramUniform1uivEXT(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform2uivEXT(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform3uivEXT(GLuint program, GLint location, GLsizei count, const GLuint* value);
GLAPI void APIENTRY glProgramUniform4uivEXT(GLuint program, GLint location, GLsizei count, const GLuint* value);
/* Prototypes for the "NamedProgram" entry points, which take
 * (GLuint program, GLenum target, ...): local-parameter setters and getters in
 * float, double, signed-integer (I4i) and unsigned-integer (I4ui) flavours,
 * plus program string upload/query and an integer query.
 * Interleaved are five index-based state entry points (glEnableClientStateiEXT,
 * glDisableClientStateiEXT, glGet{Float,Double,Pointer}i_vEXT) that do not take
 * a program. All return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glNamedProgramLocalParameters4fvEXT(GLuint program, GLenum target, GLuint index, GLsizei count, const GLfloat* params);
GLAPI void APIENTRY glNamedProgramLocalParameterI4iEXT(GLuint program, GLenum target, GLuint index, GLint x, GLint y, GLint z, GLint w);
GLAPI void APIENTRY glNamedProgramLocalParameterI4ivEXT(GLuint program, GLenum target, GLuint index, const GLint* params);
GLAPI void APIENTRY glNamedProgramLocalParametersI4ivEXT(GLuint program, GLenum target, GLuint index, GLsizei count, const GLint* params);
GLAPI void APIENTRY glNamedProgramLocalParameterI4uiEXT(GLuint program, GLenum target, GLuint index, GLuint x, GLuint y, GLuint z, GLuint w);
GLAPI void APIENTRY glNamedProgramLocalParameterI4uivEXT(GLuint program, GLenum target, GLuint index, const GLuint* params);
GLAPI void APIENTRY glNamedProgramLocalParametersI4uivEXT(GLuint program, GLenum target, GLuint index, GLsizei count, const GLuint* params);
GLAPI void APIENTRY glGetNamedProgramLocalParameterIivEXT(GLuint program, GLenum target, GLuint index, GLint* params);
GLAPI void APIENTRY glGetNamedProgramLocalParameterIuivEXT(GLuint program, GLenum target, GLuint index, GLuint* params);
/* Same parameter shapes as the *IndexedEXT prototypes, under i / i_v names. */
GLAPI void APIENTRY glEnableClientStateiEXT(GLenum array, GLuint index);
GLAPI void APIENTRY glDisableClientStateiEXT(GLenum array, GLuint index);
GLAPI void APIENTRY glGetFloati_vEXT(GLenum pname, GLuint index, GLfloat* params);
GLAPI void APIENTRY glGetDoublei_vEXT(GLenum pname, GLuint index, GLdouble* params);
GLAPI void APIENTRY glGetPointeri_vEXT(GLenum pname, GLuint index, void** params);
GLAPI void APIENTRY glNamedProgramStringEXT(GLuint program, GLenum target, GLenum format, GLsizei len, const void* string);
GLAPI void APIENTRY glNamedProgramLocalParameter4dEXT(GLuint program, GLenum target, GLuint index, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
GLAPI void APIENTRY glNamedProgramLocalParameter4dvEXT(GLuint program, GLenum target, GLuint index, const GLdouble* params);
GLAPI void APIENTRY glNamedProgramLocalParameter4fEXT(GLuint program, GLenum target, GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w);
GLAPI void APIENTRY glNamedProgramLocalParameter4fvEXT(GLuint program, GLenum target, GLuint index, const GLfloat* params);
GLAPI void APIENTRY glGetNamedProgramLocalParameterdvEXT(GLuint program, GLenum target, GLuint index, GLdouble* params);
GLAPI void APIENTRY glGetNamedProgramLocalParameterfvEXT(GLuint program, GLenum target, GLuint index, GLfloat* params);
GLAPI void APIENTRY glGetNamedProgramivEXT(GLuint program, GLenum target, GLenum pname, GLint* params);
GLAPI void APIENTRY glGetNamedProgramStringEXT(GLuint program, GLenum target, GLenum pname, void* string);
/* Prototypes for entry points addressed by a GLuint renderbuffer or a GLuint
 * framebuffer first argument (storage allocation, attachment, draw/read buffer
 * selection, parameter queries), together with mipmap generation, a
 * buffer-to-buffer copy, and texture/texunit renderbuffer attachment.
 * glCheckNamedFramebufferStatusEXT returns GLenum; all others return void.
 * Only declared when GL_GLEXT_PROTOTYPES is defined. */
GLAPI void APIENTRY glNamedRenderbufferStorageEXT(GLuint renderbuffer, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glGetNamedRenderbufferParameterivEXT(GLuint renderbuffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleEXT(GLuint renderbuffer, GLsizei samples, GLenum internalformat, GLsizei width,
                                                             GLsizei height);
GLAPI void APIENTRY glNamedRenderbufferStorageMultisampleCoverageEXT(GLuint renderbuffer, GLsizei coverageSamples, GLsizei colorSamples,
                                                                     GLenum internalformat, GLsizei width, GLsizei height);
GLAPI GLenum APIENTRY glCheckNamedFramebufferStatusEXT(GLuint framebuffer, GLenum target);
GLAPI void APIENTRY glNamedFramebufferTexture1DEXT(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glNamedFramebufferTexture2DEXT(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
GLAPI void APIENTRY glNamedFramebufferTexture3DEXT(GLuint framebuffer, GLenum attachment, GLenum textarget, GLuint texture, GLint level,
                                                   GLint zoffset);
GLAPI void APIENTRY glNamedFramebufferRenderbufferEXT(GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer);
GLAPI void APIENTRY glGetNamedFramebufferAttachmentParameterivEXT(GLuint framebuffer, GLenum attachment, GLenum pname, GLint* params);
GLAPI void APIENTRY glGenerateTextureMipmapEXT(GLuint texture, GLenum target);
GLAPI void APIENTRY glGenerateMultiTexMipmapEXT(GLenum texunit, GLenum target);
GLAPI void APIENTRY glFramebufferDrawBufferEXT(GLuint framebuffer, GLenum mode);
GLAPI void APIENTRY glFramebufferDrawBuffersEXT(GLuint framebuffer, GLsizei n, const GLenum* bufs);
GLAPI void APIENTRY glFramebufferReadBufferEXT(GLuint framebuffer, GLenum mode);
GLAPI void APIENTRY glGetFramebufferParameterivEXT(GLuint framebuffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glNamedCopyBufferSubDataEXT(GLuint readBuffer, GLuint writeBuffer, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size);
GLAPI void APIENTRY glNamedFramebufferTextureEXT(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level);
GLAPI void APIENTRY glNamedFramebufferTextureLayerEXT(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint layer);
GLAPI void APIENTRY glNamedFramebufferTextureFaceEXT(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLenum face);
GLAPI void APIENTRY glTextureRenderbufferEXT(GLuint texture, GLenum target, GLuint renderbuffer);
GLAPI void APIENTRY glMultiTexRenderbufferEXT(GLenum texunit, GLenum target, GLuint renderbuffer);
GLAPI void APIENTRY glVertexArrayVertexOffsetEXT(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayColorOffsetEXT(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayEdgeFlagOffsetEXT(GLuint vaobj, GLuint buffer, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayIndexOffsetEXT(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayNormalOffsetEXT(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayTexCoordOffsetEXT(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayMultiTexCoordOffsetEXT(GLuint vaobj, GLuint buffer, GLenum texunit, GLint size, GLenum type,
                                                        GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayFogCoordOffsetEXT(GLuint vaobj, GLuint buffer, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArraySecondaryColorOffsetEXT(GLuint vaobj, GLuint buffer, GLint size, GLenum type, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayVertexAttribOffsetEXT(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type,
                                                       GLboolean normalized, GLsizei stride, GLintptr offset);
GLAPI void APIENTRY glVertexArrayVertexAttribIOffsetEXT(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride,
                                                        GLintptr offset);
GLAPI void APIENTRY glEnableVertexArrayEXT(GLuint vaobj, GLenum array);
GLAPI void APIENTRY glDisableVertexArrayEXT(GLuint vaobj, GLenum array);
GLAPI void APIENTRY glEnableVertexArrayAttribEXT(GLuint vaobj, GLuint index);
GLAPI void APIENTRY glDisableVertexArrayAttribEXT(GLuint vaobj, GLuint index);
GLAPI void APIENTRY glGetVertexArrayIntegervEXT(GLuint vaobj, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetVertexArrayPointervEXT(GLuint vaobj, GLenum pname, void** param);
GLAPI void APIENTRY glGetVertexArrayIntegeri_vEXT(GLuint vaobj, GLuint index, GLenum pname, GLint* param);
GLAPI void APIENTRY glGetVertexArrayPointeri_vEXT(GLuint vaobj, GLuint index, GLenum pname, void** param);
GLAPI void* APIENTRY glMapNamedBufferRangeEXT(GLuint buffer, GLintptr offset, GLsizeiptr length, GLbitfield access);
GLAPI void APIENTRY glFlushMappedNamedBufferRangeEXT(GLuint buffer, GLintptr offset, GLsizeiptr length);
GLAPI void APIENTRY glNamedBufferStorageEXT(GLuint buffer, GLsizeiptr size, const void* data, GLbitfield flags);
GLAPI void APIENTRY glClearNamedBufferDataEXT(GLuint buffer, GLenum internalformat, GLenum format, GLenum type, const void* data);
GLAPI void APIENTRY glClearNamedBufferSubDataEXT(GLuint buffer, GLenum internalformat, GLsizeiptr offset, GLsizeiptr size, GLenum format,
                                                 GLenum type, const void* data);
GLAPI void APIENTRY glNamedFramebufferParameteriEXT(GLuint framebuffer, GLenum pname, GLint param);
GLAPI void APIENTRY glGetNamedFramebufferParameterivEXT(GLuint framebuffer, GLenum pname, GLint* params);
GLAPI void APIENTRY glProgramUniform1dEXT(GLuint program, GLint location, GLdouble x);
GLAPI void APIENTRY glProgramUniform2dEXT(GLuint program, GLint location, GLdouble x, GLdouble y);
GLAPI void APIENTRY glProgramUniform3dEXT(GLuint program, GLint location, GLdouble x, GLdouble y, GLdouble z);
GLAPI void APIENTRY glProgramUniform4dEXT(GLuint program, GLint location, GLdouble x, GLdouble y, GLdouble z, GLdouble w);
GLAPI void APIENTRY glProgramUniform1dvEXT(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform2dvEXT(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform3dvEXT(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniform4dvEXT(GLuint program, GLint location, GLsizei count, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix2dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix2x3dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix2x4dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3x2dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix3x4dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4x2dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glProgramUniformMatrix4x3dvEXT(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLdouble* value);
GLAPI void APIENTRY glTextureBufferRangeEXT(GLuint texture, GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
GLAPI void APIENTRY glTextureStorage1DEXT(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width);
GLAPI void APIENTRY glTextureStorage2DEXT(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
GLAPI void APIENTRY glTextureStorage3DEXT(GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width,
                                          GLsizei height, GLsizei depth);
GLAPI void APIENTRY glTextureStorage2DMultisampleEXT(GLuint texture, GLenum target, GLsizei samples, GLenum internalformat, GLsizei width,
                                                     GLsizei height, GLboolean fixedsamplelocations);
GLAPI void APIENTRY glTextureStorage3DMultisampleEXT(GLuint texture, GLenum target, GLsizei samples, GLenum internalformat, GLsizei width,
                                                     GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
GLAPI void APIENTRY glVertexArrayBindVertexBufferEXT(GLuint vaobj, GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
GLAPI void APIENTRY glVertexArrayVertexAttribFormatEXT(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLboolean normalized,
                                                       GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayVertexAttribIFormatEXT(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayVertexAttribLFormatEXT(GLuint vaobj, GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
GLAPI void APIENTRY glVertexArrayVertexAttribBindingEXT(GLuint vaobj, GLuint attribindex, GLuint bindingindex);
GLAPI void APIENTRY glVertexArrayVertexBindingDivisorEXT(GLuint vaobj, GLuint bindingindex, GLuint divisor);
GLAPI void APIENTRY glVertexArrayVertexAttribLOffsetEXT(GLuint vaobj, GLuint buffer, GLuint index, GLint size, GLenum type, GLsizei stride,
                                                        GLintptr offset);
GLAPI void APIENTRY glTexturePageCommitmentEXT(GLuint texture, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width,
                                               GLsizei height, GLsizei depth, GLboolean commit);
GLAPI void APIENTRY glVertexArrayVertexAttribDivisorEXT(GLuint vaobj, GLuint index, GLuint divisor);
#endif
#endif /* GL_EXT_direct_state_access */

/* GL_EXT_draw_instanced: declares function-pointer typedefs for two entry points
 * (glDrawArraysInstancedEXT, glDrawElementsInstancedEXT). No enum tokens are defined.
 * The direct prototypes are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_draw_instanced
#define GL_EXT_draw_instanced 1
typedef void(APIENTRYP PFNGLDRAWARRAYSINSTANCEDEXTPROC)(GLenum mode, GLint start, GLsizei count, GLsizei primcount);
typedef void(APIENTRYP PFNGLDRAWELEMENTSINSTANCEDEXTPROC)(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei primcount);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawArraysInstancedEXT(GLenum mode, GLint start, GLsizei count, GLsizei primcount);
GLAPI void APIENTRY glDrawElementsInstancedEXT(GLenum mode, GLsizei count, GLenum type, const void* indices, GLsizei primcount);
#endif
#endif /* GL_EXT_draw_instanced */

/* GL_EXT_multiview_tessellation_geometry_shader: presence macro only; this header
 * declares no tokens or entry points for it. */
#ifndef GL_EXT_multiview_tessellation_geometry_shader
#define GL_EXT_multiview_tessellation_geometry_shader 1
#endif /* GL_EXT_multiview_tessellation_geometry_shader */

/* GL_EXT_multiview_texture_multisample: presence macro only; this header declares
 * no tokens or entry points for it. */
#ifndef GL_EXT_multiview_texture_multisample
#define GL_EXT_multiview_texture_multisample 1
#endif /* GL_EXT_multiview_texture_multisample */

/* GL_EXT_multiview_timer_query: presence macro only; this header declares no tokens
 * or entry points for it. */
#ifndef GL_EXT_multiview_timer_query
#define GL_EXT_multiview_timer_query 1
#endif /* GL_EXT_multiview_timer_query */

/* GL_EXT_polygon_offset_clamp: one enum token plus the function-pointer typedef for
 * glPolygonOffsetClampEXT. The direct prototype is only declared when
 * GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_polygon_offset_clamp
#define GL_EXT_polygon_offset_clamp 1
#define GL_POLYGON_OFFSET_CLAMP_EXT 0x8E1B
typedef void(APIENTRYP PFNGLPOLYGONOFFSETCLAMPEXTPROC)(GLfloat factor, GLfloat units, GLfloat clamp);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glPolygonOffsetClampEXT(GLfloat factor, GLfloat units, GLfloat clamp);
#endif
#endif /* GL_EXT_polygon_offset_clamp */

/* GL_EXT_post_depth_coverage: presence macro only; this header declares no tokens or
 * entry points for it. */
#ifndef GL_EXT_post_depth_coverage
#define GL_EXT_post_depth_coverage 1
#endif /* GL_EXT_post_depth_coverage */

/* GL_EXT_raster_multisample: six enum tokens (contiguous values 0x9327..0x932C) plus
 * the function-pointer typedef for glRasterSamplesEXT. The direct prototype is only
 * declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_raster_multisample
#define GL_EXT_raster_multisample 1
#define GL_RASTER_MULTISAMPLE_EXT 0x9327
#define GL_RASTER_SAMPLES_EXT 0x9328
#define GL_MAX_RASTER_SAMPLES_EXT 0x9329
#define GL_RASTER_FIXED_SAMPLE_LOCATIONS_EXT 0x932A
#define GL_MULTISAMPLE_RASTERIZATION_ALLOWED_EXT 0x932B
#define GL_EFFECTIVE_RASTER_SAMPLES_EXT 0x932C
typedef void(APIENTRYP PFNGLRASTERSAMPLESEXTPROC)(GLuint samples, GLboolean fixedsamplelocations);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glRasterSamplesEXT(GLuint samples, GLboolean fixedsamplelocations);
#endif
#endif /* GL_EXT_raster_multisample */

/* GL_EXT_separate_shader_objects: one enum token plus function-pointer typedefs for
 * three entry points (glUseShaderProgramEXT, glActiveProgramEXT,
 * glCreateShaderProgramEXT). Direct prototypes are only declared when
 * GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_separate_shader_objects
#define GL_EXT_separate_shader_objects 1
#define GL_ACTIVE_PROGRAM_EXT 0x8B8D
typedef void(APIENTRYP PFNGLUSESHADERPROGRAMEXTPROC)(GLenum type, GLuint program);
typedef void(APIENTRYP PFNGLACTIVEPROGRAMEXTPROC)(GLuint program);
typedef GLuint(APIENTRYP PFNGLCREATESHADERPROGRAMEXTPROC)(GLenum type, const GLchar* string);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glUseShaderProgramEXT(GLenum type, GLuint program);
GLAPI void APIENTRY glActiveProgramEXT(GLuint program);
GLAPI GLuint APIENTRY glCreateShaderProgramEXT(GLenum type, const GLchar* string);
#endif
#endif /* GL_EXT_separate_shader_objects */

/* GL_EXT_shader_framebuffer_fetch: presence macro and a single enum token; no entry
 * points are declared for it in this header. */
#ifndef GL_EXT_shader_framebuffer_fetch
#define GL_EXT_shader_framebuffer_fetch 1
#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
#endif /* GL_EXT_shader_framebuffer_fetch */

/* GL_EXT_shader_framebuffer_fetch_non_coherent: no enum tokens; declares the
 * function-pointer typedef for the argument-less glFramebufferFetchBarrierEXT. The
 * direct prototype is only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_shader_framebuffer_fetch_non_coherent
#define GL_EXT_shader_framebuffer_fetch_non_coherent 1
typedef void(APIENTRYP PFNGLFRAMEBUFFERFETCHBARRIEREXTPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFramebufferFetchBarrierEXT(void);
#endif
#endif /* GL_EXT_shader_framebuffer_fetch_non_coherent */

/* GL_EXT_shader_integer_mix: presence macro only; this header declares no tokens or
 * entry points for it. */
#ifndef GL_EXT_shader_integer_mix
#define GL_EXT_shader_integer_mix 1
#endif /* GL_EXT_shader_integer_mix */

/* GL_EXT_texture_compression_s3tc: four enum tokens (contiguous values
 * 0x83F0..0x83F3); no entry points are declared for it in this header. */
#ifndef GL_EXT_texture_compression_s3tc
#define GL_EXT_texture_compression_s3tc 1
#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0
#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1
#define GL_COMPRESSED_RGBA_S3TC_DXT3_EXT 0x83F2
#define GL_COMPRESSED_RGBA_S3TC_DXT5_EXT 0x83F3
#endif /* GL_EXT_texture_compression_s3tc */

/* GL_EXT_texture_filter_minmax: two enum tokens; no entry points are declared for it
 * in this header. */
#ifndef GL_EXT_texture_filter_minmax
#define GL_EXT_texture_filter_minmax 1
#define GL_TEXTURE_REDUCTION_MODE_EXT 0x9366
#define GL_WEIGHTED_AVERAGE_EXT 0x9367
#endif /* GL_EXT_texture_filter_minmax */

/* GL_EXT_texture_sRGB_R8: presence macro and a single enum token; no entry points are
 * declared for it in this header. */
#ifndef GL_EXT_texture_sRGB_R8
#define GL_EXT_texture_sRGB_R8 1
#define GL_SR8_EXT 0x8FBD
#endif /* GL_EXT_texture_sRGB_R8 */

/* GL_EXT_texture_sRGB_decode: three enum tokens (contiguous values 0x8A48..0x8A4A);
 * no entry points are declared for it in this header. */
#ifndef GL_EXT_texture_sRGB_decode
#define GL_EXT_texture_sRGB_decode 1
#define GL_TEXTURE_SRGB_DECODE_EXT 0x8A48
#define GL_DECODE_EXT 0x8A49
#define GL_SKIP_DECODE_EXT 0x8A4A
#endif /* GL_EXT_texture_sRGB_decode */

/* GL_EXT_texture_shadow_lod: presence macro only; this header declares no tokens or
 * entry points for it. */
#ifndef GL_EXT_texture_shadow_lod
#define GL_EXT_texture_shadow_lod 1
#endif /* GL_EXT_texture_shadow_lod */

/* GL_EXT_window_rectangles: six enum tokens (contiguous values 0x8F10..0x8F15) plus
 * the function-pointer typedef for glWindowRectanglesEXT. The direct prototype is only
 * declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_EXT_window_rectangles
#define GL_EXT_window_rectangles 1
#define GL_INCLUSIVE_EXT 0x8F10
#define GL_EXCLUSIVE_EXT 0x8F11
#define GL_WINDOW_RECTANGLE_EXT 0x8F12
#define GL_WINDOW_RECTANGLE_MODE_EXT 0x8F13
#define GL_MAX_WINDOW_RECTANGLES_EXT 0x8F14
#define GL_NUM_WINDOW_RECTANGLES_EXT 0x8F15
typedef void(APIENTRYP PFNGLWINDOWRECTANGLESEXTPROC)(GLenum mode, GLsizei count, const GLint* box);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glWindowRectanglesEXT(GLenum mode, GLsizei count, const GLint* box);
#endif
#endif /* GL_EXT_window_rectangles */

/* GL_INTEL_blackhole_render: presence macro and a single enum token; no entry points
 * are declared for it in this header. */
#ifndef GL_INTEL_blackhole_render
#define GL_INTEL_blackhole_render 1
#define GL_BLACKHOLE_RENDER_INTEL 0x83FC
#endif /* GL_INTEL_blackhole_render */

/* GL_INTEL_conservative_rasterization: presence macro and a single enum token; no
 * entry points are declared for it in this header. */
#ifndef GL_INTEL_conservative_rasterization
#define GL_INTEL_conservative_rasterization 1
#define GL_CONSERVATIVE_RASTERIZATION_INTEL 0x83FE
#endif /* GL_INTEL_conservative_rasterization */

/* GL_INTEL_framebuffer_CMAA: no enum tokens; declares the function-pointer typedef for
 * the argument-less glApplyFramebufferAttachmentCMAAINTEL. The direct prototype is
 * only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_INTEL_framebuffer_CMAA
#define GL_INTEL_framebuffer_CMAA 1
typedef void(APIENTRYP PFNGLAPPLYFRAMEBUFFERATTACHMENTCMAAINTELPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glApplyFramebufferAttachmentCMAAINTEL(void);
#endif
#endif /* GL_INTEL_framebuffer_CMAA */

/* GL_INTEL_performance_query: GL_PERFQUERY_* enum tokens plus function-pointer
 * typedefs for ten entry points. Direct prototypes mirror the typedefs one-to-one and
 * are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_INTEL_performance_query
#define GL_INTEL_performance_query 1
/* Tokens. The first two use small literal values (0 and 1) rather than values from
 * the 0x83xx/0x94xx ranges used by the rest. */
#define GL_PERFQUERY_SINGLE_CONTEXT_INTEL 0x00000000
#define GL_PERFQUERY_GLOBAL_CONTEXT_INTEL 0x00000001
#define GL_PERFQUERY_WAIT_INTEL 0x83FB
#define GL_PERFQUERY_FLUSH_INTEL 0x83FA
#define GL_PERFQUERY_DONOT_FLUSH_INTEL 0x83F9
#define GL_PERFQUERY_COUNTER_EVENT_INTEL 0x94F0
#define GL_PERFQUERY_COUNTER_DURATION_NORM_INTEL 0x94F1
#define GL_PERFQUERY_COUNTER_DURATION_RAW_INTEL 0x94F2
#define GL_PERFQUERY_COUNTER_THROUGHPUT_INTEL 0x94F3
#define GL_PERFQUERY_COUNTER_RAW_INTEL 0x94F4
#define GL_PERFQUERY_COUNTER_TIMESTAMP_INTEL 0x94F5
#define GL_PERFQUERY_COUNTER_DATA_UINT32_INTEL 0x94F8
#define GL_PERFQUERY_COUNTER_DATA_UINT64_INTEL 0x94F9
#define GL_PERFQUERY_COUNTER_DATA_FLOAT_INTEL 0x94FA
#define GL_PERFQUERY_COUNTER_DATA_DOUBLE_INTEL 0x94FB
#define GL_PERFQUERY_COUNTER_DATA_BOOL32_INTEL 0x94FC
#define GL_PERFQUERY_QUERY_NAME_LENGTH_MAX_INTEL 0x94FD
#define GL_PERFQUERY_COUNTER_NAME_LENGTH_MAX_INTEL 0x94FE
#define GL_PERFQUERY_COUNTER_DESC_LENGTH_MAX_INTEL 0x94FF
#define GL_PERFQUERY_GPA_EXTENDED_COUNTERS_INTEL 0x9500
/* Function-pointer types, for entry points resolved at runtime. */
typedef void(APIENTRYP PFNGLBEGINPERFQUERYINTELPROC)(GLuint queryHandle);
typedef void(APIENTRYP PFNGLCREATEPERFQUERYINTELPROC)(GLuint queryId, GLuint* queryHandle);
typedef void(APIENTRYP PFNGLDELETEPERFQUERYINTELPROC)(GLuint queryHandle);
typedef void(APIENTRYP PFNGLENDPERFQUERYINTELPROC)(GLuint queryHandle);
typedef void(APIENTRYP PFNGLGETFIRSTPERFQUERYIDINTELPROC)(GLuint* queryId);
typedef void(APIENTRYP PFNGLGETNEXTPERFQUERYIDINTELPROC)(GLuint queryId, GLuint* nextQueryId);
typedef void(APIENTRYP PFNGLGETPERFCOUNTERINFOINTELPROC)(
  GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar* counterName, GLuint counterDescLength, GLchar* counterDesc,
  GLuint* counterOffset, GLuint* counterDataSize, GLuint* counterTypeEnum, GLuint* counterDataTypeEnum, GLuint64* rawCounterMaxValue);
typedef void(APIENTRYP PFNGLGETPERFQUERYDATAINTELPROC)(GLuint queryHandle, GLuint flags, GLsizei dataSize, void* data, GLuint* bytesWritten);
/* NOTE(review): queryName is a non-const GLchar* although the name suggests an input
 * string; presumably this mirrors the upstream registry signature - confirm before
 * changing, since it is part of the ABI-facing type. */
typedef void(APIENTRYP PFNGLGETPERFQUERYIDBYNAMEINTELPROC)(GLchar* queryName, GLuint* queryId);
typedef void(APIENTRYP PFNGLGETPERFQUERYINFOINTELPROC)(GLuint queryId, GLuint queryNameLength, GLchar* queryName, GLuint* dataSize,
                                                       GLuint* noCounters, GLuint* noInstances, GLuint* capsMask);
#ifdef GL_GLEXT_PROTOTYPES
/* Direct prototypes; parameter lists match the typedefs above. */
GLAPI void APIENTRY glBeginPerfQueryINTEL(GLuint queryHandle);
GLAPI void APIENTRY glCreatePerfQueryINTEL(GLuint queryId, GLuint* queryHandle);
GLAPI void APIENTRY glDeletePerfQueryINTEL(GLuint queryHandle);
GLAPI void APIENTRY glEndPerfQueryINTEL(GLuint queryHandle);
GLAPI void APIENTRY glGetFirstPerfQueryIdINTEL(GLuint* queryId);
GLAPI void APIENTRY glGetNextPerfQueryIdINTEL(GLuint queryId, GLuint* nextQueryId);
GLAPI void APIENTRY glGetPerfCounterInfoINTEL(GLuint queryId, GLuint counterId, GLuint counterNameLength, GLchar* counterName,
                                              GLuint counterDescLength, GLchar* counterDesc, GLuint* counterOffset, GLuint* counterDataSize,
                                              GLuint* counterTypeEnum, GLuint* counterDataTypeEnum, GLuint64* rawCounterMaxValue);
GLAPI void APIENTRY glGetPerfQueryDataINTEL(GLuint queryHandle, GLuint flags, GLsizei dataSize, void* data, GLuint* bytesWritten);
GLAPI void APIENTRY glGetPerfQueryIdByNameINTEL(GLchar* queryName, GLuint* queryId);
GLAPI void APIENTRY glGetPerfQueryInfoINTEL(GLuint queryId, GLuint queryNameLength, GLchar* queryName, GLuint* dataSize, GLuint* noCounters,
                                            GLuint* noInstances, GLuint* capsMask);
#endif
#endif /* GL_INTEL_performance_query */

/* GL_MESA_framebuffer_flip_x: presence macro and a single enum token; no entry points
 * are declared under this guard. */
#ifndef GL_MESA_framebuffer_flip_x
#define GL_MESA_framebuffer_flip_x 1
#define GL_FRAMEBUFFER_FLIP_X_MESA 0x8BBC
#endif /* GL_MESA_framebuffer_flip_x */

/* GL_MESA_framebuffer_flip_y: one enum token plus function-pointer typedefs for two
 * entry points (glFramebufferParameteriMESA, glGetFramebufferParameterivMESA). Direct
 * prototypes are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_MESA_framebuffer_flip_y
#define GL_MESA_framebuffer_flip_y 1
#define GL_FRAMEBUFFER_FLIP_Y_MESA 0x8BBB
typedef void(APIENTRYP PFNGLFRAMEBUFFERPARAMETERIMESAPROC)(GLenum target, GLenum pname, GLint param);
typedef void(APIENTRYP PFNGLGETFRAMEBUFFERPARAMETERIVMESAPROC)(GLenum target, GLenum pname, GLint* params);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFramebufferParameteriMESA(GLenum target, GLenum pname, GLint param);
GLAPI void APIENTRY glGetFramebufferParameterivMESA(GLenum target, GLenum pname, GLint* params);
#endif
#endif /* GL_MESA_framebuffer_flip_y */

/* GL_MESA_framebuffer_swap_xy: presence macro and a single enum token; no entry
 * points are declared under this guard. */
#ifndef GL_MESA_framebuffer_swap_xy
#define GL_MESA_framebuffer_swap_xy 1
#define GL_FRAMEBUFFER_SWAP_XY_MESA 0x8BBD
#endif /* GL_MESA_framebuffer_swap_xy */

/* GL_NV_bindless_multi_draw_indirect: no enum tokens; function-pointer typedefs for
 * two entry points (arrays and elements variants; the elements variant takes an extra
 * GLenum type). Direct prototypes are only declared when GL_GLEXT_PROTOTYPES is
 * defined. */
#ifndef GL_NV_bindless_multi_draw_indirect
#define GL_NV_bindless_multi_draw_indirect 1
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSNVPROC)(GLenum mode, const void* indirect, GLsizei drawCount, GLsizei stride,
                                                                   GLint vertexBufferCount);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSNVPROC)(GLenum mode, GLenum type, const void* indirect, GLsizei drawCount,
                                                                     GLsizei stride, GLint vertexBufferCount);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMultiDrawArraysIndirectBindlessNV(GLenum mode, const void* indirect, GLsizei drawCount, GLsizei stride,
                                                        GLint vertexBufferCount);
GLAPI void APIENTRY glMultiDrawElementsIndirectBindlessNV(GLenum mode, GLenum type, const void* indirect, GLsizei drawCount, GLsizei stride,
                                                          GLint vertexBufferCount);
#endif
#endif /* GL_NV_bindless_multi_draw_indirect */

/* GL_NV_bindless_multi_draw_indirect_count: no enum tokens; function-pointer typedefs
 * for two entry points whose signatures add a GLsizei maxDrawCount parameter compared
 * with the non-"Count" bindless variants. Direct prototypes are only declared when
 * GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_bindless_multi_draw_indirect_count
#define GL_NV_bindless_multi_draw_indirect_count 1
typedef void(APIENTRYP PFNGLMULTIDRAWARRAYSINDIRECTBINDLESSCOUNTNVPROC)(GLenum mode, const void* indirect, GLsizei drawCount,
                                                                        GLsizei maxDrawCount, GLsizei stride, GLint vertexBufferCount);
typedef void(APIENTRYP PFNGLMULTIDRAWELEMENTSINDIRECTBINDLESSCOUNTNVPROC)(GLenum mode, GLenum type, const void* indirect, GLsizei drawCount,
                                                                          GLsizei maxDrawCount, GLsizei stride, GLint vertexBufferCount);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMultiDrawArraysIndirectBindlessCountNV(GLenum mode, const void* indirect, GLsizei drawCount, GLsizei maxDrawCount,
                                                             GLsizei stride, GLint vertexBufferCount);
GLAPI void APIENTRY glMultiDrawElementsIndirectBindlessCountNV(GLenum mode, GLenum type, const void* indirect, GLsizei drawCount,
                                                               GLsizei maxDrawCount, GLsizei stride, GLint vertexBufferCount);
#endif
#endif /* GL_NV_bindless_multi_draw_indirect_count */

/* GL_NV_bindless_texture: no enum tokens; function-pointer typedefs for thirteen entry
 * points. Handles are passed and returned as GLuint64 throughout. Direct prototypes
 * mirror the typedefs one-to-one and are only declared when GL_GLEXT_PROTOTYPES is
 * defined. */
#ifndef GL_NV_bindless_texture
#define GL_NV_bindless_texture 1
typedef GLuint64(APIENTRYP PFNGLGETTEXTUREHANDLENVPROC)(GLuint texture);
typedef GLuint64(APIENTRYP PFNGLGETTEXTURESAMPLERHANDLENVPROC)(GLuint texture, GLuint sampler);
typedef void(APIENTRYP PFNGLMAKETEXTUREHANDLERESIDENTNVPROC)(GLuint64 handle);
typedef void(APIENTRYP PFNGLMAKETEXTUREHANDLENONRESIDENTNVPROC)(GLuint64 handle);
typedef GLuint64(APIENTRYP PFNGLGETIMAGEHANDLENVPROC)(GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format);
typedef void(APIENTRYP PFNGLMAKEIMAGEHANDLERESIDENTNVPROC)(GLuint64 handle, GLenum access);
typedef void(APIENTRYP PFNGLMAKEIMAGEHANDLENONRESIDENTNVPROC)(GLuint64 handle);
typedef void(APIENTRYP PFNGLUNIFORMHANDLEUI64NVPROC)(GLint location, GLuint64 value);
typedef void(APIENTRYP PFNGLUNIFORMHANDLEUI64VNVPROC)(GLint location, GLsizei count, const GLuint64* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64NVPROC)(GLuint program, GLint location, GLuint64 value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMHANDLEUI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64* values);
typedef GLboolean(APIENTRYP PFNGLISTEXTUREHANDLERESIDENTNVPROC)(GLuint64 handle);
typedef GLboolean(APIENTRYP PFNGLISIMAGEHANDLERESIDENTNVPROC)(GLuint64 handle);
#ifdef GL_GLEXT_PROTOTYPES
/* Direct prototypes; parameter lists match the typedefs above. */
GLAPI GLuint64 APIENTRY glGetTextureHandleNV(GLuint texture);
GLAPI GLuint64 APIENTRY glGetTextureSamplerHandleNV(GLuint texture, GLuint sampler);
GLAPI void APIENTRY glMakeTextureHandleResidentNV(GLuint64 handle);
GLAPI void APIENTRY glMakeTextureHandleNonResidentNV(GLuint64 handle);
GLAPI GLuint64 APIENTRY glGetImageHandleNV(GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum format);
GLAPI void APIENTRY glMakeImageHandleResidentNV(GLuint64 handle, GLenum access);
GLAPI void APIENTRY glMakeImageHandleNonResidentNV(GLuint64 handle);
GLAPI void APIENTRY glUniformHandleui64NV(GLint location, GLuint64 value);
GLAPI void APIENTRY glUniformHandleui64vNV(GLint location, GLsizei count, const GLuint64* value);
GLAPI void APIENTRY glProgramUniformHandleui64NV(GLuint program, GLint location, GLuint64 value);
GLAPI void APIENTRY glProgramUniformHandleui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64* values);
GLAPI GLboolean APIENTRY glIsTextureHandleResidentNV(GLuint64 handle);
GLAPI GLboolean APIENTRY glIsImageHandleResidentNV(GLuint64 handle);
#endif
#endif /* GL_NV_bindless_texture */

/* GL_NV_blend_equation_advanced: a list of *_NV enum tokens plus function-pointer
 * typedefs for two entry points (glBlendParameteriNV, glBlendBarrierNV). Direct
 * prototypes are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_blend_equation_advanced
#define GL_NV_blend_equation_advanced 1
/* Tokens are listed alphabetically by name, not by numeric value. Most values are in
 * the 0x92xx range; GL_BLUE_NV, GL_GREEN_NV, GL_RED_NV (0x1903..0x1905) and GL_XOR_NV
 * (0x1506) are the exceptions. */
#define GL_BLEND_OVERLAP_NV 0x9281
#define GL_BLEND_PREMULTIPLIED_SRC_NV 0x9280
#define GL_BLUE_NV 0x1905
#define GL_COLORBURN_NV 0x929A
#define GL_COLORDODGE_NV 0x9299
#define GL_CONJOINT_NV 0x9284
#define GL_CONTRAST_NV 0x92A1
#define GL_DARKEN_NV 0x9297
#define GL_DIFFERENCE_NV 0x929E
#define GL_DISJOINT_NV 0x9283
#define GL_DST_ATOP_NV 0x928F
#define GL_DST_IN_NV 0x928B
#define GL_DST_NV 0x9287
#define GL_DST_OUT_NV 0x928D
#define GL_DST_OVER_NV 0x9289
#define GL_EXCLUSION_NV 0x92A0
#define GL_GREEN_NV 0x1904
#define GL_HARDLIGHT_NV 0x929B
#define GL_HARDMIX_NV 0x92A9
#define GL_HSL_COLOR_NV 0x92AF
#define GL_HSL_HUE_NV 0x92AD
#define GL_HSL_LUMINOSITY_NV 0x92B0
#define GL_HSL_SATURATION_NV 0x92AE
#define GL_INVERT_OVG_NV 0x92B4
#define GL_INVERT_RGB_NV 0x92A3
#define GL_LIGHTEN_NV 0x9298
#define GL_LINEARBURN_NV 0x92A5
#define GL_LINEARDODGE_NV 0x92A4
#define GL_LINEARLIGHT_NV 0x92A7
#define GL_MINUS_CLAMPED_NV 0x92B3
#define GL_MINUS_NV 0x929F
#define GL_MULTIPLY_NV 0x9294
#define GL_OVERLAY_NV 0x9296
#define GL_PINLIGHT_NV 0x92A8
#define GL_PLUS_CLAMPED_ALPHA_NV 0x92B2
#define GL_PLUS_CLAMPED_NV 0x92B1
#define GL_PLUS_DARKER_NV 0x9292
#define GL_PLUS_NV 0x9291
#define GL_RED_NV 0x1903
#define GL_SCREEN_NV 0x9295
#define GL_SOFTLIGHT_NV 0x929C
#define GL_SRC_ATOP_NV 0x928E
#define GL_SRC_IN_NV 0x928A
#define GL_SRC_NV 0x9286
#define GL_SRC_OUT_NV 0x928C
#define GL_SRC_OVER_NV 0x9288
#define GL_UNCORRELATED_NV 0x9282
#define GL_VIVIDLIGHT_NV 0x92A6
#define GL_XOR_NV 0x1506
typedef void(APIENTRYP PFNGLBLENDPARAMETERINVPROC)(GLenum pname, GLint value);
typedef void(APIENTRYP PFNGLBLENDBARRIERNVPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBlendParameteriNV(GLenum pname, GLint value);
GLAPI void APIENTRY glBlendBarrierNV(void);
#endif
#endif /* GL_NV_blend_equation_advanced */

/* GL_NV_blend_equation_advanced_coherent: presence macro and a single enum token; no
 * entry points are declared under this guard. */
#ifndef GL_NV_blend_equation_advanced_coherent
#define GL_NV_blend_equation_advanced_coherent 1
#define GL_BLEND_ADVANCED_COHERENT_NV 0x9285
#endif /* GL_NV_blend_equation_advanced_coherent */

/* GL_NV_blend_minmax_factor: two enum tokens; no entry points are declared under this
 * guard.
 * NOTE(review): both tokens carry an _AMD suffix although the guard is an NV
 * extension; presumably this mirrors the upstream registry naming - confirm before
 * renaming, since client code may depend on these exact identifiers. */
#ifndef GL_NV_blend_minmax_factor
#define GL_NV_blend_minmax_factor 1
#define GL_FACTOR_MIN_AMD 0x901C
#define GL_FACTOR_MAX_AMD 0x901D
#endif /* GL_NV_blend_minmax_factor */

/* GL_NV_clip_space_w_scaling: three enum tokens (contiguous values 0x937C..0x937E)
 * plus the function-pointer typedef for glViewportPositionWScaleNV. The direct
 * prototype is only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_clip_space_w_scaling
#define GL_NV_clip_space_w_scaling 1
#define GL_VIEWPORT_POSITION_W_SCALE_NV 0x937C
#define GL_VIEWPORT_POSITION_W_SCALE_X_COEFF_NV 0x937D
#define GL_VIEWPORT_POSITION_W_SCALE_Y_COEFF_NV 0x937E
typedef void(APIENTRYP PFNGLVIEWPORTPOSITIONWSCALENVPROC)(GLuint index, GLfloat xcoeff, GLfloat ycoeff);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glViewportPositionWScaleNV(GLuint index, GLfloat xcoeff, GLfloat ycoeff);
#endif
#endif /* GL_NV_clip_space_w_scaling */

/* GL_NV_command_list: GL_*_COMMAND_NV tokens plus function-pointer typedefs for
 * seventeen entry points. Direct prototypes mirror the typedefs one-to-one and are
 * only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_command_list
#define GL_NV_command_list 1
/* Command tokens: consecutive small integers 0x0000..0x0012, unlike the 0x8xxx/0x9xxx
 * enum values used by most other extensions in this header. */
#define GL_TERMINATE_SEQUENCE_COMMAND_NV 0x0000
#define GL_NOP_COMMAND_NV 0x0001
#define GL_DRAW_ELEMENTS_COMMAND_NV 0x0002
#define GL_DRAW_ARRAYS_COMMAND_NV 0x0003
#define GL_DRAW_ELEMENTS_STRIP_COMMAND_NV 0x0004
#define GL_DRAW_ARRAYS_STRIP_COMMAND_NV 0x0005
#define GL_DRAW_ELEMENTS_INSTANCED_COMMAND_NV 0x0006
#define GL_DRAW_ARRAYS_INSTANCED_COMMAND_NV 0x0007
#define GL_ELEMENT_ADDRESS_COMMAND_NV 0x0008
#define GL_ATTRIBUTE_ADDRESS_COMMAND_NV 0x0009
#define GL_UNIFORM_ADDRESS_COMMAND_NV 0x000A
#define GL_BLEND_COLOR_COMMAND_NV 0x000B
#define GL_STENCIL_REF_COMMAND_NV 0x000C
#define GL_LINE_WIDTH_COMMAND_NV 0x000D
#define GL_POLYGON_OFFSET_COMMAND_NV 0x000E
#define GL_ALPHA_REF_COMMAND_NV 0x000F
#define GL_VIEWPORT_COMMAND_NV 0x0010
#define GL_SCISSOR_COMMAND_NV 0x0011
#define GL_FRONT_FACE_COMMAND_NV 0x0012
/* Function-pointer types, for entry points resolved at runtime. */
typedef void(APIENTRYP PFNGLCREATESTATESNVPROC)(GLsizei n, GLuint* states);
typedef void(APIENTRYP PFNGLDELETESTATESNVPROC)(GLsizei n, const GLuint* states);
typedef GLboolean(APIENTRYP PFNGLISSTATENVPROC)(GLuint state);
typedef void(APIENTRYP PFNGLSTATECAPTURENVPROC)(GLuint state, GLenum mode);
typedef GLuint(APIENTRYP PFNGLGETCOMMANDHEADERNVPROC)(GLenum tokenID, GLuint size);
typedef GLushort(APIENTRYP PFNGLGETSTAGEINDEXNVPROC)(GLenum shadertype);
typedef void(APIENTRYP PFNGLDRAWCOMMANDSNVPROC)(GLenum primitiveMode, GLuint buffer, const GLintptr* indirects, const GLsizei* sizes, GLuint count);
typedef void(APIENTRYP PFNGLDRAWCOMMANDSADDRESSNVPROC)(GLenum primitiveMode, const GLuint64* indirects, const GLsizei* sizes, GLuint count);
typedef void(APIENTRYP PFNGLDRAWCOMMANDSSTATESNVPROC)(GLuint buffer, const GLintptr* indirects, const GLsizei* sizes, const GLuint* states,
                                                      const GLuint* fbos, GLuint count);
typedef void(APIENTRYP PFNGLDRAWCOMMANDSSTATESADDRESSNVPROC)(const GLuint64* indirects, const GLsizei* sizes, const GLuint* states,
                                                             const GLuint* fbos, GLuint count);
typedef void(APIENTRYP PFNGLCREATECOMMANDLISTSNVPROC)(GLsizei n, GLuint* lists);
typedef void(APIENTRYP PFNGLDELETECOMMANDLISTSNVPROC)(GLsizei n, const GLuint* lists);
typedef GLboolean(APIENTRYP PFNGLISCOMMANDLISTNVPROC)(GLuint list);
typedef void(APIENTRYP PFNGLLISTDRAWCOMMANDSSTATESCLIENTNVPROC)(GLuint list, GLuint segment, const void** indirects, const GLsizei* sizes,
                                                                const GLuint* states, const GLuint* fbos, GLuint count);
typedef void(APIENTRYP PFNGLCOMMANDLISTSEGMENTSNVPROC)(GLuint list, GLuint segments);
typedef void(APIENTRYP PFNGLCOMPILECOMMANDLISTNVPROC)(GLuint list);
typedef void(APIENTRYP PFNGLCALLCOMMANDLISTNVPROC)(GLuint list);
#ifdef GL_GLEXT_PROTOTYPES
/* Direct prototypes; parameter lists match the typedefs above. */
GLAPI void APIENTRY glCreateStatesNV(GLsizei n, GLuint* states);
GLAPI void APIENTRY glDeleteStatesNV(GLsizei n, const GLuint* states);
GLAPI GLboolean APIENTRY glIsStateNV(GLuint state);
GLAPI void APIENTRY glStateCaptureNV(GLuint state, GLenum mode);
GLAPI GLuint APIENTRY glGetCommandHeaderNV(GLenum tokenID, GLuint size);
GLAPI GLushort APIENTRY glGetStageIndexNV(GLenum shadertype);
GLAPI void APIENTRY glDrawCommandsNV(GLenum primitiveMode, GLuint buffer, const GLintptr* indirects, const GLsizei* sizes, GLuint count);
GLAPI void APIENTRY glDrawCommandsAddressNV(GLenum primitiveMode, const GLuint64* indirects, const GLsizei* sizes, GLuint count);
GLAPI void APIENTRY glDrawCommandsStatesNV(GLuint buffer, const GLintptr* indirects, const GLsizei* sizes, const GLuint* states,
                                           const GLuint* fbos, GLuint count);
GLAPI void APIENTRY glDrawCommandsStatesAddressNV(const GLuint64* indirects, const GLsizei* sizes, const GLuint* states, const GLuint* fbos,
                                                  GLuint count);
GLAPI void APIENTRY glCreateCommandListsNV(GLsizei n, GLuint* lists);
GLAPI void APIENTRY glDeleteCommandListsNV(GLsizei n, const GLuint* lists);
GLAPI GLboolean APIENTRY glIsCommandListNV(GLuint list);
GLAPI void APIENTRY glListDrawCommandsStatesClientNV(GLuint list, GLuint segment, const void** indirects, const GLsizei* sizes,
                                                     const GLuint* states, const GLuint* fbos, GLuint count);
GLAPI void APIENTRY glCommandListSegmentsNV(GLuint list, GLuint segments);
GLAPI void APIENTRY glCompileCommandListNV(GLuint list);
GLAPI void APIENTRY glCallCommandListNV(GLuint list);
#endif
#endif /* GL_NV_command_list */

/* GL_NV_compute_shader_derivatives: presence macro only; this header declares no
 * tokens or entry points for it. */
#ifndef GL_NV_compute_shader_derivatives
#define GL_NV_compute_shader_derivatives 1
#endif /* GL_NV_compute_shader_derivatives */

/* GL_NV_conditional_render: four enum tokens (contiguous values 0x8E13..0x8E16) plus
 * function-pointer typedefs for a begin/end pair of entry points. Direct prototypes
 * are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_conditional_render
#define GL_NV_conditional_render 1
#define GL_QUERY_WAIT_NV 0x8E13
#define GL_QUERY_NO_WAIT_NV 0x8E14
#define GL_QUERY_BY_REGION_WAIT_NV 0x8E15
#define GL_QUERY_BY_REGION_NO_WAIT_NV 0x8E16
typedef void(APIENTRYP PFNGLBEGINCONDITIONALRENDERNVPROC)(GLuint id, GLenum mode);
typedef void(APIENTRYP PFNGLENDCONDITIONALRENDERNVPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBeginConditionalRenderNV(GLuint id, GLenum mode);
GLAPI void APIENTRY glEndConditionalRenderNV(void);
#endif
#endif /* GL_NV_conditional_render */

/* GL_NV_conservative_raster: four enum tokens (contiguous values 0x9346..0x9349) plus
 * the function-pointer typedef for glSubpixelPrecisionBiasNV. The direct prototype is
 * only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_conservative_raster
#define GL_NV_conservative_raster 1
#define GL_CONSERVATIVE_RASTERIZATION_NV 0x9346
#define GL_SUBPIXEL_PRECISION_BIAS_X_BITS_NV 0x9347
#define GL_SUBPIXEL_PRECISION_BIAS_Y_BITS_NV 0x9348
#define GL_MAX_SUBPIXEL_PRECISION_BIAS_BITS_NV 0x9349
typedef void(APIENTRYP PFNGLSUBPIXELPRECISIONBIASNVPROC)(GLuint xbits, GLuint ybits);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glSubpixelPrecisionBiasNV(GLuint xbits, GLuint ybits);
#endif
#endif /* GL_NV_conservative_raster */

/*
 * GL_NV_conservative_raster_dilate
 *
 * Declares three GL_CONSERVATIVE_RASTER_DILATE*_NV enumerants and the
 * float-valued setter glConservativeRasterParameterfNV(pname, value).
 * The integer-valued counterpart (glConservativeRasterParameteriNV) is
 * declared under GL_NV_conservative_raster_pre_snap_triangles, not here.
 */
#ifndef GL_NV_conservative_raster_dilate
#define GL_NV_conservative_raster_dilate 1
#define GL_CONSERVATIVE_RASTER_DILATE_NV 0x9379
#define GL_CONSERVATIVE_RASTER_DILATE_RANGE_NV 0x937A
#define GL_CONSERVATIVE_RASTER_DILATE_GRANULARITY_NV 0x937B
/* Function-pointer type; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLCONSERVATIVERASTERPARAMETERFNVPROC)(GLenum pname, GLfloat value);
/* Direct prototype; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glConservativeRasterParameterfNV(GLenum pname, GLfloat value);
#endif
#endif /* GL_NV_conservative_raster_dilate */

/*
 * GL_NV_conservative_raster_pre_snap
 *
 * Adds a single enumerant and no entry points. Its value (0x9550) follows
 * the GL_CONSERVATIVE_RASTER_MODE_*_NV values (0x954D-0x954F) declared
 * under GL_NV_conservative_raster_pre_snap_triangles.
 */
#ifndef GL_NV_conservative_raster_pre_snap
#define GL_NV_conservative_raster_pre_snap 1
#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_NV 0x9550
#endif /* GL_NV_conservative_raster_pre_snap */

/*
 * GL_NV_conservative_raster_pre_snap_triangles
 *
 * Declares the GL_CONSERVATIVE_RASTER_MODE_NV enumerant, two mode values
 * (POST_SNAP, PRE_SNAP_TRIANGLES), and the integer-valued setter
 * glConservativeRasterParameteriNV(pname, param).
 */
#ifndef GL_NV_conservative_raster_pre_snap_triangles
#define GL_NV_conservative_raster_pre_snap_triangles 1
#define GL_CONSERVATIVE_RASTER_MODE_NV 0x954D
#define GL_CONSERVATIVE_RASTER_MODE_POST_SNAP_NV 0x954E
#define GL_CONSERVATIVE_RASTER_MODE_PRE_SNAP_TRIANGLES_NV 0x954F
/* Function-pointer type; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLCONSERVATIVERASTERPARAMETERINVPROC)(GLenum pname, GLint param);
/* Direct prototype; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glConservativeRasterParameteriNV(GLenum pname, GLint param);
#endif
#endif /* GL_NV_conservative_raster_pre_snap_triangles */

/*
 * GL_NV_conservative_raster_underestimation
 *
 * Presence marker only: defines the extension-name macro and declares no
 * enumerants, function-pointer types, or prototypes.
 */
#ifndef GL_NV_conservative_raster_underestimation
#define GL_NV_conservative_raster_underestimation 1
#endif /* GL_NV_conservative_raster_underestimation */

/*
 * GL_NV_depth_buffer_float
 *
 * Declares four enumerants (note the gap: 0x8DAB-0x8DAD, then 0x8DAF) and
 * three entry points that all take GLdouble arguments: depth range,
 * clear depth, and depth bounds.
 */
#ifndef GL_NV_depth_buffer_float
#define GL_NV_depth_buffer_float 1
#define GL_DEPTH_COMPONENT32F_NV 0x8DAB
#define GL_DEPTH32F_STENCIL8_NV 0x8DAC
#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV_NV 0x8DAD
#define GL_DEPTH_BUFFER_FLOAT_MODE_NV 0x8DAF
/* Function-pointer types; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLDEPTHRANGEDNVPROC)(GLdouble zNear, GLdouble zFar);
typedef void(APIENTRYP PFNGLCLEARDEPTHDNVPROC)(GLdouble depth);
typedef void(APIENTRYP PFNGLDEPTHBOUNDSDNVPROC)(GLdouble zmin, GLdouble zmax);
/* Direct prototypes; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDepthRangedNV(GLdouble zNear, GLdouble zFar);
GLAPI void APIENTRY glClearDepthdNV(GLdouble depth);
GLAPI void APIENTRY glDepthBoundsdNV(GLdouble zmin, GLdouble zmax);
#endif
#endif /* GL_NV_depth_buffer_float */

/*
 * GL_NV_draw_vulkan_image
 *
 * Declares no enumerants. Provides five entry points; the Vulkan objects
 * (image, semaphore, fence) are passed as GLuint64 values.
 */
#ifndef GL_NV_draw_vulkan_image
#define GL_NV_draw_vulkan_image 1
/*
 * Generic "void (*)(void)" function-pointer type returned by
 * glGetVkProcAddrNV. Callers presumably cast it to the real signature of
 * the looked-up function -- confirm against the extension specification.
 */
typedef void(APIENTRY* GLVULKANPROCNV)(void);
/* Function-pointer types; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLDRAWVKIMAGENVPROC)(GLuint64 vkImage, GLuint sampler, GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, GLfloat z,
                                               GLfloat s0, GLfloat t0, GLfloat s1, GLfloat t1);
typedef GLVULKANPROCNV(APIENTRYP PFNGLGETVKPROCADDRNVPROC)(const GLchar* name);
typedef void(APIENTRYP PFNGLWAITVKSEMAPHORENVPROC)(GLuint64 vkSemaphore);
typedef void(APIENTRYP PFNGLSIGNALVKSEMAPHORENVPROC)(GLuint64 vkSemaphore);
typedef void(APIENTRYP PFNGLSIGNALVKFENCENVPROC)(GLuint64 vkFence);
/* Direct prototypes; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawVkImageNV(GLuint64 vkImage, GLuint sampler, GLfloat x0, GLfloat y0, GLfloat x1, GLfloat y1, GLfloat z, GLfloat s0,
                                    GLfloat t0, GLfloat s1, GLfloat t1);
GLAPI GLVULKANPROCNV APIENTRY glGetVkProcAddrNV(const GLchar* name);
GLAPI void APIENTRY glWaitVkSemaphoreNV(GLuint64 vkSemaphore);
GLAPI void APIENTRY glSignalVkSemaphoreNV(GLuint64 vkSemaphore);
GLAPI void APIENTRY glSignalVkFenceNV(GLuint64 vkFence);
#endif
#endif /* GL_NV_draw_vulkan_image */

/*
 * GL_NV_fill_rectangle
 *
 * Adds a single enumerant, GL_FILL_RECTANGLE_NV, and no entry points.
 */
#ifndef GL_NV_fill_rectangle
#define GL_NV_fill_rectangle 1
#define GL_FILL_RECTANGLE_NV 0x933C
#endif /* GL_NV_fill_rectangle */

/*
 * GL_NV_fragment_coverage_to_color
 *
 * Declares two enumerants and one entry point,
 * glFragmentCoverageColorNV, which takes a single GLuint `color`.
 * NOTE(review): `color` is presumably a color-attachment index rather
 * than a color value -- confirm against the extension specification.
 */
#ifndef GL_NV_fragment_coverage_to_color
#define GL_NV_fragment_coverage_to_color 1
#define GL_FRAGMENT_COVERAGE_TO_COLOR_NV 0x92DD
#define GL_FRAGMENT_COVERAGE_COLOR_NV 0x92DE
/* Function-pointer type; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLFRAGMENTCOVERAGECOLORNVPROC)(GLuint color);
/* Direct prototype; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFragmentCoverageColorNV(GLuint color);
#endif
#endif /* GL_NV_fragment_coverage_to_color */

/*
 * GL_NV_fragment_shader_barycentric
 *
 * Presence marker only: defines the extension-name macro and declares no
 * enumerants, function-pointer types, or prototypes.
 */
#ifndef GL_NV_fragment_shader_barycentric
#define GL_NV_fragment_shader_barycentric 1
#endif /* GL_NV_fragment_shader_barycentric */

/*
 * GL_NV_fragment_shader_interlock
 *
 * Presence marker only: defines the extension-name macro and declares no
 * enumerants, function-pointer types, or prototypes.
 */
#ifndef GL_NV_fragment_shader_interlock
#define GL_NV_fragment_shader_interlock 1
#endif /* GL_NV_fragment_shader_interlock */

/*
 * GL_NV_framebuffer_mixed_samples
 *
 * Declares eight enumerants and three entry points: a setter and a getter
 * for a table of GLfloat values, and glCoverageModulationNV, which takes a
 * GLenum `components`.
 *
 * The enumerants are not listed in numeric order
 * (GL_COVERAGE_MODULATION_TABLE_NV 0x9331 comes first, GL_COLOR_SAMPLES_NV
 * is 0x8E20, the rest are 0x932D-0x9333).
 */
#ifndef GL_NV_framebuffer_mixed_samples
#define GL_NV_framebuffer_mixed_samples 1
#define GL_COVERAGE_MODULATION_TABLE_NV 0x9331
#define GL_COLOR_SAMPLES_NV 0x8E20
#define GL_DEPTH_SAMPLES_NV 0x932D
#define GL_STENCIL_SAMPLES_NV 0x932E
#define GL_MIXED_DEPTH_SAMPLES_SUPPORTED_NV 0x932F
#define GL_MIXED_STENCIL_SAMPLES_SUPPORTED_NV 0x9330
#define GL_COVERAGE_MODULATION_NV 0x9332
#define GL_COVERAGE_MODULATION_TABLE_SIZE_NV 0x9333
/* Function-pointer types; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLCOVERAGEMODULATIONTABLENVPROC)(GLsizei n, const GLfloat* v);
typedef void(APIENTRYP PFNGLGETCOVERAGEMODULATIONTABLENVPROC)(GLsizei bufSize, GLfloat* v);
typedef void(APIENTRYP PFNGLCOVERAGEMODULATIONNVPROC)(GLenum components);
/* Direct prototypes; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glCoverageModulationTableNV(GLsizei n, const GLfloat* v);
GLAPI void APIENTRY glGetCoverageModulationTableNV(GLsizei bufSize, GLfloat* v);
GLAPI void APIENTRY glCoverageModulationNV(GLenum components);
#endif
#endif /* GL_NV_framebuffer_mixed_samples */

/*
 * GL_NV_framebuffer_multisample_coverage
 *
 * Declares four enumerants and one entry point,
 * glRenderbufferStorageMultisampleCoverageNV, whose signature carries
 * separate `coverageSamples` and `colorSamples` counts alongside the
 * target, internal format, width and height.
 */
#ifndef GL_NV_framebuffer_multisample_coverage
#define GL_NV_framebuffer_multisample_coverage 1
#define GL_RENDERBUFFER_COVERAGE_SAMPLES_NV 0x8CAB
#define GL_RENDERBUFFER_COLOR_SAMPLES_NV 0x8E10
#define GL_MAX_MULTISAMPLE_COVERAGE_MODES_NV 0x8E11
#define GL_MULTISAMPLE_COVERAGE_MODES_NV 0x8E12
/* Function-pointer type; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLECOVERAGENVPROC)(GLenum target, GLsizei coverageSamples, GLsizei colorSamples,
                                                                          GLenum internalformat, GLsizei width, GLsizei height);
/* Direct prototype; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glRenderbufferStorageMultisampleCoverageNV(GLenum target, GLsizei coverageSamples, GLsizei colorSamples,
                                                               GLenum internalformat, GLsizei width, GLsizei height);
#endif
#endif /* GL_NV_framebuffer_multisample_coverage */

/*
 * GL_NV_geometry_shader_passthrough
 *
 * Presence marker only: defines the extension-name macro and declares no
 * enumerants, function-pointer types, or prototypes.
 */
#ifndef GL_NV_geometry_shader_passthrough
#define GL_NV_geometry_shader_passthrough 1
#endif /* GL_NV_geometry_shader_passthrough */

/*
 * GL_NV_gpu_shader5
 *
 * Declares the GLint64EXT type, a set of data-type enumerants (8/16/64-bit
 * integer and 16-bit float scalars and vectors), and 64-bit integer
 * uniform entry points in two families:
 *   - glUniform*NV          : take a uniform `location`
 *   - glProgramUniform*NV   : additionally take an explicit `program`
 * Each family has signed (i64) and unsigned (ui64) forms, with 1-4
 * by-value components or a (count, pointer) array form.
 *
 * NOTE: GLuint64EXT is used throughout this block but is NOT declared
 * inside this guard; only GLint64EXT is. It must already be declared
 * earlier in the header for this block to compile. khronos_int64_t is
 * likewise declared elsewhere.
 */
#ifndef GL_NV_gpu_shader5
#define GL_NV_gpu_shader5 1
typedef khronos_int64_t GLint64EXT;
/* Data-type enumerants. Values are not fully contiguous: 0x8FE8 and 0x8FF4 are not defined here. */
#define GL_INT64_NV 0x140E
#define GL_UNSIGNED_INT64_NV 0x140F
#define GL_INT8_NV 0x8FE0
#define GL_INT8_VEC2_NV 0x8FE1
#define GL_INT8_VEC3_NV 0x8FE2
#define GL_INT8_VEC4_NV 0x8FE3
#define GL_INT16_NV 0x8FE4
#define GL_INT16_VEC2_NV 0x8FE5
#define GL_INT16_VEC3_NV 0x8FE6
#define GL_INT16_VEC4_NV 0x8FE7
#define GL_INT64_VEC2_NV 0x8FE9
#define GL_INT64_VEC3_NV 0x8FEA
#define GL_INT64_VEC4_NV 0x8FEB
#define GL_UNSIGNED_INT8_NV 0x8FEC
#define GL_UNSIGNED_INT8_VEC2_NV 0x8FED
#define GL_UNSIGNED_INT8_VEC3_NV 0x8FEE
#define GL_UNSIGNED_INT8_VEC4_NV 0x8FEF
#define GL_UNSIGNED_INT16_NV 0x8FF0
#define GL_UNSIGNED_INT16_VEC2_NV 0x8FF1
#define GL_UNSIGNED_INT16_VEC3_NV 0x8FF2
#define GL_UNSIGNED_INT16_VEC4_NV 0x8FF3
#define GL_UNSIGNED_INT64_VEC2_NV 0x8FF5
#define GL_UNSIGNED_INT64_VEC3_NV 0x8FF6
#define GL_UNSIGNED_INT64_VEC4_NV 0x8FF7
#define GL_FLOAT16_NV 0x8FF8
#define GL_FLOAT16_VEC2_NV 0x8FF9
#define GL_FLOAT16_VEC3_NV 0x8FFA
#define GL_FLOAT16_VEC4_NV 0x8FFB
/* Function-pointer types; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLUNIFORM1I64NVPROC)(GLint location, GLint64EXT x);
typedef void(APIENTRYP PFNGLUNIFORM2I64NVPROC)(GLint location, GLint64EXT x, GLint64EXT y);
typedef void(APIENTRYP PFNGLUNIFORM3I64NVPROC)(GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z);
typedef void(APIENTRYP PFNGLUNIFORM4I64NVPROC)(GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
typedef void(APIENTRYP PFNGLUNIFORM1I64VNVPROC)(GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM2I64VNVPROC)(GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM3I64VNVPROC)(GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM4I64VNVPROC)(GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM1UI64NVPROC)(GLint location, GLuint64EXT x);
typedef void(APIENTRYP PFNGLUNIFORM2UI64NVPROC)(GLint location, GLuint64EXT x, GLuint64EXT y);
typedef void(APIENTRYP PFNGLUNIFORM3UI64NVPROC)(GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
typedef void(APIENTRYP PFNGLUNIFORM4UI64NVPROC)(GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
typedef void(APIENTRYP PFNGLUNIFORM1UI64VNVPROC)(GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM2UI64VNVPROC)(GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM3UI64VNVPROC)(GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLUNIFORM4UI64VNVPROC)(GLint location, GLsizei count, const GLuint64EXT* value);
/* Only a signed (i64) uniform getter is declared in this block; there is no ui64 getter here. */
typedef void(APIENTRYP PFNGLGETUNIFORMI64VNVPROC)(GLuint program, GLint location, GLint64EXT* params);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1I64NVPROC)(GLuint program, GLint location, GLint64EXT x);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2I64NVPROC)(GLuint program, GLint location, GLint64EXT x, GLint64EXT y);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3I64NVPROC)(GLuint program, GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4I64NVPROC)(GLuint program, GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1I64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2I64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3I64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4I64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UI64NVPROC)(GLuint program, GLint location, GLuint64EXT x);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UI64NVPROC)(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UI64NVPROC)(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UI64NVPROC)(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM1UI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM2UI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM3UI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORM4UI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
/*
 * Direct prototypes; only declared when the includer defines
 * GL_GLEXT_PROTOTYPES. They mirror the function-pointer types above
 * one-for-one, in the same order.
 */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glUniform1i64NV(GLint location, GLint64EXT x);
GLAPI void APIENTRY glUniform2i64NV(GLint location, GLint64EXT x, GLint64EXT y);
GLAPI void APIENTRY glUniform3i64NV(GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z);
GLAPI void APIENTRY glUniform4i64NV(GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
GLAPI void APIENTRY glUniform1i64vNV(GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glUniform2i64vNV(GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glUniform3i64vNV(GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glUniform4i64vNV(GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glUniform1ui64NV(GLint location, GLuint64EXT x);
GLAPI void APIENTRY glUniform2ui64NV(GLint location, GLuint64EXT x, GLuint64EXT y);
GLAPI void APIENTRY glUniform3ui64NV(GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
GLAPI void APIENTRY glUniform4ui64NV(GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
GLAPI void APIENTRY glUniform1ui64vNV(GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glUniform2ui64vNV(GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glUniform3ui64vNV(GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glUniform4ui64vNV(GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glGetUniformi64vNV(GLuint program, GLint location, GLint64EXT* params);
GLAPI void APIENTRY glProgramUniform1i64NV(GLuint program, GLint location, GLint64EXT x);
GLAPI void APIENTRY glProgramUniform2i64NV(GLuint program, GLint location, GLint64EXT x, GLint64EXT y);
GLAPI void APIENTRY glProgramUniform3i64NV(GLuint program, GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z);
GLAPI void APIENTRY glProgramUniform4i64NV(GLuint program, GLint location, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
GLAPI void APIENTRY glProgramUniform1i64vNV(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glProgramUniform2i64vNV(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glProgramUniform3i64vNV(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glProgramUniform4i64vNV(GLuint program, GLint location, GLsizei count, const GLint64EXT* value);
GLAPI void APIENTRY glProgramUniform1ui64NV(GLuint program, GLint location, GLuint64EXT x);
GLAPI void APIENTRY glProgramUniform2ui64NV(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y);
GLAPI void APIENTRY glProgramUniform3ui64NV(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
GLAPI void APIENTRY glProgramUniform4ui64NV(GLuint program, GLint location, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
GLAPI void APIENTRY glProgramUniform1ui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glProgramUniform2ui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glProgramUniform3ui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glProgramUniform4ui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
#endif
#endif /* GL_NV_gpu_shader5 */

/*
 * GL_NV_internalformat_sample_query
 *
 * Declares four enumerants (0x9371-0x9374) and one query entry point,
 * glGetInternalformatSampleivNV, which writes GLint results through
 * `params`. `count` presumably bounds how many values may be written --
 * confirm against the extension specification.
 */
#ifndef GL_NV_internalformat_sample_query
#define GL_NV_internalformat_sample_query 1
#define GL_MULTISAMPLES_NV 0x9371
#define GL_SUPERSAMPLE_SCALE_X_NV 0x9372
#define GL_SUPERSAMPLE_SCALE_Y_NV 0x9373
#define GL_CONFORMANT_NV 0x9374
/* Function-pointer type; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLGETINTERNALFORMATSAMPLEIVNVPROC)(GLenum target, GLenum internalformat, GLsizei samples, GLenum pname,
                                                             GLsizei count, GLint* params);
/* Direct prototype; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glGetInternalformatSampleivNV(GLenum target, GLenum internalformat, GLsizei samples, GLenum pname, GLsizei count, GLint* params);
#endif
#endif /* GL_NV_internalformat_sample_query */

/*
 * GL_NV_memory_attachment
 *
 * Declares ten enumerants (0x95A4-0x95AD, contiguous) and six entry
 * points:
 *   - a query returning GLuint values for a memory object,
 *   - a per-parameter reset on a memory object,
 *   - four "attach memory" calls, each taking a `memory` name and a
 *     GLuint64 `offset`. Two select the resource by GLenum `target`
 *     (Tex/Buffer), two by object name (`texture` / `buffer`).
 */
#ifndef GL_NV_memory_attachment
#define GL_NV_memory_attachment 1
#define GL_ATTACHED_MEMORY_OBJECT_NV 0x95A4
#define GL_ATTACHED_MEMORY_OFFSET_NV 0x95A5
#define GL_MEMORY_ATTACHABLE_ALIGNMENT_NV 0x95A6
#define GL_MEMORY_ATTACHABLE_SIZE_NV 0x95A7
#define GL_MEMORY_ATTACHABLE_NV 0x95A8
#define GL_DETACHED_MEMORY_INCARNATION_NV 0x95A9
#define GL_DETACHED_TEXTURES_NV 0x95AA
#define GL_DETACHED_BUFFERS_NV 0x95AB
#define GL_MAX_DETACHED_TEXTURES_NV 0x95AC
#define GL_MAX_DETACHED_BUFFERS_NV 0x95AD
/* Function-pointer types; declared whenever this block is active. */
typedef void(APIENTRYP PFNGLGETMEMORYOBJECTDETACHEDRESOURCESUIVNVPROC)(GLuint memory, GLenum pname, GLint first, GLsizei count, GLuint* params);
typedef void(APIENTRYP PFNGLRESETMEMORYOBJECTPARAMETERNVPROC)(GLuint memory, GLenum pname);
typedef void(APIENTRYP PFNGLTEXATTACHMEMORYNVPROC)(GLenum target, GLuint memory, GLuint64 offset);
typedef void(APIENTRYP PFNGLBUFFERATTACHMEMORYNVPROC)(GLenum target, GLuint memory, GLuint64 offset);
typedef void(APIENTRYP PFNGLTEXTUREATTACHMEMORYNVPROC)(GLuint texture, GLuint memory, GLuint64 offset);
typedef void(APIENTRYP PFNGLNAMEDBUFFERATTACHMEMORYNVPROC)(GLuint buffer, GLuint memory, GLuint64 offset);
/* Direct prototypes; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glGetMemoryObjectDetachedResourcesuivNV(GLuint memory, GLenum pname, GLint first, GLsizei count, GLuint* params);
GLAPI void APIENTRY glResetMemoryObjectParameterNV(GLuint memory, GLenum pname);
GLAPI void APIENTRY glTexAttachMemoryNV(GLenum target, GLuint memory, GLuint64 offset);
GLAPI void APIENTRY glBufferAttachMemoryNV(GLenum target, GLuint memory, GLuint64 offset);
GLAPI void APIENTRY glTextureAttachMemoryNV(GLuint texture, GLuint memory, GLuint64 offset);
GLAPI void APIENTRY glNamedBufferAttachMemoryNV(GLuint buffer, GLuint memory, GLuint64 offset);
#endif
#endif /* GL_NV_memory_attachment */

/*
 * GL_NV_mesh_shader
 *
 * Declares enumerants for the mesh and task shader stages (stage
 * identifiers, GL_MAX_* limits, work-group and output properties,
 * "referenced by" properties, stage bits, subroutine interfaces) and
 * four draw entry points.
 *
 * The enumerants are grouped by purpose rather than sorted numerically.
 */
#ifndef GL_NV_mesh_shader
#define GL_NV_mesh_shader 1
#define GL_MESH_SHADER_NV 0x9559
#define GL_TASK_SHADER_NV 0x955A
#define GL_MAX_MESH_UNIFORM_BLOCKS_NV 0x8E60
#define GL_MAX_MESH_TEXTURE_IMAGE_UNITS_NV 0x8E61
#define GL_MAX_MESH_IMAGE_UNIFORMS_NV 0x8E62
#define GL_MAX_MESH_UNIFORM_COMPONENTS_NV 0x8E63
#define GL_MAX_MESH_ATOMIC_COUNTER_BUFFERS_NV 0x8E64
#define GL_MAX_MESH_ATOMIC_COUNTERS_NV 0x8E65
#define GL_MAX_MESH_SHADER_STORAGE_BLOCKS_NV 0x8E66
#define GL_MAX_COMBINED_MESH_UNIFORM_COMPONENTS_NV 0x8E67
#define GL_MAX_TASK_UNIFORM_BLOCKS_NV 0x8E68
#define GL_MAX_TASK_TEXTURE_IMAGE_UNITS_NV 0x8E69
#define GL_MAX_TASK_IMAGE_UNIFORMS_NV 0x8E6A
#define GL_MAX_TASK_UNIFORM_COMPONENTS_NV 0x8E6B
#define GL_MAX_TASK_ATOMIC_COUNTER_BUFFERS_NV 0x8E6C
#define GL_MAX_TASK_ATOMIC_COUNTERS_NV 0x8E6D
#define GL_MAX_TASK_SHADER_STORAGE_BLOCKS_NV 0x8E6E
#define GL_MAX_COMBINED_TASK_UNIFORM_COMPONENTS_NV 0x8E6F
#define GL_MAX_MESH_WORK_GROUP_INVOCATIONS_NV 0x95A2
#define GL_MAX_TASK_WORK_GROUP_INVOCATIONS_NV 0x95A3
#define GL_MAX_MESH_TOTAL_MEMORY_SIZE_NV 0x9536
#define GL_MAX_TASK_TOTAL_MEMORY_SIZE_NV 0x9537
#define GL_MAX_MESH_OUTPUT_VERTICES_NV 0x9538
#define GL_MAX_MESH_OUTPUT_PRIMITIVES_NV 0x9539
#define GL_MAX_TASK_OUTPUT_COUNT_NV 0x953A
#define GL_MAX_DRAW_MESH_TASKS_COUNT_NV 0x953D
#define GL_MAX_MESH_VIEWS_NV 0x9557
#define GL_MESH_OUTPUT_PER_VERTEX_GRANULARITY_NV 0x92DF
#define GL_MESH_OUTPUT_PER_PRIMITIVE_GRANULARITY_NV 0x9543
#define GL_MAX_MESH_WORK_GROUP_SIZE_NV 0x953B
#define GL_MAX_TASK_WORK_GROUP_SIZE_NV 0x953C
#define GL_MESH_WORK_GROUP_SIZE_NV 0x953E
#define GL_TASK_WORK_GROUP_SIZE_NV 0x953F
#define GL_MESH_VERTICES_OUT_NV 0x9579
#define GL_MESH_PRIMITIVES_OUT_NV 0x957A
#define GL_MESH_OUTPUT_TYPE_NV 0x957B
#define GL_UNIFORM_BLOCK_REFERENCED_BY_MESH_SHADER_NV 0x959C
#define GL_UNIFORM_BLOCK_REFERENCED_BY_TASK_SHADER_NV 0x959D
#define GL_REFERENCED_BY_MESH_SHADER_NV 0x95A0
#define GL_REFERENCED_BY_TASK_SHADER_NV 0x95A1
/* Single-bit values (0x40, 0x80), unlike the enumerants around them. */
#define GL_MESH_SHADER_BIT_NV 0x00000040
#define GL_TASK_SHADER_BIT_NV 0x00000080
#define GL_MESH_SUBROUTINE_NV 0x957C
#define GL_TASK_SUBROUTINE_NV 0x957D
#define GL_MESH_SUBROUTINE_UNIFORM_NV 0x957E
#define GL_TASK_SUBROUTINE_UNIFORM_NV 0x957F
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_MESH_SHADER_NV 0x959E
#define GL_ATOMIC_COUNTER_BUFFER_REFERENCED_BY_TASK_SHADER_NV 0x959F
/*
 * Function-pointer types; declared whenever this block is active.
 * Note the differing `drawcount` types: GLsizei in
 * MultiDrawMeshTasksIndirectNV, GLintptr in
 * MultiDrawMeshTasksIndirectCountNV (presumably a buffer offset there, with
 * `maxdrawcount` as the upper bound -- confirm against the extension
 * specification).
 */
typedef void(APIENTRYP PFNGLDRAWMESHTASKSNVPROC)(GLuint first, GLuint count);
typedef void(APIENTRYP PFNGLDRAWMESHTASKSINDIRECTNVPROC)(GLintptr indirect);
typedef void(APIENTRYP PFNGLMULTIDRAWMESHTASKSINDIRECTNVPROC)(GLintptr indirect, GLsizei drawcount, GLsizei stride);
typedef void(APIENTRYP PFNGLMULTIDRAWMESHTASKSINDIRECTCOUNTNVPROC)(GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
/* Direct prototypes; only declared when the includer defines GL_GLEXT_PROTOTYPES. */
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glDrawMeshTasksNV(GLuint first, GLuint count);
GLAPI void APIENTRY glDrawMeshTasksIndirectNV(GLintptr indirect);
GLAPI void APIENTRY glMultiDrawMeshTasksIndirectNV(GLintptr indirect, GLsizei drawcount, GLsizei stride);
GLAPI void APIENTRY glMultiDrawMeshTasksIndirectCountNV(GLintptr indirect, GLintptr drawcount, GLsizei maxdrawcount, GLsizei stride);
#endif
#endif /* GL_NV_mesh_shader */

#ifndef GL_NV_path_rendering
#define GL_NV_path_rendering 1
#define GL_PATH_FORMAT_SVG_NV 0x9070
#define GL_PATH_FORMAT_PS_NV 0x9071
#define GL_STANDARD_FONT_NAME_NV 0x9072
#define GL_SYSTEM_FONT_NAME_NV 0x9073
#define GL_FILE_NAME_NV 0x9074
#define GL_PATH_STROKE_WIDTH_NV 0x9075
#define GL_PATH_END_CAPS_NV 0x9076
#define GL_PATH_INITIAL_END_CAP_NV 0x9077
#define GL_PATH_TERMINAL_END_CAP_NV 0x9078
#define GL_PATH_JOIN_STYLE_NV 0x9079
#define GL_PATH_MITER_LIMIT_NV 0x907A
#define GL_PATH_DASH_CAPS_NV 0x907B
#define GL_PATH_INITIAL_DASH_CAP_NV 0x907C
#define GL_PATH_TERMINAL_DASH_CAP_NV 0x907D
#define GL_PATH_DASH_OFFSET_NV 0x907E
#define GL_PATH_CLIENT_LENGTH_NV 0x907F
#define GL_PATH_FILL_MODE_NV 0x9080
#define GL_PATH_FILL_MASK_NV 0x9081
#define GL_PATH_FILL_COVER_MODE_NV 0x9082
#define GL_PATH_STROKE_COVER_MODE_NV 0x9083
#define GL_PATH_STROKE_MASK_NV 0x9084
#define GL_COUNT_UP_NV 0x9088
#define GL_COUNT_DOWN_NV 0x9089
#define GL_PATH_OBJECT_BOUNDING_BOX_NV 0x908A
#define GL_CONVEX_HULL_NV 0x908B
#define GL_BOUNDING_BOX_NV 0x908D
#define GL_TRANSLATE_X_NV 0x908E
#define GL_TRANSLATE_Y_NV 0x908F
#define GL_TRANSLATE_2D_NV 0x9090
#define GL_TRANSLATE_3D_NV 0x9091
#define GL_AFFINE_2D_NV 0x9092
#define GL_AFFINE_3D_NV 0x9094
#define GL_TRANSPOSE_AFFINE_2D_NV 0x9096
#define GL_TRANSPOSE_AFFINE_3D_NV 0x9098
#define GL_UTF8_NV 0x909A
#define GL_UTF16_NV 0x909B
#define GL_BOUNDING_BOX_OF_BOUNDING_BOXES_NV 0x909C
#define GL_PATH_COMMAND_COUNT_NV 0x909D
#define GL_PATH_COORD_COUNT_NV 0x909E
#define GL_PATH_DASH_ARRAY_COUNT_NV 0x909F
#define GL_PATH_COMPUTED_LENGTH_NV 0x90A0
#define GL_PATH_FILL_BOUNDING_BOX_NV 0x90A1
#define GL_PATH_STROKE_BOUNDING_BOX_NV 0x90A2
#define GL_SQUARE_NV 0x90A3
#define GL_ROUND_NV 0x90A4
#define GL_TRIANGULAR_NV 0x90A5
#define GL_BEVEL_NV 0x90A6
#define GL_MITER_REVERT_NV 0x90A7
#define GL_MITER_TRUNCATE_NV 0x90A8
#define GL_SKIP_MISSING_GLYPH_NV 0x90A9
#define GL_USE_MISSING_GLYPH_NV 0x90AA
#define GL_PATH_ERROR_POSITION_NV 0x90AB
#define GL_ACCUM_ADJACENT_PAIRS_NV 0x90AD
#define GL_ADJACENT_PAIRS_NV 0x90AE
#define GL_FIRST_TO_REST_NV 0x90AF
#define GL_PATH_GEN_MODE_NV 0x90B0
#define GL_PATH_GEN_COEFF_NV 0x90B1
#define GL_PATH_GEN_COMPONENTS_NV 0x90B3
#define GL_PATH_STENCIL_FUNC_NV 0x90B7
#define GL_PATH_STENCIL_REF_NV 0x90B8
#define GL_PATH_STENCIL_VALUE_MASK_NV 0x90B9
#define GL_PATH_STENCIL_DEPTH_OFFSET_FACTOR_NV 0x90BD
#define GL_PATH_STENCIL_DEPTH_OFFSET_UNITS_NV 0x90BE
#define GL_PATH_COVER_DEPTH_FUNC_NV 0x90BF
#define GL_PATH_DASH_OFFSET_RESET_NV 0x90B4
#define GL_MOVE_TO_RESETS_NV 0x90B5
#define GL_MOVE_TO_CONTINUES_NV 0x90B6
#define GL_CLOSE_PATH_NV 0x00
#define GL_MOVE_TO_NV 0x02
#define GL_RELATIVE_MOVE_TO_NV 0x03
#define GL_LINE_TO_NV 0x04
#define GL_RELATIVE_LINE_TO_NV 0x05
#define GL_HORIZONTAL_LINE_TO_NV 0x06
#define GL_RELATIVE_HORIZONTAL_LINE_TO_NV 0x07
#define GL_VERTICAL_LINE_TO_NV 0x08
#define GL_RELATIVE_VERTICAL_LINE_TO_NV 0x09
#define GL_QUADRATIC_CURVE_TO_NV 0x0A
#define GL_RELATIVE_QUADRATIC_CURVE_TO_NV 0x0B
#define GL_CUBIC_CURVE_TO_NV 0x0C
#define GL_RELATIVE_CUBIC_CURVE_TO_NV 0x0D
#define GL_SMOOTH_QUADRATIC_CURVE_TO_NV 0x0E
#define GL_RELATIVE_SMOOTH_QUADRATIC_CURVE_TO_NV 0x0F
#define GL_SMOOTH_CUBIC_CURVE_TO_NV 0x10
#define GL_RELATIVE_SMOOTH_CUBIC_CURVE_TO_NV 0x11
#define GL_SMALL_CCW_ARC_TO_NV 0x12
#define GL_RELATIVE_SMALL_CCW_ARC_TO_NV 0x13
#define GL_SMALL_CW_ARC_TO_NV 0x14
#define GL_RELATIVE_SMALL_CW_ARC_TO_NV 0x15
#define GL_LARGE_CCW_ARC_TO_NV 0x16
#define GL_RELATIVE_LARGE_CCW_ARC_TO_NV 0x17
#define GL_LARGE_CW_ARC_TO_NV 0x18
#define GL_RELATIVE_LARGE_CW_ARC_TO_NV 0x19
#define GL_RESTART_PATH_NV 0xF0
#define GL_DUP_FIRST_CUBIC_CURVE_TO_NV 0xF2
#define GL_DUP_LAST_CUBIC_CURVE_TO_NV 0xF4
#define GL_RECT_NV 0xF6
#define GL_CIRCULAR_CCW_ARC_TO_NV 0xF8
#define GL_CIRCULAR_CW_ARC_TO_NV 0xFA
#define GL_CIRCULAR_TANGENT_ARC_TO_NV 0xFC
#define GL_ARC_TO_NV 0xFE
#define GL_RELATIVE_ARC_TO_NV 0xFF
#define GL_BOLD_BIT_NV 0x01
#define GL_ITALIC_BIT_NV 0x02
#define GL_GLYPH_WIDTH_BIT_NV 0x01
#define GL_GLYPH_HEIGHT_BIT_NV 0x02
#define GL_GLYPH_HORIZONTAL_BEARING_X_BIT_NV 0x04
#define GL_GLYPH_HORIZONTAL_BEARING_Y_BIT_NV 0x08
#define GL_GLYPH_HORIZONTAL_BEARING_ADVANCE_BIT_NV 0x10
#define GL_GLYPH_VERTICAL_BEARING_X_BIT_NV 0x20
#define GL_GLYPH_VERTICAL_BEARING_Y_BIT_NV 0x40
#define GL_GLYPH_VERTICAL_BEARING_ADVANCE_BIT_NV 0x80
#define GL_GLYPH_HAS_KERNING_BIT_NV 0x100
#define GL_FONT_X_MIN_BOUNDS_BIT_NV 0x00010000
#define GL_FONT_Y_MIN_BOUNDS_BIT_NV 0x00020000
#define GL_FONT_X_MAX_BOUNDS_BIT_NV 0x00040000
#define GL_FONT_Y_MAX_BOUNDS_BIT_NV 0x00080000
#define GL_FONT_UNITS_PER_EM_BIT_NV 0x00100000
#define GL_FONT_ASCENDER_BIT_NV 0x00200000
#define GL_FONT_DESCENDER_BIT_NV 0x00400000
#define GL_FONT_HEIGHT_BIT_NV 0x00800000
#define GL_FONT_MAX_ADVANCE_WIDTH_BIT_NV 0x01000000
#define GL_FONT_MAX_ADVANCE_HEIGHT_BIT_NV 0x02000000
#define GL_FONT_UNDERLINE_POSITION_BIT_NV 0x04000000
#define GL_FONT_UNDERLINE_THICKNESS_BIT_NV 0x08000000
#define GL_FONT_HAS_KERNING_BIT_NV 0x10000000
#define GL_ROUNDED_RECT_NV 0xE8
#define GL_RELATIVE_ROUNDED_RECT_NV 0xE9
#define GL_ROUNDED_RECT2_NV 0xEA
#define GL_RELATIVE_ROUNDED_RECT2_NV 0xEB
#define GL_ROUNDED_RECT4_NV 0xEC
#define GL_RELATIVE_ROUNDED_RECT4_NV 0xED
#define GL_ROUNDED_RECT8_NV 0xEE
#define GL_RELATIVE_ROUNDED_RECT8_NV 0xEF
#define GL_RELATIVE_RECT_NV 0xF7
#define GL_FONT_GLYPHS_AVAILABLE_NV 0x9368
#define GL_FONT_TARGET_UNAVAILABLE_NV 0x9369
#define GL_FONT_UNAVAILABLE_NV 0x936A
#define GL_FONT_UNINTELLIGIBLE_NV 0x936B
#define GL_CONIC_CURVE_TO_NV 0x1A
#define GL_RELATIVE_CONIC_CURVE_TO_NV 0x1B
#define GL_FONT_NUM_GLYPH_INDICES_BIT_NV 0x20000000
#define GL_STANDARD_FONT_FORMAT_NV 0x936C
#define GL_PATH_PROJECTION_NV 0x1701
#define GL_PATH_MODELVIEW_NV 0x1700
#define GL_PATH_MODELVIEW_STACK_DEPTH_NV 0x0BA3
#define GL_PATH_MODELVIEW_MATRIX_NV 0x0BA6
#define GL_PATH_MAX_MODELVIEW_STACK_DEPTH_NV 0x0D36
#define GL_PATH_TRANSPOSE_MODELVIEW_MATRIX_NV 0x84E3
#define GL_PATH_PROJECTION_STACK_DEPTH_NV 0x0BA4
#define GL_PATH_PROJECTION_MATRIX_NV 0x0BA7
#define GL_PATH_MAX_PROJECTION_STACK_DEPTH_NV 0x0D38
#define GL_PATH_TRANSPOSE_PROJECTION_MATRIX_NV 0x84E4
#define GL_FRAGMENT_INPUT_NV 0x936D
typedef GLuint(APIENTRYP PFNGLGENPATHSNVPROC)(GLsizei range);
typedef void(APIENTRYP PFNGLDELETEPATHSNVPROC)(GLuint path, GLsizei range);
typedef GLboolean(APIENTRYP PFNGLISPATHNVPROC)(GLuint path);
typedef void(APIENTRYP PFNGLPATHCOMMANDSNVPROC)(GLuint path, GLsizei numCommands, const GLubyte* commands, GLsizei numCoords,
                                                GLenum coordType, const void* coords);
typedef void(APIENTRYP PFNGLPATHCOORDSNVPROC)(GLuint path, GLsizei numCoords, GLenum coordType, const void* coords);
typedef void(APIENTRYP PFNGLPATHSUBCOMMANDSNVPROC)(GLuint path, GLsizei commandStart, GLsizei commandsToDelete, GLsizei numCommands,
                                                   const GLubyte* commands, GLsizei numCoords, GLenum coordType, const void* coords);
typedef void(APIENTRYP PFNGLPATHSUBCOORDSNVPROC)(GLuint path, GLsizei coordStart, GLsizei numCoords, GLenum coordType, const void* coords);
typedef void(APIENTRYP PFNGLPATHSTRINGNVPROC)(GLuint path, GLenum format, GLsizei length, const void* pathString);
typedef void(APIENTRYP PFNGLPATHGLYPHSNVPROC)(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle,
                                              GLsizei numGlyphs, GLenum type, const void* charcodes, GLenum handleMissingGlyphs,
                                              GLuint pathParameterTemplate, GLfloat emScale);
typedef void(APIENTRYP PFNGLPATHGLYPHRANGENVPROC)(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle,
                                                  GLuint firstGlyph, GLsizei numGlyphs, GLenum handleMissingGlyphs,
                                                  GLuint pathParameterTemplate, GLfloat emScale);
typedef void(APIENTRYP PFNGLWEIGHTPATHSNVPROC)(GLuint resultPath, GLsizei numPaths, const GLuint* paths, const GLfloat* weights);
typedef void(APIENTRYP PFNGLCOPYPATHNVPROC)(GLuint resultPath, GLuint srcPath);
typedef void(APIENTRYP PFNGLINTERPOLATEPATHSNVPROC)(GLuint resultPath, GLuint pathA, GLuint pathB, GLfloat weight);
typedef void(APIENTRYP PFNGLTRANSFORMPATHNVPROC)(GLuint resultPath, GLuint srcPath, GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLPATHPARAMETERIVNVPROC)(GLuint path, GLenum pname, const GLint* value);
typedef void(APIENTRYP PFNGLPATHPARAMETERINVPROC)(GLuint path, GLenum pname, GLint value);
typedef void(APIENTRYP PFNGLPATHPARAMETERFVNVPROC)(GLuint path, GLenum pname, const GLfloat* value);
typedef void(APIENTRYP PFNGLPATHPARAMETERFNVPROC)(GLuint path, GLenum pname, GLfloat value);
typedef void(APIENTRYP PFNGLPATHDASHARRAYNVPROC)(GLuint path, GLsizei dashCount, const GLfloat* dashArray);
typedef void(APIENTRYP PFNGLPATHSTENCILFUNCNVPROC)(GLenum func, GLint ref, GLuint mask);
typedef void(APIENTRYP PFNGLPATHSTENCILDEPTHOFFSETNVPROC)(GLfloat factor, GLfloat units);
typedef void(APIENTRYP PFNGLSTENCILFILLPATHNVPROC)(GLuint path, GLenum fillMode, GLuint mask);
typedef void(APIENTRYP PFNGLSTENCILSTROKEPATHNVPROC)(GLuint path, GLint reference, GLuint mask);
typedef void(APIENTRYP PFNGLSTENCILFILLPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                            GLenum fillMode, GLuint mask, GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLSTENCILSTROKEPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                              GLint reference, GLuint mask, GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLPATHCOVERDEPTHFUNCNVPROC)(GLenum func);
typedef void(APIENTRYP PFNGLCOVERFILLPATHNVPROC)(GLuint path, GLenum coverMode);
typedef void(APIENTRYP PFNGLCOVERSTROKEPATHNVPROC)(GLuint path, GLenum coverMode);
typedef void(APIENTRYP PFNGLCOVERFILLPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                          GLenum coverMode, GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLCOVERSTROKEPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                            GLenum coverMode, GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLGETPATHPARAMETERIVNVPROC)(GLuint path, GLenum pname, GLint* value);
typedef void(APIENTRYP PFNGLGETPATHPARAMETERFVNVPROC)(GLuint path, GLenum pname, GLfloat* value);
typedef void(APIENTRYP PFNGLGETPATHCOMMANDSNVPROC)(GLuint path, GLubyte* commands);
typedef void(APIENTRYP PFNGLGETPATHCOORDSNVPROC)(GLuint path, GLfloat* coords);
typedef void(APIENTRYP PFNGLGETPATHDASHARRAYNVPROC)(GLuint path, GLfloat* dashArray);
typedef void(APIENTRYP PFNGLGETPATHMETRICSNVPROC)(GLbitfield metricQueryMask, GLsizei numPaths, GLenum pathNameType, const void* paths,
                                                  GLuint pathBase, GLsizei stride, GLfloat* metrics);
typedef void(APIENTRYP PFNGLGETPATHMETRICRANGENVPROC)(GLbitfield metricQueryMask, GLuint firstPathName, GLsizei numPaths, GLsizei stride,
                                                      GLfloat* metrics);
typedef void(APIENTRYP PFNGLGETPATHSPACINGNVPROC)(GLenum pathListMode, GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                  GLfloat advanceScale, GLfloat kerningScale, GLenum transformType, GLfloat* returnedSpacing);
typedef GLboolean(APIENTRYP PFNGLISPOINTINFILLPATHNVPROC)(GLuint path, GLuint mask, GLfloat x, GLfloat y);
typedef GLboolean(APIENTRYP PFNGLISPOINTINSTROKEPATHNVPROC)(GLuint path, GLfloat x, GLfloat y);
typedef GLfloat(APIENTRYP PFNGLGETPATHLENGTHNVPROC)(GLuint path, GLsizei startSegment, GLsizei numSegments);
typedef GLboolean(APIENTRYP PFNGLPOINTALONGPATHNVPROC)(GLuint path, GLsizei startSegment, GLsizei numSegments, GLfloat distance, GLfloat* x,
                                                       GLfloat* y, GLfloat* tangentX, GLfloat* tangentY);
typedef void(APIENTRYP PFNGLMATRIXLOAD3X2FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXLOAD3X3FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXLOADTRANSPOSE3X3FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXMULT3X2FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXMULT3X3FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLMATRIXMULTTRANSPOSE3X3FNVPROC)(GLenum matrixMode, const GLfloat* m);
typedef void(APIENTRYP PFNGLSTENCILTHENCOVERFILLPATHNVPROC)(GLuint path, GLenum fillMode, GLuint mask, GLenum coverMode);
typedef void(APIENTRYP PFNGLSTENCILTHENCOVERSTROKEPATHNVPROC)(GLuint path, GLint reference, GLuint mask, GLenum coverMode);
typedef void(APIENTRYP PFNGLSTENCILTHENCOVERFILLPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths,
                                                                     GLuint pathBase, GLenum fillMode, GLuint mask, GLenum coverMode,
                                                                     GLenum transformType, const GLfloat* transformValues);
typedef void(APIENTRYP PFNGLSTENCILTHENCOVERSTROKEPATHINSTANCEDNVPROC)(GLsizei numPaths, GLenum pathNameType, const void* paths,
                                                                       GLuint pathBase, GLint reference, GLuint mask, GLenum coverMode,
                                                                       GLenum transformType, const GLfloat* transformValues);
typedef GLenum(APIENTRYP PFNGLPATHGLYPHINDEXRANGENVPROC)(GLenum fontTarget, const void* fontName, GLbitfield fontStyle,
                                                         GLuint pathParameterTemplate, GLfloat emScale, GLuint baseAndCount[2]);
typedef GLenum(APIENTRYP PFNGLPATHGLYPHINDEXARRAYNVPROC)(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle,
                                                         GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
typedef GLenum(APIENTRYP PFNGLPATHMEMORYGLYPHINDEXARRAYNVPROC)(GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize,
                                                               const void* fontData, GLsizei faceIndex, GLuint firstGlyphIndex,
                                                               GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
typedef void(APIENTRYP PFNGLPROGRAMPATHFRAGMENTINPUTGENNVPROC)(GLuint program, GLint location, GLenum genMode, GLint components,
                                                               const GLfloat* coeffs);
typedef void(APIENTRYP PFNGLGETPROGRAMRESOURCEFVNVPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount,
                                                        const GLenum* props, GLsizei count, GLsizei* length, GLfloat* params);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI GLuint APIENTRY glGenPathsNV(GLsizei range);
GLAPI void APIENTRY glDeletePathsNV(GLuint path, GLsizei range);
GLAPI GLboolean APIENTRY glIsPathNV(GLuint path);
GLAPI void APIENTRY glPathCommandsNV(GLuint path, GLsizei numCommands, const GLubyte* commands, GLsizei numCoords, GLenum coordType,
                                     const void* coords);
GLAPI void APIENTRY glPathCoordsNV(GLuint path, GLsizei numCoords, GLenum coordType, const void* coords);
GLAPI void APIENTRY glPathSubCommandsNV(GLuint path, GLsizei commandStart, GLsizei commandsToDelete, GLsizei numCommands,
                                        const GLubyte* commands, GLsizei numCoords, GLenum coordType, const void* coords);
GLAPI void APIENTRY glPathSubCoordsNV(GLuint path, GLsizei coordStart, GLsizei numCoords, GLenum coordType, const void* coords);
GLAPI void APIENTRY glPathStringNV(GLuint path, GLenum format, GLsizei length, const void* pathString);
GLAPI void APIENTRY glPathGlyphsNV(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle, GLsizei numGlyphs,
                                   GLenum type, const void* charcodes, GLenum handleMissingGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
GLAPI void APIENTRY glPathGlyphRangeNV(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle, GLuint firstGlyph,
                                       GLsizei numGlyphs, GLenum handleMissingGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
GLAPI void APIENTRY glWeightPathsNV(GLuint resultPath, GLsizei numPaths, const GLuint* paths, const GLfloat* weights);
GLAPI void APIENTRY glCopyPathNV(GLuint resultPath, GLuint srcPath);
GLAPI void APIENTRY glInterpolatePathsNV(GLuint resultPath, GLuint pathA, GLuint pathB, GLfloat weight);
GLAPI void APIENTRY glTransformPathNV(GLuint resultPath, GLuint srcPath, GLenum transformType, const GLfloat* transformValues);
GLAPI void APIENTRY glPathParameterivNV(GLuint path, GLenum pname, const GLint* value);
GLAPI void APIENTRY glPathParameteriNV(GLuint path, GLenum pname, GLint value);
GLAPI void APIENTRY glPathParameterfvNV(GLuint path, GLenum pname, const GLfloat* value);
GLAPI void APIENTRY glPathParameterfNV(GLuint path, GLenum pname, GLfloat value);
GLAPI void APIENTRY glPathDashArrayNV(GLuint path, GLsizei dashCount, const GLfloat* dashArray);
GLAPI void APIENTRY glPathStencilFuncNV(GLenum func, GLint ref, GLuint mask);
GLAPI void APIENTRY glPathStencilDepthOffsetNV(GLfloat factor, GLfloat units);
GLAPI void APIENTRY glStencilFillPathNV(GLuint path, GLenum fillMode, GLuint mask);
GLAPI void APIENTRY glStencilStrokePathNV(GLuint path, GLint reference, GLuint mask);
GLAPI void APIENTRY glStencilFillPathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase, GLenum fillMode,
                                                 GLuint mask, GLenum transformType, const GLfloat* transformValues);
GLAPI void APIENTRY glStencilStrokePathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                   GLint reference, GLuint mask, GLenum transformType, const GLfloat* transformValues);
GLAPI void APIENTRY glPathCoverDepthFuncNV(GLenum func);
GLAPI void APIENTRY glCoverFillPathNV(GLuint path, GLenum coverMode);
GLAPI void APIENTRY glCoverStrokePathNV(GLuint path, GLenum coverMode);
GLAPI void APIENTRY glCoverFillPathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase, GLenum coverMode,
                                               GLenum transformType, const GLfloat* transformValues);
GLAPI void APIENTRY glCoverStrokePathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                 GLenum coverMode, GLenum transformType, const GLfloat* transformValues);
GLAPI void APIENTRY glGetPathParameterivNV(GLuint path, GLenum pname, GLint* value);
GLAPI void APIENTRY glGetPathParameterfvNV(GLuint path, GLenum pname, GLfloat* value);
GLAPI void APIENTRY glGetPathCommandsNV(GLuint path, GLubyte* commands);
GLAPI void APIENTRY glGetPathCoordsNV(GLuint path, GLfloat* coords);
GLAPI void APIENTRY glGetPathDashArrayNV(GLuint path, GLfloat* dashArray);
GLAPI void APIENTRY glGetPathMetricsNV(GLbitfield metricQueryMask, GLsizei numPaths, GLenum pathNameType, const void* paths,
                                       GLuint pathBase, GLsizei stride, GLfloat* metrics);
GLAPI void APIENTRY glGetPathMetricRangeNV(GLbitfield metricQueryMask, GLuint firstPathName, GLsizei numPaths, GLsizei stride, GLfloat* metrics);
GLAPI void APIENTRY glGetPathSpacingNV(GLenum pathListMode, GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                       GLfloat advanceScale, GLfloat kerningScale, GLenum transformType, GLfloat* returnedSpacing);
GLAPI GLboolean APIENTRY glIsPointInFillPathNV(GLuint path, GLuint mask, GLfloat x, GLfloat y);
GLAPI GLboolean APIENTRY glIsPointInStrokePathNV(GLuint path, GLfloat x, GLfloat y);
GLAPI GLfloat APIENTRY glGetPathLengthNV(GLuint path, GLsizei startSegment, GLsizei numSegments);
GLAPI GLboolean APIENTRY glPointAlongPathNV(GLuint path, GLsizei startSegment, GLsizei numSegments, GLfloat distance, GLfloat* x,
                                            GLfloat* y, GLfloat* tangentX, GLfloat* tangentY);
GLAPI void APIENTRY glMatrixLoad3x2fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glMatrixLoad3x3fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glMatrixLoadTranspose3x3fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glMatrixMult3x2fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glMatrixMult3x3fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glMatrixMultTranspose3x3fNV(GLenum matrixMode, const GLfloat* m);
GLAPI void APIENTRY glStencilThenCoverFillPathNV(GLuint path, GLenum fillMode, GLuint mask, GLenum coverMode);
GLAPI void APIENTRY glStencilThenCoverStrokePathNV(GLuint path, GLint reference, GLuint mask, GLenum coverMode);
GLAPI void APIENTRY glStencilThenCoverFillPathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                          GLenum fillMode, GLuint mask, GLenum coverMode, GLenum transformType,
                                                          const GLfloat* transformValues);
GLAPI void APIENTRY glStencilThenCoverStrokePathInstancedNV(GLsizei numPaths, GLenum pathNameType, const void* paths, GLuint pathBase,
                                                            GLint reference, GLuint mask, GLenum coverMode, GLenum transformType,
                                                            const GLfloat* transformValues);
GLAPI GLenum APIENTRY glPathGlyphIndexRangeNV(GLenum fontTarget, const void* fontName, GLbitfield fontStyle, GLuint pathParameterTemplate,
                                              GLfloat emScale, GLuint baseAndCount[2]);
GLAPI GLenum APIENTRY glPathGlyphIndexArrayNV(GLuint firstPathName, GLenum fontTarget, const void* fontName, GLbitfield fontStyle,
                                              GLuint firstGlyphIndex, GLsizei numGlyphs, GLuint pathParameterTemplate, GLfloat emScale);
GLAPI GLenum APIENTRY glPathMemoryGlyphIndexArrayNV(GLuint firstPathName, GLenum fontTarget, GLsizeiptr fontSize, const void* fontData,
                                                    GLsizei faceIndex, GLuint firstGlyphIndex, GLsizei numGlyphs,
                                                    GLuint pathParameterTemplate, GLfloat emScale);
GLAPI void APIENTRY glProgramPathFragmentInputGenNV(GLuint program, GLint location, GLenum genMode, GLint components, const GLfloat* coeffs);
GLAPI void APIENTRY glGetProgramResourcefvNV(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum* props,
                                             GLsizei count, GLsizei* length, GLfloat* params);
#endif
#endif /* GL_NV_path_rendering */

/* GL_NV_path_rendering_shared_edge: defines one token (GL_SHARED_EDGE_NV) and
 * declares no entry points in this header. */
#ifndef GL_NV_path_rendering_shared_edge
#define GL_NV_path_rendering_shared_edge 1
#define GL_SHARED_EDGE_NV 0xC0
#endif /* GL_NV_path_rendering_shared_edge */

/* GL_NV_representative_fragment_test: defines one token
 * (GL_REPRESENTATIVE_FRAGMENT_TEST_NV) and declares no entry points in this header. */
#ifndef GL_NV_representative_fragment_test
#define GL_NV_representative_fragment_test 1
#define GL_REPRESENTATIVE_FRAGMENT_TEST_NV 0x937F
#endif /* GL_NV_representative_fragment_test */

/* GL_NV_sample_locations: eight enum tokens plus three entry points.
 * Each entry point gets a PFN...PROC function-pointer typedef unconditionally;
 * the matching direct prototypes are only declared when GL_GLEXT_PROTOTYPES is defined. */
#ifndef GL_NV_sample_locations
#define GL_NV_sample_locations 1
#define GL_SAMPLE_LOCATION_SUBPIXEL_BITS_NV 0x933D
#define GL_SAMPLE_LOCATION_PIXEL_GRID_WIDTH_NV 0x933E
#define GL_SAMPLE_LOCATION_PIXEL_GRID_HEIGHT_NV 0x933F
#define GL_PROGRAMMABLE_SAMPLE_LOCATION_TABLE_SIZE_NV 0x9340
#define GL_SAMPLE_LOCATION_NV 0x8E50
#define GL_PROGRAMMABLE_SAMPLE_LOCATION_NV 0x9341
#define GL_FRAMEBUFFER_PROGRAMMABLE_SAMPLE_LOCATIONS_NV 0x9342
#define GL_FRAMEBUFFER_SAMPLE_LOCATION_PIXEL_GRID_NV 0x9343
typedef void(APIENTRYP PFNGLFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)(GLenum target, GLuint start, GLsizei count, const GLfloat* v);
typedef void(APIENTRYP PFNGLNAMEDFRAMEBUFFERSAMPLELOCATIONSFVNVPROC)(GLuint framebuffer, GLuint start, GLsizei count, const GLfloat* v);
typedef void(APIENTRYP PFNGLRESOLVEDEPTHVALUESNVPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFramebufferSampleLocationsfvNV(GLenum target, GLuint start, GLsizei count, const GLfloat* v);
GLAPI void APIENTRY glNamedFramebufferSampleLocationsfvNV(GLuint framebuffer, GLuint start, GLsizei count, const GLfloat* v);
GLAPI void APIENTRY glResolveDepthValuesNV(void);
#endif
#endif /* GL_NV_sample_locations */

/* GL_NV_sample_mask_override_coverage: presence macro only; this header declares
 * no tokens or entry points for it. */
#ifndef GL_NV_sample_mask_override_coverage
#define GL_NV_sample_mask_override_coverage 1
#endif /* GL_NV_sample_mask_override_coverage */

/* GL_NV_scissor_exclusive: two enum tokens plus two entry points
 * (glScissorExclusiveNV and its array variant). Function-pointer typedefs are
 * always declared; direct prototypes require GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_scissor_exclusive
#define GL_NV_scissor_exclusive 1
#define GL_SCISSOR_TEST_EXCLUSIVE_NV 0x9555
#define GL_SCISSOR_BOX_EXCLUSIVE_NV 0x9556
typedef void(APIENTRYP PFNGLSCISSOREXCLUSIVENVPROC)(GLint x, GLint y, GLsizei width, GLsizei height);
typedef void(APIENTRYP PFNGLSCISSOREXCLUSIVEARRAYVNVPROC)(GLuint first, GLsizei count, const GLint* v);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glScissorExclusiveNV(GLint x, GLint y, GLsizei width, GLsizei height);
GLAPI void APIENTRY glScissorExclusiveArrayvNV(GLuint first, GLsizei count, const GLint* v);
#endif
#endif /* GL_NV_scissor_exclusive */

/* The five GL_NV_shader_atomic_* guards below are presence macros only: this
 * header declares no tokens or entry points for any of them. */
#ifndef GL_NV_shader_atomic_counters
#define GL_NV_shader_atomic_counters 1
#endif /* GL_NV_shader_atomic_counters */

#ifndef GL_NV_shader_atomic_float
#define GL_NV_shader_atomic_float 1
#endif /* GL_NV_shader_atomic_float */

#ifndef GL_NV_shader_atomic_float64
#define GL_NV_shader_atomic_float64 1
#endif /* GL_NV_shader_atomic_float64 */

#ifndef GL_NV_shader_atomic_fp16_vector
#define GL_NV_shader_atomic_fp16_vector 1
#endif /* GL_NV_shader_atomic_fp16_vector */

#ifndef GL_NV_shader_atomic_int64
#define GL_NV_shader_atomic_int64 1
#endif /* GL_NV_shader_atomic_int64 */

/* GL_NV_shader_buffer_load: three enum tokens plus fourteen entry points
 * (buffer residency control/queries and GLuint64EXT uniform setters/getters).
 * Function-pointer typedefs are always declared; direct prototypes require
 * GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_shader_buffer_load
#define GL_NV_shader_buffer_load 1
#define GL_BUFFER_GPU_ADDRESS_NV 0x8F1D
#define GL_GPU_ADDRESS_NV 0x8F34
#define GL_MAX_SHADER_BUFFER_ADDRESS_NV 0x8F35
typedef void(APIENTRYP PFNGLMAKEBUFFERRESIDENTNVPROC)(GLenum target, GLenum access);
typedef void(APIENTRYP PFNGLMAKEBUFFERNONRESIDENTNVPROC)(GLenum target);
typedef GLboolean(APIENTRYP PFNGLISBUFFERRESIDENTNVPROC)(GLenum target);
typedef void(APIENTRYP PFNGLMAKENAMEDBUFFERRESIDENTNVPROC)(GLuint buffer, GLenum access);
typedef void(APIENTRYP PFNGLMAKENAMEDBUFFERNONRESIDENTNVPROC)(GLuint buffer);
typedef GLboolean(APIENTRYP PFNGLISNAMEDBUFFERRESIDENTNVPROC)(GLuint buffer);
typedef void(APIENTRYP PFNGLGETBUFFERPARAMETERUI64VNVPROC)(GLenum target, GLenum pname, GLuint64EXT* params);
typedef void(APIENTRYP PFNGLGETNAMEDBUFFERPARAMETERUI64VNVPROC)(GLuint buffer, GLenum pname, GLuint64EXT* params);
typedef void(APIENTRYP PFNGLGETINTEGERUI64VNVPROC)(GLenum value, GLuint64EXT* result);
typedef void(APIENTRYP PFNGLUNIFORMUI64NVPROC)(GLint location, GLuint64EXT value);
typedef void(APIENTRYP PFNGLUNIFORMUI64VNVPROC)(GLint location, GLsizei count, const GLuint64EXT* value);
typedef void(APIENTRYP PFNGLGETUNIFORMUI64VNVPROC)(GLuint program, GLint location, GLuint64EXT* params);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMUI64NVPROC)(GLuint program, GLint location, GLuint64EXT value);
typedef void(APIENTRYP PFNGLPROGRAMUNIFORMUI64VNVPROC)(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glMakeBufferResidentNV(GLenum target, GLenum access);
GLAPI void APIENTRY glMakeBufferNonResidentNV(GLenum target);
GLAPI GLboolean APIENTRY glIsBufferResidentNV(GLenum target);
GLAPI void APIENTRY glMakeNamedBufferResidentNV(GLuint buffer, GLenum access);
GLAPI void APIENTRY glMakeNamedBufferNonResidentNV(GLuint buffer);
GLAPI GLboolean APIENTRY glIsNamedBufferResidentNV(GLuint buffer);
GLAPI void APIENTRY glGetBufferParameterui64vNV(GLenum target, GLenum pname, GLuint64EXT* params);
GLAPI void APIENTRY glGetNamedBufferParameterui64vNV(GLuint buffer, GLenum pname, GLuint64EXT* params);
GLAPI void APIENTRY glGetIntegerui64vNV(GLenum value, GLuint64EXT* result);
GLAPI void APIENTRY glUniformui64NV(GLint location, GLuint64EXT value);
GLAPI void APIENTRY glUniformui64vNV(GLint location, GLsizei count, const GLuint64EXT* value);
GLAPI void APIENTRY glGetUniformui64vNV(GLuint program, GLint location, GLuint64EXT* params);
GLAPI void APIENTRY glProgramUniformui64NV(GLuint program, GLint location, GLuint64EXT value);
GLAPI void APIENTRY glProgramUniformui64vNV(GLuint program, GLint location, GLsizei count, const GLuint64EXT* value);
#endif
#endif /* GL_NV_shader_buffer_load */

/* GL_NV_shader_buffer_store: one bit-flag token; no entry points declared here. */
#ifndef GL_NV_shader_buffer_store
#define GL_NV_shader_buffer_store 1
#define GL_SHADER_GLOBAL_ACCESS_BARRIER_BIT_NV 0x00000010
#endif /* GL_NV_shader_buffer_store */

/* GL_NV_shader_subgroup_partitioned: one bit-flag token; no entry points declared here. */
#ifndef GL_NV_shader_subgroup_partitioned
#define GL_NV_shader_subgroup_partitioned 1
#define GL_SUBGROUP_FEATURE_PARTITIONED_BIT_NV 0x00000100
#endif /* GL_NV_shader_subgroup_partitioned */

/* GL_NV_shader_texture_footprint: presence macro only. */
#ifndef GL_NV_shader_texture_footprint
#define GL_NV_shader_texture_footprint 1
#endif /* GL_NV_shader_texture_footprint */

/* GL_NV_shader_thread_group: three enum tokens; no entry points declared here. */
#ifndef GL_NV_shader_thread_group
#define GL_NV_shader_thread_group 1
#define GL_WARP_SIZE_NV 0x9339
#define GL_WARPS_PER_SM_NV 0x933A
#define GL_SM_COUNT_NV 0x933B
#endif /* GL_NV_shader_thread_group */

/* GL_NV_shader_thread_shuffle: presence macro only. */
#ifndef GL_NV_shader_thread_shuffle
#define GL_NV_shader_thread_shuffle 1
#endif /* GL_NV_shader_thread_shuffle */

/* GL_NV_shading_rate_image: twenty-one enum tokens plus seven entry points.
 * Function-pointer typedefs are always declared; direct prototypes require
 * GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_shading_rate_image
#define GL_NV_shading_rate_image 1
#define GL_SHADING_RATE_IMAGE_NV 0x9563
#define GL_SHADING_RATE_NO_INVOCATIONS_NV 0x9564
#define GL_SHADING_RATE_1_INVOCATION_PER_PIXEL_NV 0x9565
#define GL_SHADING_RATE_1_INVOCATION_PER_1X2_PIXELS_NV 0x9566
#define GL_SHADING_RATE_1_INVOCATION_PER_2X1_PIXELS_NV 0x9567
#define GL_SHADING_RATE_1_INVOCATION_PER_2X2_PIXELS_NV 0x9568
#define GL_SHADING_RATE_1_INVOCATION_PER_2X4_PIXELS_NV 0x9569
#define GL_SHADING_RATE_1_INVOCATION_PER_4X2_PIXELS_NV 0x956A
#define GL_SHADING_RATE_1_INVOCATION_PER_4X4_PIXELS_NV 0x956B
#define GL_SHADING_RATE_2_INVOCATIONS_PER_PIXEL_NV 0x956C
#define GL_SHADING_RATE_4_INVOCATIONS_PER_PIXEL_NV 0x956D
#define GL_SHADING_RATE_8_INVOCATIONS_PER_PIXEL_NV 0x956E
#define GL_SHADING_RATE_16_INVOCATIONS_PER_PIXEL_NV 0x956F
#define GL_SHADING_RATE_IMAGE_BINDING_NV 0x955B
#define GL_SHADING_RATE_IMAGE_TEXEL_WIDTH_NV 0x955C
#define GL_SHADING_RATE_IMAGE_TEXEL_HEIGHT_NV 0x955D
#define GL_SHADING_RATE_IMAGE_PALETTE_SIZE_NV 0x955E
#define GL_MAX_COARSE_FRAGMENT_SAMPLES_NV 0x955F
#define GL_SHADING_RATE_SAMPLE_ORDER_DEFAULT_NV 0x95AE
#define GL_SHADING_RATE_SAMPLE_ORDER_PIXEL_MAJOR_NV 0x95AF
#define GL_SHADING_RATE_SAMPLE_ORDER_SAMPLE_MAJOR_NV 0x95B0
typedef void(APIENTRYP PFNGLBINDSHADINGRATEIMAGENVPROC)(GLuint texture);
typedef void(APIENTRYP PFNGLGETSHADINGRATEIMAGEPALETTENVPROC)(GLuint viewport, GLuint entry, GLenum* rate);
typedef void(APIENTRYP PFNGLGETSHADINGRATESAMPLELOCATIONIVNVPROC)(GLenum rate, GLuint samples, GLuint index, GLint* location);
typedef void(APIENTRYP PFNGLSHADINGRATEIMAGEBARRIERNVPROC)(GLboolean synchronize);
typedef void(APIENTRYP PFNGLSHADINGRATEIMAGEPALETTENVPROC)(GLuint viewport, GLuint first, GLsizei count, const GLenum* rates);
typedef void(APIENTRYP PFNGLSHADINGRATESAMPLEORDERNVPROC)(GLenum order);
typedef void(APIENTRYP PFNGLSHADINGRATESAMPLEORDERCUSTOMNVPROC)(GLenum rate, GLuint samples, const GLint* locations);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBindShadingRateImageNV(GLuint texture);
GLAPI void APIENTRY glGetShadingRateImagePaletteNV(GLuint viewport, GLuint entry, GLenum* rate);
GLAPI void APIENTRY glGetShadingRateSampleLocationivNV(GLenum rate, GLuint samples, GLuint index, GLint* location);
GLAPI void APIENTRY glShadingRateImageBarrierNV(GLboolean synchronize);
GLAPI void APIENTRY glShadingRateImagePaletteNV(GLuint viewport, GLuint first, GLsizei count, const GLenum* rates);
GLAPI void APIENTRY glShadingRateSampleOrderNV(GLenum order);
GLAPI void APIENTRY glShadingRateSampleOrderCustomNV(GLenum rate, GLuint samples, const GLint* locations);
#endif
#endif /* GL_NV_shading_rate_image */

/* GL_NV_stereo_view_rendering: presence macro only; this header declares no
 * tokens or entry points for it. */
#ifndef GL_NV_stereo_view_rendering
#define GL_NV_stereo_view_rendering 1
#endif /* GL_NV_stereo_view_rendering */

/* GL_NV_texture_barrier: no tokens; one argument-less entry point,
 * glTextureBarrierNV. The function-pointer typedef is always declared; the
 * direct prototype requires GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_texture_barrier
#define GL_NV_texture_barrier 1
typedef void(APIENTRYP PFNGLTEXTUREBARRIERNVPROC)(void);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glTextureBarrierNV(void);
#endif
#endif /* GL_NV_texture_barrier */

/* GL_NV_texture_rectangle_compressed: presence macro only. */
#ifndef GL_NV_texture_rectangle_compressed
#define GL_NV_texture_rectangle_compressed 1
#endif /* GL_NV_texture_rectangle_compressed */

/* GL_NV_uniform_buffer_unified_memory: three enum tokens; no entry points
 * declared here. */
#ifndef GL_NV_uniform_buffer_unified_memory
#define GL_NV_uniform_buffer_unified_memory 1
#define GL_UNIFORM_BUFFER_UNIFIED_NV 0x936E
#define GL_UNIFORM_BUFFER_ADDRESS_NV 0x936F
#define GL_UNIFORM_BUFFER_LENGTH_NV 0x9370
#endif /* GL_NV_uniform_buffer_unified_memory */

/* GL_NV_vertex_attrib_integer_64bit: no tokens; nineteen entry points.
 * The glVertexAttribL{1..4}{i64,ui64}[v]NV setters take GLint64EXT/GLuint64EXT
 * values (by value or through a pointer for the 'v' forms), alongside two
 * getters and glVertexAttribLFormatNV. Function-pointer typedefs are always
 * declared; direct prototypes require GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_vertex_attrib_integer_64bit
#define GL_NV_vertex_attrib_integer_64bit 1
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1I64NVPROC)(GLuint index, GLint64EXT x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2I64NVPROC)(GLuint index, GLint64EXT x, GLint64EXT y);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3I64NVPROC)(GLuint index, GLint64EXT x, GLint64EXT y, GLint64EXT z);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4I64NVPROC)(GLuint index, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1I64VNVPROC)(GLuint index, const GLint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2I64VNVPROC)(GLuint index, const GLint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3I64VNVPROC)(GLuint index, const GLint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4I64VNVPROC)(GLuint index, const GLint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1UI64NVPROC)(GLuint index, GLuint64EXT x);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2UI64NVPROC)(GLuint index, GLuint64EXT x, GLuint64EXT y);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3UI64NVPROC)(GLuint index, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4UI64NVPROC)(GLuint index, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL1UI64VNVPROC)(GLuint index, const GLuint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL2UI64VNVPROC)(GLuint index, const GLuint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL3UI64VNVPROC)(GLuint index, const GLuint64EXT* v);
typedef void(APIENTRYP PFNGLVERTEXATTRIBL4UI64VNVPROC)(GLuint index, const GLuint64EXT* v);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBLI64VNVPROC)(GLuint index, GLenum pname, GLint64EXT* params);
typedef void(APIENTRYP PFNGLGETVERTEXATTRIBLUI64VNVPROC)(GLuint index, GLenum pname, GLuint64EXT* params);
typedef void(APIENTRYP PFNGLVERTEXATTRIBLFORMATNVPROC)(GLuint index, GLint size, GLenum type, GLsizei stride);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glVertexAttribL1i64NV(GLuint index, GLint64EXT x);
GLAPI void APIENTRY glVertexAttribL2i64NV(GLuint index, GLint64EXT x, GLint64EXT y);
GLAPI void APIENTRY glVertexAttribL3i64NV(GLuint index, GLint64EXT x, GLint64EXT y, GLint64EXT z);
GLAPI void APIENTRY glVertexAttribL4i64NV(GLuint index, GLint64EXT x, GLint64EXT y, GLint64EXT z, GLint64EXT w);
GLAPI void APIENTRY glVertexAttribL1i64vNV(GLuint index, const GLint64EXT* v);
GLAPI void APIENTRY glVertexAttribL2i64vNV(GLuint index, const GLint64EXT* v);
GLAPI void APIENTRY glVertexAttribL3i64vNV(GLuint index, const GLint64EXT* v);
GLAPI void APIENTRY glVertexAttribL4i64vNV(GLuint index, const GLint64EXT* v);
GLAPI void APIENTRY glVertexAttribL1ui64NV(GLuint index, GLuint64EXT x);
GLAPI void APIENTRY glVertexAttribL2ui64NV(GLuint index, GLuint64EXT x, GLuint64EXT y);
GLAPI void APIENTRY glVertexAttribL3ui64NV(GLuint index, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z);
GLAPI void APIENTRY glVertexAttribL4ui64NV(GLuint index, GLuint64EXT x, GLuint64EXT y, GLuint64EXT z, GLuint64EXT w);
GLAPI void APIENTRY glVertexAttribL1ui64vNV(GLuint index, const GLuint64EXT* v);
GLAPI void APIENTRY glVertexAttribL2ui64vNV(GLuint index, const GLuint64EXT* v);
GLAPI void APIENTRY glVertexAttribL3ui64vNV(GLuint index, const GLuint64EXT* v);
GLAPI void APIENTRY glVertexAttribL4ui64vNV(GLuint index, const GLuint64EXT* v);
GLAPI void APIENTRY glGetVertexAttribLi64vNV(GLuint index, GLenum pname, GLint64EXT* params);
GLAPI void APIENTRY glGetVertexAttribLui64vNV(GLuint index, GLenum pname, GLuint64EXT* params);
GLAPI void APIENTRY glVertexAttribLFormatNV(GLuint index, GLint size, GLenum type, GLsizei stride);
#endif
#endif /* GL_NV_vertex_attrib_integer_64bit */

/* GL_NV_vertex_buffer_unified_memory: twenty-five enum tokens (the *_UNIFIED_NV,
 * *_ADDRESS_NV and *_LENGTH_NV families) plus twelve entry points:
 * glBufferAddressRangeNV, the per-array gl*FormatNV setters, and
 * glGetIntegerui64i_vNV. Function-pointer typedefs are always declared; direct
 * prototypes require GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_vertex_buffer_unified_memory
#define GL_NV_vertex_buffer_unified_memory 1
#define GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV 0x8F1E
#define GL_ELEMENT_ARRAY_UNIFIED_NV 0x8F1F
#define GL_VERTEX_ATTRIB_ARRAY_ADDRESS_NV 0x8F20
#define GL_VERTEX_ARRAY_ADDRESS_NV 0x8F21
#define GL_NORMAL_ARRAY_ADDRESS_NV 0x8F22
#define GL_COLOR_ARRAY_ADDRESS_NV 0x8F23
#define GL_INDEX_ARRAY_ADDRESS_NV 0x8F24
#define GL_TEXTURE_COORD_ARRAY_ADDRESS_NV 0x8F25
#define GL_EDGE_FLAG_ARRAY_ADDRESS_NV 0x8F26
#define GL_SECONDARY_COLOR_ARRAY_ADDRESS_NV 0x8F27
#define GL_FOG_COORD_ARRAY_ADDRESS_NV 0x8F28
#define GL_ELEMENT_ARRAY_ADDRESS_NV 0x8F29
#define GL_VERTEX_ATTRIB_ARRAY_LENGTH_NV 0x8F2A
#define GL_VERTEX_ARRAY_LENGTH_NV 0x8F2B
#define GL_NORMAL_ARRAY_LENGTH_NV 0x8F2C
#define GL_COLOR_ARRAY_LENGTH_NV 0x8F2D
#define GL_INDEX_ARRAY_LENGTH_NV 0x8F2E
#define GL_TEXTURE_COORD_ARRAY_LENGTH_NV 0x8F2F
#define GL_EDGE_FLAG_ARRAY_LENGTH_NV 0x8F30
#define GL_SECONDARY_COLOR_ARRAY_LENGTH_NV 0x8F31
#define GL_FOG_COORD_ARRAY_LENGTH_NV 0x8F32
#define GL_ELEMENT_ARRAY_LENGTH_NV 0x8F33
#define GL_DRAW_INDIRECT_UNIFIED_NV 0x8F40
#define GL_DRAW_INDIRECT_ADDRESS_NV 0x8F41
#define GL_DRAW_INDIRECT_LENGTH_NV 0x8F42
typedef void(APIENTRYP PFNGLBUFFERADDRESSRANGENVPROC)(GLenum pname, GLuint index, GLuint64EXT address, GLsizeiptr length);
typedef void(APIENTRYP PFNGLVERTEXFORMATNVPROC)(GLint size, GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLNORMALFORMATNVPROC)(GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLCOLORFORMATNVPROC)(GLint size, GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLINDEXFORMATNVPROC)(GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLTEXCOORDFORMATNVPROC)(GLint size, GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLEDGEFLAGFORMATNVPROC)(GLsizei stride);
typedef void(APIENTRYP PFNGLSECONDARYCOLORFORMATNVPROC)(GLint size, GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLFOGCOORDFORMATNVPROC)(GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXATTRIBFORMATNVPROC)(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride);
typedef void(APIENTRYP PFNGLVERTEXATTRIBIFORMATNVPROC)(GLuint index, GLint size, GLenum type, GLsizei stride);
typedef void(APIENTRYP PFNGLGETINTEGERUI64I_VNVPROC)(GLenum value, GLuint index, GLuint64EXT* result);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glBufferAddressRangeNV(GLenum pname, GLuint index, GLuint64EXT address, GLsizeiptr length);
GLAPI void APIENTRY glVertexFormatNV(GLint size, GLenum type, GLsizei stride);
GLAPI void APIENTRY glNormalFormatNV(GLenum type, GLsizei stride);
GLAPI void APIENTRY glColorFormatNV(GLint size, GLenum type, GLsizei stride);
GLAPI void APIENTRY glIndexFormatNV(GLenum type, GLsizei stride);
GLAPI void APIENTRY glTexCoordFormatNV(GLint size, GLenum type, GLsizei stride);
GLAPI void APIENTRY glEdgeFlagFormatNV(GLsizei stride);
GLAPI void APIENTRY glSecondaryColorFormatNV(GLint size, GLenum type, GLsizei stride);
GLAPI void APIENTRY glFogCoordFormatNV(GLenum type, GLsizei stride);
GLAPI void APIENTRY glVertexAttribFormatNV(GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride);
GLAPI void APIENTRY glVertexAttribIFormatNV(GLuint index, GLint size, GLenum type, GLsizei stride);
GLAPI void APIENTRY glGetIntegerui64i_vNV(GLenum value, GLuint index, GLuint64EXT* result);
#endif
#endif /* GL_NV_vertex_buffer_unified_memory */

/* GL_NV_viewport_array2: presence macro only; this header declares no tokens or
 * entry points for it. */
#ifndef GL_NV_viewport_array2
#define GL_NV_viewport_array2 1
#endif /* GL_NV_viewport_array2 */

/* GL_NV_viewport_swizzle: twelve enum tokens plus one entry point,
 * glViewportSwizzleNV, which takes a viewport index and one swizzle enum per
 * component (x, y, z, w). The function-pointer typedef is always declared; the
 * direct prototype requires GL_GLEXT_PROTOTYPES. */
#ifndef GL_NV_viewport_swizzle
#define GL_NV_viewport_swizzle 1
#define GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV 0x9350
#define GL_VIEWPORT_SWIZZLE_NEGATIVE_X_NV 0x9351
#define GL_VIEWPORT_SWIZZLE_POSITIVE_Y_NV 0x9352
#define GL_VIEWPORT_SWIZZLE_NEGATIVE_Y_NV 0x9353
#define GL_VIEWPORT_SWIZZLE_POSITIVE_Z_NV 0x9354
#define GL_VIEWPORT_SWIZZLE_NEGATIVE_Z_NV 0x9355
#define GL_VIEWPORT_SWIZZLE_POSITIVE_W_NV 0x9356
#define GL_VIEWPORT_SWIZZLE_NEGATIVE_W_NV 0x9357
#define GL_VIEWPORT_SWIZZLE_X_NV 0x9358
#define GL_VIEWPORT_SWIZZLE_Y_NV 0x9359
#define GL_VIEWPORT_SWIZZLE_Z_NV 0x935A
#define GL_VIEWPORT_SWIZZLE_W_NV 0x935B
typedef void(APIENTRYP PFNGLVIEWPORTSWIZZLENVPROC)(GLuint index, GLenum swizzlex, GLenum swizzley, GLenum swizzlez, GLenum swizzlew);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glViewportSwizzleNV(GLuint index, GLenum swizzlex, GLenum swizzley, GLenum swizzlez, GLenum swizzlew);
#endif
#endif /* GL_NV_viewport_swizzle */

/* GL_OVR_multiview: four enum tokens plus one entry point,
 * glFramebufferTextureMultiviewOVR. The function-pointer typedef is always
 * declared; the direct prototype requires GL_GLEXT_PROTOTYPES. */
#ifndef GL_OVR_multiview
#define GL_OVR_multiview 1
#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_NUM_VIEWS_OVR 0x9630
#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_BASE_VIEW_INDEX_OVR 0x9632
#define GL_MAX_VIEWS_OVR 0x9631
#define GL_FRAMEBUFFER_INCOMPLETE_VIEW_TARGETS_OVR 0x9633
typedef void(APIENTRYP PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)(GLenum target, GLenum attachment, GLuint texture, GLint level,
                                                                GLint baseViewIndex, GLsizei numViews);
#ifdef GL_GLEXT_PROTOTYPES
GLAPI void APIENTRY glFramebufferTextureMultiviewOVR(GLenum target, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex,
                                                     GLsizei numViews);
#endif
#endif /* GL_OVR_multiview */

/* GL_OVR_multiview2: presence macro only; this header declares no tokens or
 * entry points for it. */
#ifndef GL_OVR_multiview2
#define GL_OVR_multiview2 1
#endif /* GL_OVR_multiview2 */

#ifdef __cplusplus
}
#endif

#endif


================================================
FILE: ThunkLibs/libGL/libGL_Guest.cpp
================================================
/*
$info$
tags: thunklibs|GL
desc: Handles glXGetProcAddress
$end_info$
*/

#define GL_GLEXT_PROTOTYPES 1
#define GLX_GLXEXT_PROTOTYPES 1

#include <GL/glx.h>
#include <GL/glxext.h>
#include <GL/gl.h>
#include <GL/glext.h>

#undef GL_ARB_viewport_array
#include "glcorearb.h"

#include <dlfcn.h>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_map>

#include "common/Guest.h"

#include "thunkgen_guest_libGL.inl"

typedef void voidFunc();

// Maps OpenGL API function names to the address of a guest function which is
// linked to the corresponding host function pointer
const std::unordered_map<std::string_view, uintptr_t /* guest function address */> HostPtrInvokers = std::invoke([]() {
#define PAIR(name, unused) Ret[#name] = reinterpret_cast<uintptr_t>(GetCallerForHostFunction(name));
  std::unordered_map<std::string_view, uintptr_t> Ret;
  FOREACH_internal_SYMBOL(PAIR);
  return Ret;
#undef PAIR
});

extern "C" {
// Guest-side glXGetProcAddress.
//
// Asks the host for `procname` first; a null host result is passed straight
// through. Otherwise the returned address is linked to the matching guest
// caller stub from HostPtrInvokers before being handed back. Names the host
// knows but the interface definition lacks yield nullptr (with a warning on
// stderr), except for glXGetProcAddress[ARB] itself, which resolves to this
// function.
voidFunc* glXGetProcAddress(const GLubyte* procname) {
  const auto HostStub = fexfn_pack_glXGetProcAddress(procname);
  if (!HostStub) {
    return nullptr;
  }

  const std::string_view Requested {reinterpret_cast<const char*>(procname)};

  const auto Invoker = HostPtrInvokers.find(Requested);
  if (Invoker != HostPtrInvokers.end()) {
    LinkAddressToFunction(reinterpret_cast<uintptr_t>(HostStub), Invoker->second);
    return HostStub;
  }

  // Some games look up glXGetProcAddress through glXGetProcAddress for unknown
  // reasons; answer with this very function.
  const bool IsSelfQuery = Requested == "glXGetProcAddress" || Requested == "glXGetProcAddressARB";
  if (IsSelfQuery) {
    return reinterpret_cast<voidFunc*>(glXGetProcAddress);
  }

  // Known to the host but missing from our interface definition: not fatal,
  // but worth a warning. Some games query leaked GLES symbols without ever
  // calling them, e.g. glFrustumf (ES 1.x):
  //  - Papers, Please
  //  - Dicey Dungeons
  fprintf(stderr, "glXGetProcAddress: not found %s\n", procname);
  return nullptr;
}

// ARB-suffixed entry point; resolution is delegated unchanged to glXGetProcAddress.
voidFunc* glXGetProcAddressARB(const GLubyte* procname) {
  voidFunc* const Resolved = glXGetProcAddress(procname);
  return Resolved;
}
}

// Forwarding shim around malloc() that is declared without a noexcept
// specifier, so that decltype(malloc_wrapper) is a plain function type when it
// is handed to CallbackUnpack.
static void* malloc_wrapper(size_t size) {
  void* const Allocation = malloc(size);
  return Allocation;
}

static void OnInit() {
  fexfn_pack_GL_SetGuestMalloc((uintptr_t)malloc_wrapper, (uintptr_t)CallbackUnpack<decltype(malloc_wrapper)>::Unpack);
  fexfn_pack_GL_SetGuestXSync((uintptr_t)XSync, (uintptr_t)CallbackUnpack<decltype(XSync)>::Unpack);
  fexfn_pack_GL_SetGuestXGetVisualInfo((uintptr_t)XGetVisualInfo, (uintptr_t)CallbackUnpack<decltype(XGetVisualInfo)>::Unpack);
  fexfn_pack_GL_SetGuestXDisplayString((uintptr_t)XDisplayString, (uintptr_t)CallbackUnpack<decltype(XDisplayString)>::Unpack);
}

// libGL.so must pull in libX11.so as a dependency. Referencing some libX11
// symbol here prevents the linker from optimizing away the unused dependency.
// The variable has external linkage and is never read in this file; it exists
// only to hold the reference to XSetErrorHandler.
auto implicit_libx11_dependency = XSetErrorHandler;

// NOTE(review): LOAD_LIB_INIT is defined outside this file (presumably in
// common/Guest.h); it appears to set up the libGL thunk library and arrange
// for OnInit to run at load time — confirm against the macro definition.
LOAD_LIB_INIT(libGL, OnInit)


================================================
FILE: ThunkLibs/libGL/libGL_Host.cpp
================================================
/*
$info$
tags: thunklibs|GL
desc: Uses glXGetProcAddress instead of dlsym
$end_info$
*/

#include <cstdio>
#include <cstdlib>
#include <string_view>

#define GL_GLEXT_PROTOTYPES 1
#define GLX_GLXEXT_PROTOTYPES 1

#include "glcorearb.h"

#include <GL/glx.h>
#include <GL/glxext.h>
#include <GL/gl.h>
#include <GL/glext.h>
#include <xcb/xcb.h>

#include "common/Host.h"
#include "common/X11Manager.h"

// Specialization of host_layout for X11 Display pointers. Constructing one from
// a guest_layout looks up the host-side display for the guest's Display via
// x11_manager; destroying it flushes that host display.
template<>
struct host_layout<_XDisplay*> {
  // Host-side display obtained from x11_manager.GuestToHostDisplay(guest_display)
  _XDisplay* data;
  // The guest's original Display pointer, as returned by force_get_host_pointer()
  _XDisplay* guest_display;

  host_layout(guest_layout<_XDisplay*>&);

  ~host_layout();
};

// Single X11Manager instance for this thunk library. It is used below to
// translate guest displays to host displays, to flush host displays, and to
// hold the guest X11 callbacks (GuestXSync, GuestXGetVisualInfo,
// GuestXDisplayString).
static X11Manager x11_manager;

// Host-callable pointer to the guest's malloc. Null until
// fexfn_impl_libGL_GL_SetGuestMalloc installs a trampoline into it.
static void* (*GuestMalloc)(guest_size_t) = nullptr;

// Captures the guest's Display pointer and resolves the corresponding host
// display through x11_manager.
host_layout<_XDisplay*>::host_layout(guest_layout<_XDisplay*>& guest)
  : guest_display(guest.force_get_host_pointer()) {
  // `data` is declared before `guest_display` in the struct, so it is assigned
  // here in the body (after guest_display has been initialized) rather than in
  // the member initializer list.
  data = x11_manager.GuestToHostDisplay(guest_display);
}

// Runs when the host_layout goes out of scope and flushes the host display
// that was resolved in the constructor.
host_layout<_XDisplay*>::~host_layout() {
  // Flush host-side event queue to make effects of the guest-side connection visible
  x11_manager.HostXFlush(data);
}

// Functions returning _XDisplay* should be handled explicitly via ptr_passthrough.
// Deleting this overload turns any attempt to convert a host-side Display
// layout back to a guest layout into a compile error.
guest_layout<_XDisplay*> to_guest(host_layout<_XDisplay*>) = delete;

// Installs a host-callable trampoline for the guest's malloc into GuestMalloc.
//   GuestTarget   - address of the guest function to call
//   GuestUnpacker - address of the guest-side unpacker for that function's signature
// Presumably reached from the guest's fexfn_pack_GL_SetGuestMalloc call in its
// init routine (the pairing is made by generated thunk code not shown here).
static void fexfn_impl_libGL_GL_SetGuestMalloc(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, &GuestMalloc);
}

// Installs a host-callable trampoline for the guest's XGetVisualInfo into
// x11_manager.GuestXGetVisualInfo.
//   GuestTarget   - address of the guest function to call
//   GuestUnpacker - address of the guest-side unpacker for that function's signature
static void fexfn_impl_libGL_GL_SetGuestXGetVisualInfo(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, &x11_manager.GuestXGetVisualInfo);
}

// Internal entry point: installs a host trampoline for the guest function at
// GuestTarget into x11_manager.GuestXSync.
static void fexfn_impl_libGL_GL_SetGuestXSync(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, &x11_manager.GuestXSync);
}

// Internal entry point: installs a host trampoline for the guest function at
// GuestTarget into x11_manager.GuestXDisplayString.
static void fexfn_impl_libGL_GL_SetGuestXDisplayString(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, &x11_manager.GuestXDisplayString);
}

#include "thunkgen_host_libGL.inl"

// Resolves a GL/GLX entry point by name.
//
// Functions that have a custom host implementation in this file are looked up
// in a table and returned directly; every other name is forwarded to the host
// glXGetProcAddress.
//
// Returns nullptr for a null `name`: constructing a std::string_view from a
// null pointer is undefined behavior, so that case is rejected up front.
auto fexfn_impl_libGL_glXGetProcAddress(const GLubyte* name) -> void (*)() {
  using VoidFn = void (*)();

  if (!name) {
    return nullptr;
  }

  struct Override {
    std::string_view Name;
    VoidFn Target;
  };

// Pairs the queried symbol name with its fexfn_impl_libGL_* implementation so
// that the two can never get out of sync.
#define FEX_GL_OVERRIDE(Symbol) \
  Override { #Symbol, (VoidFn)fexfn_impl_libGL_##Symbol }

  static const Override Overrides[] = {
    FEX_GL_OVERRIDE(glCompileShaderIncludeARB),
    FEX_GL_OVERRIDE(glCreateShaderProgramv),
    FEX_GL_OVERRIDE(glGetBufferPointerv),
    FEX_GL_OVERRIDE(glGetBufferPointervARB),
    FEX_GL_OVERRIDE(glGetNamedBufferPointerv),
    FEX_GL_OVERRIDE(glGetNamedBufferPointervEXT),
    FEX_GL_OVERRIDE(glGetPointerv),
    FEX_GL_OVERRIDE(glGetPointervEXT),
    FEX_GL_OVERRIDE(glGetPointeri_vEXT),
    FEX_GL_OVERRIDE(glGetPointerIndexedvEXT),
    FEX_GL_OVERRIDE(glGetVariantPointervEXT),
    FEX_GL_OVERRIDE(glGetVertexAttribPointervARB),
    FEX_GL_OVERRIDE(glGetVertexAttribPointerv),
    FEX_GL_OVERRIDE(glGetVertexAttribPointervNV),
    FEX_GL_OVERRIDE(glGetVertexArrayPointeri_vEXT),
    FEX_GL_OVERRIDE(glGetVertexArrayPointervEXT),
    FEX_GL_OVERRIDE(glShaderSource),
    FEX_GL_OVERRIDE(glShaderSourceARB),
#ifdef IS_32BIT_THUNK
    FEX_GL_OVERRIDE(glBindBuffersRange),
    FEX_GL_OVERRIDE(glBindVertexBuffers),
    FEX_GL_OVERRIDE(glGetUniformIndices),
    FEX_GL_OVERRIDE(glVertexArrayVertexBuffers),
#endif
    FEX_GL_OVERRIDE(glXChooseFBConfig),
    FEX_GL_OVERRIDE(glXChooseFBConfigSGIX),
    FEX_GL_OVERRIDE(glXGetCurrentDisplay),
    FEX_GL_OVERRIDE(glXGetCurrentDisplayEXT),
    FEX_GL_OVERRIDE(glXGetFBConfigs),
    FEX_GL_OVERRIDE(glXGetFBConfigFromVisualSGIX),
    FEX_GL_OVERRIDE(glXGetVisualFromFBConfigSGIX),
    FEX_GL_OVERRIDE(glXChooseVisual),
    FEX_GL_OVERRIDE(glXCreateContext),
    FEX_GL_OVERRIDE(glXCreateGLXPixmap),
    FEX_GL_OVERRIDE(glXCreateGLXPixmapMESA),
    FEX_GL_OVERRIDE(glXGetConfig),
    FEX_GL_OVERRIDE(glXGetVisualFromFBConfig),
#ifdef IS_32BIT_THUNK
    FEX_GL_OVERRIDE(glXGetSelectedEvent),
    FEX_GL_OVERRIDE(glXGetSelectedEventSGIX),
#endif
  };

#undef FEX_GL_OVERRIDE

  const std::string_view name_sv {reinterpret_cast<const char*>(name)};
  for (const auto& Entry : Overrides) {
    if (name_sv == Entry.Name) {
      return Entry.Target;
    }
  }

  // No custom implementation: let the host driver resolve the symbol
  return (VoidFn)glXGetProcAddress((const GLubyte*)name);
}

// TODO: unsigned int *glXEnumerateVideoDevicesNV (Display *dpy, int screen, int *nelements);


// Custom host implementation of glCompileShaderIncludeARB.
//
// On 64-bit guests the string array is used in place. On 32-bit guests each
// guest pointer in the array is widened into a temporary host-side array.
// A negative Count or a null guest array is forwarded to the host driver as a
// null array instead of being used to size/read the temporary, so the driver
// can report the error.
void fexfn_impl_libGL_glCompileShaderIncludeARB(GLuint a_0, GLsizei Count, guest_layout<const GLchar* const*> a_2, const GLint* a_3) {
#ifndef IS_32BIT_THUNK
  auto sources = a_2.force_get_host_pointer();
#else
  const char** sources = nullptr;
  auto guest_sources = a_2.get_pointer();
  // Count >= 0 keeps the size computation from wrapping around
  if (Count >= 0 && guest_sources) {
    sources = (const char**)alloca(Count * sizeof(const char*));
    for (GLsizei i = 0; i < Count; ++i) {
      sources[i] = host_layout<const char* const> {guest_sources[i]}.data;
    }
  }
#endif
  return fexldr_ptr_libGL_glCompileShaderIncludeARB(a_0, Count, sources, a_3);
}

// Custom host implementation of glCreateShaderProgramv.
//
// On 64-bit guests the string array is used in place. On 32-bit guests each
// guest pointer in the array is widened into a temporary host-side array.
// A negative count or a null guest array is forwarded to the host driver as a
// null array instead of being used to size/read the temporary.
GLuint fexfn_impl_libGL_glCreateShaderProgramv(GLuint a_0, GLsizei count, guest_layout<const GLchar* const*> a_2) {
#ifndef IS_32BIT_THUNK
  auto sources = a_2.force_get_host_pointer();
#else
  const char** sources = nullptr;
  auto guest_sources = a_2.get_pointer();
  // count >= 0 keeps the size computation from wrapping around
  if (count >= 0 && guest_sources) {
    sources = (const char**)alloca(count * sizeof(const char*));
    for (GLsizei i = 0; i < count; ++i) {
      sources[i] = host_layout<const char* const> {guest_sources[i]}.data;
    }
  }
#endif
  return fexldr_ptr_libGL_glCreateShaderProgramv(a_0, count, sources);
}

// Custom host implementation of glGetBufferPointerv: queries the pointer into
// a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetBufferPointerv(GLenum a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetBufferPointerv(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetBufferPointervARB: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetBufferPointervARB(GLenum a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetBufferPointervARB(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetNamedBufferPointerv: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetNamedBufferPointerv(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetNamedBufferPointerv(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetNamedBufferPointervEXT: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetNamedBufferPointervEXT(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetNamedBufferPointervEXT(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetPointerv: queries the pointer into a
// host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetPointerv(GLenum a_0, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetPointerv(a_0, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetPointervEXT: queries the pointer into a
// host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetPointervEXT(GLenum a_0, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetPointervEXT(a_0, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetPointeri_vEXT: queries the pointer into a
// host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetPointeri_vEXT(GLenum a_0, GLuint a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetPointeri_vEXT(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetPointerIndexedvEXT: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetPointerIndexedvEXT(GLenum a_0, GLuint a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetPointerIndexedvEXT(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVariantPointervEXT: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVariantPointervEXT(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVariantPointervEXT(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVertexAttribPointervARB: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVertexAttribPointervARB(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVertexAttribPointervARB(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVertexAttribPointerv: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVertexAttribPointerv(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVertexAttribPointerv(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVertexAttribPointervNV: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVertexAttribPointervNV(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVertexAttribPointervNV(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVertexArrayPointeri_vEXT: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVertexArrayPointeri_vEXT(GLuint a_0, GLuint a_1, GLenum a_2, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVertexArrayPointeri_vEXT(a_0, a_1, a_2, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glGetVertexArrayPointervEXT: queries the pointer
// into a host-sized temporary and stores it to the guest output in guest layout.
void fexfn_impl_libGL_glGetVertexArrayPointervEXT(GLuint a_0, GLenum a_1, guest_layout<void**> GuestOut) {
  // Initialized so that a well-defined value is converted even if the host call
  // fails without writing the output
  void* HostOut = nullptr;
  fexldr_ptr_libGL_glGetVertexArrayPointervEXT(a_0, a_1, &HostOut);
  *GuestOut.get_pointer() = to_guest(to_host_layout(HostOut));
}

// Custom host implementation of glShaderSource.
//
// On 64-bit guests the string array is used in place. On 32-bit guests each
// guest pointer in the array is widened into a temporary host-side array.
// A negative count or a null guest array is forwarded to the host driver as a
// null array instead of being used to size/read the temporary, so the driver
// can report the error.
void fexfn_impl_libGL_glShaderSource(GLuint a_0, GLsizei count, guest_layout<const GLchar* const*> a_2, const GLint* a_3) {
#ifndef IS_32BIT_THUNK
  auto sources = a_2.force_get_host_pointer();
#else
  const char** sources = nullptr;
  auto guest_sources = a_2.get_pointer();
  // count >= 0 keeps the size computation from wrapping around
  if (count >= 0 && guest_sources) {
    sources = (const char**)alloca(count * sizeof(const char*));
    for (GLsizei i = 0; i < count; ++i) {
      sources[i] = host_layout<const char* const> {guest_sources[i]}.data;
    }
  }
#endif
  return fexldr_ptr_libGL_glShaderSource(a_0, count, sources, a_3);
}

// Custom host implementation of glShaderSourceARB.
//
// On 64-bit guests the string array is used in place. On 32-bit guests each
// guest pointer in the array is widened into a temporary host-side array.
// A negative count or a null guest array is forwarded to the host driver as a
// null array instead of being used to size/read the temporary.
void fexfn_impl_libGL_glShaderSourceARB(GLuint a_0, GLsizei count, guest_layout<const GLcharARB**> a_2, const GLint* a_3) {
#ifndef IS_32BIT_THUNK
  auto sources = a_2.force_get_host_pointer();
#else
  const char** sources = nullptr;
  auto guest_sources = a_2.get_pointer();
  // count >= 0 keeps the size computation from wrapping around
  if (count >= 0 && guest_sources) {
    sources = (const char**)alloca(count * sizeof(const char*));
    for (GLsizei i = 0; i < count; ++i) {
      sources[i] = guest_sources[i].force_get_host_pointer();
    }
  }
#endif
  return fexldr_ptr_libGL_glShaderSourceARB(a_0, count, sources, a_3);
}

// Relocates an array to the guest heap so the guest can release it with XFree.
// The memory at the given host location is always de-allocated (via HostXFree).
//
// Data:     host array to relocate; a null pointer yields a null guest pointer
// NumItems: number of elements in Data; negative values are treated as zero
//
// Returns a guest pointer to the converted copy, or a null guest pointer if
// Data is null or the guest allocation failed.
template<typename T>
guest_layout<T*> RelocateArrayToGuestHeap(T* Data, int NumItems) {
  if (!Data) {
    return guest_layout<T*> {.data = 0};
  }

  // A negative count would wrap around to an enormous allocation size
  const int Count = NumItems > 0 ? NumItems : 0;

  void* GuestMemory = GuestMalloc(sizeof(guest_layout<T>) * Count);
  if (!GuestMemory) {
    // Nothing to copy into; still release the host array as documented
    x11_manager.HostXFree(Data);
    return guest_layout<T*> {.data = 0};
  }

  guest_layout<T*> GuestData;
  GuestData.data = reinterpret_cast<uintptr_t>(GuestMemory);
  for (int Index = 0; Index < Count; ++Index) {
    // Convert each element from host layout to guest layout
    GuestData.get_pointer()[Index] = to_guest(to_host_layout(Data[Index]));
  }
  x11_manager.HostXFree(Data);
  return GuestData;
}

// Finds the host-side XVisualInfo corresponding to a guest XVisualInfo.
// The guest struct is repacked to host layout and used as a search template
// (matching on screen and visual ID). Returns nullptr for a null guest pointer;
// aborts unless exactly one host visual matches.
// The returned XVisualInfo must be XFree'ed by the caller.
static XVisualInfo* LookupHostVisualInfo(Display* HostDisplay, guest_layout<XVisualInfo*> GuestInfo) {
  if (GuestInfo.data) {
    auto Template = host_layout<XVisualInfo> {*GuestInfo.get_pointer()}.data;
    int MatchCount;
    auto Match = x11_manager.HostXGetVisualInfo(HostDisplay, uint64_t {VisualScreenMask | VisualIDMask}, &Template, &MatchCount);
    if (MatchCount == 1) {
      return Match;
    }

    fprintf(stderr, "ERROR: Did not find unique host XVisualInfo\n");
    std::abort();
  }

  return nullptr;
}

// Finds the guest-side XVisualInfo corresponding to a host XVisualInfo and
// releases the host one. The lookup runs in the guest through the registered
// GuestXGetVisualInfo callback, matching on screen and visual ID.
// Returns a null guest pointer for a null HostInfo; aborts unless exactly one
// guest visual matches.
static guest_layout<XVisualInfo*> MapToGuestVisualInfo(Display* HostDisplay, XVisualInfo* HostInfo) {
  if (!HostInfo) {
    return guest_layout<XVisualInfo*> {.data = 0};
  }

  auto GuestDisplay = x11_manager.HostToGuestDisplay(HostDisplay);
#ifndef IS_32BIT_THUNK
  int MatchCount;
  auto Template = to_guest(to_host_layout(*HostInfo));
#else
  // Arguments handed to the guest callback are allocated via the bump allocator
  GuestStackBumpAllocator GuestStack;
  auto& MatchCount = *GuestStack.New<int>();
  auto& Template = *GuestStack.New<guest_layout<XVisualInfo>>(to_guest(to_host_layout(*HostInfo)));
#endif
  auto GuestMatch = x11_manager.GuestXGetVisualInfo(GuestDisplay.get_pointer(), VisualScreenMask | VisualIDMask, &Template, &MatchCount);

  if (MatchCount != 1) {
    fprintf(stderr, "ERROR: Did not find unique guest XVisualInfo\n");
    std::abort();
  }

  // The guest now owns an equivalent VisualInfo, so the host original can go
  x11_manager.HostXFree(HostInfo);

  guest_layout<XVisualInfo*> Result;
  Result.data = reinterpret_cast<uintptr_t>(GuestMatch);
  return Result;
}

// Custom host implementation of glXChooseFBConfig: the array returned by the
// host is moved to the guest heap (and freed on the host) before returning.
guest_layout<GLXFBConfig*> fexfn_impl_libGL_glXChooseFBConfig(Display* Display, int Screen, const int* Attributes, int* NumItems) {
  auto ret = fexldr_ptr_libGL_glXChooseFBConfig(Display, Screen, Attributes, NumItems);
  return RelocateArrayToGuestHeap(ret, *NumItems);
}

// Custom host implementation of glXChooseFBConfigSGIX: the array returned by
// the host is moved to the guest heap (and freed on the host) before returning.
guest_layout<GLXFBConfigSGIX*> fexfn_impl_libGL_glXChooseFBConfigSGIX(Display* Display, int Screen, int* Attributes, int* NumItems) {
  auto ret = fexldr_ptr_libGL_glXChooseFBConfigSGIX(Display, Screen, Attributes, NumItems);
  return RelocateArrayToGuestHeap(ret, *NumItems);
}

// Custom host implementation of glXGetCurrentDisplay: translates the host
// display returned by the driver back to the guest's display.
guest_layout<_XDisplay*> fexfn_impl_libGL_glXGetCurrentDisplay() {
  auto ret = fexldr_ptr_libGL_glXGetCurrentDisplay();
  return x11_manager.HostToGuestDisplay(ret);
}

// Custom host implementation of glXGetCurrentDisplayEXT: translates the host
// display returned by the driver back to the guest's display.
guest_layout<_XDisplay*> fexfn_impl_libGL_glXGetCurrentDisplayEXT() {
  auto ret = fexldr_ptr_libGL_glXGetCurrentDisplayEXT();
  return x11_manager.HostToGuestDisplay(ret);
}

// Custom host implementation of glXGetFBConfigs: the array returned by the
// host is moved to the guest heap (and freed on the host) before returning.
guest_layout<GLXFBConfig*> fexfn_impl_libGL_glXGetFBConfigs(Display* Display, int Screen, int* NumItems) {
  auto ret = fexldr_ptr_libGL_glXGetFBConfigs(Display, Screen, NumItems);
  return RelocateArrayToGuestHeap(ret, *NumItems);
}

// Custom host implementation of glXGetFBConfigFromVisualSGIX: the guest
// XVisualInfo is mapped to a host XVisualInfo for the duration of the call.
GLXFBConfigSGIX fexfn_impl_libGL_glXGetFBConfigFromVisualSGIX(Display* Display, guest_layout<XVisualInfo*> Info) {
  auto HostInfo = LookupHostVisualInfo(Display, Info);
  auto ret = fexldr_ptr_libGL_glXGetFBConfigFromVisualSGIX(Display, HostInfo);
  // LookupHostVisualInfo hands ownership of HostInfo to the caller
  x11_manager.HostXFree(HostInfo);
  return ret;
}

// Custom host implementation of glXGetVisualFromFBConfigSGIX: the host result
// is replaced by the matching guest-side XVisualInfo (the host one is freed).
guest_layout<XVisualInfo*> fexfn_impl_libGL_glXGetVisualFromFBConfigSGIX(Display* Display, GLXFBConfigSGIX Config) {
  return MapToGuestVisualInfo(Display, fexldr_ptr_libGL_glXGetVisualFromFBConfigSGIX(Display, Config));
}

// Custom host implementation of glXChooseVisual: the host result is replaced
// by the matching guest-side XVisualInfo (the host one is freed).
guest_layout<XVisualInfo*> fexfn_impl_libGL_glXChooseVisual(Display* Display, int Screen, int* Attributes) {
  return MapToGuestVisualInfo(Display, fexldr_ptr_libGL_glXChooseVisual(Display, Screen, Attributes));
}

// Custom host implementation of glXCreateContext: the guest XVisualInfo is
// mapped to a host XVisualInfo for the duration of the call.
GLXContext fexfn_impl_libGL_glXCreateContext(Display* Display, guest_layout<XVisualInfo*> Info, GLXContext ShareList, Bool Direct) {
  auto HostInfo = LookupHostVisualInfo(Display, Info);
  auto ret = fexldr_ptr_libGL_glXCreateContext(Display, HostInfo, ShareList, Direct);
  // LookupHostVisualInfo hands ownership of HostInfo to the caller
  x11_manager.HostXFree(HostInfo);
  return ret;
}

// Custom host implementation of glXCreateGLXPixmap: the guest XVisualInfo is
// mapped to a host XVisualInfo for the duration of the call.
GLXPixmap fexfn_impl_libGL_glXCreateGLXPixmap(Display* Display, guest_layout<XVisualInfo*> Info, Pixmap Pixmap) {
  auto HostInfo = LookupHostVisualInfo(Display, Info);
  auto ret = fexldr_ptr_libGL_glXCreateGLXPixmap(Display, HostInfo, Pixmap);
  // LookupHostVisualInfo hands ownership of HostInfo to the caller
  x11_manager.HostXFree(HostInfo);
  return ret;
}

// Custom host implementation of glXCreateGLXPixmapMESA: the guest XVisualInfo
// is mapped to a host XVisualInfo for the duration of the call.
GLXPixmap fexfn_impl_libGL_glXCreateGLXPixmapMESA(Display* Display, guest_layout<XVisualInfo*> Info, Pixmap Pixmap, Colormap Colormap) {
  auto HostInfo = LookupHostVisualInfo(Display, Info);
  auto ret = fexldr_ptr_libGL_glXCreateGLXPixmapMESA(Display, HostInfo, Pixmap, Colormap);
  // LookupHostVisualInfo hands ownership of HostInfo to the caller
  x11_manager.HostXFree(HostInfo);
  return ret;
}

// Custom host implementation of glXGetConfig: the guest XVisualInfo is mapped
// to a host XVisualInfo for the duration of the call.
int fexfn_impl_libGL_glXGetConfig(Display* Display, guest_layout<XVisualInfo*> Info, int Attribute, int* Value) {
  auto HostInfo = LookupHostVisualInfo(Display, Info);
  auto ret = fexldr_ptr_libGL_glXGetConfig(Display, HostInfo, Attribute, Value);
  // LookupHostVisualInfo hands ownership of HostInfo to the caller
  x11_manager.HostXFree(HostInfo);
  return ret;
}

// Custom host implementation of glXGetVisualFromFBConfig: the host result is
// replaced by the matching guest-side XVisualInfo (the host one is freed).
guest_layout<XVisualInfo*> fexfn_impl_libGL_glXGetVisualFromFBConfig(Display* Display, GLXFBConfig Config) {
  return MapToGuestVisualInfo(Display, fexldr_ptr_libGL_glXGetVisualFromFBConfig(Display, Config));
}

#ifdef IS_32BIT_THUNK
// 32-bit custom host implementation of glBindBuffersRange: widens the guest's
// 32-bit offset/size arrays to host GLintptr/GLsizeiptr arrays.
//
// GL permits null offsets/sizes (they are ignored when the buffer array is
// NULL), so a null guest array is forwarded as a null host array instead of
// being dereferenced. A negative Count is likewise forwarded without sizing a
// temporary from it.
void fexfn_impl_libGL_glBindBuffersRange(GLenum a_0, GLuint a_1, GLsizei Count, const GLuint* a_3, guest_layout<const int*> Offsets,
                                         guest_layout<const int*> Sizes) {
  GLintptr* HostOffsets = nullptr;
  GLsizeiptr* HostSizes = nullptr;
  // Count >= 0 keeps the size computations from wrapping around
  if (Count >= 0) {
    if (auto GuestOffsets = Offsets.get_pointer()) {
      HostOffsets = (GLintptr*)alloca(Count * sizeof(GLintptr));
      for (int i = 0; i < Count; ++i) {
        HostOffsets[i] = GuestOffsets[i].data;
      }
    }
    if (auto GuestSizes = Sizes.get_pointer()) {
      HostSizes = (GLsizeiptr*)alloca(Count * sizeof(GLsizeiptr));
      for (int i = 0; i < Count; ++i) {
        HostSizes[i] = GuestSizes[i].data;
      }
    }
  }
  return fexldr_ptr_libGL_glBindBuffersRange(a_0, a_1, Count, a_3, HostOffsets, HostSizes);
}

// 32-bit custom host implementation of glBindVertexBuffers: widens the guest's
// 32-bit offset array to a host GLintptr array.
//
// GL permits a null offsets array (it is ignored when the buffer array is
// NULL), so a null guest array is forwarded as a null host array instead of
// being dereferenced. A negative count is likewise forwarded without sizing a
// temporary from it.
void fexfn_impl_libGL_glBindVertexBuffers(GLuint a_0, GLsizei count, const GLuint* a_2, guest_layout<const int*> Offsets, const GLsizei* a_4) {
  GLintptr* HostOffsets = nullptr;
  auto GuestOffsets = Offsets.get_pointer();
  // count >= 0 keeps the size computation from wrapping around
  if (count >= 0 && GuestOffsets) {
    HostOffsets = (GLintptr*)alloca(count * sizeof(GLintptr));
    for (int i = 0; i < count; ++i) {
      HostOffsets[i] = GuestOffsets[i].data;
    }
  }
  fexldr_ptr_libGL_glBindVertexBuffers(a_0, count, a_2, HostOffsets, a_4);
}

// 32-bit custom host implementation of glGetUniformIndices: widens each guest
// string pointer into a temporary host-side array.
//
// The temporary is sized by its actual element type (a string pointer). A
// negative Count or a null guest array is forwarded to the host driver as a
// null array instead of being used to size/read the temporary.
void fexfn_impl_libGL_glGetUniformIndices(GLuint a_0, GLsizei Count, guest_layout<const GLchar* const*> Names, GLuint* a_3) {
  const GLchar** HostNames = nullptr;
  auto GuestNames = Names.get_pointer();
  // Count >= 0 keeps the size computation from wrapping around
  if (Count >= 0 && GuestNames) {
    HostNames = (const GLchar**)alloca(Count * sizeof(const GLchar*));
    for (int i = 0; i < Count; ++i) {
      HostNames[i] = host_layout<const char* const> {GuestNames[i]}.data;
    }
  }
  fexldr_ptr_libGL_glGetUniformIndices(a_0, Count, HostNames, a_3);
}

// 32-bit custom host implementation of glVertexArrayVertexBuffers: widens the
// guest's 32-bit offset array to a host GLintptr array.
//
// GL permits a null offsets array (it is ignored when the buffer array is
// NULL), so a null guest array is forwarded as a null host array instead of
// being dereferenced. A negative count is likewise forwarded without sizing a
// temporary from it.
void fexfn_impl_libGL_glVertexArrayVertexBuffers(GLuint a_0, GLuint a_1, GLsizei count, const GLuint* a_3, guest_layout<const int*> Offsets,
                                                 const GLsizei* a_5) {
  GLintptr* HostOffsets = nullptr;
  auto GuestOffsets = Offsets.get_pointer();
  // count >= 0 keeps the size computation from wrapping around
  if (count >= 0 && GuestOffsets) {
    HostOffsets = (GLintptr*)alloca(count * sizeof(GLintptr));
    for (int i = 0; i < count; ++i) {
      HostOffsets[i] = GuestOffsets[i].data;
    }
  }
  fexldr_ptr_libGL_glVertexArrayVertexBuffers(a_0, a_1, count, a_3, HostOffsets, a_5);
}

// 32-bit custom host implementation of glXGetSelectedEvent: the host writes an
// `unsigned long` mask, which is narrowed into the guest's 32-bit output.
void fexfn_impl_libGL_glXGetSelectedEvent(Display* Display, GLXDrawable Drawable, guest_layout<uint32_t*> Mask) {
  // Initialized so a defined value reaches the guest even if the host call
  // returns without writing the mask
  unsigned long HostMask = 0;
  fexldr_ptr_libGL_glXGetSelectedEvent(Display, Drawable, &HostMask);
  *Mask.get_pointer() = HostMask;
}
// 32-bit custom host implementation of glXGetSelectedEventSGIX: the host writes
// an `unsigned long` mask, which is narrowed into the guest's 32-bit output.
void fexfn_impl_libGL_glXGetSelectedEventSGIX(Display* Display, GLXDrawable Drawable, guest_layout<uint32_t*> Mask) {
  // Initialized so a defined value reaches the guest even if the host call
  // returns without writing the mask
  unsigned long HostMask = 0;
  fexldr_ptr_libGL_glXGetSelectedEventSGIX(Display, Drawable, &HostMask);
  *Mask.get_pointer() = HostMask;
}
#endif

EXPORTS(libGL)


================================================
FILE: ThunkLibs/libGL/libGL_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#define GL_GLEXT_PROTOTYPES 1
#define GLX_GLXEXT_PROTOTYPES 1

#include <GL/glx.h>
#include <GL/glxext.h>
#include <GL/gl.h>
#include <GL/glext.h>

#undef GL_ARB_viewport_array
#include "glcorearb.h"

#include <type_traits>

// Annotation point for functions exported by this library. Specializations
// name a function by value and attach fexgen:: annotations as base classes.
template<auto>
struct fex_gen_config {
  // NOTE(review): presumably the library version consumed by the thunk
  // generator — confirm against common/GeneratorInterface.h
  unsigned version = 1;
};

// glXGetProcAddress has a hand-written host implementation
// (fexfn_impl_libGL_glXGetProcAddress in libGL_Host.cpp) and a hand-written
// guest entrypoint.
template<>
struct fex_gen_config<glXGetProcAddress> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer {};

// internal use: functions that are not part of the GL/GLX API. Each receives a
// guest function address and its unpacker and is implemented by hand on the
// host (fexfn_impl_libGL_GL_SetGuest* in libGL_Host.cpp).
void GL_SetGuestMalloc(uintptr_t, uintptr_t);
void GL_SetGuestXSync(uintptr_t, uintptr_t);
void GL_SetGuestXGetVisualInfo(uintptr_t, uintptr_t);
void GL_SetGuestXDisplayString(uintptr_t, uintptr_t);
template<>
struct fex_gen_config<GL_SetGuestMalloc> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};
template<>
struct fex_gen_config<GL_SetGuestXSync> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};
template<>
struct fex_gen_config<GL_SetGuestXGetVisualInfo> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};
template<>
struct fex_gen_config<GL_SetGuestXDisplayString> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};

// Annotation point for types. Specializations attach fexgen:: annotations
// (e.g. opaque_type, emit_layout_wrappers) as base classes.
template<typename>
struct fex_gen_type {};

// Assume void* always points to data with consistent layout between guest and
// host. It's used in too many functions to annotate them all individually.
template<>
struct fex_gen_type<void> : fexgen::opaque_type {};

// Handle types that are only ever passed around by pointer: treat the pointee
// as opaque.
template<>
struct fex_gen_type<std::remove_pointer_t<GLXContext>> : fexgen::opaque_type {};
// NOTE: The data layout of this is almost the same between 64-bit and 32-bit,
//       but the total struct size is 4 bytes larger on 64-bit due to stricter
//       alignment requirements (8 vs 4 bytes). Since it's always allocated on
//       the host *and* never directly used in arrays, this is not a problem.
template<>
struct fex_gen_type<std::remove_pointer_t<GLXFBConfig>> : fexgen::opaque_type {};
template<>
struct fex_gen_type<std::remove_pointer_t<GLsync>> : fexgen::opaque_type {};

// NOTE: These should be opaque, but actually aren't because the respective
//       libraries aren't thunked. They are annotated as opaque regardless.
template<>
struct fex_gen_type<_cl_context> : fexgen::opaque_type {};
template<>
struct fex_gen_type<_cl_event> : fexgen::opaque_type {};

// host_layout<_XDisplay*> is manually customized in libGL_Host.cpp. Mark the
// type as opaque to please the interface parser.
template<>
struct fex_gen_type<_XDisplay> : fexgen::opaque_type {};

// XVisualInfo is converted between guest and host layout by the custom host
// implementations (see LookupHostVisualInfo/MapToGuestVisualInfo in
// libGL_Host.cpp), which need the layout wrappers for it.
template<>
struct fex_gen_type<XVisualInfo> : fexgen::emit_layout_wrappers {};
template<>
struct fex_gen_type<Visual> : fexgen::opaque_type {}; // Used in XVisualInfo; treat as opaque

// Symbols queryable through glXGetProcAddress
namespace internal {
// Primary template for this namespace: carries the annotations
// generate_guest_symtable and indirect_guest_calls. The explicit
// specializations below name the individual functions.
template<auto>
struct fex_gen_config : fexgen::generate_guest_symtable, fexgen::indirect_guest_calls {};

// Per-parameter annotation point.
// Template arguments: function, parameter index, parameter type [optional].
// The specializations below use index -1 together with the function's return
// type, i.e. -1 appears to denote the return value.
template<auto, int, typename = void>
struct fex_gen_param {};

// GLX extension entry points declared with empty specializations (no
// per-function annotations).
template<>
struct fex_gen_config<glXQueryCurrentRendererStringMESA> {};
template<>
struct fex_gen_config<glXQueryRendererStringMESA> {};
template<>
struct fex_gen_config<glXGetContextIDEXT> {};
template<>
struct fex_gen_config<glXCreateContextWithConfigSGIX> {};
template<>
struct fex_gen_config<glXImportContextEXT> {};
template<>
struct fex_gen_config<glXGetCurrentReadDrawableSGI> {};
// Implemented by hand in libGL_Host.cpp; the returned array (index -1) is
// handed to the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXChooseFBConfigSGIX> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXChooseFBConfigSGIX, -1, GLXFBConfigSGIX*> : fexgen::ptr_passthrough {};
// Implemented by hand in libGL_Host.cpp; the XVisualInfo argument (index 1)
// reaches the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXGetFBConfigFromVisualSGIX> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetFBConfigFromVisualSGIX, 1, XVisualInfo*> : fexgen::ptr_passthrough {};
// GLX extension entry points declared with empty specializations (no
// per-function annotations).
template<>
struct fex_gen_config<glXCreateGLXPbufferSGIX> {};
template<>
struct fex_gen_config<glXCreateGLXPixmapWithConfigSGIX> {};
template<>
struct fex_gen_config<glXSwapBuffersMscOML> {};
template<>
struct fex_gen_config<glXGetFBConfigAttribSGIX> {};
template<>
struct fex_gen_config<glXGetMscRateOML> {};
template<>
struct fex_gen_config<glXGetSwapIntervalMESA> {};
template<>
struct fex_gen_config<glXGetSyncValuesOML> {};
template<>
struct fex_gen_config<glXGetVideoSyncSGI> {};
template<>
struct fex_gen_config<glXMakeCurrentReadSGI> {};
template<>
struct fex_gen_config<glXQueryContextInfoEXT> {};
template<>
struct fex_gen_config<glXQueryCurrentRendererIntegerMESA> {};
template<>
struct fex_gen_config<glXQueryRendererIntegerMESA> {};
template<>
struct fex_gen_config<glXSwapIntervalMESA> {};
template<>
struct fex_gen_config<glXSwapIntervalSGI> {};
template<>
struct fex_gen_config<glXWaitForMscOML> {};
template<>
struct fex_gen_config<glXWaitForSbcOML> {};
template<>
struct fex_gen_config<glXWaitVideoSyncSGI> {};
template<>
struct fex_gen_config<glXBindTexImageEXT> {};
template<>
struct fex_gen_config<glXCopySubBufferMESA> {};
template<>
struct fex_gen_config<glXDestroyGLXPbufferSGIX> {};
template<>
struct fex_gen_config<glXFreeContextEXT> {};
// The output mask is an `unsigned long*`. 32-bit thunks use a hand-written
// host implementation (libGL_Host.cpp) that receives the mask as a guest
// pointer; 64-bit thunks need no annotation.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<glXGetSelectedEventSGIX> {};
#else
template<>
struct fex_gen_config<glXGetSelectedEventSGIX> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetSelectedEventSGIX, 2, unsigned long*> : fexgen::ptr_passthrough {};
#endif

// Entries without annotations use empty specializations.
template<>
struct fex_gen_config<glXQueryGLXPbufferSGIX> {};
template<>
struct fex_gen_config<glXReleaseTexImageEXT> {};
template<>
struct fex_gen_config<glXSelectEventSGIX> {};
// Implemented by hand in libGL_Host.cpp; the returned XVisualInfo (index -1)
// is produced by the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXGetVisualFromFBConfigSGIX> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetVisualFromFBConfigSGIX, -1, XVisualInfo*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXGetClientString> {};
template<>
struct fex_gen_config<glXQueryExtensionsString> {};
template<>
struct fex_gen_config<glXQueryServerString> {};
// Implemented by hand in libGL_Host.cpp; the returned display (index -1) is
// produced by the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXGetCurrentDisplay> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetCurrentDisplay, -1, _XDisplay*> : fexgen::ptr_passthrough {};
// Implemented by hand in libGL_Host.cpp; the XVisualInfo argument (index 1)
// reaches the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXCreateContext> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXCreateContext, 1, XVisualInfo*> : fexgen::ptr_passthrough {};
// Core GLX entry points declared with empty specializations (no per-function
// annotations).
template<>
struct fex_gen_config<glXCreateNewContext> {};
template<>
struct fex_gen_config<glXGetCurrentContext> {};
template<>
struct fex_gen_config<glXGetCurrentDrawable> {};
template<>
struct fex_gen_config<glXGetCurrentReadDrawable> {};
// Implemented by hand in libGL_Host.cpp; the returned arrays (index -1) are
// produced by the custom implementations as guest pointers.
template<>
struct fex_gen_config<glXChooseFBConfig> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXChooseFBConfig, -1, GLXFBConfig*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXGetFBConfigs> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetFBConfigs, -1, GLXFBConfig*> : fexgen::ptr_passthrough {};
// Entries without annotations use empty specializations.
template<>
struct fex_gen_config<glXCreatePbuffer> {};
// Implemented by hand in libGL_Host.cpp; the XVisualInfo argument (index 1)
// reaches the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXCreateGLXPixmap> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXCreateGLXPixmap, 1, XVisualInfo*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXCreatePixmap> {};
template<>
struct fex_gen_config<glXCreateWindow> {};
// Implemented by hand in libGL_Host.cpp; the XVisualInfo argument (index 1)
// reaches the custom implementation as a guest pointer.
template<>
struct fex_gen_config<glXGetConfig> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetConfig, 1, XVisualInfo*> : fexgen::ptr_passthrough {};
// Core GLX entry points declared with empty specializations (no per-function
// annotations).
template<>
struct fex_gen_config<glXGetFBConfigAttrib> {};
template<>
struct fex_gen_config<glXIsDirect> {};
template<>
struct fex_gen_config<glXMakeContextCurrent> {};
template<>
struct fex_gen_config<glXMakeCurrent> {};
template<>
struct fex_gen_config<glXQueryContext> {};
template<>
struct fex_gen_config<glXQueryExtension> {};
template<>
struct fex_gen_config<glXQueryVersion> {};
template<>
struct fex_gen_config<glXCopyContext> {};
template<>
struct fex_gen_config<glXDestroyContext> {};
template<>
struct fex_gen_config<glXDestroyGLXPixmap> {};
template<>
struct fex_gen_config<glXDestroyPbuffer> {};
template<>
struct fex_gen_config<glXDestroyPixmap> {};
template<>
struct fex_gen_config<glXDestroyWindow> {};
// Only declared when the GLX headers define GLX_NV_vertex_array_range
#ifdef GLX_NV_vertex_array_range
template<>
struct fex_gen_config<glXAllocateMemoryNV> {};
template<>
struct fex_gen_config<glXFreeMemoryNV> {};
#endif
// The output mask is an `unsigned long*`. 32-bit thunks use a hand-written
// host implementation (libGL_Host.cpp) that receives the mask as a guest
// pointer; 64-bit thunks need no annotation.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<glXGetSelectedEvent> {};
#else
template<>
struct fex_gen_config<glXGetSelectedEvent> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetSelectedEvent, 2, unsigned long*> : fexgen::ptr_passthrough {};
#endif
// Core GLX entry points declared with empty specializations (no per-function
// annotations).
template<>
struct fex_gen_config<glXQueryDrawable> {};
template<>
struct fex_gen_config<glXSelectEvent> {};
template<>
struct fex_gen_config<glXSwapBuffers> {};
template<>
struct fex_gen_config<glXUseXFont> {};
template<>
struct fex_gen_config<glXWaitGL> {};
// Implemented by hand in libGL_Host.cpp; the returned XVisualInfo (index -1)
// is produced by the custom implementations as a guest pointer.
template<>
struct fex_gen_config<glXChooseVisual> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXChooseVisual, -1, XVisualInfo*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXGetVisualFromFBConfig> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetVisualFromFBConfig, -1, XVisualInfo*> : fexgen::ptr_passthrough {};

// Disabled entry (only the ARB-suffixed variant below is declared):
// template<> struct fex_gen_config<glXCreateContextAttribs> {};
template<>
struct fex_gen_config<glXCreateContextAttribsARB> {};
template<>
struct fex_gen_config<glXSwapIntervalEXT> {};

// glColorP* family: empty specializations (no per-function annotations)
template<>
struct fex_gen_config<glColorP3ui> {};
template<>
struct fex_gen_config<glColorP3uiv> {};
template<>
struct fex_gen_config<glColorP4ui> {};
template<>
struct fex_gen_config<glColorP4uiv> {};
// glFogCoord* family: empty specializations (no per-function annotations)
template<>
struct fex_gen_config<glFogCoordd> {};
template<>
struct fex_gen_config<glFogCoorddv> {};
template<>
struct fex_gen_config<glFogCoordf> {};
template<>
struct fex_gen_config<glFogCoordfv> {};
template<>
struct fex_gen_config<glFogCoordPointer> {};
// glGetn* family (including ARB variants): empty specializations (no
// per-function annotations)
template<>
struct fex_gen_config<glGetnColorTableARB> {};
template<>
struct fex_gen_config<glGetnConvolutionFilterARB> {};
template<>
struct fex_gen_config<glGetnHistogramARB> {};
template<>
struct fex_gen_config<glGetnColorTable> {};
template<>
struct fex_gen_config<glGetnConvolutionFilter> {};
template<>
struct fex_gen_config<glGetnHistogram> {};
template<>
struct fex_gen_config<glGetnMapdv> {};
template<>
struct fex_gen_config<glGetnMapfv> {};
template<>
struct fex_gen_config<glGetnMapiv> {};
template<>
struct fex_gen_config<glGetnPixelMapfv> {};
template<>
struct fex_gen_config<glGetnPixelMapuiv> {};
template<>
struct fex_gen_config<glGetnPixelMapusv> {};
template<>
struct fex_gen_config<glGetnPolygonStipple> {};
template<>
struct fex_gen_config<glGetnSeparableFilter> {};
template<>
struct fex_gen_config<glGetnMinmax> {};
template<>
struct fex_gen_config<glGetnMapdvARB> {};
template<>
struct fex_gen_config<glGetnMapfvARB> {};
template<>
struct fex_gen_config<glGetnMapivARB> {};
template<>
struct fex_gen_config<glGetnMinmaxARB> {};
template<>
struct fex_gen_config<glGetnPixelMapfvARB> {};
template<>
struct fex_gen_config<glGetnPixelMapuivARB> {};
template<>
struct fex_gen_config<glGetnPixelMapusvARB> {};
template<>
struct fex_gen_config<glGetnPolygonStippleARB> {};
template<>
struct fex_gen_config<glGetnSeparableFilterARB> {};
// glMultiTexCoordP* and glNormalP* families: empty specializations (no
// per-function annotations)
template<>
struct fex_gen_config<glMultiTexCoordP1ui> {};
template<>
struct fex_gen_config<glMultiTexCoordP1uiv> {};
template<>
struct fex_gen_config<glMultiTexCoordP2ui> {};
template<>
struct fex_gen_config<glMultiTexCoordP2uiv> {};
template<>
struct fex_gen_config<glMultiTexCoordP3ui> {};
template<>
struct fex_gen_config<glMultiTexCoordP3uiv> {};
template<>
struct fex_gen_config<glMultiTexCoordP4ui> {};
template<>
struct fex_gen_config<glMultiTexCoordP4uiv> {};
template<>
struct fex_gen_config<glNormalP3ui> {};
template<>
struct fex_gen_config<glNormalP3uiv> {};
// glSecondaryColor* family: empty specializations (no per-function
// annotations)
template<>
struct fex_gen_config<glSecondaryColor3b> {};
template<>
struct fex_gen_config<glSecondaryColor3bv> {};
template<>
struct fex_gen_config<glSecondaryColor3d> {};
template<>
struct fex_gen_config<glSecondaryColor3dv> {};
template<>
struct fex_gen_config<glSecondaryColor3f> {};
template<>
struct fex_gen_config<glSecondaryColor3fv> {};
template<>
struct fex_gen_config<glSecondaryColor3i> {};
template<>
struct fex_gen_config<glSecondaryColor3iv> {};
template<>
struct fex_gen_config<glSecondaryColor3s> {};
template<>
struct fex_gen_config<glSecondaryColor3sv> {};
template<>
struct fex_gen_config<glSecondaryColor3ub> {};
template<>
struct fex_gen_config<glSecondaryColor3ubv> {};
template<>
struct fex_gen_config<glSecondaryColor3ui> {};
template<>
struct fex_gen_config<glSecondaryColor3uiv> {};
template<>
struct fex_gen_config<glSecondaryColor3us> {};
template<>
struct fex_gen_config<glSecondaryColor3usv> {};
template<>
struct fex_gen_config<glSecondaryColorP3ui> {};
template<>
struct fex_gen_config<glSecondaryColorP3uiv> {};
template<>
struct fex_gen_config<glSecondaryColorPointer> {};
// glTexCoordP* and glVertexP* families: empty specializations (no
// per-function annotations)
template<>
struct fex_gen_config<glTexCoordP1ui> {};
template<>
struct fex_gen_config<glTexCoordP1uiv> {};
template<>
struct fex_gen_config<glTexCoordP2ui> {};
template<>
struct fex_gen_config<glTexCoordP2uiv> {};
template<>
struct fex_gen_config<glTexCoordP3ui> {};
template<>
struct fex_gen_config<glTexCoordP3uiv> {};
template<>
struct fex_gen_config<glTexCoordP4ui> {};
template<>
struct fex_gen_config<glTexCoordP4uiv> {};
template<>
struct fex_gen_config<glVertexP2ui> {};
template<>
struct fex_gen_config<glVertexP2uiv> {};
template<>
struct fex_gen_config<glVertexP3ui> {};
template<>
struct fex_gen_config<glVertexP3uiv> {};
template<>
struct fex_gen_config<glVertexP4ui> {};
template<>
struct fex_gen_config<glVertexP4uiv> {};
// glWindowPos* family: empty specializations (no per-function annotations)
template<>
struct fex_gen_config<glWindowPos2d> {};
template<>
struct fex_gen_config<glWindowPos2dv> {};
template<>
struct fex_gen_config<glWindowPos2f> {};
template<>
struct fex_gen_config<glWindowPos2fv> {};
template<>
struct fex_gen_config<glWindowPos2i> {};
template<>
struct fex_gen_config<glWindowPos2iv> {};
template<>
struct fex_gen_config<glWindowPos2s> {};
template<>
struct fex_gen_config<glWindowPos2sv> {};
template<>
struct fex_gen_config<glWindowPos3d> {};
template<>
struct fex_gen_config<glWindowPos3dv> {};
template<>
struct fex_gen_config<glWindowPos3f> {};
template<>
struct fex_gen_config<glWindowPos3fv> {};
template<>
struct fex_gen_config<glWindowPos3i> {};
template<>
struct fex_gen_config<glWindowPos3iv> {};
template<>
struct fex_gen_config<glWindowPos3s> {};
template<>
struct fex_gen_config<glWindowPos3sv> {};
// glGetString/glGetStringi, glQueryMatrixxOES, the keyed-mutex pair, the
// glAre*Resident*, glIs*, glTestFence*/glTestObject* and glUnmap* entries.
// All are empty fex_gen_config specializations without a fexgen:: base class,
// so no per-function annotation is attached to any of them here.
// NOTE(review): presumably these rely on the generator's default handling —
// confirm against the thunk generator.
template<>
struct fex_gen_config<glGetString> {};
template<>
struct fex_gen_config<glGetStringi> {};
template<>
struct fex_gen_config<glQueryMatrixxOES> {};
template<>
struct fex_gen_config<glAcquireKeyedMutexWin32EXT> {};
template<>
struct fex_gen_config<glAreProgramsResidentNV> {};
template<>
struct fex_gen_config<glAreTexturesResidentEXT> {};
template<>
struct fex_gen_config<glAreTexturesResident> {};
template<>
struct fex_gen_config<glIsAsyncMarkerSGIX> {};
template<>
struct fex_gen_config<glIsBufferARB> {};
template<>
struct fex_gen_config<glIsBuffer> {};
template<>
struct fex_gen_config<glIsBufferResidentNV> {};
template<>
struct fex_gen_config<glIsCommandListNV> {};
template<>
struct fex_gen_config<glIsEnabled> {};
template<>
struct fex_gen_config<glIsEnabledi> {};
template<>
struct fex_gen_config<glIsEnabledIndexedEXT> {};
template<>
struct fex_gen_config<glIsFenceAPPLE> {};
template<>
struct fex_gen_config<glIsFenceNV> {};
template<>
struct fex_gen_config<glIsFramebufferEXT> {};
template<>
struct fex_gen_config<glIsFramebuffer> {};
template<>
struct fex_gen_config<glIsImageHandleResidentARB> {};
template<>
struct fex_gen_config<glIsImageHandleResidentNV> {};
template<>
struct fex_gen_config<glIsList> {};
template<>
struct fex_gen_config<glIsMemoryObjectEXT> {};
template<>
struct fex_gen_config<glIsNameAMD> {};
template<>
struct fex_gen_config<glIsNamedBufferResidentNV> {};
template<>
struct fex_gen_config<glIsNamedStringARB> {};
template<>
struct fex_gen_config<glIsObjectBufferATI> {};
template<>
struct fex_gen_config<glIsOcclusionQueryNV> {};
template<>
struct fex_gen_config<glIsPathNV> {};
template<>
struct fex_gen_config<glIsPointInFillPathNV> {};
template<>
struct fex_gen_config<glIsPointInStrokePathNV> {};
template<>
struct fex_gen_config<glIsProgramARB> {};
template<>
struct fex_gen_config<glIsProgram> {};
template<>
struct fex_gen_config<glIsProgramNV> {};
template<>
struct fex_gen_config<glIsProgramPipeline> {};
template<>
struct fex_gen_config<glIsQueryARB> {};
template<>
struct fex_gen_config<glIsQuery> {};
template<>
struct fex_gen_config<glIsRenderbufferEXT> {};
template<>
struct fex_gen_config<glIsRenderbuffer> {};
template<>
struct fex_gen_config<glIsSampler> {};
template<>
struct fex_gen_config<glIsSemaphoreEXT> {};
template<>
struct fex_gen_config<glIsShader> {};
template<>
struct fex_gen_config<glIsStateNV> {};
template<>
struct fex_gen_config<glIsSync> {};
template<>
struct fex_gen_config<glIsTextureEXT> {};
template<>
struct fex_gen_config<glIsTexture> {};
template<>
struct fex_gen_config<glIsTextureHandleResidentARB> {};
template<>
struct fex_gen_config<glIsTextureHandleResidentNV> {};
template<>
struct fex_gen_config<glIsTransformFeedback> {};
template<>
struct fex_gen_config<glIsTransformFeedbackNV> {};
template<>
struct fex_gen_config<glIsVariantEnabledEXT> {};
template<>
struct fex_gen_config<glIsVertexArrayAPPLE> {};
template<>
struct fex_gen_config<glIsVertexArray> {};
template<>
struct fex_gen_config<glIsVertexAttribEnabledAPPLE> {};
template<>
struct fex_gen_config<glPointAlongPathNV> {};
template<>
struct fex_gen_config<glReleaseKeyedMutexWin32EXT> {};
template<>
struct fex_gen_config<glTestFenceAPPLE> {};
template<>
struct fex_gen_config<glTestFenceNV> {};
template<>
struct fex_gen_config<glTestObjectAPPLE> {};
template<>
struct fex_gen_config<glUnmapBufferARB> {};
template<>
struct fex_gen_config<glUnmapBuffer> {};
template<>
struct fex_gen_config<glUnmapNamedBufferEXT> {};
template<>
struct fex_gen_config<glUnmapNamedBuffer> {};
template<>
struct fex_gen_config<glVDPAUIsSurfaceNV> {};
// Status/error queries, object-handle creation, location/index lookups, sync
// object creation and bindless handle getters (glCheck*Status*, glGetError,
// glGet*Location*, glFenceSync, glGet*HandleARB/NV, ...).
// Every entry is an empty fex_gen_config specialization with no fexgen:: base
// class, so none of them carries a per-function annotation here.
// NOTE(review): presumably default generator handling applies — confirm.
template<>
struct fex_gen_config<glCheckFramebufferStatusEXT> {};
template<>
struct fex_gen_config<glCheckFramebufferStatus> {};
template<>
struct fex_gen_config<glCheckNamedFramebufferStatusEXT> {};
template<>
struct fex_gen_config<glCheckNamedFramebufferStatus> {};
template<>
struct fex_gen_config<glClientWaitSync> {};
template<>
struct fex_gen_config<glGetError> {};
template<>
struct fex_gen_config<glGetGraphicsResetStatus> {};
template<>
struct fex_gen_config<glGetGraphicsResetStatusARB> {};
template<>
struct fex_gen_config<glObjectPurgeableAPPLE> {};
template<>
struct fex_gen_config<glObjectUnpurgeableAPPLE> {};
template<>
struct fex_gen_config<glPathGlyphIndexArrayNV> {};
template<>
struct fex_gen_config<glPathGlyphIndexRangeNV> {};
template<>
struct fex_gen_config<glPathMemoryGlyphIndexArrayNV> {};
template<>
struct fex_gen_config<glVideoCaptureNV> {};
template<>
struct fex_gen_config<glGetPathLengthNV> {};
template<>
struct fex_gen_config<glCreateProgramObjectARB> {};
template<>
struct fex_gen_config<glCreateShaderObjectARB> {};
template<>
struct fex_gen_config<glGetHandleARB> {};
template<>
struct fex_gen_config<glFinishAsyncSGIX> {};
template<>
struct fex_gen_config<glGetAttribLocationARB> {};
template<>
struct fex_gen_config<glGetAttribLocation> {};
template<>
struct fex_gen_config<glGetFragDataIndex> {};
template<>
struct fex_gen_config<glGetFragDataLocationEXT> {};
template<>
struct fex_gen_config<glGetFragDataLocation> {};
template<>
struct fex_gen_config<glGetInstrumentsSGIX> {};
template<>
struct fex_gen_config<glGetProgramResourceLocation> {};
template<>
struct fex_gen_config<glGetProgramResourceLocationIndex> {};
template<>
struct fex_gen_config<glGetSubroutineUniformLocation> {};
template<>
struct fex_gen_config<glGetUniformBufferSizeEXT> {};
template<>
struct fex_gen_config<glGetUniformLocationARB> {};
template<>
struct fex_gen_config<glGetUniformLocation> {};
template<>
struct fex_gen_config<glGetVaryingLocationNV> {};
template<>
struct fex_gen_config<glPollAsyncSGIX> {};
template<>
struct fex_gen_config<glPollInstrumentsSGIX> {};
template<>
struct fex_gen_config<glQueryResourceNV> {};
template<>
struct fex_gen_config<glRenderMode> {};
template<>
struct fex_gen_config<glGetUniformOffsetEXT> {};
template<>
struct fex_gen_config<glCreateSyncFromCLeventARB> {};
template<>
struct fex_gen_config<glFenceSync> {};
template<>
struct fex_gen_config<glImportSyncEXT> {};
template<>
struct fex_gen_config<glGetImageHandleARB> {};
template<>
struct fex_gen_config<glGetImageHandleNV> {};
template<>
struct fex_gen_config<glGetTextureHandleARB> {};
template<>
struct fex_gen_config<glGetTextureHandleNV> {};
template<>
struct fex_gen_config<glGetTextureSamplerHandleARB> {};
template<>
struct fex_gen_config<glGetTextureSamplerHandleNV> {};
// glAsyncCopy*NVX, glBind*ParameterEXT and glCreateProgram/glCreateShader
// style entries: empty fex_gen_config specializations with no fexgen:: base
// class, i.e. no per-function annotation is attached here.
template<>
struct fex_gen_config<glAsyncCopyBufferSubDataNVX> {};
template<>
struct fex_gen_config<glAsyncCopyImageSubDataNVX> {};
template<>
struct fex_gen_config<glBindLightParameterEXT> {};
template<>
struct fex_gen_config<glBindMaterialParameterEXT> {};
template<>
struct fex_gen_config<glBindParameterEXT> {};
template<>
struct fex_gen_config<glBindTexGenParameterEXT> {};
template<>
struct fex_gen_config<glBindTextureUnitParameterEXT> {};
template<>
struct fex_gen_config<glCreateProgram> {};
template<>
struct fex_gen_config<glCreateProgressFenceNVX> {};
template<>
struct fex_gen_config<glCreateShader> {};
template<>
struct fex_gen_config<glCreateShaderProgramEXT> {};
// glCreateShaderProgramv is tagged fexgen::custom_host_impl, and its parameter
// slot 2 (declared as const GLchar* const*) is tagged fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written and receives the
// guest's pointer array unconverted, so that it can repack the string
// pointers itself — confirm against the custom host implementation.
template<>
struct fex_gen_config<glCreateShaderProgramv> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glCreateShaderProgramv, 2, const GLchar* const*> : fexgen::ptr_passthrough {};
// glGen*, glGetDebugMessageLog*, glGet*Index and glVDPAURegister* entries:
// empty fex_gen_config specializations with no fexgen:: base class, i.e. no
// per-function annotation is attached here.
template<>
struct fex_gen_config<glGenAsyncMarkersSGIX> {};
template<>
struct fex_gen_config<glGenFragmentShadersATI> {};
template<>
struct fex_gen_config<glGenLists> {};
template<>
struct fex_gen_config<glGenPathsNV> {};
template<>
struct fex_gen_config<glGenSymbolsEXT> {};
template<>
struct fex_gen_config<glGenVertexShadersEXT> {};
template<>
struct fex_gen_config<glGetCommandHeaderNV> {};
template<>
struct fex_gen_config<glGetDebugMessageLogAMD> {};
template<>
struct fex_gen_config<glGetDebugMessageLogARB> {};
template<>
struct fex_gen_config<glGetDebugMessageLog> {};
template<>
struct fex_gen_config<glGetProgramResourceIndex> {};
template<>
struct fex_gen_config<glGetSubroutineIndex> {};
template<>
struct fex_gen_config<glGetUniformBlockIndex> {};
template<>
struct fex_gen_config<glNewObjectBufferATI> {};
template<>
struct fex_gen_config<glGetStageIndexNV> {};
template<>
struct fex_gen_config<glVDPAURegisterOutputSurfaceNV> {};
template<>
struct fex_gen_config<glVDPAURegisterVideoSurfaceNV> {};
template<>
struct fex_gen_config<glVDPAURegisterVideoSurfaceWithPictureStructureNV> {};
// glAccum*, glActive*, glAlpha*, glApply*, glArray*, glAttach* and glBegin*
// entries, listed in roughly alphabetical order.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glAccum> {};
template<>
struct fex_gen_config<glAccumxOES> {};
template<>
struct fex_gen_config<glActiveProgramEXT> {};
template<>
struct fex_gen_config<glActiveShaderProgram> {};
template<>
struct fex_gen_config<glActiveStencilFaceEXT> {};
template<>
struct fex_gen_config<glActiveTextureARB> {};
template<>
struct fex_gen_config<glActiveTexture> {};
template<>
struct fex_gen_config<glActiveVaryingNV> {};
template<>
struct fex_gen_config<glAlphaFragmentOp1ATI> {};
template<>
struct fex_gen_config<glAlphaFragmentOp2ATI> {};
template<>
struct fex_gen_config<glAlphaFragmentOp3ATI> {};
template<>
struct fex_gen_config<glAlphaFunc> {};
template<>
struct fex_gen_config<glAlphaFuncxOES> {};
template<>
struct fex_gen_config<glAlphaToCoverageDitherControlNV> {};
template<>
struct fex_gen_config<glApplyFramebufferAttachmentCMAAINTEL> {};
template<>
struct fex_gen_config<glApplyTextureEXT> {};
template<>
struct fex_gen_config<glArrayElementEXT> {};
template<>
struct fex_gen_config<glArrayElement> {};
template<>
struct fex_gen_config<glArrayObjectATI> {};
template<>
struct fex_gen_config<glAsyncMarkerSGIX> {};
template<>
struct fex_gen_config<glAttachObjectARB> {};
template<>
struct fex_gen_config<glAttachShader> {};
template<>
struct fex_gen_config<glBeginConditionalRender> {};
template<>
struct fex_gen_config<glBeginConditionalRenderNV> {};
template<>
struct fex_gen_config<glBeginConditionalRenderNVX> {};
template<>
struct fex_gen_config<glBeginFragmentShaderATI> {};
template<>
struct fex_gen_config<glBegin> {};
template<>
struct fex_gen_config<glBeginOcclusionQueryNV> {};
template<>
struct fex_gen_config<glBeginPerfMonitorAMD> {};
template<>
struct fex_gen_config<glBeginPerfQueryINTEL> {};
template<>
struct fex_gen_config<glBeginQueryARB> {};
template<>
struct fex_gen_config<glBeginQuery> {};
template<>
struct fex_gen_config<glBeginQueryIndexed> {};
template<>
struct fex_gen_config<glBeginTransformFeedbackEXT> {};
template<>
struct fex_gen_config<glBeginTransformFeedback> {};
template<>
struct fex_gen_config<glBeginTransformFeedbackNV> {};
template<>
struct fex_gen_config<glBeginVertexShaderEXT> {};
template<>
struct fex_gen_config<glBeginVideoCaptureNV> {};
// glBindAttribLocation* and glBindBuffer* entries (through glBindBuffersBase):
// empty fex_gen_config specializations with no fexgen:: base class, i.e. no
// per-function annotation is attached here.
template<>
struct fex_gen_config<glBindAttribLocationARB> {};
template<>
struct fex_gen_config<glBindAttribLocation> {};
template<>
struct fex_gen_config<glBindBufferARB> {};
template<>
struct fex_gen_config<glBindBufferBaseEXT> {};
template<>
struct fex_gen_config<glBindBufferBase> {};
template<>
struct fex_gen_config<glBindBufferBaseNV> {};
template<>
struct fex_gen_config<glBindBuffer> {};
template<>
struct fex_gen_config<glBindBufferOffsetEXT> {};
template<>
struct fex_gen_config<glBindBufferOffsetNV> {};
template<>
struct fex_gen_config<glBindBufferRangeEXT> {};
template<>
struct fex_gen_config<glBindBufferRange> {};
template<>
struct fex_gen_config<glBindBufferRangeNV> {};
template<>
struct fex_gen_config<glBindBuffersBase> {};
// glBindBuffersRange is configured differently depending on IS_32BIT_THUNK.
#ifndef IS_32BIT_THUNK
// IS_32BIT_THUNK not defined: plain entry without annotations.
template<>
struct fex_gen_config<glBindBuffersRange> {};
#else
// IS_32BIT_THUNK defined: tagged fexgen::custom_host_impl, with parameter
// slots 4 (const GLintptr*) and 5 (const GLsizeiptr*) tagged
// fexgen::ptr_passthrough.
// NOTE(review): presumably needed because GLintptr/GLsizeiptr are
// pointer-sized and so differ in width between a 32-bit guest and the host,
// requiring the arrays to be widened by hand — confirm against the custom
// host implementation.
template<>
struct fex_gen_config<glBindBuffersRange> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glBindBuffersRange, 4, const GLintptr*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<glBindBuffersRange, 5, const GLsizeiptr*> : fexgen::ptr_passthrough {};
#endif
// Remaining glBind* entries from glBindFragDataLocationEXT through
// glBindVertexBuffer: empty fex_gen_config specializations with no fexgen::
// base class, i.e. no per-function annotation is attached here.
template<>
struct fex_gen_config<glBindFragDataLocationEXT> {};
template<>
struct fex_gen_config<glBindFragDataLocation> {};
template<>
struct fex_gen_config<glBindFragDataLocationIndexed> {};
template<>
struct fex_gen_config<glBindFragmentShaderATI> {};
template<>
struct fex_gen_config<glBindFramebufferEXT> {};
template<>
struct fex_gen_config<glBindFramebuffer> {};
template<>
struct fex_gen_config<glBindImageTextureEXT> {};
template<>
struct fex_gen_config<glBindImageTexture> {};
template<>
struct fex_gen_config<glBindImageTextures> {};
template<>
struct fex_gen_config<glBindMultiTextureEXT> {};
template<>
struct fex_gen_config<glBindProgramARB> {};
template<>
struct fex_gen_config<glBindProgramNV> {};
template<>
struct fex_gen_config<glBindProgramPipeline> {};
template<>
struct fex_gen_config<glBindRenderbufferEXT> {};
template<>
struct fex_gen_config<glBindRenderbuffer> {};
template<>
struct fex_gen_config<glBindSampler> {};
template<>
struct fex_gen_config<glBindSamplers> {};
template<>
struct fex_gen_config<glBindShadingRateImageNV> {};
template<>
struct fex_gen_config<glBindTextureEXT> {};
template<>
struct fex_gen_config<glBindTexture> {};
template<>
struct fex_gen_config<glBindTextures> {};
template<>
struct fex_gen_config<glBindTextureUnit> {};
template<>
struct fex_gen_config<glBindTransformFeedback> {};
template<>
struct fex_gen_config<glBindTransformFeedbackNV> {};
template<>
struct fex_gen_config<glBindVertexArrayAPPLE> {};
template<>
struct fex_gen_config<glBindVertexArray> {};
template<>
struct fex_gen_config<glBindVertexBuffer> {};
// glBindVertexBuffers is configured differently depending on IS_32BIT_THUNK.
#ifndef IS_32BIT_THUNK
// IS_32BIT_THUNK not defined: plain entry without annotations.
template<>
struct fex_gen_config<glBindVertexBuffers> {};
#else
// IS_32BIT_THUNK defined: tagged fexgen::custom_host_impl, with parameter
// slot 3 (const GLintptr*) tagged fexgen::ptr_passthrough.
// NOTE(review): presumably the GLintptr array needs manual widening for a
// 32-bit guest — confirm against the custom host implementation.
template<>
struct fex_gen_config<glBindVertexBuffers> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glBindVertexBuffers, 3, const GLintptr*> : fexgen::ptr_passthrough {};
#endif
// glBindVertexShaderEXT, glBindVideoCapture*, glBinormal*, glBitmap*,
// glBlend*, glBlit* and glBuffer* entries.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glBindVertexShaderEXT> {};
template<>
struct fex_gen_config<glBindVideoCaptureStreamBufferNV> {};
template<>
struct fex_gen_config<glBindVideoCaptureStreamTextureNV> {};
template<>
struct fex_gen_config<glBinormal3bEXT> {};
template<>
struct fex_gen_config<glBinormal3bvEXT> {};
template<>
struct fex_gen_config<glBinormal3dEXT> {};
template<>
struct fex_gen_config<glBinormal3dvEXT> {};
template<>
struct fex_gen_config<glBinormal3fEXT> {};
template<>
struct fex_gen_config<glBinormal3fvEXT> {};
template<>
struct fex_gen_config<glBinormal3iEXT> {};
template<>
struct fex_gen_config<glBinormal3ivEXT> {};
template<>
struct fex_gen_config<glBinormal3sEXT> {};
template<>
struct fex_gen_config<glBinormal3svEXT> {};
template<>
struct fex_gen_config<glBinormalPointerEXT> {};
template<>
struct fex_gen_config<glBitmap> {};
template<>
struct fex_gen_config<glBitmapxOES> {};
template<>
struct fex_gen_config<glBlendBarrierKHR> {};
template<>
struct fex_gen_config<glBlendBarrierNV> {};
template<>
struct fex_gen_config<glBlendColorEXT> {};
template<>
struct fex_gen_config<glBlendColor> {};
template<>
struct fex_gen_config<glBlendColorxOES> {};
template<>
struct fex_gen_config<glBlendEquationEXT> {};
template<>
struct fex_gen_config<glBlendEquation> {};
template<>
struct fex_gen_config<glBlendEquationiARB> {};
template<>
struct fex_gen_config<glBlendEquationi> {};
template<>
struct fex_gen_config<glBlendEquationIndexedAMD> {};
template<>
struct fex_gen_config<glBlendEquationSeparateEXT> {};
template<>
struct fex_gen_config<glBlendEquationSeparate> {};
template<>
struct fex_gen_config<glBlendEquationSeparateiARB> {};
template<>
struct fex_gen_config<glBlendEquationSeparatei> {};
template<>
struct fex_gen_config<glBlendEquationSeparateIndexedAMD> {};
template<>
struct fex_gen_config<glBlendFunc> {};
template<>
struct fex_gen_config<glBlendFunciARB> {};
template<>
struct fex_gen_config<glBlendFunci> {};
template<>
struct fex_gen_config<glBlendFuncIndexedAMD> {};
template<>
struct fex_gen_config<glBlendFuncSeparateEXT> {};
template<>
struct fex_gen_config<glBlendFuncSeparate> {};
template<>
struct fex_gen_config<glBlendFuncSeparateiARB> {};
template<>
struct fex_gen_config<glBlendFuncSeparatei> {};
template<>
struct fex_gen_config<glBlendFuncSeparateIndexedAMD> {};
template<>
struct fex_gen_config<glBlendFuncSeparateINGR> {};
template<>
struct fex_gen_config<glBlendParameteriNV> {};
template<>
struct fex_gen_config<glBlitFramebufferEXT> {};
template<>
struct fex_gen_config<glBlitFramebuffer> {};
template<>
struct fex_gen_config<glBlitNamedFramebuffer> {};
template<>
struct fex_gen_config<glBufferAddressRangeNV> {};
template<>
struct fex_gen_config<glBufferAttachMemoryNV> {};
template<>
struct fex_gen_config<glBufferDataARB> {};
template<>
struct fex_gen_config<glBufferData> {};
template<>
struct fex_gen_config<glBufferPageCommitmentARB> {};
template<>
struct fex_gen_config<glBufferParameteriAPPLE> {};
template<>
struct fex_gen_config<glBufferStorageExternalEXT> {};
template<>
struct fex_gen_config<glBufferStorage> {};
template<>
struct fex_gen_config<glBufferStorageMemEXT> {};
template<>
struct fex_gen_config<glBufferSubDataARB> {};
template<>
struct fex_gen_config<glBufferSubData> {};
// glCall*, glClampColor*, glClear*, glClient* and glClip* entries.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glCallCommandListNV> {};
template<>
struct fex_gen_config<glCallList> {};
template<>
struct fex_gen_config<glCallLists> {};
template<>
struct fex_gen_config<glClampColorARB> {};
template<>
struct fex_gen_config<glClampColor> {};
template<>
struct fex_gen_config<glClearAccum> {};
template<>
struct fex_gen_config<glClearAccumxOES> {};
template<>
struct fex_gen_config<glClearBufferData> {};
template<>
struct fex_gen_config<glClearBufferfi> {};
template<>
struct fex_gen_config<glClearBufferfv> {};
template<>
struct fex_gen_config<glClearBufferiv> {};
template<>
struct fex_gen_config<glClearBufferSubData> {};
template<>
struct fex_gen_config<glClearBufferuiv> {};
template<>
struct fex_gen_config<glClearColor> {};
template<>
struct fex_gen_config<glClearColorIiEXT> {};
template<>
struct fex_gen_config<glClearColorIuiEXT> {};
template<>
struct fex_gen_config<glClearColorxOES> {};
template<>
struct fex_gen_config<glClearDepthdNV> {};
template<>
struct fex_gen_config<glClearDepthf> {};
template<>
struct fex_gen_config<glClearDepthfOES> {};
template<>
struct fex_gen_config<glClearDepth> {};
template<>
struct fex_gen_config<glClearDepthxOES> {};
template<>
struct fex_gen_config<glClear> {};
template<>
struct fex_gen_config<glClearIndex> {};
template<>
struct fex_gen_config<glClearNamedBufferDataEXT> {};
template<>
struct fex_gen_config<glClearNamedBufferData> {};
template<>
struct fex_gen_config<glClearNamedBufferSubDataEXT> {};
template<>
struct fex_gen_config<glClearNamedBufferSubData> {};
template<>
struct fex_gen_config<glClearNamedFramebufferfi> {};
template<>
struct fex_gen_config<glClearNamedFramebufferfv> {};
template<>
struct fex_gen_config<glClearNamedFramebufferiv> {};
template<>
struct fex_gen_config<glClearNamedFramebufferuiv> {};
template<>
struct fex_gen_config<glClearStencil> {};
template<>
struct fex_gen_config<glClearTexImage> {};
template<>
struct fex_gen_config<glClearTexSubImage> {};
template<>
struct fex_gen_config<glClientActiveTextureARB> {};
template<>
struct fex_gen_config<glClientActiveTexture> {};
template<>
struct fex_gen_config<glClientActiveVertexStreamATI> {};
template<>
struct fex_gen_config<glClientAttribDefaultEXT> {};
template<>
struct fex_gen_config<glClientWaitSemaphoreui64NVX> {};
template<>
struct fex_gen_config<glClipControl> {};
template<>
struct fex_gen_config<glClipPlanefOES> {};
template<>
struct fex_gen_config<glClipPlane> {};
template<>
struct fex_gen_config<glClipPlanexOES> {};
// glColor3*/glColor4* variants (including the *SUN, *NV and *OES forms),
// glColorFormatNV, glColorFragmentOp*ATI, glColorMask*, glColorMaterial and
// glColorPointer/glColorPointerEXT.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glColor3b> {};
template<>
struct fex_gen_config<glColor3bv> {};
template<>
struct fex_gen_config<glColor3d> {};
template<>
struct fex_gen_config<glColor3dv> {};
template<>
struct fex_gen_config<glColor3f> {};
template<>
struct fex_gen_config<glColor3fv> {};
template<>
struct fex_gen_config<glColor3fVertex3fSUN> {};
template<>
struct fex_gen_config<glColor3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glColor3hNV> {};
template<>
struct fex_gen_config<glColor3hvNV> {};
template<>
struct fex_gen_config<glColor3i> {};
template<>
struct fex_gen_config<glColor3iv> {};
template<>
struct fex_gen_config<glColor3s> {};
template<>
struct fex_gen_config<glColor3sv> {};
template<>
struct fex_gen_config<glColor3ub> {};
template<>
struct fex_gen_config<glColor3ubv> {};
template<>
struct fex_gen_config<glColor3ui> {};
template<>
struct fex_gen_config<glColor3uiv> {};
template<>
struct fex_gen_config<glColor3us> {};
template<>
struct fex_gen_config<glColor3usv> {};
template<>
struct fex_gen_config<glColor3xOES> {};
template<>
struct fex_gen_config<glColor3xvOES> {};
template<>
struct fex_gen_config<glColor4b> {};
template<>
struct fex_gen_config<glColor4bv> {};
template<>
struct fex_gen_config<glColor4d> {};
template<>
struct fex_gen_config<glColor4dv> {};
template<>
struct fex_gen_config<glColor4f> {};
template<>
struct fex_gen_config<glColor4fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glColor4fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glColor4fv> {};
template<>
struct fex_gen_config<glColor4hNV> {};
template<>
struct fex_gen_config<glColor4hvNV> {};
template<>
struct fex_gen_config<glColor4i> {};
template<>
struct fex_gen_config<glColor4iv> {};
template<>
struct fex_gen_config<glColor4s> {};
template<>
struct fex_gen_config<glColor4sv> {};
template<>
struct fex_gen_config<glColor4ub> {};
template<>
struct fex_gen_config<glColor4ubv> {};
template<>
struct fex_gen_config<glColor4ubVertex2fSUN> {};
template<>
struct fex_gen_config<glColor4ubVertex2fvSUN> {};
template<>
struct fex_gen_config<glColor4ubVertex3fSUN> {};
template<>
struct fex_gen_config<glColor4ubVertex3fvSUN> {};
template<>
struct fex_gen_config<glColor4ui> {};
template<>
struct fex_gen_config<glColor4uiv> {};
template<>
struct fex_gen_config<glColor4us> {};
template<>
struct fex_gen_config<glColor4usv> {};
template<>
struct fex_gen_config<glColor4xOES> {};
template<>
struct fex_gen_config<glColor4xvOES> {};
template<>
struct fex_gen_config<glColorFormatNV> {};
template<>
struct fex_gen_config<glColorFragmentOp1ATI> {};
template<>
struct fex_gen_config<glColorFragmentOp2ATI> {};
template<>
struct fex_gen_config<glColorFragmentOp3ATI> {};
template<>
struct fex_gen_config<glColorMask> {};
template<>
struct fex_gen_config<glColorMaski> {};
template<>
struct fex_gen_config<glColorMaskIndexedEXT> {};
template<>
struct fex_gen_config<glColorMaterial> {};
template<>
struct fex_gen_config<glColorPointerEXT> {};
template<>
struct fex_gen_config<glColorPointer> {};
// glColorPointerListIBM and glColorPointervINTEL are only declared when
// IS_32BIT_THUNK is not defined; with the macro defined neither function has
// an entry at all (see the TODO below).
// Their const void** parameters (slot 3 and slot 2 respectively) are tagged
// fexgen::assume_compatible_data_layout.
// NOTE(review): presumably this asserts that the pointer array has the same
// layout on guest and host, which would not hold for 32-bit guests — confirm.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glColorPointerListIBM> {};
template<>
struct fex_gen_param<glColorPointerListIBM, 3, const void**> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glColorPointervINTEL> {};
template<>
struct fex_gen_param<glColorPointervINTEL, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glColorSubTable*, glColorTable*, glCombiner*, glCommandListSegmentsNV,
// glCompileCommandListNV and glCompileShader/glCompileShaderARB entries:
// empty fex_gen_config specializations with no fexgen:: base class, i.e. no
// per-function annotation is attached here.
template<>
struct fex_gen_config<glColorSubTableEXT> {};
template<>
struct fex_gen_config<glColorSubTable> {};
template<>
struct fex_gen_config<glColorTableEXT> {};
template<>
struct fex_gen_config<glColorTable> {};
template<>
struct fex_gen_config<glColorTableParameterfv> {};
template<>
struct fex_gen_config<glColorTableParameterfvSGI> {};
template<>
struct fex_gen_config<glColorTableParameteriv> {};
template<>
struct fex_gen_config<glColorTableParameterivSGI> {};
template<>
struct fex_gen_config<glColorTableSGI> {};
template<>
struct fex_gen_config<glCombinerInputNV> {};
template<>
struct fex_gen_config<glCombinerOutputNV> {};
template<>
struct fex_gen_config<glCombinerParameterfNV> {};
template<>
struct fex_gen_config<glCombinerParameterfvNV> {};
template<>
struct fex_gen_config<glCombinerParameteriNV> {};
template<>
struct fex_gen_config<glCombinerParameterivNV> {};
template<>
struct fex_gen_config<glCombinerStageParameterfvNV> {};
template<>
struct fex_gen_config<glCommandListSegmentsNV> {};
template<>
struct fex_gen_config<glCompileCommandListNV> {};
template<>
struct fex_gen_config<glCompileShaderARB> {};
template<>
struct fex_gen_config<glCompileShader> {};
// glCompileShaderIncludeARB is tagged fexgen::custom_host_impl, and its
// parameter slot 2 (declared as const GLchar* const*) is tagged
// fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written and receives the
// guest's pointer array unconverted so it can repack the string pointers
// itself — confirm against the custom host implementation.
template<>
struct fex_gen_config<glCompileShaderIncludeARB> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glCompileShaderIncludeARB, 2, const GLchar* const*> : fexgen::ptr_passthrough {};
// glCompressed*Image*, glConservativeRasterParameter*NV, glConvolutionFilter*
// and glConvolutionParameter* entries.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glCompressedMultiTexImage1DEXT> {};
template<>
struct fex_gen_config<glCompressedMultiTexImage2DEXT> {};
template<>
struct fex_gen_config<glCompressedMultiTexImage3DEXT> {};
template<>
struct fex_gen_config<glCompressedMultiTexSubImage1DEXT> {};
template<>
struct fex_gen_config<glCompressedMultiTexSubImage2DEXT> {};
template<>
struct fex_gen_config<glCompressedMultiTexSubImage3DEXT> {};
template<>
struct fex_gen_config<glCompressedTexImage1DARB> {};
template<>
struct fex_gen_config<glCompressedTexImage1D> {};
template<>
struct fex_gen_config<glCompressedTexImage2DARB> {};
template<>
struct fex_gen_config<glCompressedTexImage2D> {};
template<>
struct fex_gen_config<glCompressedTexImage3DARB> {};
template<>
struct fex_gen_config<glCompressedTexImage3D> {};
template<>
struct fex_gen_config<glCompressedTexSubImage1DARB> {};
template<>
struct fex_gen_config<glCompressedTexSubImage1D> {};
template<>
struct fex_gen_config<glCompressedTexSubImage2DARB> {};
template<>
struct fex_gen_config<glCompressedTexSubImage2D> {};
template<>
struct fex_gen_config<glCompressedTexSubImage3DARB> {};
template<>
struct fex_gen_config<glCompressedTexSubImage3D> {};
template<>
struct fex_gen_config<glCompressedTextureImage1DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureImage2DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureImage3DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage1DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage1D> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage2DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage2D> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage3DEXT> {};
template<>
struct fex_gen_config<glCompressedTextureSubImage3D> {};
template<>
struct fex_gen_config<glConservativeRasterParameterfNV> {};
template<>
struct fex_gen_config<glConservativeRasterParameteriNV> {};
template<>
struct fex_gen_config<glConvolutionFilter1DEXT> {};
template<>
struct fex_gen_config<glConvolutionFilter1D> {};
template<>
struct fex_gen_config<glConvolutionFilter2DEXT> {};
template<>
struct fex_gen_config<glConvolutionFilter2D> {};
template<>
struct fex_gen_config<glConvolutionParameterfEXT> {};
template<>
struct fex_gen_config<glConvolutionParameterf> {};
template<>
struct fex_gen_config<glConvolutionParameterfvEXT> {};
template<>
struct fex_gen_config<glConvolutionParameterfv> {};
template<>
struct fex_gen_config<glConvolutionParameteriEXT> {};
template<>
struct fex_gen_config<glConvolutionParameteri> {};
template<>
struct fex_gen_config<glConvolutionParameterivEXT> {};
template<>
struct fex_gen_config<glConvolutionParameteriv> {};
template<>
struct fex_gen_config<glConvolutionParameterxOES> {};
template<>
struct fex_gen_config<glConvolutionParameterxvOES> {};
// glCopy*, glCoverageModulation*NV and glCover*PathNV entries.
// Each is an empty fex_gen_config specialization with no fexgen:: base class,
// so no per-function annotation is attached here.
template<>
struct fex_gen_config<glCopyBufferSubData> {};
template<>
struct fex_gen_config<glCopyColorSubTableEXT> {};
template<>
struct fex_gen_config<glCopyColorSubTable> {};
template<>
struct fex_gen_config<glCopyColorTable> {};
template<>
struct fex_gen_config<glCopyColorTableSGI> {};
template<>
struct fex_gen_config<glCopyConvolutionFilter1DEXT> {};
template<>
struct fex_gen_config<glCopyConvolutionFilter1D> {};
template<>
struct fex_gen_config<glCopyConvolutionFilter2DEXT> {};
template<>
struct fex_gen_config<glCopyConvolutionFilter2D> {};
template<>
struct fex_gen_config<glCopyImageSubData> {};
template<>
struct fex_gen_config<glCopyImageSubDataNV> {};
template<>
struct fex_gen_config<glCopyMultiTexImage1DEXT> {};
template<>
struct fex_gen_config<glCopyMultiTexImage2DEXT> {};
template<>
struct fex_gen_config<glCopyMultiTexSubImage1DEXT> {};
template<>
struct fex_gen_config<glCopyMultiTexSubImage2DEXT> {};
template<>
struct fex_gen_config<glCopyMultiTexSubImage3DEXT> {};
template<>
struct fex_gen_config<glCopyNamedBufferSubData> {};
template<>
struct fex_gen_config<glCopyPathNV> {};
template<>
struct fex_gen_config<glCopyPixels> {};
template<>
struct fex_gen_config<glCopyTexImage1DEXT> {};
template<>
struct fex_gen_config<glCopyTexImage1D> {};
template<>
struct fex_gen_config<glCopyTexImage2DEXT> {};
template<>
struct fex_gen_config<glCopyTexImage2D> {};
template<>
struct fex_gen_config<glCopyTexSubImage1DEXT> {};
template<>
struct fex_gen_config<glCopyTexSubImage1D> {};
template<>
struct fex_gen_config<glCopyTexSubImage2DEXT> {};
template<>
struct fex_gen_config<glCopyTexSubImage2D> {};
template<>
struct fex_gen_config<glCopyTexSubImage3DEXT> {};
template<>
struct fex_gen_config<glCopyTexSubImage3D> {};
template<>
struct fex_gen_config<glCopyTextureImage1DEXT> {};
template<>
struct fex_gen_config<glCopyTextureImage2DEXT> {};
template<>
struct fex_gen_config<glCopyTextureSubImage1DEXT> {};
template<>
struct fex_gen_config<glCopyTextureSubImage1D> {};
template<>
struct fex_gen_config<glCopyTextureSubImage2DEXT> {};
template<>
struct fex_gen_config<glCopyTextureSubImage2D> {};
template<>
struct fex_gen_config<glCopyTextureSubImage3DEXT> {};
template<>
struct fex_gen_config<glCopyTextureSubImage3D> {};
template<>
struct fex_gen_config<glCoverageModulationNV> {};
template<>
struct fex_gen_config<glCoverageModulationTableNV> {};
template<>
struct fex_gen_config<glCoverFillPathInstancedNV> {};
template<>
struct fex_gen_config<glCoverFillPathNV> {};
template<>
struct fex_gen_config<glCoverStrokePathInstancedNV> {};
template<>
struct fex_gen_config<glCoverStrokePathNV> {};
// glCreate* (bulk object creation), glCullFace, glCullParameter*EXT and
// glCurrentPaletteMatrixARB entries: empty fex_gen_config specializations with
// no fexgen:: base class, i.e. no per-function annotation is attached here.
template<>
struct fex_gen_config<glCreateBuffers> {};
template<>
struct fex_gen_config<glCreateCommandListsNV> {};
template<>
struct fex_gen_config<glCreateFramebuffers> {};
template<>
struct fex_gen_config<glCreateMemoryObjectsEXT> {};
template<>
struct fex_gen_config<glCreatePerfQueryINTEL> {};
template<>
struct fex_gen_config<glCreateProgramPipelines> {};
template<>
struct fex_gen_config<glCreateQueries> {};
template<>
struct fex_gen_config<glCreateRenderbuffers> {};
template<>
struct fex_gen_config<glCreateSamplers> {};
template<>
struct fex_gen_config<glCreateStatesNV> {};
template<>
struct fex_gen_config<glCreateTextures> {};
template<>
struct fex_gen_config<glCreateTransformFeedbacks> {};
template<>
struct fex_gen_config<glCreateVertexArrays> {};
template<>
struct fex_gen_config<glCullFace> {};
template<>
struct fex_gen_config<glCullParameterdvEXT> {};
template<>
struct fex_gen_config<glCullParameterfvEXT> {};
template<>
struct fex_gen_config<glCurrentPaletteMatrixARB> {};
// The three glDebugMessageCallback variants (AMD, ARB and core) are all tagged
// fexgen::callback_stub.
// NOTE(review): presumably this means the guest-provided callback is stubbed
// out rather than forwarded to the host driver, so guest debug callbacks
// would never fire — confirm against the generator's callback_stub handling.
template<>
struct fex_gen_config<glDebugMessageCallbackAMD> : fexgen::callback_stub {};
template<>
struct fex_gen_config<glDebugMessageCallbackARB> : fexgen::callback_stub {};
template<>
struct fex_gen_config<glDebugMessageCallback> : fexgen::callback_stub {};
// glDebugMessageControl*, glDebugMessageEnableAMD, glDebugMessageInsert* and
// glDeform*SGIX entries: empty fex_gen_config specializations with no fexgen::
// base class, i.e. no per-function annotation is attached here.
template<>
struct fex_gen_config<glDebugMessageControlARB> {};
template<>
struct fex_gen_config<glDebugMessageControl> {};
template<>
struct fex_gen_config<glDebugMessageEnableAMD> {};
template<>
struct fex_gen_config<glDebugMessageInsertAMD> {};
template<>
struct fex_gen_config<glDebugMessageInsertARB> {};
template<>
struct fex_gen_config<glDebugMessageInsert> {};
template<>
struct fex_gen_config<glDeformationMap3dSGIX> {};
template<>
struct fex_gen_config<glDeformationMap3fSGIX> {};
template<>
struct fex_gen_config<glDeformSGIX> {};
// glDelete*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glDeleteAsyncMarkersSGIX> {};
template<>
struct fex_gen_config<glDeleteBuffersARB> {};
template<>
struct fex_gen_config<glDeleteBuffers> {};
template<>
struct fex_gen_config<glDeleteCommandListsNV> {};
template<>
struct fex_gen_config<glDeleteFencesAPPLE> {};
template<>
struct fex_gen_config<glDeleteFencesNV> {};
template<>
struct fex_gen_config<glDeleteFragmentShaderATI> {};
template<>
struct fex_gen_config<glDeleteFramebuffersEXT> {};
template<>
struct fex_gen_config<glDeleteFramebuffers> {};
template<>
struct fex_gen_config<glDeleteLists> {};
template<>
struct fex_gen_config<glDeleteMemoryObjectsEXT> {};
template<>
struct fex_gen_config<glDeleteNamedStringARB> {};
template<>
struct fex_gen_config<glDeleteNamesAMD> {};
template<>
struct fex_gen_config<glDeleteObjectARB> {};
template<>
struct fex_gen_config<glDeleteOcclusionQueriesNV> {};
template<>
struct fex_gen_config<glDeletePathsNV> {};
template<>
struct fex_gen_config<glDeletePerfMonitorsAMD> {};
template<>
struct fex_gen_config<glDeletePerfQueryINTEL> {};
template<>
struct fex_gen_config<glDeleteProgram> {};
template<>
struct fex_gen_config<glDeleteProgramPipelines> {};
template<>
struct fex_gen_config<glDeleteProgramsARB> {};
template<>
struct fex_gen_config<glDeleteProgramsNV> {};
template<>
struct fex_gen_config<glDeleteQueriesARB> {};
template<>
struct fex_gen_config<glDeleteQueries> {};
template<>
struct fex_gen_config<glDeleteQueryResourceTagNV> {};
template<>
struct fex_gen_config<glDeleteRenderbuffersEXT> {};
template<>
struct fex_gen_config<glDeleteRenderbuffers> {};
template<>
struct fex_gen_config<glDeleteSamplers> {};
template<>
struct fex_gen_config<glDeleteSemaphoresEXT> {};
template<>
struct fex_gen_config<glDeleteShader> {};
template<>
struct fex_gen_config<glDeleteStatesNV> {};
template<>
struct fex_gen_config<glDeleteSync> {};
template<>
struct fex_gen_config<glDeleteTexturesEXT> {};
template<>
struct fex_gen_config<glDeleteTextures> {};
template<>
struct fex_gen_config<glDeleteTransformFeedbacks> {};
template<>
struct fex_gen_config<glDeleteTransformFeedbacksNV> {};
template<>
struct fex_gen_config<glDeleteVertexArraysAPPLE> {};
template<>
struct fex_gen_config<glDeleteVertexArrays> {};
template<>
struct fex_gen_config<glDeleteVertexShaderEXT> {};
// glDepth*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glDepthBoundsdNV> {};
template<>
struct fex_gen_config<glDepthBoundsEXT> {};
template<>
struct fex_gen_config<glDepthFunc> {};
template<>
struct fex_gen_config<glDepthMask> {};
template<>
struct fex_gen_config<glDepthRangeArraydvNV> {};
template<>
struct fex_gen_config<glDepthRangeArrayv> {};
template<>
struct fex_gen_config<glDepthRangedNV> {};
template<>
struct fex_gen_config<glDepthRangef> {};
template<>
struct fex_gen_config<glDepthRangefOES> {};
template<>
struct fex_gen_config<glDepthRange> {};
template<>
struct fex_gen_config<glDepthRangeIndexeddNV> {};
template<>
struct fex_gen_config<glDepthRangeIndexed> {};
template<>
struct fex_gen_config<glDepthRangexOES> {};
// glDetach* and glDetailTexFuncSGIS: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glDetachObjectARB> {};
template<>
struct fex_gen_config<glDetachShader> {};
template<>
struct fex_gen_config<glDetailTexFuncSGIS> {};
// glDisable*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glDisableClientState> {};
template<>
struct fex_gen_config<glDisableClientStateiEXT> {};
template<>
struct fex_gen_config<glDisableClientStateIndexedEXT> {};
template<>
struct fex_gen_config<glDisable> {};
template<>
struct fex_gen_config<glDisablei> {};
template<>
struct fex_gen_config<glDisableIndexedEXT> {};
template<>
struct fex_gen_config<glDisableVariantClientStateEXT> {};
template<>
struct fex_gen_config<glDisableVertexArrayAttribEXT> {};
template<>
struct fex_gen_config<glDisableVertexArrayAttrib> {};
template<>
struct fex_gen_config<glDisableVertexArrayEXT> {};
template<>
struct fex_gen_config<glDisableVertexAttribAPPLE> {};
template<>
struct fex_gen_config<glDisableVertexAttribArrayARB> {};
template<>
struct fex_gen_config<glDisableVertexAttribArray> {};
// glDispatchCompute*: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glDispatchCompute> {};
template<>
struct fex_gen_config<glDispatchComputeGroupSizeARB> {};
template<>
struct fex_gen_config<glDispatchComputeIndirect> {};
// glDrawArrays*, glDrawBuffer* and glDrawCommandsAddressNV: plain fex_gen_config
// specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glDrawArraysEXT> {};
template<>
struct fex_gen_config<glDrawArrays> {};
template<>
struct fex_gen_config<glDrawArraysIndirect> {};
template<>
struct fex_gen_config<glDrawArraysInstancedARB> {};
template<>
struct fex_gen_config<glDrawArraysInstancedBaseInstance> {};
template<>
struct fex_gen_config<glDrawArraysInstancedEXT> {};
template<>
struct fex_gen_config<glDrawArraysInstanced> {};
template<>
struct fex_gen_config<glDrawBuffer> {};
template<>
struct fex_gen_config<glDrawBuffersARB> {};
template<>
struct fex_gen_config<glDrawBuffers> {};
template<>
struct fex_gen_config<glDrawBuffersATI> {};
template<>
struct fex_gen_config<glDrawCommandsAddressNV> {};
// glDrawCommandsNV and glDrawCommandsStatesNV are only configured when
// IS_32BIT_THUNK is not defined; glDrawCommandsStatesAddressNV between them
// is configured unconditionally.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support (glDrawCommandsNV is currently omitted when IS_32BIT_THUNK is defined)
template<>
struct fex_gen_config<glDrawCommandsNV> {};
#endif
template<>
struct fex_gen_config<glDrawCommandsStatesAddressNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support (glDrawCommandsStatesNV is currently omitted when IS_32BIT_THUNK is defined)
template<>
struct fex_gen_config<glDrawCommandsStatesNV> {};
#endif
// Remaining glDraw* functions (elements, mesh, pixels, range elements, texture,
// transform feedback, Vk image): plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glDrawElementArrayAPPLE> {};
template<>
struct fex_gen_config<glDrawElementArrayATI> {};
template<>
struct fex_gen_config<glDrawElementsBaseVertex> {};
template<>
struct fex_gen_config<glDrawElements> {};
template<>
struct fex_gen_config<glDrawElementsIndirect> {};
template<>
struct fex_gen_config<glDrawElementsInstancedARB> {};
template<>
struct fex_gen_config<glDrawElementsInstancedBaseInstance> {};
template<>
struct fex_gen_config<glDrawElementsInstancedBaseVertexBaseInstance> {};
template<>
struct fex_gen_config<glDrawElementsInstancedBaseVertex> {};
template<>
struct fex_gen_config<glDrawElementsInstancedEXT> {};
template<>
struct fex_gen_config<glDrawElementsInstanced> {};
template<>
struct fex_gen_config<glDrawMeshArraysSUN> {};
template<>
struct fex_gen_config<glDrawMeshTasksIndirectNV> {};
template<>
struct fex_gen_config<glDrawMeshTasksNV> {};
template<>
struct fex_gen_config<glDrawPixels> {};
template<>
struct fex_gen_config<glDrawRangeElementArrayAPPLE> {};
template<>
struct fex_gen_config<glDrawRangeElementArrayATI> {};
template<>
struct fex_gen_config<glDrawRangeElementsBaseVertex> {};
template<>
struct fex_gen_config<glDrawRangeElementsEXT> {};
template<>
struct fex_gen_config<glDrawRangeElements> {};
template<>
struct fex_gen_config<glDrawTextureNV> {};
template<>
struct fex_gen_config<glDrawTransformFeedback> {};
template<>
struct fex_gen_config<glDrawTransformFeedbackInstanced> {};
template<>
struct fex_gen_config<glDrawTransformFeedbackNV> {};
template<>
struct fex_gen_config<glDrawTransformFeedbackStream> {};
template<>
struct fex_gen_config<glDrawTransformFeedbackStreamInstanced> {};
template<>
struct fex_gen_config<glDrawVkImageNV> {};
// glEdgeFlag*: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glEdgeFlagFormatNV> {};
template<>
struct fex_gen_config<glEdgeFlag> {};
template<>
struct fex_gen_config<glEdgeFlagPointerEXT> {};
template<>
struct fex_gen_config<glEdgeFlagPointer> {};
// glEdgeFlagPointerListIBM is only configured when IS_32BIT_THUNK is not defined.
// Its parameter 1 (const GLboolean**) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably this tells the generator to treat the pointer-to-pointer
// data as layout-compatible between guest and host - confirm against the thunk generator.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support (currently omitted when IS_32BIT_THUNK is defined)
template<>
struct fex_gen_config<glEdgeFlagPointerListIBM> {};
template<>
struct fex_gen_param<glEdgeFlagPointerListIBM, 1, const GLboolean**> : fexgen::assume_compatible_data_layout {};
#endif
// glEdgeFlagv: plain specialization, configured unconditionally.
template<>
struct fex_gen_config<glEdgeFlagv> {};
// glEGLImageTarget* and glElementPointer*: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glEGLImageTargetRenderbufferStorageOES> {};
template<>
struct fex_gen_config<glEGLImageTargetTexStorageEXT> {};
template<>
struct fex_gen_config<glEGLImageTargetTexture2DOES> {};
template<>
struct fex_gen_config<glEGLImageTargetTextureStorageEXT> {};
template<>
struct fex_gen_config<glElementPointerAPPLE> {};
template<>
struct fex_gen_config<glElementPointerATI> {};
// glEnable*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glEnableClientState> {};
template<>
struct fex_gen_config<glEnableClientStateiEXT> {};
template<>
struct fex_gen_config<glEnableClientStateIndexedEXT> {};
template<>
struct fex_gen_config<glEnable> {};
template<>
struct fex_gen_config<glEnablei> {};
template<>
struct fex_gen_config<glEnableIndexedEXT> {};
template<>
struct fex_gen_config<glEnableVariantClientStateEXT> {};
template<>
struct fex_gen_config<glEnableVertexArrayAttribEXT> {};
template<>
struct fex_gen_config<glEnableVertexArrayAttrib> {};
template<>
struct fex_gen_config<glEnableVertexArrayEXT> {};
template<>
struct fex_gen_config<glEnableVertexAttribAPPLE> {};
template<>
struct fex_gen_config<glEnableVertexAttribArrayARB> {};
template<>
struct fex_gen_config<glEnableVertexAttribArray> {};
// glEnd*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glEnd> {};
template<>
struct fex_gen_config<glEndConditionalRender> {};
template<>
struct fex_gen_config<glEndConditionalRenderNV> {};
template<>
struct fex_gen_config<glEndConditionalRenderNVX> {};
template<>
struct fex_gen_config<glEndFragmentShaderATI> {};
template<>
struct fex_gen_config<glEndList> {};
template<>
struct fex_gen_config<glEndOcclusionQueryNV> {};
template<>
struct fex_gen_config<glEndPerfMonitorAMD> {};
template<>
struct fex_gen_config<glEndPerfQueryINTEL> {};
template<>
struct fex_gen_config<glEndQueryARB> {};
template<>
struct fex_gen_config<glEndQuery> {};
template<>
struct fex_gen_config<glEndQueryIndexed> {};
template<>
struct fex_gen_config<glEndTransformFeedback> {};
template<>
struct fex_gen_config<glEndTransformFeedbackEXT> {};
template<>
struct fex_gen_config<glEndTransformFeedbackNV> {};
template<>
struct fex_gen_config<glEndVertexShaderEXT> {};
template<>
struct fex_gen_config<glEndVideoCaptureNV> {};
// glEval*, glEvaluateDepthValuesARB, glExecuteProgramNV and glExtractComponentEXT:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glEvalCoord1d> {};
template<>
struct fex_gen_config<glEvalCoord1dv> {};
template<>
struct fex_gen_config<glEvalCoord1f> {};
template<>
struct fex_gen_config<glEvalCoord1fv> {};
template<>
struct fex_gen_config<glEvalCoord1xOES> {};
template<>
struct fex_gen_config<glEvalCoord1xvOES> {};
template<>
struct fex_gen_config<glEvalCoord2d> {};
template<>
struct fex_gen_config<glEvalCoord2dv> {};
template<>
struct fex_gen_config<glEvalCoord2f> {};
template<>
struct fex_gen_config<glEvalCoord2fv> {};
template<>
struct fex_gen_config<glEvalCoord2xOES> {};
template<>
struct fex_gen_config<glEvalCoord2xvOES> {};
template<>
struct fex_gen_config<glEvalMapsNV> {};
template<>
struct fex_gen_config<glEvalMesh1> {};
template<>
struct fex_gen_config<glEvalMesh2> {};
template<>
struct fex_gen_config<glEvalPoint1> {};
template<>
struct fex_gen_config<glEvalPoint2> {};
template<>
struct fex_gen_config<glEvaluateDepthValuesARB> {};
template<>
struct fex_gen_config<glExecuteProgramNV> {};
template<>
struct fex_gen_config<glExtractComponentEXT> {};
// glFeedbackBuffer*, glFinalCombinerInputNV, glFinish* and glFlush*:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glFeedbackBuffer> {};
template<>
struct fex_gen_config<glFeedbackBufferxOES> {};
template<>
struct fex_gen_config<glFinalCombinerInputNV> {};
template<>
struct fex_gen_config<glFinish> {};
template<>
struct fex_gen_config<glFinishFenceAPPLE> {};
template<>
struct fex_gen_config<glFinishFenceNV> {};
template<>
struct fex_gen_config<glFinishObjectAPPLE> {};
template<>
struct fex_gen_config<glFinishTextureSUNX> {};
template<>
struct fex_gen_config<glFlush> {};
template<>
struct fex_gen_config<glFlushMappedBufferRangeAPPLE> {};
template<>
struct fex_gen_config<glFlushMappedBufferRange> {};
template<>
struct fex_gen_config<glFlushMappedNamedBufferRangeEXT> {};
template<>
struct fex_gen_config<glFlushMappedNamedBufferRange> {};
template<>
struct fex_gen_config<glFlushPixelDataRangeNV> {};
template<>
struct fex_gen_config<glFlushRasterSGIX> {};
template<>
struct fex_gen_config<glFlushStaticDataIBM> {};
template<>
struct fex_gen_config<glFlushVertexArrayRangeAPPLE> {};
template<>
struct fex_gen_config<glFlushVertexArrayRangeNV> {};
// glFogCoord*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glFogCoorddEXT> {};
template<>
struct fex_gen_config<glFogCoorddvEXT> {};
template<>
struct fex_gen_config<glFogCoordfEXT> {};
template<>
struct fex_gen_config<glFogCoordFormatNV> {};
template<>
struct fex_gen_config<glFogCoordfvEXT> {};
template<>
struct fex_gen_config<glFogCoordhNV> {};
template<>
struct fex_gen_config<glFogCoordhvNV> {};
template<>
struct fex_gen_config<glFogCoordPointerEXT> {};
// glFogCoordPointerListIBM is only configured when IS_32BIT_THUNK is not defined.
// Its parameter 2 (const void**) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably this tells the generator to treat the pointer-to-pointer
// data as layout-compatible between guest and host - confirm against the thunk generator.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support (currently omitted when IS_32BIT_THUNK is defined)
template<>
struct fex_gen_config<glFogCoordPointerListIBM> {};
template<>
struct fex_gen_param<glFogCoordPointerListIBM, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glFog*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glFogf> {};
template<>
struct fex_gen_config<glFogFuncSGIS> {};
template<>
struct fex_gen_config<glFogfv> {};
template<>
struct fex_gen_config<glFogi> {};
template<>
struct fex_gen_config<glFogiv> {};
template<>
struct fex_gen_config<glFogxOES> {};
template<>
struct fex_gen_config<glFogxvOES> {};
// glFragment*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glFragmentColorMaterialSGIX> {};
template<>
struct fex_gen_config<glFragmentCoverageColorNV> {};
template<>
struct fex_gen_config<glFragmentLightfSGIX> {};
template<>
struct fex_gen_config<glFragmentLightfvSGIX> {};
template<>
struct fex_gen_config<glFragmentLightiSGIX> {};
template<>
struct fex_gen_config<glFragmentLightivSGIX> {};
template<>
struct fex_gen_config<glFragmentLightModelfSGIX> {};
template<>
struct fex_gen_config<glFragmentLightModelfvSGIX> {};
template<>
struct fex_gen_config<glFragmentLightModeliSGIX> {};
template<>
struct fex_gen_config<glFragmentLightModelivSGIX> {};
template<>
struct fex_gen_config<glFragmentMaterialfSGIX> {};
template<>
struct fex_gen_config<glFragmentMaterialfvSGIX> {};
template<>
struct fex_gen_config<glFragmentMaterialiSGIX> {};
template<>
struct fex_gen_config<glFragmentMaterialivSGIX> {};
// glFramebuffer*, glFrame*, glFreeObjectBufferATI, glFrontFace and glFrustum*:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glFramebufferDrawBufferEXT> {};
template<>
struct fex_gen_config<glFramebufferDrawBuffersEXT> {};
template<>
struct fex_gen_config<glFramebufferFetchBarrierEXT> {};
template<>
struct fex_gen_config<glFramebufferParameteri> {};
template<>
struct fex_gen_config<glFramebufferParameteriMESA> {};
template<>
struct fex_gen_config<glFramebufferReadBufferEXT> {};
template<>
struct fex_gen_config<glFramebufferRenderbufferEXT> {};
template<>
struct fex_gen_config<glFramebufferRenderbuffer> {};
template<>
struct fex_gen_config<glFramebufferSampleLocationsfvARB> {};
template<>
struct fex_gen_config<glFramebufferSampleLocationsfvNV> {};
template<>
struct fex_gen_config<glFramebufferSamplePositionsfvAMD> {};
template<>
struct fex_gen_config<glFramebufferTexture1DEXT> {};
template<>
struct fex_gen_config<glFramebufferTexture1D> {};
template<>
struct fex_gen_config<glFramebufferTexture2DEXT> {};
template<>
struct fex_gen_config<glFramebufferTexture2D> {};
template<>
struct fex_gen_config<glFramebufferTexture3DEXT> {};
template<>
struct fex_gen_config<glFramebufferTexture3D> {};
template<>
struct fex_gen_config<glFramebufferTextureARB> {};
template<>
struct fex_gen_config<glFramebufferTextureEXT> {};
template<>
struct fex_gen_config<glFramebufferTextureFaceARB> {};
template<>
struct fex_gen_config<glFramebufferTextureFaceEXT> {};
template<>
struct fex_gen_config<glFramebufferTexture> {};
template<>
struct fex_gen_config<glFramebufferTextureLayerARB> {};
template<>
struct fex_gen_config<glFramebufferTextureLayerEXT> {};
template<>
struct fex_gen_config<glFramebufferTextureLayer> {};
template<>
struct fex_gen_config<glFramebufferTextureMultiviewOVR> {};
template<>
struct fex_gen_config<glFrameTerminatorGREMEDY> {};
template<>
struct fex_gen_config<glFrameZoomSGIX> {};
template<>
struct fex_gen_config<glFreeObjectBufferATI> {};
template<>
struct fex_gen_config<glFrontFace> {};
template<>
struct fex_gen_config<glFrustumfOES> {};
template<>
struct fex_gen_config<glFrustum> {};
template<>
struct fex_gen_config<glFrustumxOES> {};
// glGen* (including glGenerate*Mipmap*): plain fex_gen_config specializations
// (empty body, no fexgen base).
template<>
struct fex_gen_config<glGenBuffersARB> {};
template<>
struct fex_gen_config<glGenBuffers> {};
template<>
struct fex_gen_config<glGenerateMipmapEXT> {};
template<>
struct fex_gen_config<glGenerateMipmap> {};
template<>
struct fex_gen_config<glGenerateMultiTexMipmapEXT> {};
template<>
struct fex_gen_config<glGenerateTextureMipmapEXT> {};
template<>
struct fex_gen_config<glGenerateTextureMipmap> {};
template<>
struct fex_gen_config<glGenFencesAPPLE> {};
template<>
struct fex_gen_config<glGenFencesNV> {};
template<>
struct fex_gen_config<glGenFramebuffersEXT> {};
template<>
struct fex_gen_config<glGenFramebuffers> {};
template<>
struct fex_gen_config<glGenNamesAMD> {};
template<>
struct fex_gen_config<glGenOcclusionQueriesNV> {};
template<>
struct fex_gen_config<glGenPerfMonitorsAMD> {};
template<>
struct fex_gen_config<glGenProgramPipelines> {};
template<>
struct fex_gen_config<glGenProgramsARB> {};
template<>
struct fex_gen_config<glGenProgramsNV> {};
template<>
struct fex_gen_config<glGenQueriesARB> {};
template<>
struct fex_gen_config<glGenQueries> {};
template<>
struct fex_gen_config<glGenQueryResourceTagNV> {};
template<>
struct fex_gen_config<glGenRenderbuffersEXT> {};
template<>
struct fex_gen_config<glGenRenderbuffers> {};
template<>
struct fex_gen_config<glGenSamplers> {};
template<>
struct fex_gen_config<glGenSemaphoresEXT> {};
template<>
struct fex_gen_config<glGenTexturesEXT> {};
template<>
struct fex_gen_config<glGenTextures> {};
template<>
struct fex_gen_config<glGenTransformFeedbacks> {};
template<>
struct fex_gen_config<glGenTransformFeedbacksNV> {};
template<>
struct fex_gen_config<glGenVertexArraysAPPLE> {};
template<>
struct fex_gen_config<glGenVertexArrays> {};
// glGetActive*, glGetArrayObject*, glGetAttached*, glGetBoolean* and
// glGetBufferParameter*: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glGetActiveAtomicCounterBufferiv> {};
template<>
struct fex_gen_config<glGetActiveAttribARB> {};
template<>
struct fex_gen_config<glGetActiveAttrib> {};
template<>
struct fex_gen_config<glGetActiveSubroutineName> {};
template<>
struct fex_gen_config<glGetActiveSubroutineUniformiv> {};
template<>
struct fex_gen_config<glGetActiveSubroutineUniformName> {};
template<>
struct fex_gen_config<glGetActiveUniformARB> {};
template<>
struct fex_gen_config<glGetActiveUniformBlockiv> {};
template<>
struct fex_gen_config<glGetActiveUniformBlockName> {};
template<>
struct fex_gen_config<glGetActiveUniform> {};
template<>
struct fex_gen_config<glGetActiveUniformName> {};
template<>
struct fex_gen_config<glGetActiveUniformsiv> {};
template<>
struct fex_gen_config<glGetActiveVaryingNV> {};
template<>
struct fex_gen_config<glGetArrayObjectfvATI> {};
template<>
struct fex_gen_config<glGetArrayObjectivATI> {};
template<>
struct fex_gen_config<glGetAttachedObjectsARB> {};
template<>
struct fex_gen_config<glGetAttachedShaders> {};
template<>
struct fex_gen_config<glGetBooleanIndexedvEXT> {};
template<>
struct fex_gen_config<glGetBooleani_v> {};
template<>
struct fex_gen_config<glGetBooleanv> {};
template<>
struct fex_gen_config<glGetBufferParameteri64v> {};
template<>
struct fex_gen_config<glGetBufferParameterivARB> {};
template<>
struct fex_gen_config<glGetBufferParameteriv> {};
template<>
struct fex_gen_config<glGetBufferParameterui64vNV> {};
// glGetBufferPointerv / glGetBufferPointervARB: both configs derive from
// fexgen::custom_host_impl, and parameter 2 (void**) of each is annotated with
// fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written elsewhere and the void**
// out-parameter is forwarded without repacking - confirm against the thunk generator.
template<>
struct fex_gen_config<glGetBufferPointerv> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetBufferPointerv, 2, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetBufferPointervARB> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetBufferPointervARB, 2, void**> : fexgen::ptr_passthrough {};
// glGetBufferSubData*, glGetClipPlane*, glGetColorTable*, glGetCombiner*,
// glGetCompressed*, glGetConvolution* and glGetCoverageModulationTableNV:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetBufferSubDataARB> {};
template<>
struct fex_gen_config<glGetBufferSubData> {};
template<>
struct fex_gen_config<glGetClipPlanefOES> {};
template<>
struct fex_gen_config<glGetClipPlane> {};
template<>
struct fex_gen_config<glGetClipPlanexOES> {};
template<>
struct fex_gen_config<glGetColorTableEXT> {};
template<>
struct fex_gen_config<glGetColorTable> {};
template<>
struct fex_gen_config<glGetColorTableParameterfvEXT> {};
template<>
struct fex_gen_config<glGetColorTableParameterfv> {};
template<>
struct fex_gen_config<glGetColorTableParameterfvSGI> {};
template<>
struct fex_gen_config<glGetColorTableParameterivEXT> {};
template<>
struct fex_gen_config<glGetColorTableParameteriv> {};
template<>
struct fex_gen_config<glGetColorTableParameterivSGI> {};
template<>
struct fex_gen_config<glGetColorTableSGI> {};
template<>
struct fex_gen_config<glGetCombinerInputParameterfvNV> {};
template<>
struct fex_gen_config<glGetCombinerInputParameterivNV> {};
template<>
struct fex_gen_config<glGetCombinerOutputParameterfvNV> {};
template<>
struct fex_gen_config<glGetCombinerOutputParameterivNV> {};
template<>
struct fex_gen_config<glGetCombinerStageParameterfvNV> {};
template<>
struct fex_gen_config<glGetCompressedMultiTexImageEXT> {};
template<>
struct fex_gen_config<glGetCompressedTexImageARB> {};
template<>
struct fex_gen_config<glGetCompressedTexImage> {};
template<>
struct fex_gen_config<glGetCompressedTextureImageEXT> {};
template<>
struct fex_gen_config<glGetCompressedTextureImage> {};
template<>
struct fex_gen_config<glGetCompressedTextureSubImage> {};
template<>
struct fex_gen_config<glGetConvolutionFilterEXT> {};
template<>
struct fex_gen_config<glGetConvolutionFilter> {};
template<>
struct fex_gen_config<glGetConvolutionParameterfvEXT> {};
template<>
struct fex_gen_config<glGetConvolutionParameterfv> {};
template<>
struct fex_gen_config<glGetConvolutionParameterivEXT> {};
template<>
struct fex_gen_config<glGetConvolutionParameteriv> {};
template<>
struct fex_gen_config<glGetConvolutionParameterxvOES> {};
template<>
struct fex_gen_config<glGetCoverageModulationTableNV> {};
// glGetDetailTexFuncSGIS, glGetDouble*, glGetFence*, glGetFinalCombiner*,
// glGetFirstPerfQueryIdINTEL, glGetFixedvOES, glGetFloat*, glGetFogFuncSGIS,
// glGetFragment* and glGetFramebuffer*: plain fex_gen_config specializations
// (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetDetailTexFuncSGIS> {};
template<>
struct fex_gen_config<glGetDoubleIndexedvEXT> {};
template<>
struct fex_gen_config<glGetDoublei_vEXT> {};
template<>
struct fex_gen_config<glGetDoublei_v> {};
template<>
struct fex_gen_config<glGetDoublev> {};
template<>
struct fex_gen_config<glGetFenceivNV> {};
template<>
struct fex_gen_config<glGetFinalCombinerInputParameterfvNV> {};
template<>
struct fex_gen_config<glGetFinalCombinerInputParameterivNV> {};
template<>
struct fex_gen_config<glGetFirstPerfQueryIdINTEL> {};
template<>
struct fex_gen_config<glGetFixedvOES> {};
template<>
struct fex_gen_config<glGetFloatIndexedvEXT> {};
template<>
struct fex_gen_config<glGetFloati_vEXT> {};
template<>
struct fex_gen_config<glGetFloati_v> {};
template<>
struct fex_gen_config<glGetFloatv> {};
template<>
struct fex_gen_config<glGetFogFuncSGIS> {};
template<>
struct fex_gen_config<glGetFragmentLightfvSGIX> {};
template<>
struct fex_gen_config<glGetFragmentLightivSGIX> {};
template<>
struct fex_gen_config<glGetFragmentMaterialfvSGIX> {};
template<>
struct fex_gen_config<glGetFragmentMaterialivSGIX> {};
template<>
struct fex_gen_config<glGetFramebufferAttachmentParameterivEXT> {};
template<>
struct fex_gen_config<glGetFramebufferAttachmentParameteriv> {};
template<>
struct fex_gen_config<glGetFramebufferParameterfvAMD> {};
template<>
struct fex_gen_config<glGetFramebufferParameterivEXT> {};
template<>
struct fex_gen_config<glGetFramebufferParameteriv> {};
template<>
struct fex_gen_config<glGetFramebufferParameterivMESA> {};
// glGetHistogram*, glGetImageTransformParameter*, glGetInfoLogARB, glGetInteger*,
// glGetInternalformat* and glGetInvariant*: plain fex_gen_config specializations
// (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetHistogramEXT> {};
template<>
struct fex_gen_config<glGetHistogram> {};
template<>
struct fex_gen_config<glGetHistogramParameterfvEXT> {};
template<>
struct fex_gen_config<glGetHistogramParameterfv> {};
template<>
struct fex_gen_config<glGetHistogramParameterivEXT> {};
template<>
struct fex_gen_config<glGetHistogramParameteriv> {};
template<>
struct fex_gen_config<glGetHistogramParameterxvOES> {};
template<>
struct fex_gen_config<glGetImageTransformParameterfvHP> {};
template<>
struct fex_gen_config<glGetImageTransformParameterivHP> {};
template<>
struct fex_gen_config<glGetInfoLogARB> {};
template<>
struct fex_gen_config<glGetInteger64i_v> {};
template<>
struct fex_gen_config<glGetInteger64v> {};
template<>
struct fex_gen_config<glGetIntegerIndexedvEXT> {};
template<>
struct fex_gen_config<glGetIntegeri_v> {};
template<>
struct fex_gen_config<glGetIntegerui64i_vNV> {};
template<>
struct fex_gen_config<glGetIntegerui64vNV> {};
template<>
struct fex_gen_config<glGetIntegerv> {};
template<>
struct fex_gen_config<glGetInternalformati64v> {};
template<>
struct fex_gen_config<glGetInternalformativ> {};
template<>
struct fex_gen_config<glGetInternalformatSampleivNV> {};
template<>
struct fex_gen_config<glGetInvariantBooleanvEXT> {};
template<>
struct fex_gen_config<glGetInvariantFloatvEXT> {};
template<>
struct fex_gen_config<glGetInvariantIntegervEXT> {};
// glGetLight*, glGetListParameter*, glGetLocalConstant*, glGetMap*, glGetMaterial*,
// glGetMemoryObject*, glGetMinmax*, glGetMultisample* and glGetMultiTex*:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetLightfv> {};
template<>
struct fex_gen_config<glGetLightiv> {};
template<>
struct fex_gen_config<glGetLightxOES> {};
template<>
struct fex_gen_config<glGetListParameterfvSGIX> {};
template<>
struct fex_gen_config<glGetListParameterivSGIX> {};
template<>
struct fex_gen_config<glGetLocalConstantBooleanvEXT> {};
template<>
struct fex_gen_config<glGetLocalConstantFloatvEXT> {};
template<>
struct fex_gen_config<glGetLocalConstantIntegervEXT> {};
template<>
struct fex_gen_config<glGetMapAttribParameterfvNV> {};
template<>
struct fex_gen_config<glGetMapAttribParameterivNV> {};
template<>
struct fex_gen_config<glGetMapControlPointsNV> {};
template<>
struct fex_gen_config<glGetMapdv> {};
template<>
struct fex_gen_config<glGetMapfv> {};
template<>
struct fex_gen_config<glGetMapiv> {};
template<>
struct fex_gen_config<glGetMapParameterfvNV> {};
template<>
struct fex_gen_config<glGetMapParameterivNV> {};
template<>
struct fex_gen_config<glGetMapxvOES> {};
template<>
struct fex_gen_config<glGetMaterialfv> {};
template<>
struct fex_gen_config<glGetMaterialiv> {};
template<>
struct fex_gen_config<glGetMaterialxOES> {};
template<>
struct fex_gen_config<glGetMemoryObjectDetachedResourcesuivNV> {};
template<>
struct fex_gen_config<glGetMemoryObjectParameterivEXT> {};
template<>
struct fex_gen_config<glGetMinmaxEXT> {};
template<>
struct fex_gen_config<glGetMinmax> {};
template<>
struct fex_gen_config<glGetMinmaxParameterfvEXT> {};
template<>
struct fex_gen_config<glGetMinmaxParameterfv> {};
template<>
struct fex_gen_config<glGetMinmaxParameterivEXT> {};
template<>
struct fex_gen_config<glGetMinmaxParameteriv> {};
template<>
struct fex_gen_config<glGetMultisamplefv> {};
template<>
struct fex_gen_config<glGetMultisamplefvNV> {};
template<>
struct fex_gen_config<glGetMultiTexEnvfvEXT> {};
template<>
struct fex_gen_config<glGetMultiTexEnvivEXT> {};
template<>
struct fex_gen_config<glGetMultiTexGendvEXT> {};
template<>
struct fex_gen_config<glGetMultiTexGenfvEXT> {};
template<>
struct fex_gen_config<glGetMultiTexGenivEXT> {};
template<>
struct fex_gen_config<glGetMultiTexImageEXT> {};
template<>
struct fex_gen_config<glGetMultiTexLevelParameterfvEXT> {};
template<>
struct fex_gen_config<glGetMultiTexLevelParameterivEXT> {};
template<>
struct fex_gen_config<glGetMultiTexParameterfvEXT> {};
template<>
struct fex_gen_config<glGetMultiTexParameterIivEXT> {};
template<>
struct fex_gen_config<glGetMultiTexParameterIuivEXT> {};
template<>
struct fex_gen_config<glGetMultiTexParameterivEXT> {};
// glGetNamedBufferParameter*: plain fex_gen_config specializations (no fexgen base).
template<>
struct fex_gen_config<glGetNamedBufferParameteri64v> {};
template<>
struct fex_gen_config<glGetNamedBufferParameterivEXT> {};
template<>
struct fex_gen_config<glGetNamedBufferParameteriv> {};
template<>
struct fex_gen_config<glGetNamedBufferParameterui64vNV> {};
// glGetNamedBufferPointerv / glGetNamedBufferPointervEXT: both configs derive from
// fexgen::custom_host_impl, and parameter 2 (void**) of each is annotated with
// fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written elsewhere and the void**
// out-parameter is forwarded without repacking - confirm against the thunk generator.
template<>
struct fex_gen_config<glGetNamedBufferPointerv> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetNamedBufferPointerv, 2, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetNamedBufferPointervEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetNamedBufferPointervEXT, 2, void**> : fexgen::ptr_passthrough {};
// glGetNamedBufferSubData*, glGetNamedFramebuffer*, glGetNamedProgram*,
// glGetNamedRenderbuffer*, glGetNamedString*, glGetNextPerfQueryIdINTEL and glGetn*:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetNamedBufferSubDataEXT> {};
template<>
struct fex_gen_config<glGetNamedBufferSubData> {};
template<>
struct fex_gen_config<glGetNamedFramebufferAttachmentParameterivEXT> {};
template<>
struct fex_gen_config<glGetNamedFramebufferAttachmentParameteriv> {};
template<>
struct fex_gen_config<glGetNamedFramebufferParameterfvAMD> {};
template<>
struct fex_gen_config<glGetNamedFramebufferParameterivEXT> {};
template<>
struct fex_gen_config<glGetNamedFramebufferParameteriv> {};
template<>
struct fex_gen_config<glGetNamedProgramivEXT> {};
template<>
struct fex_gen_config<glGetNamedProgramLocalParameterdvEXT> {};
template<>
struct fex_gen_config<glGetNamedProgramLocalParameterfvEXT> {};
template<>
struct fex_gen_config<glGetNamedProgramLocalParameterIivEXT> {};
template<>
struct fex_gen_config<glGetNamedProgramLocalParameterIuivEXT> {};
template<>
struct fex_gen_config<glGetNamedProgramStringEXT> {};
template<>
struct fex_gen_config<glGetNamedRenderbufferParameterivEXT> {};
template<>
struct fex_gen_config<glGetNamedRenderbufferParameteriv> {};
template<>
struct fex_gen_config<glGetNamedStringARB> {};
template<>
struct fex_gen_config<glGetNamedStringivARB> {};
template<>
struct fex_gen_config<glGetnCompressedTexImageARB> {};
template<>
struct fex_gen_config<glGetnCompressedTexImage> {};
template<>
struct fex_gen_config<glGetNextPerfQueryIdINTEL> {};
template<>
struct fex_gen_config<glGetnTexImageARB> {};
template<>
struct fex_gen_config<glGetnTexImage> {};
template<>
struct fex_gen_config<glGetnUniformdvARB> {};
template<>
struct fex_gen_config<glGetnUniformdv> {};
template<>
struct fex_gen_config<glGetnUniformfvARB> {};
template<>
struct fex_gen_config<glGetnUniformfv> {};
template<>
struct fex_gen_config<glGetnUniformi64vARB> {};
template<>
struct fex_gen_config<glGetnUniformivARB> {};
template<>
struct fex_gen_config<glGetnUniformiv> {};
template<>
struct fex_gen_config<glGetnUniformui64vARB> {};
template<>
struct fex_gen_config<glGetnUniformuivARB> {};
template<>
struct fex_gen_config<glGetnUniformuiv> {};
// glGetObject*, glGetOcclusionQuery*, glGetPath*, glGetPerfCounterInfoINTEL and
// glGetPerfMonitor*: plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetObjectBufferfvATI> {};
template<>
struct fex_gen_config<glGetObjectBufferivATI> {};
template<>
struct fex_gen_config<glGetObjectLabelEXT> {};
template<>
struct fex_gen_config<glGetObjectLabel> {};
template<>
struct fex_gen_config<glGetObjectParameterfvARB> {};
template<>
struct fex_gen_config<glGetObjectParameterivAPPLE> {};
template<>
struct fex_gen_config<glGetObjectParameterivARB> {};
template<>
struct fex_gen_config<glGetObjectPtrLabel> {};
template<>
struct fex_gen_config<glGetOcclusionQueryivNV> {};
template<>
struct fex_gen_config<glGetOcclusionQueryuivNV> {};
template<>
struct fex_gen_config<glGetPathCommandsNV> {};
template<>
struct fex_gen_config<glGetPathCoordsNV> {};
template<>
struct fex_gen_config<glGetPathDashArrayNV> {};
template<>
struct fex_gen_config<glGetPathMetricRangeNV> {};
template<>
struct fex_gen_config<glGetPathMetricsNV> {};
template<>
struct fex_gen_config<glGetPathParameterfvNV> {};
template<>
struct fex_gen_config<glGetPathParameterivNV> {};
template<>
struct fex_gen_config<glGetPathSpacingNV> {};
template<>
struct fex_gen_config<glGetPerfCounterInfoINTEL> {};
template<>
struct fex_gen_config<glGetPerfMonitorCounterDataAMD> {};
template<>
struct fex_gen_config<glGetPerfMonitorCounterInfoAMD> {};
template<>
struct fex_gen_config<glGetPerfMonitorCountersAMD> {};
template<>
struct fex_gen_config<glGetPerfMonitorCounterStringAMD> {};
template<>
struct fex_gen_config<glGetPerfMonitorGroupsAMD> {};
template<>
struct fex_gen_config<glGetPerfMonitorGroupStringAMD> {};
// glGetPerfQueryDataINTEL is only configured when IS_32BIT_THUNK is not defined.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support (currently omitted when IS_32BIT_THUNK is defined)
template<>
struct fex_gen_config<glGetPerfQueryDataINTEL> {};
#endif
// glGetPerfQueryIdByNameINTEL, glGetPerfQueryInfoINTEL and glGetPixel*:
// plain fex_gen_config specializations (empty body, no fexgen base).
template<>
struct fex_gen_config<glGetPerfQueryIdByNameINTEL> {};
template<>
struct fex_gen_config<glGetPerfQueryInfoINTEL> {};
template<>
struct fex_gen_config<glGetPixelMapfv> {};
template<>
struct fex_gen_config<glGetPixelMapuiv> {};
template<>
struct fex_gen_config<glGetPixelMapusv> {};
template<>
struct fex_gen_config<glGetPixelMapxv> {};
template<>
struct fex_gen_config<glGetPixelTexGenParameterfvSGIS> {};
template<>
struct fex_gen_config<glGetPixelTexGenParameterivSGIS> {};
template<>
struct fex_gen_config<glGetPixelTransformParameterfvEXT> {};
template<>
struct fex_gen_config<glGetPixelTransformParameterivEXT> {};
// glGetPointer* family: every config derives from fexgen::custom_host_impl and the
// void** out-parameter is annotated with fexgen::ptr_passthrough. Note the parameter
// index differs: 1 for glGetPointerv/glGetPointervEXT, 2 for the indexed variants
// (glGetPointeri_vEXT, glGetPointerIndexedvEXT).
// NOTE(review): presumably the host side is hand-written elsewhere and the void**
// is forwarded without repacking - confirm against the thunk generator.
template<>
struct fex_gen_config<glGetPointerv> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetPointerv, 1, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetPointervEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetPointervEXT, 1, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetPointeri_vEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetPointeri_vEXT, 2, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetPointerIndexedvEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetPointerIndexedvEXT, 2, void**> : fexgen::ptr_passthrough {};
// glGetPolygonStipple and the glGetProgram* getters (core, ARB and NV
// variants). All are empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glGetPolygonStipple> {};
template<>
struct fex_gen_config<glGetProgramBinary> {};
template<>
struct fex_gen_config<glGetProgramEnvParameterdvARB> {};
template<>
struct fex_gen_config<glGetProgramEnvParameterfvARB> {};
template<>
struct fex_gen_config<glGetProgramEnvParameterIivNV> {};
template<>
struct fex_gen_config<glGetProgramEnvParameterIuivNV> {};
template<>
struct fex_gen_config<glGetProgramInfoLog> {};
template<>
struct fex_gen_config<glGetProgramInterfaceiv> {};
template<>
struct fex_gen_config<glGetProgramivARB> {};
template<>
struct fex_gen_config<glGetProgramiv> {};
template<>
struct fex_gen_config<glGetProgramivNV> {};
template<>
struct fex_gen_config<glGetProgramLocalParameterdvARB> {};
template<>
struct fex_gen_config<glGetProgramLocalParameterfvARB> {};
template<>
struct fex_gen_config<glGetProgramLocalParameterIivNV> {};
template<>
struct fex_gen_config<glGetProgramLocalParameterIuivNV> {};
template<>
struct fex_gen_config<glGetProgramNamedParameterdvNV> {};
template<>
struct fex_gen_config<glGetProgramNamedParameterfvNV> {};
template<>
struct fex_gen_config<glGetProgramParameterdvNV> {};
template<>
struct fex_gen_config<glGetProgramParameterfvNV> {};
template<>
struct fex_gen_config<glGetProgramPipelineInfoLog> {};
template<>
struct fex_gen_config<glGetProgramPipelineiv> {};
template<>
struct fex_gen_config<glGetProgramResourcefvNV> {};
template<>
struct fex_gen_config<glGetProgramResourceiv> {};
template<>
struct fex_gen_config<glGetProgramResourceName> {};
template<>
struct fex_gen_config<glGetProgramStageiv> {};
template<>
struct fex_gen_config<glGetProgramStringARB> {};
template<>
struct fex_gen_config<glGetProgramStringNV> {};
template<>
struct fex_gen_config<glGetProgramSubroutineParameteruivNV> {};
// glGetQuery* getters (buffer-object, indexed and object variants, including
// ARB/EXT aliases). All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glGetQueryBufferObjecti64v> {};
template<>
struct fex_gen_config<glGetQueryBufferObjectiv> {};
template<>
struct fex_gen_config<glGetQueryBufferObjectui64v> {};
template<>
struct fex_gen_config<glGetQueryBufferObjectuiv> {};
template<>
struct fex_gen_config<glGetQueryIndexediv> {};
template<>
struct fex_gen_config<glGetQueryivARB> {};
template<>
struct fex_gen_config<glGetQueryiv> {};
template<>
struct fex_gen_config<glGetQueryObjecti64vEXT> {};
template<>
struct fex_gen_config<glGetQueryObjecti64v> {};
template<>
struct fex_gen_config<glGetQueryObjectivARB> {};
template<>
struct fex_gen_config<glGetQueryObjectiv> {};
template<>
struct fex_gen_config<glGetQueryObjectui64vEXT> {};
template<>
struct fex_gen_config<glGetQueryObjectui64v> {};
template<>
struct fex_gen_config<glGetQueryObjectuivARB> {};
template<>
struct fex_gen_config<glGetQueryObjectuiv> {};
// Renderbuffer, sampler, semaphore, separable-filter, shader, shading-rate
// and sync getters. All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glGetRenderbufferParameterivEXT> {};
template<>
struct fex_gen_config<glGetRenderbufferParameteriv> {};
template<>
struct fex_gen_config<glGetSamplerParameterfv> {};
template<>
struct fex_gen_config<glGetSamplerParameterIiv> {};
template<>
struct fex_gen_config<glGetSamplerParameterIuiv> {};
template<>
struct fex_gen_config<glGetSamplerParameteriv> {};
template<>
struct fex_gen_config<glGetSemaphoreParameterui64vEXT> {};
template<>
struct fex_gen_config<glGetSeparableFilterEXT> {};
template<>
struct fex_gen_config<glGetSeparableFilter> {};
template<>
struct fex_gen_config<glGetShaderInfoLog> {};
template<>
struct fex_gen_config<glGetShaderiv> {};
template<>
struct fex_gen_config<glGetShaderPrecisionFormat> {};
template<>
struct fex_gen_config<glGetShaderSourceARB> {};
template<>
struct fex_gen_config<glGetShaderSource> {};
template<>
struct fex_gen_config<glGetShadingRateImagePaletteNV> {};
template<>
struct fex_gen_config<glGetShadingRateSampleLocationivNV> {};
template<>
struct fex_gen_config<glGetSharpenTexFuncSGIS> {};
template<>
struct fex_gen_config<glGetSynciv> {};
// glGetTex* getters (bump, env, gen, image, level-parameter and parameter
// variants). All are empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glGetTexBumpParameterfvATI> {};
template<>
struct fex_gen_config<glGetTexBumpParameterivATI> {};
template<>
struct fex_gen_config<glGetTexEnvfv> {};
template<>
struct fex_gen_config<glGetTexEnviv> {};
template<>
struct fex_gen_config<glGetTexEnvxvOES> {};
template<>
struct fex_gen_config<glGetTexFilterFuncSGIS> {};
template<>
struct fex_gen_config<glGetTexGendv> {};
template<>
struct fex_gen_config<glGetTexGenfv> {};
template<>
struct fex_gen_config<glGetTexGeniv> {};
template<>
struct fex_gen_config<glGetTexGenxvOES> {};
template<>
struct fex_gen_config<glGetTexImage> {};
template<>
struct fex_gen_config<glGetTexLevelParameterfv> {};
template<>
struct fex_gen_config<glGetTexLevelParameteriv> {};
template<>
struct fex_gen_config<glGetTexLevelParameterxvOES> {};
template<>
struct fex_gen_config<glGetTexParameterfv> {};
template<>
struct fex_gen_config<glGetTexParameterIivEXT> {};
template<>
struct fex_gen_config<glGetTexParameterIiv> {};
template<>
struct fex_gen_config<glGetTexParameterIuivEXT> {};
template<>
struct fex_gen_config<glGetTexParameterIuiv> {};
template<>
struct fex_gen_config<glGetTexParameteriv> {};
// glGetTexParameterPointervAPPLE is only configured when IS_32BIT_THUNK is
// not defined. Its void** parameter (index 2) is tagged with
// fexgen::assume_compatible_data_layout.
// NOTE(review): presumably that tag is only valid while guest and host agree
// on pointer size, which would explain the guard — confirm.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glGetTexParameterPointervAPPLE> {};
template<>
struct fex_gen_param<glGetTexParameterPointervAPPLE, 2, void**> : fexgen::assume_compatible_data_layout {};
#endif
// glGetTexParameterxvOES, the glGetTexture* (direct-state-access) getters,
// glGetTrackMatrixivNV and the glGetTransformFeedback* getters. All are empty
// fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glGetTexParameterxvOES> {};
template<>
struct fex_gen_config<glGetTextureImageEXT> {};
template<>
struct fex_gen_config<glGetTextureImage> {};
template<>
struct fex_gen_config<glGetTextureLevelParameterfvEXT> {};
template<>
struct fex_gen_config<glGetTextureLevelParameterfv> {};
template<>
struct fex_gen_config<glGetTextureLevelParameterivEXT> {};
template<>
struct fex_gen_config<glGetTextureLevelParameteriv> {};
template<>
struct fex_gen_config<glGetTextureParameterfvEXT> {};
template<>
struct fex_gen_config<glGetTextureParameterfv> {};
template<>
struct fex_gen_config<glGetTextureParameterIivEXT> {};
template<>
struct fex_gen_config<glGetTextureParameterIiv> {};
template<>
struct fex_gen_config<glGetTextureParameterIuivEXT> {};
template<>
struct fex_gen_config<glGetTextureParameterIuiv> {};
template<>
struct fex_gen_config<glGetTextureParameterivEXT> {};
template<>
struct fex_gen_config<glGetTextureParameteriv> {};
template<>
struct fex_gen_config<glGetTextureSubImage> {};
template<>
struct fex_gen_config<glGetTrackMatrixivNV> {};
template<>
struct fex_gen_config<glGetTransformFeedbacki64_v> {};
template<>
struct fex_gen_config<glGetTransformFeedbackiv> {};
template<>
struct fex_gen_config<glGetTransformFeedbacki_v> {};
template<>
struct fex_gen_config<glGetTransformFeedbackVaryingEXT> {};
template<>
struct fex_gen_config<glGetTransformFeedbackVarying> {};
template<>
struct fex_gen_config<glGetTransformFeedbackVaryingNV> {};
// glGetUniform* value getters (double, float and 64-bit integer variants).
// All are empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glGetUniformdv> {};
template<>
struct fex_gen_config<glGetUniformfvARB> {};
template<>
struct fex_gen_config<glGetUniformfv> {};
template<>
struct fex_gen_config<glGetUniformi64vARB> {};
template<>
struct fex_gen_config<glGetUniformi64vNV> {};
// glGetUniformIndices is configured differently depending on IS_32BIT_THUNK.
// Both branches define the same two specializations, so exactly one pair is
// active per build:
//  - macro not defined: plain config; the `const char* const*` parameter
//    (index 2) is tagged fexgen::assume_compatible_data_layout.
//  - macro defined: config derives from fexgen::custom_host_impl and the same
//    parameter is tagged fexgen::ptr_passthrough instead.
// NOTE(review): presumably the 32-bit path needs a hand-written host
// implementation to convert the guest's array of string pointers — confirm
// against the host thunk sources.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<glGetUniformIndices> {};
template<>
struct fex_gen_param<glGetUniformIndices, 2, const char* const*> : fexgen::assume_compatible_data_layout {};
#else
template<>
struct fex_gen_config<glGetUniformIndices> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetUniformIndices, 2, const char* const*> : fexgen::ptr_passthrough {};
#endif
// Integer/unsigned glGetUniform* getters, glGetUnsignedByte*EXT and the
// non-pointer glGetVariant* getters. All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glGetUniformivARB> {};
template<>
struct fex_gen_config<glGetUniformiv> {};
template<>
struct fex_gen_config<glGetUniformSubroutineuiv> {};
template<>
struct fex_gen_config<glGetUniformui64vARB> {};
template<>
struct fex_gen_config<glGetUniformui64vNV> {};
template<>
struct fex_gen_config<glGetUniformuivEXT> {};
template<>
struct fex_gen_config<glGetUniformuiv> {};
template<>
struct fex_gen_config<glGetUnsignedBytei_vEXT> {};
template<>
struct fex_gen_config<glGetUnsignedBytevEXT> {};
template<>
struct fex_gen_config<glGetVariantArrayObjectfvATI> {};
template<>
struct fex_gen_config<glGetVariantArrayObjectivATI> {};
template<>
struct fex_gen_config<glGetVariantBooleanvEXT> {};
template<>
struct fex_gen_config<glGetVariantFloatvEXT> {};
template<>
struct fex_gen_config<glGetVariantIntegervEXT> {};
// glGetVariantPointervEXT: config derives from fexgen::custom_host_impl and
// the void** parameter at index 2 is tagged fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written — confirm.
template<>
struct fex_gen_config<glGetVariantPointervEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVariantPointervEXT, 2, void**> : fexgen::ptr_passthrough {};
// Non-pointer glGetVertexArray* getters. All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glGetVertexArrayIndexed64iv> {};
template<>
struct fex_gen_config<glGetVertexArrayIndexediv> {};
template<>
struct fex_gen_config<glGetVertexArrayIntegeri_vEXT> {};
template<>
struct fex_gen_config<glGetVertexArrayIntegervEXT> {};
template<>
struct fex_gen_config<glGetVertexArrayiv> {};
// Pointer-returning glGetVertexArray* getters: configs derive from
// fexgen::custom_host_impl and the void** parameter (index 3 for the indexed
// variant, index 2 otherwise) is tagged fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written — confirm.
template<>
struct fex_gen_config<glGetVertexArrayPointeri_vEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVertexArrayPointeri_vEXT, 3, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetVertexArrayPointervEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVertexArrayPointervEXT, 2, void**> : fexgen::ptr_passthrough {};
// Non-pointer glGetVertexAttrib* getters (core, ARB, EXT, NV and ATI
// variants). All are empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glGetVertexAttribArrayObjectfvATI> {};
template<>
struct fex_gen_config<glGetVertexAttribArrayObjectivATI> {};
template<>
struct fex_gen_config<glGetVertexAttribdvARB> {};
template<>
struct fex_gen_config<glGetVertexAttribdv> {};
template<>
struct fex_gen_config<glGetVertexAttribdvNV> {};
template<>
struct fex_gen_config<glGetVertexAttribfvARB> {};
template<>
struct fex_gen_config<glGetVertexAttribfv> {};
template<>
struct fex_gen_config<glGetVertexAttribfvNV> {};
template<>
struct fex_gen_config<glGetVertexAttribIivEXT> {};
template<>
struct fex_gen_config<glGetVertexAttribIiv> {};
template<>
struct fex_gen_config<glGetVertexAttribIuivEXT> {};
template<>
struct fex_gen_config<glGetVertexAttribIuiv> {};
template<>
struct fex_gen_config<glGetVertexAttribivARB> {};
template<>
struct fex_gen_config<glGetVertexAttribiv> {};
template<>
struct fex_gen_config<glGetVertexAttribivNV> {};
template<>
struct fex_gen_config<glGetVertexAttribLdvEXT> {};
template<>
struct fex_gen_config<glGetVertexAttribLdv> {};
template<>
struct fex_gen_config<glGetVertexAttribLi64vNV> {};
template<>
struct fex_gen_config<glGetVertexAttribLui64vARB> {};
template<>
struct fex_gen_config<glGetVertexAttribLui64vNV> {};
// glGetVertexAttribPointerv and its ARB/NV aliases: configs derive from
// fexgen::custom_host_impl and the void** parameter at index 2 is tagged
// fexgen::ptr_passthrough.
// NOTE(review): presumably the host side is hand-written — confirm.
template<>
struct fex_gen_config<glGetVertexAttribPointervARB> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVertexAttribPointervARB, 2, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetVertexAttribPointerv> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVertexAttribPointerv, 2, void**> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glGetVertexAttribPointervNV> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glGetVertexAttribPointervNV, 2, void**> : fexgen::ptr_passthrough {};
// NV video and video-capture getters. All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glGetVideoCaptureivNV> {};
template<>
struct fex_gen_config<glGetVideoCaptureStreamdvNV> {};
template<>
struct fex_gen_config<glGetVideoCaptureStreamfvNV> {};
template<>
struct fex_gen_config<glGetVideoCaptureStreamivNV> {};
template<>
struct fex_gen_config<glGetVideoi64vNV> {};
template<>
struct fex_gen_config<glGetVideoivNV> {};
template<>
struct fex_gen_config<glGetVideoui64vNV> {};
template<>
struct fex_gen_config<glGetVideouivNV> {};
// glGlobalAlphaFactor*SUN, glHint*, glHistogram*, glIglooInterfaceSGIX,
// glImageTransformParameter*HP and the glImportMemory*/glImportSemaphore*
// entry points. All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glGlobalAlphaFactorbSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactordSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactorfSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactoriSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactorsSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactorubSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactoruiSUN> {};
template<>
struct fex_gen_config<glGlobalAlphaFactorusSUN> {};
template<>
struct fex_gen_config<glHint> {};
template<>
struct fex_gen_config<glHintPGI> {};
template<>
struct fex_gen_config<glHistogramEXT> {};
template<>
struct fex_gen_config<glHistogram> {};
template<>
struct fex_gen_config<glIglooInterfaceSGIX> {};
template<>
struct fex_gen_config<glImageTransformParameterfHP> {};
template<>
struct fex_gen_config<glImageTransformParameterfvHP> {};
template<>
struct fex_gen_config<glImageTransformParameteriHP> {};
template<>
struct fex_gen_config<glImageTransformParameterivHP> {};
template<>
struct fex_gen_config<glImportMemoryFdEXT> {};
template<>
struct fex_gen_config<glImportMemoryWin32HandleEXT> {};
template<>
struct fex_gen_config<glImportMemoryWin32NameEXT> {};
template<>
struct fex_gen_config<glImportSemaphoreFdEXT> {};
template<>
struct fex_gen_config<glImportSemaphoreWin32HandleEXT> {};
template<>
struct fex_gen_config<glImportSemaphoreWin32NameEXT> {};
// glIndex* entry points (double/float/int setters, format, func, mask,
// material and pointer variants). All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glIndexd> {};
template<>
struct fex_gen_config<glIndexdv> {};
template<>
struct fex_gen_config<glIndexf> {};
template<>
struct fex_gen_config<glIndexFormatNV> {};
template<>
struct fex_gen_config<glIndexFuncEXT> {};
template<>
struct fex_gen_config<glIndexfv> {};
template<>
struct fex_gen_config<glIndexi> {};
template<>
struct fex_gen_config<glIndexiv> {};
template<>
struct fex_gen_config<glIndexMask> {};
template<>
struct fex_gen_config<glIndexMaterialEXT> {};
template<>
struct fex_gen_config<glIndexPointerEXT> {};
template<>
struct fex_gen_config<glIndexPointer> {};
// glIndexPointerListIBM is only configured when IS_32BIT_THUNK is not
// defined. Its `const void**` parameter (index 2) is tagged with
// fexgen::assume_compatible_data_layout.
// NOTE(review): presumably an array of pointers cannot be forwarded as-is
// when guest pointers are 32-bit, hence the guard — confirm.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glIndexPointerListIBM> {};
template<>
struct fex_gen_param<glIndexPointerListIBM, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// Remaining glIndex* setters, glInitNames, glInsert*, glInstrumentsBufferSGIX,
// glInterleavedArrays, glInterpolatePathsNV, glInvalidate*, glLabelObjectEXT
// and the glLGPU*NVX entry points. All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glIndexs> {};
template<>
struct fex_gen_config<glIndexsv> {};
template<>
struct fex_gen_config<glIndexub> {};
template<>
struct fex_gen_config<glIndexubv> {};
template<>
struct fex_gen_config<glIndexxOES> {};
template<>
struct fex_gen_config<glIndexxvOES> {};
template<>
struct fex_gen_config<glInitNames> {};
template<>
struct fex_gen_config<glInsertComponentEXT> {};
template<>
struct fex_gen_config<glInsertEventMarkerEXT> {};
template<>
struct fex_gen_config<glInstrumentsBufferSGIX> {};
template<>
struct fex_gen_config<glInterleavedArrays> {};
template<>
struct fex_gen_config<glInterpolatePathsNV> {};
template<>
struct fex_gen_config<glInvalidateBufferData> {};
template<>
struct fex_gen_config<glInvalidateBufferSubData> {};
template<>
struct fex_gen_config<glInvalidateFramebuffer> {};
template<>
struct fex_gen_config<glInvalidateNamedFramebufferData> {};
template<>
struct fex_gen_config<glInvalidateNamedFramebufferSubData> {};
template<>
struct fex_gen_config<glInvalidateSubFramebuffer> {};
template<>
struct fex_gen_config<glInvalidateTexImage> {};
template<>
struct fex_gen_config<glInvalidateTexSubImage> {};
template<>
struct fex_gen_config<glLabelObjectEXT> {};
template<>
struct fex_gen_config<glLGPUCopyImageSubDataNVX> {};
template<>
struct fex_gen_config<glLGPUInterlockNVX> {};
template<>
struct fex_gen_config<glLGPUNamedBufferSubDataNVX> {};
// glLight*, glLightModel*, glLineStipple, glLineWidth*, glLinkProgram* and
// glListBase. All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glLightEnviSGIX> {};
template<>
struct fex_gen_config<glLightf> {};
template<>
struct fex_gen_config<glLightfv> {};
template<>
struct fex_gen_config<glLighti> {};
template<>
struct fex_gen_config<glLightiv> {};
template<>
struct fex_gen_config<glLightModelf> {};
template<>
struct fex_gen_config<glLightModelfv> {};
template<>
struct fex_gen_config<glLightModeli> {};
template<>
struct fex_gen_config<glLightModeliv> {};
template<>
struct fex_gen_config<glLightModelxOES> {};
template<>
struct fex_gen_config<glLightModelxvOES> {};
template<>
struct fex_gen_config<glLightxOES> {};
template<>
struct fex_gen_config<glLightxvOES> {};
template<>
struct fex_gen_config<glLineStipple> {};
template<>
struct fex_gen_config<glLineWidth> {};
template<>
struct fex_gen_config<glLineWidthxOES> {};
template<>
struct fex_gen_config<glLinkProgramARB> {};
template<>
struct fex_gen_config<glLinkProgram> {};
template<>
struct fex_gen_config<glListBase> {};
// glListDrawCommandsStatesClientNV is only configured when IS_32BIT_THUNK is
// not defined. Its `const void**` parameter (index 2) is tagged with
// fexgen::assume_compatible_data_layout.
// NOTE(review): presumably an array of pointers cannot be forwarded as-is
// when guest pointers are 32-bit, hence the guard — confirm.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glListDrawCommandsStatesClientNV> {};
template<>
struct fex_gen_param<glListDrawCommandsStatesClientNV, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glListParameter*SGIX, glLoad* (identity, matrix, name, program, transpose
// matrix), glLockArraysEXT, glLogicOp and the glMake*Resident /
// glMake*NonResident entry points. All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glListParameterfSGIX> {};
template<>
struct fex_gen_config<glListParameterfvSGIX> {};
template<>
struct fex_gen_config<glListParameteriSGIX> {};
template<>
struct fex_gen_config<glListParameterivSGIX> {};
template<>
struct fex_gen_config<glLoadIdentity> {};
template<>
struct fex_gen_config<glLoadIdentityDeformationMapSGIX> {};
template<>
struct fex_gen_config<glLoadMatrixd> {};
template<>
struct fex_gen_config<glLoadMatrixf> {};
template<>
struct fex_gen_config<glLoadMatrixxOES> {};
template<>
struct fex_gen_config<glLoadName> {};
template<>
struct fex_gen_config<glLoadProgramNV> {};
template<>
struct fex_gen_config<glLoadTransposeMatrixdARB> {};
template<>
struct fex_gen_config<glLoadTransposeMatrixd> {};
template<>
struct fex_gen_config<glLoadTransposeMatrixfARB> {};
template<>
struct fex_gen_config<glLoadTransposeMatrixf> {};
template<>
struct fex_gen_config<glLoadTransposeMatrixxOES> {};
template<>
struct fex_gen_config<glLockArraysEXT> {};
template<>
struct fex_gen_config<glLogicOp> {};
template<>
struct fex_gen_config<glMakeBufferNonResidentNV> {};
template<>
struct fex_gen_config<glMakeBufferResidentNV> {};
template<>
struct fex_gen_config<glMakeImageHandleNonResidentARB> {};
template<>
struct fex_gen_config<glMakeImageHandleNonResidentNV> {};
template<>
struct fex_gen_config<glMakeImageHandleResidentARB> {};
template<>
struct fex_gen_config<glMakeImageHandleResidentNV> {};
template<>
struct fex_gen_config<glMakeNamedBufferNonResidentNV> {};
template<>
struct fex_gen_config<glMakeNamedBufferResidentNV> {};
template<>
struct fex_gen_config<glMakeTextureHandleNonResidentARB> {};
template<>
struct fex_gen_config<glMakeTextureHandleNonResidentNV> {};
template<>
struct fex_gen_config<glMakeTextureHandleResidentARB> {};
template<>
struct fex_gen_config<glMakeTextureHandleResidentNV> {};
// glMap* entry points: evaluator maps (glMap1*/glMap2*/glMapGrid*), buffer
// mapping (glMapBuffer*, glMapNamedBuffer*, glMapObjectBufferATI,
// glMapTexture2DINTEL), NV map control and APPLE vertex-attrib maps. All are
// empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glMap1d> {};
template<>
struct fex_gen_config<glMap1f> {};
template<>
struct fex_gen_config<glMap1xOES> {};
template<>
struct fex_gen_config<glMap2d> {};
template<>
struct fex_gen_config<glMap2f> {};
template<>
struct fex_gen_config<glMap2xOES> {};
template<>
struct fex_gen_config<glMapBufferARB> {};
template<>
struct fex_gen_config<glMapBuffer> {};
template<>
struct fex_gen_config<glMapBufferRange> {};
template<>
struct fex_gen_config<glMapControlPointsNV> {};
template<>
struct fex_gen_config<glMapGrid1d> {};
template<>
struct fex_gen_config<glMapGrid1f> {};
template<>
struct fex_gen_config<glMapGrid1xOES> {};
template<>
struct fex_gen_config<glMapGrid2d> {};
template<>
struct fex_gen_config<glMapGrid2f> {};
template<>
struct fex_gen_config<glMapGrid2xOES> {};
template<>
struct fex_gen_config<glMapNamedBufferEXT> {};
template<>
struct fex_gen_config<glMapNamedBuffer> {};
template<>
struct fex_gen_config<glMapNamedBufferRangeEXT> {};
template<>
struct fex_gen_config<glMapNamedBufferRange> {};
template<>
struct fex_gen_config<glMapObjectBufferATI> {};
template<>
struct fex_gen_config<glMapParameterfvNV> {};
template<>
struct fex_gen_config<glMapParameterivNV> {};
template<>
struct fex_gen_config<glMapTexture2DINTEL> {};
template<>
struct fex_gen_config<glMapVertexAttrib1dAPPLE> {};
template<>
struct fex_gen_config<glMapVertexAttrib1fAPPLE> {};
template<>
struct fex_gen_config<glMapVertexAttrib2dAPPLE> {};
template<>
struct fex_gen_config<glMapVertexAttrib2fAPPLE> {};
// glMaterial* setters and the glMatrix* entry points (frustum, index, load,
// mode, mult, ortho, push/pop, rotate, scale, translate). All are empty
// fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glMaterialf> {};
template<>
struct fex_gen_config<glMaterialfv> {};
template<>
struct fex_gen_config<glMateriali> {};
template<>
struct fex_gen_config<glMaterialiv> {};
template<>
struct fex_gen_config<glMaterialxOES> {};
template<>
struct fex_gen_config<glMaterialxvOES> {};
template<>
struct fex_gen_config<glMatrixFrustumEXT> {};
template<>
struct fex_gen_config<glMatrixIndexPointerARB> {};
template<>
struct fex_gen_config<glMatrixIndexubvARB> {};
template<>
struct fex_gen_config<glMatrixIndexuivARB> {};
template<>
struct fex_gen_config<glMatrixIndexusvARB> {};
template<>
struct fex_gen_config<glMatrixLoad3x2fNV> {};
template<>
struct fex_gen_config<glMatrixLoad3x3fNV> {};
template<>
struct fex_gen_config<glMatrixLoaddEXT> {};
template<>
struct fex_gen_config<glMatrixLoadfEXT> {};
template<>
struct fex_gen_config<glMatrixLoadIdentityEXT> {};
template<>
struct fex_gen_config<glMatrixLoadTranspose3x3fNV> {};
template<>
struct fex_gen_config<glMatrixLoadTransposedEXT> {};
template<>
struct fex_gen_config<glMatrixLoadTransposefEXT> {};
template<>
struct fex_gen_config<glMatrixMode> {};
template<>
struct fex_gen_config<glMatrixMult3x2fNV> {};
template<>
struct fex_gen_config<glMatrixMult3x3fNV> {};
template<>
struct fex_gen_config<glMatrixMultdEXT> {};
template<>
struct fex_gen_config<glMatrixMultfEXT> {};
template<>
struct fex_gen_config<glMatrixMultTranspose3x3fNV> {};
template<>
struct fex_gen_config<glMatrixMultTransposedEXT> {};
template<>
struct fex_gen_config<glMatrixMultTransposefEXT> {};
template<>
struct fex_gen_config<glMatrixOrthoEXT> {};
template<>
struct fex_gen_config<glMatrixPopEXT> {};
template<>
struct fex_gen_config<glMatrixPushEXT> {};
template<>
struct fex_gen_config<glMatrixRotatedEXT> {};
template<>
struct fex_gen_config<glMatrixRotatefEXT> {};
template<>
struct fex_gen_config<glMatrixScaledEXT> {};
template<>
struct fex_gen_config<glMatrixScalefEXT> {};
template<>
struct fex_gen_config<glMatrixTranslatedEXT> {};
template<>
struct fex_gen_config<glMatrixTranslatefEXT> {};
// glMaxShaderCompilerThreads*, glMemoryBarrier*, glMemoryObjectParameterivEXT,
// glMinmax*, glMinSampleShading* and the glMulticast* (NV/NVX) entry points.
// All are empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glMaxShaderCompilerThreadsARB> {};
template<>
struct fex_gen_config<glMaxShaderCompilerThreadsKHR> {};
template<>
struct fex_gen_config<glMemoryBarrierByRegion> {};
template<>
struct fex_gen_config<glMemoryBarrierEXT> {};
template<>
struct fex_gen_config<glMemoryBarrier> {};
template<>
struct fex_gen_config<glMemoryObjectParameterivEXT> {};
template<>
struct fex_gen_config<glMinmaxEXT> {};
template<>
struct fex_gen_config<glMinmax> {};
template<>
struct fex_gen_config<glMinSampleShadingARB> {};
template<>
struct fex_gen_config<glMinSampleShading> {};
template<>
struct fex_gen_config<glMulticastBarrierNV> {};
template<>
struct fex_gen_config<glMulticastBlitFramebufferNV> {};
template<>
struct fex_gen_config<glMulticastBufferSubDataNV> {};
template<>
struct fex_gen_config<glMulticastCopyBufferSubDataNV> {};
template<>
struct fex_gen_config<glMulticastCopyImageSubDataNV> {};
template<>
struct fex_gen_config<glMulticastFramebufferSampleLocationsfvNV> {};
template<>
struct fex_gen_config<glMulticastGetQueryObjecti64vNV> {};
template<>
struct fex_gen_config<glMulticastGetQueryObjectivNV> {};
template<>
struct fex_gen_config<glMulticastGetQueryObjectui64vNV> {};
template<>
struct fex_gen_config<glMulticastGetQueryObjectuivNV> {};
template<>
struct fex_gen_config<glMulticastScissorArrayvNVX> {};
template<>
struct fex_gen_config<glMulticastViewportArrayvNVX> {};
template<>
struct fex_gen_config<glMulticastViewportPositionWScaleNVX> {};
template<>
struct fex_gen_config<glMulticastWaitSyncNV> {};
// glMultiDrawArrays* (direct and indirect variants) and
// glMultiDrawElementArrayAPPLE. All are empty fex_gen_config specializations
// without annotations.
template<>
struct fex_gen_config<glMultiDrawArraysEXT> {};
template<>
struct fex_gen_config<glMultiDrawArrays> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirectAMD> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirectBindlessCountNV> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirectBindlessNV> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirectCountARB> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirectCount> {};
template<>
struct fex_gen_config<glMultiDrawArraysIndirect> {};
template<>
struct fex_gen_config<glMultiDrawElementArrayAPPLE> {};
// glMultiDrawElements and its BaseVertex/EXT variants are only configured
// when IS_32BIT_THUNK is not defined. In each case the `const void* const*`
// parameter at index 3 is tagged fexgen::assume_compatible_data_layout.
#ifndef IS_32BIT_THUNK
// Needs manual handling: The type of this is actually int8_t**, int16_t**, or int32_t**, depending on the "type" argument
// NOTE(review): the GL reference lists unsigned index types for "type"
// (GL_UNSIGNED_BYTE/SHORT/INT); the signed spellings above look like loose
// shorthand — confirm.
// TODO: Do these values get copied or do they have to stay valid past the call?
template<>
struct fex_gen_config<glMultiDrawElementsBaseVertex> {};
template<>
struct fex_gen_param<glMultiDrawElementsBaseVertex, 3, const void* const*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glMultiDrawElementsEXT> {};
template<>
struct fex_gen_param<glMultiDrawElementsEXT, 3, const void* const*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glMultiDrawElements> {};
template<>
struct fex_gen_param<glMultiDrawElements, 3, const void* const*> : fexgen::assume_compatible_data_layout {};
#endif
// glMultiDrawElementsIndirect*, glMultiDrawMeshTasksIndirect*NV,
// glMultiDrawRangeElementArrayAPPLE and glMultiModeDrawArraysIBM. All are
// empty fex_gen_config specializations without annotations.
template<>
struct fex_gen_config<glMultiDrawElementsIndirectAMD> {};
template<>
struct fex_gen_config<glMultiDrawElementsIndirectBindlessCountNV> {};
template<>
struct fex_gen_config<glMultiDrawElementsIndirectBindlessNV> {};
template<>
struct fex_gen_config<glMultiDrawElementsIndirectCountARB> {};
template<>
struct fex_gen_config<glMultiDrawElementsIndirectCount> {};
template<>
struct fex_gen_config<glMultiDrawElementsIndirect> {};
template<>
struct fex_gen_config<glMultiDrawMeshTasksIndirectCountNV> {};
template<>
struct fex_gen_config<glMultiDrawMeshTasksIndirectNV> {};
template<>
struct fex_gen_config<glMultiDrawRangeElementArrayAPPLE> {};
template<>
struct fex_gen_config<glMultiModeDrawArraysIBM> {};
// glMultiModeDrawElementsIBM is only configured when IS_32BIT_THUNK is not
// defined. Its `const void* const*` parameter (index 3) is tagged with
// fexgen::assume_compatible_data_layout.
// NOTE(review): presumably an array of pointers cannot be forwarded as-is
// when guest pointers are 32-bit, hence the guard — confirm.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glMultiModeDrawElementsIBM> {};
template<>
struct fex_gen_param<glMultiModeDrawElementsIBM, 3, const void* const*> : fexgen::assume_compatible_data_layout {};
#endif
// glMultiTexBufferEXT and the one-component glMultiTexCoord1* setters
// (byte/double/float/half/int/short/fixed, scalar and vector forms, with
// ARB/NV/OES aliases). All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glMultiTexBufferEXT> {};
template<>
struct fex_gen_config<glMultiTexCoord1bOES> {};
template<>
struct fex_gen_config<glMultiTexCoord1bvOES> {};
template<>
struct fex_gen_config<glMultiTexCoord1dARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1d> {};
template<>
struct fex_gen_config<glMultiTexCoord1dvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1dv> {};
template<>
struct fex_gen_config<glMultiTexCoord1fARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1f> {};
template<>
struct fex_gen_config<glMultiTexCoord1fvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1fv> {};
template<>
struct fex_gen_config<glMultiTexCoord1hNV> {};
template<>
struct fex_gen_config<glMultiTexCoord1hvNV> {};
template<>
struct fex_gen_config<glMultiTexCoord1iARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1i> {};
template<>
struct fex_gen_config<glMultiTexCoord1ivARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1iv> {};
template<>
struct fex_gen_config<glMultiTexCoord1sARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1s> {};
template<>
struct fex_gen_config<glMultiTexCoord1svARB> {};
template<>
struct fex_gen_config<glMultiTexCoord1sv> {};
template<>
struct fex_gen_config<glMultiTexCoord1xOES> {};
template<>
struct fex_gen_config<glMultiTexCoord1xvOES> {};
// Two-component glMultiTexCoord2* setters (scalar and vector forms, with
// ARB/NV/OES aliases). All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glMultiTexCoord2bOES> {};
template<>
struct fex_gen_config<glMultiTexCoord2bvOES> {};
template<>
struct fex_gen_config<glMultiTexCoord2dARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2d> {};
template<>
struct fex_gen_config<glMultiTexCoord2dvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2dv> {};
template<>
struct fex_gen_config<glMultiTexCoord2fARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2f> {};
template<>
struct fex_gen_config<glMultiTexCoord2fvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2fv> {};
template<>
struct fex_gen_config<glMultiTexCoord2hNV> {};
template<>
struct fex_gen_config<glMultiTexCoord2hvNV> {};
template<>
struct fex_gen_config<glMultiTexCoord2iARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2i> {};
template<>
struct fex_gen_config<glMultiTexCoord2ivARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2iv> {};
template<>
struct fex_gen_config<glMultiTexCoord2sARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2s> {};
template<>
struct fex_gen_config<glMultiTexCoord2svARB> {};
template<>
struct fex_gen_config<glMultiTexCoord2sv> {};
template<>
struct fex_gen_config<glMultiTexCoord2xOES> {};
template<>
struct fex_gen_config<glMultiTexCoord2xvOES> {};
// Three-component glMultiTexCoord3* setters (scalar and vector forms, with
// ARB/NV/OES aliases). All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glMultiTexCoord3bOES> {};
template<>
struct fex_gen_config<glMultiTexCoord3bvOES> {};
template<>
struct fex_gen_config<glMultiTexCoord3dARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3d> {};
template<>
struct fex_gen_config<glMultiTexCoord3dvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3dv> {};
template<>
struct fex_gen_config<glMultiTexCoord3fARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3f> {};
template<>
struct fex_gen_config<glMultiTexCoord3fvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3fv> {};
template<>
struct fex_gen_config<glMultiTexCoord3hNV> {};
template<>
struct fex_gen_config<glMultiTexCoord3hvNV> {};
template<>
struct fex_gen_config<glMultiTexCoord3iARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3i> {};
template<>
struct fex_gen_config<glMultiTexCoord3ivARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3iv> {};
template<>
struct fex_gen_config<glMultiTexCoord3sARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3s> {};
template<>
struct fex_gen_config<glMultiTexCoord3svARB> {};
template<>
struct fex_gen_config<glMultiTexCoord3sv> {};
template<>
struct fex_gen_config<glMultiTexCoord3xOES> {};
template<>
struct fex_gen_config<glMultiTexCoord3xvOES> {};
// Four-component glMultiTexCoord4* setters (scalar and vector forms, with
// ARB/NV/OES aliases). All are empty fex_gen_config specializations without
// annotations.
template<>
struct fex_gen_config<glMultiTexCoord4bOES> {};
template<>
struct fex_gen_config<glMultiTexCoord4bvOES> {};
template<>
struct fex_gen_config<glMultiTexCoord4dARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4d> {};
template<>
struct fex_gen_config<glMultiTexCoord4dvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4dv> {};
template<>
struct fex_gen_config<glMultiTexCoord4fARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4f> {};
template<>
struct fex_gen_config<glMultiTexCoord4fvARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4fv> {};
template<>
struct fex_gen_config<glMultiTexCoord4hNV> {};
template<>
struct fex_gen_config<glMultiTexCoord4hvNV> {};
template<>
struct fex_gen_config<glMultiTexCoord4iARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4i> {};
template<>
struct fex_gen_config<glMultiTexCoord4ivARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4iv> {};
template<>
struct fex_gen_config<glMultiTexCoord4sARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4s> {};
template<>
struct fex_gen_config<glMultiTexCoord4svARB> {};
template<>
struct fex_gen_config<glMultiTexCoord4sv> {};
template<>
struct fex_gen_config<glMultiTexCoord4xOES> {};
template<>
struct fex_gen_config<glMultiTexCoord4xvOES> {};
// EXT direct-state-access glMultiTex* entry points (coord pointer, env, gen,
// image, parameter, renderbuffer and sub-image). All are empty fex_gen_config
// specializations without annotations.
template<>
struct fex_gen_config<glMultiTexCoordPointerEXT> {};
template<>
struct fex_gen_config<glMultiTexEnvfEXT> {};
template<>
struct fex_gen_config<glMultiTexEnvfvEXT> {};
template<>
struct fex_gen_config<glMultiTexEnviEXT> {};
template<>
struct fex_gen_config<glMultiTexEnvivEXT> {};
template<>
struct fex_gen_config<glMultiTexGendEXT> {};
template<>
struct fex_gen_config<glMultiTexGendvEXT> {};
template<>
struct fex_gen_config<glMultiTexGenfEXT> {};
template<>
struct fex_gen_config<glMultiTexGenfvEXT> {};
template<>
struct fex_gen_config<glMultiTexGeniEXT> {};
template<>
struct fex_gen_config<glMultiTexGenivEXT> {};
template<>
struct fex_gen_config<glMultiTexImage1DEXT> {};
template<>
struct fex_gen_config<glMultiTexImage2DEXT> {};
template<>
struct fex_gen_config<glMultiTexImage3DEXT> {};
template<>
struct fex_gen_config<glMultiTexParameterfEXT> {};
template<>
struct fex_gen_config<glMultiTexParameterfvEXT> {};
template<>
struct fex_gen_config<glMultiTexParameteriEXT> {};
template<>
struct fex_gen_config<glMultiTexParameterIivEXT> {};
template<>
struct fex_gen_config<glMultiTexParameterIuivEXT> {};
template<>
struct fex_gen_config<glMultiTexParameterivEXT> {};
template<>
struct fex_gen_config<glMultiTexRenderbufferEXT> {};
template<>
struct fex_gen_config<glMultiTexSubImage1DEXT> {};
template<>
struct fex_gen_config<glMultiTexSubImage2DEXT> {};
template<>
struct fex_gen_config<glMultiTexSubImage3DEXT> {};
// glMultMatrix* and glMultTransposeMatrix* entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glMultMatrixd> {};
template<>
struct fex_gen_config<glMultMatrixf> {};
template<>
struct fex_gen_config<glMultMatrixxOES> {};
template<>
struct fex_gen_config<glMultTransposeMatrixdARB> {};
template<>
struct fex_gen_config<glMultTransposeMatrixd> {};
template<>
struct fex_gen_config<glMultTransposeMatrixfARB> {};
template<>
struct fex_gen_config<glMultTransposeMatrixf> {};
template<>
struct fex_gen_config<glMultTransposeMatrixxOES> {};
// glNamedBuffer* entry points plus glNamedCopyBufferSubDataEXT.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glNamedBufferAttachMemoryNV> {};
template<>
struct fex_gen_config<glNamedBufferDataEXT> {};
template<>
struct fex_gen_config<glNamedBufferData> {};
template<>
struct fex_gen_config<glNamedBufferPageCommitmentARB> {};
template<>
struct fex_gen_config<glNamedBufferPageCommitmentEXT> {};
template<>
struct fex_gen_config<glNamedBufferStorageExternalEXT> {};
template<>
struct fex_gen_config<glNamedBufferStorageEXT> {};
template<>
struct fex_gen_config<glNamedBufferStorage> {};
template<>
struct fex_gen_config<glNamedBufferStorageMemEXT> {};
template<>
struct fex_gen_config<glNamedBufferSubDataEXT> {};
template<>
struct fex_gen_config<glNamedBufferSubData> {};
template<>
struct fex_gen_config<glNamedCopyBufferSubDataEXT> {};
// glNamedFramebuffer* entry points (draw/read buffer, parameter,
// renderbuffer, sample locations and texture attachment variants).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glNamedFramebufferDrawBuffer> {};
template<>
struct fex_gen_config<glNamedFramebufferDrawBuffers> {};
template<>
struct fex_gen_config<glNamedFramebufferParameteriEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferParameteri> {};
template<>
struct fex_gen_config<glNamedFramebufferReadBuffer> {};
template<>
struct fex_gen_config<glNamedFramebufferRenderbufferEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferRenderbuffer> {};
template<>
struct fex_gen_config<glNamedFramebufferSampleLocationsfvARB> {};
template<>
struct fex_gen_config<glNamedFramebufferSampleLocationsfvNV> {};
template<>
struct fex_gen_config<glNamedFramebufferSamplePositionsfvAMD> {};
template<>
struct fex_gen_config<glNamedFramebufferTexture1DEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTexture2DEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTexture3DEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTextureEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTextureFaceEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTexture> {};
template<>
struct fex_gen_config<glNamedFramebufferTextureLayerEXT> {};
template<>
struct fex_gen_config<glNamedFramebufferTextureLayer> {};
// glNamedProgram*EXT, glNamedRenderbufferStorage* and glNamedStringARB
// entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glNamedProgramLocalParameter4dEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameter4dvEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameter4fEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameter4fvEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameterI4iEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameterI4ivEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameterI4uiEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameterI4uivEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParameters4fvEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParametersI4ivEXT> {};
template<>
struct fex_gen_config<glNamedProgramLocalParametersI4uivEXT> {};
template<>
struct fex_gen_config<glNamedProgramStringEXT> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorageEXT> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorage> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorageMultisampleAdvancedAMD> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorageMultisampleCoverageEXT> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorageMultisampleEXT> {};
template<>
struct fex_gen_config<glNamedRenderbufferStorageMultisample> {};
template<>
struct fex_gen_config<glNamedStringARB> {};
// glNewList, glNormal3*, glNormalFormatNV and glNormalPointer[EXT] entry
// points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glNewList> {};
template<>
struct fex_gen_config<glNormal3b> {};
template<>
struct fex_gen_config<glNormal3bv> {};
template<>
struct fex_gen_config<glNormal3d> {};
template<>
struct fex_gen_config<glNormal3dv> {};
template<>
struct fex_gen_config<glNormal3f> {};
template<>
struct fex_gen_config<glNormal3fv> {};
template<>
struct fex_gen_config<glNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glNormal3hNV> {};
template<>
struct fex_gen_config<glNormal3hvNV> {};
template<>
struct fex_gen_config<glNormal3i> {};
template<>
struct fex_gen_config<glNormal3iv> {};
template<>
struct fex_gen_config<glNormal3s> {};
template<>
struct fex_gen_config<glNormal3sv> {};
template<>
struct fex_gen_config<glNormal3xOES> {};
template<>
struct fex_gen_config<glNormal3xvOES> {};
template<>
struct fex_gen_config<glNormalFormatNV> {};
template<>
struct fex_gen_config<glNormalPointerEXT> {};
template<>
struct fex_gen_config<glNormalPointer> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// glNormalPointerListIBM and glNormalPointervINTEL are declared only when
// IS_32BIT_THUNK is not defined. Their `const void**` parameter (index 2 and
// index 1 respectively) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably an array of pointers cannot be assumed
// layout-compatible when the guest uses 32-bit pointers - confirm before
// enabling these for 32-bit thunks.
template<>
struct fex_gen_config<glNormalPointerListIBM> {};
template<>
struct fex_gen_param<glNormalPointerListIBM, 2, const void**> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glNormalPointervINTEL> {};
template<>
struct fex_gen_param<glNormalPointervINTEL, 1, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glNormalStream3*ATI entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glNormalStream3bATI> {};
template<>
struct fex_gen_config<glNormalStream3bvATI> {};
template<>
struct fex_gen_config<glNormalStream3dATI> {};
template<>
struct fex_gen_config<glNormalStream3dvATI> {};
template<>
struct fex_gen_config<glNormalStream3fATI> {};
template<>
struct fex_gen_config<glNormalStream3fvATI> {};
template<>
struct fex_gen_config<glNormalStream3iATI> {};
template<>
struct fex_gen_config<glNormalStream3ivATI> {};
template<>
struct fex_gen_config<glNormalStream3sATI> {};
template<>
struct fex_gen_config<glNormalStream3svATI> {};
// glObject*Label, glOrtho*, glPassTexCoordATI, glPassThrough* and
// glPatchParameter* entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glObjectLabel> {};
template<>
struct fex_gen_config<glObjectPtrLabel> {};
template<>
struct fex_gen_config<glOrthofOES> {};
template<>
struct fex_gen_config<glOrtho> {};
template<>
struct fex_gen_config<glOrthoxOES> {};
template<>
struct fex_gen_config<glPassTexCoordATI> {};
template<>
struct fex_gen_config<glPassThrough> {};
template<>
struct fex_gen_config<glPassThroughxOES> {};
template<>
struct fex_gen_config<glPatchParameterfv> {};
template<>
struct fex_gen_config<glPatchParameteri> {};
// glPath*NV entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glPathCommandsNV> {};
template<>
struct fex_gen_config<glPathCoordsNV> {};
template<>
struct fex_gen_config<glPathCoverDepthFuncNV> {};
template<>
struct fex_gen_config<glPathDashArrayNV> {};
template<>
struct fex_gen_config<glPathGlyphRangeNV> {};
template<>
struct fex_gen_config<glPathGlyphsNV> {};
template<>
struct fex_gen_config<glPathParameterfNV> {};
template<>
struct fex_gen_config<glPathParameterfvNV> {};
template<>
struct fex_gen_config<glPathParameteriNV> {};
template<>
struct fex_gen_config<glPathParameterivNV> {};
template<>
struct fex_gen_config<glPathStencilDepthOffsetNV> {};
template<>
struct fex_gen_config<glPathStencilFuncNV> {};
template<>
struct fex_gen_config<glPathStringNV> {};
template<>
struct fex_gen_config<glPathSubCommandsNV> {};
template<>
struct fex_gen_config<glPathSubCoordsNV> {};
// glPauseTransformFeedback*, glPixel* and glPNTriangles*ATI entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glPauseTransformFeedback> {};
template<>
struct fex_gen_config<glPauseTransformFeedbackNV> {};
template<>
struct fex_gen_config<glPixelDataRangeNV> {};
template<>
struct fex_gen_config<glPixelMapfv> {};
template<>
struct fex_gen_config<glPixelMapuiv> {};
template<>
struct fex_gen_config<glPixelMapusv> {};
template<>
struct fex_gen_config<glPixelMapx> {};
template<>
struct fex_gen_config<glPixelStoref> {};
template<>
struct fex_gen_config<glPixelStorei> {};
template<>
struct fex_gen_config<glPixelStorex> {};
template<>
struct fex_gen_config<glPixelTexGenParameterfSGIS> {};
template<>
struct fex_gen_config<glPixelTexGenParameterfvSGIS> {};
template<>
struct fex_gen_config<glPixelTexGenParameteriSGIS> {};
template<>
struct fex_gen_config<glPixelTexGenParameterivSGIS> {};
template<>
struct fex_gen_config<glPixelTexGenSGIX> {};
template<>
struct fex_gen_config<glPixelTransferf> {};
template<>
struct fex_gen_config<glPixelTransferi> {};
template<>
struct fex_gen_config<glPixelTransferxOES> {};
template<>
struct fex_gen_config<glPixelTransformParameterfEXT> {};
template<>
struct fex_gen_config<glPixelTransformParameterfvEXT> {};
template<>
struct fex_gen_config<glPixelTransformParameteriEXT> {};
template<>
struct fex_gen_config<glPixelTransformParameterivEXT> {};
template<>
struct fex_gen_config<glPixelZoom> {};
template<>
struct fex_gen_config<glPixelZoomxOES> {};
template<>
struct fex_gen_config<glPNTrianglesfATI> {};
template<>
struct fex_gen_config<glPNTrianglesiATI> {};
// glPointParameter*, glPointSize* and glPolygon* entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glPointParameterfARB> {};
template<>
struct fex_gen_config<glPointParameterfEXT> {};
template<>
struct fex_gen_config<glPointParameterf> {};
template<>
struct fex_gen_config<glPointParameterfSGIS> {};
template<>
struct fex_gen_config<glPointParameterfvARB> {};
template<>
struct fex_gen_config<glPointParameterfvEXT> {};
template<>
struct fex_gen_config<glPointParameterfv> {};
template<>
struct fex_gen_config<glPointParameterfvSGIS> {};
template<>
struct fex_gen_config<glPointParameteri> {};
template<>
struct fex_gen_config<glPointParameteriNV> {};
template<>
struct fex_gen_config<glPointParameteriv> {};
template<>
struct fex_gen_config<glPointParameterivNV> {};
template<>
struct fex_gen_config<glPointParameterxvOES> {};
template<>
struct fex_gen_config<glPointSize> {};
template<>
struct fex_gen_config<glPointSizexOES> {};
template<>
struct fex_gen_config<glPolygonMode> {};
template<>
struct fex_gen_config<glPolygonOffsetClampEXT> {};
template<>
struct fex_gen_config<glPolygonOffsetClamp> {};
template<>
struct fex_gen_config<glPolygonOffsetEXT> {};
template<>
struct fex_gen_config<glPolygonOffset> {};
template<>
struct fex_gen_config<glPolygonOffsetxOES> {};
template<>
struct fex_gen_config<glPolygonStipple> {};
// glPop*, glPresentFrame*NV, glPrimitive* and glPrioritizeTextures* entry
// points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glPopAttrib> {};
template<>
struct fex_gen_config<glPopClientAttrib> {};
template<>
struct fex_gen_config<glPopDebugGroup> {};
template<>
struct fex_gen_config<glPopGroupMarkerEXT> {};
template<>
struct fex_gen_config<glPopMatrix> {};
template<>
struct fex_gen_config<glPopName> {};
template<>
struct fex_gen_config<glPresentFrameDualFillNV> {};
template<>
struct fex_gen_config<glPresentFrameKeyedNV> {};
template<>
struct fex_gen_config<glPrimitiveBoundingBoxARB> {};
template<>
struct fex_gen_config<glPrimitiveRestartIndex> {};
template<>
struct fex_gen_config<glPrimitiveRestartIndexNV> {};
template<>
struct fex_gen_config<glPrimitiveRestartNV> {};
template<>
struct fex_gen_config<glPrioritizeTexturesEXT> {};
template<>
struct fex_gen_config<glPrioritizeTextures> {};
template<>
struct fex_gen_config<glPrioritizeTexturesxOES> {};
// glProgramBinary, glProgramBufferParameters*NV and glProgramEnvParameter*
// entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramBinary> {};
template<>
struct fex_gen_config<glProgramBufferParametersfvNV> {};
template<>
struct fex_gen_config<glProgramBufferParametersIivNV> {};
template<>
struct fex_gen_config<glProgramBufferParametersIuivNV> {};
template<>
struct fex_gen_config<glProgramEnvParameter4dARB> {};
template<>
struct fex_gen_config<glProgramEnvParameter4dvARB> {};
template<>
struct fex_gen_config<glProgramEnvParameter4fARB> {};
template<>
struct fex_gen_config<glProgramEnvParameter4fvARB> {};
template<>
struct fex_gen_config<glProgramEnvParameterI4iNV> {};
template<>
struct fex_gen_config<glProgramEnvParameterI4ivNV> {};
template<>
struct fex_gen_config<glProgramEnvParameterI4uiNV> {};
template<>
struct fex_gen_config<glProgramEnvParameterI4uivNV> {};
template<>
struct fex_gen_config<glProgramEnvParameters4fvEXT> {};
template<>
struct fex_gen_config<glProgramEnvParametersI4ivNV> {};
template<>
struct fex_gen_config<glProgramEnvParametersI4uivNV> {};
// glProgramLocalParameter* and glProgramNamedParameter4*NV entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramLocalParameter4dARB> {};
template<>
struct fex_gen_config<glProgramLocalParameter4dvARB> {};
template<>
struct fex_gen_config<glProgramLocalParameter4fARB> {};
template<>
struct fex_gen_config<glProgramLocalParameter4fvARB> {};
template<>
struct fex_gen_config<glProgramLocalParameterI4iNV> {};
template<>
struct fex_gen_config<glProgramLocalParameterI4ivNV> {};
template<>
struct fex_gen_config<glProgramLocalParameterI4uiNV> {};
template<>
struct fex_gen_config<glProgramLocalParameterI4uivNV> {};
template<>
struct fex_gen_config<glProgramLocalParameters4fvEXT> {};
template<>
struct fex_gen_config<glProgramLocalParametersI4ivNV> {};
template<>
struct fex_gen_config<glProgramLocalParametersI4uivNV> {};
template<>
struct fex_gen_config<glProgramNamedParameter4dNV> {};
template<>
struct fex_gen_config<glProgramNamedParameter4dvNV> {};
template<>
struct fex_gen_config<glProgramNamedParameter4fNV> {};
template<>
struct fex_gen_config<glProgramNamedParameter4fvNV> {};
// glProgramParameter*, glProgramPathFragmentInputGenNV, glProgramStringARB
// and glProgramSubroutineParametersuivNV entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramParameter4dNV> {};
template<>
struct fex_gen_config<glProgramParameter4dvNV> {};
template<>
struct fex_gen_config<glProgramParameter4fNV> {};
template<>
struct fex_gen_config<glProgramParameter4fvNV> {};
template<>
struct fex_gen_config<glProgramParameteriARB> {};
template<>
struct fex_gen_config<glProgramParameteriEXT> {};
template<>
struct fex_gen_config<glProgramParameteri> {};
template<>
struct fex_gen_config<glProgramParameters4dvNV> {};
template<>
struct fex_gen_config<glProgramParameters4fvNV> {};
template<>
struct fex_gen_config<glProgramPathFragmentInputGenNV> {};
template<>
struct fex_gen_config<glProgramStringARB> {};
template<>
struct fex_gen_config<glProgramSubroutineParametersuivNV> {};
// glProgramUniform1* entry points (core, EXT, ARB and NV suffixes).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniform1dEXT> {};
template<>
struct fex_gen_config<glProgramUniform1d> {};
template<>
struct fex_gen_config<glProgramUniform1dvEXT> {};
template<>
struct fex_gen_config<glProgramUniform1dv> {};
template<>
struct fex_gen_config<glProgramUniform1fEXT> {};
template<>
struct fex_gen_config<glProgramUniform1f> {};
template<>
struct fex_gen_config<glProgramUniform1fvEXT> {};
template<>
struct fex_gen_config<glProgramUniform1fv> {};
template<>
struct fex_gen_config<glProgramUniform1i64ARB> {};
template<>
struct fex_gen_config<glProgramUniform1i64NV> {};
template<>
struct fex_gen_config<glProgramUniform1i64vARB> {};
template<>
struct fex_gen_config<glProgramUniform1i64vNV> {};
template<>
struct fex_gen_config<glProgramUniform1iEXT> {};
template<>
struct fex_gen_config<glProgramUniform1i> {};
template<>
struct fex_gen_config<glProgramUniform1ivEXT> {};
template<>
struct fex_gen_config<glProgramUniform1iv> {};
template<>
struct fex_gen_config<glProgramUniform1ui64ARB> {};
template<>
struct fex_gen_config<glProgramUniform1ui64NV> {};
template<>
struct fex_gen_config<glProgramUniform1ui64vARB> {};
template<>
struct fex_gen_config<glProgramUniform1ui64vNV> {};
template<>
struct fex_gen_config<glProgramUniform1uiEXT> {};
template<>
struct fex_gen_config<glProgramUniform1ui> {};
template<>
struct fex_gen_config<glProgramUniform1uivEXT> {};
template<>
struct fex_gen_config<glProgramUniform1uiv> {};
// glProgramUniform2* entry points (core, EXT, ARB and NV suffixes).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniform2dEXT> {};
template<>
struct fex_gen_config<glProgramUniform2d> {};
template<>
struct fex_gen_config<glProgramUniform2dvEXT> {};
template<>
struct fex_gen_config<glProgramUniform2dv> {};
template<>
struct fex_gen_config<glProgramUniform2fEXT> {};
template<>
struct fex_gen_config<glProgramUniform2f> {};
template<>
struct fex_gen_config<glProgramUniform2fvEXT> {};
template<>
struct fex_gen_config<glProgramUniform2fv> {};
template<>
struct fex_gen_config<glProgramUniform2i64ARB> {};
template<>
struct fex_gen_config<glProgramUniform2i64NV> {};
template<>
struct fex_gen_config<glProgramUniform2i64vARB> {};
template<>
struct fex_gen_config<glProgramUniform2i64vNV> {};
template<>
struct fex_gen_config<glProgramUniform2iEXT> {};
template<>
struct fex_gen_config<glProgramUniform2i> {};
template<>
struct fex_gen_config<glProgramUniform2ivEXT> {};
template<>
struct fex_gen_config<glProgramUniform2iv> {};
template<>
struct fex_gen_config<glProgramUniform2ui64ARB> {};
template<>
struct fex_gen_config<glProgramUniform2ui64NV> {};
template<>
struct fex_gen_config<glProgramUniform2ui64vARB> {};
template<>
struct fex_gen_config<glProgramUniform2ui64vNV> {};
template<>
struct fex_gen_config<glProgramUniform2uiEXT> {};
template<>
struct fex_gen_config<glProgramUniform2ui> {};
template<>
struct fex_gen_config<glProgramUniform2uivEXT> {};
template<>
struct fex_gen_config<glProgramUniform2uiv> {};
// glProgramUniform3* entry points (core, EXT, ARB and NV suffixes).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniform3dEXT> {};
template<>
struct fex_gen_config<glProgramUniform3d> {};
template<>
struct fex_gen_config<glProgramUniform3dvEXT> {};
template<>
struct fex_gen_config<glProgramUniform3dv> {};
template<>
struct fex_gen_config<glProgramUniform3fEXT> {};
template<>
struct fex_gen_config<glProgramUniform3f> {};
template<>
struct fex_gen_config<glProgramUniform3fvEXT> {};
template<>
struct fex_gen_config<glProgramUniform3fv> {};
template<>
struct fex_gen_config<glProgramUniform3i64ARB> {};
template<>
struct fex_gen_config<glProgramUniform3i64NV> {};
template<>
struct fex_gen_config<glProgramUniform3i64vARB> {};
template<>
struct fex_gen_config<glProgramUniform3i64vNV> {};
template<>
struct fex_gen_config<glProgramUniform3iEXT> {};
template<>
struct fex_gen_config<glProgramUniform3i> {};
template<>
struct fex_gen_config<glProgramUniform3ivEXT> {};
template<>
struct fex_gen_config<glProgramUniform3iv> {};
template<>
struct fex_gen_config<glProgramUniform3ui64ARB> {};
template<>
struct fex_gen_config<glProgramUniform3ui64NV> {};
template<>
struct fex_gen_config<glProgramUniform3ui64vARB> {};
template<>
struct fex_gen_config<glProgramUniform3ui64vNV> {};
template<>
struct fex_gen_config<glProgramUniform3uiEXT> {};
template<>
struct fex_gen_config<glProgramUniform3ui> {};
template<>
struct fex_gen_config<glProgramUniform3uivEXT> {};
template<>
struct fex_gen_config<glProgramUniform3uiv> {};
// glProgramUniform4* entry points (core, EXT, ARB and NV suffixes).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniform4dEXT> {};
template<>
struct fex_gen_config<glProgramUniform4d> {};
template<>
struct fex_gen_config<glProgramUniform4dvEXT> {};
template<>
struct fex_gen_config<glProgramUniform4dv> {};
template<>
struct fex_gen_config<glProgramUniform4fEXT> {};
template<>
struct fex_gen_config<glProgramUniform4f> {};
template<>
struct fex_gen_config<glProgramUniform4fvEXT> {};
template<>
struct fex_gen_config<glProgramUniform4fv> {};
template<>
struct fex_gen_config<glProgramUniform4i64ARB> {};
template<>
struct fex_gen_config<glProgramUniform4i64NV> {};
template<>
struct fex_gen_config<glProgramUniform4i64vARB> {};
template<>
struct fex_gen_config<glProgramUniform4i64vNV> {};
template<>
struct fex_gen_config<glProgramUniform4iEXT> {};
template<>
struct fex_gen_config<glProgramUniform4i> {};
template<>
struct fex_gen_config<glProgramUniform4ivEXT> {};
template<>
struct fex_gen_config<glProgramUniform4iv> {};
template<>
struct fex_gen_config<glProgramUniform4ui64ARB> {};
template<>
struct fex_gen_config<glProgramUniform4ui64NV> {};
template<>
struct fex_gen_config<glProgramUniform4ui64vARB> {};
template<>
struct fex_gen_config<glProgramUniform4ui64vNV> {};
template<>
struct fex_gen_config<glProgramUniform4uiEXT> {};
template<>
struct fex_gen_config<glProgramUniform4ui> {};
template<>
struct fex_gen_config<glProgramUniform4uivEXT> {};
template<>
struct fex_gen_config<glProgramUniform4uiv> {};
// glProgramUniformHandleui64* entry points (ARB and NV, scalar and vector).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniformHandleui64ARB> {};
template<>
struct fex_gen_config<glProgramUniformHandleui64NV> {};
template<>
struct fex_gen_config<glProgramUniformHandleui64vARB> {};
template<>
struct fex_gen_config<glProgramUniformHandleui64vNV> {};
// glProgramUniformMatrix* entry points (square and non-square variants, with
// and without the EXT suffix).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniformMatrix2dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x3dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x3dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x3fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x3fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x4dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x4dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x4fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix2x4fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x2dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x2dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x2fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x2fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x4dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x4dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x4fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix3x4fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x2dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x2dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x2fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x2fv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x3dvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x3dv> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x3fvEXT> {};
template<>
struct fex_gen_config<glProgramUniformMatrix4x3fv> {};
// glProgramUniformui64*NV, glProgramVertexLimitNV, glProvokingVertex*,
// glPush* and glQuery* entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glProgramUniformui64NV> {};
template<>
struct fex_gen_config<glProgramUniformui64vNV> {};
template<>
struct fex_gen_config<glProgramVertexLimitNV> {};
template<>
struct fex_gen_config<glProvokingVertexEXT> {};
template<>
struct fex_gen_config<glProvokingVertex> {};
template<>
struct fex_gen_config<glPushAttrib> {};
template<>
struct fex_gen_config<glPushClientAttribDefaultEXT> {};
template<>
struct fex_gen_config<glPushClientAttrib> {};
template<>
struct fex_gen_config<glPushDebugGroup> {};
template<>
struct fex_gen_config<glPushGroupMarkerEXT> {};
template<>
struct fex_gen_config<glPushMatrix> {};
template<>
struct fex_gen_config<glPushName> {};
template<>
struct fex_gen_config<glQueryCounter> {};
template<>
struct fex_gen_config<glQueryObjectParameteruiAMD> {};
template<>
struct fex_gen_config<glQueryResourceTagNV> {};
// glRasterPos2*/3*/4* entry points (d, f, i, s and xOES variants, scalar and
// vector forms).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glRasterPos2d> {};
template<>
struct fex_gen_config<glRasterPos2dv> {};
template<>
struct fex_gen_config<glRasterPos2f> {};
template<>
struct fex_gen_config<glRasterPos2fv> {};
template<>
struct fex_gen_config<glRasterPos2i> {};
template<>
struct fex_gen_config<glRasterPos2iv> {};
template<>
struct fex_gen_config<glRasterPos2s> {};
template<>
struct fex_gen_config<glRasterPos2sv> {};
template<>
struct fex_gen_config<glRasterPos2xOES> {};
template<>
struct fex_gen_config<glRasterPos2xvOES> {};
template<>
struct fex_gen_config<glRasterPos3d> {};
template<>
struct fex_gen_config<glRasterPos3dv> {};
template<>
struct fex_gen_config<glRasterPos3f> {};
template<>
struct fex_gen_config<glRasterPos3fv> {};
template<>
struct fex_gen_config<glRasterPos3i> {};
template<>
struct fex_gen_config<glRasterPos3iv> {};
template<>
struct fex_gen_config<glRasterPos3s> {};
template<>
struct fex_gen_config<glRasterPos3sv> {};
template<>
struct fex_gen_config<glRasterPos3xOES> {};
template<>
struct fex_gen_config<glRasterPos3xvOES> {};
template<>
struct fex_gen_config<glRasterPos4d> {};
template<>
struct fex_gen_config<glRasterPos4dv> {};
template<>
struct fex_gen_config<glRasterPos4f> {};
template<>
struct fex_gen_config<glRasterPos4fv> {};
template<>
struct fex_gen_config<glRasterPos4i> {};
template<>
struct fex_gen_config<glRasterPos4iv> {};
template<>
struct fex_gen_config<glRasterPos4s> {};
template<>
struct fex_gen_config<glRasterPos4sv> {};
template<>
struct fex_gen_config<glRasterPos4xOES> {};
template<>
struct fex_gen_config<glRasterPos4xvOES> {};
// glRasterSamplesEXT, glRead*, glRect*, glReferencePlaneSGIX,
// glReleaseShaderCompiler, glRenderbufferStorage* and glRenderGpuMaskNV entry
// points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glRasterSamplesEXT> {};
template<>
struct fex_gen_config<glReadBuffer> {};
template<>
struct fex_gen_config<glReadInstrumentsSGIX> {};
template<>
struct fex_gen_config<glReadnPixelsARB> {};
template<>
struct fex_gen_config<glReadnPixels> {};
template<>
struct fex_gen_config<glReadPixels> {};
template<>
struct fex_gen_config<glRectd> {};
template<>
struct fex_gen_config<glRectdv> {};
template<>
struct fex_gen_config<glRectf> {};
template<>
struct fex_gen_config<glRectfv> {};
template<>
struct fex_gen_config<glRecti> {};
template<>
struct fex_gen_config<glRectiv> {};
template<>
struct fex_gen_config<glRects> {};
template<>
struct fex_gen_config<glRectsv> {};
template<>
struct fex_gen_config<glRectxOES> {};
template<>
struct fex_gen_config<glRectxvOES> {};
template<>
struct fex_gen_config<glReferencePlaneSGIX> {};
template<>
struct fex_gen_config<glReleaseShaderCompiler> {};
template<>
struct fex_gen_config<glRenderbufferStorageEXT> {};
template<>
struct fex_gen_config<glRenderbufferStorage> {};
template<>
struct fex_gen_config<glRenderbufferStorageMultisampleAdvancedAMD> {};
template<>
struct fex_gen_config<glRenderbufferStorageMultisampleCoverageNV> {};
template<>
struct fex_gen_config<glRenderbufferStorageMultisampleEXT> {};
template<>
struct fex_gen_config<glRenderbufferStorageMultisample> {};
template<>
struct fex_gen_config<glRenderGpuMaskNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// glReplacementCodePointerSUN is declared only when IS_32BIT_THUNK is not
// defined. Its `const void**` parameter (index 2) is annotated with
// assume_compatible_data_layout.
// NOTE(review): presumably an array of pointers cannot be assumed
// layout-compatible when the guest uses 32-bit pointers - confirm before
// enabling this for 32-bit thunks.
template<>
struct fex_gen_config<glReplacementCodePointerSUN> {};
template<>
struct fex_gen_param<glReplacementCodePointerSUN, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glReplacementCode*SUN entry points (ub/ui/us variants and the combined
// colour/normal/texcoord/vertex forms).
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glReplacementCodeubSUN> {};
template<>
struct fex_gen_config<glReplacementCodeubvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor3fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor4fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor4fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor4ubVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiColor4ubVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fColor4fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiTexCoord2fVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiVertex3fSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuiVertex3fvSUN> {};
template<>
struct fex_gen_config<glReplacementCodeuivSUN> {};
template<>
struct fex_gen_config<glReplacementCodeusSUN> {};
template<>
struct fex_gen_config<glReplacementCodeusvSUN> {};
// glRequestResidentProgramsNV, glReset*, glResizeBuffersMESA,
// glResolveDepthValuesNV, glResumeTransformFeedback* and glRotate* entry
// points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glRequestResidentProgramsNV> {};
template<>
struct fex_gen_config<glResetHistogramEXT> {};
template<>
struct fex_gen_config<glResetHistogram> {};
template<>
struct fex_gen_config<glResetMemoryObjectParameterNV> {};
template<>
struct fex_gen_config<glResetMinmaxEXT> {};
template<>
struct fex_gen_config<glResetMinmax> {};
template<>
struct fex_gen_config<glResizeBuffersMESA> {};
template<>
struct fex_gen_config<glResolveDepthValuesNV> {};
template<>
struct fex_gen_config<glResumeTransformFeedback> {};
template<>
struct fex_gen_config<glResumeTransformFeedbackNV> {};
template<>
struct fex_gen_config<glRotated> {};
template<>
struct fex_gen_config<glRotatef> {};
template<>
struct fex_gen_config<glRotatexOES> {};
// glSample*, glSamplerParameter*, glScale* and glScissor* entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glSampleCoverageARB> {};
template<>
struct fex_gen_config<glSampleCoverage> {};
template<>
struct fex_gen_config<glSampleMapATI> {};
template<>
struct fex_gen_config<glSampleMaskEXT> {};
template<>
struct fex_gen_config<glSampleMaski> {};
template<>
struct fex_gen_config<glSampleMaskIndexedNV> {};
template<>
struct fex_gen_config<glSampleMaskSGIS> {};
template<>
struct fex_gen_config<glSamplePatternEXT> {};
template<>
struct fex_gen_config<glSamplePatternSGIS> {};
template<>
struct fex_gen_config<glSamplerParameterf> {};
template<>
struct fex_gen_config<glSamplerParameterfv> {};
template<>
struct fex_gen_config<glSamplerParameteri> {};
template<>
struct fex_gen_config<glSamplerParameterIiv> {};
template<>
struct fex_gen_config<glSamplerParameterIuiv> {};
template<>
struct fex_gen_config<glSamplerParameteriv> {};
template<>
struct fex_gen_config<glScaled> {};
template<>
struct fex_gen_config<glScalef> {};
template<>
struct fex_gen_config<glScalexOES> {};
template<>
struct fex_gen_config<glScissorArrayv> {};
template<>
struct fex_gen_config<glScissorExclusiveArrayvNV> {};
template<>
struct fex_gen_config<glScissorExclusiveNV> {};
template<>
struct fex_gen_config<glScissor> {};
template<>
struct fex_gen_config<glScissorIndexed> {};
template<>
struct fex_gen_config<glScissorIndexedv> {};
// glSecondaryColor3*, glSecondaryColorFormatNV and
// glSecondaryColorPointerEXT entry points.
// Every fex_gen_config specialization in this run is empty and has no base
// class, so no per-function annotation is attached to these functions here.
template<>
struct fex_gen_config<glSecondaryColor3bEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3bvEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3dEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3dvEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3fEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3fvEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3hNV> {};
template<>
struct fex_gen_config<glSecondaryColor3hvNV> {};
template<>
struct fex_gen_config<glSecondaryColor3iEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3ivEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3sEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3svEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3ubEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3ubvEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3uiEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3uivEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3usEXT> {};
template<>
struct fex_gen_config<glSecondaryColor3usvEXT> {};
template<>
struct fex_gen_config<glSecondaryColorFormatNV> {};
template<>
struct fex_gen_config<glSecondaryColorPointerEXT> {};
// glSecondaryColorPointerListIBM is only configured when IS_32BIT_THUNK is not defined.
// Parameter index 3 (const void**) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably the pointer-array layout differs for 32-bit guests, which
// is why this entry is excluded there — confirm before enabling.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glSecondaryColorPointerListIBM> {};
template<>
struct fex_gen_param<glSecondaryColorPointerListIBM, 3, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glSelect*, glSemaphoreParameter*, glSeparableFilter2D* and glSet* entry points.
// No fexgen annotations are attached.
template<>
struct fex_gen_config<glSelectBuffer> {};
template<>
struct fex_gen_config<glSelectPerfMonitorCountersAMD> {};
template<>
struct fex_gen_config<glSemaphoreParameterui64vEXT> {};
template<>
struct fex_gen_config<glSeparableFilter2DEXT> {};
template<>
struct fex_gen_config<glSeparableFilter2D> {};
template<>
struct fex_gen_config<glSetFenceAPPLE> {};
template<>
struct fex_gen_config<glSetFenceNV> {};
template<>
struct fex_gen_config<glSetFragmentShaderConstantATI> {};
template<>
struct fex_gen_config<glSetInvariantEXT> {};
template<>
struct fex_gen_config<glSetLocalConstantEXT> {};
template<>
struct fex_gen_config<glSetMultisamplefvAMD> {};
// glShadeModel, glShaderBinary and the glShaderOp{1,2,3}EXT entry points, unannotated.
template<>
struct fex_gen_config<glShadeModel> {};
template<>
struct fex_gen_config<glShaderBinary> {};
template<>
struct fex_gen_config<glShaderOp1EXT> {};
template<>
struct fex_gen_config<glShaderOp2EXT> {};
template<>
struct fex_gen_config<glShaderOp3EXT> {};
// glShaderSource and glShaderSourceARB are both marked custom_host_impl, and their
// string-array parameter (index 2) is marked ptr_passthrough.
// NOTE(review): presumably a hand-written host-side implementation consumes the raw
// guest pointer array — the implementation is not visible here; confirm it exists
// for both the core and ARB variants.
template<>
struct fex_gen_config<glShaderSource> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glShaderSource, 2, const GLchar* const*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glShaderSourceARB> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glShaderSourceARB, 2, const GLcharARB**> : fexgen::ptr_passthrough {};
// glShaderStorageBlockBinding, glShadingRate*NV, glSharpenTexFuncSGIS, glSignal* and
// glSpecializeShader* entry points. No fexgen annotations are attached.
template<>
struct fex_gen_config<glShaderStorageBlockBinding> {};
template<>
struct fex_gen_config<glShadingRateImageBarrierNV> {};
template<>
struct fex_gen_config<glShadingRateImagePaletteNV> {};
template<>
struct fex_gen_config<glShadingRateSampleOrderCustomNV> {};
template<>
struct fex_gen_config<glShadingRateSampleOrderNV> {};
template<>
struct fex_gen_config<glSharpenTexFuncSGIS> {};
template<>
struct fex_gen_config<glSignalSemaphoreEXT> {};
template<>
struct fex_gen_config<glSignalSemaphoreui64NVX> {};
template<>
struct fex_gen_config<glSignalVkFenceNV> {};
template<>
struct fex_gen_config<glSignalVkSemaphoreNV> {};
template<>
struct fex_gen_config<glSpecializeShaderARB> {};
template<>
struct fex_gen_config<glSpecializeShader> {};
// glSpriteParameter*SGIX, glStartInstrumentsSGIX and glStateCaptureNV, unannotated.
template<>
struct fex_gen_config<glSpriteParameterfSGIX> {};
template<>
struct fex_gen_config<glSpriteParameterfvSGIX> {};
template<>
struct fex_gen_config<glSpriteParameteriSGIX> {};
template<>
struct fex_gen_config<glSpriteParameterivSGIX> {};
template<>
struct fex_gen_config<glStartInstrumentsSGIX> {};
template<>
struct fex_gen_config<glStateCaptureNV> {};
// glStencil* entry points: core func/mask/op setters, their Separate/ATI/AMD variants,
// and the NV path-rendering stencil calls. No fexgen annotations are attached.
template<>
struct fex_gen_config<glStencilClearTagEXT> {};
template<>
struct fex_gen_config<glStencilFillPathInstancedNV> {};
template<>
struct fex_gen_config<glStencilFillPathNV> {};
template<>
struct fex_gen_config<glStencilFunc> {};
template<>
struct fex_gen_config<glStencilFuncSeparateATI> {};
template<>
struct fex_gen_config<glStencilFuncSeparate> {};
template<>
struct fex_gen_config<glStencilMask> {};
template<>
struct fex_gen_config<glStencilMaskSeparate> {};
template<>
struct fex_gen_config<glStencilOp> {};
template<>
struct fex_gen_config<glStencilOpSeparateATI> {};
template<>
struct fex_gen_config<glStencilOpSeparate> {};
template<>
struct fex_gen_config<glStencilOpValueAMD> {};
template<>
struct fex_gen_config<glStencilStrokePathInstancedNV> {};
template<>
struct fex_gen_config<glStencilStrokePathNV> {};
template<>
struct fex_gen_config<glStencilThenCoverFillPathInstancedNV> {};
template<>
struct fex_gen_config<glStencilThenCoverFillPathNV> {};
template<>
struct fex_gen_config<glStencilThenCoverStrokePathInstancedNV> {};
template<>
struct fex_gen_config<glStencilThenCoverStrokePathNV> {};
// Miscellaneous vendor-extension entry points from glStopInstrumentsSGIX through
// glTagSampleBufferSGIX, all with empty (unannotated) configs.
template<>
struct fex_gen_config<glStopInstrumentsSGIX> {};
template<>
struct fex_gen_config<glStringMarkerGREMEDY> {};
template<>
struct fex_gen_config<glSubpixelPrecisionBiasNV> {};
template<>
struct fex_gen_config<glSwizzleEXT> {};
template<>
struct fex_gen_config<glSyncTextureINTEL> {};
template<>
struct fex_gen_config<glTagSampleBufferSGIX> {};
// glTangent3*EXT entry points and glTangentPointerEXT, unannotated.
template<>
struct fex_gen_config<glTangent3bEXT> {};
template<>
struct fex_gen_config<glTangent3bvEXT> {};
template<>
struct fex_gen_config<glTangent3dEXT> {};
template<>
struct fex_gen_config<glTangent3dvEXT> {};
template<>
struct fex_gen_config<glTangent3fEXT> {};
template<>
struct fex_gen_config<glTangent3fvEXT> {};
template<>
struct fex_gen_config<glTangent3iEXT> {};
template<>
struct fex_gen_config<glTangent3ivEXT> {};
template<>
struct fex_gen_config<glTangent3sEXT> {};
template<>
struct fex_gen_config<glTangent3svEXT> {};
template<>
struct fex_gen_config<glTangentPointerEXT> {};
// glTbufferMask3DFX, glTessellation*AMD, glTexAttachMemoryNV, glTexBuffer* and
// glTexBumpParameter*ATI entry points. No fexgen annotations are attached.
template<>
struct fex_gen_config<glTbufferMask3DFX> {};
template<>
struct fex_gen_config<glTessellationFactorAMD> {};
template<>
struct fex_gen_config<glTessellationModeAMD> {};
template<>
struct fex_gen_config<glTexAttachMemoryNV> {};
template<>
struct fex_gen_config<glTexBufferARB> {};
template<>
struct fex_gen_config<glTexBufferEXT> {};
template<>
struct fex_gen_config<glTexBuffer> {};
template<>
struct fex_gen_config<glTexBufferRange> {};
template<>
struct fex_gen_config<glTexBumpParameterfvATI> {};
template<>
struct fex_gen_config<glTexBumpParameterivATI> {};
// glTexCoord1* entry points (scalar and vector forms, including OES/NV variants).
// All configs are empty.
template<>
struct fex_gen_config<glTexCoord1bOES> {};
template<>
struct fex_gen_config<glTexCoord1bvOES> {};
template<>
struct fex_gen_config<glTexCoord1d> {};
template<>
struct fex_gen_config<glTexCoord1dv> {};
template<>
struct fex_gen_config<glTexCoord1f> {};
template<>
struct fex_gen_config<glTexCoord1fv> {};
template<>
struct fex_gen_config<glTexCoord1hNV> {};
template<>
struct fex_gen_config<glTexCoord1hvNV> {};
template<>
struct fex_gen_config<glTexCoord1i> {};
template<>
struct fex_gen_config<glTexCoord1iv> {};
template<>
struct fex_gen_config<glTexCoord1s> {};
template<>
struct fex_gen_config<glTexCoord1sv> {};
template<>
struct fex_gen_config<glTexCoord1xOES> {};
template<>
struct fex_gen_config<glTexCoord1xvOES> {};
// glTexCoord2* entry points, including the SUN combined
// texcoord/color/normal/vertex variants. All configs are empty.
template<>
struct fex_gen_config<glTexCoord2bOES> {};
template<>
struct fex_gen_config<glTexCoord2bvOES> {};
template<>
struct fex_gen_config<glTexCoord2d> {};
template<>
struct fex_gen_config<glTexCoord2dv> {};
template<>
struct fex_gen_config<glTexCoord2fColor3fVertex3fSUN> {};
template<>
struct fex_gen_config<glTexCoord2fColor3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glTexCoord2fColor4fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glTexCoord2fColor4fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glTexCoord2fColor4ubVertex3fSUN> {};
template<>
struct fex_gen_config<glTexCoord2fColor4ubVertex3fvSUN> {};
template<>
struct fex_gen_config<glTexCoord2f> {};
template<>
struct fex_gen_config<glTexCoord2fNormal3fVertex3fSUN> {};
template<>
struct fex_gen_config<glTexCoord2fNormal3fVertex3fvSUN> {};
template<>
struct fex_gen_config<glTexCoord2fv> {};
template<>
struct fex_gen_config<glTexCoord2fVertex3fSUN> {};
template<>
struct fex_gen_config<glTexCoord2fVertex3fvSUN> {};
template<>
struct fex_gen_config<glTexCoord2hNV> {};
template<>
struct fex_gen_config<glTexCoord2hvNV> {};
template<>
struct fex_gen_config<glTexCoord2i> {};
template<>
struct fex_gen_config<glTexCoord2iv> {};
template<>
struct fex_gen_config<glTexCoord2s> {};
template<>
struct fex_gen_config<glTexCoord2sv> {};
template<>
struct fex_gen_config<glTexCoord2xOES> {};
template<>
struct fex_gen_config<glTexCoord2xvOES> {};
// glTexCoord3* entry points (scalar and vector forms, including OES/NV variants).
// All configs are empty.
template<>
struct fex_gen_config<glTexCoord3bOES> {};
template<>
struct fex_gen_config<glTexCoord3bvOES> {};
template<>
struct fex_gen_config<glTexCoord3d> {};
template<>
struct fex_gen_config<glTexCoord3dv> {};
template<>
struct fex_gen_config<glTexCoord3f> {};
template<>
struct fex_gen_config<glTexCoord3fv> {};
template<>
struct fex_gen_config<glTexCoord3hNV> {};
template<>
struct fex_gen_config<glTexCoord3hvNV> {};
template<>
struct fex_gen_config<glTexCoord3i> {};
template<>
struct fex_gen_config<glTexCoord3iv> {};
template<>
struct fex_gen_config<glTexCoord3s> {};
template<>
struct fex_gen_config<glTexCoord3sv> {};
template<>
struct fex_gen_config<glTexCoord3xOES> {};
template<>
struct fex_gen_config<glTexCoord3xvOES> {};
// glTexCoord4* entry points, including the SUN combined-attribute variants.
// All configs are empty.
template<>
struct fex_gen_config<glTexCoord4bOES> {};
template<>
struct fex_gen_config<glTexCoord4bvOES> {};
template<>
struct fex_gen_config<glTexCoord4d> {};
template<>
struct fex_gen_config<glTexCoord4dv> {};
template<>
struct fex_gen_config<glTexCoord4fColor4fNormal3fVertex4fSUN> {};
template<>
struct fex_gen_config<glTexCoord4fColor4fNormal3fVertex4fvSUN> {};
template<>
struct fex_gen_config<glTexCoord4f> {};
template<>
struct fex_gen_config<glTexCoord4fv> {};
template<>
struct fex_gen_config<glTexCoord4fVertex4fSUN> {};
template<>
struct fex_gen_config<glTexCoord4fVertex4fvSUN> {};
template<>
struct fex_gen_config<glTexCoord4hNV> {};
template<>
struct fex_gen_config<glTexCoord4hvNV> {};
template<>
struct fex_gen_config<glTexCoord4i> {};
template<>
struct fex_gen_config<glTexCoord4iv> {};
template<>
struct fex_gen_config<glTexCoord4s> {};
template<>
struct fex_gen_config<glTexCoord4sv> {};
template<>
struct fex_gen_config<glTexCoord4xOES> {};
template<>
struct fex_gen_config<glTexCoord4xvOES> {};
// glTexCoordFormatNV and the single-pointer glTexCoordPointer* setters, unannotated.
// The ListIBM/vINTEL variants that take pointer arrays are configured separately.
template<>
struct fex_gen_config<glTexCoordFormatNV> {};
template<>
struct fex_gen_config<glTexCoordPointerEXT> {};
template<>
struct fex_gen_config<glTexCoordPointer> {};
// glTexCoordPointerListIBM and glTexCoordPointervINTEL are only configured when
// IS_32BIT_THUNK is not defined. Their const void** parameters (index 3 and index 2
// respectively) are annotated with assume_compatible_data_layout.
// NOTE(review): presumably the pointer-array layout differs for 32-bit guests, which
// is why these entries are excluded there — confirm before enabling.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glTexCoordPointerListIBM> {};
template<>
struct fex_gen_param<glTexCoordPointerListIBM, 3, const void**> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glTexCoordPointervINTEL> {};
template<>
struct fex_gen_param<glTexCoordPointervINTEL, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
// glTexEnv*, glTexFilterFuncSGIS and glTexGen* entry points.
// No fexgen annotations are attached.
template<>
struct fex_gen_config<glTexEnvf> {};
template<>
struct fex_gen_config<glTexEnvfv> {};
template<>
struct fex_gen_config<glTexEnvi> {};
template<>
struct fex_gen_config<glTexEnviv> {};
template<>
struct fex_gen_config<glTexEnvxOES> {};
template<>
struct fex_gen_config<glTexEnvxvOES> {};
template<>
struct fex_gen_config<glTexFilterFuncSGIS> {};
template<>
struct fex_gen_config<glTexGend> {};
template<>
struct fex_gen_config<glTexGendv> {};
template<>
struct fex_gen_config<glTexGenf> {};
template<>
struct fex_gen_config<glTexGenfv> {};
template<>
struct fex_gen_config<glTexGeni> {};
template<>
struct fex_gen_config<glTexGeniv> {};
template<>
struct fex_gen_config<glTexGenxOES> {};
template<>
struct fex_gen_config<glTexGenxvOES> {};
// glTexImage{1D,2D,3D,4D}* entry points, including multisample and NV coverage variants.
// All configs are empty.
template<>
struct fex_gen_config<glTexImage1D> {};
template<>
struct fex_gen_config<glTexImage2D> {};
template<>
struct fex_gen_config<glTexImage2DMultisampleCoverageNV> {};
template<>
struct fex_gen_config<glTexImage2DMultisample> {};
template<>
struct fex_gen_config<glTexImage3DEXT> {};
template<>
struct fex_gen_config<glTexImage3D> {};
template<>
struct fex_gen_config<glTexImage3DMultisampleCoverageNV> {};
template<>
struct fex_gen_config<glTexImage3DMultisample> {};
template<>
struct fex_gen_config<glTexImage4DSGIS> {};
// glTexPageCommitmentARB and the glTexParameter* entry points, unannotated.
template<>
struct fex_gen_config<glTexPageCommitmentARB> {};
template<>
struct fex_gen_config<glTexParameterf> {};
template<>
struct fex_gen_config<glTexParameterfv> {};
template<>
struct fex_gen_config<glTexParameteri> {};
template<>
struct fex_gen_config<glTexParameterIivEXT> {};
template<>
struct fex_gen_config<glTexParameterIiv> {};
template<>
struct fex_gen_config<glTexParameterIuivEXT> {};
template<>
struct fex_gen_config<glTexParameterIuiv> {};
template<>
struct fex_gen_config<glTexParameteriv> {};
template<>
struct fex_gen_config<glTexParameterxOES> {};
template<>
struct fex_gen_config<glTexParameterxvOES> {};
// glTexRenderbufferNV and the glTexStorage* entry points (plain, multisample,
// memory-object EXT and sparse AMD variants). All configs are empty.
template<>
struct fex_gen_config<glTexRenderbufferNV> {};
template<>
struct fex_gen_config<glTexStorage1D> {};
template<>
struct fex_gen_config<glTexStorage2D> {};
template<>
struct fex_gen_config<glTexStorage2DMultisample> {};
template<>
struct fex_gen_config<glTexStorage3D> {};
template<>
struct fex_gen_config<glTexStorage3DMultisample> {};
template<>
struct fex_gen_config<glTexStorageMem1DEXT> {};
template<>
struct fex_gen_config<glTexStorageMem2DEXT> {};
template<>
struct fex_gen_config<glTexStorageMem2DMultisampleEXT> {};
template<>
struct fex_gen_config<glTexStorageMem3DEXT> {};
template<>
struct fex_gen_config<glTexStorageMem3DMultisampleEXT> {};
template<>
struct fex_gen_config<glTexStorageSparseAMD> {};
// glTexSubImage{1D,2D,3D,4D}* entry points, unannotated.
template<>
struct fex_gen_config<glTexSubImage1DEXT> {};
template<>
struct fex_gen_config<glTexSubImage1D> {};
template<>
struct fex_gen_config<glTexSubImage2DEXT> {};
template<>
struct fex_gen_config<glTexSubImage2D> {};
template<>
struct fex_gen_config<glTexSubImage3DEXT> {};
template<>
struct fex_gen_config<glTexSubImage3D> {};
template<>
struct fex_gen_config<glTexSubImage4DSGIS> {};
// glTexture* entry points from glTextureAttachMemoryNV through
// glTexturePageCommitmentEXT (barrier, buffer, image and EXT light/material/normal
// calls). No fexgen annotations are attached.
template<>
struct fex_gen_config<glTextureAttachMemoryNV> {};
template<>
struct fex_gen_config<glTextureBarrier> {};
template<>
struct fex_gen_config<glTextureBarrierNV> {};
template<>
struct fex_gen_config<glTextureBufferEXT> {};
template<>
struct fex_gen_config<glTextureBuffer> {};
template<>
struct fex_gen_config<glTextureBufferRangeEXT> {};
template<>
struct fex_gen_config<glTextureBufferRange> {};
template<>
struct fex_gen_config<glTextureColorMaskSGIS> {};
template<>
struct fex_gen_config<glTextureImage1DEXT> {};
template<>
struct fex_gen_config<glTextureImage2DEXT> {};
template<>
struct fex_gen_config<glTextureImage2DMultisampleCoverageNV> {};
template<>
struct fex_gen_config<glTextureImage2DMultisampleNV> {};
template<>
struct fex_gen_config<glTextureImage3DEXT> {};
template<>
struct fex_gen_config<glTextureImage3DMultisampleCoverageNV> {};
template<>
struct fex_gen_config<glTextureImage3DMultisampleNV> {};
template<>
struct fex_gen_config<glTextureLightEXT> {};
template<>
struct fex_gen_config<glTextureMaterialEXT> {};
template<>
struct fex_gen_config<glTextureNormalEXT> {};
template<>
struct fex_gen_config<glTexturePageCommitmentEXT> {};
// glTextureParameter* entry points (core and EXT variants), unannotated.
template<>
struct fex_gen_config<glTextureParameterfEXT> {};
template<>
struct fex_gen_config<glTextureParameterf> {};
template<>
struct fex_gen_config<glTextureParameterfvEXT> {};
template<>
struct fex_gen_config<glTextureParameterfv> {};
template<>
struct fex_gen_config<glTextureParameteriEXT> {};
template<>
struct fex_gen_config<glTextureParameteri> {};
template<>
struct fex_gen_config<glTextureParameterIivEXT> {};
template<>
struct fex_gen_config<glTextureParameterIiv> {};
template<>
struct fex_gen_config<glTextureParameterIuivEXT> {};
template<>
struct fex_gen_config<glTextureParameterIuiv> {};
template<>
struct fex_gen_config<glTextureParameterivEXT> {};
template<>
struct fex_gen_config<glTextureParameteriv> {};
// glTextureRangeAPPLE is only configured when IS_32BIT_THUNK is not defined;
// 32-bit builds get no config for it at all.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glTextureRangeAPPLE> {};
#endif
// glTextureRenderbufferEXT and the glTextureStorage* entry points (core, EXT,
// multisample, memory-object and sparse AMD variants). All configs are empty.
template<>
struct fex_gen_config<glTextureRenderbufferEXT> {};
template<>
struct fex_gen_config<glTextureStorage1DEXT> {};
template<>
struct fex_gen_config<glTextureStorage1D> {};
template<>
struct fex_gen_config<glTextureStorage2DEXT> {};
template<>
struct fex_gen_config<glTextureStorage2D> {};
template<>
struct fex_gen_config<glTextureStorage2DMultisampleEXT> {};
template<>
struct fex_gen_config<glTextureStorage2DMultisample> {};
template<>
struct fex_gen_config<glTextureStorage3DEXT> {};
template<>
struct fex_gen_config<glTextureStorage3D> {};
template<>
struct fex_gen_config<glTextureStorage3DMultisampleEXT> {};
template<>
struct fex_gen_config<glTextureStorage3DMultisample> {};
template<>
struct fex_gen_config<glTextureStorageMem1DEXT> {};
template<>
struct fex_gen_config<glTextureStorageMem2DEXT> {};
template<>
struct fex_gen_config<glTextureStorageMem2DMultisampleEXT> {};
template<>
struct fex_gen_config<glTextureStorageMem3DEXT> {};
template<>
struct fex_gen_config<glTextureStorageMem3DMultisampleEXT> {};
template<>
struct fex_gen_config<glTextureStorageSparseAMD> {};
// glTextureSubImage{1D,2D,3D}* entry points and glTextureView, unannotated.
template<>
struct fex_gen_config<glTextureSubImage1DEXT> {};
template<>
struct fex_gen_config<glTextureSubImage1D> {};
template<>
struct fex_gen_config<glTextureSubImage2DEXT> {};
template<>
struct fex_gen_config<glTextureSubImage2D> {};
template<>
struct fex_gen_config<glTextureSubImage3DEXT> {};
template<>
struct fex_gen_config<glTextureSubImage3D> {};
template<>
struct fex_gen_config<glTextureView> {};
// glTrackMatrixNV and the glTransformFeedback* attrib/buffer entry points, unannotated.
// The glTransformFeedbackVaryings(EXT) variants are configured separately.
template<>
struct fex_gen_config<glTrackMatrixNV> {};
template<>
struct fex_gen_config<glTransformFeedbackAttribsNV> {};
template<>
struct fex_gen_config<glTransformFeedbackBufferBase> {};
template<>
struct fex_gen_config<glTransformFeedbackBufferRange> {};
template<>
struct fex_gen_config<glTransformFeedbackStreamAttribsNV> {};
// glTransformFeedbackVaryingsEXT and glTransformFeedbackVaryings are only configured
// when IS_32BIT_THUNK is not defined. Their string-array parameter (index 2,
// const char* const*) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably the pointer-array layout differs for 32-bit guests, which
// is why these entries are excluded there — confirm before enabling.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glTransformFeedbackVaryingsEXT> {};
template<>
struct fex_gen_param<glTransformFeedbackVaryingsEXT, 2, const char* const*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glTransformFeedbackVaryings> {};
template<>
struct fex_gen_param<glTransformFeedbackVaryings, 2, const char* const*> : fexgen::assume_compatible_data_layout {};
#endif
// glTransformFeedbackVaryingsNV, glTransformPathNV and the glTranslate* entry points.
// Unlike the core/EXT TransformFeedbackVaryings variants, the NV variant is
// configured unconditionally and carries no parameter annotation.
template<>
struct fex_gen_config<glTransformFeedbackVaryingsNV> {};
template<>
struct fex_gen_config<glTransformPathNV> {};
template<>
struct fex_gen_config<glTranslated> {};
template<>
struct fex_gen_config<glTranslatef> {};
template<>
struct fex_gen_config<glTranslatexOES> {};
// glUniform1* entry points (core, ARB, EXT and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glUniform1d> {};
template<>
struct fex_gen_config<glUniform1dv> {};
template<>
struct fex_gen_config<glUniform1fARB> {};
template<>
struct fex_gen_config<glUniform1f> {};
template<>
struct fex_gen_config<glUniform1fvARB> {};
template<>
struct fex_gen_config<glUniform1fv> {};
template<>
struct fex_gen_config<glUniform1i64ARB> {};
template<>
struct fex_gen_config<glUniform1i64NV> {};
template<>
struct fex_gen_config<glUniform1i64vARB> {};
template<>
struct fex_gen_config<glUniform1i64vNV> {};
template<>
struct fex_gen_config<glUniform1iARB> {};
template<>
struct fex_gen_config<glUniform1i> {};
template<>
struct fex_gen_config<glUniform1ivARB> {};
template<>
struct fex_gen_config<glUniform1iv> {};
template<>
struct fex_gen_config<glUniform1ui64ARB> {};
template<>
struct fex_gen_config<glUniform1ui64NV> {};
template<>
struct fex_gen_config<glUniform1ui64vARB> {};
template<>
struct fex_gen_config<glUniform1ui64vNV> {};
template<>
struct fex_gen_config<glUniform1uiEXT> {};
template<>
struct fex_gen_config<glUniform1ui> {};
template<>
struct fex_gen_config<glUniform1uivEXT> {};
template<>
struct fex_gen_config<glUniform1uiv> {};
// glUniform2* entry points (core, ARB, EXT and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glUniform2d> {};
template<>
struct fex_gen_config<glUniform2dv> {};
template<>
struct fex_gen_config<glUniform2fARB> {};
template<>
struct fex_gen_config<glUniform2f> {};
template<>
struct fex_gen_config<glUniform2fvARB> {};
template<>
struct fex_gen_config<glUniform2fv> {};
template<>
struct fex_gen_config<glUniform2i64ARB> {};
template<>
struct fex_gen_config<glUniform2i64NV> {};
template<>
struct fex_gen_config<glUniform2i64vARB> {};
template<>
struct fex_gen_config<glUniform2i64vNV> {};
template<>
struct fex_gen_config<glUniform2iARB> {};
template<>
struct fex_gen_config<glUniform2i> {};
template<>
struct fex_gen_config<glUniform2ivARB> {};
template<>
struct fex_gen_config<glUniform2iv> {};
template<>
struct fex_gen_config<glUniform2ui64ARB> {};
template<>
struct fex_gen_config<glUniform2ui64NV> {};
template<>
struct fex_gen_config<glUniform2ui64vARB> {};
template<>
struct fex_gen_config<glUniform2ui64vNV> {};
template<>
struct fex_gen_config<glUniform2uiEXT> {};
template<>
struct fex_gen_config<glUniform2ui> {};
template<>
struct fex_gen_config<glUniform2uivEXT> {};
template<>
struct fex_gen_config<glUniform2uiv> {};
// glUniform3* entry points (core, ARB, EXT and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glUniform3d> {};
template<>
struct fex_gen_config<glUniform3dv> {};
template<>
struct fex_gen_config<glUniform3fARB> {};
template<>
struct fex_gen_config<glUniform3f> {};
template<>
struct fex_gen_config<glUniform3fvARB> {};
template<>
struct fex_gen_config<glUniform3fv> {};
template<>
struct fex_gen_config<glUniform3i64ARB> {};
template<>
struct fex_gen_config<glUniform3i64NV> {};
template<>
struct fex_gen_config<glUniform3i64vARB> {};
template<>
struct fex_gen_config<glUniform3i64vNV> {};
template<>
struct fex_gen_config<glUniform3iARB> {};
template<>
struct fex_gen_config<glUniform3i> {};
template<>
struct fex_gen_config<glUniform3ivARB> {};
template<>
struct fex_gen_config<glUniform3iv> {};
template<>
struct fex_gen_config<glUniform3ui64ARB> {};
template<>
struct fex_gen_config<glUniform3ui64NV> {};
template<>
struct fex_gen_config<glUniform3ui64vARB> {};
template<>
struct fex_gen_config<glUniform3ui64vNV> {};
template<>
struct fex_gen_config<glUniform3uiEXT> {};
template<>
struct fex_gen_config<glUniform3ui> {};
template<>
struct fex_gen_config<glUniform3uivEXT> {};
template<>
struct fex_gen_config<glUniform3uiv> {};
// glUniform4* entry points (core, ARB, EXT and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glUniform4d> {};
template<>
struct fex_gen_config<glUniform4dv> {};
template<>
struct fex_gen_config<glUniform4fARB> {};
template<>
struct fex_gen_config<glUniform4f> {};
template<>
struct fex_gen_config<glUniform4fvARB> {};
template<>
struct fex_gen_config<glUniform4fv> {};
template<>
struct fex_gen_config<glUniform4i64ARB> {};
template<>
struct fex_gen_config<glUniform4i64NV> {};
template<>
struct fex_gen_config<glUniform4i64vARB> {};
template<>
struct fex_gen_config<glUniform4i64vNV> {};
template<>
struct fex_gen_config<glUniform4iARB> {};
template<>
struct fex_gen_config<glUniform4i> {};
template<>
struct fex_gen_config<glUniform4ivARB> {};
template<>
struct fex_gen_config<glUniform4iv> {};
template<>
struct fex_gen_config<glUniform4ui64ARB> {};
template<>
struct fex_gen_config<glUniform4ui64NV> {};
template<>
struct fex_gen_config<glUniform4ui64vARB> {};
template<>
struct fex_gen_config<glUniform4ui64vNV> {};
template<>
struct fex_gen_config<glUniform4uiEXT> {};
template<>
struct fex_gen_config<glUniform4ui> {};
template<>
struct fex_gen_config<glUniform4uivEXT> {};
template<>
struct fex_gen_config<glUniform4uiv> {};
// glUniformBlockBinding, glUniformBufferEXT and the glUniformHandleui64* entry points,
// unannotated.
template<>
struct fex_gen_config<glUniformBlockBinding> {};
template<>
struct fex_gen_config<glUniformBufferEXT> {};
template<>
struct fex_gen_config<glUniformHandleui64ARB> {};
template<>
struct fex_gen_config<glUniformHandleui64NV> {};
template<>
struct fex_gen_config<glUniformHandleui64vARB> {};
template<>
struct fex_gen_config<glUniformHandleui64vNV> {};
// glUniformMatrix* entry points (square and non-square, float and double, plus the
// ARB float variants). All configs are empty.
template<>
struct fex_gen_config<glUniformMatrix2dv> {};
template<>
struct fex_gen_config<glUniformMatrix2fvARB> {};
template<>
struct fex_gen_config<glUniformMatrix2fv> {};
template<>
struct fex_gen_config<glUniformMatrix2x3dv> {};
template<>
struct fex_gen_config<glUniformMatrix2x3fv> {};
template<>
struct fex_gen_config<glUniformMatrix2x4dv> {};
template<>
struct fex_gen_config<glUniformMatrix2x4fv> {};
template<>
struct fex_gen_config<glUniformMatrix3dv> {};
template<>
struct fex_gen_config<glUniformMatrix3fvARB> {};
template<>
struct fex_gen_config<glUniformMatrix3fv> {};
template<>
struct fex_gen_config<glUniformMatrix3x2dv> {};
template<>
struct fex_gen_config<glUniformMatrix3x2fv> {};
template<>
struct fex_gen_config<glUniformMatrix3x4dv> {};
template<>
struct fex_gen_config<glUniformMatrix3x4fv> {};
template<>
struct fex_gen_config<glUniformMatrix4dv> {};
template<>
struct fex_gen_config<glUniformMatrix4fvARB> {};
template<>
struct fex_gen_config<glUniformMatrix4fv> {};
template<>
struct fex_gen_config<glUniformMatrix4x2dv> {};
template<>
struct fex_gen_config<glUniformMatrix4x2fv> {};
template<>
struct fex_gen_config<glUniformMatrix4x3dv> {};
template<>
struct fex_gen_config<glUniformMatrix4x3fv> {};
// glUniformSubroutinesuiv, glUniformui64*NV, glUnlockArraysEXT and glUnmap* entry
// points, unannotated.
template<>
struct fex_gen_config<glUniformSubroutinesuiv> {};
template<>
struct fex_gen_config<glUniformui64NV> {};
template<>
struct fex_gen_config<glUniformui64vNV> {};
template<>
struct fex_gen_config<glUnlockArraysEXT> {};
template<>
struct fex_gen_config<glUnmapObjectBufferATI> {};
template<>
struct fex_gen_config<glUnmapTexture2DINTEL> {};
// glUpdateObjectBufferATI is only configured when IS_32BIT_THUNK is not defined;
// 32-bit builds get no config for it at all.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glUpdateObjectBufferATI> {};
#endif
// glUploadGpuMaskNVX, glUse* and glValidateProgram* entry points, unannotated.
template<>
struct fex_gen_config<glUploadGpuMaskNVX> {};
template<>
struct fex_gen_config<glUseProgram> {};
template<>
struct fex_gen_config<glUseProgramObjectARB> {};
template<>
struct fex_gen_config<glUseProgramStages> {};
template<>
struct fex_gen_config<glUseShaderProgramEXT> {};
template<>
struct fex_gen_config<glValidateProgramARB> {};
template<>
struct fex_gen_config<glValidateProgram> {};
template<>
struct fex_gen_config<glValidateProgramPipeline> {};
// glVariantArrayObjectATI and the glVariant*EXT entry points, unannotated.
template<>
struct fex_gen_config<glVariantArrayObjectATI> {};
template<>
struct fex_gen_config<glVariantbvEXT> {};
template<>
struct fex_gen_config<glVariantdvEXT> {};
template<>
struct fex_gen_config<glVariantfvEXT> {};
template<>
struct fex_gen_config<glVariantivEXT> {};
template<>
struct fex_gen_config<glVariantPointerEXT> {};
template<>
struct fex_gen_config<glVariantsvEXT> {};
template<>
struct fex_gen_config<glVariantubvEXT> {};
template<>
struct fex_gen_config<glVariantuivEXT> {};
template<>
struct fex_gen_config<glVariantusvEXT> {};
// glVDPAU*NV entry points. Most are configured unconditionally with empty configs;
// glVDPAUMapSurfacesNV and glVDPAUUnmapSurfacesNV are only configured when
// IS_32BIT_THUNK is not defined.
template<>
struct fex_gen_config<glVDPAUFiniNV> {};
template<>
struct fex_gen_config<glVDPAUGetSurfaceivNV> {};
template<>
struct fex_gen_config<glVDPAUInitNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glVDPAUMapSurfacesNV> {};
#endif
template<>
struct fex_gen_config<glVDPAUSurfaceAccessNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glVDPAUUnmapSurfacesNV> {};
#endif
template<>
struct fex_gen_config<glVDPAUUnregisterSurfaceNV> {};
// glVertex2* entry points (scalar and vector forms, including OES/NV variants).
// All configs are empty.
template<>
struct fex_gen_config<glVertex2bOES> {};
template<>
struct fex_gen_config<glVertex2bvOES> {};
template<>
struct fex_gen_config<glVertex2d> {};
template<>
struct fex_gen_config<glVertex2dv> {};
template<>
struct fex_gen_config<glVertex2f> {};
template<>
struct fex_gen_config<glVertex2fv> {};
template<>
struct fex_gen_config<glVertex2hNV> {};
template<>
struct fex_gen_config<glVertex2hvNV> {};
template<>
struct fex_gen_config<glVertex2i> {};
template<>
struct fex_gen_config<glVertex2iv> {};
template<>
struct fex_gen_config<glVertex2s> {};
template<>
struct fex_gen_config<glVertex2sv> {};
template<>
struct fex_gen_config<glVertex2xOES> {};
template<>
struct fex_gen_config<glVertex2xvOES> {};
// glVertex3* entry points (scalar and vector forms, including OES/NV variants).
// All configs are empty.
template<>
struct fex_gen_config<glVertex3bOES> {};
template<>
struct fex_gen_config<glVertex3bvOES> {};
template<>
struct fex_gen_config<glVertex3d> {};
template<>
struct fex_gen_config<glVertex3dv> {};
template<>
struct fex_gen_config<glVertex3f> {};
template<>
struct fex_gen_config<glVertex3fv> {};
template<>
struct fex_gen_config<glVertex3hNV> {};
template<>
struct fex_gen_config<glVertex3hvNV> {};
template<>
struct fex_gen_config<glVertex3i> {};
template<>
struct fex_gen_config<glVertex3iv> {};
template<>
struct fex_gen_config<glVertex3s> {};
template<>
struct fex_gen_config<glVertex3sv> {};
template<>
struct fex_gen_config<glVertex3xOES> {};
template<>
struct fex_gen_config<glVertex3xvOES> {};
// glVertex4* entry points (scalar and vector forms, including OES/NV variants).
// All configs are empty.
template<>
struct fex_gen_config<glVertex4bOES> {};
template<>
struct fex_gen_config<glVertex4bvOES> {};
template<>
struct fex_gen_config<glVertex4d> {};
template<>
struct fex_gen_config<glVertex4dv> {};
template<>
struct fex_gen_config<glVertex4f> {};
template<>
struct fex_gen_config<glVertex4fv> {};
template<>
struct fex_gen_config<glVertex4hNV> {};
template<>
struct fex_gen_config<glVertex4hvNV> {};
template<>
struct fex_gen_config<glVertex4i> {};
template<>
struct fex_gen_config<glVertex4iv> {};
template<>
struct fex_gen_config<glVertex4s> {};
template<>
struct fex_gen_config<glVertex4sv> {};
template<>
struct fex_gen_config<glVertex4xOES> {};
template<>
struct fex_gen_config<glVertex4xvOES> {};
// glVertexArray* entry points from glVertexArrayAttribBinding through
// glVertexArrayRangeAPPLE. All configs are empty and unconditional.
// NOTE(review): glVertexArrayRangeAPPLE is unguarded while glVertexArrayRangeNV is
// 64-bit only — confirm that difference is intentional.
template<>
struct fex_gen_config<glVertexArrayAttribBinding> {};
template<>
struct fex_gen_config<glVertexArrayAttribFormat> {};
template<>
struct fex_gen_config<glVertexArrayAttribIFormat> {};
template<>
struct fex_gen_config<glVertexArrayAttribLFormat> {};
template<>
struct fex_gen_config<glVertexArrayBindingDivisor> {};
template<>
struct fex_gen_config<glVertexArrayBindVertexBufferEXT> {};
template<>
struct fex_gen_config<glVertexArrayColorOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayEdgeFlagOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayElementBuffer> {};
template<>
struct fex_gen_config<glVertexArrayFogCoordOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayIndexOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayMultiTexCoordOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayNormalOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayParameteriAPPLE> {};
template<>
struct fex_gen_config<glVertexArrayRangeAPPLE> {};
// glVertexArrayRangeNV is only configured when IS_32BIT_THUNK is not defined;
// 32-bit builds get no config for it at all.
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
template<>
struct fex_gen_config<glVertexArrayRangeNV> {};
#endif
// glVertexArray* entry points from glVertexArraySecondaryColorOffsetEXT through
// glVertexArrayVertexBuffer (singular). All configs are empty.
// glVertexArrayVertexBuffers (plural) is configured separately per bitness.
template<>
struct fex_gen_config<glVertexArraySecondaryColorOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayTexCoordOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribBindingEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribDivisorEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribFormatEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribIFormatEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribIOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribLFormatEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribLOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexAttribOffsetEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexBindingDivisorEXT> {};
template<>
struct fex_gen_config<glVertexArrayVertexBuffer> {};
// glVertexArrayVertexBuffers is configured differently per bitness:
//  - IS_32BIT_THUNK not defined: plain empty config, no annotations.
//  - IS_32BIT_THUNK defined: marked custom_host_impl, with parameter index 4
//    (const GLintptr*) marked ptr_passthrough.
// NOTE(review): presumably GLintptr has a different width for 32-bit guests, so a
// hand-written host implementation must convert the offsets array — the
// implementation is not visible here; confirm.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<glVertexArrayVertexBuffers> {};
#else
template<>
struct fex_gen_config<glVertexArrayVertexBuffers> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glVertexArrayVertexBuffers, 4, const GLintptr*> : fexgen::ptr_passthrough {};
#endif
// glVertexArrayVertexOffsetEXT: empty config, no fexgen annotations.
template<>
struct fex_gen_config<glVertexArrayVertexOffsetEXT> {};
// glVertexAttrib1* entry points (core, ARB and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glVertexAttrib1dARB> {};
template<>
struct fex_gen_config<glVertexAttrib1d> {};
template<>
struct fex_gen_config<glVertexAttrib1dNV> {};
template<>
struct fex_gen_config<glVertexAttrib1dvARB> {};
template<>
struct fex_gen_config<glVertexAttrib1dv> {};
template<>
struct fex_gen_config<glVertexAttrib1dvNV> {};
template<>
struct fex_gen_config<glVertexAttrib1fARB> {};
template<>
struct fex_gen_config<glVertexAttrib1f> {};
template<>
struct fex_gen_config<glVertexAttrib1fNV> {};
template<>
struct fex_gen_config<glVertexAttrib1fvARB> {};
template<>
struct fex_gen_config<glVertexAttrib1fv> {};
template<>
struct fex_gen_config<glVertexAttrib1fvNV> {};
template<>
struct fex_gen_config<glVertexAttrib1hNV> {};
template<>
struct fex_gen_config<glVertexAttrib1hvNV> {};
template<>
struct fex_gen_config<glVertexAttrib1sARB> {};
template<>
struct fex_gen_config<glVertexAttrib1s> {};
template<>
struct fex_gen_config<glVertexAttrib1sNV> {};
template<>
struct fex_gen_config<glVertexAttrib1svARB> {};
template<>
struct fex_gen_config<glVertexAttrib1sv> {};
template<>
struct fex_gen_config<glVertexAttrib1svNV> {};
// glVertexAttrib2* entry points (core, ARB and NV variants; scalar and vector forms).
// All configs are empty.
template<>
struct fex_gen_config<glVertexAttrib2dARB> {};
template<>
struct fex_gen_config<glVertexAttrib2d> {};
template<>
struct fex_gen_config<glVertexAttrib2dNV> {};
template<>
struct fex_gen_config<glVertexAttrib2dvARB> {};
template<>
struct fex_gen_config<glVertexAttrib2dv> {};
template<>
struct fex_gen_config<glVertexAttrib2dvNV> {};
template<>
struct fex_gen_config<glVertexAttrib2fARB> {};
template<>
struct fex_gen_config<glVertexAttrib2f> {};
template<>
struct fex_gen_config<glVertexAttrib2fNV> {};
template<>
struct fex_gen_config<glVertexAttrib2fvARB> {};
template<>
struct fex_gen_config<glVertexAttrib2fv> {};
template<>
struct fex_gen_config<glVertexAttrib2fvNV> {};
template<>
struct fex_gen_config<glVertexAttrib2hNV> {};
template<>
struct fex_gen_config<glVertexAttrib2hvNV> {};
template<>
struct fex_gen_config<glVertexAttrib2sARB> {};
template<>
struct fex_gen_config<glVertexAttrib2s> {};
template<>
struct fex_gen_config<glVertexAttrib2sNV> {};
template<>
struct fex_gen_config<glVertexAttrib2svARB> {};
template<>
struct fex_gen_config<glVertexAttrib2sv> {};
template<>
struct fex_gen_config<glVertexAttrib2svNV> {};
template<>
struct fex_gen_config<glVertexAttrib3dARB> {};
template<>
struct fex_gen_config<glVertexAttrib3d> {};
template<>
struct fex_gen_config<glVertexAttrib3dNV> {};
template<>
struct fex_gen_config<glVertexAttrib3dvARB> {};
template<>
struct fex_gen_config<glVertexAttrib3dv> {};
template<>
struct fex_gen_config<glVertexAttrib3dvNV> {};
template<>
struct fex_gen_config<glVertexAttrib3fARB> {};
template<>
struct fex_gen_config<glVertexAttrib3f> {};
template<>
struct fex_gen_config<glVertexAttrib3fNV> {};
template<>
struct fex_gen_config<glVertexAttrib3fvARB> {};
template<>
struct fex_gen_config<glVertexAttrib3fv> {};
template<>
struct fex_gen_config<glVertexAttrib3fvNV> {};
template<>
struct fex_gen_config<glVertexAttrib3hNV> {};
template<>
struct fex_gen_config<glVertexAttrib3hvNV> {};
template<>
struct fex_gen_config<glVertexAttrib3sARB> {};
template<>
struct fex_gen_config<glVertexAttrib3s> {};
template<>
struct fex_gen_config<glVertexAttrib3sNV> {};
template<>
struct fex_gen_config<glVertexAttrib3svARB> {};
template<>
struct fex_gen_config<glVertexAttrib3sv> {};
template<>
struct fex_gen_config<glVertexAttrib3svNV> {};
template<>
struct fex_gen_config<glVertexAttrib4bvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4bv> {};
template<>
struct fex_gen_config<glVertexAttrib4dARB> {};
template<>
struct fex_gen_config<glVertexAttrib4d> {};
template<>
struct fex_gen_config<glVertexAttrib4dNV> {};
template<>
struct fex_gen_config<glVertexAttrib4dvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4dv> {};
template<>
struct fex_gen_config<glVertexAttrib4dvNV> {};
template<>
struct fex_gen_config<glVertexAttrib4fARB> {};
template<>
struct fex_gen_config<glVertexAttrib4f> {};
template<>
struct fex_gen_config<glVertexAttrib4fNV> {};
template<>
struct fex_gen_config<glVertexAttrib4fvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4fv> {};
template<>
struct fex_gen_config<glVertexAttrib4fvNV> {};
template<>
struct fex_gen_config<glVertexAttrib4hNV> {};
template<>
struct fex_gen_config<glVertexAttrib4hvNV> {};
template<>
struct fex_gen_config<glVertexAttrib4ivARB> {};
template<>
struct fex_gen_config<glVertexAttrib4iv> {};
template<>
struct fex_gen_config<glVertexAttrib4NbvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nbv> {};
template<>
struct fex_gen_config<glVertexAttrib4NivARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Niv> {};
template<>
struct fex_gen_config<glVertexAttrib4NsvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nsv> {};
template<>
struct fex_gen_config<glVertexAttrib4NubARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nub> {};
template<>
struct fex_gen_config<glVertexAttrib4NubvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nubv> {};
template<>
struct fex_gen_config<glVertexAttrib4NuivARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nuiv> {};
template<>
struct fex_gen_config<glVertexAttrib4NusvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4Nusv> {};
template<>
struct fex_gen_config<glVertexAttrib4sARB> {};
template<>
struct fex_gen_config<glVertexAttrib4s> {};
template<>
struct fex_gen_config<glVertexAttrib4sNV> {};
template<>
struct fex_gen_config<glVertexAttrib4svARB> {};
template<>
struct fex_gen_config<glVertexAttrib4sv> {};
template<>
struct fex_gen_config<glVertexAttrib4svNV> {};
template<>
struct fex_gen_config<glVertexAttrib4ubNV> {};
template<>
struct fex_gen_config<glVertexAttrib4ubvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4ubv> {};
template<>
struct fex_gen_config<glVertexAttrib4ubvNV> {};
template<>
struct fex_gen_config<glVertexAttrib4uivARB> {};
template<>
struct fex_gen_config<glVertexAttrib4uiv> {};
template<>
struct fex_gen_config<glVertexAttrib4usvARB> {};
template<>
struct fex_gen_config<glVertexAttrib4usv> {};
template<>
struct fex_gen_config<glVertexAttribArrayObjectATI> {};
template<>
struct fex_gen_config<glVertexAttribBinding> {};
template<>
struct fex_gen_config<glVertexAttribDivisorARB> {};
template<>
struct fex_gen_config<glVertexAttribDivisor> {};
template<>
struct fex_gen_config<glVertexAttribFormat> {};
template<>
struct fex_gen_config<glVertexAttribFormatNV> {};
template<>
struct fex_gen_config<glVertexAttribI1iEXT> {};
template<>
struct fex_gen_config<glVertexAttribI1i> {};
template<>
struct fex_gen_config<glVertexAttribI1ivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI1iv> {};
template<>
struct fex_gen_config<glVertexAttribI1uiEXT> {};
template<>
struct fex_gen_config<glVertexAttribI1ui> {};
template<>
struct fex_gen_config<glVertexAttribI1uivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI1uiv> {};
template<>
struct fex_gen_config<glVertexAttribI2iEXT> {};
template<>
struct fex_gen_config<glVertexAttribI2i> {};
template<>
struct fex_gen_config<glVertexAttribI2ivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI2iv> {};
template<>
struct fex_gen_config<glVertexAttribI2uiEXT> {};
template<>
struct fex_gen_config<glVertexAttribI2ui> {};
template<>
struct fex_gen_config<glVertexAttribI2uivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI2uiv> {};
template<>
struct fex_gen_config<glVertexAttribI3iEXT> {};
template<>
struct fex_gen_config<glVertexAttribI3i> {};
template<>
struct fex_gen_config<glVertexAttribI3ivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI3iv> {};
template<>
struct fex_gen_config<glVertexAttribI3uiEXT> {};
template<>
struct fex_gen_config<glVertexAttribI3ui> {};
template<>
struct fex_gen_config<glVertexAttribI3uivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI3uiv> {};
template<>
struct fex_gen_config<glVertexAttribI4bvEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4bv> {};
template<>
struct fex_gen_config<glVertexAttribI4iEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4i> {};
template<>
struct fex_gen_config<glVertexAttribI4ivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4iv> {};
template<>
struct fex_gen_config<glVertexAttribI4svEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4sv> {};
template<>
struct fex_gen_config<glVertexAttribI4ubvEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4ubv> {};
template<>
struct fex_gen_config<glVertexAttribI4uiEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4ui> {};
template<>
struct fex_gen_config<glVertexAttribI4uivEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4uiv> {};
template<>
struct fex_gen_config<glVertexAttribI4usvEXT> {};
template<>
struct fex_gen_config<glVertexAttribI4usv> {};
template<>
struct fex_gen_config<glVertexAttribIFormat> {};
template<>
struct fex_gen_config<glVertexAttribIFormatNV> {};
template<>
struct fex_gen_config<glVertexAttribIPointerEXT> {};
template<>
struct fex_gen_config<glVertexAttribIPointer> {};
template<>
struct fex_gen_config<glVertexAttribL1dEXT> {};
template<>
struct fex_gen_config<glVertexAttribL1d> {};
template<>
struct fex_gen_config<glVertexAttribL1dvEXT> {};
template<>
struct fex_gen_config<glVertexAttribL1dv> {};
template<>
struct fex_gen_config<glVertexAttribL1i64NV> {};
template<>
struct fex_gen_config<glVertexAttribL1i64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL1ui64ARB> {};
template<>
struct fex_gen_config<glVertexAttribL1ui64NV> {};
template<>
struct fex_gen_config<glVertexAttribL1ui64vARB> {};
template<>
struct fex_gen_config<glVertexAttribL1ui64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL2dEXT> {};
template<>
struct fex_gen_config<glVertexAttribL2d> {};
template<>
struct fex_gen_config<glVertexAttribL2dvEXT> {};
template<>
struct fex_gen_config<glVertexAttribL2dv> {};
template<>
struct fex_gen_config<glVertexAttribL2i64NV> {};
template<>
struct fex_gen_config<glVertexAttribL2i64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL2ui64NV> {};
template<>
struct fex_gen_config<glVertexAttribL2ui64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL3dEXT> {};
template<>
struct fex_gen_config<glVertexAttribL3d> {};
template<>
struct fex_gen_config<glVertexAttribL3dvEXT> {};
template<>
struct fex_gen_config<glVertexAttribL3dv> {};
template<>
struct fex_gen_config<glVertexAttribL3i64NV> {};
template<>
struct fex_gen_config<glVertexAttribL3i64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL3ui64NV> {};
template<>
struct fex_gen_config<glVertexAttribL3ui64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL4dEXT> {};
template<>
struct fex_gen_config<glVertexAttribL4d> {};
template<>
struct fex_gen_config<glVertexAttribL4dvEXT> {};
template<>
struct fex_gen_config<glVertexAttribL4dv> {};
template<>
struct fex_gen_config<glVertexAttribL4i64NV> {};
template<>
struct fex_gen_config<glVertexAttribL4i64vNV> {};
template<>
struct fex_gen_config<glVertexAttribL4ui64NV> {};
template<>
struct fex_gen_config<glVertexAttribL4ui64vNV> {};
template<>
struct fex_gen_config<glVertexAttribLFormat> {};
template<>
struct fex_gen_config<glVertexAttribLFormatNV> {};
template<>
struct fex_gen_config<glVertexAttribLPointerEXT> {};
template<>
struct fex_gen_config<glVertexAttribLPointer> {};
template<>
struct fex_gen_config<glVertexAttribP1ui> {};
template<>
struct fex_gen_config<glVertexAttribP1uiv> {};
template<>
struct fex_gen_config<glVertexAttribP2ui> {};
template<>
struct fex_gen_config<glVertexAttribP2uiv> {};
template<>
struct fex_gen_config<glVertexAttribP3ui> {};
template<>
struct fex_gen_config<glVertexAttribP3uiv> {};
template<>
struct fex_gen_config<glVertexAttribP4ui> {};
template<>
struct fex_gen_config<glVertexAttribP4uiv> {};
template<>
struct fex_gen_config<glVertexAttribParameteriAMD> {};
template<>
struct fex_gen_config<glVertexAttribPointerARB> {};
template<>
struct fex_gen_config<glVertexAttribPointer> {};
template<>
struct fex_gen_config<glVertexAttribPointerNV> {};
template<>
struct fex_gen_config<glVertexAttribs1dvNV> {};
template<>
struct fex_gen_config<glVertexAttribs1fvNV> {};
template<>
struct fex_gen_config<glVertexAttribs1hvNV> {};
template<>
struct fex_gen_config<glVertexAttribs1svNV> {};
template<>
struct fex_gen_config<glVertexAttribs2dvNV> {};
template<>
struct fex_gen_config<glVertexAttribs2fvNV> {};
template<>
struct fex_gen_config<glVertexAttribs2hvNV> {};
template<>
struct fex_gen_config<glVertexAttribs2svNV> {};
template<>
struct fex_gen_config<glVertexAttribs3dvNV> {};
template<>
struct fex_gen_config<glVertexAttribs3fvNV> {};
template<>
struct fex_gen_config<glVertexAttribs3hvNV> {};
template<>
struct fex_gen_config<glVertexAttribs3svNV> {};
template<>
struct fex_gen_config<glVertexAttribs4dvNV> {};
template<>
struct fex_gen_config<glVertexAttribs4fvNV> {};
template<>
struct fex_gen_config<glVertexAttribs4hvNV> {};
template<>
struct fex_gen_config<glVertexAttribs4svNV> {};
template<>
struct fex_gen_config<glVertexAttribs4ubvNV> {};
template<>
struct fex_gen_config<glVertexBindingDivisor> {};
template<>
struct fex_gen_config<glVertexBlendARB> {};
template<>
struct fex_gen_config<glVertexBlendEnvfATI> {};
template<>
struct fex_gen_config<glVertexBlendEnviATI> {};
template<>
struct fex_gen_config<glVertexFormatNV> {};
template<>
struct fex_gen_config<glVertexPointerEXT> {};
template<>
struct fex_gen_config<glVertexPointer> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// The two functions below are only registered when IS_32BIT_THUNK is not
// defined. Each has a `const void**` parameter (index 3 and index 2
// respectively) that is annotated with fexgen::assume_compatible_data_layout.
// NOTE(review): presumably the pointer-to-pointer argument is what blocks
// 32-bit support here — confirm against the generator's rules.
template<>
struct fex_gen_config<glVertexPointerListIBM> {};
template<>
struct fex_gen_param<glVertexPointerListIBM, 3, const void**> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<glVertexPointervINTEL> {};
template<>
struct fex_gen_param<glVertexPointervINTEL, 2, const void**> : fexgen::assume_compatible_data_layout {};
#endif
template<>
struct fex_gen_config<glVertexStream1dATI> {};
template<>
struct fex_gen_config<glVertexStream1dvATI> {};
template<>
struct fex_gen_config<glVertexStream1fATI> {};
template<>
struct fex_gen_config<glVertexStream1fvATI> {};
template<>
struct fex_gen_config<glVertexStream1iATI> {};
template<>
struct fex_gen_config<glVertexStream1ivATI> {};
template<>
struct fex_gen_config<glVertexStream1sATI> {};
template<>
struct fex_gen_config<glVertexStream1svATI> {};
template<>
struct fex_gen_config<glVertexStream2dATI> {};
template<>
struct fex_gen_config<glVertexStream2dvATI> {};
template<>
struct fex_gen_config<glVertexStream2fATI> {};
template<>
struct fex_gen_config<glVertexStream2fvATI> {};
template<>
struct fex_gen_config<glVertexStream2iATI> {};
template<>
struct fex_gen_config<glVertexStream2ivATI> {};
template<>
struct fex_gen_config<glVertexStream2sATI> {};
template<>
struct fex_gen_config<glVertexStream2svATI> {};
template<>
struct fex_gen_config<glVertexStream3dATI> {};
template<>
struct fex_gen_config<glVertexStream3dvATI> {};
template<>
struct fex_gen_config<glVertexStream3fATI> {};
template<>
struct fex_gen_config<glVertexStream3fvATI> {};
template<>
struct fex_gen_config<glVertexStream3iATI> {};
template<>
struct fex_gen_config<glVertexStream3ivATI> {};
template<>
struct fex_gen_config<glVertexStream3sATI> {};
template<>
struct fex_gen_config<glVertexStream3svATI> {};
template<>
struct fex_gen_config<glVertexStream4dATI> {};
template<>
struct fex_gen_config<glVertexStream4dvATI> {};
template<>
struct fex_gen_config<glVertexStream4fATI> {};
template<>
struct fex_gen_config<glVertexStream4fvATI> {};
template<>
struct fex_gen_config<glVertexStream4iATI> {};
template<>
struct fex_gen_config<glVertexStream4ivATI> {};
template<>
struct fex_gen_config<glVertexStream4sATI> {};
template<>
struct fex_gen_config<glVertexStream4svATI> {};
template<>
struct fex_gen_config<glVertexWeightfEXT> {};
template<>
struct fex_gen_config<glVertexWeightfvEXT> {};
template<>
struct fex_gen_config<glVertexWeighthNV> {};
template<>
struct fex_gen_config<glVertexWeighthvNV> {};
template<>
struct fex_gen_config<glVertexWeightPointerEXT> {};
template<>
struct fex_gen_config<glVideoCaptureStreamParameterdvNV> {};
template<>
struct fex_gen_config<glVideoCaptureStreamParameterfvNV> {};
template<>
struct fex_gen_config<glVideoCaptureStreamParameterivNV> {};
template<>
struct fex_gen_config<glViewportArrayv> {};
template<>
struct fex_gen_config<glViewport> {};
template<>
struct fex_gen_config<glViewportIndexedf> {};
template<>
struct fex_gen_config<glViewportIndexedfv> {};
template<>
struct fex_gen_config<glViewportPositionWScaleNV> {};
template<>
struct fex_gen_config<glViewportSwizzleNV> {};
template<>
struct fex_gen_config<glWaitSemaphoreEXT> {};
template<>
struct fex_gen_config<glWaitSemaphoreui64NVX> {};
template<>
struct fex_gen_config<glWaitSync> {};
template<>
struct fex_gen_config<glWaitVkSemaphoreNV> {};
template<>
struct fex_gen_config<glWeightbvARB> {};
template<>
struct fex_gen_config<glWeightdvARB> {};
template<>
struct fex_gen_config<glWeightfvARB> {};
template<>
struct fex_gen_config<glWeightivARB> {};
template<>
struct fex_gen_config<glWeightPathsNV> {};
template<>
struct fex_gen_config<glWeightPointerARB> {};
template<>
struct fex_gen_config<glWeightsvARB> {};
template<>
struct fex_gen_config<glWeightubvARB> {};
template<>
struct fex_gen_config<glWeightuivARB> {};
template<>
struct fex_gen_config<glWeightusvARB> {};
template<>
struct fex_gen_config<glWindowPos2dARB> {};
template<>
struct fex_gen_config<glWindowPos2dMESA> {};
template<>
struct fex_gen_config<glWindowPos2dvARB> {};
template<>
struct fex_gen_config<glWindowPos2dvMESA> {};
template<>
struct fex_gen_config<glWindowPos2fARB> {};
template<>
struct fex_gen_config<glWindowPos2fMESA> {};
template<>
struct fex_gen_config<glWindowPos2fvARB> {};
template<>
struct fex_gen_config<glWindowPos2fvMESA> {};
template<>
struct fex_gen_config<glWindowPos2iARB> {};
template<>
struct fex_gen_config<glWindowPos2iMESA> {};
template<>
struct fex_gen_config<glWindowPos2ivARB> {};
template<>
struct fex_gen_config<glWindowPos2ivMESA> {};
template<>
struct fex_gen_config<glWindowPos2sARB> {};
template<>
struct fex_gen_config<glWindowPos2sMESA> {};
template<>
struct fex_gen_config<glWindowPos2svARB> {};
template<>
struct fex_gen_config<glWindowPos2svMESA> {};
template<>
struct fex_gen_config<glWindowPos3dARB> {};
template<>
struct fex_gen_config<glWindowPos3dMESA> {};
template<>
struct fex_gen_config<glWindowPos3dvARB> {};
template<>
struct fex_gen_config<glWindowPos3dvMESA> {};
template<>
struct fex_gen_config<glWindowPos3fARB> {};
template<>
struct fex_gen_config<glWindowPos3fMESA> {};
template<>
struct fex_gen_config<glWindowPos3fvARB> {};
template<>
struct fex_gen_config<glWindowPos3fvMESA> {};
template<>
struct fex_gen_config<glWindowPos3iARB> {};
template<>
struct fex_gen_config<glWindowPos3iMESA> {};
template<>
struct fex_gen_config<glWindowPos3ivARB> {};
template<>
struct fex_gen_config<glWindowPos3ivMESA> {};
template<>
struct fex_gen_config<glWindowPos3sARB> {};
template<>
struct fex_gen_config<glWindowPos3sMESA> {};
template<>
struct fex_gen_config<glWindowPos3svARB> {};
template<>
struct fex_gen_config<glWindowPos3svMESA> {};
template<>
struct fex_gen_config<glWindowPos4dMESA> {};
template<>
struct fex_gen_config<glWindowPos4dvMESA> {};
template<>
struct fex_gen_config<glWindowPos4fMESA> {};
template<>
struct fex_gen_config<glWindowPos4fvMESA> {};
template<>
struct fex_gen_config<glWindowPos4iMESA> {};
template<>
struct fex_gen_config<glWindowPos4ivMESA> {};
template<>
struct fex_gen_config<glWindowPos4sMESA> {};
template<>
struct fex_gen_config<glWindowPos4svMESA> {};
template<>
struct fex_gen_config<glWindowRectanglesEXT> {};
template<>
struct fex_gen_config<glWriteMaskEXT> {};

// GLext.h
// template<> struct fex_gen_config<glGetVkProcAddrNV> : fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer{};
// Each function in this group is registered with an empty fex_gen_config
// specialization: no fexgen annotation base class is attached to any of them.
template<>
struct fex_gen_config<glPathColorGenNV> {};
template<>
struct fex_gen_config<glPathTexGenNV> {};
template<>
struct fex_gen_config<glPathFogGenNV> {};
template<>
struct fex_gen_config<glGetPathColorGenivNV> {};
template<>
struct fex_gen_config<glGetPathColorGenfvNV> {};
template<>
struct fex_gen_config<glGetPathTexGenivNV> {};
template<>
struct fex_gen_config<glGetPathTexGenfvNV> {};
template<>
struct fex_gen_config<glBlendEquationSeparateATI> {};

// glx.h
// glXWaitX is always registered; the two groups that follow are only
// registered when the corresponding GLX extension macro is defined by the
// included headers.
template<>
struct fex_gen_config<glXWaitX> {};
// Registered only if the headers define GLX_ARB_render_texture.
#ifdef GLX_ARB_render_texture
template<>
struct fex_gen_config<glXBindTexImageARB> {};
template<>
struct fex_gen_config<glXReleaseTexImageARB> {};
template<>
struct fex_gen_config<glXDrawableAttribARB> {};
#endif
// Registered only if the headers define GLX_MESA_swap_frame_usage.
#ifdef GLX_MESA_swap_frame_usage
template<>
struct fex_gen_config<glXGetFrameUsageMESA> {};
template<>
struct fex_gen_config<glXBeginFrameTrackingMESA> {};
template<>
struct fex_gen_config<glXEndFrameTrackingMESA> {};
template<>
struct fex_gen_config<glXQueryFrameTrackingMESA> {};
#endif

// glxext.h
template<>
struct fex_gen_config<glXGetGPUIDsAMD> {};
template<>
struct fex_gen_config<glXGetGPUInfoAMD> {};
template<>
struct fex_gen_config<glXGetContextGPUIDAMD> {};
template<>
struct fex_gen_config<glXCreateAssociatedContextAMD> {};
template<>
struct fex_gen_config<glXCreateAssociatedContextAttribsAMD> {};
template<>
struct fex_gen_config<glXDeleteAssociatedContextAMD> {};
template<>
struct fex_gen_config<glXMakeAssociatedContextCurrentAMD> {};
template<>
struct fex_gen_config<glXGetCurrentAssociatedContextAMD> {};
template<>
struct fex_gen_config<glXBlitContextFramebufferAMD> {};
// glXGetCurrentDisplayEXT is tagged fexgen::custom_host_impl, and its
// `_XDisplay*` at parameter index -1 is tagged fexgen::ptr_passthrough.
// NOTE(review): index -1 presumably designates the return value rather than
// an argument — confirm against GeneratorInterface.h.
template<>
struct fex_gen_config<glXGetCurrentDisplayEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXGetCurrentDisplayEXT, -1, _XDisplay*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXGetAGPOffsetMESA> {};
// glXCreateGLXPixmapMESA is tagged fexgen::custom_host_impl, and its
// `XVisualInfo*` at parameter index 1 is tagged fexgen::ptr_passthrough.
// NOTE(review): presumably the hand-written host implementation is then
// responsible for interpreting that pointer — confirm where it is defined.
template<>
struct fex_gen_config<glXCreateGLXPixmapMESA> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<glXCreateGLXPixmapMESA, 1, XVisualInfo*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<glXReleaseBuffersMESA> {};
template<>
struct fex_gen_config<glXSet3DfxModeMESA> {};
template<>
struct fex_gen_config<glXCopyBufferSubDataNV> {};
template<>
struct fex_gen_config<glXNamedCopyBufferSubDataNV> {};
template<>
struct fex_gen_config<glXCopyImageSubDataNV> {};
template<>
struct fex_gen_config<glXDelayBeforeSwapNV> {};
template<>
struct fex_gen_config<glXEnumerateVideoDevicesNV> {}; // TODO: Custom host impl
template<>
struct fex_gen_config<glXBindVideoDeviceNV> {};
template<>
struct fex_gen_config<glXJoinSwapGroupNV> {};
template<>
struct fex_gen_config<glXBindSwapBarrierNV> {};
template<>
struct fex_gen_config<glXQuerySwapGroupNV> {};
template<>
struct fex_gen_config<glXQueryMaxSwapGroupsNV> {};
template<>
struct fex_gen_config<glXQueryFrameCountNV> {};
template<>
struct fex_gen_config<glXResetFrameCountNV> {};
template<>
struct fex_gen_config<glXBindVideoCaptureDeviceNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// Registered only when IS_32BIT_THUNK is not defined.
template<>
struct fex_gen_config<glXEnumerateVideoCaptureDevicesNV> {};
#endif
template<>
struct fex_gen_config<glXLockVideoCaptureDeviceNV> {};
template<>
struct fex_gen_config<glXQueryVideoCaptureDeviceNV> {};
template<>
struct fex_gen_config<glXReleaseVideoCaptureDeviceNV> {};
template<>
struct fex_gen_config<glXGetVideoDeviceNV> {};
template<>
struct fex_gen_config<glXReleaseVideoDeviceNV> {};
template<>
struct fex_gen_config<glXBindVideoImageNV> {};
template<>
struct fex_gen_config<glXReleaseVideoImageNV> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// Both functions are registered only when IS_32BIT_THUNK is not defined.
template<>
struct fex_gen_config<glXSendPbufferToVideoNV> {};
template<>
struct fex_gen_config<glXGetVideoInfoNV> {};
#endif
template<>
struct fex_gen_config<glXQueryHyperpipeNetworkSGIX> {};
template<>
struct fex_gen_config<glXHyperpipeConfigSGIX> {};
template<>
struct fex_gen_config<glXQueryHyperpipeConfigSGIX> {};
template<>
struct fex_gen_config<glXDestroyHyperpipeConfigSGIX> {};
template<>
struct fex_gen_config<glXBindHyperpipeSGIX> {};
template<>
struct fex_gen_config<glXQueryHyperpipeBestAttribSGIX> {};
template<>
struct fex_gen_config<glXHyperpipeAttribSGIX> {};
template<>
struct fex_gen_config<glXQueryHyperpipeAttribSGIX> {};
template<>
struct fex_gen_config<glXBindSwapBarrierSGIX> {};
template<>
struct fex_gen_config<glXQueryMaxSwapBarriersSGIX> {};
template<>
struct fex_gen_config<glXJoinSwapGroupSGIX> {};
template<>
struct fex_gen_config<glXBindChannelToWindowSGIX> {};
template<>
struct fex_gen_config<glXChannelRectSGIX> {};
template<>
struct fex_gen_config<glXQueryChannelRectSGIX> {};
template<>
struct fex_gen_config<glXQueryChannelDeltasSGIX> {};
template<>
struct fex_gen_config<glXChannelRectSyncSGIX> {};
template<>
struct fex_gen_config<glXCushionSGI> {};
#ifndef IS_32BIT_THUNK
// TODO: 32-bit support
// Registered only when IS_32BIT_THUNK is not defined.
template<>
struct fex_gen_config<glXGetTransparentIndexSUN> {};
#endif
} // namespace internal


================================================
FILE: ThunkLibs/libSDL2/libSDL2_Guest.cpp
================================================
/*
$info$
tags: thunklibs|SDL2
desc: Handles sdlglproc, dload, stubs a few log fns
$end_info$
*/

#include <SDL2/SDL.h>
#include <SDL2/SDL_syswm.h>

#include <GL/glx.h>
#include <dlfcn.h>

#include <stdio.h>
#include <cstring>
#include <map>
#include <string>
#include <stdarg.h>

#include "common/Guest.h"

#include "thunkgen_guest_libSDL2.inl"

LOAD_LIB(libSDL2)

#include <vector>

struct __va_list_tag;


/**
 * Guest-side implementation of SDL_snprintf.
 *
 * Formats `fmt` with the variadic arguments into `text`, writing at most
 * `maxlen` bytes (terminating NUL included), by forwarding to the C library's
 * vsnprintf.
 *
 * The previous stub only printed a marker line and returned printf's
 * character count, so `text` was never written and callers went on to read an
 * unfilled buffer.
 *
 * NOTE(review): SDL ships its own formatter; this one relies on libc, so the
 * set of supported conversion specifiers is libc's.
 *
 * @param text   Destination buffer (may be null when maxlen is 0).
 * @param maxlen Size of the destination buffer in bytes.
 * @param fmt    printf-style format string.
 * @return The value returned by vsnprintf: the number of characters that
 *         would have been written (excluding the NUL), or a negative value on
 *         an encoding error.
 */
int SDL_snprintf(char* text, size_t maxlen, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  const int written = vsnprintf(text, maxlen, fmt, args);
  va_end(args);
  return written;
}
/**
 * Guest-side implementation of SDL_sscanf.
 *
 * Parses `text` according to `fmt`, storing converted values through the
 * variadic pointer arguments, by forwarding to the C library's vsscanf.
 *
 * The previous stub parsed nothing and returned printf's character count (a
 * positive number), so callers believed many items had been converted while
 * their output variables were left untouched.
 *
 * NOTE(review): SDL ships its own scanner; this one relies on libc, so the
 * set of supported conversion specifiers is libc's.
 *
 * @param text Input string to parse.
 * @param fmt  scanf-style format string.
 * @return The value returned by vsscanf: the number of input items assigned,
 *         or EOF if input ended before the first conversion.
 */
int SDL_sscanf(const char* text, const char* fmt, ...) {
  va_list args;
  va_start(args, fmt);
  const int matched = vsscanf(text, fmt, args);
  va_end(args);
  return matched;
}
// Stub for SDL_Log: the format string and its variadic arguments are ignored;
// only a fixed marker line is written to stdout.
void SDL_Log(const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_Log\n");
}
// Stub for SDL_LogCritical: category, format string and variadic arguments
// are ignored; only a fixed marker line is written to stdout.
void SDL_LogCritical(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogCritical\n");
}
// Stub for SDL_LogDebug: category, format string and variadic arguments are
// ignored; only a fixed marker line is written to stdout.
void SDL_LogDebug(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogDebug\n");
}
// Stub for SDL_LogError: category, format string and variadic arguments are
// ignored; only a fixed marker line is written to stdout.
void SDL_LogError(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogError\n");
}
// Stub for SDL_LogInfo: category, format string and variadic arguments are
// ignored; only a fixed marker line is written to stdout.
void SDL_LogInfo(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogInfo\n");
}
// Stub for SDL_LogMessage: category, priority, format string and variadic
// arguments are ignored; only a fixed marker line is written to stdout.
void SDL_LogMessage(int /*category*/, SDL_LogPriority /*priority*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogMessage\n");
}
// Stub for SDL_LogVerbose: category, format string and variadic arguments are
// ignored; only a fixed marker line is written to stdout.
void SDL_LogVerbose(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogVerbose\n");
}
// Stub for SDL_LogWarn: category, format string and variadic arguments are
// ignored; only a fixed marker line is written to stdout.
void SDL_LogWarn(int /*category*/, const char* /*fmt*/, ...) {
  (void)printf("SDL2: SDL_LogWarn\n");
}
/**
 * Guest-side stub for SDL_SetError.
 *
 * The format string and variadic arguments are ignored; a fixed marker line
 * is written to stdout so the call is visible while debugging.
 *
 * SDL documents that SDL_SetError always returns -1, and callers commonly
 * write `return SDL_SetError(...)` to report a failure. The previous stub
 * returned printf's character count (a positive value), which such callers
 * would propagate as a non-error result.
 *
 * @return Always -1.
 */
int SDL_SetError(const char* /*fmt*/, ...) {
  printf("SDL2: SDL_SetError\n");
  return -1;
}

// Stub for SDL_LogMessageV: category, priority, format string and the
// va_list argument are ignored; only a fixed marker line is written to stdout.
void SDL_LogMessageV(int /*category*/, SDL_LogPriority /*priority*/, const char* /*fmt*/, __va_list_tag* /*ap*/) {
  (void)printf("SDL2: SDL_LogMessageV\n");
}
// Stub for SDL_vsnprintf: every argument is ignored and the destination
// buffer is never written. Prints a fixed marker line to stdout and returns
// whatever printf returned for it.
// NOTE(review): callers expecting a formatted buffer will not get one.
int SDL_vsnprintf(char* /*text*/, size_t /*maxlen*/, const char* /*fmt*/, __va_list_tag* /*ap*/) {
  const int printed = printf("SDL2: SDL_vsnprintf\n");
  return printed;
}
// Stub for SDL_vsscanf: every argument is ignored and nothing is parsed.
// Prints a fixed marker line to stdout and returns whatever printf returned
// for it.
// NOTE(review): callers interpreting the result as a match count will be misled.
int SDL_vsscanf(const char* /*text*/, const char* /*fmt*/, __va_list_tag* /*ap*/) {
  const int printed = printf("SDL2: SDL_vsscanf\n");
  return printed;
}

extern "C" {
void* SDL_GL_GetProcAddress(const char* name) {
  // TODO: Fix this HACK
  return (void*)glXGetProcAddress((const GLubyte*)name);
}

// TODO: These are not 100% conforming to SDL either
// Opens the shared object `sofile` with dlopen (RTLD_NOW | RTLD_LOCAL).
// Returns the dlopen handle on success; on failure prints a diagnostic line
// to stdout and returns null.
void* SDL_LoadObject(const char* sofile) {
  void* const handle = dlopen(sofile, RTLD_NOW | RTLD_LOCAL);
  if (handle != nullptr) {
    return handle;
  }

  printf("SDL_LoadObject: Failed to load %s\n", sofile);
  return nullptr;
}

// Looks up the symbol `name` in the handle `lib` via dlsym and returns the
// address dlsym reports (null when the lookup fails).
void* SDL_LoadFunction(void* lib, const char* name) {
  void* const symbol = dlsym(lib, name);
  return symbol;
}

// Closes a handle previously obtained from dlopen. A null handle is ignored.
void SDL_UnloadObject(void* lib) {
  if (lib == nullptr) {
    return;
  }
  dlclose(lib);
}
}


================================================
FILE: ThunkLibs/libSDL2/libSDL2_Host.cpp
================================================
/*
$info$
tags: thunklibs|SDL2
$end_info$
*/

#include <stdio.h>

#include <SDL2/SDL.h>
#include <SDL2/SDL_syswm.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "thunkgen_host_libSDL2.inl"

EXPORTS(libSDL2)


================================================
FILE: ThunkLibs/libVDSO/Types.h
================================================
#pragma once
#include <cstdint>

// Time value made of two signed 64-bit fields. It is the argument type of
// clock_gettime64 / __vdso_clock_gettime64 declared for 4-byte-pointer builds.
struct timespec64 {
  std::int64_t tv_sec;  // seconds component
  std::int64_t tv_nsec; // nanoseconds component
};


================================================
FILE: ThunkLibs/libVDSO/libVDSO_Guest.cpp
================================================

/*
$info$
tags: thunklibs|VDSO
desc: Linux VDSO thunking
$end_info$
*/

#include <stdio.h>
#include <cstring>

#include <sched.h>
#include <sys/time.h>
#include <time.h>

#include "Types.h"
#include "common/Guest.h"

#include "thunkgen_guest_libVDSO.inl"

extern "C" {
// Declare the __vdso_* entry points as aliases of the matching fexfn_pack_*
// symbols, so both names refer to the same function.
// NOTE(review): the fexfn_pack_* targets are not defined in this file;
// presumably they come from thunkgen_guest_libVDSO.inl — confirm.
time_t __vdso_time(time_t* tloc) __attribute__((alias("fexfn_pack_time")));
int __vdso_gettimeofday(struct timeval* tv, struct timezone* tz) __attribute__((alias("fexfn_pack_gettimeofday")));
int __vdso_clock_gettime(clockid_t, struct timespec*) __attribute__((alias("fexfn_pack_clock_gettime")));
int __vdso_clock_getres(clockid_t, struct timespec*) __attribute__((alias("fexfn_pack_clock_getres")));
int __vdso_getcpu(uint32_t*, uint32_t*) __attribute__((alias("fexfn_pack_getcpu")));

#if __SIZEOF_POINTER__ == 4
// Only present when pointers are 4 bytes wide: alias of
// fexfn_pack_clock_gettime64, taking the 64-bit-field timespec64 from Types.h.
int __vdso_clock_gettime64(clockid_t, struct timespec64*) __attribute__((alias("fexfn_pack_clock_gettime64")));

// Naked function (no compiler-generated prologue/epilogue) whose whole body
// is `int 0x80` followed by `ret`: it raises the 0x80 software interrupt with
// whatever register state the caller set up and then returns to the caller.
// The assembly is written in Intel syntax and switches back to AT&T syntax
// before the block ends.
__attribute__((naked)) int __kernel_vsyscall() {
  asm volatile(R"(
  .intel_syntax noprefix
  int 0x80;
  ret;
  .att_syntax prefix
  )" ::
                 : "memory");
}

// Naked signal-return trampoline. It pops one word off the stack into eax
// (the value is discarded, since eax is overwritten right away), loads 0x77
// (119, the sigreturn number in the i386 syscall table) into eax and raises
// `int 0x80`. There is no `ret`; the function is not expected to fall through.
// NOTE(review): the trailing `nop` presumably mirrors the byte sequence of
// the kernel's own trampoline that debuggers/unwinders pattern-match — confirm.
__attribute__((naked)) void __kernel_sigreturn() {
  asm volatile(R"(
  .intel_syntax noprefix
  pop eax;
  mov eax, 0x77;
  int 0x80;
  nop;
  .att_syntax prefix
  )" ::
                 : "memory");
}
// Naked signal-return trampoline for the rt variant. It loads 0xad (173, the
// rt_sigreturn number in the i386 syscall table) into eax and raises
// `int 0x80`. Unlike __kernel_sigreturn it pops nothing first, and there is
// no `ret` or trailing `nop`.
__attribute__((naked)) void __kernel_rt_sigreturn() {
  asm volatile(R"(
  .intel_syntax noprefix
  mov eax, 0xad;
  int 0x80;
  .att_syntax prefix
  )" ::
                 : "memory");
}
#else
// Only present when pointers are not 4 bytes wide: alias of
// fexfn_pack_getrandom. The last two parameters correspond to the
// opaque_state / opaque_len arguments of the getrandom declaration in
// libVDSO_interface.cpp.
ssize_t __vdso_getrandom(void*, size_t, uint32_t, void*, size_t) __attribute__((alias("fexfn_pack_getrandom")));
#endif

// Naked function whose entire body is the two raw bytes 0x0f 0x3e, emitted
// with `.byte` rather than a mnemonic. There is no `ret` after them.
// NOTE(review): presumably the FEX emulator decodes this byte pair itself
// and never executes past it — confirm in the frontend decoder.
__attribute__((naked)) void __fex_callback_ret() {
  // CALLBACKRET FEX Instruction
  asm volatile(R"(
  .byte 0x0f, 0x3e;
  )" ::
                 : "memory");
}
}


================================================
FILE: ThunkLibs/libVDSO/libVDSO_Guest.lds
================================================
/*
 * Section layout for the 64-bit guest VDSO image.
 * Output starts right after the ELF/program headers. The dynamic-linking
 * tables come first, then every data-like input section is folded into the
 * single .rodata output section. Everything is assigned to the `text`
 * program header; .dynamic is additionally assigned to `dynamic`.
 */
SECTIONS {
  . = SIZEOF_HEADERS;
  .hash : { *(.hash) } :text
  .gnu.hash : { *(.gnu.hash) }
  .dynsym : { *(.dynsym) }
  .dynstr : { *(.dynstr) }
  .gnu.version : { *(.gnu.version) }
  .gnu.version_d : { *(.gnu.version_d) }
  .gnu.version_r : { *(.gnu.version_r) }
  .dynamic : { *(.dynamic) } :text :dynamic
  /* .data, .got, .bss and friends are merged into .rodata as well. */
  .rodata : {
    *(.rodata*)
      *(.data*)
      *(.sdata*)
      *(.got.plt) *(.got)
      *(.gnu.linkonce.d.*)
      *(.bss*)
      *(.dynbss*)
      *(.gnu.linkonce.b.*)
  } :text

  /* Notes, unwind tables and the static symbol table are dropped. */
  /DISCARD/ : {
    *(.note)
    *(.note.gnu.property)
    *(.eh_frame_hdr)
    *(.eh_frame)
    *(.symtab)
  }
}

/*
 * Program headers. FLAGS uses the ELF p_flags bits (4 = read, 1 = execute):
 *   text    - the one PT_LOAD segment, read + execute, also covering the ELF
 *             file header and the program headers themselves.
 *   dynamic - PT_DYNAMIC, read-only.
 *   note    - PT_NOTE, read-only. No output section in SECTIONS is assigned
 *             to it, and *(.note) input is discarded there.
 */
PHDRS {
  text PT_LOAD FLAGS(4 | 1) FILEHDR PHDRS;
  dynamic PT_DYNAMIC FLAGS(4);
  note PT_NOTE FLAGS(4);
}

/*
 * Symbol versioning for the 64-bit image: a single LINUX_2.6 node exports
 * each __vdso_* symbol together with its unprefixed name, plus
 * __fex_callback_ret. Every other symbol is local. This variant exports
 * getrandom and does not export clock_gettime64 or the __kernel_* symbols.
 */
VERSION {
  LINUX_2.6 {
  global:
    __vdso_time;
    time;
    __vdso_gettimeofday;
    gettimeofday;
    __vdso_clock_gettime;
    clock_gettime;
    __vdso_clock_getres;
    clock_getres;
    __vdso_getcpu;
    getcpu;
    __vdso_getrandom;
    getrandom;
    __fex_callback_ret;
  local: *;
  };
}


================================================
FILE: ThunkLibs/libVDSO/libVDSO_Guest_32.lds
================================================
/*
 * Section layout for the 32-bit guest VDSO image.
 * Output starts right after the ELF/program headers. The dynamic-linking
 * tables come first, then every data-like input section is folded into the
 * single .rodata output section. Everything is assigned to the `text`
 * program header; .dynamic is additionally assigned to `dynamic`.
 */
SECTIONS {
  . = SIZEOF_HEADERS;
  .hash : { *(.hash) } :text
  .gnu.hash : { *(.gnu.hash) }
  .dynsym : { *(.dynsym) }
  .dynstr : { *(.dynstr) }
  .gnu.version : { *(.gnu.version) }
  .gnu.version_d : { *(.gnu.version_d) }
  .gnu.version_r : { *(.gnu.version_r) }
  .dynamic : { *(.dynamic) } :text :dynamic
  /* .data, .got, .bss and friends are merged into .rodata as well. */
  .rodata : {
    *(.rodata*)
      *(.data*)
      *(.sdata*)
      *(.got.plt) *(.got)
      *(.gnu.linkonce.d.*)
      *(.bss*)
      *(.dynbss*)
      *(.gnu.linkonce.b.*)
  } :text

  /* Notes, unwind tables and the static symbol table are dropped. */
  /DISCARD/ : {
    *(.note)
    *(.note.gnu.property)
    *(.eh_frame_hdr)
    *(.eh_frame)
    *(.symtab)
  }
}

/*
 * Program headers. FLAGS uses the ELF p_flags bits (4 = read, 1 = execute):
 *   text    - the one PT_LOAD segment, read + execute, also covering the ELF
 *             file header and the program headers themselves.
 *   dynamic - PT_DYNAMIC, read-only.
 *   note    - PT_NOTE, read-only. No output section in SECTIONS is assigned
 *             to it, and *(.note) input is discarded there.
 */
PHDRS {
  text PT_LOAD FLAGS(4 | 1) FILEHDR PHDRS;
  dynamic PT_DYNAMIC FLAGS(4);
  note PT_NOTE FLAGS(4);
}

/*
 * Symbol versioning for the 32-bit image.
 * LINUX_2.6 exports each __vdso_* symbol together with its unprefixed name,
 * plus __fex_callback_ret; this variant exports clock_gettime64 and does not
 * export getrandom.
 * LINUX_2.5 exports the three __kernel_* trampolines (vsyscall, sigreturn,
 * rt_sigreturn). Every other symbol is local.
 */
VERSION {
  LINUX_2.6 {
  global:
    __vdso_time;
    time;
    __vdso_gettimeofday;
    gettimeofday;
    __vdso_clock_gettime;
    clock_gettime;
    __vdso_clock_getres;
    clock_getres;
    __vdso_getcpu;
    getcpu;
    __vdso_clock_gettime64;
    clock_gettime64;
    __fex_callback_ret;
  local: *;
  };
  LINUX_2.5 {
  global:
    __kernel_vsyscall;
    __kernel_sigreturn;
    __kernel_rt_sigreturn;
  local: *;
  };
}


================================================
FILE: ThunkLibs/libVDSO/libVDSO_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <sched.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>

#include "Types.h"

// Primary template, keyed on a function (passed as a non-type template
// argument). It is empty; the specializations below select which functions
// are part of this library's thunk interface.
template<auto>
struct fex_gen_config {};

// Functions registered regardless of pointer size. Each uses an empty
// specialization, i.e. no fexgen annotation is attached.
template<>
struct fex_gen_config<time> {};
template<>
struct fex_gen_config<gettimeofday> {};
template<>
struct fex_gen_config<clock_gettime> {};
template<>
struct fex_gen_config<clock_getres> {};
template<>
struct fex_gen_config<getcpu> {};

// Pointer-size dependent part of the interface. Both functions are declared
// here by hand (rather than taken from a system header) and then registered:
//   4-byte pointers: clock_gettime64, which takes the timespec64 from Types.h.
//   otherwise:       getrandom, in its five-argument form with an opaque
//                    state pointer and length.
#if __SIZEOF_POINTER__ == 4
extern int clock_gettime64(clockid_t __clock_id, struct timespec64* __tp) __THROW;
template<>
struct fex_gen_config<clock_gettime64> {};
#else
extern ssize_t getrandom(void* buffer, size_t len, uint32_t flags, void* opaque_state, size_t opaque_len);
template<>
struct fex_gen_config<getrandom> {};
#endif


================================================
FILE: ThunkLibs/libX11/libX11_NativeGuest.cpp
================================================
// This file only exists to create a placeholder library to link against for
// libraries that are supposed to implicitly load libX11. At runtime, the guest
// linker will select the library from the RootFS instead, which is then
// replaced by libX11-guest.so.

// Define some symbol so that the linker doesn't consider this library unused
extern "C" {
// Intentionally empty and argument-less: only the exported symbol name matters.
void XSetErrorHandler() {}
}


================================================
FILE: ThunkLibs/libasound/libasound_Guest.cpp
================================================
/*
$info$
tags: thunklibs|asound
$end_info$
*/

extern "C" {
#include <alsa/asoundlib.h>
}

#include <stdio.h>
#include <cstring>
#include <map>
#include <string>

#include "common/Guest.h"
#include <stdarg.h>

#include "thunkgen_guest_libasound.inl"

// Guest-side library hookup for libasound.
// NOTE(review): LOAD_LIB is not defined in this file; presumably it comes from
// "common/Guest.h" and emits the code that loads the matching host thunk
// library — confirm there.
LOAD_LIB(libasound)


================================================
FILE: ThunkLibs/libasound/libasound_Host.cpp
================================================
/*
$info$
tags: thunklibs|asound
$end_info$
*/

#include <stdio.h>

#include <alsa/asoundlib.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "thunkgen_host_libasound.inl"

// Host-side export table for libasound.
// NOTE(review): EXPORTS is not defined in this file; presumably it comes from
// "common/Host.h" and publishes the generated host thunks — confirm there.
EXPORTS(libasound)


================================================
FILE: ThunkLibs/libasound/libasound_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <alsa/asoundlib.h>
#include <alsa/version.h>

#include <type_traits>

// Primary configuration template, keyed on a function via a non-type `auto`
// template parameter. Every specialization that does not derive from another
// annotation is an empty struct; the primary template itself carries a
// `version` member initialised to 2.
// NOTE(review): `version` presumably identifies the library version/soname
// the generator targets — confirm against the thunk generator.
template<auto>
struct fex_gen_config {
  unsigned version = 2;
};

// Per-parameter annotation template.
// Template arguments: function, parameter index, parameter type [optional,
// defaults to void]. The primary template is empty.
template<auto, int, typename = void>
struct fex_gen_param {};

// Per-type annotation template, keyed on the annotated type. The primary
// template is empty; specializations in this file derive from fexgen::*
// annotation bases such as opaque_type or assume_compatible_data_layout.
template<typename>
struct fex_gen_type {};

// snd_pcm_status_t and snd_shm_area are annotated as fexgen::opaque_type.
// NOTE(review): presumably this tells the generator to pass pointers to these
// types through without inspecting their layout — confirm.
template<>
struct fex_gen_type<snd_pcm_status_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_shm_area> : fexgen::opaque_type {};

// Alphabetical list of ALSA handle/record types annotated as
// fexgen::opaque_type. snd_config_iterator_t is a pointer typedef, so its
// pointee type is annotated via std::remove_pointer_t.
// NOTE(review): presumably "opaque" means the generator never needs the
// struct layout of these types — confirm against GeneratorInterface.h.
template<>
struct fex_gen_type<snd_async_handler_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<std::remove_pointer_t<snd_config_iterator_t>> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_config_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_config_update_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_card_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_elem_id_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_elem_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_elem_list_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_elem_value_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_event_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_ctl_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_devname_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hctl_elem_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hctl_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hwdep_dsp_image_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hwdep_dsp_status_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hwdep_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_hwdep_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_input_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_midi_event_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_mixer_class_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_mixer_elem_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_mixer_selem_id_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_mixer_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_output_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_access_mask_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_format_mask_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_hook_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_hw_params_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_scope_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_subformat_mask_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_sw_params_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_pcm_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_rawmidi_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_rawmidi_params_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_rawmidi_status_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_rawmidi_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_sctl_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_client_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_client_pool_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_ev_ext_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_port_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_port_subscribe_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_query_subscribe_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_queue_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_queue_status_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_queue_tempo_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_queue_timer_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_remove_events_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_system_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_seq_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_ginfo_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_gparams_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_gstatus_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_id_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_info_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_params_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_query_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_status_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timer_t> : fexgen::opaque_type {};
template<>
struct fex_gen_type<snd_timestamp_t> : fexgen::opaque_type {};

// The C stdio FILE type is annotated as opaque as well; it is not an ALSA
// type, which is why it is listed separately from the snd_* types.
template<>
struct fex_gen_type<FILE> : fexgen::opaque_type {};

// Union types with compatible data layout: annotated with
// fexgen::assume_compatible_data_layout instead of opaque_type.
// NOTE(review): presumably this asserts that guest and host agree on the
// in-memory layout, so no repacking is generated — confirm.
template<>
struct fex_gen_type<snd_pcm_sync_id_t> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<snd_seq_timestamp> : fexgen::assume_compatible_data_layout {};
// snd_seq_event has an anonymous union member, so it receives the same
// annotation as the unions above.
template<>
struct fex_gen_type<snd_seq_event> : fexgen::assume_compatible_data_layout {};

// Only when IS_32BIT_THUNK is not defined: snd_pcm_scope_ops_t is assumed to
// have a compatible data layout.
#ifndef IS_32BIT_THUNK
// TODO: Convert vtable (the layout assumption above stands in for a proper
// conversion of this vtable-like struct)
template<>
struct fex_gen_type<snd_pcm_scope_ops_t> : fexgen::assume_compatible_data_layout {};
#endif

// fex_gen_config entries for the library version query and the snd_dl*
// dynamic-loading helpers.
template<>
struct fex_gen_config<snd_asoundlib_version> {};
#if SND_LIB_VERSION < ((1 << 16) | (2 << 8) | (6))
// snd_dlpath exists from alsa-lib 1.2.6 onwards. When building against older
// headers (SND_LIB_VERSION below 1.2.6), declare it by hand so that the
// specialization below still has a function to name.
// NOTE(review): this fallback prototype is not wrapped in extern "C" —
// confirm that its language linkage is irrelevant to the generator.
int snd_dlpath(char* path, size_t path_len, const char* name);
#endif
template<>
struct fex_gen_config<snd_dlpath> {};
template<>
struct fex_gen_config<snd_dlopen> {};
template<>
struct fex_gen_config<snd_dlsym> {};
template<>
struct fex_gen_config<snd_dlclose> {};
// The snd_async_* handler entries are left disabled (commented out); the
// reason is not recorded in this file.
// template<> struct fex_gen_config<snd_async_add_handler> {};
// template<> struct fex_gen_config<snd_async_del_handler> {};
// template<> struct fex_gen_config<snd_async_handler_get_fd> {};
// template<> struct fex_gen_config<snd_async_handler_get_signo> {};
// template<> struct fex_gen_config<snd_async_handler_get_callback_private> {};
// fex_gen_config entries for the shared-memory area helpers and snd_user_file.
template<>
struct fex_gen_config<snd_shm_area_create> {};
template<>
struct fex_gen_config<snd_shm_area_share> {};
template<>
struct fex_gen_config<snd_shm_area_destroy> {};
template<>
struct fex_gen_config<snd_user_file> {};
// fex_gen_config entries for the snd_input_* functions (stdio/buffer sources)
// followed by the snd_output_* functions (stdio/buffer sinks). All use the
// default configuration.
template<>
struct fex_gen_config<snd_input_stdio_open> {};
template<>
struct fex_gen_config<snd_input_stdio_attach> {};
template<>
struct fex_gen_config<snd_input_buffer_open> {};
template<>
struct fex_gen_config<snd_input_close> {};
template<>
struct fex_gen_config<snd_input_gets> {};
template<>
struct fex_gen_config<snd_input_getc> {};
template<>
struct fex_gen_config<snd_input_ungetc> {};
template<>
struct fex_gen_config<snd_output_stdio_open> {};
template<>
struct fex_gen_config<snd_output_stdio_attach> {};
template<>
struct fex_gen_config<snd_output_buffer_open> {};
template<>
struct fex_gen_config<snd_output_buffer_string> {};
template<>
struct fex_gen_config<snd_output_close> {};
template<>
struct fex_gen_config<snd_output_puts> {};
template<>
struct fex_gen_config<snd_output_putc> {};
template<>
struct fex_gen_config<snd_output_flush> {};
// Error reporting entries.
template<>
struct fex_gen_config<snd_strerror> {};
// Variadic callback not supported: snd_lib_error_set_handler is therefore
// annotated with fexgen::callback_stub rather than given a regular entry.
// NOTE(review): presumably callback_stub makes the generator emit a stub in
// place of real callback marshalling — confirm.
template<>
struct fex_gen_config<snd_lib_error_set_handler> : fexgen::callback_stub {};
// Left disabled; the reason is not recorded in this file.
// template<> struct fex_gen_config<snd_lib_error_set_local> {};
// fex_gen_config entries for the snd_config_* functions dealing with the
// top-level configuration tree: topdir/top, load/save, update and
// reference counting (ref/unref).
template<>
struct fex_gen_config<snd_config_topdir> {};
template<>
struct fex_gen_config<snd_config_top> {};
template<>
struct fex_gen_config<snd_config_load> {};
template<>
struct fex_gen_config<snd_config_load_override> {};
template<>
struct fex_gen_config<snd_config_save> {};
template<>
struct fex_gen_config<snd_config_update> {};
template<>
struct fex_gen_config<snd_config_update_r> {};
template<>
struct fex_gen_config<snd_config_update_free> {};
template<>
struct fex_gen_config<snd_config_update_free_global> {};
template<>
struct fex_gen_config<snd_config_update_ref> {};
template<>
struct fex_gen_config<snd_config_ref> {};
template<>
struct fex_gen_config<snd_config_unref> {};
// fex_gen_config entries for snd_config_* tree operations: search, expand,
// evaluate, add (plain/before/after), remove, delete and copy.
template<>
struct fex_gen_config<snd_config_search> {};
template<>
struct fex_gen_config<snd_config_search_definition> {};
template<>
struct fex_gen_config<snd_config_expand> {};
template<>
struct fex_gen_config<snd_config_evaluate> {};
template<>
struct fex_gen_config<snd_config_add> {};
template<>
struct fex_gen_config<snd_config_add_before> {};
template<>
struct fex_gen_config<snd_config_add_after> {};
template<>
struct fex_gen_config<snd_config_remove> {};
template<>
struct fex_gen_config<snd_config_delete> {};
template<>
struct fex_gen_config<snd_config_delete_compound_members> {};
template<>
struct fex_gen_config<snd_config_copy> {};
// fex_gen_config entries for the snd_config_make_* and snd_config_imake_*
// node constructors.
template<>
struct fex_gen_config<snd_config_make> {};
template<>
struct fex_gen_config<snd_config_make_integer> {};
template<>
struct fex_gen_config<snd_config_make_integer64> {};
template<>
struct fex_gen_config<snd_config_make_real> {};
template<>
struct fex_gen_config<snd_config_make_string> {};
template<>
struct fex_gen_config<snd_config_make_pointer> {};
template<>
struct fex_gen_config<snd_config_make_compound> {};
// The numeric imake variants (integer, integer64, real) are left disabled;
// the reason is not recorded in this file.
// template<> struct fex_gen_config<snd_config_imake_integer> {};
// template<> struct fex_gen_config<snd_config_imake_integer64> {};
// template<> struct fex_gen_config<snd_config_imake_real> {};
template<>
struct fex_gen_config<snd_config_imake_string> {};
template<>
struct fex_gen_config<snd_config_imake_safe_string> {};
template<>
struct fex_gen_config<snd_config_imake_pointer> {};
// fex_gen_config entries for snd_config_* node accessors: type queries,
// the set_* family, the get_* family and test_id.
template<>
struct fex_gen_config<snd_config_get_type> {};
template<>
struct fex_gen_config<snd_config_is_array> {};
template<>
struct fex_gen_config<snd_config_set_id> {};
template<>
struct fex_gen_config<snd_config_set_integer> {};
template<>
struct fex_gen_config<snd_config_set_integer64> {};
template<>
struct fex_gen_config<snd_config_set_real> {};
template<>
struct fex_gen_config<snd_config_set_string> {};
template<>
struct fex_gen_config<snd_config_set_ascii> {};
template<>
struct fex_gen_config<snd_config_set_pointer> {};
template<>
struct fex_gen_config<snd_config_get_id> {};
template<>
struct fex_gen_config<snd_config_get_integer> {};
template<>
struct fex_gen_config<snd_config_get_integer64> {};
template<>
struct fex_gen_config<snd_config_get_real> {};
template<>
struct fex_gen_config<snd_config_get_ireal> {};
template<>
struct fex_gen_config<snd_config_get_string> {};
template<>
struct fex_gen_config<snd_config_get_ascii> {};
template<>
struct fex_gen_config<snd_config_get_pointer> {};
template<>
struct fex_gen_config<snd_config_test_id> {};
// fex_gen_config entries for configuration iteration and the bool/ctl_iface
// value parsers. Only iterator_first and iterator_end are enabled;
// iterator_next and iterator_entry are left disabled (reason not recorded in
// this file).
template<>
struct fex_gen_config<snd_config_iterator_first> {};
// template<> struct fex_gen_config<snd_config_iterator_next> {};
template<>
struct fex_gen_config<snd_config_iterator_end> {};
// template<> struct fex_gen_config<snd_config_iterator_entry> {};
template<>
struct fex_gen_config<snd_config_get_bool_ascii> {};
template<>
struct fex_gen_config<snd_config_get_bool> {};
template<>
struct fex_gen_config<snd_config_get_ctl_iface_ascii> {};
template<>
struct fex_gen_config<snd_config_get_ctl_iface> {};
// fex_gen_config entries for snd_names_list and its matching free function.
template<>
struct fex_gen_config<snd_names_list> {};
template<>
struct fex_gen_config<snd_names_list_free> {};
// fex_gen_config entries for opening/closing a PCM, querying its identity
// (name/type/stream), poll descriptor handling and non-blocking mode.
template<>
struct fex_gen_config<snd_pcm_open> {};
template<>
struct fex_gen_config<snd_pcm_open_lconf> {};
template<>
struct fex_gen_config<snd_pcm_open_fallback> {};
template<>
struct fex_gen_config<snd_pcm_close> {};
template<>
struct fex_gen_config<snd_pcm_name> {};
template<>
struct fex_gen_config<snd_pcm_type> {};
template<>
struct fex_gen_config<snd_pcm_stream> {};
template<>
struct fex_gen_config<snd_pcm_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_pcm_poll_descriptors> {};
template<>
struct fex_gen_config<snd_pcm_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_pcm_nonblock> {};
// Registering an async PCM handler is left disabled (reason not recorded in
// this file); only the getter below is enabled.
// template<> struct fex_gen_config<snd_async_add_pcm_handler> {};
template<>
struct fex_gen_config<snd_async_handler_get_pcm> {};
// fex_gen_config entries for the core PCM control functions: info and
// hw/sw parameter installation, stream state control (prepare, reset, start,
// drop, drain, pause, resume), position/availability queries,
// rewind/forward, interleaved and non-interleaved read/write, wait and
// link/unlink.
template<>
struct fex_gen_config<snd_pcm_info> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_current> {};
template<>
struct fex_gen_config<snd_pcm_hw_params> {};
template<>
struct fex_gen_config<snd_pcm_hw_free> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_current> {};
template<>
struct fex_gen_config<snd_pcm_sw_params> {};
template<>
struct fex_gen_config<snd_pcm_prepare> {};
template<>
struct fex_gen_config<snd_pcm_reset> {};
template<>
struct fex_gen_config<snd_pcm_status> {};
template<>
struct fex_gen_config<snd_pcm_start> {};
template<>
struct fex_gen_config<snd_pcm_drop> {};
template<>
struct fex_gen_config<snd_pcm_drain> {};
template<>
struct fex_gen_config<snd_pcm_pause> {};
template<>
struct fex_gen_config<snd_pcm_state> {};
template<>
struct fex_gen_config<snd_pcm_hwsync> {};
template<>
struct fex_gen_config<snd_pcm_delay> {};
template<>
struct fex_gen_config<snd_pcm_resume> {};
template<>
struct fex_gen_config<snd_pcm_htimestamp> {};
template<>
struct fex_gen_config<snd_pcm_avail> {};
template<>
struct fex_gen_config<snd_pcm_avail_update> {};
template<>
struct fex_gen_config<snd_pcm_avail_delay> {};
template<>
struct fex_gen_config<snd_pcm_rewindable> {};
template<>
struct fex_gen_config<snd_pcm_rewind> {};
template<>
struct fex_gen_config<snd_pcm_forwardable> {};
template<>
struct fex_gen_config<snd_pcm_forward> {};
template<>
struct fex_gen_config<snd_pcm_writei> {};
template<>
struct fex_gen_config<snd_pcm_readi> {};
template<>
struct fex_gen_config<snd_pcm_writen> {};
template<>
struct fex_gen_config<snd_pcm_readn> {};
template<>
struct fex_gen_config<snd_pcm_wait> {};
template<>
struct fex_gen_config<snd_pcm_link> {};
template<>
struct fex_gen_config<snd_pcm_unlink> {};
// fex_gen_config entries for the PCM channel-map (chmap) functions: query,
// free, get/set, naming, printing and string parsing.
template<>
struct fex_gen_config<snd_pcm_query_chmaps> {};
template<>
struct fex_gen_config<snd_pcm_query_chmaps_from_hw> {};
template<>
struct fex_gen_config<snd_pcm_free_chmaps> {};
template<>
struct fex_gen_config<snd_pcm_get_chmap> {};
template<>
struct fex_gen_config<snd_pcm_set_chmap> {};
template<>
struct fex_gen_config<snd_pcm_chmap_type_name> {};
template<>
struct fex_gen_config<snd_pcm_chmap_name> {};
template<>
struct fex_gen_config<snd_pcm_chmap_long_name> {};
template<>
struct fex_gen_config<snd_pcm_chmap_print> {};
template<>
struct fex_gen_config<snd_pcm_chmap_from_string> {};
template<>
struct fex_gen_config<snd_pcm_chmap_parse_string> {};
// fex_gen_config entries for snd_pcm_recover and the one-call parameter
// helpers snd_pcm_set_params / snd_pcm_get_params.
template<>
struct fex_gen_config<snd_pcm_recover> {};
template<>
struct fex_gen_config<snd_pcm_set_params> {};
template<>
struct fex_gen_config<snd_pcm_get_params> {};
// fex_gen_config entries for the snd_pcm_info_* container functions:
// sizeof/malloc/free/copy, the get_* accessors and the set_* mutators.
template<>
struct fex_gen_config<snd_pcm_info_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_info_malloc> {};
template<>
struct fex_gen_config<snd_pcm_info_free> {};
template<>
struct fex_gen_config<snd_pcm_info_copy> {};
template<>
struct fex_gen_config<snd_pcm_info_get_device> {};
template<>
struct fex_gen_config<snd_pcm_info_get_subdevice> {};
template<>
struct fex_gen_config<snd_pcm_info_get_stream> {};
template<>
struct fex_gen_config<snd_pcm_info_get_card> {};
template<>
struct fex_gen_config<snd_pcm_info_get_id> {};
template<>
struct fex_gen_config<snd_pcm_info_get_name> {};
template<>
struct fex_gen_config<snd_pcm_info_get_subdevice_name> {};
template<>
struct fex_gen_config<snd_pcm_info_get_class> {};
template<>
struct fex_gen_config<snd_pcm_info_get_subclass> {};
template<>
struct fex_gen_config<snd_pcm_info_get_subdevices_count> {};
template<>
struct fex_gen_config<snd_pcm_info_get_subdevices_avail> {};
template<>
struct fex_gen_config<snd_pcm_info_get_sync> {};
template<>
struct fex_gen_config<snd_pcm_info_set_device> {};
template<>
struct fex_gen_config<snd_pcm_info_set_subdevice> {};
template<>
struct fex_gen_config<snd_pcm_info_set_stream> {};
// fex_gen_config entries for snd_pcm_hw_params_any and the hardware
// capability predicates (can_*/is_*/supports_*), followed by the
// rate_numden, sbits and fifo_size getters.
template<>
struct fex_gen_config<snd_pcm_hw_params_any> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_mmap_sample_resolution> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_double> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_batch> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_block_transfer> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_monotonic> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_overrange> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_pause> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_resume> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_half_duplex> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_is_joint_duplex> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_sync_start> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_can_disable_period_wakeup> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_supports_audio_wallclock_ts> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_supports_audio_ts_type> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_rate_numden> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_sbits> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_fifo_size> {};
// fex_gen_config entries for the snd_pcm_hw_params_t container functions
// (sizeof/malloc/free/copy) and the access, format and subformat families
// (get/test/set, set_*_first/last and mask set/get).
template<>
struct fex_gen_config<snd_pcm_hw_params_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_malloc> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_free> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_copy> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_access> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_access> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_access> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_access_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_access_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_access_mask> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_access_mask> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_format> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_format> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_format> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_format_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_format_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_format_mask> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_format_mask> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_subformat> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_subformat> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_subformat> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_subformat_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_subformat_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_subformat_mask> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_subformat_mask> {};
// fex_gen_config entries for the snd_pcm_hw_params channel-count family:
// get (exact/min/max), test and the set_* variants.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_channels> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_channels_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_channels_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_channels> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_channels_last> {};
// fex_gen_config entries for the snd_pcm_hw_params sample-rate family
// (get/test/set variants), plus the rate_resample, export_buffer and
// period_wakeup set/get pairs.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_rate> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_rate_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_rate_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_rate> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_rate_resample> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_rate_resample> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_export_buffer> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_export_buffer> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_wakeup> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_wakeup> {};
// fex_gen_config entries for the snd_pcm_hw_params period_time family:
// get (exact/min/max), test and the set_* variants.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_time_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_time_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_period_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_time_last> {};
// fex_gen_config entries for the snd_pcm_hw_params period_size family:
// get (exact/min/max), test and the set_* variants, including
// set_period_size_integer.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_size_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_period_size_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_period_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_period_size_integer> {};
// fex_gen_config entries for the snd_pcm_hw_params periods (period count)
// family: get (exact/min/max), test and the set_* variants, including
// set_periods_integer.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_periods> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_periods_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_periods_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_periods> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_periods_integer> {};
// fex_gen_config entries for the snd_pcm_hw_params buffer_time family:
// get (exact/min/max), test and the set_* variants.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_time_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_time_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_buffer_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_time_last> {};
// fex_gen_config entries for the snd_pcm_hw_params buffer_size family:
// get (exact/min/max), test and the set_* variants, followed by the
// min_align getter.
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_size_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_buffer_size_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_test_buffer_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_min> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_max> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_minmax> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_near> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_first> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_set_buffer_size_last> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_get_min_align> {};
// fex_gen_config entries for the snd_pcm_sw_params_* functions: container
// management (sizeof/malloc/free/copy), get_boundary and the set/get pairs
// for tstamp mode/type, avail_min, period_event, start/stop thresholds and
// silence threshold/size.
template<>
struct fex_gen_config<snd_pcm_sw_params_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_malloc> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_free> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_copy> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_boundary> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_tstamp_mode> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_tstamp_mode> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_tstamp_type> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_tstamp_type> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_avail_min> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_avail_min> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_period_event> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_period_event> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_start_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_start_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_stop_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_stop_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_silence_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_silence_threshold> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_set_silence_size> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_get_silence_size> {};
// fex_gen_config entries for the snd_pcm_access_mask_* functions:
// sizeof/malloc/free/copy and the none/any/test/empty/set/reset operations.
template<>
struct fex_gen_config<snd_pcm_access_mask_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_malloc> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_free> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_copy> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_none> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_any> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_test> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_empty> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_set> {};
template<>
struct fex_gen_config<snd_pcm_access_mask_reset> {};
// fex_gen_config entries for the snd_pcm_format_mask_* functions:
// sizeof/malloc/free/copy and the none/any/test/empty/set/reset operations.
template<>
struct fex_gen_config<snd_pcm_format_mask_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_malloc> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_free> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_copy> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_none> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_any> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_test> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_empty> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_set> {};
template<>
struct fex_gen_config<snd_pcm_format_mask_reset> {};
// fex_gen_config entries for the snd_pcm_subformat_mask_* functions:
// sizeof/malloc/free/copy and the none/any/test/empty/set/reset operations.
template<>
struct fex_gen_config<snd_pcm_subformat_mask_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_malloc> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_free> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_copy> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_none> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_any> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_test> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_empty> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_set> {};
template<>
struct fex_gen_config<snd_pcm_subformat_mask_reset> {};
// fex_gen_config entries for the snd_pcm_status_* functions: container
// management (sizeof/malloc/free/copy), state and timestamp getters, the
// audio htstamp report/config pair and the delay/avail/overrange getters.
template<>
struct fex_gen_config<snd_pcm_status_sizeof> {};
template<>
struct fex_gen_config<snd_pcm_status_malloc> {};
template<>
struct fex_gen_config<snd_pcm_status_free> {};
template<>
struct fex_gen_config<snd_pcm_status_copy> {};
template<>
struct fex_gen_config<snd_pcm_status_get_state> {};
template<>
struct fex_gen_config<snd_pcm_status_get_trigger_tstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_trigger_htstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_tstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_htstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_audio_htstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_driver_htstamp> {};
template<>
struct fex_gen_config<snd_pcm_status_get_audio_htstamp_report> {};
template<>
struct fex_gen_config<snd_pcm_status_set_audio_htstamp_config> {};
template<>
struct fex_gen_config<snd_pcm_status_get_delay> {};
template<>
struct fex_gen_config<snd_pcm_status_get_avail> {};
template<>
struct fex_gen_config<snd_pcm_status_get_avail_max> {};
template<>
struct fex_gen_config<snd_pcm_status_get_overrange> {};
// fex_gen_config entries for the PCM enum-to-string helpers (type, stream,
// access, format, subformat names/descriptions) and snd_pcm_format_value.
template<>
struct fex_gen_config<snd_pcm_type_name> {};
template<>
struct fex_gen_config<snd_pcm_stream_name> {};
template<>
struct fex_gen_config<snd_pcm_access_name> {};
template<>
struct fex_gen_config<snd_pcm_format_name> {};
template<>
struct fex_gen_config<snd_pcm_format_description> {};
template<>
struct fex_gen_config<snd_pcm_subformat_name> {};
template<>
struct fex_gen_config<snd_pcm_subformat_description> {};
template<>
struct fex_gen_config<snd_pcm_format_value> {};
// The tstamp_mode and state name helpers are left disabled; the reason is not
// recorded in this file.
// template<> struct fex_gen_config<snd_pcm_tstamp_mode_name> {};
// template<> struct fex_gen_config<snd_pcm_state_name> {};
// fex_gen_config entries for the PCM *_dump functions (pcm, hw/sw setup,
// hw_params, sw_params and status).
template<>
struct fex_gen_config<snd_pcm_dump> {};
template<>
struct fex_gen_config<snd_pcm_dump_hw_setup> {};
template<>
struct fex_gen_config<snd_pcm_dump_sw_setup> {};
template<>
struct fex_gen_config<snd_pcm_dump_setup> {};
template<>
struct fex_gen_config<snd_pcm_hw_params_dump> {};
template<>
struct fex_gen_config<snd_pcm_sw_params_dump> {};
template<>
struct fex_gen_config<snd_pcm_status_dump> {};
// fex_gen_config entries for the snd_pcm_mmap_* functions: begin/commit and
// the interleaved (i) / non-interleaved (n) read and write variants.
template<>
struct fex_gen_config<snd_pcm_mmap_begin> {};
template<>
struct fex_gen_config<snd_pcm_mmap_commit> {};
template<>
struct fex_gen_config<snd_pcm_mmap_writei> {};
template<>
struct fex_gen_config<snd_pcm_mmap_readi> {};
template<>
struct fex_gen_config<snd_pcm_mmap_writen> {};
template<>
struct fex_gen_config<snd_pcm_mmap_readn> {};
// fex_gen_config specializations for the snd_pcm_format_* functions,
// snd_pcm_build_linear_format, and the snd_pcm bytes/frames/samples
// conversion functions. All bodies are empty.
template<>
struct fex_gen_config<snd_pcm_format_signed> {};
template<>
struct fex_gen_config<snd_pcm_format_unsigned> {};
template<>
struct fex_gen_config<snd_pcm_format_linear> {};
template<>
struct fex_gen_config<snd_pcm_format_float> {};
template<>
struct fex_gen_config<snd_pcm_format_little_endian> {};
template<>
struct fex_gen_config<snd_pcm_format_big_endian> {};
template<>
struct fex_gen_config<snd_pcm_format_cpu_endian> {};
template<>
struct fex_gen_config<snd_pcm_format_width> {};
template<>
struct fex_gen_config<snd_pcm_format_physical_width> {};
template<>
struct fex_gen_config<snd_pcm_build_linear_format> {};
template<>
struct fex_gen_config<snd_pcm_format_size> {};
template<>
struct fex_gen_config<snd_pcm_format_silence> {};
template<>
struct fex_gen_config<snd_pcm_format_silence_16> {};
template<>
struct fex_gen_config<snd_pcm_format_silence_32> {};
template<>
struct fex_gen_config<snd_pcm_format_silence_64> {};
template<>
struct fex_gen_config<snd_pcm_format_set_silence> {};
template<>
struct fex_gen_config<snd_pcm_bytes_to_frames> {};
template<>
struct fex_gen_config<snd_pcm_frames_to_bytes> {};
template<>
struct fex_gen_config<snd_pcm_bytes_to_samples> {};
template<>
struct fex_gen_config<snd_pcm_samples_to_bytes> {};
// fex_gen_config specializations for the snd_pcm_area(s)_silence/copy
// functions. All bodies are empty.
// snd_pcm_areas_copy_wrap and the snd_pcm_hook_* entries are commented out and
// therefore declare no specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_pcm_area_silence> {};
template<>
struct fex_gen_config<snd_pcm_areas_silence> {};
template<>
struct fex_gen_config<snd_pcm_area_copy> {};
template<>
struct fex_gen_config<snd_pcm_areas_copy> {};
// template<> struct fex_gen_config<snd_pcm_areas_copy_wrap> {};
// template<> struct fex_gen_config<snd_pcm_hook_get_pcm> {};
// template<> struct fex_gen_config<snd_pcm_hook_get_private> {};
// template<> struct fex_gen_config<snd_pcm_hook_set_private> {};
// template<> struct fex_gen_config<snd_pcm_hook_add> {};
// template<> struct fex_gen_config<snd_pcm_hook_remove> {};
// fex_gen_config specializations for the snd_pcm_meter_* and snd_pcm_scope_*
// entry points. All bodies are empty.
template<>
struct fex_gen_config<snd_pcm_meter_get_bufsize> {};
template<>
struct fex_gen_config<snd_pcm_meter_get_channels> {};
template<>
struct fex_gen_config<snd_pcm_meter_get_rate> {};
template<>
struct fex_gen_config<snd_pcm_meter_get_now> {};
template<>
struct fex_gen_config<snd_pcm_meter_get_boundary> {};
template<>
struct fex_gen_config<snd_pcm_meter_add_scope> {};
template<>
struct fex_gen_config<snd_pcm_meter_search_scope> {};
template<>
struct fex_gen_config<snd_pcm_scope_malloc> {};
template<>
struct fex_gen_config<snd_pcm_scope_set_ops> {};
template<>
struct fex_gen_config<snd_pcm_scope_set_name> {};
template<>
struct fex_gen_config<snd_pcm_scope_get_name> {};
template<>
struct fex_gen_config<snd_pcm_scope_get_callback_private> {};
template<>
struct fex_gen_config<snd_pcm_scope_set_callback_private> {};
template<>
struct fex_gen_config<snd_pcm_scope_s16_open> {};
template<>
struct fex_gen_config<snd_pcm_scope_s16_get_channel_buffer> {};
// fex_gen_config specializations for the snd_spcm_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_spcm_init> {};
template<>
struct fex_gen_config<snd_spcm_init_duplex> {};
template<>
struct fex_gen_config<snd_spcm_init_get_params> {};
// fex_gen_config specializations for the snd_rawmidi open/close,
// poll-descriptor and nonblock entry points. All bodies are empty.
template<>
struct fex_gen_config<snd_rawmidi_open> {};
template<>
struct fex_gen_config<snd_rawmidi_open_lconf> {};
template<>
struct fex_gen_config<snd_rawmidi_close> {};
template<>
struct fex_gen_config<snd_rawmidi_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_rawmidi_poll_descriptors> {};
template<>
struct fex_gen_config<snd_rawmidi_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_rawmidi_nonblock> {};
// fex_gen_config specializations for the snd_rawmidi_info_* functions and
// snd_rawmidi_info itself. All bodies are empty.
template<>
struct fex_gen_config<snd_rawmidi_info_sizeof> {};
template<>
struct fex_gen_config<snd_rawmidi_info_malloc> {};
template<>
struct fex_gen_config<snd_rawmidi_info_free> {};
template<>
struct fex_gen_config<snd_rawmidi_info_copy> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_device> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_subdevice> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_stream> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_card> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_flags> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_id> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_name> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_subdevice_name> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_subdevices_count> {};
template<>
struct fex_gen_config<snd_rawmidi_info_get_subdevices_avail> {};
template<>
struct fex_gen_config<snd_rawmidi_info_set_device> {};
template<>
struct fex_gen_config<snd_rawmidi_info_set_subdevice> {};
template<>
struct fex_gen_config<snd_rawmidi_info_set_stream> {};
template<>
struct fex_gen_config<snd_rawmidi_info> {};
// fex_gen_config specializations for the snd_rawmidi_params_* functions,
// snd_rawmidi_params and snd_rawmidi_params_current. All bodies are empty.
template<>
struct fex_gen_config<snd_rawmidi_params_sizeof> {};
template<>
struct fex_gen_config<snd_rawmidi_params_malloc> {};
template<>
struct fex_gen_config<snd_rawmidi_params_free> {};
template<>
struct fex_gen_config<snd_rawmidi_params_copy> {};
template<>
struct fex_gen_config<snd_rawmidi_params_set_buffer_size> {};
template<>
struct fex_gen_config<snd_rawmidi_params_get_buffer_size> {};
template<>
struct fex_gen_config<snd_rawmidi_params_set_avail_min> {};
template<>
struct fex_gen_config<snd_rawmidi_params_get_avail_min> {};
template<>
struct fex_gen_config<snd_rawmidi_params_set_no_active_sensing> {};
template<>
struct fex_gen_config<snd_rawmidi_params_get_no_active_sensing> {};
template<>
struct fex_gen_config<snd_rawmidi_params> {};
template<>
struct fex_gen_config<snd_rawmidi_params_current> {};
// fex_gen_config specializations for the snd_rawmidi_status_* functions and
// snd_rawmidi_status itself. All bodies are empty.
template<>
struct fex_gen_config<snd_rawmidi_status_sizeof> {};
template<>
struct fex_gen_config<snd_rawmidi_status_malloc> {};
template<>
struct fex_gen_config<snd_rawmidi_status_free> {};
template<>
struct fex_gen_config<snd_rawmidi_status_copy> {};
template<>
struct fex_gen_config<snd_rawmidi_status_get_tstamp> {};
template<>
struct fex_gen_config<snd_rawmidi_status_get_avail> {};
template<>
struct fex_gen_config<snd_rawmidi_status_get_xruns> {};
template<>
struct fex_gen_config<snd_rawmidi_status> {};
// fex_gen_config specializations for the remaining snd_rawmidi entry points
// (drain, drop, write, read, name, type, stream). All bodies are empty.
template<>
struct fex_gen_config<snd_rawmidi_drain> {};
template<>
struct fex_gen_config<snd_rawmidi_drop> {};
template<>
struct fex_gen_config<snd_rawmidi_write> {};
template<>
struct fex_gen_config<snd_rawmidi_read> {};
template<>
struct fex_gen_config<snd_rawmidi_name> {};
template<>
struct fex_gen_config<snd_rawmidi_type> {};
template<>
struct fex_gen_config<snd_rawmidi_stream> {};
// fex_gen_config specializations for the snd_timer_query_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_timer_query_open> {};
template<>
struct fex_gen_config<snd_timer_query_open_lconf> {};
template<>
struct fex_gen_config<snd_timer_query_close> {};
template<>
struct fex_gen_config<snd_timer_query_next_device> {};
template<>
struct fex_gen_config<snd_timer_query_info> {};
template<>
struct fex_gen_config<snd_timer_query_params> {};
template<>
struct fex_gen_config<snd_timer_query_status> {};
// fex_gen_config specializations for the snd_timer handle-level entry points
// (open/close, poll descriptors, info/params/status, start/stop/continue,
// read). All bodies are empty.
// The two snd_async_* timer entries are commented out and declare no
// specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_timer_open> {};
template<>
struct fex_gen_config<snd_timer_open_lconf> {};
template<>
struct fex_gen_config<snd_timer_close> {};
// template<> struct fex_gen_config<snd_async_add_timer_handler> {};
// template<> struct fex_gen_config<snd_async_handler_get_timer> {};
template<>
struct fex_gen_config<snd_timer_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_timer_poll_descriptors> {};
template<>
struct fex_gen_config<snd_timer_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_timer_info> {};
template<>
struct fex_gen_config<snd_timer_params> {};
template<>
struct fex_gen_config<snd_timer_status> {};
template<>
struct fex_gen_config<snd_timer_start> {};
template<>
struct fex_gen_config<snd_timer_stop> {};
template<>
struct fex_gen_config<snd_timer_continue> {};
template<>
struct fex_gen_config<snd_timer_read> {};
// fex_gen_config specializations for the snd_timer_id_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_timer_id_sizeof> {};
template<>
struct fex_gen_config<snd_timer_id_malloc> {};
template<>
struct fex_gen_config<snd_timer_id_free> {};
template<>
struct fex_gen_config<snd_timer_id_copy> {};
template<>
struct fex_gen_config<snd_timer_id_set_class> {};
template<>
struct fex_gen_config<snd_timer_id_get_class> {};
template<>
struct fex_gen_config<snd_timer_id_set_sclass> {};
template<>
struct fex_gen_config<snd_timer_id_get_sclass> {};
template<>
struct fex_gen_config<snd_timer_id_set_card> {};
template<>
struct fex_gen_config<snd_timer_id_get_card> {};
template<>
struct fex_gen_config<snd_timer_id_set_device> {};
template<>
struct fex_gen_config<snd_timer_id_get_device> {};
template<>
struct fex_gen_config<snd_timer_id_set_subdevice> {};
template<>
struct fex_gen_config<snd_timer_id_get_subdevice> {};
// fex_gen_config specializations for the snd_timer_ginfo_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_timer_ginfo_sizeof> {};
template<>
struct fex_gen_config<snd_timer_ginfo_malloc> {};
template<>
struct fex_gen_config<snd_timer_ginfo_free> {};
template<>
struct fex_gen_config<snd_timer_ginfo_copy> {};
template<>
struct fex_gen_config<snd_timer_ginfo_set_tid> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_tid> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_flags> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_card> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_id> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_name> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_resolution> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_resolution_min> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_resolution_max> {};
template<>
struct fex_gen_config<snd_timer_ginfo_get_clients> {};
// fex_gen_config specializations for the snd_timer_info_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_timer_info_sizeof> {};
template<>
struct fex_gen_config<snd_timer_info_malloc> {};
template<>
struct fex_gen_config<snd_timer_info_free> {};
template<>
struct fex_gen_config<snd_timer_info_copy> {};
template<>
struct fex_gen_config<snd_timer_info_is_slave> {};
template<>
struct fex_gen_config<snd_timer_info_get_card> {};
template<>
struct fex_gen_config<snd_timer_info_get_id> {};
template<>
struct fex_gen_config<snd_timer_info_get_name> {};
template<>
struct fex_gen_config<snd_timer_info_get_resolution> {};
// fex_gen_config specializations for the snd_timer_params_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_timer_params_sizeof> {};
template<>
struct fex_gen_config<snd_timer_params_malloc> {};
template<>
struct fex_gen_config<snd_timer_params_free> {};
template<>
struct fex_gen_config<snd_timer_params_copy> {};
template<>
struct fex_gen_config<snd_timer_params_set_auto_start> {};
template<>
struct fex_gen_config<snd_timer_params_get_auto_start> {};
template<>
struct fex_gen_config<snd_timer_params_set_exclusive> {};
template<>
struct fex_gen_config<snd_timer_params_get_exclusive> {};
template<>
struct fex_gen_config<snd_timer_params_set_early_event> {};
template<>
struct fex_gen_config<snd_timer_params_get_early_event> {};
template<>
struct fex_gen_config<snd_timer_params_set_ticks> {};
template<>
struct fex_gen_config<snd_timer_params_get_ticks> {};
template<>
struct fex_gen_config<snd_timer_params_set_queue_size> {};
template<>
struct fex_gen_config<snd_timer_params_get_queue_size> {};
template<>
struct fex_gen_config<snd_timer_params_set_filter> {};
template<>
struct fex_gen_config<snd_timer_params_get_filter> {};
// fex_gen_config specializations for the snd_timer_status_* entry points.
// All bodies are empty.
// Note: snd_timer_info_get_ticks is listed at the end of this run, after the
// snd_timer_status_* entries.
template<>
struct fex_gen_config<snd_timer_status_sizeof> {};
template<>
struct fex_gen_config<snd_timer_status_malloc> {};
template<>
struct fex_gen_config<snd_timer_status_free> {};
template<>
struct fex_gen_config<snd_timer_status_copy> {};
template<>
struct fex_gen_config<snd_timer_status_get_timestamp> {};
template<>
struct fex_gen_config<snd_timer_status_get_resolution> {};
template<>
struct fex_gen_config<snd_timer_status_get_lost> {};
template<>
struct fex_gen_config<snd_timer_status_get_overrun> {};
template<>
struct fex_gen_config<snd_timer_status_get_queue> {};
template<>
struct fex_gen_config<snd_timer_info_get_ticks> {};
// fex_gen_config specializations for the snd_hwdep handle-level entry points
// (open/close, poll descriptors, nonblock, info, dsp_status/dsp_load, ioctl,
// write, read). All bodies are empty.
template<>
struct fex_gen_config<snd_hwdep_open> {};
template<>
struct fex_gen_config<snd_hwdep_close> {};
template<>
struct fex_gen_config<snd_hwdep_poll_descriptors> {};
template<>
struct fex_gen_config<snd_hwdep_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_hwdep_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_hwdep_nonblock> {};
template<>
struct fex_gen_config<snd_hwdep_info> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_load> {};
template<>
struct fex_gen_config<snd_hwdep_ioctl> {};
template<>
struct fex_gen_config<snd_hwdep_write> {};
template<>
struct fex_gen_config<snd_hwdep_read> {};
// fex_gen_config specializations for the snd_hwdep_info_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_hwdep_info_sizeof> {};
template<>
struct fex_gen_config<snd_hwdep_info_malloc> {};
template<>
struct fex_gen_config<snd_hwdep_info_free> {};
template<>
struct fex_gen_config<snd_hwdep_info_copy> {};
template<>
struct fex_gen_config<snd_hwdep_info_get_device> {};
template<>
struct fex_gen_config<snd_hwdep_info_get_card> {};
template<>
struct fex_gen_config<snd_hwdep_info_get_id> {};
template<>
struct fex_gen_config<snd_hwdep_info_get_name> {};
template<>
struct fex_gen_config<snd_hwdep_info_get_iface> {};
template<>
struct fex_gen_config<snd_hwdep_info_set_device> {};
// fex_gen_config specializations for the snd_hwdep_dsp_status_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_hwdep_dsp_status_sizeof> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_malloc> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_free> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_copy> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_get_version> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_get_id> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_get_num_dsps> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_get_dsp_loaded> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_status_get_chip_ready> {};
// fex_gen_config specializations for the snd_hwdep_dsp_image_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_hwdep_dsp_image_sizeof> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_malloc> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_free> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_copy> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_get_index> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_get_name> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_get_image> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_get_length> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_set_index> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_set_name> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_set_image> {};
template<>
struct fex_gen_config<snd_hwdep_dsp_image_set_length> {};
// fex_gen_config specializations for the snd_card_* and snd_device_name_*
// entry points. All bodies are empty.
template<>
struct fex_gen_config<snd_card_load> {};
template<>
struct fex_gen_config<snd_card_next> {};
template<>
struct fex_gen_config<snd_card_get_index> {};
template<>
struct fex_gen_config<snd_card_get_name> {};
template<>
struct fex_gen_config<snd_card_get_longname> {};
template<>
struct fex_gen_config<snd_device_name_hint> {};
template<>
struct fex_gen_config<snd_device_name_free_hint> {};
template<>
struct fex_gen_config<snd_device_name_get_hint> {};
// fex_gen_config specializations for the snd_ctl open/close, nonblock,
// poll-descriptor and subscribe_events entry points. All bodies are empty.
// The two snd_async_* ctl entries are commented out and declare no
// specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_ctl_open> {};
template<>
struct fex_gen_config<snd_ctl_open_lconf> {};
template<>
struct fex_gen_config<snd_ctl_open_fallback> {};
template<>
struct fex_gen_config<snd_ctl_close> {};
template<>
struct fex_gen_config<snd_ctl_nonblock> {};
// template<> struct fex_gen_config<snd_async_add_ctl_handler> {};
// template<> struct fex_gen_config<snd_async_handler_get_ctl> {};
template<>
struct fex_gen_config<snd_ctl_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_ctl_poll_descriptors> {};
template<>
struct fex_gen_config<snd_ctl_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_ctl_subscribe_events> {};
// fex_gen_config specializations for the snd_ctl operation entry points:
// card_info, the snd_ctl_elem_* operations, the hwdep/pcm/rawmidi device
// queries, power state, read and wait. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_card_info> {};
template<>
struct fex_gen_config<snd_ctl_elem_list> {};
template<>
struct fex_gen_config<snd_ctl_elem_info> {};
template<>
struct fex_gen_config<snd_ctl_elem_read> {};
template<>
struct fex_gen_config<snd_ctl_elem_write> {};
template<>
struct fex_gen_config<snd_ctl_elem_lock> {};
template<>
struct fex_gen_config<snd_ctl_elem_unlock> {};
template<>
struct fex_gen_config<snd_ctl_elem_tlv_read> {};
template<>
struct fex_gen_config<snd_ctl_elem_tlv_write> {};
template<>
struct fex_gen_config<snd_ctl_elem_tlv_command> {};
template<>
struct fex_gen_config<snd_ctl_hwdep_next_device> {};
template<>
struct fex_gen_config<snd_ctl_hwdep_info> {};
template<>
struct fex_gen_config<snd_ctl_pcm_next_device> {};
template<>
struct fex_gen_config<snd_ctl_pcm_info> {};
template<>
struct fex_gen_config<snd_ctl_pcm_prefer_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_rawmidi_next_device> {};
template<>
struct fex_gen_config<snd_ctl_rawmidi_info> {};
template<>
struct fex_gen_config<snd_ctl_rawmidi_prefer_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_set_power_state> {};
template<>
struct fex_gen_config<snd_ctl_get_power_state> {};
template<>
struct fex_gen_config<snd_ctl_read> {};
template<>
struct fex_gen_config<snd_ctl_wait> {};
// fex_gen_config specializations for snd_ctl_name, snd_ctl_type and the
// snd_ctl *_type_name / *_iface_name functions. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_name> {};
template<>
struct fex_gen_config<snd_ctl_type> {};
template<>
struct fex_gen_config<snd_ctl_elem_type_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_iface_name> {};
template<>
struct fex_gen_config<snd_ctl_event_type_name> {};
// fex_gen_config specializations for the snd_ctl_event_elem_get_* entry
// points. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_event_elem_get_mask> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_numid> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_id> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_interface> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_device> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_name> {};
template<>
struct fex_gen_config<snd_ctl_event_elem_get_index> {};
// fex_gen_config specializations for snd_ctl_elem_list_alloc_space /
// free_space and the snd_ctl_ascii_* functions. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_list_alloc_space> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_free_space> {};
template<>
struct fex_gen_config<snd_ctl_ascii_elem_id_get> {};
template<>
struct fex_gen_config<snd_ctl_ascii_elem_id_parse> {};
template<>
struct fex_gen_config<snd_ctl_ascii_value_parse> {};
// fex_gen_config specializations for the snd_ctl_elem_id_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_id_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_malloc> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_free> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_clear> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_copy> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_get_index> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_id_set_index> {};
// fex_gen_config specializations for the snd_ctl_card_info_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_card_info_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_card_info_malloc> {};
template<>
struct fex_gen_config<snd_ctl_card_info_free> {};
template<>
struct fex_gen_config<snd_ctl_card_info_clear> {};
template<>
struct fex_gen_config<snd_ctl_card_info_copy> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_card> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_id> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_driver> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_name> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_longname> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_mixername> {};
template<>
struct fex_gen_config<snd_ctl_card_info_get_components> {};
// fex_gen_config specializations for the snd_ctl_event_* lifetime functions
// and snd_ctl_event_get_type. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_event_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_event_malloc> {};
template<>
struct fex_gen_config<snd_ctl_event_free> {};
template<>
struct fex_gen_config<snd_ctl_event_clear> {};
template<>
struct fex_gen_config<snd_ctl_event_copy> {};
template<>
struct fex_gen_config<snd_ctl_event_get_type> {};
// fex_gen_config specializations for the snd_ctl_elem_list_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_list_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_malloc> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_free> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_clear> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_copy> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_set_offset> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_used> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_count> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_id> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_list_get_index> {};
// fex_gen_config specializations for the snd_ctl_elem_info_* lifetime
// functions, get_type, and the is_* predicates. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_info_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_malloc> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_free> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_clear> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_copy> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_type> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_readable> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_writable> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_volatile> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_inactive> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_locked> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_tlv_readable> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_tlv_writable> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_tlv_commandable> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_owner> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_is_user> {};
// fex_gen_config specializations for the snd_ctl_elem_info_* owner, count,
// min/max/step, item and dimension functions. All bodies are empty.
// snd_ctl_elem_info_set_dimension is commented out and declares no
// specialization.
// NOTE(review): the reason it is disabled is not recorded here — confirm
// before enabling it.
template<>
struct fex_gen_config<snd_ctl_elem_info_get_owner> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_count> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_min> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_max> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_step> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_min64> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_max64> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_step64> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_items> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_item> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_item_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_dimensions> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_dimension> {};
// template<> struct fex_gen_config<snd_ctl_elem_info_set_dimension> {};
// fex_gen_config specializations for the snd_ctl_elem_info_* identifier
// getters and setters (id, numid, interface, device, subdevice, name, index).
// All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_info_get_id> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_get_index> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_id> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_info_set_index> {};
// fex_gen_config specializations for the snd_ctl_add_*_elem_set,
// snd_ctl_elem_add_* and snd_ctl_elem_remove entry points. All bodies are
// empty.
// The two "enumerated" variants are commented out and declare no
// specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_ctl_add_integer_elem_set> {};
template<>
struct fex_gen_config<snd_ctl_add_integer64_elem_set> {};
template<>
struct fex_gen_config<snd_ctl_add_boolean_elem_set> {};
// template<> struct fex_gen_config<snd_ctl_add_enumerated_elem_set> {};
template<>
struct fex_gen_config<snd_ctl_add_bytes_elem_set> {};
template<>
struct fex_gen_config<snd_ctl_elem_add_integer> {};
template<>
struct fex_gen_config<snd_ctl_elem_add_integer64> {};
template<>
struct fex_gen_config<snd_ctl_elem_add_boolean> {};
// template<> struct fex_gen_config<snd_ctl_elem_add_enumerated> {};
template<>
struct fex_gen_config<snd_ctl_elem_add_iec958> {};
template<>
struct fex_gen_config<snd_ctl_elem_remove> {};
// fex_gen_config specializations for the snd_ctl_elem_value_* lifetime
// functions and snd_ctl_elem_value_compare. All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_value_sizeof> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_malloc> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_free> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_clear> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_copy> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_compare> {};
// fex_gen_config specializations for the snd_ctl_elem_value_* identifier
// getters and setters (id, numid, interface, device, subdevice, name, index).
// All bodies are empty.
template<>
struct fex_gen_config<snd_ctl_elem_value_get_id> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_index> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_id> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_numid> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_interface> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_device> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_subdevice> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_name> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_index> {};
// fex_gen_config specializations for the snd_ctl_elem_value_* typed payload
// getters and setters (boolean, integer, integer64, enumerated, byte, bytes,
// iec958). All bodies are empty.
// Note: the bytes setter is spelled snd_ctl_elem_set_bytes (no "value_"),
// unlike the neighbouring snd_ctl_elem_value_* names.
template<>
struct fex_gen_config<snd_ctl_elem_value_get_boolean> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_integer> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_integer64> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_enumerated> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_byte> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_boolean> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_integer> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_integer64> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_enumerated> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_byte> {};
template<>
struct fex_gen_config<snd_ctl_elem_set_bytes> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_bytes> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_get_iec958> {};
template<>
struct fex_gen_config<snd_ctl_elem_value_set_iec958> {};
// fex_gen_config specializations for the snd_tlv_* functions and the snd_ctl
// dB range/conversion functions. All bodies are empty.
template<>
struct fex_gen_config<snd_tlv_parse_dB_info> {};
template<>
struct fex_gen_config<snd_tlv_get_dB_range> {};
template<>
struct fex_gen_config<snd_tlv_convert_to_dB> {};
template<>
struct fex_gen_config<snd_tlv_convert_from_dB> {};
template<>
struct fex_gen_config<snd_ctl_get_dB_range> {};
template<>
struct fex_gen_config<snd_ctl_convert_to_dB> {};
template<>
struct fex_gen_config<snd_ctl_convert_from_dB> {};
// fex_gen_config specializations for the snd_hctl handle-level entry points.
// All bodies are empty.
// snd_hctl_set_compare and the snd_hctl *callback* entries are commented out
// and declare no specialization.
// NOTE(review): the names suggest these take or return callback-related
// pointers, which may be why they are disabled — confirm before enabling them.
template<>
struct fex_gen_config<snd_hctl_compare_fast> {};
template<>
struct fex_gen_config<snd_hctl_open> {};
template<>
struct fex_gen_config<snd_hctl_open_ctl> {};
template<>
struct fex_gen_config<snd_hctl_close> {};
template<>
struct fex_gen_config<snd_hctl_nonblock> {};
template<>
struct fex_gen_config<snd_hctl_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_hctl_poll_descriptors> {};
template<>
struct fex_gen_config<snd_hctl_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_hctl_get_count> {};
// template<> struct fex_gen_config<snd_hctl_set_compare> {};
template<>
struct fex_gen_config<snd_hctl_first_elem> {};
template<>
struct fex_gen_config<snd_hctl_last_elem> {};
template<>
struct fex_gen_config<snd_hctl_find_elem> {};
// template<> struct fex_gen_config<snd_hctl_set_callback> {};
// template<> struct fex_gen_config<snd_hctl_set_callback_private> {};
// template<> struct fex_gen_config<snd_hctl_get_callback_private> {};
template<>
struct fex_gen_config<snd_hctl_load> {};
template<>
struct fex_gen_config<snd_hctl_free> {};
template<>
struct fex_gen_config<snd_hctl_handle_events> {};
template<>
struct fex_gen_config<snd_hctl_name> {};
template<>
struct fex_gen_config<snd_hctl_wait> {};
template<>
struct fex_gen_config<snd_hctl_ctl> {};
// fex_gen_config specializations for the snd_hctl_elem_* entry points.
// All bodies are empty.
// The three snd_hctl_elem *callback* entries at the end are commented out and
// declare no specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_hctl_elem_next> {};
template<>
struct fex_gen_config<snd_hctl_elem_prev> {};
template<>
struct fex_gen_config<snd_hctl_elem_info> {};
template<>
struct fex_gen_config<snd_hctl_elem_read> {};
template<>
struct fex_gen_config<snd_hctl_elem_write> {};
template<>
struct fex_gen_config<snd_hctl_elem_tlv_read> {};
template<>
struct fex_gen_config<snd_hctl_elem_tlv_write> {};
template<>
struct fex_gen_config<snd_hctl_elem_tlv_command> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_hctl> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_id> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_numid> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_interface> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_device> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_subdevice> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_name> {};
template<>
struct fex_gen_config<snd_hctl_elem_get_index> {};
// template<> struct fex_gen_config<snd_hctl_elem_set_callback> {};
// template<> struct fex_gen_config<snd_hctl_elem_get_callback_private> {};
// template<> struct fex_gen_config<snd_hctl_elem_set_callback_private> {};
// fex_gen_config specializations for the snd_sctl_* entry points.
// All bodies are empty.
template<>
struct fex_gen_config<snd_sctl_build> {};
template<>
struct fex_gen_config<snd_sctl_free> {};
template<>
struct fex_gen_config<snd_sctl_install> {};
template<>
struct fex_gen_config<snd_sctl_remove> {};
// fex_gen_config specializations for the snd_mixer handle-level entry points,
// plus snd_mixer_get_count and snd_mixer_class_unregister. All bodies are
// empty.
// snd_mixer_set_compare and the snd_mixer *callback* entries are commented out
// and declare no specialization.
// NOTE(review): the reason they are disabled is not recorded here — confirm
// before enabling them.
template<>
struct fex_gen_config<snd_mixer_open> {};
template<>
struct fex_gen_config<snd_mixer_close> {};
template<>
struct fex_gen_config<snd_mixer_first_elem> {};
template<>
struct fex_gen_config<snd_mixer_last_elem> {};
template<>
struct fex_gen_config<snd_mixer_handle_events> {};
template<>
struct fex_gen_config<snd_mixer_attach> {};
template<>
struct fex_gen_config<snd_mixer_attach_hctl> {};
template<>
struct fex_gen_config<snd_mixer_detach> {};
template<>
struct fex_gen_config<snd_mixer_detach_hctl> {};
template<>
struct fex_gen_config<snd_mixer_get_hctl> {};
template<>
struct fex_gen_config<snd_mixer_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_mixer_poll_descriptors> {};
template<>
struct fex_gen_config<snd_mixer_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_mixer_load> {};
template<>
struct fex_gen_config<snd_mixer_free> {};
template<>
struct fex_gen_config<snd_mixer_wait> {};
// template<> struct fex_gen_config<snd_mixer_set_compare> {};
// template<> struct fex_gen_config<snd_mixer_set_callback> {};
// template<> struct fex_gen_config<snd_mixer_get_callback_private> {};
// template<> struct fex_gen_config<snd_mixer_set_callback_private> {};
template<>
struct fex_gen_config<snd_mixer_get_count> {};
template<>
struct fex_gen_config<snd_mixer_class_unregister> {};
// fex_gen_config specializations for the snd_mixer_elem_* entry points, with
// snd_mixer_class_register listed among them. All bodies are empty.
// snd_mixer_elem_set_callback, snd_mixer_elem_set_callback_private and
// snd_mixer_elem_new are commented out and declare no specialization, while
// snd_mixer_elem_get_callback_private is enabled.
// NOTE(review): the reason for that split is not recorded here — confirm
// before changing which entries are enabled.
template<>
struct fex_gen_config<snd_mixer_elem_next> {};
template<>
struct fex_gen_config<snd_mixer_elem_prev> {};
// template<> struct fex_gen_config<snd_mixer_elem_set_callback> {};
template<>
struct fex_gen_config<snd_mixer_elem_get_callback_private> {};
// template<> struct fex_gen_config<snd_mixer_elem_set_callback_private> {};
template<>
struct fex_gen_config<snd_mixer_elem_get_type> {};
template<>
struct fex_gen_config<snd_mixer_class_register> {};
// template<> struct fex_gen_config<snd_mixer_elem_new> {};
template<>
struct fex_gen_config<snd_mixer_elem_add> {};
template<>
struct fex_gen_config<snd_mixer_elem_remove> {};
template<>
struct fex_gen_config<snd_mixer_elem_free> {};
template<>
struct fex_gen_config<snd_mixer_elem_info> {};
template<>
struct fex_gen_config<snd_mixer_elem_value> {};
template<>
struct fex_gen_config<snd_mixer_elem_attach> {};
template<>
struct fex_gen_config<snd_mixer_elem_detach> {};
template<>
struct fex_gen_config<snd_mixer_elem_empty> {};
template<>
struct fex_gen_config<snd_mixer_elem_get_private> {};
template<>
struct fex_gen_config<snd_mixer_class_sizeof> {};
template<>
struct fex_gen_config<snd_mixer_class_malloc> {};
template<>
struct fex_gen_config<snd_mixer_class_free> {};
template<>
struct fex_gen_config<snd_mixer_class_copy> {};
// template<> struct fex_gen_config<snd_mixer_class_get_mixer> {};
// template<> struct fex_gen_config<snd_mixer_class_get_event> {};
// template<> struct fex_gen_config<snd_mixer_class_get_private> {};
// template<> struct fex_gen_config<snd_mixer_class_get_compare> {};
// template<> struct fex_gen_config<snd_mixer_class_set_event> {};
// template<> struct fex_gen_config<snd_mixer_class_set_private> {};
// template<> struct fex_gen_config<snd_mixer_class_set_private_free> {};
// template<> struct fex_gen_config<snd_mixer_class_set_compare> {};
// Configuration entries for the snd_mixer_selem_* functions, snd_mixer_find_selem
// and the snd_mixer_selem_id_* functions.
// Every specialization is empty and has no annotation base classes.
template<>
struct fex_gen_config<snd_mixer_selem_channel_name> {};
template<>
struct fex_gen_config<snd_mixer_selem_register> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_id> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_name> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_index> {};
template<>
struct fex_gen_config<snd_mixer_find_selem> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_active> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_playback_mono> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_playback_channel> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_capture_mono> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_channel> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_group> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_common_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_playback_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_playback_volume_joined> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_volume_joined> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_common_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_playback_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_playback_switch_joined> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_switch_joined> {};
template<>
struct fex_gen_config<snd_mixer_selem_has_capture_switch_exclusive> {};
template<>
struct fex_gen_config<snd_mixer_selem_ask_playback_vol_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_ask_capture_vol_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_ask_playback_dB_vol> {};
template<>
struct fex_gen_config<snd_mixer_selem_ask_capture_dB_vol> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_playback_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_playback_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_playback_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_volume> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_dB> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_volume_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_volume_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_dB_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_dB_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_switch> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_switch_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_switch_all> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_playback_volume_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_playback_dB_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_playback_volume_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_volume_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_capture_dB_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_capture_volume_range> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_enumerated> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_enum_playback> {};
template<>
struct fex_gen_config<snd_mixer_selem_is_enum_capture> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_enum_items> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_enum_item_name> {};
template<>
struct fex_gen_config<snd_mixer_selem_get_enum_item> {};
template<>
struct fex_gen_config<snd_mixer_selem_set_enum_item> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_sizeof> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_malloc> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_free> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_copy> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_get_name> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_get_index> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_set_name> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_set_index> {};
template<>
struct fex_gen_config<snd_mixer_selem_id_parse> {};
// Configuration entries for the snd_seq_* handle functions (open/close, name/type,
// poll descriptors, nonblock, client id, input/output buffer sizes).
// Every specialization is empty and has no annotation base classes.
template<>
struct fex_gen_config<snd_seq_open> {};
template<>
struct fex_gen_config<snd_seq_open_lconf> {};
template<>
struct fex_gen_config<snd_seq_name> {};
template<>
struct fex_gen_config<snd_seq_type> {};
template<>
struct fex_gen_config<snd_seq_close> {};
template<>
struct fex_gen_config<snd_seq_poll_descriptors_count> {};
template<>
struct fex_gen_config<snd_seq_poll_descriptors> {};
template<>
struct fex_gen_config<snd_seq_poll_descriptors_revents> {};
template<>
struct fex_gen_config<snd_seq_nonblock> {};
template<>
struct fex_gen_config<snd_seq_client_id> {};
template<>
struct fex_gen_config<snd_seq_get_output_buffer_size> {};
template<>
struct fex_gen_config<snd_seq_get_input_buffer_size> {};
template<>
struct fex_gen_config<snd_seq_set_output_buffer_size> {};
template<>
struct fex_gen_config<snd_seq_set_input_buffer_size> {};
// Configuration entries for the snd_seq_system_info_* functions and
// snd_seq_system_info itself. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_system_info_sizeof> {};
template<>
struct fex_gen_config<snd_seq_system_info_malloc> {};
template<>
struct fex_gen_config<snd_seq_system_info_free> {};
template<>
struct fex_gen_config<snd_seq_system_info_copy> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_queues> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_clients> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_ports> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_channels> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_cur_clients> {};
template<>
struct fex_gen_config<snd_seq_system_info_get_cur_queues> {};
template<>
struct fex_gen_config<snd_seq_system_info> {};
// Configuration entries for the snd_seq_client_info_* functions, followed by
// snd_seq_get_client_info, snd_seq_get_any_client_info, snd_seq_set_client_info
// and snd_seq_query_next_client. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_client_info_sizeof> {};
template<>
struct fex_gen_config<snd_seq_client_info_malloc> {};
template<>
struct fex_gen_config<snd_seq_client_info_free> {};
template<>
struct fex_gen_config<snd_seq_client_info_copy> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_client> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_type> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_name> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_broadcast_filter> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_error_bounce> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_card> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_pid> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_event_filter> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_num_ports> {};
template<>
struct fex_gen_config<snd_seq_client_info_get_event_lost> {};
template<>
struct fex_gen_config<snd_seq_client_info_set_client> {};
template<>
struct fex_gen_config<snd_seq_client_info_set_name> {};
template<>
struct fex_gen_config<snd_seq_client_info_set_broadcast_filter> {};
template<>
struct fex_gen_config<snd_seq_client_info_set_error_bounce> {};
template<>
struct fex_gen_config<snd_seq_client_info_set_event_filter> {};
template<>
struct fex_gen_config<snd_seq_client_info_event_filter_clear> {};
template<>
struct fex_gen_config<snd_seq_client_info_event_filter_add> {};
template<>
struct fex_gen_config<snd_seq_client_info_event_filter_del> {};
template<>
struct fex_gen_config<snd_seq_client_info_event_filter_check> {};
template<>
struct fex_gen_config<snd_seq_get_client_info> {};
template<>
struct fex_gen_config<snd_seq_get_any_client_info> {};
template<>
struct fex_gen_config<snd_seq_set_client_info> {};
template<>
struct fex_gen_config<snd_seq_query_next_client> {};
// Configuration entries for the snd_seq_client_pool_* functions plus
// snd_seq_get_client_pool / snd_seq_set_client_pool.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_client_pool_sizeof> {};
template<>
struct fex_gen_config<snd_seq_client_pool_malloc> {};
template<>
struct fex_gen_config<snd_seq_client_pool_free> {};
template<>
struct fex_gen_config<snd_seq_client_pool_copy> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_client> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_output_pool> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_input_pool> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_output_room> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_output_free> {};
template<>
struct fex_gen_config<snd_seq_client_pool_get_input_free> {};
template<>
struct fex_gen_config<snd_seq_client_pool_set_output_pool> {};
template<>
struct fex_gen_config<snd_seq_client_pool_set_input_pool> {};
template<>
struct fex_gen_config<snd_seq_client_pool_set_output_room> {};
template<>
struct fex_gen_config<snd_seq_get_client_pool> {};
template<>
struct fex_gen_config<snd_seq_set_client_pool> {};
// Configuration entries for the snd_seq_port_info_* functions, followed by the
// port-level functions snd_seq_create_port, snd_seq_delete_port,
// snd_seq_get_port_info, snd_seq_get_any_port_info, snd_seq_set_port_info and
// snd_seq_query_next_port. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_port_info_sizeof> {};
template<>
struct fex_gen_config<snd_seq_port_info_malloc> {};
template<>
struct fex_gen_config<snd_seq_port_info_free> {};
template<>
struct fex_gen_config<snd_seq_port_info_copy> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_client> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_port> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_addr> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_name> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_capability> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_type> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_midi_channels> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_midi_voices> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_synth_voices> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_read_use> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_write_use> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_port_specified> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_timestamping> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_timestamp_real> {};
template<>
struct fex_gen_config<snd_seq_port_info_get_timestamp_queue> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_client> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_port> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_addr> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_name> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_capability> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_type> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_midi_channels> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_midi_voices> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_synth_voices> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_port_specified> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_timestamping> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_timestamp_real> {};
template<>
struct fex_gen_config<snd_seq_port_info_set_timestamp_queue> {};
template<>
struct fex_gen_config<snd_seq_create_port> {};
template<>
struct fex_gen_config<snd_seq_delete_port> {};
template<>
struct fex_gen_config<snd_seq_get_port_info> {};
template<>
struct fex_gen_config<snd_seq_get_any_port_info> {};
template<>
struct fex_gen_config<snd_seq_set_port_info> {};
template<>
struct fex_gen_config<snd_seq_query_next_port> {};
// Configuration entries for the snd_seq_port_subscribe_* functions plus
// snd_seq_get_port_subscription, snd_seq_subscribe_port and
// snd_seq_unsubscribe_port. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_port_subscribe_sizeof> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_malloc> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_free> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_copy> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_sender> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_dest> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_queue> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_exclusive> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_time_update> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_get_time_real> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_sender> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_dest> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_queue> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_exclusive> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_time_update> {};
template<>
struct fex_gen_config<snd_seq_port_subscribe_set_time_real> {};
template<>
struct fex_gen_config<snd_seq_get_port_subscription> {};
template<>
struct fex_gen_config<snd_seq_subscribe_port> {};
template<>
struct fex_gen_config<snd_seq_unsubscribe_port> {};
// Configuration entries for the snd_seq_query_subscribe_* functions plus
// snd_seq_query_port_subscribers. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_query_subscribe_sizeof> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_malloc> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_free> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_copy> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_client> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_port> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_root> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_type> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_index> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_num_subs> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_addr> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_queue> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_exclusive> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_time_update> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_get_time_real> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_set_client> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_set_port> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_set_root> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_set_type> {};
template<>
struct fex_gen_config<snd_seq_query_subscribe_set_index> {};
template<>
struct fex_gen_config<snd_seq_query_port_subscribers> {};
// Configuration entries for the snd_seq_queue_info_* functions, followed by the
// queue-level functions (create/alloc/free queue, get/set queue info,
// query named queue, get/set queue usage).
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_queue_info_sizeof> {};
template<>
struct fex_gen_config<snd_seq_queue_info_malloc> {};
template<>
struct fex_gen_config<snd_seq_queue_info_free> {};
template<>
struct fex_gen_config<snd_seq_queue_info_copy> {};
template<>
struct fex_gen_config<snd_seq_queue_info_get_queue> {};
template<>
struct fex_gen_config<snd_seq_queue_info_get_name> {};
template<>
struct fex_gen_config<snd_seq_queue_info_get_owner> {};
template<>
struct fex_gen_config<snd_seq_queue_info_get_locked> {};
template<>
struct fex_gen_config<snd_seq_queue_info_get_flags> {};
template<>
struct fex_gen_config<snd_seq_queue_info_set_name> {};
template<>
struct fex_gen_config<snd_seq_queue_info_set_owner> {};
template<>
struct fex_gen_config<snd_seq_queue_info_set_locked> {};
template<>
struct fex_gen_config<snd_seq_queue_info_set_flags> {};
template<>
struct fex_gen_config<snd_seq_create_queue> {};
template<>
struct fex_gen_config<snd_seq_alloc_named_queue> {};
template<>
struct fex_gen_config<snd_seq_alloc_queue> {};
template<>
struct fex_gen_config<snd_seq_free_queue> {};
template<>
struct fex_gen_config<snd_seq_get_queue_info> {};
template<>
struct fex_gen_config<snd_seq_set_queue_info> {};
template<>
struct fex_gen_config<snd_seq_query_named_queue> {};
template<>
struct fex_gen_config<snd_seq_get_queue_usage> {};
template<>
struct fex_gen_config<snd_seq_set_queue_usage> {};
// Configuration entries for the snd_seq_queue_status_* functions plus
// snd_seq_get_queue_status. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_queue_status_sizeof> {};
template<>
struct fex_gen_config<snd_seq_queue_status_malloc> {};
template<>
struct fex_gen_config<snd_seq_queue_status_free> {};
template<>
struct fex_gen_config<snd_seq_queue_status_copy> {};
template<>
struct fex_gen_config<snd_seq_queue_status_get_queue> {};
template<>
struct fex_gen_config<snd_seq_queue_status_get_events> {};
template<>
struct fex_gen_config<snd_seq_queue_status_get_tick_time> {};
template<>
struct fex_gen_config<snd_seq_queue_status_get_real_time> {};
template<>
struct fex_gen_config<snd_seq_queue_status_get_status> {};
template<>
struct fex_gen_config<snd_seq_get_queue_status> {};
// Configuration entries for the snd_seq_queue_tempo_* functions plus
// snd_seq_get_queue_tempo / snd_seq_set_queue_tempo.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_queue_tempo_sizeof> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_malloc> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_free> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_copy> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_get_queue> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_get_tempo> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_get_ppq> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_get_skew> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_get_skew_base> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_set_tempo> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_set_ppq> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_set_skew> {};
template<>
struct fex_gen_config<snd_seq_queue_tempo_set_skew_base> {};
template<>
struct fex_gen_config<snd_seq_get_queue_tempo> {};
template<>
struct fex_gen_config<snd_seq_set_queue_tempo> {};
// Configuration entries for the snd_seq_queue_timer_* functions plus
// snd_seq_get_queue_timer / snd_seq_set_queue_timer.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_queue_timer_sizeof> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_malloc> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_free> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_copy> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_get_queue> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_get_type> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_get_id> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_get_resolution> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_set_type> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_set_id> {};
template<>
struct fex_gen_config<snd_seq_queue_timer_set_resolution> {};
template<>
struct fex_gen_config<snd_seq_get_queue_timer> {};
template<>
struct fex_gen_config<snd_seq_set_queue_timer> {};
// Configuration entries for the snd_seq_* event functions (free/length,
// event output/input, drain, extract, drop input/output).
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_free_event> {};
template<>
struct fex_gen_config<snd_seq_event_length> {};
template<>
struct fex_gen_config<snd_seq_event_output> {};
template<>
struct fex_gen_config<snd_seq_event_output_buffer> {};
template<>
struct fex_gen_config<snd_seq_event_output_direct> {};
template<>
struct fex_gen_config<snd_seq_event_input> {};
template<>
struct fex_gen_config<snd_seq_event_input_pending> {};
template<>
struct fex_gen_config<snd_seq_drain_output> {};
template<>
struct fex_gen_config<snd_seq_event_output_pending> {};
template<>
struct fex_gen_config<snd_seq_extract_output> {};
template<>
struct fex_gen_config<snd_seq_drop_output> {};
template<>
struct fex_gen_config<snd_seq_drop_output_buffer> {};
template<>
struct fex_gen_config<snd_seq_drop_input> {};
template<>
struct fex_gen_config<snd_seq_drop_input_buffer> {};
// Configuration entries for the snd_seq_remove_events_* functions plus
// snd_seq_remove_events itself. All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_remove_events_sizeof> {};
template<>
struct fex_gen_config<snd_seq_remove_events_malloc> {};
template<>
struct fex_gen_config<snd_seq_remove_events_free> {};
template<>
struct fex_gen_config<snd_seq_remove_events_copy> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_condition> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_queue> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_time> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_dest> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_channel> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_event_type> {};
template<>
struct fex_gen_config<snd_seq_remove_events_get_tag> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_condition> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_queue> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_time> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_dest> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_channel> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_event_type> {};
template<>
struct fex_gen_config<snd_seq_remove_events_set_tag> {};
template<>
struct fex_gen_config<snd_seq_remove_events> {};
// Configuration entries for the remaining snd_seq_* functions in this file:
// the *_bit helpers, snd_seq_control_queue, simple-port create/delete,
// connect/disconnect, client name/event-filter/pool setters,
// snd_seq_sync_output_queue, snd_seq_parse_address and the pool resets.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_seq_set_bit> {};
template<>
struct fex_gen_config<snd_seq_unset_bit> {};
template<>
struct fex_gen_config<snd_seq_change_bit> {};
template<>
struct fex_gen_config<snd_seq_get_bit> {};
template<>
struct fex_gen_config<snd_seq_control_queue> {};
template<>
struct fex_gen_config<snd_seq_create_simple_port> {};
template<>
struct fex_gen_config<snd_seq_delete_simple_port> {};
template<>
struct fex_gen_config<snd_seq_connect_from> {};
template<>
struct fex_gen_config<snd_seq_connect_to> {};
template<>
struct fex_gen_config<snd_seq_disconnect_from> {};
template<>
struct fex_gen_config<snd_seq_disconnect_to> {};
template<>
struct fex_gen_config<snd_seq_set_client_name> {};
template<>
struct fex_gen_config<snd_seq_set_client_event_filter> {};
template<>
struct fex_gen_config<snd_seq_set_client_pool_output> {};
template<>
struct fex_gen_config<snd_seq_set_client_pool_output_room> {};
template<>
struct fex_gen_config<snd_seq_set_client_pool_input> {};
template<>
struct fex_gen_config<snd_seq_sync_output_queue> {};
template<>
struct fex_gen_config<snd_seq_parse_address> {};
template<>
struct fex_gen_config<snd_seq_reset_pool_output> {};
template<>
struct fex_gen_config<snd_seq_reset_pool_input> {};
// Configuration entries for the snd_midi_event_* functions.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<snd_midi_event_new> {};
template<>
struct fex_gen_config<snd_midi_event_resize_buffer> {};
template<>
struct fex_gen_config<snd_midi_event_free> {};
template<>
struct fex_gen_config<snd_midi_event_init> {};
template<>
struct fex_gen_config<snd_midi_event_reset_encode> {};
template<>
struct fex_gen_config<snd_midi_event_reset_decode> {};
template<>
struct fex_gen_config<snd_midi_event_no_status> {};
template<>
struct fex_gen_config<snd_midi_event_encode> {};
template<>
struct fex_gen_config<snd_midi_event_encode_byte> {};
template<>
struct fex_gen_config<snd_midi_event_decode> {};


================================================
FILE: ThunkLibs/libdrm/Guest.cpp
================================================
/*
$info$
tags: thunklibs|drm
$end_info$
*/

#include <xf86drm.h>

#include <stdio.h>
#include <cstring>
#include <map>
#include <string>

#include "common/Guest.h"
#include <stdarg.h>

#include "thunkgen_guest_libdrm.inl"

extern "C" {
// Hands `Ptr` to the FEX_free_on_host thunk so it is released on the host side.
// The thunk receives a pointer to a packed-argument struct whose only member is
// the pointer to free.
void FEX_malloc_free_on_host(void* Ptr) {
  struct PackedArgs {
    void* p;
  };

  PackedArgs Packed {Ptr};
  fexthunks_libdrm_FEX_free_on_host(&Packed);
}

// Asks the host, through the FEX_usable_size thunk, for the usable size of `Ptr`.
// The packed-argument struct carries the pointer in and the result (`rv`) out;
// `rv` is whatever the thunk call leaves in it.
size_t FEX_malloc_usable_size(void* Ptr) {
  struct PackedArgs {
    void* p;
    size_t rv;
  };

  PackedArgs Packed;
  Packed.p = Ptr;
  fexthunks_libdrm_FEX_usable_size(&Packed);

  return Packed.rv;
}

// Guest-side definition of drmMsg: formats the printf-style message and always
// writes it to stderr.
void drmMsg(const char* format, ...) {
  va_list VarArgs;
  va_start(VarArgs, format);
  vfprintf(stderr, format, VarArgs);
  va_end(VarArgs);
}

// Guest entry point for drmGetDeviceNameFromFd.
//
// The packed call returns a buffer that is released through FEX_malloc_free_on_host,
// so it cannot be handed to the guest as-is. The buffer is duplicated into memory
// from the guest's malloc() and the original is freed on the host.
//
// Returns the guest-owned copy, or nullptr if the packed call returned nullptr or
// the guest allocation failed.
char* drmGetDeviceNameFromFd(int a_0) {
  auto ret = fexfn_pack_drmGetDeviceNameFromFd(a_0);
  if (!ret) {
    return ret;
  }

  // This will be a bit wasteful but this is an unsized pointer, so the whole
  // usable size of the host allocation is copied.
  const size_t Usable = FEX_malloc_usable_size(ret);
  void* NewPtr = malloc(Usable);
  if (NewPtr) {
    // Only copy when the guest allocation succeeded; memcpy to nullptr is undefined.
    memcpy(NewPtr, ret, Usable);
  }

  // The host buffer is always released, including on guest allocation failure.
  FEX_malloc_free_on_host(ret);
  return static_cast<char*>(NewPtr);
}

// Guest entry point for drmGetDeviceNameFromFd2.
//
// The packed call returns a buffer that is released through FEX_malloc_free_on_host,
// so it cannot be handed to the guest as-is. The buffer is duplicated into memory
// from the guest's malloc() and the original is freed on the host.
//
// Returns the guest-owned copy, or nullptr if the packed call returned nullptr or
// the guest allocation failed.
char* drmGetDeviceNameFromFd2(int a_0) {
  auto ret = fexfn_pack_drmGetDeviceNameFromFd2(a_0);
  if (!ret) {
    return ret;
  }

  // This will be a bit wasteful but this is an unsized pointer, so the whole
  // usable size of the host allocation is copied.
  const size_t Usable = FEX_malloc_usable_size(ret);
  void* NewPtr = malloc(Usable);
  if (NewPtr) {
    // Only copy when the guest allocation succeeded; memcpy to nullptr is undefined.
    memcpy(NewPtr, ret, Usable);
  }

  // The host buffer is always released, including on guest allocation failure.
  FEX_malloc_free_on_host(ret);
  return static_cast<char*>(NewPtr);
}

// Guest entry point for drmGetPrimaryDeviceNameFromFd.
//
// The packed call returns a buffer that is released through FEX_malloc_free_on_host,
// so it cannot be handed to the guest as-is. The buffer is duplicated into memory
// from the guest's malloc() and the original is freed on the host.
//
// Returns the guest-owned copy, or nullptr if the packed call returned nullptr or
// the guest allocation failed.
char* drmGetPrimaryDeviceNameFromFd(int a_0) {
  auto ret = fexfn_pack_drmGetPrimaryDeviceNameFromFd(a_0);
  if (!ret) {
    return ret;
  }

  // This will be a bit wasteful but this is an unsized pointer, so the whole
  // usable size of the host allocation is copied.
  const size_t Usable = FEX_malloc_usable_size(ret);
  void* NewPtr = malloc(Usable);
  if (NewPtr) {
    // Only copy when the guest allocation succeeded; memcpy to nullptr is undefined.
    memcpy(NewPtr, ret, Usable);
  }

  // The host buffer is always released, including on guest allocation failure.
  FEX_malloc_free_on_host(ret);
  return static_cast<char*>(NewPtr);
}

// Guest entry point for drmGetRenderDeviceNameFromFd.
//
// The packed call returns a buffer that is released through FEX_malloc_free_on_host,
// so it cannot be handed to the guest as-is. The buffer is duplicated into memory
// from the guest's malloc() and the original is freed on the host.
//
// Returns the guest-owned copy, or nullptr if the packed call returned nullptr or
// the guest allocation failed.
char* drmGetRenderDeviceNameFromFd(int a_0) {
  auto ret = fexfn_pack_drmGetRenderDeviceNameFromFd(a_0);
  if (!ret) {
    return ret;
  }

  // This will be a bit wasteful but this is an unsized pointer, so the whole
  // usable size of the host allocation is copied.
  const size_t Usable = FEX_malloc_usable_size(ret);
  void* NewPtr = malloc(Usable);
  if (NewPtr) {
    // Only copy when the guest allocation succeeded; memcpy to nullptr is undefined.
    memcpy(NewPtr, ret, Usable);
  }

  // The host buffer is always released, including on guest allocation failure.
  FEX_malloc_free_on_host(ret);
  return static_cast<char*>(NewPtr);
}
}

LOAD_LIB(libdrm)


================================================
FILE: ThunkLibs/libdrm/Host.cpp
================================================
/*
$info$
tags: thunklibs|drm
$end_info$
*/

#include <stdio.h>

#include <xf86drm.h>

#include "common/Host.h"
#include <dlfcn.h>
#include <malloc.h>

#include "thunkgen_host_libdrm.inl"

// Host implementation of FEX_usable_size: reports malloc_usable_size() for the
// given host allocation.
static size_t fexfn_impl_libdrm_FEX_usable_size(void* a_0) {
  const size_t UsableBytes = malloc_usable_size(a_0);
  return UsableBytes;
}

// Host implementation of FEX_free_on_host: releases the given allocation with the
// host's free().
static void fexfn_impl_libdrm_FEX_free_on_host(void* a_0) {
  void* const HostAllocation = a_0;
  ::free(HostAllocation);
}

EXPORTS(libdrm)


================================================
FILE: ThunkLibs/libdrm/libdrm_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <xf86drm.h>

// Primary template that the per-function fex_gen_config<...> specializations in
// this file specialize; it is keyed on a non-type template argument (the function).
template<auto>
struct fex_gen_config {
  // NOTE(review): presumably the library version the generator associates with
  // libdrm - confirm against common/GeneratorInterface.h and the generator.
  unsigned version = 2;
};

// Primary template for per-type annotations; empty by default. Specializations
// below attach fexgen:: annotations to individual libdrm types.
template<typename>
struct fex_gen_type {};

// Type annotations that are only applied when IS_32BIT_THUNK is not defined.
// Each one derives from fexgen::assume_compatible_data_layout.
// NOTE(review): presumably this tells the generator to treat the type's layout as
// compatible without further checks - confirm in common/GeneratorInterface.h.
#ifndef IS_32BIT_THUNK
// Union types with compatible data layout
template<>
struct fex_gen_type<drmDevice> : fexgen::assume_compatible_data_layout {};

// Anonymous sub-structs
template<>
struct fex_gen_type<drmStatsT> : fexgen::assume_compatible_data_layout {};

// TODO: Convert vtable
template<>
struct fex_gen_type<drmServerInfo> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<drmEventContext> : fexgen::assume_compatible_data_layout {};
#endif

// FEX-specific helper functions (not declared by xf86drm.h) that are declared here
// so they can be configured like the libdrm functions below.
// Host.cpp provides fexfn_impl_libdrm_FEX_usable_size / _FEX_free_on_host
// (malloc_usable_size / free), and Guest.cpp calls the matching
// fexthunks_libdrm_* symbols from FEX_malloc_usable_size / FEX_malloc_free_on_host.
size_t FEX_usable_size(void*);
void FEX_free_on_host(void*);

// Both helpers are annotated with custom_host_impl and custom_guest_entrypoint.
template<>
struct fex_gen_config<FEX_usable_size> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint {};
template<>
struct fex_gen_config<FEX_free_on_host> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint {};
// Configuration entries for libdrm functions from drmIoctl through drmSetBusid
// (ioctl, open/close, version/capability queries, bus id, map/client/stats
// queries, drmCommand*). All specializations are empty (no annotations).
template<>
struct fex_gen_config<drmIoctl> {};
template<>
struct fex_gen_config<drmGetHashTable> {};
template<>
struct fex_gen_config<drmGetEntry> {};
template<>
struct fex_gen_config<drmAvailable> {};
template<>
struct fex_gen_config<drmOpen> {};
template<>
struct fex_gen_config<drmOpenWithType> {};
template<>
struct fex_gen_config<drmOpenControl> {};
template<>
struct fex_gen_config<drmOpenRender> {};
template<>
struct fex_gen_config<drmClose> {};
template<>
struct fex_gen_config<drmGetVersion> {};
template<>
struct fex_gen_config<drmGetLibVersion> {};
template<>
struct fex_gen_config<drmGetCap> {};
template<>
struct fex_gen_config<drmFreeVersion> {};
template<>
struct fex_gen_config<drmGetMagic> {};
template<>
struct fex_gen_config<drmGetBusid> {};
template<>
struct fex_gen_config<drmGetInterruptFromBusID> {};
template<>
struct fex_gen_config<drmGetMap> {};
template<>
struct fex_gen_config<drmGetClient> {};
template<>
struct fex_gen_config<drmGetStats> {};
template<>
struct fex_gen_config<drmSetInterfaceVersion> {};
template<>
struct fex_gen_config<drmCommandNone> {};
template<>
struct fex_gen_config<drmCommandRead> {};
template<>
struct fex_gen_config<drmCommandWrite> {};
template<>
struct fex_gen_config<drmCommandWriteRead> {};
template<>
struct fex_gen_config<drmFreeBusid> {};
template<>
struct fex_gen_config<drmSetBusid> {};
// Configuration entries for libdrm functions from drmAuthMagic through
// drmSetServerInfo (maps, buffers, contexts, drawables, handlers, client caps,
// CRTC sequences, DMA, locks, scatter/gather, vblank, server info).
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<drmAuthMagic> {};
template<>
struct fex_gen_config<drmAddMap> {};
template<>
struct fex_gen_config<drmRmMap> {};
template<>
struct fex_gen_config<drmAddContextPrivateMapping> {};
template<>
struct fex_gen_config<drmAddBufs> {};
template<>
struct fex_gen_config<drmMarkBufs> {};
template<>
struct fex_gen_config<drmCreateContext> {};
template<>
struct fex_gen_config<drmSetContextFlags> {};
template<>
struct fex_gen_config<drmGetContextFlags> {};
template<>
struct fex_gen_config<drmAddContextTag> {};
template<>
struct fex_gen_config<drmDelContextTag> {};
template<>
struct fex_gen_config<drmGetContextTag> {};
template<>
struct fex_gen_config<drmGetReservedContextList> {};
template<>
struct fex_gen_config<drmFreeReservedContextList> {};
template<>
struct fex_gen_config<drmSwitchToContext> {};
template<>
struct fex_gen_config<drmDestroyContext> {};
template<>
struct fex_gen_config<drmCreateDrawable> {};
template<>
struct fex_gen_config<drmDestroyDrawable> {};
template<>
struct fex_gen_config<drmUpdateDrawableInfo> {};
template<>
struct fex_gen_config<drmCtlInstHandler> {};
template<>
struct fex_gen_config<drmCtlUninstHandler> {};
template<>
struct fex_gen_config<drmSetClientCap> {};
template<>
struct fex_gen_config<drmCrtcGetSequence> {};
template<>
struct fex_gen_config<drmCrtcQueueSequence> {};
template<>
struct fex_gen_config<drmMap> {};
template<>
struct fex_gen_config<drmUnmap> {};
template<>
struct fex_gen_config<drmGetBufInfo> {};
template<>
struct fex_gen_config<drmMapBufs> {};
template<>
struct fex_gen_config<drmUnmapBufs> {};
template<>
struct fex_gen_config<drmDMA> {};
template<>
struct fex_gen_config<drmFreeBufs> {};
template<>
struct fex_gen_config<drmGetLock> {};
template<>
struct fex_gen_config<drmUnlock> {};
template<>
struct fex_gen_config<drmFinish> {};
template<>
struct fex_gen_config<drmGetContextPrivateMapping> {};
template<>
struct fex_gen_config<drmScatterGatherAlloc> {};
template<>
struct fex_gen_config<drmScatterGatherFree> {};
template<>
struct fex_gen_config<drmWaitVBlank> {};
template<>
struct fex_gen_config<drmSetServerInfo> {};
// Configuration entries for libdrm's utility functions: drmError, drmMalloc/drmFree,
// and the drmHash*, drmRandom* and drmSL* families.
// All specializations are empty (no annotations).
template<>
struct fex_gen_config<drmError> {};
template<>
struct fex_gen_config<drmMalloc> {};
template<>
struct fex_gen_config<drmFree> {};
template<>
struct fex_gen_config<drmHashCreate> {};
template<>
struct fex_gen_config<drmHashDestroy> {};
template<>
struct fex_gen_config<drmHashLookup> {};
template<>
struct fex_gen_config<drmHashInsert> {};
template<>
struct fex_gen_config<drmHashDelete> {};
template<>
struct fex_gen_config<drmHashFirst> {};
template<>
struct fex_gen_config<drmHashNext> {};
template<>
struct fex_gen_config<drmRandomCreate> {};
template<>
struct fex_gen_config<drmRandomDestroy> {};
template<>
struct fex_gen_config<drmRandom> {};
template<>
struct fex_gen_config<drmRandomDouble> {};
template<>
struct fex_gen_config<drmSLCreate> {};
template<>
struct fex_gen_config<drmSLDestroy> {};
template<>
struct fex_gen_config<drmSLLookup> {};
template<>
struct fex_gen_config<drmSLInsert> {};
template<>
struct fex_gen_config<drmSLDelete> {};
template<>
struct fex_gen_config<drmSLNext> {};
template<>
struct fex_gen_config<drmSLFirst> {};
template<>
struct fex_gen_config<drmSLDump> {};
template<>
struct fex_gen_config<drmSLLookupNeighbors> {};
template<>
struct fex_gen_config<drmOpenOnce> {};
template<>
struct fex_gen_config<drmOpenOnceWithType> {};
template<>
struct fex_gen_config<drmCloseOnce> {};
template<>
struct fex_gen_config<drmSetMaster> {};
template<>
struct fex_gen_config<drmDropMaster> {};
template<>
struct fex_gen_config<drmIsMaster> {};
template<>
struct fex_gen_config<drmHandleEvent> {};
// These two entries derive from fexgen::custom_guest_entrypoint instead of being empty.
// NOTE(review): presumably the guest-side entry point is hand-written rather than generated - confirm in the guest source.
template<>
struct fex_gen_config<drmGetDeviceNameFromFd> : fexgen::custom_guest_entrypoint {};
template<>
struct fex_gen_config<drmGetDeviceNameFromFd2> : fexgen::custom_guest_entrypoint {};

// Unannotated (empty) fex_gen_config specializations for the node-type and PRIME handle/fd functions.
template<>
struct fex_gen_config<drmGetNodeTypeFromFd> {};
template<>
struct fex_gen_config<drmPrimeHandleToFD> {};
template<>
struct fex_gen_config<drmPrimeFDToHandle> {};
// Like drmGetDeviceNameFromFd above, these derive from fexgen::custom_guest_entrypoint.
// NOTE(review): presumably implemented by hand on the guest side - confirm in the guest source.
template<>
struct fex_gen_config<drmGetPrimaryDeviceNameFromFd> : fexgen::custom_guest_entrypoint {};
template<>
struct fex_gen_config<drmGetRenderDeviceNameFromFd> : fexgen::custom_guest_entrypoint {};

// Unannotated (empty) fex_gen_config specializations for device enumeration and the drmSyncobj* functions.
template<>
struct fex_gen_config<drmGetDevice> {};
template<>
struct fex_gen_config<drmFreeDevice> {};
template<>
struct fex_gen_config<drmGetDevices> {};
template<>
struct fex_gen_config<drmFreeDevices> {};
template<>
struct fex_gen_config<drmGetDevice2> {};
template<>
struct fex_gen_config<drmGetDevices2> {};
template<>
struct fex_gen_config<drmDevicesEqual> {};
template<>
struct fex_gen_config<drmSyncobjCreate> {};
template<>
struct fex_gen_config<drmSyncobjDestroy> {};
template<>
struct fex_gen_config<drmSyncobjHandleToFD> {};
template<>
struct fex_gen_config<drmSyncobjFDToHandle> {};
template<>
struct fex_gen_config<drmSyncobjImportSyncFile> {};
template<>
struct fex_gen_config<drmSyncobjExportSyncFile> {};
template<>
struct fex_gen_config<drmSyncobjWait> {};
template<>
struct fex_gen_config<drmSyncobjReset> {};
template<>
struct fex_gen_config<drmSyncobjSignal> {};
template<>
struct fex_gen_config<drmSyncobjTimelineSignal> {};
template<>
struct fex_gen_config<drmSyncobjTimelineWait> {};
template<>
struct fex_gen_config<drmSyncobjQuery> {};
template<>
struct fex_gen_config<drmSyncobjQuery2> {};
template<>
struct fex_gen_config<drmSyncobjTransfer> {};


================================================
FILE: ThunkLibs/libfex_malloc/Guest.cpp
================================================
/*
$info$
tags: thunklibs|fex_malloc
desc: Handles allocations between guest and host thunks
$end_info$
*/

#include <cstring>
#include <map>
#include <string>

#include "common/Guest.h"
#include <stdarg.h>

#include "Types.h"

#include "thunkgen_guest_libfex_malloc.inl"

#include <vector>

extern "C" {
void fex_malloc_NoOptimize() {
  // Intentionally empty: this symbol only exists so our libraries have something to reference,
  // which makes sure this library gets pulled in.
}

// ALIAS(fn) expands to the `alias` attribute targeting the stringized `fn`, combined with `used`.
// PREALIAS(fn) forwards to ALIAS so that a macro passed as `fn` is expanded before it is stringized.
#define ALIAS(fn) __attribute__((alias(#fn), used))
#define PREALIAS(fn) ALIAS(fn)


// The __libc_* / __posix_memalign names below are declared as aliases of the matching fexfn_pack_* symbols.
// NOTE(review): the fexfn_pack_* targets are presumably defined by thunkgen_guest_libfex_malloc.inl - confirm.
void* __libc_calloc(size_t n, size_t size) PREALIAS(fexfn_pack_calloc);

void __libc_free(void* ptr) PREALIAS(fexfn_pack_free);

void* __libc_malloc(size_t size) PREALIAS(fexfn_pack_malloc);

void* __libc_memalign(size_t align, size_t s) PREALIAS(fexfn_pack_memalign);

void* __libc_realloc(void* ptr, size_t size) PREALIAS(fexfn_pack_realloc);

void* __libc_valloc(size_t size) PREALIAS(fexfn_pack_valloc);

int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(fexfn_pack_posix_memalign);

// Once libc's malloc has been replaced, an application calling malloc_usable_size could crash
// unless that query is answered by the same allocator.
// The symbol doesn't alias exactly, so it is forwarded through a small wrapper instead.

size_t __malloc_usable_size(void* ptr) {
  const size_t UsableBytes = fexfn_pack_malloc_usable_size(ptr);
  return UsableBytes;
}
}

// LOAD_LIB is a macro defined outside this file.
// NOTE(review): presumably it emits the guest-side load/registration code for libfex_malloc - confirm in common/Guest.h.
LOAD_LIB(libfex_malloc)


================================================
FILE: ThunkLibs/libfex_malloc/Host.cpp
================================================
/*
$info$
tags: thunklibs|fex_malloc
desc: Handles allocations between guest and host thunks
$end_info$
*/

#include <cstring>
#include <cstdlib>
#include <stdio.h>
#include <memory.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "Types.h"

#include "thunkgen_host_libfex_malloc.inl"

// Forward declaration; the definition is further down in this file.
void fexfn_impl_libfex_malloc_fex_get_allocation_ptrs(AllocationPtrs* Ptrs);

extern "C" {
// FEX allocation routines
// Only declared here (extern), one function pointer per allocator entry point.
// NOTE(review): presumably defined by the libfex_malloc_symbols host library - confirm the link setup.
extern MallocPtr FEX_Malloc_Ptr;
extern FreePtr FEX_Free_Ptr;
extern CallocPtr FEX_Calloc_Ptr;
extern MemalignPtr FEX_Memalign_Ptr;
extern ReallocPtr FEX_Realloc_Ptr;
extern VallocPtr FEX_Valloc_Ptr;
extern PosixMemalignPtr FEX_PosixMemalign_Ptr;
extern AlignedAllocPtr FEX_AlignedAlloc_Ptr;
extern MallocUsablePtr FEX_MallocUsable_Ptr;
}

extern "C" {
// Table of allocator function pointers that every wrapper in this file dispatches through.
// It is seeded here from the FEX_*_Ptr externs; init_lib() assigns the same fields again.
AllocationPtrs AllocationPointers {
  .Malloc = FEX_Malloc_Ptr,
  .Free = FEX_Free_Ptr,
  .Calloc = FEX_Calloc_Ptr,
  .Memalign = FEX_Memalign_Ptr,
  .Realloc = FEX_Realloc_Ptr,
  .Valloc = FEX_Valloc_Ptr,
  .PosixMemalign = FEX_PosixMemalign_Ptr,
  .AlignedAlloc = FEX_AlignedAlloc_Ptr,
  .MallocUsable = FEX_MallocUsable_Ptr,
};

// Our allocators
// ALIAS(fn) expands to the `alias` attribute targeting the stringized `fn`, combined with `used`.
// PREALIAS(fn) forwards to ALIAS so that a macro passed as `fn` is expanded before it is stringized.
#define ALIAS(fn) __attribute__((alias(#fn), used))
#define PREALIAS(fn) ALIAS(fn)

// Allocates `Size` bytes through the Malloc slot of AllocationPointers.
void* fex_malloc(size_t Size) {
  void* const Block = AllocationPointers.Malloc(Size);
  return Block;
}
// __libc_malloc and malloc are exported as aliases of fex_malloc.
void* __libc_malloc(size_t Size) __attribute__((alias("fex_malloc"), used));
void* malloc(size_t Size) __attribute__((alias("fex_malloc"), used));

// Releases `p` through the Free slot of AllocationPointers.
void fex_free(void* p) {
  const auto Release = AllocationPointers.Free;
  Release(p);
}
// __libc_free, __GI___libc_free and free are exported as aliases of fex_free.
void __libc_free(void* ptr) PREALIAS(fex_free);
void __GI___libc_free(void* ptr) PREALIAS(fex_free);
void free(void* ptr) PREALIAS(fex_free);

// Forwards both arguments unchanged to the Calloc slot of AllocationPointers.
void* fex_calloc(size_t n, size_t size) {
  void* const Block = AllocationPointers.Calloc(n, size);
  return Block;
}
// __libc_calloc and calloc are exported as aliases of fex_calloc.
void* __libc_calloc(size_t n, size_t size) PREALIAS(fex_calloc);
void* calloc(size_t n, size_t size) PREALIAS(fex_calloc);

// Forwards (align, s) unchanged to the Memalign slot of AllocationPointers.
void* fex_memalign(size_t align, size_t s) {
  void* const Block = AllocationPointers.Memalign(align, s);
  return Block;
}
// __libc_memalign and memalign are exported as aliases of fex_memalign.
void* __libc_memalign(size_t align, size_t s) PREALIAS(fex_memalign);
void* memalign(size_t align, size_t s) PREALIAS(fex_memalign);

// Forwards (ptr, size) unchanged to the Realloc slot of AllocationPointers.
void* fex_realloc(void* ptr, size_t size) {
  void* const Block = AllocationPointers.Realloc(ptr, size);
  return Block;
}
// __libc_realloc and realloc are exported as aliases of fex_realloc.
void* __libc_realloc(void* ptr, size_t size) PREALIAS(fex_realloc);
void* realloc(void* ptr, size_t size) PREALIAS(fex_realloc);

// Forwards `size` unchanged to the Valloc slot of AllocationPointers.
void* fex_valloc(size_t size) {
  void* const Block = AllocationPointers.Valloc(size);
  return Block;
}
// __libc_valloc and valloc are exported as aliases of fex_valloc.
void* __libc_valloc(size_t size) PREALIAS(fex_valloc);
void* valloc(size_t size) PREALIAS(fex_valloc);

// Forwards (r, a, s) unchanged to the PosixMemalign slot of AllocationPointers and returns its status.
int fex_posix_memalign(void** r, size_t a, size_t s) {
  const int Status = AllocationPointers.PosixMemalign(r, a, s);
  return Status;
}
// __posix_memalign and posix_memalign are exported as aliases of fex_posix_memalign.
int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(fex_posix_memalign);
int posix_memalign(void** r, size_t a, size_t s) PREALIAS(fex_posix_memalign);

// Forwards (a, s) unchanged to the AlignedAlloc slot of AllocationPointers.
void* fex_aligned_alloc(size_t a, size_t s) {
  void* const Block = AllocationPointers.AlignedAlloc(a, s);
  return Block;
}
// aligned_alloc is exported as an alias of fex_aligned_alloc.
void* aligned_alloc(size_t a, size_t s) PREALIAS(fex_aligned_alloc);

// Returns whatever the MallocUsable slot of AllocationPointers reports for `ptr`.
size_t fex_malloc_usable_size(void* ptr) {
  const size_t UsableBytes = AllocationPointers.MallocUsable(ptr);
  return UsableBytes;
}

// Real function (not an alias) that hands the query to fex_malloc_usable_size.
size_t __malloc_usable_size(void* ptr) {
  const size_t UsableBytes = fex_malloc_usable_size(ptr);
  return UsableBytes;
}
// Public-name counterpart of __malloc_usable_size; also forwards to fex_malloc_usable_size.
size_t malloc_usable_size(void* ptr) {
  const size_t UsableBytes = fex_malloc_usable_size(ptr);
  return UsableBytes;
}

// Unpacker for malloc: reads the size from the packed argument block and writes the
// resulting pointer back into its `rv` field.
static void fexfn_unpack_libfex_malloc_malloc(void* argsv) {
  struct arg_t {
    size_t a_0;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.Malloc(Packed->a_0);
}

// Unpacker for free: the packed block holds only the pointer to release; nothing is written back.
static void fexfn_unpack_libfex_malloc_free(void* argsv) {
  struct arg_t {
    void* a_0;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  AllocationPointers.Free(Packed->a_0);
}

// Unpacker for calloc: forwards the two packed size arguments and stores the result in `rv`.
static void fexfn_unpack_libfex_malloc_calloc(void* argsv) {
  struct arg_t {
    size_t a_0;
    size_t a_1;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.Calloc(Packed->a_0, Packed->a_1);
}
// Unpacker for memalign: forwards the two packed size arguments and stores the result in `rv`.
static void fexfn_unpack_libfex_malloc_memalign(void* argsv) {
  struct arg_t {
    size_t a_0;
    size_t a_1;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.Memalign(Packed->a_0, Packed->a_1);
}
// Unpacker for realloc: forwards the packed (pointer, size) pair and stores the result in `rv`.
static void fexfn_unpack_libfex_malloc_realloc(void* argsv) {
  struct arg_t {
    void* a_0;
    size_t a_1;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.Realloc(Packed->a_0, Packed->a_1);
}
// Unpacker for valloc: forwards the packed size and stores the result in `rv`.
static void fexfn_unpack_libfex_malloc_valloc(void* argsv) {
  struct arg_t {
    size_t a_0;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.Valloc(Packed->a_0);
}
// Unpacker for posix_memalign: forwards the packed (out-pointer, alignment, size) triple and
// stores the integer status in `rv`.
static void fexfn_unpack_libfex_malloc_posix_memalign(void* argsv) {
  struct arg_t {
    void** a_0;
    size_t a_1;
    size_t a_2;
    int rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.PosixMemalign(Packed->a_0, Packed->a_1, Packed->a_2);
}
// Unpacker for aligned_alloc: forwards the two packed size arguments and stores the result in `rv`.
static void fexfn_unpack_libfex_malloc_aligned_alloc(void* argsv) {
  struct arg_t {
    size_t a_0;
    size_t a_1;
    void* rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.AlignedAlloc(Packed->a_0, Packed->a_1);
}
// Unpacker for malloc_usable_size: forwards the packed pointer and stores the reported size in `rv`.
static void fexfn_unpack_libfex_malloc_malloc_usable_size(void* argsv) {
  struct arg_t {
    void* a_0;
    size_t rv;
  };
  auto* const Packed = static_cast<arg_t*>(argsv);
  Packed->rv = AllocationPointers.MallocUsable(Packed->a_0);
}

// Hook-named function pointers, each initialised to the matching fex_* wrapper from this file.
// NOTE(review): glibc's historical hook prototypes carry an extra trailing `const void* caller`
// parameter; confirm the narrower signatures declared here are intentional.
void (*__free_hook)(void* ptr) = fex_free;
void* (*__malloc_hook)(size_t size) = fex_malloc;
void* (*__realloc_hook)(void* ptr, size_t size) = fex_realloc;
void* (*__memalign_hook)(size_t alignment, size_t size) = fex_memalign;
}

// Copies the whole AllocationPointers table into the caller-provided struct.
// `Ptrs` is dereferenced unconditionally, so it must not be null.
void fexfn_impl_libfex_malloc_fex_get_allocation_ptrs(AllocationPtrs* Ptrs) {
  AllocationPtrs& Destination = *Ptrs;
  Destination = AllocationPointers;
}

// Library init hook: re-reads every FEX_*_Ptr extern into the AllocationPointers table.
static void init_lib() {
  auto& Table = AllocationPointers;

  // One assignment per slot, in declaration order of AllocationPtrs
  Table.Malloc = FEX_Malloc_Ptr;
  Table.Free = FEX_Free_Ptr;
  Table.Calloc = FEX_Calloc_Ptr;
  Table.Memalign = FEX_Memalign_Ptr;
  Table.Realloc = FEX_Realloc_Ptr;
  Table.Valloc = FEX_Valloc_Ptr;
  Table.PosixMemalign = FEX_PosixMemalign_Ptr;
  Table.AlignedAlloc = FEX_AlignedAlloc_Ptr;
  Table.MallocUsable = FEX_MallocUsable_Ptr;
}

// EXPORTS and LOAD_LIB_INIT are macros defined outside this file.
// NOTE(review): presumably they emit the host-side export table and register init_lib as the
// library's init callback - confirm in common/Host.h.
EXPORTS(libfex_malloc)
LOAD_LIB_INIT(init_lib)


================================================
FILE: ThunkLibs/libfex_malloc/Types.h
================================================
#pragma once

#include <cstddef>

// Function-pointer types for the allocator entry points, grouped by signature shape.

// Take one size, return a block
using MallocPtr = void* (*)(std::size_t);
using VallocPtr = void* (*)(std::size_t);

// Take two sizes, return a block
using CallocPtr = void* (*)(std::size_t, std::size_t);
using MemalignPtr = void* (*)(std::size_t, std::size_t);
using AlignedAllocPtr = void* (*)(std::size_t, std::size_t);

// Operate on an existing block
using FreePtr = void (*)(void*);
using ReallocPtr = void* (*)(void*, std::size_t);
using MallocUsablePtr = std::size_t (*)(void*);

// Returns a status code and hands the block back through the first argument
using PosixMemalignPtr = int (*)(void**, std::size_t, std::size_t);

// Bundle of allocator entry points, one function pointer per routine.
// Member order matters: the struct is filled with designated initializers and copied by value.
struct AllocationPtrs {
  MallocPtr Malloc;
  FreePtr Free;
  CallocPtr Calloc;
  MemalignPtr Memalign;
  ReallocPtr Realloc;
  VallocPtr Valloc;
  PosixMemalignPtr PosixMemalign;
  AlignedAllocPtr AlignedAlloc;
  MallocUsablePtr MallocUsable;
};


================================================
FILE: ThunkLibs/libfex_malloc_loader/Guest.cpp
================================================
/*
$info$
tags: thunklibs|fex_malloc_loader
desc: Delays malloc symbol replacement until it is safe to run constructors
$end_info$
*/

#include <stdio.h>
#include <dlfcn.h>
extern "C" {
// Library constructor: announces itself on stderr and then dlopen()s the guest malloc thunk library.
// A failed load is reported on stderr (with dlerror()'s reason) instead of being silently ignored;
// execution continues either way, as before.
__attribute__((constructor)) static void loadlib() {
  fprintf(stderr, "Time to load mallocs\n");

  // NOTE(review): this is an absolute, developer-local build path - it will not exist on other
  // machines. Left unchanged here because the intended install location is not visible from this file.
  void* Handle = dlopen("/mnt/Work/Work/work/FEXNew/Build/Guest/libfex_malloc-guest.so", RTLD_GLOBAL | RTLD_NOW | RTLD_NODELETE | RTLD_DEEPBIND);
  if (Handle == nullptr) {
    // dlerror() may itself return null if no diagnostic is available
    const char* Reason = dlerror();
    fprintf(stderr, "Failed to load libfex_malloc-guest.so: %s\n", Reason ? Reason : "unknown error");
  }
}
}


================================================
FILE: ThunkLibs/libfex_malloc_symbols/Host.cpp
================================================
/*
$info$
tags: thunklibs|fex_malloc_symbols
desc: Allows FEX to export allocation symbols
$end_info$
*/

#include <cstring>
#include <cstdlib>
#include <stdio.h>
#include <memory.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "../libfex_malloc/Types.h"

extern "C" {
// FEX allocation routines
// Every pointer starts out as the same sentinel, 0x4142434445464748 (the ASCII bytes "ABCDEFGH"),
// which is not a callable address.
// NOTE(review): presumably FEX overwrites these with its real allocator functions before any
// of them is called - confirm where that happens.
MallocPtr FEX_Malloc_Ptr = (MallocPtr)0x4142434445464748ULL;
FreePtr FEX_Free_Ptr = (FreePtr)0x4142434445464748ULL;
CallocPtr FEX_Calloc_Ptr = (CallocPtr)0x4142434445464748ULL;
MemalignPtr FEX_Memalign_Ptr = (MemalignPtr)0x4142434445464748ULL;
ReallocPtr FEX_Realloc_Ptr = (ReallocPtr)0x4142434445464748ULL;
VallocPtr FEX_Valloc_Ptr = (VallocPtr)0x4142434445464748ULL;
PosixMemalignPtr FEX_PosixMemalign_Ptr = (PosixMemalignPtr)0x4142434445464748ULL;
AlignedAllocPtr FEX_AlignedAlloc_Ptr = (AlignedAllocPtr)0x4142434445464748ULL;
MallocUsablePtr FEX_MallocUsable_Ptr = (MallocUsablePtr)0x4142434445464748ULL;
}


================================================
FILE: ThunkLibs/libfex_thunk_test/Guest.cpp
================================================
/*
$info$
tags: thunklibs|fex_thunk_test
$end_info$
*/

#include "common/Guest.h"
#include "api.h"

#include "thunkgen_guest_libfex_thunk_test.inl"

// LOAD_LIB is a macro defined outside this file.
// NOTE(review): presumably it emits the guest-side load/registration code for libfex_thunk_test - confirm in common/Guest.h.
LOAD_LIB(libfex_thunk_test)


================================================
FILE: ThunkLibs/libfex_thunk_test/Host.cpp
================================================
/*
$info$
tags: thunklibs|fex_thunk_test
$end_info$
*/

#include <dlfcn.h>

#include <unordered_map>

#include "common/Host.h"

#include "api.h"

#include "thunkgen_host_libfex_thunk_test.inl"

// Reports the byte offset of a member inside the guest layout of ReorderingType:
// member `a` for index 0, member `b` for any other index. `data` itself is never read.
static uint32_t fexfn_impl_libfex_thunk_test_QueryOffsetOf(guest_layout<ReorderingType*> data, int index) {
  using GuestReordering = guest_layout<ReorderingType>::type;
  return index == 0 ? offsetof(GuestReordering, a) : offsetof(GuestReordering, b);
}

// Custom repack hook for CustomRepackedType on entry: only records that it ran by setting
// custom_repack_invoked to 1 in the host copy. `from` is not read.
void fex_custom_repack_entry(host_layout<CustomRepackedType>& to, const guest_layout<CustomRepackedType>& from) {
  to.data.custom_repack_invoked = 1;
}

// Matching exit hook: touches neither argument and returns false.
// NOTE(review): the meaning of the bool result is defined by the thunk generator - confirm there.
bool fex_custom_repack_exit(guest_layout<CustomRepackedType>& to, const host_layout<CustomRepackedType>& from) {
  return false;
}

// Builds a host-layout copy of the guest struct `source` (interpreted as guest_layout<Type>),
// runs the custom entry repacker on it and returns it as a TestBaseStruct pointer.
template<StructType TypeIndex, typename Type>
static const TestBaseStruct* convert(const TestBaseStruct* source) {
  using HostType = host_layout<Type>;
  using GuestType = guest_layout<Type>;

  // Raw storage comes from aligned_alloc() rather than a typed new-expression, since no
  // easily available type information exists at the time of destruction.
  auto* guest_struct = reinterpret_cast<GuestType*>((void*)source);
  void* storage = aligned_alloc(alignof(HostType), sizeof(HostType));
  auto* host_struct = new (storage) HostType {*guest_struct};

  fex_custom_repack_entry(*host_struct, *guest_struct);

  return (const TestBaseStruct*)host_struct;
}

template<StructType TypeIndex, typename Type>
static void convert_to_guest(void* into, const TestBaseStruct* from) {
  auto typed_into = (guest_layout<Type>*)into;
  auto oldNext = typed_into->data.Next;
  *typed_into = to_guest(to_host_layout(*(Type*)from));
  typed_into->data.Next = oldNext;

  fex_custom_repack_exit(*typed_into, to_host_layout(*(Type*)from));
}

// Variable template producing one (StructType, {to-host converter, to-guest converter}) entry,
// pairing convert<> and convert_to_guest<> instantiated for the given type.
template<StructType TypeIndex, typename Type>
inline constexpr std::pair<StructType, std::pair<const TestBaseStruct* (*)(const TestBaseStruct*), void (*)(void*, const TestBaseStruct*)>> converters = {
  TypeIndex,
  {convert<TypeIndex, Type>, convert_to_guest<TypeIndex, Type>}};

// Lookup table from a struct's StructType tag to its converter pair.
// Only Struct1 and Struct2 are registered; lookups use .at(), which throws for unknown tags.
static std::unordered_map<StructType, std::pair<const TestBaseStruct* (*)(const TestBaseStruct*), void (*)(void*, const TestBaseStruct*)>> next_handlers {
  converters<StructType::Struct1, TestStruct1>,
  converters<StructType::Struct2, TestStruct2>,
};

// Entry-side handling of the Next chain: if the guest struct has a successor, convert it to
// host layout via the handler registered for its type tag and link it into `into`;
// otherwise `into.Next` is set to null.
static void default_fex_custom_repack_entry(TestBaseStruct& into, const guest_layout<TestBaseStruct>* from) {
  const auto raw_next = from->data.Next.get_pointer();
  if (!raw_next) {
    into.Next = nullptr;
    return;
  }

  auto guest_next = reinterpret_cast<const guest_layout<TestBaseStruct>*>(raw_next);

  // .at() throws if the successor carries a type tag with no registered handler
  const auto& handler = next_handlers.at(StructType {guest_next->data.Type.data});

  into.Next = (TestBaseStruct*)handler.first((const TestBaseStruct*)guest_next);
}

// Exit-side handling of the Next chain: if the host struct has a successor, write it back into
// the guest's successor via the handler registered for the guest successor's type tag, then
// release the host-side successor with free(). Does nothing when there is no host successor.
static void default_fex_custom_repack_reverse(guest_layout<TestBaseStruct>& into, const TestBaseStruct* from) {
  const TestBaseStruct* host_next = from->Next;
  if (host_next == nullptr) {
    return;
  }

  auto guest_next = into.data.Next.get_pointer();
  const auto& handler = next_handlers.at(static_cast<StructType>(guest_next->data.Type.data));
  handler.second((void*)guest_next, host_next);

  free((void*)host_next);
}

// Defines the fex_custom_repack_entry/exit overload pair for struct type `name` in terms of the
// default_* helpers above, treating the struct as a TestBaseStruct for the purpose of its Next link.
// The generated exit hook first lets default_fex_custom_repack_reverse write back and free the
// host successor, then overwrites the guest struct with to_guest(from), restores the guest's
// original Next link and returns true.
#define CREATE_INFO_DEFAULT_CUSTOM_REPACK(name)                                                                                  \
  void fex_custom_repack_entry(host_layout<name>& into, const guest_layout<name>& from) {                                        \
    default_fex_custom_repack_entry(*(TestBaseStruct*)&into.data, reinterpret_cast<const guest_layout<TestBaseStruct>*>(&from)); \
  }                                                                                                                              \
                                                                                                                                 \
  bool fex_custom_repack_exit(guest_layout<name>& into, const host_layout<name>& from) {                                         \
    auto prev_next = into.data.Next;                                                                                             \
    default_fex_custom_repack_reverse(*reinterpret_cast<guest_layout<TestBaseStruct>*>(&into),                                   \
                                      &reinterpret_cast<const TestBaseStruct&>(from.data));                                      \
    into = to_guest(from);                                                                                                       \
    into.data.Next = prev_next;                                                                                                  \
    return true;                                                                                                                 \
  }

// Instantiate the hook pair for both chainable test structs.
CREATE_INFO_DEFAULT_CUSTOM_REPACK(TestStruct1)
CREATE_INFO_DEFAULT_CUSTOM_REPACK(TestStruct2)

// Repack hooks for the bare TestBaseStruct: both abort the process as soon as they are called.
void fex_custom_repack_entry(host_layout<TestBaseStruct>&, const guest_layout<TestBaseStruct>&) {
  std::abort();
}

bool fex_custom_repack_exit(guest_layout<TestBaseStruct>&, const host_layout<TestBaseStruct>&) {
  std::abort();
  // Unreachable; keeps the non-void function syntactically complete
  return false;
}

// EXPORTS is a macro defined outside this file.
// NOTE(review): presumably it emits the host-side export table for libfex_thunk_test - confirm in common/Host.h.
EXPORTS(libfex_thunk_test)


================================================
FILE: ThunkLibs/libfex_thunk_test/api.h
================================================
/**
 * This file defines interfaces of a dummy library used to test various
 * features of the thunk generator.
 */
#pragma once

#include <cstdint>
#include <limits>

extern "C" {

// Returns twice the input value (see lib.cpp).
uint32_t GetDoubledValue(uint32_t);


/// Interfaces used to test opaque_type and assume_compatible_data_layout annotations

// Only forward-declared here; the definition lives in lib.cpp, so users of this header can
// hold pointers to it but cannot look inside.
struct OpaqueType;

// Create / read / destroy an OpaqueType instance. The pointer returned by MakeOpaqueType is
// heap-allocated and must be released with DestroyOpaqueType.
OpaqueType* MakeOpaqueType(uint32_t data);
uint32_t ReadOpaqueTypeData(OpaqueType*);
void DestroyOpaqueType(OpaqueType*);

// Four bytes viewable as an unsigned word, a signed word or a byte array.
union UnionType {
  uint32_t a;
  int32_t b;
  uint8_t c[4];
};

// MakeUnionType fills the byte-array member c with {a, b, c, d};
// GetUnionTypeA reads the value back through the uint32_t member.
UnionType MakeUnionType(uint8_t a, uint8_t b, uint8_t c, uint8_t d);
uint32_t GetUnionTypeA(UnionType*);


/// Interfaces used to test automatic struct repacking

// A simple struct with data layout that differs between guest and host.
// The thunk generator should emit code that swaps the member data into
// correct position.
// Without GUEST_THUNK_LIBRARY defined the members are declared a, b; with it defined they are
// declared b, a.
struct ReorderingType {
#if !defined(GUEST_THUNK_LIBRARY)
  uint32_t a;
  uint32_t b;
#else
  uint32_t b;
  uint32_t a;
#endif
};

// index 0 selects member a, any other index selects member b.
ReorderingType MakeReorderingType(uint32_t a, uint32_t b);
uint32_t GetReorderingTypeMember(const ReorderingType*, int index);
// Adds 1 to member a and 2 to member b in place.
void ModifyReorderingTypeMembers(ReorderingType* data);
uint32_t QueryOffsetOf(ReorderingType*, int index);

// Uses assume_compatible_data_layout to skip repacking
uint32_t GetReorderingTypeMemberWithoutRepacking(const ReorderingType*, int index);

/// Interfaces used to test assisted struct repacking

// We enable custom repacking on the "data" member, with repacking code that
// sets "custom_repack_invoked" to 1 on entry.
struct CustomRepackedType {
  ReorderingType* data;
  int custom_repack_invoked;
};

// Returns the current value of "custom_repack_invoked", i.e. non-zero once the custom
// repacker has set it.
int RanCustomRepack(CustomRepackedType*);

/// Interface used to check that function arguments with different integer size
/// get forwarded correctly

// Same enum name, different underlying type: uint8_t without GUEST_THUNK_LIBRARY defined,
// uint32_t with it defined.
#if !defined(GUEST_THUNK_LIBRARY)
enum DivType : uint8_t {};
#else
enum DivType : uint32_t {};
#endif
// Packs the low 8 bits of each argument into one int, first argument in the top byte (see lib.cpp).
int FunctionWithDivergentSignature(DivType, DivType, DivType, DivType);


/// Interfaces used to test Vulkan-like APIs

// Equivalent of VkStructureType
// Tag stored in every chainable struct so a consumer can tell which concrete type it has.
enum class StructType {
  Struct1,
  Struct2,
};

// Equivalent of VkBaseInStructure
// Common prefix (Next link followed by type tag) shared by TestStruct1 and TestStruct2, which
// lets code walk a chain without knowing the concrete types up front.
struct TestBaseStruct {
  TestBaseStruct* Next;
  StructType Type;
};

// Equivalent of e.g. VkImageCreateInfo
// Starts with the same (Next, Type) prefix as TestBaseStruct; Next is an untyped pointer here.
struct TestStruct1 {
  const void* Next;
  StructType Type; // StructType::Struct1
  uint8_t Data2;
  uint8_t pad0[3];
  int Data1;
};

// Second chainable struct; same prefix, payload is a single int.
struct TestStruct2 {
  const void* Next;
  StructType Type; // StructType::Struct2
  int Data1;
};

// Follows `depth` Next links starting at the given struct and returns that element's Data1.
// Returns -1 if the chain ends first and -2 if the element has an unknown type tag (see lib.cpp).
int ReadData1(TestStruct1*, int depth);
}


================================================
FILE: ThunkLibs/libfex_thunk_test/lib.cpp
================================================
#include "api.h"

#include <cstdio>
#include <cstddef>

extern "C" {

// Returns the input doubled, using unsigned (wrap-around) arithmetic.
uint32_t GetDoubledValue(uint32_t input) {
  const uint32_t doubled = input + input;
  return doubled;
}

// Concrete definition of the type that api.h only forward-declares.
struct OpaqueType {
  uint32_t data;
};

// Heap-allocates an OpaqueType holding `data`; release it with DestroyOpaqueType.
OpaqueType* MakeOpaqueType(uint32_t data) {
  auto* instance = new OpaqueType;
  instance->data = data;
  return instance;
}

// Reads back the value stored by MakeOpaqueType.
uint32_t ReadOpaqueTypeData(OpaqueType* value) {
  const OpaqueType& instance = *value;
  return instance.data;
}

// Frees an instance created by MakeOpaqueType.
void DestroyOpaqueType(OpaqueType* value) {
  delete value;
}

// Builds a UnionType whose byte-array member holds {a, b, c, d} in that order.
UnionType MakeUnionType(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
  UnionType result {};
  result.c[0] = a;
  result.c[1] = b;
  result.c[2] = c;
  result.c[3] = d;
  return result;
}

// Reads the union through its uint32_t member.
uint32_t GetUnionTypeA(UnionType* value) {
  const UnionType& view = *value;
  return view.a;
}

// Builds a ReorderingType with the given member values, assigning each member by name.
ReorderingType MakeReorderingType(uint32_t a, uint32_t b) {
  ReorderingType result {};
  result.a = a;
  result.b = b;
  return result;
}

// Returns member a for index 0 and member b for every other index.
uint32_t GetReorderingTypeMember(const ReorderingType* data, int index) {
  return index == 0 ? data->a : data->b;
}

// Same native behaviour as GetReorderingTypeMember; it exists as a separate entry point so the
// thunk interface can annotate it differently.
uint32_t GetReorderingTypeMemberWithoutRepacking(const ReorderingType* data, int index) {
  const uint32_t member = GetReorderingTypeMember(data, index);
  return member;
}

// Bumps the members in place: a grows by 1, b grows by 2.
void ModifyReorderingTypeMembers(ReorderingType* data) {
  ReorderingType& target = *data;
  target.a = target.a + 1;
  target.b = target.b + 2;
}

// Returns the struct's custom_repack_invoked flag unchanged.
int RanCustomRepack(CustomRepackedType* data) {
  const int invoked = data->custom_repack_invoked;
  return invoked;
}

// Packs the low 8 bits of each argument into one int: a in bits 31..24, b in 23..16,
// c in 15..8 and d in 7..0.
int FunctionWithDivergentSignature(DivType a, DivType b, DivType c, DivType d) {
  // Each byte is promoted to int before shifting, matching plain integer-promotion rules
  const int part_a = static_cast<uint8_t>(a) << 24;
  const int part_b = static_cast<uint8_t>(b) << 16;
  const int part_c = static_cast<uint8_t>(c) << 8;
  const int part_d = static_cast<uint8_t>(d);
  return part_a | part_b | part_c | part_d;
}

// Walks `depth` Next links starting at `data` and returns the Data1 member of the struct found there.
// Returns -1 if the chain ends before or at that position, and -2 if the struct's type tag is
// neither Struct1 nor Struct2.
int ReadData1(TestStruct1* data, int depth) {
  auto* node = reinterpret_cast<TestBaseStruct*>(data);

  int hops = 0;
  while (hops != depth) {
    if (node == nullptr) {
      return -1;
    }
    node = node->Next;
    ++hops;
  }
  if (node == nullptr) {
    return -1;
  }

  // Dispatch on the tag to pick the concrete struct type
  if (node->Type == StructType::Struct1) {
    return reinterpret_cast<TestStruct1*>(node)->Data1;
  }
  if (node->Type == StructType::Struct2) {
    return reinterpret_cast<TestStruct2*>(node)->Data1;
  }
  return -2;
}

} // extern "C"


================================================
FILE: ThunkLibs/libfex_thunk_test/libfex_thunk_test_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include "api.h"

// Primary templates that the annotations below specialize. All three are empty by default.

// Keyed on a value: specialized below with functions and with pointers to data members.
template<auto>
struct fex_gen_config {};

// Keyed on a type.
template<typename>
struct fex_gen_type {};

// Keyed on (function, parameter index, parameter type).
template<auto, int, typename>
struct fex_gen_param {};

// Plain function entry, no annotations.
template<>
struct fex_gen_config<GetDoubledValue> {};

// OpaqueType is annotated as fexgen::opaque_type; its three accessor functions are plain entries.
template<>
struct fex_gen_type<OpaqueType> : fexgen::opaque_type {};
template<>
struct fex_gen_config<MakeOpaqueType> {};
template<>
struct fex_gen_config<ReadOpaqueTypeData> {};
template<>
struct fex_gen_config<DestroyOpaqueType> {};

// UnionType is annotated as fexgen::assume_compatible_data_layout; its functions are plain entries.
template<>
struct fex_gen_type<UnionType> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<MakeUnionType> {};
template<>
struct fex_gen_config<GetUnionTypeA> {};

// ReorderingType functions. Only GetReorderingTypeMemberWithoutRepacking carries a parameter
// annotation: its parameter 0 is marked assume_compatible_data_layout.
template<>
struct fex_gen_config<MakeReorderingType> {};
template<>
struct fex_gen_config<GetReorderingTypeMember> {};
template<>
struct fex_gen_config<GetReorderingTypeMemberWithoutRepacking> {};
template<>
struct fex_gen_param<GetReorderingTypeMemberWithoutRepacking, 0, const ReorderingType*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<ModifyReorderingTypeMembers> {};

// QueryOffsetOf is marked custom_host_impl and its parameter 0 is marked ptr_passthrough.
// Host.cpp provides fexfn_impl_libfex_thunk_test_QueryOffsetOf, which takes that parameter as
// guest_layout<ReorderingType*>.
template<>
struct fex_gen_config<QueryOffsetOf> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<QueryOffsetOf, 0, ReorderingType*> : fexgen::ptr_passthrough {};

// The `data` member of CustomRepackedType is marked custom_repack.
template<>
struct fex_gen_config<&CustomRepackedType::data> : fexgen::custom_repack {};
template<>
struct fex_gen_config<RanCustomRepack> {};

template<>
struct fex_gen_config<FunctionWithDivergentSignature> {};

// The Next links of all three chainable structs are marked custom_repack.
template<>
struct fex_gen_config<&TestBaseStruct::Next> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&TestStruct1::Next> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&TestStruct2::Next> : fexgen::custom_repack {};

template<>
struct fex_gen_config<ReadData1> {};


================================================
FILE: ThunkLibs/libvulkan/Guest.cpp
================================================
/*
$info$
tags: thunklibs|Vulkan
$end_info$
*/

#define VK_USE_64_BIT_PTR_DEFINES 0

#define VK_USE_PLATFORM_XLIB_XRANDR_EXT
#define VK_USE_PLATFORM_XLIB_KHR
#define VK_USE_PLATFORM_XCB_KHR
#define VK_USE_PLATFORM_WAYLAND_KHR
#include <vulkan/vulkan.h>

#include "common/Guest.h"

#include <cstdio>
#include <dlfcn.h>
#include <functional>
#include <string_view>
#include <unordered_map>

#include "thunkgen_guest_libvulkan.inl"

extern "C" {

// Maps Vulkan API function names to the address of a guest function which is
// linked to the corresponding host function pointer
// The map is built once by an immediately-invoked lambda: FOREACH_internal_SYMBOL expands PAIR
// for every known symbol, and PAIR stores GetCallerForHostFunction(name) under the symbol's
// stringized name. PAIR's second argument is ignored.
const std::unordered_map<std::string_view, uintptr_t /* guest function address */> HostPtrInvokers = std::invoke([]() {
#define PAIR(name, unused) Ret[#name] = reinterpret_cast<uintptr_t>(GetCallerForHostFunction(name));
  std::unordered_map<std::string_view, uintptr_t> Ret;
  FOREACH_internal_SYMBOL(PAIR);
  return Ret;
#undef PAIR
});

// This variable controls the behavior of vkGetDevice/InstanceProcAddr for functions we don't know the signature of:
// - if false (default), we return a nullptr (since the application might have a fallback code path)
// - if true, we return a stub function that fatally errors upon being called
// It is a compile-time constant, so switching modes requires rebuilding this library.
constexpr bool stub_unknown_functions = false;

// Placeholder target for Vulkan functions we don't know. It has the same single-pointer
// interface as a thunk: the packed arguments carry the address that was called, which is
// printed to stderr before the process is trapped.
[[noreturn]]
static void FatalError(void* raw_args) {
  const auto* packed = reinterpret_cast<PackedArguments<void, uintptr_t>*>(raw_args);
  const auto target_address = packed->a0;
  fprintf(stderr, "FATAL: Called unknown Vulkan function at address %p\n", reinterpret_cast<void*>(target_address));
  __builtin_trap();
}

// Makes a host function pointer returned by the Vulkan loader callable from the guest.
//
// origin: name of the calling function, used only as a prefix in the diagnostic message.
// func:   the function pointer obtained from the host.
// name:   the Vulkan function name that was queried; used to look up the matching invoker.
//
// Returns `func` after linking its address to the invoker registered under `name` in
// HostPtrInvokers. If no invoker is known, a message is logged and nullptr is returned - unless
// stub_unknown_functions is set, in which case `func` is linked to a stub that fatally errors
// when called and `func` is returned.
static PFN_vkVoidFunction MakeGuestCallable(const char* origin, PFN_vkVoidFunction func, const char* name) {
  // %p requires a void* argument; a function pointer has to be converted explicitly
  // (FatalError in this file does the same for its address).
  void* const func_address = reinterpret_cast<void*>(func);

  auto It = HostPtrInvokers.find(name);
  if (It == HostPtrInvokers.end()) {
    fprintf(stderr, "%s: Unknown Vulkan function at address %p: %s\n", origin, func_address, name);
    if (stub_unknown_functions) {
      const auto StubHostPtrInvoker = CallHostFunction<FatalError, void>;
      LinkAddressToFunction((uintptr_t)func, reinterpret_cast<uintptr_t>(StubHostPtrInvoker));
      return func;
    }
    return nullptr;
  }
  // %zx expects a size_t; the map stores uintptr_t, so convert to the type the format names
  fprintf(stderr, "Linking address %p to host invoker %#zx\n", func_address, static_cast<size_t>(It->second));
  LinkAddressToFunction((uintptr_t)func, It->second);
  return func;
}

// Queries the host for the device-level function named `a_1` and, if one is returned, makes it
// guest-callable. A null result from the host is passed straight back as nullptr.
PFN_vkVoidFunction vkGetDeviceProcAddr(VkDevice a_0, const char* a_1) {
  const auto host_func = fexfn_pack_vkGetDeviceProcAddr(a_0, a_1);
  return host_func ? MakeGuestCallable(__FUNCTION__, host_func, a_1) : nullptr;
}

// Instance-level counterpart of vkGetDeviceProcAddr. A query for "vkGetDeviceProcAddr" is answered
// with this library's own wrapper instead of asking the host; every other name is looked up on
// the host and, if found, made guest-callable.
PFN_vkVoidFunction vkGetInstanceProcAddr(VkInstance a_0, const char* a_1) {
  if (a_1 == std::string_view {"vkGetDeviceProcAddr"}) {
    return (PFN_vkVoidFunction)vkGetDeviceProcAddr;
  }

  const auto host_func = fexfn_pack_vkGetInstanceProcAddr(a_0, a_1);
  return host_func ? MakeGuestCallable(__FUNCTION__, host_func, a_1) : nullptr;
}
}

// Init callback: opens the guest's libX11 and hands the guest addresses of XSync,
// XGetVisualInfo and XDisplayString to the host side, each together with its callback unpacker.
void OnInit() {
  // TODO: Load libX11 on-demand instead
  // NOTE(review): the dlopen result is not checked before being passed to dlsym - confirm the
  // behaviour that is wanted when libX11.so.6 is missing.
  void* const x11_handle = dlopen("libX11.so.6", RTLD_LAZY);
  const auto guest_address_of = [x11_handle](const char* symbol) {
    return (uintptr_t)dlsym(x11_handle, symbol);
  };

  fexfn_pack_Vulkan_SetGuestXSync(guest_address_of("XSync"), (uintptr_t)CallbackUnpack<decltype(XSync)>::Unpack);
  fexfn_pack_Vulkan_SetGuestXGetVisualInfo(guest_address_of("XGetVisualInfo"), (uintptr_t)CallbackUnpack<decltype(XGetVisualInfo)>::Unpack);
  fexfn_pack_Vulkan_SetGuestXDisplayString(guest_address_of("XDisplayString"), (uintptr_t)CallbackUnpack<decltype(XDisplayString)>::Unpack);
}

// LOAD_LIB_INIT is a macro defined outside this file.
// NOTE(review): presumably it emits the guest-side load code for libvulkan and runs OnInit as
// part of loading - confirm in common/Guest.h.
LOAD_LIB_INIT(libvulkan, OnInit)


================================================
FILE: ThunkLibs/libvulkan/Host.cpp
================================================
/*
$info$
tags: thunklibs|Vulkan
$end_info$
*/

#define VK_USE_64_BIT_PTR_DEFINES 0

#define VK_USE_PLATFORM_XLIB_XRANDR_EXT
#define VK_USE_PLATFORM_XLIB_KHR
#define VK_USE_PLATFORM_XCB_KHR
#define VK_USE_PLATFORM_WAYLAND_KHR
#include <vulkan/vulkan.h>

#include "common/Host.h"

#include <cassert>
#include <cstring>
#include <mutex>
#include <span>
#include <string_view>
#include <unordered_map>
#include <vector>

#ifdef IS_32BIT_THUNK
// Union type embedded in VkDescriptorGetInfoEXT
// For 32-bit thunks the guest-side representation is declared by hand as 8 bytes of raw storage.
// It has to be visible before the generated thunkgen_host_libvulkan.inl is included below.
template<>
struct guest_layout<VkDescriptorDataEXT> {
  char union_storage[8];
};
#endif

#include "thunkgen_host_libvulkan.inl"

#include <common/X11Manager.h>

// Set to true by DoSetupWithInstance once it has run with a non-null instance.
static bool SetupInstance {};
// Held by DoSetupWithInstance for its whole duration.
static std::mutex SetupMutex {};

// Names the loader function pointer variable for Vulkan function `fn`.
#define LDR_PTR(fn) fexldr_ptr_libvulkan_##fn

// Resolves the loader pointers that this file calls directly (vkGetDeviceProcAddr and
// vkCreateDevice) through vkGetInstanceProcAddr for `instance`, under SetupMutex.
// Aborts if vkGetDeviceProcAddr cannot be resolved. SetupInstance is set only when `instance`
// is non-null.
static void DoSetupWithInstance(VkInstance instance) {
  std::lock_guard guard {SetupMutex};

  // Needed since the Guest-endpoint calls without a function pointer
  // TODO: Support use of multiple instances
  (void*&)LDR_PTR(vkGetDeviceProcAddr) = (void*)LDR_PTR(vkGetInstanceProcAddr)(instance, "vkGetDeviceProcAddr");
  if (!LDR_PTR(vkGetDeviceProcAddr)) {
    std::abort();
  }

  // Query pointers for non-EXT functions customized below
  (void*&)LDR_PTR(vkCreateDevice) = (void*)LDR_PTR(vkGetInstanceProcAddr)(instance, "vkCreateDevice");

  // Only do this lookup once.
  // NOTE: If vkGetInstanceProcAddr was called with a null instance, only a few function pointers will be filled with non-null values, so we do repeat the lookup in that case
  if (instance != nullptr) {
    SetupInstance = true;
  }
}

// Names the custom host implementation function for Vulkan function `fn`.
#define FEXFN_IMPL(fn) fexfn_impl_libvulkan_##fn

// Shared X11 helper object: translates guest Display/xcb connections to host ones and holds
// the host trampolines for the guest's X11 callbacks.
static X11Manager x11_manager;

// The three setters below each create a host trampoline for a guest X11 function
// (GuestTarget, called through GuestUnpacker) and store it in the matching x11_manager slot.

static void fexfn_impl_libvulkan_Vulkan_SetGuestXGetVisualInfo(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  auto* const TrampolineSlot = &x11_manager.GuestXGetVisualInfo;
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, TrampolineSlot);
}

static void fexfn_impl_libvulkan_Vulkan_SetGuestXSync(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  auto* const TrampolineSlot = &x11_manager.GuestXSync;
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, TrampolineSlot);
}

static void fexfn_impl_libvulkan_Vulkan_SetGuestXDisplayString(uintptr_t GuestTarget, uintptr_t GuestUnpacker) {
  auto* const TrampolineSlot = &x11_manager.GuestXDisplayString;
  MakeHostTrampolineForGuestFunctionAt(GuestTarget, GuestUnpacker, TrampolineSlot);
}

// Entry repack for VkXcbSurfaceCreateInfoKHR: replaces the guest xcb connection with the
// host connection that x11_manager maps it to.
void fex_custom_repack_entry(host_layout<VkXcbSurfaceCreateInfoKHR>& to, const guest_layout<VkXcbSurfaceCreateInfoKHR>& from) {
  // TODO: xcb_aux_sync?
  auto* const guest_connection = const_cast<xcb_connection_t*>(from.data.connection.force_get_host_pointer());
  to.data.connection = x11_manager.GuestToHostConnection(guest_connection);
}

// Exit repack for VkXcbSurfaceCreateInfoKHR: currently does nothing and returns false.
bool fex_custom_repack_exit(guest_layout<VkXcbSurfaceCreateInfoKHR>&, const host_layout<VkXcbSurfaceCreateInfoKHR>&) {
  // TODO: xcb_sync?
  return false;
}

// Entry repack for VkXlibSurfaceCreateInfoKHR: replaces the guest Display with the host
// Display that x11_manager maps it to.
void fex_custom_repack_entry(host_layout<VkXlibSurfaceCreateInfoKHR>& to, const guest_layout<VkXlibSurfaceCreateInfoKHR>& from) {
  auto* const guest_display = const_cast<Display*>(from.data.dpy.force_get_host_pointer());
  to.data.dpy = x11_manager.GuestToHostDisplay(guest_display);
}

// Exit repack for VkXlibSurfaceCreateInfoKHR: flushes the host Display, then returns false.
bool fex_custom_repack_exit(guest_layout<VkXlibSurfaceCreateInfoKHR>&, const host_layout<VkXlibSurfaceCreateInfoKHR>& from) {
  auto* const host_display = from.data.dpy;
  x11_manager.HostXFlush(host_display);
  return false;
}

// vkAcquireXlibDisplayEXT override: swaps the guest Display (a_1) for the host
// Display before forwarding to the host driver, then flushes the host Display.
static VkResult fexfn_impl_libvulkan_vkAcquireXlibDisplayEXT(VkPhysicalDevice a_0, guest_layout<Display*> a_1, VkDisplayKHR a_2) {
  const auto HostDpy = x11_manager.GuestToHostDisplay(a_1.force_get_host_pointer());

  const VkResult Result = fexldr_ptr_libvulkan_vkAcquireXlibDisplayEXT(a_0, HostDpy, a_2);

  // Flush after the host call, regardless of its result.
  x11_manager.HostXFlush(HostDpy);
  return Result;
}

// vkGetRandROutputDisplayEXT override: swaps the guest Display (a_1) for the
// host Display before forwarding to the host driver, then flushes the host Display.
static VkResult fexfn_impl_libvulkan_vkGetRandROutputDisplayEXT(VkPhysicalDevice a_0, guest_layout<Display*> a_1, RROutput a_2, VkDisplayKHR* a_3) {
  const auto HostDpy = x11_manager.GuestToHostDisplay(a_1.force_get_host_pointer());

  const VkResult Result = fexldr_ptr_libvulkan_vkGetRandROutputDisplayEXT(a_0, HostDpy, a_2, a_3);

  // Flush after the host call, regardless of its result.
  x11_manager.HostXFlush(HostDpy);
  return Result;
}

// vkGetPhysicalDeviceXcbPresentationSupportKHR override: swaps the guest xcb
// connection (a_2) for the host connection and forwards the query unchanged.
static VkBool32 fexfn_impl_libvulkan_vkGetPhysicalDeviceXcbPresentationSupportKHR(VkPhysicalDevice a_0, uint32_t a_1,
                                                                                  guest_layout<xcb_connection_t*> a_2, xcb_visualid_t a_3) {
  const auto HostConn = x11_manager.GuestToHostConnection(a_2.force_get_host_pointer());
  const VkBool32 Supported = fexldr_ptr_libvulkan_vkGetPhysicalDeviceXcbPresentationSupportKHR(a_0, a_1, HostConn, a_3);
  return Supported;
}

// vkGetPhysicalDeviceXlibPresentationSupportKHR override: swaps the guest
// Display (a_2) for the host Display, forwards the query, then flushes the host Display.
static VkBool32 fexfn_impl_libvulkan_vkGetPhysicalDeviceXlibPresentationSupportKHR(VkPhysicalDevice a_0, uint32_t a_1,
                                                                                   guest_layout<Display*> a_2, VisualID a_3) {
  const auto HostDpy = x11_manager.GuestToHostDisplay(a_2.force_get_host_pointer());

  const VkBool32 Supported = fexldr_ptr_libvulkan_vkGetPhysicalDeviceXlibPresentationSupportKHR(a_0, a_1, HostDpy, a_3);

  x11_manager.HostXFlush(HostDpy);
  return Supported;
}

// Functions with callbacks are overridden to ignore the guest-side callbacks

// vkCreateShaderModule override: re-queries the host entry point for the given
// device (a_0) and forwards the call with the guest allocation callbacks (a_2)
// replaced by nullptr.
static VkResult
FEXFN_IMPL(vkCreateShaderModule)(VkDevice a_0, const VkShaderModuleCreateInfo* a_1, const VkAllocationCallbacks* a_2, VkShaderModule* a_3) {
  auto& HostFn = LDR_PTR(vkCreateShaderModule);
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(a_0, "vkCreateShaderModule");
  return HostFn(a_0, a_1, nullptr, a_3);
}

// Host-side stand-in for guest VK_EXT_debug_report callbacks.
// Ignores every argument and always returns VK_FALSE.
static VkBool32
DummyVkDebugReportCallback(VkDebugReportFlagsEXT, VkDebugReportObjectTypeEXT, uint64_t, size_t, int32_t, const char*, const char*, void*) {
  return VK_FALSE;
}

// vkCreateInstance override.
//
// - Unlinks every VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT entry from the
//   pNext chain of a_0 so guest debug-report callbacks never reach the host.
// - Forwards the call with the guest allocation callbacks (a_1) replaced by nullptr.
// - Writes the resulting instance handle to the guest (a_2). The handle is
//   initialized to VK_NULL_HANDLE so a well-defined value is written even if
//   the host call fails without touching its output.
static VkResult FEXFN_IMPL(vkCreateInstance)(const VkInstanceCreateInfo* a_0, const VkAllocationCallbacks* a_1, guest_layout<VkInstance*> a_2) {
  // The chain is edited in place, deliberately ignoring its const-qualifier.
  auto* vk_struct = const_cast<VkBaseInStructure*>(reinterpret_cast<const VkBaseInStructure*>(a_0));
  while (vk_struct->pNext) {
    // Override guest callbacks used for VK_EXT_debug_report
    if (vk_struct->pNext->sType == VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT) {
      vk_struct->pNext = vk_struct->pNext->pNext;
      // Stay on the current node so the new successor is inspected as well.
      continue;
    }
    vk_struct = const_cast<VkBaseInStructure*>(vk_struct->pNext);
  }

  VkInstance out = VK_NULL_HANDLE;
  auto ret = LDR_PTR(vkCreateInstance)(a_0, nullptr, &out);
  *a_2.get_pointer() = to_guest(to_host_layout(out));
  return ret;
}

// vkCreateDevice override.
//
// Forwards the call with the guest allocation callbacks (a_2) replaced by
// nullptr and writes the resulting device handle to the guest (a_3). The handle
// is initialized to VK_NULL_HANDLE so a well-defined value is written even if
// the host call fails without touching its output.
static VkResult FEXFN_IMPL(vkCreateDevice)(VkPhysicalDevice a_0, const VkDeviceCreateInfo* a_1, const VkAllocationCallbacks* a_2,
                                           guest_layout<VkDevice*> a_3) {
  VkDevice out = VK_NULL_HANDLE;
  auto ret = LDR_PTR(vkCreateDevice)(a_0, a_1, nullptr, &out);
  *a_3.get_pointer() = to_guest(to_host_layout(out));

  // Reload device-specific function pointers used in custom implementations.
  // This is only done in advance for functions that don't take a VkDevice
  // argument. Since this breaks multi-device scenarios, other functions reload
  // the function pointer on-demand.
  // NOTE: Running KHR-GLES31.core.compute_shader.simple-compute-shared_context with zink may trigger related issues
  // TODO: Support multi-device scenarios everywhere
#ifdef IS_32BIT_THUNK
  // Only query with a device that was actually created: on failure `out` is not
  // a valid handle, and the previously cached pointers must stay untouched.
  if (ret == VK_SUCCESS) {
    fexldr_ptr_libvulkan_vkCmdSetVertexInputEXT =
      (PFN_vkCmdSetVertexInputEXT)fexldr_ptr_libvulkan_vkGetDeviceProcAddr(out, "vkCmdSetVertexInputEXT");
    fexldr_ptr_libvulkan_vkQueueSubmit = (PFN_vkQueueSubmit)fexldr_ptr_libvulkan_vkGetDeviceProcAddr(out, "vkQueueSubmit");
  }
#else
  // No functions affected on 64-bit
#endif

  return ret;
}

// vkAllocateMemory override: re-queries the host entry point for the given
// device (a_0) and forwards the call with the guest allocation callbacks (a_2)
// replaced by nullptr.
static VkResult FEXFN_IMPL(vkAllocateMemory)(VkDevice a_0, const VkMemoryAllocateInfo* a_1, const VkAllocationCallbacks* a_2, VkDeviceMemory* a_3) {
  auto& HostFn = LDR_PTR(vkAllocateMemory);
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(a_0, "vkAllocateMemory");
  return HostFn(a_0, a_1, nullptr, a_3);
}

// vkFreeMemory override: re-queries the host entry point for the given device
// (a_0) and forwards the call with the guest allocation callbacks (a_2)
// replaced by nullptr.
static void FEXFN_IMPL(vkFreeMemory)(VkDevice a_0, VkDeviceMemory a_1, const VkAllocationCallbacks* a_2) {
  auto& HostFn = LDR_PTR(vkFreeMemory);
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(a_0, "vkFreeMemory");
  HostFn(a_0, a_1, nullptr);
}

// vkCreateDebugReportCallbackEXT override: converts the guest create-info to
// host layout, swaps the guest callback for DummyVkDebugReportCallback,
// re-queries the host entry point for the instance (a_0) and forwards the call
// with the guest allocation callbacks (a_2) replaced by nullptr.
static VkResult FEXFN_IMPL(vkCreateDebugReportCallbackEXT)(VkInstance a_0, guest_layout<const VkDebugReportCallbackCreateInfoEXT*> a_1,
                                                           const VkAllocationCallbacks* a_2, VkDebugReportCallbackEXT* a_3) {
  // Host-layout copy of the guest structure; only the callback is replaced.
  auto HostCreateInfo = host_layout<VkDebugReportCallbackCreateInfoEXT> {*a_1.get_pointer()}.data;
  HostCreateInfo.pfnCallback = DummyVkDebugReportCallback;

  auto& HostFn = LDR_PTR(vkCreateDebugReportCallbackEXT);
  (void*&)HostFn = (void*)LDR_PTR(vkGetInstanceProcAddr)(a_0, "vkCreateDebugReportCallbackEXT");
  return HostFn(a_0, &HostCreateInfo, nullptr, a_3);
}

// vkDestroyDebugReportCallbackEXT override: re-queries the host entry point for
// the instance (a_0) and forwards the call with the guest allocation callbacks
// (a_2) replaced by nullptr.
static void FEXFN_IMPL(vkDestroyDebugReportCallbackEXT)(VkInstance a_0, VkDebugReportCallbackEXT a_1, const VkAllocationCallbacks* a_2) {
  auto& HostFn = LDR_PTR(vkDestroyDebugReportCallbackEXT);
  (void*&)HostFn = (void*)LDR_PTR(vkGetInstanceProcAddr)(a_0, "vkDestroyDebugReportCallbackEXT");
  HostFn(a_0, a_1, nullptr);
}

// Host-side stand-in for guest VK_EXT_debug_utils messenger callbacks.
// Ignores every argument and always returns VK_FALSE.
extern "C" VkBool32 DummyVkDebugUtilsMessengerCallback(VkDebugUtilsMessageSeverityFlagBitsEXT, VkDebugUtilsMessageTypeFlagsEXT,
                                                       const VkDebugUtilsMessengerCallbackDataEXT*, void*) {
  return VK_FALSE;
}

// vkCreateDebugUtilsMessengerEXT override: converts the guest create-info to
// host layout, swaps the guest callback for DummyVkDebugUtilsMessengerCallback,
// re-queries the host entry point for the instance (a_0) and forwards the call
// with the guest allocation callbacks (a_2) replaced by nullptr.
static VkResult FEXFN_IMPL(vkCreateDebugUtilsMessengerEXT)(VkInstance_T* a_0, guest_layout<const VkDebugUtilsMessengerCreateInfoEXT*> a_1,
                                                           const VkAllocationCallbacks* a_2, VkDebugUtilsMessengerEXT* a_3) {
  // Host-layout copy of the guest structure; only the callback is replaced.
  auto HostCreateInfo = host_layout<VkDebugUtilsMessengerCreateInfoEXT> {*a_1.get_pointer()}.data;
  HostCreateInfo.pfnUserCallback = DummyVkDebugUtilsMessengerCallback;

  auto& HostFn = LDR_PTR(vkCreateDebugUtilsMessengerEXT);
  (void*&)HostFn = (void*)LDR_PTR(vkGetInstanceProcAddr)(a_0, "vkCreateDebugUtilsMessengerEXT");
  return HostFn(a_0, &HostCreateInfo, nullptr, a_3);
}

#ifdef IS_32BIT_THUNK
// 32-bit override of vkEnumeratePhysicalDevices.
// A null output array is forwarded as a plain count query. Otherwise the host
// handles are collected in a temporary host-sized array and converted to guest
// layout one by one.
VkResult fexfn_impl_libvulkan_vkEnumeratePhysicalDevices(VkInstance instance, uint32_t* count, guest_layout<VkPhysicalDevice*> devices) {
  const auto guest_devices = devices.get_pointer();
  if (!guest_devices) {
    return fexldr_ptr_libvulkan_vkEnumeratePhysicalDevices(instance, count, nullptr);
  }

  // *count is in/out: remember the capacity the caller provided.
  const auto capacity = *count;
  std::vector<VkPhysicalDevice> host_devices(capacity);
  const auto result = fexldr_ptr_libvulkan_vkEnumeratePhysicalDevices(instance, count, host_devices.data());

  // Copy back no more entries than the caller's capacity or the reported count.
  for (size_t idx = 0; idx < std::min(capacity, *count); ++idx) {
    guest_devices[idx] = to_guest(to_host_layout(host_devices[idx]));
  }
  return result;
}

// 32-bit override of vkGetDeviceQueue: re-queries the host entry point for the
// given device, fetches the queue into a host-sized handle and stores it to the
// guest in guest layout.
void fexfn_impl_libvulkan_vkGetDeviceQueue(VkDevice device, uint32_t family_index, uint32_t queue_index, guest_layout<VkQueue*> queue) {
  auto& HostFn = fexldr_ptr_libvulkan_vkGetDeviceQueue;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkGetDeviceQueue");

  VkQueue host_queue;
  HostFn(device, family_index, queue_index, &host_queue);
  *queue.get_pointer() = to_guest(to_host_layout(host_queue));
}

// 32-bit override of vkAllocateCommandBuffers: allocates into a temporary
// host-sized handle array and, only on VK_SUCCESS, converts each handle to
// guest layout in the caller's array.
VkResult fexfn_impl_libvulkan_vkAllocateCommandBuffers(VkDevice device, const VkCommandBufferAllocateInfo* info,
                                                       guest_layout<VkCommandBuffer*> buffers) {
  std::vector<VkCommandBuffer> host_buffers(info->commandBufferCount);

  auto& HostFn = fexldr_ptr_libvulkan_vkAllocateCommandBuffers;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkAllocateCommandBuffers");

  const auto result = HostFn(device, info, host_buffers.data());
  if (result != VK_SUCCESS) {
    // Leave the guest array untouched on failure.
    return result;
  }

  for (size_t idx = 0; idx < info->commandBufferCount; ++idx) {
    buffers.get_pointer()[idx] = to_guest(to_host_layout(host_buffers[idx]));
  }
  return result;
}

// 32-bit override of vkMapMemory: re-queries the host entry point for the given
// device, maps into a host-sized pointer and, only on VK_SUCCESS, stores that
// pointer to the guest in guest layout.
VkResult fexfn_impl_libvulkan_vkMapMemory(VkDevice device, VkDeviceMemory memory, VkDeviceSize offset, VkDeviceSize size,
                                          VkMemoryMapFlags flags, guest_layout<void**> data) {
  auto& HostFn = fexldr_ptr_libvulkan_vkMapMemory;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkMapMemory");

  void* host_address;
  const auto result = HostFn(device, memory, offset, size, flags, &host_address);
  if (result != VK_SUCCESS) {
    // Leave the guest output untouched on failure.
    return result;
  }

  host_layout<void*> wrapped {};
  wrapped.data = host_address;
  *data.get_pointer() = to_guest(wrapped);
  return result;
}

// Converts an array of Count guest-layout elements into a freshly allocated
// array of host-layout elements.
//
// Returns an empty span when the guest pointer is null or Count is 0.
// When NeedsRepack is true, fex_apply_custom_repacking_entry is additionally
// run on every element.
//
// The returned storage lives on the heap and must be de-allocated using
// delete[] or DeleteRepackedStructArray.
template<bool NeedsRepack = true, typename T>
std::span<std::remove_cv_t<T>> RepackStructArray(uint32_t Count, const guest_layout<T*> GuestData) {
  using HostType = std::remove_cv_t<T>;

  const auto GuestArray = GuestData.get_pointer();
  if (Count == 0 || !GuestArray) {
    return {};
  }

  auto HostArray = new HostType[Count];
  for (size_t Index = 0; Index < Count; ++Index) {
    // View the guest element without cv-qualifiers on the wrapped type.
    auto& GuestItem = (const guest_layout<HostType>&)GuestArray[Index];
    auto HostItem = host_layout<HostType> {GuestItem};
    if constexpr (NeedsRepack) {
      fex_apply_custom_repacking_entry(HostItem, GuestItem);
    }
    HostArray[Index] = HostItem.data;
  }
  return {HostArray, Count};
}

// Runs fex_apply_custom_repacking_exit on each of the Count guest/host element
// pairs, then releases HostData with delete[].
template<typename T>
void DeleteRepackedStructArray(uint32_t Count, T* HostData, guest_layout<T*>& GuestData) {
  for (uint32_t Index = 0; Index != Count; ++Index) {
    auto& GuestItem = GuestData.get_pointer()[Index];
    fex_apply_custom_repacking_exit(GuestItem, to_host_layout(HostData[Index]));
  }
  delete[] HostData;
}

// 32-bit override of vkCmdSetVertexInputEXT: repacks both guest description
// arrays into host layout, forwards the call, and frees the temporary arrays.
//
// Either count may be 0, in which case RepackStructArray yields an empty span
// and a null pointer is forwarded together with the zero count. A non-zero
// count with a null guest array is a caller error and is caught by the asserts.
void fexfn_impl_libvulkan_vkCmdSetVertexInputEXT(
  VkCommandBuffer Buffer, uint32_t BindingDescCount, guest_layout<const VkVertexInputBindingDescription2EXT*> GuestBindingDescs,
  uint32_t AttributeDescCount, guest_layout<const VkVertexInputAttributeDescription2EXT*> GuestAttributeDescs) {

  assert(BindingDescCount == 0 || GuestBindingDescs.get_pointer());
  assert(AttributeDescCount == 0 || GuestAttributeDescs.get_pointer());

  auto BindingDescs = RepackStructArray(BindingDescCount, GuestBindingDescs);
  auto AttributeDescs = RepackStructArray(AttributeDescCount, GuestAttributeDescs);

  fexldr_ptr_libvulkan_vkCmdSetVertexInputEXT(Buffer, BindingDescCount, BindingDescs.data(), AttributeDescCount, AttributeDescs.data());

  // delete[] on the null pointer of an empty span is a no-op.
  delete[] AttributeDescs.data();
  delete[] BindingDescs.data();
}

// 32-bit override of vkUpdateDescriptorSets: repacks the guest write and copy
// arrays into host layout, re-queries the host entry point for the given
// device, forwards the call, and frees the temporary arrays.
void fexfn_impl_libvulkan_vkUpdateDescriptorSets(VkDevice device, unsigned int descriptorWriteCount,
                                                 guest_layout<const VkWriteDescriptorSet*> pDescriptorWrites, unsigned int descriptorCopyCount,
                                                 guest_layout<const VkCopyDescriptorSet*> pDescriptorCopies) {

  const auto Writes = RepackStructArray(descriptorWriteCount, pDescriptorWrites);
  const auto Copies = RepackStructArray(descriptorCopyCount, pDescriptorCopies);

  auto& HostFn = fexldr_ptr_libvulkan_vkUpdateDescriptorSets;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkUpdateDescriptorSets");
  HostFn(device, descriptorWriteCount, Writes.data(), descriptorCopyCount, Copies.data());

  // Release the temporaries in reverse order of allocation.
  delete[] Copies.data();
  delete[] Writes.data();
}

// 32-bit override of vkQueueSubmit: repacks the guest VkSubmitInfo array into
// host layout, forwards the call through the cached host pointer, and frees the
// temporary array.
VkResult fexfn_impl_libvulkan_vkQueueSubmit(VkQueue queue, uint32_t submit_count, guest_layout<const VkSubmitInfo*> submit_infos, VkFence fence) {

  const auto Submits = RepackStructArray(submit_count, submit_infos);
  const auto Result = fexldr_ptr_libvulkan_vkQueueSubmit(queue, submit_count, Submits.data(), fence);

  delete[] Submits.data();
  return Result;
}

// 32-bit override of vkFreeCommandBuffers: converts the guest handle array to
// host layout (without custom repacking), re-queries the host entry point for
// the given device, forwards the call, and frees the temporary array.
void fexfn_impl_libvulkan_vkFreeCommandBuffers(VkDevice device, VkCommandPool pool, uint32_t num_buffers,
                                               guest_layout<const VkCommandBuffer*> buffers) {

  const auto Handles = RepackStructArray<false>(num_buffers, buffers);

  auto& HostFn = fexldr_ptr_libvulkan_vkFreeCommandBuffers;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkFreeCommandBuffers");
  HostFn(device, pool, num_buffers, Handles.data());

  delete[] Handles.data();
}

// 32-bit override of vkGetPipelineCacheData: the guest passes the in/out size
// as a 32-bit value while the host API takes a size_t, so the value is widened
// for the call and written back to the guest afterwards.
VkResult fexfn_impl_libvulkan_vkGetPipelineCacheData(VkDevice device, VkPipelineCache cache, guest_layout<uint32_t*> guest_data_size, void* data) {
  auto& HostFn = fexldr_ptr_libvulkan_vkGetPipelineCacheData;

  size_t host_size = guest_data_size.get_pointer()->data;
  (void*&)HostFn = (void*)LDR_PTR(vkGetDeviceProcAddr)(device, "vkGetPipelineCacheData");
  const auto result = HostFn(device, cache, &host_size, data);

  // Written back unconditionally, whatever the result code.
  *guest_data_size.get_pointer() = host_size;
  return result;
}

#endif

// Maps a Vulkan function name to its custom host-side implementation in this
// file. Returns nullptr when the name has no custom implementation.
// The 32-bit-only overrides are included only when IS_32BIT_THUNK is defined.
static PFN_vkVoidFunction LookupCustomVulkanFunction(const char* a_1) {
  using namespace std::string_view_literals;

  static const std::pair<std::string_view, PFN_vkVoidFunction> CustomFunctions[] = {
    {"vkCreateShaderModule"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkCreateShaderModule},
    {"vkCreateInstance"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkCreateInstance},
    {"vkCreateDevice"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkCreateDevice},
    {"vkAllocateMemory"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkAllocateMemory},
    {"vkFreeMemory"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkFreeMemory},
    {"vkAcquireXlibDisplayEXT"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkAcquireXlibDisplayEXT},
    {"vkGetRandROutputDisplayEXT"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkGetRandROutputDisplayEXT},
    {"vkGetPhysicalDeviceXcbPresentationSupportKHR"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkGetPhysicalDeviceXcbPresentationSupportKHR},
    {"vkGetPhysicalDeviceXlibPresentationSupportKHR"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkGetPhysicalDeviceXlibPresentationSupportKHR},
#ifdef IS_32BIT_THUNK
    {"vkAllocateCommandBuffers"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkAllocateCommandBuffers},
    {"vkEnumeratePhysicalDevices"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkEnumeratePhysicalDevices},
    {"vkFreeCommandBuffers"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkFreeCommandBuffers},
    {"vkGetDeviceQueue"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkGetDeviceQueue},
    {"vkGetPipelineCacheData"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkGetPipelineCacheData},
    {"vkMapMemory"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkMapMemory},
    {"vkQueueSubmit"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkQueueSubmit},
    {"vkCmdSetVertexInputEXT"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkCmdSetVertexInputEXT},
    {"vkUpdateDescriptorSets"sv, (PFN_vkVoidFunction)fexfn_impl_libvulkan_vkUpdateDescriptorSets},
#endif
  };

  const std::string_view Requested {a_1};
  for (const auto& [Name, Implementation] : CustomFunctions) {
    if (Requested == Name) {
      return Implementation;
    }
  }
  return nullptr;
}

// vkGetDeviceProcAddr override.
// Returns a host-facing function pointer; the guest handles mapping if one
// exists. Names with a custom implementation in this file take precedence over
// whatever the host driver would report.
static PFN_vkVoidFunction FEXFN_IMPL(vkGetDeviceProcAddr)(VkDevice a_0, const char* a_1) {
  const auto Custom = LookupCustomVulkanFunction(a_1);
  if (Custom) {
    return Custom;
  }

  // No custom implementation: ask the host driver.
  return LDR_PTR(vkGetDeviceProcAddr)(a_0, a_1);
}

// vkGetInstanceProcAddr override.
// Returns a host-facing function pointer; the guest handles mapping if one
// exists. Also triggers the one-time instance setup the first time a non-null
// instance is seen.
static PFN_vkVoidFunction FEXFN_IMPL(vkGetInstanceProcAddr)(VkInstance a_0, const char* a_1) {
  if (a_0 && !SetupInstance) {
    DoSetupWithInstance(a_0);
  }

  // Functions with custom implementations take precedence.
  const auto Custom = LookupCustomVulkanFunction(a_1);
  if (!Custom) {
    return LDR_PTR(vkGetInstanceProcAddr)(a_0, a_1);
  }

  // If this function belongs to an instance extension, requery its address.
  // This ensures fexldr_ptr_* is valid if the application creates a minimal
  // VkInstance with no extensions before creating its actual instance.
  const std::string_view Requested {a_1};
  const auto RequeryIfUnset = [&](const char* Name, auto& HostPtr) {
    // Only the requested function is touched, and only while its pointer is still null.
    if (Requested == std::string_view {Name} && !HostPtr) {
      (void*&)HostPtr = (void*)LDR_PTR(vkGetInstanceProcAddr)(a_0, Name);
    }
  };

  RequeryIfUnset("vkGetRandROutputDisplayEXT", LDR_PTR(vkGetRandROutputDisplayEXT));
  RequeryIfUnset("vkAcquireXlibDisplayEXT", LDR_PTR(vkAcquireXlibDisplayEXT));
  RequeryIfUnset("vkGetPhysicalDeviceXcbPresentationSupportKHR", LDR_PTR(vkGetPhysicalDeviceXcbPresentationSupportKHR));
  RequeryIfUnset("vkGetPhysicalDeviceXlibPresentationSupportKHR", LDR_PTR(vkGetPhysicalDeviceXlibPresentationSupportKHR));

  return Custom;
}

#ifdef IS_32BIT_THUNK
// Builds a host-layout copy of the guest structure `source`, interpreted as
// `Type`, runs its custom entry repacking, and returns a pointer to the host
// structure viewed as a VkBaseOutStructure.
//
// The storage comes from aligned_alloc rather than new since no easily
// available type information is available at the time of destruction.
// NOTE(review): the allocation result is not checked for null, and the caller
// is presumably responsible for releasing it — confirm where the chain is torn down.
template<VkStructureType TypeIndex, typename Type>
static VkBaseOutStructure* convert(const guest_layout<VkBaseOutStructure>* source) {
  using HostLayout = host_layout<Type>;

  const auto& guest_struct = *reinterpret_cast<const guest_layout<Type>*>(source);

  // Construct the host copy in place inside the raw allocation.
  auto raw_storage = (char*)aligned_alloc(alignof(HostLayout), sizeof(HostLayout));
  auto host_struct = new (raw_storage) HostLayout {guest_struct};

  fex_custom_repack_entry(*host_struct, guest_struct);

  return reinterpret_cast<VkBaseOutStructure*>(&host_struct->data);
}

// Copies the host structure `from`, interpreted as `Type`, back into the guest
// structure at `into`, keeping the guest's original pNext pointer, and then
// runs the type's custom exit repacking.
template<VkStructureType TypeIndex, typename Type>
static void convert_to_guest(void* into, const VkBaseOutStructure* from) {
  auto& guest_struct = *reinterpret_cast<guest_layout<Type>*>(into);
  auto& host_struct = *(Type*)from;

  // TODO: This assumes Vulkan never modifies pNext internally
  const auto saved_next = guest_struct.data.pNext;
  guest_struct = to_guest(to_host_layout(host_struct));
  guest_struct.data.pNext = saved_next;

  fex_custom_repack_exit(guest_struct, to_host_layout(host_struct));
}

// Table entry pairing a VkStructureType value with the two conversion routines
// for the corresponding struct type: convert (guest -> newly allocated host
// copy) and convert_to_guest (host -> existing guest struct).
template<VkStructureType TypeIndex, typename Type>
inline constexpr std::pair<VkStructureType, std::pair<VkBaseOutStructure* (*)(const guest_layout<VkBaseOutStructure>*), void (*)(void*, const VkBaseOutStructure*)>>
  converters = {TypeIndex, {convert<TypeIndex, Type>, convert_to_guest<TypeIndex, Type>}};

// NOTE: Not all Vulkan structures with pNext members are listed here. This is because excluding structs exclusively used as top-level entries is useful to detect repacking bugs.
static std::unordered_map<VkStructureType, std::pair<VkBaseOutStructure* (*)(const guest_layout<VkBaseOutStructure>*), void (*)(void*, const VkBaseOutStructure*)>> next_handlers {
  converters<VkStructureType::VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MOTION_INFO_NV, VkAccelerationStructureMotionInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_AMIGO_PROFILING_SUBMIT_INFO_SEC, VkAmigoProfilingSubmitInfoSEC>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT, VkAttachmentDescriptionStencilLayout>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT, VkAttachmentReferenceStencilLayout>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD, VkAttachmentSampleCountInfoAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_DEVICE_GROUP_INFO, VkBindBufferMemoryDeviceGroupInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_DEVICE_GROUP_INFO, VkBindImageMemoryDeviceGroupInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR, VkBindImageMemorySwapchainInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BIND_IMAGE_PLANE_MEMORY_INFO, VkBindImagePlaneMemoryInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT, VkBufferDeviceAddressCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO, VkBufferOpaqueCaptureAddressCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR, VkBufferUsageFlags2CreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_CONDITIONAL_RENDERING_INFO_EXT, VkCommandBufferInheritanceConditionalRenderingInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO, VkCommandBufferInheritanceRenderingInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDER_PASS_TRANSFORM_INFO_QCOM, VkCommandBufferInheritanceRenderPassTransformInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_VIEWPORT_SCISSOR_INFO_NV, VkCommandBufferInheritanceViewportScissorInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_COPY_COMMAND_TRANSFORM_INFO_QCOM, VkCopyCommandTransformInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VkDebugReportCallbackCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, VkDebugUtilsMessengerCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, VkDebugUtilsObjectNameInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV, VkDedicatedAllocationBufferCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV, VkDedicatedAllocationImageCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV, VkDedicatedAllocationMemoryAllocateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT, VkDepthBiasRepresentationInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DESCRIPTOR_BUFFER_BINDING_PUSH_DESCRIPTOR_BUFFER_HANDLE_EXT, VkDescriptorBufferBindingPushDescriptorBufferHandleEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_INLINE_UNIFORM_BLOCK_CREATE_INFO, VkDescriptorPoolInlineUniformBlockCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO, VkDescriptorSetLayoutBindingFlagsCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO, VkDescriptorSetVariableDescriptorCountAllocateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_LAYOUT_SUPPORT, VkDescriptorSetVariableDescriptorCountLayoutSupport>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_ADDRESS_BINDING_CALLBACK_DATA_EXT, VkDeviceAddressBindingCallbackDataEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_DIAGNOSTICS_CONFIG_CREATE_INFO_NV, VkDeviceDiagnosticsConfigCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_GROUP_COMMAND_BUFFER_BEGIN_INFO, VkDeviceGroupCommandBufferBeginInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_GROUP_PRESENT_INFO_KHR, VkDeviceGroupPresentInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_GROUP_RENDER_PASS_BEGIN_INFO, VkDeviceGroupRenderPassBeginInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO, VkDeviceGroupSubmitInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_GROUP_SWAPCHAIN_CREATE_INFO_KHR, VkDeviceGroupSwapchainCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD, VkDeviceMemoryOverallocationCreateInfoAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_PRIVATE_DATA_CREATE_INFO, VkDevicePrivateDataCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR, VkDeviceQueueGlobalPriorityCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DISPLAY_NATIVE_HDR_SURFACE_CAPABILITIES_AMD, VkDisplayNativeHdrSurfaceCapabilitiesAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_DISPLAY_PRESENT_INFO_KHR, VkDisplayPresentInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXPORT_FENCE_CREATE_INFO, VkExportFenceCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, VkExportMemoryAllocateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_NV, VkExportMemoryAllocateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO, VkExportSemaphoreCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES, VkExternalImageFormatProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_ACQUIRE_UNMODIFIED_EXT, VkExternalMemoryAcquireUnmodifiedEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO, VkExternalMemoryBufferCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, VkExternalMemoryImageCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV, VkExternalMemoryImageCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT, VkFilterCubicImageViewImageFormatPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3, VkFormatProperties3>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_LIBRARY_CREATE_INFO_EXT, VkGraphicsPipelineLibraryCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_HOST_IMAGE_COPY_DEVICE_PERFORMANCE_QUERY_EXT, VkHostImageCopyDevicePerformanceQueryEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_COMPRESSION_CONTROL_EXT, VkImageCompressionControlEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_COMPRESSION_PROPERTIES_EXT, VkImageCompressionPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT, VkImageDrmFormatModifierListCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO, VkImageFormatListCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO, VkImagePlaneMemoryRequirementsInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO, VkImageStencilUsageCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_SWAPCHAIN_CREATE_INFO_KHR, VkImageSwapchainCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT, VkImageViewASTCDecodeModeEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_VIEW_MIN_LOD_CREATE_INFO_EXT, VkImageViewMinLodCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_VIEW_SAMPLE_WEIGHT_CREATE_INFO_QCOM, VkImageViewSampleWeightCreateInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_VIEW_SLICED_CREATE_INFO_EXT, VkImageViewSlicedCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO, VkImageViewUsageCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR, VkImportMemoryFdInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, VkMemoryAllocateFlagsInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_BARRIER_2, VkMemoryBarrier2>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO, VkMemoryDedicatedAllocateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS, VkMemoryDedicatedRequirements>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO, VkMemoryOpaqueCaptureAddressAllocateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT, VkMemoryPriorityAllocateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_INFO_EXT, VkMultisampledRenderToSingleSampledInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_ATTRIBUTES_INFO_NVX, VkMultiviewPerViewAttributesInfoNVX>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_MULTIVIEW_PER_VIEW_RENDER_AREAS_RENDER_PASS_BEGIN_INFO_QCOM, VkMultiviewPerViewRenderAreasRenderPassBeginInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_OPTICAL_FLOW_IMAGE_FORMAT_INFO_NV, VkOpticalFlowImageFormatInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PERFORMANCE_QUERY_SUBMIT_INFO_KHR, VkPerformanceQuerySubmitInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, VkPhysicalDevice16BitStorageFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT, VkPhysicalDevice4444FormatsFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES, VkPhysicalDevice8BitStorageFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, VkPhysicalDeviceAccelerationStructureFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR, VkPhysicalDeviceAccelerationStructurePropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ADDRESS_BINDING_REPORT_FEATURES_EXT, VkPhysicalDeviceAddressBindingReportFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_AMIGO_PROFILING_FEATURES_SEC, VkPhysicalDeviceAmigoProfilingFeaturesSEC>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ASTC_DECODE_FEATURES_EXT, VkPhysicalDeviceASTCDecodeFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_DYNAMIC_STATE_FEATURES_EXT, VkPhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_FEATURES_EXT, VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_FEATURES_EXT, VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BLEND_OPERATION_ADVANCED_PROPERTIES_EXT, VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT, VkPhysicalDeviceBorderColorSwizzleFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, VkPhysicalDeviceBufferDeviceAddressFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT, VkPhysicalDeviceBufferDeviceAddressFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CLUSTER_CULLING_SHADER_FEATURES_HUAWEI, VkPhysicalDeviceClusterCullingShaderFeaturesHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CLUSTER_CULLING_SHADER_PROPERTIES_HUAWEI, VkPhysicalDeviceClusterCullingShaderPropertiesHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD, VkPhysicalDeviceCoherentMemoryFeaturesAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT, VkPhysicalDeviceColorWriteEnableFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV, VkPhysicalDeviceComputeShaderDerivativesFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT, VkPhysicalDeviceConditionalRenderingFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT, VkPhysicalDeviceConservativeRasterizationPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_KHR, VkPhysicalDeviceCooperativeMatrixFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV, VkPhysicalDeviceCooperativeMatrixFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_KHR, VkPhysicalDeviceCooperativeMatrixPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV, VkPhysicalDeviceCooperativeMatrixPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_FEATURES_NV, VkPhysicalDeviceCopyMemoryIndirectFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COPY_MEMORY_INDIRECT_PROPERTIES_NV, VkPhysicalDeviceCopyMemoryIndirectPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CORNER_SAMPLED_IMAGE_FEATURES_NV, VkPhysicalDeviceCornerSampledImageFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COVERAGE_REDUCTION_MODE_FEATURES_NV, VkPhysicalDeviceCoverageReductionModeFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, VkPhysicalDeviceCustomBorderColorFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT, VkPhysicalDeviceCustomBorderColorPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEDICATED_ALLOCATION_IMAGE_ALIASING_FEATURES_NV, VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_BIAS_CONTROL_FEATURES_EXT, VkPhysicalDeviceDepthBiasControlFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLAMP_ZERO_ONE_FEATURES_EXT, VkPhysicalDeviceDepthClampZeroOneFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT, VkPhysicalDeviceDepthClipControlFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT, VkPhysicalDeviceDepthClipEnableFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES, VkPhysicalDeviceDepthStencilResolveProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_DENSITY_MAP_PROPERTIES_EXT, VkPhysicalDeviceDescriptorBufferDensityMapPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, VkPhysicalDeviceDescriptorBufferFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_PROPERTIES_EXT, VkPhysicalDeviceDescriptorBufferPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, VkPhysicalDeviceDescriptorIndexingFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES, VkPhysicalDeviceDescriptorIndexingProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE, VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_COMPUTE_FEATURES_NV, VkPhysicalDeviceDeviceGeneratedCommandsComputeFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_FEATURES_NV, VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_GENERATED_COMMANDS_PROPERTIES_NV, VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEVICE_MEMORY_REPORT_FEATURES_EXT, VkPhysicalDeviceDeviceMemoryReportFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DIAGNOSTICS_CONFIG_FEATURES_NV, VkPhysicalDeviceDiagnosticsConfigFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT, VkPhysicalDeviceDiscardRectanglePropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES, VkPhysicalDeviceDriverProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT, VkPhysicalDeviceDrmPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES, VkPhysicalDeviceDynamicRenderingFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_UNUSED_ATTACHMENTS_FEATURES_EXT, VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXCLUSIVE_SCISSOR_FEATURES_NV, VkPhysicalDeviceExclusiveScissorFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT, VkPhysicalDeviceExtendedDynamicState2FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT, VkPhysicalDeviceExtendedDynamicState3FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_PROPERTIES_EXT, VkPhysicalDeviceExtendedDynamicState3PropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT, VkPhysicalDeviceExtendedDynamicStateFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO, VkPhysicalDeviceExternalImageFormatInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT, VkPhysicalDeviceExternalMemoryHostPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_RDMA_FEATURES_NV, VkPhysicalDeviceExternalMemoryRDMAFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT, VkPhysicalDeviceFaultFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, VkPhysicalDeviceFeatures2>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES, VkPhysicalDeviceFloatControlsProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_2_FEATURES_EXT, VkPhysicalDeviceFragmentDensityMap2FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_2_PROPERTIES_EXT, VkPhysicalDeviceFragmentDensityMap2PropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT, VkPhysicalDeviceFragmentDensityMapFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_FEATURES_QCOM, VkPhysicalDeviceFragmentDensityMapOffsetFeaturesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_PROPERTIES_QCOM, VkPhysicalDeviceFragmentDensityMapOffsetPropertiesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT, VkPhysicalDeviceFragmentDensityMapPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_FEATURES_KHR, VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_BARYCENTRIC_PROPERTIES_KHR, VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT, VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_ENUMS_FEATURES_NV, VkPhysicalDeviceFragmentShadingRateEnumsFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_ENUMS_PROPERTIES_NV, VkPhysicalDeviceFragmentShadingRateEnumsPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR, VkPhysicalDeviceFragmentShadingRateFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, VkPhysicalDeviceFragmentShadingRatePropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR, VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_FEATURES_EXT, VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GRAPHICS_PIPELINE_LIBRARY_PROPERTIES_EXT, VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_FEATURES_EXT, VkPhysicalDeviceHostImageCopyFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_IMAGE_COPY_PROPERTIES_EXT, VkPhysicalDeviceHostImageCopyPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES, VkPhysicalDeviceHostQueryResetFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES, VkPhysicalDeviceIDProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT, VkPhysicalDeviceImage2DViewOf3DFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_COMPRESSION_CONTROL_FEATURES_EXT, VkPhysicalDeviceImageCompressionControlFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_COMPRESSION_CONTROL_SWAPCHAIN_FEATURES_EXT, VkPhysicalDeviceImageCompressionControlSwapchainFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_DRM_FORMAT_MODIFIER_INFO_EXT, VkPhysicalDeviceImageDrmFormatModifierInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES, VkPhysicalDeviceImagelessFramebufferFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_PROCESSING_FEATURES_QCOM, VkPhysicalDeviceImageProcessingFeaturesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_PROCESSING_PROPERTIES_QCOM, VkPhysicalDeviceImageProcessingPropertiesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES, VkPhysicalDeviceImageRobustnessFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_SLICED_VIEW_OF_3D_FEATURES_EXT, VkPhysicalDeviceImageSlicedViewOf3DFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT, VkPhysicalDeviceImageViewImageFormatInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT, VkPhysicalDeviceImageViewMinLodFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT, VkPhysicalDeviceIndexTypeUint8FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INHERITED_VIEWPORT_SCISSOR_FEATURES_NV, VkPhysicalDeviceInheritedViewportScissorFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES, VkPhysicalDeviceInlineUniformBlockFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES, VkPhysicalDeviceInlineUniformBlockProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INVOCATION_MASK_FEATURES_HUAWEI, VkPhysicalDeviceInvocationMaskFeaturesHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LEGACY_DITHERING_FEATURES_EXT, VkPhysicalDeviceLegacyDitheringFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINEAR_COLOR_ATTACHMENT_FEATURES_NV, VkPhysicalDeviceLinearColorAttachmentFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT, VkPhysicalDeviceLineRasterizationFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT, VkPhysicalDeviceLineRasterizationPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES, VkPhysicalDeviceMaintenance3Properties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES, VkPhysicalDeviceMaintenance4Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES, VkPhysicalDeviceMaintenance4Properties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_FEATURES_KHR, VkPhysicalDeviceMaintenance5FeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_5_PROPERTIES_KHR, VkPhysicalDeviceMaintenance5PropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT, VkPhysicalDeviceMemoryBudgetPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_FEATURES_NV, VkPhysicalDeviceMemoryDecompressionFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_DECOMPRESSION_PROPERTIES_NV, VkPhysicalDeviceMemoryDecompressionPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT, VkPhysicalDeviceMemoryPriorityFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT, VkPhysicalDeviceMeshShaderFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV, VkPhysicalDeviceMeshShaderFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT, VkPhysicalDeviceMeshShaderPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV, VkPhysicalDeviceMeshShaderPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT, VkPhysicalDeviceMultiDrawFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT, VkPhysicalDeviceMultiDrawPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTISAMPLED_RENDER_TO_SINGLE_SAMPLED_FEATURES_EXT, VkPhysicalDeviceMultisampledRenderToSingleSampledFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES, VkPhysicalDeviceMultiviewFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_ATTRIBUTES_PROPERTIES_NVX, VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_RENDER_AREAS_FEATURES_QCOM, VkPhysicalDeviceMultiviewPerViewRenderAreasFeaturesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PER_VIEW_VIEWPORTS_FEATURES_QCOM, VkPhysicalDeviceMultiviewPerViewViewportsFeaturesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES, VkPhysicalDeviceMultiviewProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT, VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT, VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_FEATURES_EXT, VkPhysicalDeviceOpacityMicromapFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPACITY_MICROMAP_PROPERTIES_EXT, VkPhysicalDeviceOpacityMicromapPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_FEATURES_NV, VkPhysicalDeviceOpticalFlowFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_OPTICAL_FLOW_PROPERTIES_NV, VkPhysicalDeviceOpticalFlowPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PAGEABLE_DEVICE_LOCAL_MEMORY_FEATURES_EXT, VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, VkPhysicalDevicePCIBusInfoPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR, VkPhysicalDevicePerformanceQueryFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR, VkPhysicalDevicePerformanceQueryPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES, VkPhysicalDevicePipelineCreationCacheControlFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR, VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_LIBRARY_GROUP_HANDLES_FEATURES_EXT, VkPhysicalDevicePipelineLibraryGroupHandlesFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_PROPERTIES_FEATURES_EXT, VkPhysicalDevicePipelinePropertiesFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_PROTECTED_ACCESS_FEATURES_EXT, VkPhysicalDevicePipelineProtectedAccessFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_FEATURES_EXT, VkPhysicalDevicePipelineRobustnessFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_ROBUSTNESS_PROPERTIES_EXT, VkPhysicalDevicePipelineRobustnessPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES, VkPhysicalDevicePointClippingProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_BARRIER_FEATURES_NV, VkPhysicalDevicePresentBarrierFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR, VkPhysicalDevicePresentIdFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR, VkPhysicalDevicePresentWaitFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT, VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT, VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES, VkPhysicalDevicePrivateDataFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES, VkPhysicalDeviceProtectedMemoryFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES, VkPhysicalDeviceProtectedMemoryProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, VkPhysicalDeviceProvokingVertexFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT, VkPhysicalDeviceProvokingVertexPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR, VkPhysicalDevicePushDescriptorPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_EXT, VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR, VkPhysicalDeviceRayQueryFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_FEATURES_NV, VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_NV, VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR, VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MOTION_BLUR_FEATURES_NV, VkPhysicalDeviceRayTracingMotionBlurFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, VkPhysicalDeviceRayTracingPipelineFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR, VkPhysicalDeviceRayTracingPipelinePropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_POSITION_FETCH_FEATURES_KHR, VkPhysicalDeviceRayTracingPositionFetchFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV, VkPhysicalDeviceRayTracingPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_REPRESENTATIVE_FRAGMENT_TEST_FEATURES_NV, VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RGBA10X6_FORMATS_FEATURES_EXT, VkPhysicalDeviceRGBA10X6FormatsFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, VkPhysicalDeviceRobustness2FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT, VkPhysicalDeviceRobustness2PropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT, VkPhysicalDeviceSampleLocationsPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES, VkPhysicalDeviceSamplerFilterMinmaxProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES, VkPhysicalDeviceSamplerYcbcrConversionFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES, VkPhysicalDeviceScalarBlockLayoutFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES, VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT, VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, VkPhysicalDeviceShaderAtomicFloatFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES, VkPhysicalDeviceShaderAtomicInt64Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR, VkPhysicalDeviceShaderClockFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_BUILTINS_FEATURES_ARM, VkPhysicalDeviceShaderCoreBuiltinsFeaturesARM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_BUILTINS_PROPERTIES_ARM, VkPhysicalDeviceShaderCoreBuiltinsPropertiesARM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD, VkPhysicalDeviceShaderCoreProperties2AMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD, VkPhysicalDeviceShaderCorePropertiesAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_ARM, VkPhysicalDeviceShaderCorePropertiesARM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES, VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, VkPhysicalDeviceShaderDrawParametersFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_EARLY_AND_LATE_FRAGMENT_TESTS_FEATURES_AMD, VkPhysicalDeviceShaderEarlyAndLateFragmentTestsFeaturesAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, VkPhysicalDeviceShaderFloat16Int8Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT, VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_FOOTPRINT_FEATURES_NV, VkPhysicalDeviceShaderImageFootprintFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES, VkPhysicalDeviceShaderIntegerDotProductFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES, VkPhysicalDeviceShaderIntegerDotProductProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL, VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT, VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT, VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT, VkPhysicalDeviceShaderObjectFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_PROPERTIES_EXT, VkPhysicalDeviceShaderObjectPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_FEATURES_NV, VkPhysicalDeviceShaderSMBuiltinsFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SM_BUILTINS_PROPERTIES_NV, VkPhysicalDeviceShaderSMBuiltinsPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES, VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR, VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES, VkPhysicalDeviceShaderTerminateInvocationFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TILE_IMAGE_FEATURES_EXT, VkPhysicalDeviceShaderTileImageFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TILE_IMAGE_PROPERTIES_EXT, VkPhysicalDeviceShaderTileImagePropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_FEATURES_NV, VkPhysicalDeviceShadingRateImageFeaturesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADING_RATE_IMAGE_PROPERTIES_NV, VkPhysicalDeviceShadingRateImagePropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES, VkPhysicalDeviceSubgroupProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES, VkPhysicalDeviceSubgroupSizeControlFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES, VkPhysicalDeviceSubgroupSizeControlProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBPASS_MERGE_FEEDBACK_FEATURES_EXT, VkPhysicalDeviceSubpassMergeFeedbackFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBPASS_SHADING_FEATURES_HUAWEI, VkPhysicalDeviceSubpassShadingFeaturesHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBPASS_SHADING_PROPERTIES_HUAWEI, VkPhysicalDeviceSubpassShadingPropertiesHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SWAPCHAIN_MAINTENANCE_1_FEATURES_EXT, VkPhysicalDeviceSwapchainMaintenance1FeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES, VkPhysicalDeviceSynchronization2Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT, VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES, VkPhysicalDeviceTexelBufferAlignmentProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES, VkPhysicalDeviceTextureCompressionASTCHDRFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TILE_PROPERTIES_FEATURES_QCOM, VkPhysicalDeviceTilePropertiesFeaturesQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, VkPhysicalDeviceTimelineSemaphoreFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES, VkPhysicalDeviceTimelineSemaphoreProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT, VkPhysicalDeviceTransformFeedbackFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT, VkPhysicalDeviceTransformFeedbackPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES, VkPhysicalDeviceUniformBufferStandardLayoutFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VkPhysicalDeviceVariablePointersFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT, VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT, VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT, VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES, VkPhysicalDeviceVulkan11Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES, VkPhysicalDeviceVulkan11Properties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, VkPhysicalDeviceVulkan12Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES, VkPhysicalDeviceVulkan12Properties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, VkPhysicalDeviceVulkan13Features>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES, VkPhysicalDeviceVulkan13Properties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES, VkPhysicalDeviceVulkanMemoryModelFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR, VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_2_PLANE_444_FORMATS_FEATURES_EXT, VkPhysicalDeviceYcbcr2Plane444FormatsFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT, VkPhysicalDeviceYcbcrImageArraysFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES, VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeatures>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_ADVANCED_STATE_CREATE_INFO_EXT, VkPipelineColorBlendAdvancedStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COLOR_WRITE_CREATE_INFO_EXT, VkPipelineColorWriteCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COMPILER_CONTROL_CREATE_INFO_AMD, VkPipelineCompilerControlCreateInfoAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_MODULATION_STATE_CREATE_INFO_NV, VkPipelineCoverageModulationStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_REDUCTION_STATE_CREATE_INFO_NV, VkPipelineCoverageReductionStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_COVERAGE_TO_COLOR_STATE_CREATE_INFO_NV, VkPipelineCoverageToColorStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR, VkPipelineCreateFlags2CreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT, VkPipelineDiscardRectangleStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_ENUM_STATE_CREATE_INFO_NV, VkPipelineFragmentShadingRateEnumStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR, VkPipelineFragmentShadingRateStateCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR, VkPipelineLibraryCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT, VkPipelineRasterizationConservativeStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT, VkPipelineRasterizationDepthClipStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT, VkPipelineRasterizationLineStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT, VkPipelineRasterizationProvokingVertexStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD, VkPipelineRasterizationStateRasterizationOrderAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT, VkPipelineRasterizationStateStreamCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO, VkPipelineRenderingCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_REPRESENTATIVE_FRAGMENT_TEST_STATE_CREATE_INFO_NV, VkPipelineRepresentativeFragmentTestStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_ROBUSTNESS_CREATE_INFO_EXT, VkPipelineRobustnessCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT, VkPipelineSampleLocationsStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT, VkPipelineShaderStageModuleIdentifierCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO, VkPipelineShaderStageRequiredSubgroupSizeCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO, VkPipelineTessellationDomainOriginStateCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT, VkPipelineVertexInputDivisorStateCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT, VkPipelineViewportDepthClipControlCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_EXCLUSIVE_SCISSOR_STATE_CREATE_INFO_NV, VkPipelineViewportExclusiveScissorStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV, VkPipelineViewportSwizzleStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_W_SCALING_STATE_CREATE_INFO_NV, VkPipelineViewportWScalingStateCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PRESENT_ID_KHR, VkPresentIdKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_PROTECTED_SUBMIT_INFO, VkProtectedSubmitInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_CREATE_INFO_KHR, VkQueryPoolPerformanceCreateInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL, VkQueryPoolPerformanceQueryCreateInfoINTEL>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_2_NV, VkQueueFamilyCheckpointProperties2NV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUEUE_FAMILY_CHECKPOINT_PROPERTIES_NV, VkQueueFamilyCheckpointPropertiesNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR, VkQueueFamilyGlobalPriorityPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR, VkQueueFamilyQueryResultStatusPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR, VkQueueFamilyVideoPropertiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_INFO_EXT, VkRenderingFragmentDensityMapAttachmentInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, VkRenderingFragmentShadingRateAttachmentInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_ATTACHMENT_BEGIN_INFO, VkRenderPassAttachmentBeginInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_CREATION_CONTROL_EXT, VkRenderPassCreationControlEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_CREATION_FEEDBACK_CREATE_INFO_EXT, VkRenderPassCreationFeedbackCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT, VkRenderPassFragmentDensityMapCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_INPUT_ATTACHMENT_ASPECT_CREATE_INFO, VkRenderPassInputAttachmentAspectCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO, VkRenderPassMultiviewCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_SUBPASS_FEEDBACK_CREATE_INFO_EXT, VkRenderPassSubpassFeedbackCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_RENDER_PASS_TRANSFORM_BEGIN_INFO_QCOM, VkRenderPassTransformBeginInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT, VkSampleLocationsInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT, VkSamplerBorderColorComponentMappingCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, VkSamplerCustomBorderColorCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO, VkSamplerReductionModeCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_IMAGE_FORMAT_PROPERTIES, VkSamplerYcbcrConversionImageFormatProperties>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SAMPLER_YCBCR_CONVERSION_INFO, VkSamplerYcbcrConversionInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO, VkSemaphoreTypeCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, VkShaderModuleCreateInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SHADER_MODULE_VALIDATION_CACHE_CREATE_INFO_EXT, VkShaderModuleValidationCacheCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR, VkSharedPresentSurfaceCapabilitiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SUBPASS_FRAGMENT_DENSITY_MAP_OFFSET_END_INFO_QCOM, VkSubpassFragmentDensityMapOffsetEndInfoQCOM>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SUBPASS_RESOLVE_PERFORMANCE_QUERY_EXT, VkSubpassResolvePerformanceQueryEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SUBPASS_SHADING_PIPELINE_CREATE_INFO_HUAWEI, VkSubpassShadingPipelineCreateInfoHUAWEI>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SUBRESOURCE_HOST_MEMCPY_SIZE_EXT, VkSubresourceHostMemcpySizeEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_PRESENT_BARRIER_NV, VkSurfaceCapabilitiesPresentBarrierNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SURFACE_PRESENT_MODE_COMPATIBILITY_EXT, VkSurfacePresentModeCompatibilityEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SURFACE_PRESENT_MODE_EXT, VkSurfacePresentModeEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SURFACE_PRESENT_SCALING_CAPABILITIES_EXT, VkSurfacePresentScalingCapabilitiesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SURFACE_PROTECTED_CAPABILITIES_KHR, VkSurfaceProtectedCapabilitiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_COUNTER_CREATE_INFO_EXT, VkSwapchainCounterCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_DISPLAY_NATIVE_HDR_CREATE_INFO_AMD, VkSwapchainDisplayNativeHdrCreateInfoAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_BARRIER_CREATE_INFO_NV, VkSwapchainPresentBarrierCreateInfoNV>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT, VkSwapchainPresentFenceInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT, VkSwapchainPresentModeInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODES_CREATE_INFO_EXT, VkSwapchainPresentModesCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_SCALING_CREATE_INFO_EXT, VkSwapchainPresentScalingCreateInfoEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_TEXTURE_LOD_GATHER_FORMAT_PROPERTIES_AMD, VkTextureLODGatherFormatPropertiesAMD>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, VkTimelineSemaphoreSubmitInfo>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, VkValidationFeaturesEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT, VkValidationFlagsEXT>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_CAPABILITIES_KHR, VkVideoDecodeCapabilitiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_CAPABILITIES_KHR, VkVideoDecodeH264CapabilitiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_DPB_SLOT_INFO_KHR, VkVideoDecodeH264DpbSlotInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PICTURE_INFO_KHR, VkVideoDecodeH264PictureInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H264_PROFILE_INFO_KHR, VkVideoDecodeH264ProfileInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_CAPABILITIES_KHR, VkVideoDecodeH265CapabilitiesKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_DPB_SLOT_INFO_KHR, VkVideoDecodeH265DpbSlotInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PICTURE_INFO_KHR, VkVideoDecodeH265PictureInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_H265_PROFILE_INFO_KHR, VkVideoDecodeH265ProfileInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_DECODE_USAGE_INFO_KHR, VkVideoDecodeUsageInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_VIDEO_PROFILE_INFO_KHR, VkVideoProfileInfoKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, VkWriteDescriptorSetAccelerationStructureKHR>,
  converters<VkStructureType::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV, VkWriteDescriptorSetAccelerationStructureNV>,
};

// Host-bound repacking of a single pNext link.
//
// When the guest struct `from` has a successor, the converter registered in
// next_handlers for the successor's sType is invoked on it and the result is
// stored in `into.pNext`. When there is no successor, `into.pNext` is cleared.
// A successor whose sType has no entry in next_handlers aborts the process.
static void default_fex_custom_repack_entry(VkBaseOutStructure& into, const guest_layout<VkBaseOutStructure>* from) {
  if (const auto raw_next = from->data.pNext.get_pointer()) {
    const auto* guest_next = reinterpret_cast<const guest_layout<VkBaseOutStructure>*>(raw_next);

    const auto handler_it = next_handlers.find(static_cast<VkStructureType>(guest_next->data.sType.data));
    if (handler_it == next_handlers.end()) {
      fprintf(stderr, "ERROR: Unrecognized VkStructureType %u referenced by pNext\n", guest_next->data.sType.data);
      std::abort();
    }

    // .first is the converter used in the guest->host direction
    into.pNext = handler_it->second.first(guest_next);
  } else {
    // End of chain
    into.pNext = nullptr;
  }
}

// Typed convenience overload: reinterprets the host struct and the guest
// struct as VkBaseOutStructure-based and forwards to the untyped overload.
template<typename T>
void default_fex_custom_repack_entry(host_layout<T>& into, const guest_layout<T>& from) {
  auto* host_base = (VkBaseOutStructure*)&into.data;
  const auto* guest_base = reinterpret_cast<const guest_layout<VkBaseOutStructure>*>(&from);
  default_fex_custom_repack_entry(*host_base, guest_base);
}

// Guest-bound counterpart of default_fex_custom_repack_entry.
//
// If the host struct `from` has a successor, the handler registered in
// next_handlers for the *guest* successor's sType is invoked with the guest
// successor (the struct `into.data.pNext` points at) and the host successor.
// Afterwards the host successor is released with free().
// NOTE(review): presumably the host successor was allocated by the matching
// guest->host handler - confirm against the converters' implementation.
//
// Does nothing when `from->pNext` is null. Aborts the process when the guest
// successor's sType has no entry in next_handlers.
static void default_fex_custom_repack_reverse(guest_layout<VkBaseOutStructure>& into, const VkBaseOutStructure* from) {
  auto pNextHost = from->pNext;
  if (!pNextHost) {
    return;
  }

  // The handler is selected by the sType stored in the guest's successor struct
  auto guest_next = into.data.pNext.get_pointer();
  const auto next_type = guest_next->data.sType.data;

  auto next_handler = next_handlers.find(static_cast<VkStructureType>(next_type));
  if (next_handler == next_handlers.end()) {
    // Report the type that failed the lookup (the successor's), not the sType of the parent struct
    fprintf(stderr, "ERROR: Unrecognized VkStructureType %u referenced by pNext when converting to guest\n", next_type);
    std::abort();
  }

  // .second is the converter used in the host->guest direction
  next_handler->second.second((void*)guest_next, pNextHost);

  free(pNextHost);
}

// Generates the default repacking functions for the Vulkan struct `name`.
// These only traverse and repack the pNext chain.
// If other members need to be repacked, use VULKAN_NONDEFAULT_CUSTOM_REPACK instead.
#define VULKAN_DEFAULT_CUSTOM_REPACK(name)                                                        \
  void fex_custom_repack_entry(host_layout<name>& into, const guest_layout<name>& from) {         \
    auto& host_base = reinterpret_cast<VkBaseOutStructure&>(into.data);                           \
    const auto& guest_base = reinterpret_cast<const guest_layout<VkBaseOutStructure>&>(from);     \
    default_fex_custom_repack_entry(host_base, &guest_base);                                      \
  }                                                                                               \
                                                                                                  \
  bool fex_custom_repack_exit(guest_layout<name>& into, const host_layout<name>& from) {          \
    /* Remember the guest's own pNext; it is restored after `into` is overwritten */              \
    auto guest_next_backup = into.data.pNext;                                                     \
    auto* guest_base = reinterpret_cast<guest_layout<VkBaseOutStructure>*>(&into);                \
    const auto& host_base = reinterpret_cast<const VkBaseOutStructure&>(from.data);               \
    default_fex_custom_repack_reverse(*guest_base, &host_base);                                   \
    into = to_guest(from);                                                                        \
    into.data.pNext = guest_next_backup;                                                          \
    return true;                                                                                  \
  }

// Intentionally expands to nothing. This macro doesn't automate anything; it
// only exists so that every Vulkan type still appears in the list below,
// which helps ensure none is forgotten. The actual repacking functions for
// these types are defined manually later.
#define VULKAN_NONDEFAULT_CUSTOM_REPACK(name)

// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureBuildGeometryInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureBuildSizesInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureCaptureDescriptorDataInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureDeviceAddressInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureGeometryAabbsDataKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureGeometryInstancesDataKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureGeometryKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureGeometryMotionTrianglesDataNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureGeometryTrianglesDataKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureMemoryRequirementsInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureMotionInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureTrianglesOpacityMicromapEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAccelerationStructureVersionInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAcquireNextImageInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAcquireProfilingLockInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAmigoProfilingSubmitInfoSEC)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkAntiLagDataAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAntiLagPresentationInfoAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkApplicationInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentDescription2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentDescriptionStencilLayout)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentFeedbackLoopInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentReference2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentReferenceStencilLayout)
VULKAN_DEFAULT_CUSTOM_REPACK(VkAttachmentSampleCountInfoAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBeginCustomResolveInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindAccelerationStructureMemoryInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindBufferMemoryDeviceGroupInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindBufferMemoryInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindDataGraphPipelineSessionMemoryInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindDescriptorBufferEmbeddedSamplersInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindDescriptorSetsInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindImageMemoryDeviceGroupInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindImageMemoryInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindImageMemorySwapchainInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindImagePlaneMemoryInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindMemoryStatus)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkBindSparseInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindTensorMemoryInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBindVideoSessionMemoryInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBlitImageCubicWeightsInfoQCOM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkBlitImageInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferCaptureDescriptorDataInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferCopy2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferDeviceAddressCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferDeviceAddressInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferImageCopy2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferMemoryBarrier)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferMemoryBarrier2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferMemoryRequirementsInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferOpaqueCaptureAddressCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferUsageFlags2CreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBufferViewCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkBuildPartitionedAccelerationStructureInfoNV)
// Repack registrations for Vulkan structs named VkCalibrated* through
// VkCustomResolve*. Most entries use VULKAN_DEFAULT_CUSTOM_REPACK; two
// (VkCommandBufferBeginInfo, VkCopyMemoryToImageInfoEXT) use
// VULKAN_NONDEFAULT_CUSTOM_REPACK instead.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine for that struct —
// verify against the macro definitions.
// NOTE(review): commented-out entries are not registered here; the reason is
// not visible from this list — confirm before enabling any of them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkCalibratedTimestampInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCheckpointData2NV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCheckpointDataNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkClusterAccelerationStructureClustersBottomLevelInputNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkClusterAccelerationStructureMoveObjectsInputNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkClusterAccelerationStructureTriangleClusterInputNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferAllocateInfo)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkCommandBufferBeginInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferInheritanceConditionalRenderingInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferInheritanceInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferInheritanceRenderingInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferInheritanceRenderPassTransformInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferInheritanceViewportScissorInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandBufferSubmitInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCommandPoolCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkComputeOccupancyPriorityParametersNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkComputePipelineCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkComputePipelineIndirectBufferInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkConditionalRenderingBeginInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCooperativeMatrixFlexibleDimensionsPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCooperativeMatrixPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCooperativeMatrixPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCooperativeVectorPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyAccelerationStructureInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyAccelerationStructureToMemoryInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyBufferInfo2)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyBufferToImageInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyCommandTransformInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyDescriptorSet)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyImageInfo2)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyImageToBufferInfo2)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyImageToImageInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyImageToMemoryInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMemoryIndirectInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMemoryToAccelerationStructureInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMemoryToImageIndirectInfoKHR)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkCopyMemoryToImageInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMemoryToMicromapInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMicromapInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyMicromapToMemoryInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCopyTensorInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCuFunctionCreateInfoNVX)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCuLaunchInfoNVX)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkCuModuleCreateInfoNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCuModuleTexturingModeCreateInfoNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkCustomResolveCreateInfoEXT)
// Repack registrations for the VkDataGraph* struct family (ARM/QCOM suffixed
// types). Every active entry uses VULKAN_DEFAULT_CUSTOM_REPACK, which is
// defined outside this section.
// NOTE(review): four entries are commented out and therefore not registered
// here; why is not visible from this list — confirm before enabling them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineBuiltinModelCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineCompilerControlCreateInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineConstantARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineConstantTensorSemiStructuredSparsityInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineDispatchInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineIdentifierCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelinePropertyQueryResultARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineResourceInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineSessionBindPointRequirementARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineSessionBindPointRequirementsInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineSessionCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineSessionMemoryRequirementsInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphPipelineShaderModuleCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDataGraphProcessingEngineCreateInfoARM)
// Repack registrations for Vulkan structs named VkDebug* through VkDescriptor*.
// VkDependencyInfo, VkDescriptorGetInfoEXT, VkDescriptorSetLayoutCreateInfo and
// VkDescriptorUpdateTemplateCreateInfo use VULKAN_NONDEFAULT_CUSTOM_REPACK; all
// other active entries use VULKAN_DEFAULT_CUSTOM_REPACK.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine — verify.
// NOTE(review): commented-out entries are not registered here; confirm the
// reason before enabling any of them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugMarkerMarkerInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugMarkerObjectNameInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugMarkerObjectTagInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugReportCallbackCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugUtilsLabelEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugUtilsMessengerCallbackDataEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugUtilsMessengerCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugUtilsObjectNameInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDebugUtilsObjectTagInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDecompressMemoryInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDedicatedAllocationBufferCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDedicatedAllocationImageCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDedicatedAllocationMemoryAllocateInfoNV)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkDependencyInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDepthBiasInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDepthBiasRepresentationInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorAddressInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorBufferBindingInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorBufferBindingPushDescriptorBufferHandleEXT)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkDescriptorGetInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorGetTensorInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorPoolCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorPoolInlineUniformBlockCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetBindingReferenceVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetLayoutBindingFlagsCreateInfo)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkDescriptorSetLayoutCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetLayoutHostMappingInfoVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetLayoutSupport)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetVariableDescriptorCountAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDescriptorSetVariableDescriptorCountLayoutSupport)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkDescriptorUpdateTemplateCreateInfo)
// Repack registrations for the VkDevice* and VkDirect* structs.
// VkDeviceCreateInfo uses VULKAN_NONDEFAULT_CUSTOM_REPACK; every other active
// entry uses VULKAN_DEFAULT_CUSTOM_REPACK.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine — verify.
// NOTE(review): commented-out entries (including both VkDirectDriverLoading*
// structs) are not registered here; confirm the reason before enabling them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceAddressBindingCallbackDataEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceBufferMemoryRequirements)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkDeviceCreateInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceDeviceMemoryReportCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceDiagnosticsConfigCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceEventInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceFaultCountsEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceFaultInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupBindSparseInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupCommandBufferBeginInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupDeviceCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupPresentCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupPresentInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupRenderPassBeginInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupSubmitInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceGroupSwapchainCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceImageMemoryRequirements)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceImageSubresourceInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceMemoryOpaqueCaptureAddressInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceMemoryOverallocationCreateInfoAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceMemoryReportCallbackDataEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDevicePipelineBinaryInternalCacheControlKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDevicePrivateDataCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceQueueCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceQueueGlobalPriorityCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceQueueInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceQueueShaderCoreControlCreateInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDeviceTensorMemoryRequirementsARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDirectDriverLoadingInfoLUNARG)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDirectDriverLoadingListLUNARG)
// Repack registrations for the VkDispatch*, VkDisplay* and VkDrm* structs.
// Every active entry uses VULKAN_DEFAULT_CUSTOM_REPACK, which is defined
// outside this section.
// NOTE(review): both VkDrmFormatModifierPropertiesList* entries are commented
// out and therefore not registered here; confirm the reason before enabling.
VULKAN_DEFAULT_CUSTOM_REPACK(VkDispatchTileInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayEventInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayModeCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayModeProperties2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayModeStereoPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayNativeHdrSurfaceCapabilitiesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayPlaneCapabilities2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayPlaneInfo2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayPlaneProperties2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayPowerInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayPresentInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplayProperties2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplaySurfaceCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkDisplaySurfaceStereoCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDrmFormatModifierPropertiesList2EXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkDrmFormatModifierPropertiesListEXT)
// Repack registrations for Vulkan structs named VkEvent* through
// VkFramebuffer* (the VkE* and VkF* ranges). Every active entry uses
// VULKAN_DEFAULT_CUSTOM_REPACK, which is defined outside this section.
// NOTE(review): commented-out entries (VkFragmentShadingRateAttachmentInfoKHR,
// VkFrameBoundaryEXT, VkFramebufferAttachmentsCreateInfo) are not registered
// here; confirm the reason before enabling them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkEventCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExportFenceCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExportMemoryAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExportMemoryAllocateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExportSemaphoreCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalBufferProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalComputeQueueCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalComputeQueueDataParamsNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalComputeQueueDeviceCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalFenceProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalImageFormatProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalMemoryAcquireUnmodifiedEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalMemoryBufferCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalMemoryImageCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalMemoryImageCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalMemoryTensorCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalSemaphoreProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkExternalTensorPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFenceCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFenceGetFdInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFilterCubicImageViewImageFormatPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFormatProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFormatProperties3)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkFragmentShadingRateAttachmentInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkFrameBoundaryEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFrameBoundaryTensorsARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFramebufferAttachmentImageInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkFramebufferAttachmentsCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFramebufferCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkFramebufferMixedSamplesCombinationNV)
// Repack registrations for Vulkan structs named VkGenerated* through VkHost*
// (the VkG* and VkH* ranges). VkGraphicsPipelineCreateInfo uses
// VULKAN_NONDEFAULT_CUSTOM_REPACK; every other active entry uses
// VULKAN_DEFAULT_CUSTOM_REPACK.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine — verify.
// NOTE(review): commented-out entries are not registered here; confirm the
// reason before enabling any of them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsMemoryRequirementsInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsMemoryRequirementsInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsPipelineInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeneratedCommandsShaderInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeometryAABBNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeometryNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGeometryTrianglesNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkGetLatencyMarkerInfoNV)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkGraphicsPipelineCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkGraphicsPipelineLibraryCreateInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkGraphicsPipelineShaderGroupsCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkGraphicsShaderGroupCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkHdrVividDynamicMetadataHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkHeadlessSurfaceCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkHostImageCopyDevicePerformanceQuery)
VULKAN_DEFAULT_CUSTOM_REPACK(VkHostImageLayoutTransitionInfo)
// Repack registrations for Vulkan structs named VkImage* through VkInstance*
// (the VkI* range). VkInstanceCreateInfo uses VULKAN_NONDEFAULT_CUSTOM_REPACK;
// every other active entry uses VULKAN_DEFAULT_CUSTOM_REPACK.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine — verify.
// NOTE(review): commented-out entries are not registered here; confirm the
// reason before enabling any of them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageAlignmentControlCreateInfoMESA)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageBlit2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageCaptureDescriptorDataInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageCompressionControlEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageCompressionPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageCopy2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageCreateInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkImageDrmFormatModifierExplicitCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageDrmFormatModifierListCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageDrmFormatModifierPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageFormatListCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageFormatProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageMemoryBarrier)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageMemoryBarrier2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageMemoryRequirementsInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImagePlaneMemoryRequirementsInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageResolve2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageSparseMemoryRequirementsInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageStencilUsageCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageSubresource2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageSwapchainCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkImageToMemoryCopyEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewAddressPropertiesNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewASTCDecodeModeEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewCaptureDescriptorDataInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewHandleInfoNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewMinLodCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewSampleWeightCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewSlicedCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImageViewUsageCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImportFenceFdInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImportMemoryFdInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkImportMemoryHostPointerInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkImportSemaphoreFdInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkIndirectCommandsLayoutCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkIndirectCommandsLayoutTokenNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkIndirectExecutionSetPipelineInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkIndirectExecutionSetShaderInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkIndirectExecutionSetShaderLayoutInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkInitializePerformanceApiInfoINTEL)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkInstanceCreateInfo)
// Repack registrations for Vulkan structs named VkLatency* through VkMutable*
// (the VkL* and VkM* ranges). VkMemoryToImageCopy uses
// VULKAN_NONDEFAULT_CUSTOM_REPACK; every other active entry uses
// VULKAN_DEFAULT_CUSTOM_REPACK.
// NOTE(review): both macros are defined outside this section. The NONDEFAULT
// variant presumably pairs with a hand-written repack routine — verify.
// NOTE(review): commented-out entries are not registered here; confirm the
// reason before enabling any of them.
VULKAN_DEFAULT_CUSTOM_REPACK(VkLatencySleepInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkLatencySleepModeInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkLatencySubmissionPresentIdNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkLatencySurfaceCapabilitiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkLatencyTimingsFrameReportNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkLayerSettingsCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMappedMemoryRange)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryAllocateFlagsInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryBarrier)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryBarrier2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryBarrierAccessFlags3KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryDedicatedAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryDedicatedAllocateInfoTensorARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryDedicatedRequirements)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryFdPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryGetFdInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryGetRemoteAddressInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryHostPointerPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryMapInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryMapPlacedInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryOpaqueCaptureAddressAllocateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryPriorityAllocateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryRequirements2)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkMemoryToImageCopy)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMemoryUnmapInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkMicromapBuildInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMicromapBuildSizesInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMicromapCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMicromapVersionInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMultisampledRenderToSingleSampledInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMultisamplePropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMultiviewPerViewAttributesInfoNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkMultiviewPerViewRenderAreasRenderPassBeginInfoQCOM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkMutableDescriptorTypeCreateInfoEXT)
// Repack registrations for Vulkan structs named VkOpaque* through VkPerTile*
// (the VkO* range and the VkP* structs listed before VkPhysicalDevice*).
// Every active entry uses VULKAN_DEFAULT_CUSTOM_REPACK, which is defined
// outside this section.
// NOTE(review): commented-out entries are not registered here; confirm the
// reason before enabling any of them.
// VULKAN_DEFAULT_CUSTOM_REPACK(VkOpaqueCaptureDescriptorDataCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkOpticalFlowExecuteInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkOpticalFlowImageFormatInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkOpticalFlowImageFormatPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkOpticalFlowSessionCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkOpticalFlowSessionCreatePrivateDataInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkOutOfBandQueueTypeInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPartitionedAccelerationStructureFlagsNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPartitionedAccelerationStructureInstancesInputNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPastPresentationTimingEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPastPresentationTimingInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPastPresentationTimingPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceConfigurationAcquireInfoINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceCounterARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceCounterDescriptionARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceCounterDescriptionKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceCounterKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceMarkerInfoINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceOverrideInfoINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceQuerySubmitInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerformanceStreamMarkerInfoINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerTileBeginInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPerTileEndInfoQCOM)
// Repack registrations for VkPhysicalDevice* feature/property structs, from
// VkPhysicalDevice16BitStorageFeatures through
// VkPhysicalDeviceCustomResolveFeaturesEXT. Every entry in this range is active
// and uses VULKAN_DEFAULT_CUSTOM_REPACK, which is defined outside this section.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevice16BitStorageFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevice4444FormatsFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevice8BitStorageFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAccelerationStructureFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAccelerationStructurePropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAddressBindingReportFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAmigoProfilingFeaturesSEC)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAntiLagFeaturesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceASTCDecodeFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceBorderColorSwizzleFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceBufferDeviceAddressFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceClusterAccelerationStructureFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceClusterAccelerationStructurePropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceClusterCullingShaderFeaturesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceClusterCullingShaderPropertiesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceClusterCullingShaderVrsFeaturesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCoherentMemoryFeaturesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceColorWriteEnableFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCommandBufferInheritanceFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceComputeOccupancyPriorityFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceComputeShaderDerivativesPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceConditionalRenderingFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceConservativeRasterizationPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrix2FeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrix2PropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrixFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrixFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrixPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeMatrixPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeVectorFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCooperativeVectorPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCopyMemoryIndirectFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCopyMemoryIndirectFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCopyMemoryIndirectPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCornerSampledImageFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCoverageReductionModeFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCubicClampFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCubicWeightsFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCustomBorderColorFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCustomBorderColorPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceCustomResolveFeaturesEXT)
// Repack registrations for VkPhysicalDevice* structs whose suffix starts with
// "D" (VkPhysicalDeviceDataGraphFeaturesARM through
// VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT). Every entry in
// this range is active and uses VULKAN_DEFAULT_CUSTOM_REPACK, which is defined
// outside this section.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDataGraphFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDataGraphModelFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthBiasControlFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthClampControlFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthClampZeroOneFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthClipControlFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthClipEnableFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDepthStencilResolveProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorBufferDensityMapPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorBufferFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorBufferPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorBufferTensorFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorBufferTensorPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorIndexingFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorIndexingProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorPoolOverallocationFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceGeneratedCommandsComputeFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDeviceMemoryReportFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDiagnosticsConfigFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDiscardRectanglePropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDriverProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDrmPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDynamicRenderingFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDynamicRenderingLocalReadFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT)
// Repack registrations for VkPhysicalDevice* structs whose suffix starts with
// "E" or "F" (VkPhysicalDeviceExclusiveScissorFeaturesNV through
// VkPhysicalDeviceFrameBoundaryFeaturesEXT). Every active entry uses
// VULKAN_DEFAULT_CUSTOM_REPACK, which is defined outside this section.
// NOTE(review): VkPhysicalDeviceExternalTensorInfoARM is commented out and
// therefore not registered here; confirm the reason before enabling it.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExclusiveScissorFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedDynamicState2FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedDynamicState3FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedDynamicState3PropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedDynamicStateFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedSparseAddressSpaceFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExtendedSparseAddressSpacePropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalBufferInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalComputeQueuePropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalFenceInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalImageFormatInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalMemoryHostPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalMemoryRDMAFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalSemaphoreInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceExternalTensorInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFaultFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFeatures2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFloatControlsProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFormatPackFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMap2FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMap2PropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapLayeredFeaturesVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapLayeredPropertiesVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapOffsetFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapOffsetPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentDensityMapPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShadingRateEnumsFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShadingRateEnumsPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShadingRateFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShadingRateKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFragmentShadingRatePropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceFrameBoundaryFeaturesEXT)
// Repack registrations for VkPhysicalDevice* structs whose suffix starts with
// "G", "H" or "I" (VkPhysicalDeviceGlobalPriorityQueryFeatures through
// VkPhysicalDeviceInvocationMaskFeaturesHUAWEI). Every active entry uses
// VULKAN_DEFAULT_CUSTOM_REPACK, which is defined outside this section.
// NOTE(review): VkPhysicalDeviceGroupProperties is commented out and therefore
// not registered here; confirm the reason before enabling it.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceGlobalPriorityQueryFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceGroupProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceHdrVividFeaturesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceHostImageCopyFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceHostImageCopyProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceHostQueryResetFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceIDProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImage2DViewOf3DFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageAlignmentControlFeaturesMESA)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageAlignmentControlPropertiesMESA)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageCompressionControlFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageCompressionControlSwapchainFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageDrmFormatModifierInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageFormatInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImagelessFramebufferFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageProcessing2FeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageProcessing2PropertiesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageProcessingFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageProcessingPropertiesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageRobustnessFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageSlicedViewOf3DFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageViewImageFormatInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceImageViewMinLodFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceIndexTypeUint8Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceInheritedViewportScissorFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceInlineUniformBlockFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceInlineUniformBlockProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceInvocationMaskFeaturesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLayeredApiPropertiesKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLayeredApiPropertiesListKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLayeredApiVulkanPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLayeredDriverPropertiesMSFT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLegacyDitheringFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLegacyVertexAttributesFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLegacyVertexAttributesPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLinearColorAttachmentFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLineRasterizationFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceLineRasterizationProperties)
// VkPhysicalDevice* structures, names M through O (Maintenance*, Memory*, MeshShader*,
// Multi*, Mutable*, Nested*, NonSeamless*, Opacity*, OpticalFlow*).
// Every entry in this range is active and uses VULKAN_DEFAULT_CUSTOM_REPACK.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance10FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance10PropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance3Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance4Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance4Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance5Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance5Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance6Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance6Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance7FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance7PropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance8FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance9FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMaintenance9PropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMapMemoryPlacedFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMapMemoryPlacedPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMemoryBudgetPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMemoryDecompressionFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMemoryDecompressionPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMemoryPriorityFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMemoryProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMeshShaderFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMeshShaderFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMeshShaderPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMeshShaderPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiDrawFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiDrawPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultisampledRenderToSingleSampledFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiviewFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiviewPerViewRenderAreasFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiviewPerViewViewportsFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMultiviewProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceNestedCommandBufferFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceNestedCommandBufferPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceOpacityMicromapFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceOpacityMicromapPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceOpticalFlowFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceOpticalFlowPropertiesNV)
// VkPhysicalDevice* structures, names starting with P (Pageable*, Partitioned*, PCIBusInfo,
// Performance*, Pipeline*, PointClipping, Present*, Primitive*, PrivateData, Properties2,
// Protected*, ProvokingVertex*, PushDescriptor).
// Every entry in this range is active and uses VULKAN_DEFAULT_CUSTOM_REPACK.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePCIBusInfoPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePerformanceCountersByRegionFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePerformanceCountersByRegionPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePerformanceQueryFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePerformanceQueryPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePerStageDescriptorSetFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineBinaryFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineBinaryPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineCacheIncrementalModeFeaturesSEC)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineCreationCacheControlFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineLibraryGroupHandlesFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineOpacityMicromapFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelinePropertiesFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineProtectedAccessFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineRobustnessFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePipelineRobustnessProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePointClippingProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentBarrierFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentId2FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentIdFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentMeteringFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentModeFifoLatestReadyFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentTimingFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentWait2FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePresentWaitFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePrivateDataFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceProtectedMemoryFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceProtectedMemoryProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceProvokingVertexFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceProvokingVertexPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDevicePushDescriptorProperties)
// VkPhysicalDevice* structures, names Q through R (QueueFamilyDataGraph*, Rasterization*,
// RawAccessChains, RayQuery, RayTracing*, RelaxedLineRasterization, RenderPassStriped*,
// RepresentativeFragmentTest, RGBA10X6Formats, Robustness2*).
// Every entry in this range is active and uses VULKAN_DEFAULT_CUSTOM_REPACK.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceQueueFamilyDataGraphProcessingEngineInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRawAccessChainsFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayQueryFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingMotionBlurFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingPipelineFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingPipelinePropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingPositionFetchFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRayTracingValidationFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRelaxedLineRasterizationFeaturesIMG)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRenderPassStripedFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRenderPassStripedPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRGBA10X6FormatsFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRobustness2FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceRobustness2PropertiesKHR)
// VkPhysicalDevice* structures, names starting with S (SampleLocations, Sampler*,
// ScalarBlockLayout, SchedulingControls*, SeparateDepthStencilLayouts, Shader*,
// ShadingRateImage*, SparseImageFormatInfo2, Subgroup*, Subpass*, SurfaceInfo2,
// SwapchainMaintenance1, Synchronization2).
// Every entry in this range is active and uses VULKAN_DEFAULT_CUSTOM_REPACK.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSampleLocationsPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSamplerFilterMinmaxProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSamplerYcbcrConversionFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceScalarBlockLayoutFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSchedulingControlsFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSchedulingControlsPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShader64BitIndexingFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderAtomicFloat16VectorFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderAtomicFloatFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderAtomicInt64Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderBfloat16FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderClockFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderCoreBuiltinsFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderCoreBuiltinsPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderCoreProperties2AMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderCorePropertiesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderCorePropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderDrawParametersFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderEarlyAndLateFragmentTestsFeaturesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderExpectAssumeFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderFloat16Int8Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderFloat8FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderFloatControls2Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderFmaFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderImageFootprintFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderIntegerDotProductFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderIntegerDotProductProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderLongVectorFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderLongVectorPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderMaximalReconvergenceFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderObjectFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderObjectPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderQuadControlFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderReplicatedCompositesFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderSMBuiltinsFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderSMBuiltinsPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderSubgroupRotateFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderTerminateInvocationFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderTileImageFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderTileImagePropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderUniformBufferUnsizedArrayFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShaderUntypedPointersFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShadingRateImageFeaturesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceShadingRateImagePropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSparseImageFormatInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubgroupProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubgroupSizeControlFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubgroupSizeControlProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubpassMergeFeedbackFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubpassShadingFeaturesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSubpassShadingPropertiesHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSurfaceInfo2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSwapchainMaintenance1FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceSynchronization2Features)
// VkPhysicalDevice* structures, names T through Z (Tensor*, Texel*, Texture*, Tile*,
// TimelineSemaphore*, ToolProperties, TransformFeedback*, Unified*, Uniform*, Variable*,
// Vertex*, Video*, Vulkan11..14*, VulkanMemoryModel, Workgroup*, Ycbcr*, ZeroInitialize*).
// VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR is the only entry commented out here, so
// it gets no expansion.
// NOTE(review): the reason it is disabled is not visible in this section — confirm before
// re-enabling.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTensorFeaturesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTensorPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTexelBufferAlignmentProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTextureCompressionASTC3DFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTextureCompressionASTCHDRFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTileMemoryHeapFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTileMemoryHeapPropertiesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTilePropertiesFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTileShadingFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTileShadingPropertiesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTimelineSemaphoreFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTimelineSemaphoreProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceToolProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTransformFeedbackFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceTransformFeedbackPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceUnifiedImageLayoutsFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceUniformBufferStandardLayoutFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVariablePointersFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVertexAttributeDivisorFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVertexAttributeDivisorProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVertexAttributeRobustnessFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoDecodeVP9FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoEncodeAV1FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoEncodeIntraRefreshFeaturesKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoEncodeQuantizationMapFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoEncodeRgbConversionFeaturesVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoFormatInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoMaintenance1FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVideoMaintenance2FeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan11Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan11Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan12Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan12Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan13Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan13Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan14Features)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkan14Properties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceVulkanMemoryModelFeatures)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceYcbcr2Plane444FormatsFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceYcbcrDegammaFeaturesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceZeroInitializeDeviceMemoryFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeatures)
// VkPipeline* structures.
// Most entries use VULKAN_DEFAULT_CUSTOM_REPACK. VkPipelineCacheCreateInfo and
// VkPipelineShaderStageCreateInfo use VULKAN_NONDEFAULT_CUSTOM_REPACK instead.
// Commented-out entries are not expanded, so nothing is generated for those types here.
// NOTE(review): neither macro's definition is visible in this section; presumably the
// NONDEFAULT variant pairs with hand-written repack code defined elsewhere — confirm
// before moving an entry between the two macros or re-enabling a disabled one.
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineBinaryCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineBinaryDataInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineBinaryHandlesInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineBinaryInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineBinaryKeyKHR)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkPipelineCacheCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineColorBlendAdvancedStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineColorBlendStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineColorWriteCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCompilerControlCreateInfoAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCoverageModulationStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCoverageReductionStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCoverageToColorStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCreateFlags2CreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineCreationFeedbackCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineDepthStencilStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineDiscardRectangleStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineDynamicStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineExecutableInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineExecutableInternalRepresentationKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineExecutablePropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineExecutableStatisticKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineFragmentDensityMapLayeredCreateInfoVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineFragmentShadingRateEnumStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineFragmentShadingRateStateCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineIndirectDeviceAddressInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineInputAssemblyStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineLayoutCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineLibraryCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineMultisampleStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelinePropertiesIdentifierEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationConservativeStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationDepthClipStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationLineStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationProvokingVertexStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationStateRasterizationOrderAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRasterizationStateStreamCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRenderingCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRepresentativeFragmentTestStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineRobustnessCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineSampleLocationsStateCreateInfoEXT)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkPipelineShaderStageCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineShaderStageModuleIdentifierCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineShaderStageRequiredSubgroupSizeCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineTessellationDomainOriginStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineTessellationStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineVertexInputDivisorStateCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineVertexInputStateCreateInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportCoarseSampleOrderStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportDepthClampControlCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportDepthClipControlCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportExclusiveScissorStateCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportShadingRateImageStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportStateCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportSwizzleStateCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPipelineViewportWScalingStateCreateInfoNV)
// VkPresent*, VkPrivateDataSlotCreateInfo, VkProtectedSubmitInfo and VkPush* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK. Commented-out entries (VkPresentRegionsKHR,
// VkPresentTimesInfoGOOGLE, VkPresentTimingsInfoEXT and all three VkPush* types) are not
// expanded here.
// NOTE(review): why those entries are disabled is not visible in this section — confirm
// before re-enabling.
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentId2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentIdKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentRegionsKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentTimesInfoGOOGLE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentTimingInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentTimingsInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentTimingSurfaceCapabilitiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPresentWait2InfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkPrivateDataSlotCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkProtectedSubmitInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPushConstantsInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPushDescriptorSetInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkPushDescriptorSetWithTemplateInfo)
// VkQuery* and VkQueueFamily* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK; VkQueryLowLatencySupportNV is commented
// out and therefore not expanded here.
// NOTE(review): the reason it is disabled is not visible in this section — confirm.
// VULKAN_DEFAULT_CUSTOM_REPACK(VkQueryLowLatencySupportNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueryPoolCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueryPoolPerformanceCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueryPoolPerformanceQueryCreateInfoINTEL)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueryPoolVideoEncodeFeedbackCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyCheckpointProperties2NV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyCheckpointPropertiesNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyDataGraphProcessingEnginePropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyDataGraphPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyGlobalPriorityProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyOwnershipTransferPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyQueryResultStatusPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkQueueFamilyVideoPropertiesKHR)
// VkRayTracing* and VkRelease* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK. The KHR and NV
// VkRayTracingPipelineCreateInfo* types and VkRayTracingShaderGroupCreateInfoKHR are
// commented out and therefore not expanded here.
// NOTE(review): the reason those entries are disabled is not visible in this section.
VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingPipelineCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingPipelineCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingPipelineInterfaceCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingShaderGroupCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRayTracingShaderGroupCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkReleaseCapturedPipelineDataInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkReleaseSwapchainImagesInfoKHR)
// VkRendering*, VkRenderPass* and VkResolve* structures.
// VkRenderingInfo, VkRenderPassBeginInfo, VkRenderPassCreateInfo and
// VkRenderPassCreateInfo2 use VULKAN_NONDEFAULT_CUSTOM_REPACK; all other active entries
// use VULKAN_DEFAULT_CUSTOM_REPACK. Commented-out entries are not expanded here.
// NOTE(review): neither macro's definition is visible in this section; presumably the
// NONDEFAULT variant pairs with hand-written repack code defined elsewhere — confirm
// before changing which macro an entry uses.
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingAreaInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingAttachmentFlagsInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingAttachmentInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingAttachmentLocationInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingEndInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingFragmentDensityMapAttachmentInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingFragmentShadingRateAttachmentInfoKHR)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkRenderingInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderingInputAttachmentIndexInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassAttachmentBeginInfo)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkRenderPassBeginInfo)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkRenderPassCreateInfo)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkRenderPassCreateInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassCreationControlEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassCreationFeedbackCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassFragmentDensityMapCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassFragmentDensityMapOffsetEndInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassInputAttachmentAspectCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassMultiviewCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassPerformanceCountersByRegionBeginInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassSampleLocationsBeginInfoEXT)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassStripeBeginInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassStripeInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassStripeSubmitInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassSubpassFeedbackCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassTileShadingCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkRenderPassTransformBeginInfoQCOM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkResolveImageInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkResolveImageModeInfoKHR)
// VkSampleLocations*, VkSampler*, VkSemaphore*, VkSet*, VkShader*, VkSharedPresent* and
// VkSparseImage* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK; VkShaderCreateInfoEXT is commented out
// and therefore not expanded here.
// NOTE(review): the reason it is disabled is not visible in this section — confirm.
VULKAN_DEFAULT_CUSTOM_REPACK(VkSampleLocationsInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerBlockMatchWindowCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerBorderColorComponentMappingCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerCaptureDescriptorDataInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerCubicWeightsCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerCustomBorderColorCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerReductionModeCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerYcbcrConversionCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerYcbcrConversionImageFormatProperties)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerYcbcrConversionInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSamplerYcbcrConversionYcbcrDegammaCreateInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreGetFdInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreSignalInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreSubmitInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreTypeCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSemaphoreWaitInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSetDescriptorBufferOffsetsInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSetLatencyMarkerInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSetPresentConfigNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkShaderCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkShaderModuleCreateInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkShaderModuleIdentifierEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkShaderModuleValidationCacheCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSharedPresentSurfaceCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSparseImageFormatProperties2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSparseImageMemoryRequirements2)
// VkSubmit*, VkSubpass* and VkSubresource* structures.
// VkSubmitInfo and VkSubpassDescription2 use VULKAN_NONDEFAULT_CUSTOM_REPACK; the other
// active entries use VULKAN_DEFAULT_CUSTOM_REPACK. VkSubmitInfo2 and
// VkSubpassDescriptionDepthStencilResolve are commented out and not expanded here.
// NOTE(review): neither macro's definition is visible in this section — confirm the
// intended handling before changing which macro an entry uses.
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkSubmitInfo)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkSubmitInfo2)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassBeginInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassDependency2)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkSubpassDescription2)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassDescriptionDepthStencilResolve)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassEndInfo)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassResolvePerformanceQueryEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubpassShadingPipelineCreateInfoHUAWEI)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubresourceHostMemcpySize)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSubresourceLayout2)
// VkSurface* and VkSwapchain* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK; VkSwapchainTimeDomainPropertiesEXT is
// commented out and therefore not expanded here.
// NOTE(review): the reason it is disabled is not visible in this section — confirm.
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceCapabilities2EXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceCapabilities2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceCapabilitiesPresentBarrierNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceCapabilitiesPresentId2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceCapabilitiesPresentWait2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceFormat2KHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfacePresentModeCompatibilityKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfacePresentModeKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfacePresentScalingCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSurfaceProtectedCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainCalibratedTimestampInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainCounterCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainDisplayNativeHdrCreateInfoAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainLatencyCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainPresentBarrierCreateInfoNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainPresentFenceInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainPresentModeInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainPresentModesCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainPresentScalingCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainTimeDomainPropertiesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkSwapchainTimingPropertiesEXT)
// VkTensor*, VkTextureLODGather*, VkTile* and VkTimelineSemaphore* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK. VkTensorCopyARM, VkTensorCreateInfoARM,
// VkTensorDependencyInfoARM and VkTensorDescriptionARM are commented out and therefore
// not expanded here.
// NOTE(review): the reason those entries are disabled is not visible in this section.
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorCaptureDescriptorDataInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorCopyARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorCreateInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorDependencyInfoARM)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorDescriptionARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorFormatPropertiesARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorMemoryBarrierARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorMemoryRequirementsInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorViewCaptureDescriptorDataInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTensorViewCreateInfoARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTextureLODGatherFormatPropertiesAMD)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTileMemoryBindInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTileMemoryRequirementsQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTileMemorySizeInfoQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTilePropertiesQCOM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkTimelineSemaphoreSubmitInfo)
// VkValidation* and VkVertexInput* structures.
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK; VkValidationCacheCreateInfoEXT is
// commented out and therefore not expanded here.
// NOTE(review): the reason it is disabled is not visible in this section — confirm.
// VULKAN_DEFAULT_CUSTOM_REPACK(VkValidationCacheCreateInfoEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkValidationFeaturesEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkValidationFlagsEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVertexInputAttributeDescription2EXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVertexInputBindingDescription2EXT)
// VkVideo* structures (decode and encode, covering AV1, H.264, H.265 and VP9 variants).
// Active lines use VULKAN_DEFAULT_CUSTOM_REPACK. Commented-out entries — including
// VkVideoBeginCodingInfoKHR, VkVideoDecodeInfoKHR, the AV1 DpbSlotInfo types and every
// *SessionParametersAddInfoKHR / *SessionParametersCreateInfoKHR type in this range —
// are not expanded here.
// NOTE(review): the reason those entries are disabled is not visible in this section —
// confirm before re-enabling any of them.
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoBeginCodingInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoCodingControlInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeAV1CapabilitiesKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeAV1DpbSlotInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeAV1InlineSessionParametersInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeAV1ProfileInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeAV1SessionParametersCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264CapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264DpbSlotInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264PictureInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264ProfileInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264SessionParametersAddInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH264SessionParametersCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265CapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265DpbSlotInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265PictureInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265ProfileInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265SessionParametersAddInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeH265SessionParametersCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeUsageInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeVP9CapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoDecodeVP9ProfileInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1CapabilitiesKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1DpbSlotInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1GopRemainingFrameInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1ProfileInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1QualityLevelPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1QuantizationMapCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1RateControlInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1RateControlLayerInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1SessionCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeAV1SessionParametersCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264CapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264DpbSlotInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264GopRemainingFrameInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264ProfileInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264QualityLevelPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264QuantizationMapCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264RateControlInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264RateControlLayerInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264SessionCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264SessionParametersFeedbackInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH264SessionParametersGetInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265CapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265DpbSlotInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265GopRemainingFrameInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265ProfileInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265QualityLevelPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265QuantizationMapCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265RateControlInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265RateControlLayerInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265SessionCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265SessionParametersFeedbackInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeH265SessionParametersGetInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeIntraRefreshCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeIntraRefreshInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeProfileRgbConversionInfoVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeQualityLevelInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeQualityLevelPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeQuantizationMapCapabilitiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeQuantizationMapInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeQuantizationMapSessionParametersCreateInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeRateControlInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeRateControlLayerInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeRgbConversionCapabilitiesVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeSessionIntraRefreshCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeSessionParametersFeedbackInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeSessionParametersGetInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeSessionRgbConversionCreateInfoVALVE)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEncodeUsageInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoEndCodingInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoFormatAV1QuantizationMapPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoFormatH265QuantizationMapPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoFormatPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoFormatQuantizationMapPropertiesKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoInlineQueryInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoPictureResourceInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoProfileInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoProfileListInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoReferenceIntraRefreshInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoReferenceSlotInfoKHR)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoSessionCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoSessionMemoryRequirementsKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoSessionParametersCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkVideoSessionParametersUpdateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWaylandSurfaceCreateInfoKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSet) // TODO: This should be non-default instead
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSetAccelerationStructureKHR)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSetAccelerationStructureNV)
// VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSetInlineUniformBlock)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSetPartitionedAccelerationStructureNV)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteDescriptorSetTensorARM)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteIndirectExecutionSetPipelineEXT)
VULKAN_DEFAULT_CUSTOM_REPACK(VkWriteIndirectExecutionSetShaderEXT)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkXcbSurfaceCreateInfoKHR)
VULKAN_NONDEFAULT_CUSTOM_REPACK(VkXlibSurfaceCreateInfoKHR)


// Entry hook for VkInstanceCreateInfo: after the default repacking, builds
// host-side copies of the members that need custom handling.
//   into - host structure being populated
//   from - guest structure supplied by the application
// The allocations made here are released by the matching fex_custom_repack_exit.
void fex_custom_repack_entry(host_layout<VkInstanceCreateInfo>& into, const guest_layout<VkInstanceCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  // Vulkan permits pApplicationInfo to be NULL, so only repack it when the
  // guest actually supplied one; dereferencing a null guest pointer would crash.
  if (auto GuestApplicationInfo = from.data.pApplicationInfo.get_pointer()) {
    auto HostApplicationInfo = new host_layout<VkApplicationInfo> {*GuestApplicationInfo};
    fex_apply_custom_repacking_entry(*HostApplicationInfo, *GuestApplicationInfo);
    into.data.pApplicationInfo = &HostApplicationInfo->data;
  } else {
    // The exit hook deletes this pointer; deleting nullptr is a no-op.
    into.data.pApplicationInfo = nullptr;
  }

  auto extension_count = from.data.enabledExtensionCount.data;
  into.data.ppEnabledExtensionNames = RepackStructArray<false>(extension_count, from.data.ppEnabledExtensionNames).data();

  auto layer_count = from.data.enabledLayerCount.data;
  into.data.ppEnabledLayerNames = RepackStructArray<false>(layer_count, from.data.ppEnabledLayerNames).data();
}

// Exit hook for VkInstanceCreateInfo: frees the application info and the two
// name arrays referenced by the host structure. `into` is left untouched.
// Always returns false.
bool fex_custom_repack_exit(guest_layout<VkInstanceCreateInfo>& into, const host_layout<VkInstanceCreateInfo>& from) {
  const auto& host = from.data;

  delete host.pApplicationInfo;
  delete[] host.ppEnabledExtensionNames;
  delete[] host.ppEnabledLayerNames;

  return false;
}

// Entry hook for VkMemoryToImageCopyEXT: runs the default repacking and then
// forwards the guest's pHostPointer to the host structure as a native pointer.
void fex_custom_repack_entry(host_layout<VkMemoryToImageCopyEXT>& into, const guest_layout<VkMemoryToImageCopyEXT>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pHostPointer = guest.pHostPointer.get_pointer();
}

// Exit hook for VkMemoryToImageCopyEXT. The entry hook allocates nothing, so
// there is nothing to release or copy back; always returns false.
bool fex_custom_repack_exit(guest_layout<VkMemoryToImageCopyEXT>& into, const host_layout<VkMemoryToImageCopyEXT>& from) {
  return false;
}

// Entry hook for VkDeviceCreateInfo: after the default repacking, builds
// host-side copies of the queue create info and of the layer/extension name
// arrays.
//   into - host structure being populated
//   from - guest structure supplied by the application
// The allocations made here are released by the matching fex_custom_repack_exit.
void fex_custom_repack_entry(host_layout<VkDeviceCreateInfo>& into, const guest_layout<VkDeviceCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  // NOTE(review): only the element pQueueCreateInfos points at is repacked;
  // queueCreateInfoCount is not consulted here.
  auto HostQueueCreateInfo = new host_layout<VkDeviceQueueCreateInfo> {*from.data.pQueueCreateInfos.get_pointer()};
  fex_apply_custom_repacking_entry(*HostQueueCreateInfo, *from.data.pQueueCreateInfos.get_pointer());
  into.data.pQueueCreateInfos = &HostQueueCreateInfo->data;

  // The layer array length comes from enabledLayerCount. Using the extension
  // count here would read past (or truncate) the guest's layer-name array.
  auto layer_count = from.data.enabledLayerCount.data;
  fprintf(stderr, "  Repacking %d ppEnabledLayerNames\n", layer_count);
  into.data.ppEnabledLayerNames = RepackStructArray<false>(layer_count, from.data.ppEnabledLayerNames).data();

  auto extension_count = from.data.enabledExtensionCount.data;
  fprintf(stderr, "  Repacking %d ppEnabledExtensionNames\n", extension_count);
  into.data.ppEnabledExtensionNames = RepackStructArray<false>(extension_count, from.data.ppEnabledExtensionNames).data();
}

// Exit hook for VkDeviceCreateInfo: frees the queue create info and the two
// name arrays referenced by the host structure. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkDeviceCreateInfo>& into, const host_layout<VkDeviceCreateInfo>& from) {
  const auto& host = from.data;

  delete host.pQueueCreateInfos;
  delete[] host.ppEnabledExtensionNames;
  delete[] host.ppEnabledLayerNames;

  return false;
}

// Entry hook for VkDescriptorSetLayoutCreateInfo: runs the default repacking,
// then repacks the bindingCount-long pBindings array for the host.
void fex_custom_repack_entry(host_layout<VkDescriptorSetLayoutCreateInfo>& into, const guest_layout<VkDescriptorSetLayoutCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pBindings = RepackStructArray(guest.bindingCount.data, guest.pBindings).data();
}

// Exit hook for VkDescriptorSetLayoutCreateInfo: frees the host pBindings
// array. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkDescriptorSetLayoutCreateInfo>& into, const host_layout<VkDescriptorSetLayoutCreateInfo>& from) {
  const auto& host = from.data;
  delete[] host.pBindings;

  return false;
}

// Entry hook for VkRenderPassCreateInfo: runs the default repacking, then
// repacks the subpassCount-long pSubpasses array for the host.
void fex_custom_repack_entry(host_layout<VkRenderPassCreateInfo>& into, const guest_layout<VkRenderPassCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pSubpasses = RepackStructArray(guest.subpassCount.data, guest.pSubpasses).data();
}

// Exit hook for VkRenderPassCreateInfo: frees the host pSubpasses array.
// Always returns false.
bool fex_custom_repack_exit(guest_layout<VkRenderPassCreateInfo>& into, const host_layout<VkRenderPassCreateInfo>& from) {
  const auto& host = from.data;
  delete[] host.pSubpasses;

  return false;
}

// Entry hook for VkRenderPassCreateInfo2: runs the default repacking, then
// repacks the attachment, subpass and dependency arrays for the host.
void fex_custom_repack_entry(host_layout<VkRenderPassCreateInfo2>& into, const guest_layout<VkRenderPassCreateInfo2>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  auto& host = into.data;

  host.pAttachments = RepackStructArray(guest.attachmentCount.data, guest.pAttachments).data();
  host.pSubpasses = RepackStructArray(guest.subpassCount.data, guest.pSubpasses).data();
  host.pDependencies = RepackStructArray(guest.dependencyCount.data, guest.pDependencies).data();
}

// Exit hook for VkRenderPassCreateInfo2: passes each repacked array, with its
// element count and the guest-side pointer, to DeleteRepackedStructArray.
// Always returns false.
bool fex_custom_repack_exit(guest_layout<VkRenderPassCreateInfo2>& into, const host_layout<VkRenderPassCreateInfo2>& from) {
  const auto& host = from.data;
  auto& guest = into.data;

  DeleteRepackedStructArray(host.attachmentCount, host.pAttachments, guest.pAttachments);
  DeleteRepackedStructArray(host.subpassCount, host.pSubpasses, guest.pSubpasses);
  DeleteRepackedStructArray(host.dependencyCount, host.pDependencies, guest.pDependencies);

  return false;
}

// Entry hook for VkSubpassDescription2: runs the default repacking, repacks
// the input/color/resolve attachment arrays, and creates a host copy of the
// optional depth-stencil attachment reference.
void fex_custom_repack_entry(host_layout<VkSubpassDescription2>& into, const guest_layout<VkSubpassDescription2>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  auto& host = into.data;

  host.pInputAttachments = RepackStructArray(guest.inputAttachmentCount.data, guest.pInputAttachments).data();
  host.pColorAttachments = RepackStructArray(guest.colorAttachmentCount.data, guest.pColorAttachments).data();
  // The resolve array is sized by colorAttachmentCount as well.
  host.pResolveAttachments = RepackStructArray(guest.colorAttachmentCount.data, guest.pResolveAttachments).data();

  if (guest.pDepthStencilAttachment.data != 0) {
    host.pDepthStencilAttachment = new VkAttachmentReference2;
    auto guest_depth_stencil = guest.pDepthStencilAttachment.get_pointer();
    auto repacked = host_layout<VkAttachmentReference2> {*guest_depth_stencil};
    fex_apply_custom_repacking_entry(repacked, *guest_depth_stencil);
    // The host member points to const, hence the cast before copying into it.
    memcpy((void*)host.pDepthStencilAttachment, &repacked.data, sizeof(VkAttachmentReference2));
  } else {
    host.pDepthStencilAttachment = nullptr;
  }
}

// Exit hook for VkSubpassDescription2: releases the repacked attachment arrays
// and, if present, runs exit repacking on the depth-stencil reference before
// deleting it. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkSubpassDescription2>& into, const host_layout<VkSubpassDescription2>& from) {
  const auto& host = from.data;
  auto& guest = into.data;

  DeleteRepackedStructArray(host.inputAttachmentCount, host.pInputAttachments, guest.pInputAttachments);
  DeleteRepackedStructArray(host.colorAttachmentCount, host.pColorAttachments, guest.pColorAttachments);
  DeleteRepackedStructArray(host.colorAttachmentCount, host.pResolveAttachments, guest.pResolveAttachments);

  if (host.pDepthStencilAttachment != nullptr) {
    fex_apply_custom_repacking_exit(*guest.pDepthStencilAttachment.get_pointer(), to_host_layout(*host.pDepthStencilAttachment));
    delete host.pDepthStencilAttachment;
  }

  return false;
}

// Entry hook for VkRenderingInfo: runs the default repacking, repacks the
// color attachment array, and creates host copies of the optional depth and
// stencil attachments.
void fex_custom_repack_entry(host_layout<VkRenderingInfo>& into, const guest_layout<VkRenderingInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  // Builds a heap-allocated host copy of one optional attachment, or returns
  // nullptr when the guest did not provide it.
  const auto repack_optional_attachment = [](const auto& guest_attachment) -> VkRenderingAttachmentInfo* {
    if (guest_attachment.get_pointer() == nullptr) {
      return nullptr;
    }
    auto host_attachment = new VkRenderingAttachmentInfo;
    auto in_data = host_layout<VkRenderingAttachmentInfo> {*guest_attachment.get_pointer()};
    fex_apply_custom_repacking_entry(in_data, *guest_attachment.get_pointer());
    memcpy((void*)host_attachment, &in_data.data, sizeof(VkRenderingAttachmentInfo));
    return host_attachment;
  };

  const auto& guest = from.data;
  auto& host = into.data;

  host.pColorAttachments = RepackStructArray(guest.colorAttachmentCount.data, guest.pColorAttachments).data();
  host.pDepthAttachment = repack_optional_attachment(guest.pDepthAttachment);
  host.pStencilAttachment = repack_optional_attachment(guest.pStencilAttachment);
}

// Exit hook for VkRenderingInfo: releases the repacked color attachment array
// and, for each of the depth/stencil attachments that is present, runs exit
// repacking and deletes the host copy. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkRenderingInfo>& into, const host_layout<VkRenderingInfo>& from) {
  const auto& host = from.data;
  auto& guest = into.data;

  DeleteRepackedStructArray(host.colorAttachmentCount, host.pColorAttachments, guest.pColorAttachments);

  if (host.pDepthAttachment != nullptr) {
    fex_apply_custom_repacking_exit(*guest.pDepthAttachment.get_pointer(), to_host_layout(*host.pDepthAttachment));
    delete host.pDepthAttachment;
  }

  if (host.pStencilAttachment != nullptr) {
    fex_apply_custom_repacking_exit(*guest.pStencilAttachment.get_pointer(), to_host_layout(*host.pStencilAttachment));
    delete host.pStencilAttachment;
  }

  return false;
}

// Entry hook for VkDescriptorGetInfoEXT: after the default repacking, converts
// the guest's descriptor payload union to host form based on the descriptor
// type.
//   into - host structure being populated (its `type` selects the union member)
//   from - guest structure supplied by the application
// Aborts the process for descriptor types that are not handled below.
void fex_custom_repack_entry(host_layout<VkDescriptorGetInfoEXT>& into, const guest_layout<VkDescriptorGetInfoEXT>& from) {
  default_fex_custom_repack_entry(into, from);

  switch (into.data.type) {
  case VK_DESCRIPTOR_TYPE_SAMPLER:
  case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
  case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
  case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
  case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
    // VkSampler* or VkDescriptorImageInfo*. Handle by zero-extending
    guest_layout<VkSampler*> guest_data;
    memcpy(&guest_data, from.data.data.union_storage, sizeof(guest_data));
    into.data.data.pSampler = host_layout<VkSampler*> {guest_data}.data;
    break;
  }

  case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
  case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
  case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
    // VkDescriptorAddressInfoEXT*. Repacking required
    guest_layout<VkDescriptorAddressInfoEXT*> guest_ptr;
    memcpy(&guest_ptr, from.data.data.union_storage, sizeof(guest_ptr));
    // Allocated with aligned_alloc + placement new; the exit hook releases it with free().
    auto child_mem = (char*)aligned_alloc(alignof(host_layout<VkDescriptorAddressInfoEXT>), sizeof(host_layout<VkDescriptorAddressInfoEXT>));
    auto child = new (child_mem) host_layout<VkDescriptorAddressInfoEXT> {*guest_ptr.get_pointer()};

    default_fex_custom_repack_entry(*child, *guest_ptr.get_pointer());
    into.data.data.pUniformBuffer = &child->data;
    break;
  }

  case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
  case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV: {
    // Copy unmodified
    static_assert(sizeof(guest_layout<VkDeviceAddress>) == sizeof(uint64_t));
    memcpy(&into.data.data.accelerationStructure, &from.data.data, sizeof(uint64_t));
    // Without this break, control fell through into the unsupported-type path
    // below and aborted even though the payload had been copied successfully.
    break;
  }

  // Unsupported descriptor types share the error path with unknown values.
  case VK_DESCRIPTOR_TYPE_SAMPLE_WEIGHT_IMAGE_QCOM:
  case VK_DESCRIPTOR_TYPE_BLOCK_MATCH_IMAGE_QCOM:
  case VK_DESCRIPTOR_TYPE_MUTABLE_EXT:
  case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
  case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
  case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK:
  default: fprintf(stderr, "ERROR: Invalid descriptor type used in VkDescriptorGetInfoEXT"); std::abort();
  }
}

// Exit hook for VkDescriptorGetInfoEXT: frees the address-info storage that
// the entry hook allocates for the buffer descriptor types. Other descriptor
// types need no cleanup. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkDescriptorGetInfoEXT>& into, const host_layout<VkDescriptorGetInfoEXT>& from) {
  const auto descriptor_type = from.data.type;

  const bool has_repacked_address_info =
    descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER || descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER ||
    descriptor_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER || descriptor_type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

  if (has_repacked_address_info) {
    // Delete storage allocated on entry
    free((void*)from.data.data.pUniformBuffer);
  }

  return false;
}

// Entry hook for VkCopyMemoryToImageInfoEXT: runs the default repacking, then
// repacks the regionCount-long pRegions array for the host.
void fex_custom_repack_entry(host_layout<VkCopyMemoryToImageInfoEXT>& into, const guest_layout<VkCopyMemoryToImageInfoEXT>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pRegions = RepackStructArray(guest.regionCount.data, guest.pRegions).data();
}

// Exit hook for VkCopyMemoryToImageInfoEXT: passes the repacked pRegions array
// to DeleteRepackedStructArray. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkCopyMemoryToImageInfoEXT>& into, const host_layout<VkCopyMemoryToImageInfoEXT>& from) {
  const auto& host = from.data;
  DeleteRepackedStructArray(host.regionCount, host.pRegions, into.data.pRegions);

  return false;
}

// Entry hook for VkDependencyInfo: runs the default repacking, then repacks
// the memory, image-memory and buffer-memory barrier arrays for the host.
void fex_custom_repack_entry(host_layout<VkDependencyInfo>& into, const guest_layout<VkDependencyInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  auto& host = into.data;

  host.pMemoryBarriers = RepackStructArray(guest.memoryBarrierCount.data, guest.pMemoryBarriers).data();
  host.pImageMemoryBarriers = RepackStructArray(guest.imageMemoryBarrierCount.data, guest.pImageMemoryBarriers).data();
  host.pBufferMemoryBarriers = RepackStructArray(guest.bufferMemoryBarrierCount.data, guest.pBufferMemoryBarriers).data();
}

// Exit hook for VkDependencyInfo: passes each repacked barrier array, with its
// element count and guest-side pointer, to DeleteRepackedStructArray.
// Always returns false.
bool fex_custom_repack_exit(guest_layout<VkDependencyInfo>& into, const host_layout<VkDependencyInfo>& from) {
  const auto& host = from.data;
  auto& guest = into.data;

  DeleteRepackedStructArray(host.memoryBarrierCount, host.pMemoryBarriers, guest.pMemoryBarriers);
  DeleteRepackedStructArray(host.imageMemoryBarrierCount, host.pImageMemoryBarriers, guest.pImageMemoryBarriers);
  DeleteRepackedStructArray(host.bufferMemoryBarrierCount, host.pBufferMemoryBarriers, guest.pBufferMemoryBarriers);

  return false;
}

// Entry hook for VkDescriptorUpdateTemplateCreateInfo: runs the default
// repacking, then repacks the pDescriptorUpdateEntries array for the host.
void fex_custom_repack_entry(host_layout<VkDescriptorUpdateTemplateCreateInfo>& into,
                             const guest_layout<VkDescriptorUpdateTemplateCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pDescriptorUpdateEntries = RepackStructArray(guest.descriptorUpdateEntryCount.data, guest.pDescriptorUpdateEntries).data();
}

// Exit hook for VkDescriptorUpdateTemplateCreateInfo: passes the repacked
// entry array to DeleteRepackedStructArray. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkDescriptorUpdateTemplateCreateInfo>& into, const host_layout<VkDescriptorUpdateTemplateCreateInfo>& from) {
  const auto& host = from.data;
  DeleteRepackedStructArray(host.descriptorUpdateEntryCount, host.pDescriptorUpdateEntries, into.data.pDescriptorUpdateEntries);

  return false;
}

// Entry hook for VkPipelineShaderStageCreateInfo: runs the default repacking.
// A non-null pSpecializationInfo is not supported yet and aborts the process.
void fex_custom_repack_entry(host_layout<VkPipelineShaderStageCreateInfo>& into, const guest_layout<VkPipelineShaderStageCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const bool has_specialization_info = from.data.pSpecializationInfo.get_pointer() != nullptr;
  if (!has_specialization_info) {
    return;
  }

  fprintf(stderr, "ERROR: Cannot repack non-null VkPipelineShaderStageCreateInfo::pSpecializationInfo yet");
  std::abort();
}

// Exit hook for VkPipelineShaderStageCreateInfo. Currently a stub: the entry
// hook allocates nothing, so there is nothing to release. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkPipelineShaderStageCreateInfo>& into, const host_layout<VkPipelineShaderStageCreateInfo>& from) {
  // TODO
  return false;
}

// Entry hook for VkGraphicsPipelineCreateInfo: runs the default repacking,
// repacks the pStages array, and for every optional pipeline-state pointer
// either allocates a host_layout copy of the guest structure or stores nullptr
// when the guest left it unset.
void fex_custom_repack_entry(host_layout<VkGraphicsPipelineCreateInfo>& into, const guest_layout<VkGraphicsPipelineCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  auto& host = into.data;

  host.pStages = RepackStructArray(guest.stageCount.data, guest.pStages).data();

  if (auto guest_state = guest.pVertexInputState.get_pointer()) {
    host.pVertexInputState = &(new host_layout<VkPipelineVertexInputStateCreateInfo> {*guest_state})->data;
  } else {
    host.pVertexInputState = nullptr;
  }

  if (auto guest_state = guest.pInputAssemblyState.get_pointer()) {
    host.pInputAssemblyState = &(new host_layout<VkPipelineInputAssemblyStateCreateInfo> {*guest_state})->data;
  } else {
    host.pInputAssemblyState = nullptr;
  }

  if (auto guest_state = guest.pTessellationState.get_pointer()) {
    host.pTessellationState = &(new host_layout<VkPipelineTessellationStateCreateInfo> {*guest_state})->data;
  } else {
    host.pTessellationState = nullptr;
  }

  if (auto guest_state = guest.pViewportState.get_pointer()) {
    host.pViewportState = &(new host_layout<VkPipelineViewportStateCreateInfo> {*guest_state})->data;
  } else {
    host.pViewportState = nullptr;
  }

  if (auto guest_state = guest.pRasterizationState.get_pointer()) {
    host.pRasterizationState = &(new host_layout<VkPipelineRasterizationStateCreateInfo> {*guest_state})->data;
  } else {
    host.pRasterizationState = nullptr;
  }

  if (auto guest_state = guest.pMultisampleState.get_pointer()) {
    host.pMultisampleState = &(new host_layout<VkPipelineMultisampleStateCreateInfo> {*guest_state})->data;
  } else {
    host.pMultisampleState = nullptr;
  }

  if (auto guest_state = guest.pDepthStencilState.get_pointer()) {
    host.pDepthStencilState = &(new host_layout<VkPipelineDepthStencilStateCreateInfo> {*guest_state})->data;
  } else {
    host.pDepthStencilState = nullptr;
  }

  if (auto guest_state = guest.pColorBlendState.get_pointer()) {
    host.pColorBlendState = &(new host_layout<VkPipelineColorBlendStateCreateInfo> {*guest_state})->data;
  } else {
    host.pColorBlendState = nullptr;
  }

  if (auto guest_state = guest.pDynamicState.get_pointer()) {
    host.pDynamicState = &(new host_layout<VkPipelineDynamicStateCreateInfo> {*guest_state})->data;
  } else {
    host.pDynamicState = nullptr;
  }
}

// Exit hook for VkGraphicsPipelineCreateInfo: frees the stage array and every
// pipeline-state copy referenced by the host structure. Always returns false.
bool fex_custom_repack_exit(guest_layout<VkGraphicsPipelineCreateInfo>& into, const host_layout<VkGraphicsPipelineCreateInfo>& from) {
  const auto& host = from.data;

  delete[] host.pStages;

  // Each of these may be nullptr, in which case delete is a no-op.
  delete host.pVertexInputState;
  delete host.pInputAssemblyState;
  delete host.pTessellationState;
  delete host.pViewportState;
  delete host.pRasterizationState;
  delete host.pMultisampleState;
  delete host.pDepthStencilState;
  delete host.pColorBlendState;
  delete host.pDynamicState;

  return false;
}

// Entry hook for VkSubmitInfo: runs the default repacking, then repacks the
// commandBufferCount-long pCommandBuffers array for the host.
void fex_custom_repack_entry(host_layout<VkSubmitInfo>& into, const guest_layout<VkSubmitInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest = from.data;
  into.data.pCommandBuffers = RepackStructArray<false>(guest.commandBufferCount.data, guest.pCommandBuffers).data();
}

// Exit hook for VkSubmitInfo: frees the host pCommandBuffers array.
// Always returns false.
bool fex_custom_repack_exit(guest_layout<VkSubmitInfo>& into, const host_layout<VkSubmitInfo>& from) {
  const auto& host = from.data;
  delete[] host.pCommandBuffers;

  return false;
}

// Entry hook for VkCommandBufferBeginInfo: runs the default repacking and, if
// the guest supplied pInheritanceInfo, stores a heap-allocated host copy of it;
// otherwise the host pointer is set to nullptr.
void fex_custom_repack_entry(host_layout<VkCommandBufferBeginInfo>& into, const guest_layout<VkCommandBufferBeginInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  const auto& guest_inheritance = from.data.pInheritanceInfo;

  if (guest_inheritance.get_pointer() && guest_inheritance.data) {
    into.data.pInheritanceInfo = new VkCommandBufferInheritanceInfo;
    auto converted = host_layout<VkCommandBufferInheritanceInfo> {*guest_inheritance.get_pointer()}.data;
    static_assert(sizeof(converted) == sizeof(*into.data.pInheritanceInfo));
    // The host member points to const, hence the cast before copying into it.
    memcpy((void*)into.data.pInheritanceInfo, &converted, sizeof(converted));
  } else {
    into.data.pInheritanceInfo = nullptr;
  }
}

// Exit hook for VkCommandBufferBeginInfo: frees the host pInheritanceInfo copy
// (a no-op when it is nullptr). Always returns false.
bool fex_custom_repack_exit(guest_layout<VkCommandBufferBeginInfo>& into, const host_layout<VkCommandBufferBeginInfo>& from) {
  const auto& host = from.data;
  delete host.pInheritanceInfo;

  return false;
}

// Entry hook for VkPipelineCacheCreateInfo: runs the default repacking and
// passes the guest's pInitialData through to the host as a native pointer.
void fex_custom_repack_entry(host_layout<VkPipelineCacheCreateInfo>& into, const guest_layout<VkPipelineCacheCreateInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  // Same underlying layout, so the data itself needs no conversion
  const auto& guest = from.data;
  into.data.pInitialData = guest.pInitialData.get_pointer();
}

// Exit hook for VkPipelineCacheCreateInfo. The entry hook only forwards a
// pointer and allocates nothing, so no cleanup is needed; always returns false.
bool fex_custom_repack_exit(guest_layout<VkPipelineCacheCreateInfo>& into, const host_layout<VkPipelineCacheCreateInfo>& from) {
  // Nothing to do
  return false;
}

// Entry hook for VkRenderPassBeginInfo: runs the default repacking and passes
// the guest's pClearValues through to the host, reinterpreted as VkClearValue.
void fex_custom_repack_entry(host_layout<VkRenderPassBeginInfo>& into, const guest_layout<VkRenderPassBeginInfo>& from) {
  default_fex_custom_repack_entry(into, from);

  // Same underlying layout, so the data itself needs no conversion
  const auto& guest = from.data;
  into.data.pClearValues = reinterpret_cast<const VkClearValue*>(guest.pClearValues.get_pointer());
}

// Exit hook for VkRenderPassBeginInfo. The entry hook only forwards a pointer
// and allocates nothing, so no cleanup is needed; always returns false.
bool fex_custom_repack_exit(guest_layout<VkRenderPassBeginInfo>& into, const host_layout<VkRenderPassBeginInfo>& from) {
  // Nothing to do
  return false;
}
#endif

EXPORTS(libvulkan)


================================================
FILE: ThunkLibs/libvulkan/libvulkan_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <type_traits>

// Primary template for generator annotations keyed on a non-type template
// argument (a function or a pointer-to-member). The explicit specializations
// in this file attach fexgen:: tags by inheriting from them.
template<auto>
struct fex_gen_config {
  // NOTE(review): presumably the library version the thunk targets; confirm
  // how the generator consumes this value.
  unsigned version = 1;
};

// Some of Vulkan's handle types are so-called "non-dispatchable handles".
// On 64-bit, these are defined as dedicated types by default, which makes
// annotating these handle types unnecessarily complicated. Instead, setting
// the following define will make the Vulkan headers alias all handle types
// to uint64_t.
#define VK_USE_64_BIT_PTR_DEFINES 0

#define VK_USE_PLATFORM_XLIB_XRANDR_EXT
#define VK_USE_PLATFORM_XLIB_KHR
#define VK_USE_PLATFORM_XCB_KHR
#define VK_USE_PLATFORM_WAYLAND_KHR
#include <vulkan/vulkan.h>

// Both proc-address lookup functions are tagged with custom_host_impl,
// custom_guest_entrypoint and returns_guest_pointer.
// NOTE(review): the tag names suggest hand-written host/guest code and a
// return value that is a guest-callable pointer; confirm against
// common/GeneratorInterface.h.
template<>
struct fex_gen_config<vkGetDeviceProcAddr> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer {};
template<>
struct fex_gen_config<vkGetInstanceProcAddr> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint, fexgen::returns_guest_pointer {};

// Primary template for per-type generator annotations. The explicit
// specializations in this file either attach fexgen:: tags to a type or, with
// an empty body, merely name the type.
template<typename>
struct fex_gen_type {};

// internal use
// These helpers are declared here rather than coming from the Vulkan headers.
// NOTE(review): judging by their names, they hand guest-side addresses of
// XSync / XGetVisualInfo / XDisplayString to the host; the meaning of the two
// uintptr_t parameters is not visible here — confirm at the definitions.
void Vulkan_SetGuestXSync(uintptr_t, uintptr_t);
void Vulkan_SetGuestXGetVisualInfo(uintptr_t, uintptr_t);
void Vulkan_SetGuestXDisplayString(uintptr_t, uintptr_t);
// All three carry the same pair of tags: custom_guest_entrypoint and custom_host_impl.
template<>
struct fex_gen_config<Vulkan_SetGuestXSync> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};
template<>
struct fex_gen_config<Vulkan_SetGuestXGetVisualInfo> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};
template<>
struct fex_gen_config<Vulkan_SetGuestXDisplayString> : fexgen::custom_guest_entrypoint, fexgen::custom_host_impl {};

// So-called "dispatchable" handles are represented as opaque pointers.
// In addition to marking them as such, API functions that create these objects
// need special care since they wrap these handles in another pointer, which
// the thunk generator can't automatically handle.
//
// So-called "non-dispatchable" handles don't need this extra treatment, since
// they are uint64_t IDs on both 32-bit and 64-bit systems.
// The Vk*_T names below are the struct types that the corresponding Vk*
// dispatchable handle typedefs point to in the Vulkan headers.
template<>
struct fex_gen_type<VkCommandBuffer_T> : fexgen::opaque_type {};
template<>
struct fex_gen_type<VkDevice_T> : fexgen::opaque_type {};
template<>
struct fex_gen_type<VkInstance_T> : fexgen::opaque_type {};
template<>
struct fex_gen_type<VkPhysicalDevice_T> : fexgen::opaque_type {};
template<>
struct fex_gen_type<VkQueue_T> : fexgen::opaque_type {};
template<>
struct fex_gen_type<VkExternalComputeQueueNV_T> : fexgen::opaque_type {};

// Mark union types with compatible layout as such
// TODO: These may still have different alignment requirements!
// These three unions are annotated for both 32-bit and 64-bit thunks.
template<>
struct fex_gen_type<VkClearValue> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkClearColorValue> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkPipelineExecutableStatisticValueKHR> : fexgen::assume_compatible_data_layout {};
// The remaining unions are only assumed compatible when not building the
// 32-bit thunk.
// NOTE(review): presumably because they contain pointer-sized members whose
// size differs for a 32-bit guest — confirm.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_type<VkAccelerationStructureGeometryDataKHR> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkDescriptorDataEXT> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkDeviceOrHostAddressKHR> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkDeviceOrHostAddressConstKHR> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkPerformanceValueDataINTEL> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkIndirectExecutionSetInfoEXT> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkIndirectCommandsTokenDataEXT> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_type<VkClusterAccelerationStructureOpInputNV> : fexgen::assume_compatible_data_layout {};
#endif

// Explicitly register types that are only ever referenced through nested pointers
// These specializations carry no fexgen:: tag; they only name the type.
template<>
struct fex_gen_type<VkAccelerationStructureBuildRangeInfoKHR> {};
template<>
struct fex_gen_type<VkDescriptorSetLayoutBinding> {};
template<>
struct fex_gen_type<VkDescriptorUpdateTemplateEntry> {};
template<>
struct fex_gen_type<VkSubpassDescription> {};

// Structures that contain function pointers
// TODO: Use custom repacking for these instead
// Both debug-callback create-info structs are tagged emit_layout_wrappers.
// NOTE(review): presumably this makes the generator emit guest/host layout
// wrappers despite the function-pointer members — confirm in the generator.
template<>
struct fex_gen_type<VkDebugReportCallbackCreateInfoEXT> : fexgen::emit_layout_wrappers {};
template<>
struct fex_gen_type<VkDebugUtilsMessengerCreateInfoEXT> : fexgen::emit_layout_wrappers {};

#ifdef IS_32BIT_THUNK
// Only specialized for the 32-bit thunk (see the enclosing #ifdef).
// NOTE(review): presumably needed so pNext chains can be walked through the
// generic VkBaseOutStructure header — confirm against the repacking code.
template<>
struct fex_gen_type<VkBaseOutStructure> : fexgen::emit_layout_wrappers {};

// Register structs with an extension point (pNext). Any other members that need customization are listed below.
// Generated using
// for i in `grep VK_STRUCTURE_TYPE vk.xml -B1 | grep category=\"struct\" | cut -d'"' -f 4 | sort`
// do
//   grep $i vulkan_{core,wayland,xcb,xlib,xlib_xrandr}.h >& /dev/null && echo $i
// done | awk '{ print "template<> struct fex_gen_config<&"$1"::pNext> : fexgen::custom_repack {};" }'
// template<>
// struct fex_gen_config<&VkAccelerationStructureBuildGeometryInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureBuildSizesInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureCaptureDescriptorDataInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureDeviceAddressInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureGeometryAabbsDataKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureGeometryInstancesDataKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureGeometryKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureGeometryMotionTrianglesDataNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureGeometryTrianglesDataKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureInfoNV::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkAcc*..VkAtt* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkAccelerationStructureMemoryRequirementsInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureMotionInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAccelerationStructureTrianglesOpacityMicromapEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAccelerationStructureVersionInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAcquireNextImageInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAcquireProfilingLockInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAmigoProfilingSubmitInfoSEC::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkAntiLagDataAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAntiLagPresentationInfoAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkApplicationInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentDescription2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentDescriptionStencilLayout::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentFeedbackLoopInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentReference2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentReferenceStencilLayout::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkAttachmentSampleCountInfoAMD::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkBegin*..VkBuild* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkBeginCustomResolveInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindAccelerationStructureMemoryInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindBufferMemoryDeviceGroupInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindBufferMemoryInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindDataGraphPipelineSessionMemoryInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindDescriptorBufferEmbeddedSamplersInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindDescriptorSetsInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindImageMemoryDeviceGroupInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindImageMemoryInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindImageMemorySwapchainInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindImagePlaneMemoryInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindMemoryStatus::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkBindSparseInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindTensorMemoryInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBindVideoSessionMemoryInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBlitImageCubicWeightsInfoQCOM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkBlitImageInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferCaptureDescriptorDataInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferCopy2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferDeviceAddressCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferDeviceAddressInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferImageCopy2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferMemoryBarrier::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferMemoryBarrier2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferMemoryRequirementsInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferOpaqueCaptureAddressCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferUsageFlags2CreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBufferViewCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkBuildPartitionedAccelerationStructureInfoNV::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkCal*..VkCoop* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkCalibratedTimestampInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCheckpointData2NV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCheckpointDataNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkClusterAccelerationStructureClustersBottomLevelInputNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkClusterAccelerationStructureCommandsInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkClusterAccelerationStructureInputInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkClusterAccelerationStructureMoveObjectsInputNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkClusterAccelerationStructureTriangleClusterInputNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferBeginInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferInheritanceConditionalRenderingInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferInheritanceInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferInheritanceRenderingInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferInheritanceRenderPassTransformInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferInheritanceViewportScissorInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandBufferSubmitInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCommandPoolCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkComputeOccupancyPriorityParametersNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkComputePipelineCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkComputePipelineIndirectBufferInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkConditionalRenderingBeginInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkConvertCooperativeVectorMatrixInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCooperativeMatrixFlexibleDimensionsPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCooperativeMatrixPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCooperativeMatrixPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCooperativeVectorPropertiesNV::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for members of the VkCopy*, VkCu* and VkCustomResolve* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// All entries target `pNext` except the VkCopyMemoryToImageInfo::pRegions one below.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkCopyAccelerationStructureInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyAccelerationStructureToMemoryInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyBufferInfo2::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyBufferToImageInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyCommandTransformInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyDescriptorSet::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyImageInfo2::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyImageToBufferInfo2::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyImageToImageInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyImageToMemoryInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyMemoryIndirectInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyMemoryToAccelerationStructureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyMemoryToImageIndirectInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyMemoryToImageInfo::pNext> : fexgen::custom_repack {};
// Second annotation on the same struct: this one targets `pRegions`, not `pNext`.
template<>
struct fex_gen_config<&VkCopyMemoryToImageInfo::pRegions> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyMemoryToMicromapInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCopyMicromapInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyMicromapToMemoryInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCopyTensorInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCuFunctionCreateInfoNVX::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCuLaunchInfoNVX::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkCuModuleCreateInfoNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCuModuleTexturingModeCreateInfoNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkCustomResolveCreateInfoEXT::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkDataGraph*, VkDebug*,
// VkDedicated*, VkDependencyInfo and VkDepthBias* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkDataGraphPipelineBuiltinModelCreateInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineCompilerControlCreateInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDataGraphPipelineConstantARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineConstantTensorSemiStructuredSparsityInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDataGraphPipelineCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineDispatchInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineIdentifierCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDataGraphPipelinePropertyQueryResultARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineResourceInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineSessionBindPointRequirementARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineSessionBindPointRequirementsInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineSessionCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphPipelineSessionMemoryRequirementsInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDataGraphPipelineShaderModuleCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDataGraphProcessingEngineCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugMarkerMarkerInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugMarkerObjectNameInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDebugMarkerObjectTagInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugReportCallbackCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugUtilsLabelEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDebugUtilsMessengerCallbackDataEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugUtilsMessengerCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDebugUtilsObjectNameInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDebugUtilsObjectTagInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDecompressMemoryInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDedicatedAllocationBufferCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDedicatedAllocationImageCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDedicatedAllocationMemoryAllocateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDependencyInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDepthBiasInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDepthBiasRepresentationInfoEXT::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkDescriptor* structs.
// Each entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkDescriptorAddressInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorBufferBindingInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorBufferBindingPushDescriptorBufferHandleEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorGetInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorGetTensorInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorPoolCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorPoolInlineUniformBlockCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetBindingReferenceVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetLayoutBindingFlagsCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetLayoutCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetLayoutHostMappingInfoVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetLayoutSupport::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetVariableDescriptorCountAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorSetVariableDescriptorCountLayoutSupport::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDescriptorUpdateTemplateCreateInfo::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkDevice* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkDeviceAddressBindingCallbackDataEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceBufferMemoryRequirements::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceCreateInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceDeviceMemoryReportCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceDiagnosticsConfigCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceEventInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceFaultCountsEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceFaultInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceGroupBindSparseInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupCommandBufferBeginInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceGroupDeviceCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupPresentCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupPresentInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupRenderPassBeginInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupSubmitInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceGroupSwapchainCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceImageMemoryRequirements::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceImageSubresourceInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceMemoryOpaqueCaptureAddressInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceMemoryOverallocationCreateInfoAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceMemoryReportCallbackDataEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDevicePipelineBinaryInternalCacheControlKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDevicePrivateDataCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceQueueCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceQueueGlobalPriorityCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceQueueInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceQueueShaderCoreControlCreateInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDeviceTensorMemoryRequirementsARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDirectDriverLoadingInfoLUNARG::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDirectDriverLoadingListLUNARG::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkDispatch* and VkDisplay* structs.
// Each entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkDispatchTileInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayEventInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayModeCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayModeProperties2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayModeStereoPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayNativeHdrSurfaceCapabilitiesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayPlaneCapabilities2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayPlaneInfo2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayPlaneProperties2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayPowerInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayPresentInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplayProperties2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplaySurfaceCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDisplaySurfaceStereoCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDrmFormatModifierPropertiesList2EXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkDrmFormatModifierPropertiesListEXT::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkEvent*, VkExport* and
// VkExternal* structs.
// Each entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkEventCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExportFenceCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExportMemoryAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExportMemoryAllocateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExportSemaphoreCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalBufferProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalComputeQueueCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalComputeQueueDataParamsNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalComputeQueueDeviceCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalFenceProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalImageFormatProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalMemoryAcquireUnmodifiedEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalMemoryBufferCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalMemoryImageCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalMemoryImageCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalMemoryTensorCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalSemaphoreProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkExternalTensorPropertiesARM::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkFence*, VkFilter*,
// VkFormat*, VkFrameBoundary* and VkFramebuffer* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkFenceCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFenceGetFdInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFilterCubicImageViewImageFormatPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFormatProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFormatProperties3::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkFragmentShadingRateAttachmentInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkFrameBoundaryEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFrameBoundaryTensorsARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFramebufferAttachmentImageInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkFramebufferAttachmentsCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFramebufferCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkFramebufferMixedSamplesCombinationNV::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkGenerated*, VkGeometry*
// and VkGraphics* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkGeneratedCommandsInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkGeneratedCommandsInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeneratedCommandsMemoryRequirementsInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeneratedCommandsMemoryRequirementsInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeneratedCommandsPipelineInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeneratedCommandsShaderInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeometryAABBNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeometryNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGeometryTrianglesNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkGetLatencyMarkerInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineLibraryCreateInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkGraphicsPipelineShaderGroupsCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkGraphicsShaderGroupCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkHdrVividDynamicMetadataHUAWEI::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkHeadless* and VkHostImage* structs.
// Each entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkHeadlessSurfaceCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkHostImageCopyDevicePerformanceQuery::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkHostImageLayoutTransitionInfo::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkImage* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkImageAlignmentControlCreateInfoMESA::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageBlit2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageCaptureDescriptorDataInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageCompressionControlEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageCompressionPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageCopy2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageCreateInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkImageDrmFormatModifierExplicitCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageDrmFormatModifierListCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageDrmFormatModifierPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageFormatListCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageFormatProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageMemoryBarrier::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageMemoryBarrier2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageMemoryRequirementsInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImagePlaneMemoryRequirementsInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageResolve2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageSparseMemoryRequirementsInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageStencilUsageCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageSubresource2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageSwapchainCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkImageToMemoryCopy::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewAddressPropertiesNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewASTCDecodeModeEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewCaptureDescriptorDataInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewHandleInfoNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewMinLodCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewSampleWeightCreateInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewSlicedCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImageViewUsageCreateInfo::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkImport*, VkIndirect*
// and VkInstanceCreateInfo structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkImportFenceFdInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImportMemoryFdInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkImportMemoryHostPointerInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkImportSemaphoreFdInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkIndirectCommandsLayoutCreateInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkIndirectCommandsLayoutCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkIndirectCommandsLayoutTokenEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkIndirectCommandsLayoutTokenNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkIndirectExecutionSetCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkIndirectExecutionSetPipelineInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkIndirectExecutionSetShaderInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkIndirectExecutionSetShaderLayoutInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkInitializePerformanceApiInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkInstanceCreateInfo::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkLatency* structs.
// Each entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkLatencySleepInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkLatencySleepModeInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkLatencySubmissionPresentIdNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkLatencySurfaceCapabilitiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkLatencyTimingsFrameReportNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkLayerSettingsCreateInfoEXT::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for members of the VkMapped*, VkMemory*, VkMicromap*,
// VkMultisample* and VkMultiview* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// All entries target `pNext` except the VkMemoryToImageCopy::pHostPointer one below.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkMappedMemoryRange::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryAllocateFlagsInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryBarrier::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryBarrier2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryBarrierAccessFlags3KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryDedicatedAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryDedicatedAllocateInfoTensorARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryDedicatedRequirements::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryFdPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryGetFdInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryGetRemoteAddressInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryHostPointerPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryMapInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkMemoryMapPlacedInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryOpaqueCaptureAddressAllocateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryPriorityAllocateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryRequirements2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryToImageCopy::pNext> : fexgen::custom_repack {};
// Second annotation on the same struct: this one targets `pHostPointer`, not `pNext`.
template<>
struct fex_gen_config<&VkMemoryToImageCopy::pHostPointer> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMemoryUnmapInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkMicromapBuildInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMicromapBuildSizesInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMicromapCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMicromapVersionInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMultisampledRenderToSingleSampledInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMultisamplePropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMultiviewPerViewAttributesInfoNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkMultiviewPerViewRenderAreasRenderPassBeginInfoQCOM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkMutableDescriptorTypeCreateInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkOpaqueCaptureDescriptorDataCreateInfoEXT::pNext> : fexgen::custom_repack {};
// fex_gen_config annotations for the `pNext` members of the VkOpticalFlow* and
// VkOutOfBand* structs.
// Each active entry is an explicit specialization keyed on a pointer-to-member and
// derives from fexgen::custom_repack; commented-out entries are not compiled.
// NOTE(review): custom_repack presumably means the member is repacked by hand-written
// code rather than generated code -- confirm against the thunk generator.
template<>
struct fex_gen_config<&VkOpticalFlowExecuteInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkOpticalFlowImageFormatInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkOpticalFlowImageFormatPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkOpticalFlowSessionCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkOpticalFlowSessionCreatePrivateDataInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkOutOfBandQueueTypeInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPartitionedAccelerationStructureFlagsNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPartitionedAccelerationStructureInstancesInputNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPastPresentationTimingEXT::pNext> : fexgen::custom_repack {};
// Tags VkPastPresentationTimingInfoEXT::pNext with fexgen::custom_repack
// (explicit fex_gen_config specialization, empty body).
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPastPresentationTimingInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPastPresentationTimingPropertiesEXT::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPerformance* and VkPerTile* structs listed here
// with fexgen::custom_repack. Each entry is an explicit specialization of
// fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPerformanceConfigurationAcquireInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceCounterARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceCounterDescriptionARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceCounterDescriptionKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceCounterKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceMarkerInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceOverrideInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceQuerySubmitInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerformanceStreamMarkerInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerTileBeginInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPerTileEndInfoQCOM::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (16BitStorage through CustomResolve, in case-insensitive alphabetical order)
// with fexgen::custom_repack. Each entry is an explicit specialization of
// fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDevice16BitStorageFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevice4444FormatsFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevice8BitStorageFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAccelerationStructureFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAccelerationStructurePropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAddressBindingReportFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAmigoProfilingFeaturesSEC::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAntiLagFeaturesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceASTCDecodeFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAttachmentFeedbackLoopDynamicStateFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceAttachmentFeedbackLoopLayoutFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceBlendOperationAdvancedFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceBlendOperationAdvancedPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceBorderColorSwizzleFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceBufferDeviceAddressFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceBufferDeviceAddressFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceClusterAccelerationStructureFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceClusterAccelerationStructurePropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceClusterCullingShaderFeaturesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceClusterCullingShaderPropertiesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceClusterCullingShaderVrsFeaturesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCoherentMemoryFeaturesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceColorWriteEnableFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCommandBufferInheritanceFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceComputeOccupancyPriorityFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceComputeShaderDerivativesPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceConditionalRenderingFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceConservativeRasterizationPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrix2FeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrix2PropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrixFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrixFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrixPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeMatrixPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeVectorFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCooperativeVectorPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCopyMemoryIndirectFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCopyMemoryIndirectFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCopyMemoryIndirectPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCornerSampledImageFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCoverageReductionModeFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCubicClampFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCubicWeightsFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCustomBorderColorFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCustomBorderColorPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceCustomResolveFeaturesEXT::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (DataGraph through ExternalSemaphoreInfo, in alphabetical order) with
// fexgen::custom_repack. Each entry is an explicit specialization of
// fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDeviceDataGraphFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDataGraphModelFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthBiasControlFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthClampControlFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthClampZeroOneFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthClipControlFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthClipEnableFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDepthStencilResolveProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorBufferDensityMapPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorBufferFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorBufferPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorBufferTensorFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorBufferTensorPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorIndexingFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorIndexingProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorPoolOverallocationFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceGeneratedCommandsComputeFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceGeneratedCommandsFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceGeneratedCommandsFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceGeneratedCommandsPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceGeneratedCommandsPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDeviceMemoryReportFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDiagnosticsConfigFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDiscardRectanglePropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDriverProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDrmPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDynamicRenderingFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDynamicRenderingLocalReadFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceDynamicRenderingUnusedAttachmentsFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExclusiveScissorFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedDynamicState2FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedDynamicState3FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedDynamicState3PropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedDynamicStateFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedSparseAddressSpaceFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExtendedSparseAddressSpacePropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalBufferInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalComputeQueuePropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalFenceInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalImageFormatInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalMemoryHostPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalMemoryRDMAFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceExternalSemaphoreInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPhysicalDeviceExternalTensorInfoARM::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (FaultFeatures through GraphicsPipelineLibraryProperties, in alphabetical
// order) with fexgen::custom_repack. Each entry is an explicit specialization
// of fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDeviceFaultFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFeatures2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFloatControlsProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFormatPackFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMap2FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMap2PropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapLayeredFeaturesVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapLayeredPropertiesVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapOffsetFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapOffsetPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentDensityMapPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShaderBarycentricFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShaderBarycentricPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShadingRateEnumsFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShadingRateEnumsPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShadingRateFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShadingRateKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFragmentShadingRatePropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceFrameBoundaryFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceGlobalPriorityQueryFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceGraphicsPipelineLibraryFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceGraphicsPipelineLibraryPropertiesEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPhysicalDeviceGroupProperties::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (HdrVividFeatures through LayeredApiPropertiesKHR, in case-insensitive
// alphabetical order) with fexgen::custom_repack. Each entry is an explicit
// specialization of fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDeviceHdrVividFeaturesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceHostImageCopyFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceHostImageCopyProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceHostQueryResetFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceIDProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImage2DViewOf3DFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageAlignmentControlFeaturesMESA::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageAlignmentControlPropertiesMESA::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageCompressionControlFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageCompressionControlSwapchainFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageDrmFormatModifierInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageFormatInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImagelessFramebufferFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageProcessing2FeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageProcessing2PropertiesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageProcessingFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageProcessingPropertiesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageRobustnessFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageSlicedViewOf3DFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageViewImageFormatInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceImageViewMinLodFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceIndexTypeUint8Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceInheritedViewportScissorFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceInlineUniformBlockFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceInlineUniformBlockProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceInvocationMaskFeaturesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLayeredApiPropertiesKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPhysicalDeviceLayeredApiPropertiesListKHR::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (LayeredApiVulkanProperties through MultiviewProperties, in case-insensitive
// alphabetical order) with fexgen::custom_repack. Each entry is an explicit
// specialization of fex_gen_config keyed on a pointer-to-member; the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDeviceLayeredApiVulkanPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLayeredDriverPropertiesMSFT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLegacyDitheringFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLegacyVertexAttributesFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLegacyVertexAttributesPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLinearColorAttachmentFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLineRasterizationFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceLineRasterizationProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance10FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance10PropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance3Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance4Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance4Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance5Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance5Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance6Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance6Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance7FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance7PropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance8FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance9FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMaintenance9PropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMapMemoryPlacedFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMapMemoryPlacedPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMemoryBudgetPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMemoryDecompressionFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMemoryDecompressionPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMemoryPriorityFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMemoryProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMeshShaderFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMeshShaderFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMeshShaderPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMeshShaderPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiDrawFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiDrawPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultisampledRenderToSingleSampledFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiviewFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiviewPerViewAttributesPropertiesNVX::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiviewPerViewRenderAreasFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiviewPerViewViewportsFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceMultiviewProperties::pNext> : fexgen::custom_repack {};
// Tags the pNext member of the VkPhysicalDevice* structs listed here
// (MutableDescriptorTypeFeatures through PushDescriptorProperties, in
// case-insensitive alphabetical order) with fexgen::custom_repack. Each entry is
// an explicit specialization of fex_gen_config keyed on a pointer-to-member;
// the body is empty.
// NOTE(review): presumably custom_repack means the member is repacked by
// hand-written code rather than by generated code - confirm against the generator.
template<>
struct fex_gen_config<&VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceNestedCommandBufferFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceNestedCommandBufferPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceOpacityMicromapFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceOpacityMicromapPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceOpticalFlowFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceOpticalFlowPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePageableDeviceLocalMemoryFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePartitionedAccelerationStructureFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePartitionedAccelerationStructurePropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePCIBusInfoPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePerformanceCountersByRegionFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePerformanceCountersByRegionPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePerformanceQueryFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePerformanceQueryPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePerStageDescriptorSetFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineBinaryFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineBinaryPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineCacheIncrementalModeFeaturesSEC::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineCreationCacheControlFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineLibraryGroupHandlesFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineOpacityMicromapFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelinePropertiesFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineProtectedAccessFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineRobustnessFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePipelineRobustnessProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePointClippingProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentBarrierFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentId2FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentIdFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentMeteringFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentModeFifoLatestReadyFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentTimingFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentWait2FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePresentWaitFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePrivateDataFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceProtectedMemoryFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceProtectedMemoryProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceProvokingVertexFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceProvokingVertexPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDevicePushDescriptorProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceQueueFamilyDataGraphProcessingEngineInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRawAccessChainsFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayQueryFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingInvocationReorderPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingLinearSweptSpheresFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingMotionBlurFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingPipelineFeaturesKHR::pNext> : fexgen::custom_repack {};
// pNext annotations for VkPhysicalDevice* structs (RayTracingPipelineProperties .. SeparateDepthStencilLayouts).
// Each entry explicitly specializes fex_gen_config on the pointer-to-member
// &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingPipelinePropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingPositionFetchFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRayTracingValidationFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRelaxedLineRasterizationFeaturesIMG::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRenderPassStripedFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRenderPassStripedPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRepresentativeFragmentTestFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRGBA10X6FormatsFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRobustness2FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceRobustness2PropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSampleLocationsPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSamplerFilterMinmaxProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSamplerYcbcrConversionFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceScalarBlockLayoutFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSchedulingControlsFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSchedulingControlsPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSeparateDepthStencilLayoutsFeatures::pNext> : fexgen::custom_repack {};
// pNext annotations for the VkPhysicalDeviceShader* / VkPhysicalDeviceShadingRate* feature and property structs.
// Each entry explicitly specializes fex_gen_config on the pointer-to-member
// &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
template<>
struct fex_gen_config<&VkPhysicalDeviceShader64BitIndexingFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderAtomicFloat16VectorFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderAtomicFloatFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderAtomicInt64Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderBfloat16FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderClockFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderCoreBuiltinsFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderCoreBuiltinsPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderCoreProperties2AMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderCorePropertiesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderCorePropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderDemoteToHelperInvocationFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderDrawParametersFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderEarlyAndLateFragmentTestsFeaturesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderExpectAssumeFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderFloat16Int8Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderFloat8FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderFloatControls2Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderFmaFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderImageFootprintFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderIntegerDotProductFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderIntegerDotProductProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderLongVectorFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderLongVectorPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderMaximalReconvergenceFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderObjectFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderObjectPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderQuadControlFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderReplicatedCompositesFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderSMBuiltinsFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderSMBuiltinsPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderSubgroupRotateFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderTerminateInvocationFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderTileImageFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderTileImagePropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderUniformBufferUnsizedArrayFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShaderUntypedPointersFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShadingRateImageFeaturesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceShadingRateImagePropertiesNV::pNext> : fexgen::custom_repack {};
// pNext annotations for VkPhysicalDevice* structs (SparseImageFormatInfo2 .. VideoMaintenance2).
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// The commented-out entry is not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling it.
template<>
struct fex_gen_config<&VkPhysicalDeviceSparseImageFormatInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubgroupProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubgroupSizeControlFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubgroupSizeControlProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubpassMergeFeedbackFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubpassShadingFeaturesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSubpassShadingPropertiesHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSurfaceInfo2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSwapchainMaintenance1FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceSynchronization2Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTensorFeaturesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTensorPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTexelBufferAlignmentProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTextureCompressionASTC3DFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTextureCompressionASTCHDRFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTileMemoryHeapFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTileMemoryHeapPropertiesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTilePropertiesFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTileShadingFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTileShadingPropertiesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTimelineSemaphoreFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTimelineSemaphoreProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceToolProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTransformFeedbackFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceTransformFeedbackPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceUnifiedImageLayoutsFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceUniformBufferStandardLayoutFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVariablePointersFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVertexAttributeDivisorFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVertexAttributeDivisorProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVertexAttributeRobustnessFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoDecodeVP9FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoEncodeAV1FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoEncodeIntraRefreshFeaturesKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPhysicalDeviceVideoEncodeQualityLevelInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoEncodeQuantizationMapFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoEncodeRgbConversionFeaturesVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoFormatInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoMaintenance1FeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVideoMaintenance2FeaturesKHR::pNext> : fexgen::custom_repack {};
// pNext annotations for the core-version aggregate structs (VkPhysicalDeviceVulkan11..14
// Features/Properties) and the remaining VkPhysicalDevice* feature structs.
// Each entry explicitly specializes fex_gen_config on the pointer-to-member
// &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan11Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan11Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan12Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan12Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan13Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan13Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan14Features::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkan14Properties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceVulkanMemoryModelFeatures::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceYcbcr2Plane444FormatsFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceYcbcrDegammaFeaturesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceYcbcrImageArraysFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceZeroInitializeDeviceMemoryFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeatures::pNext> : fexgen::custom_repack {};
// pNext annotations for the VkPipeline* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
// template<>
// struct fex_gen_config<&VkPipelineBinaryCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineBinaryDataInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPipelineBinaryHandlesInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineBinaryInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineBinaryKeyKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCacheCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineColorBlendAdvancedStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineColorBlendStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineColorWriteCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCompilerControlCreateInfoAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCoverageModulationStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCoverageReductionStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCoverageToColorStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCreateFlags2CreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPipelineCreationFeedbackCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineDepthStencilStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineDiscardRectangleStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineDynamicStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineExecutableInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPipelineExecutableInternalRepresentationKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineExecutablePropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineExecutableStatisticKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineFragmentDensityMapLayeredCreateInfoVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineFragmentShadingRateEnumStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineFragmentShadingRateStateCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineIndirectDeviceAddressInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineInputAssemblyStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineLayoutCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineLibraryCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineMultisampleStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelinePropertiesIdentifierEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationConservativeStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationDepthClipStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationLineStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationProvokingVertexStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationStateRasterizationOrderAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRasterizationStateStreamCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRenderingCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRepresentativeFragmentTestStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineRobustnessCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineSampleLocationsStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineShaderStageCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineShaderStageModuleIdentifierCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineShaderStageRequiredSubgroupSizeCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineTessellationDomainOriginStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineTessellationStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineVertexInputDivisorStateCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineVertexInputStateCreateInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPipelineViewportCoarseSampleOrderStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportDepthClampControlCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportDepthClipControlCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportExclusiveScissorStateCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPipelineViewportShadingRateImageStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportStateCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportSwizzleStateCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPipelineViewportWScalingStateCreateInfoNV::pNext> : fexgen::custom_repack {};
// pNext annotations for VkPresent*, VkPrivateDataSlotCreateInfo, VkProtectedSubmitInfo,
// VkPush*, VkQuery* and VkQueueFamily* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
template<>
struct fex_gen_config<&VkPresentId2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPresentIdKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPresentInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPresentRegionsKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPresentTimesInfoGOOGLE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPresentTimingInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPresentTimingsInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPresentTimingSurfaceCapabilitiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPresentWait2InfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkPrivateDataSlotCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkProtectedSubmitInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPushConstantsInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPushDescriptorSetInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkPushDescriptorSetWithTemplateInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkQueryLowLatencySupportNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueryPoolCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueryPoolPerformanceCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueryPoolPerformanceQueryCreateInfoINTEL::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueryPoolVideoEncodeFeedbackCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyCheckpointProperties2NV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyCheckpointPropertiesNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyDataGraphProcessingEnginePropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyDataGraphPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyGlobalPriorityProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyOwnershipTransferPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyQueryResultStatusPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkQueueFamilyVideoPropertiesKHR::pNext> : fexgen::custom_repack {};
// pNext annotations for VkRayTracing* and VkRelease* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
template<>
struct fex_gen_config<&VkRayTracingPipelineClusterAccelerationStructureCreateInfoNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRayTracingPipelineCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRayTracingPipelineCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRayTracingPipelineInterfaceCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRayTracingShaderGroupCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRayTracingShaderGroupCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkReleaseCapturedPipelineDataInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkReleaseSwapchainImagesInfoKHR::pNext> : fexgen::custom_repack {};
// pNext annotations for VkRendering*, VkRenderPass* and VkResolve* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
template<>
struct fex_gen_config<&VkRenderingAreaInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingAttachmentFlagsInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingAttachmentInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingAttachmentLocationInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingEndInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingFragmentDensityMapAttachmentInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingFragmentShadingRateAttachmentInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingInputAttachmentIndexInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassAttachmentBeginInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassBeginInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreateInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreationControlEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreationFeedbackCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassFragmentDensityMapCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassFragmentDensityMapOffsetEndInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassInputAttachmentAspectCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassMultiviewCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassPerformanceCountersByRegionBeginInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRenderPassSampleLocationsBeginInfoEXT::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRenderPassStripeBeginInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassStripeInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkRenderPassStripeSubmitInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassSubpassFeedbackCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassTileShadingCreateInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassTransformBeginInfoQCOM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkResolveImageInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkResolveImageModeInfoKHR::pNext> : fexgen::custom_repack {};
// pNext annotations for VkSampleLocationsInfoEXT and the VkSampler* structs.
// Each entry explicitly specializes fex_gen_config on the pointer-to-member
// &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
template<>
struct fex_gen_config<&VkSampleLocationsInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerBlockMatchWindowCreateInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerBorderColorComponentMappingCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerCaptureDescriptorDataInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerCubicWeightsCreateInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerCustomBorderColorCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerReductionModeCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerYcbcrConversionCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerYcbcrConversionImageFormatProperties::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerYcbcrConversionInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSamplerYcbcrConversionYcbcrDegammaCreateInfoQCOM::pNext> : fexgen::custom_repack {};
// pNext annotations for VkSemaphore*, VkSet*, VkShader*, VkSharedPresentSurfaceCapabilitiesKHR
// and VkSparseImage* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// The commented-out entry is not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling it.
template<>
struct fex_gen_config<&VkSemaphoreCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSemaphoreGetFdInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSemaphoreSignalInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSemaphoreSubmitInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSemaphoreTypeCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSemaphoreWaitInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSetDescriptorBufferOffsetsInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSetLatencyMarkerInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSetPresentConfigNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkShaderCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkShaderModuleCreateInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkShaderModuleIdentifierEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkShaderModuleValidationCacheCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSharedPresentSurfaceCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSparseImageFormatProperties2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSparseImageMemoryRequirements2::pNext> : fexgen::custom_repack {};
// pNext annotations for VkSubmitInfo, VkSubpass* and VkSubresource* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
template<>
struct fex_gen_config<&VkSubmitInfo::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkSubmitInfo2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassBeginInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassDependency2::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassDescription2::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkSubpassDescriptionDepthStencilResolve::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassEndInfo::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassResolvePerformanceQueryEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassShadingPipelineCreateInfoHUAWEI::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubresourceHostMemcpySize::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubresourceLayout2::pNext> : fexgen::custom_repack {};
// pNext annotations for VkSurface* and VkSwapchain* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// The commented-out entry is not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling it.
template<>
struct fex_gen_config<&VkSurfaceCapabilities2EXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceCapabilities2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceCapabilitiesPresentBarrierNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceCapabilitiesPresentId2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceCapabilitiesPresentWait2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceFormat2KHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfacePresentModeCompatibilityKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfacePresentModeKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfacePresentScalingCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSurfaceProtectedCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainCalibratedTimestampInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainCounterCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainDisplayNativeHdrCreateInfoAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainLatencyCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainPresentBarrierCreateInfoNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainPresentFenceInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainPresentModeInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainPresentModesCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainPresentScalingCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkSwapchainTimeDomainPropertiesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSwapchainTimingPropertiesEXT::pNext> : fexgen::custom_repack {};
// pNext annotations for VkTensor*, VkTextureLODGatherFormatPropertiesAMD and VkTile* structs.
// Each active entry explicitly specializes fex_gen_config on the
// pointer-to-member &T::pNext and derives from fexgen::custom_repack.
// NOTE(review): presumably this tells the thunk generator that the pNext chain
// is repacked by hand-written code rather than generated code - confirm
// against the fexgen annotation definitions.
// Commented-out entries are not annotated here; the reason is not recorded
// in this section - TODO confirm before re-enabling any of them.
template<>
struct fex_gen_config<&VkTensorCaptureDescriptorDataInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkTensorCopyARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkTensorCreateInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkTensorDependencyInfoARM::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkTensorDescriptionARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTensorFormatPropertiesARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTensorMemoryBarrierARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTensorMemoryRequirementsInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTensorViewCaptureDescriptorDataInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTensorViewCreateInfoARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTextureLODGatherFormatPropertiesAMD::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTileMemoryBindInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTileMemoryRequirementsQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTileMemorySizeInfoQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTilePropertiesQCOM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkTimelineSemaphoreSubmitInfo::pNext> : fexgen::custom_repack {};
// pNext members of the VkValidation*EXT and VkVertexInput*Description2EXT structs.
// The VkValidationCacheCreateInfoEXT entry is commented out; this file does not say why.
// template<>
// struct fex_gen_config<&VkValidationCacheCreateInfoEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkValidationFeaturesEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkValidationFlagsEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVertexInputAttributeDescription2EXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVertexInputBindingDescription2EXT::pNext> : fexgen::custom_repack {};
// pNext members of the general video-coding structs (VkVideoCapabilitiesKHR,
// VkVideoCodingControlInfoKHR). The VkVideoBeginCodingInfoKHR entry is commented out;
// this file does not say why.
// template<>
// struct fex_gen_config<&VkVideoBeginCodingInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoCodingControlInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the VkVideoDecode* structs (AV1, H264, H265, VP9 and codec-independent),
// each tagged fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
template<>
struct fex_gen_config<&VkVideoDecodeAV1CapabilitiesKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeAV1DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeAV1InlineSessionParametersInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeAV1PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeAV1ProfileInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeAV1SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH264CapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH264DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH264InlineSessionParametersInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH264PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH264ProfileInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH264SessionParametersAddInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH264SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH265CapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH265DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH265InlineSessionParametersInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH265PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeH265ProfileInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH265SessionParametersAddInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeH265SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeUsageInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeVP9CapabilitiesKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoDecodeVP9PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoDecodeVP9ProfileInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the VkVideoEncodeAV1* structs, each tagged fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
template<>
struct fex_gen_config<&VkVideoEncodeAV1CapabilitiesKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeAV1DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1GopRemainingFrameInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeAV1PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1ProfileInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1QualityLevelPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1QuantizationMapCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1RateControlInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1RateControlLayerInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeAV1SessionCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeAV1SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of VkVideoEncodeCapabilitiesKHR and the VkVideoEncodeH264* structs,
// each tagged fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
template<>
struct fex_gen_config<&VkVideoEncodeCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264CapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264GopRemainingFrameInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH264NaluSliceInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH264PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264ProfileInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264QualityLevelPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264QuantizationMapCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264RateControlInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264RateControlLayerInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264SessionCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH264SessionParametersAddInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH264SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264SessionParametersFeedbackInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH264SessionParametersGetInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the VkVideoEncodeH265* structs, each tagged fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
template<>
struct fex_gen_config<&VkVideoEncodeH265CapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265DpbSlotInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265GopRemainingFrameInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH265NaluSliceSegmentInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH265PictureInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265ProfileInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265QualityLevelPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265QuantizationMapCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265RateControlInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265RateControlLayerInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265SessionCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH265SessionParametersAddInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeH265SessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265SessionParametersFeedbackInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeH265SessionParametersGetInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the codec-independent VkVideoEncode* structs (intra refresh, quality level,
// quantization map, rate control, RGB conversion, session parameters, usage), each tagged
// fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
// template<>
// struct fex_gen_config<&VkVideoEncodeInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeIntraRefreshCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeIntraRefreshInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeProfileRgbConversionInfoVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeQualityLevelInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeQualityLevelPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeQuantizationMapCapabilitiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeQuantizationMapInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeQuantizationMapSessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoEncodeRateControlInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeRateControlLayerInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeRgbConversionCapabilitiesVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeSessionIntraRefreshCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeSessionParametersFeedbackInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeSessionParametersGetInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeSessionRgbConversionCreateInfoVALVE::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoEncodeUsageInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the remaining VkVideo* structs (end-coding, format properties, inline query,
// picture resource, profile, reference and session structs), each tagged fexgen::custom_repack.
// Entries that are commented out below are preserved as found; this file does not say why
// they are disabled.
template<>
struct fex_gen_config<&VkVideoEndCodingInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoFormatAV1QuantizationMapPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoFormatH265QuantizationMapPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoFormatPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoFormatQuantizationMapPropertiesKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoInlineQueryInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoPictureResourceInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoProfileInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoProfileListInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoReferenceIntraRefreshInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoReferenceSlotInfoKHR::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkVideoSessionCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoSessionMemoryRequirementsKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoSessionParametersCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkVideoSessionParametersUpdateInfoKHR::pNext> : fexgen::custom_repack {};
// pNext members of the window-system surface create-info structs (Wayland, XCB, Xlib) and of the
// VkWriteDescriptorSet* / VkWriteIndirectExecutionSet* structs, each tagged fexgen::custom_repack.
// The VkWriteDescriptorSetInlineUniformBlock entry is commented out; this file does not say why.
template<>
struct fex_gen_config<&VkWaylandSurfaceCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteDescriptorSet::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteDescriptorSetAccelerationStructureKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteDescriptorSetAccelerationStructureNV::pNext> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkWriteDescriptorSetInlineUniformBlock::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteDescriptorSetPartitionedAccelerationStructureNV::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteDescriptorSetTensorARM::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteIndirectExecutionSetPipelineEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkWriteIndirectExecutionSetShaderEXT::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkXcbSurfaceCreateInfoKHR::pNext> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkXlibSurfaceCreateInfoKHR::pNext> : fexgen::custom_repack {};


// Non-pNext pointer member: VkCommandBufferBeginInfo::pInheritanceInfo is tagged custom_repack.
// NOTE(review): presumably the pointed-to struct needs hand-written repacking - confirm.
template<>
struct fex_gen_config<&VkCommandBufferBeginInfo::pInheritanceInfo> : fexgen::custom_repack {};

// VkDeviceCreateInfo: the queue create-info array and both string-array members
// (layer names, extension names) are tagged custom_repack.
template<>
struct fex_gen_config<&VkDeviceCreateInfo::pQueueCreateInfos> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceCreateInfo::ppEnabledLayerNames> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDeviceCreateInfo::ppEnabledExtensionNames> : fexgen::custom_repack {};

// VkDependencyInfo: all three barrier-array members (memory, buffer, image) are tagged
// custom_repack.
template<>
struct fex_gen_config<&VkDependencyInfo::pMemoryBarriers> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDependencyInfo::pBufferMemoryBarriers> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkDependencyInfo::pImageMemoryBarriers> : fexgen::custom_repack {};

// Descriptor-related members tagged custom_repack: the `data` member of VkDescriptorGetInfoEXT,
// the bindings array of VkDescriptorSetLayoutCreateInfo, and the update-entry array of
// VkDescriptorUpdateTemplateCreateInfo.
template<>
struct fex_gen_config<&VkDescriptorGetInfoEXT::data> : fexgen::custom_repack {};

template<>
struct fex_gen_config<&VkDescriptorSetLayoutCreateInfo::pBindings> : fexgen::custom_repack {};

template<>
struct fex_gen_config<&VkDescriptorUpdateTemplateCreateInfo::pDescriptorUpdateEntries> : fexgen::custom_repack {};

// VkGraphicsPipelineCreateInfo: the shader-stage array and every fixed-function state pointer
// listed here are tagged custom_repack.
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pStages> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pVertexInputState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pInputAssemblyState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pTessellationState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pViewportState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pRasterizationState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pMultisampleState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pDepthStencilState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pColorBlendState> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkGraphicsPipelineCreateInfo::pDynamicState> : fexgen::custom_repack {};

// VkInstanceCreateInfo: the application-info pointer and both string-array members
// (layer names, extension names) are tagged custom_repack.
template<>
struct fex_gen_config<&VkInstanceCreateInfo::pApplicationInfo> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkInstanceCreateInfo::ppEnabledLayerNames> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkInstanceCreateInfo::ppEnabledExtensionNames> : fexgen::custom_repack {};

// VkRenderPassCreateInfo: only pSubpasses is tagged custom_repack.
template<>
struct fex_gen_config<&VkRenderPassCreateInfo::pSubpasses> : fexgen::custom_repack {};
// NOTE: pDependencies and pAttachments point to ABI-compatible data

// VkRenderPassCreateInfo2: unlike the version-1 struct above, all three array members
// (attachments, subpasses, dependencies) are tagged custom_repack.
template<>
struct fex_gen_config<&VkRenderPassCreateInfo2::pAttachments> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreateInfo2::pSubpasses> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderPassCreateInfo2::pDependencies> : fexgen::custom_repack {};

// VkPipelineShaderStageCreateInfo::pSpecializationInfo is tagged custom_repack.
// The matching entry for VkSpecializationInfo::pMapEntries is commented out; this file does
// not say why.
template<>
struct fex_gen_config<&VkPipelineShaderStageCreateInfo::pSpecializationInfo> : fexgen::custom_repack {};
// template<>
// struct fex_gen_config<&VkSpecializationInfo::pMapEntries> : fexgen::custom_repack {};

// VkPipelineCacheCreateInfo::pInitialData is tagged custom_repack for now.
// TODO: Support annotating as assume_compatible_data_layout instead
template<>
struct fex_gen_config<&VkPipelineCacheCreateInfo::pInitialData> : fexgen::custom_repack {};

// Command buffers are dispatchable handles, so on 32-bit they need to be repacked.
// Hence the handle array VkSubmitInfo::pCommandBuffers is tagged custom_repack.
template<>
struct fex_gen_config<&VkSubmitInfo::pCommandBuffers> : fexgen::custom_repack {};

// VkRenderingInfo: the colour attachment array and the depth and stencil attachment pointers
// are all tagged custom_repack.
template<>
struct fex_gen_config<&VkRenderingInfo::pColorAttachments> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingInfo::pDepthAttachment> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkRenderingInfo::pStencilAttachment> : fexgen::custom_repack {};

// VkRenderPassBeginInfo::pClearValues is tagged custom_repack for now.
// TODO: Support annotating as assume_compatible_data_layout instead
template<>
struct fex_gen_config<&VkRenderPassBeginInfo::pClearValues> : fexgen::custom_repack {};

// VkSubpassDescription2: the input, colour, resolve and depth/stencil attachment-reference
// pointers are all tagged custom_repack.
template<>
struct fex_gen_config<&VkSubpassDescription2::pInputAttachments> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassDescription2::pColorAttachments> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassDescription2::pResolveAttachments> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkSubpassDescription2::pDepthStencilAttachment> : fexgen::custom_repack {};

// These types have incompatible data layout but we use their layout wrappers elsewhere
// (currently only VkWriteDescriptorSet is listed, tagged emit_layout_wrappers).
template<>
struct fex_gen_type<VkWriteDescriptorSet> : fexgen::emit_layout_wrappers {};
#else
// The pNext member of this is a pointer to another VkBaseOutStructure, so data layout compatibility can't be inferred automatically.
// The type is therefore explicitly tagged assume_compatible_data_layout in this preprocessor branch.
template<>
struct fex_gen_type<VkBaseOutStructure> : fexgen::assume_compatible_data_layout {};
#endif


// VkAllocationCallbacks is tagged opaque_type.
// TODO: Should not be opaque, but it's usually NULL anyway. Supporting the contained function pointers will need more work.
template<>
struct fex_gen_type<VkAllocationCallbacks> : fexgen::opaque_type {};

// X11 interop
// The native connection/display members of the XCB and Xlib surface create-info structs are
// tagged custom_repack.
// NOTE(review): presumably these handles must be translated between guest and host - confirm.
template<>
struct fex_gen_config<&VkXcbSurfaceCreateInfoKHR::connection> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&VkXlibSurfaceCreateInfoKHR::dpy> : fexgen::custom_repack {};

// Wayland interop
// wl_display and wl_surface are tagged opaque_type.
template<>
struct fex_gen_type<wl_display> : fexgen::opaque_type {};
template<>
struct fex_gen_type<wl_surface> : fexgen::opaque_type {};

namespace internal {

// Primary template for per-parameter annotations; it is empty, so a parameter carries
// annotations only through an explicit specialisation.
// Template arguments: Function, parameter index, parameter type [optional]
// (the type argument defaults to void when omitted).
template<auto, int, typename = void>
struct fex_gen_param {};

// Primary template for per-function configuration in this namespace. It derives from
// generate_guest_symtable and indirect_guest_calls.
// NOTE(review): presumably the generator reads these bases as defaults for every function
// configured in this namespace - confirm against the thunk generator.
template<auto>
struct fex_gen_config : fexgen::generate_guest_symtable, fexgen::indirect_guest_calls {};

// Instance lifetime. vkCreateInstance is tagged custom_host_impl and its parameter at index 2
// (VkInstance*) is tagged ptr_passthrough; vkDestroyInstance has no function-specific tags.
// NOTE(review): presumably custom_host_impl means the host-side implementation is hand-written
// and ptr_passthrough forwards the pointer without repacking - confirm.
template<>
struct fex_gen_config<vkCreateInstance> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkCreateInstance, 2, VkInstance*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<vkDestroyInstance> {};
// vkEnumeratePhysicalDevices: no function-specific tags when IS_32BIT_THUNK is undefined.
// When it is defined, the function is tagged custom_host_impl and its VkPhysicalDevice*
// parameter (index 2) is tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkEnumeratePhysicalDevices> {};
#else
template<>
struct fex_gen_config<vkEnumeratePhysicalDevices> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkEnumeratePhysicalDevices, 2, VkPhysicalDevice*> : fexgen::ptr_passthrough {};
#endif


// Physical-device query functions; none carries function-specific tags, in either
// IS_32BIT_THUNK configuration.
template<>
struct fex_gen_config<vkGetPhysicalDeviceFeatures> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceFormatProperties> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceImageFormatProperties> {};
// TODO: Output parameter must repack on exit!
template<>
struct fex_gen_config<vkGetPhysicalDeviceProperties> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyProperties> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceMemoryProperties> {};
// Device lifetime. vkCreateDevice is tagged custom_host_impl and its parameter at index 3
// (VkDevice*) is tagged ptr_passthrough; vkDestroyDevice has no function-specific tags.
template<>
struct fex_gen_config<vkCreateDevice> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkCreateDevice, 3, VkDevice*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<vkDestroyDevice> {};
// Instance/device extension and layer enumeration; no function-specific tags.
template<>
struct fex_gen_config<vkEnumerateInstanceExtensionProperties> {};
template<>
struct fex_gen_config<vkEnumerateDeviceExtensionProperties> {};
template<>
struct fex_gen_config<vkEnumerateInstanceLayerProperties> {};
template<>
struct fex_gen_config<vkEnumerateDeviceLayerProperties> {};
// vkGetDeviceQueue: no function-specific tags when IS_32BIT_THUNK is undefined. When it is
// defined, the function is tagged custom_host_impl and its VkQueue* parameter (index 3) is
// tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetDeviceQueue> {};
#else
template<>
struct fex_gen_config<vkGetDeviceQueue> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkGetDeviceQueue, 3, VkQueue*> : fexgen::ptr_passthrough {};
#endif
// vkQueueSubmit: no function-specific tags when IS_32BIT_THUNK is undefined. When it is
// defined, the function is tagged custom_host_impl and its const VkSubmitInfo* parameter
// (index 2) is tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkQueueSubmit> {};
#else
// Needs array repacking for multiple submit infos
template<>
struct fex_gen_config<vkQueueSubmit> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkQueueSubmit, 2, const VkSubmitInfo*> : fexgen::ptr_passthrough {};
#endif
// Queue/device idle waits; no function-specific tags.
template<>
struct fex_gen_config<vkQueueWaitIdle> {};
template<>
struct fex_gen_config<vkDeviceWaitIdle> {};
// Device memory management.
// vkAllocateMemory and vkFreeMemory are tagged custom_host_impl in both configurations.
template<>
struct fex_gen_config<vkAllocateMemory> : fexgen::custom_host_impl {};
template<>
struct fex_gen_config<vkFreeMemory> : fexgen::custom_host_impl {};
// vkMapMemory: custom_host_impl plus ptr_passthrough on the void** parameter (index 5) only
// when IS_32BIT_THUNK is defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkMapMemory> {};
#else
template<>
struct fex_gen_config<vkMapMemory> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkMapMemory, 5, void**> : fexgen::ptr_passthrough {};
#endif
template<>
struct fex_gen_config<vkUnmapMemory> {};
// The following three functions are configured only when IS_32BIT_THUNK is undefined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkFlushMappedMemoryRanges> {};
template<>
struct fex_gen_config<vkInvalidateMappedMemoryRanges> {};
template<>
struct fex_gen_config<vkGetDeviceMemoryCommitment> {};
#endif
// Memory binding and memory-requirement queries; no function-specific tags.
// vkGetImageSparseMemoryRequirements and vkQueueBindSparse are configured only when
// IS_32BIT_THUNK is undefined.
template<>
struct fex_gen_config<vkBindBufferMemory> {};
template<>
struct fex_gen_config<vkBindImageMemory> {};
template<>
struct fex_gen_config<vkGetBufferMemoryRequirements> {};
template<>
struct fex_gen_config<vkGetImageMemoryRequirements> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetImageSparseMemoryRequirements> {};
#endif
template<>
struct fex_gen_config<vkGetPhysicalDeviceSparseImageFormatProperties> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkQueueBindSparse> {};
#endif
// Fence and semaphore objects; no function-specific tags.
// vkGetFenceStatus is configured only when IS_32BIT_THUNK is undefined.
template<>
struct fex_gen_config<vkCreateFence> {};
template<>
struct fex_gen_config<vkDestroyFence> {};
template<>
struct fex_gen_config<vkResetFences> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetFenceStatus> {};
#endif
template<>
struct fex_gen_config<vkWaitForFences> {};
template<>
struct fex_gen_config<vkCreateSemaphore> {};
template<>
struct fex_gen_config<vkDestroySemaphore> {};
// Query pool functions, configured only when IS_32BIT_THUNK is undefined.
// The void* parameter of vkGetQueryPoolResults (index 5) is tagged
// assume_compatible_data_layout.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCreateQueryPool> {};
template<>
struct fex_gen_config<vkDestroyQueryPool> {};
template<>
struct fex_gen_config<vkGetQueryPoolResults> {};
template<>
struct fex_gen_param<vkGetQueryPoolResults, 5, void*> : fexgen::assume_compatible_data_layout {};
#endif
// Buffer, image, image view and command pool objects; no function-specific tags, in either
// IS_32BIT_THUNK configuration.
template<>
struct fex_gen_config<vkCreateBuffer> {};
template<>
struct fex_gen_config<vkDestroyBuffer> {};
template<>
struct fex_gen_config<vkCreateImage> {};
template<>
struct fex_gen_config<vkDestroyImage> {};
template<>
struct fex_gen_config<vkGetImageSubresourceLayout> {};
template<>
struct fex_gen_config<vkCreateImageView> {};
template<>
struct fex_gen_config<vkDestroyImageView> {};
template<>
struct fex_gen_config<vkCreateCommandPool> {};
template<>
struct fex_gen_config<vkDestroyCommandPool> {};
template<>
struct fex_gen_config<vkResetCommandPool> {};
// vkAllocateCommandBuffers: no function-specific tags when IS_32BIT_THUNK is undefined. When it
// is defined, the function is tagged custom_host_impl and its VkCommandBuffer* parameter
// (index 2) is tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkAllocateCommandBuffers> {};
#else
template<>
struct fex_gen_config<vkAllocateCommandBuffers> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkAllocateCommandBuffers, 2, VkCommandBuffer*> : fexgen::ptr_passthrough {};
#endif
// vkFreeCommandBuffers follows the same pattern; the ptr_passthrough parameter is the
// const VkCommandBuffer* at index 3.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkFreeCommandBuffers> {};
#else
template<>
struct fex_gen_config<vkFreeCommandBuffers> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkFreeCommandBuffers, 3, const VkCommandBuffer*> : fexgen::ptr_passthrough {};
#endif
// Command buffer begin/end/reset; no function-specific tags.
template<>
struct fex_gen_config<vkBeginCommandBuffer> {};
template<>
struct fex_gen_config<vkEndCommandBuffer> {};
template<>
struct fex_gen_config<vkResetCommandBuffer> {};
// Copy/update/fill commands and the pipeline barrier.
// Only vkCmdCopyBufferToImage and vkCmdPipelineBarrier are configured in both configurations;
// the rest are configured only when IS_32BIT_THUNK is undefined.
// The const void* parameter of vkCmdUpdateBuffer (index 4) is tagged
// assume_compatible_data_layout.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdCopyBuffer> {};
template<>
struct fex_gen_config<vkCmdCopyImage> {};
#endif
template<>
struct fex_gen_config<vkCmdCopyBufferToImage> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdCopyImageToBuffer> {};
template<>
struct fex_gen_config<vkCmdUpdateBuffer> {};
template<>
struct fex_gen_param<vkCmdUpdateBuffer, 4, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdFillBuffer> {};
#endif
template<>
struct fex_gen_config<vkCmdPipelineBarrier> {};
// Query commands, secondary command buffer execution and event objects.
// All of these are configured only when IS_32BIT_THUNK is undefined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBeginQuery> {};
template<>
struct fex_gen_config<vkCmdEndQuery> {};
template<>
struct fex_gen_config<vkCmdResetQueryPool> {};
template<>
struct fex_gen_config<vkCmdWriteTimestamp> {};
template<>
struct fex_gen_config<vkCmdCopyQueryPoolResults> {};
template<>
struct fex_gen_config<vkCmdExecuteCommands> {};
template<>
struct fex_gen_config<vkCreateEvent> {};
template<>
struct fex_gen_config<vkDestroyEvent> {};
template<>
struct fex_gen_config<vkGetEventStatus> {};
template<>
struct fex_gen_config<vkSetEvent> {};
template<>
struct fex_gen_config<vkResetEvent> {};
#endif
// Buffer view, shader module and pipeline cache objects.
// Only vkCreateShaderModule carries a tag (custom_host_impl); it applies in both configurations.
template<>
struct fex_gen_config<vkCreateBufferView> {};
template<>
struct fex_gen_config<vkDestroyBufferView> {};
template<>
struct fex_gen_config<vkCreateShaderModule> : fexgen::custom_host_impl {};
template<>
struct fex_gen_config<vkDestroyShaderModule> {};
template<>
struct fex_gen_config<vkCreatePipelineCache> {};
template<>
struct fex_gen_config<vkDestroyPipelineCache> {};
// vkGetPipelineCacheData: no function-specific tags when IS_32BIT_THUNK is undefined. When it is
// defined, the function is tagged custom_host_impl and its size_t* parameter (index 2) is
// tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetPipelineCacheData> {};
#else
template<>
struct fex_gen_config<vkGetPipelineCacheData> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkGetPipelineCacheData, 2, size_t*> : fexgen::ptr_passthrough {};
#endif
// In both configurations the void* parameter (index 3) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetPipelineCacheData, 3, void*> : fexgen::assume_compatible_data_layout {};
// Configured only when IS_32BIT_THUNK is undefined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkMergePipelineCaches> {};
template<>
struct fex_gen_config<vkCreateComputePipelines> {};
#endif
// Pipeline, pipeline layout, sampler and descriptor set layout/pool/set objects;
// no function-specific tags, in either IS_32BIT_THUNK configuration.
template<>
struct fex_gen_config<vkDestroyPipeline> {};
template<>
struct fex_gen_config<vkCreatePipelineLayout> {};
template<>
struct fex_gen_config<vkDestroyPipelineLayout> {};
template<>
struct fex_gen_config<vkCreateSampler> {};
template<>
struct fex_gen_config<vkDestroySampler> {};
template<>
struct fex_gen_config<vkCreateDescriptorSetLayout> {};
template<>
struct fex_gen_config<vkDestroyDescriptorSetLayout> {};
template<>
struct fex_gen_config<vkCreateDescriptorPool> {};
template<>
struct fex_gen_config<vkDestroyDescriptorPool> {};
template<>
struct fex_gen_config<vkResetDescriptorPool> {};
template<>
struct fex_gen_config<vkAllocateDescriptorSets> {};
template<>
struct fex_gen_config<vkFreeDescriptorSets> {};
// vkUpdateDescriptorSets: no function-specific tags when IS_32BIT_THUNK is undefined. When it is
// defined, the function is tagged custom_host_impl and both array parameters - the
// const VkWriteDescriptorSet* at index 2 and the const VkCopyDescriptorSet* at index 4 - are
// tagged ptr_passthrough.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkUpdateDescriptorSets> {};
#else
template<>
struct fex_gen_config<vkUpdateDescriptorSets> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkUpdateDescriptorSets, 2, const VkWriteDescriptorSet*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<vkUpdateDescriptorSets, 4, const VkCopyDescriptorSet*> : fexgen::ptr_passthrough {};
#endif
// Pipeline/descriptor-set binding is configured in both configurations.
template<>
struct fex_gen_config<vkCmdBindPipeline> {};
template<>
struct fex_gen_config<vkCmdBindDescriptorSets> {};
// Clear-colour, dispatch, event and push-constant commands are configured only when
// IS_32BIT_THUNK is undefined. The const void* parameter of vkCmdPushConstants (index 5) is
// tagged assume_compatible_data_layout.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdClearColorImage> {};
template<>
struct fex_gen_config<vkCmdDispatch> {};
template<>
struct fex_gen_config<vkCmdDispatchIndirect> {};
template<>
struct fex_gen_config<vkCmdSetEvent> {};
template<>
struct fex_gen_config<vkCmdResetEvent> {};
template<>
struct fex_gen_config<vkCmdWaitEvents> {};
template<>
struct fex_gen_config<vkCmdPushConstants> {};
template<>
struct fex_gen_param<vkCmdPushConstants, 5, const void*> : fexgen::assume_compatible_data_layout {};
#endif

// Graphics pipeline creation, framebuffer and render pass objects, dynamic-state setters,
// index/vertex buffer binding and vkCmdDraw. None carries function-specific tags, and all are
// configured in both IS_32BIT_THUNK configurations.
// TODO: Should be custom_host_impl since there may be more than one VkGraphicsPipelineCreateInfo and more than one output pipeline
template<>
struct fex_gen_config<vkCreateGraphicsPipelines> {};
template<>
struct fex_gen_config<vkCreateFramebuffer> {};
template<>
struct fex_gen_config<vkDestroyFramebuffer> {};
template<>
struct fex_gen_config<vkCreateRenderPass> {};
template<>
struct fex_gen_config<vkDestroyRenderPass> {};
template<>
struct fex_gen_config<vkGetRenderAreaGranularity> {};
template<>
struct fex_gen_config<vkCmdSetViewport> {};
template<>
struct fex_gen_config<vkCmdSetScissor> {};
template<>
struct fex_gen_config<vkCmdSetLineWidth> {};
template<>
struct fex_gen_config<vkCmdSetDepthBias> {};
template<>
struct fex_gen_config<vkCmdSetBlendConstants> {};
template<>
struct fex_gen_config<vkCmdSetDepthBounds> {};
template<>
struct fex_gen_config<vkCmdSetStencilCompareMask> {};
template<>
struct fex_gen_config<vkCmdSetStencilWriteMask> {};
template<>
struct fex_gen_config<vkCmdSetStencilReference> {};
template<>
struct fex_gen_config<vkCmdBindIndexBuffer> {};
template<>
struct fex_gen_config<vkCmdBindVertexBuffers> {};
template<>
struct fex_gen_config<vkCmdDraw> {};
// Further draw, blit, clear, resolve and render-pass commands.
// vkCmdClearAttachments, vkCmdBeginRenderPass, vkCmdEndRenderPass and
// vkEnumerateInstanceVersion are configured in both configurations; everything inside an
// #ifndef below is configured only when IS_32BIT_THUNK is undefined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdDrawIndexed> {};
template<>
struct fex_gen_config<vkCmdDrawIndirect> {};
template<>
struct fex_gen_config<vkCmdDrawIndexedIndirect> {};
template<>
struct fex_gen_config<vkCmdBlitImage> {};
template<>
struct fex_gen_config<vkCmdClearDepthStencilImage> {};
#endif
template<>
struct fex_gen_config<vkCmdClearAttachments> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdResolveImage> {};
#endif
template<>
struct fex_gen_config<vkCmdBeginRenderPass> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdNextSubpass> {};
#endif
template<>
struct fex_gen_config<vkCmdEndRenderPass> {};
template<>
struct fex_gen_config<vkEnumerateInstanceVersion> {};
// "2"-suffixed binding/query functions, device groups and external-resource property queries.
// None carries function-specific tags. Configured in both configurations:
// vkGetImageMemoryRequirements2, vkGetPhysicalDeviceFeatures2, vkGetPhysicalDeviceProperties2,
// vkGetPhysicalDeviceFormatProperties2 and vkGetPhysicalDeviceImageFormatProperties2.
// Everything inside an #ifndef below is configured only when IS_32BIT_THUNK is undefined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkBindBufferMemory2> {};
template<>
struct fex_gen_config<vkBindImageMemory2> {};
template<>
struct fex_gen_config<vkGetDeviceGroupPeerMemoryFeatures> {};
template<>
struct fex_gen_config<vkCmdSetDeviceMask> {};
template<>
struct fex_gen_config<vkEnumeratePhysicalDeviceGroups> {};
#endif
template<>
struct fex_gen_config<vkGetImageMemoryRequirements2> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetBufferMemoryRequirements2> {};
template<>
struct fex_gen_config<vkGetImageSparseMemoryRequirements2> {};
#endif
template<>
struct fex_gen_config<vkGetPhysicalDeviceFeatures2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceProperties2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceFormatProperties2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceImageFormatProperties2> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyProperties2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceMemoryProperties2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSparseImageFormatProperties2> {};
template<>
struct fex_gen_config<vkTrimCommandPool> {};
template<>
struct fex_gen_config<vkGetDeviceQueue2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalBufferProperties> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalFenceProperties> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalSemaphoreProperties> {};
template<>
struct fex_gen_config<vkCmdDispatchBase> {};
#endif
template<>
struct fex_gen_config<vkCreateDescriptorUpdateTemplate> {};
template<>
struct fex_gen_config<vkDestroyDescriptorUpdateTemplate> {};
template<>
struct fex_gen_config<vkUpdateDescriptorSetWithTemplate> {};
template<>
struct fex_gen_param<vkUpdateDescriptorSetWithTemplate, 3, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkGetDescriptorSetLayoutSupport> {};
// fex_gen_config entries for vkCreateSamplerYcbcrConversion .. vkCmdResolveImage2.
// Most of this run sits inside `#ifndef IS_32BIT_THUNK` guards and is therefore dropped when
// IS_32BIT_THUNK is defined; only vkWaitSemaphores, vkGetBufferDeviceAddress,
// vkCreateRenderPass2 and vkCmdPipelineBarrier2 are unguarded here.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCreateSamplerYcbcrConversion> {};
template<>
struct fex_gen_config<vkDestroySamplerYcbcrConversion> {};
template<>
struct fex_gen_config<vkResetQueryPool> {};
template<>
struct fex_gen_config<vkGetSemaphoreCounterValue> {};
#endif
template<>
struct fex_gen_config<vkWaitSemaphores> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkSignalSemaphore> {};
#endif
template<>
struct fex_gen_config<vkGetBufferDeviceAddress> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetBufferOpaqueCaptureAddress> {};
template<>
struct fex_gen_config<vkGetDeviceMemoryOpaqueCaptureAddress> {};
template<>
struct fex_gen_config<vkCmdDrawIndirectCount> {};
template<>
struct fex_gen_config<vkCmdDrawIndexedIndirectCount> {};
#endif
template<>
struct fex_gen_config<vkCreateRenderPass2> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBeginRenderPass2> {};
template<>
struct fex_gen_config<vkCmdNextSubpass2> {};
template<>
struct fex_gen_config<vkCmdEndRenderPass2> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceToolProperties> {};
template<>
struct fex_gen_config<vkCreatePrivateDataSlot> {};
template<>
struct fex_gen_config<vkDestroyPrivateDataSlot> {};
template<>
struct fex_gen_config<vkSetPrivateData> {};
template<>
struct fex_gen_config<vkGetPrivateData> {};
#endif
template<>
struct fex_gen_config<vkCmdPipelineBarrier2> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdWriteTimestamp2> {};
template<>
struct fex_gen_config<vkQueueSubmit2> {};
template<>
struct fex_gen_config<vkCmdCopyBuffer2> {};
template<>
struct fex_gen_config<vkCmdCopyImage2> {};
template<>
struct fex_gen_config<vkCmdCopyBufferToImage2> {};
template<>
struct fex_gen_config<vkCmdCopyImageToBuffer2> {};
template<>
struct fex_gen_config<vkGetDeviceBufferMemoryRequirements> {};
template<>
struct fex_gen_config<vkGetDeviceImageMemoryRequirements> {};
template<>
struct fex_gen_config<vkGetDeviceImageSparseMemoryRequirements> {};
template<>
struct fex_gen_config<vkCmdSetEvent2> {};
template<>
struct fex_gen_config<vkCmdResetEvent2> {};
template<>
struct fex_gen_config<vkCmdWaitEvents2> {};
template<>
struct fex_gen_config<vkCmdBlitImage2> {};
template<>
struct fex_gen_config<vkCmdResolveImage2> {};
#endif
// fex_gen_config entries for vkCmdBeginRendering .. vkCmdSetRenderingInputAttachmentIndices.
// The leading run of vkCmdSet* state setters is unguarded. Further down, individual entries
// are wrapped in `#ifndef IS_32BIT_THUNK`, which removes them when IS_32BIT_THUNK is defined.
template<>
struct fex_gen_config<vkCmdBeginRendering> {};
template<>
struct fex_gen_config<vkCmdEndRendering> {};
template<>
struct fex_gen_config<vkCmdSetCullMode> {};
template<>
struct fex_gen_config<vkCmdSetFrontFace> {};
template<>
struct fex_gen_config<vkCmdSetPrimitiveTopology> {};
template<>
struct fex_gen_config<vkCmdSetViewportWithCount> {};
template<>
struct fex_gen_config<vkCmdSetScissorWithCount> {};
template<>
struct fex_gen_config<vkCmdBindVertexBuffers2> {};
template<>
struct fex_gen_config<vkCmdSetDepthTestEnable> {};
template<>
struct fex_gen_config<vkCmdSetDepthWriteEnable> {};
template<>
struct fex_gen_config<vkCmdSetDepthCompareOp> {};
template<>
struct fex_gen_config<vkCmdSetDepthBoundsTestEnable> {};
template<>
struct fex_gen_config<vkCmdSetStencilTestEnable> {};
template<>
struct fex_gen_config<vkCmdSetStencilOp> {};
template<>
struct fex_gen_config<vkCmdSetRasterizerDiscardEnable> {};
template<>
struct fex_gen_config<vkCmdSetDepthBiasEnable> {};
template<>
struct fex_gen_config<vkCmdSetPrimitiveRestartEnable> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkMapMemory2> {};
#endif
template<>
struct fex_gen_config<vkUnmapMemory2> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetDeviceImageSubresourceLayout> {};
#endif
template<>
struct fex_gen_config<vkGetImageSubresourceLayout2> {};
template<>
struct fex_gen_config<vkCopyMemoryToImage> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCopyImageToMemory> {};
template<>
struct fex_gen_config<vkCopyImageToImage> {};
#endif
template<>
struct fex_gen_config<vkTransitionImageLayout> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdPushDescriptorSet> {};
#endif
template<>
struct fex_gen_config<vkCmdPushDescriptorSetWithTemplate> {};
// Parameter 4 (the `const void*` argument) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkCmdPushDescriptorSetWithTemplate, 4, const void*> : fexgen::assume_compatible_data_layout {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBindDescriptorSets2> {};
template<>
struct fex_gen_config<vkCmdPushConstants2> {};
template<>
struct fex_gen_config<vkCmdPushDescriptorSet2> {};
template<>
struct fex_gen_config<vkCmdPushDescriptorSetWithTemplate2> {};
#endif
template<>
struct fex_gen_config<vkCmdSetLineStipple> {};
template<>
struct fex_gen_config<vkCmdBindIndexBuffer2> {};
template<>
struct fex_gen_config<vkGetRenderingAreaGranularity> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdSetRenderingAttachmentLocations> {};
template<>
struct fex_gen_config<vkCmdSetRenderingInputAttachmentIndices> {};
#endif
// fex_gen_config entries for the KHR surface / swapchain functions
// vkDestroySurfaceKHR .. vkQueuePresentKHR. None of these is guarded by IS_32BIT_THUNK,
// so they are present in every build. All use the empty (annotation-free) form.
template<>
struct fex_gen_config<vkDestroySurfaceKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceSupportKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceCapabilitiesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceFormatsKHR> {}; // TODO: Need to figure out how *not* to repack the last parameter on input...
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfacePresentModesKHR> {};
template<>
struct fex_gen_config<vkCreateSwapchainKHR> {};
template<>
struct fex_gen_config<vkDestroySwapchainKHR> {};
template<>
struct fex_gen_config<vkGetSwapchainImagesKHR> {};
template<>
struct fex_gen_config<vkAcquireNextImageKHR> {};
template<>
struct fex_gen_config<vkQueuePresentKHR> {};
// fex_gen_config entries for vkGetDeviceGroupPresentCapabilitiesKHR .. vkCmdPushDescriptorSetKHR.
// The whole run is inside one `#ifndef IS_32BIT_THUNK` guard, so none of these entries exist
// when IS_32BIT_THUNK is defined.
// NOTE(review): some non-suffixed counterparts (e.g. vkGetPhysicalDeviceFeatures2,
// vkGetPhysicalDeviceProperties2, vkCmdBeginRendering) are listed unguarded earlier in this
// file while the KHR-suffixed forms below are guarded — confirm the difference is intentional.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetDeviceGroupPresentCapabilitiesKHR> {};
template<>
struct fex_gen_config<vkGetDeviceGroupSurfacePresentModesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDevicePresentRectanglesKHR> {};
template<>
struct fex_gen_config<vkAcquireNextImage2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceDisplayPropertiesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceDisplayPlanePropertiesKHR> {};
template<>
struct fex_gen_config<vkGetDisplayPlaneSupportedDisplaysKHR> {};
template<>
struct fex_gen_config<vkGetDisplayModePropertiesKHR> {};
template<>
struct fex_gen_config<vkCreateDisplayModeKHR> {};
template<>
struct fex_gen_config<vkGetDisplayPlaneCapabilitiesKHR> {};
template<>
struct fex_gen_config<vkCreateDisplayPlaneSurfaceKHR> {};
template<>
struct fex_gen_config<vkCreateSharedSwapchainsKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceVideoCapabilitiesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceVideoFormatPropertiesKHR> {};
template<>
struct fex_gen_config<vkCreateVideoSessionKHR> {};
template<>
struct fex_gen_config<vkDestroyVideoSessionKHR> {};
template<>
struct fex_gen_config<vkGetVideoSessionMemoryRequirementsKHR> {};
template<>
struct fex_gen_config<vkBindVideoSessionMemoryKHR> {};
template<>
struct fex_gen_config<vkCreateVideoSessionParametersKHR> {};
template<>
struct fex_gen_config<vkUpdateVideoSessionParametersKHR> {};
template<>
struct fex_gen_config<vkDestroyVideoSessionParametersKHR> {};
template<>
struct fex_gen_config<vkCmdBeginVideoCodingKHR> {};
template<>
struct fex_gen_config<vkCmdEndVideoCodingKHR> {};
template<>
struct fex_gen_config<vkCmdControlVideoCodingKHR> {};
template<>
struct fex_gen_config<vkCmdDecodeVideoKHR> {};
template<>
struct fex_gen_config<vkCmdBeginRenderingKHR> {};
template<>
struct fex_gen_config<vkCmdEndRenderingKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceFeatures2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceFormatProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceImageFormatProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceMemoryProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSparseImageFormatProperties2KHR> {};
template<>
struct fex_gen_config<vkGetDeviceGroupPeerMemoryFeaturesKHR> {};
template<>
struct fex_gen_config<vkCmdSetDeviceMaskKHR> {};
template<>
struct fex_gen_config<vkCmdDispatchBaseKHR> {};
template<>
struct fex_gen_config<vkTrimCommandPoolKHR> {};
template<>
struct fex_gen_config<vkEnumeratePhysicalDeviceGroupsKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalBufferPropertiesKHR> {};
template<>
struct fex_gen_config<vkGetMemoryFdKHR> {};
template<>
struct fex_gen_config<vkGetMemoryFdPropertiesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalSemaphorePropertiesKHR> {};
template<>
struct fex_gen_config<vkImportSemaphoreFdKHR> {};
template<>
struct fex_gen_config<vkGetSemaphoreFdKHR> {};
template<>
struct fex_gen_config<vkCmdPushDescriptorSetKHR> {};
// Closes the IS_32BIT_THUNK guard opened at the top of this run.
#endif
// fex_gen_config entries for the KHR descriptor-update-template functions plus
// vkCreateRenderPass2KHR. All are unguarded (present regardless of IS_32BIT_THUNK).
template<>
struct fex_gen_config<vkCmdPushDescriptorSetWithTemplateKHR> {};
// Parameter 4 (the `const void*` argument) is tagged assume_compatible_data_layout,
// matching the annotation on the non-suffixed vkCmdPushDescriptorSetWithTemplate entry.
template<>
struct fex_gen_param<vkCmdPushDescriptorSetWithTemplateKHR, 4, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCreateDescriptorUpdateTemplateKHR> {};
template<>
struct fex_gen_config<vkDestroyDescriptorUpdateTemplateKHR> {};
template<>
struct fex_gen_config<vkUpdateDescriptorSetWithTemplateKHR> {};
// Parameter 3 (the `const void*` argument) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkUpdateDescriptorSetWithTemplateKHR, 3, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCreateRenderPass2KHR> {};
// fex_gen_config entries for vkCmdBeginRenderPass2KHR .. vkCmdSetLineStippleKHR.
// Everything here is inside a single `#ifndef IS_32BIT_THUNK` guard and is therefore absent
// when IS_32BIT_THUNK is defined. All entries use the empty (annotation-free) form.
// NOTE(review): a few non-suffixed counterparts (e.g. vkWaitSemaphores, vkGetBufferDeviceAddress,
// vkCmdPipelineBarrier2, vkUnmapMemory2, vkCmdSetLineStipple) are listed unguarded earlier in
// this file while the KHR forms below are guarded — confirm this asymmetry is intended.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBeginRenderPass2KHR> {};
template<>
struct fex_gen_config<vkCmdNextSubpass2KHR> {};
template<>
struct fex_gen_config<vkCmdEndRenderPass2KHR> {};
template<>
struct fex_gen_config<vkGetSwapchainStatusKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalFencePropertiesKHR> {};
template<>
struct fex_gen_config<vkImportFenceFdKHR> {};
template<>
struct fex_gen_config<vkGetFenceFdKHR> {};
template<>
struct fex_gen_config<vkEnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR> {};
template<>
struct fex_gen_config<vkAcquireProfilingLockKHR> {};
template<>
struct fex_gen_config<vkReleaseProfilingLockKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceCapabilities2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceFormats2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceDisplayProperties2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceDisplayPlaneProperties2KHR> {};
template<>
struct fex_gen_config<vkGetDisplayModeProperties2KHR> {};
template<>
struct fex_gen_config<vkGetDisplayPlaneCapabilities2KHR> {};
template<>
struct fex_gen_config<vkGetImageMemoryRequirements2KHR> {};
template<>
struct fex_gen_config<vkGetBufferMemoryRequirements2KHR> {};
template<>
struct fex_gen_config<vkGetImageSparseMemoryRequirements2KHR> {};
template<>
struct fex_gen_config<vkCreateSamplerYcbcrConversionKHR> {};
template<>
struct fex_gen_config<vkDestroySamplerYcbcrConversionKHR> {};
template<>
struct fex_gen_config<vkBindBufferMemory2KHR> {};
template<>
struct fex_gen_config<vkBindImageMemory2KHR> {};
template<>
struct fex_gen_config<vkGetDescriptorSetLayoutSupportKHR> {};
template<>
struct fex_gen_config<vkCmdDrawIndirectCountKHR> {};
template<>
struct fex_gen_config<vkCmdDrawIndexedIndirectCountKHR> {};
template<>
struct fex_gen_config<vkGetSemaphoreCounterValueKHR> {};
template<>
struct fex_gen_config<vkWaitSemaphoresKHR> {};
template<>
struct fex_gen_config<vkSignalSemaphoreKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceFragmentShadingRatesKHR> {};
template<>
struct fex_gen_config<vkCmdSetFragmentShadingRateKHR> {};
template<>
struct fex_gen_config<vkCmdSetRenderingAttachmentLocationsKHR> {};
template<>
struct fex_gen_config<vkCmdSetRenderingInputAttachmentIndicesKHR> {};
template<>
struct fex_gen_config<vkWaitForPresentKHR> {};
template<>
struct fex_gen_config<vkGetBufferDeviceAddressKHR> {};
template<>
struct fex_gen_config<vkGetBufferOpaqueCaptureAddressKHR> {};
template<>
struct fex_gen_config<vkGetDeviceMemoryOpaqueCaptureAddressKHR> {};
template<>
struct fex_gen_config<vkCreateDeferredOperationKHR> {};
template<>
struct fex_gen_config<vkDestroyDeferredOperationKHR> {};
template<>
struct fex_gen_config<vkGetDeferredOperationMaxConcurrencyKHR> {};
template<>
struct fex_gen_config<vkGetDeferredOperationResultKHR> {};
template<>
struct fex_gen_config<vkDeferredOperationJoinKHR> {};
template<>
struct fex_gen_config<vkGetPipelineExecutablePropertiesKHR> {};
template<>
struct fex_gen_config<vkGetPipelineExecutableStatisticsKHR> {};
template<>
struct fex_gen_config<vkGetPipelineExecutableInternalRepresentationsKHR> {};
template<>
struct fex_gen_config<vkMapMemory2KHR> {};
template<>
struct fex_gen_config<vkUnmapMemory2KHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceVideoEncodeQualityLevelPropertiesKHR> {};
template<>
struct fex_gen_config<vkGetEncodedVideoSessionParametersKHR> {};
template<>
struct fex_gen_config<vkCmdEncodeVideoKHR> {};
template<>
struct fex_gen_config<vkCmdSetEvent2KHR> {};
template<>
struct fex_gen_config<vkCmdResetEvent2KHR> {};
template<>
struct fex_gen_config<vkCmdWaitEvents2KHR> {};
template<>
struct fex_gen_config<vkCmdPipelineBarrier2KHR> {};
template<>
struct fex_gen_config<vkCmdWriteTimestamp2KHR> {};
template<>
struct fex_gen_config<vkQueueSubmit2KHR> {};
template<>
struct fex_gen_config<vkCmdCopyBuffer2KHR> {};
template<>
struct fex_gen_config<vkCmdCopyImage2KHR> {};
template<>
struct fex_gen_config<vkCmdCopyBufferToImage2KHR> {};
template<>
struct fex_gen_config<vkCmdCopyImageToBuffer2KHR> {};
template<>
struct fex_gen_config<vkCmdBlitImage2KHR> {};
template<>
struct fex_gen_config<vkCmdResolveImage2KHR> {};
template<>
struct fex_gen_config<vkCmdTraceRaysIndirect2KHR> {};
template<>
struct fex_gen_config<vkGetDeviceBufferMemoryRequirementsKHR> {};
template<>
struct fex_gen_config<vkGetDeviceImageMemoryRequirementsKHR> {};
template<>
struct fex_gen_config<vkGetDeviceImageSparseMemoryRequirementsKHR> {};
template<>
struct fex_gen_config<vkCmdBindIndexBuffer2KHR> {};
template<>
struct fex_gen_config<vkGetRenderingAreaGranularityKHR> {};
template<>
struct fex_gen_config<vkGetDeviceImageSubresourceLayoutKHR> {};
template<>
struct fex_gen_config<vkGetImageSubresourceLayout2KHR> {};
template<>
struct fex_gen_config<vkWaitForPresent2KHR> {};
template<>
struct fex_gen_config<vkCreatePipelineBinariesKHR> {};
template<>
struct fex_gen_config<vkDestroyPipelineBinaryKHR> {};
template<>
struct fex_gen_config<vkGetPipelineKeyKHR> {};
template<>
struct fex_gen_config<vkGetPipelineBinaryDataKHR> {};
template<>
struct fex_gen_config<vkReleaseCapturedPipelineDataKHR> {};
template<>
struct fex_gen_config<vkReleaseSwapchainImagesKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceCooperativeMatrixPropertiesKHR> {};
template<>
struct fex_gen_config<vkCmdSetLineStippleKHR> {};
// Closes the IS_32BIT_THUNK guard opened at the top of this run.
#endif
// fex_gen_config entries for vkGetPhysicalDeviceCalibrateableTimeDomainsKHR .. vkCmdEndRendering2KHR.
// The two calibrated-timestamp entries are unguarded; the remaining entries are inside
// `#ifndef IS_32BIT_THUNK` and disappear when IS_32BIT_THUNK is defined.
template<>
struct fex_gen_config<vkGetPhysicalDeviceCalibrateableTimeDomainsKHR> {};
template<>
struct fex_gen_config<vkGetCalibratedTimestampsKHR> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBindDescriptorSets2KHR> {};
template<>
struct fex_gen_config<vkCmdPushConstants2KHR> {};
template<>
struct fex_gen_config<vkCmdPushDescriptorSet2KHR> {};
template<>
struct fex_gen_config<vkCmdPushDescriptorSetWithTemplate2KHR> {};
template<>
struct fex_gen_config<vkCmdSetDescriptorBufferOffsets2EXT> {};
template<>
struct fex_gen_config<vkCmdBindDescriptorBufferEmbeddedSamplers2EXT> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryIndirectKHR> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryToImageIndirectKHR> {};
template<>
struct fex_gen_config<vkCmdEndRendering2KHR> {};
#endif
// Debug-report callback create/destroy pair. Both entries derive from
// fexgen::custom_host_impl instead of using the empty form, and are unguarded
// (present regardless of IS_32BIT_THUNK).
// NOTE(review): custom_host_impl presumably means the host-side function is hand-written
// elsewhere rather than generated — confirm against the generator's documentation.
template<>
struct fex_gen_config<vkCreateDebugReportCallbackEXT> : fexgen::custom_host_impl {};
// Parameter 1 (the create-info pointer) is tagged ptr_passthrough.
template<>
struct fex_gen_param<vkCreateDebugReportCallbackEXT, 1, const VkDebugReportCallbackCreateInfoEXT*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<vkDestroyDebugReportCallbackEXT> : fexgen::custom_host_impl {};
// fex_gen_config entries for vkDebugReportMessageEXT .. vkCmdInsertDebugUtilsLabelEXT.
// The whole run is inside one `#ifndef IS_32BIT_THUNK` guard and is therefore absent when
// IS_32BIT_THUNK is defined. All but one entry use the empty (annotation-free) form.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkDebugReportMessageEXT> {};
template<>
struct fex_gen_config<vkDebugMarkerSetObjectTagEXT> {};
template<>
struct fex_gen_config<vkDebugMarkerSetObjectNameEXT> {};
template<>
struct fex_gen_config<vkCmdDebugMarkerBeginEXT> {};
template<>
struct fex_gen_config<vkCmdDebugMarkerEndEXT> {};
template<>
struct fex_gen_config<vkCmdDebugMarkerInsertEXT> {};
template<>
struct fex_gen_config<vkCmdBindTransformFeedbackBuffersEXT> {};
template<>
struct fex_gen_config<vkCmdBeginTransformFeedbackEXT> {};
template<>
struct fex_gen_config<vkCmdEndTransformFeedbackEXT> {};
template<>
struct fex_gen_config<vkCmdBeginQueryIndexedEXT> {};
template<>
struct fex_gen_config<vkCmdEndQueryIndexedEXT> {};
template<>
struct fex_gen_config<vkCmdDrawIndirectByteCountEXT> {};
template<>
struct fex_gen_config<vkCreateCuModuleNVX> {};
template<>
struct fex_gen_config<vkCreateCuFunctionNVX> {};
template<>
struct fex_gen_config<vkDestroyCuModuleNVX> {};
template<>
struct fex_gen_config<vkDestroyCuFunctionNVX> {};
template<>
struct fex_gen_config<vkCmdCuLaunchKernelNVX> {};
template<>
struct fex_gen_config<vkGetImageViewHandleNVX> {};
template<>
struct fex_gen_config<vkGetImageViewHandle64NVX> {};
template<>
struct fex_gen_config<vkGetImageViewAddressNVX> {};
template<>
struct fex_gen_config<vkCmdDrawIndirectCountAMD> {};
template<>
struct fex_gen_config<vkCmdDrawIndexedIndirectCountAMD> {};
template<>
struct fex_gen_config<vkGetShaderInfoAMD> {};
// Parameter 5 (the `void*` argument) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetShaderInfoAMD, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalImageFormatPropertiesNV> {};
template<>
struct fex_gen_config<vkCmdBeginConditionalRenderingEXT> {};
template<>
struct fex_gen_config<vkCmdEndConditionalRenderingEXT> {};
template<>
struct fex_gen_config<vkCmdSetViewportWScalingNV> {};
template<>
struct fex_gen_config<vkReleaseDisplayEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSurfaceCapabilities2EXT> {};
template<>
struct fex_gen_config<vkDisplayPowerControlEXT> {};
template<>
struct fex_gen_config<vkRegisterDeviceEventEXT> {};
template<>
struct fex_gen_config<vkRegisterDisplayEventEXT> {};
template<>
struct fex_gen_config<vkGetSwapchainCounterEXT> {};
template<>
struct fex_gen_config<vkGetRefreshCycleDurationGOOGLE> {};
template<>
struct fex_gen_config<vkGetPastPresentationTimingGOOGLE> {};
template<>
struct fex_gen_config<vkCmdSetDiscardRectangleEXT> {};
template<>
struct fex_gen_config<vkCmdSetDiscardRectangleEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetDiscardRectangleModeEXT> {};
template<>
struct fex_gen_config<vkSetHdrMetadataEXT> {};
template<>
struct fex_gen_config<vkSetDebugUtilsObjectNameEXT> {};
template<>
struct fex_gen_config<vkSetDebugUtilsObjectTagEXT> {};
template<>
struct fex_gen_config<vkQueueBeginDebugUtilsLabelEXT> {};
template<>
struct fex_gen_config<vkQueueEndDebugUtilsLabelEXT> {};
template<>
struct fex_gen_config<vkQueueInsertDebugUtilsLabelEXT> {};
template<>
struct fex_gen_config<vkCmdBeginDebugUtilsLabelEXT> {};
template<>
struct fex_gen_config<vkCmdEndDebugUtilsLabelEXT> {};
template<>
struct fex_gen_config<vkCmdInsertDebugUtilsLabelEXT> {};
// Closes the IS_32BIT_THUNK guard opened at the top of this run.
#endif
// Debug-utils messenger create/destroy pair, unguarded (present regardless of IS_32BIT_THUNK).
// The create entry derives from fexgen::custom_host_impl and passes its create-info pointer
// (parameter 1) through via ptr_passthrough; the destroy entry uses the empty form.
// NOTE(review): the analogous vkDestroyDebugReportCallbackEXT entry in this file *is* marked
// custom_host_impl, whereas vkDestroyDebugUtilsMessengerEXT is not — confirm this is intended.
template<>
struct fex_gen_config<vkCreateDebugUtilsMessengerEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkCreateDebugUtilsMessengerEXT, 1, const VkDebugUtilsMessengerCreateInfoEXT*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<vkDestroyDebugUtilsMessengerEXT> {};
// fex_gen_config entries for vkSubmitDebugUtilsMessageEXT .. vkCmdWriteBufferMarkerAMD.
// The whole run is inside one `#ifndef IS_32BIT_THUNK` guard and is therefore absent when
// IS_32BIT_THUNK is defined. Several functions that take a raw `void*` / `const void*` data
// argument additionally carry a fex_gen_param tagged assume_compatible_data_layout.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkSubmitDebugUtilsMessageEXT> {};
template<>
struct fex_gen_config<vkCmdSetSampleLocationsEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceMultisamplePropertiesEXT> {};
template<>
struct fex_gen_config<vkGetImageDrmFormatModifierPropertiesEXT> {};
template<>
struct fex_gen_config<vkCreateValidationCacheEXT> {};
template<>
struct fex_gen_config<vkDestroyValidationCacheEXT> {};
template<>
struct fex_gen_config<vkMergeValidationCachesEXT> {};
template<>
struct fex_gen_config<vkGetValidationCacheDataEXT> {};
// Parameter 3 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetValidationCacheDataEXT, 3, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdBindShadingRateImageNV> {};
template<>
struct fex_gen_config<vkCmdSetViewportShadingRatePaletteNV> {};
template<>
struct fex_gen_config<vkCmdSetCoarseSampleOrderNV> {};
template<>
struct fex_gen_config<vkCreateAccelerationStructureNV> {};
template<>
struct fex_gen_config<vkDestroyAccelerationStructureNV> {};
template<>
struct fex_gen_config<vkGetAccelerationStructureMemoryRequirementsNV> {};
template<>
struct fex_gen_config<vkBindAccelerationStructureMemoryNV> {};
template<>
struct fex_gen_config<vkCmdBuildAccelerationStructureNV> {};
template<>
struct fex_gen_config<vkCmdCopyAccelerationStructureNV> {};
template<>
struct fex_gen_config<vkCmdTraceRaysNV> {};
template<>
struct fex_gen_config<vkCreateRayTracingPipelinesNV> {};
template<>
struct fex_gen_config<vkGetRayTracingShaderGroupHandlesKHR> {};
// Parameter 5 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetRayTracingShaderGroupHandlesKHR, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkGetRayTracingShaderGroupHandlesNV> {};
// Parameter 5 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetRayTracingShaderGroupHandlesNV, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkGetAccelerationStructureHandleNV> {};
// Parameter 3 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetAccelerationStructureHandleNV, 3, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdWriteAccelerationStructuresPropertiesNV> {};
template<>
struct fex_gen_config<vkCompileDeferredNV> {};
template<>
struct fex_gen_config<vkGetMemoryHostPointerPropertiesEXT> {};
// Parameter 2 (`const void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetMemoryHostPointerPropertiesEXT, 2, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdWriteBufferMarkerAMD> {};
// Closes the IS_32BIT_THUNK guard opened at the top of this run.
#endif
// fex_gen_config entries for vkCmdWriteBufferMarker2AMD .. vkCreateHeadlessSurfaceEXT.
// The first two entries are unguarded; everything after them is inside
// `#ifndef IS_32BIT_THUNK` and is absent when IS_32BIT_THUNK is defined.
template<>
struct fex_gen_config<vkCmdWriteBufferMarker2AMD> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceCalibrateableTimeDomainsEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetCalibratedTimestampsEXT> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksNV> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksIndirectNV> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksIndirectCountNV> {};
template<>
struct fex_gen_config<vkCmdSetExclusiveScissorEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetExclusiveScissorNV> {};
template<>
struct fex_gen_config<vkCmdSetCheckpointNV> {};
// Parameter 1 (`const void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkCmdSetCheckpointNV, 1, const void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkGetQueueCheckpointDataNV> {};
template<>
struct fex_gen_config<vkGetQueueCheckpointData2NV> {};
template<>
struct fex_gen_config<vkSetSwapchainPresentTimingQueueSizeEXT> {};
template<>
struct fex_gen_config<vkGetSwapchainTimingPropertiesEXT> {};
template<>
struct fex_gen_config<vkGetSwapchainTimeDomainPropertiesEXT> {};
template<>
struct fex_gen_config<vkGetPastPresentationTimingEXT> {};
template<>
struct fex_gen_config<vkInitializePerformanceApiINTEL> {};
template<>
struct fex_gen_config<vkUninitializePerformanceApiINTEL> {};
template<>
struct fex_gen_config<vkCmdSetPerformanceMarkerINTEL> {};
template<>
struct fex_gen_config<vkCmdSetPerformanceStreamMarkerINTEL> {};
template<>
struct fex_gen_config<vkCmdSetPerformanceOverrideINTEL> {};
template<>
struct fex_gen_config<vkAcquirePerformanceConfigurationINTEL> {};
template<>
struct fex_gen_config<vkReleasePerformanceConfigurationINTEL> {};
template<>
struct fex_gen_config<vkQueueSetPerformanceConfigurationINTEL> {};
template<>
struct fex_gen_config<vkGetPerformanceParameterINTEL> {};
template<>
struct fex_gen_config<vkSetLocalDimmingAMD> {};
template<>
struct fex_gen_config<vkGetBufferDeviceAddressEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceToolPropertiesEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceCooperativeMatrixPropertiesNV> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceSupportedFramebufferMixedSamplesCombinationsNV> {};
template<>
struct fex_gen_config<vkCreateHeadlessSurfaceEXT> {};
// Closes the IS_32BIT_THUNK guard opened after the two unguarded entries above.
#endif
// fex_gen_config entries for vkCmdSetLineStippleEXT .. vkTransitionImageLayoutEXT.
// vkCmdSetLineStippleEXT, vkCopyMemoryToImageEXT and vkTransitionImageLayoutEXT are unguarded;
// the remaining entries are inside `#ifndef IS_32BIT_THUNK` and are absent when
// IS_32BIT_THUNK is defined.
template<>
struct fex_gen_config<vkCmdSetLineStippleEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkResetQueryPoolEXT> {};
template<>
struct fex_gen_config<vkCmdSetCullModeEXT> {};
template<>
struct fex_gen_config<vkCmdSetFrontFaceEXT> {};
template<>
struct fex_gen_config<vkCmdSetPrimitiveTopologyEXT> {};
template<>
struct fex_gen_config<vkCmdSetViewportWithCountEXT> {};
template<>
struct fex_gen_config<vkCmdSetScissorWithCountEXT> {};
template<>
struct fex_gen_config<vkCmdBindVertexBuffers2EXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthTestEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthWriteEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthCompareOpEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthBoundsTestEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetStencilTestEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetStencilOpEXT> {};
#endif
template<>
struct fex_gen_config<vkCopyMemoryToImageEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCopyImageToMemoryEXT> {};
template<>
struct fex_gen_config<vkCopyImageToImageEXT> {};
#endif
template<>
struct fex_gen_config<vkTransitionImageLayoutEXT> {};
// fex_gen_config entries for vkGetImageSubresourceLayout2EXT .. vkCmdEndPerTileExecutionQCOM.
// The whole run is inside one `#ifndef IS_32BIT_THUNK` guard and is therefore absent when
// IS_32BIT_THUNK is defined. All entries use the empty (annotation-free) form.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetImageSubresourceLayout2EXT> {};
template<>
struct fex_gen_config<vkReleaseSwapchainImagesEXT> {};
template<>
struct fex_gen_config<vkGetGeneratedCommandsMemoryRequirementsNV> {};
template<>
struct fex_gen_config<vkCmdPreprocessGeneratedCommandsNV> {};
template<>
struct fex_gen_config<vkCmdExecuteGeneratedCommandsNV> {};
template<>
struct fex_gen_config<vkCmdBindPipelineShaderGroupNV> {};
template<>
struct fex_gen_config<vkCreateIndirectCommandsLayoutNV> {};
template<>
struct fex_gen_config<vkDestroyIndirectCommandsLayoutNV> {};
template<>
struct fex_gen_config<vkCmdSetDepthBias2EXT> {};
template<>
struct fex_gen_config<vkAcquireDrmDisplayEXT> {};
template<>
struct fex_gen_config<vkGetDrmDisplayEXT> {};
template<>
struct fex_gen_config<vkCreatePrivateDataSlotEXT> {};
template<>
struct fex_gen_config<vkDestroyPrivateDataSlotEXT> {};
template<>
struct fex_gen_config<vkSetPrivateDataEXT> {};
template<>
struct fex_gen_config<vkGetPrivateDataEXT> {};
template<>
struct fex_gen_config<vkCmdDispatchTileQCOM> {};
template<>
struct fex_gen_config<vkCmdBeginPerTileExecutionQCOM> {};
template<>
struct fex_gen_config<vkCmdEndPerTileExecutionQCOM> {};
#endif
// fex_gen_config entries for vkGetDescriptorSetLayoutSizeEXT .. vkGetDeviceFaultInfoEXT.
// The first five functions are unguarded; the remainder is inside `#ifndef IS_32BIT_THUNK`
// and is absent when IS_32BIT_THUNK is defined.
template<>
struct fex_gen_config<vkGetDescriptorSetLayoutSizeEXT> {};
template<>
struct fex_gen_config<vkGetDescriptorSetLayoutBindingOffsetEXT> {};
template<>
struct fex_gen_config<vkGetDescriptorEXT> {};
// Parameter 3 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkGetDescriptorEXT, 3, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdBindDescriptorBuffersEXT> {};
template<>
struct fex_gen_config<vkCmdSetDescriptorBufferOffsetsEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBindDescriptorBufferEmbeddedSamplersEXT> {};
template<>
struct fex_gen_config<vkGetBufferOpaqueCaptureDescriptorDataEXT> {};
template<>
struct fex_gen_config<vkGetImageOpaqueCaptureDescriptorDataEXT> {};
template<>
struct fex_gen_config<vkGetImageViewOpaqueCaptureDescriptorDataEXT> {};
template<>
struct fex_gen_config<vkGetSamplerOpaqueCaptureDescriptorDataEXT> {};
template<>
struct fex_gen_config<vkGetAccelerationStructureOpaqueCaptureDescriptorDataEXT> {};
template<>
struct fex_gen_config<vkCmdSetFragmentShadingRateEnumNV> {};
template<>
struct fex_gen_config<vkGetDeviceFaultInfoEXT> {};
#endif
// vkCmdSetVertexInputEXT is configured differently depending on IS_32BIT_THUNK:
//  - not defined: plain empty entry with no annotations.
//  - defined:     entry derives from fexgen::custom_host_impl, and the two array pointers
//                 (parameters 2 and 4) are tagged ptr_passthrough.
// NOTE(review): presumably the 32-bit build relies on a hand-written host implementation that
// receives the guest pointers as-is — confirm where that implementation lives.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdSetVertexInputEXT> {};
#else
template<>
struct fex_gen_config<vkCmdSetVertexInputEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkCmdSetVertexInputEXT, 2, const VkVertexInputBindingDescription2EXT*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<vkCmdSetVertexInputEXT, 4, const VkVertexInputAttributeDescription2EXT*> : fexgen::ptr_passthrough {};
#endif
// fex_gen_config entries for vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI ..
// vkGetPipelinePropertiesEXT. The whole run is inside an outer `#ifndef IS_32BIT_THUNK`
// guard and is absent when IS_32BIT_THUNK is defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetDeviceSubpassShadingMaxWorkgroupSizeHUAWEI> {};
template<>
struct fex_gen_config<vkCmdSubpassShadingHUAWEI> {};
template<>
struct fex_gen_config<vkCmdBindInvocationMaskHUAWEI> {};
// The inner guard below repeats the condition of the enclosing guard, so it has no additional
// effect at present; it keeps vkGetMemoryRemoteAddressNV excluded even if the outer guard is
// ever lifted.
#ifndef IS_32BIT_THUNK
// VkRemoteAddressNV* expands to void**, so it needs custom repacking on 32-bit
template<>
struct fex_gen_config<vkGetMemoryRemoteAddressNV> {};
#endif
template<>
struct fex_gen_config<vkGetPipelinePropertiesEXT> {};
#endif
// fex_gen_config entries for vkCmdSetPatchControlPointsEXT .. vkCmdDrawMultiIndexedEXT.
// None is guarded by IS_32BIT_THUNK, so they are present in every build.
// All use the empty (annotation-free) form.
template<>
struct fex_gen_config<vkCmdSetPatchControlPointsEXT> {};
template<>
struct fex_gen_config<vkCmdSetRasterizerDiscardEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthBiasEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetLogicOpEXT> {};
template<>
struct fex_gen_config<vkCmdSetPrimitiveRestartEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetColorWriteEnableEXT> {};
template<>
struct fex_gen_config<vkCmdDrawMultiEXT> {};
template<>
struct fex_gen_config<vkCmdDrawMultiIndexedEXT> {};
// fex_gen_config entries for vkCreateMicromapEXT .. vkGetPipelineIndirectDeviceAddressNV.
// The whole run is inside one `#ifndef IS_32BIT_THUNK` guard and is therefore absent when
// IS_32BIT_THUNK is defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCreateMicromapEXT> {};
template<>
struct fex_gen_config<vkDestroyMicromapEXT> {};
template<>
struct fex_gen_config<vkCmdBuildMicromapsEXT> {};
template<>
struct fex_gen_config<vkBuildMicromapsEXT> {};
template<>
struct fex_gen_config<vkCopyMicromapEXT> {};
template<>
struct fex_gen_config<vkCopyMicromapToMemoryEXT> {};
template<>
struct fex_gen_config<vkCopyMemoryToMicromapEXT> {};
template<>
struct fex_gen_config<vkWriteMicromapsPropertiesEXT> {};
// Parameter 5 (`void*`) is tagged assume_compatible_data_layout.
template<>
struct fex_gen_param<vkWriteMicromapsPropertiesEXT, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdCopyMicromapEXT> {};
template<>
struct fex_gen_config<vkCmdCopyMicromapToMemoryEXT> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryToMicromapEXT> {};
template<>
struct fex_gen_config<vkCmdWriteMicromapsPropertiesEXT> {};
template<>
struct fex_gen_config<vkGetDeviceMicromapCompatibilityEXT> {};
template<>
struct fex_gen_config<vkGetMicromapBuildSizesEXT> {};
template<>
struct fex_gen_config<vkCmdDrawClusterHUAWEI> {};
template<>
struct fex_gen_config<vkCmdDrawClusterIndirectHUAWEI> {};
template<>
struct fex_gen_config<vkSetDeviceMemoryPriorityEXT> {};
template<>
struct fex_gen_config<vkGetDescriptorSetLayoutHostMappingInfoVALVE> {};
template<>
struct fex_gen_config<vkGetDescriptorSetHostMappingVALVE> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryIndirectNV> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryToImageIndirectNV> {};
template<>
struct fex_gen_config<vkCmdDecompressMemoryNV> {};
template<>
struct fex_gen_config<vkCmdDecompressMemoryIndirectCountNV> {};
template<>
struct fex_gen_config<vkGetPipelineIndirectMemoryRequirementsNV> {};
template<>
struct fex_gen_config<vkCmdUpdatePipelineIndirectBufferNV> {};
template<>
struct fex_gen_config<vkGetPipelineIndirectDeviceAddressNV> {};
#endif
// Dynamic-state setter entry points (vkCmdSet*EXT). These sit outside the
// surrounding IS_32BIT_THUNK guards, so they are declared for both thunk flavours.
template<>
struct fex_gen_config<vkCmdSetDepthClampEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetPolygonModeEXT> {};
template<>
struct fex_gen_config<vkCmdSetRasterizationSamplesEXT> {};
template<>
struct fex_gen_config<vkCmdSetSampleMaskEXT> {};
template<>
struct fex_gen_config<vkCmdSetAlphaToCoverageEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetAlphaToOneEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetLogicOpEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetColorBlendEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetColorBlendEquationEXT> {};
template<>
struct fex_gen_config<vkCmdSetColorWriteMaskEXT> {};
template<>
struct fex_gen_config<vkCmdSetTessellationDomainOriginEXT> {};
template<>
struct fex_gen_config<vkCmdSetRasterizationStreamEXT> {};
template<>
struct fex_gen_config<vkCmdSetConservativeRasterizationModeEXT> {};
template<>
struct fex_gen_config<vkCmdSetExtraPrimitiveOverestimationSizeEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthClipEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetSampleLocationsEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetColorBlendAdvancedEXT> {};
template<>
struct fex_gen_config<vkCmdSetProvokingVertexModeEXT> {};
template<>
struct fex_gen_config<vkCmdSetLineRasterizationModeEXT> {};
template<>
struct fex_gen_config<vkCmdSetLineStippleEnableEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthClipNegativeOneToOneEXT> {};
// NV dynamic-state setters and vkCreateTensorARM: only declared when
// IS_32BIT_THUNK is not defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdSetViewportWScalingEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetViewportSwizzleNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageToColorEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageToColorLocationNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageModulationModeNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageModulationTableEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageModulationTableNV> {};
template<>
struct fex_gen_config<vkCmdSetShadingRateImageEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetRepresentativeFragmentTestEnableNV> {};
template<>
struct fex_gen_config<vkCmdSetCoverageReductionModeNV> {};
template<>
struct fex_gen_config<vkCreateTensorARM> {};
#endif
// Note the asymmetry: the vkDestroy*ARM entry points are declared for every
// thunk flavour, while the matching vkCreate*ARM ones are guarded.
template<>
struct fex_gen_config<vkDestroyTensorARM> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCreateTensorViewARM> {};
#endif
template<>
struct fex_gen_config<vkDestroyTensorViewARM> {};
// Tensor, shader-object, optical-flow, latency and data-graph entry points:
// only declared when IS_32BIT_THUNK is not defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetTensorMemoryRequirementsARM> {};
template<>
struct fex_gen_config<vkBindTensorMemoryARM> {};
template<>
struct fex_gen_config<vkGetDeviceTensorMemoryRequirementsARM> {};
template<>
struct fex_gen_config<vkCmdCopyTensorARM> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceExternalTensorPropertiesARM> {};
template<>
struct fex_gen_config<vkGetTensorOpaqueCaptureDescriptorDataARM> {};
template<>
struct fex_gen_config<vkGetTensorViewOpaqueCaptureDescriptorDataARM> {};
template<>
struct fex_gen_config<vkGetShaderModuleIdentifierEXT> {};
template<>
struct fex_gen_config<vkGetShaderModuleCreateInfoIdentifierEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceOpticalFlowImageFormatsNV> {};
template<>
struct fex_gen_config<vkCreateOpticalFlowSessionNV> {};
template<>
struct fex_gen_config<vkDestroyOpticalFlowSessionNV> {};
template<>
struct fex_gen_config<vkBindOpticalFlowSessionImageNV> {};
template<>
struct fex_gen_config<vkCmdOpticalFlowExecuteNV> {};
template<>
struct fex_gen_config<vkAntiLagUpdateAMD> {};
template<>
struct fex_gen_config<vkCreateShadersEXT> {};
template<>
struct fex_gen_config<vkDestroyShaderEXT> {};
template<>
struct fex_gen_config<vkGetShaderBinaryDataEXT> {};
template<>
struct fex_gen_config<vkCmdBindShadersEXT> {};
template<>
struct fex_gen_config<vkCmdSetDepthClampRangeEXT> {};
template<>
struct fex_gen_config<vkGetFramebufferTilePropertiesQCOM> {};
template<>
struct fex_gen_config<vkGetDynamicRenderingTilePropertiesQCOM> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceCooperativeVectorPropertiesNV> {};
template<>
struct fex_gen_config<vkConvertCooperativeVectorMatrixNV> {};
template<>
struct fex_gen_config<vkCmdConvertCooperativeVectorMatrixNV> {};
template<>
struct fex_gen_config<vkSetLatencySleepModeNV> {};
template<>
struct fex_gen_config<vkLatencySleepNV> {};
template<>
struct fex_gen_config<vkSetLatencyMarkerNV> {};
template<>
struct fex_gen_config<vkGetLatencyTimingsNV> {};
template<>
struct fex_gen_config<vkQueueNotifyOutOfBandNV> {};
template<>
struct fex_gen_config<vkCreateDataGraphPipelinesARM> {};
template<>
struct fex_gen_config<vkCreateDataGraphPipelineSessionARM> {};
template<>
struct fex_gen_config<vkGetDataGraphPipelineSessionBindPointRequirementsARM> {};
template<>
struct fex_gen_config<vkGetDataGraphPipelineSessionMemoryRequirementsARM> {};
template<>
struct fex_gen_config<vkBindDataGraphPipelineSessionMemoryARM> {};
#endif
// Mixed region: entries directly at namespace scope are declared for every
// thunk flavour; entries wrapped in #ifndef IS_32BIT_THUNK are compiled out of
// the 32-bit thunk build.
template<>
struct fex_gen_config<vkDestroyDataGraphPipelineSessionARM> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdDispatchDataGraphARM> {};
template<>
struct fex_gen_config<vkGetDataGraphPipelineAvailablePropertiesARM> {};
template<>
struct fex_gen_config<vkGetDataGraphPipelinePropertiesARM> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyDataGraphPropertiesARM> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceQueueFamilyDataGraphProcessingEnginePropertiesARM> {};
#endif
template<>
struct fex_gen_config<vkCmdSetAttachmentFeedbackLoopEnableEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCmdBindTileMemoryQCOM> {};
template<>
struct fex_gen_config<vkCmdDecompressMemoryEXT> {};
#endif
template<>
struct fex_gen_config<vkCmdDecompressMemoryIndirectCountEXT> {};
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkCreateExternalComputeQueueNV> {};
#endif
template<>
struct fex_gen_config<vkDestroyExternalComputeQueueNV> {};
// Cluster/partitioned acceleration structures, generated commands,
// acceleration-structure (KHR), ray-tracing and mesh-shader entry points:
// only declared when IS_32BIT_THUNK is not defined.
#ifndef IS_32BIT_THUNK
template<>
struct fex_gen_config<vkGetExternalComputeQueueDataNV> {};
template<>
struct fex_gen_config<vkGetClusterAccelerationStructureBuildSizesNV> {};
template<>
struct fex_gen_config<vkCmdBuildClusterAccelerationStructureIndirectNV> {};
template<>
struct fex_gen_config<vkGetPartitionedAccelerationStructuresBuildSizesNV> {};
template<>
struct fex_gen_config<vkCmdBuildPartitionedAccelerationStructuresNV> {};
template<>
struct fex_gen_config<vkGetGeneratedCommandsMemoryRequirementsEXT> {};
template<>
struct fex_gen_config<vkCmdPreprocessGeneratedCommandsEXT> {};
template<>
struct fex_gen_config<vkCmdExecuteGeneratedCommandsEXT> {};
template<>
struct fex_gen_config<vkCreateIndirectCommandsLayoutEXT> {};
template<>
struct fex_gen_config<vkDestroyIndirectCommandsLayoutEXT> {};
template<>
struct fex_gen_config<vkCreateIndirectExecutionSetEXT> {};
template<>
struct fex_gen_config<vkDestroyIndirectExecutionSetEXT> {};
template<>
struct fex_gen_config<vkUpdateIndirectExecutionSetPipelineEXT> {};
template<>
struct fex_gen_config<vkUpdateIndirectExecutionSetShaderEXT> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceCooperativeMatrixFlexibleDimensionsPropertiesNV> {};
template<>
struct fex_gen_config<vkEnumeratePhysicalDeviceQueueFamilyPerformanceCountersByRegionARM> {};
template<>
struct fex_gen_config<vkCmdEndRendering2EXT> {};
template<>
struct fex_gen_config<vkCmdBeginCustomResolveEXT> {};
template<>
struct fex_gen_config<vkCmdSetComputeOccupancyPriorityNV> {};
template<>
struct fex_gen_config<vkCreateAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkDestroyAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkCmdBuildAccelerationStructuresKHR> {};
template<>
struct fex_gen_config<vkCmdBuildAccelerationStructuresIndirectKHR> {};
template<>
struct fex_gen_config<vkBuildAccelerationStructuresKHR> {};
template<>
struct fex_gen_config<vkCopyAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkCopyAccelerationStructureToMemoryKHR> {};
template<>
struct fex_gen_config<vkCopyMemoryToAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkWriteAccelerationStructuresPropertiesKHR> {};
// Parameter index 5 (declared as void*) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably this is the caller-provided output buffer — confirm against the Vulkan prototype.
template<>
struct fex_gen_param<vkWriteAccelerationStructuresPropertiesKHR, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdCopyAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkCmdCopyAccelerationStructureToMemoryKHR> {};
template<>
struct fex_gen_config<vkCmdCopyMemoryToAccelerationStructureKHR> {};
template<>
struct fex_gen_config<vkGetAccelerationStructureDeviceAddressKHR> {};
template<>
struct fex_gen_config<vkCmdWriteAccelerationStructuresPropertiesKHR> {};
template<>
struct fex_gen_config<vkGetDeviceAccelerationStructureCompatibilityKHR> {};
template<>
struct fex_gen_config<vkGetAccelerationStructureBuildSizesKHR> {};
template<>
struct fex_gen_config<vkCmdTraceRaysKHR> {};
template<>
struct fex_gen_config<vkCreateRayTracingPipelinesKHR> {};
template<>
struct fex_gen_config<vkGetRayTracingCaptureReplayShaderGroupHandlesKHR> {};
// Parameter index 5 (declared as void*) is annotated with assume_compatible_data_layout.
// NOTE(review): presumably this is the caller-provided output buffer — confirm against the Vulkan prototype.
template<>
struct fex_gen_param<vkGetRayTracingCaptureReplayShaderGroupHandlesKHR, 5, void*> : fexgen::assume_compatible_data_layout {};
template<>
struct fex_gen_config<vkCmdTraceRaysIndirectKHR> {};
template<>
struct fex_gen_config<vkGetRayTracingShaderGroupStackSizeKHR> {};
template<>
struct fex_gen_config<vkCmdSetRayTracingPipelineStackSizeKHR> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksEXT> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksIndirectEXT> {};
template<>
struct fex_gen_config<vkCmdDrawMeshTasksIndirectCountEXT> {};
#endif

// vulkan_xlib_xrandr.h
// Both entry points are tagged custom_host_impl, and their Display* argument
// (parameter index 1) is tagged ptr_passthrough.
// NOTE(review): presumably the Display* is forwarded as-is and translated by
// the hand-written host implementation — confirm in the host-side code.
template<>
struct fex_gen_config<vkAcquireXlibDisplayEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkAcquireXlibDisplayEXT, 1, Display*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<vkGetRandROutputDisplayEXT> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkGetRandROutputDisplayEXT, 1, Display*> : fexgen::ptr_passthrough {};

// vulkan_wayland.h
// Wayland surface entry points; declared without any extra annotation.
template<>
struct fex_gen_config<vkCreateWaylandSurfaceKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceWaylandPresentationSupportKHR> {};

// vulkan_xcb.h
// Surface creation carries no annotation; the presentation-support query is
// tagged custom_host_impl and its xcb_connection_t* argument (parameter
// index 2) is tagged ptr_passthrough.
template<>
struct fex_gen_config<vkCreateXcbSurfaceKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceXcbPresentationSupportKHR> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkGetPhysicalDeviceXcbPresentationSupportKHR, 2, xcb_connection_t*> : fexgen::ptr_passthrough {};

// vulkan_xlib.h
// Surface creation carries no annotation; the presentation-support query is
// tagged custom_host_impl and its Display* argument (parameter index 2) is
// tagged ptr_passthrough.
template<>
struct fex_gen_config<vkCreateXlibSurfaceKHR> {};
template<>
struct fex_gen_config<vkGetPhysicalDeviceXlibPresentationSupportKHR> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<vkGetPhysicalDeviceXlibPresentationSupportKHR, 2, Display*> : fexgen::ptr_passthrough {};

} // namespace internal


================================================
FILE: ThunkLibs/libwayland-client/Guest.cpp
================================================
/*
$info$
tags: thunklibs|wayland-client
$end_info$
*/

#include <wayland-util.h>
#include <wayland-client.h>

// These must be re-declared with an initializer here, since they don't get exported otherwise.
// Each object is defined empty ({}); the real contents are filled in at load time by OnInit.
// NOTE: The initializers for these must be fetched from the host Wayland library, however
//       we can't control how these symbols are loaded since they are global const objects.
//       LD puts them in the application rodata section and ignores any nontrivial library-provided
//       initializers. There is a workaround to enable late initialization anyway in OnInit.
// NOTE: We only need to do this for interfaces exported by libwayland-client itself. Interfaces
//       defined by external libraries work fine.
// NOTE: Every interface listed here has a matching fex_wl_exchange_interface_pointer call in
//       OnInit; keep both lists in sync when adding an interface.
extern "C" const wl_interface wl_output_interface {};
extern "C" const wl_interface wl_shm_pool_interface {};
extern "C" const wl_interface wl_pointer_interface {};
extern "C" const wl_interface wl_compositor_interface {};
extern "C" const wl_interface wl_shm_interface {};
extern "C" const wl_interface wl_registry_interface {};
extern "C" const wl_interface wl_buffer_interface {};
extern "C" const wl_interface wl_seat_interface {};
extern "C" const wl_interface wl_surface_interface {};
extern "C" const wl_interface wl_keyboard_interface {};
extern "C" const wl_interface wl_callback_interface {};
extern "C" const wl_interface wl_display_interface {};
extern "C" const wl_interface wl_data_offer_interface {};
extern "C" const wl_interface wl_data_source_interface {};
extern "C" const wl_interface wl_data_device_interface {};
extern "C" const wl_interface wl_data_device_manager_interface {};
extern "C" const wl_interface wl_shell_interface {};
extern "C" const wl_interface wl_shell_surface_interface {};
extern "C" const wl_interface wl_touch_interface {};
extern "C" const wl_interface wl_region_interface {};
extern "C" const wl_interface wl_subcompositor_interface {};
extern "C" const wl_interface wl_subsurface_interface {};

#include <algorithm>
#include <array>
#include <charconv>
#include <cstdio>
#include <cstdarg>
#include <cstring>
#include <string>

#include "common/Guest.h"

#include "thunkgen_guest_libwayland-client.inl"

// Maps one character of a Wayland protocol message signature to the C++ type
// used for the corresponding listener callback argument. The signature
// alphabet is documented in wayland-util.h. The primary template is only
// declared, so using a character without a specialization fails to compile.
template<char Code>
struct ArgType;

// 's': string
template<>
struct ArgType<'s'> {
  typedef const char* type;
};

// 'u': unsigned 32-bit integer
template<>
struct ArgType<'u'> {
  typedef uint32_t type;
};

// 'i': signed 32-bit integer
template<>
struct ArgType<'i'> {
  typedef int32_t type;
};

// 'o': object, passed as a proxy pointer
template<>
struct ArgType<'o'> {
  typedef wl_proxy* type;
};

// 'n': new id, passed as a proxy pointer
template<>
struct ArgType<'n'> {
  typedef wl_proxy* type;
};

// 'a': array
template<>
struct ArgType<'a'> {
  typedef wl_array* type;
};

// 'f': fixed-point number
template<>
struct ArgType<'f'> {
  typedef wl_fixed_t type;
};

// 'h': passed as a 32-bit integer (fd?)
template<>
struct ArgType<'h'> {
  typedef int32_t type;
};

// Wraps a guest listener callback in a host-callable trampoline.
// The callback is reinterpreted as a function taking a void*, the wl_proxy*
// and one argument per character in Signature (see ArgType). The trampoline
// address is returned widened to 64 bits.
template<char... Signature>
static uint64_t WaylandAllocateHostTrampolineForGuestListener(void (*callback)()) {
  typedef void ListenerFn(void*, wl_proxy*, typename ArgType<Signature>::type...);
  ListenerFn* typed_callback = (ListenerFn*)callback;
  void* trampoline = (void*)AllocateHostTrampolineForGuestFunction(typed_callback);
  return (uint64_t)(uintptr_t)trampoline;
}

#define WL_CLOSURE_MAX_ARGS 20

extern "C" int wl_proxy_add_listener(wl_proxy* proxy, void (**callback)(void), void* data) {
  // Replace guest-provided callback table with host-callable function pointers
  // NOTE: A reference to this table is stored in the wl_proxy, so the data
  //       must remain valid until the proxy is destroyed (or another listener
  //       is added)
  delete[] (uint64_t*)wl_proxy_get_listener(proxy); // Delete previous substitute, if any
  auto host_callbacks = new uint64_t[WL_CLOSURE_MAX_ARGS];

  for (int i = 0; i < fex_wl_get_interface_event_count(proxy); ++i) {
    char event_signature[16];
    fex_wl_get_interface_event_signature(proxy, i, event_signature);
    auto signature2 = std::string_view {event_signature};

    // A leading number indicates the minimum protocol version
    uint32_t since_version = 0;
    auto [ptr, res] = std::from_chars(signature2.begin(), signature2.end(), since_version, 10);
    auto signature = std::string {signature2.substr(ptr - signature2.begin())};

    // ? just indicates that the argument may be null, so it doesn't change the signature
    signature.erase(std::remove(signature.begin(), signature.end(), '?'), signature.end());

    if (signature == "") {
      // E.g. xdg_toplevel::close
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<>(callback[i]);
    } else if (signature == "a") {
      // E.g. xdg_toplevel::wm_capabilities
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'a'>(callback[i]);
    } else if (signature == "f") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'f'>(callback[i]);
    } else if (signature == "hu") {
      // E.g. zwp_linux_dmabuf_feedback_v1::format_table
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'h', 'u'>(callback[i]);
    } else if (signature == "i") {
      // E.g. wl_output_listener::scale
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i'>(callback[i]);
    } else if (signature == "if") {
      // E.g. wl_touch_listener::orientation
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'f'>(callback[i]);
    } else if (signature == "iff") {
      // E.g. wl_touch_listener::shape
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'f', 'f'>(callback[i]);
    } else if (signature == "ii") {
      // E.g. xdg_toplevel::configure_bounds
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'i'>(callback[i]);
    } else if (signature == "iu") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'u'>(callback[i]);
    } else if (signature == "iia") {
      // E.g. xdg_toplevel::configure
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'i', 'a'>(callback[i]);
    } else if (signature == "iiii") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'i', 'i', 'i'>(callback[i]);
    } else if (signature == "iiiiissi") {
      // E.g. wl_output_listener::geometry
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'i', 'i', 'i', 'i', 'i', 's', 's', 'i'>(callback[i]);
    } else if (signature == "n") {
      // E.g. wl_data_device_listener::data_offer
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'n'>(callback[i]);
    } else if (signature == "o") {
      // E.g. wl_data_device_listener::selection
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'o'>(callback[i]);
    } else if (signature == "u") {
      // E.g. wl_registry::global_remove
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u'>(callback[i]);
    } else if (signature == "uff") {
      // E.g. wl_pointer_listener::motion
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'f', 'f'>(callback[i]);
    } else if (signature == "uffff") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'f', 'f', 'f', 'f'>(callback[i]);
    } else if (signature == "uhu") {
      // E.g. wl_keyboard_listener::keymap
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'h', 'u'>(callback[i]);
    } else if (signature == "ui") {
      // E.g. wl_pointer_listener::axis_discrete
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'i'>(callback[i]);
    } else if (signature == "uiff") {
      // E.g. wl_touch_listener::motion
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'i', 'f', 'f'>(callback[i]);
    } else if (signature == "uiii") {
      // E.g. wl_output_listener::mode
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'i', 'i', 'i'>(callback[i]);
    } else if (signature == "uiiii") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'i', 'i', 'i', 'i'>(callback[i]);
    } else if (signature == "uo") {
      // E.g. wl_pointer_listener::leave
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'o'>(callback[i]);
    } else if (signature == "uoa") {
      // E.g. wl_keyboard_listener::enter
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'o', 'a'>(callback[i]);
    } else if (signature == "uoff") {
      // E.g. wl_pointer_listener::enter
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'o', 'f', 'f'>(callback[i]);
    } else if (signature == "uoffo") {
      // E.g. wl_data_device_listener::enter
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'o', 'f', 'f', 'o'>(callback[i]);
    } else if (signature == "uoo") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'o', 'o'>(callback[i]);
    } else if (signature == "us") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 's'>(callback[i]);
    } else if (signature == "uss") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 's', 's'>(callback[i]);
    } else if (signature == "usu") {
      // E.g. wl_registry::global
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 's', 'u'>(callback[i]);
    } else if (signature == "uu") {
      // E.g. wl_pointer_listener::axis_stop
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u'>(callback[i]);
    } else if (signature == "uuf") {
      // E.g. wl_pointer_listener::axis
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'f'>(callback[i]);
    } else if (signature == "uui") {
      // E.g. wl_touch_listener::up
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'i'>(callback[i]);
    } else if (signature == "uuoiff") {
      // E.g. wl_touch_listener::down
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'o', 'i', 'f', 'f'>(callback[i]);
    } else if (signature == "uuou") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'o', 'u'>(callback[i]);
    } else if (signature == "uuu") {
      // E.g. zwp_linux_dmabuf_v1::modifier
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'u'>(callback[i]);
    } else if (signature == "uuuu") {
      // E.g. wl_pointer_listener::button
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'u', 'u'>(callback[i]);
    } else if (signature == "uuuuu") {
      // E.g. wl_keyboard_listener::modifiers
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'u', 'u', 'u', 'u', 'u'>(callback[i]);
    } else if (signature == "s") {
      // E.g. wl_seat::name
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'s'>(callback[i]);
    } else if (signature == "ss") {
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'s', 's'>(callback[i]);
    } else if (signature == "sii") {
      // E.g. zwp_text_input_v3::preedit_string
      host_callbacks[i] = WaylandAllocateHostTrampolineForGuestListener<'s', 'i', 'i'>(callback[i]);
    } else {
      fprintf(stderr, "TODO: Unknown wayland event signature descriptor %s\n", signature.data());
      std::abort();
    }
  }

  return fexfn_pack_wl_proxy_add_listener(proxy, (void (**)())host_callbacks, data);
}

// Guest-side replacement for wl_proxy_destroy: releases the substitute
// callback table attached to the proxy (if any) and then forwards the
// destruction to the host.
extern "C" void wl_proxy_destroy(wl_proxy* proxy) {
  auto* substitute_table = (uint64_t*)wl_proxy_get_listener(proxy);
  delete[] substitute_table; // no-op if no listener was installed
  fexfn_pack_wl_proxy_destroy(proxy);
}

// Adapted from the Wayland sources
// Adapted from the Wayland sources.
// Scans `signature` for the next argument-type character (one of "iufsonah"),
// skipping everything else (e.g. version digits and '?').
// On a match, `type` receives the character and the position right after it
// is returned. If the end of the string is reached first, `type` is set to 0
// and a pointer to the terminator is returned.
static const char* get_next_argument_type(const char* signature, char& type) {
  const char* cursor = signature;
  while (*cursor != '\0') {
    const char candidate = *cursor++;
    if (std::strchr("iufsonah", candidate) != nullptr) {
      type = candidate;
      return cursor;
    }
  }
  type = 0;
  return cursor;
}

static void wl_argument_from_va_list(const char* signature, wl_argument* args, int count, va_list ap) {

  auto sig_iter = signature;
  for (int i = 0; i < count; i++) {
    char arg_type;
    sig_iter = get_next_argument_type(sig_iter, arg_type);

    switch (arg_type) {
    case 'i': args[i].i = va_arg(ap, int32_t); break;
    case 'u': args[i].u = va_arg(ap, uint32_t); break;
    case 'f': args[i].f = va_arg(ap, wl_fixed_t); break;
    case 's': args[i].s = va_arg(ap, const char*); break;
    case 'o': args[i].o = va_arg(ap, struct wl_object*); break;
    case 'n': args[i].o = va_arg(ap, struct wl_object*); break;
    case 'a': args[i].a = va_arg(ap, struct wl_array*); break;
    case 'h': args[i].h = va_arg(ap, int32_t); break;
    case '\0': return;
    }
  }
}

// Variadic front-end for wl_proxy_marshal_array: converts the variadic
// arguments into a wl_argument array based on the request's signature.
extern "C" void wl_proxy_marshal(wl_proxy* proxy, uint32_t opcode, ...) {
  // This is equivalent to reading proxy->interface->methods[opcode].signature on 64-bit.
  // On 32-bit, the data layout differs between host and guest however, so we let the host extract the data.
  char method_signature[64];
  fex_wl_get_method_signature(proxy, opcode, method_signature);

  wl_argument packed_args[WL_CLOSURE_MAX_ARGS];
  va_list varargs;
  va_start(varargs, opcode);
  wl_argument_from_va_list(method_signature, packed_args, WL_CLOSURE_MAX_ARGS, varargs);
  va_end(varargs);

  wl_proxy_marshal_array(proxy, opcode, packed_args);
}

// Variadic front-end for wl_proxy_marshal_array_constructor: converts the
// variadic arguments into a wl_argument array based on the request's signature.
extern "C" wl_proxy* wl_proxy_marshal_constructor(wl_proxy* proxy, uint32_t opcode, const wl_interface* interface, ...) {
  // This is equivalent to reading ((wl_proxy_private*)proxy)->interface->methods[opcode].signature on 64-bit.
  // On 32-bit, the data layout differs between host and guest however, so we let the host extract the data.
  char method_signature[64];
  fex_wl_get_method_signature(proxy, opcode, method_signature);

  wl_argument packed_args[WL_CLOSURE_MAX_ARGS];
  va_list varargs;
  va_start(varargs, interface);
  wl_argument_from_va_list(method_signature, packed_args, WL_CLOSURE_MAX_ARGS, varargs);
  va_end(varargs);

  wl_proxy* created = wl_proxy_marshal_array_constructor(proxy, opcode, packed_args, interface);
  return created;
}

// Variadic front-end for wl_proxy_marshal_array_constructor_versioned:
// converts the variadic arguments into a wl_argument array based on the
// request's signature and forwards the requested version unchanged.
extern "C" wl_proxy* wl_proxy_marshal_constructor_versioned(wl_proxy* proxy, uint32_t opcode, const wl_interface* interface, uint32_t version, ...) {
  // This is equivalent to reading ((wl_proxy_private*)proxy)->interface->methods[opcode].signature on 64-bit.
  // On 32-bit, the data layout differs between host and guest however, so we let the host extract the data.
  char method_signature[64];
  fex_wl_get_method_signature(proxy, opcode, method_signature);

  wl_argument packed_args[WL_CLOSURE_MAX_ARGS];
  va_list varargs;
  va_start(varargs, version);
  wl_argument_from_va_list(method_signature, packed_args, WL_CLOSURE_MAX_ARGS, varargs);
  va_end(varargs);

  wl_proxy* created = wl_proxy_marshal_array_constructor_versioned(proxy, opcode, packed_args, interface, version);
  return created;
}

// Variadic front-end for wl_proxy_marshal_array_flags: converts the variadic
// arguments into a wl_argument array based on the request's signature.
// When built against Wayland headers older than 1.19.91 the array variant
// does not exist, so the function reports an error and traps instead.
extern "C" wl_proxy* wl_proxy_marshal_flags(wl_proxy* proxy, uint32_t opcode, const wl_interface* interface, uint32_t version, uint32_t flags, ...) {
  // This is equivalent to reading proxy->interface->methods[opcode].signature on 64-bit.
  // On 32-bit, the data layout differs between host and guest however, so we let the host extract the data.
  char method_signature[64];
  fex_wl_get_method_signature(proxy, opcode, method_signature);

  wl_argument packed_args[WL_CLOSURE_MAX_ARGS];
  va_list varargs;
  va_start(varargs, flags);
  wl_argument_from_va_list(method_signature, packed_args, WL_CLOSURE_MAX_ARGS, varargs);
  va_end(varargs);

  // wl_proxy_marshal_array_flags is only available starting from Wayland 1.19.91
#if WAYLAND_VERSION_MAJOR * 10000 + WAYLAND_VERSION_MINOR * 100 + WAYLAND_VERSION_MICRO >= 11991
  wl_proxy* created = wl_proxy_marshal_array_flags(proxy, opcode, interface, version, flags, packed_args);
  return created;
#else
  fprintf(stderr, "Host Wayland version is too old to support FEX thunking\n");
  __builtin_trap();
#endif
}

// Guest-side replacement for wl_log_set_handler_client.
// The handler is deliberately ignored: nothing is stored or forwarded.
extern "C" void wl_log_set_handler_client(wl_log_func_t handler) {
  static_cast<void>(handler);
}


// Library init hook: hands each locally defined wl_*_interface object to
// fex_wl_exchange_interface_pointer together with its symbol name.
// The objects are declared const, hence the const_cast.
void OnInit() {
  // Passes the object's address and its stringified name, so the two can never drift apart
#define FEX_WL_EXCHANGE_INTERFACE(symbol) fex_wl_exchange_interface_pointer(const_cast<wl_interface*>(&symbol), #symbol)

  FEX_WL_EXCHANGE_INTERFACE(wl_output_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_shm_pool_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_pointer_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_compositor_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_shm_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_registry_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_buffer_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_seat_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_surface_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_keyboard_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_callback_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_display_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_data_offer_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_data_source_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_data_device_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_data_device_manager_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_shell_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_shell_surface_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_touch_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_region_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_subcompositor_interface);
  FEX_WL_EXCHANGE_INTERFACE(wl_subsurface_interface);

#undef FEX_WL_EXCHANGE_INTERFACE
}

// clang-format would insert spaces around the '-' in the library name below, so disable it here
// clang-format off
LOAD_LIB_INIT(libwayland-client, OnInit)


================================================
FILE: ThunkLibs/libwayland-client/Host.cpp
================================================
/*
$info$
tags: thunklibs|wayland-client
$end_info$
*/

#include <string_view>
#include <unordered_map>
#include <wayland-client.h>

#include <stdio.h>

#include "common/Host.h"
#include <dlfcn.h>

#include <sys/mman.h>

#include <algorithm>
#include <array>
#include <charconv>
#include <cstring>
#include <map>
#include <span>
#include <string>

#include <ranges>

// Guest-side representation of wl_argument.
// In 32-bit thunks the argument is stored as a single uint32_t and assignment
// copies the `u` member of the host value; otherwise the host wl_argument is
// stored and copied as-is.
template<>
struct guest_layout<wl_argument> {
#ifdef IS_32BIT_THUNK
  typedef uint32_t type;

  guest_layout& operator=(const wl_argument from) {
    data = from.u;
    return *this;
  }
#else
  typedef wl_argument type;

  guest_layout& operator=(const wl_argument from) {
    data = from;
    return *this;
  }
#endif

  type data;
};

#include "thunkgen_host_libwayland-client.inl"

// Registry mapping the address of a guest wl_interface to the host wl_interface
// created for it. Entries are added on first lookup.
static std::unordered_map<guest_layout<const wl_interface>*, wl_interface*> guest_to_host_interface;

// Returns the interface a proxy belongs to.
// wl_proxy is a private struct, but its first member is the wl_interface
// pointer, so the proxy address is reinterpreted as a pointer to that member.
static wl_interface* get_proxy_interface(wl_proxy* proxy) {
  wl_interface** first_member = reinterpret_cast<wl_interface**>(proxy);
  return *first_member;
}

// Sanity check that `interface` uses the host data layout; aborts otherwise.
//
// The 32-bit data layout of wl_interface differs from the 64-bit one due to
// its pointer members, and the repacking code is supposed to take care of
// that. If either message count is absurdly high (>= 0x1000), data from
// pointer members leaked into other members, i.e. the object was not repacked.
static void assert_is_valid_host_interface(const wl_interface* interface) {
  constexpr uint32_t implausible_message_count = 0x1000;

  const uint32_t methods = (uint32_t)interface->method_count;
  const uint32_t events = (uint32_t)interface->event_count;
  const bool looks_like_host_layout = methods < implausible_message_count && events < implausible_message_count;
  if (looks_like_host_layout) {
    return;
  }

  fprintf(stderr, "ERROR: Expected %p to be a host wl_interface, but it's not\n", interface);
  std::abort();
}

#ifdef IS_32BIT_THUNK
// Aborts the process if `guest_interface` looks like a wl_interface in host
// data layout rather than guest layout.
static void assert_is_valid_guest_interface(guest_layout<const wl_interface*> guest_interface) {
  // Counterpart of assert_is_valid_host_interface: the guest data is
  // deliberately read through the host struct definition. If both message
  // counts come out plausible, the data is laid out like a host interface,
  // which is not what the caller expects.
  constexpr uint32_t implausible_count = 0x1000;

  const wl_interface* as_host_interface = (const wl_interface*)guest_interface.force_get_host_pointer();
  const auto method_count = static_cast<uint32_t>(as_host_interface->method_count);
  const auto event_count = static_cast<uint32_t>(as_host_interface->event_count);

  const bool looks_like_guest_layout = method_count >= implausible_count || event_count >= implausible_count;
  if (looks_like_guest_layout) {
    return;
  }

  fprintf(stderr, "ERROR: Expected %p to be a guest wl_interface, but it's not\n", guest_interface.force_get_host_pointer());
  std::abort();
}

// Fills `*host_interface` with a host-layout copy of the guest interface
// referenced by `guest_interface_ptr`, then applies the custom repacking step.
static void repack_guest_wl_interface_to_host(guest_layout<const wl_interface*> guest_interface_ptr, wl_interface* host_interface) {
  auto* guest = guest_interface_ptr.get_pointer();
  // The guest structure is expected to use the 32-bit layout
  static_assert(sizeof(*guest) == 24);

  // Convert via the generated host_layout constructor first ...
  host_layout<wl_interface> converted {*guest};
  *host_interface = converted.data;

  // ... then run the custom repacking directly on the destination object
  auto& destination = reinterpret_cast<host_layout<wl_interface>&>(*host_interface);
  fex_apply_custom_repacking_entry(destination, *guest);
}

// Maps guest interface pointers to host pointers
// Maps a guest interface pointer to the corresponding host interface.
//
// Returns nullptr for a null guest pointer. Interfaces that are already
// registered in guest_to_host_interface are returned directly (after a layout
// sanity check). Unknown interfaces are repacked into a newly allocated host
// wl_interface, registered, and returned. The process is aborted if one of the
// layout sanity checks fails.
static const wl_interface* lookup_wl_interface(guest_layout<const wl_interface*> interface) {
  // Used e.g. for wl_shm_pool_destroy
  if (interface.force_get_host_pointer() == nullptr) {
    return nullptr;
  }

  auto [host_interface_it, inserted] = guest_to_host_interface.emplace(interface.get_pointer(), nullptr);
  if (!inserted) {
    assert_is_valid_host_interface(host_interface_it->second);
    return host_interface_it->second;
  }

  assert_is_valid_guest_interface(interface);

  fprintf(stderr, "Unknown wayland interface %p, adding to registry\n", interface.get_pointer());

  // Register the host object before repacking so that lookups of this same
  // guest interface triggered while repacking find the entry.
  wl_interface* host_interface = new wl_interface;
  host_interface_it->second = host_interface;

  // Repacking a wl_message looks up the interfaces it references through this
  // function again (see fex_custom_repack_entry for wl_message), which can
  // insert into guest_to_host_interface. An insertion may rehash the
  // unordered_map and invalidate host_interface_it, so the iterator must not
  // be used past this point; return the raw pointer instead.
  repack_guest_wl_interface_to_host(interface, host_interface);
  return host_interface;
}

// Custom guest-to-host repacking for wl_interface: rebuilds the `methods` and
// `events` message arrays in host layout.
void fex_custom_repack_entry(host_layout<wl_interface>& into, const guest_layout<wl_interface>& from) {
  wl_interface& host = into.data;

  // NOTE: These arrays are complements to global symbols in the guest, so we
  //       never explicitly free this memory
  auto* host_methods = new wl_message[host.method_count];
  auto* host_events = new wl_message[host.event_count];
  host.methods = host_methods;
  host.events = host_events;

  // Zeroes `count` host messages, then converts each guest message into them
  const auto repack_messages = [](wl_message* host_messages, auto guest_messages, int count) {
    memset(host_messages, 0, sizeof(wl_message) * count);
    for (int idx = 0; idx < count; ++idx) {
      const auto& guest_message {guest_messages[idx]};
      host_layout<wl_message> host_message {guest_message};
      fex_apply_custom_repacking_entry(host_message, guest_message);
      memcpy(&host_messages[idx], &host_message, sizeof(host_message));
    }
  };

  repack_messages(host_methods, from.data.methods.get_pointer(), host.method_count);
  repack_messages(host_events, from.data.events.get_pointer(), host.event_count);
}

// Host-to-guest repacking of wl_interface is not supported: any call reports
// the offending function on stderr and aborts.
bool fex_custom_repack_exit(guest_layout<wl_interface>&, const host_layout<wl_interface>&) {
  const char* const this_function = __PRETTY_FUNCTION__;
  fprintf(stderr, "Should not be called: %s\n", this_function);
  std::abort();
}
// Custom guest-to-host repacking for wl_message: rebuilds the `types` array
// with one host interface pointer per argument in the message signature.
void fex_custom_repack_entry(host_layout<wl_message>& into, const guest_layout<wl_message>& from) {
  wl_message& message = into.data;

  // Every alphabetic character in the signature denotes one argument
  const auto arg_count = std::ranges::count_if(std::string_view {message.signature}, isalpha);
  if (!arg_count) {
    // Nothing to rebuild; `types` is left untouched
    return;
  }

  auto** host_types = new const wl_interface*[arg_count];
  message.types = host_types;

  auto guest_types = from.data.types.get_pointer();
  for (int idx = 0; idx < arg_count; ++idx) {
    auto guest_type = guest_types[idx];
    if (guest_type.force_get_host_pointer()) {
      host_types[idx] = lookup_wl_interface(guest_type);
    } else {
      host_types[idx] = nullptr;
    }
  }
}
// Host-to-guest repacking of wl_message is not supported: any call reports
// the offending function on stderr and aborts.
bool fex_custom_repack_exit(guest_layout<wl_message>&, const host_layout<wl_message>&) {
  const char* const this_function = __PRETTY_FUNCTION__;
  fprintf(stderr, "Should not be called: %s\n", this_function);
  std::abort();
}
#else
// Non-32-bit variant: no repacking is performed, the guest pointer is handed
// back as a host pointer.
const wl_interface* lookup_wl_interface(guest_layout<const wl_interface*> interface) {
  const wl_interface* host_pointer = interface.force_get_host_pointer();
  return host_pointer;
}
#endif

// Host implementation of wl_proxy_create: translates the guest interface
// pointer to its host counterpart before forwarding to the native function.
static wl_proxy* fexfn_impl_libwayland_client_wl_proxy_create(wl_proxy* proxy, guest_layout<const wl_interface*> guest_interface_raw) {
  return fexldr_ptr_libwayland_client_wl_proxy_create(proxy, lookup_wl_interface(guest_interface_raw));
}

#define WL_CLOSURE_MAX_ARGS 20
// Produces a host-usable view of a guest wl_argument list for `message`.
// The result exposes data() yielding a wl_argument*: a std::array copy for
// 32-bit thunks, a std::span over the original memory otherwise.
static auto fex_wl_remap_argument_list(guest_layout<wl_argument*> args, const wl_message& message) {
#ifdef IS_32BIT_THUNK
  // Build a fresh array whose used elements are zero-extended to 64-bit
  std::array<wl_argument, WL_CLOSURE_MAX_ARGS> widened_args;

  // Every alphabetic character in the signature denotes one argument
  const std::string_view signature {message.signature};
  const int used_args = std::ranges::count_if(signature, isalpha);

  auto guest_args = args.get_pointer();
  for (int idx = 0; idx < used_args; ++idx) {
    auto& slot = widened_args[idx];
    // NOTE: wl_argument can store a pointer argument, so for 32-bit guests
    //       we need to make sure the upper 32-bits are explicitly zeroed
    std::memset(&slot, 0, sizeof(slot));
    std::memcpy(&slot, &guest_args[idx], sizeof(guest_args[idx]));
  }
  return widened_args;
#else
  // Layouts match: reinterpret the guest list as host arguments in place
  wl_argument* host_args = host_layout<wl_argument*> {args}.data;
  return std::span<wl_argument, WL_CLOSURE_MAX_ARGS> {host_args, WL_CLOSURE_MAX_ARGS};
#endif
}

// Host implementation of wl_proxy_marshal_array: remaps the guest argument
// list according to the signature of method `opcode`, then forwards the call.
extern "C" void fexfn_impl_libwayland_client_wl_proxy_marshal_array(wl_proxy* proxy, uint32_t opcode, guest_layout<wl_argument*> args) {
  const wl_message& method = get_proxy_interface(proxy)->methods[opcode];
  auto host_args = fex_wl_remap_argument_list(args, method);
  fexldr_ptr_libwayland_client_wl_proxy_marshal_array(proxy, opcode, host_args.data());
}

// Shared implementation behind the wl_proxy_marshal_array_* host thunks.
//
// Translates the guest interface and argument list to host form and then
// dispatches to the native function selected by `constructor`, `version` and
// `flags`:
//   constructor,  version, !flags -> wl_proxy_marshal_array_constructor_versioned
//   constructor, !version, !flags -> wl_proxy_marshal_array_constructor
//  !constructor,  version,  flags -> wl_proxy_marshal_array_flags
//  !constructor, !version, !flags -> returns nullptr without calling anything
// Any other combination prints an error and traps.
static wl_proxy* fex_wl_proxy_marshal_array(wl_proxy* proxy, uint32_t opcode, guest_layout<wl_argument*> args,
                                            guest_layout<const wl_interface*> guest_interface,
                                            bool constructor, // Call the _constructor variant of the native wayland function
                                            std::optional<uint32_t> version, std::optional<uint32_t> flags) {
  const wl_interface* target_interface = lookup_wl_interface(guest_interface);

  wl_interface* proxy_interface = get_proxy_interface(proxy);
  assert_is_valid_host_interface(proxy_interface);

  auto host_args = fex_wl_remap_argument_list(args, proxy_interface->methods[opcode]);

  const bool has_version = version.has_value();
  const bool has_flags = flags.has_value();

  if (constructor && !has_flags) {
    if (has_version) {
      return fexldr_ptr_libwayland_client_wl_proxy_marshal_array_constructor_versioned(proxy, opcode, host_args.data(), target_interface, *version);
    }
    return fexldr_ptr_libwayland_client_wl_proxy_marshal_array_constructor(proxy, opcode, host_args.data(), target_interface);
  }

  if (!constructor && has_version == has_flags) {
    if (!has_version) {
      return nullptr;
    }
    // wl_proxy_marshal_array_flags is only available starting from Wayland 1.19.91
#if WAYLAND_VERSION_MAJOR * 10000 + WAYLAND_VERSION_MINOR * 100 + WAYLAND_VERSION_MICRO >= 11991
    return fexldr_ptr_libwayland_client_wl_proxy_marshal_array_flags(proxy, opcode, target_interface, *version, *flags, host_args.data());
#else
    fprintf(stderr, "Host Wayland version is too old to support FEX thunking\n");
    __builtin_trap();
#endif
  }

  fprintf(stderr, "Invalid configuration\n");
  __builtin_trap();
}

// Host implementation of wl_proxy_marshal_array_constructor_versioned:
// constructor variant with an explicit version and no flags.
extern "C" wl_proxy* fexfn_impl_libwayland_client_wl_proxy_marshal_array_constructor_versioned(
  wl_proxy* proxy, uint32_t opcode, guest_layout<wl_argument*> args, guest_layout<const wl_interface*> interface, uint32_t version) {
  constexpr bool use_constructor_variant = true;
  const std::optional<uint32_t> no_flags = std::nullopt;
  return fex_wl_proxy_marshal_array(proxy, opcode, args, interface, use_constructor_variant, version, no_flags);
}

// Host implementation of wl_proxy_marshal_array_constructor: constructor
// variant with neither version nor flags.
extern "C" wl_proxy* fexfn_impl_libwayland_client_wl_proxy_marshal_array_constructor(
  wl_proxy* proxy, uint32_t opcode, guest_layout<wl_argument*> args, guest_layout<const wl_interface*> interface) {
  constexpr bool use_constructor_variant = true;
  const std::optional<uint32_t> no_version = std::nullopt;
  const std::optional<uint32_t> no_flags = std::nullopt;
  return fex_wl_proxy_marshal_array(proxy, opcode, args, interface, use_constructor_variant, no_version, no_flags);
}

// Host implementation of wl_proxy_marshal_array_flags: non-constructor
// variant carrying both a version and flags.
extern "C" wl_proxy* fexfn_impl_libwayland_client_wl_proxy_marshal_array_flags(wl_proxy* proxy, uint32_t opcode,
                                                                               guest_layout<const wl_interface*> interface, uint32_t version,
                                                                               uint32_t flags, guest_layout<wl_argument*> args) {
  constexpr bool use_constructor_variant = false;
  return fex_wl_proxy_marshal_array(proxy, opcode, args, interface, use_constructor_variant, version, flags);
}

// Variant of CallbackUnpack::CallGuestPtr that relocates a wl_array parameter
// for 32-bit guests. Relocating this parameter is required since it may
// reference inaccessible memory regions (presumably due to pointing to data
// on the host stack).
#ifndef IS_32BIT_THUNK
// Non-32-bit thunks: no relocation needed, reuse the generic callback
// unpacker for a signature that ends in a wl_array* parameter.
template<typename Result, typename... Args>
const auto CallGuestPtrWithWaylandArray = CallbackUnpack<Result(Args..., wl_array*)>::CallGuestPtr;
#else
// 32-bit thunks: invokes the guest callback with `args...` followed by a
// pointer to a guest-layout copy of `*array`.
template<typename Result, typename... Args>
static auto CallGuestPtrWithWaylandArray(Args... args, wl_array* array) -> Result {
  // Obtain the guest call information through the custom-ABI macro
  GuestcallInfo* guestcall;
  LOAD_INTERNAL_GUESTPTR_VIA_CUSTOM_ABI(guestcall);

  // Argument pack as seen by the guest: every argument in guest layout, with
  // the trailing array passed as a guest pointer
  using PackedArgumentsType = PackedArguments<Result, guest_layout<Args>..., guest_layout<wl_array*>>;

  // NOTE(review): presumably allocates from the guest's stack so that the
  // objects below are addressable by the guest - confirm
  GuestStackBumpAllocator GuestStack;

  // Copy the wl_array header into the allocator in guest layout and build a
  // 32-bit guest pointer referring to that copy
  auto* guest_array = GuestStack.New<guest_layout<wl_array>>(to_guest(to_host_layout(*array)));
  guest_layout<wl_array*> guest_array_ptr = {.data = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(guest_array))};

  // Convert the remaining arguments to guest layout and pack everything
  auto& packed_args = *GuestStack.New<PackedArgumentsType>(to_guest(to_host_layout(args))..., guest_array_ptr);

  guestcall->CallCallback(guestcall->GuestUnpacker, guestcall->GuestTarget, &packed_args);

  // Forward the guest's return value, if the callback has one
  if constexpr (!std::is_void_v<Result>) {
    return packed_args.rv;
  }
}
#endif

// See wayland-util.h for documentation on protocol message signatures
template<char>
struct ArgType;
template<>
struct ArgType<'s'> {
  using type = const char*;
};
template<>
struct ArgType<'u'> {
  using type = uint32_t;
};
template<>
struct ArgType<'i'> {
  using type = int32_t;
};
template<>
struct ArgType<'o'> {
  using type = wl_proxy*;
};
template<>
struct ArgType<'n'> {
  using type = wl_proxy*;
};
template<>
struct ArgType<'a'> {
  using type = wl_array*;
};
template<>
struct ArgType<'f'> {
  using type = wl_fixed_t;
};
template<>
struct ArgType<'h'> {
  using type = int32_t;
}; // fd?

// Finalizes the host trampoline for a guest listener callback whose
// parameters (after the user data pointer and the proxy) are described by the
// given Wayland signature characters.
template<char... Signature>
static void WaylandFinalizeHostTrampolineForGuestListener(void (*callback)()) {
  using ListenerFn = void(void*, wl_proxy*, typename ArgType<Signature>::type...);
  auto* typed_callback = reinterpret_cast<ListenerFn*>(callback);
  FinalizeHostTrampolineForGuestFunction(typed_callback);
}

// Host implementation of wl_proxy_add_listener.
//
// For every event of the proxy's interface, the matching entry of the guest
// callback table is finalized as a host trampoline with the parameter types
// implied by the event signature. The table itself is then registered with
// the native library.
//
// Aborts if the proxy interface fails the host-layout sanity check or if an
// event signature is not covered by the dispatch chain below. Returns the
// result of the native wl_proxy_add_listener.
extern "C" int
fexfn_impl_libwayland_client_wl_proxy_add_listener(struct wl_proxy* proxy, guest_layout<void (**)(void)> callback_table_raw, void* data) {
  auto interface = get_proxy_interface(proxy);

  assert_is_valid_host_interface(interface);

  auto callback_table = callback_table_raw.force_get_host_pointer();

  for (int i = 0; i < interface->event_count; ++i) {
    auto signature_view = std::string_view {interface->events[i].signature};

    // std::from_chars operates on raw character pointers. string_view
    // iterators are not guaranteed to be pointers, so the range is taken from
    // data()/size() instead of begin()/end().
    const char* const signature_begin = signature_view.data();
    const char* const signature_end = signature_begin + signature_view.size();

    // A leading number indicates the minimum protocol version
    uint32_t since_version = 0;
    auto [ptr, res] = std::from_chars(signature_begin, signature_end, since_version, 10);
    // Drop the version prefix (if any); `ptr` points at the first unparsed character
    auto signature = std::string {signature_view.substr(ptr - signature_begin)};

    // ? just indicates that the argument may be null, so it doesn't change the signature
    signature.erase(std::remove(signature.begin(), signature.end(), '?'), signature.end());

    auto callback = callback_table[i];

    // Signatures containing an array ('a') use CallGuestPtrWithWaylandArray;
    // all others use the generic listener trampoline.
    if (signature == "") {
      // E.g. xdg_toplevel::close
      WaylandFinalizeHostTrampolineForGuestListener<>(callback);
    } else if (signature == "a") {
      // E.g. xdg_toplevel::wm_capabilities
      FEX::HLE::FinalizeHostTrampolineForGuestFunction((FEX::HLE::HostToGuestTrampolinePtr*)callback,
                                                       (void*)CallGuestPtrWithWaylandArray<void, void*, wl_proxy*>);
    } else if (signature == "f") {
      WaylandFinalizeHostTrampolineForGuestListener<'f'>(callback);
    } else if (signature == "hu") {
      // E.g. zwp_linux_dmabuf_feedback_v1::format_table
      WaylandFinalizeHostTrampolineForGuestListener<'h', 'u'>(callback);
    } else if (signature == "i") {
      // E.g. wl_output_listener::scale
      WaylandFinalizeHostTrampolineForGuestListener<'i'>(callback);
    } else if (signature == "if") {
      // E.g. wl_touch_listener::orientation
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'f'>(callback);
    } else if (signature == "iff") {
      // E.g. wl_touch_listener::shape
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'f', 'f'>(callback);
    } else if (signature == "ii") {
      // E.g. xdg_toplevel::configure_bounds
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'i'>(callback);
    } else if (signature == "iu") {
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'u'>(callback);
    } else if (signature == "iia") {
      // E.g. xdg_toplevel::configure
      FEX::HLE::FinalizeHostTrampolineForGuestFunction((FEX::HLE::HostToGuestTrampolinePtr*)callback,
                                                       (void*)CallGuestPtrWithWaylandArray<void, void*, wl_proxy*, int32_t, int32_t>);
    } else if (signature == "iiii") {
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'i', 'i', 'i'>(callback);
    } else if (signature == "iiiiissi") {
      // E.g. wl_output_listener::geometry
      WaylandFinalizeHostTrampolineForGuestListener<'i', 'i', 'i', 'i', 'i', 's', 's', 'i'>(callback);
    } else if (signature == "n") {
      // E.g. wl_data_device_listener::data_offer
      WaylandFinalizeHostTrampolineForGuestListener<'n'>(callback);
    } else if (signature == "o") {
      // E.g. wl_data_device_listener::selection
      WaylandFinalizeHostTrampolineForGuestListener<'o'>(callback);
    } else if (signature == "u") {
      // E.g. wl_registry::global_remove
      WaylandFinalizeHostTrampolineForGuestListener<'u'>(callback);
    } else if (signature == "uff") {
      // E.g. wl_pointer_listener::motion
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'f', 'f'>(callback);
    } else if (signature == "uffff") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'f', 'f', 'f', 'f'>(callback);
    } else if (signature == "uhu") {
      // E.g. wl_keyboard_listener::keymap
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'h', 'u'>(callback);
    } else if (signature == "ui") {
      // E.g. wl_pointer_listener::axis_discrete
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'i'>(callback);
    } else if (signature == "uiff") {
      // E.g. wl_touch_listener::motion
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'i', 'f', 'f'>(callback);
    } else if (signature == "uiii") {
      // E.g. wl_output_listener::mode
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'i', 'i', 'i'>(callback);
    } else if (signature == "uiiii") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'i', 'i', 'i', 'i'>(callback);
    } else if (signature == "uo") {
      // E.g. wl_pointer_listener::leave
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'o'>(callback);
    } else if (signature == "uoa") {
      // E.g. wl_keyboard_listener::enter
      FEX::HLE::FinalizeHostTrampolineForGuestFunction((FEX::HLE::HostToGuestTrampolinePtr*)callback,
                                                       (void*)CallGuestPtrWithWaylandArray<void, void*, wl_proxy*, uint32_t, wl_surface*>);
    } else if (signature == "uoff") {
      // E.g. wl_pointer_listener::enter
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'o', 'f', 'f'>(callback);
    } else if (signature == "uoffo") {
      // E.g. wl_data_device_listener::enter
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'o', 'f', 'f', 'o'>(callback);
    } else if (signature == "uoo") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'o', 'o'>(callback);
    } else if (signature == "us") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 's'>(callback);
    } else if (signature == "uss") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 's', 's'>(callback);
    } else if (signature == "usu") {
      // E.g. wl_registry::global
      WaylandFinalizeHostTrampolineForGuestListener<'u', 's', 'u'>(callback);
    } else if (signature == "uu") {
      // E.g. wl_pointer_listener::axis_stop
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u'>(callback);
    } else if (signature == "uuf") {
      // E.g. wl_pointer_listener::axis
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'f'>(callback);
    } else if (signature == "uui") {
      // E.g. wl_touch_listener::up
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'i'>(callback);
    } else if (signature == "uuoiff") {
      // E.g. wl_touch_listener::down
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'o', 'i', 'f', 'f'>(callback);
    } else if (signature == "uuou") {
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'o', 'u'>(callback);
    } else if (signature == "uuu") {
      // E.g. zwp_linux_dmabuf_v1::modifier
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'u'>(callback);
    } else if (signature == "uuuu") {
      // E.g. wl_pointer_listener::button
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'u', 'u'>(callback);
    } else if (signature == "uuuuu") {
      // E.g. wl_keyboard_listener::modifiers
      WaylandFinalizeHostTrampolineForGuestListener<'u', 'u', 'u', 'u', 'u'>(callback);
    } else if (signature == "s") {
      // E.g. wl_seat::name
      WaylandFinalizeHostTrampolineForGuestListener<'s'>(callback);
    } else if (signature == "sii") {
      // E.g. zwp_text_input_v3::preedit_string
      WaylandFinalizeHostTrampolineForGuestListener<'s', 'i', 'i'>(callback);
    } else if (signature == "ss") {
      WaylandFinalizeHostTrampolineForGuestListener<'s', 's'>(callback);
    } else {
      fprintf(stderr, "TODO: Unknown wayland event signature descriptor %s\n", signature.data());
      std::abort();
    }
  }

  // Pass the original function pointer table to the host wayland library. This ensures the table is valid until the listener is unregistered.
  return fexldr_ptr_libwayland_client_wl_proxy_add_listener(proxy, callback_table, data);
}

// Registers the guest interface object `guest_interface_raw` as the
// counterpart of the host library's interface symbol `name`, then overwrites
// the guest object with the host interface's data (converted to guest layout
// for 32-bit thunks).
//
// Aborts if the symbol cannot be found in the host library or if the guest
// memory cannot be made writable.
void fexfn_impl_libwayland_client_fex_wl_exchange_interface_pointer(guest_layout<wl_interface*> guest_interface_raw, const char* name) {
  auto& guest_interface = *guest_interface_raw.get_pointer();
  auto& host_interface = guest_to_host_interface[reinterpret_cast<guest_layout<const wl_interface>*>(&guest_interface)];
  host_interface = reinterpret_cast<wl_interface*>(dlsym(fexldr_ptr_libwayland_client_so, name));
  if (!host_interface) {
    fprintf(stderr, "Could not find host interface corresponding to %p (%s)\n", &guest_interface, name);
    std::abort();
  }

  // Number of bytes written to the guest object below. For 32-bit thunks this
  // is the guest layout, which is smaller than the host wl_interface; sizing
  // the remapped range by the host struct could needlessly include (and fail
  // on) the following page.
#ifdef IS_32BIT_THUNK
  const auto guest_object_size = sizeof(guest_layout<wl_interface>);
#else
  const auto guest_object_size = sizeof(wl_interface);
#endif

  // Wayland-client declares interface pointers as `const`, which makes LD put
  // them into the rodata section of the application itself instead of the
  // library. To copy the host information to them on startup, we must
  // temporarily disable write-protection on this data hence.
  // NOTE: This may span page boundaries, so up to 2 pages may need to be changed
  const auto source_addr = reinterpret_cast<uintptr_t>(guest_interface_raw.force_get_host_pointer());
  const auto page_begin = source_addr & ~uintptr_t {0xfff};
  const auto remap_size = ((source_addr & 0xfff) + guest_object_size > 0x1000) ? 0x2000 : 0x1000;
  if (0 != mprotect((void*)page_begin, remap_size, PROT_READ | PROT_WRITE)) {
    fprintf(stderr, "ERROR: %s\n", strerror(errno));
    std::abort();
  }

#ifndef IS_32BIT_THUNK
  // Same layout on both sides: copy the host interface verbatim
  memcpy(&guest_interface, host_interface, sizeof(wl_interface));
#else
  guest_interface = to_guest(to_host_layout(*host_interface));

  // The message arrays contain pointers themselves, so guest-layout copies
  // are created and referenced from the guest interface.
  // NOTE: These arrays are complements to global symbols in the guest, so we
  //       never explicitly free this memory
  guest_interface.data.methods.data = (uintptr_t)new guest_layout<wl_message>[host_interface->method_count];
  for (int i = 0; i < host_interface->method_count; ++i) {
    guest_interface.data.methods.get_pointer()[i] = to_guest(to_host_layout(host_interface->methods[i]));
    guest_interface.data.methods.get_pointer()[i].data.types = to_guest(to_host_layout(host_interface->methods[i].types));
  }

  guest_interface.data.events.data = (uintptr_t)new guest_layout<wl_message>[host_interface->event_count];
  for (int i = 0; i < host_interface->event_count; ++i) {
    guest_interface.data.events.get_pointer()[i] = to_guest(to_host_layout(host_interface->events[i]));
    guest_interface.data.events.get_pointer()[i].data.types = to_guest(to_host_layout(host_interface->events[i].types));
  }
#endif

  // TODO: Disabled until we ensure the interface data is indeed stored in rodata
  //  mprotect((void*)page_begin, remap_size, PROT_READ);
}

// Copies the signature string of method `opcode` of the proxy's interface
// into `out`. The caller must provide a sufficiently large buffer.
void fexfn_impl_libwayland_client_fex_wl_get_method_signature(wl_proxy* proxy, uint32_t opcode, char* out) {
  const wl_message& method = get_proxy_interface(proxy)->methods[opcode];
  strcpy(out, method.signature);
}

// Returns the number of events declared by the proxy's interface.
int fexfn_impl_libwayland_client_fex_wl_get_interface_event_count(wl_proxy* proxy) {
  const wl_interface* interface = get_proxy_interface(proxy);
  return interface->event_count;
}

// Copies the name of event `i` of the proxy's interface into `out`.
// The caller must provide a sufficiently large buffer.
void fexfn_impl_libwayland_client_fex_wl_get_interface_event_name(wl_proxy* proxy, int i, char* out) {
  const wl_message& event = get_proxy_interface(proxy)->events[i];
  strcpy(out, event.name);
}

// Copies the signature string of event `i` of the proxy's interface into
// `out`. The caller must provide a sufficiently large buffer.
void fexfn_impl_libwayland_client_fex_wl_get_interface_event_signature(wl_proxy* proxy, int i, char* out) {
  const wl_message& event = get_proxy_interface(proxy)->events[i];
  strcpy(out, event.signature);
}

// NOTE(review): EXPORTS is not defined in this file; presumably it comes from
// common/Host.h and emits the export table for this thunk library - confirm.
EXPORTS(libwayland_client)


================================================
FILE: ThunkLibs/libwayland-client/libwayland-client_interface.cpp
================================================
#include <common/GeneratorInterface.h>

#include <wayland-client.h>

// Primary annotation template keyed on a function or member pointer.
// Specializations below select the functions to thunk and attach fexgen
// annotations to them.
template<auto>
struct fex_gen_config {
  // NOTE(review): presumably the library version (libwayland-client.so.0) - confirm
  unsigned version = 0;
};

// Primary annotation template keyed on a type
template<typename>
struct fex_gen_type {};

// Primary annotation template keyed on a function parameter:
// Function, parameter index, parameter type [optional]
template<auto, int, typename = void>
struct fex_gen_param {};

// Types annotated as opaque
template<>
struct fex_gen_type<wl_display> : fexgen::opaque_type {};
template<>
struct fex_gen_type<wl_proxy> : fexgen::opaque_type {};

template<>
struct fex_gen_type<wl_event_queue> : fexgen::opaque_type {};

// Passed over Wayland's wire protocol for some functions
template<>
struct fex_gen_type<wl_array> : fexgen::emit_layout_wrappers {};

#ifdef IS_32BIT_THUNK
// wl_interface and wl_message reference each other through pointers.
// Their pointer members are marked for custom repacking; the matching
// fex_custom_repack_entry/fex_custom_repack_exit functions live in Host.cpp.
template<>
struct fex_gen_type<wl_interface> : fexgen::emit_layout_wrappers {};
template<>
struct fex_gen_config<&wl_interface::methods> : fexgen::custom_repack {};
template<>
struct fex_gen_config<&wl_interface::events> : fexgen::custom_repack {};
template<>
struct fex_gen_type<wl_message> : fexgen::emit_layout_wrappers {};
template<>
struct fex_gen_config<&wl_message::types> : fexgen::custom_repack {};
#else
// Outside of 32-bit thunks, wl_interface is declared layout-compatible
template<>
struct fex_gen_type<wl_interface> : fexgen::assume_compatible_data_layout {};
#endif

// wl_proxy_destroy uses a custom entrypoint on the guest side
template<>
struct fex_gen_config<wl_proxy_destroy> : fexgen::custom_guest_entrypoint {};

// wl_display_* functions, listed without additional annotations
template<>
struct fex_gen_config<wl_display_connect> {};
template<>
struct fex_gen_config<wl_display_flush> {};
template<>
struct fex_gen_config<wl_display_cancel_read> {};
template<>
struct fex_gen_config<wl_display_create_queue> {};
template<>
struct fex_gen_config<wl_display_disconnect> {};
template<>
struct fex_gen_config<wl_display_dispatch> {};
template<>
struct fex_gen_config<wl_display_dispatch_pending> {};
template<>
struct fex_gen_config<wl_display_dispatch_queue> {};
template<>
struct fex_gen_config<wl_display_dispatch_queue_pending> {};
template<>
struct fex_gen_config<wl_display_get_error> {};
template<>
struct fex_gen_config<wl_display_prepare_read> {};
template<>
struct fex_gen_config<wl_display_prepare_read_queue> {};
template<>
struct fex_gen_config<wl_display_read_events> {};
template<>
struct fex_gen_config<wl_display_roundtrip> {};
template<>
struct fex_gen_config<wl_display_roundtrip_queue> {};
template<>
struct fex_gen_config<wl_display_connect_to_fd> {};
template<>
struct fex_gen_config<wl_display_get_fd> {};

template<>
struct fex_gen_config<wl_event_queue_destroy> {};

// wl_proxy_add_listener has a hand-written host implementation
// (fexfn_impl_libwayland_client_wl_proxy_add_listener in Host.cpp) and a
// custom guest entrypoint
template<>
struct fex_gen_config<wl_proxy_add_listener> : fexgen::custom_host_impl, fexgen::custom_guest_entrypoint {};
// Callback table
template<>
struct fex_gen_param<wl_proxy_add_listener, 1, void (**)()> : fexgen::ptr_passthrough {};
// User-provided data pointer (not used in caller-provided callback)
template<>
struct fex_gen_param<wl_proxy_add_listener, 2, void*> : fexgen::assume_compatible_data_layout {};
// wl_proxy_create has a hand-written host implementation that receives the
// interface pointer as guest_layout<const wl_interface*>
template<>
struct fex_gen_config<wl_proxy_create> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<wl_proxy_create, 1, const wl_interface*> : fexgen::ptr_passthrough {};
// Remaining wl_proxy_* functions, listed without additional annotations
template<>
struct fex_gen_config<wl_proxy_create_wrapper> {};
template<>
struct fex_gen_config<wl_proxy_get_class> {};
template<>
struct fex_gen_config<wl_proxy_get_id> {};
template<>
struct fex_gen_config<wl_proxy_get_listener> {};
template<>
struct fex_gen_config<wl_proxy_get_tag> {};
template<>
struct fex_gen_config<wl_proxy_get_user_data> {};
template<>
struct fex_gen_config<wl_proxy_get_version> {};
template<>
struct fex_gen_config<wl_proxy_set_queue> {};
template<>
struct fex_gen_config<wl_proxy_set_tag> {};
// TODO: This has a void* parameter. Why does 32-bit accept this without annotations?
template<>
struct fex_gen_config<wl_proxy_set_user_data> {};
template<>
struct fex_gen_config<wl_proxy_wrapper_destroy> {};

// The wl_proxy_marshal_array* family has hand-written host implementations in
// Host.cpp. Their wl_argument* and wl_interface* parameters are passed
// through as guest_layout<...> values and converted there.
template<>
struct fex_gen_config<wl_proxy_marshal_array> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<wl_proxy_marshal_array, 2, wl_argument*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<wl_proxy_marshal_array_constructor> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_constructor, 2, wl_argument*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_constructor, 3, const wl_interface*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_config<wl_proxy_marshal_array_constructor_versioned> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_constructor_versioned, 2, wl_argument*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_constructor_versioned, 3, const wl_interface*> : fexgen::ptr_passthrough {};
// wl_proxy_marshal_array_flags is only available starting from Wayland 1.19.91
#if WAYLAND_VERSION_MAJOR * 10000 + WAYLAND_VERSION_MINOR * 100 + WAYLAND_VERSION_MICRO >= 11991
template<>
struct fex_gen_config<wl_proxy_marshal_array_flags> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_flags, 2, const wl_interface*> : fexgen::ptr_passthrough {};
template<>
struct fex_gen_param<wl_proxy_marshal_array_flags, 5, wl_argument*> : fexgen::ptr_passthrough {};
#endif

// FEX-specific helper functions (not part of libwayland-client). Each one is
// declared here and implemented by hand in Host.cpp.

// Guest notifies host about its interface object and the name of the matching
// host symbol. The host registers the pair and overwrites the guest object
// with the host interface's data.
void fex_wl_exchange_interface_pointer(wl_interface*, const char* name);
template<>
struct fex_gen_config<fex_wl_exchange_interface_pointer> : fexgen::custom_host_impl {};
template<>
struct fex_gen_param<fex_wl_exchange_interface_pointer, 0, wl_interface*> : fexgen::ptr_passthrough {};

// This is equivalent to reading proxy->interface->methods[opcode].signature on 64-bit.
// On 32-bit, the data layout differs between host and guest however, so we let the host extract the data.
void fex_wl_get_method_signature(wl_proxy*, uint32_t opcode, char*);
template<>
struct fex_gen_config<fex_wl_get_method_signature> : fexgen::custom_host_impl {};
// Returns the event count of the proxy's interface
int fex_wl_get_interface_event_count(wl_proxy*);
template<>
struct fex_gen_config<fex_wl_get_interface_event_count> : fexgen::custom_host_impl {};
// Copies the name of the event with the given index into the output buffer
void fex_wl_get_interface_event_name(wl_proxy*, int, char*);
template<>
struct fex_gen_config<fex_wl_get_interface_event_name> : fexgen::custom_host_impl {};
// Copies the signature of the event with the given index into the output buffer
void fex_wl_get_interface_event_signature(wl_proxy*, int, char*);
template<>
struct fex_gen_config<fex_wl_get_interface_event_signature> : fexgen::custom_host_impl {};


================================================
FILE: ThunkLibs/libxshmfence/Guest.cpp
================================================
/*
$info$
tags: thunklibs|xshmfence
$end_info$
*/

extern "C" {
#include <X11/xshmfence.h>
}

#include <stdio.h>
#include <cstring>
#include <map>
#include <string>

#include "common/Guest.h"
#include <stdarg.h>

#include "thunkgen_guest_libxshmfence.inl"

// NOTE(review): LOAD_LIB is not defined in this file; presumably it comes from
// common/Guest.h and sets up loading of the host thunk library - confirm.
LOAD_LIB(libxshmfence)


================================================
FILE: ThunkLibs/libxshmfence/Host.cpp
================================================
/*
$info$
tags: thunklibs|xshmfence
$end_info$
*/

#include <stdio.h>

#include <X11/xshmfence.h>

#include "common/Host.h"
#include <dlfcn.h>

#include "thunkgen_host_libxshmfence.inl"

// NOTE(review): EXPORTS is not defined in this file; presumably it comes from
// common/Host.h and emits the export table for this thunk library - confirm.
EXPORTS(libxshmfence)


================================================
FILE: ThunkLibs/libxshmfence/libxshmfence_interface.cpp
================================================
#include <common/GeneratorInterface.h>

extern "C" {
#include <X11/xshmfence.h>
}

// Primary annotation template keyed on a function pointer; the
// specializations below select the functions to thunk.
template<auto>
struct fex_gen_config {
  // NOTE(review): presumably the library version (libxshmfence.so.1) - confirm
  unsigned version = 1;
};

// Primary annotation template keyed on a type
template<typename>
struct fex_gen_type {};

// xshmfence is annotated as an opaque type
template<>
struct fex_gen_type<xshmfence> : fexgen::opaque_type {};

// Thunked functions, listed without additional annotations
template<>
struct fex_gen_config<xshmfence_trigger> {};
template<>
struct fex_gen_config<xshmfence_await> {};
template<>
struct fex_gen_config<xshmfence_query> {};
template<>
struct fex_gen_config<xshmfence_reset> {};
template<>
struct fex_gen_config<xshmfence_alloc_shm> {};
template<>
struct fex_gen_config<xshmfence_map_shm> {};
template<>
struct fex_gen_config<xshmfence_unmap_shm> {};


================================================
FILE: docs/CPUID.md
================================================
# FEXCore custom CPUID functions

## 4000_0000h - Hypervisor information function
* Follows VMware and Microsoft's hypervisor information proposal
* https://lwn.net/Articles/301888/
* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/feature-discovery

* EAX - The maximum input value for the hypervisor CPUID information
  * 4000_0001h
* EBX - Hypervisor vendor ID signature
  * 'FEXI' - 4958_4546h
* ECX - Hypervisor vendor ID signature
  * 'FEXI' - 4958_4546h
* EDX - Hypervisor vendor ID signature
  * 'EMU\0' - 0055_4d45h

* memcpy ebx:ecx:edx in to a 12 byte string to get 'FEXIFEXIEMU\0' for determining running under FEX

## 4000_0001h - Hypervisor config function

### Sub-Leaf 0: ECX == 0
* EAX:
  * Bits EAX[3:0] - Host architecture
    * 0 - Unknown architecture
    * 1 - x86_64
    * 2 - AArch64
    * 3-15: **Reserved**
  * Bits EAX[15:4] - **Reserved**
  * Bits EAX[31:16] - Maximum subleaf input value for CPUID function 4000_0001h
* EBX - **Reserved** - Read as zero
* ECX - **Reserved** - Read as zero
* EDX - **Reserved** - Read as zero

### Sub-leaf 1: ECX == 1
* FEX version string signature. First 16-bytes
* memcpy eax:ebx:ecx:edx in to the first 16-bytes of a string.

### Sub-leaf 2: ECX == 2
* FEX version string signature. Second 16-bytes
* memcpy eax:ebx:ecx:edx in to the second 16-bytes of a string.

### Sub-Leaf 0000_0003 - FFFF_FFFF: **Reserved**

## 4000_0002h - 4000_000Fh
* **Reserved range**
* Returns zero until implemented

## 4000_0010h - 4FFF_FFFFh
* **Undefined**
* FEX-Emu will return zero until implemented


================================================
FILE: docs/DeferredSignals.md
================================================
# Deferred signals and why FEX needs them

FEX-Emu has locations in its code which are effectively "uninterruptible", in the sense that if the guest application receives a signal during an
"uninterruptible" code section, then FEX is likely to hang or crash in spurious and terrible ways.

## Example
When FEX is in the process of emitting code, it often needs to acquire mutexes to safeguard operations like memory allocations or reading guest state.
This puts FEX in a vulnerable state: If a signal is received in the middle of this, FEX may need to initiate compilation of new code. In this case a
mutex could already be held, so attempting to acquire it again would trigger a deadlock.

## How do we solve this?

### Classical signal masking
One solution to this problem is to mask **all** signals going in to an uninterruptible section and then unmask when leaving. This is the classical
approach that is viable if performance isn't a significant concern. A major problem is that it requires two system calls per "uninterruptible" code
section, which adds overhead that may exceed the runtime of the section itself.

### Cooperative signal deferring
A new solution is to defer asynchronous signals caught inside an uninterruptible section and handle them at the end of that section.

At the basic level, we increment a reference counter going in to the "uninterruptible" section, and then decrement the reference counter once we leave.
This way when the signal handler receives a signal, it can check that thread's reference counter, store the `siginfo_t` to an array/stack object, and
return to the same code segment to be handled later.

By making this check as cheap as possible, overhead is minimized for the general case that no signal occurs during "uninterruptible" sections. FEX
achieves this by maintaining two memory regions for tracking deferred signals **per thread**.

#### 1st memory region

This region is FEX's InternalThreadState object, which is always resident for each guest thread and usually inside a register inside the JIT.
Inside this object is where the reference counter for "uninterruptible" code segments lives. It is specifically a reference counter since these
code segments may nest inside each other and we can only interrupt with a signal if the counter is zero.

This reference counter is thread local and won't be read by any other threads, so it can use a non-atomic increment and decrement.
This means it usually takes three instructions (on ARM64) for each increment or decrement.

```cpp
NonAtomicRefCounter<uint64_t> DeferredSignalRefCount;
```

#### 2nd memory region

This memory region is a single page of memory that is allocated per thread. Its purpose is to trigger a SIGSEGV when FEX leaves an "uninterruptible"
section if a signal has been deferred. FEX's signal handler will check if the faulting address is in this special page and subsequently starts the
deferred signal mechanisms.

```cpp
NonAtomicRefCounter<uint64_t> *InterruptFaultPage;
```

#### Example ARM64 JIT code for uninterruptible region
```asm
  ; Increment the reference counter.
  ldr x0, [x28, #(offsetof(CPUState, DeferredSignalRefCount))]
  add x0, x0, #1
  str x0, [x28, #(offsetof(CPUState, DeferredSignalRefCount))]

  ; Do the uninterruptible code section here.
  <...>

  ; Now decrement the reference counter.
  ldr x0, [x28, #(offsetof(CPUState, DeferredSignalRefCount))]
  sub x0, x0, #1
  str x0, [x28, #(offsetof(CPUState, DeferredSignalRefCount))]

  ; Just store zero. (1 cycle plus no dependencies on a register. Super fast!)
  ; Will store fine with no deferred signal, or SIGSEGV if there was one!
  strb wzr, [x28, #(offsetof(CPUState, InterruptFaultPage))]
```

### Deferred signal handling
In the case that FEX has received a signal, FEX's signal handler will first check to see if that thread's reference counter is zero or not.

#### Reference counter is zero
This is the easy case, just handle the signal as normal.

#### Reference counter is not zero
The signal handler now knows that FEX is in an uninterruptible code section. We check the signal to see if it is a synchronous signal or not.
- If the signal is synchronous then we need to handle it as normal, because this is a hardware signal that we can't defer.
- If it is an async signal (from tgkill, sigqueue, or something else) then we will start the deferring process.

The deferring process starts with storing the kernel `siginfo_t` to a thread local array so we can restore it later.
We then modify the permissions on the thread local `InterruptFaultPage` to be `PROT_NONE`.
We then immediately return from the signal handler so that FEX can resume its "uninterruptible" code section without breaking anything.
Once the "uninterruptible" code section finishes, FEX will intentionally trigger a SIGSEGV by storing to the page.

Once FEX-Emu is in its SIGSEGV handler, it will determine that it is handling a deferred signal. This will pull the previously saved `siginfo_t` and
start processing the signal.

Once a guest signal handler has finished what it was working on, it will call `rt_sigreturn` or `sigreturn` which triggers FEX's SIGILL signal
handler.

Inside of this SIGILL signal handler FEX will restore the state of FEX /back/ to where the deferred signal handler started (The str xzr, [x0]).
Then, FEX will check if any further deferred signals need to be handled.
- Checks if the reference counter is zero or not
   - If further asynchronous signals have been triggered that need handling, mprotect the fault page to `PROT_NONE`
      - This trampolining is repeated once per asynchronous signal queued during processing.
      - This will cause further signal handling immediately once the JIT returns to its original location (where it'll cause a SIGSEGV again).

Once FEX gets back to the page store, it will trampoline back to the SIGSEGV handler if it has more signals to handle.

## Disadvantages of cooperative signal deferring
- How do we handle the guest doing a longjmp out of a signal frame and still receiving signals?
   - FEX relies on guest signal handlers returning via `sigreturn` to handle stacked deferred signals, so a longjmp would interfere with this
   -  Do we need to store guest stack as well to see if it has reset its own stack frame?
   - moon-buggy does this as an example
   - We currently just leak stack for every guest signal handler that long jumps out of the signal frame.
      - Long term this would exhaust our stack and then crash.
      - Test with a second guest thread where our host will only have an 8MB stack instead of the 128MB primary stack.
      - See issue #2487
- Deeply recursive signal deferring sections can have excessive SIGSEGV faults.
   - In the case of ARM64 it will do a SIGSEGV at the end of each deferred signal section if a signal is queued.
   - This can result in a bunch of trampolining.
   - Just make sure to not do excessive nesting of deferred signal sections.
   - Typically not a problem since deferred signals aren't common.

## Expectations and considerations
### What happens with a race condition with the refcounter?
There are two edges to this problem. The incrementing edge and the decrementing edge that must be considered.

#### Incrementing edge
This is the most problematic edge. This takes three instructions (one on x86) to increment the ref counter. If a signal is received between the load
and store then this theoretically could result in a tear on the refcounter. In actual practice this is a real tear but doesn't cause any problems.

The reasoning for this is that FEX isn't in the "uninterruptible" section until that reference counter has been stored, so FEX will handle the signals
immediately at that point, return to this code location, and then increment the counter. In particular, once returning to the code location the
refcounter will be the original value loaded. So even though it is a tear, it's one that doesn't cause issues since it is all thread local.

#### Decrementing edge
This edge is far less problematic to understand compared to the incrementing edge. Signals will get deferred entirely until the store instruction (If
storing zero), so FEX will always return to the code region and finish the decrement.

If FEX receives a signal after the decrement store has completed but /before/ the page faulting store has occurred, then FEX will start processing the
signal immediately. At which point the fault page will have either RW or NONE permission. FEX will then likely hit another "uninterruptible" code
section which will complete the store to the fault page.
 - RW permission if it hadn't received another signal in the uninterruptible section
 - NONE permission if it did receive a signal previously

RW permission has no problems, it will continue as normal.

NONE permission will get captured by the fault handler: the handler will determine that there were no deferred signals, set the fault page back to RW
permissions, and continue execution safely.

## Execution examples
### No signal
This is a simple example because nothing happens.

- **Enter Deferred region - 3 instructions**
- Compiling JIT Code
- **Exit deferred region - 5 instructions**

### Signal outside of region
This is simple because the JIT just handles it.

- In JIT code
- Signal received
- Guest Signal handler called
- JIT jumps to guest signal handler
- Hopefully guest calls rt_sigreturn instead of long jumping out.

### Synchronous signal in JIT
Deferred signals don't affect anything here because only asynchronous signals get affected.

- In JIT code
- JIT code causes a synchronous signal (SIGSEGV or other)
- Guest Signal Handler called
- JIT jumps to guest signal handler
- Hopefully guest calls rt_sigreturn instead of long jumping out.

### Asynchronous signal in code emitter
This is the first interesting example since deferred signals affect it.

- **Enter Deferred region - 3 instructions**
- Compiling JIT Code
- Asynchronous Signal received
  - Host signal handler determines the thread is in a deferred signal section.
  - Signal information is stored in a queue
  - mprotect signal page to NONE.
  - Signal handler returns without giving the signal to the guest
- Compiling JIT Code continues.
- **Exit deferred region - 5 instructions**
- Deferred region section causes SIGSEGV
  - Host signal handler determines deferred region is done, Still has signal in queue.
  - Pull signal information off of queue
- JIT jumps to guest signal handler
- Hopefully guest calls rt_sigreturn instead of long jumping out.
- Host PC is back at deferred signal section.
- Deferred region section causes SIGSEGV #2
  - Host signal handler determines deferred region is done, No signals in the queue.
  - mprotect signal page to RW.
  - Continue execution.
- **Exit deferred region continues**

### Recursive regions with signal in code emitter.
This one mostly matches the previous example except the behaviour of deferred signal regions leaving.

In this case, if the thread-local refcount is still >0 on `<Exit deferred region>` then there are two behaviours.
- On ARM64, it will receive a SIGSEGV but the signal handler will increment PC by one instruction and continue execution
   - Expectation is that signals are significantly less common than `<Exit deferred region>` so the cost of SIGSEGV+PC increment is faster.
- On x86-64, the region exit checks the refcount before doing the fault access.
   - This adds more instructions so is slower on average.

This has the expectation that recursive deferred regions both aren't very deep (usually only nested a couple times), and that signals are rare.
This way there aren't many SIGSEGV checks generated and the signal is finally only handled when reaching the top-most deferred region exit routine.

- **Enter Deferred region - 3 instructions**
- Compiling JIT Code
     - Enter Deferred region - 3 instructions
     - Memory allocation
     - Async signal received logic from above
     - Exit deferred region - 5 instructions
     - Exit deferred region causes SIGSEGV
     - TLS refcount is still 1
     - PC is incremented by one instruction, signal still unhandled.
- **Exit deferred region - 5 instructions**
- Exit deferred region causes SIGSEGV
- **Regular deferred region handling from above called**

### Multiple signals in signal-deferring region
This is slightly different from the previous iterations since multiple signals in the stack result in odd behaviour.

- **Enter Deferred region - 3 instructions**
- Compiling JIT Code
- Asynchronous Signal received
    - Signal queued logic
- Asynchronous Signal received
    - Signal queued logic
- **Exit deferred region - 5 instructions**
- Exit deferred region causes SIGSEGV
- **Regular deferred region handling from above called**
- Guest calls rt_sigreturn
  - rt_sigreturn handler checks for number of queued signals
  - mprotect signal page to NONE because the number of queued signals is > 0
  - JIT is back to **Exit deferred region**
  - Exit deferred region causes SIGSEGV again.
  - Regular handler loop occurs


================================================
FILE: docs/ProgrammingConcerns.md
================================================
# Memory allocation routines
## What is the problem?
FEX-Emu needs to allocate memory differently than regular applications. This problem happens because FEX runs both 32-bit and 64-bit guest
applications in the same address space as FEX itself. When running 32-bit applications, FEX reserves all memory above 4GB in order to correctly
emulate the 32-bit address space. We then use that reserved space for FEX allocations, so we don't interfere with the application's own allocations.

### Why not just replace the system allocator?
We could control the placement of FEX's internal allocations by overriding the system
allocator. However, 32-bit thunks (and their corresponding native host libraries) still
need to allocate memory in the lower 4 GB of memory so that they produce
guest-accessible pointers. Since overriding the system allocator is a global operation,
selectively overriding it like this is not possible.

Since we found no way to resolve this conflict, we had to resort to the alternative of avoiding use of the system allocator for FEX's internal
allocations entirely (where possible).

## Sub-projects and applications that need to follow this.
- FEXCore
- FEXInterpreter

## Sub-projects that explicitly cannot follow this
- Thunks

## APIs which allocate memory that FEX needs to avoid
Most C++ APIs allow you to replace their allocators, but some don't and we need to avoid those APIs. If FEX uses them then the 32-bit application
running might run out of memory. This isn't an all encompassing list and we will add to it as our CI captures more problems.

### `get_nprocs_conf`
Use `FEX::CPUInfo::CalculateNumberOfCPUs` instead.

### `getcwd`
Don't use getcwd with a nullptr buffer. It will allocate memory behind our back and return a pointer that needs a free.

### `strerror`
This allocates and frees memory based on locale! Even with the C locale it'll attempt to free(0).
FEX-Emu should avoid using this function and instead just return the number.
If necessary FEX will provide its own routine for getting this string back.

### `std::make_unique`
Use `fextl::make_unique` instead.

### `std::unique_ptr`
Use `fextl::unique_ptr` instead.

### `std::filesystem`
This namespace is /highly/ likely to allocate memory behind our back.
FEX should avoid using this API as much as possible.

#### std::filesystem::path
#### std::filesystem::path::is_relative
Use `FHU::Filesystem::IsRelative` instead.

#### std::filesystem::absolute
Always allocates memory.
Use `realpath` instead.

#### std::filesystem::exists
Creates a std::filesystem::path when passing in to it.
Use `FHU::Filesystem::Exists` instead.

#### std::filesystem::canonical
Always allocates memory.
Use `realpath` instead.

#### std::filesystem::path::lexically_normal
Use `FHU::Filesystem::LexicallyNormal` instead.

#### std::filesystem::create_directory
#### std::filesystem::create_directories
Creates a std::filesystem::path when passing in to it.
Use `FHU::Filesystem::CreateDirectory` and `FHU::Filesystem::CreateDirectories` instead.

#### std::filesystem::path::parent_path
Use `FHU::Filesystem::ParentPath` instead.

#### std::filesystem::path::filename
Use `FHU::Filesystem::GetFilename` instead.

#### std::filesystem::copy_file
Use `FHU::Filesystem::CopyFile` instead.

#### std::filesystem::temp_directory_path
See `GetTempFolder()` in `FEXServerClient.cpp` (split/move to `FHU::Filesystem` if needed by other users).

### Any `FILE`-based API
`FILE` always allocates memory and must be avoided.
Use a combination of raw FDs and fextl::string APIs instead.

#### Includes but not limited to:
* `<fstream>` -> `std::fstream`
* `<cstdio>` -> `std::fwrite`

### `std::string`
Use `fextl::string` instead.

#### std::to_string
Use `fextl::fmt::format` instead.

### `std::stol`
### `std::stoul`
### `std::stoll`
### `std::stoull`
These all consume a `std::string` as their first argument. Use the equivalent functions that don't use `std::string`
- `std::strtol`
- `std::strtoul`
- `std::strtoll`
- `std::strtoull`

### `fmt::`
### `fmt::format`
Use `fextl::fmt::` instead

### `getpwuid` and `getpwuid_r`
Allocates memory for parsing passwd and other files. One would assume `getpwuid_r` would use the buffer passed in, but nope glibc nss_database_get
allocates memory.

### APIs that FEX doesn't have a replacement for
Don't use any of these APIs in FEXLoader/FEXInterpreter. Shoutout to
[this](https://stackoverflow.com/questions/43056338/standard-library-facilities-which-allocate-but-dont-use-an-allocator) StackOverflow post for this
huge list.

#### `std::any`
#### `std::function` and lambdas
One must take additional considerations when using these to ensure that they don't allocate memory. These don't have any way to replace which
allocator is used for these objects. Additionally there is no way up-front to know if these will allocate memory or if the compiler will use
small-function optimizations to avoid allocations. The only real way to check this is to enable the glibc faulting compile option.
- Lambdas without anything in the capture list will never allocate memory.
- One pointer in the capture list is likely to hit small-function optimizations and not allocate memory.
   - This isn't guaranteed.
- Passing the `std::function` as an argument is unlikely to optimize away their memory allocations.

#### `std::valarray`
#### `std::filebuf`
#### `std::inplace_merge`
#### `<stdexcept>`
#### `std::boyer_moore_searcher`
#### `std::filesystem::path`
#### `std::filesystem::directory_iterator`
#### `std::regex`
#### `std::thread`
#### `std::async`
#### `std::packaged_task`
#### `std::promise`
#### `<iostream>`
#### Remember this is not an all-encompassing list! We may find APIs that still allocate memory and need to be avoided!

### Regular memory allocation routines.
Don't use these directly as they will use the glibc allocator.

#### mmap
Use `FEXCore::Allocator::mmap`

#### munmap
Use `FEXCore::Allocator::munmap`

#### malloc
Use `FEXCore::Allocator::malloc`

#### calloc
Use `FEXCore::Allocator::calloc`

#### memalign 
Use `FEXCore::Allocator::memalign`

#### valloc
Use `FEXCore::Allocator::valloc`

#### posix_memalign
Use `FEXCore::Allocator::posix_memalign`

#### realloc
Use `FEXCore::Allocator::realloc`

#### free
Use `FEXCore::Allocator::free`

#### aligned_alloc
Use `FEXCore::Allocator::aligned_alloc`

#### __libc_malloc
#### __libc_calloc
#### __libc_memalign
#### __libc_valloc
#### __posix_memalign
#### __malloc_usable_size
!! DO NOT USE !!

## How does FEX ensure that this paradigm doesn't break?
FEX has the cmake option `ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT` to hook in to glibc's allocator and fault if anything is allocating through it.
This can't be used with thunks for thunk testing as those actually use the glibc allocator.
CI will run FEX's test suite with extra verification to ensure that no allocations are made through glibc. Thunking must be disabled for this
run, since thunks are by design the only place where glibc allocations still happen.


================================================
FILE: docs/Readme_CN.md
================================================
[English](https://github.com/FEX-Emu/FEX/blob/main/Readme.md)
# FEX —— 快速的x86模拟器前端
FEX和qemu-user以及box86类似，允许你在AArch64的host端运行x86和x86-64二进制程序。
FEX原生支持rootfs（作为guest程序的运行环境），所以无需使用chroot。同时支持thunklibs将guest程序所用到的库转发到host，例如：libGL。
FEX为guest程序提供Linux 5.0的接口（系统调用），同时支持AArch64和x86-64做为host。
FEX处于重度开发阶段，所以会有很多改善。


## 快速指引
### Ubuntu 20.04, 21.04, 21.10, 22.04
在终端执行以下命令添加PPA去安装FEX。

`curl --silent https://raw.githubusercontent.com/FEX-Emu/FEX/main/Scripts/InstallFEX.py --output /tmp/InstallFEX.py && python3 /tmp/InstallFEX.py && rm /tmp/InstallFEX.py`

这条命令将会引导你通过PPA安装FEX，然后下载FEX所需的RootFS。

Ubuntu下的PPA 随FEX月度发布更新。

### 其他系统
参考[这里](https://wiki.fex-emu.com/index.php/QuickStartGuide)

## 开始
FEX在ARMv8.0，ARMv8.1+和x86-64(支持AVX或更新处理器)硬件上进行过编译和运行测试。
不支持ARMv7以及老旧的x86处理器。
同时需要确保操作系统为Linux。FEX在Ubuntu 20.04，20.10和21.04以及Arch Linux上测试过。
在AArch64 host端，用户需要准备x86-64 RootFS，详见[创建RootFS](#创建rootfs)。

### 源码导览
详见[源码大纲](SourceOutline.md)。

### 编译依赖
* cmake (version 3.14 minimum)
* ninja-build
* clang (version 10 minimum for C++20)
* libglfw3-dev (For GUI)
* libsdl2-dev (For GUI)
* libepoxy-dev (For GUI)
* g++-x86-64-linux-gnu (For building thunks)
* nasm (only if building tests)

### 编译FEX
安装完依赖后，通过以下命令进行编译。
```Shell
git clone https://github.com/FEX-Emu/FEX.git
cd FEX
git submodule update --init
mkdir Build
cd Build
CC=clang CXX=clang++ cmake -DCMAKE_INSTALL_PREFIX=/usr -DCMAKE_BUILD_TYPE=Release -DENABLE_LTO=True -DBUILD_TESTING=False -G Ninja ..
ninja
```

### 安装
```Shell
sudo ninja install
```

### 关于AArch64 Hosts
在AArch64使用binfmt_misc（执行下述命令）可以支持32位和64位x86程序直接运行。如果已经安装了box86 binfmt_misc配置，在FEX达到可用状态前我并不建议安装FEX进行替代。请确保install命令在下述命令前执行，不然binfmt_misc将依旧使用旧版本的FEX，即使FEX已经更新。
```Shell
sudo ninja binfmt_misc_32
sudo ninja binfmt_misc_64
```

### 更多信息
更多关于FEX和平台相关的设置信息请参考以下维基页面：
https://wiki.fex-emu.com/index.php/Development:Setting_up_FEX

### 创建RootFS
AArch64 host端需要一个rootfs去运行guest程序。参考以下维基页面从头开始创建一个rootfs
https://wiki.fex-emu.com/index.php/Development:Setting_up_RootFS


================================================
FILE: docs/ReleaseProcess.md
================================================
# FEX tagged version (release) process
A FEX tagged version happens near the start of each month.

The tagged versioning is `FEX-<YYMM>` with the month being the current month.

If a tagged version was being done on `Sun, 02 Jan 2022` then the FEX version would be FEX-2201

There are multiple locations that need to be updated during a release
* Github tagged release
* Github releases page
* fex-emu.com blog post
* https://launchpad.net/~fex-emu/+archive/ubuntu/fex Ubuntu PPA
* @FEX_Emu twitter account

* Optional: Update the rootfs images

## Github Steps
* Check out the commit that will be the branch

  $ git checkout upstream/main

* Make local main branch be the selected commit

  $ git branch -D main
  $ git checkout -b main

* Run the release script

  $ Scripts/generate_release.sh

* Push the branches upstream
  * This requires administrative push rights
  * Both the tag and the main branch need to be pushed

  $ git push upstream $CURRENT
  $ git push upstream main

## Launchpad PPA steps
Follow the steps in: https://github.com/FEX-Emu/FEX-ppa/blob/main/README.md
* Requires PPA GPG key signing access
* Wait the 20-30 minutes for Ubuntu PPA to build and publish the binaries

## Github releases page Steps
* Requires administrative rights
* Go to https://github.com/FEX-Emu/FEX/releases
* Click Draft a new release
* Copy and paste the tagged changelog in to the draft release markdown
  * This was generated from the generate_release.sh script
* Clean the markdown to a desired level of combining and ordering
  * Fairly trivial cleanups, it's more just a developer focused changelog
* Click publish release

## fex-emu.com blog post steps
* clone https://github.com/FEX-Emu/fex-emu.com
* Copy the previous post from the _posts/ folder to a new markdown file
  * Ensure correct date format in filename
* Copy github release pages markdown in to this
* Easy to forget areas:
  * Title text section
  * See Release notes top section, links to github release tag
  * See detailed changelog at the bottom, linking to github raw revision comparison
* Short blurb in the top paragraph if desired
* push new md file to the repo. Either in direct push or PR
* Jekyll will automatically regenerate the website with a github action
* Verify that the post shows up on the site at fex-emu.com

## @FEX_Emu twitter account steps
* Requires @FEX_Emu twitter account access
* Create a tweet with some small blurb/sizzle text about some relevant changes in this tagged version
* Link to the fex-emu.com blog post about the change

## RootFS image updating
* This doesn't typically need to be done on a monthly basis
* This lives in https://github.com/FEX-Emu/RootFS

* Follow the Build_Data file's information for how to generate an image using `build_image.py`
  * This gives a squashfs image for the rootfs
* Use FEXRootFSFetcher <image.sqsh> to generate the xxhash for the image
* Update `https://rootfs.fex-emu.com/file/fex-rootfs/RootFS_links.json` with the new rootfs image and hash
  * This currently lives in a private FEX-Emu backblaze bucket with cloudflare servicing it.
  * Never publicly give the direct backblaze link to the file. Will cause BW costs to skyrocket
  * Always pass through cloudflare

* Upload new image to Backblaze using the b2 upload tool
  * b2 upload-file <bucketname> <image.sqsh> <Image folder name>/<image.sqsh>

* Upload the new RootFS_links.json
  * Lives in the root of the bucket
  * b2 upload-file <bucketname> RootFS_links.json RootFS_links.json

* Once uploaded it should propagate immediately
* Might be worth thinking about the coherency problem of updating the hash versus image independently if overwriting an image
  * Need to be careful about it to not break anyone in the process of downloading an image


================================================
FILE: docs/SourceOutline.md
================================================
# FEX-2603

## FEXCore
See [FEXCore/Readme.md](../FEXCore/Readme.md) for more details

### Glossary

- Splatter: a code generator backend that concatenates configurable macros instead of doing isel
- IR: Intermediate Representation, our high-level opcode representation, loosely modeling arm64
- SSA: Static Single Assignment, a form of representing IR in memory
- Basic Block: A block of instructions with no control flow, terminated by control flow
- Fragment: A Collection of basic blocks, possibly an entire guest function or a subset of it


### backend
IR to host code generation

#### arm64
- [ALUOps.cpp](../FEXCore/Source/Interface/Core/JIT/ALUOps.cpp)
- [Arm64Relocations.cpp](../FEXCore/Source/Interface/Core/JIT/Arm64Relocations.cpp): relocation logic of the arm64 splatter backend
- [AtomicOps.cpp](../FEXCore/Source/Interface/Core/JIT/AtomicOps.cpp)
- [BranchOps.cpp](../FEXCore/Source/Interface/Core/JIT/BranchOps.cpp)
- [ConversionOps.cpp](../FEXCore/Source/Interface/Core/JIT/ConversionOps.cpp)
- [EncryptionOps.cpp](../FEXCore/Source/Interface/Core/JIT/EncryptionOps.cpp)
- [JIT.cpp](../FEXCore/Source/Interface/Core/JIT/JIT.cpp): Main glue logic of the arm64 splatter backend
- [JITClass.h](../FEXCore/Source/Interface/Core/JIT/JITClass.h)
- [MemoryOps.cpp](../FEXCore/Source/Interface/Core/JIT/MemoryOps.cpp)
- [MiscOps.cpp](../FEXCore/Source/Interface/Core/JIT/MiscOps.cpp)
- [MoveOps.cpp](../FEXCore/Source/Interface/Core/JIT/MoveOps.cpp)
- [VectorOps.cpp](../FEXCore/Source/Interface/Core/JIT/VectorOps.cpp)

#### shared
- [CPUBackend.h](../FEXCore/Source/Interface/Core/CPUBackend.h)


### frontend

#### x86-meta-blocks
- [Frontend.cpp](../FEXCore/Source/Interface/Core/Frontend.cpp): Extracts instruction & block meta info, frontend multiblock logic

#### x86-tables
- [BaseTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/BaseTables.cpp)
- [DDDTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/DDDTables.cpp)
- [H0F38Tables.cpp](../FEXCore/Source/Interface/Core/X86Tables/H0F38Tables.cpp)
- [H0F3ATables.cpp](../FEXCore/Source/Interface/Core/X86Tables/H0F3ATables.cpp)
- [PrimaryGroupTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/PrimaryGroupTables.cpp)
- [SecondaryGroupTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/SecondaryGroupTables.cpp)
- [SecondaryModRMTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/SecondaryModRMTables.cpp)
- [SecondaryTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/SecondaryTables.cpp)
- [VEXTables.cpp](../FEXCore/Source/Interface/Core/X86Tables/VEXTables.cpp)
- [X86Tables.h](../FEXCore/Source/Interface/Core/X86Tables/X86Tables.h)
- [X87Tables.cpp](../FEXCore/Source/Interface/Core/X86Tables/X87Tables.cpp)

#### x86-to-ir
- [AVX_128.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp): Handles x86/64 AVX instructions to 128-bit IR
- [Crypto.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp): Handles x86/64 Crypto instructions to IR
- [Flags.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp): Handles x86/64 flag generation
- [Vector.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp): Handles x86/64 Vector instructions to IR
- [X87.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp): Handles x86/64 x87 to IR
- [X87F64.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp): Handles x86/64 x87 to IR
- [OpcodeDispatcher.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp): Handles x86/64 ops to IR, no-pf opt, local-flags opt


### glue
Logic that binds various parts together

#### block-database
- [LookupCache.cpp](../FEXCore/Source/Interface/Core/LookupCache.cpp): Stores information about blocks, and provides C++ implementations to lookup the blocks

#### driver
Emulation mainloop related glue logic
- [Core.cpp](../FEXCore/Source/Interface/Core/Core.cpp): Glues Frontend, OpDispatcher and IR Opts & Compilation, LookupCache, Dispatcher and provides the Execution loop entrypoint

#### log-manager
- [LogManager.cpp](../FEXCore/Source/Utils/LogManager.cpp)

#### thunks
- [Thunks.h](../FEXCore/include/FEXCore/Core/Thunks.h)


### ir

#### debug
- [IRDumperPass.cpp](../FEXCore/Source/Interface/IR/Passes/IRDumperPass.cpp): Prints IR

#### dumper
IR -> Text
- [IRDumper.cpp](../FEXCore/Source/Interface/IR/IRDumper.cpp)

#### emitter
C++ Functions to generate IR. See IR.json for spec.
- [IREmitter.cpp](../FEXCore/Source/Interface/IR/IREmitter.cpp)

#### opts
IR to IR Optimization
- [PassManager.cpp](../FEXCore/Source/Interface/IR/PassManager.cpp): Defines which passes are run, and runs them
- [PassManager.h](../FEXCore/Source/Interface/IR/PassManager.h)
- [IRValidation.cpp](../FEXCore/Source/Interface/IR/Passes/IRValidation.cpp): Sanity checking pass
- [RedundantFlagCalculationElimination.cpp](../FEXCore/Source/Interface/IR/Passes/RedundantFlagCalculationElimination.cpp)
- [RegisterAllocationPass.cpp](../FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.cpp)
- [RegisterAllocationPass.h](../FEXCore/Source/Interface/IR/Passes/RegisterAllocationPass.h)


### opcodes

#### cpuid
- [CPUID.cpp](../FEXCore/Source/Interface/Core/CPUID.cpp): Handles presented capability bits for guest cpu

#### dispatcher-implementations
- [AVX_128.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/AVX_128.cpp): Handles x86/64 AVX instructions to 128-bit IR
- [Crypto.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Crypto.cpp): Handles x86/64 Crypto instructions to IR
- [Flags.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Flags.cpp): Handles x86/64 flag generation
- [Vector.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/Vector.cpp): Handles x86/64 Vector instructions to IR
- [X87.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/X87.cpp): Handles x86/64 x87 to IR
- [X87F64.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher/X87F64.cpp): Handles x86/64 x87 to IR
- [OpcodeDispatcher.cpp](../FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp): Handles x86/64 ops to IR, no-pf opt, local-flags opt

## ThunkLibs
See [ThunkLibs/README.md](../ThunkLibs/README.md) for more details

### thunklibs
These are generated + glue logic 1:1 thunks unless noted otherwise

#### EGL
- [libEGL_Guest.cpp](../ThunkLibs/libEGL/libEGL_Guest.cpp): Depends on glXGetProcAddress thunk
- [libEGL_Host.cpp](../ThunkLibs/libEGL/libEGL_Host.cpp)

#### GL
- [libGL_Guest.cpp](../ThunkLibs/libGL/libGL_Guest.cpp): Handles glXGetProcAddress
- [libGL_Host.cpp](../ThunkLibs/libGL/libGL_Host.cpp): Uses glXGetProcAddress instead of dlsym

#### SDL2
- [libSDL2_Guest.cpp](../ThunkLibs/libSDL2/libSDL2_Guest.cpp): Handles sdlglproc, dload, stubs a few log fns
- [libSDL2_Host.cpp](../ThunkLibs/libSDL2/libSDL2_Host.cpp)

#### VDSO
- [libVDSO_Guest.cpp](../ThunkLibs/libVDSO/libVDSO_Guest.cpp): Linux VDSO thunking

#### Vulkan
- [Guest.cpp](../ThunkLibs/libvulkan/Guest.cpp)
- [Host.cpp](../ThunkLibs/libvulkan/Host.cpp)

#### asound
- [libasound_Guest.cpp](../ThunkLibs/libasound/libasound_Guest.cpp)
- [libasound_Host.cpp](../ThunkLibs/libasound/libasound_Host.cpp)

#### drm
- [Guest.cpp](../ThunkLibs/libdrm/Guest.cpp)
- [Host.cpp](../ThunkLibs/libdrm/Host.cpp)

#### fex_malloc
- [Guest.cpp](../ThunkLibs/libfex_malloc/Guest.cpp): Handles allocations between guest and host thunks
- [Host.cpp](../ThunkLibs/libfex_malloc/Host.cpp): Handles allocations between guest and host thunks

#### fex_malloc_loader
- [Guest.cpp](../ThunkLibs/libfex_malloc_loader/Guest.cpp): Delays malloc symbol replacement until it is safe to run constructors

#### fex_malloc_symbols
- [Host.cpp](../ThunkLibs/libfex_malloc_symbols/Host.cpp): Allows FEX to export allocation symbols

#### fex_thunk_test
- [Guest.cpp](../ThunkLibs/libfex_thunk_test/Guest.cpp)
- [Host.cpp](../ThunkLibs/libfex_thunk_test/Host.cpp)

#### wayland-client
- [Guest.cpp](../ThunkLibs/libwayland-client/Guest.cpp)
- [Host.cpp](../ThunkLibs/libwayland-client/Host.cpp)

#### xshmfence
- [Guest.cpp](../ThunkLibs/libxshmfence/Guest.cpp)
- [Host.cpp](../ThunkLibs/libxshmfence/Host.cpp)

## Source/Tests

## unittests
See [unittests/Readme.md](../unittests/Readme.md) for more details


================================================
FILE: docs/allocator_usage.md
================================================
# Dual allocator usage
FEX-Emu uses two different heap allocators at once, each for different purposes:
- rpmalloc: The primary heap allocator (to keep FEX's internal allocations out of the 32-bit address space used by guest applications)
- jemalloc_glibc: The second heap allocator (to add allocation introspection features used by thunks)

## rpmalloc - primary heap allocator
This allocator overrides `mmap` and `munmap` by forwarding them to FEXCore's internal VMA region allocator.

All of FEXCore's `fextl::` namespaced objects allocate memory with this method.

### FEXCore internal VMA region allocator
When running a 32-bit guest application, the VMA region allocator allocates from memory *above* the 4GB of virtual address space reserved for the
32-bit application.

This ensures that all of FEX's allocations stay out of the lower 32-bit 4GB VA space, since games would quickly run out of virtual address space
otherwise.

When running a 64-bit guest application, this VMA region allocator is disabled and passes through to mmap and munmap in the host kernel.

## jemalloc_glibc - secondary heap allocator
This heap allocator replaces the host glibc's allocator using weak symbol overriding. It adds introspection features used for thunking, but has no
functional differences otherwise: All memory is allocated in the 4GB of 32-bit address space. Hence, the FEXCore VMA region allocator is explicitly
**not** involved.

All native shared libraries use this allocator including the host-side of thunks.

Internally, all allocations that go through this heap allocator use the kernel mmap and munmap interface.

### Thunks
Thunks may allocate memory either through the guest-side (on the guest glibc heap) or the host-side (on the `jemalloc_glibc` heap). To properly free
this memory, FEX must be able to determine which heap allocator it belongs to.

`glibc` provides no public interface to do this, but FEX's `jemalloc` fork does. The `is_known_allocation` function is used by FEXCore to query
whether a given pointer originated from the `jemalloc_glibc` allocator. This enables FEXCore to determine the appropriate heap for freeing the
pointer.


================================================
FILE: unittests/32Bit_ASM/CMakeLists.txt
================================================
# NASM assembles every 32-bit test source in this directory, so configuration
# must stop with a clear diagnostic when no NASM-compatible assembler is found.
enable_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER_LOADED)
  # `error()` is not a CMake command (it only fails with "Unknown CMake command");
  # message(FATAL_ERROR ...) is the supported way to abort and print the reason.
  message(FATAL_ERROR "Failed to find NASM compatible assembler!")
endif()

# Accumulates every generated artifact (.bin and .config.bin) so a single
# custom target can depend on all of them.
set(ASM_DEPENDS "")

# Collect every .asm test below this directory.
# Careful: globbing cannot see changes to the contents of files,
# a fresh clean is needed to pick those up.
file(GLOB_RECURSE ASM_SOURCES CONFIGURE_DEPENDS *.asm)

# Ask the helper script for the host CPU class; the result selects the
# Disabled_Tests_<class> list passed to each test.
execute_process(
  COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/ClassifyCPU.py"
  OUTPUT_VARIABLE CPU_CLASS
  OUTPUT_STRIP_TRAILING_WHITESPACE)

# For every test source: wrap it with the 32-bit preamble, assemble it, generate
# its config blob, and register one CTest entry per runner configuration.
foreach(ASM_SRC ${ASM_SOURCES})
  # REL_ASM is relative to the source root (used for the output directory),
  # REL_TEST_ASM is relative to this directory (used in test names).
  file(RELATIVE_PATH REL_ASM ${CMAKE_SOURCE_DIR} ${ASM_SRC})
  file(RELATIVE_PATH REL_TEST_ASM ${CMAKE_CURRENT_SOURCE_DIR} ${ASM_SRC})
  get_filename_component(ASM_NAME ${ASM_SRC} NAME)
  get_filename_component(ASM_DIR "${REL_ASM}" DIRECTORY)
  set(OUTPUT_ASM_FOLDER "${CMAKE_BINARY_DIR}/${ASM_DIR}")

  # Generate build directory
  file(MAKE_DIRECTORY "${OUTPUT_ASM_FOLDER}")

  # Generate a temporary file
  set(ASM_TMP "${ASM_NAME}_TMP.asm")
  set(TMP_FILE "${OUTPUT_ASM_FOLDER}/${ASM_TMP}")

  # Copy the source, then use sed to prepend the preamble
  # (BITS 32, org 10000h, load 0x17 into DS and ES) and append a trailing `ret`.
  # Tests that depend on absolute code addresses must account for the preamble size.
  add_custom_command(OUTPUT ${TMP_FILE}
    DEPENDS "${ASM_SRC}"
    COMMAND "cp" ARGS "${ASM_SRC}" "${TMP_FILE}"
    COMMAND "sed" ARGS "-i" "-e" "\'1s;^;BITS 32\\norg 10000h\\nmov eax, 0x17\\nmov ds, ax\\nmov es, ax\\n;\'" "-e" "\'\$\$a\\ret\\n\'" "${TMP_FILE}")

  set(OUTPUT_NAME "${OUTPUT_ASM_FOLDER}/${ASM_NAME}.bin")
  set(OUTPUT_CONFIG_NAME "${OUTPUT_ASM_FOLDER}/${ASM_NAME}.config.bin")

  # Assemble the wrapped source into a flat binary.
  add_custom_command(OUTPUT ${OUTPUT_NAME}
    DEPENDS "${TMP_FILE}"
    COMMAND "nasm" ARGS "${TMP_FILE}" "-o" "${OUTPUT_NAME}")

  # Convert the test's embedded config (read from the original, unwrapped source)
  # into the binary config file consumed by the runner.
  add_custom_command(OUTPUT ${OUTPUT_CONFIG_NAME}
    DEPENDS "${ASM_SRC}"
    DEPENDS "${CMAKE_SOURCE_DIR}/Scripts/json_asm_config_parse.py"
    DEPENDS "${CMAKE_SOURCE_DIR}/Scripts/json_config_parse.py"
    COMMAND "python3" ARGS "${CMAKE_SOURCE_DIR}/Scripts/json_asm_config_parse.py" "${ASM_SRC}" "${OUTPUT_CONFIG_NAME}")

  list(APPEND ASM_DEPENDS "${OUTPUT_NAME};${OUTPUT_CONFIG_NAME}")

  # TEST_ARGS is a flat list of triples:
  #   (space-separated environment settings, test-name prefix, test type)
  set(TEST_ARGS)
  if (ARCHITECTURE_arm64 OR ENABLE_VIXL_SIMULATOR)
    list(APPEND TEST_ARGS
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=1 FEX_MULTIBLOCK=0 FEX_TSOENABLED=0"   "jit_1"     "jit"
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=500 FEX_MULTIBLOCK=0 FEX_TSOENABLED=0" "jit_500"   "jit"
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=500 FEX_MULTIBLOCK=1 FEX_TSOENABLED=0" "jit_500_m" "jit")
  endif()

  # The simulator overrides the detected CPU class; the "host" configuration is
  # only added on x86_64 when the simulator is not enabled.
  if (ENABLE_VIXL_SIMULATOR)
    set(CPU_CLASS Simulator)
  elseif (ARCHITECTURE_x86_64)
    list(APPEND TEST_ARGS "FEX_SILENTLOG=0 FEX_DUMPGPRS=1" "host" "host")
  endif()

  # MinGW builds run the Windows runner executable through wine.
  if (NOT MINGW)
    set(LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
  else()
    set(LAUNCH_PROGRAM "wine" "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner.exe")
  endif()

  # Walk TEST_ARGS three entries at a time (one triple per iteration).
  # NOTE(review): if TEST_ARGS is empty, ARG_COUNT becomes -1 here; confirm that
  # configuration (neither arm64, simulator, nor x86_64) cannot occur.
  list(LENGTH TEST_ARGS ARG_COUNT)
  math(EXPR ARG_COUNT "${ARG_COUNT}-1")
  foreach(Index RANGE 0 ${ARG_COUNT} 3)
    math(EXPR TEST_NAME_INDEX "${Index}+1")
    math(EXPR TEST_TYPE_INDEX "${Index}+2")

    list(GET TEST_ARGS ${Index} FEX_ARGS)
    list(GET TEST_ARGS ${TEST_NAME_INDEX} TEST_DESC)
    list(GET TEST_ARGS ${TEST_TYPE_INDEX} TEST_TYPE)

    set(TEST_NAME "${TEST_DESC}/Test_32Bit_${REL_TEST_ASM}")
    # Turn the space-separated environment string into a CMake list.
    string(REPLACE " " ";" FEX_ARGS_LIST ${FEX_ARGS})
    # The runner script receives the known-failure and disabled-test lists
    # (generic, per test type, per CPU class), the test identifiers, the launch
    # program, and finally the assembled binary plus its config.
    add_test(NAME ${TEST_NAME}
      COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/testharness_runner.py"
      "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Known_Failures"
      "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Known_Failures_${TEST_TYPE}"
      "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Disabled_Tests"
      "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Disabled_Tests_${TEST_TYPE}"
      "${CMAKE_SOURCE_DIR}/unittests/32Bit_ASM/Disabled_Tests_${CPU_CLASS}"
      "Test_32Bit_${REL_TEST_ASM}"
      "${TEST_NAME}"
      ${LAUNCH_PROGRAM}
      "${OUTPUT_NAME}" "${OUTPUT_CONFIG_NAME}")
    # This will cause the ASM tests to fail if it can't find the TestHarness or ASM files
    # Pretty crap way to work around the fact that tests can't have a build dependency in a different directory
    # Just make sure to independently run `make all` then `make test`
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${OUTPUT_NAME}")
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${OUTPUT_CONFIG_NAME}")
    # Exit code 125 from the runner script is reported as "skipped" rather than failed.
    set_property(TEST ${TEST_NAME} APPEND PROPERTY SKIP_RETURN_CODE 125)
    set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT ${FEX_ARGS_LIST})
    if (MINGW)
      # Ensure the DOS region can be allocated.
      set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "WINEPRELOADRESERVE=10000-110000")
    endif()
  endforeach()

endforeach()

# Builds every assembled test binary and config blob as part of the default build.
add_custom_target(32bit_asm_files ALL
  DEPENDS "${ASM_DEPENDS}")

# Convenience target: runs only the 32-bit ASM tests through ctest, from the
# build root, after the test files and the runner have been built.
# `$$` is presumably there so that a literal `$` (regex end anchor) survives
# the generated build rule - TODO confirm.
add_custom_target(32bit_asm_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  DEPENDS 32bit_asm_files
  DEPENDS "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner"
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*32Bit\.*.asm$$")


================================================
FILE: unittests/32Bit_ASM/Disabled_Tests
================================================
# Relies on undefined behaviour
Test_32Bit_X87/D9_F9.asm

Test_32Bit_X87/D9_F2.asm

# Relies on rounding correctness
Test_32Bit_X87/D9_F8.asm


================================================
FILE: unittests/32Bit_ASM/Disabled_Tests_Simulator
================================================
# Simulator can't handle `mrs x0, nzcv`
Test_32Bit_SecondaryModRM/Reg_7_1.asm


================================================
FILE: unittests/32Bit_ASM/Disabled_Tests_host
================================================
# 32-bit segment pushing and popping causes the runner to break
# We aren't 100% matching behaviour
Test_32Bit_Primary/Pop_Segments.asm

# Hecks with GS, FS, and ES
# Causing the signal handler delegate to break
Test_32Bit_Primary/Primary_8C.asm
Test_32Bit_Primary/Primary_8C_2.asm

# Hecks with CS
# Causing our host runner a bit of pain
Test_32Bit_Primary/Primary_CF.asm

# Zen+ CI doesn't support UMIP so it returns "real" values
Test_32Bit_Secondary/07_XX_00.asm


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/GOT_calculation.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x10013"
  },
  "Mode": "32BIT"
}
%endif

; Checks that `call` pushes the correct return address by popping it straight back into EAX.
; The expected RAX is the absolute address of `target`, so it depends on the load address
; (org 10000h) and on the byte size of every instruction emitted before the label.

; Preamble (32Bit_ASM/CMakeLists.txt) sets ES and changes expectation.
; Originally 0x10011, now 0x10013.

; Give `call` a stack to push onto (0xe0000000 region is presumably mapped by the test harness).
mov esp, 0xe0000010

; This is a common pattern in 32-bit PIE code.
; 32-bit GOT calculation needs to do a call+pop to get the EIP.
; LEA doesn't work because there are no EIP-relative ops like on x86-64.

call target
target:
; EAX = return address pushed by the call = address of `target`.
pop eax

hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/IMUL_garbagedata.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000007dbf2800",
    "RDX": "0x0000000000000000",
    "RBX": "0x000000000000004f",
    "RCX": "0x000000000000004f",
    "RBP": "0x0000000000009e4f",
    "RSI": "0x0000000000009e4f",
    "RSP": "0x000000000000004f"
  },
  "Mode": "32BIT"
}
%endif

; FEX had a bug where smaller than 64-bit imul could leave garbage data in the upper 32-bits of the register holding the result.
; This would cause subsequent instructions after the imul to receive garbage bits.
; In particular this would feed in to address calculation in DXVK with "Dungeon Defenders" doing address calculation.
; The address calculation did something similar to:
;   xor edx, edx
;   mov eax, 0x7dbf2800
;   imul ebx, ebx, 0xaaaaaaab
;   div ebx
; Divide expected 0x4f but received 0xffffffb1'0000'004f
;
; The div itself is not executed here; the test checks the full 64-bit register
; values after each imul form instead.

; Dividend
xor edx, edx
mov eax, 0x7dbf2800

; Multiply starting value
mov ebx, 0xED

; Branch to the next instruction so the multiplies start a fresh block.
jmp .test

.test:

; imul 1-src
; (the 16-bit result in EDI is not part of RegData)
mov edi, 0xaaaaaaab
imul di, bx
mov esp, 0xaaaaaaab
imul esp, ebx

; imul 2-src 8-bit check
; 0xED * 0xab = 0x9e4f
imul bp, bx, 0xab
imul esi, ebx, 0xab

; imul 2-src 16-bit check
; 0xED * 0xaaab and 0xED * 0xaaaaaaab both truncate to 0x4f at their operand size.
imul cx, bx, 0xaaab
imul ebx, ebx, 0xaaaaaaab

hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/InlineSyscall.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  },
  "Mode": "32BIT"
}
%endif

; FEX 32-bit inline syscalls hit an assert in uxtw
; Just use an inline syscall and throw it zero data to catch the assert
; RegData is empty: no register value is checked, the test only has to reach hlt.
mov eax, 355 ; getrandom, is an inline syscall
; Zero the argument registers.
mov ebx, 0
mov ecx, 0
mov edx, 0
; 32-bit syscall entry.
int 0x80

hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/InvertedCarrySet.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; FEX had a bug where inverting CF to match the ABI when flushing the register cache didn't mark CF as possibly being set.
; This caused accesses relying on that flag to be set correctly to return wrong values.
;
; Pass: EAX = 1 (the jnb is taken). Fail: EAX = 0.

mov esp, 0xe0000020
; NOTE(review): the al/cl writes are immediately overwritten by the full-register
; writes below; presumably kept to reproduce the original code shape - confirm.
mov al, 3
mov cl, 2
mov ecx, 1
mov eax, 1

and al, cl ; Zeros CF, non-inverted
push ecx ; Triggers a register cache flush
inc eax ; Tries to preserve CF, but would encounter the bug and set it instead
; CF must still be clear here, so the branch has to be taken.
jnb succ
mov eax, 0
hlt
succ:
mov eax, 1
hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/LoopAddressSizeCheck.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000001",
    "RBX": "0x0000000000010001"
  },
  "Mode": "32BIT"
}
%endif

; FEX-Emu had a bug where a16 loop instructions weren't treating the input RCX register as 16-bit.
; Effectively always treating it as 32-bit.
; Little test that operates at 16-bit and 32-bit sizes to ensure it is correctly handled.
;
; EAX and EBX count the iterations of the 16-bit and 32-bit loop respectively.
mov eax, 0
mov ebx, 0
mov ecx, 0x0001_0001

; a16: only CX (= 1) is the counter, so the body runs exactly once.
.test:
inc eax
a16 loop .test

; a32: the whole of ECX (= 0x10001) is the counter, so the body runs 0x10001 times.
mov ecx, 0x0001_0001
.test2:
inc ebx
a32 loop .test2

hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/SignExtendBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344",
    "RBX": "0x41424344",
    "RCX": "0x51525354"
  },
  "MemoryRegions": {
    "0x00fd0000": "4096",
    "0xf0000000": "4096"
  },
  "MemoryData": {
    "0x00fd0000": "0x51525354",
    "0xf0000000": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Checks that 32-bit effective addresses wrap/zero-extend at 32 bits instead of being
; sign-extended, using three addressing forms. Each load must hit one of the two
; regions declared in MemoryRegions.

; Ensures that zero extension of addresses is adhered to.
lea eax, [0xf000_0000]
mov eax, [ds:eax]

; Ensures that zext occurs correctly with two registers that have the sign bit set.
; 0xffff_ffff + 0xf000_0001 wraps to 0xf000_0000.
mov ebx, 0xffff_ffff
mov ecx, 0xf000_0001

; Break the block so it can't optimize through.
jmp .test
.test:
mov ebx, [ebx+ecx]

; Ensures that zext occurs correctly with SIB indexing with second argument not having sign bit set but "index" having sign bit.
; Originally saw in Metal Gear Rising Revengeance with a `jmp dword [ecx*4+0xfdbf10]` instruction.
; With ecx = 0xfffffff4 = -12. This is them loading a switch table's branches just before the switch base.
mov ecx, -12

; Break the block so it can't optimize through.
jmp .test2
.test2:

; -12 * 4 + 0x00fd_0030 = 0x00fd_0000
mov ecx, [ecx*4+0x00fd_0030]
hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/SubAddrBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xdeadbeef"
  },
  "MemoryRegions": {
    "0x10000000": "4096"
  },
  "MemoryData": {
    "0x10000000": "0xdeadbeef"
  },
  "Mode": "32BIT"
}
%endif

; Checks a load through a register base with a negative displacement:
; 0x10000040 - 0x40 = 0x10000000, the start of the region that holds 0xdeadbeef.

section .text

lea eax, [0x10000040]
mov eax, [eax-0x40]
hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/TelemetryFlags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000011000",
    "RCX": "0x0000000051529654",
    "RDX": "0x0000000061626303"
  },
  "Mode": "32BIT"
}
%endif

; FEX-Emu had a bug with its `TelemetrySetValue` IR operation where it would corrupt host flags at an inopportune time.
; The IR operation does `cmp+cset`, but even with `ImplicitFlagClobber` set, this happened at an invalid time for flag handling.
; To test this:
;  - btr -> Sets CF
;  - adc with `ss:` -> Adds to register with carry, but `ss:` causes `TelemetrySetValue`.
;  - Host flags are corrupted after the `TelemetrySetValue`, before the `adc` was able to operate.

mov ecx, 0x51525354
mov edx, 0x61626303

; EAX = address of .data (expected 0x11000: load address 0x10000 plus the 4096 alignment).
lea eax, [.data]
; Load the flags register from the value stored at .data_flags.
lea esp, [.data_flags]
popf

; Low word of .data: 0x4344 & 0x6303 = 0x4300
and word [eax], dx
; Bit 3 (dx mod 16) of cx = 0x5354 is clear, so CF = 0 after the btr and cx is unchanged.
btr cx, dx
; With CF = 0: 0x5354 + 0x4300 = 0x9654. A corrupted CF would add one more.
adc cx, ss:[eax]

hlt

align 4096

.data:
dd 0x41424344

.data_flags:
dd 0xfeff


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/VEXW_Bug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000004",
    "RBX": "0x0000000000000004",
    "RCX": "0x0000000000000006",
    "RDX": "0x0000000000000006",
    "XMM0": ["0x402f800000000000", "0x400c000000000000"],
    "XMM1": ["0x402f800000000000", "0x4035400000000000"],
    "XMM2": ["0x0000000045464748", "0"],
    "XMM3": ["0x4142434445464748", "0"],
    "XMM4": ["0x0000000041424344", "0"],
    "XMM5": ["0x0000000045464748", "0x5152535455565758"]
  },
  "Mode": "32BIT"
}
%endif

; FEX-Emu had a bug where 32-bit applications that relied on VEX.W would incorrectly handle widening behaviour.
; AVX instructions that use VEX.W wouldn't scale their element sizes correctly.
; Checks all instructions (skipping a few duplicates in the same class) that react to VEX.W.

vmovaps xmm0, [rel .data_xmm0]
vmovaps xmm1, [rel .data_xmm1]
vmovaps xmm2, [rel .data_xmm2]

; Affects all scalar FMA.
; Low double: xmm0 * xmm1 + xmm2; the upper double of xmm0 is kept.
vfmadd132sd xmm0, xmm2, xmm1

; Affects all packed FMA.
vmovaps xmm1, [rel .data_xmm0]
vmovaps xmm2, [rel .data_xmm1]
vmovaps xmm3, [rel .data_xmm2]
vfmadd132pd xmm1, xmm3, xmm2

vmovaps xmm2, [rel .data_xmm0]
; Affects vcvttsd2si as well.
vcvtsd2si eax, xmm2
; This actually works on 32-bit, behaves like a 32-bit operation. Don't question it.
; Hand-encoded because it is the 64-bit-destination (VEX.W=1) form.
db 0xc4, 0xe1, 0xfb, 0x2d, 0xda; vcvtsd2si rbx, xmm2

vmovaps xmm2, [rel .data_6]
; Affects vcvttss2si as well.
vcvtss2si ecx, xmm2
; This actually works on 32-bit, behaves like a 32-bit operation. Don't question it.
db 0xc4, 0xe1, 0xfa, 0x2d, 0xd2  ; vcvtss2si rdx, xmm2

; Store only the low 32 bits to the (zeroed) scratch slot, then reload all 128 bits:
; everything above the low dword must still be zero.
vmovaps xmm2, [rel .data_test]
vmovd dword [rel .data_temp], xmm2
vmovaps xmm2, [rel .data_temp]

; Same with a 64-bit store: only the low qword is written.
vmovaps xmm3, [rel .data_test]
vmovq qword [rel .data_temp], xmm3
vmovaps xmm3, [rel .data_temp]

; Zero the scratch slot again before the next store test.
vpxor xmm7, xmm7, xmm7
vmovaps [rel .data_temp], xmm7

; vpextrq qword explicitly SIGILLs on 32-bit
; Extract dword element 1 into the zeroed scratch slot and reload it.
vmovaps xmm4, [rel .data_test]
vpextrd dword [rel .data_temp], xmm4, 1
vmovaps xmm4, [rel .data_temp]

vmovaps [rel .data_temp], xmm7

; vpinsrq qword explicitly SIGILLs on 32-bit
; Insert a zero dword (from the zeroed scratch slot) into element 1 only.
vmovaps xmm5, [rel .data_test]
vpinsrd xmm5, dword [rel .data_temp + 8], 1

hlt

align 4096
.data_xmm0:
dq 0x400c000000000000, 0x400c000000000000
.data_xmm1:
dq 0x400c000000000000, 0x4012000000000000
.data_xmm2:
dq 0x400c000000000000, 0x4016000000000000

.data_test:
dq 0x4142434445464748, 0x5152535455565758

.data_6:
dd 6.0, 6.0, 6.0, 6.0

; 16-byte scratch slot used by the store/extract/insert checks.
.data_temp:
dq 0, 0


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/adc.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000fffffffe",
    "RBX": "0x0000000000000001"
  },
  "Mode": "32BIT"
}
%endif

; FEX had a bug where ADD or SUB with carry was generating results with garbage in the upper 32-bits.
; The operations are chained three times so each one consumes the carry/borrow
; produced by the previous one.

mov eax, -1
mov ebx, -1
mov edx, -1

; 0xffffffff + 0xffffffff = 0xfffffffe with carry out; adding 0xffffffff plus
; that carry keeps the value at 0xfffffffe.
clc
adc eax, edx
adc eax, edx
adc eax, edx

; 0xffffffff - 0xffffffff = 0 (no borrow), then 0 - 0xffffffff = 1 (borrow),
; then 1 - 0xffffffff - 1 = 1.
clc
sbb ebx, edx
sbb ebx, edx
sbb ebx, edx
hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/rep_lods_bug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x52",
    "RBX": "0x202",
    "RCX": "0"
  },
  "Mode": "32BIT"
}
%endif

; FEX had a bug that only manifests in 32-bit mode around pushing and popping flags around rep lods{b,w,d,q}
; This manifested as a corrupt CF and ZF flag even though rep lodsb isn't supposed to affect flags.
; Test this by first storing zero to eflags, doing the operation and then loading it back.
mov esi, 0xe000_0000
mov esp, 0xe000_0800

; Source bytes for the lodsb: 44 43 42 41 54 53 52 51
mov eax, 0x41424344
mov [esi], eax

mov eax, 0x51525354
mov [esi + 4], eax

; Load 7 bytes; the last one loaded (0x52) is what remains in AL.
mov eax, 0
mov ecx, 7

; Push zero and then load back in to eflags.
push dword 0
popfd

; Do a rep lodsb, whichever size, doesn't matter.
rep lodsb

; Push flags and then load back in to ebx
; Expected 0x202: no arithmetic flag may have been set by the rep lodsb.
pushfd
pop dword ebx

hlt


================================================
FILE: unittests/32Bit_ASM/FEX_bugs/x87_unordered_cmp_fix_32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xcafecafe"
  },
  "Mode": "32BIT"
}
%endif

; IsNan() couldn't detect negative NaNs (sign bit set in exponent field).
; This caused __builtin_isunordered() to return wrong values.
;
; NOTE(review): the `qnan` constant at the bottom uses exponent word 0x7FFF, i.e. the
; sign bit is clear. Confirm whether a sign-set encoding (0xFFFF) was intended as well.
;
; Pass: EAX = 0xcafecafe. Any failing sub-test jumps to test_failed (EAX = 0xdeadbeef).
; Every sub-test leaves one value on the x87 stack (two loads, one pop from fucomip).

mov esp, 0xe000_1000

; Test 1: __builtin_isunordered(1.0, 2.0) should return 0
; Pattern: fucomip + setp + test for 0
fld1
lea edx, [two]
fld tword [edx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jnz test_failed         ; If not 0, test failed (should be ordered)

; Test 2: __builtin_isunordered(1.0, NaN) should return 1
fld1
lea edx, [qnan]
fld tword [edx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jz test_failed          ; If 0, test failed (should be unordered)

; Test 3: __builtin_isunordered(NaN, 1.0) should return 1
lea edx, [qnan]
fld tword [edx]
fld1
fucomip st1
setp al
movzx eax, al
test eax, eax
jz test_failed          ; If 0, test failed (should be unordered)

; Test 4: __builtin_isunordered(2.0, 2.0) should return 0 (equal case)
lea edx, [two]
fld tword [edx]
lea edx, [two]
fld tword [edx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jnz test_failed         ; If not 0, test failed (should be ordered)

; All tests passed
mov eax, 0xcafecafe
hlt

test_failed:
; Test failed
mov eax, 0xdeadbeef
hlt

; 80-bit extended-precision constants loaded with `fld tword`.
align 8
two:
  dt 2.0

align 8  
qnan:
  dq 0xC000000000000000  ; Quiet NaN with only quiet bit set (no bottom 62 bits) - this breaks IsNan
  dw 0x7FFF              ; Standard NaN exponent (0x7FFF)

================================================
FILE: unittests/32Bit_ASM/Known_Failures
================================================
Test_32Bit_X87/D9_F8.asm


================================================
FILE: unittests/32Bit_ASM/Primary/Loops.asm
================================================
%ifdef CONFIG
{
  "Mode": "32BIT"
}
%endif

; Two countdown loops from 0x10 to 0 with different branch shapes.
; No RegData is declared, so the test only has to terminate at hlt.

; Shape 1: conditional backward branch.
mov ecx, 0x10
.loop:
dec ecx
test ecx, ecx
jnz .loop
.end:

; Shape 2: conditional forward exit plus unconditional backward jump.
mov ecx, 0x10
.loop2:
dec ecx
test ecx, ecx
jz .end2
jmp .loop2

.end2:
hlt

================================================
FILE: unittests/32Bit_ASM/Primary/Pop_Segments.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RSP": "0xE0000040"
  },
  "Mode": "32BIT"
}
%endif

; Checks how far ESP moves when popping segment registers with the default
; (32-bit) and the o16 operand size. ESP must end where it started.

mov esp, 0xe0000040
mov eax, 0

; 5 dwords + 5 words of zero = 30 bytes pushed.
push eax
push eax
push eax
push eax
push eax

push ax
push ax
push ax
push ax
push ax

; Only pops the segments
; Doesn't check for a correct segment value
; Just ensures we are popping the correct amount of data
; Default operand size: 5 * 4 = 20 bytes.
pop ss
pop ds
pop es
pop fs
pop gs

; 16-bit operand size: 5 * 2 = 10 bytes.
o16 pop ss
o16 pop ds
o16 pop es
o16 pop fs
o16 pop gs

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424315",
    "RBX": "0x51525425",
    "RCX": "0x61626435"
  },
  "Mode": "32BIT"
}
%endif

; ADD with a memory destination and register source at 8, 16 and 32 bits.
; Each result is read back as a full dword to verify that only the addressed
; width was modified.

mov esp, 0xe0000000

; Seed three dwords of scratch memory.
mov eax, 0x41424344
mov [esp + 4 * 0], eax
mov eax , 0x51525354
mov [esp + 4 * 1], eax
mov eax, 0x61626364
mov [esp + 4 * 2], eax

; Byte: 0x44 + 0xD1 wraps to 0x15 without touching the next byte.
; Word: 0x5354 + 0xD1 = 0x5425. Dword: 0x61626364 + 0xD1 = 0x61626435.
mov eax, 0xD1
add byte  [esp + 4 * 0], al
add word  [esp + 4 * 1], ax
add dword [esp + 4 * 2], eax

mov eax, [esp + 4 * 0]
mov ebx, [esp + 4 * 1]
mov ecx, [esp + 4 * 2]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xB5",
    "RBX": "0x53D5",
    "RCX": "0x616263F5"
  },
  "Mode": "32BIT"
}
%endif

; ADD with a register destination and memory source at 8, 16 and 32 bits.

mov esp, 0xe0000000

; Seed three dwords of scratch memory.
mov eax, 0x41424344
mov [esp + 4 * 0], eax
mov eax , 0x51525354
mov [esp + 4 * 1], eax
mov eax, 0x61626364
mov [esp + 4 * 2], eax

mov eax, 0x71
mov ebx, 0x81
mov ecx, 0x91

; 0x71 + 0x44 = 0xB5, 0x81 + 0x5354 = 0x53D5, 0x91 + 0x61626364 = 0x616263F5
add al,  byte  [esp + 4 * 0]
add bx,  word  [esp + 4 * 1]
add ecx, dword [esp + 4 * 2]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_00_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x81",
    "RBX": "0x8081",
    "RCX": "0x80808081"
  },
  "Mode": "32BIT"
}
%endif

; ADD with an immediate whose top bit is set, at 8, 16 and 32 bits.
; The result must stay within the operand width (no extension into upper bits).

mov eax, 0x01
add al, 0x80

mov ebx, 0x01
add bx, 0x8080

mov ecx, 0x01
add ecx, 0x80808080

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_27.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x12345637"
  },
  "Mode": "32BIT"
}
%endif

; DAA executed four times in a row. Only AL changes; each step adds 6 again,
; so the adjust condition must hold from one DAA to the next:
;   AL: 0x1f -> 0x25 -> 0x2b -> 0x31 -> 0x37

mov eax, 0x1234561f
daa
daa
daa
daa
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_2F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x12345607"
  },
  "Mode": "32BIT"
}
%endif

; DAS executed four times in a row. Only AL changes; each step subtracts 6 again,
; so the adjust condition must hold from one DAS to the next:
;   AL: 0x1f -> 0x19 -> 0x13 -> 0x0d -> 0x07

mov eax, 0x1234561f
das
das
das
das
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_37.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x12345a07"
  },
  "Mode": "32BIT"
}
%endif

; AAA executed four times in a row. Only AX changes (upper 16 bits of EAX are kept);
; every step adjusts, so AH is incremented each time:
;   AX: 0x561f -> 0x5705 -> 0x580b -> 0x5901 -> 0x5a07

mov eax, 0x1234561f
aaa
aaa
aaa
aaa
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_3F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x12345107"
  },
  "Mode": "32BIT"
}
%endif

; AAS executed four times in a row. Only AX changes (upper 16 bits of EAX are kept);
; every step adjusts, and the third step additionally borrows from AH:
;   AX: 0x561f -> 0x5509 -> 0x5403 -> 0x520d -> 0x5107

mov eax, 0x1234561f
aas
aas
aas
aas
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_60.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x6",
    "RCX": "0x5",
    "RDX": "0x4",
    "RSP": "0xE0000020",
    "RBX": "0x3",
    "RBP": "0x2",
    "RSI": "0x1",
    "RDI": "0x0"
  },
  "Mode": "32BIT"
}
%endif

; PUSHAD: pushes all eight 32-bit GPRs, then reads the stack frame back in
; reverse so each register ends up with its mirror register's value.
; The ESP slot must contain the value ESP had before the push.

mov esp, 0xe0000020

mov eax, 0
mov ecx, 1
mov edx, 2
mov ebx, 3
mov ebp, 4
mov esi, 5
mov edi, 6

pushad

; Invert the order
; The lowest slot holds the last register pushed (EDI), the highest holds EAX.

mov eax, [esp + 4 * 0]
mov ecx, [esp + 4 * 1]
mov edx, [esp + 4 * 2]
; sp here
mov ebx, [esp + 4 * 4]
mov ebp, [esp + 4 * 5]
mov esi, [esp + 4 * 6]
mov edi, [esp + 4 * 7]

; Load sp last
; (it is the base register for all of the loads above)
mov esp, [esp + 4 * 3]


hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_60_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x6",
    "RCX": "0x5",
    "RDX": "0x4",
    "RSP": "0xE0000020",
    "RBX": "0x3",
    "RBP": "0x2",
    "RSI": "0x1",
    "RDI": "0x0"
  },
  "Mode": "32BIT"
}
%endif

; 16-bit PUSHA (operand-size prefix): pushes the eight 16-bit GPRs, then reads
; the 2-byte slots back in reverse so each register gets its mirror's value.

mov esp, 0xe0000020

mov eax, 0
mov ecx, 1
mov edx, 2
mov ebx, 3
mov ebp, 4
mov esi, 5
mov edi, 6

; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o32`
db 0x66
pusha

; Invert the order
mov ax, [esp + 2 * 0]
mov cx, [esp + 2 * 1]
mov dx, [esp + 2 * 2]
; sp here
mov bx, [esp + 2 * 4]
mov bp, [esp + 2 * 5]
mov si, [esp + 2 * 6]
mov di, [esp + 2 * 7]

; Load sp last
; Only the low 16 bits are written; the upper half of ESP keeps 0xe000.
mov sp, [esp + 2 * 3]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_61.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x6",
    "RCX": "0x5",
    "RDX": "0x4",
    "RSP": "0xE0000020",
    "RBX": "0x3",
    "RBP": "0x2",
    "RSI": "0x1",
    "RDI": "0x0"
  },
  "Mode": "32BIT"
}
%endif

; POPAD: builds an eight-dword frame by hand and pops it into the GPRs.
; The slot in the ESP position is discarded; ESP simply ends up back at its
; starting value after 32 bytes are popped.

mov esp, 0xe0000020

; Pushed in the order EAX, ECX, EDX, EBX, (ESP), EBP, ESI, EDI.
push dword 0x6
push dword 0x5
push dword 0x4
push dword 0x3
push dword 0x41424344 ; Skipped
push dword 0x2
push dword 0x1
push dword 0x0

popad

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_61_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFF0006",
    "RCX": "0x5",
    "RDX": "0x4",
    "RSP": "0xE0000020",
    "RBX": "0x3",
    "RBP": "0x2",
    "RSI": "0x1",
    "RDI": "0x0"
  },
  "Mode": "32BIT"
}
%endif

; 16-bit POPA (operand-size prefix): only the low 16 bits of each GPR are
; replaced. EAX is seeded with 0xFF0000 to prove its upper half is preserved.

mov esp, 0xe0000020

mov eax, 0xFF0000
mov ecx, 0xFF
mov edx, 0xFF
mov ebx, 0xFF
mov ebp, 0xFF
mov esi, 0xFF
mov edi, 0xFF

; Pushed in the order AX, CX, DX, BX, (SP), BP, SI, DI.
push word 0x6
push word 0x5
push word 0x4
push word 0x3
push word 0x4142 ; Skipped
push word 0x2
push word 0x1
push word 0x0

; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o32`
db 0x66
popa

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_8C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF0033",
    "RBX": "0xFFFF0033",
    "RCX": "0xFFFF0033"
  },
  "Mode": "32BIT"
}
%endif

; Store of a segment register to memory: must write exactly 16 bits and leave
; the neighbouring bytes untouched.

mov eax, 0x33
mov gs, ax
mov fs, ax
mov es, ax

mov eax, 0xFFFFFFFF
mov ebp, 0xe0000000

; Store 32bits of data
mov dword [ebp + 0], eax
mov dword [ebp + 4], eax
mov dword [ebp + 8], eax

; Ensure that the segment store only writes 16-bits
mov word [ebp + 0], gs
mov word [ebp + 4], fs
mov word [ebp + 8], es

; Read the full dwords back: upper halves must still be 0xFFFF.
mov eax, 0
mov ebx, 0
mov ecx, 0
mov eax, dword [ebp + 0]
mov ebx, dword [ebp + 4]
mov ecx, dword [ebp + 8]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_8C_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF0033",
    "RBX": "0x33",
    "RCX": "0x33"
  },
  "Mode": "32BIT"
}
%endif

; Moves segment registers into general-purpose registers, covering both the
; 16-bit destination form and the 32-bit destination form.

; Load selector 0x33 into the three segment registers under test.
mov eax, 0x33
mov gs, ax
mov fs, ax
mov es, ax

; Pre-fill the destinations with all ones so untouched bits are visible.
mov eax, 0xFFFFFFFF
mov ebx, 0xFFFFFFFF
mov ecx, 0xFFFFFFFF

; 16-bit insert
; (CONFIG expects RAX=0xFFFF0033: upper half preserved)
mov ax, gs
; 32-bit zext
; (CONFIG expects RBX=RCX=0x33: upper half cleared)
mov ebx, fs
mov ecx, es

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_8D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000",
    "RBX": "0x4000",
    "RCX": "0x8000",
    "RDX": "0x9000",
    "RSI": "0x7FC0",
    "RSP": "0xFFFF7FC0",
    "RBP": "0x1"
  },
  "Mode": "32BIT"
}
%endif

; LEA with combinations of the operand-size (0x66) and address-size (0x67)
; override prefixes in 32-bit mode.
mov eax, 0
mov ebx, 0
; esp starts as 0xFFFFFFFF so the later 16-bit write to sp is observable
; (CONFIG expects RSP=0xFFFF7FC0).
mov esp, -1

; Specific encoded `lea ax, [0x4000]`
; Operand size override and address size override
; Nasm doesn't seem to emit this at all
; Bytes: 67 (addr-size) 66 (op-size) 8D 06 (modrm: ax, disp16) 00 40 (0x4000)
db 0x67, 0x66, 0x8d, 0x06, 0x00, 0x40

lea bx, [0xC000]
lea si, [0x4001]

mov ebp, 0
; Try to LEA past the 16bits
; 0xC000 + 0x4001 = 0x10001; CONFIG expects RBP=0x1, i.e. the 16-bit
; effective address wraps before it is written to the 32-bit destination.
lea ebp, [bx + si]

lea bx, [0x4000]
lea si, [0x4000]

; Address size override and Operand size overrides
lea cx, [bx + si]
lea dx, [bx + si + 0x1000]
lea sp, [bx + si - 64]

; Address size override without operand size override
; (CONFIG expects RSI=0x7FC0: 16-bit address zero-extended into esi)
lea esi, [bx + si - 64]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0xFFFF0042",
    "RDX": "0x42"
  },
  "Mode": "32BIT"
}
%endif

; Loads the accumulator from an absolute address using opcode 0xA1,
; with and without the 0x66 operand-size prefix.

; Seed the dword at 0xe0000000 with 0x42.
mov edx, 0xe0000000
mov eax, 0x42
mov [edx], eax

mov eax, -1
; mov eax, [0xe0000000]
; Encoded by hand: opcode followed by the 32-bit absolute offset.
db 0xA1
dd 0xe0000000
mov edx, eax

mov eax, -1
; mov ax, [0xe0000000]
; Same opcode with the operand-size prefix; CONFIG expects RCX=0xFFFF0042,
; i.e. only the low 16 bits of eax are replaced.
db 0x66
db 0xA1
dd 0xe0000000
mov ecx, eax

; We can't actually test this one since we can't allocate memory in the lower 16bits
;mov eax, -1
;; mov ax, [0xe00]
;db 0x67
;db 0x66
;db 0xA1
;dw 0xe00
;mov ebx, eax

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0x43",
    "RDX": "0x42"
  },
  "Mode": "32BIT"
}
%endif

; Stores the accumulator to an absolute address using opcode 0xA3,
; with and without the 0x66 operand-size prefix.
mov eax, 0x42
; mov [0xe0000000], eax
; Encoded by hand: opcode followed by the 32-bit absolute offset.
db 0xA3
dd 0xe0000000

; Read the stored dword back into edx (CONFIG expects 0x42).
mov edx, 0xe0000000
mov edx, [edx]

mov eax, 0xFFFF0043
; mov [0xe0000000], ax
; 16-bit store: CONFIG expects RCX=0x43, so the upper half of the
; previously stored dword (zero) must be left untouched.
db 0x66
db 0xA3
dd 0xe0000000

mov ecx, 0xe0000000
mov ecx, [ecx]

; We can't actually test this one since we can't allocate memory in the lower 16bits
; (0x67 is the address-size override prefix needed for the 16-bit offset below.)
;mov eax, 0xFFFF0044
;; mov [0xe000], ax
;db 0x67
;db 0x66
;db 0xA3
;dw 0xe000
;
;mov ebx, 0xe000
;mov ebx, [ebx]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8300",
    "RDI": "0xE0000009",
    "RSI": "0xE0000001"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSB with the direction flag clear: compares byte [esi] with
; byte [edi], then both pointers advance by one.
mov edx, 0xe0000000

; Slot 0 holds 0x47, slot 1 holds 0x61.
mov eax, 0x47
mov [edx + 8 * 0], eax
mov eax, 0x61
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

cld
cmpsb
; cmp = [esi] - [edi] = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0

; Capture the flag byte in AH; CONFIG expects RAX=0x8300.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0200",
    "RCX": "0x5",
    "RDX": "0x0",
    "RDI": "0xE0000005",
    "RSI": "0xE0000015"
  },
  "Mode": "32BIT"
}
%endif

; REPE CMPSB over two strings that diverge at index 4.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 14

lea edi, [edx + 8 * 0]
lea esi, [edx + 8 * 2]

; Compare up to 10 bytes while they are equal.
; "Test" matches, then [esi]='U' (0x55) vs [edi]='S' (0x53): 0x55 - 0x53 = 0x02,
; so the loop stops after 5 iterations with only the always-set bit 1 in the
; flag byte (CONFIG: RAX=0x0200, RCX=5).
cld
mov ecx, 10
repe cmpsb
mov eax, 0
lahf

; dl = 1 only if the final compare was equal (CONFIG expects 0).
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the trailing \0 in these double-quoted strings is two literal characters,
; not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestUnmatched\0"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x3",
    "RDX": "0x1",
    "RDI": "0xE0000007",
    "RSI": "0xE0000017"
  },
  "Mode": "32BIT"
}
%endif

; REPNE CMPSB over two strings whose first equal byte is at index 6.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 11

lea edi, [edx + 8 * 0]
lea esi, [edx + 8 * 2]

; Compare up to 10 bytes while they differ.
; "String" vs "Unmatc" differ at every index; both strings have 'T' at
; index 6, so the loop stops after 7 iterations with ZF set
; (CONFIG: RAX=0x4600, RCX=3).
cld
mov ecx, 10
repne cmpsb
mov eax, 0
lahf

; dl = 1 because the terminating compare was equal.
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the trailing \0 in these double-quoted strings is two literal characters,
; not a NUL byte.
.StringOne: db "StringTest\0"
.StringTwo: db "UnmatcTest\0"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REPNE_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xE000000A",
    "RSI": "0xE000001A"
  },
  "Mode": "32BIT"
}
%endif

; Copies two identical strings with REPNE MOVSB and then compares them
; with REPE CMPSB.
;
; NOTE(review): despite the file name, the compare below uses `repe`;
; the only `repne` prefix is the one on movsb inside the macro. The expected
; values in CONFIG (RCX=0, RDI advanced by 10) match the `repe` compare.
; Confirm whether a `repne cmpsb` was intended.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; repne movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  repne movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 11

lea edi, [edx + 8 * 0]
lea esi, [edx + 8 * 2]

; All 10 compared bytes are equal, so the loop runs until ecx reaches 0
; and leaves ZF set (CONFIG: RAX=0x4600, RCX=0).
cld
mov ecx, 10
repe cmpsb
mov eax, 0
lahf

; dl = 1 because the final compare was equal.
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the trailing \0 in these double-quoted strings is two literal characters,
; not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestString\0"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REP_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xE000000A",
    "RSI": "0xE000001A"
  },
  "Mode": "32BIT"
}
%endif

; REPE CMPSB over two identical strings: the loop must run to completion.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 11

lea edi, [edx + 8 * 0]
lea esi, [edx + 8 * 2]

; All 10 compared bytes are equal, so the loop runs until ecx reaches 0
; and leaves ZF set (CONFIG: RAX=0x4600, RCX=0).
cld
mov ecx, 10
repe cmpsb
mov eax, 0
lahf

; dl = 1 because the final compare was equal.
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the trailing \0 in these double-quoted strings is two literal characters,
; not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestString\0"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REP_Smaller.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0600",
    "RCX": "0x5",
    "RDX": "0x0",
    "RDI": "0xE0000005",
    "RSI": "0xE0000015"
  },
  "Mode": "32BIT"
}
%endif

; REPE CMPSB where the second string is shorter than the first.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
; NOTE(review): .StringTwo assembles to only 6 bytes but 14 are copied, so
; this reads past the end of the defined data. The extra bytes are never
; compared below; confirm the over-read is acceptable for the test loader.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 14

lea edi, [edx + 8 * 0]
lea esi, [edx + 8 * 2]

; "Test" matches, then [esi] is a literal backslash (0x5C) vs [edi]='S' (0x53):
; 0x5C - 0x53 = 0x09, which sets PF only, so the flag byte is 0x06
; (CONFIG: RAX=0x0600, RCX=5).
cld
mov ecx, 10
repe cmpsb
mov eax, 0
lahf

; dl = 1 only if the final compare was equal (CONFIG expects 0).
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the trailing \0 in these double-quoted strings is two literal characters,
; not a NUL byte. The expected flags above depend on that.
.StringOne: db "TestString\0"
.StringTwo: db "Test\0"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8300",
    "RCX": "0x9",
    "RDX": "0x0",
    "RDI": "0xE000000C",
    "RSI": "0xE000001C"
  },
  "Mode": "32BIT"
}
%endif

; REPE CMPSB with the direction flag set: compares backwards and stops on
; the very first byte.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 14

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 14

; Start at the last copied byte (index 13) of each buffer.
lea edi, [edx + 8 * 0 + 13]
lea esi, [edx + 8 * 2 + 13]

; Index 13 holds 't' (0x74) in the first buffer and 'e' (0x65) in the second:
; [esi] - [edi] = 0x65 - 0x74 = 0xF1, giving CF and SF, flag byte 0x83.
; The loop therefore stops after one iteration (CONFIG: RAX=0x8300, RCX=9).
std
mov ecx, 10
repe cmpsb
mov eax, 0
lahf

; dl = 1 only if the final compare was equal (CONFIG expects 0).
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; each \0 in these double-quoted strings is two literal characters, not a
; NUL byte. The byte indices used above depend on that.
.StringOne: db "\0\0\0\0TestString"
.StringTwo: db "\0TestUnmatched"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_REP_down_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xDFFFFFFF",
    "RSI": "0xE000000F"
  },
  "Mode": "32BIT"
}
%endif

; REPE CMPSB with the direction flag set over two identical buffers:
; the loop must run to completion while walking backwards.
;
; copy dest, src, size: byte-copies size bytes from src to dest using
; rep movsb. Clobbers eax, ecx, esi, edi and reloads es/ds with 0x17.
%macro copy 3
  ; Dest, Src, Size
  mov edi, %1
  mov esi, %2
  mov ecx, %3

  mov eax, 0x17
  mov es, eax
  mov ds, eax

  cld
  rep movsb
%endmacro

mov edx, 0xe0000000

; First string goes to offset 0 of the scratch region.
lea ebx, [edx + 8 * 0]
lea ebp, .StringOne
copy ebx, ebp, 11

; Second string goes to offset 16.
lea ebx, [edx + 8 * 2]
lea ebp, .StringTwo
copy ebx, ebp, 11

; Start at the last copied byte (index 10) of each buffer.
lea edi, [edx + 8 * 0 + 10]
lea esi, [edx + 8 * 2 + 10]

; All 11 bytes are equal, so both pointers step back 11 times:
; edi ends one byte below the buffer start (CONFIG: RDI=0xDFFFFFFF,
; RSI=0xE000000F, RCX=0, RAX=0x4600).
std
mov ecx, 11
repe cmpsb
mov eax, 0
lahf

; dl = 1 because the final compare was equal.
mov edx, 0
sete dl

hlt

; NOTE: NASM only processes backslash escapes inside backquoted strings, so
; the leading \0 in these double-quoted strings is two literal characters,
; not a NUL byte.
.StringOne: db "\0TestString"
.StringTwo: db "\0TestString"


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A6_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000007",
    "RSI": "0xDFFFFFFF"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSB with the direction flag set: compares byte [esi] with
; byte [edi], then both pointers step back by one.
mov edx, 0xe0000000

; Slot 0 holds 0x61, slot 1 holds 0x47.
mov eax, 0x61
mov [edx + 8 * 0], eax
mov eax, 0x47
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

std
cmpsb
; cmp = [esi] - [edi] = 0x61 - 0x47 = 0x1A
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Capture the flag byte in AH; CONFIG expects RAX=0x1200.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A7_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1600",
    "RDI": "0xE000000C",
    "RSI": "0xE0000004"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSD with the direction flag clear: compares dword [esi] with
; dword [edi], then both pointers advance by four.
mov edx, 0xe0000000

; Slot 0 holds 0x61626364, slot 1 holds 0x55565758.
mov eax, 0x61626364
mov [edx + 8 * 0], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

cld
cmpsd
; cmp = [esi] - [edi] = 0x61626364 - 0x55565758 = 0x0C0C0C0C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010110
; OF: LAHF doesn't load - 0
; Capture the flag byte in AH; CONFIG expects RAX=0x1600.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A7_dword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1600",
    "RDI": "0xE0000004",
    "RSI": "0xDFFFFFFC"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSD with the direction flag set: compares dword [esi] with
; dword [edi], then both pointers step back by four.
mov edx, 0xe0000000

; Slot 0 holds 0x61626364, slot 1 holds 0x55565758.
mov eax, 0x61626364
mov [edx + 8 * 0], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

std
cmpsd
; cmp = [esi] - [edi] = 0x61626364 - 0x55565758 = 0x0C0C0C0C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010110
; OF: LAHF doesn't load - 0
; Capture the flag byte in AH; CONFIG expects RAX=0x1600.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A7_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE000000A",
    "RSI": "0xE0000002"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSW with the direction flag clear: compares word [esi] with
; word [edi], then both pointers advance by two.
mov edx, 0xe0000000

; Slot 0 holds 0x6162, slot 1 holds 0x4546.
mov eax, 0x6162
mov [edx + 8 * 0], eax
mov eax, 0x4546
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

cld
cmpsw
; cmp = [esi] - [edi] = 0x6162 - 0x4546 = 0x1C1C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Capture the flag byte in AH; CONFIG expects RAX=0x1200.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_A7_word_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000006",
    "RSI": "0xDFFFFFFE"
  },
  "Mode": "32BIT"
}
%endif

; Single CMPSW with the direction flag set: compares word [esi] with
; word [edi], then both pointers step back by two.
mov edx, 0xe0000000

; Slot 0 holds 0x6162, slot 1 holds 0x4546.
mov eax, 0x6162
mov [edx + 8 * 0], eax
mov eax, 0x4546
mov [edx + 8 * 1], eax

lea edi, [edx + 8 * 1]
lea esi, [edx + 8 * 0]

std
cmpsw
; cmp = [esi] - [edi] = 0x6162 - 0x4546 = 0x1C1C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Capture the flag byte in AH; CONFIG expects RAX=0x1200.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000001"
  },
  "Mode": "32BIT"
}
%endif

; Single SCASB with the direction flag clear: compares al with byte [edi],
; then edi advances by one.
mov edx, 0xe0000000

; Fill the scratch buffer; only the first byte (0x48) is scanned.
mov eax, 0x45464748
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

lea edi, [edx + 8 * 0]

cld
mov eax, 0x61
scasb
; cmp = al - [edi] = 0x61 - 0x48 = 0x19
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Capture the flag byte in AH; CONFIG expects RAX=0x1200.
mov eax, 0
lahf

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AE_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "5",
    "RDI": "0xE0000003"
  },
  "Mode": "32BIT"
}
%endif

; REP (REPE) SCASB scanning forward: continues while bytes equal al.
mov edx, 0xe0000000

; The buffer starts with the bytes 0x61, 0x61, 0x46, ... in memory order.
mov eax, 0x45466161
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

lea edi, [edx + 8 * 0]

cld
mov eax, 0x61
mov ecx, 8
; Sets ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0x61

; Two bytes match, the third (0x46) ends the scan:
; 3 iterations, so CONFIG expects RCX=5 and RDI=0xE0000003.
rep scasb

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AE_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "1",
    "RDI": "0xE0000007"
  },
  "Mode": "32BIT"
}
%endif

; REPNE SCASB scanning forward: continues while bytes differ from al.
mov edx, 0xe0000000

; The only 0x61 byte is at offset 6 (third byte of the second dword).
mov eax, 0x45464748
mov [edx + 8 * 0], eax
mov eax, 0x41614344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

lea edi, [edx + 8 * 0]

cld
mov eax, 0x61
mov ecx, 8
; Clears ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0

; The match at offset 6 ends the scan after 7 iterations,
; so CONFIG expects RCX=1 and RDI=0xE0000007.
repne scasb

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AE_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "5",
    "RDI": "0xE000000D"
  },
  "Mode": "32BIT"
}
%endif

; REPNE SCASB scanning backward (DF set): continues while bytes differ from al.
mov edx, 0xe0000000

; The only 0x61 byte is at offset 14 (third byte of the dword at offset 12).
mov eax, 0x45464748
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51615354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

; Start at offset 16 and walk down.
lea edi, [edx + 8 * 2]

std
mov eax, 0x61
mov ecx, 8
; Clears ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0

; Offsets 16 and 15 differ, offset 14 matches: 3 iterations,
; so CONFIG expects RCX=5 and RDI=0xE000000D.
repne scasb

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AE_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "7",
    "RDI": "0xE000000F"
  },
  "Mode": "32BIT"
}
%endif

; REP (REPE) SCASB scanning backward (DF set): continues while bytes equal al.
mov edx, 0xe0000000

mov eax, 0x45466161
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

; Start at offset 16, which holds 0x00.
lea edi, [edx + 8 * 2]

std
mov eax, 0x61
mov ecx, 8
; Sets ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0x61

; The first byte already differs from al, so the scan stops after one
; iteration: CONFIG expects RCX=7 and RDI=0xE000000F.
rep scasb

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AF_REP_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "6",
    "RDI": "0xE0000008"
  },
  "Mode": "32BIT"
}
%endif

; REP (REPE) SCASD scanning forward: continues while dwords equal eax.
mov edx, 0xe0000000

; Only the first dword equals the value being scanned for.
mov eax, 0x61626364
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

lea edi, [edx + 8 * 0]

cld
mov eax, 0x61626364
mov ecx, 8
; Sets ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0x61626364

; First dword matches, second does not: 2 iterations of 4 bytes,
; so CONFIG expects RCX=6 and RDI=0xE0000008.
rep scasd

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_AF_REP_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "6",
    "RDI": "0xE0000004"
  },
  "Mode": "32BIT"
}
%endif

; REP (REPE) SCASW scanning forward: continues while words equal ax.
mov edx, 0xe0000000

; The first word in memory is 0x6162, the second is 0x4546.
mov eax, 0x45466162
mov [edx + 8 * 0], eax
mov eax, 0x41424344
mov [edx + 8 * 0 + 4], eax
mov eax, 0x55565758
mov [edx + 8 * 1], eax
mov eax, 0x51525354
mov [edx + 8 * 1 + 4], eax
mov eax, 0x0
mov [edx + 8 * 2], eax

lea edi, [edx + 8 * 0]

cld
mov eax, 0x6162
mov ecx, 8
; Sets ZF before the string instruction starts
; (presumably so the loop cannot rely on a stale flag state - confirm).
cmp eax, 0x6162

; First word matches, second does not: 2 iterations of 2 bytes,
; so CONFIG expects RCX=6 and RDI=0xE0000004.
rep scasw

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_C9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBP": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; LEAVE: restores esp from ebp and pops the saved ebp.
mov esp, 0xe0000020
mov ebp, 0x41424344

; Act like an ENTER frame without using ENTER
push ebp
mov ebp, esp
; The call pushes a return address below the frame; LEAVE discards it by
; reloading esp from ebp, so execution never returns to the jmp below.
call .target
jmp .end

.target:
mov eax, 1
; CONFIG expects RBP=0x41424344, the value saved by the push above.
leave

; Reached by falling through from .target.
.end:
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_CE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  },
  "Mode": "32BIT"
}
%endif

; Clear OF just in case (test always clears OF and CF)
test eax, eax

; Just ensure it executes safely
; With OF clear, INTO must not raise the overflow trap.
into

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_CF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RSP": "0xe0000010"
  },
  "Mode": "32BIT"
}
%endif

; IRETD: builds a three-dword return frame by hand and returns through it.
mov esp, 0xe0000010

lea ebx, [rel .end]

; Frame is pushed in the order IRETD pops it in reverse: flags, CS, then EIP.
mov eax, 0x202
push eax ; EFLAGS
mov eax, 0x33
push eax ; CS
push ebx ; EIP

; Poison eax; only the code at .end sets the expected value of 1.
mov eax, -1
; Pops 12 bytes, so CONFIG expects RSP back at 0xe0000010.
iretd

; Super fail
mov eax, 2
hlt

; Not referenced by any branch in this file.
.end_fail:
mov eax, 0
hlt

.end:
mov eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_D4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2"
  },
  "Mode": "32BIT"
}
%endif

; AAM with the default and with explicit immediate bases.
; AAM sets ah = al / base and al = al % base.
mov eax, 0x1234
; al = 0x34 (52), base 10 -> ah = 5, al = 2
aam
; al = 2 is below every following base, so each step yields ah = 0, al = 2.
aam 0xc
aam 0x1f
aam 0xff
; CONFIG expects RAX=0x2.
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_D5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xe8"
  },
  "Mode": "32BIT"
}
%endif

; AAD with the default and with explicit immediate bases.
; AAD sets al = (al + ah * base) & 0xFF and ah = 0.
mov eax, 0x1234
; ah = 0x12 (18), al = 0x34 (52), base 10 -> al = 232 = 0xE8, ah = 0
aad
; ah is now 0, so each following step leaves al = 0xE8 unchanged.
aad 0x3
aad 0x1f
aad 0xae
; CONFIG expects RAX=0xe8.
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_D6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000041D7FF00"
  },
  "Mode": "32BIT"
}
%endif

; SALC: sets al to 0x00 or 0xFF depending on CF, without touching flags.
; Three result bytes are written into one dword which is loaded at the end.
mov edx, 0xe0000000

; Byte 3 of the result dword (0x41) is never overwritten.
mov eax, 0x41424344
mov [edx + 8 * 0], eax
mov eax, 0x51525354

; Set resulting al to zero
clc
salc
mov [edx + 8 * 0 + 0], al

; Set resulting al to 0xFF
stc
salc
; AH from this lahf is not stored; only AL is written below.
lahf
mov [edx + 8 * 0 + 1], al

; Ensure that salc doesn't set flags
; sahf loads all flag bits from ah=0xFF; lahf then reads back 0xD7
; (SF, ZF, AF, PF, CF plus the always-set bit 1).
mov eax, -1
sahf
salc
lahf
mov [edx + 8 * 0 + 2], ah

; CONFIG expects RAX=0x41D7FF00.
mov eax, [edx + 8 * 0]

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0x10000"
  },
  "Mode": "32BIT"
}
%endif

; JECXZ and JCXZ: branch when the 32-bit or 16-bit counter is zero.

; First loop counts ecx down from 0x10 and leaves via jecxz.
mov ecx, 0x10

.loop:
dec ecx
jecxz .end
jmp .loop
.end:

; Second loop decrements and tests only cx. Starting from 0x1FFFF the low
; word reaches zero while bit 16 stays set, so CONFIG expects RCX=0x10000.
mov ecx, 0x1FFFF

.loop2:
dec cx
jcxz .end2
jmp .loop2
.end2:

hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for underflow specifically
; Will crash or hit the code we emit to memory

; We map ten pages to 0xe000'0000
; Generate a mov eax + hlt over there first
; 0xb8'44'43'42'41: mov eax, 0x41424344
; 0xf4: hlt

; Write the six bytes of the target stub one piece at a time.
mov ebx, 0xe0000000
mov al, 0xb8
mov byte [ebx], al
mov eax, 0x41424344
mov dword [ebx + 1], eax
mov al, 0xf4
mov byte [ebx + 5], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
mov eax, 0

; Setup esp
; (the call below pushes a return address, so it needs a valid stack)
mov esp, 0xe0001000

; This is dependent on where it is in the code!
; -0x20000000 is the 32-bit address 0xe0000000 where the stub was written.
call -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_E8_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for overflow specifically
; Will crash or hit the code we emit to memory

; We map ten pages to 0xe000'0000
; Generate a call 0x11000 over there
; 0xe8'fb'0f'01'20 : call 0x11000
; 0xf4: hlt - Just in case
; The displacement 0x20010ffb added to the next-instruction address
; 0xe0000005 is 0x1'0001'1000, which wraps to 0x11000 in 32 bits.

; Write the six bytes of the trampoline one piece at a time.
mov ebx, 0xe0000000
mov al, 0xe8
mov byte [ebx], al
mov eax, 0x20010ffb
mov dword [ebx + 1], eax
mov al, 0xf4
mov byte [ebx + 5], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
mov eax, 0

; Setup esp
; (both calls push a return address, so they need a valid stack)
mov esp, 0xe0001000

; This is dependent on where it is in the code!
; -0x20000000 is the 32-bit address 0xe0000000 where the trampoline was written.
call -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt

; This is where the JIT code will land
; (the emitted call targets 0x11000; this assumes the page-aligned code
; below is loaded at that address - verify against the test loader)
align 0x1000

mov eax, 0x41424344
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for underflow specifically
; Will crash or hit the code we emit to memory

; We map ten pages to 0xe000'0000
; Generate a mov eax + hlt over there first
; 0xb8'44'43'42'41: mov eax, 0x41424344
; 0xf4: hlt

; Write the six bytes of the target stub one piece at a time.
mov ebx, 0xe0000000
mov al, 0xb8
mov byte [ebx], al
mov eax, 0x41424344
mov dword [ebx + 1], eax
mov al, 0xf4
mov byte [ebx + 5], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
mov eax, 0

; This is dependent on where it is in the code!
; -0x20000000 is the 32-bit address 0xe0000000 where the stub was written.
jmp -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Primary_E9_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for overflow specifically
; Will crash or hit the code we emit to memory

; We map ten pages to 0xe000'0000
; Generate a jmp 0x11000 over there
; 0xe9'fb'0f'01'20 : jmp 0x11000
; 0xf4: hlt - Just in case
; The displacement 0x20010ffb added to the next-instruction address
; 0xe0000005 is 0x1'0001'1000, which wraps to 0x11000 in 32 bits.

; Write the six bytes of the trampoline one piece at a time.
mov ebx, 0xe0000000
mov al, 0xe9
mov byte [ebx], al
mov eax, 0x20010ffb
mov dword [ebx + 1], eax
mov al, 0xf4
mov byte [ebx + 5], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
mov eax, 0

; This is dependent on where it is in the code!
; -0x20000000 is the 32-bit address 0xe0000000 where the trampoline was written.
jmp -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt


; This is where the JIT code will land
; (the emitted jmp targets 0x11000; this assumes the page-aligned code
; below is loaded at that address - verify against the test loader)
align 0x1000

mov eax, 0x41424344
hlt


================================================
FILE: unittests/32Bit_ASM/Primary/Push_Segments.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSP": "0xE000001C"
  },
  "Mode": "32BIT"
}
%endif

; Pushes every segment register with the default and with the 16-bit
; operand size and checks the resulting stack pointer.
mov esp, 0xe0000040

; Only push the segments
; Doesn't check for a correct segment value
; Just ensures we are pushing the correct amount of data
; Six pushes at the default operand size: 6 * 4 = 24 bytes.
push cs
push ss
push ds
push es
push fs
push gs

; Six pushes with the operand-size override: 6 * 2 = 12 bytes.
; Total 36 (0x24) bytes, so CONFIG expects RSP=0xE000001C.
o16 push cs
o16 push ss
o16 push ds
o16 push es
o16 push fs
o16 push gs

hlt


================================================
FILE: unittests/32Bit_ASM/PrimaryGroup/3_F6_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x00000000000003fc"
  },
  "Mode": "32BIT"
}
%endif

; One-operand 8-bit IMUL: checks CF and OF after each multiply.
;
; ofcfmerge: appends the current CF and then OF as two new low bits of edi.
; Clobbers eax and ebx.
%macro ofcfmerge 0
  ; Get CF
  setc al
  ; Get OF
  seto bl
  and eax, 1
  and ebx, 1

  ; Merge in to results
  shl edi, 1
  or edi, eax

  ; Merge in to results
  shl edi, 1
  or edi, ebx
%endmacro

mov edi, 0

; Max Negative
mov al, 0x80
mov bl, 0x80

imul bl

ofcfmerge

; Large Positive (0x79 = 121; the int8 maximum would be 0x7F)
mov al, 0x79
mov bl, 0x79

imul bl

ofcfmerge

; Large Positive and Max Negative
mov al, 0x79
mov bl, 0x80

imul bl

ofcfmerge

; Max Negative and Large Positive
mov al, 0x80
mov bl, 0x79

imul bl

ofcfmerge

; No Overflow

mov al, 0x1
mov bl, 0x1

imul bl

ofcfmerge

; CONFIG expects RDI=0x3fc: four overflowing cases (CF=OF=1) followed by
; one non-overflowing case (CF=OF=0), i.e. binary 11 11 11 11 00.
hlt


================================================
FILE: unittests/32Bit_ASM/PrimaryGroup/5_FF_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Indirect near CALL through a memory operand addressed by a register.
mov edi, 0xe0000000
lea esp, [edi + 8 * 4]

; Slot 0 holds the value the call target loads into eax.
mov eax, 0x41424344
mov [edi + 8 * 0], eax
mov eax, 0x51525354
mov [edi + 8 * 1], eax

; Slot 2 holds the address of the call target.
lea ebx, [rel .call_tgt]
mov [edi + 8 * 2], ebx

mov eax, 0
call dword [edi + 8 * 2]
jmp .end

.call_tgt:
mov eax, [edi + 8 * 0]
ret

; Couple things that could catch failure
mov eax, 0
jmp .end
mov eax, 0

.end:
hlt


================================================
FILE: unittests/32Bit_ASM/PrimaryGroup/5_FF_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Indirect near CALL through an absolute 32-bit memory address,
; encoded by hand.
mov edi, 0xe0000000
lea esp, [edi + 8 * 4]

; Slot 0 holds the value the call target loads into eax.
mov eax, 0x41424344
mov [edi + 8 * 0], eax
mov eax, 0x51525354
mov [edi + 8 * 1], eax

mov eax, 0
; call dword [.jmp_data]
; Bytes: FF /2 with modrm 0x15 (absolute disp32) followed by the address.
db 0xFF
db 0x15
dd .jmp_data
jmp .end

.call_tgt:
mov eax, [edi + 8 * 0]
ret

; Couple things that could catch failure
mov eax, 0
jmp .end
mov eax, 0

.end:
hlt

; Pointer cell read by the call above.
.jmp_data:
dd .call_tgt


================================================
FILE: unittests/32Bit_ASM/PrimaryGroup/5_FF_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "MemoryRegions": {
    "0x80000000": "4096"
  },
  "Mode": "32BIT"
}
%endif

; Indirect near CALL through an absolute address with bit 31 set, where both
; the pointer cell and the call target live in the region at 0x80000000.
mov edi, 0xe0000000
lea esp, [edi + 8 * 4]

; Before we do anything, copy the code to an address that can be zexted
; 0x80000000 receives the target code, 0x80000004 receives a pointer to it.
mov eax, dword [.inst_data]
mov [0x80000000], eax
mov eax, dword [.inst_data2]
mov [0x80000004], eax

; Slot 0 holds the value the copied code loads into eax.
mov eax, 0x41424344
mov [edi + 8 * 0], eax
mov eax, 0x51525354
mov [edi + 8 * 1], eax

mov eax, 0
; call dword [0x80000004]
; Bytes: FF /2 with modrm 0x15 (absolute disp32) followed by the address.
db 0xFF
db 0x15
dd 0x80000004
hlt

.inst_data:
; mov eax, dword [edi]
; retn
; (backquoted string, so the \x escapes are real bytes; padded to 4 bytes)
db `\x8B\x07\xC3`
db 0
.inst_data2:
dd 0x80000000


================================================
FILE: unittests/32Bit_ASM/Secondary/07_XX_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "0x00000000FFFE0000"
  },
  "Mode": "32BIT"
}
%endif

; SGDT: stores the 16-bit limit followed by the 32-bit base.
sgdt [rel data]

; Read both fields back; CONFIG expects limit 0 and base 0xFFFE0000.
movzx eax, word [rel data]
mov ebx, dword [rel data + 2]
hlt

; Destination buffer on its own 4096-byte-aligned boundary.
align 4096
data:
; Limit
dw 0
; Base
dd 0


================================================
FILE: unittests/32Bit_ASM/Secondary/07_XX_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000080050033",
    "RBX": "0x0000000041420033",
    "RCX": "0x0000000041420033",
    "RDX": "0x0000000041420033",
    "RDI": "0x0000000080050033",
    "RSP": "0x0000000080050033",
    "RBP": "0x0000000041420033"
  },
  "Mode": "32BIT"
}
%endif

; SMSW with register and memory destinations, at 16-bit and 32-bit operand
; sizes and with stray rep prefixes.
; Per CONFIG, 32-bit register destinations receive 0x80050033 while 16-bit
; and memory destinations only replace the low word of 0x41424344.
mov eax, 0x41424344
mov ebx, 0x41424344
mov ecx, 0x41424344
mov edx, 0x41424344
; Pre-fill the memory operand so the untouched upper word is visible.
mov esi, 0xe000_0000
mov [esi], edx

mov edi, 0x41424344
mov esp, 0x41424344
mov ebp, 0x41424344

smsw eax
smsw bx

; Memory form: CONFIG expects RCX=0x41420033 after the reload.
smsw [esi]
mov ecx, [esi]

o16 smsw dx
; The rep prefixes must not change the result (RDI and RSP expect 0x80050033).
repe smsw edi
repne smsw esp

o16 smsw bp

hlt


================================================
FILE: unittests/32Bit_ASM/Secondary/15_XX_0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x11111111",
    "RBX": "0x22222222",
    "RCX": "0x33333333",
    "RDX": "0x44444444",
    "RSI": "0x55555555",
    "RDI": "0x66666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0x1112131415161718", "0x0"],
    "XMM1":  ["0x2122232425262728", "0x0"],
    "XMM2":  ["0x3132333435363738", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x0"],
    "XMM4":  ["0x5152535455565758", "0x0"],
    "XMM5":  ["0x6162636465666768", "0x0"],
    "XMM6":  ["0x7172737475767778", "0x0"],
    "XMM7":  ["0x8182838485868788", "0x0"]
  },
  "Mode": "32BIT"

}
%endif

mov esp, 0xe0000000
mov ebp, 0xe0000500

; Set up MMX state
mov eax, 0x11121314
mov ecx, 0x15161718
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm0, qword [ebp]

mov eax, 0x21222324
mov ecx, 0x25262728
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm1, qword [ebp]

mov eax, 0x31323334
mov ecx, 0x35363738
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm2, qword [ebp]

mov eax, 0x41424344
mov ecx, 0x45464748
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm3, qword [ebp]

mov eax, 0x51525354
mov ecx, 0x55565758
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm4, qword [ebp]

mov eax, 0x61626364
mov ecx, 0x65666768
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm5, qword [ebp]

mov eax, 0x71727374
mov ecx, 0x75767778
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm6, qword [ebp]

mov eax, 0x81828384
mov ecx, 0x85868788
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movq mm7, qword [ebp]

; Setup XMM state
mov eax, 0x11121314
mov ecx, 0x15161718
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm0, [ebp]

mov eax, 0x21222324
mov ecx, 0x25262728
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm1, [ebp]

mov eax, 0x31323334
mov ecx, 0x35363738
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm2, [ebp]

mov eax, 0x41424344
mov ecx, 0x45464748
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm3, [ebp]

mov eax, 0x51525354
mov ecx, 0x55565758
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm4, [ebp]

mov eax, 0x61626364
mov ecx, 0x65666768
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm5, [ebp]

mov eax, 0x71727374
mov ecx, 0x75767778
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm6, [ebp]

mov eax, 0x81828384
mov ecx, 0x85868788
mov dword [ebp + 4], eax
mov dword [ebp + 0], ecx
movsd xmm7, [ebp]

; Corrupt state and see what it stores
mov eax, 0x41424344

; Overwrite header
mov dword [esp + 0], eax
; Overwrite the mm state
mov eax, -1
mov dword [esp + 32 + 4 * 0], eax
mov dword [esp + 32 + 4 * 1], eax
mov dword [esp + 32 + 4 * 2], eax
mov dword [esp + 32 + 4 * 3], eax
mov dword [esp + 32 + 4 * 4], eax
mov dword [esp + 32 + 4 * 5], eax
mov dword [esp + 32 + 4 * 6], eax
mov dword [esp + 32 + 4 * 7], eax
mov dword [esp + 32 + 4 * 8], eax
mov dword [esp + 32 + 4 * 9], eax
mov dword [esp + 32 + 4 * 10], eax
mov dword [esp + 32 + 4 * 11], eax
mov dword [esp + 32 + 4 * 12], eax
mov dword [esp + 32 + 4 * 13], eax
mov dword [esp + 32 + 4 * 14], eax
mov dword [esp + 32 + 4 * 15], eax

; Overwrite the xmm state
mov dword [esp + 160 + 4 * 0], eax
mov dword [esp + 160 + 4 * 1], eax
mov dword [esp + 160 + 4 * 2], eax
mov dword [esp + 160 + 4 * 3], eax
mov dword [esp + 160 + 4 * 4], eax
mov dword [esp + 160 + 4 * 5], eax
mov dword [esp + 160 + 4 * 6], eax
mov dword [esp + 160 + 4 * 7], eax
mov dword [esp + 160 + 4 * 8], eax
mov dword [esp + 160 + 4 * 9], eax
mov dword [esp + 160 + 4 * 10], eax
mov dword [esp + 160 + 4 * 11], eax
mov dword [esp + 160 + 4 * 12], eax
mov dword [esp + 160 + 4 * 13], eax
mov dword [esp + 160 + 4 * 14], eax
mov dword [esp + 160 + 4 * 15], eax
mov dword [esp + 160 + 4 * 16], eax
mov dword [esp + 160 + 4 * 17], eax
mov dword [esp + 160 + 4 * 18], eax
mov dword [esp + 160 + 4 * 19], eax
mov dword [esp + 160 + 4 * 20], eax
mov dword [esp + 160 + 4 * 21], eax
mov dword [esp + 160 + 4 * 22], eax
mov dword [esp + 160 + 4 * 23], eax
mov dword [esp + 160 + 4 * 24], eax
mov dword [esp + 160 + 4 * 25], eax
mov dword [esp + 160 + 4 * 26], eax
mov dword [esp + 160 + 4 * 27], eax
mov dword [esp + 160 + 4 * 28], eax
mov dword [esp + 160 + 4 * 29], eax
mov dword [esp + 160 + 4 * 30], eax
mov dword [esp + 160 + 4 * 31], eax
mov dword [esp + 160 + 4 * 32], eax
mov dword [esp + 160 + 4 * 33], eax
mov dword [esp + 160 + 4 * 34], eax
mov dword [esp + 160 + 4 * 35], eax
mov dword [esp + 160 + 4 * 36], eax
mov dword [esp + 160 + 4 * 37], eax
mov dword [esp + 160 + 4 * 38], eax
mov dword [esp + 160 + 4 * 39], eax
mov dword [esp + 160 + 4 * 40], eax
mov dword [esp + 160 + 4 * 41], eax
mov dword [esp + 160 + 4 * 42], eax
mov dword [esp + 160 + 4 * 43], eax
mov dword [esp + 160 + 4 * 44], eax
mov dword [esp + 160 + 4 * 45], eax
mov dword [esp + 160 + 4 * 46], eax
mov dword [esp + 160 + 4 * 47], eax
mov dword [esp + 160 + 4 * 48], eax
mov dword [esp + 160 + 4 * 49], eax
mov dword [esp + 160 + 4 * 50], eax
mov dword [esp + 160 + 4 * 51], eax
mov dword [esp + 160 + 4 * 52], eax
mov dword [esp + 160 + 4 * 53], eax
mov dword [esp + 160 + 4 * 54], eax
mov dword [esp + 160 + 4 * 55], eax
mov dword [esp + 160 + 4 * 56], eax
mov dword [esp + 160 + 4 * 57], eax
mov dword [esp + 160 + 4 * 58], eax
mov dword [esp + 160 + 4 * 59], eax
mov dword [esp + 160 + 4 * 60], eax
mov dword [esp + 160 + 4 * 61], eax
mov dword [esp + 160 + 4 * 62], eax
mov dword [esp + 160 + 4 * 63], eax

; Overwrite the three reserved 16byte elements
mov dword [esp + 416 + 4 * 0], eax
mov dword [esp + 416 + 4 * 1], eax
mov dword [esp + 416 + 4 * 2], eax
mov dword [esp + 416 + 4 * 3], eax
mov dword [esp + 416 + 4 * 4], eax
mov dword [esp + 416 + 4 * 5], eax
mov dword [esp + 416 + 4 * 6], eax
mov dword [esp + 416 + 4 * 7], eax
mov dword [esp + 416 + 4 * 8], eax
mov dword [esp + 416 + 4 * 9], eax
mov dword [esp + 416 + 4 * 10], eax
mov dword [esp + 416 + 4 * 11], eax

; Overwrite the three 16byte "available" slots
mov eax, 0x11111111
mov dword [esp + 464 + 4 * 0], eax
mov dword [esp + 464 + 4 * 1], eax
mov eax, 0x22222222
mov dword [esp + 464 + 4 * 2], eax
mov dword [esp + 464 + 4 * 3], eax
mov eax, 0x33333333
mov dword [esp + 464 + 4 * 4], eax
mov dword [esp + 464 + 4 * 5], eax
mov eax, 0x44444444
mov dword [esp + 464 + 4 * 6], eax
mov dword [esp + 464 + 4 * 7], eax
mov eax, 0x55555555
mov dword [esp + 464 + 4 * 8], eax
mov dword [esp + 464 + 4 * 9], eax
mov eax, 0x66666666
mov dword [esp + 464 + 4 * 10], eax
mov dword [esp + 464 + 4 * 11], eax
; Now save our state
fxsave [esp]

; Corrupt MMX And XMM state
mov eax, -1
mov dword [ebp + 0], eax
mov dword [ebp + 4], eax
mov dword [ebp + 8], eax
mov dword [ebp + 12], eax

movq mm0, qword [ebp]
movq mm1, qword [ebp]
movq mm2, qword [ebp]
movq mm3, qword [ebp]
movq mm4, qword [ebp]
movq mm5, qword [ebp]
movq mm6, qword [ebp]
movq mm7, qword [ebp]

; Setup XMM state
movaps xmm0, [ebp]
movaps xmm1, [ebp]
movaps xmm2, [ebp]
movaps xmm3, [ebp]
movaps xmm4, [ebp]
movaps xmm5, [ebp]
movaps xmm6, [ebp]
movaps xmm7, [ebp]
; Now reload the state we just saved
fxrstor [esp]

; Load the three 16bytes of "available" slots to make sure it wasn't overwritten
; Can't view full range here
; Reserved can be overwritten regardless
mov eax, dword [esp + 464 + 4 * 0]
mov ebx, dword [esp + 464 + 4 * 2]
mov ecx, dword [esp + 464 + 4 * 4]
mov edx, dword [esp + 464 + 4 * 6]
mov esi, dword [esp + 464 + 4 * 8]
mov edi, dword [esp + 464 + 4 * 10]

hlt


================================================
FILE: unittests/32Bit_ASM/SecondaryModRM/Reg_7_1.asm
================================================
%ifdef CONFIG
{
  "Mode": "32BIT",
  "HostFeatures": ["Linux"]
}
%endif

; Smoke test for RDTSCP (encoded 0F 01 F9, i.e. ModRM reg=7, rm=1) in 32-bit mode.
; The CONFIG block above lists no RegData, so no register value is compared;
; presumably the test passes as long as execution reaches the hlt.
; We can't really check the results of this
rdtscp

hlt


================================================
FILE: unittests/32Bit_ASM/SecondaryModRM/Reg_7_4_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x000000000a121a20",
    "RDX": "0x000000000b131b20"
  },
  "Mode": "32BIT",
  "HostFeatures": ["CLZERO"]
}
%endif

; CLZERO test (32-bit mode).
; Fills three consecutive 64-byte regions starting at 0xe8000000 with distinct
; dword patterns, executes CLZERO with eax pointing into the middle region, then
; sums each region back into ecx / ebx / edx so they can be compared with
; RegData above.
; Only one dword is written and read back per 8 bytes (stride 8, 8 slots per
; 64-byte region).
; Expected sums wrap at 32 bits:
;   ecx = 8 * 0x41424344 = 0x0a121a20  (region 0, must be untouched)
;   ebx = 0                            (region 1, must be cleared)
;   edx = 8 * 0x61626364 = 0x0b131b20  (region 2, must be untouched)
; Assumes writable memory is available at 0xe8000000 -- the mapping is not set
; up in this file.

; Starting address to store to
mov eax, 0xe8000000

; Set up the cachelines with garbage

; Cacheline 0
mov ebx, 0x41424344
mov [eax + 8 * 0], ebx
mov [eax + 8 * 1], ebx
mov [eax + 8 * 2], ebx
mov [eax + 8 * 3], ebx
mov [eax + 8 * 4], ebx
mov [eax + 8 * 5], ebx
mov [eax + 8 * 6], ebx
mov [eax + 8 * 7], ebx

; Cacheline 1
mov ebx, 0x55565758
mov [eax + 8 * 8], ebx
mov [eax + 8 * 9], ebx
mov [eax + 8 * 10], ebx
mov [eax + 8 * 11], ebx ; clzero here
mov [eax + 8 * 12], ebx
mov [eax + 8 * 13], ebx
mov [eax + 8 * 14], ebx
mov [eax + 8 * 15], ebx

; Cacheline 2
mov ebx, 0x61626364
mov [eax + 8 * 16], ebx
mov [eax + 8 * 17], ebx
mov [eax + 8 * 18], ebx
mov [eax + 8 * 19], ebx
mov [eax + 8 * 20], ebx
mov [eax + 8 * 21], ebx
mov [eax + 8 * 22], ebx
mov [eax + 8 * 23], ebx

; Point eax at the middle of cacheline 1 (byte offset 88) rather than its start,
; so the instruction itself has to align the address down to the line boundary.
lea eax, [eax + 8 * 11]

; CLZERO takes its address implicitly from eax
clzero

; Set eax back to the start
mov eax, 0xe8000000

; Zero the three accumulators before summing
mov ebx, 0
mov ecx, 0
mov edx, 0

; Cacheline 0 should be unmodified
add ecx, [eax + 8 * 0]
add ecx, [eax + 8 * 1]
add ecx, [eax + 8 * 2]
add ecx, [eax + 8 * 3]
add ecx, [eax + 8 * 4]
add ecx, [eax + 8 * 5]
add ecx, [eax + 8 * 6]
add ecx, [eax + 8 * 7]

; Cacheline 1 Should be zero
add ebx, [eax + 8 * 8]
add ebx, [eax + 8 * 9]
add ebx, [eax + 8 * 10]
add ebx, [eax + 8 * 11]
add ebx, [eax + 8 * 12]
add ebx, [eax + 8 * 13]
add ebx, [eax + 8 * 14]
add ebx, [eax + 8 * 15]

; Cacheline 2 should be unmodified
add edx, [eax + 8 * 16]
add edx, [eax + 8 * 17]
add edx, [eax + 8 * 18]
add edx, [eax + 8 * 19]
add edx, [eax + 8 * 20]
add edx, [eax + 8 * 21]
add edx, [eax + 8 * 22]
add edx, [eax + 8 * 23]

hlt


================================================
FILE: unittests/32Bit_ASM/TwoByte/0F_82.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for underflow specifically
; Will crash or hit the code we emit to memory
;
; Flow: write a tiny "mov eax, 0x41424344; hlt" stub at 0xe000'0000, then take
; a conditional near jump whose target is given as -0x20000000, which is
; 0xe000'0000 once truncated to 32 bits. The test passes only if the stub runs
; and leaves 0x41424344 in eax (see RegData above).
;
; NOTE(review): the file is named 0F_82 (the JB/JC opcode) but the instruction
; under test below is jnb, whose near form is 0F 83 -- confirm the naming is
; intentional.

; We map ten pages to 0xe000'0000
; Generate a mov eax + hlt over there first
; 0xb8'44'43'42'41: mov eax, 0x41424344
; 0xf4: hlt

mov ebx, 0xe0000000
; Byte 0: opcode 0xb8 (mov eax, imm32)
mov al, 0xb8
mov byte [ebx], al
; Bytes 1-4: the little-endian immediate
mov eax, 0x41424344
mov dword [ebx + 1], eax
; Byte 5: hlt
mov al, 0xf4
mov byte [ebx + 5], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
; (this also makes ah = 0, which sahf consumes next)
mov eax, 0

; Clear the lower flags so the branch gets taken
; (sahf loads the low flags from ah = 0, so CF = 0 and jnb is taken)
sahf

; This is dependent on where it is in the code!
jnb -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt


================================================
FILE: unittests/32Bit_ASM/TwoByte/0F_82_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344"
  },
  "Mode": "32BIT"
}
%endif

; Tests for 32-bit signed displacement wrapping
; Testing for overflow specifically
; Will crash or hit the code we emit to memory
;
; Flow: write a "jnb rel32" at 0xe000'0000 whose positive displacement pushes
; the target past 4GiB, jump to it, and expect execution to wrap around to the
; landing pad at the end of this file, which loads 0x41424344 into eax
; (see RegData above).

; We map ten pages to 0xe000'0000
; Generate a jnb 0x11000 over there
; 0x0f'83'fa'0f'01'20 : jnb 0x11000
;   next ip 0xe000'0006 + rel32 0x2001'0ffa = 0x1'0001'1000 -> 0x0001'1000 in 32 bits
; 0xf4: hlt - Just in case

mov ebx, 0xe0000000
; Bytes 0-1: 0f 83 (jnb rel32 opcode)
mov ax, 0x830f
mov word [ebx], ax
; Bytes 2-5: little-endian rel32 0x20010ffa
mov eax, 0x20010ffa
mov dword [ebx + 2], eax
; Byte 6: hlt, only reached if the emitted jnb falls through
mov al, 0xf4
mov byte [ebx + 6], al

; Do a jump dance to stop multiblock from trying to optimize
; Otherwise it will JIT code from 0xe000'0000 before written
lea ebx, [rel next]
jmp ebx
next:

; Move temp to eax to overwrite
; (this also makes ah = 0, which sahf consumes next)
mov eax, 0

; Clear the lower flags so the branch gets taken
; (sahf loads the low flags from ah = 0, so CF = 0; both this jnb and the one
; emitted at 0xe000'0000 rely on that)
sahf

; This is dependent on where it is in the code!
jnb -0x20000000

; Definitely wrong if we hit here
mov eax, -1
hlt

; This is where the JIT code will land
; NOTE(review): for the emitted jnb to land here, this aligned address must be
; 0x11000, i.e. the test image is presumably loaded at 0x10000 -- confirm
; against the harness.
align 0x1000

mov eax, 0x41424344
hlt


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; Checks VGATHERQPD when index + base + displacement wraps around the 32-bit
; address space.
;
; With D = address of .data_mid, the operands are set up as:
;   index[0] = -1 - D        base (eax) = 2 * D      disp = 1
;   address  = index * 1 + base + disp = D            (mod 2^32)
;   index[1] = -1 - D - 8    -> address = D - 8
; so lane 0 must load the first qword of .data_mid and lane 1 the qword
; immediately before it; those are the two XMM7 values in RegData above.

; 128-bit
; 1x displacement
; (scale factor 1; the displacement in the memory operand is also 1)
; 64-bit indexes

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is stored below; the high dword
; keeps the 0 it was initialised with in .index_overflow.

; index[0] = -1 - D
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; index[1] = -1 - D - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; index[2] and index[3] are filled in as well, but the vmovaps below only
; loads the first 16 bytes of the table (slots 0 and 1) into xmm0.
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

mov ebx, -1
sub ebx, eax
; NOTE(review): this is a 16-bit add, so a carry out of bx would not propagate
; into the upper half of ebx; `add ebx, 16` may have been intended. It only
; feeds index[3], which is not loaded into xmm0 -- confirm.
add bx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; (eax = 2 * D)
lea eax, [rel .data_mid]
shl eax, 1

; Pre-load the destination with unrelated bytes from the start of .data.
; RegData expects the upper 128 bits of the register to read back as zero.
vmovapd ymm7, [rel .data]

; xmm0 = the two qword indices, xmm1 = per-lane mask (sign bit set in each qword)
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
; RegData expects the mask register (xmm1) to be all zero after the gather
vgatherqpd xmm7, [xmm0 * 1 + eax + 1], xmm1

hlt

; Data follows on its own page. The 4096-byte alignment also satisfies the
; aligned loads above (mask at +0, indices at +32, .data at +64).
align 4096

; Sign bit set in every qword lane
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; Checks VGATHERQPD with a scale of 2 when index * 2 + base + displacement
; wraps around the 32-bit address space.
;
; With D = address of .data_mid (assumed even -- it follows a 4096-byte
; alignment plus the mask, index table and data rows), the operands are:
;   index[0] = (-1 - D) sar 1      base (eax) = 2 * D      disp = 2
;   index * 2 = -2 - D             (sar rounds the odd value down)
;   address   = index * 2 + base + disp = D               (mod 2^32)
;   index[1] = (-1 - D - 8) sar 1  -> address = D - 8
; so lane 0 must load the first qword of .data_mid and lane 1 the qword
; immediately before it; those are the two XMM7 values in RegData above.

; 128-bit
; 2x displacement
; (scale factor 2; the displacement in the memory operand is also 2)
; 64-bit indexes

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is stored below; the high dword
; keeps the 0 it was initialised with in .index_overflow.

; index[0] = (-1 - D) >> 1 (arithmetic)
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; index[1] = (-1 - D - 8) >> 1 (arithmetic)
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; index[2] and index[3] are filled in as well, but the vmovaps below only
; loads the first 16 bytes of the table (slots 0 and 1) into xmm0.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

mov ebx, -1
sub ebx, eax
; NOTE(review): this is a 16-bit add, so a carry out of bx would not propagate
; into the upper half of ebx; `add ebx, 16` may have been intended. It only
; feeds index[3], which is not loaded into xmm0 -- confirm.
add bx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; (eax = 2 * D)
lea eax, [rel .data_mid]
shl eax, 1

; Pre-load the destination with unrelated bytes from the start of .data.
; RegData expects the upper 128 bits of the register to read back as zero.
vmovapd ymm7, [rel .data]

; xmm0 = the two qword indices, xmm1 = per-lane mask (sign bit set in each qword)
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
; RegData expects the mask register (xmm1) to be all zero after the gather
vgatherqpd xmm7, [xmm0 * 2 + eax + 2], xmm1

hlt

; Data follows on its own page. The 4096-byte alignment also satisfies the
; aligned loads above (mask at +0, indices at +32, .data at +64).
align 4096

; Sign bit set in every qword lane
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x scale with a displacement of 4 (the "4xdisp" in the file name)
; 64-bit indexes
;
; Exercises vgatherqpd address arithmetic that wraps around. With
; eax = address of .data_mid, every element is gathered from
;   (2 * eax) + 4 * index + 4
; and each index is built as (-1 - eax + offset) >> 2, so the wrapped sum is
; expected to land on .data_mid + offset (compare the XMM7 values in the CONFIG
; block with the bytes around .data_mid).
; NOTE(review): this relies on .data_mid being a multiple of the scale, so the
; bits discarded by the shift are all ones and the displacement restores them.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
sar ebx, 2
; Only the low dword of the 64-bit index slot is written; the high dword keeps
; its assembled value of zero.
mov [rel .index_overflow + 0 * 8], ebx

; Element 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Elements 2 and 3 are stored but not consumed by this test: the 16-byte xmm0
; load below reads only index slots 0 and 1.

; Element 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Element 3 -> .data_mid + 16
mov ebx, -1
sub ebx, eax
; Full 32-bit add, matching the 32-bit sub used for the other elements; a
; 16-bit `add bx, 16` would drop any carry out of bit 15.
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; base = 2 * address of .data_mid
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with the first 32 bytes of .data; the gather is
; expected to replace them (see XMM7 in the CONFIG block).
vmovapd ymm7, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
; The CONFIG block expects the mask register (XMM1) to read back as all zero.
vgatherqpd xmm7, [xmm0 * 4 + eax + 4], xmm1

hlt

; Align the data that follows to a 4096-byte boundary.
align 4096

; Gather mask: four qwords, each with only its most-significant bit set.
; The test loads the first 16 bytes into xmm1 as the vgatherqpd mask operand.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four zero-initialised qword slots. The test code stores a 32-bit value into
; the low dword of each slot at run time, so the high dword of every index
; stays zero.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x scale with a displacement of 8 (the "8xdisp" in the file name)
; 64-bit indexes
;
; Exercises vgatherqpd address arithmetic that wraps around. With
; eax = address of .data_mid, every element is gathered from
;   (2 * eax) + 8 * index + 8
; and each index is built as (-1 - eax + offset) >> 3, so the wrapped sum is
; expected to land on .data_mid + offset (compare the XMM7 values in the CONFIG
; block with the bytes around .data_mid).
; NOTE(review): this relies on .data_mid being a multiple of the scale, so the
; bits discarded by the shift are all ones and the displacement restores them.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
sar ebx, 3
; Only the low dword of the 64-bit index slot is written; the high dword keeps
; its assembled value of zero.
mov [rel .index_overflow + 0 * 8], ebx

; Element 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Elements 2 and 3 are stored but not consumed by this test: the 16-byte xmm0
; load below reads only index slots 0 and 1.

; Element 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Element 3 -> .data_mid + 16
mov ebx, -1
sub ebx, eax
; Full 32-bit add, matching the 32-bit sub used for the other elements; a
; 16-bit `add bx, 16` would drop any carry out of bit 15.
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; base = 2 * address of .data_mid
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with the first 32 bytes of .data; the gather is
; expected to replace them (see XMM7 in the CONFIG block).
vmovapd ymm7, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
; The CONFIG block expects the mask register (XMM1) to read back as all zero.
vgatherqpd xmm7, [xmm0 * 8 + eax + 8], xmm1

hlt

; Align the data that follows to a 4096-byte boundary.
align 4096

; Gather mask: four qwords, each with only its most-significant bit set.
; The test loads the first 16 bytes into xmm1 as the vgatherqpd mask operand.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four zero-initialised qword slots. The test code stores a 32-bit value into
; the low dword of each slot at run time, so the high dword of every index
; stays zero.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 1x scale with a displacement of 1 (the "1xdisp" in the file name)
; 64-bit indexes
;
; Exercises vgatherqpd address arithmetic that wraps around. With
; eax = address of .data_mid, every element is gathered from
;   (2 * eax) + 1 * index + 1
; and each index is built as (-1 - eax + offset), so the wrapped sum is
; expected to land on .data_mid + offset (compare the XMM7 values in the CONFIG
; block with the bytes around .data_mid).

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
; Only the low dword of the 64-bit index slot is written; the high dword keeps
; its assembled value of zero.
mov [rel .index_overflow + 0 * 8], ebx

; Element 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Element 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; Element 3 -> .data_mid + 16
mov ebx, -1
sub ebx, eax
; Full 32-bit add, matching the 32-bit sub used for the other elements; a
; 16-bit `add bx, 16` would drop any carry out of bit 15 and corrupt the
; index that the 256-bit gather reads for this element.
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; base = 2 * address of .data_mid
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with the first 32 bytes of .data; the gather is
; expected to replace them (see XMM7 in the CONFIG block).
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
; The CONFIG block expects the mask register (XMM1) to read back as all zero.
vgatherqpd ymm7, [ymm0 * 1 + eax + 1], ymm1

hlt

; Align the data that follows to a 4096-byte boundary.
align 4096

; Gather mask: four qwords, each with only its most-significant bit set.
; The test loads all 32 bytes into ymm1 as the vgatherqpd mask operand.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four zero-initialised qword slots. The test code stores a 32-bit value into
; the low dword of each slot at run time, so the high dword of every index
; stays zero.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination and four 64-bit indexes.
; Addressing form under test: [ymm0 * 2 + eax + 2]  (index scale 2, displacement +2).
;
; Let A be the address of .data_mid. Each lane's index is (~A + k) >> 1 and the
; base register holds A << 1, so the sum index*2 + base + 2 only lands back
; inside the data table after wrapping around the 32-bit address space.
; Per the expected XMM7 values in the CONFIG block, lanes 0..3 read the qwords
; at .data_mid + 0, - 8, - 16 and + 16.
; NOTE(review): the arithmetic relies on .data_mid being at least 2-byte
; aligned so that the bit shifted out by `sar` is the one the +2 displacement
; compensates for.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is written below; the high
; dword keeps the zero it was assembled with.

; Lane 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 -> .data_mid + 16
; Adjust the full 32-bit register, like the other lanes do, so that a carry
; out of bit 15 propagates into the upper half of the index.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with known data from the start of the table.
; Every mask lane is set, so all four elements are expected to be replaced.
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
; The CONFIG block expects the mask register (ymm1) to read back as all zero
; after the gather.
vgatherqpd ymm7, [ymm0 * 2 + eax + 2], ymm1

hlt

; Start the test data on a 4096-byte boundary.
align 4096

; Gather mask: the top bit of every qword lane is set, so all four lanes are
; selected.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four 64-bit index slots (the gather treats each index as a signed 64-bit
; integer). They assemble as zero; the test code stores the low dword of each
; slot at run time.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination and four 64-bit indexes.
; Addressing form under test: [ymm0 * 4 + eax + 4]  (index scale 4, displacement +4).
;
; Let A be the address of .data_mid. Each lane's index is (~A + k) >> 2 and the
; base register holds A << 1, so the sum index*4 + base + 4 only lands back
; inside the data table after wrapping around the 32-bit address space.
; Per the expected XMM7 values in the CONFIG block, lanes 0..3 read the qwords
; at .data_mid + 0, - 8, - 16 and + 16.
; NOTE(review): the arithmetic relies on .data_mid being at least 4-byte
; aligned so that the bits shifted out by `sar` are the ones the +4
; displacement compensates for.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is written below; the high
; dword keeps the zero it was assembled with.

; Lane 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 -> .data_mid + 16
; Adjust the full 32-bit register, like the other lanes do, so that a carry
; out of bit 15 propagates into the upper half of the index.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with known data from the start of the table.
; Every mask lane is set, so all four elements are expected to be replaced.
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
; The CONFIG block expects the mask register (ymm1) to read back as all zero
; after the gather.
vgatherqpd ymm7, [ymm0 * 4 + eax + 4], ymm1

hlt

; Start the test data on a 4096-byte boundary.
align 4096

; Gather mask: the top bit of every qword lane is set, so all four lanes are
; selected.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four 64-bit index slots (the gather treats each index as a signed 64-bit
; integer). They assemble as zero; the test code stores the low dword of each
; slot at run time.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qpd_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four 64-bit elements)
; Index scale of 8 with a +8 displacement ("8xdisp" in the file name)
; 64-bit indexes
;
; Every index is derived from (-1 - .data_mid) so that the effective address
;   index * 8 + base + 8
; only lands on valid memory once the 32-bit sum has wrapped around.
; The arithmetic below relies on .data_mid being 8-byte aligned, so the
; arithmetic shift by 3 discards nothing but the low three set bits.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is written; the high dword
; keeps the zero it was assembled with.

; Index 0: resolves to .data_mid
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: resolves to .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: resolves to .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: resolves to .data_mid + 16
; The add uses the full 32-bit register like every other lane; a 16-bit
; `add bx, 16` would silently drop any carry out of the low word.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * .data_mid, which cancels the -.data_mid folded into every index.
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with the first 32 bytes of .data; RegData expects
; all four elements to be replaced by gathered values.
vmovapd ymm7, [rel .data]

; ymm0 = the four indexes, ymm1 = mask with every element's sign bit set.
vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
; The gather zeroes the mask register on completion, which is what the
; all-zero XMM1 entry in RegData checks.
vgatherqpd ymm7, [ymm0 * 8 + eax + 8], ymm1

; End of test
hlt

; Pad to a 4096-byte boundary so the data below starts at a fixed offset
; from an aligned address (also satisfies the aligned vmovaps/vmovapd loads).
align 4096

; Gather mask: the sign bit of each of the four 64-bit elements is set,
; so every element is selected for loading.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four index slots, assembled as zero and filled in at runtime by the test
; code, which stores only the low dword of each slot.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 128-bit destination
; Index scale of 1 with a +1 displacement ("1xdisp" in the file name)
; 64-bit indexes
;
; Every index is derived from (-1 - .data_mid) so that the effective address
;   index * 1 + base + 1
; only lands on valid memory once the 32-bit sum has wrapped around.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is written; the high dword
; keeps the zero it was assembled with.
; All four slots are filled, but the 128-bit index load further down only
; picks up indexes 0 and 1.

; Index 0: resolves to .data_mid
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: resolves to .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: resolves to .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: resolves to .data_mid + 16
; The add uses the full 32-bit register like every other lane; a 16-bit
; `add bx, 16` would silently drop any carry out of the low word.
mov ebx, -1
sub ebx, eax
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * .data_mid, which cancels the -.data_mid folded into every index.
lea eax, [rel .data_mid]
shl eax, 1

; Preload all of ymm7 with non-zero data; RegData expects everything above
; the two gathered floats to read back as zero.
vmovapd ymm7, [rel .data]

; xmm0 = indexes 0 and 1, xmm1 = first four dwords of the mask table.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
; The gather zeroes the mask register on completion, which is what the
; all-zero XMM1 entry in RegData checks.
vgatherqps xmm7, [xmm0 * 1 + eax + 1], xmm1

; End of test
hlt

; Pad to a 4096-byte boundary so the data below starts at a fixed offset
; from an aligned address (also satisfies the aligned vmovaps/vmovapd loads).
align 4096

; Gather mask: eight dwords, each with its sign bit set. The test loads
; only the first 16 bytes of this table into the mask register.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four index slots, assembled as zero and filled in at runtime by the test
; code, which stores only the low dword of each slot.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 128-bit destination
; Index scale of 2 with a +2 displacement ("2xdisp" in the file name)
; 64-bit indexes
;
; Every index is derived from (-1 - .data_mid) so that the effective address
;   index * 2 + base + 2
; only lands on valid memory once the 32-bit sum has wrapped around.
; The arithmetic below relies on .data_mid being at least 2-byte aligned, so
; the arithmetic shift by 1 discards nothing but the low set bit.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Only the low dword of each 64-bit index slot is written; the high dword
; keeps the zero it was assembled with.
; All four slots are filled, but the 128-bit index load further down only
; picks up indexes 0 and 1.

; Index 0: resolves to .data_mid
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: resolves to .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: resolves to .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: resolves to .data_mid + 16
; The add uses the full 32-bit register like every other lane; a 16-bit
; `add bx, 16` would silently drop any carry out of the low word.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * .data_mid, which cancels the -.data_mid folded into every index.
lea eax, [rel .data_mid]
shl eax, 1

; Preload all of ymm7 with non-zero data; RegData expects everything above
; the two gathered floats to read back as zero.
vmovapd ymm7, [rel .data]

; xmm0 = indexes 0 and 1, xmm1 = first four dwords of the mask table.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
; The gather zeroes the mask register on completion, which is what the
; all-zero XMM1 entry in RegData checks.
vgatherqps xmm7, [xmm0 * 2 + eax + 2], xmm1

; End of test
hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords with only bit 31 set.
; The test loads the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, assembled as zero; the test code writes the low dword of
; each slot at run time before loading the table.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps address-overflow test (32-bit mode)
; 128-bit destination
; 4x index scale, displacement of 4
; 64-bit indexes
;
; The base register holds .data_mid * 2 and every index is roughly
; -.data_mid / 4, so base + index * 4 + 4 only lands back inside the data
; table once the sum wraps at 32 bits. RegData expects the low dword of XMM7
; to be the first dword of .data_mid and the next dword to be the one found
; 8 bytes before .data_mid.

; Build the index table. Each entry is (-1 - .data_mid - k) >> 2 and is
; written to the low dword of its qword slot; the high dword keeps the zero
; it was assembled with.
lea eax, [rel .data_mid]

; Index 0: k = 0
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: k = 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: k = 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: k = -16
; Full 32-bit add, matching the other entries, so that a carry out of bit 15
; is not dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate the new base: twice the address of .data_mid, which the scaled
; index and displacement bring back down through the overflow.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-fill the destination with table data; RegData expects every bit above
; the two gathered floats to read back as zero afterwards.
vmovapd ymm7, [rel .data]

; Only the first two index qwords fit in xmm0.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [xmm0 * 4 + eax + 4], xmm1

hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords with only bit 31 set.
; The test loads the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, assembled as zero; the test code writes the low dword of
; each slot at run time before loading the table.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps address-overflow test (32-bit mode)
; 128-bit destination
; 8x index scale, displacement of 8
; 64-bit indexes
;
; The base register holds .data_mid * 2 and every index is roughly
; -.data_mid / 8, so base + index * 8 + 8 only lands back inside the data
; table once the sum wraps at 32 bits. RegData expects the low dword of XMM7
; to be the first dword of .data_mid and the next dword to be the one found
; 8 bytes before .data_mid.

; Build the index table. Each entry is (-1 - .data_mid - k) >> 3 and is
; written to the low dword of its qword slot; the high dword keeps the zero
; it was assembled with.
lea eax, [rel .data_mid]

; Index 0: k = 0
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: k = 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: k = 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: k = -16
; Full 32-bit add, matching the other entries, so that a carry out of bit 15
; is not dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate the new base: twice the address of .data_mid, which the scaled
; index and displacement bring back down through the overflow.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-fill the destination with table data; RegData expects every bit above
; the two gathered floats to read back as zero afterwards.
vmovapd ymm7, [rel .data]

; Only the first two index qwords fit in xmm0.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [xmm0 * 8 + eax + 8], xmm1

hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords with only bit 31 set.
; The test loads the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, assembled as zero; the test code writes the low dword of
; each slot at run time before loading the table.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector (ymm0) gathering four dwords into xmm7
; Scale of 1, displacement of 1
; 64-bit indexes
;
; Each lane's effective address is base + index * 1 + 1, where
;   base  = 2 * .data_mid
;   index = -1 - .data_mid + lane_offset
; The sum wraps around the 32-bit address space (the test runs in 32BIT mode)
; and lands on .data_mid + lane_offset.
; Lane offsets are 0, -8, -16 and +16.
;
; Only the low dword of each 64-bit index slot is written below; the high
; dword keeps the zero it was initialised with in .index_overflow.

; eax = address of .data_mid, used to derive the overflowing indexes
lea eax, [rel .data_mid]

; Lane 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 -> .data_mid + 16
; Full 32-bit add: a 16-bit add would drop any carry out of bit 15 and
; corrupt the upper half of the index.
mov ebx, -1
sub ebx, eax
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow: eax = 2 * .data_mid
lea eax, [rel .data_mid]
shl eax, 1

; Seed the destination with the first 32 bytes of .data. The gather must
; replace the low 128 bits and clear the rest (RegData expects the upper two
; qwords of XMM7 to be 0).
vmovapd ymm7, [rel .data]

; ymm0 = four 64-bit indexes, xmm1 = per-element mask (sign bit set in all
; four dwords). RegData expects XMM1 to read back as all zeroes afterwards.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [ymm0 * 1 + eax + 1], xmm1

hlt

; Place the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords, each with only the sign bit set.
.mask_11111111:
times 8 dd 0x80000000

; Four zero-initialised 64-bit index slots, filled in at run time.
; Gather indexes are signed 64-bit integers.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector (ymm0) gathering four dwords into xmm7
; Scale of 2, displacement of 2
; 64-bit indexes
;
; Each lane's effective address is base + index * 2 + 2, where
;   base  = 2 * .data_mid
;   index = (-1 - .data_mid + lane_offset) >> 1   (arithmetic shift)
; The sum wraps around the 32-bit address space (the test runs in 32BIT mode)
; and lands on .data_mid + lane_offset.
; Lane offsets are 0, -8, -16 and +16.
; NOTE(review): the arithmetic assumes .data_mid is at least 2-byte aligned, so
; that the bit dropped by the shift plus the initial -1 is exactly cancelled by
; the displacement of 2 -- confirm if the data layout changes.
;
; Only the low dword of each 64-bit index slot is written below; the high
; dword keeps the zero it was initialised with in .index_overflow.

; eax = address of .data_mid, used to derive the overflowing indexes
lea eax, [rel .data_mid]

; Lane 0 -> .data_mid + 0
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 -> .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 -> .data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 -> .data_mid + 16
; Full 32-bit add: a 16-bit add would drop any carry out of bit 15 and
; corrupt the upper half of the index before it is shifted.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow: eax = 2 * .data_mid
lea eax, [rel .data_mid]
shl eax, 1

; Seed the destination with the first 32 bytes of .data. The gather must
; replace the low 128 bits and clear the rest (RegData expects the upper two
; qwords of XMM7 to be 0).
vmovapd ymm7, [rel .data]

; ymm0 = four 64-bit indexes, xmm1 = per-element mask (sign bit set in all
; four dwords). RegData expects XMM1 to read back as all zeroes afterwards.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [ymm0 * 2 + eax + 2], xmm1

hlt

; Place the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords, each with only the sign bit set.
.mask_11111111:
times 8 dd 0x80000000

; Four zero-initialised 64-bit index slots, filled in at run time.
; Gather indexes are signed 64-bit integers.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; VGATHERQPS address-overflow test.
; 256-bit index register (ymm0, four qword indexes)
; 4x scale, displacement of 4
; 64-bit indexes
;
; Each lane's effective address is eax + index * 4 + 4. The indexes and the
; base are built below so that this sum is intended to overflow and wrap back
; to an address near .data_mid.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Index 0: ebx = -1 - eax (i.e. ~eax), pre-divided by the scale of 4 with an
; arithmetic shift. Only the low dword of the qword slot is written; the high
; dword keeps the 0 from the `dq` initializer.
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: same as index 0, but 8 bytes lower before scaling.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: same as index 0, but 16 bytes lower before scaling.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: 16 bytes higher before scaling.
; NOTE(review): this adds to the 16-bit `bx`, unlike the `sub ebx, N` lines
; above; it only equals `add ebx, 16` when the low 16 bits do not carry.
; Confirm this is intended.
mov ebx, -1
sub ebx, eax
add bx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * address of .data_mid, so base + index * 4 + 4 comes out as
; .data_mid + {0, -8, -16, +16} for the four lanes after wrapping
; (assuming .data_mid is 4-byte aligned -- TODO confirm).
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with data from .data so that stale contents are
; distinguishable from gathered values.
vmovapd ymm7, [rel .data]

; ymm0 = four qword indexes, xmm1 = mask with the sign bit set in each of the
; four loaded dwords (all loaded elements enabled). The CONFIG block above
; expects XMM1 to be all zero and the upper half of XMM7 to be zero afterwards.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [ymm0 * 4 + eax + 4], xmm1

hlt

align 4096

; Eight dwords with only the sign bit set; the code above loads the first four.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vgather_qps_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; VGATHERQPS address-overflow test.
; 256-bit index register (ymm0, four qword indexes)
; 8x scale, displacement of 8
; 64-bit indexes
;
; Each lane's effective address is eax + index * 8 + 8. The indexes and the
; base are built below so that this sum is intended to overflow and wrap back
; to an address near .data_mid.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Index 0: ebx = -1 - eax (i.e. ~eax), pre-divided by the scale of 8 with an
; arithmetic shift. Only the low dword of the qword slot is written; the high
; dword keeps the 0 from the `dq` initializer.
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: same as index 0, but 8 bytes lower before scaling.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: same as index 0, but 16 bytes lower before scaling.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: 16 bytes higher before scaling.
; NOTE(review): this adds to the 16-bit `bx`, unlike the `sub ebx, N` lines
; above; it only equals `add ebx, 16` when the low 16 bits do not carry.
; Confirm this is intended.
mov ebx, -1
sub ebx, eax
add bx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * address of .data_mid, so base + index * 8 + 8 comes out as
; .data_mid + {0, -8, -16, +16} for the four lanes after wrapping
; (assuming .data_mid is 8-byte aligned -- TODO confirm).
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with data from .data so that stale contents are
; distinguishable from gathered values.
vmovapd ymm7, [rel .data]

; ymm0 = four qword indexes, xmm1 = mask with the sign bit set in each of the
; four loaded dwords (all loaded elements enabled). The CONFIG block above
; expects XMM1 to be all zero and the upper half of XMM7 to be zero afterwards.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm7, [ymm0 * 8 + eax + 8], xmm1

hlt

align 4096

; Eight dwords with only the sign bit set; the code above loads the first four.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQD address-overflow test.
; 128-bit index register (xmm0, two qword indexes)
; 1x scale, displacement of 1
; 64-bit indexes
;
; Each lane's effective address is eax + index * 1 + 1. The indexes and the
; base are built below so that this sum is intended to overflow and wrap back
; to an address near .data_mid.

; Calculate an address that heavily overflows
; eax = address of .data_mid
lea eax, [rel .data_mid]

; Index 0: ebx = -1 - eax (i.e. ~eax). No shift is needed for a scale of 1.
; Only the low dword of the qword slot is written; the high dword keeps the 0
; from the `dq` initializer.
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: same as index 0, but 8 bytes lower.
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: 16 bytes lower. Stored, but the 128-bit load into xmm0 below only
; covers index slots 0 and 1.
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: 16 bytes higher. Also outside the 128 bits loaded into xmm0.
; NOTE(review): this adds to the 16-bit `bx`, unlike the `sub ebx, N` lines
; above; it only equals `add ebx, 16` when the low 16 bits do not carry.
; Confirm this is intended.
mov ebx, -1
sub ebx, eax
add bx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * address of .data_mid, so base + index + 1 wraps (mod 2^32) to
; .data_mid + 0 for lane 0 and .data_mid - 8 for lane 1.
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with data from .data so that stale contents are
; distinguishable from gathered values.
vmovapd ymm7, [rel .data]

; xmm0 = two qword indexes, xmm1 = mask with the sign bit set in each loaded
; dword. The CONFIG block above expects XMM1 to be all zero and everything in
; XMM7 above the low qword to be zero afterwards.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [xmm0 * 1 + eax + 1], xmm1

hlt

align 4096

; Eight dwords with only the sign bit set; the code above loads the first four.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQD address-overflow test:
; 128-bit
; 2x displacement
; 64-bit indexes
;
; The code below fills the four qword slots of .index_overflow with indexes
; derived from the address of .data_mid. Only the low dword of each slot is
; written; the high dword keeps the zero it was initialised with by `dq 0`.
; The gather further down loads just 16 bytes of .index_overflow into xmm0,
; so only slots 0 and 1 are actually consumed by this test.

; Calculate an address that heavily overflows
; eax = A, the address of .data_mid.
lea eax, [rel .data_mid]

; Slot 0: index = (-1 - A) >> 1 (arithmetic shift, matching the 2x scale).
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Slot 1: index = (-1 - A - 8) >> 1.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Slot 2: index = (-1 - A - 16) >> 1.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; Slot 3: index = (-1 - A + 16) >> 1.
; The full 32-bit register is used here, like in the other slots; a 16-bit
; `add bx, 16` would silently drop any carry out of bit 15.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * A, where A is the address of .data_mid.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-load all 256 bits of ymm7 with the first 32 bytes of .data. The CONFIG
; block expects everything above the low 64 bits of XMM7 to read back as zero
; after the gather.
vmovapd ymm7, [rel .data]

; xmm0 = the first two qword indexes (slots 0 and 1 of .index_overflow).
vmovaps xmm0, [rel .index_overflow]
; xmm1 = gather mask; every dword loaded has its sign bit set.
vmovaps xmm1, [rel .mask_11111111]
; Each element is loaded from eax + index * 2 + 2.
; With index0 = (-1 - A) >> 1 and index1 = (-1 - A - 8) >> 1, and assuming A
; is even and the sum wraps at 32 bits, the two addresses come out as A and
; A - 8. This agrees with the expected XMM7 value in CONFIG: 0x6135a9d7 is the
; first dword of .data_mid and 0x7f80d2ec is the dword 8 bytes before it.
; CONFIG also expects XMM1 (the mask) to be all zero afterwards.
vpgatherqd xmm7, [xmm0 * 2 + eax + 2], xmm1

; NOTE(review): presumably hlt marks the end of the test for the harness - confirm.
hlt

; Start the data tables on a 4096-byte boundary. This also satisfies the
; 16/32-byte alignment that the vmovaps/vmovapd loads above require.
align 4096

; Gather mask: eight dwords, each with only the sign bit (bit 31) set.
; The code above loads the first 16 bytes of it into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, zero-initialised here; the low dword of each slot is
; filled in at run time by the code at the top of the file.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQD address-overflow test:
; 128-bit
; 4x displacement
; 64-bit indexes
;
; The code below fills the four qword slots of .index_overflow with indexes
; derived from the address of .data_mid. Only the low dword of each slot is
; written; the high dword keeps the zero it was initialised with by `dq 0`.
; The gather further down loads just 16 bytes of .index_overflow into xmm0,
; so only slots 0 and 1 are actually consumed by this test.

; Calculate an address that heavily overflows
; eax = A, the address of .data_mid.
lea eax, [rel .data_mid]

; Slot 0: index = (-1 - A) >> 2 (arithmetic shift, matching the 4x scale).
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Slot 1: index = (-1 - A - 8) >> 2.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Slot 2: index = (-1 - A - 16) >> 2.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Slot 3: index = (-1 - A + 16) >> 2.
; The full 32-bit register is used here, like in the other slots; a 16-bit
; `add bx, 16` would silently drop any carry out of bit 15.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; eax = 2 * A, where A is the address of .data_mid.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-load all 256 bits of ymm7 with the first 32 bytes of .data. The CONFIG
; block expects everything above the low 64 bits of XMM7 to read back as zero
; after the gather.
vmovapd ymm7, [rel .data]

; xmm0 = the first two qword indexes (slots 0 and 1 of .index_overflow).
vmovaps xmm0, [rel .index_overflow]
; xmm1 = gather mask; every dword loaded has its sign bit set.
vmovaps xmm1, [rel .mask_11111111]
; Each element is loaded from eax + index * 4 + 4.
; With index0 = (-1 - A) >> 2 and index1 = (-1 - A - 8) >> 2, and assuming A
; is a multiple of 4 and the sum wraps at 32 bits, the two addresses come out
; as A and A - 8. This agrees with the expected XMM7 value in CONFIG:
; 0x6135a9d7 is the first dword of .data_mid and 0x7f80d2ec is the dword
; 8 bytes before it.
; CONFIG also expects XMM1 (the mask) to be all zero afterwards.
vpgatherqd xmm7, [xmm0 * 4 + eax + 4], xmm1

; NOTE(review): presumably hlt marks the end of the test for the harness - confirm.
hlt

; Start the data tables on a 4096-byte boundary. This also satisfies the
; 16/32-byte alignment that the vmovaps/vmovapd loads above require.
align 4096

; Gather mask: eight dwords, each with only the sign bit (bit 31) set.
; The code above loads the first 16 bytes of it into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, zero-initialised here; the low dword of each slot is
; filled in at run time by the code at the top of the file.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd with a 128-bit index vector (xmm0)
; Scale 8, displacement 8
; 64-bit indexes
;
; Each index is derived from the address of .data_mid so that
;   base + index * 8 + 8
; only lands back inside the data table after the sum wraps around the
; 32-bit address space. This relies on .data_mid being 8-byte aligned, so
; the three bits shifted out by `sar ebx, 3` are all ones.

; eax = address of .data_mid, the reference point for every index below.
lea eax, [rel .data_mid]

; index[0] = (-1 - data_mid) >> 3          -> element address data_mid
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; index[1] = (-1 - data_mid - 8) >> 3      -> element address data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; index[2] = (-1 - data_mid - 16) >> 3     -> element address data_mid - 16
; Stored for parity with the 256-bit variants; xmm0 only holds index[0..1].
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; index[3] = (-1 - data_mid + 16) >> 3     -> element address data_mid + 16
; Full 32-bit add, matching the other indexes, so a carry out of bit 15
; cannot be dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow:
; eax = 2 * data_mid (truncated to 32 bits).
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination from the start of .data; the gather overwrites it.
vmovapd ymm7, [rel .data]

; xmm0 = index[0..1], xmm1 = first four mask dwords (sign bit set in each).
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [xmm0 * 8 + eax + 8], xmm1

; End of test code.
hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords, each with only bit 31 (the sign bit) set.
; The test code loads the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, zero here and filled in at runtime by the test code.
; The code stores only the low dword of each slot (32-bit `mov` from ebx),
; so the high dword of every index keeps its initial value of 0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd with a 256-bit index vector (ymm0)
; Scale 1, displacement 1
; 64-bit indexes
;
; Each index is derived from the address of .data_mid so that
;   base + index * 1 + 1
; only lands back inside the data table after the sum wraps around the
; 32-bit address space.

; eax = address of .data_mid, the reference point for every index below.
lea eax, [rel .data_mid]

; index[0] = -1 - data_mid                 -> element address data_mid
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; index[1] = -1 - data_mid - 8             -> element address data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; index[2] = -1 - data_mid - 16            -> element address data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; index[3] = -1 - data_mid + 16            -> element address data_mid + 16
; Full 32-bit add, matching the other indexes, so a carry out of bit 15
; cannot be dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow:
; eax = 2 * data_mid (truncated to 32 bits).
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination from the start of .data; the gather overwrites it.
vmovapd ymm7, [rel .data]

; ymm0 = index[0..3], xmm1 = first four mask dwords (sign bit set in each).
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [ymm0 * 1 + eax + 1], xmm1

; End of test code.
hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: eight dwords, each with only bit 31 (the sign bit) set.
; The test code loads the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, zero here and filled in at runtime by the test code.
; The code stores only the low dword of each slot (32-bit `mov` from ebx),
; so the high dword of every index keeps its initial value of 0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd with a 256-bit index vector (ymm0)
; Scale 2, displacement 2
; 64-bit indexes
;
; Each index is derived from the address of .data_mid so that
;   base + index * 2 + 2
; only lands back inside the data table after the sum wraps around the
; 32-bit address space. This relies on .data_mid being 2-byte aligned, so
; the bit shifted out by `sar ebx, 1` is a one.

; eax = address of .data_mid, the reference point for every index below.
lea eax, [rel .data_mid]

; index[0] = (-1 - data_mid) >> 1          -> element address data_mid
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; index[1] = (-1 - data_mid - 8) >> 1      -> element address data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; index[2] = (-1 - data_mid - 16) >> 1     -> element address data_mid - 16
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; index[3] = (-1 - data_mid + 16) >> 1     -> element address data_mid + 16
; Full 32-bit add, matching the other indexes, so a carry out of bit 15
; cannot be dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow:
; eax = 2 * data_mid (truncated to 32 bits).
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination from the start of .data; the gather overwrites it.
vmovapd ymm7, [rel .data]

; ymm0 = index[0..3], xmm1 = first four mask dwords (sign bit set in each).
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [ymm0 * 2 + eax + 2], xmm1

; End of test code.
hlt

; Pad to a 4096-byte boundary before the data tables.
align 4096

; Gather mask: each dword has only bit 31 set.
; Eight dwords are defined; the test code loads the first 16 bytes into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword index slots, zero-initialised here. The test code stores a
; 32-bit value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 4x displacement
; 64-bit indexes
;
; Exercises VPGATHERQD with qword indices, a scale of 4 and a displacement of 4.
; Every index is built from (-1 - address of .data_mid), so that
; index * 4 + base + 4 only lands back inside the data tables once the
; address arithmetic has overflowed (the test runs in 32-bit mode, so the
; effective address is expected to be truncated to 32 bits).
;
; With base = 2 * .data_mid, and assuming .data_mid is a multiple of the
; scale (TODO confirm against the assembled layout), the four lanes read from:
;   lane 0: .data_mid
;   lane 1: .data_mid - 8
;   lane 2: .data_mid - 16
;   lane 3: .data_mid + 16

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Lane 0 index: (-1 - .data_mid) >> 2.
; Only the low dword of each qword index slot is written; the high dword
; keeps the zero it was initialised with.
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 index: (-1 - .data_mid - 8) >> 2
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 index: (-1 - .data_mid - 16) >> 2
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 index: (-1 - .data_mid + 16) >> 2
; Use the full 32-bit register, matching the other lanes: a 16-bit
; `add bx, 16` would drop any carry out of bit 15 and corrupt the index.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; base = 2 * .data_mid, which cancels the "-.data_mid" folded into each index.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-fill the destination with table data so stale contents are detectable.
vmovapd ymm7, [rel .data]

; ymm0 = four qword indices, xmm1 = four dword mask elements.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [ymm0 * 4 + eax + 4], xmm1

hlt

; Pad to a 4096-byte boundary before the data tables.
align 4096

; Gather mask: each dword has only bit 31 set.
; Eight dwords are defined; the test code loads the first 16 bytes into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword index slots, zero-initialised here. The test code stores a
; 32-bit value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qd_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 8x displacement
; 64-bit indexes
;
; Exercises VPGATHERQD with qword indices, a scale of 8 and a displacement of 8.
; Every index is built from (-1 - address of .data_mid), so that
; index * 8 + base + 8 only lands back inside the data tables once the
; address arithmetic has overflowed (the test runs in 32-bit mode, so the
; effective address is expected to be truncated to 32 bits).
;
; With base = 2 * .data_mid, and assuming .data_mid is a multiple of the
; scale (TODO confirm against the assembled layout), the four lanes read from:
;   lane 0: .data_mid
;   lane 1: .data_mid - 8
;   lane 2: .data_mid - 16
;   lane 3: .data_mid + 16

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Lane 0 index: (-1 - .data_mid) >> 3.
; Only the low dword of each qword index slot is written; the high dword
; keeps the zero it was initialised with.
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1 index: (-1 - .data_mid - 8) >> 3
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2 index: (-1 - .data_mid - 16) >> 3
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3 index: (-1 - .data_mid + 16) >> 3
; Use the full 32-bit register, matching the other lanes: a 16-bit
; `add bx, 16` would drop any carry out of bit 15 and corrupt the index.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
; base = 2 * .data_mid, which cancels the "-.data_mid" folded into each index.
lea eax, [rel .data_mid]
shl eax, 1

; Pre-fill the destination with table data so stale contents are detectable.
vmovapd ymm7, [rel .data]

; ymm0 = four qword indices, xmm1 = four dword mask elements.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm7, [ymm0 * 8 + eax + 8], xmm1

hlt

; Pad to a 4096-byte boundary before the data tables.
align 4096

; Gather mask: each dword has only bit 31 set.
; Eight dwords are defined; the test code loads the first 16 bytes into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword index slots, zero-initialised here. The test code stores a
; 32-bit value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 128-bit destination, 64-bit indexes, scale 1 and
; displacement 1.
;
; Neither the base nor the indexes point at the data on their own; only the
; sum of base, index and displacement does (all arithmetic modulo 2^32):
;   base  = .data_mid * 2           (eax after the shl below)
;   index = -1 - .data_mid - k      (low dword of each .index_overflow entry)
;   addr  = index * 1 + base + 1    = .data_mid - k
; Only the low dword of each index entry is written here; the upper dword
; keeps the value it was assembled with.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Index 0 (k = 0): element 0 is gathered from .data_mid
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; Index 1 (k = 8): element 1 is gathered from .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Indexes 2 and 3 (k = 16 and k = -16) are written as well, but the 128-bit
; index load further down only picks up entries 0 and 1.
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

mov ebx, -1
sub ebx, eax
; Full 32-bit add, like every other index computation in this test; a 16-bit
; add on bx would drop any carry out of the low word.
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1

; Give the whole destination register a known starting value taken from .data.
; The RegData block at the top of the file expects the upper two qwords of the
; register to read back as zero after the gather.
vmovapd ymm7, [rel .data]

; xmm0 = index entries 0 and 1, xmm1 = the first two mask qwords.
; RegData expects XMM1 to be all zero once the gather has completed.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm7, [xmm0 * 1 + eax + 1], xmm1

hlt

; Pad up to the next 4096-byte boundary before the test data begins.
align 4096

; Mask operand for the gather: four qwords, each with only bit 63 set.
; The code above loads the first 16 bytes of this table into xmm1.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Index table: four qword slots, assembled as zero and filled in at runtime by
; the code above, which stores a 32-bit value into the low dword of each slot.
; Indexing is a signed 64-bit integer.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 128-bit destination, 64-bit indexes, scale 2 and
; displacement 2.
;
; Neither the base nor the indexes point at the data on their own; only the
; sum of base, scaled index and displacement does (all arithmetic modulo 2^32):
;   base  = .data_mid * 2                  (eax after the shl below)
;   index = (-1 - .data_mid - k) sar 1     (low dword of each index entry)
;   addr  = index * 2 + base + 2
; When -1 - .data_mid - k is odd, the arithmetic shift rounds it down, so
; index * 2 comes out one lower than the unshifted value; the displacement of
; 2 then brings the sum to .data_mid - k.
; NOTE(review): this relies on .data_mid sitting at an even address - confirm.
; Only the low dword of each index entry is written here; the upper dword
; keeps the value it was assembled with.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Index 0 (k = 0): element 0 is gathered from .data_mid
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Index 1 (k = 8): element 1 is gathered from .data_mid - 8
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Indexes 2 and 3 (k = 16 and k = -16) are written as well, but the 128-bit
; index load further down only picks up entries 0 and 1.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

mov ebx, -1
sub ebx, eax
; Full 32-bit add, like every other index computation in this test; a 16-bit
; add on bx would drop any carry out of the low word.
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1

; Give the whole destination register a known starting value taken from .data.
; The RegData block at the top of the file expects the upper two qwords of the
; register to read back as zero after the gather.
vmovapd ymm7, [rel .data]

; xmm0 = index entries 0 and 1, xmm1 = the first two mask qwords.
; RegData expects XMM1 to be all zero once the gather has completed.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm7, [xmm0 * 2 + eax + 2], xmm1

hlt

; Pad up to the next 4096-byte boundary before the test data begins.
align 4096

; Mask operand for the gather: four qwords, each with only bit 63 set.
; The code above loads the first 16 bytes of this table into xmm1.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Index table: four qword slots, assembled as zero and filled in at runtime by
; the code above, which stores a 32-bit value into the low dword of each slot.
; Indexing is a signed 64-bit integer.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x displacement
; 64-bit indexes
;
; The gather below addresses memory as  index * 4 + eax + 4.
; With D = address of .data_mid, the code sets
;   index_k = (-1 - D - k) >> 2        and        eax = 2 * D
; so no single term points at the data; only the wrapped 32-bit sum does.
; Assuming D and k are multiples of 4 (TODO confirm the alignment of
; .data_mid if the data layout ever changes), the low two bits of
; (-1 - D - k) are both set, hence index_k * 4 = -D - 4 - k and
;   index_k * 4 + 2 * D + 4 = D - k.
; The RegData block at the top of this file expects xmm7 to end up holding
; the qwords at D and D - 8, and xmm1 (the mask) to read back as zero.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Slot 0 (k = 0): resolves to .data_mid.
; Only the low dword of each 8-byte slot is written; the high dword keeps
; the zero it was assembled with.
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Slot 1 (k = 8): resolves to .data_mid - 8.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Slot 2 (k = 16): resolves to .data_mid - 16.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Slot 3 (k = -16): resolves to .data_mid + 16.
; Use the full 32-bit register, like the other slots, so that a carry out
; of bit 15 is not silently dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1                      ; eax = 2 * D (mod 2^32)

; Seed the destination with unrelated bytes from the start of .data.
vmovapd ymm7, [rel .data]

; The 128-bit loads pick up only slots 0 and 1 of the index and mask tables;
; slots 2 and 3 written above are not consumed by this variant.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm7, [xmm0 * 4 + eax + 4], xmm1

hlt

; Start the data area on a 4096-byte boundary.
align 4096

; Gather mask table: bit 63 of every qword is set. The code loads it into the
; mask register of the gather, and RegData expects that register (xmm1) to
; read back as all zeros afterwards.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four 8-byte index slots, assembled as zero; the code above stores a 32-bit
; value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x displacement
; 64-bit indexes
;
; The gather below addresses memory as  index * 8 + eax + 8.
; With D = address of .data_mid, the code sets
;   index_k = (-1 - D - k) >> 3        and        eax = 2 * D
; so no single term points at the data; only the wrapped 32-bit sum does.
; Assuming D and k are multiples of 8 (TODO confirm the alignment of
; .data_mid if the data layout ever changes), the low three bits of
; (-1 - D - k) are all set, hence index_k * 8 = -D - 8 - k and
;   index_k * 8 + 2 * D + 8 = D - k.
; The RegData block at the top of this file expects xmm7 to end up holding
; the qwords at D and D - 8, and xmm1 (the mask) to read back as zero.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Slot 0 (k = 0): resolves to .data_mid.
; Only the low dword of each 8-byte slot is written; the high dword keeps
; the zero it was assembled with.
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Slot 1 (k = 8): resolves to .data_mid - 8.
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Slot 2 (k = 16): resolves to .data_mid - 16.
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Slot 3 (k = -16): resolves to .data_mid + 16.
; Use the full 32-bit register, like the other slots, so that a carry out
; of bit 15 is not silently dropped.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1                      ; eax = 2 * D (mod 2^32)

; Seed the destination with unrelated bytes from the start of .data.
vmovapd ymm7, [rel .data]

; The 128-bit loads pick up only slots 0 and 1 of the index and mask tables;
; slots 2 and 3 written above are not consumed by this variant.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm7, [xmm0 * 8 + eax + 8], xmm1

hlt

; Start the data area on a 4096-byte boundary.
align 4096

; Gather mask table: bit 63 of every qword is set. The code loads it into the
; mask register of the gather, and RegData expects that register (xmm1) to
; read back as all zeros afterwards.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four 8-byte index slots, assembled as zero; the code above stores a 32-bit
; value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 1x displacement
; 64-bit indexes
;
; The gather below addresses memory as  index * 1 + eax + 1.
; With D = address of .data_mid, the code sets
;   index_k = -1 - D - k        and        eax = 2 * D
; so no single term points at the data; only the wrapped 32-bit sum does:
;   index_k + 2 * D + 1 = D - k.
; The RegData block at the top of this file expects ymm7 to end up holding
; the qwords at D, D - 8, D - 16 and D + 16 (lanes 0..3), and the mask
; register (xmm1 in RegData) to read back as zero.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Slot 0 (k = 0): resolves to .data_mid.
; Only the low dword of each 8-byte slot is written; the high dword keeps
; the zero it was assembled with.
mov ebx, -1
sub ebx, eax
mov [rel .index_overflow + 0 * 8], ebx

; Slot 1 (k = 8): resolves to .data_mid - 8.
mov ebx, -1
sub ebx, eax
sub ebx, 8
mov [rel .index_overflow + 1 * 8], ebx

; Slot 2 (k = 16): resolves to .data_mid - 16.
mov ebx, -1
sub ebx, eax
sub ebx, 16
mov [rel .index_overflow + 2 * 8], ebx

; Slot 3 (k = -16): resolves to .data_mid + 16.
; Use the full 32-bit register, like the other slots, so that a carry out
; of bit 15 is not silently dropped (which would move this lane's address
; by 64 KiB).
mov ebx, -1
sub ebx, eax
add ebx, 16
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1                      ; eax = 2 * D (mod 2^32)

; Seed the destination with unrelated bytes from the start of .data.
vmovapd ymm7, [rel .data]

; All four index slots and all four mask qwords are consumed here.
vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm7, [ymm0 * 1 + eax + 1], ymm1

hlt

; Start the data area on a 4096-byte boundary.
align 4096

; Gather mask table: bit 63 of every qword is set. The code loads it into the
; mask register of the gather, and RegData expects that register (xmm1) to
; read back as all zeros afterwards.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Four 8-byte index slots, assembled as zero; the code above stores a 32-bit
; value into the low dword of each slot at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination and 64-bit indexes.
; Memory operand: index scale of 2 plus a constant displacement of 2.
;
; The indexes and the base are chosen so that every effective address
; (index * 2 + base + 2) overflows the 32-bit address space and is expected to
; wrap back to a qword near .data_mid. The XMM7 values in the CONFIG block are
; the qwords located at .data_mid + 0, - 8, - 16 and + 16.
;
; Only the low 32 bits of each 64-bit index slot are written below; the high
; 32 bits keep their assembled value of zero.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Index 0: (-1 - .data_mid) >> 1
mov ebx, -1
sub ebx, eax
sar ebx, 1
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: (-1 - .data_mid - 8) >> 1
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 1
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: (-1 - .data_mid - 16) >> 1
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 1
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: (-1 - .data_mid + 16) >> 1
mov ebx, -1
sub ebx, eax
; Full 32-bit add so that a carry out of the low 16 bits is never dropped.
add ebx, 16
sar ebx, 1
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow: base = .data_mid * 2
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with unrelated data so that a skipped element would
; show up as a mismatch against the expected XMM7 value.
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm7, [ymm0 * 2 + eax + 2], ymm1

hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: the most significant bit of each of the four qwords is set.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four 64-bit index slots, filled in at run time by the code above.
; Indexing is a signed 64-bit integer.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination and 64-bit indexes.
; Memory operand: index scale of 4 plus a constant displacement of 4.
;
; The indexes and the base are chosen so that every effective address
; (index * 4 + base + 4) overflows the 32-bit address space and is expected to
; wrap back to a qword near .data_mid. The XMM7 values in the CONFIG block are
; the qwords located at .data_mid + 0, - 8, - 16 and + 16.
;
; Only the low 32 bits of each 64-bit index slot are written below; the high
; 32 bits keep their assembled value of zero.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Index 0: (-1 - .data_mid) >> 2
mov ebx, -1
sub ebx, eax
sar ebx, 2
mov [rel .index_overflow + 0 * 8], ebx

; Index 1: (-1 - .data_mid - 8) >> 2
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 2
mov [rel .index_overflow + 1 * 8], ebx

; Index 2: (-1 - .data_mid - 16) >> 2
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 2
mov [rel .index_overflow + 2 * 8], ebx

; Index 3: (-1 - .data_mid + 16) >> 2
mov ebx, -1
sub ebx, eax
; Full 32-bit add so that a carry out of the low 16 bits is never dropped.
add ebx, 16
sar ebx, 2
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow: base = .data_mid * 2
lea eax, [rel .data_mid]
shl eax, 1

; Preload the destination with unrelated data so that a skipped element would
; show up as a mismatch against the expected XMM7 value.
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm7, [ymm0 * 4 + eax + 4], ymm1

hlt

; Start the data tables on a 4096-byte boundary.
align 4096

; Gather mask: the most significant bit of each of the four qwords is set.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four 64-bit index slots, filled in at run time by the code above.
; Indexing is a signed 64-bit integer.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/VEX/vpgather_qq_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM7": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "Mode": "32BIT",
  "Env": { "FEX_HOSTFEATURES" : "enableavx" },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq: 256-bit destination, 64-bit indexes, scale 8.
;
; Let A be the address of .data_mid. The gather below uses base = 2 * A and a
; displacement of 8, so each element address is
;     index * 8 + 2 * A + 8
; and only lands back inside the data tables once the sum wraps around.
;
; Every index is built as (-1 - A + delta) >> 3 using 32-bit arithmetic.
; Assuming .data_mid is 8-byte aligned (the expected values in the config rely
; on this), the four lanes resolve to:
;     lane 0: A          lane 1: A - 8
;     lane 2: A - 16     lane 3: A + 16
;
; Only the low dword of each 64-bit index slot is written; the high dword keeps
; the zero it was assembled with.

; Calculate an address that heavily overflows
lea eax, [rel .data_mid]

; Lane 0
mov ebx, -1
sub ebx, eax
sar ebx, 3
mov [rel .index_overflow + 0 * 8], ebx

; Lane 1
mov ebx, -1
sub ebx, eax
sub ebx, 8
sar ebx, 3
mov [rel .index_overflow + 1 * 8], ebx

; Lane 2
mov ebx, -1
sub ebx, eax
sub ebx, 16
sar ebx, 3
mov [rel .index_overflow + 2 * 8], ebx

; Lane 3
; Full 32-bit add, matching the other lanes: a 16-bit `add bx, 16` would drop
; any carry out of bit 15 and make the result depend on where .data_mid sits.
mov ebx, -1
sub ebx, eax
add ebx, 16
sar ebx, 3
mov [rel .index_overflow + 3 * 8], ebx

; Calculate new base which offsets from the overflow
lea eax, [rel .data_mid]
shl eax, 1

; Pre-fill the destination so stale data would be visible if a lane is skipped.
vmovapd ymm7, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
; The config expects the mask register (XMM1) to read back as all zeros.
vpgatherqq ymm7, [ymm0 * 8 + eax + 8], ymm1

hlt

align 4096

; Sign bit set in every qword: all four lanes are gathered.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/32Bit_ASM/X87/D8_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xc000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fadd with a 32-bit memory operand.
; ST0 = 1.0 (loaded from a double) + 2.0 (single-precision operand) = 3.0,
; which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fadd dword [edx + 8]    ; ST0 = ST0 + 2.0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fmul with a 32-bit memory operand.
; ST0 = 1.0 (loaded from a double) * 2.0 (single-precision operand) = 2.0,
; which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fmul dword [edx + 8]    ; ST0 = ST0 * 2.0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  },
  "Mode": "32BIT"
}
%endif

; fsub with a 32-bit memory operand.
; ST0 = 1.0 - 2.0 = -1.0, which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fsub dword [edx + 8]    ; ST0 = ST0 - 2.0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fsubr with a 32-bit memory operand (reversed subtraction).
; ST0 = 2.0 - 1.0 = 1.0, which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fsubr dword [edx + 8]   ; ST0 = 2.0 - ST0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; fdiv with a 32-bit memory operand.
; ST0 = 1.0 / 2.0 = 0.5, which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fdiv dword [edx + 8]    ; ST0 = ST0 / 2.0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fdivr with a 32-bit memory operand (reversed division).
; ST0 = 2.0 / 1.0 = 2.0, which the config expects in MM7.
lea edx, [.operands]

fld qword [edx]         ; ST0 = 1.0
fdivr dword [edx + 8]   ; ST0 = 2.0 / ST0
hlt

.operands:
dq 1.0                  ; double, 0x3ff0000000000000
dd 2.0, 0               ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D8_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xC000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fadd st(0), st(i) -- register form, here with i = 1.
; After the two loads ST1 = 1.0 and ST0 = 2.0; the add leaves 3.0 in ST0.
; The config expects 3.0 in MM6 and the untouched 1.0 in MM7.
lea edx, [.operands]

fld qword [edx]         ; push 1.0
fld qword [edx + 8]     ; push 2.0

fadd st0, st1           ; ST0 = 2.0 + 1.0

hlt

.operands:
dq 1.0                  ; 0x3ff0000000000000
dq 2.0                  ; 0x4000000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D8_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; fmul st(0), st(i) -- register form, here squaring ST0.
; 2.0 * 2.0 = 4.0, which the config expects in MM7.
lea edx, [.operand]

fld qword [edx]         ; ST0 = 2.0
fmul st0, st0           ; ST0 = ST0 * ST0
hlt

.operand:
dq 2.0                  ; 0x4000000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D8_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0xBFFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fsub st(0), st(i) -- register form, here with i = 1.
; After the two loads ST1 = 2.0 and ST0 = 1.0; ST0 becomes 1.0 - 2.0 = -1.0.
; The config expects -1.0 in MM6 and the untouched 2.0 in MM7.
lea edx, [.operands]

fld qword [edx]         ; push 2.0
fld qword [edx + 8]     ; push 1.0
fsub st0, st1           ; ST0 = ST0 - ST1
hlt

.operands:
dq 2.0                  ; 0x4000000000000000
dq 1.0                  ; 0x3ff0000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D8_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fsubr st(0), st(i) -- reversed register form, here with i = 1.
; After the two loads ST1 = 2.0 and ST0 = 1.0; ST0 becomes 2.0 - 1.0 = 1.0.
; The config expects 1.0 in MM6 and the untouched 2.0 in MM7.
lea edx, [.operands]

fld qword [edx]         ; push 2.0
fld qword [edx + 8]     ; push 1.0
fsubr st0, st1          ; ST0 = ST1 - ST0
hlt

.operands:
dq 2.0                  ; 0x4000000000000000
dq 1.0                  ; 0x3ff0000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D8_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fdiv st(0), st(i) -- register form, here dividing ST0 by itself.
; 2.0 / 2.0 = 1.0, which the config expects in MM7.
lea edx, [.operand]

fld qword [edx]         ; ST0 = 2.0
fdiv st0, st0           ; ST0 = ST0 / ST0
hlt

.operand:
dq 2.0                  ; 0x4000000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D8_F0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0"
  },
  "Mode": "32BIT"
}
%endif

; Tests that a division by zero does not set the IE flag
; Sequence: reset the FPU, push 0.0 then 1.0, and divide ST0 (1.0) by ST1 (0.0).
finit
fldz
fld1
fdiv st0, st1

; Read the status word and keep only bit 0 (IE, invalid operation).
; The config expects RAX = 0, i.e. IE stays clear.
fnstsw ax
and eax, 1
hlt


================================================
FILE: unittests/32Bit_ASM/X87/D8_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFE"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fdivr st(0), st(i) -- reversed register form, here with i = 1.
; After the two loads ST1 = 2.0 and ST0 = 4.0; ST0 becomes 2.0 / 4.0 = 0.5.
; The config expects 0.5 in MM6 and the untouched 2.0 in MM7.
lea edx, [.operands]

fld qword [edx]         ; push 2.0
fld qword [edx + 8]     ; push 4.0

fdivr st0, st1          ; ST0 = ST1 / ST0
hlt

.operands:
dq 2.0                  ; 0x4000000000000000
dq 4.0                  ; 0x4010000000000000


================================================
FILE: unittests/32Bit_ASM/X87/D9_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3fff"]
  },
  "Mode": "32BIT"
}
%endif

; fld with a 32-bit memory operand.
; Loads single-precision 1.0; the config expects 1.0 in MM7.
lea edx, [.operand]

fld dword [edx]         ; ST0 = 1.0
hlt

.operand:
dd 1.0, 0               ; float 0x3f800000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D9_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000",
    "MM7": ["0x8000000000000000", "0x3fff"]
  },
  "Mode": "32BIT"
}
%endif

; fst with a 32-bit memory operand (store without popping).
; 1.0 is loaded, stored back as a float into the second slot, and read into
; eax. The config expects RAX = 0x3F800000 and 1.0 still present in MM7.
lea edx, [rel .slots]

fld dword [edx]         ; ST0 = 1.0
fst dword [edx + 8]     ; slot 1 = (float)ST0, stack unchanged

mov eax, [edx + 8]      ; read back the stored bit pattern

hlt

align 4096
.slots:
dd 1.0, 0               ; slot 0: float 0x3f800000 plus a zero dword
dq 0                    ; slot 1: store target


================================================
FILE: unittests/32Bit_ASM/X87/D9_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000",
    "MM7": ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fstp with a 32-bit memory operand (store and pop).
; 1.0 is loaded and popped into the third slot, then 2.0 is loaded.
; The config expects RAX = 0x3F800000 (the stored 1.0) and 2.0 in MM7, i.e.
; the second load lands in the register freed by the pop.
lea edx, [rel .slots]

fld dword [edx]         ; push 1.0
fstp dword [edx + 16]   ; slot 2 = 1.0, pop
fld dword [edx + 8]     ; push 2.0

mov eax, [edx + 16]     ; read back the stored bit pattern

hlt

align 4096
.slots:
dd 1.0, 0               ; slot 0: float 0x3f800000 plus a zero dword
dd 2.0, 0               ; slot 1: float 0x40000000 plus a zero dword
dq 0                    ; slot 2: store target


================================================
FILE: unittests/32Bit_ASM/X87/D9_05.asm
================================================
%ifdef CONFIG
{
  "Mode": "32BIT"
}
%endif

; fldcw: load the x87 control word from the 16-bit value at [edx].
; The config lists no RegData, so nothing is checked beyond reaching hlt.
; NOTE(review): presumably the test harness maps memory at 0xe0000000 -- confirm.
mov edx, 0xe0000000
; Just to ensure execution
fldcw [edx]

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3fff"]
  },
  "Mode": "32BIT"
}
%endif

; fstenv / fldenv round trip with a 16-bit operand size (o16 prefix).
;
; The environment is saved after the first push and reloaded after a second
; push. The config expects 1.0 in MM7 and 2.0 in MM6: the 4.0 pushed between
; the save and the reload is replaced by the final 2.0.
;
; Environment image, 16-bit form (14 bytes):
;   2 bytes : FCW
;   2 bytes : FSW
;   2 bytes : FTW
;   2 bytes : instruction offset
;   2 bytes : instruction CS selector
;   2 bytes : data offset
;   2 bytes : data selector
;
; Environment image, 32-bit form (28 bytes), for comparison:
;   4 bytes : FCW
;   4 bytes : FSW
;   4 bytes : FTW
;   4 bytes : instruction pointer
;   2 bytes : instruction pointer selector
;   2 bytes : opcode
;   4 bytes : data pointer offset
;   4 bytes : data pointer selector
lea edx, [rel .scratch]

fld dword [edx]             ; push 1.0
o16 fstenv [edx + 24]       ; save the environment into the spare area
fld dword [edx + 16]        ; push 4.0
o16 fldenv [edx + 24]       ; reload the environment saved above

; The reloaded environment predates the 4.0 push, so this load takes over
; the stack slot that push had used.
fld dword [edx + 8]         ; push 2.0

hlt

align 4096
.scratch:
dd 1.0, 0                   ; +0  : float 0x3f800000 plus a zero dword
dd 2.0, 0                   ; +8  : float 0x40000000 plus a zero dword
dd 4.0, 0                   ; +16 : float 0x40800000 plus a zero dword
times 2 dq 0                ; +24 : 16 bytes for the 14-byte environment


================================================
FILE: unittests/32Bit_ASM/X87/D9_06_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3fff"]
  },
  "Mode": "32BIT"
}
%endif

; fstenv / fldenv round trip with a 32-bit operand size (o32 prefix).
;
; The environment is saved after the first push and reloaded after a second
; push. The config expects 1.0 in MM7 and 2.0 in MM6: the 4.0 pushed between
; the save and the reload is replaced by the final 2.0.
lea edx, [rel .data]

fld dword [edx + 8 * 0]     ; push 1.0
o32 fstenv [edx + 8 * 3]    ; save the 28-byte environment
fld dword [edx + 8 * 2]     ; push 4.0
o32 fldenv [edx + 8 * 3]    ; reload the environment saved above

; This will overwrite the previous load
; The reloaded environment (saved before the 4.0 push) restores the status
; word, including its top-of-stack field, so this push reuses that slot.
fld dword [edx + 8 * 1]

; 14 bytes for 16bit
; 2 Bytes : FCW
; 2 Bytes : FSW
; 2 bytes : FTW
; 2 bytes : Instruction offset
; 2 bytes : Instruction CS selector
; 2 bytes : Data offset
; 2 bytes : Data selector

; 28 bytes for 32bit
; 4 bytes : FCW
; 4 bytes : FSW
; 4 bytes : FTW
; 4 bytes : Instruction pointer
; 2 bytes : instruction pointer selector
; 2 bytes : Opcode
; 4 bytes : data pointer offset
; 4 bytes : data pointer selector

hlt

align 4096
.data:
dq 0x3f800000               ; +0  : float 1.0
dq 0x40000000               ; +8  : float 2.0
dq 0x40800000               ; +16 : float 4.0
; +24 : environment save area. The 32-bit form writes 28 bytes, so reserve
; 32 bytes; 16 bytes would let fstenv/fldenv run past the end of the data.
dq 0
dq 0
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x37F"
  },
  "Mode": "32BIT"
}
%endif

; fnstcw: store the x87 control word to memory and read it back.
; The config expects RAX = 0x37F.
; NOTE(review): presumably the test harness maps memory at 0xe0000000 -- confirm.
mov edx, 0xe0000000
fnstcw [edx]
; Clear eax first so the 16-bit load below leaves the upper half zero.
mov eax, 0
mov ax, [edx]

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x4000"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fld st(i) -- here i = 0, duplicating the top of the stack.
; Pushes 1.0, then 2.0, then a copy of ST0.
; The config expects 1.0 in MM7 and 2.0 in both MM6 and MM5.
lea edx, [first_value]
fld tword [edx]             ; push 1.0

lea edx, [second_value]
fld tword [edx]             ; push 2.0

fld st0                     ; push a copy of ST0 (2.0)

hlt

align 8
first_value:
dt 1.0
dq 0                        ; padding after the 10-byte value
second_value:
dt 2.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fxch -- exchange ST0 with ST1.
; 1.0 and then 2.0 are pushed and swapped; the config expects 1.0 in MM6
; and 2.0 in MM7.
lea edx, [.operands]

fld dword [edx]             ; push 1.0
fld dword [edx + 8]         ; push 2.0

fxch                        ; swap ST0 and ST1

hlt

.operands:
dd 1.0, 0                   ; float 0x3f800000 followed by a zero dword
dd 2.0, 0                   ; float 0x40000000 followed by a zero dword


================================================
FILE: unittests/32Bit_ASM/X87/D9_D0.asm
================================================
%ifdef CONFIG
{
  "Mode": "32BIT"
}
%endif

; Just to ensure execution
; The config lists no RegData, so only reaching hlt is checked.
fnop
hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; fchs -- flip the sign of ST0, once for a positive and once for a negative
; input. The config expects -2.0 in MM7 and 1.0 in MM6.
lea edx, [positive_value]
fld tword [edx]             ; push 2.0
fchs                        ; ST0 = -2.0

lea edx, [negative_value]
fld tword [edx]             ; push -1.0
fchs                        ; ST0 = 1.0

hlt

align 8
positive_value:
dt 2.0
dq 0                        ; padding after the 10-byte value
negative_value:
dt -1.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_E1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fabs -- absolute value of ST0, once for a positive and once for a negative
; input. The config expects 2.0 in MM7 and 1.0 in MM6.
lea edx, [positive_value]
fld tword [edx]             ; push 2.0
fabs                        ; ST0 stays 2.0

lea edx, [negative_value]
fld tword [edx]             ; push -1.0
fabs                        ; ST0 = 1.0

hlt

align 8
positive_value:
dt 2.0
dq 0                        ; padding after the 10-byte value
negative_value:
dt -1.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fld1 pushes the constant +1.0; the config expects its 80-bit encoding in MM7.
fld1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xD49A784BCD1B8AFE", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fldl2t pushes the constant log2(10); the config expects its 80-bit encoding in MM7.
fldl2t

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_EA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xB8AA3B295C17F0BC", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fldl2e pushes the constant log2(e); the config expects its 80-bit encoding in MM7.
fldl2e

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_EB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xC90FDAA22168C235", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; fldpi pushes the constant pi; the config expects its 80-bit encoding in MM7.
fldpi

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_EC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x9A209A84FBCFF799", "0x3FFD"]
  },
  "Mode": "32BIT"
}
%endif

; fldlg2 pushes the constant log10(2); the config expects its 80-bit encoding in MM7.
fldlg2

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_ED.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xB17217F7D1CF79AC", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; fldln2 pushes the constant ln(2); the config expects its 80-bit encoding in MM7.
fldln2

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_EE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0", "0"]
  },
  "Mode": "32BIT"
}
%endif

; fldz pushes the constant +0.0; the config expects an all-zero value in MM7.
fldz

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x0000000000000000", "0x0000"]
  },
  "Mode": "32BIT"
}
%endif

; f2xm1 -- ST0 = 2^ST0 - 1.
; With an input of 0.0 the result is 0.0, which the config expects in MM7.
lea edx, [zero_value]
fld tword [edx]             ; ST0 = 0.0
f2xm1

hlt

align 8
zero_value:
dt 0.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  },
  "Mode": "32BIT"
}
%endif

; fyl2x -- ST1 = ST1 * log2(ST0), then pop.
; With ST1 = 2.0 and ST0 = 16.0 the result is 2.0 * 4.0 = 8.0.
; A 1.0 is pushed afterwards; the config expects 8.0 in MM7 and 1.0 in MM6.
lea edx, [multiplier]
fld tword [edx]             ; push 2.0 (becomes ST1)

lea edx, [log_input]
fld tword [edx]             ; push 16.0 (ST0)

fyl2x
fld1

hlt

align 8
log_input:
dt 16.0
dq 0                        ; padding after the 10-byte value

multiplier:
dt 2.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_F2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0xC75922E5F71D2DC6", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fptan -- replaces ST0 with tan(ST0) and then pushes 1.0.
; For an input of 1.0 the config expects tan(1.0) in MM7 and 1.0 in MM6.
lea edx, [angle]
fld tword [edx]             ; ST0 = 1.0

fptan

hlt

align 8
angle:
dt 1.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_F3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0xC90FDAA22168C235", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; fpatan -- ST1 = arctan(ST1 / ST0), then pop.
; With ST1 = 7.0 and ST0 = 0.0 the result is pi/2.
; A 1.0 is pushed afterwards; the config expects pi/2 in MM7 and 1.0 in MM6.
lea edx, [y_value]
fld tword [edx]             ; push 7.0 (becomes ST1)

lea edx, [x_value]
fld tword [edx]             ; push 0.0 (ST0)

fpatan
fld1

hlt

align 8
y_value:
dt 7.0
dq 0                        ; padding after the 10-byte value
x_value:
dt 0.0
dq 0                        ; padding after the 10-byte value


================================================
FILE: unittests/32Bit_ASM/X87/D9_F4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xF000000000000000", "0xBFFF"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; D9 F4: FXTRACT splits ST0 into exponent and significand: the exponent
; replaces ST0 and the significand is then pushed on top.
; -15.0 = -1.875 * 2^3, so CONFIG expects 3.0 in MM7 and -1.875 in MM6.
lea edx, [data]
fld tword [edx + 8 * 0]

fxtract

hlt

align 8
data:
  dt -15.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_F5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xE666666666666668", "0xBFFE"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; D9 F5: FPREM1 replaces ST0 with the IEEE remainder of ST0 / ST1 (quotient
; rounded to nearest).
; ST1 = 3.0 (pushed first), ST0 = 5.1: 5.1 - 3.0 * 2 = -0.9, expected in MM6.
; The divisor 3.0 is left untouched and is expected in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fprem1

hlt

align 8
data:
  dt 3.0 ; divisor (ST1)
  dq 0
data2:
  dt 5.1 ; dividend (ST0)
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_F6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "7",
    "RBX":  "0",
    "MM0":  "0x3ff00000",
    "MM1":  "0x40700000",
    "MM2":  "0x40600000",
    "MM3":  "0x40500000",
    "MM4":  "0x40400000",
    "MM5":  "0x40300000",
    "MM6":  "0x40200000",
    "MM7":  "0x40000000"
  },
  "Mode": "32BIT"
}
%endif

; D9 F6: FDECSTP decrements the x87 TOP-of-stack pointer without moving data.
; Set the stack with different values.
; Then do fdecstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
; Every constant is loaded with `fld dword`, i.e. as a 32-bit float; the values
; noted below are the float32 interpretation of each bit pattern.

mov eax, 0x3ff00000 ; 1.875
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40000000 ; 2.0
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40200000 ; 2.5
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40300000 ; 2.75
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40400000 ; 3.0
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40500000 ; 3.25
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40600000 ; 3.5
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40700000 ; 3.75
mov [rel temp], eax
fld dword [rel temp]

; Store top in ebx
; TOP is extracted from the status word with a shift by 11 and a 3-bit mask.
xor eax, eax
xor ebx, ebx
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently the last value pushed (float32 bits 0x40700000)
fdecstp

; Store top in RAX
xor eax, eax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is the first value pushed (float32 bits 0x3ff00000)
; Pop ST0..ST7 as 32-bit floats into consecutive 8-byte slots.
fstp dword [rel stack + 8 * 0]
fstp dword [rel stack + 8 * 1]
fstp dword [rel stack + 8 * 2]
fstp dword [rel stack + 8 * 3]
fstp dword [rel stack + 8 * 4]
fstp dword [rel stack + 8 * 5]
fstp dword [rel stack + 8 * 6]
fstp dword [rel stack + 8 * 7]

; Each slot's upper 4 bytes are still zero, so MMn reads back the float32 bits.
movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
temp: dq 0 ; scratch used to move each constant into the FPU
stack: times 8 dq 0 ; one 8-byte slot per popped stack entry


================================================
FILE: unittests/32Bit_ASM/X87/D9_F7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "0",
    "MM0":  "0x40600000",
    "MM1":  "0x40500000",
    "MM2":  "0x40400000",
    "MM3":  "0x40300000",
    "MM4":  "0x40200000",
    "MM5":  "0x40000000",
    "MM6":  "0x3ff00000",
    "MM7":  "0x40700000"
  },
  "Mode": "32BIT"
}
%endif

; D9 F7: FINCSTP increments the x87 TOP-of-stack pointer without moving data.
; Set the stack with different values.
; Then do fincstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
; Every constant is loaded with `fld dword`, i.e. as a 32-bit float; the values
; noted below are the float32 interpretation of each bit pattern.

mov eax, 0x3ff00000 ; 1.875
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40000000 ; 2.0
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40200000 ; 2.5
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40300000 ; 2.75
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40400000 ; 3.0
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40500000 ; 3.25
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40600000 ; 3.5
mov [rel temp], eax
fld dword [rel temp]

mov eax, 0x40700000 ; 3.75
mov [rel temp], eax
fld dword [rel temp]

; Store top in RBX
; TOP is extracted from the status word with a shift by 11 and a 3-bit mask.
; ebx is zeroed explicitly because `mov bx, ax` only writes the low 16 bits,
; while CONFIG compares the whole register against 0.
xor eax, eax
xor ebx, ebx
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently the last value pushed (float32 bits 0x40700000)
fincstp

; Store top in eax
xor eax, eax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is the second-to-last value pushed (float32 bits 0x40600000)
; Pop ST0..ST7 as 32-bit floats into consecutive 8-byte slots.
fstp dword [rel stack + 8 * 0]
fstp dword [rel stack + 8 * 1]
fstp dword [rel stack + 8 * 2]
fstp dword [rel stack + 8 * 3]
fstp dword [rel stack + 8 * 4]
fstp dword [rel stack + 8 * 5]
fstp dword [rel stack + 8 * 6]
fstp dword [rel stack + 8 * 7]

; Each slot's upper 4 bytes are still zero, so MMn reads back the float32 bits.
movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
temp: dq 0 ; scratch used to move each constant into the FPU
stack: times 8 dq 0 ; one 8-byte slot per popped stack entry


================================================
FILE: unittests/32Bit_ASM/X87/D9_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8666666666666666", "0x4000"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; D9 F8: FPREM replaces ST0 with the partial remainder of ST0 / ST1 (quotient
; truncated toward zero).
; ST1 = 3.0 (pushed first), ST0 = 5.1: 5.1 - 3.0 * 1 = 2.1, expected in MM6.
; The divisor 3.0 is left untouched and is expected in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fprem

hlt

align 8
data:
  dt 3.0 ; divisor (ST1)
  dq 0
data2:
  dt 5.1 ; dividend (ST0)
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_F9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  },
  "Mode": "32BIT"
}
%endif

; D9 F9: FYL2XP1 computes ST1 * log2(ST0 + 1.0), pops, and leaves the result
; in the new ST0.
; data2 (2.0) must be pushed first so that it ends up in ST1 and data (15.0)
; ends up in ST0: 2.0 * log2(15.0 + 1.0) = 8.0, the value CONFIG expects in MM7.
; This is the same push order the FYL2X test (D9_F1.asm) uses.
lea edx, [data2]
fld tword [edx + 8 * 0]

lea edx, [data]
fld tword [edx + 8 * 0]

fyl2xp1
fld1 ; push 1.0 above the result; CONFIG expects it in MM6

hlt

align 8
data:
  dt 15.0 ; x operand (ST0 before fyl2xp1)
  dq 0

data2:
  dt 2.0 ; y operand (ST1 before fyl2xp1)
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_FA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FA: FSQRT replaces ST0 with its square root.
; sqrt(16.0) = 4.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

fsqrt

hlt

align 8
data:
  dt 16.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_FB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8A51407DA8345C92", "0x3FFE"],
    "MM7":  ["0xD76AA47848677021", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FB: FSINCOS replaces ST0 with sin(ST0) and then pushes cos of the
; original value.
; With an input of 1.0, CONFIG expects sin(1.0) in MM7 and cos(1.0) in MM6.
lea edx, [data]
fld tword [edx + 8 * 0]

fsincos

hlt

align 8
data:
  dt 1.0 ; angle in radians
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_FC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x3fff"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FC: FRNDINT rounds ST0 to an integer.
; The float32 input (about 1.02546) is expected to round to 1.0 (MM7 in CONFIG).
; 0xe0000000 is used as scratch memory; this test assumes the harness maps it.
mov edx, 0xe0000000

mov eax, 0x3f834241 ; 1.02546
mov [edx + 8 * 0], eax

fld dword [edx + 8 * 0]

frndint

hlt


================================================
FILE: unittests/32Bit_ASM/X87/D9_FD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4006"],
    "MM7":  ["0xB000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FD: FSCALE multiplies ST0 by 2^trunc(ST1).
; data2 (5.5) is pushed first (ST1), data (4.0) second (ST0):
; 4.0 * 2^5 = 128.0, expected in MM6; ST1 stays 5.5 and is expected in MM7.
lea edx, [data2]
fld tword [edx + 8 * 0]

lea edx, [data]
fld tword [edx + 8 * 0]

fscale

hlt

align 8
data:
  dt 4.0 ; value to scale (ST0)
  dq 0

data2:
  dt 5.5 ; scale exponent (ST1), truncated to 5
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_FE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xD76AA47848677021", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FE: FSIN replaces ST0 with sin(ST0).
; CONFIG expects sin(1.0) in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

fsin

hlt

align 8
data:
  dt 1.0 ; angle in radians
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/D9_FF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xD51132BA9B902522", "0xBFFD"]
  },
  "Mode": "32BIT"
}
%endif

; D9 FF: FCOS replaces ST0 with cos(ST0).
; CONFIG expects cos(2.0), a negative value, in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

fcos

hlt

align 8
data:
  dt 2.0 ; angle in radians
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xbfff"]
  },
  "Mode": "32BIT"
}
%endif

; DA /0: FIADD m32int adds a 32-bit signed integer from memory to ST0.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fiadd dword [edx + 8 * 1] ; 1.0 + 2 = 3.0

fstp tword [rel data2]

movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fiadd dword [edx + 8 * 1] ; 1.0 + (-2) = -1.0

fstp tword [rel data2]

movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; DA /1: FIMUL m32int multiplies ST0 by a 32-bit signed integer from memory.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fimul dword [edx + 8 * 1] ; 1.0 * 2 = 2.0
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fimul dword [edx + 8 * 1] ; 1.0 * (-2) = -2.0
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0xBFFF"],
    "XMM1":  ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DA /4: FISUB m32int subtracts a 32-bit signed integer from ST0.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fisub dword [edx + 8 * 1] ; 1.0 - 2 = -1.0
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fisub dword [edx + 8 * 1] ; 1.0 - (-2) = 3.0
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFF"],
    "XMM1":  ["0xC000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; DA /5: FISUBR m32int computes (32-bit signed integer) - ST0 into ST0.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fisubr dword [edx + 8 * 1] ; 2 - 1.0 = 1.0
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fisubr dword [edx + 8 * 1] ; -2 - 1.0 = -3.0
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFE"],
    "XMM1":  ["0x8000000000000000", "0xBFFE"]
  },
  "Mode": "32BIT"
}
%endif

; DA /6: FIDIV m32int divides ST0 by a 32-bit signed integer from memory.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fidiv dword [edx + 8 * 1] ; 1.0 / 2 = 0.5
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fidiv dword [edx + 8 * 1] ; 1.0 / (-2) = -0.5
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; DA /7: FIDIVR m32int computes (32-bit signed integer) / ST0 into ST0.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fidivr dword [edx + 8 * 1] ; 2 / 1.0 = 2.0

fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fidivr dword [edx + 8 * 1] ; -2 / 1.0 = -2.0

fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DA_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DA C0+i: FCMOVB copies ST(i) into ST0 when CF is set.
; Exercises both the not-taken and the taken path.
fld1
fldz

mov eax, 2
cmp eax, 1 ; 2 is not below 1, so CF is clear

fcmovb st0, st1 ; not taken: ST0 stays 0.0 (MM6 in CONFIG)

fldz
cmp eax, 3 ; 2 is below 3, so CF is set
fcmovb st0, st2 ; taken: ST0 becomes 1.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DA_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DA C8+i: FCMOVE copies ST(i) into ST0 when ZF is set.
; Exercises both the taken and the not-taken path.
fld1
fldz

mov eax, 1
cmp eax, 1 ; equal, so ZF is set

fcmove st0, st1 ; taken: ST0 becomes 1.0 (MM6 in CONFIG)

fldz
cmp eax, 0 ; not equal, so ZF is clear
fcmove st0, st2 ; not taken: ST0 stays 0.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DA_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DA D0+i: FCMOVBE copies ST(i) into ST0 when CF or ZF is set.
; Exercises both the taken and the not-taken path.
fld1
fldz

mov eax, 2
cmp eax, 2 ; equal, so ZF is set

fcmovbe st0, st1 ; taken: ST0 becomes 1.0 (MM6 in CONFIG)

fldz
cmp eax, 0 ; 2 is above 0, so CF and ZF are both clear
fcmovbe st0, st2 ; not taken: ST0 stays 0.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DA_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DA D8+i: FCMOVU copies ST(i) into ST0 when PF is set.
; PF is driven through integer compares: it reflects the parity of the low
; byte of the cmp result.
fld1
fldz

mov eax, 0x0
cmp eax, -1 ; 0 - (-1) = 1: low byte 0x01 has odd parity, so PF is clear

fcmovu st0, st1 ; not taken: ST0 stays 0.0 (MM6 in CONFIG)

fldz
cmp eax, 1 ; 0 - 1 = 0xFFFFFFFF: low byte 0xFF has even parity, so PF is set
fcmovu st0, st2 ; taken: ST0 becomes 1.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  },
  "Mode": "32BIT"
}
%endif

; DB /0: FILD m32int converts a 32-bit signed integer to extended precision
; and pushes it.
; 1024 = 2^10, which CONFIG expects in MM7.
lea edx, [.data]

fild dword [edx + 8 * 0]

hlt

.data:
dq 1024 ; only the low 32 bits are read by fild dword


================================================
FILE: unittests/32Bit_ASM/X87/DB_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB /1: FISTTP m32int stores ST0 as a truncated 32-bit integer and pops.
lea edx, [rel .data]

fld dword [edx + 8 * 0]

fisttp dword [edx + 8 * 1]

; Because fisttp popped, this 1.0 reuses the same slot (MM7 in CONFIG).
fld1

mov eax, [edx + 8 * 1] ; stored integer, expected to be 0x400

hlt

align 4096
.data:
dq 0x44800000 ; 1024.0 as float32 in the low dword
dq 0 ; destination of the integer store


================================================
FILE: unittests/32Bit_ASM/X87/DB_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4009"]
  },
  "Mode": "32BIT"
}
%endif

; DB /2: FIST m32int stores ST0 as a 32-bit integer without popping.
lea edx, [rel .data]

fld dword [edx + 8 * 0]

fist dword [edx + 8 * 1]

; fist did not pop, so 1024.0 stays in MM7 and this 1.0 lands in MM6.
fld1

mov eax, [edx + 8 * 1] ; stored integer, expected to be 0x400

hlt

align 4096
.data:
dq 0x44800000 ; 1024.0 as float32 in the low dword
dq 0 ; destination of the integer store


================================================
FILE: unittests/32Bit_ASM/X87/DB_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB /3: FISTP m32int stores ST0 as a 32-bit integer and pops.
lea edx, [rel .data]
fld dword [edx + 8 * 0]

fistp dword [edx + 8 * 1]

; Because fistp popped, this 1.0 reuses the same slot (MM7 in CONFIG).
fld1

mov eax, [edx + 8 * 1] ; stored integer, expected to be 0x400

hlt

align 4096
.data:
dq 0x44800000 ; 1024.0 as float32 in the low dword
dq 0 ; destination of the integer store


================================================
FILE: unittests/32Bit_ASM/X87/DB_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DB /5: FLD m80fp pushes an 80-bit extended-precision value from memory.
; CONFIG expects the unchanged 2.0 in MM7.
lea edx, [data]

fld tword [edx + 8 * 0]
hlt

align 8
data:
  dt 2.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DB_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DB /7: FSTP m80fp stores ST0 to memory as an 80-bit value and pops.
; The value is round-tripped: load 2.0, store it over data2, then reload data2.
lea edx, [rel data]
fld tword [edx + 8 * 0]

lea edx, [rel data2]
fstp tword [edx + 8 * 0]
fld tword [edx + 8 * 0] ; expected to read back 2.0 (MM7 in CONFIG)

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 0.0 ; overwritten by the fstp above
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DB_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB C0+i: FCMOVNB copies ST(i) into ST0 when CF is clear.
; Exercises both the taken and the not-taken path.
fld1
fldz

mov eax, 2
cmp eax, 1 ; 2 is not below 1, so CF is clear

fcmovnb st0, st1 ; taken: ST0 becomes 1.0 (MM6 in CONFIG)

fldz
cmp eax, 3 ; 2 is below 3, so CF is set
fcmovnb st0, st2 ; not taken: ST0 stays 0.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB C8+i: FCMOVNE copies ST(i) into ST0 when ZF is clear.
; Exercises both the not-taken and the taken path.
fld1
fldz

mov eax, 1
cmp eax, 1 ; equal, so ZF is set

fcmovne st0, st1 ; not taken: ST0 stays 0.0 (MM6 in CONFIG)

fldz
cmp eax, 0 ; not equal, so ZF is clear
fcmovne st0, st2 ; taken: ST0 becomes 1.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB D0+i: FCMOVNBE copies ST(i) into ST0 when CF and ZF are both clear.
; Exercises both the not-taken and the taken path.
fld1
fldz

mov eax, 2
cmp eax, 2 ; equal, so ZF is set

fcmovnbe st0, st1 ; not taken: ST0 stays 0.0 (MM6 in CONFIG)

fldz
cmp eax, 0 ; 2 is above 0, so CF and ZF are both clear
fcmovnbe st0, st2 ; taken: ST0 becomes 1.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DB D8+i: FCMOVNU copies ST(i) into ST0 when PF is clear.
; PF is driven through integer compares: it reflects the parity of the low
; byte of the cmp result.
fld1
fldz

mov eax, 0x0
cmp eax, -1 ; 0 - (-1) = 1: low byte 0x01 has odd parity, so PF is clear

fcmovnu st0, st1 ; taken: ST0 becomes 1.0 (MM6 in CONFIG)

fldz
cmp eax, 1 ; 0 - 1 = 0xFFFFFFFF: low byte 0xFF has even parity, so PF is set
fcmovnu st0, st2 ; not taken: ST0 stays 0.0 (MM5 in CONFIG)

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_E2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1"
  },
  "Mode": "32BIT"
}
%endif

; DB E2: FNCLEX clears the exception flags in the x87 status word.
; "IOC" in the comments below refers to the invalid-operation flag, which is
; bit 0 of the status word (hence the `and eax, 1`).
finit ; IOC is 0
fldz
fldz
fdiv st0, st1 ; IOC is 1
; 0.0 / 0.0 is an invalid operation, which raises the flag.

fnstsw ax
and eax, 1
mov ebx, eax ; save IOC to RBX

; Clear
fnclex

; Re-read the flag; CONFIG expects it to be 0 after fnclex.
fnstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DB_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x037F"
  },
  "Mode": "32BIT"
}
%endif

; DB E3: FNINIT resets the x87 FPU; the control word is expected to read
; back as 0x037F afterwards.
fninit

; Ensures that fnstcw after fninit sets the correct value
fnstcw [rel control]
; Only AX is written here; CONFIG compares the whole of RAX against 0x037F.
mov ax, word [rel control]

hlt

align 4096
control:
times 2 db 0 ; Reserve space for the FPU control word


================================================
FILE: unittests/32Bit_ASM/X87/DB_E3_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  },
  "Mode": "32BIT"
}
%endif

; Tests that fninit clears the status word (which includes the IE flag)
fninit
fldz
fldz
fdiv ; sets IE flag
; 0.0 / 0.0 is the invalid operation that raises IE.

; The second fninit must wipe the status word, so AX is expected to be 0.
fninit
fnstsw ax

hlt


================================================
FILE: unittests/32Bit_ASM/X87/DC_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DC /0: FADD m64fp adds a double from memory to ST0.
; 1.0 + 2.0 = 3.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fadd qword [edx + 8 * 0]

hlt

align 8
data:
  dt 1.0 ; ST0 operand (80-bit)
  dq 0
data2:
  dq 2.0 ; memory operand (64-bit double)


================================================
FILE: unittests/32Bit_ASM/X87/DC_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DC /1: FMUL m64fp multiplies ST0 by a double from memory.
; 1.0 * 2.0 = 2.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fmul qword [edx + 8 * 0]

hlt

align 8
data:
  dt 1.0 ; ST0 operand (80-bit)
  dq 0
data2:
  dq 2.0 ; memory operand (64-bit double)


================================================
FILE: unittests/32Bit_ASM/X87/DC_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  },
  "Mode": "32BIT"
}
%endif

; DC /4: FSUB m64fp subtracts a double from memory from ST0.
; 1.0 - 2.0 = -1.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fsub qword [edx + 8 * 0]

hlt

align 8
data:
  dt 1.0 ; ST0 operand (80-bit)
  dq 0
data2:
  dq 2.0 ; memory operand (64-bit double)


================================================
FILE: unittests/32Bit_ASM/X87/DC_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DC /5: FSUBR m64fp computes (memory double) - ST0 into ST0.
; 2.0 - 1.0 = 1.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fsubr qword [edx + 8 * 0]

hlt

align 8
data:
  dt 1.0 ; ST0 operand (80-bit)
  dq 0
data2:
  dq 2.0 ; memory operand (64-bit double)


================================================
FILE: unittests/32Bit_ASM/X87/DC_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; DC /6: FDIV m64fp divides ST0 by a double from memory.
; 1.0 / 2.0 = 0.5, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fdiv qword [edx + 8 * 0]

hlt

align 8
data:
  dt 1.0 ; ST0 operand (80-bit)
  dq 0
data2:
  dq 2.0 ; memory operand (64-bit double)


================================================
FILE: unittests/32Bit_ASM/X87/DC_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; DC /7: FDIVR m64fp computes (memory double) / ST0 into ST0.
; 8.0 / 2.0 = 4.0, which CONFIG expects in MM7.
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fdivr qword [edx + 8 * 0]

hlt

align 8
data:
  dt 2.0 ; ST0 operand (80-bit), the divisor
  dq 0
data2:
  dq 8.0 ; memory operand (64-bit double), the dividend


================================================
FILE: unittests/32Bit_ASM/X87/DC_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0xA000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; DC C0+i: FADD ST(i), ST0 adds ST0 into ST(i).
; After the three loads ST0 = 4.0, ST1 = 2.0, ST2 = 1.0.
lea edx, [.data]

fld qword [edx + 8 * 0]
fld qword [edx + 8 * 1]
fld qword [edx + 8 * 2]

; fadd st(i), st(0)
; ST2 = 1.0 + 4.0 = 5.0 (MM7 in CONFIG); ST0 and ST1 are left unchanged.
fadd st2, st0

hlt
.data:
dq 0x3ff0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0
dq 0x4010000000000000 ; 4.0


================================================
FILE: unittests/32Bit_ASM/X87/DC_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4001"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  },
  "Mode": "32BIT"
}
%endif

; DC C8+i: FMUL ST(i), ST0 multiplies ST(i) by ST0.
; ST1 = 2.0 (pushed first), ST0 = 4.0: ST1 becomes 8.0 (MM7 in CONFIG) and
; ST0 stays 4.0 (MM6).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fmul st1, st0

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DC_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DC E0+i: FSUBR ST(i), ST0 computes ST0 - ST(i) into ST(i).
; ST1 = 1.0 (pushed first), ST0 = 2.0: ST1 becomes 2.0 - 1.0 = 1.0 (MM7 in
; CONFIG) and ST0 stays 2.0 (MM6).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fsubr st1, st0

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DC_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  },
  "Mode": "32BIT"
}
%endif

; DC E8+i: FSUB ST(i), ST0 computes ST(i) - ST0 into ST(i).
; ST1 = 1.0 (pushed first), ST0 = 2.0: ST1 becomes 1.0 - 2.0 = -1.0 (MM7 in
; CONFIG) and ST0 stays 2.0 (MM6).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fsub st1, st0

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DC_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; DC F0+i: FDIVR ST(i), ST0 computes ST0 / ST(i) into ST(i).
; ST1 = 2.0 (pushed first), ST0 = 1.0: ST1 becomes 1.0 / 2.0 = 0.5 (MM7 in
; CONFIG) and ST0 stays 1.0 (MM6).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fdivr st1, st0

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DC_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x4001"]
  },
  "Mode": "32BIT"
}
%endif

; DC F8+i: FDIV ST(i), ST0 computes ST(i) / ST0 into ST(i).
; ST1 = 8.0 (pushed first), ST0 = 2.0: ST1 becomes 8.0 / 2.0 = 4.0 (MM7 in
; CONFIG) and ST0 stays 2.0 (MM6).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]

fdiv st1, st0

hlt

align 8
data:
  dt 8.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DD /0: FLD m64fp pushes a double from memory, widened to extended precision.
; CONFIG expects 2.0 in MM7.
lea edx, [.data]

fld qword [edx + 8 * 0]
hlt

.data:
dq 0x4000000000000000 ; 2.0


================================================
FILE: unittests/32Bit_ASM/X87/DD_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "RBX": "0x0",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DD /1: FISTTP m64int stores ST0 as a truncated 64-bit integer and pops.
lea edx, [rel data]
fld tword [edx + 8 * 0]

lea edx, [rel data3]
fisttp qword [edx + 8 * 0]

; Read the stored 64-bit integer back as two dwords: low half 2, high half 0.
mov eax, [edx + 4 * 0]
mov ebx, [edx + 4 * 1]

; Because fisttp popped, this 1.0 lands in the slot that held 2.0 (MM7).
lea edx, [rel data2]
fld tword [edx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0 ; destination of the integer store
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000",
    "RBX": "0x40000000",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; DD /2: FST m64fp stores ST0 to memory as a double without popping.
lea edx, [rel data]
fld tword [edx + 8 * 0]

lea edx, [rel data3]
fst qword [edx + 8 * 0]

; Read the stored double back as two dwords: 2.0 is 0x40000000_00000000.
mov eax, [edx + 4 * 0]
mov ebx, [edx + 4 * 1]

; fst did not pop, so 2.0 stays in MM7 and this 1.0 lands in MM6.
lea edx, [rel data2]
fld tword [edx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0 ; destination of the double store
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000",
    "RBX": "0x40000000",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DD /3: FSTP m64fp stores ST0 to memory as a double and pops.
lea edx, [rel data]
fld tword [edx + 8 * 0]

lea edx, [rel data3]
fstp qword [edx + 8 * 0]

; Read the stored double back as two dwords: 2.0 is 0x40000000_00000000.
mov eax, [edx + 4 * 0]
mov ebx, [edx + 4 * 1]

; Because fstp popped, this 1.0 lands in the slot that held 2.0 (MM7).
lea edx, [rel data2]
fld tword [edx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0 ; destination of the double store
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc90fdaa22168c235", "0x4000"],
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"],
    "XMM7": ["0x0000000000000000", "0x0000"],
    "MM0":  ["0xc90fdaa22168c235", "0x4000"],
    "MM1":  ["0x8000000000000000", "0x4005"],
    "MM2":  ["0x8000000000000000", "0x4004"],
    "MM3":  ["0x8000000000000000", "0x4003"],
    "MM4":  ["0x8000000000000000", "0x4002"],
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x0000000000000000", "0x0000"]
  },
  "Mode": "32BIT"
}
%endif

; DD /6 + DD /4: FNSAVE followed by FRSTOR with a 32-bit operand size.
; The save image is read at offset 0x1C (the size of the 32-bit environment
; header), where the eight 10-byte stack registers ST0..ST7 follow.
lea edx, [rel .data]

; Fill all eight stack slots: 0.0, 2, 4, 8, 16, 32, 64 and finally pi in ST0.
fldz
fild word [edx + 2 * 1]
fild word [edx + 2 * 2]
fild word [edx + 2 * 3]
fild word [edx + 2 * 4]
fild word [edx + 2 * 5]
fild word [edx + 2 * 6]
fldpi

; Save the state over the input table, which has already been consumed.
o32 fnsave [edx]

; Overwrite every slot with pi so that a successful frstor is observable.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o32 frstor [edx]

; Pull ST0..ST7 out of the saved image. Each load is 16 bytes wide, so it also
; picks up 6 bytes belonging to the next register.
movups xmm0, [edx + (0x1C + 10 * 0)]
movups xmm1, [edx + (0x1C + 10 * 1)]
movups xmm2, [edx + (0x1C + 10 * 2)]
movups xmm3, [edx + (0x1C + 10 * 3)]
movups xmm4, [edx + (0x1C + 10 * 4)]
movups xmm5, [edx + (0x1C + 10 * 5)]
movups xmm6, [edx + (0x1C + 10 * 6)]
movups xmm7, [edx + (0x1C + 10 * 7)]

; Shift left then right by 6 bytes to zero the top 6 bytes of each XMM
; register, leaving only the 10-byte x87 value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt

align 4096
; Input table of 16-bit integers; the same memory is reused as the save area.
.data:
dw 0
dw 2
dw 4
dw 8
dw 16
dw 32
dw 64
; Zero padding so the fnsave image and the 16-byte reads stay in this buffer.
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc90fdaa22168c235", "0x4000"],
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"],
    "XMM7": ["0x0000000000000000", "0x0000"],
    "MM0":  ["0xc90fdaa22168c235", "0x4000"],
    "MM1":  ["0x8000000000000000", "0x4005"],
    "MM2":  ["0x8000000000000000", "0x4004"],
    "MM3":  ["0x8000000000000000", "0x4003"],
    "MM4":  ["0x8000000000000000", "0x4002"],
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x0000000000000000", "0x0000"]
  },
  "Mode": "32BIT"
}
%endif

; DD /6 + DD /4: FNSAVE followed by FRSTOR with a 16-bit operand size.
; The save image is read at offset 0xE (the size of the 16-bit environment
; header), where the eight 10-byte stack registers ST0..ST7 follow.
lea edx, [rel .data]

; Fill all eight stack slots: 0.0, 2, 4, 8, 16, 32, 64 and finally pi in ST0.
fldz
fild word [edx + 2 * 1]
fild word [edx + 2 * 2]
fild word [edx + 2 * 3]
fild word [edx + 2 * 4]
fild word [edx + 2 * 5]
fild word [edx + 2 * 6]
fldpi

; Save the state over the input table, which has already been consumed.
o16 fnsave [edx]

; Overwrite every slot with pi so that a successful frstor is observable.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o16 frstor [edx]

; Pull ST0..ST7 out of the saved image. Each load is 16 bytes wide, so it also
; picks up 6 bytes belonging to the next register.
movups xmm0, [edx + (0xE + 10 * 0)]
movups xmm1, [edx + (0xE + 10 * 1)]
movups xmm2, [edx + (0xE + 10 * 2)]
movups xmm3, [edx + (0xE + 10 * 3)]
movups xmm4, [edx + (0xE + 10 * 4)]
movups xmm5, [edx + (0xE + 10 * 5)]
movups xmm6, [edx + (0xE + 10 * 6)]
movups xmm7, [edx + (0xE + 10 * 7)]

; Shift left then right by 6 bytes to zero the top 6 bytes of each XMM
; register, leaving only the 10-byte x87 value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt

align 4096
; Input table of 16-bit integers; the same memory is reused as the save area.
.data:
dw 0
dw 2
dw 4
dw 8
dw 16
dw 32
dw 64
; Zero padding so the fnsave image and the 16-byte reads stay in this buffer.
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF3800",
    "RBX": "0xFFFF0000"
  },
  "Mode": "32BIT"
}
%endif

; DD /7: FNSTSW m16 stores the x87 status word to memory.
; The status word is sampled before and after a single push.
lea edx, [rel .data]

; Preload all-ones so the upper 16 bits visibly survive the 16-bit loads below.
mov eax, -1
mov ebx, -1
fnstsw [edx + 8 * 1] ; status word before the push, expected to be 0

fld dword [edx + 8 * 0]
fnstsw [edx + 8 * 2] ; after one push TOP is 7, i.e. 7 << 11 = 0x3800
mov ax, word [edx + 8 * 2]
mov bx, word [edx + 8 * 1]

hlt

align 4096
.data:
dq 0x3f800000 ; 1.0 as float32 in the low dword
dq 0 ; status word before the push
dq 0 ; status word after the push


================================================
FILE: unittests/32Bit_ASM/X87/DD_C0.asm
================================================
%ifdef CONFIG
{
  "Mode": "32BIT"
}
%endif

; DD C0+i: FFREE ST(i), issued once for every stack register.
; Just to ensure execution
; CONFIG declares no expected register values; the test only has to reach hlt.
ffree st0
ffree st1
ffree st2
ffree st3
ffree st4
ffree st5
ffree st6
ffree st7
hlt


================================================
FILE: unittests/32Bit_ASM/X87/DD_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DD D0+i: FST ST(i) copies ST0 into ST(i) without popping.
; ST1 = 2.0 (pushed first), ST0 = 1.0: after the copy both slots hold 1.0
; (MM6 and MM7 in CONFIG).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]
fst st1

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DD_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; DD D8+i: FSTP ST(i) copies ST0 into ST(i) and pops.
; ST1 = 2.0 (pushed first), ST0 = 1.0: the copy overwrites 2.0 with 1.0 and
; the pop makes that slot the new ST0 (MM7 in CONFIG).
lea edx, [data]
fld tword [edx + 8 * 0]

lea edx, [data2]
fld tword [edx + 8 * 0]
fstp st1

; A fresh push lands in the slot freed by the pop (4.0 in MM6).
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xbfff"]
  },
  "Mode": "32BIT"
}
%endif

; DE /0: FIADD m16int adds a 16-bit signed integer from memory to ST0.
; Each result is popped as an 80-bit value into data2 and read back through
; an XMM register so CONFIG can check mantissa and sign/exponent.
lea edx, [rel .data]

fld qword [edx + 8 * 0]
fiadd word [edx + 8 * 1] ; 1.0 + 2 = 3.0
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

fld qword [edx + 8 * 0]
fiadd word [edx + 8 * 1] ; 1.0 + (-2) = -1.0

fstp tword [rel data2]

movups xmm1, [rel data2]

hlt

align 4096
.data:
dq 0x3ff0000000000000 ; 1.0 as a double
dq 2 ; only the low 16 bits are read

.data_neg:
dq 0x3ff0000000000000 ; 1.0 as a double
dq -2 ; only the low 16 bits are read

; 16-byte result buffer; fstp tword writes only its low 10 bytes.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; FIMUL m16int: st0 = st0 * 16-bit integer from memory.
; Each result is stored as an 80-bit value into data2 and read back through an
; XMM register so the CONFIG block can compare the raw extended-precision bits.
lea edx, [rel .data]

; 1.0 * 2 = 2.0 (expected XMM0)
fld qword [edx + 8 * 0]
fimul word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

; 1.0 * (-2) = -2.0 (expected XMM1)
fld qword [edx + 8 * 0]
fimul word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
; Double-precision 1.0 followed by the integer operand (only its low word is read).
.data:
dq 0x3ff0000000000000
dq 2

.data_neg:
dq 0x3ff0000000000000
dq -2

; 16-byte scratch area: the 10-byte FSTP result plus zero padding read by MOVUPS.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0xBFFF"],
    "XMM1":  ["0xc000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; FISUB m16int: st0 = st0 - 16-bit integer from memory.
; Each result is stored as an 80-bit value into data2 and read back through an
; XMM register so the CONFIG block can compare the raw extended-precision bits.
lea edx, [rel .data]

; 1.0 - 2 = -1.0 (expected XMM0)
fld qword [edx + 8 * 0]
fisub word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

; 1.0 - (-2) = 3.0 (expected XMM1)
fld qword [edx + 8 * 0]
fisub word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
; Double-precision 1.0 followed by the integer operand (only its low word is read).
.data:
dq 0x3ff0000000000000
dq 2

.data_neg:
dq 0x3ff0000000000000
dq -2

; 16-byte scratch area: the 10-byte FSTP result plus zero padding read by MOVUPS.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFF"],
    "XMM1":  ["0xC000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; FISUBR m16int: st0 = 16-bit integer from memory - st0 (reversed subtract).
; Each result is stored as an 80-bit value into data2 and read back through an
; XMM register so the CONFIG block can compare the raw extended-precision bits.
lea edx, [rel .data]

; 2 - 1.0 = 1.0 (expected XMM0)
fld qword [edx + 8 * 0]
fisubr word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

; (-2) - 1.0 = -3.0 (expected XMM1)
fld qword [edx + 8 * 0]
fisubr word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
; Double-precision 1.0 followed by the integer operand (only its low word is read).
.data:
dq 0x3ff0000000000000
dq 2

.data_neg:
dq 0x3ff0000000000000
dq -2

; 16-byte scratch area: the 10-byte FSTP result plus zero padding read by MOVUPS.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFE"],
    "XMM1":  ["0x8000000000000000", "0xBFFE"]
  },
  "Mode": "32BIT"
}
%endif

; FIDIV m16int: st0 = st0 / 16-bit integer from memory.
; Each result is stored as an 80-bit value into data2 and read back through an
; XMM register so the CONFIG block can compare the raw extended-precision bits.
lea edx, [rel .data]

; 1.0 / 2 = 0.5 (expected XMM0)
fld qword [edx + 8 * 0]
fidiv word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

; 1.0 / (-2) = -0.5 (expected XMM1)
fld qword [edx + 8 * 0]
fidiv word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
; Double-precision 1.0 followed by the integer operand (only its low word is read).
.data:
dq 0x3ff0000000000000
dq 2

.data_neg:
dq 0x3ff0000000000000
dq -2

; 16-byte scratch area: the 10-byte FSTP result plus zero padding read by MOVUPS.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; FIDIVR m16int: st0 = 16-bit integer from memory / st0 (reversed divide).
; Each result is stored as an 80-bit value into data2 and read back through an
; XMM register so the CONFIG block can compare the raw extended-precision bits.
lea edx, [rel .data]

; 2 / 1.0 = 2.0 (expected XMM0)
fld qword [edx + 8 * 0]
fidivr word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
lea edx, [rel .data_neg]

; (-2) / 1.0 = -2.0 (expected XMM1)
fld qword [edx + 8 * 0]
fidivr word [edx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
; Double-precision 1.0 followed by the integer operand (only its low word is read).
.data:
dq 0x3ff0000000000000
dq 2

.data_neg:
dq 0x3ff0000000000000
dq -2

; 16-byte scratch area: the 10-byte FSTP result plus zero padding read by MOVUPS.
data2:
dq 0
dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0xC000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; FADDP st1, st0: st1 = st1 + st0, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 1.0 and add-pop: 2.0 + 1.0 = 3.0 is left in the first slot (expected MM7).
lea edx, [data2]
fld tword [edx + 8 * 0]
faddp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4002"]
  },
  "Mode": "32BIT"
}
%endif

; FMULP st1, st0: st1 = st1 * st0, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 4.0 and multiply-pop: 2.0 * 4.0 = 8.0 is left in the first slot (expected MM7).
lea edx, [data2]
fld tword [edx + 8 * 0]
fmulp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; FSUBRP st1, st0: st1 = st0 - st1, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 4.0 and reverse-subtract-pop: 4.0 - 2.0 = 2.0 is left in the first slot (expected MM7).
lea edx, [data2]
fld tword [edx + 8 * 0]
fsubrp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0xC000"]
  },
  "Mode": "32BIT"
}
%endif

; FSUBP st1, st0: st1 = st1 - st0, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 4.0 and subtract-pop: 2.0 - 4.0 = -2.0 is left in the first slot (expected MM7).
lea edx, [data2]
fld tword [edx + 8 * 0]
fsubp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4000"]
  },
  "Mode": "32BIT"
}
%endif

; FDIVRP st1, st0: st1 = st0 / st1, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 4.0 and reverse-divide-pop: 4.0 / 2.0 = 2.0 is left in the first slot (expected MM7).
lea edx, [data2]
fld tword [edx + 8 * 0]
fdivrp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DE_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFE"]
  },
  "Mode": "32BIT"
}
%endif

; FDIVP st1, st0: st1 = st1 / st0, then pop.
; Push 2.0 (first push, checked as MM7).
lea edx, [data]
fld tword [edx + 8 * 0]

; Push 4.0 on top.
lea edx, [data2]
fld tword [edx + 8 * 0]

; fdivp 2.0, 4.0
; == st1 = 2.0 / 4.0
; The quotient 0.5 is left in the first slot (expected MM7).
fdivp st1, st0

; Push 4.0 afterwards; it is expected in MM6, confirming the pop happened.
lea edx, [data3]
fld tword [edx + 8 * 0]

hlt

; 80-bit operands, each followed by one zero qword of padding.
align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/32Bit_ASM/X87/DF_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  },
  "Mode": "32BIT"
}
%endif

; FILD m16int: push a 16-bit signed integer converted to floating point.
lea edx, [.data]

; Loads the low word of the first qword (1024); expected MM7 is 1024.0.
fild word [edx + 8 * 0]

hlt

.data:
dq 1024
dq -1 ; not read by the code above


================================================
FILE: unittests/32Bit_ASM/X87/DF_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; FISTTP m16int: store st0 as a 16-bit integer using truncation, then pop.
; Push 2.0.
lea edx, [rel data]
fld tword [edx + 8 * 0]

; Truncate-store 2.0 over the low word of data3 (pre-filled with all ones) and pop.
lea edx, [rel data3]
fisttp word [edx + 8 * 0]

; Clear eax first so the upper 16 bits of the checked RAX value are defined by
; this test itself (same pattern as the sibling DF_02/DF_03 tests) instead of
; depending on the register's initial contents.
mov eax, 0
mov ax, word [edx + 8 * 0]

; Push 1.0 afterwards; it is expected in MM7, which shows FISTTP popped the 2.0.
lea edx, [rel data2]
fld tword [edx + 8 * 0]

hlt

align 4096
; 80-bit operands, each followed by one zero qword of padding.
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
; Store target, pre-filled with all ones so an untouched word would be visible.
data3:
  dq -1
  dq -1


================================================
FILE: unittests/32Bit_ASM/X87/DF_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4009"]
  },
  "Mode": "32BIT"
}
%endif

; FIST m16int: store st0 as a 16-bit integer without popping.
lea edx, [rel .data]

; Push single-precision 1024.0 (first push, checked as MM7).
fld dword [edx + 8 * 0]

; Store 1024 over the low word of the second qword (pre-filled with all ones).
fist word [edx + 8 * 1]

; FIST does not pop, so this 1.0 is expected in the next slot (MM6).
fld1

; Zero eax, then read back the stored word (expected RAX = 0x400).
mov eax, 0
mov ax, word [edx + 8 * 1]

hlt

align 4096
.data:
dq 0x44800000 ; low dword is single-precision 1024.0
dq -1         ; store target


================================================
FILE: unittests/32Bit_ASM/X87/DF_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; FISTP m16int: store st0 as a 16-bit integer, then pop.
lea edx, [rel .data]

; Push single-precision 1024.0.
fld dword [edx + 8 * 0]

; Store 1024 over the low word of the second qword (pre-filled with all ones) and pop.
fistp word [edx + 8 * 1]

; Because of the pop, this 1.0 is expected in the slot of the first push (MM7).
fld1

; Zero eax, then read back the stored word (expected RAX = 0x400).
mov eax, 0
mov ax, word [edx + 8 * 1]

hlt

align 4096
.data:
dq 0x44800000 ; low dword is single-precision 1024.0
dq -1         ; store target


================================================
FILE: unittests/32Bit_ASM/X87/DF_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  },
  "Mode": "32BIT"
}
%endif

; FILD m64int: push a 64-bit signed integer converted to floating point.
lea edx, [.data]

; Loads 1024; expected MM7 is 1024.0.
fild qword [edx + 8 * 0]

hlt
.data:
dq 1024


================================================
FILE: unittests/32Bit_ASM/X87/DF_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": "0x0",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; FISTP m64int: store st0 as a 64-bit integer, then pop.
lea edx, [rel .data]

; Push single-precision 1024.0.
fld dword [edx + 8 * 0]

; Store 1024 over the second qword (pre-filled with all ones) and pop.
fistp qword [edx + 8 * 1]

; Because of the pop, this 1.0 is expected in the slot of the first push (MM7).
fld1

; Read back both halves of the stored qword: low dword (expected 0x400) and
; high dword (expected 0).
mov eax, dword [edx + 4 * 2]
mov ebx, dword [edx + 4 * 3]

hlt

align 4096
.data:
dq 0x44800000 ; low dword is single-precision 1024.0
dq -1         ; store target


================================================
FILE: unittests/32Bit_ASM/X87/DF_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4001"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  },
  "Mode": "32BIT"
}
%endif

; FFREEP st(i): free a stack register, then pop.
lea edx, [.data]

; Push 1.0 (first push, checked as MM7) and 2.0 (second push).
fld qword [edx + 8 * 0]
fld qword [edx + 8 * 1]

; Undocumented x87 instruction
; Sets the tag register to empty for the stack register
; Then pops the stack
ffreep st0
; The 4.0 is expected in MM6, i.e. in the slot the freed 2.0 occupied.
fld qword [edx + 8 * 2] ; Overwrites previous value

hlt

.data:
dq 0x3ff0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0
dq 0x4010000000000000 ; 4.0


================================================
FILE: unittests/32Bit_ASM/X87/DF_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF3800",
    "RBX": "0xFFFF0000"
  },
  "Mode": "32BIT"
}
%endif

; FNSTSW AX: copy the x87 status word into ax.
lea edx, [.data]

; Pre-fill eax/ebx with all ones so the CONFIG check also shows that the
; upper 16 bits of both registers are left untouched (0xFFFF....).
mov eax, -1
mov ebx, -1
; Status word before any x87 instruction has run; expected low half is 0x0000.
fnstsw ax
mov bx, ax

; After one push the expected status word is 0x3800, i.e. only the TOP field
; (bits 11-13) changed, to 7.
fld dword [edx + 8 * 0]
fnstsw ax

hlt

.data:
dq 0x3f800000 ; 1.0


================================================
FILE: unittests/32Bit_ASM/X87/FST_AddrModes.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3f800000",
    "RBX": "0x3f800000",
    "RCX": "0x3f800000",
    "RBP": "0x3f800000",
    "RDI": "0x3f800000",
    "RSP": "0x3f800000"
  },
  "MemoryRegions": {
    "0xf0000000": "4096"
  },
  "Mode": "32BIT"
}
%endif

; Stores single-precision 1.0 with FST through six different 32-bit addressing
; forms and reads every location back; CONFIG expects 0x3f800000 in all six
; destination registers.

; Setup
fld1
lea edx, [rel base]
mov esi, 0x64

; Test fst
fst dword [edx]                   ; base only
fst dword [edx + 0xa]             ; base + displacement
fst dword [edx + esi]             ; base + index
fst dword [edx + esi * 4]         ; base + scaled index
fst dword [edx + esi + 0xa]       ; base + index + displacement
fst dword [edx + esi * 4 + 0xa]   ; base + scaled index + displacement

; Result check
; Each load uses the same addressing form as the matching store above.
; esp and ebp are used as plain result registers here; no stack access follows.
mov eax, dword [edx]
mov ebx, dword [edx + 0xa]
mov ecx, dword [edx + esi]
mov ebp, dword [edx + esi * 4]
mov edi, dword [edx + esi + 0xa]
mov esp, dword [edx + esi * 4 + 0xa]

hlt

; 4 KiB scratch buffer; the largest offset written above is 0x64 * 4 + 0xa.
align 4096
section .bss
base resb 4096

section .text


================================================
FILE: unittests/32Bit_ASM/X87/RoundingNeg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xfffefffe",
    "RBX": "0xffffffff",
    "RCX": "0xfffffffe",
    "RDX": "0xffffffff",
    "RSI": "0xfffefffe",
    "RDI": "0xffffffff"
  },
  "MemoryRegions": {
    "0xf0000000": "4096"
  },
  "Mode": "32BIT"
}
%endif

; Rounding tests to ensure rounding modes are actually working
;;; Negative tests
; For each of three negative inputs the value is stored as an integer under all
; four rounding-control (RC, control word bits 10-11) settings. Only the low
; 16 bits of each stored result are kept, and two results are packed per
; register:
;   eax = nearest:down, ebx = up:toward-zero   (input -1.5)
;   ecx = nearest:down, edx = up:toward-zero   (input -1.49999)
;   esi = nearest:down, edi = up:toward-zero   (input -1.50001)
; Every RC change reads the control word with FSTCW, clears the RC field with
; 0xf3ff, ORs in the new mode and reloads it with FLDCW. `tmp` is reused both
; for the control word and for the stored integer.
;
; Every packed half must come only from the stored integer, so the first half
; of each register is set with mov (or written into a register whose upper
; bits are about to be shifted out), never OR-ed with unrelated register
; contents: doing so could hide a wrong rounding result.
;; Mid-point
finit
fld dword [rel nmidpoint]

; Default rounding is 00 - round to nearest
fist word [rel tmp]
mov di, word [rel tmp]
mov eax, edi
shl eax, 16 ; whatever was in the upper half of edi is shifted out here

; Round down - 01
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0400
mov word [rel tmp], di
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp] ; upper half of edi is zero from the movzx above
or eax, edi

; Round up - 10
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0800
mov word [rel tmp], di
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp]
mov ebx, edi
shl ebx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0c00
mov word [rel tmp], di
fldcw word [rel tmp]

fistp dword [rel tmp]
mov di, word [rel tmp]
or ebx, edi

;; Slightly above midpoint
finit
fld dword [rel nsamidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov di, word [rel tmp]
mov ecx, edi
shl ecx, 16

; Round down - 01
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0400
mov word [rel tmp], di
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp]
or ecx, edi

; Round up - 10
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0800
mov word [rel tmp], di
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp]
; mov, not or: edx has not been written before this point, so OR-ing into it
; would merge its previous low 16 bits into the result.
mov edx, edi
shl edx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0c00
mov word [rel tmp], di
fldcw word [rel tmp]

fistp dword [rel tmp]
mov di, word [rel tmp]
or edx, edi

;; Slightly below midpoint
finit
fld dword [rel nsbmidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov si, word [rel tmp]
shl esi, 16

; Round down - 01
fstcw word [rel tmp]
movzx edi, word [rel tmp]
and edi, 0xf3ff
or edi, 0x0400
mov word [rel tmp], di
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp]
or esi, edi

; Round up - 10
; From here on ebp is the scratch register for the control word, because edi
; itself is the result register of this section.
fstcw word [rel tmp]
movzx ebp, word [rel tmp]
and ebp, 0xf3ff
or ebp, 0x0800
mov word [rel tmp], bp
fldcw word [rel tmp]

fist dword [rel tmp]
mov di, word [rel tmp]
; ebp still holds the control word here, so it must not be merged into edi:
; its set bits would turn a wrong result such as 0xfffe into the expected 0xffff.
shl edi, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx ebp, word [rel tmp]
and ebp, 0xf3ff
or ebp, 0x0c00
mov word [rel tmp], bp
fldcw word [rel tmp]

fistp dword [rel tmp]
mov bp, word [rel tmp] ; ebp = zero-extended result (upper half cleared by movzx)
or edi, ebp

hlt

align 4096
nmidpoint:
  dd -1.5
nsamidpoint:
  dd -1.49999
nsbmidpoint:
  dd -1.50001

; Scratch dword shared by the control-word updates and the integer stores.
align 4
tmp:
dd 0


================================================
FILE: unittests/32Bit_ASM/X87/RoundingPos.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x21212121",
    "RCX": "0x1121"
  },
  "MemoryRegions": {
    "0xf0000000": "4096"
  },
  "Mode": "32BIT"
}
%endif

; Rounding tests to ensure rounding modes are actually working
; For each of three positive inputs the value is stored as a 32-bit integer
; under all four rounding-control (RC, control word bits 10-11) settings. Each
; result (1 or 2) is packed as one hex digit: ebx collects the eight results
; for 1.5 and 1.50001, ecx the four results for 1.49999, most recent digit
; lowest. Every RC change reads the control word with FSTCW, clears the RC
; field with 0xf3ff, ORs in the new mode and reloads it with FLDCW. `tmp` is
; reused both for the control word and for the stored integer.
;; Mid-point
finit
fld dword [rel midpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov ebx, dword [rel tmp]
shl ebx, 4

; Round down - 01
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

; Round up - 10
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

;; Slightly above midpoint
; Results keep accumulating in ebx behind the four mid-point digits.
finit
fld dword [rel samidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

; Round down - 01
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

; Round up - 10
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ebx, dword [rel tmp]
shl ebx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

; Last digit of ebx: no shift afterwards.
fistp dword [rel tmp]
or ebx, dword [rel tmp]

;; Slightly below midpoint
; These four results are collected in ecx.
finit
fld dword [rel sbmidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov ecx, dword [rel tmp]
shl ecx, 4

; Round down - 01
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ecx, dword [rel tmp]
shl ecx, 4

; Round up - 10
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or ecx, dword [rel tmp]
shl ecx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx eax, word [rel tmp]
and eax, 0xf3ff
or eax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

; Last digit of ecx: no shift afterwards.
fistp dword [rel tmp]
or ecx, dword [rel tmp]

hlt

align 4096
midpoint:
  dd 1.5
samidpoint:
  dd 1.50001
sbmidpoint:
  dd 1.49999

; Scratch dword shared by the control-word updates and the integer stores.
align 4
tmp:
dd 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_div_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test 0.0 / 0.0 = Invalid Operation (should set bit 0 of status word)
; Push two zeros and divide them.
fldz
fldz
fdiv

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fcos_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test fcos(+infinity) = Invalid Operation (should set bit 0 of status word)
section .rodata
    ; Define the 80-bit (10-byte) constant for positive infinity.
    positive_infinity: dt __Infinity__

section .text
global _start
_start:

finit            ; Initialize the FPU
fld tword [rel positive_infinity]  ; Load the constant directly
fcos

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fist_nan.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with NaN input = Invalid Operation (should set bit 0 of status word)
; The NaN is loaded from memory instead of being computed as 0.0 / 0.0: that
; division raises Invalid Operation on its own, and because the status-word
; exception flags are sticky the check below would then pass even if FIST never
; set the flag. Loading an 80-bit quiet NaN leaves the flag clear, so FIST is
; the only instruction in this test that can set it.
lea edx, [.qnan]
fld tword [edx]

; Try to convert NaN to integer - this should set Invalid Operation
lea ebx, [.data]
fist dword [ebx]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
; 80-bit quiet NaN
.qnan:
  dq 0xC000000000000000 ; significand: integer bit and quiet bit set
  dw 0x7FFF             ; sign clear, exponent all ones
; FIST store target
.data:
  dd 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fist_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with value too large for 32-bit integer = Invalid Operation
; Load a large floating point value that exceeds INT32_MAX
lea edx, [.large_value]
fld tword [edx]

; Try to convert to 32-bit integer - should set Invalid Operation
lea ebx, [.data]
fist dword [ebx]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
; 80-bit source operand
.large_value:
  dt 1e20
; FIST store target
.data:
  dd 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fist_overflow_16bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with 16-bit overflow = Invalid Operation (should set bit 0 of status word)
; Create a large number that will overflow int16

; Load 2^30 (larger than int16 range: max int16 = 32767, 2^30 = 1073741824)
; FSCALE scales st0 (1.0) by 2^st1 (30); the exponent stays on the stack as st1.
finit
fild dword [.thirty]
fld1
fscale

; Try to convert to int16 - this should overflow and be invalid
fistp word [.dummy]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
.thirty: dd 30 ; exponent for FSCALE
.dummy: dw 0   ; FISTP store target


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fist_overflow_32bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with 32-bit overflow = Invalid Operation (should set bit 0 of status word)
; Create a large number that will overflow int32

; Load 2^40 (larger than int32 range: max int32 = 2147483647, 2^40 = 1099511627776)
; FSCALE scales st0 (1.0) by 2^st1 (40); the exponent stays on the stack as st1.
finit
fild dword [.forty]
fld1
fscale

; Try to convert to int32 - this should overflow and be invalid
fistp dword [.dummy]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
.forty: dd 40 ; exponent for FSCALE
.dummy: dd 0  ; FISTP store target


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fist_overflow_64bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with 64-bit overflow = Invalid Operation (should set bit 0 of status word)
; Create a very large number that will overflow int64

; Load 2^75 (larger than int64 range)
; FSCALE scales st0 (1.0) by 2^st1 (75); the exponent stays on the stack as st1.
finit
fild dword [.seventyfive]
fld1
fscale

; Try to convert to int64 - this should overflow and be invalid
fistp qword [.dummy]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
.seventyfive: dd 75 ; exponent for FSCALE
.dummy: dq 0        ; FISTP store target


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fprem_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test FPREM with a zero divisor = Invalid Operation (should set bit 0 of status word)
; NOTE: despite the file name, no infinity operand is used in this test.
finit

; Load the operands: st1 = 0.0 (divisor), st0 = 1.0 (dividend)
fldz
fld1

; Do FPREM: ST(0) = fprem(ST(0), ST(1)) = fprem(1.0, 0.0)
; fprem(1.0, 0.0) should set Invalid Operation because divisor is zero
fprem

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fptan_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test fptan(+infinity) in 32-bit mode = Invalid Operation (should set bit 0 of status word)
; Load positive infinity: exponent all 1s, mantissa 0x8000000000000000
; The 10-byte value is assembled in memory at run time: low mantissa dword,
; high mantissa dword, then the 16-bit sign/exponent word.
mov eax, 0x00000000
mov [rel .pos_inf], eax
mov eax, 0x80000000
mov [rel .pos_inf + 4], eax
mov ax, 0x7FFF
mov [rel .pos_inf + 8], ax

fld tword [rel .pos_inf]
fptan

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
; 10 bytes of storage for the 80-bit operand built above
.pos_inf:
dq 0
dw 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fsin_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test fsin(+infinity) in 32-bit mode = Invalid Operation (should set bit 0 of status word)
; Load positive infinity: exponent all 1s, mantissa 0x8000000000000000
; The 10-byte value is assembled in memory at run time: low mantissa dword,
; high mantissa dword, then the 16-bit sign/exponent word.
mov eax, 0x00000000
mov [rel .pos_inf], eax
mov eax, 0x80000000
mov [rel .pos_inf + 4], eax
mov ax, 0x7FFF
mov [rel .pos_inf + 8], ax

fld tword [rel .pos_inf]
fsin

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
; 10 bytes of storage for the 80-bit operand built above
.pos_inf:
dq 0
dw 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_fsincos_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test fsincos(+infinity) in 32-bit mode = Invalid Operation (should set bit 0 of status word)
; Load positive infinity: exponent all 1s, mantissa 0x8000000000000000
; The 10-byte value is assembled in memory at run time: low mantissa dword,
; high mantissa dword, then the 16-bit sign/exponent word.
mov eax, 0x00000000
mov [rel .pos_inf], eax
mov eax, 0x80000000
mov [rel .pos_inf + 4], eax
mov ax, 0x7FFF
mov [rel .pos_inf + 8], ax

fld tword [rel .pos_inf]
fsincos

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

align 4096
; 10 bytes of storage for the 80-bit operand built above
.pos_inf:
dq 0
dw 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_infinity_fsub_memory.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test ∞ - ∞ using FSUB with memory operand = Invalid Operation (should set bit 0 of status word) - 32bit mode

; Setup memory with +infinity (0x7FF0000000000000 for double precision +infinity)
; In 32-bit mode, we need to store the double in two parts
mov dword [rel .data], 0x00000000    ; Low 32 bits
mov dword [rel .data+4], 0x7FF00000  ; High 32 bits

; Create +infinity by dividing 1.0 by 0.0
; (a division by zero is reported through a different status bit than the
; Invalid Operation bit checked below)
fld1
fldz
fdiv

; Subtract ∞ - ∞ using memory operand - this should be invalid
fsub qword [rel .data]

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

section .data
align 4096
; Storage for the double-precision +infinity written at the top of the test
.data: dq 0


================================================
FILE: unittests/32Bit_ASM/X87/invalid_infinity_fsubr_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test ∞ - ∞ using FSUBR = Invalid Operation (should set bit 0 of status word) - 32bit mode
; Create +infinity by dividing 1.0 by 0.0 (a division by zero is reported
; through a different status bit than the one checked below).
fld1
fldz
fdiv ; st0 = +∞

; duplicate +infinity
fld st0

; Reverse subtract ∞ - ∞ using FSUBR - this should be invalid
fsubr

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

================================================
FILE: unittests/32Bit_ASM/X87/invalid_infinity_mul_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test ∞ × 0 = Invalid Operation (should set bit 0 of status word)
; Create +infinity by dividing 1.0 by 0.0 (a division by zero is reported
; through a different status bit than the one checked below).
fld1
fldz
fdiv ; st0 = +∞

; Load 0.0 and multiply with infinity
fldz
fmul

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_infinity_ops.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test ∞ - ∞ = Invalid Operation (should set bit 0 of status word)
; Create +infinity by dividing 1.0 by 0.0 (a division by zero is reported
; through a different status bit than the one checked below).
fld1
fldz
fdiv ; st0 = +∞

; Duplicate +infinity on stack
fld st0

; Subtract: +∞ - +∞ -> Invalid Operation
fsub

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_infinity_sub_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test ∞ - ∞ = Invalid Operation (should set bit 0 of status word) - 32bit mode
; Create +infinity by dividing 1.0 by 0.0 (a division by zero is reported
; through a different status bit than the one checked below).
fld1
fldz
fdiv ; st0 = +∞

; Duplicate +infinity on stack
fld st0

; Subtract (+∞) - (+∞) - this should be invalid
fsub

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

================================================
FILE: unittests/32Bit_ASM/X87/invalid_neg_infinity_sub_neg_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test (-∞) - (-∞) = Invalid Operation (should set bit 0 of status word) - 32bit mode
; Create -infinity by dividing -1.0 by 0.0
; (a division by zero is reported through a different status bit than the one
; checked below)
fld1
fchs
fldz
fdiv

; duplicate -infinity on stack
fld st0

; Subtract (-∞) - (-∞) - this should be invalid
fsub

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt

================================================
FILE: unittests/32Bit_ASM/X87/invalid_reduced_precision.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test Invalid Operation with reduced precision (64-bit)
; Set precision control to 64-bit (PC = 10b)
; i.e. the double-precision setting of the PC field (control word bits 8-9):
; 0xFCFF clears both bits, 0x0200 then sets bit 9.
fnstcw [rel .saved_cw]
mov ax, [rel .saved_cw]
and ax, 0xFCFF
or ax, 0x0200
mov [rel .new_cw], ax
fldcw [rel .new_cw]

; Perform invalid operation: 0.0 / 0.0
fldz
fldz
fdiv

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

; Restore original control word
fldcw [rel .saved_cw]

hlt

align 4096
.saved_cw:  dw 0 ; control word as found on entry
.new_cw:    dw 0 ; control word with the modified PC field


================================================
FILE: unittests/32Bit_ASM/X87/invalid_simple_test.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test with a simple 0/0 that we know works
; 0.0 / 0.0 is expected to set Invalid Operation (bit 0 of the status word).
fldz
fldz
fdiv

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/invalid_sqrt_negative.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Mode": "32BIT"
}
%endif

; Test sqrt(-1.0) = Invalid Operation (should set bit 0 of status word)
; Build -1.0 by negating 1.0, then take its square root.
fld1
fchs
fsqrt

; Read the status word and keep only bit 0 (IE); expected RAX = 1.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/X87/valid_fist_16bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "12346"
  },
  "Mode": "32BIT"
}
%endif

; Test FIST with valid 16-bit conversion
; Load a value that fits in int16 range
; Counterpart to the overflow tests: the Invalid Operation bit must stay clear.

finit
fld qword [rel .value]

; Convert to int16 - this should work without overflow
fistp word [rel .result]

; Read the status word and keep only bit 0 (IE); expected RAX = 0.
fstsw ax
and eax, 1

; Load the result to verify conversion worked
; (12345.75 is expected to be stored as 12346, checked through RBX)
movzx ebx, word [rel .result]

hlt

align 4096
.value: dq 12345.75 ; double-precision source operand
.result: dw 0       ; FISTP store target


================================================
FILE: unittests/32Bit_ASM/X87/valid_operation.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  },
  "Mode": "32BIT"
}
%endif

; Test a valid operation that should NOT set Invalid Operation bit
; 1.0 + 1.0
fld1
fld1
fadd

; Read the status word and keep only bit 0 (IE); expected RAX = 0.
fstsw ax
and eax, 1

hlt


================================================
FILE: unittests/32Bit_ASM/arpl.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF_0003",
    "RBX": "0xFFFF_0003",
    "RCX": "0",
    "RDX": "0x000000000000119c",
    "RSI": "0x0000000000000297"
  },
  "Mode": "32BIT"
}
%endif

; ARPL test: runs ARPL for every combination of source and destination RPL
; (low two bits, 0..3 each), records ZF after each one and finally packs the
; 16 recorded bits into edx. The flags register is captured into esi as well.

; setonz <index>: store the current ZF (as 0 or 1) into the byte at
; .data + index * 2. Clobbers cl.
%macro setonz 1
  setz cl
  mov [rel .data + (%1 * 2)], cl
%endmacro

mov edx, 0
mov ecx, 0
; Stack pointer for the pushfd below.
; NOTE(review): presumably an address the test harness maps - confirm.
mov esp, 0xe000_1000

; Setup some flags
mov edi, 0

; Rest of the code after this sub only touches eflags.z
sub edi, 1

; Outer loop: i = RPL of the source (bx). Inner loop: j = RPL of the
; destination (ax). offset counts the 16 combinations in execution order.
%assign i 0
%assign offset 0
%rep 4
  %assign j 0
  %rep 4
    mov ebx, 0xFFFF_0000 + i
    mov eax, 0xFFFF_0000 + j
    ; ZF = dst.RPL < src.RPL
    ; if (ZF) dst.RPL = src.RPL
    arpl ax, bx
    setonz offset
    %assign j j+1
    %assign offset offset+1
  %endrep
  %assign i i+1
%endrep

; Load flag state
; Ensures that ONLY ZF changed.
pushfd
mov esi, [esp]

; Calculate data
; Each recorded bit is OR-ed into edx before the shift, so the final value is
; the 16-bit pattern shifted left by one extra position.
%assign j 0
%rep 16
  mov cl, [rel .data + (j * 2)]
  or edx, ecx
  shl edx, 1
%assign j j+1
%endrep

hlt

; NOTE(review): not referenced by any instruction in this file.
.full_flags:
dd 0x4CD7

; One word per ARPL combination; only the low byte of each word is written.
align 4096
.data:
dw 16 dup (0)


================================================
FILE: unittests/32Bit_ASM/arpl_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF_01FD",
    "RBX": "0xFFFF_0201",
    "RCX": "0xFFFF_03FE",
    "RDX": "0xFFFF_04FF",
    "RSI": "0xFFFF_0501",
    "RDI": "0xFFFF_06FC"
  },
  "Mode": "32BIT"
}
%endif

; Tests if ARPL copies or leaves alone the correct registers.
; Every register gets a distinct value with 0xFFFF in its upper half; the
; CONFIG block then checks all six, so any unintended write would show up.
; The RPL is the low two bits of each 16-bit value.
mov eax, 0xFFFF_01FC
mov ebx, 0xFFFF_0201
mov ecx, 0xFFFF_03FE
mov edx, 0xFFFF_04FF
mov esi, 0xFFFF_0501
mov edi, 0xFFFF_06FC

; Modified dst < src
; ax RPL 0 < bx RPL 1, so ax is expected to become 0x01FD
arpl ax, bx

; Unmodified dst = src
; bx RPL 1 == si RPL 1
arpl bx, si

; Unmodified dst > src
; cx RPL 2 > si RPL 1
arpl cx, si

hlt


================================================
FILE: unittests/APITests/Allocator.cpp
================================================
#include <catch2/catch_all.hpp>
#include <FEXCore/Utils/Allocator.h>

namespace {

using FEXCore::Allocator::MemoryRegion;

// Catch2 test fixture that owns a temporary file. Mapping text supplied by a
// test is written into that file and its descriptor is handed to
// FEXCore::Allocator::CollectMemoryGaps, so the tests can feed arbitrary
// mapping lists to the parser.
struct Fixture {
  // Creates the backing temporary file; aborts if it cannot be created.
  Fixture() {
    fd = mkstemp(filename);
    if (fd == -1) {
      std::abort();
    }
  }

  // Closes and deletes the backing temporary file.
  ~Fixture() {
    close(fd);
    remove(filename);
  }

  // Replaces the file contents with Input and returns the memory gaps that
  // FEXCore::Allocator::CollectMemoryGaps reports for [Begin, End) based on it.
  // Aborts if the file cannot be rewritten, consistent with the constructor.
  fextl::vector<FEXCore::Allocator::MemoryRegion> CollectMemoryGaps(std::string_view Input, uintptr_t Begin, uintptr_t End) {
    // Reload input, or just create all possible inputs as file and then select the fd instead
    // Truncate first: otherwise a shorter Input would leave the tail of an
    // earlier, longer one in the file and the parser would see stale mappings.
    if (ftruncate(fd, 0) != 0 || lseek(fd, 0, SEEK_SET) == -1) {
      std::abort();
    }

    // write() may transfer fewer bytes than requested, so loop until done.
    const char* Cursor = Input.data();
    auto Remaining = Input.size();
    while (Remaining != 0) {
      const auto Written = write(fd, Cursor, Remaining);
      if (Written <= 0) {
        std::abort();
      }
      Cursor += Written;
      Remaining -= static_cast<decltype(Remaining)>(Written);
    }

    // Rewind so the parser starts reading at the beginning of the file.
    if (lseek(fd, 0, SEEK_SET) == -1) {
      std::abort();
    }
    return FEXCore::Allocator::CollectMemoryGaps(Begin, End, fd);
  }

  // mkstemp() template; the trailing XXXXXX is replaced with a unique suffix.
  char filename[64] = P_tmpdir "/alloctestXXXXXX";
  // Descriptor of the temporary file, valid for the lifetime of the fixture.
  int fd;
};

// Builds the MemoryRegion that begins at address Start and spans End - Start bytes.
MemoryRegion FromTo(uintptr_t Start, uintptr_t End) {
  void* const Base = reinterpret_cast<void*>(Start);
  const auto Length = End - Start;
  return MemoryRegion {Base, Length};
}

} // anonymous namespace

namespace FEXCore::Allocator {
// Two regions are equal when both their start pointer and their size match.
bool operator==(const MemoryRegion& Lhs, const MemoryRegion& Rhs) {
  if (Lhs.Ptr != Rhs.Ptr) {
    return false;
  }
  return Lhs.Size == Rhs.Size;
}

// Prints a region as "<start>-<end>", both written as pointers.
inline std::ostream& operator<<(std::ostream& os, MemoryRegion region) {
  const auto EndAddress = reinterpret_cast<uintptr_t>(region.Ptr) + region.Size;
  os << std::hex << region.Ptr;
  os << "-";
  os << reinterpret_cast<void*>(EndAddress);
  return os;
}

inline std::ostream& operator<<(std::ostream& os, fextl::vector<MemoryRegion> regions) {
  os << "{";
  bool first = true;
  for (auto& region : regions) {
    if (!first) {
      os << ", ";
    }
    first = false;
    os << region;
  }
  os << "}";
  return os;
}
} // namespace FEXCore::Allocator

// Runs CollectMemoryGaps against one two-page mapping for every generated
// Begin/End combination and compares the result with the expected gap list.
TEST_CASE_METHOD(Fixture, "Trivial") {
  // Single entry covering exactly 2 pages of memory
  const char SingletonMappings[] = "000000100000-000000102000 r--p 00000000 00:00 0                          placeholder\n";

  auto Begin = GENERATE(0, 0xff000, 0x100000, 0x101000, 0x102000);
  auto End = GENERATE(0xff000, 0x100000, 0x101000, 0x102000, 0x103000);
  // Empty or inverted windows are not tested.
  if (!(Begin < End)) {
    return;
  }

  auto Mappings = CollectMemoryGaps(SingletonMappings, Begin, End);
  INFO("CollectMemoryGaps 0x" << std::hex << Begin << "-0x" << End);

  // Work out the expected gap list first, then compare once.
  // Stays empty when the window lies completely inside the mapping.
  fextl::vector<MemoryRegion> Expected {};
  const bool StartsBeforeMapping = Begin < 0x100000;
  const bool ReachesPastMapping = End == 0x103000;

  if (StartsBeforeMapping) {
    if (ReachesPastMapping) {
      // Window spans the whole mapping: one gap on each side.
      Expected = {FromTo(Begin, 0x100000), FromTo(0x102000, 0x103000)};
    } else if (End < 0x100000) {
      // Window ends before the mapping starts: the whole window is a gap.
      Expected = {FromTo(Begin, End)};
    } else if (End <= 0x102000) {
      // Window ends at or inside the mapping: only the leading gap remains.
      Expected = {FromTo(Begin, 0x100000)};
    }
  } else if (ReachesPastMapping) {
    // Begin >= 0x100000 and End == 0x103000
    Expected = {FromTo(0x102000, End)};
  }

  CHECK_THAT(Mappings, Catch::Matchers::Equals(Expected));
}

// Runs CollectMemoryGaps against a larger mapping table for randomly generated
// page-aligned windows and compares the result with a reference gap list that
// is clipped to the window.
TEST_CASE_METHOD(Fixture, "RealWorld") {
  const char RealWorldMappings[] = "aaaaaaaa0000-aaaaaadba000 r--p 00000000 00:00 0                          placeholder\n"
                                   "aaaaaadc9000-aaaaab77a000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "aaaaab789000-aaaaab7b7000 r--p 00000000 00:00 0                          placeholder\n"
                                   "aaaaab7c6000-aaaaab894000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "aaaaab894000-aaaaabcc9000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "aaaaabcc9000-aaaaabcca000 ---p 00000000 00:00 0                          placeholder\n"
                                   "fffff6a00000-fffff7a00000 rw-p 00000000 00:00 0\n"
                                   "fffff7af0000-fffff7c78000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7c78000-fffff7c87000 ---p 00000000 00:00 0                          placeholder\n"
                                   "fffff7c87000-fffff7c8b000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7c8b000-fffff7c8d000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "fffff7c8d000-fffff7c99000 rw-p 00000000 00:00 0\n"
                                   "fffff7ca0000-fffff7cb4000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7cb4000-fffff7cc3000 ---p 00000000 00:00 0                          placeholder\n"
                                   "fffff7cc3000-fffff7cc4000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7cc4000-fffff7cc5000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "fffff7cd0000-fffff7d56000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7d56000-fffff7d65000 ---p 00000000 00:00 0                          placeholder\n"
                                   "fffff7d65000-fffff7d66000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7d66000-fffff7d67000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "fffff7d70000-fffff7f7a000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7f7a000-fffff7f89000 ---p 00000000 00:00 0                          placeholder\n"
                                   "fffff7f89000-fffff7f94000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7f94000-fffff7f97000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "fffff7f97000-fffff7f9a000 rw-p 00000000 00:00 0\n"
                                   "fffff7fc2000-fffff7fed000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7fef000-fffff7ff9000 rw-p 00000000 00:00 0\n"
                                   "fffff7ff9000-fffff7ffb000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7ffb000-fffff7ffc000 r-xp 00000000 00:00 0                          placeholder\n"
                                   "fffff7ffc000-fffff7ffe000 r--p 00000000 00:00 0                          placeholder\n"
                                   "fffff7ffe000-fffff8000000 rw-p 00000000 00:00 0                          placeholder\n"
                                   "fffffffd2000-1000000000000 rw-p 00000000 00:00 0                         [stack]\n";

  // Random page numbers, scaled to page-aligned addresses.
  using namespace Catch::Generators;
  uintptr_t Begin = GENERATE(take(30, random<uintptr_t>(0, 0xffffffffffffffff / 0x1000))) * 0x1000;
  uintptr_t End = GENERATE(take(30, random<uintptr_t>(0, 0xffffffffffffffff / 0x1000))) * 0x1000;
  // Empty or inverted windows are not tested.
  if (Begin >= End) {
    return;
  }

  auto Mappings = CollectMemoryGaps(RealWorldMappings, Begin, End);
  INFO("CollectMemoryGaps 0x" << std::hex << Begin << "-0x" << End);

  // Every gap between the mappings above, over the whole address space.
  fextl::vector<MemoryRegion> ref {
    FromTo(0x0, 0xaaaaaaaa0000),
    FromTo(0xaaaaaadba000, 0xaaaaaadc9000),
    FromTo(0xaaaaab77a000, 0xaaaaab789000),
    FromTo(0xaaaaab7b7000, 0xaaaaab7c6000),
    FromTo(0xaaaaabcca000, 0xfffff6a00000),
    FromTo(0xfffff7a00000, 0xfffff7af0000),
    FromTo(0xfffff7c99000, 0xfffff7ca0000),
    FromTo(0xfffff7cc5000, 0xfffff7cd0000),
    FromTo(0xfffff7d67000, 0xfffff7d70000),
    FromTo(0xfffff7f9a000, 0xfffff7fc2000),
    FromTo(0xfffff7fed000, 0xfffff7fef000),
    FromTo(0xfffff8000000, 0xfffffffd2000),
    FromTo(0x1000000000000, 0xffffffffffffffff),
  };

  // Drop the gaps that lie completely outside the requested window.
  for (auto it = ref.begin(); it != ref.end();) {
    const auto GapStart = reinterpret_cast<uintptr_t>(it->Ptr);
    const auto GapEnd = GapStart + it->Size;
    if (GapEnd <= Begin || GapStart >= End) {
      it = ref.erase(it);
    } else {
      ++it;
    }
  }

  if (!ref.empty()) {
    // Clip the first surviving gap so it does not start before Begin.
    auto& First = ref.front();
    const auto FirstStart = reinterpret_cast<uintptr_t>(First.Ptr);
    const auto ClippedStart = std::max(Begin, FirstStart);
    First.Size -= ClippedStart - FirstStart;
    First.Ptr = reinterpret_cast<void*>(ClippedStart);

    // Clip the last surviving gap so it does not extend past End. The gap must
    // only ever shrink: when End falls inside a mapping that follows the gap,
    // the gap keeps its real end instead of being stretched up to End.
    auto& Last = ref.back();
    const auto LastStart = reinterpret_cast<uintptr_t>(Last.Ptr);
    const auto LastEnd = LastStart + Last.Size;
    Last.Size = std::min(End, LastEnd) - LastStart;
  }

  CHECK_THAT(Mappings, Catch::Matchers::Equals(ref));
}


================================================
FILE: unittests/APITests/ArgumentParser.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <FEXHeaderUtils/StringArgumentParser.h>

// Baseline: a string of four words separated by single spaces is expected to
// produce four arguments, in order.
TEST_CASE("Basic") {
  const auto ArgString = "Test a b c";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  // REQUIRE stops the test case on failure, so the .at() calls below only run
  // when the argument count is right.
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "c");
}

// An empty string is expected to produce no arguments.
TEST_CASE("Basic - Empty") {
  const auto ArgString = "";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 0);
}

// A string made only of spaces is expected to produce no arguments.
TEST_CASE("Basic - Empty spaces") {
  const auto ArgString = "                       ";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 0);
}

// Leading spaces are expected to be skipped without creating an empty first argument.
TEST_CASE("Basic - Space at start") {
  const auto ArgString = "      Test a b c";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "c");
}

// Runs of several spaces between words are expected to act as one separator.
TEST_CASE("Basic - Bonus spaces between args") {
  const auto ArgString = "Test       a      b      c";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "c");
}

// A non-printable byte inside a word is expected to stay part of that argument.
TEST_CASE("Basic - non printable") {
  // The literal is deliberately split after "\x01": a hex escape consumes every
  // hex digit that follows it, so "\x01c" would be the single character 0x1C
  // instead of the byte 0x01 followed by the letter 'c'.
  const auto ArgString = "Test a b \x01" "c";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "\x01" "c");
}

// A multi-byte (non-ASCII) character is expected to come back as one intact argument.
TEST_CASE("Basic - Emoji") {
  const auto ArgString = "Test a b 🐸";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "🐸");
}

// Trailing spaces are expected to be dropped without creating an empty last argument.
TEST_CASE("Basic - space at the end") {
  const auto ArgString = "Test a b 🐸        ";
  auto Args = FHU::ParseArgumentsFromString(ArgString);
  REQUIRE(Args.size() == 4);
  CHECK(Args.at(0) == "Test");
  CHECK(Args.at(1) == "a");
  CHECK(Args.at(2) == "b");
  CHECK(Args.at(3) == "🐸");
}


================================================
FILE: unittests/APITests/CMakeLists.txt
================================================
# Names of the API tests. Each entry <name> is built from <name>.cpp into an
# executable called <name>.
set(TESTS
  Allocator
  ArgumentParser
  ExtendedVolatileMetadata
  fextl_function
  FileMappingBaseAddress
  Filesystem
  InterruptableConditionVariable
  StringUtils)

# Libraries linked into every API test, in addition to Catch2.
list(APPEND LIBS Common FEXCore JemallocLibs)

# One executable per test; Catch2WithMain provides main().
foreach(API_TEST ${TESTS})
  add_executable(${API_TEST} ${API_TEST}.cpp)
  target_link_libraries(${API_TEST} PRIVATE ${LIBS} Catch2::Catch2WithMain)

  # Register the Catch2 test cases with CTest. The suffix makes every test name
  # end in ".<executable>.APITest", which the api_tests target below filters on.
  catch_discover_tests(${API_TEST}
    TEST_SUFFIX ".${API_TEST}.APITest")
endforeach()

# Convenience target: runs ctest from the build directory, restricted via -R to
# the tests whose name matches the APITest pattern.
add_custom_target(api_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.APITest")

# Make sure every test executable is built before api_tests runs.
foreach(API_TEST ${TESTS})
  add_dependencies(api_tests ${API_TEST})
endforeach()


================================================
FILE: unittests/APITests/ExtendedVolatileMetadata.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_all.hpp>
#include "Common/VolatileMetadata.h"

// An empty string is expected to produce no module entries.
TEST_CASE("Basic - Empty") {
  const auto String = "";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.empty());
}

// A string made only of ':' characters is expected to produce no module entries.
TEST_CASE("Basic - Empty - modules") {
  const auto String = ":::::";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.empty());
}

// A bare module name is expected to produce one entry with ModuleTSODisabled
// set and with no instructions and no ranges.
TEST_CASE("Basic - Single") {
  const auto String = "hl2_linux";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 1);

  // REQUIRE so the .at() lookups below are skipped when the key is missing.
  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == true);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty());
}

// Two bare module names separated by ':' are expected to produce two entries,
// each with ModuleTSODisabled set and with no instructions and no ranges.
TEST_CASE("Basic - Multiple") {
  const auto String = "hl2_linux:DeckJob";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 2);

  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == true);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty());

  REQUIRE(Result.contains("DeckJob"));
  CHECK(Result.at("DeckJob").ModuleTSODisabled == true);
  CHECK(Result.at("DeckJob").VolatileInstructions.empty());
  CHECK(Result.at("DeckJob").VolatileValidRanges.Empty());
}

// Trailing ':' separators after a module name are expected not to create extra entries.
TEST_CASE("Basic - Single plus empty") {
  const auto String = "hl2_linux:::::";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 1);
  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == true);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty());
}

// Returns true when Range (compared as a whole pair) appears in ValidRanges.
static inline bool ContainsRange(std::pair<uint64_t, uint64_t> Range, const std::vector<std::pair<uint64_t, uint64_t>>& ValidRanges) {
  for (const auto& Candidate : ValidRanges) {
    if (Candidate == Range) {
      return true;
    }
  }
  return false;
}

// A module name followed by ";<begin>-<end>" is expected to produce one entry
// that carries a range, with ModuleTSODisabled cleared.
TEST_CASE("Basic - Single - offset") {
  const auto String = "hl2_linux;0x0-0x1000";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 1);

  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == false);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty() == false);

  const std::vector<std::pair<uint64_t, uint64_t>> ValidRanges = {
    {0, 0x1000},
  };

  // Every reported range must be one of the expected ranges.
  // NOTE(review): this loop does not verify that every expected range was reported.
  for (auto it : Result.at("hl2_linux").VolatileValidRanges) {
    CHECK(ContainsRange(std::make_pair(it.Offset, it.End), ValidRanges));
  }
}

// Two comma-separated ranges after the module name are expected to end up in
// the same module entry.
TEST_CASE("Basic - Single - offset x2") {
  const auto String = "hl2_linux;0x0-0x1000,0x2000-0x3000";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 1);
  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == false);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty() == false);

  const std::vector<std::pair<uint64_t, uint64_t>> ValidRanges = {
    {0, 0x1000},
    {0x2000, 0x3000},
  };

  // Every reported range must be one of the expected ranges.
  // NOTE(review): this loop does not verify that both expected ranges were reported.
  for (auto it : Result.at("hl2_linux").VolatileValidRanges) {
    CHECK(ContainsRange(std::make_pair(it.Offset, it.End), ValidRanges));
  }
}

// A second ';' section with comma-separated values is expected to populate
// VolatileInstructions alongside the range from the first section.
TEST_CASE("Basic - Single - offset plus instruction") {
  const auto String = "hl2_linux;0x0-0x1000;0x1,0x2,0x3";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 1);
  REQUIRE(Result.contains("hl2_linux"));
  CHECK(Result.at("hl2_linux").ModuleTSODisabled == false);
  CHECK(Result.at("hl2_linux").VolatileInstructions.empty() == false);
  CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty() == false);

  const std::vector<std::pair<uint64_t, uint64_t>> ValidRanges = {
    {0, 0x1000},
  };

  const std::vector<uint64_t> ValidInsts = {
    1,
    2,
    3,
  };

  // Every reported range must be one of the expected ranges.
  for (auto it : Result.at("hl2_linux").VolatileValidRanges) {
    CHECK(ContainsRange(std::make_pair(it.Offset, it.End), ValidRanges));
  }

  // Every reported instruction must be one of the expected values.
  // NOTE(review): neither loop verifies that every expected item was reported.
  for (auto it : Result.at("hl2_linux").VolatileInstructions) {
    CHECK_THAT(ValidInsts, Catch::Matchers::Contains(it));
  }
}

// Two ':'-separated modules, each with its own range section, are expected to
// produce two entries that each carry only their own range.
TEST_CASE("Basic - Double - offset") {
  const auto String = "hl2_linux;0x0-0x1000:DeckJob;0x2000-0x3000";
  const auto Result = FEX::VolatileMetadata::ParseExtendedVolatileMetadata(String);
  REQUIRE(Result.size() == 2);

  // First module.
  {
    REQUIRE(Result.contains("hl2_linux"));
    CHECK(Result.at("hl2_linux").ModuleTSODisabled == false);
    CHECK(Result.at("hl2_linux").VolatileInstructions.empty());
    CHECK(Result.at("hl2_linux").VolatileValidRanges.Empty() == false);

    const std::vector<std::pair<uint64_t, uint64_t>> ValidRanges = {
      {0, 0x1000},
    };

    // Every reported range must be one of the expected ranges.
    for (auto it : Result.at("hl2_linux").VolatileValidRanges) {
      CHECK(ContainsRange(std::make_pair(it.Offset, it.End), ValidRanges));
    }
  }

  // Second module.
  {
    REQUIRE(Result.contains("DeckJob"));
    CHECK(Result.at("DeckJob").ModuleTSODisabled == false);
    CHECK(Result.at("DeckJob").VolatileInstructions.empty());
    CHECK(Result.at("DeckJob").VolatileValidRanges.Empty() == false);

    const std::vector<std::pair<uint64_t, uint64_t>> ValidRanges = {
      {0x2000, 0x3000},
    };

    // Every reported range must be one of the expected ranges.
    for (auto it : Result.at("DeckJob").VolatileValidRanges) {
      CHECK(ContainsRange(std::make_pair(it.Offset, it.End), ValidRanges));
    }
  }
}


================================================
FILE: unittests/APITests/FileMappingBaseAddress.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_all.hpp>
#include <Common/FileMappingBaseAddress.h>

#include <FEXCore/fextl/vector.h>

namespace {

// Arguments of one file mapping, as passed to FEXCore::InferMappingBaseAddress
// by the tests in this file.
struct Mapping {
  uint64_t Addr;       // Address the mapping was placed at
  uint64_t Size;       // Size of the mapping
  uint64_t FileOffset; // Offset into the file the mapping starts at
  int Flags; // PF_*
};

} // anonymous namespace

// Three PT_LOAD headers and the three mappings made for them: every mapping
// must lead back to the same single base address.
TEST_CASE("libm") {
  const uint64_t BaseAddr = 0x123400000;

  fextl::vector<Elf64_Phdr> Headers = {
    {.p_type = PT_LOAD, .p_offset = 0x00000, .p_vaddr = 0x00000, .p_paddr = 0x00000, .p_filesz = 0x7bdd5, .p_memsz = 0x7bdd5},
    {.p_type = PT_LOAD, .p_offset = 0x7c000, .p_vaddr = 0x7c000, .p_paddr = 0x7c000, .p_filesz = 0x6f3a8, .p_memsz = 0x6f3a8},
    {.p_type = PT_LOAD, .p_offset = 0xebbd0, .p_vaddr = 0xecbd0, .p_paddr = 0xecbd0, .p_filesz = 0x434, .p_memsz = 0x440},
  };

  fextl::vector<Mapping> Mappings = {
    {.Addr = BaseAddr, .Size = 0x7c000, .FileOffset = 0x00000},
    {.Addr = BaseAddr + 0x7c000, .Size = 0x70000, .FileOffset = 0x7c000},
    {.Addr = BaseAddr + 0xec000, .Size = 0x2000, .FileOffset = 0xeb000},
  };

  // The same answer is expected for every mapping.
  const fextl::vector<uint64_t> Expected {BaseAddr};

  for (auto& Entry : Mappings) {
    INFO("Mapping to 0x" << std::hex << Entry.Addr << "-0x" << Entry.Addr + Entry.Size << " from file offset 0x" << Entry.FileOffset);
    auto DeducedBase = FEXCore::InferMappingBaseAddress(Headers, Entry.Addr, Entry.Size, Entry.FileOffset, Entry.Flags);
    CHECK(DeducedBase == Expected);
  }
}

// E.g. libX11-xcb
// Both mappings use the same file offset and differ in address and access
// flags; each must still lead back to the single base address.
TEST_CASE("Access flags are checked") {
  const uint64_t BaseAddr = 0x123400000;

  fextl::vector<Elf64_Phdr> Headers = {
    {.p_type = PT_LOAD, .p_flags = PF_R | PF_X, .p_offset = 0x0000, .p_vaddr = 0x0000, .p_paddr = 0x0000, .p_filesz = 0x00040d, .p_memsz = 0x00040d},
    {.p_type = PT_LOAD, .p_flags = PF_R, .p_offset = 0x1000, .p_vaddr = 0x1000, .p_paddr = 0x1000, .p_filesz = 0x00036c, .p_memsz = 0x00036c},
    {.p_type = PT_LOAD, .p_flags = PF_W, .p_offset = 0x1dc8, .p_vaddr = 0x2dc8, .p_paddr = 0x2dc8, .p_filesz = 0x000238, .p_memsz = 0x000240},
  };

  fextl::vector<Mapping> Mappings = {
    {.Addr = BaseAddr + 0x1000, .Size = 0x1000, .FileOffset = 0x1000, .Flags = PF_R},
    {.Addr = BaseAddr + 0x2000, .Size = 0x1000, .FileOffset = 0x1000, .Flags = PF_W},
  };

  // The same answer is expected for every mapping.
  const fextl::vector<uint64_t> Expected {BaseAddr};

  for (auto& Entry : Mappings) {
    INFO("Mapping to 0x" << std::hex << Entry.Addr << "-0x" << Entry.Addr + Entry.Size << " from file offset 0x" << Entry.FileOffset);
    auto DeducedBase = FEXCore::InferMappingBaseAddress(Headers, Entry.Addr, Entry.Size, Entry.FileOffset, Entry.Flags);
    CHECK(DeducedBase == Expected);
  }
}

// Program headers that don't generate memory mappings can't be used to infer base addresses
// The PT_INTERP header shares its file offset with the second PT_LOAD header;
// only the PT_LOAD header's vaddr matches the expected base address.
TEST_CASE("Non-mapping program headers are ignored") {
  const uint64_t BaseAddr = 0x123400000;

  fextl::vector<Elf64_Phdr> Headers = {
    {.p_type = PT_LOAD, .p_offset = 0x00000, .p_vaddr = 0x0000, .p_paddr = 0x00000, .p_filesz = 0x1000, .p_memsz = 0x1000},
    {.p_type = PT_INTERP, .p_offset = 0x10000, .p_vaddr = 0xa000, .p_paddr = 0xa0000, .p_filesz = 0x1000, .p_memsz = 0x1000},
    {.p_type = PT_LOAD, .p_offset = 0x10000, .p_vaddr = 0x1000, .p_paddr = 0x10000, .p_filesz = 0x1000, .p_memsz = 0x1000},
  };

  fextl::vector<Mapping> Mappings = {
    {.Addr = BaseAddr + 0x1000, .Size = 0x1000, .FileOffset = 0x10000},
  };

  // The same answer is expected for every mapping.
  const fextl::vector<uint64_t> Expected {BaseAddr};

  for (auto& Entry : Mappings) {
    INFO("Mapping to 0x" << std::hex << Entry.Addr << "-0x" << Entry.Addr + Entry.Size << " from file offset 0x" << Entry.FileOffset);
    auto DeducedBase = FEXCore::InferMappingBaseAddress(Headers, Entry.Addr, Entry.Size, Entry.FileOffset, Entry.Flags);
    CHECK(DeducedBase == Expected);
  }
}

// Some binaries have (e.g. glxtest) end up with two RW mappings at the end that trigger two mmap parameters that only differ in their
// virtual address. In such cases, multiple base addresses could be valid
TEST_CASE("Duplicate data page") {
  const uint64_t BaseAddr = 0x123400000;

  fextl::vector<Elf64_Phdr> Headers = {
    {.p_type = PT_LOAD, .p_flags = PF_R, .p_offset = 0x00000, .p_vaddr = 0x0000, .p_paddr = 0x00000, .p_filesz = 0x1000, .p_memsz = 0x1000},
    {.p_type = PT_LOAD, .p_flags = PF_R | PF_X, .p_offset = 0x3910, .p_vaddr = 0x4910, .p_paddr = 0x4910, .p_filesz = 0x29d0, .p_memsz = 0x29d0},
    {.p_type = PT_LOAD, .p_flags = PF_R | PF_W, .p_offset = 0x62e0, .p_vaddr = 0x82e0, .p_paddr = 0x82e0, .p_filesz = 0x6e0, .p_memsz = 0xd20},
    {.p_type = PT_LOAD, .p_flags = PF_R | PF_W, .p_offset = 0x69c0, .p_vaddr = 0x99c0, .p_paddr = 0x99c0, .p_filesz = 0x018, .p_memsz = 0x068},
  };

  fextl::vector<Mapping> Mappings = {
    {.Addr = BaseAddr, .Size = 0x4000, .FileOffset = 0x0000, .Flags = PF_R},
    {.Addr = BaseAddr + 0x4000, .Size = 0x4000, .FileOffset = 0x3000, .Flags = PF_R | PF_X},
    {.Addr = BaseAddr + 0x8000, .Size = 0x1000, .FileOffset = 0x6000, .Flags = PF_R | PF_W},
    {.Addr = BaseAddr + 0x9000, .Size = 0x1000, .FileOffset = 0x6000, .Flags = PF_R | PF_W},
  };

  for (auto& Entry : Mappings) {
    INFO("Mapping to 0x" << std::hex << Entry.Addr << "-0x" << Entry.Addr + Entry.Size << " from file offset 0x" << Entry.FileOffset);
    auto DeducedBase = FEXCore::InferMappingBaseAddress(Headers, Entry.Addr, Entry.Size, Entry.FileOffset, Entry.Flags);

    // The two RW data mappings are ambiguous: two candidate bases are expected,
    // one per RW header. All earlier mappings resolve to the single real base.
    const bool IsDataMapping = Entry.Addr >= BaseAddr + 0x8000;
    const fextl::vector<uint64_t> Expected =
      IsDataMapping ? fextl::vector<uint64_t> {Entry.Addr - 0x8000, Entry.Addr - 0x9000} : fextl::vector<uint64_t> {BaseAddr};
    CHECK(DeducedBase == Expected);
  }
}


================================================
FILE: unittests/APITests/Filesystem.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <filesystem>
#include <FEXHeaderUtils/Filesystem.h>

// For every generated path, FHU::Filesystem::LexicallyNormal must return the
// same string as std::filesystem::path::lexically_normal.
TEST_CASE("LexicallyNormal") {
  // Each GENERATE value runs the test case once.
  auto Path = GENERATE("", "/", "/./", "//.", "//./", "//.//",

                       ".", "..", ".//", "../../", "././", "./../", "./../", "./.././.././.", "./.././.././..",

                       "./foo1/../", "foo4/.///bar/../", "foo5/././",

                       "foo6/", "foo7/test", "foo8/test/", "foo9/./../test/",

                       "/../..", "...", "/...", "foo10/...", "/..", "/foo11/../../bar");

  REQUIRE(std::string_view(FHU::Filesystem::LexicallyNormal(Path)) == std::string_view(std::filesystem::path(Path).lexically_normal().string()));
}

// Inputs for which FHU::Filesystem::LexicallyNormal is known to differ from
// std::filesystem; the test case is tagged [!shouldfail] for that reason.
TEST_CASE("LexicallyNormalDifferences", "[!shouldfail]") {
  auto Path = GENERATE("",
                       // std::fs here keeps the `/` after `foo2/`
                       // FEX algorithm doesn't keep behaviour here.
                       "foo2/./bar/..",  // std::fs -> "foo2/"
                       "foo2/.///bar/.." // std::fs -> "foo2/"
  );

  REQUIRE(std::string_view(FHU::Filesystem::LexicallyNormal(Path)) == std::string_view(std::filesystem::path(Path).lexically_normal().string()));
}

// For every generated path, FHU::Filesystem::ParentPath must return the same
// string as std::filesystem::path::parent_path.
TEST_CASE("ParentPath") {
  // Each GENERATE value runs the test case once.
  // "foo/././" and "..." are two separate inputs: without a comma between them
  // the adjacent literals are concatenated into the single path "foo/././...".
  auto Path = GENERATE("", "/", "/./", "//.", "//./", "//.//",

                       ".", "..", ".//", "../../", "././", "./../", "./../", "./.././.././.", "./.././.././..",

                       "./foo/../", "foo/./bar/..", "foo/.///bar/..", "foo/.///bar/../", "foo/././",

                       "...", "/...", "foo/...", "/..", "/foo/../../bar");

  REQUIRE(std::string_view(FHU::Filesystem::ParentPath(Path)) == std::string_view(std::filesystem::path(Path).parent_path().string()));
}


================================================
FILE: unittests/APITests/InterruptableConditionVariable.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <chrono>
#include <csetjmp>
#include <FEXCore/Utils/InterruptableConditionVariable.h>
#include <FEXHeaderUtils/Syscalls.h>

#include <thread>
#include <signal.h>

// Test that ensures a wait on the InterruptableConditionVariable times out when nothing signals it
TEST_CASE("SimpleWait") {
  auto Dur = std::chrono::seconds(1);
  FEXCore::InterruptableConditionVariable Mutex {};

  // Time the wait so the elapsed duration can be compared with the timeout.
  auto Now = std::chrono::high_resolution_clock::now();
  bool Signaled = Mutex.WaitFor(Dur);
  auto End = std::chrono::high_resolution_clock::now();

  // We weren't signaled
  REQUIRE(Signaled == false);

  // We waited at least the full duration
  REQUIRE((End - Now) >= Dur);
}

// Thread body: waits on Mutex for up to five seconds and stores the result of
// that wait in *Signaled.
void WaitThread(FEXCore::InterruptableConditionVariable* Mutex, bool* Signaled) {
  auto Timeout = std::chrono::seconds(5);
  const bool WaitResult = Mutex->WaitFor(Timeout);
  *Signaled = WaitResult;
}

// Test that ensures a waiting thread is woken by NotifyAll() instead of
// running into its timeout
TEST_CASE("SignaledWait") {
  bool Signaled {};
  FEXCore::InterruptableConditionVariable Mutex {};

  // WaitThread waits on Mutex for up to five seconds and records the result.
  std::thread t(WaitThread, &Mutex, &Signaled);

  auto Now = std::chrono::high_resolution_clock::now();
  Mutex.NotifyAll();

  // Joining also makes the thread's write to Signaled visible here.
  t.join();

  // Sample the end time only after the waiting thread has finished, so the
  // measured interval covers the wait itself and not just the NotifyAll() call.
  auto End = std::chrono::high_resolution_clock::now();

  auto Dur = std::chrono::seconds(5);
  // Expected to signal
  REQUIRE(Signaled);
  // Ensure we didn't timeout
  REQUIRE((End - Now) < Dur);
}

// State shared between SignalHandler, WaitThreadLongJump and the long-jump tests below.
// Jump target that WaitThreadLongJump sets with setjmp() and SignalHandler jumps to.
static jmp_buf LongJump {};
// Incremented by SignalHandler on every delivered signal; reset by each long-jump test.
static int32_t NumberOfJumps {};
// Notified by the waiting thread each time it is about to wait again.
FEXCore::InterruptableConditionVariable WaitMutex {};

// Signal handler used by the long-jump tests: counts the delivery and leaves
// through longjmp() instead of returning.
void SignalHandler(int Signal) {
  NumberOfJumps += 1;
  longjmp(LongJump, 1);
}

// Thread body for the long-jump tests.
// Publishes its thread id through *TID, installs SignalHandler for SIGUSR1,
// sets the setjmp() target and then waits on Mutex for up to two seconds.
// Every SIGUSR1 delivered afterwards makes execution resume right after
// setjmp(), so the notify-and-wait part below runs again.
// The result of the wait that finally returns is stored in *Signaled.
void WaitThreadLongJump(FEXCore::InterruptableConditionVariable* Mutex, FEXCore::InterruptableConditionVariable* ThreadReadyMutex,
                        bool* Signaled, int32_t* TID) {

  // Store the TID
  *TID = FHU::Syscalls::gettid();

  // Setup a long jump signal handler
  // NOTE(review): SA_NODEFER presumably keeps SIGUSR1 deliverable after the
  // handler exits through longjmp() instead of returning - confirm.
  struct sigaction sa {};
  sa.sa_flags = SA_RESTART | SA_NODEFER;
  sigemptyset(&sa.sa_mask);
  sa.sa_handler = SignalHandler;
  sigaction(SIGUSR1, &sa, nullptr);

  // long jump here
  // Value is 0 on the direct call and 1 when arriving from SignalHandler.
  int Value = setjmp(LongJump);

  if (Value == 0) {
    // Only notify that we are ready once
    ThreadReadyMutex->NotifyAll();
  }

  // Notify the loop that we are ready for signaling again
  WaitMutex.NotifyAll();

  // Time out after two seconds
  auto Dur = std::chrono::seconds(2);
  *Signaled = Mutex->WaitFor(Dur);
}

// Test that ensures the InterruptableConditionVariable survives over a long jump
// Without signaling it
TEST_CASE("SignaledWaitLongJumpNoSignal") {
  int32_t TID {};
  bool Signaled {};
  FEXCore::InterruptableConditionVariable Mutex {};
  FEXCore::InterruptableConditionVariable ThreadReadyMutex {};

  // SignalHandler increments this global; start from zero for this test.
  NumberOfJumps = 0;
  std::thread t(WaitThreadLongJump, &Mutex, &ThreadReadyMutex, &Signaled, &TID);

  // Wait for our thread to become ready
  // The thread stores TID before it notifies ThreadReadyMutex.
  ThreadReadyMutex.Wait();

  int32_t NumberOfJumpsToDo = 5;
  for (int32_t i = 0; i < NumberOfJumpsToDo; ++i) {
    // Wait for the thread to signal that it is ready to receive signal
    WaitMutex.WaitFor(std::chrono::milliseconds(500));
    // Send the signal to the thread
    FHU::Syscalls::tgkill(::getpid(), TID, SIGUSR1);
  }

  // Wait for thread join
  // Mutex is never notified here, so the thread's last wait is expected to time out.
  t.join();

  // We never signaled, so we should never receive signal
  REQUIRE(Signaled == false);
  // Ensure we long jumped the correct number of times
  REQUIRE(NumberOfJumps == NumberOfJumpsToDo);
}

// Test that ensures the InterruptableConditionVariable survives over a long jump
// With signaling it
TEST_CASE("SignaledWaitLongJumpSignal") {
  int32_t TID {};
  bool Signaled {};
  FEXCore::InterruptableConditionVariable Mutex {};
  FEXCore::InterruptableConditionVariable ThreadReadyMutex {};

  // SignalHandler increments this global; start from zero for this test.
  NumberOfJumps = 0;
  std::thread t(WaitThreadLongJump, &Mutex, &ThreadReadyMutex, &Signaled, &TID);

  // Wait for our thread to become ready
  // The thread stores TID before it notifies ThreadReadyMutex.
  ThreadReadyMutex.Wait();

  int32_t NumberOfJumpsToDo = 5;
  for (int32_t i = 0; i < NumberOfJumpsToDo; ++i) {
    // Wait for the thread to signal that it is ready to receive signal
    WaitMutex.WaitFor(std::chrono::milliseconds(500));

    // Send the signal to the thread
    FHU::Syscalls::tgkill(::getpid(), TID, SIGUSR1);
  }

  // Notify the thread's mutex now
  // Unlike the NoSignal variant, the thread's final wait should report a signal.
  Mutex.NotifyAll();

  // Wait for thread join
  t.join();

  // We signaled so we should have received it now
  REQUIRE(Signaled);
  // Ensure we long jumped the correct number of times
  REQUIRE(NumberOfJumps == NumberOfJumpsToDo);
}


================================================
FILE: unittests/APITests/StringUtils.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <FEXCore/Utils/StringUtils.h>

using namespace FEXCore::StringUtils;

// LeftTrim is expected to strip leading whitespace only. Each group below
// exercises \n, \r, \f, \t, \v and a plain space.
TEST_CASE("ltrim") {
  // Nothing to trim.
  CHECK(LeftTrim("") == "");
  CHECK(LeftTrim("FEXInterpreter") == "FEXInterpreter");

  // Trailing whitespace must be left alone.
  CHECK(LeftTrim("FEXInterpreter\n") == "FEXInterpreter\n");
  CHECK(LeftTrim("FEXInterpreter\r") == "FEXInterpreter\r");
  CHECK(LeftTrim("FEXInterpreter\f") == "FEXInterpreter\f");
  CHECK(LeftTrim("FEXInterpreter\t") == "FEXInterpreter\t");
  CHECK(LeftTrim("FEXInterpreter\v") == "FEXInterpreter\v");
  CHECK(LeftTrim("FEXInterpreter ") == "FEXInterpreter ");

  // Leading whitespace must be removed.
  CHECK(LeftTrim("\nFEXInterpreter") == "FEXInterpreter");
  CHECK(LeftTrim("\rFEXInterpreter") == "FEXInterpreter");
  CHECK(LeftTrim("\fFEXInterpreter") == "FEXInterpreter");
  CHECK(LeftTrim("\tFEXInterpreter") == "FEXInterpreter");
  CHECK(LeftTrim("\vFEXInterpreter") == "FEXInterpreter");
  CHECK(LeftTrim(" FEXInterpreter") == "FEXInterpreter");

  // Whitespace on both sides: only the leading part goes away.
  CHECK(LeftTrim("\nFEXInterpreter\n") == "FEXInterpreter\n");
  CHECK(LeftTrim("\rFEXInterpreter\r") == "FEXInterpreter\r");
  CHECK(LeftTrim("\fFEXInterpreter\f") == "FEXInterpreter\f");
  CHECK(LeftTrim("\tFEXInterpreter\t") == "FEXInterpreter\t");
  CHECK(LeftTrim("\vFEXInterpreter\v") == "FEXInterpreter\v");
  CHECK(LeftTrim(" FEXInterpreter ") == "FEXInterpreter ");
}

// RightTrim is expected to strip trailing whitespace only. Each group below
// exercises \n, \r, \f, \t, \v and a plain space.
TEST_CASE("rtrim") {
  // Nothing to trim.
  CHECK(RightTrim("") == "");
  CHECK(RightTrim("FEXInterpreter") == "FEXInterpreter");

  // Trailing whitespace must be removed.
  CHECK(RightTrim("FEXInterpreter\n") == "FEXInterpreter");
  CHECK(RightTrim("FEXInterpreter\r") == "FEXInterpreter");
  CHECK(RightTrim("FEXInterpreter\f") == "FEXInterpreter");
  CHECK(RightTrim("FEXInterpreter\t") == "FEXInterpreter");
  CHECK(RightTrim("FEXInterpreter\v") == "FEXInterpreter");
  CHECK(RightTrim("FEXInterpreter ") == "FEXInterpreter");

  // Leading whitespace must be left alone.
  CHECK(RightTrim("\nFEXInterpreter") == "\nFEXInterpreter");
  CHECK(RightTrim("\rFEXInterpreter") == "\rFEXInterpreter");
  CHECK(RightTrim("\fFEXInterpreter") == "\fFEXInterpreter");
  CHECK(RightTrim("\tFEXInterpreter") == "\tFEXInterpreter");
  CHECK(RightTrim("\vFEXInterpreter") == "\vFEXInterpreter");
  CHECK(RightTrim(" FEXInterpreter") == " FEXInterpreter");

  // Whitespace on both sides: only the trailing part goes away.
  CHECK(RightTrim("\nFEXInterpreter\n") == "\nFEXInterpreter");
  CHECK(RightTrim("\rFEXInterpreter\r") == "\rFEXInterpreter");
  CHECK(RightTrim("\fFEXInterpreter\f") == "\fFEXInterpreter");
  CHECK(RightTrim("\tFEXInterpreter\t") == "\tFEXInterpreter");
  CHECK(RightTrim("\vFEXInterpreter\v") == "\vFEXInterpreter");
  CHECK(RightTrim(" FEXInterpreter ") == " FEXInterpreter");
}

// Trim is expected to strip whitespace from both ends. Each group below
// exercises \n, \r, \f, \t, \v and a plain space.
TEST_CASE("trim") {
  // Nothing to trim.
  CHECK(Trim("") == "");
  CHECK(Trim("FEXInterpreter") == "FEXInterpreter");

  // Trailing whitespace only.
  CHECK(Trim("FEXInterpreter\n") == "FEXInterpreter");
  CHECK(Trim("FEXInterpreter\r") == "FEXInterpreter");
  CHECK(Trim("FEXInterpreter\f") == "FEXInterpreter");
  CHECK(Trim("FEXInterpreter\t") == "FEXInterpreter");
  CHECK(Trim("FEXInterpreter\v") == "FEXInterpreter");
  CHECK(Trim("FEXInterpreter ") == "FEXInterpreter");

  // Leading whitespace only.
  CHECK(Trim("\nFEXInterpreter") == "FEXInterpreter");
  CHECK(Trim("\rFEXInterpreter") == "FEXInterpreter");
  CHECK(Trim("\fFEXInterpreter") == "FEXInterpreter");
  CHECK(Trim("\tFEXInterpreter") == "FEXInterpreter");
  CHECK(Trim("\vFEXInterpreter") == "FEXInterpreter");
  CHECK(Trim(" FEXInterpreter") == "FEXInterpreter");

  // Whitespace on both sides.
  CHECK(Trim("\nFEXInterpreter\n") == "FEXInterpreter");
  CHECK(Trim("\rFEXInterpreter\r") == "FEXInterpreter");
  CHECK(Trim("\fFEXInterpreter\f") == "FEXInterpreter");
  CHECK(Trim("\tFEXInterpreter\t") == "FEXInterpreter");
  CHECK(Trim("\vFEXInterpreter\v") == "FEXInterpreter");
  CHECK(Trim(" FEXInterpreter ") == "FEXInterpreter");
}

// ReplaceAllInPlace is expected to replace every occurrence of a token, for
// replacements that are shorter, longer, or empty, and for a replacement that
// itself contains the token.
TEST_CASE("InPlaceReplace") {
  // ReplaceAllInPlace modifies its first argument; this wrapper takes the
  // string by value and returns the modified copy so it can be used in CHECK.
  auto ReplaceAll = [](fextl::string Str, auto Token, auto New) {
    ReplaceAllInPlace(Str, Token, New);
    return Str;
  };
  CHECK(ReplaceAll("", "@", "#") == "");
  // Replace with shorter.
  CHECK(ReplaceAll("@Test@", "@Test@", "Yes") == "Yes");
  CHECK(ReplaceAll("@Test@@Test@", "@Test@", "Yes") == "YesYes");

  // Replace with longer.
  CHECK(ReplaceAll("@Test@", "@Test@", "ThisOne") == "ThisOne");
  CHECK(ReplaceAll("@Test@@Test@", "@Test@", "ThisOne") == "ThisOneThisOne");

  // Replace token with more tokens.
  // The inserted "@@" must not be matched again, otherwise the result would differ.
  CHECK(ReplaceAll("@@@", "@", "@@") == "@@@@@@");

  // Remove tokens.
  CHECK(ReplaceAll("@@@", "@", "") == "");
}


================================================
FILE: unittests/APITests/fextl_function.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_all.hpp>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/fextl/functional.h>

using FEXCore::Allocator::MemoryRegion;

// Plain free function used to exercise wrapping a function pointer.
// Returns the product of its two arguments.
static int ExampleFunction(int arg1, int arg2) {
  const int Product = arg1 * arg2;
  return Product;
}

// Stateless callable with several call signatures plus a regular member
// function. It carries no data members.
struct TrivialExampleFunctionObject {
  // Nullary call: always yields 1.
  int operator()() {
    return 1;
  }

  // Unary call: yields twice the argument.
  int operator()(int value) {
    return value * 2;
  }

  // Binary call, generic over both argument types: yields their product.
  template<typename Lhs, typename Rhs>
  int operator()(Lhs lhs, Rhs rhs) {
    return lhs * rhs;
  }

  // Ordinary member function, used to test wrapping a pointer-to-member.
  int Multiply(int lhs, int rhs) {
    return lhs * rhs;
  }
};

// Same call interface as TrivialExampleFunctionObject, but padded with 256
// bytes of state. The tests below expect wrapping this type to go through the
// allocator (AllocCount increases), unlike the stateless base type.
struct BigExampleFunctionObject : TrivialExampleFunctionObject {
  char state[256];
};

static int AllocCount = 0;
static int DeallocCount = 0;
static bool PrerunSucceeded = false;

// Allocation hook handed to fextl::move_only_function: counts every request
// in AllocCount and forwards it to FEXCore's aligned allocator.
static void* TestAlloc(size_t Alignment, size_t Size) {
  AllocCount += 1;
  void* const Block = ::FEXCore::Allocator::aligned_alloc(Alignment, Size);
  return Block;
}

// Deallocation hook handed to fextl::move_only_function. Only non-null
// pointers are counted in DeallocCount; the pointer is forwarded to
// FEXCore's aligned_free either way.
static void TestDealloc(void* Ptr) {
  const bool ReleasesMemory = Ptr != nullptr;
  if (ReleasesMemory) {
    DeallocCount += 1;
  }
  ::FEXCore::Allocator::aligned_free(Ptr);
}

template<typename F>
using function = fextl::move_only_function<F, TestAlloc, TestDealloc>;

// Check allowed move/copy operations
static_assert(!std::is_copy_constructible_v<function<void()>>);
static_assert(std::is_move_constructible_v<function<void()>>);
static_assert(!std::is_copy_assignable_v<function<void()>>);
static_assert(std::is_move_assignable_v<function<void()>>);

// Exercises fextl::move_only_function (instantiated with the counting
// TestAlloc/TestDealloc hooks) for free functions, function objects, member
// function pointers and lambdas, and checks after each step how many
// allocations and deallocations happened.
TEST_CASE("FextlFunction") {
  // Catch2 itself is not custom allocator aware, so the test failure reporter
  // itself will trigger allocation detection, which aborts execution before
  // the report is printed to console. To avoid this, each test is run twice:
  // * once without allocator hooks (to verify checked properties)
  // * once with allocator hooks (to verify no spurious allocations are made)
  //
  // To ensure the second run is skipped on failure, REQUIRE must be used
  // instead of CHECK.
  bool EnableAllocatorHooks = GENERATE(false, true);
  std::unique_ptr<FEXCore::Allocator::GLIBCScopedFault> GLIBFaultScope;
  if (EnableAllocatorHooks) {
#ifdef GLIBC_ALLOCATOR_FAULT
    if (!PrerunSucceeded) {
      printf("Warning: Test pre-run failed; skipping allocator hooks run\n");
      return;
    }

    GLIBFaultScope = std::make_unique<FEXCore::Allocator::GLIBCScopedFault>();
#else
    printf("Warning: Allocator hooks aren't enabled, skipping test run\n");
    return;
#endif
  }

  // Plain function pointer. ExampleFunction multiplies its arguments, so the
  // wrapped call must return exactly 5 * 6.
  REQUIRE(function<int(int, int)> {ExampleFunction}(5, 6) == 30);

  // Function objects
  {
    // Stateless function objects and member function pointers must not allocate.
    REQUIRE(function<int()> {TrivialExampleFunctionObject {}}() == 1);
    REQUIRE(function<int(int)> {TrivialExampleFunctionObject {}}(10) == 20);
    REQUIRE(function<int(int, int)> {TrivialExampleFunctionObject {}}(10, 4) == 40);
    TrivialExampleFunctionObject obj;
    REQUIRE(function<int(TrivialExampleFunctionObject*, int, int)> {&TrivialExampleFunctionObject::Multiply}(&obj, 10, 5) == 50);
    REQUIRE(AllocCount == 0);
    REQUIRE(DeallocCount == 0);
  }

  {
    // Each wrapped BigExampleFunctionObject temporary costs exactly one
    // allocation, released again when the temporary wrapper is destroyed.
    REQUIRE(function<int()> {BigExampleFunctionObject {}}() == 1);
    REQUIRE(AllocCount == 1);
    REQUIRE(DeallocCount == 1);
    REQUIRE(function<int(int)> {BigExampleFunctionObject {}}(10) == 20);
    REQUIRE(AllocCount == 2);
    REQUIRE(DeallocCount == 2);
    REQUIRE(function<int(int, int)> {BigExampleFunctionObject {}}(10, 4) == 40);
    REQUIRE(AllocCount == 3);
    REQUIRE(DeallocCount == 3);
    // A member function pointer is small regardless of the class size,
    // so the counters stay where they were.
    BigExampleFunctionObject obj;
    REQUIRE(function<int(BigExampleFunctionObject*, int, int)> {&BigExampleFunctionObject::Multiply}(&obj, 10, 5) == 50);
    REQUIRE(AllocCount == 3);
    REQUIRE(DeallocCount == 3);
    AllocCount = 0;
    DeallocCount = 0;
  }

  // Non-capturing lambda expressions
  {
    REQUIRE(function<int()> {[]() {
              return 5;
            }}() == 5);
    REQUIRE(AllocCount == 0);
    REQUIRE(DeallocCount == 0);
  }

  {
    REQUIRE(function<int(int)> {[](int arg) {
              return 2 * arg;
            }}(5) == 10);
    REQUIRE(AllocCount == 0);
    REQUIRE(DeallocCount == 0);
  }

  {
    // Polymorphic lambdas work without allocation, too
    REQUIRE(function<int(int)> {[](auto arg, auto...) {
              return 2 * arg;
            }}(5) == 10);
    REQUIRE(AllocCount == 0);
    REQUIRE(DeallocCount == 0);
  }

  // Test small capture lists
  {
    // Value-initialised so the by-copy capture never reads indeterminate bytes.
    std::array<char, 2> DataBlock {};
    auto small_lambda = [DataBlock]() {
      (void)DataBlock;
      return 5;
    };
    static_assert(std::is_copy_constructible_v<decltype(small_lambda)>);
    // Only meaningful if std::function can also store this lambda without
    // allocating (signalled by a nothrow constructor).
    if (std::is_nothrow_constructible_v<std::function<int()>, decltype(small_lambda)>) {
      REQUIRE(function<int()> {small_lambda}() == 5);
      REQUIRE(AllocCount == 0);
      REQUIRE(DeallocCount == 0);
    } else {
      printf("Warning: Skipping small-capture lambda test since std::function doesn't optimize it\n");
    }
  }

  // Test large capture lists
  {
    // Value-initialised so the by-copy captures never read indeterminate bytes.
    std::array<char, 256> data_block {};
    {
      REQUIRE(function<int()> {[data_block]() {
                (void)data_block;
                return 5;
              }}() == 5);
      REQUIRE(AllocCount == 1);
      REQUIRE(DeallocCount == 1);
      AllocCount = 0;
      DeallocCount = 0;
    }

    // Move construction
    {
      {
        function<int()> func {[data_block]() {
          (void)data_block;
          return 5;
        }};
        REQUIRE(AllocCount == 1);
        REQUIRE(DeallocCount == 0);
        // The temporary takes over func's storage (no new allocation) and
        // frees it when it dies at the end of the full expression.
        REQUIRE(function<int()> {std::move(func)}() == 5);
        REQUIRE(!func);
        REQUIRE(AllocCount == 1);
        REQUIRE(DeallocCount == 1);
        // Scope end triggers destruction of moved-from func
      }
      REQUIRE(AllocCount == 1);
      REQUIRE(DeallocCount == 1);
      AllocCount = 0;
      DeallocCount = 0;
    }

    // Move assignment
    {
      {
        function<int()> func {[data_block]() {
          (void)data_block;
          return 5;
        }};
        function<int()> func2;
        REQUIRE(AllocCount == 1);
        REQUIRE(DeallocCount == 0);
        // func2 was empty, so taking over func's storage frees nothing yet.
        REQUIRE((func2 = std::move(func))() == 5);
        REQUIRE(!func);
        REQUIRE(AllocCount == 1);
        REQUIRE(DeallocCount == 0);
        // Scope end triggers destruction of func2 and moved-from func
      }
      REQUIRE(AllocCount == 1);
      REQUIRE(DeallocCount == 1);
      AllocCount = 0;
      DeallocCount = 0;
    }
  }

  // Destructors
  {
    int StructDtorCount = 0;
    {
      // Value-initialised so the by-copy capture never reads indeterminate bytes.
      std::array<char, 200> data {};
      struct StructWithDestructor {
        int& StructDtorCount;

        // fextl::function is an arbitrary choice here.
        // We just need any move-only, nullable, non-allocating member type.
        function<void()> Member = []() {
        };

        StructWithDestructor(int& StructDtorCount)
          : StructDtorCount(StructDtorCount) {}
        StructWithDestructor(StructWithDestructor&& other) = default;
        ~StructWithDestructor() {
          // Moved-from instances have a null Member and are therefore not
          // counted; only the instance that still owns Member is.
          if (Member) {
            ++StructDtorCount;
          }
        }

        void operator()() {};
      };

      function<int()> func {[obj = StructWithDestructor {StructDtorCount}, data]() {
        (void)data;
        return 5;
      }};
      REQUIRE(AllocCount == 1);
      REQUIRE(DeallocCount == 0);
      REQUIRE(StructDtorCount == 0);
      REQUIRE(func() == 5);
      REQUIRE(StructDtorCount == 0);
      // Resetting the wrapper must destroy the stored lambda (and with it the
      // captured StructWithDestructor) and release its storage.
      func = nullptr;
      REQUIRE(AllocCount == 1);
      REQUIRE(DeallocCount == 1);
      REQUIRE(StructDtorCount == 1);
      // Scope end destroys the already-empty func; the counters must not change.
    }
    REQUIRE(StructDtorCount == 1);
    REQUIRE(AllocCount == 1);
    REQUIRE(DeallocCount == 1);
    AllocCount = 0;
    DeallocCount = 0;
  }

  // Lets the allocator-hook run proceed (see the GLIBC_ALLOCATOR_FAULT branch above).
  PrerunSucceeded = true;
}


================================================
FILE: unittests/ASM/3DNow/0C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x3f800000bf800000",
    "MM1":  "0x43000000c3000000",
    "MM2":  "0xc700000046fffe00",
    "MM3":  "0x0" 
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PI2FW test: the signed 16-bit integer in the low word of each source dword
; is converted to a single-precision float. The upper word of each dword in
; the data tables is filler and does not appear in the expected RegData.
pi2fw mm0, [rel data1] ; low words (-1, 1)           -> (-1.0, 1.0)
pi2fw mm1, [rel data2] ; low words (-128, 128)       -> (-128.0, 128.0)
pi2fw mm2, [rel data3] ; low words (0x7FFF, 0x8000)  -> (32767.0, -32768.0)
pi2fw mm3, [rel data4] ; low words (0, 0)            -> (0.0, 0.0)

hlt

align 8
data1:
dw -1
dw 0xFF
dw 1
dw 0xFF

data2:
dw -128
dw 0xFFFF
dw 128
dw 0xFFFF

data3:
dw 0x7FFF
dw 0x4242
dw 0x8000
dw 0x5252

data4:
dw 0x0
dw 0x1
dw 0x0
dw 0x2


================================================
FILE: unittests/ASM/3DNow/0D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x3f800000bf800000",
    "MM1":  "0x43000000c3000000",
    "MM2":  "0xbf8000003f800000",
    "MM3":  "0x0"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PI2FD test: each signed 32-bit integer of the source is converted to a
; single-precision float. Lanes are listed low dword first.
pi2fd mm0, [rel data1] ; (-1, 1)     -> (-1.0, 1.0)
pi2fd mm1, [rel data2] ; (-128, 128) -> (-128.0, 128.0)
pi2fd mm2, [rel data3] ; (1, -1)     -> (1.0, -1.0)
pi2fd mm3, [rel data4] ; (0, 0)      -> (0.0, 0.0)

hlt

align 8
data1:
dd -1
dd 1

data2:
dd -128
dd 128

data3:
dd 1
dd -1

data4:
dd 0x0
dd 0x0


================================================
FILE: unittests/ASM/3DNow/0E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFF"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; Reset the FPU and push +0.0 onto the x87 stack so that slots are in use
; before femms runs.
; NOTE(review): only seven fldz follow finit, so one of the eight slots stays
; empty; the expected tag word is the same either way.
finit
fldz
fldz
fldz
fldz
fldz
fldz
fldz

; femms sets all the tag bits to 0b11
femms

; Store the x87 environment (32-bit layout, forced by o32) to scratch memory.
; Presumably 0xe0000000 is mapped by the test harness -- confirm.
mov rdx, 0xe0000000
o32 fstenv [rdx]

; Clear eax first so the 16-bit load leaves a zero-extended tag word in RAX.
mov eax, 0
mov ax, word [rdx + 8] ; Offset 8 in the structure has FTW

hlt


================================================
FILE: unittests/ASM/3DNow/1C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00000001FFFFFFFF",
    "MM1": "0x00000080FFFFFF80",
    "MM2": "0xFFFF800000007FFF",
    "MM3": "0x0"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PF2IW round-trip test. First build float inputs with PI2FW (low word of
; each dword -> float) ...
pi2fw mm0, [rel data1] ; (-1.0, 1.0)
pi2fw mm1, [rel data2] ; (-128.0, 128.0)
pi2fw mm2, [rel data3] ; (32767.0, -32768.0)
pi2fw mm3, [rel data4] ; (0.0, 0.0)

; ... then convert back. Per the expected RegData each lane holds the 16-bit
; integer sign-extended to 32 bits (e.g. -32768.0 -> 0xFFFF8000).
pf2iw mm0, mm0
pf2iw mm1, mm1
pf2iw mm2, mm2
pf2iw mm3, mm3

hlt

align 8
data1:
dw -1
dw 0xFF
dw 1
dw 0xFF

data2:
dw -128
dw 0xFFFF
dw 128
dw 0xFFFF

data3:
dw 0x7FFF
dw 0x4242
dw 0x8000
dw 0x5252

data4:
dw 0x0
dw 0x1
dw 0x0
dw 0x2


================================================
FILE: unittests/ASM/3DNow/1D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x00000001FFFFFFFF",
    "MM1":  "0x00000080FFFFFF80",
    "MM2":  "0xFFFFFFFF00000001",
    "MM3":  "0x0"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PF2ID round-trip test. First convert the 32-bit integers in the data tables
; to floats ...
pi2fd mm0, [rel data1] ; (-1.0, 1.0)
pi2fd mm1, [rel data2] ; (-128.0, 128.0)
pi2fd mm2, [rel data3] ; (1.0, -1.0)
pi2fd mm3, [rel data4] ; (0.0, 0.0)

; ... then convert back to signed 32-bit integers. The expected RegData equals
; the original integer tables.
pf2id mm0, mm0
pf2id mm1, mm1
pf2id mm2, mm2
pf2id mm3, mm3

hlt

align 8
data1:
dd -1
dd 1

data2:
dd -128
dd 128

data3:
dd 1
dd -1

data4:
dd 0x0
dd 0x0


================================================
FILE: unittests/ASM/3DNow/86.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "1",
    "MM3": "0xff8000007f800000"
  },
  "HostFeatures": ["3DNOW", "EMMI"]
}
%endif

%include "checkprecision.mac"

section .text
global _start

; PFRCPV test: packed reciprocal of both lanes of the source.
; mm0..mm2 are checked against expected values within a relative tolerance;
; mm3 (inputs 0.0 / -0.0) is checked exactly through RegData (+inf / -inf).
_start:
pfrcpv mm0, [rel data1]
pfrcpv mm1, [rel data2]
pfrcpv mm2, [rel data3]
pfrcpv mm3, [rel data4]

; All calculated
; Now we extract all the values into memory to call check_relerr.
; For each register: store the low lane, shift the high lane down, store it.
movd edx, mm0
mov [rel result11], edx

psrlq mm0, 32
movd edx, mm0
mov [rel result12], edx

movd edx, mm1
mov [rel result21], edx

psrlq mm1, 32
movd edx, mm1
mov [rel result22], edx

movd edx, mm2
mov [rel result31], edx

psrlq mm2, 32
movd edx, mm2
mov [rel result32], edx

; check_relerr comes from checkprecision.mac (not shown here); its outcome is
; read from eax -- presumably 1 when within tolerance, TODO confirm.
; ebx accumulates the AND of all six outcomes and must end up as 1.
check_relerr rel eresult11, rel result11, rel tolerance
mov ebx, eax
check_relerr rel eresult12, rel result12, rel tolerance
and ebx, eax
check_relerr rel eresult21, rel result21, rel tolerance
and ebx, eax
check_relerr rel eresult22, rel result22, rel tolerance
and ebx, eax
check_relerr rel eresult31, rel result31, rel tolerance
and ebx, eax
check_relerr rel eresult32, rel result32, rel tolerance
and ebx, eax

hlt

align 4096
result11: dd 0
result12: dd 0
result21: dd 0
result22: dd 0
result31: dd 0
result32: dd 0

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 1.0
dd -1.0

data4:
dd 0.0
dd -0.0

eresult11:
dd -1.0
eresult12:
dd 1.0
eresult21:
dd 0xbc000000 ; -1/128
eresult22:
dd 0x3c000000 ; 1/128
eresult31:
dd 1.0
eresult32:
dd -1.0

tolerance:
dd 0x38800000 ; 2^-14 - 14bit accuracy

define_check_data_constants


================================================
FILE: unittests/ASM/3DNow/87.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "1"
  },
  "HostFeatures": ["3DNOW", "EMMI"]
}
%endif

%include "checkprecision.mac"

; PFRSQRTV test: packed reciprocal square root of both lanes of the source.
; All six lanes are compared against expected values within a relative
; tolerance; RBX must end up as 1.
pfrsqrtv mm0, [rel data1] ; (1.0, 16.0) -> (1.0, 0.25)
pfrsqrtv mm1, [rel data2] ; (4.0, 25.0) -> (0.5, 0.2)
pfrsqrtv mm2, [rel data3] ; (9.0, 1.0)  -> (1/3, 1.0)

; All calculated
; Now we extract all the values into memory to call check_relerr.
; For each register: store the low lane, shift the high lane down, store it.
movd edx, mm0
mov [rel result11], edx

psrlq mm0, 32
movd edx, mm0
mov [rel result12], edx

movd edx, mm1
mov [rel result21], edx

psrlq mm1, 32
movd edx, mm1
mov [rel result22], edx

movd edx, mm2
mov [rel result31], edx

psrlq mm2, 32
movd edx, mm2
mov [rel result32], edx

; check_relerr comes from checkprecision.mac (not shown here); its outcome is
; read from eax -- presumably 1 when within tolerance, TODO confirm.
; ebx accumulates the AND of all six outcomes.
check_relerr rel eresult11, rel result11, rel tolerance
mov ebx, eax
check_relerr rel eresult12, rel result12, rel tolerance
and ebx, eax
check_relerr rel eresult21, rel result21, rel tolerance
and ebx, eax
check_relerr rel eresult22, rel result22, rel tolerance
and ebx, eax
check_relerr rel eresult31, rel result31, rel tolerance
and ebx, eax
check_relerr rel eresult32, rel result32, rel tolerance
and ebx, eax

hlt

align 4096
result11: dd 0
result12: dd 0
result21: dd 0
result22: dd 0
result31: dd 0
result32: dd 0

align 32
data1:
dd 1.0
dd 16.0

data2:
dd 4.0
dd 25.0

data3:
dd 9.0
dd 1.0

eresult11:
dd 0x3f800000 ; 1.0
eresult12:
dd 0x3e800000 ; 1/4 = 0.25
eresult21:
dd 0x3f000000 ; 1/2 = 0.5
eresult22:
dd 0x3e4ccccd ; 1/5 = 0.2
eresult31:
dd 0x3eaaaaab ; 1/3 = 0.(3)
eresult32:
dd 0x3f800000 ; 1.0

tolerance:
dd 0x38000000 ; 2^-15 - accurate to 15bits

define_check_data_constants


================================================
FILE: unittests/ASM/3DNow/8A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x44000000c0000000",
    "MM1": "0x44800000c3800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFNACC test. Per the expected RegData:
;   result.low  = dst.low - dst.high
;   result.high = src.low - src.high
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfnacc mm0, [rel data3] ; low: -1 - 1 = -2.0,       high: 256 - (-256) = 512.0
pfnacc mm1, [rel data4] ; low: -128 - 128 = -256.0, high: 512 - (-512) = 1024.0

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/8E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00000000c0000000",
    "MM1": "0x00000000c3800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFPNACC test. Per the expected RegData:
;   result.low  = dst.low - dst.high   (negative accumulate)
;   result.high = src.low + src.high   (positive accumulate)
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfpnacc mm0, [rel data3] ; low: -1 - 1 = -2.0,       high: 256 + (-256) = 0.0
pfpnacc mm1, [rel data4] ; low: -128 - 128 = -256.0, high: 512 + (-512) = 0.0

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/90.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0xFFFFFFFF00000000",
    "MM2": "0xFFFFFFFFFFFFFFFF",
    "MM3": "0x00000000FFFFFFFF"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFCMPGE test: per-lane "dst >= src". A true lane becomes 0xFFFFFFFF, a false
; lane 0. The comments below list the low lane first, then the high lane.
movq mm0, [rel data5]
movq mm1, [rel data6]
movq mm2, [rel data7]
movq mm3, [rel data8]

; False, False
; 0.0 >= 1.0
; 0.0 >= 1.0
pfcmpge mm0, [rel data1]
; False, True
; 0.0 >= 1.0
; 1.0 >= 1.0
pfcmpge mm1, [rel data2]
; True, True
; -1.0 >= -1.0
; 0.0 >= 0.0
pfcmpge mm2, [rel data3]

; True, False
; 1.0 >= 0.0
; 0.0 >= 1.0
pfcmpge mm3, [rel data4]

hlt

align 8
data1:
dd 1.0
dd 1.0

data2:
dd 1.0
dd 1.0

data3:
dd -1.0
dd 0.0

data4:
dd 0.0
dd 1.0

data5:
dd 0.0
dd 0.0

data6:
dd 0.0
dd 1.0

data7:
dd -1.0
dd 0.0

data8:
dd 1.0
dd 0.0


================================================
FILE: unittests/ASM/3DNow/94.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x3f80000000000000",
    "MM2": "0x00000000bf800000",
    "MM3": "0x0"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFMIN test: per-lane minimum of destination and source.
; Lanes are listed low dword first.
movq mm0, [rel data5] ; (0.0, 0.0)
movq mm1, [rel data6] ; (0.0, 1.0)
movq mm2, [rel data7] ; (-1.0, 0.0)
movq mm3, [rel data8] ; (1.0, 0.0)

pfmin mm0, [rel data1] ; vs (1.0, 1.0)  -> (0.0, 0.0)
pfmin mm1, [rel data2] ; vs (1.0, 1.0)  -> (0.0, 1.0)
pfmin mm2, [rel data3] ; vs (-1.0, 0.0) -> (-1.0, 0.0)
pfmin mm3, [rel data4] ; vs (0.0, 1.0)  -> (0.0, 0.0)

hlt

align 8
data1:
dd 1.0
dd 1.0

data2:
dd 1.0
dd 1.0

data3:
dd -1.0
dd 0.0

data4:
dd 0.0
dd 1.0

data5:
dd 0.0
dd 0.0

data6:
dd 0.0
dd 1.0

data7:
dd -1.0
dd 0.0

data8:
dd 1.0
dd 0.0


================================================
FILE: unittests/ASM/3DNow/96.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "1",
    "RDX": "1",
    "MM0":  "0x7f8000007f800000",
    "MM1":  "0xff800000ff800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

%include "checkprecision.mac"

; For each operation:
; * We check precision (except when checking exact values), for 0.0 and -0.0.
; * Check that top and bottom of register has the same value.

section .text
global _start

; PFRCP test: scalar reciprocal of the low source lane, with the result
; expected in both lanes of the destination.
; rdx accumulates the precision checks, rcx the "both lanes equal" checks;
; both must end up as 1.
; check_relerr comes from checkprecision.mac (not shown here); its outcome is
; read from al/rax -- presumably 1 when within tolerance, TODO confirm.
_start:
pfrcp mm0, [rel data1]

; Precision
movd [rel result], mm0
check_relerr rel eresult1, rel result, rel tolerance
movzx rdx, al
; Duplicate top/bottom
; Shift the high lane down and compare dwords: the low dword of mm0 becomes
; all-ones when both lanes matched. Bit 0 of it is kept as the flag.
movq mm1, mm0
psrlq mm1, 32
pcmpeqd mm0, mm1
movd eax, mm0
and al, 1
movzx rcx, al

pfrcp mm0, [rel data2]

; Precision
movd [rel result], mm0
check_relerr rel eresult2, rel result, rel tolerance
and rdx, rax
; Duplicate top/bottom
movq mm1, mm0
psrlq mm1, 32
pcmpeqd mm0, mm1
movd eax, mm0
and al, 1
and rcx, rax

pfrcp mm0, [rel data3]

; Precision
movd [rel result], mm0
check_relerr rel eresult3, rel result, rel tolerance
and rdx, rax
; Duplicate top/bottom
movq mm1, mm0
psrlq mm1, 32
pcmpeqd mm0, mm1
movd eax, mm0
and al, 1
and rcx, rax

; Expecting exact results for +inf and -inf
; (low source lane 0.0 -> +inf, -0.0 -> -inf; verified through RegData MM0/MM1)
pfrcp mm0, [rel data4]
pfrcp mm1, [rel data5]

hlt

align 4096
result: dd 0

align 8
data1:
dd -1.0
dd 1.0

eresult1:
dd -1.0

data2:
dd -128.0
dd 128.0

eresult2:
dd 0xbc000000

data3:
dd 1.0
dd -1.0

eresult3:
dd 1.0

data4:
dd 0.0
dd 1.0

data5:
dd -0.0
dd 1.0

tolerance:
dd 0x38800000 ; 2^-14 - 14bit accuracy

define_check_data_constants


================================================
FILE: unittests/ASM/3DNow/97.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "1",
    "RCX":  "1",
    "RDX":  "1",
    "MM4":  "0x7f8000007f800000",
    "MM5":  "0xff800000ff800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

%include "checkprecision.mac"

section .text
global _start

; From the isa manual, one thing to consider is that
; "Negative operands are treated as positive operands for purposes of
; reciprocal square-root computation, with the sign of the result the
; same as the sign of the source operand."

; PFRSQRT test: scalar reciprocal square root of the low source lane.
; Each of the four tolerance checks lands in its own register (rdx, rcx, rbx,
; rax), all expected to be 1.
; check_relerr comes from checkprecision.mac (not shown here); its outcome is
; read from al -- presumably 1 when within tolerance, TODO confirm.
_start:
pfrsqrt mm0, [rel data1] ; pfrsqrt(1.0) ~= 1.0
movd [rel result1], mm0
check_relerr rel eresult1, rel result1, rel tolerance
movzx rdx, al

pfrsqrt mm1, [rel data2] ; pfrsqrt(4.0) ~= 0.5
movd [rel result2], mm1
check_relerr rel eresult2, rel result2, rel tolerance
movzx rcx, al

pfrsqrt mm2, [rel data3] ; pfrsqrt(9.0) ~= 1/3
movd [rel result3], mm2
check_relerr rel eresult3, rel result3, rel tolerance
movzx rbx, al

pfrsqrt mm3, [rel data4] ; pfrsqrt(-1.0) == -1.0
movd [rel result4], mm3
check_relerr rel eresult4, rel result4, rel tolerance
movzx rax, al

; Expecting exact results
; (compared through RegData MM4/MM5; both lanes hold the same value)
pfrsqrt mm4, [rel data5] ; pfrsqrt(0.0) == inf
pfrsqrt mm5, [rel data6] ; pfrsqrt(-0.0) == -inf
hlt

align 4096
result1: times 32 db 0
result2: times 32 db 0
result3: times 32 db 0
result4: times 32 db 0

align 32
data1:
dd 1.0
dd 16.0

eresult1: ; expected
dd 1.0

data2:
dd 4.0
dd 25.0

eresult2: ; expected
dd 0.5

data3:
dd 9.0
dd 1.0

eresult3: ; expected
dd 0x3eaaaaab ; 1/3

data4:
dd -1.0
dd -16.0

eresult4: ; expected
dd -1.0

data5:
dd 0.0
dd -9.0

data6:
dd -0.0
dd -9.0

tolerance:
dd 0x38000000 ; 2^-15 - accurate to 15bits

define_check_data_constants


================================================
FILE: unittests/ASM/3DNow/9A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x43808000c3808000",
    "MM1": "0x44200000c4200000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFSUB test: per-lane dst = dst - src. Lanes are listed low dword first.
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfsub mm0, [rel data3] ; - (256.0, -256.0) -> (-257.0, 257.0)
pfsub mm1, [rel data4] ; - (512.0, -512.0) -> (-640.0, 640.0)

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/9E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xc37f0000437f0000",
    "MM1": "0xc3c0000043c00000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFADD test: per-lane dst = dst + src. Lanes are listed low dword first.
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfadd mm0, [rel data3] ; + (256.0, -256.0) -> (255.0, -255.0)
pfadd mm1, [rel data4] ; + (512.0, -512.0) -> (384.0, -384.0)

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/A0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0xFFFFFFFF00000000", 
    "MM2": "0xFFFFFFFFFFFFFFFF",
    "MM3": "0x00000000FFFFFFFF"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFCMPGT test: per-lane "dst > src". A true lane becomes 0xFFFFFFFF, a false
; lane 0. The comments below list the low lane first, then the high lane.
movq mm0, [rel data5]
movq mm1, [rel data6]
movq mm2, [rel data7]
movq mm3, [rel data8]

; False, False
; 0.0 > 1.0
; 0.0 > 1.0
pfcmpgt mm0, [rel data1]

; False, True
; 0.0 > 1.0
; 2.0 > 1.0
pfcmpgt mm1, [rel data2]

; True, True
; -1.0 > -2.0
; 0.0 > -1.0

pfcmpgt mm2, [rel data3]

; True, False
; 1.0 > 0.0
; 0.0 > 1.0
pfcmpgt mm3, [rel data4]

hlt

align 8
data1:
dd 1.0
dd 1.0

data2:
dd 1.0
dd 1.0

data3:
dd -2.0
dd -1.0

data4:
dd 0.0
dd 1.0

data5:
dd 0.0
dd 0.0

data6:
dd 0.0
dd 2.0

data7:
dd -1.0
dd 0.0

data8:
dd 1.0
dd 0.0


================================================
FILE: unittests/ASM/3DNow/A4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x3f8000003f800000",
    "MM1": "0x3f8000003f800000",
    "MM2": "0x00000000bf800000",
    "MM3": "0x3f8000003f800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFMAX test: per-lane maximum of destination and source.
; Lanes are listed low dword first.
movq mm0, [rel data5] ; (0.0, 0.0)
movq mm1, [rel data6] ; (0.0, 1.0)
movq mm2, [rel data7] ; (-1.0, 0.0)
movq mm3, [rel data8] ; (1.0, 0.0)

pfmax mm0, [rel data1] ; vs (1.0, 1.0)  -> (1.0, 1.0)
pfmax mm1, [rel data2] ; vs (1.0, 1.0)  -> (1.0, 1.0)
pfmax mm2, [rel data3] ; vs (-1.0, 0.0) -> (-1.0, 0.0)
pfmax mm3, [rel data4] ; vs (0.0, 1.0)  -> (1.0, 1.0)

hlt

align 8
data1:
dd 1.0
dd 1.0

data2:
dd 1.0
dd 1.0

data3:
dd -1.0
dd 0.0

data4:
dd 0.0
dd 1.0

data5:
dd 0.0
dd 0.0

data6:
dd 0.0
dd 1.0

data7:
dd -1.0
dd 0.0

data8:
dd 1.0
dd 0.0


================================================
FILE: unittests/ASM/3DNow/A6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x9192939481828384",
    "MM1": "0xB1B2B3B4A1A2A3A4"
  },
  "HostFeatures": ["3DNOW", "EMMI"]
}
%endif

; PFRCPIT1 test: the destination is preloaded with one bit pattern and the
; source holds another. The expected RegData equals the source operand, i.e.
; the instruction is expected to behave as a plain move here.
movq mm0, [rel data1]
movq mm1, [rel data2]

; Legitimate to implement as a move if the rsqrt or recip instruction does the full calculation
pfrcpit1 mm0, [rel data3]
pfrcpit1 mm1, [rel data4]

hlt

align 8
data1:
dd 0x41424344
dd 0x51525354

data2:
dd 0x61626364
dd 0x71727374

data3:
dd 0x81828384
dd 0x91929394

data4:
dd 0xA1A2A3A4
dd 0xB1B2B3B4


================================================
FILE: unittests/ASM/3DNow/A7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x9192939481828384",
    "MM1":  "0xB1B2B3B4A1A2A3A4"
  },
  "HostFeatures": ["3DNOW", "EMMI"]
}
%endif

; PFRSQIT1 test: the destination is preloaded with one bit pattern and the
; source holds another. The expected RegData equals the source operand, i.e.
; the instruction is expected to behave as a plain move here.
movq mm0, [rel data1]
movq mm1, [rel data2]

; Legitimate to implement as a move if the rsqrt or recip instruction does the full calculation
pfrsqit1 mm0, [rel data3]
pfrsqit1 mm1, [rel data4]

hlt

align 8
data1:
dd 0x41424344
dd 0x51525354

data2:
dd 0x61626364
dd 0x71727374

data3:
dd 0x81828384
dd 0x91929394

data4:
dd 0xA1A2A3A4
dd 0xB1B2B3B4


================================================
FILE: unittests/ASM/3DNow/AA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xc380800043808000",
    "MM1": "0xc420000044200000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFSUBR test: reversed subtract, per-lane dst = src - dst.
; Lanes are listed low dword first.
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfsubr mm0, [rel data3] ; (256.0, -256.0) - dst -> (257.0, -257.0)
pfsubr mm1, [rel data4] ; (512.0, -512.0) - dst -> (640.0, -640.0)

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/AE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFACC test. Per the expected RegData:
;   result.low  = dst.low + dst.high
;   result.high = src.low + src.high
; Every pair in the data tables sums to zero, so both results are 0.
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfacc mm0, [rel data3] ; src (256.0, -256.0)
pfacc mm1, [rel data4] ; src (512.0, -512.0)

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/B0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0xFFFFFFFF00000000",
    "MM2": "0xFFFFFFFFFFFFFFFF",
    "MM3": "0x00000000FFFFFFFF"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFCMPEQ test: per-lane "dst == src". A true lane becomes 0xFFFFFFFF, a false
; lane 0. The comments below list the low lane first, then the high lane.
movq mm0, [rel data5]
movq mm1, [rel data6]
movq mm2, [rel data7]
movq mm3, [rel data8]

; False, False
; 0.0 == 1.0
; 0.0 == 1.0
pfcmpeq mm0, [rel data1]

; False, True
; 0.0 == 1.0
; 1.0 == 1.0
pfcmpeq mm1, [rel data2]

; True, True
; -2.0 == -2.0
; -1.0 == -1.0

pfcmpeq mm2, [rel data3]

; True, False
; 0.0 == 0.0
; 0.0 == 1.0
pfcmpeq mm3, [rel data4]

hlt

align 8
data1:
dd 1.0
dd 1.0

data2:
dd 1.0
dd 1.0

data3:
dd -2.0
dd -1.0

data4:
dd 0.0
dd 1.0

data5:
dd 0.0
dd 0.0

data6:
dd 0.0
dd 1.0

data7:
dd -2.0
dd -1.0

data8:
dd 0.0
dd 0.0


================================================
FILE: unittests/ASM/3DNow/B4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xc3800000c3800000",
    "MM1": "0xc7800000c7800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PFMUL test: per-lane dst = dst * src. Lanes are listed low dword first.
movq mm0, [rel data1] ; (-1.0, 1.0)
movq mm1, [rel data2] ; (-128.0, 128.0)

pfmul mm0, [rel data3] ; * (256.0, -256.0) -> (-256.0, -256.0)
pfmul mm1, [rel data4] ; * (512.0, -512.0) -> (-65536.0, -65536.0)

hlt

align 8
data1:
dd -1.0
dd 1.0

data2:
dd -128.0
dd 128.0

data3:
dd 256.0
dd -256.0

data4:
dd 512.0
dd -512.0


================================================
FILE: unittests/ASM/3DNow/B6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x9192939481828384",
    "MM1": "0xB1B2B3B4A1A2A3A4"
  },
  "HostFeatures": ["3DNOW", "EMMI"]
}
%endif

; PFRCPIT2 test: the destination is preloaded with one bit pattern and the
; source holds another. The expected RegData equals the source operand, i.e.
; the instruction is expected to behave as a plain move here.
movq mm0, [rel data1]
movq mm1, [rel data2]

; Legitimate to implement as a move if the rsqrt or recip instruction does the full calculation
pfrcpit2 mm0, [rel data3]
pfrcpit2 mm1, [rel data4]

hlt

align 8
data1:
dd 0x41424344
dd 0x51525354

data2:
dd 0x61626364
dd 0x71727374

data3:
dd 0x81828384
dd 0x91929394

data4:
dd 0xA1A2A3A4
dd 0xB1B2B3B4


================================================
FILE: unittests/ASM/3DNow/B7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0",
    "MM2": "0x3fff000100000001",
    "MM3": "0x0000000200000004"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PMULHRW test: per 16-bit lane, multiply and keep the rounded high word.
; The expected RegData matches (dst * src + 0x8000) >> 16 for every lane.
movq mm0, [rel data5]
movq mm1, [rel data6]
movq mm2, [rel data7]
movq mm3, [rel data8]

; nasm doesn't support emitting this instruction
; Hand-encoded as: 0F 0F, ModRM (RIP-relative, mm0..mm3), disp32, opcode byte B7.
; Each instruction and each data table is 8 bytes long, so the same
; displacement (0x1c) selects data1..data4 in turn. Any change to the code or
; data layout between here and data1 requires recomputing it.
; pmulhrw mm0, [rel data1]
db 0x0f, 0x0f, 0x05, 0x1c, 0x00, 0x00, 0x00, 0xB7
; pmulhrw mm1, [rel data2]
db 0x0f, 0x0f, 0x0d, 0x1c, 0x00, 0x00, 0x00, 0xB7
; pmulhrw mm2, [rel data3]
db 0x0f, 0x0f, 0x15, 0x1c, 0x00, 0x00, 0x00, 0xB7
; pmulhrw mm3, [rel data4]
db 0x0f, 0x0f, 0x1d, 0x1c, 0x00, 0x00, 0x00, 0xB7

hlt

align 8
data1:
  dw 0x0
  dw 0x0
  dw 0x0
  dw 0x0

data2:
  dw 0x1
  dw 0x1
  dw 0x1
  dw 0x1

data3:
  dw 0x2
  dw 0x2
  dw 0x2
  dw 0x7FFF

data4:
dw 0x10
dw 0x4
dw 0x8
dw 0x8

data5:
  dw 0x0
  dw 0x0
  dw 0x0
  dw 0x0

data6:
  dw 0x1
  dw 0x1
  dw 0x1
  dw 0x1

data7:
dw 0x7FFF
dw 0x2
dw 0x7FFF
dw 0x7FFF

data8:
dw 0x3E80
dw 0x4
dw 0x3E80
dw 0x4


================================================
FILE: unittests/ASM/3DNow/BB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x8182838491929394",
    "MM1":  "0xA1A2A3A4B1B2B3B4"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PSWAPD test: the destination receives the source with its two dwords
; swapped. The preloaded destination value must not influence the result.
movq mm0, [rel data1]
movq mm1, [rel data2]

; Result depends only on the source operand: low and high dwords exchanged.
pswapd mm0, [rel data3] ; (0x81828384, 0x91929394) -> (0x91929394, 0x81828384)
pswapd mm1, [rel data4] ; (0xA1A2A3A4, 0xB1B2B3B4) -> (0xB1B2B3B4, 0xA1A2A3A4)

hlt

align 8
data1:
dd 0x41424344
dd 0x51525354

data2:
dd 0x61626364
dd 0x71727374

data3:
dd 0x81828384
dd 0x91929394

data4:
dd 0xA1A2A3A4
dd 0xB1B2B3B4


================================================
FILE: unittests/ASM/3DNow/BF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2179b0697d5378c4",
    "MM1": "0x1ed68638699d35ca",
    "MM2": "0x165c42291f28194c",
    "MM3": "0x2179b0697d5378c4",
    "MM4": "0x1ed68638699d35ca",
    "MM5": "0x165c42291f28194c"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; PAVGUSB test: per-byte unsigned average, rounded up ((a + b + 1) >> 1),
; checked with both register and memory source operands.
; Presumably 0xe0000000 is scratch memory mapped by the test harness -- confirm.
mov rdx, 0xe0000000

; Operand tables. Only the first qword of each 16-byte pair is loaded below.
; Slot 0: first operand for every average.
mov rax, 0x2bb883523d4f3197
mov [rdx + 8 * 0], rax
mov rax, 0x1246c77764260189
mov [rdx + 8 * 1], rax

; Slot 2: second operand A.
mov rax, 0x163add80bc57bef1
mov [rdx + 8 * 2], rax
mov rax, 0x64d615e5b405a306
mov [rdx + 8 * 3], rax

; Slot 4: second operand B.
mov rax, 0x11f4881d94eb39fc
mov [rdx + 8 * 4], rax
mov rax, 0xa9162248f2d0a23a
mov [rdx + 8 * 5], rax

; Slot 6: all zeros (averaging with zero gives (a + 1) >> 1).
mov rax, 0x0
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; mm0..mm5 all start as the same first operand; mm6/mm7 hold second operands.
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]
movq mm5, [rdx + 8 * 0]
movq mm6, [rdx + 8 * 2]
movq mm7, [rdx + 8 * 4]

; Register-source form.
pavgusb mm0, mm6
pavgusb mm1, mm7

movq mm7, [rdx + 8 * 6]
pavgusb mm2, mm7

; Memory-source form with the same operands; mm3..mm5 must match mm0..mm2.
pavgusb mm3, [rdx + 8 * 2]
pavgusb mm4, [rdx + 8 * 4]
pavgusb mm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/Atomics/adc_atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464848",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK ADC (16-bit) on unaligned addresses, including ones that straddle
; 4/8/16/64-byte boundaries.
; Presumably 0xe0000000 is scratch memory mapped by the test harness -- confirm.
mov r15, 0xe0000000

; Fill ten qwords with a recognisable byte pattern (48 47 46 45 44 43 42 41).
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. The expected values correspond to CF being clear at every adc
; (each addition adds exactly 1 and none of them carries out).
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock adc word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock adc word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock adc word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock adc word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock adc word [r15 + 8 * 0 + 63], ax

; Read back the qwords touched by (or adjacent to) the accesses above so the
; harness can compare them against RegData.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Atomics/adc_atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK ADC (32-bit) on unaligned addresses that straddle 4/8/16/64-byte
; boundaries.
; Presumably 0xe0000000 is scratch memory mapped by the test harness -- confirm.
mov r15, 0xe0000000

; Fill ten qwords with a recognisable byte pattern (48 47 46 45 44 43 42 41).
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. The expected values correspond to CF being clear at every adc
; (each addition adds exactly 1 and none of them carries out).
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock adc dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock adc dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock adc dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock adc dword [r15 + 8 * 0 + 63], eax

; Read back the qwords touched by (or adjacent to) the accesses above so the
; harness can compare them against RegData.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Atomics/adc_atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434445464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK ADC (64-bit) on unaligned addresses that straddle 8/16/64-byte
; boundaries.
; Presumably 0xe0000000 is scratch memory mapped by the test harness -- confirm.
mov r15, 0xe0000000

; Fill ten qwords with a recognisable byte pattern (48 47 46 45 44 43 42 41).
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. The expected values correspond to CF being clear at every adc
; (each addition adds exactly 1 and none of them carries out).
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock adc qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock adc qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock adc qword [r15 + 8 * 0 + 63], rax

; Read back the qwords touched by (or adjacent to) the accesses above so the
; harness can compare them against RegData.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Atomics/neg_atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbf4243bbbbb9b948",
    "RBX": "0xbf424344454647b7",
    "RCX": "0x41424344454647b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbf42434445464748",
    "RDI": "0x41424344454647b7"
  }
}
%endif

; `lock neg` on 16-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, words at several
; byte offsets are negated in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 1: stays within a 4-byte unit
lock neg word [r15 + 1]

; Offset 3: straddles a 4-byte boundary
lock neg word [r15 + 3]

; Offset 7: straddles an 8-byte boundary
lock neg word [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock neg word [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock neg word [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/neg_atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbfbdbcbbbb464748",
    "RBX": "0xbf42434445b9b8b7",
    "RCX": "0x4142434445b9b8b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbf42434445464748",
    "RDI": "0x4142434445b9b8b7"
  }
}
%endif

; `lock neg` on 32-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, dwords at several
; byte offsets are negated in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 3: straddles a 4-byte boundary
lock neg dword [r15 + 3]

; Offset 7: straddles an 8-byte boundary
lock neg dword [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock neg dword [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock neg dword [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/neg_atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbf42434445464748",
    "RBX": "0xbfbdbcbbbab9b8b7",
    "RCX": "0x41bdbcbbbab9b8b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbf42434445464748",
    "RDI": "0x41bdbcbbbab9b8b7"
  }
}
%endif

; `lock neg` on 64-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, qwords at several
; byte offsets are negated in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 7: straddles an 8-byte boundary
lock neg qword [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock neg qword [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock neg qword [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/not_atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbe4243bbbab9b848",
    "RBX": "0xbe424344454647b7",
    "RCX": "0x41424344454647b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbe42434445464748",
    "RDI": "0x41424344454647b7"
  }
}
%endif

; `lock not` on 16-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, words at several
; byte offsets are inverted in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 1: stays within a 4-byte unit
lock not word [r15 + 1]

; Offset 3: straddles a 4-byte boundary
lock not word [r15 + 3]

; Offset 7: straddles an 8-byte boundary
lock not word [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock not word [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock not word [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/not_atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbebdbcbbba464748",
    "RBX": "0xbe42434445b9b8b7",
    "RCX": "0x4142434445b9b8b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbe42434445464748",
    "RDI": "0x4142434445b9b8b7"
  }
}
%endif

; `lock not` on 32-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, dwords at several
; byte offsets are inverted in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 3: straddles a 4-byte boundary
lock not dword [r15 + 3]

; Offset 7: straddles an 8-byte boundary
lock not dword [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock not dword [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock not dword [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/not_atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbe42434445464748",
    "RBX": "0xbebdbcbbbab9b8b7",
    "RCX": "0x41bdbcbbbab9b8b7",
    "RDX": "0x4142434445464748",
    "RSI": "0xbe42434445464748",
    "RDI": "0x41bdbcbbbab9b8b7"
  }
}
%endif

; `lock not` on 64-bit memory operands at misaligned addresses.
; A ten-qword buffer at 0xe0000000 is filled with one pattern, qwords at several
; byte offsets are inverted in place, and six qwords are read back into registers.

; Loads qword slot %2 of the buffer (based at r15) into register %1
%macro read_slot 2
mov %1, qword [r15 + 8 * %2]
%endmacro

mov r15, 0xe0000000

; Fill qword slots 0..9 with the same pattern
mov rax, 0x4142434445464748
%assign slot 0
%rep 10
mov [r15 + 8 * slot], rax
%assign slot slot + 1
%endrep

; Offset 7: straddles an 8-byte boundary
lock not qword [r15 + 7]

; Offset 15: straddles a 16-byte boundary
lock not qword [r15 + 15]

; Offset 63: straddles a cacheline boundary
lock not qword [r15 + 63]

; Collect the slots checked by RegData
read_slot rax, 0
read_slot rbx, 1
read_slot rcx, 2
read_slot rdx, 3
read_slot rsi, 7
read_slot rdi, 8

hlt


================================================
FILE: unittests/ASM/Atomics/sbb_atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464648",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Exercises `lock sbb` with a 16-bit memory operand at misaligned addresses.
; Ten qwords at 0xe0000000 are seeded with one pattern, 1 is subtracted at
; several byte offsets, and six of the qwords are read back for comparison
; with RegData.

mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

mov rax, 1

; SBB subtracts the incoming carry flag as well as the source operand, and none
; of the MOVs above write the flags. Clear CF explicitly so the first SBB
; subtracts exactly 1; the expected values in RegData assume that. None of the
; subtractions below borrows, so CF stays clear for the following SBBs.
clc

; Test 1 byte offset within 4byte boundary
lock sbb word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock sbb word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock sbb word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock sbb word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock sbb word [r15 + 8 * 0 + 63], ax

; Read back the qwords around the modified offsets
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Atomics/sbb_atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Exercises `lock sbb` with a 32-bit memory operand at misaligned addresses.
; Ten qwords at 0xe0000000 are seeded with one pattern, 1 is subtracted at
; several byte offsets, and six of the qwords are read back for comparison
; with RegData.

mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

mov rax, 1

; SBB subtracts the incoming carry flag as well as the source operand, and none
; of the MOVs above write the flags. Clear CF explicitly so the first SBB
; subtracts exactly 1; the expected values in RegData assume that. None of the
; subtractions below borrows, so CF stays clear for the following SBBs.
clc

; Test 3 byte offset across 4byte boundary
lock sbb dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock sbb dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock sbb dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock sbb dword [r15 + 8 * 0 + 63], eax

; Read back the qwords around the modified offsets
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Atomics/sbb_atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434445464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Exercises `lock sbb` with a 64-bit memory operand at misaligned addresses.
; Ten qwords at 0xe0000000 are seeded with one pattern, 1 is subtracted at
; several byte offsets, and six of the qwords are read back for comparison
; with RegData.

mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

mov rax, 1

; SBB subtracts the incoming carry flag as well as the source operand, and none
; of the MOVs above write the flags. Clear CF explicitly so the first SBB
; subtracts exactly 1; the expected values in RegData assume that. None of the
; subtractions below borrows, so CF stays clear for the following SBBs.
clc

; Test 7 byte offset across 8byte boundary
lock sbb qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock sbb qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock sbb qword [r15 + 8 * 0 + 63], rax

; Read back the qwords around the modified offsets
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/CALL.asm
================================================
%ifdef CONFIG
{
  "Ignore": [],
  "RegData": {
    "RAX": "1",
    "RBX": "2"
  }
}
%endif

; Exercises two CALL forms: a direct call to a label and an indirect call
; through a register. Each callee writes a distinct marker (rax = 1, rbx = 2),
; so the final register state shows that both were reached and returned.

; Jump to the label that immediately follows
jmp label
label:

; CALL and RET push and pop the return address, so set up a stack pointer first
mov rsp, 0xe8000000

; Test direct literal call
call function

; Move the absolute address of function2 in to rbx and call it
lea rbx, [rel function2]
call rbx

hlt

; Target of the direct call: sets rax = 1
function:
mov rax, 1
ret

; Target of the indirect call: replaces rbx (which held this address) with 2
function2:
mov rbx, 2
ret

; Not reached on the expected path: the test stops at the hlt above
hlt


================================================
FILE: unittests/ASM/CMakeLists.txt
================================================
# The ASM unit tests are assembled with NASM; stop configuring with a clear
# diagnostic when no NASM-compatible assembler was found.
enable_language(ASM_NASM)
if(NOT CMAKE_ASM_NASM_COMPILER_LOADED)
  # `error()` is not a CMake command; message(FATAL_ERROR ...) is the supported
  # way to abort configuration with a message.
  message(FATAL_ERROR "Failed to find NASM compatible assembler!")
endif()

# Collect every .asm file below this directory.
# Careful. Globbing can't see changes to the contents of files
# Need to do a fresh clean to see changes
file(GLOB_RECURSE ASM_SOURCES CONFIGURE_DEPENDS *.asm)

# Accumulates the generated .bin and .config.bin outputs of every test
set(ASM_DEPENDS "")

# Run Scripts/ClassifyCPU.py at configure time and keep its (stripped) stdout in
# CPU_CLASS; it selects the Disabled_Tests_${CPU_CLASS} list passed to each test.
execute_process(COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/ClassifyCPU.py"
  OUTPUT_STRIP_TRAILING_WHITESPACE
  OUTPUT_VARIABLE CPU_CLASS)

# For every ASM source: assemble it to a flat binary, generate its binary config
# from the embedded JSON header, and register one ctest entry per run configuration.
foreach(ASM_SRC ${ASM_SOURCES})
  # REL_ASM is relative to the repository root (used for the output folder);
  # REL_TEST_ASM is relative to this directory (used in the test names).
  file(RELATIVE_PATH REL_ASM ${CMAKE_SOURCE_DIR} ${ASM_SRC})
  file(RELATIVE_PATH REL_TEST_ASM ${CMAKE_CURRENT_SOURCE_DIR} ${ASM_SRC})
  get_filename_component(ASM_NAME ${ASM_SRC} NAME)
  get_filename_component(ASM_DIR "${REL_ASM}" DIRECTORY)
  set(OUTPUT_ASM_FOLDER "${CMAKE_BINARY_DIR}/${ASM_DIR}")

  # Generate build directory
  file(MAKE_DIRECTORY "${OUTPUT_ASM_FOLDER}")

  # Generate a temporary file
  set(ASM_TMP "${ASM_NAME}_TMP.asm")
  set(TMP_FILE "${OUTPUT_ASM_FOLDER}/${ASM_TMP}")

  # Copy the source, then use sed to prepend a `BITS 64` line and append a
  # trailing `ret` line to the copy.
  add_custom_command(OUTPUT ${TMP_FILE}
    DEPENDS "${ASM_SRC}"
    COMMAND "cp" ARGS "${ASM_SRC}" "${TMP_FILE}"
    COMMAND "sed" ARGS "-i" "-e" "\'1s;^;BITS 64\\n;\'" "-e" "\'\$\$a\\ret\\n\'" "${TMP_FILE}")

  set(OUTPUT_NAME "${OUTPUT_ASM_FOLDER}/${ASM_NAME}.bin")
  set(OUTPUT_CONFIG_NAME "${OUTPUT_ASM_FOLDER}/${ASM_NAME}.config.bin")

  # Assemble the temporary file; shared includes live in unittests/ASM/Includes/
  add_custom_command(OUTPUT ${OUTPUT_NAME}
    DEPENDS "${TMP_FILE}"
    COMMAND "nasm" ARGS "-i" "${CMAKE_SOURCE_DIR}/unittests/ASM/Includes/" "${TMP_FILE}" "-o" "${OUTPUT_NAME}")

  # Generate the config file from the original source with json_asm_config_parse.py
  add_custom_command(OUTPUT ${OUTPUT_CONFIG_NAME}
    DEPENDS "${ASM_SRC}"
    DEPENDS "${CMAKE_SOURCE_DIR}/Scripts/json_asm_config_parse.py"
    DEPENDS "${CMAKE_SOURCE_DIR}/Scripts/json_config_parse.py"
    COMMAND "python3" ARGS "${CMAKE_SOURCE_DIR}/Scripts/json_asm_config_parse.py" "${ASM_SRC}" "${OUTPUT_CONFIG_NAME}")

  list(APPEND ASM_DEPENDS "${OUTPUT_NAME};${OUTPUT_CONFIG_NAME}")

  # Format is "<Test Arguments>" "<Test Name>" "<Test Type>"

  # TEST_ARGS is a flat list consumed three entries at a time by the loop below
  set(TEST_ARGS)
  if (ARCHITECTURE_arm64 OR ENABLE_VIXL_SIMULATOR)
    list(APPEND TEST_ARGS
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=1 FEX_MULTIBLOCK=0 FEX_TSOENABLED=0"   "jit_1"     "jit"
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=500 FEX_MULTIBLOCK=0 FEX_TSOENABLED=0" "jit_500"   "jit"
      "FEX_SILENTLOG=0 FEX_DUMPGPRS=1 FEX_MAXINST=500 FEX_MULTIBLOCK=1 FEX_TSOENABLED=0" "jit_500_m" "jit")
  endif()

  # The simulator overrides the detected CPU class; otherwise an x86-64 host
  # additionally gets a "host" run configuration.
  if (ENABLE_VIXL_SIMULATOR)
    set(CPU_CLASS Simulator)
  elseif (ARCHITECTURE_x86_64)
    list(APPEND TEST_ARGS "FEX_SILENTLOG=0 FEX_DUMPGPRS=1" "host" "host")
  endif()

  # MinGW builds launch the .exe runner through wine
  if (NOT MINGW)
    set(LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
  else()
    set(LAUNCH_PROGRAM "wine" "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner.exe")
  endif()

  # Walk TEST_ARGS in strides of three: arguments, description, type
  list(LENGTH TEST_ARGS ARG_COUNT)
  math(EXPR ARG_COUNT "${ARG_COUNT}-1")
  foreach(Index RANGE 0 ${ARG_COUNT} 3)
    math(EXPR TEST_NAME_INDEX "${Index}+1")
    math(EXPR TEST_TYPE_INDEX "${Index}+2")

    list(GET TEST_ARGS ${Index} FEX_ARGS)
    list(GET TEST_ARGS ${TEST_NAME_INDEX} TEST_DESC)
    list(GET TEST_ARGS ${TEST_TYPE_INDEX} TEST_TYPE)

    set(TEST_NAME "${TEST_DESC}/Test_64Bit_${REL_TEST_ASM}")
    # Split the space-separated KEY=VALUE string into a CMake list
    string(REPLACE " " ";" FEX_ARGS_LIST ${FEX_ARGS})

    # Tests whose name contains SelfModifyingCode also get FEX_SMCCHECKS=full
    if (TEST_NAME MATCHES "SelfModifyingCode")
      list(APPEND FEX_ARGS_LIST "FEX_SMCCHECKS=full")
    endif()

    # The runner script receives the known-failure and disabled-test lists, the
    # list-lookup name ("Test_<relative path>"), the ctest name, the launcher,
    # and finally the assembled binary and its config.
    add_test(NAME ${TEST_NAME}
      COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/testharness_runner.py"
      "${CMAKE_SOURCE_DIR}/unittests/ASM/Known_Failures"
      "${CMAKE_SOURCE_DIR}/unittests/ASM/Known_Failures_${TEST_TYPE}"
      "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests"
      "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests_${TEST_TYPE}"
      "${CMAKE_SOURCE_DIR}/unittests/ASM/Disabled_Tests_${CPU_CLASS}"
      "Test_${REL_TEST_ASM}"
      "${TEST_NAME}"
      ${LAUNCH_PROGRAM}
      "${OUTPUT_NAME}" "${OUTPUT_CONFIG_NAME}")
    # This will cause the ASM tests to fail if it can't find the TestHarness or ASM files
    # Pretty crap way to work around the fact that tests can't have a build dependency in a different directory
    # Just make sure to independently run `make all` then `make test`
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner")
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${OUTPUT_NAME}")
    set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${OUTPUT_CONFIG_NAME}")
    # Exit code 125 from the runner is reported by ctest as "skipped"
    set_property(TEST ${TEST_NAME} APPEND PROPERTY SKIP_RETURN_CODE 125)
    set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT ${FEX_ARGS_LIST})
    if (MINGW)
      # Ensure the DOS region can be allocated.
      set_property(TEST ${TEST_NAME} APPEND PROPERTY ENVIRONMENT "WINEPRELOADRESERVE=10000-110000")
    endif()
  endforeach()

endforeach()

# Built by default: assembles every test binary and generates every config file
add_custom_target(asm_files ALL DEPENDS "${ASM_DEPENDS}")

# Runs only the tests whose name contains "64Bit" and ends in ".asm".
# NOTE(review): `$$` presumably survives generator escaping as a single `$`
# end-of-line anchor in the ctest regex - confirm.
add_custom_target(64bit_asm_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  DEPENDS asm_files
  DEPENDS "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner"
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*64Bit\.*.asm$$")

# Runs every test whose name ends in ".asm". 32bit_asm_files is not defined in
# this file, so it has to be provided by another CMake directory.
add_custom_target(asm_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  DEPENDS asm_files
  DEPENDS 32bit_asm_files
  DEPENDS "${CMAKE_BINARY_DIR}/Bin/TestHarnessRunner"
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.asm$$")


================================================
FILE: unittests/ASM/ConstProp/ConstPooling.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0xf"
  }
}
%endif

; Emits four passes of sixteen dword stores (immediates 0x0..0xf) to the same
; address, so each constant is re-used several times, then reads back the value
; written by the last store.

mov rdx, 0xe0000000

; generate a lot of re-used constants
%rep 4
  %assign const_val 0
  %rep 16
    mov dword[rdx], const_val
    %assign const_val const_val + 1
  %endrep
%endrep

; The final store wrote 0xf
mov eax, dword[rdx]

hlt

================================================
FILE: unittests/ASM/DAZTest.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AFP", "AVX"],
  "RegData": {
    "XMM0": ["0x0108000040e00000", "0xd1d2d3d4d5d6d7d8", "0", "0"],
    "XMM1": ["0x00cfffff40e00000", "0xd1d2d3d4d5d6d7d8", "0", "0"]
  }
}
%endif

; Checks the effect of MXCSR.DAZ (bit 6) on a packed single-precision add.
; The same vaddps is executed once before and once after DAZ is set. Element 1
; of the second operand (0x00400000) is a denormal, so the two results differ
; in that element: with DAZ set the expected sum equals the first operand
; unchanged (0x00cfffff).

vmovaps ymm1, [rel .data_three]
vmovaps ymm2, [rel .data_four]

; Do an add without DAZ
vaddps xmm0, xmm1, xmm2

; Set DAZ
; Read-modify-write MXCSR through memory: store it, OR in bit 6, load it back
stmxcsr [rel .data_mxcsr]
or dword [rel .data_mxcsr], (1 << 6)
ldmxcsr [rel .data_mxcsr]

; Do an add with DAZ
vaddps xmm1, xmm1, xmm2

hlt
; vmovaps needs aligned operands; each data block below is 32 bytes long
align 4096

; First operand: 3.0, a small normal number, then filler bit patterns
.data_three:
dd 3.0, 0x00cfffff
dq 0xa1a2a3a4a5a6a7a8, 0xb1b2b3b4b5b6b7b8, 0xc1c2c3c4c5c6c7c8

; Second operand: 4.0, a denormal, then filler bit patterns
.data_four:
dd 4.0, 0x00400000
dq 0xd1d2d3d4d5d6d7d8, 0xe1e2e3e4e5e6e7e8, 0xf1f2f3f4f5f6f7f8

; Scratch dword for the MXCSR read-modify-write
.data_mxcsr:
dd 0


================================================
FILE: unittests/ASM/Disabled_Tests
================================================
# Entries use the `Test_<path relative to unittests/ASM>` form that
# unittests/ASM/CMakeLists.txt passes to the test runner.

# Can't test this in a real environment
Test_Primary/Primary_E8.asm
Test_Primary/Primary_E9.asm

# Relies on undefined behaviour
Test_X87/D9_F9.asm
Test_X87/D9_F2.asm

# Relies on rounding correctness
Test_X87/D9_F8.asm

# This is basically a benchmark.
Test_FEX_bugs/XeSS_quadratic.asm


================================================
FILE: unittests/ASM/Disabled_Tests_ARMv8.0
================================================
# Nothing here yet


================================================
FILE: unittests/ASM/Disabled_Tests_ARMv8.2
================================================
# Nothing here yet


================================================
FILE: unittests/ASM/Disabled_Tests_ARMv8.4
================================================
# Nothing here yet


================================================
FILE: unittests/ASM/Disabled_Tests_Simulator
================================================
# AES and vpclmul unsupported in 256-bit SVE currently
Test_VEX/vaesdec.asm
Test_VEX/vaesdeclast.asm
Test_VEX/vaesdec256.asm
Test_VEX/vaesdeclast256.asm
Test_VEX/vaesenc256.asm
Test_VEX/vaesenclast256.asm
Test_VEX/vpclmulqdq_256.asm

# Simulator can't handle self-modifying code
Test_SelfModifyingCode/Delinking.asm
Test_SelfModifyingCode/DifferentBlock.asm
Test_SelfModifyingCode/SameBlock.asm

# Simulator can't do wfe
Test_Primary/Pause.asm

# Simulator can't handle `mrs x0, nzcv`
Test_SecondaryModRM/Reg_7_1.asm

# Simulator can't handle unaligned accesses
Test_Primary/Primary_01_Atomic16.asm
Test_Primary/Primary_01_Atomic32.asm
Test_Primary/Primary_01_Atomic64.asm
Test_Primary/Primary_09_Atomic16.asm
Test_Primary/Primary_09_Atomic32.asm
Test_Primary/Primary_09_Atomic64.asm
Test_Primary/Primary_23_Atomic16.asm
Test_Primary/Primary_23_Atomic32.asm
Test_Primary/Primary_23_Atomic64.asm
Test_Primary/Primary_29_Atomic16.asm
Test_Primary/Primary_29_Atomic32.asm
Test_Primary/Primary_29_Atomic64.asm
Test_Primary/Primary_31_Atomic16.asm
Test_Primary/Primary_31_Atomic32.asm
Test_Primary/Primary_31_Atomic64.asm
Test_Primary/Primary_87_Atomic16.asm
Test_Primary/Primary_87_Atomic32.asm
Test_Primary/Primary_87_Atomic64.asm
Test_Primary/Primary_FF_0_Atomic16.asm
Test_Primary/Primary_FF_0_Atomic32.asm
Test_Primary/Primary_FF_0_Atomic64.asm
Test_Primary/Primary_FF_1_Atomic16.asm
Test_Primary/Primary_FF_1_Atomic32.asm
Test_Primary/Primary_FF_1_Atomic64.asm
Test_Atomics/adc_atomic16.asm
Test_Atomics/adc_atomic32.asm
Test_Atomics/adc_atomic64.asm
Test_Atomics/sbb_atomic16.asm
Test_Atomics/sbb_atomic32.asm
Test_Atomics/sbb_atomic64.asm
Test_Atomics/neg_atomic16.asm
Test_Atomics/neg_atomic32.asm
Test_Atomics/neg_atomic64.asm
Test_Atomics/not_atomic16.asm
Test_Atomics/not_atomic32.asm
Test_Atomics/not_atomic64.asm

Test_PrimaryGroup/3_F7_02_2.asm
Test_PrimaryGroup/3_F7_02_3.asm
Test_PrimaryGroup/3_F7_03_2.asm

Test_TwoByte/0F_B0_3.asm
Test_TwoByte/0F_B0_4.asm
Test_TwoByte/0F_B0_5.asm
Test_TwoByte/0F_B0_6.asm
Test_TwoByte/0F_B0_7.asm
Test_TwoByte/0F_C0_Atomic16.asm
Test_TwoByte/0F_C0_Atomic32.asm
Test_TwoByte/0F_C0_Atomic64.asm

Test_Secondary/09_XX_01_8.asm
Test_Secondary/09_XX_01_9.asm
Test_Secondary/09_XX_01_12.asm
Test_Secondary/09_XX_01_13.asm
Test_Secondary/09_XX_01_15.asm
Test_Secondary/09_XX_01_18.asm
Test_Secondary/09_XX_01_19.asm

# Simulator doesn't handle rounding mode changes
Test_Secondary/15_XX_2.asm
Test_X87_F64/FLDCW_F64.asm
Test_H0F3A/66_08.asm
Test_H0F3A/66_09.asm
Test_H0F3A/66_0A.asm
Test_H0F3A/66_0B.asm
Test_OpSize/66_5B.asm
Test_VEX/vcvtpd2dq_inexact.asm
Test_VEX/vcvtps2dq_inexact.asm
Test_VEX/vldmxcsr.asm
Test_VEX/vroundpd.asm
Test_VEX/vroundps.asm
Test_VEX/vroundsd.asm
Test_VEX/vroundss.asm
Test_VEX/vcvtps2ph_rtne.asm
Test_VEX/vcvtps2ph_rd.asm
Test_VEX/vcvtps2ph_ru.asm
Test_VEX/vcvtps2ph_trunc.asm
Test_VEX/vcvtps2ph_rtne_mxcsr.asm
Test_VEX/vcvtps2ph_rd_mxcsr.asm
Test_VEX/vcvtps2ph_ru_mxcsr.asm
Test_VEX/vcvtps2ph_trunc_mxcsr.asm
Test_X87_F64/Rounding_F64.asm

# Simulator doesn't support cycle counter reading
Test_TwoByte/0F_31.asm

# Simulator doesn't support executing a syscall
Test_Secondary/09_F3_07.asm

# Vixl sim at 256-bit vector width accesses too much memory with some AVX instructions
Test_modrm_oob/VEX.asm

# Vixl simulator with 256-bit vector width doesn't always do correct data retention for 128-bit non-AVX operations.
Test_SSE4a/extrq_imm.asm
Test_SSE4a/insertq_imm.asm
Test_SSE4a/extrq_variable.asm
Test_SSE4a/insertq_variable.asm

# Simulator can't handle long jump through signal handler
Test_FEX_bugs/CodeBufferOverflow.asm


================================================
FILE: unittests/ASM/Disabled_Tests_host
================================================
# Uses thunks
Test_TwoByte/0F_3F.asm

# Not guaranteed to work in 64bit mode
Test_Primary/Primary_8C.asm
Test_Primary/Primary_8C_2.asm

# Zen+ CI doesn't support UMIP so it returns "real" values
Test_Secondary/07_XX_00.asm

# We don't emulate all of the MXCSR bits
Test_VEX/vldmxcsr.asm

# 3DNow!
Test_modrm_oob/DDD.asm


================================================
FILE: unittests/ASM/Disabled_Tests_x64
================================================
# Nothing here yet


================================================
FILE: unittests/ASM/Displacement_Encoding.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x5152535455565758"
  },

  "MemoryRegions": {
    "0x7FFFF000": "4096"
  },

  "MemoryData": {
    "0x7FFFF000": "48 47 46 45 44 43 42 41"
  }
}
%endif

; Tests to ensure that absolute disp32 addressing in 64-bit mode works correctly without being RIP relative.
; x86-64 has two displacement-only encodings, one is RIP relative, one is 32-bit (signed) displacement only.
; modrm.mod = 0b00 && modrm.rm = 0b101: Means no SIB, but address mode is RIP + disp32.
; modrm.mod = 0b00 && modrm.rm = 0b100: Means SIB, but address mode is disp32.
;  - if SIB.base = 0b101 && SIB.index = 0b100. Which means no registers for base and index.
; Test disp32 by mapping a page at the limit of 2GB and read data from it. Also store and load.
; If we were accidentally using RIP relative, then it would be 2GB + <low test base address>, which won't be mapped.

; Test disp32 load.
; The first 8 bytes of the page are preloaded through MemoryData above.
mov rax, [abs 0x7FFF_F000]

mov rbx, 0x5152535455565758

; LEA with disp32.
; Address of the last qword of the mapped page.
lea rcx, [abs 0x7FFF_FFF8]

; Test store with disp32.
mov [abs 0x7FFF_FFF8], rbx

; Load back through the address computed by the LEA to ensure it's correct.
mov rcx, [rcx]

hlt


================================================
FILE: unittests/ASM/FEX_bugs/32bit_syscall.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Issues `int 0x80` from 64-bit code with the arguments set up for both the
; 32-bit and the 64-bit register conventions. The test expects rax == 0
; afterwards.

; 32-bit:
; 265 = clock_gettime
; 64-bit
; 265 = linkat

; rax = syscall on both 32-bit and 64-bit
mov rax, 265

; rdi/rbx = first argument on 64-bit and 32-bit respectively
mov rdi, 0
mov rbx, 0

; rsi/rcx = second argument on 64-bit and 32-bit respectively
lea rsi, [rel .data]
lea rcx, [rel .data]

; Do a 32-bit syscall
; On a real linux kernel this will execute clock_gettime
; Under FEX without 32-bit syscall support this might try to execute linkat and return -ENOENT.
int 0x80
hlt

; Output buffer handed to the syscall as its second argument
.data:
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/3DNow_ModRMSIBDecode.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x3f800000bf800000"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; FEX-Emu had a bug with 3DNow! ModRM decoding when the source was SIB encoded.
; This would result in a crash in the frontend instruction decoding.
; Generate a 3DNow! instruction that uses SIB encoding to ensure this code path is tested.
lea rax, [rel data1]
; Index register is zero; it is only present to force a SIB byte
mov rbx, 0
pi2fw mm0, [rbx * 8 + rax + 0]

hlt

; Source operand: two dwords whose low words are -1 and 1. The expected MM0
; holds the floats -1.0 (low half) and 1.0 (high half).
align 8
data1:
dw -1
dw 0xFF
dw 1
dw 0xFF


================================================
FILE: unittests/ASM/FEX_bugs/BEXTR_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RCX": "0x5a"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; 32-bit BEXTR whose requested field runs past the top of the operand.
; Control word 0x3018: start bit 0x18 (24), length 0x30 (48). Only bits 31:24
; of ecx (0x5a) are available, so the expected result is 0x5a.
mov rcx, 0x8f635a775ad3b9b4
mov esi, 0x3018
bextr ecx, ecx, esi
; Compare the full 64-bit register against the expected value
cmp rcx, 0x5a
jne .bad

; rax = 0 signals success
.good:
mov rax, 0
hlt

; rax = 1 signals a wrong extraction result
.bad:
mov rax, 1
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BLSI_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RDX": "0xcafe"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; Checks the SF/ZF/CF/OF flags produced by BLSI for several source values.
; Any unexpected flag jumps to the failure label, which leaves rdx = 0xdead;
; the success path leaves rdx = 0xcafe.

; Source 0 sets ZF
mov rax, 0
blsi rax, rax

js fexi_fexi_im_so_broken
jnz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source 1 sets CF
mov rax, 1
blsi rax, rax

js fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jnc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source all-1's sets CF
mov rax, 0xffffffffffffffff
blsi rax, rax

js fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jnc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source 1<<63 sets CF and SF
mov rax, 0x8000000000000000
blsi rax, rax

jns fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jnc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Make sure we're correctly clearing the overflow flag
; (rbx is not listed in RegData for this test, so the value written here is not checked)
mov rbx, 5
; 0x7F + 1 overflows as a signed byte, which sets OF before the blsi
mov al, 0x7F
inc al
jno fexi_fexi_im_so_broken
blsi rax, rax
jo fexi_fexi_im_so_broken

; Happy ending
mov rdx, 0xcafe
hlt

; Failure path
fexi_fexi_im_so_broken:
mov rdx, 0xdead
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BLSMSK_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RDX": "0xcafe",
      "RBX": "5"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; Checks the SF/ZF/CF/OF flags produced by BLSMSK for several source values.
; rbx is set to the number of the check being run, so on failure it identifies
; the failing check; on success it ends at 5 and rdx = 0xcafe.

; Result in all-1's due to underflow so SF/CF set
mov rbx, 1
mov rax, 0
blsmsk rax, rax

jns fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jnc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Result in 1, so all flags clear
mov rbx, 2
mov rax, 11
blsmsk rax, rax

js fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Result in all-1's without carry, so SF set
mov rbx, 3
mov rax, 0x8000000000000000
blsmsk rax, rax

jns fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Make sure we're correctly clearing the zero flag
mov rbx, 4
; 0 + 0 sets ZF before the blsmsk
mov rax, 0
add rax, rax
jnz fexi_fexi_im_so_broken
blsmsk rax, rax
jz fexi_fexi_im_so_broken

; Make sure we're correctly clearing the overflow flag
mov rbx, 5
; 0x7F + 1 overflows as a signed byte, which sets OF before the blsmsk
mov al, 0x7F
inc al
jno fexi_fexi_im_so_broken
blsmsk rax, rax
jo fexi_fexi_im_so_broken

; Happy ending
mov rdx, 0xcafe
hlt

; Failure path
fexi_fexi_im_so_broken:
mov rdx, 0xdead
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BLSR_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RDX": "0xcafe"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; Checks the SF/ZF/CF/OF flags produced by BLSR for several source values.
; Any unexpected flag jumps to the failure label, which leaves rdx = 0xdead;
; the success path leaves rdx = 0xcafe.

; Source 0 sets CF and ZF
mov rax, 0
blsr rax, rax

js fexi_fexi_im_so_broken
jnz fexi_fexi_im_so_broken
jnc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source 1 sets ZF
mov rax, 1
blsr rax, rax

js fexi_fexi_im_so_broken
jnz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source 3 sets nothing
mov rax, 3
blsr rax, rax

js fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Source all-1's sets SF
mov rax, 0xffffffffffffffff
blsr rax, rax

jns fexi_fexi_im_so_broken
jz fexi_fexi_im_so_broken
jc fexi_fexi_im_so_broken
jo fexi_fexi_im_so_broken

; Make sure we're correctly clearing the overflow flag
; (rbx is not listed in RegData for this test, so the value written here is not checked)
mov rbx, 5
; 0x7F + 1 overflows as a signed byte, which sets OF before the blsr
mov al, 0x7F
inc al
jno fexi_fexi_im_so_broken
blsr rax, rax
jo fexi_fexi_im_so_broken

; Happy ending
mov rdx, 0xcafe
hlt

; Failure path
fexi_fexi_im_so_broken:
mov rdx, 0xdead
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BT_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xcafe"
  }
}
%endif

; Checks that bt/btc/bts/btr leave the zero flag untouched, both when it is set
; and when it is clear, for register and memory destinations.

; ebx doubles as a register operand and as the bit offset for the memory forms
mov ebx, 137
mov rdx, 0xe0000000

; case <op>: runs the ZF-preservation checks for one bit-test instruction.
;   %1 - mnemonic to test (bt, btc, bts or btr)
; Jumps to .bad as soon as ZF differs from what was set before the instruction.
%macro case 1
  ; set zero flag
  xor eax, eax

  ; zero flag should still be set after bt
  %1 ebx, 1
  jnz .bad
  %1 dword [rdx], ebx
  jnz .bad

  ; now clear the zero flag
  add eax, 1

  ; zero flag should still be clear after bt
  %1 eax, 1
  jz .bad
  %1 dword [rdx], ebx
  jz .bad
%endmacro

; Repeat for each bitwise op
case bt
case btc
case bts
case btr

; Every check passed
.good:
mov rax, 0xcafe
hlt

; Some instruction modified ZF
.bad:
mov rax, 0xdeadbeef
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BZHI_Sign.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4",
    "RBX": "0xFFFFFFFFFFFFFFF4",
    "RCX": "0x0",
    "RDX": "0x1337"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; FEX had a bug where bzhi would fail to update SF. Test that bzhi sets ZF/SF
; correctly based on the result.
;
; Before each flag check RDX is loaded with a unique 0xdeadN marker, so a failure
; halts with RDX identifying the check that failed. `mov` does not modify flags,
; so the markers do not disturb the bzhi results being tested.

mov rcx, 4
mov rbx, -12

; Result is 0x4
bzhi rax, rbx, rcx
mov rdx, 0xdead1
jz .fail
mov rdx, 0xdead2
js .fail

; Result is -12
; Only the flags matter here; the rdx destination is overwritten right away.
mov rcx, 64
bzhi rdx, rbx, rcx
mov rdx, 0xdead3
jz .fail
mov rdx, 0xdead4
jns .fail

; Result is 0x00
mov rdx, 0
bzhi rcx, rbx, rdx
mov rdx, 0xdead5
jnz .fail
mov rdx, 0xdead6
js .fail

; All checks passed
mov rdx, 0x1337
hlt

.fail:
hlt


================================================
FILE: unittests/ASM/FEX_bugs/BitConditionCheck.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Env": { "FEX_MAXINST" : "41010", "FEX_MULTIBLOCK": "1", "FEX_TSOENABLED": "0" }
}
%endif

; FEX-Emu had a bug where it tried to encode too large of a tbz/tbnz offset.
; The large body below is never executed: the `jz long_jump` ahead of it is
; always taken. It only needs to be translated by the JIT.

%macro TooConditionalBitBranch 0
  ; Stresses ARM64's tbz/tbnz branch target of +-32KB.
  jmp %%top
  %%top:

  lea rax, [rel data]
  mov rbx, 1
  ; Pad the distance between %%top and the backwards parity branch
  %rep 2000
    add qword [rel data], rbx
  %endrep
  lea rax, [rel %%top]
  ; Backwards conditional branch over the whole padded body
  jpe %%top
%endmacro

mov rax, 0
; ZF is set by this compare; the following mov does not change flags
cmp rax, 0
mov rcx, 0

jz long_jump

TooConditionalBitBranch

long_jump:
mov rax, 1
hlt

data:
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/Blake3.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"]
}
%endif

; AVX instruction sequence exercising blend/permute/shuffle plus 256-bit loads
; and stores. The config lists no expected register data, so the test only
; requires that the sequence executes through to the hlt.
vblendps ymm3, ymm12, ymm9, 0xcc
vperm2f128 ymm12, ymm1, ymm2, 0x20
vmovups [rel .data_result + 0], ymm12
vunpckhps ymm14, ymm4, ymm5
vblendps ymm4, ymm8, ymm0, 0xcc
vunpckhps ymm15, ymm6, ymm7
vperm2f128 ymm7, ymm3, ymm4, 0x20
vmovups [rel .data_result + 32], ymm7
vshufps ymm5, ymm10, ymm13, 0x4e
vblendps ymm6, ymm5, ymm13, 0xcc
vshufps ymm13, ymm14, ymm15, 0x4e
vblendps ymm10, ymm10, ymm5, 0xcc
vblendps ymm14, ymm14, ymm13, 0xcc
vperm2f128 ymm8, ymm10, ymm14, 0x20
vmovups [rel .data_result + (32 * 2)], ymm8
vblendps ymm15, ymm13, ymm15, 0xcc
vperm2f128 ymm13, ymm6, ymm15, 0x20
vmovups [rel .data_result + (32 * 3)], ymm13
vperm2f128 ymm9, ymm1, ymm2, 0x31
vperm2f128 ymm11, ymm3, ymm4, 0x31
vmovups [rel .data_result + (32 * 4)], ymm9
vperm2f128 ymm14, ymm10, ymm14, 0x31
vperm2f128 ymm15, ymm6, ymm15, 0x31
vmovups [rel .data_result + (32 * 5)], ymm11
vmovups [rel .data_result + (32 * 6)], ymm14
vmovups [rel .data_result + (32 * 7)], ymm15
; Aligned 256-bit accesses: .data_stack sits 96 bytes after the page-aligned
; .data label, which keeps it 32-byte aligned.
vmovdqa ymm0, [rel .data_stack + (32 * 0)]
vpaddd  ymm1, ymm0, [rel .data_stack + (32 * 1)]
vmovdqa [rel .data_stack + (32 * 1)], ymm1
vpxor   ymm0, ymm0, [rel .data]
vpxor   ymm2, ymm1, [rel .data + 32]

hlt

align 4096
; 96 bytes of source data
.data:
dq 0, 0, 0, 0, 0, 0
dq 0, 0, 0, 0, 0, 0

; Two 32-byte slots
.data_stack:
dq 0, 0, 0, 0
dq 0, 0, 0, 0

; Eight 32-byte slots, one per vmovups store above (offsets 32*0 .. 32*7).
; Previously only seven slots were reserved, so the final store wrote 32 bytes
; past the end of the assembled image.
.data_result:
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/BranchConditionCheck.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Env": { "FEX_MAXINST" : "41010", "FEX_MULTIBLOCK": "1", "FEX_TSOENABLED": "0" }
}
%endif

; FEX-Emu had a bug where it tried encoding too large of a conditional branch offset.
; The large body below is never executed: the `jz long_jump` ahead of it is
; always taken. It only needs to be translated by the JIT.

%macro TooLargeConditionalBranch 0
  ; Stresses ARM64's b.cc/cbz/cbnz branch target of +-1MB.
  jmp %%top
  %%top:

  lea rax, [rel data]
  mov rbx, 1
  ; Pad the distance between %%top and the backwards conditional branch
  %rep 37500
    add qword [rel data], rbx
  %endrep
  lea rax, [rel %%top]
  ; Backwards conditional branch over the whole padded body
  jnz %%top
%endmacro

mov rax, 0
; ZF is set by this compare; the following mov does not change flags
cmp rax, 0
mov rcx, 0

jz long_jump

TooLargeConditionalBranch

long_jump:
mov rax, 1
hlt

data:
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/CodeBufferOverflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "Env": { "FEX_MAXINST" : "41010", "FEX_MULTIBLOCK": "1", "FEX_TSOENABLED": "0" }
}
%endif

; FEX had a bug where its JIT heuristic wouldn't catch all cases, and allocations would overflow and crash.
; The `rep movsq` run below is skipped at runtime by the always-taken
; `jz long_jump`; it only needs to be translated by the JIT.

%macro OverflowBuffer 0
  %rep 256
  ; This instruction is absolutely abysmal under FEX.
  ; Easily stresses our heuristic for block sizes
  rep movsq
  %endrep
%endmacro

mov rax, 0
; ZF is set by this compare; the following movs do not change flags
cmp rax, 0
mov rcx, 0
mov rdi, 1
mov rsi, 2

jz long_jump

OverflowBuffer

long_jump:
mov rax, 1
hlt

; Not referenced by the code above
data:
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/Divide32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000001",
    "RDX": "0x1"
  }
}
%endif

; FEX had a bug where we failed to ignore garbage upper bits of a 32-bit divisor
; with div. This test does a division with garbage in the upper bits where the
; result would differ if they were not ignored.

; 0x100000003 / 0x2 = 0x80000001 remainder 1
; Dividend is edx:eax = 0x1:0x00000003
mov edx, 1
mov eax, 3
; Divisor is ecx = 0x2; the upper 32 bits of rcx hold the garbage 0xdeadbeef
mov rcx, 0xdeadbeef00000002
div ecx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/H0F3AREXBug.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AES", "SHA", "SSE4.2", "SSE4.1", "PCLMUL", "SSSE3"]
}
%endif

; FEX-Emu had a bug in decoding the H0F3A instruction table.
; It would accidentally require REX.W to not be set on the suite of instructions that ignore the flag.
; This just executes all instructions from H0F3A that ignore the REX.W flag, to ensure it decodes.
; The `o64` prefix on every line is what requests the REX.W encoding from the assembler.
; No register values are checked; the test passes if execution reaches the hlt.

o64 palignr mm0, mm1, 0
o64 roundps xmm0, xmm1, 0
o64 roundpd xmm0, xmm1, 0
o64 roundss xmm0, xmm1, 0
o64 roundsd xmm0, xmm1, 0
o64 blendps xmm0, xmm1, 0
o64 blendpd xmm0, xmm1, 0
o64 palignr xmm0, xmm1, 0
o64 pextrb eax, xmm0, 0
o64 pextrw eax, xmm0, 0
; NOTE(review): extractps is listed twice in a row; the second line may have
; been meant for a different opcode - confirm.
o64 extractps eax, xmm0, 0
o64 extractps eax, xmm0, 0
o64 pinsrb xmm0, eax, 0
o64 insertps xmm0, xmm1, 0
o64 dpps xmm0, xmm1, 0
o64 dppd xmm0, xmm1, 0
o64 mpsadbw xmm0, xmm1, 0
o64 pclmulqdq xmm0, xmm1, 0
o64 pcmpestrm xmm0, xmm1, 0
o64 pcmpestri xmm0, xmm1, 0
o64 pcmpistrm xmm0, xmm1, 0
o64 pcmpistri xmm0, xmm1, 0
o64 sha1rnds4 xmm0, xmm1, 0
o64 aeskeygenassist xmm0, xmm1, 0

hlt


================================================
FILE: unittests/ASM/FEX_bugs/IMUL_garbagedata_negative.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x0000000000007fff"
  }
}
%endif

; cfmerge: shifts R15 left by one and ORs the current CF in as the new low bit.
; Clobbers RAX (lahf writes AH, which is then shifted down and masked).
%macro cfmerge 0

; Get CF
lahf
shr rax, 8
and rax, 1

; Merge in to results
shl r15, 1
or r15, rax

%endmacro

; FEX had a bug where imul flag calculation was incorrect.
; CF and OF are set due to overflow.
;
; Every multiply below overflows its destination size, so each cfmerge should
; append a 1 bit. With 15 cases the expected R15 is 0x7fff.

mov r15, 0
mov rax, 0

; imul two-operand form (reg, reg)
mov ebx, 0xaaaaaaab
mov ecx, 0x6D
imul cx, bx
cfmerge

mov ebx, 0xaaaaaaab
mov ecx, 0x6D
imul ecx, ebx
cfmerge

mov rbx, 0xaaaaaaaa_aaaaaaab
mov rcx, 0x6D
imul rcx, rbx
cfmerge

; imul three-operand form with an 8-bit immediate
mov ebx, 0xaaaaaaab
imul cx, bx, 0x6D
cfmerge

mov ebx, 0xaaaaaaab
imul ecx, ebx, 0x6D
cfmerge

; 32-bit operation with garbage in the upper 32 bits of the source register
mov rbx, 0xaaaaaaaa_aaaaaaab
imul ecx, ebx, 0x6D
cfmerge

; 64-bit operation; completes the 16/32/64-bit set for the 8-bit immediate form
mov rbx, 0xaaaaaaaa_aaaaaaab
imul rcx, rbx, 0x6D
cfmerge

; imul three-operand form with 16-bit and 32-bit immediates
mov rbx, 0xaaaaaaaa_aaaaaaab
imul cx, bx, 0x600D
cfmerge

mov rbx, 0xaaaaaaaa_aaaaaaab
imul ecx, ebx, 0x600D0000
cfmerge


mov rbx, 0xaaaaaaaa_aaaaaaab
imul rcx, rbx, 0x600D0000
cfmerge

mov rbx, 0x0aaaaaaa_aaaaaaab
imul rcx, rbx, -0x600D0000
cfmerge


; IMUL implicit dest
; rax is reloaded before each case because cfmerge clobbers it
mov rax, 0x0aaaaaaa_aaaaaaab
mov ecx, 0x6D
imul cl
cfmerge

mov rax, 0x0aaaaaaa_aaaaaaab
mov ecx, 0x600D
imul cx
cfmerge

mov rax, 0x0aaaaaaa_aaaaaaab
mov ecx, 0x600D0000
imul ecx
cfmerge

mov rax, 0x0aaaaaaa_aaaaaaab
mov rcx, 0x600D0000_00000000
imul rcx
cfmerge

hlt


================================================
FILE: unittests/ASM/FEX_bugs/InitialPFFlag.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x202"
  }
}
%endif

; Reads the flags register before any flag-modifying instruction has run and
; reports it in RAX. The expected value 0x202 is the reserved bit 1 plus IF(9),
; i.e. PF and every other status flag must start out clear.

; Stack pointer for the pushfq/pop pair below
; NOTE(review): assumes the harness maps memory around 0xe0000000 - confirm
mov rsp, 0xe0000010

pushfq
pop rax

; Mask out only the flags we care about (ignore undefined bits)
; Keep: CF(0), PF(2), AF(4), ZF(6), SF(7), IF(9), DF(10), OF(11), reserved(1)
and rax, 0xED7

hlt


================================================
FILE: unittests/ASM/FEX_bugs/LargeRotatesForSmallSizes.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x4142434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4142434445464714",
    "RDI": "0x414243444546478a",
    "RBP": "0x4142434445464704",
    "RSP": "0x414243444546478a",
    "R8":  "0x4142434445461d22",
    "R9":  "0x41424344454651d2",
    "R10": "0x4142434445461d20",
    "R11": "0x41424344454651d2",
    "R12": "0x4142434445463a45",
    "R13": "0x41424344454608e9",
    "R14": "0x4142434445463a41",
    "R15": "0x41424344454608e9"
  }
}
%endif

; FEX-Emu had a bug where 8-bit and 16-bit rotates with carry generated incorrect results when the rotate amount was larger than the data size.
; This is well defined in x86 semantics.
;
; Each rcr/rcl pair works on its own qword slot in .data; the 16 slots are
; loaded in to the 16 GPRs at the end for comparison. The stc/clc only controls
; the carry going in to the rcr; the rcl that follows sees whatever carry the
; rcr left behind.

mov cl, 0x9
stc
jmp .test
.test:
; 8-bit: Test 1-bit past data size, plus carry
rcr byte [rel .data + (0 * 8)], cl
rcl byte [rel .data + (1 * 8)], cl

mov cl, 0x9
clc
jmp .test2
.test2:
; 8-bit: Test 1-bit past data size, no carry
rcr byte [rel .data + (2 * 8)], cl
rcl byte [rel .data + (3 * 8)], cl

mov cl, 0x1f
stc
jmp .test3
.test3:
; 8-bit: Test maximum rotate count (0x1f), plus carry
rcr byte [rel .data + (4 * 8)], cl
rcl byte [rel .data + (5 * 8)], cl

mov cl, 0x1f
clc
jmp .test4
.test4:
; 8-bit: Test maximum rotate count (0x1f), no carry
rcr byte [rel .data + (6 * 8)], cl
rcl byte [rel .data + (7 * 8)], cl

mov cl, 0xF
stc
jmp .test5
.test5:
; 16-bit: Test a rotate count of 0xF (15), plus carry
; NOTE(review): the 8-bit cases use one past the data size (9); the 16-bit
; equivalent would be 0x11 rather than 0xF - confirm the intended count.
rcr word [rel .data + (8 * 8)], cl
rcl word [rel .data + (9 * 8)], cl

mov cl, 0xF
clc
jmp .test6
.test6:
; 16-bit: Test a rotate count of 0xF (15), no carry
rcr word [rel .data + (10 * 8)], cl
rcl word [rel .data + (11 * 8)], cl

mov cl, 0x1f
stc
jmp .test7
.test7:
; 16-bit: Test maximum rotate count (0x1f), plus carry
rcr word [rel .data + (12 * 8)], cl
rcl word [rel .data + (13 * 8)], cl

mov cl, 0x1f
clc
jmp .test8
.test8:
; 16-bit: Test maximum rotate count (0x1f), no carry
rcr word [rel .data + (14 * 8)], cl
rcl word [rel .data + (15 * 8)], cl

jmp .end
.end:

; Load all the results in order
mov rax, [rel .data + (0 * 8)]
mov rbx, [rel .data + (1 * 8)]
mov rcx, [rel .data + (2 * 8)]
mov rdx, [rel .data + (3 * 8)]
mov rsi, [rel .data + (4 * 8)]
mov rdi, [rel .data + (5 * 8)]
mov rbp, [rel .data + (6 * 8)]
mov rsp, [rel .data + (7 * 8)]
mov r8,  [rel .data + (8 * 8)]
mov r9,  [rel .data + (9 * 8)]
mov r10, [rel .data + (10 * 8)]
mov r11, [rel .data + (11 * 8)]
mov r12, [rel .data + (12 * 8)]
mov r13, [rel .data + (13 * 8)]
mov r14, [rel .data + (14 * 8)]
mov r15, [rel .data + (15 * 8)]

hlt

align 4096
; Sixteen identical qword slots, one per rotate above
.data:
times 16 dq 0x4142434445464748


================================================
FILE: unittests/ASM/FEX_bugs/LargeRotatesForSmallSizes_More.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344454647a4",
    "RBX": "0x4142434445464790",
    "RCX": "0x4142434445464724",
    "RDX": "0x4142434445464790",
    "RSI": "0x4142434445468e90",
    "RDI": "0x414243444546a3a4",
    "RBP": "0x41424344454623a4",
    "RSP": "0x4142434445468e90",
    "R8":  "0x4142434445464729",
    "R9":  "0x4142434445464741",
    "R10": "0x4142434445464709",
    "R11": "0x4142434445464741",
    "R12": "0x414243444546748a",
    "R13": "0x4142434445460474",
    "R14": "0x4142434445467482",
    "R15": "0x4142434445460474"
  }
}
%endif

; FEX-Emu had a bug where 8-bit and 16-bit rotates with carry generated incorrect results when the rotate amount was larger than the data size.
; These are additional tests to capture more edge cases in the implementation.
; This is well defined in x86 semantics.
;
; In every pair the stc/clc only controls the carry going in to the rcr; the
; rcl that follows sees whatever carry the rcr left behind.

mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rdx, 0x4142434445464748
mov rdi, 0x4142434445464748
mov rsi, 0x4142434445464748
mov rbp, 0x4142434445464748
mov rsp, 0x4142434445464748
mov r8, 0x4142434445464748
mov r9, 0x4142434445464748
mov r10, 0x4142434445464748
mov r11, 0x4142434445464748
mov r12, 0x4142434445464748
mov r13, 0x4142434445464748
mov r14, 0x4142434445464748
mov r15, 0x4142434445464748

; cl = 0x1E (30): the effective count is 30 mod 9 = 3 for the 8-bit rotates
; and 30 mod 17 = 13 for the 16-bit rotates.
mov rcx, 0x515253545556571E
jmp .test
.test:
; 8-bit cl, carry
stc
rcr r8b, cl
rcl r9b, cl

; 8-bit cl, no-carry
; (previously used stc, which made this an exact repeat of the case above)
clc
rcr r10b, cl
rcl r11b, cl

; 16-bit cl, carry
; (previously used the 8-bit r12b/r13b registers)
stc
rcr r12w, cl
rcl r13w, cl

; 16-bit cl, no-carry
; (previously used stc and the 8-bit r14b/r15b registers)
clc
rcr r14w, cl
rcl r15w, cl

; Fix RCX since we used it
mov rcx, 0x4142434445464748

; Immediate count 0x21 is masked to 5 bits, giving an effective count of 1.

; 8-bit const, carry
stc
rcr al, 0x21
rcl bl, 0x21

; 8-bit const, no-carry
clc
rcr cl, 0x21
rcl dl, 0x21

; 16-bit const, carry
stc
rcr di, 0x21
rcl si, 0x21

; 16-bit const, no-carry
clc
rcr bp, 0x21
rcl sp, 0x21

hlt


================================================
FILE: unittests/ASM/FEX_bugs/LoadAtBoundary_LowerPrecision.asm
================================================
%ifdef CONFIG
{
  "MemoryRegions": {
    "0x100000000": "4096"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FEX-Emu had a bug where x87 loadstores at a page boundary with reduced precision enabled would loadstore 128-bits.
; The config maps a single 4096-byte page at 0x100000000; every access below
; ends exactly on the last byte of that page. No register values are checked.

finit ; enters x87 state

mov rax, 0x100000000
mov rbx, 0x4142434445464748
mov rcx, 0x5152535455565758
; Address of the last 16 bytes of the page
mov rdx, (0x100000000 + 0x1000 - 16)

; Fill the last 16 bytes of the page with known data
mov [rdx], rbx
mov [rdx + 8], rcx

; rdx = first address past the end of the page
mov rdx, 0x100000000 + 0x1000

; Do an 80-bit load at the edge of a page.
; Ensuring tword loads don't extend past the end of a page.
fld tword [rdx - 10]

; Do an 80-bit BCD load at the edge of a page.
fbld [rdx - 10]

; Do a BCD store
fbstp [rdx - 10]

; Regular 80-bit store
fstp tword [rdx - 10]

hlt


================================================
FILE: unittests/ASM/FEX_bugs/LongSignedDivide.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xfa4fa4fa4fa50e8f",
    "RDX": "0x000000000000001c"
  }
}
%endif
; FEX-Emu had a bug where a 128-bit signed divide of a large unsigned dividend by a negative divisor would result in incorrect data.
; This only manifested itself when the sign bit differed between upper and lower halves of the dividend.

; Dividend is rdx:rax: upper half zero, lower half with its top bit set
mov rax, 0xfffffffffffc70f9
mov rdx, 0x0000000000000000
; Divisor is negative (-45)
mov rbx, 0xffffffffffffffd3

jmp .test
.test:
; Quotient is returned in rax, remainder in rdx
idiv rbx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/LoopAddressSizeCheck.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000010001",
    "RBX": "0x0000000000000001"
  }
}
%endif

; FEX-Emu had a bug in the 32-bit implementation of LOOP where it didn't handle 16-bit RCX correctly.
; For test coverage on the 64-bit side, ensure that both 64-bit and 32-bit operation works correctly.
mov rax, 0
mov rbx, 0
; Count with a bit set above bit 15: a counter truncated to 16 bits would
; loop only once instead of 0x10001 times.
mov rcx, 0x0001_0001

.test:
inc rax
a64 loop .test

; Count with a bit set above bit 31: the a32 form only uses ecx (= 1), so
; this loop body must run exactly once.
mov rcx, 0x1_0000_0001
.test2:
inc rbx
a32 loop .test2

hlt


================================================
FILE: unittests/ASM/FEX_bugs/MinMaxNaN.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x00000000",
      "RBX": "0x00000000",
      "RCX": "0x00000000",
      "RSI": "0x00000000"
  },
  "HostFeatures": ["AVX"]
}
%endif

; Checks the zero-sign and NaN handling of min/max across the scalar, SSE
; packed, AVX-128 and AVX-256 encodings, for both single and double precision.
; Any mismatch jumps to fexi_fexi_im_so_broken (RSI = 0xdeadbeef); success
; zeroes RAX/RBX/RCX/RSI.

; single_case <op>, <src1 bits>, <src2 bits>, <expected bits>
; Runs <op> (min or max) on 32-bit float bit patterns and compares the low
; 32 bits of each result against <expected bits>.
%macro single_case 4
  ; Load sources
  mov eax, %2
  mov ebx, %3
  movd xmm0, eax
  movd xmm1, ebx

  ; Calculate scalar min/max
  %1ss xmm0, xmm1

  ; Check result
  movd ecx, xmm0
  cmp ecx, %4
  jne fexi_fexi_im_so_broken
  mov ecx, 0

  ; Now try the SSE vector
  ; (xmm0 now holds the previous result, so this feeds it back in as source 1)
  %1ps xmm0, xmm1
  movd ecx, xmm0
  cmp ecx, %4
  jne fexi_fexi_im_so_broken
  mov ecx, 0

  ; And the AVX-128 version
  v%1ps xmm2, xmm0, xmm1
  movd ecx, xmm2
  cmp ecx, %4
  jne fexi_fexi_im_so_broken
  mov ecx, 0

  ; And the AVX-256 version
  v%1ps ymm2, ymm0, ymm1
  movd ecx, xmm2
  cmp ecx, %4
  jne fexi_fexi_im_so_broken

%endmacro

; case_d <op>, <src1 bits>, <src2 bits>, <expected bits>
; Double-precision counterpart of single_case. The expected value is loaded
; in to rdx first because it is a 64-bit constant.
%macro case_d 4
  ; Load sources
  mov rax, %2
  mov rbx, %3
  movq xmm0, rax
  movq xmm1, rbx

  ; Calculate scalar min/max
  %1sd xmm0, xmm1

  ; Check result
  movq rcx, xmm0
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; Now try the SSE vector
  %1pd xmm0, xmm1
  movq rcx, xmm0
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; And the AVX-128 version
  v%1pd xmm2, xmm0, xmm1
  movq rcx, xmm2
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
  mov rcx, 0

  ; And the AVX-256 version
  v%1pd ymm2, ymm0, ymm1
  movq rcx, xmm2
  mov rdx, %4
  cmp rcx, rdx
  jne fexi_fexi_im_so_broken
%endmacro

; Thin wrappers so that `cases` can build the macro name from "min"/"max"
; plus a "_s"/"_d" suffix. Arguments: src1, src2, expected.
%macro min_s 3
  single_case min, %1, %2, %3
%endmacro

%macro max_s 3
  single_case max, %1, %2, %3
%endmacro

%macro min_d 3
  case_d min, %1, %2, %3
%endmacro

%macro max_d 3
  case_d max, %1, %2, %3
%endmacro

; Single-precision bit patterns
zero_s equ 0x00000000
negzero_s equ 0x80000000
qnan_s equ 0x7fc00000
snan_s equ 0x7f800001

; Double-precision bit patterns
zero_d equ 0x0000_0000_0000_0000
negzero_d equ 0x8000_0000_0000_0000
qnan_d equ 0x7ff8_0000_0000_0000
snan_d equ 0x7ff0_0000_0000_0001

; cases <suffix>: the full list of checks, instantiated once with _s and once
; with _d. Each line reads: op src1, src2, expected.
%macro cases 1
  ; Basic identities
  min%1 zero%1,    zero%1,    zero%1
  max%1 zero%1,    zero%1,    zero%1
  min%1 negzero%1, negzero%1, negzero%1
  max%1 negzero%1, negzero%1, negzero%1
  min%1 qnan%1,    qnan%1,    qnan%1
  max%1 qnan%1,    qnan%1,    qnan%1

  ; "If the values being compared are both 0.0s (of either sign), the value in
  ; the second source operand is returned"
  min%1 zero%1,    negzero%1, negzero%1
  max%1 zero%1,    negzero%1, negzero%1
  min%1 negzero%1, zero%1,    zero%1
  max%1 negzero%1, zero%1,    zero%1

  ; "If only one value is a NaN (SNaN or QNaN) for this instruction, the second
  ; source operand, either a NaN or a valid floating-point value, is written to
  ; the result"
  min%1 zero%1,    qnan%1,    qnan%1
  min%1 negzero%1, qnan%1,    qnan%1
  min%1 qnan%1,    zero%1,    zero%1
  min%1 qnan%1,    negzero%1, negzero%1

  max%1 zero%1,    qnan%1,    qnan%1
  max%1 negzero%1, qnan%1,    qnan%1
  max%1 qnan%1,    zero%1,    zero%1
  max%1 qnan%1,    negzero%1, negzero%1

  min%1 zero%1,    snan%1,    snan%1
  min%1 negzero%1, snan%1,    snan%1
  min%1 snan%1,    zero%1,    zero%1
  min%1 snan%1,    negzero%1, negzero%1

  max%1 zero%1,    snan%1,    snan%1
  max%1 negzero%1, snan%1,    snan%1
  max%1 snan%1,    zero%1,    zero%1
  max%1 snan%1,    negzero%1, negzero%1

  ; "If a value in the second operand is an SNaN, that SNaN is returned
  ; unchanged to the destination (that is, a QNaN version of the SNaN is not
  ; returned)."
  ; Only min is exercised for this rule.
  min%1 qnan%1, snan%1, snan%1
  min%1 snan%1, snan%1, snan%1
%endmacro

single_cases:
  cases _s

cases_double:
  cases _d

success:
  ; Clear the scratch registers so they match the expected register data
  mov rax, 0
  mov rbx, 0
  mov rcx, 0
  mov rsi, 0
  hlt

fexi_fexi_im_so_broken:
  ; Leave rax/rbx/rcx as-is for inspection
  mov rsi, 0xdeadbeef
  hlt


================================================
FILE: unittests/ASM/FEX_bugs/MoveMerging.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xaaaa",
    "RBX": "0xaaaa",
    "RSI": "0xbbbb"
  }
}
%endif

; FEX had a bug with mov+xchg back-to-back due to failing to account for a copy
; inserted during RA. This resulted in a hang starting the game Hades due to the
; mov+xchg code sequence found within Wine's x64 build of ucrtbase.dll.

; Distinct starting values so that each register's origin is identifiable
mov rax, 0xaaaa
mov rbx, 0xbbbb
mov rsi, 0xcccc

; step 1
mov    rsi,rax
; rax = 0xaaaa
; rbx = 0xbbbb
; rsi = 0xaaaa

; step 2
xchg   rbx,rsi
; rax = 0xaaaa
; rbx = 0xaaaa
; rsi = 0xbbbb

hlt


================================================
FILE: unittests/ASM/FEX_bugs/NegativeCallAddressSizeOverride.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; FEX had a bug with relative call instructions.
; It was incorrectly truncating the immediate displacement based on address size override AND operand size override.
; Address size override doesn't actually change immediate representation on the call instruction.

; The call pushes a return address, so a stack is needed
; NOTE(review): assumes the harness maps memory below 0xe0001000 - confirm
mov rsp, 0xe000_1000
mov rax, 0

jmp .after
; Call target; placed before the call so the displacement is negative
.test:
mov rax, 1
hlt

.after:
a32 call .test

; Only reached if the call did not land on .test; rax is still 0 here
hlt


================================================
FILE: unittests/ASM/FEX_bugs/OptSizeConfusion.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000f0f0f0f",
    "RBX": "0x000000000f0f0f0f",
    "RCX": "0x000000000f0f0f0f",
    "RDX": "0x00000000ffffffff",
    "R9": "0x000000000f0f0f0f"
  }
}
%endif

; FEX had several bugs in its constprop pass where the implicit zeroing of the upper 32 bits by a
; 32-bit operation wasn't accounted for, leading to the upper bits being preserved instead.
;
; Each 32-bit operation below leaves the low 32 bits unchanged (or, for neg,
; produces 0xffffffff) but must still clear the upper 32 bits of the register.

mov rax, 0x0f0f0f0f0f0f0f0f
mov rbx, 0x0f0f0f0f0f0f0f0f
mov rcx, 0x0f0f0f0f0f0f0f0f
mov rdx, 1
mov r9, 0x0f0f0f0f0f0f0f0f
xor eax, 0
and ebx, ebx
shr ecx, 0
neg edx
shl r9d, 0
hlt


================================================
FILE: unittests/ASM/FEX_bugs/PSRLDQBuf.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; FEX-Emu had a bug with vpsrldq where if the shift was >= 16 bytes then the top half of the ymm register wasn't modified.
; Adds a simple test to ensure this continues working.
; A 16-byte shift clears each 128-bit lane, so all of ymm0 must end up zero.
vmovups ymm0, [rel .data]
vpsrldq ymm0, ymm0, 16
hlt

align 32
; Non-zero in every qword so that an unmodified lane is detectable
.data:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/FEX_bugs/Push.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xe0000010",
    "RSP": "0xe0000008"
  }
}
%endif

; FEX had a bug where a `push rsp` would generate an Arm64 instruction with undefined behaviour.
; `push rsp` -> `str x8, [x8, #-8]!`
; This instruction has constrained undefined behaviour.
; On Cortex it stores the original value.
; On Apple Silicon it raises a SIGILL.
; It can also store undefined data or have undefined behaviour.
; Test to ensure we don't generate undefined behaviour.
mov rsp, 0xe0000010
push rsp

; The value stored must be rsp from before the push (0xe0000010), while rsp
; itself is now 8 lower (0xe0000008).
mov rax, [rsp]

hlt


================================================
FILE: unittests/ASM/FEX_bugs/REX/0F_38.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x424446484a4c4e50",
    "RCX": "0x00000000d2af0486",
    "R8": "0"
  }
}
%endif

; Checks that only the REX prefix immediately before the opcode is honoured for
; instructions in the 0F 38 table; any earlier REX byte must be ignored.
mov rax, 0x4142434445464748
mov rcx, 0x4142434445464748

; r8 is expected to stay 0; it would be the destination if the dummy REX
; bits were wrongly applied.
mov r8, 0

lea rbx, [rel .data]
jmp .test
.test:

; adcx rax, [rbx]
; Real encoding: 0x66, 0x48, 0x0f, 0x38, 0xf6, 0x03
; Add a dummy REX prefix that enables everything. Really mess up FEX's cumulative usage.
db 0x4f, 0x66, 0x48, 0x0f, 0x38, 0xf6, 0x03

; crc32 ecx, dword [rbx]
; Real encoding: 0xf2, 0x0f, 0x38, 0xf1, 0x0b
; Add a dummy rex encoding with the widening bit set.
; If FEX parsed this incorrectly, then it converts the crc in to a 64-bit version.
db 0x48, 0xf2, 0x0f, 0x38, 0xf1, 0x0b

hlt

align 16
; Memory operand for both instructions above
.data:
dq 0x0102030405060708


================================================
FILE: unittests/ASM/FEX_bugs/REX/0F_3A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000055565758",
    "RCX": "0x5152535455565758",
    "R8": "0"
  }
}
%endif

; Checks that only the REX prefix immediately before the opcode is honoured for
; instructions in the 0F 3A table; any earlier REX byte must be ignored.
mov rax, 0x4142434445464748
mov rcx, 0x4142434445464748
; r8 is expected to stay 0
mov r8, 0
movups xmm0, [rel .data]
jmp .test
.test:

; pextrd eax, xmm0, 0
; Real encoding: 0x66, 0x0f, 0x3a, 0x16, 0xc0, 0x00
; Add an ignored REX (0x4f, all bits set) ahead of the 0x66 prefix. Would convert `eax` to `rax` if decoded incorrectly.
db 0x4f, 0x66, 0x0f, 0x3a, 0x16, 0xc0, 0x00
; pextrq rcx, xmm0, 0
; Real encoding: 0x66, 0x48, 0x0f, 0x3a, 0x16, 0xc1, 0x00
; Add an ignored REX (0x47, W clear) ahead of the 0x66 prefix, should do nothing. Might convert rcx to ecx if only first REX decoded.
db 0x47, 0x66, 0x48, 0x0f, 0x3a, 0x16, 0xc1, 0x00
hlt

align 16
; Source vector loaded in to xmm0
.data:
dq 0x5152535455565758, 0x6162636465666768


================================================
FILE: unittests/ASM/FEX_bugs/REX/DDDNow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0506070801020304",
    "MM0": "0x0506070801020304"
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; Checks that only the REX prefix immediately before the opcode is honoured for
; 3DNow! instructions; any earlier REX byte must be ignored.
femms
mov rax, 0x4142434445464748

mov r8, 0

lea rbx, [rel .data]
jmp .test
.test:

; pswapd mm0, [rbx]
; Real encoding: 0x0f, 0x0f, 0x03, 0xbb
; Add a NOP REX encoding between a volatile REX and the 3DNow! instruction.
; FEX accidentally being cumulative will cause rbx to convert to r8.
; NOTE(review): the leading REX is 0x41 (REX.B only); applied to rm=rbx that
; would select r11 rather than r8 - confirm which register was intended.
db 0x41, 0x40, 0x0f, 0x0f, 0x03, 0xbb

; Copy the result out so it can also be checked through RAX
movd rax, mm0
hlt

align 16
; pswapd source; the two dwords are expected to come back swapped
.data:
dq 0x0102030405060708


================================================
FILE: unittests/ASM/FEX_bugs/REX/Primary.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464e50",
    "RCX": "0x000000004a4c4e50",
    "R8": "0x4142434445464748",
    "R9": "0x4142434445464748"
  }
}
%endif

; FEX-Emu had a bug where REX was not correctly ignored if it was placed at the wrong location.
; "Wrong" means it wasn't encoded just before the opcode byte.
; This can be done for multiple reasons, either padding or anti-emulation.
mov rax, 0x4142434445464748
mov rcx, 0x4142434445464748
; r8/r9 must keep these values: the misplaced REX prefixes below must not
; redirect the adds to them.
mov r8, 0x4142434445464748
mov r9, 0x4142434445464748

lea rbx, [rel .data]
jmp .test
.test:

; add r8w, [rbx]
; Real encoding: 0x66, 0x44, 0x03, 0x03
; Swap operand-size override and REX. Converts r8 to rax, and stays a 16-bit operation.
db 0x44, 0x66, 0x03, 0x03

; add r9, [rbx]
; Real encoding: 0x4c, 0x03, 0x0b
; Add extraneous segment-override between REX prefix and op, changes r9 to rcx, and 64-bit to 32-bit.
db 0x4c, 0x2e, 0x03, 0x0b
hlt

align 16
; Memory operand for both adds above
.data:
dq 0x0102030405060708


================================================
FILE: unittests/ASM/FEX_bugs/REX/Primary_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x424446484a4c4e50",
    "RCX": "0x4142434445464748",
    "R8": "0x4142434445464748",
    "R9": "0x4142434445464748"
  }
}
%endif

; Checks that when two REX prefixes are stacked, only the one immediately
; before the opcode is honoured.
mov rax, 0x4142434445464748
mov rcx, 0x4142434445464748
; r8/r9 must keep these values
mov r8, 0x4142434445464748
mov r9, 0x4142434445464748

lea rbx, [rel .data]
jmp .test
.test:

; add rax, [rbx]
; Real encoding: 0x48, 0x03, 0x03
; Add additional false REX (0x44) as padding that would convert `rax` to `r8`.
; FEX treated REX prefixes as cumulative at one point.
db 0x44, 0x48, 0x03, 0x03

hlt

align 16
; Memory operand for the add above
.data:
dq 0x0102030405060708


================================================
FILE: unittests/ASM/FEX_bugs/REX/TwoByte.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x828486888a8c8e90",
    "RBX": "0x000000008a8c8e90",
    "RCX": "0x4142434445464748",
    "RDX": "0x0000000045464748",
    "R8": "1",
    "R9": "1"
  }
}
%endif

; Checks that misplaced REX prefixes are ignored for two-byte (0x0F) opcodes.
mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748
mov rdx, 0x4142434445464748
; r8/r9 must keep these values: they would be the operands if the extra REX
; bits were wrongly applied.
mov r8, 1
mov r9, 1
jmp .test
.test:

; xadd rax, rcx
; Real encoding: 0x48, 0x0f, 0xc1, 0xc8
; For cumulative decode errors, add a REX with all bits set. Will convert rax to r8, and rcx to r9.
db 0x4f, 0x48, 0x0f, 0xc1, 0xc8

; xadd ebx, edx
; Real encoding: 0x0f, 0xc1, 0xd3
; Add a nop-prefix pad between the opcode and full REX.
; The REX is no longer adjacent to the opcode, so this stays a 32-bit operation on ebx/edx.
db 0x4f, 0x2e, 0x0f, 0xc1, 0xd3

hlt


================================================
FILE: unittests/ASM/FEX_bugs/RegCacheMMX.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x5152535455565758", "0"]
  }
}
%endif

; Fills all eight x87 stack slots, switches to MMX usage, and then checks that
; movq2dq writes the MMX value to the low 64 bits of xmm0 and zeroes the upper
; 64 bits.
fninit
; Load all test values
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]
fld tword [rel .test_value]

; Setup for MMX usage
emms

; Load XMM value
; (non-zero in both halves so a missing upper-half clear is detectable)
movups xmm0, [rel .test_xmm_value]

; Load MMX value
movq mm0, [rel .test_mmx_value]

jmp .test
.test:
; Move MMX register in to XMM
; Should set the upper 64-bits of xmm0 to zero
movq2dq xmm0, mm0

hlt
align 32

; 10-byte (tword) x87 value: 64-bit significand followed by a 16-bit sign/exponent word
.test_value:
dq 0x4142434445464748
dw 0x7fff

.test_mmx_value:
dq 0x5152535455565758

.test_xmm_value:
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/FEX_bugs/SBCSmall.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8000abcd8000fffe",
    "RBX": "0x0000abcc00000001",
    "RCX": "0x0000000000000293"
  }
}
%endif

; FEX had a bug setting carry with 8/16-bit SBB
; The upper 48 bits of rax/rbx are non-zero so that a 16-bit operation which
; wrongly touches or reads them is visible in the final register values.
mov rax, 0x8000abcd80000000
mov rbx, 0x0000abcc00000001

; Stack for the pushfq/pop pair below
mov rsp, 0xe000_1000

; Start with carry set
stc

jmp .test
.test:

; ax = 0x0000 - 0x0001 - CF(1) = 0xfffe, with a borrow out
sbb ax, bx

; Capture the resulting flags in rcx for comparison
pushfq
pop rcx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/SHRD_OF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000008601"
  }
}
%endif

; FEX had a bug where OF for SHRD wasn't getting calculated correctly.
; OF with SHRD is set if the sign bit has changed.
; FEX /previously/ calculated it like regular SHR, where it contained the original MSB.

; edi has its sign bit clear; bit 0 of ebp is 1 and is shifted in as the new
; sign bit, so the sign changes and OF must be set.
mov edi, 0x35b292fc
mov ebp, 0x37d434ad
shrd edi, ebp, 1

; mov does not modify flags, so the shrd flags are still live here
mov rax, 0

; AH = SF:ZF:0:AF:0:PF:1:CF
lahf
; Load OF
seto al

; Mask out AF, SHRD leaves it undefined
; (AF is bit 4 of AH, i.e. bit 12 of rax)
and rax, 0xEFFF

hlt


================================================
FILE: unittests/ASM/FEX_bugs/SIBScaleTranspose.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152535455565758",
    "RBX": "0x5152535455565758",
    "RCX": "0x5152535455565758",
    "RDX": "0x5152535455565758",
    "RDI": "0x5152535455565758",

    "XMM0": ["0x5152535455565758", "0x0"],
    "XMM1": ["0x5152535455565758", "0x0"],
    "XMM2": ["0x5152535455565758", "0x0"],
    "XMM3": ["0x5152535455565758", "0x0"],
    "XMM4": ["0x5152535455565758", "0x0"],

    "MM0": "0x5152535455565758",
    "MM1": "0x5152535455565758",
    "MM2": "0x5152535455565758",
    "MM3": "0x5152535455565758",
    "MM4": "0x5152535455565758"
  },
  "MemoryRegions": {
    "0x00000000a0000000": "4096",
    "0x0000000110000000": "4096"
  },
  "MemoryData": {
    "0x00000000a0000000": "0x4142434445464748",
    "0x0000000110000000": "0x5152535455565758"
  }
}
%endif

; FEX had a bug in its const-prop pass where x86 SIB scale would accidentally transpose the register that was scaling with the base.
; This test explicitly tests SIB in a way that a transpose would load data from the wrong address.
; Basic layout is [r14 + (r15 * 8)]

; r14 will be the base
mov r14, 0x1000_0000
; r15 will be the index
mov r15, 0x2000_0000

; Correct address (r14 + r15 * 8) is 0x0000000110000000
; Transposed address (r15 + r14 * 8) is 0x00000000a0000000
; The config places a different value at each address, so a transposed load
; returns 0x4142434445464748 instead of the expected 0x5152535455565758.

; Break the block
jmp .test
.test:

; Basic GPR SIB test
mov rax, [r14 + (r15 * 8)]

; Basic Vector SIB test
movq xmm0, [r14 + (r15 * 8)]

; Basic MMX SIB test
movq mm0, [r14 + (r15 * 8)]

; Break the block now
jmp .test2
.test2:

; FEX GPR/XMM LoadMem const prop might only happen with disjoint add + mul so check this
; Need to be able to const-prop the multiply
imul r13, r15, 8

; Test base + offset transposed both ways, for all three types
mov rbx, [r14 + r13]
mov rcx, [r13 + r14]

movq xmm1, [r14 + r13]
movq xmm2, [r13 + r14]

movq mm1, [r14 + r13]
movq mm2, [r13 + r14]

; Break the block now
jmp .test3
.test3:

; FEX GPR/XMM LoadMem const prop might only happen with disjoint add + lshl so check this
; Need to be able to const-prop the lshl
mov r13, r15
shl r13, 3

; Test base + offset transposed both ways, for all three types
mov rdx, [r14 + r13]
mov rdi, [r13 + r14]

movq xmm3, [r14 + r13]
movq xmm4, [r13 + r14]

movq mm3, [r14 + r13]
movq mm4, [r13 + r14]

hlt


================================================
FILE: unittests/ASM/FEX_bugs/SegmentAddressOverride.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSP": "0x6",
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4",
    "R8": "0x5",
    "RBP": "0x0",
    "R9": "0x1",
    "R10": "0x2",
    "R11": "0x3",
    "R12": "0x4",
    "R13": "0x5"
  },
  "MemoryRegions": {
    "0x500000000": "0x100000000"
  },
  "HostFeatures": ["FSGSBASE"]
}
%endif

; FEX had a bug that caused the truncation from the address-size flag to be applied after adding the segment base, even
; though the flag is only supposed to apply to the offset itself.

; Save the original FS base; it is restored just before the hlt
rdfsbase r14
; Lay out the values 0..5 in consecutive qwords starting at 0x500000000, and
; point the FS base at the second one (0x500000008).
mov rdx, 0x500000008
mov qword [rdx - 8], 0x0
mov qword [rdx], 0x1
mov qword [rdx + 8], 0x2
mov qword [rdx + 0x10], 0x3
mov qword [rdx + 0x18], 0x4
mov qword [rdx + 0x20], 0x5
wrfsbase rdx
; 0x5FFFFFFF8 = FS base + 0xFFFFFFF0, which is where a 32-bit-truncated offset
; of -16 lands when the truncation is applied before adding the base.
mov rdx, 0x5FFFFFFF8
mov qword [rdx], 0x6

; 32-bit address size: offsets are truncated to 32 bits, then the FS base is added.
; r8 has bits set above bit 31 which must be dropped (r8d = 0x10).
mov r8, 0x500000010
mov r9, 0x10
a32 mov rsp, qword [fs:-16]
a32 mov rax, qword [fs:0]
a32 mov rbx, qword [fs:8]
a32 mov rcx, qword [fs:r8d]
a32 mov rdx, qword [fs:r8d + 8]
a32 mov r8, qword [fs:r8d + r9d]

; 64-bit address size: the same layout read without any truncation
mov r15, 0x10
mov rbp, qword [fs:-8]
mov r9, qword [fs:0]
mov r10, qword [fs:8]
mov r11, qword [fs:r15]
mov r12, qword [fs:r15 + 8]
mov r13, qword [fs:r15 + r15]

; Restore the original FS base
wrfsbase r14
hlt


================================================
FILE: unittests/ASM/FEX_bugs/SelfPop.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "123",
    "RBX": "456",
    "RCX": "123"
  }
}
%endif

; FEX had a bug merging pops to the same register

; Push some stuff
mov rsp, 0xe0000010
mov rax, 123
mov rbx, 456
push rax
push rbx

; Pop into the same register
; The first pop yields 456 (the last value pushed); the second pop overwrites it with 123.
pop rcx
pop rcx

; rcx now equals rax
hlt


================================================
FILE: unittests/ASM/FEX_bugs/ShiftConstantBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x500000020"
  }
}
%endif

; FEX had a bug in its `TestNZ` opcode where it would try to load a constant in to the tst instruction
; If the constant didn't fit in a logical encoding it would generate invalid instructions and also crash.
; This snippet of code was found in libGLX.so.0.0.0 when trying to load steamwebhelper.
; Both operands are compile-time constants: 0x28000001 << 5 = 0x500000020, the value expected in RAX.
mov     eax, 0x28000001
shl     rax, 0x5

hlt


================================================
FILE: unittests/ASM/FEX_bugs/ShiftPF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x6",
    "RAX": "1"
  }
}
%endif

; FEX had a bug where variable shifts modified PF but RCLSE ignored this,
; causing RCLSE to invalidly propagate earlier PF results.

; First set PF to odd
; (0 + 1 = 1 has a single set bit, so PF = 0)
mov rcx, 0
add rcx, 1

; Now do a variable shift that will set PF to even
; (3 << 1 = 6 = 0b110 has two set bits, so PF = 1; RBX is expected to be 0x6)
mov rbx, 3
mov cl, 1
shl rbx, cl

; Save the PF. This should be 1 = even
setp al

; Trash NZCV. This means we'll optimize to calculate PF but not NZCV, which lets
; more constant prop happen needed to materialize the bug. This instruction is
; otherwise a no-op, but without it we pass by chance.
add rdx, rdx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/ShiftZeroFlagsUpdate.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000082345679",
    "R12": "0x0000000000000202",
    "R13": "0x0000000000000202",
    "R14": "0x0000000000000202",
    "R15": "0x0000000000000202"
  }
}
%endif

; FEX-Emu had a bug where a shift by zero was updating flags.
; x86 shift by zero must not update flags.
; The low byte of rcx is 0xE0; a 32-bit shift masks the count to 5 bits, so the effective count is 0.
mov rsp, 0xe000_1000
mov rax, 0x8234fdb482345679
mov rcx, 0x51525354555657E0
mov rbx, 0
mov rdx, 0
; Load RFLAGS from a zero qword; every later pushfq is expected to read back 0x202 (see RegData).
push rbx
popfq

; Each `jmp` to the immediately following label breaks the code block between cases.
jmp .test
.test:

; Ensure that a 32-bit shift of zero doesn't update flags.
shl eax, cl
pushfq
pop r15

; Set up the next test.
; NOTE(review): this pushfq (and the ones in the later set-up steps) is never popped; it only
; leaves a copy of RFLAGS on the stack. Presumably `push rdx` / `popfq` was intended - confirm.
mov rdx, 0
pushfq
jmp .test2
.test2:

; Variable-count arithmetic shift right with an effective count of zero.
sar eax, cl
pushfq
pop r14

; Set up the next test.
mov rdx, 0
pushfq
jmp .test3
.test3:

; Immediate count 0xE0 also masks to zero.
shl eax, 0xE0
pushfq
pop r13


; Set up the next test.
mov rdx, 0
pushfq
jmp .test4
.test4:

; Immediate-count arithmetic shift right with an effective count of zero.
sar eax, 0xE0
pushfq
pop r12

hlt


================================================
FILE: unittests/ASM/FEX_bugs/SmallShiftFlags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8":  "0x246",
    "R9":  "0x246",
    "R10": "0x246",
    "R11": "0x246",
    "R12": "0x246",
    "R13": "0x246",
    "R14": "0x246",
    "R15": "0x246"
  }
}
%endif

; FEX-Emu had a bug where 8-bit and 16-bit shifts with large offsets would calculate flags incorrectly.
; Every case stores the masked RFLAGS in one of r8..r15; RegData expects 0x246 for all of them.
mov rsp, 0xe000_1000
mov rax, 0x8234fdb482345679

; Large shift that is larger than the element size but smaller than mask limit of 0x1F
; (the low byte of rcx is 0x14 = 20)
mov rcx, 0x5152535455565714
jmp .test
.test:

; Ensure that 16-bit shift updates flags correctly.
shl ax, cl
pushfq
pop r15
; Clear OF and AF since those are undefined
and r15, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test2
.test2:

; 16-bit arithmetic shift right, variable count.
sar ax, cl
pushfq
pop r14
; Clear OF and AF since those are undefined
and r14, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test3
.test3:

; 16-bit shift left, immediate count.
shl ax, 0x14
pushfq
pop r13
; Clear OF and AF since those are undefined
and r13, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test4
.test4:

; 16-bit arithmetic shift right, immediate count.
sar ax, 0x14
pushfq
pop r12
; Clear OF and AF since those are undefined
and r12, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test5
.test5:

; Ensure that 8-bit shift updates flags correctly.
shl al, cl
pushfq
pop r11
; Clear OF and AF since those are undefined
and r11, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test6
.test6:

; 8-bit arithmetic shift right, variable count.
sar al, cl
pushfq
pop r10
; Clear OF and AF since those are undefined
and r10, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test7
.test7:
; 8-bit shift left, immediate count.
shl al, 0x14
pushfq
pop r9
; Clear OF and AF since those are undefined
and r9, ~((1 << 11) | (1 << 4))

; Set up the next test.
mov rax, 0x8234fdb482345679
jmp .test8
.test8:

; 8-bit arithmetic shift right, immediate count.
sar al, 0x14
pushfq
pop r8
; Clear OF and AF since those are undefined
and r8, ~((1 << 11) | (1 << 4))

hlt


================================================
FILE: unittests/ASM/FEX_bugs/Test_CmpSelect_Merge.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000aaaaafaaa"
  }
}
%endif

; intcompare <setcc instruction>, <value1>, <value2>
; Compares value1 against value2, materializes the chosen condition in bl (rbx is zeroed before
; the first use, so rbx ends up 0 or 1) and shifts that bit into the low end of rax.
%macro intcompare 3
  ; instruction, value1, value2
  mov rcx, %2
  mov rdx, %3
  cmp rcx, rdx
  %1 bl
  shl rax, 1
  or rax, rbx
%endmacro

; This test specifically tests the Select and compare merging that occurs in OpcodeDispatcher
; The easiest way to test this is to do the comparison op and then SETcc with the flags that we want to ensure is working
; The expected RAX in the CONFIG block is the concatenation of all result bits, first case in the highest bit.

; RAX will be our result
mov rax, 0
; RBX will be our temp for setcc
mov rbx, 0

; Test integer ops
; RCX and RDX for comparison values
mov rcx, 0
mov rdx, 0

; Test EQ - true
intcompare sete, 0, 0

; Test EQ - false
intcompare sete, 0, 1

; Test NEQ - true
intcompare setne, 0, 1

; Test NEQ - false
intcompare setne, 0, 0

; Test SGE - true
intcompare setge, 0, 0

; Test SGE - false
intcompare setge, 0, 1

; Test SGE with sign difference - true
intcompare setge, 1, -1

; Test SGE with sign difference - false
intcompare setge, -1, 1

; Test SLT - true
intcompare setl, 0, 1

; Test SLT - false
intcompare setl, 0, 0

; Test SLT with sign difference - true
intcompare setl, -1, 1

; Test SLT with sign difference - false
intcompare setl, 1, -1

; Test SGT - true
intcompare setg, 1, 0

; Test SGT - false
intcompare setg, 0, 0

; Test SGT with sign difference - true
intcompare setg, 1, -1

; Test SGT with sign difference - false
intcompare setg, -1, 1

; Test SLE - true
intcompare setle, 0, 0

; Test SLE - false
intcompare setle, 1, 0

; Test SLE with sign difference - true
intcompare setle, -1, 1

; Test SLE with sign difference - false
intcompare setle, 1, -1

; Test UGE - true
intcompare setae, 0, 0

; Test UGE - true (1 >= 0 unsigned)
; NOTE(review): this slot was labelled "false"; operands `0, 1` were probably intended. The
; expected RAX encodes the result of the code as written (bit = 1).
intcompare setae, 1, 0

; Test UGE with *sign* difference - true
intcompare setae, -1, 1

; Test ULT with *sign* difference - true (1 < 0xFFFF_FFFF_FFFF_FFFF unsigned)
; NOTE(review): this slot was labelled "UGE ... false" but uses setb, not setae; the expected
; RAX encodes the result of the code as written (bit = 1).
intcompare setb, 1, -1

; Test ULT - true
intcompare setb, 0, 1

; Test ULT - false
intcompare setb, 1, 0

; Test ULT with *sign* difference - true
intcompare setb, 1, -1

; Test ULT with *sign* difference - false
intcompare setb, -1, 1

; Test UGT - true
intcompare seta, 1, 0

; Test UGT - false
intcompare seta, 0, 1

; Test UGT with *sign* difference - true
intcompare seta, -1, 1

; Test UGT with *sign* difference - false
intcompare seta, 1, -1

; Test ULE - true
intcompare setbe, 0, 0

; Test ULE - false
intcompare setbe, 1, 0

; Test ULE with *sign* difference - true
intcompare setbe, 1, -1

; Test ULE with *sign* difference - false
intcompare setbe, -1, 1

hlt


================================================
FILE: unittests/ASM/FEX_bugs/Test_CmpSelect_Merge_Float.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000cbe0708"
  }
}
%endif

; floatcompare <setcc instruction>, <value1>, <value2>
; Loads two doubles, compares them with ucomisd, materializes the chosen condition in bl (rbx is
; zeroed before the first use, so rbx ends up 0 or 1) and shifts that bit into the low end of rax.
%macro floatcompare 3
  ; instruction, value1, value2
  movsd xmm0, %2
  movsd xmm1, %3
  ucomisd xmm0, xmm1

  %1 bl
  shl rax, 1
  or rax, rbx
%endmacro

; This test specifically tests the Select and compare merging that occurs in OpcodeDispatcher
; The easiest way to test this is to do the comparison op and then SETcc with the flags that we want to ensure is working
; The expected RAX in the CONFIG block is the concatenation of all result bits, first case in the highest bit.

; RAX will be our result
mov rax, 0
; RBX will be our temp for setcc
mov rbx, 0

; Float comparisons
; xmm0 and xmm1 will be our comparison values

; ucomisd sets ZF/PF/CF to 1/1/1 when unordered, 0/0/0 when greater, 0/0/1 when less and 1/0/0 when equal.
; NOTE(review): the table below does not fully match the instructions used by the cases:
; SETNE tests ZF == 0 (not CF == 1), and the FGE cases below use SETA rather than SETAE.
; Several per-case true/false labels were therefore wrong; they now state the result of the
; code as written, which is what the expected RAX encodes.
; FLU (CF == 1), SETNE
; FLEU (CF == 1 || ZF == 1), SETBE
; FU (PF == 1), SETP
; FNU (PF == 0), SETNP
; FGE (CF == 0), SETAE
; FGT (CF == 0 && ZF == 0), SETA

; Test FLU - true
floatcompare setne, [rel .float_0], [rel .float_1]

; Test FLU - true (identical operands to the previous case; originally labelled "false")
floatcompare setne, [rel .float_0], [rel .float_1]

; Test FLU (unordered) - false with SETNE (unordered sets ZF; originally labelled "true")
floatcompare setne, [rel .float_0], [rel .float_qnan]

; Test FLU (unordered) - false with SETNE (unordered sets ZF; originally labelled "true")
floatcompare setne, [rel .float_qnan], [rel .float_1]

; Test FLEU - true
floatcompare setbe, [rel .float_0], [rel .float_0]

; Test FLEU - false
floatcompare setbe, [rel .float_1], [rel .float_0]

; Test FLEU (unordered) - true
floatcompare setbe, [rel .float_0], [rel .float_qnan]

; Test FLEU (unordered) - true
floatcompare setbe, [rel .float_qnan], [rel .float_1]

; Test FU - true
floatcompare setp, [rel .float_0], [rel .float_qnan]

; Test FU - true
floatcompare setp, [rel .float_qnan], [rel .float_0]

; Test FU - true
floatcompare setp, [rel .float_qnan], [rel .float_qnan]

; Test FU - false
floatcompare setp, [rel .float_1], [rel .float_0]

; Test FU - false
floatcompare setp, [rel .float_0], [rel .float_1]

; Test FU - false
floatcompare setp, [rel .float_0], [rel .float_0]

; Test FNU - false
floatcompare setnp, [rel .float_0], [rel .float_qnan]

; Test FNU - false
floatcompare setnp, [rel .float_qnan], [rel .float_0]

; Test FNU - false
floatcompare setnp, [rel .float_qnan], [rel .float_qnan]

; Test FNU - true
floatcompare setnp, [rel .float_1], [rel .float_0]

; Test FNU - true
floatcompare setnp, [rel .float_0], [rel .float_1]

; Test FNU - true
floatcompare setnp, [rel .float_0], [rel .float_0]

; Test FGE - false with SETA (equal operands set ZF; originally labelled "true")
floatcompare seta, [rel .float_0], [rel .float_0]

; Test FGE - false
floatcompare seta, [rel .float_0], [rel .float_1]

; Test FGE (unordered) - false
floatcompare seta, [rel .float_0], [rel .float_qnan]

; Test FGE (unordered) - false
floatcompare seta, [rel .float_qnan], [rel .float_1]

; Test FGT - true
floatcompare seta, [rel .float_1], [rel .float_0]

; Test FGT - false
floatcompare seta, [rel .float_0], [rel .float_1]

; Test FGT (unordered) - false
floatcompare seta, [rel .float_0], [rel .float_qnan]

; Test FGT (unordered) - false
floatcompare seta, [rel .float_qnan], [rel .float_1]

hlt

; Double-precision operands referenced by the cases above.
align 8
.float_1:
dq 1.0
.float_0:
dq 0.0
.float_qnan:
dq 0x7ff8000000000000


================================================
FILE: unittests/ASM/FEX_bugs/Test_CmpSelect_Merge_Float_branch.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000cbe0708"
  }
}
%endif

; floatcompare <jcc instruction>, <value1>, <value2>
; Loads two doubles, compares them with ucomisd and branches on the chosen condition. The taken
; path sets rbx to 1, the fallthrough path sets it to 0, and the bit is merged into rax.
%macro floatcompare 3
  ; instruction, value1, value2
  movsd xmm0, %2
  movsd xmm1, %3

  ; Make room for the new result bit before the compare, since shl modifies the flags.
  shl rax, 1
  ucomisd xmm0, xmm1

  ; Conditional branch
  %1 %%true

  %%fallthrough:
    ; False fallthrough path
    mov rbx, 0
    jmp %%combine

  %%true:
    ; True path
    mov rbx, 1

  %%combine:
    ; Combine
    or rax, rbx
%endmacro

; This test specifically tests the Select and compare merging that occurs in OpcodeDispatcher
; This variant consumes the flags with a conditional branch (Jcc) instead of SETcc.
; The expected RAX in the CONFIG block is the concatenation of all result bits, first case in the highest bit.

; RAX will be our result
mov rax, 0
; RBX will hold the per-case result bit (0 or 1)
mov rbx, 0

; Float comparisons
; xmm0 and xmm1 will be our comparison values

; ucomisd sets ZF/PF/CF to 1/1/1 when unordered, 0/0/0 when greater, 0/0/1 when less and 1/0/0 when equal.
; NOTE(review): the table below names SETcc forms and does not fully match the branches used by
; the cases: JNE tests ZF == 0 (not CF == 1), and the FGE cases below use JA rather than JAE.
; Several per-case true/false labels were therefore wrong; they now state the result of the
; code as written, which is what the expected RAX encodes.
; FLU (CF == 1), SETNE
; FLEU (CF == 1 || ZF == 1), SETBE
; FU (PF == 1), SETP
; FNU (PF == 0), SETNP
; FGE (CF == 0), SETAE
; FGT (CF == 0 && ZF == 0), SETA

; Test FLU - true
floatcompare jne, [rel .float_0], [rel .float_1]

; Test FLU - true (identical operands to the previous case; originally labelled "false")
floatcompare jne, [rel .float_0], [rel .float_1]

; Test FLU (unordered) - false with JNE (unordered sets ZF; originally labelled "true")
floatcompare jne, [rel .float_0], [rel .float_qnan]

; Test FLU (unordered) - false with JNE (unordered sets ZF; originally labelled "true")
floatcompare jne, [rel .float_qnan], [rel .float_1]

; Test FLEU - true
floatcompare jbe, [rel .float_0], [rel .float_0]

; Test FLEU - false
floatcompare jbe, [rel .float_1], [rel .float_0]

; Test FLEU (unordered) - true
floatcompare jbe, [rel .float_0], [rel .float_qnan]

; Test FLEU (unordered) - true
floatcompare jbe, [rel .float_qnan], [rel .float_1]

; Test FU - true
floatcompare jp, [rel .float_0], [rel .float_qnan]

; Test FU - true
floatcompare jp, [rel .float_qnan], [rel .float_0]

; Test FU - true
floatcompare jp, [rel .float_qnan], [rel .float_qnan]

; Test FU - false
floatcompare jp, [rel .float_1], [rel .float_0]

; Test FU - false
floatcompare jp, [rel .float_0], [rel .float_1]

; Test FU - false
floatcompare jp, [rel .float_0], [rel .float_0]

; Test FNU - false
floatcompare jnp, [rel .float_0], [rel .float_qnan]

; Test FNU - false
floatcompare jnp, [rel .float_qnan], [rel .float_0]

; Test FNU - false
floatcompare jnp, [rel .float_qnan], [rel .float_qnan]

; Test FNU - true
floatcompare jnp, [rel .float_1], [rel .float_0]

; Test FNU - true
floatcompare jnp, [rel .float_0], [rel .float_1]

; Test FNU - true
floatcompare jnp, [rel .float_0], [rel .float_0]

; Test FGE - false with JA (equal operands set ZF; originally labelled "true")
floatcompare ja, [rel .float_0], [rel .float_0]

; Test FGE - false
floatcompare ja, [rel .float_0], [rel .float_1]

; Test FGE (unordered) - false
floatcompare ja, [rel .float_0], [rel .float_qnan]

; Test FGE (unordered) - false
floatcompare ja, [rel .float_qnan], [rel .float_1]

; Test FGT - true
floatcompare ja, [rel .float_1], [rel .float_0]

; Test FGT - false
floatcompare ja, [rel .float_0], [rel .float_1]

; Test FGT (unordered) - false
floatcompare ja, [rel .float_0], [rel .float_qnan]

; Test FGT (unordered) - false
floatcompare ja, [rel .float_qnan], [rel .float_1]

hlt

; Double-precision operands referenced by the cases above.
align 8
.float_1:
dq 1.0
.float_0:
dq 0.0
.float_qnan:
dq 0x7ff8000000000000


================================================
FILE: unittests/ASM/FEX_bugs/Test_CmpSelect_Merge_branch.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000aaaaafaaa"
  }
}
%endif

; intcompare <jcc instruction>, <value1>, <value2>
; Compares value1 against value2 and branches on the chosen condition. The taken path sets rbx
; to 1, the fallthrough path sets it to 0, and the bit is merged into the low end of rax.
%macro intcompare 3
  ; instruction, value1, value2
  mov rcx, %2
  mov rdx, %3
  ; Make room for the new result bit before the compare, since shl modifies the flags.
  shl rax, 1
  cmp rcx, rdx

  ; Conditional branch
  %1 %%true

  %%fallthrough:
    ; False fallthrough path
    mov rbx, 0
    jmp %%combine

  %%true:
    ; True path
    mov rbx, 1

  %%combine:
    ; Combine
    or rax, rbx
%endmacro

; This test specifically tests the Select and compare merging that occurs in OpcodeDispatcher
; This variant consumes the flags with a conditional branch (Jcc) instead of SETcc.
; The expected RAX in the CONFIG block is the concatenation of all result bits, first case in the highest bit.

; RAX will be our result
mov rax, 0
; RBX will hold the per-case result bit (0 or 1)
mov rbx, 0

; Test integer ops
; RCX and RDX for comparison values
mov rcx, 0
mov rdx, 0

; Test EQ - true
intcompare je, 0, 0

; Test EQ - false
intcompare je, 0, 1

; Test NEQ - true
intcompare jne, 0, 1

; Test NEQ - false
intcompare jne, 0, 0

; Test SGE - true
intcompare jge, 0, 0

; Test SGE - false
intcompare jge, 0, 1

; Test SGE with sign difference - true
intcompare jge, 1, -1

; Test SGE with sign difference - false
intcompare jge, -1, 1

; Test SLT - true
intcompare jl, 0, 1

; Test SLT - false
intcompare jl, 0, 0

; Test SLT with sign difference - true
intcompare jl, -1, 1

; Test SLT with sign difference - false
intcompare jl, 1, -1

; Test SGT - true
intcompare jg, 1, 0

; Test SGT - false
intcompare jg, 0, 0

; Test SGT with sign difference - true
intcompare jg, 1, -1

; Test SGT with sign difference - false
intcompare jg, -1, 1

; Test SLE - true
intcompare jle, 0, 0

; Test SLE - false
intcompare jle, 1, 0

; Test SLE with sign difference - true
intcompare jle, -1, 1

; Test SLE with sign difference - false
intcompare jle, 1, -1

; Test UGE - true
intcompare jae, 0, 0

; Test UGE - true (1 >= 0 unsigned)
; NOTE(review): this slot was labelled "false"; operands `0, 1` were probably intended. The
; expected RAX encodes the result of the code as written (bit = 1).
intcompare jae, 1, 0

; Test UGE with *sign* difference - true
intcompare jae, -1, 1

; Test ULT with *sign* difference - true (1 < 0xFFFF_FFFF_FFFF_FFFF unsigned)
; NOTE(review): this slot was labelled "UGE ... false" but uses jb, not jae; the expected
; RAX encodes the result of the code as written (bit = 1).
intcompare jb, 1, -1

; Test ULT - true
intcompare jb, 0, 1

; Test ULT - false
intcompare jb, 1, 0

; Test ULT with *sign* difference - true
intcompare jb, 1, -1

; Test ULT with *sign* difference - false
intcompare jb, -1, 1

; Test UGT - true
intcompare ja, 1, 0

; Test UGT - false
intcompare ja, 0, 1

; Test UGT with *sign* difference - true
intcompare ja, -1, 1

; Test UGT with *sign* difference - false
intcompare ja, 1, -1

; Test ULE - true
intcompare jbe, 0, 0

; Test ULE - false
intcompare jbe, 1, 0

; Test ULE with *sign* difference - true
intcompare jbe, 1, -1

; Test ULE with *sign* difference - false
intcompare jbe, -1, 1

hlt


================================================
FILE: unittests/ASM/FEX_bugs/Test_JP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000cafecafe"
  }
}
%endif

; This test checks for proper behaviour of the parity flag. Older FEX versions
; would accidentally turn JP into a zero/nonzero check of the result.
; Every result below is nonzero, so a zero/nonzero check cannot stand in for the parity check.
; Any mispredicted branch jumps to the failure label, which halts without writing the success code.

; rax = 0x20, odd parity
mov rax, 0x10
mov rbx, 0x10
add rax, rbx
jpe fexi_fexi_im_so_broken

; rax = 0x32, odd parity
mov rax, 0x10
mov rbx, 0x22
xor rax, rbx
jpe fexi_fexi_im_so_broken

; rax = 0x41, even parity
mov rax, 0x40
mov rbx, 0x01
or rax, rbx
jpo fexi_fexi_im_so_broken

; rax = 0x42 (0x43 & 0xfe), even parity
mov rax, 0x43
mov rbx, 0xfe
and rax, rbx
jpo fexi_fexi_im_so_broken

; success code
mov rax, 0xcafecafe
hlt

; failure, rax != 0xcafecafe
fexi_fexi_im_so_broken:
hlt


================================================
FILE: unittests/ASM/FEX_bugs/Test_PF_Zero_Shift.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000cafecafe"
  }
}
%endif

; This test checks that PF is not modified with zero shifts
; Any mispredicted branch jumps to the failure label, which halts without writing the success code.

; First, some smoke tests: nonzero shifts should set PF as expected
; 0b111_1111_1000 has odd parity
; (PF only considers the low byte of the result: 0xf8 has five set bits)
mov eax, 0xff
mov cl, 0x3
shl eax, cl
jpe fexi_fexi_im_so_broken

; 0b11_1111_1100 has even parity
; (low byte 0xfc has six set bits)
mov eax, 0xff
mov cl, 0x2
shl eax, cl
jpo fexi_fexi_im_so_broken

; At this point, parity is even
; So now test that PF is preserved across zero shifts, regardless of output parity.
; Each pair below uses one value with an even and one with an odd number of set bits.

mov cl, 0
mov eax, 0x0f
shl eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x0e
shl eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x1f
shr eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x1e
shr eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x2f
sal eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x2e
sal eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x3f
sar eax, cl
jpo fexi_fexi_im_so_broken

mov eax, 0x3e
sar eax, cl
jpo fexi_fexi_im_so_broken

; success code
mov rax, 0xcafecafe
hlt

; failure, rax != 0xcafecafe
fexi_fexi_im_so_broken:
hlt


================================================
FILE: unittests/ASM/FEX_bugs/TrickyRA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  }
}
%endif

; This test is reduced from a game that hit a register allocation bug. The test
; has a high register pressure across a `rep movsb` (_Memcpy), and since _Memcpy
; is modelled as writing a GPRPair, this induces live range splitting at the
; time of writing. FEX had a bug where live range splitting was unsound in
; certain circumstances.
; RegData is empty, so no register values are compared at the end of this test.

mov rsp, 0xe000_1000
lea rbp, [rel .data_mid]
lea rdi, [rel .data_dst]
lea rsi, [rel .data_src]
mov rcx, 11

; Store where it is expected
; (the byte count 11 goes to [rbp-0x9e8], inside the .data page that precedes .data_mid)
mov [rbp-0x9e8], rcx
; Keep the destination pointer in rcx; the block below moves it back into rdi.
mov rcx, rdi

; Break the block so the sequence under test is compiled on its own.
jmp .test
.test:
mov     rax, qword [rbp-0x9f0]
mov     rdx, qword [rbp-0x9e8]
mov     rdi, rcx
mov     rcx, rdx
; NOTE(review): popfq/pushfq bracket the copy without a matching push/pop in this file;
; presumably kept from the original game code to preserve the shape that hit the bug - confirm.
popfq
rep movsb ; Uses RDI and RSI
pushfq
; rax and rdx are read again after the copy, so they are live across `rep movsb`.
mov     qword [rbp-0x9f0], rax
mov     qword [rbp-0x9e8], rdx

hlt

; Scratch pages addressed relative to .data_mid (negative offsets land in .data).
align 4096
.data:
times 4096 db 0
.data_mid:
times 4096 db 0

; Copy destination buffer.
.data_dst:
times 16 db 0

; Copy source buffer.
.data_src:
times 16 db 0


================================================
FILE: unittests/ASM/FEX_bugs/UnalignedLoadStoreSIGBUS.asm
================================================
%ifdef CONFIG
{
  "Env": {
    "FEX_TSOENABLED": "1",
    "FEX_TSOAUTOMIGRATION": "0"
  }
}
%endif

; FEX-Emu had a bug where SIGBUS handling of unaligned loadstores using FEAT_LRCPC would accidentally try using the FEAT_LSE atomic memory operation
; handlers. It wouldn't find the handler for FEAT_LRCPC instructions (because it was only supposed to handle FEAT_LSE instructions) and fault out.
; This happens because FEAT_LRCPC and FEAT_LSE instructions partially share an instruction encoding and FEX forgot to check for FEAT_LRCPC first
; before using the FEAT_LSE handler.
; The test passes by reaching hlt without faulting; no register values are compared.
; The environment above enables TSO emulation and turns TSO auto-migration off.
mov r15, 0xe000_0000

; Atomic unaligned load across 16-byte and 64-byte granule
; (8-byte accesses at offsets 15 and 63 straddle the 16-byte and 64-byte boundaries)
mov rax, qword [r15 + 15]
mov rbx, qword [r15 + 63]

; Atomic unaligned store across 16-byte and 64-byte granule
mov qword [r15 + 15], rbx
mov qword [r15 + 63], rcx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/VectorLoadCrash.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM5":  ["0x0000000000000048", "0x0000000000000047"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif


; FEX-Emu had a bug where a vector load that was using SIB addressing would overflow to larger than what ARM could encode.
; Test that here.
; Original bug came from the Darwinia Linux binary from function `HUF_readDTableX1_wksp`

; Bias the base register by -0x3d4 so that base + index + 0x3d4 resolves to .data.
mov rbx, 0
lea r15, [rel .data - 0x3d4]

; Break the block
jmp .test
.test:

; Zero-extends the two low bytes at .data (0x48, 0x47) into the two qword lanes of xmm5.
pmovzxbq xmm5, word [rbx+r15+0x3d4]

hlt

.data:
dq 0x4142434445464748, 0x5152535455565758


================================================
FILE: unittests/ASM/FEX_bugs/VectorShift_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4054c664c2f837b5",
    "MM1": "0x40516053e2d6238e",
    "MM2": "0x4044836d86ec17ec",
    "MM3": "0x402a1e1c58255b03",
    "MM4": "0x401568e0c9d9d346",
    "MM5": "0x4035fe425aee6320",
    "MM6": "0x402359003eea209b",
    "MM7": "0x40154b7d41743e96",
    "XMM0":  ["0x4054c664c2f837b5", "0x40516053e2d6238e"],
    "XMM1":  ["0x4044836d86ec17ec", "0x402a1e1c58255b03"],
    "XMM2":  ["0x401568e0c9d9d346", "0x4035fe425aee6320"],
    "XMM3":  ["0x402359003eea209b", "0x40154b7d41743e96"],
    "XMM4":  ["0x403d075a31a4bdba", "0x4050a018bd66277c"],
    "XMM5":  ["0x40334ec17ebaf102", "0x4056d7404ea4a8c1"],
    "XMM6":  ["0x404439b5c7cd898b", "0x40497b136a400fbb"],
    "XMM7":  ["0x4040528bc169c23b", "0x4037f9ca18bd6627"],
    "XMM8":  ["0x4056a929888f861a", "0x403839b866e43aa8"],
    "XMM9":  ["0x4058bc1f212d7732", "0x4056cde5c91d14e4"]
  }
}
%endif

; FEX had a bug where immediate encoded shifts by zero would generate bad code on AArch64.
; A shift by zero is the identity, so every register must still hold the value loaded from .data.

; Load the MMX registers from the first 64 bytes of .data
lea rdx, [rel .data]
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]
movq mm4, [rdx + 8 * 4]
movq mm5, [rdx + 8 * 5]
movq mm6, [rdx + 8 * 6]
movq mm7, [rdx + 8 * 7]

; Load the XMM registers from the first 160 bytes of .data
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]

; Test MMX first
psllw mm0, 0
pslld mm1, 0
psllq mm2, 0
psraw mm3, 0
psrad mm4, 0
psrlw mm5, 0
psrld mm6, 0
psrlq mm7, 0

; Now test XMM
psllw xmm0, 0
pslld xmm1, 0
psllq xmm2, 0
pslldq xmm3, 0
psraw xmm4, 0
psrad xmm5, 0
psrlw xmm6, 0
psrld xmm7, 0
psrlq xmm8, 0
; Byte-wise right shift: this slot previously repeated pslldq, leaving psrldq untested.
psrldq xmm9, 0

hlt
align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/FEX_bugs/VectorShift_zero_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4047dcfb00bcbe62", "0x40382c8de2ac3223", "0x4040da5269595fee", "0x40582da24894c448"],
    "XMM1":  ["0x404c46843808850a", "0x4051ce8f32378ab1", "0x404eec764adff823", "0x40562f8c7e28240b"],
    "XMM2":  ["0x404a7427525460aa", "0x4013860029f16b12", "0x405221d82fd75e20", "0x4008292a30553261"],
    "XMM3":  ["0x402ed06f69446738", "0x404cc57c6fbd273d", "0x402338eb463497b7", "0x404bc581adea8976"],
    "XMM4":  ["0x40536d2fec56d5d0", "0x403436e2435696e6", "0x40239c779a6b50b1", "0x4044a59e30014f8b"],
    "XMM5":  ["0x40560c58793dd97f", "0x404295b6c3760bf6", "0x4048c3549f94855e", "0x40248b61bb05faec"],
    "XMM6":  ["0x405811ea0ba1f4b2", "0x401a9443d46b26c0", "0x403996f73c0c1fc9", "0x4057c071b4784231"],
    "XMM7":  ["0x4047ec6b7aa25d8d", "0x4055031782d38477", "0x405681e5c91d14e4", "0x4050740cf1800a7c"],
    "XMM10": ["0x40560c58793dd97f", "0x404295b6c3760bf6", "0x4048c3549f94855e", "0x40248b61bb05faec"],
    "XMM11": ["0x40536d2fec56d5d0", "0x403436e2435696e6", "0x40239c779a6b50b1", "0x4044a59e30014f8b"],
    "XMM12": ["0x402ed06f69446738", "0x404cc57c6fbd273d", "0x402338eb463497b7", "0x404bc581adea8976"],
    "XMM13": ["0x404a7427525460aa", "0x4013860029f16b12", "0x405221d82fd75e20", "0x4008292a30553261"],
    "XMM14": ["0x404c46843808850a", "0x4051ce8f32378ab1", "0x404eec764adff823", "0x40562f8c7e28240b"],
    "XMM15": ["0x4047dcfb00bcbe62", "0x40382c8de2ac3223", "0x4040da5269595fee", "0x40582da24894c448"]
  }
}
%endif

; FEX had a bug where immediate encoded shifts by zero would generate bad code on AArch64.
; A shift by zero is the identity, so each destination must equal its (unmodified) source register.

; Load all sixteen YMM registers from .data
lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 5]
vmovapd ymm6, [rdx + 32 * 6]
vmovapd ymm7, [rdx + 32 * 7]
vmovapd ymm8, [rdx + 32 * 8]
vmovapd ymm9, [rdx + 32 * 9]
vmovapd ymm10, [rdx + 32 * 10]
vmovapd ymm11, [rdx + 32 * 11]
vmovapd ymm12, [rdx + 32 * 12]
vmovapd ymm13, [rdx + 32 * 13]
vmovapd ymm14, [rdx + 32 * 14]
vmovapd ymm15, [rdx + 32 * 15]

; 256-bit immediate shifts by zero; the source registers are read in reverse order.
vpsllw ymm0, ymm15, 0
vpslld ymm1, ymm14, 0
vpsllq ymm2, ymm13, 0
vpslldq ymm3, ymm12, 0
vpsraw ymm4, ymm11, 0
vpsrad ymm5, ymm10, 0
vpsrlw ymm6, ymm9, 0
vpsrld ymm7, ymm8, 0
vpsrlq ymm8, ymm7, 0
; Byte-wise right shift: this slot previously repeated vpslldq, leaving vpsrldq untested.
vpsrldq ymm9, ymm6, 0

hlt
align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/FEX_bugs/VectorShift_zero_avx_128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4047dcfb00bcbe62", "0x40382c8de2ac3223", "0", "0"],
    "XMM1":  ["0x404c46843808850a", "0x4051ce8f32378ab1", "0", "0"],
    "XMM2":  ["0x404a7427525460aa", "0x4013860029f16b12", "0", "0"],
    "XMM3":  ["0x402ed06f69446738", "0x404cc57c6fbd273d", "0", "0"],
    "XMM4":  ["0x40536d2fec56d5d0", "0x403436e2435696e6", "0", "0"],
    "XMM5":  ["0x40560c58793dd97f", "0x404295b6c3760bf6", "0", "0"],
    "XMM6":  ["0x405811ea0ba1f4b2", "0x401a9443d46b26c0", "0", "0"],
    "XMM7":  ["0x4047ec6b7aa25d8d", "0x4055031782d38477", "0", "0"],
    "XMM10": ["0x40560c58793dd97f", "0x404295b6c3760bf6", "0x4048c3549f94855e", "0x40248b61bb05faec"],
    "XMM11": ["0x40536d2fec56d5d0", "0x403436e2435696e6", "0x40239c779a6b50b1", "0x4044a59e30014f8b"],
    "XMM12": ["0x402ed06f69446738", "0x404cc57c6fbd273d", "0x402338eb463497b7", "0x404bc581adea8976"],
    "XMM13": ["0x404a7427525460aa", "0x4013860029f16b12", "0x405221d82fd75e20", "0x4008292a30553261"],
    "XMM14": ["0x404c46843808850a", "0x4051ce8f32378ab1", "0x404eec764adff823", "0x40562f8c7e28240b"],
    "XMM15": ["0x4047dcfb00bcbe62", "0x40382c8de2ac3223", "0x4040da5269595fee", "0x40582da24894c448"]
  }
}
%endif

; FEX had a bug where immediate encoded shifts by zero would generate bad code on AArch64.
; A shift by zero is the identity on the low 128 bits. RegData expects the upper 128 bits of
; each checked destination (XMM0-XMM7) to read as zero after the 128-bit VEX operation.

; Load all sixteen YMM registers from .data
lea rdx, [rel .data]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 5]
vmovapd ymm6, [rdx + 32 * 6]
vmovapd ymm7, [rdx + 32 * 7]
vmovapd ymm8, [rdx + 32 * 8]
vmovapd ymm9, [rdx + 32 * 9]
vmovapd ymm10, [rdx + 32 * 10]
vmovapd ymm11, [rdx + 32 * 11]
vmovapd ymm12, [rdx + 32 * 12]
vmovapd ymm13, [rdx + 32 * 13]
vmovapd ymm14, [rdx + 32 * 14]
vmovapd ymm15, [rdx + 32 * 15]

; 128-bit VEX immediate shifts by zero; the source registers are read in reverse order.
vpsllw xmm0, xmm15, 0
vpslld xmm1, xmm14, 0
vpsllq xmm2, xmm13, 0
vpslldq xmm3, xmm12, 0
vpsraw xmm4, xmm11, 0
vpsrad xmm5, xmm10, 0
vpsrlw xmm6, xmm9, 0
vpsrld xmm7, xmm8, 0
vpsrlq xmm8, xmm7, 0
; Byte-wise right shift: this slot previously repeated vpslldq, leaving vpsrldq untested.
vpsrldq xmm9, xmm6, 0

hlt
align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/FEX_bugs/X87MMXNZCV.asm
================================================
%ifdef CONFIG
{
  "RegData": {},
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FEX had a bug where a mmx->x87 switch would flush the saved NZCV value used for ftst, causing a crash in RA
; RegData is empty: the test passes by reaching hlt without crashing.

movq mm0, mm1 ; enters mmx state
ftst ; enters x87 state

hlt


================================================
FILE: unittests/ASM/FEX_bugs/XeSS_quadratic.asm
================================================
%ifdef CONFIG
{
  "MemoryRegions": {
    "0x200000000": "0x20000"
  }
}
%endif

; FEX has had various bugs throughout the years leading to accidental
; superlinear time, for example with constant pooling and register allocation.
; This test mimics the massive block found in XeSS. If this test has
; excessive runtime, something is broken in FEXCore.

; Emit 0x10000 back-to-back byte stores, each with its own displacement and immediate.
; The stores cover the upper half (offsets 0x10000..0x1FFFF) of the region mapped in the CONFIG block.
mov rsp, 0x200000000
%assign i 0
%rep 0x10000
mov byte [rsp + (0x10000 + i)], (0x01 + (i << 2)) & 0xFF
%assign i i+1
%endrep

hlt


================================================
FILE: unittests/ASM/FEX_bugs/adcx_size.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000007ee544ac",
    "RBX": "0x0fb22768a2cf00bb",
    "RCX": "0x00000000e19be77f",
    "RDX": "0x06726399b9f09d2f",
    "RSI": "0xe544b42838dd404d",
    "RDI": "0x6d78590ca1418bd1",
    "RSP": "0x20bfe50ddcfce881",
    "RBP": "0x56c870e2dcbf6522"
  },
  "HostFeatures": ["ADX"]
}
%endif

; Exercises adcx with 32-bit and 64-bit operands, with the carry flag both clear and set.
; The source registers (rbx, rdx, rdi, rbp) are expected to be unchanged; for the 32-bit forms
; the expected RAX/RCX values in RegData have their upper 32 bits zeroed.
mov rax, 0x6B11A609DC1643F1
mov rbx, 0x0FB22768A2CF00BB
mov rcx, 0x48E1BB8327AB4A4F
mov rdx, 0x06726399B9F09D2F
mov rsi, 0x77CC5B1B979BB47C
mov rdi, 0x6D78590CA1418BD1
mov rsp, 0xC9F7742B003D835E
mov rbp, 0x56C870E2DCBF6522

; 32-bit clc
; eax = 0xDC1643F1 + 0xA2CF00BB, truncated to 32 bits = 0x7EE544AC
clc
adcx eax, ebx

; 32-bit stc
; ecx = 0x27AB4A4F + 0xB9F09D2F + 1 = 0xE19BE77F
stc
adcx ecx, edx

; 64-bit clc
; rsi = rsi + rdi with no carry-in
clc
adcx rsi, rdi

; 64-bit stc
; rsp = rsp + rbp + 1, truncated to 64 bits
stc
adcx rsp, rbp

hlt


================================================
FILE: unittests/ASM/FEX_bugs/add_sub_carry.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xaeadacac9a9a41e5",
    "RBX": "0x6162636520238df8"
  }
}
%endif

; FEX had a bug with smaller than 32-bit operations corrupting sbb and adc results.
; A small test that tests both sbb and adc to ensure it returns data correctly.
; This was noticed in Final Fantasy 7 (steamid 39140) having broken rendering on the title screen.
mov rax, 0x4142434445464748
mov rbx, 0x5152535455565758
mov rcx, 0x6162636465666768

; Subtract-with-borrow at every operand size (8, 16, 32, 64 bits).
; Only the first sbb starts from a cleared carry; each later one consumes the
; borrow produced by the previous, narrower operation.
clc
sbb al, bl
sbb ax, bx
sbb eax, ebx
sbb rax, rbx

; Same ladder for add-with-carry, restarting from a cleared carry.
; rbx is the destination here, so the sbb results in rax are left untouched.
clc
adc bl, cl
adc bx, cx
adc ebx, ecx
adc rbx, rcx

hlt


================================================
FILE: unittests/ASM/FEX_bugs/add_sub_carry_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xedededee26260e6c",
    "RBX": "0x121212129498c16d"
  }
}
%endif

; Regression test: FEX used to corrupt sbb/adc results for operations narrower
; than 32 bits. Both instructions are exercised here, register-to-register and
; against memory operands, at every operand size.
; Originally spotted as broken title-screen rendering in Final Fantasy 7
; (steamid 39140).

; Emits 256 back-to-back "<op> <reg>, [table + k]" instructions, k = 0..255.
; The offset advances by one BYTE per step regardless of operand size.
;   %1 = instruction mnemonic (sbb or adc)
;   %2 = destination register
;   %3 = label of the table to read from
%macro MEM_SWEEP 3
%assign idx 0
%rep 256
%1 %2, [rel %3 + idx]
%assign idx idx+1
%endrep
%endmacro

; Runs MEM_SWEEP for the 8-, 16-, 32- and 64-bit forms, in that order.
;   %1 = instruction mnemonic
;   %2..%5 = 8/16/32/64-bit names of the destination register
%macro SWEEP_ALL_WIDTHS 5
MEM_SWEEP %1, %2, .data1
MEM_SWEEP %1, %3, .data2
MEM_SWEEP %1, %4, .data4
MEM_SWEEP %1, %5, .data8
%endmacro

; Emits the values 0..255 using the given data directive.
;   %1 = data directive (db, dw, dd or dq)
%macro SEQ_TABLE 1
%assign seq 0
%rep 256
%1 seq
%assign seq seq+1
%endrep
%endmacro

mov rax, 0x4142434445464748
mov rbx, 0x5152535455565758
mov rcx, 0x6162636465666768

; --- sbb ---
; Register-to-register ladder starting from a cleared carry.
clc
sbb al, bl
sbb ax, bx
sbb eax, ebx
sbb rax, rbx

; Memory-operand sweep; carry-in is whatever the ladder above left behind.
SWEEP_ALL_WIDTHS sbb, al, ax, eax, rax

; Same sweep again, this time entered with the carry set.
stc
SWEEP_ALL_WIDTHS sbb, al, ax, eax, rax

; --- adc ---
; Register-to-register ladder starting from a cleared carry.
clc
adc bl, cl
adc bx, cx
adc ebx, ecx
adc rbx, rcx

; Memory-operand sweep; carry-in is whatever the ladder above left behind.
SWEEP_ALL_WIDTHS adc, bl, bx, ebx, rbx

; Same sweep again, this time entered with the carry set.
stc
SWEEP_ALL_WIDTHS adc, bl, bx, ebx, rbx

hlt

; Source tables: the values 0..255 stored as bytes, words, dwords and qwords.
.data1:
SEQ_TABLE db

.data2:
SEQ_TABLE dw

.data4:
SEQ_TABLE dd

.data8:
SEQ_TABLE dq


================================================
FILE: unittests/ASM/FEX_bugs/cmpxchg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344fbca7654",
    "RBX": "0x00000000fbca7654",
    "RCX": "0x61626364fbca7654"
  }
}
%endif

; FEX-Emu had a bug where it was failing to follow zero-extend semantics on
; the destination register when cmpxchg was a success as a 32-bit operation.
; A simple test that does a 32-bit compare exchange with success as 32-bit.

; rax and rbx load the same qword, so the 32-bit compare of eax against ebx
; is guaranteed to match.
mov rax, [rel .data + (8 * 0)]
mov rbx, [rel .data + (8 * 0)]
; rcx is the replacement value. Its upper 32 bits differ from rbx's, its lower
; 32 bits are the same.
mov rcx, [rel .data + (8 * 1)]

; Success path: ebx is written with ecx. The expected RBX in the config has
; its upper 32 bits zeroed, while RAX and RCX keep their full 64-bit values.
cmpxchg ebx, ecx
hlt

.data:
dq 0x41424344_fbca7654
dq 0x61626364_fbca7654


================================================
FILE: unittests/ASM/FEX_bugs/fnsave_fnrstor_size.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x4142434445464748",
    "RCX": "0x5152535455565758"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; FEX-Emu optimizes fnsave and frstor by moving the x87 registers with
; overlapping 16-byte reads and writes. This test checks that those
; instructions never touch memory outside the architecturally defined image
; size, so that changes such as https://github.com/FEX-Emu/FEX/pull/4107 are
; caught by the unit tests.

; Mapped region declared in the config above.
%define REGION_BASE   0x1_0000_0000
%define REGION_SIZE   4096
; Size of the image written by fnsave with the default operand size.
%define IMAGE32_BYTES 108
; Size of the image written by fnsave with the o16 prefix.
%define IMAGE16_BYTES 94
; Size of the guard value stored directly behind an image.
%define CANARY_BYTES  8

; rax = one past the last byte of the mapped region.
mov rax, REGION_BASE + REGION_SIZE

; --- Part 1: images that end exactly at the end of the region ---
; Any access beyond the image size would fault.
fnsave [rax - IMAGE32_BYTES]
frstor [rax - IMAGE32_BYTES]

o16 fnsave [rax - IMAGE16_BYTES]
o16 frstor [rax - IMAGE16_BYTES]

; --- Part 2: images that end directly in front of a guard qword ---
; The guard occupies the last 8 bytes of the region and is read back after the
; save; any overrun by fnsave would overwrite it.

; Default operand size, guard kept in rbx.
mov rbx, 0x4142434445464748
mov [rax - CANARY_BYTES], rbx
fnsave [rax - (IMAGE32_BYTES + CANARY_BYTES)]
mov rbx, [rax - CANARY_BYTES]

; o16 form, guard kept in rcx.
mov rcx, 0x5152535455565758
mov [rax - CANARY_BYTES], rcx
o16 fnsave [rax - (IMAGE16_BYTES + CANARY_BYTES)]
mov rcx, [rax - CANARY_BYTES]

hlt


================================================
FILE: unittests/ASM/FEX_bugs/fxrstor_bug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x782366128a6789f2", "0xf881"],
    "XMM1": ["0xa56724426b4c72f1", "0xd415"],
    "XMM2": ["0xb76472a37404b890", "0x10ad"],
    "XMM3": ["0xb9533de8ad0967d2", "0xb615"],
    "XMM4": ["0x30ae762c30b556de", "0x9af3"],
    "XMM5": ["0xe86b2b5774313a97", "0x1f6d"],
    "XMM6": ["0x48510f254d2fa47f", "0x4886"],
    "XMM7": ["0", "0"]
  }
}
%endif

; FEX had a bug where fxrstor wasn't restoring x87 registers in the correct order.
; This test also relies on fxsave saving in the correct order.
;
; Flow: load known x87 state -> fxsave -> clobber the x87 state -> fxrstor ->
; fxsave into a second buffer. The second image is the one that gets checked,
; so the expected values are only produced if fxrstor put every register back
; in its original slot.

; Init x87
fninit

; Load registers with zero
times 8 fldz

; Empty them
times 8 ffreep

; Load seven of them with random data
fld tword [rel .random_data + (0 * 10)]
fld tword [rel .random_data + (1 * 10)]
fld tword [rel .random_data + (2 * 10)]
fld tword [rel .random_data + (3 * 10)]
fld tword [rel .random_data + (4 * 10)]
fld tword [rel .random_data + (5 * 10)]
fld tword [rel .random_data + (6 * 10)]

; Save the data
fxsave [rel .save_data]

; Clobber the live x87 state so a missing or incorrect restore is visible:
; reset the stack and fill every register with zero.
fninit
times 8 fldz

; Restore the state saved above. This is the instruction under test.
fxrstor [rel .save_data]

; Save the restored state into a separate buffer for inspection.
fxsave [rel .restored_data]

; Load the restored x87 register data in to vectors for testing.
; The ST0-ST7 slots start at offset 0x20 of the image, 16 bytes apiece.
movups xmm0, [rel .restored_data + 0x20 + (0 * 16)]
movups xmm1, [rel .restored_data + 0x20 + (1 * 16)]
movups xmm2, [rel .restored_data + 0x20 + (2 * 16)]
movups xmm3, [rel .restored_data + 0x20 + (3 * 16)]
movups xmm4, [rel .restored_data + 0x20 + (4 * 16)]
movups xmm5, [rel .restored_data + 0x20 + (5 * 16)]
movups xmm6, [rel .restored_data + 0x20 + (6 * 16)]
movups xmm7, [rel .restored_data + 0x20 + (7 * 16)]
; Keep only the low 80 bits of each slot; the upper 48 bits are reserved.
pand xmm0, [rel .x87_mask]
pand xmm1, [rel .x87_mask]
pand xmm2, [rel .x87_mask]
pand xmm3, [rel .x87_mask]
pand xmm4, [rel .x87_mask]
pand xmm5, [rel .x87_mask]
pand xmm6, [rel .x87_mask]
pand xmm7, [rel .x87_mask]

hlt

align 4096
; Image written by the first fxsave and consumed by fxrstor (512 bytes).
.save_data:
times 64 dq 0

; Image written by the second fxsave, after the restore (512 bytes).
; Both buffers are 512 bytes, so this one and .x87_mask stay 16-byte aligned.
.restored_data:
times 64 dq 0

.x87_mask:
dq 0xffff_ffff_ffff_ffff, 0xffff

align 16
; 256bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/FEX_bugs/fxsave_bug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x782366128a6789f2", "0xf881"],
    "XMM1": ["0xa56724426b4c72f1", "0xd415"],
    "XMM2": ["0xb76472a37404b890", "0x10ad"],
    "XMM3": ["0xb9533de8ad0967d2", "0xb615"],
    "XMM4": ["0x30ae762c30b556de", "0x9af3"],
    "XMM5": ["0xe86b2b5774313a97", "0x1f6d"],
    "XMM6": ["0x48510f254d2fa47f", "0x4886"],
    "XMM7": ["0", "0"]
  }
}
%endif

; Regression test: FEX used to write the x87 registers into the fxsave image
; in the wrong order.

; Copies consecutive ST slots of the fxsave image into the listed registers.
; The first register receives slot 0, the next slot 1, and so on. The slots
; start at offset 0x20 of the image and are 16 bytes apart.
;   %1.. = destination xmm registers
%macro FETCH_ST_SLOTS 1-*
%assign slot 0
%rep %0
movups %1, [rel .save_data + 0x20 + (slot * 16)]
%assign slot slot+1
%rotate 1
%endrep
%endmacro

; Masks each listed register with .x87_mask (keeps the low 80 bits).
;   %1.. = xmm registers to mask
%macro KEEP_80_BITS 1-*
%rep %0
pand %1, [rel .x87_mask]
%rotate 1
%endrep
%endmacro

; Start from a freshly initialized x87 unit.
fninit

; Fill all eight registers with zero...
%rep 8
fldz
%endrep

; ...and free them again, leaving the stack empty.
%rep 8
ffreep
%endrep

; Push seven 80-bit values taken from the random data table.
%assign slot 0
%rep 7
fld tword [rel .random_data + (slot * 10)]
%assign slot slot+1
%endrep

; Write the state image that is being verified.
fxsave [rel .save_data]

; Pull the eight saved ST slots into xmm0-xmm7 for comparison against the
; expected values in the config, then strip the bytes above bit 79.
FETCH_ST_SLOTS xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
KEEP_80_BITS xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7

hlt

align 4096
.save_data:
times 64 dq 0

.x87_mask:
dq 0xffff_ffff_ffff_ffff, 0xffff

align 16
; 256bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/FEX_bugs/issue5084_crossblock_const.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0"
  }
}
%endif

; Regression test for issue 5084
; This test was mostly reverse engineered from the IR in 5084.
; Failed with '-n 500 -m' with the error message:
; %51: Arg[0] references invalid %24
;
; The only checked result is RAX == 0 (see config above), so the test mainly
; verifies that this code runs through to hlt.

mov rax, 0
mov rbx, 0xe0000000
mov rcx, 1

; rcx is 1, so the branch is always taken at runtime. The .fallthrough path
; below is never executed but is still part of the assembled code.
test rcx, rcx
jnz .late_target

; Both paths perform the same x87 store to the same [rbx + 0x1234] address.
; NOTE(review): going by the file name, the point is presumably that both
; blocks use the same address constant - confirm against issue 5084.
.fallthrough:
fld1
fstp tword [rbx + 0x1234]
mov rax, 0
hlt

.late_target:
fld1
fstp tword [rbx + 0x1234]
mov rax, 0
hlt


================================================
FILE: unittests/ASM/FEX_bugs/mmx_x87_register_conflating.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4b497b9e152430ec", "0x019f45087baf8cb8"],
    "XMM1": ["0x089f12645cb5e036", "0x5a6af6f5102c523c"],
    "XMM2": ["0x4c619a6f28bed383", "0x6892c52557512e58"],
    "XMM3": ["0x8ee99e09628ebdc3", "0xa7688af8254ea454"],
    "XMM4": ["0x805080d92966f25a", "0x31f967965d3a07cb"],
    "XMM5": ["0x2828cb0ce87848be", "0xc3291045169390b4"],
    "XMM6": ["0x755ae99230a898c3", "0x3d209d2dd4bad59f"],
    "XMM7": ["0x3a670269bb42b2f8", "0x05173dbeda9e86ab"],
    "XMM8": ["0x275bf419e2f3b099", "0x276d21a284ab2912"]
  },
  "HostFeatures": ["XSAVE"]
}
%endif

; FEX-Emu had a bug where we were conflating x87 registers as mmx registers and vice-versa depending on caching behaviour.
; This unittest semi-aggressively mixes x87 and mmx with xsave/xrstor that would have failed with FEX's caching.

fninit ; Initialize x87

; Load all x87 registers
fld tword [rel .random_data + (0 * 10)]
fld tword [rel .random_data + (1 * 10)]
fld tword [rel .random_data + (2 * 10)]
fld tword [rel .random_data + (3 * 10)]
fld tword [rel .random_data + (4 * 10)]
fld tword [rel .random_data + (5 * 10)]
fld tword [rel .random_data + (6 * 10)]
fld tword [rel .random_data + (7 * 10)]

; Save the data based on bits in EDX:EAX
; Just save everything
mov edx, -1
mov eax, -1
xsave64 [rel .xsave_data]

; Load all MMX registers (data just past what x87 loaded).
movq mm0, [rel .random_data + (8 * 10) + (0 * 8)]
movq mm1, [rel .random_data + (8 * 10) + (1 * 8)]
movq mm2, [rel .random_data + (8 * 10) + (2 * 8)]
movq mm3, [rel .random_data + (8 * 10) + (3 * 8)]
movq mm4, [rel .random_data + (8 * 10) + (4 * 8)]
movq mm5, [rel .random_data + (8 * 10) + (5 * 8)]
movq mm6, [rel .random_data + (8 * 10) + (6 * 8)]
movq mm7, [rel .random_data + (8 * 10) + (7 * 8)]

; Do some operation on the MMX registers
pxor mm0, mm1
pxor mm1, mm2
pxor mm2, mm3
pxor mm3, mm4
pxor mm4, mm5
pxor mm5, mm6
pxor mm6, mm7
pxor mm7, mm0

; Store MMX registers
movq [rel .temp_result + (0 * 8)], mm0
movq [rel .temp_result + (1 * 8)], mm1
movq [rel .temp_result + (2 * 8)], mm2
movq [rel .temp_result + (3 * 8)], mm3
movq [rel .temp_result + (4 * 8)], mm4
movq [rel .temp_result + (5 * 8)], mm5
movq [rel .temp_result + (6 * 8)], mm6
movq [rel .temp_result + (7 * 8)], mm7

; Clear MMX state
emms

; Load all x87 registers with new data
; This ensures the top 16-bits of every x87 word is different.
fld tword [rel .random_data + (8 * 10)]
fld tword [rel .random_data + (9 * 10)]
fld tword [rel .random_data + (10 * 10)]
fld tword [rel .random_data + (11 * 10)]
fld tword [rel .random_data + (12 * 10)]
fld tword [rel .random_data + (13 * 10)]
fld tword [rel .random_data + (14 * 10)]
fld tword [rel .random_data + (15 * 10)]

; Reload context, including original x87 state
mov edx, -1
mov eax, -1
xrstor64 [rel .xsave_data]

; Save the x87 registers.
fstp tword [rel .temp_x87_result + (0 * 10)]
fstp tword [rel .temp_x87_result + (1 * 10)]
fstp tword [rel .temp_x87_result + (2 * 10)]
fstp tword [rel .temp_x87_result + (3 * 10)]
fstp tword [rel .temp_x87_result + (4 * 10)]
fstp tword [rel .temp_x87_result + (5 * 10)]
fstp tword [rel .temp_x87_result + (6 * 10)]
fstp tword [rel .temp_x87_result + (7 * 10)]

; Load the results in to XMM registers
; First load the MMX registers
movups xmm0, [rel .temp_result + (0 * 16)]
movups xmm1, [rel .temp_result + (1 * 16)]
movups xmm2, [rel .temp_result + (2 * 16)]
movups xmm3, [rel .temp_result + (3 * 16)]

; Now load the 80 bytes of x87 registers
movups xmm4, [rel .temp_x87_result + (0 * 16)]
movups xmm5, [rel .temp_x87_result + (1 * 16)]
movups xmm6, [rel .temp_x87_result + (2 * 16)]
movups xmm7, [rel .temp_x87_result + (3 * 16)]
movups xmm8, [rel .temp_x87_result + (4 * 16)]

hlt

align 4096

.temp_x87_result:
times (16 * 8) db 0

.temp_result:
times (8 * 8) db 0

; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86

align 64
.xsave_data:
times 4096 db 0


================================================
FILE: unittests/ASM/FEX_bugs/mov_address_size_override.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDX": "0x5152535455565758",
    "R8": "0x5152535455565758"
  }
}
%endif
; Regression test: a 64-bit register-to-register mov that carries redundant address size (0x67) prefixes must still copy all 64 bits.
; Expected result: RDX ends up equal to R8 (0x5152535455565758).
;
; FEX-Emu had a bug where address size override was overriding destination and source sizes on operations not affecting memory.
; This showed up as a bug in OpenSSL where GCC was padding move instructions with the address size prefix, knowing that it wouldn't do anything.
; FEX interpreted this address size prefix as making the destination 32-bit, resulting in zero-extending the 64-bit source.
; Ensure this doesn't happen again.
;
; Seed rdx with a value that differs from r8, so the expected RDX can only come from the prefixed mov below.
mov rdx, 0x414243444546748
mov r8, 0x5152535455565758
; NOTE(review): jump to the very next instruction; presumably this separates the setup above from the prefixed mov below
; (other FEX_bugs tests describe the same pattern as "breaking visibility") - confirm.
jmp .test
.test:

; Add a couple address size prefixes
; The raw 0x67 bytes are emitted directly in front of the next instruction, so they become prefixes of `mov rdx, r8`.
db 0x67, 0x67
mov rdx, r8
hlt


================================================
FILE: unittests/ASM/FEX_bugs/non_fatal_syscall.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Regression test: an `int 0x80` that sits on a path which is never executed must not take the emulator down.
; Expected result: RAX is still 0 when the test halts.
;
; FEX-Emu had a bug where an `int 0x80` instruction that never gets executed would cause the emulator to assert.
; This was due to multiblock's static analysis finding a 32-bit syscall in some code down a branch that would never execute.
; Ensure this doesn't take down the emulator by doing something similar.
mov rax, 0
cmp rax, 0

; rax was just set to 0, so the compare is equal and this branch is always taken.
je .end

; Never executed at runtime: these two instructions exist only so that the 32-bit syscall is present in the code after the branch.
mov rax, 1
int 0x80

.end:
hlt


================================================
FILE: unittests/ASM/FEX_bugs/nzcv_implicit_clobber.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "XMM0": ["0", "0"]
  }
}
%endif

; Regression test: flags produced before an instruction that implicitly clobbers host NZCV must still be correct after it.
; Expected result: RAX is 0 (the `jnz` below is taken) and XMM0 is all zeros.
;
; FEX has a bug with NZCV host flag usage where IR operations that implicitly clobber flags might not save emulated eflags correctly in all instances.
; This tests one particular instance of `ImplicitFlagClobber`.
movaps xmm0, [rel .data]

; Calculate ZF up-front
; eax = 1 + 1 = 2, which is non-zero, so ZF is clear after the add.
mov eax, 1
add eax, eax

; This jump is necessary to break visibility.
jmp .begin
.begin:

; minss turns into VFMinScalarInsert which implicitly clobbers Arm64 flags.
; Potentially any instruction that uses an IR operation that uses `ImplicitFlagClobber` would break.
minss   xmm0, xmm0

; Ensure the flags calculated by the `add eax, eax` are consumed.
; ZF should be unset from `add 1, 1`.
; If minss clobbers Arm64 host flags then the `fcmp` that Arm64 uses will overwrite nzcv, thus setting the ZF flag.
; This is since `fcmp #0, #0` will set nzcv to `0110`.
jnz .next
; Failure path: reached only if ZF was wrongly set; RAX = 1 does not match the expected RegData.
mov eax, 1
hlt

.next:
; Success path: ZF was still clear.
mov eax, 0
hlt

; 16 bytes of zeros loaded by the movaps at the top of the test, placed on a 16-byte boundary.
align 16
.data:
dd 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/nzcv_rmw.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xcafe"
  }
}
%endif

; Regression test: a flag-preserving variable shift must observe flags that were produced by an earlier, deferred flag calculation.
; Expected result: RAX is 0xcafe. Either jump to fexi_fexi_im_so_broken leaves 0xdead in RAX instead.
;
; FEX had a bug where an NZCV RMW would fail to calculate previously deferred
; flags, resulting in garbage flag values

; First zero NZCV and break visibility
; 0 + 1 = 1 is non-zero, so ZF is clear and the jz below must not be taken.
mov rax, 0
add rax, 1
jz fexi_fexi_im_so_broken

jmp .begin
.begin:

; NZCV is zero. Set it to something nonzero with a deferred flag operation.
; popcnt of a zero source sets ZF.
mov rax, 0
popcnt rax, rax

; Now do a variable shift that preserves flags. This would clear ZF if not for
; the condition on the shift flags.
; The shift count in cl is 0, and a shift by zero leaves the flags untouched.
mov rbx, 100
mov cl, 0
sar rbx, cl

; ZF should still be set.
jnz fexi_fexi_im_so_broken

; Success path.
mov rax, 0xcafe
hlt

; Failure path: reached when one of the flag checks above observed a wrong ZF.
fexi_fexi_im_so_broken:
mov rax, 0xdead
hlt


================================================
FILE: unittests/ASM/FEX_bugs/nzcv_spill_enderlilies.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "XMM0": ["0", "0"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Regression test: the flags set by `dec` must survive until the `cmovne` and the `jne` at the end of the loop body,
; even though a `sha1rnds4` sits in between.
; Expected result: the loop body runs exactly once, RAX is 0 and XMM0 holds the zeros loaded from .data_vecs.
;
; FEX-Emu has a bug around NZCV flags getting spilled and filled.
; The bug comes down to NZCV actually being 32-bit but our IR incorrectly assumed that all flags were 8-bit.
; Once a spill situation happened, it would only store and reload the lower 8-bits of the NZCV flag which wasn't correct.
; This caused this code to infinite loop and read past memory and crash.

; Code found from Ender Lilies in their `sha1_block_data_order` function which is significantly longer than this snippet.
; rax is the loop counter; starting at 1 means the first `dec` already brings it to zero.
lea rsi, [rel .data_vecs]
mov rax, 1

; Break visibility
jmp loop_top
loop_top:

; Decrement counter.
; This is the only instruction in the loop body that writes the flags; ZF is set once rax reaches 0.
dec     rax

; Load rsi + 0x40 into rbx
lea     rbx, [rsi+0x40]

; Move rbx into rsi, incrementing the pointer by 64-bytes if rax isn't zero.
cmovne  rsi, rbx

; Do a sha1rnds4, which uses enough temporaries to spill NZCV which picks up a crash.
sha1rnds4 xmm0, xmm0, 0x0

; This memory access will crash once we loop too many times.
movdqu  xmm0, [rsi]

; Jump back to the top
; Still consumes the ZF produced by the `dec` above; none of the instructions in between write the flags.
jne     loop_top

hlt

; 6 rows * 4 qwords * 8 bytes = 192 bytes of zeros read through rsi.
.data_vecs:
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/overlapping_memcpy_bug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000b752f1a6",
    "RBX": "0x0000000065a37fd3",
    "RCX": "0x00000000fafc3a00",
    "RDX": "0x0000000087421ee2"
  },
  "MemoryRegions": {
    "0xf0000000": "4096"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; FEX-Emu had a bug where movs-based copies with overlapping regions produced incorrect results.
; Since this is dealing with a large amount of data for testing purposes, CRC the results to ensure they were copied correctly.
; See https://github.com/FEX-Emu/FEX/issues/3498 for more information.

; Volatile rcx, rdi, rsi
; Emits a `rep`-prefixed string operation with the direction flag cleared.
; Parameters:
;   %1 - string instruction placed after the `rep` prefix (the callers in this file pass movsq or movsd)
;   %2 - element count, loaded into rcx
;   %3 - destination address, loaded into rdi
;   %4 - source address, loaded into rsi (callers may pass rsi itself, which makes the first mov a no-op)
%macro do_rep_movs_op 4
  ; Clear the direction flag so the string operation walks towards higher addresses.
  cld
  ; rsi is loaded first; this matters only if %2 or %3 were given as rsi, which no caller in this file does.
  mov rsi, %4
  mov rdi, %3
  mov rcx, %2

  ; NOTE(review): jump to the very next instruction through a macro-local label; presumably this separates the
  ; register setup above from the rep operation below (other FEX_bugs tests call this "breaking visibility") - confirm.
  jmp %%1
  %%1:

  rep %1
%endmacro

; Returns a crc32 of the memory region passed in.
; Parameters:
;   %1 - address of the first byte of the region
;   %2 - address one past the last byte of the region (the byte count is %2 - %1)
; The region must not be empty: with a count of zero the `loop` below would decrement rcx past zero and keep iterating.
; NOTE(review): `crc32` is an SSE4.2 instruction, while the CONFIG header of this file lists "SSE4.1" under
; HostFeatures - confirm that the listed feature is the intended requirement.
; Volatile: rax, rbx, rcx
; Return in rax.
%macro do_crc 2
  ; NOTE(review): jump to the very next instruction through a macro-local label; presumably the same
  ; "break visibility" pattern used by other FEX_bugs tests - confirm.
  jmp %%1
  %%1:

  ; NOTE(review): nothing in this macro uses the direction flag, so this cld looks redundant - confirm before removing.
  cld
  ; rax is the running checksum (starts at 0), rbx the read cursor, rcx the number of bytes left.
  mov rax, 0
  mov rbx, %1
  mov rcx, %2
  sub rcx, rbx
  %%2:
    ; Fold one byte into the checksum, advance the cursor, and repeat until rcx reaches zero.
    crc32 eax, byte [rbx]
    inc rbx
    loop %%2
%endmacro

; Test body. Each of the four cases below follows the same steps:
;   1. Fill the 4096-byte region at 0xf000_0000 with a fresh copy of .random_data.
;   2. Run a second rep movs whose source and destination both lie inside that region.
;      One element fewer than the full region is copied, so the shifted cases end exactly at 0xf000_1000.
;   3. Checksum the whole region with do_crc and park the 32-bit result in .results,
;      because do_crc clobbers rax, rbx and rcx on every use.
; The four results are reloaded into eax, ebx, ecx and edx at the end and compared against RegData.

; Fully overlapping copy
; Source and destination are the same address.
lea rsi, [rel .random_data]
do_rep_movs_op movsq, (4096 / 8), 0xf000_0000, rsi
do_rep_movs_op movsq, (4096 / 8) - 1, 0xf000_0000, 0xf000_0000
do_crc         0xf000_0000, 0xf000_1000
mov dword [rel .results + (4 * 0)], eax

; Source partial overlaps the destination, 8-byte
; The destination starts 8 bytes (one element) above the source.
lea rsi, [rel .random_data]
do_rep_movs_op movsq, (4096 / 8), 0xf000_0000, rsi
do_rep_movs_op movsq, (4096 / 8) - 1, 0xf000_0008, 0xf000_0000
do_crc         0xf000_0000, 0xf000_1000
mov dword [rel .results + (4 * 1)], eax

; Source partial overlaps the destination, 4-byte
; Same as the previous case, but with 4-byte elements and a 4-byte offset.
lea rsi, [rel .random_data]
do_rep_movs_op movsd, (4096 / 4), 0xf000_0000, rsi
do_rep_movs_op movsd, (4096 / 4) - 1, 0xf000_0004, 0xf000_0000
do_crc         0xf000_0000, 0xf000_1000
mov dword [rel .results + (4 * 2)], eax

; Destination partial overlaps the source, 8-byte
; The source starts 8 bytes (one element) above the destination.
lea rsi, [rel .random_data]
do_rep_movs_op movsq, (4096 / 8), 0xf000_0000, rsi
do_rep_movs_op movsq, (4096 / 8) - 1, 0xf000_0000, 0xf000_0008
do_crc         0xf000_0000, 0xf000_1000
mov dword [rel .results + (4 * 3)], eax

; Reload the results from memory.
; The 32-bit loads zero the upper halves of rax, rbx, rcx and rdx, matching the zero-extended values in RegData.
mov eax, dword [rel .results + (4 * 0)]
mov ebx, dword [rel .results + (4 * 1)]
mov ecx, dword [rel .results + (4 * 2)]
mov edx, dword [rel .results + (4 * 3)]

hlt

align 4096
.results:
dd 0, 0, 0, 0, 0, 0, 0, 0
; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86


================================================
FILE: unittests/ASM/FEX_bugs/pcmpestri_garbage_rcx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": ["15"],
      "RCX": ["5"],
      "RDX": ["16"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Tests a bug that FEX had with pcmpestri where the returned index would leave data in the upper 32-bits of rcx.
; This instruction writes a 32-bit result to rcx with zero extend to 64-bit.
; Test this by writing data in to rcx before the instruction and ensuring it is erased after the fact.

; xmm2 = first 16 bytes of .data ("ABCDabcdIJKLijkl")
; xmm3 = the 16 bytes located 32 bytes past .data ("Here I Am, Y'aLl")
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte character check (lsb, positive polarity)
; rax/rdx carry the explicit string lengths consumed by pcmpestri.
mov rax, 15 ; Exclude 'l'
mov rdx, 16
; Fill all 64 bits of rcx with a recognisable pattern; the CONFIG block expects rcx == 5 afterwards,
; so any of these bytes surviving in the upper half indicates the bug.
mov rcx, 0x4142434445464748

; imm8 == 0: 'I' (byte 5 of xmm3) is the first byte of xmm3 that also appears in xmm2, hence index 5.
pcmpestri xmm2, xmm3, 0b00000000
hlt

align 32
.data:
; First string (loaded in to xmm2), followed by 16 bytes of padding that are never loaded.
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A49 ; "IJKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Second string at .data + 32 (loaded in to xmm3), followed by 16 bytes of padding that are never loaded.
dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/FEX_bugs/repeat_on_incdec.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0",
    "RCX": "8",
    "RDX": "0",
    "RSP": "0"
  }
}
%endif

; FEX-Emu had a bug where it thought repeat worked on increment and decrement instructions.
; While the prefix can be encoded on the instructions, it is ignored by the hardware implementation.
; This checks to ensure that inc/dec ignore the repeat prefix, and that rcx isn't ever changed from it.

; rsp is used as a plain counter register here; rcx holds a non-zero value that a (wrongly) honoured
; rep prefix would consume. rax points at the in-memory counter.
mov rsp, 0
mov rcx, 8
lea rax, [rel .test]

; One register and one memory increment, each carrying the rep prefix.
rep inc rsp
rep inc byte [rax]

; Matching decrements; with the prefix ignored each counter returns to zero.
rep dec rsp
rep dec byte [rax]

; Read back the memory counter (rbx) and the untouched neighbouring quadword (rdx).
mov rbx, [rel .test]
mov rdx, [rel .test + 8]

hlt

align 4096
; 16 zero bytes: the byte counter modified above plus the following quadword read in to rdx.
.test:
db 0, 0, 0, 0, 0, 0, 0, 0
db 0, 0, 0, 0, 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/repeat_stringops_crash.asm
================================================
%ifdef CONFIG
{
  "MemoryRegions": {
    "0xf0000000": "4096",
    "0xf1000000": "4096"
  }
}
%endif

; FEX-Emu had a bug where a backwards repeating string operation would read past the end of a mapped page.
; This was encountered in https://github.com/FEX-Emu/FEX/pull/3478.
; To ensure we don't read past a page with `rep stos` and `rep movs`, map two disparate pages and copy the entire page.
; If FEX tries reading past the ends of either then it will fault.

; do_rep_op <string instruction> <element size in bytes>
; Runs `rep <instruction>` forwards (DF cleared) across one whole 4096-byte page.
; rax = value to store (r13), rdi = destination page (r14), rsi = source page (r15),
; rcx = number of elements in the page.
%macro do_rep_op 2
  ; Jump to a macro-local label placed on the very next instruction.
  ; NOTE(review): presumably this forces a fresh code block for each expansion - confirm.
  jmp %%1
  %%1:

  cld
  mov rax, r13
  mov rdi, r14
  mov rsi, r15
  mov rcx, (4096 / %2)
  rep %1
%endmacro

; do_backward_rep_op <string instruction> <element size in bytes>
; Same as do_rep_op but with DF set, so the operation walks downwards.
; rdi/rsi are therefore advanced to the last element inside each page before starting.
%macro do_backward_rep_op 2
  jmp %%1
  %%1:

  std
  mov rax, r13
  mov rdi, r14
  mov rsi, r15
  add rdi, (4096 - %2)
  add rsi, (4096 - %2)
  mov rcx, (4096 / %2)
  rep %1
%endmacro

; r15 = source page, r14 = destination page (the two regions listed in the CONFIG block), r13 = fill pattern.
mov r15, 0xf000_0000
mov r14, 0xf100_0000
mov r13, 0x41424344454647

; Forward stores, every element size.
do_rep_op stosb, 1
do_rep_op stosw, 2
do_rep_op stosd, 4
do_rep_op stosq, 8

; Backward stores, every element size.
do_backward_rep_op stosb, 1
do_backward_rep_op stosw, 2
do_backward_rep_op stosd, 4
do_backward_rep_op stosq, 8

; Forward copies, every element size.
do_rep_op movsb, 1
do_rep_op movsw, 2
do_rep_op movsd, 4
do_rep_op movsq, 8

; Backward copies, every element size.
do_backward_rep_op movsb, 1
do_backward_rep_op movsw, 2
do_backward_rep_op movsd, 4
do_backward_rep_op movsq, 8

; No register results are listed in the CONFIG block; reaching hlt without faulting is the pass condition described above.
hlt


================================================
FILE: unittests/ASM/FEX_bugs/rex_b_mmx.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["SSSE3"]
}
%endif

; FEX had a bug where the REX.B prefix would cause out of bounds MMX register access, when real HW ignores its presence

; Emit the prefix byte by hand so that it lands directly in front of the MMX instruction below.
db 0x41 ; REX.B
psignd mm4, mm0

; No register results are listed in the CONFIG block; the test only needs to reach hlt.
hlt


================================================
FILE: unittests/ASM/FEX_bugs/rotate_zero_extend_with_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000045464748",
    "RBX": "0x0000000055565758",
    "RSI": "0x0000000065666768",
    "RDX": "0x0000000075767778",
    "RDI": "0x0000000085868788",
    "RSP": "0x0000000095969798",
    "RBP": "0x0000000015161718",
    "R8":  "0x0000000025262728",
    "R9":  "0x3132333435363738",
    "R10": "0x0000000035363738"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; FEX-Emu had a bug where it forgot to zero extend 32-bit rotate operations even if the rotate value masked to zero.
; Do both immediate encoded rotates and CL encoded rotates to ensure it gets zero extended correctly.
; Tests:
; - rotate left
; - rotate right
; - rotate with carry left
; - rotate with carry right
; - BMI2 rotate right without affecting flags

; Give every destination a distinct pattern in its upper 32 bits so a missing zero-extend is visible.
mov rax, 0x4142434445464748
mov rbx, 0x5152535455565758
mov rsi, 0x6162636465666768
mov rdx, 0x7172737475767778

mov rdi, 0x8182838485868788
mov rsp, 0x9192939495969798
mov rbp, 0x1112131415161718
mov r8,  0x2122232425262728
; r9 is only ever a source (for rorx) so it is expected to keep its full 64-bit value.
mov r9,  0x3132333435363738
mov r10, 0xA1A2A3A4A5A6A7A8

; Rotate count that when masked by 32-bit operating size it becomes zero!
; (cl == 0xE0, and 0xE0 & 0x1F == 0)
mov rcx, 0x41424344454647E0

jmp .test
.test:

; Test that 32-bit rotates whose count masks to zero still zero-extend the result (upper 32 bits cleared)
rol eax, cl
ror ebx, cl

; Test with imm encoded as well
rol esi, 0xE0
ror edx, 0xE0

; Test rotate with carries as well
rcl edi, cl
rcr esp, cl

rcl ebp, 0xE0
rcr r8d, 0xE0

; Test RORX as well
rorx r10d, r9d, 0xE0

hlt


================================================
FILE: unittests/ASM/FEX_bugs/sbbNZCVBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1",
    "R8":  "1",
    "R9":  "0",
    "R10": "0",
    "R11": "1",
    "R12": "1",
    "R13": "1",
    "R14": "0x00000000ffffffff",
    "R15": "0"
  }
}
%endif

; FEX had a bug in pr #3153 which encountered a destination register overwrite in sbbNZCV.
; This was due to the destination register for that IR operation aliases the first source register.
; Once it tried modifying NZCV flags directly in the destination, it managed to clobber the source register.
; Code is based around part of a GCC adx-addcarryx32-2 assembly output snippet.
; Needs memory accesses to ensure const-prop and RA aligns correctly.
mov rdx, 0

; These need to be loaded through memory so const-prop doesn't save it
; Load the values
mov     edx, dword [rel .current_x]
mov     eax, dword [rel .current_y]
movzx   ecx, byte [rel .current_stored_cf]

; Do the operation
add     cl, 0xff ; Clear carry based on stored_cf. Can't use clc here. (0)
; The mov instructions below do not modify flags, so CF from the add is still live for setb/sbb.
; NOTE(review): setb only writes the low byte; the expected R10/R11/R13 values rely on those
; registers being zero on entry - confirm the harness guarantees this.
mov r8d, edx ; Store incoming current_x
mov r9d, eax ; Store incoming current_y
setb r10b ; Store incoming CF

; First sbb: 0 - 1 - CF(0) = 0xFFFF_FFFF with CF set.
sbb     eax, edx
; Get Carry result
setb    dl

setb r11b ; Store outgoing CF

; Store sbb result and carry
mov     dword [rel .current_x], eax ; (0xFFFF_FFFF)
mov     byte [rel .current_stored_cf], dl ; (0x1)

; Second operation
; Load current_y and CF(will be 1)
mov     eax, 0
movzx   edx, byte [rel .current_stored_cf]

movzx   r12, byte [rel .current_stored_cf] ; Store incoming CF

add     dl, 0xff ; Set carry based on stored_cf. Can't use stc here. (1)
; Do the operation

mov r15d, eax ; Store EAX prior to SBB
mov r14d, dword [rel .current_x] ; Store current_x
setb r13b ; Store incoming CF

; Second sbb: 0 - 0xFFFF_FFFF - CF(1) = 0 with CF set.
sbb     eax, dword [rel .current_x]
setb    dl  ; Set if CF=1

; sbb results in eax now
; Move carry result to ebx
; r15 = EAX prior to second sbb
; r14 = CurrentX prior to second sbb (-1)
; r13 = Incoming CF prior to second sbb (calculated from setb)
; r12 = Incoming CF prior to second sbb (calculated from memory)
; r11 = Outgoing CF from first sbb (calculated from setb)
; r10 = Incoming CF for first sbb (calculated from setb)
; r9 = store incoming current_y to first sbb
; r8 = store incoming current_x to first sbb

movzx ebx, dl

hlt

align 4096
; Working variables; each is a full quadword although the code above only accesses the low dword/byte.
.current_x:
dq 1

.current_y:
dq 0

.current_stored_cf:
dq 0


================================================
FILE: unittests/ASM/FEX_bugs/smallvectorload_regreg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000078", "0x0000000000000077"],
    "XMM1": ["0x0000000000000078", "0x0000000000000077"],
    "XMM2": ["0x0000000000000078", "0x0000000000000077"],
    "XMM3": ["0x0000000000000078", "0x7800000000000077"],
    "XMM4": ["0x0000000000000078", "0"],
    "XMM5": ["0x0000000000000078", "0"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; FEX-Emu had a bug with vector loadstore instructions where 16-bit and 8-bit vector loadstores with reg+reg source would assert in the code emitter.
; This affected both vector loads and stores. SSE 8-bit and 16-bit are quite uncommon so this isn't encountered frequently.
; Tests a few different instructions that access 16-bit and 8-bit with loads and stores.

; rax = source data, rbx = scratch buffer for the stores.
; rcx is zero so every [base + index*scale] form below resolves to the base address while still
; being encoded as a reg+reg addressing mode.
lea rax, [rel .data]
lea rbx, [rel .data_temp]
mov rcx, 0
jmp .test
.test:

; 16-bit loads
; pmovzxbq reads two bytes (0x78, 0x77) and zero-extends each in to a quadword.
; Every index scale is exercised.
pmovzxbq xmm0, [rax + rcx]
pmovzxbq xmm1, [rax + rcx*2]
pmovzxbq xmm2, [rax + rcx*4]
pmovzxbq xmm3, [rax + rcx*8]

; 8-bit load
; Inserts byte 0x78 in to the top byte (element 15) of xmm3.
pinsrb xmm3, [rax + rcx], 1111b

; 8-bit store
pextrb [rbx + rcx], xmm0, 0

; Load the result back
movaps xmm4, [rbx + rcx]

; 16-bit store
; This previously repeated the 8-bit pextrb, so the 16-bit reg+reg store path was never exercised.
; Word 0 of xmm0 is 0x0078, so the buffer (and the expected XMM5 value) is unchanged by using pextrw.
pextrw [rbx + rcx], xmm0, 0

; Load the result back
movaps xmm5, [rbx + rcx]

hlt

align 4096
.data:
dq 0x7172737475767778
dq 0x4142434445464748

; 16-byte zeroed scratch buffer targeted by the stores above.
.data_temp:
dq 0,0


================================================
FILE: unittests/ASM/FEX_bugs/tls_vector_element.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x4142434445464748",
    "RCX": "0x0000000000000056",
    "RDX": "0x4142434445464748",
    "RSI": "0x0000000000004748",
    "RDI": "0x4142434445464748",
    "RSP": "0x0000000045464748",
    "RBP": "0x6162636465666768",
    "R8": "0x4142434445464748",
    "R9": "0x4142434445464748",
    "R10": "0x0000000000000056",
    "R11": "0x4142434445464748",
    "R12": "0x4142434445464748",
    "R13": "0x0000000000004748",
    "R14": "0x0000000045464748",
    "R15": "0x6162636465666768",
    "XMM0": ["0x5152535455565758", "0x4142434445464748"],
    "XMM1": ["0x4142434445464748", "0x6162636465666768"],
    "XMM2": ["0x5152535455564858", "0x6162636465666768"],
    "XMM3": ["0x5152535455565758", "0x4142434445464748"],
    "XMM4": ["0x4142434445464748", "0x6162636465666768"],
    "XMM5": ["0x4546474855565758", "0x6162636465666768"],
    "XMM6": ["0x5152535455565758", "0x4142434445464748"],
    "XMM7": ["0x5152535447485758", "0x6162636465666768"],
    "XMM8": ["0x5152535455565758", "0x4142434445464748"],
    "XMM9": ["0x4142434445464748", "0x4142434445464748"],
    "XMM10": ["0x5152535455564858", "0x6162636465666768"],
    "XMM11": ["0x5152535455565758", "0x4142434445464748"],
    "XMM12": ["0x4142434445464748", "0x4142434445464748"],
    "XMM13": ["0x4546474855565758", "0x6162636465666768"],
    "XMM14": ["0x5152535455565758", "0x4142434445464748"],
    "XMM15": ["0x5152535447485758", "0x6162636465666768"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; FEX-Emu had a bug where TLS vector element loadstores weren't correctly prefixing the segment on the address.
; This caused a crash in the game Halls of Torment (steamid 2218750) where it had some TLS vector data loaded with movhps.
; This tests all the vector element loadstores that FEX had missed.

; Setup TLS segment
; NOTE(review): assumes 0xe000_0000 is mapped and zero-filled by the test harness; the byte/word/dword
; extract results read back as full quadwords below depend on the surrounding bytes being zero - confirm.
mov rax, 0xe000_0000
wrgsbase rax

; Seed every vector register with the same known pattern so each partial insert is distinguishable.
movups xmm0, [rel .data_setup]
movups xmm1, [rel .data_setup]
movups xmm2, [rel .data_setup]
movups xmm3, [rel .data_setup]
movups xmm4, [rel .data_setup]
movups xmm5, [rel .data_setup]
movups xmm6, [rel .data_setup]
movups xmm7, [rel .data_setup]
movups xmm8, [rel .data_setup]
movups xmm9, [rel .data_setup]
movups xmm10, [rel .data_setup]
movups xmm11, [rel .data_setup]
movups xmm12, [rel .data_setup]
movups xmm13, [rel .data_setup]
movups xmm14, [rel .data_setup]
movups xmm15, [rel .data_setup]

; Place the test quadword at gs:0 so that it can only be found when the segment base is applied.
mov rax, [rel .data]
mov [gs:0], rax

jmp .test
.test:

; SSE loads
movhps xmm0, [gs:0]
movlps xmm1, [gs:0]
pinsrb xmm2, [gs:0], 1
movhpd xmm3, [gs:0]
movlpd xmm4, [gs:0]
pinsrd xmm5, [gs:0], 1
pinsrq xmm6, [gs:0], 1
pinsrw xmm7, [gs:0], 1

; AVX loads
; The three-operand forms take their untouched half from xmm0, which was already modified by the movhps above.
vmovhps xmm8, xmm0, [gs:0]
vmovlps xmm9, xmm0, [gs:0]
vpinsrb xmm10, [gs:0], 1
vmovhpd xmm11, xmm0, [gs:0]
vmovlpd xmm12, xmm0, [gs:0]
vpinsrd xmm13, [gs:0], 1
vpinsrq xmm14, [gs:0], 1
vpinsrw xmm15, [gs:0], 1

; SSE stores
; Each store targets its own 8-byte slot relative to the gs base.
movhps [gs:(0 * 8)], xmm0
movlps [gs:(1 * 8)], xmm1
pextrb [gs:(2 * 8)], xmm2, 2
movhpd [gs:(3 * 8)], xmm3
movlpd [gs:(4 * 8)], xmm4
pextrw [gs:(5 * 8)], xmm5, 2
pextrd [gs:(6 * 8)], xmm6, 2
pextrq [gs:(7 * 8)], xmm7, 1

; AVX stores
; Same sources as the SSE stores, written to slots 8-15.
vmovhps [gs:(8 * 8)], xmm0
vmovlps [gs:(9 * 8)], xmm1
vpextrb [gs:(10 * 8)], xmm2, 2
vmovhpd [gs:(11 * 8)], xmm3
vmovlpd [gs:(12 * 8)], xmm4
vpextrw [gs:(13 * 8)], xmm5, 2
vpextrd [gs:(14 * 8)], xmm6, 2
vpextrq [gs:(15 * 8)], xmm7, 1

; Load the results back in to GPRs
mov rax, [gs:(0 * 8)]
mov rbx, [gs:(1 * 8)]
mov rcx, [gs:(2 * 8)]
mov rdx, [gs:(3 * 8)]
mov rdi, [gs:(4 * 8)]
mov rsi, [gs:(5 * 8)]
mov rsp, [gs:(6 * 8)]
mov rbp, [gs:(7 * 8)]
mov r8, [gs:(8 * 8)]
mov r9, [gs:(9 * 8)]
mov r10, [gs:(10 * 8)]
mov r11, [gs:(11 * 8)]
mov r12, [gs:(12 * 8)]
mov r13, [gs:(13 * 8)]
mov r14, [gs:(14 * 8)]
mov r15, [gs:(15 * 8)]

hlt

; Quadword copied to gs:0 and used as the source of every element load.
.data:
dq 0x4142434445464748

; Initial 128-bit pattern for all sixteen vector registers.
.data_setup:
dq 0x5152535455565758, 0x6162636465666768

; NOTE(review): .data_result is not referenced anywhere in this file; results are stored via gs instead.
.data_result:
dq 0, 0, 0, 0, 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/vcvtdq2ps_incorrect_size.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"]
}
%endif

; FEX-Emu had a bug in the vcvtdq2ps and vcvtdq2pd instruction where it was incorrectly generating a 256-bit IR operation.
; Due to a quirk of the IR operation handling, this instruction was actually handled "correctly" as a 128-bit operation.
; The problem occurred once there were enough live registers to cause spilling, and the register spiller tries to spill the full result.
; The full result in this case was described as a 256-bit operation when it was supposed to be only a 128-bit operation.
; This was found in `Aperture Desk Job` in `libphonon.so` in function `own_ipps_sLn_L9LAynn`.
; NOTE(review): the instructions below use the ymm forms; confirm the operation sizes quoted above against the original fix.
jmp .test

.test:
; Load eight ymm registers so that many vector values are live at once (the condition described above for spilling).
vmovups ymm4,  [rel data_7ffde364df00]
vmovups ymm5,  [rel data_7ffde364df00]
vmovups ymm6,  [rel data_7ffde364df00]
vmovups ymm7,  [rel data_7ffde364df00]
vmovups ymm8,  [rel data_7ffde364df00]
vmovups ymm9,  [rel data_7ffde364df00]
vmovups ymm10,  [rel data_7ffde364df00]
vmovups ymm11,  [rel data_7ffde364df00]

vpsubd  ymm12, ymm0, ymm4
vpsubd  ymm13, ymm1, ymm4
; The conversions under test.
vcvtdq2ps ymm2, ymm2
vcvtdq2ps ymm14, ymm14
vmovmskps ecx, ymm15
; No register results are listed in the CONFIG block; the test only needs to reach hlt.
hlt

align 32
; 32 bytes of zeroes used as the source for every load above.
data_7ffde364df00:
dq 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/vgather_xmm4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; FEX had a bug where VSIB indexing didn't allow xmm4/ymm4 to be encoded inside of the VSIB due to legacy SIB behaviour
; This ensures that VSIB with xmm4 is allowed to work.
; 128-bit
; 1x displacement
; 32-bit indexes

lea rax, [rel .data_mid]

vmovapd ymm15, [rel .data]

; Zero mask
vmovaps xmm4, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm15, [xmm4 * 1 + rax], xmm1

hlt

align 32

; Masks only care about the sign bit.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Indexing is a signed 32-bit integer.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/FEX_bugs/vmov_size_test.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0", "0", "0"],
    "XMM1":  ["0x4142434445464748", "0", "0x7172737475767778", "0x8182838485868788"],
    "XMM2":  ["0x0000000041424344", "0", "0", "0"],
    "XMM3":  ["0x0000000041424344", "0", "0x7172737475767778", "0x8182838485868788"]
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; FEX-Emu had a bug where vmovq was loading 128-bits worth of data instead of 64-bits.
; This ensures that {v,}mov{d,q} all load the correct amount of data through a test that will fault if it loads too much.
; Every access below ends exactly on the last byte of the 4096-byte region declared in MemoryRegions,
; so an access wider than the instruction's operand size runs past the end of that region.

; Address at the last eight bytes
mov rax, 0x100000000 + 4096-8

; Address at the last 4 bytes
mov rbx, 0x100000000 + 4096-4

mov rcx, 0x4142434445464748

; Store data using GPR
; The dword at rbx is the upper half of this qword (0x41424344).
mov [rax], rcx

; Setup vector with data
; All four registers start out fully populated so the expected results show which bits each move clears.
vmovaps ymm0, [rel .data]
vmovaps ymm1, [rel .data]
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data]

; 64-bit tests

; Load with vmovq to ensure we don't try loading too much data
; Expected XMM0: only the low 64 bits are non-zero.
vmovq xmm0, qword [rax]

; Also test SSE2 version
; Expected XMM1: bits 127:64 are zero, bits 255:128 keep the values loaded from .data.
movq xmm1, qword [rax]

; Also test MOVQ stores
vmovq qword [rax], xmm0

; Also test SSE2 version
movq qword [rax], xmm1

; 32-bit tests
; Load with vmovd to ensure we don't try loading too much data
vmovd xmm2, dword [rbx]

; Also test SSE2 version
movd xmm3, dword [rbx]

; Also test MOVD stores
vmovd dword [rbx], xmm2

; Also test SSE2 version
movd dword [rbx], xmm3

hlt

; 32-byte alignment is required by the vmovaps ymm loads above
align 32
.data:
dq 0x5152535455565758, 0x6162636465666768
dq 0x7172737475767778, 0x8182838485868788


================================================
FILE: unittests/ASM/FEX_bugs/vroundscalar_sve256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x818283843f800000", "0x9192939495969798", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; FEX-Emu had a bug in its 256-bit SVE implementation of AVX where scalar round with insert wasn't inserting correctly.
; This tests to ensure that the sources are merged correctly.
; The test uses VEX-encoded instructions, so it declares the AVX host feature like the other AVX tests do.

; Fill both registers with recognisable patterns so a wrong merge is visible in the result.
vmovaps ymm0, [rel .data_trash]
vmovaps ymm1, [rel .data_trash + 32]

; Expected XMM0:
;  bits  31:0   - 0.5 from .data rounded towards +inf = 1.0 (0x3f800000)
;  bits 127:32  - copied from xmm1 (second half of .data_trash)
;  bits 255:128 - zero
vroundss xmm0, xmm1, [rel .data], 00000010b ; +inf

hlt

align 4096
.data:
dd 0.5, -0.5, 1.5, -1.5
dd 0.5, -0.5, 1.5, -1.5

.data_trash:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778
dq 0x8182838485868788, 0x9192939495969798
dq 0xA1A2A3A4A5A6A7A8, 0xB1B2B3B4B5B6B7B8


================================================
FILE: unittests/ASM/FEX_bugs/x87DecrementStackBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4000000000000000", "0"]
  }
}
%endif

; FEX-Emu had a bug with conflicting results between fxch and fdecstp.
; Fill all eight x87 stack slots, swap the top two, rotate the stack with fdecstp
; and check which value ends up in ST(0).
fld tword [rel .data1]
fld tword [rel .data2]
fld tword [rel .data3]
fld tword [rel .data4]
fld tword [rel .data5]
fld tword [rel .data6]
fld tword [rel .data7]
fld tword [rel .data8]

; ST(0) contains 256.0
; ST(1) contains 128.0
; ...
; ST(7) contains 2.0

; NOTE(review): the jumps presumably force fxch/fdecstp into their own code block - confirm.
jmp .test

.test:
fxch st0, st1
; ST(0) now contains 128.0
; ST(1) now contains 256.0

fdecstp
; The top-of-stack pointer moved down by one, so the old ST(7) is the new ST(0)
; ST(0) now contains 2.0

jmp .end
.end:

; Store ST(0) as a double: 2.0 is 0x4000000000000000
fstp qword [rel .data_result]
; 16-byte load: the result qword plus the zero qword that follows it
movups xmm0, [rel .data_result]

hlt

align 4096
.data1:
dt 2.0
dq 0

.data2:
dt 4.0
dq 0

.data3:
dt 8.0
dq 0

.data4:
dt 16.0
dq 0

.data5:
dt 32.0
dq 0

.data6:
dt 64.0
dq 0

.data7:
dt 128.0
dq 0

.data8:
dt 256.0
dq 0

.data_result:
dq 0
dq 0


================================================
FILE: unittests/ASM/FEX_bugs/x87IncrementStackBug.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4010000000000000", "0"]
  }
}
%endif

; FEX-Emu had a bug with conflicting results between fxch and fincstp.
; Push two values, swap them, rotate the stack with fincstp and check which value ends up in ST(0).
fld tword [rel .data1]
fld tword [rel .data2]

; ST(0) contains 4.0
; ST(1) contains 2.0

; NOTE(review): the jumps presumably force fxch/fincstp into their own code block - confirm.
jmp .test

.test:
fxch st0, st1
; ST(0) now contains 2.0
; ST(1) now contains 4.0

fincstp
; ST(0) now contains 4.0
; ST(7) now contains 2.0

jmp .end
.end:

; Store ST(0) as a double: 4.0 is 0x4010000000000000
fstp qword [rel .data_result]
; 16-byte load: the result qword plus the zero qword that follows it
movups xmm0, [rel .data_result]

hlt

align 4096
; This or zero are incorrect results
.data1:
dt 2.0
dq 0

; Correct result
.data2:
dt 4.0
dq 0

.data_result:
dq 0
dq 0


================================================
FILE: unittests/ASM/FEX_bugs/x87_fprem.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41582d3bc0000000",
    "RBX": "0x41582d3bc0000000",
    "RCX": "0xc1582d3bc0000000",
    "RDX": "0xc1582d3bc0000000",
    "RDI": "0x42d2f6b36dfc3bc0",
    "RSI": "0x42d2f6b36dfc3bc0",
    "RBP": "0xc2d2f6b36dfc3bc0",
    "RSP": "0xc2d2f6b36dfc3bc0"
  }
}
%endif

; FEX-Emu had a bug in the fprem implementation where it was behaving like fprem1
; Do a handful of large fprem operations to ensure it works correctly.

; 64-bit float memory locations
; doremainder <result>, <src1>, <src2>
; Stores fprem(src1, src2) to <result>. Clobbers ax and flags, and leaves the x87 stack balanced.
%macro doremainder 3
  ; Push the divisor (src2) first and the dividend (src1) second,
  ; so that ST(0) = src1 and ST(1) = src2 as fprem expects.
  fld qword %3
  fld qword %2

  ; For large remainders, x86 fprem computes partial remainders and needs to run multiple times.
  %%again:
    ; Get the remainder
    fprem
    ; Check if we need to run again
    ; C2 is bit 10 of the status word, which is bit 2 of ah. It stays set while the reduction is incomplete.
    fnstsw ax
    test ah, 0x4
    jne %%again

  ; Pop one value
  ; This overwrites the divisor in ST(1) with the remainder and pops, leaving the remainder in ST(0).
  fstp st1

  ; Store the result
  fstp qword %1
%endmacro

; Do a handful of remainder checks with different sign combinations.
; The expected results carry the sign of the dividend, regardless of the divisor's sign.
doremainder [rel .data_result + (8 * 0)], [rel .data_big], [rel .data_divisor]
doremainder [rel .data_result + (8 * 1)], [rel .data_big], [rel .data_divisor_negative]
doremainder [rel .data_result + (8 * 2)], [rel .data_big_negative], [rel .data_divisor]
doremainder [rel .data_result + (8 * 3)], [rel .data_big_negative], [rel .data_divisor_negative]

; Test infinities as well
; With an infinite divisor the expected result is the dividend itself.
doremainder [rel .data_result + (8 * 4)], [rel .data_big], [rel .data_inf]
doremainder [rel .data_result + (8 * 5)], [rel .data_big], [rel .data_inf_negative]
doremainder [rel .data_result + (8 * 6)], [rel .data_big_negative], [rel .data_inf]
doremainder [rel .data_result + (8 * 7)], [rel .data_big_negative], [rel .data_inf_negative]

; Load the results in to registers
; rbp and rsp are used as plain data registers here; nothing uses the stack afterwards.
mov rax, qword [rel .data_result + (8 * 0)]
mov rbx, qword [rel .data_result + (8 * 1)]
mov rcx, qword [rel .data_result + (8 * 2)]
mov rdx, qword [rel .data_result + (8 * 3)]
mov rdi, qword [rel .data_result + (8 * 4)]
mov rsi, qword [rel .data_result + (8 * 5)]
mov rbp, qword [rel .data_result + (8 * 6)]
mov rsp, qword [rel .data_result + (8 * 7)]

hlt

align 4096
.data_big:
dq 83403126337775.0

.data_big_negative:
dq -83403126337775.0

.data_divisor:
dq 10000000.0

.data_divisor_negative:
dq -10000000.0

; NASM's built-in infinity token
%define Inf __?Infinity?__
.data_inf:
dq Inf

.data_inf_negative:
dq -Inf

; Eight result slots, one per doremainder invocation
.data_result:
dq 0, 0, 0, 0, 0, 0, 0, 0


================================================
FILE: unittests/ASM/FEX_bugs/x87_integer_indefinite.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8000",
    "RBX": "0x8000",
    "RCX": "0x80000000",
    "RDX": "0x80000000",
    "RSI": "0x8000000000000000",
    "RDI": "0x8000000000000000"
  }
}
%endif

; FEX-Emu had a bug where x87 float to integer conversions weren't converting to the correct "integer indefinite" value for 16-bit conversions.
; Test 16-bit, 32-bit, and 64-bit to ensure correct "integer indefinite" results for all.
; The definition for "integer indefinite" is the smallest negative integer that can be represented.
; This is regardless of the input value being positive or negative.

; fninit masks all x87 exceptions, so the out-of-range conversions store the indefinite value instead of trapping.
fninit

; 16-bit
fld qword [rel .double_larger_than_int16]
fistp word [rel .data_res_pos_16]

fld qword [rel .double_smaller_than_int16]
fistp word [rel .data_res_neg_16]

; 32-bit
fld qword [rel .double_larger_than_int32]
fistp dword [rel .data_res_pos_32]

fld qword [rel .double_smaller_than_int32]
fistp dword [rel .data_res_neg_32]

; 64-bit
fld qword [rel .double_larger_than_int64]
fistp qword [rel .data_res_pos_64]

fld qword [rel .double_smaller_than_int64]
fistp qword [rel .data_res_neg_64]

; Load the results
; The 16-bit and 32-bit results are zero-extended so the upper register bits are known.
movzx rax, word [rel .data_res_pos_16]
movzx rbx, word [rel .data_res_neg_16]

mov ecx, dword [rel .data_res_pos_32]
mov edx, dword [rel .data_res_neg_32]

mov rsi, qword [rel .data_res_pos_64]
mov rdi, qword [rel .data_res_neg_64]

hlt

align 4096
; One-integer larger than what int16_t can hold
.double_larger_than_int16:
dq 32768.0
; One-integer smaller than what int16_t can hold
.double_smaller_than_int16:
dq -32769.0

; One-integer larger than what int32_t can hold
.double_larger_than_int32:
dq 2147483648.0
; One-integer smaller than what int32_t can hold
.double_smaller_than_int32:
dq -2147483649.0

; One-integer larger than what int64_t can hold
.double_larger_than_int64:
dq 9223372036854775808.0
; Closest double below what int64_t can hold.
; -9223372036854775809.0 (INT64_MIN - 1) is not representable as a double and rounds to exactly
; -2^63, which is in range and would not exercise the out-of-range path.
; Doubles of this magnitude are 2048 apart, so the next one down is -(2^63 + 2048).
.double_smaller_than_int64:
dq -9223372036854777856.0

; Result slots are pre-filled with -1 so a conversion that stores nothing is detected.
.data_res_pos_16:
dw -1
.data_res_neg_16:
dw -1

.data_res_pos_32:
dd -1
.data_res_neg_32:
dd -1

.data_res_pos_64:
dq -1
.data_res_neg_64:
dq -1


================================================
FILE: unittests/ASM/FEX_bugs/x87_unordered_cmp_fix.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000cafecafe"
  }
}
%endif

; IsNan() couldn't detect negative NaNs (sign bit set in exponent field).
; This caused __builtin_isunordered() to return wrong values.
; NOTE(review): the NaN used below has exponent word 0x7FFF, i.e. its sign bit is clear - confirm
; whether a sign-bit-set NaN (0xFFFF) should be covered as well.
;
; Each test compares two x87 values with fucomip and checks PF, which is set only for an unordered result.
; fucomip pops only one operand, so one value is left on the x87 stack per test (four in total).

mov rsp, 0xe000_1000

; Test 1: __builtin_isunordered(1.0, 2.0) should return 0
; Pattern: fucomip + setp + test for 0
fld1
lea rdx, [rel two]
fld tword [rdx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jnz test_failed         ; If not 0, test failed (should be ordered)

; Test 2: __builtin_isunordered(1.0, NaN) should return 1
fld1
lea rdx, [rel qnan]
fld tword [rdx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jz test_failed          ; If 0, test failed (should be unordered)

; Test 3: __builtin_isunordered(NaN, 1.0) should return 1
lea rdx, [rel qnan]
fld tword [rdx]
fld1
fucomip st1
setp al
movzx eax, al
test eax, eax
jz test_failed          ; If 0, test failed (should be unordered)

; Test 4: __builtin_isunordered(2.0, 2.0) should return 0 (equal case)
lea rdx, [rel two]
fld tword [rdx]
lea rdx, [rel two]  
fld tword [rdx]
fucomip st1
setp al
movzx eax, al
test eax, eax
jnz test_failed         ; If not 0, test failed (should be ordered)

; All tests passed
mov rax, 0xcafecafe
hlt

test_failed:
; Test failed
mov rax, 0xdeadbeef
hlt

align 8
two:
  dt 2.0

align 8  
qnan:
  ; 80-bit extended value, stored as 64-bit significand followed by the 16-bit sign/exponent word
  dq 0xC000000000000000  ; Quiet NaN with only quiet bit set (no bottom 62 bits) - this breaks IsNan
  dw 0x7FFF              ; Standard NaN exponent (0x7FFF)

================================================
FILE: unittests/ASM/FEX_bugs/xor_flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000004600"
  }
}
%endif

; FEX had a bug where an optimization for canonical zeroing of a register would fail to set flags correctly.
; This broke `Metal Gear Rising: Revengeance`. The title screen geometry was broken.

mov rax, 0
mov rbx, 0
; AH is zero, so this clears SF, ZF, AF, PF and CF
sahf
; Canonical zeroing idiom: must still set ZF and PF
xor rbx, rbx
; Read the flags back into AH
; Expected 0x46 = ZF (0x40) | PF (0x04) | always-set bit 1 (0x02)
lahf
hlt


================================================
FILE: unittests/ASM/FEX_bugs/zero-ah.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0x0"
  }
}
%endif

; FEX had a bug where `mov ah, 0` and `xor ah, ah` would zero the wrong register
; subpart.
; Each case sets al and ah to distinct non-zero values, zeroes ah, and then checks that
; al is untouched and ah is zero. RCX is 0 on success and 0xdeadbeef on failure.

; Case 1: zero ah with a move of an immediate
mov al, 127
mov ah, 234
mov ah, 0

cmp al, 127
jne fexi_fexi_im_so_broken
cmp ah, 0
jne fexi_fexi_im_so_broken

; Case 2: zero ah with xor
mov al, 127
mov ah, 234
xor ah, ah

cmp al, 127
jne fexi_fexi_im_so_broken
cmp ah, 0
jne fexi_fexi_im_so_broken

; Success
mov ecx, 0
hlt

; Failure
fexi_fexi_im_so_broken:
mov ecx, 0xdeadbeef
hlt


================================================
FILE: unittests/ASM/Flags/Shift.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R12": "0x55",
    "R13": "0x890",
    "R14": "0x55"
  }
}
%endif

; Checks that shl/shr/sar with a shift count of zero leave the flags from the preceding add untouched.
; After each shift the flags are captured with pushfq and masked into r12, r13 and r14.

; rsi points at scratch memory used for the indirect jumps, rsp at the stack used by push/pop
mov rsi, 0xe0000080
mov rsp, 0xe0001000

; Zero shift amount
xor ecx, ecx

; Zero all flags
xor eax, eax
push rax
popfq

mov r8b, 255
mov r10b, 127
mov r11b, 1


add r8b, r11b ; Sets CF, ZF, PF, AF, zeroes OF, SF
; Shift by zero, flags should be unaffected
; This tests that we didn't optimize away the flag calculations of the add
shl rax, cl

; Ensure we can't predict the next block
lea rdi, [rel .next]
mov [rsi - 8], rdi
jmp [rsi - 8]

.next:
pushfq
pop r12

; Mask with flags we care about
; 0x8d5 = OF (0x800) | SF (0x80) | ZF (0x40) | AF (0x10) | PF (0x04) | CF (0x01)
; Expected 0x55 = ZF | AF | PF | CF
and r12, 0x8d5

add r10b, r11b ; Sets OF, SF, AF, zeroes ZF, CF, PF
shr rax, cl

; Indirect jump through memory again so the next block can't be predicted
lea rdi, [rel .next2]
mov [rsi - 8], rdi
jmp [rsi - 8]

.next2:
pushfq
pop r13
; Expected 0x890 = OF | SF | AF
and r13, 0x8d5

; r8b was overwritten by the first add, so reload it
mov r8b, 255
add r8b, r11b ; Sets CF, ZF, PF, AF, zeroes OF, SF
sar rax, cl

lea rdi, [rel .next3]
mov [rsi - 8], rdi
jmp [rsi - 8]

.next3:
pushfq
pop r14
; Expected 0x55 = ZF | AF | PF | CF
and r14, 0x8d5

hlt

================================================
FILE: unittests/ASM/GameTests/EnderLiliesFlash.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x408000003f800000", "0"],
    "XMM1": ["0x400000003f800000", "0x4080000040400000"]
  }
}
%endif

; This bug was encountered in Ender Lilies, when an enemy attacked the player character there was a chance for a black screen flash.
; This was due to a bug in FEX's `ZextAndMaskingElimination` pass which tries to remove useless vmov IR operations.
; It incorrectly removed vmov IR operations that were explicitly zero extending vectors.
; This vmov IR operation was generated by the movq instruction because the upper bits must be zero.
; When vmov was removed, it would no longer zero the upper 64-bits, which left data in the register.
; This results in a bad calculation.

; xmm1 = {1.0, 2.0, 3.0, 4.0}
movaps  xmm1, [rel .data_1]
; xmm0 = {1.0, 2.0, 0, 0}: movq copies the low 64 bits and must zero the upper 64 bits
movq    xmm0, xmm1
; xmm0 = {1.0, 4.0, 0, 0}: the upper lanes only stay zero if movq cleared them
mulps   xmm0, [rel .data_1]

hlt

align 16
.data_1:
dd 1.0, 2.0, 3.0, 4.0


================================================
FILE: unittests/ASM/H0F38/0_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4142434445465857",
    "R14": "0x0000000058575655",
    "R13": "0x5857565554535251"
  },
  "HostFeatures": ["MOVBE"]
}
%endif

; Tests the load form of movbe (byte-swapping move from memory) for 16, 32 and 64-bit operands.

mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Pre-fill the destinations so the untouched upper bits are visible in the result
mov r15, [rdx + 8 * 0]
mov r14, [rdx + 8 * 0]
mov r13, [rdx + 8 * 0]

; 16-bit: only the low word is replaced, the upper 48 bits are preserved
movbe r15w, word [rdx + 8 * 1]
; 32-bit: the upper 32 bits are zeroed
movbe r14d, dword [rdx + 8 * 1]
; 64-bit: the full register is replaced
movbe r13,  qword [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/H0F38/0_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4142434445465857",
    "R14": "0x4142434458575655",
    "R13": "0x5857565554535251"
  },
  "HostFeatures": ["MOVBE"]
}
%endif

; Tests the store form of movbe (byte-swapping move to memory) for 16, 32 and 64-bit operands.

mov rdx, 0xe0000000

; Slot 0 holds the source value, slots 1-3 are pre-filled destinations
mov rax, 0x5152535455565758
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

mov r15, [rdx + 8 * 0]
mov r14, [rdx + 8 * 0]
mov r13, [rdx + 8 * 0]

; Each store must only write as many bytes as its operand size
movbe word [rdx + 8 * 1], r15w
movbe dword [rdx + 8 * 2], r14d
movbe qword [rdx + 8 * 3], r13

; Read back the full qwords so bytes outside the stored width are checked too
mov r15, [rdx + 8 * 1]
mov r14, [rdx + 8 * 2]
mov r13, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/H0F38/66_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4848484848484848", "0x4848484848484848"],
    "XMM1": ["0x0", "0x0"],
    "XMM2": ["0x0", "0x0"],
    "XMM3": ["0x4847464544434241", "0x5857565554535251"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests pshufb (packed byte shuffle) with four different shuffle masks.

mov rdx, 0xe0000000

; Slots 0-1: data to shuffle
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: every index is 0, so every byte selects source byte 0
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Slots 4-5: every index is 0xFF, bit 7 set, so every byte is zeroed
mov rax, -1
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Slots 6-7: every index is 0x80, only bit 7 set, so every byte is zeroed
mov rax, 0x8080808080808080
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; Slots 8-9: indices that reverse the byte order within each 64-bit half
mov rax, 0x0001020304050607
mov [rdx + 8 * 8], rax
mov rax, 0x08090A0B0C0D0E0F
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 0]
movaps xmm3, [rdx + 8 * 0]

pshufb xmm0, [rdx + 8 * 2]
pshufb xmm1, [rdx + 8 * 4]
pshufb xmm2, [rdx + 8 * 6]
pshufb xmm3, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/H0F38/66_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x5858585858585858", "0x5858585858585858"],
    "XMM1": ["0x0", "0x0"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests that pshufb ignores the reserved bits (6:4) of each shuffle index.

mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]

lea rdx, [rel .data]

; 0x78: bit 7 clear, low nibble 8 -> every byte selects source byte 8 (0x58)
pshufb xmm0, [rdx + 8 * 0]
; 0xF0: bit 7 set -> every byte is zeroed
pshufb xmm1, [rdx + 8 * 2]

hlt

; The legacy-SSE pshufb memory operand must be 16-byte aligned, otherwise real hardware raises #GP.
align 16
.data:
; Test bits with trash data in reserved bits to ensure it is ignored
; Select single element
dq 0x7878787878787878
dq 0x7878787878787878
; Clear element
dq 0xF0F0F0F0F0F0F0F0
dq 0xF0F0F0F0F0F0F0F0


================================================
FILE: unittests/ASM/H0F38/66_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xA4A6ACAE84868C8E", "0xE4E6ECEEC4C6CCCE"],
    "XMM1": ["0xE4E6ECEEC4C6CCCE", "0xA4A6ACAE84868C8E"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phaddw (packed horizontal add of 16-bit elements).
; The low 64 bits of the result come from adjacent pairs of the destination,
; the high 64 bits from adjacent pairs of the source.

mov rdx, 0xe0000000

; Slots 0-1: first vector, slots 2-3: second vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]

; Run with both operand orders; the expected results are each other's halves swapped
phaddw xmm0, [rdx + 8 * 2]
phaddw xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xA6A8AAAC86888A8C", "0xE6E8EAECC6C8CACC"],
    "XMM1": ["0xE6E8EAECC6C8CACC", "0xA6A8AAAC86888A8C"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phaddd (packed horizontal add of 32-bit elements).
; The low 64 bits of the result come from adjacent pairs of the destination,
; the high 64 bits from adjacent pairs of the source.

mov rdx, 0xe0000000

; Slots 0-1: first vector, slots 2-3: second vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]

; Run with both operand orders; the expected results are each other's halves swapped
phaddd xmm0, [rdx + 8 * 2]
phaddd xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7FFF7FFF7FFF7FFF", "0x800080007FFF7FFF"],
    "XMM1": ["0x800080007FFF7FFF", "0x7FFF7FFF7FFF7FFF"],
    "XMM2": ["0x71836D874331472D", "0x800080007FFF7FFF"],
    "XMM3": ["0x800080007FFF7FFF", "0x71836D874331472D"],
    "XMM4": ["0x7FFF7FFF7FFF7FFF", "0x71836D874331472D"],
    "XMM5": ["0x800080007FFF7FFF", "0x800080007FFF7FFF"],
    "XMM6": ["0x71836D874331472D", "0x800080007FFF7FFF"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phaddsw (packed horizontal add of 16-bit elements with signed saturation).
; The inputs mix values whose pair sums saturate in either direction with values whose sums do not.

mov rdx, 0xe0000000

; Slots 0-1: every pair sum overflows the positive range
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: low half saturates to 0x7FFF, high half saturates to 0x8000
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 2], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 3], rax

; Slots 4-5: pair sums stay in range
mov rax, 0x2119221823172416
mov [rdx + 8 * 4], rax
mov rax, 0x3941384237433644
mov [rdx + 8 * 5], rax

; Slots 6-7: same values as slots 2-3
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 6], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 7], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movaps xmm2, [rdx + 8 * 4]
movaps xmm3, [rdx + 8 * 6]
movaps xmm4, [rdx + 8 * 0]
movaps xmm5, [rdx + 8 * 2]
movaps xmm6, [rdx + 8 * 4]

; Low 64 bits of each result come from the destination, high 64 bits from the source
phaddsw xmm0, [rdx + 8 * 2]
phaddsw xmm1, [rdx + 8 * 0]

phaddsw xmm2, [rdx + 8 * 2]
phaddsw xmm3, [rdx + 8 * 4]

phaddsw xmm4, [rdx + 8 * 4]
phaddsw xmm5, [rdx + 8 * 6]

phaddsw xmm6, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0xFE02FE02FE02FE02", "0xFE02FE02FE02FE02"],
    "XMM2": ["0x7E027E027E027E02", "0x7E027E027E027E02"],
    "XMM3": ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"],
    "XMM4": ["0x057306BC07B808B8", "0xBC53BC0EBAE5BA2E"],
    "XMM5": ["0xA473A5BCA6B8A7B8", "0x0553070E07E5092E"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests pmaddubsw: multiplies unsigned bytes of the destination by signed bytes of the source
; and adds adjacent products into 16-bit results with signed saturation.

mov rdx, 0xe0000000

; Slots 0-1: all zero
mov rax, 0x0
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Slots 2-3: all 0xFF (255 as unsigned destination bytes, -1 as signed source bytes)
mov rax, -1
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Slots 4-5: all 0x7F (127 either way)
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Slots 6-7 and 8-9: mixed values with and without the top bit set
mov rax, 0x8141824383448445
mov [rdx + 8 * 6], rax
mov rax, 0x21F223F323F424F5
mov [rdx + 8 * 7], rax

mov rax, 0xE251E352E453E554
mov [rdx + 8 * 8], rax
mov rax, 0x71A972A873A774A6
mov [rdx + 8 * 9], rax

; Zero
movaps xmm0, [rdx + 8 * 0]
pmaddubsw xmm0, [rdx + 8 * 0]

; -1
; 255 * -1 + 255 * -1 = -510 = 0xFE02
movaps xmm1, [rdx + 8 * 2]
pmaddubsw xmm1, [rdx + 8 * 2]

; 127
movaps xmm2, [rdx + 8 * 4]
pmaddubsw xmm2, [rdx + 8 * 4]

; 255 and 127
; 255 * 127 + 255 * 127 exceeds the signed 16-bit range and saturates to 0x7FFF
movaps xmm3, [rdx + 8 * 2]
pmaddubsw xmm3, [rdx + 8 * 4]

; Mixture
; Run with both operand orders since the operands are treated differently (unsigned vs signed)
movaps xmm4, [rdx + 8 * 6]
pmaddubsw xmm4, [rdx + 8 * 8]

movaps xmm5, [rdx + 8 * 8]
pmaddubsw xmm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xF202F20212021202", "0x0202020202020202"],
    "XMM1": ["0x0202020202020202", "0xF202F20212021202"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phsubw (packed horizontal subtract of 16-bit elements).
; The low 64 bits of the result come from adjacent pairs of the destination,
; the high 64 bits from adjacent pairs of the source.

mov rdx, 0xe0000000

; Slots 0-1: first vector, slots 2-3: second vector
mov rax, 0x5142634475468748
mov [rdx + 8 * 0], rax
mov rax, 0x5152435435562758
mov [rdx + 8 * 1], rax
mov rax, 0x6172637465766778
mov [rdx + 8 * 2], rax
mov rax, 0x7162736475667768
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]

; Run with both operand orders; the expected results are each other's halves swapped
phsubw xmm0, [rdx + 8 * 2]
phsubw xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xE403E40424042404", "0x0404040404040404"],
    "XMM1": ["0x0404040404040404", "0xE403E40424042404"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phsubd (packed horizontal subtract of 32-bit elements).
; The low 64 bits of the result come from adjacent pairs of the destination,
; the high 64 bits from adjacent pairs of the source.

mov rdx, 0xe0000000

; Slots 0-1: first vector, slots 2-3: second vector
mov rax, 0x5142634475468748
mov [rdx + 8 * 0], rax
mov rax, 0x5152435435562758
mov [rdx + 8 * 1], rax
mov rax, 0x6172637465766778
mov [rdx + 8 * 2], rax
mov rax, 0x7162736475667768
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]

; Run with both operand orders; the expected results are each other's halves swapped
phsubd xmm0, [rdx + 8 * 2]
phsubd xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0202020202020202", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0x0202020202020202"],
    "XMM2": ["0xFF01FF0100FF00FF", "0x0000000000000000"],
    "XMM3": ["0x0000000000000000", "0xFF01FF0100FF00FF"],
    "XMM4": ["0x0202020202020202", "0xFF01FF0100FF00FF"],
    "XMM5": ["0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xFF01FF0100FF00FF", "0x0000000000000000"],
    "XMM7": ["0x800080007FFF7FFF", "0x800080007FFF7FFF"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests phsubsw (packed horizontal subtract of 16-bit elements with signed saturation).
; The low 64 bits of the result come from adjacent pairs of the destination,
; the high 64 bits from adjacent pairs of the source.

mov rdx, 0xe0000000

; Slots 0-1: adjacent words differ by 0x0202
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: adjacent words are equal, so every difference is zero
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 2], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 3], rax

; Slots 4-5: mixed positive and negative differences
mov rax, 0x2119221823172416
mov [rdx + 8 * 4], rax
mov rax, 0x3941384237433644
mov [rdx + 8 * 5], rax

; Slots 6-7: same values as slots 2-3
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 6], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 7], rax

; Slots 8-9: differences that land on the extremes of the signed 16-bit range (0x7FFF and 0x8000)
mov rax, 0x00007FFF00007FFF
mov [rdx + 8 * 8], rax
mov rax, 0x7FFFFFFF7FFFFFFF
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movaps xmm2, [rdx + 8 * 4]
movaps xmm3, [rdx + 8 * 6]
movaps xmm4, [rdx + 8 * 0]
movaps xmm5, [rdx + 8 * 2]
movaps xmm6, [rdx + 8 * 4]

movaps xmm7, [rdx + 8 * 8]

phsubsw xmm0, [rdx + 8 * 2]
phsubsw xmm1, [rdx + 8 * 0]

phsubsw xmm2, [rdx + 8 * 2]
phsubsw xmm3, [rdx + 8 * 4]

phsubsw xmm4, [rdx + 8 * 4]
phsubsw xmm5, [rdx + 8 * 6]

phsubsw xmm6, [rdx + 8 * 6]

; Same vector as destination and source
phsubsw xmm7, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/H0F38/66_08.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0xFEFEFEFEFEFEFEFE", "0xFDFDFDFDFDFDFDFD"],
    "XMM2": ["0x0202020202020202", "0x0303030303030303"],
    "XMM3": ["0xFEFEFEFE00000000", "0x03030303FD000300"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests psignb: each destination byte is negated, zeroed or kept depending on whether
; the matching source byte is negative, zero or positive.

mov rdx, 0xe0000000

; Slots 0-1: all-zero control bytes
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Slots 2-3: all-negative control bytes
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Slots 4-5: all-positive control bytes
mov rax, 0x0101010101010101
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Slots 6-7: mixed control bytes
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x01010101FF000100
mov [rdx + 8 * 7], rax

; Slots 8-9: the data operated on
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

; Test with full zero
psignb xmm0, [rdx + 8 * 0]

; Test with full negative
psignb xmm1, [rdx + 8 * 2]

; Test with full positive
psignb xmm2, [rdx + 8 * 4]

; Test a mix
psignb xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_09.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0xFDFEFDFEFDFEFDFE", "0xFCFDFCFDFCFDFCFD"],
    "XMM2": ["0x0202020202020202", "0x0303030303030303"],
    "XMM3": ["0xFDFEFDFE00000000", "0x03030303FCFD0000"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests psignw: each destination word is negated, zeroed or kept depending on whether
; the matching source word is negative, zero or positive.

mov rdx, 0xe0000000

; Slots 0-1: all-zero control words
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Slots 2-3: all-negative control words
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Slots 4-5: all-positive control words
mov rax, 0x0001000100010001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Slots 6-7: mixed control words
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00010001FFFF0000
mov [rdx + 8 * 7], rax

; Slots 8-9: the data operated on
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

; Test with full zero
psignw xmm0, [rdx + 8 * 0]

; Test with full negative
psignw xmm1, [rdx + 8 * 2]

; Test with full positive
psignw xmm2, [rdx + 8 * 4]

; Test a mix
psignw xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_0A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0xFDFDFDFEFDFDFDFE", "0xFCFCFCFDFCFCFCFD"],
    "XMM2": ["0x0202020202020202", "0x0303030303030303"],
    "XMM3": ["0xFDFDFDFE00000000", "0x03030303FCFCFCFD"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests psignd: each destination dword is negated, zeroed or kept depending on whether
; the matching source dword is negative, zero or positive.

mov rdx, 0xe0000000

; Slots 0-1: all-zero control dwords
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Slots 2-3: all-negative control dwords
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Slots 4-5: all-positive control dwords
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Slots 6-7: mixed control dwords
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Slots 8-9: the data operated on
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

; Test with full zero
psignd xmm0, [rdx + 8 * 0]

; Test with full negative
psignd xmm1, [rdx + 8 * 2]

; Test with full positive
psignd xmm2, [rdx + 8 * 4]

; Test a mix
psignd xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_0B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x31A6343B36E09E7A", "0x48134B294E4F5186"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Tests pmulhrsw (packed signed 16-bit multiply, keeping the rounded and scaled high part).

mov rdx, 0xe0000000

; Slots 0-1: first operand. The lowest word (0x8748) is negative, the rest are positive.
mov rax, 0x4142434445468748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: second operand, all words positive
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]

pmulhrsw xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2f5b7f", "0x2b5774313ad348ea"],
    "XMM2": ["0x30a9567b1f6d776b", "0x673d5d0730c6762c"],
    "XMM3": ["0x41159b533d5b4c09", "0xca64724c74043978"],
    "XMM4": ["0x24426b4c72f2101c", "0x656780205f15c767"],
    "XMM5": ["0x4f1694df78236612", "0x19826b033d3c78c0"],
    "XMM6": ["0x00f9589268e0127f", "0x1e7d27031e881972"],
    "XMM7": ["0x1c4c4322f8f65504", "0x07f565f98b1bb7bc"],
    "XMM8": ["0x10c41f28710f2147", "0x09925e327f5e2938"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PBLENDVB with a 128-bit memory source operand and the implicit xmm0 mask.
; Every memory operand below is addressed relative to rdx (.data).
lea rdx, [rel .data]
; NOTE(review): rdi is loaded here but no later instruction in this test reads
; it, so the .mask table is never used -- confirm whether this is intentional.
lea rdi, [rel .mask]

; Destinations start out as rows 0-7 of .data
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; pblendvb takes its per-byte select mask implicitly from xmm0.
; For each blend the mask is reloaded with the same row the destination was
; loaded from, and the blend source is the row 8 entries further on (rows 8-15).
movaps xmm0, [rdx + 16 * 0]
pblendvb xmm1, [rdx + 16 * 8]
movaps xmm0, [rdx + 16 * 1]
pblendvb xmm2, [rdx + 16 * 9]
movaps xmm0, [rdx + 16 * 2]
pblendvb xmm3, [rdx + 16 * 10]
movaps xmm0, [rdx + 16 * 3]
pblendvb xmm4, [rdx + 16 * 11]
movaps xmm0, [rdx + 16 * 4]
pblendvb xmm5, [rdx + 16 * 12]
movaps xmm0, [rdx + 16 * 5]
pblendvb xmm6, [rdx + 16 * 13]
movaps xmm0, [rdx + 16 * 6]
pblendvb xmm7, [rdx + 16 * 14]
movaps xmm0, [rdx + 16 * 7]
pblendvb xmm8, [rdx + 16 * 15]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09
.mask:
db 0xb5, 0xdb, 0xca, 0x08, 0x0a, 0xae, 0x17, 0xb6, 0x54, 0x0d, 0xd3, 0xa1, 0x1d, 0x28, 0x25, 0xd3
db 0x02, 0xf5, 0xce, 0x94, 0x2e, 0x56, 0x01, 0xf4, 0xae, 0x81, 0xbe, 0x50, 0x70, 0x45, 0x83, 0x18
db 0x33, 0x46, 0x60, 0x4e, 0x29, 0xe4, 0x49, 0xae, 0xc2, 0x0e, 0x70, 0xcd, 0x2a, 0x6f, 0xb8, 0x9d
db 0x47, 0x1e, 0x8e, 0x94, 0x81, 0xaa, 0x12, 0xe4, 0x8e, 0x77, 0x48, 0x34, 0x63, 0x8f, 0x4c, 0x57
db 0x34, 0xf9, 0x5b, 0x37, 0x76, 0x9f, 0xff, 0x8c, 0x6b, 0xf2, 0x6e, 0xd0, 0x15, 0x61, 0x4e, 0x0f
db 0xe2, 0xa7, 0xd0, 0x73, 0x7f, 0xf0, 0xae, 0xe7, 0x48, 0x32, 0x2a, 0x56, 0xc3, 0x29, 0x12, 0x4f
db 0x7d, 0x5b, 0x37, 0xc2, 0x83, 0x74, 0xd2, 0xf9, 0xcc, 0x98, 0x0a, 0x3c, 0x3c, 0x38, 0xd9, 0x4d


================================================
FILE: unittests/ASM/H0F38/66_14.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM3": ["0x41029b7f3de8ad09", "0xca79a24c7404b890"],
    "XMM4": ["0x24426b4c72f110ad", "0x65f580205ffdc710"],
    "XMM5": ["0x4f1694df78236612", "0x19a26b823d3ca2a9"],
    "XMM6": ["0x00f658ab689712b0", "0x1ea627241ed21972"],
    "XMM7": ["0x1c864322f8f69004", "0x07f565f98b8db7bc"],
    "XMM8": ["0x10c41fa1710f2147", "0x099224327fb5ac38"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; BLENDVPS with a 128-bit memory source operand and the implicit xmm0 mask.
; Every memory operand below is addressed relative to rdx (.data).
lea rdx, [rel .data]
; NOTE(review): rdi is loaded here but no later instruction in this test reads
; it, so the .mask table is never used -- confirm whether this is intentional.
lea rdi, [rel .mask]

; Destinations start out as rows 0-7 of .data
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; blendvps takes its per-dword select mask implicitly from xmm0.
; For each blend the mask is reloaded with the same row the destination was
; loaded from, and the blend source is the row 8 entries further on (rows 8-15).
movaps xmm0, [rdx + 16 * 0]
blendvps xmm1, [rdx + 16 * 8]
movaps xmm0, [rdx + 16 * 1]
blendvps xmm2, [rdx + 16 * 9]
movaps xmm0, [rdx + 16 * 2]
blendvps xmm3, [rdx + 16 * 10]
movaps xmm0, [rdx + 16 * 3]
blendvps xmm4, [rdx + 16 * 11]
movaps xmm0, [rdx + 16 * 4]
blendvps xmm5, [rdx + 16 * 12]
movaps xmm0, [rdx + 16 * 5]
blendvps xmm6, [rdx + 16 * 13]
movaps xmm0, [rdx + 16 * 6]
blendvps xmm7, [rdx + 16 * 14]
movaps xmm0, [rdx + 16 * 7]
blendvps xmm8, [rdx + 16 * 15]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09
.mask:
db 0xb5, 0xdb, 0xca, 0x08, 0x0a, 0xae, 0x17, 0xb6, 0x54, 0x0d, 0xd3, 0xa1, 0x1d, 0x28, 0x25, 0xd3
db 0x02, 0xf5, 0xce, 0x94, 0x2e, 0x56, 0x01, 0xf4, 0xae, 0x81, 0xbe, 0x50, 0x70, 0x45, 0x83, 0x18
db 0x33, 0x46, 0x60, 0x4e, 0x29, 0xe4, 0x49, 0xae, 0xc2, 0x0e, 0x70, 0xcd, 0x2a, 0x6f, 0xb8, 0x9d
db 0x47, 0x1e, 0x8e, 0x94, 0x81, 0xaa, 0x12, 0xe4, 0x8e, 0x77, 0x48, 0x34, 0x63, 0x8f, 0x4c, 0x57
db 0x34, 0xf9, 0x5b, 0x37, 0x76, 0x9f, 0xff, 0x8c, 0x6b, 0xf2, 0x6e, 0xd0, 0x15, 0x61, 0x4e, 0x0f
db 0xe2, 0xa7, 0xd0, 0x73, 0x7f, 0xf0, 0xae, 0xe7, 0x48, 0x32, 0x2a, 0x56, 0xc3, 0x29, 0x12, 0x4f
db 0x7d, 0x5b, 0x37, 0xc2, 0x83, 0x74, 0xd2, 0xf9, 0xcc, 0x98, 0x0a, 0x3c, 0x3c, 0x38, 0xd9, 0x4d


================================================
FILE: unittests/ASM/H0F38/66_15.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM3": ["0x41029b7f255b4cf4", "0xca79a24c3e9e3978"],
    "XMM4": ["0x24426b4c72f110ad", "0x65f580205ffdc710"],
    "XMM5": ["0x4f1694dfa8fb773c", "0x19a26b823d3ca2a9"],
    "XMM6": ["0x00f658ab689712b0", "0x1ea62724c8883dfa"],
    "XMM7": ["0x1c86432298df55c8", "0x07f565f98b8db7bc"],
    "XMM8": ["0x10c41fa17837c17f", "0x099224327e5e296c"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; BLENDVPD with a 128-bit memory source operand and the implicit xmm0 mask.
; Every memory operand below is addressed relative to rdx (.data).
lea rdx, [rel .data]
; NOTE(review): rdi is loaded here but no later instruction in this test reads
; it, so the .mask table is never used -- confirm whether this is intentional.
lea rdi, [rel .mask]

; Destinations start out as rows 0-7 of .data
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; blendvpd takes its per-qword select mask implicitly from xmm0.
; For each blend the mask is reloaded with the same row the destination was
; loaded from, and the blend source is the row 8 entries further on (rows 8-15).
movaps xmm0, [rdx + 16 * 0]
blendvpd xmm1, [rdx + 16 * 8]
movaps xmm0, [rdx + 16 * 1]
blendvpd xmm2, [rdx + 16 * 9]
movaps xmm0, [rdx + 16 * 2]
blendvpd xmm3, [rdx + 16 * 10]
movaps xmm0, [rdx + 16 * 3]
blendvpd xmm4, [rdx + 16 * 11]
movaps xmm0, [rdx + 16 * 4]
blendvpd xmm5, [rdx + 16 * 12]
movaps xmm0, [rdx + 16 * 5]
blendvpd xmm6, [rdx + 16 * 13]
movaps xmm0, [rdx + 16 * 6]
blendvpd xmm7, [rdx + 16 * 14]
movaps xmm0, [rdx + 16 * 7]
blendvpd xmm8, [rdx + 16 * 15]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09
.mask:
db 0xb5, 0xdb, 0xca, 0x08, 0x0a, 0xae, 0x17, 0xb6, 0x54, 0x0d, 0xd3, 0xa1, 0x1d, 0x28, 0x25, 0xd3
db 0x02, 0xf5, 0xce, 0x94, 0x2e, 0x56, 0x01, 0xf4, 0xae, 0x81, 0xbe, 0x50, 0x70, 0x45, 0x83, 0x18
db 0x33, 0x46, 0x60, 0x4e, 0x29, 0xe4, 0x49, 0xae, 0xc2, 0x0e, 0x70, 0xcd, 0x2a, 0x6f, 0xb8, 0x9d
db 0x47, 0x1e, 0x8e, 0x94, 0x81, 0xaa, 0x12, 0xe4, 0x8e, 0x77, 0x48, 0x34, 0x63, 0x8f, 0x4c, 0x57
db 0x34, 0xf9, 0x5b, 0x37, 0x76, 0x9f, 0xff, 0x8c, 0x6b, 0xf2, 0x6e, 0xd0, 0x15, 0x61, 0x4e, 0x0f
db 0xe2, 0xa7, 0xd0, 0x73, 0x7f, 0xf0, 0xae, 0xe7, 0x48, 0x32, 0x2a, 0x56, 0xc3, 0x29, 0x12, 0x4f
db 0x7d, 0x5b, 0x37, 0xc2, 0x83, 0x74, 0xd2, 0xf9, 0xcc, 0x98, 0x0a, 0x3c, 0x3c, 0x38, 0xd9, 0x4d


================================================
FILE: unittests/ASM/H0F38/66_17.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15":  "0x000000000003a759",
    "XMM1": ["0", "0"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0", "0"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0", "0"],
    "XMM8": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM9": ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PTEST with a 128-bit memory source operand.
; After every ptest the resulting CF and ZF are appended to r15, so r15 ends up
; holding two bits per test, with the first test in the most significant pair.

; zfcfmerge: append the current CF and ZF to r15.
; Uses AX and BX and stores result in r15
; Bit order of each appended pair is CF:ZF (CF is the higher bit).
%macro zfcfmerge 0
  ; Capture the flags in AH before the shifts below overwrite them
  lahf

  ; Move the flag byte from AH down to AL, which puts CF in bit 0 and ZF in bit 6
  shr ax, 8

  ; Move to a temp and keep only CF
  mov bx, ax
  and rbx, 1

  ; Insert CF
  shl r15, 1
  or r15, rbx

  ; Make room for ZF
  shl r15, 1

  ; Move to a temp
  mov bx, ax

  ; Extract ZF
  shr bx, 6
  and rbx, 1

  ; Insert ZF
  or r15, rbx
%endmacro

lea rdx, [rel .data]

; Clear the registers the macro works with
mov rax, 0
mov rbx, 0
mov r15, 0

; First operands: rows 0-2 of .data (zero, all ones, pattern), repeated three times
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 0]
movaps xmm5, [rdx + 16 * 1]
movaps xmm6, [rdx + 16 * 2]
movaps xmm7, [rdx + 16 * 0]
movaps xmm8, [rdx + 16 * 1]
movaps xmm9, [rdx + 16 * 2]


; Second operands: rows 3-11 of .data, one row per test
ptest xmm1, [rdx + 16 * 3]
zfcfmerge
ptest xmm2, [rdx + 16 * 4]
zfcfmerge
ptest xmm3, [rdx + 16 * 5]
zfcfmerge
ptest xmm4, [rdx + 16 * 6]
zfcfmerge
ptest xmm5, [rdx + 16 * 7]
zfcfmerge
ptest xmm6, [rdx + 16 * 8]
zfcfmerge
ptest xmm7, [rdx + 16 * 9]
zfcfmerge
ptest xmm8, [rdx + 16 * 10]
zfcfmerge
ptest xmm9, [rdx + 16 * 11]
zfcfmerge

hlt

align 16
.data:
; Rows 0-2: values loaded into the xmm registers
dq 0, 0
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758

; Match
; Rows 3-5: identical to rows 0-2
dq 0, 0
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758

; Match on not
; Rows 6-8: bitwise complement of rows 0-2
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0, 0
dq 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7

; No match on either case
; Rows 9-11: small values unrelated to rows 0-2
dq 1, 1
dq 2, 2
dq 3, 3


================================================
FILE: unittests/ASM/H0F38/66_17_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF43FF",
    "RBX": "0"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PTEST flag-clearing test: checks the flags left behind by ptest rather than
; just ZF/CF.

; Set EFLAGS to known value with sahf
; AH = 0xFF, so every flag that sahf writes is set to 1
mov rax, -1
sahf

; Unaligned load is used because .data carries no alignment directive
movups xmm0, [rel .data]
; Tests a bug that FEX had where ptest would not set OF, SF, AF, PF to zero
ptest xmm0, xmm0

; Now load back
; ZF = 1
; CF = 1
; OF, SF, AF, PF should be zero
; Only AH is overwritten; the remaining bits of rax keep the -1 loaded above
lahf

; lahf doesn't get OF, get it with seto
mov rbx, 0
seto bl

hlt

; All-zero 128-bit operand for the ptest above
.data:
dq 0
dq 0


================================================
FILE: unittests/ASM/H0F38/66_1C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0x0101010101010101", "0x0101010101010101"],
    "XMM2": ["0x0101010101010101", "0x0101010101010101"],
    "XMM3": ["0x0100010001010100", "0x0100010001010100"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; PABSB with a 128-bit memory source operand.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: every byte is zero
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: every byte is -1
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; [rdx + 8 * 4 .. 5]: every byte is +1
mov rax, 0x0101010101010101
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; [rdx + 8 * 6 .. 7]: mix of 0, +1 and -1 bytes, same pattern in both halves
mov rax, 0xFF000100FF01FF00
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; The destination registers are not initialised beforehand; each pabsb writes
; its whole destination.

; Test with full zero
pabsb xmm0, [rdx + 8 * 0]

; Test with full negative
pabsb xmm1, [rdx + 8 * 2]

; Test with full positive
pabsb xmm2, [rdx + 8 * 4]

; Test a mix
pabsb xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_1D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0x0001000100010001", "0x0001000100010001"],
    "XMM2": ["0x0001000100010001", "0x0001000100010001"],
    "XMM3": ["0x0001000100000000", "0x0001000100010000"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; PABSW with a 128-bit memory source operand.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: every word is zero
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: every word is -1
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; [rdx + 8 * 4 .. 5]: every word is +1
mov rax, 0x0001000100010001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; [rdx + 8 * 6 .. 7]: mixed; words from low to high are 0, 0, -1, -1, 0, -1, +1, +1
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00010001FFFF0000
mov [rdx + 8 * 7], rax

; The destination registers are not initialised beforehand; each pabsw writes
; its whole destination.

; Test with full zero
pabsw xmm0, [rdx + 8 * 0]

; Test with full negative
pabsw xmm1, [rdx + 8 * 2]

; Test with full positive
pabsw xmm2, [rdx + 8 * 4]

; Test a mix
pabsw xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_1E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0x0"],
    "XMM1": ["0x0000000100000001", "0x0000000100000001"],
    "XMM2": ["0x0000000100000001", "0x0000000100000001"],
    "XMM3": ["0x0000000100000000", "0x0000000100000001"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; PABSD with a 128-bit memory source operand.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: every dword is zero
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: every dword is -1
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; [rdx + 8 * 4 .. 5]: every dword is +1
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; [rdx + 8 * 6 .. 7]: mixed; dwords from low to high are 0, -1, -1, +1
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; The destination registers are not initialised beforehand; each pabsd writes
; its whole destination.

; Test with full zero
pabsd xmm0, [rdx + 8 * 0]

; Test with full negative
pabsd xmm1, [rdx + 8 * 2]

; Test with full positive
pabsd xmm2, [rdx + 8 * 4]

; Test a mix
pabsd xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_20.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xff85ff86ff87ff88", "0x0041004200430044"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXBW with a memory source operand: sign-extends bytes to words.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) are negative
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 8 bytes at [rdx] only
pmovsxbw xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_21.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffffff87ffffff88", "0xffffff85ffffff86"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXBD with a memory source operand: sign-extends bytes to dwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) are negative
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 4 bytes at [rdx] only
pmovsxbd xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_22.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffffffffffffff88", "0xffffffffffffff87"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXBQ with a memory source operand: sign-extends bytes to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) are negative
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 bytes at [rdx] only
pmovsxbq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_23.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffff8586ffff8788", "0x0000414200004344"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXWD with a memory source operand: sign-extends words to dwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the two lowest words (0x8788, 0x8586) are negative
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 4 words at [rdx] only
pmovsxwd xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_24.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffffffffffff8788", "0xffffffffffff8586"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXWQ with a memory source operand: sign-extends words to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the two lowest words (0x8788, 0x8586) are negative
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 words at [rdx] only
pmovsxwq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_25.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffffffff85868788", "0x0000000041424344"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVSXDQ with a memory source operand: sign-extends dwords to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the lowest dword (0x85868788) is negative,
; the next one (0x41424344) is positive
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 dwords at [rdx] only
pmovsxdq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_28.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0xee65166050ac19a0", "0xfe1eb34a32b1a0b2"],
    "XMM2": ["0x28a18cdd2d20fb20", "0x1d6fa69c44caed04"],
    "XMM3": ["0xf514cf89a88edcde", "0x01e3dc4237becfcf"],
    "XMM4": ["0x0004b0350897f35a", "0x03cd750e809c18d0"],
    "XMM5": ["0x066a5fa4ad5148c8", "0x00bca2da387e55a2"],
    "XMM6": ["0x1e0f03011112ed90", "0x18c90f3ec0d58440"],
    "XMM7": ["0xee94b334b2358df2", "0x1b82409d7ae7fa28"],
    "XMM8": ["0xed12f34e8fb5e098", "0xd83d0ba0ff8632db"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMULDQ with a 128-bit memory source operand, run over eight operand pairs
; taken from the .data table.
lea rdx, [rel .data]

; First operands: rows 0-7 of .data
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Second operands: rows 8-15 of .data, i.e. row N is multiplied with row N + 8
pmuldq xmm1, [rdx + 16 * 8]
pmuldq xmm2, [rdx + 16 * 9]
pmuldq xmm3, [rdx + 16 * 10]
pmuldq xmm4, [rdx + 16 * 11]
pmuldq xmm5, [rdx + 16 * 12]
pmuldq xmm6, [rdx + 16 * 13]
pmuldq xmm7, [rdx + 16 * 14]
pmuldq xmm8, [rdx + 16 * 15]

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/66_29.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM2":  ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM3":  ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM4":  ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM5":  ["0x0", "0x0"],
    "XMM6":  ["0x0", "0x0"],
    "XMM7":  ["0x0", "0x0"],
    "XMM8":  ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM9":  ["0x0", "0xffffffffffffffff"],
    "XMM10": ["0xffffffffffffffff", "0x0"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PCMPEQQ with a 128-bit memory source operand.
; Covers both lanes equal, both lanes different, and one lane equal at a time.
lea rdx, [rel .data]

; First operands: rows 0-7 of .data; xmm9 and xmm10 reuse row 0
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]
movaps xmm9, [rdx + 16 * 0]
movaps xmm10, [rdx + 16 * 0]

; Second operands: rows 8-17 of .data
pcmpeqq xmm1, [rdx + 16 * 8]
pcmpeqq xmm2, [rdx + 16 * 9]
pcmpeqq xmm3, [rdx + 16 * 10]
pcmpeqq xmm4, [rdx + 16 * 11]
pcmpeqq xmm5, [rdx + 16 * 12]
pcmpeqq xmm6, [rdx + 16 * 13]
pcmpeqq xmm7, [rdx + 16 * 14]
pcmpeqq xmm8, [rdx + 16 * 15]
pcmpeqq xmm9, [rdx + 16 * 16]
pcmpeqq xmm10, [rdx + 16 * 17]

hlt

align 16
.data:
; The values are written as floating-point literals, but pcmpeqq compares the
; 64-bit lanes as integers, so only the bit patterns matter here.
; Rows 0-7: first operands
dq 0.0, 0.0
dq 0.0, 1.0
dq 1.0, 0.0
dq 1.0, 1.0
dq 0.0, 0.0
dq 0.0, 1.0
dq 1.0, 0.0
dq 1.0, 1.0

; Rows 8-11: identical to rows 0-3
dq 0.0, 0.0
dq 0.0, 1.0
dq 1.0, 0.0
dq 1.0, 1.0
; Rows 12-14: differ from rows 4-6 in both lanes
dq 1.0, 1.0
dq 1.0, 0.0
dq 0.0, 1.0
; Row 15: identical to row 7
dq 1.0, 1.0
; Rows 16-17: compared against row 0, matching in exactly one lane each
dq 1.0, 0.0
dq 0.0, 1.0


================================================
FILE: unittests/ASM/H0F38/66_2A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; MOVNTDQA: 128-bit load from memory into xmm0.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: the 16 bytes that are loaded
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3] are zeroed here but not read by any later instruction in this test
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Instruction under test
movntdqa xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_2B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xffffffffffffffff", "0x00000000ffff0000"],
    "XMM1": ["0xffffffffffffffff", "0x12348000ffff0000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PACKUSDW with a 128-bit memory source operand: packs signed dwords into
; words with unsigned saturation.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: initial destination contents; every dword is above 0xFFFF
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: boundary dwords 0, INT32_MAX, INT32_MIN and -1
mov eax, 0
mov [rdx + 8 * 2 + 0], eax
mov eax, 0x7FFFFFFF
mov [rdx + 8 * 2 + 4], eax
mov eax, 0x80000000
mov [rdx + 8 * 3 + 0], eax
mov eax, 0xFFFFFFFF
mov [rdx + 8 * 3 + 4], eax

; Values that actually fit in to 16bit unsigned
mov eax, 0
mov [rdx + 8 * 4 + 0], eax
mov eax, 0xFFFF
mov [rdx + 8 * 4 + 4], eax
mov eax, 0x8000
mov [rdx + 8 * 5 + 0], eax
mov eax, 0x1234
mov [rdx + 8 * 5 + 4], eax

; Set up the destination registers (these are also the first pack input)
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]

; Pack it
; xmm0 is packed with the boundary values, xmm1 with the in-range values
packusdw xmm0, [rdx + 8 * 2]
packusdw xmm1, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/H0F38/66_30.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0085008600870088", "0x0041004200430044"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXBW with a memory source operand: zero-extends bytes to words.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) have the
; top bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 8 bytes at [rdx] only
pmovzxbw xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_31.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000008700000088", "0x0000008500000086"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXBD with a memory source operand: zero-extends bytes to dwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) have the
; top bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 4 bytes at [rdx] only
pmovzxbd xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000088", "0x0000000000000087"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXBQ with a memory source operand: zero-extends bytes to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the four lowest bytes (0x85-0x88) have the
; top bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 bytes at [rdx] only
pmovzxbq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_33.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000858600008788", "0x0000414200004344"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXWD with a memory source operand: zero-extends words to dwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the two lowest words (0x8788, 0x8586) have
; the top bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 4 words at [rdx] only
pmovzxwd xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_34.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000008788", "0x0000000000008586"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXWQ with a memory source operand: zero-extends words to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the two lowest words (0x8788, 0x8586) have
; the top bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 words at [rdx] only
pmovzxwq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_35.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000085868788", "0x0000000041424344"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMOVZXDQ with a memory source operand: zero-extends dwords to qwords.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: source data; the lowest dword (0x85868788) has the top
; bit set, so sign extension would give a different result
mov rax, 0x4142434485868788
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: filler used to pre-load the destination
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Fill register with trash
movapd xmm0, [rdx + 8 * 2]

; Now do the move
; The expected result is built from the low 2 dwords at [rdx] only
pmovzxdq xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/66_37.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM1":  ["0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM5":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM9":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; PCMPGTQ with a 128-bit memory source operand.
; Signed 64-bit greater-than compares between combinations of 0, -1 and +1.

; First batch of destinations: .data0 to .data4 in order
movaps xmm0, [rel .data0]
movaps xmm1, [rel .data1]
movaps xmm2, [rel .data2]
movaps xmm3, [rel .data3]
movaps xmm4, [rel .data4]

; Second batch of destinations: the same five values again
movaps xmm5, [rel .data0]
movaps xmm6, [rel .data1]
movaps xmm7, [rel .data2]
movaps xmm8, [rel .data3]
movaps xmm9, [rel .data4]

; First batch: destination N is compared against .data(4 - N)
; (xmm2 is therefore compared against its own value)
pcmpgtq xmm0, [rel .data4]
pcmpgtq xmm1, [rel .data3]
pcmpgtq xmm2, [rel .data2]
pcmpgtq xmm3, [rel .data1]
pcmpgtq xmm4, [rel .data0]

; Second batch: each destination is compared against the next table entry,
; wrapping from .data4 back to .data0
pcmpgtq xmm5, [rel .data1]
pcmpgtq xmm6, [rel .data2]
pcmpgtq xmm7, [rel .data3]
pcmpgtq xmm8, [rel .data4]
pcmpgtq xmm9, [rel .data0]

hlt

; Each entry is 16 bytes, so the single align below keeps all of them
; 16-byte aligned for the movaps/pcmpgtq memory operands.
align 16
; Both lanes zero
.data0:
dq 0
dq 0

; Both lanes negative
.data1:
dq -1
dq -1

; Both lanes positive
.data2:
dq 1
dq 1

; Low lane negative, high lane positive
.data3:
dq -1
dq 1

; Low lane positive, high lane negative
.data4:
dq 1
dq -1


================================================
FILE: unittests/ASM/H0F38/66_38.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; PMINSB: signed byte minimum, tested with a register source and with a
; 128-bit memory source.
; rdx is the base of the scratch memory used below
; (assumed to be mapped by the test harness -- TODO confirm).
; NOTE(review): every input byte here is below 0x80, so this data cannot
; distinguish a signed minimum from an unsigned one.
mov rdx, 0xe0000000

; [rdx + 8 * 0 .. 1]: first operand; its low half is the smaller one
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2 .. 3]: second operand; its high half is the smaller one
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register-source form, then memory-source form with the same operand values
pminsb xmm0, xmm2
pminsb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_39.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pminsd (packed signed dword minimum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pminsd xmm0, xmm2
pminsd xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pminuw (packed unsigned word minimum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pminuw xmm0, xmm2
pminuw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pminud (packed unsigned dword minimum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pminud xmm0, xmm2
pminud xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pmaxsb (packed signed byte maximum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pmaxsb xmm0, xmm2
pmaxsb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pmaxsd (packed signed dword maximum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pmaxsd xmm0, xmm2
pmaxsd xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pmaxuw (packed unsigned word maximum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pmaxuw xmm0, xmm2
pmaxuw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_3F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pmaxud (packed unsigned dword maximum).
; xmm0 exercises the register-source form and xmm1 the memory-source form;
; the expected values for both are identical.
; NOTE(review): every input byte is below 0x80 and, within each 64-bit half,
; one operand is larger in every byte, so this data gives the same result for
; any element width or signedness.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; First operand: 16 bytes at [rdx + 0]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand: 16 bytes at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source
pmaxud xmm0, xmm2
pmaxud xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/H0F38/66_40.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x7a84d3fa541ef1be", "0x5f0d7667e4d8e24a"],
    "XMM2": ["0x44683c4ce9ac9780", "0x9da95e9a6f25ef94"],
    "XMM3": ["0x4bc94ea0ccb0a64c", "0x3cf36ee04f371510"],
    "XMM4": ["0x1ac415407b8ba3db", "0x92cdc300dab0773c"],
    "XMM5": ["0x6796b1563f8d578c", "0x4c64f16199291fe4"],
    "XMM6": ["0x01a14ef664207dc6", "0x1d3220da400e1027"],
    "XMM7": ["0x75ddba582c3dd348", "0xa5141c506d8c60d7"],
    "XMM8": ["0x7873ff38fb240e0d", "0x6c154f1adb67cd17"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; pmulld (packed 32-bit multiply, low 32 bits of each product kept).
; Only the memory-source form is exercised, on eight independent vector pairs.

; rdx = base of the 256-byte table at the end of this file
lea rdx, [rel .data]

; Destination operands: the eight 16-byte vectors at .data + 0 .. .data + 127
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Source operands: the eight 16-byte vectors at .data + 128 .. .data + 255
pmulld xmm1, [rdx + 16 * 8]
pmulld xmm2, [rdx + 16 * 9]
pmulld xmm3, [rdx + 16 * 10]
pmulld xmm4, [rdx + 16 * 11]
pmulld xmm5, [rdx + 16 * 12]
pmulld xmm6, [rdx + 16 * 13]
pmulld xmm7, [rdx + 16 * 14]
pmulld xmm8, [rdx + 16 * 15]

hlt

align 16
; 256bytes of random data
.data:
dd 655.9708,532.2244,108.0451,512.4019,754.227,586.0859,127.7574,114.8167,764.4266,226.6145,337.864,320.3296,296.5247,480.0057,28.4267,565.9418,265.8255,536.4473,754.3489,460.681,818.7269,43.7204,464.592,847.9381,306.0592,702.7584,887.6473,551.5908,620.9001,520.9829,232.9532,510.3388,204.8474,225.626,564.973,790.5175,836.1953,844.5266,633.5626,501.7409,393.2616,674.4415,244.3265,971.1598,770.8029,746.1836,255.9902,567.7578,187.7175,924.181,466.4362,169.8267,651.7481,462.4206,396.6924,355.8538,6.148,523.1443,989.7004,713.6646,497.5427,657.6965,651.0534,778.5236


================================================
FILE: unittests/ASM/H0F38/66_41.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000000000001", "0x0"],
    "XMM1":  ["0x0000000000030001", "0x0"],
    "XMM2":  ["0x0000000000070001", "0x0"],
    "XMM3":  ["0x0000000000010001", "0x0"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; phminposuw (horizontal minimum of eight unsigned words).
; The result holds the minimum value in bits 15:0 and the index of the word
; that held it in bits 18:16; the expected values have all other bits zero.
; Word positions below are counted from the least-significant word (0..7).

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Minimum (0x0001) at word position 0
mov rax, 0x0004000300020001
mov [rdx + 8 * 0], rax
mov rax, 0x0008000700060005
mov [rdx + 8 * 1], rax

; Minimum (0x0001) at word position 3
mov rax, 0x0001000300020004
mov [rdx + 8 * 2], rax
mov rax, 0x0008000700060005
mov [rdx + 8 * 3], rax

; Minimum (0x0001) at word position 7
mov rax, 0x0008000300020004
mov [rdx + 8 * 4], rax
mov rax, 0x0001000700060005
mov [rdx + 8 * 5], rax

; Minimum (0x0001) duplicated at word positions 1, 2 and 7
; Should return lowest position (1)
mov rax, 0x0008000100010004
mov [rdx + 8 * 6], rax
mov rax, 0x0001000700060005
mov [rdx + 8 * 7], rax

; Memory-source form only; each destination is fully overwritten
phminposuw xmm0, [rdx + 8 * 0]
phminposuw xmm1, [rdx + 8 * 2]
phminposuw xmm2, [rdx + 8 * 4]
phminposuw xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_DB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0xffffffffffffffff", "0xffffffffffffffff"],
    "XMM2": ["0x0b0d090e0b0d090e", "0x0b0d090e0b0d090e"],
    "XMM3": ["0xffffffff00000000", "0x0b0d090effffffff"],
    "XMM4": ["0x0202020202020202", "0x0303030303030303"]
  },
  "HostFeatures": ["AES"]
}
%endif

; aesimc (AES InvMixColumns of the source, written to the destination).
; Only the memory-source form is exercised, on five 16-byte inputs.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Input 0 at [rdx + 0]: all zero bits
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Input 1 at [rdx + 16]: all one bits
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Input 2 at [rdx + 32]: 0x00000001 in every 32-bit column
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Input 3 at [rdx + 48]: a mix of the three column values used above
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Input 4 at [rdx + 64]: columns with four identical bytes (0x02 / 0x03)
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

aesimc xmm0, [rdx + 8 * 0]
aesimc xmm1, [rdx + 8 * 2]
aesimc xmm2, [rdx + 8 * 4]
aesimc xmm3, [rdx + 8 * 6]
aesimc xmm4, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/H0F38/66_DC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x77637B6F637B6F77", "0x7B6F77636F77637B"],
    "XMM1": ["0x889C84909C849088", "0x8490889C90889C84"],
    "XMM2": ["0x77637B6E637B6F76", "0x7B6F77626F77637A"],
    "XMM3": ["0x889C8490637B6F77", "0x7B6F776290889C84"]
  },
  "HostFeatures": ["AES"]
}
%endif

; aesenc (one AES encryption round).
; The destination register holds the state and the memory operand supplies
; the round key. All four cases use the same state with a different key.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Round key 0 at [rdx + 0]: all zero bits
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Round key 1 at [rdx + 16]: all one bits
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Round key 2 at [rdx + 32]: 0x00000001 in every dword
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Round key 3 at [rdx + 48]: a mix of the three dword values used above
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Shared state at [rdx + 64]
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

; Load the same state into every destination register
movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

aesenc xmm0, [rdx + 8 * 0]

aesenc xmm1, [rdx + 8 * 2]

aesenc xmm2, [rdx + 8 * 4]

aesenc xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_DD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x777B7B777B7B7777", "0x7B77777B77777B7B"],
    "XMM1": ["0x8884848884848888", "0x8488888488888484"],
    "XMM2": ["0x777B7B767B7B7776", "0x7B77777A77777B7A"],
    "XMM3": ["0x888484887B7B7777", "0x7B77777A88888484"]
  },
  "HostFeatures": ["AES"]
}
%endif

; aesenclast (final AES encryption round).
; The destination register holds the state and the memory operand supplies
; the round key. All four cases use the same state with a different key.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Round key 0 at [rdx + 0]: all zero bits
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Round key 1 at [rdx + 16]: all one bits
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Round key 2 at [rdx + 32]: 0x00000001 in every dword
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Round key 3 at [rdx + 48]: a mix of the three dword values used above
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Shared state at [rdx + 64]
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

; Load the same state into every destination register
movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

aesenclast xmm0, [rdx + 8 * 0]

aesenclast xmm1, [rdx + 8 * 2]

aesenclast xmm2, [rdx + 8 * 4]

aesenclast xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_DE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7A1FC5A0A07A1FC5", "0xC5A07A1F1FC5A07A"],
    "XMM1": ["0x85E03A5F5F85E03A", "0x3A5F85E0E03A5F85"],
    "XMM2": ["0x7A1FC5A1A07A1FC4", "0xC5A07A1E1FC5A07B"],
    "XMM3": ["0x85E03A5FA07A1FC5", "0xC5A07A1EE03A5F85"]
  },
  "HostFeatures": ["AES"]
}
%endif

; aesdec (one AES decryption round).
; The destination register holds the state and the memory operand supplies
; the round key. All four cases use the same state with a different key.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Round key 0 at [rdx + 0]: all zero bits
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Round key 1 at [rdx + 16]: all one bits
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Round key 2 at [rdx + 32]: 0x00000001 in every dword
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Round key 3 at [rdx + 48]: a mix of the three dword values used above
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Shared state at [rdx + 64]
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

; Load the same state into every destination register
movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

aesdec xmm0, [rdx + 8 * 0]

aesdec xmm1, [rdx + 8 * 2]

aesdec xmm2, [rdx + 8 * 4]

aesdec xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_DF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xD5D56A6A6AD5D56A", "0x6A6AD5D5D56A6AD5"],
    "XMM1": ["0x2A2A9595952A2A95", "0x95952A2A2A95952A"],
    "XMM2": ["0xD5D56A6B6AD5D56B", "0x6A6AD5D4D56A6AD4"],
    "XMM3": ["0x2A2A95956AD5D56A", "0x6A6AD5D42A95952A"]
  },
  "HostFeatures": ["AES"]
}
%endif

; aesdeclast (final AES decryption round).
; The destination register holds the state and the memory operand supplies
; the round key. All four cases use the same state with a different key.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Round key 0 at [rdx + 0]: all zero bits
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Round key 1 at [rdx + 16]: all one bits
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Round key 2 at [rdx + 32]: 0x00000001 in every dword
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Round key 3 at [rdx + 48]: a mix of the three dword values used above
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; Shared state at [rdx + 64]
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

; Load the same state into every destination register
movaps xmm0, [rdx + 8 * 8]
movaps xmm1, [rdx + 8 * 8]
movaps xmm2, [rdx + 8 * 8]
movaps xmm3, [rdx + 8 * 8]

aesdeclast xmm0, [rdx + 8 * 0]

aesdeclast xmm1, [rdx + 8 * 2]

aesdeclast xmm2, [rdx + 8 * 4]

aesdeclast xmm3, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/H0F38/66_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000E330A81A",
    "RBX": "0x00000000BE2DA0A5",
    "RCX": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; This is a clone of the F2_F0 crc32 test with crc32 hand-encoded so that a 66
; prefix is emitted in addition to the F2 prefix (in this file: F2 first, then 66).
; The source operand of opcode 0F 38 F0 is a byte in every case below.
; WARNING: the two RIP-relative forms carry hard-coded 32-bit displacements to
; .data; changing the size of any code between them and .data requires
; recomputing those displacements.
mov rax, 0
mov rbx, 0
mov rcx, 0
mov rdx, 0
lea rsi, [rel .data]

; crc32 rax, byte [rel .data]
; Bytes: F2, 66, REX.W, opcode 0F 38 F0, ModRM 0x05 (reg = rax, rm = RIP + disp32)
db 0xf2
db 0x66
db 0x48, 0x0f, 0x38, 0xf0, 0x05
dd 0x0000002A

; crc32 ebx, byte [rel .data + 1]
; Bytes: F2, 66, opcode 0F 38 F0, ModRM 0x1d (reg = ebx, rm = RIP + disp32)
db 0xf2
db 0x66
db 0x0f, 0x38, 0xf0, 0x1d,
dd 0x00000021

; Fold all 256 bytes of .data into rcx, one byte per iteration (rdx = byte index)
.again:
cmp rdx, 256
je .done
; crc32 rcx, byte [rsi + rdx]
; Bytes: F2, 66, REX.W, opcode 0F 38 F0, ModRM 0x0c (reg = rcx, SIB follows),
; SIB 0x16 (base = rsi, index = rdx, scale 1)
db 0xf2
db 0x66
db 0x48, 0x0f, 0x38, 0xf0, 0x0c, 0x16

add rdx, 1
jmp .again
.done:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/66_F0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000E330A81A",
    "RBX": "0x00000000BE2DA0A5",
    "RCX": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; This is a clone of the F2_F0 crc32 test with crc32 hand-encoded so that a 66
; prefix is emitted in addition to the F2 prefix (in this file: 66 first, then F2).
; The source operand of opcode 0F 38 F0 is a byte in every case below.
; WARNING: the two RIP-relative forms carry hard-coded 32-bit displacements to
; .data; changing the size of any code between them and .data requires
; recomputing those displacements.
mov rax, 0
mov rbx, 0
mov rcx, 0
mov rdx, 0
lea rsi, [rel .data]

; crc32 rax, byte [rel .data]
; Bytes: 66, F2, REX.W, opcode 0F 38 F0, ModRM 0x05 (reg = rax, rm = RIP + disp32)
db 0x66
db 0xf2
db 0x48, 0x0f, 0x38, 0xf0, 0x05
dd 0x0000002A

; crc32 ebx, byte [rel .data + 1]
; Bytes: 66, F2, opcode 0F 38 F0, ModRM 0x1d (reg = ebx, rm = RIP + disp32)
db 0x66
db 0xf2
db 0x0f, 0x38, 0xf0, 0x1d,
dd 0x00000021

; Fold all 256 bytes of .data into rcx, one byte per iteration (rdx = byte index)
.again:
cmp rdx, 256
je .done
; crc32 rcx, byte [rsi + rdx]
; Bytes: 66, F2, REX.W, opcode 0F 38 F0, ModRM 0x0c (reg = rcx, SIB follows),
; SIB 0x16 (base = rsi, index = rdx, scale 1)
db 0x66
db 0xf2
db 0x48, 0x0f, 0x38, 0xf0, 0x0c, 0x16

add rdx, 1
jmp .again
.done:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/66_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000C5727F5A",
    "RBX": "0x00000000FAC690D7",
    "RCX": "0x000000002AAF1F77",
    "RDX": "0x00000000ADBE9F64",
    "RSI": "0x00000000ADBE9F64",
    "RDI": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; This is a clone of the F2_F1 crc32 test with crc32 hand-encoded so that the 66
; prefix placement is explicit (in this file: 66 first, then F2).
; This can't use the operand size override on 32-bit instructions for testing since it WILL override to 16-bit
; The REX.W forms also carry a 66 prefix and are still described as qword-source crc32.
; WARNING: the three RIP-relative forms carry hard-coded 32-bit displacements to
; .data; changing the size of any code between them and .data requires
; recomputing those displacements.
mov rax, 0
mov rbx, 0
mov rcx, 0

mov rdx, 0
mov rsi, 0
mov rdi, 0

; rsp is used as the data base pointer and rbp as the element index below
mov rbp, 0
lea rsp, [rel .data]

; crc32 eax, word [rel .data]
db 0x66 ; Operand size override
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x05
dd 0x0000006c

; crc32 ebx, dword [rel .data + 2]
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x1d,
dd 0x00000065

; crc32 rcx, qword [rel .data + 8]
db 0x66 ; Operand size override
db 0xf2 ; Prefix
db 0x48, 0x0f, 0x38, 0xf1, 0x0d,
dd 0x00000060

; Fold all 256 bytes of .data into edx, 128 words of 2 bytes each
mov rbp, 0
.again16:
cmp rbp, 128
je .done16
; crc32 edx, word [rsp + rbp * 2]
; ModRM 0x14 (reg = edx, SIB follows), SIB 0x6c (base = rsp, index = rbp, scale 2)
db 0x66 ; Operand size override
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x14, 0x6c

add rbp, 1
jmp .again16
.done16:

; Fold all 256 bytes of .data into esi, 64 dwords of 4 bytes each
mov rbp, 0
.again32:
cmp rbp, 64
je .done32
; crc32 esi, dword [rsp + rbp * 4]
; ModRM 0x34 (reg = esi, SIB follows), SIB 0xac (base = rsp, index = rbp, scale 4)
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x34, 0xac

add rbp, 1
jmp .again32
.done32:

; Fold all 256 bytes of .data into rdi, 32 qwords of 8 bytes each
mov rbp, 0
.again64:
cmp rbp, 32
je .done64
; crc32 rdi, qword [rsp + rbp * 8]
; ModRM 0x3c (reg = rdi, SIB follows), SIB 0xec (base = rsp, index = rbp, scale 8)
db 0x66 ; Operand size override
db 0xf2 ; Prefix
db 0x48, 0x0f, 0x38, 0xf1, 0x3c, 0xec

add rbp, 1
jmp .again64
.done64:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/66_F1_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000C5727F5A",
    "RBX": "0x00000000FAC690D7",
    "RCX": "0x000000002AAF1F77",
    "RDX": "0x00000000ADBE9F64",
    "RSI": "0x00000000ADBE9F64",
    "RDI": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; This is a clone of the F2_F1 crc32 test with crc32 hand-encoded so that the 66
; prefix placement is explicit (in this file: F2 first, then 66).
; This can't use the operand size override on 32-bit instructions for testing since it WILL override to 16-bit
; The REX.W forms also carry a 66 prefix and are still described as qword-source crc32.
; WARNING: the three RIP-relative forms carry hard-coded 32-bit displacements to
; .data; changing the size of any code between them and .data requires
; recomputing those displacements.
mov rax, 0
mov rbx, 0
mov rcx, 0

mov rdx, 0
mov rsi, 0
mov rdi, 0

; rsp is used as the data base pointer and rbp as the element index below
mov rbp, 0
lea rsp, [rel .data]

; crc32 eax, word [rel .data]
db 0xf2 ; Prefix
db 0x66 ; Operand size override
db 0x0f, 0x38, 0xf1, 0x05
dd 0x0000006c

; crc32 ebx, dword [rel .data + 2]
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x1d,
dd 0x00000065

; crc32 rcx, qword [rel .data + 8]
db 0xf2 ; Prefix
db 0x66 ; Operand size override
db 0x48, 0x0f, 0x38, 0xf1, 0x0d,
dd 0x00000060

; Fold all 256 bytes of .data into edx, 128 words of 2 bytes each
mov rbp, 0
.again16:
cmp rbp, 128
je .done16
; crc32 edx, word [rsp + rbp * 2]
; ModRM 0x14 (reg = edx, SIB follows), SIB 0x6c (base = rsp, index = rbp, scale 2)
db 0xf2 ; Prefix
db 0x66 ; Operand size override
db 0x0f, 0x38, 0xf1, 0x14, 0x6c

add rbp, 1
jmp .again16
.done16:

; Fold all 256 bytes of .data into esi, 64 dwords of 4 bytes each
mov rbp, 0
.again32:
cmp rbp, 64
je .done32
; crc32 esi, dword [rsp + rbp * 4]
; ModRM 0x34 (reg = esi, SIB follows), SIB 0xac (base = rsp, index = rbp, scale 4)
db 0xf2 ; Prefix
db 0x0f, 0x38, 0xf1, 0x34, 0xac

add rbp, 1
jmp .again32
.done32:

; Fold all 256 bytes of .data into rdi, 32 qwords of 8 bytes each
mov rbp, 0
.again64:
cmp rbp, 32
je .done64
; crc32 rdi, qword [rsp + rbp * 8]
; ModRM 0x3c (reg = rdi, SIB follows), SIB 0xec (base = rsp, index = rbp, scale 8)
db 0xf2 ; Prefix
db 0x66 ; Operand size override
db 0x48, 0x0f, 0x38, 0xf1, 0x3c, 0xec

add rbp, 1
jmp .again64
.done64:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/66_F1_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000005c3bc5b0",
    "RBX": "0x000000001dd5b1e5",
    "RCX": "0x0000000015d1c92d"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Register-to-register crc32 (opcode 0F 38 F1) with REX.W, hand-encoded with a
; 66 prefix before F2, after F2, and on both sides of it.
; The instructions are chained: each result feeds the next one as its source.

mov rax, 0x41424344454647
mov rbx, 0x51525354555657
mov rcx, 0x61626364656667

mov rdx, 0x71727374757677

; crc32 rax, rbx
; ModRM 0xc3: register-direct, reg = rax, rm = rbx
db 0x66 ; Override, Should be ignored
db 0xf2 ; Prefix
db 0x48 ; REX.W
db 0x0f, 0x38, 0xf1, 0xc3

; crc32 rbx, rcx
; ModRM 0xd9: register-direct, reg = rbx, rm = rcx
db 0xf2 ; Prefix
db 0x66 ; Override, Should be ignored
db 0x48 ; REX.W
db 0x0f, 0x38, 0xf1, 0xd9

; crc32 rcx, rdx
; ModRM 0xca: register-direct, reg = rcx, rm = rdx
db 0x66 ; Override, Should be ignored
db 0xf2 ; Prefix
db 0x66 ; Override, Should be ignored
db 0x48 ; REX.W
db 0x0f, 0x38, 0xf1, 0xca

hlt


================================================
FILE: unittests/ASM/H0F38/F2_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000E330A81A",
    "RBX": "0x00000000BE2DA0A5",
    "RCX": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; crc32 with a byte source operand, encoded by the assembler.
; Covers a 64-bit and a 32-bit destination with RIP-relative sources, then a
; running CRC over the whole 256-byte table.

; All accumulators start at zero; rdx doubles as the loop's byte index
mov rax, 0
mov rbx, 0
mov rcx, 0
mov rdx, 0
lea rsi, [rel .data]

; Single-byte CRCs of the first two table bytes
crc32 rax, byte [rel .data]
crc32 ebx, byte [rel .data + 1]

; Fold all 256 bytes of .data into rcx, one byte per iteration
.again:
cmp rdx, 256
je .done
crc32 rcx, byte [rsi + rdx]
add rdx, 1
jmp .again
.done:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/F2_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000C5727F5A",
    "RBX": "0x00000000FAC690D7",
    "RCX": "0x000000002AAF1F77",
    "RDX": "0x00000000ADBE9F64",
    "RSI": "0x00000000ADBE9F64",
    "RDI": "0x00000000ADBE9F64"
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; crc32 with word, dword and qword source operands, encoded by the assembler.
; Covers one RIP-relative source of each width, then three running CRCs over
; the whole 256-byte table. The three running CRCs are expected to be equal.

; All accumulators start at zero
mov rax, 0
mov rbx, 0
mov rcx, 0

mov rdx, 0
mov rsi, 0
mov rdi, 0

; rsp is used as the data base pointer and rbp as the element index below
mov rbp, 0
lea rsp, [rel .data]

; Single-element CRCs, one per source width
crc32 eax, word [rel .data]
crc32 ebx, dword [rel .data + 2]
crc32 rcx, qword [rel .data + 8]

; Fold all 256 bytes of .data into edx, 128 words of 2 bytes each
mov rbp, 0
.again16:
cmp rbp, 128
je .done16
crc32 edx, word [rsp + rbp * 2]
add rbp, 1
jmp .again16
.done16:


; Fold all 256 bytes of .data into esi, 64 dwords of 4 bytes each
mov rbp, 0
.again32:
cmp rbp, 64
je .done32
crc32 esi, dword [rsp + rbp * 4]
add rbp, 1
jmp .again32
.done32:

; Fold all 256 bytes of .data into rdi, 32 qwords of 8 bytes each
mov rbp, 0
.again64:
cmp rbp, 32
je .done64
crc32 rdi, qword [rsp + rbp * 8]
add rbp, 1
jmp .again64
.done64:

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F38/XX_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4848484848484848",
    "MM1": "0x0",
    "MM2": "0x0",
    "MM3": "0x4847464544434241"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; pshufb on MMX registers (64-bit form), memory-source only.
; Each control byte selects a source byte by index, or produces zero when its
; top bit is set.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

; Data vector that every case shuffles
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
; Control: all indices 0 -> broadcast the lowest byte (0x48)
mov rax, 0x0
mov [rdx + 8 * 1], rax
; Control: 0xFF in every byte (top bit set) -> result is zero
mov rax, -1
mov [rdx + 8 * 2], rax
; Control: 0x80 in every byte (only the top bit set) -> result is zero
mov rax, 0x8080808080808080
mov [rdx + 8 * 3], rax
; Control: indices 7..0 from the lowest byte upwards -> byte-reversed vector
mov rax, 0x0001020304050607
mov [rdx + 8 * 4], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]

pshufb mm0, [rdx + 8 * 1]
pshufb mm1, [rdx + 8 * 2]
pshufb mm2, [rdx + 8 * 3]
pshufb mm3, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4444444444444444",
    "MM1": "0x0"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; pshufb on MMX registers with junk in the control bytes' reserved bits.
; For the 64-bit form only bit 7 (zero the element) and bits 2:0 (source byte
; index) of each control byte are meaningful; bits 6:3 must be ignored.

lea rdx, [rel .data]

; Both cases shuffle the same incoming vector
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]

pshufb mm0, [rdx + 8 * 1]
pshufb mm1, [rdx + 8 * 2]

hlt

align 8
.data:
; Incoming vector
dq 0x4142434445464748
; Test bits with trash data in reserved bits to ensure it is ignored
; Select single element
; (0x7C: bit 7 clear, bits 2:0 = 4 -> source byte 4 = 0x44; bits 6:3 are junk)
dq 0x7C7C7C7C7C7C7C7C
; Clear element
; (0xF8: bit 7 set -> zero; bits 6:3 are junk)
dq 0xF8F8F8F8F8F8F8F8


================================================
FILE: unittests/ASM/H0F38/XX_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA4A6ACAE84868C8E",
    "MM1": "0x84868C8EA4A6ACAE"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; phaddw on MMX registers (horizontal add of adjacent 16-bit words).
; The low half of the result holds the pair sums of the destination and the
; high half the pair sums of the source; the second case swaps the operands.

; rdx = scratch memory base (assumed to be mapped by the test harness)
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]

; Memory-source form, with the operands swapped between the two cases
phaddw mm0, [rdx + 8 * 1]
phaddw mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA6A8AAAC86888A8C",
    "MM1": "0x86888A8CA6A8AAAC"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of phaddd (horizontal add of adjacent 32-bit dwords)
; with a memory source, once in each operand order.
mov rdx, 0xe0000000

; Slot 0 and slot 1: the two input vectors
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]

; The config expects the destination's dword sum in the low half of the result
; and the memory operand's dword sum in the high half.
phaddd mm0, [rdx + 8 * 1]
phaddd mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x7FFF7FFF7FFF7FFF",
    "MM1": "0x7FFF7FFF7FFF7FFF",
    "MM2": "0x7FFF7FFF7FFF7FFF",
    "MM3": "0x8000800080008000",
    "MM4": "0x800080007FFF7FFF",
    "MM5": "0x7FFF7FFF80008000",
    "MM6": "0x71836D874331472D"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of phaddsw (horizontal add of adjacent 16-bit words
; with signed saturation) with a memory source.
mov rdx, 0xe0000000

; Slots 0-1: inputs whose pair sums exceed the positive signed 16-bit range
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2: large positive words, slot 3: large negative words
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 2], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 3], rax

; Slots 4-5: inputs whose pair sums stay in range (no saturation expected)
mov rax, 0x2119221823172416
mov [rdx + 8 * 4], rax
mov rax, 0x3941384237433644
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]
movq mm4, [rdx + 8 * 2]
movq mm5, [rdx + 8 * 3]
movq mm6, [rdx + 8 * 4]

; Every lane is expected to saturate to 0x7FFF
phaddsw mm0, [rdx + 8 * 1]
phaddsw mm1, [rdx + 8 * 0]

; All-positive saturation (0x7FFF) and all-negative saturation (0x8000)
phaddsw mm2, [rdx + 8 * 2]
phaddsw mm3, [rdx + 8 * 3]

; One half saturates positive, the other negative
phaddsw mm4, [rdx + 8 * 3]
phaddsw mm5, [rdx + 8 * 2]

; Non-saturating sums
phaddsw mm6, [rdx + 8 * 5]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0xFE02FE02FE02FE02",
    "MM2": "0x7E027E027E027E02",
    "MM3": "0x7FFF7FFF7FFF7FFF",
    "MM4": "0x0D130E5F0EB90F99",
    "MM5": "0x2D132E5F2FB93099",
    "MM6": "0x483B48E649914A3C",
    "MM7": "0x283B28E629912A3C"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of pmaddubsw with a memory source. The instruction
; multiplies unsigned bytes of the destination by signed bytes of the source
; and adds adjacent products with signed 16-bit saturation.
mov rdx, 0xe0000000

; Slot 0: all zero
mov rax, 0x0
mov [rdx + 8 * 0], rax

; Slot 1: all bytes 0xFF
mov rax, -1
mov [rdx + 8 * 1], rax

; Slot 2: all bytes 0x7F
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 2], rax

; Slots 6-9: mixed-sign inputs (slots 3-5 are not used by this test)
mov rax, 0x8141824383448445
mov [rdx + 8 * 6], rax
mov rax, 0x21F223F323F424F5
mov [rdx + 8 * 7], rax

mov rax, 0xE251E352E453E554
mov [rdx + 8 * 8], rax
mov rax, 0x71A972A873A774A6
mov [rdx + 8 * 9], rax

; Zero
movq mm0, [rdx + 8 * 0]
pmaddubsw mm0, [rdx + 8 * 0]

; -1
movq mm1, [rdx + 8 * 1]
pmaddubsw mm1, [rdx + 8 * 1]

; 127
movq mm2, [rdx + 8 * 2]
pmaddubsw mm2, [rdx + 8 * 2]

; 255 and 127
; The config expects every lane to saturate to 0x7FFF here
movq mm3, [rdx + 8 * 1]
pmaddubsw mm3, [rdx + 8 * 2]

; Mixture
; Each pair of inputs is run in both operand orders
movq mm4, [rdx + 8 * 6]
pmaddubsw mm4, [rdx + 8 * 7]

movq mm5, [rdx + 8 * 7]
pmaddubsw mm5, [rdx + 8 * 6]

movq mm6, [rdx + 8 * 8]
pmaddubsw mm6, [rdx + 8 * 9]

movq mm7, [rdx + 8 * 9]
pmaddubsw mm7, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0202020202020202",
    "MM1": "0x0202020202020202"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of phsubw (horizontal subtract of adjacent 16-bit
; words) with a memory source, once in each operand order.
mov rdx, 0xe0000000

; Slot 0 and slot 1: the two input vectors
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]

; Adjacent words in both inputs differ by 0x0202, so the config expects
; 0x0202 in every lane for both operand orders.
phsubw mm0, [rdx + 8 * 1]
phsubw mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0404040404040404",
    "MM1": "0x0404040404040404"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of phsubd (horizontal subtract of adjacent 32-bit
; dwords) with a memory source, once in each operand order.
mov rdx, 0xe0000000

; Slot 0 and slot 1: the two input vectors
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]

; The two dwords of each input differ by 0x04040404, so the config expects
; 0x04040404 in both lanes for both operand orders.
phsubd mm0, [rdx + 8 * 1]
phsubd mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0202020202020202",
    "MM1": "0x0202020202020202",
    "MM2": "0x0",
    "MM3": "0x0",
    "MM4": "0x0",
    "MM5": "0x0",
    "MM6": "0xFF01FF0100FF00FF"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of phsubsw (horizontal subtract of adjacent 16-bit
; words with signed saturation) with a memory source.
mov rdx, 0xe0000000

; Slots 0-1: adjacent words differ by 0x0202
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: all words identical, so every difference is zero
mov rax, 0x7F7F7F7F7F7F7F7F
mov [rdx + 8 * 2], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 3], rax

; Slots 4-5: inputs producing both positive and negative differences
mov rax, 0x2119221823172416
mov [rdx + 8 * 4], rax
mov rax, 0x3941384237433644
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]
movq mm4, [rdx + 8 * 2]
movq mm5, [rdx + 8 * 3]
movq mm6, [rdx + 8 * 4]

; Expected 0x0202 in every lane
phsubsw mm0, [rdx + 8 * 1]
phsubsw mm1, [rdx + 8 * 0]

; Expected zero: adjacent words are equal in both operands
phsubsw mm2, [rdx + 8 * 2]
phsubsw mm3, [rdx + 8 * 3]

phsubsw mm4, [rdx + 8 * 3]
phsubsw mm5, [rdx + 8 * 2]

; Mixed-sign differences
phsubsw mm6, [rdx + 8 * 5]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_08.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM4": "0x0",
    "MM5": "0xFEFEFEFEFEFEFEFE",
    "MM6": "0x0202020202020202",
    "MM7": "0xFE000200FE02FE00"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX register form of psignb: each destination byte is kept,
; negated or zeroed depending on the sign of the matching source byte.
mov rdx, 0xe0000000

; Slots 0-2 and 4: sign-control vectors (zero, negative, positive, mixed)
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
; Slot 3: the value every destination register starts with
mov rax, 0x0202020202020202
mov [rdx + 8 * 3], rax
mov rax, 0xFF000100FF01FF00
mov [rdx + 8 * 4], rax

; mm0-mm3 hold the sign-control vectors
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 4]

; mm4-mm7 are the destinations checked by the config
movq mm4, [rdx + 8 * 3]
movq mm5, [rdx + 8 * 3]
movq mm6, [rdx + 8 * 3]
movq mm7, [rdx + 8 * 3]

; Test with full zero
psignb mm4, mm0

; Test with full negative
psignb mm5, mm1

; Test with full positive
psignb mm6, mm2

; Test a mix
psignb mm7, mm3

hlt


================================================
FILE: unittests/ASM/H0F38/XX_09.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM4": "0x0",
    "MM5": "0xFBFEFBFEFBFEFBFE",
    "MM6": "0x0402040204020402",
    "MM7": "0xFBFE04020000FBFE"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX register form of psignw: each destination word is kept,
; negated or zeroed depending on the sign of the matching source word.
mov rdx, 0xe0000000

; Slots 0-2 and 4: sign-control vectors (zero, negative, positive, mixed)
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0001000100010001
mov [rdx + 8 * 2], rax
; Slot 3: the value every destination register starts with
mov rax, 0x0402040204020402
mov [rdx + 8 * 3], rax
mov rax, 0xFFFF00010000FFFF
mov [rdx + 8 * 4], rax

; mm0-mm3 hold the sign-control vectors
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 4]

; mm4-mm7 are the destinations checked by the config
movq mm4, [rdx + 8 * 3]
movq mm5, [rdx + 8 * 3]
movq mm6, [rdx + 8 * 3]
movq mm7, [rdx + 8 * 3]

; Test with full zero
psignw mm4, mm0

; Test with full negative
psignw mm5, mm1

; Test with full positive
psignw mm6, mm2

; Test a mix
psignw mm7, mm3

hlt


================================================
FILE: unittests/ASM/H0F38/XX_0A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM4": "0x0",
    "MM5": "0xFAFBFCFEFDFCFBFB",
    "MM6": "0x0504030202030405",
    "MM7": "0xFAFBFCFE00000000"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX register form of psignd: each destination dword is kept,
; negated or zeroed depending on the sign of the matching source dword.
mov rdx, 0xe0000000

; Slots 0-2 and 4: sign-control vectors (zero, negative, positive, mixed)
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0000000100000001
mov [rdx + 8 * 2], rax
; Slot 3: the value every destination register starts with
mov rax, 0x0504030202030405
mov [rdx + 8 * 3], rax
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 4], rax

; mm0-mm3 hold the sign-control vectors
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 4]

; mm4-mm7 are the destinations checked by the config
movq mm4, [rdx + 8 * 3]
movq mm5, [rdx + 8 * 3]
movq mm6, [rdx + 8 * 3]
movq mm7, [rdx + 8 * 3]

; Test with full zero
psignd mm4, mm0

; Test with full negative
psignd mm5, mm1

; Test with full positive
psignd mm6, mm2

; Test a mix
psignd mm7, mm3

hlt


================================================
FILE: unittests/ASM/H0F38/XX_0B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00830087008B008F",
    "MM1": "0x0100FF0000FF0100"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of pmulhrsw (signed 16-bit multiply, returning the
; rounded and scaled high part of each product) with a memory source.
mov rdx, 0xe0000000

; Slots 0-1: destination values, slots 2-3: multipliers
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x8001800280038004
mov [rdx + 8 * 1], rax
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
mov rax, 0xFF000100FF01FF00
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]

; Positive words times a positive multiplier
pmulhrsw mm0, [rdx + 8 * 2]

; Negative words times multipliers of both signs
pmulhrsw mm1, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_1C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0101010101010101",
    "MM2": "0x0101010101010101",
    "MM3": "0x0100010001010100"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of pabsb (per-byte absolute value) with a memory
; source. The destination registers are written without being initialised
; first; the config only checks the results.
mov rdx, 0xe0000000

; Slots 0-3: zero, all negative, all positive and mixed-sign byte vectors
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
mov rax, 0xFF000100FF01FF00
mov [rdx + 8 * 3], rax

; Test with full zero
pabsb mm0, [rdx + 8 * 0]

; Test with full negative
pabsb mm1, [rdx + 8 * 1]

; Test with full positive
pabsb mm2, [rdx + 8 * 2]

; Test a mix
pabsb mm3, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_1D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0001000100010001",
    "MM2": "0x0001000100010001",
    "MM3": "0x0001000100000001"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of pabsw (per-word absolute value) with a memory
; source. The destination registers are written without being initialised
; first; the config only checks the results.
mov rdx, 0xe0000000

; Slots 0-3: zero, all negative, all positive and mixed-sign word vectors
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0001000100010001
mov [rdx + 8 * 2], rax
mov rax, 0xFFFF00010000FFFF
mov [rdx + 8 * 3], rax

; Test with full zero
pabsw mm0, [rdx + 8 * 0]

; Test with full negative
pabsw mm1, [rdx + 8 * 1]

; Test with full positive
pabsw mm2, [rdx + 8 * 2]

; Test a mix
pabsw mm3, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/H0F38/XX_1E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0000000100000001",
    "MM2": "0x0000000100000001",
    "MM3": "0x0000000100000000"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of pabsd (per-dword absolute value) with a memory
; source. The destination registers are written without being initialised
; first; the config only checks the results.
mov rdx, 0xe0000000

; Slots 0-3: zero, all negative, all positive and mixed-sign dword vectors
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 1], rax
mov rax, 0x0000000100000001
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 3], rax

; Test with full zero
pabsd mm0, [rdx + 8 * 0]

; Test with full negative
pabsd mm1, [rdx + 8 * 1]

; Test with full positive
pabsd mm2, [rdx + 8 * 2]

; Test a mix
pabsd mm3, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/H0F38/adcx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "2",
      "RBX": "3",
      "RCX": "1",
      "RDX": "2",
      "RSI": "1",
      "RDI": "3"
  },
  "HostFeatures": ["ADX"]
}
%endif

; Exercises adcx (add with CF as the carry-in) on 64-bit and 32-bit registers.
; CF is set explicitly with clc/stc before every adcx.

; Test with no carry
; 1 + 1 + CF(0); the config expects RAX = 2
mov rax, 1
clc
adcx rax, rax

; Test with carry
; 1 + 1 + CF(1); the config expects RBX = 3
mov rcx, 1
mov rbx, 1
stc
adcx rbx, rcx

; 32-bit registers

; Test with no carry
; The config expects RDX = 2
mov edx, 1
clc
adcx edx, edx

; Test with carry
; The config expects RDI = 3
mov esi, 1
mov edi, 1
stc
adcx edi, esi

hlt


================================================
FILE: unittests/ASM/H0F38/adox.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0",
      "RBX": "1",
      "RCX": "0xFFFFFFFFFFFFFFFF",
      "RDX": "0",
      "RSI": "0xFFFFFFFF",
      "RDI": "1"
  },
  "HostFeatures": ["ADX"]
}
%endif

; Exercises adox (add with OF as the carry-in; the unsigned carry-out is
; written back to OF) on 64-bit and 32-bit registers.

; Start from a known OF = 0 instead of relying on the flags state at entry.
; test always clears OF and does not modify any register.
test al, al

; Test with OF clear on input
; -1 + 1 + OF(0) = 0, and the carry-out sets OF for the next adox
mov rax, -1
mov rbx, 1
adox rax, rbx

; Test with OF set on input (flag set from previous adox)
; 1 + -1 + OF(1) = 1
mov rbx, 1
mov rcx, -1
adox rbx, rcx

; Clear OF for 32-bit tests.
test al, al

; 32-bit registers

; Test with OF clear on input
; -1 + 1 + OF(0) = 0 in 32 bits, and the carry-out sets OF for the next adox
mov edx, -1
mov esi, 1
adox edx, esi

; Test with OF set on input (flag set from previous adox)
; 1 + -1 + OF(1) = 1; esi keeps 0xFFFFFFFF, which the config checks in RSI
mov edi, 1
mov esi, -1
adox edi, esi

hlt


================================================
FILE: unittests/ASM/H0F38/sha1msg1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x5790A6E435CD1A3E", "0x3CEC3979BF41FAEF"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha1msg1 on two fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm1 = first 16 bytes of .data (destination), xmm2 = next 16 bytes (source)
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; The config checks the result in xmm1 and that xmm2 is left unchanged
sha1msg1 xmm1, xmm2

hlt

align 16
.data:
; Two 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F38/sha1msg2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x1B0233CC7FCDBB45", "0x1ECD2142EC058BF8"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha1msg2 on two fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm1 = first 16 bytes of .data (destination), xmm2 = next 16 bytes (source)
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; The config checks the result in xmm1 and that xmm2 is left unchanged
sha1msg2 xmm1, xmm2

hlt

align 16
.data:
; Two 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F38/sha1nexte.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x6868C3F3AAED56E0", "0xD7DD078194E6E6DE"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha1nexte on two fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm1 = first 16 bytes of .data (destination), xmm2 = next 16 bytes (source)
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; XMM1 is expected to contain the same value as XMM2
; but with the top 32-bit word set to an equivalent
; of XMM2[127:96] + (XMM1[127:96] ROL 30)

sha1nexte xmm1, xmm2

hlt

align 16
.data:
; Two 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F38/sha256msg1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x43DEA25DAB8EF585", "0x1D30D1491042EED2"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha256msg1 on two fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm1 = first 16 bytes of .data (destination), xmm2 = next 16 bytes (source)
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; The config checks the result in xmm1 and that xmm2 is left unchanged
sha256msg1 xmm1, xmm2

hlt

align 16
.data:
; Two 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F38/sha256msg2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x915D686150BD9E36", "0xB499245E9B33D33D"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha256msg2 on two fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm1 = first 16 bytes of .data (destination), xmm2 = next 16 bytes (source)
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; The config checks the result in xmm1 and that xmm2 is left unchanged
sha256msg2 xmm1, xmm2

hlt

align 16
.data:
; Two 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F38/sha256rnds2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x97D4574EE323773D", "0xA934C32F562D8E88"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Exercises the register form of sha256rnds2 on fixed 128-bit inputs.
lea rdx, [rel .data]

; xmm0 is the implicit third operand of sha256rnds2 and is loaded from the
; third 16-byte vector; xmm1 (destination) and xmm2 (source) use the first two.
movaps xmm0, [rdx + 16 * 2]
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]

; The config checks the result in xmm1 and that xmm2 is left unchanged
sha256rnds2 xmm1, xmm2

hlt

align 16
.data:
; Three 16-byte input vectors
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd


================================================
FILE: unittests/ASM/H0F3A/0_66_0F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7861626364656667", "0x4871727374757677"],
    "XMM2": ["0x7861626364656667", "0x4871727374757677"],
    "XMM3": ["0x5354555657584142", "0x0000000000005152"],
    "XMM4": ["0x0", "0x0"],
    "XMM5": ["0x6162636465666768", "0x7172737475767778"]
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the SSE form of palignr with several byte-shift immediates.
; palignr concatenates destination:source (destination in the upper half),
; shifts the pair right by imm8 bytes and keeps the low 128 bits.
mov rdx, 0xe0000000

; [rdx]: first 128-bit vector (used as the destination each time)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 16]: second 128-bit vector (used as the source each time)
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]

; Shift by one byte
palignr xmm0, xmm1, 1

movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

; Same operation with an extra 0x48 byte in front; the config expects the
; same result as the plain encoding above.
db 0x48 ; Glues Rex.W to the start of the instruction
palignr xmm2, xmm3, 1

; From here on each case reloads the previous case's source register as its
; destination, so only the final value of each register is checked.
movapd xmm3, [rdx]
movapd xmm4, [rdx + 16]

; Shift by 22 bytes: only part of the destination remains, zero-extended
palignr xmm3, xmm4, 22

movapd xmm4, [rdx]
movapd xmm5, [rdx + 16]

; Shift by 32 bytes: everything is shifted out, the config expects zero
palignr xmm4, xmm5, 32

movapd xmm5, [rdx]
movapd xmm6, [rdx + 16]

; Shift by 0 bytes: the config expects the source value
palignr xmm5, xmm6, 0

hlt


================================================
FILE: unittests/ASM/H0F3A/0_66_21.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434465666768", "0x5152535455565758"],
    "XMM1":  ["0x4142434461626364", "0x5152535455565758"],
    "XMM2":  ["0x7576777845464748", "0x5152535455565758"],
    "XMM3":  ["0x4142434445464748", "0x5152535471727374"],
    "XMM4":  ["0x4142434445464748", "0x7576777855565758"],
    "XMM5":  ["0x4142434445464748", "0x5152535475767778"],
    "XMM6":  ["0x7576777845464748", "0x5152535455565758"],
    "XMM7":  ["0x4142434475767778", "0x5152535455565758"],
    "XMM8":  ["0x0000000065666768", "0x5152535455565758"],
    "XMM9":  ["0x0000000061626364", "0x5152535455565758"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises insertps with register and memory sources.
; The immediate is built as (source element << 6) | (destination element << 4)
; | zero mask, matching the three fields written out in each operand below.
mov rdx, 0xe0000000

; [rdx]: initial value of every destination register
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; [rdx + 16]: vector loaded into xmm15, the register source
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx]
movapd xmm2, [rdx]
movapd xmm3, [rdx]
movapd xmm4, [rdx]
movapd xmm5, [rdx]
movapd xmm6, [rdx]
movapd xmm7, [rdx]
movapd xmm8, [rdx]
movapd xmm9, [rdx]
movapd xmm10, [rdx]
movapd xmm15, [rdx + 8 * 2]

; Simple move Reg<-Reg
insertps xmm0, xmm15, ((0b00 << 6) | (0b00 << 4) | (0b0000))
insertps xmm1, xmm15, ((0b01 << 6) | (0b00 << 4) | (0b0000))
insertps xmm2, xmm15, ((0b10 << 6) | (0b01 << 4) | (0b0000))
insertps xmm3, xmm15, ((0b11 << 6) | (0b10 << 4) | (0b0000))

; Simple move Reg<-Mem
; The config expects the dword at [rdx + 8 * 3] to be inserted in every case,
; whatever the source-element field holds.
insertps xmm4, [rdx + 8 * 3], ((0b00 << 6) | (0b11 << 4) | (0b0000))
insertps xmm5, [rdx + 8 * 3], ((0b01 << 6) | (0b10 << 4) | (0b0000))
insertps xmm6, [rdx + 8 * 3], ((0b10 << 6) | (0b01 << 4) | (0b0000))
insertps xmm7, [rdx + 8 * 3], ((0b11 << 6) | (0b00 << 4) | (0b0000))

; Simple move Reg<-Reg with mask
; Zero-mask bit 1 clears element 1 of the result after the insert
insertps xmm8, xmm15, ((0b00 << 6) | (0b00 << 4) | (0b0010))
insertps xmm9, xmm15, ((0b01 << 6) | (0b00 << 4) | (0b0010))

; Full ZMask
; Every element is cleared, so the config expects zero
insertps xmm10, xmm15, ((0b00 << 6) | (0b00 << 4) | (0b1111))

hlt


================================================
FILE: unittests/ASM/H0F3A/0_66_DF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6363636363636363", "0x6363636363636363"],
    "XMM1": ["0x1616161616161616", "0x1616161616161616"],
    "XMM2": ["0x7c6363636363637c", "0x7c6363636363637c"],
    "XMM3": ["0x1616161616161616", "0x7c6363636363637c"],
    "XMM4": ["0x6363636263636363", "0x6363636263636363"],
    "XMM5": ["0x1616161416161616", "0x1616161416161616"],
    "XMM6": ["0x7c6363606363637c", "0x7c6363606363637c"],
    "XMM7": ["0x1616161216161616", "0x7c6363676363637c"],
    "XMM8": ["0x6363636663636363", "0x6363636663636363"],
    "XMM9": ["0x1616161016161616", "0x1616161016161616"],
    "XMM10": ["0x7c6363646363637c", "0x7c6363646363637c"],
    "XMM11": ["0x1616161e16161616", "0x7c63636b6363637c"],
    "XMM12": ["0x6363636a63636363", "0x6363636a63636363"],
    "XMM13": ["0x1616161c16161616", "0x1616161c16161616"],
    "XMM14": ["0x7c6363686363637c", "0x7c6363686363637c"],
    "XMM15": ["0x1616161a16161616", "0x7c63636f6363637c"]
  },
  "HostFeatures": ["AES"]
}
%endif

; Exercises aeskeygenassist with a 128-bit memory source, four different input
; vectors and round-constant immediates 0 through 12.
mov rdx, 0xe0000000

; [rdx + 8 * 0]: all-zero vector
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; [rdx + 8 * 2]: all-ones vector
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; [rdx + 8 * 4]: 0x00000001 in every dword
mov rax, 0x0000000100000001
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; [rdx + 8 * 6]: mixed vector
mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 6], rax
mov rax, 0x00000001FFFFFFFF
mov [rdx + 8 * 7], rax

; [rdx + 8 * 8]: written here but not read by any instruction below
mov rax, 0x0202020202020202
mov [rdx + 8 * 8], rax
mov rax, 0x0303030303030303
mov [rdx + 8 * 9], rax

; Round constant 0 against each of the four inputs
aeskeygenassist xmm0, [rdx + 8 * 0], 0
aeskeygenassist xmm1, [rdx + 8 * 2], 0
aeskeygenassist xmm2, [rdx + 8 * 4], 0
aeskeygenassist xmm3, [rdx + 8 * 6], 0

; Same four inputs, cycling through round constants 1-12
aeskeygenassist xmm4, [rdx + 8 * 0], 1
aeskeygenassist xmm5, [rdx + 8 * 2], 2
aeskeygenassist xmm6, [rdx + 8 * 4], 3
aeskeygenassist xmm7, [rdx + 8 * 6], 4

aeskeygenassist xmm8, [rdx + 8 * 0], 5
aeskeygenassist xmm9, [rdx + 8 * 2], 6
aeskeygenassist xmm10, [rdx + 8 * 4], 7
aeskeygenassist xmm11, [rdx + 8 * 6], 8

aeskeygenassist xmm12, [rdx + 8 * 0], 9
aeskeygenassist xmm13, [rdx + 8 * 2], 10
aeskeygenassist xmm14, [rdx + 8 * 4], 11
aeskeygenassist xmm15, [rdx + 8 * 6], 12

hlt


================================================
FILE: unittests/ASM/H0F3A/0_XX_0F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4851525354555657",
    "MM2": "0x0061626364656667",
    "MM3": "0x0",
    "MM4": "0x5152535455565758"
  },
  "HostFeatures": ["SSSE3"]
}
%endif

; Exercises the MMX form of palignr with several byte-shift immediates.
; palignr concatenates destination:source (destination in the upper half),
; shifts the pair right by imm8 bytes and keeps the low 64 bits.
mov rdx, 0xe0000000

; Slots 0-3: four distinct 64-bit input values
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx]
movq mm1, [rdx + 8 * 1]

; Shift by one byte: the lowest destination byte moves into the top of the
; shifted source
palignr mm0, mm1, 1

movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]

; Shift by 9 bytes: the source is shifted out entirely and the destination
; is shifted right by one byte
palignr mm2, mm3, 9

; From here on each case reloads the previous case's source register as its
; destination, so only the final value of each register is checked.
movq mm3, [rdx + 8 * 2]
movq mm4, [rdx + 8 * 3]

; Shift by 16 bytes: everything is shifted out, the config expects zero
palignr mm3, mm4, 16

; Destination and source must hold different values here; otherwise a shift
; of zero could not tell "result = source" from "result = destination".
movq mm4, [rdx + 8 * 0]
movq mm5, [rdx + 8 * 1]

; Shift by 0 bytes: the config expects the source value (slot 1)
palignr mm4, mm5, 0

hlt


================================================
FILE: unittests/ASM/H0F3A/66_08.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x8000000000000000", "0xc000000040000000"],
    "XMM1": ["0xbf80000000000000", "0xc00000003f800000"],
    "XMM2": ["0x800000003f800000", "0xbf80000040000000"],
    "XMM3": ["0x8000000000000000", "0xbf8000003f800000"],
    "XMM4": ["0x8000000000000000", "0xc000000040000000"],
    "XMM5": ["0xbf80000000000000", "0xc00000003f800000"],
    "XMM6": ["0x800000003f800000", "0xbf80000040000000"],
    "XMM7": ["0x8000000000000000", "0xbf8000003f800000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises roundps with a memory source, first with the rounding mode given
; in the immediate and then with the rounding mode taken from MXCSR.
lea rdx, [rel .data]

; Immediate bit 2 clear: the low two immediate bits select the rounding mode
roundps xmm0, [rdx + 8 * 0], 00000000b ; Nearest
roundps xmm1, [rdx + 8 * 0], 00000001b ; -inf
roundps xmm2, [rdx + 8 * 0], 00000010b ; +inf
roundps xmm3, [rdx + 8 * 0], 00000011b ; truncate

; Immediate bit 2 set (00000100b) below: the rounding mode comes from MXCSR.
; Each MXCSR image is staged in the scratch qword after the input vector.
; MXCSR
; Set to nearest
mov eax, 0x1F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundps xmm4, [rdx + 8 * 0], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundps xmm5, [rdx + 8 * 0], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundps xmm6, [rdx + 8 * 0], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundps xmm7, [rdx + 8 * 0], 00000100b

hlt

align 4096
.data:
; Input vector of four single-precision values
dd 0.5, -0.5, 1.5, -1.5
; Scratch space used for the MXCSR images
dq 0, 0


================================================
FILE: unittests/ASM/H0F3A/66_09.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x8000000000000000"],
    "XMM1": ["0x0000000000000000", "0xbff0000000000000"],
    "XMM2": ["0x3ff0000000000000", "0x8000000000000000"],
    "XMM3": ["0x0000000000000000", "0x8000000000000000"],
    "XMM4": ["0x0000000000000000", "0x8000000000000000"],
    "XMM5": ["0x0000000000000000", "0xbff0000000000000"],
    "XMM6": ["0x3ff0000000000000", "0x8000000000000000"],
    "XMM7": ["0x0000000000000000", "0x8000000000000000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises roundpd with a memory source, first with the rounding mode given
; in the immediate and then with the rounding mode taken from MXCSR.
lea rdx, [rel .data]

; Immediate bit 2 clear: the low two immediate bits select the rounding mode
roundpd xmm0, [rdx + 8 * 0], 00000000b ; Nearest
roundpd xmm1, [rdx + 8 * 0], 00000001b ; -inf
roundpd xmm2, [rdx + 8 * 0], 00000010b ; +inf
roundpd xmm3, [rdx + 8 * 0], 00000011b ; truncate

; Immediate bit 2 set (00000100b) below: the rounding mode comes from MXCSR.
; Each MXCSR image is staged in the scratch qword after the input vector.
; MXCSR
; Set to nearest
mov eax, 0x1F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundpd xmm4, [rdx + 8 * 0], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundpd xmm5, [rdx + 8 * 0], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundpd xmm6, [rdx + 8 * 0], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundpd xmm7, [rdx + 8 * 0], 00000100b

hlt

align 4096
.data:
; Input vector of two double-precision values
dq 0.5, -0.5
; Scratch space used for the MXCSR images
dq 0, 0


================================================
FILE: unittests/ASM/H0F3A/66_0A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xbf00000000000000", "0xbfc000003fc00000"],
    "XMM1": ["0xbf00000000000000", "0xbfc000003fc00000"],
    "XMM2": ["0xbf0000003f800000", "0xbfc000003fc00000"],
    "XMM3": ["0xbf00000000000000", "0xbfc000003fc00000"],
    "XMM4": ["0xbf00000000000000", "0xbfc000003fc00000"],
    "XMM5": ["0xbf00000000000000", "0xbfc000003fc00000"],
    "XMM6": ["0xbf0000003f800000", "0xbfc000003fc00000"],
    "XMM7": ["0xbf00000000000000", "0xbfc000003fc00000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises roundss with a memory source, first with the rounding mode given
; in the immediate and then with the rounding mode taken from MXCSR.
lea rdx, [rel .data]

; Preload every destination with the input vector; the config expects only
; the lowest single-precision element to change.
movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 0]
movaps xmm3, [rdx + 8 * 0]
movaps xmm4, [rdx + 8 * 0]
movaps xmm5, [rdx + 8 * 0]
movaps xmm6, [rdx + 8 * 0]
movaps xmm7, [rdx + 8 * 0]

; Immediate bit 2 clear: the low two immediate bits select the rounding mode
roundss xmm0, [rdx + 8 * 0], 00000000b ; Nearest
roundss xmm1, [rdx + 8 * 0], 00000001b ; -inf
roundss xmm2, [rdx + 8 * 0], 00000010b ; +inf
roundss xmm3, [rdx + 8 * 0], 00000011b ; truncate

; Immediate bit 2 set (00000100b) below: the rounding mode comes from MXCSR.
; Each MXCSR image is staged in the scratch qword after the input vector.
; MXCSR
; Set to nearest
mov eax, 0x1F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundss xmm4, [rdx + 8 * 0], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundss xmm5, [rdx + 8 * 0], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundss xmm6, [rdx + 8 * 0], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundss xmm7, [rdx + 8 * 0], 00000100b

hlt

align 4096
.data:
; Input vector of four single-precision values
dd 0.5, -0.5, 1.5, -1.5
; Scratch space used for the MXCSR images
dq 0, 0


================================================
FILE: unittests/ASM/H0F3A/66_0B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0xbfe0000000000000"],
    "XMM1": ["0x0000000000000000", "0xbfe0000000000000"],
    "XMM2": ["0x3ff0000000000000", "0xbfe0000000000000"],
    "XMM3": ["0x0000000000000000", "0xbfe0000000000000"],
    "XMM4": ["0x0000000000000000", "0xbfe0000000000000"],
    "XMM5": ["0x0000000000000000", "0xbfe0000000000000"],
    "XMM6": ["0x3ff0000000000000", "0xbfe0000000000000"],
    "XMM7": ["0x0000000000000000", "0xbfe0000000000000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises roundsd with a memory source, first with the rounding mode given
; in the immediate and then with the rounding mode taken from MXCSR.
lea rdx, [rel .data]

; Preload every destination with the input vector; the config expects only
; the low double-precision element to change.
movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 0]
movaps xmm3, [rdx + 8 * 0]
movaps xmm4, [rdx + 8 * 0]
movaps xmm5, [rdx + 8 * 0]
movaps xmm6, [rdx + 8 * 0]
movaps xmm7, [rdx + 8 * 0]

; Immediate bit 2 clear: the low two immediate bits select the rounding mode
roundsd xmm0, [rdx + 8 * 0], 00000000b ; Nearest
roundsd xmm1, [rdx + 8 * 0], 00000001b ; -inf
roundsd xmm2, [rdx + 8 * 0], 00000010b ; +inf
roundsd xmm3, [rdx + 8 * 0], 00000011b ; truncate

; Immediate bit 2 set (00000100b) below: the rounding mode comes from MXCSR.
; Each MXCSR image is staged in the scratch qword after the input vector.
; MXCSR
; Set to nearest
mov eax, 0x1F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundsd xmm4, [rdx + 8 * 0], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundsd xmm5, [rdx + 8 * 0], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundsd xmm6, [rdx + 8 * 0], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rdx + 8 * 2], eax
ldmxcsr [rdx + 8 * 2]

roundsd xmm7, [rdx + 8 * 0], 00000100b

hlt

align 4096
.data:
; Input vector of two double-precision values
dq 0.5, -0.5
; Scratch space used for the MXCSR images
dq 0, 0


================================================
FILE: unittests/ASM/H0F3A/66_0C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4054c664c2f837b5", "0x40516053e2d6238e"],
    "XMM1":  ["0x4044836d7aa25d8d", "0x402a1e1c58255b03"],
    "XMM2":  ["0x4047ec6bc9d9d346", "0x4035fe425aee6320"],
    "XMM3":  ["0x4047ec6b7aa25d8d", "0x40154b7d41743e96"],
    "XMM4":  ["0x403d075a31a4bdba", "0x4050a01882d38477"],
    "XMM5":  ["0x40334ec17aa25d8d", "0x4056d74082d38477"],
    "XMM6":  ["0x4047ec6bc7cd898b", "0x40497b1382d38477"],
    "XMM7":  ["0x4047ec6b7aa25d8d", "0x4037f9ca82d38477"],
    "XMM8":  ["0x4056a929888f861a", "0x4055031766e43aa8"],
    "XMM9":  ["0x4058bc1f7aa25d8d", "0x40550317c91d14e4"],
    "XMM10": ["0x4047ec6ba10e0221", "0x4055031700bcbe62"],
    "XMM11": ["0x4047ec6b7aa25d8d", "0x405503170ed3d85a"],
    "XMM12": ["0x40419d2253111f0c", "0x4055031782d38477"],
    "XMM13": ["0x40177e287aa25d8d", "0x4055031782d38477"],
    "XMM14": ["0x4047ec6b9f16b11c", "0x4055031782d38477"],
    "XMM15": ["0x4047ec6b7aa25d8d", "0x4055031782d38477"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises blendps with a memory source and every one of the 16 possible
; 4-bit blend masks, one mask per destination register.
lea rdx, [rel .data]

; Each destination starts with its own 16-byte row of the data table
movaps xmm0,  [rdx + 16 * 0]
movaps xmm1,  [rdx + 16 * 1]
movaps xmm2,  [rdx + 16 * 2]
movaps xmm3,  [rdx + 16 * 3]
movaps xmm4,  [rdx + 16 * 4]
movaps xmm5,  [rdx + 16 * 5]
movaps xmm6,  [rdx + 16 * 6]
movaps xmm7,  [rdx + 16 * 7]
movaps xmm8,  [rdx + 16 * 8]
movaps xmm9,  [rdx + 16 * 9]
movaps xmm10, [rdx + 16 * 10]
movaps xmm11, [rdx + 16 * 11]
movaps xmm12, [rdx + 16 * 12]
movaps xmm13, [rdx + 16 * 13]
movaps xmm14, [rdx + 16 * 14]
movaps xmm15, [rdx + 16 * 15]

; All blends share the same source row (row 16). A set mask bit takes that
; 32-bit element from the source; a clear bit keeps the destination element.
blendps xmm0,  [rdx + 16 * 16], 0000b
blendps xmm1,  [rdx + 16 * 16], 0001b
blendps xmm2,  [rdx + 16 * 16], 0010b
blendps xmm3,  [rdx + 16 * 16], 0011b
blendps xmm4,  [rdx + 16 * 16], 0100b
blendps xmm5,  [rdx + 16 * 16], 0101b
blendps xmm6,  [rdx + 16 * 16], 0110b
blendps xmm7,  [rdx + 16 * 16], 0111b
blendps xmm8,  [rdx + 16 * 16], 1000b
blendps xmm9,  [rdx + 16 * 16], 1001b
blendps xmm10, [rdx + 16 * 16], 1010b
blendps xmm11, [rdx + 16 * 16], 1011b
blendps xmm12, [rdx + 16 * 16], 1100b
blendps xmm13, [rdx + 16 * 16], 1101b
blendps xmm14, [rdx + 16 * 16], 1110b
blendps xmm15, [rdx + 16 * 16], 1111b

hlt

align 16
; 512 bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/H0F3A/66_0D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x58a9fc7b38c17718", "0x67d29af330ae762c"],
    "XMM3": ["0xb615b9533de8ad09", "0xca79a24c3e9e3978"],
    "XMM4": ["0x57fa6daf9af2e91c", "0x65f580205ffdc710"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Exercises blendpd with a memory source and every one of the four possible
; 2-bit blend masks, one mask per destination register.
lea rdx, [rel .data]

; Destinations start with rows 0-3 of the data table
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]

; Sources are rows 8-11. A set mask bit takes that 64-bit element from the
; source; a clear bit keeps the destination element.
blendpd xmm1, [rdx + 16 * 8],  00b
blendpd xmm2, [rdx + 16 * 9],  01b
blendpd xmm3, [rdx + 16 * 10], 10b
blendpd xmm4, [rdx + 16 * 11], 11b

hlt

align 16
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_0E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000c3768da8",
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x30b556de1f6d7718", "0x67d29af330ae762c"],
    "XMM3": ["0xb615b953255b4cf4", "0xb76472a37404b890"],
    "XMM4": ["0x24426daf9af2e91c", "0x8a6789f2d415a567"],
    "XMM5": ["0x4f1694dfa8fb773c", "0x19a26b823d3ca2a9"],
    "XMM6": ["0x2ef9bb9202e0077f", "0xc97d9d031ed23dfa"],
    "XMM7": ["0x944c0a76f8f69004", "0xb29bfeda8b8db7bc"],
    "XMM8": ["0x10c41fa17837c17f", "0x099224327e5e296c"],
    "XMM9": ["0x48510f254d2fa47f", "0x2b5774313a974886"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pblendw with a memory source. Each of the 8 immediate bits selects one
; 16-bit word: a set bit takes the word from the source, a clear bit keeps the
; destination's word.
lea rdx, [rel .data]

; Destinations: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Sources: rows 8-15. The masks grow from no words selected to all eight
; (the seven-word mask 01111111b is not included in this hand-picked set).
pblendw xmm1, [rdx + 16 * 8],  00000000b
pblendw xmm2, [rdx + 16 * 9],  00000001b
pblendw xmm3, [rdx + 16 * 10], 00000011b
pblendw xmm4, [rdx + 16 * 11], 00000111b
pblendw xmm5, [rdx + 16 * 12], 00001111b
pblendw xmm6, [rdx + 16 * 13], 00011111b
pblendw xmm7, [rdx + 16 * 14], 00111111b
pblendw xmm8, [rdx + 16 * 15], 11111111b

; Checking each of the 256 possible masks in its own register is not possible,
; so run every mask once and fold each 128-bit result into a CRC32 kept in rax.
; Iteration N blends row (N mod 16) with the following row (wrapping around the
; 256-byte table) using N as the mask.
mov rax, 0
%assign swizzle 0
%rep 256

movaps xmm9, [rdx + ((16 * swizzle) % 256)]
pblendw xmm9, [rdx + ((16 * swizzle + 16) % 256)], swizzle
; Spill the result to the .data_temp scratch slot so both halves can be fed to crc32.
movaps [rel .data_temp], xmm9
crc32 rax, qword [rel .data_temp]
crc32 rax, qword [rel .data_temp + 8]

%assign swizzle swizzle+1
%endrep

hlt

align 4096
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09

.data_temp:
dq 0, 0


================================================
FILE: unittests/ASM/H0F3A/66_14.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000000007f",
    "RBX": "0x0000000000000067",
    "RCX": "0x0000888658818ae8"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pextrb with both register and memory destinations, including
; immediates larger than the 16 byte positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory for the memory-destination forms
; (presumably a region mapped by the test harness -- confirm).
mov rsi, 0xe0000000

; Sources: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Clear the scratch quadword so the two bytes never written below read as zero.
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations: the extracted byte is zero-extended into the register.
; An immediate of 0xFF selects byte 15 (only the low four bits are used).
pextrb rax, xmm1, 0
pextrb rbx, xmm2, 0xFF
; Memory destinations: six single-byte stores packed into consecutive bytes.
pextrb [rsi + 8 * 0 + 0], xmm3, 2
pextrb [rsi + 8 * 0 + 1], xmm4, 0xFF
pextrb [rsi + 8 * 0 + 2], xmm5, 4
pextrb [rsi + 8 * 0 + 3], xmm6, 5
pextrb [rsi + 8 * 0 + 4], xmm7, 6
pextrb [rsi + 8 * 0 + 5], xmm8, 7
; Read the packed bytes back so they can be checked through rcx.
mov rcx, [rsi + 8 * 0]

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_14_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSP": "0x48",
    "RBP": "0x47",
    "RSI": "0x46",
    "RDI": "0x45"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pextrb into rsp, rbp, rsi and rdi.
; rdx is loaded but not used by the rest of this test.
lea rdx, [rel data]

movaps xmm0, [rel data]

; Special case: destination registers rsp, rbp, rsi, rdi.
; Their register numbers (4-7) mean ah/ch/dh/bh for legacy 8-bit operands and
; rsp/rbp/rsi/rdi otherwise, so a decoder can pick the wrong register here.

; Pre-fill with all ones so a missing zero-extension or a write to the wrong
; register shows up in the result.
mov rsp, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1

; Extract the four lowest bytes of xmm0: 0x48, 0x47, 0x46, 0x45.
pextrb rsp, xmm0, 0
pextrb rbp, xmm0, 1
pextrb rsi, xmm0, 2
pextrb rdi, xmm0, 3

hlt

align 16
data:
; The movaps above reads a full 16 bytes, so define both quadwords rather than
; letting the upper half come from whatever follows the image.
dq 0x4142434445464748
dq 0


================================================
FILE: unittests/ASM/H0F3A/66_15.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000000a47f",
    "RBX": "0x00000000000067d2",
    "RCX": "0x1ed2a2a98a67b953"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: the 66 0F 3A 15 form of pextrw with register and memory destinations,
; including immediates larger than the 8 word positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory for the memory-destination forms
; (presumably a region mapped by the test harness -- confirm).
mov rsi, 0xe0000000

; Sources: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Zero rax and the scratch quadword that the four word stores below fill in.
mov rax, 0
mov [rsi + 8 * 0], rax

; NASM assembles pextrw with a register destination using the SSE2 encoding,
; so the two register-destination cases are hand-encoded in the
; 66 0F 3A 15 /r ib form instead:
;   pextrw eax, xmm1, 0
;   pextrw ebx, xmm2, 0xFF
; The ModRM bytes 0xc8 and 0xd3 put the xmm source in the reg field and the
; general-purpose destination in the rm field.
db 0x66, 0x0f, 0x3a, 0x15, 0xc8, 0x00
db 0x66, 0x0f, 0x3a, 0x15, 0xd3, 0xFF

; Memory destinations: four 16-bit stores packed into one quadword.
; An immediate of 0xFF selects word 7 (only the low three bits are used).
pextrw [rsi + 8 * 0 + 0], xmm3, 2
pextrw [rsi + 8 * 0 + 2], xmm4, 0xFF
pextrw [rsi + 8 * 0 + 4], xmm5, 4
pextrw [rsi + 8 * 0 + 6], xmm6, 5
; Read the packed words back so they can be checked through rcx.
mov rcx, [rsi + 8 * 0]

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000004d2fa47f",
    "RBX": "0x0000000067d29af3",
    "RCX": "0x8a6789f27404b890",
    "RDX": "0x00f658ab78236612"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pextrd with register and memory destinations, including immediates
; larger than the 4 dword positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory for the memory-destination forms
; (presumably a region mapped by the test harness -- confirm).
mov rsi, 0xe0000000

; Sources: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Zero rax and the first scratch quadword.
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations. Only the low two bits of the immediate select the
; dword, so 0xFF picks element 3.
pextrd eax, xmm1, 0
pextrd ebx, xmm2, 0xFF
; Memory destinations: four 32-bit stores filling two scratch quadwords.
; Immediates 4 and 5 wrap to elements 0 and 1.
pextrd [rsi + 8 * 0 + 0], xmm3, 2
pextrd [rsi + 8 * 0 + 4], xmm4, 0xFF
pextrd [rsi + 8 * 1 + 0], xmm5, 4
pextrd [rsi + 8 * 1 + 4], xmm6, 5
; Read the stored dwords back so they can be checked through rcx and rdx.
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_16_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x48510f254d2fa47f",
    "RBX": "0x67d29af330ae762c",
    "RCX": "0xb615b9533de8ad09",
    "RDX": "0x8a6789f2d415a567",
    "RDI": "0x8996f88178236612",
    "RSP": "0xc97d9d031ed21972"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pextrq with register and memory destinations, including immediates
; larger than the 2 qword positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory for the memory-destination forms
; (presumably a region mapped by the test harness -- confirm).
mov rsi, 0xe0000000

; Sources: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Zero rax and the first scratch quadword.
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations. Only the lowest bit of the immediate selects the
; qword, so 0xFF picks the high half.
pextrq rax, xmm1, 0
pextrq rbx, xmm2, 0xFF
; Memory destinations: four 64-bit stores to consecutive scratch quadwords.
; Immediates 2, 4 and 5 wrap to elements 0, 0 and 1.
pextrq [rsi + 8 * 0], xmm3, 2
pextrq [rsi + 8 * 1], xmm4, 0xFF
pextrq [rsi + 8 * 2], xmm5, 4
pextrq [rsi + 8 * 3], xmm6, 5
; Read the stored qwords back into registers for checking.
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]
mov rdi, [rsi + 8 * 2]
mov rsp, [rsi + 8 * 3]

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_17.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000004d2fa47f",
    "RBX": "0x0000000067d29af3",
    "RCX": "0x8a6789f27404b890",
    "RDX": "0x00f658ab78236612"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: extractps with register and memory destinations, including immediates
; larger than the 4 single-precision positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory for the memory-destination forms
; (presumably a region mapped by the test harness -- confirm).
mov rsi, 0xe0000000

; Sources: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Zero rax and the first scratch quadword.
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations. Only the low two bits of the immediate select the
; element, so 0xFF picks element 3.
extractps eax, xmm1, 0
extractps ebx, xmm2, 0xFF
; Memory destinations: four 32-bit stores filling two scratch quadwords.
; Immediates 4 and 5 wrap to elements 0 and 1.
extractps [rsi + 8 * 0 + 0], xmm3, 2
extractps [rsi + 8 * 0 + 4], xmm4, 0xFF
extractps [rsi + 8 * 1 + 0], xmm5, 4
extractps [rsi + 8 * 1 + 4], xmm6, 5
; Read the stored values back so they can be checked through rcx and rdx.
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_20.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x006b6b6b6b6b6b6b", "0x6b00000000000000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pinsrb with memory and register sources, including an immediate larger
; than the 16 byte positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory (presumably a region mapped by the test
; harness -- confirm).
mov rsi, 0xe0000000

; Write 16 zero bytes to scratch memory ...
mov rax, 0
mov [rsi + 8 * 0], rax
mov [rsi + 8 * 1], rax

; ... and use them to zero xmm1-xmm8 (only xmm1 and xmm2 are checked).
movaps xmm1, [rsi + 16 * 0]
movaps xmm2, [rsi + 16 * 0]
movaps xmm3, [rsi + 16 * 0]
movaps xmm4, [rsi + 16 * 0]
movaps xmm5, [rsi + 16 * 0]
movaps xmm6, [rsi + 16 * 0]
movaps xmm7, [rsi + 16 * 0]
movaps xmm8, [rsi + 16 * 0]

; Memory source: rebuild row 0 of the data table in xmm1 one byte at a time,
; exercising every byte position 0-15.
pinsrb xmm1, [rdx + 8 * 0 + 0], 0x00
pinsrb xmm1, [rdx + 8 * 0 + 1], 0x01
pinsrb xmm1, [rdx + 8 * 0 + 2], 0x02
pinsrb xmm1, [rdx + 8 * 0 + 3], 0x03
pinsrb xmm1, [rdx + 8 * 0 + 4], 0x04
pinsrb xmm1, [rdx + 8 * 0 + 5], 0x05
pinsrb xmm1, [rdx + 8 * 0 + 6], 0x06
pinsrb xmm1, [rdx + 8 * 0 + 7], 0x07
pinsrb xmm1, [rdx + 8 * 1 + 0], 0x08
pinsrb xmm1, [rdx + 8 * 1 + 1], 0x09
pinsrb xmm1, [rdx + 8 * 1 + 2], 0x0A
pinsrb xmm1, [rdx + 8 * 1 + 3], 0x0B
pinsrb xmm1, [rdx + 8 * 1 + 4], 0x0C
pinsrb xmm1, [rdx + 8 * 1 + 5], 0x0D
pinsrb xmm1, [rdx + 8 * 1 + 6], 0x0E
pinsrb xmm1, [rdx + 8 * 1 + 7], 0x0F
; An immediate of 0xFF inserts at byte 15 (only the low four bits are used).
pinsrb xmm2, [rdx + 8 * 2 + 0], 0xFF
mov rax, [rdx + 8 * 2 + 0]

; Register source: only the low byte of eax (0x6b) is inserted. Bytes 0-6 are
; written; byte 7 is left at zero.
pinsrb xmm2, eax, 0x00
pinsrb xmm2, eax, 0x01
pinsrb xmm2, eax, 0x02
pinsrb xmm2, eax, 0x03
pinsrb xmm2, eax, 0x04
pinsrb xmm2, eax, 0x05
pinsrb xmm2, eax, 0x06

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_20_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFF42FF", "0xFFFFFFFFFFFFFFFF"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pinsrb with edi as the source register (decoder regression test).
; rsi points at scratch memory (presumably a region mapped by the test
; harness -- confirm).
mov rsi, 0xe0000000

; Fill 16 bytes of scratch memory with 0xFF and load them so every byte of
; xmm0 starts out set.
mov rax, -1
mov [rsi + 8 * 0], rax
mov [rsi + 8 * 1], rax

movaps xmm0, [rsi + 16 * 0]

; rcx is zeroed so ch holds 0 while dil holds 0x42: reading the wrong register
; would insert 0x00 instead of the expected 0x42.
mov rcx, 0
mov edi, 0x42
; Regression test for a frontend decoder bug in FEX, which decoded the source
; register of this instruction as ch instead of edi.
pinsrb xmm0, edi, 0x01

hlt


================================================
FILE: unittests/ASM/H0F3A/66_22.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774314d2fa47f"],
    "XMM2": ["0x0000000000000000", "0x1f6de86b00000000"],
    "XMM3": ["0x1f6de86b1f6de86b", "0x1f6de86b1f6de86b"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pinsrd with memory and register sources, including an immediate larger
; than the 4 dword positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory (presumably a region mapped by the test
; harness -- confirm).
mov rsi, 0xe0000000

; Write 16 zero bytes to scratch memory ...
mov rax, 0
mov [rsi + 8 * 0], rax
mov [rsi + 8 * 1], rax

; ... and use them to zero xmm1-xmm8 (only xmm1-xmm3 are checked).
movaps xmm1, [rsi + 16 * 0]
movaps xmm2, [rsi + 16 * 0]
movaps xmm3, [rsi + 16 * 0]
movaps xmm4, [rsi + 16 * 0]
movaps xmm5, [rsi + 16 * 0]
movaps xmm6, [rsi + 16 * 0]
movaps xmm7, [rsi + 16 * 0]
movaps xmm8, [rsi + 16 * 0]

; Memory source: fill all four dword positions of xmm1. Element 2 re-uses the
; dword at offset 0, which is what the expected XMM1 value encodes.
pinsrd xmm1, [rdx + 8 * 0 + 0], 0x00
pinsrd xmm1, [rdx + 8 * 0 + 4], 0x01
pinsrd xmm1, [rdx + 8 * 0 + 0], 0x02
pinsrd xmm1, [rdx + 8 * 1 + 4], 0x03
; An immediate of 0xFF inserts at element 3 (only the low two bits are used).
pinsrd xmm2, [rdx + 8 * 2 + 0], 0xFF
mov rax, [rdx + 8 * 2 + 0]

; Register source: broadcast the low 32 bits of rax into every dword of xmm3.
pinsrd xmm3, eax, 0x00
pinsrd xmm3, eax, 0x01
pinsrd xmm3, eax, 0x02
pinsrd xmm3, eax, 0x03

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_22_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x48510f254d2fa47f", "0x2b5774313a974886"],
    "XMM2": ["0x0000000000000000", "0xb615b9533de8ad09"],
    "XMM3": ["0x30b556de1f6de86b", "0x30b556de1f6de86b"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: pinsrq with memory and register sources, including an immediate larger
; than the 2 qword positions in an xmm register.
lea rdx, [rel .data]
; rsi points at scratch memory (presumably a region mapped by the test
; harness -- confirm).
mov rsi, 0xe0000000

; Write 16 zero bytes to scratch memory ...
mov rax, 0
mov [rsi + 8 * 0], rax
mov [rsi + 8 * 1], rax

; ... and use them to zero xmm1-xmm8 (only xmm1-xmm3 are checked).
movaps xmm1, [rsi + 16 * 0]
movaps xmm2, [rsi + 16 * 0]
movaps xmm3, [rsi + 16 * 0]
movaps xmm4, [rsi + 16 * 0]
movaps xmm5, [rsi + 16 * 0]
movaps xmm6, [rsi + 16 * 0]
movaps xmm7, [rsi + 16 * 0]
movaps xmm8, [rsi + 16 * 0]

; Memory source: rebuild row 0 of the data table in xmm1 from its two qwords.
pinsrq xmm1, [rdx + 8 * 0], 0x00
pinsrq xmm1, [rdx + 8 * 1], 0x01
; An immediate of 0xFF inserts at element 1 (only the lowest bit is used).
pinsrq xmm2, [rdx + 8 * 4], 0xFF
mov rax, [rdx + 8 * 2 + 0]

; Register source: write rax into both qwords of xmm3.
pinsrq xmm3, rax, 0x00
pinsrq xmm3, rax, 0x01

hlt

align 16
; 256 bytes of random data (16 rows of 16 bytes)
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/H0F3A/66_22_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3f800000",
    "XMM0": ["0x4142434400000000", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: a pextrd store to memory immediately followed by a pinsrd into the same
; element of the same register. The stored value must be the original element
; (0x3f800000), and the register must afterwards hold the newly inserted zero.
section .text
global _start

_start:
; esi = 0 is the value inserted below.
xor esi, esi

movapd xmm0, [rel arg1]

; Store element 0 to memory, then overwrite element 0 with esi.
pextrd [rel val], xmm0, 0
pinsrd xmm0, esi, 0

; Read the stored element back so it can be checked through rax.
mov eax, [rel val]
hlt

; Scratch dword written by pextrd, placed on its own 4096-byte boundary.
align 4096
val: dd 0

; 16-byte source operand; its low dword is 0x3f800000.
align 128
arg1:
dq 0x414243443f800000
dq 0x5152535455565758


================================================
FILE: unittests/ASM/H0F3A/66_40.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x492feb2e492feb2e", "0x492feb2e492feb2e"],
    "XMM2": ["0x499a5226499a5226", "0x499a5226499a5226"],
    "XMM3": ["0x494ecfa4494ecfa4", "0x494ecfa4494ecfa4"],
    "XMM4": ["0x495f7816495f7816", "0x495f7816495f7816"],
    "XMM5": ["0x496e3962496e3962", "0x496e3962496e3962"],
    "XMM6": ["0", "0"],
    "XMM7": ["0", "0"],
    "XMM8": ["0", "0"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: dpps (packed single-precision dot product) with a memory source.
; The high nibble of the immediate selects which element products enter the
; sum; the low nibble selects which destination elements receive the sum
; (unselected elements are zeroed).
lea rdx, [rel .data]

; First operands: rows 0-7 of the 16-byte-per-row data table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Second operands: rows 8-15.
; xmm1-xmm5: full dot product written to all four elements.
; xmm6-xmm8: masks that must produce an all-zero result (nothing selected,
; no destination element selected, and no product selected, respectively).
dpps xmm1, [rdx + 16 * 8],  11111111b
dpps xmm2, [rdx + 16 * 9],  11111111b
dpps xmm3, [rdx + 16 * 10], 11111111b
dpps xmm4, [rdx + 16 * 11], 11111111b
dpps xmm5, [rdx + 16 * 12], 11111111b
dpps xmm6, [rdx + 16 * 13], 00000000b
dpps xmm7, [rdx + 16 * 14], 11110000b
dpps xmm8, [rdx + 16 * 15], 00001111b

hlt

align 16
; 256 bytes of random data (64 single-precision values)
.data:
dd 655.9708,532.2244,108.0451,512.4019,754.227,586.0859,127.7574,114.8167,764.4266,226.6145,337.864,320.3296,296.5247,480.0057,28.4267,565.9418,265.8255,536.4473,754.3489,460.681,818.7269,43.7204,464.592,847.9381,306.0592,702.7584,887.6473,551.5908,620.9001,520.9829,232.9532,510.3388,204.8474,225.626,564.973,790.5175,836.1953,844.5266,633.5626,501.7409,393.2616,674.4415,244.3265,971.1598,770.8029,746.1836,255.9902,567.7578,187.7175,924.181,466.4362,169.8267,651.7481,462.4206,396.6924,355.8538,6.148,523.1443,989.7004,713.6646,497.5427,657.6965,651.0534,778.5236


================================================
FILE: unittests/ASM/H0F3A/66_40_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0", "0"],
    "XMM1": ["0x00000000c197f874", "0"],
    "XMM2": ["0xff80000000000000", "0x0000000000000000"],
    "XMM3": ["0x7a147e317a147e31", "0x0000000000000000"],
    "XMM4": ["0x0000000000000000", "0x000000006cd0f887"],
    "XMM5": ["0x000000007f800000", "0x000000007f800000"],
    "XMM6": ["0xff80000000000000", "0x00000000ff800000"],
    "XMM7": ["0xfc944256fc944256", "0x00000000fc944256"],
    "XMM8": ["0x0000000000000000", "0xc3ac072e00000000"],
    "XMM9": ["0x000000005c5c09a3", "0x5c5c09a300000000"],
    "XMM10": ["0xdc34227c00000000", "0xdc34227c00000000"],
    "XMM11": ["0xda1627d2da1627d2", "0xda1627d200000000"],
    "XMM12": ["0x0000000000000000", "0x7f8000007f800000"],
    "XMM13": ["0x000000005f30e9d3", "0x5f30e9d35f30e9d3"],
    "XMM14": ["0xda3f264a00000000", "0xda3f264ada3f264a"],
    "XMM15": ["0x7f8000007f800000", "0x7f8000007f800000"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Test: dpps destination (broadcast) mask. Every instruction sums all four
; products (high nibble 1111b) and walks the low nibble through all 16 values,
; which selects the destination elements that receive the sum; the others are
; zeroed.
; First operands: rows 0-15 of the data table, one per register.
movaps xmm0,  [rel .data + 16 * 0]
movaps xmm1,  [rel .data + 16 * 1]
movaps xmm2,  [rel .data + 16 * 2]
movaps xmm3,  [rel .data + 16 * 3]
movaps xmm4,  [rel .data + 16 * 4]
movaps xmm5,  [rel .data + 16 * 5]
movaps xmm6,  [rel .data + 16 * 6]
movaps xmm7,  [rel .data + 16 * 7]
movaps xmm8,  [rel .data + 16 * 8]
movaps xmm9,  [rel .data + 16 * 9]
movaps xmm10, [rel .data + 16 * 10]
movaps xmm11, [rel .data + 16 * 11]
movaps xmm12, [rel .data + 16 * 12]
movaps xmm13, [rel .data + 16 * 13]
movaps xmm14, [rel .data + 16 * 14]
movaps xmm15, [rel .data + 16 * 15]

; Second operand is always row 16; only the destination mask changes.
dpps xmm0,  [rel .data + 16 * 16], 1111_0000b
dpps xmm1,  [rel .data + 16 * 16], 1111_0001b
dpps xmm2,  [rel .data + 16 * 16], 1111_0010b
dpps xmm3,  [rel .data + 16 * 16], 1111_0011b
dpps xmm4,  [rel .data + 16 * 16], 1111_0100b
dpps xmm5,  [rel .data + 16 * 16], 1111_0101b
dpps xmm6,  [rel .data + 16 * 16], 1111_0110b
dpps xmm7,  [rel .data + 16 * 16], 1111_0111b
dpps xmm8,  [rel .data + 16 * 16], 1111_1000b
dpps xmm9,  [rel .data + 16 * 16], 1111_1001b
dpps xmm10, [rel .data + 16 * 16], 1111_1010b
dpps xmm11, [rel .data + 16 * 16], 1111_1011b
dpps xmm12, [rel .data + 16 * 16], 1111_1100b
dpps xmm13, [rel .data + 16 * 16], 1111_1101b
dpps xmm14, [rel .data + 16 * 16], 1111_1110b
dpps xmm15, [rel .data + 16 * 16], 1111_1111b

hlt
align 16
; 512 bytes of random data (64 dq values; dpps reads them as packed single-precision bit patterns)
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/H0F3A/66_41.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x41278c496c911a6e", "0x41278c496c911a6e"],
    "XMM2": ["0x41235ccc64afb361", "0x41235ccc64afb361"],
    "XMM3": ["0x412bace273945dc5", "0x412bace273945dc5"],
    "XMM4": ["0x412cf22ef582fd76", "0x412cf22ef582fd76"],
    "XMM5": ["0x4121c80e40f3bc7b", "0x4121c80e40f3bc7b"],
    "XMM6": ["0", "0"],
    "XMM7": ["0", "0"],
    "XMM8": ["0", "0"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; DPPD smoke test: eight dot products with a memory source operand.
; RDX is the base pointer for every access into the .data table.
lea rdx, [rel .data]

; Destination operands: rows 0-7 of .data (two doubles per row).
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Source operands: rows 8-15 of .data.
; XMM1-XMM5 use an all-ones immediate; RegData expects the same value in both
; lanes of each result.
; XMM6-XMM8 clear both nibbles, only the low nibble, and only the high nibble
; of the immediate respectively; RegData expects all three to be zero.
dppd xmm1, [rdx + 16 * 8],  11111111b
dppd xmm2, [rdx + 16 * 9],  11111111b
dppd xmm3, [rdx + 16 * 10], 11111111b
dppd xmm4, [rdx + 16 * 11], 11111111b
dppd xmm5, [rdx + 16 * 12], 11111111b
dppd xmm6, [rdx + 16 * 13], 00000000b
dppd xmm7, [rdx + 16 * 14], 11110000b
dppd xmm8, [rdx + 16 * 15], 00001111b

hlt

align 16
; 256bytes of random data
.data:
dq 470.4127,683.87,711.3545,511.5631,996.8793,548.682,588.9345,832.5925,210.6613,792.6059,298.4494,154.4895,818.4,881.6027,705.3087,687.478,737.0665,621.31,755.3097,189.9614,552.4284,649.1206,798.252,574.5732,593.7565,577.3129,383.3844,443.3476,414.3571,615.1567,94.898,438.3107


================================================
FILE: unittests/ASM/H0F3A/66_41_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0", "0"],
    "XMM1": ["0x40a7e92935462e9e", "0"],
    "XMM2": ["0", "0x40a0712d6903205c"],
    "XMM3": ["0x408c728276ca7656", "0x408c728276ca7656"],
    "XMM4": ["0", "0"],
    "XMM5": ["0x40c0cd5f41a95ce2", "0"],
    "XMM6": ["0", "0x40b84aaf198a4022"],
    "XMM7": ["0x40abf229b504629d", "0x40abf229b504629d"],
    "XMM8": ["0", "0"],
    "XMM9": ["0x40c8384d475e602a", "0"],
    "XMM10": ["0", "0x40c8d105fa49a70e"],
    "XMM11": ["0x40c248e5ffd69239", "0x40c248e5ffd69239"],
    "XMM12": ["0", "0"],
    "XMM13": ["0x40beb622c0fe35c7", "0"],
    "XMM14": ["0", "0x40b74171bb41b9ba"],
    "XMM15": ["0x40ac8195a7735fbe", "0x40ac8195a7735fbe"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; Seed XMM0-XMM15 with rows 0-15 of .data (two doubles per row); each register
; is the destination of exactly one DPPD below.
movaps xmm0,  [rel .data + 16 * 0]
movaps xmm1,  [rel .data + 16 * 1]
movaps xmm2,  [rel .data + 16 * 2]
movaps xmm3,  [rel .data + 16 * 3]
movaps xmm4,  [rel .data + 16 * 4]
movaps xmm5,  [rel .data + 16 * 5]
movaps xmm6,  [rel .data + 16 * 6]
movaps xmm7,  [rel .data + 16 * 7]
movaps xmm8,  [rel .data + 16 * 8]
movaps xmm9,  [rel .data + 16 * 9]
movaps xmm10, [rel .data + 16 * 10]
movaps xmm11, [rel .data + 16 * 11]
movaps xmm12, [rel .data + 16 * 12]
movaps xmm13, [rel .data + 16 * 13]
movaps xmm14, [rel .data + 16 * 14]
movaps xmm15, [rel .data + 16 * 15]

; Full source mask but different broadcast tests
; All sixteen DPPDs share one memory operand (row 16 of .data) and sweep the
; low immediate nibble from 0000b to 1111b. In RegData only the two lowest
; immediate bits change which lanes are populated: xx00b gives zero in both
; lanes, xx01b fills the low lane, xx10b the high lane and xx11b both, so the
; sweep also shows that immediate bits 2 and 3 have no effect on the result.
dppd xmm0,  [rel .data + 16 * 16], 1111_0000b
dppd xmm1,  [rel .data + 16 * 16], 1111_0001b
dppd xmm2,  [rel .data + 16 * 16], 1111_0010b
dppd xmm3,  [rel .data + 16 * 16], 1111_0011b
dppd xmm4,  [rel .data + 16 * 16], 1111_0100b
dppd xmm5,  [rel .data + 16 * 16], 1111_0101b
dppd xmm6,  [rel .data + 16 * 16], 1111_0110b
dppd xmm7,  [rel .data + 16 * 16], 1111_0111b
dppd xmm8,  [rel .data + 16 * 16], 1111_1000b
dppd xmm9,  [rel .data + 16 * 16], 1111_1001b
dppd xmm10, [rel .data + 16 * 16], 1111_1010b
dppd xmm11, [rel .data + 16 * 16], 1111_1011b
dppd xmm12, [rel .data + 16 * 16], 1111_1100b
dppd xmm13, [rel .data + 16 * 16], 1111_1101b
dppd xmm14, [rel .data + 16 * 16], 1111_1110b
dppd xmm15, [rel .data + 16 * 16], 1111_1111b

; Stop here; the .data table follows immediately after this instruction.
hlt
align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/H0F3A/66_42.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x01d700f201dd018b", "0x021b012d00ec015b"],
    "XMM2": ["0x021b01ea0147019c", "0x017900fb00d801d9"],
    "XMM3": ["0x010500e801000153", "0x011a015f01530171"],
    "XMM4": ["0x019c0124018f014d", "0x011f0100011e0116"],
    "XMM5": ["0x0136007e009d01e0", "0x02a802c80245019d"],
    "XMM6": ["0x009f0115017b0132", "0x013c01af01f90179"],
    "XMM7": ["0x0077012b011900e8", "0x00bc016e019e0146"],
    "XMM8": ["0x0100011c010300d5", "0x00f3014a016700cd"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; MPSADBW test: one invocation per possible value of the 3-bit immediate.
; RDX is the base pointer for every access into the .data table.
lea rdx, [rel .data]

; Destination operands: rows 0-7 of the byte table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Source operands: rows 8-15 of the byte table. The immediates count from
; 000b to 111b so that every block-selection combination is exercised once.
mpsadbw xmm1, [rdx + 16 * 8], 000b
mpsadbw xmm2, [rdx + 16 * 9], 001b
mpsadbw xmm3, [rdx + 16 * 10], 010b
mpsadbw xmm4, [rdx + 16 * 11], 011b
mpsadbw xmm5, [rdx + 16 * 12], 100b
mpsadbw xmm6, [rdx + 16 * 13], 101b
mpsadbw xmm7, [rdx + 16 * 14], 110b
mpsadbw xmm8, [rdx + 16 * 15], 111b

hlt

align 16
; 256bytes of random data
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0
db 0x6e, 0x35, 0xa8, 0x54, 0xd7, 0xab, 0x8b, 0x6c, 0x77, 0x5f, 0x92, 0xca, 0x25, 0xa6, 0x7e, 0x27
db 0xc7, 0xcd, 0x73, 0xec, 0x95, 0xd6, 0x6f, 0x6a, 0xbb, 0xae, 0xf2, 0xbb, 0x27, 0xb9, 0xa1, 0xdd
db 0x73, 0x4d, 0xd1, 0xc7, 0xd5, 0x2c, 0x31, 0x88, 0xfe, 0xe7, 0xdb, 0xfd, 0x1e, 0x1e, 0x09, 0x7f
db 0x14, 0xfa, 0x4e, 0x95, 0xef, 0xe6, 0x9a, 0xf2, 0xa0, 0x42, 0x62, 0x9a, 0xa4, 0xa8, 0x73, 0x82
db 0x0e, 0x0f, 0x16, 0x82, 0x38, 0x07, 0x12, 0x32, 0x07, 0x35, 0x92, 0xc1, 0x63, 0x07, 0x78, 0xb3
db 0xcb, 0x46, 0x19, 0x57, 0x2b, 0x37, 0x2a, 0x46, 0x1f, 0x04, 0x0e, 0x79, 0x3d, 0xcd, 0x8d, 0xa3
db 0x2b, 0xf3, 0x86, 0x2f, 0xab, 0xba, 0x57, 0x30, 0x2e, 0xd6, 0x2c, 0xf0, 0x46, 0x4f, 0x3f, 0xef
db 0xef, 0xd1, 0xbb, 0x85, 0x34, 0x4b, 0x3c, 0xde, 0x9e, 0x48, 0xa3, 0xb9, 0x8d, 0x71, 0xe3, 0x9d
db 0x09, 0x72, 0xfb, 0xde, 0x8a, 0x32, 0x50, 0x9d, 0x69, 0x98, 0xf1, 0xf6, 0x52, 0xeb, 0xf7, 0xee
db 0xd6, 0x99, 0xc2, 0xff, 0x30, 0x1c, 0x02, 0xce, 0x70, 0x05, 0xb2, 0xf1, 0x56, 0x9c, 0x0e, 0xa6
db 0x18, 0x62, 0xc4, 0xe2, 0x86, 0x38, 0x76, 0x30, 0x2f, 0xa1, 0xe4, 0xa7, 0x0e, 0x5d, 0x53, 0xeb
db 0x14, 0x45, 0xe0, 0xb7, 0xe1, 0xe8, 0x02, 0x68, 0x1a, 0xfe, 0x8e, 0xc1, 0x8f, 0xf2, 0xeb, 0x46
db 0x7f, 0x5d, 0x6a, 0x23, 0x46, 0x97, 0x2e, 0x03, 0x98, 0x12, 0x32, 0x8f, 0x54, 0x76, 0x59, 0xac
db 0xc8, 0x76, 0x5f, 0xc8, 0x71, 0x0c, 0xd3, 0xb6, 0xc5, 0x19, 0xea, 0xab, 0xa6, 0x2c, 0x1d, 0x88


================================================
FILE: unittests/ASM/H0F3A/pclmulqdq.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0x1E2017C5BEE29400", "0x38358E40CC367C7A"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"],
      "XMM3": ["0xE208147952DE57A0", "0x317D360F86C80DC9"],
      "XMM4": ["0xBBA54C87DA872B40", "0x6495428B7641EBE6"],
      "XMM5": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1"]
  },
  "HostFeatures": ["PCLMUL"]
}
%endif

; PCLMULQDQ test: the same two 128-bit inputs are multiplied four times, once
; for each combination of immediate bit 0 and bit 4 (the qword selectors for
; the first and second operand respectively).
lea rdx, [rel .data]

; With imm = 0b00000000
; XMM2 (row 1 of .data) is loaded once and serves as the second operand of all
; four multiplies; RegData expects it to still hold row 1 at the end.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
pclmulqdq xmm1, xmm2, 0

; With imm = 0b00000001
; Row 0 is reloaded into a fresh register because the previous multiply
; overwrote its destination.
movaps xmm3, [rdx + 16 * 0]
pclmulqdq xmm3, xmm2, 1

; With imm = 0b00010000
movaps xmm4, [rdx + 16 * 0]
pclmulqdq xmm4, xmm2, 16

; With imm = 0b00010001
movaps xmm5, [rdx + 16 * 0]
pclmulqdq xmm5, xmm2, 17

hlt

align 16
.data:
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/H0F3A/pcmpestri_equal_any.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": ["15"],
      "RDX": ["16"],
      "XMM0": ["0x04070F000F000E05", "0x0000000000040404"],
      "XMM1": ["0x0121313131311111", "0x0000000000010101"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and moves the result from RCX to
; a region of memory in the .indices section specified by a byte
; offset.
;
; The first parameter is the byte offset to store the RCX result to.
; The second parameter is the control values to pass to pcmpestri
;
%macro CompareAndStore 2
  ; Compare xmm2 against xmm3 under control byte %2. The resulting index is
  ; left in ECX, of which only the low byte is recorded in slot %1.
  pcmpestri xmm2, xmm3, %2
  mov [rel .indices + %1], cl

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 = "ABCDabcdIJKLijkl", xmm3 = "Here I Am, Y'aLl".
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte character check (lsb, positive polarity)
; RAX/RDX are the explicit operand lengths. They are set once and reused by
; every CompareAndStore below; RegData checks they are unchanged at the end.
mov rax, 15 ; Exclude 'l'
mov rdx, 16
CompareAndStore 0, 0b00000000

; Unsigned byte character check (msb, positive polarity)
CompareAndStore 1, 0b01000000

; Unsigned byte character check (lsb, negative polarity)
CompareAndStore 2, 0b00010000

; Unsigned byte character check (msb, negative polarity)
CompareAndStore 3, 0b01010000

; Unsigned byte character check (lsb, negative masked)
CompareAndStore 4, 0b00110000

; Unsigned byte character check (msb, negative masked)
CompareAndStore 5, 0b01110000

; --- 16-bit unsigned word tests ---
; NOTE(review): RAX/RDX still hold 15 and 16 here, more than the 8 words a
; register can hold - presumably intentional to exercise length clamping;
; confirm. There is also no (lsb, positive polarity) word case in this file.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word character check (msb, positive polarity)
CompareAndStore 6, 0b01000001

; Unsigned word character check (lsb, negative polarity)
CompareAndStore 7, 0b00010001

; Unsigned word character check (msb, negative polarity)
CompareAndStore 8, 0b01010001

; Unsigned word character check (lsb, negative masked)
CompareAndStore 9, 0b00110001

; Unsigned word character check (msb, negative masked)
CompareAndStore 10, 0b01110001

; Load all our stored indices and flags for result comparing
; (slots 0-10 were written; the remaining bytes keep their initial zero)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A49 ; "IJKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestri_equal_each.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": ["4"],
      "RDX": ["3"],
      "XMM0": ["0x0F000B060B060F00", "0x040407000F060706"],
      "XMM1": ["0x3939010101012121", "0x0101212119191919"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4": ["0x0704030307000404", "0x0000000000000000"],
      "XMM5": ["0x1919191939390101", "0x0000000000000000"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and moves the result from RCX to
; a region of memory in the .indices section specified by a byte
; offset.
;
; The first parameter is the byte offset to store the RCX result to.
; The second parameter is the control values to pass to pcmpestri
;
%macro CompareAndStore 2
  ; Compare xmm2 against xmm3 under control byte %2. The resulting index is
  ; left in ECX, of which only the low byte is recorded in slot %1.
  pcmpestri xmm2, xmm3, %2
  mov [rel .indices + %1], cl

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 = "Hello People!!!!", xmm3 = "Hello Y'all!!!!!".
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Full length unsigned byte string check (lsb, positive polarity)
; RAX/RDX are the explicit operand lengths; each pair stays in effect for the
; six CompareAndStore invocations that follow it.
mov rax, 16
mov rdx, 16
CompareAndStore 0, 0b00001000

; Full length unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001000

; Full length unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011000

; Full length unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011000

; Full length unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111000

; Full length unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111000

; Non-full length unsigned byte string check (lsb, positive polarity)
; Same vectors, but the two operands now get different, shorter lengths.
mov rax, 8
mov rdx, 7
CompareAndStore 6, 0b00001000

; Non-full length unsigned byte string check (msb, positive polarity)
CompareAndStore 7, 0b01001000

; Non-full length unsigned byte string check (lsb, negative polarity)
CompareAndStore 8, 0b00011000

; Non-full length unsigned byte string check (msb, negative polarity)
CompareAndStore 9, 0b01011000

; Non-full length unsigned byte string check (lsb, negative masked)
CompareAndStore 10, 0b00111000

; Non-full length unsigned byte string check (msb, negative masked)
CompareAndStore 11, 0b01111000

; --- 16-bit unsigned word tests ---

movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Full length unsigned word string check (lsb, positive polarity)
; Lengths are now counted in words: 8 words fill a 128-bit register.
mov rax, 8
mov rdx, 8
CompareAndStore 12, 0b00001001

; Full length unsigned word string check (msb, positive polarity)
CompareAndStore 13, 0b01001001

; Full length unsigned word string check (lsb, negative polarity)
CompareAndStore 14, 0b00011001

; Full length unsigned word string check (msb, negative polarity)
CompareAndStore 15, 0b01011001

; Full length unsigned word string check (lsb, negative masked)
CompareAndStore 16, 0b00111001

; Full length unsigned word string check (msb, negative masked)
CompareAndStore 17, 0b01111001

; Non-full length unsigned word string check (lsb, positive polarity)
; These are the final RAX/RDX values, which RegData checks.
mov rax, 4
mov rdx, 3
CompareAndStore 18, 0b00001001

; Non-full length unsigned word string check (msb, positive polarity)
CompareAndStore 19, 0b01001001

; Non-full length unsigned word string check (lsb, negative polarity)
CompareAndStore 20, 0b00011001

; Non-full length unsigned word string check (msb, negative polarity)
CompareAndStore 21, 0b01011001

; Non-full length unsigned word string check (lsb, negative masked)
CompareAndStore 22, 0b00111001

; Non-full length unsigned word string check (msb, negative masked)
CompareAndStore 23, 0b01111001

; Load all our stored indices and flags for result comparing
; Slots 0-15 land in XMM0 (indices) / XMM1 (flags); slots 16-23 land in the
; low halves of XMM4 (indices) / XMM5 (flags).
movaps xmm0, [rel .indices]
movaps xmm4, [rel .indices + 16]
movaps xmm1, [rel .flags]
movaps xmm5, [rel .flags + 16]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x21212121656C706F ; "ople!!!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestri_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": ["2"],
      "RDX": ["16"],
      "XMM0": ["0x05050F000F000902", "0x0000000007000700"],
      "XMM1": ["0x1111313131311111", "0x0000000031313131"],
      "XMM2": ["0x306F8A9E30443057", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and moves the result from RCX to
; a region of memory in the .indices section specified by a byte
; offset.
;
; The first parameter is the byte offset to store the RCX result to.
; The second parameter is the control values to pass to pcmpestri
;
%macro CompareAndStore 2
  ; Compare xmm2 against xmm3 under control byte %2. The resulting index is
  ; left in ECX, of which only the low byte is recorded in slot %1.
  pcmpestri xmm2, xmm3, %2
  mov [rel .indices + %1], cl

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 starts with the needle "ll" (the rest is junk excluded by
; RAX = 2), xmm3 = "Hello Y'all!!!!!" is the string being searched.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (lsb, positive polarity)
; RAX/RDX are the explicit operand lengths. They are set once and reused by
; every CompareAndStore below; RegData checks they are unchanged at the end.
mov rax, 2
mov rdx, 16
CompareAndStore 0, 0b00001100

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001100

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011100

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011100

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111100

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111100

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
; RAX stays 2 as well, now selecting the two-word needle at the start of xmm2.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001101

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001101

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011101

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011101

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111101

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111101

; Load all our stored indices and flags for result comparing
; (slots 0-11 were written; the remaining bytes keep their initial zero)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6FFF6C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E30443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestri_ranges.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": ["4"],
      "RDX": ["16"],
      "XMM0": ["0x00060F000F000D01", "0x0000000000070007"],
      "XMM1": ["0x3111313131311111", "0x0000000000313131"],
      "XMM2": ["0x005A0041007A0061", "0x55AACCBBFF223344"],
      "XMM3": ["0x006500200027003F", "0x00210065004F0065"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and moves the result from RCX to
; a region of memory in the .indices section specified by a byte
; offset.
;
; The first parameter is the byte offset to store the RCX result to.
; The second parameter is the control values to pass to pcmpestri
;
%macro CompareAndStore 2
  ; Compare xmm2 against xmm3 under control byte %2. The resulting index is
  ; left in ECX, of which only the low byte is recorded in slot %1.
  pcmpestri xmm2, xmm3, %2
  mov [rel .indices + %1], cl

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 starts with the range bounds 'a','z','A','Z' (the rest is
; junk excluded by RAX = 4), xmm3 = "?I'm Here Open!!" is the string tested.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Range unsigned byte check (lsb, positive polarity)
; RAX/RDX are the explicit operand lengths. They are set once and reused by
; every CompareAndStore below; RegData checks they are unchanged at the end.
mov rax, 4
mov rdx, 16
CompareAndStore 0, 0b00000100

; Range unsigned byte check (msb, positive polarity)
CompareAndStore 1, 0b01000100

; Range unsigned byte check (lsb, negative polarity)
CompareAndStore 2, 0b00010100

; Range unsigned byte check (msb, negative polarity)
CompareAndStore 3, 0b01010100

; Range unsigned byte check (lsb, negative masked)
CompareAndStore 4, 0b00110100

; Range unsigned byte check (msb, negative masked)
CompareAndStore 5, 0b01110100

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
; RAX stays 4 as well, now selecting the four word-sized bounds in xmm2.
; NOTE(review): there is no (lsb, positive polarity) word case in this file.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Range unsigned word check (msb, positive polarity)
CompareAndStore 6, 0b01000101

; Range unsigned word check (lsb, negative polarity)
CompareAndStore 7, 0b00010101

; Range unsigned word check (msb, negative polarity)
CompareAndStore 8, 0b01010101

; Range unsigned word check (lsb, negative masked)
CompareAndStore 9, 0b00110101

; Range unsigned word check (msb, negative masked)
CompareAndStore 10, 0b01110101

; Load all our stored indices and flags for result comparing
; (slots 0-10 were written; the remaining bytes keep their initial zero)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877665A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF223344
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestrm_equal_any.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX":   ["15"],
      "RDX":   ["16"],
      "XMM1":  ["0x0121313131311111", "0x0000000000010101"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x00000000000060A0", "0x0000000000000000"],
      "XMM5":  ["0xFF00FF0000000000", "0x00FFFF0000000000"],
      "XMM6":  ["0x0000000000009F5F", "0x0000000000000000"],
      "XMM7":  ["0x00FF00FFFFFFFFFF", "0xFF0000FFFFFFFFFF"],
      "XMM8":  ["0x0000000000009F5F", "0x0000000000000000"],
      "XMM9":  ["0x00FF00FFFFFFFFFF", "0xFF0000FFFFFFFFFF"],
      "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000"],
      "XMM11": ["0x0000000000000010", "0x0000000000000000"],
      "XMM12": ["0x0000000000000000", "0x000000000000FFFF"],
      "XMM13": ["0x0000000000000010", "0x0000000000000000"],
      "XMM14": ["0x0000000000000000", "0x000000000000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and copies the result from XMM0 into
; the vector register named by the third argument.
;
; The first parameter is the byte offset in the .flags region of memory
; at which to store the resulting flags.
; The second parameter is the control byte to pass to pcmpestrm.
; The third parameter is the number of the XMM register that receives
; the result from XMM0.
;
%macro CompareAndStore 3
  ; Compare xmm2 against xmm3 under control byte %2. The result arrives in
  ; XMM0 and is copied straight into xmm%3 so the next invocation cannot
  ; overwrite it.
  pcmpestrm xmm2, xmm3, %2
  movaps xmm%3, xmm0

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 = "ABCDabcdIJKLijkl", xmm3 = "Here I Am, Y'aLl".
; Results are collected in XMM4-XMM9.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte character check (bits, positive polarity)
; RAX/RDX are the explicit operand lengths. They are set once and reused by
; every CompareAndStore below; RegData checks they are unchanged at the end.
mov rax, 15 ; Exclude 'l'
mov rdx, 16
CompareAndStore 0, 0b00000000, 4

; Unsigned byte character check (mask, positive polarity)
CompareAndStore 1, 0b01000000, 5

; Unsigned byte character check (bits, negative polarity)
CompareAndStore 2, 0b00010000, 6

; Unsigned byte character check (mask, negative polarity)
CompareAndStore 3, 0b01010000, 7

; Unsigned byte character check (bits, negative masked)
CompareAndStore 4, 0b00110000, 8

; Unsigned byte character check (mask, negative masked)
CompareAndStore 5, 0b01110000, 9

; --- 16-bit unsigned word tests ---
; Results are collected in XMM10-XMM14.
; NOTE(review): RAX/RDX still hold 15 and 16 here, more than the 8 words a
; register can hold - presumably intentional to exercise length clamping;
; confirm. There is also no (bits, positive polarity) word case in this file.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word character check (mask, positive polarity)
CompareAndStore 6, 0b01000001, 10

; Unsigned word character check (bits, negative polarity)
CompareAndStore 7, 0b00010001, 11

; Unsigned word character check (mask, negative polarity)
CompareAndStore 8, 0b01010001, 12

; Unsigned word character check (bits, negative masked)
CompareAndStore 9, 0b00110001, 13

; Unsigned word character check (mask, negative masked)
CompareAndStore 10, 0b01110001, 14

; Load all our stored flags for result comparing
; (slots 0-10 were written; the remaining bytes keep their initial zero)
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A49 ; "IJKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestrm_equal_each.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX":   ["8"],
      "RDX":   ["8"],
      "XMM1":  ["0x2121010101012121", "0x0000000001010101"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x000000000000F43F", "0x0000000000000000"],
      "XMM5":  ["0x0000FFFFFFFFFFFF", "0xFFFFFFFF00FF0000"],
      "XMM6":  ["0x0000000000000BC0", "0x0000000000000000"],
      "XMM7":  ["0xFFFF000000000000", "0x00000000FF00FFFF"],
      "XMM8":  ["0x0000000000000BC0", "0x0000000000000000"],
      "XMM9":  ["0xFFFF000000000000", "0x00000000FF00FFFF"],
      "XMM10": ["0x00000000000000EF", "0x0000000000000000"],
      "XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000"],
      "XMM12": ["0x0000000000000010", "0x0000000000000000"],
      "XMM13": ["0x0000000000000000", "0x000000000000FFFF"],
      "XMM14": ["0x0000000000000010", "0x0000000000000000"],
      "XMM15": ["0x0000000000000000", "0x000000000000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  ; Capture the flags: LAHF copies SF/ZF/AF/PF/CF into AH, and SETO
  ; materialises OF (which LAHF does not carry) in BL.
  ; Clobbers AX, BX, DI and SI; callers must preserve RAX themselves.
  lahf
  seto bl
  movzx bx, bl

  ; AX = LAHF byte (SF=bit 7, ZF=bit 6, AF=bit 4, PF=bit 2, CF=bit 0);
  ; BX = OF moved up to bit 5, its final position.
  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  ; (PF: bit 2 -> bit 1, AF: bit 4 -> bit 2)
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  ; (ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4)
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  ; (CF is already at bit 0, so it needs no shift)
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  ; Only the low byte is written: bits 5..0 = OF, SF, ZF, AF, PF, CF.
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and copies the result from XMM0 into
; the vector register named by the third argument.
;
; The first parameter is the byte offset in the .flags region of memory
; at which to store the resulting flags.
; The second parameter is the control byte to pass to pcmpestrm.
; The third parameter is the number of the XMM register that receives
; the result from XMM0.
;
%macro CompareAndStore 3
  ; Compare xmm2 against xmm3 under control byte %2. The result arrives in
  ; XMM0 and is copied straight into xmm%3 so the next invocation cannot
  ; overwrite it.
  pcmpestrm xmm2, xmm3, %2
  movaps xmm%3, xmm0

  ; The flag capture below overwrites AX, but RAX has to survive for the
  ; next comparison, so park it in R15 (MOV leaves the flags untouched).
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Byte tests: xmm2 = "Hello People!!!!", xmm3 = "Hello Y'all!!!!!".
; Results are collected in XMM4-XMM9.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Full length unsigned byte string check (bits, positive polarity)
; RAX/RDX are the explicit operand lengths; they stay in effect for all six
; byte comparisons.
mov rax, 16
mov rdx, 16
CompareAndStore 0, 0b00001000, 4

; Full length unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001000, 5

; Full length unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011000, 6

; Full length unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011000, 7

; Full length unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111000, 8

; Full length unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111000, 9

; --- 16-bit unsigned word tests ---
; Results are collected in XMM10-XMM15.

movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Full length unsigned word string check (bits, positive polarity)
; Lengths are now counted in words: 8 words fill a 128-bit register. These are
; also the final RAX/RDX values, which RegData checks.
mov rax, 8
mov rdx, 8
CompareAndStore 6, 0b00001001, 10

; Full length unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001001, 11

; Full length unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011001, 12

; Full length unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011001, 13

; Full length unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111001, 14

; Full length unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111001, 15

; Load all our stored flags for result comparing
; (slots 0-11 were written; the remaining bytes keep their initial zero)
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x21212121656C706F ; "ople!!!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestrm_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX":   ["2"],
      "RDX":   ["16"],
      "XMM1":  ["0x1111313131311111", "0x0000000031313131"],
      "XMM2":  ["0x306F8A9E30443057", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x0000000000000204", "0x0000000000000000"],
      "XMM5":  ["0x0000000000FF0000", "0x000000000000FF00"],
      "XMM6":  ["0x000000000000FDFB", "0x0000000000000000"],
      "XMM7":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF"],
      "XMM8":  ["0x000000000000FDFB", "0x0000000000000000"],
      "XMM9":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF"],
      "XMM10": ["0x0000000000000020", "0x0000000000000000"],
      "XMM11": ["0x0000000000000000", "0x00000000FFFF0000"],
      "XMM12": ["0x00000000000000DF", "0x0000000000000000"],
      "XMM13": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF"],
      "XMM14": ["0x00000000000000DF", "0x0000000000000000"],
      "XMM15": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the explicit-length string comparison of XMM2 against XMM3,
; copies the result from XMM0 into the vector register named by the
; third parameter, and stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset in the .flags region to store the result flags at.
; The second parameter is the control byte (imm8) to pass to pcmpestrm.
; The third parameter is the number of the XMM register to copy the XMM0 result into.
;
; Clobbers R15, which is used to preserve RAX across ArrangeAndStoreFLAGS.
;
%macro CompareAndStore 3
  pcmpestrm xmm2, xmm3, %2
  movaps xmm%3, xmm0

  ; ArrangeAndStoreFLAGS overwrites AX, but RAX carries the string length
  ; that the next pcmpestrm reads, so save and restore it around the call.
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Test sequence. Each CompareAndStore takes: flag byte offset, control byte,
; destination XMM number.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    11 = equal ordered (substring search)
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   output form:    0 = bit mask ("bits"), 1 = expanded element mask ("mask")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (bits, positive polarity)
; RAX is the explicit length of the XMM2 string (the two-element needle),
; RDX that of the XMM3 string being searched.
mov rax, 2
mov rdx, 16
CompareAndStore 0, 0b00001100, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001100, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011100, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011100, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111100, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111100, 9

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
; RAX also keeps its value of 2 from the byte tests.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001101, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001101, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011101, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011101, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111101, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111101, 15

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6FFF6C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E30443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpestrm_ranges.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX":   ["4"],
      "RDX":   ["16"],
      "XMM1":  ["0x3111313131311111", "0x0000000000313131"],
      "XMM2":  ["0x005A0041007A0061", "0x55AACCBBFF223344"],
      "XMM3":  ["0x006500200027003F", "0x00210065004F0065"],
      "XMM4":  ["0x0000000000003DEA", "0x0000000000000000"],
      "XMM5":  ["0xFFFFFF00FF00FF00", "0x0000FFFFFFFF00FF"],
      "XMM6":  ["0x000000000000C215", "0x0000000000000000"],
      "XMM7":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00"],
      "XMM8":  ["0x000000000000C215", "0x0000000000000000"],
      "XMM9":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00"],
      "XMM10": ["0xFFFF000000000000", "0x0000FFFFFFFFFFFF"],
      "XMM11": ["0x0000000000000087", "0x0000000000000000"],
      "XMM12": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000"],
      "XMM13": ["0x0000000000000087", "0x0000000000000000"],
      "XMM14": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the explicit-length string comparison of XMM2 against XMM3,
; copies the result from XMM0 into the vector register named by the
; third parameter, and stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset in the .flags region to store the result flags at.
; The second parameter is the control byte (imm8) to pass to pcmpestrm.
; The third parameter is the number of the XMM register to copy the XMM0 result into.
;
; Clobbers R15, which is used to preserve RAX across ArrangeAndStoreFLAGS.
;
%macro CompareAndStore 3
  pcmpestrm xmm2, xmm3, %2
  movaps xmm%3, xmm0

  ; ArrangeAndStoreFLAGS overwrites AX, but RAX carries the string length
  ; that the next pcmpestrm reads, so save and restore it around the call.
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Test sequence. Each CompareAndStore takes: flag byte offset, control byte,
; destination XMM number.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    01 = ranges
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   output form:    0 = bit mask ("bits"), 1 = expanded element mask ("mask")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Range unsigned byte check (bits, positive polarity)
; RAX is the explicit length of the XMM2 operand (four elements, i.e. the
; two range pairs a-z and A-Z), RDX that of the XMM3 string being tested.
mov rax, 4
mov rdx, 16
CompareAndStore 0, 0b00000100, 4

; Range unsigned byte check (mask, positive polarity)
CompareAndStore 1, 0b01000100, 5

; Range unsigned byte check (bits, negative polarity)
CompareAndStore 2, 0b00010100, 6

; Range unsigned byte check (mask, negative polarity)
CompareAndStore 3, 0b01010100, 7

; Range unsigned byte check (bits, negative masked)
CompareAndStore 4, 0b00110100, 8

; Range unsigned byte check (mask, negative masked)
CompareAndStore 5, 0b01110100, 9

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
; RAX keeps its value of 4 from the byte tests.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Range unsigned word check (mask, positive polarity)
CompareAndStore 6, 0b01000101, 10

; Range unsigned word check (bits, negative polarity)
CompareAndStore 7, 0b00010101, 11

; Range unsigned word check (mask, negative polarity)
CompareAndStore 8, 0b01010101, 12

; Range unsigned word check (bits, negative masked)
CompareAndStore 9, 0b00110101, 13

; Range unsigned word check (mask, negative masked)
CompareAndStore 10, 0b01110101, 14

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877665A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit operands: the range pairs first, then (32 bytes later) the string
; that is tested against them.
.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF223344
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistri_equal_any.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM0": ["0x04060F000F000D07", "0x0000000000040407"],
      "XMM1": ["0x1939313131311111", "0x0000000000191919"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; stores the resulting index (the low byte of RCX) into the .indices
; region, and then stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset used for both the index byte
; in .indices and the flag byte in .flags.
; The second parameter is the control byte (imm8) to pass to pcmpistri.
;
%macro CompareAndStore 2
  pcmpistri xmm2, xmm3, %2
  mov [rel .indices + %1], cl   ; MOV leaves the flags from pcmpistri intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Test sequence. Each CompareAndStore takes: result byte offset, control byte.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    00 = equal any
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   index returned: 0 = least significant ("lsb"), 1 = most significant ("msb")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte character check (lsb, positive polarity)
CompareAndStore 0, 0b00000000

; Unsigned byte character check (msb, positive polarity)
CompareAndStore 1, 0b01000000

; Unsigned byte character check (lsb, negative polarity)
CompareAndStore 2, 0b00010000

; Unsigned byte character check (msb, negative polarity)
CompareAndStore 3, 0b01010000

; Unsigned byte character check (lsb, negative masked)
CompareAndStore 4, 0b00110000

; Unsigned byte character check (msb, negative masked)
CompareAndStore 5, 0b01110000

; --- 16-bit unsigned word tests ---
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word character check (msb, positive polarity)
CompareAndStore 6, 0b01000001

; Unsigned word character check (lsb, negative polarity)
CompareAndStore 7, 0b00010001

; Unsigned word character check (msb, negative polarity)
CompareAndStore 8, 0b01010001

; Unsigned word character check (lsb, negative masked)
CompareAndStore 9, 0b00110001

; Unsigned word character check (msb, negative masked)
CompareAndStore 10, 0b01110001

; Load all our stored indices and flags for result comparing
; (XMM0 = index bytes, XMM1 = flag bytes)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A00 ; "\0JKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistri_equal_each.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM0": ["0x07000F060E060F00", "0x0000000007040404"],
      "XMM1": ["0x3939191919193939", "0x0000000019191919"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; stores the resulting index (the low byte of RCX) into the .indices
; region, and then stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset used for both the index byte
; in .indices and the flag byte in .flags.
; The second parameter is the control byte (imm8) to pass to pcmpistri.
;
%macro CompareAndStore 2
  pcmpistri xmm2, xmm3, %2
  mov [rel .indices + %1], cl   ; MOV leaves the flags from pcmpistri intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Test sequence. Each CompareAndStore takes: result byte offset, control byte.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    10 = equal each
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   index returned: 0 = least significant ("lsb"), 1 = most significant ("msb")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (lsb, positive polarity)
CompareAndStore 0, 0b00001000

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001000

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011000

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011000

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111000

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111000

; --- 16-bit unsigned word tests ---

movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001001

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001001

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011001

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011001

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111001

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111001

; Load all our stored indices and flags for result comparing
; (XMM0 = index bytes, XMM1 = flag bytes)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x00002121656C706F ; "ople!!\0\0"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x00212121216C6C61 ; "all!!!!\0"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistri_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM0": ["0x05050F000F000902", "0x0000000006000700"],
      "XMM1": ["0x1919313131311111", "0x0000000039393939"],
      "XMM2": ["0x306F000030443057", "0x000030443057697D"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; stores the resulting index (the low byte of RCX) into the .indices
; region, and then stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset used for both the index byte
; in .indices and the flag byte in .flags.
; The second parameter is the control byte (imm8) to pass to pcmpistri.
;
%macro CompareAndStore 2
  pcmpistri xmm2, xmm3, %2
  mov [rel .indices + %1], cl   ; MOV leaves the flags from pcmpistri intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Test sequence. Each CompareAndStore takes: result byte offset, control byte.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    11 = equal ordered (substring search)
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   index returned: 0 = least significant ("lsb"), 1 = most significant ("msb")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (lsb, positive polarity)
CompareAndStore 0, 0b00001100

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001100

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011100

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011100

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111100

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111100

; --- 16-bit unsigned word tests ---
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001101

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001101

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011101

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011101

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111101

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111101

; Load all our stored indices and flags for result comparing
; (XMM0 = index bytes, XMM1 = flag bytes)
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
; Byte operands: the needle first, then (32 bytes later) the string searched.
.data:
dq 0x6550206F6F006C6C ; "ll" followed by a NUL terminator; everything after the NUL is junk
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

; 16-bit operands, laid out the same way.
.data16:
dq 0x306F000030443057 ; "しい" followed by a NUL word; everything after the NUL is junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistri_ranges.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM0": ["0x00060F000F000D01", "0x0000001010070007"],
      "XMM1": ["0x3111313131311111", "0x0000001818313131"],
      "XMM2": ["0x005A0041007A0061", "0x55AACCBBFF220000"],
      "XMM3": ["0x0065002000270000", "0x00210065004F0065"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; stores the resulting index (the low byte of RCX) into the .indices
; region, and then stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset used for both the index byte
; in .indices and the flag byte in .flags.
; The second parameter is the control byte (imm8) to pass to pcmpistri.
;
%macro CompareAndStore 2
  pcmpistri xmm2, xmm3, %2
  mov [rel .indices + %1], cl   ; MOV leaves the flags from pcmpistri intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Test sequence. Each CompareAndStore takes: result byte offset, control byte.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    01 = ranges
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   index returned: 0 = least significant ("lsb"), 1 = most significant ("msb")
;
; XMM2 holds the range pairs; XMM3 (32 bytes further on) holds the string
; that is tested against them.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Range unsigned byte check (lsb, positive polarity)
CompareAndStore 0, 0b00000100

; Range unsigned byte check (msb, positive polarity)
CompareAndStore 1, 0b01000100

; Range unsigned byte check (lsb, negative polarity)
CompareAndStore 2, 0b00010100

; Range unsigned byte check (msb, negative polarity)
CompareAndStore 3, 0b01010100

; Range unsigned byte check (lsb, negative masked)
CompareAndStore 4, 0b00110100

; Range unsigned byte check (msb, negative masked)
CompareAndStore 5, 0b01110100

; --- 16-bit unsigned word tests ---
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Range unsigned word check (msb, positive polarity)
CompareAndStore 6, 0b01000101

; Range unsigned word check (lsb, negative polarity)
CompareAndStore 7, 0b00010101

; Range unsigned word check (msb, negative polarity)
CompareAndStore 8, 0b01010101

; Range unsigned word check (lsb, negative masked)
CompareAndStore 9, 0b00110101

; Range unsigned word check (msb, negative masked)
CompareAndStore 10, 0b01110101

; --- Edge case test (string begins with null character) ---
; The XMM3 operand starts with a NUL byte, so the string being tested is
; empty and no element can fall inside a range. Both index selections must
; then report "no match", which pcmpistri encodes as ECX = 16 for bytes.
movaps xmm2, [rel .data_null]
movaps xmm3, [rel .data_null + 32]

; Control byte: bits [1:0] = 10 (signed bytes), bits [3:2] = 01 (ranges),
; bit [6] selects the most (1) or least (0) significant index.

; Range signed byte check (msb)
CompareAndStore 11, 0b01000110

; Range signed byte check (lsb)
; Bit 6 is clear here so that the least-significant-index path is really the
; one exercised; the expected index and flags are the same as for msb.
CompareAndStore 12, 0b00000110

; Load all our stored indices and flags for result comparing
movaps xmm0, [rel .indices]
movaps xmm1, [rel .flags]

hlt

align 4096
; Byte operands: the range pairs first, then (32 bytes later) the string
; that is tested against them.
.data:
dq 0x998877005A417A61 ; "azAZ" then a NUL terminator (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit operands, laid out the same way.
.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF220000 ; lowest word is 0x0000, terminating the ranges after "azAZ"
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

; Operands for the edge-case tests, which compare them as signed bytes; the
; character comments below describe the 16-bit words.
.data_null:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF220000
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x0065002000270000 ; "\0' e" (the very first element is the terminator)
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistrm_equal_any.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1":  ["0x1939313131311111", "0x0000000000191919"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x0000000000002080", "0x0000000000000000"],
      "XMM5":  ["0xFF00000000000000", "0x0000FF0000000000"],
      "XMM6":  ["0x000000000000DF7F", "0x0000000000000000"],
      "XMM7":  ["0x00FFFFFFFFFFFFFF", "0xFFFF00FFFFFFFFFF"],
      "XMM8":  ["0x000000000000DF7F", "0x0000000000000000"],
      "XMM9":  ["0x00FFFFFFFFFFFFFF", "0xFFFF00FFFFFFFFFF"],
      "XMM10": ["0xFFFFFFFFFFFFFFFF", "0x0000FFFFFFFF0000"],
      "XMM11": ["0x0000000000000090", "0x0000000000000000"],
      "XMM12": ["0x0000000000000000", "0xFFFF00000000FFFF"],
      "XMM13": ["0x0000000000000010", "0x0000000000000000"],
      "XMM14": ["0x0000000000000000", "0x000000000000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; copies the resulting mask from XMM0 into the vector register named by
; the third parameter, and stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset in the .flags region to store the result flags at.
; The second parameter is the control byte (imm8) to pass to pcmpistrm.
; The third parameter is the number of the XMM register to copy the XMM0 result into.
;
%macro CompareAndStore 3
  pcmpistrm xmm2, xmm3, %2
  movaps xmm%3, xmm0        ; MOVAPS leaves the flags from pcmpistrm intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Test sequence. Each CompareAndStore takes: flag byte offset, control byte,
; destination XMM number.
;
; Control byte (imm8) fields used below:
;   bits [1:0] element format: 00 = unsigned bytes, 01 = unsigned words
;   bits [3:2] aggregation:    00 = equal any
;   bits [5:4] polarity:       00 = positive, 01 = negative, 11 = masked negative
;   bit  [6]   output form:    0 = bit mask ("bits"), 1 = expanded element mask ("mask")
;
; The second string of each pair lives 32 bytes after the first.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte character check (bits, positive polarity)
CompareAndStore 0, 0b00000000, 4

; Unsigned byte character check (mask, positive polarity)
CompareAndStore 1, 0b01000000, 5

; Unsigned byte character check (bits, negative polarity)
CompareAndStore 2, 0b00010000, 6

; Unsigned byte character check (mask, negative polarity)
CompareAndStore 3, 0b01010000, 7

; Unsigned byte character check (bits, negative masked)
CompareAndStore 4, 0b00110000, 8

; Unsigned byte character check (mask, negative masked)
CompareAndStore 5, 0b01110000, 9

; --- 16-bit unsigned word tests ---
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word character check (mask, positive polarity)
CompareAndStore 6, 0b01000001, 10

; Unsigned word character check (bits, negative polarity)
CompareAndStore 7, 0b00010001, 11

; Unsigned word character check (mask, negative polarity)
CompareAndStore 8, 0b01010001, 12

; Unsigned word character check (bits, negative masked)
CompareAndStore 9, 0b00110001, 13

; Unsigned word character check (mask, negative masked)
CompareAndStore 10, 0b01110001, 14

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A00 ; "\0JKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistrm_equal_each.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1":  ["0x3939191919193939", "0x0000000019191919"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x000000000000B43F", "0x0000000000000000"],
      "XMM5":  ["0x0000FFFFFFFFFFFF", "0xFF00FFFF00FF0000"],
      "XMM6":  ["0x0000000000004BC0", "0x0000000000000000"],
      "XMM7":  ["0xFFFF000000000000", "0x00FF0000FF00FFFF"],
      "XMM8":  ["0x000000000000CBC0", "0x0000000000000000"],
      "XMM9":  ["0xFFFF000000000000", "0xFFFF0000FF00FFFF"],
      "XMM10": ["0x00000000000000EF", "0x0000000000000000"],
      "XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000"],
      "XMM12": ["0x0000000000000010", "0x0000000000000000"],
      "XMM13": ["0x0000000000000000", "0x000000000000FFFF"],
      "XMM14": ["0x0000000000000090", "0x0000000000000000"],
      "XMM15": ["0x0000000000000000", "0xFFFF00000000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Packs the current arithmetic flags into one byte and stores it into the
; .flags region of memory. LAHF supplies SF/ZF/AF/PF/CF and SETO supplies OF;
; the bits are rearranged into the order [OF, SF, ZF, AF, PF, CF], with OF in
; bit 5 and CF in bit 0.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, DI and SI. The shifts and logic operations below also
; overwrite the flags themselves, so this must run before any other
; flag-modifying instruction follows the instruction under test.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = [SF, ZF, 0, AF, 0, PF, 1, CF]
  seto bl         ; BL = OF, which LAHF does not provide
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL and clear AH
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the implicit-length string comparison of XMM2 against XMM3,
; copies the resulting mask from XMM0 into the vector register named by
; the third parameter, and stores the resulting flags into the .flags region.
;
; The first parameter is the byte offset in the .flags region to store the result flags at.
; The second parameter is the control byte (imm8) to pass to pcmpistrm.
; The third parameter is the number of the XMM register to copy the XMM0 result into.
;
%macro CompareAndStore 3
  pcmpistrm xmm2, xmm3, %2
  movaps xmm%3, xmm0        ; MOVAPS leaves the flags from pcmpistrm intact
  ArrangeAndStoreFLAGS %1
%endmacro

; Control byte (imm8) fields exercised below (see the Intel SDM for PCMPISTRM):
;   bits 1:0 - element format   (00 = unsigned bytes, 01 = unsigned words)
;   bits 3:2 - aggregation      (10 = equal each)
;   bits 5:4 - polarity         (00 = positive, 01 = negative, 11 = masked negative)
;   bit  6   - XMM0 result form (0 = bit mask, 1 = byte/word mask)
;
; Results land in XMM4..XMM15, flags in bytes 0..11 of .flags.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (bits, positive polarity)
CompareAndStore 0, 0b00001000, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001000, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011000, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011000, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111000, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111000, 9

; --- 16-bit unsigned word tests ---

movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001001, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001001, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011001, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011001, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111001, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111001, 15

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x00002121656C706F ; "ople!!\0\0"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x00212121216C6C61 ; "all!!!!\0"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistrm_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1":  ["0x1919313131311111", "0x0000000039393939"],
      "XMM2":  ["0x306F000030443057", "0x000030443057697D"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3"],
      "XMM4":  ["0x0000000000000204", "0x0000000000000000"],
      "XMM5":  ["0x0000000000FF0000", "0x000000000000FF00"],
      "XMM6":  ["0x000000000000FDFB", "0x0000000000000000"],
      "XMM7":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF"],
      "XMM8":  ["0x000000000000FDFB", "0x0000000000000000"],
      "XMM9":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF"],
      "XMM10": ["0x0000000000000020", "0x0000000000000000"],
      "XMM11": ["0x0000000000000000", "0x00000000FFFF0000"],
      "XMM12": ["0x00000000000000DF", "0x0000000000000000"],
      "XMM13": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF"],
      "XMM14": ["0x000000000000005F", "0x0000000000000000"],
      "XMM15": ["0xFFFFFFFFFFFFFFFF", "0x0000FFFF0000FFFF"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; Resulting byte layout: bit 5 = OF, bit 4 = SF, bit 3 = ZF, bit 2 = AF,
; bit 1 = PF, bit 0 = CF. Bits 7:6 are always zero.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, SI and DI.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl         ; LAHF does not capture OF, so fetch it separately
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result that PCMPISTRM leaves
; in XMM0 into another XMM register, and records the resulting flags in
; the .flags region of memory.
;
; The first parameter is the byte offset in the .flags region to store the flags at.
; The second parameter is the control value (imm8) to pass to pcmpistrm.
; The third parameter is the number of the XMM register that receives the result from XMM0.
;
; Clobbers XMM0 plus everything ArrangeAndStoreFLAGS clobbers.
;
%macro CompareAndStore 3
  pcmpistrm xmm2, xmm3, %2
  ; MOVAPS does not modify EFLAGS, so the flags can still be captured afterwards.
  movaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Control byte (imm8) fields exercised below (see the Intel SDM for PCMPISTRM):
;   bits 1:0 - element format   (00 = unsigned bytes, 01 = unsigned words)
;   bits 3:2 - aggregation      (11 = equal ordered, i.e. substring search)
;   bits 5:4 - polarity         (00 = positive, 01 = negative, 11 = masked negative)
;   bit  6   - XMM0 result form (0 = bit mask, 1 = byte/word mask)
;
; XMM2 holds the needle, XMM3 the string being searched.
; Results land in XMM4..XMM15, flags in bytes 0..11 of .flags.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Unsigned byte string check (bits, positive polarity)
CompareAndStore 0, 0b00001100, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001100, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011100, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011100, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111100, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111100, 9

; --- 16-bit unsigned word tests ---
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001101, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001101, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011101, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011101, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111101, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111101, 15

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6F006C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F000030443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/pcmpistrm_ranges.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1":  ["0x3111313131311111", "0x0000000000313131"],
      "XMM2":  ["0x005A0041007A0061", "0x55AACCBBFF220000"],
      "XMM3":  ["0x006500200027003F", "0x00210065004F0065"],
      "XMM4":  ["0x0000000000003DEA", "0x0000000000000000"],
      "XMM5":  ["0xFFFFFF00FF00FF00", "0x0000FFFFFFFF00FF"],
      "XMM6":  ["0x000000000000C215", "0x0000000000000000"],
      "XMM7":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00"],
      "XMM8":  ["0x000000000000C215", "0x0000000000000000"],
      "XMM9":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00"],
      "XMM10": ["0xFFFF000000000000", "0x0000FFFFFFFFFFFF"],
      "XMM11": ["0x0000000000000087", "0x0000000000000000"],
      "XMM12": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000"],
      "XMM13": ["0x0000000000000087", "0x0000000000000000"],
      "XMM14": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000"]
  },
  "HostFeatures": ["SSE4.2"]
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; Resulting byte layout: bit 5 = OF, bit 4 = SF, bit 3 = ZF, bit 2 = AF,
; bit 1 = PF, bit 0 = CF. Bits 7:6 are always zero.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; Clobbers AX, BX, SI and DI.
;
%macro ArrangeAndStoreFLAGS 1
  lahf            ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl         ; LAHF does not capture OF, so fetch it separately
  movzx bx, bl

  shr ax, 8       ; Bring the LAHF byte down into AL
  shl bx, 5       ; OF -> bit 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1       ; PF: bit 2 -> bit 1
  shr si, 2       ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3       ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result that PCMPISTRM leaves
; in XMM0 into another XMM register, and records the resulting flags in
; the .flags region of memory.
;
; The first parameter is the byte offset in the .flags region to store the flags at.
; The second parameter is the control value (imm8) to pass to pcmpistrm.
; The third parameter is the number of the XMM register that receives the result from XMM0.
;
; Clobbers XMM0 plus everything ArrangeAndStoreFLAGS clobbers.
;
%macro CompareAndStore 3
  pcmpistrm xmm2, xmm3, %2
  ; MOVAPS does not modify EFLAGS, so the flags can still be captured afterwards.
  movaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Control byte (imm8) fields exercised below (see the Intel SDM for PCMPISTRM):
;   bits 1:0 - element format   (00 = unsigned bytes, 01 = unsigned words)
;   bits 3:2 - aggregation      (01 = ranges)
;   bits 5:4 - polarity         (00 = positive, 01 = negative, 11 = masked negative)
;   bit  6   - XMM0 result form (0 = bit mask, 1 = byte/word mask)
;
; XMM2 holds the range pairs ('a'-'z', 'A'-'Z'), XMM3 the string being checked.
; Results land in XMM4..XMM14, flags in bytes 0..10 of .flags.
movaps xmm2, [rel .data]
movaps xmm3, [rel .data + 32]

; Range unsigned byte check (bits, positive polarity)
CompareAndStore 0, 0b00000100, 4

; Range unsigned byte check (mask, positive polarity)
CompareAndStore 1, 0b01000100, 5

; Range unsigned byte check (bits, negative polarity)
CompareAndStore 2, 0b00010100, 6

; Range unsigned byte check (mask, negative polarity)
CompareAndStore 3, 0b01010100, 7

; Range unsigned byte check (bits, negative masked)
CompareAndStore 4, 0b00110100, 8

; Range unsigned byte check (mask, negative masked)
CompareAndStore 5, 0b01110100, 9

; --- 16-bit unsigned word tests ---
; Note: unlike the byte tests, there is no (bits, positive polarity) case here;
; the word tests start with the mask form and end at XMM14.
movaps xmm2, [rel .data16]
movaps xmm3, [rel .data16 + 32]

; Range unsigned word check (mask, positive polarity)
CompareAndStore 6, 0b01000101, 10

; Range unsigned word check (bits, negative polarity)
CompareAndStore 7, 0b00010101, 11

; Range unsigned word check (mask, negative polarity)
CompareAndStore 8, 0b01010101, 12

; Range unsigned word check (bits, negative masked)
CompareAndStore 9, 0b00110101, 13

; Range unsigned word check (mask, negative masked)
CompareAndStore 10, 0b01110101, 14

; Load all our stored flags for result comparing
movaps xmm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877005A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit (word) operands. Each operand is 16 bytes followed by 16 bytes of filler.
.data16:
dq 0x005A0041007A0061 ; "azAZ" - the range pairs 'a'-'z' and 'A'-'Z'
dq 0x55AACCBBFF220000 ; null terminator word, followed by junk
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!" (all eight words are non-null)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/H0F3A/sha1rnds4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "XMM1": ["0xA5E1EC3918BE0C95", "0xA3F7BF0143303AFB"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE"],
      "XMM3": ["0xA8B3BD15FA04D6D7", "0xDE761956A1F750B1"],
      "XMM4": ["0xCFBBDFA4E5E4712D", "0x76AD0D46447127D3"],
      "XMM5": ["0x9BDCA44510E70C65", "0x4474BFAA2B70A524"]
  },
  "HostFeatures": ["SHA"]
}
%endif

; Runs SHA1RNDS4 once per immediate value (0..3), each time starting from the
; same destination contents and the same XMM2 source operand.
lea rdx, [rel .data]

; We use XMM2 as the hypothetical E state
; This should not change across multiple invocations
; of SHA1RNDS4
movaps xmm2, [rdx + 16 * 1]

; With imm = 0
movaps xmm1, [rdx + 16 * 0]
sha1rnds4 xmm1, xmm2, 0

; With imm = 1
movaps xmm3, [rdx + 16 * 0]
sha1rnds4 xmm3, xmm2, 1

; With imm = 2
movaps xmm4, [rdx + 16 * 0]
sha1rnds4 xmm4, xmm2, 2

; With imm = 3
movaps xmm5, [rdx + 16 * 0]
sha1rnds4 xmm5, xmm2, 3

hlt

align 16
.data:
; First 16 bytes: initial contents of the destination register for every invocation.
db 0xe0, 0xfc, 0x2b, 0xa1, 0x06, 0x4f, 0x6c, 0xa7, 0x0f, 0x06, 0x6a, 0x1e, 0x7f, 0x76, 0x80, 0x9b
; Second 16 bytes: the XMM2 source operand.
db 0xe0, 0x56, 0xed, 0xaa, 0xf3, 0xc3, 0x68, 0x68, 0xde, 0xe6, 0xe6, 0x94, 0xe2, 0xe9, 0xfc, 0xf0


================================================
FILE: unittests/ASM/Includes/checkprecision.mac
================================================
%ifndef CHECK_PRECISION_INC
%define CHECK_PRECISION_INC

; checkprecision.mac - NASM include file containing macros to check the relative
; error of single- and double-precision floating-point numbers.

;; Single-precision relative error check.
;; Sets al to 1 when abs(REF - X) <= abs(REF) * TOLERANCE, otherwise 0.
;; An unordered comparison (NaN involved) sets CF, so it also yields 0.
;; Clobbers xmm12, xmm13, xmm14, xmm15 and eax.
;; Returns result in al.
;; Arguments are in memory locations.
;; Requires the abs_mask_float label from define_check_data_constants.
%macro check_relerr 3; %1=REF %2=X %3=TOLERANCE
        movss   xmm12, dword [ %1 ] ; xmm12 has REF
        movss   xmm13, dword [ %2 ] ; xmm13 has X
        movss   xmm15, dword [rel abs_mask_float] ; xmm15 has the abs float mask
        movaps  xmm14, xmm12 ; xmm14 has REF
        subss   xmm14, xmm13 ; xmm14 = REF - X
        andps   xmm12, xmm15 ; xmm12 = abs(REF)
        mulss   xmm12, dword [ %3 ] ; xmm12 = abs(REF) * tolerance
        movaps  xmm13, xmm14 ; xmm13 = REF - X
        andps   xmm13, xmm15 ; xmm13 = abs(REF - X)

        xor     eax, eax ; clears eax
        comiss  xmm12, xmm13 ; compares xmm12 and xmm13
        setnb   al ; stores xmm12 >= xmm13, i.e. abs(REF) * tolerance >= abs(REF - X)
%endmacro

;; Double-precision variant.
;; Sets al to 1 when abs(REF - X) <= abs(REF) * TOLERANCE, otherwise 0.
;; An unordered comparison (NaN involved) sets CF, so it also yields 0.
;; Clobbers xmm12, xmm13, xmm14, xmm15 and eax.
;; Returns result in al.
;; Arguments are in memory locations.
;; Requires the abs_mask_double label from define_check_data_constants.
%macro check_relerr_d 3; %1=REF %2=X %3=TOLERANCE
        movsd   xmm12, qword [ %1 ] ; xmm12 has REF
        movsd   xmm13, qword [ %2 ] ; xmm13 has X
        movsd   xmm15, qword [rel abs_mask_double] ; xmm15 has the abs double mask
        movapd  xmm14, xmm12 ; xmm14 has REF
        subsd   xmm14, xmm13 ; xmm14 = REF - X
        andpd   xmm12, xmm15 ; xmm12 = abs(REF)
        mulsd   xmm12, qword [ %3 ] ; xmm12 = abs(REF) * tolerance
        movapd  xmm13, xmm14 ; xmm13 = REF - X
        andpd   xmm13, xmm15 ; xmm13 = abs(REF - X)

        xor     eax, eax ; clears eax
        comisd  xmm12, xmm13 ; compares xmm12 and xmm13
        setnb   al ; stores xmm12 >= xmm13, i.e. abs(REF) * tolerance >= abs(REF - X)
%endmacro

; Emits the constants that check_relerr and check_relerr_d reference by label.
; Invoke it once, in a data area of the including file.
; The masks are only read with scalar loads (movss/movsd), so only 4 and 8
; bytes are emitted respectively.
%macro define_check_data_constants 0
abs_mask_float:
  dd 0x7fffffff   ; Bitmask to get absolute value of a float (single precision)
abs_mask_double:
  dq 0x7fffffffffffffff ; Bitmask to get absolute value of a double
%endmacro
%endif

================================================
FILE: unittests/ASM/Includes/modrm_oob_macros.mac
================================================
; Emits an instruction with a single memory operand at both region boundaries.
; %1 = instruction, %2 = byte offset subtracted from r15.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w2 2
; Ensures that the access doesn't go past the end.
%1 [r15 - %2]

; Ensures the access doesn't go before the start.
%1 [r14]
%endmacro

; Emits an instruction with a single memory operand at both region boundaries.
; Expands to the same text as w2.
; %1 = instruction, %2 = byte offset subtracted from r15.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r2 2
; Ensures that the load doesn't read past the end.
%1 [r15 - %2]

; Ensures the load doesn't read before the start.
%1 [r14]
%endmacro

; Emits "instr [mem], op" at both region boundaries (memory operand first).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = second operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w3 3
; Ensures that the access doesn't go past the end.
%1 [r15 - %2], %3

; Ensures the access doesn't go before the start.
%1 [r14], %3
%endmacro

; Emits "instr size [mem]" at both region boundaries, for instructions that
; need an explicit operand-size keyword on their lone memory operand.
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = size keyword.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w3_size 3
; Ensures that the access doesn't go past the end.
%1 %3 [r15 - %2]

; Ensures the access doesn't go before the start.
%1 %3 [r14]
%endmacro

; Emits "instr op, [mem]" at both region boundaries (memory operand last).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = first operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r3 3
; Ensures that the load doesn't read past the end.
%1 %3, [r15 - %2]

; Ensures the load doesn't read before the start.
%1 %3, [r14]
%endmacro

; Emits both operand orders of a two-operand instruction: memory as the
; second operand (r3) and memory as the first operand (w3).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = other operand.
%macro rw3 3
r3 %1, %2, %3
w3 %1, %2, %3
%endmacro

; Emits "instr [mem], op, op" at both region boundaries (memory operand first).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = second operand, %4 = third operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w4 4
; Ensures that the access doesn't go past the end.
%1 [r15 - %2], %3, %4

; Ensures the access doesn't go before the start.
%1 [r14], %3, %4
%endmacro

; Emits "instr size [mem], op" at both region boundaries (memory operand first,
; with an explicit operand-size keyword).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = size keyword, %4 = second operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w4_size 4
; Ensures that the access doesn't go past the end.
%1 %3 [r15 - %2], %4

; Ensures the access doesn't go before the start.
%1 %3 [r14], %4
%endmacro

; Emits "instr op, [mem], op" at both region boundaries (memory operand second).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = first operand, %4 = third operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r4 4
; Ensures that the load doesn't read past the end.
%1 %3, [r15 - %2], %4

; Ensures the load doesn't read before the start.
%1 %3, [r14], %4
%endmacro

; Emits "instr op, size [mem]" at both region boundaries (memory operand last,
; with an explicit operand-size keyword).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = size keyword, %4 = first operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r4_size 4
; Ensures that the load doesn't read past the end.
%1 %4, %3 [r15 - %2]

; Ensures the load doesn't read before the start.
%1 %4, %3 [r14]
%endmacro

; Emits both operand orders of a three-operand instruction: memory as the
; second operand (r4) and memory as the first operand (w4).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = register operand, %4 = trailing operand.
; Both expansions must receive the same trailing operand %4.
%macro rw4 4
r4 %1, %2, %3, %4
w4 %1, %2, %3, %4
%endmacro

; Emits "instr op, op, [mem]" at both region boundaries (memory operand last).
; %1 = instruction, %2 = byte offset subtracted from r15,
; %3 = first operand, %4 = second operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r4_fma 4
; Ensures that the load doesn't read past the end.
%1 %3, %4, [r15 - %2]

; Ensures the load doesn't read before the start.
%1 %3, %4, [r14]
%endmacro

; Emits "instr op, op, size [mem]" at both region boundaries (memory operand
; last, with an explicit operand-size keyword).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = size keyword,
; %4 = first operand, %5 = second operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r5_fma_sized 5
; Ensures that the load doesn't read past the end.
%1 %4, %5, %3 [r15 - %2]

; Ensures the load doesn't read before the start.
%1 %4, %5, %3 [r14]
%endmacro

; Emits "instr size [mem], op, op" at both region boundaries (memory operand
; first, with an explicit operand-size keyword).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = size keyword,
; %4 = second operand, %5 = third operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro w5_size 5
; Ensures that the access doesn't go past the end.
%1 %3 [r15 - %2], %4, %5

; Ensures the access doesn't go before the start.
%1 %3 [r14], %4, %5
%endmacro

; Emits "instr op, size [mem], op" at both region boundaries (memory operand
; second, with an explicit operand-size keyword).
; %1 = instruction, %2 = byte offset subtracted from r15, %3 = size keyword,
; %4 = first operand, %5 = third operand.
; NOTE(review): r14/r15 are expected to be set up by the including test.
%macro r5_size 5
; Ensures that the load doesn't read past the end.
%1 %4, %3 [r15 - %2], %5

; Ensures the load doesn't read before the start.
%1 %4, %3 [r14], %5
%endmacro


================================================
FILE: unittests/ASM/Includes/x87cw.mac
================================================
%ifndef X87_CW_INC
%define X87_CW_INC

; Sets x87 precision and rounding modes
; Uses the stack and clobbers rax
; Args: precision constant, rounding constant
;
; Performs a read-modify-write of the x87 control word, so all other
; control-word bits are preserved. Temporarily moves RSP down by 2 bytes.
; Arguments are substituted textually into "%N << shift"; pass a plain
; constant such as the x87_prec_* / x87_round_* values.
%macro set_cw_precision_rounding 2
  sub rsp, 2
  fnstcw [rsp]
  movzx eax, word [rsp]

  ; Precision
  and eax, ~(3 << 8)  ; Clear bits 9:8
  or eax, %1 << 8

  ; Rounding
  and eax, ~(3 << 10) ; Clear bits 11:10
  or eax, %2 << 10

  mov [rsp], ax
  fldcw [rsp]
  add rsp, 2
%endmacro

; Precision-control field values (control word bits 9:8).
; The value 01b is reserved and has no constant here.
x87_prec_32 equ 00b ; Single precision
x87_prec_64 equ 10b ; Double precision
x87_prec_80 equ 11b ; Double-extended precision

; Rounding-control field values (control word bits 11:10).
x87_round_nearest equ 00b
x87_round_down equ 01b
x87_round_up equ 10b
x87_round_towards_zero equ 11b

%endif


================================================
FILE: unittests/ASM/Includes/xsave_macros.mac
================================================
;
; Various macros used to set up data for the XSAVE tests
;
; Define IS_AVX before including this file to enable the
; use of AVX instructions to handle the upper lanes.
;

%ifndef XSAVE_MACROS_INC
%define XSAVE_MACROS_INC

; Initializes the MMX registers to various values using a label to a memory region
;
; %1 = label of the data region. Entries are read at a 32-byte stride and
; only the low 8 bytes of each entry are loaded, so MMn receives the first
; qword of entry n.
%macro set_up_mmx_state 1
  movq mm0, [rel %1 + 32 * 0]
  movq mm1, [rel %1 + 32 * 1]
  movq mm2, [rel %1 + 32 * 2]
  movq mm3, [rel %1 + 32 * 3]
  movq mm4, [rel %1 + 32 * 4]
  movq mm5, [rel %1 + 32 * 5]
  movq mm6, [rel %1 + 32 * 6]
  movq mm7, [rel %1 + 32 * 7]
%endmacro

; Sets up the XMM registers using a given label to a memory region.
;
; %1 = label of the data region. Register n is loaded from the 32-byte entry
; at %1 + 32 * n. With IS_AVX defined the full 32 bytes go into YMMn;
; otherwise only the low 16 bytes go into XMMn.
; The loads are aligned moves, so the region must be suitably aligned.
%macro set_up_xmm_state 1
  ; Helper: %1 = register number, %2 = data label
  %macro move_to_xmm 2
    %ifdef IS_AVX
      vmovaps ymm%1, [rel %2 + 32 * %1]
    %else
      movaps xmm%1,  [rel %2 + 32 * %1]
    %endif
  %endmacro

  move_to_xmm 0,  %1
  move_to_xmm 1,  %1
  move_to_xmm 2,  %1
  move_to_xmm 3,  %1
  move_to_xmm 4,  %1
  move_to_xmm 5,  %1
  move_to_xmm 6,  %1
  move_to_xmm 7,  %1
  move_to_xmm 8,  %1
  move_to_xmm 9,  %1
  move_to_xmm 10, %1
  move_to_xmm 11, %1
  move_to_xmm 12, %1
  move_to_xmm 13, %1
  move_to_xmm 14, %1
  move_to_xmm 15, %1

  ; Multi-line macros are removed with %unmacro (with a matching argument
  ; count); %undef only affects single-line macros. Removing the helper keeps
  ; it from leaking out and from being redefined on a second invocation.
  %unmacro move_to_xmm 2
%endmacro

; Overwrites the available slots within the legacy FXSAVE region
;
; Takes no arguments: the save area is addressed relative to RSP, and the
; 48 bytes at [rsp + 464, rsp + 512) are written.
;
; Clobbers RAX
;
%macro overwrite_fxsave_slots 0
  ; Overwrite the three 16byte "available" slots
  mov rax, 0x1111111111111111
  mov qword [rsp + 464 + 8 * 0], rax
  mov rax, 0x2222222222222222
  mov qword [rsp + 464 + 8 * 1], rax
  mov rax, 0x3333333333333333
  mov qword [rsp + 464 + 8 * 2], rax
  mov rax, 0x4444444444444444
  mov qword [rsp + 464 + 8 * 3], rax
  mov rax, 0x5555555555555555
  mov qword [rsp + 464 + 8 * 4], rax
  mov rax, 0x6666666666666666
  mov qword [rsp + 464 + 8 * 5], rax
%endmacro

; Overwrites all MM registers and the low 64 bits of all XMM registers with -1
;
; MOVQ into an XMM register zeroes bits 127:64, so each XMM register ends up
; as [0xFFFFFFFFFFFFFFFF, 0]. Bits above 127 (YMM upper lanes) are not
; touched by these legacy-encoded moves.
;
; Typically used right before an XRSTOR to verify
; data is restored properly
;
; Clobbers RAX
;
%macro corrupt_mmx_and_xmm_registers 0
  ; Corrupt MMX And XMM state
  mov rax, -1
  movq mm0, rax
  movq mm1, rax
  movq mm2, rax
  movq mm3, rax
  movq mm4, rax
  movq mm5, rax
  movq mm6, rax
  movq mm7, rax

  ; Corrupt XMM state
  movq xmm0, rax
  movq xmm1, rax
  movq xmm2, rax
  movq xmm3, rax
  movq xmm4, rax
  movq xmm5, rax
  movq xmm6, rax
  movq xmm7, rax
  movq xmm8, rax
  movq xmm9, rax
  movq xmm10, rax
  movq xmm11, rax
  movq xmm12, rax
  movq xmm13, rax
  movq xmm14, rax
  movq xmm15, rax
%endmacro

; At the end of the legacy FXSAVE area, there's three 16-byte regions
; available for general purpose use. We re-load these to ensure values
; that we put in here via overwrite_fxsave_slots aren't clobbered.
;
; The six qwords at [rsp + 464, rsp + 512) are returned in
; RAX, RBX, RCX, RDX, RSI and RDI, in ascending address order.
;
; Clobbers: RAX, RBX, RCX, RDX, RSI, RDI
;
%macro load_fxsave_slots 0
  ; Load the three 16 bytes of "available" slots to make sure it wasn't overwritten
  ; Reserved can be overwritten regardless
  mov rax, qword [rsp + 464 + 8 * 0]
  mov rbx, qword [rsp + 464 + 8 * 1]
  mov rcx, qword [rsp + 464 + 8 * 2]
  mov rdx, qword [rsp + 464 + 8 * 3]
  mov rsi, qword [rsp + 464 + 8 * 4]
  mov rdi, qword [rsp + 464 + 8 * 5]
%endmacro

; Defines a 32-byte-aligned region of test data labelled .xmm_data:
; 16 entries of 32 bytes each, one entry per vector register.
%macro define_xmm_data_section 0
align 32
.xmm_data:
  dq 0x1112131415161718
  dq 0xABFDEC3402932039
  dq 0xA1A2A3A4A5A6A7AA
  dq 0xABFD392482039840

  dq 0x2122232425262728
  dq 0xDEFCA93847392992
  dq 0x4142434445464748
  dq 0x3987432929293847

  dq 0x3132333435363738
  dq 0xEADC3284ADCE9339
  dq 0x6162636465666768
  dq 0xACDEFACDEFACDEFA

  dq 0x4142434445464748
  dq 0x3987432929293847
  dq 0x3132333435363738
  dq 0xEADC3284ADCE9339

  dq 0x5152535455565758
  dq 0x3764583402983799
  dq 0x7172737475767778
  dq 0x3459238471238023

  dq 0x6162636465666768
  dq 0xACDEFACDEFACDEFA
  dq 0xA1AAA3A4A5A6A7A8
  dq 0x3784769228479192

  dq 0x7172737475767778
  dq 0x3459238471238023
  dq 0x6162636465666768
  dq 0xACDEFACDEFACDEFA

  dq 0x8182838485868788
  dq 0x9347239480289299
  dq 0x6162636465666768
  dq 0xACDEFACDEFACDEFA

  dq 0xCCC2C3C4C5C6C7C8
  dq 0x3949232903428479
  dq 0xD1D2D3D4DDD6D7D8
  dq 0x3674823989ADEF73

  dq 0xA1AAA3A4A5A6A7A8
  dq 0x3784769228479192
  dq 0xB1B2B3B4B5B6BBB8
  dq 0xADEADE3894353499

  dq 0xF1F2FFF4F5F6F7F8
  dq 0x758734629799389A
  dq 0xD1D2D3D4DDD6D7D8
  dq 0x3674823989ADEF73

  dq 0xE1E2E3EEE5E6E7E8
  dq 0x3756438328472389
  dq 0xB1B2B3B4B5B6BBB8
  dq 0xADEADE3894353499

  dq 0xD1D2D3D4DDD6D7D8
  dq 0x3674823989ADEF73
  dq 0xA1AAA3A4A5A6A7A8
  dq 0x3784769228479192

  dq 0xC1C2C3C4C5CCC7C8
  dq 0xABCDEF3894335820
  dq 0x6162636465666768
  dq 0xACDEFACDEFACDEFA

  dq 0xB1B2B3B4B5B6BBB8
  dq 0xADEADE3894353499
  dq 0xE1E2E3EEE5E6E7E8
  dq 0x3756438328472389

  dq 0xA1A2A3A4A5A6A7AA
  dq 0xABFD392482039840
  dq 0xB1B2B3B4B5B6BBB8
  dq 0xADEADE3894353499
%endmacro

%endif


================================================
FILE: unittests/ASM/JMP.asm
================================================
%ifdef CONFIG
{
  "Ignore": [],
  "RegData": {
    "RAX": "1",
    "RBX": "2",
    "RCX": "3",
    "RDX": "4"
  }
}
%endif

; Exercises direct, register-indirect and conditional jumps.
; Each "function" sets one register and jumps back to its return label.

; Jump to the immediately following instruction
jmp label
label:

mov rsp, 0xe8000000

; Direct jump to a later label
jmp function
func_return:

; Register-indirect jump
lea rbx, [rel function2]
jmp rbx
func2_return:

; Conditional jump that must be taken (ZF set by comparing rcx with itself)
cmp rcx, rcx
je function3
func3_return:

; Conditional jump that must NOT be taken: ZF is still set from the cmp above
; because the instructions in between do not modify flags.
; If it were taken, function4 would replace RDX with 0xDEADBEEF.
mov rdx, 4
jne function4
func4_return:

hlt

function:
mov rax, 1
jmp func_return

function2:
mov rbx, 2
jmp func2_return

function3:
mov rcx, 3
jmp func3_return

function4:
mov rdx, 0xDEADBEEF
jmp func4_return

; Not reached: every path above ends in a jmp
hlt


================================================
FILE: unittests/ASM/Known_Failures
================================================
Test_X87/D9_F8.asm


================================================
FILE: unittests/ASM/Known_Failures_host
================================================
Test_X87/FXAM_Simple.asm
## Tag bits not completely modelled
Test_X87/X87MMXInteraction.asm

================================================
FILE: unittests/ASM/Known_Failures_jit
================================================
Test_FEX_bugs/32bit_syscall.asm


================================================
FILE: unittests/ASM/MOVHPD.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0xDEADBEEFBAD0DAD1",
    "RCX": "0xDEADBEEFBAD0DAD1",
    "XMM0": ["0", "0xDEADBEEFBAD0DAD1"]
  }
}
%endif

; MOVHPD round trip: load a qword into the upper half of XMM0, store the
; upper half back to a different address, and read it into RCX.

; Data we want to store
mov rax, 0xDEADBEEFBAD0DAD1

; Starting address to store to
mov rdi, 0xe8000000

pxor xmm0, xmm0
pxor xmm1, xmm1

mov [rdi], rax

; Load into bits 127:64; the lower half of XMM0 must stay zero
movhpd xmm0, [rdi]
; Store bits 127:64 to the next qword
movhpd [rdi + 8], xmm0

xor rcx, rcx
mov rcx, [rdi + 8]

hlt


================================================
FILE: unittests/ASM/MemoryData.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xddccbbaa"
  },
  "MemoryRegions": {
    "0x10000000": "4096"
  },
  "MemoryData": {
    "0x10000000": "AA BB CC DD"
  }
}
%endif

; Simple test to prove that config loader's MemoryData is working
;
; This is an 8-byte load. The configured bytes AA BB CC DD are read
; little-endian into the low 32 bits; the expected RAX value requires the
; following four bytes of the region to read as zero.

mov rax, [abs 0x10000000]
hlt


================================================
FILE: unittests/ASM/Multiblock/ReachableInvalidCode.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x20"
  }
}
%endif

; The conditional jump below is always taken at runtime, but the invalid
; bytes sit on its fall-through path and so are seen when the block is decoded.
mov rax, 0
cmp rax, 0

jz finish

; multiblock should gracefully handle these invalid ops
db 0xf, 0x3B ; invalid opcode here

finish:
mov rax, 32

hlt

================================================
FILE: unittests/ASM/OpSize/15_BYTE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  }
}
%endif

; A single 15-byte instruction (the architectural maximum length):
; six 0x66 operand-size prefixes, a 0x2E (CS) segment prefix, then
; 0F 1F /0 (multi-byte NOP) with ModRM 0x84, SIB 0x00 and a zero disp32.
db 0x66, 0x66, 0x66, 0x66, 0x66, 0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00

hlt


================================================
FILE: unittests/ASM/OpSize/66_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVUPD load: write 16 bytes to memory, then load them into XMM0.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movupd xmm0, [rdx]

hlt


================================================
FILE: unittests/ASM/OpSize/66_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVUPD store: load XMM1 from memory, store it to a zeroed slot, and
; reload that slot into XMM0 to observe what was written.
mov rdx, 0xe0000000

; Zero the destination slot
mov rax, 0
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax

; Source data
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movupd xmm1, [rdx + 8 * 2]
movupd [rdx + 8 * 0], xmm1
movupd xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/OpSize/66_12.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x5152535455565758"]
  }
}
%endif

; MOVLPD load: replaces only the lower 64 bits of XMM0; the upper half must
; keep its preloaded value.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Preload
movupd xmm0, [rdx]

; Lower 64bits
movlpd xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_13.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x0",
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVLPD store: writes only the lower 64 bits of XMM0 to memory; the qword
; following the destination must stay zero.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
; Zero the destination and the qword after it
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Preload
movupd xmm0, [rdx]

; Lower 64bits
movlpd [rdx + 8 * 2], xmm0

mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3] ;Ensure this wasn't overwritten

hlt


================================================
FILE: unittests/ASM/OpSize/66_14.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x6162636465666768"]
  }
}
%endif

; UNPCKLPD: result is [low qword of XMM0, low qword of XMM1].
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Preload
movupd xmm0, [rdx]
movupd xmm1, [rdx + 8 * 2]

unpcklpd xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/OpSize/66_15.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x5152535455565758", "0x7172737475767778"]
  }
}
%endif

; UNPCKHPD: result is [high qword of XMM0, high qword of XMM1].
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Preload
movupd xmm0, [rdx]
movupd xmm1, [rdx + 8 * 2]

unpckhpd xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/OpSize/66_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x6162636465666768"]
  }
}
%endif

; MOVHPD load: replaces only the upper 64 bits of XMM0; the lower half must
; keep its preloaded value.
lea rdx, [rel .data]

; Preload both halves
movapd xmm0, [rdx]
; Upper 64 bits only
movhpd xmm0, [rdx + 16]

hlt

align 16
.data:
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/OpSize/66_17.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x5152535455565758", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; MOVHPD store: writes only the upper 64 bits of XMM0 to memory; the qword
; following the destination must keep its 0xFF fill.
lea rdx, [rel .data]

movapd xmm0, [rdx]
movhpd [rdx + 16], xmm0
; Read back the destination together with the untouched qword after it
movapd xmm1, [rdx + 16]

hlt

align 4096
.data:
dq 0x4142434445464748
dq 0x5152535455565758

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/OpSize/66_28.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVAPD load from memory followed by a register-to-register MOVAPD.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
; The next 16 bytes are written but never loaded by this test
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, xmm0

hlt


================================================
FILE: unittests/ASM/OpSize/66_29.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVAPD store: overwrite the second 16 bytes of memory with XMM0 and read
; the result back through RAX/RBX.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
; Initial contents of the store destination; must be replaced
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd [rdx + 8 * 2], xmm0
movapd xmm1, xmm0

mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/OpSize/66_2A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3ff0000000000000", "0x0"],
    "XMM1": ["0xc000000000000000", "0xbff0000000000000"]
  }
}
%endif

; CVTPI2PD: converts two packed signed 32-bit integers to two doubles,
; from both an MMX register source and a memory source.
mov rdx, 0xe0000000

; Packed int32 pair {1, 0}
mov rax, 0x0000000000000001
mov [rdx + 8 * 0], rax
; Packed int32 pair {-2, -1}
mov rax, 0xFFFFFFFFFFFFFFFE
mov [rdx + 8 * 1], rax

movq mm0, [rdx]
cvtpi2pd xmm0, mm0
cvtpi2pd xmm1, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/OpSize/66_2B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVNTPD: non-temporal store of XMM0 to a zeroed slot, then reload the slot
; into XMM1 to observe what was written.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Zero the destination slot
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movntpd [rdx + 8 * 2], xmm0
movaps xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_2C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x0000000200000001",
    "MM1":  "0xFFFFFFFEFFFFFFFF"
  }
}
%endif

; CVTTPD2PI: converts two doubles to two packed signed 32-bit integers with
; truncation, from both an XMM register source and a memory source.
mov rdx, 0xe0000000

; {1.0, 2.0}
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; {-1.0, -2.0}
mov rax, 0xbff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0xc000000000000000
mov [rdx + 8 * 3], rax

; Filler used to preload the destination registers
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; Preload MM0/MM1 so a conversion that writes nothing would be detected
movq mm0, [rdx + 8 * 4]
movq mm1, [rdx + 8 * 4]

movapd xmm2, [rdx + 8 * 0]

cvttpd2pi mm0, xmm2
cvttpd2pi mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_2D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x0000000200000001",
    "MM1":  "0xFFFFFFFEFFFFFFFF",
    "MM2":  "0x8000000080000000",
    "MM3":  "0x8000000080000000"
  }
}
%endif

; CVTPD2PI test: MXCSR-rounded conversion of two packed doubles into two
; packed signed 32-bit integers in an MMX register. Covers ordinary values
; plus infinities, NaN and a finite value too large for int32.
mov rdx, 0xe0000000

; Set up MXCSR to truncate
; 0x7F80 = all exception mask bits set, rounding-control field = 11b.
; Slot 0 is only a scratch location here; it is overwritten with test data
; right after the load.
mov eax, 0x7F80
mov [rdx + 8 * 0], eax
ldmxcsr [rdx + 8 * 0]

; Slots 0-1: {1.0, 2.0}
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {-1.0, -2.0}
mov rax, 0xbff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0xc000000000000000
mov [rdx + 8 * 3], rax

; Slots 4-5: filler pattern. Only slot 4 is read back below.
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; Slots 6-7: {+infinity, -infinity}
mov rax, 0x7ff0000000000000
mov [rdx + 8 * 6], rax
mov rax, 0xfff0000000000000
mov [rdx + 8 * 7], rax

; Slots 8-9: {quiet NaN, largest finite double}
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 8], rax
mov rax, 0x7fefffffffffffff
mov [rdx + 8 * 9], rax

; Preload the first two destinations with the filler pattern
movq mm0, [rdx + 8 * 4]
movq mm1, [rdx + 8 * 4]

movapd xmm2, [rdx + 8 * 0]

; Register-source form, then memory-source forms.
; The last two use unrepresentable inputs; RegData expects 0x80000000 in
; every lane for them.
cvtpd2pi mm0, xmm2
cvtpd2pi mm1, [rdx + 8 * 2]
cvtpd2pi mm2, [rdx + 8 * 6]
cvtpd2pi mm3, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/OpSize/66_2E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; UCOMISD test: compares the low double of xmm0 against a memory operand and
; captures the resulting flags with LAHF. One ordered compare (less-than) and
; one unordered compare (NaN) are checked.
mov rdx, 0xe0000000

; Slots 0-1: {1.0, 2.0}. Only the low double (1.0) takes part in the compare.
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {4.0, 4.0}
mov rax, 0x4010000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4010000000000000
mov [rdx + 8 * 3], rax

; Slots 4-5: {quiet NaN, 4.0}
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 4], rax
mov rax, 0x4010000000000000
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
ucomisd xmm0, [rdx + 8 * 2] ; 1.0 <comp> 4.0
; Expected AH after LAHF, bit by bit:
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; Clear rax first so only AH carries data; keep the first result in rbx
mov rax, 0
lahf
mov rbx, rax

ucomisd xmm0, [rdx + 8 * 4] ; 1.0 <comp> NaN
; Expected AH after LAHF, bit by bit:
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; Second result stays in rax
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/OpSize/66_2F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; COMISD test: compares the low double of xmm0 against a memory operand and
; captures the resulting flags with LAHF. One ordered compare (less-than) and
; one unordered compare (NaN) are checked.
mov rdx, 0xe0000000

; Slots 0-1: {1.0, 2.0}. Only the low double (1.0) takes part in the compare.
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {4.0, 4.0}
mov rax, 0x4010000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4010000000000000
mov [rdx + 8 * 3], rax

; Slots 4-5: {quiet NaN, 4.0}
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 4], rax
mov rax, 0x4010000000000000
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
comisd xmm0, [rdx + 8 * 2] ; 1.0 <comp> 4.0
; Expected AH after LAHF, bit by bit (ordered less-than: only CF is set):
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; Clear rax first so only AH carries data; keep the first result in rbx
mov rax, 0
lahf
mov rbx, rax

comisd xmm0, [rdx + 8 * 4] ; 1.0 <comp> NaN
; Expected AH after LAHF, bit by bit (unordered: CF, PF and ZF are set):
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; Second result stays in rax
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/OpSize/66_50.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "RDI": "0x0",
    "XMM0": ["0x0", "0x8000000000000000"]
  }
}
%endif

; MOVMSKPD test: extracts the sign bits of the two packed doubles in an XMM
; register into a general-purpose register.
mov rdx, 0xe0000000

; Slots 0-1: low double has its sign bit clear, high double has only the
; sign bit set
mov rax, 0x0
mov [rdx + 8 * 0], rax
mov rax, 0x8000000000000000
mov [rdx + 8 * 1], rax

movapd xmm0, [rdx]
movmskpd rax, xmm0

; Second case: both values in .data have their sign bit clear
movapd xmm1, [rel .data]
movmskpd rdi, xmm1

hlt

; 16-byte alignment is required for the movapd load above
align 16
.data:
dq 0x4142434445464748
dq 0x5152535455565758


================================================
FILE: unittests/ASM/OpSize/66_51.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3ff0000000000000", "0x3ff0000000000000"],
    "XMM1":  ["0x4000000000000000", "0x4000000000000000"],
    "XMM2":  ["0x4008000000000000", "0x4008000000000000"],
    "XMM3":  ["0x4010000000000000", "0x4010000000000000"],
    "XMM4":  ["0x3ff0000000000000", "0x3ff0000000000000"],
    "XMM5":  ["0x4000000000000000", "0x4000000000000000"],
    "XMM6":  ["0x4008000000000000", "0x4008000000000000"],
    "XMM7":  ["0x4010000000000000", "0x4010000000000000"]
  }
}
%endif

; SQRTPD test: packed double square root with register sources (xmm0-xmm3)
; and memory sources (xmm4-xmm7), using perfect squares as inputs.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x4010000000000000
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x4022000000000000
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x4030000000000000
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x4039000000000000
mov [rdx + 8 * 9], rax

; xmm0-xmm3 receive the inputs for the register-source form
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
; xmm4-xmm7 are preloaded with 25.0, which is not an input to any of the
; memory-source conversions below, so a stale destination is detectable
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register-source form (source and destination are the same register)
sqrtpd xmm0, xmm0
sqrtpd xmm1, xmm1
sqrtpd xmm2, xmm2
sqrtpd xmm3, xmm3

; Memory-source form
sqrtpd xmm4, [rdx + 8 * 0]
sqrtpd xmm5, [rdx + 8 * 2]
sqrtpd xmm6, [rdx + 8 * 4]
sqrtpd xmm7, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_54.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1010101010101010", "0x0"],
    "XMM1": ["0x1010101010101010", "0x0"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; ANDPD test: 128-bit bitwise AND, once with a memory source and once with a
; register source. Both forms use the same operands.
mov rdx, 0xe0000000

; Slots 0-1: first operand (all ones / all zeros)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Slots 2-3: second operand
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
andpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
andpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_55.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM1": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; ANDNPD test: 128-bit (NOT destination) AND source, once with a memory
; source and once with a register source. Both forms use the same operands.
mov rdx, 0xe0000000

; Slots 0-1: destination operand, inverted by the instruction
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
andnpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
andnpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_56.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1111111111111111", "0x2222222222222222"],
    "XMM1": ["0x1111111111111111", "0x2222222222222222"],
    "XMM2": ["0x0101010101010101", "0x0202020202020202"]
  }
}
%endif

; ORPD test: 128-bit bitwise OR, once with a memory source and once with a
; register source. The two operands have disjoint bit patterns.
mov rdx, 0xe0000000

; Slots 0-1: first operand (high nibble of each byte set)
mov rax, 0x1010101010101010
mov [rdx + 8 * 0], rax
mov rax, 0x2020202020202020
mov [rdx + 8 * 1], rax

; Slots 2-3: second operand (low nibble of each byte set)
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
mov rax, 0x0202020202020202
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
orpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
orpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_57.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2424242424242424", "0x2424242424242424"],
    "XMM1": ["0x2424242424242424", "0x2424242424242424"],
    "XMM2": ["0x1818181818181818", "0x1818181818181818"]
  }
}
%endif

; XORPD test: 128-bit bitwise XOR, once with a memory source and once with a
; register source. Each byte computes 0x3C ^ 0x18.
mov rdx, 0xe0000000

; Slots 0-1: first operand
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 0], rax
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 1], rax

; Slots 2-3: second operand
mov rax, 0x1818181818181818
mov [rdx + 8 * 2], rax
mov rax, 0x1818181818181818
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
xorpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
xorpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_58.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4008000000000000"],
    "XMM1": ["0x4008000000000000", "0x4008000000000000"],
    "XMM2": ["0x4000000000000000", "0x4000000000000000"]
  }
}
%endif

; ADDPD test: packed double addition (1.0 + 2.0 in both lanes), once with a
; memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: {1.0, 1.0}
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {2.0, 2.0}
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
addpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
addpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_59.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4018000000000000", "0x4018000000000000"],
    "XMM1": ["0x4018000000000000", "0x4018000000000000"],
    "XMM2": ["0x4000000000000000", "0x4000000000000000"]
  }
}
%endif

; MULPD test: packed double multiplication (3.0 * 2.0 in both lanes), once
; with a memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: {3.0, 3.0}
mov rax, 0x4008000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {2.0, 2.0}
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
mulpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
mulpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_5A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x404000003F800000", "0x0"],
    "XMM1": ["0x3FF0000000000000", "0x4008000000000000"],
    "XMM2": ["0xff8000007f800000", "0x0000000000000000"],
    "XMM3": ["0x7f8000007fc00000", "0x0000000000000000"]
  }
}
%endif

; CVTPD2PS test: converts two packed doubles to two packed singles in the low
; 64 bits of the destination. Covers ordinary values, infinities, NaN and a
; double that is too large for single precision.
mov rdx, 0xe0000000

; Slots 0-1: {1.0, 3.0}
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: all ones, used to pre-fill the destination register
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Slots 4-5: {+infinity, -infinity}
mov rax, 0x7ff0000000000000
mov [rdx + 8 * 4], rax
mov rax, 0xfff0000000000000
mov [rdx + 8 * 5], rax
; Slots 6-7: {quiet NaN, largest finite double}
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 6], rax
mov rax, 0x7fefffffffffffff
mov [rdx + 8 * 7], rax

; xmm0 starts as all ones so the write of its upper 64 bits is observable
movapd xmm0, [rdx + 8 * 2]
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]

; Distinct source/destination, then two in-place conversions
cvtpd2ps xmm0, xmm1
cvtpd2ps xmm2, xmm2
cvtpd2ps xmm3, xmm3

hlt


================================================
FILE: unittests/ASM/OpSize/66_5A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x428b029f42a63326", "0x0000000000000000"],
    "XMM1": ["0x4150f0e342241b6c", "0x0000000000000000"],
    "XMM2": ["0x41aff21340ab4706", "0x0000000000000000"],
    "XMM3": ["0x40aa5bea411ac802", "0x0000000000000000"],
    "XMM4": ["0x428500c641e83ad2", "0x0000000000000000"],
    "XMM5": ["0x42b6ba02419a760c", "0x0000000000000000"],
    "XMM6": ["0x424bd89b4221cdae", "0x0000000000000000"],
    "XMM7": ["0x41bfce514202945e", "0x0000000000000000"],
    "XMM8": ["0x41c1cdc342b5494c", "0x0000000000000000"],
    "XMM9": ["0x42b66f2e42c5e0f9", "0x0000000000000000"],
    "XMM10": ["0x42c6f7d842b59a55", "0x0000000000000000"],
    "XMM11": ["0x4294cbf84281f1e5", "0x0000000000000000"],
    "XMM12": ["0x41cad360420ce913", "0x0000000000000000"],
    "XMM13": ["0x42b4662d40bbf141", "0x0000000000000000"],
    "XMM14": ["0x42501e3a42042015", "0x0000000000000000"],
    "XMM15": ["0x4122ce1242698acb", "0x0000000000000000"]
  }
}
%endif

; CVTPD2PS memory-source test: converts 16 pairs of doubles from the .data
; table into packed singles, one pair per XMM register (xmm0-xmm15).
lea rdx, [rel .data]
; Preload each register with the full 128-bit source pair. The conversion
; below must replace this with two singles in the low half (RegData expects
; the upper half to be zero).
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; Memory-source conversions; register N reads the pair at 16 * N
cvtpd2ps xmm0, [rdx + 16 * 0]
cvtpd2ps xmm1, [rdx + 16 * 1]
cvtpd2ps xmm2, [rdx + 16 * 2]
cvtpd2ps xmm3, [rdx + 16 * 3]
cvtpd2ps xmm4, [rdx + 16 * 4]
cvtpd2ps xmm5, [rdx + 16 * 5]
cvtpd2ps xmm6, [rdx + 16 * 6]
cvtpd2ps xmm7, [rdx + 16 * 7]
cvtpd2ps xmm8, [rdx + 16 * 8]
cvtpd2ps xmm9, [rdx + 16 * 9]
cvtpd2ps xmm10, [rdx + 16 * 10]
cvtpd2ps xmm11, [rdx + 16 * 11]
cvtpd2ps xmm12, [rdx + 16 * 12]
cvtpd2ps xmm13, [rdx + 16 * 13]
cvtpd2ps xmm14, [rdx + 16 * 14]
cvtpd2ps xmm15, [rdx + 16 * 15]

hlt

; 16-byte alignment is required for the movapd loads above
align 16
; 512bytes of random data
; (64 doubles; only the first 32, i.e. 16 pairs, are read by this test)
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/OpSize/66_5B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000100000001", "0x0000000200000002"],
    "XMM1":  ["0x0000000400000004", "0x0000000800000008"],
    "XMM2":  ["0x8000000080000000", "0x8000000080000000"]
  }
}
%endif

; CVTPS2DQ test: MXCSR-rounded conversion of four packed singles to four
; packed signed 32-bit integers. Covers a register source, a memory source,
; and a memory source holding +infinity, NaN, a huge negative value and
; -infinity, each of which must produce the integer indefinite 0x80000000.
mov rdx, 0xe0000000

; Set up MXCSR to truncate
; 0x7F80 = all exception mask bits set, rounding-control field = 11b.
; This is done before any test data is written and uses slot 0 as scratch,
; so the 32-bit store cannot clobber an input (slot 0 is rewritten below).
mov eax, 0x7F80
mov [rdx + 8 * 0], eax
ldmxcsr [rdx + 8 * 0]

mov rax, 0x3fc000003f800000 ; [1.5, 1.0]
mov [rdx + 8 * 0], rax
mov rax, 0x4039999a40000000 ; [2.9, 2.0]
mov [rdx + 8 * 1], rax

mov rax, 0x4083333340800000 ; [4.1, 4.0]
mov [rdx + 8 * 2], rax
mov rax, 0x4108000041000000 ; [8.5, 8.0]
mov [rdx + 8 * 3], rax

; Slots 4-5: filler pattern preloaded into the destinations
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x7fc000007f800000 ; [NaN, +inf]
mov [rdx + 8 * 6], rax
mov rax, 0xff800000ff7fffee ; [-inf, large negative finite]
mov [rdx + 8 * 7], rax

movapd xmm0, [rdx + 8 * 4]
movapd xmm1, [rdx + 8 * 4]

movapd xmm2, [rdx + 8 * 0]

; Register source, memory source, then the unrepresentable inputs
cvtps2dq xmm0, xmm2
cvtps2dq xmm1, [rdx + 8 * 2]
cvtps2dq xmm2, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_5B_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000004600000053", "0x0000000d00000029"],
    "XMM1":  ["0x0000001600000005", "0x000000050000000a"],
    "XMM2":  ["0x000000430000001d", "0x0000005b00000013"],
    "XMM3":  ["0x0000003300000028", "0x0000001800000021"],
    "XMM4":  ["0x000000180000005b", "0x0000005b00000063"],
    "XMM5":  ["0x000000630000005b", "0x0000004a00000041"],
    "XMM6":  ["0x0000001900000023", "0x0000005a00000006"],
    "XMM7":  ["0x0000003400000021", "0x0000000a0000003a"],
    "XMM8":  ["0x0000005400000030", "0x000000420000005a"],
    "XMM9":  ["0x0000000700000060", "0x0000005f0000001a"],
    "XMM10": ["0x0000002500000058", "0x0000000a00000032"],
    "XMM11": ["0x000000140000004e", "0x000000290000000a"],
    "XMM12": ["0x0000003a0000000f", "0x000000380000000a"],
    "XMM13": ["0x0000000500000035", "0x0000000300000049"],
    "XMM14": ["0x0000004700000039", "0x000000590000003e"],
    "XMM15": ["0x0000001800000030", "0x0000006100000022"]
  }
}
%endif

; CVTPS2DQ memory-source test: converts 16 groups of four singles from the
; .data table into packed signed 32-bit integers, one group per XMM register.
; MXCSR is not modified here, so the conversion uses whatever rounding mode
; is in effect when the test starts.
lea rdx, [rel .data]

; Register N reads the four singles at 16 * N
cvtps2dq xmm0, [rdx + 16 * 0]
cvtps2dq xmm1, [rdx + 16 * 1]
cvtps2dq xmm2, [rdx + 16 * 2]
cvtps2dq xmm3, [rdx + 16 * 3]
cvtps2dq xmm4, [rdx + 16 * 4]
cvtps2dq xmm5, [rdx + 16 * 5]
cvtps2dq xmm6, [rdx + 16 * 6]
cvtps2dq xmm7, [rdx + 16 * 7]
cvtps2dq xmm8, [rdx + 16 * 8]
cvtps2dq xmm9, [rdx + 16 * 9]
cvtps2dq xmm10, [rdx + 16 * 10]
cvtps2dq xmm11, [rdx + 16 * 11]
cvtps2dq xmm12, [rdx + 16 * 12]
cvtps2dq xmm13, [rdx + 16 * 13]
cvtps2dq xmm14, [rdx + 16 * 14]
cvtps2dq xmm15, [rdx + 16 * 15]

hlt

; Keeps each 16-byte group aligned for the 128-bit memory operands above
align 16
; 256bytes of random data
; (64 singles, consumed four at a time by the 16 conversions)
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/OpSize/66_5C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM1": ["0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM2": ["0x4000000000000000", "0x4000000000000000"]
  }
}
%endif

; SUBPD test: packed double subtraction (3.0 - 2.0 in both lanes), once with
; a memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: {3.0, 3.0}
mov rax, 0x4008000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {2.0, 2.0}
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
subpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
subpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_5D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x4000000000000000"],
    "XMM1": ["0x3FF0000000000000", "0x4000000000000000"],
    "XMM2": ["0x3FF0000000000000", "0x4008000000000000"]
  }
}
%endif

; MINPD test: per-lane minimum of packed doubles, once with a memory source
; and once with a register source. The low lane takes the source value and
; the high lane keeps the destination value.
mov rdx, 0xe0000000

; Slots 0-1: {3.0, 2.0}
mov rax, 0x4008000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {1.0, 3.0}
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
minpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
minpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_5E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3FE0000000000000", "0x3FE0000000000000"],
    "XMM1": ["0x3FE0000000000000", "0x3FE0000000000000"],
    "XMM2": ["0x4000000000000000", "0x4000000000000000"]
  }
}
%endif

; DIVPD test: packed double division (1.0 / 2.0 in both lanes), once with a
; memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: {1.0, 1.0} (dividend)
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {2.0, 2.0} (divisor)
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
divpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
divpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_5F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4000000000000000"],
    "XMM1": ["0x4008000000000000", "0x4000000000000000"],
    "XMM2": ["0x3FF0000000000000", "0x4000000000000000"]
  }
}
%endif

; MAXPD test: per-lane maximum of packed doubles, once with a memory source
; and once with a register source. The low lane keeps the destination value
; and the high lane takes the source value.
mov rdx, 0xe0000000

; Slots 0-1: {3.0, 1.0}
mov rax, 0x4008000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 1], rax

; Slots 2-3: {1.0, 2.0}
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
maxpd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
maxpd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_60.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6545664667476848", "0x6141624263436444"],
    "XMM1": ["0x6545664667476848", "0x6141624263436444"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKLBW (128-bit) test: interleaves the bytes of the low 64 bits of the
; destination and source, once with a memory source and once with a register
; source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpcklbw xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpcklbw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_61.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6566454667684748", "0x6162414263644344"],
    "XMM1": ["0x6566454667684748", "0x6162414263644344"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKLWD (128-bit) test: interleaves the 16-bit words of the low 64 bits
; of the destination and source, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpcklwd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpcklwd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_62.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6566676845464748", "0x6162636441424344"],
    "XMM1": ["0x6566676845464748", "0x6162636441424344"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKLDQ (128-bit) test: interleaves the 32-bit dwords of the low 64 bits
; of the destination and source, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpckldq xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpckldq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_63.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x00807F4100807F41", "0x00FF7F4100FF7F41"],
    "XMM1": ["0x00807F4100807F41", "0x00FF7F4100FF7F41"],
    "XMM2": ["0x0000FFFF007F0041", "0x0000FFFF007F0041"]
  }
}
%endif

; PACKSSWB (128-bit) test: packs signed 16-bit words into signed 8-bit bytes
; with signed saturation, once with a memory source and once with a register
; source. Destination words fill the low 64 bits of the result and source
; words fill the high 64 bits.
mov rdx, 0xe0000000

; 16bit signed -> 8bit signed (saturated)
; input > 0x7F(SCHAR_MAX, 127) = 0x7F(SCHAR_MAX, 127)
; input < 0x80(SCHAR_MIN, -128) = 0x80(SCHAR_MIN, -128)

; Slots 0-1: destination words {0x0041, 0x007F, 0x8000, 0x0000} twice.
; 0x8000 saturates to 0x80.
mov rax, 0x00008000007F0041
mov [rdx + 8 * 0], rax
mov rax, 0x00008000007F0041
mov [rdx + 8 * 1], rax

; Slots 2-3: source words {0x0041, 0x007F, 0xFFFF, 0x0000} twice.
; 0xFFFF is -1 and fits, giving 0xFF.
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 2], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
packsswb xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
packsswb xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM2": ["0x6162636465666768", "0x5152535455565758"]
  }
}
%endif

; PCMPGTB (128-bit) test: signed per-byte greater-than compare, once with a
; memory source and once with a register source. Every byte of the low half
; of the destination is greater than the source, and every byte of the high
; half is smaller.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
pcmpgtb xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpgtb xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_65.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM2": ["0x6162636465666768", "0x5152535455565758"]
  }
}
%endif

; PCMPGTW (128-bit) test: signed per-word greater-than compare, once with a
; memory source and once with a register source. Every word of the low half
; of the destination is greater than the source, and every word of the high
; half is smaller.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
pcmpgtw xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpgtw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_66.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM2": ["0x6162636465666768", "0x5152535455565758"]
  }
}
%endif

; PCMPGTD (128-bit) test: signed per-dword greater-than compare, once with a
; memory source and once with a register source. Every dword of the low half
; of the destination is greater than the source, and every dword of the high
; half is smaller.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
pcmpgtd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpgtd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_67.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x00007F4100007F41", "0x00007F4100007F41"],
    "XMM1": ["0x00007F4100007F41", "0x00007F4100007F41"],
    "XMM2": ["0x0000FFFF007F0041", "0x0000FFFF007F0041"]
  }
}
%endif

; PACKUSWB (128-bit) test: packs signed 16-bit words into unsigned 8-bit
; bytes with unsigned saturation, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; 16bit signed -> 8bit unsigned (saturated)
; input > 0xFF(UCHAR_MAX, 255) = 0xFF(UCHAR_MAX, 255)
; input < 0x00(Negative) = 0x0

; Slots 0-3: destination and source both hold the words
; {0x0041, 0x007F, 0xFFFF, 0x0000} twice. 0x41 and 0x7F pass through;
; 0xFFFF is -1 and clamps to 0x00.
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 0], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 1], rax

mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 2], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
packuswb xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
packuswb xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_68.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7555765677577858", "0x7151725273537454"],
    "XMM1": ["0x7555765677577858", "0x7151725273537454"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKHBW (128-bit) test: interleaves the bytes of the high 64 bits of the
; destination and source, once with a memory source and once with a register
; source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpckhbw xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpckhbw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_69.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7576555677785758", "0x7172515273745354"],
    "XMM1": ["0x7576555677785758", "0x7172515273745354"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKHWD (128-bit) test: interleaves the 16-bit words of the high 64 bits
; of the destination and source, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpckhwd xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpckhwd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_6A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7576777855565758", "0x7172737451525354"],
    "XMM1": ["0x7576777855565758", "0x7172737451525354"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKHDQ (128-bit) test: interleaves the 32-bit dwords of the high 64 bits
; of the destination and source, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpckhdq xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpckhdq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_6B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x00000040FFFF8000", "0x00000040FFFF8000"],
    "XMM1": ["0x00000040FFFF8000", "0x00000040FFFF8000"],
    "XMM2": ["0xFFFFFFFF80000000", "0x0000000000000040"]
  }
}
%endif

; PACKSSDW (128-bit) test: packs signed 32-bit dwords into signed 16-bit
; words with signed saturation, once with a memory source and once with a
; register source.
mov rdx, 0xe0000000

; 32bit signed -> 16bit signed (saturated)
; input > 0x7FFF(SHRT_MAX, 32767) = 0x7FFF(SHRT_MAX, 32767)
; input < 0x8000(SHRT_MIN, -32768) = 0x8000(SHRT_MIN, -32768)

; Slots 0-3: destination and source both hold the dwords
; {0x80000000, 0xFFFFFFFF, 0x00000040, 0x00000000}.
; 0x80000000 saturates to 0x8000; 0xFFFFFFFF is -1 and fits, giving 0xFFFF.
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov rax, 0x0000000000000040
mov [rdx + 8 * 1], rax

mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 2], rax
mov rax, 0x0000000000000040
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
packssdw xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
packssdw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_6C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x6162636465666768"],
    "XMM1": ["0x4142434445464748", "0x6162636465666768"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKLQDQ test: result is {destination low qword, source low qword},
; once with a memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpcklqdq xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpcklqdq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_6D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x5152535455565758", "0x7172737475767778"],
    "XMM1": ["0x5152535455565758", "0x7172737475767778"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PUNPCKHQDQ test: result is {destination high qword, source high qword},
; once with a memory source and once with a register source.
mov rdx, 0xe0000000

; Slots 0-1: destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movapd xmm0, [rdx]
punpckhqdq xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
punpckhqdq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_6E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x45464748", "0x0"],
    "XMM1": ["0x5152535455565758", "0x0"]
  }
}
%endif

; MOVD/MOVQ (memory -> XMM) test: loads a 32-bit and a 64-bit value into XMM
; registers; RegData expects the remaining upper bits to be zero.
mov rdx, 0xe0000000

; Slots 0-1: values loaded by movd / movq below
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: pattern loaded into xmm2/xmm3
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Slots 4-5: zeroed; not read again in this test
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; xmm2/xmm3 are loaded but not used by the instructions under test
movaps xmm2, [rdx + 8 * 2]
movaps xmm3, [rdx + 8 * 2]

; 32-bit load: only the low dword of slot 0 is read
movd xmm0, dword [rdx + 8 * 0]
; AMD's Architecture programmer's manual claims this mnemonic is still movd, but compilers only accept movq
movq xmm1, qword [rdx + 8 * 1]
hlt


================================================
FILE: unittests/ASM/OpSize/66_6F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; MOVDQA (load) test: aligned 128-bit move into an XMM register, once from
; memory and once from another XMM register.
mov rdx, 0xe0000000

; Slots 0-1: value for the memory-source load
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: value for the register-source move
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form
movdqa xmm0, [rdx + 8 * 0]

movapd xmm2, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified
movdqa xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_70.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x4546474845464748", "0x4546474845464748"],
    "XMM3": ["0x5152535451525354", "0x5152535451525354"]
  }
}
%endif

; PSHUFD test: shuffles the four dwords of the source according to an 8-bit
; immediate. Both cases broadcast a single source dword to every lane.
mov rdx, 0xe0000000

; Slots 0-1: shuffle source
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: loaded into xmm1, which is not an operand of either shuffle
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 8 * 2]
; imm 0x0: every lane selects source dword 0
pshufd xmm2, xmm0, 0x0
; imm 0xFF: every lane selects source dword 3
pshufd xmm3, xmm0, 0xFF

hlt


================================================
FILE: unittests/ASM/OpSize/66_74.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x00000000000000FF", "0x00000000000000FF"],
    "XMM1": ["0x00000000000000FF", "0x00000000000000FF"],
    "XMM2": ["0x6162636465666778", "0x5152535455565748"]
  }
}
%endif

; PCMPEQB test: byte-wise equality compare, memory and register source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand. Only the lowest byte of each qword
; (0x78 and 0x48) equals the corresponding byte of the first operand.
mov rax, 0x6162636465666778
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565748
mov [rdx + 8 * 3], rax

; Memory-source form: matching bytes become 0xFF, all others 0x00.
movapd xmm0, [rdx]
pcmpeqb xmm0, [rdx + 8 * 2]

; Register-source form: same operands, must give the same mask.
; xmm2 (the source) must be left unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpeqb xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_75.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x000000000000FFFF", "0x000000000000FFFF"],
    "XMM1": ["0x000000000000FFFF", "0x000000000000FFFF"],
    "XMM2": ["0x6162636465667778", "0x5152535455564748"]
  }
}
%endif

; PCMPEQW test: word-wise equality compare, memory and register source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand. Only the lowest word of each qword
; (0x7778 and 0x4748) equals the corresponding word of the first operand.
mov rax, 0x6162636465667778
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455564748
mov [rdx + 8 * 3], rax

; Memory-source form: matching words become 0xFFFF, all others 0x0000.
movapd xmm0, [rdx]
pcmpeqw xmm0, [rdx + 8 * 2]

; Register-source form: same operands, must give the same mask.
; xmm2 (the source) must be left unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpeqw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_76.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM1": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM2": ["0x61626364FFFFFFFF", "0x51525354FFFFFFFF"]
  }
}
%endif

; PCMPEQD test: dword-wise equality compare, memory and register source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand.
mov rax, 0x71727374FFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x41424344FFFFFFFF
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand. Only the low dword of each qword (0xFFFFFFFF)
; equals the corresponding dword of the first operand.
mov rax, 0x61626364FFFFFFFF
mov [rdx + 8 * 2], rax
mov rax, 0x51525354FFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form: matching dwords become all-ones, all others zero.
movapd xmm0, [rdx]
pcmpeqd xmm0, [rdx + 8 * 2]

; Register-source form: same operands, must give the same mask.
; xmm2 (the source) must be left unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pcmpeqd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_7C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4028000000000000"]
  }
}
%endif

; HADDPD test: horizontal add of packed doubles.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: destination operand [1.0, 2.0].
mov rax, 0x3ff0000000000000; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 1], rax

; Bytes 16-31: source operand [4.0, 8.0].
mov rax, 0x4010000000000000 ; 4
mov [rdx + 8 * 2], rax
mov rax, 0x4020000000000000 ; 8
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 8 * 2]

; Low result  = xmm0[0] + xmm0[1] = 1.0 + 2.0 = 3.0  (0x4008000000000000)
; High result = xmm1[0] + xmm1[1] = 4.0 + 8.0 = 12.0 (0x4028000000000000)
haddpd xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/OpSize/66_7D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3ff0000000000000", "0x4010000000000000"]
  }
}
%endif

; HSUBPD test: horizontal subtract of packed doubles.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: destination operand [2.0, 1.0].
mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax
mov rax, 0x3ff0000000000000; 1.0
mov [rdx + 8 * 1], rax

; Bytes 16-31: source operand [8.0, 4.0].
mov rax, 0x4020000000000000 ; 8
mov [rdx + 8 * 2], rax
mov rax, 0x4010000000000000 ; 4
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 8 * 2]

; Low result  = xmm0[0] - xmm0[1] = 2.0 - 1.0 = 1.0 (0x3ff0000000000000)
; High result = xmm1[0] - xmm1[1] = 8.0 - 4.0 = 4.0 (0x4010000000000000)
hsubpd xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/OpSize/66_7E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x65666768",
    "RBX": "0x6162636465666768",
    "RCX": "0x75767778",
    "RSI": "0x7172737475767778"
  }
}
%endif

; MOVD/MOVQ test: move the low 32 or 64 bits of an XMM register to memory
; and to a general-purpose register.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: filler pattern, not read back by this test.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: source patterns for xmm2 / xmm3.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Bytes 32-47: zeroed destination slots, so the bytes a 32-bit store does
; not touch read back as zero.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; xmm2 low qword = 0x6162636465666768.
; xmm3 is loaded from offset 24 (not 16-byte aligned, hence movups), so its
; low qword is 0x7172737475767778 and its high qword is the zeroed slot.
movups xmm2, [rdx + 8 * 2]
movups xmm3, [rdx + 8 * 3]

; 32-bit store: only the low dword of xmm2 is written.
movd dword [rdx + 8 * 4], xmm2
; AMD's Architecture programmer's manual claims this mnemonic is still movd, but compilers only accept movq
; 64-bit store: the low qword of xmm2 is written.
movq qword [rdx + 8 * 5], xmm2

; Read both slots back as full qwords; rax shows the upper half of the
; 32-bit slot is still zero.
mov rax, [rdx + 8 * 4]
mov rbx, [rdx + 8 * 5]

; Register-destination forms: expected rcx = low dword of xmm3 with the
; upper 32 bits clear, rsi = low qword of xmm3.
movd ecx, xmm3
movq rsi, xmm3

hlt


================================================
FILE: unittests/ASM/OpSize/66_7F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVDQA test: 128-bit store to memory and register-to-register copy.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: pattern loaded into xmm0.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: zeroed destination for the store below.
mov rax, 0
mov [rdx + 8 * 2], rax
mov rax, 0
mov [rdx + 8 * 3], rax

; Store xmm0 to the zeroed slot, then read it back into xmm1 to verify that
; all 16 bytes were written.
movapd xmm0, [rdx + 8 * 0]
movdqa [rdx + 8 * 2], xmm0
movapd xmm1, [rdx + 8 * 2]

; Register-to-register copy: xmm2 must equal xmm0.
movdqa xmm2, xmm0

hlt


================================================
FILE: unittests/ASM/OpSize/66_C2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM1": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM4": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM6": ["0x0", "0x0"],
    "XMM7": ["0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM9": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; CMPPD test: all eight comparison predicates (imm8 0x00-0x07) on packed
; doubles. A lane is set to all-ones when the predicate holds, else to zero.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: left-hand operand [1.0, 2.0].
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Bytes 16-31: right-hand operand [1.0, 3.0].
; Lane 0 compares equal, lane 1 compares less-than.
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 3], rax

; Every destination starts as [1.0, 2.0]; xmm8 holds the right-hand operand.
; xmm3 and xmm7 are reloaded with NaN-bearing inputs further down.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; [1.0, 2.0] <pred> [1.0, 3.0]
cmppd xmm0, xmm8, 0x00 ; EQ
cmppd xmm1, xmm8, 0x01 ; LT
cmppd xmm2, xmm8, 0x02 ; LTE
cmppd xmm4, xmm8, 0x04 ; NEQ
cmppd xmm5, xmm8, 0x05 ; NLT
cmppd xmm6, xmm8, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; Bytes 0-15 become [0.0, nan].
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 1], rax

; Bytes 16-31 become [nan, 0.0].
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x0000000000000000
mov [rdx + 8 * 3], rax

movapd xmm3, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; Unordered will return true when either input is nan
; [0.0, nan] unord [nan, 0.0] = [1, 1]
cmppd xmm3, xmm8, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; [0.0, nan] ord [nan, 0.0] = [0, 0]
cmppd xmm7, xmm8, 0x07 ; Ordered

; Bytes 0-15 become [nan, 0.0]; both operands of the next compare use it.
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x0000000000000000
mov [rdx + 8 * 1], rax

movapd xmm8, [rdx + 8 * 0]
movapd xmm9, [rdx + 8 * 0]

; Ordered will return true when both inputs are NOT nan
; [nan, 0.0] ord [nan, 0.0] = [0, 1]
cmppd xmm8, xmm9, 0x07 ; Ordered

; Bytes 0-15 become [0.0, nan]; both operands of the next compare use it.
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 1], rax

movapd xmm9, [rdx + 8 * 0]
movapd xmm10, [rdx + 8 * 0]

; Ordered will return true when both inputs are NOT nan
; [0.0, nan] ord [0.0, nan] = [1, 0]
cmppd xmm9, xmm10, 0x07 ; Ordered

hlt


================================================
FILE: unittests/ASM/OpSize/66_C4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445467778", "0x5152535455565758"],
    "XMM1":  ["0x4142434477784748", "0x5152535455565758"],
    "XMM2":  ["0x4142777845464748", "0x5152535455565758"],
    "XMM3":  ["0x7778434445464748", "0x5152535455565758"],
    "XMM4":  ["0x4142434445464748", "0x5152535455567778"],
    "XMM5":  ["0x4142434445464748", "0x5152535477785758"],
    "XMM6":  ["0x4142434445464748", "0x5152777855565758"],
    "XMM7":  ["0x4142434445464748", "0x7778535455565758"],
    "XMM8":  ["0x4142434445467778", "0x5152535455565758"],
    "XMM9":  ["0x4142434477784748", "0x5152535455565758"],
    "XMM10": ["0x4142777845464748", "0x5152535455565758"],
    "XMM11": ["0x7778434445464748", "0x5152535455565758"],
    "XMM12": ["0x4142434445464748", "0x5152535455567778"],
    "XMM13": ["0x4142434445464748", "0x5152535477785758"],
    "XMM14": ["0x4142434445464748", "0x5152777855565758"],
    "XMM15": ["0x4142434445464748", "0x7778535455565758"]
  }
}
%endif

; PINSRW test: insert a 16-bit value into each of the eight word lanes,
; once from a general-purpose register and once from memory.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: base pattern loaded into every destination register.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-23: insertion source for the memory form. rax keeps the same
; value for the register form; only its low word (0x7778) is inserted.
mov rax, 0x7172737475767778
mov [rdx + 8 * 2], rax

; xmm0-xmm7: destinations for the register-source form.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]

; xmm8-xmm15: destinations for the memory-source form.
movapd xmm8, [rdx + 8 * 0]
movapd xmm9, [rdx + 8 * 0]
movapd xmm10, [rdx + 8 * 0]
movapd xmm11, [rdx + 8 * 0]
movapd xmm12, [rdx + 8 * 0]
movapd xmm13, [rdx + 8 * 0]
movapd xmm14, [rdx + 8 * 0]
movapd xmm15, [rdx + 8 * 0]

; Register source: word lane N of xmmN is replaced by 0x7778, every other
; lane keeps the base pattern.
pinsrw xmm0, rax, 0
pinsrw xmm1, rax, 1
pinsrw xmm2, rax, 2
pinsrw xmm3, rax, 3
pinsrw xmm4, rax, 4
pinsrw xmm5, rax, 5
pinsrw xmm6, rax, 6
pinsrw xmm7, rax, 7

; Memory source: same lane selection, the word is read from offset 16.
pinsrw xmm8, [rdx + 8 * 2], 0
pinsrw xmm9, [rdx + 8 * 2], 1
pinsrw xmm10, [rdx + 8 * 2], 2
pinsrw xmm11, [rdx + 8 * 2], 3
pinsrw xmm12, [rdx + 8 * 2], 4
pinsrw xmm13, [rdx + 8 * 2], 5
pinsrw xmm14, [rdx + 8 * 2], 6
pinsrw xmm15, [rdx + 8 * 2], 7

hlt


================================================
FILE: unittests/ASM/OpSize/66_C4_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445467778", "0x5152535455565758"],
    "XMM1":  ["0x4142434477784748", "0x5152535455565758"],
    "XMM2":  ["0x4142777845464748", "0x5152535455565758"],
    "XMM3":  ["0x7778434445464748", "0x5152535455565758"],
    "XMM4":  ["0x4142434445464748", "0x5152535455567778"],
    "XMM5":  ["0x4142434445464748", "0x5152535477785758"],
    "XMM6":  ["0x4142434445464748", "0x5152777855565758"],
    "XMM7":  ["0x4142434445464748", "0x7778535455565758"],
    "XMM8":  ["0x4142434445467778", "0x5152535455565758"],
    "XMM9":  ["0x4142434477784748", "0x5152535455565758"],
    "XMM10": ["0x4142777845464748", "0x5152535455565758"],
    "XMM11": ["0x7778434445464748", "0x5152535455565758"],
    "XMM12": ["0x4142434445464748", "0x5152535455567778"],
    "XMM13": ["0x4142434445464748", "0x5152535477785758"],
    "XMM14": ["0x4142434445464748", "0x5152777855565758"],
    "XMM15": ["0x4142434445464748", "0x7778535455565758"]
  }
}
%endif

; PINSRW test, lane-index wrap-around: immediates 8-15 must select the same
; word lanes as immediates 0-7 (only the low three bits of the immediate
; pick the lane), so xmm8-xmm15 are expected to match xmm0-xmm7.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: base pattern loaded into every destination register.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; rax holds the insertion source; only its low word (0x7778) is inserted.
; The copy stored at offset 16 is not read by this variant of the test.
mov rax, 0x7172737475767778
mov [rdx + 8 * 2], rax

; xmm0-xmm7: destinations for the in-range immediates.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]

; xmm8-xmm15: destinations for the out-of-range immediates.
movapd xmm8, [rdx + 8 * 0]
movapd xmm9, [rdx + 8 * 0]
movapd xmm10, [rdx + 8 * 0]
movapd xmm11, [rdx + 8 * 0]
movapd xmm12, [rdx + 8 * 0]
movapd xmm13, [rdx + 8 * 0]
movapd xmm14, [rdx + 8 * 0]
movapd xmm15, [rdx + 8 * 0]

; In-range lane indices 0-7.
pinsrw xmm0, rax, 0
pinsrw xmm1, rax, 1
pinsrw xmm2, rax, 2
pinsrw xmm3, rax, 3
pinsrw xmm4, rax, 4
pinsrw xmm5, rax, 5
pinsrw xmm6, rax, 6
pinsrw xmm7, rax, 7

; Lane indices 8-15: expected to behave as 0-7 respectively.
pinsrw xmm8, rax, 8
pinsrw xmm9, rax, 9
pinsrw xmm10, rax, 10
pinsrw xmm11, rax, 11
pinsrw xmm12, rax, 12
pinsrw xmm13, rax, 13
pinsrw xmm14, rax, 14
pinsrw xmm15, rax, 15

hlt


================================================
FILE: unittests/ASM/OpSize/66_C5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4748",
    "RBX": "0x4546",
    "RCX": "0x4344",
    "RDX": "0x4142",
    "RBP": "0x5758",
    "RSI": "0x5556",
    "RDI": "0x5354",
    "RSP": "0x5152"
  }
}
%endif

; PEXTRW test: extract each of the eight word lanes of xmm0 into a
; general-purpose register.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: source pattern; each word lane holds a distinct value.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movapd xmm0, [rdx + 8 * 0]

; Fill every destination with all-ones so the expected 16-bit results also
; prove that the upper 48 bits are cleared by the extract.
; rdx and rsp are reused as plain destinations; neither the scratch pointer
; nor the stack is needed after this point.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1

; Word lane N of xmm0 -> destination register.
pextrw rax, xmm0, 0
pextrw rbx, xmm0, 1
pextrw rcx, xmm0, 2
pextrw rdx, xmm0, 3
pextrw rbp, xmm0, 4
pextrw rsi, xmm0, 5
pextrw rdi, xmm0, 6
pextrw rsp, xmm0, 7

hlt


================================================
FILE: unittests/ASM/OpSize/66_C5_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4748",
    "RBX": "0x4546",
    "RCX": "0x4344",
    "RDX": "0x4142",
    "RBP": "0x5758",
    "RSI": "0x5556",
    "RDI": "0x5354",
    "RSP": "0x5152",
    "R8":  "0x4748",
    "R9":  "0x4546",
    "R10": "0x4344",
    "R11": "0x4142",
    "R12": "0x5758",
    "R13": "0x5556",
    "R14": "0x5354",
    "R15": "0x5152"
  }
}
%endif

; PEXTRW test, lane-index wrap-around: immediates 8-15 must select the same
; word lanes as immediates 0-7 (only the low three bits of the immediate
; pick the lane), so r8-r15 are expected to match rax..rsp.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: source pattern; each word lane holds a distinct value.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movapd xmm0, [rdx + 8 * 0]

; Fill EVERY destination with all-ones so the expected 16-bit results also
; prove that the extract clears the upper 48 bits. r8-r15 are included so
; the wrapped-index extracts are held to the same standard as rax..rsp.
; rdx and rsp are reused as plain destinations; neither the scratch pointer
; nor the stack is needed after this point.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1
mov r8, -1
mov r9, -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1
mov r15, -1

; In-range lane indices 0-7.
pextrw rax, xmm0, 0
pextrw rbx, xmm0, 1
pextrw rcx, xmm0, 2
pextrw rdx, xmm0, 3
pextrw rbp, xmm0, 4
pextrw rsi, xmm0, 5
pextrw rdi, xmm0, 6
pextrw rsp, xmm0, 7
; Lane indices 8-15: expected to behave as 0-7 respectively.
pextrw r8, xmm0, 8
pextrw r9, xmm0, 9
pextrw r10, xmm0, 10
pextrw r11, xmm0, 11
pextrw r12, xmm0, 12
pextrw r13, xmm0, 13
pextrw r14, xmm0, 14
pextrw r15, xmm0, 15

hlt


================================================
FILE: unittests/ASM/OpSize/66_C6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x6162636465666768"],
    "XMM1":  ["0x5152535455565758", "0x6162636465666768"],
    "XMM2":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM3":  ["0x5152535455565758", "0x7172737475767778"]
  }
}
%endif

; SHUFPD test: all four selector values of the 2-bit immediate.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: destination operand (qwords 0x41.. / 0x51..).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: source operand (qwords 0x61.. / 0x71..).
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 2]

; imm bit 0 picks which destination qword becomes the low result,
; imm bit 1 picks which source (xmm4) qword becomes the high result.
shufpd xmm0, xmm4, 0
shufpd xmm1, xmm4, 1
shufpd xmm2, xmm4, 2
shufpd xmm3, xmm4, 3

hlt


================================================
FILE: unittests/ASM/OpSize/66_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xbff0000000000000", "0x4008000000000000"],
    "XMM1": ["0xbff0000000000000", "0x4008000000000000"],
    "XMM2": ["0x3ff0000000000000", "0x4008000000000000"],
    "XMM3": ["0x3ff0000000000000", "0x4008000000000000"]
  }
}
%endif

; ADDSUBPD test: low lane = dst - src, high lane = dst + src, with memory
; and register source forms and with the operands swapped.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: [1.0, 1.0]
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 1], rax

; Bytes 16-31: [2.0, 2.0]
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 3], rax

; Bytes 32-47: [2.0, 2.0]
mov rax, 0x4000000000000000
mov [rdx + 8 * 4], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 5], rax

; Bytes 48-63: [1.0, 1.0]
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 6], rax
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 7], rax

; [1.0, 1.0] addsub [2.0, 2.0] = [-1.0, 3.0], memory source.
movapd xmm0, [rdx]
addsubpd xmm0, [rdx + 8 * 2]

; Same operands, register source. xmm2 is only a temporary here; it is
; overwritten by the next case.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
addsubpd xmm1, xmm2

; [2.0, 2.0] addsub [1.0, 1.0] = [1.0, 3.0], memory source.
movapd xmm2, [rdx + 8 * 4]
addsubpd xmm2, [rdx + 8 * 6]

; Same operands, register source.
movapd xmm3, [rdx + 8 * 4]
movapd xmm4, [rdx + 8 * 6]
addsubpd xmm3, xmm4

hlt


================================================
FILE: unittests/ASM/OpSize/66_D1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM1": ["0x0041004300450047", "0x0051005300550057"],
    "XMM2": ["0x0", "0x0"],
    "XMM3": ["0x0", "0x0"],
    "XMM4": ["0x0", "0x0"]
  }
}
%endif

; PSRLW test: logical right shift of each 16-bit lane by a count taken from
; a memory operand. Counts of 16 or more must clear every lane.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: value that gets shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1 (each count occupies a full 16-byte slot, upper qword zero).
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8.
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Will Zero
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Will Zero
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

; Every destination starts from the same value.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]

; Shift by 1, 8, 16, 32 and 64 respectively.
psrlw xmm0, [rdx + 8 * 2]
psrlw xmm1, [rdx + 8 * 4]
psrlw xmm2, [rdx + 8 * 6]
psrlw xmm3, [rdx + 8 * 8]
psrlw xmm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM1": ["0x0041424300454647", "0x0051525300555657"],
    "XMM2": ["0x0000414200004546", "0x0000515200005556"],
    "XMM3": ["0x0", "0x0"],
    "XMM4": ["0x0", "0x0"]
  }
}
%endif

; PSRLD test: logical right shift of each 32-bit lane by a count taken from
; a memory operand. Counts of 32 or more must clear every lane.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: value that gets shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1 (each count occupies a full 16-byte slot, upper qword zero).
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8.
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Shift count 16.
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Will Zero
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

; Every destination starts from the same value.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]

; Shift by 1, 8, 16, 32 and 64 respectively.
psrld xmm0, [rdx + 8 * 2]
psrld xmm1, [rdx + 8 * 4]
psrld xmm2, [rdx + 8 * 6]
psrld xmm3, [rdx + 8 * 8]
psrld xmm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM1": ["0x0041424344454647", "0x0051525354555657"],
    "XMM2": ["0x0000414243444546", "0x0000515253545556"],
    "XMM3": ["0x0000000041424344", "0x0000000051525354"],
    "XMM4": ["0x0", "0x0"]
  }
}
%endif

; PSRLQ test: logical right shift of each 64-bit lane by a count taken from
; a memory operand. A count of 64 must clear every lane.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: value that gets shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1 (each count occupies a full 16-byte slot, upper qword zero).
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8.
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Shift count 16.
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Shift count 32.
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

; Every destination starts from the same value.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]

; Shift by 1, 8, 16, 32 and 64 respectively.
psrlq xmm0, [rdx + 8 * 2]
psrlq xmm1, [rdx + 8 * 4]
psrlq xmm2, [rdx + 8 * 6]
psrlq xmm3, [rdx + 8 * 8]
psrlq xmm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; PADDQ test: 64-bit lane-wise add, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first addend.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: second addend.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same sums.
paddq xmm0, xmm2
paddq xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940"],
    "XMM1":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940"]
  }
}
%endif

; PMULLW test: 16-bit lane-wise multiply keeping the low 16 bits of each
; product, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first factor.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: second factor.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same products
; (e.g. lowest lane: 0x4748 * 0x6768 = 0x1CCAED40 -> 0xED40).
pmullw xmm0, xmm2
pmullw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x0"],
    "XMM1":  ["0x4142434445464748", "0x0"],
    "XMM2":  ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVQ (66 0F D6) test: copy the low qword of an XMM register to another
; XMM register (upper qword cleared) and to memory.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: source pattern (loaded into xmm2).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-47: zeroed. Only bytes 16-31 are read back below; they let the
; reload into xmm1 show that the store wrote exactly eight bytes.
mov rax, 0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movapd xmm2, [rdx + 8 * 0]

; movq xmm0, xmm2
; Hand-encoded register form of opcode 66 0F D6. ModRM 11_010_000b:
; mod=11 (register), reg=010 (xmm2, source), rm=000 (xmm0, destination).
; NOTE(review): presumably emitted with db because the assembler would pick
; a different movq encoding for the register-to-register form - confirm.
db 0x66, 0x0f, 0xd6, 11_010_000b
; Memory-destination form: stores the low qword of xmm2 at offset 16.
movq [rdx + 8 * 2], xmm2
; Reload the full 16 bytes: low qword = stored value, high qword still zero.
movapd xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0",
    "RBX": "0x0",
    "RCX": "0xFFFF",
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x0", "0x0"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; PMOVMSKB test: gather the top bit of each of the 16 bytes into a 16-bit
; mask in a general-purpose register.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: every byte is below 0x80, so no top bit is set.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: all zero. Bytes 32-47: all ones.
mov rax, 0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]

; Expected masks: 0 (no top bits), 0 (all zero), 0xFFFF (all top bits set).
; The sources must be left unmodified. rax held all-ones before the first
; extract, so its expected value of 0 also covers the upper bits.
pmovmskb eax, xmm0
pmovmskb ebx, xmm1
pmovmskb ecx, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x2020202000000000", "0x0"],
    "XMM1":  ["0x2020202000000000", "0x0"]
  }
}
%endif

; PSUBUSB test: unsigned saturating byte subtract, register and memory
; source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: minuend.
mov rax, 0x6162636445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: subtrahend. It is smaller than the minuend only in the upper
; four bytes of the low qword (difference 0x20 each); every other byte is
; larger, so those lanes must clamp to 0 instead of wrapping.
mov rax, 0x4142434465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result.
psubusb xmm0, xmm2
psubusb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_D9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x2020202000000000", "0x0"],
    "XMM1":  ["0x2020202000000000", "0x0"]
  }
}
%endif

; PSUBUSW test: unsigned saturating word subtract, register and memory
; source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: minuend.
mov rax, 0x6162636445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: subtrahend. It is smaller than the minuend only in the upper
; two words of the low qword (difference 0x2020 each); every other word is
; larger, so those lanes must clamp to 0 instead of wrapping.
mov rax, 0x4142434465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result.
psubusw xmm0, xmm2
psubusw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_DA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; PMINUB test: unsigned byte-wise minimum, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand. Its low qword holds the smaller bytes, its
; high qword the larger ones.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand, arranged the other way round, so the result
; takes its low qword from the first operand and its high qword from here.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result.
pminub xmm0, xmm2
pminub xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_DB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1010101010101010", "0x0"],
    "XMM1": ["0x1010101010101010", "0x0"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; PAND test: 128-bit bitwise AND, memory and register source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: mask, all-ones in the low qword and zero in the high qword.
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Bytes 16-31: value being masked.
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory source: the low qword passes through, the high qword is cleared.
movapd xmm0, [rdx]
pand xmm0, [rdx + 8 * 2]

; Register source: same operands, same result; xmm2 must stay unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pand xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_DC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM3":  ["0xFFFFFFFFFFFFFFFF", "0xF2F4F6F8FAFCFEFF"]
  }
}
%endif

; PADDUSB test: unsigned saturating byte add, register and memory source
; forms, plus a case where lanes actually saturate.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first addend.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: second addend. No byte sum with the first addend exceeds
; 0xFF, so xmm0/xmm1 only cover the non-saturating path.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Bytes 32-47: addend that forces saturation when added to bytes 16-31.
; Low qword: every byte sum exceeds 0xFF and must clamp to 0xFF.
; High qword: sums are 0xF2, 0xF4, ... 0xFE (kept as-is) and, in the lowest
; byte, 0x78 + 0x88 = 0x100, the smallest overflowing sum, which must clamp
; to 0xFF rather than wrap to 0x00.
mov rax, 0xF1F2F3F4F5F6F7F8
mov [rdx + 8 * 4], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]
movaps xmm3, [rdx + 8 * 2]

; Register source, then memory source; both must give the same sums.
paddusb xmm0, xmm2
paddusb xmm1, [rdx + 8 * 2]

; Saturating case: a plain wrapping byte add would fail this check.
paddusb xmm3, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/OpSize/66_DD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM3":  ["0xFFFFFFFFFFFFFFFF", "0xF2F4F6F8FAFCFF00"]
  }
}
%endif

; PADDUSW test: unsigned saturating word add, register and memory source
; forms, plus a case where lanes actually saturate.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first addend.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16-31: second addend. No word sum with the first addend exceeds
; 0xFFFF, so xmm0/xmm1 only cover the non-saturating path.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Bytes 32-47: addend that forces saturation when added to bytes 16-31.
; Low qword: every word sum exceeds 0xFFFF and must clamp to 0xFFFF.
; High qword: no word overflows (0xF2F4, 0xF6F8, 0xFAFC, 0xFF00). The lowest
; word, 0x7778 + 0x8788 = 0xFF00, carries from its low byte into its high
; byte, which tells a word-wise add apart from a byte-wise one.
mov rax, 0xF1F2F3F4F5F6F7F8
mov [rdx + 8 * 4], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]
movaps xmm3, [rdx + 8 * 2]

; Register source, then memory source; both must give the same sums.
paddusw xmm0, xmm2
paddusw xmm1, [rdx + 8 * 2]

; Saturating case: a plain wrapping word add would fail this check.
paddusw xmm3, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/OpSize/66_DE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; PMAXUB test: unsigned byte-wise maximum, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand. Its low qword holds the smaller bytes, its
; high qword the larger ones.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand, arranged the other way round, so the result
; takes its low qword from here and its high qword from the first operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result.
pmaxub xmm0, xmm2
pmaxub xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_DF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM1": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; PANDN test: dst = (NOT dst) AND src, memory and register source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: destination operand; it is inverted before the AND, giving
; zero in the low qword and all-ones in the high qword.
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Bytes 16-31: source operand.
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory source: the low qword is cleared, the high qword passes through.
movapd xmm0, [rdx]
pandn xmm0, [rdx + 8 * 2]

; Register source: same operands, same result; xmm2 must stay unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pandn xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x2179b0697d5378c4", "0x3b8e6eae8c165248"],
    "XMM1":  ["0x1ed68638699d35ca", "0x5e2e7560ab7b5262"],
    "XMM2":  ["0x165c42291f28194c", "0x0923643c32130145"],
    "XMM3":  ["0x2179b0697d5378c4", "0x3b8e6eae8c165248"],
    "XMM4":  ["0x1ed68638699d35ca", "0x5e2e7560ab7b5262"],
    "XMM5":  ["0x165c42291f28194c", "0x0923643c32130145"]
  }
}
%endif

; PAVGB test: unsigned byte-wise rounding average, (a + b + 1) >> 1, against
; three different second operands, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand, shared by all six averages.
mov rax, 0x2bb883523d4f3197
mov [rdx + 8 * 0], rax
mov rax, 0x1246c77764260189
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand A.
mov rax, 0x163add80bc57bef1
mov [rdx + 8 * 2], rax
mov rax, 0x64d615e5b405a306
mov [rdx + 8 * 3], rax

; Bytes 32-47: second operand B.
mov rax, 0x11f4881d94eb39fc
mov [rdx + 8 * 4], rax
mov rax, 0xa9162248f2d0a23a
mov [rdx + 8 * 5], rax

; Bytes 48-63: second operand C, all zero; the average with zero is
; (a + 1) >> 1, which makes the round-up on odd bytes visible.
mov rax, 0x0
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Every destination starts from the first operand.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

; Second operands A, B, C for the register-source form.
movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Register source.
pavgb xmm0, xmm6
pavgb xmm1, xmm7
pavgb xmm2, xmm8

; Memory source; results must match xmm0-xmm2 respectively.
pavgb xmm3, [rdx + 8 * 2]
pavgb xmm4, [rdx + 8 * 4]
pavgb xmm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM1":  ["0x0041004300450047", "0x0071007300750077"],
    "XMM2":  ["0x0", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM4":  ["0x0041004300450047", "0x0071007300750077"],
    "XMM5":  ["0x0", "0x0"],
    "XMM9":  ["0xFF81FF83FF85FF87", "0xFFF1FFF3FFF5FFF7"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; PSRAW test: arithmetic right shift of each 16-bit lane by a count taken
; from the low qword of an XMM register or memory operand. Covers positive
; lanes (xmm0-xmm5) and negative lanes (xmm9-xmm10); only the negative
; lanes can tell an arithmetic shift apart from a logical one.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: value with every word positive (top bit clear).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Shift count 0. The upper qword of each count slot is deliberately
; non-zero: only the low qword may be used as the count.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Shift count 8.
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; Shift count 16: larger than any in-lane shift, so every lane is filled
; with its sign bit.
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

; Bytes 64-79: value with every word negative (top bit set).
mov rax, 0x8182838485868788
mov [rdx + 8 * 8], rax
mov rax, 0xF1F2F3F4F5F6F7F8
mov [rdx + 8 * 9], rax

; Positive-lane destinations.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

; Shift counts 0, 8 and 16 for the register-source form.
movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Negative-lane destinations.
movapd xmm9, [rdx + 8 * 8]
movapd xmm10, [rdx + 8 * 8]

; Register count: unchanged, high byte moved down, all lanes cleared.
psraw xmm0, xmm6
psraw xmm1, xmm7
psraw xmm2, xmm8

; Memory count; results must match xmm0-xmm2 respectively.
psraw xmm3, [rdx + 8 * 2]
psraw xmm4, [rdx + 8 * 4]
psraw xmm5, [rdx + 8 * 6]

; Negative lanes, count 8 (register): the vacated upper byte of every word
; must be filled with ones, e.g. 0x8182 -> 0xFF81.
psraw xmm9, xmm7
; Negative lanes, count 16 (memory): every word becomes 0xFFFF.
psraw xmm10, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM1":  ["0x0000414200004546", "0x0000717200007576"],
    "XMM2":  ["0x0", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM4":  ["0x0000414200004546", "0x0000717200007576"],
    "XMM5":  ["0x0", "0x0"],
    "XMM9":  ["0xFFFF8182FFFF8586", "0xFFFFF1F2FFFFF5F6"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; PSRAD test: arithmetic right shift of each 32-bit lane by a count taken
; from the low qword of an XMM register or memory operand. Covers positive
; lanes (xmm0-xmm5) and negative lanes (xmm9-xmm10); only the negative
; lanes can tell an arithmetic shift apart from a logical one.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: value with every dword positive (top bit clear).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Shift count 0. The upper qword of each count slot is deliberately
; non-zero: only the low qword may be used as the count.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Shift count 16.
mov rax, 0x10
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; Shift count 32: larger than any in-lane shift, so every lane is filled
; with its sign bit.
mov rax, 0x20
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

; Bytes 64-79: value with every dword negative (top bit set).
mov rax, 0x8182838485868788
mov [rdx + 8 * 8], rax
mov rax, 0xF1F2F3F4F5F6F7F8
mov [rdx + 8 * 9], rax

; Positive-lane destinations.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

; Shift counts 0, 16 and 32 for the register-source form.
movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Negative-lane destinations.
movapd xmm9, [rdx + 8 * 8]
movapd xmm10, [rdx + 8 * 8]

; Register count: unchanged, high word moved down, all lanes cleared.
psrad xmm0, xmm6
psrad xmm1, xmm7
psrad xmm2, xmm8

; Memory count; results must match xmm0-xmm2 respectively.
psrad xmm3, [rdx + 8 * 2]
psrad xmm4, [rdx + 8 * 4]
psrad xmm5, [rdx + 8 * 6]

; Negative lanes, count 16 (register): the vacated upper word of every
; dword must be filled with ones, e.g. 0x81828384 -> 0xFFFF8182.
psrad xmm9, xmm7
; Negative lanes, count 32 (memory): every dword becomes 0xFFFFFFFF.
psrad xmm10, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x20f9b0697cd37844", "0x3b8e6eae8c165248"],
    "XMM1":  ["0x1ed685b8691d35ca", "0x5dae74e0ab7b51e2"],
    "XMM2":  ["0x15dc41a91ea818cc", "0x092363bc321300c5"],
    "XMM3":  ["0x20f9b0697cd37844", "0x3b8e6eae8c165248"],
    "XMM4":  ["0x1ed685b8691d35ca", "0x5dae74e0ab7b51e2"],
    "XMM5":  ["0x15dc41a91ea818cc", "0x092363bc321300c5"]
  }
}
%endif

; PAVGW test: unsigned word-wise rounding average, (a + b + 1) >> 1, against
; three different second operands, register and memory source forms.
; rdx is the base of the scratch buffer written below.
mov rdx, 0xe0000000

; Bytes 0-15: first operand, shared by all six averages.
mov rax, 0x2bb883523d4f3197
mov [rdx + 8 * 0], rax
mov rax, 0x1246c77764260189
mov [rdx + 8 * 1], rax

; Bytes 16-31: second operand A.
mov rax, 0x163add80bc57bef1
mov [rdx + 8 * 2], rax
mov rax, 0x64d615e5b405a306
mov [rdx + 8 * 3], rax

; Bytes 32-47: second operand B.
mov rax, 0x11f4881d94eb39fc
mov [rdx + 8 * 4], rax
mov rax, 0xa9162248f2d0a23a
mov [rdx + 8 * 5], rax

; Bytes 48-63: second operand C, all zero; the average with zero is
; (a + 1) >> 1, which makes the round-up on odd words visible.
mov rax, 0x0
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Every destination starts from the first operand.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

; Second operands A, B, C for the register-source form.
movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Register source.
pavgw xmm0, xmm6
pavgw xmm1, xmm7
pavgw xmm2, xmm8

; Memory source; results must match xmm0-xmm2 respectively.
pavgw xmm3, [rdx + 8 * 2]
pavgw xmm4, [rdx + 8 * 4]
pavgw xmm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x18D21A1D1B701CCA", "0x24092594272728C2"],
    "XMM1":  ["0x18D21A1D1B701CCA", "0x24092594272728C2"]
  }
}
%endif

; pmulhuw test: high 16 bits of the unsigned word-by-word product, exercised
; with a register source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
pmulhuw xmm0, xmm2
pmulhuw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2"],
    "XMM1":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2"]
  }
}
%endif

; pmulhw test: high 16 bits of the signed word-by-word product, exercised
; with a register source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand. The lowest word (0x8748) has its sign bit set so the
; signed multiply produces a result that differs from the unsigned one.
mov rax, 0x4142434445468748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
pmulhw xmm0, xmm2
pmulhw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000200000001", "0x0"],
    "XMM1":  ["0xFFFFFFFEFFFFFFFF", "0x0"]
  }
}
%endif

; cvttpd2dq test: truncating conversion of two packed doubles to two packed
; 32-bit integers, with register and memory sources. Data is staged at 0xe0000000.
mov rdx, 0xe0000000

; Doubles 1.0 and 2.0.
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Doubles -1.0 and -2.0.
mov rax, 0xbff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0xc000000000000000
mov [rdx + 8 * 3], rax

; Non-zero filler for the destination registers, so the expected zero in the
; upper 64 bits of each result has to come from the conversion itself.
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx + 8 * 4]
movapd xmm1, [rdx + 8 * 4]

movapd xmm2, [rdx + 8 * 0]

; Register source (positive values), then memory source (negative values).
cvttpd2dq xmm0, xmm2
cvttpd2dq xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E6_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000004500000053", "0x0000000000000000"],
    "XMM1": ["0x0000000d00000029", "0x0000000000000000"],
    "XMM2": ["0x0000001500000005", "0x0000000000000000"],
    "XMM3": ["0x0000000500000009", "0x0000000000000000"],
    "XMM4": ["0x000000420000001d", "0x0000000000000000"],
    "XMM5": ["0x0000005b00000013", "0x0000000000000000"],
    "XMM6": ["0x0000003200000028", "0x0000000000000000"],
    "XMM7": ["0x0000001700000020", "0x0000000000000000"],
    "XMM8": ["0x000000180000005a", "0x0000000000000000"],
    "XMM9": ["0x0000005b00000062", "0x0000000000000000"],
    "XMM10": ["0x000000630000005a", "0x0000000000000000"],
    "XMM11": ["0x0000004a00000040", "0x0000000000000000"],
    "XMM12": ["0x0000001900000023", "0x0000000000000000"],
    "XMM13": ["0x0000005a00000005", "0x0000000000000000"],
    "XMM14": ["0x0000003400000021", "0x0000000000000000"],
    "XMM15": ["0x0000000a0000003a", "0x0000000000000000"]
  }
}
%endif

; cvttpd2dq test across all sixteen XMM registers with a memory source.
; Each register is first filled with the same 16 bytes it will convert, so the
; destination holds non-zero data before the conversion overwrites it.
lea rdx, [rel .data]
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; Convert each pair of doubles; the fractional part is truncated.
cvttpd2dq xmm0, [rdx + 16 * 0]
cvttpd2dq xmm1, [rdx + 16 * 1]
cvttpd2dq xmm2, [rdx + 16 * 2]
cvttpd2dq xmm3, [rdx + 16 * 3]
cvttpd2dq xmm4, [rdx + 16 * 4]
cvttpd2dq xmm5, [rdx + 16 * 5]
cvttpd2dq xmm6, [rdx + 16 * 6]
cvttpd2dq xmm7, [rdx + 16 * 7]
cvttpd2dq xmm8, [rdx + 16 * 8]
cvttpd2dq xmm9, [rdx + 16 * 9]
cvttpd2dq xmm10, [rdx + 16 * 10]
cvttpd2dq xmm11, [rdx + 16 * 11]
cvttpd2dq xmm12, [rdx + 16 * 12]
cvttpd2dq xmm13, [rdx + 16 * 13]
cvttpd2dq xmm14, [rdx + 16 * 14]
cvttpd2dq xmm15, [rdx + 16 * 15]

hlt

align 16
; 512 bytes of random data (64 doubles); only the first 256 bytes (32 doubles)
; are read by the code above.
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/OpSize/66_E7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; movntdq test: store an XMM register to memory with the non-temporal store
; instruction and read it back. Data is staged at 0xe0000000.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Value that will be stored through movntdq.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Zero the store destination so stale data cannot satisfy the check.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; xmm0 starts with a different pattern than the one expected at the end.
movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movntdq [rdx + 8 * 4], xmm1
; Read the stored value back into xmm0 for verification.
movaps xmm0, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0"],
    "XMM1":  ["0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0"]
  }
}
%endif

; psubsb test: packed signed-saturating byte subtract, exercised with a
; register source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every byte is 0x20 larger than the minuend's.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
psubsb xmm0, xmm2
psubsb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0"],
    "XMM1":  ["0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0"]
  }
}
%endif

; psubsw test: packed signed-saturating word subtract, exercised with a
; register source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every word is 0x2020 larger than the minuend's.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
psubsw xmm0, xmm2
psubsw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_EA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x5152535455565758"],
    "XMM1":  ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; pminsw test: packed signed word minimum, exercised with a register source
; and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand: smaller words in the low qword, larger in the high qword.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand: larger words in the low qword, smaller in the high qword,
; so the result takes one half from each operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
pminsw xmm0, xmm2
pminsw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_EB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1111111111111111", "0x2222222222222222"],
    "XMM1": ["0x1111111111111111", "0x2222222222222222"],
    "XMM2": ["0x0101010101010101", "0x0202020202020202"]
  }
}
%endif

; por test: 128-bit bitwise OR, exercised with a memory source and a register
; source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x1010101010101010
mov [rdx + 8 * 0], rax
mov rax, 0x2020202020202020
mov [rdx + 8 * 1], rax

; Source operand; its set bits do not overlap the destination's.
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
mov rax, 0x0202020202020202
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
por xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
por xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_EC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F"],
    "XMM1":  ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F"]
  }
}
%endif

; paddsb test: packed signed-saturating byte add, exercised with a register
; source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand. Every byte sum exceeds 0x7F, so every lane saturates.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
paddsb xmm0, xmm2
paddsb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_ED.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"],
    "XMM1":  ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"]
  }
}
%endif

; paddsw test: packed signed-saturating word add, exercised with a register
; source and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand. Every word sum exceeds 0x7FFF, so every lane saturates.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
paddsw xmm0, xmm2
paddsw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_EE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x7172737475767778"],
    "XMM1":  ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; pmaxsw test: packed signed word maximum, exercised with a register source
; and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand: smaller words in the low qword, larger in the high qword.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand: larger words in the low qword, smaller in the high qword,
; so the result takes one half from each operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
pmaxsw xmm0, xmm2
pmaxsw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_EF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2424242424242424", "0x2424242424242424"],
    "XMM1": ["0x2424242424242424", "0x2424242424242424"],
    "XMM2": ["0x1818181818181818", "0x1818181818181818"]
  }
}
%endif

; pxor test: 128-bit bitwise XOR, exercised with a memory source and a
; register source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 0], rax
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 1], rax

; Source operand (0x3C ^ 0x18 = 0x24 in every byte).
mov rax, 0x1818181818181818
mov [rdx + 8 * 2], rax
mov rax, 0x1818181818181818
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
pxor xmm0, [rdx + 8 * 2]

; Register-source form; xmm2 must be left unmodified.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pxor xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0"],
    "XMM1":  ["0x4200440046004800", "0x7200740076007800"],
    "XMM2":  ["0x0", "0x0"],
    "XMM3":  ["0x0", "0x0"]
  }
}
%endif

; psllw test: packed word shift-left with the count taken from a memory
; operand. Counts 1, 8, 16 and 32 are used; 16 and 32 are expected to clear
; every word. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Value to be shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Shift counts. The count lives in the low qword; the high qword of each
; 16-byte operand is filled with a non-zero pattern that must not affect the
; result.
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 0]
movaps xmm2, [rdx + 8 * 0]
movaps xmm3, [rdx + 8 * 0]

; Only the memory-count form is exercised in this file.
psllw xmm0, [rdx + 8 * 2]
psllw xmm1, [rdx + 8 * 4]
psllw xmm2, [rdx + 8 * 6]
psllw xmm3, [rdx + 8 * 8]

hlt


================================================
FILE: unittests/ASM/OpSize/66_F2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM1":  ["0x4344000047480000", "0x7374000077780000"],
    "XMM2":  ["0x0", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM4":  ["0x4344000047480000", "0x7374000077780000"],
    "XMM5":  ["0x0", "0x0"]
  }
}
%endif

; pslld test: packed dword shift-left with the count taken from a register
; and from memory. Counts 0, 16 and 32 are used; 32 is expected to clear every
; dword. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Value to be shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Shift counts. The count lives in the low qword; the high qword of each
; 16-byte operand is filled with a non-zero pattern that must not affect the
; result.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x10
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x20
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Register-count forms.
pslld xmm0, xmm6
pslld xmm1, xmm7
pslld xmm2, xmm8

; Memory-count forms; expected to match xmm0-xmm2 respectively.
pslld xmm3, [rdx + 8 * 2]
pslld xmm4, [rdx + 8 * 4]
pslld xmm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_F3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM1":  ["0x4546474800000000", "0x7576777800000000"],
    "XMM2":  ["0x0", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x7172737475767778"],
    "XMM4":  ["0x4546474800000000", "0x7576777800000000"],
    "XMM5":  ["0x0", "0x0"]
  }
}
%endif

; psllq test: packed qword shift-left with the count taken from a register
; and from memory. Counts 0, 32 and 64 are used; 64 is expected to clear both
; qwords. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Value to be shifted.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Shift counts. The count lives in the low qword; the high qword of each
; 16-byte operand is filled with a non-zero pattern that must not affect the
; result.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x20
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x40
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

movapd xmm6, [rdx + 8 * 2]
movapd xmm7, [rdx + 8 * 4]
movapd xmm8, [rdx + 8 * 6]

; Register-count forms.
psllq xmm0, xmm6
psllq xmm1, xmm7
psllq xmm2, xmm8

; Memory-count forms; expected to match xmm0-xmm2 respectively.
psllq xmm3, [rdx + 8 * 2]
psllq xmm4, [rdx + 8 * 4]
psllq xmm5, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/OpSize/66_F4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x000000000003FFFC", "0x000000000000FFFE"],
    "XMM1": ["0x000000000003FFFC", "0x000000000000FFFE"]
  }
}
%endif

; pmuludq test: multiplies the low unsigned dword of each qword lane into a
; 64-bit product, exercised with a memory source and a register source.
; Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand. The upper dword of each qword is a non-zero filler
; that must not contribute to the product.
mov rax, 0x414243440000FFFF
mov [rdx + 8 * 0], rax
mov rax, 0x5152535400007FFF
mov [rdx + 8 * 1], rax

; Source operand: multipliers 4 and 2 in the low dwords, filler above them.
mov rax, 0x6162636400000004
mov [rdx + 8 * 2], rax
mov rax, 0x7172737400000002
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
pmuludq xmm0, [rdx + 8 * 2]

; Register-source form; expected to match xmm0.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
pmuludq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_F5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4"]
  }
}
%endif

; pmaddwd test: multiplies packed signed words and adds adjacent products
; into dwords, using a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; NOTE(review): this zeroed scratch area is never read again in this file.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; NOTE(review): eax/rbx are loaded here but are not listed in RegData and are
; not used afterwards - presumably leftovers; confirm before removing.
mov eax, dword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 1]

movaps xmm0, [rdx + 8 * 0]

pmaddwd xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_F6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x100", "0x100"],
    "XMM1": ["0xC5", "0x88"],
    "XMM2": ["0x66", "0x5A"],
    "XMM3": ["0x6B", "0x68"],
    "XMM4": ["0x60", "0x68"],
    "XMM5": ["0x65", "0x65"],
    "XMM6": ["0x33", "0x57"],
    "XMM7": ["0x38", "0x5C"],
    "XMM8": ["0x3B", "0x6D"]
  }
}
%endif

; psadbw test: sum of absolute byte differences, accumulated separately for
; the low and high qword. The first case uses hand-written operands staged at
; 0xe0000000; the remaining cases use the data table at the end of the file.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 2]

; Every byte differs by 0x20, so each qword sums to 8 * 0x20 = 0x100.
psadbw xmm0, [rdx + 8 * 0]

lea rdx, [rel .data]

; Destination operands: 16-byte chunks 0-7 of the table.
movaps xmm1, [rdx + 16 * 0]
movaps xmm2, [rdx + 16 * 1]
movaps xmm3, [rdx + 16 * 2]
movaps xmm4, [rdx + 16 * 3]
movaps xmm5, [rdx + 16 * 4]
movaps xmm6, [rdx + 16 * 5]
movaps xmm7, [rdx + 16 * 6]
movaps xmm8, [rdx + 16 * 7]

; Source operands: 16-byte chunks 8-15 of the table.
psadbw xmm1, [rdx + 16 * 8]
psadbw xmm2, [rdx + 16 * 9]
psadbw xmm3, [rdx + 16 * 10]
psadbw xmm4, [rdx + 16 * 11]
psadbw xmm5, [rdx + 16 * 12]
psadbw xmm6, [rdx + 16 * 13]
psadbw xmm7, [rdx + 16 * 14]
psadbw xmm8, [rdx + 16 * 15]

hlt

align 16
; Test data table.
; NOTE: NASM only interprets backslash escapes inside backquoted strings.
; These rows are single-quoted, so each one is emitted verbatim as 64 ASCII
; characters ('\', 'x' and two hex digits per group) rather than 16 raw bytes.
; The table is therefore 1024 bytes long, and the code above only reads its
; first 256 bytes (the first four rows). The expected values in the CONFIG
; block match this literal-text layout (e.g. XMM1 = 0xC5 / 0x88), so switching
; to backquotes requires regenerating them.
.data:
db '\xe0\xfc\x2b\xa1\x06\x4f\x6c\xa7\x0f\x06\x6a\x1e\x7f\x76\x80\x9b'
db '\xe0\x56\xed\xaa\xf3\xc3\x68\x68\xde\xe6\xe6\x94\xe2\xe9\xfc\xf0'
db '\x6e\x35\xa8\x54\xd7\xab\x8b\x6c\x77\x5f\x92\xca\x25\xa6\x7e\x27'
db '\xc7\xcd\x73\xec\x95\xd6\x6f\x6a\xbb\xae\xf2\xbb\x27\xb9\xa1\xdd'
db '\x73\x4d\xd1\xc7\xd5\x2c\x31\x88\xfe\xe7\xdb\xfd\x1e\x1e\x09\x7f'
db '\x14\xfa\x4e\x95\xef\xe6\x9a\xf2\xa0\x42\x62\x9a\xa4\xa8\x73\x82'
db '\x0e\x0f\x16\x82\x38\x07\x12\x32\x07\x35\x92\xc1\x63\x07\x78\xb3'
db '\xcb\x46\x19\x57\x2b\x37\x2a\x46\x1f\x04\x0e\x79\x3d\xcd\x8d\xa3'
db '\x2b\xf3\x86\x2f\xab\xba\x57\x30\x2e\xd6\x2c\xf0\x46\x4f\x3f\xef'
db '\xef\xd1\xbb\x85\x34\x4b\x3c\xde\x9e\x48\xa3\xb9\x8d\x71\xe3\x9d'
db '\x09\x72\xfb\xde\x8a\x32\x50\x9d\x69\x98\xf1\xf6\x52\xeb\xf7\xee'
db '\xd6\x99\xc2\xff\x30\x1c\x02\xce\x70\x05\xb2\xf1\x56\x9c\x0e\xa6'
db '\x18\x62\xc4\xe2\x86\x38\x76\x30\x2f\xa1\xe4\xa7\x0e\x5d\x53\xeb'
db '\x14\x45\xe0\xb7\xe1\xe8\x02\x68\x1a\xfe\x8e\xc1\x8f\xf2\xeb\x46'
db '\x7f\x5d\x6a\x23\x46\x97\x2e\x03\x98\x12\x32\x8f\x54\x76\x59\xac'
db '\xc8\x76\x5f\xc8\x71\x0c\xd3\xb6\xc5\x19\xea\xab\xa6\x2c\x1d\x88'


================================================
FILE: unittests/ASM/OpSize/66_F7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x41424344FFFFFFFF",
    "RSP": "0x51525354FFFFFFFF",
    "RSI": "0xFFFFFFFFFFFFFFFF",
    "RDI": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; maskmovdqu test: byte-masked store of xmm0 to the address in rdi, using
; three masks (all bytes, upper four bytes of each qword, no bytes).
; Data is staged at 0xe0000000.
mov rdx, 0xe0000000

; Data to be stored.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Mask 1: high bit set in every byte, so every byte is written.
mov rax, 0x8080808080808080
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Mask 2: high bit set only in the upper four bytes of each qword.
mov rax, 0x8080808000000000
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Mask 3: no bits set, so nothing is written.
mov rax, 0
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; Pre-fill the three 16-byte destinations with all ones so that bytes left
; untouched by the masked store are distinguishable from written ones.
mov rax, -1
mov [rdx + 8 * 8], rax
mov [rdx + 8 * 9], rax

mov [rdx + 8 * 10], rax
mov [rdx + 8 * 11], rax

mov [rdx + 8 * 12], rax
mov [rdx + 8 * 13], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movaps xmm2, [rdx + 8 * 4]
movaps xmm3, [rdx + 8 * 6]

; maskmovdqu takes its destination address implicitly from rdi.
lea rdi, [rdx + 8 * 8]
maskmovdqu xmm0, xmm1

lea rdi, [rdx + 8 * 10]
maskmovdqu xmm0, xmm2

lea rdi, [rdx + 8 * 12]
maskmovdqu xmm0, xmm3

; Read the three destinations back into GPRs for verification.
mov rax, qword [rdx + 8 * 8]
mov rbx, qword [rdx + 8 * 9]

; rsp is used here purely as a result register.
mov rcx, qword [rdx + 8 * 10]
mov rsp, qword [rdx + 8 * 11]

mov rsi, qword [rdx + 8 * 12]
mov rdi, qword [rdx + 8 * 13]

hlt


================================================
FILE: unittests/ASM/OpSize/66_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2020202020202020", "0x2020202020202020"],
    "XMM1": ["0x2020202020202020", "0x2020202020202020"]
  }
}
%endif

; psubb test: packed byte subtract, exercised with a memory source and a
; register source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every byte is 0x20 smaller, so no lane borrows.
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
psubb xmm0, [rdx + 8 * 2]

; Register-source form; expected to match xmm0.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
psubb xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_F9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2020202020202020", "0x2020202020202020"],
    "XMM1": ["0x2020202020202020", "0x2020202020202020"]
  }
}
%endif

; psubw test: packed word subtract, exercised with a memory source and a
; register source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every byte is 0x20 smaller, so no borrow
; propagates between bytes and the result is the same for any element size.
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
psubw xmm0, [rdx + 8 * 2]

; Register-source form; expected to match xmm0.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
psubw xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_FA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2020202020202020", "0x2020202020202020"],
    "XMM1": ["0x2020202020202020", "0x2020202020202020"]
  }
}
%endif

; psubd test: packed dword subtract, exercised with a memory source and a
; register source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every byte is 0x20 smaller, so no borrow
; propagates between bytes and the result is the same for any element size.
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
psubd xmm0, [rdx + 8 * 2]

; Register-source form; expected to match xmm0.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
psubd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_FB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2020202020202020", "0x2020202020202020"],
    "XMM1": ["0x2020202020202020", "0x2020202020202020"]
  }
}
%endif

; psubq test: packed qword subtract, exercised with a memory source and a
; register source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand (minuend).
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Source operand (subtrahend); every byte is 0x20 smaller, so no borrow
; propagates between bytes and the result is the same for any element size.
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
psubq xmm0, [rdx + 8 * 2]

; Register-source form; expected to match xmm0.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
psubq xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/OpSize/66_FC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; paddb test: packed byte add (wrapping), exercised with a register source
; and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand. No byte sum exceeds 0xFF, so no carry leaves any byte.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
paddb xmm0, xmm2
paddb xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_FD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; paddw test: packed word add (wrapping), exercised with a register source
; and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand. No byte sum exceeds 0xFF, so no carry crosses a byte
; boundary and the result is the same for any element size.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
paddw xmm0, xmm2
paddw xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/OpSize/66_FE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM1":  ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; paddd test: packed dword add (wrapping), exercised with a register source
; and a memory source. Operands are staged at 0xe0000000.
mov rdx, 0xe0000000

; Destination operand.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source operand. No byte sum exceeds 0xFF, so no carry crosses a byte
; boundary and the result is the same for any element size.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Register form, then memory form; both are expected to produce the same value.
paddd xmm0, xmm2
paddd xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Pause.asm
================================================
%ifdef CONFIG
{
}
%endif

; pause is encoded as `rep nop`. Set rcx to an absurdly large count so that a
; decoder which wrongly honours the rep prefix misbehaves visibly.
mov rcx, -1

; No register state is checked; the test only ensures that pause executes.
pause

hlt


================================================
FILE: unittests/ASM/Primary/Primary_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0xD9",
    "RCX": "0x67E9",
    "RDX": "0x656667F9",
    "RBP": "0x6162636465666809",
    "RDI": "0x81",
    "RSP": "0x8081",
    "R8":  "0x80808081",
    "R9":  "0x0",
    "R10": "0x4142441546174719",
    "R11": "0x5152535455565829",
    "R12": "0x6162636465666768"
  }
}
%endif

; add test covering 8/16/32/64-bit operand sizes in three forms:
; memory destination, memory source, and accumulator with immediate.
; Memory operands are staged at 0xe0000000.
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory-destination forms. The byte, word and dword adds hit disjoint parts
; of the first qword; the qword add hits the second qword.
mov rax, 0xD1
add byte  [r15 + 8 * 0 + 0], al
add word  [r15 + 8 * 0 + 2], ax
add dword [r15 + 8 * 0 + 4], eax
add qword [r15 + 8 * 1 + 0], rax

; Memory-source forms, all reading from the third qword.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

add bl,  byte  [r15 + 8 * 2]
add cx,  word  [r15 + 8 * 2]
add edx, dword [r15 + 8 * 2]
add rbp, qword [r15 + 8 * 2]

; Accumulator-with-immediate forms; each result is copied out of rax before
; the next case reuses it. rsp is used purely as a result register.
mov rax, 0x01
add al, 0x80
mov rdi, rax

mov rax, 0x01
add ax, 0x8080
mov rsp, rax

mov rax, 0x01
add eax, 0x80808080
mov r8, rax

mov rax, 0x01
add rax, -1
mov r9, rax

; Read the memory operands back for verification; the third qword must be
; unchanged.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_01_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464848",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; lock add (16-bit) at unaligned addresses, including accesses that straddle
; 4-, 8-, 16- and 64-byte boundaries. Memory is staged at 0xe0000000.
mov r15, 0xe0000000

; Fill ten qwords (80 bytes) with the same known pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. Adding 1 only changes the low byte of each target word, since none
; of those bytes is 0xFF.
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock add word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock add word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock add word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock add word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock add word [r15 + 8 * 0 + 63], ax

; Read back the modified qwords and some untouched neighbours for verification.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_01_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; lock add (32-bit) at unaligned addresses that straddle 4-, 8-, 16- and
; 64-byte boundaries. Memory is staged at 0xe0000000.
mov r15, 0xe0000000

; Fill ten qwords (80 bytes) with the same known pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. Adding 1 only changes the low byte of each target dword, since none
; of those bytes is 0xFF.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock add dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock add dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock add dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock add dword [r15 + 8 * 0 + 63], eax

; Read back the modified qwords and some untouched neighbours for verification.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_01_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434445464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; lock add (64-bit) at unaligned addresses that straddle 8-, 16- and 64-byte
; boundaries. Memory is staged at 0xe0000000.
mov r15, 0xe0000000

; Fill ten qwords (80 bytes) with the same known pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Addend. Adding 1 only changes the low byte of each target qword, since none
; of those bytes is 0xFF.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock add qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock add qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock add qword [r15 + 8 * 0 + 63], rax

; Read back the modified qwords and some untouched neighbours for verification.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_08.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x79",
    "RCX": "0x67E9",
    "RDX": "0x656667F9",
    "RBP": "0x61626364656667E9",
    "RDI": "0x81",
    "RSP": "0x8081",
    "R8":  "0x80808081",
    "R9":  "0xFFFFFFFFFFFFFFFF",
    "R10": "0x414243D545D747D9",
    "R11": "0x51525354555657D9",
    "R12": "0x6162636465666768"
  }
}
%endif

; or test covering 8/16/32/64-bit operand sizes in three forms:
; memory destination, memory source, and accumulator with immediate.
; Memory operands are staged at 0xe0000000.
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory-destination forms. The byte, word and dword ORs hit disjoint parts
; of the first qword; the qword OR hits the second qword.
mov rax, 0xD1
or byte  [r15 + 8 * 0 + 0], al
or word  [r15 + 8 * 0 + 2], ax
or dword [r15 + 8 * 0 + 4], eax
or qword [r15 + 8 * 1 + 0], rax

; Memory-source forms, all reading from the third qword.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

or bl,  byte  [r15 + 8 * 2]
or cx,  word  [r15 + 8 * 2]
or edx, dword [r15 + 8 * 2]
or rbp, qword [r15 + 8 * 2]

; Accumulator-with-immediate forms; each result is copied out of rax before
; the next case reuses it. rsp is used purely as a result register.
mov rax, 0x01
or al, 0x80
mov rdi, rax

mov rax, 0x01
or ax, 0x8080
mov rsp, rax

mov rax, 0x01
or eax, 0x80808080
mov r8, rax

mov rax, 0x01
or rax, -1
mov r9, rax

; Read the memory operands back for verification; the third qword must be
; unchanged.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_09_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xC14243C4C5C6C748",
    "RBX": "0xC1424344454647C8",
    "RCX": "0x41424344454647C8",
    "RDX": "0x4142434445464748",
    "RSI": "0xC142434445464748",
    "RDI": "0x41424344454647C8"
  }
}
%endif

; lock or (16-bit) at unaligned addresses, including accesses that straddle
; 4-, 8-, 16- and 64-byte boundaries. Memory is staged at 0xe0000000.
mov r15, 0xe0000000

; Fill ten qwords (80 bytes) with the same known pattern. No byte of the
; pattern has bit 7 set.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; OR operand: bit 7 of both bytes. ORing 1 would be a no-op here, because the
; low byte of every target word is already odd, and the test could then not
; tell a working `lock or` from one that does nothing. With 0x8080 both bytes
; of every target word change, on each side of the boundary being crossed.
mov rax, 0x8080

; Test 1 byte offset within 4byte boundary
lock or word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock or word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock or word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock or word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock or word [r15 + 8 * 0 + 63], ax

; Read back the modified qwords and an untouched neighbour for verification.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_09_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xC1C2C3C4C5464748",
    "RBX": "0xC142434445C6C7C8",
    "RCX": "0x4142434445C6C7C8",
    "RDX": "0x4142434445464748",
    "RSI": "0xC142434445464748",
    "RDI": "0x4142434445C6C7C8"
  }
}
%endif

; lock or (32-bit) at unaligned addresses that straddle 4-, 8-, 16- and
; 64-byte boundaries. Memory is staged at 0xe0000000.
mov r15, 0xe0000000

; Fill ten qwords (80 bytes) with the same known pattern. No byte of the
; pattern has bit 7 set.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; OR operand: bit 7 of all four bytes. ORing 1 would be a no-op here, because
; the low byte of every target dword is already odd, and the test could then
; not tell a working `lock or` from one that does nothing. With 0x80808080
; every byte of every target dword changes, on each side of the boundary
; being crossed.
mov rax, 0x80808080

; Test 3 byte offset across 4byte boundary
lock or dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock or dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock or dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock or dword [r15 + 8 * 0 + 63], eax

; Read back the modified qwords and an untouched neighbour for verification.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_09_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x4142434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4142434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 64-bit `lock or` test.
; ORs 1 into qwords located at odd byte offsets whose eight-byte access
; straddles an 8-, 16- and 64-byte boundary.
; The low byte of every addressed qword (0x41) already has bit 0 set, so the
; memory contents are expected to be unchanged afterwards.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; rax = 1 is the source operand for every locked OR below.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock or qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock or qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock or qword [r15 + 8 * 0 + 63], rax

; Read back the qwords on both sides of each crossed boundary (0/1, 1/2, 7/8).
; Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0xD9",
    "RCX": "0x67E9",
    "RDX": "0x656667F9",
    "RBP": "0x6162636465666809",
    "RDI": "0x81",
    "RSP": "0x8081",
    "R8":  "0x80808081",
    "R9":  "0x0",
    "R10": "0x4142441546174719",
    "R11": "0x5152535455565829",
    "R12": "0x6162636465666768"
  }
}
%endif

; ADC test with the carry flag cleared (clc) before every operation, so each
; adc adds only its two operands. Covers memory-destination, register-
; destination and immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
clc
adc byte  [r15 + 8 * 0 + 0], al
clc
adc word  [r15 + 8 * 0 + 2], ax
clc
adc dword [r15 + 8 * 0 + 4], eax
clc
adc qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

clc
adc bl,  byte  [r15 + 8 * 2]
clc
adc cx,  word  [r15 + 8 * 2]
clc
adc edx, dword [r15 + 8 * 2]
clc
adc rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
clc
adc al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
clc
adc ax, 0x8080
mov rsp, rax

mov rax, 0x01
clc
adc eax, 0x80808080
mov r8, rax

; 1 + (-1) wraps to 0.
mov rax, 0x01
clc
adc rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_10_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0xDA",
    "RCX": "0x67EA",
    "RDX": "0x656667FA",
    "RBP": "0x616263646566680A",
    "RDI": "0x82",
    "RSP": "0x8082",
    "R8":  "0x80808082",
    "R9":  "0x1",
    "R10": "0x414244164618471A",
    "R11": "0x515253545556582A",
    "R12": "0x6162636465666768"
  }
}
%endif

; ADC test with the carry flag set (stc) before every operation, so each adc
; adds its two operands plus one. Covers memory-destination, register-
; destination and immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
stc
adc byte  [r15 + 8 * 0 + 0], al
stc
adc word  [r15 + 8 * 0 + 2], ax
stc
adc dword [r15 + 8 * 0 + 4], eax
stc
adc qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

stc
adc bl,  byte  [r15 + 8 * 2]
stc
adc cx,  word  [r15 + 8 * 2]
stc
adc edx, dword [r15 + 8 * 2]
stc
adc rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
stc
adc al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
stc
adc ax, 0x8080
mov rsp, rax

mov rax, 0x01
stc
adc eax, 0x80808080
mov r8, rax

; 1 + (-1) + carry = 1.
mov rax, 0x01
stc
adc rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_10_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0xD9",
    "RCX": "0x67E9",
    "RDX": "0x656667F9",
    "RBP": "0x6162636465666809",
    "RDI": "0x81",
    "RSP": "0x8081",
    "R8":  "0x80808081",
    "R9":  "0x0",
    "R10": "0x4142441546174719",
    "R11": "0x5152535455565829",
    "R12": "0x6162636465666768"
  }
}
%endif

; ADC test with the carry flag cleared (clc) before every operation, where the
; memory-destination forms carry a lock prefix. The register-destination and
; immediate-source forms that follow are unprefixed.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Locked memory destination, register source (0xD1). The byte, word and dword
; ops hit bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
clc
lock adc byte  [r15 + 8 * 0 + 0], al
clc
lock adc word  [r15 + 8 * 0 + 2], ax
clc
lock adc dword [r15 + 8 * 0 + 4], eax
clc
lock adc qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

clc
adc bl,  byte  [r15 + 8 * 2]
clc
adc cx,  word  [r15 + 8 * 2]
clc
adc edx, dword [r15 + 8 * 2]
clc
adc rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
clc
adc al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
clc
adc ax, 0x8080
mov rsp, rax

mov rax, 0x01
clc
adc eax, 0x80808080
mov r8, rax

; 1 + (-1) wraps to 0.
mov rax, 0x01
clc
adc rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_10_4.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RBX": "0xDA",
    "RCX": "0x67EA",
    "RDX": "0x656667FA",
    "RBP": "0x616263646566680A",
    "RDI": "0x82",
    "RSP": "0x8082",
    "R8":  "0x80808082",
    "R9":  "0x1",
    "R10": "0x414244164618471A",
    "R11": "0x515253545556582A",
    "R12": "0x6162636465666768"
  }
}
%endif

; ADC test with the carry flag set (stc) before every operation, where the
; memory-destination forms carry a lock prefix. The register-destination and
; immediate-source forms that follow are unprefixed.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Locked memory destination, register source (0xD1). The byte, word and dword
; ops hit bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
stc
lock adc byte  [r15 + 8 * 0 + 0], al
stc
lock adc word  [r15 + 8 * 0 + 2], ax
stc
lock adc dword [r15 + 8 * 0 + 4], eax
stc
lock adc qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

stc
adc bl,  byte  [r15 + 8 * 2]
stc
adc cx,  word  [r15 + 8 * 2]
stc
adc edx, dword [r15 + 8 * 2]
stc
adc rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
stc
adc al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
stc
adc ax, 0x8080
mov rsp, rax

mov rax, 0x01
stc
adc eax, 0x80808080
mov r8, rax

; 1 + (-1) + carry = 1.
mov rax, 0x01
stc
adc rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_18.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x09",
    "RCX": "0x9919",
    "RDX": "0x9A999929",
    "RBP": "0x9E9D9C9B9A999939",
    "RDI": "0x81",
    "RSP": "0x7F81",
    "R8":  "0x7F7F7F81",
    "R9":  "0x02",
    "R10": "0x4142427344754777",
    "R11": "0x5152535455565687",
    "R12": "0x6162636465666768"
  }
}
%endif

; SBB test with the carry flag cleared (clc) before every operation, so each
; sbb subtracts only the source operand. Covers memory-destination, register-
; destination and immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
clc
sbb byte  [r15 + 8 * 0 + 0], al
clc
sbb word  [r15 + 8 * 0 + 2], ax
clc
sbb dword [r15 + 8 * 0 + 4], eax
clc
sbb qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

clc
sbb bl,  byte  [r15 + 8 * 2]
clc
sbb cx,  word  [r15 + 8 * 2]
clc
sbb edx, dword [r15 + 8 * 2]
clc
sbb rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
clc
sbb al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
clc
sbb ax, 0x8080
mov rsp, rax

mov rax, 0x01
clc
sbb eax, 0x80808080
mov r8, rax

; 1 - (-1) = 2.
mov rax, 0x01
clc
sbb rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_18_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x08",
    "RCX": "0x9918",
    "RDX": "0x9A999928",
    "RBP": "0x9E9D9C9B9A999938",
    "RDI": "0x80",
    "RSP": "0x7F80",
    "R8":  "0x7F7F7F80",
    "R9":  "0x01",
    "R10": "0x4142427244744776",
    "R11": "0x5152535455565686",
    "R12": "0x6162636465666768"
  }
}
%endif

; SBB test with the carry flag set (stc) before every operation, so each sbb
; subtracts the source operand plus one. Covers memory-destination, register-
; destination and immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
stc
sbb byte  [r15 + 8 * 0 + 0], al
stc
sbb word  [r15 + 8 * 0 + 2], ax
stc
sbb dword [r15 + 8 * 0 + 4], eax
stc
sbb qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

stc
sbb bl,  byte  [r15 + 8 * 2]
stc
sbb cx,  word  [r15 + 8 * 2]
stc
sbb edx, dword [r15 + 8 * 2]
stc
sbb rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
stc
sbb al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
stc
sbb ax, 0x8080
mov rsp, rax

mov rax, 0x01
stc
sbb eax, 0x80808080
mov r8, rax

; 1 - (-1) - carry = 1.
mov rax, 0x01
stc
sbb rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_18_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x09",
    "RCX": "0x9919",
    "RDX": "0x9A999929",
    "RBP": "0x9E9D9C9B9A999939",
    "RDI": "0x81",
    "RSP": "0x7F81",
    "R8":  "0x7F7F7F81",
    "R9":  "0x02",
    "R10": "0x4142427344754777",
    "R11": "0x5152535455565687",
    "R12": "0x6162636465666768"
  }
}
%endif

; SBB test with the carry flag cleared (clc) before every operation, where the
; memory-destination forms carry a lock prefix. The register-destination and
; immediate-source forms that follow are unprefixed.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Locked memory destination, register source (0xD1). The byte, word and dword
; ops hit bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
clc
lock sbb byte  [r15 + 8 * 0 + 0], al
clc
lock sbb word  [r15 + 8 * 0 + 2], ax
clc
lock sbb dword [r15 + 8 * 0 + 4], eax
clc
lock sbb qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

clc
sbb bl,  byte  [r15 + 8 * 2]
clc
sbb cx,  word  [r15 + 8 * 2]
clc
sbb edx, dword [r15 + 8 * 2]
clc
sbb rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
clc
sbb al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
clc
sbb ax, 0x8080
mov rsp, rax

mov rax, 0x01
clc
sbb eax, 0x80808080
mov r8, rax

; 1 - (-1) = 2.
mov rax, 0x01
clc
sbb rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_18_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x08",
    "RCX": "0x9918",
    "RDX": "0x9A999928",
    "RBP": "0x9E9D9C9B9A999938",
    "RDI": "0x80",
    "RSP": "0x7F80",
    "R8":  "0x7F7F7F80",
    "R9":  "0x01",
    "R10": "0x4142427244744776",
    "R11": "0x5152535455565686",
    "R12": "0x6162636465666768"
  }
}
%endif

; SBB test with the carry flag set (stc) before every operation, where the
; memory-destination forms carry a lock prefix. The register-destination and
; immediate-source forms that follow are unprefixed.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Locked memory destination, register source (0xD1). The byte, word and dword
; ops hit bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
stc
lock sbb byte  [r15 + 8 * 0 + 0], al
stc
lock sbb word  [r15 + 8 * 0 + 2], ax
stc
lock sbb dword [r15 + 8 * 0 + 4], eax
stc
lock sbb qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

stc
sbb bl,  byte  [r15 + 8 * 2]
stc
sbb cx,  word  [r15 + 8 * 2]
stc
sbb edx, dword [r15 + 8 * 2]
stc
sbb rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
stc
sbb al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
stc
sbb ax, 0x8080
mov rsp, rax

mov rax, 0x01
stc
sbb eax, 0x80808080
mov r8, rax

; 1 - (-1) - carry = 1.
mov rax, 0x01
stc
sbb rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_20.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x60",
    "RCX": "0x00",
    "RDX": "0x00",
    "RBP": "0x20",
    "RDI": "0x00",
    "RSP": "0x00",
    "R8":  "0x00",
    "R9":  "0x01",
    "R10": "0x0000004000404740",
    "R11": "0x50",
    "R12": "0x6162636465666768"
  }
}
%endif

; AND test covering memory-destination, register-destination and
; immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1. Because the
; source is zero above bit 7, every byte above the lowest one of each operand
; is cleared.
mov rax, 0xD1
and byte  [r15 + 8 * 0 + 0], al
and word  [r15 + 8 * 0 + 2], ax
and dword [r15 + 8 * 0 + 4], eax
and qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

and bl,  byte  [r15 + 8 * 2]
and cx,  word  [r15 + 8 * 2]
and edx, dword [r15 + 8 * 2]
and rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
and al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
and ax, 0x8080
mov rsp, rax

mov rax, 0x01
and eax, 0x80808080
mov r8, rax

; AND with all ones leaves rax at 1.
mov rax, 0x01
and rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_23_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0142430001000148",
    "RBX": "0x0142434445464700",
    "RCX": "0x4142434445464700",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4142434445464700"
  }
}
%endif

; Unaligned 16-bit `lock and` test.
; ANDs words at odd byte offsets with 1, including offsets whose two-byte
; access straddles a 4-, 8-, 16- and 64-byte boundary.
; The low byte of every addressed word is odd, so each op leaves 0x01 in the
; byte at the offset and 0x00 in the byte after it. The accessed byte ranges
; do not overlap.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; ax = 1 is the source operand for every locked AND below.
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock and word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock and word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock and word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock and word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock and word [r15 + 8 * 0 + 63], ax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_23_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0100000001464748",
    "RBX": "0x0142434445000000",
    "RCX": "0x4142434445000000",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4142434445000000"
  }
}
%endif

; Unaligned 32-bit `lock and` test.
; ANDs dwords at odd byte offsets with 1; each four-byte access straddles a
; 4-, 8-, 16- or 64-byte boundary.
; The low byte of every addressed dword is odd, so each op leaves 0x01 in the
; byte at the offset and 0x00 in the three bytes after it. The accessed byte
; ranges do not overlap.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; eax = 1 is the source operand for every locked AND below.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock and dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock and dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock and dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock and dword [r15 + 8 * 0 + 63], eax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_23_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0142434445464748",
    "RBX": "0x0100000000000000",
    "RCX": "0x4100000000000000",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4100000000000000"
  }
}
%endif

; Unaligned 64-bit `lock and` test.
; ANDs qwords at odd byte offsets with 1; each eight-byte access straddles an
; 8-, 16- or 64-byte boundary.
; The low byte of every addressed qword is 0x41 (odd), so each op leaves 0x01
; in the byte at the offset and 0x00 in the seven bytes after it. The accessed
; byte ranges do not overlap.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; rax = 1 is the source operand for every locked AND below.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock and qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock and qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock and qword [r15 + 8 * 0 + 63], rax

; Read back the qwords on both sides of each crossed boundary (0/1, 1/2, 7/8).
; Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_28.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x09",
    "RCX": "0x9919",
    "RDX": "0x9A999929",
    "RBP": "0x9E9D9C9B9A999939",
    "RDI": "0x81",
    "RSP": "0x7F81",
    "R8":  "0x7F7F7F81",
    "R9":  "0x02",
    "R10": "0x4142427344754777",
    "R11": "0x5152535455565687",
    "R12": "0x6162636465666768"
  }
}
%endif

; SUB test covering memory-destination, register-destination and
; immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1.
mov rax, 0xD1
sub byte  [r15 + 8 * 0 + 0], al
sub word  [r15 + 8 * 0 + 2], ax
sub dword [r15 + 8 * 0 + 4], eax
sub qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

sub bl,  byte  [r15 + 8 * 2]
sub cx,  word  [r15 + 8 * 2]
sub edx, dword [r15 + 8 * 2]
sub rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
sub al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
sub ax, 0x8080
mov rsp, rax

mov rax, 0x01
sub eax, 0x80808080
mov r8, rax

; 1 - (-1) = 2.
mov rax, 0x01
sub rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_29_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464648",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 16-bit `lock sub` test.
; Subtracts 1 from words at odd byte offsets, including offsets whose two-byte
; access straddles a 4-, 8-, 16- and 64-byte boundary.
; The low byte of every addressed word is odd (0x47, 0x45 or 0x41), so each op
; only decrements the byte at the offset and never borrows from the next byte.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; ax = 1 is the source operand for every locked SUB below.
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock sub word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock sub word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock sub word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock sub word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock sub word [r15 + 8 * 0 + 63], ax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_29_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 32-bit `lock sub` test.
; Subtracts 1 from dwords at odd byte offsets; each four-byte access straddles
; a 4-, 8-, 16- or 64-byte boundary.
; The low byte of every addressed dword is odd (0x45 or 0x41), so each op only
; decrements the byte at the offset and never borrows from the higher bytes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; eax = 1 is the source operand for every locked SUB below.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock sub dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock sub dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock sub dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock sub dword [r15 + 8 * 0 + 63], eax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_29_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434445464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 64-bit `lock sub` test.
; Subtracts 1 from qwords at odd byte offsets; each eight-byte access
; straddles an 8-, 16- or 64-byte boundary.
; The low byte of every addressed qword is 0x41 (odd), so each op only
; decrements the byte at the offset to 0x40 and never borrows from the higher
; bytes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; rax = 1 is the source operand for every locked SUB below.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock sub qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock sub qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock sub qword [r15 + 8 * 0 + 63], rax

; Read back the qwords on both sides of each crossed boundary (0/1, 1/2, 7/8).
; Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_30.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x19",
    "RCX": "0x67E9",
    "RDX": "0x656667F9",
    "RBP": "0x61626364656667C9",
    "RDI": "0x81",
    "RSP": "0x8081",
    "R8":  "0x80808081",
    "R9":  "0xFFFFFFFFFFFFFFFE",
    "R10": "0x4142439545974799",
    "R11": "0x5152535455565789",
    "R12": "0x6162636465666768"
  }
}
%endif

; XOR test covering memory-destination, register-destination and
; immediate-source forms at 8/16/32/64-bit operand sizes.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Seed three qwords of scratch memory with distinct patterns.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Memory destination, register source (0xD1). The byte, word and dword ops hit
; bytes 0, 2-3 and 4-7 of qword 0; the qword op hits qword 1. Only the lowest
; byte of each operand changes because the source is zero above bit 7.
mov rax, 0xD1
xor byte  [r15 + 8 * 0 + 0], al
xor word  [r15 + 8 * 0 + 2], ax
xor dword [r15 + 8 * 0 + 4], eax
xor qword [r15 + 8 * 1 + 0], rax

; Register destination, memory source. Qword 2 is only read here.
mov rbx, 0x71
mov rcx, 0x81
mov rdx, 0x91
mov rbp, 0xA1

xor bl,  byte  [r15 + 8 * 2]
xor cx,  word  [r15 + 8 * 2]
xor edx, dword [r15 + 8 * 2]
xor rbp, qword [r15 + 8 * 2]

; Immediate source with al/ax/eax/rax as destination. rax is reloaded with 1
; before each op and the result is parked in rdi, rsp, r8 and r9.
mov rax, 0x01
xor al, 0x80
mov rdi, rax

; rsp is used purely as a result register; no stack operation follows.
mov rax, 0x01
xor ax, 0x8080
mov rsp, rax

mov rax, 0x01
xor eax, 0x80808080
mov r8, rax

; XOR with all ones inverts rax: 1 -> 0xFFFFFFFFFFFFFFFE.
mov rax, 0x01
xor rax, -1
mov r9, rax

; Load the three memory qwords so the harness can check them via registers.
mov r10, [r15 + 8 * 0]
mov r11, [r15 + 8 * 1]
mov r12, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_31_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464648",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 16-bit `lock xor` test.
; XORs words at odd byte offsets with 1, including offsets whose two-byte
; access straddles a 4-, 8-, 16- and 64-byte boundary.
; Each op flips bit 0 of the byte at the offset (0x47 -> 0x46, 0x45 -> 0x44,
; 0x41 -> 0x40) and leaves the following byte unchanged.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; ax = 1 is the source operand for every locked XOR below.
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock xor word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock xor word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock xor word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock xor word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock xor word [r15 + 8 * 0 + 63], ax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_31_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 32-bit `lock xor` test.
; XORs dwords at odd byte offsets with 1; each four-byte access straddles a
; 4-, 8-, 16- or 64-byte boundary.
; Each op flips bit 0 of the byte at the offset (0x45 -> 0x44, 0x41 -> 0x40)
; and leaves the following three bytes unchanged.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; eax = 1 is the source operand for every locked XOR below.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock xor dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock xor dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock xor dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock xor dword [r15 + 8 * 0 + 63], eax

; Read back the qwords on both sides of each crossed 8-byte-or-larger
; boundary (0/1, 1/2, 7/8). Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_31_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434445464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; Unaligned 64-bit `lock xor` test.
; XORs qwords at odd byte offsets with 1; each eight-byte access straddles an
; 8-, 16- or 64-byte boundary.
; Each op flips bit 0 of the byte at the offset (0x41 -> 0x40) and leaves the
; following seven bytes unchanged.

; r15 = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov r15, 0xe0000000

; Fill ten consecutive qwords (80 bytes) with the same pattern.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; rax = 1 is the source operand for every locked XOR below.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock xor qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock xor qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock xor qword [r15 + 8 * 0 + 63], rax

; Read back the qwords on both sides of each crossed boundary (0/1, 1/2, 7/8).
; Qword 3 is not written by any locked op above.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_38.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468348"
  }
}
%endif

; CMP test with an 8-bit memory operand on the left and a register on the
; right. The resulting flags are captured with lahf and checked through AH.

; rdx = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov rdx, 0xe0000000

; Seed two qwords; only byte 1 of qword 0 (0x47) takes part in the compare.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x61
cmp byte [rdx + 8 * 0 + 1], al
; cmp = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
; Reload rax with the unchanged memory pattern (mov leaves the flags intact),
; then let lahf overwrite AH with the flag byte 0x83.
mov rax, [rdx + 8 * 0]
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_39.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8700",
    "RBX": "0x8300",
    "RCX": "0x0200",
    "RSI": "0x0300"
  }
}
%endif

; CMP test with a 16/32/64-bit memory operand on the left and a register on
; the right. After each compare rax is zeroed (mov leaves the flags intact)
; and lahf places the flag byte in AH, so each result register holds
; flags << 8.

; rdx = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov rdx, 0xe0000000

; Seed four qwords of scratch memory used as the left-hand operands.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

mov rax, -256
cmp qword [rdx + 8 * 3 + 0], rax
; cmp = 0x6162636465666768 - -256(0xFFFFFFFFFFFFFF00) = 0x6162636465666868
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rsi, rax

mov rax, 0x61626364
cmp qword [rdx + 8 * 2 + 0], rax
; cmp = 0x6162636465666768 - 0x61626364 = 0x6162636404040404
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rcx, rax

mov rax, 0x61626364
cmp dword [rdx + 8 * 1 + 0], eax
; cmp = 0x55565758 - 0x61626364 = 0xF3F3F3F4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rbx, rax

mov rax, 0x6162
cmp word [rdx + 8 * 0 + 2], ax
; cmp = 0x4546 - 0x6162 = 0xE3E4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000111
; OF: LAHF doesn't load - 0
; The last result stays in rax and is checked there directly.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_3A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468348"
  }
}
%endif

; CMP test with an 8-bit register on the left and a memory operand on the
; right. The resulting flags are captured with lahf and checked through AH.

; rdx = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov rdx, 0xe0000000

; Seed two qwords; only byte 1 of qword 0 (0x61) takes part in the compare.
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x47
cmp al, byte [rdx + 8 * 0 + 1]
; cmp = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
; Reload rax with the memory pattern (mov leaves the flags intact), then let
; lahf overwrite AH (0x61 from memory) with the flag byte 0x83.
mov rax, [rdx + 8 * 0]
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_3B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8700",
    "RBX": "0x8300",
    "RCX": "0x0200",
    "RSI": "0x0300"
  }
}
%endif

; CMP test with a 16/32/64-bit register on the left and a memory operand on
; the right. After each compare rax is zeroed (mov leaves the flags intact)
; and lahf places the flag byte in AH, so each result register holds
; flags << 8.

; rdx = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov rdx, 0xe0000000

; Seed four qwords of scratch memory used as the right-hand operands.
mov rax, 0x61620000
mov [rdx + 8 * 0], rax
mov rax, 0x61626364
mov [rdx + 8 * 1], rax
mov rax, 0x61626364
mov [rdx + 8 * 2], rax
mov rax, -256
mov [rdx + 8 * 3], rax

mov rax, 0x6162636465666768
cmp rax, qword [rdx + 8 * 3 + 0]
; cmp = 0x6162636465666768 - -256(0xFFFFFFFFFFFFFF00) = 0x6162636465666868
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rsi, rax

mov rax, 0x6162636465666768
cmp rax, qword [rdx + 8 * 2 + 0]
; cmp = 0x6162636465666768 - 0x61626364 = 0x6162636404040404
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rcx, rax

mov rax, 0x5152535455565758
cmp eax, dword [rdx + 8 * 1 + 0]
; cmp = 0x55565758 - 0x61626364 = 0xF3F3F3F4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rbx, rax

mov rax, 0x0000414243444546
cmp ax, word [rdx + 8 * 0 + 2]
; cmp = 0x4546 - 0x6162 = 0xE3E4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000111
; OF: LAHF doesn't load - 0
; The last result stays in rax and is checked there directly.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_3C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468348"
  }
}
%endif

; CMP test comparing al against an 8-bit immediate. The resulting flags are
; captured with lahf and checked through AH.

; rdx = base address of the scratch buffer.
; NOTE(review): presumably the test harness maps writable memory here - confirm.
mov rdx, 0xe0000000

; Seed two qwords; qword 0 is only used to reload rax before lahf.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; al = 0x47 is the left-hand operand.
mov rax, 0x0041424344454647
cmp al, 0x61
; cmp = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
; Reload rax with the memory pattern (mov leaves the flags intact), then let
; lahf overwrite AH with the flag byte 0x83.
mov rax, [rdx + 8 * 0]
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_3D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8700",
    "RBX": "0x8300",
    "RCX": "0x0200",
    "RSI": "0x0300"
  }
}
%endif

; Exercises `cmp rAX, imm` at 64-, 32- and 16-bit operand sizes (the file name
; suggests primary opcode 0x3D). After each compare the flags are read back
; with LAHF into a zeroed RAX, so each checked register holds just AH << 8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

mov rax, 0x6162636465666768
cmp rax, -256
; cmp = 0x6162636465666768 - -256 (0xFFFFFFFFFFFFFF00) = 0x6162636465666868
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rsi, rax

mov rax, 0x6162636465666768
cmp rax, 0x61626364
; cmp = 0x6162636465666768 - 0x61626364 = 0x6162636404040404
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rcx, rax

mov rax, 0x5152535455565758
cmp eax, 0x61626364
; cmp = 0x55565758 - 0x61626364 = 0xF3F3F3F4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rbx, rax

mov rax, 0x0000414243444546
cmp ax, 0x6162
; cmp = 0x4546 - 0x6162 = 0xE3E4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000111
; OF: LAHF doesn't load - 0
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_50.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4",
    "RBP": "0x5",
    "RSI": "0x6",
    "RDI": "0x7",
    "R15": "0x8"
  }
}
%endif

; Round-trips eight 64-bit registers through the stack with push/pop
; (the file name suggests primary opcode 0x50, PUSH r64).
mov rsp, 0xe0000080

; Give every register a distinct value so a mix-up is detectable.
mov rax, 1
mov rbx, 2
mov rcx, 3
mov rdx, 4
mov rbp, 5
mov rsi, 6
mov rdi, 7
mov r15, 8

push r15 ; push subtracts from rsp before storing
push rdi
push rsi
push rbp
push rdx
push rcx
push rbx
push rax

; Clear the registers so the checked values can only come from the pops.
mov rax, 0
mov rbx, 0
mov rcx, 0
mov rdx, 0
mov rbp, 0
mov rsi, 0
mov rdi, 0
mov r15, 0

; Pop in the reverse of the push order so each register gets its value back.
pop rax
pop rbx
pop rcx
pop rdx
pop rbp
pop rsi
pop rdi
pop r15

hlt


================================================
FILE: unittests/ASM/Primary/Primary_50_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4",
    "RBP": "0x5",
    "RSI": "0x6",
    "RDI": "0x7",
    "R15": "0x8"
  }
}
%endif

; 16-bit variant of the push/pop round trip: the low words of eight registers
; are pushed and popped using 16-bit register operands.
mov rsp, 0xe0000080

; Give every register a distinct value so a mix-up is detectable.
mov rax, 1
mov rbx, 2
mov rcx, 3
mov rdx, 4
mov rbp, 5
mov rsi, 6
mov rdi, 7
mov r15, 8

push r15w ; push subtracts from rsp before storing
push di
push si
push bp
push dx
push cx
push bx
push ax

; Zero the full 64-bit registers: the 16-bit pops below only write the low
; word, so the upper bits of the checked values come from these moves.
mov rax, 0
mov rbx, 0
mov rcx, 0
mov rdx, 0
mov rbp, 0
mov rsi, 0
mov rdi, 0
mov r15, 0

; Pop in the reverse of the push order so each register gets its value back.
pop ax
pop bx
pop cx
pop dx
pop bp
pop si
pop di
pop r15w

hlt


================================================
FILE: unittests/ASM/Primary/Primary_63.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFF81828384",
    "RBX": "0x0000000071727374"
  }
}
%endif

; Exercises `movsxd r64, r32` with one source whose bit 31 is set (RAX, result
; sign-extended with ones) and one whose bit 31 is clear (RBX, upper half zero).
mov rax, 0x81828384
mov rbx, 0x71727374
movsxd rax, eax
movsxd rbx, ebx

hlt


================================================
FILE: unittests/ASM/Primary/Primary_63_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xffffffff81828384",
    "RBX": "0xffffffff81828384",
    "RCX": "0x0000000081828384",
    "RDX": "0x4142434445468384"
  }
}
%endif

; Exercises opcode 0x63 (movsxd) under four prefix combinations. The source is
; always esp/sp = 0x...81828384; each destination starts as 0x4142434445464748
; so the untouched upper bits are visible in the expected values.
mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748
mov rdx, 0x4142434445464748
mov rsp, 0x6666666681828384

; Default: 0x48, 0x63, 0xc4
movsxd rax, esp
; Default with o16 prefix: 0x66, 0x48, 0x63, 0xdc
; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o64`
db 0x66
movsxd rbx, esp
; No REX.W prefix: 32-bit destination
db 0x63, 0xcc ; movsxd ecx, esp
; o16 prefix without REX.W: 16-bit destination
db 0x66, 0x63, 0xd4 ; movsxd dx, sp

hlt


================================================
FILE: unittests/ASM/Primary/Primary_68.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFBEBDBCBB",
    "RBX": "0x51526162",
    "RSP": "0xE0000014"
  }
}
%endif

; Exercises push with an immediate operand at 64-bit and 16-bit operand size
; (the file name suggests primary opcode 0x68).
mov rsp, 0xe0000020

; One 8-byte push followed by two 2-byte pushes:
; rsp goes 0xe0000020 -> 0xe0000018 -> 0xe0000016 -> 0xe0000014.
push qword -0x41424345
push word 0x5152
push word 0x6162

mov rdx, 0xe0000020
; Read back the sign-extended qword at 0xe0000018.
mov rax, [rdx - 8]
; Read back both pushed words at 0xe0000014 as one dword (0x6162 is the low half).
mov ebx, [rdx - 12]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_69.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xB800",
    "RBX": "0xA9A8A800",
    "RSI": "0x9D9C9B9A99989800"
  }
}
%endif

; Exercises three-operand `imul reg, r/m, imm` at 16-, 32- and 64-bit operand
; size with a memory source and the immediate -256 (the file name suggests
; primary opcode 0x69).
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Zero the two result slots so the narrow stores below leave defined upper bits.
mov rax, 0x0
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax

; 16-bit: 0x4748 * -256, truncated to 16 bits = 0xB800
imul ax, word [r15 + 8 * 0 + 0], -256
mov word [r15 + 8 * 3 + 0], ax

; 32-bit: 0x55565758 * -256, truncated to 32 bits = 0xA9A8A800
imul eax, dword [r15 + 8 * 1 + 0], -256
mov dword [r15 + 8 * 4 + 0], eax

; 64-bit: 0x6162636465666768 * -256, truncated to 64 bits = 0x9D9C9B9A99989800
imul rax, qword [r15 + 8 * 2 + 0], -256
mov rsi, rax

; Load the 16- and 32-bit results back as full qwords for the register check.
mov rax, [r15 + 8 * 3]
mov rbx, [r15 + 8 * 4]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_6A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFF81",
    "RSP": "0xE0000018"
  }
}
%endif

; Exercises push of a small negative immediate at the default 64-bit operand
; size (the file name suggests primary opcode 0x6A, PUSH imm8).
mov rsp, 0xe0000020

; -127 is stored sign-extended to 8 bytes; rsp ends at 0xe0000018.
push -127
mov rdx, 0xe0000020
mov rax, [rdx - 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_6A_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000000FF81",
    "RSP": "0xE0000018"
  }
}
%endif

; 16-bit variant of the immediate push test: four 2-byte pushes fill exactly
; one qword below the initial rsp.
mov rsp, 0xe0000020

; The three zero words occupy the upper six bytes of the qword ...
push word 0
push word 0
push word 0
; ... and -127 (0xFF81 as a word) lands in the lowest two bytes.
push word -127

mov rdx, 0xe0000020
mov rax, [rdx - 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_6B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5C00",
    "RBX": "0x54D45400",
    "RSI": "0x4ECE4DCD4CCC4C00"
  }
}
%endif

; Exercises three-operand `imul reg, r/m, imm` at 16-, 32- and 64-bit operand
; size with a memory source and the immediate -128 (the file name suggests
; primary opcode 0x6B).
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Zero the two result slots so the narrow stores below leave defined upper bits.
mov rax, 0x0
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax

; 16-bit: 0x4748 * -128, truncated to 16 bits = 0x5C00
imul ax, word [r15 + 8 * 0 + 0], -128
mov word [r15 + 8 * 3 + 0], ax

; 32-bit: 0x55565758 * -128, truncated to 32 bits = 0x54D45400
imul eax, dword [r15 + 8 * 1 + 0], -128
mov dword [r15 + 8 * 4 + 0], eax

; 64-bit: 0x6162636465666768 * -128, truncated to 64 bits = 0x4ECE4DCD4CCC4C00
imul rax, qword [r15 + 8 * 2 + 0], -128
mov rsi, rax

; Load the 16- and 32-bit results back as full qwords for the register check.
mov rax, [r15 + 8 * 3]
mov rbx, [r15 + 8 * 4]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_84.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0600"
  }
}
%endif

; Exercises 8-bit `test r8, r8` and checks the resulting flags via LAHF.
mov rax, 0x4142434445464847
mov rbx, 0x61
; test = 0x47 & 0x61 = 0x41 (two bits set -> PF set; CF, ZF, SF clear)
test al, bl

; Zero RAX so the checked value is only AH << 8 = 0x0600
; (PF plus the always-set bit 1).
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_84_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0"
  }
}
%endif

; Checks that an 8-bit `test` on r11b only looks at the low byte: the loaded
; dword is 0xFFFFFF00, so the low byte is zero although the rest is not.
mov rdx, 0xe0000000
mov dword [rdx], 0xFFFFFF00

mov     r11d, dword[rdx]
test    r11b, r11b
; Expected path: ZF is set, so the branch is not taken and RAX ends up 0.
jnz     notzero

mov rax, 0x0
hlt

; Failure path: reached only if bits above the low byte influenced the test.
notzero:
mov rax, 0x1
hlt

================================================
FILE: unittests/ASM/Primary/Primary_85.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0200",
    "RBX": "0x0600",
    "RCX": "0x0600"
  }
}
%endif

; Exercises `test r, r` at 64-, 32- and 16-bit operand size. After each test
; the flags are read with LAHF into a zeroed RAX and bit 12 (AF inside AH) is
; masked off, because the comments below mark AF as undefined for test.
mov rax, 0x6162636465666768
mov rdx, 0x71727374
test rax, rdx
; test = 0x6162636465666768 & 0x71727374 = 0x61626360
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
mov rcx, rax
; Clear bit 12 (AF) so the undefined flag cannot affect the checked value.
and rcx, 0xffffffffffffefff

mov rax, 0x5152535455565758
mov rdx, 0x71727374
test eax, edx
; test = 0x55565758 & 0x71727374 = 0x51525350
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
mov rbx, rax
; Clear bit 12 (AF) so the undefined flag cannot affect the checked value.
and rbx, 0xffffffffffffefff

mov rax, 0x4142434445464748
mov rdx, 0x7172
test ax, dx
; test = 0x4748 & 0x7172 = 0x4140
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
; Clear bit 12 (AF) so the undefined flag cannot affect the checked value.
and rax, 0xffffffffffffefff

hlt


================================================
FILE: unittests/ASM/Primary/Primary_86.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFF48",
    "RBX": "0x41424344454647FF"
  }
}
%endif

; Exercises 8-bit `xchg [mem], r8`: only the low byte of RAX and the first
; byte in memory are swapped.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; RAX = all ones; after the swap AL holds the memory byte 0x48 and the
; memory byte becomes 0xFF.
mov rax, -1
xchg byte [rdx + 8 * 0], al
; Read the modified qword back for the register check.
mov rbx, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF4748",
    "RBX": "0x414243444546FFFF"
  }
}
%endif

; Exercises 16-bit `xchg [mem], r16`: only the low word of RAX and the first
; word in memory are swapped.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; RAX = all ones; after the swap AX holds 0x4748 and the memory word is 0xFFFF.
; The upper 48 bits of RAX are left untouched.
mov rax, -1
xchg word [rdx + 8 * 0], ax
; Read the modified qword back for the register check.
mov rbx, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000045464748",
    "RBX": "0x41424344FFFFFFFF"
  }
}
%endif

; Exercises 32-bit `xchg [mem], r32`: the low dword of RAX and the first
; dword in memory are swapped.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; RAX = all ones; the 32-bit register write zero-extends, so RAX ends up as
; 0x0000000045464748 while the memory dword becomes 0xFFFFFFFF.
mov rax, -1
xchg dword [rdx + 8 * 0], eax
; Read the modified qword back for the register check.
mov rbx, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; Exercises 64-bit `xchg [mem], r64`: RAX and the first qword in memory are
; swapped in full.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; RAX = all ones; after the swap RAX holds the original qword and memory
; holds all ones.
mov rax, -1
xchg qword [rdx + 8 * 0], rax
; Read the modified qword back for the register check.
mov rbx, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0142430001000148",
    "RBX": "0x0142434445464700",
    "RCX": "0x4142434445464700",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4142434445464700",
    "R14": "0x0000000000004647",
    "R13": "0x0000000000004445",
    "R12": "0x0000000000004841",
    "R11": "0x0000000000004841",
    "R10": "0x0000000000004841"
  }
}
%endif

; Exercises 16-bit `xchg [mem], ax` at unaligned addresses, including ones
; that straddle 4-, 8-, 16- and 64-byte boundaries. Each exchange writes the
; word 0x0001 and returns the previous memory word in AX.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Test 1 byte offset within 4byte boundary
mov rax, 1
xchg word [r15 + 8 * 0 + 1], ax
mov r14, rax

; Test 3 byte offset across 4byte boundary
mov rax, 1
xchg word [r15 + 8 * 0 + 3], ax
mov r13, rax

; Test 7 byte offset across 8byte boundary
mov rax, 1
xchg word [r15 + 8 * 0 + 7], ax
mov r12, rax

; Test 15 byte offset across 16byte boundary
mov rax, 1
xchg word [r15 + 8 * 0 + 15], ax
mov r11, rax

; Test 63 byte offset across cacheline boundary
mov rax, 1
xchg word [r15 + 8 * 0 + 63], ax
mov r10, rax

; Read back every qword an exchange touched (0, 1, 2, 7, 8) plus the
; untouched qword 3 as a control.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0100000001464748",
    "RBX": "0x0142434445000000",
    "RCX": "0x4142434445000000",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4142434445000000",
    "R13": "0x0000000042434445",
    "R12": "0x0000000046474841",
    "R11": "0x0000000046474841",
    "R10": "0x0000000046474841"
  }
}
%endif

; Exercises 32-bit `xchg [mem], eax` at unaligned addresses that straddle
; 4-, 8-, 16- and 64-byte boundaries. Each exchange writes the dword
; 0x00000001 and returns the previous memory dword in EAX (zero-extended).
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Test 3 byte offset across 4byte boundary
mov rax, 1
xchg dword [r15 + 8 * 0 + 3], eax
mov r13, rax

; Test 7 byte offset across 8byte boundary
mov rax, 1
xchg dword [r15 + 8 * 0 + 7], eax
mov r12, rax

; Test 15 byte offset across 16byte boundary
mov rax, 1
xchg dword [r15 + 8 * 0 + 15], eax
mov r11, rax

; Test 63 byte offset across cacheline boundary
mov rax, 1
xchg dword [r15 + 8 * 0 + 63], eax
mov r10, rax

; Read back every qword an exchange touched (0, 1, 2, 7, 8) plus the
; untouched qword 3 as a control.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_87_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0142434445464748",
    "RBX": "0x0100000000000000",
    "RCX": "0x4100000000000000",
    "RDX": "0x4142434445464748",
    "RSI": "0x0142434445464748",
    "RDI": "0x4100000000000000",
    "R13": "0x4243444546474841",
    "R12": "0x4243444546474841",
    "R11": "0x4243444546474841"
  }
}
%endif

; Exercises 64-bit `xchg [mem], rax` at unaligned addresses that straddle
; 8-, 16- and 64-byte boundaries. Each exchange writes the qword 1 and
; returns the previous memory qword in RAX.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Test 7 byte offset across 8byte boundary
mov rax, 1
xchg qword [r15 + 8 * 0 + 7], rax
mov r13, rax

; Test 15 byte offset across 16byte boundary
mov rax, 1
xchg qword [r15 + 8 * 0 + 15], rax
mov r12, rax

; Test 63 byte offset across cacheline boundary
mov rax, 1
xchg qword [r15 + 8 * 0 + 63], rax
mov r11, rax

; Read back every qword an exchange touched (0, 1, 2, 7, 8) plus the
; untouched qword 3 as a control.
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_8C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142",
    "RBX": "0x4143",
    "RCX": "0x4144"
  }
}
%endif
; Writes three distinct selector values to ES, SS and DS, then reads them
; back into 16-bit registers.
; This relies on some behaviour that isn't guaranteed in 64bit mode

; Technically this can result in an invalid selector which can cause faults
; We currently don't do any selector validation to enforce this
mov rax, 0x4142

mov es, ax

inc rax
mov ss, ax

inc rax
mov ds, ax

; Can't test FS/GS here
; Behaviour is ill-defined and needs to be worked through

; Zero the destinations: the 16-bit reads below only write the low word.
mov rax, 0
mov rbx, 0
mov rcx, 0

mov ax, es
mov bx, ss
mov cx, ds

hlt


================================================
FILE: unittests/ASM/Primary/Primary_8C_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142",
    "RBX": "0x4143",
    "RCX": "0x4144"
  }
}
%endif
; Same segment-register write/read sequence as the plain variant, but with a
; hand-placed 0x44 byte (REX.R) in front of every segment move. The expected
; register values are the same as without the prefix.
; NOTE(review): if the assembler emits an operand-size prefix (0x66) for the
; 16-bit destination forms near the end, the 0x44 byte would sit before that
; prefix rather than directly before the opcode -- confirm the emitted bytes
; match the intent.
; This relies on some behaviour that isn't guaranteed in 64bit mode

; Technically this can result in an invalid selector which can cause faults
; We currently don't do any selector validation to enforce this
mov rax, 0x4142

db 0x44 ; REX.R
mov es, ax

inc rax

db 0x44 ; REX.R
mov ss, ax

inc rax

db 0x44 ; REX.R
mov ds, ax

; Can't test FS/GS here
; Behaviour is ill-defined and needs to be worked through

; Zero the destinations: the 16-bit reads below only write the low word.
mov rax, 0
mov rbx, 0
mov rcx, 0

db 0x44 ; REX.R
mov ax, es

db 0x44 ; REX.R
mov bx, ss

db 0x44 ; REX.R
mov cx, ds

hlt


================================================
FILE: unittests/ASM/Primary/Primary_8D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4142434445464748",
    "R14": "0x929496989A9C9EA0",
    "R13": "0x0000000045464748",
    "R12": "0x000000009A9C9EA0",
    "R11": "0x828486888A8C8E90",
    "R10": "0x565B60656A6F7478",
    "R9":  "0x41424344454647A9",
    "R8":  "0x92949698FBFF0204",
    "RSI": "0xFFFFFFFFFFFF0204"
  }
}
%endif

; Exercises `lea` with several addressing forms and 64-, 32- and 16-bit
; destinations. No memory is accessed; only the address arithmetic is checked.
; NOTE(review): this first value of RDX is overwritten two lines below before
; it is ever used.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov rdx, 0x5152535455565758

; SIB gives us `scale * index + base + offset`
; scale = constant {1, 2, 4, 8}
; Index = <Reg>
; Base = <Reg>
; Offset = Constant {imm8, imm32}
lea r15, [rax]
lea r14, [rax + rdx]

; 32-bit address registers and 32-bit destinations: results are truncated
; to 32 bits and zero-extended.
lea r13d, [eax]
lea r12d, [eax + edx]

; Scaled index, without and with a base register.
lea r11, [2 * rax]
lea r10, [4 * rax + rdx]

; Small and large displacements.
lea r9, [rax + 0x61]
lea r8, [rax + rdx + 0x61626364]

; 16-bit destination: only the low word of RSI is written, so the preloaded
; ones remain in the upper 48 bits.
mov rsi, -1
lea si, [rax + rdx + 0x61626364]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_8D_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000FFFFFFFF",
    "RBX": "0x00000000FFFFFFFF",
    "RCX": "0x414243444546FFFF",
    "RDX": "0x414243444546FFFF",
    "RDI": "0x0000000000000001",
    "RSI": "0x0000000000000001",
    "RBP": "0x0",
    "RSP": "0x0"
  }
}
%endif

; Exercises `lea` with mixed address sizes (32- vs 64-bit address registers)
; and destination sizes (16/32/64-bit), checking truncation and zero-extension
; of the computed address.

; 64-bit destination, 32-bit address: address is truncated to 0xFFFFFFFF.
mov rax, -1
mov rbx, -1

lea rax, [ebx]

; 32-bit destination, 32-bit address: result is zero-extended.
mov rbx, -1
mov rcx, -1

lea ebx, [ecx]

; 16-bit destination, 32-bit address: upper 48 bits of RCX are preserved.
mov rcx, 0x4142434445464748
mov rdx, -1

lea cx, [edx]

; 16-bit destination, 64-bit address: upper 48 bits of RDX are preserved.
mov rdx, 0x4142434445464748
mov rdi, -1

lea dx, [rdi]

; 64-bit destination, 32-bit base + index: the upper halves of RSI/RBP must
; not contribute (esi = 0, ebp = 1 -> 1).
mov rdi, 0x4142434445464748
mov rsi, 0xFFFFFFFF00000000
mov rbp, 1

lea rdi, [esi + ebp]

; 32-bit destination, 64-bit base + index: the 64-bit sum is truncated to 1.
mov rsi, 0x4142434445464748
mov rbp, 0xFFFFFFFF00000000
mov rsp, 1

lea esi, [rbp + rsp]

; 32-bit destination, 32-bit address with an extended register (r9d):
; both low halves are zero -> 0.
mov rbp, 0x4142434445464748
mov rsp, 0xFFFFFFFF00000000
mov r9,  0x0000000200000000

lea ebp, [esp + r9d]

; 64-bit destination, 32-bit address with two extended registers:
; both low halves are zero -> 0.
mov rsp, 0x4142434445464748
mov r9,  0xFFFFFFFF00000000
mov r10, 0x0000000200000000

lea rsp, [r10d + r9d]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_90.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x42424242",
    "RBX": "0x42424242",
    "RCX": "0x00000000FFFFFFFF",
    "RDX": "0x42424242",
    "RBP": "0x00000000FFFFFFFF",
    "RSI": "0x42424242",
    "RDI": "0x00000000FFFFFFFF",
    "RSP": "0x42424242",
    "R8":  "0x00000000FFFFFFFF",
    "R9":  "0x42424242",
    "R10": "0x00000000FFFFFFFF",
    "R11": "0x42424242",
    "R12": "0x00000000FFFFFFFF",
    "R13": "0x42424242",
    "R14": "0x00000000FFFFFFFF",
    "R15": "0x42424242"
  }
}
%endif

; Exercises 32-bit `xchg reg, eax` for a series of registers, storing both
; sides of each swap to memory and loading them back for the register check.

; swap32 <reg32>, <slot>
;   Sets <reg32> to all ones and eax to 0x42424242, exchanges them, then
;   stores eax at slot offset +0 and <reg32> at slot offset +8 (16-byte slots).
;   When <reg32> is eax itself, both stores see 0x42424242.
%macro swap32 2
mov %1, -1
mov eax, 0x42424242
xchg %1, eax

mov dword [r15 + 16 * %2 + 0], eax
mov dword [r15 + 16 * %2 + 8], %1

%endmacro

mov r15, 0xe0000000
mov rax, 0

; Zero 128 bytes (slots 0-7) so the upper half of every qword read back
; below is zero.
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax
mov [r15 + 8 * 10], rax
mov [r15 + 8 * 11], rax
mov [r15 + 8 * 12], rax
mov [r15 + 8 * 13], rax
mov [r15 + 8 * 14], rax
mov [r15 + 8 * 15], rax

swap32 eax, 0
swap32 ebx, 1
swap32 ecx, 2
swap32 edx, 3
swap32 ebp, 4
swap32 esi, 5
swap32 edi, 6
swap32 esp, 7
; NOTE(review): slot 8 lies just past the zeroed area above and is never
; loaded back below, so this swap is executed but its result is not checked.
swap32 r8d, 8

; Load slots 0-7 into the sixteen registers; r15 is loaded last because it
; is the base pointer for all the other loads.
mov rax, [r15 + 16 * 0 + 0]
mov rbx, [r15 + 16 * 0 + 8]
mov rcx, [r15 + 16 * 1 + 0]
mov rdx, [r15 + 16 * 1 + 8]
mov rbp, [r15 + 16 * 2 + 0]
mov rsi, [r15 + 16 * 2 + 8]
mov rdi, [r15 + 16 * 3 + 0]
mov rsp, [r15 + 16 * 3 + 8]
mov r8,  [r15 + 16 * 4 + 0]
mov r9,  [r15 + 16 * 4 + 8]
mov r10, [r15 + 16 * 5 + 0]
mov r11, [r15 + 16 * 5 + 8]
mov r12, [r15 + 16 * 6 + 0]
mov r13, [r15 + 16 * 6 + 8]
mov r14, [r15 + 16 * 7 + 0]
mov r15, [r15 + 16 * 7 + 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_90_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000FFFFFFFF",
    "RBX": "0x42424242",
    "RCX": "0x00000000FFFFFFFF",
    "RDX": "0x42424242",
    "RBP": "0x00000000FFFFFFFF",
    "RSI": "0x42424242",
    "RDI": "0x00000000FFFFFFFF",
    "RSP": "0x42424242",
    "R8":  "0x00000000FFFFFFFF",
    "R9":  "0x42424242",
    "R10": "0x00000000FFFFFFFF",
    "R11": "0x42424242",
    "R12": "0x00000000FFFFFFFF",
    "R13": "0x42424242",
    "R14": "0x00000000FFFFFFFF",
    "R15": "0x42424242"
  }
}
%endif

; Exercises 32-bit `xchg reg, eax` for the extended registers r9d-r14d (plus
; esp), storing both sides of each swap to memory and loading them back for
; the register check.

; swap32 <reg32>, <slot>
;   Sets <reg32> to all ones and eax to 0x42424242, exchanges them, then
;   stores eax at slot offset +0 and <reg32> at slot offset +8 (16-byte slots).
%macro swap32 2
mov %1, -1
mov eax, 0x42424242
xchg %1, eax

mov dword [r15 + 16 * %2 + 0], eax
mov dword [r15 + 16 * %2 + 8], %1

%endmacro

mov r15, 0xe0000000
mov rax, 0

; Zero 128 bytes (slots 0-7) so the upper half of every qword read back
; below is zero.
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax
mov [r15 + 8 * 10], rax
mov [r15 + 8 * 11], rax
mov [r15 + 8 * 12], rax
mov [r15 + 8 * 13], rax
mov [r15 + 8 * 14], rax
mov [r15 + 8 * 15], rax

swap32 r9d, 0
swap32 r10d, 1
swap32 r11d, 2
swap32 r12d, 3
swap32 r13d, 4
swap32 r14d, 5
; NOTE(review): r14d is swapped a second time here; r15 is in use as the
; base pointer, which is presumably why r15d is not used -- confirm.
swap32 r14d, 6
swap32 esp, 7

; Load slots 0-7 into the sixteen registers; r15 is loaded last because it
; is the base pointer for all the other loads.
mov rax, [r15 + 16 * 0 + 0]
mov rbx, [r15 + 16 * 0 + 8]
mov rcx, [r15 + 16 * 1 + 0]
mov rdx, [r15 + 16 * 1 + 8]
mov rbp, [r15 + 16 * 2 + 0]
mov rsi, [r15 + 16 * 2 + 8]
mov rdi, [r15 + 16 * 3 + 0]
mov rsp, [r15 + 16 * 3 + 8]
mov r8,  [r15 + 16 * 4 + 0]
mov r9,  [r15 + 16 * 4 + 8]
mov r10, [r15 + 16 * 5 + 0]
mov r11, [r15 + 16 * 5 + 8]
mov r12, [r15 + 16 * 6 + 0]
mov r13, [r15 + 16 * 6 + 8]
mov r14, [r15 + 16 * 7 + 0]
mov r15, [r15 + 16 * 7 + 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_90_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242424242424242",
    "RBX": "0x4242424242424242",
    "RCX": "0xFFFFFFFFFFFFFFFF",
    "RDX": "0x4242424242424242",
    "RBP": "0xFFFFFFFFFFFFFFFF",
    "RSI": "0x4242424242424242",
    "RDI": "0xFFFFFFFFFFFFFFFF",
    "RSP": "0x4242424242424242",
    "R8":  "0xFFFFFFFFFFFFFFFF",
    "R9":  "0x4242424242424242",
    "R10": "0xFFFFFFFFFFFFFFFF",
    "R11": "0x4242424242424242",
    "R12": "0xFFFFFFFFFFFFFFFF",
    "R13": "0x4242424242424242",
    "R14": "0xFFFFFFFFFFFFFFFF",
    "R15": "0x4242424242424242"
  }
}
%endif

; Exercises 64-bit `xchg reg, rax` for a series of registers, storing both
; sides of each swap to memory and loading them back for the register check.

; swap64 <reg64>, <slot>
;   Sets <reg64> to all ones and rax to 0x4242424242424242, exchanges them,
;   then stores rax at slot offset +0 and <reg64> at slot offset +8
;   (16-byte slots). When <reg64> is rax itself, both stores see 0x4242...42.
%macro swap64 2
mov %1, -1
mov rax, 0x4242424242424242
xchg %1, rax

mov qword [r15 + 16 * %2 + 0], rax
mov qword [r15 + 16 * %2 + 8], %1

%endmacro

mov r15, 0xe0000000
mov rax, 0

; Zero 128 bytes (slots 0-7) before the swaps write their results.
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax
mov [r15 + 8 * 10], rax
mov [r15 + 8 * 11], rax
mov [r15 + 8 * 12], rax
mov [r15 + 8 * 13], rax
mov [r15 + 8 * 14], rax
mov [r15 + 8 * 15], rax

swap64 rax, 0
swap64 rbx, 1
swap64 rcx, 2
swap64 rdx, 3
swap64 rbp, 4
swap64 rsi, 5
swap64 rdi, 6
swap64 rsp, 7
; NOTE(review): slot 8 lies just past the zeroed area above and is never
; loaded back below, so this swap is executed but its result is not checked.
swap64 r8, 8

; Load slots 0-7 into the sixteen registers; r15 is loaded last because it
; is the base pointer for all the other loads.
mov rax, [r15 + 16 * 0 + 0]
mov rbx, [r15 + 16 * 0 + 8]
mov rcx, [r15 + 16 * 1 + 0]
mov rdx, [r15 + 16 * 1 + 8]
mov rbp, [r15 + 16 * 2 + 0]
mov rsi, [r15 + 16 * 2 + 8]
mov rdi, [r15 + 16 * 3 + 0]
mov rsp, [r15 + 16 * 3 + 8]
mov r8,  [r15 + 16 * 4 + 0]
mov r9,  [r15 + 16 * 4 + 8]
mov r10, [r15 + 16 * 5 + 0]
mov r11, [r15 + 16 * 5 + 8]
mov r12, [r15 + 16 * 6 + 0]
mov r13, [r15 + 16 * 6 + 8]
mov r14, [r15 + 16 * 7 + 0]
mov r15, [r15 + 16 * 7 + 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_90_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFFFF",
    "RBX": "0x4242424242424242",
    "RCX": "0xFFFFFFFFFFFFFFFF",
    "RDX": "0x4242424242424242",
    "RBP": "0xFFFFFFFFFFFFFFFF",
    "RSI": "0x4242424242424242",
    "RDI": "0xFFFFFFFFFFFFFFFF",
    "RSP": "0x4242424242424242",
    "R8":  "0xFFFFFFFFFFFFFFFF",
    "R9":  "0x4242424242424242",
    "R10": "0xFFFFFFFFFFFFFFFF",
    "R11": "0x4242424242424242",
    "R12": "0xFFFFFFFFFFFFFFFF",
    "R13": "0x4242424242424242",
    "R14": "0xFFFFFFFFFFFFFFFF",
    "R15": "0x4242424242424242"
  }
}
%endif

; Exercises 64-bit `xchg reg, rax` for the extended registers r9-r14 (plus
; rsp), storing both sides of each swap to memory and loading them back for
; the register check.

; swap64 <reg64>, <slot>
;   Sets <reg64> to all ones and rax to 0x4242424242424242, exchanges them,
;   then stores rax at slot offset +0 and <reg64> at slot offset +8
;   (16-byte slots).
%macro swap64 2
mov %1, -1
mov rax, 0x4242424242424242
xchg %1, rax

mov qword [r15 + 16 * %2 + 0], rax
mov qword [r15 + 16 * %2 + 8], %1

%endmacro

mov r15, 0xe0000000
mov rax, 0

; Zero 128 bytes (slots 0-7) before the swaps write their results.
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax
mov [r15 + 8 * 10], rax
mov [r15 + 8 * 11], rax
mov [r15 + 8 * 12], rax
mov [r15 + 8 * 13], rax
mov [r15 + 8 * 14], rax
mov [r15 + 8 * 15], rax

swap64 r9, 0
swap64 r10, 1
swap64 r11, 2
swap64 r12, 3
swap64 r13, 4
swap64 r14, 5
; NOTE(review): r14 is swapped a second time here; r15 is in use as the
; base pointer, which is presumably why r15 is not used -- confirm.
swap64 r14, 6
swap64 rsp, 7

; Load slots 0-7 into the sixteen registers; r15 is loaded last because it
; is the base pointer for all the other loads.
mov rax, [r15 + 16 * 0 + 0]
mov rbx, [r15 + 16 * 0 + 8]
mov rcx, [r15 + 16 * 1 + 0]
mov rdx, [r15 + 16 * 1 + 8]
mov rbp, [r15 + 16 * 2 + 0]
mov rsi, [r15 + 16 * 2 + 8]
mov rdi, [r15 + 16 * 3 + 0]
mov rsp, [r15 + 16 * 3 + 8]
mov r8,  [r15 + 16 * 4 + 0]
mov r9,  [r15 + 16 * 4 + 8]
mov r10, [r15 + 16 * 5 + 0]
mov r11, [r15 + 16 * 5 + 8]
mov r12, [r15 + 16 * 6 + 0]
mov r13, [r15 + 16 * 6 + 8]
mov r14, [r15 + 16 * 7 + 0]
mov r15, [r15 + 16 * 7 + 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_98.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFFF0"
  }
}
%endif

; Chains the three accumulator sign-extensions: AL -> AX (cbw), AX -> EAX
; (cwde), EAX -> RAX (cdqe). Starting from AL = 0xF0 (negative as a byte),
; the final RAX is 0xFFFFFFFFFFFFFFF0 regardless of the initial upper bits.
mov al, 0xF0
cbw
cwde
cdqe

hlt


================================================
FILE: unittests/ASM/Primary/Primary_98_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R10": "0xFFFFFFFF80000001",
    "R11": "0x00000000FFFF8001",
    "R12": "0x414243444546FF81",
    "R13": "0x0000000000000001",
    "R14": "0x0000000000000001",
    "R15": "0x4142434445460001"
  }
}
%endif

; Exercises cbw / cwde / cdqe individually with positive and negative inputs.
; RAX is preloaded with a recognizable upper pattern so the expected values
; show which bits each form writes: cbw only writes AX, cwde writes EAX
; (zero-extending into the upper half), cdqe writes all of RAX.

; Positive 8bit
mov rax, 0x4142434445464701
cbw
mov r15, rax

; Positive 16bit
mov rax, 0x4142434445460001
cwde
mov r14, rax

; Positive 32bit
mov rax, 0x4142434400000001
cdqe
mov r13, rax

; Negative 8bit
mov rax, 0x4142434445464781
cbw
mov r12, rax

; Negative 16bit
mov rax, 0x4142434445468001
cwde
mov r11, rax

; Negative 32bit
mov rax, 0x4142434480000001
cdqe
mov r10, rax

hlt


================================================
FILE: unittests/ASM/Primary/Primary_99.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFFF0",
    "RDX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; Chains cwd -> cdq -> cqo, each time folding the sign-extension produced in
; (R/E)DX back into the upper half of the accumulator for the next step.
mov rax, 0
mov rdx, 0

; AX = 0xFFF0 (negative): cwd sets DX = 0xFFFF.
mov ax, 0xFFF0
cwd

; EAX = DX:AX = 0xFFFFFFF0 (negative): cdq sets EDX = 0xFFFFFFFF.
shl edx, 16
or eax, edx
cdq

; RAX = EDX:EAX = 0xFFFFFFFFFFFFFFF0 (negative): cqo sets RDX to all ones.
shl rdx, 32
or rax, rdx
cqo

hlt


================================================
FILE: unittests/ASM/Primary/Primary_99_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBP": "0xFFFFFFFFFFFFFFFF",
    "RSI": "0xFFFFFFFFFFFFFFFF",

    "RDI": "0xFFFFFFFF",
    "RSP": "0xFFFFFFFF",

    "R8": "0xFFFF",
    "R9": "0xFFFF",

    "R10": "0x01",
    "R11": "0x0",

    "R12": "0x01",
    "R13": "0x0",

    "R14": "0x01",
    "R15": "0xFFFFFFFFFFFF0000"
  }
}
%endif

; Exercises cwd / cdq / cqo individually with positive and negative inputs.
; RDX is preloaded with the opposite of the expected sign fill so the checked
; values show which bits each form writes: cwd only writes DX, cdq writes EDX
; (zero-extending into the upper half), cqo writes all of RDX.

; Positive 16bit
mov rax, 1
mov rdx, -1
cwd

mov r14, rax
mov r15, rdx

; Positive 32bit

mov rax, 1
mov rdx, -1
cdq

mov r12, rax
mov r13, rdx

; Positive 64bit

mov rax, 1
mov rdx, -1
cqo

mov r10, rax
mov r11, rdx

; Negative 16bit
mov rax, 0xFFFF
mov rdx, 0
cwd

mov r8, rax
mov r9, rdx

; Negative 32bit
mov rax, 0xFFFFFFFF
mov rdx, 0
cdq

mov rdi, rax
mov rsp, rdx

; Negative 64bit
mov rax, -1
mov rdx, 0
cqo

mov rbp, rax
mov rsi, rdx


hlt


================================================
FILE: unittests/ASM/Primary/Primary_9B.asm
================================================
%ifdef CONFIG
{
}
%endif

; Smoke test for `fwait`: the CONFIG block expects no register values, so the
; test passes as long as the instruction executes and `hlt` is reached.
; Just to ensure this executes
fwait

hlt


================================================
FILE: unittests/ASM/Primary/Primary_9C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x202",
    "RBX": "0x202"
  }
}
%endif

; Exercises pushf at 64-bit and (via a manual 0x66 prefix) 16-bit operand
; size, and checks the pushed flag images.
mov rsp, 0xe0000010

; Setup to default state
mov rax, 0
push rax
popfq

; These pushes will end up being the default rflags initialization value
pushfq

; nasm doesn't encode 16bit pushf
; put the prefix before the instruction manually
db 0x66
pushfq

mov rax, 0x0
mov rbx, 0x0

; The 16-bit image was pushed last, so it sits at [rsp]; the 64-bit image
; pushed before it starts two bytes higher.
mov ax, word [rsp]
mov rbx, qword [rsp + 2]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_9D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x202",
    "RBX": "0x202"
  }
}
%endif

; Exercises popf at 64-bit and (via a manual 0x66 prefix) 16-bit operand size:
; flags are saved, deliberately changed by a cmp, restored with popf, and then
; pushed again so the restored value can be read from the stack.
mov rax, 0x0
mov rbx, 0x0
mov rsp, 0xe0000010

; Setup to default state
mov rax, 0
push rax
popfq

; These pushes will end up being the default rflags initialization value
pushfq

; cmp changes the flags; popfq must bring back the image saved above.
cmp rax, 1
popfq
pushfq

mov rax, qword [rsp]

; These pushes will end up being the default rflags initialization value
db 0x66
pushfq

; Same save / modify / restore sequence using the 0x66-prefixed 16-bit forms.
cmp rax, 1
db 0x66
popfq
db 0x66
pushfq

mov bx, word [rsp]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_9E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFD7FF"
  }
}
%endif

; Round-trips the flags through sahf / lahf. AH = 0xFF is stored with sahf;
; lahf then reads back 0xD7, i.e. bits 3 and 5 come back as zero while bit 1
; and the five status flags come back set (see the table below).

; Set EFLAGS to known value with sahf
mov rax, -1
sahf

; Now load back
mov rax, -1
lahf
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 10000000
; ================
;         11010111
; OF: LAHF doesn't load - 0

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDX": "0x41",
    "RAX": "0x42"
  }
}
%endif

; Exercises the absolute-address (moffs) accumulator loads, hand-encoded with
; `db` so the exact opcode and prefix bytes are the ones under test.

; Seed memory: 0x41 at 0xe0000008 and 0x42 at 0xe0000000.
mov rdx, 0xe0000008
mov rax, 0x41
mov [rdx], rax

mov rdx, 0xe0000000
mov rax, 0x42
mov [rdx], rax

mov rax, -1
; mov rax, [0xe0000008]
; (REX.W prefix, 64-bit absolute address)
db 0x48
db 0xA1
dq 0x00000000e0000008
mov rdx, rax

mov rax, -1
; mov eax, [0xe0000000]
; (address-size prefix, 32-bit absolute address; the 32-bit load zero-extends
; over the all-ones RAX)
db 0x67
db 0xA1
dd 0xe0000000

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41",
    "RDX": "0x42"
  }
}
%endif

; Exercises the absolute-address (moffs) accumulator stores, hand-encoded with
; `db` so the exact opcode and prefix bytes are the ones under test.
mov rax, 0x41
; mov [0xe0000008], rax
; (REX.W prefix, 64-bit absolute address)
db 0x48
db 0xA3
dq 0x00000000e0000008

mov rax, 0x42
; mov [0xe0000000], eax
; (address-size prefix, 32-bit absolute address)
db 0x67
db 0xA3
dd 0xe0000000

; Read both stored values back through ordinary register-indirect loads.
mov rdx, 0xe0000008
mov rax, [rdx]

mov rdx, 0xe0000000
mov edx, [rdx]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises `movsb` without a repeat prefix: eight single-byte copies move
; the first qword into the third, advancing RSI and RDI by one each time.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
; Zero the destination qword and the one after it; the latter is read back
; as a check that nothing past the eight copied bytes was written.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

; Clear the direction flag so the pointers increment.
cld
movsb ; rdi <- rsi
movsb
movsb
movsb

movsb
movsb
movsb
movsb

mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises `rep movsb`: a single repeated instruction with RCX = 8 copies
; the first qword into the third and advances RSI and RDI by eight.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
; Zero the destination qword and the one after it; the latter is read back
; as a check that nothing past the eight copied bytes was written.
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

; Clear the direction flag so the pointers increment.
cld
mov rcx, 8
rep movsb ; rdi <- rsi

mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises MOVSB with the REPNE (0xF2) prefix copying upwards. The expected
; register values in the CONFIG header require it to copy all RCX bytes, i.e.
; to behave exactly like REP MOVSB.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 8 ; byte count
repne movsb ; [rdi] <- [rsi], RCX times

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REPNE_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000F",
    "RSI": "0xDFFFFFFF"
  }
}
%endif

; Exercises REPNE MOVSB copying downwards (DF = 1): both pointers start on the
; last byte of their qword and are decremented after every byte.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the highest byte of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 7]
lea rsi, [rdx + 8 * 0 + 7]

std
mov rcx, 8 ; byte count
repne movsb ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REPNE_many.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Runs the same 8-byte REPNE MOVSB copy sixteen times back to back in one
; instruction stream. Every round re-seeds RDI/RSI/RCX, so the final state is
; that of a single copy of qword 0 into qword 2.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; The preprocessor expands this into sixteen identical copy sequences.
%rep 16
  lea rdi, [rdx + 8 * 2]
  lea rsi, [rdx + 8 * 0]

  cld
  mov rcx, 8
  repne movsb ; [rdi] <- [rsi], RCX times
%endrep

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REP_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000F",
    "RSI": "0xDFFFFFFF"
  }
}
%endif

; Exercises REP MOVSB copying downwards (DF = 1): both pointers start on the
; last byte of their qword and are decremented after every byte.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the highest byte of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 7]
lea rsi, [rdx + 8 * 0 + 7]

std
mov rcx, 8 ; byte count
rep movsb ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REP_Down_Overlapping.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152535455565758",
    "RDX": "0x5858585858585858",
    "RDI": "0xDFFFFFFF",
    "RSI": "0xE0000000"
  }
}
%endif

; Exercises REP MOVSB copying downwards (DF = 1) when the destination sits one
; byte below the source. Each byte written is the source of the next
; iteration, so the byte at offset 8 (0x58) is smeared over offsets 7..0.
; A copy that is not performed strictly byte by byte would give a different
; result.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx], rax
mov rax, 0x5152535455565758
mov [rdx + 8], rax

; Deliberately overlapping source and destination
lea rdi, [rdx + 7]
lea rsi, [rdx + 8]

std
mov rcx, 8 ; byte count
rep movsb ; [rdi] <- [rsi], RCX times, pointers moving down

; RDX = first qword, expected to be eight copies of 0x58.
; RAX still holds the second source pattern loaded above.
mov rdx, [rdx]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REP_Overlapping.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152535455565758",
    "RDX": "0x5152535455565748",
    "RDI": "0xE0000009",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises REP MOVSB copying upwards (DF = 0) when the destination sits one
; byte above the source. Each byte written is the source of the next
; iteration, so the byte at offset 0 (0x48) is smeared over offsets 1..8.
; A copy that is not performed strictly byte by byte would give a different
; result.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx], rax
mov rax, 0x5152535455565758
mov [rdx + 8], rax

; Deliberately overlapping source and destination
lea rdi, [rdx + 1]
lea rsi, [rdx]

cld
mov rcx, 8 ; byte count
rep movsb ; [rdi] <- [rsi], RCX times

; RDX = second qword: only its lowest byte (offset 8) was overwritten by 0x48.
; RAX still holds the second source pattern loaded above.
mov rdx, [rdx + 8]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A4_REP_many.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Runs the same 8-byte REP MOVSB copy sixteen times back to back in one
; instruction stream. Every round re-seeds RDI/RSI/RCX, so the final state is
; that of a single copy of qword 0 into qword 2.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; The preprocessor expands this into sixteen identical copy sequences.
%rep 16
  lea rdi, [rdx + 8 * 2]
  lea rsi, [rdx + 8 * 0]

  cld
  mov rcx, 8
  rep movsb ; [rdi] <- [rsi], RCX times
%endrep

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises opcode 0xA5 with 16-bit operand size (MOVSW) and no repeat prefix:
; four individual MOVSW instructions copy the first qword of the scratch
; buffer into the third qword, walking upwards (DF = 0).
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
movsw ; [rdi] <- [rsi], then both pointers advance by two
movsw

movsw
movsw

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises REP MOVSW copying upwards: four words (8 bytes) from the first
; qword of the scratch buffer into the third qword.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 4 ; element count (words)
rep movsw ; [rdi] <- [rsi], RCX times

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises MOVSW with the REPNE (0xF2) prefix copying upwards. The expected
; register values in the CONFIG header require it to copy all RCX words, i.e.
; to behave exactly like REP MOVSW.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 4 ; element count (words)
repne movsw ; [rdi] <- [rsi], RCX times

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_REPNE_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000E",
    "RSI": "0xDFFFFFFE"
  }
}
%endif

; Exercises REPNE MOVSW copying downwards (DF = 1): both pointers start on the
; last word of their qword and are decremented by two after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the highest word of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 6]
lea rsi, [rdx + 8 * 0 + 6]

std
mov rcx, 4 ; element count (words)
repne movsw ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_REP_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000E",
    "RSI": "0xDFFFFFFE"
  }
}
%endif

; Exercises REP MOVSW copying downwards (DF = 1): both pointers start on the
; last word of their qword and are decremented by two after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the highest word of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 6]
lea rsi, [rdx + 8 * 0 + 6]

std
mov rcx, 4 ; element count (words)
rep movsw ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises opcode 0xA5 with 32-bit operand size (MOVSD) and no repeat prefix:
; two individual MOVSD instructions copy the first qword of the scratch
; buffer into the third qword, walking upwards (DF = 0).
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
movsd ; [rdi] <- [rsi], then both pointers advance by four
movsd

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_dword_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises REP MOVSD copying upwards: two dwords (8 bytes) from the first
; qword of the scratch buffer into the third qword.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 2 ; element count (dwords)
rep movsd ; [rdi] <- [rsi], RCX times

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_dword_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE0000018",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises MOVSD with the REPNE (0xF2) prefix copying upwards. The expected
; register values in the CONFIG header require it to copy all RCX dwords, i.e.
; to behave exactly like REP MOVSD.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 2 ; element count (dwords)
repne movsd ; [rdi] <- [rsi], RCX times

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_dword_REPNE_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000C",
    "RSI": "0xDFFFFFFC"
  }
}
%endif

; Exercises REPNE MOVSD copying downwards (DF = 1): both pointers start on the
; upper dword of their qword and are decremented by four after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the upper dword of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 4]
lea rsi, [rdx + 8 * 0 + 4]

std
mov rcx, 2 ; element count (dwords)
repne movsd ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_dword_REP_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x0",
    "RDI": "0xE000000C",
    "RSI": "0xDFFFFFFC"
  }
}
%endif

; Exercises REP MOVSD copying downwards (DF = 1): both pointers start on the
; upper dword of their qword and are decremented by four after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3 = zeroed destination.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

; Point at the upper dword of the destination and source qwords.
lea rdi, [rdx + 8 * 2 + 4]
lea rsi, [rdx + 8 * 0 + 4]

std
mov rcx, 2 ; element count (dwords)
rep movsd ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX = copied qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x0",
    "RDI": "0xE0000020",
    "RSI": "0xE0000010"
  }
}
%endif

; Exercises opcode 0xA5 with 64-bit operand size (MOVSQ) and no repeat prefix:
; two individual MOVSQ instructions copy qwords 0 and 1 of the scratch buffer
; into qwords 2 and 3, walking upwards (DF = 0).
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3/4 = zeroed.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
movsq ; [rdi] <- [rsi], then both pointers advance by eight
movsq

; RAX/RBX = the two copied qwords, RCX = the qword after them (must be zero).
mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]
mov rcx, [rdx + 8 * 4]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_qword_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x0",
    "RDI": "0xE0000020",
    "RSI": "0xE0000010"
  }
}
%endif

; Exercises REP MOVSQ copying upwards: qwords 0 and 1 of the scratch buffer
; are copied into qwords 2 and 3.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3/4 = zeroed.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 2 ; element count (qwords)
rep movsq ; [rdi] <- [rsi], RCX times

; RAX/RBX = the two copied qwords, RCX = the qword after them (must be zero).
mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]
mov rcx, [rdx + 8 * 4]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_qword_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x0",
    "RDI": "0xE0000020",
    "RSI": "0xE0000010"
  }
}
%endif

; Exercises MOVSQ with the REPNE (0xF2) prefix copying upwards. The expected
; register values in the CONFIG header require it to copy all RCX qwords, i.e.
; to behave exactly like REP MOVSQ.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3/4 = zeroed.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 2]
lea rsi, [rdx + 8 * 0]

cld
mov rcx, 2 ; element count (qwords)
repne movsq ; [rdi] <- [rsi], RCX times

; RAX/RBX = the two copied qwords, RCX = the qword after them (must be zero).
mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]
mov rcx, [rdx + 8 * 4]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_qword_REPNE_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x0",
    "RDI": "0xE0000008",
    "RSI": "0xDFFFFFF8"
  }
}
%endif

; Exercises REPNE MOVSQ copying downwards (DF = 1): the pointers start on the
; higher qword of each pair and are decremented by eight after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3/4 = zeroed.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

; Start at the last qword of the destination (3) and source (1) ranges.
lea rdi, [rdx + 8 * 3]
lea rsi, [rdx + 8 * 1]

std
mov rcx, 2 ; element count (qwords)
repne movsq ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX/RBX = the two copied qwords, RCX = the qword after them (must be zero).
mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]
mov rcx, [rdx + 8 * 4]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A5_qword_REP_Down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x0",
    "RDI": "0xE0000008",
    "RSI": "0xDFFFFFF8"
  }
}
%endif

; Exercises REP MOVSQ copying downwards (DF = 1): the pointers start on the
; higher qword of each pair and are decremented by eight after every element.
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = source patterns, qword 2/3/4 = zeroed.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

; Start at the last qword of the destination (3) and source (1) ranges.
lea rdi, [rdx + 8 * 3]
lea rsi, [rdx + 8 * 1]

std
mov rcx, 2 ; element count (qwords)
rep movsq ; [rdi] <- [rsi], RCX times, pointers moving down

; RAX/RBX = the two copied qwords, RCX = the qword after them (must be zero).
mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]
mov rcx, [rdx + 8 * 4]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8300",
    "RDI": "0xE0000009",
    "RSI": "0xE0000001"
  }
}
%endif

; Exercises opcode 0xA6 (CMPSB) without a repeat prefix, walking upwards
; (DF = 0), and captures the resulting flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x47
mov [rdx + 8 * 0], rax
mov rax, 0x61
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

cld
cmpsb ; flags from [rsi] - [rdi]; both pointers then advance by one
; cmp = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0200",
    "RCX": "0x5",
    "RDX": "0x0",
    "RDI": "0xE0000005",
    "RSI": "0xE0000015"
  }
}
%endif

; Exercises REPE CMPSB walking upwards over two strings that diverge at
; index 4 ('S' vs 'U'); the compare must stop there with RCX = 5.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 14

lea rdi, [rdx + 8 * 0]
lea rsi, [rdx + 8 * 2]

cld
mov rcx, 10 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
; Last compare: 'U' (0x55) - 'S' (0x53) = 0x02, so only the always-set bit 1
; is reported.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestUnmatched\0"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x3",
    "RDX": "0x1",
    "RDI": "0xE0000007",
    "RSI": "0xE0000017"
  }
}
%endif

; Exercises REPNE CMPSB walking upwards: the two strings differ in their
; first six bytes and first match at index 6 ('T'), so the compare must stop
; after seven iterations with RCX = 3 and ZF set.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 11

lea rdi, [rdx + 8 * 0]
lea rsi, [rdx + 8 * 2]

cld
mov rcx, 10 ; Lower String length
repne cmpsb ; flags from [rsi] - [rdi]; repeats while different and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
.StringOne: db "StringTest\0"
.StringTwo: db "UnmatcTest\0"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REPNE_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xE000000A",
    "RSI": "0xE000001A"
  }
}
%endif

; Compares two identical strings with REPE CMPSB walking upwards; all ten
; bytes match, so the compare runs to RCX = 0 with ZF set.
; NOTE(review): the file name says REPNE, but the compare below uses REPE and
; the expected values in the CONFIG header match REPE. The only REPNE in this
; file is the REPNE MOVSB inside the copy macro - confirm this is the intent.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest,
; using MOVSB with the REPNE prefix.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  repne movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 11

lea rdi, [rdx + 8 * 0]
lea rsi, [rdx + 8 * 2]

cld
mov rcx, 10 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestString\0"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xE000000A",
    "RSI": "0xE000001A"
  }
}
%endif

; Exercises REPE CMPSB walking upwards over two identical strings; all ten
; bytes match, so the compare runs to RCX = 0 with ZF set.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 11

lea rdi, [rdx + 8 * 0]
lea rsi, [rdx + 8 * 2]

cld
mov rcx, 10 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
.StringOne: db "TestString\0"
.StringTwo: db "TestString\0"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP_Smaller.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0600",
    "RCX": "0x5",
    "RDX": "0x0",
    "RDI": "0xE0000005",
    "RSI": "0xE0000015"
  }
}
%endif

; Exercises REPE CMPSB walking upwards where the second string is shorter
; than the first; the compare must stop at index 4 with RCX = 5.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

; NOTE(review): 14 bytes are copied although .StringTwo only assembles to
; 6 bytes, so this reads past the end of the string data. The compare stops
; at index 4, so the extra bytes never influence the checked registers.
lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 14

lea rdi, [rdx + 8 * 0]
lea rsi, [rdx + 8 * 2]

cld
mov rcx, 10 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
; Last compare: '\' (0x5C) - 'S' (0x53) = 0x09, which sets PF.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
; The expected flags in the CONFIG header depend on that literal '\'.
.StringOne: db "TestString\0"
.StringTwo: db "Test\0"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP_addrmod.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI": "0x0000000010000104",
    "RDI": "0x0000000010000204",
    "RCX": "0x0000000000000000"
  },
  "MemoryRegions": {
    "0x10000000": "4096"
  }
}
%endif

; Checks REP CMPS operation with 0x67 prefix.
; This test ensures that 32-bit address size override works correctly with REP prefix in 64-bit mode
; Only RSI, RDI and RCX are checked by the CONFIG header; the flags are not.

; Source data at 0x10000100
mov rsi, 0x10000100
mov dword [rsi], 0x41424344      ; 'ABCD'

; Destination data at 0x10000200
mov rdi, 0x10000200
mov dword [rdi], 0x41424344      ; same as source

; Set initial RSI/RDI values with high bits set
; Low 32 bits (ESI/EDI) must point to valid memory
mov rsi, 0x5152535410000100
mov rdi, 0x6162636410000200

; Set RCX to number of bytes to compare
mov rcx, 4

; This should make the instruction use ESI/EDI (32-bit) instead of RSI/RDI (64-bit)
; Per x86-64 architecture, writing to 32-bit registers zeros the upper 32 bits
; The prefix byte is emitted by hand, directly in front of the assembled
; REP CMPSB. All four bytes match, so the expected pointers are base + 4.
db 0x67
rep cmpsb
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8300",
    "RCX": "0x9",
    "RDX": "0x0",
    "RDI": "0xE000000C",
    "RSI": "0xE000001C"
  }
}
%endif

; Exercises REPE CMPSB walking downwards (DF = 1) from offset 13 of two
; staged strings. The very first pair of bytes differs, so the compare must
; stop after one iteration with RCX = 9.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 14

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 14

; Start on the last staged byte (offset 13) of each string.
lea rdi, [rdx + 8 * 0 + 13]
lea rsi, [rdx + 8 * 2 + 13]

std
mov rcx, 10 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
; First compare: 'e' (0x65) - 't' (0x74) = 0xF1, which sets CF and SF.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so each
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
; As a result the 14 staged bytes end in "TestSt" and "Unmatche" rather than
; at the ends of the words; the expected values in the CONFIG header reflect
; that layout.
.StringOne: db "\0\0\0\0TestString"
.StringTwo: db "\0TestUnmatched"


================================================
FILE: unittests/ASM/Primary/Primary_A6_REP_down_Equal.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4600",
    "RCX": "0x0",
    "RDX": "0x1",
    "RDI": "0xDFFFFFFF",
    "RSI": "0xE000000F"
  }
}
%endif

; Exercises REPE CMPSB walking downwards (DF = 1) over two identical staged
; strings; all eleven bytes match, so the compare runs to RCX = 0 with ZF set
; and both pointers end one byte below the start of their string.

; copy Dest, Src, Size: forward byte copy of Size bytes from Src to Dest.
; Overwrites RDI, RSI and RCX and clears DF.
%macro copy 3
  ; Dest, Src, Size
  mov rdi, %1
  mov rsi, %2
  mov rcx, %3

  cld
  rep movsb
%endmacro

mov rdx, 0xe0000000

; Stage string one at buffer offset 0 and string two at buffer offset 16.
lea r15, [rdx + 8 * 0]
lea r14, [rel .StringOne]
copy r15, r14, 11

lea r15, [rdx + 8 * 2]
lea r14, [rel .StringTwo]
copy r15, r14, 11

; Start on the last staged byte (offset 10) of each string.
lea rdi, [rdx + 8 * 0 + 10]
lea rsi, [rdx + 8 * 2 + 10]

std
mov rcx, 11 ; Lower String length
repe cmpsb ; flags from [rsi] - [rdi]; repeats while equal and RCX != 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

; DL = 1 only if the last compared bytes were equal (ZF set).
mov rdx, 0
sete dl

hlt

; NOTE: NASM only expands C-style escapes inside backquoted strings, so the
; "\0" below assembles as the two characters '\' and '0', not a NUL byte.
.StringOne: db "\0TestString"
.StringTwo: db "\0TestString"


================================================
FILE: unittests/ASM/Primary/Primary_A6_addrmod.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI": "0x0000000010000101",
    "RDI": "0x0000000010000201"
  },
  "MemoryRegions": {
    "0x10000000": "4096"
  }
}
%endif

; Check CMPS* operations with 0x67 prefix.
; This test ensures that 32-bit address size override works correctly in 64-bit mode
; Only RSI and RDI are checked by the CONFIG header; the flags are not.

; Set up source data at 0x10000100
mov rsi, 0x10000100
mov byte [rsi], 0x41      ; 'A'

; Set up destination data at 0x10000200
mov rdi, 0x10000200
mov byte [rdi], 0x41      ; same as source

; Set initial RSI/RDI values with high bits set
; Low 32 bits (ESI/EDI) must point to valid memory
mov rsi, 0x5152535410000100
mov rdi, 0x6162636410000200

; This should make the instruction use ESI/EDI (32-bit) instead of RSI/RDI (64-bit)
; Per x86-64 architecture, writing to 32-bit registers zeros the upper 32 bits
; The prefix byte is emitted by hand, directly in front of the assembled
; CMPSB. One byte is compared, so the expected pointers are base + 1.
db 0x67
cmpsb
hlt


================================================
FILE: unittests/ASM/Primary/Primary_A6_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000007",
    "RSI": "0xDFFFFFFF"
  }
}
%endif

; Exercises opcode 0xA6 (CMPSB) without a repeat prefix, walking downwards
; (DF = 1), and captures the resulting flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x61
mov [rdx + 8 * 0], rax
mov rax, 0x47
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

std
cmpsb ; flags from [rsi] - [rdi]; both pointers then move down by one
; cmp = 0x61 - 0x47 = 0x1A
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000 (0x1A has three bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x1 - 0x7 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1600",
    "RDI": "0xE000000C",
    "RSI": "0xE0000004"
  }
}
%endif

; Exercises opcode 0xA7 with 32-bit operand size (CMPSD) without a repeat
; prefix, walking upwards (DF = 0), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x61626364
mov [rdx + 8 * 0], rax
mov rax, 0x55565758
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

cld
cmpsd ; flags from [rsi] - [rdi]; both pointers then advance by four
; cmp = 0x61626364 - 0x55565758 = 0x0C0C0C0C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100 (low byte 0x0C has two bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x4 - 0x8 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010110
; OF: LAHF doesn't load - 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_dword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1600",
    "RDI": "0xE0000004",
    "RSI": "0xDFFFFFFC"
  }
}
%endif

; Exercises opcode 0xA7 with 32-bit operand size (CMPSD) without a repeat
; prefix, walking downwards (DF = 1), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x61626364
mov [rdx + 8 * 0], rax
mov rax, 0x55565758
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

std
cmpsd ; flags from [rsi] - [rdi]; both pointers then move down by four
; cmp = 0x61626364 - 0x55565758 = 0x0C0C0C0C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100 (low byte 0x0C has two bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x4 - 0x8 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010110
; OF: LAHF doesn't load - 0
; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x9700",
    "RDI": "0xE0000010",
    "RSI": "0xE0000008"
  }
}
%endif

; Exercises opcode 0xA7 with 64-bit operand size (CMPSQ) without a repeat
; prefix, walking upwards (DF = 0), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x0000000061626364
mov [rdx + 8 * 0], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

cld
cmpsq ; flags from [rsi] - [rdi]; both pointers then advance by eight
; cmp = 0x0000000061626364 - 0x6162636465666768 = 0x9E9D9C9BFBFBFBFC
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100 (low byte 0xFC has six bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x4 - 0x8 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10010111
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_qword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x9700",
    "RDI": "0xE0000000",
    "RSI": "0xDFFFFFF8"
  }
}
%endif

; Exercises opcode 0xA7 with 64-bit operand size (CMPSQ) without a repeat
; prefix, walking downwards (DF = 1), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x0000000061626364
mov [rdx + 8 * 0], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

std
cmpsq ; flags from [rsi] - [rdi]; both pointers then move down by eight
; cmp = 0x0000000061626364 - 0x6162636465666768 = 0x9E9D9C9BFBFBFBFC
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100 (low byte 0xFC has six bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x4 - 0x8 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10010111
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE000000A",
    "RSI": "0xE0000002"
  }
}
%endif

; Exercises opcode 0xA7 with 16-bit operand size (CMPSW) without a repeat
; prefix, walking upwards (DF = 0), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x6162
mov [rdx + 8 * 0], rax
mov rax, 0x4546
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

cld
cmpsw ; flags from [rsi] - [rdi]; both pointers then advance by two
; cmp = 0x6162 - 0x4546 = 0x1C1C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000 (low byte 0x1C has three bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x2 - 0x6 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A7_word_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000006",
    "RSI": "0xDFFFFFFE"
  }
}
%endif

; Exercises opcode 0xA7 with 16-bit operand size (CMPSW) without a repeat
; prefix, walking downwards (DF = 1), and captures the flags through LAHF.
mov rdx, 0xe0000000

mov rax, 0x6162
mov [rdx + 8 * 0], rax
mov rax, 0x4546
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]
lea rsi, [rdx + 8 * 0]

std
cmpsw ; flags from [rsi] - [rdi]; both pointers then move down by two
; cmp = 0x6162 - 0x4546 = 0x1C1C
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000 (low byte 0x1C has three bits set)
; 3:  0 - 00000000
; 4: AF - 00010000 (low nibble 0x2 - 0x6 borrows)
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0600"
  }
}
%endif

; Exercises opcode 0xA8 (TEST AL, imm8) and captures the resulting flags
; through LAHF.
mov rax, 0x4142434445464847
test al, 0x61
; test = 0x47 & 0x61 = 0x41
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100 (0x41 has two bits set)
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

; Clear RAX first so that only AH carries the flag byte.
mov rax, 0
lahf
; AF is undefined after TEST. Mask it out (AH bit 4 = RAX bit 12) so the
; expected value does not depend on how an implementation leaves it.
and rax, 0xffffffffffffefff

hlt


================================================
FILE: unittests/ASM/Primary/Primary_A9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0200",
    "RBX": "0x0600",
    "RCX": "0x0600"
  }
}
%endif

; Exercises opcode 0xA9 (TEST rAX, imm) at 64-, 32- and 16-bit operand sizes.
; After each TEST the flags are captured through LAHF and the undefined AF
; bit is masked out before the value is kept for the harness
; (RCX = 64-bit case, RBX = 32-bit case, RAX = 16-bit case).
mov rax, 0x6162636465666768
test rax, 0x71727374
; test = 0x6162636465666768 & 0x71727374 = 0x61626360
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
mov rcx, rax
; Drop AF (AH bit 4 = bit 12 of the register).
and rcx, 0xffffffffffffefff

mov rax, 0x5152535455565758
test eax, 0x71727374
; test = 0x55565758 & 0x71727374 = 0x51525350
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
mov rbx, rax
; Drop AF (AH bit 4 = bit 12 of the register).
and rbx, 0xffffffffffffefff

mov rax, 0x4142434445464748
test ax, 0x7172
; test = 0x4748 & 0x7172 = 0x4140
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
; Drop AF (AH bit 4 = bit 12 of the register).
and rax, 0xffffffffffffefff

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF2F2F2F2F2F2F2F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; Exercises opcode 0xAA (STOSB) without a repeat prefix: eight individual
; STOSB instructions fill the third qword of the scratch buffer with 0xF2,
; walking upwards (DF = 0).
mov rdx, 0xe0000000

; Buffer layout: qword 0/1 = patterns, qword 2/3 = zeroed fill target.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2]

cld
mov rax, 0xF2 ; fill byte in AL
stosb ; [rdi] <- al, then rdi advances by one
stosb
stosb
stosb

stosb
stosb
stosb
stosb

; RAX = filled qword, RDX = the qword after it (must still be zero).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AA_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF2F2F2F2F2F2F2F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REP STOSB with RCX=8: must store AL (0xF2) into eight consecutive bytes
; and advance RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rcx, 8 ; repeat count
mov rax, 0xF2
rep stosb ; [rdi] <- al, rdi += 1, repeated rcx times

; Read back: qword 2 must be all 0xF2, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AA_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF2F2F2F2F2F2F2F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REPNE STOSB with RCX=8. REPNE is not a documented prefix for STOS; the
; expected values show it must behave like a plain REP (eight stores).
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rcx, 8 ; repeat count
mov rax, 0xF2
repne stosb ; [rdi] <- al, rdi += 1, repeated rcx times

; Read back: qword 2 must be all 0xF2, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AA_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF2F2F2F2F2F2F2F2",
    "RDX": "0x0",
    "RDI": "0xE000000F"
  }
}
%endif

; REPNE STOSB with DF=1: starts at the last byte of qword 2 and stores
; downward. REPNE is expected to behave like REP for STOS.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 7] ; destination = highest byte of qword 2

std ; DF=1: RDI moves downward
mov rcx, 8 ; repeat count
mov rax, 0xF2
repne stosb ; [rdi] <- al, rdi -= 1, repeated rcx times

; Read back: qword 2 must be all 0xF2, qword 3 must still be zero.
; RDI ends one byte below qword 2 (0xE000000F).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AA_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF2F2F2F2F2F2F2F2",
    "RDX": "0x0",
    "RDI": "0xE000000F"
  }
}
%endif

; REP STOSB with DF=1: starts at the last byte of qword 2 and stores
; eight bytes downward.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 7] ; destination = highest byte of qword 2

std ; DF=1: RDI moves downward
mov rcx, 8 ; repeat count
mov rax, 0xF2
rep stosb ; [rdi] <- al, rdi -= 1, repeated rcx times

; Read back: qword 2 must be all 0xF2, qword 3 must still be zero.
; RDI ends one byte below qword 2 (0xE000000F).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F1F2F3F4",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; Unprefixed STOSD: two dword stores of EAX must fill exactly one qword
; and advance RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4
stosd ; [rdi] <- eax, rdi += 4
stosd

; Read back: qword 2 must hold the dword twice, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_dword_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F1F2F3F4",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REP STOSD with RCX=2: must store EAX into two consecutive dwords and
; advance RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4
mov rcx, 2 ; repeat count
rep stosd ; [rdi] <- eax, rdi += 4, repeated rcx times

; Read back: qword 2 must hold the dword twice, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_dword_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F1F2F3F4",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REPNE STOSD with RCX=2. REPNE is not a documented prefix for STOS; the
; expected values show it must behave like a plain REP (two stores).
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4
mov rcx, 2 ; repeat count
repne stosd ; [rdi] <- eax, rdi += 4, repeated rcx times

; Read back: qword 2 must hold the dword twice, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_dword_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F1F2F3F4",
    "RDX": "0x0",
    "RDI": "0xE000000C"
  }
}
%endif

; REPNE STOSD with DF=1: starts at the upper dword of qword 2 and stores
; downward. REPNE is expected to behave like REP for STOS.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 4] ; destination = upper dword of qword 2

std ; DF=1: RDI moves downward
mov rax, 0xF1F2F3F4
mov rcx, 2 ; repeat count
repne stosd ; [rdi] <- eax, rdi -= 4, repeated rcx times

; Read back: qword 2 must hold the dword twice, qword 3 must still be zero.
; RDI ends one dword below qword 2 (0xE000000C).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_dword_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F1F2F3F4",
    "RDX": "0x0",
    "RDI": "0xE000000C"
  }
}
%endif

; REP STOSD with DF=1: starts at the upper dword of qword 2 and stores two
; dwords downward.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 4] ; destination = upper dword of qword 2

std ; DF=1: RDI moves downward
mov rax, 0xF1F2F3F4
mov rcx, 2 ; repeat count
rep stosd ; [rdi] <- eax, rdi -= 4, repeated rcx times

; Read back: qword 2 must hold the dword twice, qword 3 must still be zero.
; RDI ends one dword below qword 2 (0xE000000C).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F5F6F7F8",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; Unprefixed STOSQ: a single qword store of RAX, advancing RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4F5F6F7F8
stosq ; [rdi] <- rax, rdi += 8

; Read back: qword 2 must hold the stored value, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_qword_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F5F6F7F8",
    "RDX": "0xF1F2F3F4F5F6F7F8",
    "RSI": "0x0",
    "RDI": "0xE0000020"
  }
}
%endif

; REP STOSQ with RCX=2: must store RAX into qwords 2 and 3 and leave
; qword 4 (the guard) untouched.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4F5F6F7F8
mov rcx, 2 ; repeat count
rep stosq ; [rdi] <- rax, rdi += 8, repeated rcx times

; Read back qwords 2 and 3 (both written) and qword 4 (guard, must be zero).
; RDX is loaded last because it is also the base register.
mov rax, [rdx + 8 * 2]
mov rsi, [rdx + 8 * 4]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_qword_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F5F6F7F8",
    "RDX": "0xF1F2F3F4F5F6F7F8",
    "RSI": "0x0",
    "RDI": "0xE0000020"
  }
}
%endif

; REPNE STOSQ with RCX=2. REPNE is not a documented prefix for STOS; the
; expected values show it must behave like a plain REP: qwords 2 and 3 are
; written and qword 4 (the guard) is left untouched.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2F3F4F5F6F7F8
mov rcx, 2 ; repeat count
repne stosq ; [rdi] <- rax, rdi += 8, repeated rcx times

; Read back qwords 2 and 3 (both written) and qword 4 (guard, must be zero).
; RDX is loaded last because it is also the base register.
mov rax, [rdx + 8 * 2]
mov rsi, [rdx + 8 * 4]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_qword_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F5F6F7F8",
    "RDX": "0xF1F2F3F4F5F6F7F8",
    "RSI": "0x0",
    "RDI": "0xE0000008"
  }
}
%endif

; REPNE STOSQ with DF=1: starts at qword 3 and stores downward into
; qword 2. REPNE is expected to behave like REP for STOS; qword 4 (the
; guard) must stay zero.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 3] ; destination = qword 3

std ; DF=1: RDI moves downward
mov rax, 0xF1F2F3F4F5F6F7F8
mov rcx, 2 ; repeat count
repne stosq ; [rdi] <- rax, rdi -= 8, repeated rcx times

; Read back qwords 2 and 3 (both written) and qword 4 (guard, must be zero).
; RDI ends at qword 1 (0xE0000008).
mov rax, [rdx + 8 * 2]
mov rsi, [rdx + 8 * 4]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_qword_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F3F4F5F6F7F8",
    "RDX": "0xF1F2F3F4F5F6F7F8",
    "RSI": "0x0",
    "RDI": "0xE0000008"
  }
}
%endif

; REP STOSQ with DF=1: starts at qword 3 and stores downward into qword 2.
; Qword 4 (the guard) must stay zero.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax

lea rdi, [rdx + 8 * 3] ; destination = qword 3

std ; DF=1: RDI moves downward
mov rax, 0xF1F2F3F4F5F6F7F8
mov rcx, 2 ; repeat count
rep stosq ; [rdi] <- rax, rdi -= 8, repeated rcx times

; Read back qwords 2 and 3 (both written) and qword 4 (guard, must be zero).
; RDI ends at qword 1 (0xE0000008).
mov rax, [rdx + 8 * 2]
mov rsi, [rdx + 8 * 4]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F1F2F1F2F1F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; Unprefixed STOSW: four word stores of AX must fill exactly one qword and
; advance RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rax, 0xF1F2
stosw ; [rdi] <- ax, rdi += 2
stosw
stosw
stosw

; Read back: qword 2 must hold the word four times, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_word_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F1F2F1F2F1F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REP STOSW with RCX=4: must store AX into four consecutive words and
; advance RDI by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rcx, 0x4 ; repeat count
mov rax, 0xF1F2
rep stosw ; [rdi] <- ax, rdi += 2, repeated rcx times

; Read back: qword 2 must hold the word four times, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_word_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F1F2F1F2F1F2",
    "RDX": "0x0",
    "RDI": "0xE0000018"
  }
}
%endif

; REPNE STOSW with RCX=4. REPNE is not a documented prefix for STOS; the
; expected values show it must behave like a plain REP (four stores).
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2] ; destination = qword 2

cld ; DF=0: RDI moves upward
mov rcx, 0x4 ; repeat count
mov rax, 0xF1F2
repne stosw ; [rdi] <- ax, rdi += 2, repeated rcx times

; Read back: qword 2 must hold the word four times, qword 3 must still be zero.
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_word_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F1F2F1F2F1F2",
    "RDX": "0x0",
    "RDI": "0xE000000E"
  }
}
%endif

; REPNE STOSW with DF=1: starts at the top word of qword 2 and stores
; downward. REPNE is expected to behave like REP for STOS.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 6] ; destination = top word of qword 2

std ; DF=1: RDI moves downward
mov rcx, 0x4 ; repeat count
mov rax, 0xF1F2
repne stosw ; [rdi] <- ax, rdi -= 2, repeated rcx times

; Read back: qword 2 must hold the word four times, qword 3 must still be zero.
; RDI ends one word below qword 2 (0xE000000E).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AB_word_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xF1F2F1F2F1F2F1F2",
    "RDX": "0x0",
    "RDI": "0xE000000E"
  }
}
%endif

; REP STOSW with DF=1: starts at the top word of qword 2 and stores four
; words downward.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rdi, [rdx + 8 * 2 + 6] ; destination = top word of qword 2

std ; DF=1: RDI moves downward
mov rcx, 0x4 ; repeat count
mov rax, 0xF1F2
rep stosw ; [rdi] <- ax, rdi -= 2, repeated rcx times

; Read back: qword 2 must hold the word four times, qword 3 must still be zero.
; RDI ends one word below qword 2 (0xE000000E).
mov rax, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41",
    "RSI": "0xE0000008"
  }
}
%endif

; Unprefixed LODSB: eight byte loads walk qword 0 from its lowest byte
; upward. The final AL is the qword's top byte (0x41) and RSI advances by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AL is replaced by LODSB; bits 8-63 stay zero
lodsb ; al <- [rsi], rsi += 1
lodsb
lodsb
lodsb

lodsb
lodsb
lodsb
lodsb

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AC_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41",
    "RSI": "0xE0000008"
  }
}
%endif

; REP LODSB with RCX=8: eight byte loads walk qword 0 upward. The final AL
; is the qword's top byte (0x41) and RSI advances by 8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AL is replaced by LODSB; bits 8-63 stay zero
mov rcx, 8 ; repeat count
rep lodsb ; al <- [rsi], rsi += 1, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AC_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41",
    "RSI": "0xE0000008"
  }
}
%endif

; REPNE LODSB with RCX=8. REPNE is not a documented prefix for LODS; the
; expected values show it must behave like a plain REP (eight loads).
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AL is replaced by LODSB; bits 8-63 stay zero
mov rcx, 8 ; repeat count
repne lodsb ; al <- [rsi], rsi += 1, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AC_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x57",
    "RSI": "0xE0000008"
  }
}
%endif

; REPNE LODSB with DF=1: starts at the first byte of qword 2 and loads
; eight bytes downward (offsets 0x10 to 0x09). The last byte read is at
; offset 0x09, which holds 0x57. REPNE is expected to behave like REP.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 2] ; source = first byte of qword 2

std ; DF=1: RSI moves downward
mov rax, 0xFF ; only AL is replaced by LODSB; bits 8-63 stay zero
mov rcx, 8 ; repeat count
repne lodsb ; al <- [rsi], rsi -= 1, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AC_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x57",
    "RSI": "0xE0000008"
  }
}
%endif

; REP LODSB with DF=1: starts at the first byte of qword 2 and loads eight
; bytes downward (offsets 0x10 to 0x09). The last byte read is at offset
; 0x09, which holds 0x57.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 2] ; source = first byte of qword 2

std ; DF=1: RSI moves downward
mov rax, 0xFF ; only AL is replaced by LODSB; bits 8-63 stay zero
mov rcx, 8 ; repeat count
rep lodsb ; al <- [rsi], rsi -= 1, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x71727374",
    "RSI": "0xE0000020"
  }
}
%endif

; REPNE LODSD with RCX=8: eight dword loads cover qwords 0-3. The last
; dword read is the upper half of qword 3 (0x71727374). REPNE is not a
; documented prefix for LODS; it is expected to behave like REP.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-3 with patterns and zero qword 4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x0
mov [rdx + 8 * 4], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
mov rcx, 8 ; repeat count
repne lodsd ; eax <- [rsi], rsi += 4, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_dword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344",
    "RSI": "0xE0000000"
  }
}
%endif

; REPNE LODSD with DF=1: starts at qword 4 and loads eight dwords downward
; (offsets 0x20 to 0x04). The last dword read is the upper half of qword 0
; (0x41424344) and RSI ends at the buffer base. REPNE is expected to behave
; like REP for LODS.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-3 with patterns and zero qword 4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x0
mov [rdx + 8 * 4], rax

lea rsi, [rdx + 8 * 4] ; source = qword 4

std ; DF=1: RSI moves downward
mov rax, 0xFF
mov rcx, 8 ; repeat count
repne lodsd ; eax <- [rsi], rsi -= 4, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xB1B2B3B4B5B6B7B8",
    "RSI": "0xE0000040"
  }
}
%endif

; REPNE LODSQ with RCX=8: eight qword loads cover qwords 0-7. The last
; value read is qword 7. REPNE is not a documented prefix for LODS; it is
; expected to behave like REP.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-7 with distinct patterns and zero qword 8.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax
mov rax, 0x0
mov [rdx + 8 * 8], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
mov rcx, 8 ; repeat count
repne lodsq ; rax <- [rsi], rsi += 8, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_qword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152535455565758",
    "RSI": "0xE0000000"
  }
}
%endif

; REPNE LODSQ with DF=1: starts at qword 8 and loads eight qwords downward
; (qwords 8 to 1). The last value read is qword 1 and RSI ends at the
; buffer base. REPNE is expected to behave like REP for LODS.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-7 with distinct patterns and zero qword 8.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax
mov rax, 0x0
mov [rdx + 8 * 8], rax

lea rsi, [rdx + 8 * 8] ; source = qword 8

std ; DF=1: RSI moves downward
mov rax, 0xFF
mov rcx, 8 ; repeat count
repne lodsq ; rax <- [rsi], rsi -= 8, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152",
    "RSI": "0xE0000010"
  }
}
%endif

; REPNE LODSW with RCX=8: eight word loads cover qwords 0-1. The last word
; read is the top word of qword 1 (0x5152). REPNE is not a documented
; prefix for LODS; it is expected to behave like REP.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AX is replaced by LODSW; bits 16-63 stay zero
mov rcx, 8 ; repeat count
repne lodsw ; ax <- [rsi], rsi += 2, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REPNE_word_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4546",
    "RSI": "0xE0000000"
  }
}
%endif

; REPNE LODSW with DF=1: starts at qword 2 and loads eight words downward
; (offsets 0x10 to 0x02). The last word read is at offset 0x02 (0x4546)
; and RSI ends at the buffer base. REPNE is expected to behave like REP.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 2] ; source = qword 2

std ; DF=1: RSI moves downward
mov rax, 0xFF ; only AX is replaced by LODSW; bits 16-63 stay zero
mov rcx, 8 ; repeat count
repne lodsw ; ax <- [rsi], rsi -= 2, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x71727374",
    "RSI": "0xE0000020"
  }
}
%endif

; REP LODSD with RCX=8: eight dword loads cover qwords 0-3. The last dword
; read is the upper half of qword 3 (0x71727374) and RSI advances by 32.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-3 with patterns and zero qword 4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x0
mov [rdx + 8 * 4], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
mov rcx, 8 ; repeat count
rep lodsd ; eax <- [rsi], rsi += 4, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_dword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344",
    "RSI": "0xE0000000"
  }
}
%endif

; REP LODSD with DF=1: starts at qword 4 and loads eight dwords downward
; (offsets 0x20 to 0x04). The last dword read is the upper half of qword 0
; (0x41424344) and RSI ends at the buffer base.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-3 with patterns and zero qword 4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x0
mov [rdx + 8 * 4], rax

lea rsi, [rdx + 8 * 4] ; source = qword 4

std ; DF=1: RSI moves downward
mov rax, 0xFF
mov rcx, 8 ; repeat count
rep lodsd ; eax <- [rsi], rsi -= 4, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xB1B2B3B4B5B6B7B8",
    "RSI": "0xE0000040"
  }
}
%endif

; REP LODSQ with RCX=8: eight qword loads cover qwords 0-7. The last value
; read is qword 7 and RSI advances by 64.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-7 with distinct patterns and zero qword 8.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax
mov rax, 0x0
mov [rdx + 8 * 8], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
mov rcx, 8 ; repeat count
rep lodsq ; rax <- [rsi], rsi += 8, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_qword_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152535455565758",
    "RSI": "0xE0000000"
  }
}
%endif

; REP LODSQ with DF=1: starts at qword 8 and loads eight qwords downward
; (qwords 8 to 1). The last value read is qword 1 and RSI ends at the
; buffer base.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-7 with distinct patterns and zero qword 8.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax
mov rax, 0x0
mov [rdx + 8 * 8], rax

lea rsi, [rdx + 8 * 8] ; source = qword 8

std ; DF=1: RSI moves downward
mov rax, 0xFF
mov rcx, 8 ; repeat count
rep lodsq ; rax <- [rsi], rsi -= 8, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152",
    "RSI": "0xE0000010"
  }
}
%endif

; REP LODSW with RCX=8: eight word loads cover qwords 0-1. The last word
; read is the top word of qword 1 (0x5152) and RSI advances by 16.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AX is replaced by LODSW; bits 16-63 stay zero
mov rcx, 8 ; repeat count
rep lodsw ; ax <- [rsi], rsi += 2, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_REP_word_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4546",
    "RSI": "0xE0000000"
  }
}
%endif

; REP LODSW with DF=1: starts at qword 2 and loads eight words downward
; (offsets 0x10 to 0x02). The last word read is at offset 0x02 (0x4546)
; and RSI ends at the buffer base.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 2] ; source = qword 2

std ; DF=1: RSI moves downward
mov rax, 0xFF ; only AX is replaced by LODSW; bits 16-63 stay zero
mov rcx, 8 ; repeat count
rep lodsw ; ax <- [rsi], rsi -= 2, repeated rcx times

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x71727374",
    "RSI": "0xE0000020"
  }
}
%endif

; Unprefixed LODSD: eight dword loads cover qwords 0-3. The last dword read
; is the upper half of qword 3 (0x71727374) and RSI advances by 32.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-3 with patterns and zero qword 4.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x0
mov [rdx + 8 * 4], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
lodsd ; eax <- [rsi], rsi += 4
lodsd
lodsd
lodsd

lodsd
lodsd
lodsd
lodsd

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xB1B2B3B4B5B6B7B8",
    "RSI": "0xE0000040"
  }
}
%endif

; Unprefixed LODSQ: eight qword loads cover qwords 0-7. The last value read
; is qword 7 and RSI advances by 64.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-7 with distinct patterns and zero qword 8.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax
mov rax, 0x0
mov [rdx + 8 * 8], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF
lodsq ; rax <- [rsi], rsi += 8
lodsq
lodsq
lodsq

lodsq
lodsq
lodsq
lodsq

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AD_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5152",
    "RSI": "0xE0000010"
  }
}
%endif

; Unprefixed LODSW: eight word loads cover qwords 0-1. The last word read
; is the top word of qword 1 (0x5152) and RSI advances by 16.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qwords 2-3.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

lea rsi, [rdx + 8 * 0] ; source = qword 0

cld ; DF=0: RSI moves upward
mov rax, 0xFF ; only AX is replaced by LODSW; bits 16-63 stay zero
lodsw ; ax <- [rsi], rsi += 2
lodsw
lodsw
lodsw

lodsw
lodsw
lodsw
lodsw

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1200",
    "RDI": "0xE0000001"
  }
}
%endif

; Unprefixed SCASB: compares AL (0x61) with the first byte of the buffer
; (0x48), then captures the resulting flags with LAHF. RDI advances by 1.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x61
scasb ; flags <- al - [rdi], rdi += 1
; cmp = 0x61 - 0x48 = 0x19
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00010000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00010010
; OF: LAHF doesn't load - 0

mov rax, 0 ; clear RAX so only AH carries information after LAHF
lahf

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE_REP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "5",
    "RDI": "0xE0000003"
  }
}
%endif

; REP (REPE) SCASB scanning upward: bytes 0 and 1 of the buffer equal AL
; (0x61) and byte 2 (0x46) does not, so the scan stops after three
; compares with RCX=5 and RDI=base+3.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434445466161
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x61
mov rcx, 8 ; maximum number of compares
cmp rax, 0x61 ; enter the loop with ZF=1

rep scasb ; repeat while rcx != 0 and the compared byte equals al

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE_REPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "1",
    "RDI": "0xE0000007"
  }
}
%endif

; REPNE SCASB scanning upward: the first byte equal to AL (0x61) is at
; offset 6, so the scan stops after seven compares with RCX=1 and
; RDI=base+7.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4161434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x61
mov rcx, 8 ; maximum number of compares
cmp rax, 0 ; enter the loop with ZF=0

repne scasb ; repeat while rcx != 0 and the compared byte differs from al

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE_REPNE_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "5",
    "RDI": "0xE000000D"
  }
}
%endif

; REPNE SCASB with DF=1: scans downward from offset 0x10. The bytes at 0x10
; (0x00) and 0x0F (0x51) differ from AL; the byte at 0x0E is 0x61, so the
; scan stops after three compares with RCX=5 and RDI=0xE000000D.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5161535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 2] ; scan pointer = first byte of qword 2

std ; DF=1: RDI moves downward
mov rax, 0x61
mov rcx, 8 ; maximum number of compares
cmp rax, 0 ; enter the loop with ZF=0

repne scasb ; repeat while rcx != 0 and the compared byte differs from al

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE_REP_down.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "7",
    "RDI": "0xE000000F"
  }
}
%endif

; REP (REPE) SCASB with DF=1: scans downward from offset 0x10. The very
; first byte compared (0x00 in qword 2) differs from AL (0x61), so the scan
; stops after one compare with RCX=7 and RDI=0xE000000F.
; NOTE(review): the 0x61 bytes seeded at the start of qword 0 are never
; reached by this scan.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434445466161
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 2] ; scan pointer = first byte of qword 2

std ; DF=1: RDI moves downward
mov rax, 0x61
mov rcx, 8 ; maximum number of compares
cmp rax, 0x61 ; enter the loop with ZF=1

rep scasb ; repeat while rcx != 0 and the compared byte equals al

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AE_addrmod.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x0000000010000101"
  },
  "MemoryRegions": {
    "0x10000000": "4096"
  }
}
%endif

; Checks SCASB with the 0x67 address-size override prefix.
; This test ensures that the 32-bit address size override works correctly in
; 64-bit mode: the scan must address memory through EDI, and the updated
; pointer must come back with the upper 32 bits of RDI cleared.
; NOTE(review): there is no CLD here, so the expected increment of RDI relies
; on DF being clear at entry - confirm the harness guarantees this.

; Set up destination data at address 0x10000100
mov rdi, 0x10000100
mov byte [rdi], 0x41      ; 'A'

; Set initial RDI value with high bits set
; Low 32 bits (EDI) = 0x10000100, high 32 bits = 0x61626364
mov rdi, 0x6162636410000100

; Set AL to match the byte we're scanning for
mov al, 0x41

; The 0x67 prefix emitted below should make SCASB use EDI (32-bit) instead of
; RDI (64-bit). Per x86-64 architecture, writing to 32-bit registers zeros the
; upper 32 bits, so RDI is expected to end up as 0x0000000010000101.
db 0x67
scasb
hlt


================================================
FILE: unittests/ASM/Primary/Primary_AF_REP_dword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "6",
    "RDI": "0xE0000008"
  }
}
%endif

; REP (REPE) SCASD scanning upward: dword 0 equals EAX (0x61626364) and
; dword 1 (0x41424344) does not, so the scan stops after two compares with
; RCX=6 and RDI=base+8.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434461626364
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x61626364
mov rcx, 8 ; maximum number of compares
cmp rax, 0x61626364 ; enter the loop with ZF=1

rep scasd ; repeat while rcx != 0 and the compared dword equals eax

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AF_REP_qword.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "6",
    "RDI": "0xE0000010"
  }
}
%endif

; REP (REPE) SCASQ scanning upward: qword 0 equals RAX and qword 1 does
; not, so the scan stops after two compares with RCX=6 and RDI=base+16.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x6162636465666768
mov rbx, 0x6162636465666768 ; same value as RAX, used only for the CMP below
mov rcx, 8 ; maximum number of compares
cmp rax, rbx ; enter the loop with ZF=1

rep scasq ; repeat while rcx != 0 and the compared qword equals rax

hlt


================================================
FILE: unittests/ASM/Primary/Primary_AF_REP_word.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "6",
    "RDI": "0xE0000004"
  }
}
%endif

; REP (REPE) SCASW scanning upward: word 0 equals AX (0x6162) and word 1
; (0x4546) does not, so the scan stops after two compares with RCX=6 and
; RDI=base+4.
mov rdx, 0xe0000000 ; scratch buffer base (assumed to be mapped by the test harness)

; Seed qwords 0-1 with patterns and zero qword 2.
mov rax, 0x4142434445466162
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax

lea rdi, [rdx + 8 * 0] ; scan pointer = qword 0

cld ; DF=0: RDI moves upward
mov rax, 0x6162
mov rcx, 8 ; maximum number of compares
cmp rax, 0x6162 ; enter the loop with ZF=1

rep scasw ; repeat while rcx != 0 and the compared word equals ax

hlt


================================================
FILE: unittests/ASM/Primary/Primary_B0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFF41",
    "RBX": "0xFFFFFFFFFFFFFF41",
    "RCX": "0xFFFFFFFFFFFFFF41",
    "RDX": "0xFFFFFFFFFFFFFF41",
    "RBP": "0xFFFFFFFFFFFFFF41",
    "RSI": "0xFFFFFFFFFFFFFF41",
    "RDI": "0xFFFFFFFFFFFFFF41",
    "RSP": "0xFFFFFFFFFFFFFF41",
    "R8":  "0xFFFFFFFFFFFFFF41",
    "R9":  "0xFFFFFFFFFFFFFF41",
    "R10": "0xFFFFFFFFFFFFFF41",
    "R11": "0xFFFFFFFFFFFFFF41",
    "R12": "0xFFFFFFFFFFFFFF41",
    "R13": "0xFFFFFFFFFFFFFF41",
    "R14": "0xFFFFFFFFFFFFFF41",
    "R15": "0xFFFFFFFFFFFFFF41"
  }
}
%endif

; MOV r8, imm8 on all sixteen general-purpose registers: the byte move must
; replace only bits 0-7 and leave bits 8-63 untouched.

; Fill every register with all ones so preserved upper bits are visible.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1
mov r8, -1
mov r9, -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1
mov r15, -1


; Overwrite only the low byte of each register.
mov al, 0x41
mov bl, 0x41
mov cl, 0x41
mov dl, 0x41
mov bpl, 0x41
mov sil, 0x41
mov dil, 0x41
mov spl, 0x41
mov r8b, 0x41
mov r9b, 0x41
mov r10b, 0x41
mov r11b, 0x41
mov r12b, 0x41
mov r13b, 0x41
mov r14b, 0x41
mov r15b, 0x41

hlt


================================================
FILE: unittests/ASM/Primary/Primary_B8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF4241",
    "RBX": "0xFFFFFFFFFFFF4241",
    "RCX": "0xFFFFFFFFFFFF4241",
    "RDX": "0xFFFFFFFFFFFF4241",
    "RBP": "0xFFFFFFFFFFFF4241",
    "RSI": "0xFFFFFFFFFFFF4241",
    "RDI": "0xFFFFFFFFFFFF4241",
    "RSP": "0xFFFFFFFFFFFF4241",
    "R8":  "0xFFFFFFFFFFFF4241",
    "R9":  "0xFFFFFFFFFFFF4241",
    "R10": "0xFFFFFFFFFFFF4241",
    "R11": "0xFFFFFFFFFFFF4241",
    "R12": "0xFFFFFFFFFFFF4241",
    "R13": "0xFFFFFFFFFFFF4241",
    "R14": "0xFFFFFFFFFFFF4241",
    "R15": "0xFFFFFFFFFFFF4241"
  }
}
%endif

; MOV r16, imm16 on all sixteen general-purpose registers: the word move
; must replace only bits 0-15 and leave bits 16-63 untouched.

; Fill every register with all ones so preserved upper bits are visible.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1
mov r8, -1
mov r9, -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1
mov r15, -1


; Overwrite only the low word of each register.
mov ax, 0x4241
mov bx, 0x4241
mov cx, 0x4241
mov dx, 0x4241
mov bp, 0x4241
mov si, 0x4241
mov di, 0x4241
mov sp, 0x4241
mov r8w, 0x4241
mov r9w, 0x4241
mov r10w, 0x4241
mov r11w, 0x4241
mov r12w, 0x4241
mov r13w, 0x4241
mov r14w, 0x4241
mov r15w, 0x4241

hlt


================================================
FILE: unittests/ASM/Primary/Primary_B8_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000044434241",
    "RBX": "0x0000000044434241",
    "RCX": "0x0000000044434241",
    "RDX": "0x0000000044434241",
    "RBP": "0x0000000044434241",
    "RSI": "0x0000000044434241",
    "RDI": "0x0000000044434241",
    "RSP": "0x0000000044434241",
    "R8":  "0x0000000044434241",
    "R9":  "0x0000000044434241",
    "R10": "0x0000000044434241",
    "R11": "0x0000000044434241",
    "R12": "0x0000000044434241",
    "R13": "0x0000000044434241",
    "R14": "0x0000000044434241",
    "R15": "0x0000000044434241"
  }
}
%endif

; MOV r32, imm32 on all sixteen general-purpose registers: the dword move
; must write bits 0-31 and zero bits 32-63.

; Fill every register with all ones so the zeroed upper half is visible.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1
mov r8, -1
mov r9, -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1
mov r15, -1

; Write the 32-bit sub-register of each register.
mov eax, 0x44434241
mov ebx, 0x44434241
mov ecx, 0x44434241
mov edx, 0x44434241
mov ebp, 0x44434241
mov esi, 0x44434241
mov edi, 0x44434241
mov esp, 0x44434241
mov r8d, 0x44434241
mov r9d, 0x44434241
mov r10d, 0x44434241
mov r11d, 0x44434241
mov r12d, 0x44434241
mov r13d, 0x44434241
mov r14d, 0x44434241
mov r15d, 0x44434241

hlt


================================================
FILE: unittests/ASM/Primary/Primary_B8_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4847464544434241",
    "RBX": "0x4847464544434241",
    "RCX": "0x4847464544434241",
    "RDX": "0x4847464544434241",
    "RBP": "0x4847464544434241",
    "RSI": "0x4847464544434241",
    "RDI": "0x4847464544434241",
    "RSP": "0x4847464544434241",
    "R8":  "0x4847464544434241",
    "R9":  "0x4847464544434241",
    "R10": "0x4847464544434241",
    "R11": "0x4847464544434241",
    "R12": "0x4847464544434241",
    "R13": "0x4847464544434241",
    "R14": "0x4847464544434241",
    "R15": "0x4847464544434241"
  }
}
%endif

; Fill every GPR with all ones so an incomplete write would be visible.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1
mov rdi, -1
mov rsp, -1
mov r8, -1
mov r9, -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1
mov r15, -1

; 64-bit immediate moves: the full register is replaced by the 8-byte constant.
mov rax, 0x4847464544434241
mov rbx, 0x4847464544434241
mov rcx, 0x4847464544434241
mov rdx, 0x4847464544434241
mov rbp, 0x4847464544434241
mov rsi, 0x4847464544434241
mov rdi, 0x4847464544434241
mov rsp, 0x4847464544434241
mov r8, 0x4847464544434241
mov r9, 0x4847464544434241
mov r10, 0x4847464544434241
mov r11, 0x4847464544434241
mov r12, 0x4847464544434241
mov r13, 0x4847464544434241
mov r14, 0x4847464544434241
mov r15, 0x4847464544434241

hlt


================================================
FILE: unittests/ASM/Primary/Primary_C2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RSP": "0xE000FF20"
  }
}
%endif

; RET imm16 test: build a fake return address on the stack that points at .end.
mov rsp, 0xe0000020
lea rax, [rel .end]
push rax

mov rax, 1
; Pops the 8-byte return address, then releases another 0xFF00 bytes of stack:
; RSP = 0xe0000020 + 0xFF00 = 0xE000FF20.
ret 0xFF00
; Must never execute; it would overwrite the expected RAX = 1.
mov rax, 0

.end:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_C3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RSP": "0xE0000020"
  }
}
%endif

; Plain near RET test: build a fake return address on the stack that points at .end.
mov rsp, 0xe0000020
lea rax, [rel .end]
push rax

mov rax, 1
; Pops the return address only, so RSP returns to its starting value 0xE0000020.
ret
; Must never execute; it would overwrite the expected RAX = 1.
mov rax, 0

.end:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSP": "0xE0000FE8",
    "RBP": "0xE0000FF8"
  }
}
%endif

; ENTER with nesting level 0.
mov rsp, 0xe0001000
mov rbp, 0xe0001000

; Pushes RBP (RSP = 0xE0000FF8), copies that RSP into RBP, then reserves
; 0x10 bytes of locals (RSP = 0xE0000FE8).
enter 0x10, 0
hlt


================================================
FILE: unittests/ASM/Primary/Primary_C8_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RSP": "0xE0000FD8",
    "RBP": "0xE0000FF8"
  }
}
%endif

; ENTER with nesting level 2: one previous frame pointer is copied from the
; old frame (addressed through RBP) onto the new stack.
mov rsp, 0xe0001000
mov rbp, 0xe0002000
mov rax, 0x4142434445464748
; Marker that ENTER is expected to copy from [old RBP - 8].
mov qword [rbp - 8], rax

; Stack after ENTER (expected RSP = 0xE0000FD8, RBP = 0xE0000FF8):
;   0xE0000FF8: saved RBP
;   0xE0000FF0: copied marker from [old RBP - 8]
;   0xE0000FE8: new frame pointer value
;   0xE0000FD8..0xE0000FE7: 0x10 bytes of locals
enter 0x10, 2
; RSP + 0x18 = 0xE0000FF0, the slot holding the copied marker.
mov rax, qword [rsp + 0x18]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_C8_o16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSP": "0xE0000FEE",
    "RBP": "0xE0000FFE"
  }
}
%endif

; ENTER with a 16-bit operand-size override.
mov rsp, 0xe0001000
mov rbp, 0xe0001000

; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o64`
db 0x66
; With the prefix only 2 bytes are pushed for the frame pointer, so the
; expected values are RBP = 0xE0000FFE and RSP = 0xE0000FFE - 0x10 = 0xE0000FEE.
enter 0x10, 0
hlt


================================================
FILE: unittests/ASM/Primary/Primary_C9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBP": "0x4142434445464748"
  }
}
%endif

; LEAVE test: RBP must be restored to the marker value saved by the manual prologue.
mov rsp, 0xe0000020
mov rbp, 0x4142434445464748

; Act like an ENTER frame without using ENTER
push rbp
mov rbp, rsp
; The return address pushed by CALL is discarded by LEAVE below, which resets
; RSP from RBP; execution then simply falls through into .end.
call .target
jmp .end

.target:
mov rax, 1
; RSP = RBP, then pop RBP -> RBP = 0x4142434445464748.
leave

.end:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_C9_o16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBP": "0xe0004748",
    "RSP": "0xe0000020"
  }
}
%endif

; LEAVE with a 16-bit operand-size override: only BP is popped.
mov rsp, 0xe0000020
mov rbp, 0x4142434445464748

; Act like an ENTER frame without using ENTER
; Manual 16-bit "push bp": stores 0x4748 at 0xe000001e.
sub rsp, 2
mov [rsp], bp
mov rbp, rsp
; The return address pushed by CALL is discarded when LEAVE reloads RSP from RBP.
call .target
jmp .end

.target:
mov rax, 1

; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o64`
db 0x66
; Expected: RSP = 0xe000001e + 2 = 0xe0000020, and only the low 16 bits of RBP
; are replaced (0x4748) while the upper bits keep the frame address -> 0xe0004748.
leave

.end:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_CF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RSP": "0xe0000030"
  }
}
%endif

; IRETQ test: hand-build the 5-slot interrupt return frame and return through it.
mov esp, 0xe0000030

lea rbx, [rel .end]
; NOTE(review): 0x33 / 0x2b are presumably the 64-bit user code / data selectors
; of the host environment -- confirm against the test runner.
mov rcx, 0x33
; Capture RSP before the pushes; IRETQ is expected to restore it (0xe0000030).
mov rdx, rsp

; Frame is pushed in reverse pop order: SS, RSP, RFLAGS, CS, RIP.
mov eax, 0x2b
push rax ; SS
push rdx ; RSP
mov eax, 0x202
push rax ; RFLAGS
push rcx ; CS
push rbx ; RIP

; Poison RAX so that only reaching .end produces the expected value 1.
mov eax, -1
iretq

; Super fail
; Reached only if IRETQ fell through to the next instruction.
mov eax, 2
hlt

; Not referenced anywhere in this file.
.end_fail:
mov eax, 0
hlt

.end:
mov eax, 1

hlt


================================================
FILE: unittests/ASM/Primary/Primary_D7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFF47",
    "R14": "0xFFFFFFFFFFFFFF57",
    "R13": "0xFFFFFFFFFFFFFF67"
  }
}
%endif

; XLAT test using the default, FS and GS segment bases.
; XLAT replaces only AL with the byte at [seg:rbx + AL], so the upper 56 bits
; of RAX (all ones here) must survive.

; Save FS/GS
rdfsbase rax
mov [rel .data_backup], rax
rdgsbase rax
mov [rel .data_backup + 8], rax

; FS base -> second qword of the table, GS base -> third qword.
mov rbx, 0xe0000000
lea r9, [rbx + 8 * 1]
wrfsbase r9
lea r9, [rbx + 8 * 2]
wrgsbase r9

; Three qwords whose byte at index 1 differs: 0x47, 0x57, 0x67.
mov rcx, 0x4142434445464748
mov [rbx + 8 * 0], rcx
mov rcx, 0x5152535455565758
mov [rbx + 8 * 1], rcx
mov rcx, 0x6162636465666768
mov [rbx + 8 * 2], rcx

; Base
; AL = 1 indexes byte 1 of the first qword -> 0x47.
mov rax, 0xFFFFFFFFFFFFFF01
xlatb
mov r15, rax

; FS
; RBX = 0, so the address is FS base + 1 -> byte 1 of the second qword (0x57).
mov rax, 0xFFFFFFFFFFFFFF01
mov rbx, 0
fs xlat
mov r14, rax

; GS
; RBX = 0, so the address is GS base + 1 -> byte 1 of the third qword (0x67).
mov rax, 0xFFFFFFFFFFFFFF01
mov rbx, 0
gs xlat
mov r13, rax

; Restore FS/GS
mov rax, [rel .data_backup]
wrfsbase rax
mov rax, [rel .data_backup + 8]
wrgsbase rax

hlt

; Scratch storage for the original FS/GS base values.
align 4096
.data_backup:
dq 0
dq 0


================================================
FILE: unittests/ASM/Primary/Primary_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0F"
  }
}
%endif

; Loop test (LOOPE here): keeps looping while RCX != 0 after the decrement AND ZF = 1.
mov r15, 0xe0000000

; 16 bytes of data: byte 0 is 0x00, bytes 1..15 are 0xFF.
mov rbx, 0xFFFFFFFFFFFFFF00
mov [r15 + 8 * 0], rbx
mov rbx, -1
mov [r15 + 8 * 1], rbx

; RAX counts iterations; RCX walks the bytes from index 15 downwards.
mov rax, 0
mov rcx, 0x10
cmp byte [r15 + rcx - 1], 0xFF

jmp .head

.top:

add rax, 1
; Sets ZF for the next loop check (ADD above would otherwise define it).
cmp byte [r15 + rcx - 1], 0xFF

.head:

; 15 taken iterations are expected (RAX = 0x0F): on the final check RCX
; reaches 0 and byte 0 also compares unequal.
loope .top

hlt


================================================
FILE: unittests/ASM/Primary/Primary_E1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0F"
  }
}
%endif

; LOOPNE test: keeps looping while RCX != 0 after the decrement AND ZF = 0.
mov r15, 0xe0000000

; 16 bytes of data: byte 0 is 0x00, bytes 1..15 are 0xFF.
mov rbx, 0xFFFFFFFFFFFFFF00
mov [r15 + 8 * 0], rbx
mov rbx, -1
mov [r15 + 8 * 1], rbx

; RAX counts iterations; RCX walks the bytes from index 15 downwards.
mov rax, 0
mov rcx, 0x10
cmp byte [r15 + rcx - 1], 0

jmp .head

.top:

add rax, 1
; Sets ZF for the next loop check (ADD above would otherwise define it).
cmp byte [r15 + rcx - 1], 0

.head:

; 15 taken iterations are expected (RAX = 0x0F): on the final check RCX
; reaches 0 and byte 0 also compares equal.
loopne .top

hlt


================================================
FILE: unittests/ASM/Primary/Primary_E2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x10"
  }
}
%endif

; LOOP test: decrement RCX and branch while it is non-zero.
mov rax, 0
; 0x11 because the first LOOP executes before any increment: 0x11 decrements
; give 0x10 taken branches, so RAX is expected to end at 0x10.
mov rcx, 0x11

jmp .head

.top:

add rax, 1

.head:

loop .top

hlt


================================================
FILE: unittests/ASM/Primary/Primary_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x10",
    "RBX": "0x10"
  }
}
%endif

; JRCXZ / JECXZ test: count how many iterations run before the counter is seen as zero.
mov rax, 0
mov rcx, 0x10

jmp .head

.top:

add rax, 1
sub rcx, 1

.head:

; Exits once the full 64-bit RCX is zero -> RAX = 0x10.
jrcxz .next
jmp .top
.next:

; Second test
; Upper half of RCX is non-zero; JECXZ must only look at ECX (0x10),
; so the loop is still expected to run exactly 0x10 times (RBX = 0x10).
mov rbx, 0
mov rcx, 0xFFFFFFFF00000010
jmp .head2

.top2:
add rbx, 1
sub rcx, 1

.head2:
jecxz .next2
jmp .top2

.next2:

hlt


================================================
FILE: unittests/ASM/Primary/Primary_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x1"
  }
}
%endif

; Relative CALL (opcode 0xE8) with hand-encoded 16-bit and 32-bit displacements.
mov rsp, 0xe0000020
mov rax, 0
mov rbx, 0

; NASM doesn't have a way to explicitly encode a 16bit or 32bit relative call
; Manually encode
; NOTE(review): only two displacement bytes are emitted, so this expects the
; 0x66 prefix to select a 16-bit displacement -- confirm the intended decode
; behaviour for prefixed near branches in 64-bit mode.
db 0x66 ; 16bit
db 0xE8 ; CALL
db 0x02 ; +0x0002 (Just past the next JUMP instruction)
db 0x00

; The displacement of 2 assumes this assembles as a 2-byte short jump.
jmp .end1

.target1:
mov rax, 1
ret

.end1:

; NASM doesn't have a way to explicitly encode a 16bit or 32bit relative call
; Manually encode
db 0xE8 ; CALL
db 0x02 ; +0x00000002 (Just past the next JUMP instruction)
db 0x00
db 0x00
db 0x00

; The displacement of 2 assumes this assembles as a 2-byte short jump.
jmp .end2

.target2:
mov rbx, 1
ret

.end2:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x1"
  }
}
%endif

; Relative JMP (opcode 0xE9) with hand-encoded 16-bit and 32-bit displacements.
mov rsp, 0xe0000020
mov rax, 0
mov rbx, 0

; NASM doesn't have a way to explicitly encode a 16bit or 32bit relative jump
; Manually encode
; NOTE(review): only two displacement bytes are emitted, so this expects the
; 0x66 prefix to select a 16-bit displacement -- confirm the intended decode
; behaviour for prefixed near branches in 64-bit mode.
db 0x66 ; 16bit
db 0xE9 ; JMP
db 0x02 ; +0x0002 (Just past the next JUMP instruction)
db 0x00

; The displacement of 2 assumes the jump below assembles as a 2-byte short jump.
.back1:
jmp .end1

.target1:
mov rax, 1
jmp .back1

.end1:

; NASM doesn't have a way to explicitly encode a 16bit or 32bit relative jump
; Manually encode
db 0xE9 ; JMP
db 0x02 ; +0x00000002 (Just past the next JUMP instruction)
db 0x00
db 0x00
db 0x00

; The displacement of 2 assumes the jump below assembles as a 2-byte short jump.
.back2:
jmp .end2

.target2:
mov rbx, 1
jmp .back2

.end2:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_EB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; Short relative JMP (opcode 0xEB, 8-bit displacement).
mov rsp, 0xe0000020
mov rax, 0

; Manually encode the short jump so that exactly opcode 0xEB with an
; 8-bit displacement is emitted
db 0xEB ; JUMP
db 0x02 ; +0x02 (Just past the next JUMP instruction)

; The displacement of 2 assumes the jump below assembles as a 2-byte short jump.
.back:
jmp .end

.target:
mov rax, 1
jmp .back

.end:
hlt


================================================
FILE: unittests/ASM/Primary/Primary_F5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; CMC test: complementing a cleared carry flag must leave CF = 1.

; Set CF to known value
clc

cmc

; Get CF
; SBB of a register with itself yields 0 - CF, i.e. all ones when CF = 1;
; the AND reduces that to a single bit.
sbb rax, rax
and rax, 1

hlt


================================================
FILE: unittests/ASM/Primary/Primary_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0"
  }
}
%endif

; CLC test: CF must read back as 0.
clc

; Get CF
; SBB of a register with itself yields 0 - CF; the AND reduces it to a single bit.
sbb rax, rax
and rax, 1

hlt


================================================
FILE: unittests/ASM/Primary/Primary_F9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; STC test: CF must read back as 1.
stc

; Get CF
; SBB of a register with itself yields 0 - CF; the AND reduces it to a single bit.
sbb rax, rax
and rax, 1

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0xE0000001"
  }
}
%endif

; CLD test: with DF cleared, a byte string operation must advance RDI by +1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 0]

mov al, 0x0
cld
; Only the RDI side effect is checked: 0xE0000000 -> 0xE0000001.
scasb

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0xE0000007"
  }
}
%endif

; STD test: with DF set, a byte string operation must step RDI by -1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

lea rdi, [rdx + 8 * 1]

mov al, 0x0
std
; Only the RDI side effect is checked: 0xE0000008 -> 0xE0000007.
scasb

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_0_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464848",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK INC on 16-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Test 1 byte offset within 4byte boundary
lock inc word [r15 + 8 * 0 + 1]

; Test 3 byte offset across 4byte boundary
lock inc word [r15 + 8 * 0 + 3]

; Test 7 byte offset across 8byte boundary
lock inc word [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock inc word [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock inc word [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_0_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434446464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK INC on 32-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Not read by the INC instructions below; RAX is overwritten by the readback.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock inc dword [r15 + 8 * 0 + 3]

; Test 7 byte offset across 8byte boundary
lock inc dword [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock inc dword [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock inc dword [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_0_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434445464748",
    "RBX": "0x4242434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4242434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK INC on 64-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Not read by the INC instructions below; RAX is overwritten by the readback.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock inc qword [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock inc qword [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock inc qword [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_1_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464648",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK DEC on 16-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Not read by the DEC instructions below; RAX is overwritten by the readback.
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock dec word [r15 + 8 * 0 + 1]

; Test 3 byte offset across 4byte boundary
lock dec word [r15 + 8 * 0 + 3]

; Test 7 byte offset across 8byte boundary
lock dec word [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock dec word [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock dec word [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_1_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434444464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK DEC on 32-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Not read by the DEC instructions below; RAX is overwritten by the readback.
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock dec dword [r15 + 8 * 0 + 3]

; Test 7 byte offset across 8byte boundary
lock dec dword [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock dec dword [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock dec dword [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/Primary_FF_1_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4042434445464748",
    "RBX": "0x4042434445464748",
    "RCX": "0x4142434445464748",
    "RDX": "0x4142434445464748",
    "RSI": "0x4042434445464748",
    "RDI": "0x4142434445464748"
  }
}
%endif

; LOCK DEC on 64-bit memory operands at misaligned addresses.
mov r15, 0xe0000000

; Fill ten qwords with the same pattern so every touched byte has a known value.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Not read by the DEC instructions below; RAX is overwritten by the readback.
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock dec qword [r15 + 8 * 0 + 7]

; Test 15 byte offset across 16byte boundary
lock dec qword [r15 + 8 * 0 + 15]

; Test 63 byte offset across cacheline boundary
lock dec qword [r15 + 8 * 0 + 63]

; Read back the modified qwords (0, 1, 7) and the qwords that follow a
; boundary-crossing access (2, 8) or are untouched (3).
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/Primary/ROL_Flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x0000000001060004"
  }
}
%endif

; cfmerge: append the current carry flag to the result accumulator.
; Shifts r15 left by one bit and ORs in CF (read through LAHF, where CF is
; bit 0 of AH). Clobbers rax and the arithmetic flags.
%macro cfmerge 0

; Get CF
lahf
shr rax, 8
and rax, 1

; Merge in to results
shl r15, 1
or r15, rax

%endmacro

; Start from an explicitly empty accumulator instead of depending on the
; initial register state. MOV does not modify any flags.
mov r15, 0

; Sanity check of the helper itself: a set CF must be recorded as a 1 bit.
stc
cfmerge

; Each case rotates by a count that is >= the operand size and records the
; resulting CF with cfmerge (one bit per case, oldest case in the highest bit).
; NOTE(review): per the x86 rotate definition the count is masked first
; (5 bits for 8/16/32-bit operands, 6 bits for 64-bit operands). When the
; masked count is 0 the flags are left untouched, so the recorded bit is the
; CF left behind by the previous cfmerge (its final OR clears CF).

; 8-bit
; Shift 1 past size - Bit Set
mov rbx, 0x800
rol bl, 9
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x000
rol bl, 9
cfmerge

; Shift size - Bit Set
mov rbx, 0x80
rol bl, 8
cfmerge

; Shift size - Bit unset
mov rbx, 0x8000
rol bl, 8
cfmerge

; 8-bit - wrapped
; Shift 1 past size - Bit Set
mov rbx, 0x01
rol bl, 9
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0xFFF2
rol bl, 9
cfmerge

; Shift size - Bit Set
mov rbx, 0xFF
rol bl, 8
cfmerge

; Shift size - Bit unset
mov rbx, 0xFF00
rol bl, 8
cfmerge


; 16-bit
; Shift 1 past size - Bit Set
mov rbx, 0x80000
rol bx, 17
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x00000
rol bx, 17
cfmerge

; Shift size - Bit Set
mov rbx, 0x8000
rol bx, 16
cfmerge

; Shift size - Bit unset
mov rbx, 0x80000
rol bx, 16
cfmerge

; 32-bit
; Shift 1 past size - Bit Set
mov rbx, 0x800000000
rol ebx, 33
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x000000000
rol ebx, 33
cfmerge

; Shift size - Bit Set
mov rbx, 0x80000000
rol ebx, 32
cfmerge

; Shift size - Bit unset
mov rbx, 0x800000000
rol ebx, 32
cfmerge

; 32-bit - Wrapping
; Shift 1 past size - Bit Set
mov rbx, 0x02
rol ebx, 33
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x01
rol ebx, 33
cfmerge

; Shift size - Bit Set
mov rbx, 0x1
rol ebx, 32
cfmerge

; Shift size - Bit unset
mov rbx, 0x02
rol ebx, 32
cfmerge

; 64-bit
; Shift 1 past size - Bit Set
mov rbx, 0x02
rol rbx, 65
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x8000000000000000
rol rbx, 65
cfmerge

; Shift size - Bit Set
mov rbx, 0x1
rol rbx, 64
cfmerge

; Shift size - Bit unset
mov rbx, 0x02
rol rbx, 64
cfmerge

hlt


================================================
FILE: unittests/ASM/Primary/ROL_OF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x00000000000000aa"
  }
}
%endif

; clearof: force OF to 0 by rotating a zero value by one bit. Clobbers r12.
%macro clearof 0
mov r12, 0
ror r12, 1
%endmacro

; ofmerge: record the current OF in r15. Clobbers r13 and r14.
; The OR happens before the shift, so after the last case the recorded bits
; sit in bits 8..1 of r15 and bit 0 is always 0.
%macro ofmerge 0
mov r14, 0
mov r13, 1
cmovo r14, r13

or r15, r14
shl r15, 1
%endmacro

mov r15, 0
; r14 is re-initialised inside ofmerge before it is read.
mov r14, 1

; 1 bit rotate
; rol OF = XOR of LSB and MSB after rotate
; First four cases take the count from CL, the last four use an immediate 1.
clearof
mov rax, 0
mov rcx, 1
rol rax, cl
ofmerge

clearof
mov rax, 0x8000000000000000
mov rcx, 1
rol rax, cl
ofmerge

clearof
mov rax, 0xC000000000000000
mov rcx, 1
rol rax, cl
ofmerge

clearof
mov rax, 0x4000000000000000
mov rcx, 1
rol rax, cl
ofmerge

clearof
mov rax, 0
rol rax, 1
ofmerge

clearof
mov rax, 0x8000000000000000
rol rax, 1
ofmerge

clearof
mov rax, 0xC000000000000000
rol rax, 1
ofmerge

clearof
mov rax, 0x4000000000000000
rol rax, 1
ofmerge

hlt


================================================
FILE: unittests/ASM/Primary/ROR_Flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x00000000012a2040"
  }
}
%endif

; cfmerge: append the current carry flag to the result accumulator.
; Shifts r15 left by one bit and ORs in CF (read through LAHF, where CF is
; bit 0 of AH). Clobbers rax and the arithmetic flags.
%macro cfmerge 0

; Get CF
lahf
shr rax, 8
and rax, 1

; Merge in to results
shl r15, 1
or r15, rax

%endmacro

; Start from an explicitly empty accumulator instead of depending on the
; initial register state. MOV does not modify any flags.
mov r15, 0

; Sanity check of the helper itself: a set CF must be recorded as a 1 bit.
stc
cfmerge

; Each case rotates by a count that is >= the operand size and records the
; resulting CF with cfmerge (one bit per case, oldest case in the highest bit).
; NOTE(review): per the x86 rotate definition the count is masked first
; (5 bits for 8/16/32-bit operands, 6 bits for 64-bit operands). When the
; masked count is 0 the flags are left untouched, so the recorded bit is the
; CF left behind by the previous cfmerge (its final OR clears CF).

; 8-bit
; Shift 1 past size - Bit Set
mov rbx, 0x800
ror bl, 9
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x000
ror bl, 9
cfmerge

; Shift size - Bit Set
mov rbx, 0x80
ror bl, 8
cfmerge

; Shift size - Bit unset
mov rbx, 0x8000
ror bl, 8
cfmerge

; 8-bit - wrapped
; Shift 1 past size - Bit Set
mov rbx, 0x01
ror bl, 9
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0xFFF2
ror bl, 9
cfmerge

; Shift size - Bit Set
mov rbx, 0xFF
ror bl, 8
cfmerge

; Shift size - Bit unset
mov rbx, 0xFF00
ror bl, 8
cfmerge


; 16-bit
; Shift 1 past size - Bit Set
mov rbx, 0x80000
ror bx, 17
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x00000
ror bx, 17
cfmerge

; Shift size - Bit Set
mov rbx, 0x8000
ror bx, 16
cfmerge

; Shift size - Bit unset
mov rbx, 0x80000
ror bx, 16
cfmerge

; 32-bit
; Shift 1 past size - Bit Set
mov rbx, 0x800000000
ror ebx, 33
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x000000000
ror ebx, 33
cfmerge

; Shift size - Bit Set
mov rbx, 0x80000000
ror ebx, 32
cfmerge

; Shift size - Bit unset
mov rbx, 0x800000000
ror ebx, 32
cfmerge

; 32-bit - Wrapping
; Shift 1 past size - Bit Set
mov rbx, 0x02
ror ebx, 33
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x01
ror ebx, 33
cfmerge

; Shift size - Bit Set
mov rbx, 0x1
ror ebx, 32
cfmerge

; Shift size - Bit unset
mov rbx, 0x02
ror ebx, 32
cfmerge

; 64-bit
; Shift 1 past size - Bit Set
mov rbx, 0x02
ror rbx, 65
cfmerge

; Shift 1 past size - Bit unset
mov rbx, 0x8000000000000000
ror rbx, 65
cfmerge

; Shift size - Bit Set
mov rbx, 0x1
ror rbx, 64
cfmerge

; Shift size - Bit unset
mov rbx, 0x02
ror rbx, 64
cfmerge

hlt


================================================
FILE: unittests/ASM/Primary/ROR_OF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x00000000000000cc"
  }
}
%endif

; clearof: force OF to 0 by rotating a zero value by one bit. Clobbers r12.
%macro clearof 0
mov r12, 0
ror r12, 1
%endmacro

; ofmerge: record the current OF in r15. Clobbers r13 and r14.
; The OR happens before the shift, so after the last case the recorded bits
; sit in bits 8..1 of r15 and bit 0 is always 0.
%macro ofmerge 0
mov r14, 0
mov r13, 1
cmovo r14, r13

or r15, r14
shl r15, 1
%endmacro

mov r15, 0
; r14 is re-initialised inside ofmerge before it is read.
mov r14, 1

; 1 bit rotate
; ror OF = XOR of the two most significant bits of the result
; First four cases take the count from CL, the last four use an immediate 1.
clearof
mov rax, 0
mov rcx, 1
ror rax, cl
ofmerge

clearof
mov rax, 1
mov rcx, 1
ror rax, cl
ofmerge

clearof
mov rax, 0x8000000000000000
mov rcx, 1
ror rax, cl
ofmerge

clearof
mov rax, 0x8000000000000001
mov rcx, 1
ror rax, cl
ofmerge

clearof
mov rax, 0
ror rax, 1
ofmerge

clearof
mov rax, 1
ror rax, 1
ofmerge

clearof
mov rax, 0x8000000000000000
ror rax, 1
ofmerge

clearof
mov rax, 0x8000000000000001
ror rax, 1
ofmerge

hlt


================================================
FILE: unittests/ASM/Primary/SHL.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0xffffffffffffffff",
    "R13": "0xfffffffffffffffe",
    "R12": "0xfffffffffffffffc",
    "R11": "0xfffffffffffffff8",
    "R10": "0xfffffffffffffff0",
    "R9": "0xffffffffffffffe0",
    "R8": "0xffffffffffffffc0",
    "RBP": "0xffffffffffffff80",
    "RSP": "0xffffffffffffff00",
    "RDI": "0xffffffffffffff00",
    "RSI": "0xffffffffffffff00"
  }
}
%endif

; 8-bit SHL by immediate counts 0..10.
; Registers start as all ones; only the low byte may change.
mov r14, -1
mov r13, -1
mov r12, -1
mov r11, -1
mov r10, -1
mov r9, -1
mov r8, -1
mov rbp, -1
mov rsp, -1
mov rdi, -1
mov rsi, -1

; Counts of 8 and above shift every bit out of the 8-bit operand, so the
; last three registers are expected to end with a low byte of 0x00.
shl r14b, 0
shl r13b, 1
shl r12b, 2
shl r11b, 3
shl r10b, 4
shl r9b, 5
shl r8b, 6
shl bpl, 7
shl spl, 8
shl dil, 9
shl sil, 10

hlt


================================================
FILE: unittests/ASM/Primary/SHR.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0xffffffffffffffff",
    "R13": "0xffffffffffffff7f",
    "R12": "0xffffffffffffff3f",
    "R11": "0xffffffffffffff1f",
    "R10": "0xffffffffffffff0f",
    "R9": "0xffffffffffffff07",
    "R8": "0xffffffffffffff03",
    "RBP": "0xffffffffffffff01",
    "RSP": "0xffffffffffffff00",
    "RDI": "0xffffffffffffff00",
    "RSI": "0xffffffffffffff00"
  }
}
%endif

; 8-bit SHR by immediate counts 0..10.
; Registers start as all ones; only the low byte may change.
mov r14, -1
mov r13, -1
mov r12, -1
mov r11, -1
mov r10, -1
mov r9, -1
mov r8, -1
mov rbp, -1
mov rsp, -1
mov rdi, -1
mov rsi, -1

; Counts of 8 and above shift every bit out of the 8-bit operand, so the
; last three registers are expected to end with a low byte of 0x00.
shr r14b, 0
shr r13b, 1
shr r12b, 2
shr r11b, 3
shr r10b, 4
shr r9b, 5
shr r8b, 6
shr bpl, 7
shr spl, 8
shr dil, 9
shr sil, 10

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546A848"
  }
}
%endif

; ADD byte [mem], imm8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 1 of the first qword: 0x47 + 0x61 = 0xA8.
add byte [rdx + 8 * 0 + 1], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445466748"
  }
}
%endif

; OR byte [mem], imm8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 1 of the first qword: 0x47 | 0x61 = 0x67.
or byte [rdx + 8 * 0 + 1], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445A8A848"
  }
}
%endif

; ADC byte [mem], imm8 with carry-in of 0 and 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Carry-in 0: byte 1 = 0x47 + 0x61 + 0 = 0xA8.
clc
adc byte [rdx + 8 * 0 + 1], 0x61

; Carry-in 1: byte 2 = 0x46 + 0x61 + 1 = 0xA8.
stc
adc byte [rdx + 8 * 0 + 2], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445A8A848"
  }
}
%endif

; LOCK-prefixed ADC byte [mem], imm8 with carry-in of 0 and 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Carry-in 0: byte 1 = 0x47 + 0x61 + 0 = 0xA8.
clc
lock adc byte [rdx + 8 * 0 + 1], 0x61

; Carry-in 1: byte 2 = 0x46 + 0x61 + 1 = 0xA8.
stc
lock adc byte [rdx + 8 * 0 + 2], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445E4E648"
  }
}
%endif

; SBB byte [mem], imm8 with borrow-in of 0 and 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Borrow-in 0: byte 1 = 0x47 - 0x61 - 0 = 0xE6 (mod 256).
clc
sbb byte [rdx + 8 * 0 + 1], 0x61

; Borrow-in 1: byte 2 = 0x46 - 0x61 - 1 = 0xE4 (mod 256).
stc
sbb byte [rdx + 8 * 0 + 2], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445E4E648"
  }
}
%endif

; LOCK-prefixed SBB byte [mem], imm8 with borrow-in of 0 and 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Borrow-in 0: byte 1 = 0x47 - 0x61 - 0 = 0xE6 (mod 256).
clc
lock sbb byte [rdx + 8 * 0 + 1], 0x61

; Borrow-in 1: byte 2 = 0x46 - 0x61 - 1 = 0xE4 (mod 256).
stc
lock sbb byte [rdx + 8 * 0 + 2], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464148"
  }
}
%endif

; AND byte [mem], imm8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 1 of the first qword: 0x47 & 0x61 = 0x41.
and byte [rdx + 8 * 0 + 1], 0x61

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546E648"
  }
}
%endif

; SUB byte [mem], imm8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 1 of the first qword: 0x47 - 0x61 = 0xE6 (mod 256).
sub byte [rdx + 8 * 0 + 1], 0x61
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445462648"
  }
}
%endif

; XOR byte [mem], imm8.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 1 of the first qword: 0x47 ^ 0x61 = 0x26.
xor byte [rdx + 8 * 0 + 1], 0x61
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_80_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468348"
  }
}
%endif

; CMP byte [mem], imm8: memory must stay unchanged and only the flags are checked.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

cmp byte [rdx + 8 * 0 + 1], 0x61
; cmp = 0x47 - 0x61 = 0xE6
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
; MOV does not modify flags, so the CMP result is still live for LAHF.
; LAHF then overwrites only AH (bits 15:8 of RAX) with 0x83.
mov rax, [rdx + 8 * 0]
lahf

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344A6A84748",
    "RBX": "0x51525354B6B8BABC",
    "RCX": "0x61626364C6C8CACC",
    "RDX": "0x6162636465666668"
  }
}
%endif

; ADD [mem], imm16/imm32 for word, dword and qword operands.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; word:  0x4546 + 0x6162 = 0xA6A8 (bytes 2..3 of the first qword)
; dword: 0x55565758 + 0x61626364 = 0xB6B8BABC
; qword: low dword becomes 0xC6C8CACC, no carry into the upper dword
add  word [rdx + 8 * 0 + 2], 0x6162
add dword [rdx + 8 * 1 + 0], 0x61626364
add qword [rdx + 8 * 2 + 0], 0x61626364

; Negative immediate is sign-extended to 64 bits: value - 0x100.
add qword [rdx + 8 * 3 + 0], -256

; RDX is loaded last because it is also the base register.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434465664748",
    "RBX": "0x515253547576777C",
    "RCX": "0x616263646566676C",
    "RDX": "0xFFFFFFFFFFFFFF68"
  }
}
%endif

; OR [mem], imm16/imm32 for word, dword and qword operands.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; word:  0x4546 | 0x6162 = 0x6566 (bytes 2..3 of the first qword)
; dword: 0x55565758 | 0x61626364 = 0x7576777C
; qword: low dword becomes 0x6566676C, upper dword unchanged
or  word [rdx + 8 * 0 + 2], 0x6162
or dword [rdx + 8 * 1 + 0], 0x61626364
or qword [rdx + 8 * 2 + 0], 0x61626364

; Negative immediate is sign-extended to 64 bits, setting every bit above bit 7.
or qword [rdx + 8 * 3 + 0], -256

; RDX is loaded last because it is also the base register.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]


hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142A4A7A6A84748",
    "RBX": "0x51525354181B1E21",
    "RCX": "0x61626365282B2E31",
    "RDX": "0x6162636465666569"
  }
}
%endif

; ADC [mem], imm16/imm32 for word, dword and qword operands,
; first with carry-in 0 and then with carry-in 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; Carry-in 0. CLC is repeated because each ADC rewrites CF.
clc
adc word [rdx + 8 * 0 + 2], 0x6162
clc
adc dword [rdx + 8 * 1 + 0], 0x61626364
clc
adc qword [rdx + 8 * 2 + 0], 0x61626364
clc
adc qword [rdx + 8 * 3 + 0], -256

; Carry-in 1. The word case targets offset 4 (a fresh word); the dword and
; qword cases accumulate on top of the results of the first pass.
stc
adc word [rdx + 8 * 0 + 4], 0x6162
stc
adc dword [rdx + 8 * 1 + 0], 0x61626364
stc
adc qword [rdx + 8 * 2 + 0], 0x61626364
stc
adc qword [rdx + 8 * 3 + 0], -256

; RDX is loaded last because it is also the base register.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142A4A7A6A84748",
    "RBX": "0x51525354181B1E21",
    "RCX": "0x61626365282B2E31",
    "RDX": "0x6162636465666569"
  }
}
%endif

; LOCK-prefixed ADC with a 16/32-bit immediate against word, dword and qword
; memory operands; same data and expected results as the non-locked variant.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; Pass 1: carry-in cleared before each locked adc
clc
lock adc word [rdx + 8 * 0 + 2], 0x6162
clc
lock adc dword [rdx + 8 * 1 + 0], 0x61626364
clc
lock adc qword [rdx + 8 * 2 + 0], 0x61626364
clc
lock adc qword [rdx + 8 * 3 + 0], -256

; Pass 2: carry-in set before each locked adc (adds immediate + 1).
; The word accesses at offsets 2 and 4 are not naturally aligned to the qword.
stc
lock adc word [rdx + 8 * 0 + 4], 0x6162
stc
lock adc dword [rdx + 8 * 1 + 0], 0x61626364
stc
lock adc qword [rdx + 8 * 2 + 0], 0x61626364
stc
lock adc qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142E1E1E3E44748",
    "RBX": "0x515253549291908F",
    "RCX": "0x61626363A2A1A09F",
    "RDX": "0x6162636465666967"
  }
}
%endif

; SBB with a 16/32-bit immediate against word, dword and qword memory operands,
; once with borrow-in clear (clc) and once with borrow-in set (stc).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; Pass 1: CF = 0, so each sbb behaves like a plain sub
clc
sbb  word [rdx + 8 * 0 + 2], 0x6162
clc
sbb dword [rdx + 8 * 1 + 0], 0x61626364
clc
sbb qword [rdx + 8 * 2 + 0], 0x61626364
clc
sbb qword [rdx + 8 * 3 + 0], -256

; Pass 2: CF = 1, so each sbb subtracts immediate + 1.
; The word case targets byte offset 4 (a different word than pass 1).
stc
sbb  word [rdx + 8 * 0 + 4], 0x6162
stc
sbb dword [rdx + 8 * 1 + 0], 0x61626364
stc
sbb qword [rdx + 8 * 2 + 0], 0x61626364
stc
sbb qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142E1E1E3E44748",
    "RBX": "0x515253549291908F",
    "RCX": "0x61626363A2A1A09F",
    "RDX": "0x6162636465666967"
  }
}
%endif

; LOCK-prefixed SBB with a 16/32-bit immediate against word, dword and qword
; memory operands; same data and expected results as the non-locked variant.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; Pass 1: borrow-in cleared before each locked sbb
clc
lock sbb  word [rdx + 8 * 0 + 2], 0x6162
clc
lock sbb dword [rdx + 8 * 1 + 0], 0x61626364
clc
lock sbb qword [rdx + 8 * 2 + 0], 0x61626364
clc
lock sbb qword [rdx + 8 * 3 + 0], -256

; Pass 2: borrow-in set before each locked sbb (subtracts immediate + 1)
stc
lock sbb  word [rdx + 8 * 0 + 4], 0x6162
stc
lock sbb dword [rdx + 8 * 1 + 0], 0x61626364
stc
lock sbb qword [rdx + 8 * 2 + 0], 0x61626364
stc
lock sbb qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434441424748",
    "RBX": "0x5152535441424340",
    "RCX": "0x0000000061626360",
    "RDX": "0x6162636465666700"
  }
}
%endif

; AND with a 16/32-bit immediate against word, dword and qword memory operands.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; 0x4546 & 0x6162 = 0x4142 (word at byte offset 2 of slot 0)
and  word [rdx + 8 * 0 + 2], 0x6162
and dword [rdx + 8 * 1 + 0], 0x61626364
; Positive immediate sign-extends with zeros, clearing the upper 32 bits
and qword [rdx + 8 * 2 + 0], 0x61626364
; -256 sign-extends to 0xFFFFFFFFFFFFFF00, clearing only the low byte
and qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]


hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344E3E44748",
    "RBX": "0x51525354F3F3F3F4",
    "RCX": "0x6162636404040404",
    "RDX": "0x6162636465666868"
  }
}
%endif

; SUB with a 16/32-bit immediate against word, dword and qword memory operands.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; 0x4546 - 0x6162 wraps to 0xE3E4 (word at byte offset 2 of slot 0)
sub  word [rdx + 8 * 0 + 2], 0x6162
; 0x55565758 - 0x61626364 wraps to 0xF3F3F3F4
sub dword [rdx + 8 * 1 + 0], 0x61626364
sub qword [rdx + 8 * 2 + 0], 0x61626364
; Subtracting -256 adds 0x100
sub qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434424244748",
    "RBX": "0x515253543434343C",
    "RCX": "0x616263640404040C",
    "RDX": "0x9E9D9C9B9A999868"
  }
}
%endif

; XOR with a 16/32-bit immediate against word, dword and qword memory operands.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

; 0x4546 ^ 0x6162 = 0x2424 (word at byte offset 2 of slot 0)
xor  word [rdx + 8 * 0 + 2], 0x6162
xor dword [rdx + 8 * 1 + 0], 0x61626364
xor qword [rdx + 8 * 2 + 0], 0x61626364
; -256 sign-extends to 0xFFFFFFFFFFFFFF00: inverts every byte except the lowest
xor qword [rdx + 8 * 3 + 0], -256

; Load results into the registers checked by RegData (RDX is overwritten last)
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_81_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8700",
    "RBX": "0x8300",
    "RCX": "0x0200",
    "RSI": "0x0300"
  }
}
%endif

; CMP with a 16/32-bit immediate against word, dword and qword memory operands.
; CMP does not write its destination, so each case captures the resulting
; flags with LAHF (into AH) and stashes RAX in a register checked by RegData.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 3], rax

cmp qword [rdx + 8 * 3 + 0], -256
; cmp = 0x6162636465666768 - -256(0xFFFFFFFFFFFFFF00) = 0x6162636465666868
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000011
; OF: LAHF doesn't load - 0
; mov (unlike xor) leaves the flags from cmp intact
mov rax, 0
lahf
mov rsi, rax

cmp qword [rdx + 8 * 2 + 0], 0x61626364
; cmp = 0x6162636465666768 - 0x61626364 = 0x6162636404040404
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rcx, rax

cmp dword [rdx + 8 * 1 + 0], 0x61626364
; cmp = 0x55565758 - 0x61626364 = 0xF3F3F3F4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rbx, rax

cmp word [rdx + 8 * 0 + 2], 0x6162
; cmp = 0x4546 - 0x6162 = 0xE3E4
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000111
; OF: LAHF doesn't load - 0
; The last result stays in RAX itself
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464729",
    "RBX": "0x5152535455565739",
    "RCX": "0x6162636465666749"
  }
}
%endif

; ADD with a small negative immediate (-31) against word, dword and qword
; memory operands; the immediate is sign-extended to the operand size.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Each operand's low byte drops by 0x1F: 0x48->0x29, 0x58->0x39, 0x68->0x49
add  word [rdx + 8 * 0 + 0], -31
add dword [rdx + 8 * 1 + 0], -31
add qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546FFE9",
    "RBX": "0x51525354FFFFFFF9",
    "RCX": "0xFFFFFFFFFFFFFFE9"
  }
}
%endif

; OR with a small negative immediate (-31) against word, dword and qword
; memory operands; the immediate is sign-extended to the operand size.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; -31 is ...FFE1 at every width, so all bits above the low byte become 1
or  word [rdx + 8 * 0 + 0], -31
or dword [rdx + 8 * 1 + 0], -31
or qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142432645274748",
    "RBX": "0x515253545556571B",
    "RCX": "0x616263646566672B"
  }
}
%endif

; ADC with a small negative immediate (-31) against word, dword and qword
; memory operands, once with carry-in clear and once with carry-in set.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Pass 1: CF = 0, each adc adds -31
clc
adc word [rdx + 8 * 0 + 2], -31
clc
adc dword [rdx + 8 * 1 + 0], -31
clc
adc qword [rdx + 8 * 2 + 0], -31

; Pass 2: CF = 1, each adc adds -31 + 1 = -30.
; The word case targets byte offset 4 (a different word than pass 1).
stc
adc word [rdx + 8 * 0 + 4], -31
stc
adc dword [rdx + 8 * 1 + 0], -31
stc
adc qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142432645274748",
    "RBX": "0x515253545556571B",
    "RCX": "0x616263646566672B"
  }
}
%endif

; LOCK-prefixed ADC with a small negative immediate (-31) against word, dword
; and qword memory operands; same expected results as the non-locked variant.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Pass 1: carry-in cleared before each locked adc
clc
lock adc word [rdx + 8 * 0 + 2], -31
clc
lock adc dword [rdx + 8 * 1 + 0], -31
clc
lock adc qword [rdx + 8 * 2 + 0], -31

; Pass 2: carry-in set before each locked adc (adds -30)
stc
lock adc word [rdx + 8 * 0 + 4], -31
stc
lock adc dword [rdx + 8 * 1 + 0], -31
stc
lock adc qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142436245654748",
    "RBX": "0x5152535455565795",
    "RCX": "0x61626364656667A5"
  }
}
%endif

; SBB with a small negative immediate (-31) against word, dword and qword
; memory operands, once with borrow-in clear and once with borrow-in set.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Pass 1: CF = 0, each sbb subtracts -31 (i.e. adds 31)
clc
sbb word [rdx + 8 * 0 + 2], -31
clc
sbb dword [rdx + 8 * 1 + 0], -31
clc
sbb qword [rdx + 8 * 2 + 0], -31

; Pass 2: CF = 1, each sbb subtracts -31 + 1 (i.e. adds 30).
; The word case targets byte offset 4 (a different word than pass 1).
stc
sbb word [rdx + 8 * 0 + 4], -31
stc
sbb dword [rdx + 8 * 1 + 0], -31
stc
sbb qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142436245654748",
    "RBX": "0x5152535455565795",
    "RCX": "0x61626364656667A5"
  }
}
%endif

; LOCK-prefixed SBB with a small negative immediate (-31) against word, dword
; and qword memory operands; same expected results as the non-locked variant.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Pass 1: borrow-in cleared before each locked sbb
clc
lock sbb word [rdx + 8 * 0 + 2], -31
clc
lock sbb dword [rdx + 8 * 1 + 0], -31
clc
lock sbb qword [rdx + 8 * 2 + 0], -31

; Pass 2: borrow-in set before each locked sbb (adds 30)
stc
lock sbb word [rdx + 8 * 0 + 4], -31
stc
lock sbb dword [rdx + 8 * 1 + 0], -31
stc
lock sbb qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464740",
    "RBX": "0x5152535455565740",
    "RCX": "0x6162636465666760"
  }
}
%endif

; AND with a small negative immediate (-31) against word, dword and qword
; memory operands; the immediate is sign-extended to the operand size.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; -31 is ...FFE1, so only bits 1-4 of the low byte are cleared:
; 0x48->0x40, 0x58->0x40, 0x68->0x60
and  word [rdx + 8 * 0 + 0], -31
and dword [rdx + 8 * 1 + 0], -31
and qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464767",
    "RBX": "0x5152535455565777",
    "RCX": "0x6162636465666787"
  }
}
%endif

; SUB with a small negative immediate (-31) against word, dword and qword
; memory operands; the immediate is sign-extended to the operand size.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Subtracting -31 adds 0x1F: 0x48->0x67, 0x58->0x77, 0x68->0x87
sub  word [rdx + 8 * 0 + 0], -31
sub dword [rdx + 8 * 1 + 0], -31
sub qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B8A9",
    "RBX": "0x51525354AAA9A8B9",
    "RCX": "0x9E9D9C9B9A999889"
  }
}
%endif

; XOR with a small negative immediate (-31) against word, dword and qword
; memory operands; the immediate is sign-extended to the operand size.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; -31 is ...FFE1: every byte above the lowest is inverted at each width
xor  word [rdx + 8 * 0 + 0], -31
xor dword [rdx + 8 * 1 + 0], -31
xor qword [rdx + 8 * 2 + 0], -31

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/1_83_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0700",
    "RBX": "0x0700",
    "RCX": "0x0700"
  }
}
%endif

; CMP with a small negative immediate (-31) against word, dword and qword
; memory operands. Each case captures the resulting flags with LAHF (into AH)
; and keeps RAX in a register checked by RegData.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

cmp qword [rdx + 8 * 2 + 0], -31
; cmp = 0x6162636465666768 - -31 = 0x6162636465666787
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000111
; OF: LAHF doesn't load - 0
; mov (unlike xor) leaves the flags from cmp intact
mov rax, 0
lahf
mov rcx, rax

cmp dword [rdx + 8 * 1 + 0], -31
; cmp = 0x55565758 - -31 = 0x55565777
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000111
; OF: LAHF doesn't load - 0
mov rax, 0
lahf
mov rbx, rax

cmp word [rdx + 8 * 0 + 2], -31
; cmp = 0x4546 - -31 = 0x4565
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000111
; OF: LAHF doesn't load - 0
; The last result stays in RAX itself
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468E48"
  }
}
%endif

; ROL of a byte memory operand by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Count 0x61 is masked to 5 bits (= 1): byte 0x47 rotates left to 0x8E
rol byte [rdx + 8 * 0 + 1], 0x61

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546A348"
  }
}
%endif

; ROR of a byte memory operand by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Count 0x61 is masked to 5 bits (= 1): byte 0x47 rotates right to 0xA3
ror byte [rdx + 8 * 0 + 1], 0x61

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x06",
    "RCX": "0x04",
    "RDX": "0x02",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL of 8-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x40
mov rsi, 0x40

; 0x01 with CF=1 -> 0x06, CF out = 0
stc
rcl bl, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x01 with CF=0 -> 0x04, CF out = 0
clc
rcl cl, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x40 with CF=1 -> 0x02, CF out = 1
stc
rcl dl, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x40 with CF=0 -> 0x00, CF out = 1
clc
rcl sil, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0006",
    "RCX": "0x0004",
    "RDX": "0x0002",
    "RSI": "0x0000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL of 16-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x0001
mov rcx, 0x0001
mov rdx, 0x4000
mov rsi, 0x4000

; 0x0001 with CF=1 -> 0x0006, CF out = 0
stc
rcl bx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x0001 with CF=0 -> 0x0004, CF out = 0
clc
rcl cx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x4000 with CF=1 -> 0x0002, CF out = 1
stc
rcl dx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x4000 with CF=0 -> 0x0000, CF out = 1
clc
rcl si, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000006",
    "RCX": "0x00000004",
    "RDX": "0x00000002",
    "RSI": "0x00000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL of 32-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x00000001
mov rcx, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000

; 0x00000001 with CF=1 -> 0x00000006, CF out = 0
stc
rcl ebx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x00000001 with CF=0 -> 0x00000004, CF out = 0
clc
rcl ecx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x40000000 with CF=1 -> 0x00000002, CF out = 1
stc
rcl edx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x40000000 with CF=0 -> 0x00000000, CF out = 1
clc
rcl esi, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_02_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0000000000000006",
    "RCX": "0x0000000000000004",
    "RDX": "0x0000000000000002",
    "RSI": "0x0000000000000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL of 64-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x0000000000000001
mov rcx, 0x0000000000000001
mov rdx, 0x4000000000000000
mov rsi, 0x4000000000000000

; 1 with CF=1 -> 6, CF out = 0
stc
rcl rbx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 1 with CF=0 -> 4, CF out = 0
clc
rcl rcx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x4000000000000000 with CF=1 -> 2, CF out = 1
stc
rcl rdx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x4000000000000000 with CF=0 -> 0, CF out = 1
clc
rcl rsi, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x40",
    "RCX": "0x00",
    "RDX": "0x60",
    "RSI": "0x20",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR of 8-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x02
mov rcx, 0x02
mov rdx, 0x80
mov rsi, 0x80

; 0x02 with CF=1 -> 0x40, CF out = 1
stc
rcr bl, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x02 with CF=0 -> 0x00, CF out = 1
clc
rcr cl, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x80 with CF=1 -> 0x60, CF out = 0
stc
rcr dl, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x80 with CF=0 -> 0x20, CF out = 0
clc
rcr sil, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x4000",
    "RCX": "0x0000",
    "RDX": "0x6000",
    "RSI": "0x2000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR of 16-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x0002
mov rcx, 0x0002
mov rdx, 0x8000
mov rsi, 0x8000

; 0x0002 with CF=1 -> 0x4000, CF out = 1
stc
rcr bx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x0002 with CF=0 -> 0x0000, CF out = 1
clc
rcr cx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x8000 with CF=1 -> 0x6000, CF out = 0
stc
rcr dx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x8000 with CF=0 -> 0x2000, CF out = 0
clc
rcr si, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_03_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x40000000",
    "RCX": "0x00000000",
    "RDX": "0x60000000",
    "RSI": "0x20000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR of 32-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x00000002
mov rcx, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000

; 0x00000002 with CF=1 -> 0x40000000, CF out = 1
stc
rcr ebx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x00000002 with CF=0 -> 0x00000000, CF out = 1
clc
rcr ecx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x80000000 with CF=1 -> 0x60000000, CF out = 0
stc
rcr edx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x80000000 with CF=0 -> 0x20000000, CF out = 0
clc
rcr esi, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_03_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x4000000000000000",
    "RCX": "0x0000000000000000",
    "RDX": "0x6000000000000000",
    "RSI": "0x2000000000000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR of 64-bit registers by an immediate count of 2, with carry-in both set
; and clear. After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF,
; so (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x0000000000000002
mov rcx, 0x0000000000000002
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000

; 2 with CF=1 -> 0x4000000000000000, CF out = 1
stc
rcr rbx, 2
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 2 with CF=0 -> 0, CF out = 1
clc
rcr rcx, 2
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x8000000000000000 with CF=1 -> 0x6000000000000000, CF out = 0
stc
rcr rdx, 2
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x8000000000000000 with CF=0 -> 0x2000000000000000, CF out = 0
clc
rcr rsi, 2
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468E48"
  }
}
%endif

; SHL of a byte memory operand by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Count 0x61 is masked to 5 bits (= 1): byte 0x47 << 1 = 0x8E
shl byte [rdx + 8 * 0 + 1], 0x61

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445462348"
  }
}
%endif

; SHR of a byte memory operand by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Count 0x61 is masked to 5 bits (= 1): byte 0x47 >> 1 = 0x23
shr byte [rdx + 8 * 0 + 1], 0x61

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_07.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445462348"
  }
}
%endif

; SAR of a byte memory operand by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Count 0x61 is masked to 5 bits (= 1): byte 0x47 >> 1 = 0x23.
; NOTE(review): 0x47 has its sign bit clear, so this result is the same as
; SHR's; sign replication is not exercised by this input.
sar byte [rdx + 8 * 0 + 1], 0x61

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C0_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8300",
    "RBX": "0xFFFFFFFFFFFFFFFE"
  }
}
%endif


; SAR of a 64-bit register with a negative value by 62, checking both the
; sign-filled result (RBX) and the resulting flags captured through LAHF (RAX).
mov rax, 0
mov rbx, 0xA142434445464748
; Top two bits are 10b, so the sign-extended result is 0xFFFFFFFFFFFFFFFE;
; the last bit shifted out (bit 61) is 1, which becomes CF
sar rbx, 62
lahf
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000011

; Mask out AF since it is undefined
; (AF is bit 4 of AH, i.e. bit 12 of RAX)
and rax, ~0x1000

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434415194748",
    "RBX": "0x5152535455595D61",
    "RCX": "0x95999da185898d91"
  }
}
%endif

; ROL of word, dword and qword memory operands by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Count 0x62 is masked to 5 bits (= 2) for the word and dword forms and to
; 6 bits (= 34) for the qword form, as the expected values reflect
rol  word [rdx + 8 * 0 + 2], 0x62
rol dword [rdx + 8 * 1 + 0], 0x62
rol qword [rdx + 8 * 2 + 0], 0x62

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434491514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x195999da185898d9"
  }
}
%endif

; ROR of word, dword and qword memory operands by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Count 0x62 is masked to 5 bits (= 2) for the word and dword forms and to
; 6 bits (= 34) for the qword form, as the expected values reflect
ror  word [rdx + 8 * 0 + 2], 0x62
ror dword [rdx + 8 * 1 + 0], 0x62
ror qword [rdx + 8 * 2 + 0], 0x62

; Load results into the registers checked by RegData
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434415184748",
    "RBX": "0x5152535455595D60",
    "RCX": "0x95999da000000000"
  }
}
%endif

; SHL of word, dword and qword memory operands by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Count 0x62 is masked to 5 bits (= 2) for the word and dword forms and to
; 6 bits (= 34) for the qword form, as the expected values reflect
shl  word [rdx + 8 * 0 + 2], 0x62
shl dword [rdx + 8 * 1 + 0], 0x62
shl qword [rdx + 8 * 2 + 0], 0x62

; Load results into the registers checked by RegData.
; Only three slots are initialised and checked, so slot 3 is not read back.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434411514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x00000000185898D9"
  }
}
%endif

; SHR of word, dword and qword memory operands by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Count 0x62 is masked to 5 bits (= 2) for the word and dword forms and to
; 6 bits (= 34) for the qword form, as the expected values reflect
shr  word [rdx + 8 * 0 + 2], 0x62
shr dword [rdx + 8 * 1 + 0], 0x62
shr qword [rdx + 8 * 2 + 0], 0x62

; Load results into the registers checked by RegData.
; Only three slots are initialised and checked, so slot 3 is not read back.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_05_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0300",
    "RBX": "0x2"
  }
}
%endif


; SHR of a 64-bit register by 62, checking both the zero-filled result (RBX)
; and the resulting flags captured through LAHF (RAX).
mov rax, 0
mov rbx, 0xA142434445464748
; Top two bits are 10b, so the result is 2; the last bit shifted out
; (bit 61) is 1, which becomes CF
shr rbx, 62
lahf
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000011

; Mask out AF since it is undefined
; (AF is bit 4 of AH, i.e. bit 12 of RAX)
and rax, ~0x1000

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_C1_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434411514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x00000000185898D9"
  }
}
%endif

; SAR of word, dword and qword memory operands by an 8-bit immediate count.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Count 0x62 is masked to 5 bits (= 2) for the word and dword forms and to
; 6 bits (= 34) for the qword form, as the expected values reflect.
; NOTE(review): every operand here has its sign bit clear, so the results are
; the same as SHR's; sign replication is not exercised by these inputs.
sar  word [rdx + 8 * 0 + 2], 0x62
sar dword [rdx + 8 * 1 + 0], 0x62
sar qword [rdx + 8 * 2 + 0], 0x62

; Load results into the registers checked by RegData.
; Only three slots are initialised and checked, so slot 3 is not read back.
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468E48"
  }
}
%endif

; ROL of a byte memory operand by a count of 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 0x47 rotates left by one to 0x8E
rol byte [rdx + 8 * 0 + 1], 1

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546A348"
  }
}
%endif

; ROR of a byte memory operand by a count of 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 0x47 rotates right by one to 0xA3
ror byte [rdx + 8 * 0 + 1], 1

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x03",
    "RCX": "0x02",
    "RDX": "0x81",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCL of 8-bit registers by a count of 1, with carry-in both set and clear.
; After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF, so
; (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x40
mov rsi, 0x80

; 0x01 with CF=1 -> 0x03, CF out = 0
stc
rcl bl, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x01 with CF=0 -> 0x02, CF out = 0
clc
rcl cl, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x40 with CF=1 -> 0x81, CF out = 0
stc
rcl dl, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x80 with CF=0 -> 0x00, CF out = 1
clc
rcl sil, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x03",
    "RCX": "0x02",
    "RDX": "0x81",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL of 8-bit registers by a count of 1, checking the overflow flag.
; For a 1-bit RCL, OF is the XOR of the result's top bit and the carry-out.
; Each case zeroes its result register with mov (which leaves flags intact)
; and then uses cmovo to set it to 1 when OF is set.
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x40
mov rsi, 0x80
mov r15, 1

; 0x01 with CF=1 -> 0x03, CF out = 0: OF = 0
stc
rcl bl, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; 0x01 with CF=0 -> 0x02, CF out = 0: OF = 0
clc
rcl cl, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; 0x40 with CF=1 -> 0x81, CF out = 0: OF = 1
stc
rcl dl, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; 0x80 with CF=0 -> 0x00, CF out = 1: OF = 1
clc
rcl sil, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80",
    "RCX": "0x00",
    "RDX": "0xC0",
    "RSI": "0x40",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR of 8-bit registers by a count of 1, with carry-in both set and clear.
; After each rotate, LAHF puts the flags in AH; bit 0 of AH is CF, so
; (ax >> 8) & 1 leaves the carry-out in R8..R11.
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x80
mov rsi, 0x80

; 0x01 with CF=1 -> 0x80, CF out = 1
stc
rcr bl, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; 0x01 with CF=0 -> 0x00, CF out = 1
clc
rcr cl, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; 0x80 with CF=1 -> 0xC0, CF out = 0
stc
rcr dl, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; 0x80 with CF=0 -> 0x40, CF out = 0
clc
rcr sil, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80",
    "RCX": "0x00",
    "RDX": "0xC0",
    "RSI": "0x40",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR of 8-bit registers by a count of 1, checking the overflow flag.
; For a 1-bit RCR, OF is the XOR of the two most-significant result bits.
; Each case zeroes its result register with mov (which leaves flags intact)
; and then uses cmovo to set it to 1 when OF is set.
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x80
mov rsi, 0x80
mov r15, 1

; 0x01 with CF=1 -> 0x80 (top bits 10b): OF = 1
stc
rcr bl, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; 0x01 with CF=0 -> 0x00 (top bits 00b): OF = 0
clc
rcr cl, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; 0x80 with CF=1 -> 0xC0 (top bits 11b): OF = 0
stc
rcr dl, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; 0x80 with CF=0 -> 0x40 (top bits 01b): OF = 1
clc
rcr sil, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445468E48"
  }
}
%endif

; SHL of a byte memory operand by a count of 1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte 0x47 << 1 = 0x8E
shl byte [rdx + 8 * 0 + 1], 1

; Read back the whole qword so untouched neighbouring bytes are checked too
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445462348"
  }
}
%endif

; SHR byte [mem], 1: logically shifts a single byte in memory right by one and
; checks that only that byte of the surrounding qword changes.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
; NOTE(review): this second qword is written but never read back in this test.
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1 is 0x47 (little-endian); 0x47 >> 1 = 0x23.
shr byte [rdx + 8 * 0 + 1], 1

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D0_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445462348"
  }
}
%endif

; SAR byte [mem], 1: arithmetically shifts a single byte in memory right by
; one and checks that only that byte of the surrounding qword changes.
; NOTE(review): the shifted byte (0x47) has its sign bit clear, so the expected
; result is the same as for a logical SHR; sign-fill is not exercised here.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
; NOTE(review): this second qword is written but never read back in this test.
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1 is 0x47 (little-endian); 0x47 >> 1 = 0x23.
sar byte [rdx + 8 * 0 + 1], 1

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243448A8C4748",
    "RBX": "0x51525354AAACAEB0",
    "RCX": "0xC2C4C6C8CACCCED0"
  }
}
%endif

; ROL r/m16/32/64, 1 on memory operands: rotates a word, a dword and a qword
; left by one bit and reads the three containing qwords back for comparison.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546             -> 0x8A8C
; dword at offset 8: 0x55565758         -> 0xAAACAEB0 (upper dword untouched)
; qword at offset 16: 0x6162636465666768 -> 0xC2C4C6C8CACCCED0
rol  word [rdx + 8 * 0 + 2], 1
rol dword [rdx + 8 * 1 + 0], 1
rol qword [rdx + 8 * 2 + 0], 1

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434422A34748",
    "RBX": "0x515253542AAB2BAC",
    "RCX": "0x30B131B232B333B4"
  }
}
%endif

; ROR r/m16/32/64, 1 on memory operands: rotates a word, a dword and a qword
; right by one bit and reads the three containing qwords back for comparison.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546             -> 0x22A3
; dword at offset 8: 0x55565758         -> 0x2AAB2BAC (upper dword untouched)
; qword at offset 16: 0x6162636465666768 -> 0x30B131B232B333B4
ror  word [rdx + 8 * 0 + 2], 1
ror dword [rdx + 8 * 1 + 0], 1
ror qword [rdx + 8 * 2 + 0], 1

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0003",
    "RCX": "0x0002",
    "RDX": "0x0001",
    "RSI": "0x0000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m16, 1: rotate a 16-bit register left by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0001
mov rcx, 0x0001
mov rdx, 0x8000
mov rsi, 0x8000

; CF=1, bx=0x0001 -> bx=0x0003, CF=0
stc
rcl bx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, cx=0x0001 -> cx=0x0002, CF=0
clc
rcl cx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dx=0x8000 -> dx=0x0001, CF=1
stc
rcl dx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, si=0x8000 -> si=0x0000, CF=1
clc
rcl si, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000003",
    "RCX": "0x00000002",
    "RDX": "0x00000001",
    "RSI": "0x00000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m32, 1: rotate a 32-bit register left by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x00000001
mov rcx, 0x00000001
mov rdx, 0x80000000
mov rsi, 0x80000000

; CF=1, ebx=0x00000001 -> ebx=0x00000003, CF=0
stc
rcl ebx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, ecx=0x00000001 -> ecx=0x00000002, CF=0
clc
rcl ecx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, edx=0x80000000 -> edx=0x00000001, CF=1
stc
rcl edx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, esi=0x80000000 -> esi=0x00000000, CF=1
clc
rcl esi, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0000000000000003",
    "RCX": "0x0000000000000002",
    "RDX": "0x0000000000000001",
    "RSI": "0x0000000000000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m64, 1: rotate a 64-bit register left by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0000000000000001
mov rcx, 0x0000000000000001
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000

; CF=1, rbx=1 -> rbx=3, CF=0
stc
rcl rbx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, rcx=1 -> rcx=2, CF=0
clc
rcl rcx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, rdx=0x8000000000000000 -> rdx=1, CF=1
stc
rcl rdx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, rsi=0x8000000000000000 -> rsi=0, CF=1
clc
rcl rsi, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0003",
    "RCX": "0x0002",
    "RDX": "0x0001",
    "RSI": "0x0000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m16, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCL, OF is the XOR of the resulting CF and the MSB of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x0001
mov rcx, 0x0001
mov rdx, 0x8000
mov rsi, 0x8000
mov r15, 1

; CF=1, bx=0x0001 -> bx=0x0003, CF=0, OF=0
stc
rcl bx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, cx=0x0001 -> cx=0x0002, CF=0, OF=0
clc
rcl cx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, dx=0x8000 -> dx=0x0001, CF=1, OF=1
stc
rcl dx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, si=0x8000 -> si=0x0000, CF=1, OF=1
clc
rcl si, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000003",
    "RCX": "0x00000002",
    "RDX": "0x00000001",
    "RSI": "0x00000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m32, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCL, OF is the XOR of the resulting CF and the MSB of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x00000001
mov rcx, 0x00000001
mov rdx, 0x80000000
mov rsi, 0x80000000
mov r15, 1

; CF=1, ebx=0x00000001 -> ebx=0x00000003, CF=0, OF=0
stc
rcl ebx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, ecx=0x00000001 -> ecx=0x00000002, CF=0, OF=0
clc
rcl ecx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, edx=0x80000000 -> edx=0x00000001, CF=1, OF=1
stc
rcl edx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, esi=0x80000000 -> esi=0x00000000, CF=1, OF=1
clc
rcl esi, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_02_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0000000000000003",
    "RCX": "0x0000000000000002",
    "RDX": "0x0000000000000001",
    "RSI": "0x0000000000000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m64, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCL, OF is the XOR of the resulting CF and the MSB of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x0000000000000001
mov rcx, 0x0000000000000001
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000
mov r15, 1

; CF=1, rbx=1 -> rbx=3, CF=0, OF=0
stc
rcl rbx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, rcx=1 -> rcx=2, CF=0, OF=0
clc
rcl rcx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, rdx=0x8000000000000000 -> rdx=1, CF=1, OF=1
stc
rcl rdx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, rsi=0x8000000000000000 -> rsi=0, CF=1, OF=1
clc
rcl rsi, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000",
    "RCX": "0x0000",
    "RDX": "0xC000",
    "RSI": "0x4000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m16, 1: rotate a 16-bit register right by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0001
mov rcx, 0x0001
mov rdx, 0x8000
mov rsi, 0x8000

; CF=1, bx=0x0001 -> bx=0x8000, CF=1
stc
rcr bx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, cx=0x0001 -> cx=0x0000, CF=1
clc
rcr cx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dx=0x8000 -> dx=0xC000, CF=0
stc
rcr dx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, si=0x8000 -> si=0x4000, CF=0
clc
rcr si, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80000000",
    "RCX": "0x00000000",
    "RDX": "0xC0000000",
    "RSI": "0x40000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m32, 1: rotate a 32-bit register right by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x00000001
mov rcx, 0x00000001
mov rdx, 0x80000000
mov rsi, 0x80000000

; CF=1, ebx=0x00000001 -> ebx=0x80000000, CF=1
stc
rcr ebx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, ecx=0x00000001 -> ecx=0x00000000, CF=1
clc
rcr ecx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, edx=0x80000000 -> edx=0xC0000000, CF=0
stc
rcr edx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, esi=0x80000000 -> esi=0x40000000, CF=0
clc
rcr esi, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000000000000000",
    "RCX": "0x0000000000000000",
    "RDX": "0xC000000000000000",
    "RSI": "0x4000000000000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m64, 1: rotate a 64-bit register right by one bit through CF.
; Each case seeds CF with stc/clc, rotates, then captures the resulting CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0000000000000001
mov rcx, 0x0000000000000001
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000

; CF=1, rbx=1 -> rbx=0x8000000000000000, CF=1
stc
rcr rbx, 1
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, rcx=1 -> rcx=0, CF=1
clc
rcr rcx, 1
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, rdx=0x8000000000000000 -> rdx=0xC000000000000000, CF=0
stc
rcr rdx, 1
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, rsi=0x8000000000000000 -> rsi=0x4000000000000000, CF=0
clc
rcr rsi, 1
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000",
    "RCX": "0x0000",
    "RDX": "0xC000",
    "RSI": "0x4000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m16, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x0001
mov rcx, 0x0001
mov rdx, 0x8000
mov rsi, 0x8000
mov r15, 1

; CF=1, bx=0x0001 -> bx=0x8000, OF=1
stc
rcr bx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, cx=0x0001 -> cx=0x0000, OF=0
clc
rcr cx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, dx=0x8000 -> dx=0xC000, OF=0
stc
rcr dx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, si=0x8000 -> si=0x4000, OF=1
clc
rcr si, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80000000",
    "RCX": "0x00000000",
    "RDX": "0xC0000000",
    "RSI": "0x40000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m32, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x00000001
mov rcx, 0x00000001
mov rdx, 0x80000000
mov rsi, 0x80000000
mov r15, 1

; CF=1, ebx=0x00000001 -> ebx=0x80000000, OF=1
stc
rcr ebx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, ecx=0x00000001 -> ecx=0x00000000, OF=0
clc
rcr ecx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, edx=0x80000000 -> edx=0xC0000000, OF=0
stc
rcr edx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, esi=0x80000000 -> esi=0x40000000, OF=1
clc
rcr esi, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_03_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000000000000000",
    "RCX": "0x0000000000000000",
    "RDX": "0xC000000000000000",
    "RSI": "0x4000000000000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m64, 1: checks the overflow flag produced by a 1-bit rotate through CF.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x0000000000000001
mov rcx, 0x0000000000000001
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000
mov r15, 1

; CF=1, rbx=1 -> rbx=0x8000000000000000, OF=1
stc
rcr rbx, 1
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, rcx=1 -> rcx=0, OF=0
clc
rcr rcx, 1
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, rdx=0x8000000000000000 -> rdx=0xC000000000000000, OF=0
stc
rcr rdx, 1
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, rsi=0x8000000000000000 -> rsi=0x4000000000000000, OF=1
clc
rcr rsi, 1
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243448A8C4748",
    "RBX": "0x51525354AAACAEB0",
    "RCX": "0xC2C4C6C8CACCCED0"
  }
}
%endif

; SHL r/m16/32/64, 1 on memory operands: shifts a word, a dword and a qword
; left by one bit and reads the three containing qwords back for comparison.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546             -> 0x8A8C
; dword at offset 8: 0x55565758         -> 0xAAACAEB0 (upper dword untouched)
; qword at offset 16: 0x6162636465666768 -> 0xC2C4C6C8CACCCED0
shl  word [rdx + 8 * 0 + 2], 1
shl dword [rdx + 8 * 1 + 0], 1
shl qword [rdx + 8 * 2 + 0], 1

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434422A34748",
    "RBX": "0x515253542AAB2BAC",
    "RCX": "0x30B131B232B333B4"
  }
}
%endif

; SHR r/m16/32/64, 1 on memory operands: logically shifts a word, a dword and
; a qword right by one bit and reads the containing qwords back for comparison.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546             -> 0x22A3
; dword at offset 8: 0x55565758         -> 0x2AAB2BAC (upper dword untouched)
; qword at offset 16: 0x6162636465666768 -> 0x30B131B232B333B4
shr  word [rdx + 8 * 0 + 2], 1
shr dword [rdx + 8 * 1 + 0], 1
shr qword [rdx + 8 * 2 + 0], 1

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434422A34748",
    "RBX": "0x515253542AAB2BAC",
    "RCX": "0x30B131B232B333B4"
  }
}
%endif

; SAR r/m16/32/64, 1 on memory operands: arithmetically shifts a word, a dword
; and a qword right by one bit and reads the containing qwords back.
; NOTE(review): every shifted operand has its sign bit clear, so the expected
; values are the same as for a logical SHR; sign-fill is not exercised here.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546             -> 0x22A3
; dword at offset 8: 0x55565758         -> 0x2AAB2BAC (upper dword untouched)
; qword at offset 16: 0x6162636465666768 -> 0x30B131B232B333B4
sar  word [rdx + 8 * 0 + 2], 1
sar dword [rdx + 8 * 1 + 0], 1
sar qword [rdx + 8 * 2 + 0], 1

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D1_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414141414141FFFF",
    "RBX": "0x00000000FFFFFFFF",
    "RDX": "0xFFFFFFFFFFFFFFFF",
    "RSI": "0x42424242424242FF"
  }
}
%endif

; SAR r, CL with a count of (operand width - 1) on a value whose only set bit
; in the operand is the sign bit: the sign bit must be replicated across the
; whole operand.
;   8-bit : sil 0x80               -> 0xFF   (upper bytes of RSI preserved)
;   16-bit: ax  0x8000             -> 0xFFFF (upper bytes of RAX preserved)
;   32-bit: ebx 0x80000000         -> 0xFFFFFFFF (upper half of RBX zeroed)
;   64-bit: rdx 0x8000000000000000 -> 0xFFFFFFFFFFFFFFFF
; This test touches no memory, so no scratch-buffer pointer is set up.

mov rax, 0x4141414141418000
mov rbx, 0x80000000
mov rdx, 0x8000000000000000
mov rsi, 0x4242424242424280

mov cl, 7
sar sil, cl

mov cl, 15
sar ax, cl

mov cl, 31
sar ebx, cl

mov cl, 63
sar rdx, cl

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x06",
    "RDI": "0x04",
    "RDX": "0x02",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m8, CL with CL = 2: rotate an 8-bit register left by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x01
mov rdi, 0x01
mov rdx, 0x40
mov rsi, 0x40
mov rcx, 2

; CF=1, bl=0x01 -> bl=0x06, CF=0
stc
rcl bl, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, dil=0x01 -> dil=0x04, CF=0
clc
rcl dil, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dl=0x40 -> dl=0x02, CF=1
stc
rcl dl, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, sil=0x40 -> sil=0x00, CF=1
clc
rcl sil, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x03",
    "RDI": "0x02",
    "RDX": "0x01",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m8, CL with CL = 1: checks the overflow flag of a 1-bit rotate through
; CF when the count comes from CL instead of an immediate.
; For a 1-bit RCL, OF is the XOR of the resulting CF and the MSB of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x01
mov rdi, 0x01
mov rdx, 0x80
mov rsi, 0x80
mov rcx, 1
mov r15, 1

; CF=1, bl=0x01 -> bl=0x03, CF=0, OF=0
stc
rcl bl, cl
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, dil=0x01 -> dil=0x02, CF=0, OF=0
clc
rcl dil, cl
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, dl=0x80 -> dl=0x01, CF=1, OF=1
stc
rcl dl, cl
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, sil=0x80 -> sil=0x00, CF=1, OF=1
clc
rcl sil, cl
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x81",
    "RDI": "0x01",
    "RDX": "0xC0",
    "RSI": "0x40",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCL r/m8, CL with CL = 8: an 8-bit rotate-through-carry works on 9 bits
; (operand plus CF), so rotating left by 8 gives the same result as rotating
; right by 1. Each case seeds CF with stc/clc, rotates, then captures CF via
; LAHF (CF is bit 0 of AH).

mov rbx, 0x02
mov rdi, 0x02
mov rdx, 0x80
mov rsi, 0x80
mov rcx, 8 ; Count equals the operand width: exercises wrap-around of the 9-bit rotate

; CF=1, bl=0x02 -> bl=0x81, CF=0
stc
rcl bl, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, dil=0x02 -> dil=0x01, CF=0
clc
rcl dil, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dl=0x80 -> dl=0xC0, CF=0
stc
rcl dl, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, sil=0x80 -> sil=0x40, CF=0
clc
rcl sil, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x40",
    "RDI": "0x00",
    "RDX": "0x60",
    "RSI": "0x20",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m8, CL with CL = 2: rotate an 8-bit register right by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x02
mov rdi, 0x02
mov rdx, 0x80
mov rsi, 0x80
mov rcx, 2

; CF=1, bl=0x02 -> bl=0x40, CF=1
stc
rcr bl, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, dil=0x02 -> dil=0x00, CF=1
clc
rcr dil, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dl=0x80 -> dl=0x60, CF=0
stc
rcr dl, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, sil=0x80 -> sil=0x20, CF=0
clc
rcr sil, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80",
    "RDI": "0x00",
    "RDX": "0xC0",
    "RSI": "0x40",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m8, CL with CL = 1: checks the overflow flag of a 1-bit rotate through
; CF when the count comes from CL instead of an immediate.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x01
mov rdi, 0x01
mov rdx, 0x80
mov rsi, 0x80
mov r15, 1
mov rcx, 1

; CF=1, bl=0x01 -> bl=0x80, OF=1
stc
rcr bl, cl
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, dil=0x01 -> dil=0x00, OF=0
clc
rcr dil, cl
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, dl=0x80 -> dl=0xC0, OF=0
stc
rcr dl, cl
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, sil=0x80 -> sil=0x40, OF=1
clc
rcr sil, cl
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D2_03_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x05",
    "RDI": "0x04",
    "RDX": "0x01",
    "RSI": "0x00",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCR r/m8, CL with CL = 8: an 8-bit rotate-through-carry works on 9 bits
; (operand plus CF), so rotating right by 8 gives the same result as rotating
; left by 1. Each case seeds CF with stc/clc, rotates, then captures CF via
; LAHF (CF is bit 0 of AH).

mov rbx, 0x02
mov rdi, 0x02
mov rdx, 0x80
mov rsi, 0x80
mov rcx, 8 ; Count equals the operand width: exercises wrap-around of the 9-bit rotate

; CF=1, bl=0x02 -> bl=0x05, CF=0
stc
rcr bl, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, dil=0x02 -> dil=0x04, CF=0
clc
rcr dil, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dl=0x80 -> dl=0x01, CF=1
stc
rcr dl, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, sil=0x80 -> sil=0x00, CF=1
clc
rcr sil, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434415194748",
    "RBX": "0x5152535455595D61",
    "RCX": "0x95999DA185898D91"
  }
}
%endif

; ROL r/m16/32/64, CL on memory operands with an out-of-range count (0x62).
; The count is masked to 5 bits for 16/32-bit operands (0x62 & 0x1F = 2) and
; to 6 bits for 64-bit operands (0x62 & 0x3F = 34), so the word and dword
; rotate by 2 while the qword rotates by 34.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546 rol 2             -> 0x1519
; dword at offset 8: 0x55565758 rol 2         -> 0x55595D61
; qword at offset 16: 0x6162636465666768 rol 34 -> 0x95999DA185898D91
mov cl, 0x62
rol  word [rdx + 8 * 0 + 2], cl
rol dword [rdx + 8 * 1 + 0], cl
rol qword [rdx + 8 * 2 + 0], cl

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "1",
    "R13": "1",
    "R12": "1",
    "R11": "1",
    "R10": "0",
    "R9": "0",
    "R8": "0",
    "RSP": "0"
  }
}
%endif

; ROL r/m, CL: carry-flag behaviour.
; First half: the masked count is non-zero, so ROL must write CF with the low
; bit of the result (1 in every case here).
; Second half: the count is zero, so ROL must leave the flags untouched.
; Each result register is zeroed with MOV (which does not touch flags) and
; then conditionally set to 1 (R15) by CMOVC when CF is set.
; CF is cleared before every case so that a case cannot pass merely because
; CF is still set from the previous one.

mov r15, 1

; Should all set carry
;8bit
clc
mov rax, 0x01

; Count 8 on an 8-bit operand: value is unchanged, CF = bit 0 = 1
mov cl, 8
rol al, cl

mov r14, 0
cmovc r14, r15

; 16bit
clc
mov rax, 0x0001

; Count 16 on a 16-bit operand: value is unchanged, CF = bit 0 = 1
mov cl, 16
rol ax, cl

mov r13, 0
cmovc r13, r15

; 32bit
clc
mov rax, 0x00000002

; 0x00000002 rol 31 = 0x00000001, CF = bit 0 = 1
mov cl, 31
rol eax, cl

mov r12, 0
cmovc r12, r15

; 64bit
clc
mov rax, 0x0000000000000002

; 0x2 rol 63 = 0x1, CF = bit 0 = 1
mov cl, 63
rol rax, cl

mov r11, 0
cmovc r11, r15

; Shouldn't set carry

;8bit
clc
mov rax, 0x01

mov cl, 0
rol al, cl

mov r10, 0
cmovc r10, r15

; 16bit
clc
mov rax, 0x0001

mov cl, 0
rol ax, cl

mov r9, 0
cmovc r9, r15

; 32bit
clc
mov rax, 0x00000001

mov cl, 0
rol eax, cl

mov r8, 0
cmovc r8, r15

; 64bit
clc
mov rax, 0x0000000000000001

mov cl, 0
rol rax, cl

; RSP is used as a plain result register here
mov rsp, 0
cmovc rsp, r15

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_00_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "1",
    "R13": "1",
    "R12": "1",
    "R11": "1",
    "R10": "0",
    "R9": "0",
    "R8": "0",
    "RSP": "0"
  }
}
%endif

; ROL r/m, CL: overflow-flag behaviour.
; First half: a 1-bit ROL sets OF to the XOR of the resulting CF and the MSB
; of the result; every input moves a 1 into the MSB with CF = 0, so OF = 1.
; Second half: the count is zero, so ROL must leave the flags untouched.
; Each result register is zeroed with MOV (which does not touch flags) and
; then conditionally set to 1 (R15) by CMOVO when OF is set.
; OF is cleared with TEST (which always clears OF and CF) before every
; "should set" case so that a case cannot pass merely because OF is still set
; from the previous one.

mov r15, 1

; Should all set OF
;8bit
test r15, r15
mov rax, 0x40

; 0x40 rol 1 = 0x80, CF = 0 -> OF = 1
mov cl, 1
rol al, cl

mov r14, 0
cmovo r14, r15

; 16bit
test r15, r15
mov rax, 0x4000

; 0x4000 rol 1 = 0x8000, CF = 0 -> OF = 1
mov cl, 1
rol ax, cl

mov r13, 0
cmovo r13, r15

; 32bit
test r15, r15
mov rax, 0x40000000

; 0x40000000 rol 1 = 0x80000000, CF = 0 -> OF = 1
mov cl, 1
rol eax, cl

mov r12, 0
cmovo r12, r15

; 64bit
test r15, r15
mov rax, 0x4000000000000000

; 0x4000000000000000 rol 1 = 0x8000000000000000, CF = 0 -> OF = 1
mov cl, 1
rol rax, cl

mov r11, 0
cmovo r11, r15

; Let's clear OF really quick
; (rotating zero by one yields MSB = 0 and CF = 0, hence OF = 0)
mov rax, 0
rol rax, 1

; Shouldn't set OF

;8bit
clc
mov rax, 0x80

mov cl, 0
rol al, cl

mov r10, 0
cmovo r10, r15

; 16bit
clc
mov rax, 0x8000

mov cl, 0
rol ax, cl

mov r9, 0
cmovo r9, r15

; 32bit
clc
mov rax, 0x80000000

mov cl, 0
rol eax, cl

mov r8, 0
cmovo r8, r15

; 64bit
clc
mov rax, 0x8000000000000000

mov cl, 0
rol rax, cl

; RSP is used as a plain result register here
mov rsp, 0
cmovo rsp, r15

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434491514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x195999DA185898D9"
  }
}
%endif

; ROR r/m16/32/64, CL on memory operands with an out-of-range count (0x62).
; The count is masked to 5 bits for 16/32-bit operands (0x62 & 0x1F = 2) and
; to 6 bits for 64-bit operands (0x62 & 0x3F = 34), so the word and dword
; rotate by 2 while the qword rotates by 34.

; Base address of the scratch buffer (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; word  at offset 2: 0x4546 ror 2             -> 0x9151
; dword at offset 8: 0x55565758 ror 2         -> 0x155595D6
; qword at offset 16: 0x6162636465666768 ror 34 -> 0x195999DA185898D9
mov cl, 0x62
ror  word [rdx + 8 * 0 + 2], cl
ror dword [rdx + 8 * 1 + 0], cl
ror qword [rdx + 8 * 2 + 0], cl

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_01_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "1",
    "R13": "1",
    "R12": "1",
    "R11": "1",
    "R10": "0",
    "R9": "0",
    "R8": "0",
    "RSP": "0"
  }
}
%endif

; ROR r/m, CL: carry-flag behaviour.
; First half: the masked count is non-zero, so ROR must write CF with the
; most-significant bit of the result (1 in every case here).
; Second half: the count is zero, so ROR must leave the flags untouched.
; Each result register is zeroed with MOV (which does not touch flags) and
; then conditionally set to 1 (R15) by CMOVC when CF is set.
; CF is cleared before every case so that a case cannot pass merely because
; CF is still set from the previous one.

mov r15, 1

; Should all set carry
;8bit
clc
mov rax, 0x80

; Count 8 on an 8-bit operand: value is unchanged, CF = MSB = 1
mov cl, 8
ror al, cl

mov r14, 0
cmovc r14, r15

; 16bit
clc
mov rax, 0x8000

; Count 16 on a 16-bit operand: value is unchanged, CF = MSB = 1
mov cl, 16
ror ax, cl

mov r13, 0
cmovc r13, r15

; 32bit
clc
mov rax, 0x40000000

; 0x40000000 ror 31 = 0x80000000, CF = MSB = 1
mov cl, 31
ror eax, cl

mov r12, 0
cmovc r12, r15

; 64bit
clc
mov rax, 0x4000000000000000

; 0x4000000000000000 ror 63 = 0x8000000000000000, CF = MSB = 1
mov cl, 63
ror rax, cl

mov r11, 0
cmovc r11, r15

; Shouldn't set carry

;8bit
clc
mov rax, 0x80

mov cl, 0
ror al, cl

mov r10, 0
cmovc r10, r15

; 16bit
clc
mov rax, 0x8000

mov cl, 0
ror ax, cl

mov r9, 0
cmovc r9, r15

; 32bit
clc
mov rax, 0x80000000

mov cl, 0
ror eax, cl

mov r8, 0
cmovc r8, r15

; 64bit
clc
mov rax, 0x8000000000000000

mov cl, 0
ror rax, cl

; RSP is used as a plain result register here
mov rsp, 0
cmovc rsp, r15

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_01_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "1",
    "R13": "1",
    "R12": "1",
    "R11": "1",
    "R10": "0",
    "R9": "0",
    "R8": "0",
    "RSP": "0"
  }
}
%endif

; ROR r/m, CL: overflow-flag behaviour.
; First half: a 1-bit ROR sets OF to the XOR of the two most-significant bits
; of the result; every input rotates its low 1 bit into the MSB while the
; next bit down becomes 0, so OF = 1.
; Second half: the count is zero, so ROR must leave the flags untouched.
; Each result register is zeroed with MOV (which does not touch flags) and
; then conditionally set to 1 (R15) by CMOVO when OF is set.
; OF is cleared with TEST (which always clears OF and CF) before every
; "should set" case so that a case cannot pass merely because OF is still set
; from the previous one.

mov r15, 1

; Should all set OF
;8bit
test r15, r15
mov rax, 0x41

; 0x41 ror 1 = 0xA0 -> top two bits 1,0 -> OF = 1
mov cl, 1
ror al, cl

mov r14, 0
cmovo r14, r15

; 16bit
test r15, r15
mov rax, 0x4001

; 0x4001 ror 1 = 0xA000 -> top two bits 1,0 -> OF = 1
mov cl, 1
ror ax, cl

mov r13, 0
cmovo r13, r15

; 32bit
test r15, r15
mov rax, 0x40000001

; 0x40000001 ror 1 = 0xA0000000 -> top two bits 1,0 -> OF = 1
mov cl, 1
ror eax, cl

mov r12, 0
cmovo r12, r15

; 64bit
test r15, r15
mov rax, 0x4000000000000001

; 0x4000000000000001 ror 1 = 0xA000000000000000 -> top two bits 1,0 -> OF = 1
mov cl, 1
ror rax, cl

mov r11, 0
cmovo r11, r15

; Let's clear OF really quick
; (rotating zero by one leaves both top bits 0, hence OF = 0)
mov rax, 0
ror rax, 1

; Shouldn't set OF

;8bit
clc
mov rax, 0x80

mov cl, 0
ror al, cl

mov r10, 0
cmovo r10, r15

; 16bit
clc
mov rax, 0x8000

mov cl, 0
ror ax, cl

mov r9, 0
cmovo r9, r15

; 32bit
clc
mov rax, 0x80000000

mov cl, 0
ror eax, cl

mov r8, 0
cmovo r8, r15

; 64bit
clc
mov rax, 0x8000000000000000

mov cl, 0
ror rax, cl

; RSP is used as a plain result register here
mov rsp, 0
cmovo rsp, r15

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0006",
    "RDI": "0x0004",
    "RDX": "0x0002",
    "RSI": "0x0000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m16, CL with CL = 2: rotate a 16-bit register left by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0001
mov rdi, 0x0001
mov rdx, 0x4000
mov rsi, 0x4000
mov rcx, 2

; CF=1, bx=0x0001 -> bx=0x0006, CF=0
stc
rcl bx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, di=0x0001 -> di=0x0004, CF=0
clc
rcl di, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dx=0x4000 -> dx=0x0002, CF=1
stc
rcl dx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, si=0x4000 -> si=0x0000, CF=1
clc
rcl si, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000006",
    "RDI": "0x00000004",
    "RDX": "0x00000002",
    "RSI": "0x00000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m32, CL with CL = 2: rotate a 32-bit register left by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 2

; CF=1, ebx=0x00000001 -> ebx=0x00000006, CF=0
stc
rcl ebx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, edi=0x00000001 -> edi=0x00000004, CF=0
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, edx=0x40000000 -> edx=0x00000002, CF=1
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, esi=0x40000000 -> esi=0x00000000, CF=1
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0000000000000006",
    "RDI": "0x0000000000000004",
    "RDX": "0x0000000000000002",
    "RSI": "0x0000000000000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m64, CL with CL = 2: rotate a 64-bit register left by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0000000000000001
mov rdi, 0x0000000000000001
mov rdx, 0x4000000000000000
mov rsi, 0x4000000000000000
mov rcx, 2

; CF=1, rbx=1 -> rbx=6, CF=0
stc
rcl rbx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, rdi=1 -> rdi=4, CF=0
clc
rcl rdi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, rdx=0x4000000000000000 -> rdx=2, CF=1
stc
rcl rdx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, rsi=0x4000000000000000 -> rsi=0, CF=1
clc
rcl rsi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_02_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000006",
    "RDI": "0x00000004",
    "RDX": "0x00000002",
    "RSI": "0x00000000",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x1"
  }
}
%endif

; RCL r/m32, CL with CL = 34: for a 32-bit operand the count is masked to
; 5 bits (34 & 0x1F = 2), so the expected results equal those of a rotate by 2.
; Each case seeds CF with stc/clc, rotates, then captures CF via LAHF
; (CF is bit 0 of AH).

mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 34 ; Test wraparound

; CF=1, ebx=0x00000001 -> ebx=0x00000006, CF=0
stc
rcl ebx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, edi=0x00000001 -> edi=0x00000004, CF=0
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, edx=0x40000000 -> edx=0x00000002, CF=1
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, esi=0x40000000 -> esi=0x00000000, CF=1
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_02_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000001",
    "RDI": "0x00000001",
    "RDX": "0x40000000",
    "RSI": "0x40000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x0"
  }
}
%endif

; RCL r/m32, CL with CL = 32: for a 32-bit operand the count is masked to
; 5 bits (32 & 0x1F = 0), so the rotate is a no-op. The operands must be
; unchanged and CF must still hold whatever stc/clc put there.
; CF is captured via LAHF (CF is bit 0 of AH).

mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x40000000
mov rsi, 0x40000000
mov rcx, 32 ; Test wraparound with zero shift

; CF=1 before, ebx unchanged, CF stays 1
stc
rcl ebx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0 before, edi unchanged, CF stays 0
clc
rcl edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1 before, edx unchanged, CF stays 1
stc
rcl edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0 before, esi unchanged, CF stays 0
clc
rcl esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x4000",
    "RDI": "0x0000",
    "RDX": "0x6000",
    "RSI": "0x2000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m16, CL with CL = 2: rotate a 16-bit register right by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0002
mov rdi, 0x0002
mov rdx, 0x8000
mov rsi, 0x8000
mov rcx, 2

; CF=1, bx=0x0002 -> bx=0x4000, CF=1
stc
rcr bx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, di=0x0002 -> di=0x0000, CF=1
clc
rcr di, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, dx=0x8000 -> dx=0x6000, CF=0
stc
rcr dx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, si=0x8000 -> si=0x2000, CF=0
clc
rcr si, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x40000000",
    "RDI": "0x00000000",
    "RDX": "0x60000000",
    "RSI": "0x20000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m32, CL with CL = 2: rotate a 32-bit register right by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 2

; CF=1, ebx=0x00000002 -> ebx=0x40000000, CF=1
stc
rcr ebx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, edi=0x00000002 -> edi=0x00000000, CF=1
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, edx=0x80000000 -> edx=0x60000000, CF=0
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, esi=0x80000000 -> esi=0x20000000, CF=0
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x4000000000000000",
    "RDI": "0x0000000000000000",
    "RDX": "0x6000000000000000",
    "RSI": "0x2000000000000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; RCR r/m64, CL with CL = 2: rotate a 64-bit register right by two bits
; through CF. Each case seeds CF with stc/clc, rotates, then captures CF:
; LAHF copies the low flags byte into AH (CF is bit 0), the AX copy is shifted
; right by 8 so AH lands in the low byte, and the AND isolates CF.

mov rbx, 0x0000000000000002
mov rdi, 0x0000000000000002
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000
mov rcx, 2

; CF=1, rbx=2 -> rbx=0x4000000000000000, CF=1
stc
rcr rbx, cl
lahf
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0, rdi=2 -> rdi=0, CF=1
clc
rcr rdi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1, rdx=0x8000000000000000 -> rdx=0x6000000000000000, CF=0
stc
rcr rdx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0, rsi=0x8000000000000000 -> rsi=0x2000000000000000, CF=0
clc
rcr rsi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000",
    "RDI": "0x0000",
    "RDX": "0xC000",
    "RSI": "0x4000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m16, CL with CL = 1: checks the overflow flag of a 1-bit rotate
; through CF when the count comes from CL instead of an immediate.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x0001
mov rdi, 0x0001
mov rdx, 0x8000
mov rsi, 0x8000
mov r15, 1
mov rcx, 1

; CF=1, bx=0x0001 -> bx=0x8000, OF=1
stc
rcr bx, cl
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, di=0x0001 -> di=0x0000, OF=0
clc
rcr di, cl
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, dx=0x8000 -> dx=0xC000, OF=0
stc
rcr dx, cl
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, si=0x8000 -> si=0x4000, OF=1
clc
rcr si, cl
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x80000000",
    "RDI": "0x00000000",
    "RDX": "0xC0000000",
    "RSI": "0x40000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; RCR r/m32, CL with CL = 1: checks the overflow flag of a 1-bit rotate
; through CF when the count comes from CL instead of an immediate.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; Each destination (R8..R11) is zeroed with MOV, which leaves flags untouched,
; and then conditionally set to 1 (R15) by CMOVO when OF is set.

mov rbx, 0x00000001
mov rdi, 0x00000001
mov rdx, 0x80000000
mov rsi, 0x80000000
mov r15, 1
mov rcx, 1

; CF=1, ebx=0x00000001 -> ebx=0x80000000, OF=1
stc
rcr ebx, cl
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0, edi=0x00000001 -> edi=0x00000000, OF=0
clc
rcr edi, cl
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1, edx=0x80000000 -> edx=0xC0000000, OF=0
stc
rcr edx, cl
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0, esi=0x80000000 -> esi=0x40000000, OF=1
clc
rcr esi, cl
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000000000000000",
    "RDI": "0x0000000000000000",
    "RDX": "0xC000000000000000",
    "RSI": "0x4000000000000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x0",
    "R11": "0x1"
  }
}
%endif

; 64-bit RCR by 1 (count in CL), checking the overflow flag.
; For a 1-bit RCR, OF is the XOR of the two most-significant bits of the result.
; R8-R11 receive 1 when OF was set after the corresponding rotate, else 0.

mov rbx, 0x0000000000000001
mov rdi, 0x0000000000000001
mov rdx, 0x8000000000000000
mov rsi, 0x8000000000000000
; r15 holds the constant 1 used as the CMOVO source
mov r15, 1
mov rcx, 1

; CF=1: bit 0 rotates out, CF rotates into bit 63 -> OF=1
stc
rcr rbx, cl
; mov does not modify flags, so OF from the rotate is still live for cmovo
mov r8, 0
cmovo r8, r15 ; We only care about OF here

; CF=0: result is zero -> OF=0
clc
rcr rdi, cl
mov r9, 0
cmovo r9, r15 ; We only care about OF here

; CF=1: result has bits 63 and 62 set -> OF=0
stc
rcr rdx, cl
mov r10, 0
cmovo r10, r15 ; We only care about OF here

; CF=0: result has only bit 62 set -> OF=1
clc
rcr rsi, cl
mov r11, 0
cmovo r11, r15 ; We only care about OF here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x40000000",
    "RDI": "0x00000000",
    "RDX": "0x60000000",
    "RSI": "0x20000000",
    "R8":  "0x1",
    "R9":  "0x1",
    "R10": "0x0",
    "R11": "0x0"
  }
}
%endif

; 32-bit RCR with CL=34. For 32-bit operands the count is masked to 5 bits,
; so this must behave as a rotate by 2 (34 & 31).
; Rotated values land in RBX/RDI/RDX/RSI, the resulting CF in R8-R11.

mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 34 ; Test wraparound

; CF=1 going in
stc
rcr ebx, cl
; LAHF places the low flag byte in AH; CF is bit 0 of that byte.
lahf
; Only the low 16 bits of r8 are written here; the final AND discards the rest.
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0 going in
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1 going in, MSB set
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0 going in, MSB set
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_03_8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x00000002",
    "RDI": "0x00000002",
    "RDX": "0x80000000",
    "RSI": "0x80000000",
    "R8":  "0x1",
    "R9":  "0x0",
    "R10": "0x1",
    "R11": "0x0"
  }
}
%endif

; 32-bit RCR with CL=32. The count is masked to 5 bits, giving an effective
; count of 0: the operand must be left unchanged and the flags untouched.
; R8-R11 therefore simply mirror the CF value set by the preceding stc/clc.

mov rbx, 0x00000002
mov rdi, 0x00000002
mov rdx, 0x80000000
mov rsi, 0x80000000
mov rcx, 32 ; Test wraparound with zero shift

; CF=1 must survive the zero-count rotate
stc
rcr ebx, cl
; LAHF places the low flag byte in AH; CF is bit 0 of that byte.
lahf
; Only the low 16 bits of r8 are written here; the final AND discards the rest.
mov r8w, ax
shr r8, 8
and r8, 1 ; We only care about carry flag here

; CF=0 must survive the zero-count rotate
clc
rcr edi, cl
lahf
mov r9w, ax
shr r9, 8
and r9, 1 ; We only care about carry flag here

; CF=1 must survive the zero-count rotate
stc
rcr edx, cl
lahf
mov r10w, ax
shr r10, 8
and r10, 1 ; We only care about carry flag here

; CF=0 must survive the zero-count rotate
clc
rcr esi, cl
lahf
mov r11w, ax
shr r11, 8
and r11, 1 ; We only care about carry flag here

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434415184748",
    "RBX": "0x5152535455595D60",
    "RCX": "0x95999DA000000000"
  }
}
%endif

; SHL of word/dword/qword memory operands with the count in CL.
; CL=0x62 exercises count masking: 5 bits for word/dword (effective count 2)
; and 6 bits for qword (effective count 34).

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

mov cl, 0x62
; word at byte offset 2: 0x4546 << 2 = 0x1518 (truncated to 16 bits)
shl  word [rdx + 8 * 0 + 2], cl
; dword: 0x55565758 << 2 = 0x55595D60
shl dword [rdx + 8 * 1 + 0], cl
; qword shifted left by 34
shl qword [rdx + 8 * 2 + 0], cl

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434411514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x00000000185898D9"
  }
}
%endif

; SHR of word/dword/qword memory operands with the count in CL.
; CL=0x62 exercises count masking: 5 bits for word/dword (effective count 2)
; and 6 bits for qword (effective count 34).

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

mov cl, 0x62
; word at byte offset 2: 0x4546 >> 2 = 0x1151
shr  word [rdx + 8 * 0 + 2], cl
; dword: 0x55565758 >> 2 = 0x155595D6
shr dword [rdx + 8 * 1 + 0], cl
; qword: 0x6162636465666768 >> 34 = 0x185898D9
shr qword [rdx + 8 * 2 + 0], cl

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434411514748",
    "RBX": "0x51525354155595D6",
    "RCX": "0x00000000185898D9"
  }
}
%endif

; SAR of word/dword/qword memory operands with the count in CL.
; CL=0x62 exercises count masking: 5 bits for word/dword (effective count 2)
; and 6 bits for qword (effective count 34).
; All operands have a clear sign bit, so the results equal a logical shift.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

mov cl, 0x62
; word at byte offset 2: 0x4546 >> 2 = 0x1151
sar  word [rdx + 8 * 0 + 2], cl
; dword: 0x55565758 >> 2 = 0x155595D6
sar dword [rdx + 8 * 1 + 0], cl
; qword: 0x6162636465666768 >> 34 = 0x185898D9
sar qword [rdx + 8 * 2 + 0], cl

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/2_D3_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3335785C36350000",
    "RBX": "0x3465785C33350000",
    "RCX": "0x6663785C346503C3",
    "RDX": "0x3234785C66630F0B",
    "RDI": "0x3234785C32340000",
    "RSI": "0x3035785C32340000",
    "RSP": "0x3564785C30350000",
    "R8":  "0x6532785C35640785",
    "R9":  "0x6435785C65320000",
    "R10": "0x3262785C64350000",
    "R11": "0x6638785C32621E17",
    "R12": "0x3831785C66380000",
    "R13": "0x3434785C38310000",
    "R14": "0x6632785C34340000",
    "R15": "0x3162785C66320000"
  }
}
%endif

; 16-bit SAR by CL on every general purpose register.
; Each case loads 8 bytes from the data table into the register, loads the
; shift count from the table as well, and shifts only the low 16 bits.
; Bits 16-63 of each register must be preserved.
; The count is masked to 5 bits, so effective counts of 16..31 are possible
; for a 16-bit operand; with a positive operand those produce 0.

lea r15, [rel .data]

mov rax, [r15 + 0]
mov cl, [r15 + 2]
sar ax, cl

mov rbx, [r15 + 4]
mov cl, [r15 + 6]
sar bx, cl

; The count register aliases the operand here: loading CL replaces the low
; byte of CX before the shift, so CX = 0x7865 and the effective count is
; 0x65 & 0x1F = 5, giving 0x7865 >> 5 = 0x03C3.
mov rcx, [r15 + 8]
mov cl, [r15 + 10]
sar cx, cl

mov rdx, [r15 + 12]
mov cl, [r15 + 14]
sar dx, cl

mov rdi, [r15 + 16]
mov cl, [r15 + 18]
sar di, cl

mov rsi, [r15 + 20]
mov cl, [r15 + 22]
sar si, cl

mov rsp, [r15 + 24]
mov cl, [r15 + 26]
sar sp, cl

mov r8, [r15 + 28]
mov cl, [r15 + 30]
sar r8w, cl

mov r9, [r15 + 32]
mov cl, [r15 + 34]
sar r9w, cl

mov r10, [r15 + 36]
mov cl, [r15 + 38]
sar r10w, cl

mov r11, [r15 + 40]
mov cl, [r15 + 42]
sar r11w, cl

mov r12, [r15 + 44]
mov cl, [r15 + 46]
sar r12w, cl

mov r13, [r15 + 48]
mov cl, [r15 + 50]
sar r13w, cl

mov r14, [r15 + 52]
mov cl, [r15 + 54]
sar r14w, cl

; r15 is the table base, so the count must be fetched before r15 is overwritten
mov cl, [r15 + 58]
mov r15, [r15 + 56]
sar r15w, cl

hlt

; NOTE: NASM does not process escape sequences in single-quoted strings (only
; in backquoted ones), so these lines emit the literal ASCII characters
; '\', 'x', '5', '6', ... rather than the bytes 0x56, 0x53, ...
; The expected register values above are derived from that ASCII data; do not
; switch to backquotes without regenerating them.
.data:
db '\x56\x53\xe4\xcf\x42\x42\x50\xd5\x2e\x5d\xb2\x8f\x18\x44\x2f\xb1'
db '\xad\x88\x64\x7e\x20\x99\xb4\xf8\xa4\x34\xc7\x65\xd7\x01\x19\xc3'
db '\x8c\xce\x28\x7c\x64\x65\x50\x65\xb7\xda\xaf\x08\xc0\x1f\x31\xbf'
db '\x7f\xeb\xf0\x0b\xf0\x46\x4e\x72\x2c\xf8\xb4\x4b\xa9\x8d\xc9\x33'


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0600"
  }
}
%endif

; TEST with a byte memory operand and an 8-bit immediate.
; The resulting flags are captured with LAHF and compared through AH.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1 of the first qword is 0x47
test byte [rdx + 8 * 0 + 1], 0x61
; test = 0x47 & 0x61 = 0x41
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

; Clear RAX first so only AH (the flag byte) is non-zero in the result.
; mov does not modify flags.
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B848"
  }
}
%endif

; NOT on a byte memory operand; only the addressed byte may change.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1: ~0x47 = 0xB8
not byte [rdx + 8 * 0 + 1]

; Read the containing qword back for comparison
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B848"
  }
}
%endif

; LOCK-prefixed NOT on a byte memory operand; only the addressed byte may change.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1: ~0x47 = 0xB8
lock not byte [rdx + 8 * 0 + 1]

; Read the containing qword back for comparison
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B948"
  }
}
%endif

; NEG on a byte memory operand; only the addressed byte may change.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Byte at offset 1: -0x47 = 0xB9
neg byte [rdx + 8 * 0 + 1]

; Read the containing qword back for comparison
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xb8aff1dbb3d15c81",
    "RBX": "0xd5a98ccfc669b87a",
    "RCX": "0x30b556de1f6de86b"
  }
}
%endif

; LOCK-prefixed NEG on sixteen consecutive bytes of the data table.
; RAX and RBX read back the sixteen negated bytes; RCX reads the following
; eight bytes, which must be left untouched.

lea r15, [rel .data]

lock neg byte [r15 + 0]
lock neg byte [r15 + 1]
lock neg byte [r15 + 2]
lock neg byte [r15 + 3]
lock neg byte [r15 + 4]
lock neg byte [r15 + 5]
lock neg byte [r15 + 6]
lock neg byte [r15 + 7]
lock neg byte [r15 + 8]
lock neg byte [r15 + 9]
lock neg byte [r15 + 10]
lock neg byte [r15 + 11]
lock neg byte [r15 + 12]
lock neg byte [r15 + 13]
lock neg byte [r15 + 14]
lock neg byte [r15 + 15]

; Bytes 0-7 and 8-15 were modified, bytes 16-23 were not
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]

hlt

align 4096
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546008E",
    "RBX": "0xFFFFFFFFFFFF0004"
  }
}
%endif

; 8-bit unsigned MUL: AX = AL * r/m8.
; Covers a memory operand, and a register operand where the bits of RAX
; above AX must be preserved.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; 2 * 0x47 = 0x008E, stored over the low word of the first qword
mov al, 2
mul byte [rdx + 8 * 0 + 1]
mov word [rdx + 8 * 0], ax

; Ensure this inserts in to AX
; 2 * 2 = 0x0004 in AX; bits 16-63 of RAX must stay all ones
mov rax, 0xFFFFFFFFFFFFFF02
mov rbx, 0xFFFFFFFFFFFFFF02
mul bl
mov rbx, rax

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546FF72",
    "RBX": "0xFFFFFFFFFFFF0001"
  }
}
%endif

; 8-bit signed IMUL: AX = AL * r/m8.
; Covers a memory operand with a negative multiplicand, and a register
; operand where the bits of RAX above AX must be preserved.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; -2 * 0x47 = -142 = 0xFF72, stored over the low word of the first qword
mov al, -2
imul byte [rdx + 8 * 0 + 1]
mov word [rdx + 8 * 0], ax

; Ensure upper bits aren't cleared
; 1 * 1 = 0x0001 in AX; bits 16-63 of RAX must stay all ones
mov rax, 0xFFFFFFFFFFFFFF01
mov rbx, 1
imul bl
mov rbx, rax

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_05_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x00000000000003fc"
  }
}
%endif

; CF/OF results of 8-bit IMUL (AX = AL * BL).
; After each multiply the macro appends two bits (CF then OF) to EDI.
; The first four products do not fit in a signed byte (CF=OF=1); the last
; one does (CF=OF=0), giving 0b11_11_11_11_00 = 0x3fc.

; Shifts CF and then OF into the low end of EDI. Clobbers EAX and EBX.
%macro ofcfmerge 0
  ; Get CF
  setc al
  ; Get OF
  seto bl
  ; Clear everything but the captured flag bit
  and eax, 1
  and ebx, 1

  ; Merge in to results
  shl edi, 1
  or edi, eax

  ; Merge in to results
  shl edi, 1
  or edi, ebx
%endmacro

mov edi, 0

; Max Negative
mov al, 0x80
mov bl, 0x80

imul bl

ofcfmerge

; Max Positive
mov al, 0x7F
mov bl, 0x7F

imul bl

ofcfmerge

; Max Positive and Max Negative
mov al, 0x7F
mov bl, 0x80

imul bl

ofcfmerge

; Max Negative and Max Positive
mov al, 0x80
mov bl, 0x7F

imul bl

ofcfmerge

; No Overflow

mov al, 0x1
mov bl, 0x1

imul bl

ofcfmerge

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_05_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x00000000000003fc"
  }
}
%endif

; CF/OF results of 16-bit IMUL (DX:AX = AX * BX).
; After each multiply the macro appends two bits (CF then OF) to EDI.
; The first four products do not fit in a signed word (CF=OF=1); the last
; one does (CF=OF=0), giving 0b11_11_11_11_00 = 0x3fc.

; Shifts CF and then OF into the low end of EDI. Clobbers EAX and EBX.
%macro ofcfmerge 0
  ; Get CF
  setc al
  ; Get OF
  seto bl
  ; Clear everything but the captured flag bit
  and eax, 1
  and ebx, 1

  ; Merge in to results
  shl edi, 1
  or edi, eax

  ; Merge in to results
  shl edi, 1
  or edi, ebx
%endmacro

mov edi, 0

; Max Negative
mov ax, 0x8000
mov bx, 0x8000

imul bx

ofcfmerge

; Max Positive
mov ax, 0x7FFF
mov bx, 0x7FFF

imul bx

ofcfmerge

; Max Positive and Max Negative
mov ax, 0x7FFF
mov bx, 0x8000

imul bx

ofcfmerge

; Max Negative and Max Positive
mov ax, 0x8000
mov bx, 0x7FFF

imul bx

ofcfmerge

; No Overflow

mov ax, 0x1
mov bx, 0x1

imul bx

ofcfmerge

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_05_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x00000000000003fc"
  }
}
%endif

; CF/OF results of 32-bit IMUL (EDX:EAX = EAX * EBX).
; After each multiply the macro appends two bits (CF then OF) to EDI.
; The first four products do not fit in a signed dword (CF=OF=1); the last
; one does (CF=OF=0), giving 0b11_11_11_11_00 = 0x3fc.

; Shifts CF and then OF into the low end of EDI. Clobbers EAX and EBX.
%macro ofcfmerge 0
  ; Get CF
  setc al
  ; Get OF
  seto bl
  ; Clear everything but the captured flag bit
  and eax, 1
  and ebx, 1

  ; Merge in to results
  shl edi, 1
  or edi, eax

  ; Merge in to results
  shl edi, 1
  or edi, ebx
%endmacro

mov edi, 0

; Max Negative
mov eax, 0x80000000
mov ebx, 0x80000000

imul ebx

ofcfmerge

; Max Positive
mov eax, 0x7FFFFFFF
mov ebx, 0x7FFFFFFF

imul ebx

ofcfmerge

; Max Positive and Max Negative
mov eax, 0x7FFFFFFF
mov ebx, 0x80000000

imul ebx

ofcfmerge

; Max Negative and Max Positive
mov eax, 0x80000000
mov ebx, 0x7FFFFFFF

imul ebx

ofcfmerge

; No Overflow

mov eax, 0x1
mov ebx, 0x1

imul ebx

ofcfmerge

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_05_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RDI": "0x00000000000003fc"
  }
}
%endif

; CF/OF results of 64-bit IMUL (RDX:RAX = RAX * RBX).
; After each multiply the macro appends two bits (CF then OF) to EDI.
; The first four products do not fit in a signed qword (CF=OF=1); the last
; one does (CF=OF=0), giving 0b11_11_11_11_00 = 0x3fc.

; Shifts CF and then OF into the low end of EDI. Clobbers RAX and RBX
; (the 32-bit ANDs zero the upper halves).
%macro ofcfmerge 0
  ; Get CF
  setc al
  ; Get OF
  seto bl
  ; Clear everything but the captured flag bit
  and eax, 1
  and ebx, 1

  ; Merge in to results
  shl edi, 1
  or edi, eax

  ; Merge in to results
  shl edi, 1
  or edi, ebx
%endmacro

mov edi, 0

; Max Negative
mov rax, 0x8000000000000000
mov rbx, 0x8000000000000000

imul rbx

ofcfmerge

; Max Positive
mov rax, 0x7FFFFFFFFFFFFFFF
mov rbx, 0x7FFFFFFFFFFFFFFF

imul rbx

ofcfmerge

; Max Positive and Max Negative
mov rax, 0x7FFFFFFFFFFFFFFF
mov rbx, 0x8000000000000000

imul rbx

ofcfmerge

; Max Negative and Max Positive
mov rax, 0x8000000000000000
mov rbx, 0x7FFFFFFFFFFFFFFF

imul rbx

ofcfmerge

; No Overflow

mov rax, 0x1
mov rbx, 0x1

imul rbx

ofcfmerge

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445460202"
  }
}
%endif

; 8-bit unsigned DIV: AX / r/m8 -> quotient in AL, remainder in AH.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Divisor is the byte at offset 2 (0x46): 0x8E / 0x46 = 2 remainder 2
mov ax, 0x8E
div byte [rdx + 8 * 0 + 2]
; Store AH:AL (remainder:quotient) over the low word of the first qword
mov word [rdx + 8 * 0], ax

mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445fe00DD",
    "RBX": "0x515253545556D8FF",
    "RCX": "0x6162636465660010"
  }
}
%endif

; 8-bit signed IDIV: AX / r/m8 -> quotient in AL, remainder in AH.
; Covers the sign combinations of dividend and divisor; each AH:AL result is
; written over the low word of the qword that supplied the divisor.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445FE4748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x61626364656667F8
mov [rdx + 8 * 2], rax

; Positive / Negative
; 70 / -2 (byte 0xFE) = -35 (0xDD) remainder 0
mov ax, 0x46
idiv byte [rdx + 8 * 0 + 2]
mov word [rdx + 8 * 0], ax

; Negative / Positive
; -128 / 88 (byte 0x58) = -1 (0xFF) remainder -40 (0xD8)
mov ax, -128
idiv byte [rdx + 8 * 1]
mov word [rdx + 8 * 1], ax

; Negative / Negative
; -128 / -8 (byte 0xF8) = 16 (0x10) remainder 0
mov ax, -128
idiv byte [rdx + 8 * 2]
mov word [rdx + 8 * 2], ax

mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F6_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0021"
  }
}
%endif

; 8-bit IDIV must use the full 16-bit AX as the dividend, not just AL.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

; Divisor: 8
mov rax, 0x8
mov [rdx + 8 * 0], rax

; Test that 8bit divide divides a 16bit dividend
; 0x0108 / 8 = 0x21 remainder 0. Using only AL (0x08) would give 1 instead.
mov ax, 0x0108
idiv byte [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0200",
    "RBX": "0x0600",
    "RCX": "0x0600"
  }
}
%endif

; TEST with qword/dword/word memory operands and an immediate.
; After each TEST the flags are captured with LAHF. Bit 12 of the captured
; value (AF inside AH) is masked off because AF is undefined after TEST.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

test qword [rdx + 8 * 2], 0x71727374
; test = 0x6162636465666768 & 0x71727374 = 0x61626360
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

; mov does not modify flags, so RAX can be cleared before LAHF
mov rax, 0
lahf
mov rcx, rax
; Drop the undefined AF bit
and rcx, 0xffffffffffffefff

test dword [rdx + 8 * 1], 0x71727374
; test = 0x55565758 & 0x71727374 = 0x51525350
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000110
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
mov rbx, rax
; Drop the undefined AF bit
and rbx, 0xffffffffffffefff

test word [rdx + 8 * 0], 0x7172
; test = 0x4748 & 0x7172 = 0x4140
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000
; ================
;         00000010
; OF: LAHF doesn't load - 0

mov rax, 0
lahf
; Drop the undefined AF bit
and rax, 0xffffffffffffefff

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8600"
  }
}
%endif

; 64-bit TEST with a sign-extended 32-bit immediate.
; The memory operand only has bits in its upper half, so SF is set and ZF is
; clear only if the immediate -1 is sign-extended to 64 bits.

; Scratch memory used for the memory operand
mov rdx, 0xe0000000

mov rax, 0xFFFFFFFF00000000
mov [rdx + 8 * 0], rax

test qword [rdx + 8 * 0], -1
; test = 0xFFFFFFFF00000000 & 0xFFFFFFFFFFFFFFFF = 0xFFFFFFFF00000000
; 0: CF - 00000000
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- Undefined
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 10000000
; ================
;         10000110
; OF: LAHF doesn't load - 0

; mov does not modify flags, so RAX can be cleared before LAHF
mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B8B7",
    "RBX": "0x51525354AAA9A8A7",
    "RCX": "0x9E9D9C9B9A999897"
  }
}
%endif

; NOT on word/dword/qword memory operands.
; Bytes outside the addressed operand must be left unchanged.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; ~0x4748 = 0xB8B7
not  word [rdx + 8 * 0 + 0]
; ~0x55565758 = 0xAAA9A8A7
not dword [rdx + 8 * 1 + 0]
; ~0x6162636465666768 = 0x9E9D9C9B9A999897
not qword [rdx + 8 * 2 + 0]

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B8B7",
    "RBX": "0x51525354AAA9A8A7",
    "RCX": "0x9E9D9C9B9A999897"
  }
}
%endif

; LOCK-prefixed NOT on aligned word/dword/qword memory operands.
; Bytes outside the addressed operand must be left unchanged.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; ~0x4748 = 0xB8B7
lock not  word [rdx + 8 * 0 + 0]
; ~0x55565758 = 0xAAA9A8A7
lock not dword [rdx + 8 * 1 + 0]
; ~0x6162636465666768 = 0x9E9D9C9B9A999897
lock not qword [rdx + 8 * 2 + 0]

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_02_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x48ae0fda4d2fa480",
    "RBX": "0x2b5774313a974886",
    "RCX": "0x304a56211f6de894"
  }
}
%endif

; LOCK-prefixed NOT at every byte offset 0..15 from a 16-byte-aligned base,
; for word, dword and qword operand sizes. This covers both naturally aligned
; and unaligned locked accesses.
; The operations overlap, so each byte is inverted once per operation that
; covers it; a byte ends up inverted only if that number is odd.
; The first 24 bytes of the table are read back into RAX, RBX and RCX.

lea r15, [rel .data]

; Unaligned words
lock not  word [r15 + 0]
lock not  word [r15 + 1]
lock not  word [r15 + 2]
lock not  word [r15 + 3]
lock not  word [r15 + 4]
lock not  word [r15 + 5]
lock not  word [r15 + 6]
lock not  word [r15 + 7]
lock not  word [r15 + 8]
lock not  word [r15 + 9]
lock not  word [r15 + 10]
lock not  word [r15 + 11]
lock not  word [r15 + 12]
lock not  word [r15 + 13]
lock not  word [r15 + 14]
lock not  word [r15 + 15]

; Unaligned dwords
lock not dword [r15 + 0]
lock not dword [r15 + 1]
lock not dword [r15 + 2]
lock not dword [r15 + 3]
lock not dword [r15 + 4]
lock not dword [r15 + 5]
lock not dword [r15 + 6]
lock not dword [r15 + 7]
lock not dword [r15 + 8]
lock not dword [r15 + 9]
lock not dword [r15 + 10]
lock not dword [r15 + 11]
lock not dword [r15 + 12]
lock not dword [r15 + 13]
lock not dword [r15 + 14]
lock not dword [r15 + 15]

; Unaligned qwords
lock not qword [r15 + 0]
lock not qword [r15 + 1]
lock not qword [r15 + 2]
lock not qword [r15 + 3]
lock not qword [r15 + 4]
lock not qword [r15 + 5]
lock not qword [r15 + 6]
lock not qword [r15 + 7]
lock not qword [r15 + 8]
lock not qword [r15 + 9]
lock not qword [r15 + 10]
lock not qword [r15 + 11]
lock not qword [r15 + 12]
lock not qword [r15 + 13]
lock not qword [r15 + 14]
lock not qword [r15 + 15]

; The last qword operation reaches byte 22, so all three qwords are checked
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546B8B8",
    "RBX": "0x51525354AAA9A8A8",
    "RCX": "0x9E9D9C9B9A999898"
  }
}
%endif

; NEG on word/dword/qword memory operands.
; Bytes outside the addressed operand must be left unchanged.

; Scratch memory used for the memory operands
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; -0x4748 = 0xB8B8
neg  word [rdx + 8 * 0 + 0]
; -0x55565758 = 0xAAA9A8A8
neg dword [rdx + 8 * 1 + 0]
; -0x6162636465666768 = 0x9E9D9C9B9A999898
neg qword [rdx + 8 * 2 + 0]

; Read the results back for comparison
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4bad12d95030a781",
    "RBX": "0x2e5a77343d9a4b89",
    "RCX": "0x304a56211f6de894"
  }
}
%endif

; LOCK-prefixed NEG at every byte offset 0..15 from a 16-byte-aligned base,
; for word, dword and qword operand sizes. This covers both naturally aligned
; and unaligned locked accesses.
; The operations overlap and are applied in program order, so the expected
; values are the cumulative result of all 48 negations.
; The first 24 bytes of the table are read back into RAX, RBX and RCX.

lea r15, [rel .data]

; Unaligned words
lock neg  word [r15 + 0]
lock neg  word [r15 + 1]
lock neg  word [r15 + 2]
lock neg  word [r15 + 3]
lock neg  word [r15 + 4]
lock neg  word [r15 + 5]
lock neg  word [r15 + 6]
lock neg  word [r15 + 7]
lock neg  word [r15 + 8]
lock neg  word [r15 + 9]
lock neg  word [r15 + 10]
lock neg  word [r15 + 11]
lock neg  word [r15 + 12]
lock neg  word [r15 + 13]
lock neg  word [r15 + 14]
lock neg  word [r15 + 15]

; Unaligned dwords
lock neg dword [r15 + 0]
lock neg dword [r15 + 1]
lock neg dword [r15 + 2]
lock neg dword [r15 + 3]
lock neg dword [r15 + 4]
lock neg dword [r15 + 5]
lock neg dword [r15 + 6]
lock neg dword [r15 + 7]
lock neg dword [r15 + 8]
lock neg dword [r15 + 9]
lock neg dword [r15 + 10]
lock neg dword [r15 + 11]
lock neg dword [r15 + 12]
lock neg dword [r15 + 13]
lock neg dword [r15 + 14]
lock neg dword [r15 + 15]

; Unaligned qwords
lock neg qword [r15 + 0]
lock neg qword [r15 + 1]
lock neg qword [r15 + 2]
lock neg qword [r15 + 3]
lock neg qword [r15 + 4]
lock neg qword [r15 + 5]
lock neg qword [r15 + 6]
lock neg qword [r15 + 7]
lock neg qword [r15 + 8]
lock neg qword [r15 + 9]
lock neg qword [r15 + 10]
lock neg qword [r15 + 11]
lock neg qword [r15 + 12]
lock neg qword [r15 + 13]
lock neg qword [r15 + 14]
lock neg qword [r15 + 15]

; The last qword operation reaches byte 22, so all three qwords are checked
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243441F968610",
    "RBX": "0x25D1437D318C1BE0",
    "RCX": "0xFFFFFFFFFFFF0004",
    "RDX": "0x0000000000000004",
    "RSI": "0xFC1B5FC85401D0C0",
    "RSP": "0x2B27F79B13618682"
  }
}
%endif

; Unsigned MUL with word/dword/qword operands.
; The low half of each product goes to (R/E)AX and the high half to (R/E)DX.
; Also checks how the destination registers are extended: a 16-bit result
; is inserted (upper bits preserved), a 32-bit result zero-extends.

; Scratch memory used for the memory operands
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; 16-bit: DX:AX = 0x7172 * 0x4748, written back as the low dword of qword 0
mov ax, 0x7172
mul word [r15 + 8 * 0 + 0]
mov word [r15 + 8 * 0 + 0], ax
mov word [r15 + 8 * 0 + 2], dx

; 32-bit: EDX:EAX = 0x71727374 * 0x55565758, written back over qword 1
mov eax, 0x71727374
mul dword [r15 + 8 * 1 + 0]
mov dword [r15 + 8 * 1 + 0], eax
mov dword [r15 + 8 * 1 + 4], edx

; 64-bit: RDX:RAX = product, kept in RSI (low) and RSP (high)
mov rax, 0x7172737475767778
mul qword [r15 + 8 * 2 + 0]
mov rsi, rax
mov rsp, rdx

; Ensure zext handling is correct
; 16bit
; AX = 4, bits 16-63 of RAX must be preserved
mov rax, 0xFFFFFFFFFFFF0002
mov rbx, 0xFFFFFFFFFFFF0002
mul bx
mov rcx, rax

; 32bit
; EAX = 4, bits 32-63 of RAX must be cleared
mov rax, 0xFFFFFFFF00000002
mov rbx, 0xFFFFFFFF00000002
mul ebx
mov rdx, rax

mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x41424344FFDC5C00",
    "RBX": "0xFFFFFFD554D45400",
    "RCX": "0xFFFFFFFFFFFF0002",
    "RDX": "0x0000000000000002",
    "RSI": "0x4ECE4DCD4CCC4C00",
    "RSP": "0xFFFFFFFFFFFFFFCF"
  }
}
%endif

; Signed IMUL (one-operand form) with word/dword/qword operands.
; The low half of each product goes to (R/E)AX and the high half to (R/E)DX.
; Also checks how the destination registers are extended: a 16-bit result
; is inserted (upper bits preserved), a 32-bit result zero-extends.

; Scratch memory used for the memory operands
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; 16-bit: DX:AX = -128 * 0x4748, written back as the low dword of qword 0
mov ax, -128
imul word [r15 + 8 * 0 + 0]
mov word [r15 + 8 * 0 + 0], ax
mov word [r15 + 8 * 0 + 2], dx

; 32-bit: EDX:EAX = -128 * 0x55565758, written back over qword 1
mov eax, -128
imul dword [r15 + 8 * 1 + 0]
mov dword [r15 + 8 * 1 + 0], eax
mov dword [r15 + 8 * 1 + 4], edx

; 64-bit: RDX:RAX = product, kept in RSI (low) and RSP (high)
mov rax, -128
imul qword [r15 + 8 * 2 + 0]
mov rsi, rax
mov rsp, rdx

; Ensure correct zext mechanics

; 16bit - inserts
; Uses the 16-bit operand form, matching the `mul bx` check in 3_F7_04.
; AX = 2, bits 16-63 of RAX must be preserved. DX is also written here but
; RDX is fully overwritten below.
mov rax, 0xFFFFFFFFFFFF0001
mov rbx, 2
imul bx
mov rcx, rax

; 32bit - Zexts to 64bit
; EAX = 2, bits 32-63 of RAX must be cleared
mov rax, 0xFFFFFFFF00000001
mov rbx, 2
imul ebx
mov rdx, rax

mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_05_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0"
  }
}
%endif

; CF/OF results of one-operand IMUL when the product fits in the operand
; size. Every case multiplies +/-128 by a small value, so neither flag may
; ever be set and R15 must stay 0.

; Clobbers RAX and RBX and accumulates the result in r15
; Two bits are appended per invocation, in the order OF:CF
%macro ofcfmerge 0
  ; Capture the low flag byte (containing CF) in AH before anything else
  lahf

  ; Load OF
  ; (mov does not modify flags, so OF is still valid for seto)
  mov rbx, 0
  seto bl

  shl r15, 1
  or r15, rbx
  shl r15, 1

  ; Insert CF
  ; CF is bit 0 of AH; move it down to bit 0 and mask everything else
  shr ax, 8
  and rax, 1
  or r15, rax
%endmacro

; Scratch memory used for the memory operands
mov r8, 0xe0000000
mov r15, 0

; Negative multipliers at qwords 0-2
mov rax, -1
mov [r8 + 8 * 0], rax
mov rax, -2
mov [r8 + 8 * 1], rax
mov rax, -3
mov [r8 + 8 * 2], rax

; Positive multipliers at qwords 3-5
mov rax, 1
mov [r8 + 8 * 3], rax
mov rax, 2
mov [r8 + 8 * 4], rax
mov rax, 3
mov [r8 + 8 * 5], rax

; Negative * Negative
mov ax, -128
cwd
imul word [r8 + 8 * 0 + 0]
ofcfmerge

mov eax, -128
cdq
imul dword [r8 + 8 * 1 + 0]
ofcfmerge

mov rax, -128
cqo
imul qword [r8 + 8 * 2 + 0]
ofcfmerge

; Negative * Positive
mov ax, -128
cwd
imul word [r8 + 8 * 3 + 0]
ofcfmerge

mov eax, -128
cdq
imul dword [r8 + 8 * 4 + 0]
ofcfmerge

mov rax, -128
cqo
imul qword [r8 + 8 * 5 + 0]
ofcfmerge

; Positive * Positive
mov ax, 128
cwd
imul word [r8 + 8 * 3 + 0]
ofcfmerge

mov eax, 128
cdq
imul dword [r8 + 8 * 4 + 0]
ofcfmerge

mov rax, 128
cqo
imul qword [r8 + 8 * 5 + 0]
ofcfmerge

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243442A2A0001",
    "RBX": "0x1C1C1C1C00000001",
    "RSI": "0x0000000000000001",
    "RSP": "0x1010101010101010",
    "R11": "0x8000000000000000"
  }
}
%endif

; Unsigned DIV with word/dword/qword operands.
; The dividend is (R/E)DX:(R/E)AX; the quotient goes to (R/E)AX and the
; remainder to (R/E)DX.

; Scratch memory used for the memory operands
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; 16-bit: 0x7172 / 0x4748 = 1 remainder 0x2A2A.
; The dividend is positive, so cwd just zeroes DX.
mov ax, 0x7172
cwd
div word [r15 + 8 * 0 + 0]
mov word [r15 + 8 * 0 + 0], ax
mov word [r15 + 8 * 0 + 2], dx

; 32-bit: 0x71727374 / 0x55565758 = 1 remainder 0x1C1C1C1C
mov eax, 0x71727374
cdq
div dword [r15 + 8 * 1 + 0]
mov dword [r15 + 8 * 1 + 0], eax
mov dword [r15 + 8 * 1 + 4], edx

; 64-bit: quotient kept in RSI, remainder in RSP
mov rax, 0x7172737475767778
cqo
div qword [r15 + 8 * 2 + 0]
mov rsi, rax
mov rsp, rdx

; 128bit divide where we actually care about the upper bits containing real data
; RDX:RAX = 2^64, divided by 2 = 0x8000000000000000
mov rax, 0x0
mov rdx, 0x1
mov rcx, 2
div rcx
mov r11, rax

mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_06_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000000",
    "RDX": "0"
  }
}
%endif

; 32-bit unsigned DIV with garbage in the upper halves of RAX and RDX.
; Only EDX:EAX (1:0) forms the dividend; the expected RAX/RDX in the CONFIG
; header have their upper 32 bits clear.
mov r15, 0xe0000000

; Divisor: dword 2 in slot 0
mov eax, 0x2
mov [r15 + 8 * 0], eax

; Upper 32 bits of both registers are set and must not affect the result
mov rax, 0xFFFFFFFF00000000
mov rdx, 0xFFFFFFFF00000001

div dword [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243442A2A0001",
    "RBX": "0x1C1C1C1C00000001",
    "RCX": "0x0000000000000001",
    "RDX": "0x1010101010101010",
    "RSI": "0x41424344FF800000",
    "RDI": "0xFFFFFF8000000000",
    "RBP": "0x0000000000000000",
    "RSP": "0xFFFFFFFFFFFFFF80",
    "R8":  "0xFFFFFFFF00000004",
    "R9":  "0x0000000000000002",
    "R10": "0x0000000000000001",
    "R11": "0x0000000000000000",
    "R12": "0x4000000000000000",
    "R13": "0x0000000000000000"
  }
}
%endif

; Exercises signed IDIV with a memory divisor at 16, 32 and 64 bits for the
; sign combinations +/+, -/+ and -/-, then a 64-bit IDIV by register where RDX
; holds a non-zero upper dividend half. Every result is written to memory and
; reloaded into registers at the end.
mov r15, 0xe0000000

; Positive
; Divisors for the Positive / Positive cases (slots 0-2); slot 3 receives the
; 64-bit remainder.
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax
mov rax, 0
mov [r15 + 8 * 3], rax

; Positive
; Divisors for the Negative / Positive cases (slots 4-6); slot 7 receives the
; 64-bit remainder.
mov rax, 0x4142434445464748
mov [r15 + 8 * 4], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 5], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 6], rax
mov rax, 0
mov [r15 + 8 * 7], rax

; Negative
; Divisors for the Negative / Negative cases (slots 8-10); slot 11 receives the
; 64-bit remainder.
mov rax, -32
mov [r15 + 8 * 8], rax
mov rax, -64
mov [r15 + 8 * 9], rax
mov rax, -128
mov [r15 + 8 * 10], rax
mov rax, 0
mov [r15 + 8 * 11], rax

; Positive / Positive
; cwd/cdq/cqo sign-extend the accumulator into DX/EDX/RDX to form the dividend.
mov ax, 0x7172
cwd
idiv word [r15 + 8 * 0 + 0]
mov word [r15 + 8 * 0 + 0], ax
mov word [r15 + 8 * 0 + 2], dx

mov eax, 0x71727374
cdq
idiv dword [r15 + 8 * 1 + 0]
mov dword [r15 + 8 * 1 + 0], eax
mov dword [r15 + 8 * 1 + 4], edx

mov rax, 0x7172737475767778
cqo
idiv qword [r15 + 8 * 2 + 0]
mov qword [r15 + 8 * 2 + 0], rax
mov qword [r15 + 8 * 3 + 0], rdx

; Negative / Positive
mov ax, -128
cwd
idiv word [r15 + 8 * 4 + 0]
mov word [r15 + 8 * 4 + 0], ax
mov word [r15 + 8 * 4 + 2], dx

mov eax, -128
cdq
idiv dword [r15 + 8 * 5 + 0]
mov dword [r15 + 8 * 5 + 0], eax
mov dword [r15 + 8 * 5 + 4], edx

mov rax, -128
cqo
idiv qword [r15 + 8 * 6 + 0]
mov qword [r15 + 8 * 6 + 0], rax
mov qword [r15 + 8 * 7 + 0], rdx

; Negative / Negative
mov ax, -128
cwd
idiv word [r15 + 8 * 8 + 0]
mov word [r15 + 8 * 8 + 0], ax
mov word [r15 + 8 * 8 + 2], dx

mov eax, -128
cdq
idiv dword [r15 + 8 * 9 + 0]
mov dword [r15 + 8 * 9 + 0], eax
mov dword [r15 + 8 * 9 + 4], edx

mov rax, -128
cqo
idiv qword [r15 + 8 * 10 + 0]
mov qword [r15 + 8 * 10 + 0], rax
mov qword [r15 + 8 * 11 + 0], rdx

; 128bit divide where we actually care about the upper bits containing real data
; Dividend is RDX:RAX = 1:0 (i.e. 2^64), divisor is 4.
mov rax, 0x0
mov rdx, 0x1
mov rcx, 4
idiv rcx
mov qword [r15 + 8 * 12 + 0], rax
mov qword [r15 + 8 * 13 + 0], rdx

; Positive / Positive results
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]
mov rdx, [r15 + 8 * 3]

; Negative / Positive results
mov rsi, [r15 + 8 * 4]
mov rdi, [r15 + 8 * 5]
mov rbp, [r15 + 8 * 6]
mov rsp, [r15 + 8 * 7]

; Negative / Negative results
mov r8, [r15 + 8 * 8]
mov r9, [r15 + 8 * 9]
mov r10, [r15 + 8 * 10]
mov r11, [r15 + 8 * 11]

; 128bit results
mov r12, [r15 + 8 * 12]
mov r13, [r15 + 8 * 13]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/3_F7_07_2.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x40000000",
    "RDX": "0"
  }
}
%endif

; 32-bit signed IDIV with garbage in the upper halves of RAX and RDX.
; Only EDX:EAX (1:0) forms the dividend; the expected RAX/RDX in the CONFIG
; header have their upper 32 bits clear.
mov r15, 0xe0000000

; Divisor: dword 4 in slot 0
mov eax, 0x4
mov [r15 + 8 * 0], eax

; Upper 32 bits of both registers are set and must not affect the result
mov rax, 0xFFFFFFFF00000000
mov rdx, 0xFFFFFFFF00000001

idiv dword [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/4_FE_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464749"
  }
}
%endif

; INC on a byte memory operand: only the lowest byte of the stored qword
; should change (0x48 -> 0x49).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax

inc byte [rdx + 8 * 0 + 0]
; Reload the whole qword so the untouched upper bytes are checked as well
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/4_FE_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464747"
  }
}
%endif

; DEC on a byte memory operand: only the lowest byte of the stored qword
; should change (0x48 -> 0x47).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax

dec byte [rdx + 8 * 0 + 0]
; Reload the whole qword so the untouched upper bytes are checked as well
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464749",
    "RBX": "0x5152535455565759",
    "RCX": "0x6162636465666769"
  }
}
%endif

; INC on word, dword and qword memory operands, one 8-byte slot per size.
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

inc  word [r15 + 8 * 0 + 0]
inc dword [r15 + 8 * 1 + 0]
inc qword [r15 + 8 * 2 + 0]

; Reload full qwords so bytes outside the operand width are checked too
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_00_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0x0",
    "R13": "0x0",
    "R12": "0x0",
    "R11": "0x0"
  }
}
%endif

; Checks that INC and LOCK INC on memory produce the same LAHF-visible flags
; at every operand size. Each result register holds the XOR of the two
; captures, so it is zero when the flags match.
mov r15, 0xe0000000

; Every slot starts with the same value
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax

; Ensures that all the flag setting matches correctly
; RAX is cleared with MOV (which leaves the flags untouched) so that only AH
; is non-zero after LAHF.
; 16-bit pair -> R14
inc  word [r15 + 8 * 0 + 0]
mov rax, 0
mov r14, 0
lahf
mov r14, rax

lock inc  word [r15 + 8 * 1 + 0]
mov rax, 0
lahf
xor r14, rax

; 32-bit pair -> R13
inc dword [r15 + 8 * 2 + 0]
mov rax, 0
mov r13, 0
lahf
mov r13, rax

lock inc dword [r15 + 8 * 3 + 0]
mov rax, 0
lahf
xor r13, rax

; 64-bit pair -> R12
inc qword [r15 + 8 * 4 + 0]
mov rax, 0
mov r12, 0
lahf
mov r12, rax

lock inc qword [r15 + 8 * 5 + 0]
mov rax, 0
lahf
xor r12, rax

; 8-bit pair -> R11. Slots 4 and 5 are reused here; both received the same
; qword increment above, so they still hold identical values.
inc byte [r15 + 8 * 4 + 0]
mov rax, 0
mov r11, 0
lahf
mov r11, rax

lock inc byte [r15 + 8 * 5 + 0]
mov rax, 0
lahf
xor r11, rax


hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_00_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xffffffffffffff00",
    "RBX": "0xffffffffffff0000",
    "RCX": "0xffffffff00000000",
    "RDX": "0x0000000000000000",
    "R8" : "0x0000000000005400",
    "R9" : "0x0000000000005400",
    "R10": "0x0000000000005400",
    "R11": "0x0000000000005400"
  }
}
%endif

; INC wrapping an all-ones operand to zero at 8, 16, 32 and 64 bits, checking
; both the stored value and the flags captured through LAHF.
mov r15, 0xe0000000

mov rax, 0xffffffffffffffff
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax

; Clear RAX so only AH (written by LAHF) is non-zero in the captures below
xor rax, rax

; Ensure that inc overflow works and sets correct flags
inc  byte [r15 + 8 * 0 + 0]
lahf
mov r8, rax

inc  word [r15 + 8 * 1 + 0]
lahf
mov r9, rax

inc dword [r15 + 8 * 2 + 0]
lahf
mov r10, rax

inc qword [r15 + 8 * 3 + 0]
lahf
mov r11, rax


; Reload full qwords so bytes outside the operand width are checked too
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]
mov rdx, [r15 + 8 * 3]


; Mask flags we don't care about
; 0xd400 keeps AH bits 7, 6, 4 and 2 (SF, ZF, AF, PF in the LAHF layout)
and r8, 0xd400
and r9, 0xd400
and r10, 0xd400
and r11, 0xd400

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464747",
    "RBX": "0x5152535455565757",
    "RCX": "0x6162636465666767"
  }
}
%endif

; DEC on word, dword and qword memory operands, one 8-byte slot per size.
mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

dec  word [r15 + 8 * 0 + 0]
dec dword [r15 + 8 * 1 + 0]
dec qword [r15 + 8 * 2 + 0]

; Reload full qwords so bytes outside the operand width are checked too
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_01_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0x0",
    "R13": "0x0",
    "R12": "0x0",
    "R11": "0x0"
  }
}
%endif

; Checks that DEC and LOCK DEC on memory produce the same LAHF-visible flags
; at every operand size. Each result register holds the XOR of the two
; captures, so it is zero when the flags match.
mov r15, 0xe0000000

; Every slot starts with the same value
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax

; Ensures that all the flag setting matches correctly
; RAX is cleared with MOV (which leaves the flags untouched) so that only AH
; is non-zero after LAHF.
; 16-bit pair -> R14
dec  word [r15 + 8 * 0 + 0]
mov rax, 0
mov r14, 0
lahf
mov r14, rax

lock dec  word [r15 + 8 * 1 + 0]
mov rax, 0
lahf
xor r14, rax

; 32-bit pair -> R13
dec dword [r15 + 8 * 2 + 0]
mov rax, 0
mov r13, 0
lahf
mov r13, rax

lock dec dword [r15 + 8 * 3 + 0]
mov rax, 0
lahf
xor r13, rax

; 64-bit pair -> R12
dec qword [r15 + 8 * 4 + 0]
mov rax, 0
mov r12, 0
lahf
mov r12, rax

lock dec qword [r15 + 8 * 5 + 0]
mov rax, 0
lahf
xor r12, rax

; 8-bit pair -> R11. Slots 4 and 5 are reused here; both received the same
; qword decrement above, so they still hold identical values.
dec byte [r15 + 8 * 4 + 0]
mov rax, 0
mov r11, 0
lahf
mov r11, rax

lock dec byte [r15 + 8 * 5 + 0]
mov rax, 0
lahf
xor r11, rax


hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_01_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000000000ff",
    "RBX": "0x000000000000ffff",
    "RCX": "0x00000000ffffffff",
    "RDX": "0xffffffffffffffff",
    "R8" : "0x0000000000009400",
    "R9" : "0x0000000000009400",
    "R10": "0x0000000000009400",
    "R11": "0x0000000000009400"
  }
}
%endif

; DEC wrapping a zero operand to all-ones at 8, 16, 32 and 64 bits, checking
; both the stored value and the flags captured through LAHF.
mov r15, 0xe0000000

mov rax, 0x0000000000000000
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax

; Clear RAX so only AH (written by LAHF) is non-zero in the captures below
xor rax, rax

; Ensure that dec underflow works and sets correct flags
dec  byte [r15 + 8 * 0 + 0]
lahf
mov r8, rax

dec  word [r15 + 8 * 1 + 0]
lahf
mov r9, rax

dec dword [r15 + 8 * 2 + 0]
lahf
mov r10, rax

dec qword [r15 + 8 * 3 + 0]
lahf
mov r11, rax


; Reload full qwords so bytes outside the operand width are checked too
mov rax, [r15 + 8 * 0]
mov rbx, [r15 + 8 * 1]
mov rcx, [r15 + 8 * 2]
mov rdx, [r15 + 8 * 3]


; Mask flags we don't care about
; 0xd400 keeps AH bits 7, 6, 4 and 2 (SF, ZF, AF, PF in the LAHF layout)
and r8, 0xd400
and r9, 0xd400
and r10, 0xd400
and r11, 0xd400

hlt

================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748"
  }
}
%endif

; Indirect near CALL through a 64-bit memory operand. The target loads a known
; value into RAX and returns; the caller then jumps to the end.
mov r15, 0xe0000000
; Stack top sits just above the four data slots used below
lea rsp, [r15 + 8 * 4]

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

; Slot 2 holds the call target address
lea rbx, [rel .call_tgt]
mov [r15 + 8 * 2], rbx

; RAX stays zero unless the call target actually runs
mov rax, 0
call qword [r15 + 8 * 2]
jmp .end

.call_tgt:
mov rax, [r15 + 8 * 0]
ret

; Couple things that could catch failure
; Not reached on the expected path; RAX is zeroed here if RET falls through
mov rax, 0
jmp .end
mov rax, 0

.end:
hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748"
  }
}
%endif

; Indirect near JMP through a 64-bit memory operand. The target loads a known
; value into RAX and jumps to the end.
mov r15, 0xe0000000
lea rsp, [r15 + 8 * 4]

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

; Slot 2 holds the jump target address
lea rbx, [rel .call_tgt]
mov [r15 + 8 * 2], rbx

; RAX stays zero unless the jump target actually runs
mov rax, 0
jmp qword [r15 + 8 * 2]
; Only reached if the indirect jump falls through
jmp .end

.call_tgt:
mov rax, [r15 + 8 * 0]
jmp .end

; Couple things that could catch failure
; Not reached on the expected path
mov rax, 0
jmp .end
mov rax, 0

.end:
hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Far JMP through a memory far pointer built on the stack from the current CS
; and the address of .success. RAX == 1 shows the target was reached.
mov rsp, 0xe000_1000
; AX is overwritten by the "mov rax, 0" below before it is ever read
mov ax, cs
lea edi, [rel .success]

; Far pointer layout written here: 32-bit offset at [rsp], selector at [rsp+4]
sub rsp, 16
mov [rsp], edi
mov [rsp+4], cs

mov rax, 0
jmp far [rsp]

; Only reached if the far jump falls through; RAX is still 0 in that case
hlt

.success:
mov rax, 1
hlt


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_05_03_o32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "0xe0000fe8",
    "RSP": "0xe0000ff0"
  }
}
%endif

; Far CALL with a 32-bit operand size through a memory far pointer, returned
; from with "o32 retf". RBX captures RSP inside the callee; the CONFIG header
; expects it 8 bytes below the pre-call RSP, and RSP restored afterwards.
mov rsp, 0xe000_1000
; AX is overwritten by the "mov rax, 0" below before it is ever read
mov ax, cs
lea edi, [rel .success]

; Far pointer layout written here: 32-bit offset at [rsp], selector at [rsp+4]
sub rsp, 16
mov [rsp], edi
mov [rsp+4], cs

mov rax, 0
; The memory operand is addressed through ESP (32-bit address size)
call far dword [esp]

; Execution resumes here after the far return
hlt

.success:
mov rax, 1
mov rbx, rsp
o32 retf


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_05_03_o32_imm.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "0xe0000fe8",
    "RSP": "0xe0002224"
  }
}
%endif

; Far CALL with a 32-bit operand size, returned from with "o32 retf imm16".
; The CONFIG header expects the final RSP to be the pre-call RSP (0xe0000ff0)
; plus the 0x1234 immediate.
mov rsp, 0xe000_1000
; AX is overwritten by the "mov rax, 0" below before it is ever read
mov ax, cs
lea edi, [rel .success]

; Far pointer layout written here: 32-bit offset at [rsp], selector at [rsp+4]
sub rsp, 16
mov [rsp], edi
mov [rsp+4], cs

mov rax, 0
; The memory operand is addressed through ESP (32-bit address size)
call far dword [esp]

; Execution resumes here after the far return
hlt

.success:
mov rax, 1
mov rbx, rsp
; Releases an extra 0x1234 bytes of stack after popping the return frame
o32 retf 0x1234


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_05_03_o64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "0xe0000fe0",
    "RSP": "0xe0000ff0"
  }
}
%endif

; Far CALL with a 64-bit operand-size prefix through a memory far pointer,
; returned from with "o64 retf". RBX captures RSP inside the callee; the CONFIG
; header expects it 16 bytes below the pre-call RSP, and RSP restored afterwards.
mov rsp, 0xe000_1000
; AX is overwritten by the "mov rax, 0" below before it is ever read
mov ax, cs
lea edi, [rel .success]

; Far pointer layout written here: 32-bit offset at [rsp], selector at [rsp+4]
; NOTE(review): only a 32-bit offset is stored although the call carries an o64
; prefix - confirm which far-pointer layout the emulator is expected to read.
sub rsp, 16
mov [rsp], edi
mov [rsp+4], cs

mov rax, 0
o64 call far [rsp]

; Execution resumes here after the far return
hlt

.success:
mov rax, 1
mov rbx, rsp
o64 retf


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_05_03_o64_imm.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "0xe0000fe0",
    "RSP": "0xe00090f0"
  }
}
%endif

; Far CALL with a 64-bit operand-size prefix, returned from with
; "o64 retf imm16". The CONFIG header expects the final RSP to be the pre-call
; RSP (0xe0000ff0) plus the 0x8100 immediate.
mov rsp, 0xe000_1000
; AX is overwritten by the "mov rax, 0" below before it is ever read
mov ax, cs
lea edi, [rel .success]

; Far pointer layout written here: 32-bit offset at [rsp], selector at [rsp+4]
; NOTE(review): only a 32-bit offset is stored although the call carries an o64
; prefix - confirm which far-pointer layout the emulator is expected to read.
sub rsp, 16
mov [rsp], edi
mov [rsp+4], cs

mov rax, 0
o64 call far [rsp]

; Execution resumes here after the far return
hlt

.success:
mov rax, 1
mov rbx, rsp
; Releases an extra 0x8100 bytes of stack after popping the return frame
o64 retf 0x8100


================================================
FILE: unittests/ASM/PrimaryGroup/5_FF_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5758000000006768",
    "RCX": "0xE0000016"
  }
}
%endif

; PUSH with qword and word memory operands. The stack starts directly above
; slot 3, so the pushes overwrite slot 3 and the top two bytes of slot 2; both
; slots and the final RSP are checked.
mov r15, 0xe0000000
lea rsp, [r15 + 8 * 4]

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
; Slots 2 and 3 are the area the pushes will land in
mov rax, 0x6768
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax

; Encoding doesn't exist in x86-64
; push dword [r15 + 8 * 1 + 0]
push qword [r15 + 8 * 0 + 0]
push  word [r15 + 8 * 1 + 0]

; Read back the pushed data and the resulting stack pointer
mov rax, [r15 + 8 * 3]
mov rbx, [r15 + 8 * 2]
mov rcx, rsp

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/6_C6_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464761"
  }
}
%endif

; MOV of an 8-bit immediate to memory: only the lowest byte of the stored
; qword should change.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax

mov byte [rdx + 8 * 0 + 0], 0x61

; Reload the whole qword so the untouched upper bytes are checked as well
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/PrimaryGroup/6_C7_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445466162",
    "RBX": "0x5152535461626364",
    "RCX": "0x0000000061626364",
    "RDX": "0xFFFFFFFFFFFFFF80"
  }
}
%endif

; MOV of an immediate to word, dword and qword memory operands. The qword
; forms take a 32-bit immediate that is sign-extended, covered with one
; positive and one negative value.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov  word [rdx + 8 * 0 + 0], 0x6162
mov dword [rdx + 8 * 1 + 0], 0x61626364
mov qword [rdx + 8 * 2 + 0], 0x61626364
mov qword [rdx + 8 * 3 + 0], -128

; RDX is the base pointer, so it is reloaded last
mov rax, [rdx + 8 * 0]
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]
mov rdx, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/REP/F3_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434465666768", "0x5152535455565758"],
    "XMM1":  ["0x0000000065666768", "0x0"]
  }
}
%endif

; MOVSS load forms: register-to-register (merges into the destination) and
; memory-to-register (zeroes the rest of the destination).
mov rdx, 0xe0000000

; Slots 0-1: initial contents of xmm0/xmm1
mov rax, 0x4142434445432748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: source vector
mov rax, 0x6162633265666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

; Moves 32bits to lower bits
; Doesn't affect upper bits
movss xmm0, xmm2

; Moves 32bits to the lower bits
; Zeroes the upper bits
movss xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REP/F3_10_1.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000042a63326", "0x0000000000000000"],
    "XMM1": ["0x0000000040ab4706", "0x0000000000000000"],
    "XMM2": ["0x0000000041e83ad2", "0x0000000000000000"],
    "XMM3": ["0x000000004221cdae", "0x0000000000000000"],
    "XMM4": ["0x0000000042b5494c", "0x0000000000000000"],
    "XMM5": ["0x0000000042b59a55", "0x0000000000000000"],
    "XMM6": ["0x00000000420ce913", "0x0000000000000000"],
    "XMM7": ["0x0000000042042015", "0x0000000000000000"],
    "XMM8": ["0x00000000423f635c", "0x0000000000000000"],
    "XMM9": ["0x0000000042c08f50", "0x0000000000000000"],
    "XMM10": ["0x0000000042b062c4", "0x0000000000000000"],
    "XMM11": ["0x00000000429b697f", "0x0000000000000000"],
    "XMM12": ["0x000000004176837b", "0x0000000000000000"],
    "XMM13": ["0x000000004253a13b", "0x0000000000000000"],
    "XMM14": ["0x0000000042623422", "0x0000000000000000"],
    "XMM15": ["0x00000000423ee7d8", "0x0000000000000000"]
  }
}
%endif

; MOVSS from memory into every XMM register. Each register is first filled
; with a full 16-byte row of the table so that the load has upper bits to clear.
lea rdx, [rel .data]
; Pre-fill all sixteen registers with non-zero data
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]

; Reload only the first float of each row; the expected results keep that
; float and have every other bit zero
movss xmm0, [rdx + 16 * 0]
movss xmm1, [rdx + 16 * 1]
movss xmm2, [rdx + 16 * 2]
movss xmm3, [rdx + 16 * 3]
movss xmm4, [rdx + 16 * 4]
movss xmm5, [rdx + 16 * 5]
movss xmm6, [rdx + 16 * 6]
movss xmm7, [rdx + 16 * 7]
movss xmm8, [rdx + 16 * 8]
movss xmm9, [rdx + 16 * 9]
movss xmm10, [rdx + 16 * 10]
movss xmm11, [rdx + 16 * 11]
movss xmm12, [rdx + 16 * 12]
movss xmm13, [rdx + 16 * 13]
movss xmm14, [rdx + 16 * 14]
movss xmm15, [rdx + 16 * 15]

hlt

align 16
; 256 bytes of random data (64 single-precision floats)
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REP/F3_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000045464748", "0x0"]
  }
}
%endif

; MOVSS store form: only 32 bits may be written to memory.
mov rdx, 0xe0000000

; Slots 0-1: source vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: zeroed destination, so any extra bytes written would show up
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]

; Moves lower 32bits to memory
movss [rdx + 8 * 2], xmm0

; Ensure 128bits weren't written
movapd xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REP/F3_11_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000042a63326", "0x0000000000000000"],
    "XMM1": ["0x0000000040ab4706", "0x0000000000000000"],
    "XMM2": ["0x0000000041e83ad2", "0x0000000000000000"],
    "XMM3": ["0x000000004221cdae", "0x0000000000000000"],
    "XMM4": ["0x0000000042b5494c", "0x0000000000000000"],
    "XMM5": ["0x0000000042b59a55", "0x0000000000000000"],
    "XMM6": ["0x00000000420ce913", "0x0000000000000000"],
    "XMM7": ["0x0000000042042015", "0x0000000000000000"],
    "XMM8": ["0x00000000423f635c", "0x0000000000000000"],
    "XMM9": ["0x0000000042c08f50", "0x0000000000000000"],
    "XMM10": ["0x0000000042b062c4", "0x0000000000000000"],
    "XMM11": ["0x00000000429b697f", "0x0000000000000000"],
    "XMM12": ["0x000000004176837b", "0x0000000000000000"],
    "XMM13": ["0x000000004253a13b", "0x0000000000000000"],
    "XMM14": ["0x0000000042623422", "0x0000000000000000"],
    "XMM15": ["0x00000000423ee7d8", "0x0000000000000000"]
  }
}
%endif

; MOVSS store from every XMM register. Each register stores its low float
; into its own zeroed 16-byte row of scratch memory; the rows are then read
; back whole to prove that only 32 bits were written per store.
lea rdx, [rel .data]
; Fill all sixteen registers with non-zero data
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]

; Zero the whole 256-byte scratch area (qword slots 0..31) that is read back
; below, so the result does not depend on the initial memory contents
mov rdx, 0xe0000000
mov rax, 0
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax
mov [rdx + 8 * 8], rax
mov [rdx + 8 * 9], rax
mov [rdx + 8 * 10], rax
mov [rdx + 8 * 11], rax
mov [rdx + 8 * 12], rax
mov [rdx + 8 * 13], rax
mov [rdx + 8 * 14], rax
mov [rdx + 8 * 15], rax
mov [rdx + 8 * 16], rax
mov [rdx + 8 * 17], rax
mov [rdx + 8 * 18], rax
mov [rdx + 8 * 19], rax
mov [rdx + 8 * 20], rax
mov [rdx + 8 * 21], rax
mov [rdx + 8 * 22], rax
mov [rdx + 8 * 23], rax
mov [rdx + 8 * 24], rax
mov [rdx + 8 * 25], rax
mov [rdx + 8 * 26], rax
mov [rdx + 8 * 27], rax
mov [rdx + 8 * 28], rax
mov [rdx + 8 * 29], rax
mov [rdx + 8 * 30], rax
; Slot 31 is the upper half of the row read back into xmm15
mov [rdx + 8 * 31], rax

; Store the low float of each register into the start of its row
movss [rdx + 16 * 0], xmm0
movss [rdx + 16 * 1], xmm1
movss [rdx + 16 * 2], xmm2
movss [rdx + 16 * 3], xmm3
movss [rdx + 16 * 4], xmm4
movss [rdx + 16 * 5], xmm5
movss [rdx + 16 * 6], xmm6
movss [rdx + 16 * 7], xmm7
movss [rdx + 16 * 8], xmm8
movss [rdx + 16 * 9], xmm9
movss [rdx + 16 * 10], xmm10
movss [rdx + 16 * 11], xmm11
movss [rdx + 16 * 12], xmm12
movss [rdx + 16 * 13], xmm13
movss [rdx + 16 * 14], xmm14
movss [rdx + 16 * 15], xmm15

; Refill the registers with table data so that the final loads below must
; genuinely replace every bit
lea rdx, [rel .data]
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]

mov rdx, 0xe0000000

; Read back each full 16-byte row: the stored float followed by zeroes
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]

hlt

align 16
; 256 bytes of random data (64 single-precision floats)
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REP/F3_12.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4546474845464748", "0x5556575855565758"],
    "XMM1":  ["0x4546474845464748", "0x5556575855565758"]
  }
}
%endif

; MOVSLDUP with register and memory sources: the even-indexed 32-bit elements
; of the source are duplicated into each pair of the destination.
mov rdx, 0xe0000000

; Slots 0-1: source vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: unrelated data pre-loaded into the destinations
mov rax, 0x6162633265666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm2, [rdx + 8 * 0]
movapd xmm0, [rdx + 8 * 2]
movapd xmm1, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result
movsldup xmm0, xmm2
movsldup xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/REP/F3_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434441424344", "0x5152535451525354"],
    "XMM1":  ["0x4142434441424344", "0x5152535451525354"]
  }
}
%endif

; MOVSHDUP with register and memory sources: the odd-indexed 32-bit elements
; of the source are duplicated into each pair of the destination.
mov rdx, 0xe0000000

; Slots 0-1: source vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: unrelated data pre-loaded into the destinations
mov rax, 0x6162633265666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm2, [rdx + 8 * 0]
movapd xmm0, [rdx + 8 * 2]
movapd xmm1, [rdx + 8 * 2]

; Register source, then memory source; both must give the same result
movshdup xmm0, xmm2
movshdup xmm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/REP/F3_2A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x414243443f800000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440000000", "0x5152535455565758"],
    "XMM2":  ["0x4142434440400000", "0x5152535455565758"],
    "XMM3":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM4":  ["0x41424344C0800000", "0x5152535455565758"],
    "XMM5":  ["0x41424344C0800000", "0x5152535455565758"]
  }
}
%endif

; CVTSI2SS with 32- and 64-bit integer sources taken from registers and from
; memory. Only the low 32 bits of each destination should change.
mov rdx, 0xe0000000

; Slots 0-1: initial contents of every destination register
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-5: integer inputs 1, 2, 3, 4
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x2
mov [rdx + 8 * 3], rax
mov rax, 0x3
mov [rdx + 8 * 4], rax
mov rax, 0x4
mov [rdx + 8 * 5], rax

; Stick something in the top 32bits to ensure correctness
; The low dword is -4 as a signed 32-bit value
mov rax, 0x7fc00000FFFFFFFC
mov [rdx + 8 * 6], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]

mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]

; Register sources: 64-bit, then 32-bit
cvtsi2ss xmm0, rax
cvtsi2ss xmm1, ebx

; Memory sources: 32-bit, then 64-bit
cvtsi2ss xmm2, dword [rdx + 8 * 4]
cvtsi2ss xmm3, qword [rdx + 8 * 5]

mov rbx, [rdx + 8 * 6]

; 32-bit forms must ignore the upper dword of slot 6 / RBX
cvtsi2ss xmm4, ebx
cvtsi2ss xmm5, dword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REP/F3_2A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4054c664ce741f21", "0x40516053e2d6238e"],
    "XMM1": ["0x4044836dcef227d0", "0x402a1e1c58255b03"],
    "XMM2": ["0x401568e0ce5898b3", "0x4035fe425aee6320"],
    "XMM3": ["0x402359004e7ba882", "0x40154b7d41743e96"],
    "XMM4": ["0x403d075a4e4692f7", "0x4050a018bd66277c"],
    "XMM5": ["0x40334ec14efd75e2", "0x4056d7404ea4a8c1"],
    "XMM6": ["0x404439b5ce60c9da", "0x40497b136a400fbb"],
    "XMM7": ["0x4040528bce7a58f7", "0x4037f9ca18bd6627"],
    "XMM8": ["0x4056a9295e80ad52", "0x403839b866e43aa8"],
    "XMM9": ["0x4058bc1f5e80b178", "0x4056cde5c91d14e4"],
    "XMM10": ["0x4056b34a5e80ad67", "0x4058defb00bcbe62"],
    "XMM11": ["0x40503e3c5e80a07c", "0x4052997f0ed3d85a"],
    "XMM12": ["0x40419d225e80833a", "0x40395a6bf8769ec3"],
    "XMM13": ["0x40177e285e802efc", "0x40568cc5974e65bf"],
    "XMM14": ["0x404084025e808108", "0x404a03c74fb549f9"],
    "XMM15": ["0x404d31595e809a63", "0x402459c23b7952d2"]
  }
}
%endif

; CVTSI2SS from memory into every XMM register: dword sources for xmm0-xmm7,
; qword sources for xmm8-xmm15. The integer inputs are the raw bit patterns of
; the doubles stored at .data.
lea rdx, [rel .data]
; Pre-fill all sixteen registers so that the preserved upper bits are checked
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; 32-bit integer sources
cvtsi2ss xmm0, dword [rdx + 16 * 0]
cvtsi2ss xmm1, dword [rdx + 16 * 1]
cvtsi2ss xmm2, dword [rdx + 16 * 2]
cvtsi2ss xmm3, dword [rdx + 16 * 3]
cvtsi2ss xmm4, dword [rdx + 16 * 4]
cvtsi2ss xmm5, dword [rdx + 16 * 5]
cvtsi2ss xmm6, dword [rdx + 16 * 6]
cvtsi2ss xmm7, dword [rdx + 16 * 7]
; 64-bit integer sources
cvtsi2ss xmm8,  qword [rdx + 16 * 8]
cvtsi2ss xmm9,  qword [rdx + 16 * 9]
cvtsi2ss xmm10, qword [rdx + 16 * 10]
cvtsi2ss xmm11, qword [rdx + 16 * 11]
cvtsi2ss xmm12, qword [rdx + 16 * 12]
cvtsi2ss xmm13, qword [rdx + 16 * 13]
cvtsi2ss xmm14, qword [rdx + 16 * 14]
cvtsi2ss xmm15, qword [rdx + 16 * 15]

hlt

align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
.data2:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REP/F3_2A_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x414243444F800000", "0x5152535455565758"]
  }
}
%endif

; CVTSI2SS with a 64-bit register source whose low 32 bits are all ones.
; The value must be converted as the positive 64-bit integer 0xFFFFFFFF,
; not as the 32-bit value -1.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Initial destination contents; everything above bit 31 must be preserved
movapd xmm0, [rdx + 8 * 0]

; Ensures that a large "negative" 32bit value converts correctly in cvtsi2ss when treated as a 64bit value
; Upper bits being zero
mov rax, 0xFFFFFFFF
cvtsi2ss xmm0, rax

hlt


================================================
FILE: unittests/ASM/REP/F3_2B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x0000000045464748", "0x0"]
  },
  "HostFeatures": ["SSE4A"]
}
%endif

; MOVNTSS (listed under the SSE4A host feature in the CONFIG header): stores
; the low 32 bits of an XMM register to memory.
mov rdx, 0xe0000000

; Slots 0-1: source vector
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-3: zeroed destination, so any extra bytes written would show up
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movntss [rdx + 8 * 2], xmm0
; Read the destination row back whole to check that only 32 bits were written
movaps xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REP/F3_2C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4",
    "RBP": "0xFFFFFFFE",
    "RSI": "0xFFFFFFFFFFFFFFFC"
  }
}
%endif

; CVTTSS2SI with 32- and 64-bit destinations, from register and memory
; sources, for positive and negative inputs. Each source keeps unrelated data
; above its low float.
mov r15, 0xe0000000

; Low dword 0x3f800000 = 1.0f
mov rax, 0x414243443f800000
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

; Low dword 0x40000000 = 2.0f
mov rax, 0x4142434440000000
mov [r15 + 8 * 2], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 3], rax

; Low dword 0x40400000 = 3.0f
mov rax, 0x4142434440400000
mov [r15 + 8 * 4], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 5], rax

; Low dword 0x40800000 = 4.0f
mov rax, 0x4142434440800000
mov [r15 + 8 * 6], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 7], rax

; Low dword 0xC0000000 = -2.0f
mov rax, 0x41424344C0000000
mov [r15 + 8 * 8], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 9], rax

; Low dword 0xC0800000 = -4.0f
mov rax, 0x41424344C0800000
mov [r15 + 8 * 10], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 11], rax

movapd xmm0, [r15 + 8 * 0]
movapd xmm1, [r15 + 8 * 2]

; Fill the destinations with all-ones so that the zero-extension of 32-bit
; results is visible
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1

; Register sources
cvttss2si eax, xmm0
cvttss2si rbx, xmm1

; Memory sources, negative inputs
cvttss2si ebp, [r15 + 8 * 8]
cvttss2si rsi, [r15 + 8 * 10]

; Memory sources, positive inputs
cvttss2si ecx, [r15 + 8 * 4]
cvttss2si rdx, [r15 + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REP/F3_2D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF",
    "RCX": "0xFFFFFFFE",
    "RDX": "0xFFFFFFFFFFFFFFFC"
  }
}
%endif

; CVTSS2SI with 32- and 64-bit destinations, from register and memory sources,
; for positive and negative inputs. Each source keeps unrelated data above its
; low float.
mov r15, 0xe0000000

; Low dword 0x3f800000 = 1.0f
mov rax, 0x414243443f800000
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

; Low dword 0xbf800000 = -1.0f
mov rax, 0x41424344bf800000
mov [r15 + 8 * 2], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 3], rax

; Low dword 0xC0000000 = -2.0f
mov rax, 0x41424344C0000000
mov [r15 + 8 * 4], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 5], rax

; Low dword 0xC0800000 = -4.0f
mov rax, 0x41424344C0800000
mov [r15 + 8 * 6], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 7], rax

movapd xmm0, [r15 + 8 * 0]
movapd xmm1, [r15 + 8 * 2]

; Fill the destinations with all-ones so that the zero-extension of 32-bit
; results is visible
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1

; Register sources
cvtss2si eax, xmm0
cvtss2si rbx, xmm1

; Memory sources
cvtss2si ecx, [r15 + 8 * 4]
cvtss2si rdx, [r15 + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REP/F3_51.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x414243443f800000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440000000", "0x5152535455565758"],
    "XMM2":  ["0x4142434440400000", "0x5152535455565758"],
    "XMM3":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM4":  ["0x414243443f800000", "0x5152535455565758"],
    "XMM5":  ["0x4142434440000000", "0x5152535455565758"],
    "XMM6":  ["0x4142434440400000", "0x5152535455565758"],
    "XMM7":  ["0x4142434440800000", "0x5152535455565758"]
  }
}
%endif

; SQRTSS test: scalar float32 square root with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is a perfect square.
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm0-xmm3 hold the inputs for the register form; xmm4-xmm7 start as the
; 25.0 slot and have their low dword overwritten by the memory form.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source (destination == source).
sqrtss xmm0, xmm0 ; sqrt(1.0)  = 1.0
sqrtss xmm1, xmm1 ; sqrt(4.0)  = 2.0
sqrtss xmm2, xmm2 ; sqrt(9.0)  = 3.0
sqrtss xmm3, xmm3 ; sqrt(16.0) = 4.0

; Memory source.
sqrtss xmm4, [rdx + 8 * 0] ; sqrt(1.0)  = 1.0
sqrtss xmm5, [rdx + 8 * 2] ; sqrt(4.0)  = 2.0
sqrtss xmm6, [rdx + 8 * 4] ; sqrt(9.0)  = 3.0
sqrtss xmm7, [rdx + 8 * 6] ; sqrt(16.0) = 4.0

hlt


================================================
FILE: unittests/ASM/REP/F3_52.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RCX": "1",
    "RDX": "1",
    "XMM0":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM1":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM2":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM3":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM4":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM5":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM6":  ["0x4142434400000000", "0x5152535455565758"],
    "XMM7":  ["0x4142434400000000", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif


; RSQRTSS test: approximate scalar float32 reciprocal square root with register
; and memory sources. The instruction only produces an approximation, so the
; low dword of each result is compared against the exact value within a
; relative tolerance, then zeroed so that the expected register data can check
; that bits 127:32 of every destination were preserved.
section .text
global _start

_start:
; xmm0-xmm3 hold the inputs for the register form; xmm4-xmm7 start as arg5
; and have their low dword overwritten by the memory form.
movapd xmm0, [rel arg1]
movapd xmm1, [rel arg2]
movapd xmm2, [rel arg3]
movapd xmm3, [rel arg4]
movapd xmm4, [rel arg5]
movapd xmm5, [rel arg5]
movapd xmm6, [rel arg5]
movapd xmm7, [rel arg5]

; Register source (destination == source).
rsqrtss xmm0, xmm0
rsqrtss xmm1, xmm1
rsqrtss xmm2, xmm2
rsqrtss xmm3, xmm3

; Memory source.
rsqrtss xmm4, [rel arg1]
rsqrtss xmm5, [rel arg2]
rsqrtss xmm6, [rel arg3]
rsqrtss xmm7, [rel arg4]


; Check precision of the results
; while ensuring we didn't destroy the rest of the register.
; check_relerr and define_check_data_constants come from this include.
%include "checkprecision.mac"

;; We will be storing the low 32bits to memory, then zeroing them.
;; We'll then check precision using checkprecision.mac.
; Zero esi (and therefore rsi); it is the zero source for the pinsrd below.
xor esi, esi

; NOTE(review): check_relerr presumably leaves 1 in rax when the result is
; within tolerance (the expected RegData has RAX/RBX/RCX/RDX = 1) - confirm
; against checkprecision.mac. Each verdict is copied out of rax before the
; next check overwrites it; the fourth verdict stays in rax.
pextrd [rel result1], xmm0, 0
pinsrd xmm0, esi, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov rbx, rax

pextrd [rel result2], xmm1, 0
pinsrd xmm1, esi, 0
check_relerr rel eresult2, rel result2, rel tolerance
mov rcx, rax

pextrd [rel result3], xmm2, 0
pinsrd xmm2, esi, 0
check_relerr rel eresult3, rel result3, rel tolerance
mov rdx, rax

pextrd [rel result4], xmm3, 0
pinsrd xmm3, esi, 0
check_relerr rel eresult4, rel result4, rel tolerance

; no need to test the other results which are the same,
; we can just zero them.
pinsrd xmm4, esi, 0
pinsrd xmm5, esi, 0
pinsrd xmm6, esi, 0
pinsrd xmm7, esi, 0
hlt

; Scratch storage for the extracted low dwords (written by pextrd above).
align 4096
result1 dd 0
result2 dd 0
result3 dd 0
result4 dd 0

; movapd requires 16-byte aligned operands.
align 16

; Inputs: the low dword is the float under test, the rest is filler.
arg1:
dq 0x414243443f800000 ; 1.0
dq 0x5152535455565758

arg2:
dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758

arg3:
dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758

arg4:
dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758

arg5:
dq 0x4142434441c80000 ; 25.0
dq 0x5152535455565758

; Exact expected values of 1/sqrt(argN).
eresult1:
dd 0x3f800000 ; 1.0

eresult2:
dd 0x3f000000 ; 0.5

eresult3:
dd 0x3eaaaaab ; 1/3 = 0.(3)

eresult4:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000 ; 1.5 * 2^-12

define_check_data_constants


================================================
FILE: unittests/ASM/REP/F3_52_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "XMM0":  ["0x414243447f800000", "0x5152535455565758"],
    "XMM1":  ["0x41424344ff800000", "0x5152535455565758"],
    "XMM2":  ["0x4142434400000000", "0x5152535455565758"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; RSQRTSS special-value test: +0.0 -> +inf, -0.0 -> -inf, negative -> NaN.
; Bits 127:32 of each destination must be preserved.
section .text
global _start

_start:
movapd xmm0, [rel arg1]
movapd xmm1, [rel arg2]
movapd xmm2, [rel arg3]

rsqrtss xmm0, xmm0
rsqrtss xmm1, xmm1
rsqrtss xmm2, xmm2

; The last computation (xmm2) returns nan, so we need to check the
; result manually rather than through the expected register data.
ucomiss xmm2, xmm2
setp al ; sets al to 1 if xmm2 is nan (an unordered compare sets PF)
xor esi, esi
pinsrd xmm2, esi, 0 ; inserts 0 in place of nan to test other bits
hlt

section .data
align 32
arg1:
dq 0x4142434400000000 ; 0.0, result is inf
dq 0x5152535455565758

arg2:
dq 0x4142434480000000 ; -0.0, result is -inf
dq 0x5152535455565758

arg3:
dq 0x41424344c0800000 ; -4.0, result is nan
dq 0x5152535455565758


================================================
FILE: unittests/ASM/REP/F3_53.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "XMM0":  ["0x3f80000000000000", "0x3f8000003f800000"],
    "XMM1":  ["0x4080000000000000", "0x4080000040800000"],
    "XMM2":  ["0xdeadbeef7f800000", "0xbadc0ffebadc0ffe"]
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; RCPSS test: approximate scalar float32 reciprocal with register and memory
; sources, plus the 1/0.0 = +inf special case.
; Check precision of the results
; while ensuring we didn't destroy the rest of the register.
; check_relerr and define_check_data_constants come from this include.
%include "checkprecision.mac"

section .text
global _start

_start:
movapd xmm0, [rel arg1]
movapd xmm1, [rel arg2]
movapd xmm2, [rel arg3]

rcpss xmm0, xmm0         ; register source: 1/1.0
rcpss xmm1, [rel arg2]   ; memory source:   1/4.0
rcpss xmm2, xmm2         ; 1/0.0 = +inf; exact, so it is checked via RegData

; esi = 0 is the zero source for the pinsrd below.
xor esi, esi

; Store the approximate low dword for the precision check, then zero it in the
; register so the expected register data only checks the preserved upper bits.
; NOTE(review): check_relerr presumably leaves 1 in rax when the result is
; within tolerance (the expected RegData has RAX/RBX = 1) - confirm against
; checkprecision.mac.
pextrd [rel result1], xmm0, 0
pinsrd xmm0, esi, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov rbx, rax

pextrd [rel result2], xmm1, 0
pinsrd xmm1, esi, 0
check_relerr rel eresult2, rel result2, rel tolerance

hlt

; Scratch storage for the extracted low dwords (written by pextrd above).
align 4096
result1 dd 0
result2 dd 0

; movapd requires 16-byte aligned operands.
align 16

arg1:
dq 0x3f8000003f800000 ; 1.0
dq 0x3f8000003f800000

arg2:
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000

arg3:
dq 0xdeadbeef00000000 ; 0.0
dq 0xbadc0ffebadc0ffe

; Exact expected values of 1/argN.
eresult1:
dd 0x3f800000 ; 1.0

eresult2:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000 ; 1.5 * 2^-12

define_check_data_constants


================================================
FILE: unittests/ASM/REP/F3_58.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434440a00000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM2":  ["0x4142434441c80000", "0x5152535455565758"],
    "XMM3":  ["0x4142434441800000", "0x5152535455565758"],
    "XMM4":  ["0x4142434441d00000", "0x5152535455565758"],
    "XMM5":  ["0x4142434441e80000", "0x5152535455565758"]
  }
}
%endif

; ADDSS test: scalar float32 add with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source.
addss xmm0, xmm1 ; 1.0 + 4.0  = 5.0
addss xmm2, xmm3 ; 9.0 + 16.0 = 25.0

; Memory source.
addss xmm4, [rdx + 8 * 0] ; 25.0 + 1.0 = 26.0
addss xmm5, [rdx + 8 * 2] ; 25.0 + 4.0 = 29.0

hlt


================================================
FILE: unittests/ASM/REP/F3_59.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM2":  ["0x4142434443100000", "0x5152535455565758"],
    "XMM3":  ["0x4142434441800000", "0x5152535455565758"],
    "XMM4":  ["0x4142434441c80000", "0x5152535455565758"],
    "XMM5":  ["0x4142434442c80000", "0x5152535455565758"]
  }
}
%endif

; MULSS test: scalar float32 multiply with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source.
mulss xmm0, xmm1 ; 1.0 * 4.0  = 4.0
mulss xmm2, xmm3 ; 9.0 * 16.0 = 144.0

; Memory source.
mulss xmm4, [rdx + 8 * 0] ; 25.0 * 1.0 = 25.0
mulss xmm5, [rdx + 8 * 2] ; 25.0 * 4.0 = 100.0

hlt


================================================
FILE: unittests/ASM/REP/F3_5A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM2":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4142434441800000", "0x5152535455565758"],
    "XMM4":  ["0x3FF0000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4010000000000000", "0x5152535455565758"]
  }
}
%endif

; CVTSS2SD test: convert the low float32 of the source to a float64 stored in
; the low qword of the destination, with register and memory sources.
; The upper qword of the destination must be preserved.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source: the previous low qword of the destination is discarded.
cvtss2sd xmm0, xmm1 ; low qword of xmm0 = (double)4.0
cvtss2sd xmm2, xmm3 ; low qword of xmm2 = (double)16.0

; Memory source.
cvtss2sd xmm4, [rdx + 8 * 0] ; low qword of xmm4 = (double)1.0
cvtss2sd xmm5, [rdx + 8 * 2] ; low qword of xmm5 = (double)4.0

hlt


================================================
FILE: unittests/ASM/REP/F3_5A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4054c664c0000000", "0x4150f0e342241b6c"],
    "XMM1": ["0x401568e0c0000000", "0x40aa5bea411ac802"],
    "XMM2": ["0x403d075a40000000", "0x42b6ba02419a760c"],
    "XMM3": ["0x404439b5c0000000", "0x41bfce514202945e"],
    "XMM4": ["0x4056a92980000000", "0x42b66f2e42c5e0f9"],
    "XMM5": ["0x4056b34aa0000000", "0x4294cbf84281f1e5"],
    "XMM6": ["0x40419d2260000000", "0x42b4662d40bbf141"],
    "XMM7": ["0x40408402a0000000", "0x4122ce1242698acb"],
    "XMM8": ["0x4047ec6b80000000", "0x4283a06842b40f2e"],
    "XMM9": ["0x405811ea00000000", "0x42be038e41ccb7ba"],
    "XMM10": ["0x40560c5880000000", "0x41245b0e42461aa5"],
    "XMM11": ["0x40536d2fe0000000", "0x42252cf2411ce3bd"],
    "XMM12": ["0x402ed06f60000000", "0x425e2c0d4119c75a"],
    "XMM13": ["0x404a742760000000", "0x4041495242910ec1"],
    "XMM14": ["0x404c468440000000", "0x42b17c64427763b2"],
    "XMM15": ["0x4047dcfb00000000", "0x42c16d124206d293"]
  }
}
%endif

; CVTSS2SD test over all sixteen XMM registers with a memory source.
; Each register is first loaded with its own 16-byte slot of the table, then
; the low float32 of that same slot is converted into the register's low
; qword. The upper qword therefore keeps floats 2 and 3 of the slot.
lea rdx, [rel .data]
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; Convert the first float of each slot to double precision.
cvtss2sd xmm0, [rdx + 16 * 0]
cvtss2sd xmm1, [rdx + 16 * 1]
cvtss2sd xmm2, [rdx + 16 * 2]
cvtss2sd xmm3, [rdx + 16 * 3]
cvtss2sd xmm4, [rdx + 16 * 4]
cvtss2sd xmm5, [rdx + 16 * 5]
cvtss2sd xmm6, [rdx + 16 * 6]
cvtss2sd xmm7, [rdx + 16 * 7]
cvtss2sd xmm8, [rdx + 16 * 8]
cvtss2sd xmm9, [rdx + 16 * 9]
cvtss2sd xmm10, [rdx + 16 * 10]
cvtss2sd xmm11, [rdx + 16 * 11]
cvtss2sd xmm12, [rdx + 16 * 12]
cvtss2sd xmm13, [rdx + 16 * 13]
cvtss2sd xmm14, [rdx + 16 * 14]
cvtss2sd xmm15, [rdx + 16 * 15]

hlt

; movapd requires 16-byte aligned operands.
align 16
; 256 bytes of random data (64 float32 values = sixteen 16-byte slots)
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REP/F3_5B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000100000001", "0x0000000200000002"],
    "XMM1":  ["0x0000000400000004", "0x0000000800000008"]
  }
}
%endif

; CVTTPS2DQ test: truncating packed float32 -> packed int32 conversion of all
; four lanes, with register and memory sources.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Bracketed comments list [high dword, low dword] of each qword.
mov rax, 0x3fc000003f800000 ; [1.5, 1.0]
mov [rdx + 8 * 0], rax
mov rax, 0x4039999a40000000 ; [2.9, 2.0]
mov [rdx + 8 * 1], rax

mov rax, 0x4083333340800000 ; [4.1, 4.0]
mov [rdx + 8 * 2], rax
mov rax, 0x4108000041000000 ; [8.5, 8.0]
mov [rdx + 8 * 3], rax

; Filler pattern used to pre-load the destinations; every lane of it must be
; overwritten by the conversion.
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx + 8 * 4]
movapd xmm1, [rdx + 8 * 4]

movapd xmm2, [rdx + 8 * 0]

; Fractions are discarded (truncation), so 1.5 -> 1, 2.9 -> 2, 4.1 -> 4, 8.5 -> 8.
cvttps2dq xmm0, xmm2           ; register source -> [1, 1, 2, 2]
cvttps2dq xmm1, [rdx + 8 * 2]  ; memory source   -> [4, 4, 8, 8]

hlt


================================================
FILE: unittests/ASM/REP/F3_5B_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000004500000053", "0x0000000d00000029"],
    "XMM1":  ["0x0000001500000005", "0x0000000500000009"],
    "XMM2":  ["0x000000420000001d", "0x0000005b00000013"],
    "XMM3":  ["0x0000003200000028", "0x0000001700000020"],
    "XMM4":  ["0x000000180000005a", "0x0000005b00000062"],
    "XMM5":  ["0x000000630000005a", "0x0000004a00000040"],
    "XMM6":  ["0x0000001900000023", "0x0000005a00000005"],
    "XMM7":  ["0x0000003400000021", "0x0000000a0000003a"],
    "XMM8":  ["0x000000540000002f", "0x000000410000005a"],
    "XMM9":  ["0x0000000600000060", "0x0000005f00000019"],
    "XMM10": ["0x0000002500000058", "0x0000000a00000031"],
    "XMM11": ["0x000000140000004d", "0x0000002900000009"],
    "XMM12": ["0x000000390000000f", "0x0000003700000009"],
    "XMM13": ["0x0000000400000034", "0x0000000300000048"],
    "XMM14": ["0x0000004700000038", "0x000000580000003d"],
    "XMM15": ["0x000000180000002f", "0x0000006000000021"]
  }
}
%endif

; CVTTPS2DQ test over all sixteen XMM registers with a memory source.
; Each register receives the truncated int32 values of its own 16-byte slot of
; the table (for example slot 0 starts with 83.0999 -> 83 = 0x53).
lea rdx, [rel .data]

cvttps2dq xmm0, [rdx + 16 * 0]
cvttps2dq xmm1, [rdx + 16 * 1]
cvttps2dq xmm2, [rdx + 16 * 2]
cvttps2dq xmm3, [rdx + 16 * 3]
cvttps2dq xmm4, [rdx + 16 * 4]
cvttps2dq xmm5, [rdx + 16 * 5]
cvttps2dq xmm6, [rdx + 16 * 6]
cvttps2dq xmm7, [rdx + 16 * 7]
cvttps2dq xmm8, [rdx + 16 * 8]
cvttps2dq xmm9, [rdx + 16 * 9]
cvttps2dq xmm10, [rdx + 16 * 10]
cvttps2dq xmm11, [rdx + 16 * 11]
cvttps2dq xmm12, [rdx + 16 * 12]
cvttps2dq xmm13, [rdx + 16 * 13]
cvttps2dq xmm14, [rdx + 16 * 14]
cvttps2dq xmm15, [rdx + 16 * 15]

hlt

; The table is 16-byte aligned so each slot is an aligned 128-bit operand.
align 16
; 256 bytes of random data (64 float32 values = sixteen 16-byte slots)
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REP/F3_5C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x41424344c0400000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM2":  ["0x41424344c0e00000", "0x5152535455565758"],
    "XMM3":  ["0x4142434441800000", "0x5152535455565758"],
    "XMM4":  ["0x4142434441c00000", "0x5152535455565758"],
    "XMM5":  ["0x4142434441a80000", "0x5152535455565758"]
  }
}
%endif

; SUBSS test: scalar float32 subtract with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source.
subss xmm0, xmm1 ; 1.0 - 4.0  = -3.0
subss xmm2, xmm3 ; 9.0 - 16.0 = -7.0

; Memory source.
subss xmm4, [rdx + 8 * 0] ; 25.0 - 1.0 = 24.0
subss xmm5, [rdx + 8 * 2] ; 25.0 - 4.0 = 21.0

hlt


================================================
FILE: unittests/ASM/REP/F3_5D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x515253543f800000", "0x5152535455565758"],
    "XMM1":  ["0x5152535440800000", "0x5152535455565758"],
    "XMM2":  ["0x5152535441100000", "0x5152535455565758"],
    "XMM3":  ["0x5152535441800000", "0x5152535455565758"],
    "XMM4":  ["0x515253543f800000", "0x5152535455565758"],
    "XMM5":  ["0x5152535440800000", "0x5152535455565758"]
  }
}
%endif

; MINSS test: scalar float32 minimum with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x515253543f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x5152535440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x5152535441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x5152535441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x5152535441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source: the destination already holds the smaller value.
minss xmm0, xmm1 ; min(1.0, 4.0)  = 1.0
minss xmm2, xmm3 ; min(9.0, 16.0) = 9.0

; Memory source: the source holds the smaller value.
minss xmm4, [rdx + 8 * 0] ; min(25.0, 1.0) = 1.0
minss xmm5, [rdx + 8 * 2] ; min(25.0, 4.0) = 4.0

hlt


================================================
FILE: unittests/ASM/REP/F3_5E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x414243443e800000", "0x5152535455565758"],
    "XMM1":  ["0x4142434440800000", "0x5152535455565758"],
    "XMM2":  ["0x414243443f100000", "0x5152535455565758"],
    "XMM3":  ["0x4142434441800000", "0x5152535455565758"],
    "XMM4":  ["0x4142434441c80000", "0x5152535455565758"],
    "XMM5":  ["0x4142434440c80000", "0x5152535455565758"]
  }
}
%endif

; DIVSS test: scalar float32 divide with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x414243443f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4142434440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4142434441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4142434441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4142434441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source.
divss xmm0, xmm1 ; 1.0 / 4.0  = 0.25
divss xmm2, xmm3 ; 9.0 / 16.0 = 0.5625

; Memory source.
divss xmm4, [rdx + 8 * 0] ; 25.0 / 1.0 = 25.0
divss xmm5, [rdx + 8 * 2] ; 25.0 / 4.0 = 6.25

hlt


================================================
FILE: unittests/ASM/REP/F3_5F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x5152535440800000", "0x5152535455565758"],
    "XMM1":  ["0x5152535440800000", "0x5152535455565758"],
    "XMM2":  ["0x5152535441800000", "0x5152535455565758"],
    "XMM3":  ["0x5152535441800000", "0x5152535455565758"],
    "XMM4":  ["0x5152535441c80000", "0x5152535455565758"],
    "XMM5":  ["0x5152535441c80000", "0x5152535455565758"]
  }
}
%endif

; MAXSS test: scalar float32 maximum with register and memory sources.
; Only the low dword of the destination may change; bits 127:32 must be kept.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Five 16-byte slots; the low dword of each is the float operand.
mov rax, 0x515253543f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x5152535440800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x5152535441100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x5152535441800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x5152535441c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

; xmm6 and xmm7 are loaded but not operated on or checked by this test.
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source: the source holds the larger value.
maxss xmm0, xmm1 ; max(1.0, 4.0)  = 4.0
maxss xmm2, xmm3 ; max(9.0, 16.0) = 16.0

; Memory source: the destination already holds the larger value.
maxss xmm4, [rdx + 8 * 0] ; max(25.0, 1.0) = 25.0
maxss xmm5, [rdx + 8 * 2] ; max(25.0, 4.0) = 25.0

hlt


================================================
FILE: unittests/ASM/REP/F3_6F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x5152535455565758", "0x6162636465666768"]
  }
}
%endif

; MOVDQU load test: 128-bit loads from memory into XMM registers.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Three consecutive qwords with distinct patterns.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

movdqu xmm0, [rdx + 8 * 0] ; qwords 0 and 1
movdqu xmm1, [rdx + 8 * 1] ; qwords 1 and 2: base + 8, not a multiple of 16

hlt


================================================
FILE: unittests/ASM/REP/F3_70.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5758575857585758"],
    "XMM2": ["0x6162636465666768", "0x7172717271727172"],
    "XMM3": ["0x4142434445464748", "0x5556555657585758"]
  }
}
%endif

; PSHUFHW test: shuffle the four 16-bit words of the high qword according to
; the immediate, with register and memory sources. The low qword is copied
; from the source unchanged.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
pshufhw xmm1, xmm0, 0x0              ; every high word = source high word 0 (0x5758)
pshufhw xmm2, [rdx + 8 * 2], 0xFF    ; every high word = source high word 3 (0x7172)

; Top bit different from low bits
; Note the immediate is decimal 80 = 0x50 = 0b01_01_00_00, which selects
; source high words [0, 0, 1, 1] for destination high words 0..3.
pshufhw xmm3, [rdx], 80

hlt


================================================
FILE: unittests/ASM/REP/F3_7E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x0"],
    "XMM1": ["0x4142434445464748", "0x0"]
  }
}
%endif

; MOVQ (xmm <- xmm/m64) test: copies the low 64 bits of the source and zeroes
; bits 127:64 of the destination, with register and memory sources.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm2 has a non-zero upper qword that must not be copied by movq.
movaps xmm2, [rdx + 8 * 2]

movq xmm0, xmm2            ; register source: low qword only, upper qword = 0
movq xmm1, [rdx + 8 * 0]   ; memory source: 64-bit load, upper qword = 0

hlt


================================================
FILE: unittests/ASM/REP/F3_7F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x5152535455565758", "0x6162636465666768"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758"],
    "XMM3": ["0x0", "0x0"],
    "XMM4": ["0x5152535455565758", "0x6162636465666768"]
  }
}
%endif

; MOVDQU store test: 128-bit store to memory plus a register-to-register move.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

; Source data: three consecutive qwords with distinct patterns.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Zero the store target (qwords 3-4) and the guard area after it (qwords 5-7).
mov rax, 0
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

movdqu xmm0, [rdx + 8 * 0]
movdqu xmm1, [rdx + 8 * 1]

; The store under test: writes 16 bytes at qwords 3-4.
movdqu [rdx + 8 * 3], xmm0

; Read the stored value back.
movdqu xmm2, [rdx + 8 * 3]
; Ensure it didn't write past where it should
movdqu xmm3, [rdx + 8 * 5]

; Register-to-register form.
movdqu xmm4, xmm1

hlt


================================================
FILE: unittests/ASM/REP/F3_B8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x6",
    "RBX": "0x10",
    "RCX": "0x1D",
    "RDX": "0x0",
    "RSI": "0x20",
    "R14": "0x10",
    "R13": "0x40"
  }
}
%endif

; POPCNT test: population count for 16/32/64-bit operand sizes with memory and
; register sources.
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Also clears rax so the 16-bit popcnt below leaves a clean 64-bit value.
; The qword stored at slot 3 is not read back by this test.
mov rax, 0
mov [rdx + 8 * 3], rax

; Memory sources.
popcnt ax, word [rdx + 8 * 0]    ; popcnt(0x4748)             = 6
popcnt ebx, dword [rdx + 8 * 1]  ; popcnt(0x55565758)         = 16
popcnt rcx, qword [rdx + 8 * 2]  ; popcnt(0x6162636465666768) = 29

; Register source, all bits clear. This overwrites the rdx base pointer, so no
; memory access through rdx may follow.
mov r15, 0
popcnt rdx, r15                  ; 0

; Register source, all bits set.
mov r15, 0xFFFFFFFFFFFFFFFF
popcnt esi, r15d                 ; 32
popcnt r14w, r15w                ; 16 (only the low word of r14 is written)
popcnt r13, r15                  ; 64

hlt


================================================
FILE: unittests/ASM/REP/F3_BC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3",
    "RBX": "0x3",
    "RCX": "0x3",
    "RDX": "0x40",
    "RSI": "0x0",
    "R14": "0x0",
    "R13": "0x0",
    "R12": "0x20",
    "R11": "0x10"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; TZCNT test: trailing-zero count for 16/32/64-bit operand sizes with memory
; and register sources, including the all-zero input (result = operand size).
; rdx = base of the scratch data area (assumed to be mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; Also clears rax so the 16-bit tzcnt below leaves a clean 64-bit value.
; The qword stored at slot 3 is not read back by this test.
mov rax, 0
mov [rdx + 8 * 3], rax

; Memory sources: each low byte (0x48, 0x58, 0x68) has three trailing zeros.
tzcnt ax, word [rdx + 8 * 0]     ; 3
tzcnt ebx, dword [rdx + 8 * 1]   ; 3
tzcnt rcx, qword [rdx + 8 * 2]   ; 3

; Zero input: the result is the operand size in bits. The first tzcnt
; overwrites the rdx base pointer, so no memory access through rdx may follow.
mov r15, 0
mov r12, 0
mov r11, 0
tzcnt rdx, r15                   ; 64
tzcnt r12d, r15d                 ; 32
tzcnt r11w, r15w                 ; 16

; All bits set: no trailing zeros.
mov r15, 0xFFFFFFFFFFFFFFFF
tzcnt esi, r15d                  ; 0
tzcnt r14w, r15w                 ; 0 (only the low word of r14 is written)
tzcnt r13, r15                   ; 0

hlt


================================================
FILE: unittests/ASM/REP/F3_BD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RCX": "0x10",
    "RDX": "0x20",
    "RSI": "0x40",
    "RDI": "0",
    "RBP": "0",
    "RSP": "0",
    "R8":  "0x8",
    "R9":  "0x10",
    "R10": "0x20",
    "R15": "0x2A540"
  }
}
%endif

; LZCNT flag test: checks both the count and the CF/ZF outputs for 16/32/64-bit
; operand sizes. CF and ZF of every lzcnt are accumulated into r15, two bits
; per instruction.

; zfcfmerge: append the CF and ZF left by the preceding instruction to r15:
;   r15 = (r15 << 2) | (CF << 1) | ZF
; Uses AX and BX and stores result in r15
; CF:ZF
%macro zfcfmerge 0
  ; AH = low byte of the flags register (CF is bit 0, ZF is bit 6)
  lahf

  ; Shift the flags byte from AH down into AL so that CF lands in bit 0
  shr ax, 8

  ; Move to a temp and isolate CF
  mov bx, ax
  and rbx, 1

  ; Insert CF
  shl r15, 1
  or r15, rbx

  ; Make room for ZF
  shl r15, 1

  ; Move to a temp
  mov bx, ax

  ; Extract ZF
  shr bx, 6
  and rbx, 1

  ; Insert ZF
  or r15, rbx
%endmacro

; Feature check: CPUID leaf 0x80000001, ECX bit 5 (the LZCNT/ABM feature bit).
mov rax, 0x80000001
cpuid

shr ecx, 5
and ecx, 1
cmp ecx, 1
je .continue

; We don't support the instruction. Leave
; (rax gets a marker value that does not match the expected RAX of 0)
mov rax, 0xDEADBEEF41414141
hlt

.continue:

; cpuid clobbered eax/ebx; start from a clean accumulator and scratch registers.
mov rax, 0
mov rbx, 0
mov r15, 0

; Test zeroes
; Result is the operand size; expected flags are CF=1, ZF=0.
mov rcx, 0
lzcnt cx, cx
zfcfmerge

mov rdx, 0
lzcnt edx, edx
zfcfmerge

mov rsi, 0
lzcnt rsi, rsi
zfcfmerge

; Test highest bit set to 1
; Result is 0; expected flags are CF=0, ZF=1.
mov rdi, 0x8000
lzcnt di, di
zfcfmerge

mov rbp, 0x80000000
lzcnt ebp, ebp
zfcfmerge

mov rsp, 0x8000000000000000
lzcnt rsp, rsp
zfcfmerge

; Test bit in the middle of the range
; Result is non-zero and the input is non-zero; expected flags are CF=0, ZF=0.
mov r8, 0x0080
lzcnt r8w, r8w
zfcfmerge

mov r9, 0x00008000
lzcnt r9d, r9d
zfcfmerge

mov r10, 0x00000080000000
lzcnt r10, r10
zfcfmerge

; Accumulated flags: 0b10_10_10_01_01_01_00_00_00 = 0x2A540.
; rax still holds the flags scratch value from the last zfcfmerge; clear it so
; it matches the expected RAX of 0. (RBX is not part of the expected RegData.)
mov rax, 0

hlt


================================================
FILE: unittests/ASM/REP/F3_BD_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x40",
    "RBX": "0x07",
    "RCX": "0x0F",
    "RDX": "0x17",
    "RSI": "0x1F",
    "RDI": "0x27",
    "RBP": "0x2F",
    "RSP": "0x37",
    "R8":  "0x3F",
    "R9":  "0x00",
    "R10": "0x08",
    "R11": "0x10",
    "R12": "0x18",
    "R13": "0x20",
    "R14": "0x28",
    "R15": "0x38"
  }
}
%endif

; LZCNT test, 64-bit operand size, memory source: one table entry per
; general-purpose register. Only the counts are checked, not the flags.
lea r15, [rel .data]

; We only care about results here
lzcnt rax, qword [r15 + 8 * 0]
lzcnt rbx, qword [r15 + 8 * 1]
lzcnt rcx, qword [r15 + 8 * 2]
lzcnt rdx, qword [r15 + 8 * 3]
lzcnt rsi, qword [r15 + 8 * 4]
lzcnt rdi, qword [r15 + 8 * 5]
lzcnt rbp, qword [r15 + 8 * 6]
lzcnt rsp, qword [r15 + 8 * 7]
lzcnt r8,  qword [r15 + 8 * 8]
lzcnt r9,  qword [r15 + 8 * 9]
lzcnt r10, qword [r15 + 8 * 10]
lzcnt r11, qword [r15 + 8 * 11]
lzcnt r12, qword [r15 + 8 * 12]
lzcnt r13, qword [r15 + 8 * 13]
lzcnt r14, qword [r15 + 8 * 14]
; r15 is both the table base and the destination, so this load must stay last.
lzcnt r15, qword [r15 + 8 * 15]

hlt

; Sixteen entries, indices 0-15, one per lzcnt above. The trailing comment is
; the expected leading-zero count.
.data:
dq 0x0000000000000000 ;  0: rax = 0x40
dq 0x01FFFFFFFFFFFFFF ;  1: rbx = 0x07
dq 0x0001FFFFFFFFFFFF ;  2: rcx = 0x0F
dq 0x000001FFFFFFFFFF ;  3: rdx = 0x17
dq 0x00000001FFFFFFFF ;  4: rsi = 0x1F
dq 0x0000000001FFFFFF ;  5: rdi = 0x27
dq 0x000000000001FFFF ;  6: rbp = 0x2F
dq 0x00000000000001FF ;  7: rsp = 0x37
dq 0x0000000000000001 ;  8: r8  = 0x3F
dq 0x8000000000000000 ;  9: r9  = 0x00
dq 0x0080000000000000 ; 10: r10 = 0x08
dq 0x0000800000000000 ; 11: r11 = 0x10
dq 0x0000008000000000 ; 12: r12 = 0x18
dq 0x0000000080000000 ; 13: r13 = 0x20
dq 0x0000000000800000 ; 14: r14 = 0x28
; Entry 15 was missing, so the r15 load read past the end of the table.
; 0x80 is the value that produces the expected R15 of 0x38.
dq 0x0000000000000080 ; 15: r15 = 0x38


================================================
FILE: unittests/ASM/REP/F3_BD_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x20",
    "RBX": "0x07",
    "RCX": "0x0F",
    "RDX": "0x17",
    "RSI": "0x1F",
    "RDI": "0x00",
    "RBP": "0x08",
    "RSP": "0x10",
    "R8":  "0x1F",
    "R9":  "0x00",
    "R10": "0x06",
    "R11": "0x0E",
    "R12": "0x16",
    "R13": "0x1E",
    "R14": "0x1D",
    "R15": "0x18"
  }
}
%endif

; LZCNT test, 32-bit operand size, memory source: one load per general-purpose
; register. Only the counts are checked, not the flags. A 32-bit destination
; write also clears the upper 32 bits of the 64-bit register.
lea r15, [rel .data]

; We only care about results here
lzcnt eax,  dword [r15 + 4 * 0]
lzcnt ebx,  dword [r15 + 4 * 1]
lzcnt ecx,  dword [r15 + 4 * 2]
lzcnt edx,  dword [r15 + 4 * 3]
lzcnt esi,  dword [r15 + 4 * 4]
lzcnt edi,  dword [r15 + 4 * 5]
lzcnt ebp,  dword [r15 + 4 * 6]
lzcnt esp,  dword [r15 + 4 * 7]
; NOTE(review): r8d re-reads entry 4 and no load references entry 8; this may
; have been meant to be index 8. The expected R8 of 0x1F matches index 4.
lzcnt r8d,  dword [r15 + 4 * 4]
lzcnt r9d,  dword [r15 + 4 * 9]
lzcnt r10d, dword [r15 + 4 * 10]
lzcnt r11d, dword [r15 + 4 * 11]
lzcnt r12d, dword [r15 + 4 * 12]
lzcnt r13d, dword [r15 + 4 * 13]
lzcnt r14d, dword [r15 + 4 * 14]
; r15 is both the table base and the destination, so this load must stay last.
lzcnt r15d, dword [r15 + 4 * 15]

hlt

; Sixteen entries, indices 0-15. The trailing comment is the leading-zero
; count of the entry and the register(s) that load it.
.data:
dd 0x00000000 ;  0: 0x20 (eax)
dd 0x01FFFFFF ;  1: 0x07 (ebx)
dd 0x0001FFFF ;  2: 0x0F (ecx)
dd 0x000001FF ;  3: 0x17 (edx)
dd 0x00000001 ;  4: 0x1F (esi, r8d)
dd 0x80000000 ;  5: 0x00 (edi)
dd 0x00800000 ;  6: 0x08 (ebp)
dd 0x00008000 ;  7: 0x10 (esp)
dd 0x00000080 ;  8: 0x18 (not loaded)
dd 0xFFFFFFFF ;  9: 0x00 (r9d)
dd 0x02000000 ; 10: 0x06 (r10d)
dd 0x00020000 ; 11: 0x0E (r11d)
dd 0x00000200 ; 12: 0x16 (r12d)
dd 0x00000002 ; 13: 0x1E (r13d)
dd 0x00000004 ; 14: 0x1D (r14d)
; Entry 15 was missing, so the r15d load read past the end of the table.
; 0x80 is the value that produces the expected R15 of 0x18.
dd 0x00000080 ; 15: 0x18 (r15d)


================================================
FILE: unittests/ASM/REP/F3_BD_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF0010",
    "RBX": "0xFFFFFFFFFFFF0003",
    "RCX": "0xFFFFFFFFFFFF0007",
    "RDX": "0xFFFFFFFFFFFF000B",
    "RSI": "0xFFFFFFFFFFFF000F",
    "RDI": "0xFFFFFFFFFFFF0000",
    "RBP": "0xFFFFFFFFFFFF0004",
    "RSP": "0xFFFFFFFFFFFF0008",
    "R8":  "0xFFFFFFFFFFFF000C",
    "R9":  "0xFFFFFFFFFFFF0000",
    "R10": "0xFFFFFFFFFFFF0002",
    "R11": "0xFFFFFFFFFFFF0006",
    "R12": "0xFFFFFFFFFFFF000A",
    "R13": "0xFFFFFFFFFFFF000E",
    "R14": "0xFFFFFFFFFFFF000D",
    "R15": "0x0000000000000008"
  }
}
%endif

; 16-bit LZCNT test.
; Every general purpose register receives the leading-zero count of one word
; from the 16-entry table at .data; register N reads table entry N.
; A zero input yields the operand size (0x10).
lea r15, [rel .data]

; Pre-fill the destinations with all ones: a 16-bit write must leave
; bits 63:16 untouched, which is what the 0xFFFFFFFFFFFF.... values check.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rsi, -1
mov rdi, -1
mov rbp, -1
mov rsp, -1
mov r8,  -1
mov r9,  -1
mov r10, -1
mov r11, -1
mov r12, -1
mov r13, -1
mov r14, -1

; We only care about results here
lzcnt ax,  word [r15 + 2 * 0]
lzcnt bx,  word [r15 + 2 * 1]
lzcnt cx,  word [r15 + 2 * 2]
lzcnt dx,  word [r15 + 2 * 3]
lzcnt si,  word [r15 + 2 * 4]
lzcnt di,  word [r15 + 2 * 5]
lzcnt bp,  word [r15 + 2 * 6]
lzcnt sp,  word [r15 + 2 * 7]
; Entry 8 was previously skipped (entry 4 was read a second time).
lzcnt r8w,  word [r15 + 2 * 8]
lzcnt r9w,  word [r15 + 2 * 9]
lzcnt r10w, word [r15 + 2 * 10]
lzcnt r11w, word [r15 + 2 * 11]
lzcnt r12w, word [r15 + 2 * 12]
lzcnt r13w, word [r15 + 2 * 13]
lzcnt r14w, word [r15 + 2 * 14]
; r15 is the table pointer, so it has to be overwritten last.
lzcnt r15w, word [r15 + 2 * 15]
; The upper bits of r15 still hold the table address; drop them so the
; final value only depends on the lzcnt result.
movzx r15d, r15w

hlt

.data:
dw 0x0000 ; [0]  -> 0x10
dw 0x1FFF ; [1]  -> 0x03
dw 0x01FF ; [2]  -> 0x07
dw 0x001F ; [3]  -> 0x0B
dw 0x0001 ; [4]  -> 0x0F
dw 0x8000 ; [5]  -> 0x00
dw 0x0800 ; [6]  -> 0x04
dw 0x0080 ; [7]  -> 0x08
dw 0x0008 ; [8]  -> 0x0C
dw 0xFFFF ; [9]  -> 0x00
dw 0x2000 ; [10] -> 0x02
dw 0x0200 ; [11] -> 0x06
dw 0x0020 ; [12] -> 0x0A
dw 0x0002 ; [13] -> 0x0E
dw 0x0004 ; [14] -> 0x0D
; Entry 15 keeps the r15w load inside the table instead of reading whatever
; bytes happen to follow the test binary.
dw 0x0080 ; [15] -> 0x08


================================================
FILE: unittests/ASM/REP/F3_C2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x51525354FFFFFFFF", "0x5152535440000000"],
    "XMM1": ["0x5152535400000000", "0x5152535440000000"],
    "XMM2": ["0x51525354FFFFFFFF", "0x5152535440000000"],
    "XMM3": ["0x0000000000000000", "0x7FC000007FC00000"],
    "XMM4": ["0x5152535400000000", "0x5152535440000000"],
    "XMM5": ["0x51525354FFFFFFFF", "0x5152535440000000"],
    "XMM6": ["0x5152535400000000", "0x5152535440000000"],
    "XMM7": ["0x00000000FFFFFFFF", "0x7FC000007FC00000"]
  }
}
%endif

; CMPSS (scalar single-precision compare) with all eight predicates 0x00-0x07.
; Only the low 32 bits of the destination are replaced by an all-ones or
; all-zeros mask; the RegData values above keep the other 96 bits unchanged.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

; First operand: low float is 0x3f800000 (1.0)
mov rax, 0x515253543f800000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535440000000
mov [rdx + 8 * 1], rax

; Second operand: low float is also 0x3f800000 (1.0)
mov rax, 0x515253543f800000
mov [rdx + 8 * 2], rax
mov rax, 0x5152535440800000
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
; xmm7 is loaded again below before it is used
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; 1.0 compared against 1.0
cmpss xmm0, xmm8, 0x00 ; EQ
cmpss xmm1, xmm8, 0x01 ; LT
cmpss xmm2, xmm8, 0x02 ; LTE
cmpss xmm4, xmm8, 0x04 ; NEQ
cmpss xmm5, xmm8, 0x05 ; NLT
cmpss xmm6, xmm8, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; First operand: low float is 0.0, upper qword holds 0x7FC00000 patterns
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FC000007FC00000
mov [rdx + 8 * 1], rax

; Second operand: low float is 0.0
mov rax, 0x7FC0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x7FC0000000000000
mov [rdx + 8 * 3], rax

movapd xmm3, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; Unordered is true when either input is NaN.
; cmpss only compares the lowest lane: 0.0 unord 0.0 is false, so the low
; dword of xmm3 becomes 0 and the other lanes are left as loaded.
cmpss xmm3, xmm8, 0x03 ; Unordered

; Ordered is true when both inputs are NOT NaN.
; Lowest lane only: 0.0 ord 0.0 is true, so the low dword of xmm7 becomes
; 0xFFFFFFFF and the other lanes are left as loaded.
cmpss xmm7, xmm8, 0x07 ; Ordered

hlt


================================================
FILE: unittests/ASM/REP/F3_D6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x4142434445464748",
    "XMM0": ["0x4142434445464748", "0x0"]
  }
}
%endif

; MOVQ2DQ test: copies mm0 into the low 64 bits of xmm0. RegData expects the
; upper 64 bits of xmm0 to end up zero even though xmm0 is pre-loaded with
; non-zero data.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; These two zeroed qwords are written but not read again in this test
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; Pre-fill xmm0 so a stale upper half would be visible in the result
movaps xmm0, [rdx + 8 * 2]
movq mm0, [rdx + 8 * 0]

movq2dq xmm0, mm0

hlt


================================================
FILE: unittests/ASM/REP/F3_E6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3ff0000000000000", "0x4000000000000000"],
    "XMM1":  ["0x4008000000000000", "0x4010000000000000"]
  }
}
%endif

; CVTDQ2PD test: converts two packed int32 values from the low 64 bits of the
; source into two doubles. Covers the register and the memory source form.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

; Filler that the conversions must fully overwrite
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; int32 pairs: (1, 2) and (3, 4)
mov rax, 0x0000000200000001
mov [rdx + 8 * 2], rax
mov rax, 0x0000000400000003
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]

movapd xmm2, [rdx + 8 * 2]

; Register source: low qword of xmm2 holds (1, 2) -> 1.0, 2.0
cvtdq2pd xmm0, xmm2
; Memory source: qword at offset 24 holds (3, 4) -> 3.0, 4.0
cvtdq2pd xmm1, [rdx + 8 * 3]

hlt


================================================
FILE: unittests/ASM/REP/F3_E6_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x41613f7980000000", "0x41d532ec06000000"],
    "XMM1": ["0x41c15e6655800000", "0x41df24b752400000"],
    "XMM2": ["0x41d0020642c00000", "0x41cf9b68e1800000"],
    "XMM3": ["0x41b0aa9a73000000", "0x41ba54fc2c000000"],
    "XMM4": ["0x41df36c61d000000", "0x41dc152c8e400000"],
    "XMM5": ["0x41dc3dd2aa800000", "0x41d8ed57a9000000"],
    "XMM6": ["0x41c9c74343800000", "0x41cca045d7000000"],
    "XMM7": ["0x418c574f38000000", "0x41dbde8cbb000000"],
    "XMM8": ["0x4199041730000000", "0x41c1ce3b68800000"],
    "XMM9": ["0x41d240f8cf800000", "0x41b4884abf000000"],
    "XMM10": ["0x41c2ec3ac2800000", "0x41cc816bf5800000"],
    "XMM11": ["0x41d8b7ed0bc00000", "0x41d0c65964800000"],
    "XMM12": ["0x41bd59e453000000", "0x41d8e0dce1400000"],
    "XMM13": ["0x41b90c1b65000000", "0x41d6b8ee29c00000"],
    "XMM14": ["0x41d004d281c00000", "0x41d8ad9d4f800000"],
    "XMM15": ["0x41a20f72c8000000", "0x41d60e777cc00000"]
  }
}
%endif

; CVTDQ2PD with a memory source for all sixteen XMM registers.
; Register N converts the first two dwords found at .data + 16 * N, so only
; the first half of every 16-byte row is consumed.
lea rdx, [rel .data]

cvtdq2pd xmm0, [rdx + 16 * 0]
cvtdq2pd xmm1, [rdx + 16 * 1]
cvtdq2pd xmm2, [rdx + 16 * 2]
cvtdq2pd xmm3, [rdx + 16 * 3]
cvtdq2pd xmm4, [rdx + 16 * 4]
cvtdq2pd xmm5, [rdx + 16 * 5]
cvtdq2pd xmm6, [rdx + 16 * 6]
cvtdq2pd xmm7, [rdx + 16 * 7]
cvtdq2pd xmm8, [rdx + 16 * 8]
cvtdq2pd xmm9, [rdx + 16 * 9]
cvtdq2pd xmm10, [rdx + 16 * 10]
cvtdq2pd xmm11, [rdx + 16 * 11]
cvtdq2pd xmm12, [rdx + 16 * 12]
cvtdq2pd xmm13, [rdx + 16 * 13]
cvtdq2pd xmm14, [rdx + 16 * 14]
cvtdq2pd xmm15, [rdx + 16 * 15]

hlt

align 16
; 256 bytes of random data
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311


================================================
FILE: unittests/ASM/REPNE/F2_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x5152535455565758"],
    "XMM1":  ["0x6162636465666768", "0x0"]
  }
}
%endif

; MOVSD load test: register source versus memory source.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]


; Moves 64 bits to the lower bits
; Doesn't affect the upper 64 bits
movsd xmm0, xmm2

; Moves 64 bits to the lower bits
; Zeroes the upper 64 bits
movsd xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x0"]
  }
}
%endif

; MOVSD store test: only the low 64 bits of the register may reach memory.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Zero the 16-byte destination so an over-wide store would be detectable
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx + 8 * 0]

; Moves the lower 64 bits to memory
movsd [rdx + 8 * 2], xmm0

; Ensure 128 bits weren't written: the upper qword must still read back as zero
movapd xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_12.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x6162636465666768", "0x6162636465666768"],
    "XMM1":  ["0x6162636465666768", "0x6162636465666768"]
  }
}
%endif

; MOVDDUP test: duplicates the low 64 bits of the source into both halves of
; the destination. Covers the register and the memory source form.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Destinations start with unrelated data that must be fully replaced
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 2]

movddup xmm0, xmm2
movddup xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_2A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3ff0000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4000000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4008000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4010000000000000", "0x5152535455565758"]
  }
}
%endif

; CVTSI2SD test: integer to double conversion into the low 64 bits, with the
; upper 64 bits of the destination preserved. Covers 64-bit and 32-bit
; sources in both register and memory form.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Integer inputs 1, 2, 3, 4
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x2
mov [rdx + 8 * 3], rax
mov rax, 0x3
mov [rdx + 8 * 4], rax
mov rax, 0x4
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]

mov rax, [rdx + 8 * 2]
mov rbx, [rdx + 8 * 3]

; Register sources: 64-bit (rax = 1) and 32-bit (ebx = 2)
cvtsi2sd xmm0, rax
cvtsi2sd xmm1, ebx

; Memory sources: 32-bit (3) and 64-bit (4)
cvtsi2sd xmm2, dword [rdx + 8 * 4]
cvtsi2sd xmm3, qword [rdx + 8 * 5]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_2A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc1ce83e425800000", "0x40516053e2d6238e"],
    "XMM1": ["0xc1de44fa05000000", "0x402a1e1c58255b03"],
    "XMM2": ["0xc1cb13165d000000", "0x4035fe425aee6320"],
    "XMM3": ["0x41cf75104d800000", "0x40154b7d41743e96"],
    "XMM4": ["0x41c8d25edd000000", "0x4050a018bd66277c"],
    "XMM5": ["0x41dfaebc40800000", "0x4056d7404ea4a8c1"],
    "XMM6": ["0xc1cc193b3a800000", "0x40497b136a400fbb"],
    "XMM7": ["0xc1cf4b1ee2800000", "0x4037f9ca18bd6627"],
    "XMM8": ["0x43d015aa4a6223e2", "0x403839b866e43aa8"],
    "XMM9": ["0x43d0162f07c84b5e", "0x4056cde5c91d14e4"],
    "XMM10": ["0x43d015acd2a84381", "0x4058defb00bcbe62"],
    "XMM11": ["0x43d0140f8f27bb30", "0x4052997f0ed3d85a"],
    "XMM12": ["0x43d010674894c448", "0x40395a6bf8769ec3"],
    "XMM13": ["0x43d005df8a0902de", "0x40568cc5974e65bf"],
    "XMM14": ["0x43d0102100a7c5ac", "0x404a03c74fb549f9"],
    "XMM15": ["0x43d0134c5657fb6a", "0x402459c23b7952d2"]
  }
}
%endif

; CVTSI2SD with memory sources for all sixteen XMM registers.
; The table at .data holds doubles; their raw bit patterns are reinterpreted
; as integers here, which is why the converted values look arbitrary.
lea rdx, [rel .data]
; Pre-load every register with its 16-byte row so the upper 64 bits hold
; known data that the conversion must leave untouched.
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; xmm0-xmm7: 32-bit integer source (low dword of each row)
cvtsi2sd xmm0, dword [rdx + 16 * 0]
cvtsi2sd xmm1, dword [rdx + 16 * 1]
cvtsi2sd xmm2, dword [rdx + 16 * 2]
cvtsi2sd xmm3, dword [rdx + 16 * 3]
cvtsi2sd xmm4, dword [rdx + 16 * 4]
cvtsi2sd xmm5, dword [rdx + 16 * 5]
cvtsi2sd xmm6, dword [rdx + 16 * 6]
cvtsi2sd xmm7, dword [rdx + 16 * 7]
; xmm8-xmm15: 64-bit integer source (low qword of each row)
cvtsi2sd xmm8,  qword [rdx + 16 * 8]
cvtsi2sd xmm9,  qword [rdx + 16 * 9]
cvtsi2sd xmm10, qword [rdx + 16 * 10]
cvtsi2sd xmm11, qword [rdx + 16 * 11]
cvtsi2sd xmm12, qword [rdx + 16 * 12]
cvtsi2sd xmm13, qword [rdx + 16 * 13]
cvtsi2sd xmm14, qword [rdx + 16 * 14]
cvtsi2sd xmm15, qword [rdx + 16 * 15]

hlt

align 16
; 512 bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303
.data2:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REPNE/F2_2B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x0"]
  },
  "HostFeatures": ["SSE4A"]
}
%endif

; MOVNTSD test: stores only the low 64 bits of xmm0 to memory.
; NOTE(review): "HostFeatures" presumably restricts the test to hosts that
; support SSE4A - confirm against the test runner.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Zero the 16-byte destination so an over-wide store would be detectable
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movntsd [rdx + 8 * 2], xmm0
; Read the destination back: the upper qword must still be zero
movaps xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_2C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4"
  }
}
%endif

; CVTTSD2SI test: double to integer conversion with truncation. Covers 32-bit
; and 64-bit destinations with register and memory sources.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

; 1.0
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; 2.0
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; 3.0
mov rax, 0x4008000000000000
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; 4.0
mov rax, 0x4010000000000000
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]

; Register sources
cvttsd2si eax, xmm0
cvttsd2si rbx, xmm1

; Memory sources; rdx is the base pointer, so it is overwritten last
cvttsd2si ecx, [rdx + 8 * 4]
cvttsd2si rdx, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_2D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x2",
    "RCX": "0x3",
    "RDX": "0x4",
    "R9": "0x8000000000000000",
    "R10": "0x8000000000000000",
    "R11": "0x8000000000000000",
    "R12": "0x8000000000000000"
  }
}
%endif

; CVTSD2SI test: double to integer conversion. Besides small exact values it
; feeds inputs that cannot be represented as int64; RegData expects
; 0x8000000000000000 for each of those.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

; 1.0
mov rax, 0x3FF0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; 2.0
mov rax, 0x4000000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; 3.0
mov rax, 0x4008000000000000
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; 4.0
mov rax, 0x4010000000000000
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

; Unrepresentable inputs: +infinity, -infinity, quiet NaN and the largest
; finite double
mov rax, 0x7ff0000000000000
mov [rdx + 8 * 8], rax
mov rax, 0xfff0000000000000
mov [rdx + 8 * 9], rax
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 10], rax
mov rax, 0x7fefffffffffffff
mov [rdx + 8 * 11], rax


movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]

; Register sources
cvtsd2si eax, xmm0
cvtsd2si rbx, xmm1

; Memory sources; rdx is the base pointer, so it is overwritten last
cvtsd2si ecx, [rdx + 8 * 4]
cvtsd2si r9, [rdx + 8 * 8]
cvtsd2si r10, [rdx + 8 * 9]
cvtsd2si r11, [rdx + 8 * 10]
cvtsd2si r12, [rdx + 8 * 11]
cvtsd2si rdx, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_2D_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000053",
    "RBX": "0x0000000000000029",
    "RCX": "0x0000000000000005",
    "RDX": "0x0000000000000009",
    "RSI": "0x000000000000001d",
    "RDI": "0x0000000000000013",
    "RSP": "0x0000000000000028",
    "RBP": "0x0000000000000020",
    "R8": "0x000000000000005a",
    "R9": "0x0000000000000062",
    "R10": "0x000000000000005a",
    "R11": "0x0000000000000040",
    "R12": "0x0000000000000023",
    "R13": "0x0000000000000005",
    "R14": "0x0000000000000021",
    "R15": "0x000000000000003a"
  }
}
%endif

; Truncating double to integer conversion into every general purpose
; register. Register N converts the low double of the 16-byte row N of the
; table at .data, i.e. every second double of the table.
; RDI is listed in RegData as well: xmm5 holds 19.30764, which truncates to
; 0x13, so the edi conversion is no longer left unchecked.
; NOTE(review): the file name suggests opcode F2 0F 2D (cvtsd2si) while the
; code uses cvttsd2si; the expected values are the truncated results
; (e.g. 9.67383 -> 9) - confirm which instruction was intended.
lea r15, [rel .data]

movapd xmm0, [r15 + 16 * 0]
movapd xmm1, [r15 + 16 * 1]
movapd xmm2, [r15 + 16 * 2]
movapd xmm3, [r15 + 16 * 3]
movapd xmm4, [r15 + 16 * 4]
movapd xmm5, [r15 + 16 * 5]
movapd xmm6, [r15 + 16 * 6]
movapd xmm7, [r15 + 16 * 7]
movapd xmm8, [r15 + 16 * 8]
movapd xmm9, [r15 + 16 * 9]
movapd xmm10, [r15 + 16 * 10]
movapd xmm11, [r15 + 16 * 11]
movapd xmm12, [r15 + 16 * 12]
movapd xmm13, [r15 + 16 * 13]
movapd xmm14, [r15 + 16 * 14]
movapd xmm15, [r15 + 16 * 15]

; 32-bit destinations
cvttsd2si eax, xmm0
cvttsd2si ebx, xmm1
cvttsd2si ecx, xmm2
cvttsd2si edx, xmm3
cvttsd2si esi, xmm4
cvttsd2si edi, xmm5
cvttsd2si esp, xmm6
cvttsd2si ebp, xmm7
; 64-bit destinations; all inputs are already in XMM registers, so
; overwriting the table pointer r15 is safe
cvttsd2si r8, xmm8
cvttsd2si r9, xmm9
cvttsd2si r10, xmm10
cvttsd2si r11, xmm11
cvttsd2si r12, xmm12
cvttsd2si r13, xmm13
cvttsd2si r14, xmm14
cvttsd2si r15, xmm15

hlt

align 16
; 512 bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REPNE/F2_51.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3ff0000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4000000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4008000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM4":  ["0x3ff0000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4000000000000000", "0x5152535455565758"],
    "XMM6":  ["0x4008000000000000", "0x5152535455565758"],
    "XMM7":  ["0x4010000000000000", "0x5152535455565758"]
  }
}
%endif

; SQRTSD test: square root of the low double, upper 64 bits of the
; destination preserved. xmm0-xmm3 use a register source, xmm4-xmm7 a memory
; source. Inputs 1, 4, 9, 16 give 1.0, 2.0, 3.0, 4.0.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
; xmm4-xmm7 start as 25.0; their low double is replaced by the memory-form
; results below
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source (source and destination are the same register)
sqrtsd xmm0, xmm0
sqrtsd xmm1, xmm1
sqrtsd xmm2, xmm2
sqrtsd xmm3, xmm3

; Memory source
sqrtsd xmm4, [rdx + 8 * 0]
sqrtsd xmm5, [rdx + 8 * 2]
sqrtsd xmm6, [rdx + 8 * 4]
sqrtsd xmm7, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_58.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4014000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4039000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x403a000000000000", "0x5152535455565758"],
    "XMM5":  ["0x403d000000000000", "0x5152535455565758"]
  }
}
%endif

; ADDSD test: adds the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register source: 1.0 + 4.0 = 5.0 and 9.0 + 16.0 = 25.0
addsd xmm0, xmm1
addsd xmm2, xmm3

; Memory source: 25.0 + 1.0 = 26.0 and 25.0 + 4.0 = 29.0
addsd xmm4, [rdx + 8 * 0]
addsd xmm5, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/REPNE/F2_59.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4062000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x4039000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4059000000000000", "0x5152535455565758"]
  }
}
%endif

; MULSD test: multiplies the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; Expected products: 4.0, 144.0, 25.0, 100.0.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

mulsd xmm0, xmm1 ; 1.0 <op> 4.0
mulsd xmm2, xmm3 ; 9.0 <op> 16.0

mulsd xmm4, [rdx + 8 * 0] ; 25.0 <op> 1.0
mulsd xmm5, [rdx + 8 * 2] ; 25.0 <op> 4.0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_5A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3FF0000040800000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4022000041800000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x403900003f800000", "0x5152535455565758"],
    "XMM5":  ["0x4039000040800000", "0x5152535455565758"]
  }
}
%endif

; CVTSD2SS test: converts the low double of the source to a float and writes
; it to the low 32 bits of the destination. The expected values keep bits
; 127:32 of the destination, so the upper half of the old low double stays
; visible next to the new float.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

cvtsd2ss xmm0, xmm1 ; destination holds 1.0, source 4.0 -> float 0x40800000
cvtsd2ss xmm2, xmm3 ; destination holds 9.0, source 16.0 -> float 0x41800000

cvtsd2ss xmm4, [rdx + 8 * 0] ; destination holds 25.0, source 1.0 -> float 0x3f800000
cvtsd2ss xmm5, [rdx + 8 * 2] ; destination holds 25.0, source 4.0 -> float 0x40800000

hlt


================================================
FILE: unittests/ASM/REPNE/F2_5A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4054c66442a63326", "0x40516053e2d6238e"],
    "XMM1": ["0x4044836d42241b6c", "0x402a1e1c58255b03"],
    "XMM2": ["0x401568e040ab4706", "0x4035fe425aee6320"],
    "XMM3": ["0x40235900411ac802", "0x40154b7d41743e96"],
    "XMM4": ["0x403d075a41e83ad2", "0x4050a018bd66277c"],
    "XMM5": ["0x40334ec1419a760c", "0x4056d7404ea4a8c1"],
    "XMM6": ["0x404439b54221cdae", "0x40497b136a400fbb"],
    "XMM7": ["0x4040528b4202945e", "0x4037f9ca18bd6627"],
    "XMM8": ["0x4056a92942b5494c", "0x403839b866e43aa8"],
    "XMM9": ["0x4058bc1f42c5e0f9", "0x4056cde5c91d14e4"],
    "XMM10": ["0x4056b34a42b59a55", "0x4058defb00bcbe62"],
    "XMM11": ["0x40503e3c4281f1e5", "0x4052997f0ed3d85a"],
    "XMM12": ["0x40419d22420ce913", "0x40395a6bf8769ec3"],
    "XMM13": ["0x40177e2840bbf141", "0x40568cc5974e65bf"],
    "XMM14": ["0x4040840242042015", "0x404a03c74fb549f9"],
    "XMM15": ["0x404d315942698acb", "0x402459c23b7952d2"]
  }
}
%endif

; CVTSD2SS with a memory source for all sixteen XMM registers.
; Register N is pre-loaded with row N of the table at .data and then converts
; the low double of that same row, so only the low 32 bits of each register
; change.
lea rdx, [rel .data]
movapd xmm0, [rdx + 16 * 0]
movapd xmm1, [rdx + 16 * 1]
movapd xmm2, [rdx + 16 * 2]
movapd xmm3, [rdx + 16 * 3]
movapd xmm4, [rdx + 16 * 4]
movapd xmm5, [rdx + 16 * 5]
movapd xmm6, [rdx + 16 * 6]
movapd xmm7, [rdx + 16 * 7]
movapd xmm8, [rdx + 16 * 8]
movapd xmm9, [rdx + 16 * 9]
movapd xmm10, [rdx + 16 * 10]
movapd xmm11, [rdx + 16 * 11]
movapd xmm12, [rdx + 16 * 12]
movapd xmm13, [rdx + 16 * 13]
movapd xmm14, [rdx + 16 * 14]
movapd xmm15, [rdx + 16 * 15]


; Convert the low double of each row and insert the float into bits 31:0
cvtsd2ss xmm0, [rdx + 16 * 0]
cvtsd2ss xmm1, [rdx + 16 * 1]
cvtsd2ss xmm2, [rdx + 16 * 2]
cvtsd2ss xmm3, [rdx + 16 * 3]
cvtsd2ss xmm4, [rdx + 16 * 4]
cvtsd2ss xmm5, [rdx + 16 * 5]
cvtsd2ss xmm6, [rdx + 16 * 6]
cvtsd2ss xmm7, [rdx + 16 * 7]
cvtsd2ss xmm8, [rdx + 16 * 8]
cvtsd2ss xmm9, [rdx + 16 * 9]
cvtsd2ss xmm10, [rdx + 16 * 10]
cvtsd2ss xmm11, [rdx + 16 * 11]
cvtsd2ss xmm12, [rdx + 16 * 12]
cvtsd2ss xmm13, [rdx + 16 * 13]
cvtsd2ss xmm14, [rdx + 16 * 14]
cvtsd2ss xmm15, [rdx + 16 * 15]

hlt

align 16
; 512 bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/REPNE/F2_5C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc008000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0xc01c000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x4038000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4035000000000000", "0x5152535455565758"]
  }
}
%endif

; SUBSD test: subtracts the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; Expected differences: -3.0, -7.0, 24.0, 21.0.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

subsd xmm0, xmm1 ; 1.0 <op> 4.0
subsd xmm2, xmm3 ; 9.0 <op> 16.0

subsd xmm4, [rdx + 8 * 0] ; 25.0 <op> 1.0
subsd xmm5, [rdx + 8 * 2] ; 25.0 <op> 4.0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_5D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4022000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x3FF0000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4010000000000000", "0x5152535455565758"]
  }
}
%endif

; MINSD test: minimum of the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; Expected minima: 1.0, 9.0, 1.0, 4.0.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

minsd xmm0, xmm1 ; 1.0 <op> 4.0
minsd xmm2, xmm3 ; 9.0 <op> 16.0

minsd xmm4, [rdx + 8 * 0] ; 25.0 <op> 1.0
minsd xmm5, [rdx + 8 * 2] ; 25.0 <op> 4.0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_5E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3fd0000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x3fe2000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x4039000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4019000000000000", "0x5152535455565758"]
  }
}
%endif

; DIVSD test: divides the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; Expected quotients: 0.25, 0.5625, 25.0, 6.25.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

divsd xmm0, xmm1 ; 1.0 <op> 4.0
divsd xmm2, xmm3 ; 9.0 <op> 16.0

divsd xmm4, [rdx + 8 * 0] ; 25.0 <op> 1.0
divsd xmm5, [rdx + 8 * 2] ; 25.0 <op> 4.0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_5F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758"],
    "XMM2":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758"],
    "XMM4":  ["0x4039000000000000", "0x5152535455565758"],
    "XMM5":  ["0x4039000000000000", "0x5152535455565758"]
  }
}
%endif

; MAXSD test: maximum of the low doubles, upper 64 bits of the destination
; preserved. Covers the register and the memory source form; the source
; registers xmm1 and xmm3 are checked to be unmodified.
; Expected maxima: 4.0, 16.0, 25.0, 25.0.
; NOTE(review): assumes the harness maps writable memory at 0xe0000000 - confirm.
mov rdx, 0xe0000000

mov rax, 0x3FF0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

mov rax, 0x4022000000000000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

mov rax, 0x4030000000000000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 7], rax

mov rax, 0x4039000000000000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
; xmm6 and xmm7 are loaded but neither used below nor listed in RegData
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

maxsd xmm0, xmm1 ; 1.0 <op> 4.0
maxsd xmm2, xmm3 ; 9.0 <op> 16.0

maxsd xmm4, [rdx + 8 * 0] ; 25.0 <op> 1.0
maxsd xmm5, [rdx + 8 * 2] ; 25.0 <op> 4.0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_70.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x4748474847484748", "0x5152535455565758"],
    "XMM3": ["0x6162616261626162", "0x7172737475767778"],
    "XMM4": ["0x4546454647484748", "0x5152535455565758"]
  }
}
%endif

; PSHUFLW (F2 0F 70) test: shuffle the four low 16-bit words of the source
; according to imm8 (two selector bits per destination word); the expected
; upper 64 bits of each result equal the upper 64 bits of its source.
; Covers register-source and memory-source forms.

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Slot 0: low qword words (3..0) = 0x4142, 0x4344, 0x4546, 0x4748
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1: low qword words (3..0) = 0x6162, 0x6364, 0x6566, 0x6768
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 8 * 2]
pshuflw xmm2, xmm0, 0x0  ; every selector = 0 -> all low words = word 0 (0x4748)
pshuflw xmm3, xmm1, 0xFF ; every selector = 3 -> all low words = word 3 (0x6162)

; Mixed selectors, memory source. Note the immediate is decimal:
; 80 = 0x50 = 0b01_01_00_00, so destination words 0 and 1 take source word 0
; (0x4748) while destination words 2 and 3 take source word 1 (0x4546).
pshuflw xmm4, [rdx], 80

hlt


================================================
FILE: unittests/ASM/REPNE/F2_7C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4140000040400000", "0x4340000042400000"]
  }
}
%endif

; HADDPS (F2 0F 7C) test: horizontal add of packed single-precision floats.
; Result lanes (low to high):
;   dst[0] + dst[1], dst[2] + dst[3], src[0] + src[1], src[2] + src[3]
; With the inputs below: 3.0, 12.0, 48.0, 192.0
;   (0x40400000, 0x41400000, 0x42400000, 0x43400000).

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Value comments list the two floats of each qword as (high lane, low lane).
mov rax, 0x3f80000040000000 ; 1.0, 2.0
mov [rdx + 8 * 0], rax
mov rax, 0x4080000041000000 ; 4.0, 8.0
mov [rdx + 8 * 1], rax

mov rax, 0x4180000042000000 ; 16.0, 32.0
mov [rdx + 8 * 2], rax
mov rax, 0x4280000043000000 ; 64.0, 128.0
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]          ; destination operand
movapd xmm1, [rdx + 8 * 2]  ; source operand

; Register-source form only; xmm1 is not listed in RegData.
haddps xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/REPNE/F2_7D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc0800000bf800000", "0xc2800000c1800000"]
  }
}
%endif

; HSUBPS (F2 0F 7D) test: horizontal subtract of packed single-precision floats.
; Result lanes (low to high):
;   dst[0] - dst[1], dst[2] - dst[3], src[0] - src[1], src[2] - src[3]
; With the inputs below: -1.0, -4.0, -16.0, -64.0
;   (0xbf800000, 0xc0800000, 0xc1800000, 0xc2800000).

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Value comments list the two floats of each qword as (high lane, low lane).
mov rax, 0x400000003f800000 ; 2.0, 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x4100000040800000 ; 8.0, 4.0
mov [rdx + 8 * 1], rax

mov rax, 0x4200000041800000 ; 32.0, 16.0
mov [rdx + 8 * 2], rax
mov rax, 0x4300000042800000 ; 128.0, 64.0
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]          ; destination operand
movapd xmm1, [rdx + 8 * 2]  ; source operand

; Register-source form only; xmm1 is not listed in RegData.
hsubps xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/REPNE/F2_C2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000"],
    "XMM1": ["0x0", "0x4000000000000000"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0x7FF8000000000000"],
    "XMM4": ["0x0", "0x4000000000000000"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000"],
    "XMM6": ["0x0", "0x4000000000000000"],
    "XMM7": ["0x0000000000000000", "0x7FF8000000000000"],
    "XMM8": ["0x0000000000000000", "0x0000000000000000"],
    "XMM9": ["0xFFFFFFFFFFFFFFFF", "0x7FF8000000000000"]
  }
}
%endif

; CMPSD (F2 0F C2) test: scalar double-precision compare, all eight
; predicates (imm8 = 0..7), register-source form.
; Only the LOW double of each operand is compared. The low 64 bits of the
; destination become all-ones (true) or all-zeros (false); the upper 64 bits
; of the destination are left as they were. Vectors below are written as
; [low, high].

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Slot 0 = [1.0, 2.0]
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slot 1 = [1.0, 3.0]
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x4008000000000000
mov [rdx + 8 * 3], rax

; xmm0-xmm7 = [1.0, 2.0] (xmm3 and xmm7 are reloaded further down before
; they are used), xmm8 = [1.0, 3.0].
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; Low lanes compare 1.0 against 1.0; the high lane of every result stays 2.0.
cmpsd xmm0, xmm8, 0x00 ; EQ   -> true
cmpsd xmm1, xmm8, 0x01 ; LT   -> false
cmpsd xmm2, xmm8, 0x02 ; LTE  -> true
cmpsd xmm4, xmm8, 0x04 ; NEQ  -> false
cmpsd xmm5, xmm8, 0x05 ; NLT  -> true
cmpsd xmm6, xmm8, 0x06 ; NLTE -> false

; Unordered and Ordered tests need NaN inputs, so they get their own operands.
; Slot 0 = [0.0, nan]
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 1], rax

; Slot 1 = [nan, 0.0]
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x0000000000000000
mov [rdx + 8 * 3], rax

movapd xmm3, [rdx + 8 * 0] ; [0.0, nan]
movapd xmm7, [rdx + 8 * 0] ; [0.0, nan]
movapd xmm8, [rdx + 8 * 2] ; [nan, 0.0]

; Unordered is true when either low input is nan.
; low lanes: 0.0 unord nan = true; high lane of xmm3 stays nan.
cmpsd xmm3, xmm8, 0x03 ; Unordered

; Ordered is true only when neither low input is nan.
; low lanes: 0.0 ord nan = false; high lane of xmm7 stays nan.
cmpsd xmm7, xmm8, 0x07 ; Ordered

; Slot 0 = [nan, 0.0]
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x0000000000000000
mov [rdx + 8 * 1], rax

movapd xmm8, [rdx + 8 * 0]
movapd xmm9, [rdx + 8 * 0]

; Ordered with nan in both low lanes.
; low lanes: nan ord nan = false; high lane of xmm8 stays 0.0,
; so the whole register is expected to be zero.
cmpsd xmm8, xmm9, 0x07 ; Ordered

; Slot 0 = [0.0, nan]
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FF8000000000000
mov [rdx + 8 * 1], rax

movapd xmm9, [rdx + 8 * 0]
movapd xmm10, [rdx + 8 * 0]

; Ordered with no nan in the low lanes (the nan in the high lanes is ignored).
; low lanes: 0.0 ord 0.0 = true; high lane of xmm9 stays nan.
; xmm10 is only a source here and is not listed in RegData.
cmpsd xmm9, xmm10, 0x07 ; Ordered

hlt


================================================
FILE: unittests/ASM/REPNE/F2_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x41200000c0000000", "0x41200000c0c00000"],
    "XMM1": ["0x41200000c0000000", "0x41200000c0c00000"]
  }
}
%endif

; ADDSUBPS (F2 0F D0) test: packed single-precision alternating
; subtract/add. Even lanes compute dst - src, odd lanes compute dst + src.
; With the inputs below (lanes low to high):
;   3-5 = -2.0, 4+6 = 10.0, 1-7 = -6.0, 2+8 = 10.0
;   (0xc0000000, 0x41200000, 0xc0c00000, 0x41200000).
; The memory-source and register-source forms must give the same result.

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Value comments list the two floats of each qword as (high lane, low lane).
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 0], rax
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 1], rax

mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
addsubps xmm0, [rdx + 8 * 2]

; Register-source form, same operands; xmm2 is not listed in RegData.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
addsubps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/REPNE/F2_D6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x4142434445464748",
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVDQ2Q (F2 0F D6) test: copy the low 64 bits of an XMM register into an
; MMX register. xmm0 itself must be left unchanged.

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Slot 0 (16 bytes): source value for xmm0.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1: a different pattern, used to pre-fill mm0.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Slot 2 is zeroed; nothing below reads it back.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
; Pre-fill mm0 with 0x6162636465666768 so that a movdq2q that fails to write
; its destination would leave a value different from the expected one.
movq mm0, [rdx + 8 * 2]

; mm0 = low qword of xmm0 = 0x4142434445464748
movdq2q mm0, xmm0

hlt


================================================
FILE: unittests/ASM/REPNE/F2_E6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0000000200000001", "0x0"],
    "XMM1":  ["0xFFFFFFFEFFFFFFFF", "0x0"],
    "XMM2":  ["0x8000000080000000", "0x0"],
    "XMM3":  ["0x8000000080000000", "0x0"]
  }
}
%endif

; CVTPD2DQ (F2 0F E6) test: convert two packed doubles to two packed signed
; 32-bit integers. The two results land in the low 64 bits of the destination
; and the upper 64 bits are expected to be zero.
; Covers register and memory sources, exactly representable values, and
; inputs that cannot be represented (infinities, NaN, a huge finite value),
; for which the expected result is 0x80000000 per lane.

; rdx = base address of the scratch buffer used to stage the operands.
mov rdx, 0xe0000000

; Slot 0 = [1.0, 2.0]
mov rax, 0x3ff0000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000
mov [rdx + 8 * 1], rax

; Slot 1 = [-1.0, -2.0]
mov rax, 0xbff0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0xc000000000000000
mov [rdx + 8 * 3], rax

; Slot 2 = non-zero filler used to pre-fill destination registers
mov rax, 0x4142434445464748
mov [rdx + 8 * 4], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 5], rax

; Slot 3 = [+inf, -inf]
mov rax, 0x7ff0000000000000
mov [rdx + 8 * 6], rax
mov rax, 0xfff0000000000000
mov [rdx + 8 * 7], rax
 
; Slot 4 = [quiet NaN, largest finite double]
mov rax, 0x7ff8000000000000
mov [rdx + 8 * 8], rax
mov rax, 0x7fefffffffffffff
mov [rdx + 8 * 9], rax

; Pre-fill xmm0 / xmm1 with the filler so the zeroing of the upper 64 bits
; is observable in the result.
movapd xmm0, [rdx + 8 * 4]
movapd xmm1, [rdx + 8 * 4]

; xmm2 = [1.0, 2.0]: first used as the register source, then overwritten as
; a destination two instructions later.
movapd xmm2, [rdx + 8 * 0]

cvtpd2dq xmm0, xmm2           ; [1.0, 2.0]   -> 1, 2   (register source)
cvtpd2dq xmm1, [rdx + 8 * 2]  ; [-1.0, -2.0] -> -1, -2 (memory source)
cvtpd2dq xmm2, [rdx + 8 * 6]  ; [+inf, -inf] -> 0x80000000, 0x80000000
cvtpd2dq xmm3, [rdx + 8 * 8]  ; [nan, max]   -> 0x80000000, 0x80000000

hlt


================================================
FILE: unittests/ASM/REPNE/F2_E6_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000004600000053", "0x0000000000000000"],
    "XMM1": ["0x0000000d00000029", "0x0000000000000000"],
    "XMM2": ["0x0000001600000005", "0x0000000000000000"],
    "XMM3": ["0x000000050000000a", "0x0000000000000000"],
    "XMM4": ["0x000000430000001d", "0x0000000000000000"],
    "XMM5": ["0x0000005b00000013", "0x0000000000000000"],
    "XMM6": ["0x0000003300000028", "0x0000000000000000"],
    "XMM7": ["0x0000001800000021", "0x0000000000000000"],
    "XMM8": ["0x000000180000005b", "0x0000000000000000"],
    "XMM9": ["0x0000005b00000063", "0x0000000000000000"],
    "XMM10": ["0x000000630000005b", "0x0000000000000000"],
    "XMM11": ["0x0000004a00000041", "0x0000000000000000"],
    "XMM12": ["0x0000001900000023", "0x0000000000000000"],
    "XMM13": ["0x0000005a00000006", "0x0000000000000000"],
    "XMM14": ["0x0000003400000021", "0x0000000000000000"],
    "XMM15": ["0x0000000a0000003a", "0x0000000000000000"]
  }
}
%endif

; CVTPD2DQ (F2 0F E6) test on a table of non-integral doubles, memory-source
; form, across all sixteen XMM registers.
; Each instruction converts two doubles into two signed 32-bit integers in
; the low 64 bits of the destination; the upper 64 bits are expected to be 0.
; The expected values correspond to round-to-nearest (for example
; 69.50512 -> 0x46 = 70 and 5.35242 -> 5) -- presumably the rounding mode the
; test starts with; nothing in this file sets it explicitly.

; rdx = address of the double-precision input table.
lea rdx, [rel .data]
; Pre-fill every destination with bytes from .random_data so that stale
; upper bits would show up if the conversion did not clear them.
movapd xmm0, [rel .random_data + 16 * 0]
movapd xmm1, [rel .random_data + 16 * 1]
movapd xmm2, [rel .random_data + 16 * 2]
movapd xmm3, [rel .random_data + 16 * 3]
movapd xmm4, [rel .random_data + 16 * 4]
movapd xmm5, [rel .random_data + 16 * 5]
movapd xmm6, [rel .random_data + 16 * 6]
movapd xmm7, [rel .random_data + 16 * 7]
movapd xmm8, [rel .random_data + 16 * 8]
movapd xmm9, [rel .random_data + 16 * 9]
movapd xmm10, [rel .random_data + 16 * 10]
movapd xmm11, [rel .random_data + 16 * 11]
movapd xmm12, [rel .random_data + 16 * 12]
movapd xmm13, [rel .random_data + 16 * 13]
movapd xmm14, [rel .random_data + 16 * 14]
movapd xmm15, [rel .random_data + 16 * 15]

; xmmN converts the N-th pair of doubles of the table.
cvtpd2dq xmm0, [rdx + 16 * 0]
cvtpd2dq xmm1, [rdx + 16 * 1]
cvtpd2dq xmm2, [rdx + 16 * 2]
cvtpd2dq xmm3, [rdx + 16 * 3]
cvtpd2dq xmm4, [rdx + 16 * 4]
cvtpd2dq xmm5, [rdx + 16 * 5]
cvtpd2dq xmm6, [rdx + 16 * 6]
cvtpd2dq xmm7, [rdx + 16 * 7]
cvtpd2dq xmm8, [rdx + 16 * 8]
cvtpd2dq xmm9, [rdx + 16 * 9]
cvtpd2dq xmm10, [rdx + 16 * 10]
cvtpd2dq xmm11, [rdx + 16 * 11]
cvtpd2dq xmm12, [rdx + 16 * 12]
cvtpd2dq xmm13, [rdx + 16 * 13]
cvtpd2dq xmm14, [rdx + 16 * 14]
cvtpd2dq xmm15, [rdx + 16 * 15]

hlt

; The table is read with 16-byte memory operands, hence the alignment.
align 16
; 512 bytes of input data: 64 doubles. The sixteen conversions above read
; only the first 32 of them (offsets 16 * 0 through 16 * 15).
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86


================================================
FILE: unittests/ASM/REPNE/F2_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x5152535455565758", "0x6162636465666768"]
  }
}
%endif

; Test for LDDQU (F2 0F F0): unaligned 128-bit loads from memory.

; Base address of the scratch buffer used by this test
mov rdx, 0xe0000000

; Fill three consecutive qwords with distinct, recognisable patterns
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax

; xmm0 <- qwords 0 and 1; xmm1 <- qwords 1 and 2.
; The second load starts 8 bytes in, so it is not 16-byte aligned.
lddqu xmm0, [rdx + 8 * 0]
lddqu xmm1, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/SSE4a/extrq_imm.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x48510f254d2fa47f", "0", "0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM1": ["0", "0", "0xb615b9533de8ad09", "0xb76472a37404b890"],
    "XMM2": ["0x3615b9533de8ad09", "0", "0x24426b4c72f110ad", "0x8a6789f2d415a567"],
    "XMM3": ["0", "0", "0x8996f88178236612", "0x19a26b823d3ca2a9"],
    "XMM4": ["0x00000001132df102", "0", "0x00f658ab689712b0", "0xc97d9d031ed21972"],
    "XMM5": ["0x0000000001ecb156", "0", "0x1c86432298df55c8", "0xb29bfeda891be9cc"],
    "XMM6": ["0x00000000432298df", "0", "0x88b0bd28710f2147", "0xc4e95e887fb5ac38"],
    "XMM7": ["0x0000b0bd28710f21", "0", "0xa7df8c2ad03e5be4", "0x6c70d1eec2d395ea"]
  }
}
%endif

; Random data
; Each 256-bit load takes 32 bytes starting at row N of the table, so the
; upper 128 bits of ymmN hold row N+1 (see the expected RegData above).
vmovups ymm0, [rel .random_data + (0 * 16)]
vmovups ymm1, [rel .random_data + (1 * 16)]
vmovups ymm2, [rel .random_data + (2 * 16)]
vmovups ymm3, [rel .random_data + (3 * 16)]
vmovups ymm4, [rel .random_data + (4 * 16)]
vmovups ymm5, [rel .random_data + (5 * 16)]
vmovups ymm6, [rel .random_data + (6 * 16)]
vmovups ymm7, [rel .random_data + (7 * 16)]

; imm extrq
; Operand order is: register, length (bit count), index (shift amount).
; A length of 0 selects all 64 bits (see the expected value for xmm0).
; The upper 64-bits of the xmm are "undefined." Zen will zero.
; Additionally, if index + length is > 64 then the results are undefined.
; - Behaviour is actually shift then mask so FEX matches "undefined" behaviour here.
extrq xmm0, 0, 0
extrq xmm1, 0, 63
extrq xmm2, 63, 0
extrq xmm3, 63, 63
extrq xmm4, 0, 31
extrq xmm5, 31, 31
extrq xmm6, 31, 16
extrq xmm7, 48, 8

hlt

align 16
; 256 bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/SSE4a/extrq_variable.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x48510f254d2fa47f", "0", "0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM1": ["0", "0", "0xb615b9533de8ad09", "0xb76472a37404b890"],
    "XMM2": ["0x3615b9533de8ad09", "0", "0x24426b4c72f110ad", "0x8a6789f2d415a567"],
    "XMM3": ["0", "0", "0x8996f88178236612", "0x19a26b823d3ca2a9"],
    "XMM4": ["0x00000001132df102", "0", "0x00f658ab689712b0", "0xc97d9d031ed21972"],
    "XMM5": ["0x0000000001ecb156", "0", "0x1c86432298df55c8", "0xb29bfeda891be9cc"],
    "XMM6": ["0x00000000432298df", "0", "0x88b0bd28710f2147", "0xc4e95e887fb5ac38"],
    "XMM7": ["0x0000b0bd28710f21", "0", "0xa7df8c2ad03e5be4", "0x6c70d1eec2d395ea"]
  }
}
%endif

; Random data
; Each 256-bit load takes 32 bytes starting at row N of the table, so the
; upper 128 bits of ymmN hold row N+1 (see the expected RegData above).
vmovups ymm0, [rel .random_data + (0 * 16)]
vmovups ymm1, [rel .random_data + (1 * 16)]
vmovups ymm2, [rel .random_data + (2 * 16)]
vmovups ymm3, [rel .random_data + (3 * 16)]
vmovups ymm4, [rel .random_data + (4 * 16)]
vmovups ymm5, [rel .random_data + (5 * 16)]
vmovups ymm6, [rel .random_data + (6 * 16)]
vmovups ymm7, [rel .random_data + (7 * 16)]

; Load selections
; Each selector entry is exactly 16 bytes, so use 128-bit loads. A 256-bit
; load of the last entry would read 16 bytes past the end of .data_selection
; (which is also the end of the assembled image).
vmovups xmm8, [rel .data_selection + (0 * 16)]
vmovups xmm9, [rel .data_selection + (1 * 16)]
vmovups xmm10, [rel .data_selection + (2 * 16)]
vmovups xmm11, [rel .data_selection + (3 * 16)]
vmovups xmm12, [rel .data_selection + (4 * 16)]
vmovups xmm13, [rel .data_selection + (5 * 16)]
vmovups xmm14, [rel .data_selection + (6 * 16)]
vmovups xmm15, [rel .data_selection + (7 * 16)]

; variable extrq
; The selector register carries the length in bits [5:0] and the index
; (shift amount) in bits [13:8] of its low qword.
; The upper 64-bits of the xmm are "undefined." Zen will zero.
; Additionally, if index + length is > 64 then the results are undefined.
; - Behaviour is actually shift then mask so FEX matches "undefined" behaviour here.
extrq xmm0, xmm8
extrq xmm1, xmm9
extrq xmm2, xmm10
extrq xmm3, xmm11
extrq xmm4, xmm12
extrq xmm5, xmm13
extrq xmm6, xmm14
extrq xmm7, xmm15

hlt

align 16
; 256 bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09

; d <length>, <index>
; Emits one 16-byte selector entry for the register form of extrq:
; low qword = length | (index << 8), high qword = 0.
%macro d 2
dq (%1 | (%2 << 8)), 0
%endmacro
align 16
.data_selection:
; BitMask (length in bits), Shift (index)
; These pairs mirror the immediates used by the extrq_imm test.
d 0, 0
d 0, 63
d 63, 0
d 63, 63
d 0, 31
d 31, 31
d 31, 16
d 48, 8


================================================
FILE: unittests/ASM/SSE4a/insertq_imm.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xa7df8c2ad03e5be4", "0", "0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM1": ["0x30b556de1f6de86b", "0", "0xb615b9533de8ad09", "0xb76472a37404b890"],
    "XMM2": ["0xc1029b7f255b4cf4", "0", "0x24426b4c72f110ad", "0x8a6789f2d415a567"],
    "XMM3": ["0x24426b4c72f110ad", "0", "0x8996f88178236612", "0x19a26b823d3ca2a9"],
    "XMM4": ["0xd47dbb9e78236612", "0", "0x00f658ab689712b0", "0xc97d9d031ed21972"],
    "XMM5": ["0x017003bfe89712b0", "0", "0x1c86432298df55c8", "0xb29bfeda891be9cc"],
    "XMM6": ["0x1c8678f6900455c8", "0", "0x88b0bd28710f2147", "0xc4e95e887fb5ac38"],
    "XMM7": ["0x881fa17837c17f47", "0", "0xa7df8c2ad03e5be4", "0x6c70d1eec2d395ea"]
  }
}
%endif

; Random data
; Destination operands: each 256-bit load takes 32 bytes starting at row N of
; the table, so the upper 128 bits of ymmN hold row N+1 (see the expected
; RegData above).
vmovups ymm0, [rel .random_data + (0 * 16)]
vmovups ymm1, [rel .random_data + (1 * 16)]
vmovups ymm2, [rel .random_data + (2 * 16)]
vmovups ymm3, [rel .random_data + (3 * 16)]
vmovups ymm4, [rel .random_data + (4 * 16)]
vmovups ymm5, [rel .random_data + (5 * 16)]
vmovups ymm6, [rel .random_data + (6 * 16)]
vmovups ymm7, [rel .random_data + (7 * 16)]
; Source operands: insertq only reads the low 64 bits of its source, so
; 128-bit loads are sufficient. A 256-bit load of row 15 would read 16 bytes
; past the end of the 256-byte table.
vmovups xmm8, [rel .random_data + (8 * 16)]
vmovups xmm9, [rel .random_data + (9 * 16)]
vmovups xmm10, [rel .random_data + (10 * 16)]
vmovups xmm11, [rel .random_data + (11 * 16)]
vmovups xmm12, [rel .random_data + (12 * 16)]
vmovups xmm13, [rel .random_data + (13 * 16)]
vmovups xmm14, [rel .random_data + (14 * 16)]
vmovups xmm15, [rel .random_data + (15 * 16)]

; imm insertq
; Operand order is: destination, source, length (bit count), index (bit
; position in the destination where the field is inserted).
; The upper 64-bits of the xmm are "undefined." Zen will zero.
; Additionally, if index + length is > 64 then the results are undefined.
; - Behaviour is actually shift then mask so FEX matches "undefined" behaviour here.
insertq xmm0, xmm8, 0, 0
insertq xmm1, xmm9, 0, 63
insertq xmm2, xmm10, 63, 0
insertq xmm3, xmm11, 63, 63
insertq xmm4, xmm12, 0, 31
insertq xmm5, xmm13, 31, 31
insertq xmm6, xmm14, 31, 16
insertq xmm7, xmm15, 48, 8

hlt

align 16
; 256 bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/SSE4a/insertq_variable.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0", "0x30b556de1f6de86b", "0x67d29af330ae762c"],
    "XMM1": ["0x30b556de1f6de86b", "0", "0xb615b9533de8ad09", "0xb76472a37404b890"],
    "XMM2": ["0xe162636465666768", "0", "0x24426b4c72f110ad", "0x8a6789f2d415a567"],
    "XMM3": ["0x24426b4c72f110ad", "0", "0x8996f88178236612", "0x19a26b823d3ca2a9"],
    "XMM4": ["0x42c343c478236612", "0", "0x00f658ab689712b0", "0xc97d9d031ed21972"],
    "XMM5": ["0x0acb4bcc689712b0", "0", "0x1c86432298df55c8", "0xb29bfeda891be9cc"],
    "XMM6": ["0x1c8625a6a7a855c8", "0", "0x88b0bd28710f2147", "0xc4e95e887fb5ac38"],
    "XMM7": ["0x88b3b4b5b6b7b847", "0", "0xa7df8c2ad03e5be4", "0x6c70d1eec2d395ea"]
  }
}
%endif

; Random data
; Destination operands: each 256-bit load takes 32 bytes starting at row N of
; the table, so the upper 128 bits of ymmN hold row N+1 (see the expected
; RegData above).
vmovups ymm0, [rel .random_data + (0 * 16)]
vmovups ymm1, [rel .random_data + (1 * 16)]
vmovups ymm2, [rel .random_data + (2 * 16)]
vmovups ymm3, [rel .random_data + (3 * 16)]
vmovups ymm4, [rel .random_data + (4 * 16)]
vmovups ymm5, [rel .random_data + (5 * 16)]
vmovups ymm6, [rel .random_data + (6 * 16)]
vmovups ymm7, [rel .random_data + (7 * 16)]

; Load selections
; Each selector entry is exactly 16 bytes, so use 128-bit loads. A 256-bit
; load of the last entry would read 16 bytes past the end of .data_selection
; (which is also the end of the assembled image).
vmovups xmm8, [rel .data_selection + (0 * 16)]
vmovups xmm9, [rel .data_selection + (1 * 16)]
vmovups xmm10, [rel .data_selection + (2 * 16)]
vmovups xmm11, [rel .data_selection + (3 * 16)]
vmovups xmm12, [rel .data_selection + (4 * 16)]
vmovups xmm13, [rel .data_selection + (5 * 16)]
vmovups xmm14, [rel .data_selection + (6 * 16)]
vmovups xmm15, [rel .data_selection + (7 * 16)]


; variable insertq
; The source register carries the data to insert in its low qword, and the
; length / index control fields in the low bytes of its high qword.
; The upper 64-bits of the xmm are "undefined." Zen will zero.
; Additionally, if index + length is > 64 then the results are undefined.
; - Behaviour is actually shift then mask so FEX matches "undefined" behaviour here.
insertq xmm0, xmm8
insertq xmm1, xmm9
insertq xmm2, xmm10
insertq xmm3, xmm11
insertq xmm4, xmm12
insertq xmm5, xmm13
insertq xmm6, xmm14
insertq xmm7, xmm15

hlt

align 16
; 256 bytes of random data
.random_data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09

; d <length>, <index>, <data>
; Emits one 16-byte source operand for the register form of insertq:
; low qword = data to insert, high qword = length | (index << 8).
%macro d 3
dq %3, (%1 | (%2 << 8))
%endmacro
align 16
.data_selection:
; BitMask (length in bits), Shift (index), Data
; The length/index pairs mirror the immediates used by the insertq_imm test.
d 0, 0, 0x4142434445464748
d 0, 63, 0x5152535455565758
d 63, 0, 0x6162636465666768
d 63, 63, 0x7172737475767778
d 0, 31, 0x8182838485868788
d 31, 31, 0x9192939495969798
d 31, 16, 0xA1A2A3A4A5A6A7A8
d 48, 8, 0xB1B2B3B4B5B6B7B8


================================================
FILE: unittests/ASM/STOS.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RDI": "0xE8000020",
    "R11": "0xDAD10"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test for REP STOSW: store 0x10 words, then read them back and sum them.

; Data we want to store (stosw only writes the low 16 bits, 0xDAD1)
mov rax, 0xDEADBEEFBAD0DAD1

; Starting address to store to
mov rdi, 0xe8000000

; How many elements we want to store
mov rcx, 0x10

; Direction to increment (Increment when cleared)
cld

; Store 0x10 words; RDI advances by 2 bytes per element (0x20 in total)
rep stosw

; r11 accumulates the sum of the stored words, r10 is the read-back base
mov r11, 0
mov r10, 0xe8000000

; Read back all 16 words; the expected sum is 16 * 0xDAD1 = 0xDAD10
movzx r12, word [r10 + 0]
add r11, r12
movzx r12, word [r10 + 2]
add r11, r12
movzx r12, word [r10 + 4]
add r11, r12
movzx r12, word [r10 + 6]
add r11, r12
movzx r12, word [r10 + 8]
add r11, r12
movzx r12, word [r10 + 10]
add r11, r12
movzx r12, word [r10 + 12]
add r11, r12
movzx r12, word [r10 + 14]
add r11, r12
movzx r12, word [r10 + 16]
add r11, r12
movzx r12, word [r10 + 18]
add r11, r12
movzx r12, word [r10 + 20]
add r11, r12
movzx r12, word [r10 + 22]
add r11, r12
movzx r12, word [r10 + 24]
add r11, r12
movzx r12, word [r10 + 26]
add r11, r12
movzx r12, word [r10 + 28]
add r11, r12
movzx r12, word [r10 + 30]
add r11, r12

hlt


================================================
FILE: unittests/ASM/STOSQ.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0",
    "RCX": "0",
    "RDI": "0xE8000100",
    "R11": "0"
  },

  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test for REP STOSQ: pre-fill a buffer with garbage, zero the first 0x20
; qwords with rep stosq, then verify that those qwords sum to zero.

; Starting address to store to
mov rdi, 0xe8000000

; How many elements we want to store
; Additional just in case STOS continues past for some reason
mov rcx, 0x100

; Data we want to store
mov rax, 0xDEADBEEFBAD0DAD1

; Direction to increment (Increment when cleared)
cld

; First fill the area with garbage without using STOS
; rdx is the qword index; the loop writes rcx (0x100) qwords
mov rdx, 0
loop_header:
  mov [rdi + rdx * 8], rax
  add rdx, 1
  cmp rdx, rcx
  jne loop_header

; Now use STOS to fill the data with zero
; 0x20 qwords = 0x100 bytes, so RDI ends at 0xE8000100 and RCX at 0

mov rax, 0x0
mov rcx, 0x20
rep stosq

; Now read the data back and ensure it is zero
; r14 = base, r13 = qword count, r12 = index, r11 = running sum

mov r14, 0xe8000000
mov r13, 0x20
mov r12, 0
mov r11, 0
loop_header2:
  add r11, [r14 + r12 * 8]
  add r12, 1
  cmp r12, r13
  jne loop_header2

hlt


================================================
FILE: unittests/ASM/STOSQ2.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0xDEADBEEFBAD0DAD1"
  },

  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test that REP STOSQ with a zero count performs no store at all.

; Starting address to store to
mov rdi, 0xe8000000
; Store value
mov rax, 0xDEADBEEFBAD0DAD1
mov [rdi], rax

; Set counter to zero (writing ecx also clears the upper half of rcx)
mov ecx, 0
; Set store value to zero
mov rax, 0

; With RCX == 0 this must not touch memory
rep STOSQ

; Reload what we just stored
; Ensure that STOSQ didn't write
mov rdi, 0xe8000000
mov rax, [rdi]

hlt


================================================
FILE: unittests/ASM/STOSQ2_REPNE.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0xDEADBEEFBAD0DAD1"
  },

  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test that REPNE STOSQ with a zero count performs no store at all.

; Starting address to store to
mov rdi, 0xe8000000
; Store value
mov rax, 0xDEADBEEFBAD0DAD1
mov [rdi], rax

; Set counter to zero (writing ecx also clears the upper half of rcx)
mov ecx, 0
; Set store value to zero
mov rax, 0

; With RCX == 0 this must not touch memory
repne STOSQ

; Reload what we just stored
; Ensure that STOSQ didn't write
mov rdi, 0xe8000000
mov rax, [rdi]

hlt


================================================
FILE: unittests/ASM/STOSQ_REPNE.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0",
    "RCX": "0",
    "RDI": "0xE8000100",
    "R11": "0"
  },

  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test for REPNE STOSQ: pre-fill a buffer with garbage, zero the first 0x20
; qwords with repne stosq, then verify that those qwords sum to zero.

; Starting address to store to
mov rdi, 0xe8000000

; How many elements we want to store
; Additional just in case STOS continues past for some reason
mov rcx, 0x100

; Data we want to store
mov rax, 0xDEADBEEFBAD0DAD1

; Direction to increment (Increment when cleared)
cld

; First fill the area with garbage without using STOS
; rdx is the qword index; the loop writes rcx (0x100) qwords
mov rdx, 0
loop_header:
  mov [rdi + rdx * 8], rax
  add rdx, 1
  cmp rdx, rcx
  jne loop_header

; Now use STOS to fill the data with zero
; 0x20 qwords = 0x100 bytes, so RDI ends at 0xE8000100 and RCX at 0

mov rax, 0x0
mov rcx, 0x20
repne stosq

; Now read the data back and ensure it is zero
; r14 = base, r13 = qword count, r12 = index, r11 = running sum

mov r14, 0xe8000000
mov r13, 0x20
mov r12, 0
mov r11, 0
loop_header2:
  add r11, [r14 + r12 * 8]
  add r12, 1
  cmp r12, r13
  jne loop_header2

hlt


================================================
FILE: unittests/ASM/STOS_REPNE.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RDI": "0xE8000020",
    "R11": "0xDAD10"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test for REPNE STOSW: store 0x10 words, then read them back and sum them.

; Data we want to store (stosw only writes the low 16 bits, 0xDAD1)
mov rax, 0xDEADBEEFBAD0DAD1

; Starting address to store to
mov rdi, 0xe8000000

; How many elements we want to store
mov rcx, 0x10

; Direction to increment (Increment when cleared)
cld

; Store 0x10 words; RDI advances by 2 bytes per element (0x20 in total)
repne stosw

; r11 accumulates the sum of the stored words, r10 is the read-back base
mov r11, 0
mov r10, 0xe8000000

; Read back all 16 words; the expected sum is 16 * 0xDAD1 = 0xDAD10
movzx r12, word [r10 + 0]
add r11, r12
movzx r12, word [r10 + 2]
add r11, r12
movzx r12, word [r10 + 4]
add r11, r12
movzx r12, word [r10 + 6]
add r11, r12
movzx r12, word [r10 + 8]
add r11, r12
movzx r12, word [r10 + 10]
add r11, r12
movzx r12, word [r10 + 12]
add r11, r12
movzx r12, word [r10 + 14]
add r11, r12
movzx r12, word [r10 + 16]
add r11, r12
movzx r12, word [r10 + 18]
add r11, r12
movzx r12, word [r10 + 20]
add r11, r12
movzx r12, word [r10 + 22]
add r11, r12
movzx r12, word [r10 + 24]
add r11, r12
movzx r12, word [r10 + 26]
add r11, r12
movzx r12, word [r10 + 28]
add r11, r12
movzx r12, word [r10 + 30]
add r11, r12

hlt


================================================
FILE: unittests/ASM/Secondary/07_XX_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "0xFFFFFFFFFFFE0000"
  }
}
%endif

; Test for SGDT: store the GDT register and read back its two fields.

; Writes a 2-byte limit followed by an 8-byte base into the buffer below
sgdt [rel data]

; rax <- limit (first 2 bytes), rbx <- base (next 8 bytes)
movzx rax, word [rel data]
mov rbx, qword [rel data + 2]
hlt

; Destination buffer for sgdt, placed on its own page boundary
align 4096
data:
; Limit
dw 0
; Base
dq 0


================================================
FILE: unittests/ASM/Secondary/07_XX_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000080050033",
    "RBX": "0x4142434480050033",
    "RCX": "0x4142434445460033",
    "RDX": "0x4142434445460033",
    "RDI": "0x0000000080050033",
    "RSP": "0x0000000080050033",
    "RBP": "0x0000000080050033",
    "R8":  "0x4142434445460033",
    "R9":  "0x4142434445460033",
    "R10": "0x4142434445460033"
  }
}
%endif

; Test for SMSW with every operand size and with ignored prefixes.
; Every destination is pre-loaded with a known pattern so the expected
; RegData shows exactly which bits each form overwrites.

mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748
mov rdx, 0x4142434445464748
; rsi points at a scratch qword, pre-filled with the same pattern
mov rsi, 0xe000_0000
mov [rsi], rdx

mov rdi, 0x4142434445464748
mov rsp, 0x4142434445464748
mov rbp, 0x4142434445464748
mov r8, 0x4142434445464748
mov r9, 0x4142434445464748
mov r10, 0x4142434445464748

; Register forms: 64-bit, 32-bit and 16-bit destinations
smsw rax
smsw ebx
smsw cx

; Memory form: per the expected RDX, only the low 16 bits are written
smsw [rsi]
mov rdx, [rsi]

; operand-size override prefix
; Nasm complains if o16 is used
; `warning: invalid operand size prefix o16, must be o64`
; Per the expected RegData, 0x66 / F3 / F2 do not change the 64-bit forms
db 0x66
smsw rdi
repe smsw rsp
repne smsw rbp

; Same prefixes applied to the 16-bit register form
db 0x66
smsw r8w
repe smsw r9w
repne smsw r10w

hlt


================================================
FILE: unittests/ASM/Secondary/08_66_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a memory operand at 16-, 32- and 64-bit operand sizes.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000000, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Bit 1 of the low word (0x0000) is clear -> CF = 0
bt word [rdx], 1
cfmerge

; With a memory operand a register bit offset may reach past the operand:
; bit 32 is bit 0 of the dword at [rdx + 4] (0xFFFFFFFF) -> CF = 1
mov r13, 32
bt dword [rdx], r13d
cfmerge

; Ensures correct modulo on value
; Immediate 191 is taken modulo 64 -> bit 63 of the qword -> CF = 1
bt qword [rdx], 64 * 2 + 63
cfmerge

; r15 = 0b011
hlt


================================================
FILE: unittests/ASM/Secondary/08_66_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a register operand at 16-, 32- and 64-bit operand sizes.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000002, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Low word is 0x0002, so bit 1 is set -> CF = 1
movzx r12, word [rdx]
bt r12w, 1
cfmerge

mov r13, 32
mov r12d, dword [rdx]

; Register form wraps the offset: 32 mod 32 = bit 0 of 0x80000002 -> CF = 0
bt r12d, r13d
cfmerge

; Immediate 192 is taken modulo 64 -> bit 0, which is clear -> CF = 0
mov r12, qword [rdx]
bt r12, 64 * 3
cfmerge

; r15 = 0b100
hlt


================================================
FILE: unittests/ASM/Secondary/08_F2_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a memory operand when an F2 prefix byte is prepended.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000000, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Bit 1 of the low word (0x0000) is clear -> CF = 0
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt word [rdx], 1
cfmerge

; Bit offset 32 reaches bit 0 of the dword at [rdx + 4] -> CF = 1
mov r13, 32
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt dword [rdx], r13d
cfmerge

; Immediate 191 is taken modulo 64 -> bit 63 of the qword -> CF = 1
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt qword [rdx], 64 * 2 + 63
cfmerge

; r15 = 0b011
hlt


================================================
FILE: unittests/ASM/Secondary/08_F2_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a register operand when an F2 prefix byte is prepended.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000002, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Low word is 0x0002, so bit 1 is set -> CF = 1
movzx r12, word [rdx]
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt r12w, 1
cfmerge

; Register form wraps the offset: 32 mod 32 = bit 0 of 0x80000002 -> CF = 0
mov r13, 32
mov r12d, dword [rdx]
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt r12d, r13d
cfmerge

; Immediate 192 is taken modulo 64 -> bit 0, which is clear -> CF = 0
mov r12, qword [rdx]
db 0xF2 ; Prefix with F2. Shouldn't change behaviour
bt r12, 64 * 3
cfmerge

; r15 = 0b100
hlt


================================================
FILE: unittests/ASM/Secondary/08_F2_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x0"
  }
}
%endif

; NOTE(review): this file only initialises a scratch buffer. No prefixed
; instruction is executed and R15 is never written, so the expected R15 = 0
; presumably relies on the harness's initial register state - TODO confirm.
; Looks like a placeholder for a test that was never filled in.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Fill four consecutive qwords with distinct, recognisable patterns
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax


hlt


================================================
FILE: unittests/ASM/Secondary/08_F3_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a memory operand when an F3 prefix byte is prepended.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0, 1 and 2 = 0xFFFFFFFF80000000,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Bit 1 of the low word (0x0000) is clear -> CF = 0
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt word [rdx], 1
cfmerge

; Bit offset 32 reaches bit 0 of the dword at [rdx + 4] -> CF = 1
mov r13, 32
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt dword [rdx], r13d
cfmerge

; Immediate 191 is taken modulo 64 -> bit 63 of the qword -> CF = 1
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt qword [rdx], 64 * 2 + 63
cfmerge

; r15 = 0b011
hlt


================================================
FILE: unittests/ASM/Secondary/08_F3_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a register operand when an F3 prefix byte is prepended.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0, 1 and 2 = 0xFFFFFFFF80000002,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Low word is 0x0002, so bit 1 is set -> CF = 1
movzx r12, word [rdx]
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt r12w, 1
cfmerge

; Register form wraps the offset: 32 mod 32 = bit 0 of 0x80000002 -> CF = 0
mov r13, 32
mov r12d, dword [rdx]
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt r12d, r13d
cfmerge

; Immediate 192 is taken modulo 64 -> bit 0, which is clear -> CF = 0
mov r12, qword [rdx]
db 0xF3 ; Prefix with F3. Shouldn't change behaviour
bt r12, 64 * 3
cfmerge

; r15 = 0b100
hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a memory operand at 16-, 32- and 64-bit operand sizes.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000000, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Bit 1 of the low word (0x0000) is clear -> CF = 0
bt word [rdx], 1
cfmerge

; With a memory operand a register bit offset may reach past the operand:
; bit 32 is bit 0 of the dword at [rdx + 4] (0xFFFFFFFF) -> CF = 1
mov r13, 32
bt dword [rdx], r13d
cfmerge

; Immediate 191 is taken modulo 64 -> bit 63 of the qword -> CF = 1
bt qword [rdx], 64 * 2 + 63
cfmerge

; r15 = 0b011
hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4"
  }
}
%endif

; cfmerge: append the current carry flag to the result bit-string in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself yields 0 - CF, i.e. 0 or all ones
sbb r14, r14
and r14, 1

; Merge in to results
; Earlier results move up one bit; the new CF lands in bit 0
shl r15, 1
or r15, r14

%endmacro

; Test for BT with a register operand at 16-, 32- and 64-bit operand sizes.

; Base address of the scratch buffer
mov rdx, 0xe0000000

; Buffer layout: qwords 0 and 1 = 0xFFFFFFFF80000002, qword 2 = 0,
; qword 3 = 0x0000000000000001 (written as two dwords)
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Low word is 0x0002, so bit 1 is set -> CF = 1
movzx r12, word [rdx]
bt r12w, 1
cfmerge

; Register form wraps the offset: 32 mod 32 = bit 0 of 0x80000002 -> CF = 0
mov r13, 32
mov r12d, dword [rdx]
bt r12d, r13d
cfmerge

; Immediate 192 is taken modulo 64 -> bit 0, which is clear -> CF = 0
mov r12, qword [rdx]
bt r12, 64 * 3
cfmerge

; r15 = 0b100
hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_04_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1"
  }
}
%endif

; Exercises BT on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword. Expected CF: 1 -> r15 = 1.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; Exercises the memory forms of BTS at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTS is followed by a BT of the same bit to confirm that the bit ended up set.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 1, 1, 1, 1, 1 -> r15 = 0b011111.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and set
bts word [rdx], 1
cfmerge

; Ensure it is set
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
bts qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_05_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x35"
  }
}
%endif

; Exercises the register forms of BTS at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTS is followed by a BT of the same bit to confirm that the bit ended up set.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 1, 1, 0, 1, 0, 1 -> r15 = 0b110101.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that the register operands are loaded from
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results
movzx r12, word [rdx]

; Test and set
bts r12w, 1
cfmerge

; Ensure it is set
bt r12w, 1
cfmerge

; A register bit offset of 32 wraps to bit 0 for a 32-bit register operand
mov r13, 32
mov r12d, dword [rdx]

bts r12d, r13d
cfmerge

bt r12d, r13d
cfmerge

; The immediate offset 192 wraps to bit 0 for a 64-bit register operand
mov r12, qword [rdx]
bts r12, 64 * 3
cfmerge

bt r12, 64 * 3
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_05_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; Exercises BTS on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was already set), 1 (bit is still set) -> r15 = 0b11.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

bts qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_05_3_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; Exercises LOCK BTS on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was already set), 1 (bit is still set) -> r15 = 0b11.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

lock bts qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_05_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; Exercises the LOCK-prefixed memory forms of BTS at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTS is followed by a BT of the same bit to confirm that the bit ended up set.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 1, 1, 1, 1, 1 -> r15 = 0b011111.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and set
lock bts word [rdx], 1
cfmerge

; Ensure it is set
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
lock bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
lock bts qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; Exercises the memory forms of BTR at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTR is followed by a BT of the same bit to confirm that the bit ended up clear.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 0, 1, 0, 1, 0 -> r15 = 0b001010.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and reset
btr word [rdx], 1
cfmerge

; Ensure it is clear
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
btr dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
btr qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_06_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x20"
  }
}
%endif

; Exercises the register forms of BTR at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTR is followed by a BT of the same bit to confirm that the bit ended up clear.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 1, 0, 0, 0, 0, 0 -> r15 = 0b100000.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that the register operands are loaded from
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

movzx r12, word [rdx]

; Test and reset
btr r12w, 1
cfmerge

; Ensure it is clear
bt r12w, 1
cfmerge

; A register bit offset of 32 wraps to bit 0 for a 32-bit register operand
mov r13, 32
mov r12d, dword [rdx]

btr r12d, r13d
cfmerge

bt r12d, r13d
cfmerge

; The immediate offset 192 wraps to bit 0 for a 64-bit register operand
mov r12, qword [rdx]
btr r12, 64 * 3
cfmerge

bt r12, 64 * 3
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_06_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x2"
  }
}
%endif

; Exercises BTR on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was set), 0 (bit is now clear) -> r15 = 0b10.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

btr qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_06_3_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x2"
  }
}
%endif

; Exercises LOCK BTR on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was set), 0 (bit is now clear) -> r15 = 0b10.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

lock btr qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_06_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; Exercises the LOCK-prefixed memory forms of BTR at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTR is followed by a BT of the same bit to confirm that the bit ended up clear.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 0, 1, 0, 1, 0 -> r15 = 0b001010.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and reset
lock btr word [rdx], 1
cfmerge

; Ensure it is clear
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
lock btr dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
lock btr qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; Exercises the memory forms of BTC at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTC is followed by a BT of the same bit to confirm that the bit was inverted.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 1, 1, 0, 1, 0 -> r15 = 0b011010.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and complement
btc word [rdx], 1
cfmerge

; Ensure it was complemented
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
btc dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
btc qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x25"
  }
}
%endif

; Exercises the register forms of BTC at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTC is followed by a BT of the same bit to confirm that the bit was inverted.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 1, 0, 0, 1, 0, 1 -> r15 = 0b100101.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that the register operands are loaded from
mov rax, 0xFFFFFFFF80000002
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

movzx r12, word [rdx]

; Test and complement
btc r12w, 1
cfmerge

; Ensure it was complemented
bt r12w, 1
cfmerge

; A register bit offset of 32 wraps to bit 0 for a 32-bit register operand
mov r13, 32
mov r12d, dword [rdx]

btc r12d, r13d
cfmerge

bt r12d, r13d
cfmerge

; The immediate offset 192 wraps to bit 0 for a 64-bit register operand
mov r12, qword [rdx]
btc r12, 64 * 3
cfmerge

bt r12, 64 * 3
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_07_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x2"
  }
}
%endif

; Exercises BTC on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was set), 0 (bit is now clear) -> r15 = 0b10.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

btc qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_07_3_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x2"
  }
}
%endif

; Exercises LOCK BTC on a 64-bit memory operand with an immediate offset of 32,
; i.e. a bit in the upper half of the qword.
; Expected CF sequence: 1 (bit was set), 0 (bit is now clear) -> r15 = 0b10.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Only bit 32 of the qword is set
mov rax, 0x0000000100000000
mov [rdx + 8 * 0], rax

xor r15, r15 ; Will contain our results

lock btc qword [rdx], 32
cfmerge

bt qword [rdx], 32
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/08_XX_07_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; Exercises the LOCK-prefixed memory forms of BTC at 16-bit, 32-bit and 64-bit operand sizes.
; Each BTC is followed by a BT of the same bit to confirm that the bit was inverted.
; Every CF result is shifted in to r15 (oldest result ends up in the highest bit).
; Expected CF sequence: 0, 1, 1, 0, 1, 0 -> r15 = 0b011010.

; cfmerge: append the current carry flag to the result bits held in r15.
; Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; sbb of a register with itself gives 0 or -1 depending on CF; the mask keeps just 0 or 1
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Fill the scratch memory that is modified below
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and complement
lock btc word [rdx], 1
cfmerge

; Ensure it was complemented
bt word [rdx], 1
cfmerge

; A register bit offset of 32 on a memory operand selects bit 0 of the following dword
mov r13, 32
lock btc dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; The immediate offset is taken modulo 64, selecting bit 63 of the first qword
lock btc qword [rdx], 64 * 2 + 63
cfmerge

bt qword [rdx], 64 * 2 + 63
cfmerge

hlt


================================================
FILE: unittests/ASM/Secondary/09_F3_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "HostFeatures": ["RDPID"]
}
%endif

; Checks that RDPID overwrites its destination register.
; rbx and rcx start with the same pattern; after rdpid the test expects them to differ (rax = 1).

mov rax, 0
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748
rdpid ebx

; al = 1 when rbx no longer matches the untouched copy in rcx
cmp rbx, rcx
setne al

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000000",
    "RDX": "0xFFFFFFFF",
    "RBX": "0x41424344",
    "RCX": "0x51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; CMPXCHG8B where EDX:EAX matches the memory contents.
; The compare succeeds: ZF is set and ECX:EBX is written to memory.

mov r15, 0xe0000000

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov eax, 0x80000000
mov edx, 0xFFFFFFFF

; Desired
mov ebx, 0x41424344
mov ecx, 0x51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; The compare succeeded, so edx and eax still hold the expected value

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000000",
    "RDX": "0xFFFFFFFF",
    "RBX": "0x0000000080000000",
    "RCX": "0x00000000ffffffff",
    "R13": "0xffffffff80000000",
    "R14": "0x0"
  }
}
%endif

; CMPXCHG8B where memory already equals the desired value (ECX:EBX) but not the expected value (EDX:EAX).
; The compare must fail: ZF is clear, memory is unchanged and EDX:EAX receive the memory contents.

mov r15, 0xe0000000

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov eax, 0x41424344
mov edx, 0x51525354

; Desired
mov ebx, 0x80000000
mov ecx, 0xFFFFFFFF

; Memory is already Desired and NOT expected
; Finds bug in CAS on AArch64

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; edx and eax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000080000000",
    "RDX": "0x00000000ffffffff",
    "RBX": "0x4141414180000000",
    "RCX": "0x41414141ffffffff",
    "R13": "0xffffffff80000000",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG8B on an unaligned address that stays inside one 16-byte region.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Within 16 byte region but unaligned
mov r15, 0xe0000007

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0xFFFFFFFF41424344
mov rdx, 0xFFFFFFFF51525354

; Desired
mov rbx, 0x4141414180000000
mov rcx, 0x41414141FFFFFFFF

; Memory is already Desired and NOT expected
; Finds bug in CAS on AArch64

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; The compare fails, so memory is left unchanged
; EDX:EAX receive the memory's data, zero-extended in to RDX and RAX

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_12.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000080000000",
    "RDX": "0x00000000ffffffff",
    "RBX": "0x4141414180000000",
    "RCX": "0x41414141ffffffff",
    "R13": "0xffffffff80000000",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG8B on an unaligned address that crosses a 16-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 16byte boundary and unaligned
mov r15, 0xe0000009

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0xFFFFFFFF41424344
mov rdx, 0xFFFFFFFF51525354

; Desired
mov rbx, 0x4141414180000000
mov rcx, 0x41414141FFFFFFFF

; Memory is already Desired and NOT expected
; Finds bug in CAS on AArch64

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; The compare fails, so memory is left unchanged
; EDX:EAX receive the memory's data, zero-extended in to RDX and RAX

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_13.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000080000000",
    "RDX": "0x00000000ffffffff",
    "RBX": "0x4141414180000000",
    "RCX": "0x41414141ffffffff",
    "R13": "0xffffffff80000000",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG8B on an unaligned address that crosses a 64-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 64byte boundary and unaligned
mov r15, 0xe000003F

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0xFFFFFFFF41424344
mov rdx, 0xFFFFFFFF51525354

; Desired
mov rbx, 0x4141414180000000
mov rcx, 0x41414141FFFFFFFF

; Memory is already Desired and NOT expected
; Finds bug in CAS on AArch64

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; The compare fails, so memory is left unchanged
; EDX:EAX receive the memory's data, zero-extended in to RDX and RAX

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_14.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x5152535455565758",
    "RBX": "0x6162636465666768",
    "R15": "0x7172737475767778",
    "R12": "0x4142434445464748",
    "R13": "0x5152535455565758",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG16B with a redundant 66h prefix in front of it.
; The compare must fail: ZF is clear, memory is unchanged and RDX:RAX receive the memory contents.

mov rcx, 0xe0000000

mov rax, 0x4142434445464748
mov [rcx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rcx + 8 * 1], rax

mov r14, 0
; Expected
mov rax, 0x41424344FFFFFFFF
mov rdx, 0x5152535455565758

; Desired
; NOTE: cmpxchg16b takes its replacement value from RCX:RBX. rcx holds the address here and
; r15 is not read by the instruction; the compare fails so nothing is stored either way.
mov rbx, 0x6162636465666768
mov r15, 0x7172737475767778

; Prefix 66h, ensures it still operates at 16b
db 0x66
cmpxchg16b [rcx]

; Set r14 to 1 if the memory location was expected
setz r14b

; rdx and rax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r12, [rcx + 8 * 0]
mov r13, [rcx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_15.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B with a redundant 66h prefix, on an unaligned address crossing a 64-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 64byte boundary and unaligned
mov r15, 0xe000003F

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

; Prefix 66h, ensures it still operates at 8b
db 0x66
cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x5152535455565758",
    "RBX": "0x6162636465666768",
    "R15": "0x7172737475767778",
    "R12": "0x4142434445464748",
    "R13": "0x5152535455565758",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG16B with a redundant F2h prefix in front of it.
; The compare must fail: ZF is clear, memory is unchanged and RDX:RAX receive the memory contents.

mov rcx, 0xe0000000

mov rax, 0x4142434445464748
mov [rcx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rcx + 8 * 1], rax

mov r14, 0
; Expected
mov rax, 0x41424344FFFFFFFF
mov rdx, 0x5152535455565758

; Desired
; NOTE: cmpxchg16b takes its replacement value from RCX:RBX. rcx holds the address here and
; r15 is not read by the instruction; the compare fails so nothing is stored either way.
mov rbx, 0x6162636465666768
mov r15, 0x7172737475767778

; Prefix F2h, ensures it still operates at 16b
db 0xF2
cmpxchg16b [rcx]

; Set r14 to 1 if the memory location was expected
setz r14b

; rdx and rax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r12, [rcx + 8 * 0]
mov r13, [rcx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_17.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x5152535455565758",
    "RBX": "0x6162636465666768",
    "R15": "0x7172737475767778",
    "R12": "0x4142434445464748",
    "R13": "0x5152535455565758",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG16B with a redundant F3h prefix in front of it.
; The compare must fail: ZF is clear, memory is unchanged and RDX:RAX receive the memory contents.

mov rcx, 0xe0000000

mov rax, 0x4142434445464748
mov [rcx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rcx + 8 * 1], rax

mov r14, 0
; Expected
mov rax, 0x41424344FFFFFFFF
mov rdx, 0x5152535455565758

; Desired
; NOTE: cmpxchg16b takes its replacement value from RCX:RBX. rcx holds the address here and
; r15 is not read by the instruction; the compare fails so nothing is stored either way.
mov rbx, 0x6162636465666768
mov r15, 0x7172737475767778

; Prefix F3h, ensures it still operates at 16b
db 0xF3
cmpxchg16b [rcx]

; Set r14 to 1 if the memory location was expected
setz r14b

; rdx and rax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r12, [rcx + 8 * 0]
mov r13, [rcx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_18.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B with a redundant F2h prefix, on an unaligned address crossing a 64-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 64byte boundary and unaligned
mov r15, 0xe000003F

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

; Prefix F2h, ensures it still operates at 8b
db 0xF2
cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_19.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B with a redundant F3h prefix, on an unaligned address crossing a 64-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 64byte boundary and unaligned
mov r15, 0xe000003F

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

; Prefix F3h, ensures it still operates at 8b
db 0xF3
cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000000",
    "RDX": "0xFFFFFFFF",
    "RBX": "0x41424344",
    "RCX": "0x51525354",
    "R13": "0xFFFFFFFF80000000",
    "R14": "0x0"
  }
}
%endif

; CMPXCHG8B where EDX:EAX does not match the memory contents (EAX differs).
; The compare fails: ZF is clear, memory is unchanged and EDX:EAX receive the memory contents.

mov r15, 0xe0000000

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov eax, 0xFFFFFFFF
mov edx, 0xFFFFFFFF

; Desired
mov ebx, 0x41424344
mov ecx, 0x51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; edx and eax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x5152535455565758",
    "RBX": "0x6162636465666768",
    "RCX": "0x7172737475767778",
    "R12": "0x6162636465666768",
    "R13": "0x7172737475767778",
    "R14": "0x1"
  }
}
%endif

; CMPXCHG16B where RDX:RAX matches the memory contents.
; The compare succeeds: ZF is set and RCX:RBX is written to memory.

mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

mov r14, 0
; Expected
mov rax, 0x4142434445464748
mov rdx, 0x5152535455565758

; Desired
mov rbx, 0x6162636465666768
mov rcx, 0x7172737475767778

cmpxchg16b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; The compare succeeded, so rdx and rax still hold the expected value

; Check memory location to ensure it contains what we want
mov r12, [r15 + 8 * 0]
mov r13, [r15 + 8 * 1]

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RDX": "0x5152535455565758",
    "RBX": "0x6162636465666768",
    "RCX": "0x7172737475767778",
    "R12": "0x4142434445464748",
    "R13": "0x5152535455565758",
    "R14": "0x0"
  }
}
%endif

; CMPXCHG16B where RDX:RAX does not match the memory contents (the low half of RAX differs).
; The compare fails: ZF is clear, memory is unchanged and RDX:RAX receive the memory contents.

mov r15, 0xe0000000

mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax

mov r14, 0
; Expected
mov rax, 0x41424344FFFFFFFF
mov rdx, 0x5152535455565758

; Desired
mov rbx, 0x6162636465666768
mov rcx, 0x7172737475767778

cmpxchg16b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; rdx and rax will now contain the memory's data

; Check memory location to ensure it contains what we want
mov r12, [r15 + 8 * 0]
mov r13, [r15 + 8 * 1]

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x80000000",
    "RDX": "0xFFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0xFFFFFFFF80000000",
    "R14": "0x0"
  }
}
%endif

; Failing CMPXCHG8B where the registers carry junk in their upper 32 bits.
; On failure the loaded EDX:EAX must be zero-extended, clearing the upper halves of RAX and RDX.

mov r15, 0xe0000000

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x41414141FFFFFFFF
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; edx and eax will now contain the memory's data
; It will zext to the full 64bit of the register

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B on an aligned address where the registers carry junk in their upper 32 bits.
; Only the low 32 bits of each register take part in the compare and the store.

mov r15, 0xe0000000

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B on an unaligned address that stays inside one 16-byte region.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Within 16 byte region but unaligned
mov r15, 0xe0000007

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B on an unaligned address that crosses a 16-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 16byte boundary and unaligned
mov r15, 0xe0000009

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_01_9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4141414180000000",
    "RDX": "0x41414141FFFFFFFF",
    "RBX": "0xFFFFFFFF41424344",
    "RCX": "0xFFFFFFFF51525354",
    "R13": "0x5152535441424344",
    "R14": "0x1"
  }
}
%endif

; Successful CMPXCHG8B on an unaligned address that crosses a 64-byte boundary.
; The registers carry junk in their upper 32 bits, which the instruction must ignore.

; Spans 64byte boundary and unaligned
mov r15, 0xe000003F

mov rax, 0xFFFFFFFF80000000
mov [r15 + 8 * 0], rax

mov r14, 0
; Expected
mov rax, 0x4141414180000000
mov rdx, 0x41414141FFFFFFFF

; Desired
mov rbx, 0xFFFFFFFF41424344
mov rcx, 0xFFFFFFFF51525354

cmpxchg8b [r15]

; Set r14 to 1 if the memory location was expected
setz r14b

; Memory will now be set to the register data (ECX:EBX)
; The compare succeeded, so RAX and RDX are left untouched

; Check memory location to ensure it contains what we want
mov r13, [r15 + 8 * 0]
hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445460000",
    "RBX": "0x0",
    "RDX": "1",
    "R9": "1",
    "R10": "1"
  },
  "HostFeatures": ["RAND"]
}
%endif

; Checks how RDRAND writes its destination at each operand size:
;   16-bit: only the low word is replaced, the upper 48 bits are preserved
;   32-bit: the result is zero-extended in to the full 64-bit register
;   64-bit: the whole register is replaced
; Each RDRAND is retried until it reports success through CF.
; The random bits themselves are masked out before anything is compared.

mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748

; 16-bit should insert
test_16bit:
rdrand ax
jnc test_16bit

; Mask out RNG
mov r11, 0xFFFFFFFFFFFF0000
and rax, r11

; The upper 48 bits must still hold the original pattern
mov r8, 0x4142434445460000
cmp rax, r8

mov rdx, 0
sete dl

; 32-bit and 64-bit should zext
test_32bit:
rdrand ebx
jnc test_32bit

; Mask out RNG
mov r11, 0xFFFFFFFF00000000
and rbx, r11

; The upper 32 bits must no longer hold the original pattern.
; Compare the masked result in rbx, not the mask constant in r11.
mov r8, 0x4142434400000000
cmp rbx, r8

mov r9, 0
setne r9b

test_64bit:
rdrand rcx
; Retry the 64-bit read itself on failure, not the 32-bit one
jnc test_64bit

; A zero result would mean nothing was written; a random 64-bit value is assumed to be nonzero
mov r8, 0x0
cmp rcx, r8

mov r10, 0
setne r10b

hlt


================================================
FILE: unittests/ASM/Secondary/09_XX_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445460000",
    "RBX": "0x0",
    "RDX": "1",
    "R9": "1",
    "R10": "1"
  },
  "HostFeatures": ["RAND"]
}
%endif

; Checks how RDSEED writes its destination at each operand size:
;   16-bit: only the low word is replaced, the upper 48 bits are preserved
;   32-bit: the result is zero-extended in to the full 64-bit register
;   64-bit: the whole register is replaced
; Each RDSEED is retried until it reports success through CF.
; The random bits themselves are masked out before anything is compared.

mov rax, 0x4142434445464748
mov rbx, 0x4142434445464748
mov rcx, 0x4142434445464748

; 16-bit should insert
test_16bit:
rdseed ax
jnc test_16bit

; Mask out RNG
mov r11, 0xFFFFFFFFFFFF0000
and rax, r11

; The upper 48 bits must still hold the original pattern
mov r8, 0x4142434445460000
cmp rax, r8

mov rdx, 0
sete dl

; 32-bit and 64-bit should zext
test_32bit:
rdseed ebx
jnc test_32bit

; Mask out RNG
mov r11, 0xFFFFFFFF00000000
and rbx, r11

; The upper 32 bits must no longer hold the original pattern.
; Compare the masked result in rbx, not the mask constant in r11.
mov r8, 0x4142434400000000
cmp rbx, r8

mov r9, 0
setne r9b

test_64bit:
rdseed rcx
jnc test_64bit

; A zero result would mean nothing was written; a random 64-bit value is assumed to be nonzero
mov r8, 0x0
cmp rcx, r8

mov r10, 0
setne r10b

hlt


================================================
FILE: unittests/ASM/Secondary/12_66_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0041004300450047", "0x0051005300550057"],
    "XMM3": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
  }
}
%endif

; PSRLW (logical right shift of each 16-bit element) with immediate shift counts.
; Counts of 16 and above are expected to zero every element.

mov rdx, 0xe0000000

; Two 128-bit source vectors in scratch memory
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

; Shift counts: above the element width, equal to it, half of it, and one
psrlw xmm0, 32
psrlw xmm1, 16
psrlw xmm2, 8
psrlw xmm3, 1

hlt


================================================
FILE: unittests/ASM/Secondary/12_66_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0041004300450047", "0x0051005300550057"],
    "XMM3": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; PSRAW (arithmetic right shift of each 16-bit element) with immediate shift counts.
; Counts of 16 and above are expected to fill every element with its sign bit.
; xmm0 through xmm3 hold only positive elements; xmm4 mixes negative (low half) and positive (high half).

mov rdx, 0xe0000000

; Three 128-bit source vectors in scratch memory
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x8000800080008000
mov [rdx + 8 * 4], rax
mov rax, 0x7000700070007000
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]
movapd xmm4, [rdx + 32]

; Shift counts: above the element width, equal to it, half of it, and one
psraw xmm0, 32
psraw xmm1, 16
psraw xmm2, 8
psraw xmm3, 1
; Sign fill: negative elements become all ones, positive elements become zero
psraw xmm4, 16

hlt


================================================
FILE: unittests/ASM/Secondary/12_66_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x4200440046004800", "0x5200540056005800"],
    "XMM3": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
  }
}
%endif

; PSLLW xmm, imm8 test: left shift of every 16-bit lane by an immediate count.
; The four shifts below use counts 32, 16, 8 and 1; the expected RegData shows
; counts of 16 or more clearing the register.

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

psllw xmm0, 32 ; count > lane width: expected all zero
psllw xmm1, 16 ; count == lane width: expected all zero
psllw xmm2, 8  ; each word's low byte moves to the high byte
psllw xmm3, 1  ; each word doubled, overflow out of the lane is dropped

hlt


================================================
FILE: unittests/ASM/Secondary/13_66_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000616200006566", "0x0000717200007576"],
    "XMM2": ["0x0041424300454647", "0x0051525300555657"],
    "XMM3": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
  }
}
%endif

; PSRLD xmm, imm8 test: logical right shift of every 32-bit lane by an
; immediate count. Counts used: 32 (equal to the lane width, expected zero),
; 16, 8 and 1.

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

psrld xmm0, 32 ; count == lane width: expected all zero
psrld xmm1, 16 ; each dword keeps only its former high word
psrld xmm2, 8  ; each dword loses its low byte
psrld xmm3, 1  ; each dword halved, top bit filled with zero

hlt


================================================
FILE: unittests/ASM/Secondary/13_66_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000616200006566", "0x0000717200007576"],
    "XMM2": ["0x0041424300454647", "0x0051525300555657"],
    "XMM3": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; PSRAD xmm, imm8 test: arithmetic right shift of every 32-bit lane by an
; immediate count. xmm4 is loaded with one qword of negative dwords and one of
; positive dwords so the expected RegData shows the sign fill for both cases.

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector (all dwords positive), stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector (all dwords positive), stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Third vector at [rdx + 32]: low qword has the sign bit set in both dwords,
; high qword has it clear in both dwords
mov rax, 0x8000800080008000
mov [rdx + 8 * 4], rax
mov rax, 0x7000700070007000
mov [rdx + 8 * 5], rax

movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]
movapd xmm4, [rdx + 32]

psrad xmm0, 32 ; count == lane width on positive dwords: expected all zero
psrad xmm1, 16 ; each dword keeps only its former high word
psrad xmm2, 8  ; each dword loses its low byte
psrad xmm3, 1  ; each dword halved
psrad xmm4, 32 ; expected: negative dwords become 0xFFFFFFFF, positive dwords become 0

hlt


================================================
FILE: unittests/ASM/Secondary/13_66_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x6364000067680000", "0x7374000077780000"],
    "XMM2": ["0x4243440046474800", "0x5253540056575800"],
    "XMM3": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
  }
}
%endif

; PSLLD xmm, imm8 test: left shift of every 32-bit lane by an immediate count.
; Counts used: 32 (equal to the lane width, expected zero), 16, 8 and 1.

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

pslld xmm0, 32 ; count == lane width: expected all zero
pslld xmm1, 16 ; each dword's low word moves to the high word
pslld xmm2, 8  ; each dword shifted up by one byte
pslld xmm3, 1  ; each dword doubled, overflow out of the lane is dropped

hlt


================================================
FILE: unittests/ASM/Secondary/14_66_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000041424344", "0x0000000051525354"],
    "XMM1": ["0x0000616263646566", "0x0000717273747576"],
    "XMM2": ["0x0041424344454647", "0x0051525354555657"],
    "XMM3": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC"]
  }
}
%endif

; PSRLQ xmm, imm8 test: logical right shift of both 64-bit lanes by an
; immediate count. Counts used: 32, 16, 8 and 1 (all within the lane width).

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

psrlq xmm0, 32 ; each qword keeps only its former high dword
psrlq xmm1, 16 ; each qword loses its low word
psrlq xmm2, 8  ; each qword loses its low byte
psrlq xmm3, 1  ; each qword halved, top bit filled with zero

hlt


================================================
FILE: unittests/ASM/Secondary/14_66_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4546474800000000", "0x5556575800000000"],
    "XMM1": ["0x6364656667680000", "0x7374757677780000"],
    "XMM2": ["0x4243444546474800", "0x5253545556575800"],
    "XMM3": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"]
  }
}
%endif

; PSLLQ xmm, imm8 test: left shift of both 64-bit lanes by an immediate count.
; Counts used: 32, 16, 8 and 1 (all within the lane width).

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

psllq xmm0, 32 ; each qword's low dword moves to the high dword
psllq xmm1, 16 ; each qword shifted up by one word
psllq xmm2, 8  ; each qword shifted up by one byte
psllq xmm3, 1  ; each qword doubled, overflow out of the lane is dropped

hlt


================================================
FILE: unittests/ASM/Secondary/14_66_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0x6162636465666768"],
    "XMM2": ["0x4546474800000000", "0x5556575841424344"],
    "XMM3": ["0x6263646566676800", "0x7273747576777861"]
  }
}
%endif

; PSLLDQ xmm, imm8 test: left shift of the whole 128-bit register by an
; immediate number of BYTES (not bits). Counts used: 16, 8, 4 and 1.

; Base address of the scratch buffer holding the source vectors
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; First 16-byte source vector, stored at [rdx]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second 16-byte source vector, stored at [rdx + 16]
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; xmm0/xmm2 get the first vector, xmm1/xmm3 get the second
movapd xmm0, [rdx]
movapd xmm1, [rdx + 16]
movapd xmm2, [rdx]
movapd xmm3, [rdx + 16]

pslldq xmm0, 16 ; whole register shifted out: expected all zero
pslldq xmm1, 8  ; low qword moves to the high qword, low qword becomes zero
pslldq xmm2, 4  ; bytes move up by one dword, crossing the qword boundary
pslldq xmm3, 1  ; bytes move up by one, crossing the qword boundary

hlt


================================================
FILE: unittests/ASM/Secondary/14_XX_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": ["0x0000000041424344"],
    "MM1": ["0x0000515253545556"],
    "MM2": ["0x0061626364656667"],
    "MM3": ["0x38B939BA3ABB3BBC"]
  }
}
%endif

; PSRLQ mm, imm8 test (MMX form): logical right shift of a 64-bit MMX
; register by an immediate count. Counts used: 32, 16, 8 and 1.

; Base address of the scratch buffer holding the source qwords
; (presumably mapped by the test harness -- TODO confirm).
mov rdx, 0xe0000000

; Four distinct source qwords at [rdx + 0], [rdx + 8], [rdx + 16], [rdx + 24]
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Each MMX register gets its own source qword
movq mm0, [rdx]
movq mm1, [rdx + 8]
movq mm2, [rdx + 16]
movq mm3, [rdx + 24]

psrlq mm0, 32 ; keeps only the former high dword
psrlq mm1, 16 ; drops the low word
psrlq mm2, 8  ; drops the low byte
psrlq mm3, 1  ; halves the value, top bit filled with zero

hlt


================================================
FILE: unittests/ASM/Secondary/15_F3_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000434445464748",
    "RBX": "0x0000000045464748",
    "RCX": "0x0000434445464748"
  }
}
%endif

; RDFSBASE/WRFSBASE test: write a known value to the FS base and read it back
; through both the 32-bit and 64-bit forms of rdfsbase. The original FS base
; is saved first and restored before halting.

; Save FS
rdfsbase rax
mov [rel .data_backup], rax

; Value written to the FS base; rbx/rcx are preset to all ones so the
; expected RegData can show exactly which bits each read overwrites
mov rax, 0x0000434445464748
mov rbx, -1
mov rcx, -1

wrfsbase rax
rdfsbase ebx ; 32bit
rdfsbase rcx ; 64bit

; Restore FS
mov rdx, [rel .data_backup]
wrfsbase rdx

hlt

; Storage for the original FS base, placed on its own page-aligned address
align 4096
.data_backup:
dq 0


================================================
FILE: unittests/ASM/Secondary/15_F3_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000434445464748",
    "RBX": "0x0000000045464748",
    "RCX": "0x0000434445464748"
  }
}
%endif

; RDGSBASE/WRGSBASE test: write a known value to the GS base and read it back
; through both the 32-bit and 64-bit forms of rdgsbase. Unlike the FS variant
; of this test, the original GS base is not saved or restored here.

; Value written to the GS base; rbx/rcx are preset to all ones so the
; expected RegData can show exactly which bits each read overwrites
mov rax, 0x0000434445464748
mov rbx, -1
mov rcx, -1

wrgsbase rax
rdgsbase ebx ; 32bit
rdgsbase rcx ; 64bit

hlt


================================================
FILE: unittests/ASM/Secondary/15_F3_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748"
  }
}
%endif

; FS-relative addressing test: point the FS base at a scratch buffer with
; wrfsbase, store a value through a plain pointer, then load it back through
; an fs-prefixed memory operand.

; Save FS
rdfsbase rax
mov [rel .data_backup], rax

; Point the FS base at the scratch buffer
; (presumably mapped by the test harness -- TODO confirm)
mov rdx, 0xe0000000
wrfsbase rdx

; Store the marker value at the start of the buffer via the plain pointer
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax

; Clobber rax first so a load that does nothing cannot pass, then read
; offset 0 relative to the FS base
mov rax, -1
mov rax, qword [fs:0]

; Restore FS
mov rbx, [rel .data_backup]
wrfsbase rbx

hlt

; Storage for the original FS base, placed on its own page-aligned address
align 4096
.data_backup:
dq 0


================================================
FILE: unittests/ASM/Secondary/15_F3_02_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000434445464748",
    "RBX": "0x00000000FFFFFFFF"
  }
}
%endif

; WRFSBASE 32-bit operand test: a write through the 32-bit form must replace
; the whole FS base with the zero-extended value, not just its low half.

; Save FS
rdfsbase rax
mov [rel .data_backup], rax

; rax holds a value with bits set above bit 31; rbx is all ones so its low
; 32 bits are 0xFFFFFFFF
mov rax, 0x0000434445464748
mov rbx, -1

; Ensure that wrfsbase of 32-bit will zero extend
wrfsbase rax ; first install a base with non-zero upper bits
wrfsbase ebx ; then overwrite it through the 32-bit form
rdfsbase rbx ; 64bit

; Restore FS
mov rcx, [rel .data_backup]
wrfsbase rcx

hlt

; Storage for the original FS base, placed on its own page-aligned address
align 4096
.data_backup:
dq 0


================================================
FILE: unittests/ASM/Secondary/15_F3_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748"
  }
}
%endif

; GS-relative addressing test: point the GS base at a scratch buffer with
; wrgsbase, store a value through a plain pointer, then load it back through
; a gs-prefixed memory operand.

; Point the GS base at the scratch buffer
; (presumably mapped by the test harness -- TODO confirm)
mov rdx, 0xe0000000
wrgsbase rdx

; Store the marker value at the start of the buffer via the plain pointer
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax

; Clobber rax first so a load that does nothing cannot pass, then read
; offset 0 relative to the GS base
mov rax, -1
mov rax, qword [gs:0]

hlt


================================================
FILE: unittests/ASM/Secondary/15_F3_03_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000434445464748",
    "RBX": "0x00000000FFFFFFFF"
  }
}
%endif

; WRGSBASE 32-bit operand test: a write through the 32-bit form must replace
; the whole GS base with the zero-extended value, not just its low half.

; rax holds a value with bits set above bit 31; rbx is all ones so its low
; 32 bits are 0xFFFFFFFF
mov rax, 0x0000434445464748
mov rbx, -1

; Ensure that wrgsbase of 32-bit will zero extend
wrgsbase rax ; first install a base with non-zero upper bits
wrgsbase ebx ; then overwrite it through the 32-bit form
rdgsbase rbx ; 64bit

hlt


================================================
FILE: unittests/ASM/Secondary/15_XX_0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0x1112131415161718", "0x0"],
    "XMM1":  ["0x2122232425262728", "0x0"],
    "XMM2":  ["0x3132333435363738", "0x0"],
    "XMM3":  ["0x4142434445464748", "0x0"],
    "XMM4":  ["0x5152535455565758", "0x0"],
    "XMM5":  ["0x6162636465666768", "0x0"],
    "XMM6":  ["0x7172737475767778", "0x0"],
    "XMM7":  ["0x8182838485868788", "0x0"],
    "XMM8":  ["0xccc2c3c4c5c6c7c8", "0x0"],
    "XMM9":  ["0xa1aaa3a4a5a6a7a8", "0x0"],
    "XMM10": ["0xf1f2fff4f5f6f7f8", "0x0"],
    "XMM11": ["0xe1e2e3eee5e6e7e8", "0x0"],
    "XMM12": ["0xd1d2d3d4ddd6d7d8", "0x0"],
    "XMM13": ["0xc1c2c3c4c5ccc7c8", "0x0"],
    "XMM14": ["0xb1b2b3b4b5b6bbb8", "0x0"],
    "XMM15": ["0xa1a2a3a4a5a6a7aa", "0x0"]
  }
}
%endif

; FXSAVE/FXRSTOR round-trip test.
;  1. Load known values into MM0-MM7 and XMM0-XMM15.
;  2. Pre-fill the save area at [rsp] with junk, plus six marker qwords at
;     offset 464.
;  3. fxsave, corrupt every MMX/XMM register, fxrstor.
;  4. Read the six marker qwords back into GPRs.
; The expected RegData checks that the MMX/XMM registers hold their original
; values again and that the markers at offset 464 survived the fxsave.

; rsp is used as the base of the 512-byte save area
; (presumably a region mapped by the test harness -- TODO confirm)
mov rsp, 0xe0000000

; Set up MMX state
mov rax, 0x1112131415161718
movd mm0, rax
mov rax, 0x2122232425262728
movd mm1, rax
mov rax, 0x3132333435363738
movd mm2, rax
mov rax, 0x4142434445464748
movd mm3, rax
mov rax, 0x5152535455565758
movd mm4, rax
mov rax, 0x6162636465666768
movd mm5, rax
mov rax, 0x7172737475767778
movd mm6, rax
mov rax, 0x8182838485868788
movd mm7, rax

; Setup XMM state
; (movq from a GPR fills the low qword; the expected RegData has the upper
; qword of every XMM register as zero)
mov rax, 0x1112131415161718
movq xmm0, rax
mov rax, 0x2122232425262728
movq xmm1, rax
mov rax, 0x3132333435363738
movq xmm2, rax
mov rax, 0x4142434445464748
movq xmm3, rax
mov rax, 0x5152535455565758
movq xmm4, rax
mov rax, 0x6162636465666768
movq xmm5, rax
mov rax, 0x7172737475767778
movq xmm6, rax
mov rax, 0x8182838485868788
movq xmm7, rax
mov rax, 0xccc2c3c4c5c6c7c8
movq xmm8, rax
mov rax, 0xa1aaa3a4a5a6a7a8
movq xmm9, rax
mov rax, 0xf1f2fff4f5f6f7f8
movq xmm10, rax
mov rax, 0xe1e2e3eee5e6e7e8
movq xmm11, rax
mov rax, 0xd1d2d3d4ddd6d7d8
movq xmm12, rax
mov rax, 0xc1c2c3c4c5ccc7c8
movq xmm13, rax
mov rax, 0xb1b2b3b4b5b6bbb8
movq xmm14, rax
mov rax, 0xa1a2a3a4a5a6a7aa
movq xmm15, rax

; Corrupt state and see what it stores
mov eax, 0x41424344

; Overwrite header
mov dword [rsp + 0], eax
; Overwrite the mm state
; (eight qwords of all ones starting at offset 32)
mov rax, -1
mov qword [rsp + 32 + 8 * 0], rax
mov qword [rsp + 32 + 8 * 1], rax
mov qword [rsp + 32 + 8 * 2], rax
mov qword [rsp + 32 + 8 * 3], rax
mov qword [rsp + 32 + 8 * 4], rax
mov qword [rsp + 32 + 8 * 5], rax
mov qword [rsp + 32 + 8 * 6], rax
mov qword [rsp + 32 + 8 * 7], rax

; Overwrite the xmm state
; (32 qwords = 256 bytes starting at offset 160, i.e. 16 registers x 16 bytes)
mov qword [rsp + 160 + 8 * 0], rax
mov qword [rsp + 160 + 8 * 1], rax
mov qword [rsp + 160 + 8 * 2], rax
mov qword [rsp + 160 + 8 * 3], rax
mov qword [rsp + 160 + 8 * 4], rax
mov qword [rsp + 160 + 8 * 5], rax
mov qword [rsp + 160 + 8 * 6], rax
mov qword [rsp + 160 + 8 * 7], rax
mov qword [rsp + 160 + 8 * 8], rax
mov qword [rsp + 160 + 8 * 9], rax
mov qword [rsp + 160 + 8 * 10], rax
mov qword [rsp + 160 + 8 * 11], rax
mov qword [rsp + 160 + 8 * 12], rax
mov qword [rsp + 160 + 8 * 13], rax
mov qword [rsp + 160 + 8 * 14], rax
mov qword [rsp + 160 + 8 * 15], rax
mov qword [rsp + 160 + 8 * 16], rax
mov qword [rsp + 160 + 8 * 17], rax
mov qword [rsp + 160 + 8 * 18], rax
mov qword [rsp + 160 + 8 * 19], rax
mov qword [rsp + 160 + 8 * 20], rax
mov qword [rsp + 160 + 8 * 21], rax
mov qword [rsp + 160 + 8 * 22], rax
mov qword [rsp + 160 + 8 * 23], rax
mov qword [rsp + 160 + 8 * 24], rax
mov qword [rsp + 160 + 8 * 25], rax
mov qword [rsp + 160 + 8 * 26], rax
mov qword [rsp + 160 + 8 * 27], rax
mov qword [rsp + 160 + 8 * 28], rax
mov qword [rsp + 160 + 8 * 29], rax
mov qword [rsp + 160 + 8 * 30], rax
mov qword [rsp + 160 + 8 * 31], rax

; Overwrite the three reserved 16byte elements
mov qword [rsp + 416 + 8 * 0], rax
mov qword [rsp + 416 + 8 * 1], rax
mov qword [rsp + 416 + 8 * 2], rax
mov qword [rsp + 416 + 8 * 3], rax
mov qword [rsp + 416 + 8 * 4], rax
mov qword [rsp + 416 + 8 * 5], rax

; Overwrite the three 16byte "available" slots
; (six distinct marker qwords, read back into rax..rdi at the end of the test)
mov rax, 0x1111111111111111
mov qword [rsp + 464 + 8 * 0], rax
mov rax, 0x2222222222222222
mov qword [rsp + 464 + 8 * 1], rax
mov rax, 0x3333333333333333
mov qword [rsp + 464 + 8 * 2], rax
mov rax, 0x4444444444444444
mov qword [rsp + 464 + 8 * 3], rax
mov rax, 0x5555555555555555
mov qword [rsp + 464 + 8 * 4], rax
mov rax, 0x6666666666666666
mov qword [rsp + 464 + 8 * 5], rax

; Now save our state
fxsave [rsp]

; Corrupt MMX And XMM state
mov rax, -1
movd mm0, rax
movd mm1, rax
movd mm2, rax
movd mm3, rax
movd mm4, rax
movd mm5, rax
movd mm6, rax
movd mm7, rax

; Corrupt XMM state
movq xmm0, rax
movq xmm1, rax
movq xmm2, rax
movq xmm3, rax
movq xmm4, rax
movq xmm5, rax
movq xmm6, rax
movq xmm7, rax
movq xmm8, rax
movq xmm9, rax
movq xmm10, rax
movq xmm11, rax
movq xmm12, rax
movq xmm13, rax
movq xmm14, rax
movq xmm15, rax

; Now reload the state we just saved
fxrstor [rsp]

; Load the three 16bytes of "available" slots to make sure it wasn't overwritten
; Reserved can be overwritten regardless
mov rax, qword [rsp + 464 + 8 * 0]
mov rbx, qword [rsp + 464 + 8 * 1]
mov rcx, qword [rsp + 464 + 8 * 2]
mov rdx, qword [rsp + 464 + 8 * 3]
mov rsi, qword [rsp + 464 + 8 * 4]
mov rdi, qword [rsp + 464 + 8 * 5]

hlt


================================================
FILE: unittests/ASM/Secondary/15_XX_5.asm
================================================
%ifdef CONFIG
{
}
%endif

; LFENCE smoke test: the CONFIG block is empty, so no register state is
; checked; the test passes if the instruction executes and hlt is reached.
; Just to ensure execution
lfence
hlt


================================================
FILE: unittests/ASM/Secondary/15_XX_6.asm
================================================
%ifdef CONFIG
{
}
%endif

; MFENCE smoke test: the CONFIG block is empty, so no register state is
; checked; the test passes if the instruction executes and hlt is reached.
; Just to ensure execution
mfence
hlt


================================================
FILE: unittests/ASM/Secondary/15_XX_7.asm
================================================
%ifdef CONFIG
{
}
%endif

; SFENCE smoke test: the CONFIG block is empty, so no register state is
; checked; the test passes if the instruction executes and hlt is reached.
; Just to ensure execution
sfence
hlt


================================================
FILE: unittests/ASM/Secondary/15_XX_7_2.asm
================================================
%ifdef CONFIG
{
}
%endif

; CLFLUSH smoke test: the CONFIG block is empty, so no register state is
; checked; the test passes if the flush executes and hlt is reached.

; Address whose cache line is flushed
; (presumably a region mapped by the test harness -- TODO confirm)
mov rdx, 0xe0000000
clflush [rdx]
hlt


================================================
FILE: unittests/ASM/Secondary/CLFLUSHOPT.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "HostFeatures": ["CLFLOPT"]
}
%endif

; CLFLUSHOPT smoke test, gated on the "CLFLOPT" host feature in CONFIG.

; Address whose cache line is flushed
; (presumably a region mapped by the test harness -- TODO confirm)
mov rdx, 0xe0000000
; Just ensures the code is executed.
clflushopt [rdx]

; Marker checked by RegData: proves execution continued past the flush
mov rax, 1
hlt


================================================
FILE: unittests/ASM/Secondary/CLWB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "HostFeatures": ["CLWB"]
}
%endif

; CLWB smoke test, gated on the "CLWB" host feature in CONFIG.

; Address whose cache line is written back
; (presumably a region mapped by the test harness -- TODO confirm)
mov rdx, 0xe0000000
; Just ensures the code is executed.
clwb [rdx]

; Marker checked by RegData: proves execution continued past the write-back
mov rax, 1
hlt


================================================
FILE: unittests/ASM/Secondary/Prefetch.asm
================================================
%ifdef CONFIG
{
}
%endif

; Prefetch smoke test: the CONFIG block is empty, so nothing is checked beyond
; every encoding below executing and hlt being reached. It covers the named
; prefetch hints, the same hints behind a 0x66/0xF2/0xF3 prefix byte, and
; hand-assembled 0F 18 /r and 0F 0D /r encodings for all eight values of the
; ModRM reg field.

; Emits prefetch<type> [reg], e.g. "prefetch nta, rax" -> prefetchnta [rax]
; Arg 1 = type
; Arg 2 = Reg
%macro prefetch 2
  prefetch%1 [%2]
%endmacro

; Emits each of the four named hints (nta, t0, t1, t2) with a raw prefix byte
; placed directly in front of the instruction
; Arg1 = prefix
; Arg2 = Reg
%macro prefetch_pre 2
  db %1
  prefetch nta, %2
  db %1
  prefetch t0, %2
  db %1
  prefetch t1, %2
  db %1
  prefetch t2, %2
%endmacro

; Hand-assembles opcode 0F 18 followed by the given ModRM byte. Callers pass
; (n << 3), which sets only the reg field and leaves mod/rm as zero.
; Arg 1 = modrm encoding
; Always uses rax
%macro prefetch_res 1
  db 0x0F
  db 0x18
  db %1
%endmacro

; Same as prefetch_res, with a raw prefix byte emitted first
; Arg 1 = prefix
; Arg 2 = modrm encoding
%macro prefetch_res_pre 2
  db %1
  prefetch_res %2
%endmacro

; Hand-assembles opcode 0F 0D followed by the given ModRM byte. Callers pass
; (n << 3), which sets only the reg field and leaves mod/rm as zero.
; Arg 1 = modrm encoding
; Always uses rax
%macro prefetch_resw 1
  db 0x0F
  db 0x0D
  db %1
%endmacro

; Same as prefetch_resw, with a raw prefix byte emitted first
; Arg 1 = prefix
; Arg 2 = modrm encoding
%macro prefetch_resw_pre 2
  db %1
  prefetch_resw %2
%endmacro

; Address used by every prefetch below
; (presumably a region mapped by the test harness -- TODO confirm)
mov rax, 0xe0000000

; Named hints, no prefix
prefetch nta, rax
prefetch t0, rax
prefetch t1, rax
prefetch t2, rax

; Named hints behind a 0x66 prefix (the same invocation is repeated four times)
prefetch_pre 0x66, rax
prefetch_pre 0x66, rax
prefetch_pre 0x66, rax
prefetch_pre 0x66, rax

; Named hints behind a 0xF2 prefix
prefetch_pre 0xF2, rax
prefetch_pre 0xF2, rax
prefetch_pre 0xF2, rax
prefetch_pre 0xF2, rax

; Named hints behind a 0xF3 prefix
prefetch_pre 0xF3, rax
prefetch_pre 0xF3, rax
prefetch_pre 0xF3, rax
prefetch_pre 0xF3, rax

; 0F 18 /0 through /7, no prefix
prefetch_res (0 << 3)
prefetch_res (1 << 3)
prefetch_res (2 << 3)
prefetch_res (3 << 3)
prefetch_res (4 << 3)
prefetch_res (5 << 3)
prefetch_res (6 << 3)
prefetch_res (7 << 3)

; 0F 18 /0 through /7 behind a 0x66 prefix
prefetch_res_pre 0x66, (0 << 3)
prefetch_res_pre 0x66, (1 << 3)
prefetch_res_pre 0x66, (2 << 3)
prefetch_res_pre 0x66, (3 << 3)
prefetch_res_pre 0x66, (4 << 3)
prefetch_res_pre 0x66, (5 << 3)
prefetch_res_pre 0x66, (6 << 3)
prefetch_res_pre 0x66, (7 << 3)

; 0F 18 /0 through /7 behind a 0xF2 prefix
prefetch_res_pre 0xF2, (0 << 3)
prefetch_res_pre 0xF2, (1 << 3)
prefetch_res_pre 0xF2, (2 << 3)
prefetch_res_pre 0xF2, (3 << 3)
prefetch_res_pre 0xF2, (4 << 3)
prefetch_res_pre 0xF2, (5 << 3)
prefetch_res_pre 0xF2, (6 << 3)
prefetch_res_pre 0xF2, (7 << 3)

; 0F 18 /0 through /7 behind a 0xF3 prefix
prefetch_res_pre 0xF3, (0 << 3)
prefetch_res_pre 0xF3, (1 << 3)
prefetch_res_pre 0xF3, (2 << 3)
prefetch_res_pre 0xF3, (3 << 3)
prefetch_res_pre 0xF3, (4 << 3)
prefetch_res_pre 0xF3, (5 << 3)
prefetch_res_pre 0xF3, (6 << 3)
prefetch_res_pre 0xF3, (7 << 3)


; 0F 0D /0 through /7, no prefix
prefetch_resw (0 << 3)
prefetch_resw (1 << 3)
prefetch_resw (2 << 3)
prefetch_resw (3 << 3)
prefetch_resw (4 << 3)
prefetch_resw (5 << 3)
prefetch_resw (6 << 3)
prefetch_resw (7 << 3)

; 0F 0D /0 through /7 behind a 0x66 prefix
prefetch_resw_pre 0x66, (0 << 3)
prefetch_resw_pre 0x66, (1 << 3)
prefetch_resw_pre 0x66, (2 << 3)
prefetch_resw_pre 0x66, (3 << 3)
prefetch_resw_pre 0x66, (4 << 3)
prefetch_resw_pre 0x66, (5 << 3)
prefetch_resw_pre 0x66, (6 << 3)
prefetch_resw_pre 0x66, (7 << 3)

; 0F 0D /0 through /7 behind a 0xF2 prefix
prefetch_resw_pre 0xF2, (0 << 3)
prefetch_resw_pre 0xF2, (1 << 3)
prefetch_resw_pre 0xF2, (2 << 3)
prefetch_resw_pre 0xF2, (3 << 3)
prefetch_resw_pre 0xF2, (4 << 3)
prefetch_resw_pre 0xF2, (5 << 3)
prefetch_resw_pre 0xF2, (6 << 3)
prefetch_resw_pre 0xF2, (7 << 3)

; 0F 0D /0 through /7 behind a 0xF3 prefix
prefetch_resw_pre 0xF3, (0 << 3)
prefetch_resw_pre 0xF3, (1 << 3)
prefetch_resw_pre 0xF3, (2 << 3)
prefetch_resw_pre 0xF3, (3 << 3)
prefetch_resw_pre 0xF3, (4 << 3)
prefetch_resw_pre 0xF3, (5 << 3)
prefetch_resw_pre 0xF3, (6 << 3)
prefetch_resw_pre 0xF3, (7 << 3)


hlt


================================================
FILE: unittests/ASM/Secondary/shufps_optimization.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x4054c664c2f837b5", "0x4044836d86ec17ec"],
    "XMM1":  ["0x402a1e1c58255b03", "0x4035fe425aee6320"],
    "XMM2":  ["0x401568e0c9d9d346", "0x40154b7d41743e96"],
    "XMM3":  ["0x40154b7d41743e96", "0x403d075a31a4bdba"],
    "XMM4":  ["0xbd66277c31a4bdba", "0x4ea4a8c17ebaf102"],
    "XMM5":  ["0x4056d74040334ec1", "0x40497b13404439b5"],
    "XMM6":  ["0x404439b5404439b5", "0x4037f9ca18bd6627"],
    "XMM7":  ["0x4037f9ca4037f9ca", "0x403839b866e43aa8"],
    "XMM8":  ["0x403839b8403839b8", "0x4058bc1f212d7732"],
    "XMM9":  ["0x4058bc1f212d7732", "0xa10e0221a10e0221"],
    "XMM10": ["0x4058defb00bcbe62", "0x9eecbfb19eecbfb1"],
    "XMM11": ["0x40503e3c4052997f", "0x40395a6bf8769ec3"],
    "XMM12": ["0x40419d2240395a6b", "0x40177e28240b7803"],
    "XMM13": ["0x240b780340177e28", "0x404a03c74fb549f9"],
    "XMM14": ["0x9f16b11c40408402", "0x404d31595feda661"],
    "XMM15": ["0x5feda6615feda661", "0x7aa25d8d7aa25d8d"]
  }
}
%endif

; SHUFPS test covering sixteen different selector immediates.
; Each shufps uses xmmN as destination/first source and xmmN+1 as second
; source. Because the instructions run in ascending register order, every
; second source still holds its original loaded value when it is read.
; NOTE(review): going by the file name, the selectors were presumably picked
; to hit special-cased shuffle lowerings in the emulator -- confirm.

; Load xmm0..xmm15 with sixteen consecutive 16-byte rows of the data table
movaps xmm0, [rel .data + 16 * 0]
movaps xmm1, [rel .data + 16 * 1]

movaps xmm2, [rel .data + 16 * 2]
movaps xmm3, [rel .data + 16 * 3]

movaps xmm4, [rel .data + 16 * 4]
movaps xmm5, [rel .data + 16 * 5]

movaps xmm6, [rel .data + 16 * 6]
movaps xmm7, [rel .data + 16 * 7]

movaps xmm8, [rel .data + 16 * 8]
movaps xmm9, [rel .data + 16 * 9]

movaps xmm10, [rel .data + 16 * 10]
movaps xmm11, [rel .data + 16 * 11]

movaps xmm12, [rel .data + 16 * 12]
movaps xmm13, [rel .data + 16 * 13]

movaps xmm14, [rel .data + 16 * 14]
movaps xmm15, [rel .data + 16 * 15]

; Selector layout: bits 1:0 and 3:2 pick result elements 0 and 1 from the
; destination register, bits 5:4 and 7:6 pick elements 2 and 3 from the source
shufps xmm0, xmm1, 01000100b
shufps xmm1, xmm2, 11101110b
shufps xmm2, xmm3, 11100100b
shufps xmm3, xmm4, 01001110b
shufps xmm4, xmm5, 10001000b
shufps xmm5, xmm6, 11011101b
shufps xmm6, xmm7, 11100101b
shufps xmm7, xmm8, 11101111b
shufps xmm8, xmm9, 01001111b
shufps xmm9, xmm10, 00000100b
shufps xmm10, xmm11, 00001110b
shufps xmm11, xmm12, 11100111b
shufps xmm12, xmm13, 01000111b
shufps xmm13, xmm14, 11100001b
shufps xmm14, xmm15, 01000001b
; Memory-operand form: second source is the seventeenth row of the table
shufps xmm15, [rel .data + 16 * 16], 0

hlt

align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/Secondary/shufps_optimization_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc2f837b5c2f837b5", "0x7aa25d8d7aa25d8d"],
    "XMM1":  ["0x4044836d86ec17ec", "0x4054c664c2f837b5"],
    "XMM2":  ["0x4035fe425aee6320", "0x4054c664c2f837b5"],
    "XMM3":  ["0x402359003eea209b", "0x4054c664c2f837b5"],
    "XMM4":  ["0x4050a018bd66277c", "0x402359003eea209b"],
    "XMM5":  ["0x4ea4a8c17ebaf102", "0x3eea209bbd66277c"],
    "XMM6":  ["0x40497b13404439b5", "0x3eea209b4ea4a8c1"],
    "XMM7":  ["0x4040528b4040528b", "0x3eea209b4ea4a8c1"],
    "XMM8":  ["0x403839b8403839b8", "0x3eea209b4ea4a8c1"],
    "XMM9":  ["0x4056cde54056cde5", "0x403839b8403839b8"],
    "XMM10": ["0x4056b34aa10e0221", "0x4056cde54056cde5"],
    "XMM11": ["0x4052997f0ed3d85a", "0xa10e0221a10e0221"],
    "XMM12": ["0x40419d2240395a6b", "0xa10e0221a10e0221"],
    "XMM13": ["0x40177e2840568cc5", "0x40419d2240395a6b"],
    "XMM14": ["0x9f16b11c40408402", "0x40419d2240395a6b"],
    "XMM15": ["0x5feda661404d3159", "0x9f16b11c40408402"]
  }
}
%endif

; SHUFPS test using the same sixteen selector immediates as
; shufps_optimization.asm, but with the register roles swapped: xmmN is the
; destination and xmmN-1 is the second source. Because the instructions run
; in ascending register order, each second source (from xmm2 onwards) has
; already been overwritten by the previous shuffle, so results chain.

; Load xmm0..xmm15 with sixteen consecutive 16-byte rows of the data table
movaps xmm0, [rel .data + 16 * 0]
movaps xmm1, [rel .data + 16 * 1]

movaps xmm2, [rel .data + 16 * 2]
movaps xmm3, [rel .data + 16 * 3]

movaps xmm4, [rel .data + 16 * 4]
movaps xmm5, [rel .data + 16 * 5]

movaps xmm6, [rel .data + 16 * 6]
movaps xmm7, [rel .data + 16 * 7]

movaps xmm8, [rel .data + 16 * 8]
movaps xmm9, [rel .data + 16 * 9]

movaps xmm10, [rel .data + 16 * 10]
movaps xmm11, [rel .data + 16 * 11]

movaps xmm12, [rel .data + 16 * 12]
movaps xmm13, [rel .data + 16 * 13]

movaps xmm14, [rel .data + 16 * 14]
movaps xmm15, [rel .data + 16 * 15]

; Test inverted sources from shufps_optimization.asm
; xmm0 is read as a source by the first shuffle before it is overwritten by
; the memory-operand shuffle on the following line
shufps xmm1, xmm0, 01000100b
shufps xmm0, [rel .data + 16 * 16], 0
shufps xmm2, xmm1, 11101110b
shufps xmm3, xmm2, 11100100b
shufps xmm4, xmm3, 01001110b
shufps xmm5, xmm4, 10001000b
shufps xmm6, xmm5, 11011101b
shufps xmm7, xmm6, 11100101b
shufps xmm8, xmm7, 11101111b
shufps xmm9, xmm8, 01001111b
shufps xmm10, xmm9, 00000100b
shufps xmm11, xmm10, 00001110b
shufps xmm12, xmm11, 11100111b
shufps xmm13, xmm12, 01000111b
shufps xmm14, xmm13, 11100001b
shufps xmm15, xmm14, 01000001b

hlt

align 16
; 512bytes of random data
.data:
dq 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/Secondary/xsave/xsave.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0x1112131415161718", "0xABFDEC3402932039"],
    "XMM1":  ["0x2122232425262728", "0xDEFCA93847392992"],
    "XMM2":  ["0x3132333435363738", "0xEADC3284ADCE9339"],
    "XMM3":  ["0x4142434445464748", "0x3987432929293847"],
    "XMM4":  ["0x5152535455565758", "0x3764583402983799"],
    "XMM5":  ["0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM6":  ["0x7172737475767778", "0x3459238471238023"],
    "XMM7":  ["0x8182838485868788", "0x9347239480289299"],
    "XMM8":  ["0xCCC2C3C4C5C6C7C8", "0x3949232903428479"],
    "XMM9":  ["0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM10": ["0xF1F2FFF4F5F6F7F8", "0x758734629799389A"],
    "XMM11": ["0xE1E2E3EEE5E6E7E8", "0x3756438328472389"],
    "XMM12": ["0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM13": ["0xC1C2C3C4C5CCC7C8", "0xABCDEF3894335820"],
    "XMM14": ["0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM15": ["0xA1A2A3A4A5A6A7AA", "0xABFD392482039840"]
  },
  "HostFeatures": ["XSAVE"]
}
%endif

; XSAVE/XRSTOR round-trip test with the x87 and SSE components selected.
; The register setup, save-area pre-fill, corruption and read-back steps all
; come from macros in xsave_macros.mac (not shown in this file).

%include "xsave_macros.mac"

; rsp is used as the base of the XSAVE area
; (presumably a region mapped by the test harness -- TODO confirm)
mov rsp, 0xE0000000

; Set up MMX and XMM state
set_up_mmx_state .xmm_data
set_up_xmm_state .xmm_data

overwrite_fxsave_slots

; Now save our state (X87 and SSE only)
; NOTE(review): XSAVE takes its component mask from EDX:EAX and only EAX is
; set here -- presumably EDX does not select anything at this point; confirm
; against xsave_macros.mac.
mov eax, 0b011
xsave [rsp]

; Corrupt MMX And XMM state
corrupt_mmx_and_xmm_registers

; Now reload the state we just saved
; NOTE(review): EAX is not reloaded before xrstor, so the restore mask is
; whatever corrupt_mmx_and_xmm_registers left in EDX:EAX -- confirm.
xrstor [rsp]

; Load the three 16bytes of "available" slots to make sure it wasn't overwritten
; Reserved can be overwritten regardless
load_fxsave_slots

hlt

; Give ourselves a region of 1000 bytes set to 0xFF
; NOTE(review): .xsave_data is not referenced by the code visible in this
; file, which saves to [rsp] -- it may be used by the included macros or be
; unused; confirm.
align 64
.xsave_data:
  times 1000 db 0xFF

; Presumably emits the .xmm_data table passed to the set_up_* macros above
define_xmm_data_section


================================================
FILE: unittests/ASM/Secondary/xsave/xsave_avx.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0x1112131415161718", "0xABFDEC3402932039", "0xA1A2A3A4A5A6A7AA", "0xABFD392482039840"],
    "XMM1":  ["0x2122232425262728", "0xDEFCA93847392992", "0x4142434445464748", "0x3987432929293847"],
    "XMM2":  ["0x3132333435363738", "0xEADC3284ADCE9339", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM3":  ["0x4142434445464748", "0x3987432929293847", "0x3132333435363738", "0xEADC3284ADCE9339"],
    "XMM4":  ["0x5152535455565758", "0x3764583402983799", "0x7172737475767778", "0x3459238471238023"],
    "XMM5":  ["0x6162636465666768", "0xACDEFACDEFACDEFA", "0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM6":  ["0x7172737475767778", "0x3459238471238023", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM7":  ["0x8182838485868788", "0x9347239480289299", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM8":  ["0xCCC2C3C4C5C6C7C8", "0x3949232903428479", "0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM9":  ["0xA1AAA3A4A5A6A7A8", "0x3784769228479192", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM10": ["0xF1F2FFF4F5F6F7F8", "0x758734629799389A", "0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM11": ["0xE1E2E3EEE5E6E7E8", "0x3756438328472389", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM12": ["0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73", "0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM13": ["0xC1C2C3C4C5CCC7C8", "0xABCDEF3894335820", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM14": ["0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499", "0xE1E2E3EEE5E6E7E8", "0x3756438328472389"],
    "XMM15": ["0xA1A2A3A4A5A6A7AA", "0xABFD392482039840", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"]
  }
}
%endif

; XSAVE/XRSTOR round-trip test with the x87, SSE and AVX components selected.
; The expected RegData lists four qwords per vector register, i.e. the full
; 256-bit contents. The register setup, save-area pre-fill, corruption and
; read-back steps all come from macros in xsave_macros.mac (not shown here).

; Defined before the include; presumably switches the macros to their
; 256-bit variants -- confirm against xsave_macros.mac
%define IS_AVX
%include "xsave_macros.mac"

; rsp is used as the base of the XSAVE area
; (presumably a region mapped by the test harness -- TODO confirm)
mov rsp, 0xE0000000

; Set up MMX and XMM state
set_up_mmx_state .xmm_data
set_up_xmm_state .xmm_data

overwrite_fxsave_slots

; Now save our state (X87, SSE, and AVX only)
; NOTE(review): XSAVE takes its component mask from EDX:EAX and only EAX is
; set here -- presumably EDX does not select anything at this point; confirm
; against xsave_macros.mac.
mov eax, 0b111
xsave [rsp]

; Corrupt MMX And XMM state
corrupt_mmx_and_xmm_registers

; Now reload the state we just saved
; NOTE(review): EAX is not reloaded before xrstor, so the restore mask is
; whatever corrupt_mmx_and_xmm_registers left in EDX:EAX -- confirm.
xrstor [rsp]

; Load the three 16bytes of "available" slots to make sure it wasn't overwritten
; Reserved can be overwritten regardless
load_fxsave_slots

hlt

; Give ourselves a region of 1000 bytes set to 0xFF
; NOTE(review): .xsave_data is not referenced by the code visible in this
; file, which saves to [rsp] -- it may be used by the included macros or be
; unused; confirm.
align 64
.xsave_data:
  times 1000 db 0xFF

; Presumably emits the .xmm_data table passed to the set_up_* macros above
define_xmm_data_section


================================================
FILE: unittests/ASM/Secondary/xsave/xsave_avx_x87.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0xA1A2A3A4A5A6A7AA", "0xABFD392482039840"],
    "XMM1":  ["0x0000000000000000", "0x0000000000000000", "0x4142434445464748", "0x3987432929293847"],
    "XMM2":  ["0x0000000000000000", "0x0000000000000000", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM3":  ["0x0000000000000000", "0x0000000000000000", "0x3132333435363738", "0xEADC3284ADCE9339"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x7172737475767778", "0x3459238471238023"],
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM11": ["0x0000000000000000", "0x0000000000000000", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM12": ["0x0000000000000000", "0x0000000000000000", "0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM14": ["0x0000000000000000", "0x0000000000000000", "0xE1E2E3EEE5E6E7E8", "0x3756438328472389"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"]
  }
}
%endif

; XSAVE/XRSTOR round trip with EAX = 0b101 (x87 + AVX components).
; All helper macros used below come from xsave_macros.mac, which is not part of
; this file. IS_AVX is defined before the include, presumably so the include
; selects its AVX variants -- TODO confirm against the macro file.
%define IS_AVX
%include "xsave_macros.mac"

; RSP is the base address of the save area used by xsave/xrstor below
mov rsp, 0xE0000000

; Set up MMX and XMM state
set_up_mmx_state .xmm_data
set_up_xmm_state .xmm_data

overwrite_fxsave_slots

; Now save our state (X87 and AVX only)
mov eax, 0b101
xsave [rsp]

; Corrupt MMX and XMM state
corrupt_mmx_and_xmm_registers

; Now reload the state we just saved
xrstor [rsp]

; Load the three 16-byte "available" slots to make sure they weren't overwritten
; Reserved can be overwritten regardless
load_fxsave_slots

hlt

; Give ourselves a region of 1000 bytes set to 0xFF
align 64
.xsave_data:
  times 1000 db 0xFF

define_xmm_data_section


================================================
FILE: unittests/ASM/Secondary/xsave/xsave_sse.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0",
    "MM1": "0",
    "MM2": "0",
    "MM3": "0",
    "MM4": "0",
    "MM5": "0",
    "MM6": "0",
    "MM7": "0",
    "XMM0":  ["0x1112131415161718", "0xABFDEC3402932039"],
    "XMM1":  ["0x2122232425262728", "0xDEFCA93847392992"],
    "XMM2":  ["0x3132333435363738", "0xEADC3284ADCE9339"],
    "XMM3":  ["0x4142434445464748", "0x3987432929293847"],
    "XMM4":  ["0x5152535455565758", "0x3764583402983799"],
    "XMM5":  ["0x6162636465666768", "0xACDEFACDEFACDEFA"],
    "XMM6":  ["0x7172737475767778", "0x3459238471238023"],
    "XMM7":  ["0x8182838485868788", "0x9347239480289299"],
    "XMM8":  ["0xCCC2C3C4C5C6C7C8", "0x3949232903428479"],
    "XMM9":  ["0xA1AAA3A4A5A6A7A8", "0x3784769228479192"],
    "XMM10": ["0xF1F2FFF4F5F6F7F8", "0x758734629799389A"],
    "XMM11": ["0xE1E2E3EEE5E6E7E8", "0x3756438328472389"],
    "XMM12": ["0xD1D2D3D4DDD6D7D8", "0x3674823989ADEF73"],
    "XMM13": ["0xC1C2C3C4C5CCC7C8", "0xABCDEF3894335820"],
    "XMM14": ["0xB1B2B3B4B5B6BBB8", "0xADEADE3894353499"],
    "XMM15": ["0xA1A2A3A4A5A6A7AA", "0xABFD392482039840"]
  },
  "HostFeatures": ["XSAVE"]
}
%endif

; XSAVE/XRSTOR round trip with EAX = 0b010 (SSE component only).
; All helper macros used below come from xsave_macros.mac, which is not part of
; this file.
%include "xsave_macros.mac"

; RSP is the base address of the save area used by xsave/xrstor below
mov rsp, 0xE0000000

; Set up MMX and XMM state
set_up_mmx_state .xmm_data
set_up_xmm_state .xmm_data

overwrite_fxsave_slots

; Now save our state (SSE only)
mov eax, 0b010
xsave [rsp]

; Corrupt MMX and XMM state
corrupt_mmx_and_xmm_registers

; Now reload the state we just saved
xrstor [rsp]

; Load the three 16-byte "available" slots to make sure they weren't overwritten
; Reserved can be overwritten regardless
load_fxsave_slots

hlt

; Give ourselves a region of 1000 bytes set to 0xFF
align 64
.xsave_data:
  times 1000 db 0xFF

define_xmm_data_section


================================================
FILE: unittests/ASM/Secondary/xsave/xsave_x87.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1111111111111111",
    "RBX": "0x2222222222222222",
    "RCX": "0x3333333333333333",
    "RDX": "0x4444444444444444",
    "RSI": "0x5555555555555555",
    "RDI": "0x6666666666666666",
    "MM0": "0x1112131415161718",
    "MM1": "0x2122232425262728",
    "MM2": "0x3132333435363738",
    "MM3": "0x4142434445464748",
    "MM4": "0x5152535455565758",
    "MM5": "0x6162636465666768",
    "MM6": "0x7172737475767778",
    "MM7": "0x8182838485868788",
    "XMM0":  ["0", "0"],
    "XMM1":  ["0", "0"],
    "XMM2":  ["0", "0"],
    "XMM3":  ["0", "0"],
    "XMM4":  ["0", "0"],
    "XMM5":  ["0", "0"],
    "XMM6":  ["0", "0"],
    "XMM7":  ["0", "0"],
    "XMM8":  ["0", "0"],
    "XMM9":  ["0", "0"],
    "XMM10": ["0", "0"],
    "XMM11": ["0", "0"],
    "XMM12": ["0", "0"],
    "XMM13": ["0", "0"],
    "XMM14": ["0", "0"],
    "XMM15": ["0", "0"]
  },
  "HostFeatures": ["XSAVE"]
}
%endif

; XSAVE/XRSTOR round trip with EAX = 0b001 (x87 component only).
; All helper macros used below come from xsave_macros.mac, which is not part of
; this file.
%include "xsave_macros.mac"

; RSP is the base address of the save area used by xsave/xrstor below
mov rsp, 0xE0000000

; Set up MMX and XMM state
set_up_mmx_state .xmm_data
set_up_xmm_state .xmm_data

overwrite_fxsave_slots

; Now save our state (X87 only)
mov eax, 0b001
xsave [rsp]

; Corrupt MMX and XMM state
corrupt_mmx_and_xmm_registers

; Now reload the state we just saved
xrstor [rsp]

; Load the three 16-byte "available" slots to make sure they weren't overwritten
; Reserved can be overwritten regardless
load_fxsave_slots

hlt

; Give ourselves a region of 1000 bytes set to 0xFF
align 64
.xsave_data:
  times 1000 db 0xFF

define_xmm_data_section


================================================
FILE: unittests/ASM/SecondaryModRM/Reg_2_0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3",
    "RDX": "0x0"
  },
  "HostFeatures": ["XSAVE"]
}
%endif

; XGETBV test: read extended control register 0 (selected by ECX = 0).
; The result is returned in EDX:EAX.
mov ecx, 0
xgetbv

; Mask only the lower two bits to get host and FEX runners to match.
; This way we can test that we're getting data back.
; Bit 0 and 1 refer to X87 and SSE respectively.
and eax, 0x3

hlt


================================================
FILE: unittests/ASM/SecondaryModRM/Reg_7_1.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["Linux"]
}
%endif

; RDTSCP smoke test.
; We can't really check the results of this (no register values are compared),
; so just ensure we execute it.
rdtscp

hlt


================================================
FILE: unittests/ASM/SecondaryModRM/Reg_7_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0"
  },
  "HostFeatures": ["CLZERO"]
}
%endif

; CLZERO test on an aligned address: fill 64 bytes with a non-zero pattern,
; run clzero, then sum the eight qwords back into RBX. The sum must be zero.

; Starting address to store to
mov rax, 0xe8000000

; Set up the cacheline with garbage
mov rbx, 0x4142434445464748
mov [rax + 8 * 0], rbx
mov [rax + 8 * 1], rbx
mov [rax + 8 * 2], rbx
mov [rax + 8 * 3], rbx
mov [rax + 8 * 4], rbx
mov [rax + 8 * 5], rbx
mov [rax + 8 * 6], rbx
mov [rax + 8 * 7], rbx

; clzero takes its address implicitly from rAX
clzero

; Accumulate all eight qwords; any surviving garbage makes RBX non-zero
mov rbx, 0

add rbx, [rax + 8 * 0]
add rbx, [rax + 8 * 1]
add rbx, [rax + 8 * 2]
add rbx, [rax + 8 * 3]
add rbx, [rax + 8 * 4]
add rbx, [rax + 8 * 5]
add rbx, [rax + 8 * 6]
add rbx, [rax + 8 * 7]

hlt


================================================
FILE: unittests/ASM/SecondaryModRM/Reg_7_4_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x000000020A121A20",
    "RDX": "0x0A121A2000000000"
  },
  "HostFeatures": ["CLZERO"]
}
%endif

; CLZERO test with an address in the middle of a cacheline.
; Three consecutive 64-byte lines are filled with distinct patterns, clzero is
; issued with RAX pointing inside the middle line, and each line is then summed
; into its own register:
;   RCX = line 0 sum (must equal 8 * 0x41424344)
;   RBX = line 1 sum (must be zero)
;   RDX = line 2 sum (must equal 8 * 0x4142434400000000, truncated to 64 bits)

; Starting address to store to
mov rax, 0xe8000000

; Set up the cachelines with garbage

; Cacheline 0
mov rbx, 0x0000000041424344
mov [rax + 8 * 0], rbx
mov [rax + 8 * 1], rbx
mov [rax + 8 * 2], rbx
mov [rax + 8 * 3], rbx
mov [rax + 8 * 4], rbx
mov [rax + 8 * 5], rbx
mov [rax + 8 * 6], rbx
mov [rax + 8 * 7], rbx

; Cacheline 1
mov rbx, 0x5152535455565758
mov [rax + 8 * 8], rbx
mov [rax + 8 * 9], rbx
mov [rax + 8 * 10], rbx
mov [rax + 8 * 11], rbx ; clzero here
mov [rax + 8 * 12], rbx
mov [rax + 8 * 13], rbx
mov [rax + 8 * 14], rbx
mov [rax + 8 * 15], rbx

; Cacheline 2
mov rbx, 0x4142434400000000
mov [rax + 8 * 16], rbx
mov [rax + 8 * 17], rbx
mov [rax + 8 * 18], rbx
mov [rax + 8 * 19], rbx
mov [rax + 8 * 20], rbx
mov [rax + 8 * 21], rbx
mov [rax + 8 * 22], rbx
mov [rax + 8 * 23], rbx

; Set RAX to the middle of cacheline 1 to ensure clzero aligns the address
; down to the start of the line rather than zeroing from RAX onwards
lea rax, [rax + 8 * 11]

clzero

; Set rax back to the start
mov rax, 0xe8000000

; Clear the three accumulators
mov rbx, 0
mov rcx, 0
mov rdx, 0

; Cacheline 0 should be unmodified
add rcx, [rax + 8 * 0]
add rcx, [rax + 8 * 1]
add rcx, [rax + 8 * 2]
add rcx, [rax + 8 * 3]
add rcx, [rax + 8 * 4]
add rcx, [rax + 8 * 5]
add rcx, [rax + 8 * 6]
add rcx, [rax + 8 * 7]

; Cacheline 1 should be zero
add rbx, [rax + 8 * 8]
add rbx, [rax + 8 * 9]
add rbx, [rax + 8 * 10]
add rbx, [rax + 8 * 11]
add rbx, [rax + 8 * 12]
add rbx, [rax + 8 * 13]
add rbx, [rax + 8 * 14]
add rbx, [rax + 8 * 15]

; Cacheline 2 should be unmodified
add rdx, [rax + 8 * 16]
add rdx, [rax + 8 * 17]
add rdx, [rax + 8 * 18]
add rdx, [rax + 8 * 19]
add rdx, [rax + 8 * 20]
add rdx, [rax + 8 * 21]
add rdx, [rax + 8 * 22]
add rdx, [rax + 8 * 23]

hlt


================================================
FILE: unittests/ASM/SelfModifyingCode/Delinking.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x20"
  }
}
%endif

; Self-modifying code test: a block that has already been executed (and jumped
; to from another block) is overwritten with NOPs and executed again.
; Control flow:
;   1. rcx = 2, rbx = 1: patched_op runs twice with its original bytes.
;   2. main overwrites the 7-byte instruction with NOPs, sets rax = 32,
;      rcx = 2 and rbx = 0, then loops again.
;   3. patched_op runs twice more; if the patch is honoured rax stays 32,
;      if stale code runs rax becomes -1.
;   4. rcx == 0 and rbx == 0, so execution reaches end.

; rcx = remaining passes through patched_op, rbx = "patch still pending" flag
mov rcx, 2
mov rbx, 1
jmp main

patched_op:
; Raw encoding of `mov rax, -1` (7 bytes); this is the patch target
db 0x48, 0xc7, 0xc0, 0xff, 0xff, 0xff, 0xff
dec rcx
jmp main

main:

; warm up the cache
cmp rcx, 0
jg patched_op

; should the test exit?
cmp rbx, 0
je end

; patch mov rax, -1 to nops
mov byte [rel patched_op + 0], 0x90
mov byte [rel patched_op + 1], 0x90
mov byte [rel patched_op + 2], 0x90
mov byte [rel patched_op + 3], 0x90
mov byte [rel patched_op + 4], 0x90
mov byte [rel patched_op + 5], 0x90
mov byte [rel patched_op + 6], 0x90

; Expected final value of rax; re-arm the loop and clear the pending flag
mov rax, 32
mov rcx, 2
mov rbx, 0
jmp main

end:
hlt

================================================
FILE: unittests/ASM/SelfModifyingCode/DifferentBlock.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x20"
  }
}
%endif

; Self-modifying code test: the patched code lives in a different block from
; the code doing the patching. patched_op is called once unmodified, then its
; first byte is replaced with 0xC3 (ret) and it is called again; rax must keep
; the value 32 set just before the second call.

; Stack for the call/ret pairs below
mov rsp, 0xe000_1000

jmp main

patched_op:
mov rax,-1
ret

main:

; warm up the cache
call patched_op

; Overwrite the first byte of patched_op with a ret opcode
mov byte [rel patched_op], 0xC3

mov rax, 32
call patched_op

hlt


================================================
FILE: unittests/ASM/SelfModifyingCode/SameBlock.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x20"
  }
}
%endif


; Self-modifying code test: the patching stores and the patched instruction
; are in the same straight-line block. The 10-byte `mov rax, imm64` below is
; replaced by NOPs before it is reached, so rax must still be 32 at hlt.
mov rax, 32

; patch mov rax,... to nops
mov byte [rel patched_op + 0], 0x90
mov byte [rel patched_op + 1], 0x90
mov byte [rel patched_op + 2], 0x90
mov byte [rel patched_op + 3], 0x90
mov byte [rel patched_op + 4], 0x90
mov byte [rel patched_op + 5], 0x90
mov byte [rel patched_op + 6], 0x90
mov byte [rel patched_op + 7], 0x90
mov byte [rel patched_op + 8], 0x90
mov byte [rel patched_op + 9], 0x90

patched_op:
mov rax,0xFABCFABCFABC0123 ; 10 bytes long

hlt

================================================
FILE: unittests/ASM/TwoByte/0F_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; SYSCALL test: issue syscall 201 and check that it returns a non-zero value.
; The result is reduced to 0/1 through a byte in scratch memory so the expected
; RAX is stable.
; NOTE(review): RDI (the syscall's first argument) is not initialised here;
; presumably the harness starts with it zeroed -- confirm.
mov rdx, 0xe0000000

; Zero the qword that will receive the setne result
mov rax, 0x0
mov [rdx + 8 * 0], rax

mov rax, 201 ; Time
syscall
; rax != 0 -> write 1 into the low byte of the scratch qword, then reload it
cmp rax, 0
setne [rdx + 8 * 0]
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_0E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  },
  "HostFeatures": ["3DNOW"]
}
%endif

; FEMMS smoke test: no register values are checked.
femms ; Just ensure it runs

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVUPS load test: write two qwords to scratch memory and load them into XMM0.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; 128-bit load; XMM0 must hold both qwords
movups xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_10_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x54cbb0180089fbcc", "0x5b0780753bfb542a"],
    "XMM1":  ["0x2a54cbb0180089fb", "0xab5b0780753bfb54"],
    "XMM2":  ["0x542a54cbb0180089", "0xccab5b0780753bfb"],
    "XMM3":  ["0xfb542a54cbb01800", "0xbcccab5b0780753b"],
    "XMM4":  ["0x3bfb542a54cbb018", "0x22bcccab5b078075"],
    "XMM5":  ["0x753bfb542a54cbb0", "0x4922bcccab5b0780"],
    "XMM6":  ["0x80753bfb542a54cb", "0xdd4922bcccab5b07"],
    "XMM7":  ["0x0780753bfb542a54", "0x92dd4922bcccab5b"],
    "XMM8":  ["0x5b0780753bfb542a", "0x7c92dd4922bcccab"],
    "XMM9":  ["0xab5b0780753bfb54", "0x787c92dd4922bccc"],
    "XMM10": ["0xccab5b0780753bfb", "0x2f787c92dd4922bc"],
    "XMM11": ["0xbcccab5b0780753b", "0x772f787c92dd4922"],
    "XMM12": ["0x22bcccab5b078075", "0x54772f787c92dd49"],
    "XMM13": ["0x4922bcccab5b0780", "0xc354772f787c92dd"],
    "XMM14": ["0xdd4922bcccab5b07", "0xbac354772f787c92"],
    "XMM15": ["0x92dd4922bcccab5b", "0xeebac354772f787c"]
  }
}
%endif

; MOVUPS unaligned-load test: load 16 bytes from every byte offset 0..15 of a
; 16-byte-aligned data table, one offset per XMM register.
lea r15, [rel .data]

; Testing unaligned 128bit loads
movups xmm0, [r15 + 0]
movups xmm1, [r15 + 1]
movups xmm2, [r15 + 2]
movups xmm3, [r15 + 3]
movups xmm4, [r15 + 4]
movups xmm5, [r15 + 5]
movups xmm6, [r15 + 6]
movups xmm7, [r15 + 7]
movups xmm8, [r15 + 8]
movups xmm9, [r15 + 9]
movups xmm10, [r15 + 10]
movups xmm11, [r15 + 11]
movups xmm12, [r15 + 12]
movups xmm13, [r15 + 13]
movups xmm14, [r15 + 14]
movups xmm15, [r15 + 15]

hlt

; 256 bytes of random data (64 dwords); only the first 31 bytes are read above
align 16
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311
align 16


================================================
FILE: unittests/ASM/TwoByte/0F_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVUPS store test: load 16 bytes into XMM0, store them with movups over a
; second 16-byte slot, and read that slot back into XMM1. Both registers must
; end up with the first slot's contents.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): source pattern
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): different pattern that the store must replace
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movups [rdx + 8 * 2], xmm0
movaps xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_12.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; MOVHLPS test: the high qword of XMM1 is copied into the low qword of XMM0;
; the high qword of XMM0 and all of XMM1 are unchanged.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): initial XMM0
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): XMM1
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movhlps xmm0, xmm1

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_13.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778"]
  }
}
%endif

; MOVLPS store test: only the low qword of XMM0 is written to memory, so the
; upper qword of the destination slot must keep its previous contents.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): source for XMM0
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): destination; its upper qword must survive the store
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movlps [rdx + 8 * 2], xmm0
; Read the whole slot back to observe both the written and the untouched half
movaps xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_13_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x5152535455565758"]
  }
}
%endif

; MOVLPS load test: the low qword of XMM0 is replaced from memory while its
; high qword keeps the value loaded earlier.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): initial XMM0
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): only its first qword is read by movlps
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movlps xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_14.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x6566676845464748", "0x6162636441424344"]
  }
}
%endif

; UNPCKLPS test: interleave the two low dwords of XMM0 with the two low dwords
; of the memory operand (result dwords: dst0, src0, dst1, src1).
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
unpcklps xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_15.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x7576777855565758", "0x7172737451525354"]
  }
}
%endif

; UNPCKHPS test: interleave the two high dwords of XMM0 with the two high
; dwords of the memory operand (result dwords: dst2, src2, dst3, src3).
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): destination operand
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): source operand
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
unpckhps xmm0, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x6162636465666768"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; MOVLHPS test: the low qword of XMM1 is copied into the high qword of XMM0;
; the low qword of XMM0 and all of XMM1 are unchanged.
lea rdx, [rel .data]

movaps xmm0, [rdx]
movaps xmm1, [rdx + 16]

movlhps xmm0, xmm1

hlt

; Two 16-byte vectors: initial XMM0, then XMM1
align 16
.data:
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/TwoByte/0F_17.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x6162636465666768"],
    "XMM1": ["0x6162636465666768", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; MOVHPS test, both directions:
;   load  - the high qword of XMM0 is replaced from memory, low qword kept
;   store - the high qword of XMM0 is written to a 64-bit memory location
lea rdx, [rel .data]

; Into register
movaps xmm0, [rdx]
movhps xmm0, [rdx + 16]

; Into memory (should only store upper half of xmm into 64-bit region of memory)
movhps [rdx + 32], xmm0
; Read back all 16 bytes: the second qword must still be the 0xFF filler
movaps xmm1, [rdx + 32]

hlt

align 4096
.data:
; Bytes 0..15: initial XMM0
dq 0x4142434445464748
dq 0x5152535455565758

; Bytes 16..31: source for the movhps load (only the first qword is read)
dq 0x6162636465666768
dq 0x7172737475767778

; Bytes 32..47: destination of the movhps store
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/TwoByte/0F_19.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; Hint-NOP test: execute NOPs with memory operands (assembler-generated and
; hand-encoded for opcodes 0F 19 .. 0F 1E) and verify that the referenced
; memory is left untouched.

; nop_enc <opcode byte>
; Emits the two-byte opcode 0F <%1> with ModRM 0x02 (memory operand [rdx])
; three times: with no prefix, with a 0x66 prefix and with a 0x48 (REX.W)
; prefix.
%macro nop_enc 1
db 0x0F
db %1
db 0x02

db 0x66
db 0x0F
db %1
db 0x02

db 0x48
db 0x0F
db %1
db 0x02

%endmacro

; RDX must point at valid data because every NOP below addresses [rdx]
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; NOP with a memory operand in the three operand sizes the assembler accepts
nop word [rdx + 8 * 0]
nop dword [rdx + 8 * 0]
nop qword [rdx + 8 * 0]

; These nops can't be encoded via regular means
nop_enc 0x19
nop_enc 0x1A
nop_enc 0x1B
nop_enc 0x1C
nop_enc 0x1D
nop_enc 0x1E

; Just ensure they didn't do anything to this memory location
movaps xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_28.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVAPS load test: XMM0 must receive the first 16 bytes of scratch memory.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): the data that is loaded
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): written but not read by this test
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_29.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVAPS store test: load 16 bytes into XMM0, store them with movaps over a
; second 16-byte slot, and read that slot back into XMM1. Both registers must
; end up with the first slot's contents.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): source pattern
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): different pattern that the store must replace
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps [rdx + 8 * 2], xmm0
movaps xmm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x0000000100000002",
    "XMM0": ["0x3f80000040000000", "0x7172737475767778"]
  }
}
%endif

; CVTPI2PS test: the two packed 32-bit integers in MM0 (2 and 1) are converted
; to single-precision floats (2.0 = 0x40000000, 1.0 = 0x3f800000) in the low
; qword of XMM0; the high qword of XMM0 is left unchanged.
mov rdx, 0xe0000000

; Qword 0: integer pair for MM0 (low dword 2, high dword 1)
mov rax, 0x0000000100000002
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16..31: initial XMM0; its upper qword must survive the conversion
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movaps xmm0, [rdx + 8 * 2]

cvtpi2ps xmm0, mm0

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; MOVNTPS test: store XMM0 with a non-temporal store into a zeroed 16-byte slot
; and read it back into XMM1. Both registers must hold the source pattern.
mov rdx, 0xe0000000

; Slot 0 (bytes 0..15): source pattern
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 1 (bytes 16..31): zeroed destination
mov rax, 0x0
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movntps [rdx + 8 * 2], xmm0
movaps xmm1, [rdx + 8 * 2]


hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x0000000100000002",
    "XMM0": ["0x3f80000040000000", "0x5152535455565758"]
  }
}
%endif

; CVTTPS2PI test: the two low single-precision floats of XMM0 (2.0 and 1.0)
; are converted with truncation to packed 32-bit integers in MM0.
; XMM0 itself must be unchanged.
mov rdx, 0xe0000000

; Bytes 0..15: XMM0; the low qword holds the floats 2.0 (low) and 1.0 (high)
mov rax, 0x3f80000040000000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16..31: the first qword pre-loads MM0 with a value to be overwritten
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 2]
movaps xmm0, [rdx + 8 * 0]

cvttps2pi mm0, xmm0

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x0000000100000002",
    "XMM0": ["0x3f80000040000000", "0x5152535455565758"]
  }
}
%endif

; CVTPS2PI test: the two low single-precision floats of XMM0 (2.0 and 1.0)
; are converted to packed 32-bit integers in MM0. XMM0 itself must be
; unchanged.
mov rdx, 0xe0000000

; Bytes 0..15: XMM0; the low qword holds the floats 2.0 (low) and 1.0 (high)
mov rax, 0x3f80000040000000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Bytes 16..31: the first qword pre-loads MM0 with a value to be overwritten
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 2]
movaps xmm0, [rdx + 8 * 0]

cvtps2pi mm0, xmm0

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; UCOMISS test: compare the low float of XMM0 (1.0) first against 4.0 and then
; against a NaN, capturing the resulting flags with LAHF each time.
;   RBX = flags after the "less than" compare   (AH = 0x03)
;   RAX = flags after the unordered compare     (AH = 0x47)
; Only the low 32 bits of each operand take part in the scalar compare.
mov rdx, 0xe0000000

; Bytes 0..15: XMM0, low float 0x3f800000 (1.0)
mov rax, 0x515253543f800000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535440000000
mov [rdx + 8 * 1], rax

; Bytes 16..31: first comparand, low float 0x40800000 (4.0)
mov rax, 0x5152535440800000
mov [rdx + 8 * 2], rax
mov rax, 0x5152535440800000
mov [rdx + 8 * 3], rax

; Bytes 32..47: second comparand, low float 0x7FC00000 (quiet NaN)
mov rax, 0x515253547FC00000
mov [rdx + 8 * 4], rax
mov rax, 0x5152535440800000
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
ucomiss xmm0, [rdx + 8 * 2]
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; Clear rax so that only AH (bits 8..15) carries data after lahf
mov rax, 0
lahf
mov rbx, rax

ucomiss xmm0, [rdx + 8 * 4]
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_2F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; COMISS test: compare the low float of XMM0 (1.0) first against 4.0 and then
; against a NaN, capturing the resulting flags with LAHF each time.
;   RBX = flags after the "less than" compare   (AH = 0x03)
;   RAX = flags after the unordered compare     (AH = 0x47)
; Only the low 32 bits of each operand take part in the scalar compare.
mov rdx, 0xe0000000

; Bytes 0..15: XMM0, low float 0x3f800000 (1.0)
mov rax, 0x515253543f800000
mov [rdx + 8 * 0], rax
mov rax, 0x5152535440000000
mov [rdx + 8 * 1], rax

; Bytes 16..31: first comparand, low float 0x40800000 (4.0)
mov rax, 0x5152535440800000
mov [rdx + 8 * 2], rax
mov rax, 0x5152535440800000
mov [rdx + 8 * 3], rax

; Bytes 32..47: second comparand, low float 0x7FC00000 (quiet NaN)
mov rax, 0x515253547FC00000
mov [rdx + 8 * 4], rax
mov rax, 0x5152535440800000
mov [rdx + 8 * 5], rax

movaps xmm0, [rdx + 8 * 0]
comiss xmm0, [rdx + 8 * 2]
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; Clear rax so that only AH (bits 8..15) carries data after lahf
mov rax, 0
lahf
mov rbx, rax

comiss xmm0, [rdx + 8 * 4]
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

mov rax, 0
lahf

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_31.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; RDTSC test: the counter value itself can't be predicted, so only check that
; the combined 64-bit result is non-zero and reduce it to 0/1 in RAX.
mov r15, 0xe0000000

; Zero the qword that will receive the setne result
mov rax, 0x0
mov [r15 + 8 * 0], rax

rdtsc
; Merge EDX:EAX into a single 64-bit value in rax
shl rdx, 32
or rax, rdx
; Non-zero -> write 1 into the low byte of the scratch qword, then reload it
cmp rax, 0
setne [r15 + 8 * 0]
mov rax, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_40.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVO test (memory source). Flags are set so that OF = 1: CMOVO must load
; 1 into RAX, and the complementary CMOVNO must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x80000000
mov r11, 0x1
mov r12, 0x2

; 32-bit 0x80000000 - 2 overflows as a signed subtraction -> OF = 1
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovo  rax, [r15 + 8 * 1]
cmovno rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_41.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNO test (memory source). Flags are set so that OF = 0: CMOVNO must load
; 1 into RAX, and the complementary CMOVO must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 does not overflow as a signed subtraction -> OF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovno rax, [r15 + 8 * 1]
cmovo  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_42.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVC test (memory source). Flags are set so that CF = 1: CMOVC must load
; 1 into RAX, and the complementary CMOVNC must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 borrows as an unsigned subtraction -> CF = 1
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovc  rax, [r15 + 8 * 1]
cmovnc rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_43.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNC test (memory source). Flags are set so that CF = 0: CMOVNC must load
; 1 into RAX, and the complementary CMOVC must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 - 1 does not borrow -> CF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovnc rax, [r15 + 8 * 1]
cmovc  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_44.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVZ test (memory source). Flags are set so that ZF = 1: CMOVZ must load
; 1 into RAX, and the complementary CMOVNZ must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; 1 - 1 = 0 -> ZF = 1
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovz  rax, [r15 + 8 * 1]
cmovnz rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_45.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNZ test (memory source). Flags are set so that ZF = 0: CMOVNZ must load
; 1 into RAX, and the complementary CMOVZ must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 - 1 is non-zero -> ZF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovnz rax, [r15 + 8 * 1]
cmovz  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_46.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVBE test (memory source). The operands compare equal, so "below or equal"
; (CF = 1 or ZF = 1) holds: CMOVBE must load 1 into RAX, and the complementary
; CMOVNBE must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; 2 - 2 = 0 -> ZF = 1, CF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovbe  rax, [r15 + 8 * 1]
cmovnbe rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_47.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNBE test (memory source). The first operand is above the second
; (CF = 0 and ZF = 0): CMOVNBE must load 1 into RAX, and the complementary
; CMOVBE must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 - 1: no borrow and non-zero -> CF = 0, ZF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovnbe rax, [r15 + 8 * 1]
cmovbe  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_48.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVS test (memory source). Flags are set so that SF = 1: CMOVS must load
; 1 into RAX, and the complementary CMOVNS must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 = -1 -> SF = 1
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovs  rax, [r15 + 8 * 1]
cmovns rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_49.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNS test (memory source). Flags are set so that SF = 0: CMOVNS must load
; 1 into RAX, and the complementary CMOVS must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; 1 - 1 = 0 -> SF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovns rax, [r15 + 8 * 1]
cmovs  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVP test (memory source). Flags are set so that PF = 1: CMOVP must load
; 1 into RAX, and the complementary CMOVNP must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x4
mov r11, 0x0
mov r12, 0x1

; 4 - 1 = 3 (0b11): even number of set bits in the low byte -> PF = 1
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovp  rax, [r15 + 8 * 1]
cmovnp rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNP test (memory source). Flags are set so that PF = 0: CMOVNP must load
; 1 into RAX, and the complementary CMOVP must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x3
mov r11, 0x0
mov r12, 0x1

; 3 - 1 = 2 (0b10): odd number of set bits in the low byte -> PF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovnp rax, [r15 + 8 * 1]
cmovp  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVL test (memory source). The first operand is less than the second as a
; signed compare (SF != OF): CMOVL must load 1 into RAX, and the complementary
; CMOVNL must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 = -1 without signed overflow -> SF = 1, OF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovl  rax, [r15 + 8 * 1]
cmovnl rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNL test (memory source). The operands compare equal, so "not less"
; (SF == OF) holds: CMOVNL must load 1 into RAX, and the complementary CMOVL
; must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; 2 - 2 = 0 -> SF = 0, OF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovnl rax, [r15 + 8 * 1]
cmovl  rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVLE test (memory source). The first operand is less than the second as a
; signed compare, so "less or equal" (ZF = 1 or SF != OF) holds: CMOVLE must
; load 1 into RAX, and the complementary CMOVNLE must leave RBX at -1.
mov r15, 0xe0000000

; Memory sources: [r15 + 0] = 0, [r15 + 8] = 1, [r15 + 16] = 2 (last one unused)
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

; Compare operands (r11 is loaded but not used)
mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 = -1 without signed overflow -> SF = 1, OF = 0, ZF = 0
cmp r10d, r12d

; mov does not touch flags, so the cmp result is still live for the cmovs
mov rax, -1
mov rbx, -1
cmovle  rax, [r15 + 8 * 1]
cmovnle rbx, [r15 + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_4F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; CMOVNLE test: after a signed "greater than" compare, cmovnle must load its
; memory operand and cmovle must leave its destination untouched.

mov r15, 0xe0000000 ; base address of the scratch memory used below

; Store the qwords 0, 1, 2 in three consecutive slots (slot 2 is never read).
mov rax, 0x0
mov [r15 + 8 * 0], rax
mov rax, 0x1
mov [r15 + 8 * 1], rax
mov rax, 0x2
mov [r15 + 8 * 2], rax

mov r10, 0x2
mov r11, 0x0 ; not used by this test
mov r12, 0x1

; 2 - 1 = 1: ZF = 0 and SF = OF, so "not less or equal" (greater) holds.
cmp r10d, r12d

; Both destinations start as -1 so an unwanted move is detectable.
mov rax, -1
mov rbx, -1
cmovnle rax, [r15 + 8 * 1] ; condition true: rax = 1
cmovle  rbx, [r15 + 8 * 0] ; condition false: rbx keeps -1

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_50.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0",
    "RBX": "0x3"
  }
}
%endif

; MOVMSKPS test: gather the sign bit of each 32-bit lane into a GPR.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; First vector: every 32-bit lane has its sign bit clear -> mask 0b0000.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second vector: lanes 0 and 1 have the sign bit set, lanes 2 and 3 do not
; -> mask 0b0011.
mov rax, 0x8000000080000000
mov [rdx + 8 * 2], rax
mov rax, 0x7000000070000000
mov [rdx + 8 * 3], rax

movaps xmm0, [rdx + 8 * 0]
movaps xmm1, [rdx + 8 * 2]
movmskps rax, xmm0 ; expected 0x0
movmskps rbx, xmm1 ; expected 0x3

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_51.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x3f8000003f800000", "0x3f8000003f800000"],
    "XMM1":  ["0x4000000040000000", "0x4000000040000000"],
    "XMM2":  ["0x4040000040400000", "0x4040000040400000"],
    "XMM3":  ["0x4080000040800000", "0x4080000040800000"],
    "XMM4":  ["0x3f8000003f800000", "0x3f8000003f800000"],
    "XMM5":  ["0x4000000040000000", "0x4000000040000000"],
    "XMM6":  ["0x4040000040400000", "0x4040000040400000"],
    "XMM7":  ["0x4080000040800000", "0x4080000040800000"]
  }
}
%endif

; SQRTPS test: packed single-precision square root with register sources
; (xmm0-xmm3) and memory sources (xmm4-xmm7). Inputs are perfect squares, so
; the expected results 1.0, 2.0, 3.0 and 4.0 are exact.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Each 16-byte slot holds one value replicated across all four float lanes.
mov rax, 0x3f8000003f800000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x3f8000003f800000
mov [rdx + 8 * 1], rax

mov rax, 0x4080000040800000 ; 4.0
mov [rdx + 8 * 2], rax
mov rax, 0x4080000040800000
mov [rdx + 8 * 3], rax

mov rax, 0x4110000041100000 ; 9.0
mov [rdx + 8 * 4], rax
mov rax, 0x4110000041100000
mov [rdx + 8 * 5], rax

mov rax, 0x4180000041800000 ; 16.0
mov [rdx + 8 * 6], rax
mov rax, 0x4180000041800000
mov [rdx + 8 * 7], rax

mov rax, 0x41c8000041c80000 ; 25.0
mov [rdx + 8 * 8], rax
mov rax, 0x41c8000041c80000
mov [rdx + 8 * 9], rax

movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 2]
movapd xmm2, [rdx + 8 * 4]
movapd xmm3, [rdx + 8 * 6]
; xmm4-xmm7 are preloaded with 25.0 only as a starting value; the memory-form
; sqrtps below overwrites all four lanes of each.
movapd xmm4, [rdx + 8 * 8]
movapd xmm5, [rdx + 8 * 8]
movapd xmm6, [rdx + 8 * 8]
movapd xmm7, [rdx + 8 * 8]

; Register-source form, source and destination are the same register.
sqrtps xmm0, xmm0
sqrtps xmm1, xmm1
sqrtps xmm2, xmm2
sqrtps xmm3, xmm3

; Memory-source form.
sqrtps xmm4, [rdx + 8 * 0]
sqrtps xmm5, [rdx + 8 * 2]
sqrtps xmm6, [rdx + 8 * 4]
sqrtps xmm7, [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_52.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8": "1",
    "R9": "1"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; RSQRTPS test. R8 ends up 1 when every result vector holds the same value in
; all four lanes; R9 ends up 1 when lane 0 of each register-form result is
; within the allowed relative error of the exact value.
; pextrd/pinsrd used below are SSE4.1 instructions.

%include "checkprecision.mac"

; Checks that all four 32-bit lanes of an xmm register are bit-identical.
; clobbers xmm15
; returns the comparison result in rax (1 = all lanes equal, 0 = otherwise)
%macro same_pdwords 1 ; receives the xmm register to check
    movd eax, %1
    movd xmm15, eax
    pshufd xmm15, xmm15, 0 ; has the lower 32 bits of %1 across all lanes
    pcmpeqd xmm15, %1 ; has equality mask on all lanes
    movmskps eax, xmm15 ; gets sign bit of each lane into eax
    cmp eax, 0b1111
    sete al
    movzx rax, al
%endmacro

section .text
global _start

_start:

;; This test checks that rsqrtps returns results within the 1.5*2^-12 relative error
;; margin and that the results are packed as a vector of four 32-bit values. Because we
;; pass in the same argument across the vector we expect the same result across the
;; vector, and we check that with the macro same_pdwords.

movapd xmm0, [rel arg1]
movapd xmm1, [rel arg2]
movapd xmm2, [rel arg3]
movapd xmm3, [rel arg4]
; xmm4-xmm7 start as 25.0; the memory-form rsqrtps below overwrites them.
movapd xmm4, [rel arg5]
movapd xmm5, [rel arg5]
movapd xmm6, [rel arg5]
movapd xmm7, [rel arg5]

; Register-source form.
rsqrtps xmm0, xmm0
rsqrtps xmm1, xmm1
rsqrtps xmm2, xmm2
rsqrtps xmm3, xmm3

; Memory-source form.
rsqrtps xmm4, [rel arg1]
rsqrtps xmm5, [rel arg2]
rsqrtps xmm6, [rel arg3]
rsqrtps xmm7, [rel arg4]

; r8 = AND of the lane-equality checks on all eight result registers.
same_pdwords xmm0
mov r8, rax
same_pdwords xmm1
and r8, rax
same_pdwords xmm2
and r8, rax
same_pdwords xmm3
and r8, rax
same_pdwords xmm4
and r8, rax
same_pdwords xmm5
and r8, rax
same_pdwords xmm6
and r8, rax
same_pdwords xmm7
and r8, rax

; r9 = AND of the values check_relerr leaves in rax for xmm0-xmm3.
; check_relerr comes from checkprecision.mac; presumably it returns 1 in rax
; when the result is within tolerance of the expected value - confirm there.
; NOTE(review): esi is never initialized in this file, so the purpose of the
; pinsrd that overwrites lane 0 after each extract is unclear - confirm.
pextrd [rel result1], xmm0, 0
pinsrd xmm0, esi, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov r9, rax

pextrd [rel result2], xmm1, 0
pinsrd xmm1, esi, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

pextrd [rel result3], xmm2, 0
pinsrd xmm2, esi, 0
check_relerr rel eresult3, rel result3, rel tolerance
and r9, rax

pextrd [rel result4], xmm3, 0
pinsrd xmm3, esi, 0
check_relerr rel eresult4, rel result4, rel tolerance
and r9, rax

hlt

; Storage for lane 0 of each register-form result (written by pextrd above).
align 4096
result1: dd 0
result2: dd 0
result3: dd 0
result4: dd 0

align 64

; Inputs: one value replicated across all four float lanes.
arg1: 
dq 0x3f8000003f800000 ; 1.0
dq 0x3f8000003f800000

arg2: 
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000

arg3:
dq 0x4110000041100000 ; 9.0
dq 0x4110000041100000

arg4: 
dq 0x4180000041800000 ; 16.0
dq 0x4180000041800000

arg5:
dq 0x41c8000041c80000 ; 25.0
dq 0x41c8000041c80000

; Exact reciprocal square roots of arg1..arg4.
align 32
eresult1:
dd 0x3f800000 ; 1.0
eresult2:
dd 0x3f000000 ; 0.5 
eresult3:
dd 0x3eaaaaab ; 1/3 = 0.(3)
eresult4:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000 ; 1.5 * 2^-12

; Macro from checkprecision.mac; presumably emits constants check_relerr needs.
define_check_data_constants


================================================
FILE: unittests/ASM/TwoByte/0F_53.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8": "1",
    "R9": "1"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

; RCPPS test. R8 ends up 1 when both result vectors hold the same value in all
; four lanes; R9 ends up 1 when lane 0 of each result is within the allowed
; relative error of the exact reciprocal.
; pextrd/pinsrd used below are SSE4.1 instructions.

%include "checkprecision.mac"

; Checks that all four 32-bit lanes of an xmm register are bit-identical.
; clobbers xmm15
; returns the comparison result in rax (1 = all lanes equal, 0 = otherwise)
%macro same_pdwords 1 ; receives the xmm register to check
    movd eax, %1
    movd xmm15, eax
    pshufd xmm15, xmm15, 0 ; has the lower 32 bits of %1 across all lanes
    pcmpeqd xmm15, %1 ; has equality mask on all lanes
    movmskps eax, xmm15 ; gets sign bit of each lane into eax
    cmp eax, 0b1111
    sete al
    movzx rax, al
%endmacro

section .text
global _start

_start:

movapd xmm0, [rel arg1]

rcpps xmm0, xmm0 ; register-source form
rcpps xmm1, [rel arg2] ; memory-source form; writes all lanes of xmm1

; r8 = AND of the lane-equality checks on both result registers.
same_pdwords xmm0
mov r8, rax
same_pdwords xmm1
and r8, rax

; r9 = AND of the values check_relerr leaves in rax for both results.
; check_relerr comes from checkprecision.mac; presumably it returns 1 in rax
; when the result is within tolerance of the expected value - confirm there.
; NOTE(review): esi is never initialized in this file, so the purpose of the
; pinsrd that overwrites lane 0 after each extract is unclear - confirm.
pextrd [rel result1], xmm0, 0
pinsrd xmm0, esi, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov r9, rax

pextrd [rel result2], xmm1, 0
pinsrd xmm1, esi, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

hlt

; Storage for lane 0 of each result (written by pextrd above).
align 4096
result1: dd 0
result2: dd 0

align 64

; Inputs: one value replicated across all four float lanes.
arg1:
dq 0x3f8000003f800000 ; 1.0
dq 0x3f8000003f800000

arg2:
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000


; Exact reciprocals of arg1 and arg2.
align 32
eresult1:
dd 0x3f800000 ; 1.0
eresult2:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000 ; 1.5 * 2^-12

; Macro from checkprecision.mac; presumably emits constants check_relerr needs.
define_check_data_constants


================================================
FILE: unittests/ASM/TwoByte/0F_54.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1010101010101010", "0x0"],
    "XMM1": ["0x1010101010101010", "0x0"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; ANDPS test: bitwise AND with a memory source (xmm0) and a register source
; (xmm1). xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand: low qword all ones, high qword zero.
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
andps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
andps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_55.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM1": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x1010101010101010", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; ANDNPS test: dest = (NOT dest) AND src, with a memory source (xmm0) and a
; register source (xmm1). xmm2 is the register source and must come out
; unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand: low qword all ones, high qword zero. After inversion
; only the high qword lets source bits through.
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
andnps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
andnps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_56.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x1111111111111111", "0x2222222222222222"],
    "XMM1": ["0x1111111111111111", "0x2222222222222222"],
    "XMM2": ["0x0101010101010101", "0x0202020202020202"]
  }
}
%endif

; ORPS test: bitwise OR with a memory source (xmm0) and a register source
; (xmm1). xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand: high nibble of every byte set.
mov rax, 0x1010101010101010
mov [rdx + 8 * 0], rax
mov rax, 0x2020202020202020
mov [rdx + 8 * 1], rax

; Source operand: matching low nibble of every byte set.
mov rax, 0x0101010101010101
mov [rdx + 8 * 2], rax
mov rax, 0x0202020202020202
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
orps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
orps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_57.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x2424242424242424", "0x2424242424242424"],
    "XMM1": ["0x2424242424242424", "0x2424242424242424"],
    "XMM2": ["0x1818181818181818", "0x1818181818181818"]
  }
}
%endif

; XORPS test: bitwise XOR with a memory source (xmm0) and a register source
; (xmm1). xmm2 is the register source and must come out unchanged.
; Per byte: 0x3C xor 0x18 = 0x24.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand.
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 0], rax
mov rax, 0x3C3C3C3C3C3C3C3C
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x1818181818181818
mov [rdx + 8 * 2], rax
mov rax, 0x1818181818181818
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
xorps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
xorps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_58.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4100000040c00000", "0x4140000041200000"],
    "XMM1": ["0x4100000040c00000", "0x4140000041200000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; ADDPS test: packed single-precision add with a memory source (xmm0) and a
; register source (xmm1). Expected lanes: 1+5=6, 2+6=8, 3+7=10, 4+8=12.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
addps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
addps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_59.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4140000040a00000", "0x4200000041a80000"],
    "XMM1": ["0x4140000040a00000", "0x4200000041a80000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; MULPS test: packed single-precision multiply with a memory source (xmm0) and
; a register source (xmm1). Expected lanes: 1*5=5, 2*6=12, 3*7=21, 4*8=32.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
mulps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
mulps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4014000000000000", "0x4018000000000000"],
    "XMM1": ["0x4014000000000000", "0x4018000000000000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; CVTPS2PD test: convert the two low single-precision lanes of the source to
; two doubles, with a memory source (xmm0) and a register source (xmm1).
; Expected: 5.0f -> 0x4014000000000000, 6.0f -> 0x4018000000000000.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Initial destination contents; the conversion overwrites the whole register.
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand; only the low qword (6, 5) is converted.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
cvtps2pd xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
cvtps2pd xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5A_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x0000004600000053", "0x0000000d00000029"],
    "XMM1": ["0x0000001600000005", "0x000000050000000a"],
    "XMM2": ["0x000000430000001d", "0x0000005b00000013"],
    "XMM3": ["0x0000003300000028", "0x0000001800000021"],
    "XMM4": ["0x000000180000005b", "0x0000005b00000063"],
    "XMM5": ["0x000000630000005b", "0x0000004a00000041"],
    "XMM6": ["0x0000001900000023", "0x0000005a00000006"],
    "XMM7": ["0x0000003400000021", "0x0000000a0000003a"],
    "XMM8": ["0x0000005400000030", "0x000000420000005a"],
    "XMM9": ["0x0000000700000060", "0x0000005f0000001a"],
    "XMM10": ["0x0000002500000058", "0x0000000a00000032"],
    "XMM11": ["0x000000140000004e", "0x000000290000000a"],
    "XMM12": ["0x0000003a0000000f", "0x000000380000000a"],
    "XMM13": ["0x0000000500000035", "0x0000000300000049"],
    "XMM14": ["0x0000004700000039", "0x000000590000003e"],
    "XMM15": ["0x0000001800000030", "0x0000006100000022"]
  }
}
%endif

; CVTPS2DQ test: convert sixteen vectors of four floats to packed signed
; 32-bit integers, memory-source form, one vector per xmm register.
; The expected values correspond to round-to-nearest
; (e.g. 83.0999 -> 0x53, 69.50512 -> 0x46).
; NOTE(review): cvtps2dq is, as far as I know, encoded as 66 0F 5B, which does
; not match the 0F_5A in this file's name - confirm the intended placement.

lea rdx, [rel .data] ; rdx = start of the float table below

cvtps2dq xmm0, [rdx + 16 * 0]
cvtps2dq xmm1, [rdx + 16 * 1]
cvtps2dq xmm2, [rdx + 16 * 2]
cvtps2dq xmm3, [rdx + 16 * 3]
cvtps2dq xmm4, [rdx + 16 * 4]
cvtps2dq xmm5, [rdx + 16 * 5]
cvtps2dq xmm6, [rdx + 16 * 6]
cvtps2dq xmm7, [rdx + 16 * 7]
cvtps2dq xmm8, [rdx + 16 * 8]
cvtps2dq xmm9, [rdx + 16 * 9]
cvtps2dq xmm10, [rdx + 16 * 10]
cvtps2dq xmm11, [rdx + 16 * 11]
cvtps2dq xmm12, [rdx + 16 * 12]
cvtps2dq xmm13, [rdx + 16 * 13]
cvtps2dq xmm14, [rdx + 16 * 14]
cvtps2dq xmm15, [rdx + 16 * 15]

hlt

; 16-byte alignment is required by the memory operands above.
align 16
; 256 bytes (64 single-precision floats) of random data
.data:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/TwoByte/0F_5B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xbf8000003f800000", "0x437f000000000000"],
    "XMM1": ["0xbf8000003f800000", "0x437f000000000000"],
    "XMM2": ["0xFFFFFFFF00000001", "0x000000FF00000000"]
  }
}
%endif

; CVTDQ2PS test: convert four signed 32-bit integers to single-precision
; floats, with a memory source (xmm0) and a register source (xmm1).
; Expected lanes: 1 -> 1.0, -1 -> -1.0, 0 -> 0.0, 255 -> 255.0.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Initial destination contents; the conversion overwrites the whole register.
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Integer source operand (comments list the high lane first).
mov rax, 0xFFFFFFFF00000001 ; -1, 1
mov [rdx + 8 * 2], rax
mov rax, 0x000000FF00000000 ; 255, 0
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
cvtdq2ps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
cvtdq2ps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5B_1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x4ea997604b09fbcc", "0x4eb60f014e6fed51"],
    "XMM1": ["0x4ef925bb4e0af333", "0x4ee5dd764ea8ee5f"],
    "XMM2": ["0x4e7cdb474e801032", "0x4ec7a6ef4d4ebfd0"],
    "XMM3": ["0x4dd2a7e14d8554d4", "0x4dc251f24e2a7a7b"],
    "XMM4": ["0x4ee0a9644ef9b631", "0x4e61e8564dd57f16"],
    "XMM5": ["0x4ec76abd4ee1ee95", "0x4e793e524e207aa8"],
    "XMM6": ["0x4e65022f4e4e3a1a", "0x4d3b0d804da1e6a4"],
    "XMM7": ["0x4edef4664c62ba7a", "0x4e762fed4db6f84e"],
    "XMM8": ["0x4e0e71db4cc820ba", "0x4e7052a14e9095a5"],
    "XMM9": ["0x4da442564e9207c6", "0x4e8169954eecc106"],
    "XMM10": ["0x4e640b604e1761d6", "0x4ddc846b4d08534a"],
    "XMM11": ["0x4e8632cb4ec5bf68", "0x4eb4517c4da46996"],
    "XMM12": ["0x4ec706e74deacf23", "0x4d0af8734dfed3e8"],
    "XMM13": ["0x4eb5c7714dc860db", "0x4e29279d4ee4fad9"],
    "XMM14": ["0x4ec56cea4e802694", "0x4ea5eed94ea2b828"],
    "XMM15": ["0x4eb073bc4d107b96", "0x4cd66f2c4ed0f799"]
  }
}
%endif

; CVTDQ2PS test: convert sixteen vectors of four signed 32-bit integers to
; single-precision floats, memory-source form, one vector per xmm register.
; Most inputs exceed 2^24, so the conversions are inexact and depend on
; rounding.

lea rdx, [rel .data] ; rdx = start of the integer table below

cvtdq2ps xmm0, [rdx + 16 * 0]
cvtdq2ps xmm1, [rdx + 16 * 1]
cvtdq2ps xmm2, [rdx + 16 * 2]
cvtdq2ps xmm3, [rdx + 16 * 3]
cvtdq2ps xmm4, [rdx + 16 * 4]
cvtdq2ps xmm5, [rdx + 16 * 5]
cvtdq2ps xmm6, [rdx + 16 * 6]
cvtdq2ps xmm7, [rdx + 16 * 7]
cvtdq2ps xmm8, [rdx + 16 * 8]
cvtdq2ps xmm9, [rdx + 16 * 9]
cvtdq2ps xmm10, [rdx + 16 * 10]
cvtdq2ps xmm11, [rdx + 16 * 11]
cvtdq2ps xmm12, [rdx + 16 * 12]
cvtdq2ps xmm13, [rdx + 16 * 13]
cvtdq2ps xmm14, [rdx + 16 * 14]
cvtdq2ps xmm15, [rdx + 16 * 15]

hlt

; 16-byte alignment is required by the memory operands above.
align 16
; 256 bytes (64 dwords) of random data
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311


================================================
FILE: unittests/ASM/TwoByte/0F_5C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc0800000c0800000", "0xc0800000c0800000"],
    "XMM1": ["0xc0800000c0800000", "0xc0800000c0800000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; SUBPS test: packed single-precision subtract (dest - src) with a memory
; source (xmm0) and a register source (xmm1). Every lane is n - (n + 4) = -4.0.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
subps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
subps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x400000003f800000", "0x4080000040400000"],
    "XMM1": ["0x400000003f800000", "0x4080000040400000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; MINPS test: packed single-precision minimum with a memory source (xmm0) and
; a register source (xmm1). Every destination lane is smaller than the
; matching source lane, so the destination values (1, 2, 3, 4) are kept.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
minps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
minps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x3eaaaaab3e4ccccd", "0x3f0000003edb6db7"],
    "XMM1": ["0x3eaaaaab3e4ccccd", "0x3f0000003edb6db7"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; DIVPS test: packed single-precision divide (dest / src) with a memory source
; (xmm0) and a register source (xmm1). Expected lanes: 1/5, 2/6, 3/7, 4/8.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Dividend (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Divisor.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
divps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
divps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_5F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0x40c0000040a00000", "0x4100000040e00000"],
    "XMM1": ["0x40c0000040a00000", "0x4100000040e00000"],
    "XMM2": ["0x40c0000040a00000", "0x4100000040e00000"]
  }
}
%endif

; MAXPS test: packed single-precision maximum with a memory source (xmm0) and
; a register source (xmm1). Every source lane is larger than the matching
; destination lane, so the source values (5, 6, 7, 8) are selected.
; xmm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Destination operand (comments list the high lane first).
mov rax, 0x400000003f800000 ; 2, 1
mov [rdx + 8 * 0], rax
mov rax, 0x4080000040400000 ; 4, 3
mov [rdx + 8 * 1], rax

; Source operand.
mov rax, 0x40c0000040a00000 ; 6, 5
mov [rdx + 8 * 2], rax
mov rax, 0x4100000040e00000 ; 8, 7
mov [rdx + 8 * 3], rax

; Memory-source form.
movapd xmm0, [rdx]
maxps xmm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movapd xmm1, [rdx]
movapd xmm2, [rdx + 8 * 2]
maxps xmm1, xmm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_60.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6545664667476848",
    "MM1": "0x6545664667476848",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKLBW (MMX) test: interleave the low four bytes of destination and
; source, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpcklbw mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpcklbw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_61.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6566454667684748",
    "MM1": "0x6566454667684748",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKLWD (MMX) test: interleave the low two words of destination and
; source, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpcklwd mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpcklwd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_62.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6566676845464748",
    "MM1": "0x6566676845464748",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKLDQ (MMX) test: result = low dword of source : low dword of
; destination, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpckldq mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpckldq mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_63.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00FF7F4100807F41",
    "MM1": "0x00FF7F4100807F41",
    "MM2": "0x0000FFFF007F0041"
  }
}
%endif

; PACKSSWB (MMX) test: pack the four words of the destination (low result
; dword) and of the source (high result dword) into signed-saturated bytes,
; with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; 16bit signed -> 8bit signed (saturated)
; input > 0x7F (SCHAR_MAX, 127) = 0x7F
; input < -128 (SCHAR_MIN, 0x80 as a byte) = 0x80

; Destination words: 0x0041, 0x007F, 0x8000 (-32768, saturates to 0x80), 0.
; Slot 1 is written but never read.
mov rax, 0x00008000007F0041
mov [rdx + 8 * 0], rax
mov rax, 0x00008000007F0041
mov [rdx + 8 * 1], rax

; Source words: 0x0041, 0x007F, 0xFFFF (-1, packs to 0xFF), 0.
; Slot 3 is written but never read.
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 2], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
packsswb mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
packsswb mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xFFFFFFFFFFFFFFFF",
    "MM1": "0xFFFFFFFFFFFFFFFF",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PCMPGTB (MMX) test: signed per-byte "greater than" compare with a memory
; source (mm0) and a register source (mm1). Every destination byte (0x7x) is
; greater than the matching source byte (0x6x), so every result byte is 0xFF.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpgtb mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpgtb mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_65.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xFFFFFFFFFFFFFFFF",
    "MM1": "0xFFFFFFFFFFFFFFFF",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PCMPGTW (MMX) test: signed per-word "greater than" compare with a memory
; source (mm0) and a register source (mm1). Every destination word is greater
; than the matching source word, so every result word is 0xFFFF.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpgtw mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpgtw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_66.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xFFFFFFFFFFFFFFFF",
    "MM1": "0xFFFFFFFFFFFFFFFF",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PCMPGTD (MMX) test: signed per-dword "greater than" compare with a memory
; source (mm0) and a register source (mm1). Both destination dwords are
; greater than the matching source dwords, so both result dwords are all ones.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpgtd mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpgtd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_67.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00007F4100007F41",
    "MM1": "0x00007F4100007F41",
    "MM2": "0x0000FFFF007F0041"
  }
}
%endif

; PACKUSWB (MMX) test: pack the four signed words of the destination (low
; result dword) and of the source (high result dword) into unsigned-saturated
; bytes, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; 16bit signed -> 8bit unsigned (saturated)
; input > 0xFF (UCHAR_MAX, 255) = 0xFF
; input < 0x00 (negative) = 0x00
; The data below only exercises pass-through (0x41, 0x7F, 0x00) and the
; negative clamp (0xFFFF = -1 -> 0x00); no word exceeds 0xFF.

; Destination operand; slot 1 is written but never read.
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 0], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 1], rax

; Source operand (same value); slot 3 is written but never read.
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 2], rax
mov rax, 0x0000FFFF007F0041
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
packuswb mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
packuswb mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_68.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6141624263436444",
    "MM1": "0x6141624263436444",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKHBW (MMX) test: interleave the high four bytes of destination and
; source, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpckhbw mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpckhbw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_69.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6162414263644344",
    "MM1": "0x6162414263644344",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKHWD (MMX) test: interleave the high two words of destination and
; source, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpckhwd mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpckhwd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_6A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6162636441424344",
    "MM1": "0x6162636441424344",
    "MM2": "0x6162636465666768"
  }
}
%endif

; PUNPCKHDQ (MMX) test: result = high dword of source : high dword of
; destination, with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
punpckhdq mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
punpckhdq mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_6B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xFFFF8000FFFF8000",
    "MM1": "0xFFFF8000FFFF8000",
    "MM2": "0xFFFFFFFF80000000"
  }
}
%endif

; PACKSSDW (MMX) test: pack the two dwords of the destination (low result
; dword) and of the source (high result dword) into signed-saturated words,
; with a memory source (mm0) and a register source (mm1).
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; 32bit signed -> 16bit signed (saturated)
; input > 0x7FFF (SHRT_MAX, 32767) = 0x7FFF
; input < -32768 (SHRT_MIN, 0x8000 as a word) = 0x8000

; Destination dwords: 0x80000000 (saturates to 0x8000), 0xFFFFFFFF (-1, packs
; to 0xFFFF). Slot 1 is written but never read.
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov rax, 0x0000000000000040
mov [rdx + 8 * 1], rax

; Source operand (same value); slot 3 is written but never read.
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 2], rax
mov rax, 0x0000000000000040
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
packssdw mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
packssdw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_6E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0000000045464748",
    "MM1": "0x5152535455565758",
    "MM2": "0x0000000045464748",
    "MM3": "0x5152535455565758"
  }
}
%endif

; MOVD/MOVQ into MMX registers, from general-purpose registers (mm0, mm1) and
; from memory (mm2, mm3). The 32-bit forms must zero the upper half of the
; MMX register.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slots 0 and 1 are the values loaded below.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-5 are written but never read by this test.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

mov rax, qword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 1]

; Register-source forms.
movd mm0, eax ; low 32 bits of rax, zero-extended to 64 bits
movq mm1, rbx

; Memory-source forms.
movd mm2, dword [rdx + 8 * 0]
movq mm3, qword [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_6E_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0000000045464748",
    "MM1": "0x5152535455565758"
  }
}
%endif

; MOVD/MOVQ from general-purpose registers into MMX registers, without any
; memory access. The 32-bit form must zero the upper half of mm0.

mov rax, 0x4142434445464748
mov rbx, 0x5152535455565758

movd mm0, eax ; low 32 bits of rax, zero-extended to 64 bits
movq mm1, rbx
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_6F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4142434445464748"
  }
}
%endif

; MOVQ mm, mm/m64 test: load MMX registers from memory, then copy one MMX
; register to another.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slots 0 and 1 are the values loaded below.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-5 are written but never read by this test.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1] ; gives mm1 a different value first
movq mm1, mm0 ; register-to-register copy must replace it with mm0's value

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_70.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x6162636465666768",
    "MM2": "0x4748474847484748",
    "MM3": "0x6162616261626162"
  }
}
%endif

; PSHUFW test: shuffle the four words of an MMX source by an 8-bit selector.
; mm0 and mm1 are the sources and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 feeds mm0; slot 1 is written but never read.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slot 2 feeds mm1; slot 3 is written but never read.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx]
movq mm1, [rdx + 8 * 2]
pshufw mm2, mm0, 0x0 ; selector 0b00 x4: broadcast word 0 (0x4748)
pshufw mm3, mm1, 0xFF ; selector 0b11 x4: broadcast word 3 (0x6162)

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_74.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00000000000000FF",
    "MM1": "0x00000000000000FF",
    "MM2": "0x6162636465666778"
  }
}
%endif

; PCMPEQB (MMX) test: per-byte equality compare with a memory source (mm0) and
; a register source (mm1). Only the lowest byte (0x78) matches, so only the
; lowest result byte is 0xFF.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465666778
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565748
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpeqb mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpeqb mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_75.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x000000000000FFFF",
    "MM1": "0x000000000000FFFF",
    "MM2": "0x6162636465667778"
  }
}
%endif

; PCMPEQW (MMX) test: per-word equality compare with a memory source (mm0) and
; a register source (mm1). Only the lowest word (0x7778) matches, so only the
; lowest result word is 0xFFFF.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x7172737475767778
mov [rdx + 8 * 0], rax
mov rax, 0x4142434445464748
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x6162636465667778
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455564748
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpeqw mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpeqw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_76.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00000000FFFFFFFF",
    "MM1": "0x00000000FFFFFFFF",
    "MM2": "0x61626364FFFFFFFF"
  }
}
%endif

; PCMPEQD (MMX) test: per-dword equality compare with a memory source (mm0)
; and a register source (mm1). Only the low dword (0xFFFFFFFF) matches, so
; only the low result dword is all ones.
; mm2 is the register source and must come out unchanged.

mov rdx, 0xe0000000 ; base address of the scratch memory used below

; Slot 0 is the destination operand; slot 1 is written but never read.
mov rax, 0x71727374FFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x41424344FFFFFFFF
mov [rdx + 8 * 1], rax

; Slot 2 is the source operand; slot 3 is written but never read.
mov rax, 0x61626364FFFFFFFF
mov [rdx + 8 * 2], rax
mov rax, 0x51525354FFFFFFFF
mov [rdx + 8 * 3], rax

; Memory-source form.
movq mm0, [rdx]
pcmpeqd mm0, [rdx + 8 * 2]

; Register-source form; must produce the same result as above.
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pcmpeqd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_77.asm
================================================
%ifdef CONFIG
{
  "RegData": {
  }
}
%endif

; Smoke test for emms: the expected RegData in the config header is empty,
; so the only requirement is that execution reaches the hlt.
emms ; Just ensure it runs

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_7E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000045464748",
    "RBX": "0x5152535455565758"
  }
}
%endif

; Round-trips a 32-bit value (movd) and a 64-bit value (movq) from general
; purpose registers into MMX registers and back again.
; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0 and 1 are the values that get round-tripped.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Qwords 2 through 5 are written but not read back in this file.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

; eax receives only the low dword of qword 0; rbx receives all of qword 1.
mov eax, dword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 1]

; GPR -> MMX
movd mm0, eax
movq mm1, rbx

; Wipe the GPRs so the final values can only come from the MMX registers.
mov rax, 0
mov rbx, 0

; MMX -> GPR (the forms whose results are checked via RAX and RBX)
movd eax, mm0
movq rbx, mm1

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_7F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4142434445464748"
  }
}
%endif

; movq store test: writes mm0 to memory and reloads it into mm1, so both
; registers are expected to end up holding qword 0.
; Base address of the data area used by this test.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Qwords 2 and 3 are written but not read back in this file.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Zero the destination slots (qwords 4 and 5).
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
; mm1 first gets a different value (qword 1) ...
movq mm1, [rdx + 8 * 1]
; ... then mm0 is stored to qword 4 and read back over mm1.
movq [rdx + 8 * 4], mm0
movq mm1, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_80.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jo / jno test. RAX is set to 1 when the first jump (jo) is taken, RBX is
; set to 1 when the complementary jump (jno) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x80000000
mov r11, 0x1
mov r12, 0x2

; 0x80000000 - 2 overflows as a signed 32-bit subtraction.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jo .tgt_1
jno .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_81.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jno / jo test. RAX is set to 1 when the first jump (jno) is taken, RBX is
; set to 1 when the complementary jump (jo) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 does not overflow as a signed 32-bit subtraction.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jno .tgt_1
jo .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_82.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jc / jnc test. RAX is set to 1 when the first jump (jc) is taken, RBX is
; set to 1 when the complementary jump (jnc) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 needs a borrow when treated as unsigned.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jc .tgt_1
jnc .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_83.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnc / jc test. RAX is set to 1 when the first jump (jnc) is taken, RBX is
; set to 1 when the complementary jump (jc) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 - 1 needs no borrow when treated as unsigned.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnc .tgt_1
jc .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_84.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jz / jnz test. RAX is set to 1 when the first jump (jz) is taken, RBX is
; set to 1 when the complementary jump (jnz) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; Both operands are equal, so the difference is zero.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jz .tgt_1
jnz .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_85.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnz / jz test. RAX is set to 1 when the first jump (jnz) is taken, RBX is
; set to 1 when the complementary jump (jz) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; The operands differ, so the difference is non-zero.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnz .tgt_1
jz .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_86.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jbe / jnbe test. RAX is set to 1 when the first jump (jbe) is taken, RBX
; is set to 1 when the complementary jump (jnbe) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; Equal operands: this exercises the "equal" half of below-or-equal.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jbe .tgt_1
jnbe .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_87.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnbe / jbe test. RAX is set to 1 when the first jump (jnbe) is taken, RBX
; is set to 1 when the complementary jump (jbe) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 is strictly above 1 as an unsigned comparison.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnbe .tgt_1
jbe .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_88.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; js / jns test. RAX is set to 1 when the first jump (js) is taken, RBX is
; set to 1 when the complementary jump (jns) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 gives a negative 32-bit result.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

js .tgt_1
jns .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_89.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jns / js test. RAX is set to 1 when the first jump (jns) is taken, RBX is
; set to 1 when the complementary jump (js) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; 1 - 1 gives zero, which is not negative.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jns .tgt_1
js .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jp / jnp test. RAX is set to 1 when the first jump (jp) is taken, RBX is
; set to 1 when the complementary jump (jnp) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x4
mov r11, 0x0
mov r12, 0x1

; 4 - 1 = 3, whose low byte has two set bits (even parity).
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jp .tgt_1
jnp .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnp / jp test. RAX is set to 1 when the first jump (jnp) is taken, RBX is
; set to 1 when the complementary jump (jp) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x3
mov r11, 0x0
mov r12, 0x1

; 3 - 1 = 2, whose low byte has a single set bit (odd parity).
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnp .tgt_1
jp .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8B_16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000000fbcc",
    "RBX": "0x00000000000089fb",
    "RCX": "0x0000000000000089",
    "RDX": "0x0000000000001800",
    "RSI": "0x000000000000b018",
    "RDI": "0x000000000000cbb0",
    "RBP": "0x00000000000054cb",
    "RSP": "0x0000000000002a54",
    "R8":  "0x000000000000542a",
    "R9":  "0x000000000000fb54",
    "R10": "0x0000000000003bfb",
    "R11": "0x000000000000753b",
    "R12": "0x0000000000008075",
    "R13": "0x0000000000000780",
    "R14": "0x0000000000005b07",
    "R15": "0x000000000001ab5b"
  }
}
%endif

; 16-bit register loads from every byte offset 0..15 of a data table, one
; destination register per offset, covering aligned and unaligned addresses.
lea r15, [rel .data]

; A 16-bit load only replaces the low word of its destination, so clear
; the full registers first. r15 is the table pointer and is not cleared;
; its final value keeps the upper bits of the table address.
mov eax,  0
mov ebx,  0
mov ecx,  0
mov edx,  0
mov esi,  0
mov edi,  0
mov ebp,  0
mov esp,  0
mov r8d,  0
mov r9d,  0
mov r10d, 0
mov r11d, 0
mov r12d, 0
mov r13d, 0
mov r14d, 0

; We only care about results here
mov ax,  word [r15 + 0]
mov bx,  word [r15 + 1]
mov cx,  word [r15 + 2]
mov dx,  word [r15 + 3]
mov si,  word [r15 + 4]
mov di,  word [r15 + 5]
mov bp,  word [r15 + 6]
mov sp,  word [r15 + 7]
; Offset 8 continues the 0..15 sequence (table bytes 2a 54 -> 0x542a).
mov r8w,  word [r15 + 8]
mov r9w,  word [r15 + 9]
mov r10w, word [r15 + 10]
mov r11w, word [r15 + 11]
mov r12w, word [r15 + 12]
mov r13w, word [r15 + 13]
mov r14w, word [r15 + 14]
; Last, because this overwrites the low word of the table pointer itself.
mov r15w, word [r15 + 15]

hlt

; 256 bytes of random data
align 16
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311


================================================
FILE: unittests/ASM/TwoByte/0F_8B_32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x000000000089fbcc",
    "RBX": "0x00000000180089fb",
    "RCX": "0x00000000b0180089",
    "RDX": "0x00000000cbb01800",
    "RSI": "0x0000000054cbb018",
    "RDI": "0x000000002a54cbb0",
    "RBP": "0x00000000542a54cb",
    "RSP": "0x00000000fb542a54",
    "R8":  "0x000000003bfb542a",
    "R9":  "0x00000000753bfb54",
    "R10": "0x0000000080753bfb",
    "R11": "0x000000000780753b",
    "R12": "0x000000005b078075",
    "R13": "0x00000000ab5b0780",
    "R14": "0x00000000ccab5b07",
    "R15": "0x00000000bcccab5b"
  }
}
%endif

; 32-bit register loads from every byte offset 0..15 of a data table, one
; destination register per offset, covering aligned and unaligned addresses.
; The expected values all have a zero upper half, so no pre-clearing of the
; destination registers is done here.
lea r15, [rel .data]

; We only care about results here
mov eax,  dword [r15 + 0]
mov ebx,  dword [r15 + 1]
mov ecx,  dword [r15 + 2]
mov edx,  dword [r15 + 3]
mov esi,  dword [r15 + 4]
mov edi,  dword [r15 + 5]
mov ebp,  dword [r15 + 6]
mov esp,  dword [r15 + 7]
; Offset 8 continues the 0..15 sequence (table bytes 2a 54 fb 3b).
mov r8d,  dword [r15 + 8]
mov r9d,  dword [r15 + 9]
mov r10d, dword [r15 + 10]
mov r11d, dword [r15 + 11]
mov r12d, dword [r15 + 12]
mov r13d, dword [r15 + 13]
mov r14d, dword [r15 + 14]
; Last, because this overwrites the table pointer itself.
mov r15d, dword [r15 + 15]

hlt


; 256 bytes of random data
align 16
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311


================================================
FILE: unittests/ASM/TwoByte/0F_8B_64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x54cbb0180089fbcc",
    "RBX": "0x2a54cbb0180089fb",
    "RCX": "0x542a54cbb0180089",
    "RDX": "0xfb542a54cbb01800",
    "RSI": "0x3bfb542a54cbb018",
    "RDI": "0x753bfb542a54cbb0",
    "RBP": "0x80753bfb542a54cb",
    "RSP": "0x0780753bfb542a54",
    "R8":  "0x5b0780753bfb542a",
    "R9":  "0xab5b0780753bfb54",
    "R10": "0xccab5b0780753bfb",
    "R11": "0xbcccab5b0780753b",
    "R12": "0x22bcccab5b078075",
    "R13": "0x4922bcccab5b0780",
    "R14": "0xdd4922bcccab5b07",
    "R15": "0x92dd4922bcccab5b"
  }
}
%endif

; 64-bit register loads from every byte offset 0..15 of a data table, one
; destination register per offset, covering aligned and unaligned addresses.
lea r15, [rel .data]

; Clear the destination registers before the loads (r15 holds the table
; pointer and is left alone).
mov eax,  0
mov ebx,  0
mov ecx,  0
mov edx,  0
mov esi,  0
mov edi,  0
mov ebp,  0
mov esp,  0
mov r8d,  0
mov r9d,  0
mov r10d, 0
mov r11d, 0
mov r12d, 0
mov r13d, 0
mov r14d, 0

; We only care about results here
mov rax, qword [r15 + 0]
mov rbx, qword [r15 + 1]
mov rcx, qword [r15 + 2]
mov rdx, qword [r15 + 3]
mov rsi, qword [r15 + 4]
mov rdi, qword [r15 + 5]
mov rbp, qword [r15 + 6]
mov rsp, qword [r15 + 7]
; Offset 8 continues the 0..15 sequence
; (table bytes 2a 54 fb 3b 75 80 07 5b).
mov r8,  qword [r15 + 8]
mov r9,  qword [r15 + 9]
mov r10, qword [r15 + 10]
mov r11, qword [r15 + 11]
mov r12, qword [r15 + 12]
mov r13, qword [r15 + 13]
mov r14, qword [r15 + 14]
; Last, because this overwrites the table pointer itself.
mov r15, qword [r15 + 15]

hlt

; 256 bytes of random data
align 16
.data:
dd 9042892,1422635032,1006326826,1527218293,582798507,2089999689,1417097080,1928248003,1074272523,1060557251,216792327,1674803041,279616115,441777196,715038375,407518795,2094733428,1884598841,447734476,947524986,1895254698,1672830628,673098253,1045402773,864978567,960531374,339530893,196139005,59435495,1870279404,383715765,1032584027,104924620,597456593,1212863084,1007986729,1224991550,344476351,1986036506,1085590199,634942853,956487659,142947491,462458211,1658827823,1125737874,344797902,1512619469,492430419,1669559173,534412544,145721129,420223845,1524873383,1920822367,709486397,1075005959,1656124734,1364988886,1391946848,151501156,1480187379,1752943752,112425311


================================================
FILE: unittests/ASM/TwoByte/0F_8C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jl / jnl test. RAX is set to 1 when the first jump (jl) is taken, RBX is
; set to 1 when the complementary jump (jnl) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 is less than 2 as a signed comparison.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jl .tgt_1
jnl .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnl / jl test. RAX is set to 1 when the first jump (jnl) is taken, RBX is
; set to 1 when the complementary jump (jl) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; Equal operands: 2 is not less than 2.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnl .tgt_1
jl .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jle / jnle test. RAX is set to 1 when the first jump (jle) is taken, RBX
; is set to 1 when the complementary jump (jnle) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 is less than 2 as a signed comparison.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jle .tgt_1
jnle .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_8F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; jnle / jle test. RAX is set to 1 when the first jump (jnle) is taken, RBX
; is set to 1 when the complementary jump (jle) is taken instead.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 is greater than 1 as a signed comparison.
cmp r10d, r12d

; Clear the result registers. The jumps below consume the flags produced
; by the cmp above, so nothing here may disturb them.
mov rax, 0
mov rbx, 0

jnle .tgt_1
jle .tgt_2
jmp .end

; Path for the condition under test.
.tgt_1:
mov rax, 1
jmp .end

; Path for the opposite condition.
.tgt_2:
mov rbx, 1
jmp .end

.end:
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_90.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; seto / setno test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x80000000
mov r11, 0x1
mov r12, 0x2

; 0x80000000 - 2 overflows as a signed 32-bit subtraction.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
seto al
setno bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_91.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setno / seto test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 does not overflow as a signed 32-bit subtraction.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setno al
seto  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_92.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setc / setnc test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 needs a borrow when treated as unsigned.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setc  al
setnc bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_93.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnc / setc test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 - 1 needs no borrow when treated as unsigned.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnc al
setc  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_94.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setz / setnz test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; Both operands are equal, so the difference is zero.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setz  al
setnz bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_95.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnz / setz test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; The operands differ, so the difference is non-zero.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnz al
setz  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_96.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setbe / setnbe test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; Equal operands: this exercises the "equal" half of below-or-equal.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setbe  al
setnbe bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_97.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnbe / setbe test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 is strictly above 1 as an unsigned comparison.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnbe al
setbe  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_98.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; sets / setns test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 - 2 gives a negative 32-bit result.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
sets  al
setns bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_99.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setns / sets test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x1

; 1 - 1 gives zero, which is not negative.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setns al
sets  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setp / setnp test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x4
mov r11, 0x0
mov r12, 0x1

; 4 - 1 = 3, whose low byte has two set bits (even parity).
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setp al
setnp  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9B.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnp / setp test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x3
mov r11, 0x0
mov r12, 0x1

; 3 - 1 = 2, whose low byte has a single set bit (odd parity).
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnp al
setp  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9C.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setl / setnl test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 is less than 2 as a signed comparison.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setl  al
setnl bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9D.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnl / setl test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x2

; Equal operands: 2 is not less than 2.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnl al
setl  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9E.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setle / setnle test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x1
mov r11, 0x0
mov r12, 0x2

; 1 is less than 2 as a signed comparison.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setle  al
setnle bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_9F.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1",
    "RBX": "0x0"
  }
}
%endif

; setnle / setle test: AL receives the condition under test and BL its
; complement, so the expected result is RAX = 1 and RBX = 0.
; r15 and r11 are loaded but not referenced again in this file.
mov r15, 0xe0000000

mov r10, 0x2
mov r11, 0x0
mov r12, 0x1

; 2 is greater than 1 as a signed comparison.
cmp r10d, r12d

; Clear the full registers first, since setcc writes only the low byte.
; The flags from the cmp above must survive until the setcc pair.
mov rax, 0
mov rbx, 0
setnle al
setle  bl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; Minimal cpuid test: queries leaf 0 and reduces the EAX result to a
; single pass/fail bit in RAX (1 when EAX came back non-zero).
; CPUID function zero
mov rax, 0

cpuid

; CPUID function zero always returns >0 in EAX
cmp eax, 0
; Clearing RAX here relies on the flags from the cmp above staying intact
; for the setnz that follows.
mov rax, 0
setnz al

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; bt with a register bit offset and a memory operand, for 16-, 32- and
; 64-bit operand sizes. Each carry result is appended to R15, oldest result
; in the highest bit; the expected value 0x3 is binary 011.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Bit 1 of the first word.
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32: past the end of the first dword.
mov r13, 32
bt dword [rdx], r13d
cfmerge

; Bit offset 192: three qwords past the base.
mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A3_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x3"
  }
}
%endif

; bt with negative register bit offsets and a memory operand, for 16-, 32-
; and 64-bit operand sizes. Each carry result is appended to R15, oldest
; result in the highest bit; the expected value 0x3 is binary 011.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last initialised dword so that the negative offsets
; below reach back into the data written above.
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Bit offset -1 with a word operand.
mov r13, -1
bt word [rdx], r13w
cfmerge

; Bit offset -32 with a dword operand.
mov r13, -32
bt dword [rdx], r13d
cfmerge

; Bit offset -192 with a qword operand.
mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434457584857",
    "RBX": "0x6162636467687576",
    "RCX": "0x8788919291929394",
    "RDX": "0xA7A8919293949596",
    "RSI": "0xB1B2B3B4B5B6B7B8"
  }
}
%endif

; shld with an immediate count on memory destinations, for 16-, 32- and
; 64-bit operand sizes. Results are read back into RAX/RBX/RCX/RDX/RSI.
; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Fill qwords 0..7 with distinct byte patterns.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax

; 16-bit: the three low words of qword 0, source ax taken from qword 1.
; Per the expected RAX, the word shifted with count 32 keeps its value.
mov rax, [rdx + 8 * 1]
shld word [rdx + 8 * 0 + 0], ax, 8
shld word [rdx + 8 * 0 + 2], ax, 16
shld word [rdx + 8 * 0 + 4], ax, 32

; 32-bit: both dwords of qword 2, source eax taken from qword 3.
; Per the expected RBX, the dword shifted with count 32 keeps its value.
mov rax, [rdx + 8 * 3]
shld dword [rdx + 8 * 2 + 0], eax, 16
shld dword [rdx + 8 * 2 + 4], eax, 32

; 64-bit: qword 4 is shifted twice (16, then 32), qword 6 by 48 and
; qword 7 by 64. Per the expected RSI, qword 7 keeps its value.
mov rax, [rdx + 8 * 5]
shld qword [rdx + 8 * 4 + 0], rax, 16
shld qword [rdx + 8 * 4 + 0], rax, 32
shld qword [rdx + 8 * 6 + 0], rax, 48
shld qword [rdx + 8 * 7 + 0], rax, 64

; Collect the results. rdx is loaded last because it is the base pointer
; for every other load.
mov rax, qword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 2]
mov rcx, qword [rdx + 8 * 4]
mov rsi, qword [rdx + 8 * 7]
mov rdx, qword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A4_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0x00000000FFFFFFFF"
  }
}
%endif

; shld with an immediate count of zero on register destinations.
; r15 (all ones) is the source for every shift.
mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, -1

; Per the expected RegData, r14, r13 and r12 keep their initial values,
; while r11 (a 32-bit destination starting from all ones) ends with its
; upper 32 bits cleared.
shld r14w, r15w, 0
shld r13d, r15d, 0
shld r12, r15, 0
shld r11d, r15d, 0

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434457584857",
    "RBX": "0x6162636467687576",
    "RCX": "0x8788919291929394",
    "RDX": "0xA7A8919293949596",
    "RSI": "0xB1B2B3B4B5B6B7B8"
  }
}
%endif

; shld with the count in cl on memory destinations, for 16-, 32- and
; 64-bit operand sizes. Results are read back into RAX/RBX/RCX/RDX/RSI.
; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Fill qwords 0..7 with distinct byte patterns.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax

; 16-bit: the three low words of qword 0, source ax taken from qword 1.
; Per the expected RAX, the word shifted with count 32 keeps its value.
mov rax, [rdx + 8 * 1]
mov cl, 8
shld word [rdx + 8 * 0 + 0], ax, cl
mov cl, 16
shld word [rdx + 8 * 0 + 2], ax, cl
mov cl, 32
shld word [rdx + 8 * 0 + 4], ax, cl

; 32-bit: both dwords of qword 2, source eax taken from qword 3.
; Per the expected RBX, the dword shifted with count 32 keeps its value.
mov rax, [rdx + 8 * 3]
mov cl, 16
shld dword [rdx + 8 * 2 + 0], eax, cl
mov cl, 32
shld dword [rdx + 8 * 2 + 4], eax, cl

; 64-bit: qword 4 is shifted twice (16, then 32), qword 6 by 48 and
; qword 7 by 64. Per the expected RSI, qword 7 keeps its value.
mov rax, [rdx + 8 * 5]
mov cl, 16
shld qword [rdx + 8 * 4 + 0], rax, cl
mov cl, 32
shld qword [rdx + 8 * 4 + 0], rax, cl
mov cl, 48
shld qword [rdx + 8 * 6 + 0], rax, cl
mov cl, 64
shld qword [rdx + 8 * 7 + 0], rax, cl

; Collect the results. rdx is loaded last because it is the base pointer
; for every other load.
mov rax, qword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 2]
mov rcx, qword [rdx + 8 * 4]
mov rsi, qword [rdx + 8 * 7]
mov rdx, qword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0x00000000FFFFFFFF"
  }
}
%endif

; shld with a zero count supplied in cl, on register destinations.
; r15 (all ones) is the source for every shift.
mov cl, 0
mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, -1

; Per the expected RegData, r14, r13 and r12 keep their initial values,
; while r11 (a 32-bit destination starting from all ones) ends with its
; upper 32 bits cleared.
shld r14w, r15w, cl
shld r13d, r15d, cl
shld r12, r15, cl
shld r11d, r15d, cl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0"
  }
}
%endif

; Checks that shld with a zero count in cl leaves the flags readable via
; lahf unchanged: R11 holds the xor of the lahf snapshots taken before and
; after the shifts, and is expected to be 0.
mov cl, 0
mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, 0

; Get the incoming flags
mov rax, 0
lahf
mov r11, rax

; Zero-count shifts at 16-, 32- and 64-bit operand sizes.
shld r14w, r15w, cl
shld r13d, r15d, cl
shld r12, r15, cl

; Get the outgoing flags
; None should have changed
mov rax, 0
lahf
; Any flag that differs between the two snapshots leaves a set bit in r11.
xor r11, rax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "r10": "1",
    "RBX": "1",
    "RDX": "1",
    "RSI": "1",
    "RDI": "1",
    "RBP": "1",
    "RSP": "0",
    "R8":  "0",
    "r9":  "0"
  }
}
%endif

; Overflow-flag behaviour of shld with a count of 1 (in cl), at 16-, 32-
; and 64-bit operand sizes. After every shift, cmovo copies 1 (from r11)
; into a dedicated result register when OF is set:
;   sign 0->1 : r10, rbx, rdx  (expected 1)
;   sign 1->0 : rsi, rdi, rbp  (expected 1)
;   sign 0->0 : rsp, r8,  r9   (expected 0)
mov cl, 1
mov r15, -1
mov r14, 0xFFFFFFFFFFFF4000
mov r13, 0xFFFFFFFF40000000
mov r12, 0x4000000000000000

; Result registers, all starting at zero.
mov r10, 0
mov rbx, 0
mov rdx, 0

mov rsi, 0
mov rdi, 0
mov rbp, 0

mov rsp, 0
mov r8, 0
mov r9, 0

; Value copied into a result register when OF is observed.
mov r11, 1

; Sign from 0->1  should set OF

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r14w, r15w, cl
cmovo r10, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r13d, r15d, cl
cmovo rbx, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r12, r15, cl
cmovo rdx, r11

; Sign from 1->0 should set OF
mov r15, -1
mov r14, 0xFFFFFFFFFFFF8000
mov r13, 0xFFFFFFFF80000000
mov r12, 0x8000000000000000

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r14w, r15w, cl
cmovo rsi, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r13d, r15d, cl
cmovo rdi, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r12, r15, cl
cmovo rbp, r11

; Sign from 0->0 should NOT set OF
mov r15, -1
mov r14, 0xFFFFFFFFFFFF0000
mov r13, 0xFFFFFFFF00000000
mov r12, 0x0000000000000000

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r14w, r15w, cl
cmovo rsp, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r13d, r15d, cl
cmovo r8, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shld r12, r15, cl
cmovo r9, r11

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Carry-flag result of shld with a count of 2 (in cl), at 16-, 32- and
; 64-bit operand sizes. cmovc copies 1 (from rsi) into RAX/RBX/RDX when CF
; is set after the corresponding shift; all three are expected to be 1.
mov cl, 2
mov r15, -1
; Each destination has the second-highest bit of its operand size set.
mov r14, 0x4141414141414000
mov r13, 0xFFFFFFFF40000000
mov r12, 0x4000000000000000

mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shld r14w, r15w, cl
cmovc rax, rsi
shld r13d, r15d, cl
cmovc rbx, rsi
shld r12, r15, cl
cmovc rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Zero-flag result of shld with a count of 2 (in cl), at 16-, 32- and
; 64-bit operand sizes. cmovz copies 1 (from rsi) into RAX/RBX/RDX when ZF
; is set after the corresponding shift; all three are expected to be 1.
mov cl, 2
; The source is zero, so only zero bits are shifted in.
mov r15, 0
; Within each operand size the only set bit is the second-highest one,
; which a shift by 2 moves out of the destination.
mov r14, 0x4141414141414000
mov r13, 0xFFFFFFFF40000000
mov r12, 0x4000000000000000

mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shld r14w, r15w, cl
cmovz rax, rsi
shld r13d, r15d, cl
cmovz rbx, rsi
shld r12, r15, cl
cmovz rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_A5_7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Sign-flag result of shld with a count of 2 (in cl), at 16-, 32- and
; 64-bit operand sizes. cmovs copies 1 (from rsi) into RAX/RBX/RDX when SF
; is set after the corresponding shift; all three are expected to be 1.
mov cl, 2
; The source is zero, so only zero bits are shifted in.
mov r15, 0
; Within each operand size the only set bit is the third-highest one,
; which a shift by 2 moves into the top bit of the destination.
mov r14, 0x4141414141412000
mov r13, 0xFFFFFFFF20000000
mov r12, 0x2000000000000000

mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shld r14w, r15w, cl
cmovs rax, rsi
shld r13d, r15d, cl
cmovs rbx, rsi
shld r12, r15, cl
cmovs rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; bts with a register bit offset and a memory operand, for 16-, 32- and
; 64-bit operand sizes. Every bts is followed by a bt of the same bit.
; The six carry results are appended to R15, oldest in the highest bit;
; the expected value 0x1F is binary 011111.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and set
mov r13, 1
bts word [rdx], r13w
cfmerge

; Ensure it is set
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32 with a dword operand, then re-test the same bit.
mov r13, 32
bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; Bit offset 192 with a qword operand, then re-test the same bit.
mov r13, 64 * 3
bts qword [rdx], r13
cfmerge

mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AB_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; bts with negative register bit offsets and a memory operand, for 16-,
; 32- and 64-bit operand sizes. Every bts is followed by a bt of the same
; bit. The six carry results are appended to R15, oldest in the highest
; bit; the expected value 0x1F is binary 011111.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last initialised dword so that the negative offsets
; below reach back into the data written above.
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and set
mov r13, -1
bts word [rdx], r13w
cfmerge

; Ensure it is set
mov r13, -1
bt word [rdx], r13w
cfmerge

; Bit offset -32 with a dword operand, then re-test the same bit.
mov r13, -32
bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; Bit offset -192 with a qword operand, then re-test the same bit.
mov r13, -64 * 3
bts qword [rdx], r13
cfmerge

mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AB_2_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; lock-prefixed bts with negative register bit offsets and a memory
; operand, for 16-, 32- and 64-bit operand sizes. Every lock bts is
; followed by a plain bt of the same bit. The six carry results are
; appended to R15, oldest in the highest bit; the expected value 0x1F is
; binary 011111.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last initialised dword so that the negative offsets
; below reach back into the data written above.
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and set
mov r13, -1
lock bts word [rdx], r13w
cfmerge

; Ensure it is set
mov r13, -1
bt word [rdx], r13w
cfmerge

; Bit offset -32 with a dword operand, then re-test the same bit.
mov r13, -32
lock bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; Bit offset -192 with a qword operand, then re-test the same bit.
mov r13, -64 * 3
lock bts qword [rdx], r13
cfmerge

mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AB_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1F"
  }
}
%endif

; lock-prefixed bts with a register bit offset and a memory operand, for
; 16-, 32- and 64-bit operand sizes. Every lock bts is followed by a plain
; bt of the same bit. The six carry results are appended to R15, oldest in
; the highest bit; the expected value 0x1F is binary 011111.

; cfmerge: shift R15 left by one and place the current carry flag in its
; bit 0. Uses R14 as scratch.
%macro cfmerge 0

; Get CF
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

; Base address of the data area used by this test.
mov rdx, 0xe0000000

; Qwords 0..2 hold 0xFFFFFFFF80000000; qword 3 holds the dword pair (1, 0).
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and set
mov r13, 1
lock bts word [rdx], r13w
cfmerge

; Ensure it is set
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32 with a dword operand, then re-test the same bit.
mov r13, 32
lock bts dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; Bit offset 192 with a qword operand, then re-test the same bit.
mov r13, 64 * 3
lock bts qword [rdx], r13
cfmerge

mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434457585847",
    "RBX": "0x6162636477786566",
    "RCX": "0x9596979897988182",
    "RDX": "0x939495969798A1A2",
    "RSI": "0xB1B2B3B4B5B6B7B8"
  }
}
%endif

; Test: `shrd` with an immediate count on memory destinations, at 16-, 32-
; and 64-bit operand sizes, including counts equal to or larger than the
; operand width. The modified memory is loaded back into registers at the end.

mov rdx, 0xe0000000

; Eight qwords of recognisable test data in slots 0..7
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax

; 16-bit: three words of slot 0, source is the low word of slot 1.
; Per the expected RAX, count 16 replaces the word with ax and count 32
; leaves the word unchanged.
mov rax, [rdx + 8 * 1]
shrd word [rdx + 8 * 0 + 0], ax, 8
shrd word [rdx + 8 * 0 + 2], ax, 16
shrd word [rdx + 8 * 0 + 4], ax, 32

; 32-bit: both dwords of slot 2, source is the low dword of slot 3.
; Per the expected RBX, count 32 leaves the dword unchanged.
mov rax, [rdx + 8 * 3]
shrd dword [rdx + 8 * 2 + 0], eax, 16
shrd dword [rdx + 8 * 2 + 4], eax, 32

; 64-bit: source is slot 5. Note that slot 4 is shifted twice (by 16, then
; by 32 on top of that result). Per the expected RSI, count 64 leaves slot 7
; unchanged.
mov rax, [rdx + 8 * 5]
shrd qword [rdx + 8 * 4 + 0], rax, 16
shrd qword [rdx + 8 * 4 + 0], rax, 32
shrd qword [rdx + 8 * 6 + 0], rax, 48
shrd qword [rdx + 8 * 7 + 0], rax, 64

; Load the results; rdx is loaded last because it is the base pointer
mov rax, qword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 2]
mov rcx, qword [rdx + 8 * 4]
mov rsi, qword [rdx + 8 * 7]
mov rdx, qword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AC_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0x00000000FFFFFFFF"
  }
}
%endif

; Test: `shrd` on registers with an immediate count of 0, at 16-, 32- and
; 64-bit operand sizes. The expected data shows the low parts unchanged;
; for the 32-bit form on r11 it shows the upper 32 bits cleared.

mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, -1

shrd r14w, r15w, 0
shrd r13d, r15d, 0
shrd r12, r15, 0
; r11 starts as all ones, so the 32-bit write is visible in its upper half
shrd r11d, r15d, 0

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434457585847",
    "RBX": "0x6162636477786566",
    "RCX": "0x9596979897988182",
    "RDX": "0x939495969798A1A2",
    "RSI": "0xB1B2B3B4B5B6B7B8"
  }
}
%endif

; Test: `shrd` with the count in cl on memory destinations, at 16-, 32- and
; 64-bit operand sizes, including counts equal to or larger than the operand
; width. The modified memory is loaded back into registers at the end.

mov rdx, 0xe0000000

; Eight qwords of recognisable test data in slots 0..7
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x8182838485868788
mov [rdx + 8 * 4], rax
mov rax, 0x9192939495969798
mov [rdx + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [rdx + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [rdx + 8 * 7], rax

; 16-bit: three words of slot 0, source is the low word of slot 1.
; Per the expected RAX, count 16 replaces the word with ax and count 32
; leaves the word unchanged.
mov rax, [rdx + 8 * 1]
mov cl, 8
shrd word [rdx + 8 * 0 + 0], ax, cl
mov cl, 16
shrd word [rdx + 8 * 0 + 2], ax, cl
mov cl, 32
shrd word [rdx + 8 * 0 + 4], ax, cl

; 32-bit: both dwords of slot 2, source is the low dword of slot 3.
; Per the expected RBX, count 32 leaves the dword unchanged.
mov rax, [rdx + 8 * 3]
mov cl, 16
shrd dword [rdx + 8 * 2 + 0], eax, cl
mov cl, 32
shrd dword [rdx + 8 * 2 + 4], eax, cl

; 64-bit: source is slot 5. Note that slot 4 is shifted twice (by 16, then
; by 32 on top of that result). Per the expected RSI, count 64 leaves slot 7
; unchanged.
mov rax, [rdx + 8 * 5]
mov cl, 16
shrd qword [rdx + 8 * 4 + 0], rax, cl
mov cl, 32
shrd qword [rdx + 8 * 4 + 0], rax, cl
mov cl, 48
shrd qword [rdx + 8 * 6 + 0], rax, cl
mov cl, 64
shrd qword [rdx + 8 * 7 + 0], rax, cl

; Load the results; rcx (which held the count) and rdx (the base pointer)
; are overwritten here, rdx last
mov rax, qword [rdx + 8 * 0]
mov rbx, qword [rdx + 8 * 2]
mov rcx, qword [rdx + 8 * 4]
mov rsi, qword [rdx + 8 * 7]
mov rdx, qword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0x00000000FFFFFFFF"
  }
}
%endif

; Test: `shrd` on registers with cl = 0, at 16-, 32- and 64-bit operand
; sizes. The expected data shows the low parts unchanged; for the 32-bit
; form on r11 it shows the upper 32 bits cleared.

mov cl, 0
mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, -1

shrd r14w, r15w, cl
shrd r13d, r15d, cl
shrd r12, r15, cl
; r11 starts as all ones, so the 32-bit write is visible in its upper half
shrd r11d, r15d, cl

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFFFFFF",
    "R14": "0x4141414141410000",
    "R13": "0",
    "R12": "0",
    "R11": "0"
  }
}
%endif

; Test: `shrd` with cl = 0 must leave the flags untouched. The flags byte is
; sampled with lahf before and after the shifts and the two samples are
; xor-ed into r11, which is expected to end up as 0.

mov cl, 0
mov r15, -1
mov r14, 0x4141414141410000
mov r13, 0
mov r12, 0
mov r11, 0

; Get the incoming flags
; (rax is zeroed first so only AH carries information)
mov rax, 0
lahf
mov r11, rax

shrd r14w, r15w, cl
shrd r13d, r15d, cl
shrd r12, r15, cl

; Get the outgoing flags
; None should have changed
mov rax, 0
lahf
xor r11, rax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "r10": "1",
    "RBX": "1",
    "RDX": "1",
    "RSI": "1",
    "RDI": "1",
    "RBP": "1",
    "RSP": "0",
    "R8":  "0",
    "r9":  "0"
  }
}
%endif

; Test: OF after `shrd` with cl = 1, at 16-, 32- and 64-bit operand sizes.
; For a 1-bit shift OF is set when the sign bit of the destination changes.
; Each case first clears OF, performs the shift, and then uses cmovo to copy
; 1 (from r11) into a per-case result register when OF is set.
;
; Result registers:
;   sign 0->1: r10 (16-bit), rbx (32-bit), rdx (64-bit) -> expect 1
;   sign 1->0: rsi (16-bit), rdi (32-bit), rbp (64-bit) -> expect 1
;   sign 0->0: rsp (16-bit), r8  (32-bit), r9  (64-bit) -> expect 0
; RDI was previously written by the test but missing from RegData, so the
; 32-bit 1->0 case was never checked.

mov cl, 1
mov r15, -1
mov r14, 0xFFFFFFFFFFFF0000
mov r13, 0xFFFFFFFF00000000
mov r12, 0

; Result registers all start at 0
mov r10, 0
mov rbx, 0
mov rdx, 0

mov rsi, 0
mov rdi, 0
mov rbp, 0

mov rsp, 0
mov r8, 0
mov r9, 0

; Value copied into a result register when OF is set
mov r11, 1

; Sign from 0->1  should set OF
; (destination low part is 0, source is all ones, so a 1 is shifted into the sign bit)

; Let's clear OF really quick
; (a 1-bit rotate of zero leaves OF clear)
mov rax, 0
ror rax, 1

shrd r14w, r15w, cl
cmovo r10, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r13d, r15d, cl
cmovo rbx, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r12, r15, cl
cmovo rdx, r11

; Sign from 1->0 should set OF
; (only the sign bit of each destination is set, source is 0)
mov r15, 0
mov r14, 0xFFFFFFFFFFFF8000
mov r13, 0xFFFFFFFF80000000
mov r12, 0x8000000000000000

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r14w, r15w, cl
cmovo rsi, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r13d, r15d, cl
cmovo rdi, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r12, r15, cl
cmovo rbp, r11

; Sign from 0->0 should NOT set OF
; (bit 0 of the source is clear, so a 0 is shifted into the sign bit)
mov r15, 0xFFFFFFFFFFFFFFFE
mov r14, 0xFFFFFFFFFFFF0000
mov r13, 0xFFFFFFFF00000000
mov r12, 0x0000000000000000

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r14w, r15w, cl
cmovo rsp, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r13d, r15d, cl
cmovo r8, r11

; Let's clear OF really quick
mov rax, 0
ror rax, 1

shrd r12, r15, cl
cmovo r9, r11

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Test: CF after `shrd` with cl = 2, at 16-, 32- and 64-bit operand sizes.
; Bit 1 of each destination is set, so the last bit shifted out is 1.
; cmovc copies 1 (from rsi) into the per-size result register when CF is set.

mov cl, 2
mov r15, -1
mov r14, 0x4141414141410002
mov r13, 0xFFFFFFFF00000002
mov r12, 0x0000000000000002

; Result registers start at 0; rsi holds the value to copy on CF
mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shrd r14w, r15w, cl
cmovc rax, rsi
shrd r13d, r15d, cl
cmovc rbx, rsi
shrd r12, r15, cl
cmovc rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Test: ZF after `shrd` with cl = 2, at 16-, 32- and 64-bit operand sizes.
; The destinations hold only bit 1 in the shifted part and the source is 0,
; so every result is zero. cmovz copies 1 (from rsi) into the per-size
; result register when ZF is set.

mov cl, 2
mov r15, 0
mov r14, 0x4141414141410002
mov r13, 0xFFFFFFFF00000002
mov r12, 0x0000000000000002

; Result registers start at 0; rsi holds the value to copy on ZF
mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shrd r14w, r15w, cl
cmovz rax, rsi
shrd r13d, r15d, cl
cmovz rbx, rsi
shrd r12, r15, cl
cmovz rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AD_7.asm
================================================

%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "1",
    "RDX": "1"
  }
}
%endif

; Test: SF after `shrd` with cl = 2, at 16-, 32- and 64-bit operand sizes.
; The low two bits of the source (0b10) are shifted into the top of each
; destination, setting its sign bit. cmovs copies 1 (from rsi) into the
; per-size result register when SF is set.

mov cl, 2
mov r15, 0xFFFFFFFFFFFFFFF2
mov r14, 0x4141414141410000
mov r13, 0xFFFFFFFF00000000
mov r12, 0x0000000000000000

; Result registers start at 0; rsi holds the value to copy on SF
mov rax, 0
mov rbx, 0
mov rdx, 0
mov rsi, 1

shrd r14w, r15w, cl
cmovs rax, rsi
shrd r13d, r15d, cl
cmovs rbx, rsi
shrd r12, r15, cl
cmovs rdx, rsi

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x5C00",
    "RBX": "0x54D45400",
    "RSI": "0x4ECE4DCD4CCC4C00"
  }
}
%endif

; Test: two-operand `imul reg, mem` at 16-, 32- and 64-bit operand sizes.
; Each multiplies -128 by a value from memory. The 16- and 32-bit products
; are stored to zeroed memory and read back as full qwords; the 64-bit
; product is kept in rsi.

mov r15, 0xe0000000

; Multiplicands in slots 0..2
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r15 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r15 + 8 * 2], rax

; Zeroed result slots 3 and 4
mov rax, 0x0
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax

; 16-bit
mov ax, -128
imul ax, word [r15 + 8 * 0 + 0]
mov word [r15 + 8 * 3 + 0], ax

; 32-bit
mov eax, -128
imul eax, dword [r15 + 8 * 1 + 0]
mov dword [r15 + 8 * 4 + 0], eax

; 64-bit
mov rax, -128
imul rax, qword [r15 + 8 * 2 + 0]
mov rsi, rax

; Read the narrow results back as qwords (upper bytes are the zero fill)
mov rax, [r15 + 8 * 3]
mov rbx, [r15 + 8 * 4]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_AF_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x000f000000c00030"
  }
}
%endif

; Test: OF and CF after two-operand `imul reg, reg` at 16-, 32- and 64-bit
; operand sizes. After every imul the pair OF:CF is appended to r15
; (two bits per multiplication, oldest result in the highest bits).

; Uses RAX and RBX and stores result in r15
; OF:CF
%macro ofcfmerge 0
  ; Capture the low flags byte in AH before the shifts below change the flags
  lahf

  ; Load OF
  mov rbx, 0
  seto bl

  shl r15, 1
  or r15, rbx
  shl r15, 1

  ; Insert CF
  ; (move AH down to AL; CF is bit 0 of the captured flags byte)
  shr ax, 8
  and rax, 1
  or r15, rax
%endmacro

; NOTE: r8 is not referenced again in this file
mov r8, 0xe0000000
mov r15, 0

; Negative * Negative
; (most negative 16-bit and 32-bit values squared)
mov eax, 0x00008000
mov ebx, 0x00008000
imul ax, bx
ofcfmerge

mov eax, 0x80000000
mov ebx, 0x80000000
imul eax, ebx
ofcfmerge

; Positive * Positive
mov rax, 128
mov rbx, 32
imul ax, bx
ofcfmerge

mov rax, 128
mov rbx, 32
imul eax, ebx
ofcfmerge

mov rax, 128
mov rbx, 32
imul rax, rbx
ofcfmerge

; Negative * Positive
mov rax, -128
mov rbx, 32
imul ax, bx
ofcfmerge

mov rax, -128
mov rbx, 32
imul eax, ebx
ofcfmerge

mov rax, -128
mov rbx, 32
imul rax, rbx
ofcfmerge

; Positive * Negative
mov rax, 128
mov rbx, -32
imul ax, bx
ofcfmerge

mov rax, 128
mov rbx, -32
imul eax, ebx
ofcfmerge

mov rax, 128
mov rbx, -32
imul rax, rbx
ofcfmerge

; Negative * Negative
mov rax, -128
mov rbx, -32
imul ax, bx
ofcfmerge

mov rax, -128
mov rbx, -32
imul eax, ebx
ofcfmerge

mov rax, -128
mov rbx, -32
imul rax, rbx
ofcfmerge

; Positive * Positive Overflow
; (128 * 256 = 32768 exceeds only the signed 16-bit range; the expected
; value has OF:CF set for the 16-bit case and clear for 32/64-bit)
mov rax, 128
mov rbx, 256
imul ax, bx
ofcfmerge

mov rax, 128
mov rbx, 256
imul eax, ebx
ofcfmerge

mov rax, 128
mov rbx, 256
imul rax, rbx
ofcfmerge

; Negative * Positive Overflow
; (-128 * 256 = -32768 still fits in 16 bits; the expected value has
; OF:CF clear for all three sizes)
mov rax, -128
mov rbx, 256
imul ax, bx
ofcfmerge

mov rax, -128
mov rbx, 256
imul eax, ebx
ofcfmerge

mov rax, -128
mov rbx, 256
imul rax, rbx
ofcfmerge

; Positive * Negative Overflow
; (128 * -256 = -32768 still fits in 16 bits; the expected value has
; OF:CF clear for all three sizes)
mov rax, 128
mov rbx, -256
imul ax, bx
ofcfmerge


; XXX: Claiming this is an overflow
; NOTE(review): the expected R15 value encodes OF:CF = 0:0 for this 32-bit
; case, so the note above looks stale - TODO confirm
mov rax, 128
mov rbx, -256
imul eax, ebx
ofcfmerge

mov rax, 128
mov rbx, -256
imul rax, rbx
ofcfmerge

; Negative * Negative Overflow
; (-128 * -256 = 32768 exceeds only the signed 16-bit range)
mov rax, -128
mov rbx, -256
imul ax, bx
ofcfmerge

mov rax, -128
mov rbx, -256
imul eax, ebx
ofcfmerge

mov rax, -128
mov rbx, -256
imul rax, rbx
ofcfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546FF48",
    "RBX": "0x51525354FFFF5758",
    "RCX": "0xFFFFFFFF65666768",
    "RDX": "0xFFFFFFFFFFFFFFFF",
    "RBP": "0x4748",
    "RSI": "0x55565758",
    "RDI": "0x6162636465666768",
    "RSP": "0x7172737475767778",
    "R8": "0x7172737475767778"
  }
}
%endif

; Test: `cmpxchg` with a memory destination at 8-, 16-, 32- and 64-bit
; operand sizes, naturally aligned. Each size has a failing ("False") and a
; succeeding ("True") compare. After every cmpxchg the accumulator is stored
; to a result slot; data slots 0..3 and result slots 4..8 are loaded into
; registers at the end.

mov r10, 0xe0000000

; Data slots 0..3
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax

; Zeroed result slots 4..8
mov rax, 0
mov [r10 + 8 * 4], rax
mov [r10 + 8 * 5], rax
mov [r10 + 8 * 6], rax
mov [r10 + 8 * 7], rax
mov [r10 + 8 * 8], rax

; 8-bit
; False
mov rax, 0
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 0], cl
mov [r10 + 8 * 4 + 0], al

; True
mov rax, 0x47
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 1], cl
mov [r10 + 8 * 4 + 1], al

; 16-bit
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 0], cx
mov [r10 + 8 * 5 + 0], ax

; True
mov rax, 0x5556
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 2], cx
mov [r10 + 8 * 5 + 2], ax

; 32-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 0], ecx
mov [r10 + 8 * 6 + 0], eax

; True
mov rax, 0x61626364
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 4], ecx
mov [r10 + 8 * 6 + 4], eax

; 64-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 7 + 0], rax

; True
mov rax, 0x7172737475767778
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 8], rax

; Load data slots (rax..rdx) and result slots (rbp, rsi, rdi, rsp, r8)
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]
mov rbp, [r10 + 8 * 4]
mov rsi, [r10 + 8 * 5]
mov rdi, [r10 + 8 * 6]
mov rsp, [r10 + 8 * 7]
mov r8, [r10 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_10.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xffffffffffff9748",
    "R14": "0xffffffffffff4648",
    "R13": "0xffffffffffff8748",
    "R12": "0xffffffffffff4648",
    "R11": "0x0000000045468748",
    "R10": "0xffffffff45464648",
    "R9":  "0x4142434445468648",
    "R8":  "0x4142434445464648"
  }
}
%endif

; Test: `cmpxchg` with a register destination at 8-, 16-, 32- and 64-bit
; operand sizes; checks the accumulator and the flags. After each cmpxchg,
; lahf places the low flags byte in AH and the whole of rax is saved, so
; each result register holds the accumulator with byte 1 replaced by flags.

; NOTE: rdx is not referenced again in this file
mov rdx, 0xe0000000

; 8bit
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFFFFFFFF47
mov rbx, 0
cmpxchg cl, bl
lahf
mov r15, rax

; Match
mov rax, 0xFFFFFFFFFFFFFF48
mov rbx, 0
cmpxchg cl, bl
lahf
mov r14, rax

; 16bit
; (rcx is reloaded because the matching compare above replaced cl with bl)
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFFFFFF4748
mov rbx, 0
cmpxchg cx, bx
lahf
mov r13, rax

; Match
mov rax, 0xFFFFFFFFFFFF6148
mov rbx, 0
cmpxchg cx, bx
lahf
mov r12, rax

; 32bit
mov rcx, 0x4142434445466148

; Not a match
; (the expected R11 shows the upper 32 bits of rax cleared by the load into eax)
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg ecx, ebx
lahf
mov r11, rax

; Match
; (the expected R10 shows the upper 32 bits of rax preserved)
mov rax, 0xFFFFFFFF45466148
mov rbx, 0
cmpxchg ecx, ebx
lahf
mov r10, rax

; 64bit
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg rcx, rbx
lahf
mov r9, rax

; Match
mov rax, 0x4142434445466148
mov rbx, 0
cmpxchg rcx, rbx
lahf
mov r8, rax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_11.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4142434445466148",
    "R14": "0x4142434445466100",
    "R13": "0x4142434445466148",
    "R12": "0x4142434445460000",
    "R11": "0x4142434445466148",
    "R10": "0x0000000000000000",
    "R9":  "0x4142434445466148",
    "R8":  "0x0000000000000000"
  }
}
%endif

; Test: `cmpxchg` with a register destination at 8-, 16-, 32- and 64-bit
; operand sizes; checks the destination register (rcx) after a failing and
; a succeeding compare. On a match the destination receives bl/bx/ebx/rbx,
; which is 0 here.

; 8bit
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFFFFFFFF47
mov rbx, 0
cmpxchg cl, bl
mov r15, rcx

; Match
mov rax, 0xFFFFFFFFFFFFFF48
mov rbx, 0
cmpxchg cl, bl
mov r14, rcx

; 16bit
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFFFFFF4748
mov rbx, 0
cmpxchg cx, bx
mov r13, rcx

; Match
mov rax, 0xFFFFFFFFFFFF6148
mov rbx, 0
cmpxchg cx, bx
mov r12, rcx

; 32bit
mov rcx, 0x4142434445466148

; Not a match
; (the expected R11 shows all 64 bits of rcx preserved)
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg ecx, ebx
mov r11, rcx

; Match
; (the expected R10 shows the upper 32 bits of rcx cleared by the 32-bit write)
mov rax, 0xFFFFFFFF45466148
mov rbx, 0
cmpxchg ecx, ebx
mov r10, rcx

; 64bit
mov rcx, 0x4142434445466148

; Not a match
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg rcx, rbx
mov r9, rcx

; Match
mov rax, 0x4142434445466148
mov rbx, 0
cmpxchg rcx, rbx
mov r8, rcx

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000055565758",
    "RBX": "0x5152535455565758",
    "RCX": "0x6162636465666768"
  }
}
%endif

; Test: failing 32-bit `cmpxchg` with a register destination. eax does not
; equal ebx, so the accumulator receives ebx; the expected RAX shows its
; upper 32 bits cleared, while RBX and RCX keep their full 64-bit values.

; NOTE: r10 is not referenced again in this file
mov r10, 0xe0000000

mov rax, 0x4142434445464748
mov rbx, 0x5152535455565758
mov rcx, 0x6162636465666768
cmpxchg ebx, ecx

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546FF48",
    "RBX": "0x51525354FFFF5758",
    "RCX": "0xFFFFFFFF65666768",
    "RDX": "0xFFFFFFFFFFFFFFFF",
    "RBP": "0x4748",
    "RSI": "0x55565758",
    "RDI": "0x6162636465666768",
    "RSP": "0x7172737475767778",
    "R8": "0x7172737475767778"
  }
}
%endif

; Test: `cmpxchg` with a memory destination at 8-, 16-, 32- and 64-bit
; operand sizes, with every address misaligned by one byte. Each size has a
; failing ("False") and a succeeding ("True") compare. After every cmpxchg
; the accumulator is stored to a result slot; data slots 0..3 and result
; slots 4..8 are loaded into registers at the end.

; Offset everything by 1 byte
; Everything stays within 16byte boundary but unaligned
; NOTE(review): the qword accesses at [r10 + 8 * 3] cover
; 0xe0000019-0xe0000020 and so do cross a 16-byte boundary
mov r10, 0xe0000001

; Data slots 0..3
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax

; Zeroed result slots 4..8
mov rax, 0
mov [r10 + 8 * 4], rax
mov [r10 + 8 * 5], rax
mov [r10 + 8 * 6], rax
mov [r10 + 8 * 7], rax
mov [r10 + 8 * 8], rax

; 8-bit
; False
mov rax, 0
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 0], cl
mov [r10 + 8 * 4 + 0], al

; True
mov rax, 0x47
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 1], cl
mov [r10 + 8 * 4 + 1], al

; 16-bit
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 0], cx
mov [r10 + 8 * 5 + 0], ax

; True
mov rax, 0x5556
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 2], cx
mov [r10 + 8 * 5 + 2], ax

; 32-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 0], ecx
mov [r10 + 8 * 6 + 0], eax

; True
mov rax, 0x61626364
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 4], ecx ; NOTE(review): originally annotated "Wrong" with no explanation - confirm
mov [r10 + 8 * 6 + 4], eax

; 64-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 7 + 0], rax

; True
mov rax, 0x7172737475767778
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 8], rax

; Load data slots (rax..rdx) and result slots (rbp, rsi, rdi, rsp, r8)
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]
mov rbp, [r10 + 8 * 4]
mov rsi, [r10 + 8 * 5]
mov rdi, [r10 + 8 * 6]
mov rsp, [r10 + 8 * 7]
mov r8, [r10 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243444546FF48",
    "RBX": "0x51525354FFFF5758",
    "RCX": "0xFFFFFFFF65666768",
    "RDX": "0xFFFFFFFFFFFFFFFF",
    "RBP": "0x4748",
    "RSI": "0x55565758",
    "RDI": "0x6162636465666768",
    "RSP": "0x7172737475767778",
    "R8": "0x7172737475767778"
  }
}
%endif

; Test: `cmpxchg` with a memory destination at 8-, 16-, 32- and 64-bit
; operand sizes, with every address misaligned by fifteen bytes. Each size
; has a failing ("False") and a succeeding ("True") compare. After every
; cmpxchg the accumulator is stored to a result slot; data slots 0..3 and
; result slots 4..8 are loaded into registers at the end.

; Offset everything by 15 bytes
; (e.g. the dword at [r10 + 8 * 2] covers 0xe000001f-0xe0000022 and so
; straddles a 16-byte boundary)
mov r10, 0xe000000f

; Data slots 0..3
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax

; Zeroed result slots 4..8
mov rax, 0
mov [r10 + 8 * 4], rax
mov [r10 + 8 * 5], rax
mov [r10 + 8 * 6], rax
mov [r10 + 8 * 7], rax
mov [r10 + 8 * 8], rax

; 8-bit
; False
mov rax, 0
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 0], cl
mov [r10 + 8 * 4 + 0], al

; True
mov rax, 0x47
mov rcx, 0xFF
cmpxchg [r10 + 8 * 0 + 1], cl
mov [r10 + 8 * 4 + 1], al

; 16-bit
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 0], cx
mov [r10 + 8 * 5 + 0], ax

; True
mov rax, 0x5556
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 2], cx
mov [r10 + 8 * 5 + 2], ax

; 32-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 0], ecx
mov [r10 + 8 * 6 + 0], eax

; True
mov rax, 0x61626364
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 4], ecx
mov [r10 + 8 * 6 + 4], eax

; 64-bit
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 7 + 0], rax

; True
mov rax, 0x7172737475767778
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 3 + 0], rcx
mov [r10 + 8 * 8], rax

; Load data slots (rax..rdx) and result slots (rbp, rsi, rdi, rsp, r8)
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]
mov rbp, [r10 + 8 * 4]

mov rsi, [r10 + 8 * 5]
mov rdi, [r10 + 8 * 6]

mov rsp, [r10 + 8 * 7]
mov r8, [r10 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455FFFF58",
    "RCX": "0x616263FFFF666768",
    "RDX": "0xFF72737475767778",
    "RBP": "0x81828384858687FF",
    "RSI": "0xB1B2B3B4B5B6B7B8",
    "RDI": "0xC1C2C3C4C5C6C7C8",
    "RSP": "0xFF42434445464748",
    "R8":  "0x51525354555657FF",
    "R9":  "0x6465646556574647",
    "R11": "0x000000005841C8B1",
    "R12": "0xA891A89188718871",
    "R13": "0xFF92939495969798",
    "R14": "0xA1A2A3A4A5A6A7FF"
  }
}
%endif

; Test: 16-bit `cmpxchg` on misaligned memory, covering the alignment cases
; listed in the table below. Each offset has a failing ("False") and a
; succeeding ("True") compare. After every cmpxchg ax is stored to its own
; 16-bit result slot, so both the memory update and the accumulator output
; of every case are checked.
;
; Result slot layout (one word per case, False then True):
;   [r10 + 8 * 9]  : offset 1, offset 3
;   [r10 + 8 * 10] : offset 7, offset 15
;   [r10 + 8 * 11] : offset 63
;
; Fixes relative to the previous revision of this test:
;   - The "Offset 15 / True" case compared against 0x8871 (the offset 7
;     value) while memory holds 0xA891, so it never succeeded.
;   - The "Offset 63 / False" result was stored on top of the
;     "Offset 15 / True" result.
;   - The qwords touched by the offset 15 case are now checked (R13, R14).

mov r10, 0xe0000000

; Data slots 0..8
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax
mov rax, 0x8182838485868788
mov [r10 + 8 * 4], rax
mov rax, 0x9192939495969798
mov [r10 + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [r10 + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [r10 + 8 * 7], rax
mov rax, 0xC1C2C3C4C5C6C7C8
mov [r10 + 8 * 8], rax

; Zeroed result slots 9..13
mov rax, 0
mov [r10 + 8 * 9], rax
mov [r10 + 8 * 10], rax
mov [r10 + 8 * 11], rax
mov [r10 + 8 * 12], rax
mov [r10 + 8 * 13], rax

; Data slots 15 and 16, used by the succeeding offset 63 case
mov rax, 0x4142434445464748
mov [r10 + 8 * 15], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 16], rax

; 16bit unaligned edges test
; Offsets   | Test                                |
; =============================================================
; 1         | Misaligned inside 32bit region      | 32bit CAS
; 3         | Misaligned through to 64bit region  | 64bit CAS
; 7         | Misaligned through to 128bit region | 128bit CAS
; 15        | Misaligned through to 256bit region | Dual 8bit/64bit CAS *CAN TEAR*
; 63        | Misaligned across 64byte cachelines | Dual 8bit/64bit CAS *CAN TEAR*

; Offset 1
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 0 + 1], cx
mov [r10 + 8 * 9 + 0], ax

; True
mov rax, 0x5657
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 1 + 1], cx
mov [r10 + 8 * 9 + 2], ax

; Offset 3
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 2 + 3], cx
mov [r10 + 8 * 9 + 4], ax

; True
mov rax, 0x6465
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 2 + 3], cx
mov [r10 + 8 * 9 + 6], ax

; Offset 7
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 3 + 7], cx
mov [r10 + 8 * 10 + 0], ax

; True
; (the word is the top byte of slot 3 and the low byte of slot 4)
mov rax, 0x8871
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 3 + 7], cx
mov [r10 + 8 * 10 + 2], ax

; Offset 15
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 4 + 15], cx
mov [r10 + 8 * 10 + 4], ax

; True
; (the word is the top byte of slot 5 and the low byte of slot 6)
mov rax, 0xA891
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 4 + 15], cx
mov [r10 + 8 * 10 + 6], ax

; Offset 63
; False
mov rax, 0
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 7 + 7], cx
mov [r10 + 8 * 11 + 0], ax

; True
; (uses the second cacheline boundary, between slots 15 and 16, so that
; slots 7 and 8 stay unmodified)
mov rax, 0x5841
mov rcx, 0xFFFF
cmpxchg [r10 + 8 * 15 + 7], cx
mov [r10 + 8 * 11 + 2], ax

; Data slots
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]
mov rbp, [r10 + 8 * 4]
mov r13, [r10 + 8 * 5]
mov r14, [r10 + 8 * 6]

mov rsi, [r10 + 8 * 7]
mov rdi, [r10 + 8 * 8]

mov rsp, [r10 + 8 * 15]
mov r8, [r10 + 8 * 16]

; Result slots
mov r9, [r10 + 8 * 9]
mov r12, [r10 + 8 * 10]
mov r11, [r10 + 8 * 11]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x414243ffffffff48",
    "RBX": "0x5152535455565758",
    "RCX": "0xffffff6465666768",
    "RDX": "0x71727374757677ff",
    "RBP": "0xffffff9495969798",
    "RSI": "0xffffffb4b5b6b7b8",
    "RDI": "0xc1c2c3c4c5c6c7ff",
    "RSP": "0x4445464744454647",
    "R8":  "0x7861626378616263",
    "R9":  "0xa8919293a8919293",
    "R10": "0xc8b1b2b3c8b1b2b3",
    "R11": "0xa1a2a3a4a5a6a7ff"
  }
}
%endif

; Test: 32-bit `cmpxchg` on misaligned memory, covering the alignment cases
; listed in the table below. Each offset has a failing ("False") and a
; succeeding ("True") compare. After every cmpxchg eax is stored to a result
; slot (False in the low dword, True in the high dword).
;
; Fix relative to the previous revision of this test: the "Offset 13" case
; addressed [r10 + 8 * 4 + 5], which is offset 5 within its 16-byte region
; and never crossed a 16-byte boundary. It now addresses [r10 + 8 * 5 + 5]
; (byte 45, i.e. offset 13 within the 16-byte region starting at byte 32),
; and the two qwords it modifies are checked through RBP and R11.

mov r10, 0xe0000000

; Data slots 0..8
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax
mov rax, 0x8182838485868788
mov [r10 + 8 * 4], rax
mov rax, 0x9192939495969798
mov [r10 + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [r10 + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [r10 + 8 * 7], rax
mov rax, 0xC1C2C3C4C5C6C7C8
mov [r10 + 8 * 8], rax

; Zeroed result slots 9..13
mov rax, 0
mov [r10 + 8 * 9], rax
mov [r10 + 8 * 10], rax
mov [r10 + 8 * 11], rax
mov [r10 + 8 * 12], rax
mov [r10 + 8 * 13], rax

; Slots 15 and 16 are initialised but not otherwise used by this test
mov rax, 0x4142434445464748
mov [r10 + 8 * 15], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 16], rax

; 32bit unaligned edges test
; Offsets       | Test                                |
; =============================================================
; 1,2,3         | Misaligned through to 64bit region  | 64bit CAS
; 5,6,7,9,10,11 | Misaligned through to 128bit region | 128bit CAS
; 13,14,15      | Misaligned through to 256bit region | Dual 32bit/64bit CAS *CAN TEAR*
; 61,62,63      | Misaligned across 64byte cachelines | Dual 32bit/64bit CAS *CAN TEAR*

; Offset 1
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 0 + 1], ecx
mov [r10 + 8 * 9 + 0], eax

; True
mov rax, 0x44454647
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 0 + 1], ecx
mov [r10 + 8 * 9 + 4], eax

; Offset 5
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 5], ecx
mov [r10 + 8 * 10 + 0], eax

; True
; (the dword is the top three bytes of slot 2 and the low byte of slot 3)
mov rax, 0x78616263
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 2 + 5], ecx
mov [r10 + 8 * 10 + 4], eax

; Offset 13
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 5 + 5], ecx
mov [r10 + 8 * 11 + 0], eax

; True
; (the dword is the top three bytes of slot 5 and the low byte of slot 6)
mov rax, 0xA8919293
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 5 + 5], ecx
mov [r10 + 8 * 11 + 4], eax

; Offset 61
; False
mov rax, 0
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 7 + 5], ecx
mov [r10 + 8 * 12 + 0], eax

; True
; (the dword is the top three bytes of slot 7 and the low byte of slot 8;
; an earlier revision marked this case "Wrong" without explanation, but the
; compare value matches those bytes)
mov rax, 0xC8B1B2B3
mov rcx, 0xFFFFFFFF
cmpxchg [r10 + 8 * 7 + 5], ecx
mov [r10 + 8 * 12 + 4], eax

; Data slots
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]
mov rbp, [r10 + 8 * 5]
mov r11, [r10 + 8 * 6]
mov rsi, [r10 + 8 * 7]
mov rdi, [r10 + 8 * 8]

; Result slots; r10 is loaded last because it is the base pointer
mov rsp, [r10 + 8 * 9]

mov r8, [r10 + 8 * 10]
mov r9, [r10 + 8 * 11]
mov r10, [r10 + 8 * 12]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xffffffffffffff48",
    "RBX": "0xffffffffffffffff",
    "RCX": "0x61626364656667ff",
    "RDI": "0xffffffffffffffb8",
    "RSP": "0xc1c2c3c4c5c6c7ff",
    "R8":  "0x6851525354555657",
    "R9":  "0xc8b1b2b3b4b5b6b7",
    "R10": "0xc8b1b2b3b4b5b6b7",
    "R11": "0x6851525354555657",
    "R12": "0x5841424344454647",
    "R13": "0x5841424344454647"
  }
}
%endif

; Test: 64-bit `cmpxchg` on misaligned memory, covering the alignment cases
; listed in the table below. Each offset has a failing ("False") and a
; succeeding ("True") compare. After every cmpxchg rax is stored to its own
; result slot.
;
; Result slot layout (False, True):
;   offset 1  : [r10 + 8 * 13], [r10 + 8 * 14]
;   offset 9  : [r10 + 8 * 9],  [r10 + 8 * 10]
;   offset 57 : [r10 + 8 * 11], [r10 + 8 * 12]
;
; Fix relative to the previous revision of this test: the offset 1 and
; offset 9 cases shared result slots 9 and 10, so the offset 1 results were
; overwritten, and slot 9 was never read back. The offset 1 results now use
; slots 13 and 14 and all six results are checked (R8..R13).

mov r10, 0xe0000000

; Data slots 0..8
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax
mov rax, 0x8182838485868788
mov [r10 + 8 * 4], rax
mov rax, 0x9192939495969798
mov [r10 + 8 * 5], rax
mov rax, 0xA1A2A3A4A5A6A7A8
mov [r10 + 8 * 6], rax
mov rax, 0xB1B2B3B4B5B6B7B8
mov [r10 + 8 * 7], rax
mov rax, 0xC1C2C3C4C5C6C7C8
mov [r10 + 8 * 8], rax

; Zeroed result slots 9..14
mov rax, 0
mov [r10 + 8 * 9], rax
mov [r10 + 8 * 10], rax
mov [r10 + 8 * 11], rax
mov [r10 + 8 * 12], rax
mov [r10 + 8 * 13], rax
mov [r10 + 8 * 14], rax

; Slots 15 and 16 are initialised but not otherwise used by this test
mov rax, 0x4142434445464748
mov [r10 + 8 * 15], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 16], rax

; 64bit unaligned edges test
; Offsets       | Test                                |
; =============================================================
; [1,7]           | Misaligned through to 128bit region | 128bit CAS
; [9,15], [57,63] | Misaligned through to 256bit region | Dual 64bit CAS

; Offset 1
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 0 + 1], rcx
mov [r10 + 8 * 13], rax

; True
; (the qword is the top seven bytes of slot 0 and the low byte of slot 1)
mov rax, 0x5841424344454647
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 0 + 1], rcx
mov [r10 + 8 * 14], rax

; Offset 9
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 1 + 1], rcx
mov [r10 + 8 * 9 + 0], rax

; True
; (the qword is the top seven bytes of slot 1 and the low byte of slot 2)
mov rax, 0x6851525354555657
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 1 + 1], rcx
mov [r10 + 8 * 10], rax

; Offset 57
; False
mov rax, 0
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 7 + 1], rcx
mov [r10 + 8 * 11 + 0], rax

; True
; (the qword is the top seven bytes of slot 7 and the low byte of slot 8)
mov rax, 0xC8B1B2B3B4B5B6B7
mov rcx, 0xFFFFFFFFFFFFFFFF
cmpxchg [r10 + 8 * 7 + 1], rcx
mov [r10 + 8 * 12], rax

; Data slots
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdi, [r10 + 8 * 7]
mov rsp, [r10 + 8 * 8]

; Result slots; r10 is loaded last because it is the base pointer
mov r11, [r10 + 8 * 9]
mov r12, [r10 + 8 * 13]
mov r13, [r10 + 8 * 14]

mov r8, [r10 + 8 * 10]
mov r9, [r10 + 8 * 11]
mov r10, [r10 + 8 * 12]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x4142434445469748",
    "R14": "0x4142434445464600",
    "R13": "0x4142434445468748",
    "R12": "0x4142434445464600",
    "R11": "0x4142434445468748",
    "R10": "0x4142434400004600",
    "R9":  "0x4142434445468748",
    "R8":  "0x0000000000004600"
  }
}
%endif

; Test: `cmpxchg` with a memory destination at 8-, 16-, 32- and 64-bit
; operand sizes; checks the destination memory and the flags. After each
; cmpxchg the memory qword is loaded into rax (mov does not change flags),
; then lahf places the low flags byte in AH. Each result register therefore
; holds the memory qword with byte 1 replaced by the flags.

mov rdx, 0xe0000000

; 8bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0x47
mov rbx, 0
cmpxchg byte [rdx + 8 * 0], bl
mov rax, [rdx + 8 * 0]
lahf
mov r15, rax

; Match
mov rax, 0x48
mov rbx, 0
cmpxchg byte [rdx + 8 * 0], bl
mov rax, [rdx + 8 * 0]
lahf
mov r14, rax

; 16bit
; (memory is re-initialised because the matching compare above modified it)
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0x4748
mov rbx, 0
cmpxchg word [rdx + 8 * 0], bx
mov rax, [rdx + 8 * 0]
lahf
mov r13, rax

; Match
mov rax, 0x6148
mov rbx, 0
cmpxchg word [rdx + 8 * 0], bx
mov rax, [rdx + 8 * 0]
lahf
mov r12, rax

; 32bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0x45464748
mov rbx, 0
cmpxchg dword [rdx + 8 * 0], ebx
mov rax, [rdx + 8 * 0]
lahf
mov r11, rax

; Match
mov rax, 0x45466148
mov rbx, 0
cmpxchg dword [rdx + 8 * 0], ebx
mov rax, [rdx + 8 * 0]
lahf
mov r10, rax

; 64bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0x45464748
mov rbx, 0
cmpxchg qword [rdx + 8 * 0], rbx
mov rax, [rdx + 8 * 0]
lahf
mov r9, rax

; Match
mov rax, 0x4142434445466148
mov rbx, 0
cmpxchg qword [rdx + 8 * 0], rbx
mov rax, [rdx + 8 * 0]
lahf
mov r8, rax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B0_9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xffffffffffff9748",
    "R14": "0xffffffffffff4648",
    "R13": "0xffffffffffff8748",
    "R12": "0xffffffffffff4648",
    "R11": "0x0000000045468748",
    "R10": "0xffffffff45464648",
    "R9":  "0x4142434445468648",
    "R8":  "0x4142434445464648"
  }
}
%endif

; Test: `cmpxchg` with a memory destination at 8-, 16-, 32- and 64-bit
; operand sizes; checks the accumulator and the flags. After each cmpxchg,
; lahf places the low flags byte in AH and the whole of rax is saved, so
; each result register holds the accumulator with byte 1 replaced by flags.

mov rdx, 0xe0000000

; 8bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0xFFFFFFFFFFFFFF47
mov rbx, 0
cmpxchg byte [rdx + 8 * 0], bl
lahf
mov r15, rax

; Match
mov rax, 0xFFFFFFFFFFFFFF48
mov rbx, 0
cmpxchg byte [rdx + 8 * 0], bl
lahf
mov r14, rax

; 16bit
; (memory is re-initialised because the matching compare above modified it)
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0xFFFFFFFFFFFF4748
mov rbx, 0
cmpxchg word [rdx + 8 * 0], bx
lahf
mov r13, rax

; Match
mov rax, 0xFFFFFFFFFFFF6148
mov rbx, 0
cmpxchg word [rdx + 8 * 0], bx
lahf
mov r12, rax

; 32bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
; (the expected R11 shows the upper 32 bits of rax cleared by the load into eax)
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg dword [rdx + 8 * 0], ebx
lahf
mov r11, rax

; Match
; (the expected R10 shows the upper 32 bits of rax preserved)
mov rax, 0xFFFFFFFF45466148
mov rbx, 0
cmpxchg dword [rdx + 8 * 0], ebx
lahf
mov r10, rax

; 64bit
mov rax, 0x4142434445466148
mov [rdx + 8 * 0], rax

; Not a match
mov rax, 0xFFFFFFFF45464748
mov rbx, 0
cmpxchg qword [rdx + 8 * 0], rbx
lahf
mov r9, rax

; Match
mov rax, 0x4142434445466148
mov rbx, 0
cmpxchg qword [rdx + 8 * 0], rbx
lahf
mov r8, rax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; Test: `btr` on memory with positive register bit offsets, at 16-, 32- and
; 64-bit operand sizes. Each `btr` is followed by a `bt` of the same bit,
; which must then read as clear. The carry flag after every instruction is
; appended to r15 (oldest result ends up in the highest bit).

; cfmerge: shift the current CF into bit 0 of r15. Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; (sbb r14, r14 computes 0 - CF; the and keeps only bit 0)
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: three qwords of 0xFFFFFFFF80000000, then the dwords 1 and 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and reset
; 16-bit operand, bit offset 1
mov r13, 1
btr word [rdx], r13w
cfmerge

; Ensure it is clear
mov r13, 1
bt word [rdx], r13w
cfmerge

; 32-bit operand, bit offset 32 (one dword past rdx)
mov r13, 32
btr dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; 64-bit operand, bit offset 192 (three qwords past rdx)
mov r13, 64 * 3
btr qword [rdx], r13
cfmerge

mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B3_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; Test: `btr` on memory with negative register bit offsets, at 16-, 32- and
; 64-bit operand sizes. Each `btr` is followed by a `bt` of the same bit,
; which must then read as clear. The carry flag after every instruction is
; appended to r15 (oldest result ends up in the highest bit).

; cfmerge: shift the current CF into bit 0 of r15. Clobbers r14 and the flags.
%macro cfmerge 0

; Get CF
; (sbb r14, r14 computes 0 - CF; the and keeps only bit 0)
sbb r14, r14
and r14, 1

; Merge in to results
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: three qwords of 0xFFFFFFFF80000000, then the dwords 1 and 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last dword so the negative bit offsets below
; reach back into the data written above
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and reset
; 16-bit operand, bit offset -1
mov r13, -1
btr word [rdx], r13w
cfmerge

; Ensure it is clear
mov r13, -1
bt word [rdx], r13w
cfmerge

; 32-bit operand, bit offset -32
mov r13, -32
btr dword [rdx], r13d
cfmerge

bt dword [rdx], r13d
cfmerge

; 64-bit operand, bit offset -192
mov r13, -64 * 3
btr qword [rdx], r13
cfmerge

mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B3_2_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; LOCK BTR (atomic bit test and reset) with NEGATIVE register bit offsets, so
; the accessed memory lies below the addressed operand. Each lock btr is
; followed by a plain bt of the same bit; every CF is appended to R15.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last dword so that negative offsets walk back into the data
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and reset (offset -1 is bit 15 of the word at [rdx - 2], which starts out clear)
mov r13, -1
lock btr word [rdx], r13w
cfmerge

; Ensure it is clear
mov r13, -1
bt word [rdx], r13w
cfmerge

; Offset -32 selects bit 0 of the dword at [rdx - 4]
mov r13, -32
lock btr dword [rdx], r13d
cfmerge

; Re-test the bit that was just reset
bt dword [rdx], r13d
cfmerge

; Offset -64 * 3 selects bit 0 of the qword at [rdx - 8 * 3]
mov r13, -64 * 3
lock btr qword [rdx], r13
cfmerge

; Re-test the bit that was just reset
mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,0,1,0,1,0 gives R15 = 0xA
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B3_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xA"
  }
}
%endif

; LOCK BTR (atomic bit test and reset) with a memory destination and a
; register bit offset, at word, dword and qword operand sizes. Each lock btr is
; followed by a plain bt of the same bit; every CF is appended to R15.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and reset (bit 1 of the word at [rdx], which starts out clear)
mov r13, 1
lock btr word [rdx], r13w
cfmerge

; Ensure it is clear
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32 reaches past the addressed dword: bit 0 of the dword at [rdx + 4]
mov r13, 32
lock btr dword [rdx], r13d
cfmerge

; Re-test the bit that was just reset
bt dword [rdx], r13d
cfmerge

; Bit offset 64 * 3 selects bit 0 of the qword at [rdx + 8 * 3]
mov r13, 64 * 3
lock btr qword [rdx], r13
cfmerge

; Re-test the bit that was just reset
mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,0,1,0,1,0 gives R15 = 0xA
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x41424344454600FF",
    "R14": "0x00000000000000FF",
    "R13": "0x00000000000000FF",
    "R12": "0x41424344454600FF",
    "R11": "0x00000000000000FF",
    "R10": "0x00000000000000FF"
  }
}
%endif

; MOVZX from a byte source into 16, 32 and 64-bit destinations, once from
; memory and once from a register.
mov rdx, 0xe0000000

; Source byte is 0xFF both in memory and in al (rax stays all ones)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax

; Pre-fill the destinations so that preserved upper bits are visible
mov r15, 0x4142434445464748
mov r14, 0x4142434445464748
mov r13, 0x4142434445464748
mov r12, 0x4142434445464748
mov r11, 0x4142434445464748
mov r10, 0x4142434445464748

; Memory source. The 16-bit form only replaces the low word of r15
movzx r15w, byte [rdx + 8 * 0]
movzx r14d, byte [rdx + 8 * 0]
movzx r13,  byte [rdx + 8 * 0]

; Register source. The 16-bit form only replaces the low word of r12
movzx r12w, al
movzx r11d, al
movzx r10,  al

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_B7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0x000000000000FFFF",
    "R13": "0x000000000000FFFF",
    "R12": "0x000000000000FFFF",
    "R11": "0x000000000000FFFF"
  }
}
%endif

; MOVZX from a word source into 32 and 64-bit destinations, once from memory
; and once from a register.
mov rdx, 0xe0000000

; Source word is 0xFFFF both in memory and in ax (rax stays all ones)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax

; Pre-fill the destinations so that stale upper bits would be visible
mov r14, 0x4142434445464748
mov r13, 0x4142434445464748
mov r12, 0x4142434445464748
mov r11, 0x4142434445464748

; Memory source
movzx r14d, word [rdx + 8 * 0]
movzx r13,  word [rdx + 8 * 0]

; Register source
movzx r12d, ax
movzx r11,  ax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; BTC (bit test and complement) with a memory destination and a register bit
; offset, at word, dword and qword operand sizes. Each btc is followed by a bt
; of the same bit; every resulting CF is appended to R15 through cfmerge.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and complement (bit 1 of the word at [rdx], which starts out clear)
mov r13, 1
btc word [rdx], r13w
cfmerge

; Ensure it is set (the complement flipped it from 0 to 1)
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32 reaches past the addressed dword: bit 0 of the dword at [rdx + 4]
mov r13, 32
btc dword [rdx], r13d
cfmerge

; Re-test the bit that was just complemented (now clear)
bt dword [rdx], r13d
cfmerge

; Bit offset 64 * 3 selects bit 0 of the qword at [rdx + 8 * 3]
mov r13, 64 * 3
btc qword [rdx], r13
cfmerge

; Re-test the bit that was just complemented (now clear)
mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,1,1,0,1,0 gives R15 = 0x1A
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BB_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; BTC (bit test and complement) with NEGATIVE register bit offsets, so the
; accessed memory lies below the addressed operand. Each btc is followed by a
; bt of the same bit; every resulting CF is appended to R15 through cfmerge.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last dword so that negative offsets walk back into the data
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and complement (offset -1 is bit 15 of the word at [rdx - 2], which starts out clear)
mov r13, -1
btc word [rdx], r13w
cfmerge

; Ensure it is set (the complement flipped it from 0 to 1)
mov r13, -1
bt word [rdx], r13w
cfmerge

; Offset -32 selects bit 0 of the dword at [rdx - 4]
mov r13, -32
btc dword [rdx], r13d
cfmerge

; Re-test the bit that was just complemented (now clear)
bt dword [rdx], r13d
cfmerge

; Offset -64 * 3 selects bit 0 of the qword at [rdx - 8 * 3]
mov r13, -64 * 3
btc qword [rdx], r13
cfmerge

; Re-test the bit that was just complemented (now clear)
mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,1,1,0,1,0 gives R15 = 0x1A
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BB_2_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; LOCK BTC (atomic bit test and complement) with NEGATIVE register bit
; offsets, so the accessed memory lies below the addressed operand. Each lock
; btc is followed by a plain bt of the same bit; every CF is appended to R15.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

; Point rdx at the last dword so that negative offsets walk back into the data
lea rdx, [rdx + 8 * 3 + 4]

xor r15, r15 ; Will contain our results

; Test and complement (offset -1 is bit 15 of the word at [rdx - 2], which starts out clear)
mov r13, -1
lock btc word [rdx], r13w
cfmerge

; Ensure it is set (the complement flipped it from 0 to 1)
mov r13, -1
bt word [rdx], r13w
cfmerge

; Offset -32 selects bit 0 of the dword at [rdx - 4]
mov r13, -32
lock btc dword [rdx], r13d
cfmerge

; Re-test the bit that was just complemented (now clear)
bt dword [rdx], r13d
cfmerge

; Offset -64 * 3 selects bit 0 of the qword at [rdx - 8 * 3]
mov r13, -64 * 3
lock btc qword [rdx], r13
cfmerge

; Re-test the bit that was just complemented (now clear)
mov r13, -64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,1,1,0,1,0 gives R15 = 0x1A
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BB_Atomic.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x1A"
  }
}
%endif

; LOCK BTC (atomic bit test and complement) with a memory destination and a
; register bit offset, at word, dword and qword operand sizes. Each lock btc is
; followed by a plain bt of the same bit; every CF is appended to R15.
%macro cfmerge 0

; Get CF (sbb of a register with itself gives 0 or -1, masked down to 0 or 1)
sbb r14, r14
and r14, 1

; Merge in to results (older results move up, the newest CF lands in bit 0)
shl r15, 1
or r15, r14

%endmacro

mov rdx, 0xe0000000

; Test data: qwords 0-2 hold 0xFFFFFFFF80000000, dword at +24 is 1, dword at +28 is 0
mov rax, 0xFFFFFFFF80000000
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov rax, 0x01
mov [rdx + 8 * 3], eax
mov rax, 0x0
mov [rdx + 8 * 3 + 4], eax

xor r15, r15 ; Will contain our results

; Test and complement (bit 1 of the word at [rdx], which starts out clear)
mov r13, 1
lock btc word [rdx], r13w
cfmerge

; Ensure it is set (the complement flipped it from 0 to 1)
mov r13, 1
bt word [rdx], r13w
cfmerge

; Bit offset 32 reaches past the addressed dword: bit 0 of the dword at [rdx + 4]
mov r13, 32
lock btc dword [rdx], r13d
cfmerge

; Re-test the bit that was just complemented (now clear)
bt dword [rdx], r13d
cfmerge

; Bit offset 64 * 3 selects bit 0 of the qword at [rdx + 8 * 3]
mov r13, 64 * 3
lock btc qword [rdx], r13
cfmerge

; Re-test the bit that was just complemented (now clear)
mov r13, 64 * 3
bt qword [rdx], r13
cfmerge

; CF sequence 0,1,1,0,1,0 gives R15 = 0x1A
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFF0000",
    "R14": "0x0",
    "R13": "0x0",
    "R12": "0xFFFFFFFFFFFF0004",
    "R11": "0x04",
    "R10": "0x04",
    "R9":  "0xFFFFFFFFFFFFFFFF",
    "R8":  "0xFFFFFFFFFFFFFFFF",
    "RSI": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; BSF (bit scan forward) at word, dword and qword operand sizes against three
; memory sources: all ones, 0x10 repeated in every byte, and zero.
mov rdx, 0xe0000000

mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x1010101010101010
mov [rdx + 8 * 1], rax
mov rax, 0
mov [rdx + 8 * 2], rax

; Pre-fill every destination with -1 so untouched bits are visible
mov r15, -1
mov r14, -1
mov r13, -1
mov r12, -1
mov r11, -1
mov r10, -1
mov r9,  -1
mov r8,  -1
mov rsi, -1

; All-ones source: lowest set bit is bit 0
bsf r15w, word  [rdx + 8 * 0]
bsf r14d, dword [rdx + 8 * 0]
bsf r13,  qword [rdx + 8 * 0]

; 0x10 pattern: lowest set bit is bit 4
bsf r12w, word  [rdx + 8 * 1]
bsf r11d, dword [rdx + 8 * 1]
bsf r10,  qword [rdx + 8 * 1]

; Zero source: the CONFIG expects the destinations to keep their -1 values
bsf r9w, word  [rdx + 8 * 2]
bsf r8d, dword [rdx + 8 * 2]
bsf rsi, qword [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0xFFFFFFFFFFFF000F",
    "R14": "0x1F",
    "R13": "0x3F",
    "R12": "0xFFFFFFFFFFFF000C",
    "R11": "0x1C",
    "R10": "0x3C",
    "R9":  "0xFFFFFFFFFFFFFFFF",
    "R8":  "0xFFFFFFFFFFFFFFFF",
    "RSI": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; BSR (bit scan reverse) at word, dword and qword operand sizes against three
; memory sources: all ones, 0x10 repeated in every byte, and zero.
mov rdx, 0xe0000000

mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x1010101010101010
mov [rdx + 8 * 1], rax
mov rax, 0
mov [rdx + 8 * 2], rax

; Pre-fill every destination with -1 so untouched bits are visible
mov r15, -1
mov r14, -1
mov r13, -1
mov r12, -1
mov r11, -1
mov r10, -1
mov r9,  -1
mov r8,  -1
mov rsi, -1

; All-ones source: highest set bit is the top bit of the operand (15, 31, 63)
bsr r15w, word  [rdx + 8 * 0]
bsr r14d, dword [rdx + 8 * 0]
bsr r13,  qword [rdx + 8 * 0]

; 0x10 pattern: highest set bit is bit 4 of the top byte (12, 28, 60)
bsr r12w, word  [rdx + 8 * 1]
bsr r11d, dword [rdx + 8 * 1]
bsr r10,  qword [rdx + 8 * 1]

; Zero source: the CONFIG expects the destinations to keep their -1 values
bsr r9w, word  [rdx + 8 * 2]
bsr r8d, dword [rdx + 8 * 2]
bsr rsi, qword [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x414243444546FFFF",
    "R14": "0x00000000FFFFFFFF",
    "R13": "0xFFFFFFFFFFFFFFFF",
    "R12": "0x414243444546FFFF",
    "R11": "0x00000000FFFFFFFF",
    "R10": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; MOVSX from a byte source into 16, 32 and 64-bit destinations, once from
; memory and once from a register.
mov rdx, 0xe0000000

; Source byte is 0xFF (negative) both in memory and in al (rax stays all ones)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax

; Pre-fill the destinations so that preserved upper bits are visible
mov r15, 0x4142434445464748
mov r14, 0x4142434445464748
mov r13, 0x4142434445464748
mov r12, 0x4142434445464748
mov r11, 0x4142434445464748
mov r10, 0x4142434445464748

; Memory source. The 16-bit form only replaces the low word of r15
movsx r15w, byte [rdx + 8 * 0]
movsx r14d, byte [rdx + 8 * 0]
movsx r13,  byte [rdx + 8 * 0]

; Register source. The 16-bit form only replaces the low word of r12
movsx r12w, al
movsx r11d, al
movsx r10,  al

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_BF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R14": "0x00000000FFFFFFFF",
    "R13": "0xFFFFFFFFFFFFFFFF",
    "R12": "0x00000000FFFFFFFF",
    "R11": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; MOVSX from a word source into 32 and 64-bit destinations, once from memory
; and once from a register.
mov rdx, 0xe0000000

; Source word is 0xFFFF (negative) both in memory and in ax (rax stays all ones)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax

; Pre-fill the destinations so that stale upper bits would be visible
mov r14, 0x4142434445464748
mov r13, 0x4142434445464748
mov r12, 0x4142434445464748
mov r11, 0x4142434445464748

; Memory source
movsx r14d, word [rdx + 8 * 0]
movsx r13,  word [rdx + 8 * 0]

; Register source
movsx r12d, ax
movsx r11,  ax

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464749",
    "RBX": "0x5152535455565759",
    "RCX": "0x6162636465666769",
    "RDX": "0x7172737475767779",
    "R15": "0x49",
    "R14": "0x5759",
    "R13": "0x65666769",
    "R12": "0x7172737475767779"
  }
}
%endif

; XADD with a memory destination at byte, word, dword and qword operand sizes.
mov r10, 0xe0000000

; One distinct qword per operand size under test
mov rax, 0x4142434445464748
mov [r10 + 8 * 0], rax
mov rax, 0x5152535455565758
mov [r10 + 8 * 1], rax
mov rax, 0x6162636465666768
mov [r10 + 8 * 2], rax
mov rax, 0x7172737475767778
mov [r10 + 8 * 3], rax

; First pass: add 1 to each location (rax is reloaded with 1 every time
; because xadd overwrites it with the previous memory value)
mov rax, 0x01
xadd  byte [r10 + 8 * 0], al
mov rax, 0x01
xadd  word [r10 + 8 * 1], ax
mov rax, 0x01
xadd dword [r10 + 8 * 2], eax
mov rax, 0x01
xadd qword [r10 + 8 * 3], rax

; Read back the incremented memory for checking
mov rax, [r10 + 8 * 0]
mov rbx, [r10 + 8 * 1]
mov rcx, [r10 + 8 * 2]
mov rdx, [r10 + 8 * 3]

; Second pass: add 0, so memory is unchanged and each register receives the
; current memory value at its operand size
mov r15, 0x00
xadd  byte [r10 + 8 * 0], r15b
mov r14, 0x00
xadd word [r10 + 8 * 1], r14w
mov r13, 0x00
xadd dword [r10 + 8 * 2], r13d
mov r12, 0x00
xadd qword [r10 + 8 * 3], r12

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x02",
    "RBX": "0x02",
    "RCX": "0x02",
    "RDX": "0x02"
  }
}
%endif

; XADD where destination and source are the SAME register, at every operand
; size. The CONFIG expects the sum (1 + 1 = 2) to be what remains.
mov rax, 0x01
mov rbx, 0x01
mov rcx, 0x01
mov rdx, 0x01

xadd al, al
xadd bx, bx
xadd ecx, ecx
xadd rdx, rdx

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C0_Atomic16.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8642438a8c464848",
    "RBX": "0x824243444546478c",
    "RCX": "0x4142434445464790",
    "RDX": "0x4142434445464748",
    "RSI": "0x8242434445464748",
    "RDI": "0x4142434445464790"
  }
}
%endif

; LOCK XADD on 16-bit operands at unaligned addresses, including accesses that
; straddle 4, 8, 16 and 64-byte boundaries.
mov r15, 0xe0000000

; Fill ten consecutive qwords with the same pattern
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Initial addend. ax is NOT reloaded below: every xadd leaves the previous
; memory value in ax, which then becomes the addend of the next xadd
mov rax, 1

; Test 1 byte offset within 4byte boundary
lock xadd word [r15 + 8 * 0 + 1], ax

; Test 3 byte offset across 4byte boundary
lock xadd word [r15 + 8 * 0 + 3], ax

; Test 7 byte offset across 8byte boundary
lock xadd word [r15 + 8 * 0 + 7], ax

; Test 15 byte offset across 16byte boundary
lock xadd word [r15 + 8 * 0 + 15], ax

; Test 63 byte offset across cacheline boundary
lock xadd word [r15 + 8 * 0 + 63], ax

; Read back the touched qwords (0, 1, 2, 7, 8) plus qword 3, which is untouched
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C0_Atomic32.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x8642434446464748",
    "RBX": "0x8242434445888a8c",
    "RCX": "0x41424344458c8e90",
    "RDX": "0x4142434445464748",
    "RSI": "0x8242434445464748",
    "RDI": "0x41424344458c8e90"
  }
}
%endif

; LOCK XADD on 32-bit operands at unaligned addresses that straddle 4, 8, 16
; and 64-byte boundaries.
mov r15, 0xe0000000

; Fill ten consecutive qwords with the same pattern
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Initial addend. eax is NOT reloaded below: every xadd leaves the previous
; memory value in eax, which then becomes the addend of the next xadd
mov rax, 1

; Test 3 byte offset across 4byte boundary
lock xadd dword [r15 + 8 * 0 + 3], eax

; Test 7 byte offset across 8byte boundary
lock xadd dword [r15 + 8 * 0 + 7], eax

; Test 15 byte offset across 16byte boundary
lock xadd dword [r15 + 8 * 0 + 15], eax

; Test 63 byte offset across cacheline boundary
lock xadd dword [r15 + 8 * 0 + 63], eax

; Read back the touched qwords (0, 1, 2, 7, 8) plus qword 3, which is untouched
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C0_Atomic64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4242434445464748",
    "RBX": "0x8242434445464748",
    "RCX": "0x418486888a8c8e90",
    "RDX": "0x4142434445464748",
    "RSI": "0x8242434445464748",
    "RDI": "0x418486888a8c8e90"
  }
}
%endif

; LOCK XADD on 64-bit operands at unaligned addresses that straddle 8, 16 and
; 64-byte boundaries.
mov r15, 0xe0000000

; Fill ten consecutive qwords with the same pattern
mov rax, 0x4142434445464748
mov [r15 + 8 * 0], rax
mov [r15 + 8 * 1], rax
mov [r15 + 8 * 2], rax
mov [r15 + 8 * 3], rax
mov [r15 + 8 * 4], rax
mov [r15 + 8 * 5], rax
mov [r15 + 8 * 6], rax
mov [r15 + 8 * 7], rax
mov [r15 + 8 * 8], rax
mov [r15 + 8 * 9], rax

; Initial addend. rax is NOT reloaded below: every xadd leaves the previous
; memory value in rax, which then becomes the addend of the next xadd
mov rax, 1

; Test 7 byte offset across 8byte boundary
lock xadd qword [r15 + 8 * 0 + 7], rax

; Test 15 byte offset across 16byte boundary
lock xadd qword [r15 + 8 * 0 + 15], rax

; Test 63 byte offset across cacheline boundary
lock xadd qword [r15 + 8 * 0 + 63], rax

; Read back the touched qwords (0, 1, 2, 7, 8) plus qword 3, which is untouched
mov rax, qword [r15 + 8 * 0]
mov rbx, qword [r15 + 8 * 1]
mov rcx, qword [r15 + 8 * 2]
mov rdx, qword [r15 + 8 * 3]
mov rsi, qword [r15 + 8 * 7]
mov rdi, qword [r15 + 8 * 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x0"],
    "XMM1": ["0x0", "0xFFFFFFFF00000000"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000"],
    "XMM3": ["0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM4": ["0x0", "0xFFFFFFFFFFFFFFFF"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM6": ["0x0000000000000000", "0x00000000FFFFFFFF"],
    "XMM7": ["0x00000000FFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; CMPPS with all eight comparison predicates (immediate 0x00 - 0x07).
; Predicates 0, 1, 2, 4, 5, 6 use ordinary numbers; 3 (unordered) and
; 7 (ordered) are run afterwards against NaN-bearing inputs.
mov rdx, 0xe0000000

; Left operand, element 0 first: [2.0, 1.0, 4.0, 2.0]
mov rax, 0x3f80000040000000
mov [rdx + 8 * 0], rax
mov rax, 0x4000000040800000
mov [rdx + 8 * 1], rax

; Right operand, element 0 first: [2.0, 1.0, 1.0, 5.0]
mov rax, 0x3f80000040000000
mov [rdx + 8 * 2], rax
mov rax, 0x40a000003f800000
mov [rdx + 8 * 3], rax

; xmm3 and xmm7 are loaded here too but are reloaded before they are compared
movapd xmm0, [rdx + 8 * 0]
movapd xmm1, [rdx + 8 * 0]
movapd xmm2, [rdx + 8 * 0]
movapd xmm3, [rdx + 8 * 0]
movapd xmm4, [rdx + 8 * 0]
movapd xmm5, [rdx + 8 * 0]
movapd xmm6, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

cmpps xmm0, xmm8, 0x00 ; EQ
cmpps xmm1, xmm8, 0x01 ; LT
cmpps xmm2, xmm8, 0x02 ; LTE
cmpps xmm4, xmm8, 0x04 ; NEQ
cmpps xmm5, xmm8, 0x05 ; NLT
cmpps xmm6, xmm8, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; Left operand, element 0 first: [0.0, 0.0, nan, nan]
mov rax, 0x0000000000000000
mov [rdx + 8 * 0], rax
mov rax, 0x7FC000007FC00000
mov [rdx + 8 * 1], rax

; Right operand, element 0 first: [0.0, nan, 0.0, nan]
mov rax, 0x7FC0000000000000
mov [rdx + 8 * 2], rax
mov rax, 0x7FC0000000000000
mov [rdx + 8 * 3], rax

movapd xmm3, [rdx + 8 * 0]
movapd xmm7, [rdx + 8 * 0]
movapd xmm8, [rdx + 8 * 2]

; Unordered will return true when either input is nan
; [0.0, 0.0, nan, nan] unord [0.0, nan, 0.0, nan] = [0, 1, 1, 1]
cmpps xmm3, xmm8, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; [0.0, 0.0, nan, nan] ord [0.0, nan, 0.0, nan] = [1, 0, 0, 0]
cmpps xmm7, xmm8, 0x07 ; Ordered

; NOTE(review): the second hlt below looks redundant - confirm and drop if so
hlt
hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x4142434445464748",
    "RCX": "0x0000000045464748"
  }
}
%endif

; MOVNTI (non-temporal integer store) with a 64-bit and a 32-bit source.
mov rdx, 0xe0000000

; Source value at qword 0; qwords 1 and 2 are zeroed as store targets
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax

mov rax, [rdx + 8 * 0]
movnti [rdx + 8 * 1], rax
; The 32-bit store only writes the low dword; the upper dword stays zero
movnti [rdx + 8 * 2], eax

; Read both targets back for checking
mov rbx, [rdx + 8 * 1]
mov rcx, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445467778",
    "MM1": "0x4142434477784748",
    "MM2": "0x4142777845464748",
    "MM3": "0x7778434445464748",
    "MM4": "0x4142434445467778",
    "MM5": "0x4142434477784748",
    "MM6": "0x4142777845464748",
    "MM7": "0x7778434445464748"
  }
}
%endif

; PINSRW on MMX registers: insert a word into each of the four word lanes,
; once from a general purpose register and once from memory.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Value to insert: its low word (0x7778) is used. rax keeps this value, so the
; register and memory sources are identical
mov rax, 0x7172737475767778
mov [rdx + 8 * 2], rax

; Every destination starts from the same base pattern
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]
movq mm5, [rdx + 8 * 0]
movq mm6, [rdx + 8 * 0]
movq mm7, [rdx + 8 * 0]

; Register source, lanes 0-3
pinsrw mm0, eax, 0
pinsrw mm1, eax, 1
pinsrw mm2, eax, 2
pinsrw mm3, eax, 3

; Memory source, lanes 0-3
pinsrw mm4, [rdx + 8 * 2], 0
pinsrw mm5, [rdx + 8 * 2], 1
pinsrw mm6, [rdx + 8 * 2], 2
pinsrw mm7, [rdx + 8 * 2], 3

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C4_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445467778",
    "MM1": "0x4142434477784748",
    "MM2": "0x4142777845464748",
    "MM3": "0x7778434445464748",
    "MM4": "0x4142434445467778",
    "MM5": "0x4142434477784748",
    "MM6": "0x4142777845464748",
    "MM7": "0x7778434445464748"
  }
}
%endif

; PINSRW on MMX registers with lane immediates 0-7. The CONFIG expects
; immediates 4-7 to give the same results as 0-3, i.e. the lane index wraps
; to the four word lanes of an MMX register.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Value to insert: its low word (0x7778) is used
mov rax, 0x7172737475767778

; Every destination starts from the same base pattern
movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]
movq mm5, [rdx + 8 * 0]
movq mm6, [rdx + 8 * 0]
movq mm7, [rdx + 8 * 0]

; In-range lane indices
pinsrw mm0, eax, 0
pinsrw mm1, eax, 1
pinsrw mm2, eax, 2
pinsrw mm3, eax, 3
; Out-of-range lane indices, expected to behave like 0-3
pinsrw mm4, eax, 4
pinsrw mm5, eax, 5
pinsrw mm6, eax, 6
pinsrw mm7, eax, 7

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4748",
    "RBX": "0x4546",
    "RCX": "0x4344",
    "RDX": "0x4142"
  }
}
%endif

; PEXTRW from an MMX register: extract each of the four word lanes into a
; 32-bit general purpose register.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]

; Pre-fill the destinations with -1 (rdx is no longer needed as a pointer);
; the CONFIG expects only the extracted word to remain, upper bits zeroed
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1

pextrw eax, mm0, 0
pextrw ebx, mm0, 1
pextrw ecx, mm0, 2
pextrw edx, mm0, 3

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C5_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4748",
    "RBX": "0x4546",
    "RCX": "0x4344",
    "RDX": "0x4142",
    "RSI": "0x4748",
    "RDI": "0x4546",
    "RBP": "0x4344",
    "RSP": "0x4142"
  }
}
%endif

; PEXTRW from an MMX register with lane immediates 0-7. The CONFIG expects
; immediates 4-7 to give the same results as 0-3, i.e. the lane index wraps
; to the four word lanes of an MMX register.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]

; Pre-fill the first four destinations with -1 (rdx is no longer needed as a pointer)
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1

; In-range lane indices
pextrw eax, mm0, 0
pextrw ebx, mm0, 1
pextrw ecx, mm0, 2
pextrw edx, mm0, 3
; Out-of-range lane indices, expected to behave like 0-3
pextrw esi, mm0, 4
pextrw edi, mm0, 5
pextrw ebp, mm0, 6
pextrw esp, mm0, 7


hlt


================================================
FILE: unittests/ASM/TwoByte/0F_C6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R15": "0x0000000008070605",
    "R14": "0x00000000F8F7F6F5",
    "R13": "0x00000000E8E7E6E5",
    "R12": "0x00000000D8D7D6D5",
    "R11": "0x00000000C8C7C6C5",
    "R10": "0x00000000B8B7B6B5",
    "R9":  "0x00000000A8A7A6A5",
    "R8":  "0x0000000098979695",
    "RSP": "0x8887868584838281",
    "RDI": "0x7877767574737271",
    "RSI": "0x6867666564636261",
    "RBP": "0x5857565554535251",
    "RDX": "0x4847464544434241",
    "RCX": "0x3837363534333231",
    "RBX": "0x2827262524232221",
    "RAX": "0x1817161514131211"
  }
}
%endif

; BSWAP on all sixteen general purpose registers: 64-bit form on rax..rsp,
; 32-bit form on r8d..r15d.
; NOTE(review): bswap is encoded as 0F C8+rd, which does not match the opcode
; in this file's name - confirm the naming is intentional.

; rdx is overwritten with test data a few lines below and no memory is
; accessed in this test, so this pointer setup has no effect
mov rdx, 0xe0000000

mov rax, 0x1112131415161718
mov rbx, 0x2122232425262728
mov rcx, 0x3132333435363738
mov rdx, 0x4142434445464748
mov rbp, 0x5152535455565758
mov rsi, 0x6162636465666768
mov rdi, 0x7172737475767778
mov rsp, 0x8182838485868788
mov  r8, 0x9192939495969798
mov  r9, 0xA1A2A3A4A5A6A7A8
mov r10, 0xB1B2B3B4B5B6B7B8
mov r11, 0xC1C2C3C4C5C6C7C8
mov r12, 0xD1D2D3D4D5D6D7D8
mov r13, 0xE1E2E3E4E5E6E7E8
mov r14, 0xF1F2F3F4F5F6F7F8
mov r15, 0x0102030405060708

; 64-bit form: all eight bytes are reversed
bswap rax
bswap rbx
bswap rcx
bswap rdx
bswap rbp
bswap rsi
bswap rdi
bswap rsp

; 32-bit form: the low four bytes are reversed and, per the CONFIG, the upper
; 32 bits end up zero
bswap r8d
bswap r9d
bswap r10d
bswap r11d
bswap r12d
bswap r13d
bswap r14d
bswap r15d

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x20A121A222A323A4",
    "MM1": "0x0041004300450047",
    "MM2": "0x0",
    "MM3": "0x0",
    "MM4": "0x0"
  }
}
%endif

; PSRLW on MMX registers: logical right shift of 16-bit lanes by a count read
; from memory. Counts tested: 1, 8, 16, 32 and 64.
mov rdx, 0xe0000000

; Value to shift (only qword 0 is loaded into the MMX registers)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Will Zero
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Will Zero
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]

; Counts of 16 and above clear every 16-bit lane
psrlw mm0, [rdx + 8 * 2]
psrlw mm1, [rdx + 8 * 4]
psrlw mm2, [rdx + 8 * 6]
psrlw mm3, [rdx + 8 * 8]
psrlw mm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x20A121A222A323A4",
    "MM1": "0x0041424300454647",
    "MM2": "0x0000414200004546",
    "MM3": "0x0",
    "MM4": "0x0"
  }
}
%endif

; PSRLD on MMX registers: logical right shift of 32-bit lanes by a count read
; from memory. Counts tested: 1, 8, 16, 32 and 64.
mov rdx, 0xe0000000

; Value to shift (only qword 0 is loaded into the MMX registers)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Shift count 16
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Will Zero
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]

; Counts of 32 and above clear every 32-bit lane
psrld mm0, [rdx + 8 * 2]
psrld mm1, [rdx + 8 * 4]
psrld mm2, [rdx + 8 * 6]
psrld mm3, [rdx + 8 * 8]
psrld mm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x20A121A222A323A4",
    "MM1": "0x0041424344454647",
    "MM2": "0x0000414243444546",
    "MM3": "0x0000000041424344",
    "MM4": "0x0"
  }
}
%endif

; PSRLQ on MMX registers: logical right shift of the whole 64-bit value by a
; count read from memory. Counts tested: 1, 8, 16, 32 and 64.
mov rdx, 0xe0000000

; Value to shift (only qword 0 is loaded into the MMX registers)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Shift count 1
mov rax, 0x1
mov [rdx + 8 * 2], rax
mov rax, 0x0
mov [rdx + 8 * 3], rax

; Shift count 8
mov rax, 0x8
mov [rdx + 8 * 4], rax
mov rax, 0x0
mov [rdx + 8 * 5], rax

; Shift count 16
mov rax, 0x10
mov [rdx + 8 * 6], rax
mov rax, 0x0
mov [rdx + 8 * 7], rax

; Shift count 32
mov rax, 0x20
mov [rdx + 8 * 8], rax
mov rax, 0x0
mov [rdx + 8 * 9], rax

; Will Zero
mov rax, 0x40
mov [rdx + 8 * 10], rax
mov rax, 0x0
mov [rdx + 8 * 11], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 0]
movq mm3, [rdx + 8 * 0]
movq mm4, [rdx + 8 * 0]

; Only the count of 64 clears the 64-bit lane
psrlq mm0, [rdx + 8 * 2]
psrlq mm1, [rdx + 8 * 4]
psrlq mm2, [rdx + 8 * 6]
psrlq mm3, [rdx + 8 * 8]
psrlq mm4, [rdx + 8 * 10]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xa2a4a6a8aaacaeb0",
    "MM1": "0xa2a4a6a8aaacaeb0"
  }
}
%endif

; PADDQ on MMX registers: 64-bit integer add, with the second operand taken
; from memory and from a register.
mov rdx, 0xe0000000

; First operand at qword 0 (qword 1 is written but not read)
mov rax, 0x6162636465666768
mov [rdx + 8 * 0], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 1], rax

; Second operand at qword 2 (qword 3 is written but not read)
mov rax, 0x4142434445464748
mov [rdx + 8 * 2], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 3], rax

; Memory source form
movq mm0, [rdx]
paddq mm0, [rdx + 8 * 2]

; Register source form. Both forms must give the same sum
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
paddq mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xFD44929037E4ED40",
    "MM1": "0xFD44929037E4ED40"
  }
}
%endif

; PMULLW on MMX registers: multiply 16-bit lanes and keep the low 16 bits of
; each product, with the second operand taken from a register and from memory.
mov rdx, 0xe0000000

; First operand at qword 0 (qword 1 is written but not read)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second operand at qword 2 (qword 3 is written but not read)
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 2]

; Register source form, then memory source form. Both must match
pmullw mm0, mm2
pmullw mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFF",
    "RBX": "0x00",
    "RCX": "0x00",
    "RDX": "0xF0"
  }
}
%endif

; PMOVMSKB from MMX registers: gather the top bit of each of the eight bytes
; into the low bits of a general purpose register.
mov rdx, 0xe0000000

; Four patterns: every sign bit set, all zero, no sign bit set (0x70), and
; sign bits set in the upper four bytes only
mov rax, 0x8080808080808080
mov [rdx + 8 * 0], rax
mov rax, 0x0000000000000000
mov [rdx + 8 * 1], rax
mov rax, 0x7070707070707070
mov [rdx + 8 * 2], rax
mov rax, 0x8080808000000000
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]

; Pre-fill the destinations with -1 (rdx is no longer needed as a pointer);
; the CONFIG expects every bit above the 8-bit mask to be cleared
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1

pmovmskb eax, mm0
pmovmskb ebx, mm1
pmovmskb ecx, mm2
pmovmskb edx, mm3

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202000000000",
    "MM1": "0x2020202000000000"
  }
}
%endif

; PSUBUSB on MMX registers: unsigned saturating byte subtract, with the second
; operand taken from a register and from memory.
mov rdx, 0xe0000000

; Minuend at qword 0 (qword 1 is written but not read)
mov rax, 0x6162636445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Subtrahend at qword 2: smaller than the minuend in the upper four bytes,
; larger in the lower four, so the lower four results saturate to zero
mov rax, 0x4142434465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 2]

; Register source form, then memory source form. Both must match
psubusb mm0, mm2
psubusb mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_D9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202000000000",
    "MM1": "0x2020202000000000"
  }
}
%endif

; PSUBUSW on MMX registers: unsigned saturating word subtract, with the second
; operand taken from a register and from memory.
mov rdx, 0xe0000000

; Minuend at qword 0 (qword 1 is written but not read)
mov rax, 0x6162636445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Subtrahend at qword 2: smaller than the minuend in the upper two words,
; larger in the lower two, so the lower two results saturate to zero
mov rax, 0x4142434465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 2]

; Register source form, then memory source form. Both must match
psubusw mm0, mm2
psubusw mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4142434445464748"
  }
}
%endif

; PMINUB on MMX registers: per-byte unsigned minimum, with the second operand
; taken from a register and from memory.
mov rdx, 0xe0000000

; Every byte of qword 0 is smaller than the matching byte of qword 1
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
; Register source: the smaller value is already in the destination
pminub mm0, mm1
; Memory source: the smaller value comes from memory and replaces mm1
pminub mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x1010101010101010",
    "MM1": "0x1010101010101010",
    "MM2": "0x1010101010101010"
  }
}
%endif

; PAND on MMX registers: bitwise AND, with the second operand taken from
; memory and from a register.
mov rdx, 0xe0000000

; First operand at qword 0 is all ones (qword 1 is written but not read)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Second operand at qword 2 (qword 3 is written but not read)
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory source form
movq mm0, [rdx]
pand mm0, [rdx + 8 * 2]

; Register source form. mm2 must be left unmodified
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pand mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA2A4A6A8AAACAEB0",
    "MM1": "0xA2A4A6A8AAACAEB0"
  }
}
%endif

; PADDUSB on MMX registers: unsigned saturating byte add, with the second
; operand taken from a register and from memory.
; Note: with this data no byte sum exceeds 0xFF (the largest is 0x48 + 0x68),
; so the saturating path itself is not exercised.
mov rdx, 0xe0000000

; First operand at qword 0 (qword 1 is written but not read)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second operand at qword 2 (qword 3 is written but not read)
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 2]

; Register source form, then memory source form. Both must match
paddusb mm0, mm2
paddusb mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA2A4A6A8AAACAEB0",
    "MM1": "0xA2A4A6A8AAACAEB0"
  }
}
%endif

; PADDUSW on MMX registers: unsigned saturating word add, with the second
; operand taken from a register and from memory.
; Note: with this data no word sum exceeds 0xFFFF (the largest is
; 0x4748 + 0x6768), so the saturating path itself is not exercised.
mov rdx, 0xe0000000

; First operand at qword 0 (qword 1 is written but not read)
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Second operand at qword 2 (qword 3 is written but not read)
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 2]

; Register source form, then memory source form. Both must match
paddusw mm0, mm2
paddusw mm1, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x5152535455565758",
    "MM1": "0x5152535455565758"
  }
}
%endif

; PMAXUB on MMX registers: per-byte unsigned maximum, with the second operand
; taken from a register and from memory.
mov rdx, 0xe0000000

; Every byte of qword 1 is larger than the matching byte of qword 0
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
; Register source: the larger value comes from mm1 and replaces mm0
pmaxub mm0, mm1
; Memory source: the larger value is already in the destination
pmaxub mm1, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_DF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0",
    "MM1": "0x0",
    "MM2": "0x1010101010101010"
  }
}
%endif

; PANDN on MMX registers: (NOT destination) AND source, with the source taken
; from memory and from a register.
mov rdx, 0xe0000000

; Destination value at qword 0 is all ones, so its complement is zero
; (qword 1 is written but not read)
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 0], rax
mov rax, 0x0
mov [rdx + 8 * 1], rax

; Source value at qword 2 (qword 3 is written but not read)
mov rax, 0x1010101010101010
mov [rdx + 8 * 2], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 3], rax

; Memory source form
movq mm0, [rdx]
pandn mm0, [rdx + 8 * 2]

; Register source form. mm2 must be left unmodified
movq mm1, [rdx]
movq mm2, [rdx + 8 * 2]
pandn mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2179b0697d5378c4",
    "MM1": "0x1ed68638699d35ca",
    "MM2": "0x165c42291f28194c",
    "MM3": "0x2179b0697d5378c4",
    "MM4": "0x1ed68638699d35ca",
    "MM5": "0x165c42291f28194c"
  }
}
%endif

; PAVGB mm, mm/m64 (0F E0): rounded average of packed unsigned bytes,
; (a + b + 1) >> 1 per byte. The same destination value is averaged with
; three different sources (two random patterns and zero), once with a
; register source (mm0-mm2) and once with a memory source (mm3-mm5).
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x2bb883523d4f3197
PUT_Q 1, 0x1246c77764260189
PUT_Q 2, 0x163add80bc57bef1
PUT_Q 3, 0x64d615e5b405a306
PUT_Q 4, 0x11f4881d94eb39fc
PUT_Q 5, 0xa9162248f2d0a23a
PUT_Q 6, 0x0
PUT_Q 7, 0x0

; Six copies of the destination value, then the first two register sources.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

pavgb mm0, mm6
pavgb mm1, mm7

; mm7 is reused for the third (all-zero) register source.
movq mm7, [rdx + 48]
pavgb mm2, mm7

; Memory-source forms with the same three sources.
pavgb mm3, [rdx + 16]
pavgb mm4, [rdx + 32]
pavgb mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x0041004300450047",
    "MM2": "0x0",
    "MM3": "0x4142434445464748",
    "MM4": "0x0041004300450047",
    "MM5": "0x0"
  }
}
%endif

; PSRAW mm, mm/m64 (0F E1): arithmetic right shift of packed words by the
; 64-bit count in the source. Counts used: 0 (value unchanged), 8, and 16.
; A count of 16 leaves only sign bits; all words here are positive, so the
; result is 0. Each count is tried with a register and a memory source.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x0
PUT_Q 3, 0x5152535455565758
PUT_Q 4, 0x8
PUT_Q 5, 0x5152535455565758
PUT_Q 6, 0x10
PUT_Q 7, 0x5152535455565758

; Six copies of the value to shift, then the first two shift counts.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

psraw mm0, mm6
psraw mm1, mm7

; mm7 is reused for the third count.
movq mm7, [rdx + 48]
psraw mm2, mm7

; Memory-source forms with the same three counts.
psraw mm3, [rdx + 16]
psraw mm4, [rdx + 32]
psraw mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x0000414200004546",
    "MM2": "0x0",
    "MM3": "0x4142434445464748",
    "MM4": "0x0000414200004546",
    "MM5": "0x0"
  }
}
%endif

; PSRAD mm, mm/m64 (0F E2): arithmetic right shift of packed dwords by the
; 64-bit count in the source. Counts used: 0 (value unchanged), 16, and 32.
; A count of 32 leaves only sign bits; both dwords here are positive, so the
; result is 0. Each count is tried with a register and a memory source.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x0
PUT_Q 3, 0x5152535455565758
PUT_Q 4, 0x10
PUT_Q 5, 0x5152535455565758
PUT_Q 6, 0x20
PUT_Q 7, 0x5152535455565758

; Six copies of the value to shift.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]

; First two shift counts.
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

psrad mm0, mm6
psrad mm1, mm7

; mm7 is reused for the third count.
movq mm7, [rdx + 48]
psrad mm2, mm7

; Memory-source forms with the same three counts.
psrad mm3, [rdx + 16]
psrad mm4, [rdx + 32]
psrad mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x20f9b0697cd37844",
    "MM1": "0x1ed685b8691d35ca",
    "MM2": "0x15dc41a91ea818cc",
    "MM3": "0x20f9b0697cd37844",
    "MM4": "0x1ed685b8691d35ca",
    "MM5": "0x15dc41a91ea818cc"
  }
}
%endif

; PAVGW mm, mm/m64 (0F E3): rounded average of packed unsigned words,
; (a + b + 1) >> 1 per word. The same destination value is averaged with
; three different sources (two random patterns and zero), once with a
; register source (mm0-mm2) and once with a memory source (mm3-mm5).
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x2bb883523d4f3197
PUT_Q 1, 0x1246c77764260189
PUT_Q 2, 0x163add80bc57bef1
PUT_Q 3, 0x64d615e5b405a306
PUT_Q 4, 0x11f4881d94eb39fc
PUT_Q 5, 0xa9162248f2d0a23a
PUT_Q 6, 0x0
PUT_Q 7, 0x0

; Six copies of the destination value, then the first two register sources.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

pavgw mm0, mm6
pavgw mm1, mm7

; mm7 is reused for the third (all-zero) register source.
movq mm7, [rdx + 48]
pavgw mm2, mm7

; Memory-source forms with the same three sources.
pavgw mm3, [rdx + 16]
pavgw mm4, [rdx + 32]
pavgw mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x14BA15E517171851",
    "MM1": "0x14BA15E517171851"
  }
}
%endif

; PMULHUW mm, mm/m64 (0F E4): multiply packed unsigned words and keep the
; high 16 bits of each 32-bit product.
; Lowest word as a worked example: 0x4748 * 0x5758 = 0x18519FC0 -> 0x1851.
; MM0 exercises the memory-source form, MM1 the register-source form; both
; use the same operands and must therefore produce the same result.

mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-5 are initialised but not read by this test.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 1]

pmulhuw mm0, [rdx + 8 * 1]
pmulhuw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xD7D1D77A17171851",
    "MM1": "0xD7D1D77A17171851"
  }
}
%endif

; PMULHW mm, mm/m64 (0F E5): multiply packed signed words and keep the high
; 16 bits of each 32-bit product.
; The two upper words of the first operand are negative, so their high
; halves (0xD7D1, 0xD77A) are negative too; the two lower words are positive.
; MM0 exercises the memory-source form, MM1 the register-source form; both
; use the same operands and must therefore produce the same result.

mov rdx, 0xe0000000

mov rax, 0x8182838445464748 ; -32382, -31868, 17734, 18248
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758 ; 20818, 21332, 21846, 22360
mov [rdx + 8 * 1], rax

; Slots 2-5 are initialised but not read by this test.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 1]

pmulhw mm0, [rdx + 8 * 1]
pmulhw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4142434445464748"
  }
}
%endif

; MOVNTQ m64, mm (0F E7): non-temporal store of an MMX register.
; Slot 1 starts as zero, is overwritten by the store of mm0, and is then
; read back into mm1, which must equal mm0.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x0

movq mm0, [rdx]
movq mm1, [rdx + 8]       ; mm1 = 0 before the store

movntq [rdx + 8], mm0
movq mm1, [rdx + 8]       ; reload the stored value

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xE0E0E0E0E0E0E0E0",
    "MM1": "0xE0E0E0E0E0E0E0E0"
  }
}
%endif

; PSUBSB mm, mm/m64 (0F E8): signed saturating subtract of packed bytes.
; Every byte difference here is 0x4N - 0x6N = -0x20 (0xE0), which is within
; the signed byte range, so no lane saturates.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form subtrahend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

psubsb mm0, mm2          ; register source
psubsb mm1, [rdx + 16]   ; memory source, same subtrahend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xDFE0DFE0DFE0DFE0",
    "MM1": "0xDFE0DFE0DFE0DFE0"
  }
}
%endif

; PSUBSW mm, mm/m64 (0F E9): signed saturating subtract of packed words.
; Every word difference here is -0x2020 (0xDFE0), which is within the signed
; word range, so no lane saturates.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form subtrahend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

psubsw mm0, mm2          ; register source
psubsw mm1, [rdx + 16]   ; memory source, same subtrahend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_EA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4142434445464748"
  }
}
%endif

; PMINSW mm, mm/m64 (0F EA): per-word signed minimum.
; Every word of the destination (0x4142...) is smaller than the matching
; word of the source (0x6162...), so the destination is left unchanged.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x5152535455565758

; mm0 and mm1 start from the same value; mm2 holds the register-form source.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

pminsw mm0, mm2          ; register source
pminsw mm1, [rdx + 16]   ; memory source, same operand

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_EB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x1111111111111111",
    "MM1": "0x1111111111111111",
    "MM2": "0x0101010101010101"
  }
}
%endif

; POR mm, mm/m64 (0F EB): bitwise OR.
; 0x10 OR 0x01 = 0x11 in every byte. MM2 keeps the untouched source value.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x1010101010101010
PUT_Q 1, 0x2020202020202020
PUT_Q 2, 0x0101010101010101
PUT_Q 3, 0x0202020202020202

; Memory-source form.
movq mm0, [rdx]
por mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
por mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_EC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x7F7F7F7F7F7F7F7F",
    "MM1": "0x7F7F7F7F7F7F7F7F"
  }
}
%endif

; PADDSB mm, mm/m64 (0F EC): signed saturating add of packed bytes.
; Every byte sum here (0xA2 .. 0xB0) is above the signed maximum, so every
; lane saturates to 0x7F.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form addend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

paddsb mm0, mm2          ; register source
paddsb mm1, [rdx + 16]   ; memory source, same addend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_ED.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x7FFF7FFF7FFF7FFF",
    "MM1": "0x7FFF7FFF7FFF7FFF"
  }
}
%endif

; PADDSW mm, mm/m64 (0F ED): signed saturating add of packed words.
; Every word sum here (0xA2A4 .. 0xAEB0) is above the signed maximum, so
; every lane saturates to 0x7FFF.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form addend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

paddsw mm0, mm2          ; register source
paddsw mm1, [rdx + 16]   ; memory source, same addend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_EE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x6162636465666768",
    "MM1": "0x6162636465666768"
  }
}
%endif

; PMAXSW mm, mm/m64 (0F EE): per-word signed maximum.
; Every word of the source (0x6162...) is larger than the matching word of
; the destination (0x4142...), so the destination becomes the source value.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x5152535455565758

; mm0 and mm1 start from the same value; mm2 holds the register-form source.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

pmaxsw mm0, mm2          ; register source
pmaxsw mm1, [rdx + 16]   ; memory source, same operand

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_EF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2424242424242424",
    "MM1": "0x2424242424242424",
    "MM2": "0x1818181818181818"
  }
}
%endif

; PXOR mm, mm/m64 (0F EF): bitwise XOR.
; 0x3C XOR 0x18 = 0x24 in every byte. MM2 keeps the untouched source value.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x3C3C3C3C3C3C3C3C
PUT_Q 1, 0x3C3C3C3C3C3C3C3C
PUT_Q 2, 0x1818181818181818
PUT_Q 3, 0x1818181818181818

; Memory-source form.
movq mm0, [rdx]
pxor mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
pxor mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x828486888A8C8E90",
    "MM1": "0x4200440046004800",
    "MM2": "0x0",
    "MM3": "0x0"
  }
}
%endif

; PSLLW mm, m64 (0F F1): logical left shift of packed words by the 64-bit
; count in the source. Counts used: 1, 8, 16 and 32. Counts of 16 or more
; shift every bit out of a word, so MM2 and MM3 are expected to be 0.
; Only the memory-source form is exercised here.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x1
PUT_Q 3, 0x5152535455565758
PUT_Q 4, 0x8
PUT_Q 5, 0x5152535455565758
PUT_Q 6, 0x10
PUT_Q 7, 0x5152535455565758
PUT_Q 8, 0x20
PUT_Q 9, 0x5152535455565758

; Four copies of the value to shift, one per count.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]

psllw mm0, [rdx + 16]   ; count 1
psllw mm1, [rdx + 32]   ; count 8
psllw mm2, [rdx + 48]   ; count 16
psllw mm3, [rdx + 64]   ; count 32

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x4142434445464748",
    "MM1": "0x4344000047480000",
    "MM2": "0x0",
    "MM3": "0x4142434445464748",
    "MM4": "0x4344000047480000",
    "MM5": "0x0"
  }
}
%endif

; PSLLD mm, mm/m64 (0F F2): logical left shift of packed dwords by the
; 64-bit count in the source. Counts used: 0 (value unchanged), 16, and 32
; (every bit shifted out, result 0). Each count is tried with a register
; source (mm0-mm2) and a memory source (mm3-mm5).
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x0
PUT_Q 3, 0x5152535455565758
PUT_Q 4, 0x10
PUT_Q 5, 0x5152535455565758
PUT_Q 6, 0x20
PUT_Q 7, 0x5152535455565758

; Six copies of the value to shift.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]

; First two shift counts.
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

pslld mm0, mm6
pslld mm1, mm7

; mm7 is reused for the third count.
movq mm7, [rdx + 48]
pslld mm2, mm7

; Memory-source forms with the same three counts.
pslld mm3, [rdx + 16]
pslld mm4, [rdx + 32]
pslld mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0":  "0x4142434445464748",
    "MM1":  "0x4546474800000000",
    "MM2":  "0x0",
    "MM3":  "0x4142434445464748",
    "MM4":  "0x4546474800000000",
    "MM5":  "0x0"
  }
}
%endif

; PSLLQ mm, mm/m64 (0F F3): logical left shift of the whole quadword by the
; 64-bit count in the source. Counts used: 0 (value unchanged), 32, and 64
; (every bit shifted out, result 0). Each count is tried with a register
; source (mm0-mm2) and a memory source (mm3-mm5).
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x0
PUT_Q 3, 0x5152535455565758
PUT_Q 4, 0x20
PUT_Q 5, 0x5152535455565758
PUT_Q 6, 0x40
PUT_Q 7, 0x5152535455565758

; Six copies of the value to shift.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx]
movq mm3, [rdx]
movq mm4, [rdx]
movq mm5, [rdx]

; First two shift counts.
movq mm6, [rdx + 16]
movq mm7, [rdx + 32]

psllq mm0, mm6
psllq mm1, mm7
; mm7 is reused for the third count.
movq mm7, [rdx + 48]
psllq mm2, mm7

; Memory-source forms with the same three counts.
psllq mm3, [rdx + 16]
psllq mm4, [rdx + 32]
psllq mm5, [rdx + 48]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x000000000003FFFC",
    "MM1": "0x000000000003FFFC"
  }
}
%endif

; PMULUDQ mm, mm/m64 (0F F4): unsigned multiply of the low dwords of both
; operands into a 64-bit product. The upper dwords are filler and must not
; influence the result: 0x0000FFFF * 0x00000004 = 0x3FFFC.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x414243440000FFFF
PUT_Q 1, 0x5152535400007FFF
PUT_Q 2, 0x6162636400000004
PUT_Q 3, 0x7172737400000002

; Memory-source form.
movq mm0, [rdx]
pmuludq mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
pmuludq mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2A9FE7742F697C44",
    "MM1": "0x2A9FE7742F697C44"
  }
}
%endif

; PMADDWD mm, mm/m64 (0F F5): multiply packed signed words and add each
; adjacent pair of 32-bit products into one dword.
; Low dword as a worked example:
;   0x4748 * 0x5758 + 0x4546 * 0x5556 = 0x2F697C44.
; MM0 exercises the memory-source form, MM1 the register-source form; both
; use the same operands and must therefore produce the same result.

mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Slots 2-5 are initialised but not read by this test.
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 1]

pmaddwd mm0, [rdx + 8 * 1]
pmaddwd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x0000000000000080",
    "MM1": "0x0000000000000083",
    "MM2": "0x0000000000000134",
    "MM3": "0x0000000000000156",
    "MM4": "0x0000000000000140",
    "MM5": "0x000000000000013F",
    "MM6": "0x000000000000008F",
    "MM7": "0x00000000000000D1"
  }
}
%endif

; PSADBW mm, m64 (0F F6): sum of absolute differences of the eight unsigned
; byte pairs, written to the low word of the destination.
; MM0 is computed from the scratch-page constants; MM1-MM7 are computed from
; the data table at the end of this file.
mov rdx, 0xe0000000

mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x912277A763B4EB8C
mov [rdx + 8 * 2], rax
mov rax, 0x589490D442F54AFD
mov [rdx + 8 * 3], rax
mov rax, 0xB5E43417A3F6706C
mov [rdx + 8 * 4], rax
mov rax, 0xB4F4B827515F5BFA
mov [rdx + 8 * 5], rax
mov rax, 0x52D0EF1BCB906B6A
mov [rdx + 8 * 6], rax
mov rax, 0x1D0FDF5D05D39C64
mov [rdx + 8 * 7], rax
mov rax, 0xAEFEDEA21EF08810
mov [rdx + 8 * 8], rax
mov rax, 0xF7D80319B125BDE5
; This constant was previously loaded but never written to the scratch page.
mov [rdx + 8 * 9], rax

movq mm0, [rdx + 8 * 1]
; NOTE: mm1-mm7 are reloaded from the data table below before they are used,
; so of these loads only the one into mm0 affects the checked results.
movq mm1, [rdx + 8 * 2]
movq mm2, [rdx + 8 * 3]
movq mm3, [rdx + 8 * 4]
movq mm4, [rdx + 8 * 5]
movq mm5, [rdx + 8 * 6]
movq mm6, [rdx + 8 * 7]
movq mm7, [rdx + 8 * 8]

; Each byte of slot 1 is 0x10 above the matching byte of slot 0: 8 * 0x10 = 0x80.
psadbw mm0, [rdx + 8 * 0]

; Switch RDX to the data table: qwords 0-6 are the destinations,
; qwords 7-13 are the memory sources.
lea rdx, [rel .data]
movq mm1, [rdx + 8 * 0]
movq mm2, [rdx + 8 * 1]
movq mm3, [rdx + 8 * 2]
movq mm4, [rdx + 8 * 3]
movq mm5, [rdx + 8 * 4]
movq mm6, [rdx + 8 * 5]
movq mm7, [rdx + 8 * 6]

psadbw mm1, [rdx + 8 * 7]
psadbw mm2, [rdx + 8 * 8]
psadbw mm3, [rdx + 8 * 9]
psadbw mm4, [rdx + 8 * 10]
psadbw mm5, [rdx + 8 * 11]
psadbw mm6, [rdx + 8 * 12]
psadbw mm7, [rdx + 8 * 13]
hlt

.data:
; Test data table. NASM does not process backslash escapes inside
; single-quoted strings, so each line below is emitted as its literal ASCII
; characters ('b', 'a', '\', 'x', 'a', '7', ...) rather than as raw hex bytes.
; The expected RegData values match this literal text (e.g. MM1 = 0x83), so
; do not switch these strings to backquotes without regenerating RegData.
; The test reads byte offsets 0-111 of this table.
db 'ba\xa7\x5e\xc8\x0f\x90\x25\xf1\xf8\x49\xbd\xab\x4d\x2b\xa1\xc4'
db 'e4\x69\xe3\x2a\x80\x8d\xd6\x0b\xb2\x6d\xea\xae\x2e\x23\xc2\x2c'
db 'f9\xc6\xee\x06\x53\x96\x00\xae\x8d\x06\xdc\xe1\x11\x06\x0c\x40'
db 'a5\x61\x83\x7c\x13\x25\x43\xea\xa7\x08\x52\xc4\x0f\x91\x2c\x2c'
db '5a\xe7\xcf\xf6\xe3\x6b\x9e\x9e\xd8\x85\xf7\xfd\x4a\x17\xb4\xc9'
db '16\x07\x13\x8c\x83\x89\xc3\x5e\x46\x63\x1a\x31\xb9\x2c\x72\x18'
db '23\xa2\xf0\x4d\x22\x2a\xe4\x86\x84\x1a\xae\xfc\x65\x49\x17\x8e'
db 'c8\xb0\xe3\x6c\xb3\xce\xa1\x2f\xce\x5f\xae\x06\xac\x28\x7d\xb5'


================================================
FILE: unittests/ASM/TwoByte/0F_F6_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x00000000000007F8"
  }
}
%endif

; PSADBW mm, m64 (0F F6), extreme case: all-zero bytes against all-0xFF
; bytes. Each of the eight differences is 0xFF, so the sum is 0x7F8.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x0
PUT_Q 1, -1

movq mm0, [rdx]

psadbw mm0, [rdx + 8]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x41424344FFFFFFFF",
    "RCX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; MASKMOVQ mm, mm (0F F7): byte-masked store of the first operand to the
; address in RDI. A byte is written only when the most significant bit of
; the matching mask byte is set. Three masks are tried: all bytes, the upper
; four bytes, and no bytes.
mov rdx, 0xe0000000

; Slot 0: the data to store. Slots 1-3: the three byte masks.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x8080808080808080
mov [rdx + 8 * 1], rax
mov rax, 0x8080808000000000
mov [rdx + 8 * 2], rax
mov rax, 0
mov [rdx + 8 * 3], rax
; Slots 4-6: the three destinations, pre-filled with all ones so that bytes
; the store leaves alone remain visible as 0xFF.
mov rax, -1
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax

movq mm0, [rdx + 8 * 0]
movq mm1, [rdx + 8 * 1]
movq mm2, [rdx + 8 * 2]
movq mm3, [rdx + 8 * 3]

; RDI is the implicit destination pointer and must be set before each store.
; Full mask: slot 4 becomes the data value.
lea rdi, [rdx + 8 * 4]
maskmovq mm0, mm1

; Upper-half mask: only the top four bytes of slot 5 are replaced.
lea rdi, [rdx + 8 * 5]
maskmovq mm0, mm2

; Empty mask: slot 6 stays all ones.
lea rdi, [rdx + 8 * 6]
maskmovq mm0, mm3

; Read the three destinations back into the registers checked by RegData.
mov rax, qword [rdx + 8 * 4]
mov rbx, qword [rdx + 8 * 5]
mov rcx, qword [rdx + 8 * 6]

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202020202020",
    "MM1": "0x2020202020202020"
  }
}
%endif

; PSUBB mm, mm/m64 (0F F8): wrapping subtract of packed bytes.
; Every byte difference here is 0x6N - 0x4N = 0x20 with no borrow.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x6162636465666768
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x4142434445464748
PUT_Q 3, 0x5152535455565758

; Memory-source form.
movq mm0, [rdx]
psubb mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
psubb mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_F9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202020202020",
    "MM1": "0x2020202020202020"
  }
}
%endif

; PSUBW mm, mm/m64 (0F F9): wrapping subtract of packed words.
; Every word difference here is 0x2020. No byte borrows from its neighbour
; with these operands, so the result does not depend on the lane width.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x6162636465666768
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x4142434445464748
PUT_Q 3, 0x5152535455565758

; Memory-source form.
movq mm0, [rdx]
psubw mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
psubw mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_FA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202020202020",
    "MM1": "0x2020202020202020"
  }
}
%endif

; PSUBD mm, mm/m64 (0F FA): wrapping subtract of packed dwords.
; Every dword difference here is 0x20202020. No byte borrows from its
; neighbour with these operands, so the result does not depend on the lane
; width.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x6162636465666768
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x4142434445464748
PUT_Q 3, 0x5152535455565758

; Memory-source form.
movq mm0, [rdx]
psubd mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
psubd mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_FB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0x2020202020202020",
    "MM1": "0x2020202020202020"
  }
}
%endif

; PSUBQ mm, mm/m64 (0F FB): 64-bit subtract.
; 0x6162636465666768 - 0x4142434445464748 = 0x2020202020202020. No byte
; borrows from its neighbour with these operands.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x6162636465666768
PUT_Q 1, 0x7172737475767778
PUT_Q 2, 0x4142434445464748
PUT_Q 3, 0x5152535455565758

; Memory-source form.
movq mm0, [rdx]
psubq mm0, [rdx + 16]

; Register-source form.
movq mm1, [rdx]
movq mm2, [rdx + 16]
psubq mm1, mm2

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_FC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA2A4A6A8AAACAEB0",
    "MM1": "0xA2A4A6A8AAACAEB0"
  }
}
%endif

; PADDB mm, mm/m64 (0F FC): wrapping add of packed bytes.
; Byte sums run from 0x41 + 0x61 = 0xA2 up to 0x48 + 0x68 = 0xB0; none of
; them carries out of its byte.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form addend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

paddb mm0, mm2          ; register source
paddb mm1, [rdx + 16]   ; memory source, same addend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_FD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA2A4A6A8AAACAEB0",
    "MM1": "0xA2A4A6A8AAACAEB0"
  }
}
%endif

; PADDW mm, mm/m64 (0F FD): wrapping add of packed words.
; Word sums run from 0xA2A4 to 0xAEB0. No byte carries into its neighbour
; with these operands, so the result does not depend on the lane width.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form addend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

paddw mm0, mm2          ; register source
paddw mm1, [rdx + 16]   ; memory source, same addend

hlt


================================================
FILE: unittests/ASM/TwoByte/0F_FE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM0": "0xA2A4A6A8AAACAEB0",
    "MM1": "0xA2A4A6A8AAACAEB0"
  }
}
%endif

; PADDD mm, mm/m64 (0F FE): wrapping add of packed dwords.
; Dword sums are 0xA2A4A6A8 and 0xAAACAEB0. No byte carries into its
; neighbour with these operands, so the result does not depend on the lane
; width.
%define SCRATCH 0xe0000000

; PUT_Q slot, value: load the 64-bit constant into RAX, then store it to
; qword number `slot` of the scratch area addressed by RDX.
%macro PUT_Q 2
  mov rax, %2
  mov [rdx + 8 * %1], rax
%endmacro

mov rdx, SCRATCH

PUT_Q 0, 0x4142434445464748
PUT_Q 1, 0x5152535455565758
PUT_Q 2, 0x6162636465666768
PUT_Q 3, 0x7172737475767778

; mm0 and mm1 start from the same value; mm2 holds the register-form addend.
movq mm0, [rdx]
movq mm1, [rdx]
movq mm2, [rdx + 16]

paddd mm0, mm2          ; register source
paddd mm1, [rdx + 16]   ; memory source, same addend

hlt


================================================
FILE: unittests/ASM/VEX/andn.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0xFFFFFFFFFFFFFFFF",
      "RBX": "0",
      "RCX": "0xFFFFFFFF",
      "RDX": "0"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; ANDN dst, src1, src2 computes dst = (NOT src1) AND src2.

; 64-bit: NOT 0 AND -1 sets every bit of RAX.
mov rax, 0
mov rbx, -1
andn rax, rax, rbx
; RBX is still -1 here, so NOT RBX is 0 and the result is 0 whatever RAX holds.
andn rbx, rbx, rax

; 32-bit: the same two checks on ECX/EDX.
; RDX starts as -1 and is expected to end as 0, so the 32-bit form must also
; clear the upper half of the destination register.
mov rcx, 0
mov rdx, -1
andn ecx, ecx, edx
andn edx, edx, ecx

hlt


================================================
FILE: unittests/ASM/VEX/bextr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RBX": "0",
      "RDX": "0xFF",
      "RSI": "0",
      "R8" : "0xDEADBEEFDEADBEEF",
      "R9" : "0xDEADBEEF",
      "R14": "0x7F",
      "R15": "0x838"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; BEXTR dst, src, ctrl extracts a bit field from src.
; In ctrl, bits 7:0 give the start bit and bits 15:8 give the field length.

; General extraction
mov r14, 0x7FFFFFFFFFFFFFFF
mov r15, 0x838              ; Start at bit 56 and extract 8 bits
bextr r14, r14, r15         ; This results in 0x7F being placed into R14

; Extraction with 0 bits should clear the destination
mov rbx, -1
mov rcx, 0
bextr rbx, rbx, rcx

; Extraction with 'SrcSize' bits should get the unchanged register
mov r8, 0xDEADBEEFDEADBEEF
mov r9, 16384              ; Start at 0 extract 64 bits
bextr r8, r8, r9           ; r8 should stay the same

; Same tests as above but with 32-bit registers

; General extraction
mov rdx, 0x7FFFFFFFFFFFFFFF
mov rsi, 0x818              ; Start at bit 24 and extract 8 bits
bextr edx, edx, esi         ; This results in 0xFF being placed into EDX

; Extraction with 0 bits should clear RSI to 0
mov rsi, -1
mov rdi, 0
bextr esi, esi, edi

; Extraction with 'SrcSize' bits should get the unchanged register
; (R9 held the 64-bit control value above; it is reused here as the data register.)
mov r9, 0xDEADBEEFDEADBEEF
mov r10, 8192               ; Start at 0 extract 32 bits
bextr r9d, r9d, r10d        ; r9 should become 0xDEADBEEF (and r9d stays the same)

hlt


================================================
FILE: unittests/ASM/VEX/blsi.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "1",
      "RBX": "0xFF00000000000000",
      "RCX": "0x0100000000000000",
      "RDX": "1",
      "RSI": "0xFF000000",
      "RDI": "0x01000000"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; BLSI dst, src isolates the lowest set bit of src (src AND -src).

; Trivial test, this should result in 1.
mov rax, 11
blsi rax, rax

; Results in the lowest set bit (bit 56) being extracted
; RBX is only read, so it keeps its input value; RCX is zeroed first so the
; result cannot come from stale contents.
mov rbx, 0xFF00000000000000
mov rcx, 0
blsi rcx, rbx

; Same tests but with 32-bit registers

; Trivial test, this should result in 1.
mov edx, 11
blsi edx, edx

; Results in the lowest set bit (bit 24) being extracted
mov rsi, 0xFF000000
mov rdi, 0
blsi edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/blsmsk.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "1",
      "RBX": "0xFF00000000000000",
      "RCX": "0x01FFFFFFFFFFFFFF",
      "RDX": "1",
      "RSI": "0xFF000000",
      "RDI": "0x01FFFFFF"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; BLSMSK dst, src builds a mask of all bits up to and including the lowest
; set bit of src (src XOR (src - 1)).

; Trivial test, this should result in 1.
; 11 XOR 10 = 1: bit 0 is already the lowest set bit.
mov rax, 11
blsmsk rax, rax

; Results in 0x01FFFFFFFFFFFFFF being placed into RCX
; RBX is only read, so it keeps its input value.
mov rbx, 0xFF00000000000000
blsmsk rcx, rbx

; Same tests but with 32-bit registers

; Trivial test, this should result in 1.
mov edx, 11
blsmsk edx, edx

; Results in 0x01FFFFFF being placed in EDI
mov rsi, 0xFF000000
blsmsk edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/blsr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "10",
      "RBX": "0xFF00000000000000",
      "RCX": "0xFE00000000000000",
      "RDX": "10",
      "RSI": "0xFF000000",
      "RDI": "0xFE000000"
  },
  "HostFeatures": ["BMI1"]
}
%endif

; BLSR dst, src clears the lowest set bit of src (src AND (src - 1)).

; Trivial test, this should result in 10.
; 11 AND 10 = 10: bit 0 is cleared.
mov rax, 11
blsr rax, rax

; Results in 0xFE00000000000000 being placed into RCX
; RBX is only read, so it keeps its input value.
mov rbx, 0xFF00000000000000
blsr rcx, rbx

; Same tests but with 32-bit registers

; Trivial test, this should result in 10.
mov edx, 11
blsr edx, edx

; Results in 0xFE000000 being placed in EDI
mov rsi, 0xFF000000
blsr edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/bzhi.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0xFFFFFFFFFFFFFFFF",
      "RBX": "64",
      "RCX": "0x00000000000003FF",
      "RDX": "10",
      "RSI": "0x00000000FFFFFFFF",
      "RDI": "32",
      "RBP": "0x00000000000003FF",
      "RSP": "10"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; BZHI dst, src, idx copies src and zeroes every bit from position idx
; upwards. An index equal to the operand size therefore keeps the value
; intact, and an index of 10 keeps only the low ten bits (0x3FF).

; Should not alter the source value
mov rax, -1
mov rbx, 64
bzhi rax, rax, rbx

; General operation
mov rcx, -1
mov rdx, 10
bzhi rcx, rcx, rdx

; 32-bit tests

; Should not alter the source value
mov esi, -1
mov edi, 32
bzhi esi, esi, edi

; General operation
; NOTE: RSP is used as an ordinary index register here. The test contains no
; push, pop, call or ret, so overwriting the stack pointer is harmless
; within this file.
mov ebp, -1
mov esp, 10
bzhi ebp, ebp, esp

hlt


================================================
FILE: unittests/ASM/VEX/fma_fmadd_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc01c000000000000", "0xc035800000000000", "0xc073000000000000", "0xc083f00000000000"],
    "XMM1": ["0x4045c00000000000", "0x4063400000000000", "0xc0b7cf8000000000", "0xc0d2b66000000000"],
    "XMM2": ["0x4040800000000000", "0x4053b00000000000", "0xc0959e0000000000", "0xc0b5448000000000"],
    "XMM3": ["0xc01c000000000000", "0xc035800000000000", "0", "0"],
    "XMM4": ["0x4045c00000000000", "0x4063400000000000", "0", "0"],
    "XMM5": ["0x4040800000000000", "0x4053b00000000000", "0", "0"]
  }
}
%endif

; VFMADD{231,213,132}PD: fused multiply-add on packed doubles.
; The same three input vectors are loaded twice: ymm0-ymm2 feed the 256-bit
; forms and ymm3-ymm5 feed the 128-bit forms.
vmovups ymm0, [rel .vec_a]
vmovups ymm1, [rel .vec_b]
vmovups ymm2, [rel .vec_c]

vmovups ymm3, [rel .vec_a]
vmovups ymm4, [rel .vec_b]
vmovups ymm5, [rel .vec_c]

; 256-bit forms. The three instructions are chained: each one consumes the
; register written by the one before it.
vfmadd231pd ymm0, ymm1, ymm2    ; ymm0 = ymm1 * ymm2 + ymm0
vfmadd213pd ymm1, ymm0, ymm2    ; ymm1 = ymm0 * ymm1 + ymm2
vfmadd132pd ymm2, ymm1, ymm0    ; ymm2 = ymm2 * ymm0 + ymm1

; 128-bit forms: same chain on the low two lanes; the upper halves of
; ymm3-ymm5 are expected to read back as zero.
vfmadd231pd xmm3, xmm4, xmm5
vfmadd213pd xmm4, xmm3, xmm5
vfmadd132pd xmm5, xmm4, xmm3

hlt

; Three consecutive 32-byte input vectors, four doubles each.
align 32
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

.vec_b:
dq -6.0, -7.0, 20.0, 30.0

.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmadd_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc1ac0000c0e00000", "0x4294999942400000", "0xc41f8000c3980000", "0x44bd4000446d0000"],
    "XMM1": ["0x431a0000422e0000", "0xc4291999c3c2c000", "0xc695b300c5be7c00", "0x4793e90d47143780"],
    "XMM2": ["0x429d800042040000", "0xc49c1051c4236000", "0xc5aa2400c4acf000", "0x47eceac0476b3d80"],
    "XMM3": ["0xc1ac0000c0e00000", "0x4294999942400000", "0", "0"],
    "XMM4": ["0x431a0000422e0000", "0xc4291999c3c2c000", "0", "0"],
    "XMM5": ["0x429d800042040000", "0xc49c1051c4236000", "0", "0"]
  }
}
%endif

; VFMADD{231,213,132}PS: fused multiply-add on packed singles.
; The same three input vectors are loaded twice: ymm0-ymm2 feed the 256-bit
; forms and ymm3-ymm5 feed the 128-bit forms.
vmovups ymm0, [rel .vec_a]
vmovups ymm1, [rel .vec_b]
vmovups ymm2, [rel .vec_c]

vmovups ymm3, [rel .vec_a]
vmovups ymm4, [rel .vec_b]
vmovups ymm5, [rel .vec_c]

; 256-bit forms. The three instructions are chained: each one consumes the
; register written by the one before it.
vfmadd231ps ymm0, ymm1, ymm2    ; ymm0 = ymm1 * ymm2 + ymm0
vfmadd213ps ymm1, ymm0, ymm2    ; ymm1 = ymm0 * ymm1 + ymm2
vfmadd132ps ymm2, ymm1, ymm0    ; ymm2 = ymm2 * ymm0 + ymm1

; 128-bit forms: same chain on the low four lanes; the upper halves of
; ymm3-ymm5 are expected to read back as zero.
vfmadd231ps xmm3, xmm4, xmm5
vfmadd213ps xmm4, xmm3, xmm5
vfmadd132ps xmm5, xmm4, xmm3

hlt

; Three consecutive 32-byte input vectors, eight singles each.
align 32
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fmadd_sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc01c000000000000", "0x4008000000000000", "0", "0"],
    "XMM1": ["0x4045c00000000000", "0xc01c000000000000", "0", "0"],
    "XMM2": ["0x4040800000000000", "0x400c000000000000", "0", "0"]
  }
}
%endif

; VFMADD{231,213,132}SD: scalar double fused multiply-add.
; Only the lowest double of each destination is computed. The expected
; values show the second double keeping its loaded value and the upper
; 128 bits reading back as zero.
vmovups ymm0, [rel .vec_a]
vmovups ymm1, [rel .vec_b]
vmovups ymm2, [rel .vec_c]

; The three instructions are chained: each one consumes the register written
; by the one before it.
vfmadd231sd xmm0, xmm1, xmm2    ; xmm0[0] = xmm1[0] * xmm2[0] + xmm0[0]
vfmadd213sd xmm1, xmm0, xmm2    ; xmm1[0] = xmm0[0] * xmm1[0] + xmm2[0]
vfmadd132sd xmm2, xmm1, xmm0    ; xmm2[0] = xmm2[0] * xmm0[0] + xmm1[0]

hlt

; Three consecutive 32-byte input vectors, four doubles each.
align 32
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

.vec_b:
dq -6.0, -7.0, 20.0, 30.0

.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmadd_ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x40400000c0e00000", "0x40a0000040800000", "0", "0"],
    "XMM1": ["0xc0e00000422e0000", "0xc1100000c1000000", "0", "0"],
    "XMM2": ["0x4060000042040000", "0xc0f66666c0b00000", "0", "0"]
  }
}
%endif

; VFMADD{231,213,132}SS: scalar single fused multiply-add.
; Only the lowest single of each destination is computed. The expected
; values show the other three singles of the low 128 bits keeping their
; loaded values and the upper 128 bits reading back as zero.
vmovups ymm0, [rel .vec_a]
vmovups ymm1, [rel .vec_b]
vmovups ymm2, [rel .vec_c]

; The three instructions are chained: each one consumes the register written
; by the one before it.
vfmadd231ss xmm0, xmm1, xmm2    ; xmm0[0] = xmm1[0] * xmm2[0] + xmm0[0]
vfmadd213ss xmm1, xmm0, xmm2    ; xmm1[0] = xmm0[0] * xmm1[0] + xmm2[0]
vfmadd132ss xmm2, xmm1, xmm0    ; xmm2[0] = xmm2[0] * xmm0[0] + xmm1[0]

hlt

; Three consecutive 32-byte input vectors, eight singles each.
align 32
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fmaddsub_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc026000000000000", "0xc035800000000000", "0xc073c00000000000", "0xc083f00000000000"],
    "XMM1": ["0x4050200000000000", "0x4063400000000000", "0xc0b8a08000000000", "0xc0d2b66000000000"],
    "XMM2": ["0xc054400000000000", "0x4053b00000000000", "0x40c5e14000000000", "0xc0b5448000000000"],
    "XMM3": ["0xc026000000000000", "0xc035800000000000", "0", "0"],
    "XMM4": ["0x4050200000000000", "0x4063400000000000", "0", "0"],
    "XMM5": ["0xc054400000000000", "0x4053b00000000000", "0", "0"]
  }
}
%endif

; VFMADDSUB{231,213,132}PD: fused multiply with alternating subtract/add on
; packed doubles. Even-numbered lanes compute product - addend and
; odd-numbered lanes compute product + addend.
; The same three input vectors are loaded twice: ymm0-ymm2 feed the 256-bit
; forms and ymm3-ymm5 feed the 128-bit forms.
vmovups ymm0, [rel .vec_a]
vmovups ymm1, [rel .vec_b]
vmovups ymm2, [rel .vec_c]

vmovups ymm3, [rel .vec_a]
vmovups ymm4, [rel .vec_b]
vmovups ymm5, [rel .vec_c]

; 256-bit forms. The three instructions are chained: each one consumes the
; register written by the one before it.
vfmaddsub231pd ymm0, ymm1, ymm2    ; ymm0 = ymm1 * ymm2 -/+ ymm0
vfmaddsub213pd ymm1, ymm0, ymm2    ; ymm1 = ymm0 * ymm1 -/+ ymm2
vfmaddsub132pd ymm2, ymm1, ymm0    ; ymm2 = ymm2 * ymm0 -/+ ymm1

; 128-bit forms: same chain on the low two lanes; the upper halves of
; ymm3-ymm5 are expected to read back as zero.
vfmaddsub231pd xmm3, xmm4, xmm5
vfmaddsub213pd xmm4, xmm3, xmm5
vfmaddsub132pd xmm5, xmm4, xmm3

hlt

; Three consecutive 32-byte input vectors, four doubles each.
align 32
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

.vec_b:
dq -6.0, -7.0, 20.0, 30.0

.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmaddsub_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc1ac0000c1300000", "0x4294999942200000", "0xc41f8000c39e0000", "0x44bd400044690000"],
    "XMM1": ["0x431a000042810000", "0xc4291999c39d4000", "0xc695b300c5c50400", "0x4793e90d47118880"],
    "XMM2": ["0x429d8000c2a20000", "0xc49c105142bd0000", "0xc5aa2400462f0a00", "0x47eceac0c66fea00"],
    "XMM3": ["0xc1ac0000c1300000", "0x4294999942200000", "0", "0"],
    "XMM4": ["0x431a000042810000", "0xc4291999c39d4000", "0", "0"],
    "XMM5": ["0x429d8000c2a20000", "0xc49c105142bd0000", "0", "0"]
  }
}
%endif

; Packed single-precision VFMADDSUB in its 231, 213 and 132 operand orders.
; Even-numbered elements compute (product - addend), odd-numbered elements
; compute (product + addend). The chain is run once on 256-bit registers and
; once on 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmaddsub231ps ymm0, ymm1, ymm2 ; ymm0 = ymm1 * ymm2 -/+ ymm0
vfmaddsub213ps ymm1, ymm0, ymm2 ; ymm1 = ymm0 * ymm1 -/+ ymm2
vfmaddsub132ps ymm2, ymm1, ymm0 ; ymm2 = ymm2 * ymm0 -/+ ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfmaddsub231ps xmm3, xmm4, xmm5
vfmaddsub213ps xmm4, xmm3, xmm5
vfmaddsub132ps xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial contents of ymm1 / ymm4
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial contents of ymm2 / ymm5
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fmsub_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc026000000000000", "0xc03b800000000000", "0xc073c00000000000", "0xc084600000000000"],
    "XMM1": ["0x4050200000000000", "0x4067a00000000000", "0xc0b8a08000000000", "0xc0d314a000000000"],
    "XMM2": ["0xc054400000000000", "0xc071d40000000000", "0x40c5e14000000000", "0x40e0629000000000"],
    "XMM3": ["0xc026000000000000", "0xc03b800000000000", "0", "0"],
    "XMM4": ["0x4050200000000000", "0x4067a00000000000", "0", "0"],
    "XMM5": ["0xc054400000000000", "0xc071d40000000000", "0", "0"]
  }
}
%endif

; Packed double-precision VFMSUB (product - addend) in its 231, 213 and 132
; operand orders, run on 256-bit registers (ymm0-ymm2) and then on 128-bit
; registers (xmm3-xmm5) seeded with the same inputs.
;
; The operands below are declared with dq so that each label really provides
; four doubles (32 bytes). Declaring them with dd would make each label only
; 16 bytes long: the 256-bit loads would overlap the following label, the load
; from .data3 would run past the end of the data, and the instructions would
; operate on float bit patterns reinterpreted as doubles.
;
; Expected values per element (a, b, c = .data, .data2, .data3):
;   reg0 = b * c - a
;   reg1 = reg0 * b - c
;   reg2 = c * reg0 - reg1
; e.g. element 0: -11.0, 64.5, -81.0

vmovups ymm0, [rel .data]
vmovups ymm1, [rel .data2]
vmovups ymm2, [rel .data3]

vmovups ymm3, [rel .data]
vmovups ymm4, [rel .data2]
vmovups ymm5, [rel .data3]

vfmsub231pd ymm0, ymm1, ymm2 ; ymm0 = ymm1 * ymm2 - ymm0
vfmsub213pd ymm1, ymm0, ymm2 ; ymm1 = ymm0 * ymm1 - ymm2
vfmsub132pd ymm2, ymm1, ymm0 ; ymm2 = ymm2 * ymm0 - ymm1

; 128-bit forms: RegData expects bits 255:128 of the destinations to be zero.
vfmsub231pd xmm3, xmm4, xmm5
vfmsub213pd xmm4, xmm3, xmm5
vfmsub132pd xmm5, xmm4, xmm3

hlt

align 32
.data:
dq 2.0, 3.0
dq 6.0, 7.0

.data2:
dq -6.0, -7.0
dq 20.0, 30.0

.data3:
dq 1.5, 3.5
dq -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmsub_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc1dc0000c1300000", "0x4280999942200000", "0xc4230000c39e0000", "0x44bb000044690000"],
    "XMM1": ["0x433d000042810000", "0xc40ebfffc39d4000", "0xc698a500c5c50400", "0x479208f347118880"],
    "XMM2": ["0xc38ea000c2a20000", "0x4297c7ac42bd0000", "0x47031480462f0a00", "0xc6e85899c66fea00"],
    "XMM3": ["0xc1dc0000c1300000", "0x4280999942200000", "0", "0"],
    "XMM4": ["0x433d000042810000", "0xc40ebfffc39d4000", "0", "0"],
    "XMM5": ["0xc38ea000c2a20000", "0x4297c7ac42bd0000", "0", "0"]
  }
}
%endif

; Packed single-precision VFMSUB (product - addend) in its 231, 213 and 132
; operand orders. The chain is run once on 256-bit registers and once on
; 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmsub231ps ymm0, ymm1, ymm2 ; ymm0 = ymm1 * ymm2 - ymm0
vfmsub213ps ymm1, ymm0, ymm2 ; ymm1 = ymm0 * ymm1 - ymm2
vfmsub132ps ymm2, ymm1, ymm0 ; ymm2 = ymm2 * ymm0 - ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfmsub231ps xmm3, xmm4, xmm5
vfmsub213ps xmm4, xmm3, xmm5
vfmsub132ps xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial contents of ymm1 / ymm4
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial contents of ymm2 / ymm5
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fmsub_sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc026000000000000", "0x4008000000000000", "0", "0"],
    "XMM1": ["0x4050200000000000", "0xc01c000000000000", "0", "0"],
    "XMM2": ["0xc054400000000000", "0x400c000000000000", "0", "0"]
  }
}
%endif

; Scalar double-precision VFMSUB (product - addend), exercising the 231, 213
; and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects element 1
; to keep its loaded value and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmsub231sd xmm0, xmm1, xmm2 ; xmm0[0] = xmm1[0] * xmm2[0] - xmm0[0]
vfmsub213sd xmm1, xmm0, xmm2 ; xmm1[0] = xmm0[0] * xmm1[0] - xmm2[0]
vfmsub132sd xmm2, xmm1, xmm0 ; xmm2[0] = xmm2[0] * xmm0[0] - xmm1[0]

hlt

align 32
; Initial ymm0 contents (four doubles)
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

; Initial ymm1 contents
.vec_b:
dq -6.0, -7.0, 20.0, 30.0

; Initial ymm2 contents
.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmsub_ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x40400000c1300000", "0x40a0000040800000", "0", "0"],
    "XMM1": ["0xc0e0000042810000", "0xc1100000c1000000", "0", "0"],
    "XMM2": ["0x40600000c2a20000", "0xc0f66666c0b00000", "0", "0"]
  }
}
%endif

; Scalar single-precision VFMSUB (product - addend), exercising the 231, 213
; and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects elements 1-3
; to keep their loaded values and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmsub231ss xmm0, xmm1, xmm2 ; xmm0[0] = xmm1[0] * xmm2[0] - xmm0[0]
vfmsub213ss xmm1, xmm0, xmm2 ; xmm1[0] = xmm0[0] * xmm1[0] - xmm2[0]
vfmsub132ss xmm2, xmm1, xmm0 ; xmm2[0] = xmm2[0] * xmm0[0] - xmm1[0]

hlt

align 32
; Initial ymm0 contents (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial ymm1 contents
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial ymm2 contents
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fmsubadd_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc01c000000000000", "0xc03b800000000000", "0xc073000000000000", "0xc084600000000000"],
    "XMM1": ["0x4045c00000000000", "0x4067a00000000000", "0xc0b7cf8000000000", "0xc0d314a000000000"],
    "XMM2": ["0x4040800000000000", "0xc071d40000000000", "0xc0959e0000000000", "0x40e0629000000000"],
    "XMM3": ["0xc01c000000000000", "0xc03b800000000000", "0", "0"],
    "XMM4": ["0x4045c00000000000", "0x4067a00000000000", "0", "0"],
    "XMM5": ["0x4040800000000000", "0xc071d40000000000", "0", "0"]
  }
}
%endif

; Packed double-precision VFMSUBADD in its 231, 213 and 132 operand orders.
; Even-numbered elements compute (product + addend), odd-numbered elements
; compute (product - addend). The chain is run once on 256-bit registers and
; once on 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmsubadd231pd ymm0, ymm1, ymm2 ; ymm0 = ymm1 * ymm2 +/- ymm0
vfmsubadd213pd ymm1, ymm0, ymm2 ; ymm1 = ymm0 * ymm1 +/- ymm2
vfmsubadd132pd ymm2, ymm1, ymm0 ; ymm2 = ymm2 * ymm0 +/- ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfmsubadd231pd xmm3, xmm4, xmm5
vfmsubadd213pd xmm4, xmm3, xmm5
vfmsubadd132pd xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (four doubles)
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

; Initial contents of ymm1 / ymm4
.vec_b:
dq -6.0, -7.0, 20.0, 30.0

; Initial contents of ymm2 / ymm5
.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fmsubadd_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xc1dc0000c0e00000", "0x4280999942400000", "0xc4230000c3980000", "0x44bb0000446d0000"],
    "XMM1": ["0x433d0000422e0000", "0xc40ebfffc3c2c000", "0xc698a500c5be7c00", "0x479208f347143780"],
    "XMM2": ["0xc38ea00042040000", "0x4297c7acc4236000", "0x47031480c4acf000", "0xc6e85899476b3d80"],
    "XMM3": ["0xc1dc0000c0e00000", "0x4280999942400000", "0", "0"],
    "XMM4": ["0x433d0000422e0000", "0xc40ebfffc3c2c000", "0", "0"],
    "XMM5": ["0xc38ea00042040000", "0x4297c7acc4236000", "0", "0"]
  }
}
%endif

; Packed single-precision VFMSUBADD in its 231, 213 and 132 operand orders.
; Even-numbered elements compute (product + addend), odd-numbered elements
; compute (product - addend). The chain is run once on 256-bit registers and
; once on 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfmsubadd231ps ymm0, ymm1, ymm2 ; ymm0 = ymm1 * ymm2 +/- ymm0
vfmsubadd213ps ymm1, ymm0, ymm2 ; ymm1 = ymm0 * ymm1 +/- ymm2
vfmsubadd132ps ymm2, ymm1, ymm0 ; ymm2 = ymm2 * ymm0 +/- ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfmsubadd231ps xmm3, xmm4, xmm5
vfmsubadd213ps xmm4, xmm3, xmm5
vfmsubadd132ps xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial contents of ymm1 / ymm4
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial contents of ymm2 / ymm5
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fnmadd_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4026000000000000", "0x403b800000000000", "0x4073c00000000000", "0x4084600000000000"],
    "XMM1": ["0x4050e00000000000", "0x4068800000000000", "0xc0b8bf8000000000", "0xc0d31f6000000000"],
    "XMM2": ["0x4049800000000000", "0x4058f00000000000", "0xc096760000000000", "0xc0b5bb8000000000"],
    "XMM3": ["0x4026000000000000", "0x403b800000000000", "0", "0"],
    "XMM4": ["0x4050e00000000000", "0x4068800000000000", "0", "0"],
    "XMM5": ["0x4049800000000000", "0x4058f00000000000", "0", "0"]
  }
}
%endif

; Packed double-precision VFNMADD (-(product) + addend) in its 231, 213 and
; 132 operand orders, run on 256-bit registers (ymm0-ymm2) and then on 128-bit
; registers (xmm3-xmm5) seeded with the same inputs.
;
; The operands below are declared with dq so that each label really provides
; four doubles (32 bytes). Declaring them with dd would make each label only
; 16 bytes long: the 256-bit loads would overlap the following label, the load
; from .data3 would run past the end of the data, and the instructions would
; operate on float bit patterns reinterpreted as doubles.
;
; Expected values per element (a, b, c = .data, .data2, .data3):
;   reg0 = -(b * c) + a
;   reg1 = -(reg0 * b) + c
;   reg2 = -(c * reg0) + reg1
; e.g. element 0: 11.0, 67.5, 51.0

vmovups ymm0, [rel .data]
vmovups ymm1, [rel .data2]
vmovups ymm2, [rel .data3]

vmovups ymm3, [rel .data]
vmovups ymm4, [rel .data2]
vmovups ymm5, [rel .data3]

vfnmadd231pd ymm0, ymm1, ymm2 ; ymm0 = -(ymm1 * ymm2) + ymm0
vfnmadd213pd ymm1, ymm0, ymm2 ; ymm1 = -(ymm0 * ymm1) + ymm2
vfnmadd132pd ymm2, ymm1, ymm0 ; ymm2 = -(ymm2 * ymm0) + ymm1

; 128-bit forms: RegData expects bits 255:128 of the destinations to be zero.
vfnmadd231pd xmm3, xmm4, xmm5
vfnmadd213pd xmm4, xmm3, xmm5
vfnmadd132pd xmm5, xmm4, xmm3

hlt

align 32
.data:
dq 2.0, 3.0
dq 6.0, 7.0

.data2:
dq -6.0, -7.0
dq 20.0, 30.0

.data3:
dq 1.5, 3.5
dq -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fnmadd_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x41dc000041300000", "0xc2809999c2200000", "0x44230000439e0000", "0xc4bb0000c4690000"],
    "XMM1": ["0x4344000042870000", "0xc4129999c3a2c000", "0xc698fb00c5c5fc00", "0x4792270d4711b780"],
    "XMM2": ["0x42c78000424c0000", "0xc4873051c4086000", "0xc5addc00c4b3b000", "0x47ea19da47674580"],
    "XMM3": ["0x41dc000041300000", "0xc2809999c2200000", "0", "0"],
    "XMM4": ["0x4344000042870000", "0xc4129999c3a2c000", "0", "0"],
    "XMM5": ["0x42c78000424c0000", "0xc4873051c4086000", "0", "0"]
  }
}
%endif

; Packed single-precision VFNMADD (-(product) + addend) in its 231, 213 and
; 132 operand orders. The chain is run once on 256-bit registers and once on
; 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmadd231ps ymm0, ymm1, ymm2 ; ymm0 = -(ymm1 * ymm2) + ymm0
vfnmadd213ps ymm1, ymm0, ymm2 ; ymm1 = -(ymm0 * ymm1) + ymm2
vfnmadd132ps ymm2, ymm1, ymm0 ; ymm2 = -(ymm2 * ymm0) + ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfnmadd231ps xmm3, xmm4, xmm5
vfnmadd213ps xmm4, xmm3, xmm5
vfnmadd132ps xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial contents of ymm1 / ymm4
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial contents of ymm2 / ymm5
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fnmadd_sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4026000000000000", "0x4008000000000000", "0", "0"],
    "XMM1": ["0x4050e00000000000", "0xc01c000000000000", "0", "0"],
    "XMM2": ["0x4049800000000000", "0x400c000000000000", "0", "0"]
  }
}
%endif

; Scalar double-precision VFNMADD (-(product) + addend), exercising the 231,
; 213 and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects element 1
; to keep its loaded value and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmadd231sd xmm0, xmm1, xmm2 ; xmm0[0] = -(xmm1[0] * xmm2[0]) + xmm0[0]
vfnmadd213sd xmm1, xmm0, xmm2 ; xmm1[0] = -(xmm0[0] * xmm1[0]) + xmm2[0]
vfnmadd132sd xmm2, xmm1, xmm0 ; xmm2[0] = -(xmm2[0] * xmm0[0]) + xmm1[0]

hlt

align 32
; Initial ymm0 contents (four doubles)
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

; Initial ymm1 contents
.vec_b:
dq -6.0, -7.0, 20.0, 30.0

; Initial ymm2 contents
.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fnmadd_ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4040000041300000", "0x40a0000040800000", "0", "0"],
    "XMM1": ["0xc0e0000042870000", "0xc1100000c1000000", "0", "0"],
    "XMM2": ["0x40600000424c0000", "0xc0f66666c0b00000", "0", "0"]
  }
}
%endif

; Scalar single-precision VFNMADD (-(product) + addend), exercising the 231,
; 213 and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects elements 1-3
; to keep their loaded values and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmadd231ss xmm0, xmm1, xmm2 ; xmm0[0] = -(xmm1[0] * xmm2[0]) + xmm0[0]
vfnmadd213ss xmm1, xmm0, xmm2 ; xmm1[0] = -(xmm0[0] * xmm1[0]) + xmm2[0]
vfnmadd132ss xmm2, xmm1, xmm0 ; xmm2[0] = -(xmm2[0] * xmm0[0]) + xmm1[0]

hlt

align 32
; Initial ymm0 contents (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial ymm1 contents
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial ymm2 contents
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fnmsub_pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x401c000000000000", "0x4035800000000000", "0x4073000000000000", "0x4083f00000000000"],
    "XMM1": ["0x4044400000000000", "0x4062600000000000", "0xc0b7b08000000000", "0xc0d2aba000000000"],
    "XMM2": ["0xc049800000000000", "0xc06bc80000000000", "0x40c50c4000000000", "0x40e0087000000000"],
    "XMM3": ["0x401c000000000000", "0x4035800000000000", "0", "0"],
    "XMM4": ["0x4044400000000000", "0x4062600000000000", "0", "0"],
    "XMM5": ["0xc049800000000000", "0xc06bc80000000000", "0", "0"]
  }
}
%endif

; Packed double-precision VFNMSUB (-(product) - addend) in its 231, 213 and
; 132 operand orders, run on 256-bit registers (ymm0-ymm2) and then on 128-bit
; registers (xmm3-xmm5) seeded with the same inputs.
;
; The operands below are declared with dq so that each label really provides
; four doubles (32 bytes). Declaring them with dd would make each label only
; 16 bytes long: the 256-bit loads would overlap the following label, the load
; from .data3 would run past the end of the data, and the instructions would
; operate on float bit patterns reinterpreted as doubles.
;
; Expected values per element (a, b, c = .data, .data2, .data3):
;   reg0 = -(b * c) - a
;   reg1 = -(reg0 * b) - c
;   reg2 = -(c * reg0) - reg1
; e.g. element 0: 7.0, 40.5, -51.0

vmovups ymm0, [rel .data]
vmovups ymm1, [rel .data2]
vmovups ymm2, [rel .data3]

vmovups ymm3, [rel .data]
vmovups ymm4, [rel .data2]
vmovups ymm5, [rel .data3]

vfnmsub231pd ymm0, ymm1, ymm2 ; ymm0 = -(ymm1 * ymm2) - ymm0
vfnmsub213pd ymm1, ymm0, ymm2 ; ymm1 = -(ymm0 * ymm1) - ymm2
vfnmsub132pd ymm2, ymm1, ymm0 ; ymm2 = -(ymm2 * ymm0) - ymm1

; 128-bit forms: RegData expects bits 255:128 of the destinations to be zero.
vfnmsub231pd xmm3, xmm4, xmm5
vfnmsub213pd xmm4, xmm3, xmm5
vfnmsub132pd xmm5, xmm4, xmm3

hlt

align 32
.data:
dq 2.0, 3.0
dq 6.0, 7.0

.data2:
dq -6.0, -7.0
dq 20.0, 30.0

.data3:
dq 1.5, 3.5
dq -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fnmsub_ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x41ac000040e00000", "0xc2949999c2400000", "0x441f800043980000", "0xc4bd4000c46d0000"],
    "XMM1": ["0x4313000042220000", "0xc4253fffc3bd4000", "0xc6955d00c5bd8400", "0x4793caf347140880"],
    "XMM2": ["0xc35e4000c24c0000", "0x42b1c7ad42e50000", "0x4700438046286200", "0xc6eb24ffc6740a00"],
    "XMM3": ["0x41ac000040e00000", "0xc2949999c2400000", "0", "0"],
    "XMM4": ["0x4313000042220000", "0xc4253fffc3bd4000", "0", "0"],
    "XMM5": ["0xc35e4000c24c0000", "0x42b1c7ad42e50000", "0", "0"]
  }
}
%endif

; Packed single-precision VFNMSUB (-(product) - addend) in its 231, 213 and
; 132 operand orders. The chain is run once on 256-bit registers and once on
; 128-bit registers that start from the same inputs.

; --- 256-bit chain: ymm0..ymm2 ---
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmsub231ps ymm0, ymm1, ymm2 ; ymm0 = -(ymm1 * ymm2) - ymm0
vfnmsub213ps ymm1, ymm0, ymm2 ; ymm1 = -(ymm0 * ymm1) - ymm2
vfnmsub132ps ymm2, ymm1, ymm0 ; ymm2 = -(ymm2 * ymm0) - ymm1

; --- 128-bit chain: xmm3..xmm5 (RegData expects the upper halves zeroed) ---
vmovups ymm5, [rel .vec_c]
vmovups ymm4, [rel .vec_b]
vmovups ymm3, [rel .vec_a]

vfnmsub231ps xmm3, xmm4, xmm5
vfnmsub213ps xmm4, xmm3, xmm5
vfnmsub132ps xmm5, xmm4, xmm3

hlt

align 32
; Initial contents of ymm0 / ymm3 (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial contents of ymm1 / ymm4
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial contents of ymm2 / ymm5
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/fma_fnmsub_sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x401c000000000000", "0x4008000000000000", "0", "0"],
    "XMM1": ["0x4044400000000000", "0xc01c000000000000", "0", "0"],
    "XMM2": ["0xc049800000000000", "0x400c000000000000", "0", "0"]
  }
}
%endif

; Scalar double-precision VFNMSUB (-(product) - addend), exercising the 231,
; 213 and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects element 1
; to keep its loaded value and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmsub231sd xmm0, xmm1, xmm2 ; xmm0[0] = -(xmm1[0] * xmm2[0]) - xmm0[0]
vfnmsub213sd xmm1, xmm0, xmm2 ; xmm1[0] = -(xmm0[0] * xmm1[0]) - xmm2[0]
vfnmsub132sd xmm2, xmm1, xmm0 ; xmm2[0] = -(xmm2[0] * xmm0[0]) - xmm1[0]

hlt

align 32
; Initial ymm0 contents (four doubles)
.vec_a:
dq 2.0, 3.0, 6.0, 7.0

; Initial ymm1 contents
.vec_b:
dq -6.0, -7.0, 20.0, 30.0

; Initial ymm2 contents
.vec_c:
dq 1.5, 3.5, -15.5, -21.5


================================================
FILE: unittests/ASM/VEX/fma_fnmsub_ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4040000040e00000", "0x40a0000040800000", "0", "0"],
    "XMM1": ["0xc0e0000042220000", "0xc1100000c1000000", "0", "0"],
    "XMM2": ["0x40600000c24c0000", "0xc0f66666c0b00000", "0", "0"]
  }
}
%endif

; Scalar single-precision VFNMSUB (-(product) - addend), exercising the 231,
; 213 and 132 operand orders one after another so each result feeds the next.
; Only element 0 of each destination is computed; RegData expects elements 1-3
; to keep their loaded values and bits 255:128 to read back as zero.

; Seed all 256 bits of each register so stale upper data would be noticed.
vmovups ymm2, [rel .vec_c]
vmovups ymm1, [rel .vec_b]
vmovups ymm0, [rel .vec_a]

vfnmsub231ss xmm0, xmm1, xmm2 ; xmm0[0] = -(xmm1[0] * xmm2[0]) - xmm0[0]
vfnmsub213ss xmm1, xmm0, xmm2 ; xmm1[0] = -(xmm0[0] * xmm1[0]) - xmm2[0]
vfnmsub132ss xmm2, xmm1, xmm0 ; xmm2[0] = -(xmm2[0] * xmm0[0]) - xmm1[0]

hlt

align 32
; Initial ymm0 contents (eight floats)
.vec_a:
dd 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0

; Initial ymm1 contents
.vec_b:
dd -6.0, -7.0, -8.0, -9.0, 20.0, 30.0, 40.0, 50.0

; Initial ymm2 contents
.vec_c:
dd 1.5, 3.5, -5.5, -7.7, -15.5, -21.5, 23.5, 30.1


================================================
FILE: unittests/ASM/VEX/full_vpermq_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x00000000063afe2e"
  }
}
%endif

mov rax, 0

%assign i 0
%rep 256

; vpermq all the immediate encodings
vpermq ymm0, [rel .random_data + ((i * 32) % 4096)], i
vmovaps [rel .data_result + (i * 32)], ymm0

; CRC32 (by 64) the results
crc32 rax, qword [rel .data_result + (i * 32) + 0]
crc32 rax, qword [rel .data_result + (i * 32) + 8]
crc32 rax, qword [rel .data_result + (i * 32) + 16]
crc32 rax, qword [rel .data_result + (i * 32) + 24]
%assign i i+1
%endrep

hlt

align 32
.data_result:
times 256 dq 0, 0, 0, 0

align 32
; 8192-bytes of random data
.random_data:
db 0x18, 0xd6, 0xfe, 0x97, 0x75, 0x8c, 0x1a, 0x61, 0xc5, 0xc0, 0x7e, 0x3f, 0x46, 0xf9, 0xb0, 0xa3
db 0x16, 0xb2, 0x1c, 0x7b, 0x04, 0xa7, 0x7e, 0xbe, 0x04, 0xdc, 0xc6, 0x65, 0x43, 0x5e, 0xa4, 0x8e
db 0x40, 0xc5, 0x4f, 0x2f, 0xf2, 0x62, 0x6d, 0x1b, 0x41, 0xb6, 0x9c, 0xa4, 0xbf, 0x1a, 0x57, 0x1e
db 0x83, 0x53, 0x8a, 0x92, 0xbc, 0x67, 0xe8, 0xe8, 0xc3, 0xb9, 0xbc, 0xab, 0x38, 0xa0, 0x8e, 0x5d
db 0xcc, 0x2d, 0xa9, 0xd8, 0x74, 0xee, 0x8f, 0x57, 0x62, 0x9b, 0x0f, 0xf8, 0x76, 0x2a, 0x4d, 0x22
db 0x2f, 0xaf, 0x0b, 0x13, 0xbf, 0xa5, 0x87, 0xc1, 0x0f, 0xfd, 0x0b, 0x69, 0xbc, 0x58, 0x06, 0xa3
db 0xc4, 0x4f, 0x33, 0xf7, 0x53, 0xc8, 0xe6, 0x6b, 0xa5, 0x96, 0x33, 0x37, 0x89, 0xc1, 0x33, 0x4e
db 0xfb, 0x54, 0xbb, 0xd1, 0xcf, 0xa7, 0xe4, 0x77, 0x72, 0x3f, 0x68, 0xd1, 0x7b, 0xc7, 0x4f, 0x99
db 0xd8, 0xdd, 0xf3, 0x85, 0x10, 0x88, 0x0c, 0x1a, 0x80, 0x86, 0xd9, 0xce, 0x9d, 0x88, 0xc7, 0x2e
db 0x2b, 0xcb, 0x34, 0x17, 0xd6, 0x85, 0x1b, 0xa3, 0x8e, 0xc2, 0xbb, 0x74, 0x2c, 0xf2, 0x61, 0x09
db 0xaa, 0x7b, 0x1e, 0x5c, 0x15, 0xb6, 0x47, 0x08, 0xbb, 0x5d, 0x5b, 0x1b, 0x4c, 0xb9, 0xd1, 0x9c
db 0x49, 0xc3, 0x57, 0x93, 0x84, 0x43, 0x97, 0x65, 0x97, 0x5d, 0xb8, 0x4f, 0xe5, 0x69, 0x7e, 0x6e
db 0xc4, 0xee, 0xd3, 0x62, 0xcc, 0xf7, 0xd1, 0xd7, 0x88, 0xfe, 0x9b, 0xaa, 0x31, 0x10, 0x6c, 0x9b
db 0x37, 0x4a, 0x8e, 0x01, 0xbb, 0xe1, 0x02, 0xc0, 0x9a, 0xa4, 0x45, 0x7c, 0xb4, 0xc0, 0x5e, 0xda
db 0xf2, 0x15, 0x3b, 0xe5, 0x95, 0x65, 0xe3, 0xf2, 0xb0, 0x84, 0x6b, 0xb8, 0xf9, 0x11, 0xdd, 0xd4
db 0xed, 0x1d, 0xbf, 0xbd, 0xb9, 0x98, 0xe8, 0xab, 0x08, 0x21, 0xe1, 0x76, 0xcd, 0x31, 0x59, 0x35
db 0x16, 0x95, 0x15, 0xb9, 0x00, 0x2c, 0xb1, 0xf9, 0x7b, 0x4d, 0xaf, 0x80, 0x92, 0xa9, 0x31, 0x91
db 0xfe, 0xaa, 0x8e, 0xe4, 0x45, 0x28, 0x48, 0x40, 0x5c, 0xf7, 0xa9, 0x3f, 0x5a, 0x87, 0x51, 0x30
db 0x7b, 0x55, 0xfa, 0x8c, 0xec, 0xcc, 0x32, 0xd5, 0x8c, 0x5b, 0xa7, 0x1c, 0xc2, 0xee, 0x5f, 0xdb
db 0x3a, 0x5c, 0xdb, 0x3d, 0x8f, 0x17, 0x0c, 0xae, 0x70, 0x35, 0x3a, 0xdd, 0x07, 0xa1, 0x21, 0x53
db 0xa6, 0x4a, 0xa3, 0xd7, 0x65, 0x3f, 0x32, 0xcb, 0x48, 0x4e, 0x2e, 0x12, 0x47, 0x9e, 0x59, 0x8e
db 0xa6, 0x85, 0x04, 0x06, 0x60, 0xcc, 0xc3, 0x54, 0x91, 0x64, 0x14, 0x05, 0xad, 0xe8, 0x2d, 0x77
db 0x5b, 0x5d, 0xca, 0x6b, 0x8c, 0x3a, 0x89, 0x71, 0x30, 0xcd, 0xa0, 0x8e, 0x79, 0xf8, 0xa3, 0xdb
db 0x5c, 0x7b, 0x52, 0xcb, 0x6a, 0xb1, 0x32, 0x31, 0xbe, 0x1f, 0x1a, 0xb8, 0xb8, 0x5f, 0xc4, 0x12
db 0x0c, 0xd6, 0x9e, 0x0c, 0xef, 0xca, 0x5e, 0x71, 0x57, 0x9e, 0x70, 0x91, 0x38, 0x43, 0x5b, 0xd7
db 0x18, 0x83, 0xe2, 0x68, 0x38, 0x29, 0xd7, 0x55, 0x8b, 0x61, 0xfd, 0x6a, 0x81, 0xbf, 0x7c, 0xf1
db 0xc4, 0xcb, 0x4a, 0x45, 0xe5, 0x7f, 0xfe, 0x02, 0x09, 0x9d, 0x6e, 0xbe, 0x45, 0xc3, 0x2a, 0xf3
db 0xe4, 0x64, 0xcc, 0xe8, 0x70, 0x34, 0x96, 0x73, 0x63, 0xad, 0x3f, 0x02, 0x4b, 0xfd, 0xc4, 0x4f
db 0x40, 0x00, 0x29, 0x45, 0x50, 0x54, 0xb2, 0x9b, 0xe5, 0xa5, 0x88, 0xf2, 0xa0, 0xe1, 0x17, 0xe7
db 0xe6, 0xea, 0x20, 0x5b, 0x03, 0xa1, 0xdc, 0x1a, 0x73, 0x26, 0x90, 0x0f, 0x3e, 0x00, 0x05, 0x21
db 0x62, 0x3e, 0x9c, 0xe9, 0xb7, 0xc2, 0x63, 0x22, 0xc4, 0xd1, 0x13, 0x45, 0x43, 0x02, 0x99, 0x76
db 0x72, 0x4a, 0x86, 0xf9, 0xd3, 0x88, 0x96, 0xbd, 0xf1, 0xba, 0xd0, 0xa6, 0x35, 0x9c, 0x8e, 0xa1
db 0x46, 0x52, 0xd3, 0x64, 0xa7, 0x48, 0xba, 0xab, 0x2c, 0x45, 0xb1, 0x38, 0x95, 0xf0, 0xe9, 0xde
db 0x1e, 0x51, 0x16, 0x58, 0xa8, 0x27, 0x93, 0x6d, 0x26, 0x57, 0xb5, 0x26, 0xae, 0xd9, 0x74, 0xd7
db 0x00, 0x02, 0xd9, 0x91, 0xc2, 0xe1, 0xdb, 0xf4, 0x3f, 0xaa, 0x4e, 0x59, 0x35, 0xf1, 0x9b, 0xf9
db 0x13, 0x30, 0xb5, 0xc5, 0x7c, 0x4c, 0x8f, 0x00, 0x28, 0x5e, 0xc1, 0x52, 0xd8, 0x19, 0x0c, 0x0e
db 0x18, 0x4c, 0x92, 0x74, 0x6e, 0xae, 0xae, 0x42, 0x35, 0xfb, 0xe5, 0xc0, 0xf9, 0x08, 0xe2, 0x41
db 0xe6, 0x00, 0x90, 0x83, 0x73, 0xaa, 0x62, 0x80, 0x68, 0x3f, 0x53, 0x46, 0x74, 0x36, 0x43, 0x4c
db 0xd4, 0x62, 0x40, 0xcc, 0x46, 0x2d, 0x67, 0xa5, 0x06, 0x39, 0x7b, 0xaa, 0x64, 0xcd, 0xf4, 0x2a
db 0xf7, 0xd2, 0x0d, 0xcc, 0xb9, 0x7d, 0xb6, 0x73, 0x30, 0xe2, 0x3e, 0x92, 0xbe, 0x09, 0xf5, 0x41
db 0x93, 0xe4, 0x99, 0x96, 0x05, 0xb1, 0x74, 0xeb, 0x35, 0xcb, 0xd4, 0xac, 0xa9, 0x49, 0x34, 0x09
db 0x24, 0x7e, 0xea, 0xad, 0xcf, 0x14, 0xdd, 0xea, 0xe1, 0xf8, 0x77, 0x0d, 0x97, 0x6f, 0xfd, 0x49
db 0x8c, 0x3b, 0xec, 0x5e, 0xbc, 0x3f, 0xbd, 0xdb, 0xaf, 0xff, 0x31, 0xdd, 0xeb, 0xe7, 0xe7, 0x38
db 0x59, 0x2e, 0x1f, 0xf8, 0x00, 0x7f, 0xa6, 0x9c, 0x3e, 0x17, 0x35, 0x3a, 0xbe, 0x7d, 0xe5, 0x18
db 0x06, 0xc6, 0x9e, 0x08, 0xf9, 0x4b, 0xba, 0xde, 0xa2, 0x82, 0x76, 0x26, 0xb1, 0xe6, 0xdc, 0xcd
db 0xd8, 0x15, 0x21, 0x5a, 0x00, 0x05, 0x40, 0x30, 0x47, 0xc5, 0x17, 0x83, 0xa1, 0x36, 0xcf, 0x6d
db 0x7a, 0xe3, 0x52, 0xc3, 0x0b, 0x8b, 0x78, 0x6c, 0x68, 0xd9, 0xa7, 0x52, 0xdc, 0x09, 0x43, 0x00
db 0x5c, 0x2b, 0x63, 0x6b, 0x76, 0x12, 0x69, 0x9f, 0x2b, 0x71, 0x4f, 0x78, 0x64, 0x24, 0x34, 0x4c
db 0x5b, 0x79, 0xac, 0xcc, 0x83, 0xf5, 0x05, 0x86, 0x8b, 0xcc, 0xa9, 0xf0, 0x1c, 0x67, 0xf6, 0x4c
db 0xda, 0x7f, 0xaf, 0x11, 0xd4, 0x6f, 0x37, 0x5e, 0xf3, 0x90, 0x62, 0xfe, 0xb2, 0x2c, 0x42, 0xef
db 0x92, 0x86, 0x58, 0x26, 0x80, 0xdb, 0x87, 0xe3, 0x18, 0xb2, 0x3b, 0x9b, 0xf2, 0x27, 0x00, 0x0a
db 0x87, 0x29, 0x2c, 0xe4, 0xc0, 0xba, 0x50, 0x6e, 0x1a, 0xd1, 0x27, 0xb2, 0xd0, 0xcf, 0x3d, 0x2d
db 0x3e, 0x74, 0xce, 0x0f, 0x1c, 0x13, 0x16, 0xc8, 0x7d, 0xc6, 0xcf, 0x23, 0x9d, 0xde, 0xca, 0x99
db 0x8d, 0x5d, 0x1c, 0xe7, 0x49, 0xbf, 0x30, 0x9c, 0xd7, 0xda, 0x78, 0xfe, 0x92, 0x4d, 0x56, 0x5f
db 0xc6, 0xd7, 0xdd, 0xa2, 0x94, 0x6b, 0xc1, 0x71, 0xd1, 0xb7, 0x3c, 0x7c, 0xb9, 0x05, 0xa8, 0x58
db 0x6e, 0x13, 0x3c, 0x12, 0xe2, 0xeb, 0x1f, 0xce, 0x41, 0x96, 0xda, 0xf8, 0xbe, 0x40, 0x2b, 0xaf
db 0x56, 0x9f, 0xae, 0x34, 0x1c, 0x69, 0xcd, 0x7b, 0x7c, 0x90, 0x36, 0x43, 0x00, 0xa1, 0x57, 0xc6
db 0xc6, 0x99, 0xc2, 0x44, 0x73, 0xe5, 0x65, 0x30, 0x3d, 0xfe, 0x7b, 0xf3, 0x70, 0xc5, 0x3c, 0x47
db 0xce, 0xbc, 0xe9, 0xa7, 0x99, 0xac, 0x14, 0x0a, 0x76, 0x92, 0xa8, 0xb2, 0x59, 0xbb, 0x59, 0xb6
db 0x2b, 0x6c, 0x30, 0x88, 0xca, 0x71, 0x45, 0xe3, 0x15, 0xaf, 0x85, 0xd3, 0xe7, 0x9c, 0xf1, 0xb2
db 0xb6, 0xe1, 0xdb, 0x70, 0x80, 0xe5, 0x36, 0xd9, 0xfe, 0x93, 0x58, 0x22, 0x45, 0x9b, 0x27, 0x98
db 0xbd, 0x6a, 0x8e, 0x6f, 0xfd, 0xa7, 0xb4, 0x7b, 0xa4, 0xb3, 0xc5, 0x64, 0x2c, 0x74, 0x27, 0x16
db 0x95, 0x6e, 0x1f, 0x9e, 0x33, 0x00, 0x09, 0x42, 0x70, 0xcd, 0x43, 0xf4, 0x8e, 0x93, 0xb5, 0xe1
db 0x89, 0xfd, 0x4c, 0x6f, 0xce, 0x0c, 0xde, 0xe7, 0xc7, 0xa4, 0x13, 0x41, 0xe4, 0x46, 0xf8, 0x68
db 0xc4, 0xce, 0x09, 0xd3, 0x28, 0xd9, 0xaf, 0x5e, 0x5b, 0x2f, 0xe6, 0xcb, 0xd1, 0xcc, 0x5d, 0x3c
db 0x4b, 0x01, 0x94, 0x0d, 0xbb, 0x7d, 0x64, 0x80, 0xcd, 0x8f, 0xea, 0xc5, 0x41, 0x7b, 0xc5, 0x40
db 0x92, 0xd0, 0xcb, 0xad, 0x66, 0x2c, 0x21, 0x2d, 0x95, 0xb7, 0x14, 0x4f, 0x21, 0xf4, 0xaa, 0xfe
db 0x60, 0xd4, 0x1f, 0x32, 0x37, 0xcc, 0xc5, 0x38, 0x85, 0x77, 0x29, 0xfd, 0x1c, 0x10, 0x9d, 0x84
db 0x19, 0xe4, 0xed, 0x64, 0xdc, 0xd5, 0xd4, 0x8f, 0x30, 0xf4, 0xbe, 0xc6, 0xfc, 0xfe, 0x60, 0x5c
db 0xba, 0x6a, 0x5f, 0x18, 0x4a, 0xa0, 0xec, 0xa8, 0xec, 0x80, 0xe2, 0x76, 0x1b, 0x61, 0x96, 0xfb
db 0xef, 0x62, 0xe3, 0x60, 0xf5, 0x87, 0xdb, 0x4d, 0x8f, 0x4e, 0xd2, 0xf9, 0x0b, 0x81, 0x03, 0x8b
db 0x56, 0xdc, 0x02, 0xe5, 0x7b, 0xa7, 0xba, 0x8a, 0xac, 0x6c, 0xd3, 0xd0, 0x49, 0xea, 0xc0, 0xdb
db 0x23, 0xba, 0xa6, 0x9f, 0x9c, 0x10, 0x8c, 0x82, 0x07, 0x12, 0x14, 0xf1, 0x6b, 0x8d, 0xe5, 0x71
db 0x9e, 0x58, 0x95, 0xa3, 0x1e, 0xe8, 0x34, 0x9a, 0x7d, 0x7a, 0xf4, 0x10, 0xd5, 0xb2, 0x23, 0xbe
db 0x28, 0x60, 0x07, 0xd0, 0xce, 0x65, 0x3a, 0x79, 0x66, 0x40, 0xc8, 0xb2, 0xb9, 0x0b, 0x16, 0x63
db 0x43, 0x02, 0x64, 0x27, 0xda, 0x87, 0x4f, 0xbe, 0x45, 0xe4, 0xf5, 0x51, 0xfa, 0xc6, 0x59, 0x9d
db 0x11, 0xfd, 0x72, 0xf1, 0x3f, 0x86, 0x91, 0x8f, 0xbe, 0x6a, 0x9a, 0x2b, 0x09, 0xe4, 0x67, 0xec
db 0x29, 0x43, 0x2c, 0xac, 0x34, 0xe2, 0xdc, 0x90, 0x11, 0x80, 0x13, 0x1c, 0xeb, 0x7b, 0xd6, 0xd9
db 0x7a, 0xd2, 0x57, 0xdf, 0xf2, 0x95, 0x47, 0x21, 0xb2, 0xd7, 0x48, 0x89, 0x9c, 0x91, 0xe6, 0x8d
db 0xd5, 0xd6, 0xc0, 0xf1, 0x65, 0xad, 0x40, 0x09, 0x9b, 0x3b, 0xc8, 0x5d, 0xe0, 0xd7, 0x23, 0x5c
db 0xbf, 0x13, 0x84, 0xe4, 0x91, 0xa7, 0xd6, 0x14, 0x96, 0x58, 0xa1, 0x9d, 0xef, 0xcf, 0xf6, 0x03
db 0x5b, 0x38, 0x9d, 0x45, 0xb1, 0x93, 0x86, 0x11, 0x67, 0x82, 0xb3, 0x26, 0x21, 0xb0, 0xb7, 0xc1
db 0x85, 0x63, 0xe5, 0x4a, 0x40, 0x3f, 0x93, 0x8e, 0xe5, 0xb5, 0x62, 0xf7, 0x33, 0x10, 0xb9, 0x4e
db 0x65, 0x05, 0x88, 0x4f, 0x95, 0x7d, 0x45, 0xdc, 0x8c, 0xc5, 0x3f, 0x6a, 0xf4, 0x01, 0x46, 0x68
db 0x0d, 0xd4, 0x89, 0x9e, 0xc1, 0xf5, 0x81, 0x06, 0x7a, 0x55, 0x7b, 0x52, 0xdd, 0x1f, 0x67, 0xf6
db 0x36, 0xed, 0xf1, 0xc5, 0xde, 0x3a, 0xb4, 0x66, 0x9a, 0x15, 0x9f, 0xa1, 0xf5, 0x8a, 0xc0, 0x44
db 0xe1, 0xc2, 0x0a, 0x6b, 0x17, 0xe2, 0xf8, 0x1b, 0x2c, 0x57, 0x30, 0xf1, 0xc0, 0xdd, 0xad, 0x98
db 0x7c, 0x99, 0x25, 0x50, 0x99, 0x43, 0xc7, 0xec, 0x55, 0xac, 0x05, 0xa4, 0x5d, 0xb9, 0x91, 0x96
db 0x43, 0x9b, 0x6b, 0x03, 0xb5, 0x09, 0xf8, 0xaa, 0xf7, 0x7f, 0xc7, 0xec, 0x9c, 0xfd, 0x6a, 0xdf
db 0x30, 0xd1, 0x38, 0xf1, 0x5b, 0x6b, 0xd0, 0x19, 0x8e, 0xba, 0x49, 0x74, 0xd3, 0x10, 0x87, 0x6c
db 0x9c, 0x9f, 0xfd, 0x46, 0xab, 0x84, 0x96, 0x12, 0x5c, 0xa1, 0xc2, 0x1a, 0x33, 0xc0, 0x23, 0xbd
db 0x01, 0xdf, 0xd4, 0x85, 0x69, 0x62, 0xbc, 0x3f, 0x2c, 0xe5, 0xfc, 0x66, 0x14, 0x13, 0xd2, 0xd5
db 0x12, 0x96, 0xdc, 0x3b, 0x14, 0x79, 0x96, 0x9f, 0x07, 0x5a, 0xc1, 0x65, 0xe1, 0xc8, 0x57, 0x7d
db 0x35, 0xa2, 0xcc, 0xaa, 0x94, 0xda, 0xb0, 0x4d, 0xe2, 0x5e, 0x4c, 0x39, 0x0d, 0x8e, 0xb6, 0x2f
db 0xe6, 0xcd, 0x58, 0xb2, 0x86, 0xf4, 0x1f, 0xf6, 0xb0, 0xd8, 0xf4, 0x42, 0x31, 0x4b, 0x6a, 0x5e
db 0x2d, 0x2f, 0xdb, 0x9c, 0xc5, 0x5a, 0x45, 0xd1, 0x17, 0x1a, 0xc7, 0xc5, 0xa0, 0x51, 0xbc, 0xe6
db 0xa9, 0xab, 0xc6, 0x3b, 0x3b, 0xf6, 0xce, 0xb5, 0x3b, 0xdc, 0x95, 0x10, 0x20, 0x1d, 0x1e, 0x64
db 0xa7, 0xe4, 0x1e, 0x86, 0x7a, 0xb1, 0x90, 0x12, 0x1d, 0x21, 0x43, 0x87, 0xbf, 0x19, 0x22, 0x02
db 0x3a, 0xbf, 0xa6, 0x0d, 0x91, 0x2f, 0x66, 0xc9, 0xd5, 0x87, 0xfc, 0xe7, 0xb6, 0x56, 0xf8, 0xc3
db 0x9e, 0x5e, 0x2f, 0xfd, 0xae, 0x3f, 0x01, 0x3b, 0x98, 0x7d, 0x6a, 0x3c, 0xd8, 0xb1, 0xf2, 0xec
db 0xa6, 0x54, 0x2a, 0xac, 0xc5, 0xdd, 0x4c, 0x8b, 0x4f, 0x7a, 0x95, 0x00, 0xe0, 0x9a, 0x35, 0x10
db 0xa8, 0xf5, 0xc7, 0xf6, 0x2c, 0xf2, 0x16, 0x2e, 0xa8, 0xb6, 0xad, 0x01, 0xba, 0x14, 0x63, 0xdd
db 0x5c, 0x02, 0xd8, 0xb3, 0x3d, 0x2c, 0x5b, 0x51, 0xd2, 0xb3, 0x20, 0x14, 0xbd, 0x6e, 0xee, 0x23
db 0x33, 0xd8, 0xd1, 0x8f, 0x02, 0x5b, 0xf4, 0xd0, 0x03, 0x55, 0x81, 0x50, 0x71, 0x47, 0xaa, 0x55
db 0x9c, 0x02, 0xb4, 0x07, 0x76, 0xb9, 0x3e, 0x6b, 0x59, 0x7c, 0xee, 0x46, 0xb4, 0x17, 0xe5, 0x01
db 0xf3, 0x0a, 0xdc, 0x74, 0xbd, 0x65, 0xee, 0x37, 0x10, 0x58, 0xbf, 0xa4, 0xd2, 0x7e, 0xb5, 0x6c
db 0x1e, 0xcd, 0xb8, 0x02, 0x65, 0x27, 0x72, 0x05, 0xe0, 0x40, 0x29, 0x61, 0xfc, 0x46, 0x13, 0x81
db 0x1d, 0xdf, 0x9a, 0x47, 0x20, 0x2a, 0x52, 0x1e, 0x30, 0x77, 0x74, 0x45, 0x30, 0x63, 0xb4, 0xb9
db 0x14, 0xb0, 0x2c, 0x0f, 0xea, 0x93, 0xda, 0xe1, 0xd7, 0xda, 0xf4, 0xb6, 0x71, 0x91, 0x53, 0x36
db 0xef, 0x57, 0xf7, 0xbc, 0xa4, 0x39, 0x3f, 0x42, 0x79, 0xb5, 0xb1, 0xc4, 0x53, 0x2c, 0xfc, 0xe1
db 0xe6, 0x47, 0xcb, 0x5d, 0x32, 0xb8, 0xc9, 0x62, 0x0b, 0x5e, 0xb8, 0xe5, 0x35, 0xf8, 0xd5, 0x45
db 0xa6, 0x80, 0x18, 0x59, 0x16, 0x32, 0x04, 0xe2, 0xe6, 0x0c, 0x4b, 0xbf, 0x38, 0x6a, 0x6d, 0xd8
db 0x96, 0x2c, 0xab, 0xc4, 0xc5, 0xc3, 0x65, 0x22, 0x76, 0x4f, 0x8c, 0xf0, 0x47, 0x90, 0x99, 0xb0
db 0xfc, 0xaa, 0xd0, 0x07, 0x39, 0x38, 0xd8, 0xd5, 0x53, 0x28, 0xf8, 0xac, 0xf5, 0x64, 0x1a, 0x46
db 0xd3, 0xff, 0x76, 0xb4, 0xcb, 0x56, 0xd5, 0xac, 0x05, 0x48, 0xc5, 0xbb, 0xe8, 0x2c, 0x6d, 0xd6
db 0x29, 0xf2, 0xe0, 0x01, 0x8c, 0xa3, 0x70, 0x5c, 0x76, 0x52, 0x16, 0x6a, 0x2f, 0x5a, 0x1c, 0x43
db 0x62, 0x1a, 0x17, 0xd7, 0x29, 0x8f, 0x99, 0xb7, 0xf9, 0xe3, 0x69, 0xe6, 0x61, 0x6a, 0x01, 0xa3
db 0xf2, 0xbe, 0xca, 0xb3, 0xfe, 0x53, 0x03, 0xd1, 0xdf, 0x4c, 0xd6, 0xa3, 0xcf, 0x59, 0xf5, 0xab
db 0xa1, 0x3d, 0x14, 0x84, 0x0e, 0xe2, 0x2b, 0x9a, 0x90, 0x48, 0xb3, 0x30, 0x6a, 0xaa, 0x8c, 0xe7
db 0x36, 0x58, 0xc3, 0x81, 0xf7, 0x3e, 0x27, 0x4b, 0x83, 0xd8, 0x5a, 0x6e, 0x49, 0x14, 0x12, 0x41
db 0xd3, 0x43, 0xa7, 0xa7, 0x0b, 0x38, 0x3b, 0x14, 0xf5, 0xe5, 0xd9, 0x27, 0x57, 0x34, 0x13, 0xcf
db 0xc1, 0x7f, 0xa5, 0x81, 0x29, 0xb0, 0x5f, 0x03, 0x87, 0x74, 0x1a, 0xad, 0x5a, 0xbc, 0xdf, 0xfb
db 0xac, 0x85, 0xdb, 0x62, 0x3b, 0x52, 0x61, 0xef, 0xb1, 0x71, 0xeb, 0x10, 0x8e, 0x54, 0xf1, 0xe1
db 0x66, 0xe7, 0x90, 0xd3, 0x88, 0xc1, 0x78, 0x2c, 0x11, 0xbb, 0xe7, 0xd0, 0x33, 0x45, 0xdb, 0x57
db 0x34, 0x26, 0xbf, 0x9b, 0x44, 0x95, 0x90, 0xe2, 0xca, 0x6b, 0x0f, 0xc2, 0xa9, 0x0a, 0x93, 0x6c
db 0x29, 0x65, 0xa1, 0x1e, 0xb4, 0x2b, 0xf3, 0xe7, 0x00, 0x9e, 0x47, 0xd7, 0x6a, 0x61, 0x5a, 0x68
db 0xe0, 0x45, 0x5d, 0xf6, 0x2f, 0x6f, 0x94, 0x2a, 0xfa, 0x35, 0x74, 0xcc, 0x39, 0x6a, 0x2b, 0xa7
db 0x33, 0xfd, 0xe5, 0xc1, 0x83, 0xbc, 0x2e, 0xa1, 0xa1, 0xc3, 0x44, 0xaa, 0x72, 0x07, 0x8c, 0xa2
db 0x90, 0xdc, 0x96, 0xde, 0xfc, 0x79, 0xb2, 0x3d, 0x70, 0xdd, 0xbe, 0xd8, 0x43, 0x86, 0xd0, 0xee
db 0x12, 0x2f, 0x60, 0xa1, 0xa8, 0x88, 0x0e, 0xdb, 0x21, 0xca, 0x6c, 0xab, 0xd0, 0xc6, 0xa2, 0x77
db 0xb2, 0x97, 0x37, 0x62, 0x32, 0xf5, 0x70, 0x47, 0x1d, 0x22, 0x08, 0x23, 0x9c, 0x4d, 0xd0, 0x7b
db 0x65, 0x46, 0x3d, 0x1d, 0x35, 0xef, 0xf7, 0xb3, 0x13, 0x54, 0xb8, 0xf4, 0x37, 0x26, 0x77, 0x84
db 0x9c, 0xa2, 0x7e, 0x26, 0x2a, 0x0e, 0xbd, 0x39, 0x9f, 0x0f, 0x3b, 0xcb, 0xdf, 0x5a, 0xa8, 0xc0
db 0x50, 0xff, 0x91, 0xd1, 0x81, 0xe0, 0xee, 0x83, 0x07, 0x32, 0x5d, 0xab, 0x31, 0x4c, 0xfe, 0x21
db 0x5f, 0x3b, 0xaa, 0x46, 0xdd, 0xf2, 0x9f, 0x5a, 0x57, 0x34, 0x54, 0x86, 0x3e, 0x1e, 0xda, 0xbe
db 0x78, 0xcb, 0x08, 0xda, 0xc9, 0x76, 0x28, 0x1f, 0xfe, 0x6f, 0x05, 0xd5, 0x7d, 0x4e, 0xbb, 0xf6
db 0x2d, 0x11, 0x2f, 0x9d, 0x4c, 0x05, 0xe6, 0xc5, 0xdc, 0xcd, 0x95, 0x7d, 0x31, 0xc5, 0xb9, 0x0e
db 0xdf, 0x83, 0x30, 0x8b, 0xba, 0xfd, 0xf5, 0xaa, 0x5c, 0xe0, 0x3e, 0xa6, 0xdd, 0x51, 0xf2, 0xb2
db 0xcb, 0x3e, 0x67, 0x8d, 0x59, 0x62, 0x6d, 0x58, 0x92, 0xbe, 0x95, 0xb7, 0x9f, 0x0c, 0x49, 0x3f
db 0xaf, 0x3c, 0x45, 0x76, 0xf6, 0xeb, 0x9b, 0x42, 0x01, 0x2e, 0xd4, 0x8a, 0x9f, 0x92, 0xca, 0xdb
db 0xd3, 0x55, 0x5e, 0x92, 0x01, 0x85, 0x63, 0xa2, 0x61, 0x62, 0x30, 0x64, 0xa2, 0xb2, 0x9e, 0x11
db 0xb9, 0xb1, 0x4d, 0x4b, 0x90, 0x1c, 0xea, 0xd2, 0xd1, 0x3d, 0x11, 0xe1, 0xe1, 0x66, 0xb9, 0x2f
db 0x5f, 0x3c, 0x0a, 0x9a, 0xba, 0x6b, 0x0f, 0x47, 0x54, 0x5d, 0xfc, 0x11, 0x2f, 0x42, 0xdf, 0x7f
db 0x22, 0x60, 0x18, 0x75, 0x05, 0x1d, 0x25, 0x23, 0x75, 0x8d, 0x77, 0x89, 0x03, 0xb7, 0x74, 0x1a
db 0xff, 0xaa, 0x2b, 0xb0, 0x5d, 0x9a, 0xa6, 0x3f, 0xd0, 0x6b, 0xe6, 0x28, 0xae, 0xf9, 0xe6, 0x5a
db 0xbb, 0x3d, 0x0a, 0x94, 0x5f, 0x9b, 0x96, 0x3b, 0xfb, 0x38, 0x8f, 0x93, 0x6f, 0xf2, 0x1e, 0x5c
db 0xc5, 0x8b, 0x44, 0xf7, 0x26, 0xc6, 0x3e, 0xf6, 0x60, 0xbf, 0x54, 0xb4, 0x21, 0xfb, 0xae, 0x80
db 0xdb, 0x1a, 0xd7, 0xa1, 0xb9, 0x1b, 0xc7, 0x8d, 0xb4, 0x11, 0x3e, 0x2b, 0x19, 0xb3, 0xfb, 0xb0
db 0xb3, 0xe0, 0xc7, 0x35, 0x34, 0x80, 0xf0, 0xee, 0xf5, 0x46, 0x66, 0x93, 0x36, 0xd7, 0x0f, 0x44
db 0xb3, 0xe0, 0x15, 0x6a, 0xc7, 0xb1, 0x1e, 0xc4, 0x68, 0x73, 0xe5, 0x43, 0x2e, 0xa6, 0x26, 0xdf
db 0xe3, 0x35, 0x8f, 0xa5, 0x96, 0x52, 0xb8, 0x38, 0xfc, 0x01, 0x76, 0x92, 0xb7, 0x89, 0x09, 0x0a
db 0x88, 0x89, 0x20, 0x7f, 0x0f, 0x84, 0xb6, 0x44, 0x22, 0xa8, 0x27, 0xbf, 0x97, 0x65, 0x3e, 0xa9
db 0x92, 0x64, 0x42, 0xfb, 0xa8, 0xd7, 0xf3, 0x26, 0x28, 0x2c, 0xb6, 0x59, 0x66, 0x80, 0xe7, 0x49
db 0x1c, 0x12, 0x4e, 0x46, 0xfe, 0xf6, 0x3f, 0xdb, 0x79, 0x30, 0x6a, 0x99, 0xf3, 0x8a, 0x9e, 0x26
db 0x4f, 0x62, 0x65, 0xde, 0x9b, 0xb9, 0x03, 0x1c, 0x1e, 0xcf, 0x5c, 0x14, 0x63, 0x3e, 0xa4, 0x18
db 0x99, 0x79, 0x38, 0xbc, 0x96, 0x78, 0xd9, 0xd1, 0x51, 0x0e, 0xe5, 0xb0, 0x6e, 0xaf, 0xa2, 0xd0
db 0x60, 0x4e, 0x71, 0xc9, 0xeb, 0x8a, 0x86, 0xb6, 0x8a, 0x71, 0x66, 0x54, 0x99, 0xd1, 0x52, 0x48
db 0x53, 0x0a, 0x0a, 0xce, 0x84, 0x7a, 0xb8, 0xff, 0x5b, 0x44, 0x4c, 0x21, 0x7a, 0x47, 0xe3, 0xc9
db 0xb5, 0x0d, 0xdb, 0xe7, 0x1b, 0x7a, 0xf9, 0xde, 0x22, 0xb4, 0xf1, 0x64, 0x64, 0x55, 0x79, 0x3b
db 0x84, 0xc1, 0xdb, 0xd9, 0x1a, 0x57, 0x4f, 0x6d, 0x37, 0x4c, 0xe5, 0x36, 0x63, 0xff, 0x0c, 0x2b
db 0x11, 0xa1, 0x12, 0x46, 0x2f, 0x11, 0x70, 0x63, 0x7a, 0x5b, 0x5d, 0xd0, 0x55, 0x40, 0xab, 0xf8
db 0xd5, 0x99, 0xfe, 0x79, 0xfa, 0x4e, 0x35, 0xe2, 0xd7, 0x0f, 0x4d, 0x0e, 0xf9, 0xc1, 0x40, 0x63
db 0x19, 0x5e, 0xb9, 0x2f, 0x65, 0x4d, 0xee, 0x02, 0x1b, 0xcf, 0x5f, 0x2a, 0xb8, 0x5e, 0x2b, 0xdb
db 0x2e, 0x18, 0xaf, 0xc4, 0x49, 0xee, 0xef, 0x36, 0x42, 0xe8, 0xbc, 0x21, 0x0b, 0x07, 0xdb, 0x70
db 0x0e, 0xc2, 0xfa, 0x6e, 0x39, 0x2c, 0xf6, 0xbc, 0xde, 0x87, 0x69, 0x92, 0x45, 0x80, 0x35, 0x14
db 0xba, 0xb2, 0x08, 0x69, 0x26, 0xec, 0x78, 0x17, 0x1c, 0x57, 0x86, 0x8c, 0xe3, 0x58, 0x85, 0x09
db 0x26, 0xb5, 0xd6, 0x8c, 0x9d, 0x61, 0x04, 0x87, 0x00, 0x1b, 0x32, 0x81, 0x46, 0xa8, 0x43, 0x98
db 0xdd, 0x36, 0xa0, 0xff, 0x9f, 0x13, 0x1b, 0x9e, 0x7a, 0x20, 0x4f, 0xa8, 0x0e, 0xd3, 0xdb, 0x31
db 0x1d, 0x07, 0x6a, 0x7e, 0x10, 0xc1, 0x59, 0x0b, 0x96, 0xf7, 0x98, 0x0c, 0x8f, 0xb7, 0x87, 0x10
db 0x91, 0xe7, 0xf0, 0xa3, 0x98, 0x28, 0xce, 0xfd, 0x52, 0xa5, 0xdd, 0xe2, 0xf7, 0xbe, 0x7a, 0x73
db 0xeb, 0x9a, 0xac, 0x7f, 0x53, 0xe9, 0x89, 0xb9, 0x96, 0xba, 0x32, 0x4b, 0x2d, 0xe2, 0x13, 0x00
db 0xed, 0xac, 0x6d, 0x97, 0x62, 0xa0, 0x9b, 0x3a, 0x36, 0x8f, 0x54, 0xf6, 0x0d, 0x46, 0x10, 0xca
db 0x12, 0xb9, 0x81, 0x21, 0x47, 0xf5, 0x20, 0x41, 0xf1, 0x29, 0x91, 0x59, 0xe8, 0x3a, 0x76, 0xb2
db 0xf2, 0x3c, 0xad, 0xba, 0x76, 0xa3, 0x93, 0x84, 0x8c, 0xdb, 0x5f, 0xe0, 0x45, 0x25, 0xcc, 0xc8
db 0x4f, 0x6a, 0x46, 0xa0, 0x1e, 0xa7, 0xd4, 0x88, 0xa5, 0xed, 0x50, 0xde, 0x01, 0xe1, 0xde, 0xe0
db 0x00, 0xa9, 0x07, 0x02, 0x2e, 0xcf, 0xb6, 0x1a, 0x31, 0x43, 0x40, 0xc2, 0x35, 0x17, 0x66, 0x4d
db 0x95, 0xaf, 0x26, 0x9b, 0x18, 0x19, 0xa9, 0x30, 0xb6, 0x7b, 0xb0, 0xb1, 0x26, 0x29, 0x47, 0x45
db 0xd5, 0x2a, 0xa1, 0xac, 0x82, 0x50, 0x74, 0x44, 0x8c, 0x9e, 0xc3, 0x1f, 0x19, 0x26, 0xc5, 0x67
db 0x69, 0xe2, 0x0e, 0xfd, 0x9e, 0xa2, 0x4f, 0x50, 0xed, 0x9f, 0x94, 0x89, 0x9a, 0x73, 0x54, 0xec
db 0x77, 0x22, 0x53, 0xa2, 0xce, 0xa4, 0xf4, 0x6e, 0x7c, 0x6e, 0xda, 0xeb, 0xaf, 0x6f, 0xc9, 0x11
db 0xa9, 0x7d, 0x2a, 0xbb, 0x00, 0x22, 0x99, 0x10, 0xf7, 0x67, 0x4f, 0x2f, 0x2f, 0xfc, 0x79, 0xb6
db 0x83, 0x78, 0x19, 0xe3, 0x44, 0xdb, 0x66, 0xbf, 0xe8, 0xab, 0x32, 0xa5, 0x57, 0x85, 0x8c, 0xae
db 0xae, 0x9d, 0x8b, 0xbb, 0x6e, 0xde, 0xab, 0x6c, 0x64, 0xac, 0xca, 0xd0, 0x4b, 0x1c, 0x1c, 0x30
db 0x9e, 0x88, 0x60, 0x75, 0x0c, 0x42, 0x84, 0x41, 0x76, 0xcd, 0x82, 0xef, 0x80, 0x79, 0x0f, 0xeb
db 0xa3, 0xb8, 0xad, 0x1d, 0x45, 0x4b, 0x0e, 0xd9, 0x07, 0x48, 0x7d, 0x82, 0xd8, 0xf7, 0xcc, 0x2f
db 0x7a, 0x0e, 0xb1, 0x82, 0x95, 0x6e, 0x74, 0x78, 0xe8, 0x0c, 0xb5, 0x12, 0x26, 0x72, 0x55, 0x8b
db 0x0e, 0x29, 0x86, 0xd3, 0xce, 0x88, 0xa6, 0x29, 0xe7, 0x18, 0x9b, 0xbc, 0x13, 0x90, 0x89, 0xf6
db 0xa5, 0x20, 0x70, 0x3b, 0xbd, 0x25, 0x9c, 0x47, 0xb8, 0x9a, 0xd7, 0x72, 0xb5, 0x55, 0xe5, 0x92
db 0x98, 0x89, 0x0f, 0x12, 0xe1, 0xe1, 0x0c, 0x2c, 0x7c, 0xca, 0x27, 0x6e, 0x9b, 0x13, 0x9d, 0xc0
db 0x51, 0xa5, 0x10, 0x31, 0xda, 0xe5, 0x40, 0x7c, 0x3a, 0x67, 0x14, 0xa5, 0x92, 0x99, 0xa0, 0xf2
db 0xa6, 0xba, 0x61, 0xa9, 0x28, 0x2d, 0x10, 0xa8, 0x23, 0x09, 0x26, 0x18, 0x13, 0x6a, 0x87, 0x40
db 0x0b, 0x6c, 0xd3, 0x62, 0x55, 0xdd, 0xa4, 0x7e, 0x2a, 0x70, 0x22, 0xa0, 0x3c, 0x51, 0xcb, 0x98
db 0x26, 0xcf, 0xad, 0xf8, 0xc3, 0x9e, 0xb1, 0x7a, 0xc0, 0xd5, 0xd4, 0xaf, 0xea, 0x8a, 0xfb, 0x03
db 0x1f, 0xae, 0x8c, 0x9b, 0xa3, 0xd2, 0x20, 0xe2, 0xdf, 0xb3, 0xea, 0x97, 0xe9, 0xce, 0x3a, 0xa1
db 0xed, 0xa3, 0x97, 0xd5, 0xe2, 0xa6, 0x47, 0x9d, 0x50, 0x9e, 0xac, 0xdf, 0xd1, 0xbe, 0x32, 0x7b
db 0xd6, 0xaf, 0xe3, 0x9f, 0x4d, 0xbc, 0x58, 0x30, 0x33, 0xd8, 0xcd, 0xd8, 0x5b, 0xfa, 0x5c, 0xdd
db 0x5f, 0x0e, 0x79, 0x1f, 0x1d, 0x0b, 0xd9, 0x9c, 0xb8, 0x78, 0x27, 0x89, 0x39, 0x48, 0xf9, 0x45
db 0xe4, 0xa5, 0x8a, 0x2f, 0xc5, 0xf8, 0xe5, 0xef, 0xef, 0x89, 0x9d, 0x24, 0x2a, 0xf4, 0x7e, 0x60
db 0x38, 0xe1, 0x25, 0x67, 0x87, 0x68, 0x8b, 0x25, 0xfe, 0xb2, 0x20, 0x02, 0x14, 0x04, 0xb6, 0xba
db 0xf7, 0x9c, 0xe8, 0x01, 0x07, 0x8e, 0x0f, 0x21, 0x4c, 0xaf, 0x81, 0xcd, 0x17, 0xb2, 0xae, 0x6c
db 0xbb, 0xfb, 0xb9, 0x8f, 0x8b, 0xc8, 0x07, 0x36, 0x26, 0x0c, 0x40, 0x6f, 0xa9, 0x1e, 0x69, 0x2a
db 0x5a, 0xde, 0x40, 0xb4, 0x00, 0xfa, 0x0f, 0x2e, 0xd9, 0xc4, 0xc9, 0xc7, 0x72, 0xbb, 0xed, 0x26
db 0x0a, 0xf4, 0x81, 0xf7, 0xc6, 0xc6, 0x69, 0xd7, 0xd0, 0x63, 0x97, 0xed, 0x70, 0x2a, 0xa3, 0x30
db 0x48, 0x69, 0x0a, 0xf3, 0xae, 0x0f, 0x92, 0x08, 0x2a, 0xce, 0x55, 0x4e, 0xf2, 0x4b, 0x3e, 0xc8
db 0x98, 0x15, 0x0a, 0x94, 0x21, 0xe9, 0xff, 0xca, 0x45, 0x82, 0x49, 0x08, 0x47, 0xaa, 0x0a, 0xc2
db 0x75, 0x6a, 0xca, 0x28, 0xa1, 0x0a, 0x5a, 0x24, 0xb3, 0xb3, 0x3c, 0xd9, 0xd6, 0xb2, 0x30, 0x75
db 0xc4, 0x6e, 0x85, 0x58, 0x92, 0x44, 0x0c, 0x80, 0xd9, 0x17, 0x94, 0xcb, 0x22, 0x4a, 0xd9, 0x34
db 0xe4, 0x15, 0x0b, 0xf1, 0xbb, 0x33, 0x8c, 0x77, 0x04, 0x0e, 0x90, 0x48, 0x37, 0x76, 0x79, 0x2c
db 0x92, 0xcd, 0xba, 0xa4, 0x74, 0x42, 0x9f, 0x09, 0x7d, 0xa8, 0xc2, 0xd4, 0x36, 0xe3, 0xf3, 0x14
db 0x30, 0xcf, 0xef, 0x5c, 0x41, 0x12, 0x4d, 0xb2, 0x4d, 0xa5, 0xad, 0xf7, 0xe8, 0xba, 0xf9, 0xb6
db 0xd3, 0x03, 0xe3, 0xb5, 0x11, 0x45, 0x43, 0x64, 0x31, 0xff, 0x32, 0xc3, 0xe0, 0xe4, 0x17, 0xe2
db 0xf3, 0xaf, 0x90, 0xce, 0x12, 0xe9, 0x33, 0x90, 0x80, 0xc8, 0x5d, 0x3e, 0x5c, 0xc0, 0x33, 0x7f
db 0x04, 0x3a, 0x2e, 0xaf, 0x76, 0x70, 0x76, 0x0e, 0x41, 0x81, 0xf2, 0xde, 0x48, 0xc8, 0x65, 0x45
db 0xbe, 0x02, 0x12, 0xa9, 0x0c, 0xf9, 0x5e, 0xfc, 0xfe, 0xaa, 0xc1, 0x1a, 0x22, 0x76, 0x19, 0xd5
db 0x76, 0xf4, 0xe6, 0xdd, 0xb3, 0xa1, 0xc9, 0x64, 0x5b, 0x80, 0xd2, 0x54, 0x2c, 0xf6, 0x1b, 0xa7
db 0x2e, 0xa1, 0x7b, 0xdb, 0x6f, 0xea, 0xd7, 0x8e, 0x13, 0x27, 0x1a, 0xbd, 0x4a, 0x09, 0x47, 0xf3
db 0xa0, 0x56, 0x13, 0x95, 0x83, 0x12, 0xe8, 0x9a, 0x7c, 0xf2, 0x84, 0xbb, 0x09, 0xed, 0x91, 0x06
db 0xbc, 0x38, 0xf6, 0xfd, 0xda, 0xbd, 0x93, 0xff, 0xe3, 0x8d, 0xb2, 0x0f, 0xe9, 0x2c, 0x9d, 0x51
db 0xba, 0x08, 0xdc, 0x01, 0x9a, 0x15, 0x34, 0x7e, 0x1f, 0x3a, 0x79, 0xcc, 0x89, 0xfe, 0x9a, 0x73
db 0xd1, 0x45, 0x1d, 0x72, 0xf8, 0xd9, 0x88, 0x6d, 0x3c, 0xf6, 0xb8, 0x09, 0xdf, 0x4f, 0x6e, 0x63
db 0x5d, 0x4d, 0xa9, 0x22, 0xcf, 0x8a, 0x66, 0xb0, 0xb1, 0x82, 0x69, 0x83, 0xd6, 0x1b, 0x4a, 0x69
db 0xad, 0x1b, 0x31, 0x18, 0x1a, 0xc8, 0x7e, 0x90, 0x55, 0x37, 0x69, 0x59, 0x1c, 0x72, 0xe3, 0x91
db 0x41, 0x32, 0xcc, 0xad, 0xf0, 0xa7, 0x2c, 0xfc, 0xed, 0x0f, 0x68, 0x7c, 0x3a, 0x41, 0x8f, 0x82
db 0xb6, 0xe4, 0x7f, 0xe9, 0xb5, 0xcf, 0x19, 0xbf, 0xae, 0xce, 0x1c, 0x87, 0xd3, 0x76, 0x31, 0xf7
db 0xc5, 0x29, 0x0d, 0x75, 0x34, 0x36, 0xdc, 0x4a, 0x4d, 0x7f, 0x0e, 0xce, 0x50, 0x95, 0x62, 0x80
db 0xc6, 0x19, 0xd9, 0x2f, 0xa3, 0x0b, 0x16, 0xe8, 0xf6, 0xa3, 0x30, 0xbf, 0xdb, 0x41, 0x04, 0x05
db 0xb7, 0x10, 0x3d, 0xef, 0x15, 0x78, 0x82, 0xe1, 0xfa, 0x46, 0xe2, 0xd7, 0xb2, 0x4d, 0x0d, 0x9c
db 0x3a, 0x1e, 0xda, 0x32, 0xb2, 0x5d, 0x1a, 0x31, 0x84, 0x13, 0x74, 0xb7, 0x5b, 0x4b, 0xdf, 0x83
db 0xd9, 0x91, 0x53, 0xc1, 0xad, 0xa1, 0x19, 0x13, 0xb0, 0x3f, 0x97, 0xd6, 0x1b, 0xed, 0x5e, 0x29
db 0x87, 0xf8, 0xf4, 0x1f, 0xd6, 0x99, 0x9b, 0x14, 0xf6, 0xe9, 0x9a, 0x96, 0x19, 0x9d, 0xa0, 0xa2
db 0x63, 0xe1, 0x10, 0x2d, 0xa3, 0xdc, 0x65, 0x0a, 0x52, 0x40, 0x61, 0x8f, 0xd5, 0x45, 0x5c, 0x37
db 0x7e, 0xf6, 0x11, 0xb6, 0xd6, 0xf0, 0x92, 0x35, 0xb9, 0x2e, 0xa0, 0x36, 0x3c, 0x63, 0x88, 0x77
db 0xba, 0xb6, 0xc5, 0x8f, 0xef, 0x77, 0x2d, 0xc7, 0x06, 0xae, 0xf4, 0xd2, 0xd3, 0xa3, 0xce, 0x0d
db 0xce, 0x37, 0x5c, 0x3d, 0x10, 0x13, 0xf4, 0x2b, 0x94, 0x23, 0x54, 0x93, 0xe7, 0xd4, 0x07, 0x6c
db 0x81, 0x5e, 0xb6, 0xdd, 0x96, 0x3d, 0x47, 0x7a, 0xe8, 0xe5, 0xa8, 0x67, 0xed, 0x09, 0x1a, 0xcc
db 0xcb, 0x68, 0xad, 0xa7, 0x9e, 0xff, 0x69, 0x54, 0x5d, 0x17, 0x67, 0x50, 0x7c, 0x8f, 0x86, 0xbe
db 0xc5, 0x45, 0x09, 0x63, 0xc0, 0x3b, 0x3d, 0xcd, 0xb5, 0x66, 0x75, 0x45, 0x18, 0x1b, 0x83, 0xb2
db 0xb5, 0x70, 0x2c, 0xef, 0x09, 0x9d, 0x47, 0x7a, 0xb9, 0xe7, 0xd5, 0x04, 0x10, 0x96, 0x0b, 0x6b
db 0xd1, 0x71, 0xcc, 0x8d, 0x73, 0x90, 0x36, 0x92, 0x1f, 0x5b, 0x6b, 0xdf, 0x2d, 0x2c, 0x98, 0xd7
db 0x5d, 0x55, 0xfd, 0x4e, 0xad, 0x9b, 0x0b, 0xba, 0x0b, 0x68, 0xc4, 0xe0, 0xe8, 0x66, 0xe7, 0x91
db 0xcf, 0x0d, 0xe3, 0xc8, 0x1e, 0xde, 0x82, 0xff, 0x02, 0x43, 0xf4, 0xd3, 0x25, 0x53, 0x0b, 0x03
db 0x22, 0x76, 0x6a, 0xff, 0xcf, 0x1f, 0xd6, 0x4d, 0x21, 0x45, 0x11, 0xd3, 0x0b, 0x9c, 0x87, 0x68
db 0x98, 0xb8, 0x82, 0x4e, 0x6e, 0xc5, 0x4d, 0x8a, 0x7a, 0x9f, 0x19, 0x8a, 0xa1, 0xf7, 0xed, 0x79
db 0x56, 0xd6, 0x1b, 0x75, 0xba, 0xf2, 0x8a, 0x1d, 0x83, 0xe1, 0x6a, 0x08, 0x46, 0x0d, 0x4d, 0xb4
db 0xa1, 0xb8, 0x40, 0x24, 0x71, 0xe4, 0xd4, 0xb0, 0x4c, 0x32, 0x49, 0xec, 0x5a, 0x03, 0x95, 0x13
db 0xef, 0xbb, 0x5f, 0xee, 0x1d, 0x95, 0xb1, 0x60, 0xd2, 0xd7, 0x6b, 0x06, 0x8e, 0xf1, 0x96, 0x35
db 0xe0, 0x02, 0x4e, 0x50, 0xb2, 0x7e, 0xaa, 0x6b, 0xe7, 0xf8, 0xd0, 0xd2, 0x3e, 0x96, 0xde, 0x77
db 0xf0, 0xa8, 0xdd, 0x44, 0x00, 0x4b, 0xc1, 0x56, 0xd2, 0xe5, 0x9a, 0x1d, 0xa7, 0x8f, 0x08, 0x3e
db 0x1b, 0x14, 0xc9, 0x9e, 0x46, 0x7e, 0xf0, 0x51, 0x3c, 0x8c, 0x80, 0x21, 0xc6, 0x96, 0xc1, 0x9a
db 0xda, 0x41, 0xa5, 0xc8, 0xc8, 0x18, 0x63, 0x43, 0x20, 0xaf, 0x69, 0x28, 0xa5, 0x38, 0x13, 0xe0
db 0xf4, 0x2e, 0xdc, 0xed, 0x4a, 0x0c, 0x70, 0xc7, 0xcf, 0x45, 0x62, 0x6d, 0x9c, 0x17, 0x8d, 0xb0
db 0x59, 0xc7, 0x2c, 0xff, 0x4d, 0xa7, 0x5d, 0xbb, 0xfd, 0xa2, 0xcb, 0xe0, 0x08, 0x3d, 0x78, 0x35
db 0x53, 0x88, 0x7e, 0xe5, 0xf1, 0x4a, 0x5a, 0xb7, 0xfc, 0xbb, 0xf0, 0x25, 0x5c, 0x32, 0x56, 0x4a
db 0xc8, 0x0b, 0xd2, 0x74, 0x14, 0xe3, 0x8b, 0xc0, 0x1f, 0x78, 0x1a, 0x74, 0xb8, 0x17, 0x09, 0x7c
db 0x74, 0x5e, 0x3a, 0xa9, 0x04, 0x64, 0x7d, 0x89, 0x81, 0x8a, 0x97, 0xac, 0x61, 0x41, 0xbd, 0x20
db 0x51, 0x55, 0x12, 0x95, 0xc3, 0x57, 0xcd, 0x9a, 0x07, 0x66, 0x19, 0xce, 0x9e, 0xed, 0x77, 0x34
db 0xb2, 0x77, 0xd8, 0x3a, 0xbc, 0x59, 0xf1, 0x86, 0x99, 0x2f, 0x19, 0x51, 0x0f, 0xda, 0x59, 0x82
db 0x0a, 0xaa, 0x2e, 0xaa, 0x04, 0x00, 0x8d, 0xe4, 0xe2, 0x53, 0xdf, 0x5c, 0x6e, 0xfb, 0x72, 0x22
db 0x23, 0x6f, 0xe2, 0x4c, 0x74, 0x30, 0xc6, 0x62, 0x5c, 0x85, 0x61, 0x44, 0xf4, 0x9d, 0x07, 0x56
db 0x0c, 0x90, 0xb2, 0x35, 0x8a, 0xcd, 0x02, 0x54, 0x5a, 0x51, 0x40, 0xfa, 0xad, 0x19, 0x5c, 0xdc
db 0xf5, 0xe2, 0xb6, 0x10, 0x58, 0x10, 0x60, 0x1d, 0x04, 0xbd, 0x7c, 0x47, 0xaf, 0x28, 0x3d, 0x68
db 0x80, 0x06, 0xee, 0x4e, 0xbd, 0x90, 0x53, 0x90, 0x3f, 0x7e, 0xed, 0x99, 0x6f, 0x4f, 0xc3, 0xce
db 0x64, 0xa7, 0x9e, 0x5e, 0x85, 0xb3, 0x2f, 0x30, 0x2c, 0xc5, 0xa2, 0x5f, 0xa7, 0x6e, 0xb8, 0x4e
db 0x50, 0xb2, 0x06, 0x3e, 0xaf, 0x9d, 0x2c, 0x25, 0x29, 0x06, 0x4e, 0xd6, 0x4d, 0x58, 0x28, 0xe1
db 0x57, 0x45, 0xf0, 0x0c, 0x09, 0xcf, 0x24, 0x13, 0x83, 0x39, 0xc4, 0xf8, 0x17, 0x26, 0xdc, 0xba
db 0x19, 0x55, 0x83, 0xe9, 0xad, 0xad, 0x51, 0xbe, 0xc4, 0x36, 0x8e, 0xaf, 0x5b, 0x40, 0x4a, 0xfd
db 0x26, 0xfc, 0x42, 0x74, 0xe0, 0xfd, 0x9f, 0x9b, 0xda, 0x31, 0x78, 0x1a, 0x4e, 0x2e, 0xfc, 0x67
db 0x2e, 0x96, 0x54, 0xfe, 0xdd, 0xfe, 0x28, 0x1a, 0x0f, 0xf6, 0x59, 0x9a, 0x76, 0x92, 0xdd, 0x1e
db 0xd2, 0xb7, 0x28, 0xeb, 0x49, 0x72, 0xef, 0x2a, 0xc5, 0xb3, 0x2f, 0x0f, 0xcd, 0xe1, 0xa2, 0x47
db 0xc0, 0x6d, 0x31, 0x77, 0x45, 0xd7, 0x2c, 0x97, 0x83, 0x8d, 0x2a, 0x52, 0x58, 0xc1, 0x2b, 0x36
db 0x06, 0xff, 0x02, 0x0f, 0xba, 0xac, 0x98, 0x29, 0xda, 0x20, 0xf1, 0x2f, 0x45, 0x66, 0xa9, 0xe4
db 0x59, 0x01, 0xa4, 0x11, 0x2d, 0x2e, 0xf8, 0xd4, 0x7d, 0x00, 0x8f, 0xf4, 0xea, 0xfe, 0x4c, 0x12
db 0xd5, 0x12, 0x30, 0x8b, 0xcc, 0xb2, 0x1f, 0x23, 0xd2, 0x63, 0x71, 0x49, 0xb4, 0x76, 0x58, 0xec
db 0x90, 0x09, 0xd5, 0x46, 0x8b, 0xfe, 0x15, 0x97, 0x3d, 0x7e, 0x3f, 0x5f, 0x36, 0xa2, 0x79, 0xc2
db 0x15, 0xcd, 0x24, 0x8d, 0x06, 0x75, 0x3c, 0x79, 0x82, 0xe7, 0x0c, 0x2e, 0x76, 0x3a, 0xcf, 0xbe
db 0xaf, 0xd9, 0x2a, 0x5f, 0xf2, 0x61, 0x22, 0xd5, 0xe3, 0x24, 0xe1, 0xfe, 0x12, 0x3d, 0x03, 0x70
db 0x71, 0xe3, 0x49, 0xe9, 0x10, 0x73, 0x0d, 0x89, 0x5c, 0x53, 0x7c, 0xd2, 0xb4, 0x71, 0xf6, 0x16
db 0x93, 0x67, 0x06, 0x60, 0x13, 0x72, 0xcd, 0x00, 0xe5, 0x50, 0x4c, 0x01, 0xcf, 0x6b, 0x98, 0x6c
db 0xe0, 0xcd, 0x5a, 0xfb, 0x33, 0x3e, 0x52, 0x77, 0xf2, 0x34, 0xdd, 0xb2, 0x94, 0xbb, 0xb5, 0x13
db 0x3f, 0xfd, 0xea, 0x2e, 0xda, 0x8c, 0x02, 0xbb, 0xab, 0x50, 0xd7, 0xfc, 0x26, 0x3e, 0xda, 0x3b
db 0xab, 0x9a, 0xea, 0x6b, 0xb1, 0x64, 0xb8, 0x1a, 0x19, 0x8d, 0x31, 0xed, 0x9b, 0x92, 0xe1, 0x75
db 0x8a, 0xb9, 0x17, 0xb5, 0xe5, 0x29, 0x48, 0xe5, 0xbf, 0x80, 0x94, 0xc6, 0x1e, 0xcf, 0x5a, 0xd7
db 0x47, 0x6e, 0xc5, 0xe8, 0x12, 0x99, 0x48, 0x07, 0xe8, 0x9c, 0xde, 0x0b, 0x59, 0xc0, 0x0f, 0x06
db 0x75, 0x41, 0x61, 0x94, 0x28, 0x5f, 0x87, 0x56, 0xa4, 0x3f, 0x18, 0xf7, 0x93, 0xe0, 0xc9, 0x03
db 0x03, 0x6e, 0x44, 0x16, 0x70, 0xbc, 0x38, 0x7d, 0x4d, 0x17, 0x75, 0x65, 0x5d, 0x9c, 0x79, 0xc7
db 0x29, 0x56, 0x52, 0x4a, 0xfe, 0xef, 0x09, 0x65, 0x9a, 0xc3, 0x78, 0x8d, 0x89, 0xc2, 0x3f, 0x43
db 0x1d, 0xd6, 0x54, 0x59, 0xe4, 0x6f, 0xaf, 0x08, 0x90, 0xff, 0x20, 0xae, 0x87, 0x68, 0x7b, 0x4a
db 0x62, 0xd2, 0x93, 0x57, 0x08, 0xec, 0xbd, 0x5b, 0xde, 0xa0, 0x76, 0x01, 0x6a, 0x27, 0x2e, 0xc9
db 0xf1, 0x99, 0x7a, 0xa5, 0x08, 0x31, 0x85, 0x68, 0x35, 0x94, 0xe4, 0x56, 0x0b, 0x52, 0xdc, 0x5b
db 0x9c, 0x2f, 0xe8, 0x54, 0xb3, 0xee, 0x9d, 0x0e, 0x75, 0xb5, 0x5c, 0xcb, 0xb9, 0x7b, 0x88, 0xf8
db 0x0e, 0x56, 0x11, 0x6f, 0xb4, 0xa5, 0x4f, 0xf3, 0x01, 0xc6, 0xe7, 0xc6, 0xbf, 0x89, 0x88, 0x02
db 0xeb, 0xce, 0x57, 0xe3, 0x59, 0xf0, 0x12, 0xdf, 0x57, 0x4e, 0xb0, 0xba, 0x67, 0x70, 0x94, 0x1e
db 0xa3, 0xd3, 0x60, 0x21, 0xf8, 0xa6, 0xc9, 0xe7, 0x3e, 0x12, 0x93, 0x58, 0x8b, 0xd7, 0x0e, 0x94
db 0x6f, 0xcd, 0x8b, 0x09, 0x6d, 0xaa, 0x5b, 0xb2, 0x2c, 0x9f, 0x6f, 0x85, 0x93, 0xf1, 0xc9, 0xd0
db 0xd2, 0x04, 0xd1, 0x80, 0xe0, 0x11, 0x09, 0x0f, 0x7b, 0x88, 0xfe, 0x05, 0x9a, 0x53, 0x36, 0xac
db 0xd9, 0xb8, 0x45, 0x81, 0x51, 0x99, 0xeb, 0x8d, 0xf2, 0x4d, 0x30, 0x70, 0x62, 0xf4, 0xf9, 0x2f
db 0xf7, 0x52, 0xa1, 0xb2, 0xec, 0xa4, 0xbe, 0x1c, 0xd0, 0xfb, 0x1c, 0xe4, 0x3a, 0xbe, 0x5d, 0xee
db 0x60, 0xdb, 0x4a, 0x70, 0xec, 0x31, 0xc7, 0x89, 0x16, 0x95, 0x94, 0xde, 0x8c, 0x58, 0x3e, 0xad
db 0x58, 0x46, 0x9c, 0x68, 0x2a, 0x2d, 0x43, 0x37, 0x5c, 0xec, 0xf4, 0x03, 0x65, 0xf5, 0x3e, 0xa2
db 0x7d, 0xe0, 0x4d, 0x39, 0x34, 0x7f, 0xe2, 0xdb, 0x90, 0x66, 0xdc, 0xca, 0x2e, 0xf8, 0x3d, 0xd8
db 0x97, 0x55, 0x60, 0x76, 0xc9, 0x53, 0x71, 0xe2, 0xa3, 0xe1, 0x5d, 0x5f, 0x4c, 0xfe, 0x03, 0x2b
db 0x15, 0xd7, 0x2d, 0x5b, 0x56, 0xa0, 0xb5, 0x78, 0x44, 0x98, 0x1f, 0x52, 0xe1, 0x49, 0x13, 0x48
db 0xed, 0x83, 0xa1, 0xfb, 0x23, 0xd0, 0x73, 0x34, 0x5e, 0x1d, 0x2f, 0xa0, 0xc5, 0x1b, 0xd0, 0xe3
db 0x12, 0x28, 0x52, 0x00, 0xe6, 0x7c, 0x6d, 0x6e, 0x87, 0xe2, 0x42, 0x97, 0x1f, 0x5b, 0x2b, 0x93
db 0xba, 0x75, 0xe2, 0x8b, 0x3f, 0x62, 0x5b, 0xb5, 0x36, 0x76, 0x05, 0x1d, 0x2d, 0x7d, 0x29, 0x88
db 0x2d, 0x6a, 0xe7, 0xec, 0x3b, 0x8c, 0x88, 0x70, 0xb7, 0xf2, 0xbb, 0x61, 0xdb, 0xa2, 0x1a, 0x28
db 0xb0, 0x17, 0xdf, 0x18, 0xcc, 0x2a, 0x02, 0xdd, 0xd2, 0x4a, 0x10, 0x2c, 0x9d, 0x65, 0x58, 0x46
db 0x1b, 0xa0, 0xf7, 0x57, 0x7f, 0x76, 0x36, 0xd2, 0xb6, 0xc1, 0xb5, 0x93, 0x6a, 0xbf, 0x97, 0x22
db 0x9f, 0xe2, 0xe9, 0xdb, 0x25, 0xc3, 0x04, 0xf0, 0xed, 0x54, 0xc7, 0xc6, 0xab, 0x30, 0x36, 0xb8
db 0x3a, 0x24, 0xab, 0x10, 0x0c, 0x26, 0x45, 0xb6, 0xd9, 0xf3, 0x19, 0x6b, 0x65, 0xfd, 0x1d, 0x6d
db 0x52, 0x73, 0x14, 0x9d, 0x22, 0x1f, 0xb1, 0x8c, 0x3e, 0x43, 0xbb, 0xf1, 0xca, 0x0f, 0x84, 0x81
db 0xb5, 0xc5, 0xe3, 0xf6, 0xcd, 0xe0, 0x2c, 0xf6, 0x9e, 0x21, 0xaa, 0xc8, 0x18, 0x85, 0x17, 0x78
db 0x60, 0xb5, 0xeb, 0x9f, 0x2e, 0x5b, 0xbe, 0xe9, 0x87, 0x37, 0xa0, 0x9c, 0x51, 0x8a, 0xf4, 0x1a
db 0x17, 0xfd, 0x9d, 0xf0, 0x81, 0x34, 0x55, 0x74, 0x4b, 0x49, 0x86, 0x7b, 0xd1, 0x00, 0x21, 0x92
db 0x69, 0xdf, 0x80, 0x8b, 0xa4, 0xf0, 0x50, 0x24, 0xbf, 0x2e, 0x1f, 0x7e, 0x4f, 0x2a, 0x79, 0x65
db 0x09, 0xc5, 0x6a, 0x4c, 0x87, 0x3d, 0x4d, 0xe7, 0xcd, 0x6a, 0xdd, 0x1d, 0x89, 0xde, 0x0f, 0xe3
db 0xaa, 0xf4, 0x99, 0xf0, 0x17, 0x54, 0x2d, 0x81, 0x97, 0x5a, 0x4f, 0xae, 0x77, 0x2f, 0x2a, 0x93
db 0x68, 0xe8, 0x0a, 0xbc, 0xcf, 0x48, 0xe2, 0x40, 0xe2, 0x55, 0xdd, 0x5c, 0xa5, 0x50, 0x63, 0xea
db 0xb2, 0x53, 0xea, 0x8a, 0x51, 0xbe, 0x93, 0xa7, 0x02, 0xa5, 0x50, 0x18, 0x7c, 0x0e, 0x40, 0x27
db 0x9e, 0x5c, 0x68, 0x17, 0x37, 0xe7, 0x83, 0x8c, 0xc4, 0x31, 0x5a, 0xd1, 0x6e, 0xe4, 0xac, 0xd7
db 0x05, 0x34, 0x2b, 0xdb, 0xe6, 0x75, 0xed, 0xd0, 0xa5, 0x18, 0xe5, 0x7e, 0x78, 0x7e, 0x28, 0xfb
db 0x9c, 0x58, 0x5d, 0x92, 0x9f, 0xfb, 0xa1, 0x86, 0x6e, 0x96, 0x55, 0x60, 0xee, 0xf8, 0x98, 0x39
db 0x18, 0xa1, 0x60, 0x58, 0xc7, 0xf4, 0xf7, 0x5b, 0x30, 0x76, 0x65, 0xa4, 0x9c, 0x75, 0x79, 0x37
db 0x3a, 0xb3, 0x40, 0xde, 0xff, 0xb2, 0x8d, 0x50, 0xd6, 0x05, 0xc2, 0x22, 0x5f, 0xa9, 0x2e, 0xd9
db 0xc3, 0x5c, 0xe5, 0xae, 0xcd, 0xad, 0x73, 0xba, 0xdb, 0x04, 0xe9, 0x1f, 0xeb, 0x6f, 0xd4, 0x02
db 0x3c, 0xfb, 0x68, 0x52, 0x25, 0xd8, 0xb9, 0xba, 0xf3, 0x56, 0x0c, 0x4b, 0xda, 0x00, 0xb1, 0x7d
db 0xd7, 0x4a, 0x04, 0x5e, 0x5f, 0x2f, 0x49, 0x7f, 0xe3, 0x30, 0x18, 0xa2, 0x44, 0x3a, 0x56, 0x80
db 0x2d, 0xdd, 0x77, 0x4d, 0x48, 0x1d, 0x8b, 0x3a, 0x52, 0xf5, 0x63, 0x51, 0xb1, 0xe0, 0xe3, 0x7c
db 0xd2, 0x3d, 0x50, 0xce, 0xbd, 0xf3, 0x8e, 0xfd, 0xe3, 0x0a, 0x3c, 0x6d, 0x85, 0x4a, 0xd2, 0xa8
db 0xe7, 0x7a, 0xac, 0x50, 0x9c, 0xc7, 0x9f, 0x91, 0x5d, 0xc5, 0x66, 0x86, 0x00, 0x31, 0x72, 0x87
db 0x39, 0x22, 0xd0, 0x3a, 0xc7, 0xe3, 0x91, 0x9f, 0x02, 0x55, 0xa2, 0x0c, 0xb2, 0xca, 0x11, 0xec
db 0xb5, 0x22, 0xab, 0x55, 0x8e, 0x05, 0x43, 0xe3, 0xfe, 0xb3, 0xef, 0x1b, 0xfb, 0x8f, 0x79, 0x11
db 0xb3, 0x2a, 0x01, 0x13, 0xaf, 0xa7, 0x7a, 0xcd, 0x85, 0xc9, 0xc0, 0x37, 0x7b, 0x8f, 0x6e, 0xef
db 0x15, 0xe9, 0x7f, 0xf5, 0xf7, 0xee, 0x82, 0xa2, 0x91, 0xc4, 0xa1, 0x33, 0x28, 0xef, 0xf9, 0x47
db 0x31, 0xef, 0x1e, 0xf1, 0x25, 0x3d, 0x9b, 0x84, 0x7a, 0x75, 0x03, 0xb0, 0xbd, 0x96, 0xe5, 0xb2
db 0x12, 0x52, 0x6c, 0x10, 0x1c, 0x1b, 0x8e, 0xe2, 0xd5, 0x08, 0x99, 0x3f, 0x1d, 0x57, 0x12, 0x00
db 0x90, 0xf5, 0x21, 0x76, 0x3e, 0x51, 0xd9, 0x19, 0xb2, 0x41, 0x88, 0x47, 0xc3, 0x95, 0xfb, 0xe0
db 0x38, 0x87, 0xff, 0x52, 0xb4, 0x9d, 0xf2, 0xdf, 0x53, 0x2d, 0x4f, 0x55, 0x4b, 0x4d, 0xa3, 0x0d
db 0x45, 0x9f, 0xd7, 0x33, 0xd8, 0x11, 0xd9, 0x45, 0xd9, 0x13, 0x3a, 0xe5, 0xe6, 0xda, 0x5b, 0xc6
db 0xd4, 0xcb, 0x19, 0x63, 0xb0, 0x5d, 0x2e, 0x37, 0x97, 0x0b, 0x64, 0x09, 0x45, 0xd3, 0x78, 0x2d
db 0xbc, 0xc0, 0x79, 0x0d, 0xf5, 0x51, 0x9b, 0xeb, 0x9c, 0x28, 0xce, 0x14, 0x32, 0x70, 0xae, 0x14
db 0xa1, 0xd5, 0x4f, 0x9d, 0xbc, 0x79, 0x45, 0x2d, 0x89, 0x63, 0x4b, 0x27, 0x37, 0x94, 0x7f, 0xdb
db 0x2f, 0x39, 0xb8, 0x73, 0xaf, 0x3b, 0x4e, 0x97, 0x2f, 0xce, 0x83, 0x04, 0xb8, 0xa3, 0xb7, 0x38
db 0xe9, 0x08, 0x75, 0x35, 0x42, 0x1c, 0xe8, 0xc4, 0xff, 0xca, 0x13, 0xf9, 0x69, 0x53, 0x32, 0x83
db 0x29, 0xe6, 0x17, 0xe7, 0xf3, 0x01, 0xbf, 0xc0, 0xf1, 0xdf, 0x8b, 0x2d, 0x29, 0x5d, 0x68, 0xfb
db 0x18, 0x77, 0x2d, 0x98, 0x29, 0xa3, 0x44, 0x79, 0x47, 0xbe, 0xb9, 0xca, 0x18, 0x29, 0x16, 0xd9
db 0x41, 0x49, 0x55, 0x82, 0xf2, 0xc0, 0x42, 0x13, 0x68, 0x72, 0x75, 0x5f, 0xe4, 0x97, 0xae, 0x6d
db 0x90, 0xc0, 0x57, 0x05, 0x9e, 0x54, 0xa5, 0x1e, 0x4b, 0x6e, 0x46, 0x9d, 0x31, 0xce, 0x0a, 0xde
db 0xed, 0x71, 0x7b, 0x42, 0xa6, 0xf3, 0x6c, 0xc8, 0x21, 0x4a, 0x4f, 0x40, 0x67, 0x90, 0x2c, 0x92
db 0xb6, 0x7a, 0xdc, 0x9b, 0x0c, 0xb8, 0x38, 0x80, 0xf6, 0x77, 0x29, 0xe3, 0x59, 0xbc, 0xfa, 0xe2
db 0xdf, 0x95, 0x5c, 0xec, 0x7c, 0xac, 0x81, 0x5f, 0xd3, 0xcc, 0x50, 0x43, 0xad, 0xb0, 0x6c, 0xb3
db 0x9a, 0x4c, 0x37, 0x6c, 0x18, 0xf1, 0xa3, 0x71, 0x57, 0x39, 0x09, 0xf7, 0xe0, 0xdc, 0x93, 0xe2
db 0xba, 0xd6, 0x2a, 0x81, 0x67, 0x59, 0x36, 0x8e, 0xf1, 0x4b, 0xd5, 0xa6, 0x57, 0x84, 0x63, 0x1a
db 0xea, 0x2f, 0x2f, 0xd8, 0x9b, 0x64, 0x15, 0x6e, 0x32, 0x40, 0xe0, 0x98, 0x11, 0x5f, 0xc2, 0x47
db 0xa9, 0x08, 0x84, 0x0c, 0xa3, 0xea, 0xe6, 0x4b, 0xcc, 0x08, 0x27, 0x22, 0x9b, 0xf7, 0xb7, 0x6e
db 0xfb, 0x86, 0x04, 0x04, 0x26, 0x8d, 0x47, 0x04, 0xf5, 0x48, 0x1c, 0xac, 0xaa, 0x73, 0x46, 0x06
db 0x33, 0xcf, 0xd4, 0xb2, 0xc0, 0x90, 0x5a, 0xc6, 0x0e, 0xe0, 0x3d, 0xd1, 0xf4, 0x9a, 0x7b, 0xaa
db 0xdc, 0x00, 0x62, 0xec, 0xae, 0x0b, 0x59, 0xdf, 0x46, 0x8c, 0x37, 0x48, 0xdf, 0xb9, 0x51, 0xef
db 0x03, 0x3f, 0x14, 0x06, 0xe9, 0xa9, 0x04, 0x5e, 0x05, 0x42, 0x5c, 0x3c, 0x56, 0xe7, 0xdb, 0xeb
db 0x96, 0x19, 0xa5, 0x0d, 0xa9, 0xc5, 0xa6, 0x1e, 0x68, 0x43, 0x1d, 0x09, 0x5e, 0x95, 0x48, 0xaa
db 0x73, 0xe2, 0x6a, 0x65, 0x34, 0xef, 0xf8, 0x2d, 0x3a, 0xe6, 0x0e, 0x56, 0xcb, 0x9a, 0x7d, 0x31
db 0xf0, 0xa8, 0x73, 0xfc, 0x69, 0x22, 0xab, 0xbc, 0xcf, 0xe1, 0x05, 0xa4, 0xe8, 0x14, 0x4e, 0x96
db 0xe1, 0x0c, 0xa1, 0x8d, 0x94, 0x18, 0xa1, 0xa3, 0x71, 0x1f, 0x1f, 0xd2, 0x92, 0x9c, 0xa3, 0x49
db 0xc1, 0xa0, 0x2b, 0xd3, 0x48, 0x5c, 0x59, 0xd5, 0x19, 0xbb, 0xcf, 0x07, 0x68, 0x5e, 0xca, 0x33
db 0xb7, 0x42, 0x04, 0x28, 0x78, 0x3a, 0x5c, 0x12, 0xc6, 0x67, 0x0e, 0x98, 0x30, 0xdb, 0x7f, 0x5a
db 0x4e, 0xc3, 0xd6, 0x3a, 0xec, 0x4a, 0x53, 0x9e, 0x2b, 0x92, 0xe9, 0x5b, 0xef, 0x01, 0x73, 0x1b
db 0x8f, 0x20, 0x91, 0xdb, 0x01, 0x7e, 0x97, 0xff, 0x98, 0xc2, 0x44, 0xa7, 0x35, 0xe6, 0xc0, 0x41
db 0x55, 0x6c, 0xaf, 0x24, 0xd2, 0xac, 0x2f, 0x2b, 0x62, 0xa2, 0xc8, 0x64, 0xf7, 0x0a, 0xf4, 0x94
db 0x3e, 0x29, 0x11, 0xd0, 0x08, 0xac, 0x40, 0xab, 0x14, 0x36, 0xe8, 0xd6, 0x9a, 0xe9, 0xd2, 0xe2
db 0x1b, 0xa8, 0x80, 0x67, 0x45, 0x24, 0xe0, 0x7c, 0xaa, 0xc4, 0x17, 0x0b, 0xbe, 0xbc, 0x7f, 0x17
db 0x44, 0xf7, 0x7a, 0x3a, 0x62, 0x92, 0xfc, 0x66, 0x08, 0x17, 0x60, 0xc8, 0x57, 0x42, 0x8f, 0x4f
db 0xa3, 0x2d, 0xe0, 0x42, 0x22, 0xab, 0x70, 0x65, 0x8f, 0xad, 0xc8, 0x8a, 0x1d, 0xbc, 0x66, 0xc2
db 0xd4, 0xb0, 0x47, 0x58, 0xb8, 0xe2, 0xd8, 0x40, 0x25, 0x05, 0x65, 0x07, 0xfc, 0x31, 0x4b, 0xaf
db 0xba, 0x0a, 0x92, 0x1e, 0xd7, 0xd9, 0x7a, 0x25, 0x39, 0xda, 0x7a, 0xa5, 0x31, 0x77, 0xce, 0x17
db 0x56, 0x12, 0x5d, 0xcf, 0x56, 0x06, 0xeb, 0x76, 0x3a, 0x8b, 0xcb, 0x3b, 0xc1, 0x7b, 0x0b, 0x20
db 0xf7, 0x03, 0xf0, 0xe1, 0xca, 0x77, 0x35, 0x69, 0x84, 0x78, 0x0a, 0x94, 0x09, 0xcd, 0x5f, 0x49
db 0x77, 0x8a, 0xc1, 0xbc, 0x95, 0x38, 0x61, 0x37, 0x37, 0x9a, 0x3d, 0xb4, 0xe2, 0x1a, 0x80, 0x42
db 0xd2, 0x85, 0x59, 0xba, 0xc8, 0xd8, 0x0f, 0x30, 0x3f, 0xdd, 0xef, 0xbe, 0x80, 0xe5, 0x95, 0xc4
db 0x54, 0xb8, 0x8f, 0xc6, 0x60, 0x2c, 0x85, 0xf4, 0xc3, 0x33, 0x63, 0x8e, 0x50, 0x23, 0x4b, 0xee
db 0x13, 0x87, 0xd6, 0xd4, 0xba, 0xb9, 0xcf, 0x05, 0xaf, 0x6a, 0x27, 0x73, 0x56, 0x30, 0xd6, 0x83
db 0x9e, 0x0b, 0x47, 0x12, 0x4f, 0x5c, 0x34, 0xb6, 0x66, 0xc0, 0xd9, 0x90, 0xb2, 0xa6, 0x54, 0xbf
db 0x42, 0x5d, 0xf0, 0x02, 0x9f, 0x6b, 0x73, 0x90, 0xa9, 0x1f, 0x9f, 0xae, 0x83, 0x12, 0xfa, 0x82
db 0x5b, 0x43, 0xcd, 0xfb, 0xf9, 0x4b, 0xb8, 0x10, 0x4b, 0x8c, 0x33, 0xcb, 0xff, 0x75, 0x04, 0xd6
db 0x85, 0x0c, 0x43, 0x9b, 0x17, 0x9b, 0x36, 0x8a, 0xb7, 0x56, 0x1c, 0xe0, 0x7f, 0xed, 0x69, 0xda
db 0x86, 0x5f, 0x79, 0x41, 0xa3, 0x3e, 0x39, 0x4e, 0xad, 0x95, 0x60, 0x8e, 0x6a, 0x12, 0x9f, 0xa5
db 0xbf, 0xd4, 0x2a, 0x9c, 0x48, 0x93, 0xeb, 0x11, 0x92, 0x68, 0xc8, 0xef, 0xa6, 0xbc, 0xf6, 0x84
db 0xc2, 0xfd, 0x48, 0xbf, 0xbb, 0x80, 0x1f, 0x8f, 0xf0, 0xf5, 0xe3, 0x8d, 0xfb, 0x87, 0xe4, 0x6a
db 0x51, 0x13, 0x03, 0x15, 0x7d, 0x30, 0xb8, 0xad, 0x9f, 0x1f, 0x15, 0xf2, 0x40, 0xef, 0x21, 0xcc
db 0x71, 0xe5, 0x80, 0x5d, 0xc8, 0x33, 0xe3, 0xe1, 0x09, 0x7a, 0x66, 0xdd, 0xe2, 0x1d, 0xac, 0xa7
db 0xb4, 0xa7, 0x8e, 0x92, 0x49, 0xbf, 0x60, 0x2d, 0xa3, 0xa8, 0x1a, 0x21, 0x34, 0xcc, 0x29, 0x12
db 0x78, 0x89, 0xc3, 0x52, 0xe8, 0xbc, 0xba, 0x54, 0xbd, 0x19, 0x2e, 0xad, 0xd4, 0x48, 0x4d, 0x2f
db 0x1f, 0xe7, 0x98, 0xd9, 0x6d, 0xbb, 0x85, 0x48, 0x79, 0x81, 0xf2, 0x3d, 0x03, 0xf7, 0x16, 0x7b
db 0xf5, 0x72, 0x5a, 0xf6, 0x56, 0x4d, 0x03, 0x2b, 0x6d, 0xa1, 0xb1, 0x02, 0xa4, 0xf0, 0x43, 0x4b
db 0xbe, 0xec, 0x74, 0xe9, 0x43, 0x0c, 0xd4, 0xeb, 0x06, 0x49, 0x66, 0x27, 0x4b, 0xb9, 0xe8, 0xb3
db 0xaa, 0x60, 0xf8, 0x9b, 0x4f, 0x88, 0x66, 0xfe, 0x7d, 0xc0, 0x21, 0x22, 0x74, 0xcf, 0xe3, 0xf8
db 0xf4, 0xe3, 0x6d, 0x6c, 0xf9, 0x21, 0xc2, 0x86, 0x89, 0x63, 0x5b, 0x27, 0x71, 0x04, 0x03, 0x40
db 0xa9, 0xe6, 0xa3, 0x2f, 0x65, 0xb6, 0x5a, 0xdc, 0xdd, 0x4d, 0x92, 0xe1, 0x86, 0xf5, 0xc5, 0xc0
db 0x05, 0x3c, 0x5b, 0x35, 0xd4, 0x8c, 0x61, 0xed, 0xc7, 0x53, 0x63, 0x4e, 0x6a, 0xfe, 0x6f, 0x52
db 0x96, 0x50, 0x43, 0x99, 0x24, 0x14, 0xeb, 0xb6, 0x58, 0xce, 0xd9, 0xe0, 0x21, 0x64, 0x9c, 0xe4
db 0x05, 0x48, 0xf8, 0xeb, 0x12, 0x03, 0x54, 0x19, 0xf3, 0x17, 0x6c, 0x3b, 0x91, 0x02, 0xe4, 0xfc
db 0xb9, 0x24, 0xa6, 0xc1, 0xc9, 0x5e, 0x31, 0xdb, 0x03, 0x52, 0x91, 0xc8, 0x67, 0xaa, 0xe8, 0x1b
db 0x51, 0x83, 0xf6, 0x6d, 0xf9, 0xad, 0x45, 0x2e, 0x24, 0x9d, 0x1d, 0x80, 0xd5, 0xc0, 0xbf, 0xd5
db 0xca, 0x78, 0x3e, 0x53, 0x86, 0x97, 0xa9, 0xc5, 0xf8, 0x22, 0x41, 0x1a, 0x1b, 0x20, 0x53, 0xc5
db 0xc8, 0x4b, 0xff, 0x56, 0xb8, 0xfb, 0x61, 0x84, 0x83, 0xca, 0xac, 0x68, 0xcd, 0x56, 0x47, 0xa6
db 0x0d, 0x43, 0xfc, 0xbb, 0xaa, 0x60, 0xf1, 0xc4, 0xde, 0xc0, 0xe0, 0xa2, 0x88, 0x59, 0x68, 0x53
db 0x69, 0xba, 0x67, 0x21, 0x54, 0x18, 0x98, 0x51, 0x09, 0x35, 0xdd, 0x3c, 0xec, 0x35, 0x2f, 0xf0
db 0x6e, 0xcf, 0xba, 0x21, 0x04, 0x0a, 0xb4, 0x91, 0xdf, 0xf8, 0xb5, 0x9f, 0xb8, 0xc4, 0x97, 0xf8
db 0x4f, 0x10, 0xca, 0x5b, 0x4a, 0x43, 0x9e, 0x45, 0x0e, 0x51, 0x3f, 0x43, 0x96, 0x40, 0xb8, 0x70
db 0x66, 0xcb, 0xa1, 0x70, 0x5f, 0x4f, 0x9e, 0xb0, 0x2f, 0x03, 0x6f, 0x42, 0xcd, 0xf6, 0x3c, 0x90
db 0x2f, 0x9b, 0x83, 0x01, 0x41, 0x13, 0xa8, 0xf8, 0xbc, 0x19, 0x5c, 0x10, 0x09, 0xb9, 0xd4, 0x5d
db 0x0c, 0xa3, 0x51, 0xb0, 0x1a, 0xfd, 0x18, 0xbb, 0x67, 0x40, 0x4f, 0xc4, 0xab, 0x41, 0x8e, 0xbf
db 0x74, 0x0d, 0xa2, 0xa0, 0xf2, 0x48, 0x94, 0x5e, 0xa6, 0x63, 0xe4, 0xb3, 0x77, 0x2b, 0x79, 0x90
db 0xfe, 0x95, 0x1d, 0x92, 0x3a, 0x23, 0x8b, 0xf2, 0xa9, 0x87, 0x31, 0x9a, 0x06, 0x81, 0xb1, 0x6f
db 0xa6, 0x1e, 0x21, 0xb6, 0x1e, 0x0d, 0x9a, 0xf8, 0x3d, 0x38, 0x9d, 0xcc, 0x0c, 0x49, 0x83, 0x7a
db 0xd5, 0x7e, 0x5a, 0x98, 0xd7, 0xbe, 0x07, 0xf9, 0x79, 0x5e, 0x46, 0x89, 0x8e, 0xc9, 0xdf, 0x27
db 0xf9, 0xa4, 0xc6, 0x5f, 0x08, 0x45, 0x04, 0x2b, 0x62, 0xf5, 0x8e, 0x54, 0x57, 0xd0, 0xcd, 0xf4
db 0x51, 0x9e, 0x9a, 0xc8, 0x82, 0x65, 0x45, 0x8e, 0x49, 0xba, 0x28, 0x55, 0x3e, 0x58, 0xac, 0xf0
db 0xfb, 0xf2, 0xbd, 0x77, 0x9e, 0xc0, 0xb4, 0xf6, 0x9e, 0x52, 0x87, 0x5e, 0xc9, 0x41, 0x26, 0x01
db 0x64, 0x1f, 0x54, 0x53, 0x25, 0xb8, 0x2d, 0x91, 0xc9, 0x68, 0x30, 0xa0, 0x92, 0x37, 0x03, 0xac
db 0x36, 0xdc, 0x43, 0xb8, 0x2b, 0x29, 0x39, 0x8b, 0xfe, 0xda, 0xb6, 0xee, 0x3c, 0x7d, 0x4a, 0xc3
db 0x3e, 0xad, 0x45, 0xf4, 0xab, 0x67, 0xe3, 0x2b, 0xf5, 0xc4, 0xcf, 0x2f, 0xe3, 0xbd, 0x2e, 0x1c
db 0xac, 0xca, 0x37, 0xe3, 0x65, 0x9e, 0x6b, 0xc9, 0x94, 0x4f, 0x6d, 0x93, 0x0f, 0x0a, 0xc7, 0x19
db 0x69, 0xb3, 0x16, 0xe6, 0x70, 0xe7, 0xd0, 0xb1, 0xa9, 0x20, 0xae, 0x1d, 0xf3, 0x9c, 0xde, 0x5a
db 0xb2, 0x97, 0xdc, 0xd2, 0xc5, 0x91, 0x5f, 0x29, 0x2a, 0x47, 0x9b, 0x8a, 0x08, 0xd1, 0x5d, 0xf8
db 0x25, 0xf5, 0x36, 0x75, 0x69, 0x78, 0x8f, 0x78, 0xc5, 0x81, 0x97, 0x1a, 0x3f, 0x31, 0xc9, 0x70
db 0x3c, 0xe2, 0x04, 0x43, 0x41, 0xbd, 0x09, 0x10, 0x74, 0x95, 0x52, 0x53, 0x15, 0x15, 0xe0, 0xe9
db 0x3a, 0x4d, 0x11, 0x43, 0x90, 0xfc, 0x05, 0x0b, 0xd2, 0xbf, 0x3b, 0x14, 0xe5, 0x7e, 0x40, 0xa7
db 0x58, 0x4e, 0x2d, 0x6e, 0xa8, 0xc8, 0x73, 0xd0, 0x15, 0xa9, 0x8b, 0x28, 0x5d, 0xe0, 0x73, 0xac
db 0x8a, 0xdb, 0x28, 0xea, 0xdc, 0x97, 0xae, 0x75, 0x5b, 0xd5, 0x15, 0xe3, 0x7a, 0xc3, 0x39, 0x22
db 0x6c, 0x30, 0x43, 0x6a, 0xf8, 0x53, 0x1d, 0xd7, 0xee, 0x37, 0xd9, 0xe7, 0x56, 0x8c, 0x72, 0x8a
db 0xf1, 0x7d, 0xc9, 0x55, 0x57, 0x8d, 0x9e, 0xa5, 0xef, 0xa0, 0x72, 0xef, 0x3a, 0x28, 0x83, 0x56
db 0x2a, 0xd4, 0xa3, 0x40, 0x6c, 0x41, 0xe2, 0xaf, 0x20, 0xc6, 0xae, 0x52, 0x69, 0x1c, 0x14, 0xe7
db 0x9e, 0x61, 0xb0, 0x18, 0x7d, 0x37, 0xee, 0x74, 0xec, 0x4f, 0x6e, 0x7a, 0x37, 0xaa, 0x72, 0xbb
db 0x66, 0x92, 0x9c, 0x04, 0x01, 0x7d, 0x5d, 0xd3, 0xae, 0xc0, 0x5a, 0x62, 0x29, 0xd4, 0x27, 0x64
db 0xce, 0x99, 0x2c, 0x0d, 0xfc, 0x27, 0xfc, 0x39, 0x5c, 0xfe, 0xe4, 0x64, 0x0e, 0x6d, 0x09, 0x39
db 0x42, 0xf4, 0x6b, 0xa0, 0x6e, 0x65, 0xc8, 0xc5, 0xed, 0xa4, 0x25, 0xbd, 0x63, 0x87, 0x43, 0xd5
db 0x4a, 0xbb, 0x85, 0x52, 0x6b, 0x13, 0x25, 0xc9, 0x8e, 0xb3, 0xb2, 0xa7, 0xfd, 0x96, 0x86, 0xe7
db 0xbb, 0x75, 0x41, 0x24, 0x11, 0xf7, 0xf7, 0xd4, 0xf3, 0x97, 0xb3, 0x19, 0x49, 0x32, 0xc1, 0xd4
db 0xfd, 0xd9, 0x93, 0x9d, 0x80, 0x4b, 0xd1, 0xb8, 0x9c, 0x1b, 0xc7, 0xf9, 0x48, 0x47, 0x68, 0x84
db 0xbe, 0x74, 0xff, 0xef, 0xb5, 0x00, 0x6f, 0x04, 0xa0, 0x91, 0x1f, 0xae, 0x59, 0x06, 0x81, 0xd4
db 0x46, 0xe5, 0x4e, 0xd6, 0xf6, 0x69, 0x67, 0x26, 0x58, 0x86, 0x54, 0x8b, 0x0e, 0x2b, 0x07, 0x51
db 0xe5, 0x52, 0x47, 0x23, 0x50, 0xb1, 0x7c, 0x5b, 0xf0, 0xbc, 0x87, 0x9d, 0x30, 0xff, 0x27, 0xd8
db 0x86, 0xce, 0x9a, 0x13, 0x99, 0x0e, 0xa7, 0x2d, 0xa0, 0x86, 0xae, 0xc8, 0x46, 0x4d, 0x6a, 0xc9
db 0x4d, 0xab, 0x5f, 0x6d, 0xb2, 0xae, 0xdf, 0x1a, 0x0e, 0xa9, 0x10, 0x37, 0xe9, 0xf7, 0xef, 0x29
db 0xea, 0xe1, 0x39, 0xaf, 0xbc, 0xb1, 0xd0, 0x4f, 0x33, 0x69, 0xe7, 0x79, 0xdf, 0xfb, 0x58, 0x51
db 0x95, 0x3e, 0xc1, 0x5b, 0x53, 0x49, 0xbf, 0x85, 0xf5, 0xde, 0x32, 0x3b, 0xf0, 0x25, 0x46, 0x2d
db 0xb4, 0x49, 0xbf, 0x63, 0x93, 0x9c, 0xac, 0x32, 0xfb, 0xc2, 0x03, 0x5e, 0x45, 0x77, 0x41, 0xf9
db 0x38, 0x1e, 0x3c, 0xa0, 0xf6, 0x02, 0x08, 0xe8, 0x19, 0xf4, 0x13, 0xb3, 0x96, 0x15, 0xe8, 0x46
db 0xe7, 0x50, 0x5c, 0x8b, 0x79, 0x5c, 0x36, 0x72, 0x2f, 0x77, 0x0a, 0x68, 0x7a, 0x2f, 0x06, 0x01
db 0x29, 0x1c, 0xd3, 0x76, 0xf2, 0x32, 0x8e, 0xea, 0x55, 0x47, 0xd9, 0xe0, 0xc6, 0x1d, 0x87, 0xdb
db 0x9e, 0xd7, 0xe8, 0xb2, 0x18, 0x60, 0x08, 0x88, 0xc4, 0xdf, 0x7d, 0x0b, 0xfd, 0xfc, 0xb2, 0xeb
db 0x17, 0xcd, 0xf0, 0x79, 0xe1, 0xc1, 0x6e, 0x74, 0xc2, 0x64, 0x17, 0xa5, 0x8b, 0xca, 0x20, 0x95
db 0x3d, 0xc7, 0xe5, 0x30, 0x3d, 0x32, 0x80, 0xd5, 0x32, 0x74, 0x30, 0xf1, 0x09, 0x56, 0x79, 0x22
db 0x07, 0x48, 0x03, 0x55, 0x3c, 0x19, 0xec, 0x88, 0xa2, 0xe6, 0xa2, 0x98, 0x13, 0x7b, 0x14, 0x3f
db 0xa6, 0x8f, 0xef, 0xbc, 0xfc, 0x4e, 0x6e, 0x25, 0x7e, 0xcc, 0x54, 0x5f, 0x9f, 0x16, 0xe5, 0xb5
db 0x24, 0xcc, 0xe8, 0xde, 0xfb, 0x36, 0xad, 0xbe, 0x00, 0x34, 0x32, 0x6c, 0x78, 0xcf, 0xc7, 0x2a
db 0xb0, 0xef, 0x07, 0x67, 0x73, 0x59, 0x4b, 0xc4, 0x58, 0x47, 0xbf, 0x19, 0x0f, 0x5f, 0x87, 0xbf
db 0x14, 0x2c, 0xa1, 0xc7, 0xa1, 0x48, 0xe2, 0x3a, 0x48, 0x1b, 0x2c, 0x0f, 0x52, 0xe0, 0x43, 0x9a
db 0x32, 0x10, 0x0a, 0xdb, 0x27, 0x30, 0x01, 0x55, 0x89, 0x1c, 0x5e, 0xc3, 0x10, 0xf2, 0x48, 0x78
db 0xd7, 0xf7, 0xba, 0xa4, 0x67, 0xda, 0xb4, 0x16, 0x99, 0x44, 0x41, 0x26, 0x85, 0xb6, 0x45, 0xec
db 0x0a, 0x26, 0xf4, 0xfc, 0x3b, 0x6a, 0x55, 0x1d, 0x62, 0xc9, 0xa4, 0x3b, 0x20, 0x54, 0x7c, 0x3e
db 0xb6, 0xd3, 0x88, 0xe5, 0x54, 0xb5, 0xeb, 0xd1, 0x30, 0xb9, 0x98, 0xda, 0x4d, 0x2c, 0x81, 0xf3
db 0x4b, 0x54, 0x3a, 0x24, 0x59, 0x4f, 0x13, 0x58, 0x7f, 0xa0, 0x5b, 0x1f, 0xd6, 0x0a, 0x7a, 0xb5
db 0x36, 0x6b, 0x2f, 0xaa, 0x53, 0xda, 0xe6, 0xa8, 0x61, 0x83, 0x16, 0x3c, 0xa6, 0x81, 0x78, 0xd2
db 0x77, 0x0a, 0x30, 0x76, 0x0e, 0x56, 0x6a, 0x66, 0xa6, 0x5c, 0x91, 0xc8, 0xec, 0xa2, 0x99, 0xa0
db 0x67, 0x5b, 0x21, 0x37, 0x38, 0x01, 0x27, 0x27, 0x8b, 0x1c, 0xd0, 0xe4, 0x91, 0x43, 0xe0, 0xcb
db 0xe5, 0x4b, 0xa0, 0xe7, 0x1c, 0x3f, 0x33, 0x3f, 0xb4, 0x42, 0xb0, 0x64, 0x34, 0xde, 0x8d, 0x35
db 0x2e, 0x59, 0x21, 0xab, 0xa6, 0x71, 0xc9, 0x3a, 0x65, 0x83, 0xa6, 0x0c, 0xb0, 0x36, 0xa0, 0xe2
db 0xb0, 0xb9, 0x8b, 0x5c, 0x2e, 0x3b, 0xa1, 0x03, 0x96, 0x6e, 0x63, 0x9c, 0x2a, 0x02, 0xbd, 0x2b
db 0x21, 0xf9, 0xee, 0x75, 0xab, 0xf4, 0x3c, 0xf4, 0x64, 0xb1, 0xbb, 0x97, 0x10, 0x3a, 0x02, 0x60
db 0x3e, 0x93, 0x97, 0xfd, 0xb4, 0xcb, 0x2f, 0x31, 0x80, 0x7f, 0x74, 0x23, 0x35, 0x6d, 0x4a, 0x83
db 0xa6, 0x70, 0x7b, 0x27, 0xf7, 0xfa, 0x97, 0xe9, 0x54, 0x4d, 0xfe, 0xfd, 0x7c, 0xe8, 0x91, 0x03
db 0x6b, 0x61, 0xc4, 0x8c, 0xa2, 0x41, 0x23, 0x52, 0xc3, 0x9b, 0x5d, 0xb6, 0xb5, 0x1b, 0x9f, 0x3b
db 0x48, 0x09, 0x59, 0xec, 0x3c, 0x23, 0xfd, 0x9e, 0xa8, 0x76, 0x73, 0x92, 0xab, 0x28, 0x6a, 0x57
db 0xe4, 0x88, 0x25, 0x2b, 0x6d, 0xbf, 0xf8, 0xc2, 0xe6, 0xdb, 0x48, 0x58, 0x0a, 0x06, 0xae, 0xd0
db 0x78, 0x25, 0x61, 0xd5, 0xcd, 0x60, 0xf9, 0xf1, 0xaf, 0xb3, 0xb8, 0x86, 0x2f, 0x00, 0x4b, 0xe1
db 0xe6, 0xbd, 0x46, 0xdb, 0x91, 0x76, 0xea, 0x22, 0xb5, 0x85, 0x32, 0x3a, 0x41, 0x6a, 0xfb, 0x86
db 0x45, 0xc9, 0x3e, 0xb1, 0xcb, 0x1d, 0xee, 0x8e, 0x58, 0xcf, 0x24, 0x6b, 0xb1, 0xc5, 0xe0, 0xda
db 0x30, 0xc9, 0xa9, 0x36, 0x5b, 0xeb, 0xc1, 0x93, 0x32, 0x60, 0x15, 0x0a, 0x41, 0xa0, 0x64, 0x0c
db 0x30, 0x89, 0x6e, 0xda, 0xab, 0x7e, 0x49, 0x44, 0xbc, 0xb4, 0x92, 0x0c, 0x29, 0xee, 0xd5, 0x03
db 0xe0, 0xd7, 0x96, 0xcf, 0x00, 0xf8, 0xaf, 0x89, 0xf2, 0xe0, 0x3a, 0x23, 0x45, 0x1a, 0x73, 0x4d
db 0x3f, 0x59, 0x50, 0xe5, 0x98, 0x2a, 0x78, 0x0f, 0x07, 0x31, 0xd2, 0x89, 0x69, 0xa2, 0xe9, 0x8d
db 0xe4, 0xab, 0x41, 0x8b, 0xe9, 0x83, 0x5e, 0xbc, 0xf9, 0x81, 0xbf, 0xe8, 0x24, 0xa4, 0x9d, 0x0a
db 0xd0, 0x7c, 0x3b, 0x40, 0x69, 0x93, 0x26, 0x42, 0x23, 0xb1, 0x38, 0xfa, 0x22, 0x25, 0x15, 0xb5
db 0x17, 0xb8, 0xa0, 0xc3, 0xb0, 0x80, 0x98, 0x04, 0x85, 0x91, 0x2b, 0xa8, 0x79, 0x34, 0xf9, 0x74
db 0x9e, 0x49, 0xad, 0xcf, 0xca, 0xf6, 0x06, 0xd3, 0xdf, 0x27, 0xb6, 0xd8, 0x19, 0x14, 0x84, 0xee
db 0xe6, 0x9c, 0x7c, 0x41, 0x07, 0xbd, 0x26, 0x7b, 0x3c, 0x81, 0x20, 0x8f, 0x1d, 0x50, 0x2d, 0xdd
db 0x31, 0xb9, 0x5a, 0x4c, 0xc0, 0x89, 0x68, 0x79, 0xb4, 0x9e, 0xe6, 0x57, 0x44, 0xee, 0x32, 0xbb
db 0x69, 0xf7, 0x35, 0xbc, 0xcf, 0x96, 0xa7, 0xe0, 0xb4, 0x38, 0xce, 0xde, 0xb9, 0xf6, 0xfe, 0x5a
db 0xb9, 0xe7, 0x3c, 0x01, 0xf2, 0xbd, 0xa8, 0x26, 0xf6, 0x29, 0x0b, 0xe6, 0xd7, 0xe7, 0xa5, 0x62
db 0xb0, 0x0c, 0x9b, 0x01, 0x4f, 0x18, 0x9e, 0x40, 0x28, 0x2a, 0xbb, 0x21, 0xe6, 0x8d, 0x93, 0x22
db 0xbd, 0x01, 0xfc, 0x78, 0x93, 0x29, 0x55, 0x8f, 0x17, 0xe8, 0x09, 0x07, 0xf8, 0x30, 0x20, 0x68
db 0xf2, 0x95, 0xc1, 0x50, 0xad, 0x12, 0x35, 0x46, 0x52, 0x65, 0xaa, 0xb7, 0x35, 0x50, 0x22, 0x91
db 0x36, 0x74, 0x86, 0xab, 0x4b, 0xe8, 0xfd, 0x42, 0x76, 0x41, 0x4a, 0xb4, 0x2c, 0x59, 0x36, 0xc9
db 0xd6, 0xdb, 0x7e, 0xa1, 0x60, 0xcf, 0x13, 0x62, 0x0c, 0x93, 0xdd, 0x3e, 0xfc, 0x3e, 0x36, 0xfc
db 0xfd, 0x7e, 0x48, 0x69, 0x0f, 0x6a, 0xdf, 0x3c, 0xc0, 0x35, 0xcf, 0x81, 0x4b, 0x79, 0x15, 0x2d
db 0xda, 0x5a, 0x7f, 0xef, 0xe1, 0x13, 0x75, 0xef, 0xad, 0x80, 0xf7, 0x4e, 0xa1, 0xfd, 0x5d, 0xf8
db 0x67, 0xc4, 0x4a, 0xe5, 0x9f, 0x28, 0xe8, 0x82, 0xe5, 0xae, 0xac, 0xef, 0xb9, 0x4b, 0xca, 0x44
db 0x9b, 0xdc, 0xf2, 0xd2, 0x57, 0xa6, 0x9c, 0x5e, 0xbb, 0xd5, 0x4e, 0x31, 0xa8, 0xfa, 0x32, 0x26
db 0x4d, 0x46, 0x0d, 0xcb, 0xcf, 0x9e, 0x1a, 0xa3, 0x50, 0x69, 0x06, 0x34, 0xd8, 0xf9, 0x5d, 0xaf
db 0x4f, 0xc5, 0x2b, 0xe0, 0x8b, 0x81, 0x76, 0xc5, 0xce, 0x74, 0x4b, 0xba, 0x02, 0xba, 0xd4, 0x8b
db 0x91, 0xc5, 0x54, 0x28, 0x41, 0x77, 0x5f, 0xaa, 0x83, 0x14, 0xba, 0xf7, 0x8c, 0x51, 0xe2, 0xea
db 0xcf, 0x71, 0x40, 0x2f, 0x07, 0x82, 0xed, 0x81, 0x8c, 0x4f, 0xd3, 0x28, 0xce, 0x5c, 0x16, 0x72
db 0x4f, 0xcc, 0xe8, 0x49, 0x04, 0xae, 0x11, 0xe4, 0xbb, 0x18, 0x6d, 0xc4, 0xb3, 0x30, 0x20, 0xf7
db 0x0c, 0xe8, 0x50, 0x68, 0x37, 0xe9, 0x48, 0x76, 0x8b, 0x33, 0xea, 0xbb, 0x0d, 0x52, 0xbb, 0xab
db 0x6a, 0x62, 0xf4, 0x06, 0xbe, 0x5d, 0x7e, 0x89, 0xfb, 0xca, 0x75, 0xe6, 0xf7, 0x27, 0x59, 0x7c
db 0x91, 0x1a, 0xa3, 0xbb, 0x5a, 0x72, 0xd4, 0x3c, 0x19, 0xa8, 0x00, 0x74, 0x12, 0x24, 0x1c, 0x49
db 0xd9, 0x87, 0xd2, 0xc2, 0x98, 0x63, 0x7a, 0x0e, 0x5c, 0x39, 0x6b, 0x44, 0x29, 0xee, 0xef, 0x18
db 0xd1, 0xab, 0x15, 0xe0, 0x79, 0x50, 0x2a, 0x21, 0xc2, 0x67, 0x21, 0xab, 0x3c, 0x54, 0xc6, 0xf4
db 0xc6, 0x67, 0x96, 0x38, 0x37, 0xb6, 0xd2, 0x1b, 0xf0, 0xf6, 0xf8, 0x03, 0x60, 0x1c, 0xd8, 0x28
db 0xb2, 0x47, 0x53, 0x97, 0xf3, 0x1e, 0xd0, 0xcd, 0xd8, 0x30, 0xad, 0x24, 0xe7, 0xd9, 0x38, 0x97
db 0xb4, 0xc3, 0x1b, 0xb3, 0xea, 0x28, 0x40, 0x99, 0x29, 0x3e, 0x8f, 0x30, 0xb2, 0x55, 0xca, 0x7b


================================================
FILE: unittests/ASM/VEX/mulx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0xFFFFFFFFFFFFFFFE",
      "RBX": "4",
      "RCX": "4",
      "RDX": "0xFFFFFFFE",
      "RSI": "1",
      "RDI": "1"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; MULX test: unsigned multiply of the implicit RDX/EDX source by the third
; operand. The first operand receives the high half of the product and the
; second operand receives the low half.

; Test low
; 2 * 2: low half lands in rbx (4). The high half written to rax (0) is
; overwritten by the next test, so only rbx is checked from this step.
mov rbx, 2
mov rdx, 2
mulx rax, rbx, rbx

; Test high
; (2^64 - 1) * (2^64 - 1): high half in rax = 0xFFFFFFFFFFFFFFFE, low half in rdi = 1.
mov rcx, -1
mov rdx, -1
mulx rax, rdi, rcx

; 32-bit

; Test low
; 2 * 2: low half lands in ecx (4). edx (high half, 0) is overwritten below.
mov ecx, 2
mov edx, 2
mulx edx, ecx, ecx

; Test high
; (2^32 - 1) * (2^32 - 1): high half in edx = 0xFFFFFFFE, low half in esi = 1.
; The low-half destination aliases the explicit source register here.
mov esi, -1
mov edx, -1
mulx edx, esi, esi 

hlt


================================================
FILE: unittests/ASM/VEX/pdep.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x00012567",
      "RBX": "0xFF00FFF0",
      "RCX": "0x12005670",
      "RDX": "0x0801256708012567",
      "RSI": "0xFF00FF00FF00FF00",
      "RDI": "0x0800010025006700"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; PDEP test: the low-order bits of the second operand are deposited into the
; bit positions that are set in the mask given as the third operand.

; 32-bit
; Source 0x00012567 through mask 0xFF00FFF0 -> ecx = 0x12005670.
; eax and ebx must be left unmodified.
mov eax, 0x00012567
mov ebx, 0xFF00FFF0
pdep ecx, eax, ebx

; 64-bit
; Source 0x0801256708012567 through mask 0xFF00FF00FF00FF00
; -> rdi = 0x0800010025006700. rdx and rsi must be left unmodified.
mov rdx, 0x0801256708012567
mov rsi, 0xFF00FF00FF00FF00
pdep rdi, rdx, rsi

hlt


================================================
FILE: unittests/ASM/VEX/pext.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x12345678",
      "RBX": "0xFF00FFF0",
      "RCX": "0x00012567",
      "RDX": "0x1234567812345678",
      "RSI": "0xFF00FF00FF00FF00",
      "RDI": "0x12561256",
      "R8":  "0x1234567812345678",
      "R10": "0x12345678",
      "R11": "0x12345678",
      "R12": "0x00005678"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; PEXT test: the bits of the second operand selected by the mask in the third
; operand are packed contiguously into the low bits of the destination.
; r9/r9d is only used as a scratch mask register and is not checked.

; 32-bit
; Source 0x12345678 through mask 0xFF00FFF0 -> ecx = 0x00012567.
mov eax, 0x12345678
mov ebx, 0xFF00FFF0
pext ecx, eax, ebx

; 32-bit full mask
; An all-ones mask must return the source unchanged (destination aliases source).
mov r10d,  0x12345678
mov r9d, 0xFFFFFFFF
pext r10d, r10d, r9d

; 32-bit half mask
; Only the low 16 bits are selected -> r12 = 0x00005678.
mov r12d, 0x12345678
mov r9d, 0x0000FFFF
pext r12d, r12d, r9d

; 64-bit
; Every odd byte is selected -> rdi = 0x12561256.
mov rdx, 0x1234567812345678
mov rsi, 0xFF00FF00FF00FF00
pext rdi, rdx, rsi

; 64-bit full mask
; An all-ones mask must return the source unchanged (destination aliases source).
mov r8, 0x1234567812345678
mov r9, 0xFFFFFFFFFFFFFFFF
pext r8, r8, r9

; 64-bit half mask
; Only the low 32 bits are selected -> r11 = 0x12345678.
mov r11, 0x1234567812345678
mov r9,  0x00000000FFFFFFFF
pext r11, r11, r9

hlt


================================================
FILE: unittests/ASM/VEX/rorx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x8000000000000000",
      "RBX": "0xFF",
      "RCX": "0xF00000000000000F",
      "RDX": "0x80000000",
      "RSI": "0xFF",
      "RDI": "0xF000000F",
      "R8":  "0",
      "R9": "0x0000000045464748",
      "R10": "0x0000000022a323a4"

  },
  "HostFeatures": ["BMI2"]
}
%endif

; RORX test: rotate the second operand right by the immediate count and write
; the result to the first operand. Covers 64-bit and 32-bit forms, masking of
; the rotate count to the operand size, and zero-extension of 32-bit results.

; Trivial test
; 1 rotated right by 1 wraps the low bit around -> rax = 0x8000000000000000.
mov rax, 1
rorx rax, rax, 1

; More than one bit
; 0xFF rotated right by 4 -> rcx = 0xF00000000000000F; rbx stays 0xFF.
mov rbx, 0xFF
rorx rcx, rbx, 4

; Test that we mask the rotation amount above the operand size (should leave rcx's value alone).
rorx rcx, rcx, 64

; 32-bit

; Trivial test
; 1 rotated right by 1 within 32 bits -> edx = 0x80000000.
mov edx, 1
rorx edx, edx, 1

; More than one bit
; 0xFF rotated right by 4 within 32 bits -> edi = 0xF000000F; esi stays 0xFF.
mov esi, 0xFF
rorx edi, esi, 4

; Test that we mask the rotation amount above the operand size (should leave edi's value alone).
rorx edi, edi, 32

; Zero-extending behavior
; A 32-bit rotate by 0 must still clear the upper 32 bits of r8 -> r8 = 0.
mov r8, 0xFFFFFFFF00000000
rorx r8d, r8d, 0

; Count 0xE0 masks to 0 for a 32-bit operand: the low dword is kept and the
; upper dword is cleared -> r9 = 0x0000000045464748.
mov r9, 0x4142434445464748
rorx r9d, r9d, 0xE0

; Count 0xE1 masks to 1 for a 32-bit operand -> r10 = 0x0000000022a323a4.
mov r10, 0x4142434445464748
rorx r10d, r10d, 0xE1

hlt


================================================
FILE: unittests/ASM/VEX/sarx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0xF800000000000000",
      "RBX": "4",
      "RCX": "0xFFFFFFFFFFFFFFFF",
      "RDX": "127",
      "RSI": "63",
      "RDI": "0x00000000FFFFFFFF"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; SARX test: arithmetic (sign-propagating) right shift of the second operand
; by the count held in the third operand.

; Trivial right shift
; The sign bit is replicated into the vacated bits -> rax = 0xF800000000000000.
mov rax, 0x8000000000000000
mov rbx, 4
sarx rax, rax, rbx

; This is really a shift by 63. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: rcx = 0xFFFFFFFFFFFFFFFF, rdx left at 127.
mov rcx, 0x8000000000000000
mov rdx, 127
sarx rcx, rcx, rdx

; This is really a shift by 31. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: edi = 0xFFFFFFFF with the upper 32 bits of rdi zero.
mov edi, 0x80000000
mov esi, 63
sarx edi, edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/shlx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x10",
      "RBX": "4",
      "RCX": "0x8000000000000000",
      "RDX": "127",
      "RSI": "63",
      "RDI": "0x80000000"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; SHLX test: logical left shift of the second operand by the count held in
; the third operand.

; Trivial left shift
; 1 << 4 -> rax = 0x10; rbx (the count) stays 4.
mov rax, 1
mov rbx, 4
shlx rax, rax, rbx

; This is really a shift by 63. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: rcx = 0x8000000000000000, rdx left at 127.
mov rcx, 1
mov rdx, 127
shlx rcx, rcx, rdx

; This is really a shift by 31. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: rdi = 0x80000000, rsi left at 63.
mov edi, 1
mov esi, 63
shlx edi, edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/shrx.asm
================================================
%ifdef CONFIG
{
  "RegData": {
      "RAX": "0x0800000000000000",
      "RBX": "4",
      "RCX": "1",
      "RDX": "127",
      "RSI": "63",
      "RDI": "1"
  },
  "HostFeatures": ["BMI2"]
}
%endif

; SHRX test: logical (zero-filling) right shift of the second operand by the
; count held in the third operand.

; Trivial right shift
; Zero is shifted in from the top -> rax = 0x0800000000000000; rbx stays 4.
mov rax, 0x8000000000000000
mov rbx, 4
shrx rax, rax, rbx

; This is really a shift by 63. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: rcx = 1, rdx left at 127.
mov rcx, 0x8000000000000000
mov rdx, 127
shrx rcx, rcx, rdx

; This is really a shift by 31. This just ensures we properly
; mask the shift value according to the ISA manual.
; Expected result: rdi = 1, rsi left at 63.
mov edi, 0x80000000
mov esi, 63
shrx edi, edi, esi

hlt


================================================
FILE: unittests/ASM/VEX/vaddpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x4000000000000000", "0x4008000000000000", "0x4010000000000000"],
    "XMM1": ["0x4014000000000000", "0x4018000000000000", "0x401C000000000000", "0x4020000000000000"],
    "XMM2": ["0x4018000000000000", "0x4020000000000000", "0x4024000000000000", "0x4028000000000000"],
    "XMM3": ["0x4018000000000000", "0x4020000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4018000000000000", "0x4020000000000000", "0x4024000000000000", "0x4028000000000000"],
    "XMM5": ["0x4018000000000000", "0x4020000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VADDPD test: packed double-precision add in 256-bit and 128-bit forms, with
; both register and memory second sources.
; rdx points at two 32-byte vectors: {1.0, 2.0, 3.0, 4.0} and {5.0, 6.0, 7.0, 8.0}.
lea rdx, [rel .data]

; Registers
; ymm2 = {6.0, 8.0, 10.0, 12.0}; the xmm form must zero the upper 128 bits of ymm3.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vaddpd ymm2, ymm0, ymm1
vaddpd xmm3, xmm0, xmm1

; Memory operand
; Same sums as above, with the second source read from memory.
vaddpd ymm4, ymm0, [rdx + 32]
vaddpd xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0
dq 0x4008000000000000 ; 3.0
dq 0x4010000000000000 ; 4.0

dq 0x4014000000000000 ; 5.0
dq 0x4018000000000000 ; 6.0
dq 0x401C000000000000 ; 7.0
dq 0x4020000000000000 ; 8.0


================================================
FILE: unittests/ASM/VEX/vaddps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM1": ["0x4100000040E00000", "0x40C0000040A00000", "0x4080000040400000", "0x400000003F800000"],
    "XMM2": ["0x4120000041000000", "0x4120000041000000", "0x4120000041000000", "0x4120000041000000"],
    "XMM3": ["0x4120000041000000", "0x4120000041000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4120000041000000", "0x4120000041000000", "0x4120000041000000", "0x4120000041000000"],
    "XMM5": ["0x4120000041000000", "0x4120000041000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VADDPS test: packed single-precision add in 256-bit and 128-bit forms, with
; both register and memory second sources.
; rdx points at two 32-byte vectors: 1.0..8.0 ascending and 8.0..1.0
; descending (by float index), so every lane pair sums to 9.0... see below.
lea rdx, [rel .data]

; Registers
; Lane sums alternate 8.0 / 10.0 (0x41000000 / 0x41200000) per the expected
; register data; the xmm form must zero the upper 128 bits of ymm3.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vaddps ymm2, ymm0, ymm1
vaddps xmm3, xmm0, xmm1

; Memory operand
; Same sums as above, with the second source read from memory.
vaddps ymm4, ymm0, [rdx + 32]
vaddps xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
dq 0x400000003F800000 ; 2.0, 1.0
dq 0x4080000040400000 ; 4.0, 3.0
dq 0x40C0000040A00000 ; 6.0, 5.0
dq 0x4100000040E00000 ; 8.0, 7.0

dq 0x4100000040E00000 ; 8.0, 7.0
dq 0x40C0000040A00000 ; 6.0, 5.0
dq 0x4080000040400000 ; 4.0, 3.0
dq 0x400000003F800000 ; 2.0, 1.0


================================================
FILE: unittests/ASM/VEX/vaddsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4014000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2":  ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4":  ["0x403A000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x403D000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM7":  ["0x404B000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VADDSD test: scalar double-precision add. Only the low 64 bits are summed;
; bits 127:64 come from the first source and bits 255:128 of the destination
; are zeroed.
; rdx points at five 32-byte vectors whose low qwords are 1.0, 4.0, 9.0, 16.0
; and 25.0; every other qword is the filler pattern 0x5152535455565758.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 4]

; Register only
; xmm0 = 1.0 + 4.0 = 5.0, xmm2 = 9.0 + 16.0 = 25.0
vaddsd xmm0, xmm0, xmm1
vaddsd xmm2, xmm2, xmm3

; Memory operand
; xmm4 = 25.0 + 1.0 = 26.0, xmm5 = 25.0 + 4.0 = 29.0
vaddsd xmm4, xmm4, [rdx + 32 * 0]
vaddsd xmm5, xmm5, [rdx + 32 * 1]

; Merging different src into destination
; xmm7 is cleared first so the upper qword must visibly come from xmm5.
; xmm7 = 29.0 + 25.0 = 54.0
vpxor xmm7, xmm7, xmm7
vmovapd ymm6, [rdx + 32 * 4]
vaddsd xmm7, xmm5, xmm6

hlt

align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vaddss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434440A00000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4142434441D00000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434441E80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM7": ["0x4142434442580000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VADDSS test: scalar single-precision add. Only the low 32 bits are summed;
; bits 127:32 come from the first source and bits 255:128 of the destination
; are zeroed.
; rdx points at five 32-byte vectors whose low dwords are 1.0, 4.0, 9.0, 16.0
; and 25.0; bits 63:32 of the low qword are 0x41424344 and every other qword
; is the filler pattern 0x5152535455565758.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
; ymm5 must hold a defined value before it is used as a source below, and its
; result is kept so the second memory-operand add is actually verified.
vmovapd ymm5, [rdx + 32 * 4]

; Register only
; xmm0 = 1.0 + 4.0 = 5.0, xmm2 = 9.0 + 16.0 = 25.0
vaddss xmm0, xmm0, xmm1
vaddss xmm2, xmm2, xmm3

; Memory operand
; xmm4 = 25.0 + 1.0 = 26.0, xmm5 = 25.0 + 4.0 = 29.0
vaddss xmm4, xmm4, [rdx + 32 * 0]
vaddss xmm5, xmm5, [rdx + 32 * 1]

; Merging different src into destination
; xmm7 is cleared first so the upper bits must visibly come from xmm5.
; xmm7 = 29.0 + 25.0 = 54.0
vpxor xmm7, xmm7, xmm7
vmovapd ymm6, [rdx + 32 * 4]
vaddss xmm7, xmm5, xmm6

hlt

align 32
.data:
dq 0x414243443F800000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441C80000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vaddsubpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3" : ["0xBFF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5" : ["0x3FF0000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
    "XMM7" : ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
    "XMM8" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
    "XMM9" : ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
    "XMM10": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"],
    "XMM11": ["0xBFF0000000000000", "0x4008000000000000", "0xBFF0000000000000", "0x4008000000000000"]
  }
}
%endif

; VADDSUBPD test: for each pair of doubles, the even-indexed element is
; (src1 - src2) and the odd-indexed element is (src1 + src2).
; rdx points at two 32-byte vectors: four copies of 1.0 followed by four
; copies of 2.0.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit, 1.0 op 2.0 -> {-1.0, 3.0}; upper 128 bits of the destination are zeroed.
vaddsubpd xmm2, xmm0, [rdx + 32]
vaddsubpd xmm3, xmm0, xmm1

; 128-bit, operands swapped: 2.0 op 1.0 -> {1.0, 3.0}
vaddsubpd xmm4, xmm1, [rdx]
vaddsubpd xmm5, xmm1, xmm0

; 256-bit, 1.0 op 2.0 -> {-1.0, 3.0, -1.0, 3.0}
vaddsubpd ymm6, ymm0, [rdx + 32]
vaddsubpd ymm7, ymm0, ymm1

; 256-bit, operands swapped: 2.0 op 1.0 -> {1.0, 3.0, 1.0, 3.0}
vaddsubpd ymm8, ymm1, [rdx]
vaddsubpd ymm9, ymm1, ymm0

; Aliasing source/destination vectors
; Destination aliases the first source.
vmovapd ymm10, [rdx]
vaddsubpd ymm10, ymm10, ymm1

; Destination aliases the second source.
vmovapd ymm11, [rdx + 32]
vaddsubpd ymm11, ymm0, ymm11

hlt

align 32
.data:
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000

dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000


================================================
FILE: unittests/ASM/VEX/vaddsubps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x41200000C0000000", "0x41200000C0C00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
    "XMM4": ["0x41200000C0000000", "0x41200000C0C00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
    "XMM6": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"],
    "XMM7": ["0x41200000C0000000", "0x41200000C0C00000", "0x41200000C0000000", "0x41200000C0C00000"]
  }
}
%endif

; VADDSUBPS test: for each pair of floats, the even-indexed element is
; (src1 - src2) and the odd-indexed element is (src1 + src2).
; rdx points at two 32-byte vectors: {3, 4, 1, 2} repeated and {5, 6, 7, 8}
; repeated (by float index).
lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory second source: per 128-bit lane the result is {-2, 10, -6, 10}.
; The xmm form zeroes the upper 128 bits of the destination.
vaddsubps xmm2, xmm0, [rdx + 32]
vaddsubps ymm3, ymm0, [rdx + 32]

; Register second source: same results as the memory forms.
vaddsubps xmm4, xmm0, xmm1
vaddsubps ymm5, ymm0, ymm1

; Aliasing source/destination vectors
; Destination aliases the first source.
vmovapd ymm6, [rdx]
vaddsubps ymm6, ymm6, ymm1

; Destination aliases the second source.
vmovapd ymm7, [rdx + 32]
vaddsubps ymm7, ymm0, ymm7

hlt

align 32
.data:
dq 0x4080000040400000 ; 4, 3
dq 0x400000003f800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003f800000 ; 2, 1

dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7


================================================
FILE: unittests/ASM/VEX/vaesdec.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0x7A1FC5A0A07A1FC5", "0xC5A07A1F1FC5A07A", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x85E03A5F5F85E03A", "0x3A5F85E0E03A5F85", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7A1FC5A1A07A1FC4", "0xC5A07A1E1FC5A07B", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x85E03A5FA07A1FC5", "0xC5A07A1EE03A5F85", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VAESDEC (128-bit VEX form) test.
; rdx points at five 32-byte vectors; only the low 16 bytes of each are
; consumed by the xmm forms below.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). The expected XMM1 and XMM2 values are
; bitwise complements, matching the memory operand being XORed into the result.
; The upper 128 bits of each destination are expected to be zero.
vaesdec xmm1, xmm0, [rdx + 32 * 0]
vaesdec xmm2, xmm0, [rdx + 32 * 1]
vaesdec xmm3, xmm0, [rdx + 32 * 2]
vaesdec xmm4, xmm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesdec256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AES256"],
  "RegData": {
    "XMM1": ["0x7A1FC5A0A07A1FC5", "0xC5A07A1F1FC5A07A", "0x7a1fc5a0a07a1fc5", "0xc5a07a1f1fc5a07a"],
    "XMM2": ["0x85E03A5F5F85E03A", "0x3A5F85E0E03A5F85", "0x85e03a5f5f85e03a", "0x3a5f85e0e03a5f85"],
    "XMM3": ["0x7A1FC5A1A07A1FC4", "0xC5A07A1E1FC5A07B", "0x7a1fc5a1a07a1fc4", "0xc5a07a1e1fc5a07b"],
    "XMM4": ["0x85E03A5FA07A1FC5", "0xC5A07A1EE03A5F85", "0x85e03a5fa07a1fc5", "0xc5a07a1ee03a5f85"]
  }
}
%endif

; VAESDEC (256-bit form) test.
; rdx points at five 32-byte vectors; each vector repeats the same 16 bytes in
; both 128-bit halves.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). Because both halves of every input are
; identical, the expected result is the 128-bit result duplicated in both lanes.
vaesdec ymm1, ymm0, [rdx + 32 * 0]
vaesdec ymm2, ymm0, [rdx + 32 * 1]
vaesdec ymm3, ymm0, [rdx + 32 * 2]
vaesdec ymm4, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesdeclast.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0xD5D56A6A6AD5D56A", "0x6A6AD5D5D56A6AD5", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x2A2A9595952A2A95", "0x95952A2A2A95952A", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xD5D56A6B6AD5D56B", "0x6A6AD5D4D56A6AD4", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x2A2A95956AD5D56A", "0x6A6AD5D42A95952A", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VAESDECLAST (128-bit VEX form) test.
; rdx points at five 32-byte vectors; only the low 16 bytes of each are
; consumed by the xmm forms below.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). The expected XMM1 and XMM2 values are
; bitwise complements, matching the memory operand being XORed into the result.
; The upper 128 bits of each destination are expected to be zero.
vaesdeclast xmm1, xmm0, [rdx + 32 * 0]
vaesdeclast xmm2, xmm0, [rdx + 32 * 1]
vaesdeclast xmm3, xmm0, [rdx + 32 * 2]
vaesdeclast xmm4, xmm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesdeclast256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AES256"],
  "RegData": {
    "XMM1": ["0xD5D56A6A6AD5D56A", "0x6A6AD5D5D56A6AD5", "0xd5d56a6a6ad5d56a", "0x6a6ad5d5d56a6ad5"],
    "XMM2": ["0x2A2A9595952A2A95", "0x95952A2A2A95952A", "0x2a2a9595952a2a95", "0x95952a2a2a95952a"],
    "XMM3": ["0xD5D56A6B6AD5D56B", "0x6A6AD5D4D56A6AD4", "0xd5d56a6b6ad5d56b", "0x6a6ad5d4d56a6ad4"],
    "XMM4": ["0x2A2A95956AD5D56A", "0x6A6AD5D42A95952A", "0x2a2a95956ad5d56a", "0x6a6ad5d42a95952a"]
  }
}
%endif

; VAESDECLAST (256-bit form) test.
; rdx points at five 32-byte vectors; each vector repeats the same 16 bytes in
; both 128-bit halves.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). Because both halves of every input are
; identical, the expected result is the 128-bit result duplicated in both lanes.
vaesdeclast ymm1, ymm0, [rdx + 32 * 0]
vaesdeclast ymm2, ymm0, [rdx + 32 * 1]
vaesdeclast ymm3, ymm0, [rdx + 32 * 2]
vaesdeclast ymm4, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesenc.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0x77637B6F637B6F77", "0x7B6F77636F77637B", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x889C84909C849088", "0x8490889C90889C84", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x77637B6E637B6F76", "0x7B6F77626F77637A", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x889C8490637B6F77", "0x7B6F776290889C84", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VAESENC (128-bit VEX form) test.
; rdx points at five 32-byte vectors; only the low 16 bytes of each are
; consumed by the xmm forms below.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). The expected XMM1 and XMM2 values are
; bitwise complements, matching the memory operand being XORed into the result.
; The upper 128 bits of each destination are expected to be zero.
vaesenc xmm1, xmm0, [rdx + 32 * 0]
vaesenc xmm2, xmm0, [rdx + 32 * 1]
vaesenc xmm3, xmm0, [rdx + 32 * 2]
vaesenc xmm4, xmm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesenc256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AES256"],
  "RegData": {
    "XMM1": ["0x77637B6F637B6F77", "0x7B6F77636F77637B", "0x77637b6f637b6f77", "0x7b6f77636f77637b"],
    "XMM2": ["0x889C84909C849088", "0x8490889C90889C84", "0x889c84909c849088", "0x8490889c90889c84"],
    "XMM3": ["0x77637B6E637B6F76", "0x7B6F77626F77637A", "0x77637b6e637b6f76", "0x7b6f77626f77637a"],
    "XMM4": ["0x889C8490637B6F77", "0x7B6F776290889C84", "0x889c8490637b6f77", "0x7b6f776290889c84"]
  }
}
%endif

; VAESENC (256-bit form) test.
; rdx points at five 32-byte vectors; each vector repeats the same 16 bytes in
; both 128-bit halves.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). Because both halves of every input are
; identical, the expected result is the 128-bit result duplicated in both lanes.
vaesenc ymm1, ymm0, [rdx + 32 * 0]
vaesenc ymm2, ymm0, [rdx + 32 * 1]
vaesenc ymm3, ymm0, [rdx + 32 * 2]
vaesenc ymm4, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesenclast.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0x777B7B777B7B7777", "0x7B77777B77777B7B", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x8884848884848888", "0x8488888488888484", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x777B7B767B7B7776", "0x7B77777A77777B7A", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x888484887B7B7777", "0x7B77777A88888484", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VAESENCLAST (128-bit VEX form) test.
; rdx points at five 32-byte vectors; only the low 16 bytes of each are
; consumed by the xmm forms below.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). The expected XMM1 and XMM2 values are
; bitwise complements, matching the memory operand being XORed into the result.
; The upper 128 bits of each destination are expected to be zero.
vaesenclast xmm1, xmm0, [rdx + 32 * 0]
vaesenclast xmm2, xmm0, [rdx + 32 * 1]
vaesenclast xmm3, xmm0, [rdx + 32 * 2]
vaesenclast xmm4, xmm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesenclast256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AES256"],
  "RegData": {
    "XMM1": ["0x777B7B777B7B7777", "0x7B77777B77777B7B", "0x777b7b777b7b7777", "0x7b77777b77777b7b"],
    "XMM2": ["0x8884848884848888", "0x8488888488888484", "0x8884848884848888", "0x8488888488888484"],
    "XMM3": ["0x777B7B767B7B7776", "0x7B77777A77777B7A", "0x777b7b767b7b7776", "0x7b77777a77777b7a"],
    "XMM4": ["0x888484887B7B7777", "0x7B77777A88888484", "0x888484887b7b7777", "0x7b77777a88888484"]
  }
}
%endif

; VAESENCLAST (256-bit form) test.
; rdx points at five 32-byte vectors; each vector repeats the same 16 bytes in
; both 128-bit halves.
lea rdx, [rel .data]

; First source for every case: the 0x02.../0x03... pattern (fifth vector).
vmovaps ymm0, [rdx + 32 * 4]

; Same first source combined with four different memory operands (all-zero,
; all-ones, 0x00000001 dwords, mixed). Because both halves of every input are
; identical, the expected result is the 128-bit result duplicated in both lanes.
vaesenclast ymm1, ymm0, [rdx + 32 * 0]
vaesenclast ymm2, ymm0, [rdx + 32 * 1]
vaesenclast ymm3, ymm0, [rdx + 32 * 2]
vaesenclast ymm4, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaesimc.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0B0D090E0B0D090E", "0x0B0D090E0B0D090E", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xFFFFFFFF00000000", "0x0B0D090EFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0B0D090E0B0D090E", "0x0B0D090E0B0D090E", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0xFFFFFFFF00000000", "0x0B0D090EFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VAESIMC test: runs the instruction over five input patterns, once with a
; memory source and once with a register source. Both forms must agree
; (expected XMM10-XMM14 equal XMM0-XMM4).
; rdx points at five 32-byte vectors; only the low 16 bytes of each are
; consumed by the xmm forms below.
lea rdx, [rel .data]

; Memory source forms. The upper 128 bits of each destination are expected
; to be zero.
vaesimc xmm0, [rdx + 32 * 0]
vaesimc xmm1, [rdx + 32 * 1]
vaesimc xmm2, [rdx + 32 * 2]
vaesimc xmm3, [rdx + 32 * 3]
vaesimc xmm4, [rdx + 32 * 4]

; Load the same five inputs into registers. The full 256 bits are loaded so
; the sources carry non-zero upper halves for the non-zero patterns.
vmovapd ymm5, [rdx + 32 * 0]
vmovapd ymm6, [rdx + 32 * 1]
vmovapd ymm7, [rdx + 32 * 2]
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]

; Register source forms; results must match the memory forms above, with the
; upper 128 bits of each destination zeroed.
vaesimc xmm10, xmm5
vaesimc xmm11, xmm6
vaesimc xmm12, xmm7
vaesimc xmm13, xmm8
vaesimc xmm14, xmm9

hlt

align 32
.data:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vaeskeygenassist.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x6363636363636363", "0x6363636363636363", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x1616161616161616", "0x1616161616161616", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x7C6363636363637C", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x1616161616161616", "0x7C6363636363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x6363636263636363", "0x6363636263636363", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x1616161416161616", "0x1616161416161616", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x7C6363606363637C", "0x7C6363606363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x1616161216161616", "0x7C6363676363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x6363636663636363", "0x6363636663636363", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x1616161016161616", "0x1616161016161616", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x7C6363646363637C", "0x7C6363646363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x1616161E16161616", "0x7C63636B6363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x6363636A63636363", "0x6363636A63636363", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x1616161C16161616", "0x1616161C16161616", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x7C6363686363637C", "0x7C6363686363637C", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x1616161A16161616", "0x7C63636F6363637C", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vaeskeygenassist with a 128-bit memory source operand.
; Each of the four referenced input rows is processed with four different
; immediates (0 first, then 1 through 12 spread across the rows). Results are
; left in xmm0-xmm15 and checked against RegData in the config header.
lea rdx, [rel .inputs]

; Immediate 0 against every input row
vaeskeygenassist xmm0, [rdx + 0x00], 0
vaeskeygenassist xmm1, [rdx + 0x10], 0
vaeskeygenassist xmm2, [rdx + 0x20], 0
vaeskeygenassist xmm3, [rdx + 0x30], 0

; Immediates 1-4
vaeskeygenassist xmm4, [rdx + 0x00], 1
vaeskeygenassist xmm5, [rdx + 0x10], 2
vaeskeygenassist xmm6, [rdx + 0x20], 3
vaeskeygenassist xmm7, [rdx + 0x30], 4

; Immediates 5-8
vaeskeygenassist xmm8,  [rdx + 0x00], 5
vaeskeygenassist xmm9,  [rdx + 0x10], 6
vaeskeygenassist xmm10, [rdx + 0x20], 7
vaeskeygenassist xmm11, [rdx + 0x30], 8

; Immediates 9-12
vaeskeygenassist xmm12, [rdx + 0x00], 9
vaeskeygenassist xmm13, [rdx + 0x10], 10
vaeskeygenassist xmm14, [rdx + 0x20], 11
vaeskeygenassist xmm15, [rdx + 0x30], 12

; End of the instruction stream; only data follows.
hlt

align 16
.inputs:
dq 0x0000000000000000, 0x0000000000000000 ; row at +0x00
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF ; row at +0x10
dq 0x0000000100000001, 0x0000000100000001 ; row at +0x20
dq 0xFFFFFFFF00000000, 0x00000001FFFFFFFF ; row at +0x30
dq 0x0202020202020202, 0x0303030303030303 ; row at +0x40, not read by the code above


================================================
FILE: unittests/ASM/VEX/vandnpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM3": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM5": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vandnpd in its 256-bit and 128-bit forms, with the last operand supplied
; once from a register and once from memory. ymm0/ymm1 keep the loaded
; inputs and ymm2-ymm5 receive the results; all six are compared against
; RegData in the config header.
lea rdx, [rel .src1]
lea rbx, [rel .src2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Both sources in registers
vandnpd ymm2, ymm0, ymm1
vandnpd xmm3, xmm0, xmm1

; Last source read from memory (the same bytes that were loaded into ymm1)
vandnpd ymm4, ymm0, [rbx]
vandnpd xmm5, xmm0, [rbx]

; End of the instruction stream; only data follows.
hlt

align 32
.src1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

.src2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vandnps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM3": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM5": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vandnps in its 256-bit and 128-bit forms, with the last operand supplied
; once from a register and once from memory. ymm0/ymm1 keep the loaded
; inputs and ymm2-ymm5 receive the results; all six are compared against
; RegData in the config header.
lea rdx, [rel .src1]
lea rbx, [rel .src2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Both sources in registers
vandnps ymm2, ymm0, ymm1
vandnps xmm3, xmm0, xmm1

; Last source read from memory (the same bytes that were loaded into ymm1)
vandnps ymm4, ymm0, [rbx]
vandnps xmm5, xmm0, [rbx]

; End of the instruction stream; only data follows.
hlt

align 32
.src1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

.src2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vandpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM3": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM5": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vandpd in its 256-bit and 128-bit forms, with the last operand supplied
; once from a register and once from memory. ymm0/ymm1 keep the loaded
; inputs and ymm2-ymm5 receive the results; all six are compared against
; RegData in the config header.
lea rdx, [rel .src1]
lea rbx, [rel .src2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Both sources in registers
vandpd ymm2, ymm0, ymm1
vandpd xmm3, xmm0, xmm1

; Last source read from memory (the same bytes that were loaded into ymm1)
vandpd ymm4, ymm0, [rbx]
vandpd xmm5, xmm0, [rbx]

; End of the instruction stream; only data follows.
hlt

align 32
.src1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

.src2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vandps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM3": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM5": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vandps in its 256-bit and 128-bit forms, with the last operand supplied
; once from a register and once from memory. ymm0/ymm1 keep the loaded
; inputs and ymm2-ymm5 receive the results; all six are compared against
; RegData in the config header.
lea rdx, [rel .src1]
lea rbx, [rel .src2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Both sources in registers
vandps ymm2, ymm0, ymm1
vandps xmm3, xmm0, xmm1

; Last source read from memory (the same bytes that were loaded into ymm1)
vandps ymm4, ymm0, [rbx]
vandps xmm5, xmm0, [rbx]

; End of the instruction stream; only data follows.
hlt

align 32
.src1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

.src2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vblendpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0xEEEEEEEEEEEEEEEE", "0x9999999999999999"],
    "XMM3": ["0x1111111111111111", "0x3333333333333333", "0x5555555555555555", "0x7777777777777777"],
    "XMM4": ["0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x1111111111111111", "0x3333333333333333", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xAAAAAAAAAAAAAAAA", "0x3333333333333333", "0xEEEEEEEEEEEEEEEE", "0x7777777777777777"],
    "XMM7": ["0x1111111111111111", "0xCCCCCCCCCCCCCCCC", "0x5555555555555555", "0x9999999999999999"],
    "XMM8": ["0xAAAAAAAAAAAAAAAA", "0x3333333333333333", "0x0000000000000000", "0x0000000000000000"],
    "XMM9": ["0x1111111111111111", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vblendpd with immediate lane selection, 256-bit and 128-bit forms.
; A set immediate bit takes that 64-bit lane from the second source, a clear
; bit takes it from the first. Results in ymm2-ymm9 are compared against
; RegData in the config header.
lea rdx, [rel .vectors]

vmovapd ymm0, [rdx]          ; first source
vmovapd ymm1, [rdx + 0x20]   ; second source

; Whole vector from a single source
vblendpd ymm2, ymm0, ymm1, 0x00 ; everything from ymm0
vblendpd ymm3, ymm0, ymm1, 0xFF ; everything from ymm1

vblendpd xmm4, xmm0, xmm1, 0x00 ; everything from xmm0
vblendpd xmm5, xmm0, xmm1, 0xFF ; everything from xmm1

; Lanes alternate between the two sources
vblendpd ymm6, ymm0, ymm1, 0xAA ; 0b10101010
vblendpd ymm7, ymm0, ymm1, 0x55 ; 0b01010101

vblendpd xmm8, xmm0, xmm1, 0xAA ; 0b10101010
vblendpd xmm9, xmm0, xmm1, 0x55 ; 0b01010101

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xAAAAAAAAAAAAAAAA, 0xCCCCCCCCCCCCCCCC
dq 0xEEEEEEEEEEEEEEEE, 0x9999999999999999

; Loaded into ymm1
dq 0x1111111111111111, 0x3333333333333333
dq 0x5555555555555555, 0x7777777777777777


================================================
FILE: unittests/ASM/VEX/vblendps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x9999999988888888"],
    "XMM3": ["0x1111111122222222", "0x3333333344444444", "0x5555555566666666", "0x7777777788888888"],
    "XMM4": ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x1111111122222222", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x55555555FFFFFFFF", "0x7777777788888888"],
    "XMM7": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0xEEEEEEEE66666666", "0x9999999988888888"],
    "XMM8": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM9": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x1111111122222222", "0xccccccccdddddddd", "0xeeeeeeeeffffffff", "0x9999999988888888"]
  }
}
%endif

; vblendps with immediate lane selection, 256-bit and 128-bit forms.
; A set immediate bit takes that 32-bit lane from the second source, a clear
; bit takes it from the first. Results in ymm2-ymm10 are compared against
; RegData in the config header.
lea rdx, [rel .vectors]

vmovapd ymm0, [rdx]          ; first source
vmovapd ymm1, [rdx + 0x20]   ; second source

; Whole vector from a single source
vblendps ymm2, ymm0, ymm1, 0x00 ; everything from ymm0
vblendps ymm3, ymm0, ymm1, 0xFF ; everything from ymm1

vblendps xmm4, xmm0, xmm1, 0x00 ; everything from xmm0
vblendps xmm5, xmm0, xmm1, 0xFF ; everything from xmm1

; Lanes alternate between the two sources
vblendps ymm6, ymm0, ymm1, 0xAA ; 0b10101010
vblendps ymm7, ymm0, ymm1, 0x55 ; 0b01010101

vblendps xmm8, xmm0, xmm1, 0xAA ; 0b10101010
vblendps xmm9, xmm0, xmm1, 0x55 ; 0b01010101

; Only the two lowest lanes come from ymm1, so the selector bits for the
; low and high halves of the vector differ
vblendps ymm10, ymm0, ymm1, 0x3

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xAAAAAAAABBBBBBBB, 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF, 0x9999999988888888

; Loaded into ymm1
dq 0x1111111122222222, 0x3333333344444444
dq 0x5555555566666666, 0x7777777788888888


================================================
FILE: unittests/ASM/VEX/vblendvpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0x1111111111111111", "0x3333333333333333", "0x5555555555555555", "0x7777777777777777"],
    "XMM4":  ["0x1111111111111111", "0x3333333333333333", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0xEEEEEEEEEEEEEEEE", "0x9999999999999999"],
    "XMM6":  ["0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x1111111111111111", "0xCCCCCCCCCCCCCCCC", "0x5555555555555555", "0x9999999999999999"],
    "XMM8":  ["0x1111111111111111", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0xAAAAAAAAAAAAAAAA", "0x3333333333333333", "0xEEEEEEEEEEEEEEEE", "0x7777777777777777"],
    "XMM10": ["0xAAAAAAAAAAAAAAAA", "0x3333333333333333", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x1111111111111111", "0x3333333333333333", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vblendvpd with a register mask, 256-bit and 128-bit forms.
; The top bit of each 64-bit mask lane decides the source: set takes the lane
; from the second source (ymm1), clear takes it from the first (ymm0).
; Results in ymm3-ymm11 are compared against RegData in the config header.
lea rdx, [rel .vectors]

vmovaps ymm0, [rdx]          ; first source
vmovaps ymm1, [rdx + 0x20]   ; second source

; Mask fully set: every lane comes from ymm1
vmovaps ymm2, [rel .sel_all]
vblendvpd ymm3, ymm0, ymm1, ymm2
vblendvpd xmm4, xmm0, xmm1, xmm2

; Mask fully clear: every lane comes from ymm0
vmovaps ymm2, [rel .sel_none]
vblendvpd ymm5, ymm0, ymm1, ymm2
vblendvpd xmm6, xmm0, xmm1, xmm2

; Lanes 0 and 2 from ymm1, lanes 1 and 3 from ymm0
vmovaps ymm2, [rel .sel_even]
vblendvpd ymm7, ymm0, ymm1, ymm2
vblendvpd xmm8, xmm0, xmm1, xmm2

; Lanes 1 and 3 from ymm1, lanes 0 and 2 from ymm0
vmovaps ymm2, [rel .sel_odd]
vblendvpd ymm9,  ymm0, ymm1, ymm2
vblendvpd xmm10, xmm0, xmm1, xmm2

; Mask fully set again (every lane from xmm1), but the destination already
; holds unrelated data in all 256 bits. The expected value for XMM11 has a
; zero upper half, so the 128-bit operation must clear it.
vmovaps ymm11, [rel .stale]
vmovaps ymm2, [rel .sel_all]
vblendvpd xmm11, xmm0, xmm1, xmm2

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xAAAAAAAAAAAAAAAA, 0xCCCCCCCCCCCCCCCC
dq 0xEEEEEEEEEEEEEEEE, 0x9999999999999999

; Loaded into ymm1
dq 0x1111111111111111, 0x3333333333333333
dq 0x5555555555555555, 0x7777777777777777

.sel_all:
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF

.sel_none:
dq 0x0000000000000000, 0x0000000000000000
dq 0x0000000000000000, 0x0000000000000000

.sel_even:
dq 0x8000000000000000, 0x0000000000000000
dq 0x8000000000000000, 0x0000000000000000

.sel_odd:
dq 0x0000000000000000, 0x8000000000000000
dq 0x0000000000000000, 0x8000000000000000

.stale:
dq 0x3132333435363738, 0x4142434445464748
dq 0x5152535455565758, 0x6162636465666768


================================================
FILE: unittests/ASM/VEX/vblendvps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0x1111111122222222", "0x3333333344444444", "0x5555555566666666", "0x7777777788888888"],
    "XMM4":  ["0x1111111122222222", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x9999999988888888"],
    "XMM6":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0xEEEEEEEE66666666", "0x9999999988888888"],
    "XMM8":  ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x55555555FFFFFFFF", "0x7777777788888888"],
    "XMM10": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x1111111122222222", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vblendvps with a register mask, 256-bit and 128-bit forms.
; The top bit of each 32-bit mask lane decides the source: set takes the lane
; from the second source (ymm1), clear takes it from the first (ymm0).
; Results in ymm3-ymm11 are compared against RegData in the config header.
lea rdx, [rel .vectors]

vmovaps ymm0, [rdx]          ; first source
vmovaps ymm1, [rdx + 0x20]   ; second source

; Mask fully set: every lane comes from ymm1
vmovaps ymm2, [rel .sel_all]
vblendvps ymm3, ymm0, ymm1, ymm2
vblendvps xmm4, xmm0, xmm1, xmm2

; Mask fully clear: every lane comes from ymm0
vmovaps ymm2, [rel .sel_none]
vblendvps ymm5, ymm0, ymm1, ymm2
vblendvps xmm6, xmm0, xmm1, xmm2

; Even-numbered 32-bit lanes from ymm1, odd-numbered lanes from ymm0
vmovaps ymm2, [rel .sel_even]
vblendvps ymm7, ymm0, ymm1, ymm2
vblendvps xmm8, xmm0, xmm1, xmm2

; Odd-numbered 32-bit lanes from ymm1, even-numbered lanes from ymm0
vmovaps ymm2, [rel .sel_odd]
vblendvps ymm9,  ymm0, ymm1, ymm2
vblendvps xmm10, xmm0, xmm1, xmm2

; Mask fully set again (every lane from xmm1), but the destination already
; holds unrelated data in all 256 bits. The expected value for XMM11 has a
; zero upper half, so the 128-bit operation must clear it.
vmovaps ymm11, [rel .stale]
vmovaps ymm2, [rel .sel_all]
vblendvps xmm11, xmm0, xmm1, xmm2

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xAAAAAAAABBBBBBBB, 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF, 0x9999999988888888

; Loaded into ymm1
dq 0x1111111122222222, 0x3333333344444444
dq 0x5555555566666666, 0x7777777788888888

.sel_all:
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF

.sel_none:
dq 0x0000000000000000, 0x0000000000000000
dq 0x0000000000000000, 0x0000000000000000

.sel_even:
dq 0x0000000080000000, 0x0000000080000000
dq 0x0000000080000000, 0x0000000080000000

.sel_odd:
dq 0x8000000000000000, 0x8000000000000000
dq 0x8000000000000000, 0x8000000000000000

.stale:
dq 0x3132333435363738, 0x4142434445464748
dq 0x5152535455565758, 0x6162636465666768


================================================
FILE: unittests/ASM/VEX/vbroadcastf128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F"]
  }
}
%endif

; vbroadcastf128: the 16 bytes at the source address are expected in both
; halves of ymm0 (see RegData in the config header).
lea rdx, [rel .source]

vbroadcastf128 ymm0, [rdx]

; End of the instruction stream; only data follows.
hlt

align 32
.source:
dq 0xA76C4F06A12BFCE0, 0x9B80767F1E6A060F ; the 128 bits that get broadcast
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE ; follows the source; absent from the expected ymm0


================================================
FILE: unittests/ASM/VEX/vbroadcasti128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F"]
  }
}
%endif

; vbroadcasti128: the 16 bytes at the source address are expected in both
; halves of ymm0 (see RegData in the config header).
lea rdx, [rel .source]

vbroadcasti128 ymm0, [rdx]

; End of the instruction stream; only data follows.
hlt

align 32
.source:
dq 0xA76C4F06A12BFCE0, 0x9B80767F1E6A060F ; the 128 bits that get broadcast
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE ; follows the source; absent from the expected ymm0


================================================
FILE: unittests/ASM/VEX/vbroadcastsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0"],
      "XMM3": ["0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"]
  }
}
%endif

; vbroadcastsd: replicate one 64-bit element across a whole ymm register,
; taking the element from a register and from memory. ymm0/ymm1 keep the
; loaded inputs; ymm2-ymm5 hold the broadcasts. All are compared against
; RegData in the config header.
lea rdx, [rel .vectors]

vmovaps ymm0, [rdx + 0x00]
vmovaps ymm1, [rdx + 0x20]

; Source element is the low quadword of an xmm register
vbroadcastsd ymm2, xmm0
vbroadcastsd ymm3, xmm1

; Source element is read from memory: third and fourth quadwords of the table
vbroadcastsd ymm4, [rdx + 0x10]
vbroadcastsd ymm5, [rdx + 0x18]

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xA76C4F06A12BFCE0, 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE

; Loaded into ymm1
dq 0x6868C3F3AAED56E0, 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD, 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vbroadcastss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0"],
      "XMM3": ["0xAAED56E0AAED56E0", "0xAAED56E0AAED56E0", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; vbroadcastss: replicate one 32-bit element across a ymm or xmm register,
; taking the element from a register and from memory. ymm0/ymm1 keep the
; loaded inputs; registers 2-5 hold the broadcasts. The xmm-destination
; cases are expected to leave the upper 128 bits zero (see RegData in the
; config header).
lea rdx, [rel .vectors]

vmovaps ymm0, [rdx + 0x00]
vmovaps ymm1, [rdx + 0x20]

; Source element is the low doubleword of an xmm register
vbroadcastss ymm2, xmm0
vbroadcastss xmm3, xmm1

; Source element is read from memory at byte offsets 16 and 24 of the table
vbroadcastss ymm4, [rdx + 0x10]
vbroadcastss xmm5, [rdx + 0x18]

; End of the instruction stream; only data follows.
hlt

align 32
.vectors:
; Loaded into ymm0
dq 0xA76C4F06A12BFCE0, 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE

; Loaded into ymm1
dq 0x6868C3F3AAED56E0, 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD, 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vcmppd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; 128-bit vcmppd with predicates 0 through 7.
; The six arithmetic predicates compare [1.0, 2.0] with [1.0, 3.0]; the
; ordered/unordered predicates use vectors that mix 0.0 and NaN. Each result
; lane is all-ones for true and all-zeroes for false, and the upper 128 bits
; of every destination are expected to be zero (RegData in the config header).
lea rdx, [rel .operands]

vmovapd ymm0, [rdx + 0x00]   ; [1.0, 2.0, ...]
vmovapd ymm1, [rdx + 0x20]   ; [1.0, 3.0, ...]

vcmppd xmm2, xmm0, xmm1, 0 ; EQ
vcmppd xmm3, xmm0, xmm1, 1 ; LT
vcmppd xmm4, xmm0, xmm1, 2 ; LTE
vcmppd xmm5, xmm0, xmm1, 4 ; NEQ
vcmppd xmm6, xmm0, xmm1, 5 ; NLT
vcmppd xmm7, xmm0, xmm1, 6 ; NLTE

; Predicates 3 (unordered) and 7 (ordered) need NaN operands to be meaningful
vmovapd ymm8, [rdx + 0x40]   ; [0.0, nan, ...]
vmovapd ymm9, [rdx + 0x60]   ; [nan, 0.0, ...]

; Unordered is true when at least one operand of the pair is NaN:
; [0.0, nan] unord [nan, 0.0] = [1, 1]
vcmppd xmm10, xmm8, xmm9, 3 ; Unordered

; Ordered is true only when neither operand of the pair is NaN:
; [0.0, nan] ord [nan, 0.0] = [0, 0]
vcmppd xmm11, xmm8, xmm9, 7 ; Ordered

; [nan, 0.0] ord [nan, 0.0] = [0, 1]
vcmppd xmm12, xmm9, xmm9, 7 ; Ordered

; [0.0, nan] ord [0.0, nan] = [1, 0]
vcmppd xmm13, xmm8, xmm8, 7 ; Ordered

; End of the instruction stream; only data follows.
hlt

align 32
.operands:
; +0x00: 1.0, 2.0, 1.0, 2.0
dq 0x3FF0000000000000, 0x4000000000000000
dq 0x3FF0000000000000, 0x4000000000000000

; +0x20: 1.0, 3.0, 1.0, 3.0
dq 0x3FF0000000000000, 0x4008000000000000
dq 0x3FF0000000000000, 0x4008000000000000

; +0x40: 0.0, NaN, 0.0, NaN
dq 0x0000000000000000, 0x7FF8000000000000
dq 0x0000000000000000, 0x7FF8000000000000

; +0x60: NaN, 0.0, NaN, 0.0
dq 0x7FF8000000000000, 0x0000000000000000
dq 0x7FF8000000000000, 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vcmppd_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM11": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM13": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; 256-bit vcmppd with predicates 0 through 7.
; The six arithmetic predicates compare [1.0, 2.0, 1.0, 2.0] with
; [1.0, 3.0, 1.0, 3.0]; the ordered/unordered predicates use vectors that mix
; 0.0 and NaN. Each result lane is all-ones for true and all-zeroes for false
; (RegData in the config header).
lea rdx, [rel .operands]

vmovapd ymm0, [rdx + 0x00]   ; [1.0, 2.0, 1.0, 2.0]
vmovapd ymm1, [rdx + 0x20]   ; [1.0, 3.0, 1.0, 3.0]

vcmppd ymm2, ymm0, ymm1, 0 ; EQ
vcmppd ymm3, ymm0, ymm1, 1 ; LT
vcmppd ymm4, ymm0, ymm1, 2 ; LTE
vcmppd ymm5, ymm0, ymm1, 4 ; NEQ
vcmppd ymm6, ymm0, ymm1, 5 ; NLT
vcmppd ymm7, ymm0, ymm1, 6 ; NLTE

; Predicates 3 (unordered) and 7 (ordered) need NaN operands to be meaningful
vmovapd ymm8, [rdx + 0x40]   ; [0.0, nan, 0.0, nan]
vmovapd ymm9, [rdx + 0x60]   ; [nan, 0.0, nan, 0.0]

; Unordered is true when at least one operand of the pair is NaN:
; [0.0, nan] unord [nan, 0.0] = [1, 1] (pattern repeats in the upper half)
vcmppd ymm10, ymm8, ymm9, 3 ; Unordered

; Ordered is true only when neither operand of the pair is NaN:
; [0.0, nan] ord [nan, 0.0] = [0, 0]
vcmppd ymm11, ymm8, ymm9, 7 ; Ordered

; [nan, 0.0] ord [nan, 0.0] = [0, 1]
vcmppd ymm12, ymm9, ymm9, 7 ; Ordered

; [0.0, nan] ord [0.0, nan] = [1, 0]
vcmppd ymm13, ymm8, ymm8, 7 ; Ordered

; End of the instruction stream; only data follows.
hlt

align 32
.operands:
; +0x00: 1.0, 2.0, 1.0, 2.0
dq 0x3FF0000000000000, 0x4000000000000000
dq 0x3FF0000000000000, 0x4000000000000000

; +0x20: 1.0, 3.0, 1.0, 3.0
dq 0x3FF0000000000000, 0x4008000000000000
dq 0x3FF0000000000000, 0x4008000000000000

; +0x40: 0.0, NaN, 0.0, NaN
dq 0x0000000000000000, 0x7FF8000000000000
dq 0x0000000000000000, 0x7FF8000000000000

; +0x60: NaN, 0.0, NaN, 0.0
dq 0x7FF8000000000000, 0x0000000000000000
dq 0x7FF8000000000000, 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vcmppd_full.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RCX": "448"
  }
}
%endif

; Expected-outcome flags, passed as the fourth argument of the Compare macro.
; Compare uses the value as an index (scaled by 32 bytes) from the .false label.
%define true 1
%define false 0

; Symbolic names for the 32 comparison predicates (values 0-31).
; Compare forwards the chosen value to vcmppd as its immediate operand.
%define EQ_OQ 0
%define LT_OS 1
%define LE_OS 2
%define UNORD_Q 3
%define NEQ_UQ 4
%define NLT_US 5
%define NLE_US 6
%define ORD_Q 7
%define EQ_UQ 8
%define NGE_US 9
%define NGT_US 10
%define FALSE_OQ 11
%define NEQ_OQ 12
%define GE_OS 13
%define GT_OS 14
%define TRUE_UQ 15
%define EQ_OS 16
%define LT_OQ 17
%define LE_OQ 18
%define UNORD_S 19
%define NEQ_US 20
%define NLT_UQ 21
%define NLE_UQ 22
%define ORD_S 23
%define EQ_US 24
%define NGE_UQ 25
%define NGT_UQ 26
%define FALSE_OS 27
%define NEQ_OS 28
%define GE_OQ 29
%define GT_OQ 30
%define TRUE_US 31

; Runs one vcmppd comparison and verifies its outcome.
; Arguments: src1, src2, predicate, true/false
;   src1, src2  - the two source operands handed to vcmppd
;   predicate   - the immediate selecting the comparison
;   true/false  - expected outcome (1 or 0)
; On a mismatch control jumps to .fail; otherwise ecx is incremented.
; Overwrites ymm6 in addition to whatever `result` and `temp` name.
%macro Compare 4
    vcmppd result, %1, %2, %3
    ; Construct expected result
    ; %4 is 0 or 1, so this reads either the vector at .false or the one 32 bytes after it
    vmovdqa temp, [rel .false + 32 * %4]
    ; Compare if result and expected are equal
    ; NOTE(review): the xor names ymm7/ymm8 directly instead of result/temp; presumably
    ; result and temp alias (the xmm or ymm views of) those registers, but their
    ; definitions are not visible here - confirm before changing either side.
    vxorpd ymm6, ymm7, ymm8
    ; ZF is set only when ymm6 is entirely zero, i.e. the two vectors were identical
    vptest ymm6, ymm6
    jnz .fail
    ; Increment counter of tests passed
    add ecx, 1
%endmacro

; Test setup: load the operand vectors used by every comparison.
; The .data and .nan tables are defined further down in the file; the
; trailing comments on the loads record the value each row is meant to hold.
lea rax, [rel .data]
; ecx counts comparisons that passed (the Compare macro adds 1 per success);
; the config header expects RCX to end at 448.
xor ecx, ecx

vmovdqa ymm0, [rax + 32 * 0] ; -4.0
vmovdqa ymm1, [rax + 32 * 1] ; -0.0
vmovdqa ymm2, [rax + 32 * 2] ; 0.0
vmovdqa ymm3, [rax + 32 * 3] ; 4.0
vmovdqa ymm4, [rel .nan]     ; NaN

%macro CompareAll 5
    %define xmm_neg_four %1
    %define xmm_neg_zero %2
    %define xmm_zero %3
    %define xmm_four %4
    %define xmm_nan %5

    Compare xmm_four, xmm_four, EQ_OQ, true
    Compare xmm_neg_four, xmm_four, EQ_OQ, false
    Compare xmm_zero, xmm_neg_zero, EQ_OQ, true
    Compare xmm_zero, xmm_zero, EQ_OQ, true
    Compare xmm_four, xmm_nan, EQ_OQ, false
    Compare xmm_nan, xmm_four, EQ_OQ, false
    Compare xmm_nan, xmm_nan, EQ_OQ, false

    Compare xmm_four, xmm_neg_four, LT_OS, false
    Compare xmm_neg_four, xmm_four, LT_OS, true
    Compare xmm_zero, xmm_neg_zero, LT_OS, false
    Compare xmm_neg_zero, xmm_zero, LT_OS, false
    Compare xmm_nan, xmm_four, LT_OS, false
    Compare xmm_four, xmm_nan, LT_OS, false
    Compare xmm_nan, xmm_nan, LT_OS, false

    Compare xmm_four, xmm_neg_four, LE_OS, false
    Compare xmm_neg_four, xmm_four, LE_OS, true
    Compare xmm_zero, xmm_neg_zero, LE_OS, true
    Compare xmm_neg_zero, xmm_zero, LE_OS, true
    Compare xmm_nan, xmm_four, LE_OS, false
    Compare xmm_four, xmm_nan, LE_OS, false
    Compare xmm_nan, xmm_nan, LE_OS, false

    Compare xmm_four, xmm_neg_four, UNORD_Q, false
    Compare xmm_neg_four, xmm_four, UNORD_Q, false
    Compare xmm_zero, xmm_neg_zero, UNORD_Q, false
    Compare xmm_neg_zero, xmm_zero, UNORD_Q, false
    Compare xmm_nan, xmm_four, UNORD_Q, true
    Compare xmm_four, xmm_nan, UNORD_Q, true
    Compare xmm_nan, xmm_nan, UNORD_Q, true

    Compare xmm_four, xmm_four, NEQ_UQ, false
    Compare xmm_four, xmm_neg_four, NEQ_UQ, true
    Compare xmm_zero, xmm_neg_zero, NEQ_UQ, false
    Compare xmm_zero, xmm_zero, NEQ_UQ, false
    Compare xmm_four, xmm_nan, NEQ_UQ, true
    Compare xmm_nan, xmm_four, NEQ_UQ, true
    Compare xmm_nan, xmm_nan, NEQ_UQ, true

    Compare xmm_four, xmm_neg_four, NLT_US, true
    Compare xmm_neg_four, xmm_four, NLT_US, false
    Compare xmm_zero, xmm_neg_zero, NLT_US, true
    Compare xmm_neg_zero, xmm_zero, NLT_US, true
    Compare xmm_nan, xmm_four, NLT_US, true
    Compare xmm_four, xmm_nan, NLT_US, true
    Compare xmm_nan, xmm_nan, NLT_US, true

    Compare xmm_four, xmm_neg_four, NLE_US, true
    Compare xmm_neg_four, xmm_four, NLE_US, false
    Compare xmm_zero, xmm_neg_zero, NLE_US, false
    Compare xmm_neg_zero, xmm_zero, NLE_US, false
    Compare xmm_nan, xmm_four, NLE_US, true
    Compare xmm_four, xmm_nan, NLE_US, true
    Compare xmm_nan, xmm_nan, NLE_US, true

    Compare xmm_four, xmm_neg_four, ORD_Q, true
    Compare xmm_neg_four, xmm_four, ORD_Q, true
    Compare xmm_zero, xmm_neg_zero, ORD_Q, true
    Compare xmm_neg_zero, xmm_zero, ORD_Q, true
    Compare xmm_nan, xmm_four, ORD_Q, false
    Compare xmm_four, xmm_nan, ORD_Q, false
    Compare xmm_nan, xmm_nan, ORD_Q, false

    Compare xmm_four, xmm_neg_four, EQ_UQ, false
    Compare xmm_four, xmm_four, EQ_UQ, true
    Compare xmm_zero, xmm_neg_zero, EQ_UQ, true
    Compare xmm_zero, xmm_zero, EQ_UQ, true
    Compare xmm_four, xmm_nan, EQ_UQ, true
    Compare xmm_nan, xmm_four, EQ_UQ, true
    Compare xmm_nan, xmm_nan, EQ_UQ, true

    Compare xmm_four, xmm_neg_four, NGE_US, false
    Compare xmm_neg_four, xmm_four, NGE_US, true
    Compare xmm_zero, xmm_neg_zero, NGE_US, false
    Compare xmm_neg_zero, xmm_zero, NGE_US, false
    Compare xmm_nan, xmm_four, NGE_US, true
    Compare xmm_four, xmm_nan, NGE_US, true
    Compare xmm_nan, xmm_nan, NGE_US, true

    Compare xmm_four, xmm_neg_four, NGT_US, false
    Compare xmm_neg_four, xmm_four, NGT_US, true
    Compare xmm_zero, xmm_neg_zero, NGT_US, true
    Compare xmm_neg_zero, xmm_zero, NGT_US, true
    Compare xmm_nan, xmm_four, NGT_US, true
    Compare xmm_four, xmm_nan, NGT_US, true
    Compare xmm_nan, xmm_nan, NGT_US, true

    Compare xmm_four, xmm_neg_four, FALSE_OQ, false
    Compare xmm_neg_four, xmm_four, FALSE_OQ, false
    Compare xmm_zero, xmm_neg_zero, FALSE_OQ, false
    Compare xmm_neg_zero, xmm_zero, FALSE_OQ, false
    Compare xmm_nan, xmm_four, FALSE_OQ, false
    Compare xmm_four, xmm_nan, FALSE_OQ, false
    Compare xmm_nan, xmm_nan, FALSE_OQ, false

    Compare xmm_four, xmm_neg_four, NEQ_OQ, true
    Compare xmm_four, xmm_four, NEQ_OQ, false
    Compare xmm_zero, xmm_neg_zero, NEQ_OQ, false
    Compare xmm_zero, xmm_zero, NEQ_OQ, false
    Compare xmm_four, xmm_nan, NEQ_OQ, false
    Compare xmm_nan, xmm_four, NEQ_OQ, false
    Compare xmm_nan, xmm_nan, NEQ_OQ, false

    Compare xmm_four, xmm_neg_four, GE_OS, true
    Compare xmm_neg_four, xmm_four, GE_OS, false
    Compare xmm_zero, xmm_neg_zero, GE_OS, true
    Compare xmm_neg_zero, xmm_zero, GE_OS, true
    Compare xmm_nan, xmm_four, GE_OS, false
    Compare xmm_four, xmm_nan, GE_OS, false
    Compare xmm_nan, xmm_nan, GE_OS, false

    Compare xmm_four, xmm_neg_four, GT_OS, true
    Compare xmm_neg_four, xmm_four, GT_OS, false
    Compare xmm_zero, xmm_neg_zero, GT_OS, false
    Compare xmm_neg_zero, xmm_zero, GT_OS, false
    Compare xmm_nan, xmm_four, GT_OS, false
    Compare xmm_four, xmm_nan, GT_OS, false
    Compare xmm_nan, xmm_nan, GT_OS, false

    Compare xmm_four, xmm_neg_four, TRUE_UQ, true
    Compare xmm_neg_four, xmm_four, TRUE_UQ, true
    Compare xmm_zero, xmm_neg_zero, TRUE_UQ, true
    Compare xmm_neg_zero, xmm_zero, TRUE_UQ, true
    Compare xmm_nan, xmm_four, TRUE_UQ, true
    Compare xmm_four, xmm_nan, TRUE_UQ, true
    Compare xmm_nan, xmm_nan, TRUE_UQ, true

    Compare xmm_four, xmm_neg_four, EQ_OS, false
    Compare xmm_neg_four, xmm_four, EQ_OS, false
    Compare xmm_zero, xmm_neg_zero, EQ_OS, true
    Compare xmm_neg_zero, xmm_zero, EQ_OS, true
    Compare xmm_nan, xmm_four, EQ_OS, false
    Compare xmm_four, xmm_nan, EQ_OS, false
    Compare xmm_nan, xmm_nan, EQ_OS, false

    Compare xmm_four, xmm_neg_four, LT_OQ, false
    Compare xmm_neg_four, xmm_four, LT_OQ, true
    Compare xmm_zero, xmm_neg_zero, LT_OQ, false
    Compare xmm_neg_zero, xmm_zero, LT_OQ, false
    Compare xmm_nan, xmm_four, LT_OQ, false
    Compare xmm_four, xmm_nan, LT_OQ, false
    Compare xmm_nan, xmm_nan, LT_OQ, false

    Compare xmm_four, xmm_neg_four, LE_OQ, false
    Compare xmm_neg_four, xmm_four, LE_OQ, true
    Compare xmm_zero, xmm_neg_zero, LE_OQ, true
    Compare xmm_neg_zero, xmm_zero, LE_OQ, true
    Compare xmm_nan, xmm_four, LE_OQ, false
    Compare xmm_four, xmm_nan, LE_OQ, false
    Compare xmm_nan, xmm_nan, LE_OQ, false

    Compare xmm_four, xmm_neg_four, UNORD_S, false
    Compare xmm_neg_four, xmm_four, UNORD_S, false
    Compare xmm_zero, xmm_neg_zero, UNORD_S, false
    Compare xmm_neg_zero, xmm_zero, UNORD_S, false
    Compare xmm_nan, xmm_four, UNORD_S, true
    Compare xmm_four, xmm_nan, UNORD_S, true
    Compare xmm_nan, xmm_nan, UNORD_S, true

    Compare xmm_four, xmm_neg_four, NEQ_US, true
    Compare xmm_neg_four, xmm_four, NEQ_US, true
    Compare xmm_zero, xmm_neg_zero, NEQ_US, false
    Compare xmm_neg_zero, xmm_zero, NEQ_US, false
    Compare xmm_nan, xmm_four, NEQ_US, true
    Compare xmm_four, xmm_nan, NEQ_US, true
    Compare xmm_nan, xmm_nan, NEQ_US, true

    Compare xmm_four, xmm_neg_four, NLT_UQ, true
    Compare xmm_neg_four, xmm_four, NLT_UQ, false
    Compare xmm_zero, xmm_neg_zero, NLT_UQ, true
    Compare xmm_neg_zero, xmm_zero, NLT_UQ, true
    Compare xmm_nan, xmm_four, NLT_UQ, true
    Compare xmm_four, xmm_nan, NLT_UQ, true
    Compare xmm_nan, xmm_nan, NLT_UQ, true

    Compare xmm_four, xmm_neg_four, NLE_UQ, true
    Compare xmm_neg_four, xmm_four, NLE_UQ, false
    Compare xmm_zero, xmm_neg_zero, NLE_UQ, false
    Compare xmm_neg_zero, xmm_zero, NLE_UQ, false
    Compare xmm_nan, xmm_four, NLE_UQ, true
    Compare xmm_four, xmm_nan, NLE_UQ, true
    Compare xmm_nan, xmm_nan, NLE_UQ, true

    Compare xmm_four, xmm_neg_four, ORD_S, true
    Compare xmm_neg_four, xmm_four, ORD_S, true
    Compare xmm_zero, xmm_neg_zero, ORD_S, true
    Compare xmm_neg_zero, xmm_zero, ORD_S, true
    Compare xmm_nan, xmm_four, ORD_S, false
    Compare xmm_four, xmm_nan, ORD_S, false
    Compare xmm_nan, xmm_nan, ORD_S, false

    Compare xmm_four, xmm_neg_four, EQ_US, false
    Compare xmm_four, xmm_four, EQ_US, true
    Compare xmm_zero, xmm_neg_zero, EQ_US, true
    Compare xmm_zero, xmm_zero, EQ_US, true
    Compare xmm_four, xmm_nan, EQ_US, true
    Compare xmm_nan, xmm_four, EQ_US, true
    Compare xmm_nan, xmm_nan, EQ_US, true

    Compare xmm_four, xmm_neg_four, NGE_UQ, false
    Compare xmm_neg_four, xmm_four, NGE_UQ, true
    Compare xmm_zero, xmm_neg_zero, NGE_UQ, false
    Compare xmm_neg_zero, xmm_zero, NGE_UQ, false
    Compare xmm_nan, xmm_four, NGE_UQ, true
    Compare xmm_four, xmm_nan, NGE_UQ, true
    Compare xmm_nan, xmm_nan, NGE_UQ, true

    Compare xmm_four, xmm_neg_four, NGT_UQ, false
    Compare xmm_neg_four, xmm_four, NGT_UQ, true
    Compare xmm_zero, xmm_neg_zero, NGT_UQ, true
    Compare xmm_neg_zero, xmm_zero, NGT_UQ, true
    Compare xmm_nan, xmm_four, NGT_UQ, true
    Compare xmm_four, xmm_nan, NGT_UQ, true
    Compare xmm_nan, xmm_nan, NGT_UQ, true

    Compare xmm_four, xmm_neg_four, FALSE_OS, false
    Compare xmm_neg_four, xmm_four, FALSE_OS, false
    Compare xmm_zero, xmm_neg_zero, FALSE_OS, false
    Compare xmm_neg_zero, xmm_zero, FALSE_OS, false
    Compare xmm_nan, xmm_four, FALSE_OS, false
    Compare xmm_four, xmm_nan, FALSE_OS, false
    Compare xmm_nan, xmm_nan, FALSE_OS, false

    Compare xmm_four, xmm_neg_four, NEQ_OS, true
    Compare xmm_four, xmm_four, NEQ_OS, false
    Compare xmm_zero, xmm_neg_zero, NEQ_OS, false
    Compare xmm_zero, xmm_zero, NEQ_OS, false
    Compare xmm_four, xmm_nan, NEQ_OS, false
    Compare xmm_nan, xmm_four, NEQ_OS, false
    Compare xmm_nan, xmm_nan, NEQ_OS, false

    Compare xmm_four, xmm_neg_four, GE_OQ, true
    Compare xmm_neg_four, xmm_four, GE_OQ, false
    Compare xmm_zero, xmm_neg_zero, GE_OQ, true
    Compare xmm_neg_zero, xmm_zero, GE_OQ, true
    Compare xmm_nan, xmm_four, GE_OQ, false
    Compare xmm_four, xmm_nan, GE_OQ, false
    Compare xmm_nan, xmm_nan, GE_OQ, false

    Compare xmm_four, xmm_neg_four, GT_OQ, true
    Compare xmm_neg_four, xmm_four, GT_OQ, false
    Compare xmm_zero, xmm_neg_zero, GT_OQ, false
    Compare xmm_neg_zero, xmm_zero, GT_OQ, false
    Compare xmm_nan, xmm_four, GT_OQ, false
    Compare xmm_four, xmm_nan, GT_OQ, false
    Compare xmm_nan, xmm_nan, GT_OQ, false

    Compare xmm_four, xmm_neg_four, TRUE_US, true
    Compare xmm_neg_four, xmm_four, TRUE_US, true
    Compare xmm_zero, xmm_neg_zero, TRUE_US, true
    Compare xmm_neg_zero, xmm_zero, TRUE_US, true
    Compare xmm_nan, xmm_four, TRUE_US, true
    Compare xmm_four, xmm_nan, TRUE_US, true
    Compare xmm_nan, xmm_nan, TRUE_US, true
%endmacro

; First pass: expand every comparison with 128-bit (xmm) operands.
; `temp` and `result` are single-line macros, so they have to be (re)defined
; before each CompareAll expansion.
%define temp xmm8
%define result xmm7
CompareAll xmm0, xmm1, xmm2, xmm3, xmm4
; Second pass: the same comparison table with 256-bit (ymm) operands.
%define temp ymm8
%define result ymm7
CompareAll ymm0, ymm1, ymm2, ymm3, ymm4

; If ecx is not correct we can use the counter to determine which test failed
; Execution also falls through to this label after the last comparison,
; so this hlt ends the test in every case.
.fail:
hlt

; 32-byte alignment so that each 32-byte constant below is naturally aligned.
align 32
.data:
; Each constant is four doubles (32 bytes), i.e. one full ymm register.
dq -4.0, -4.0, -4.0, -4.0

dq -0.0, -0.0, -0.0, -0.0

dq 0.0, 0.0, 0.0, 0.0

dq 4.0, 4.0, 4.0, 4.0

; Double-precision quiet NaN in every lane.
.nan:
dq 0x7FF8000000000000
dq 0x7FF8000000000000
dq 0x7FF8000000000000
dq 0x7FF8000000000000

; All-zero mask: expected result of a comparison that is false.
.false:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; All-ones mask: expected result of a comparison that is true.
; Located exactly 32 bytes after .false.
.true:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

================================================
FILE: unittests/ASM/VEX/vcmpps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; 128-bit VCMPPS with the eight legacy predicates (0x00-0x07).
; Each destination lane becomes all ones when the predicate holds and all
; zeros otherwise. The expected register values above also require bits
; 255:128 of every destination to be zero, even though the sources are
; loaded as full 256-bit values.

lea rdx, [rel .data]

; Lane values, lowest lane first:
;   xmm0 = [2.0, 1.0, 4.0, 2.0]
;   xmm1 = [2.0, 1.0, 1.0, 5.0]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

vcmpps xmm2, xmm0, xmm1, 0x00 ; EQ
vcmpps xmm3, xmm0, xmm1, 0x01 ; LT
vcmpps xmm4, xmm0, xmm1, 0x02 ; LTE
vcmpps xmm5, xmm0, xmm1, 0x04 ; NEQ
vcmpps xmm6, xmm0, xmm1, 0x05 ; NLT
vcmpps xmm7, xmm0, xmm1, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; (they only react to NaN inputs, so they get their own NaN-bearing operands)
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]

; Unordered will return true when either input is nan
; [0.0, 0.0, nan, nan] unord [0.0, nan, 0.0, nan] = [0, 1, 1, 1]
vcmpps xmm10, xmm8, xmm9, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; [0.0, 0.0, nan, nan] ord [0.0, nan, 0.0, nan] = [1, 0, 0, 0]
vcmpps xmm11, xmm8, xmm9, 0x07 ; Ordered

hlt

align 32
.data:
; Source 1 (ymm0): [2.0, 1.0, 4.0, 2.0] in each 128-bit half
dq 0x3F80000040000000
dq 0x4000000040800000
dq 0x3F80000040000000
dq 0x4000000040800000

; Source 2 (ymm1): [2.0, 1.0, 1.0, 5.0] in each 128-bit half
dq 0x3F80000040000000
dq 0x40A000003F800000
dq 0x3F80000040000000
dq 0x40A000003F800000

; NaN source 1 (ymm8): [0.0, 0.0, nan, nan] in each 128-bit half
dq 0x0000000000000000
dq 0x7FC000007FC00000
dq 0x0000000000000000
dq 0x7FC000007FC00000

; NaN source 2 (ymm9): [0.0, nan, 0.0, nan] in each 128-bit half
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000


================================================
FILE: unittests/ASM/VEX/vcmpps_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFF00000000", "0x0000000000000000", "0xFFFFFFFF00000000"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM7":  ["0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000", "0x00000000FFFFFFFF"],
    "XMM10": ["0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF00000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM11": ["0x00000000FFFFFFFF", "0x0000000000000000", "0x00000000FFFFFFFF", "0x0000000000000000"]
  }
}
%endif

; 256-bit VCMPPS with the eight legacy predicates (0x00-0x07).
; Both 128-bit halves of every source hold the same four floats, so the
; expected mask of each destination repeats in its upper half.

lea rdx, [rel .data]

; Lane values per 128-bit half, lowest lane first:
;   ymm0 = [2.0, 1.0, 4.0, 2.0]
;   ymm1 = [2.0, 1.0, 1.0, 5.0]
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

vcmpps ymm2, ymm0, ymm1, 0x00 ; EQ
vcmpps ymm3, ymm0, ymm1, 0x01 ; LT
vcmpps ymm4, ymm0, ymm1, 0x02 ; LTE
vcmpps ymm5, ymm0, ymm1, 0x04 ; NEQ
vcmpps ymm6, ymm0, ymm1, 0x05 ; NLT
vcmpps ymm7, ymm0, ymm1, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; (they only react to NaN inputs, so they get their own NaN-bearing operands)
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]

; Unordered will return true when either input is nan
; Per 128-bit half:
; [0.0, 0.0, nan, nan] unord [0.0, nan, 0.0, nan] = [0, 1, 1, 1]
vcmpps ymm10, ymm8, ymm9, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; Per 128-bit half:
; [0.0, 0.0, nan, nan] ord [0.0, nan, 0.0, nan] = [1, 0, 0, 0]
vcmpps ymm11, ymm8, ymm9, 0x07 ; Ordered

hlt

align 32
.data:
; Source 1 (ymm0): [2.0, 1.0, 4.0, 2.0] in each 128-bit half
dq 0x3F80000040000000
dq 0x4000000040800000
dq 0x3F80000040000000
dq 0x4000000040800000

; Source 2 (ymm1): [2.0, 1.0, 1.0, 5.0] in each 128-bit half
dq 0x3F80000040000000
dq 0x40A000003F800000
dq 0x3F80000040000000
dq 0x40A000003F800000

; NaN source 1 (ymm8): [0.0, 0.0, nan, nan] in each 128-bit half
dq 0x0000000000000000
dq 0x7FC000007FC00000
dq 0x0000000000000000
dq 0x7FC000007FC00000

; NaN source 2 (ymm9): [0.0, nan, 0.0, nan] in each 128-bit half
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000


================================================
FILE: unittests/ASM/VEX/vcmpps_full.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RCX": "448"
  }
}
%endif

; Exhaustive VCMPPS test over all 32 AVX comparison predicates.
; ecx counts the comparisons that passed. The expected total of 448 is
; 32 predicates * 7 comparisons each * 2 passes (xmm and ymm operands).

; Expected-outcome selectors passed as the last Compare argument.
; They are used as an index (0 or 1) into the .false/.true mask table.
%define true 1
%define false 0

; Symbolic names for the 5-bit comparison predicate immediate (0-31).
; NOTE(review): the names appear to follow the Intel SDM predicate naming
; (O/U = ordered/unordered, Q/S = quiet/signaling) - confirm against the SDM.
%define EQ_OQ 0
%define LT_OS 1
%define LE_OS 2
%define UNORD_Q 3
%define NEQ_UQ 4
%define NLT_US 5
%define NLE_US 6
%define ORD_Q 7
%define EQ_UQ 8
%define NGE_US 9
%define NGT_US 10
%define FALSE_OQ 11
%define NEQ_OQ 12
%define GE_OS 13
%define GT_OS 14
%define TRUE_UQ 15
%define EQ_OS 16
%define LT_OQ 17
%define LE_OQ 18
%define UNORD_S 19
%define NEQ_US 20
%define NLT_UQ 21
%define NLE_UQ 22
%define ORD_S 23
%define EQ_US 24
%define NGE_UQ 25
%define NGT_UQ 26
%define FALSE_OS 27
%define NEQ_OS 28
%define GE_OQ 29
%define GT_OQ 30
%define TRUE_US 31

; Arguments: src1, src2, predicate, true/false
; Performs one VCMPPS and checks that every lane of the result matches the
; expected outcome. On a mismatch it jumps to .fail; otherwise ecx is
; incremented.
; `result` and `temp` must be defined by the caller before expansion.
; NOTE: the check below hard-codes ymm7 and ymm8, so `result` has to be
; xmm7/ymm7 and `temp` has to be xmm8/ymm8. Clobbers ymm6 and the flags.
%macro Compare 4
    vcmpps result, %1, %2, %3
    ; Construct expected result
    ; %4 is 0 or 1, which selects the 32-byte .false or .true mask
    vmovdqa temp, [rel .false + 32 * %4]
    ; Compare if result and expected are equal
    ; (the xor is all zeros only when both registers are bit-identical;
    ; vptest of a value with itself sets ZF exactly when it is all zeros)
    vxorps ymm6, ymm7, ymm8
    vptest ymm6, ymm6
    jnz .fail
    ; Increment counter of tests passed
    add ecx, 1
%endmacro

; Test setup: rax points at the operand table and ecx (the counter of
; passed comparisons) starts at zero.
lea rax, [rel .data]
xor ecx, ecx

; Load the five operand values; each one fills a whole ymm register.
vmovdqa ymm0, [rax + 32 * 0] ; -4.0
vmovdqa ymm1, [rax + 32 * 1] ; -0.0
vmovdqa ymm2, [rax + 32 * 2] ; 0.0
vmovdqa ymm3, [rax + 32 * 3] ; 4.0
vmovdqa ymm4, [rel .nan]     ; NaN

; Arguments: registers holding -4.0, -0.0, 0.0, 4.0 and NaN (in that order)
; Expands the complete comparison table: 32 predicates with 7 comparisons
; each (224 Compare expansions). The xmm_* aliases are plain text
; substitutions, so they name ymm registers when the macro is invoked with
; ymm arguments.
%macro CompareAll 5
    %define xmm_neg_four %1
    %define xmm_neg_zero %2
    %define xmm_zero %3
    %define xmm_four %4
    %define xmm_nan %5

    Compare xmm_four, xmm_four, EQ_OQ, true
    Compare xmm_neg_four, xmm_four, EQ_OQ, false
    Compare xmm_zero, xmm_neg_zero, EQ_OQ, true
    Compare xmm_zero, xmm_zero, EQ_OQ, true
    Compare xmm_four, xmm_nan, EQ_OQ, false
    Compare xmm_nan, xmm_four, EQ_OQ, false
    Compare xmm_nan, xmm_nan, EQ_OQ, false

    Compare xmm_four, xmm_neg_four, LT_OS, false
    Compare xmm_neg_four, xmm_four, LT_OS, true
    Compare xmm_zero, xmm_neg_zero, LT_OS, false
    Compare xmm_neg_zero, xmm_zero, LT_OS, false
    Compare xmm_nan, xmm_four, LT_OS, false
    Compare xmm_four, xmm_nan, LT_OS, false
    Compare xmm_nan, xmm_nan, LT_OS, false

    Compare xmm_four, xmm_neg_four, LE_OS, false
    Compare xmm_neg_four, xmm_four, LE_OS, true
    Compare xmm_zero, xmm_neg_zero, LE_OS, true
    Compare xmm_neg_zero, xmm_zero, LE_OS, true
    Compare xmm_nan, xmm_four, LE_OS, false
    Compare xmm_four, xmm_nan, LE_OS, false
    Compare xmm_nan, xmm_nan, LE_OS, false

    Compare xmm_four, xmm_neg_four, UNORD_Q, false
    Compare xmm_neg_four, xmm_four, UNORD_Q, false
    Compare xmm_zero, xmm_neg_zero, UNORD_Q, false
    Compare xmm_neg_zero, xmm_zero, UNORD_Q, false
    Compare xmm_nan, xmm_four, UNORD_Q, true
    Compare xmm_four, xmm_nan, UNORD_Q, true
    Compare xmm_nan, xmm_nan, UNORD_Q, true

    Compare xmm_four, xmm_four, NEQ_UQ, false
    Compare xmm_four, xmm_neg_four, NEQ_UQ, true
    Compare xmm_zero, xmm_neg_zero, NEQ_UQ, false
    Compare xmm_zero, xmm_zero, NEQ_UQ, false
    Compare xmm_four, xmm_nan, NEQ_UQ, true
    Compare xmm_nan, xmm_four, NEQ_UQ, true
    Compare xmm_nan, xmm_nan, NEQ_UQ, true

    Compare xmm_four, xmm_neg_four, NLT_US, true
    Compare xmm_neg_four, xmm_four, NLT_US, false
    Compare xmm_zero, xmm_neg_zero, NLT_US, true
    Compare xmm_neg_zero, xmm_zero, NLT_US, true
    Compare xmm_nan, xmm_four, NLT_US, true
    Compare xmm_four, xmm_nan, NLT_US, true
    Compare xmm_nan, xmm_nan, NLT_US, true

    Compare xmm_four, xmm_neg_four, NLE_US, true
    Compare xmm_neg_four, xmm_four, NLE_US, false
    Compare xmm_zero, xmm_neg_zero, NLE_US, false
    Compare xmm_neg_zero, xmm_zero, NLE_US, false
    Compare xmm_nan, xmm_four, NLE_US, true
    Compare xmm_four, xmm_nan, NLE_US, true
    Compare xmm_nan, xmm_nan, NLE_US, true

    Compare xmm_four, xmm_neg_four, ORD_Q, true
    Compare xmm_neg_four, xmm_four, ORD_Q, true
    Compare xmm_zero, xmm_neg_zero, ORD_Q, true
    Compare xmm_neg_zero, xmm_zero, ORD_Q, true
    Compare xmm_nan, xmm_four, ORD_Q, false
    Compare xmm_four, xmm_nan, ORD_Q, false
    Compare xmm_nan, xmm_nan, ORD_Q, false

    Compare xmm_four, xmm_neg_four, EQ_UQ, false
    Compare xmm_four, xmm_four, EQ_UQ, true
    Compare xmm_zero, xmm_neg_zero, EQ_UQ, true
    Compare xmm_zero, xmm_zero, EQ_UQ, true
    Compare xmm_four, xmm_nan, EQ_UQ, true
    Compare xmm_nan, xmm_four, EQ_UQ, true
    Compare xmm_nan, xmm_nan, EQ_UQ, true

    Compare xmm_four, xmm_neg_four, NGE_US, false
    Compare xmm_neg_four, xmm_four, NGE_US, true
    Compare xmm_zero, xmm_neg_zero, NGE_US, false
    Compare xmm_neg_zero, xmm_zero, NGE_US, false
    Compare xmm_nan, xmm_four, NGE_US, true
    Compare xmm_four, xmm_nan, NGE_US, true
    Compare xmm_nan, xmm_nan, NGE_US, true

    Compare xmm_four, xmm_neg_four, NGT_US, false
    Compare xmm_neg_four, xmm_four, NGT_US, true
    Compare xmm_zero, xmm_neg_zero, NGT_US, true
    Compare xmm_neg_zero, xmm_zero, NGT_US, true
    Compare xmm_nan, xmm_four, NGT_US, true
    Compare xmm_four, xmm_nan, NGT_US, true
    Compare xmm_nan, xmm_nan, NGT_US, true

    Compare xmm_four, xmm_neg_four, FALSE_OQ, false
    Compare xmm_neg_four, xmm_four, FALSE_OQ, false
    Compare xmm_zero, xmm_neg_zero, FALSE_OQ, false
    Compare xmm_neg_zero, xmm_zero, FALSE_OQ, false
    Compare xmm_nan, xmm_four, FALSE_OQ, false
    Compare xmm_four, xmm_nan, FALSE_OQ, false
    Compare xmm_nan, xmm_nan, FALSE_OQ, false

    Compare xmm_four, xmm_neg_four, NEQ_OQ, true
    Compare xmm_four, xmm_four, NEQ_OQ, false
    Compare xmm_zero, xmm_neg_zero, NEQ_OQ, false
    Compare xmm_zero, xmm_zero, NEQ_OQ, false
    Compare xmm_four, xmm_nan, NEQ_OQ, false
    Compare xmm_nan, xmm_four, NEQ_OQ, false
    Compare xmm_nan, xmm_nan, NEQ_OQ, false

    Compare xmm_four, xmm_neg_four, GE_OS, true
    Compare xmm_neg_four, xmm_four, GE_OS, false
    Compare xmm_zero, xmm_neg_zero, GE_OS, true
    Compare xmm_neg_zero, xmm_zero, GE_OS, true
    Compare xmm_nan, xmm_four, GE_OS, false
    Compare xmm_four, xmm_nan, GE_OS, false
    Compare xmm_nan, xmm_nan, GE_OS, false

    Compare xmm_four, xmm_neg_four, GT_OS, true
    Compare xmm_neg_four, xmm_four, GT_OS, false
    Compare xmm_zero, xmm_neg_zero, GT_OS, false
    Compare xmm_neg_zero, xmm_zero, GT_OS, false
    Compare xmm_nan, xmm_four, GT_OS, false
    Compare xmm_four, xmm_nan, GT_OS, false
    Compare xmm_nan, xmm_nan, GT_OS, false

    Compare xmm_four, xmm_neg_four, TRUE_UQ, true
    Compare xmm_neg_four, xmm_four, TRUE_UQ, true
    Compare xmm_zero, xmm_neg_zero, TRUE_UQ, true
    Compare xmm_neg_zero, xmm_zero, TRUE_UQ, true
    Compare xmm_nan, xmm_four, TRUE_UQ, true
    Compare xmm_four, xmm_nan, TRUE_UQ, true
    Compare xmm_nan, xmm_nan, TRUE_UQ, true

    Compare xmm_four, xmm_neg_four, EQ_OS, false
    Compare xmm_neg_four, xmm_four, EQ_OS, false
    Compare xmm_zero, xmm_neg_zero, EQ_OS, true
    Compare xmm_neg_zero, xmm_zero, EQ_OS, true
    Compare xmm_nan, xmm_four, EQ_OS, false
    Compare xmm_four, xmm_nan, EQ_OS, false
    Compare xmm_nan, xmm_nan, EQ_OS, false

    Compare xmm_four, xmm_neg_four, LT_OQ, false
    Compare xmm_neg_four, xmm_four, LT_OQ, true
    Compare xmm_zero, xmm_neg_zero, LT_OQ, false
    Compare xmm_neg_zero, xmm_zero, LT_OQ, false
    Compare xmm_nan, xmm_four, LT_OQ, false
    Compare xmm_four, xmm_nan, LT_OQ, false
    Compare xmm_nan, xmm_nan, LT_OQ, false

    Compare xmm_four, xmm_neg_four, LE_OQ, false
    Compare xmm_neg_four, xmm_four, LE_OQ, true
    Compare xmm_zero, xmm_neg_zero, LE_OQ, true
    Compare xmm_neg_zero, xmm_zero, LE_OQ, true
    Compare xmm_nan, xmm_four, LE_OQ, false
    Compare xmm_four, xmm_nan, LE_OQ, false
    Compare xmm_nan, xmm_nan, LE_OQ, false

    Compare xmm_four, xmm_neg_four, UNORD_S, false
    Compare xmm_neg_four, xmm_four, UNORD_S, false
    Compare xmm_zero, xmm_neg_zero, UNORD_S, false
    Compare xmm_neg_zero, xmm_zero, UNORD_S, false
    Compare xmm_nan, xmm_four, UNORD_S, true
    Compare xmm_four, xmm_nan, UNORD_S, true
    Compare xmm_nan, xmm_nan, UNORD_S, true

    Compare xmm_four, xmm_neg_four, NEQ_US, true
    Compare xmm_neg_four, xmm_four, NEQ_US, true
    Compare xmm_zero, xmm_neg_zero, NEQ_US, false
    Compare xmm_neg_zero, xmm_zero, NEQ_US, false
    Compare xmm_nan, xmm_four, NEQ_US, true
    Compare xmm_four, xmm_nan, NEQ_US, true
    Compare xmm_nan, xmm_nan, NEQ_US, true

    Compare xmm_four, xmm_neg_four, NLT_UQ, true
    Compare xmm_neg_four, xmm_four, NLT_UQ, false
    Compare xmm_zero, xmm_neg_zero, NLT_UQ, true
    Compare xmm_neg_zero, xmm_zero, NLT_UQ, true
    Compare xmm_nan, xmm_four, NLT_UQ, true
    Compare xmm_four, xmm_nan, NLT_UQ, true
    Compare xmm_nan, xmm_nan, NLT_UQ, true

    Compare xmm_four, xmm_neg_four, NLE_UQ, true
    Compare xmm_neg_four, xmm_four, NLE_UQ, false
    Compare xmm_zero, xmm_neg_zero, NLE_UQ, false
    Compare xmm_neg_zero, xmm_zero, NLE_UQ, false
    Compare xmm_nan, xmm_four, NLE_UQ, true
    Compare xmm_four, xmm_nan, NLE_UQ, true
    Compare xmm_nan, xmm_nan, NLE_UQ, true

    Compare xmm_four, xmm_neg_four, ORD_S, true
    Compare xmm_neg_four, xmm_four, ORD_S, true
    Compare xmm_zero, xmm_neg_zero, ORD_S, true
    Compare xmm_neg_zero, xmm_zero, ORD_S, true
    Compare xmm_nan, xmm_four, ORD_S, false
    Compare xmm_four, xmm_nan, ORD_S, false
    Compare xmm_nan, xmm_nan, ORD_S, false

    Compare xmm_four, xmm_neg_four, EQ_US, false
    Compare xmm_four, xmm_four, EQ_US, true
    Compare xmm_zero, xmm_neg_zero, EQ_US, true
    Compare xmm_zero, xmm_zero, EQ_US, true
    Compare xmm_four, xmm_nan, EQ_US, true
    Compare xmm_nan, xmm_four, EQ_US, true
    Compare xmm_nan, xmm_nan, EQ_US, true

    Compare xmm_four, xmm_neg_four, NGE_UQ, false
    Compare xmm_neg_four, xmm_four, NGE_UQ, true
    Compare xmm_zero, xmm_neg_zero, NGE_UQ, false
    Compare xmm_neg_zero, xmm_zero, NGE_UQ, false
    Compare xmm_nan, xmm_four, NGE_UQ, true
    Compare xmm_four, xmm_nan, NGE_UQ, true
    Compare xmm_nan, xmm_nan, NGE_UQ, true

    Compare xmm_four, xmm_neg_four, NGT_UQ, false
    Compare xmm_neg_four, xmm_four, NGT_UQ, true
    Compare xmm_zero, xmm_neg_zero, NGT_UQ, true
    Compare xmm_neg_zero, xmm_zero, NGT_UQ, true
    Compare xmm_nan, xmm_four, NGT_UQ, true
    Compare xmm_four, xmm_nan, NGT_UQ, true
    Compare xmm_nan, xmm_nan, NGT_UQ, true

    Compare xmm_four, xmm_neg_four, FALSE_OS, false
    Compare xmm_neg_four, xmm_four, FALSE_OS, false
    Compare xmm_zero, xmm_neg_zero, FALSE_OS, false
    Compare xmm_neg_zero, xmm_zero, FALSE_OS, false
    Compare xmm_nan, xmm_four, FALSE_OS, false
    Compare xmm_four, xmm_nan, FALSE_OS, false
    Compare xmm_nan, xmm_nan, FALSE_OS, false

    Compare xmm_four, xmm_neg_four, NEQ_OS, true
    Compare xmm_four, xmm_four, NEQ_OS, false
    Compare xmm_zero, xmm_neg_zero, NEQ_OS, false
    Compare xmm_zero, xmm_zero, NEQ_OS, false
    Compare xmm_four, xmm_nan, NEQ_OS, false
    Compare xmm_nan, xmm_four, NEQ_OS, false
    Compare xmm_nan, xmm_nan, NEQ_OS, false

    Compare xmm_four, xmm_neg_four, GE_OQ, true
    Compare xmm_neg_four, xmm_four, GE_OQ, false
    Compare xmm_zero, xmm_neg_zero, GE_OQ, true
    Compare xmm_neg_zero, xmm_zero, GE_OQ, true
    Compare xmm_nan, xmm_four, GE_OQ, false
    Compare xmm_four, xmm_nan, GE_OQ, false
    Compare xmm_nan, xmm_nan, GE_OQ, false

    Compare xmm_four, xmm_neg_four, GT_OQ, true
    Compare xmm_neg_four, xmm_four, GT_OQ, false
    Compare xmm_zero, xmm_neg_zero, GT_OQ, false
    Compare xmm_neg_zero, xmm_zero, GT_OQ, false
    Compare xmm_nan, xmm_four, GT_OQ, false
    Compare xmm_four, xmm_nan, GT_OQ, false
    Compare xmm_nan, xmm_nan, GT_OQ, false

    Compare xmm_four, xmm_neg_four, TRUE_US, true
    Compare xmm_neg_four, xmm_four, TRUE_US, true
    Compare xmm_zero, xmm_neg_zero, TRUE_US, true
    Compare xmm_neg_zero, xmm_zero, TRUE_US, true
    Compare xmm_nan, xmm_four, TRUE_US, true
    Compare xmm_four, xmm_nan, TRUE_US, true
    Compare xmm_nan, xmm_nan, TRUE_US, true
%endmacro

; First pass: expand every comparison with 128-bit (xmm) operands.
; `temp` and `result` are consumed by the Compare macro and must match the
; xmm8/ymm8 and xmm7/ymm7 registers that its check hard-codes.
%define temp xmm8
%define result xmm7
CompareAll xmm0, xmm1, xmm2, xmm3, xmm4
; Second pass: the same comparison table with 256-bit (ymm) operands.
%define temp ymm8
%define result ymm7
CompareAll ymm0, ymm1, ymm2, ymm3, ymm4

; If ecx is not correct we can use the counter to determine which test failed
; Execution also falls through to this label after the last comparison,
; so this hlt ends the test in every case.
.fail:
hlt

; 32-byte alignment is required by the vmovdqa ymm loads of these constants.
align 32
.data:
; Each constant is eight floats (32 bytes), i.e. one full ymm register.
dd -4.0, -4.0, -4.0, -4.0
dd -4.0, -4.0, -4.0, -4.0

dd -0.0, -0.0, -0.0, -0.0
dd -0.0, -0.0, -0.0, -0.0

dd 0.0, 0.0, 0.0, 0.0
dd 0.0, 0.0, 0.0, 0.0

dd 4.0, 4.0, 4.0, 4.0
dd 4.0, 4.0, 4.0, 4.0

; Single-precision quiet NaN (0x7FC00000) in every lane.
.nan:
dq 0x7FC000007FC00000
dq 0x7FC000007FC00000
dq 0x7FC000007FC00000
dq 0x7FC000007FC00000

; All-zero mask: expected result of a comparison that is false.
.false:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; All-ones mask: expected result of a comparison that is true.
; Must stay exactly 32 bytes after .false, because Compare addresses it
; as .false + 32 * 1.
.true:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

================================================
FILE: unittests/ASM/VEX/vcmpsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0x7FF8000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000000000000000", "0x7FF8000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0xFFFFFFFFFFFFFFFF", "0x7FF8000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VCMPSD with the eight legacy predicates (0x00-0x07).
; VCMPSD is a scalar instruction: only the low double of each source is
; compared. Bits 63:0 of the destination receive the all-ones/all-zeros
; mask, bits 127:64 are copied from the first source, and the expected
; register values above require bits 255:128 to be zero.

lea rdx, [rel .data]

; Low 128 bits, lowest element first:
;   xmm0 = [1.0, 2.0]
;   xmm1 = [1.0, 3.0]
; so every comparison below is 1.0 <pred> 1.0 and the upper element of each
; result is 2.0 (0x4000000000000000) taken from xmm0.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

vcmpsd xmm2, xmm0, xmm1, 0x00 ; EQ
vcmpsd xmm3, xmm0, xmm1, 0x01 ; LT
vcmpsd xmm4, xmm0, xmm1, 0x02 ; LTE
vcmpsd xmm5, xmm0, xmm1, 0x04 ; NEQ
vcmpsd xmm6, xmm0, xmm1, 0x05 ; NLT
vcmpsd xmm7, xmm0, xmm1, 0x06 ; NLTE

; Unordered and Ordered tests need to be special cased
; xmm8 = [0.0, nan], xmm9 = [nan, 0.0] (lowest element first)
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]

; Unordered will return true when either input is nan
; low: 0.0 unord nan = 1; upper: nan copied from xmm8
vcmpsd xmm10, xmm8, xmm9, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; low: 0.0 ord nan = 0; upper: nan copied from xmm8
vcmpsd xmm11, xmm8, xmm9, 0x07 ; Ordered

; Ordered will return true when both inputs are NOT nan
; low: nan ord 0.0 = 0; upper: 0.0 copied from xmm9
vcmpsd xmm12, xmm9, xmm8, 0x07 ; Ordered

; Ordered will return true when both inputs are NOT nan
; low: 0.0 ord 0.0 = 1; upper: nan copied from xmm8
vcmpsd xmm13, xmm8, xmm8, 0x07 ; Ordered

hlt

align 32
.data:
; Source 1 (ymm0): [1.0, 2.0] in each 128-bit half
dq 0x3FF0000000000000
dq 0x4000000000000000
dq 0x3FF0000000000000
dq 0x4000000000000000

; Source 2 (ymm1): [1.0, 3.0] in each 128-bit half
dq 0x3FF0000000000000
dq 0x4008000000000000
dq 0x3FF0000000000000
dq 0x4008000000000000

; NaN source 1 (ymm8): [0.0, nan] in each 128-bit half
dq 0x0000000000000000
dq 0x7FF8000000000000
dq 0x0000000000000000
dq 0x7FF8000000000000

; NaN source 2 (ymm9): [nan, 0.0] in each 128-bit half
dq 0x7FF8000000000000
dq 0x0000000000000000
dq 0x7FF8000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vcmpsd_full.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RCX": "224"
  }
}
%endif

; Exhaustive VCMPSD test over all 32 AVX comparison predicates.
; ecx counts the comparisons that passed. The expected total of 224 is
; 32 predicates * 7 comparisons each.

; Expected-outcome selectors passed as the last Compare argument.
; They are used as an index (0 or 1) into the .false/.true mask table.
%define true 1
%define false 0

; Symbolic names for the 5-bit comparison predicate immediate (0-31).
; NOTE(review): the names appear to follow the Intel SDM predicate naming
; (O/U = ordered/unordered, Q/S = quiet/signaling) - confirm against the SDM.
%define EQ_OQ 0
%define LT_OS 1
%define LE_OS 2
%define UNORD_Q 3
%define NEQ_UQ 4
%define NLT_US 5
%define NLE_US 6
%define ORD_Q 7
%define EQ_UQ 8
%define NGE_US 9
%define NGT_US 10
%define FALSE_OQ 11
%define NEQ_OQ 12
%define GE_OS 13
%define GT_OS 14
%define TRUE_UQ 15
%define EQ_OS 16
%define LT_OQ 17
%define LE_OQ 18
%define UNORD_S 19
%define NEQ_US 20
%define NLT_UQ 21
%define NLE_UQ 22
%define ORD_S 23
%define EQ_US 24
%define NGE_UQ 25
%define NGT_UQ 26
%define FALSE_OS 27
%define NEQ_OS 28
%define GE_OQ 29
%define GT_OQ 30
%define TRUE_US 31

; Arguments: src1, src2, predicate, true/false
; Performs one VCMPSD and checks the whole destination register against the
; expected value. On a mismatch it jumps to .fail; otherwise ecx is
; incremented. Clobbers xmm5, ymm6, xmm7, xmm8 and the flags.
%macro Compare 4
    vcmpsd xmm7, %1, %2, %3
    ; Construct expected result
    ; Move truthy/falsey value to the first element based on %4
    ; and [127:64] from src1
    ; (%4 is 0 or 1; this relies on .true being laid out 32 bytes after .false)
    vmovdqa xmm8, [rel .false + 32 * %4]
    ; xmm5[63:0] = mask from xmm8, xmm5[127:64] = src1[127:64]
    vmovsd xmm5, %1, xmm8
    ; Compare if result and expected are equal
    ; (the xor is all zeros only when both registers are bit-identical;
    ; vptest of a value with itself sets ZF exactly when it is all zeros)
    vxorpd ymm6, ymm7, ymm5
    vptest ymm6, ymm6
    jnz .fail
    ; Increment counter of tests passed
    add ecx, 1
%endmacro

; Test setup: rax points at the operand table and ecx (the counter of
; passed comparisons) starts at zero.
lea rax, [rel .data]
xor ecx, ecx

; Load the five operand values.
vmovdqa ymm0, [rax + 32 * 0] ; -4.0
vmovdqa ymm1, [rax + 32 * 1] ; -0.0
vmovdqa ymm2, [rax + 32 * 2] ; 0.0
vmovdqa ymm3, [rax + 32 * 3] ; 4.0
vmovdqa ymm4, [rel .nan]     ; NaN

; Readable aliases for the operand registers used by the comparison table.
; Only the xmm views are needed because VCMPSD is a scalar instruction.
%define xmm_neg_four xmm0
%define xmm_neg_zero xmm1
%define xmm_zero xmm2
%define xmm_four xmm3
%define xmm_nan xmm4

Compare xmm_four, xmm_four, EQ_OQ, true
Compare xmm_neg_four, xmm_four, EQ_OQ, false
Compare xmm_zero, xmm_neg_zero, EQ_OQ, true
Compare xmm_zero, xmm_zero, EQ_OQ, true
Compare xmm_four, xmm_nan, EQ_OQ, false
Compare xmm_nan, xmm_four, EQ_OQ, false
Compare xmm_nan, xmm_nan, EQ_OQ, false

Compare xmm_four, xmm_neg_four, LT_OS, false
Compare xmm_neg_four, xmm_four, LT_OS, true
Compare xmm_zero, xmm_neg_zero, LT_OS, false
Compare xmm_neg_zero, xmm_zero, LT_OS, false
Compare xmm_nan, xmm_four, LT_OS, false
Compare xmm_four, xmm_nan, LT_OS, false
Compare xmm_nan, xmm_nan, LT_OS, false

Compare xmm_four, xmm_neg_four, LE_OS, false
Compare xmm_neg_four, xmm_four, LE_OS, true
Compare xmm_zero, xmm_neg_zero, LE_OS, true
Compare xmm_neg_zero, xmm_zero, LE_OS, true
Compare xmm_nan, xmm_four, LE_OS, false
Compare xmm_four, xmm_nan, LE_OS, false
Compare xmm_nan, xmm_nan, LE_OS, false

Compare xmm_four, xmm_neg_four, UNORD_Q, false
Compare xmm_neg_four, xmm_four, UNORD_Q, false
Compare xmm_zero, xmm_neg_zero, UNORD_Q, false
Compare xmm_neg_zero, xmm_zero, UNORD_Q, false
Compare xmm_nan, xmm_four, UNORD_Q, true
Compare xmm_four, xmm_nan, UNORD_Q, true
Compare xmm_nan, xmm_nan, UNORD_Q, true

Compare xmm_four, xmm_four, NEQ_UQ, false
Compare xmm_four, xmm_neg_four, NEQ_UQ, true
Compare xmm_zero, xmm_neg_zero, NEQ_UQ, false
Compare xmm_zero, xmm_zero, NEQ_UQ, false
Compare xmm_four, xmm_nan, NEQ_UQ, true
Compare xmm_nan, xmm_four, NEQ_UQ, true
Compare xmm_nan, xmm_nan, NEQ_UQ, true

Compare xmm_four, xmm_neg_four, NLT_US, true
Compare xmm_neg_four, xmm_four, NLT_US, false
Compare xmm_zero, xmm_neg_zero, NLT_US, true
Compare xmm_neg_zero, xmm_zero, NLT_US, true
Compare xmm_nan, xmm_four, NLT_US, true
Compare xmm_four, xmm_nan, NLT_US, true
Compare xmm_nan, xmm_nan, NLT_US, true

Compare xmm_four, xmm_neg_four, NLE_US, true
Compare xmm_neg_four, xmm_four, NLE_US, false
Compare xmm_zero, xmm_neg_zero, NLE_US, false
Compare xmm_neg_zero, xmm_zero, NLE_US, false
Compare xmm_nan, xmm_four, NLE_US, true
Compare xmm_four, xmm_nan, NLE_US, true
Compare xmm_nan, xmm_nan, NLE_US, true

Compare xmm_four, xmm_neg_four, ORD_Q, true
Compare xmm_neg_four, xmm_four, ORD_Q, true
Compare xmm_zero, xmm_neg_zero, ORD_Q, true
Compare xmm_neg_zero, xmm_zero, ORD_Q, true
Compare xmm_nan, xmm_four, ORD_Q, false
Compare xmm_four, xmm_nan, ORD_Q, false
Compare xmm_nan, xmm_nan, ORD_Q, false

Compare xmm_four, xmm_neg_four, EQ_UQ, false
Compare xmm_four, xmm_four, EQ_UQ, true
Compare xmm_zero, xmm_neg_zero, EQ_UQ, true
Compare xmm_zero, xmm_zero, EQ_UQ, true
Compare xmm_four, xmm_nan, EQ_UQ, true
Compare xmm_nan, xmm_four, EQ_UQ, true
Compare xmm_nan, xmm_nan, EQ_UQ, true

Compare xmm_four, xmm_neg_four, NGE_US, false
Compare xmm_neg_four, xmm_four, NGE_US, true
Compare xmm_zero, xmm_neg_zero, NGE_US, false
Compare xmm_neg_zero, xmm_zero, NGE_US, false
Compare xmm_nan, xmm_four, NGE_US, true
Compare xmm_four, xmm_nan, NGE_US, true
Compare xmm_nan, xmm_nan, NGE_US, true

Compare xmm_four, xmm_neg_four, NGT_US, false
Compare xmm_neg_four, xmm_four, NGT_US, true
Compare xmm_zero, xmm_neg_zero, NGT_US, true
Compare xmm_neg_zero, xmm_zero, NGT_US, true
Compare xmm_nan, xmm_four, NGT_US, true
Compare xmm_four, xmm_nan, NGT_US, true
Compare xmm_nan, xmm_nan, NGT_US, true

Compare xmm_four, xmm_neg_four, FALSE_OQ, false
Compare xmm_neg_four, xmm_four, FALSE_OQ, false
Compare xmm_zero, xmm_neg_zero, FALSE_OQ, false
Compare xmm_neg_zero, xmm_zero, FALSE_OQ, false
Compare xmm_nan, xmm_four, FALSE_OQ, false
Compare xmm_four, xmm_nan, FALSE_OQ, false
Compare xmm_nan, xmm_nan, FALSE_OQ, false

Compare xmm_four, xmm_neg_four, NEQ_OQ, true
Compare xmm_four, xmm_four, NEQ_OQ, false
Compare xmm_zero, xmm_neg_zero, NEQ_OQ, false
Compare xmm_zero, xmm_zero, NEQ_OQ, false
Compare xmm_four, xmm_nan, NEQ_OQ, false
Compare xmm_nan, xmm_four, NEQ_OQ, false
Compare xmm_nan, xmm_nan, NEQ_OQ, false

Compare xmm_four, xmm_neg_four, GE_OS, true
Compare xmm_neg_four, xmm_four, GE_OS, false
Compare xmm_zero, xmm_neg_zero, GE_OS, true
Compare xmm_neg_zero, xmm_zero, GE_OS, true
Compare xmm_nan, xmm_four, GE_OS, false
Compare xmm_four, xmm_nan, GE_OS, false
Compare xmm_nan, xmm_nan, GE_OS, false

Compare xmm_four, xmm_neg_four, GT_OS, true
Compare xmm_neg_four, xmm_four, GT_OS, false
Compare xmm_zero, xmm_neg_zero, GT_OS, false
Compare xmm_neg_zero, xmm_zero, GT_OS, false
Compare xmm_nan, xmm_four, GT_OS, false
Compare xmm_four, xmm_nan, GT_OS, false
Compare xmm_nan, xmm_nan, GT_OS, false

Compare xmm_four, xmm_neg_four, TRUE_UQ, true
Compare xmm_neg_four, xmm_four, TRUE_UQ, true
Compare xmm_zero, xmm_neg_zero, TRUE_UQ, true
Compare xmm_neg_zero, xmm_zero, TRUE_UQ, true
Compare xmm_nan, xmm_four, TRUE_UQ, true
Compare xmm_four, xmm_nan, TRUE_UQ, true
Compare xmm_nan, xmm_nan, TRUE_UQ, true

Compare xmm_four, xmm_neg_four, EQ_OS, false
Compare xmm_neg_four, xmm_four, EQ_OS, false
Compare xmm_zero, xmm_neg_zero, EQ_OS, true
Compare xmm_neg_zero, xmm_zero, EQ_OS, true
Compare xmm_nan, xmm_four, EQ_OS, false
Compare xmm_four, xmm_nan, EQ_OS, false
Compare xmm_nan, xmm_nan, EQ_OS, false

Compare xmm_four, xmm_neg_four, LT_OQ, false
Compare xmm_neg_four, xmm_four, LT_OQ, true
Compare xmm_zero, xmm_neg_zero, LT_OQ, false
Compare xmm_neg_zero, xmm_zero, LT_OQ, false
Compare xmm_nan, xmm_four, LT_OQ, false
Compare xmm_four, xmm_nan, LT_OQ, false
Compare xmm_nan, xmm_nan, LT_OQ, false

Compare xmm_four, xmm_neg_four, LE_OQ, false
Compare xmm_neg_four, xmm_four, LE_OQ, true
Compare xmm_zero, xmm_neg_zero, LE_OQ, true
Compare xmm_neg_zero, xmm_zero, LE_OQ, true
Compare xmm_nan, xmm_four, LE_OQ, false
Compare xmm_four, xmm_nan, LE_OQ, false
Compare xmm_nan, xmm_nan, LE_OQ, false

Compare xmm_four, xmm_neg_four, UNORD_S, false
Compare xmm_neg_four, xmm_four, UNORD_S, false
Compare xmm_zero, xmm_neg_zero, UNORD_S, false
Compare xmm_neg_zero, xmm_zero, UNORD_S, false
Compare xmm_nan, xmm_four, UNORD_S, true
Compare xmm_four, xmm_nan, UNORD_S, true
Compare xmm_nan, xmm_nan, UNORD_S, true

Compare xmm_four, xmm_neg_four, NEQ_US, true
Compare xmm_neg_four, xmm_four, NEQ_US, true
Compare xmm_zero, xmm_neg_zero, NEQ_US, false
Compare xmm_neg_zero, xmm_zero, NEQ_US, false
Compare xmm_nan, xmm_four, NEQ_US, true
Compare xmm_four, xmm_nan, NEQ_US, true
Compare xmm_nan, xmm_nan, NEQ_US, true

Compare xmm_four, xmm_neg_four, NLT_UQ, true
Compare xmm_neg_four, xmm_four, NLT_UQ, false
Compare xmm_zero, xmm_neg_zero, NLT_UQ, true
Compare xmm_neg_zero, xmm_zero, NLT_UQ, true
Compare xmm_nan, xmm_four, NLT_UQ, true
Compare xmm_four, xmm_nan, NLT_UQ, true
Compare xmm_nan, xmm_nan, NLT_UQ, true

Compare xmm_four, xmm_neg_four, NLE_UQ, true
Compare xmm_neg_four, xmm_four, NLE_UQ, false
Compare xmm_zero, xmm_neg_zero, NLE_UQ, false
Compare xmm_neg_zero, xmm_zero, NLE_UQ, false
Compare xmm_nan, xmm_four, NLE_UQ, true
Compare xmm_four, xmm_nan, NLE_UQ, true
Compare xmm_nan, xmm_nan, NLE_UQ, true

Compare xmm_four, xmm_neg_four, ORD_S, true
Compare xmm_neg_four, xmm_four, ORD_S, true
Compare xmm_zero, xmm_neg_zero, ORD_S, true
Compare xmm_neg_zero, xmm_zero, ORD_S, true
Compare xmm_nan, xmm_four, ORD_S, false
Compare xmm_four, xmm_nan, ORD_S, false
Compare xmm_nan, xmm_nan, ORD_S, false

Compare xmm_four, xmm_neg_four, EQ_US, false
Compare xmm_four, xmm_four, EQ_US, true
Compare xmm_zero, xmm_neg_zero, EQ_US, true
Compare xmm_zero, xmm_zero, EQ_US, true
Compare xmm_four, xmm_nan, EQ_US, true
Compare xmm_nan, xmm_four, EQ_US, true
Compare xmm_nan, xmm_nan, EQ_US, true

Compare xmm_four, xmm_neg_four, NGE_UQ, false
Compare xmm_neg_four, xmm_four, NGE_UQ, true
Compare xmm_zero, xmm_neg_zero, NGE_UQ, false
Compare xmm_neg_zero, xmm_zero, NGE_UQ, false
Compare xmm_nan, xmm_four, NGE_UQ, true
Compare xmm_four, xmm_nan, NGE_UQ, true
Compare xmm_nan, xmm_nan, NGE_UQ, true

Compare xmm_four, xmm_neg_four, NGT_UQ, false
Compare xmm_neg_four, xmm_four, NGT_UQ, true
Compare xmm_zero, xmm_neg_zero, NGT_UQ, true
Compare xmm_neg_zero, xmm_zero, NGT_UQ, true
Compare xmm_nan, xmm_four, NGT_UQ, true
Compare xmm_four, xmm_nan, NGT_UQ, true
Compare xmm_nan, xmm_nan, NGT_UQ, true

Compare xmm_four, xmm_neg_four, FALSE_OS, false
Compare xmm_neg_four, xmm_four, FALSE_OS, false
Compare xmm_zero, xmm_neg_zero, FALSE_OS, false
Compare xmm_neg_zero, xmm_zero, FALSE_OS, false
Compare xmm_nan, xmm_four, FALSE_OS, false
Compare xmm_four, xmm_nan, FALSE_OS, false
Compare xmm_nan, xmm_nan, FALSE_OS, false

Compare xmm_four, xmm_neg_four, NEQ_OS, true
Compare xmm_four, xmm_four, NEQ_OS, false
Compare xmm_zero, xmm_neg_zero, NEQ_OS, false
Compare xmm_zero, xmm_zero, NEQ_OS, false
Compare xmm_four, xmm_nan, NEQ_OS, false
Compare xmm_nan, xmm_four, NEQ_OS, false
Compare xmm_nan, xmm_nan, NEQ_OS, false

Compare xmm_four, xmm_neg_four, GE_OQ, true
Compare xmm_neg_four, xmm_four, GE_OQ, false
Compare xmm_zero, xmm_neg_zero, GE_OQ, true
Compare xmm_neg_zero, xmm_zero, GE_OQ, true
Compare xmm_nan, xmm_four, GE_OQ, false
Compare xmm_four, xmm_nan, GE_OQ, false
Compare xmm_nan, xmm_nan, GE_OQ, false

Compare xmm_four, xmm_neg_four, GT_OQ, true
Compare xmm_neg_four, xmm_four, GT_OQ, false
Compare xmm_zero, xmm_neg_zero, GT_OQ, false
Compare xmm_neg_zero, xmm_zero, GT_OQ, false
Compare xmm_nan, xmm_four, GT_OQ, false
Compare xmm_four, xmm_nan, GT_OQ, false
Compare xmm_nan, xmm_nan, GT_OQ, false

Compare xmm_four, xmm_neg_four, TRUE_US, true
Compare xmm_neg_four, xmm_four, TRUE_US, true
Compare xmm_zero, xmm_neg_zero, TRUE_US, true
Compare xmm_neg_zero, xmm_zero, TRUE_US, true
Compare xmm_nan, xmm_four, TRUE_US, true
Compare xmm_four, xmm_nan, TRUE_US, true
Compare xmm_nan, xmm_nan, TRUE_US, true

; If ecx is not correct we can use the counter to determine which test failed
; NOTE(review): the Compare macro is defined earlier in this file, outside this
; excerpt; presumably it branches here on a mismatch. Unless that macro ends in
; an unconditional jump, execution also falls through to this hlt after the
; final Compare above.
.fail:
hlt

; Constant pool. Every entry below is one 32-byte row made of four qwords.
align 32
.data:
; Row 0: -4.0 in the low qword, 0xCC filler in the remaining three qwords.
dq -4.0
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 1: -0.0
dq -0.0
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 2: +0.0
dq 0.0
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 3: 4.0
dq 4.0
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Double-precision NaN bit pattern (exponent all ones, top mantissa bit set).
.nan:
dq 0x7FF8000000000000
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; All-zero row.
; NOTE(review): presumably the expected low-qword mask for a failed compare;
; confirm against the Compare macro earlier in this file.
.false:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Low qword all ones, everything above it zero. This row sits exactly 32 bytes
; after .false.
.true:
dq 0xFFFFFFFFFFFFFFFF
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

================================================
FILE: unittests/ASM/VEX/vcmpss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x51525354FFFFFFFF", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x5152535400000000", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x51525354FFFFFFFF", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x5152535400000000", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x51525354FFFFFFFF", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x5152535400000000", "0x5152535440000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000000000000", "0x7FC000007FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x00000000FFFFFFFF", "0x7FC000007FC00000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vcmpss with the eight legacy predicates (imm8 0x00-0x07).
; vcmpss is a scalar compare: only the lowest float (bits [31:0]) of the two
; sources is compared. The expected RegData shows the mask in bits [31:0],
; bits [127:32] copied from the first source, and bits [255:128] zero.
lea rdx, [rel .data]

; Lowest float of both ymm0 and ymm1 is 0x3F800000 (1.0), so every compare
; below evaluates 1.0 <pred> 1.0.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

vcmpss xmm2, xmm0, xmm1, 0x00 ; EQ  -> true  (0xFFFFFFFF)
vcmpss xmm3, xmm0, xmm1, 0x01 ; LT  -> false (0x00000000)
vcmpss xmm4, xmm0, xmm1, 0x02 ; LE  -> true
vcmpss xmm5, xmm0, xmm1, 0x04 ; NEQ -> false
vcmpss xmm6, xmm0, xmm1, 0x05 ; NLT -> true
vcmpss xmm7, xmm0, xmm1, 0x06 ; NLE -> false

; Unordered and Ordered tests need to be special cased
; As float vectors the inputs are ymm8 = [0.0, 0.0, nan, nan] and
; ymm9 = [0.0, nan, 0.0, nan] (lowest element first), repeated in both halves.
vmovapd ymm8, [rdx + 32 * 2]
vmovapd ymm9, [rdx + 32 * 3]

; Unordered will return true when either input is nan
; Only the lowest element is compared here: 0.0 unord 0.0 = 0.
; The NaNs in the upper elements are not compared; elements 1-3 of the result
; are copied from xmm8 (see the expected XMM10 value).
vcmpss xmm10, xmm8, xmm9, 0x03 ; Unordered

; Ordered will return true when both inputs are NOT nan
; Only the lowest element is compared here: 0.0 ord 0.0 = 1 (0xFFFFFFFF).
; Elements 1-3 of the result are again copied from xmm8 (see expected XMM11).
vcmpss xmm11, xmm8, xmm9, 0x07 ; Ordered

hlt

; Four 32-byte rows; each row repeats the same 16 bytes in both halves.
align 32
.data:
; Row 0 (ymm0): lowest float 1.0, 0x51525354 marker above it, then 2.0.
dq 0x515253543F800000
dq 0x5152535440000000
dq 0x515253543F800000
dq 0x5152535440000000

; Row 1 (ymm1): lowest float 1.0; differs from row 0 only in element 2 (4.0).
dq 0x515253543F800000
dq 0x5152535440800000
dq 0x515253543F800000
dq 0x5152535440800000

; Row 2 (ymm8): [0.0, 0.0, nan, nan]
dq 0x0000000000000000
dq 0x7FC000007FC00000
dq 0x0000000000000000
dq 0x7FC000007FC00000

; Row 3 (ymm9): [0.0, nan, 0.0, nan]
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000
dq 0x7FC0000000000000


================================================
FILE: unittests/ASM/VEX/vcmpss_full.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RCX": "224"
  }
}
%endif

; Expected-outcome selectors for the fourth Compare argument. The macro uses
; the value as a row index (32 * %4) relative to .false, so 0 selects the
; .false row and 1 selects the .true row.
%define true 1
%define false 0

; imm8 predicate encodings for vcmpss, values 0-31. The names follow the
; mnemonics in the Intel SDM compare-predicate table.
%define EQ_OQ 0
%define LT_OS 1
%define LE_OS 2
%define UNORD_Q 3
%define NEQ_UQ 4
%define NLT_US 5
%define NLE_US 6
%define ORD_Q 7
%define EQ_UQ 8
%define NGE_US 9
%define NGT_US 10
%define FALSE_OQ 11
%define NEQ_OQ 12
%define GE_OS 13
%define GT_OS 14
%define TRUE_UQ 15
%define EQ_OS 16
%define LT_OQ 17
%define LE_OQ 18
%define UNORD_S 19
%define NEQ_US 20
%define NLT_UQ 21
%define NLE_UQ 22
%define ORD_S 23
%define EQ_US 24
%define NGE_UQ 25
%define NGT_UQ 26
%define FALSE_OS 27
%define NEQ_OS 28
%define GE_OQ 29
%define GT_OQ 30
%define TRUE_US 31

; Compare: run one vcmpss and check the complete 256-bit result.
; Arguments: src1, src2, predicate, true/false
;   %1, %2 - xmm source registers
;   %3     - imm8 predicate (one of the %define'd names)
;   %4     - expected outcome: `true` (1) or `false` (0)
; Uses xmm5-xmm8 as scratch. On a mismatch it jumps to .fail without touching
; ecx; otherwise ecx is incremented by one.
%macro Compare 4
    vcmpss xmm7, %1, %2, %3
    ; Construct expected result
    ; Move truthy/falsey value to the first element based on %4
    ; and [127:32] from src1
    ; (.true is the row 32 bytes after .false, hence the 32 * %4 offset)
    vmovdqa xmm8, [rel .false + 32 * %4]
    vmovss xmm5, %1, xmm8
    ; Compare if result and expected are equal
    ; The xor/test is 256 bits wide, so bits [255:128] of the result and of the
    ; expected value are compared as well as the low 128 bits.
    vxorps ymm6, ymm7, ymm5
    vptest ymm6, ymm6
    jnz .fail
    ; Increment counter of tests passed
    add ecx, 1
%endmacro

; Test setup: point rax at the constant pool and clear the pass counter.
lea rax, [rel .data]
xor ecx, ecx

; Load one 32-byte row per operand constant. The value of interest is the
; lowest float of each row; the rest of the row is 0xCC filler.
vmovdqa ymm0, [rax + 32 * 0] ; -4.0
vmovdqa ymm1, [rax + 32 * 1] ; -0.0
vmovdqa ymm2, [rax + 32 * 2] ; 0.0
vmovdqa ymm3, [rax + 32 * 3] ; 4.0
vmovdqa ymm4, [rel .nan]     ; NaN

; Readable aliases for the xmm views of the registers loaded above.
%define xmm_neg_four xmm0
%define xmm_neg_zero xmm1
%define xmm_zero xmm2
%define xmm_four xmm3
%define xmm_nan xmm4

; Test matrix: 32 predicates x 7 operand pairs = 224 Compare invocations, which
; is the RCX value the CONFIG header expects when every check passes.
; In every group the last three pairs contain a NaN operand; the others cover
; +/-4.0 and +0.0 / -0.0 (which compare as equal).
; Predicates 16-31 repeat the expectations of 0-15: only the result mask is
; checked here, so the S/Q suffix of a predicate name is not observable.

; EQ_OQ (0): equal; any NaN operand -> false
Compare xmm_four, xmm_four, EQ_OQ, true
Compare xmm_neg_four, xmm_four, EQ_OQ, false
Compare xmm_zero, xmm_neg_zero, EQ_OQ, true
Compare xmm_zero, xmm_zero, EQ_OQ, true
Compare xmm_four, xmm_nan, EQ_OQ, false
Compare xmm_nan, xmm_four, EQ_OQ, false
Compare xmm_nan, xmm_nan, EQ_OQ, false

; LT_OS (1): src1 < src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, LT_OS, false
Compare xmm_neg_four, xmm_four, LT_OS, true
Compare xmm_zero, xmm_neg_zero, LT_OS, false
Compare xmm_neg_zero, xmm_zero, LT_OS, false
Compare xmm_nan, xmm_four, LT_OS, false
Compare xmm_four, xmm_nan, LT_OS, false
Compare xmm_nan, xmm_nan, LT_OS, false

; LE_OS (2): src1 <= src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, LE_OS, false
Compare xmm_neg_four, xmm_four, LE_OS, true
Compare xmm_zero, xmm_neg_zero, LE_OS, true
Compare xmm_neg_zero, xmm_zero, LE_OS, true
Compare xmm_nan, xmm_four, LE_OS, false
Compare xmm_four, xmm_nan, LE_OS, false
Compare xmm_nan, xmm_nan, LE_OS, false

; UNORD_Q (3): true only when at least one operand is NaN
Compare xmm_four, xmm_neg_four, UNORD_Q, false
Compare xmm_neg_four, xmm_four, UNORD_Q, false
Compare xmm_zero, xmm_neg_zero, UNORD_Q, false
Compare xmm_neg_zero, xmm_zero, UNORD_Q, false
Compare xmm_nan, xmm_four, UNORD_Q, true
Compare xmm_four, xmm_nan, UNORD_Q, true
Compare xmm_nan, xmm_nan, UNORD_Q, true

; NEQ_UQ (4): not equal; any NaN operand -> true
Compare xmm_four, xmm_four, NEQ_UQ, false
Compare xmm_four, xmm_neg_four, NEQ_UQ, true
Compare xmm_zero, xmm_neg_zero, NEQ_UQ, false
Compare xmm_zero, xmm_zero, NEQ_UQ, false
Compare xmm_four, xmm_nan, NEQ_UQ, true
Compare xmm_nan, xmm_four, NEQ_UQ, true
Compare xmm_nan, xmm_nan, NEQ_UQ, true

; NLT_US (5): not (src1 < src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NLT_US, true
Compare xmm_neg_four, xmm_four, NLT_US, false
Compare xmm_zero, xmm_neg_zero, NLT_US, true
Compare xmm_neg_zero, xmm_zero, NLT_US, true
Compare xmm_nan, xmm_four, NLT_US, true
Compare xmm_four, xmm_nan, NLT_US, true
Compare xmm_nan, xmm_nan, NLT_US, true

; NLE_US (6): not (src1 <= src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NLE_US, true
Compare xmm_neg_four, xmm_four, NLE_US, false
Compare xmm_zero, xmm_neg_zero, NLE_US, false
Compare xmm_neg_zero, xmm_zero, NLE_US, false
Compare xmm_nan, xmm_four, NLE_US, true
Compare xmm_four, xmm_nan, NLE_US, true
Compare xmm_nan, xmm_nan, NLE_US, true

; ORD_Q (7): true only when neither operand is NaN
Compare xmm_four, xmm_neg_four, ORD_Q, true
Compare xmm_neg_four, xmm_four, ORD_Q, true
Compare xmm_zero, xmm_neg_zero, ORD_Q, true
Compare xmm_neg_zero, xmm_zero, ORD_Q, true
Compare xmm_nan, xmm_four, ORD_Q, false
Compare xmm_four, xmm_nan, ORD_Q, false
Compare xmm_nan, xmm_nan, ORD_Q, false

; EQ_UQ (8): equal; any NaN operand -> true
Compare xmm_four, xmm_neg_four, EQ_UQ, false
Compare xmm_four, xmm_four, EQ_UQ, true
Compare xmm_zero, xmm_neg_zero, EQ_UQ, true
Compare xmm_zero, xmm_zero, EQ_UQ, true
Compare xmm_four, xmm_nan, EQ_UQ, true
Compare xmm_nan, xmm_four, EQ_UQ, true
Compare xmm_nan, xmm_nan, EQ_UQ, true

; NGE_US (9): not (src1 >= src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NGE_US, false
Compare xmm_neg_four, xmm_four, NGE_US, true
Compare xmm_zero, xmm_neg_zero, NGE_US, false
Compare xmm_neg_zero, xmm_zero, NGE_US, false
Compare xmm_nan, xmm_four, NGE_US, true
Compare xmm_four, xmm_nan, NGE_US, true
Compare xmm_nan, xmm_nan, NGE_US, true

; NGT_US (10): not (src1 > src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NGT_US, false
Compare xmm_neg_four, xmm_four, NGT_US, true
Compare xmm_zero, xmm_neg_zero, NGT_US, true
Compare xmm_neg_zero, xmm_zero, NGT_US, true
Compare xmm_nan, xmm_four, NGT_US, true
Compare xmm_four, xmm_nan, NGT_US, true
Compare xmm_nan, xmm_nan, NGT_US, true

; FALSE_OQ (11): always false
Compare xmm_four, xmm_neg_four, FALSE_OQ, false
Compare xmm_neg_four, xmm_four, FALSE_OQ, false
Compare xmm_zero, xmm_neg_zero, FALSE_OQ, false
Compare xmm_neg_zero, xmm_zero, FALSE_OQ, false
Compare xmm_nan, xmm_four, FALSE_OQ, false
Compare xmm_four, xmm_nan, FALSE_OQ, false
Compare xmm_nan, xmm_nan, FALSE_OQ, false

; NEQ_OQ (12): not equal; any NaN operand -> false
Compare xmm_four, xmm_neg_four, NEQ_OQ, true
Compare xmm_four, xmm_four, NEQ_OQ, false
Compare xmm_zero, xmm_neg_zero, NEQ_OQ, false
Compare xmm_zero, xmm_zero, NEQ_OQ, false
Compare xmm_four, xmm_nan, NEQ_OQ, false
Compare xmm_nan, xmm_four, NEQ_OQ, false
Compare xmm_nan, xmm_nan, NEQ_OQ, false

; GE_OS (13): src1 >= src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, GE_OS, true
Compare xmm_neg_four, xmm_four, GE_OS, false
Compare xmm_zero, xmm_neg_zero, GE_OS, true
Compare xmm_neg_zero, xmm_zero, GE_OS, true
Compare xmm_nan, xmm_four, GE_OS, false
Compare xmm_four, xmm_nan, GE_OS, false
Compare xmm_nan, xmm_nan, GE_OS, false

; GT_OS (14): src1 > src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, GT_OS, true
Compare xmm_neg_four, xmm_four, GT_OS, false
Compare xmm_zero, xmm_neg_zero, GT_OS, false
Compare xmm_neg_zero, xmm_zero, GT_OS, false
Compare xmm_nan, xmm_four, GT_OS, false
Compare xmm_four, xmm_nan, GT_OS, false
Compare xmm_nan, xmm_nan, GT_OS, false

; TRUE_UQ (15): always true
Compare xmm_four, xmm_neg_four, TRUE_UQ, true
Compare xmm_neg_four, xmm_four, TRUE_UQ, true
Compare xmm_zero, xmm_neg_zero, TRUE_UQ, true
Compare xmm_neg_zero, xmm_zero, TRUE_UQ, true
Compare xmm_nan, xmm_four, TRUE_UQ, true
Compare xmm_four, xmm_nan, TRUE_UQ, true
Compare xmm_nan, xmm_nan, TRUE_UQ, true

; EQ_OS (16): equal; any NaN operand -> false
Compare xmm_four, xmm_neg_four, EQ_OS, false
Compare xmm_neg_four, xmm_four, EQ_OS, false
Compare xmm_zero, xmm_neg_zero, EQ_OS, true
Compare xmm_neg_zero, xmm_zero, EQ_OS, true
Compare xmm_nan, xmm_four, EQ_OS, false
Compare xmm_four, xmm_nan, EQ_OS, false
Compare xmm_nan, xmm_nan, EQ_OS, false

; LT_OQ (17): src1 < src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, LT_OQ, false
Compare xmm_neg_four, xmm_four, LT_OQ, true
Compare xmm_zero, xmm_neg_zero, LT_OQ, false
Compare xmm_neg_zero, xmm_zero, LT_OQ, false
Compare xmm_nan, xmm_four, LT_OQ, false
Compare xmm_four, xmm_nan, LT_OQ, false
Compare xmm_nan, xmm_nan, LT_OQ, false

; LE_OQ (18): src1 <= src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, LE_OQ, false
Compare xmm_neg_four, xmm_four, LE_OQ, true
Compare xmm_zero, xmm_neg_zero, LE_OQ, true
Compare xmm_neg_zero, xmm_zero, LE_OQ, true
Compare xmm_nan, xmm_four, LE_OQ, false
Compare xmm_four, xmm_nan, LE_OQ, false
Compare xmm_nan, xmm_nan, LE_OQ, false

; UNORD_S (19): true only when at least one operand is NaN
Compare xmm_four, xmm_neg_four, UNORD_S, false
Compare xmm_neg_four, xmm_four, UNORD_S, false
Compare xmm_zero, xmm_neg_zero, UNORD_S, false
Compare xmm_neg_zero, xmm_zero, UNORD_S, false
Compare xmm_nan, xmm_four, UNORD_S, true
Compare xmm_four, xmm_nan, UNORD_S, true
Compare xmm_nan, xmm_nan, UNORD_S, true

; NEQ_US (20): not equal; any NaN operand -> true
Compare xmm_four, xmm_neg_four, NEQ_US, true
Compare xmm_neg_four, xmm_four, NEQ_US, true
Compare xmm_zero, xmm_neg_zero, NEQ_US, false
Compare xmm_neg_zero, xmm_zero, NEQ_US, false
Compare xmm_nan, xmm_four, NEQ_US, true
Compare xmm_four, xmm_nan, NEQ_US, true
Compare xmm_nan, xmm_nan, NEQ_US, true

; NLT_UQ (21): not (src1 < src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NLT_UQ, true
Compare xmm_neg_four, xmm_four, NLT_UQ, false
Compare xmm_zero, xmm_neg_zero, NLT_UQ, true
Compare xmm_neg_zero, xmm_zero, NLT_UQ, true
Compare xmm_nan, xmm_four, NLT_UQ, true
Compare xmm_four, xmm_nan, NLT_UQ, true
Compare xmm_nan, xmm_nan, NLT_UQ, true

; NLE_UQ (22): not (src1 <= src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NLE_UQ, true
Compare xmm_neg_four, xmm_four, NLE_UQ, false
Compare xmm_zero, xmm_neg_zero, NLE_UQ, false
Compare xmm_neg_zero, xmm_zero, NLE_UQ, false
Compare xmm_nan, xmm_four, NLE_UQ, true
Compare xmm_four, xmm_nan, NLE_UQ, true
Compare xmm_nan, xmm_nan, NLE_UQ, true

; ORD_S (23): true only when neither operand is NaN
Compare xmm_four, xmm_neg_four, ORD_S, true
Compare xmm_neg_four, xmm_four, ORD_S, true
Compare xmm_zero, xmm_neg_zero, ORD_S, true
Compare xmm_neg_zero, xmm_zero, ORD_S, true
Compare xmm_nan, xmm_four, ORD_S, false
Compare xmm_four, xmm_nan, ORD_S, false
Compare xmm_nan, xmm_nan, ORD_S, false

; EQ_US (24): equal; any NaN operand -> true
Compare xmm_four, xmm_neg_four, EQ_US, false
Compare xmm_four, xmm_four, EQ_US, true
Compare xmm_zero, xmm_neg_zero, EQ_US, true
Compare xmm_zero, xmm_zero, EQ_US, true
Compare xmm_four, xmm_nan, EQ_US, true
Compare xmm_nan, xmm_four, EQ_US, true
Compare xmm_nan, xmm_nan, EQ_US, true

; NGE_UQ (25): not (src1 >= src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NGE_UQ, false
Compare xmm_neg_four, xmm_four, NGE_UQ, true
Compare xmm_zero, xmm_neg_zero, NGE_UQ, false
Compare xmm_neg_zero, xmm_zero, NGE_UQ, false
Compare xmm_nan, xmm_four, NGE_UQ, true
Compare xmm_four, xmm_nan, NGE_UQ, true
Compare xmm_nan, xmm_nan, NGE_UQ, true

; NGT_UQ (26): not (src1 > src2); any NaN operand -> true
Compare xmm_four, xmm_neg_four, NGT_UQ, false
Compare xmm_neg_four, xmm_four, NGT_UQ, true
Compare xmm_zero, xmm_neg_zero, NGT_UQ, true
Compare xmm_neg_zero, xmm_zero, NGT_UQ, true
Compare xmm_nan, xmm_four, NGT_UQ, true
Compare xmm_four, xmm_nan, NGT_UQ, true
Compare xmm_nan, xmm_nan, NGT_UQ, true

; FALSE_OS (27): always false
Compare xmm_four, xmm_neg_four, FALSE_OS, false
Compare xmm_neg_four, xmm_four, FALSE_OS, false
Compare xmm_zero, xmm_neg_zero, FALSE_OS, false
Compare xmm_neg_zero, xmm_zero, FALSE_OS, false
Compare xmm_nan, xmm_four, FALSE_OS, false
Compare xmm_four, xmm_nan, FALSE_OS, false
Compare xmm_nan, xmm_nan, FALSE_OS, false

; NEQ_OS (28): not equal; any NaN operand -> false
Compare xmm_four, xmm_neg_four, NEQ_OS, true
Compare xmm_four, xmm_four, NEQ_OS, false
Compare xmm_zero, xmm_neg_zero, NEQ_OS, false
Compare xmm_zero, xmm_zero, NEQ_OS, false
Compare xmm_four, xmm_nan, NEQ_OS, false
Compare xmm_nan, xmm_four, NEQ_OS, false
Compare xmm_nan, xmm_nan, NEQ_OS, false

; GE_OQ (29): src1 >= src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, GE_OQ, true
Compare xmm_neg_four, xmm_four, GE_OQ, false
Compare xmm_zero, xmm_neg_zero, GE_OQ, true
Compare xmm_neg_zero, xmm_zero, GE_OQ, true
Compare xmm_nan, xmm_four, GE_OQ, false
Compare xmm_four, xmm_nan, GE_OQ, false
Compare xmm_nan, xmm_nan, GE_OQ, false

; GT_OQ (30): src1 > src2; any NaN operand -> false
Compare xmm_four, xmm_neg_four, GT_OQ, true
Compare xmm_neg_four, xmm_four, GT_OQ, false
Compare xmm_zero, xmm_neg_zero, GT_OQ, false
Compare xmm_neg_zero, xmm_zero, GT_OQ, false
Compare xmm_nan, xmm_four, GT_OQ, false
Compare xmm_four, xmm_nan, GT_OQ, false
Compare xmm_nan, xmm_nan, GT_OQ, false

; TRUE_US (31): always true
Compare xmm_four, xmm_neg_four, TRUE_US, true
Compare xmm_neg_four, xmm_four, TRUE_US, true
Compare xmm_zero, xmm_neg_zero, TRUE_US, true
Compare xmm_neg_zero, xmm_zero, TRUE_US, true
Compare xmm_nan, xmm_four, TRUE_US, true
Compare xmm_four, xmm_nan, TRUE_US, true
Compare xmm_nan, xmm_nan, TRUE_US, true

; If ecx is not correct we can use the counter to determine which test failed
; Reached by `jnz .fail` from a failing Compare, and also by falling through
; after the last Compare, so the test always ends at this hlt.
.fail:
hlt

; Constant pool. Every entry is one 32-byte row so it can be loaded with an
; aligned 256-bit vmovdqa. The float under test is the lowest dword; the rest
; of each operand row is 0xCC filler.
align 32
.data:
; Row 0: -4.0f
dd -4.0
dd 0xCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 1: -0.0f
dd -0.0
dd 0xCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 2: +0.0f
dd 0.0
dd 0xCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Row 3: 4.0f
dd 4.0
dd 0xCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Single-precision NaN (0x7FC00000) in the lowest dword, filler above it.
.nan:
dq 0xCCCCCCCC7FC00000
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC
dq 0xCCCCCCCCCCCCCCCC

; Expected low-dword mask for a false compare (all zero). The Compare macro
; only reads the lowest dword of this row.
.false:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Expected low-dword mask for a true compare (0xFFFFFFFF). This row must stay
; exactly 32 bytes after .false because Compare addresses it as .false + 32 * 1.
.true:
dq 0x00000000FFFFFFFF
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

================================================
FILE: unittests/ASM/VEX/vcomisd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; Checks the flags produced by vcomisd for an ordered (less-than) and an
; unordered (NaN) comparison. The flags are captured with lahf, which places
; them in AH, i.e. bits [15:8] of RAX.
lea rdx, [rel .data]

; Low double of xmm0 = 1.0 (first .data row).
vmovaps xmm0, [rdx + 16 * 0]
vcomisd xmm0, [rdx + 16 * 1] ; 1.0 <comp> 4.0
; Ordered, less-than: only CF is set (PF is set only for an unordered result).
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; AH = 0x03 -> RAX = 0x0300, saved in RBX for the final check.
mov rax, 0
lahf
mov rbx, rax

vcomisd xmm0, [rdx + 16 * 2] ; 1.0 <comp> NaN
; Unordered: CF, PF and ZF are all set.
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; AH = 0x47 -> RAX = 0x4700.
mov rax, 0
lahf

hlt

; Three 16-byte rows; only the low qword of each row takes part in vcomisd.
align 16
.data:
; Row 0: 1.0 (low), 2.0 (high)
dq 0x3FF0000000000000
dq 0x4000000000000000

; Row 1: 4.0, 4.0
dq 0x4010000000000000
dq 0x4010000000000000

; Row 2: NaN (low), 4.0 (high)
dq 0x7FF8000000000000
dq 0x4010000000000000


================================================
FILE: unittests/ASM/VEX/vcomiss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; Checks the flags produced by vcomiss for an ordered (less-than) and an
; unordered (NaN) comparison. The flags are captured with lahf, which places
; them in AH, i.e. bits [15:8] of RAX.
lea rdx, [rel .data]

; Lowest float of xmm0 = 1.0 (0x3F800000, first .data row).
vmovaps xmm0, [rdx + 16 * 0]
vcomiss xmm0, [rdx + 16 * 1] ; 1.0 <comp> 4.0
; Ordered, less-than: only CF is set.
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; AH = 0x03 -> RAX = 0x0300, saved in RBX for the final check.
mov rax, 0
lahf
mov rbx, rax

vcomiss xmm0, [rdx + 16 * 2] ; 1.0 <comp> NaN
; Unordered: CF, PF and ZF are all set.
; 0: CF - 00000001
; 1:    - 00000010
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; AH = 0x47 -> RAX = 0x4700.
mov rax, 0
lahf

hlt

; Three 16-byte rows; only the lowest dword of each row takes part in vcomiss.
; The 0x51525354 dwords are filler above the float under test.
align 16
.data:
; Row 0: lowest float 1.0
dq 0x515253543F800000
dq 0x5152535440000000

; Row 1: lowest float 4.0
dq 0x5152535440800000
dq 0x5152535440800000

; Row 2: lowest float NaN (0x7FC00000)
dq 0x515253547FC00000
dq 0x5152535440800000


================================================
FILE: unittests/ASM/VEX/vcvtdq2pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4008000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x3FF0000000000000", "0x4000000000000000", "0x4008000000000000", "0x4010000000000000"],
    "XMM3":  ["0x4008000000000000", "0x4010000000000000", "0x3FF0000000000000", "0x4000000000000000"]
  }
}
%endif

; Exercises vcvtdq2pd (int32 -> double) in its 128-bit and 256-bit forms, with
; both register and memory sources.
lea rdx, [rel .data]

; ymm2 = second .data row: int32 elements 1, 2, 3, 4 (repeated in both halves).
vmovapd ymm2, [rdx + 32]

; 128-bit form: converts two int32 values into two doubles.
vcvtdq2pd xmm0, xmm2          ; 1, 2 -> 1.0, 2.0
; Memory source at offset 40 starts at the second qword of the row (3, 4).
vcvtdq2pd xmm1, [rdx + 40]    ; 3, 4 -> 3.0, 4.0

; 256-bit form: converts four int32 values into four doubles.
; Source and destination alias here (xmm2 is the low half of ymm2).
vcvtdq2pd ymm2, xmm2          ; 1, 2, 3, 4 -> 1.0, 2.0, 3.0, 4.0
; 16 bytes starting at offset 40 hold 3, 4, 1, 2.
vcvtdq2pd ymm3, [rdx + 40]    ; 3, 4, 1, 2 -> 3.0, 4.0, 1.0, 2.0

hlt

align 32
.data:
; Row 0: not read by the code above.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: int32 elements 1, 2, 3, 4, 1, 2, 3, 4 (lowest first).
dq 0x0000000200000001
dq 0x0000000400000003
dq 0x0000000200000001
dq 0x0000000400000003


================================================
FILE: unittests/ASM/VEX/vcvtdq2ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xBF8000003F800000", "0x437F000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xBF8000003F800000", "0x437F000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xBF8000003F800000", "0x437F000000000000", "0xBF8000003F800000", "0x437F000000000000"],
    "XMM4": ["0xBF8000003F800000", "0x437F000000000000", "0xBF8000003F800000", "0x437F000000000000"],
    "XMM6": ["0x4E8000004E7E0000", "0x4E8100004E808000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4E8000004E7E0000", "0x4E8100004E808000", "0x4E8000004E7E0000", "0x4E8100004E808000"]
  }
}
%endif

; Exercises vcvtdq2ps (int32 -> float) in its 128-bit and 256-bit forms, with
; both register and memory sources.
lea rdx, [rel .data]

; ymm1 = second .data row (small integers). ymm5 = first row; no later
; instruction in this file reads ymm5.
vmovapd ymm1, [rdx + 32]
vmovapd ymm5, [rdx]

; Memory-source forms.
vcvtdq2ps xmm2, [rdx + 32]    ; 1, -1, 0, 255 -> 1.0, -1.0, 0.0, 255.0
vcvtdq2ps ymm3, [rdx + 32]    ; same values in both 128-bit halves
; The first row holds float bit patterns; they are converted here as large
; int32 values (e.g. 0x3F800000 = 1065353216 -> 0x4E7E0000).
vcvtdq2ps xmm6, [rdx]
vcvtdq2ps ymm7, [rdx]

; Register-source forms.
vcvtdq2ps xmm0, xmm1
vcvtdq2ps ymm4, ymm1

hlt

align 32
.data:
; Row 0: bit patterns of the floats noted below (high dword, low dword).
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3

; Row 1: int32 values (high dword, low dword).
dq 0xFFFFFFFF00000001 ; -1, 1
dq 0x000000FF00000000 ; 255, 0
dq 0xFFFFFFFF00000001 ; -1, 1
dq 0x000000FF00000000 ; 255, 0


================================================
FILE: unittests/ASM/VEX/vcvtpd2dq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000004600000053", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000D00000029", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000001600000005", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x000000050000000A", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x000000430000001D", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000005B00000013", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000003300000028", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000001800000021", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x000000180000005B", "0x000000180000005B", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000005B00000063", "0x0000005B00000063", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x000000630000005B", "0x000000630000005B", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000004A00000041", "0x0000004A00000041", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000001900000023", "0x0000001900000023", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x0000005A00000006", "0x0000005A00000006", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000003400000021", "0x0000003400000021", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0000000A0000003A", "0x0000000A0000003A", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vcvtpd2dq (double -> int32) with 128-bit (oword) and 256-bit
; (yword) memory sources. The expected RegData holds the inputs rounded to the
; nearest integer (e.g. 69.50512 -> 0x46) with all unused upper bits zero.
lea rdx, [rel .data]

; Preload registers with garbage
; so that a result which fails to clear the upper bits of its destination
; shows up as a mismatch against the zeros expected in RegData.
vmovaps ymm0,  [rel .random_data + (0 * 32)]
vmovaps ymm1,  [rel .random_data + (1 * 32)]
vmovaps ymm2,  [rel .random_data + (2 * 32)]
vmovaps ymm3,  [rel .random_data + (3 * 32)]
vmovaps ymm4,  [rel .random_data + (4 * 32)]
vmovaps ymm5,  [rel .random_data + (5 * 32)]
vmovaps ymm6,  [rel .random_data + (6 * 32)]
vmovaps ymm7,  [rel .random_data + (7 * 32)]
vmovaps ymm8,  [rel .random_data + (8 * 32)]
vmovaps ymm9,  [rel .random_data + (9 * 32)]
vmovaps ymm10, [rel .random_data + (10 * 32)]
vmovaps ymm11, [rel .random_data + (11 * 32)]
vmovaps ymm12, [rel .random_data + (12 * 32)]
vmovaps ymm13, [rel .random_data + (13 * 32)]
vmovaps ymm14, [rel .random_data + (14 * 32)]
vmovaps ymm15, [rel .random_data + (15 * 32)]

; oword source: two doubles -> two int32 in bits [63:0] of the destination.
vcvtpd2dq xmm0,  oword [rdx + 32 * 0]
vcvtpd2dq xmm1,  oword [rdx + 32 * 1]
vcvtpd2dq xmm2,  oword [rdx + 32 * 2]
vcvtpd2dq xmm3,  oword [rdx + 32 * 3]
vcvtpd2dq xmm4,  oword [rdx + 32 * 4]
vcvtpd2dq xmm5,  oword [rdx + 32 * 5]
vcvtpd2dq xmm6,  oword [rdx + 32 * 6]
vcvtpd2dq xmm7,  oword [rdx + 32 * 7]
; yword source: four doubles -> four int32 filling bits [127:0].
vcvtpd2dq xmm8,  yword [rdx + 32 * 8]
vcvtpd2dq xmm9,  yword [rdx + 32 * 9]
vcvtpd2dq xmm10, yword [rdx + 32 * 10]
vcvtpd2dq xmm11, yword [rdx + 32 * 11]
vcvtpd2dq xmm12, yword [rdx + 32 * 12]
vcvtpd2dq xmm13, yword [rdx + 32 * 13]
vcvtpd2dq xmm14, yword [rdx + 32 * 14]
vcvtpd2dq xmm15, yword [rdx + 32 * 15]

hlt

; Input table for vcvtpd2dq. Each pair of `dq a, b` lines forms one 32-byte row
; holding the same two doubles twice, so an oword load sees (a, b) and a yword
; load sees (a, b, a, b). The code above reads rows 0-15 only.
align 32
.data:
; Rows 0-7: read with oword loads into xmm0-xmm7.
dq 83.0999 , 69.50512
dq 83.0999 , 69.50512

dq 41.02678, 13.05881
dq 41.02678, 13.05881

dq 5.35242 , 21.9932
dq 5.35242 , 21.9932

dq 9.67383 , 5.32372
dq 9.67383 , 5.32372

dq 29.02872, 66.50151
dq 29.02872, 66.50151

dq 19.30764, 91.3633
dq 19.30764, 91.3633

dq 40.45086, 50.96153
dq 40.45086, 50.96153

dq 32.64489, 23.97574
dq 32.64489, 23.97574

; Rows 8-15: read with yword loads into xmm8-xmm15.
dq 90.64316, 24.22547
dq 90.64316, 24.22547

dq 98.9394 , 91.21715
dq 98.9394 , 91.21715

dq 90.80143, 99.48407
dq 90.80143, 99.48407

dq 64.97245, 74.39838
dq 64.97245, 74.39838

dq 35.22761, 25.35321
dq 35.22761, 25.35321

dq 5.8732  , 90.19956
dq 5.8732  , 90.19956

dq 33.03133, 52.02952
dq 33.03133, 52.02952

dq 58.38554, 10.17531
dq 58.38554, 10.17531

; Rows 16-31: not referenced by any instruction in this file.
dq 47.84703, 84.04831
dq 47.84703, 84.04831

dq 90.02965, 65.81329
dq 90.02965, 65.81329

dq 96.27991, 6.64479
dq 96.27991, 6.64479

dq 25.58971, 95.00694
dq 25.58971, 95.00694

dq 88.1929 , 37.16964
dq 88.1929 , 37.16964

dq 49.52602, 10.27223
dq 49.52602, 10.27223

dq 77.70605, 20.21439
dq 77.70605, 20.21439

dq 9.8056  , 41.29389
dq 9.8056  , 41.29389

dq 15.4071 , 57.54286
dq 15.4071 , 57.54286

dq 9.61117 , 55.54302
dq 9.61117 , 55.54302

dq 52.90745, 4.88086
dq 52.90745, 4.88086

dq 72.52882, 3.0201
dq 72.52882, 3.0201

dq 56.55091, 71.22749
dq 56.55091, 71.22749

dq 61.84736, 88.74295
dq 61.84736, 88.74295

dq 47.72641, 24.17404
dq 47.72641, 24.17404

dq 33.70564, 96.71303
dq 33.70564, 96.71303

; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86


================================================
FILE: unittests/ASM/VEX/vcvtpd2dq_inexact.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000200000001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFEFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000200000001", "0x0000000200000001", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFFFFFFFEFFFFFFFF", "0xFFFFFFFEFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x8000000080000000", "0x8000000080000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VCVTPD2DQ (packed double -> packed int32) with 128-bit and 256-bit sources,
; each in register and memory form. The destination is always an XMM register;
; RegData expects every lane above the converted results to read back as zero.

lea rdx, [rel .inputs]

; Register source for the reg-reg forms: row 0 = {1.0, 2.0, 1.0, 2.0}.
vmovapd ymm2, [rdx + 0]

; Fill ymm0/ymm1 with the filler row (row 2) first, so that leftover bits would
; show up in the result if a conversion failed to clear the rest of its destination.
vmovapd ymm0, [rdx + 64]
vmovapd ymm1, [rdx + 64]

; 128-bit source: two doubles in, two int32 out.
vcvtpd2dq xmm0, xmm2
vcvtpd2dq xmm1, oword [rdx + 32]

; 256-bit source: four doubles in, four int32 out.
vcvtpd2dq xmm3, ymm2
vcvtpd2dq xmm4, yword [rdx + 32]

; Row 3 holds the special values; RegData expects 0x80000000 in all four lanes.
vcvtpd2dq xmm5, yword [rdx + 96]

hlt

align 32
.inputs:
; row 0: 1.0, 2.0, 1.0, 2.0
dq 0x3FF0000000000000, 0x4000000000000000, 0x3FF0000000000000, 0x4000000000000000

; row 1: -1.0, -2.0, -1.0, -2.0
dq 0xBFF0000000000000, 0xC000000000000000, 0xBFF0000000000000, 0xC000000000000000

; row 2: filler pattern used to pre-load the destinations
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; row 3: +inf, -inf, NaN, largest finite double
dq 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7fefffffffffffff


================================================
FILE: unittests/ASM/VEX/vcvtpd2ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4054C664C2F837B5", "0x40516053E2D6238E", "0x4044836D86EC17EC", "0x402A1E1C58255B03"],
    "XMM1":  ["0x401568E0C9D9D346", "0x4035FE425AEE6320", "0x402359003EEA209B", "0x40154B7D41743E96"],
    "XMM2":  ["0x403D075A31A4BDBA", "0x4050A018BD66277C", "0x40334EC17EBAF102", "0x4056D7404EA4A8C1"],
    "XMM7":  ["0x428B029F42A63326", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x41AFF21340AB4706", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x428500C641E83AD2", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4214ADB642B062C4", "0x41245B0E42461AA5", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x41A1B712429B697F", "0x42252CF2411CE3BD", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x42662BE34176837B", "0x425E2C0D4119C75A", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x409C30014253A13B", "0x4041495242910EC1", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0xff8000007f800000", "0x7f8000007fc00000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VCVTPD2PS (packed double -> packed single).
;   xmm7-xmm9   : 128-bit register source (two doubles -> two singles)
;   xmm10-xmm14 : 256-bit memory source   (four doubles -> four singles)
; ymm0-ymm2 keep the raw rows they were loaded with and are checked as well.

; Byte offset of the n-th 32-byte row of the table.
%define ROW(n) (32 * (n))

lea rdx, [rel .table]

; Sources for the register forms.
vmovapd ymm0, [rdx + ROW(0)]
vmovapd ymm1, [rdx + ROW(1)]
vmovapd ymm2, [rdx + ROW(2)]

; 128-bit register source: only the low two doubles are converted.
vcvtpd2ps xmm7, xmm0
vcvtpd2ps xmm8, xmm1
vcvtpd2ps xmm9, xmm2

; 256-bit memory source.
vcvtpd2ps xmm10, yword [rdx + ROW(10)]
vcvtpd2ps xmm11, yword [rdx + ROW(11)]
vcvtpd2ps xmm12, yword [rdx + ROW(12)]
vcvtpd2ps xmm13, yword [rdx + ROW(13)]
; Special values: RegData expects +inf, -inf, NaN, and +inf for the largest finite double.
vcvtpd2ps xmm14, yword [rdx + ROW(14)]

hlt

align 32
.table:
dq 83.0999, 69.50512, 41.02678, 13.05881   ; row 0
dq 5.35242, 21.9932, 9.67383, 5.32372      ; row 1
dq 29.02872, 66.50151, 19.30764, 91.3633   ; row 2
dq 40.45086, 50.96153, 32.64489, 23.97574  ; row 3
dq 90.64316, 24.22547, 98.9394, 91.21715   ; row 4
dq 90.80143, 99.48407, 64.97245, 74.39838  ; row 5
dq 35.22761, 25.35321, 5.8732, 90.19956    ; row 6
dq 33.03133, 52.02952, 58.38554, 10.17531  ; row 7
dq 47.84703, 84.04831, 90.02965, 65.81329  ; row 8
dq 96.27991, 6.64479, 25.58971, 95.00694   ; row 9
dq 88.1929, 37.16964, 49.52602, 10.27223   ; row 10
dq 77.70605, 20.21439, 9.8056, 41.29389    ; row 11
dq 15.4071, 57.54286, 9.61117, 55.54302    ; row 12
dq 52.90745, 4.88086, 72.52882, 3.0201     ; row 13

; row 14: +inf, -inf, NaN, largest finite double
dq 0x7ff0000000000000, 0xfff0000000000000, 0x7ff8000000000000, 0x7fefffffffffffff


================================================
FILE: unittests/ASM/VEX/vcvtph2ps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xFC007C00BC003C00", "0x42487E0003FF0001", "0", "0"],
    "XMM1": ["0x800000007BFFFBFF", "0x42A53DC534D136F3", "0", "0"],
    "XMM2": ["0xBF8000003F800000", "0xFF8000007F800000", "0", "0"],
    "XMM3": ["0x477FE000C77FE000", "0x8000000000000000", "0", "0"],
    "XMM4": ["0xBF8000003F800000", "0xFF8000007F800000", "0", "0"],
    "XMM5": ["0x387FC00033800000", "0x404900007FC00000", "0", "0"],
    "XMM6": ["0xFC007C00BC003C00", "0x42487E0003FF0001", "0x800000007BFFFBFF", "0x42A53DC534D136F3"],
    "XMM7": ["0xBF8000003F800000", "0xFF8000007F800000", "0x387FC00033800000", "0x404900007FC00000"],
    "XMM8": ["0xBF8000003F800000", "0xFF8000007F800000", "0x387FC00033800000", "0x404900007FC00000"],
    "XMM9": ["0x477FE000C77FE000", "0x8000000000000000", "0x3E9A20003EDE6000", "0x4054A0003FB8A000"]
  }
}
%endif

; VCVTPH2PS (packed half -> packed single), xmm and ymm destinations,
; each with a register source and a memory source.

lea rdx, [rel .halves]

; ---- xmm destination: four halves in, four singles out ----

; Raw half-precision inputs for the register forms.
vmovapd xmm0, [rdx + 0]
vmovapd xmm1, [rdx + 16]

; Register source: the low four halves of the register are converted.
vcvtph2ps xmm2, xmm0
vcvtph2ps xmm3, xmm1

; Memory source: halves 0-3 and halves 4-7 of the table.
vcvtph2ps xmm4, [rdx + 0]
vcvtph2ps xmm5, [rdx + 8]

; ---- ymm destination: eight halves in, eight singles out ----

; ymm6 holds the whole 32-byte table; only its low 128 bits feed the conversion.
vmovapd ymm6, [rdx + 0]

; Register source.
vcvtph2ps ymm7, xmm6

; Memory source: halves 0-7 and halves 8-15 of the table.
vcvtph2ps ymm8, [rdx + 0]
vcvtph2ps ymm9, [rdx + 16]

hlt

align 32
.halves:
; halves 0-3:   1.0, -1.0, +inf, -inf
dw 0x3C00, 0xBC00, 0x7C00, 0xFC00

; halves 4-7:   smallest positive subnormal, largest subnormal, NaN, pi
dw 0x0001, 0x03FF, 0x7E00, 0x4248

; halves 8-11:  most negative finite value, largest finite value, +0.0, -0.0
dw 0xFBFF, 0x7BFF, 0x0000, 0x8000

; halves 12-15: log_10(e), log_10(2), log_2(e), log_2(10)
dw 0x36F3, 0x34D1, 0x3DC5, 0x42A5


================================================
FILE: unittests/ASM/VEX/vcvtps2dq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000004600000053", "0x0000000D00000029", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000001600000005", "0x000000050000000A", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x000000430000001D", "0x0000005B00000013", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000003300000028", "0x0000001800000021", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x000000180000005B", "0x0000005B00000063", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x000000630000005B", "0x0000004A00000041", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000001900000023", "0x0000005A00000006", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000003400000021", "0x0000000A0000003A", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000005400000030", "0x000000420000005A", "0x0000005400000030", "0x000000420000005A"],
    "XMM9":  ["0x0000000700000060", "0x0000005F0000001A", "0x0000000700000060", "0x0000005F0000001A"],
    "XMM10": ["0x0000002500000058", "0x0000000A00000032", "0x0000002500000058", "0x0000000A00000032"],
    "XMM11": ["0x000000140000004E", "0x000000290000000A", "0x000000140000004E", "0x000000290000000A"],
    "XMM12": ["0x0000003A0000000F", "0x000000380000000A", "0x0000003A0000000F", "0x000000380000000A"],
    "XMM13": ["0x0000000500000035", "0x0000000300000049", "0x0000000500000035", "0x0000000300000049"],
    "XMM14": ["0x0000004700000039", "0x000000590000003E", "0x0000004700000039", "0x000000590000003E"],
    "XMM15": ["0x0000001800000030", "0x0000006100000022", "0x0000001800000030", "0x0000006100000022"]
  }
}
%endif

; VCVTPS2DQ (packed single -> packed int32) with memory sources.
; MXCSR is not touched here; RegData expects round-to-nearest results
; (e.g. 69.50512 -> 0x46).
; The table has sixteen 32-byte rows, each holding the same four floats twice,
; so a 128-bit read sees four values and a 256-bit read sees them repeated.

lea rdx, [rel .floats]

; xmm destinations: rows 0-7, low 16 bytes of each row.
vcvtps2dq xmm0, [rdx + 0]
vcvtps2dq xmm1, [rdx + 32]
vcvtps2dq xmm2, [rdx + 64]
vcvtps2dq xmm3, [rdx + 96]
vcvtps2dq xmm4, [rdx + 128]
vcvtps2dq xmm5, [rdx + 160]
vcvtps2dq xmm6, [rdx + 192]
vcvtps2dq xmm7, [rdx + 224]

; ymm destinations: rows 8-15, full 32 bytes of each row.
vcvtps2dq ymm8, [rdx + 256]
vcvtps2dq ymm9, [rdx + 288]
vcvtps2dq ymm10, [rdx + 320]
vcvtps2dq ymm11, [rdx + 352]
vcvtps2dq ymm12, [rdx + 384]
vcvtps2dq ymm13, [rdx + 416]
vcvtps2dq ymm14, [rdx + 448]
vcvtps2dq ymm15, [rdx + 480]

hlt

align 32
.floats:
times 2 dd 83.0999, 69.50512, 41.02678, 13.05881   ; row 0
times 2 dd 5.35242, 21.9932, 9.67383, 5.32372      ; row 1
times 2 dd 29.02872, 66.50151, 19.30764, 91.3633   ; row 2
times 2 dd 40.45086, 50.96153, 32.64489, 23.97574  ; row 3
times 2 dd 90.64316, 24.22547, 98.9394, 91.21715   ; row 4
times 2 dd 90.80143, 99.48407, 64.97245, 74.39838  ; row 5
times 2 dd 35.22761, 25.35321, 5.8732, 90.19956    ; row 6
times 2 dd 33.03133, 52.02952, 58.38554, 10.17531  ; row 7
times 2 dd 47.84703, 84.04831, 90.02965, 65.81329  ; row 8
times 2 dd 96.27991, 6.64479, 25.58971, 95.00694   ; row 9
times 2 dd 88.1929, 37.16964, 49.52602, 10.27223   ; row 10
times 2 dd 77.70605, 20.21439, 9.8056, 41.29389    ; row 11
times 2 dd 15.4071, 57.54286, 9.61117, 55.54302    ; row 12
times 2 dd 52.90745, 4.88086, 72.52882, 3.0201     ; row 13
times 2 dd 56.55091, 71.22749, 61.84736, 88.74295  ; row 14
times 2 dd 47.72641, 24.17404, 33.70564, 96.71303  ; row 15


================================================
FILE: unittests/ASM/VEX/vcvtps2dq_inexact.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000100000001", "0x0000000200000002", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000400000004", "0x0000000800000008", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000100000001", "0x0000000200000002", "0x0000000100000001", "0x0000000200000002"],
    "XMM4":  ["0x0000000400000004", "0x0000000800000008", "0x0000000400000004", "0x0000000800000008"],
    "XMM5":  ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x0000000000000000"]
  }
}
%endif

; VCVTPS2DQ on inputs that are not exact integers, with MXCSR switched to
; truncation, plus one row of special values.

; Select truncation before any conversion runs.
vldmxcsr [rel .mxcsr_truncate]

lea rdx, [rel .inputs]

; Register source for the reg-reg forms: row 0.
vmovapd ymm2, [rdx + 0]

; Fill ymm0/ymm1 with the filler row (row 2) first, so that leftover bits would
; show up in the result if a 128-bit conversion failed to clear the upper half.
vmovapd ymm0, [rdx + 64]
vmovapd ymm1, [rdx + 64]

; 128-bit forms: register source, then memory source (row 1).
vcvtps2dq xmm0, xmm2
vcvtps2dq xmm1, [rdx + 32]

; 256-bit forms: register source, then memory source (row 1).
vcvtps2dq ymm3, ymm2
vcvtps2dq ymm4, [rdx + 32]

; Special values (row 3): RegData expects 0x80000000 for the first six lanes
; and 0 for the two signed-zero lanes.
vcvtps2dq ymm5, [rdx + 96]

hlt

align 32
.inputs:
; row 0 (each qword is listed high float first): [1.5, 1.0] [2.9, 2.0], twice
dq 0x3FC000003F800000, 0x4039999A40000000
dq 0x3FC000003F800000, 0x4039999A40000000

; row 1: [4.1, 4.0] [8.5, 8.0], twice
dq 0x4083333340800000, 0x4108000041000000
dq 0x4083333340800000, 0x4108000041000000

; row 2: filler pattern used to pre-load the destinations
dq 0x4142434445464748, 0x5152535455565758
dq 0x4142434445464748, 0x5152535455565758

; row 3: infinities, NaN, large-magnitude finite values, and -0.0 / +0.0
dq 0x7fc000007f800000, 0xff800000ff7fffee
dq 0x7bc097cefbc097ce, 0x0000000080000000

; MXCSR image: 0x7F80 has both rounding-control bits set (truncate).
.mxcsr_truncate:
dq 0x0000000000007F80


================================================
FILE: unittests/ASM/VEX/vcvtps2pd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x428B029F42A63326", "0x4150F0E342241B6C", "0x409C30014253A13B", "0x4041495242910EC1"],
    "XMM1":  ["0x41AFF21340AB4706", "0x40AA5BEA411AC802", "0x42662BE34176837B", "0x425E2C0D4119C75A"],
    "XMM2":  ["0x428500C641E83AD2", "0x42B6BA02419A760C", "0x41A1B712429B697F", "0x42252CF2411CE3BD"],
    "XMM7":  ["0x4054C664C0000000", "0x40516053E0000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x401568E0C0000000", "0x4035FE4260000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x403D075A40000000", "0x4050A018C0000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x40560C5880000000", "0x404295B6C0000000", "0x4048C354A0000000", "0x40248B61C0000000"],
    "XMM11": ["0x40536D2FE0000000", "0x403436E240000000", "0x40239C77A0000000", "0x4044A59E40000000"],
    "XMM12": ["0x402ED06F60000000", "0x404CC57C60000000", "0x402338EB40000000", "0x404BC581A0000000"],
    "XMM13": ["0x404A742760000000", "0x4013860020000000", "0x405221D820000000", "0x4008292A40000000"]
  }
}
%endif

; Exercises VCVTPS2PD (packed single-precision -> packed double-precision):
; the xmm form with register sources and the ymm form with memory sources.
lea rdx, [rel .data]

; Rows 0-2 of the table are loaded in full; they are both the register
; sources below and checked as-is through XMM0-XMM2 in the CONFIG header.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]

; xmm destination: converts the two lowest floats of the source register.
vcvtps2pd xmm7,  xmm0
vcvtps2pd xmm8,  xmm1
vcvtps2pd xmm9,  xmm2
; ymm destination with memory source: converts the four floats (16 bytes)
; at the start of rows 10-13.
vcvtps2pd ymm10, [rdx + 32 * 10]
vcvtps2pd ymm11, [rdx + 32 * 11]
vcvtps2pd ymm12, [rdx + 32 * 12]
vcvtps2pd ymm13, [rdx + 32 * 13]

hlt

; Input table: 14 rows of eight single-precision floats (32 bytes per row).
; Rows are indexed by the `32 * n` offsets used above.
align 32
.data:
; Row 0
dd 83.0999, 69.50512, 41.02678, 13.05881
dd 52.90745, 4.88086, 72.52882, 3.0201

; Row 1
dd 5.35242, 21.9932, 9.67383, 5.32372
dd 15.4071, 57.54286, 9.61117, 55.54302

; Row 2
dd 29.02872, 66.50151, 19.30764, 91.3633
dd 77.70605, 20.21439, 9.8056, 41.29389

; Row 3
dd 40.45086, 50.96153, 32.64489, 23.97574
dd 88.1929, 37.16964, 49.52602, 10.27223

; Row 4
dd 90.64316, 24.22547, 98.9394, 91.21715
dd 96.27991, 6.64479, 25.58971, 95.00694

; Row 5
dd 90.80143, 99.48407, 64.97245, 74.39838
dd 47.84703, 84.04831, 90.02965, 65.81329

; Row 6
dd 35.22761, 25.35321, 5.8732, 90.19956
dd 33.03133, 52.02952, 58.38554, 10.17531

; Row 7
dd 33.03133, 52.02952, 58.38554, 10.17531
dd 83.0999, 69.50512, 41.02678, 13.05881

; Row 8
dd 47.84703, 84.04831, 90.02965, 65.81329
dd 5.35242, 21.9932, 9.67383, 5.32372

; Row 9
dd 96.27991, 6.64479, 25.58971, 95.00694
dd 29.02872, 66.50151, 19.30764, 91.3633

; Row 10 (source for ymm10)
dd 88.1929, 37.16964, 49.52602, 10.27223
dd 40.45086, 50.96153, 32.64489, 23.97574

; Row 11 (source for ymm11)
dd 77.70605, 20.21439, 9.8056, 41.29389
dd 90.64316, 24.22547, 98.9394, 91.21715

; Row 12 (source for ymm12)
dd 15.4071, 57.54286, 9.61117, 55.54302
dd 90.80143, 99.48407, 64.97245, 74.39838

; Row 13 (source for ymm13)
dd 52.90745, 4.88086, 72.52882, 3.0201
dd 35.22761, 25.35321, 5.8732, 90.19956


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_rd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007BFF00003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from the immediate: imm8 = 1 selects round down (toward -inf).
; Round Down

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.
; Expected lanes (CONFIG header): FLT_MAX -> 0x7BFF, FLT_MIN and
; FLT_TRUE_MIN -> 0x0000.

vcvtps2ph xmm2, xmm0, 1
vcvtps2ph xmm3, xmm1, 1

; 128-bit memory
; Each store writes 8 bytes; the two results are placed back to back and
; read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 1
vcvtps2ph [rel .memarea + 8], xmm1, 1
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 1

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 1
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_rd_mxcsr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007BFF00003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from MXCSR: imm8 = 100b sets bit 2, which selects MXCSR.RC
; instead of the immediate's own rounding field.
; Set up MXCSR to Round Down
; 0x3F80: RC (bits 14:13) = 01b (round toward -inf), exception mask bits 12:7 set.
vldmxcsr [rel .mxcsr]

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.

vcvtps2ph xmm2, xmm0, 100b
vcvtps2ph xmm3, xmm1, 100b

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 100b
vcvtps2ph [rel .memarea + 8], xmm1, 100b
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 100b

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 100b
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0

; MXCSR image; vldmxcsr reads the low 32 bits of this qword.
.mxcsr:
dq 0x0000000000003F80


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_rtne.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007C0000003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"],
    "XMM8": ["0x4800440040003c00", "0xc800c400c000bc00", "0x4142434445464748", "0x4142434445464748"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from the immediate: imm8 = 0 selects round to nearest even.
; Round to Nearest Even

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.
; Expected lanes (CONFIG header): FLT_MAX -> 0x7C00 (+inf), FLT_MIN and
; FLT_TRUE_MIN -> 0x0000.

vcvtps2ph xmm2, xmm0, 0
vcvtps2ph xmm3, xmm1, 0

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 0
vcvtps2ph [rel .memarea + 8], xmm1, 0
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 0

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 0
vmovapd xmm7, [rel .memarea + 16]

; GCC test failure
; The 256-bit store form writes into a 32-byte buffer pre-filled with a
; marker pattern. Reloading all 32 bytes checks that only the low 16 bytes
; changed: the expected XMM8 keeps the marker in its upper two qwords.
vmovaps ymm8, [rel .data_in]
vcvtps2ph [rel .data_bad], ymm8, 0
vmovaps ymm8, [rel .data_bad]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0

; Inputs for the store-size check: powers of two, exactly representable in half precision.
align 32
.data_in:
dd 1.0, 2.0, 4.0, 8.0, -1.0, -2.0, -4.0, -8.0

; Destination buffer for the store-size check, pre-filled with a marker pattern.
.data_bad:
dq 0x4142434445464748
dq 0x4142434445464748
dq 0x4142434445464748
dq 0x4142434445464748


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_rtne_mxcsr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007C0000003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007C0000003C00", "0x0000FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from MXCSR: imm8 = 100b sets bit 2, which selects MXCSR.RC
; instead of the immediate's own rounding field.
; Set up MXCSR to Round to Nearest Even
; 0x1F80: RC (bits 14:13) = 00b (round to nearest even), exception mask bits 12:7 set.
vldmxcsr [rel .mxcsr]

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.

vcvtps2ph xmm2, xmm0, 100b
vcvtps2ph xmm3, xmm1, 100b

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 100b
vcvtps2ph [rel .memarea + 8], xmm1, 100b
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 100b

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 100b
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0

; MXCSR image; vldmxcsr reads the low 32 bits of this qword.
.mxcsr:
dq 0x0000000000001F80


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_ru.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007C0000013C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0001FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"],
    "XMM7": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from the immediate: imm8 = 2 selects round up (toward +inf).
; Round Up

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.
; Expected lanes (CONFIG header): FLT_MIN and FLT_TRUE_MIN -> 0x0001,
; FLT_MAX -> 0x7C00 (+inf).

vcvtps2ph xmm2, xmm0, 2
vcvtps2ph xmm3, xmm1, 2

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 2
vcvtps2ph [rel .memarea + 8], xmm1, 2
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 2

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 2
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_ru_mxcsr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007C0000013C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0001FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"],
    "XMM7": ["0x7E007C0000013C00", "0x0001FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from MXCSR: imm8 = 100b sets bit 2, which selects MXCSR.RC
; instead of the immediate's own rounding field.
; Set up MXCSR to Round Up
; 0x5F80: RC (bits 14:13) = 10b (round toward +inf), exception mask bits 12:7 set.
vldmxcsr [rel .mxcsr]

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.

vcvtps2ph xmm2, xmm0, 100b
vcvtps2ph xmm3, xmm1, 100b

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 100b
vcvtps2ph [rel .memarea + 8], xmm1, 100b
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 100b

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 100b
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0

; MXCSR image; vldmxcsr reads the low 32 bits of this qword.
.mxcsr:
dq 0x0000000000005F80


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_trunc.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007BFF00003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from the immediate: imm8 = 3 selects truncation (toward zero).
; Truncate

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.
; Expected lanes (CONFIG header): FLT_MAX -> 0x7BFF, FLT_MIN and
; FLT_TRUE_MIN -> 0x0000.

vcvtps2ph xmm2, xmm0, 3
vcvtps2ph xmm3, xmm1, 3

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 3
vcvtps2ph [rel .memarea + 8], xmm1, 3
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 3

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 3
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0


================================================
FILE: unittests/ASM/VEX/vcvtps2ph_trunc_mxcsr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0", "0"],
    "XMM1": ["0x7F80000040600000", "0x00000001FF800000", "0", "0"],
    "XMM2": ["0x7E007BFF00003C00", "0x0000000000000000", "0", "0"],
    "XMM3": ["0x0000FC007C004300", "0x0000000000000000", "0", "0"],
    "XMM4": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM5": ["0x008000003F800000", "0x7FC000007F7FFFFF", "0x7F80000040600000", "0x00000001FF800000"],
    "XMM6": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"],
    "XMM7": ["0x7E007BFF00003C00", "0x0000FC007C004300", "0", "0"]
  }
}
%endif

; Exercises VCVTPS2PH (single-precision -> half-precision) with the rounding
; mode taken from MXCSR: imm8 = 100b sets bit 2, which selects MXCSR.RC
; instead of the immediate's own rounding field.
; Set up MXCSR to truncate
; 0x7F80: RC (bits 14:13) = 11b (round toward zero), exception mask bits 12:7 set.
vldmxcsr [rel .mxcsr]

lea rdx, [rel .data]

; 128-bit

; xmm0 = first four inputs, xmm1 = second four inputs
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; 128-bit register
; Four floats become four halves in the low 64 bits of the destination.

vcvtps2ph xmm2, xmm0, 100b
vcvtps2ph xmm3, xmm1, 100b

; 128-bit memory
; Each store writes 8 bytes; both results are read back as one 16-byte value.
vcvtps2ph [rel .memarea + 0], xmm0, 100b
vcvtps2ph [rel .memarea + 8], xmm1, 100b
vmovapd xmm4, [rel .memarea]

; 256-bit

; ymm5 = all eight inputs
vmovapd ymm5, [rdx]

; 256-bit register
; Eight floats become eight halves in a 128-bit destination.

vcvtps2ph xmm6, ymm5, 100b

; 256-bit memory
; The store writes 16 bytes, read back into xmm7.

vcvtps2ph [rel .memarea + 16], ymm5, 100b
vmovapd xmm7, [rel .memarea + 16]

hlt

; Eight single-precision inputs as raw bit patterns (32 bytes).
align 4096
.data:
dd 0x3F800000, 0x00800000, 0x7F7FFFFF, 0x7FC00000 ; 1.0, FLT_MIN, FLT_MAX, QNaN
dd 0x40600000, 0x7F800000, 0xFF800000, 0x00000001 ; 3.5, +inf, -inf, FLT_TRUE_MIN

; A quaint little area for testing the store variant of VCVTPS2PH
; (128 zeroed bytes; this test uses the first 32.)
.memarea: times 16 dq 0

; MXCSR image; vldmxcsr reads the low 32 bits of this qword.
.mxcsr:
dq 0x0000000000007F80


================================================
FILE: unittests/ASM/VEX/vcvtsd2si.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x0000000000000001",
    "RBX": "0x0000000000000002",
    "RCX": "0x0000000000000003",
    "RDX": "0x0000000000000004",
    "RSI": "0x00000000ffffffff",
    "RDI": "0xfffffffffffffffe",
    "RSP": "0x00000000fffffffd",
    "RBP": "0xfffffffffffffffc"
  }
}
%endif

; Exercises VCVTSD2SI (scalar double-precision -> signed integer) with 32-bit
; and 64-bit destinations and with register and memory sources.
lea rdx, [rel .data]

; Register sources: the low qword of xmm0 is 1.0 and of xmm1 is 2.0.
vmovapd xmm0, [rdx + 8 * 0]
vmovapd xmm1, [rdx + 8 * 2]

vcvtsd2si eax, xmm0
vcvtsd2si rbx, xmm1

; Memory sources addressed through rdx. The second conversion overwrites rdx
; itself, so the remaining loads use RIP-relative addressing.
vcvtsd2si ecx, [rdx + 8 * 4]
vcvtsd2si rdx, [rdx + 8 * 6]

; Negative inputs. The 32-bit destinations (esi, esp) are expected
; zero-extended to 64 bits (see RSI/RSP in the CONFIG header); the 64-bit
; destinations hold the sign-extended result.
vcvtsd2si esi, [rel .data + 8 * 8]
vcvtsd2si rdi, [rel .data + 8 * 10]
vcvtsd2si esp, [rel .data + 8 * 12]
vcvtsd2si rbp, [rel .data + 8 * 14]

hlt

; Eight 16-byte entries: the double to convert, followed by a filler qword
; that the scalar conversion does not use.
align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758

dq 0x4000000000000000 ; 2.0
dq 0x5152535455565758

dq 0x4008000000000000 ; 3.0
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758

dq 0xBFF0000000000000 ; -1.0
dq 0x5152535455565758

dq 0xC000000000000000 ; -2.0
dq 0x5152535455565758

dq 0xC008000000000000 ; -3.0
dq 0x5152535455565758

dq 0xC010000000000000 ; -4.0
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vcvtsd2ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4054C664C2F837B5", "0x40516053E2D6238E", "0x4044836D86EC17EC", "0x402A1E1C58255B03"],
    "XMM1":  ["0x401568E0C9D9D346", "0x4035FE425AEE6320", "0x402359003EEA209B", "0x40154B7D41743E96"],
    "XMM2":  ["0x403D075A31A4BDBA", "0x4050A018BD66277C", "0x40334EC17EBAF102", "0x4056D7404EA4A8C1"],
    "XMM3":  ["0x404439B5C7CD898B", "0x40497B136A400FBB", "0x4040528BC169C23B", "0x4037F9CA18BD6627"],
    "XMM4":  ["0x4056A929888F861A", "0x403839B866E43AA8", "0x4058BC1F212D7732", "0x4056CDE5C91D14E4"],
    "XMM5":  ["0x4056B34AA10E0221", "0x4058DEFB00BCBE62", "0x40503E3C9EECBFB1", "0x4052997F0ED3D85A"],
    "XMM6":  ["0x40419D2253111F0C", "0x40395A6BF8769EC3", "0x40177E28240B7803", "0x40568CC5974E65BF"],
    "XMM7":  ["0x4054C66442042015", "0x40516053E2D6238E", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x401568E0423F635C", "0x4035FE425AEE6320", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x403D075A42C08F50", "0x4050A018BD66277C", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x404439B542B062C4", "0x40497B136A400FBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x4056A929429B697F", "0x403839B866E43AA8", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x4056B34A4176837B", "0x4058DEFB00BCBE62", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x40419D224253A13B", "0x40395A6BF8769EC3", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VCVTSD2SS (scalar double-precision -> scalar single-precision)
; with a register first source and a memory second source.
lea rdx, [rel .data]

; Rows 0-6 are loaded in full; they are the first sources below and are
; also checked unchanged through XMM0-XMM6 in the CONFIG header.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 5]
vmovapd ymm6, [rdx + 32 * 6]

; Each conversion reads the first double of rows 7-13 and writes the float to
; bits 31:0 of the destination. Bits 127:32 come from the first source
; register, and the expected values show bits 255:128 as zero.
vcvtsd2ss xmm7,  xmm0, [rdx + 32 * 7]
vcvtsd2ss xmm8,  xmm1, [rdx + 32 * 8]
vcvtsd2ss xmm9,  xmm2, [rdx + 32 * 9]
vcvtsd2ss xmm10, xmm3, [rdx + 32 * 10]
vcvtsd2ss xmm11, xmm4, [rdx + 32 * 11]
vcvtsd2ss xmm12, xmm5, [rdx + 32 * 12]
vcvtsd2ss xmm13, xmm6, [rdx + 32 * 13]

hlt

; Input table: 14 rows of four doubles (32 bytes per row), indexed by the
; `32 * n` offsets used above.
align 32
.data:
; Row 0
dq 83.0999, 69.50512
dq 41.02678, 13.05881

; Row 1
dq 5.35242, 21.9932
dq 9.67383, 5.32372

; Row 2
dq 29.02872, 66.50151
dq 19.30764, 91.3633

; Row 3
dq 40.45086, 50.96153
dq 32.64489, 23.97574

; Row 4
dq 90.64316, 24.22547
dq 98.9394, 91.21715

; Row 5
dq 90.80143, 99.48407
dq 64.97245, 74.39838

; Row 6
dq 35.22761, 25.35321
dq 5.8732, 90.19956

; Row 7 (memory source for xmm7)
dq 33.03133, 52.02952
dq 58.38554, 10.17531

; Row 8 (memory source for xmm8)
dq 47.84703, 84.04831
dq 90.02965, 65.81329

; Row 9 (memory source for xmm9)
dq 96.27991, 6.64479
dq 25.58971, 95.00694

; Row 10 (memory source for xmm10)
dq 88.1929, 37.16964
dq 49.52602, 10.27223

; Row 11 (memory source for xmm11)
dq 77.70605, 20.21439
dq 9.8056, 41.29389

; Row 12 (memory source for xmm12)
dq 15.4071, 57.54286
dq 9.61117, 55.54302

; Row 13 (memory source for xmm13)
dq 52.90745, 4.88086
dq 72.52882, 3.0201


================================================
FILE: unittests/ASM/VEX/vcvtsi2sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x3FF0000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x4000000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4008000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4010000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VCVTSI2SD (signed integer -> scalar double-precision) with 32-bit
; and 64-bit integer sources taken from registers and from memory.
lea rdx, [rel .data]

; ymm0 holds the 32-byte marker pattern. It is the pass-through source for
; every conversion and is also checked unchanged through XMM0.
vmovapd ymm0, [rdx]

; Integer register sources: rax = 1, rbx = 2
mov rax, [rdx + 32]
mov rbx, [rdx + 40]

; The converted double goes to bits 63:0 of the destination. Bits 127:64
; come from xmm0, and the expected values show bits 255:128 as zero.
vcvtsi2sd xmm1, xmm0, rax
vcvtsi2sd xmm2, xmm0, ebx

; Memory sources: a dword holding 3 and a qword holding 4
vcvtsi2sd xmm3, xmm0, dword [rdx + 48]
vcvtsi2sd xmm4, xmm0, qword [rdx + 56]

hlt

align 32
.data:
; Marker pattern loaded into ymm0
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Integer inputs at byte offsets 32, 40, 48 and 56
dq 0x0000000000000001
dq 0x0000000000000002
dq 0x0000000000000003
dq 0x0000000000000004


================================================
FILE: unittests/ASM/VEX/vcvtsi2ss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM6":  ["0x414243443F800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434440000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4142434440400000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4142434440800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x41424344C0800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x41424344C0800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VCVTSI2SS (signed integer -> scalar single-precision) with 32-bit
; and 64-bit integer sources taken from registers and from memory.
lea rdx, [rel .data]

; xmm0 holds the low 16 bytes of the marker pattern and is the pass-through
; source for every conversion.
vmovapd xmm0, [rdx]

; Integer register sources: rax = 1, rbx = 2
mov rax, [rdx + 32]
mov rbx, [rdx + 40]

; The converted float goes to bits 31:0 of the destination. Bits 127:32 come
; from xmm0, and the expected values show bits 255:128 as zero.
vcvtsi2ss xmm6, xmm0, rax
vcvtsi2ss xmm7, xmm0, ebx

; Memory sources: a dword holding 3 and a qword holding 4
vcvtsi2ss xmm8, xmm0, dword [rdx + 48]
vcvtsi2ss xmm9, xmm0, qword [rdx + 56]

; rbx now has non-zero upper 32 bits; its low 32 bits are 0xFFFFFFFC (-4).
mov rbx, [rdx + 64]

; The 32-bit forms must use only the low dword: both expected results are
; 0xC0800000 (-4.0f).
vcvtsi2ss xmm10, xmm0, ebx
vcvtsi2ss xmm11, xmm0, dword [rdx + 64]

hlt

align 32
.data:
; Marker pattern; the first 16 bytes are loaded into xmm0
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Integer inputs at byte offsets 32, 40, 48, 56 and 64
dq 0x0000000000000001
dq 0x0000000000000002
dq 0x0000000000000003
dq 0x0000000000000004
dq 0x7FC00000FFFFFFFC ; Stick something in the top 32bits to ensure correctness


================================================
FILE: unittests/ASM/VEX/vcvtss2sd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x428B029F42A63326", "0x4150F0E342241B6C", "0x409C30014253A13B", "0x4041495242910EC1"],
    "XMM1":  ["0x41AFF21340AB4706", "0x40AA5BEA411AC802", "0x42662BE34176837B", "0x425E2C0D4119C75A"],
    "XMM2":  ["0x428500C641E83AD2", "0x42B6BA02419A760C", "0x41A1B712429B697F", "0x42252CF2411CE3BD"],
    "XMM3":  ["0x424BD89B4221CDAE", "0x41BFCE514202945E", "0x4214ADB642B062C4", "0x41245B0E42461AA5"],
    "XMM4":  ["0x41C1CDC342B5494C", "0x42B66F2E42C5E0F9", "0x40D4A21F42C08F50", "0x42BE038E41CCB7BA"],
    "XMM5":  ["0x42C6F7D842B59A55", "0x4294CBF84281F1E5", "0x42A818BC423F635C", "0x4283A06842B40F2E"],
    "XMM6":  ["0x41CAD360420CE913", "0x42B4662D40BBF141", "0x42501E3A42042015", "0x4122CE1242698ACB"],
    "XMM7":  ["0x40408402A0000000", "0x4150F0E342241B6C", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4047EC6B80000000", "0x40AA5BEA411AC802", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x405811EA00000000", "0x42B6BA02419A760C", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x40560C5880000000", "0x41BFCE514202945E", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x40536D2FE0000000", "0x42B66F2E42C5E0F9", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x402ED06F60000000", "0x4294CBF84281F1E5", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x404A742760000000", "0x42B4662D40BBF141", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VCVTSS2SD (scalar single-precision -> scalar double-precision)
; with a register first source and a memory second source.
lea rdx, [rel .data]

; Rows 0-6 are loaded in full; they are the first sources below and are
; also checked unchanged through XMM0-XMM6 in the CONFIG header.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 5]
vmovapd ymm6, [rdx + 32 * 6]


; Each conversion reads the first float of rows 7-13 and writes the double to
; bits 63:0 of the destination. Bits 127:64 come from the first source
; register, and the expected values show bits 255:128 as zero.
vcvtss2sd xmm7,  xmm0, [rdx + 32 * 7]
vcvtss2sd xmm8,  xmm1, [rdx + 32 * 8]
vcvtss2sd xmm9,  xmm2, [rdx + 32 * 9]
vcvtss2sd xmm10, xmm3, [rdx + 32 * 10]
vcvtss2sd xmm11, xmm4, [rdx + 32 * 11]
vcvtss2sd xmm12, xmm5, [rdx + 32 * 12]
vcvtss2sd xmm13, xmm6, [rdx + 32 * 13]

hlt

; Input table: 14 rows of eight single-precision floats (32 bytes per row),
; indexed by the `32 * n` offsets used above.
align 32
.data:
; Row 0
dd 83.0999, 69.50512, 41.02678, 13.05881
dd 52.90745, 4.88086, 72.52882, 3.0201

; Row 1
dd 5.35242, 21.9932, 9.67383, 5.32372
dd 15.4071, 57.54286, 9.61117, 55.54302

; Row 2
dd 29.02872, 66.50151, 19.30764, 91.3633
dd 77.70605, 20.21439, 9.8056, 41.29389

; Row 3
dd 40.45086, 50.96153, 32.64489, 23.97574
dd 88.1929, 37.16964, 49.52602, 10.27223

; Row 4
dd 90.64316, 24.22547, 98.9394, 91.21715
dd 96.27991, 6.64479, 25.58971, 95.00694

; Row 5
dd 90.80143, 99.48407, 64.97245, 74.39838
dd 47.84703, 84.04831, 90.02965, 65.81329

; Row 6
dd 35.22761, 25.35321, 5.8732, 90.19956
dd 33.03133, 52.02952, 58.38554, 10.17531

; Row 7 (memory source for xmm7)
dd 33.03133, 52.02952, 58.38554, 10.17531
dd 83.0999, 69.50512, 41.02678, 13.05881

; Row 8 (memory source for xmm8)
dd 47.84703, 84.04831, 90.02965, 65.81329
dd 5.35242, 21.9932, 9.67383, 5.32372

; Row 9 (memory source for xmm9)
dd 96.27991, 6.64479, 25.58971, 95.00694
dd 29.02872, 66.50151, 19.30764, 91.3633

; Row 10 (memory source for xmm10)
dd 88.1929, 37.16964, 49.52602, 10.27223
dd 40.45086, 50.96153, 32.64489, 23.97574

; Row 11 (memory source for xmm11)
dd 77.70605, 20.21439, 9.8056, 41.29389
dd 90.64316, 24.22547, 98.9394, 91.21715

; Row 12 (memory source for xmm12)
dd 15.4071, 57.54286, 9.61117, 55.54302
dd 90.80143, 99.48407, 64.97245, 74.39838

; Row 13 (memory source for xmm13)
dd 52.90745, 4.88086, 72.52882, 3.0201
dd 35.22761, 25.35321, 5.8732, 90.19956


================================================
FILE: unittests/ASM/VEX/vcvtss2si.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x0000000000000001",
    "RBX": "0xFFFFFFFFFFFFFFFF",
    "RCX": "0x00000000FFFFFFFE",
    "RDX": "0xFFFFFFFFFFFFFFFC"
  }
}
%endif

; Exercises VCVTSS2SI (scalar single-precision -> signed integer) with 32-bit
; and 64-bit destinations and with register and memory sources.
; r15 is the table base because rdx is one of the destinations under test.
lea r15, [rel .data]

; Register sources: the low dword of xmm0 is 1.0f and of xmm1 is -1.0f.
vmovapd xmm0, [r15 + 8 * 0]
vmovapd xmm1, [r15 + 8 * 2]

; Pre-fill every destination with all ones, presumably so that zero-extension
; by the 32-bit forms is observable (expected RAX and RCX have zero upper
; halves).
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1

vcvtss2si eax, xmm0
vcvtss2si rbx, xmm1

; Memory sources: only the low dword of each entry is converted.
vcvtss2si ecx, [r15 + 8 * 4]
vcvtss2si rdx, [r15 + 8 * 6]

hlt

; Four 16-byte entries. The low dword is the float to convert; the remaining
; bytes are a filler pattern.
align 32
.data:
dq 0x414243443F800000 ; low dword: 1.0f
dq 0x5152535455565758

dq 0x41424344BF800000 ; low dword: -1.0f
dq 0x5152535455565758

dq 0x41424344C0000000 ; low dword: -2.0f
dq 0x5152535455565758

dq 0x41424344C0800000 ; low dword: -4.0f
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vcvttpd2dq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000004500000053", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000D00000029", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000001500000005", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000500000009", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x000000420000001D", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000005B00000013", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000003200000028", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000001700000020", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x000000180000005A", "0x000000180000005A", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000005B00000062", "0x0000005B00000062", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x000000630000005A", "0x000000630000005A", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000004A00000040", "0x0000004A00000040", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000001900000023", "0x0000001900000023", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x0000005A00000005", "0x0000005A00000005", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000003400000021", "0x0000003400000021", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0000000A0000003A", "0x0000000A0000003A", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

lea rdx, [rel .data]

; Preload registers with garbage
vmovaps ymm0,  [rel .random_data + (0 * 32)]
vmovaps ymm1,  [rel .random_data + (1 * 32)]
vmovaps ymm2,  [rel .random_data + (2 * 32)]
vmovaps ymm3,  [rel .random_data + (3 * 32)]
vmovaps ymm4,  [rel .random_data + (4 * 32)]
vmovaps ymm5,  [rel .random_data + (5 * 32)]
vmovaps ymm6,  [rel .random_data + (6 * 32)]
vmovaps ymm7,  [rel .random_data + (7 * 32)]
vmovaps ymm8,  [rel .random_data + (8 * 32)]
vmovaps ymm9,  [rel .random_data + (9 * 32)]
vmovaps ymm10, [rel .random_data + (10 * 32)]
vmovaps ymm11, [rel .random_data + (11 * 32)]
vmovaps ymm12, [rel .random_data + (12 * 32)]
vmovaps ymm13, [rel .random_data + (13 * 32)]
vmovaps ymm14, [rel .random_data + (14 * 32)]
vmovaps ymm15, [rel .random_data + (15 * 32)]

vcvttpd2dq xmm0,  oword [rdx + 32 * 0]
vcvttpd2dq xmm1,  oword [rdx + 32 * 1]
vcvttpd2dq xmm2,  oword [rdx + 32 * 2]
vcvttpd2dq xmm3,  oword [rdx + 32 * 3]
vcvttpd2dq xmm4,  oword [rdx + 32 * 4]
vcvttpd2dq xmm5,  oword [rdx + 32 * 5]
vcvttpd2dq xmm6,  oword [rdx + 32 * 6]
vcvttpd2dq xmm7,  oword [rdx + 32 * 7]
vcvttpd2dq xmm8,  yword [rdx + 32 * 8]
vcvttpd2dq xmm9,  yword [rdx + 32 * 9]
vcvttpd2dq xmm10, yword [rdx + 32 * 10]
vcvttpd2dq xmm11, yword [rdx + 32 * 11]
vcvttpd2dq xmm12, yword [rdx + 32 * 12]
vcvttpd2dq xmm13, yword [rdx + 32 * 13]
vcvttpd2dq xmm14, yword [rdx + 32 * 14]
vcvttpd2dq xmm15, yword [rdx + 32 * 15]

hlt

align 32
.data:
dq 83.0999 , 69.50512
dq 83.0999 , 69.50512

dq 41.02678, 13.05881
dq 41.02678, 13.05881

dq 5.35242 , 21.9932
dq 5.35242 , 21.9932

dq 9.67383 , 5.32372
dq 9.67383 , 5.32372

dq 29.02872, 66.50151
dq 29.02872, 66.50151

dq 19.30764, 91.3633
dq 19.30764, 91.3633

dq 40.45086, 50.96153
dq 40.45086, 50.96153

dq 32.64489, 23.97574
dq 32.64489, 23.97574

dq 90.64316, 24.22547
dq 90.64316, 24.22547

dq 98.9394 , 91.21715
dq 98.9394 , 91.21715

dq 90.80143, 99.48407
dq 90.80143, 99.48407

dq 64.97245, 74.39838
dq 64.97245, 74.39838

dq 35.22761, 25.35321
dq 35.22761, 25.35321

dq 5.8732  , 90.19956
dq 5.8732  , 90.19956

dq 33.03133, 52.02952
dq 33.03133, 52.02952

dq 58.38554, 10.17531
dq 58.38554, 10.17531

dq 47.84703, 84.04831
dq 47.84703, 84.04831

dq 90.02965, 65.81329
dq 90.02965, 65.81329

dq 96.27991, 6.64479
dq 96.27991, 6.64479

dq 25.58971, 95.00694
dq 25.58971, 95.00694

dq 88.1929 , 37.16964
dq 88.1929 , 37.16964

dq 49.52602, 10.27223
dq 49.52602, 10.27223

dq 77.70605, 20.21439
dq 77.70605, 20.21439

dq 9.8056  , 41.29389
dq 9.8056  , 41.29389

dq 15.4071 , 57.54286
dq 15.4071 , 57.54286

dq 9.61117 , 55.54302
dq 9.61117 , 55.54302

dq 52.90745, 4.88086
dq 52.90745, 4.88086

dq 72.52882, 3.0201
dq 72.52882, 3.0201

dq 56.55091, 71.22749
dq 56.55091, 71.22749

dq 61.84736, 88.74295
dq 61.84736, 88.74295

dq 47.72641, 24.17404
dq 47.72641, 24.17404

dq 33.70564, 96.71303
dq 33.70564, 96.71303

; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86


================================================
FILE: unittests/ASM/VEX/vcvttps2dq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000004500000053", "0x0000000D00000029", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000001500000005", "0x0000000500000009", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x000000420000001D", "0x0000005B00000013", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000003200000028", "0x0000001700000020", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x000000180000005A", "0x0000005B00000062", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x000000630000005A", "0x0000004A00000040", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000001900000023", "0x0000005A00000005", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000003400000021", "0x0000000A0000003A", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x000000540000002F", "0x000000410000005A", "0x000000540000002F", "0x000000410000005A"],
    "XMM9":  ["0x0000000600000060", "0x0000005F00000019", "0x0000000600000060", "0x0000005F00000019"],
    "XMM10": ["0x0000002500000058", "0x0000000A00000031", "0x0000002500000058", "0x0000000A00000031"],
    "XMM11": ["0x000000140000004D", "0x0000002900000009", "0x000000140000004D", "0x0000002900000009"],
    "XMM12": ["0x000000390000000F", "0x0000003700000009", "0x000000390000000F", "0x0000003700000009"],
    "XMM13": ["0x0000000400000034", "0x0000000300000048", "0x0000000400000034", "0x0000000300000048"],
    "XMM14": ["0x0000004700000038", "0x000000580000003D", "0x0000004700000038", "0x000000580000003D"],
    "XMM15": ["0x000000180000002F", "0x0000006000000021", "0x000000180000002F", "0x0000006000000021"]
  }
}
%endif

; Test for vcvttps2dq (packed single-precision float -> packed int32, truncating)
; using a memory source operand for every conversion.
;
; The expected values in RegData are the inputs below with the fractional part
; dropped, e.g. 69.50512 -> 0x45 (69), not 70.
; All inputs are positive, so negative or out-of-range inputs are not covered here.

; rdx is the base pointer for the 32-byte input slots.
lea rdx, [rel .data]

; 128-bit forms: only the first 16 bytes of each slot are consumed.
; RegData expects the upper 128 bits of xmm0-xmm7 to be zero afterwards.
vcvttps2dq xmm0,  [rdx + 32 * 0]
vcvttps2dq xmm1,  [rdx + 32 * 1]
vcvttps2dq xmm2,  [rdx + 32 * 2]
vcvttps2dq xmm3,  [rdx + 32 * 3]
vcvttps2dq xmm4,  [rdx + 32 * 4]
vcvttps2dq xmm5,  [rdx + 32 * 5]
vcvttps2dq xmm6,  [rdx + 32 * 6]
vcvttps2dq xmm7,  [rdx + 32 * 7]
; 256-bit forms: the whole 32-byte slot is consumed, so RegData expects the
; same four integers in both 128-bit halves.
vcvttps2dq ymm8,  [rdx + 32 * 8]
vcvttps2dq ymm9,  [rdx + 32 * 9]
vcvttps2dq ymm10, [rdx + 32 * 10]
vcvttps2dq ymm11, [rdx + 32 * 11]
vcvttps2dq ymm12, [rdx + 32 * 12]
vcvttps2dq ymm13, [rdx + 32 * 13]
vcvttps2dq ymm14, [rdx + 32 * 14]
vcvttps2dq ymm15, [rdx + 32 * 15]

hlt

; Input table: sixteen 32-byte slots, one per destination register (slot N is
; read via [rdx + 32 * N]). Each slot repeats the same four floats twice.
align 32
.data:
dd 83.0999 , 69.50512, 41.02678, 13.05881
dd 83.0999 , 69.50512, 41.02678, 13.05881

dd 5.35242 , 21.9932 , 9.67383 , 5.32372
dd 5.35242 , 21.9932 , 9.67383 , 5.32372

dd 29.02872, 66.50151, 19.30764, 91.3633
dd 29.02872, 66.50151, 19.30764, 91.3633

dd 40.45086, 50.96153, 32.64489, 23.97574
dd 40.45086, 50.96153, 32.64489, 23.97574

dd 90.64316, 24.22547, 98.9394 , 91.21715
dd 90.64316, 24.22547, 98.9394 , 91.21715

dd 90.80143, 99.48407, 64.97245, 74.39838
dd 90.80143, 99.48407, 64.97245, 74.39838

dd 35.22761, 25.35321, 5.8732  , 90.19956
dd 35.22761, 25.35321, 5.8732  , 90.19956

dd 33.03133, 52.02952, 58.38554, 10.17531
dd 33.03133, 52.02952, 58.38554, 10.17531

dd 47.84703, 84.04831, 90.02965, 65.81329
dd 47.84703, 84.04831, 90.02965, 65.81329

dd 96.27991, 6.64479 , 25.58971, 95.00694
dd 96.27991, 6.64479 , 25.58971, 95.00694

dd 88.1929 , 37.16964, 49.52602, 10.27223
dd 88.1929 , 37.16964, 49.52602, 10.27223

dd 77.70605, 20.21439, 9.8056  , 41.29389
dd 77.70605, 20.21439, 9.8056  , 41.29389

dd 15.4071 , 57.54286, 9.61117 , 55.54302
dd 15.4071 , 57.54286, 9.61117 , 55.54302

dd 52.90745, 4.88086 , 72.52882, 3.0201
dd 52.90745, 4.88086 , 72.52882, 3.0201

dd 56.55091, 71.22749, 61.84736, 88.74295
dd 56.55091, 71.22749, 61.84736, 88.74295

dd 47.72641, 24.17404, 33.70564, 96.71303
dd 47.72641, 24.17404, 33.70564, 96.71303


================================================
FILE: unittests/ASM/VEX/vcvttsd2si.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x0000000000000001",
    "RBX": "0x0000000000000002",
    "RCX": "0x0000000000000003",
    "RDX": "0x0000000000000004",
    "RBP": "0x00000000FFFFFFFE",
    "RSI": "0xFFFFFFFFFFFFFFFC"
  }
}
%endif

; Test for vcvttsd2si (scalar double -> integer, truncating) covering
; register and memory sources with both 32-bit and 64-bit destinations.
;
; Destinations are pre-filled with all ones and negative inputs are included so
; that the expected values also pin down:
;   * a 32-bit destination write clears bits 63:32 (see RBP),
;   * a 64-bit destination receives the sign-extended result (see RSI).

; rdx is the base pointer for the input table. It is also the destination of
; the final conversion, so it cannot be pre-filled like the other registers.
lea rdx, [rel .data]

; Register sources: only the low qword of each 16-byte load is converted.
vmovapd xmm0, [rdx + 8 * 0]
vmovapd xmm1, [rdx + 8 * 2]

; Poison the destinations so stale upper bits would show up in RegData.
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rbp, -1
mov rsi, -1

vcvttsd2si eax, xmm0
vcvttsd2si rbx, xmm1

; Negative inputs from memory (-2.0 and -4.0).
vcvttsd2si ebp, [rdx + 8 * 8]
vcvttsd2si rsi, [rdx + 8 * 10]

; Positive inputs from memory. The last conversion overwrites the base
; pointer, so it must stay last.
vcvttsd2si ecx, [rdx + 8 * 4]
vcvttsd2si rdx, [rdx + 8 * 6]

hlt

; Input table: the first qword of each pair is the double under test, the
; second qword is filler that must not influence the result.
align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758

dq 0x4000000000000000 ; 2.0
dq 0x5152535455565758

dq 0x4008000000000000 ; 3.0
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758

dq 0xC000000000000000 ; -2.0
dq 0x5152535455565758

dq 0xC010000000000000 ; -4.0
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vcvttss2si.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x0000000000000001",
    "RBX": "0x0000000000000002",
    "RCX": "0x0000000000000003",
    "RDX": "0x0000000000000004",
    "RBP": "0x00000000FFFFFFFE",
    "RSI": "0xFFFFFFFFFFFFFFFC"
  }
}
%endif

; Test for vcvttss2si (scalar single -> integer, truncating) covering
; register and memory sources with both 32-bit and 64-bit destinations,
; including negative inputs.

; r15 is the base pointer for the input table; it is never used as a
; destination, so all checked registers can be pre-filled below.
lea r15, [rel .data]

; Register sources: only the low dword of each 16-byte load is converted.
vmovapd xmm0, [r15 + 8 * 0]
vmovapd xmm1, [r15 + 8 * 2]

; Pre-fill every destination with all ones so stale upper bits would show up
; in RegData (e.g. RBP is expected to end with bits 63:32 cleared).
mov rax, -1
mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rbp, -1
mov rsi, -1

vcvttss2si eax, xmm0
vcvttss2si rbx, xmm1

; Negative inputs from memory (-2.0 and -4.0).
vcvttss2si ebp, [r15 + 8 * 8]
vcvttss2si rsi, [r15 + 8 * 10]

; Positive inputs from memory (3.0 and 4.0).
vcvttss2si ecx, [r15 + 8 * 4]
vcvttss2si rdx, [r15 + 8 * 6]

hlt

; Input table: the low dword of the first qword in each pair is the float
; under test. The upper dword (0x41424344) and the second qword are filler
; that does not appear in any expected value.
align 32
.data:
dq 0x414243443F800000 ; low dword 0x3F800000 = 1.0
dq 0x5152535455565758

dq 0x4142434440000000 ; low dword 0x40000000 = 2.0
dq 0x5152535455565758

dq 0x4142434440400000 ; low dword 0x40400000 = 3.0
dq 0x5152535455565758

dq 0x4142434440800000 ; low dword 0x40800000 = 4.0
dq 0x5152535455565758

dq 0x41424344C0000000 ; low dword 0xC0000000 = -2.0
dq 0x5152535455565758

dq 0x41424344C0800000 ; low dword 0xC0800000 = -4.0
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vdivpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM1": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM2": ["0x3FE0000000000000", "0x3FE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3FE0000000000000", "0x3FE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000"],
    "XMM5": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM6": ["0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000", "0x3FE0000000000000"],
    "XMM7": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"]
  }
}
%endif

; Test for vdivpd (packed double-precision divide) in its 128-bit and 256-bit
; forms, with register and memory second operands and with the destination
; aliasing either source.

lea rdx, [rel .data]

; ymm0 = 1.0 in every lane, ymm1 = 2.0 in every lane.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; 1.0 / 2.0 = 0.5 (0x3FE0000000000000). RegData expects the upper 128 bits of
; xmm2 to be zero after the 128-bit form.
vdivpd xmm2, xmm0, [rdx + 32]
vdivpd ymm4, ymm0, [rdx + 32]

; Register only
; xmm3 = 1.0 / 2.0 = 0.5; ymm5 swaps the operands: 2.0 / 1.0 = 2.0.
vdivpd xmm3, xmm0, xmm1
vdivpd ymm5, ymm1, ymm0

; Some tests for aliasing destination and source vectors
; Destination aliases the dividend: ymm6 = 1.0 / 2.0 = 0.5.
vmovapd ymm6, ymm0
vdivpd ymm6, ymm6, ymm1

; Destination aliases the divisor: ymm7 = 2.0 / 1.0 = 2.0.
vmovapd ymm7, ymm0
vdivpd ymm7, ymm1, ymm7

hlt

; Two 32-byte vectors: four lanes of 1.0 followed by four lanes of 2.0.
align 32
.data:
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000

dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000


================================================
FILE: unittests/ASM/VEX/vdivps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM2": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7"],
    "XMM5": ["0x4040000040A00000", "0x4000000040155555", "0x4040000040A00000", "0x4000000040155555"],
    "XMM6": ["0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7", "0x3EAAAAAB3E4CCCCD", "0x3F0000003EDB6DB7"],
    "XMM7": ["0x4040000040A00000", "0x4000000040155555", "0x4040000040A00000", "0x4000000040155555"]
  }
}
%endif

; Test for vdivps (packed single-precision divide) in its 128-bit and 256-bit
; forms, with register and memory second operands and with the destination
; aliasing either source.

lea rdx, [rel .data]

; ymm0 = {1, 2, 3, 4} in each 128-bit half, ymm1 = {5, 6, 7, 8} in each half.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; Per lane: 1/5, 2/6, 3/7, 4/8. RegData expects the upper 128 bits of xmm2 to
; be zero after the 128-bit form.
vdivps xmm2, xmm0, [rdx + 32]
vdivps ymm4, ymm0, [rdx + 32]

; Register only
; xmm3 repeats the quotients above; ymm5 swaps the operands: 5/1, 6/2, 7/3, 8/4.
vdivps xmm3, xmm0, xmm1
vdivps ymm5, ymm1, ymm0

; Some tests for aliasing destination and source vectors
; Destination aliases the dividend: ymm6 = ymm0 / ymm1.
vmovapd ymm6, ymm0
vdivps ymm6, ymm6, ymm1

; Destination aliases the divisor: ymm7 = ymm1 / ymm0.
vmovapd ymm7, ymm0
vdivps ymm7, ymm1, ymm7

hlt

; Two 32-byte vectors. Each qword packs two floats; the trailing comments list
; them as (high dword, low dword).
align 32
.data:
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3

dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7


================================================
FILE: unittests/ASM/VEX/vdivsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FD0000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x3FE2000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4019000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x3FE47AE147AE147B", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Test for vdivsd (scalar double-precision divide) with register and memory
; second operands.
;
; For every destination written by vdivsd, RegData expects the quotient in
; bits 63:0, the filler pattern in bits 127:64 and zero in the upper 128 bits.
; Registers that are only loaded (xmm1, xmm3, xmm8, xmm9) keep their full
; 256-bit contents.

lea rdx, [rel .data]

; ymm0..ymm4 low qwords = 1.0, 4.0, 9.0, 16.0, 25.0; all other qwords are filler.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; xmm0 = 1.0 / 4.0 = 0.25, xmm2 = 9.0 / 16.0 = 0.5625 (destination aliases the dividend).
vdivsd xmm0, xmm0, xmm1
vdivsd xmm2, xmm2, xmm3

; Memory operand
; xmm5 = 25.0 / 4.0 = 6.25, xmm4 = 25.0 / 1.0 = 25.0. xmm5 is computed first
; because the second instruction overwrites xmm4.
vdivsd xmm5, xmm4, [rdx + 32 * 1]
vdivsd xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; xmm7 is cleared first, so its expected bits 127:64 can only come from the
; instruction itself. Low qword: 16.0 / 25.0 = 0.64.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vdivsd xmm7, xmm8, xmm9

hlt

; Five 32-byte slots: the first qword is the operand, the remaining three
; qwords are filler (0x5152535455565758).
align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vdivss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x414243443E800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x414243443F100000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434440C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x414243443F23D70A", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; VDIVSS: scalar single-precision divide.
; Every 32-byte row of .data carries the operand in its low dword; the rest
; of the row is filler (0x41424344 in the second dword, then three qwords
; of 0x5152535455565758).
lea rdx, [rel .data]

; ymm0..ymm4 <- rows 0..4 (1.0, 4.0, 9.0, 16.0, 25.0)
vmovapd ymm0, [rdx + 0]
vmovapd ymm1, [rdx + 32]
vmovapd ymm2, [rdx + 64]
vmovapd ymm3, [rdx + 96]
vmovapd ymm4, [rdx + 128]

; Divisor in a register, destination is the first source
vdivss xmm0, xmm0, xmm1 ; 1.0 / 4.0
vdivss xmm2, xmm2, xmm3 ; 9.0 / 16.0

; Divisor in memory. xmm5 must be produced before xmm4 is overwritten.
vdivss xmm5, xmm4, [rdx + 32] ; 25.0 / 4.0
vdivss xmm4, xmm4, [rdx + 0]  ; 25.0 / 1.0

; Destination distinct from both sources: everything above the low dword is
; expected to come from the first source (xmm8), not from the old xmm7.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 96]
vmovapd ymm9, [rdx + 128]
vdivss xmm7, xmm8, xmm9 ; 16.0 / 25.0

hlt

align 32
.data:
; Row 0
dd 1.0, 0x41424344
times 3 dq 0x5152535455565758

; Row 1
dd 4.0, 0x41424344
times 3 dq 0x5152535455565758

; Row 2
dd 9.0, 0x41424344
times 3 dq 0x5152535455565758

; Row 3
dd 16.0, 0x41424344
times 3 dq 0x5152535455565758

; Row 4
dd 25.0, 0x41424344
times 3 dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vdppd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM8":  ["0x41278C496C911A6E", "0x41278C496C911A6E", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x41235CCC64AFB361", "0x41235CCC64AFB361", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x412BACE273945DC5", "0x412BACE273945DC5", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x412CF22EF582FD76", "0x412CF22EF582FD76", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x4121C80E40F3BC7B", "0x4121C80E40F3BC7B", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VDPPD: double-precision dot product.
; Test i takes its register operand from row i of .data (rows 0-7) and its
; memory operand from row i + 8 (rows 8-15); results land in xmm8..xmm15.
lea rdx, [rel .data]

; Immediate 0xFF: RegData expects the same non-zero value in both low qwords.
vmovaps xmm0, [rdx + 16 * 0]
vdppd   xmm8, xmm0, [rdx + 16 * 8], 0xFF

vmovaps xmm1, [rdx + 16 * 1]
vdppd   xmm9, xmm1, [rdx + 16 * 9], 0xFF

vmovaps xmm2, [rdx + 16 * 2]
vdppd   xmm10, xmm2, [rdx + 16 * 10], 0xFF

vmovaps xmm3, [rdx + 16 * 3]
vdppd   xmm11, xmm3, [rdx + 16 * 11], 0xFF

vmovaps xmm4, [rdx + 16 * 4]
vdppd   xmm12, xmm4, [rdx + 16 * 12], 0xFF

; Immediates 0x00, 0xF0 and 0x0F: RegData expects an all-zero result.
vmovaps xmm5, [rdx + 16 * 5]
vdppd   xmm13, xmm5, [rdx + 16 * 13], 0x00

vmovaps xmm6, [rdx + 16 * 6]
vdppd   xmm14, xmm6, [rdx + 16 * 14], 0xF0

vmovaps xmm7, [rdx + 16 * 7]
vdppd   xmm15, xmm7, [rdx + 16 * 15], 0x0F

hlt

align 32
.data:
; Rows 0-7: register operands
dq 470.4127, 683.87
dq 711.3545, 511.5631
dq 996.8793, 548.682
dq 588.9345, 832.5925
dq 210.6613, 792.6059
dq 298.4494, 154.4895
dq 818.4   , 881.6027
dq 705.3087, 687.478
; Rows 8-15: memory operands
dq 737.0665, 621.31
dq 755.3097, 189.9614
dq 552.4284, 649.1206
dq 798.252 , 574.5732
dq 593.7565, 577.3129
dq 383.3844, 443.3476
dq 414.3571, 615.1567
dq 94.898  , 438.3107


================================================
FILE: unittests/ASM/VEX/vdpps_128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM8":  ["0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x499A5226499A5226", "0x499A5226499A5226", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x495F7816495F7816", "0x495F7816495F7816", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x496E3962496E3962", "0x496E3962496E3962", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VDPPS, 128-bit: single-precision dot product.
; Test i takes its register operand from row i of .data (rows 0-7) and its
; memory operand from row i + 8 (rows 8-15); results land in xmm8..xmm15.
lea rdx, [rel .data]

; Immediate 0xFF: RegData expects the same non-zero value in all four dwords.
vmovaps xmm0, [rdx + 16 * 0]
vdpps   xmm8, xmm0, [rdx + 16 * 8], 0xFF

vmovaps xmm1, [rdx + 16 * 1]
vdpps   xmm9, xmm1, [rdx + 16 * 9], 0xFF

vmovaps xmm2, [rdx + 16 * 2]
vdpps   xmm10, xmm2, [rdx + 16 * 10], 0xFF

vmovaps xmm3, [rdx + 16 * 3]
vdpps   xmm11, xmm3, [rdx + 16 * 11], 0xFF

vmovaps xmm4, [rdx + 16 * 4]
vdpps   xmm12, xmm4, [rdx + 16 * 12], 0xFF

; Immediates 0x00, 0xF0 and 0x0F: RegData expects an all-zero result.
vmovaps xmm5, [rdx + 16 * 5]
vdpps   xmm13, xmm5, [rdx + 16 * 13], 0x00

vmovaps xmm6, [rdx + 16 * 6]
vdpps   xmm14, xmm6, [rdx + 16 * 14], 0xF0

vmovaps xmm7, [rdx + 16 * 7]
vdpps   xmm15, xmm7, [rdx + 16 * 15], 0x0F

hlt

align 32
.data:
; Rows 0-7: register operands
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 754.227 , 586.0859, 127.7574, 114.8167
dd 764.4266, 226.6145, 337.864 , 320.3296
dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 265.8255, 536.4473, 754.3489, 460.681
dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 306.0592, 702.7584, 887.6473, 551.5908
dd 620.9001, 520.9829, 232.9532, 510.3388
; Rows 8-15: memory operands
dd 204.8474, 225.626 , 564.973 , 790.5175
dd 836.1953, 844.5266, 633.5626, 501.7409
dd 393.2616, 674.4415, 244.3265, 971.1598
dd 770.8029, 746.1836, 255.9902, 567.7578
dd 187.7175, 924.181 , 466.4362, 169.8267
dd 651.7481, 462.4206, 396.6924, 355.8538
dd 6.148   , 523.1443, 989.7004, 713.6646
dd 497.5427, 657.6965, 651.0534, 778.5236


================================================
FILE: unittests/ASM/VEX/vdpps_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM8":  ["0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E", "0x492FEB2E492FEB2E"],
    "XMM9":  ["0x499A5226499A5226", "0x499A5226499A5226", "0x499A5226499A5226", "0x499A5226499A5226"],
    "XMM10": ["0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4", "0x494ECFA4494ECFA4"],
    "XMM11": ["0x495F7816495F7816", "0x495F7816495F7816", "0x495F7816495F7816", "0x495F7816495F7816"],
    "XMM12": ["0x496E3962496E3962", "0x496E3962496E3962", "0x496E3962496E3962", "0x496E3962496E3962"],
    "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VDPPS, 256-bit: single-precision dot product on both 128-bit lanes.
; Test i takes its register operand from 32-byte row i of .data (rows 0-7)
; and its memory operand from row i + 8 (rows 8-15); results land in
; ymm8..ymm15. Each row repeats the same four floats in both lanes.
lea rdx, [rel .data]

; Immediate 0xFF: RegData expects the same non-zero value in all eight dwords.
vmovaps ymm0, [rdx + 32 * 0]
vdpps   ymm8, ymm0, [rdx + 32 * 8], 0xFF

vmovaps ymm1, [rdx + 32 * 1]
vdpps   ymm9, ymm1, [rdx + 32 * 9], 0xFF

vmovaps ymm2, [rdx + 32 * 2]
vdpps   ymm10, ymm2, [rdx + 32 * 10], 0xFF

vmovaps ymm3, [rdx + 32 * 3]
vdpps   ymm11, ymm3, [rdx + 32 * 11], 0xFF

vmovaps ymm4, [rdx + 32 * 4]
vdpps   ymm12, ymm4, [rdx + 32 * 12], 0xFF

; Immediates 0x00, 0xF0 and 0x0F: RegData expects an all-zero result.
vmovaps ymm5, [rdx + 32 * 5]
vdpps   ymm13, ymm5, [rdx + 32 * 13], 0x00

vmovaps ymm6, [rdx + 32 * 6]
vdpps   ymm14, ymm6, [rdx + 32 * 14], 0xF0

vmovaps ymm7, [rdx + 32 * 7]
vdpps   ymm15, ymm7, [rdx + 32 * 15], 0x0F

hlt

align 32
.data:
; Rows 0-7: register operands
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 655.9708, 532.2244, 108.0451, 512.4019

dd 754.227 , 586.0859, 127.7574, 114.8167
dd 754.227 , 586.0859, 127.7574, 114.8167

dd 764.4266, 226.6145, 337.864 , 320.3296
dd 764.4266, 226.6145, 337.864 , 320.3296

dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 296.5247, 480.0057, 28.4267 , 565.9418

dd 265.8255, 536.4473, 754.3489, 460.681
dd 265.8255, 536.4473, 754.3489, 460.681

dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 818.7269, 43.7204 , 464.592 , 847.9381

dd 306.0592, 702.7584, 887.6473, 551.5908
dd 306.0592, 702.7584, 887.6473, 551.5908

dd 620.9001, 520.9829, 232.9532, 510.3388
dd 620.9001, 520.9829, 232.9532, 510.3388

; Rows 8-15: memory operands
dd 204.8474, 225.626 , 564.973 , 790.5175
dd 204.8474, 225.626 , 564.973 , 790.5175

dd 836.1953, 844.5266, 633.5626, 501.7409
dd 836.1953, 844.5266, 633.5626, 501.7409

dd 393.2616, 674.4415, 244.3265, 971.1598
dd 393.2616, 674.4415, 244.3265, 971.1598

dd 770.8029, 746.1836, 255.9902, 567.7578
dd 770.8029, 746.1836, 255.9902, 567.7578

dd 187.7175, 924.181 , 466.4362, 169.8267
dd 187.7175, 924.181 , 466.4362, 169.8267

dd 651.7481, 462.4206, 396.6924, 355.8538
dd 651.7481, 462.4206, 396.6924, 355.8538

dd 6.148   , 523.1443, 989.7004, 713.6646
dd 6.148   , 523.1443, 989.7004, 713.6646

dd 497.5427, 657.6965, 651.0534, 778.5236
dd 497.5427, 657.6965, 651.0534, 778.5236


================================================
FILE: unittests/ASM/VEX/vextractf128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xEEEEEEEEFFFFFFFF", "0x9999999988888888", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x1111111122222222", "0x3333333344444444", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF99998888"],
    "XMM15": ["0x5555555566666666", "0x7777777788888888", "0x4444333322221111", "0x8888777766665555"]
  }
}
%endif

; VEXTRACTF128: extract the low (imm 0) or high (imm 1) 128-bit lane of a
; ymm register into an xmm register or into memory.
lea rdx, [rel .data]

; ymm0 = first 32 bytes of .data, ymm1 = second 32 bytes
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Load junk and overwrite register
; (RegData expects the upper 128 bits of ymm2/ymm3 to be zero afterwards)
vmovapd ymm2, [rdx + 32]
vmovapd ymm3, [rdx + 32]
vextractf128 xmm2, ymm0, 0
vextractf128 xmm3, ymm0, 1

; Store into memory
; Only the first 16 bytes of each scratch area are overwritten; the 256-bit
; reloads below pick up the stored lane in the low half and the original
; scratch contents in the high half.
vextractf128 [rel .scratch1], ymm1, 0
vextractf128 [rel .scratch2], ymm1, 1
vmovapd ymm14, [rel .scratch1]
vmovapd ymm15, [rel .scratch2]

hlt

; NOTE(review): the page alignment presumably keeps the written scratch
; areas on a different page than the code above - confirm.
align 4096
.data:
; Source for ymm0
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888

; Source for ymm1 (also used as the junk pre-load for ymm2/ymm3)
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888

; Store target for lane 0 of ymm1; the last two qwords must survive the store
.scratch1:
dq 0x8888777766665555
dq 0x4444333322221111
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF99998888

; Store target for lane 1 of ymm1; the last two qwords must survive the store
.scratch2:
dq 0xEEEEFFFF99998888
dq 0xAAAABBBBCCCCDDDD
dq 0x4444333322221111
dq 0x8888777766665555


================================================
FILE: unittests/ASM/VEX/vextracti128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xEEEEEEEEFFFFFFFF", "0x9999999988888888", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x1111111122222222", "0x3333333344444444", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF99998888"],
    "XMM15": ["0x5555555566666666", "0x7777777788888888", "0x4444333322221111", "0x8888777766665555"]
  }
}
%endif

; VEXTRACTI128: extract the low (imm 0) or high (imm 1) 128-bit lane of a
; ymm register into an xmm register or into memory.
lea rdx, [rel .data]

; ymm0 = first 32 bytes of .data, ymm1 = second 32 bytes
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Load junk and overwrite register
; (RegData expects the upper 128 bits of ymm2/ymm3 to be zero afterwards)
vmovapd ymm2, [rdx + 32]
vmovapd ymm3, [rdx + 32]
vextracti128 xmm2, ymm0, 0
vextracti128 xmm3, ymm0, 1

; Store into memory
; Only the first 16 bytes of each scratch area are overwritten; the 256-bit
; reloads below pick up the stored lane in the low half and the original
; scratch contents in the high half.
vextracti128 [rel .scratch1], ymm1, 0
vextracti128 [rel .scratch2], ymm1, 1
vmovapd ymm14, [rel .scratch1]
vmovapd ymm15, [rel .scratch2]

hlt

; NOTE(review): the page alignment presumably keeps the written scratch
; areas on a different page than the code above - confirm.
align 4096
.data:
; Source for ymm0
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888

; Source for ymm1 (also used as the junk pre-load for ymm2/ymm3)
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888

; Store target for lane 0 of ymm1; the last two qwords must survive the store
.scratch1:
dq 0x8888777766665555
dq 0x4444333322221111
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF99998888

; Store target for lane 1 of ymm1; the last two qwords must survive the store
.scratch2:
dq 0xEEEEFFFF99998888
dq 0xAAAABBBBCCCCDDDD
dq 0x4444333322221111
dq 0x8888777766665555


================================================
FILE: unittests/ASM/VEX/vextractps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x000000004d2fa47f",
    "RBX": "0x0000000067d29af3",
    "RCX": "0x8a6789f27404b890",
    "RDX": "0x00f658ab78236612"
  }
}
%endif

; VEXTRACTPS: extract one dword of an xmm register into a 32-bit GPR or
; into memory.
lea rdx, [rel .data]
; Scratch memory for the store forms.
; NOTE(review): assumes the test harness maps writable memory at
; 0xe0000000 - confirm.
mov rsi, 0xe0000000

; xmm1..xmm8 = consecutive 16-byte rows of .data
; (only xmm1..xmm6 are consumed below)
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]
vmovaps xmm8, [rdx + 16 * 7]

; Clear the first scratch qword before the stores
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations. RegData expects the upper 32 bits of rax/rbx to
; be zero.
vextractps eax, xmm1, 0
; Immediates above 3: the expected values correspond to the element
; selected by the low two immediate bits (0xFF -> 3, 4 -> 0, 5 -> 1).
vextractps ebx, xmm2, 0xFF
; Memory destinations: four dword stores filling two qwords of scratch
vextractps [rsi + 8 * 0 + 0], xmm3, 2
vextractps [rsi + 8 * 0 + 4], xmm4, 0xFF
vextractps [rsi + 8 * 1 + 0], xmm5, 4
vextractps [rsi + 8 * 1 + 4], xmm6, 5
; Read the stored dwords back for checking (rdx is reused; the .data
; pointer is no longer needed)
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]

hlt

align 16
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vgather_dpd_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xd7e273f1177f80d2", "0", "0"],
    "XMM5":  ["0x341ce2bf6334292d", "0x1ce2bf6334292db6", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x341ce2bf6334292d", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VGATHERDPD with a 128-bit destination
; Index scale of 1 (indexes are byte offsets from rax)
; 32-bit indexes

; Base address for every gather: the middle of the random data, so both
; positive and negative indexes stay inside the table.
lea rax, [rel .data_mid]

; Pre-load every destination with the first 32 bytes of .data. Elements that
; are not gathered are expected to keep the low 128 bits of this value, while
; the upper 128 bits are expected to be zero after the gather (see RegData).
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; The index (xmm0) and mask (xmm1) registers are reloaded before every
; gather; RegData expects the mask register to be zero once the gather has
; completed.

; Zero mask: nothing is gathered
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm13, [xmm0 * 1 + rax], xmm1

; .mask_0001: its only set sign bit lives in the fourth qword, which the
; 128-bit mask load below never reads, so this behaves like a zero mask here
; (RegData expects xmm10-xmm12 to keep their pre-loaded values).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm10, [xmm0 * 1 + rax], xmm1

; .mask_1000: sign bit set in the first qword only, so just element 0 (the
; low qword of the destination) is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm7, [xmm0 * 1 + rax], xmm1

; Full mask: both elements are gathered
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask (largest-magnitude offsets in the index tables)
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm3, [xmm0 * 1 + rax], xmm1

; xmm1 will be zero after this (checked via XMM1 in RegData).

hlt

align 4096

; Masks only care about the sign bit of each 64-bit element.
; Every mask table is four qwords long, but this test loads masks with
; 128-bit moves, so only the first two qwords of a table take part.
.mask_0000:
dq 0, 0, 0, 0

; Sign bit set in the fourth qword only; the first two qwords are zero, so
; a 128-bit load of this table yields an all-zero mask.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Sign bit set in the first qword only (selects element 0).
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Sign bit set in every qword (selects every element).
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Every index table is eight dwords long; the expected values of this
; 128-bit test correspond to the first two dwords of each table.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Offsets near both ends of the data table that surrounds .data_mid.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data that precedes .data_mid (reached through negative indexes).
; It is organised as 512-byte groups; each group is emitted as two `db`
; statements. Every physical line must carry its own `db` directive and must
; not end in a dangling comma: NASM has no implicit line continuation, so a
; line that starts with a bare number is not a valid statement.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5
db 0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5
db 0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

; Gather base address: non-negative indexes read from here onwards.
; The data is organised as 512-byte groups; each group is emitted as two
; `db` statements. Every physical line must carry its own `db` directive and
; must not end in a dangling comma: NASM has no implicit line continuation,
; so a line that starts with a bare number is not a valid statement.
.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28
db 0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b
db 0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0xd2ec7be65e82db69", "0", "0"],
    "XMM5":  ["0xea2f8a34fff5e934", "0x8a34fff5e9341ce2", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xea2f8a34fff5e934", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdpd, 128-bit (xmm) destination
; 2x index scale: every address is rax + index * 2 (the "2xdisp" of the file name;
; no constant displacement is used)
; 32-bit indexes, taken from xmm0
;
; Every case below has the shape
;   vgatherdpd dst, [xmm0 * 2 + rax], xmm1
; where xmm0 holds the indexes and xmm1 the per-element mask. With xmm operands the
; instruction gathers two 64-bit elements, using the two lowest 32-bit indexes.
; Results are compared against the RegData block at the top of this file, which
; expects zeros in the upper 128 bits of every destination.

; Base address is the middle of the data table so negative indexes stay inside it.
lea rax, [rel .data_mid]

; Pre-fill every destination with the first 32 bytes of .data so that elements the
; mask leaves untouched are recognisable in the final register state.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No element is selected; the low 128 bits of each destination keep the pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm13, [xmm0 * 2 + rax], xmm1

; First element Mask
; NOTE: .mask_0001 only sets its fourth qword. The 16-byte vmovaps load into xmm1
; picks up just the first two qwords (both zero), so in this 128-bit test these
; three cases behave like the zero-mask cases: RegData expects XMM10-XMM12 to keep
; the pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm10, [xmm0 * 2 + rax], xmm1

; Top element mask
; NOTE: .mask_1000 sets only its first qword, i.e. element 0 (the low qword) of the
; xmm mask. Only element 0 of each destination is gathered; element 1 keeps the
; pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm7, [xmm0 * 2 + rax], xmm1

; Full Mask
; Both 64-bit elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask
; The two indexes used are -504 and -256, i.e. byte offsets -1008 and -512 from
; .data_mid after the 2x scale.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm3, [xmm0 * 2 + rax], xmm1

; xmm1 will be zero after this: the gather clears its mask register, and RegData
; checks XMM1 for all zeros.

; Stop execution; the register state at this point is what gets compared.
hlt

; Page-align the constant tables. Each table in this group is 32 bytes long, so every
; label stays 32-byte aligned for the aligned loads (vmovaps/vmovapd) in the code above.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; The digits in a mask name follow the dq order in memory, element 0 first.
; The code above loads masks with a 16-byte vmovaps, so only the first two qwords
; of each table ever reach xmm1.
.mask_0000:
dq 0, 0, 0, 0

; Only element 3 is set; the two qwords visible to a 128-bit load are zero.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only element 0 is set.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Every element is set.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight entries, but the xmm-index vgatherdpd used in this file
; only consumes the first two of them.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 2x scale the first two entries address byte offsets -1008 and -512
; relative to .data_mid.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512 bytes per db statement (.data_mid starts 1024 bytes after .data)
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x97ba9a8e3087464d", "0", "0"],
    "XMM5":  ["0x9712ffc5b8c6d8a6", "0xb8c6d8a6df6efe3b", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x9712ffc5b8c6d8a6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdpd, 128-bit (xmm) destination
; 4x index scale: every address is rax + index * 4 (the "4xdisp" of the file name;
; no constant displacement is used)
; 32-bit indexes, taken from xmm0
;
; Every case below has the shape
;   vgatherdpd dst, [xmm0 * 4 + rax], xmm1
; where xmm0 holds the indexes and xmm1 the per-element mask. With xmm operands the
; instruction gathers two 64-bit elements, using the two lowest 32-bit indexes.
; Results are compared against the RegData block at the top of this file, which
; expects zeros in the upper 128 bits of every destination.

; Base address is the middle of the data table so negative indexes stay inside it.
lea rax, [rel .data_mid]

; Pre-fill every destination with the first 32 bytes of .data so that elements the
; mask leaves untouched are recognisable in the final register state.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No element is selected; the low 128 bits of each destination keep the pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm13, [xmm0 * 4 + rax], xmm1

; First element Mask
; NOTE: .mask_0001 only sets its fourth qword. The 16-byte vmovaps load into xmm1
; picks up just the first two qwords (both zero), so in this 128-bit test these
; three cases behave like the zero-mask cases: RegData expects XMM10-XMM12 to keep
; the pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm10, [xmm0 * 4 + rax], xmm1

; Top element mask
; NOTE: .mask_1000 sets only its first qword, i.e. element 0 (the low qword) of the
; xmm mask. Only element 0 of each destination is gathered; element 1 keeps the
; pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm7, [xmm0 * 4 + rax], xmm1

; Full Mask
; Both 64-bit elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask
; The two indexes used are -248 and -128, i.e. byte offsets -992 and -512 from
; .data_mid after the 4x scale.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm3, [xmm0 * 4 + rax], xmm1

; xmm1 will be zero after this: the gather clears its mask register, and RegData
; checks XMM1 for all zeros.

; Stop execution; the register state at this point is what gets compared.
hlt

; Page-align the constant tables. Each table in this group is 32 bytes long, so every
; label stays 32-byte aligned for the aligned loads (vmovaps/vmovapd) in the code above.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; The digits in a mask name follow the dq order in memory, element 0 first.
; The code above loads masks with a 16-byte vmovaps, so only the first two qwords
; of each table ever reach xmm1.
.mask_0000:
dq 0, 0, 0, 0

; Only element 3 is set; the two qwords visible to a 128-bit load are zero.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only element 0 is set.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Every element is set.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight entries, but the xmm-index vgatherdpd used in this file
; only consumes the first two of them.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 4x scale the first two entries address byte offsets -992 and -512
; relative to .data_mid.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement (.data_mid starts 1024 bytes after .data)
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xb57f2163f1a6aed4", "0x07ed4949d2f4229d", "0", "0"],
    "XMM5":  ["0x8b27e4deab3fd329", "0xbf8b198471089de2", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xb57f2163f1a6aed4", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x8b27e4deab3fd329", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vgatherdpd with a 128-bit destination (two qword lanes).
; 128-bit
; 8x index scale (the "8xdisp" in the file name; the addressing has no displacement)
; 32-bit indices

; rax is the gather base; .data_mid has random data both before and after it,
; so negative and positive indices both land inside the data tables.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data so lanes that the
; gather does not write can be told apart from gathered lanes.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads xmm0 (indices) and xmm1 (mask) before the gather;
; the mask must be reloaded every time because the expected XMM1 is all zero
; once the gathers have run.

; Zero mask: no lane is gathered, the low 128 bits keep their preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherdpd xmm13, [xmm0 * 8 + rax], xmm1

; .mask_0001: the only set sign bit is in the fourth qword of the table.
; The 128-bit load into xmm1 reads just the first two qwords (both zero), so
; in this 128-bit variant these cases behave like the zero-mask cases
; (expected XMM10-XMM12 keep the preloaded low 128 bits).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherdpd xmm10, [xmm0 * 8 + rax], xmm1

; .mask_1000: the sign bit is set in the first qword only, so only the lowest
; lane (lane 0) is gathered; lane 1 keeps its preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherdpd xmm7, [xmm0 * 8 + rax], xmm1

; Full mask: both lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask: indices far from the base in both directions.
; xmm3 is not preloaded; with a full mask both of its lanes are written.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherdpd xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this: the gather clears its mask register, which the
; expected RegData checks via XMM1.

hlt

; Start the tables on a 4096-byte boundary. Every mask/index table below is
; 32 bytes, so each label (and .data after them) stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads in the test body.
align 4096

; Masks only care about the sign bit (bit 63) of each qword element.
; The digits in each label list the lanes in memory order (lane 0 first).
; This 128-bit test loads only the first two qwords of each table into xmm1.
.mask_0000:
dq 0, 0, 0, 0

; Only the fourth qword is set; the 128-bit load sees two zero qwords.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword (lane 0) is set.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords; the expected results of this 128-bit dpd test
; match only the first two entries of each table being used as indices.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 8x scale these reach up to 960 bytes either side of .data_mid.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data: each db statement below holds 512 bytes, so .data_mid sits 1024 bytes after .data.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xd7e273f1177f80d2", "0xa9d7e273f1177f80", "0x35a9d7e273f1177f"],
    "XMM5":  ["0x341ce2bf6334292d", "0x1ce2bf6334292db6", "0xe2bf6334292db6b8", "0xbf6334292db6b85f"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x341ce2bf6334292d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x35a9d7e273f1177f"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xbf6334292db6b85f"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vgatherdpd with a 256-bit destination (four qword lanes).
; 256-bit
; 1x index scale (the "1xdisp" in the file name; the addressing has no displacement)
; 32-bit indices

; rax is the gather base; .data_mid has random data both before and after it,
; so negative and positive indices both land inside the data tables.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data so lanes that the
; gather does not write can be told apart from gathered lanes.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads xmm0 (four dword indices) and ymm1 (four qword
; masks) before the gather; the mask must be reloaded every time because the
; expected XMM1 is all zero once the gathers have run.

; Zero mask: no lane is gathered, the destination keeps its preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm15, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm14, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm13, [xmm0 * 1 + rax], ymm1

; .mask_0001: the sign bit is set in the fourth qword only, so only the
; highest lane (lane 3) is gathered; lanes 0-2 keep their preloaded values.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm12, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm11, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm10, [xmm0 * 1 + rax], ymm1

; .mask_1000: the sign bit is set in the first qword only, so only the lowest
; lane (lane 0) is gathered; lanes 1-3 keep their preloaded values.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm9, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm8, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm7, [xmm0 * 1 + rax], ymm1

; Full mask: all four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm6, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm5, [xmm0 * 1 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm4, [xmm0 * 1 + rax], ymm1

; Full range, full mask: indices far from the base.
; ymm3 is not preloaded; with a full mask all four of its lanes are written.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm3, [xmm0 * 1 + rax], ymm1

; ymm1 will be zero after this: the gather clears its mask register, which the
; expected RegData checks via XMM1.

hlt

; Start the tables on a 4096-byte boundary. Every mask/index table below is
; 32 bytes, so each label (and .data after them) stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads in the test body.
align 4096

; Masks only care about the sign bit (bit 63) of each qword element.
; The digits in each label list the lanes in memory order (lane 0 first).
.mask_0000:
dq 0, 0, 0, 0

; Only the fourth qword (lane 3, the highest lane) is set.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword (lane 0, the lowest lane) is set.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords; the test loads the first four into xmm0, one
; index per qword lane of the 256-bit gather.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 1x scale these are byte offsets of up to 992 either side of .data_mid.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data: each db statement below holds 512 bytes, so .data_mid sits 1024 bytes after .data.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0x7be65e82db698176", "0xd2ec7be65e82db69", "0x7f80d2ec7be65e82", "0xf1177f80d2ec7be6"],
    "XMM5":  ["0xea2f8a34fff5e934", "0x8a34fff5e9341ce2", "0xfff5e9341ce2bf63", "0xe9341ce2bf633429"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xea2f8a34fff5e934", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xf1177f80d2ec7be6"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe9341ce2bf633429"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 2x displacement
; 32-bit indexes
;
; Exercises vgatherdpd with a 256-bit (ymm) destination, signed 32-bit indexes
; taken from xmm0, and an index scale factor of 2 relative to the base in rax.

; The base pointer is .data_mid, which sits between the data lines, so both
; negative and positive indexes can be used.
lea rax, [rel .data_mid]

; Preload every destination except ymm3 with the first 32 bytes of .data so
; that elements left untouched by the mask are recognisable in the results.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Note: every load into xmm0 below reads 16 bytes, i.e. only the first four
; dwords of the selected index table.

; Zero mask
; No mask sign bit is set, so ymm15/ymm14/ymm13 are expected to keep their
; preloaded contents (see RegData XMM13-XMM15).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm15, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm14, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm13, [xmm0 * 2 + rax], ymm1

; Top element mask
; .mask_0001 has the sign bit set only in its last (highest) qword, so only
; element 3 of ymm12/ymm11/ymm10 is gathered (see RegData XMM10-XMM12).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm12, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm11, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm10, [xmm0 * 2 + rax], ymm1

; First element mask
; .mask_1000 has the sign bit set only in its first (lowest) qword, so only
; element 0 of ymm9/ymm8/ymm7 is gathered (see RegData XMM7-XMM9).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm9, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm8, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm7, [xmm0 * 2 + rax], ymm1

; Full Mask
; All four sign bits are set, so every element of ymm6/ymm5/ymm4 is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm6, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm5, [xmm0 * 2 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm4, [xmm0 * 2 + rax], ymm1

; Full range, full mask
; ymm3 is not preloaded; all four of its elements are written by this gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm3, [xmm0 * 2 + rax], ymm1

; ymm1 will be zero after this (RegData expects XMM1 to be all zero).

; End of the test code. NOTE(review): presumably this is where the harness
; stops and compares registers against RegData - confirm against the runner.
hlt

; Start the tables on a 4096-byte boundary. The aligned moves used by the test
; code (vmovaps/vmovapd) read these tables and the data that follows them.
align 4096

; Masks only care about the sign bit.
; The digits in each mask name follow memory order: the leftmost digit is the
; lowest qword (element 0) and the rightmost digit is the highest (element 3).
.mask_0000:
dq 0, 0, 0, 0

; Only the highest qword (element 3) is selected.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the lowest qword (element 0) is selected.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords, but the test loads them into xmm0, so only the
; first four dwords of a table are used as indexes.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the scale of 2 used by this test, the first four entries give byte
; offsets of -1008, -512, -256 and -128 from the base register.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0x3087464d8aa16a60", "0x97ba9a8e3087464d", "0x33b7153e97ba9a8e", "0xdb69817633b7153e"],
    "XMM5":  ["0x9712ffc5b8c6d8a6", "0xb8c6d8a6df6efe3b", "0xdf6efe3b3fd4ea2f", "0x3fd4ea2f8a34fff5"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x9712ffc5b8c6d8a6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xdb69817633b7153e"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x3fd4ea2f8a34fff5"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 4x displacement
; 32-bit indexes
;
; Exercises vgatherdpd with a 256-bit (ymm) destination, signed 32-bit indexes
; taken from xmm0, and an index scale factor of 4 relative to the base in rax.

; The base pointer is .data_mid, which sits between the data lines, so both
; negative and positive indexes can be used.
lea rax, [rel .data_mid]

; Preload every destination except ymm3 with the first 32 bytes of .data so
; that elements left untouched by the mask are recognisable in the results.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Note: every load into xmm0 below reads 16 bytes, i.e. only the first four
; dwords of the selected index table.

; Zero mask
; No mask sign bit is set, so ymm15/ymm14/ymm13 are expected to keep their
; preloaded contents (see RegData XMM13-XMM15).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm15, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm14, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm13, [xmm0 * 4 + rax], ymm1

; Top element mask
; .mask_0001 has the sign bit set only in its last (highest) qword, so only
; element 3 of ymm12/ymm11/ymm10 is gathered (see RegData XMM10-XMM12).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm12, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm11, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm10, [xmm0 * 4 + rax], ymm1

; First element mask
; .mask_1000 has the sign bit set only in its first (lowest) qword, so only
; element 0 of ymm9/ymm8/ymm7 is gathered (see RegData XMM7-XMM9).
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm9, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm8, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm7, [xmm0 * 4 + rax], ymm1

; Full Mask
; All four sign bits are set, so every element of ymm6/ymm5/ymm4 is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm6, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm5, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm4, [xmm0 * 4 + rax], ymm1

; Full range, full mask
; ymm3 is not preloaded; all four of its elements are written by this gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm3, [xmm0 * 4 + rax], ymm1

; ymm1 will be zero after this (RegData expects XMM1 to be all zero).

; End of the test code. NOTE(review): presumably this is where the harness
; stops and compares registers against RegData - confirm against the runner.
hlt

; Start the tables on a 4096-byte boundary. The aligned moves used by the test
; code (vmovaps/vmovapd) read these tables and the data that follows them.
align 4096

; Masks only care about the sign bit.
; The digits in each mask name follow memory order: the leftmost digit is the
; lowest qword (element 0) and the rightmost digit is the highest (element 3).
.mask_0000:
dq 0, 0, 0, 0

; Only the highest qword (element 3) is selected.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the lowest qword (element 0) is selected.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords, but the test loads them into xmm0, so only the
; first four dwords of a table are used as indexes.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the scale of 4 used by this test, the first four entries give byte
; offsets of -992, -512, -256 and -128 from the base register.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dpd_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0xb57f2163f1a6aed4", "0x07ed4949d2f4229d", "0x4735be742ef911b1", "0x1fe6464d0b85efdc"],
    "XMM5":  ["0x8b27e4deab3fd329", "0xbf8b198471089de2", "0x225165965d4e120a", "0xa886da539712ffc5"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xb57f2163f1a6aed4", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x8b27e4deab3fd329", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x1fe6464d0b85efdc"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xa886da539712ffc5"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdpd with a 256-bit destination (four 64-bit elements)
; 8x scale applied to each index (the "8xdisp" in the file name)
; 32-bit indexes, taken from xmm0

; rax is the gather base. It points at .data_mid, in the middle of the data
; blob, so that negative indexes still land on test data.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data, so that lanes a
; gather leaves untouched are recognisable in the expected register values.
; ymm3 is not preloaded: its gather uses a full mask and overwrites every lane.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads xmm0 (indexes) and ymm1 (mask) before the gather.
; Only the first four dwords of each eight-entry index table fit in xmm0.

; Zero mask: no lane is selected, ymm13-ymm15 keep their preloaded contents.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm15, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm14, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherdpd ymm13, [xmm0 * 8 + rax], ymm1

; Single-lane mask, .mask_0001: only the highest element (element 3) is
; gathered; elements 0-2 keep their preloaded contents.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm12, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm11, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherdpd ymm10, [xmm0 * 8 + rax], ymm1

; Single-lane mask, .mask_1000: only the lowest element (element 0) is
; gathered; elements 1-3 keep their preloaded contents.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm9, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm8, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherdpd ymm7, [xmm0 * 8 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm6, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm5, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm4, [xmm0 * 8 + rax], ymm1

; Full range, full mask. With four indexes in xmm0, only the four negative
; entries of .index_full_range (-120, -64, -32, -16) are actually used.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherdpd ymm3, [xmm0 * 8 + rax], ymm1

; The gather clears its mask register, so ymm1 is zero here even though the
; last value loaded into it was .mask_1111 (see XMM1 in the expected data).

hlt

; Start the tables on a fresh 4096-byte boundary. Every table below is 32
; bytes, so each label (and .data after them) stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads above.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; Names list the elements in memory order: the leftmost digit is element 0.
.mask_0000:
dq 0, 0, 0, 0

; Selects element 3 (the highest element).
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Selects element 0 (the lowest element).
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexes are signed 32-bit integers.
; Each table holds eight entries; this test loads only the first four.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Multiplied by the 8x scale these reach -960 .. +960 bytes around .data_mid.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512 bytes per db statement.
; .data_mid starts 1024 bytes into the blob, so negative indexes stay inside it.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x177f80d27f80d2ec", "0x73f1177ff1177f80", "0", "0"],
    "XMM5":  ["0x34292db66334292d", "0x2db6b85f292db6b8", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda2566334292d", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x73f1177f409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x2db6b85f409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps with a 128-bit destination (four 32-bit elements)
; 1x scale applied to each index (the "1xdisp" in the file name)
; 32-bit indexes, taken from xmm0

; rax is the gather base. It points at .data_mid, in the middle of the data
; blob, so that negative indexes still land on test data.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data, so that lanes a
; gather leaves untouched are recognisable in the expected register values.
; The 128-bit gathers zero the upper half of each ymm register, which is why
; the upper two quadwords are "0" in the expected data.
; xmm3 is not preloaded: its gather uses a full mask and overwrites every lane.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads xmm0 (indexes) and xmm1 (mask) before the gather.
; Only the first four dwords of each eight-entry table fit in an xmm register.

; Zero mask: no lane is selected, xmm13-xmm15 keep their preloaded contents.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm13, [xmm0 * 1 + rax], xmm1

; Single-lane mask, highest element: only element 3 is gathered; elements 0-2
; keep their preloaded contents.
; The set bit must sit in dword 3: a bit in dwords 4-7 is never loaded into
; xmm1 and would turn these three cases into repeats of the zero-mask cases.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm10, [xmm0 * 1 + rax], xmm1

; Single-lane mask, lowest element: only element 0 is gathered; elements 1-3
; keep their preloaded contents.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm7, [xmm0 * 1 + rax], xmm1

; Full mask: all four elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask. With four indexes in xmm0, only the four negative
; entries of .index_full_range (-992, -512, -256, -128) are actually used.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm3, [xmm0 * 1 + rax], xmm1

; The gather clears its mask register, so xmm1 is zero here even though the
; last value loaded into it was .mask_11111111 (see XMM1 in the expected data).

hlt

; Start the tables on a fresh 4096-byte boundary. Every table below is 32
; bytes, so each label (and .data after them) stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads above.
align 4096

; Masks only care about the sign bit of each 32-bit element.
; Each table is eight dwords wide, but this 128-bit test loads only the first
; four into xmm1. Names list the elements in memory order: the leftmost digit
; is element 0.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Selects element 3, the highest element a 128-bit gather can see.
.mask_00010000:
dd 0, 0, 0, 0x8000_0000, 0, 0, 0, 0

; Selects element 0 (the lowest element).
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexes are signed 32-bit integers.
; Each table holds eight entries; this test loads only the first four.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 1x scale these are byte offsets of -992 .. +992 around .data_mid.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512 bytes per db statement.
; .data_mid starts 1024 bytes into the blob, so negative indexes stay inside it.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x5e82db69db698176", "0xd2ec7be67be65e82", "0", "0"],
    "XMM5":  ["0xe9341ce2fff5e934", "0xbf6334291ce2bf63", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256fff5e934", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps with 128-bit (xmm) destination, index and mask operands
; Index scale factor: 2 (effective address = rax + index * 2)
; 32-bit signed indexes
;
; Each case loads an index vector into xmm0 and a mask into xmm1, then gathers
; into a destination that was preloaded with the first 32 bytes of .data.
; Lanes whose mask sign bit is clear keep the preloaded value. The expected
; results in CONFIG have zero upper 128 bits for every destination, even
; though the full ymm register was preloaded.
; NOTE: the mask and index tables hold eight dwords, but the xmm loads below
; read only the first four of each.

; Base address is the middle of the data region so that negative indexes
; still land inside it.
lea rax, [rel .data_mid]

; Preload every destination (except xmm3, which is fully overwritten by a
; full-mask gather) so untouched lanes are distinguishable from gathered ones.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, xmm13-xmm15 keep the preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm13, [xmm0 * 2 + rax], xmm1

; Mask with only dword 7 set. That dword lies outside the 16 bytes loaded into
; xmm1, so at this width the mask is effectively zero and xmm10-xmm12 are
; expected to keep the preloaded value (see CONFIG).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm10, [xmm0 * 2 + rax], xmm1

; Mask with only dword 0 set: only lane 0 (the lowest dword) is gathered,
; lanes 1-3 keep the preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm7, [xmm0 * 2 + rax], xmm1

; Full mask: all four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask. The four indexes used here are all negative and reach
; back as far as 1008 bytes before .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm3, [xmm0 * 2 + rax], xmm1

; The gather clears its mask register, so xmm1 is expected to be zero here
; (checked by CONFIG).

hlt

; Start the tables on a 4096-byte boundary. Every table below is 32 bytes, so
; each table and .data stay 32-byte aligned, as the aligned vmovaps/vmovapd
; loads in the test body require.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is dword 0.
; Each table holds eight dwords; the 128-bit loads in this test consume only
; the first four.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 is set, which is beyond the 16 bytes an xmm load reads.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 is set, selecting lane 0 of the gather.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; As with the masks, only the first four dwords of each table are loaded.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lanes 0-3 use 7, 6, 5, 4: byte offsets +14, +12, +10, +8 at scale 2.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lanes 0-3 use -8, -7, -6, -5: byte offsets -16, -14, -12, -10 at scale 2.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Lanes 0-3 use -504, -256, -128, -64: byte offsets -1008, -512, -256, -128
; at scale 2, all inside the 1024 bytes between .data and .data_mid.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512 bytes per db statement (.data_mid is 1024 bytes after .data)
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0", "0"],
    "XMM5":  ["0xdf6efe3bb8c6d8a6", "0x8a34fff53fd4ea2f", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256b8c6d8a6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps with 128-bit (xmm) destination, index and mask operands
; Index scale factor: 4 (effective address = rax + index * 4)
; 32-bit signed indexes
;
; Each case loads an index vector into xmm0 and a mask into xmm1, then gathers
; into a destination that was preloaded with the first 32 bytes of .data.
; Lanes whose mask sign bit is clear keep the preloaded value. The expected
; results in CONFIG have zero upper 128 bits for every destination, even
; though the full ymm register was preloaded.
; NOTE: the mask and index tables hold eight dwords, but the xmm loads below
; read only the first four of each.

; Base address is the middle of the data region so that negative indexes
; still land inside it.
lea rax, [rel .data_mid]

; Preload every destination (except xmm3, which is fully overwritten by a
; full-mask gather) so untouched lanes are distinguishable from gathered ones.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, xmm13-xmm15 keep the preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm13, [xmm0 * 4 + rax], xmm1

; Mask with only dword 7 set. That dword lies outside the 16 bytes loaded into
; xmm1, so at this width the mask is effectively zero and xmm10-xmm12 are
; expected to keep the preloaded value (see CONFIG).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherdps xmm10, [xmm0 * 4 + rax], xmm1

; Mask with only dword 0 set: only lane 0 (the lowest dword) is gathered,
; lanes 1-3 keep the preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm7, [xmm0 * 4 + rax], xmm1

; Full mask: all four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask. The four indexes used here are all negative and reach
; back as far as 992 bytes before .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm3, [xmm0 * 4 + rax], xmm1

; The gather clears its mask register, so xmm1 is expected to be zero here
; (checked by CONFIG).

hlt

; Start the tables on a 4096-byte boundary. Every table below is 32 bytes, so
; each table and .data stay 32-byte aligned, as the aligned vmovaps/vmovapd
; loads in the test body require.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is dword 0.
; Each table holds eight dwords; the 128-bit loads in this test consume only
; the first four.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 is set, which is beyond the 16 bytes an xmm load reads.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 is set, selecting lane 0 of the gather.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; As with the masks, only the first four dwords of each table are loaded.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lanes 0-3 use 7, 6, 5, 4: byte offsets +28, +24, +20, +16 at scale 4.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lanes 0-3 use -8, -7, -6, -5: byte offsets -32, -28, -24, -20 at scale 4.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Lanes 0-3 use -248, -128, -64, -32: byte offsets -992, -512, -256, -128
; at scale 4, all inside the 1024 bytes between .data and .data_mid.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement (.data_mid is 1024 bytes after .data)
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0xd2f4229df1a6aed4", "0x0b85efdc2ef911b1", "0", "0"],
    "XMM5":  ["0x71089de2ab3fd329", "0x9712ffc55d4e120a", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256f1a6aed4", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256ab3fd329", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0b85efdc409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x9712ffc5409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps test
; 128-bit
; 8x displacement
; 32-bit indexes
;
; Each gather below writes a different destination register so that every
; result can be checked independently against the RegData table above.
; Four mask patterns (none / highest element / lowest element / all) are each
; combined with three index patterns (all zero / positive / negative).

; Base address for every gather: the middle of the data blob, so both positive
; and negative scaled indexes land inside the blob.
lea rax, [rel .data_mid]

; Seed every destination with the first 32 bytes of .data. Elements whose mask
; bit is clear must keep this seed value; the upper 128 bits are expected to
; read back as zero after the 128-bit gather (see RegData).
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, destinations keep their seed value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherdps xmm13, [xmm0 * 8 + rax], xmm1

; Highest element mask: only element 3 is gathered.
; This must use .mask_00010000. The 8-dword table .mask_00000001 has its only
; set bit in dword 7, which a 128-bit load never reads, so using it here would
; silently repeat the zero-mask case.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00010000]
vgatherdps xmm10, [xmm0 * 8 + rax], xmm1

; Lowest element mask: only element 0 is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherdps xmm7, [xmm0 * 8 + rax], xmm1

; Full mask: all four elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherdps xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this: the gather clears the mask register, which is
; what the all-zero XMM1 expectation checks.

hlt

align 4096

; Masks only care about the sign bit.
; Label digits are in memory order: the leftmost digit is dword 0.
; Every table is 8 dwords (32 bytes) so that the tables that follow, and .data
; in particular (loaded with vmovapd ymm), stay 32-byte aligned.
; The 128-bit loads in this test only read the first four dwords of a table.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 set. Not visible to a 128-bit load; unused by this test.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 3 set: the highest element of a 128-bit gather.
.mask_00010000:
dd 0, 0, 0, 0x8000_0000, 0, 0, 0, 0

; Only dword 0 set: the lowest element.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; Only the first four entries of each table are read by the 128-bit loads.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 8x scale the four entries used here reach byte offsets
; -960, -512, -256 and -128 from .data_mid.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xf6300511c21448dd"],
    "XMM4":  ["0x177f80d27f80d2ec", "0x73f1177ff1177f80", "0xd7e273f1e273f117", "0x35a9d7e2a9d7e273"],
    "XMM5":  ["0x34292db66334292d", "0x2db6b85f292db6b8", "0xb85f6135b6b85f61", "0x6135a9d75f6135a9"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda2566334292d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x35a9d7e2f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps test
; 256-bit
; 1x displacement
; 32-bit indexes
;
; Each gather below writes a different destination register so that every
; result can be checked independently against the RegData table above.
; Four mask patterns (none / highest element / lowest element / all) are each
; combined with three index patterns (all zero / positive / negative).

; Base address for every gather: the middle of the data blob, so both positive
; and negative indexes land inside the blob.
lea rax, [rel .data_mid]

; Seed every destination with the first 32 bytes of .data. Elements whose mask
; bit is clear must keep this seed value.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, destinations keep their seed value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm15, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm14, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm13, [ymm0 * 1 + rax], ymm1

; Highest element mask: .mask_00000001 sets only dword 7, so only the top
; element of the destination is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm12, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm11, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm10, [ymm0 * 1 + rax], ymm1

; Lowest element mask: .mask_10000000 sets only dword 0, so only the bottom
; element of the destination is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm9, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm8, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm7, [ymm0 * 1 + rax], ymm1

; Full mask: all eight elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm6, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm5, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm4, [ymm0 * 1 + rax], ymm1

; Full range, full mask
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm3, [ymm0 * 1 + rax], ymm1

; ymm1 (the mask register) will be zero after this; RegData checks it as XMM1.
hlt

align 4096

; Masks only care about the sign bit.
; Label digits are in memory order: the leftmost digit is dword 0.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 (the highest element) set.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 (the lowest element) set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 1x scale these are byte offsets from .data_mid.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af52633359", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0x59f4e95cc21448dd"],
    "XMM4":  ["0x5e82db69db698176", "0xd2ec7be67be65e82", "0xf1177f807f80d2ec", "0xa9d7e273e273f117"],
    "XMM5":  ["0xe9341ce2fff5e934", "0xbf6334291ce2bf63", "0x2db6b85f34292db6", "0x6135a9d7b85f6135"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256fff5e934", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xa9d7e273f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VGATHERDPS with a 256-bit (ymm) destination
; 2x index scale (the "2xdisp" in the file name refers to this multiplier)
; 32-bit signed indexes
;
; Every gather below uses its own destination register and a different
; index/mask combination, so each result can be checked separately against
; the RegData block at the top of this file.

; rax is the gather base address. It points into the middle of the data table
; so that negative indexes read the bytes placed before .data_mid.
lea rax, [rel .data_mid]

; Preload ymm4-ymm15 with the first 32 bytes of .data so that any lane a
; gather leaves untouched holds a known value.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is selected, so ymm15/ymm14/ymm13 must keep the preloaded
; value regardless of the index vector.
; ymm1 is reloaded before every gather because the gather clears it.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm15, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm14, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm13, [ymm0 * 2 + rax], ymm1

; "First element" mask (.mask_00000001): only the last dword of the mask in
; memory order is set, so only lane 7 (bits 255:224) of ymm12/ymm11/ymm10 is
; loaded; lanes 0-6 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm12, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm11, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm10, [ymm0 * 2 + rax], ymm1

; "Top element" mask (.mask_10000000): only the first dword of the mask in
; memory order is set, so only lane 0 (bits 31:0) of ymm9/ymm8/ymm7 is loaded;
; lanes 1-7 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm9, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm8, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm7, [ymm0 * 2 + rax], ymm1

; Full mask: all eight lanes of ymm6/ymm5/ymm4 are loaded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm6, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm5, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm4, [ymm0 * 2 + rax], ymm1

; Full range, full mask: indexes reach close to both ends of the data table.
; ymm3 is not preloaded; with a full mask every lane is overwritten.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm3, [ymm0 * 2 + rax], ymm1

; The gather clears its mask register, so ymm1 is all zero here
; (RegData expects XMM1 to be 0).
; NOTE(review): hlt presumably signals end-of-test to the harness - confirm.
hlt

; Page-align the constant tables. Every table below is 8 dwords (32 bytes), so
; each label, and .data right after them, stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads above.
align 4096

; Masks only care about the sign bit.
; The digits in each label list the eight dwords in memory order, left to
; right: the leftmost digit is lane 0 (bits 31:0 of the ymm register) and the
; rightmost digit is lane 7 (bits 255:224).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Selects lane 7 only.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Selects lane 0 only.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Selects all eight lanes.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; The test scales each index by 2, so the byte offset from .data_mid is
; index * 2.

; Every lane reads the dword at .data_mid itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lane 0 uses index 7, lane 7 uses index 0. With a scale of 2 and 4-byte
; elements, neighbouring lanes read overlapping dwords.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lane 0 uses index -8, lane 7 uses index -1: all reads start before .data_mid.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Byte offsets from -1008 to +1008 around .data_mid.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xf6300511c21448dd"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0x7be65e82db698176", "0xe273f1177f80d2ec"],
    "XMM5":  ["0xdf6efe3bb8c6d8a6", "0x8a34fff53fd4ea2f", "0xbf633429e9341ce2", "0x6135a9d72db6b85f"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256b8c6d8a6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe273f117f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VGATHERDPS with a 256-bit (ymm) destination
; 4x index scale (the "4xdisp" in the file name refers to this multiplier)
; 32-bit signed indexes
;
; Every gather below uses its own destination register and a different
; index/mask combination, so each result can be checked separately against
; the RegData block at the top of this file.

; rax is the gather base address. It points into the middle of the data table
; so that negative indexes read the bytes placed before .data_mid.
lea rax, [rel .data_mid]

; Preload ymm4-ymm15 with the first 32 bytes of .data so that any lane a
; gather leaves untouched holds a known value.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is selected, so ymm15/ymm14/ymm13 must keep the preloaded
; value regardless of the index vector.
; ymm1 is reloaded before every gather because the gather clears it.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm15, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm14, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm13, [ymm0 * 4 + rax], ymm1

; "First element" mask (.mask_00000001): only the last dword of the mask in
; memory order is set, so only lane 7 (bits 255:224) of ymm12/ymm11/ymm10 is
; loaded; lanes 0-6 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm12, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm11, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm10, [ymm0 * 4 + rax], ymm1

; "Top element" mask (.mask_10000000): only the first dword of the mask in
; memory order is set, so only lane 0 (bits 31:0) of ymm9/ymm8/ymm7 is loaded;
; lanes 1-7 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm9, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm8, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm7, [ymm0 * 4 + rax], ymm1

; Full mask: all eight lanes of ymm6/ymm5/ymm4 are loaded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm6, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm5, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm4, [ymm0 * 4 + rax], ymm1

; Full range, full mask: indexes reach close to both ends of the data table.
; ymm3 is not preloaded; with a full mask every lane is overwritten.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm3, [ymm0 * 4 + rax], ymm1

; The gather clears its mask register, so ymm1 is all zero here
; (RegData expects XMM1 to be 0).
; NOTE(review): hlt presumably signals end-of-test to the harness - confirm.
hlt

; Page-align the constant tables. Every table below is 8 dwords (32 bytes), so
; each label, and .data right after them, stays 32-byte aligned for the
; aligned vmovaps/vmovapd loads above.
align 4096

; Masks only care about the sign bit.
; The digits in each label list the eight dwords in memory order, left to
; right: the leftmost digit is lane 0 (bits 31:0 of the ymm register) and the
; rightmost digit is lane 7 (bits 255:224).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Selects lane 7 only.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Selects lane 0 only.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Selects all eight lanes.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; The test scales each index by 4, so the byte offset from .data_mid is
; index * 4.

; Every lane reads the dword at .data_mid itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lane 0 uses index 7, lane 7 uses index 0. With a scale of 4 and 4-byte
; elements, the lanes read eight consecutive dwords in reverse order.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lane 0 uses index -8, lane 7 uses index -1: all reads start before .data_mid.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Byte offsets from -992 to +992 around .data_mid.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_dps_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xabf96d9bc21448dd"],
    "XMM4":  ["0xd2f4229df1a6aed4", "0x0b85efdc2ef911b1", "0x97ba9a8e8aa16a60", "0x7f80d2ecdb698176"],
    "XMM5":  ["0x71089de2ab3fd329", "0x9712ffc55d4e120a", "0x8a34fff5df6efe3b", "0x6135a9d7bf633429"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda256f1a6aed4", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256ab3fd329", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x7f80d2ecf7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherdps, 256-bit form: eight 32-bit elements are gathered into a ymm destination.
; 8x index scale: every index is multiplied by 8 in the address ([ymm0 * 8 + rax]).
; 32-bit indexes

; rax is the gather base. It points at .data_mid, which has 1024 bytes of .data
; in front of it, so the negative indexes used below still land inside the table.
lea rax, [rel .data_mid]

; Pre-fill every destination with the first 32 bytes of .data so that elements
; skipped by the mask keep a known, recognisable value in the expected RegData.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is selected, so ymm13-ymm15 must keep their pre-filled value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm15, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm14, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vgatherdps ymm13, [ymm0 * 8 + rax], ymm1

; Highest element only: .mask_00000001 sets the sign bit of the LAST dword in
; memory, i.e. element 7 (bits 255:224). Only that element is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm12, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm11, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vgatherdps ymm10, [ymm0 * 8 + rax], ymm1

; Lowest element only: .mask_10000000 sets the sign bit of the FIRST dword in
; memory, i.e. element 0 (bits 31:0). Only that element is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm9, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm8, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vgatherdps ymm7, [ymm0 * 8 + rax], ymm1

; Full mask: all eight elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm6, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm5, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm4, [ymm0 * 8 + rax], ymm1

; Full range, full mask: indexes -120..120 scaled by 8 reach byte offsets
; -960..+960 around .data_mid.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vgatherdps ymm3, [ymm0 * 8 + rax], ymm1

; The gather clears its mask register, so xmm1 is expected to be zero here
; (see the "XMM1" entry in the CONFIG block).
hlt

; Page-align the tables. Every table below is 32 bytes, so each label (and .data
; after them) stays 32-byte aligned as required by the aligned vmovaps/vmovapd loads.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the first
; dword (element 0) and the rightmost digit is the last dword (element 7).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Selects element 7 only (last dword in memory).
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Selects element 0 only (first dword in memory).
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Selects all eight elements.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; Every index is multiplied by 8 by the gather's addressing mode.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Element 0 uses index 7 (+56 bytes) ... element 7 uses index 0.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Element 0 uses index -8 (-64 bytes) ... element 7 uses index -1 (-8 bytes).
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Byte offsets -960, -512, -256, -128, +128, +256, +512, +960 from the base.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120
; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x6135a9d7e273f117", "0x5f6135a9d7e273f1", "0", "0"],
    "XMM5":  ["0x6334292db6b85f61", "0x34292db6b85f6135", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x6135a9d7e273f117", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x6334292db6b85f61", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x5f6135a9d7e273f1", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x34292db6b85f6135", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd, 128-bit form: two 64-bit elements are gathered into an xmm destination.
; 1x index scale: indexes are used as plain byte offsets ([xmm0 * 1 + rax]).
; 64-bit indexes
;
; The index and mask tables below are 32 bytes each, but the 128-bit loads only
; consume their first two qwords (element 0 and element 1).

; rax is the gather base. It points at .data_mid, which has 1024 bytes of .data
; in front of it, so the negative indexes used below still land inside the table.
lea rax, [rel .data_mid]

; Pre-fill every destination with the start of .data so that elements skipped by
; the mask keep a known value. The 128-bit gathers zero bits 255:128 afterwards,
; which is why the upper two qwords of every expected register are "0".
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is selected, xmm13-xmm15 keep their pre-filled low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm13, [xmm0 * 1 + rax], xmm1

; Upper element only: .mask_0001 selects element 1 (bits 127:64).
; Element 0 must keep its pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm10, [xmm0 * 1 + rax], xmm1

; Lower element only: .mask_1000 selects element 0 (bits 63:0).
; Element 1 must keep its pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm7, [xmm0 * 1 + rax], xmm1

; Full mask: both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask.
; NOTE: only the first two entries of .index_full_range (-992 and -512) are
; loaded by the 128-bit form, so only the negative end of the range is covered here.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm3, [xmm0 * 1 + rax], xmm1

; The gather clears its mask register, so xmm1 is expected to be zero here
; (see the "XMM1" entry in the CONFIG block).

hlt

; Page-align the tables. Every table below is 32 bytes, so each label (and .data
; after them) stays 32-byte aligned as required by the aligned vmovaps/vmovapd loads.
align 4096

; Masks only care about the sign bit.
; Only the first two qwords of each mask are loaded into xmm1 by this test.
.mask_0000:
dq 0, 0, 0, 0

; Selects element 1 only. The sign bit sits in the second qword so that it is
; inside the 128 bits that are actually loaded; placing it in the fourth qword
; would make this mask indistinguishable from .mask_0000 for a 128-bit gather.
.mask_0001:
dq 0, 0x8000_0000_0000_0000, 0, 0

; Selects element 0 only.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Selects both elements.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Only the first two qwords of each index table are loaded into xmm0 by this test.
.index_d0:
dq 0, 0, 0, 0

; Element 0 uses index 3, element 1 uses index 2.
.index_positive_increment:
dq 3, 2, 1, 0

; Element 0 uses index -4, element 1 uses index -3.
.index_negative_decrement:
dq -4, -3, -2, -1

; Element 0 uses index -992, element 1 uses index -512.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement (index scale factor of 1)
; 64-bit indexes
;
; Purpose: check that the gather address calculation
;   base + index * scale + disp
; wraps correctly modulo 2^64. With D = address of .data_mid, the test uses
;   base  = 2 * D
;   index = -1 - D (+/- a small lane offset)
;   disp  = 1
; so the sum 2*D + (-1 - D) + 1 wraps around to exactly D.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: index = -1 - D  ->  effective address D (.data_mid + 0)
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: index = -1 - D - 8  ->  effective address D - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: index = -1 - D - 16  ->  effective address D - 16
; Lanes 2 and 3 are stored for completeness; the 16-byte load into xmm0
; below only picks up lanes 0 and 1.
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: index = -1 - D + 16  ->  effective address D + 16
; The add is done on the full 64-bit register, like the other lanes, so a
; carry out of the low 16 bits propagates into the upper bits of the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; base = 2 * D, which cancels the -D folded into every index.
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

; Both loaded mask lanes have the sign bit set, so both lanes are gathered.
; Expected result (see RegData): low qword = 8 bytes at .data_mid,
; high qword = the 8 bytes immediately before .data_mid, upper half zero,
; and XMM1 (the mask) all zero afterwards.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm15, [xmm0 * 1 + rax + 1], xmm1

hlt

align 4096

; Mask with the sign bit set in every qword lane.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: the four index lanes are computed and written by the
; code above at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xa9d7e273f1177f80", "0", "0"],
    "XMM5":  ["0x1ce2bf6334292db6", "0xbf6334292db6b85f", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x1ce2bf6334292db6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement (i.e. the index is scaled by 2 in the addressing form)
; 64-bit indexes
;
; Purpose: exercise the 128-bit vgatherqpd with scale 2 across several
; index vectors and mask patterns. Every gather uses .data_mid as the base
; and each destination register is pre-seeded with the first 32 bytes of
; .data, so lanes that are masked off keep that seed value in the low 128
; bits.
;
; Only the first 16 bytes (two qwords) of each mask/index table are loaded
; into xmm registers below; the third and fourth qwords of the tables are
; not read by this 128-bit variant.

; Base address for every gather.
lea rax, [rel .data_mid]

; Seed all destination registers with the same known data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No lane is selected: xmm15/xmm14/xmm13 keep the seed in their low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm13, [xmm0 * 2 + rax], xmm1

; .mask_0001: only the fourth qword of the table has its sign bit set.
; That qword is outside the 16 bytes loaded into xmm1, so for this 128-bit
; variant the mask is effectively all-zero and xmm12/xmm11/xmm10 keep the
; seed (matches the expected RegData).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm10, [xmm0 * 2 + rax], xmm1

; .mask_1000: only the first qword of the table has its sign bit set, so
; only the lowest lane (lane 0) is gathered; lane 1 of xmm9/xmm8/xmm7 keeps
; the seed.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm7, [xmm0 * 2 + rax], xmm1

; Full Mask
; Both loaded lanes are selected.
;   .index_d0                 -> both lanes read .data_mid + 0
;   .index_positive_increment -> lanes read .data_mid + 6 and + 4
;   .index_negative_decrement -> lanes read .data_mid - 8 and - 6
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask
; The two loaded indexes (-504, -256) scale to byte offsets -1008 and -512
; from .data_mid. xmm3 is not pre-seeded; both lanes are overwritten.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm3, [xmm0 * 2 + rax], xmm1

; xmm1 will be zero after this.
; (The expected RegData checks XMM1 == 0.)

hlt

align 4096

; Masks only care about the sign bit.
; Each table holds four qwords, lowest lane first; the label digits follow
; that memory order. The xmm loads above read only the first two qwords.
.mask_0000:
dq 0, 0, 0, 0

.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Indexes are multiplied by the scale (2) before being added to the base.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

.index_full_range:
dq -504, -256, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement (index scale factor of 2)
; 64-bit indexes
;
; Purpose: check that the gather address calculation
;   base + index * 2 + disp
; wraps correctly modulo 2^64. With D = address of .data_mid, the test uses
;   base  = 2 * D
;   index = (-1 - D +/- lane offset) >> 1   (arithmetic shift)
;   disp  = 2
; NOTE(review): the arithmetic below lands exactly on D, D - 8, ... only if
; D is even, so that (-1 - D) is odd and the shift rounds down by one;
; .data_mid is defined outside the visible part of this file - confirm its
; alignment.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: 2 * index = -2 - D  ->  effective address D (.data_mid + 0)
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: 2 * index = -10 - D  ->  effective address D - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: 2 * index = -18 - D  ->  effective address D - 16
; Lanes 2 and 3 are stored for completeness; the 16-byte load into xmm0
; below only picks up lanes 0 and 1.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: 2 * index = 14 - D  ->  effective address D + 16
; The add is done on the full 64-bit register, like the other lanes, so a
; carry out of the low 16 bits propagates into the upper bits of the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; base = 2 * D, which cancels the -D folded into every scaled index.
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

; Both loaded mask lanes have the sign bit set, so both lanes are gathered.
; The expected RegData checks the two gathered qwords in XMM15, a zero upper
; half, and XMM1 (the mask) all zero afterwards.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm15, [xmm0 * 2 + rax + 2], xmm1

hlt

align 4096

; Mask with the sign bit set in every qword lane.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: the four index lanes are computed and written by the
; code above at run time.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0x7f80d2ec7be65e82", "0", "0"],
    "XMM5":  ["0x8a34fff5e9341ce2", "0xe9341ce2bf633429", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x8a34fff5e9341ce2", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 128-bit (xmm) destination, 64-bit indexes and an
; index scale of 4 (the "4x displacement" in the test name).
;
; rax holds the gather base and points at .data_mid, so negative indexes
; read from the bytes that precede it and positive ones from the bytes
; that follow.

; GATHER_QPD dst, index_label, mask_label
;   Loads the low 16 bytes of the index table into xmm0 and the low
;   16 bytes of the mask table into xmm1, then gathers into dst.
%macro GATHER_QPD 3
  vmovaps xmm0, [rel %2]
  vmovaps xmm1, [rel %3]
  vgatherqpd %1, [xmm0 * 4 + rax], xmm1
%endmacro

lea rax, [rel .data_mid]

; Seed every destination (except xmm3) with the first 32 bytes of .data
; so that lanes a gather leaves untouched are easy to spot in the result.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9,  [rel .data]
vmovapd ymm8,  [rel .data]
vmovapd ymm7,  [rel .data]
vmovapd ymm6,  [rel .data]
vmovapd ymm5,  [rel .data]
vmovapd ymm4,  [rel .data]

; Zero mask: no lane is selected, the seed survives in the low 128 bits.
GATHER_QPD xmm15, .index_d0,                 .mask_0000
GATHER_QPD xmm14, .index_positive_increment, .mask_0000
GATHER_QPD xmm13, .index_negative_decrement, .mask_0000

; First element Mask
; .mask_0001 only sets the sign bit of its fourth qword, which the
; 16-byte mask load never reads, so the expected results for
; xmm12..xmm10 are the same as for the zero-mask cases.
GATHER_QPD xmm12, .index_d0,                 .mask_0001
GATHER_QPD xmm11, .index_positive_increment, .mask_0001
GATHER_QPD xmm10, .index_negative_decrement, .mask_0001

; Top element mask
; .mask_1000 sets the sign bit of its first qword: only lane 0 is gathered.
GATHER_QPD xmm9, .index_d0,                 .mask_1000
GATHER_QPD xmm8, .index_positive_increment, .mask_1000
GATHER_QPD xmm7, .index_negative_decrement, .mask_1000

; Full Mask: both lanes are gathered.
GATHER_QPD xmm6, .index_d0,                 .mask_1111
GATHER_QPD xmm5, .index_positive_increment, .mask_1111
GATHER_QPD xmm4, .index_negative_decrement, .mask_1111

; Full range, full mask
GATHER_QPD xmm3, .index_full_range, .mask_1111

; The gather clears the mask register, so xmm1 is expected to be zero here.

hlt

align 4096

; Mask tables. A lane is selected when bit 63 (the sign bit) of its mask
; element is set; the remaining bits are irrelevant.
; Every table holds four qwords, but the xmm loads in this test only
; read the first two of them.
%define SIGN_BIT 0x8000_0000_0000_0000

.mask_0000: times 4 dq 0

.mask_0001: dq 0, 0, 0, SIGN_BIT

.mask_1000: dq SIGN_BIT, 0, 0, 0

.mask_1111: times 4 dq SIGN_BIT

; Index tables. Each index is a signed 64-bit integer that the gather
; multiplies by the scale (4) before adding it to the base in rax.
.index_d0: times 4 dq 0

.index_positive_increment: dq 3, 2, 1, 0

.index_negative_decrement: dq -4, -3, -2, -1

.index_full_range: dq -248, -128, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x displacement (index scale of 4, plus a constant displacement of 4)
; 64-bit indexes
;
; Checks that the gather address calculation wraps around 64 bits.
; The base register is set to 2 * .data_mid and element n of the index
; vector to (-1 - .data_mid - 8 * n) >> 2, so that
;   index * 4 + base + 4
; overflows and wraps back to .data_mid - 8 * n.
; NOTE(review): the arithmetic right shift rounds towards negative
; infinity, so this relies on .data_mid being a multiple of 4, which the
; `align 4096` before the tables appears to guarantee - confirm if the
; tables are ever resized.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0 -> wraps to .data_mid
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Element 1 -> wraps to .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Element 2 -> wraps to .data_mid - 16
; (elements 2 and 3 are stored for completeness; the 16-byte index load
; below only reads elements 0 and 1)
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Element 3 -> wraps to .data_mid - 24
; This must be a full 64-bit subtraction like the elements above; a
; 16-bit `add bx, 16` would only touch the low word and leave an index
; that does not follow the 8 * n pattern.
mov rbx, -1
sub rbx, rax
sub rbx, 24
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm15, [xmm0 * 4 + rax + 4], xmm1

; The gather clears the mask register, so xmm1 is expected to be zero here.

hlt

align 4096

; All-lanes mask: the sign bit (bit 63) of every element is set.
.mask_1111: times 4 dq 0x8000_0000_0000_0000

; Index vector of signed 64-bit integers. It is zero in the image and
; filled in by the test code at run time before being loaded.
.index_overflow: times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0", "0"],
    "XMM5":  ["0xb8c6d8a6df6efe3b", "0x3fd4ea2f8a34fff5", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xb8c6d8a6df6efe3b", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vgatherqpd with 128-bit (xmm) destination, index and mask registers.
; 128-bit
; 8x displacement (index scale factor of 8 in the effective address)
; 64-bit indexes
;
; Every gather uses rax = .data_mid as the base, so negative indexes read the
; data placed before .data_mid and positive indexes read the data after it.
; The mask and index tables below each hold four qwords, but this test loads
; them with 16-byte xmm moves, so only the first two qwords of a table are used.

lea rax, [rel .data_mid]

; Preload the destinations with the start of .data so that elements which are
; NOT gathered remain recognisable in the expected RegData
; (0xf1cda2562209301d / 0x0f350767409162b7 are the first two qwords of .data).
; RegData expects the upper 128 bits of every destination to be zero afterwards.
; xmm3 is not preloaded; its gather below uses a full mask.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, xmm13-xmm15 keep the preloaded values.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vgatherqpd xmm13, [xmm0 * 8 + rax], xmm1

; .mask_0001: the only set sign bit is in the fourth qword of the table, which
; the 16-byte load does not read. The mask in xmm1 is therefore all zero here
; and RegData expects xmm10-xmm12 to keep the preloaded values.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vgatherqpd xmm10, [xmm0 * 8 + rax], xmm1

; .mask_1000: the set sign bit is in the first qword of the table, so only
; element 0 (the low qword) is gathered; the high qword keeps its preload.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vgatherqpd xmm7, [xmm0 * 8 + rax], xmm1

; Full Mask: both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask: indexes -120 and -64 scaled by 8 reach 960 and 512
; bytes before .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this (RegData checks XMM1 == 0).

hlt

align 4096

; Masks only care about the sign bit of each 64-bit element.
; Tables are four qwords long; the xmm loads above read only the first two.
.mask_0000:
dq 0, 0, 0, 0

.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Only the first two entries of each table are consumed by this 128-bit test.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vgatherqpd (128-bit) where base + index * 8 + displacement only lands
; on valid data after the 64-bit address computation wraps around.
; 128-bit
; 8x displacement (index scale factor of 8 in the effective address)
; 64-bit indexes
;
; With D = address of .data_mid (assumed 8-byte aligned), slot k of
; .index_overflow is set to (-1 - D - 8*k) >> 3 (arithmetic shift), i.e.
; -(D / 8) - 1 - k. The gather then uses base 2*D and displacement +8, giving
;   index * 8 + 2*D + 8 = D - 8*k
; so slot 0 reads the first qword of .data_mid and slot 1 the qword just
; before it, which is what RegData expects in XMM15.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Slot 0 -> D
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Slot 1 -> D - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Slot 2 -> D - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Slot 3 -> D - 24
; Uses the full 64-bit register and the same downward step as slots 0-2.
; Slots 2 and 3 are not read by the 16-byte index load below; they are kept
; consistent so the table matches the four-entry layout.
mov rbx, -1
sub rbx, rax
sub rbx, 24
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination; both elements are overwritten by the full-mask
; gather, and RegData expects the upper 128 bits to be zero afterwards.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vgatherqpd xmm15, [xmm0 * 8 + rax + 8], xmm1

; xmm1 is expected to be zero after the gather (RegData checks XMM1 == 0).

hlt

align 4096

; Masks only care about the sign bit of each 64-bit element.
; Only the first two qwords are read by the xmm load above.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Filled in at runtime by the code above.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x464061a9f6300511"],
    "XMM4":  ["0x6135a9d7e273f117", "0x5f6135a9d7e273f1", "0xb85f6135a9d7e273", "0xb6b85f6135a9d7e2"],
    "XMM5":  ["0x6334292db6b85f61", "0x34292db6b85f6135", "0x292db6b85f6135a9", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x6135a9d7e273f117", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x6334292db6b85f61", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xb6b85f6135a9d7e2"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit (ymm) destination:
;  - 64-bit indexes
;  - 1x index scale (the "1xdisp" in the file name)
;  - no displacement

; Gather base. The indexes used below are byte offsets relative to .data_mid:
; negative ones read the tail of .data, positive ones read .data_mid itself.
lea rax, [rel .data_mid]

; Preload every masked-gather destination with the first 32 bytes of .data.
; Lanes whose mask sign bit is clear must still hold this value afterwards.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is loaded, the destination keeps its preloaded value.
; The mask register (ymm1) is reloaded before every gather.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm15, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm14, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm13, [ymm0 * 1 + rax], ymm1

; Highest lane only: .mask_0001 sets the sign bit of its last qword (lane 3),
; so only the top 64-bit element of the destination is loaded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm12, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm11, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm10, [ymm0 * 1 + rax], ymm1

; Lowest lane only: .mask_1000 sets the sign bit of its first qword (lane 0),
; so only the bottom 64-bit element of the destination is loaded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm9, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm8, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm7, [ymm0 * 1 + rax], ymm1

; Full mask: all four lanes are loaded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm6, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm5, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm4, [ymm0 * 1 + rax], ymm1

; Full range, full mask.
; ymm3 is not preloaded; with a full mask every lane is written by the gather.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm3, [ymm0 * 1 + rax], ymm1

; A gather clears its mask register on completion, so ymm1 is zero here
; (RegData above expects XMM1 to be all zeroes).

hlt

; Start the tables on a 4096-byte boundary. Every table below is 32 bytes, so
; each label (and .data after them) stays 32-byte aligned for the aligned
; vmovaps/vmovapd loads in the test body.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the
; first qword (lane 0), the rightmost digit is the last qword (lane 3).
.mask_0000:
dq 0, 0, 0, 0

; Lane 3 (highest element) enabled.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Lane 0 (lowest element) enabled.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; All four lanes enabled.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Each index is a signed 64-bit integer, one per lane. With the 1x scale used
; by the test these are byte offsets from the gather base.
; All lanes read from the base address itself.
.index_d0:
dq 0, 0, 0, 0

; Small positive, unaligned byte offsets (lane 0 = +3 ... lane 3 = +0).
.index_positive_increment:
dq 3, 2, 1, 0

; Small negative byte offsets (lane 0 = -4 ... lane 3 = -1).
.index_negative_decrement:
dq -4, -3, -2, -1

; Large offsets on both sides of the base.
; NOTE(review): presumably chosen to stay inside the 1024 bytes of data on
; either side of .data_mid - confirm if the data size ever changes.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit (ymm) destination:
;  - 64-bit indexes
;  - 1x index scale (the "1xdisp" in the file name)
;  - +1 byte displacement
;
; The effective address of every lane is
;   base + index + 1 = (2 * data_mid) + (-1 - data_mid + offset) + 1
; which only equals data_mid + offset once the unsigned 64-bit sum has wrapped
; around. The test therefore checks that address generation wraps modulo 2^64.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: index = -1 - data_mid            -> reads data_mid + 0
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: index = -1 - data_mid - 8        -> reads data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: index = -1 - data_mid - 16       -> reads data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: index = -1 - data_mid + 16       -> reads data_mid + 16
; Use the full 64-bit register like the other lanes. A 16-bit `add bx, 16`
; would drop any carry out of bit 15 and only produce the right index while
; the low bits of the address happen not to carry.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; (base = 2 * data_mid; the remaining -1 is cancelled by the +1 displacement).
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination; the full mask below overwrites every lane.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm15, [ymm0 * 1 + rax + 1], ymm1

; The gather clears its mask register on completion, so ymm1 is zero here
; (RegData above expects XMM1 to be all zeroes).

hlt

; Start the tables on a 4096-byte boundary. Both tables below are 32 bytes, so
; each label (and .data after them) stays 32-byte aligned for the aligned
; vmovaps/vmovapd loads in the test body.
align 4096

; Sign bit set in every qword: all four lanes are gathered.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder only: the test body stores the four real indexes here at run
; time, because they depend on the load address of .data_mid.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x98f0351d59f4e95c"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xa9d7e273f1177f80", "0x6135a9d7e273f117", "0xb85f6135a9d7e273"],
    "XMM5":  ["0x1ce2bf6334292db6", "0xbf6334292db6b85f", "0x34292db6b85f6135", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x1ce2bf6334292db6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xb85f6135a9d7e273"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four 64-bit elements)
; Scale factor 2 (the "2xdisp" of the test name): element address = rax + index * 2
; 64-bit signed indexes, four per ymm index register

; The base points at the middle of the data table so that negative as well as
; positive indexes can be used.
lea rax, [rel .data_mid]

; Pre-fill every destination with the first 32 bytes of .data. Elements that a
; gather does not write keep this pattern, which is what the expected register
; values check for.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads ymm0 (indexes) and ymm1 (mask) before the gather; the
; mask has to be reloaded every time because the gather clears it.

; Zero mask: no element is selected, the destination keeps its pre-filled data.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm15, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm14, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm13, [ymm0 * 2 + rax], ymm1

; .mask_0001: only the highest element (element 3) is selected.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm12, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm11, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm10, [ymm0 * 2 + rax], ymm1

; .mask_1000: only the lowest element (element 0) is selected.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm9, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm8, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm7, [ymm0 * 2 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm6, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm5, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm4, [ymm0 * 2 + rax], ymm1

; Full range, full mask: the largest index magnitudes used by this test
; (-504 .. 504, i.e. byte offsets -1008 .. 1008 after scaling). ymm3 is not
; pre-filled because every element is overwritten.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm3, [ymm0 * 2 + rax], ymm1

; The gather clears its mask register, so ymm1 is zero from here on; the
; expected register values check XMM1 == 0.

hlt

align 4096

; Gather masks. Only the sign bit (bit 63) of each 64-bit element matters.
; The digits of a label name list the elements in memory order: the leftmost
; digit is element 0 and the rightmost digit is element 3.
.mask_0000:
times 4 dq 0

.mask_0001:
times 3 dq 0                    ; elements 0-2: not selected
dq 0x8000_0000_0000_0000        ; element 3: selected

.mask_1000:
dq 0x8000_0000_0000_0000        ; element 0: selected
times 3 dq 0                    ; elements 1-3: not selected

.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Index vectors. Every index is a signed 64-bit integer, element 0 first.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3
dq 2
dq 1
dq 0

.index_negative_decrement:
dq -4
dq -3
dq -2
dq -1

.index_full_range:
dq -504
dq -256
dq 256
dq 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four 64-bit elements)
; Scale factor 2 plus a constant displacement of 2:
;   element address = rax + index * 2 + 2
; 64-bit signed indexes
;
; Purpose: base and scaled index are both far away from the data table; only
; their 64-bit wrapping sum lands inside it.
;
; With D = address of .data_mid, every index is built as (~D + delta) >> 1
; (arithmetic shift). The gather below uses base = 2 * D and displacement 2, so
;   2 * D + 2 + 2 * ((~D + delta) >> 1)  ==  D + delta   (mod 2^64)
; This relies on D and delta being even; the aligned vector loads below already
; require the tables to be 32-byte aligned.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Element 1 -> .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Element 2 -> .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Element 3 -> .data_mid + 16
; Uses the full 64-bit register like the other elements. A 16-bit `add bx, 16`
; would drop the carry out of bit 15 and only be correct while the low 16 bits
; of rbx do not wrap.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow: base = 2 * D
lea rax, [rel .data_mid]
shl rax, 1

; Pre-fill the destination; with a full mask every element is overwritten.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm15, [ymm0 * 2 + rax + 2], ymm1

; The gather clears its mask register; the expected register values check
; XMM1 == 0.

hlt

align 4096

; Full gather mask: the sign bit (bit 63) is set in all four 64-bit elements.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four signed 64-bit indexes. Assembled as zero; the test code stores the real
; values here at run time before loading them into ymm0.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x464061a9f6300511"],
    "XMM4":  ["0x7be65e82db698176", "0x7f80d2ec7be65e82", "0xe273f1177f80d2ec", "0x6135a9d7e273f117"],
    "XMM5":  ["0x8a34fff5e9341ce2", "0xe9341ce2bf633429", "0xbf6334292db6b85f", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x8a34fff5e9341ce2", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7e273f117"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four qword lanes)
; 4x index scale (the "4xdisp" in the file name); no constant displacement is encoded
; 64-bit signed indexes, one per lane, supplied in ymm0
;
; Each case loads an index vector into ymm0 and a mask into ymm1, then gathers
; from [ymm0 * 4 + rax] into its own destination register so that RegData can
; check every case independently.

; Common base pointer for every gather in this test.
lea rax, [rel .data_mid]

; Seed the destinations with the first 32 bytes of .data. Lanes that a gather
; leaves untouched keep these values, which is what RegData expects for them.
; ymm3 is not seeded: its gather uses a full mask, so every lane is overwritten.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is enabled, so ymm15/ymm14/ymm13 must keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm15, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm14, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm13, [ymm0 * 4 + rax], ymm1

; Highest lane only: .mask_0001 sets the sign bit in its last qword, i.e. lane 3.
; Lanes 0-2 keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm12, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm11, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm10, [ymm0 * 4 + rax], ymm1

; Lowest lane only: .mask_1000 sets the sign bit in its first qword, i.e. lane 0.
; Lanes 1-3 keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm9, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm8, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm7, [ymm0 * 4 + rax], ymm1

; Full mask: all four lanes are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm6, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm5, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm4, [ymm0 * 4 + rax], ymm1

; Full range, full mask: the largest index magnitudes used by this test,
; reaching 992 bytes on either side of .data_mid once scaled by 4.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm3, [ymm0 * 4 + rax], ymm1

; The gather clears its mask operand, so ymm1 is zero from here on (RegData
; checks XMM1 == 0). This is also why ymm1 is reloaded before every gather.

hlt

; Page-align the tables. Each mask/index table below is exactly 32 bytes, so
; every one of them stays 32-byte aligned as the aligned vmovaps/vmovapd loads
; in the test body require.
align 4096

; Masks only care about the sign bit (bit 63) of each 64-bit lane.
; The digits in a label follow memory order: the leftmost digit is lane 0
; (lowest address) and the rightmost digit is lane 3 (highest address).
.mask_0000:
dq 0, 0, 0, 0

; Enables lane 3 only.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Enables lane 0 only.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Enables all four lanes.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; The test body scales each index by 4, so the byte offsets from the base are
; four times the values listed here.
; All lanes read the qword at the base address.
.index_d0:
dq 0, 0, 0, 0

; Byte offsets +12, +8, +4, +0: consecutive 8-byte loads overlap by 4 bytes.
.index_positive_increment:
dq 3, 2, 1, 0

; Byte offsets -16, -12, -8, -4: lane 3 reads 4 bytes below the base and
; 4 bytes at/after it.
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -992, -512, +512, +992.
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512 bytes per db directive
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four qword lanes)
; 4x index scale (the "4xdisp" in the file name) plus a constant displacement of 4
; 64-bit signed indexes, one per lane, supplied in ymm0
;
; Checks that the effective address index * 4 + base + disp is computed with
; 64-bit wrap-around. Each index is built as (~P - k) >> 2 with P = .data_mid,
; so index * 4 == -P - 4 - k (relies on P and k being multiples of 4). The base
; is then set to 2 * P and the displacement to 4, giving
;   (-P - 4 - k) + 2 * P + 4 == P - k   (mod 2^64)
; i.e. every lane lands back inside the data table even though the individual
; terms are far outside it.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: k = 0, reads .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: k = 8, reads .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: k = 16, reads .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: k = -16, reads .data_mid + 16
; The adjustment uses the full 64-bit register, like the other lanes. A 16-bit
; `add bx, 16` would drop any carry out of bit 15 and only produce the right
; index for load addresses where that carry never happens.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; (base = 2 * .data_mid, see the address derivation at the top of the file)
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination; the full mask below overwrites every lane.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm15, [ymm0 * 4 + rax + 4], ymm1

; The gather clears its mask operand, so ymm1 is zero here (RegData checks XMM1 == 0).

hlt

; Page-align the tables. Both tables below are exactly 32 bytes, so they and
; the data that follows stay 32-byte aligned for the aligned vmovaps/vmovapd loads.
align 4096

; Sign bit (bit 63) set in every 64-bit lane: all four lanes are gathered.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: the test body computes the four indexes at runtime and
; stores them here before loading the table into ymm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per db directive
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x59fc3ca8abf96d9b"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0x7be65e82db698176", "0xe273f1177f80d2ec"],
    "XMM5":  ["0xb8c6d8a6df6efe3b", "0x3fd4ea2f8a34fff5", "0xe9341ce2bf633429", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xb8c6d8a6df6efe3b", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe273f1177f80d2ec"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqpd with a 256-bit destination (four qword lanes)
; 8x index scale (the "8xdisp" in the file name); no constant displacement is encoded
; 64-bit signed indexes, one per lane, supplied in ymm0
;
; Each case loads an index vector into ymm0 and a mask into ymm1, then gathers
; from [ymm0 * 8 + rax] into its own destination register so that RegData can
; check every case independently.

; Common base pointer for every gather in this test.
lea rax, [rel .data_mid]

; Seed the destinations with the first 32 bytes of .data. Lanes that a gather
; leaves untouched keep these values, which is what RegData expects for them.
; ymm3 is not seeded: its gather uses a full mask, so every lane is overwritten.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is enabled, so ymm15/ymm14/ymm13 must keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm15, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm14, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vgatherqpd ymm13, [ymm0 * 8 + rax], ymm1

; Highest lane only: .mask_0001 sets the sign bit in its last qword, i.e. lane 3.
; Lanes 0-2 keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm12, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm11, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vgatherqpd ymm10, [ymm0 * 8 + rax], ymm1

; Lowest lane only: .mask_1000 sets the sign bit in its first qword, i.e. lane 0.
; Lanes 1-3 keep their seed values.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm9, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm8, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vgatherqpd ymm7, [ymm0 * 8 + rax], ymm1

; Full mask: all four lanes are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm6, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm5, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm4, [ymm0 * 8 + rax], ymm1

; Full range, full mask: the largest index magnitudes used by this test,
; reaching 960 bytes on either side of .data_mid once scaled by 8.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm3, [ymm0 * 8 + rax], ymm1

; The gather clears its mask operand, so ymm1 is zero from here on (RegData
; checks XMM1 == 0). This is also why ymm1 is reloaded before every gather.

hlt

; Start the data tables on a fresh 4096-byte boundary, away from the code above.
align 4096

; Masks only care about the sign bit.
; Each mask is four qwords, one per 64-bit lane; the label digits are
; written in memory order (first qword = lane 0).
.mask_0000:
dq 0, 0, 0, 0

; Only the last qword (lane 3) is enabled.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword (lane 0) is enabled.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; All four lanes enabled.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; The test scales each index by 8 and adds it to the address of .data_mid.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; After the 8x scale: -960, -512, +512, +960 bytes from .data_mid.
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qpd_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 8x displacement (index scale factor of 8; the gather also adds a +8 byte displacement)
; 64-bit indexes
;
; Overflow test for vgatherqpd: the indexes are chosen so that index * 8 is a
; large negative value and the base register holds 2 * .data_mid. The
; effective address index * 8 + base + 8 wraps around modulo 2^64 and lands
; back near .data_mid.
;
; Assuming .data_mid is 8-byte aligned (it follows `align 4096` by a multiple
; of 8 bytes; the load address is presumed page aligned), with A = .data_mid:
;   lane 0: ((-1 - A)      >> 3) * 8 + 2A + 8 = A
;   lane 1: ((-1 - A - 8)  >> 3) * 8 + 2A + 8 = A - 8
;   lane 2: ((-1 - A - 16) >> 3) * 8 + 2A + 8 = A - 16
;   lane 3: ((-1 - A + 16) >> 3) * 8 + 2A + 8 = A + 16

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0 index.
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1 index (8 bytes lower).
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2 index (16 bytes lower).
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3 index (16 bytes higher).
; Use the full 64-bit register like the other lanes: a 16-bit `add bx, 16`
; would silently drop a carry out of bit 15 and only works by accident of
; where .data_mid happens to be placed.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; (base = 2 * .data_mid, which cancels the negated address folded into each index)
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination; with a full mask every lane is overwritten.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vgatherqpd ymm15, [ymm0 * 8 + rax + 8], ymm1

hlt

; Start the data tables on a fresh 4096-byte boundary, away from the code above.
align 4096

; Sign bit set in all four qwords: every lane of the gather is enabled.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder values: the test code stores the four computed indexes here
; at run time before loading them into the index register.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0", "0", "0"],
    "XMM4":  ["0xd7e273f1e273f117", "0", "0", "0"],
    "XMM5":  ["0xb85f6135b6b85f61", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda256e273f117", "0", "0", "0"],
    "XMM8":  ["0xf1cda256b6b85f61", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement (this is the index scale factor in the gather operand: xmm0 * 1)
; 64-bit indexes
;
; Exercises vgatherqps with xmm destination/index/mask registers.
; All vector loads below are 128-bit, so only the first 16 bytes of each
; mask/index table are read: two qword indexes and four mask dwords.
; The expected results show two 32-bit elements gathered into the low 64 bits
; of the destination and everything above bit 63 zeroed.

; Gather base: the middle of the random data, so negative and positive
; indexes both land inside the data block.
lea rax, [rel .data_mid]

; Preload every masked-gather destination with the first 32 bytes of .data
; so that elements which are not gathered keep a known value.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No mask sign bit is set, so the low 64 bits keep their preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm13, [xmm0 * 1 + rax], xmm1

; First element Mask
; NOTE: .mask_00000001 only sets the sign bit in its eighth dword, which is
; outside the 16 bytes loaded into xmm1. The loaded mask is therefore all
; zero and these three gathers behave like the zero-mask cases above
; (the expected XMM10-XMM12 keep the preloaded low qword).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm10, [xmm0 * 1 + rax], xmm1

; Top element mask
; .mask_10000000 sets the sign bit only in its first dword, so only
; element 0 (bits 31:0) of the destination is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm7, [xmm0 * 1 + rax], xmm1

; Full Mask
; Both elements (one per 64-bit index) are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask
; xmm3 is not preloaded; with a full mask both elements are overwritten.
; The 128-bit index load reads only the first two entries (-992 and -512).
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm3, [xmm0 * 1 + rax], xmm1

; xmm1 will be zero after this.
; (The gather instruction clears its mask register as it completes.)

hlt

; Start the data tables on a fresh 4096-byte boundary, away from the code above.
align 4096

; Masks only care about the sign bit.
; Each mask is eight dwords (32 bytes), with the label digits written in
; memory order. The test code loads masks with 128-bit moves, so only the
; first four dwords of each table reach the mask register.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only the eighth dword is set; it lies beyond a 128-bit load.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only the first dword (element 0) is set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Every dword is set.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; The test uses a scale of 1, so each index is a byte offset from .data_mid.
; A 128-bit index load reads only the first two qwords of each table.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -992, -512, +512, +992 from .data_mid.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement
; 64-bit indexes
;
; Checks that the gather address calculation wraps modulo 2^64.
; Each index slot is built as (-1 - .data_mid + k) and the base register is
; later set to 2 * .data_mid, so
;   index * 1 + base + 1  ==  .data_mid + k
; only holds once the 64-bit sum has overflowed.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Slot 0: k = 0
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Slot 1: k = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Slot 2: k = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Slot 3: k = +16
; Uses the full 64-bit register like the other slots; a 16-bit `add bx, 16`
; would drop any carry out of bit 15 and leave a wrong index behind.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

; The 128-bit loads below pick up index slots 0 and 1 only, plus the first
; four dwords of the mask. CONFIG expects lane 0 = dword at .data_mid
; (0x6135a9d7), lane 1 = dword at .data_mid - 8 (0x7f80d2ec), everything above
; the low 64 bits zero, and xmm1 zero afterwards.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [xmm0 * 1 + rax + 1], xmm1

hlt

align 4096

; Eight dword lanes, each with only the sign bit set (32 bytes).
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit index slots. They are zero in the image and are filled
; in at runtime by the stores to .index_overflow + n * 8 in the code above.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0", "0", "0"],
    "XMM4":  ["0xf1177f807f80d2ec", "0", "0", "0"],
    "XMM5":  ["0x2db6b85f34292db6", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0", "0", "0"],
    "XMM8":  ["0xf1cda25634292db6", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement
; 64-bit indexes
;
; Every gather below addresses memory as rax + index * 2, with rax pointing at
; .data_mid. Each test loads xmm0 with the first two qword indexes of an index
; table and xmm1 with the first four dwords of a mask table.

lea rax, [rel .data_mid]

; Preload the destinations with the first 32 bytes of .data. Lanes that a
; gather does not write keep these bytes (low qword 0xf1cda2562209301d in
; CONFIG). xmm3 is not preloaded; it is only written by the last gather.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane selected, so CONFIG expects the preloaded low qword to
; survive in xmm15, xmm14 and xmm13.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm13, [xmm0 * 2 + rax], xmm1

; .mask_00000001: the only set sign bit sits in dword 7 of the table, which the
; 128-bit load into xmm1 does not read. xmm1 is therefore all zero here and
; CONFIG expects the same untouched values as for the zero mask
; (xmm12, xmm11, xmm10).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm10, [xmm0 * 2 + rax], xmm1

; .mask_10000000: the sign bit is set in dword 0 only, so just the lowest lane
; is gathered. CONFIG expects the second dword of xmm9, xmm8 and xmm7 to keep
; the preloaded 0xf1cda256.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm7, [xmm0 * 2 + rax], xmm1

; Full mask: both low dword lanes are gathered (xmm6, xmm5, xmm4).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask: the two loaded indexes are -504 and -256, i.e. byte
; offsets -1008 and -512 from .data_mid after the 2x scale.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm3, [xmm0 * 2 + rax], xmm1

; xmm1 will be zero after this (CONFIG checks XMM1 == 0).

hlt

align 4096

; Gather masks: only the sign bit (bit 31) of each dword lane matters.
; Every table is eight dwords (32 bytes) long.
.mask_00000000:
times 8 dd 0

; Sign bit set in dword 7 (the last lane) only.
.mask_00000001:
times 7 dd 0
dd 0x8000_0000

; Sign bit set in dword 0 (the first lane) only.
.mask_10000000:
dd 0x8000_0000
times 7 dd 0

.mask_11111111:
times 8 dd 0x8000_0000

; Index tables: four signed 64-bit indexes each (32 bytes per table).
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3, 2
dq 1, 0

.index_negative_decrement:
dq -4, -3
dq -2, -1

.index_full_range:
dq -504, -256
dq 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement
; 64-bit indexes
;
; Checks that the gather address calculation (index * 2 + base + 2) wraps at
; 64 bits: the base and the scaled indexes are both far away from the data,
; and only their wrapped sum lands back at .data_mid.

; Calculate an address that heavily overflows
; Slot n of .index_overflow holds (-1 - data_mid - 8 * n) >> 1 (arithmetic).
; Together with the base and displacement used by the gather below this
; resolves to data_mid - 8 * n, assuming .data_mid is at an even address.
lea rax, [rel .data_mid]

; Slot 0 -> data_mid
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Slot 1 -> data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Slot 2 -> data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Slot 3 -> data_mid - 24 (full 64-bit arithmetic, same as the other slots)
mov rbx, -1
sub rbx, rax
sub rbx, 24
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; rax = 2 * data_mid (mod 2^64)
lea rax, [rel .data_mid]
shl rax, 1

; Pre-fill the destination with known data before gathering into it.
vmovapd ymm15, [rel .data]

; xmm0 receives the first two qword indexes (slots 0 and 1); the expected
; XMM15 in the CONFIG header is the two 32-bit values they select.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [xmm0 * 2 + rax + 2], xmm1

hlt

align 4096

; Mask with the sign bit set in every 32-bit element, so every gathered
; element is selected. The test only loads the first 16 bytes of it.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Zero-initialised here; the test code stores the computed indexes into
; these four qword slots at runtime before loading them.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0", "0", "0"],
    "XMM5":  ["0xbf633429e9341ce2", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0", "0", "0"],
    "XMM8":  ["0xf1cda256e9341ce2", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x displacement
; 64-bit indexes

; PREFILL reg [, reg ...]
; Loads the first 32 bytes of .data into each listed ymm register, in the
; order given, so every gather destination starts from known contents.
%macro PREFILL 1-*
  %rep %0
    vmovapd %1, [rel .data]
    %rotate 1
  %endrep
%endmacro

; GATHER_QPS dst, index_label, mask_label
; Loads the index vector into xmm0 and the mask into xmm1, then gathers
; into dst from [xmm0 * 4 + rax].
%macro GATHER_QPS 3
  vmovaps xmm0, [rel %2]
  vmovaps xmm1, [rel %3]
  vgatherqps %1, [xmm0 * 4 + rax], xmm1
%endmacro

; Base address for every gather in this test.
lea rax, [rel .data_mid]

PREFILL ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; Zero mask
GATHER_QPS xmm15, .index_d0, .mask_00000000

GATHER_QPS xmm14, .index_positive_increment, .mask_00000000

GATHER_QPS xmm13, .index_negative_decrement, .mask_00000000

; First element Mask
GATHER_QPS xmm12, .index_d0, .mask_00000001

GATHER_QPS xmm11, .index_positive_increment, .mask_00000001

GATHER_QPS xmm10, .index_negative_decrement, .mask_00000001

; Top element mask
GATHER_QPS xmm9, .index_d0, .mask_10000000

GATHER_QPS xmm8, .index_positive_increment, .mask_10000000

GATHER_QPS xmm7, .index_negative_decrement, .mask_10000000

; Full Mask
GATHER_QPS xmm6, .index_d0, .mask_11111111

GATHER_QPS xmm5, .index_positive_increment, .mask_11111111

GATHER_QPS xmm4, .index_negative_decrement, .mask_11111111

; Full range, full mask
GATHER_QPS xmm3, .index_full_range, .mask_11111111

; xmm1 will be zero after this.

hlt

align 4096

; Masks only care about the sign bit.
; Each mask is eight dwords long, but this test loads them with 128-bit
; vmovaps, so only the first four dwords of each table reach xmm1.

; No sign bit set anywhere.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only the last dword (dword 7) has its sign bit set. That dword lies outside
; the 16 bytes this test loads, so here the loaded mask is all zero.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only the first dword (dword 0) has its sign bit set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Sign bit set in every dword.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Each table holds four qwords; the 128-bit vmovaps into xmm0 reads only the
; first two of them.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; With the 4x scale used by this test these are byte offsets of
; -992, -512, 512 and 992 from the base.
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: VGATHERQPS whose effective addresses wrap around 2^64.
; - 128-bit (xmm) destination, index and mask registers
; - 4x index scale, plus a +4 byte displacement
; - 64-bit signed indexes
;
; Every index is built as (-1 - data_mid - k) >> 2 and the gather base is then
; set to 2 * data_mid, so base + index * 4 + 4 overflows 64 bits and lands back
; next to .data_mid. RegData expects lane 0 to read the dword at .data_mid and
; lane 1 the dword at .data_mid - 8.
; NOTE(review): this arithmetic assumes .data_mid is at least 4-byte aligned, so
; that the arithmetic shift only discards set low bits - confirm if the data
; layout ever changes.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: (-1 - data_mid) >> 2
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: (-1 - data_mid - 8) >> 2
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Lanes 2 and 3 are written to .index_overflow as well, but the 16-byte xmm0
; load further down only picks up lanes 0 and 1.

; Lane 2: (-1 - data_mid - 16) >> 2
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: (-1 - data_mid + 16) >> 2
mov rbx, -1
sub rbx, rax
; Full 64-bit add, matching the width used for the other lanes; a 16-bit
; `add bx, 16` would silently drop any carry out of bit 15.
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; (base = 2 * data_mid; the scaled negative index cancels one data_mid again)
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with the first 32 bytes of .data. RegData expects
; everything above the two gathered dwords to read back as zero afterwards.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [xmm0 * 4 + rax + 4], xmm1

; RegData expects xmm1 (the mask) to be all zero at this point.

hlt

; Data for the test starts on a fresh 4096-byte boundary.
align 4096

; Gather mask: only the sign bit (bit 31) of each dword element is set.
; Eight dwords (32 bytes) are defined; the 16-byte xmm1 load in this test
; reads the first four.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four qword slots, zero in the image; the test code overwrites each slot at
; run time with an index derived from the address of .data_mid.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0", "0", "0"],
    "XMM4":  ["0x97ba9a8e8aa16a60", "0", "0", "0"],
    "XMM5":  ["0x8a34fff5df6efe3b", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0", "0", "0"],
    "XMM8":  ["0xf1cda256df6efe3b", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: VGATHERQPS with different mask and index combinations.
; - 128-bit (xmm) destination, index and mask registers
; - 8x index scale, no displacement
; - 64-bit signed indexes
;
; Each gather below uses its own destination register (xmm15 down to xmm3) so
; that every result can be checked independently through RegData.

; Gather base: the middle of the random data, so negative indexes stay in bounds.
lea rax, [rel .data_mid]

; Preload every destination except xmm3 with the first 32 bytes of .data, so
; elements that a gather leaves untouched keep a known value.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No element is selected; RegData expects the low 64 bits of the preloaded
; value to survive and everything above to be zero.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm13, [xmm0 * 8 + rax], xmm1

; .mask_00000001
; The only set sign bit is in the eighth dword of this mask, which is outside
; the 16 bytes loaded into xmm1, so this behaves like a zero mask here
; (RegData expects the same values as for the zero-mask cases).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm10, [xmm0 * 8 + rax], xmm1

; .mask_10000000
; The sign bit is set in the first dword only, so just element 0 is gathered;
; RegData expects element 1 to keep its preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm7, [xmm0 * 8 + rax], xmm1

; Full Mask
; Both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask
; xmm3 is the one destination that is not preloaded above; both of its
; elements are selected by the full mask.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this (RegData checks that the mask register reads
; back as all zero).

hlt

; Data for the test starts on a fresh 4096-byte boundary.
align 4096

; Masks only care about the sign bit.
; Each mask is eight dwords (32 bytes); the digits in a label name read left
; to right as dword 0 through dword 7. The test loads masks with 16-byte xmm
; loads, so only the first four dwords of each table reach the mask register.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Sign bit set in the last (eighth) dword only.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Sign bit set in the first dword only.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Sign bit set in every dword.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Each table holds four qword indexes; the 16-byte xmm loads in the test pick
; up the first two. The gathers scale every index by 8.
.index_d0:
dq 0, 0, 0, 0

; Small non-negative indexes, in descending order.
.index_positive_increment:
dq 3, 2, 1, 0

; Small negative indexes, addressing data located before the gather base.
.index_negative_decrement:
dq -4, -3, -2, -1

; Larger offsets on both sides of the base: scaled by 8 these are byte offsets
; -960, -512, +512 and +960.
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 128-bit index vector (two 64-bit indexes in xmm0)
; 8x index scale, plus a +8 displacement
; 64-bit indexes
;
; Every effective address wraps around the 64-bit address space: the base is
; 2 * .data_mid and each index is a large negative value that brings the sum
; back next to .data_mid.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; index[0] = (-1 - .data_mid) >> 3
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; index[1] = (-1 - .data_mid - 8) >> 3
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; index[2] = (-1 - .data_mid - 16) >> 3
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; index[3] = (-1 - .data_mid + 16) >> 3
; The adjustment is applied to the full 64-bit register, like the other slots.
; A 16-bit add on bx would discard a carry out of bit 15 and corrupt the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with known bytes from .data
vmovapd ymm15, [rel .data]

; Only index[0] and index[1] fit in xmm0; index[2] and index[3] are stored
; above but are not part of this 16-byte load.
vmovaps xmm0, [rel .index_overflow]
; First four dwords of the mask table, each with the sign bit set
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [xmm0 * 8 + rax + 8], xmm1

hlt

; Data tables start on a 4096-byte boundary
align 4096

; Gather masks only look at the sign bit of each 32-bit element.
; The table holds eight dwords; the test loads just the first four into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder values: the test code overwrites all four slots at runtime.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xf6300511c21448dd", "0", "0"],
    "XMM4":  ["0xd7e273f1e273f117", "0x35a9d7e2a9d7e273", "0", "0"],
    "XMM5":  ["0xb85f6135b6b85f61", "0x6135a9d75f6135a9", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256e273f117", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256b6b85f61", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector (four 64-bit indexes in ymm0);
; the destination and the mask are 128-bit (four 32-bit elements)
; 1x index scale, no displacement
; 64-bit indexes

; Base address for every gather below
lea rax, [rel .data_mid]

; Seed each destination with the first 32 bytes of .data so that elements a
; gather leaves untouched are recognisable in the expected results.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No element is selected, so the low 128 bits keep the seeded .data bytes.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm15, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm14, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm13, [ymm0 * 1 + rax], xmm1

; First element Mask
; NOTE: .mask_00000001 sets the sign bit only in its eighth dword, which lies
; outside the 16 bytes loaded into xmm1. The mask register is therefore all
; zeros here, and the expected results for xmm12/xmm11/xmm10 match the
; zero-mask cases above.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm12, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm11, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vgatherqps xmm10, [ymm0 * 1 + rax], xmm1

; Top element mask
; .mask_10000000 sets the sign bit in its first dword, so only element 0
; (the lowest 32 bits of the destination) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm9, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm8, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm7, [ymm0 * 1 + rax], xmm1

; Full Mask
; All four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm6, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm5, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm4, [ymm0 * 1 + rax], xmm1

; Full range, full mask
; xmm3 is not seeded beforehand; with every mask bit set all four elements
; are written by the gather.
vmovaps ymm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm3, [ymm0 * 1 + rax], xmm1

; The gather clears its mask register, so xmm1 will be zero after this.

hlt

; Data tables start on a 4096-byte boundary
align 4096

; Masks only care about the sign bit.
; Each table holds eight dwords and the digits in a label read left to right
; as dwords 0..7. The test loads only the first 16 bytes (dwords 0..3) into
; xmm1, so dwords 4..7 never reach the mask register.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Sign bit set only in dword 7: loads into xmm1 as an all-zero mask.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Sign bit set only in dword 0: selects the lowest element.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; The gathers use a scale of 1 with .data_mid as base, so every index below
; is a byte offset relative to .data_mid.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector (four 64-bit indexes in ymm0);
; the destination and the mask are 128-bit (four 32-bit elements)
; 1x index scale, plus a +1 displacement
; 64-bit indexes
;
; Every effective address wraps around the 64-bit address space:
;   index[i] + 2 * .data_mid + 1
; With the indexes built below this resolves to .data_mid + 0, - 8, - 16
; and + 16 for elements 0..3.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; index[0] = -1 - .data_mid
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; index[1] = -1 - .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; index[2] = -1 - .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; index[3] = -1 - .data_mid + 16
; The adjustment is applied to the full 64-bit register, like the other slots.
; A 16-bit add on bx would discard a carry out of bit 15 and corrupt the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination with known bytes from .data
vmovapd ymm15, [rel .data]

; All four runtime-computed indexes are loaded
vmovaps ymm0, [rel .index_overflow]
; First four dwords of the mask table, each with the sign bit set
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [ymm0 * 1 + rax + 1], xmm1

hlt

; Data tables start on a 4096-byte boundary
align 4096

; Gather masks only look at the sign bit of each 32-bit element.
; The table holds eight dwords; the test loads just the first four into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder values: the test code overwrites all four slots at runtime.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0x59f4e95cc21448dd", "0", "0"],
    "XMM4":  ["0xf1177f807f80d2ec", "0xa9d7e273e273f117", "0", "0"],
    "XMM5":  ["0x2db6b85f34292db6", "0x6135a9d7b85f6135", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda25634292db6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector:
;   * ymm0 holds four signed 64-bit indexes
;   * every index is scaled by 2 (the "2x" in the test name), base is rax
;   * the destination and the mask (xmm1) are 128-bit registers
;
; rax points at .data_mid so that negative indexes still land inside the
; random data tables.

; Load every listed register with the first 32 bytes of .data.
; These seed values are what must survive in lanes that are masked off.
%macro SEED_FROM_DATA 1-*
%rep %0
vmovapd %1, [rel .data]
%rotate 1
%endrep
%endmacro

; One gather test case.
;   %1 = destination xmm register
;   %2 = label of the index table (32 bytes, loaded into ymm0)
;   %3 = label of the mask table (only its first 16 bytes are loaded, into xmm1)
%macro GATHER_QPS_X2 3
vmovaps ymm0, [rel %2]
vmovaps xmm1, [rel %3]
vgatherqps %1, [ymm0 * 2 + rax], xmm1
%endmacro

lea rax, [rel .data_mid]

SEED_FROM_DATA ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; All-zero mask: nothing is gathered, destinations keep their seed value.
GATHER_QPS_X2 xmm15, .index_d0, .mask_00000000

GATHER_QPS_X2 xmm14, .index_positive_increment, .mask_00000000

GATHER_QPS_X2 xmm13, .index_negative_decrement, .mask_00000000

; .mask_00000001: the only set sign bit sits in dword 7 of the table, which is
; outside the 16 bytes loaded into xmm1. The effective mask is therefore zero
; and RegData expects XMM10-XMM12 to equal the seed, same as XMM13-XMM15.
GATHER_QPS_X2 xmm12, .index_d0, .mask_00000001

GATHER_QPS_X2 xmm11, .index_positive_increment, .mask_00000001

GATHER_QPS_X2 xmm10, .index_negative_decrement, .mask_00000001

; .mask_10000000: sign bit set in dword 0 only, so just element 0 (the lowest
; 32 bits of the destination) is gathered.
GATHER_QPS_X2 xmm9, .index_d0, .mask_10000000

GATHER_QPS_X2 xmm8, .index_positive_increment, .mask_10000000

GATHER_QPS_X2 xmm7, .index_negative_decrement, .mask_10000000

; Every loaded mask element has its sign bit set: all four elements gathered.
GATHER_QPS_X2 xmm6, .index_d0, .mask_11111111

GATHER_QPS_X2 xmm5, .index_positive_increment, .mask_11111111

GATHER_QPS_X2 xmm4, .index_negative_decrement, .mask_11111111

; Indexes spread over the table (+-256 and +-504 before scaling), full mask.
GATHER_QPS_X2 xmm3, .index_full_range, .mask_11111111

; The gather clears its mask register, hence RegData expects XMM1 == 0.

hlt

align 4096

; Mask tables. Only the sign bit of each 32-bit element matters.
; Each table is eight dwords long, but the test loads a mask with a 128-bit
; vmovaps into xmm1, so only the first four dwords are ever read.
.mask_00000000:
times 8 dd 0

; Sign bit set in dword 7 only (not covered by a 128-bit load).
.mask_00000001:
times 7 dd 0
dd 0x8000_0000

; Sign bit set in dword 0 only.
.mask_10000000:
dd 0x8000_0000
times 7 dd 0

; Sign bit set in every dword.
.mask_11111111:
times 8 dd 0x8000_0000

; Index tables: four signed 64-bit indexes each, lowest element first.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3
dq 2
dq 1
dq 0

.index_negative_decrement:
dq -4
dq -3
dq -2
dq -1

.index_full_range:
dq -504
dq -256
dq 256
dq 504

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector:
;   * ymm0 holds four signed 64-bit indexes
;   * every index is scaled by 2, base is rax, displacement is 2
;   * the destination and the mask (xmm1) are 128-bit registers

; Calculate an address that heavily overflows.
;
; Each index is (-1 - data_mid + k) >> 1 and the gather base is data_mid * 2,
; so index * 2 + base + 2 wraps around and lands back inside the data table.
; With .data_mid at an even address the bit dropped by `sar` is made up for
; by the +2 displacement, giving effective addresses of .data_mid + k for
; k = 0, -8, -16 and +16 (matching the XMM15 value expected in RegData).
lea rax, [rel .data_mid]

; Element 0: k = 0
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: k = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: k = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: k = +16
; Full 64-bit add, like the subtractions above: a 16-bit `add bx, 16` would
; silently drop any carry out of bit 15 and make the index depend on where
; the image happens to be loaded.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination so stale lanes would be detectable.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [ymm0 * 2 + rax + 2], xmm1

hlt

align 4096

; Mask with the sign bit set in every 32-bit element (only sign bits matter).
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit indexes. Zero-filled here; the test code stores the
; computed overflowing indexes into these slots before the gather runs.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xf6300511c21448dd", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0xe273f1177f80d2ec", "0", "0"],
    "XMM5":  ["0xbf633429e9341ce2", "0x6135a9d72db6b85f", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256e9341ce2", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0xe273f117409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector:
;   - four signed 64-bit indexes in ymm0
;   - index scale of 4 (the "4xdisp" in the file name)
;   - four 32-bit results in an xmm destination, four 32-bit mask lanes in xmm1
;
; Every case below runs the same three index patterns (all zero, 3..0, -4..-1)
; against one mask pattern. Lanes whose mask sign bit is clear must keep the
; value the destination had before the gather.

; Base address for every gather; indexes are relative to the middle of the data.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data so that lanes
; which are not gathered are recognisable in the expected register values.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, the low 128 bits keep the preloaded data.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm15, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm14, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vgatherqps xmm13, [ymm0 * 4 + rax], xmm1

; Single-lane mask, highest lane (lane 3) only.
; .mask_00000001 is an eight-dword table whose only set entry is the last one.
; The mask operand is an xmm register, so load the upper 16 bytes of the table;
; loading from the start of the table would give an all-zero mask and merely
; repeat the zero-mask case above.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001 + 16]
vgatherqps xmm12, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001 + 16]
vgatherqps xmm11, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001 + 16]
vgatherqps xmm10, [ymm0 * 4 + rax], xmm1

; Single-lane mask, lowest lane (lane 0) only.
; The set entry of .mask_10000000 is the first dword, so the low 16 bytes of
; the table already select lane 0.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm9, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm8, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vgatherqps xmm7, [ymm0 * 4 + rax], xmm1

; Full mask: all four lanes are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm6, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm5, [ymm0 * 4 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm4, [ymm0 * 4 + rax], xmm1

; Full range, full mask: indexes -248, -128, 128, 248 scaled by 4 reach
; -992, -512, +512 and +992 bytes from .data_mid.
vmovaps ymm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm3, [ymm0 * 4 + rax], xmm1

; xmm1 will be zero after this (the gather clears its mask register), which is
; what the XMM1 entry in RegData checks.

hlt

align 4096

; Masks only care about the sign bit.
; Each table below is eight dwords long, and the digits in a label list those
; dwords in memory order (1 = sign bit set, 0 = clear).
; NOTE(review): the test code loads these into xmm1 with 128-bit vmovaps, so a
; single load only picks up four of a table's eight dwords - confirm that each
; load selects the half it is meant to.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only the eighth (last) dword has its sign bit set.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only the first dword has its sign bit set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Sign bit set in every dword.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Each table holds four qword indexes, loaded as one 256-bit vector into ymm0.
; All four elements read from the base address itself.
.index_d0:
dq 0, 0, 0, 0

; Non-negative indexes; element 0 holds the largest (3), element 3 holds 0.
.index_positive_increment:
dq 3, 2, 1, 0

; Negative indexes; element 0 is furthest below the base (-4), element 3 is -1.
.index_negative_decrement:
dq -4, -3, -2, -1

; Largest-magnitude indexes in this test. With the 4x scale used by the test
; code they are -992, -512, +512 and +992 bytes from the base address.
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a 256-bit index vector:
;   - four signed 64-bit indexes in ymm0
;   - index scale of 4 (the "4xdisp" in the file name)
;   - four 32-bit results in xmm15, four 32-bit mask lanes in xmm1
;
; This variant checks that base + index * scale + displacement is computed
; with 64-bit wrap-around: the base is 2 * .data_mid, and each scaled index is
; roughly -.data_mid, so the sum lands back next to .data_mid.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Each index is (~rax + adjustment) >> 2, using an arithmetic shift so the
; value stays negative. Once scaled by 4 and added to the base (2 * rax) and
; the displacement (4) used by the gather below, the lanes read from:
;   lane 0: .data_mid
;   lane 1: .data_mid - 8
;   lane 2: .data_mid - 16
;   lane 3: .data_mid + 16
; NOTE(review): these offsets assume .data_mid is 4-byte aligned, so that the
; bits discarded by the shift are the same for every lane - confirm if the
; data layout changes.

; Lane 0: no adjustment.
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: 8 bytes lower.
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: 16 bytes lower.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: 16 bytes higher.
; Adjust the full 64-bit register, like the other lanes do; a 16-bit add on bx
; would silently drop any carry out of bit 15.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination; with a full mask every lane of xmm15 is replaced.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [ymm0 * 4 + rax + 4], xmm1

hlt

align 4096

; Gather mask with the sign bit set in every dword.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; The zeros are placeholders: the test code stores the four computed indexes
; here at runtime, before loading them into ymm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xabf96d9bc21448dd", "0", "0"],
    "XMM4":  ["0x97ba9a8e8aa16a60", "0x7f80d2ecdb698176", "0", "0"],
    "XMM5":  ["0x8a34fff5df6efe3b", "0x6135a9d7bf633429", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256df6efe3b", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vgatherqps with a ymm index register (the "256-bit" form):
;   - four signed 64-bit indexes in ymm0
;   - index scale of 8
;   - four 32-bit elements gathered into an xmm destination under an xmm mask

; GATHER_QPS destination, index_table, mask_table
; Reloads ymm0 (indexes) and xmm1 (mask) before every gather; the mask has to be
; reloaded each time because xmm1 ends up zero once a gather has run.
%macro GATHER_QPS 3
vmovaps ymm0, [rel %2]
vmovaps xmm1, [rel %3]
vgatherqps %1, [ymm0 * 8 + rax], xmm1
%endmacro

; Every gather is based at .data_mid, which has table data on both sides of it,
; so positive and negative indexes both land on defined bytes.
lea rax, [rel .data_mid]

; Seed ymm15 down to ymm4 with the first 32 bytes of .data so that any element
; the mask leaves untouched is recognisable in the results.
%assign seed_reg 15
%rep 12
vmovapd ymm %+ seed_reg, [rel .data]
%assign seed_reg seed_reg-1
%endrep
%undef seed_reg

; All-zero mask: nothing is gathered, destinations keep their seed value.
GATHER_QPS xmm15, .index_d0, .mask_00000000

GATHER_QPS xmm14, .index_positive_increment, .mask_00000000

GATHER_QPS xmm13, .index_negative_decrement, .mask_00000000

; .mask_00000001: its only set dword is the eighth one, which the 16-byte xmm1
; load never reads, so xmm1 is all zero here too and these three gathers behave
; like the zero-mask group above.
GATHER_QPS xmm12, .index_d0, .mask_00000001

GATHER_QPS xmm11, .index_positive_increment, .mask_00000001

GATHER_QPS xmm10, .index_negative_decrement, .mask_00000001

; .mask_10000000: only dword 0 has its sign bit set, so only element 0 of each
; destination is replaced.
GATHER_QPS xmm9, .index_d0, .mask_10000000

GATHER_QPS xmm8, .index_positive_increment, .mask_10000000

GATHER_QPS xmm7, .index_negative_decrement, .mask_10000000

; Full mask: all four elements are gathered.
GATHER_QPS xmm6, .index_d0, .mask_11111111

GATHER_QPS xmm5, .index_positive_increment, .mask_11111111

GATHER_QPS xmm4, .index_negative_decrement, .mask_11111111

; Full mask with the widest index spread (-120 .. 120, i.e. -960 .. +960 bytes).
GATHER_QPS xmm3, .index_full_range, .mask_11111111

; xmm1 will be zero after this.

hlt

align 4096

; Masks only care about the sign bit.
; Each label spells out its eight dwords in memory order (leftmost digit is
; dword 0). The gathers in this file load the mask with a 16-byte xmm move, so
; only dwords 0-3 of each table are actually read.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 is set. It lies beyond the 16 bytes an xmm load reads, so loading
; this table into xmm1 produces an all-zero mask.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 is set: selects element 0.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Every dword is set: selects every element.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; The gathers scale each index by 8, so a value of N addresses N * 8 bytes from
; the base register (.data_mid).
; All four elements read the same location.
.index_d0:
dq 0, 0, 0, 0

; Element 0 reads furthest forward (+24 bytes), element 3 reads the base itself.
.index_positive_increment:
dq 3, 2, 1, 0

; Element 0 reads furthest back (-32 bytes), element 3 reads -8 bytes.
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -960, -512, +512 and +960 from the base.
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vgather_qps_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit (ymm index register)
; 8x index scale
; 64-bit indexes
;
; With A = address of .data_mid, each index is built at runtime as roughly -A / 8
; and the gather base is set to 2 * A. index * 8 + base + 8 therefore only lands
; back inside the data tables after the 64-bit address arithmetic wraps around.
; The byte offsets noted below assume A is a multiple of 8
; (NOTE(review): follows from the align 4096 and table sizes in this file - confirm
; if the tables are ever resized).

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0: index * 8 = -A - 8, effective address = .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: index * 8 = -A - 16, effective address = .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: index * 8 = -A - 24, effective address = .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: index * 8 = -A + 8, effective address = .data_mid + 16
; The adjustment is done on the full 64-bit register, like the other elements,
; so a carry out of the low 16 bits is never lost.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination so any element that is not gathered would be visible.
vmovapd ymm15, [rel .data]

; Full mask: all four elements are gathered through the wrapped addresses.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vgatherqps xmm15, [ymm0 * 8 + rax + 8], xmm1

hlt

align 4096

; Sign bit set in every dword: selects every element. The test loads this with a
; 16-byte xmm move, so only the first four dwords are read.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: the test code overwrites all four entries at runtime
; before loading them into ymm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vhaddpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x4000000000000000", "0x3FF0000000000000", "0x4000000000000000"],
    "XMM1": ["0x4010000000000000", "0x4020000000000000", "0x4010000000000000", "0x4020000000000000"],
    "XMM2": ["0x4008000000000000", "0x4028000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4008000000000000", "0x4028000000000000", "0x4008000000000000", "0x4028000000000000"],
    "XMM4": ["0x4008000000000000", "0x4028000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4008000000000000", "0x4028000000000000", "0x4008000000000000", "0x4028000000000000"]
  }
}
%endif

; Test for VHADDPD (horizontal add of packed doubles), register and memory
; sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = {1.0, 2.0, 1.0, 2.0}, ymm1 = {4.0, 8.0, 4.0, 8.0}
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register source. Each 128-bit lane of the result is
; {src1[0] + src1[1], src2[0] + src2[1]} = {3.0, 12.0}.
; The 128-bit form is expected to leave the upper half of ymm2 zeroed.
vhaddpd xmm2, xmm0, xmm1
vhaddpd ymm3, ymm0, ymm1

; Same operations with the second source read from memory; results must
; match the register forms above.
vhaddpd xmm4, xmm0, [rdx + 32]
vhaddpd ymm5, ymm0, [rdx + 32]

hlt

; Test vectors. 32-byte alignment is needed for the aligned ymm loads above.
align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0
dq 0x3FF0000000000000 ; 1.0
dq 0x4000000000000000 ; 2.0

dq 0x4010000000000000 ; 4.0
dq 0x4020000000000000 ; 8.0
dq 0x4010000000000000 ; 4.0
dq 0x4020000000000000 ; 8.0


================================================
FILE: unittests/ASM/VEX/vhaddps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3F80000040000000", "0x4080000041000000", "0x3F80000040000000", "0x4080000041000000"],
    "XMM1": ["0x4180000042000000", "0x4280000043000000", "0x4180000042000000", "0x4280000043000000"],
    "XMM2": ["0x4140000040400000", "0x4340000042400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4140000040400000", "0x4340000042400000", "0x4140000040400000", "0x4340000042400000"],
    "XMM4": ["0x4140000040400000", "0x4340000042400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4140000040400000", "0x4340000042400000", "0x4140000040400000", "0x4340000042400000"]
  }
}
%endif

; Test for VHADDPS (horizontal add of packed singles), register and memory
; sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Load both sources (see .data below for the element values).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register source. Each 128-bit lane of the result is
; {src1[0] + src1[1], src1[2] + src1[3], src2[0] + src2[1], src2[2] + src2[3]}
; = {3.0, 12.0, 48.0, 192.0}.
; The 128-bit form is expected to leave the upper half of ymm2 zeroed.
vhaddps xmm2, xmm0, xmm1
vhaddps ymm3, ymm0, ymm1

; Same operations with the second source read from memory; results must
; match the register forms above.
vhaddps xmm4, xmm0, [rdx + 32]
vhaddps ymm5, ymm0, [rdx + 32]

hlt

; Test vectors. Each qword packs two floats; the comments list the float in
; the high dword first, then the float in the low dword.
align 32
.data:
dq 0x3F80000040000000 ; 1.0, 2.0
dq 0x4080000041000000 ; 4.0, 8.0
dq 0x3F80000040000000 ; 1.0, 2.0
dq 0x4080000041000000 ; 4.0, 8.0

dq 0x4180000042000000 ; 16.0, 32.0
dq 0x4280000043000000 ; 64.0, 128.0
dq 0x4180000042000000 ; 16.0, 32.0
dq 0x4280000043000000 ; 64.0, 128.0


================================================
FILE: unittests/ASM/VEX/vhsubpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x3FF0000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3FF0000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x3FF0000000000000", "0x4010000000000000", "0xC08F600000000000", "0xC05D000000000000"],
    "XMM5": ["0x3FF0000000000000", "0x4010000000000000", "0xC08F600000000000", "0xC05D000000000000"]
  }
}
%endif

; Test for VHSUBPD (horizontal subtract of packed doubles), register and
; memory sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = {2.0, 1.0, 20.0, 1024.0}, ymm1 = {8.0, 4.0, 12.0, 128.0}
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit forms: result is {src1[0] - src1[1], src2[0] - src2[1]} = {1.0, 4.0}
; and the upper half of the destination ymm register is expected to be zero.
vhsubpd xmm2, xmm0, xmm1
vhsubpd xmm3, xmm0, [rdx + 32]

; 256-bit forms: the low lane matches the 128-bit result, the high lane is
; {20.0 - 1024.0, 12.0 - 128.0} = {-1004.0, -116.0}.
vhsubpd ymm4, ymm0, ymm1
vhsubpd ymm5, ymm0, [rdx + 32]

hlt

; Test vectors. 32-byte alignment is needed for the aligned ymm loads above.
align 32
.data:
dq 0x4000000000000000 ; 2.0
dq 0x3FF0000000000000 ; 1.0
dq 0x4034000000000000 ; 20.0
dq 0x4090000000000000 ; 1024.0

dq 0x4020000000000000 ; 8.0
dq 0x4010000000000000 ; 4.0
dq 0x4028000000000000 ; 12.0
dq 0x4060000000000000 ; 128.0


================================================
FILE: unittests/ASM/VEX/vhsubps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x447B00003F800000", "0xC53FF000C1880000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x447B00003F800000", "0xC53FF000C1880000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x447B00003F800000", "0xC53FF000C1880000", "0xC2540000C2A80000", "0x43BF800000000000"],
    "XMM5": ["0x447B00003F800000", "0xC53FF000C1880000", "0xC2540000C2A80000", "0x43BF800000000000"]
  }
}
%endif

; Test for VHSUBPS (horizontal subtract of packed singles), register and
; memory sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Load both sources (see .data below for the element values).
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit forms: result is
; {src1[0] - src1[1], src1[2] - src1[3], src2[0] - src2[1], src2[2] - src2[3]}
; = {1.0, 1004.0, -17.0, -3071.0}, with the upper half of the destination
; ymm register expected to be zero.
vhsubps xmm2, xmm0, xmm1
vhsubps xmm3, xmm0, [rdx + 32]

; 256-bit forms: the low lane matches the 128-bit result, the high lane is
; {36.0 - 120.0, 25.0 - 78.0, -20.0 - -20.0, 256.0 - -127.0}
; = {-84.0, -53.0, 0.0, 383.0}.
vhsubps ymm4, ymm0, ymm1
vhsubps ymm5, ymm0, [rdx + 32]

hlt

; Test vectors. Each qword packs two floats; the comments list the float in
; the high dword first, then the float in the low dword.
align 32
.data:
dq 0x3F80000040000000 ; 1.0  , 2.0
dq 0x41A0000044800000 ; 20.0 , 1024.0
dq 0x42F0000042100000 ; 120.0, 36.0
dq 0x429C000041C80000 ; 78.0 , 25.0

dq 0x42A4000042820000 ; 82.0  , 65.0
dq 0x457FF00044800000 ; 4095.0, 1024.0
dq 0xC1A00000C1A00000 ; -20   , -20
dq 0xC2FE000043800000 ; -127.0, 256.0


================================================
FILE: unittests/ASM/VEX/vinsertf128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM2": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM3": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM5": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM6": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM7": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM8": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM9": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"]
  }
}
%endif

; Test for VINSERTF128: copy ymm0 to the destination and replace one of its
; 128-bit lanes with a 128-bit source (register or memory). Each case is run
; once with a register source and once with a memory source; both must agree.
lea rdx, [rel .data]

; ymm0 is the base vector, ymm1 supplies the inserted lane (only its low
; 128 bits are used through xmm1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Insert into upper lane
vinsertf128 ymm2, ymm0, xmm1, 1
vinsertf128 ymm3, ymm0, [rdx + 32], 1

; Insert into lower lane
vinsertf128 ymm4, ymm0, xmm1, 0
vinsertf128 ymm5, ymm0, [rdx + 32], 0

; Insert into upper lane - With garbage
; Only bit 0 of the immediate selects the lane; the expected results are
; identical to the imm=1 case above.
vinsertf128 ymm6, ymm0, xmm1, 0xFF
vinsertf128 ymm7, ymm0, [rdx + 32], 0xFF

; Insert into lower lane - With garbage
; Bit 0 is clear, so the expected results are identical to the imm=0 case.
vinsertf128 ymm8, ymm0, xmm1, 0xFE
vinsertf128 ymm9, ymm0, [rdx + 32], 0xFE

hlt

; Test vectors. 32-byte alignment is needed for the aligned ymm loads above.
align 32
.data:
; Base vector (ymm0)
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA

; Insert source (ymm1 / memory operand); only the first 16 bytes are inserted
dq 0xBBBBBBBBBBBBBBBB
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC
dq 0x9999999999999999


================================================
FILE: unittests/ASM/VEX/vinserti128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM2": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM3": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM5": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM6": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM7": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM8": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM9": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"]
  }
}
%endif

; Test for VINSERTI128: copy ymm0 to the destination and replace one of its
; 128-bit lanes with a 128-bit source (register or memory). Each case is run
; once with a register source and once with a memory source; both must agree.
lea rdx, [rel .data]

; ymm0 is the base vector, ymm1 supplies the inserted lane (only its low
; 128 bits are used through xmm1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Insert into upper lane
vinserti128 ymm2, ymm0, xmm1, 1
vinserti128 ymm3, ymm0, [rdx + 32], 1

; Insert into lower lane
vinserti128 ymm4, ymm0, xmm1, 0
vinserti128 ymm5, ymm0, [rdx + 32], 0

; Insert into upper lane - With garbage
; Only bit 0 of the immediate selects the lane; the expected results are
; identical to the imm=1 case above.
vinserti128 ymm6, ymm0, xmm1, 0xFF
vinserti128 ymm7, ymm0, [rdx + 32], 0xFF

; Insert into lower lane - With garbage
; Bit 0 is clear, so the expected results are identical to the imm=0 case.
vinserti128 ymm8, ymm0, xmm1, 0xFE
vinserti128 ymm9, ymm0, [rdx + 32], 0xFE

hlt

; Test vectors. 32-byte alignment is needed for the aligned ymm loads above.
align 32
.data:
; Base vector (ymm0)
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA

; Insert source (ymm1 / memory operand); only the first 16 bytes are inserted
dq 0xBBBBBBBBBBBBBBBB
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC
dq 0x9999999999999999


================================================
FILE: unittests/ASM/VEX/vinsertps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x4142434465666768", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4142434461626364", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x7576777845464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4142434445464748", "0x5152535471727374", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4142434445464748", "0x7576777855565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434445464748", "0x5152535475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x7576777845464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4142434475767778", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000065666768", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000000061626364", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test for VINSERTPS: insert one 32-bit element into a copy of the first
; source, then optionally zero elements of the result.
;
; Immediate layout used throughout this file:
;   bits 7:6 - element of the second source to read (register source only)
;   bits 5:4 - element of the destination to overwrite
;   bits 3:0 - zero mask; each set bit clears the matching result element
lea rdx, [rel .data]

; xmm0 is the base vector, xmm1 the register insert source.
vmovapd xmm0, [rdx]
vmovapd xmm1, [rdx + 16]

; Simple move Reg<-Reg
vinsertps xmm2, xmm0, xmm1, ((0b00 << 6) | (0b00 << 4) | (0b0000))
vinsertps xmm3, xmm0, xmm1, ((0b01 << 6) | (0b00 << 4) | (0b0000))
vinsertps xmm4, xmm0, xmm1, ((0b10 << 6) | (0b01 << 4) | (0b0000))
vinsertps xmm5, xmm0, xmm1, ((0b11 << 6) | (0b10 << 4) | (0b0000))

; Simple move Reg<-Mem
; The memory form always reads the single dword at the given address
; (0x75767778 here); bits 7:6 of the immediate do not change which element
; is read, so only the destination index differs between these results.
vinsertps xmm6, xmm0, [rdx + 8 * 3], ((0b00 << 6) | (0b11 << 4) | (0b0000))
vinsertps xmm7, xmm0, [rdx + 8 * 3], ((0b01 << 6) | (0b10 << 4) | (0b0000))
vinsertps xmm8, xmm0, [rdx + 8 * 3], ((0b10 << 6) | (0b01 << 4) | (0b0000))
vinsertps xmm9, xmm0, [rdx + 8 * 3], ((0b11 << 6) | (0b00 << 4) | (0b0000))

; Simple move Reg<-Reg with mask
; Zero mask 0b0010 clears result element 1 after the insert into element 0.
vinsertps xmm10, xmm0, xmm1, ((0b00 << 6) | (0b00 << 4) | (0b0010))
vinsertps xmm11, xmm0, xmm1, ((0b01 << 6) | (0b00 << 4) | (0b0010))

; Full ZMask
vinsertps xmm12, xmm0, xmm1, ((0b00 << 6) | (0b00 << 4) | (0b1111))

; Full ZMask, with garbage in the upper bits
; ymm13 is pre-filled with non-zero data so the test also checks that the
; upper 128 bits of the destination are cleared (expected value is all zero).
vmovapd ymm13, [rel .data_bad]
vinsertps xmm13, xmm0, xmm1, ((0b00 << 6) | (0b00 << 4) | (0b1111))

hlt

align 32
.data:
; Base vector (xmm0)
dq 0x4142434445464748
dq 0x5152535455565758

; Insert source (xmm1); the dword at [rdx + 8 * 3] is the low half of the
; second qword
dq 0x6162636465666768
dq 0x7172737475767778

; Non-zero filler preloaded into ymm13 before the last test
.data_bad:
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888


================================================
FILE: unittests/ASM/VEX/vlddqu.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Test for VLDDQU: plain 128-bit and 256-bit vector loads from memory.
lea rdx, [rel .data]

; 128-bit load of the second 16 bytes; the upper half of ymm0 is expected
; to be zero afterwards.
vlddqu xmm0, [rdx + 16 * 1]
; 256-bit load of the whole table.
vlddqu ymm1, [rdx + 32 * 0]

hlt

align 16
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vldmxcsr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0xFFC0"
  }
}
%endif

; Test for VLDMXCSR / VSTMXCSR: write MXCSR from memory, store it back, and
; check which bits survive the round trip.
lea rdx, [rel .data]

; Load junk into all of the low 16 MXCSR bits and check which ones are
; preserved by a store immediately afterwards.
;
; The expected result (0xFFC0, see RAX in the config above) is the default
; MXCSR (0x1F80) with the rounding mode bits (bits 13 and 14), the FTZ bit
; (bit 15) and the DAZ bit (bit 6) all set. The exception flag bits
; (bits 0-5), which were written as ones, are expected to read back as zero.
;
; Essentially just a small test to ensure we are indeed setting and saving
; the bits that we do emulate.
;
; NOTE(review): an earlier version of this comment listed only the rounding
; mode and FTZ bits (which would give 0xFF80); the expected value implies
; DAZ is emulated as well - confirm against the MXCSR implementation.

vldmxcsr [rdx]
vstmxcsr [rdx]
; 64-bit read of the stored value; the upper 32 bits come from the zero
; upper half of the dq below.
mov rax, [rdx]

hlt

align 4
.data:
dq 0x000000000000FFFF


================================================
FILE: unittests/ASM/VEX/vmaskmovdqu.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0x5152535455565758",
    "RCX": "0x41424344FFFFFFFF",
    "RSP": "0x51525354FFFFFFFF",
    "RSI": "0xFFFFFFFFFFFFFFFF",
    "RDI": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; Test for VMASKMOVDQU: byte-granular masked store of the first operand to
; the address in rdi. A byte is written only when the most significant bit
; of the corresponding mask byte (second operand) is set.
lea rdx, [rel .data]

; xmm0 = data to store, xmm1..xmm3 = the three masks under test.
vmovaps xmm0, [rdx + 16 * 0]
vmovaps xmm1, [rdx + 16 * 1]
vmovaps xmm2, [rdx + 16 * 2]
vmovaps xmm3, [rdx + 16 * 3]

; All mask bytes selected: the whole 16 bytes are written.
lea rdi, [rdx + 16 * 4]
vmaskmovdqu xmm0, xmm1

; Only the upper four bytes of each qword selected: the lower four bytes of
; each destination qword keep their initial 0xFF fill.
lea rdi, [rdx + 16 * 5]
vmaskmovdqu xmm0, xmm2

; No mask bytes selected: the destination keeps its initial -1 fill.
lea rdi, [rdx + 16 * 6]
vmaskmovdqu xmm0, xmm3

; Read the three destination buffers back into general purpose registers so
; the harness can compare them. Note that rsp and rdi are reused purely as
; result registers here.
mov rax, qword [rdx + 8 * 8]
mov rbx, qword [rdx + 8 * 9]

mov rcx, qword [rdx + 8 * 10]
mov rsp, qword [rdx + 8 * 11]

mov rsi, qword [rdx + 8 * 12]
mov rdi, qword [rdx + 8 * 13]

hlt

align 4096
.data:
; Data to store (xmm0)
dq 0x4142434445464748
dq 0x5152535455565758

; Mask: every byte selected (xmm1)
dq 0x8080808080808080
dq 0x8080808080808080

; Mask: upper four bytes of each qword selected (xmm2)
dq 0x8080808000000000
dq 0x8080808000000000

; Mask: nothing selected (xmm3)
dq 0x0000000000000000
dq 0x0000000000000000

; Destination for the xmm1 mask, pre-filled with ones
dq -1
dq -1

; Destination for the xmm2 mask, pre-filled with ones
dq -1
dq -1

; Destination for the xmm3 mask, pre-filled with ones
dq -1
dq -1


================================================
FILE: unittests/ASM/VEX/vmaskmovpd_load.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x8868C3F30AED56E0", "0x10FCE9E284E6E6DE", "0x1DDDDDDD8DDDDDDD", "0x8CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test for the load form of VMASKMOVPD: each 64-bit element is loaded from
; memory when the sign bit of the matching mask element is set, and written
; as zero otherwise. Every mask is tested in 256-bit and 128-bit form; the
; 128-bit form is expected to zero the upper half of the destination.
lea rdx, [rel .data]

; ymm0..ymm2 = the three masks under test.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]

; Mixed mask: elements 0 and 3 are loaded, elements 1 and 2 become zero.
vmaskmovpd ymm3, ymm0, [rdx]
vmaskmovpd xmm4, xmm0, [rdx]

; All-zero mask: the result is all zero.
vmaskmovpd ymm5, ymm1, [rdx]
vmaskmovpd xmm6, xmm1, [rdx]

; All elements selected: behaves like a full load.
vmaskmovpd ymm7, ymm2, [rdx]
vmaskmovpd xmm8, xmm2, [rdx]

hlt

align 4096
.data:
; Source data for the masked loads
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [1, 0, 0, 1])
; Only bit 63 of each qword matters; the remaining bits are noise.
dq 0x8868C3F30AED56E0
dq 0x10FCE9E284E6E6DE
dq 0x1DDDDDDD8DDDDDDD
dq 0x8CCCCCCC0CCCCCCC

; All-zero mask: no element is selected, so memory should not be touched at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000


================================================
FILE: unittests/ASM/VEX/vmaskmovpd_store.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x8868C3F30AED56E0", "0x10FCE9E284E6E6DE", "0x1DDDDDDD8DDDDDDD", "0x8CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xA76C4F06A12BFCE0", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM5": ["0xA76C4F06A12BFCE0", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM7": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM9": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Test for the store form of VMASKMOVPD: each 64-bit element of the source
; is written to memory only when the sign bit of the matching mask element
; is set; unselected memory must keep its previous contents. Every mask is
; tested in 256-bit and 128-bit form, each into its own scratch buffer.
lea rdx, [rel .data]

; ymm0..ymm2 = the three masks under test, ymm3 = data to store.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
vmovaps ymm3, [rdx]

; Mixed mask: elements 0 and 3 are stored (only element 0 in the xmm form).
vmaskmovpd [rel .scratch1], ymm0, ymm3
vmaskmovpd [rel .scratch2], xmm0, xmm3

; All-zero mask: nothing is stored, the scratch buffers stay all ones.
vmaskmovpd [rel .scratch3], ymm1, ymm3
vmaskmovpd [rel .scratch4], xmm1, xmm3

; All elements selected: full store (first 16 bytes only in the xmm form).
vmaskmovpd [rel .scratch5], ymm2, ymm3
vmaskmovpd [rel .scratch6], xmm2, xmm3

; Now reload to verify results
; Full 32-byte reloads are used even for the xmm stores, so bytes beyond the
; first 16 are checked to still hold the 0xFF fill.
vmovaps ymm4, [rel .scratch1]
vmovaps ymm5, [rel .scratch2]
vmovaps ymm6, [rel .scratch3]
vmovaps ymm7, [rel .scratch4]
vmovaps ymm8, [rel .scratch5]
vmovaps ymm9, [rel .scratch6]

hlt

align 4096
.data:
; Data to store (ymm3)
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xDDDDDDDDDDDDDDDD
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [1, 0, 0, 1])
; Only bit 63 of each qword matters; the remaining bits are noise.
dq 0x8868C3F30AED56E0
dq 0x10FCE9E284E6E6DE
dq 0x1DDDDDDD8DDDDDDD
dq 0x8CCCCCCC0CCCCCCC

; All-zero mask: no element is selected, so memory should not be touched at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000

; Scratch destinations, one per store above, each pre-filled with ones so
; that untouched elements are distinguishable from stored data.
.scratch1:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch2:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch3:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch4:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch5:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch6:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vmaskmovps_load.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x0868C3F30AED56E0", "0x80FCE9E284E6E6DE", "0x8DDDDDDD8DDDDDDD", "0x0CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0x0000000000000000", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
      "XMM4": ["0x0000000000000000", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test for the load form of VMASKMOVPS: each 32-bit element is loaded from
; memory when the sign bit of the matching mask element is set, and written
; as zero otherwise. Every mask is tested in 256-bit and 128-bit form; the
; 128-bit form is expected to zero the upper half of the destination.
lea rdx, [rel .data]

; ymm0..ymm2 = the three masks under test.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]

; Mixed mask: elements 2-5 are loaded, elements 0, 1, 6 and 7 become zero.
vmaskmovps ymm3, ymm0, [rdx]
vmaskmovps xmm4, xmm0, [rdx]

; All-zero mask: the result is all zero.
vmaskmovps ymm5, ymm1, [rdx]
vmaskmovps xmm6, xmm1, [rdx]

; All elements selected: behaves like a full load.
vmaskmovps ymm7, ymm2, [rdx]
vmaskmovps xmm8, xmm2, [rdx]

hlt

align 4096
.data:
; Source data for the masked loads
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [0, 0, 1, 1, 1, 1, 0, 0])
; Only bit 31 of each dword matters; the remaining bits are noise.
dq 0x0868C3F30AED56E0
dq 0x80FCE9E284E6E6DE
dq 0x8DDDDDDD8DDDDDDD
dq 0x0CCCCCCC0CCCCCCC

; All-zero mask: no element is selected, so memory should not be touched at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000


================================================
FILE: unittests/ASM/VEX/vmaskmovps_store.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x0868C3F30AED56E0", "0x80FCE9E284E6E6DE", "0x8DDDDDDD8DDDDDDD", "0x0CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM7": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM9": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Test for the store form of VMASKMOVPS: each 32-bit element of the source
; is written to memory only when the sign bit of the matching mask element
; is set; unselected memory must keep its previous contents. Every mask is
; tested in 256-bit and 128-bit form, each into its own scratch buffer.
lea rdx, [rel .data]

; ymm0..ymm2 = the three masks under test, ymm3 = data to store.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
vmovaps ymm3, [rdx]

; Mixed mask: elements 2-5 are stored (only elements 2-3 in the xmm form).
vmaskmovps [rel .scratch1], ymm0, ymm3
vmaskmovps [rel .scratch2], xmm0, xmm3

; All-zero mask: nothing is stored, the scratch buffers stay all ones.
vmaskmovps [rel .scratch3], ymm1, ymm3
vmaskmovps [rel .scratch4], xmm1, xmm3

; All elements selected: full store (first 16 bytes only in the xmm form).
vmaskmovps [rel .scratch5], ymm2, ymm3
vmaskmovps [rel .scratch6], xmm2, xmm3

; Now reload to verify results
; Full 32-byte reloads are used even for the xmm stores, so bytes beyond the
; first 16 are checked to still hold the 0xFF fill.
vmovaps ymm4, [rel .scratch1]
vmovaps ymm5, [rel .scratch2]
vmovaps ymm6, [rel .scratch3]
vmovaps ymm7, [rel .scratch4]
vmovaps ymm8, [rel .scratch5]
vmovaps ymm9, [rel .scratch6]

hlt

align 4096
.data:
; Data to store (ymm3)
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xDDDDDDDDDDDDDDDD
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [0, 0, 1, 1, 1, 1, 0, 0])
; Only bit 31 of each dword matters; the remaining bits are noise.
dq 0x0868C3F30AED56E0
dq 0x80FCE9E284E6E6DE
dq 0x8DDDDDDD8DDDDDDD
dq 0x0CCCCCCC0CCCCCCC

; All-zero mask: no element is selected, so memory should not be touched at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000

; Scratch destinations, one per store above, each pre-filled with ones so
; that untouched elements are distinguishable from stored data.
.scratch1:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch2:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch3:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch4:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch5:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

.scratch6:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vmaxpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4000000000000000", "0x4008000000000000", "0x4000000000000000"],
    "XMM1": ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
    "XMM2": ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
    "XMM5": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"]
  }
}
%endif

; Test for VMAXPD (element-wise maximum of packed doubles), register and
; memory sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = {3.0, 2.0, 3.0, 2.0}, ymm1 = {1.0, 3.0, 1.0, 3.0}
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; Every element of the result is 3.0; the xmm form is expected to zero the
; upper half of ymm2.
vmaxpd xmm2, xmm0, [rdx + 32]
vmaxpd ymm4, ymm0, [rdx + 32]

; Register only
; The ymm form swaps the source order; the expected result is unchanged.
vmaxpd xmm3, xmm0, xmm1
vmaxpd ymm5, ymm1, ymm0

hlt

; Test vectors. 32-byte alignment is needed for the aligned ymm loads above.
align 32
.data:
dq 0x4008000000000000 ; 3.0
dq 0x4000000000000000 ; 2.0
dq 0x4008000000000000 ; 3.0
dq 0x4000000000000000 ; 2.0

dq 0x3FF0000000000000 ; 1.0
dq 0x4008000000000000 ; 3.0
dq 0x3FF0000000000000 ; 1.0
dq 0x4008000000000000 ; 3.0


================================================
FILE: unittests/ASM/VEX/vmaxps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM2": ["0x40C0000040A00000", "0x4100000040E00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x40C0000040A00000", "0x4100000040E00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM5": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"]
  }
}
%endif

; Test for VMAXPS (element-wise maximum of packed singles), register and
; memory sources, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 holds the values 1-4 (repeated per lane), ymm1 the values 5-8, so
; every element of the maximum comes from ymm1.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; The xmm form is expected to zero the upper half of ymm2.
vmaxps xmm2, xmm0, [rdx + 32]
vmaxps ymm4, ymm0, [rdx + 32]

; Register only
; The ymm form swaps the source order; the expected result is unchanged.
vmaxps xmm3, xmm0, xmm1
vmaxps ymm5, ymm1, ymm0

hlt

; Test vectors. Each qword packs two floats; the comments list the float in
; the high dword first, then the float in the low dword.
align 32
.data:
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3

dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7


================================================
FILE: unittests/ASM/VEX/vmaxsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4010000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4030000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Test for VMAXSD (scalar double maximum). Only the low 64 bits are
; compared; bits 127:64 of the result are copied from the first source and
; the upper 128 bits of the destination ymm register are expected to be zero.
lea rdx, [rel .data]

; ymm0..ymm4 low elements = 1.0, 4.0, 9.0, 16.0, 25.0; all other elements
; hold the filler pattern 0x5152535455565758.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; Destination aliases the first source: max(1.0, 4.0) and max(9.0, 16.0).
; ymm1 and ymm3 are only read, so they must keep their full 256-bit contents.
vmaxsd xmm0, xmm0, xmm1
vmaxsd xmm2, xmm2, xmm3

; Memory operand
; max(25.0, 4.0) into a fresh register, then max(25.0, 1.0) in place.
vmaxsd xmm5, xmm4, [rdx + 32 * 1]
vmaxsd xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; xmm7 starts as zero; the result is max(16.0, 25.0) in the low element and
; bits 127:64 taken from xmm8. ymm8 and ymm9 must be left unmodified.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vmaxsd xmm7, xmm8, xmm9

hlt

; Test vectors: one 32-byte row per input, scalar operand in the first qword.
align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vmaxss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434440800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4142434441800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Test for VMAXSS (scalar single maximum). Only the low 32 bits are
; compared; bits 127:32 of the result are copied from the first source and
; the upper 128 bits of the destination ymm register are expected to be zero.
lea rdx, [rel .data]

; ymm0..ymm4 low floats = 1.0, 4.0, 9.0, 16.0, 25.0; the remaining bits hold
; filler patterns (0x41424344 in bits 63:32, 0x5152535455565758 elsewhere).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; Destination aliases the first source: max(1.0, 4.0) and max(9.0, 16.0).
; ymm1 and ymm3 are only read, so they must keep their full 256-bit contents.
vmaxss xmm0, xmm0, xmm1
vmaxss xmm2, xmm2, xmm3

; Memory operand
; max(25.0, 4.0) into a fresh register, then max(25.0, 1.0) in place.
vmaxss xmm5, xmm4, [rdx + 32 * 1]
vmaxss xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; xmm7 starts as zero; the result is max(16.0, 25.0) in the low element and
; bits 127:32 taken from xmm8. ymm8 and ymm9 must be left unmodified.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vmaxss xmm7, xmm8, xmm9

hlt

; Test vectors: one 32-byte row per input, scalar operand in the low dword
; of the first qword (the comment gives that float's value).
align 32
.data:
dq 0x414243443F800000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441C80000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vminpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4000000000000000", "0x4008000000000000", "0x4000000000000000"],
    "XMM1": ["0x3FF0000000000000", "0x4008000000000000", "0x3FF0000000000000", "0x4008000000000000"],
    "XMM2": ["0x3FF0000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3FF0000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x3FF0000000000000", "0x4000000000000000", "0x3FF0000000000000", "0x4000000000000000"],
    "XMM5": ["0x3FF0000000000000", "0x4000000000000000", "0x3FF0000000000000", "0x4000000000000000"]
  }
}
%endif

; Exercises VMINPD (packed double-precision minimum) in 128-bit and 256-bit
; forms, with the second source taken from memory and from a register.
lea rdx, [rel .data]

; ymm0 = {3.0, 2.0, 3.0, 2.0}, ymm1 = {1.0, 3.0, 1.0, 3.0} (lowest element first)
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; The 128-bit result in ymm2 is expected to have its upper 128 bits zeroed
; (see XMM2 in RegData).
vminpd xmm2, xmm0, [rdx + 32]
vminpd ymm4, ymm0, [rdx + 32]

; Register only
; Same inputs as above; the 256-bit case passes the sources in swapped order.
vminpd xmm3, xmm0, xmm1
vminpd ymm5, ymm1, ymm0

hlt

align 32
.data:
; First operand (loaded into ymm0)
dq 0x4008000000000000 ; 3.0
dq 0x4000000000000000 ; 2.0
dq 0x4008000000000000 ; 3.0
dq 0x4000000000000000 ; 2.0

; Second operand (loaded into ymm1 and used directly as the memory source)
dq 0x3FF0000000000000 ; 1.0
dq 0x4008000000000000 ; 3.0
dq 0x3FF0000000000000 ; 1.0
dq 0x4008000000000000 ; 3.0


================================================
FILE: unittests/ASM/VEX/vminps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM2": ["0x400000003F800000", "0x4080000040400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x400000003F800000", "0x4080000040400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM5": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"]
  }
}
%endif

; Exercises VMINPS (packed single-precision minimum) in 128-bit and 256-bit
; forms, with the second source taken from memory and from a register.
lea rdx, [rel .data]

; ymm0 holds the floats 1..4 (repeated in both 128-bit halves),
; ymm1 holds the floats 5..8 (repeated in both 128-bit halves).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
; The 128-bit result in ymm2 is expected to have its upper 128 bits zeroed
; (see XMM2 in RegData).
vminps xmm2, xmm0, [rdx + 32]
vminps ymm4, ymm0, [rdx + 32]

; Register only
; NOTE(review): every element of ymm0 is smaller than the matching element of
; ymm1, so all expected results equal ymm0 regardless of operand order.
vminps xmm3, xmm0, xmm1
vminps ymm5, ymm1, ymm0

hlt

align 32
.data:
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3

dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7


================================================
FILE: unittests/ASM/VEX/vminsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4022000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x3FF0000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4010000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4030000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Exercises VMINSD (scalar double-precision minimum). Only the low 64 bits are
; compared; per RegData, bits 127:64 of the result come from the first source
; and bits 255:128 of the destination end up zero.
lea rdx, [rel .data]

; Low doubles: ymm0 = 1.0, ymm1 = 4.0, ymm2 = 9.0, ymm3 = 16.0, ymm4 = 25.0
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; Destination aliases the first source: min(1.0, 4.0) and min(9.0, 16.0).
vminsd xmm0, xmm0, xmm1
vminsd xmm2, xmm2, xmm3

; Memory operand
; xmm5 = min(25.0, 4.0); then xmm4 is overwritten in place with min(25.0, 1.0).
vminsd xmm5, xmm4, [rdx + 32 * 1]
vminsd xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; Destination is distinct from both sources and cleared beforehand.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vminsd xmm7, xmm8, xmm9

hlt

align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vminss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x414243443F800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4142434441100000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x414243443F800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434440800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4142434441800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Exercises VMINSS (scalar single-precision minimum). Only the low 32 bits are
; compared; per RegData, bits 127:32 of the result come from the first source
; and bits 255:128 of the destination end up zero.
lea rdx, [rel .data]

; Low floats: ymm0 = 1.0, ymm1 = 4.0, ymm2 = 9.0, ymm3 = 16.0, ymm4 = 25.0
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; Destination aliases the first source: min(1.0, 4.0) and min(9.0, 16.0).
vminss xmm0, xmm0, xmm1
vminss xmm2, xmm2, xmm3

; Memory operand
; xmm5 = min(25.0, 4.0); then xmm4 is overwritten in place with min(25.0, 1.0).
vminss xmm5, xmm4, [rdx + 32 * 1]
vminss xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; Destination is distinct from both sources and cleared beforehand.
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vminss xmm7, xmm8, xmm9

hlt

align 32
.data:
dq 0x414243443F800000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441C80000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vmovapd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VMOVAPD loads from 32-byte-aligned memory in 256-bit and 128-bit forms.
lea rdx, [rel .data]

; Load inputs
; ymm1: full 256-bit load of the 0xFF pattern.
; xmm2: 128-bit load of the same data; RegData expects the upper 128 bits zero.
; ymm3: full 256-bit load of the 0xCC pattern that follows.
vmovapd ymm1, [rdx]
vmovapd xmm2, [rdx]
vmovapd ymm3, [rdx + 32]

hlt

align 32
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC


================================================
FILE: unittests/ASM/VEX/vmovapd_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x6162636465666768",
    "RBX": "0x7172737475767778",
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVAPD against writable memory: load truncation and a 256-bit store.
; rdx holds a fixed scratch address (presumably mapped by the test harness).
mov rdx, 0xe0000000

; First 32 bytes: four distinct qwords used as the store source.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Second 32 bytes: filler patterns that the store below overwrites.
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 8 * 4], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 8 * 5], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 8 * 6], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 7], rax

; Test truncation
; Fill all of ymm2 first, then reload only 128 bits; RegData expects the
; upper 128 bits of ymm2 to be zero afterwards.
vmovapd ymm2, [rdx + 8 * 4]
vmovapd xmm2, [rdx + 8 * 4]

; Test memory overwrite
; Store ymm0 over the filler region, and also copy it register-to-register.
vmovapd ymm0, [rdx]
vmovapd [rdx + 8 * 4], ymm0
vmovapd ymm1, ymm0

; Read back the upper two qwords of the stored region through GPRs.
mov rax, [rdx + 8 * 6]
mov rbx, [rdx + 8 * 7]

hlt


================================================
FILE: unittests/ASM/VEX/vmovaps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VMOVAPS loads from 32-byte-aligned memory in 256-bit and 128-bit forms.
lea rdx, [rel .data]

; Load inputs
; ymm1: full 256-bit load of the 0xFF pattern.
; xmm2: 128-bit load of the same data; RegData expects the upper 128 bits zero.
; ymm3: full 256-bit load of the 0xCC pattern that follows.
vmovaps ymm1, [rdx]
vmovaps xmm2, [rdx]
vmovaps ymm3, [rdx + 32]

hlt

align 32
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC


================================================
FILE: unittests/ASM/VEX/vmovaps_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x6162636465666768",
    "RBX": "0x7172737475767778",
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVAPS against writable memory: load truncation and a 256-bit store.
; rdx holds a fixed scratch address (presumably mapped by the test harness).
mov rdx, 0xe0000000

; First 32 bytes: four distinct qwords used as the store source.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Second 32 bytes: filler patterns that the store below overwrites.
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 8 * 4], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 8 * 5], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 8 * 6], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 7], rax

; Test truncation
; Fill all of ymm2 first, then reload only 128 bits; RegData expects the
; upper 128 bits of ymm2 to be zero afterwards.
vmovaps ymm2, [rdx + 8 * 4]
vmovaps xmm2, [rdx + 8 * 4]

; Test memory overwrite
; Store ymm0 over the filler region, and also copy it register-to-register.
vmovaps ymm0, [rdx]
vmovaps [rdx + 8 * 4], ymm0
vmovaps ymm1, ymm0

; Read back the upper two qwords of the stored region through GPRs.
mov rax, [rdx + 8 * 6]
mov rbx, [rdx + 8 * 7]

hlt


================================================
FILE: unittests/ASM/VEX/vmovddup.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x0808080809090909"],
      "XMM1": ["0xEEEEEEEEFFFFFFFF", "0xEEEEEEEEFFFFFFFF", "0xAAAAAAAABBBBBBBB", "0xAAAAAAAABBBBBBBB"],
      "XMM2": ["0xEEEEEEEEFFFFFFFF", "0xEEEEEEEEFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xEEEEEEEEFFFFFFFF", "0xEEEEEEEEFFFFFFFF", "0xAAAAAAAABBBBBBBB", "0xAAAAAAAABBBBBBBB"],
      "XMM4": ["0xEEEEEEEEFFFFFFFF", "0xEEEEEEEEFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0xEEEEEEEEFFFFFFFF", "0xEEEEEEEEFFFFFFFF", "0xAAAAAAAABBBBBBBB", "0xAAAAAAAABBBBBBBB"],
      "XMM6": ["0xCCCCCCCCDDDDDDDD", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVDDUP, which per RegData duplicates the even-indexed qword of
; each 128-bit half: {q0, q0} for the 128-bit form, {q0, q0, q2, q2} for 256-bit.
lea rdx, [rel .data]

;; Register duplication
; ymm0 keeps the unmodified input so RegData can check it was not clobbered.
vmovapd ymm0, [rdx]
vmovddup ymm1, ymm0
; 128-bit
; Upper 128 bits of ymm2 are expected to be zero.
vmovddup xmm2, xmm0

;; Same register
; Source and destination alias each other.
vmovapd ymm3, ymm0
vmovddup ymm3, ymm3
; 128-bit
; ymm4 is fully populated first, so the expected zero upper half must come
; from the 128-bit vmovddup itself.
vmovapd ymm4, ymm0
vmovddup xmm4, xmm4

;; From memory
vmovddup ymm5, [rdx]
; 128-bit
; Reads the qword at offset 8, i.e. the second qword of the table.
vmovddup xmm6, [rdx + 8]

hlt

align 32
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xEE, 0xEE, 0xEE, 0xEE, 0xDD, 0xDD, 0xDD, 0xDD, 0xCC, 0xCC, 0xCC, 0xCC
db 0xBB, 0xBB, 0xBB, 0xBB, 0xAA, 0xAA, 0xAA, 0xAA, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08


================================================
FILE: unittests/ASM/VEX/vmovdqa.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD"],
      "XMM4": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVDQA loads and stores in 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Load inputs
; xmm2 is a 128-bit load; RegData expects its upper 128 bits to be zero.
vmovdqa ymm1, [rdx]
vmovdqa xmm2, [rdx]
vmovdqa ymm3, [rdx + 32]

; Test memory overwrite
; Rewrite the second 32 bytes of the table with four distinct qword patterns.
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 32], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 40], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 48], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 56], rax

; ymm4 picks up the rewritten data.
; The 128-bit store must only touch the first 16 bytes: ymm5 reads back
; {CC.., DD.., FF.., FF..}. The 256-bit store then replaces all 32 bytes,
; which ymm6 reads back.
vmovdqa ymm4, [rdx + 32]
vmovdqa [rdx], xmm4
vmovapd ymm5, [rdx]
vmovdqa [rdx], ymm4
vmovapd ymm6, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD


================================================
FILE: unittests/ASM/VEX/vmovdqu.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD"],
      "XMM4": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVDQU loads and stores in 128-bit and 256-bit forms.
; NOTE(review): .data is 4096-byte aligned and only offsets 0 and 32 are used,
; so every access here is aligned; unaligned addresses are not exercised.
lea rdx, [rel .data]

; Load inputs
; xmm2 is a 128-bit load; RegData expects its upper 128 bits to be zero.
vmovdqu ymm1, [rdx]
vmovdqu xmm2, [rdx]
vmovdqu ymm3, [rdx + 32]

; Test memory overwrite
; Rewrite the second 32 bytes of the table with four distinct qword patterns.
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 32], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 40], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 48], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 56], rax

; ymm4 picks up the rewritten data.
; The 128-bit store must only touch the first 16 bytes: ymm5 reads back
; {CC.., DD.., FF.., FF..}. The 256-bit store then replaces all 32 bytes,
; which ymm6 reads back.
vmovdqu ymm4, [rdx + 32]
vmovdqu [rdx], xmm4
vmovapd ymm5, [rdx]
vmovdqu [rdx], ymm4
vmovapd ymm6, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD


================================================
FILE: unittests/ASM/VEX/vmovhlps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM3": ["0x4150f0e342241b6c", "0xdddddddddddddddd", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
      "XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"]
  }
}
%endif

; Exercises VMOVHLPS. Per RegData, the result's low qword is the high qword of
; the last operand, its high qword is the high qword of the middle operand,
; and bits 255:128 of the destination are zero.
; Load inputs
; ymm1/ymm2 come from the fixed byte patterns, ymm5/ymm6 from the float table.
; ymm6 is never used as an operand below; RegData expects it to still hold
; the loaded data at the end.
vmovapd ymm1, [rel .data]
vmovapd ymm2, [rel .data + 32]
vmovapd ymm5, [rel .data_random]
vmovapd ymm6, [rel .data_random]

; First case overwrites a fully populated destination (ymm1).
; Second case feeds that result back in as the middle operand.
vmovhlps xmm1, xmm2, xmm5
vmovhlps xmm3, xmm1, xmm5

hlt

align 32
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF

.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/VEX/vmovhpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xCCCCCCCCCCCCCCCC", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVHPD in both its load (register destination) and store forms.
lea rdx, [rel .data]

;; Register as DST tests
; Load inputs
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx + 32]

; Per RegData: the low qword is taken from the register source, the high qword
; from the 64-bit memory operand, and bits 255:128 of the destination are zero.
; The second case chains off the result of the first.
vmovhpd xmm1, xmm2, [rdx + 48]
vmovhpd xmm3, xmm1, [rdx + 56]

;; Store to memory test
; Overwrite beginning of data, then yank it back into a vector
; Nothing in memory should be modified except the first 64 bits.
; The value stored is the high qword of xmm2 (0xDD.. per RegData for XMM4).
vmovhpd [rdx], xmm2
vmovapd ymm4, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF


================================================
FILE: unittests/ASM/VEX/vmovhps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xCCCCCCCCCCCCCCCC", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVHPS in both its load (register destination) and store forms.
lea rdx, [rel .data]

;; Register as DST tests
; Load inputs
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx + 32]

; Per RegData: the low qword is taken from the register source, the high qword
; from the 64-bit memory operand, and bits 255:128 of the destination are zero.
; The second case chains off the result of the first.
vmovhps xmm1, xmm2, [rdx + 48]
vmovhps xmm3, xmm1, [rdx + 56]

;; Store to memory test
; Overwrite beginning of data, then yank it back into a vector
; Nothing in memory should be modified except the first 64 bits.
; The value stored is the high qword of xmm2 (0xDD.. per RegData for XMM4).
vmovhps [rdx], xmm2
vmovapd ymm4, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF


================================================
FILE: unittests/ASM/VEX/vmovlhps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x43cc1ad6970b4549", "0xc4be43cc1ad6970b", "0", "0"],
    "XMM1": ["0x43cc1ad6970b4549", "0xbd7eb46a1278f793", "0xef673dac6e4cbb7b", "0x5b3d85d342718be9"],
    "XMM2": ["0xc4be43cc1ad6970b", "0x4549bd7eb46a1278", "0xf793ef673dac6e4c", "0xbb7b5b3d85d34271"]
  }
}
%endif

; Exercises VMOVLHPS. Per RegData, the result's low qword is the low qword of
; the middle operand, its high qword is the low qword of the last operand, and
; bits 255:128 of the destination are zero.
lea rdx, [rel .data]

; ymm0 is pre-filled so the expected zero upper half must come from vmovlhps.
; ymm1 and ymm2 are the sources and are expected to remain unchanged.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm2, [rdx + 64]

vmovlhps xmm0, xmm1, xmm2

hlt

align 32
.data:
; Rows 0-2 are loaded into ymm0, ymm1 and ymm2 respectively.
; The last row is not referenced by the code above.
dq 0xfdecd28fab3fa4a5, 0x7d7ccd8836d09fc2, 0xccdbcfc31f3ff0f3, 0x108390defebac4be
dq 0x43cc1ad6970b4549, 0xbd7eb46a1278f793, 0xef673dac6e4cbb7b, 0x5b3d85d342718be9
dq 0xc4be43cc1ad6970b, 0x4549bd7eb46a1278, 0xf793ef673dac6e4c, 0xbb7b5b3d85d34271
dq 0x000043cc1ad6970b, 0x4549bd7eb46a1278, 0xf793ef673dac6e4c, 0xbb7b5b3d85d34271


================================================
FILE: unittests/ASM/VEX/vmovlpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xEEEEEEEEEEEEEEEE", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM3": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVLPD in both its load (register destination) and store forms.
lea rdx, [rel .data]

;; Register as DST tests
; Load inputs
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx + 32]

; Per RegData: the low qword is taken from the 64-bit memory operand, the high
; qword from the register source, and bits 255:128 of the destination are zero.
; The second case chains off the result of the first.
vmovlpd xmm1, xmm2, [rdx + 48]
vmovlpd xmm3, xmm1, [rdx + 56]

;; Store to memory test
; Overwrite beginning of data, then yank it back into a vector
; Nothing in memory should be modified except the first 64 bits.
; The value stored is the low qword of xmm2 (0xCC.. per RegData for XMM4).
vmovlpd [rdx], xmm2
vmovapd ymm4, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF


================================================
FILE: unittests/ASM/VEX/vmovlps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xEEEEEEEEEEEEEEEE", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF"],
      "XMM3": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Exercises VMOVLPS in both its load (register destination) and store forms.
lea rdx, [rel .data]

;; Register as DST tests
; Load inputs
vmovapd ymm1, [rdx]
vmovapd ymm2, [rdx + 32]

; Per RegData: the low qword is taken from the 64-bit memory operand, the high
; qword from the register source, and bits 255:128 of the destination are zero.
; The second case chains off the result of the first.
vmovlps xmm1, xmm2, [rdx + 48]
vmovlps xmm3, xmm1, [rdx + 56]

;; Store to memory test
; Overwrite beginning of data, then yank it back into a vector
; Nothing in memory should be modified except the first 64 bits.
; The value stored is the low qword of xmm2 (0xCC.. per RegData for XMM4).
vmovlps [rdx], xmm2
vmovapd ymm4, [rdx]

hlt

align 4096
.data:
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD
db 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xEE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF


================================================
FILE: unittests/ASM/VEX/vmovmskpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x2",
    "RBX": "0xA",
    "RDI": "0x0",
    "RSI": "0x0"
  }
}
%endif

; Exercises VMOVMSKPD, which gathers the sign bit of each packed double into
; the low bits of a general-purpose register.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; ymm0 has the sign bit set in elements 1 and 3:
; the 128-bit form yields 0b10 (0x2), the 256-bit form 0b1010 (0xA).
vmovmskpd rax, xmm0
vmovmskpd rbx, ymm0

; ymm1 has no sign bits set, so both forms yield 0.
vmovmskpd rdi, xmm1
vmovmskpd rsi, ymm1

hlt

align 32
.data:
; Loaded into ymm0: sign bit set only in the odd-numbered elements
dq 0x0000000000000000
dq 0x8000000000000000
dq 0x0000000000000000
dq 0x8000000000000000

; Loaded into ymm1: top bit of every qword is clear
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vmovmskps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x00",
    "RBX": "0x03",
    "RDI": "0x00",
    "RSI": "0x33"
  }
}
%endif

; Exercises VMOVMSKPS, which gathers the sign bit of each packed float into
; the low bits of a general-purpose register.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit form: ymm0 has no sign bits set (0x0); the low half of ymm1 has the
; sign bit set in elements 0 and 1 (0x3).
vmovmskps rax, xmm0
vmovmskps rbx, xmm1

; 256-bit form: ymm1 repeats the same pattern in its upper half (0x33).
vmovmskps rdi, ymm0
vmovmskps rsi, ymm1

hlt

align 32
.data:
; Loaded into ymm0: top bit of every dword is clear
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Loaded into ymm1: 0x80000000 dwords have the sign bit set, 0x70000000 do not
dq 0x8000000080000000
dq 0x7000000070000000
dq 0x8000000080000000
dq 0x7000000070000000


================================================
FILE: unittests/ASM/VEX/vmovntdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Exercises VMOVNTDQ (non-temporal integer vector store) in 128-bit and
; 256-bit forms.
; rdx holds a fixed scratch address (presumably mapped by the test harness).
mov rdx, 0xe0000000

; First 32 bytes: four distinct qwords used as store sources.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Second 32 bytes: zeroed destination area.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; xmm0 starts with the first 16 bytes so the reload below visibly changes it.
vmovaps xmm0, [rdx + 8 * 0]
vmovaps xmm1, [rdx + 8 * 2]
vmovaps ymm2, [rdx + 8 * 0]

; 128-bit store of xmm1, read back into xmm0.
vmovntdq [rdx + 8 * 4], xmm1
vmovaps xmm0, [rdx + 8 * 4]

; 256-bit store of ymm2 over the same area, read back into ymm3.
vmovntdq [rdx + 8 * 4], ymm2
vmovaps ymm3, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/VEX/vmovntdqa.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Exercises VMOVNTDQA (non-temporal aligned vector load) in 128-bit and
; 256-bit forms.
; rdx holds a fixed scratch address (presumably mapped by the test harness).
mov rdx, 0xe0000000

; Populate 32 bytes with four distinct qwords to load back.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Load results with random data first.
; This way the zeroed upper half expected for the 128-bit load cannot be a
; leftover from the register's previous contents.
vmovaps ymm0, [rel .data_random]
vmovaps ymm1, [rel .data_random]

vmovntdqa xmm0, [rdx]
vmovntdqa ymm1, [rdx]

hlt

align 32
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/VEX/vmovntpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM4": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"],
    "XMM5": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
    "XMM6": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0x41aff21340ab4706", "0x40aa5bea411ac802"],
    "XMM7": ["0x428b029f42a63326", "0x4150f0e342241b6c", "0", "0"]
  }
}
%endif

; Exercises VMOVNTPD (non-temporal packed-double store) in 128-bit and 256-bit
; forms, against both a fixed scratch address and RIP-relative buffers.
; rdx holds a fixed scratch address (presumably mapped by the test harness).
mov rdx, 0xe0000000

; First 32 bytes: four distinct qwords used as store sources.
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Second 32 bytes: zeroed destination area.
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; xmm0 starts with the first 16 bytes so the reload below visibly changes it.
vmovaps xmm0, [rdx + 8 * 0]
vmovaps xmm1, [rdx + 8 * 2]
vmovaps ymm2, [rdx + 8 * 0]

; 128-bit store of xmm1, read back into xmm0.
vmovntpd [rdx + 8 * 4], xmm1
vmovaps xmm0, [rdx + 8 * 4]

; 256-bit store of ymm2 over the same area, read back into ymm3.
vmovntpd [rdx + 8 * 4], ymm2
vmovaps ymm3, [rdx + 8 * 4]

; RIP-relative destinations. Each .data_resN buffer initially holds the same
; contents as .data_random, so the 256-bit reloads see that data in any bytes
; the store did not write.
vmovaps ymm4, [rel .data_random]
vmovaps ymm5, [rel .data_random]
vmovaps ymm6, [rel .data_random]
vmovaps ymm7, [rel .data_random]

; 128-bit store, 128-bit reload (upper half of ymm4 expected zero).
vmovntpd [rel .data_res1], xmm4
vmovaps xmm4, [rel .data_res1]

; 128-bit store, 256-bit reload.
vmovntpd [rel .data_res2], xmm5
vmovaps ymm5, [rel .data_res2]

; 256-bit store, 256-bit reload.
vmovntpd [rel .data_res3], ymm6
vmovaps ymm6, [rel .data_res3]

; 256-bit store, 128-bit reload (upper half of ymm7 expected zero).
vmovntpd [rel .data_res4], ymm7
vmovaps xmm7, [rel .data_res4]

hlt

align 4096
.data_random:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

align 32
.data_res1:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

.data_res2:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

.data_res3:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303

.data_res4:
dd 83.0999,69.50512,41.02678,13.05881,5.35242,21.9932,9.67383,5.32372,29.02872,66.50151,19.30764,91.3633,40.45086,50.96153,32.64489,23.97574,90.64316,24.22547,98.9394,91.21715,90.80143,99.48407,64.97245,74.39838,35.22761,25.35321,5.8732,90.19956,33.03133,52.02952,58.38554,10.17531,47.84703,84.04831,90.02965,65.81329,96.27991,6.64479,25.58971,95.00694,88.1929,37.16964,49.52602,10.27223,77.70605,20.21439,9.8056,41.29389,15.4071,57.54286,9.61117,55.54302,52.90745,4.88086,72.52882,3.0201,56.55091,71.22749,61.84736,88.74295,47.72641,24.17404,33.70564,96.71303


================================================
FILE: unittests/ASM/VEX/vmovntps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Exercises VMOVNTPS with a 128-bit (xmm) and a 256-bit (ymm) source register.
; Each store is read back with VMOVAPS; the reloaded registers (xmm0 and ymm3)
; are the ones compared against RegData in the config block.

; rdx = base of the scratch buffer used by this test.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000

; Source pattern, buffer bytes 0-15
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

; Source pattern, buffer bytes 16-31
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Zero the 32-byte destination area (buffer bytes 32-63)
mov rax, 0x0
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax

; xmm0 = bytes 0-15 (replaced below), xmm1 = bytes 16-31, ymm2 = bytes 0-31
vmovaps xmm0, [rdx + 8 * 0]
vmovaps xmm1, [rdx + 8 * 2]
vmovaps ymm2, [rdx + 8 * 0]

; 128-bit store of xmm1 into the destination area, then reload it into xmm0
vmovntps [rdx + 8 * 4], xmm1
vmovaps xmm0, [rdx + 8 * 4]

; 256-bit store of ymm2 over the same destination, then reload it into ymm3
vmovntps [rdx + 8 * 4], ymm2
vmovaps ymm3, [rdx + 8 * 4]

hlt


================================================
FILE: unittests/ASM/VEX/vmovq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434445464748", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x7172737475767778"]
  }
}
%endif

; Exercises VMOVQ in three forms: load from memory, register-to-register,
; and store to memory. RegData expects everything above bit 63 of the
; destination register to be zero after the load and register forms.

; rdx = base of the scratch buffer used by this test.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000

; Fill buffer bytes 0-31 with four distinct 64-bit patterns
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Load from memory (fill with junk and ensure load zeroes out upper 64-bit lanes)
vmovapd xmm0, [rdx]
vmovq xmm0, [rdx]

; Load and truncate same register
; (ymm1 is filled with all 32 bytes first, so three 64-bit lanes must be cleared)
vmovapd ymm1, [rdx]
vmovq xmm1, xmm1

; Store and reload
; Only buffer bytes 16-23 are expected to change; ymm2 then captures bytes 0-31.
vmovq [rdx + 8 * 2], xmm1
vmovapd ymm2, [rdx]

hlt


================================================
FILE: unittests/ASM/VEX/vmovq_vmovd_reg.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RBX": "0x4142434445464748",
    "RCX": "0x0000000045464748",
    "XMM0": ["0x7172737475767778", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000075767778", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000000045464748", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434465666768", "0x6162636465666768", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Exercises VMOVQ/VMOVD between vector registers, general-purpose registers
; and memory. Destination vector registers are pre-filled with non-zero data
; so that RegData can check the bits above the moved value end up zero.

; rdx = base of the scratch buffer used by this test.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000

; Fill buffer bytes 0-31 with four distinct 64-bit patterns
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax

mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax
; rax still holds 0x7172737475767778 here; the GPR-source moves below rely on it.

; Load from GPR (64-bit)
vmovapd xmm0, [rdx]
vmovq xmm0, rax

; Load from GPR (32-bit)
vmovapd xmm1, [rdx]
vmovd xmm1, eax

; Load 32-bit value
; (memory operand is addressed through edx rather than rdx)
vmovapd xmm2, [rdx]
vmovd xmm2, [edx]

; Store into GPR
; rbx receives the low 64 bits of xmm3, ecx the low 32 bits.
vmovapd xmm3, [rdx]
vmovq rbx, xmm3
vmovd ecx, xmm3

; Store into mem
; VMOVD overwrites buffer bytes 0-3, VMOVQ overwrites bytes 8-15;
; ymm5 then captures the resulting bytes 0-31 for comparison.
vmovapd xmm4, [rdx + 8 * 2]
vmovd [rdx + 0], xmm4
vmovq [rdx + 8], xmm4
vmovapd ymm5, [rdx]

hlt


================================================
FILE: unittests/ASM/VEX/vmovsd_from_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the memory-source form of VMOVSD.
; RegData expects the low 64 bits to come from memory and every bit above
; them in the 256-bit register to be zero.

lea rdx, [rel .data]

; Pre-fill all 256 bits of ymm0 with ones (second 32-byte row of .data)
vmovapd ymm0, [rdx + 32]

; Move data into register
vmovsd xmm0, [rdx]

hlt

align 32
.data:
; Row 0: value loaded by VMOVSD (only the first qword is expected to be read into xmm0)
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x0000000000000000
dq 0x0000000000000000

; Row 1: all-ones filler used to dirty ymm0 before the load
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vmovsd_to_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4142434445464748", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the memory-destination form of VMOVSD.
; The store targets the zeroed second half of .data; reading 16 bytes back
; shows whether more than the low 64 bits were written.

lea rdx, [rel .data]

; xmm0 = first 16 bytes of .data (both qwords non-zero)
vmovapd xmm0, [rdx]

; Moves lower 64bits to memory
vmovsd [rdx + 16], xmm0

; Ensure 128bits weren't written
vmovapd xmm0, [rdx + 16]

hlt

; .data starts on a 4096-byte boundary.
; NOTE(review): presumably so the store above lands on a different page than
; the code - confirm against the harness.
align 4096
.data:
; Source value (bytes 0-15)
dq 0x4142434445464748
dq 0x5152535455565758
; Store destination (bytes 16-31), initially zero
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vmovsd_vectors.asm
================================================

%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x9999999988888888"],
    "XMM1":  ["0x1111111122222222", "0x3333333344444444", "0x5555555566666666", "0x7777777788888888"],
    "XMM2":  ["0x1111111122222222", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xAAAAAAAABBBBBBBB", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the three-operand register form of VMOVSD.
; Per RegData: the destination's low qword comes from the last operand,
; its second qword from the middle operand, and bits 128-255 are zero.
; The source registers (xmm0/xmm1) are expected to be left unchanged.

lea rdx, [rel .data]

; ymm0 = row 0, ymm1 = row 1 of .data
vmovapd ymm0,  [rdx]
vmovapd ymm1,  [rdx + 32]

; Same merge with the two sources swapped
vmovsd xmm2, xmm0, xmm1
vmovsd xmm3, xmm1, xmm0

hlt

align 32
.data:
; Row 0 -> ymm0
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888

; Row 1 -> ymm1
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888


================================================
FILE: unittests/ASM/VEX/vmovshdup.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
      "XMM2": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x0808080809090909"],
      "XMM4": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
      "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x0808080808080808"],
      "XMM7": ["0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVSHDUP (128-bit and 256-bit) with the source being the
; destination itself, a different register, and memory.
; Per RegData, every odd-indexed dword of the source is duplicated into the
; dword below it; the 128-bit forms leave bits 128-255 of the destination zero.

lea rdx, [rel .data]

;; Duplicate in place (source and destination are the same register)
vmovaps ymm1, [rdx]
vmovshdup ymm1, ymm1
; 128-bit version
vmovaps xmm2, [rdx]
vmovshdup xmm2, xmm2

;; Duplicate from a different register (ymm3 must stay unmodified)
vmovaps ymm3, [rdx]
vmovshdup ymm4, ymm3
; 128-bit version
vmovshdup xmm5, xmm3

;; Duplicate from a memory operand
vmovshdup ymm6, [rdx]
; 128-bit version
vmovshdup xmm7, [rdx]

hlt

align 32
.data:
; Eight dwords, each made of one repeated byte so the duplicated elements are easy to identify
db 0xFF, 0xFF, 0xFF, 0xFF, 0xEE, 0xEE, 0xEE, 0xEE, 0xDD, 0xDD, 0xDD, 0xDD, 0xCC, 0xCC, 0xCC, 0xCC
db 0xBB, 0xBB, 0xBB, 0xBB, 0xAA, 0xAA, 0xAA, 0xAA, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08


================================================
FILE: unittests/ASM/VEX/vmovsldup.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x0808080809090909"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
      "XMM5": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0xBBBBBBBBBBBBBBBB", "0x0909090909090909"],
      "XMM7": ["0xFFFFFFFFFFFFFFFF", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVSLDUP (128-bit and 256-bit) with the source being the
; destination itself, a different register, and memory.
; Per RegData, every even-indexed dword of the source is duplicated into the
; dword above it; the 128-bit forms leave bits 128-255 of the destination zero.

lea rdx, [rel .data]

;; Duplicate in place (source and destination are the same register)
vmovaps ymm1, [rdx]
vmovsldup ymm1, ymm1
; 128-bit version
vmovaps xmm2, [rdx]
vmovsldup xmm2, xmm2

;; Duplicate from a different register (ymm3 must stay unmodified)
vmovaps ymm3, [rdx]
vmovsldup ymm4, ymm3
; 128-bit version
vmovsldup xmm5, xmm3

;; Duplicate from a memory operand
vmovsldup ymm6, [rdx]
; 128-bit version
vmovsldup xmm7, [rdx]

hlt

align 32
.data:
; Eight dwords, each made of one repeated byte so the duplicated elements are easy to identify
db 0xFF, 0xFF, 0xFF, 0xFF, 0xEE, 0xEE, 0xEE, 0xEE, 0xDD, 0xDD, 0xDD, 0xDD, 0xCC, 0xCC, 0xCC, 0xCC
db 0xBB, 0xBB, 0xBB, 0xBB, 0xAA, 0xAA, 0xAA, 0xAA, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08


================================================
FILE: unittests/ASM/VEX/vmovss_from_mem.asm
================================================

%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000042A63326", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000040AB4706", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000000041E83AD2", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x000000004221CDAE", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000042B5494C", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000042B59A55", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x00000000420CE913", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000042042015", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x00000000423F635C", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000042C08F50", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000042B062C4", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x00000000429B697F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x000000004176837B", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x000000004253A13B", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000042623422", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x00000000423EE7D8", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the memory-source form of VMOVSS on all sixteen xmm registers.
; RegData expects each register to hold only the first dword of its 16-byte
; row, with every bit above bit 31 zero.

lea rdx, [rel .data]
; Pre-fill each register with its full 16-byte row so the bits above bit 31
; are non-zero before the VMOVSS load.
vmovapd xmm0,  [rdx + 16 * 0]
vmovapd xmm1,  [rdx + 16 * 1]
vmovapd xmm2,  [rdx + 16 * 2]
vmovapd xmm3,  [rdx + 16 * 3]
vmovapd xmm4,  [rdx + 16 * 4]
vmovapd xmm5,  [rdx + 16 * 5]
vmovapd xmm6,  [rdx + 16 * 6]
vmovapd xmm7,  [rdx + 16 * 7]
vmovapd xmm8,  [rdx + 16 * 8]
vmovapd xmm9,  [rdx + 16 * 9]
vmovapd xmm10, [rdx + 16 * 10]
vmovapd xmm11, [rdx + 16 * 11]
vmovapd xmm12, [rdx + 16 * 12]
vmovapd xmm13, [rdx + 16 * 13]
vmovapd xmm14, [rdx + 16 * 14]
vmovapd xmm15, [rdx + 16 * 15]

; Instruction under test: load the first dword of each row
vmovss xmm0,  [rdx + 16 * 0]
vmovss xmm1,  [rdx + 16 * 1]
vmovss xmm2,  [rdx + 16 * 2]
vmovss xmm3,  [rdx + 16 * 3]
vmovss xmm4,  [rdx + 16 * 4]
vmovss xmm5,  [rdx + 16 * 5]
vmovss xmm6,  [rdx + 16 * 6]
vmovss xmm7,  [rdx + 16 * 7]
vmovss xmm8,  [rdx + 16 * 8]
vmovss xmm9,  [rdx + 16 * 9]
vmovss xmm10, [rdx + 16 * 10]
vmovss xmm11, [rdx + 16 * 11]
vmovss xmm12, [rdx + 16 * 12]
vmovss xmm13, [rdx + 16 * 13]
vmovss xmm14, [rdx + 16 * 14]
vmovss xmm15, [rdx + 16 * 15]

hlt

align 32
; 256 bytes of arbitrary float data: 16 rows of four single-precision values,
; one 16-byte row per xmm register
.data:
dd 83.0999 , 69.50512, 41.02678, 13.05881
dd 5.35242 , 21.9932 , 9.67383 , 5.32372
dd 29.02872, 66.50151, 19.30764, 91.3633
dd 40.45086, 50.96153, 32.64489, 23.97574
dd 90.64316, 24.22547, 98.9394 , 91.21715
dd 90.80143, 99.48407, 64.97245, 74.39838
dd 35.22761, 25.35321, 5.8732  , 90.19956
dd 33.03133, 52.02952, 58.38554, 10.17531
dd 47.84703, 84.04831, 90.02965, 65.81329
dd 96.27991, 6.64479 , 25.58971, 95.00694
dd 88.1929 , 37.16964, 49.52602, 10.27223
dd 77.70605, 20.21439, 9.8056  , 41.29389
dd 15.4071 , 57.54286, 9.61117 , 55.54302
dd 52.90745, 4.88086 , 72.52882, 3.0201
dd 56.55091, 71.22749, 61.84736, 88.74295
dd 47.72641, 24.17404, 33.70564, 96.71303


================================================
FILE: unittests/ASM/VEX/vmovss_to_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000042A63326", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000040AB4706", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000000041E83AD2", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x000000004221CDAE", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000042B5494C", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000042B59A55", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x00000000420CE913", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000042042015", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x00000000423F635C", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000042C08F50", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000042B062C4", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x00000000429B697F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x000000004176837B", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x000000004253A13B", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000042623422", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x00000000423EE7D8", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the memory-destination form of VMOVSS on all sixteen xmm registers.
; Flow:
;   1. Load each register with a full 16-byte row of .data.
;   2. Zero a 256-byte scratch buffer.
;   3. VMOVSS-store each register into its own 16-byte slot of the buffer.
;   4. Reload the slots with VMOVAPD; RegData expects only the low dword of
;      each slot to be non-zero, i.e. exactly 4 bytes were written per store.

lea rdx, [rel .data]

; Step 1: xmmN = row N of .data
vmovapd xmm0,  [rdx + 16 * 0]
vmovapd xmm1,  [rdx + 16 * 1]
vmovapd xmm2,  [rdx + 16 * 2]
vmovapd xmm3,  [rdx + 16 * 3]
vmovapd xmm4,  [rdx + 16 * 4]
vmovapd xmm5,  [rdx + 16 * 5]
vmovapd xmm6,  [rdx + 16 * 6]
vmovapd xmm7,  [rdx + 16 * 7]
vmovapd xmm8,  [rdx + 16 * 8]
vmovapd xmm9,  [rdx + 16 * 9]
vmovapd xmm10, [rdx + 16 * 10]
vmovapd xmm11, [rdx + 16 * 11]
vmovapd xmm12, [rdx + 16 * 12]
vmovapd xmm13, [rdx + 16 * 13]
vmovapd xmm14, [rdx + 16 * 14]
vmovapd xmm15, [rdx + 16 * 15]

; Step 2: zero the scratch buffer.
; All 32 qwords (256 bytes) must be cleared: the final read-back of slot 15
; covers bytes 240-255, so qword 31 has to be initialised as well.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000
mov rax, 0
mov [rdx + 8 * 0], rax
mov [rdx + 8 * 1], rax
mov [rdx + 8 * 2], rax
mov [rdx + 8 * 3], rax
mov [rdx + 8 * 4], rax
mov [rdx + 8 * 5], rax
mov [rdx + 8 * 6], rax
mov [rdx + 8 * 7], rax
mov [rdx + 8 * 8], rax
mov [rdx + 8 * 9], rax
mov [rdx + 8 * 10], rax
mov [rdx + 8 * 11], rax
mov [rdx + 8 * 12], rax
mov [rdx + 8 * 13], rax
mov [rdx + 8 * 14], rax
mov [rdx + 8 * 15], rax
mov [rdx + 8 * 16], rax
mov [rdx + 8 * 17], rax
mov [rdx + 8 * 18], rax
mov [rdx + 8 * 19], rax
mov [rdx + 8 * 20], rax
mov [rdx + 8 * 21], rax
mov [rdx + 8 * 22], rax
mov [rdx + 8 * 23], rax
mov [rdx + 8 * 24], rax
mov [rdx + 8 * 25], rax
mov [rdx + 8 * 26], rax
mov [rdx + 8 * 27], rax
mov [rdx + 8 * 28], rax
mov [rdx + 8 * 29], rax
mov [rdx + 8 * 30], rax
mov [rdx + 8 * 31], rax

; Step 3: instruction under test - store the low dword of xmmN into slot N
vmovss [rdx + 16 * 0], xmm0
vmovss [rdx + 16 * 1], xmm1
vmovss [rdx + 16 * 2], xmm2
vmovss [rdx + 16 * 3], xmm3
vmovss [rdx + 16 * 4], xmm4
vmovss [rdx + 16 * 5], xmm5
vmovss [rdx + 16 * 6], xmm6
vmovss [rdx + 16 * 7], xmm7
vmovss [rdx + 16 * 8], xmm8
vmovss [rdx + 16 * 9], xmm9
vmovss [rdx + 16 * 10], xmm10
vmovss [rdx + 16 * 11], xmm11
vmovss [rdx + 16 * 12], xmm12
vmovss [rdx + 16 * 13], xmm13
vmovss [rdx + 16 * 14], xmm14
vmovss [rdx + 16 * 15], xmm15

; Refill the registers with the full .data rows before reading the buffer back.
; NOTE(review): presumably so stale register contents cannot accidentally
; match RegData if the read-back below misbehaves - confirm.
lea rdx, [rel .data]
vmovapd xmm0, [rdx + 16 * 0]
vmovapd xmm1, [rdx + 16 * 1]
vmovapd xmm2, [rdx + 16 * 2]
vmovapd xmm3, [rdx + 16 * 3]
vmovapd xmm4, [rdx + 16 * 4]
vmovapd xmm5, [rdx + 16 * 5]
vmovapd xmm6, [rdx + 16 * 6]
vmovapd xmm7, [rdx + 16 * 7]
vmovapd xmm8, [rdx + 16 * 8]
vmovapd xmm9, [rdx + 16 * 9]
vmovapd xmm10, [rdx + 16 * 10]
vmovapd xmm11, [rdx + 16 * 11]
vmovapd xmm12, [rdx + 16 * 12]
vmovapd xmm13, [rdx + 16 * 13]
vmovapd xmm14, [rdx + 16 * 14]
vmovapd xmm15, [rdx + 16 * 15]

; Step 4: read every 16-byte slot of the scratch buffer back for comparison
mov rdx, 0xe0000000

vmovapd xmm0, [rdx + 16 * 0]
vmovapd xmm1, [rdx + 16 * 1]
vmovapd xmm2, [rdx + 16 * 2]
vmovapd xmm3, [rdx + 16 * 3]
vmovapd xmm4, [rdx + 16 * 4]
vmovapd xmm5, [rdx + 16 * 5]
vmovapd xmm6, [rdx + 16 * 6]
vmovapd xmm7, [rdx + 16 * 7]
vmovapd xmm8, [rdx + 16 * 8]
vmovapd xmm9, [rdx + 16 * 9]
vmovapd xmm10, [rdx + 16 * 10]
vmovapd xmm11, [rdx + 16 * 11]
vmovapd xmm12, [rdx + 16 * 12]
vmovapd xmm13, [rdx + 16 * 13]
vmovapd xmm14, [rdx + 16 * 14]
vmovapd xmm15, [rdx + 16 * 15]

hlt

align 16
; 256 bytes of arbitrary float data: 16 rows of four single-precision values,
; one 16-byte row per xmm register
.data:
dd 83.0999 , 69.50512, 41.02678, 13.05881
dd 5.35242 , 21.9932 , 9.67383 , 5.32372
dd 29.02872, 66.50151, 19.30764, 91.3633
dd 40.45086, 50.96153, 32.64489, 23.97574
dd 90.64316, 24.22547, 98.9394 , 91.21715
dd 90.80143, 99.48407, 64.97245, 74.39838
dd 35.22761, 25.35321, 5.8732  , 90.19956
dd 33.03133, 52.02952, 58.38554, 10.17531
dd 47.84703, 84.04831, 90.02965, 65.81329
dd 96.27991, 6.64479 , 25.58971, 95.00694
dd 88.1929 , 37.16964, 49.52602, 10.27223
dd 77.70605, 20.21439, 9.8056  , 41.29389
dd 15.4071 , 57.54286, 9.61117 , 55.54302
dd 52.90745, 4.88086 , 72.52882, 3.0201
dd 56.55091, 71.22749, 61.84736, 88.74295
dd 47.72641, 24.17404, 33.70564, 96.71303


================================================
FILE: unittests/ASM/VEX/vmovss_vectors.asm
================================================

%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x9999999988888888"],
    "XMM1":  ["0x1111111122222222", "0x3333333344444444", "0x5555555566666666", "0x7777777788888888"],
    "XMM2":  ["0xAAAAAAAA22222222", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x11111111BBBBBBBB", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the three-operand register form of VMOVSS.
; Per RegData: the destination's low dword comes from the last operand,
; bits 32-127 from the middle operand, and bits 128-255 are zero.
; The source registers (xmm0/xmm1) are expected to be left unchanged.

lea rdx, [rel .data]

; ymm0 = row 0, ymm1 = row 1 of .data
vmovapd ymm0,  [rdx]
vmovapd ymm1,  [rdx + 32]

; Same merge with the two sources swapped
vmovss xmm2, xmm0, xmm1
vmovss xmm3, xmm1, xmm0

hlt

align 32
.data:
; Row 0 -> ymm0
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888

; Row 1 -> ymm1
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888


================================================
FILE: unittests/ASM/VEX/vmovupd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VMOVUPD loads from memory in 256-bit and 128-bit forms.
; RegData expects the 128-bit load to leave bits 128-255 of ymm2 zero.
; Note: every address used here is 32-byte aligned (.data follows "align 32").

lea rdx, [rel .data]

; Load inputs
; ymm1 = first 32 bytes, xmm2 = first 16 bytes, ymm3 = second 32 bytes
vmovupd ymm1, [rdx]
vmovupd xmm2, [rdx]
vmovupd ymm3, [rdx + 32]

hlt

align 32
.data:
; Bytes 0-31: all 0xFF; bytes 32-63: all 0xCC
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC


================================================
FILE: unittests/ASM/VEX/vmovupd_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x6162636465666768",
    "RBX": "0x7172737475767778",
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVUPD as a load, a 256-bit store and a register-to-register move.
; The stored data is verified by reading the upper half of the destination
; back through rax/rbx.

; rdx = base of the scratch buffer used by this test.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000

; Buffer bytes 0-31: source pattern for the store test
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Buffer bytes 32-63: initial contents of the store destination
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 8 * 4], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 8 * 5], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 8 * 6], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 7], rax

; Test truncation
; (256-bit load first, then a 128-bit load that must clear bits 128-255 of ymm2)
vmovupd ymm2, [rdx + 8 * 4]
vmovupd xmm2, [rdx + 8 * 4]

; Test memory overwrite
; ymm0 = bytes 0-31, stored over bytes 32-63; ymm1 is a register copy of ymm0
vmovupd ymm0, [rdx]
vmovupd [rdx + 8 * 4], ymm0
vmovupd ymm1, ymm0

; Read back the last two qwords of the overwritten region
mov rax, [rdx + 8 * 6]
mov rbx, [rdx + 8 * 7]

hlt


================================================
FILE: unittests/ASM/VEX/vmovups.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VMOVUPS loads from memory in 256-bit and 128-bit forms.
; RegData expects the 128-bit load to leave bits 128-255 of ymm2 zero.
; Note: every address used here is 32-byte aligned (.data follows "align 32").

lea rdx, [rel .data]

; Load inputs
; ymm1 = first 32 bytes, xmm2 = first 16 bytes, ymm3 = second 32 bytes
vmovups ymm1, [rdx]
vmovups xmm2, [rdx]
vmovups ymm3, [rdx + 32]

hlt

align 32
.data:
; Bytes 0-31: all 0xFF; bytes 32-63: all 0xCC
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC
db 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC, 0xCC


================================================
FILE: unittests/ASM/VEX/vmovups_mem.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x6162636465666768",
    "RBX": "0x7172737475767778",
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VMOVUPS as a load, a 256-bit store and a register-to-register move.
; The stored data is verified by reading the upper half of the destination
; back through rax/rbx.

; rdx = base of the scratch buffer used by this test.
; NOTE(review): assumes 0xe0000000 is mapped read/write by the test harness - confirm.
mov rdx, 0xe0000000

; Buffer bytes 0-31: source pattern for the store test
mov rax, 0x4142434445464748
mov [rdx + 8 * 0], rax
mov rax, 0x5152535455565758
mov [rdx + 8 * 1], rax
mov rax, 0x6162636465666768
mov [rdx + 8 * 2], rax
mov rax, 0x7172737475767778
mov [rdx + 8 * 3], rax

; Buffer bytes 32-63: initial contents of the store destination
mov rax, 0xCCCCCCCCCCCCCCCC
mov [rdx + 8 * 4], rax
mov rax, 0xDDDDDDDDDDDDDDDD
mov [rdx + 8 * 5], rax
mov rax, 0xEEEEEEEEEEEEEEEE
mov [rdx + 8 * 6], rax
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 8 * 7], rax

; Test truncation
; (256-bit load first, then a 128-bit load that must clear bits 128-255 of ymm2)
vmovups ymm2, [rdx + 8 * 4]
vmovups xmm2, [rdx + 8 * 4]

; Test memory overwrite
; ymm0 = bytes 0-31, stored over bytes 32-63; ymm1 is a register copy of ymm0
vmovups ymm0, [rdx]
vmovups [rdx + 8 * 4], ymm0
vmovups ymm1, ymm0

; Read back the last two qwords of the overwritten region
mov rax, [rdx + 8 * 6]
mov rbx, [rdx + 8 * 7]

hlt


================================================
FILE: unittests/ASM/VEX/vmpsadbw_128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x6C8BABD754A8356E", "0x277EA625CA925F77", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x6A6FD695EC73CDC7", "0xDDA1B927BBF2AEBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x88312CD5C7D14D73", "0x7F091E1EFDDBE7FE", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xF29AE6EF954EFA14", "0x8273A8A49A6242A0", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x3212073882160F0E", "0xB3780763C1923507", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x462A372B571946CB", "0xA38DCD3D790E041F", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x01D700F201DD018B", "0x021B012D00EC015B", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x021B01EA0147019C", "0x017900FB00D801D9", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x010500E801000153", "0x011A015F01530171", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x019C0124018F014D", "0x011F0100011E0116", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0136007E009D01E0", "0x02A802C80245019D", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x009F0115017B0132", "0x013C01AF01F90179", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0077012B011900E8", "0x00BC016E019E0146", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x0100011C010300D5", "0x00F3014A016700CD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the 128-bit form of VMPSADBW with every 3-bit immediate (0-7).
; xmm0-xmm7 hold the first source operands (and must stay unchanged),
; the second operand is read from memory, and xmm8-xmm15 receive the results.

lea rdx, [rel .data]

; First source operands: 16-byte rows 0-7 of .data
vmovaps xmm0, [rdx + 16 * 0]
vmovaps xmm1, [rdx + 16 * 1]
vmovaps xmm2, [rdx + 16 * 2]
vmovaps xmm3, [rdx + 16 * 3]
vmovaps xmm4, [rdx + 16 * 4]
vmovaps xmm5, [rdx + 16 * 5]
vmovaps xmm6, [rdx + 16 * 6]
vmovaps xmm7, [rdx + 16 * 7]

; Second source operands: 16-byte rows 8-15 of .data, one immediate value per row
vmpsadbw xmm8,  xmm0, [rdx + 16 * 8],  000b
vmpsadbw xmm9,  xmm1, [rdx + 16 * 9],  001b
vmpsadbw xmm10, xmm2, [rdx + 16 * 10], 010b
vmpsadbw xmm11, xmm3, [rdx + 16 * 11], 011b
vmpsadbw xmm12, xmm4, [rdx + 16 * 12], 100b
vmpsadbw xmm13, xmm5, [rdx + 16 * 13], 101b
vmpsadbw xmm14, xmm6, [rdx + 16 * 14], 110b
vmpsadbw xmm15, xmm7, [rdx + 16 * 15], 111b

hlt

align 32
.data:
; Rows 0-7 (two qwords each): loaded into xmm0-xmm7
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0x6C8BABD754A8356E
dq 0x277EA625CA925F77
dq 0x6A6FD695EC73CDC7
dq 0xDDA1B927BBF2AEBB
dq 0x88312CD5C7D14D73
dq 0x7F091E1EFDDBE7FE
dq 0xF29AE6EF954EFA14
dq 0x8273A8A49A6242A0
dq 0x3212073882160F0E
dq 0xB3780763C1923507
dq 0x462A372B571946CB
dq 0xA38DCD3D790E041F
; Rows 8-15 (two qwords each): memory operands of the VMPSADBW instructions
dq 0x3057BAAB2F86F32B
dq 0xEF3F4F46F02CD62E
dq 0xDE3C4B3485BBD1EF
dq 0x9DE3718DB9A3489E
dq 0x9D50328ADEFB7209
dq 0xEEF7EB52F6F19869
dq 0xCE021C30FFC299D6
dq 0xA60E9C56F1B20570
dq 0x30763886E2C46218
dq 0xEB535D0EA7E4A12F
dq 0x6802E8E1B7E04514
dq 0x46EBF28FC18EFE1A
dq 0x032E9746236A5D7F
dq 0xAC5976548F321298
dq 0xB6D30C71C85F76C8
dq 0x881D2CA6ABEA19C5


================================================
FILE: unittests/ASM/VEX/vmpsadbw_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0C7FCC33573D4A81", "0xB2B1594B0900051F"],
    "XMM1":  ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0x48CACFD5667F2042", "0xC3BF1B89A0DFEE04"],
    "XMM2":  ["0x6C8BABD754A8356E", "0x277EA625CA925F77", "0x77D4FD3ED900079E", "0xA454D66F18BE061B"],
    "XMM3":  ["0x6A6FD695EC73CDC7", "0xDDA1B927BBF2AEBB", "0x22E096464DD75EF1", "0xF8DD0BC501EC1573"],
    "XMM4":  ["0x88312CD5C7D14D73", "0x7F091E1EFDDBE7FE", "0x8952DF26784EFD5F", "0x06BE3C607E0C7DC7"],
    "XMM5":  ["0xF29AE6EF954EFA14", "0x8273A8A49A6242A0", "0x0DCA8E436C33CE72", "0xD237159B6EF41772"],
    "XMM6":  ["0x3212073882160F0E", "0xB3780763C1923507", "0xE482F34CE3FE3EFC", "0xC3F2D5A8975969F8"],
    "XMM7":  ["0x462A372B571946CB", "0xA38DCD3D790E041F", "0x879EF228FB9D8A41", "0xCA0E4DAE9D595C1A"],
    "XMM8":  ["0x01D700F201DD018B", "0x021B012D00EC015B", "0x00AB018100EF0139", "0x015401BB020C0160"],
    "XMM9":  ["0x021B01EA0147019C", "0x017900FB00D801D9", "0x014400CB01160185", "0x016D00CE01A1014C"],
    "XMM10": ["0x010500E801000153", "0x011A015F01530171", "0x01AD00CD027F0105", "0x00F3018301A60197"],
    "XMM11": ["0x019C0124018F014D", "0x011F0100011E0116", "0x01580145016B0106", "0x01E301CD013A0119"],
    "XMM12": ["0x0136007E009D01E0", "0x02A802C80245019D", "0x0149015101AF016A", "0x010600E400E30120"],
    "XMM13": ["0x009F0115017B0132", "0x013C01AF01F90179", "0x015601040159025A", "0x017D01B40202017D"],
    "XMM14": ["0x0077012B011900E8", "0x00BC016E019E0146", "0x01D900F8011201BE", "0x00E5012000B60130"],
    "XMM15": ["0x0100011C010300D5", "0x00F3014A016700CD", "0x011900A400F60156", "0x0063010A019B0185"]
  }
}
%endif

; Exercises the 256-bit form of VMPSADBW.
; ymm0-ymm7 hold the first source operands (and must stay unchanged),
; the second operand is read from memory, and ymm8-ymm15 receive the results.
; Each immediate repeats the same 3-bit selector in bits 0-2 and bits 3-5
; (bits 0-2 apply to the lower 128-bit lane, bits 3-5 to the upper lane).

lea rdx, [rel .data]

; First source operands: 32-byte rows 0-7 of .data
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]
vmovaps ymm4, [rdx + 32 * 4]
vmovaps ymm5, [rdx + 32 * 5]
vmovaps ymm6, [rdx + 32 * 6]
vmovaps ymm7, [rdx + 32 * 7]

; Second source operands: 32-byte rows 8-15 of .data, one selector value per row
vmpsadbw ymm8,  ymm0, [rdx + 32 * 8],  000000b
vmpsadbw ymm9,  ymm1, [rdx + 32 * 9],  001001b
vmpsadbw ymm10, ymm2, [rdx + 32 * 10], 010010b
vmpsadbw ymm11, ymm3, [rdx + 32 * 11], 011011b
vmpsadbw ymm12, ymm4, [rdx + 32 * 12], 100100b
vmpsadbw ymm13, ymm5, [rdx + 32 * 13], 101101b
vmpsadbw ymm14, ymm6, [rdx + 32 * 14], 110110b
vmpsadbw ymm15, ymm7, [rdx + 32 * 15], 111111b

hlt

align 32
.data:
; One 32-byte row per line. Rows 0-7: loaded into ymm0-ymm7.
dq 0xA76C4F06A12BFCE0, 0x9B80767F1E6A060F, 0x0C7FCC33573D4A81, 0xB2B1594B0900051F
dq 0x6868C3F3AAED56E0, 0xF0FCE9E294E6E6DE, 0x48CACFD5667F2042, 0xC3BF1B89A0DFEE04
dq 0x6C8BABD754A8356E, 0x277EA625CA925F77, 0x77D4FD3ED900079E, 0xA454D66F18BE061B
dq 0x6A6FD695EC73CDC7, 0xDDA1B927BBF2AEBB, 0x22E096464DD75EF1, 0xF8DD0BC501EC1573
dq 0x88312CD5C7D14D73, 0x7F091E1EFDDBE7FE, 0x8952DF26784EFD5F, 0x06BE3C607E0C7DC7
dq 0xF29AE6EF954EFA14, 0x8273A8A49A6242A0, 0x0DCA8E436C33CE72, 0xD237159B6EF41772
dq 0x3212073882160F0E, 0xB3780763C1923507, 0xE482F34CE3FE3EFC, 0xC3F2D5A8975969F8
dq 0x462A372B571946CB, 0xA38DCD3D790E041F, 0x879EF228FB9D8A41, 0xCA0E4DAE9D595C1A
; Rows 8-15: memory operands of the VMPSADBW instructions
dq 0x3057BAAB2F86F32B, 0xEF3F4F46F02CD62E, 0x94C77DFE4CE24002, 0xF21AA894D8B40A7B
dq 0xDE3C4B3485BBD1EF, 0x9DE3718DB9A3489E, 0xEB916DE33FC4D6C4, 0xD0514FFFD3EFFCE5
dq 0x9D50328ADEFB7209, 0xEEF7EB52F6F19869, 0xABC6D5DBC52734DA, 0xED34B0EAE12FB881
dq 0xCE021C30FFC299D6, 0xA60E9C56F1B20570, 0xCF0CECBC8DF25E5E, 0xABE3B9B0215B088A
dq 0x30763886E2C46218, 0xEB535D0EA7E4A12F, 0xAA418BA42D1E3354, 0x1701761E8F4456D0
dq 0x6802E8E1B7E04514, 0x46EBF28FC18EFE1A, 0xC42510C384410A30, 0xB029D9C4A89A6C74
dq 0x032E9746236A5D7F, 0xAC5976548F321298, 0xF537B9098166726E, 0x97C312089BF23896
dq 0xB6D30C71C85F76C8, 0x881D2CA6ABEA19C5, 0xF3F32FC9BBDA1589, 0x2732CF8F4E17D917


================================================
FILE: unittests/ASM/VEX/vmulpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
    "XMM1": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM2": ["0x4018000000000000", "0x4018000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4018000000000000", "0x4018000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4018000000000000", "0x4018000000000000", "0x4018000000000000", "0x4018000000000000"],
    "XMM5": ["0x4018000000000000", "0x4018000000000000", "0x4018000000000000", "0x4018000000000000"]
  }
}
%endif

; Exercises VMULPD in 128-bit and 256-bit forms with both a memory and a
; register second operand. Every lane multiplies 3.0 by 2.0, so RegData
; expects 6.0 (0x4018000000000000) in each written lane; the 128-bit forms
; are expected to leave bits 128-255 of the destination zero.

lea rdx, [rel .data]

; ymm0 = four lanes of 3.0, ymm1 = four lanes of 2.0
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vmulpd xmm2, xmm0, [rdx + 32]
vmulpd ymm4, ymm0, [rdx + 32]

; Register only
vmulpd xmm3, xmm0, xmm1
vmulpd ymm5, ymm1, ymm0

hlt

align 32
.data:
; Row 0: 3.0 in every double-precision lane
dq 0x4008000000000000
dq 0x4008000000000000
dq 0x4008000000000000
dq 0x4008000000000000

; Row 1: 2.0 in every double-precision lane
dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000
dq 0x4000000000000000


================================================
FILE: unittests/ASM/VEX/vmulps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM2": ["0x4140000040A00000", "0x4200000041A80000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4140000040A00000", "0x4200000041A80000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4140000040A00000", "0x4200000041A80000", "0x4140000040A00000", "0x4200000041A80000"],
    "XMM5": ["0x4140000040A00000", "0x4200000041A80000", "0x4140000040A00000", "0x4200000041A80000"]
  }
}
%endif

; Exercises VMULPS in 128-bit and 256-bit forms with both a memory and a
; register second operand. The lanes multiply (1, 2, 3, 4) by (5, 6, 7, 8),
; so RegData expects (5, 12, 21, 32) per 128-bit lane; the 128-bit forms are
; expected to leave bits 128-255 of the destination zero.

lea rdx, [rel .data]

; ymm0 = row 0, ymm1 = row 1 of .data
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vmulps xmm2, xmm0, [rdx + 32]
vmulps ymm4, ymm0, [rdx + 32]

; Register only
vmulps xmm3, xmm0, xmm1
vmulps ymm5, ymm1, ymm0

hlt

align 32
.data:
; Each qword packs two single-precision values; the comment lists them high dword first.
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3
dq 0x400000003F800000 ; 2, 1
dq 0x4080000040400000 ; 4, 3

dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7
dq 0x40C0000040A00000 ; 6, 5
dq 0x4100000040E00000 ; 8, 7


================================================
FILE: unittests/ASM/VEX/vmulsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4010000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4062000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4039000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4059000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4079000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; VMULSD: scalar double-precision multiply.
; Only bits 63:0 are multiplied. Per the expected RegData, bits 127:64 of the
; result come from the first source and bits 255:128 of the destination are
; zeroed.
lea rdx, [rel .data]

; ymm0..ymm4 = 1.0, 4.0, 9.0, 16.0, 25.0 in the low qword; the remaining
; qwords hold the filler pattern 0x5152535455565758.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; xmm0 = 1.0 * 4.0 = 4.0, xmm2 = 9.0 * 16.0 = 144.0
; The pure sources xmm1/xmm3 keep their full 256-bit contents.
vmulsd xmm0, xmm0, xmm1
vmulsd xmm2, xmm2, xmm3

; Memory operand
; xmm5 = 25.0 * 4.0 = 100.0, then xmm4 = 25.0 * 1.0 = 25.0
vmulsd xmm5, xmm4, [rdx + 32 * 1]
vmulsd xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; xmm7 is cleared first so that the filler expected in its bits 127:64 can
; only have come from a source operand. xmm7 = 16.0 * 25.0 = 400.0
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vmulsd xmm7, xmm8, xmm9

hlt

align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4010000000000000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4022000000000000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4030000000000000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4039000000000000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vmulss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434440800000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x4142434443100000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4142434441C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434442C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x4142434443C80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; VMULSS: scalar single-precision multiply.
; Only bits 31:0 are multiplied. Per the expected RegData, the rest of the low
; 128 bits is passed through from a source and bits 255:128 of the destination
; are zeroed.
lea rdx, [rel .data]

; ymm0..ymm4 = 1.0, 4.0, 9.0, 16.0, 25.0 in the low dword; the remaining bits
; hold filler patterns (0x41424344 / 0x5152535455565758).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; Register only
; xmm0 = 1.0 * 4.0 = 4.0, xmm2 = 9.0 * 16.0 = 144.0
; The pure sources xmm1/xmm3 keep their full 256-bit contents.
vmulss xmm0, xmm0, xmm1
vmulss xmm2, xmm2, xmm3

; Memory operand
; xmm5 = 25.0 * 4.0 = 100.0, then xmm4 = 25.0 * 1.0 = 25.0
vmulss xmm5, xmm4, [rdx + 32 * 1]
vmulss xmm4, xmm4, [rdx + 32 * 0]

; Merging different src into destination
; xmm7 is cleared first so that the filler expected in its bits 127:32 can
; only have come from a source operand. xmm7 = 16.0 * 25.0 = 400.0
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx + 32 * 4]
vmulss xmm7, xmm8, xmm9

hlt

align 32
.data:
dq 0x414243443F800000 ; 1.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434440800000 ; 4.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441100000 ; 9.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441800000 ; 16.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

dq 0x4142434441C80000 ; 25.0
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vorpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM3": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM5": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VORPD: bitwise OR of two vectors, 128-bit and 256-bit forms.
; rdx -> first operand (.data1), rbx -> second operand (.data2)
lea rdx, [rel .data1]
lea rbx, [rel .data2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vorpd ymm2, ymm0, ymm1
vorpd xmm3, xmm0, xmm1

; With memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vorpd ymm4, ymm0, [rbx]
vorpd xmm5, xmm0, [rbx]

hlt

align 32
; .data1 is exactly 32 bytes long, so .data2 is 32-byte aligned as well.
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vorps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM3": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM5": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VORPS: bitwise OR of two vectors, 128-bit and 256-bit forms.
; rdx -> first operand (.data1), rbx -> second operand (.data2)
lea rdx, [rel .data1]
lea rbx, [rel .data2]

vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vorps ymm2, ymm0, ymm1
vorps xmm3, xmm0, xmm1

; With memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vorps ymm4, ymm0, [rbx]
vorps xmm5, xmm0, [rbx]

hlt

align 32
; .data1 is exactly 32 bytes long, so .data2 is 32-byte aligned as well.
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpabsb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2":  ["0x0101010101010101", "0x0101010101010101", "0x0101010101010101", "0x0101010101010101"],
    "XMM3":  ["0xFF000100FF01FF00", "0xFF000100FF01FF00", "0xFF000100FF01FF00", "0xFF000100FF01FF00"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0101010101010101", "0x0101010101010101", "0x0101010101010101", "0x0101010101010101"],
    "XMM6":  ["0x0101010101010101", "0x0101010101010101", "0x0101010101010101", "0x0101010101010101"],
    "XMM7":  ["0x0100010001010100", "0x0100010001010100", "0x0100010001010100", "0x0100010001010100"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0101010101010101", "0x0101010101010101", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0101010101010101", "0x0101010101010101", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0100010001010100", "0x0100010001010100", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0101010101010101", "0x0101010101010101", "0x0101010101010101", "0x0101010101010101"],
    "XMM13": ["0x0101010101010101", "0x0101010101010101", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0101010101010101", "0x0101010101010101", "0x0101010101010101", "0x0101010101010101"],
    "XMM15": ["0x0101010101010101", "0x0101010101010101", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPABSB: per-byte absolute value, 128-bit and 256-bit forms, with register
; and memory sources.
lea rdx, [rel .data]

; ymm0..ymm3 hold the four raw input vectors; they are never written again,
; so RegData expects them to still contain the unmodified data.
vmovapd ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]

; Test with full zero
vpabsb ymm4, [rdx + 32 * 0]
vpabsb xmm8, [rdx + 32 * 0]

; Test with full negative
; Every byte is 0xFF (-1), so every result byte is 0x01.
vpabsb ymm5, [rdx + 32 * 1]
vpabsb xmm9, [rdx + 32 * 1]
vpabsb ymm12, ymm1
vpabsb xmm13, xmm1

; Test with full positive
; Every byte is 0x01 and passes through unchanged.
vpabsb ymm6, [rdx + 32 * 2]
vpabsb xmm10, [rdx + 32 * 2]
vpabsb ymm14, ymm2
vpabsb xmm15, xmm2

; Test a mix
; Bytes 0xFF, 0x00 and 0x01 map to 0x01, 0x00 and 0x01 respectively.
vpabsb ymm7, [rdx + 32 * 3]
vpabsb xmm11, [rdx + 32 * 3]

hlt

align 32
.data:
; All zero
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; All -1
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; All +1
dq 0x0101010101010101
dq 0x0101010101010101
dq 0x0101010101010101
dq 0x0101010101010101

; Mix of -1, 0 and +1
dq 0xFF000100FF01FF00
dq 0xFF000100FF01FF00
dq 0xFF000100FF01FF00
dq 0xFF000100FF01FF00


================================================
FILE: unittests/ASM/VEX/vpabsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2":  ["0x0000000100000001", "0x0000000100000001", "0x0000000100000001", "0x0000000100000001"],
    "XMM3":  ["0xFFFFFFFF00000000", "0x00000001FFFFFFFF", "0xFFFFFFFF00000000", "0x00000001FFFFFFFF"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000100000001", "0x0000000100000001", "0x0000000100000001", "0x0000000100000001"],
    "XMM6":  ["0x0000000100000001", "0x0000000100000001", "0x0000000100000001", "0x0000000100000001"],
    "XMM7":  ["0x0000000100000000", "0x0000000100000001", "0x0000000100000000", "0x0000000100000001"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000100000001", "0x0000000100000001", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000100000001", "0x0000000100000001", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000000100000000", "0x0000000100000001", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0000000100000001", "0x0000000100000001", "0x0000000100000001", "0x0000000100000001"],
    "XMM13": ["0x0000000100000001", "0x0000000100000001", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0000000100000001", "0x0000000100000001", "0x0000000100000001", "0x0000000100000001"],
    "XMM15": ["0x0000000100000001", "0x0000000100000001", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPABSD: per-dword absolute value, 128-bit and 256-bit forms, with register
; and memory sources.
lea rdx, [rel .data]

; ymm0..ymm3 hold the four raw input vectors; they are never written again,
; so RegData expects them to still contain the unmodified data.
vmovapd ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]

; Test with full zero
vpabsd ymm4, [rdx + 32 * 0]
vpabsd xmm8, [rdx + 32 * 0]

; Test with full negative
; Every dword is 0xFFFFFFFF (-1), so every result dword is 0x00000001.
vpabsd ymm5, [rdx + 32 * 1]
vpabsd xmm9, [rdx + 32 * 1]
vpabsd ymm12, ymm1
vpabsd xmm13, xmm1

; Test with full positive
; Every dword is 1 and passes through unchanged.
vpabsd ymm6, [rdx + 32 * 2]
vpabsd xmm10, [rdx + 32 * 2]
vpabsd ymm14, ymm2
vpabsd xmm15, xmm2

; Test a mix
; Dwords -1, 0 and +1 map to 1, 0 and 1 respectively.
vpabsd ymm7, [rdx + 32 * 3]
vpabsd xmm11, [rdx + 32 * 3]

hlt

align 32
.data:
; All zero
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; All -1
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; All +1
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

; Mix of -1, 0 and +1
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpabsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2":  ["0x0001000100010001", "0x0001000100010001", "0x0001000100010001", "0x0001000100010001"],
    "XMM3":  ["0xFFFFFFFF00000000", "0x00010001FFFF0000", "0xFFFFFFFF00000000", "0x00010001FFFF0000"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0001000100010001", "0x0001000100010001", "0x0001000100010001", "0x0001000100010001"],
    "XMM6":  ["0x0001000100010001", "0x0001000100010001", "0x0001000100010001", "0x0001000100010001"],
    "XMM7":  ["0x0001000100000000", "0x0001000100010000", "0x0001000100000000", "0x0001000100010000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0001000100010001", "0x0001000100010001", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0001000100010001", "0x0001000100010001", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0001000100000000", "0x0001000100010000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x0001000100010001", "0x0001000100010001", "0x0001000100010001", "0x0001000100010001"],
    "XMM13": ["0x0001000100010001", "0x0001000100010001", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x0001000100010001", "0x0001000100010001", "0x0001000100010001", "0x0001000100010001"],
    "XMM15": ["0x0001000100010001", "0x0001000100010001", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPABSW: per-word absolute value, 128-bit and 256-bit forms, with register
; and memory sources.
lea rdx, [rel .data]

; ymm0..ymm3 hold the four raw input vectors; they are never written again,
; so RegData expects them to still contain the unmodified data.
vmovapd ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]

; Test with full zero
vpabsw ymm4, [rdx + 32 * 0]
vpabsw xmm8, [rdx + 32 * 0]

; Test with full negative
; Every word is 0xFFFF (-1), so every result word is 0x0001.
vpabsw ymm5, [rdx + 32 * 1]
vpabsw xmm9, [rdx + 32 * 1]
vpabsw ymm12, ymm1
vpabsw xmm13, xmm1

; Test with full positive
; Every word is 1 and passes through unchanged.
vpabsw ymm6, [rdx + 32 * 2]
vpabsw xmm10, [rdx + 32 * 2]
vpabsw ymm14, ymm2
vpabsw xmm15, xmm2

; Test a mix
; Words -1, 0 and +1 map to 1, 0 and 1 respectively.
vpabsw ymm7, [rdx + 32 * 3]
vpabsw xmm11, [rdx + 32 * 3]

hlt

align 32
.data:
; All zero
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; All -1
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; All +1
dq 0x0001000100010001
dq 0x0001000100010001
dq 0x0001000100010001
dq 0x0001000100010001

; Mix of -1, 0 and +1
dq 0xFFFFFFFF00000000
dq 0x00010001FFFF0000
dq 0xFFFFFFFF00000000
dq 0x00010001FFFF0000


================================================
FILE: unittests/ASM/VEX/vpackssdw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x00000040FFFF8000", "0xFFFF800000000040", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x00000040FFFF8000", "0xFFFF800000000040", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x00000040FFFF8000", "0xFFFF800000000040", "0x00000040FFFF8000", "0xFFFF800000000040"],
    "XMM5": ["0x00000040FFFF8000", "0xFFFF800000000040", "0x00000040FFFF8000", "0xFFFF800000000040"]
  }
}
%endif

; VPACKSSDW: pack signed dwords from two sources into signed words with
; saturation, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; 32bit signed -> 16bit signed (saturated)
; input > 0x7FFF(SHRT_MAX, 32767) = 0x7FFF(SHRT_MAX, 32767)
; input < 0x8000(SHRT_MIN, -32768) = 0x8000(SHRT_MIN, -32768)

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Within each 128-bit lane the low qword of the result is packed from the
; first source and the high qword from the second source.
vpackssdw xmm2, xmm0, [rdx + 32]
vpackssdw xmm3, xmm0, xmm1

vpackssdw ymm4, ymm0, [rdx + 32]
vpackssdw ymm5, ymm0, ymm1

hlt

align 32
.data:
; First source dwords per lane (low to high): INT32_MIN, -1, 0x40, 0
; -> packed words 0x8000 (saturated), 0xFFFF, 0x0040, 0x0000
dq 0xFFFFFFFF80000000
dq 0x0000000000000040
dq 0xFFFFFFFF80000000
dq 0x0000000000000040

; Second source: the same values with the two qwords swapped
dq 0x0000000000000040
dq 0xFFFFFFFF80000000
dq 0x0000000000000040
dq 0xFFFFFFFF80000000


================================================
FILE: unittests/ASM/VEX/vpacksswb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"],
    "XMM5": ["0x00807F4100807F41", "0x00FF7F4100FF7F41", "0x00807F4100807F41", "0x00FF7F4100FF7F41"]
  }
}
%endif

; VPACKSSWB: pack signed words from two sources into signed bytes with
; saturation, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; 16bit signed -> 8bit signed (saturated)
; input > 0x7F(SCHAR_MAX, 127) = 0x7F(SCHAR_MAX, 127)
; input < 0x80(SCHAR_MIN, -128) = 0x80(SCHAR_MIN, -128)

vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Within each 128-bit lane the low qword of the result is packed from the
; first source and the high qword from the second source.
vpacksswb xmm2, xmm0, [rdx + 32]
vpacksswb xmm3, xmm0, xmm1

vpacksswb ymm4, ymm0, [rdx + 32]
vpacksswb ymm5, ymm0, ymm1

hlt

align 32
.data:
; First source words (low to high): 0x0041, 0x007F, 0x8000, 0x0000
; -> packed bytes 0x41, 0x7F, 0x80 (saturated from -32768), 0x00
dq 0x00008000007F0041
dq 0x00008000007F0041
dq 0x00008000007F0041
dq 0x00008000007F0041

; Second source words (low to high): 0x0041, 0x007F, 0xFFFF, 0x0000
; -> packed bytes 0x41, 0x7F, 0xFF (-1 fits in a signed byte), 0x00
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041


================================================
FILE: unittests/ASM/VEX/vpackusdw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000", "0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000"],
    "XMM6":  ["0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000", "0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000"],
    "XMM7":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000", "0xFFFFFFFFFFFFFFFF", "0x00000000FFFF0000"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000", "0xFFFFFFFFFFFFFFFF", "0x12348000FFFF0000"]
  }
}
%endif

; VPACKUSDW: pack signed dwords from two sources into unsigned words with
; saturation, 128-bit and 256-bit forms.
; input > 0xFFFF = 0xFFFF, negative input = 0x0000
lea rdx, [rel .data]

; ymm0 = first source for every case, ymm1/ymm2 = the two second sources
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]

; Memory second source. Within each 128-bit lane the low qword of the result
; is packed from the first source and the high qword from the second source.
vpackusdw xmm3, xmm0, [rdx + 32 * 1]
vpackusdw xmm4, xmm0, [rdx + 32 * 2]

vpackusdw ymm5, ymm0, [rdx + 32 * 1]
vpackusdw ymm6, ymm0, [rdx + 32 * 2]

; Register second source; expected results match the memory forms above.
vpackusdw xmm7, xmm0, xmm1
vpackusdw xmm8, xmm0, xmm2

vpackusdw ymm9, ymm0, ymm1
vpackusdw ymm10, ymm0, ymm2

hlt

align 32
.data:
; Every dword is positive and larger than 0xFFFF, so each packs to 0xFFFF
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Dwords per lane (low to high): 0, INT32_MAX, INT32_MIN, -1
; -> packed words 0x0000, 0xFFFF, 0x0000, 0x0000
dq 0x7FFFFFFF00000000
dq 0xFFFFFFFF80000000
dq 0x7FFFFFFF00000000
dq 0xFFFFFFFF80000000

; Values that actually fit in to 16bit unsigned
dq 0x0000FFFF00000000
dq 0x0000123400008000
dq 0x0000FFFF00000000
dq 0x0000123400008000


================================================
FILE: unittests/ASM/VEX/vpackuswb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0x00007F4100007F41", "0x00007F4100007F41", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x00007F4100007F41", "0x00007F4100007F41", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x00007F4100007F41", "0x00007F4100007F41", "0x7F4100007F410000", "0x7F4100007F410000"],
    "XMM4": ["0x00007F4100007F41", "0x00007F4100007F41", "0x7F4100007F410000", "0x7F4100007F410000"]
  }
}
%endif

; VPACKUSWB: pack signed words from two sources into unsigned bytes with
; saturation, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; 16bit signed -> 8bit unsigned (saturated)
; input > 0xFF(UCHAR_MAX, 255) = 0xFF(UCHAR_MAX, 255)
; input < 0x00(Negative) = 0x0

vmovapd ymm0, [rdx]

; Both sources are the same vector, so within each 128-bit lane the low and
; high qwords of the result are identical.
vpackuswb xmm1, xmm0, [rdx]
vpackuswb xmm2, xmm0, xmm0

vpackuswb ymm3, ymm0, [rdx]
vpackuswb ymm4, ymm0, ymm0

hlt

align 32
.data:
; Lower lane words (low to high): 0x0041, 0x007F, 0xFFFF (-1), 0x0000
; -> packed bytes 0x41, 0x7F, 0x00 (negative saturates to 0), 0x00
dq 0x0000FFFF007F0041
dq 0x0000FFFF007F0041
; Upper lane: the same words with the dword halves swapped
dq 0x007F00410000FFFF
dq 0x007F00410000FFFF

================================================
FILE: unittests/ASM/VEX/vpaddb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0102030405060708", "0x1011121314151617", "0x0809AABBCCDDEEFF", "0x4041424344454647"],
    "XMM1": ["0x090A0B0C0D0E0F10", "0xCCEEDDAABBFF0990", "0x2021222324252627", "0x0062636465666768"],
    "XMM2": ["0x0A0C0E1012141618", "0xDCFFEFBDCF141FA7", "0x282ACCDEF0021426", "0x40A3A5A7A9ABADAF"],
    "XMM3": ["0x0A0C0E1012141618", "0xDCFFEFBDCF141FA7", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0A0C0E1012141618", "0xDCFFEFBDCF141FA7", "0x282ACCDEF0021426", "0x40A3A5A7A9ABADAF"],
    "XMM5": ["0x0A0C0E1012141618", "0xDCFFEFBDCF141FA7", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPADDB: per-byte wrapping add, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Registers
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vpaddb ymm2, ymm0, ymm1
vpaddb xmm3, xmm0, xmm1

; Memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vpaddb ymm4, ymm0, [rdx + 32]
vpaddb xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
; First source
dq 0x0102030405060708
dq 0x1011121314151617
dq 0x0809AABBCCDDEEFF
dq 0x4041424344454647

; Second source. Some byte sums overflow (e.g. 0x15 + 0xFF wraps to 0x14),
; which checks that no carry leaks into the neighbouring byte.
dq 0x090A0B0C0D0E0F10
dq 0xCCEEDDAABBFF0990
dq 0x2021222324252627
dq 0x0062636465666768


================================================
FILE: unittests/ASM/VEX/vpaddd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xFFFFFFFFEEEEEEEE", "0x1011121314151617", "0x0809AABBCCDDEEFF", "0x4041424344454647"],
    "XMM1": ["0x090A0B0C0D0E0F10", "0xCCEEDDAABBFF0990", "0x2021222324252627", "0x0062636465666768"],
    "XMM2": ["0x090A0B0BFBFCFDFE", "0xDCFFEFBDD0141FA7", "0x282ACCDEF1031526", "0x40A3A5A7A9ABADAF"],
    "XMM3": ["0x090A0B0BFBFCFDFE", "0xDCFFEFBDD0141FA7", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x090A0B0BFBFCFDFE", "0xDCFFEFBDD0141FA7", "0x282ACCDEF1031526", "0x40A3A5A7A9ABADAF"],
    "XMM5": ["0x090A0B0BFBFCFDFE", "0xDCFFEFBDD0141FA7", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPADDD: per-dword wrapping add, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Registers
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vpaddd ymm2, ymm0, ymm1
vpaddd xmm3, xmm0, xmm1

; Memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vpaddd ymm4, ymm0, [rdx + 32]
vpaddd xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
; First source. The 0xFFFFFFFF dword overflows when added
; (0xFFFFFFFF + 0x090A0B0C wraps to 0x090A0B0B).
dq 0xFFFFFFFFEEEEEEEE
dq 0x1011121314151617
dq 0x0809AABBCCDDEEFF
dq 0x4041424344454647

; Second source
dq 0x090A0B0C0D0E0F10
dq 0xCCEEDDAABBFF0990
dq 0x2021222324252627
dq 0x0062636465666768


================================================
FILE: unittests/ASM/VEX/vpaddq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xEEEEEEEECCCCCCCC", "0x7FFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0x4041424344454647"],
    "XMM1": ["0xAAAAAAAAAAAAAAAA", "0xCCEEDDAABBFF0990", "0x2021222324252627", "0x5555555555555555"],
    "XMM2": ["0x9999999977777776", "0x4CEEDDAABBFF098F", "0x2021222324252626", "0x95969798999A9B9C"],
    "XMM3": ["0x9999999977777776", "0x4CEEDDAABBFF098F", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x9999999977777776", "0x4CEEDDAABBFF098F", "0x2021222324252626", "0x95969798999A9B9C"],
    "XMM5": ["0x9999999977777776", "0x4CEEDDAABBFF098F", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPADDQ: per-qword wrapping add, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Registers
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vpaddq ymm2, ymm0, ymm1
vpaddq xmm3, xmm0, xmm1

; Memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vpaddq ymm4, ymm0, [rdx + 32]
vpaddq xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
; First source. The first three qword sums overflow 64 bits and wrap
; (e.g. 0xFFFFFFFFFFFFFFFF + x gives x - 1).
dq 0xEEEEEEEECCCCCCCC
dq 0x7FFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0x4041424344454647

; Second source
dq 0xAAAAAAAAAAAAAAAA
dq 0xCCEEDDAABBFF0990
dq 0x2021222324252627
dq 0x5555555555555555


================================================
FILE: unittests/ASM/VEX/vpaddsb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F"],
    "XMM4": ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F", "0x7F7F7F7F7F7F7F7F"]
  }
}
%endif

; VPADDSB: per-byte add with signed saturation, 128-bit and 256-bit forms.
; With this data every byte sum exceeds 0x7F, so every result byte is
; clamped to 0x7F. Negative saturation is not exercised here.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vpaddsb xmm2, xmm0, xmm1
vpaddsb ymm3, ymm0, ymm1

; Memory operand
vpaddsb xmm4, xmm0, [rdx + 32]
vpaddsb ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Second source
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpaddsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"],
    "XMM4": ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"]
  }
}
%endif

; VPADDSW: per-word add with signed saturation, 128-bit and 256-bit forms.
; With this data every word sum exceeds 0x7FFF, so every result word is
; clamped to 0x7FFF. Negative saturation is not exercised here.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vpaddsw xmm2, xmm0, xmm1
vpaddsw ymm3, ymm0, ymm1

; Memory operand
vpaddsw xmm4, xmm0, [rdx + 32]
vpaddsw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Second source
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpaddusb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM4": ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0xA2A4A6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; VPADDUSB: per-byte add with unsigned saturation, 128-bit and 256-bit forms.
; With this data the largest byte sum is 0x58 + 0x78 = 0xD0, so no lane
; actually reaches the 0xFF clamp; the results equal the plain byte sums.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vpaddusb xmm2, xmm0, xmm1
vpaddusb ymm3, ymm0, ymm1

; Memory operand
vpaddusb xmm4, xmm0, [rdx + 32]
vpaddusb ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Second source
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpaddusw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xF142434445464748", "0x5152535455565758", "0xF142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0"],
    "XMM4": ["0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0", "0xFFFFA6A8AAACAEB0", "0xC2C4C6C8CACCCED0"]
  }
}
%endif

; VPADDUSW: per-word add with unsigned saturation, 128-bit and 256-bit forms.
; Only the word 0xF142 + 0x6162 overflows and is clamped to 0xFFFF; all other
; word sums fit in 16 bits.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vpaddusw xmm2, xmm0, xmm1
vpaddusw ymm3, ymm0, ymm1

; Memory operand
vpaddusw xmm4, xmm0, [rdx + 32]
vpaddusw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source; the top word 0xF142 is the one that saturates
dq 0xF142434445464748
dq 0x5152535455565758
dq 0xF142434445464748
dq 0x5152535455565758

; Second source
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpaddw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0102030405060708", "0x1011121314151617", "0x0809AABBCCDDEEFF", "0x4041424344454647"],
    "XMM1": ["0x090A0B0C0D0E0F10", "0xCCEEDDAABBFF0990", "0x2021222324252627", "0x0062636465666768"],
    "XMM2": ["0x0A0C0E1012141618", "0xDCFFEFBDD0141FA7", "0x282ACCDEF1021526", "0x40A3A5A7A9ABADAF"],
    "XMM3": ["0x0A0C0E1012141618", "0xDCFFEFBDD0141FA7", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0A0C0E1012141618", "0xDCFFEFBDD0141FA7", "0x282ACCDEF1021526", "0x40A3A5A7A9ABADAF"],
    "XMM5": ["0x0A0C0E1012141618", "0xDCFFEFBDD0141FA7", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPADDW: per-word wrapping add, 128-bit and 256-bit forms.
lea rdx, [rel .data]

; Registers
; The xmm form is expected (see RegData) to leave the upper 128 bits of the
; destination zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vpaddw ymm2, ymm0, ymm1
vpaddw xmm3, xmm0, xmm1

; Memory operand
; Same operands as above, so ymm4/xmm5 must match ymm2/xmm3.
vpaddw ymm4, ymm0, [rdx + 32]
vpaddw xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
; First source
dq 0x0102030405060708
dq 0x1011121314151617
dq 0x0809AABBCCDDEEFF
dq 0x4041424344454647

; Second source. Some word sums overflow (e.g. 0xEEFF + 0x2627 wraps to
; 0x1526), which checks that no carry leaks into the neighbouring word.
dq 0x090A0B0C0D0E0F10
dq 0xCCEEDDAABBFF0990
dq 0x2021222324252627
dq 0x0062636465666768


================================================
FILE: unittests/ASM/VEX/vpalignr.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM2":  ["0x7861626364656667", "0x4871727374757677", "0x9881828384858687", "0x1891929394959697"],
      "XMM3":  ["0x5354555657584142", "0x0000000000005152", "0x2324252627281112", "0x0000000000002122"],
      "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0x7861626364656667", "0x4871727374757677", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x5354555657584142", "0x0000000000005152", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x7861626364656667", "0x4871727374757677", "0x9881828384858687", "0x1891929394959697"],
      "XMM9":  ["0x5354555657584142", "0x0000000000005152", "0x2324252627281112", "0x0000000000002122"],
      "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x7861626364656667", "0x4871727374757677", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x5354555657584142", "0x0000000000005152", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
      "XMM15": ["0x6162636465666768", "0x7172737475767778", "0x8182838485868788", "0x9192939495969798"]
  }
}
%endif

; VPALIGNR: per 128-bit lane, concatenate first source (high) with second
; source (low), shift right by imm8 bytes and keep the low 16 bytes.
; Immediates used:
;   1  - one byte of the first source is shifted in at the top
;   22 - more than 16, so only first-source bytes remain, zero-filled above
;   32 - everything is shifted out, result is zero
;   0  - result is a copy of the second source
lea rdx, [rel .data]

vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 256-bit, register second source
vpalignr ymm2, ymm0, ymm1, 1
vpalignr ymm3, ymm0, ymm1, 22
vpalignr ymm4, ymm0, ymm1, 32

; 128-bit, register second source (upper 128 bits expected zero, see RegData)
vpalignr xmm5, xmm0, xmm1, 1
vpalignr xmm6, xmm0, xmm1, 22
vpalignr xmm7, xmm0, xmm1, 32

; 256-bit, memory second source
vpalignr ymm8,  ymm0, [rdx + 32], 1
vpalignr ymm9,  ymm0, [rdx + 32], 22
vpalignr ymm10, ymm0, [rdx + 32], 32

; 128-bit, memory second source
vpalignr xmm11, xmm0, [rdx + 32], 1
vpalignr xmm12, xmm0, [rdx + 32], 22
vpalignr xmm13, xmm0, [rdx + 32], 32

; Zero shift: destination receives the second source unchanged
vpalignr xmm14, xmm0, [rdx + 32], 0
vpalignr ymm15, ymm0, [rdx + 32], 0

hlt

align 32
.data:
; First source (high half of each concatenation)
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x1112131415161718
dq 0x2122232425262728

; Second source (low half of each concatenation)
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x8182838485868788
dq 0x9192939495969798


================================================
FILE: unittests/ASM/VEX/vpand.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM3": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4040404445464748", "0x4142434455545558", "0x6062626445464748", "0x4142434475767778"],
    "XMM5": ["0x4040404445464748", "0x4142434455545558", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPAND test: ANDs the vector at .data1 with the vector at .data2 using the
; 256-bit and 128-bit forms, with register and memory second sources.
lea rdx, [rel .data1]
lea rbx, [rel .data2]

; ymm0 <- .data1, ymm1 <- .data2
vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
vpand ymm2, ymm0, ymm1
vpand xmm3, xmm0, xmm1

; With memory operand
; ([rbx] holds the same bytes as ymm1, so results should match the register forms)
vpand ymm4, ymm0, [rbx]
vpand xmm5, xmm0, [rbx]

hlt

align 32
; First source operand (ymm0)
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Second source operand (ymm1 / memory operand)
.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpandn.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM3": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8C8C8C8830303030", "0x2020202088898885", "0x8E8C8C8A10101010", "0x000000008A898887"],
    "XMM5": ["0x8C8C8C8830303030", "0x2020202088898885", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPANDN test: runs VPANDN with .data1 as the first source and .data2 as the
; second source, using the 256-bit and 128-bit forms with register and memory
; second sources.
lea rdx, [rel .data1]
lea rbx, [rel .data2]

; ymm0 <- .data1, ymm1 <- .data2
vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
vpandn ymm2, ymm0, ymm1
vpandn xmm3, xmm0, xmm1

; With memory operand
; ([rbx] holds the same bytes as ymm1, so results should match the register forms)
vpandn ymm4, ymm0, [rbx]
vpandn xmm5, xmm0, [rbx]

hlt

align 32
; First source operand (ymm0)
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Second source operand (ymm1 / memory operand)
.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpavgb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x2179B0697D5378C4", "0x3B8E6EAE8C165248", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x1ED68638699D35CA", "0x5E2E7560AB7B5262", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x165C42291F28194C", "0x0923643C32130145", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x2179B0697D5378C4", "0x3B8E6EAE8C165248", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x1ED68638699D35CA", "0x5E2E7560AB7B5262", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x165C42291F28194C", "0x0923643C32130145", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x2179B0697D5378C4", "0x3B8E6EAE8C165248", "0x2179B0697D5378C4", "0x3B8E6EAE8C165248"],
    "XMM11": ["0x1ED68638699D35CA", "0x5E2E7560AB7B5262", "0x1ED68638699D35CA", "0x5E2E7560AB7B5262"],
    "XMM12": ["0x165C42291F28194C", "0x0923643C32130145", "0x165C42291F28194C", "0x0923643C32130145"],
    "XMM13": ["0x2179B0697D5378C4", "0x3B8E6EAE8C165248", "0x2179B0697D5378C4", "0x3B8E6EAE8C165248"],
    "XMM14": ["0x1ED68638699D35CA", "0x5E2E7560AB7B5262", "0x1ED68638699D35CA", "0x5E2E7560AB7B5262"],
    "XMM15": ["0x165C42291F28194C", "0x0923643C32130145", "0x165C42291F28194C", "0x0923643C32130145"]
  }
}
%endif

; VPAVGB test: averages the vector held in ymm9 against three other vectors,
; using 128-bit and 256-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm6/ymm7/ymm8 <- second, third and fourth 32-byte rows of .data
; ymm9 <- first row (the common first source for every operation below)
vmovapd ymm6, [rdx + 32 * 1]
vmovapd ymm7, [rdx + 32 * 2]
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx]

; 128-bit register only
vpavgb xmm0, xmm9, xmm6
vpavgb xmm1, xmm9, xmm7
vpavgb xmm2, xmm9, xmm8

; 128-bit memory operand
vpavgb xmm3, xmm9, [rdx + 32 * 1]
vpavgb xmm4, xmm9, [rdx + 32 * 2]
vpavgb xmm5, xmm9, [rdx + 32 * 3]

; 256-bit register only
vpavgb ymm10, ymm9, ymm6
vpavgb ymm11, ymm9, ymm7
vpavgb ymm12, ymm9, ymm8

; 256-bit memory operand
vpavgb ymm13, ymm9, [rdx + 32 * 1]
vpavgb ymm14, ymm9, [rdx + 32 * 2]
vpavgb ymm15, ymm9, [rdx + 32 * 3]

hlt

align 32
.data:
; Row 0: first source (ymm9)
dq 0x2BB883523D4F3197
dq 0x1246C77764260189
dq 0x2BB883523D4F3197
dq 0x1246C77764260189

; Row 1: second source (ymm6 / [rdx + 32 * 1])
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306

; Row 2: second source (ymm7 / [rdx + 32 * 2])
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A

; Row 3: all-zero second source (ymm8 / [rdx + 32 * 3])
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpavgb_aliasing.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM10": ["0x2179B0697D5378C4", "0x3B8E6EAE8C165248", "0x2179B0697D5378C4", "0x3B8E6EAE8C165248"],
    "XMM11": ["0x1ED68638699D35CA", "0x5E2E7560AB7B5262", "0x1ED68638699D35CA", "0x5E2E7560AB7B5262"]
  }
}
%endif

; Small test that ensures aliasing source/dest is handled properly.
; Two cases are covered: the destination aliasing the first source, and the
; destination aliasing the second source.

lea rdx, [rel .data]

; ymm6 <- second row of .data, ymm7 <- first row
vmovapd ymm6, [rdx + 32]
vmovapd ymm7, [rdx]

; 256-bit register only
; Destination (ymm10) is also the first source
vmovapd ymm10, ymm7
vpavgb ymm10, ymm10, ymm6

; Destination (ymm11) is also the second source; it starts as the third row
vmovapd ymm11, [rdx + 64]
vpavgb ymm11, ymm7, ymm11

hlt

align 32
.data:
; Row 0: loaded into ymm7
dq 0x2BB883523D4F3197
dq 0x1246C77764260189
dq 0x2BB883523D4F3197
dq 0x1246C77764260189

; Row 1: loaded into ymm6
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306

; Row 2: initial contents of ymm11
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A


================================================
FILE: unittests/ASM/VEX/vpavgw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x20F9B0697CD37844", "0x3B8E6EAE8C165248", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x15DC41A91EA818CC", "0x092363BC321300C5", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x20F9B0697CD37844", "0x3B8E6EAE8C165248", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x15DC41A91EA818CC", "0x092363BC321300C5", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x20F9B0697CD37844", "0x3B8E6EAE8C165248", "0x20F9B0697CD37844", "0x3B8E6EAE8C165248"],
    "XMM11": ["0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2", "0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2"],
    "XMM12": ["0x15DC41A91EA818CC", "0x092363BC321300C5", "0x15DC41A91EA818CC", "0x092363BC321300C5"],
    "XMM13": ["0x20F9B0697CD37844", "0x3B8E6EAE8C165248", "0x20F9B0697CD37844", "0x3B8E6EAE8C165248"],
    "XMM14": ["0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2", "0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2"],
    "XMM15": ["0x15DC41A91EA818CC", "0x092363BC321300C5", "0x15DC41A91EA818CC", "0x092363BC321300C5"]
  }
}
%endif

; VPAVGW test: averages the vector held in ymm9 against three other vectors,
; using 128-bit and 256-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm6/ymm7/ymm8 <- second, third and fourth 32-byte rows of .data
; ymm9 <- first row (the common first source for every operation below)
vmovapd ymm6, [rdx + 32 * 1]
vmovapd ymm7, [rdx + 32 * 2]
vmovapd ymm8, [rdx + 32 * 3]
vmovapd ymm9, [rdx]

; 128-bit register only
vpavgw xmm0, xmm9, xmm6
vpavgw xmm1, xmm9, xmm7
vpavgw xmm2, xmm9, xmm8

; 128-bit memory operand
vpavgw xmm3, xmm9, [rdx + 32 * 1]
vpavgw xmm4, xmm9, [rdx + 32 * 2]
vpavgw xmm5, xmm9, [rdx + 32 * 3]

; 256-bit register only
vpavgw ymm10, ymm9, ymm6
vpavgw ymm11, ymm9, ymm7
vpavgw ymm12, ymm9, ymm8

; 256-bit memory operand
vpavgw ymm13, ymm9, [rdx + 32 * 1]
vpavgw ymm14, ymm9, [rdx + 32 * 2]
vpavgw ymm15, ymm9, [rdx + 32 * 3]

hlt

align 32
.data:
; Row 0: first source (ymm9)
dq 0x2BB883523D4F3197
dq 0x1246C77764260189
dq 0x2BB883523D4F3197
dq 0x1246C77764260189

; Row 1: second source (ymm6 / [rdx + 32 * 1])
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306

; Row 2: second source (ymm7 / [rdx + 32 * 2])
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A

; Row 3: all-zero second source (ymm8 / [rdx + 32 * 3])
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpavgw_aliasing.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM10": ["0x20F9B0697CD37844", "0x3B8E6EAE8C165248", "0x20F9B0697CD37844", "0x3B8E6EAE8C165248"],
    "XMM11": ["0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2", "0x1ED685B8691D35CA", "0x5DAE74E0AB7B51E2"]
  }
}
%endif

; Small test that ensures aliasing source/dest is handled properly.
; Two cases are covered: the destination aliasing the first source, and the
; destination aliasing the second source.

lea rdx, [rel .data]

; ymm6 <- second row of .data, ymm7 <- first row
vmovapd ymm6, [rdx + 32]
vmovapd ymm7, [rdx]

; 256-bit register only
; Destination (ymm10) is also the first source
vmovapd ymm10, ymm7
vpavgw ymm10, ymm10, ymm6

; Destination (ymm11) is also the second source; it starts as the third row
vmovapd ymm11, [rdx + 64]
vpavgw ymm11, ymm7, ymm11

hlt

align 32
.data:
; Row 0: loaded into ymm7
dq 0x2BB883523D4F3197
dq 0x1246C77764260189
dq 0x2BB883523D4F3197
dq 0x1246C77764260189

; Row 1: loaded into ymm6
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306
dq 0x163ADD80BC57BEF1
dq 0x64D615E5B405A306

; Row 2: initial contents of ymm11
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A
dq 0x11F4881D94EB39FC
dq 0xA9162248F2D0A23A


================================================
FILE: unittests/ASM/VEX/vpblendd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x9999999988888888"],
    "XMM3": ["0x1111111122222222", "0x3333333344444444", "0x5555555566666666", "0x7777777788888888"],
    "XMM4": ["0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x1111111122222222", "0x3333333344444444", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x55555555FFFFFFFF", "0x7777777788888888"],
    "XMM7": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0xEEEEEEEE66666666", "0x9999999988888888"],
    "XMM8": ["0x11111111BBBBBBBB", "0x33333333DDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM9": ["0xAAAAAAAA22222222", "0xCCCCCCCC44444444", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBLENDD test: blends 32-bit elements of ymm0/xmm0 and ymm1/xmm1 under an
; immediate mask, for the 256-bit and 128-bit register forms.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Selecting all of one input vector
vpblendd ymm2, ymm0, ymm1, 0    ; All of ymm0
vpblendd ymm3, ymm0, ymm1, 0xFF ; All of ymm1

vpblendd xmm4, xmm0, xmm1, 0    ; All of xmm0
vpblendd xmm5, xmm0, xmm1, 0xFF ; All of xmm1

; Alternating source vectors
; Per RegData: 0b10101010 takes odd-numbered dwords from ymm1 and even-numbered
; dwords from ymm0; 0b01010101 is the opposite.
vpblendd ymm6, ymm0, ymm1, 0b10101010
vpblendd ymm7, ymm0, ymm1, 0b01010101

vpblendd xmm8, xmm0, xmm1, 0b10101010
vpblendd xmm9, xmm0, xmm1, 0b01010101

hlt

align 32
.data:
; Loaded into ymm0 (first source)
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888

; Loaded into ymm1 (second source)
dq 0x1111111122222222
dq 0x3333333344444444
dq 0x5555555566666666
dq 0x7777777788888888


================================================
FILE: unittests/ASM/VEX/vpblendvb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0x1222324252627282", "0xAABBCCDDEEFF9900", "0x8070605040302010", "0x1020304050607080"],
    "XMM4":  ["0x1222324252627282", "0xAABBCCDDEEFF9900", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xAABBCCDDEEFF1122", "0x3344556677889900", "0x1020304050607080", "0x9585756555453525"],
    "XMM6":  ["0xAABBCCDDEEFF1122", "0x3344556677889900", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0xAA22CC42EE621182", "0x33BB55DD77FF9900", "0x1070305050307010", "0x9520754055603580"],
    "XMM8":  ["0xAA22CC42EE621182", "0x33BB55DD77FF9900", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x12BB32DD52FF7222", "0xAA44CC66EE889900", "0x8020604040602080", "0x1085306550457025"],
    "XMM10": ["0x12BB32DD52FF7222", "0xAA44CC66EE889900", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x1222324252627282", "0xaabbccddeeff9900", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBLENDVB test: byte-wise blend of ymm0/xmm0 and ymm1/xmm1 under a mask held
; in ymm2/xmm2, for the 256-bit and 128-bit register forms.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes, ymm2 <- mask
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]
vmovaps ymm2, [rel .mask_all]

; Select all ymm1
vpblendvb ymm3, ymm0, ymm1, ymm2
vpblendvb xmm4, xmm0, xmm1, xmm2

; Select all ymm0
vmovaps ymm2, [rel .mask_none]
vpblendvb ymm5, ymm0, ymm1, ymm2
vpblendvb xmm6, xmm0, xmm1, xmm2

; Interleaved selection from ymm1 and ymm0
; (mask has 0x80 in even-numbered bytes: those bytes come from ymm1)
vmovaps ymm2, [rel .mask_interleave1]
vpblendvb ymm7, ymm0, ymm1, ymm2
vpblendvb xmm8, xmm0, xmm1, xmm2

; Interleaved selection from ymm0 and ymm1
; (mask has 0x80 in odd-numbered bytes: those bytes come from ymm1)
vmovaps ymm2, [rel .mask_interleave2]
vpblendvb ymm9,  ymm0, ymm1, ymm2
vpblendvb xmm10, xmm0, xmm1, xmm2

; Select all xmm1 (mask_all), with stale data preloaded into the destination.
; RegData expects the low 128 bits to equal xmm1 and the upper 128 bits of
; ymm11 to be zero, i.e. the preloaded .data_bad contents must not survive.
vmovaps ymm11, [rel .data_bad]
vmovaps ymm2, [rel .mask_all]
vpblendvb xmm11, xmm0, xmm1, xmm2

hlt

align 32
.data:
; Loaded into ymm0 (first source)
dq 0xAABBCCDDEEFF1122
dq 0x3344556677889900
dq 0x1020304050607080
dq 0x9585756555453525

; Loaded into ymm1 (second source)
dq 0x1222324252627282
dq 0xAABBCCDDEEFF9900
dq 0x8070605040302010
dq 0x1020304050607080

; Every byte has its top bit set
.mask_all:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; No byte has its top bit set
.mask_none:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Top bit set in even-numbered bytes only
.mask_interleave1:
dq 0x0080008000800080
dq 0x0080008000800080
dq 0x0080008000800080
dq 0x0080008000800080

; Top bit set in odd-numbered bytes only
.mask_interleave2:
dq 0x8000800080008000
dq 0x8000800080008000
dq 0x8000800080008000
dq 0x8000800080008000

; Junk used to pre-fill ymm11 before the final 128-bit blend
.data_bad:
dq 0x3132333435363738
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768


================================================
FILE: unittests/ASM/VEX/vpblendw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xAAAABBBBCCCCDDDD", "0xEEEEFFFF99998888", "0x7777666655554444", "0x222211110000AAAA"],
    "XMM3": ["0x1111222233334444", "0x5555666677778888", "0x9999AAAABBBBCCCC", "0xDDDDEEEEFFFF1111"],
    "XMM4": ["0xAAAABBBBCCCCDDDD", "0xEEEEFFFF99998888", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x1111222233334444", "0x5555666677778888", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x1111BBBB3333DDDD", "0x5555FFFF77778888", "0x99996666BBBB4444", "0xDDDD1111FFFFAAAA"],
    "XMM7": ["0xAAAA2222CCCC4444", "0xEEEE666699998888", "0x7777AAAA5555CCCC", "0x2222EEEE00001111"],
    "XMM8": ["0x1111BBBB3333DDDD", "0x5555FFFF77778888", "0x0000000000000000", "0x0000000000000000"],
    "XMM9": ["0xAAAA2222CCCC4444", "0xEEEE666699998888", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBLENDW test: blends 16-bit elements of ymm0/xmm0 and ymm1/xmm1 under an
; immediate mask, for the 256-bit and 128-bit register forms.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Selecting all of one input vector
vpblendw ymm2, ymm0, ymm1, 0    ; All of ymm0
vpblendw ymm3, ymm0, ymm1, 0xFF ; All of ymm1

vpblendw xmm4, xmm0, xmm1, 0    ; All of xmm0
vpblendw xmm5, xmm0, xmm1, 0xFF ; All of xmm1

; Alternating source vectors
; Per RegData: 0b10101010 takes odd-numbered words from ymm1 and even-numbered
; words from ymm0 in both 128-bit halves; 0b01010101 is the opposite.
vpblendw ymm6, ymm0, ymm1, 0b10101010
vpblendw ymm7, ymm0, ymm1, 0b01010101

vpblendw xmm8, xmm0, xmm1, 0b10101010
vpblendw xmm9, xmm0, xmm1, 0b01010101

hlt

align 32
.data:
; Loaded into ymm0 (first source)
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF99998888
dq 0x7777666655554444
dq 0x222211110000AAAA

; Loaded into ymm1 (second source)
dq 0x1111222233334444
dq 0x5555666677778888
dq 0x9999AAAABBBBCCCC
dq 0xDDDDEEEEFFFF1111


================================================
FILE: unittests/ASM/VEX/vpbroadcastb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0"],
      "XMM3": ["0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBROADCASTB test: broadcasts a single byte from a register or from memory
; into every byte of a 256-bit or 128-bit destination.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]

; Register broadcasting
; (RegData expects the lowest byte of the source, 0xE0 in both cases)
vpbroadcastb ymm2, xmm0
vpbroadcastb xmm3, xmm1

; Memory broadcasting
; [rdx + 16] is in the 0xFF.. qword, [rdx + 48] is in the 0xDD.. qword
vpbroadcastb ymm4, [rdx + 16]
vpbroadcastb xmm5, [rdx + 48]

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm0
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Bytes 32-63: loaded into ymm1
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpbroadcastd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0", "0xA12BFCE0A12BFCE0"],
      "XMM3": ["0xAAED56E0AAED56E0", "0xAAED56E0AAED56E0", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBROADCASTD test: broadcasts a single 32-bit element from a register or
; from memory into every dword of a 256-bit or 128-bit destination.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]

; Register broadcasting
; (RegData expects the lowest dword of the source to be replicated)
vpbroadcastd ymm2, xmm0
vpbroadcastd xmm3, xmm1

; Memory broadcasting
; [rdx + 16] is in the 0xFF.. qword, [rdx + 24] is in the 0xEE.. qword
vpbroadcastd ymm4, [rdx + 16]
vpbroadcastd xmm5, [rdx + 24]

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm0
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Bytes 32-63: loaded into ymm1
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpbroadcastq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0", "0xA76C4F06A12BFCE0"],
      "XMM3": ["0x6868C3F3AAED56E0", "0x6868C3F3AAED56E0", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBROADCASTQ test: broadcasts a single 64-bit element from a register or
; from memory into every qword of a 256-bit or 128-bit destination.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]

; Register broadcasting
; (RegData expects the lowest qword of the source to be replicated)
vpbroadcastq ymm2, xmm0
vpbroadcastq xmm3, xmm1

; Memory broadcasting
; [rdx + 16] is the 0xFF.. qword, [rdx + 24] is the 0xEE.. qword
vpbroadcastq ymm4, [rdx + 16]
vpbroadcastq xmm5, [rdx + 24]

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm0
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Bytes 32-63: loaded into ymm1
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpbroadcastw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM1": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM2": ["0xFCE0FCE0FCE0FCE0", "0xFCE0FCE0FCE0FCE0", "0xFCE0FCE0FCE0FCE0", "0xFCE0FCE0FCE0FCE0"],
      "XMM3": ["0x56E056E056E056E0", "0x56E056E056E056E0", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPBROADCASTW test: broadcasts a single 16-bit element from a register or
; from memory into every word of a 256-bit or 128-bit destination.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]

; Register broadcasting
; (RegData expects the lowest word of the source to be replicated)
vpbroadcastw ymm2, xmm0
vpbroadcastw xmm3, xmm1

; Memory broadcasting
; [rdx + 16] is in the 0xFF.. qword, [rdx + 48] is in the 0xDD.. qword
vpbroadcastw ymm4, [rdx + 16]
vpbroadcastw xmm5, [rdx + 48]

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm0
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Bytes 32-63: loaded into ymm1
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpclmulqdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM3": ["0x1E2017C5BEE29400", "0x38358E40CC367C7A", "0x0000000000000000", "0x0000000000000000"],
      "XMM4": ["0xE208147952DE57A0", "0x317D360F86C80DC9", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0xBBA54C87DA872B40", "0x6495428B7641EBE6", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPCLMULQDQ test (128-bit form): runs the instruction on xmm1/xmm2 with the
; four immediates 0, 1, 16 and 17, writing into pre-filled destinations.
lea rdx, [rel .data]

; Load inputs
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]

; Fill result vectors with junk (ensure proper lane clearing is performed)
; RegData expects the upper 128 bits of ymm3-ymm6 to end up zero.
vmovaps ymm3, [rdx + 32 * 0]
vmovaps ymm4, [rdx + 32 * 0]
vmovaps ymm5, [rdx + 32 * 0]
vmovaps ymm6, [rdx + 32 * 0]

; With imm = 0b00000000
vpclmulqdq xmm3, xmm1, xmm2, 0

; With imm = 0b00000001
vpclmulqdq xmm4, xmm1, xmm2, 1

; With imm = 0b00010000
vpclmulqdq xmm5, xmm1, xmm2, 16

; With imm = 0b00010001
vpclmulqdq xmm6, xmm1, xmm2, 17

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm1 (and used as the junk fill for ymm3-ymm6)
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE
; Bytes 32-63: loaded into ymm2
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpclmulqdq_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM2": ["0x6868C3F3AAED56E0", "0xF0FCE9E294E6E6DE", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC"],
      "XMM3": ["0x1E2017C5BEE29400", "0x38358E40CC367C7A", "0x4b4b4b4b4b4b4b4b", "0x4b4b4b4b4b4b4b4b"],
      "XMM4": ["0xE208147952DE57A0", "0x317D360F86C80DC9", "0x4646464646464646", "0x4646464646464646"],
      "XMM5": ["0xBBA54C87DA872B40", "0x6495428B7641EBE6", "0x4444444444444444", "0x4444444444444444"],
      "XMM6": ["0x170B5A1B5CDD42EA", "0x719F094BB2358CA1", "0x4848484848484848", "0x4848484848484848"],
      "XMM7": ["0x1e2017c5bee29400", "0x38358e40cc367c7a", "0", "0"]
  }
}
%endif

; VPCLMULQDQ test (256-bit form): runs the instruction on ymm1/ymm2 with the
; four immediates 0, 1, 16 and 17, then runs one 128-bit form to check that
; the upper half of the destination is zeroed.
lea rdx, [rel .data]

; Load inputs
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]

; Fill result vectors with junk (ensure proper lane clearing is performed)
vmovaps ymm3, [rdx + 32 * 0]
vmovaps ymm4, [rdx + 32 * 0]
vmovaps ymm5, [rdx + 32 * 0]
vmovaps ymm6, [rdx + 32 * 0]
vmovaps ymm7, [rdx + 32 * 0]

; With imm = 0b00000000
vpclmulqdq ymm3, ymm1, ymm2, 0

; With imm = 0b00000001
vpclmulqdq ymm4, ymm1, ymm2, 1

; With imm = 0b00010000
vpclmulqdq ymm5, ymm1, ymm2, 16

; With imm = 0b00010001
vpclmulqdq ymm6, ymm1, ymm2, 17

; Test zero-extension
; Also test a wacky immediate.
; Bits 0 and 4 of this immediate are clear; RegData expects the same low
; 128 bits as the imm = 0 case (ymm3) and zero in the upper 128 bits.
vpclmulqdq xmm7, xmm1, xmm2, 11101110b

hlt

align 32
.data:
; Bytes 0-31: loaded into ymm1 (and used as the junk fill for ymm3-ymm7)
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE
; Bytes 32-63: loaded into ymm2
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpcmpeqb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x4142434445464748", "0x7172737475767778", "0x4142434445464748"],
    "XMM1": ["0x6162636465666778", "0x5152535455565748", "0x6162636465666778", "0x5152535455565748"],
    "XMM2": ["0x00000000000000FF", "0x00000000000000FF", "0x00000000000000FF", "0x00000000000000FF"],
    "XMM3": ["0x00000000000000FF", "0x00000000000000FF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x00000000000000FF", "0x00000000000000FF", "0x00000000000000FF", "0x00000000000000FF"],
    "XMM5": ["0x00000000000000FF", "0x00000000000000FF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPCMPEQB test: byte-wise equality compare of two vectors, using 256-bit and
; 128-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
vpcmpeqb ymm2, ymm0, ymm1
vpcmpeqb xmm3, xmm0, xmm1

; Memory operand
; ([rdx + 32 * 1] holds the same bytes as ymm1)
vpcmpeqb ymm4, ymm0, [rdx + 32 * 1]
vpcmpeqb xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
; First source (ymm0)
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748

; Second source (ymm1 / memory operand); only the lowest byte of each qword
; matches the first source
dq 0x6162636465666778
dq 0x5152535455565748
dq 0x6162636465666778
dq 0x5152535455565748


================================================
FILE: unittests/ASM/VEX/vpcmpeqd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x71727374FFFFFFFF", "0x41424344FFFFFFFF", "0x71727374FFFFFFFF", "0x41424344FFFFFFFF"],
    "XMM1": ["0x61626364FFFFFFFF", "0x51525354FFFFFFFF", "0x61626364FFFFFFFF", "0x51525354FFFFFFFF"],
    "XMM2": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM3": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x00000000FFFFFFFF"],
    "XMM5": ["0x00000000FFFFFFFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPCMPEQD test: dword-wise equality compare of two vectors, using 256-bit and
; 128-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
vpcmpeqd ymm2, ymm0, ymm1
vpcmpeqd xmm3, xmm0, xmm1

; Memory operand
; ([rdx + 32 * 1] holds the same bytes as ymm1)
vpcmpeqd ymm4, ymm0, [rdx + 32 * 1]
vpcmpeqd xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
; First source (ymm0)
dq 0x71727374FFFFFFFF
dq 0x41424344FFFFFFFF
dq 0x71727374FFFFFFFF
dq 0x41424344FFFFFFFF

; Second source (ymm1 / memory operand); only the low dword of each qword
; matches the first source
dq 0x61626364FFFFFFFF
dq 0x51525354FFFFFFFF
dq 0x61626364FFFFFFFF
dq 0x51525354FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpcmpeqq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0x41424344FFFFFFFF", "0x71727374FFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0x51525354FFFFFFFF", "0x61626364FFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPCMPEQQ test: qword-wise equality compare of two vectors, using 256-bit and
; 128-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
vpcmpeqq ymm2, ymm0, ymm1
vpcmpeqq xmm3, xmm0, xmm1

; Memory operand
; ([rdx + 32 * 1] holds the same bytes as ymm1)
vpcmpeqq ymm4, ymm0, [rdx + 32 * 1]
vpcmpeqq xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
; First source (ymm0)
dq 0xFFFFFFFFFFFFFFFF
dq 0x41424344FFFFFFFF
dq 0x71727374FFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; Second source (ymm1 / memory operand); only the first and last qwords match
; the first source in full
dq 0xFFFFFFFFFFFFFFFF
dq 0x51525354FFFFFFFF
dq 0x61626364FFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpcmpeqw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x4142434445464748", "0x7172737475767778", "0x4142434445464748"],
    "XMM1": ["0x6162636465667778", "0x5152535455564748", "0x6162636465667778", "0x5152535455564748"],
    "XMM2": ["0x000000000000FFFF", "0x000000000000FFFF", "0x000000000000FFFF", "0x000000000000FFFF"],
    "XMM3": ["0x000000000000FFFF", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x000000000000FFFF", "0x000000000000FFFF", "0x000000000000FFFF", "0x000000000000FFFF"],
    "XMM5": ["0x000000000000FFFF", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPCMPEQW test: word-wise equality compare of two vectors, using 256-bit and
; 128-bit forms with register and memory second sources.
lea rdx, [rel .data]

; ymm0 <- first 32 bytes of .data, ymm1 <- second 32 bytes
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
vpcmpeqw ymm2, ymm0, ymm1
vpcmpeqw xmm3, xmm0, xmm1

; Memory operand
; ([rdx + 32 * 1] holds the same bytes as ymm1)
vpcmpeqw ymm4, ymm0, [rdx + 32 * 1]
vpcmpeqw xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
; First source (ymm0)
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748

; Second source (ymm1 / memory operand); only the lowest word of each qword
; matches the first source
dq 0x6162636465667778
dq 0x5152535455564748
dq 0x6162636465667778
dq 0x5152535455564748


================================================
FILE: unittests/ASM/VEX/vpcmpestri_equal_any.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX": ["15"],
      "RDX": ["16"],
      "XMM0": ["0x04070F000F000E05", "0x0000000000040404", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x0121313131311111", "0x0000000000010101", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
%macro ArrangeAndStoreFLAGS 1
  lahf
  seto bl
  movzx bx, bl

  shr ax, 8
  shl bx, 5

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1
  shr si, 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison and moves the result from RCX to
; a region of memory in the .indices section specified by a byte
; offset.
;
; The first parameter is the byte offset to store the RCX result to.
; The second parameter is the control values to pass to vpcmpestri
;
%macro CompareAndStore 2
  vpcmpestri xmm2, xmm3, %2
  mov [rel .indices + %1], cl

  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRI):
;   bits [1:0] element format  (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation     (00 = equal any)
;   bits [5:4] polarity        (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   index selection (0 = least significant, 1 = most significant)
; The first CompareAndStore argument is the result slot in .indices/.flags.

; Unsigned byte character check (lsb, positive polarity)
mov rax, 15 ; Exclude 'l'
mov rdx, 16
CompareAndStore 0, 0b00000000

; Unsigned byte character check (msb, positive polarity)
CompareAndStore 1, 0b01000000

; Unsigned byte character check (lsb, negative polarity)
CompareAndStore 2, 0b00010000

; Unsigned byte character check (msb, negative polarity)
CompareAndStore 3, 0b01010000

; Unsigned byte character check (lsb, negative masked)
CompareAndStore 4, 0b00110000

; Unsigned byte character check (msb, negative masked)
CompareAndStore 5, 0b01110000

; --- 16-bit unsigned word tests ---
; RAX (15) and RDX (16) are not reloaded here, so both exceed the 8 words
; that fit in an XMM register.
; NOTE(review): presumably intentional, to exercise length clamping - confirm.
; NOTE(review): unlike the byte tests, there is no (lsb, positive polarity)
; word test in this group - confirm whether that is deliberate.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word character check (msb, positive polarity)
CompareAndStore 6, 0b01000001

; Unsigned word character check (lsb, negative polarity)
CompareAndStore 7, 0b00010001

; Unsigned word character check (msb, negative polarity)
CompareAndStore 8, 0b01010001

; Unsigned word character check (lsb, negative masked)
CompareAndStore 9, 0b00110001

; Unsigned word character check (msb, negative masked)
CompareAndStore 10, 0b01110001

; Load all our stored indices and flags for result comparing
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

; End of the executable part of the test
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2, followed by filler for the upper YMM2 lane
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A49 ; "IJKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
; Result table: one index byte per CompareAndStore call, read back into YMM0
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
; Result table: one packed flag byte per CompareAndStore call, read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestri_equal_each.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX": ["4"],
      "RDX": ["3"],
      "XMM0": ["0x0F000B060B060F00", "0x040407000F060706", "0x0704030307000404", "0x0000000000000000"],
      "XMM1": ["0x3939010101012121", "0x0101212119191919", "0x1919191939390101", "0x0000000000000000"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestri comparison of XMM2 against XMM3 and records both of
; its outputs: the low byte of the index returned in RCX goes to the
; .indices table and the resulting status flags go to the .flags table,
; both at the same byte offset.
;
; The first parameter is the byte offset used for both tables.
; The second parameter is the control byte (imm8) to pass to vpcmpestri.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 2
  vpcmpestri xmm2, xmm3, %2
  ; Only the low byte of the index is kept
  mov [rel .indices + %1], cl

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRI):
;   bits [1:0] element format  (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation     (10 = equal each)
;   bits [5:4] polarity        (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   index selection (0 = least significant, 1 = most significant)
; The first CompareAndStore argument is the result slot in .indices/.flags.
; RAX and RDX hold the explicit lengths of the XMM2 and XMM3 strings.

; Full length unsigned byte string check (lsb, positive polarity)
mov rax, 16
mov rdx, 16
CompareAndStore 0, 0b00001000

; Full length unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001000

; Full length unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011000

; Full length unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011000

; Full length unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111000

; Full length unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111000

; Non-full length unsigned byte string check (lsb, positive polarity)
; The two strings are given different lengths (8 and 7 bytes).
mov rax, 8
mov rdx, 7
CompareAndStore 6, 0b00001000

; Non-full length unsigned byte string check (msb, positive polarity)
CompareAndStore 7, 0b01001000

; Non-full length unsigned byte string check (lsb, negative polarity)
CompareAndStore 8, 0b00011000

; Non-full length unsigned byte string check (msb, negative polarity)
CompareAndStore 9, 0b01011000

; Non-full length unsigned byte string check (lsb, negative masked)
CompareAndStore 10, 0b00111000

; Non-full length unsigned byte string check (msb, negative masked)
CompareAndStore 11, 0b01111000

; --- 16-bit unsigned word tests ---

vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Full length unsigned word string check (lsb, positive polarity)
; Lengths are counted in words here, so 8 covers the whole XMM register.
mov rax, 8
mov rdx, 8
CompareAndStore 12, 0b00001001

; Full length unsigned word string check (msb, positive polarity)
CompareAndStore 13, 0b01001001

; Full length unsigned word string check (lsb, negative polarity)
CompareAndStore 14, 0b00011001

; Full length unsigned word string check (msb, negative polarity)
CompareAndStore 15, 0b01011001

; Full length unsigned word string check (lsb, negative masked)
CompareAndStore 16, 0b00111001

; Full length unsigned word string check (msb, negative masked)
CompareAndStore 17, 0b01111001

; Non-full length unsigned word string check (lsb, positive polarity)
; The two strings are given different lengths (4 and 3 words).
mov rax, 4
mov rdx, 3
CompareAndStore 18, 0b00001001

; Non-full length unsigned word string check (msb, positive polarity)
CompareAndStore 19, 0b01001001

; Non-full length unsigned word string check (lsb, negative polarity)
CompareAndStore 20, 0b00011001

; Non-full length unsigned word string check (msb, negative polarity)
CompareAndStore 21, 0b01011001

; Non-full length unsigned word string check (lsb, negative masked)
CompareAndStore 22, 0b00111001

; Non-full length unsigned word string check (msb, negative masked)
CompareAndStore 23, 0b01111001

; Load all our stored indices and flags for result comparing
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2, followed by filler for the upper YMM2 lane
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x21212121656C706F ; "ople!!!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
; Result table: one index byte per CompareAndStore call (24 used), read back into YMM0
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
; Result table: one packed flag byte per CompareAndStore call (24 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestri_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX": ["2"],
      "RDX": ["16"],
      "XMM0": ["0x05050F000F000902", "0x0000000007000700", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x1111313131311111", "0x0000000031313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x306F8A9E30443057", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestri comparison of XMM2 against XMM3 and records both of
; its outputs: the low byte of the index returned in RCX goes to the
; .indices table and the resulting status flags go to the .flags table,
; both at the same byte offset.
;
; The first parameter is the byte offset used for both tables.
; The second parameter is the control byte (imm8) to pass to vpcmpestri.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 2
  vpcmpestri xmm2, xmm3, %2
  ; Only the low byte of the index is kept
  mov [rel .indices + %1], cl

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRI):
;   bits [1:0] element format  (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation     (11 = equal ordered, i.e. substring search)
;   bits [5:4] polarity        (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   index selection (0 = least significant, 1 = most significant)
; The first CompareAndStore argument is the result slot in .indices/.flags.

; Unsigned byte string check (lsb, positive polarity)
; RAX = 2 limits the XMM2 string to its first two bytes ("ll"); RDX = 16
; covers all of XMM3.
mov rax, 2
mov rdx, 16
CompareAndStore 0, 0b00001100

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001100

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011100

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011100

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111100

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111100

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001101

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001101

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011101

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011101

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111101

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111101

; Load all our stored indices and flags for result comparing
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2 (only the first RAX bytes count), then upper-lane filler
dq 0x6550206F6FFF6C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2 (only the first RAX words count), then filler
dq 0x306F8A9E30443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
; Result table: one index byte per CompareAndStore call (12 used), read back into YMM0
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
; Result table: one packed flag byte per CompareAndStore call (12 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestri_ranges.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX": ["4"],
      "RDX": ["16"],
      "XMM0": ["0x00060F000F000D01", "0x0000000000070007", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x3111313131311111", "0x0000000000313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x005A0041007A0061", "0x55AACCBBFF223344", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x006500200027003F", "0x00210065004F0065", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestri comparison of XMM2 against XMM3 and records both of
; its outputs: the low byte of the index returned in RCX goes to the
; .indices table and the resulting status flags go to the .flags table,
; both at the same byte offset.
;
; The first parameter is the byte offset used for both tables.
; The second parameter is the control byte (imm8) to pass to vpcmpestri.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 2
  vpcmpestri xmm2, xmm3, %2
  ; Only the low byte of the index is kept
  mov [rel .indices + %1], cl

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the range table (XMM2) and the byte string to classify (XMM3). The
; comparisons below only name XMM2/XMM3; the upper YMM lanes just carry
; the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRI):
;   bits [1:0] element format  (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation     (01 = ranges)
;   bits [5:4] polarity        (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   index selection (0 = least significant, 1 = most significant)
; The first CompareAndStore argument is the result slot in .indices/.flags.

; Range unsigned byte check (lsb, positive polarity)
; RAX = 4 limits XMM2 to its first four elements ('a','z','A','Z');
; RDX = 16 covers all of XMM3.
mov rax, 4
mov rdx, 16
CompareAndStore 0, 0b00000100

; Range unsigned byte check (msb, positive polarity)
CompareAndStore 1, 0b01000100

; Range unsigned byte check (lsb, negative polarity)
CompareAndStore 2, 0b00010100

; Range unsigned byte check (msb, negative polarity)
CompareAndStore 3, 0b01010100

; Range unsigned byte check (lsb, negative masked)
CompareAndStore 4, 0b00110100

; Range unsigned byte check (msb, negative masked)
CompareAndStore 5, 0b01110100

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
; NOTE(review): unlike the byte tests, there is no (lsb, positive polarity)
; word test in this group - confirm whether that is deliberate.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Range unsigned word check (msb, positive polarity)
CompareAndStore 6, 0b01000101

; Range unsigned word check (lsb, negative polarity)
CompareAndStore 7, 0b00010101

; Range unsigned word check (msb, negative polarity)
CompareAndStore 8, 0b01010101

; Range unsigned word check (lsb, negative masked)
CompareAndStore 9, 0b00110101

; Range unsigned word check (msb, negative masked)
CompareAndStore 10, 0b01110101

; Load all our stored indices and flags for result comparing
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte range table loaded into XMM2 (only the first RAX bytes count), then filler
dq 0x998877665A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

.data16:
; 16-bit range table loaded into XMM2 (only the first RAX words count), then filler
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF223344
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
; Result table: one index byte per CompareAndStore call (11 used), read back into YMM0
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
; Result table: one packed flag byte per CompareAndStore call (11 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestrm_equal_any.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX":   ["15"],
      "RDX":   ["16"],
      "XMM1":  ["0x0121313131311111", "0x0000000000010101", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x00000000000060A0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0xFF00FF0000000000", "0x00FFFF0000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x0000000000009F5F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0x00FF00FFFFFFFFFF", "0xFF0000FFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x0000000000009F5F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0x00FF00FFFFFFFFFF", "0xFF0000FFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestrm comparison of XMM2 against XMM3, copies the mask it
; leaves in XMM0 into a caller-chosen XMM register, and records the
; resulting status flags in the .flags table.
;
; The first parameter is the byte offset into the .flags region to store the flags at.
; The second parameter is the control byte (imm8) to pass to vpcmpestrm.
; The third parameter is the number of the XMM register that receives the copy of XMM0.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 3
  vpcmpestrm xmm2, xmm3, %2
  ; Keep the mask before the next comparison overwrites XMM0
  vmovaps xmm%3, xmm0

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRM):
;   bits [1:0] element format (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation    (00 = equal any)
;   bits [5:4] polarity       (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   output form    (0 = bit mask "bits", 1 = byte/word mask "mask")
; CompareAndStore arguments: .flags slot, control byte, destination XMM number.

; Unsigned byte character check (bits, positive polarity)
mov rax, 15 ; Exclude 'l'
mov rdx, 16
CompareAndStore 0, 0b00000000, 4

; Unsigned byte character check (mask, positive polarity)
CompareAndStore 1, 0b01000000, 5

; Unsigned byte character check (bits, negative polarity)
CompareAndStore 2, 0b00010000, 6

; Unsigned byte character check (mask, negative polarity)
CompareAndStore 3, 0b01010000, 7

; Unsigned byte character check (bits, negative masked)
CompareAndStore 4, 0b00110000, 8

; Unsigned byte character check (mask, negative masked)
CompareAndStore 5, 0b01110000, 9

; --- 16-bit unsigned word tests ---
; RAX (15) and RDX (16) are not reloaded here, so both exceed the 8 words
; that fit in an XMM register.
; NOTE(review): presumably intentional, to exercise length clamping - confirm.
; NOTE(review): unlike the byte tests, there is no (bits, positive polarity)
; word test in this group - confirm whether that is deliberate.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word character check (mask, positive polarity)
CompareAndStore 6, 0b01000001, 10

; Unsigned word character check (bits, negative polarity)
CompareAndStore 7, 0b00010001, 11

; Unsigned word character check (mask, negative polarity)
CompareAndStore 8, 0b01010001, 12

; Unsigned word character check (bits, negative masked)
CompareAndStore 9, 0b00110001, 13

; Unsigned word character check (mask, negative masked)
CompareAndStore 10, 0b01110001, 14

; Load all our stored flags for result comparing
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2, followed by filler for the upper YMM2 lane
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A49 ; "IJKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
; Result table: one packed flag byte per CompareAndStore call (11 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestrm_equal_each.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX":   ["8"],
      "RDX":   ["8"],
      "XMM1":  ["0x2121010101012121", "0x0000000001010101", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x000000000000F43F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0x0000FFFFFFFFFFFF", "0xFFFFFFFF00FF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x0000000000000BC0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0xFFFF000000000000", "0x00000000FF00FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x0000000000000BC0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0xFFFF000000000000", "0x00000000FF00FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0x00000000000000EF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM15": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestrm comparison of XMM2 against XMM3, copies the mask it
; leaves in XMM0 into a caller-chosen XMM register, and records the
; resulting status flags in the .flags table.
;
; The first parameter is the byte offset into the .flags region to store the flags at.
; The second parameter is the control byte (imm8) to pass to vpcmpestrm.
; The third parameter is the number of the XMM register that receives the copy of XMM0.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 3
  vpcmpestrm xmm2, xmm3, %2
  ; Keep the mask before the next comparison overwrites XMM0
  vmovaps xmm%3, xmm0

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRM):
;   bits [1:0] element format (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation    (10 = equal each)
;   bits [5:4] polarity       (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   output form    (0 = bit mask "bits", 1 = byte/word mask "mask")
; CompareAndStore arguments: .flags slot, control byte, destination XMM number.
; RAX and RDX hold the explicit lengths of the XMM2 and XMM3 strings.

; Full length unsigned byte string check (bits, positive polarity)
mov rax, 16
mov rdx, 16
CompareAndStore 0, 0b00001000, 4

; Full length unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001000, 5

; Full length unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011000, 6

; Full length unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011000, 7

; Full length unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111000, 8

; Full length unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111000, 9

; --- 16-bit unsigned word tests ---

vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Full length unsigned word string check (bits, positive polarity)
; Lengths are counted in words here, so 8 covers the whole XMM register.
mov rax, 8
mov rdx, 8
CompareAndStore 6, 0b00001001, 10

; Full length unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001001, 11

; Full length unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011001, 12

; Full length unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011001, 13

; Full length unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111001, 14

; Full length unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111001, 15

; Load all our stored flags for result comparing
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2, followed by filler for the upper YMM2 lane
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x21212121656C706F ; "ople!!!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
; Result table: one packed flag byte per CompareAndStore call (12 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestrm_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX":   ["2"],
      "RDX":   ["16"],
      "XMM1":  ["0x1111313131311111", "0x0000000031313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F8A9E30443057", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x0000000000000204", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0x0000000000FF0000", "0x000000000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x000000000000FDFB", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000FDFB", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0x0000000000000020", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000000", "0x00000000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x00000000000000DF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x00000000000000DF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM15": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Runs one vpcmpestrm comparison of XMM2 against XMM3, copies the mask it
; leaves in XMM0 into a caller-chosen XMM register, and records the
; resulting status flags in the .flags table.
;
; The first parameter is the byte offset into the .flags region to store the flags at.
; The second parameter is the control byte (imm8) to pass to vpcmpestrm.
; The third parameter is the number of the XMM register that receives the copy of XMM0.
;
; RAX is preserved across the macro (via R15), so the value loaded by the
; caller stays in place for the next comparison. R15 is overwritten, as is
; everything ArrangeAndStoreFLAGS overwrites.
;
%macro CompareAndStore 3
  vpcmpestrm xmm2, xmm3, %2
  ; Keep the mask before the next comparison overwrites XMM0
  vmovaps xmm%3, xmm0

  ; ArrangeAndStoreFLAGS uses AX as scratch, so park RAX in R15 around it
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the two byte strings. The comparisons below only name XMM2/XMM3;
; the upper YMM lanes just carry the filler values from .data.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Control byte layout used below (see the Intel SDM entry for PCMPESTRM):
;   bits [1:0] element format (00 = unsigned bytes, 01 = unsigned words)
;   bits [3:2] aggregation    (11 = equal ordered, i.e. substring search)
;   bits [5:4] polarity       (00 = positive, 01 = negative, 11 = masked negative)
;   bit  [6]   output form    (0 = bit mask "bits", 1 = byte/word mask "mask")
; CompareAndStore arguments: .flags slot, control byte, destination XMM number.

; Unsigned byte string check (bits, positive polarity)
; RAX = 2 limits the XMM2 string to its first two bytes ("ll"); RDX = 16
; covers all of XMM3.
mov rax, 2
mov rdx, 16
CompareAndStore 0, 0b00001100, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001100, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011100, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011100, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111100, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111100, 9

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001101, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001101, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011101, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011101, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111101, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111101, 15

; Load all our stored flags for result comparing
vmovaps ymm1, [rel .flags]

; End of the executable part of the test; expected values are in the CONFIG block
hlt

; Test data. Page-aligned so that every 32-byte table below is suitably
; aligned for the vmovaps YMM loads that read it.
align 4096
.data:
; Byte string loaded into XMM2 (only the first RAX bytes count), then upper-lane filler
dq 0x6550206F6FFF6C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Byte string loaded into XMM3, followed by filler for the upper YMM3 lane
dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
; 16-bit character string loaded into XMM2 (only the first RAX words count), then filler
dq 0x306F8A9E30443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

; 16-bit character string loaded into XMM3, followed by upper-lane filler
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
; Result table: one packed flag byte per CompareAndStore call (12 used), read back into YMM1
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpestrm_ranges.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "RAX":   ["4"],
      "RDX":   ["16"],
      "XMM1":  ["0x3111313131311111", "0x0000000000313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x005A0041007A0061", "0x55AACCBBFF223344", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x006500200027003F", "0x00210065004F0065", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x0000000000003DEA", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0xFFFFFF00FF00FF00", "0x0000FFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x000000000000C215", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000C215", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0xFFFF000000000000", "0x0000FFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000087", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000087", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the six status flags left behind by the preceding instruction into
; one byte and writes that byte into the .flags table.
;
; Byte layout, most significant bit first: [0, 0, OF, SF, ZF, AF, PF, CF].
; OF is captured with SETO; the other five flags come from LAHF.
;
; Parameter 1: byte offset into the .flags region to write the packed byte to.
;
; Overwrites AX, BX, SI and DI, as well as the status flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any arithmetic below disturbs them.
  ; Neither SETO nor LAHF modifies the flags, so their order is irrelevant.
  seto bl
  lahf

  ; BX = OF at bit 5
  movzx bx, bl
  shl bx, 5

  ; AX = raw LAHF byte (SF:ZF:0:AF:0:PF:1:CF) in its low 8 bits
  shr ax, 8

  ; AF (bit 4 -> bit 2) and PF (bit 2 -> bit 1)
  mov si, ax
  mov di, ax
  and si, 0x0010 ; AF
  and di, 0x0004 ; PF
  shr si, 2
  shr di, 1
  or bx, si
  or bx, di

  ; DI is reused for ZF/SF (bits 7:6 -> bits 4:3); CF already sits at bit 0
  mov di, ax
  and ax, 0x0001 ; CF
  and di, 0x00C0 ; ZF and SF
  shr di, 3
  or bx, ax
  or bx, di

  ; Only the low byte of BX carries data; write it to the .flags table
  mov [rel .flags + %1], bl
%endmacro

; Performs the explicit-length string comparison and moves the result from
; XMM0 to the vector register named by the third argument.
;
; The first parameter is the byte offset in the .flags region of memory to
; store the result flags into.
; The second parameter is the control values to pass to vpcmpestrm
; The third parameter is the XMM number to store the result in XMM0 to.
;
; RAX is preserved across the macro: ArrangeAndStoreFLAGS overwrites AX, so
; RAX is parked in R15 (which is overwritten) and restored afterwards.
;
%macro CompareAndStore 3
  vpcmpestrm xmm2, xmm3, %2
  vmovaps xmm%3, xmm0

  ; mov does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  mov r15, rax
  ArrangeAndStoreFLAGS %1
  mov rax, r15
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .flags, vpcmpestrm control
; byte, XMM register number that receives the XMM0 result).

; Range unsigned byte check (bits, positive polarity)
; RAX and RDX are set once here and left unchanged for every comparison
; below; CONFIG expects them to still be 4 and 16 at the end.
mov rax, 4
mov rdx, 16
CompareAndStore 0, 0b00000100, 4

; Range unsigned byte check (mask, positive polarity)
CompareAndStore 1, 0b01000100, 5

; Range unsigned byte check (bits, negative polarity)
CompareAndStore 2, 0b00010100, 6

; Range unsigned byte check (mask, negative polarity)
CompareAndStore 3, 0b01010100, 7

; Range unsigned byte check (bits, negative masked)
CompareAndStore 4, 0b00110100, 8

; Range unsigned byte check (mask, negative masked)
CompareAndStore 5, 0b01110100, 9

; --- 16-bit unsigned word tests ---
; Intentionally don't reset RDX to 8 here to test upper bounds clamping.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Note: there is no (bits, positive polarity) case for words; offsets 6-10
; cover the remaining five combinations.

; Range unsigned word check (mask, positive polarity)
CompareAndStore 6, 0b01000101, 10

; Range unsigned word check (bits, negative polarity)
CompareAndStore 7, 0b00010101, 11

; Range unsigned word check (mask, negative polarity)
CompareAndStore 8, 0b01010101, 12

; Range unsigned word check (bits, negative masked)
CompareAndStore 9, 0b00110101, 13

; Range unsigned word check (mask, negative masked)
CompareAndStore 10, 0b01110101, 14

; Load all our stored flags for result comparing
; (one packed flag byte per comparison, at byte offsets 0-10 of ymm1)
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877665A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit character operands for the word tests: the first 32 bytes are
; loaded into ymm2, the following 32 bytes into ymm3.
.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF223344
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpgtb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x4142434445464748", "0x7172737475767778", "0x4142434445464748"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Point RDX at the test vectors; the two 32-byte operands are adjacent.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
; 256-bit form into ymm2, then the 128-bit form into xmm3. CONFIG expects
; the upper 128 bits of the 128-bit form's destination to be zero.
vpcmpgtb ymm2, ymm0, ymm1
vpcmpgtb xmm3, xmm0, xmm1

; Memory operand
; Same comparisons with the second source read from memory (the same bytes
; that were loaded into ymm1).
vpcmpgtb ymm4, ymm0, [rdx + 32 * 1]
vpcmpgtb xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748

dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpcmpgtd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x4142434445464748", "0x7172737475767778", "0x4142434445464748"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Point RDX at the test vectors; the two 32-byte operands are adjacent.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
; 256-bit form into ymm2, then the 128-bit form into xmm3. CONFIG expects
; the upper 128 bits of the 128-bit form's destination to be zero.
vpcmpgtd ymm2, ymm0, ymm1
vpcmpgtd xmm3, xmm0, xmm1

; Memory operand
; Same comparisons with the second source read from memory (the same bytes
; that were loaded into ymm1).
vpcmpgtd ymm4, ymm0, [rdx + 32 * 1]
vpcmpgtd xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748

dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpcmpgtq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2":  ["0x0000000000000001", "0x0000000000000001", "0x0000000000000001", "0x0000000000000001"],
    "XMM3":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000001", "0xFFFFFFFFFFFFFFFF", "0x0000000000000001"],
    "XMM4":  ["0x0000000000000001", "0xFFFFFFFFFFFFFFFF", "0x0000000000000001", "0xFFFFFFFFFFFFFFFF"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM11": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM13": ["0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Load the five test patterns. ymm0-ymm4 hold exactly the contents of
; .data0-.data4, so they serve both as first sources and as the register
; second sources in the "Register only" section below.
vmovaps ymm0, [rel .data0]
vmovaps ymm1, [rel .data1]
vmovaps ymm2, [rel .data2]
vmovaps ymm3, [rel .data3]
vmovaps ymm4, [rel .data4]

; Register only
; Second source comes from a register (ymmN == .dataN), so the expected
; results in CONFIG are the same as for the equivalent memory-operand form.
; The xmm forms are expected to zero the upper 128 bits of the destination.
vpcmpgtq ymm5, ymm0, ymm4
vpcmpgtq ymm6, ymm1, ymm3
vpcmpgtq ymm7, ymm2, ymm2
vpcmpgtq xmm8, xmm3, xmm1
vpcmpgtq xmm9, xmm4, xmm0

; Memory operand
vpcmpgtq ymm10, ymm0, [rel .data1]
vpcmpgtq ymm11, ymm1, [rel .data2]
vpcmpgtq ymm12, ymm2, [rel .data3]
vpcmpgtq xmm13, xmm3, [rel .data4]
vpcmpgtq xmm14, xmm4, [rel .data0]

hlt

align 4096
.data0:
dq 0
dq 0
dq 0
dq 0

.data1:
dq -1
dq -1
dq -1
dq -1

.data2:
dq 1
dq 1
dq 1
dq 1

.data3:
dq -1
dq 1
dq -1
dq 1

.data4:
dq 1
dq -1
dq 1
dq -1


================================================
FILE: unittests/ASM/VEX/vpcmpgtw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x7172737475767778", "0x4142434445464748", "0x7172737475767778", "0x4142434445464748"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM3": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Point RDX at the test vectors; the two 32-byte operands are adjacent.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; Register only
; 256-bit form into ymm2, then the 128-bit form into xmm3. CONFIG expects
; the upper 128 bits of the 128-bit form's destination to be zero.
vpcmpgtw ymm2, ymm0, ymm1
vpcmpgtw xmm3, xmm0, xmm1

; Memory operand
; Same comparisons with the second source read from memory (the same bytes
; that were loaded into ymm1).
vpcmpgtw ymm4, ymm0, [rdx + 32 * 1]
vpcmpgtw xmm5, xmm0, [rdx + 32 * 1]

hlt

align 4096
.data:
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748

dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpcmpistri_equal_any.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x04060F000F000D07", "0x0000000000040407", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x1939313131311111", "0x0000000000191919", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, stores the resulting index (the low byte
; of RCX) into the .indices region of memory, and stores the packed result
; flags into the .flags region of memory.
;
; The first parameter is the byte offset used for both the .indices store
; and the .flags store.
; The second parameter is the control values to pass to vpcmpistri
;
%macro CompareAndStore 2
  vpcmpistri xmm2, xmm3, %2
  ; mov does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  mov [rel .indices + %1], cl
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .indices/.flags, vpcmpistri
; control byte).

; Unsigned byte character check (lsb, positive polarity)
CompareAndStore 0, 0b00000000

; Unsigned byte character check (msb, positive polarity)
CompareAndStore 1, 0b01000000

; Unsigned byte character check (lsb, negative polarity)
CompareAndStore 2, 0b00010000

; Unsigned byte character check (msb, negative polarity)
CompareAndStore 3, 0b01010000

; Unsigned byte character check (lsb, negative masked)
CompareAndStore 4, 0b00110000

; Unsigned byte character check (msb, negative masked)
CompareAndStore 5, 0b01110000

; --- 16-bit unsigned word tests ---
; Note: there is no (lsb, positive polarity) case for words; offsets 6-10
; cover the remaining five combinations.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word character check (msb, positive polarity)
CompareAndStore 6, 0b01000001

; Unsigned word character check (lsb, negative polarity)
CompareAndStore 7, 0b00010001

; Unsigned word character check (msb, negative polarity)
CompareAndStore 8, 0b01010001

; Unsigned word character check (lsb, negative masked)
CompareAndStore 9, 0b00110001

; Unsigned word character check (msb, negative masked)
CompareAndStore 10, 0b01110001

; Load all our stored indices and flags for result comparing
; (ymm0 = one index byte per comparison, ymm1 = one packed flag byte per
; comparison; ymm2/ymm3 still hold the word operands)
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A00 ; "\0JKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistri_equal_each.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x07000F060E060F00", "0x0000000007040404", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x3939191919193939", "0x0000000019191919", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, stores the resulting index (the low byte
; of RCX) into the .indices region of memory, and stores the packed result
; flags into the .flags region of memory.
;
; The first parameter is the byte offset used for both the .indices store
; and the .flags store.
; The second parameter is the control values to pass to vpcmpistri
;
%macro CompareAndStore 2
  vpcmpistri xmm2, xmm3, %2
  ; mov does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  mov [rel .indices + %1], cl
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .indices/.flags, vpcmpistri
; control byte).

; Unsigned byte string check (lsb, positive polarity)
CompareAndStore 0, 0b00001000

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001000

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011000

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011000

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111000

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111000

; --- 16-bit unsigned word tests ---

vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001001

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001001

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011001

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011001

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111001

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111001

; Load all our stored indices and flags for result comparing
; (ymm0 = one index byte per comparison, ymm1 = one packed flag byte per
; comparison; ymm2/ymm3 still hold the word operands)
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x00002121656C706F ; "ople!!\0\0"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x00212121216C6C61 ; "all!!!!\0"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistri_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x05050F000F000902", "0x0000000006000700", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x1919313131311111", "0x0000000039393939", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x306F000030443057", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, stores the resulting index (the low byte
; of RCX) into the .indices region of memory, and stores the packed result
; flags into the .flags region of memory.
;
; The first parameter is the byte offset used for both the .indices store
; and the .flags store.
; The second parameter is the control values to pass to vpcmpistri
;
%macro CompareAndStore 2
  vpcmpistri xmm2, xmm3, %2
  ; mov does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  mov [rel .indices + %1], cl
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .indices/.flags, vpcmpistri
; control byte).

; Unsigned byte string check (lsb, positive polarity)
CompareAndStore 0, 0b00001100

; Unsigned byte string check (msb, positive polarity)
CompareAndStore 1, 0b01001100

; Unsigned byte string check (lsb, negative polarity)
CompareAndStore 2, 0b00011100

; Unsigned byte string check (msb, negative polarity)
CompareAndStore 3, 0b01011100

; Unsigned byte string check (lsb, negative masked)
CompareAndStore 4, 0b00111100

; Unsigned byte string check (msb, negative masked)
CompareAndStore 5, 0b01111100

; --- 16-bit unsigned word tests ---
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (lsb, positive polarity)
CompareAndStore 6, 0b00001101

; Unsigned word string check (msb, positive polarity)
CompareAndStore 7, 0b01001101

; Unsigned word string check (lsb, negative polarity)
CompareAndStore 8, 0b00011101

; Unsigned word string check (msb, negative polarity)
CompareAndStore 9, 0b01011101

; Unsigned word string check (lsb, negative masked)
CompareAndStore 10, 0b00111101

; Unsigned word string check (msb, negative masked)
CompareAndStore 11, 0b01111101

; Load all our stored indices and flags for result comparing
; (ymm0 = one index byte per comparison, ymm1 = one packed flag byte per
; comparison; ymm2/ymm3 still hold the word operands)
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6F006C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F000030443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistri_ranges.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x00060F000F000D01", "0x0000000000070007", "0x0000000000000000", "0x0000000000000000"],
      "XMM1": ["0x3111313131311111", "0x0000000000313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x005A0041007A0061", "0x55AACCBBFF220000", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3": ["0x006500200027003F", "0x00210065004F0065", "0x8888888888888888", "0x9999999999999999"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, stores the resulting index (the low byte
; of RCX) into the .indices region of memory, and stores the packed result
; flags into the .flags region of memory.
;
; The first parameter is the byte offset used for both the .indices store
; and the .flags store.
; The second parameter is the control values to pass to vpcmpistri
;
%macro CompareAndStore 2
  vpcmpistri xmm2, xmm3, %2
  ; mov does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  mov [rel .indices + %1], cl
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .indices/.flags, vpcmpistri
; control byte).

; Range unsigned byte check (lsb, positive polarity)
CompareAndStore 0, 0b00000100

; Range unsigned byte check (msb, positive polarity)
CompareAndStore 1, 0b01000100

; Range unsigned byte check (lsb, negative polarity)
CompareAndStore 2, 0b00010100

; Range unsigned byte check (msb, negative polarity)
CompareAndStore 3, 0b01010100

; Range unsigned byte check (lsb, negative masked)
CompareAndStore 4, 0b00110100

; Range unsigned byte check (msb, negative masked)
CompareAndStore 5, 0b01110100

; --- 16-bit unsigned word tests ---
; Note: there is no (lsb, positive polarity) case for words; offsets 6-10
; cover the remaining five combinations.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Range unsigned word check (msb, positive polarity)
CompareAndStore 6, 0b01000101

; Range unsigned word check (lsb, negative polarity)
CompareAndStore 7, 0b00010101

; Range unsigned word check (msb, negative polarity)
CompareAndStore 8, 0b01010101

; Range unsigned word check (lsb, negative masked)
CompareAndStore 9, 0b00110101

; Range unsigned word check (msb, negative masked)
CompareAndStore 10, 0b01110101

; Load all our stored indices and flags for result comparing
; (ymm0 = one index byte per comparison, ymm1 = one packed flag byte per
; comparison; ymm2/ymm3 still hold the word operands)
vmovaps ymm0, [rel .indices]
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877005A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit character operands for the word tests: the first 32 bytes are
; loaded into ymm2, the following 32 bytes into ymm3.
.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF220000 ; low word is 0x0000, so a zero word follows "azAZ"
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.indices:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistrm_equal_any.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1":  ["0x1939313131311111", "0x0000000000191919", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x0000000000002080", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0xFF00000000000000", "0x0000FF0000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x000000000000DF7F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0x00FFFFFFFFFFFFFF", "0xFFFF00FFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000DF7F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0x00FFFFFFFFFFFFFF", "0xFFFF00FFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0xFFFFFFFFFFFFFFFF", "0x0000FFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000090", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000000000000000", "0xFFFF00000000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result from XMM0 into the
; vector register named by the third parameter, and stores the packed
; result flags into the .flags region of memory.
;
; The first parameter is the byte offset in the .flags region of memory to
; store the result flags into.
; The second parameter is the control values to pass to vpcmpistrm
; The third parameter is the XMM number to store the result in XMM0 to.
;
%macro CompareAndStore 3
  vpcmpistrm xmm2, xmm3, %2
  ; vmovaps does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  vmovaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands: ymm2 from .data, ymm3 from the 32 bytes
; that follow it. The comparisons themselves use xmm2/xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each CompareAndStore takes (byte offset into .flags, vpcmpistrm control
; byte, XMM register number that receives the XMM0 result).

; Unsigned byte character check (bits, positive polarity)
CompareAndStore 0, 0b00000000, 4

; Unsigned byte character check (mask, positive polarity)
CompareAndStore 1, 0b01000000, 5

; Unsigned byte character check (bits, negative polarity)
CompareAndStore 2, 0b00010000, 6

; Unsigned byte character check (mask, negative polarity)
CompareAndStore 3, 0b01010000, 7

; Unsigned byte character check (bits, negative masked)
CompareAndStore 4, 0b00110000, 8

; Unsigned byte character check (mask, negative masked)
CompareAndStore 5, 0b01110000, 9

; --- 16-bit unsigned word tests ---
; Note: there is no (bits, positive polarity) case for words; offsets 6-10
; cover the remaining five combinations.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word character check (mask, positive polarity)
CompareAndStore 6, 0b01000001, 10

; Unsigned word character check (bits, negative polarity)
CompareAndStore 7, 0b00010001, 11

; Unsigned word character check (mask, negative polarity)
CompareAndStore 8, 0b01010001, 12

; Unsigned word character check (bits, negative masked)
CompareAndStore 9, 0b00110001, 13

; Unsigned word character check (mask, negative masked)
CompareAndStore 10, 0b01110001, 14

; Load all our stored flags for result comparing
; (one packed flag byte per comparison, at byte offsets 0-10 of ymm1)
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6463626144434241 ; "ABCDabcd"
dq 0x6C6B6A694C4B4A00 ; "\0JKLijkl"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x4120492065726548 ; "Here I A"
dq 0x6C4C612759202C6D ; "m, Y'aLl"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistrm_equal_each.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1":  ["0x3939191919193939", "0x0000000019191919", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F8A9E672C65E5", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x000000000000B43F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0x0000FFFFFFFFFFFF", "0xFF00FFFF00FF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x0000000000004BC0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0xFFFF000000000000", "0x00FF0000FF00FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000CBC0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0xFFFF000000000000", "0xFFFF0000FF00FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0x00000000000000EF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000000000000010", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000000", "0x000000000000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000000000000090", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM15": ["0x0000000000000000", "0xFFFF00000000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Packs the status flags left by the preceding instruction into one byte and
; writes that byte into the .flags region of memory.
;
; Packed layout, bit 5 down to bit 0: [OF, SF, ZF, AF, PF, CF].
;
; Parameter 1: byte offset inside .flags that receives the packed value.
;
; Overwrites AX, BX, DI, SI and the flags themselves.
;
%macro ArrangeAndStoreFLAGS 1
  ; Snapshot the flags before any ALU instruction below overwrites them
  lahf                ; AH = SF:ZF:0:AF:0:PF:1:CF
  seto bl             ; OF is not part of the LAHF image
  movzx bx, bl

  shl bx, 5           ; OF -> packed bit 5
  shr ax, 8           ; move the LAHF image from AH down to AL

  ; AF: image bit 4 -> packed bit 2
  mov si, ax
  and si, 0x10
  shr si, 2
  or bx, si

  ; PF: image bit 2 -> packed bit 1
  mov di, ax
  and di, 0x04
  shr di, 1
  or bx, di

  ; SF and ZF: image bits 7:6 -> packed bits 4:3
  mov di, ax
  and di, 0xC0
  shr di, 3

  ; CF is already in bit 0; clear everything else and merge
  and ax, 0x01
  or bx, ax
  or bx, di

  ; Write the packed byte out
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result from XMM0 into the
; vector register named by the third parameter, and stores the packed
; result flags into the .flags region of memory.
;
; The first parameter is the byte offset in the .flags region of memory to
; store the result flags into.
; The second parameter is the control values to pass to vpcmpistrm
; The third parameter is the XMM number to store the result in XMM0 to.
;
%macro CompareAndStore 3
  vpcmpistrm xmm2, xmm3, %2
  ; vmovaps does not modify the flags, so the comparison's flags are still
  ; intact when ArrangeAndStoreFLAGS captures them.
  vmovaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands. The loads are 256 bits wide, but the
; comparisons below only name xmm2 and xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each test below writes one flags byte (offsets 0-11 in .flags) and one
; result register (xmm4-xmm15).

; Unsigned byte string check (bits, positive polarity)
CompareAndStore 0, 0b00001000, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001000, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011000, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011000, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111000, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111000, 9

; --- 16-bit unsigned word tests ---

; Switch both operands over to the 16-bit character strings.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001001, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001001, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011001, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011001, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111001, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111001, 15

; Load all our stored flags for result comparing
; (all 32 bytes of .flags; only the first 12 were written above)
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6C6C6548 ; "Hello Pe"
dq 0x00002121656C706F ; "ople!!\0\0"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x00212121216C6C61 ; "all!!!!\0"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x000030443057697D ; "楽しい\0" (Japanese is fun)
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistrm_equal_ordered.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1":  ["0x1919313131311111", "0x0000000039393939", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x306F000030443057", "0x000030443057697D", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x306F8A9E672C65E5", "0x00003044305796E3", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x0000000000000204", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0x0000000000FF0000", "0x000000000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x000000000000FDFB", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000FDFB", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0xFFFFFFFFFF00FFFF", "0xFFFFFFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0x0000000000000020", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000000", "0x00000000FFFF0000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x00000000000000DF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFF0000FFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x000000000000005F", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM15": ["0xFFFFFFFFFFFFFFFF", "0x0000FFFF0000FFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; The stored byte holds OF in bit 5, SF in bit 4, ZF in bit 3, AF in bit 2,
; PF in bit 1 and CF in bit 0. AX, BX, DI and SI are overwritten.
;
%macro ArrangeAndStoreFLAGS 1
  lahf             ; AH = low byte of the flags (SF, ZF, AF, PF, CF)
  seto bl          ; LAHF does not capture OF, so fetch it separately
  movzx bx, bl

  shr ax, 8        ; Bring the LAHF byte down from AH into AL
  shl bx, 5        ; OF -> bit 5 of the result

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1        ; PF: bit 2 -> bit 1
  shr si, 2        ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3        ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result that vpcmpistrm leaves
; in XMM0 into another XMM register, and stores the resulting flags into
; the .flags region of memory.
;
; The first parameter is the byte offset in .flags to store the result flags at.
; The second parameter is the control value to pass to vpcmpistrm.
; The third parameter is the number of the XMM register that XMM0 is copied to.
;
; The comparison always uses xmm2 and xmm3 as its operands.
;
%macro CompareAndStore 3
  vpcmpistrm xmm2, xmm3, %2
  vmovaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte-string operands. The loads are 256 bits wide, but the
; comparisons below only name xmm2 and xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each test below writes one flags byte (offsets 0-11 in .flags) and one
; result register (xmm4-xmm15).

; Unsigned byte string check (bits, positive polarity)
CompareAndStore 0, 0b00001100, 4

; Unsigned byte string check (mask, positive polarity)
CompareAndStore 1, 0b01001100, 5

; Unsigned byte string check (bits, negative polarity)
CompareAndStore 2, 0b00011100, 6

; Unsigned byte string check (mask, negative polarity)
CompareAndStore 3, 0b01011100, 7

; Unsigned byte string check (bits, negative masked)
CompareAndStore 4, 0b00111100, 8

; Unsigned byte string check (mask, negative masked)
CompareAndStore 5, 0b01111100, 9

; --- 16-bit unsigned word tests ---
; Switch both operands over to the 16-bit character strings.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Unsigned word string check (bits, positive polarity)
CompareAndStore 6, 0b00001101, 10

; Unsigned word string check (mask, positive polarity)
CompareAndStore 7, 0b01001101, 11

; Unsigned word string check (bits, negative polarity)
CompareAndStore 8, 0b00011101, 12

; Unsigned word string check (mask, negative polarity)
CompareAndStore 9, 0b01011101, 13

; Unsigned word string check (bits, negative masked)
CompareAndStore 10, 0b00111101, 14

; Unsigned word string check (mask, negative masked)
CompareAndStore 11, 0b01111101, 15

; Load all our stored flags for result comparing
; (all 32 bytes of .flags; only the first 12 were written above)
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x6550206F6F006C6C ; "ll" with junk following it
dq 0x21212121656C706F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x2759206F6C6C6548 ; "Hello Y'"
dq 0x21212121216C6C61 ; "all!!!!!"
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC

.data16:
dq 0x306F000030443057 ; "しい" followed by junk
dq 0x000030443057697D
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x306F8A9E672C65E5 ; "日本語は"
dq 0x00003044305796E3 ; "難しい\0" (Japanese is hard)
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpcmpistrm_ranges.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM1":  ["0x3111313131311111", "0x0000000000313131", "0x0000000000000000", "0x0000000000000000"],
      "XMM2":  ["0x005A0041007A0061", "0x55AACCBBFF220000", "0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB"],
      "XMM3":  ["0x006500200027003F", "0x00210065004F0065", "0x8888888888888888", "0x9999999999999999"],
      "XMM4":  ["0x0000000000003DEA", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5":  ["0xFFFFFF00FF00FF00", "0x0000FFFFFFFF00FF", "0x0000000000000000", "0x0000000000000000"],
      "XMM6":  ["0x000000000000C215", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM8":  ["0x000000000000C215", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM9":  ["0x000000FF00FF00FF", "0xFFFF00000000FF00", "0x0000000000000000", "0x0000000000000000"],
      "XMM10": ["0xFFFF000000000000", "0x0000FFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
      "XMM11": ["0x0000000000000087", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM12": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM13": ["0x0000000000000087", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM14": ["0x0000FFFFFFFFFFFF", "0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Adjusts the result from LAHF and SETO so that we have a set of flags organized
; like [OF, SF, ZF, AF, PF, CF] for storing into the .flags region
; of memory.
;
; The first parameter is the byte offset to store the flag result
; at in the .flags region of memory.
;
; The stored byte holds OF in bit 5, SF in bit 4, ZF in bit 3, AF in bit 2,
; PF in bit 1 and CF in bit 0. AX, BX, DI and SI are overwritten.
;
%macro ArrangeAndStoreFLAGS 1
  lahf             ; AH = low byte of the flags (SF, ZF, AF, PF, CF)
  seto bl          ; LAHF does not capture OF, so fetch it separately
  movzx bx, bl

  shr ax, 8        ; Bring the LAHF byte down from AH into AL
  shl bx, 5        ; OF -> bit 5 of the result

  mov di, ax
  mov si, ax

  ; Mask and shift
  and di, 0b0000_0000_0000_0100 ; PF
  and si, 0b0000_0000_0001_0000 ; AF
  shr di, 1        ; PF: bit 2 -> bit 1
  shr si, 2        ; AF: bit 4 -> bit 2

  ; OR all of them together
  or bx, di
  or bx, si

  ; Reclaim DI for getting ZF/SF and shift into place
  mov di, ax
  and di, 0b0000_0000_1100_0000 ; ZF and SF
  shr di, 3        ; ZF: bit 6 -> bit 3, SF: bit 7 -> bit 4

  ; Finally mask and OR all of the bits together
  and ax, 0b0000_0000_0000_0001 ; CF
  or bx, ax
  or bx, di

  ; Store result to .flags memory
  mov [rel .flags + %1], bl
%endmacro

; Performs the string comparison, copies the result that vpcmpistrm leaves
; in XMM0 into another XMM register, and stores the resulting flags into
; the .flags region of memory.
;
; The first parameter is the byte offset in .flags to store the result flags at.
; The second parameter is the control value to pass to vpcmpistrm.
; The third parameter is the number of the XMM register that XMM0 is copied to.
;
; The comparison always uses xmm2 and xmm3 as its operands.
;
%macro CompareAndStore 3
  vpcmpistrm xmm2, xmm3, %2
  vmovaps xmm%3, xmm0
  ArrangeAndStoreFLAGS %1
%endmacro

; Load the byte operands: xmm2 receives the "azAZ" data and xmm3 the text
; string. The loads are 256 bits wide, but the comparisons below only name
; xmm2 and xmm3.
vmovaps ymm2, [rel .data]
vmovaps ymm3, [rel .data + 32]

; Each test below writes one flags byte (offsets 0-10 in .flags) and one
; result register (xmm4-xmm14).

; Range unsigned byte check (bits, positive polarity)
CompareAndStore 0, 0b00000100, 4

; Range unsigned byte check (mask, positive polarity)
CompareAndStore 1, 0b01000100, 5

; Range unsigned byte check (bits, negative polarity)
CompareAndStore 2, 0b00010100, 6

; Range unsigned byte check (mask, negative polarity)
CompareAndStore 3, 0b01010100, 7

; Range unsigned byte check (bits, negative masked)
CompareAndStore 4, 0b00110100, 8

; Range unsigned byte check (mask, negative masked)
CompareAndStore 5, 0b01110100, 9

; --- 16-bit unsigned word tests ---
; NOTE(review): unlike the byte tests above, there is no
; "bits, positive polarity" case (control 0b00000101) for words.
vmovaps ymm2, [rel .data16]
vmovaps ymm3, [rel .data16 + 32]

; Range unsigned word check (mask, positive polarity)
CompareAndStore 6, 0b01000101, 10

; Range unsigned word check (bits, negative polarity)
CompareAndStore 7, 0b00010101, 11

; Range unsigned word check (mask, negative polarity)
CompareAndStore 8, 0b01010101, 12

; Range unsigned word check (bits, negative masked)
CompareAndStore 9, 0b00110101, 13

; Range unsigned word check (mask, negative masked)
CompareAndStore 10, 0b01110101, 14

; Load all our stored flags for result comparing
; (all 32 bytes of .flags; only the first 11 were written above)
vmovaps ymm1, [rel .flags]

hlt

align 4096
.data:
dq 0x998877005A417A61 ; "azAZ" (followed by junk)
dq 0x55AACCBBFF223344
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x726548206D27493F ; "?I'm Her"
dq 0x21216E65704F2065 ; "e Open!!"
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; 16-bit character operands. The first 32 bytes are loaded into ymm2 and
; the following 32 bytes into ymm3.
.data16:
dq 0x005A0041007A0061 ; "azAZ"
dq 0x55AACCBBFF220000 ; lowest word is 0x0000; the remaining words are junk
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB

dq 0x006500200027003F ; "?' e"
dq 0x00210065004F0065 ; "eOe!"
dq 0x8888888888888888
dq 0x9999999999999999

.flags:
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vperm2f128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM2": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM3": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vperm2f128 with the second source given both as a register
; and as a memory operand.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]       ; first source
vmovapd ymm1, [rdx + 32]  ; second source (same bytes as the memory operand below)

; Permute first 128-bit element from each vector
vperm2f128 ymm2, ymm0, ymm1, 0b00100000
vperm2f128 ymm3, ymm0, [rdx + 32], 0b00100000

; Permute top halves of both vectors
vperm2f128 ymm4, ymm0, ymm1, 0b00110001
vperm2f128 ymm5, ymm0, [rdx + 32], 0b00110001

; Zero out entire vector
; ymm6 is first given the (non-zero) contents of ymm0; the expected
; result for it in RegData is all zeroes.
vmovapd ymm6, ymm0;
vperm2f128 ymm6, ymm0, ymm1, 0b10001000

hlt

align 32
.data:
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA

dq 0xBBBBBBBBBBBBBBBB
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC
dq 0x9999999999999999


================================================
FILE: unittests/ASM/VEX/vperm2i128.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1": ["0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM2": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM3": ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD"],
    "XMM4": ["0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM5": ["0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vperm2i128 with the second source given both as a register
; and as a memory operand.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]       ; first source
vmovapd ymm1, [rdx + 32]  ; second source (same bytes as the memory operand below)

; Permute first 128-bit element from each vector
vperm2i128 ymm2, ymm0, ymm1, 0b00100000
vperm2i128 ymm3, ymm0, [rdx + 32], 0b00100000

; Permute top halves of both vectors
vperm2i128 ymm4, ymm0, ymm1, 0b00110001
vperm2i128 ymm5, ymm0, [rdx + 32], 0b00110001

; Zero out entire vector
; ymm6 is first given the (non-zero) contents of ymm0; the expected
; result for it in RegData is all zeroes.
vmovapd ymm6, ymm0;
vperm2i128 ymm6, ymm0, ymm1, 0b10001000

hlt

align 32
.data:
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA

dq 0xBBBBBBBBBBBBBBBB
dq 0xDDDDDDDDDDDDDDDD
dq 0xCCCCCCCCCCCCCCCC
dq 0x9999999999999999


================================================
FILE: unittests/ASM/VEX/vpermd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0000000600000007", "0x0000000400000005", "0x0000000200000003", "0x0000000000000001"],
    "XMM1": ["0x0000000500000005", "0x0000000500000005", "0x0000000500000005", "0x0000000500000005"],
    "XMM2": ["0xFFFFFFF0FFFFFFF1", "0xFFFFFFF2FFFFFFF3", "0xFFFFFFF4FFFFFFF5", "0xFFFFFFF6FFFFFFF7"],
    "XMM3": ["0x8888888899999999", "0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB"],
    "XMM5": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x8888888899999999"]
  }
}
%endif

; Exercises vpermd with three different index vectors, always reading the
; permuted data from memory.
; NOTE(review): rdx is loaded here but never referenced again; every access
; below is label-relative.
lea rdx, [rel .data]

; Index vectors (these registers are also checked unchanged in RegData)
vmovapd ymm0, [rel .invert]
vmovapd ymm1, [rel .select_elem_5]
vmovapd ymm2, [rel .reverse_quadwords]

vpermd ymm3, ymm0, [rel .data] ; reverse the order of all eight dwords
vpermd ymm4, ymm1, [rel .data] ; broadcast dword 5
vpermd ymm5, ymm2, [rel .data] ; swap the two dwords inside each quadword

hlt

align 32
.data:
dq 0xFFFFFFFFEEEEEEEE
dq 0xDDDDDDDDCCCCCCCC
dq 0xBBBBBBBBAAAAAAAA
dq 0x9999999988888888

.invert:
dq 0x0000000600000007
dq 0x0000000400000005
dq 0x0000000200000003
dq 0x0000000000000001

.select_elem_5:
dq 0x0000000500000005
dq 0x0000000500000005
dq 0x0000000500000005
dq 0x0000000500000005

; Upper bits filled with junk. Should have no impact on operation
.reverse_quadwords:
dq 0xFFFFFFF0FFFFFFF1
dq 0xFFFFFFF2FFFFFFF3
dq 0xFFFFFFF4FFFFFFF5
dq 0xFFFFFFF6FFFFFFF7


================================================
FILE: unittests/ASM/VEX/vpermilpd_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC"],
    "XMM6": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD"],
    "XMM7": ["0xAAAAAAAAAAAAAAAA", "0xBBBBBBBBBBBBBBBB", "0xCCCCCCCCCCCCCCCC", "0xDDDDDDDDDDDDDDDD"]

  }
}
%endif

; Exercises the immediate form of vpermilpd at 128-bit and 256-bit widths.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]

; 128-bit forms (expected upper 128 bits of the destination are zero)
vpermilpd xmm1, xmm0, 0000b ; low qword in both elements
vpermilpd xmm2, xmm0, 0011b ; high qword in both elements
vpermilpd xmm3, xmm0, 0010b ; elements stay in place
vpermilpd xmm4, xmm0, 0001b ; swap the two qwords

; 256-bit forms, selection happens within each 128-bit lane
vpermilpd ymm5, ymm0, 0000b ; low qword of each lane duplicated
vpermilpd ymm6, ymm0, 1111b ; high qword of each lane duplicated
vpermilpd ymm7, ymm0, 1010b ; elements stay in place

hlt

align 32
.data:
dq 0xAAAAAAAAAAAAAAAA
dq 0xBBBBBBBBBBBBBBBB
dq 0xCCCCCCCCCCCCCCCC
dq 0xDDDDDDDDDDDDDDDD


================================================
FILE: unittests/ASM/VEX/vpermilpd_reg.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xFFFFFFFFFFFFFFFF", "0xCCCCCCCCCCCCCCCC", "0xAAAAAAAAAAAAAAAA", "0x9999999999999999"],
    "XMM1": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x9999999999999999", "0xAAAAAAAAAAAAAAAA"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999", "0x9999999999999999"],
    "XMM3": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x9999999999999999", "0xAAAAAAAAAAAAAAAA"],
    "XMM4": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xCCCCCCCCCCCCCCCC", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpermilpd with a control vector read from memory, at 256-bit
; and 128-bit widths. The control vectors below carry their selector in
; bit 1 of each quadword.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]

; 256-bit forms
vpermilpd ymm1, ymm0, [rel .invert]            ; swap the qwords in each lane
vpermilpd ymm2, ymm0, [rel .select_elem_1]     ; high qword of each lane duplicated
vpermilpd ymm3, ymm0, [rel .reverse_quadwords] ; same selectors as .invert plus junk bits

; 128-bit forms (expected upper 128 bits of the destination are zero)
vpermilpd xmm4, xmm0, [rel .invert]
vpermilpd xmm5, xmm0, [rel .select_elem_1]
vpermilpd xmm6, xmm0, [rel .reverse_quadwords]

hlt

align 32
.data:
dq 0xFFFFFFFFFFFFFFFF
dq 0xCCCCCCCCCCCCCCCC
dq 0xAAAAAAAAAAAAAAAA
dq 0x9999999999999999

.invert:
dq 0x0000000000000002
dq 0x0000000000000000
dq 0x0000000000000002
dq 0x0000000000000000

.select_elem_1:
dq 0x0000000000000002
dq 0x0000000000000002
dq 0x0000000000000002
dq 0x0000000000000002

; Upper bits filled with junk. Should have no impact on operation
.reverse_quadwords:
dq 0xFFFFFFFFFFFFFFF2
dq 0xFFFFFFFFFFFFFFF0
dq 0xFFFFFFFFFFFFFFF2
dq 0xFFFFFFFFFFFFFFF0


================================================
FILE: unittests/ASM/VEX/vpermilps_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM5": ["0xCCCCCCCCCCCCCCCC", "0xCCCCCCCCCCCCCCCC", "0x9999999999999999", "0x9999999999999999"],
    "XMM6": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x8888888888888888", "0x8888888888888888"]
  }
}
%endif

; Exercises the immediate form of vpermilps at 128-bit and 256-bit widths.
; Each immediate repeats one 2-bit selector four times, so every test
; broadcasts a single dword within each 128-bit lane.
lea rdx, [rel .data]

vmovaps ymm0, [rdx]

; 128-bit forms (expected upper 128 bits of the destination are zero)
vpermilps xmm1, xmm0, 00000000b ; dword 0
vpermilps xmm2, xmm0, 11111111b ; dword 3
vpermilps xmm3, xmm0, 10101010b ; dword 2

; 256-bit forms
vpermilps ymm4, ymm0, 00000000b ; dword 0 of each lane
vpermilps ymm5, ymm0, 11111111b ; dword 3 of each lane
vpermilps ymm6, ymm0, 10101010b ; dword 2 of each lane

hlt

align 32
.data:
dq 0xAAAAAAAABBBBBBBB
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF
dq 0x9999999988888888


================================================
FILE: unittests/ASM/VEX/vpermilps_reg.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xFFFFFFFFEEEEEEEE", "0xDDDDDDDDCCCCCCCC", "0xBBBBBBBBAAAAAAAA", "0x9999999988888888"],
    "XMM1": ["0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x8888888899999999", "0xAAAAAAAABBBBBBBB"],
    "XMM2": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x9999999999999999", "0x9999999999999999"],
    "XMM3": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x8888888899999999"],
    "XMM4": ["0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xDDDDDDDDDDDDDDDD", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpermilps with a control vector read from memory, at 256-bit
; and 128-bit widths.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]

; 256-bit forms
vpermilps ymm1, ymm0, [rel .invert]            ; reverse the dwords in each lane
vpermilps ymm2, ymm0, [rel .select_elem_3]     ; dword 3 of each lane broadcast
vpermilps ymm3, ymm0, [rel .reverse_quadwords] ; swap the dwords inside each quadword

; 128-bit forms (expected upper 128 bits of the destination are zero)
vpermilps xmm4, xmm0, [rel .invert]
vpermilps xmm5, xmm0, [rel .select_elem_3]
vpermilps xmm6, xmm0, [rel .reverse_quadwords]

hlt

align 32
.data:
dq 0xFFFFFFFFEEEEEEEE
dq 0xDDDDDDDDCCCCCCCC
dq 0xBBBBBBBBAAAAAAAA
dq 0x9999999988888888

.invert:
dq 0x0000000200000003
dq 0x0000000000000001
dq 0x0000000200000003
dq 0x0000000000000001

.select_elem_3:
dq 0x0000000300000003
dq 0x0000000300000003
dq 0x0000000300000003
dq 0x0000000300000003

; Upper bits filled with junk. Should have no impact on operation
.reverse_quadwords:
dq 0xFFFFFFF0FFFFFFF1
dq 0xFFFFFFF2FFFFFFF3
dq 0xFFFFFFF0FFFFFFF1
dq 0xFFFFFFF2FFFFFFF3


================================================
FILE: unittests/ASM/VEX/vpermpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM2":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM3":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM4":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM5":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM6":  ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"],
    "XMM7":  ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"],
    "XMM8":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM9":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM10": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA"],
    "XMM11": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA"]
  }
}
%endif

; Exercises vpermpd with both a register and a memory source operand.
; Every case uses vpermpd itself, so the instruction named by this test
; is what is being checked for each immediate.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]

; Permute first element across
vpermpd ymm1, ymm0, 0b00000000
vpermpd ymm2, [rdx], 0b00000000

; Invert vector
vpermpd ymm3, ymm0, 0b00011011
vpermpd ymm4, [rdx], 0b00011011

; Invert self (destination and source are the same register)
vmovapd ymm5, ymm0
vpermpd ymm5, ymm5, 0b00011011

; Permute second element
vpermpd ymm6, ymm0, 0b01010101
vpermpd ymm7, [rdx], 0b01010101

; Permute third element
vpermpd ymm8, ymm0, 0b10101010
vpermpd ymm9, [rdx], 0b10101010

; Permute fourth element
vpermpd ymm10, ymm0, 0b11111111
vpermpd ymm11, [rdx], 0b11111111

hlt

align 32
.data:
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA


================================================
FILE: unittests/ASM/VEX/vpermps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0000000600000007", "0x0000000400000005", "0x0000000200000003", "0x0000000000000001"],
    "XMM1": ["0x0000000500000005", "0x0000000500000005", "0x0000000500000005", "0x0000000500000005"],
    "XMM2": ["0xFFFFFFF0FFFFFFF1", "0xFFFFFFF2FFFFFFF3", "0xFFFFFFF4FFFFFFF5", "0xFFFFFFF6FFFFFFF7"],
    "XMM3": ["0x8888888899999999", "0xAAAAAAAABBBBBBBB", "0xCCCCCCCCDDDDDDDD", "0xEEEEEEEEFFFFFFFF"],
    "XMM4": ["0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB", "0xBBBBBBBBBBBBBBBB"],
    "XMM5": ["0xEEEEEEEEFFFFFFFF", "0xCCCCCCCCDDDDDDDD", "0xAAAAAAAABBBBBBBB", "0x8888888899999999"]
  }
}
%endif

; Exercises vpermps with three different index vectors, always reading the
; permuted data from memory.
; NOTE(review): rdx is loaded here but never referenced again; every access
; below is label-relative.
lea rdx, [rel .data]

; Index vectors (these registers are also checked unchanged in RegData)
vmovapd ymm0, [rel .invert]
vmovapd ymm1, [rel .select_elem_5]
vmovapd ymm2, [rel .reverse_quadwords]

vpermps ymm3, ymm0, [rel .data] ; reverse the order of all eight dwords
vpermps ymm4, ymm1, [rel .data] ; broadcast dword 5
vpermps ymm5, ymm2, [rel .data] ; swap the two dwords inside each quadword

hlt

align 32
.data:
dq 0xFFFFFFFFEEEEEEEE
dq 0xDDDDDDDDCCCCCCCC
dq 0xBBBBBBBBAAAAAAAA
dq 0x9999999988888888

.invert:
dq 0x0000000600000007
dq 0x0000000400000005
dq 0x0000000200000003
dq 0x0000000000000001

.select_elem_5:
dq 0x0000000500000005
dq 0x0000000500000005
dq 0x0000000500000005
dq 0x0000000500000005

; Upper bits filled with junk. Should have no impact on operation
.reverse_quadwords:
dq 0xFFFFFFF0FFFFFFF1
dq 0xFFFFFFF2FFFFFFF3
dq 0xFFFFFFF4FFFFFFF5
dq 0xFFFFFFF6FFFFFFF7


================================================
FILE: unittests/ASM/VEX/vpermq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0xEEEEEEEEEEEEEEEE", "0xFFFFFFFFFFFFFFFF", "0xAAAAAAAAAAAAAAAA"],
    "XMM1":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM2":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM3":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM4":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM5":  ["0xAAAAAAAAAAAAAAAA", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE", "0x3FF0000000000000"],
    "XMM6":  ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"],
    "XMM7":  ["0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE", "0xEEEEEEEEEEEEEEEE"],
    "XMM8":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM9":  ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM10": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA"],
    "XMM11": ["0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA", "0xAAAAAAAAAAAAAAAA"]
  }
}
%endif

; Exercises vpermq with both a register and a memory source operand.
lea rdx, [rel .data]

vmovapd ymm0, [rdx]

; Permute first element across
vpermq ymm1, ymm0, 0b00000000
vpermq ymm2, [rdx], 0b00000000

; Invert vector
vpermq ymm3, ymm0, 0b00011011
vpermq ymm4, [rdx], 0b00011011

; Invert self (destination and source are the same register)
vmovapd ymm5, ymm0
vpermq ymm5, ymm5, 0b00011011

; Permute second element
vpermq ymm6, ymm0, 0b01010101
vpermq ymm7, [rdx], 0b01010101

; Permute third element
vpermq ymm8, ymm0, 0b10101010
vpermq ymm9, [rdx], 0b10101010

; Permute fourth element
vpermq ymm10, ymm0, 0b11111111
vpermq ymm11, [rdx], 0b11111111

hlt

align 32
.data:
dq 0x3FF0000000000000
dq 0xEEEEEEEEEEEEEEEE
dq 0xFFFFFFFFFFFFFFFF
dq 0xAAAAAAAAAAAAAAAA


================================================
FILE: unittests/ASM/VEX/vpextrb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x000000000000007F",
    "RBX": "0x0000000000000067",
    "RCX": "0x0000888658818AE8"
  }
}
%endif

; Exercises vpextrb with register and memory destinations.
lea rdx, [rel .data]
; Scratch address for the memory-destination forms.
; NOTE(review): presumably mapped by the test harness; this file's CONFIG
; does not declare it.
mov rsi, 0xe0000000

; Load the first eight 16-byte rows of .data into xmm1-xmm8
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]
vmovaps xmm8, [rdx + 16 * 7]

; Clear the qword that the byte stores below are gathered into
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations
vpextrb rax, xmm1, 0
vpextrb rbx, xmm2, 0xFF ; oversized index: the expected RBX is byte 15 of xmm2
; Memory destinations, one byte each at consecutive addresses
vpextrb [rsi + 8 * 0 + 0], xmm3, 2
vpextrb [rsi + 8 * 0 + 1], xmm4, 0xFF ; oversized index: expected byte is byte 15 of xmm4
vpextrb [rsi + 8 * 0 + 2], xmm5, 4
vpextrb [rsi + 8 * 0 + 3], xmm6, 5
vpextrb [rsi + 8 * 0 + 4], xmm7, 6
vpextrb [rsi + 8 * 0 + 5], xmm8, 7
; Read the six stored bytes back; the top two bytes stay zero
mov rcx, [rsi + 8 * 0]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpextrd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x000000004d2fa47f",
    "RBX": "0x0000000067d29af3",
    "RCX": "0x8a6789f27404b890",
    "RDX": "0x00f658ab78236612"
  }
}
%endif

; Exercises vpextrd with register and memory destinations.
lea rdx, [rel .data]
; Scratch address for the memory-destination forms.
; NOTE(review): presumably mapped by the test harness; this file's CONFIG
; does not declare it.
mov rsi, 0xe0000000

; Load the first eight 16-byte rows of .data into xmm1-xmm8
; (only xmm1-xmm6 are read below)
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]
vmovaps xmm8, [rdx + 16 * 7]

; Clear the first scratch qword. The second one is not cleared, but it is
; completely overwritten by the two dword stores to [rsi + 8 * 1] below.
mov rax, 0
mov [rsi + 8 * 0], rax

; Fill RAX with ones; the expected RAX has its upper 32 bits zero after
; the 32-bit extract below.
mov rax, -1

; Register destinations
vpextrd eax, xmm1, 0
vpextrd ebx, xmm2, 0xFF ; oversized index: the expected RBX is dword 3 of xmm2
; Memory destinations
vpextrd [rsi + 8 * 0 + 0], xmm3, 2
vpextrd [rsi + 8 * 0 + 4], xmm4, 0xFF ; oversized index: expected value is dword 3 of xmm4
vpextrd [rsi + 8 * 1 + 0], xmm5, 4    ; oversized index: expected value is dword 0 of xmm5
vpextrd [rsi + 8 * 1 + 4], xmm6, 5    ; oversized index: expected value is dword 1 of xmm6
; Read both scratch qwords back for comparison
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]

hlt

align 16
; 256bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpextrq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x48510f254d2fa47f",
    "RBX": "0x67d29af330ae762c",
    "RCX": "0xb615b9533de8ad09",
    "RDX": "0x8a6789f2d415a567",
    "RDI": "0x8996f88178236612",
    "RSP": "0xc97d9d031ed21972"
  }
}
%endif

; VPEXTRQ test: extract one 64-bit lane of an XMM register into a GPR or into memory.
; Immediates larger than 1 are used deliberately; the CONFIG expectations correspond to
; lane (imm8 & 1) being selected.

; rdx -> source bytes at .data, rsi -> memory the memory-destination forms write to
; (NOTE(review): 0xe0000000 is presumably mapped by the test harness - confirm).
lea rdx, [rel .data]
mov rsi, 0xe0000000

; Load eight consecutive 16-byte rows of .data into xmm1..xmm8.
; Only xmm1..xmm6 are consumed below.
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]
vmovaps xmm8, [rdx + 16 * 7]

; Clear the first result qword; every result slot is fully overwritten below.
mov rax, 0
mov [rsi + 8 * 0], rax

; Register destinations.
vpextrq rax, xmm1, 0       ; lane 0
vpextrq rbx, xmm2, 0xFF    ; imm 0xFF -> lane 1
; Memory destinations: one qword per result slot.
vpextrq [rsi + 8 * 0], xmm3, 2    ; imm 2 -> lane 0
vpextrq [rsi + 8 * 1], xmm4, 0xFF ; imm 0xFF -> lane 1
vpextrq [rsi + 8 * 2], xmm5, 4    ; imm 4 -> lane 0
vpextrq [rsi + 8 * 3], xmm6, 5    ; imm 5 -> lane 1
; Read the stored results back so they can be checked through RegData.
; rdx stops being the data pointer here, and RSP is used purely as a result register.
mov rcx, [rsi + 8 * 0]
mov rdx, [rsi + 8 * 1]
mov rdi, [rsi + 8 * 2]
mov rsp, [rsi + 8 * 3]

hlt

align 16
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpextrw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x000000000000A47F",
    "RBX": "0x00000000000067D2",
    "RCX": "0x1ED2A2A98A67B953"
  }
}
%endif

; VPEXTRW test: extract one 16-bit lane of an XMM register into a GPR or into memory.
; Immediates larger than 7 are used deliberately; the CONFIG expectations correspond to
; lane (imm8 & 7) being selected.

; rdx -> source bytes at .data, rsi -> memory the memory-destination forms write to
; (NOTE(review): 0xe0000000 is presumably mapped by the test harness - confirm).
lea rdx, [rel .data]
mov rsi, 0xe0000000

; Load eight consecutive 16-byte rows of .data into xmm1..xmm8.
; Only xmm1..xmm6 are consumed below.
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]
vmovaps xmm8, [rdx + 16 * 7]

; Clear the result qword; it is then filled in four 16-bit pieces.
mov rax, 0
mov [rsi + 8 * 0], rax

; Seed both register destinations with all-ones. The expected RAX/RBX values have
; every bit above bit 15 clear, so the extracts below must zero-extend the word
; rather than merge it into the old register contents (same idea as the
; "mov rax, -1" in the vpextrd test).
mov rax, -1
mov rbx, -1

; Register destinations.
vpextrw eax, xmm1, 0       ; lane 0
vpextrw ebx, xmm2, 0xFF    ; imm 0xFF -> lane 7

; Memory destinations: four words packed into one qword.
vpextrw [rsi + 8 * 0 + 0], xmm3, 2    ; lane 2
vpextrw [rsi + 8 * 0 + 2], xmm4, 0xFF ; imm 0xFF -> lane 7
vpextrw [rsi + 8 * 0 + 4], xmm5, 4    ; lane 4
vpextrw [rsi + 8 * 0 + 6], xmm6, 5    ; lane 5
; Read the packed result back so it can be checked through RegData.
mov rcx, [rsi + 8 * 0]

hlt

align 16
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpgather_dd_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x177f80d27f80d2ec", "0x73f1177ff1177f80", "0", "0"],
    "XMM5":  ["0x34292db66334292d", "0x2db6b85f292db6b8", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda2566334292d", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD test variant:
; 128-bit
; 1x displacement
; 32-bit indexes
;
; Each case loads four dword indexes into xmm0 and a mask into xmm1, then gathers
; dwords from [rax + index * 1] into a destination that was preloaded from .data.
; Per the CONFIG expectations: lanes whose mask sign bit is clear keep the preloaded
; value, the upper 128 bits of the destination end up zero, and xmm1 ends up zero.
;
; The mask/index tables hold eight dwords each, but the 128-bit vmovaps loads used
; here only read the first four.

; Gather base points into the middle of the data so negative indexes are valid.
lea rax, [rel .data_mid]

; Preload every destination (full 256 bits) with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; Nothing is gathered; xmm13..xmm15 keep their preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm13, [xmm0 * 1 + rax], xmm1

; First element Mask
; .mask_00000001 only sets the eighth dword, which the 128-bit load does not read,
; so in this variant these behave like the zero-mask cases (xmm10..xmm12 unchanged).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm10, [xmm0 * 1 + rax], xmm1

; Top element mask
; .mask_10000000 sets the first dword, so only lane 0 is gathered here.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm7, [xmm0 * 1 + rax], xmm1

; Full Mask
; All four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask
; Only the first four indexes (-992, -512, -256, -128) are loaded, so this variant
; exercises the negative half of the range only.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm3, [xmm0 * 1 + rax], xmm1

; xmm1 will be zero after this.

hlt

; Mask and index tables. Every table is eight dwords (32 bytes); the 128-bit
; vmovaps loads in this file read only the first four dwords of a table.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow the dword order below (leftmost digit = first dword).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only the eighth dword is set, so the first four dwords are all zero.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only the first dword (lane 0) is set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; The test scales indexes by 1, so these are byte offsets from .data_mid.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x5e82db69db698176", "0xd2ec7be67be65e82", "0", "0"],
    "XMM5":  ["0xe9341ce2fff5e934", "0xbf6334291ce2bf63", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256fff5e934", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD test variant:
; 128-bit
; 2x displacement
; 32-bit indexes
;
; Each case loads four dword indexes into xmm0 and a mask into xmm1, then gathers
; dwords from [rax + index * 2] into a destination that was preloaded from .data.
; Per the CONFIG expectations: lanes whose mask sign bit is clear keep the preloaded
; value, the upper 128 bits of the destination end up zero, and xmm1 ends up zero.
;
; The mask/index tables hold eight dwords each, but the 128-bit vmovaps loads used
; here only read the first four.

; Gather base points into the middle of the data so negative indexes are valid.
lea rax, [rel .data_mid]

; Preload every destination (full 256 bits) with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; Nothing is gathered; xmm13..xmm15 keep their preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm13, [xmm0 * 2 + rax], xmm1

; First element Mask
; .mask_00000001 only sets the eighth dword, which the 128-bit load does not read,
; so in this variant these behave like the zero-mask cases (xmm10..xmm12 unchanged).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherdd xmm10, [xmm0 * 2 + rax], xmm1

; Top element mask
; .mask_10000000 sets the first dword, so only lane 0 is gathered here.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm7, [xmm0 * 2 + rax], xmm1

; Full Mask
; All four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask
; Only the first four indexes (-504, -256, -128, -64) are loaded; scaled by 2 they
; address byte offsets -1008, -512, -256 and -128, i.e. the negative half only.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm3, [xmm0 * 2 + rax], xmm1

; xmm1 will be zero after this.

hlt

; Mask and index tables. Every table is eight dwords (32 bytes); the 128-bit
; vmovaps loads in this file read only the first four dwords of a table.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow the dword order below (leftmost digit = first dword).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only the eighth dword is set, so the first four dwords are all zero.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only the first dword (lane 0) is set.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; The test scales indexes by 2, so the byte offset from .data_mid is twice each value.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0", "0"],
    "XMM5":  ["0xdf6efe3bb8c6d8a6", "0x8a34fff53fd4ea2f", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256b8c6d8a6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x33b7153e409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x8a34fff5409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD with a 128-bit (xmm) destination
; 4x index scale (this is what the "4xdisp" in the file name refers to)
; 32-bit signed indexes
;
; Every gather below reads dwords from rax + index * 4. rax points at
; .data_mid, which sits after the .data bytes, so negative indexes reach
; back into .data.

lea rax, [rel .data_mid]

; Seed every destination that may keep lanes with the first 32 bytes of .data.
; Lanes whose mask bit is clear must still hold this seed afterwards, and the
; upper 128 bits must read back as zero after the xmm-sized gather (see the
; RegData above). xmm3 is not seeded because its gather overwrites all lanes.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, the destinations keep their seed.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm13, [xmm0 * 4 + rax], xmm1

; Highest element only (dword 3 of the 128-bit mask).
; The set bit has to live in dword 3: the 16-byte vmovaps below only reads
; dwords 0-3 of a table, so a bit placed in dword 7 would turn this group
; into a repeat of the zero-mask case.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm10, [xmm0 * 4 + rax], xmm1

; Lowest element only (dword 0 of the mask).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm7, [xmm0 * 4 + rax], xmm1

; Full mask: all four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask. Only the first four indexes of the table
; (-248, -128, -64, -32) are loaded by the 16-byte vmovaps.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm3, [xmm0 * 4 + rax], xmm1

; xmm1 will be zero after this: the gather clears its mask register, and
; RegData checks XMM1 for all zeroes.

hlt

align 4096

; Masks only care about the sign bit.
; Label digits map left-to-right onto dwords 0..7 of the table. This test only
; loads 16 bytes, i.e. dwords 0-3; dwords 4-7 are never read here but keep
; each table 32 bytes long so that .data stays 32-byte aligned for the
; vmovapd ymm loads above.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

.mask_00010000:
dd 0, 0, 0, 0x8000_0000, 0, 0, 0, 0

.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; As with the masks, only the first four entries of each table are loaded.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xa2ff64bc388e768d", "0", "0"],
    "XMM4":  ["0xd2f4229df1a6aed4", "0x0b85efdc2ef911b1", "0", "0"],
    "XMM5":  ["0x71089de2ab3fd329", "0x9712ffc55d4e120a", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256f1a6aed4", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256ab3fd329", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0b85efdc409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x9712ffc5409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x6135a9d7409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD with a 128-bit (xmm) destination
; 8x index scale (this is what the "8xdisp" in the file name refers to)
; 32-bit signed indexes
;
; Every gather below reads dwords from rax + index * 8. rax points at
; .data_mid, which sits after the .data bytes, so negative indexes reach
; back into .data.

lea rax, [rel .data_mid]

; Seed every destination that may keep lanes with the first 32 bytes of .data.
; Lanes whose mask bit is clear must still hold this seed afterwards, and the
; upper 128 bits must read back as zero after the xmm-sized gather (see the
; RegData above). xmm3 is not seeded because its gather overwrites all lanes.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, the destinations keep their seed.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherdd xmm13, [xmm0 * 8 + rax], xmm1

; Highest element only (dword 3 of the 128-bit mask).
; The set bit has to live in dword 3: the 16-byte vmovaps below only reads
; dwords 0-3 of a table, so a bit placed in dword 7 would turn this group
; into a repeat of the zero-mask case.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00010000]
vpgatherdd xmm10, [xmm0 * 8 + rax], xmm1

; Lowest element only (dword 0 of the mask).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherdd xmm7, [xmm0 * 8 + rax], xmm1

; Full mask: all four lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask. Only the first four indexes of the table
; (-120, -64, -32, -16) are loaded by the 16-byte vmovaps.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherdd xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this: the gather clears its mask register, and
; RegData checks XMM1 for all zeroes.

hlt

align 4096

; Masks only care about the sign bit.
; Label digits map left-to-right onto dwords 0..7 of the table. This test only
; loads 16 bytes, i.e. dwords 0-3; dwords 4-7 are never read here but keep
; each table 32 bytes long so that .data stays 32-byte aligned for the
; vmovapd ymm loads above.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

.mask_00010000:
dd 0, 0, 0, 0x8000_0000, 0, 0, 0, 0

.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; As with the masks, only the first four entries of each table are loaded.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xf6300511c21448dd"],
    "XMM4":  ["0x177f80d27f80d2ec", "0x73f1177ff1177f80", "0xd7e273f1e273f117", "0x35a9d7e2a9d7e273"],
    "XMM5":  ["0x34292db66334292d", "0x2db6b85f292db6b8", "0xb85f6135b6b85f61", "0x6135a9d75f6135a9"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda2566334292d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x35a9d7e2f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD test
; 256-bit destination
; 1x displacement: each index is scaled by 1 in the address [ymm0 * 1 + rax]
; 32-bit indexes
;
; Every case loads an index vector into ymm0 and a mask into ymm1, then
; gathers relative to rax. The mask is reloaded before every gather because
; the gather clears it (see the note in front of hlt).

; rax is the gather base. .data_mid has data on both sides of it, so
; negative indexes read the bytes that precede the label.
lea rax, [rel .data_mid]

; Preload every destination except ymm3 with the first 32 bytes of .data.
; Lanes whose mask bit is clear keep this value; the expected RegData relies
; on that for the zero-mask and single-lane cases. ymm3 needs no preload
; because its gather uses the full mask and overwrites every lane.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is enabled, so ymm15, ymm14 and ymm13 must come out
; unchanged whatever the indexes are.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm15, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm14, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm13, [ymm0 * 1 + rax], ymm1

; Single-lane mask, .mask_00000001: only the last dword of the mask has its
; sign bit set, so only lane 7 (the highest dword of the destination) is
; gathered; lanes 0-6 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm12, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm11, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm10, [ymm0 * 1 + rax], ymm1

; Single-lane mask, .mask_10000000: only the first dword of the mask has its
; sign bit set, so only lane 0 (the lowest dword of the destination) is
; gathered; lanes 1-7 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm9, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm8, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm7, [ymm0 * 1 + rax], ymm1

; Full mask: all eight lanes are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm6, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm5, [ymm0 * 1 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm4, [ymm0 * 1 + rax], ymm1

; Full range, full mask: byte offsets from -992 to +992 around .data_mid,
; covering reads far before and far after the base.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm3, [ymm0 * 1 + rax], ymm1

; ymm1 (and therefore xmm1) is all zeroes at this point: the gather clears
; its mask register, and the expected RegData checks that through XMM1.
; NOTE(review): hlt presumably ends the test so the harness can compare the
; registers with RegData - confirm against the test runner.
hlt

; Start the tables on a page boundary. Every table below is eight dwords
; (32 bytes), so each label stays 32-byte aligned, which the aligned
; vmovaps loads in the test body depend on.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the
; first dword (lane 0 once loaded), the rightmost digit is the last dword
; (lane 7).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Enables lane 7 only.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Enables lane 0 only.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Enables all eight lanes.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; With the scale of 1 used by this test the values below are byte offsets
; from .data_mid. The first dword listed is the index for lane 0.

; Every lane reads the dword at .data_mid itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lane 0 reads at +7 and lane 7 at +0; neighbouring lanes read overlapping,
; unaligned dwords.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lane 0 reads at -8 and lane 7 at -1, so the upper lanes straddle the
; .data_mid label.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Offsets spread from -992 to +992 bytes around .data_mid.
; NOTE(review): assumes at least 992 bytes of data before .data_mid and 996
; bytes after it - confirm against the size of the data blobs.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af52633359", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0x59f4e95cc21448dd"],
    "XMM4":  ["0x5e82db69db698176", "0xd2ec7be67be65e82", "0xf1177f807f80d2ec", "0xa9d7e273e273f117"],
    "XMM5":  ["0xe9341ce2fff5e934", "0xbf6334291ce2bf63", "0x2db6b85f34292db6", "0x6135a9d7b85f6135"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256fff5e934", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xa9d7e273f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDD test
; 256-bit destination
; 2x displacement: each index is scaled by 2 in the address [ymm0 * 2 + rax]
; 32-bit indexes
;
; Every case loads an index vector into ymm0 and a mask into ymm1, then
; gathers relative to rax. The mask is reloaded before every gather because
; the gather clears it (see the note in front of hlt).

; rax is the gather base. .data_mid has data on both sides of it, so
; negative indexes read the bytes that precede the label.
lea rax, [rel .data_mid]

; Preload every destination except ymm3 with the first 32 bytes of .data.
; Lanes whose mask bit is clear keep this value; the expected RegData relies
; on that for the zero-mask and single-lane cases. ymm3 needs no preload
; because its gather uses the full mask and overwrites every lane.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is enabled, so ymm15, ymm14 and ymm13 must come out
; unchanged whatever the indexes are.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm15, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm14, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm13, [ymm0 * 2 + rax], ymm1

; Single-lane mask, .mask_00000001: only the last dword of the mask has its
; sign bit set, so only lane 7 (the highest dword of the destination) is
; gathered; lanes 0-6 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm12, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm11, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm10, [ymm0 * 2 + rax], ymm1

; Single-lane mask, .mask_10000000: only the first dword of the mask has its
; sign bit set, so only lane 0 (the lowest dword of the destination) is
; gathered; lanes 1-7 keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm9, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm8, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm7, [ymm0 * 2 + rax], ymm1

; Full mask: all eight lanes are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm6, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm5, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm4, [ymm0 * 2 + rax], ymm1

; Full range, full mask: indexes from -504 to +504, i.e. byte offsets from
; -1008 to +1008 around .data_mid once scaled by 2.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm3, [ymm0 * 2 + rax], ymm1

; ymm1 (and therefore xmm1) is all zeroes at this point: the gather clears
; its mask register, and the expected RegData checks that through XMM1.
; NOTE(review): hlt presumably ends the test so the harness can compare the
; registers with RegData - confirm against the test runner.
hlt

; Start the tables on a page boundary. Every table below is eight dwords
; (32 bytes), so each label stays 32-byte aligned, which the aligned
; vmovaps loads in the test body depend on.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the
; first dword (lane 0 once loaded), the rightmost digit is the last dword
; (lane 7).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Enables lane 7 only.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Enables lane 0 only.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Enables all eight lanes.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; This test scales every index by 2, so the byte offset from .data_mid is
; twice the value listed. The first dword listed is the index for lane 0.

; Every lane reads the dword at .data_mid itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Lane 0 reads at byte offset +14 and lane 7 at +0; neighbouring lanes read
; overlapping dwords two bytes apart.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Lane 0 reads at byte offset -16 and lane 7 at -2, so lane 7 straddles the
; .data_mid label.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Byte offsets spread from -1008 to +1008 around .data_mid.
; NOTE(review): assumes at least 1008 bytes of data before .data_mid and
; 1012 bytes after it - confirm against the size of the data blobs.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],

    "XMM3":  ["0x2522e0af6799bee3", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xf6300511c21448dd"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0x7be65e82db698176", "0xe273f1177f80d2ec"],
    "XMM5":  ["0xdf6efe3bb8c6d8a6", "0x8a34fff53fd4ea2f", "0xbf633429e9341ce2", "0x6135a9d72db6b85f"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256b8c6d8a6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe273f117f7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vpgatherdd with 256-bit (ymm) operands.
; 256-bit destination, 8 x 32-bit gathered elements
; 4x index scale (each index is multiplied by 4 in [ymm0 * 4 + rax])
; 32-bit indexes

; Base pointer is the middle of the random data table, so that both negative
; and positive indexes resolve to bytes inside the table.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data. Elements whose
; mask sign bit is clear must keep this value after the gather, which is what
; the expected RegData values check. ymm3 is not preloaded: it is only used
; with a full mask, so every element of it gets overwritten.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads the index vector (ymm0) and the mask (ymm1) before
; the gather; the mask has to be reloaded because the gather clears it.

; Zero mask: no element is gathered, destinations keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm15, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm14, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm13, [ymm0 * 4 + rax], ymm1

; Single-element mask, highest lane: .mask_00000001 sets the sign bit only in
; its last dword, so only element 7 (bits 255:224) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm12, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm11, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm10, [ymm0 * 4 + rax], ymm1

; Single-element mask, lowest lane: .mask_10000000 sets the sign bit only in
; its first dword, so only element 0 (bits 31:0) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm9, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm8, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm7, [ymm0 * 4 + rax], ymm1

; Full mask: all eight elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm6, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm5, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm4, [ymm0 * 4 + rax], ymm1

; Full range, full mask: indexes from -248 to 248, i.e. byte offsets
; -992 .. +992 around .data_mid after the 4x scale.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm3, [ymm0 * 4 + rax], ymm1

; The gather clears its mask register, so ymm1 is expected to be all zeros
; here (RegData lists XMM1 as zero).
; NOTE(review): hlt presumably marks the end of the test for the harness - confirm.
hlt

; Every table below is 8 dwords (32 bytes), so after this alignment each label,
; and .data that follows them, stays 32-byte aligned for the aligned
; vmovaps/vmovapd loads used by the test code.
; NOTE(review): 4096 is far stricter than those loads need; reason not visible here.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the first
; dd (element 0, lowest lane), the rightmost digit is the last dd (element 7).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only element 7 enabled.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only element 0 enabled.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; All eight elements enabled.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; All elements read from the base address itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Element 0 uses index 7, element 7 uses index 0.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Element 0 uses index -8, element 7 uses index -1.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 4x scale used by this test these reach byte offsets -992 .. +992
; from the base.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dd_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xa2ff64bc388e768d", "0x13a833a3666d909d", "0xabf96d9bc21448dd"],
    "XMM4":  ["0xd2f4229df1a6aed4", "0x0b85efdc2ef911b1", "0x97ba9a8e8aa16a60", "0x7f80d2ecdb698176"],
    "XMM5":  ["0x71089de2ab3fd329", "0x9712ffc55d4e120a", "0x8a34fff5df6efe3b", "0x6135a9d7bf633429"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0x6135a9d76135a9d7"],
    "XMM7":  ["0xf1cda256f1a6aed4", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xf1cda256ab3fd329", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x7f80d2ecf7b7368a"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7f7b7368a"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; Test: vpgatherdd with 256-bit (ymm) operands.
; 256-bit destination, 8 x 32-bit gathered elements
; 8x index scale (each index is multiplied by 8 in [ymm0 * 8 + rax])
; 32-bit indexes

; Base pointer is .data_mid, which has random data in front of it, so the
; negative indexes used below still resolve to bytes inside the table.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data. Elements whose
; mask sign bit is clear must keep this value after the gather, which is what
; the expected RegData values check. ymm3 is not preloaded: it is only used
; with a full mask, so every element of it gets overwritten.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Each case below reloads the index vector (ymm0) and the mask (ymm1) before
; the gather; the mask has to be reloaded because the gather clears it.

; Zero mask: no element is gathered, destinations keep the preloaded value.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm15, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm14, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000000]
vpgatherdd ymm13, [ymm0 * 8 + rax], ymm1

; Single-element mask, highest lane: .mask_00000001 sets the sign bit only in
; its last dword, so only element 7 (bits 255:224) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm12, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm11, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_00000001]
vpgatherdd ymm10, [ymm0 * 8 + rax], ymm1

; Single-element mask, lowest lane: .mask_10000000 sets the sign bit only in
; its first dword, so only element 0 (bits 31:0) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm9, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm8, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_10000000]
vpgatherdd ymm7, [ymm0 * 8 + rax], ymm1

; Full mask: all eight elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm6, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm5, [ymm0 * 8 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm4, [ymm0 * 8 + rax], ymm1

; Full range, full mask: indexes from -120 to 120, i.e. byte offsets
; -960 .. +960 around .data_mid after the 8x scale.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_11111111]
vpgatherdd ymm3, [ymm0 * 8 + rax], ymm1

; The gather clears its mask register, so ymm1 is expected to be all zeros
; here (RegData lists XMM1 as zero).
; NOTE(review): hlt presumably marks the end of the test for the harness - confirm.
hlt

; Every table below is 8 dwords (32 bytes), so after this alignment each label,
; and .data that follows them, stays 32-byte aligned for the aligned
; vmovaps/vmovapd loads used by the test code.
; NOTE(review): 4096 is far stricter than those loads need; reason not visible here.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow memory order: the leftmost digit is the first
; dd (element 0, lowest lane), the rightmost digit is the last dd (element 7).
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only element 7 enabled.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only element 0 enabled.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; All eight elements enabled.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 32-bit integer.
; All elements read from the base address itself.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Element 0 uses index 7, element 7 uses index 0.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Element 0 uses index -8, element 7 uses index -1.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 8x scale used by this test these reach byte offsets -960 .. +960
; from the base.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xd7e273f1177f80d2", "0", "0"],
    "XMM5":  ["0x341ce2bf6334292d", "0x1ce2bf6334292db6", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x341ce2bf6334292d", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq, 128-bit (xmm) destination, 32-bit indexes, index scale factor 1.
; With an xmm destination two qwords are gathered; the expected values in the
; CONFIG block correspond to the first two dword indexes of each index table
; and the first two qword lanes of each mask table.

; Loads the first 32 bytes of .data into a destination register so that any
; lane a gather leaves alone is recognisable in the final register state.
%macro SEED_DST 1
vmovapd %1, [rel .data]
%endmacro

; GATHER_CASE dst, index_table, mask_table
; Reloads xmm0 (indexes) and xmm1 (mask) and gathers into dst from
; rax + index * 1. The mask is reloaded for every case.
%macro GATHER_CASE 3
vmovaps xmm0, [rel %2]
vmovaps xmm1, [rel %3]
vpgatherdq %1, [xmm0 * 1 + rax], xmm1
%endmacro

; Base address is the middle of the data blob so negative indexes can be used.
lea rax, [rel .data_mid]

SEED_DST ymm15
SEED_DST ymm14
SEED_DST ymm13
SEED_DST ymm12
SEED_DST ymm11
SEED_DST ymm10
SEED_DST ymm9
SEED_DST ymm8
SEED_DST ymm7
SEED_DST ymm6
SEED_DST ymm5
SEED_DST ymm4

; All mask lanes clear: the low 128 bits of the destination keep the seed value.
GATHER_CASE xmm15, .index_d0, .mask_0000
GATHER_CASE xmm14, .index_positive_increment, .mask_0000
GATHER_CASE xmm13, .index_negative_decrement, .mask_0000

; .mask_0001 only sets its fourth qword, which lies outside the 16 bytes read
; into xmm1, so for this 128-bit test it behaves like the zero mask.
GATHER_CASE xmm12, .index_d0, .mask_0001
GATHER_CASE xmm11, .index_positive_increment, .mask_0001
GATHER_CASE xmm10, .index_negative_decrement, .mask_0001

; .mask_1000 sets its first qword: only element 0 is gathered, element 1 keeps
; the seed value.
GATHER_CASE xmm9, .index_d0, .mask_1000
GATHER_CASE xmm8, .index_positive_increment, .mask_1000
GATHER_CASE xmm7, .index_negative_decrement, .mask_1000

; Every mask lane set: both elements are gathered.
GATHER_CASE xmm6, .index_d0, .mask_1111
GATHER_CASE xmm5, .index_positive_increment, .mask_1111
GATHER_CASE xmm4, .index_negative_decrement, .mask_1111

; Large positive/negative indexes, every mask lane set.
GATHER_CASE xmm3, .index_full_range, .mask_1111

; The gather leaves its mask register cleared; CONFIG expects XMM1 == 0 here.

hlt

align 4096

; Mask tables. Only the sign bit (bit 63) of each qword lane matters.
; Each table is four qwords long, but this test loads masks into xmm1, so only
; the first two qwords of a table are actually used.
%define MASK_LANE_ON  0x8000_0000_0000_0000
%define MASK_LANE_OFF 0

; No lane enabled.
.mask_0000:
times 4 dq MASK_LANE_OFF

; Only the fourth qword enabled (not reached by a 16-byte load).
.mask_0001:
dq MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_ON

; Only the first qword enabled.
.mask_1000:
dq MASK_LANE_ON, MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_OFF

; Every lane enabled.
.mask_1111:
times 4 dq MASK_LANE_ON

; Index tables. Indexes are signed 32-bit integers.
; Each table is eight dwords long; the expected results of this test are
; produced by the first two entries of each table.

; All indexes zero: every element reads from the base address.
.index_d0:
times 8 dd 0

; Small non-negative byte offsets, descending.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Small negative byte offsets, reaching just below the base address.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Offsets far from the base address in both directions.
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0xd2ec7be65e82db69", "0", "0"],
    "XMM5":  ["0xea2f8a34fff5e934", "0x8a34fff5e9341ce2", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xea2f8a34fff5e934", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq, 128-bit (xmm) destination, 32-bit indexes, index scale factor 2.
; With an xmm destination two qwords are gathered; the expected values in the
; CONFIG block correspond to the first two dword indexes of each index table
; and the first two qword lanes of each mask table.

; Loads the first 32 bytes of .data into a destination register so that any
; lane a gather leaves alone is recognisable in the final register state.
%macro SEED_DST 1
vmovapd %1, [rel .data]
%endmacro

; GATHER_CASE dst, index_table, mask_table
; Reloads xmm0 (indexes) and xmm1 (mask) and gathers into dst from
; rax + index * 2. The mask is reloaded for every case.
%macro GATHER_CASE 3
vmovaps xmm0, [rel %2]
vmovaps xmm1, [rel %3]
vpgatherdq %1, [xmm0 * 2 + rax], xmm1
%endmacro

; Base address is the middle of the data blob so negative indexes can be used.
lea rax, [rel .data_mid]

SEED_DST ymm15
SEED_DST ymm14
SEED_DST ymm13
SEED_DST ymm12
SEED_DST ymm11
SEED_DST ymm10
SEED_DST ymm9
SEED_DST ymm8
SEED_DST ymm7
SEED_DST ymm6
SEED_DST ymm5
SEED_DST ymm4

; All mask lanes clear: the low 128 bits of the destination keep the seed value.
GATHER_CASE xmm15, .index_d0, .mask_0000
GATHER_CASE xmm14, .index_positive_increment, .mask_0000
GATHER_CASE xmm13, .index_negative_decrement, .mask_0000

; .mask_0001 only sets its fourth qword, which lies outside the 16 bytes read
; into xmm1, so for this 128-bit test it behaves like the zero mask.
GATHER_CASE xmm12, .index_d0, .mask_0001
GATHER_CASE xmm11, .index_positive_increment, .mask_0001
GATHER_CASE xmm10, .index_negative_decrement, .mask_0001

; .mask_1000 sets its first qword: only element 0 is gathered, element 1 keeps
; the seed value.
GATHER_CASE xmm9, .index_d0, .mask_1000
GATHER_CASE xmm8, .index_positive_increment, .mask_1000
GATHER_CASE xmm7, .index_negative_decrement, .mask_1000

; Every mask lane set: both elements are gathered.
GATHER_CASE xmm6, .index_d0, .mask_1111
GATHER_CASE xmm5, .index_positive_increment, .mask_1111
GATHER_CASE xmm4, .index_negative_decrement, .mask_1111

; Large positive/negative indexes, every mask lane set.
GATHER_CASE xmm3, .index_full_range, .mask_1111

; The gather leaves its mask register cleared; CONFIG expects XMM1 == 0 here.

hlt

align 4096

; Mask tables. Only the sign bit (bit 63) of each qword lane matters.
; Each table is four qwords long, but this test loads masks into xmm1, so only
; the first two qwords of a table are actually used.
%define MASK_LANE_ON  0x8000_0000_0000_0000
%define MASK_LANE_OFF 0

; No lane enabled.
.mask_0000:
times 4 dq MASK_LANE_OFF

; Only the fourth qword enabled (not reached by a 16-byte load).
.mask_0001:
dq MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_ON

; Only the first qword enabled.
.mask_1000:
dq MASK_LANE_ON, MASK_LANE_OFF, MASK_LANE_OFF, MASK_LANE_OFF

; Every lane enabled.
.mask_1111:
times 4 dq MASK_LANE_ON

; Index tables. Indexes are signed 32-bit integers; the gathers in this test
; scale them by 2.
; Each table is eight dwords long; the expected results of this test are
; produced by the first two entries of each table.

; All indexes zero: every element reads from the base address.
.index_d0:
times 8 dd 0

; Small non-negative indexes, descending.
.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

; Small negative indexes, reaching just below the base address.
.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Indexes far from the base address in both directions (up to 504 * 2 = 1008 bytes).
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x97ba9a8e3087464d", "0", "0"],
    "XMM5":  ["0x9712ffc5b8c6d8a6", "0xb8c6d8a6df6efe3b", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x9712ffc5b8c6d8a6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDQ with a 128-bit (xmm) destination.
; Index scale factor of 4 (the "4xdisp" in the file name): every gather below
; uses the addressing form [xmm0 * 4 + rax], with no displacement.
; Indexes are signed 32-bit dwords, gathered elements are qwords.
;
; A 128-bit destination holds two qword elements, selected by the two lowest
; dword indexes in xmm0 and the sign bits of the two qwords in xmm1.

; Base address for every gather. It points into the middle of the random data
; so that negative indexes still land inside the table.
lea rax, [rel .data_mid]

; Preload every destination that is used with a partial or zero mask with the
; first 32 bytes of .data. Elements whose mask bit is clear must keep this
; value, while RegData expects the upper 128 bits of each ymm to read as zero
; after the xmm-sized gather.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; xmm0 (indexes) and xmm1 (mask) are reloaded before every gather; the gather
; clears the mask register, so it cannot be reused.

; Zero mask: no element is loaded, xmm13-xmm15 keep the preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm13, [xmm0 * 4 + rax], xmm1

; .mask_0001: only the fourth qword of the table has its sign bit set, and
; that qword is outside the 16 bytes loaded into xmm1. At this 128-bit width
; the mask is therefore all zero, and RegData expects xmm10-xmm12 to keep the
; preloaded value exactly like the zero-mask cases above.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm10, [xmm0 * 4 + rax], xmm1

; .mask_1000: the first qword of the table is set, so only element 0 (the low
; qword) is gathered; element 1 keeps the preloaded value.
; Byte offsets from .data_mid for element 0: 0, +28 (7 * 4), -32 (-8 * 4).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm7, [xmm0 * 4 + rax], xmm1

; Full mask: both elements are gathered.
; Byte offsets from .data_mid (element 0, element 1):
;   .index_d0                  0,   0
;   .index_positive_increment +28, +24
;   .index_negative_decrement -32, -28
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask: large-magnitude indexes. The two indexes consumed
; here are -248 and -128, i.e. byte offsets -992 and -512 from .data_mid.
; xmm3 is not preloaded; both of its elements are overwritten by the gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm3, [xmm0 * 4 + rax], xmm1

; The gather clears its mask register, so xmm1 is all zero here
; (RegData checks this).

; End of test. NOTE(review): presumably the harness compares the register
; state at this point against RegData in the CONFIG header - confirm.
hlt

; Pad to a 4096-byte boundary before the tables.
; NOTE(review): presumably this keeps the aligned loads (vmovaps / vmovapd)
; in the test body from faulting and keeps the data away from the code - confirm.
align 4096

; Gather masks. Only the sign bit (bit 63) of each qword element matters.
; Each table holds four qwords (32 bytes); this 128-bit test loads only the
; first two into xmm1. The digits in each name read in memory order, leftmost
; digit = first (lowest) qword.
.mask_0000:
dq 0, 0, 0, 0

; Only the fourth qword is set, so the two qwords loaded by the 128-bit test
; are both zero.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword is set: enables element 0.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Index tables. Each index is a signed 32-bit integer that the gather scales
; by 4 and adds to rax (.data_mid). Each table holds eight dwords; the test
; loads the first four into xmm0 and the 128-bit gather consumes the low two.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Extremes: +/-248 * 4 = +/-992 bytes from .data_mid, so an 8-byte load stays
; within 1024 bytes on either side of .data_mid.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512 bytes per db statement; .data_mid is placed after the first 1024 bytes.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xb57f2163f1a6aed4", "0x07ed4949d2f4229d", "0", "0"],
    "XMM5":  ["0x8b27e4deab3fd329", "0xbf8b198471089de2", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xb57f2163f1a6aed4", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x8b27e4deab3fd329", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERDQ with a 128-bit (xmm) destination.
; Index scale factor of 8 (the "8xdisp" in the file name): every gather below
; uses the addressing form [xmm0 * 8 + rax], with no displacement.
; Indexes are signed 32-bit dwords, gathered elements are qwords.
;
; A 128-bit destination holds two qword elements, selected by the two lowest
; dword indexes in xmm0 and the sign bits of the two qwords in xmm1.

; Base address for every gather. It points into the middle of the random data
; so that negative indexes still land inside the table.
lea rax, [rel .data_mid]

; Preload every destination that is used with a partial or zero mask with the
; first 32 bytes of .data. Elements whose mask bit is clear must keep this
; value, while RegData expects the upper 128 bits of each ymm to read as zero
; after the xmm-sized gather.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; xmm0 (indexes) and xmm1 (mask) are reloaded before every gather; the gather
; clears the mask register, so it cannot be reused.

; Zero mask: no element is loaded, xmm13-xmm15 keep the preloaded low 128 bits.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vpgatherdq xmm13, [xmm0 * 8 + rax], xmm1

; .mask_0001: only the fourth qword of the table has its sign bit set, and
; that qword is outside the 16 bytes loaded into xmm1. At this 128-bit width
; the mask is therefore all zero, and RegData expects xmm10-xmm12 to keep the
; preloaded value exactly like the zero-mask cases above.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vpgatherdq xmm10, [xmm0 * 8 + rax], xmm1

; .mask_1000: the first qword of the table is set, so only element 0 (the low
; qword) is gathered; element 1 keeps the preloaded value.
; Byte offsets from .data_mid for element 0: 0, +56 (7 * 8), -64 (-8 * 8).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vpgatherdq xmm7, [xmm0 * 8 + rax], xmm1

; Full mask: both elements are gathered.
; Byte offsets from .data_mid (element 0, element 1):
;   .index_d0                  0,   0
;   .index_positive_increment +56, +48
;   .index_negative_decrement -64, -56
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask: large-magnitude indexes. The two indexes consumed
; here are -120 and -64, i.e. byte offsets -960 and -512 from .data_mid.
; xmm3 is not preloaded; both of its elements are overwritten by the gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vpgatherdq xmm3, [xmm0 * 8 + rax], xmm1

; The gather clears its mask register, so xmm1 is all zero here
; (RegData checks this).

; End of test. NOTE(review): presumably the harness compares the register
; state at this point against RegData in the CONFIG header - confirm.
hlt

; Pad to a 4096-byte boundary before the tables.
; NOTE(review): presumably this keeps the aligned loads (vmovaps / vmovapd)
; in the test body from faulting and keeps the data away from the code - confirm.
align 4096

; Gather masks. Only the sign bit (bit 63) of each qword element matters.
; Each table holds four qwords (32 bytes); this 128-bit test loads only the
; first two into xmm1. The digits in each name read in memory order, leftmost
; digit = first (lowest) qword.
.mask_0000:
dq 0, 0, 0, 0

; Only the fourth qword is set, so the two qwords loaded by the 128-bit test
; are both zero.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword is set: enables element 0.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Index tables. Each index is a signed 32-bit integer that the gather scales
; by 8 and adds to rax (.data_mid). Each table holds eight dwords; the test
; loads the first four into xmm0 and the 128-bit gather consumes the low two.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Extremes: +/-120 * 8 = +/-960 bytes from .data_mid.
; NOTE(review): this assumes at least 968 bytes of data on each side of
; .data_mid - confirm against the data tables.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512 bytes per db statement; .data_mid is placed after the first 1024 bytes.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xd7e273f1177f80d2", "0xa9d7e273f1177f80", "0x35a9d7e273f1177f"],
    "XMM5":  ["0x341ce2bf6334292d", "0x1ce2bf6334292db6", "0xe2bf6334292db6b8", "0xbf6334292db6b85f"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x341ce2bf6334292d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x35a9d7e273f1177f"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xbf6334292db6b85f"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq with a 256-bit destination
; Address form: rax + index * 1
; Indexes are signed 32-bit values supplied in xmm0

; SEED_FROM_DATA reg [, reg ...]
; Loads each listed register with the first 32 bytes of .data so that every
; destination starts from the same known pattern.
%macro SEED_FROM_DATA 1-*
  %rep %0
    vmovapd %1, [rel .data]
    %rotate 1
  %endrep
%endmacro

; GATHER_CASE dest, index_table, mask_table
; Reloads the index register (xmm0) and the mask register (ymm1) from the
; named tables, then gathers qwords relative to rax into dest.
%macro GATHER_CASE 3
  vmovaps xmm0, [rel %2]
  vmovaps ymm1, [rel %3]
  vpgatherdq %1, [xmm0 * 1 + rax], ymm1
%endmacro

; The gather base points at .data_mid, so negative indexes address the bytes
; stored before that label and positive indexes the bytes after it.
lea rax, [rel .data_mid]

SEED_FROM_DATA ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; Mask all zero: no lane is enabled
GATHER_CASE ymm15, .index_d0, .mask_0000
GATHER_CASE ymm14, .index_positive_increment, .mask_0000
GATHER_CASE ymm13, .index_negative_decrement, .mask_0000

; .mask_0001: only the highest qword lane is enabled
GATHER_CASE ymm12, .index_d0, .mask_0001
GATHER_CASE ymm11, .index_positive_increment, .mask_0001
GATHER_CASE ymm10, .index_negative_decrement, .mask_0001

; .mask_1000: only the lowest qword lane is enabled
GATHER_CASE ymm9, .index_d0, .mask_1000
GATHER_CASE ymm8, .index_positive_increment, .mask_1000
GATHER_CASE ymm7, .index_negative_decrement, .mask_1000

; .mask_1111: all four qword lanes are enabled
GATHER_CASE ymm6, .index_d0, .mask_1111
GATHER_CASE ymm5, .index_positive_increment, .mask_1111
GATHER_CASE ymm4, .index_negative_decrement, .mask_1111

; Widely spread indexes with all four lanes enabled
GATHER_CASE ymm3, .index_full_range, .mask_1111

; ymm1 (the mask register) is expected to be zero at this point; see XMM1 in
; the RegData block at the top of the file.

hlt

; Start the tables on a 4096-byte boundary; the test reads them with
; vmovaps/vmovapd loads.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow the order the qwords are stored in memory,
; so the rightmost digit describes the last (highest) qword of the ymm mask.
.mask_0000:
dq 0, 0, 0, 0

; Sign bit set only in the last (highest) qword.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Sign bit set only in the first (lowest) qword.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Sign bit set in all four qwords.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Every table below holds eight dwords, but the test loads them with a 128-bit
; `vmovaps xmm0, [...]`, so only the first four dwords of a table are read.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Byte offsets relative to .data_mid (this test uses a scale of 1).
.index_full_range:
dd -992, -512, -256, -128, 128, 256, 512, 992

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0x7be65e82db698176", "0xd2ec7be65e82db69", "0x7f80d2ec7be65e82", "0xf1177f80d2ec7be6"],
    "XMM5":  ["0xea2f8a34fff5e934", "0x8a34fff5e9341ce2", "0xfff5e9341ce2bf63", "0xe9341ce2bf633429"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xea2f8a34fff5e934", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xf1177f80d2ec7be6"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe9341ce2bf633429"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq with a 256-bit destination
; Address form: rax + index * 2
; Indexes are signed 32-bit values supplied in xmm0

; SEED_FROM_DATA reg [, reg ...]
; Loads each listed register with the first 32 bytes of .data so that every
; destination starts from the same known pattern.
%macro SEED_FROM_DATA 1-*
  %rep %0
    vmovapd %1, [rel .data]
    %rotate 1
  %endrep
%endmacro

; GATHER_CASE dest, index_table, mask_table
; Reloads the index register (xmm0) and the mask register (ymm1) from the
; named tables, then gathers qwords relative to rax into dest.
%macro GATHER_CASE 3
  vmovaps xmm0, [rel %2]
  vmovaps ymm1, [rel %3]
  vpgatherdq %1, [xmm0 * 2 + rax], ymm1
%endmacro

; The gather base points at .data_mid, so negative indexes address the bytes
; stored before that label and positive indexes the bytes after it.
lea rax, [rel .data_mid]

SEED_FROM_DATA ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; Mask all zero: no lane is enabled
GATHER_CASE ymm15, .index_d0, .mask_0000
GATHER_CASE ymm14, .index_positive_increment, .mask_0000
GATHER_CASE ymm13, .index_negative_decrement, .mask_0000

; .mask_0001: only the highest qword lane is enabled
GATHER_CASE ymm12, .index_d0, .mask_0001
GATHER_CASE ymm11, .index_positive_increment, .mask_0001
GATHER_CASE ymm10, .index_negative_decrement, .mask_0001

; .mask_1000: only the lowest qword lane is enabled
GATHER_CASE ymm9, .index_d0, .mask_1000
GATHER_CASE ymm8, .index_positive_increment, .mask_1000
GATHER_CASE ymm7, .index_negative_decrement, .mask_1000

; .mask_1111: all four qword lanes are enabled
GATHER_CASE ymm6, .index_d0, .mask_1111
GATHER_CASE ymm5, .index_positive_increment, .mask_1111
GATHER_CASE ymm4, .index_negative_decrement, .mask_1111

; Widely spread indexes with all four lanes enabled
GATHER_CASE ymm3, .index_full_range, .mask_1111

; ymm1 (the mask register) is expected to be zero at this point; see XMM1 in
; the RegData block at the top of the file.

hlt

; Start the tables on a 4096-byte boundary; the test reads them with
; vmovaps/vmovapd loads.
align 4096

; Masks only care about the sign bit.
; The digits in each label follow the order the qwords are stored in memory,
; so the rightmost digit describes the last (highest) qword of the ymm mask.
.mask_0000:
dq 0, 0, 0, 0

; Sign bit set only in the last (highest) qword.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Sign bit set only in the first (lowest) qword.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Sign bit set in all four qwords.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Every table below holds eight dwords, but the test loads them with a 128-bit
; `vmovaps xmm0, [...]`, so only the first four dwords of a table are read.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; Indexes are multiplied by the scale of 2 used in this test, so these reach
; the same byte range around .data_mid as +/-1008.
.index_full_range:
dd -504, -256, -128, -64, 64, 128, 256, 504

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0x3087464d8aa16a60", "0x97ba9a8e3087464d", "0x33b7153e97ba9a8e", "0xdb69817633b7153e"],
    "XMM5":  ["0x9712ffc5b8c6d8a6", "0xb8c6d8a6df6efe3b", "0xdf6efe3b3fd4ea2f", "0x3fd4ea2f8a34fff5"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x9712ffc5b8c6d8a6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xdb69817633b7153e"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x3fd4ea2f8a34fff5"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq test: 256-bit destination (four qword elements),
; index scale of 4, signed 32-bit indexes taken from xmm0.

; rax is the gather base. .data_mid is placed after the first two db
; statements of .data, so negative indexes read from the bytes before it.
lea rax, [rel .data_mid]

; Preload every masked-test destination with the first 32 bytes of .data.
; Elements whose mask bit is clear must keep this pattern, which is what the
; expected values in the CONFIG block check for.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; The mask in ymm1 is reloaded before every gather because the instruction
; clears its mask operand.

; Zero mask: nothing is gathered, destinations keep the preloaded pattern.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm15, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm14, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm13, [xmm0 * 4 + rax], ymm1

; Highest element only: .mask_0001 sets the sign bit of qword 3 (bits 255:192),
; so only the top destination element is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm12, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm11, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm10, [xmm0 * 4 + rax], ymm1

; Lowest element only: .mask_1000 sets the sign bit of qword 0 (bits 63:0),
; so only the bottom destination element is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm9, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm8, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm7, [xmm0 * 4 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm6, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm5, [xmm0 * 4 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm4, [xmm0 * 4 + rax], ymm1

; Full range, full mask. ymm3 is not preloaded because every element is
; overwritten by the gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm3, [xmm0 * 4 + rax], ymm1

; The last gather leaves its mask register cleared, so ymm1 ends as zero
; (checked as XMM1 in the CONFIG block).

hlt

; Pad to a 4096-byte boundary so the tables below satisfy the alignment
; required by the vmovaps/vmovapd loads that read them.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; The digits in each label follow the dq order, i.e. lowest element first:
; the leftmost digit is qword 0 and the rightmost digit is qword 3.
.mask_0000:
dq 0, 0, 0, 0

; Selects qword 3 (the highest element) only.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Selects qword 0 (the lowest element) only.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords, but the test loads them into xmm0, so only
; the first four entries of each table are used as indexes.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 4x scale the four loaded indexes address byte offsets
; -992, -512, -256 and -128 relative to the gather base.
.index_full_range:
dd -248, -128, -64, -32, 32, 64, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_dq_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0xead2e747388e768d", "0x88bd62c1a2ff64bc"],
    "XMM4":  ["0xb57f2163f1a6aed4", "0x07ed4949d2f4229d", "0x4735be742ef911b1", "0x1fe6464d0b85efdc"],
    "XMM5":  ["0x8b27e4deab3fd329", "0xbf8b198471089de2", "0x225165965d4e120a", "0xa886da539712ffc5"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xb57f2163f1a6aed4", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x8b27e4deab3fd329", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x1fe6464d0b85efdc"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xa886da539712ffc5"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherdq test: 256-bit destination (four qword elements),
; index scale of 8, signed 32-bit indexes taken from xmm0.

; rax is the gather base. .data_mid is placed after the first two db
; statements of .data, so negative indexes read from the bytes before it.
lea rax, [rel .data_mid]

; Preload every masked-test destination with the first 32 bytes of .data.
; Elements whose mask bit is clear must keep this pattern, which is what the
; expected values in the CONFIG block check for.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; The mask in ymm1 is reloaded before every gather because the instruction
; clears its mask operand.

; Zero mask: nothing is gathered, destinations keep the preloaded pattern.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm15, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm14, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vpgatherdq ymm13, [xmm0 * 8 + rax], ymm1

; Highest element only: .mask_0001 sets the sign bit of qword 3 (bits 255:192),
; so only the top destination element is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm12, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm11, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vpgatherdq ymm10, [xmm0 * 8 + rax], ymm1

; Lowest element only: .mask_1000 sets the sign bit of qword 0 (bits 63:0),
; so only the bottom destination element is gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm9, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm8, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vpgatherdq ymm7, [xmm0 * 8 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm6, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm5, [xmm0 * 8 + rax], ymm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm4, [xmm0 * 8 + rax], ymm1

; Full range, full mask. ymm3 is not preloaded because every element is
; overwritten by the gather.
vmovaps xmm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vpgatherdq ymm3, [xmm0 * 8 + rax], ymm1

; The last gather leaves its mask register cleared, so ymm1 ends as zero
; (checked as XMM1 in the CONFIG block).

hlt

; Pad to a 4096-byte boundary so the tables below satisfy the alignment
; required by the vmovaps/vmovapd loads that read them.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; The digits in each label follow the dq order, i.e. lowest element first:
; the leftmost digit is qword 0 and the rightmost digit is qword 3.
.mask_0000:
dq 0, 0, 0, 0

; Selects qword 3 (the highest element) only.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Selects qword 0 (the lowest element) only.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 32-bit integer.
; Each table holds eight dwords, but the test loads them into xmm0, so only
; the first four entries of each table are used as indexes.
.index_d0:
dd 0, 0, 0, 0, 0, 0, 0, 0

.index_positive_increment:
dd 7, 6, 5, 4, 3, 2, 1, 0

.index_negative_decrement:
dd -8, -7, -6, -5, -4, -3, -2, -1

; With the 8x scale the four loaded indexes address byte offsets
; -960, -512, -256 and -128 relative to the gather base.
.index_full_range:
dd -120, -64, -32, -16, 16, 32, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0", "0", "0"],
    "XMM4":  ["0xd7e273f1e273f117", "0", "0", "0"],
    "XMM5":  ["0xb85f6135b6b85f61", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda256e273f117", "0", "0", "0"],
    "XMM8":  ["0xf1cda256b6b85f61", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQD, 128-bit destination
; 1x displacement (index scale of 1)
; 64-bit indexes
;
; Each case below loads an index vector into xmm0 and a mask into xmm1, then
; gathers relative to .data_mid (held in rax). Only 16 bytes of every index
; and mask table are loaded, i.e. two qword indexes and four mask dwords.

; GATHER_QD dst, index_label, mask_label
; Expands to exactly the load/load/gather triple used by every case.
%macro GATHER_QD 3
vmovaps xmm0, [rel %2]
vmovaps xmm1, [rel %3]
vpgatherqd %1, [xmm0 * 1 + rax], xmm1
%endmacro

; Gather base: the middle of the random-data table, so negative indexes stay in bounds.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data so that elements
; a gather leaves alone are distinguishable from elements it wrote.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; All-zero mask: no element is selected.
GATHER_QD xmm15, .index_d0, .mask_00000000

GATHER_QD xmm14, .index_positive_increment, .mask_00000000

GATHER_QD xmm13, .index_negative_decrement, .mask_00000000

; .mask_00000001: the only set dword is the eighth one, which is past the
; 16 bytes loaded into xmm1, so the expected results match the zero-mask cases.
GATHER_QD xmm12, .index_d0, .mask_00000001

GATHER_QD xmm11, .index_positive_increment, .mask_00000001

GATHER_QD xmm10, .index_negative_decrement, .mask_00000001

; .mask_10000000: only the first dword is set, so only element 0 is gathered.
GATHER_QD xmm9, .index_d0, .mask_10000000

GATHER_QD xmm8, .index_positive_increment, .mask_10000000

GATHER_QD xmm7, .index_negative_decrement, .mask_10000000

; Every mask dword set: both elements are gathered.
GATHER_QD xmm6, .index_d0, .mask_11111111

GATHER_QD xmm5, .index_positive_increment, .mask_11111111

GATHER_QD xmm4, .index_negative_decrement, .mask_11111111

; Indexes that reach far from the base, every mask dword set.
GATHER_QD xmm3, .index_full_range, .mask_11111111

; xmm1 is expected to read back as zero at this point (see XMM1 in RegData).

hlt

align 4096

; Mask tables, eight dwords each. Only the sign bit of a mask dword matters.
; The digits in each label follow memory order: the leftmost digit is the first dword.
.mask_00000000:
times 8 dd 0

.mask_00000001:
times 7 dd 0
dd 0x8000_0000

.mask_10000000:
dd 0x8000_0000
times 7 dd 0

.mask_11111111:
times 8 dd 0x8000_0000

; Index tables, four signed 64-bit entries each.
; This test loads 16 bytes of a table, so the first row of each pair is what
; reaches xmm0; the second row is present but not loaded here.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3, 2
dq 1, 0

.index_negative_decrement:
dq -4, -3
dq -2, -1

.index_full_range:
dq -992, -512
dq 512, 992

; Random data, 512 bytes per `db` row.
; Two rows (1024 bytes) lie between .data and .data_mid, so negative gather
; indexes taken relative to .data_mid still land inside this table.
; The first 32 bytes are also used to preload the destination registers.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

; Gather base address (loaded into rax by the test); 1024 bytes past .data.
.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement
; 64-bit indexes
;
; Address wrap-around test. Each index is built as (-1 - .data_mid + k) and the
; base register is set to 2 * .data_mid, so index + base + 1 wraps around
; 64 bits and resolves to .data_mid + k.
; The xmm-sized index load below reads 16 bytes, i.e. index slots 0 and 1;
; slots 2 and 3 are filled in as well but are not loaded by this test.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; slot 0: resolves to .data_mid
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; slot 1: resolves to .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; slot 2: resolves to .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; slot 3: resolves to .data_mid + 16
; Full 64-bit add, matching the other slots; a 16-bit add would drop any
; carry out of bit 15.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination so untouched elements would be visible in the result.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
; The +1 displacement cancels the -1 folded into every index.
vpgatherqd xmm15, [xmm0 * 1 + rax + 1], xmm1

hlt

align 4096

; Mask with the sign bit set in all eight dwords.
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit index slots. They start out as zero and are overwritten
; by the test code before the gather executes.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per `db` row.
; Two rows (1024 bytes) lie between .data and .data_mid, so the gather element
; that resolves to .data_mid - 8 still reads from this table.
; The first 32 bytes are also used to preload the destination register.
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

; Reference address for the overflow arithmetic in the test; 1024 bytes past .data.
.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0", "0", "0"],
    "XMM4":  ["0xf1177f807f80d2ec", "0", "0", "0"],
    "XMM5":  ["0x2db6b85f34292db6", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0", "0", "0"],
    "XMM8":  ["0xf1cda25634292db6", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement (the gather address uses an index scale factor of 2)
; 64-bit indexes
;
; Exercises vpgatherqd with an xmm destination, an xmm index vector of signed
; 64-bit indexes and an xmm mask. Every gather below uses the address form
; [xmm0 * 2 + rax] with rax pointing at .data_mid, so both positive and
; negative indexes land inside the surrounding random data.
; The expected RegData only has the low 64 bits of each result non-zero.

; rax = gather base address (middle of the random data table).
lea rax, [rel .data_mid]

; Pre-fill every destination with the first 32 bytes of .data so that
; elements skipped by the mask can be recognised in the result
; (low qword of .data is 0xf1cda2562209301d).
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is loaded, the low qword of the destination keeps its
; pre-filled value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [xmm0 * 2 + rax], xmm1

; .mask_00000001: the only set sign bit is in the eighth dword of the table.
; The 16-byte vmovaps below only loads the first four dwords (all zero), so in
; this 128-bit test these three gathers behave like the zero-mask case; the
; expected XMM10-XMM12 values equal the pre-filled data.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [xmm0 * 2 + rax], xmm1

; .mask_10000000: sign bit set in the first dword only, so just element 0
; (the lowest dword of the destination) is gathered; the second dword keeps
; its pre-filled value (0xf1cda256 in the expected XMM7-XMM9).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [xmm0 * 2 + rax], xmm1

; Full Mask: both dword elements are gathered, using the first two 64-bit
; indexes of each index table.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [xmm0 * 2 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [xmm0 * 2 + rax], xmm1

; Full range, full mask: indexes -504 and -256 scaled by 2 reach back to
; .data_mid - 1008 and .data_mid - 512.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [xmm0 * 2 + rax], xmm1

; xmm1 will be zero after this (the CONFIG block expects XMM1 == 0).

hlt

; Pad to a 4096-byte boundary before the data tables.
align 4096

; Gather masks: only bit 31 (the sign bit) of each dword is significant.
; The digits in each label follow memory order, leftmost digit = first dword.
.mask_00000000:
times 8 dd 0

.mask_00000001:
times 7 dd 0
dd 0x8000_0000

.mask_10000000:
dd 0x8000_0000
times 7 dd 0

.mask_11111111:
times 8 dd 0x8000_0000

; Index tables: every element is a signed 64-bit integer.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3
dq 2
dq 1
dq 0

.index_negative_decrement:
dq -4
dq -3
dq -2
dq -1

.index_full_range:
dq -504
dq -256
dq 256
dq 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement (the gather address uses an index scale factor of 2)
; 64-bit indexes
;
; Overflow test: the indexes and the base register are chosen so that
; index * 2 + base + 2 only yields a valid address after wrapping around
; 64 bits. With B = address of .data_mid, each index slot is computed as
; (-1 - B - k) >> 1 (arithmetic shift) and the base is 2 * B, so the wrapped
; effective address is B - k.
; NOTE(review): the arithmetic above assumes .data_mid sits at an even
; address; the expected XMM15 value (dword at .data_mid, then dword at
; .data_mid - 8) is consistent with that.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Slot 0: targets .data_mid + 0.
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Slot 1: targets .data_mid - 8.
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Slot 2: targets .data_mid - 16.
; Not consumed here: the 16-byte index load below only reads slots 0 and 1.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Slot 3: targets .data_mid + 16 (also not consumed by the 128-bit gather).
; Uses the full 64-bit register like the other slots; a 16-bit `add bx, 16`
; would drop any carry out of the low word and corrupt the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Pre-fill the destination so untouched bits would be recognisable.
vmovapd ymm15, [rel .data]

; Full mask: both dword elements are gathered through the wrapping addresses.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [xmm0 * 2 + rax + 2], xmm1

hlt

; Pad to a 4096-byte boundary before the data tables.
align 4096

; Gather mask with the sign bit set in every dword.
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit index slots, zero in the image; the test code stores the
; computed overflow indexes here at runtime.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0", "0", "0"],
    "XMM5":  ["0xbf633429e9341ce2", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0", "0", "0"],
    "XMM8":  ["0xf1cda256e9341ce2", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x displacement
; 64-bit indexes
;
; Exercises VPGATHERQD with an xmm destination, xmm index vector and xmm mask,
; using the address form [index * 4 + rax].
; Every index/mask operand is loaded with a 16-byte `vmovaps xmm`, so only the
; first two qword indexes and the first four mask dwords of each table are read.
; Per the expected RegData, only the low 64 bits of each destination can hold
; gathered data; the upper bits end up zero.

; Base pointer sits in the middle of the data table so that negative indexes
; still land inside the table.
lea rax, [rel .data_mid]

; Preload every destination with a known pattern from the start of .data.
; Elements whose mask bit is clear must keep this pattern
; (low qword 0xf1cda2562209301d in the expected RegData).
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, destinations keep their preloaded low qword.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [xmm0 * 4 + rax], xmm1

; .mask_00000001: the only set sign bit lives in dword 7 of the table, which is
; outside the 16 bytes loaded into xmm1. For this 128-bit form the mask is
; therefore all-zero and the expected results match the zero-mask cases above.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [xmm0 * 4 + rax], xmm1

; .mask_10000000: sign bit set in dword 0 only, so just element 0 is gathered
; (low dword replaced, next dword keeps the preloaded 0xf1cda256).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [xmm0 * 4 + rax], xmm1

; Full mask: both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask: the two loaded indexes (-248, -128) reach byte offsets
; -992 and -512 from .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [xmm0 * 4 + rax], xmm1

; The gather clears its mask operand, so xmm1 is expected to be zero here
; (checked by the "XMM1" entry in RegData).

hlt

align 4096

; Masks only care about the sign bit (bit 31) of each dword element.
; Each table holds eight dwords, but this file only ever loads the first four
; (16 bytes) into xmm1. The label digits read left to right as dwords 0..7.
; All mask bits clear.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 set: outside the 16 bytes read by the xmm loads in this file.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 set: enables element 0.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Every dword set: enables all elements.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Each table holds four qwords; the xmm loads in this file read only the
; first two. Indexes are scaled by 4 and added to .data_mid.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; Scaled by 4 these are byte offsets -992, -512, 512, 992 from .data_mid.
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 4x displacement
; 64-bit indexes
;
; Exercises VPGATHERQD address wrap-around. The indexes are built at runtime so
; that `index * 4` is roughly the negation of the .data_mid address, and the
; base register is then set to twice that address. The effective address
;   index * 4 + 2 * data_mid + 4
; therefore only lands back inside the data table if the 64-bit address
; arithmetic wraps correctly.
; Assuming .data_mid is 4-byte aligned, element 0 resolves to .data_mid and
; element 1 to .data_mid - 8, which matches the expected XMM15 value.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Index 0: (-1 - data_mid) >> 2
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Index 1: (-1 - data_mid - 8) >> 2
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Indexes 2 and 3 are stored for completeness; the 16-byte xmm0 load below
; only reads indexes 0 and 1.
; Index 2: (-1 - data_mid - 16) >> 2
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Index 3: (-1 - data_mid + 16) >> 2
; Uses the full 64-bit register: a 16-bit `add bx, 16` would silently drop a
; carry out of bit 15 and is inconsistent with the computations above.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with a known pattern from the start of .data.
vmovapd ymm15, [rel .data]

; Full mask: both elements are gathered. The gather clears its mask operand,
; so xmm1 is expected to be zero afterwards (see "XMM1" in RegData).
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [xmm0 * 4 + rax + 4], xmm1

hlt

align 4096

; Sign bit set in every dword: enables all gather elements.
; Eight dwords are defined; the xmm1 load in this file reads the first four.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: the test code overwrites all four qwords at runtime
; before loading the first two into xmm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0", "0", "0"],
    "XMM4":  ["0x97ba9a8e8aa16a60", "0", "0", "0"],
    "XMM5":  ["0x8a34fff5df6efe3b", "0", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0", "0", "0"],
    "XMM8":  ["0xf1cda256df6efe3b", "0", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x displacement
; 64-bit indexes
;
; Exercises VPGATHERQD with an xmm destination, xmm index vector and xmm mask,
; using the address form [index * 8 + rax].
; Every index/mask operand is loaded with a 16-byte `vmovaps xmm`, so only the
; first two qword indexes and the first four mask dwords of each table are read.
; Per the expected RegData, only the low 64 bits of each destination can hold
; gathered data; the upper bits end up zero.

; Base pointer sits in the middle of the data table so that negative indexes
; still land inside the table.
lea rax, [rel .data_mid]

; Preload every destination with a known pattern from the start of .data.
; Elements whose mask bit is clear must keep this pattern
; (low qword 0xf1cda2562209301d in the expected RegData).
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered, destinations keep their preloaded low qword.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [xmm0 * 8 + rax], xmm1

; .mask_00000001: the only set sign bit lives in dword 7 of the table, which is
; outside the 16 bytes loaded into xmm1. For this 128-bit form the mask is
; therefore all-zero and the expected results match the zero-mask cases above.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [xmm0 * 8 + rax], xmm1

; .mask_10000000: sign bit set in dword 0 only, so just element 0 is gathered
; (low dword replaced, next dword keeps the preloaded 0xf1cda256).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [xmm0 * 8 + rax], xmm1

; Full mask: both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask: the two loaded indexes (-120, -64) reach byte offsets
; -960 and -512 from .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [xmm0 * 8 + rax], xmm1

; The gather clears its mask operand, so xmm1 is expected to be zero here
; (checked by the "XMM1" entry in RegData).

hlt

align 4096

; Gather masks, eight dwords each. Only bit 31 (the sign bit) of a mask
; dword matters. The label digits read left to right as dword 0 .. dword 7.
.mask_00000000:
times 8 dd 0

.mask_00000001:
times 7 dd 0
dd 0x8000_0000

.mask_10000000:
dd 0x8000_0000
times 7 dd 0

.mask_11111111:
times 8 dd 0x8000_0000

; Index vectors, four signed 64-bit elements each.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; Multiplied by the 8x scale these reach -960, -512, +512 and +960 bytes.
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x displacement
; 64-bit indexes
;
; Checks that the gather's address arithmetic wraps modulo 2^64.
; Each effective address is
;     index * 8 + (2 * .data_mid) + 8
; where index * 8 is roughly -.data_mid, so the sum only lands back inside
; the data table once the 64-bit addition wraps around.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0: index = (-1 - rax) >> 3  ->  effective address .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: index = (-1 - rax - 8) >> 3  ->  effective address .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Elements 2 and 3 are written for completeness. The xmm index load below
; only picks up elements 0 and 1, so the 128-bit gather never consumes them.
; Element 2: index = (-1 - rax - 16) >> 3  ->  effective address .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: index = (-1 - rax + 16) >> 3  ->  effective address .data_mid + 16
; The add must use the full 64-bit register: a 16-bit `add bx, 16` would drop
; any carry out of bit 15 and produce a wrong index.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination so stale data would be visible if nothing is gathered.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [xmm0 * 8 + rax + 8], xmm1

hlt

align 4096

; Gather mask with the sign bit set in every dword.
.mask_11111111:
times 8 dd 0x8000_0000

; Index vector of four signed 64-bit elements.
; Starts out as zero; the test code stores the computed indexes here.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xf6300511c21448dd", "0", "0"],
    "XMM4":  ["0xd7e273f1e273f117", "0x35a9d7e2a9d7e273", "0", "0"],
    "XMM5":  ["0xb85f6135b6b85f61", "0x6135a9d75f6135a9", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256e273f117", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256b6b85f61", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 1x displacement
; 64-bit indexes
;
; VPGATHERQD with a ymm index register: four signed 64-bit indexes, scale 1,
; up to four dwords gathered into the low 128 bits of the destination.
; A gathered dword replaces the preloaded one; unselected dwords keep the
; value preloaded from .data, and the upper 128 bits read back as zero.

; Base address for every gather.
; NOTE(review): .data_mid is defined further down the file; it is presumably
; placed mid-table so that negative indexes stay inside the data.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: nothing is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [ymm0 * 1 + rax], xmm1

; .mask_00000001 sets only dword 7 of the mask table. The 16-byte load into
; xmm1 never reaches it, so these cases gather nothing either (the expected
; XMM10-XMM12 values equal the preloaded data).
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [ymm0 * 1 + rax], xmm1

; .mask_10000000 sets dword 0 of the mask: only the lowest element is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [ymm0 * 1 + rax], xmm1

; Full mask: all four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [ymm0 * 1 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [ymm0 * 1 + rax], xmm1

; Full range, full mask
vmovaps ymm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [ymm0 * 1 + rax], xmm1

; The gather clears its mask operand, so xmm1 will be zero after this.

hlt

align 4096

; Masks only care about the sign bit (bit 31) of each dword.
; Each table holds eight dwords; the label digits read left to right as
; dword 0 .. dword 7. This test loads masks with a 16-byte xmm load, so only
; dwords 0-3 are ever seen by the gather.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 is set, which lies outside the 16 bytes loaded by this test.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 is set: selects the lowest element.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Four elements per table, matching the ymm index register.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; With the 1x scale these are byte offsets from the gather base.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 1x displacement
; 64-bit indexes
;
; Purpose: check that the gather address calculation (index * 1 + base + disp)
; wraps correctly in 64 bits.
;
; Each index is built at runtime as (-1 - .data_mid + k). The base register holds
; (2 * .data_mid) and the displacement is 1, so the effective address becomes
;   (-1 - .data_mid + k) + (2 * .data_mid) + 1 = .data_mid + k
; The four elements therefore read from .data_mid + 0, - 8, - 16 and + 16.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0: k = 0
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: k = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: k = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: k = +16
; Use the full 64-bit register, matching the other elements. A 16-bit add would
; drop any carry out of bit 15 and silently corrupt the index.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; rax = 2 * .data_mid
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with known data; every element is expected to be
; overwritten because the mask enables all four lanes.
vmovapd ymm15, [rel .data]

; ymm0 = four 64-bit indices, xmm1 = per-dword mask (sign bit enables the lane).
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [ymm0 * 1 + rax + 1], xmm1

; RegData expects xmm1 to be all zero at this point.

hlt

align 4096

; Gather mask with the sign bit set in every dword lane.
; Only the low four dwords are consumed, because the test loads it through xmm1.
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit index slots.
; They start out as zero and are filled in by the test code before the gather.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af52633359", "0x59f4e95cc21448dd", "0", "0"],
    "XMM4":  ["0xf1177f807f80d2ec", "0xa9d7e273e273f117", "0", "0"],
    "XMM5":  ["0x2db6b85f34292db6", "0x6135a9d7b85f6135", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2567f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda25634292db6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 2x displacement
; 64-bit indexes
;
; Exercises vpgatherqd with a ymm index vector (four signed 64-bit indices), an
; index scale of 2 and an xmm destination/mask (four dwords).
; Each case combines one index table with one mask table; the results are
; compared against the RegData block at the top of the file.

; Base address for every gather: the middle of the random data, so that both
; positive and negative indices stay inside the table.
lea rax, [rel .data_mid]

; Preload every destination with the first 32 bytes of .data so that elements
; which are masked off can be recognised as unchanged.
; (xmm3 is not preloaded; its gather below uses the full mask.)
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; No lane is enabled, so the low 128 bits of the destination keep the preloaded data.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [ymm0 * 2 + rax], xmm1

; First element Mask
; NOTE: .mask_00000001 only has the sign bit set in its eighth dword. The mask
; is loaded through xmm1, so only the first four dwords (all zero) are used and
; these three gathers behave like the zero-mask cases above. The expected
; XMM10-XMM12 values equal the preloaded data.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [ymm0 * 2 + rax], xmm1

; Top element mask
; .mask_10000000 has the sign bit set in dword 0 only, so just element 0 is
; gathered; the remaining dwords keep the preloaded data.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [ymm0 * 2 + rax], xmm1

; Full Mask
; All four dwords are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [ymm0 * 2 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [ymm0 * 2 + rax], xmm1

; Full range, full mask
; Indices of -504, -256, 256 and 504 scaled by 2 give byte offsets of
; -1008, -512, 512 and 1008 relative to .data_mid.
vmovaps ymm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [ymm0 * 2 + rax], xmm1

; xmm1 will be zero after this.

hlt

align 4096

; Gather masks: a lane is enabled when the sign bit of its dword is set.
; Every table is eight dwords long; the test code reads them through xmm1.

; No lane enabled.
.mask_00000000:
times 8 dd 0

; Sign bit set in the last (eighth) dword only.
.mask_00000001:
times 7 dd 0
dd 0x8000_0000

; Sign bit set in the first dword only.
.mask_10000000:
dd 0x8000_0000
times 7 dd 0

; Sign bit set in every dword.
.mask_11111111:
times 8 dd 0x8000_0000

; Index tables: four signed 64-bit indices each.

; Every element uses index 0.
.index_d0:
times 4 dq 0

; Descending non-negative indices.
.index_positive_increment:
dq 3
dq 2
dq 1
dq 0

; Negative indices approaching zero.
.index_negative_decrement:
dq -4
dq -3
dq -2
dq -1

; Indices near both ends of the data table.
.index_full_range:
dq -504
dq -256
dq 256
dq 504

; Random data, 512 bytes per db statement
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd address-wraparound test.
; 256-bit index vector (ymm0)
; 2x displacement (scale 2, plus a constant +2)
; 64-bit signed indexes
;
; Each index is built at runtime as (~data_mid + delta) >> 1 (arithmetic shift)
; and the base register is then loaded with data_mid * 2, so the effective
; address
;     base + index * 2 + 2
; only resolves to a location near .data_mid through 64-bit modular arithmetic.
; With .data_mid even (which the expected values in the CONFIG block rely on),
; the four elements are read from:
;     element 0 -> .data_mid + 0
;     element 1 -> .data_mid - 8
;     element 2 -> .data_mid - 16
;     element 3 -> .data_mid + 16

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Index 0: delta = 0
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Index 1: delta = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Index 2: delta = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Index 3: delta = +16
; Use the full 64-bit register like the other indexes. A 16-bit `add bx, 16`
; would drop the carry out of bit 15 and only gives the right index by luck of
; where .data_mid happens to be placed.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination so any element that is not gathered would be visible.
vmovapd ymm15, [rel .data]

; Full mask: all four dword elements are gathered. The gather clears xmm1,
; which is why the CONFIG block expects XMM1 to be zero.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [ymm0 * 2 + rax + 2], xmm1

hlt

align 4096

; Gather mask with the sign bit set in every dword element.
; The test loads it with a 16-byte vmovaps, so only the first four dwords are
; actually consumed.
.mask_11111111:
times 8 dd 0x8000_0000

; Four signed 64-bit indexes. Zero here; the test code fills them in at runtime
; before the gather.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0af6799bee3", "0xf6300511c21448dd", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0xe273f1177f80d2ec", "0", "0"],
    "XMM5":  ["0xbf633429e9341ce2", "0x6135a9d72db6b85f", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda256db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256e9341ce2", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd with a 256-bit index vector (ymm0)
; 4x displacement (scale 4, no constant offset)
; 64-bit signed indexes

; Every case below is the same three-instruction sequence, so it is expressed
; once as a macro:
;   %1 = destination xmm register
;   %2 = label of the index table loaded into ymm0
;   %3 = label of the mask table loaded into xmm1
%macro GATHER_QD_SCALE4 3
  vmovaps ymm0, [rel %2]
  vmovaps xmm1, [rel %3]
  vpgatherqd %1, [ymm0 * 4 + rax], xmm1
%endmacro

; Base address shared by all gathers.
lea rax, [rel .data_mid]

; Seed every destination with the start of .data so elements that are not
; gathered remain recognisable in the results.
vmovapd ymm4, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm15, [rel .data]

; Zero mask: nothing is gathered.
GATHER_QD_SCALE4 xmm15, .index_d0, .mask_00000000
GATHER_QD_SCALE4 xmm14, .index_positive_increment, .mask_00000000
GATHER_QD_SCALE4 xmm13, .index_negative_decrement, .mask_00000000

; .mask_00000001 cases.
; The 16-byte xmm1 load only picks up the first four dwords of the table, and
; those are all zero in .mask_00000001, so the CONFIG block expects these
; destinations to keep their seeded low 128 bits.
GATHER_QD_SCALE4 xmm12, .index_d0, .mask_00000001
GATHER_QD_SCALE4 xmm11, .index_positive_increment, .mask_00000001
GATHER_QD_SCALE4 xmm10, .index_negative_decrement, .mask_00000001

; .mask_10000000 cases.
; Only dword 0 of the mask has its sign bit set, so only element 0 is gathered.
GATHER_QD_SCALE4 xmm9, .index_d0, .mask_10000000
GATHER_QD_SCALE4 xmm8, .index_positive_increment, .mask_10000000
GATHER_QD_SCALE4 xmm7, .index_negative_decrement, .mask_10000000

; Full mask: all four elements are gathered.
GATHER_QD_SCALE4 xmm6, .index_d0, .mask_11111111
GATHER_QD_SCALE4 xmm5, .index_positive_increment, .mask_11111111
GATHER_QD_SCALE4 xmm4, .index_negative_decrement, .mask_11111111

; Full range, full mask
GATHER_QD_SCALE4 xmm3, .index_full_range, .mask_11111111

; xmm1 will be zero after this (the CONFIG block checks XMM1 for zero).

hlt

align 4096

; Gather masks: only the sign bit of each dword matters.
; Each table is eight dwords long; the tests in this file load them with a
; 16-byte vmovaps, so only the first four dwords are consumed.
.mask_00000000:
times 8 dd 0

; Sign bit set in the eighth dword only.
.mask_00000001:
times 7 dd 0
dd 0x8000_0000

; Sign bit set in the first dword only.
.mask_10000000:
dd 0x8000_0000
times 7 dd 0

; Sign bit set in every dword.
.mask_11111111:
times 8 dd 0x8000_0000

; Index vectors: four signed 64-bit indexes each.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

.index_full_range:
dq -248, -128, 128, 248

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd, 256-bit index vector form:
;   - ymm0 supplies four signed 64-bit indexes
;   - four 32-bit elements are gathered into xmm15
;   - index scale is 4x (the "4xdisp" in the file name), plus a +4 displacement
;
; Purpose: make sure the 64-bit address arithmetic of the gather wraps around
; correctly. Each index is built from ~base (base = address of .data_mid) and
; the gather base register is set to 2 * base, so that
;   address = 2 * base + 4 * ((~base + k) >> 2) + 4
; only lands back next to .data_mid after overflowing 64 bits.
; With k = 0, -8, -16, +16 the expected results in the CONFIG header are the
; dwords at .data_mid + 0, - 8, - 16 and + 16 respectively.
; NOTE(review): the arithmetic above assumes .data_mid is at least 4-byte
; aligned so that the arithmetic shift discards only the low set bits of ~base.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Index 0: (~base) >> 2  -> element at .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Index 1: (~base - 8) >> 2  -> element at .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Index 2: (~base - 16) >> 2  -> element at .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Index 3: (~base + 16) >> 2  -> element at .data_mid + 16
; Use the full 64-bit register here, matching the `sub rbx, N` cases above.
; A 16-bit `add bx, 16` would silently drop any carry out of bit 15 and
; produce a wrong index whenever the low 16 bits of ~base wrap.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; (base register becomes 2 * .data_mid; the indexes above cancel one copy.)
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with known bytes so untouched lanes would be visible.
vmovapd ymm15, [rel .data]

; Load the four runtime-computed qword indexes and a mask whose first four
; dwords all have the sign bit set, so every element is gathered.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm15, [ymm0 * 4 + rax + 4], xmm1

; The CONFIG header expects xmm1 (the mask) to read back as zero here.
hlt

align 4096

; Gather mask with the sign bit set in all eight dwords.
; The test loads only the first 16 bytes (four dwords) of it into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder zeros: the four qword slots are overwritten at runtime by the
; test code before being loaded into ymm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2522e0afde37bc4f", "0xabf96d9bc21448dd", "0", "0"],
    "XMM4":  ["0x97ba9a8e8aa16a60", "0x7f80d2ecdb698176", "0", "0"],
    "XMM5":  ["0x8a34fff5df6efe3b", "0x6135a9d7bf633429", "0", "0"],
    "XMM6":  ["0x6135a9d76135a9d7", "0x6135a9d76135a9d7", "0", "0"],
    "XMM7":  ["0xf1cda2568aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xf1cda256df6efe3b", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0xf1cda2566135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqd, 256-bit index vector form:
;   - ymm0 supplies four signed 64-bit indexes
;   - four 32-bit elements are gathered into an xmm destination
;   - index scale is 8x (the "8xdisp" in the file name), no displacement
;
; Each case below loads an index vector into ymm0 and the first 16 bytes of a
; mask table into xmm1, then gathers relative to .data_mid (held in rax).
; Destinations are preloaded with the start of .data so that lanes the gather
; does not write remain recognisable in the expected results.

lea rax, [rel .data_mid]

; Preload every destination register with the same known bytes from .data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is selected, so the low 128 bits of the destination
; keep the preloaded .data bytes (see XMM15/XMM14/XMM13 in the CONFIG header).
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm15, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm14, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000000]
vpgatherqd xmm13, [ymm0 * 8 + rax], xmm1

; .mask_00000001 (labelled "first element mask" in sibling tests).
; NOTE(review): the only set dword of .mask_00000001 is its eighth one, which
; lies outside the 16 bytes loaded into xmm1. In this 128-bit-mask variant the
; effective mask is therefore all zero and nothing is gathered; the expected
; XMM12/XMM11/XMM10 values are the untouched .data preload.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm12, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm11, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_00000001]
vpgatherqd xmm10, [ymm0 * 8 + rax], xmm1

; .mask_10000000: only dword 0 of the mask has its sign bit set, so only
; element 0 (the lowest 32-bit lane of the destination) is gathered; the
; remaining lanes of the low 128 bits keep the preloaded .data bytes.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm9, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm8, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_10000000]
vpgatherqd xmm7, [ymm0 * 8 + rax], xmm1

; Full mask: all four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm6, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm5, [ymm0 * 8 + rax], xmm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm4, [ymm0 * 8 + rax], xmm1

; Full range, full mask: indexes reach -960/-512/+512/+960 bytes from .data_mid.
vmovaps ymm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_11111111]
vpgatherqd xmm3, [ymm0 * 8 + rax], xmm1

; xmm1 will be zero after this (the CONFIG header checks XMM1 == 0).

hlt

align 4096

; Masks only care about the sign bit.
; Label digits list the eight dwords in memory order: the leftmost digit is
; dword 0. The test loads only the first four dwords of each table into xmm1.
.mask_00000000:
dd 0, 0, 0, 0, 0, 0, 0, 0

; Only dword 7 set -- outside the four dwords this test loads.
.mask_00000001:
dd 0, 0, 0, 0, 0, 0, 0, 0x8000_0000

; Only dword 0 set -- selects element 0.
.mask_10000000:
dd 0x8000_0000, 0, 0, 0, 0, 0, 0, 0

; Every dword set -- selects all elements.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; The test scales each index by 8, so the byte offsets from .data_mid are
; eight times the values listed here.
; All elements read .data_mid + 0.
.index_d0:
dq 0, 0, 0, 0

; Byte offsets +24, +16, +8, +0.
.index_positive_increment:
dq 3, 2, 1, 0

; Byte offsets -32, -24, -16, -8 (reads the tail of .data).
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -960, -512, +512, +960.
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qd_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x7f80d2ec6135a9d7", "0x8a34fff5db698176", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 8x displacement (index scale factor of 8)
; 64-bit indexes
;
; Overflow test: the gather further down uses base = 2 * .data_mid and a
; constant displacement of 8, so every index stored here is negative enough
; that index * 8 + base + 8 only lands back inside the data table once the
; 64-bit address arithmetic has wrapped around.
;
; With D = address of .data_mid, each index is (-1 - D + k) >> 3 (arithmetic
; shift). Assuming D is 8-byte aligned, which the layout following
; `align 4096` appears to give, the effective address works out to D + k
; for k = 0, -8, -16, +16.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Element 1 -> .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Element 2 -> .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Element 3 -> .data_mid + 16
; Use the full 64-bit register: a 16-bit `add bx, 16` would not carry into
; the upper bits of rbx and only matched by accident of the data layout.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; rax = 2 * address of .data_mid. Combined with the negative indexes stored in
; .index_overflow and the +8 displacement, the effective addresses wrap around
; and resolve to locations around .data_mid.
lea rax, [rel .data_mid]
shl rax, 1

; Preload registers with the first 32 bytes of .data.
; Only xmm15 is used as a gather destination in this file; ymm14-ymm4 are not
; referenced again and are not listed in RegData.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; ymm0 = the four 64-bit indexes computed earlier in this file.
; xmm1 = first four dwords of the mask table, each with its sign bit set.
vmovaps ymm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_11111111]
; Gather four dwords into xmm15. RegData expects xmm1 to be all zero and the
; upper 128 bits of ymm15 to be zero afterwards.
vpgatherqd xmm15, [ymm0 * 8 + rax + 8], xmm1

; Stop here; everything that follows is data.
hlt

align 4096

; Eight dwords, each with only the sign bit (bit 31) set.
; The test loads just the first 16 bytes of this table into xmm1.
.mask_11111111:
dd 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000, 0x8000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: all four entries are overwritten at run time by the
; code at the top of this file before being loaded into ymm0.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x6135a9d7e273f117", "0x5f6135a9d7e273f1", "0", "0"],
    "XMM5":  ["0x6334292db6b85f61", "0x34292db6b85f6135", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x6135a9d7e273f117", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x6334292db6b85f61", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement (index scale factor of 1)
; 64-bit indexes
;
; Runs vpgatherqq with xmm destination/index/mask registers against several
; index sets and masks. Every 128-bit load of a mask or index table below only
; picks up the first two qwords of that table.

; rax = gather base address, pointing at the middle of the data table.
lea rax, [rel .data_mid]

; Preload the destinations with the first 32 bytes of .data so that elements
; which are masked off can be checked for being left untouched.
; xmm3 is not preloaded; it is only written by a gather with both mask
; elements set.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask
; Nothing is gathered; RegData expects xmm15-xmm13 to keep the preloaded low
; 128 bits, with the upper 128 bits reading back as zero.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm15, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm14, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm13, [xmm0 * 1 + rax], xmm1

; .mask_0001: only the fourth qword of the table has its sign bit set.
; The 128-bit load into xmm1 sees the first two qwords only, so the mask is
; effectively all zero here and RegData expects xmm12-xmm10 to match the
; zero-mask results above.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm12, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm11, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm10, [xmm0 * 1 + rax], xmm1

; .mask_1000: only the first qword of the table has its sign bit set.
; Element 0 is gathered; element 1 keeps the preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm9, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm8, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm7, [xmm0 * 1 + rax], xmm1

; Full Mask
; Both elements are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm6, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm5, [xmm0 * 1 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm4, [xmm0 * 1 + rax], xmm1

; Full range, full mask
; The two indexes loaded here are -992 and -512, i.e. offsets below .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm3, [xmm0 * 1 + rax], xmm1

; xmm1 will be zero after this (RegData checks XMM1 for all zeroes).

; Stop here; everything that follows is data.
hlt

align 4096

; Masks only care about the sign bit.
; Each table holds four qwords, but the 128-bit loads in this test read only
; the first two entries of a table.
.mask_0000:
dq 0, 0, 0, 0

; Sign bit set in the fourth entry only; a 128-bit load sees an all-zero mask.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Sign bit set in the first entry only; a 128-bit load selects element 0.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

; Sign bit set in every entry.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; With a scale of 1 these are byte offsets relative to the base in rax.
.index_d0:
dq 0, 0, 0, 0

; A 128-bit load uses offsets 3 and 2.
.index_positive_increment:
dq 3, 2, 1, 0

; A 128-bit load uses offsets -4 and -3.
.index_negative_decrement:
dq -4, -3, -2, -1

; A 128-bit load uses offsets -992 and -512.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 1x displacement (index scale factor of 1)
; 64-bit indexes
;
; Overflow test: the gather further down uses base = 2 * .data_mid and a
; constant displacement of 1, so every index stored here is negative enough
; that index + base + 1 only lands back inside the data table once the 64-bit
; address arithmetic has wrapped around.
;
; With D = address of .data_mid, each index is -1 - D + k, which makes the
; effective address D + k for k = 0, -8, -16, +16. The 128-bit gather in this
; file loads only the first two indexes into xmm0.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0 -> .data_mid + 0
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Element 1 -> .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Element 2 -> .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Element 3 -> .data_mid + 16
; Use the full 64-bit register: a 16-bit `add bx, 16` would not carry into
; the upper bits of rbx and only matched by accident of the data layout.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; rax = 2 * address of .data_mid. Combined with the negative indexes stored in
; .index_overflow and the +1 displacement, the effective addresses wrap around
; and resolve to locations around .data_mid.
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

; xmm0 = first two 64-bit indexes computed earlier in this file.
; xmm1 = mask with the sign bit set in both elements.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
; Gather two qwords into xmm15. RegData expects xmm1 to be all zero and the
; upper 128 bits of ymm15 to be zero afterwards.
vpgatherqq xmm15, [xmm0 * 1 + rax + 1], xmm1

; Stop here; everything that follows is data.
hlt

align 4096

; Four qwords, each with only the sign bit (bit 63) set.
; The test loads just the first 16 bytes of this table into xmm1.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder storage: all four entries are overwritten at run time by the
; code at the top of this file; the 128-bit load into xmm0 reads the first two.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xa9d7e273f1177f80", "0", "0"],
    "XMM5":  ["0x1ce2bf6334292db6", "0xbf6334292db6b85f", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x1ce2bf6334292db6", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement
; 64-bit indexes
;
; Runs vpgatherqq with xmm operands and a scale of 2 against every mask/index
; table combination below. Each gather targets its own destination register,
; which is preloaded from .data so that elements the gather leaves alone stay
; recognisable in the expected results.

; GATHER_QQ dst, index_table, mask_table
;   Loads the first 16 bytes of index_table into xmm0 and of mask_table into
;   xmm1, then gathers qwords from [xmm0 * 2 + rax] into dst.
%macro GATHER_QQ 3
vmovaps xmm0, [rel %2]
vmovaps xmm1, [rel %3]
vpgatherqq %1, [xmm0 * 2 + rax], xmm1
%endmacro

; Base address for every gather: the middle of the data table, so that both
; positive and negative indexes stay inside it.
lea rax, [rel .data_mid]

; Preload all destinations with the start of .data.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; All-zero mask: nothing is gathered, the low 128 bits keep the preloaded data.
GATHER_QQ xmm15, .index_d0, .mask_0000
GATHER_QQ xmm14, .index_positive_increment, .mask_0000
GATHER_QQ xmm13, .index_negative_decrement, .mask_0000

; .mask_0001: only the fourth qword of the table has its sign bit set. The
; 16-byte load into xmm1 covers just the first two qwords, so for this 128-bit
; variant the mask is effectively zero as well.
GATHER_QQ xmm12, .index_d0, .mask_0001
GATHER_QQ xmm11, .index_positive_increment, .mask_0001
GATHER_QQ xmm10, .index_negative_decrement, .mask_0001

; .mask_1000: sign bit set in the first qword only, so just the low element is
; gathered and the second element keeps the preloaded data.
GATHER_QQ xmm9, .index_d0, .mask_1000
GATHER_QQ xmm8, .index_positive_increment, .mask_1000
GATHER_QQ xmm7, .index_negative_decrement, .mask_1000

; Full mask: both elements are gathered.
GATHER_QQ xmm6, .index_d0, .mask_1111
GATHER_QQ xmm5, .index_positive_increment, .mask_1111
GATHER_QQ xmm4, .index_negative_decrement, .mask_1111

; Full range, full mask: the loaded indexes -504 and -256 scale to byte offsets
; -1008 and -512 from .data_mid.
GATHER_QQ xmm3, .index_full_range, .mask_1111

; xmm1 will be zero after this; the expected state in the CONFIG header checks it.

hlt

; Start the test data on a fresh 4 KiB boundary.
align 4096

; Mask tables, four qwords each. Only the sign bit of a mask element matters.
; The digits in each name list the qwords in memory order (first digit = first
; qword).
.mask_0000:
times 4 dq 0

.mask_0001:
times 3 dq 0
dq 0x8000_0000_0000_0000

.mask_1000:
dq 0x8000_0000_0000_0000
times 3 dq 0

.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Index tables, four qwords each. Every index is a signed 64-bit integer.
.index_d0:
times 4 dq 0

.index_positive_increment:
dq 3
dq 2
dq 1
dq 0

.index_negative_decrement:
dq -4
dq -3
dq -2
dq -1

.index_full_range:
dq -504
dq -256
dq 256
dq 504

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 2x displacement
; 64-bit indexes
;
; Checks vpgatherqq address generation when index * 2 + base + displacement
; wraps around 64 bits. With A = address of .data_mid, every index is built as
; (-1 - A - k) arithmetically shifted right by one, and the base register is
; set to 2 * A. With a scale of 2 and a displacement of 2 the effective address
; reduces (modulo 2^64) to A - k whenever A + k is even.
; NOTE(review): this relies on .data_mid being at an even address - confirm
; against the data layout further down the file.

; Calculate an address that heavily overflows
; rax = A, the location the wrapped address sums should land on.
lea rax, [rel .data_mid]

; Lane 0: index = (-1 - A) >> 1          -> gathers from .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: index = (-1 - A - 8) >> 1      -> gathers from .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Lane 2: index = (-1 - A - 16) >> 1     -> would address .data_mid - 16.
; Lanes 2 and 3 are stored for completeness; the 16-byte xmm0 load below only
; picks up lanes 0 and 1.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Lane 3: index = (-1 - A + 16) >> 1     -> would address .data_mid + 16.
; The add has to be 64 bits wide: a 16-bit add would lose any carry out of
; bit 15 and leave a wrong index in the upper bits.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; rax = 2 * A (modulo 2^64).
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination so untouched or wrongly gathered elements show up.
vmovapd ymm15, [rel .data]

; Both loaded mask elements have their sign bit set, so both qwords are
; gathered. The expected state in the CONFIG header also requires xmm1 to be
; all zero once the gather has completed.
vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm15, [xmm0 * 2 + rax + 2], xmm1

hlt

; Start the test data on a fresh 4 KiB boundary.
align 4096

; Gather mask: sign bit set in each of the four qwords.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four signed 64-bit index slots. They are zero in the image and are filled in
; at runtime by the code at the top of the test.
.index_overflow:
times 4 dq 0

; Random data, 512 bytes per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x7be65e82db698176", "0x7f80d2ec7be65e82", "0", "0"],
    "XMM5":  ["0x8a34fff5e9341ce2", "0xe9341ce2bf633429", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0x8a34fff5e9341ce2", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQQ test:
; - 128-bit destination and index registers (xmm)
; - 64-bit indexes, scaled by 4: every gather addresses [xmm0 * 4 + rax]
; - rax points at .data_mid so negative and positive indexes both address the data table

lea rax, [rel .data_mid]

; Preload every destination from the start of .data so that lanes the gather
; leaves alone can be recognised in the expected RegData.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is selected, so the low two qwords keep their preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm15, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm14, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm13, [xmm0 * 4 + rax], xmm1

; .mask_0001: only the fourth qword in memory has its sign bit set.
; The 128-bit loads below read just the first two qwords, so both lanes are
; masked off here as well (expected XMM10-XMM12 keep the preloaded value).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm12, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm11, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm10, [xmm0 * 4 + rax], xmm1

; .mask_1000: sign bit set in the first qword only, so only lane 0 is gathered
; and lane 1 keeps its preloaded value.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm9, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm8, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm7, [xmm0 * 4 + rax], xmm1

; Full mask: both lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm6, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm5, [xmm0 * 4 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm4, [xmm0 * 4 + rax], xmm1

; Full range, full mask: the two largest-magnitude negative indexes (-248, -128),
; i.e. byte offsets -992 and -512 from .data_mid.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm3, [xmm0 * 4 + rax], xmm1

; The gather clears its mask register, so xmm1 is expected to be zero here
; (RegData checks XMM1 == 0).

hlt

; Start the tables on a fresh 4096-byte boundary, away from the code above.
align 4096

; A lane is gathered when the sign bit (bit 63) of its mask qword is set; all other mask bits are ignored.
; Label digits follow memory order (first digit = first qword). Each table holds four qwords,
; but the 128-bit (xmm) loads in this test only read the first two.
.mask_0000:
dq 0, 0, 0, 0

; Only the fourth qword is set: outside the 16 bytes an xmm load reads.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the first qword (lane 0) is set.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; As with the masks, only the first two qwords of each table reach the xmm index register.
.index_d0:
dq 0, 0, 0, 0

.index_positive_increment:
dq 3, 2, 1, 0

.index_negative_decrement:
dq -4, -3, -2, -1

; Scaled by 4 these are byte offsets -992, -512, 512 and 992 from the gather base.
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; VPGATHERQQ overflow test:
; - 128-bit destination and index registers (xmm)
; - 64-bit indexes scaled by 4, plus a +4 displacement: [xmm0 * 4 + rax + 4]
;
; The indexes are built at runtime from the address of .data_mid so that
; index * 4 is close to -(.data_mid). The base register is then loaded with
; 2 * .data_mid, so base + index * 4 + 4 only lands back inside the data table
; if the address arithmetic is carried out with full 64-bit wrap-around.
; NOTE(review): the derivation below assumes .data_mid is 4-byte aligned, so the
; arithmetic shift drops exactly the low two set bits of ~(.data_mid).

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Lane 0: index = (-1 - .data_mid) >> 2  ->  final address .data_mid
mov rbx, -1
sub rbx, rax
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Lane 1: index = (-1 - .data_mid - 8) >> 2  ->  final address .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Lanes 2 and 3 are filled in for completeness; the 128-bit load of
; .index_overflow below only reads the first two qwords.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

mov rbx, -1
sub rbx, rax
; Use the full 64-bit register: a 16-bit `add bx, 16` would not carry into the
; upper bits of rbx and could produce a wrong index for some load addresses.
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination so untouched lanes would be recognisable.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm15, [xmm0 * 4 + rax + 4], xmm1

hlt

; Start the tables on a fresh 4096-byte boundary, away from the code above.
align 4096

; Sign bit set in every qword: all lanes are gathered.
; Only the first two qwords are read by the 128-bit (xmm) mask load.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder zeros: the test code overwrites all four qwords at runtime
; with indexes derived from the address of .data_mid.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0", "0"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0", "0"],
    "XMM5":  ["0xb8c6d8a6df6efe3b", "0x3fd4ea2f8a34fff5", "0", "0"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0", "0"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0", "0"],
    "XMM8":  ["0xb8c6d8a6df6efe3b", "0x0f350767409162b7", "0", "0"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0", "0"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x displacement
; 64-bit indexes
;
; Exercises `vpgatherqq xmm, [xmm * 8 + base], xmm`: two 64-bit lanes, 64-bit
; signed indexes scaled by 8. Every destination is preloaded with the first
; bytes of .data so that lanes the mask leaves alone can be told apart from
; lanes that were gathered. Results are compared with the RegData table in the
; CONFIG header above.

; Gather base: the middle of the data blob, so negative indexes stay in bounds.
lea rax, [rel .data_mid]

; Preload the destinations with a known pattern. These are 256-bit loads, but
; the VEX.128 gathers below clear bits 255:128 of their destination, which is
; why RegData expects "0" in the two upper qwords of every register.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no lane is gathered, destinations keep the preloaded pattern.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm15, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm14, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0000]
vpgatherqq xmm13, [xmm0 * 8 + rax], xmm1

; .mask_0001: the only set lane is the fourth qword of the table, which a
; 128-bit load never reads. For this 128-bit test the mask is therefore all
; zero and the destinations stay unchanged (see XMM10-XMM12 in RegData).
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm12, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm11, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_0001]
vpgatherqq xmm10, [xmm0 * 8 + rax], xmm1

; .mask_1000: the set lane is the first qword of the table, i.e. lane 0 (the
; lowest element). Lane 0 is gathered, lane 1 keeps the preloaded pattern.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm9, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm8, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1000]
vpgatherqq xmm7, [xmm0 * 8 + rax], xmm1

; Full Mask: both lanes are gathered.
vmovaps xmm0, [rel .index_d0]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm6, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_positive_increment]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm5, [xmm0 * 8 + rax], xmm1

vmovaps xmm0, [rel .index_negative_decrement]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm4, [xmm0 * 8 + rax], xmm1

; Full range, full mask
; The two indexes actually loaded are -120 and -64, i.e. byte offsets -960 and
; -512 from .data_mid. xmm3 is not preloaded: with a full mask both lanes are
; overwritten anyway.
vmovaps xmm0, [rel .index_full_range]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm3, [xmm0 * 8 + rax], xmm1

; xmm1 will be zero after this.
; (The gather clears its mask register; RegData checks XMM1 == 0.)

hlt

; Page-align the tables so the aligned vmovaps/vmovapd loads above are legal.
align 4096

; Every table below holds four qwords, but this test only issues 128-bit
; loads from the masks and indexes, so only the FIRST TWO qwords of each of
; those tables take part. The label digits follow memory order
; (leftmost digit = first qword = lane 0).

; Masks only care about the sign bit.
.mask_0000:
dq 0, 0, 0, 0

; Set lane is qword 3: never read by a 128-bit load, so effectively all-zero here.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Set lane is qword 0: selects lane 0 only.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Indexes are multiplied by 8 by the addressing mode and added to .data_mid.
.index_d0:
dq 0, 0, 0, 0

; Lanes used by this test: 3, 2
.index_positive_increment:
dq 3, 2, 1, 0

; Lanes used by this test: -4, -3
.index_negative_decrement:
dq -4, -3, -2, -1

; Lanes used by this test: -120, -64
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_128bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0", "0"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 128-bit
; 8x displacement
; 64-bit indexes
;
; Checks that vpgatherqq computes `index * 8 + base + disp` with 64-bit
; wraparound. The base is set to 2 * .data_mid and each index is built at
; runtime so that index * 8 is roughly -.data_mid; the sum therefore only
; lands back inside the data blob if the full 64-bit arithmetic is honoured.
;
; With A = address of .data_mid (8-byte aligned by the layout below):
;   index[0] = (~A)      >> 3  ->  index*8 + 2A + 8 = A
;   index[1] = (~A -  8) >> 3  ->  index*8 + 2A + 8 = A - 8
;   index[2] = (~A - 16) >> 3  ->  index*8 + 2A + 8 = A - 16
;   index[3] = (~A + 16) >> 3  ->  index*8 + 2A + 8 = A + 16
; Only index[0] and index[1] are consumed by the 128-bit gather; the other two
; are still written so the table is fully initialised.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; index[0]: rbx = -1 - A = ~A, then arithmetic shift to undo the *8 scale.
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; index[1]: one qword below index[0]'s target.
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; index[2]: two qwords below index[0]'s target.
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; index[3]: two qwords above index[0]'s target.
; Uses the full 64-bit register like the other lanes; a 16-bit `add bx, 16`
; would drop any carry out of bit 15 and produce a wrong index.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; base = 2 * A, so that base + index*8 + 8 collapses back to A (see above).
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with a known pattern; both lanes are overwritten
; by the full-mask gather below.
vmovapd ymm15, [rel .data]

vmovaps xmm0, [rel .index_overflow]
vmovaps xmm1, [rel .mask_1111]
vpgatherqq xmm15, [xmm0 * 8 + rax + 8], xmm1

hlt

; Page-align the tables so the aligned vmovaps/vmovapd loads above are legal.
align 4096

; All lanes enabled (only the sign bit of each qword matters). The 128-bit
; load in this test reads the first two qwords.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder values: the test code overwrites all four qwords at runtime,
; because the indexes depend on the address .data_mid is loaded at.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_1xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x464061a9f6300511"],
    "XMM4":  ["0x6135a9d7e273f117", "0x5f6135a9d7e273f1", "0xb85f6135a9d7e273", "0xb6b85f6135a9d7e2"],
    "XMM5":  ["0x6334292db6b85f61", "0x34292db6b85f6135", "0x292db6b85f6135a9", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x6135a9d7e273f117", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x6334292db6b85f61", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xb6b85f6135a9d7e2"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq test: 256-bit destination, 64-bit indexes, index scale of 1
; (the "1xdisp" in the file name).
;
; Destinations ymm4-ymm15 start out holding the first 32 bytes of .data so
; that lanes whose mask sign bit is clear can be checked for being left
; untouched by the gather. rax is the gather base and points at .data_mid,
; which has table data on both sides so negative indexes are valid.

; PRELOAD_FROM_DATA reg [, reg ...]
; Loads the first 32 bytes of .data into every register listed, in order.
%macro PRELOAD_FROM_DATA 1-*
  %rep %0
    vmovapd %1, [rel .data]
    %rotate 1
  %endrep
%endmacro

; GATHER_QQ dest, index_table, mask_table
; Reloads ymm0 with the indexes and ymm1 with the mask, then gathers four
; qwords into dest from [rax + index * 1]. ymm1 has to be reloaded every time
; because the gather consumes the mask.
%macro GATHER_QQ 3
  vmovaps ymm0, [rel %2]
  vmovaps ymm1, [rel %3]
  vpgatherqq %1, [ymm0 * 1 + rax], ymm1
%endmacro

lea rax, [rel .data_mid]

PRELOAD_FROM_DATA ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; .mask_0000 - no lane enabled, destinations keep their preloaded contents.
GATHER_QQ ymm15, .index_d0, .mask_0000

GATHER_QQ ymm14, .index_positive_increment, .mask_0000

GATHER_QQ ymm13, .index_negative_decrement, .mask_0000

; .mask_0001 - only the last qword in memory order (highest lane, element 3)
; is enabled; elements 0-2 keep their preloaded contents.
GATHER_QQ ymm12, .index_d0, .mask_0001

GATHER_QQ ymm11, .index_positive_increment, .mask_0001

GATHER_QQ ymm10, .index_negative_decrement, .mask_0001

; .mask_1000 - only the first qword in memory order (lowest lane, element 0)
; is enabled; elements 1-3 keep their preloaded contents.
GATHER_QQ ymm9, .index_d0, .mask_1000

GATHER_QQ ymm8, .index_positive_increment, .mask_1000

GATHER_QQ ymm7, .index_negative_decrement, .mask_1000

; .mask_1111 - every lane enabled.
GATHER_QQ ymm6, .index_d0, .mask_1111

GATHER_QQ ymm5, .index_positive_increment, .mask_1111

GATHER_QQ ymm4, .index_negative_decrement, .mask_1111

; Every lane enabled, indexes reaching far before and after .data_mid.
; ymm3 is not preloaded; all four of its lanes are written by the gather.
GATHER_QQ ymm3, .index_full_range, .mask_1111

; The last gather leaves the mask register cleared, so ymm1 is expected to be
; all zero at this point (see XMM1 in the CONFIG block).

hlt

align 4096

; Each table below is four qwords (32 bytes), listed lowest lane first, and is
; loaded whole into a ymm register by the test code.

; A gather lane is enabled purely by the sign bit of its mask qword; the
; remaining 63 bits are ignored.
%define LANE_ON  0x8000_0000_0000_0000
%define LANE_OFF 0

; No lane enabled.
.mask_0000:
times 4 dq LANE_OFF

; Only the last qword in memory (element 3) enabled.
.mask_0001:
dq LANE_OFF, LANE_OFF, LANE_OFF, LANE_ON

; Only the first qword in memory (element 0) enabled.
.mask_1000:
dq LANE_ON, LANE_OFF, LANE_OFF, LANE_OFF

; Every lane enabled.
.mask_1111:
times 4 dq LANE_ON

; Index tables. Each index is a signed 64-bit byte offset from the gather base
; (the test uses a scale of 1).

; Every lane reads from the base address itself.
.index_d0:
times 4 dq 0

; Small non-negative offsets, descending from element 0 to element 3.
.index_positive_increment:
dq 3, 2, 1, 0

; Small negative offsets, element 0 furthest below the base.
.index_negative_decrement:
dq -4, -3, -2, -1

; Offsets close to both ends of the random data surrounding the base.
.index_full_range:
dq -992, -512, 512, 992

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_1xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq test: 256-bit destination, 64-bit indexes, index scale of 1
; (the "1xdisp" in the file name), plus a +1 byte displacement.
;
; Purpose: the per-lane address base + index + displacement is built from
; values that only produce a valid pointer once the 64-bit sum wraps around.
;
;   base         = 2 * data_mid            (rax at the time of the gather)
;   displacement = 1
;   index[n]     = -1 - data_mid + off[n]  (stored to .index_overflow below)
;   address[n]   = base + 1 + index[n] = data_mid + off[n]   (mod 2^64)
;
; with off = { 0, -8, -16, +16 } for elements 0..3.

; Calculate indexes that rely on the address computation wrapping.
lea rax, [rel .data_mid]

; Element 0: off = 0
mov rbx, -1
sub rbx, rax
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: off = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: off = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: off = +16
; The adjustment is done on the full 64-bit register, like the subtractions
; above. A 16-bit `add bx, 16` would drop any carry out of bit 15 and produce
; a wrong index whenever the low 16 bits of rbx are 0xfff0 or above.
mov rbx, -1
sub rbx, rax
add rbx, 16
mov [rel .index_overflow + 3 * 8], rbx

; Calculate the new base (2 * data_mid) which the indexes above offset from.
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with the first 32 bytes of .data.
vmovapd ymm15, [rel .data]

; All four lanes enabled; every lane is expected to be replaced.
vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm15, [ymm0 * 1 + rax + 1], ymm1

; The gather leaves the mask register cleared, so ymm1 is expected to be all
; zero here (see XMM1 in the CONFIG block).

hlt

align 4096

; Gather mask with the sign bit set in all four qwords, i.e. every lane
; enabled.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four signed 64-bit gather indexes, lowest lane first. They are zero in the
; assembled image; the test code stores the real values here at run time,
; before the table is loaded for the gather.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_2xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x002fd22652633359", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x98f0351d59f4e95c"],
    "XMM4":  ["0xe273f1177f80d2ec", "0xa9d7e273f1177f80", "0x6135a9d7e273f117", "0xb85f6135a9d7e273"],
    "XMM5":  ["0x1ce2bf6334292db6", "0xbf6334292db6b85f", "0x34292db6b85f6135", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0xe273f1177f80d2ec", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x1ce2bf6334292db6", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xb85f6135a9d7e273"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination (four 64-bit elements)
; VSIB scale of 2 applied to every index
; 64-bit indexes

; Base pointer sits in the middle of the data tables so that negative
; indexes read from .data and positive indexes read from .data_mid onward.
lea rax, [rel .data_mid]

; Seed every destination with the first 32 bytes of .data. Elements whose
; mask sign bit is clear must keep this value after the gather.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is gathered, destinations stay as seeded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm15, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm14, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm13, [ymm0 * 2 + rax], ymm1

; .mask_0001: sign bit set only in the fourth qword, so only the highest
; element (element 3) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm12, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm11, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm10, [ymm0 * 2 + rax], ymm1

; .mask_1000: sign bit set only in the first qword, so only the lowest
; element (element 0) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm9, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm8, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm7, [ymm0 * 2 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm6, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm5, [ymm0 * 2 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm4, [ymm0 * 2 + rax], ymm1

; Full range, full mask: indexes reach towards both ends of the data tables.
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm3, [ymm0 * 2 + rax], ymm1

; The gather clears its mask operand, so ymm1 is expected to be all zeros
; here (checked through XMM1 in the RegData above).

hlt

; Keep the tables on their own page, away from the instruction stream above.
align 4096

; Masks only care about the sign bit of each 64-bit element.
; The qwords are listed in memory order, lowest element first, so the label
; digits read left-to-right as element 0 .. element 3.
.mask_0000:
dq 0, 0, 0, 0

; Only the highest element (element 3) is enabled.
.mask_0001:
dq 0, 0, 0, 0x8000_0000_0000_0000

; Only the lowest element (element 0) is enabled.
.mask_1000:
dq 0x8000_0000_0000_0000, 0, 0, 0

.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Every index is multiplied by the VSIB scale (2 in this test) before being
; added to the base register.
.index_d0:
dq 0, 0, 0, 0

; Byte offsets +6, +4, +2, +0 from the base.
.index_positive_increment:
dq 3, 2, 1, 0

; Byte offsets -8, -6, -4, -2 from the base.
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -1008, -512, +512, +1008 from the base.
.index_full_range:
dq -504, -256, 256, 504

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_2xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination (four 64-bit elements)
; VSIB scale of 2 applied to every index
; 64-bit indexes
;
; Each index is built as (~data_mid + k) >> 1 and the gather then uses
; base = 2 * data_mid with a displacement of 2, so that
;   base + 2 + 2 * index == data_mid + k
; only after the 64-bit address arithmetic wraps around.
; The "+ 2" makes up for the "-1" in ~data_mid and for the low bit dropped by
; the arithmetic shift (assumes .data_mid is at an even address and k is even).

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0: k = 0, reads .data_mid + 0
mov rbx, -1
sub rbx, rax
sar rbx, 1
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: k = -8, reads .data_mid - 8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 1
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: k = -16, reads .data_mid - 16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 1
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: k = +16, reads .data_mid + 16
; Use the full 64-bit register like the other elements: a 16-bit add would
; drop any carry out of bit 15 and only works by luck of the page offset.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 1
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
lea rax, [rel .data_mid]
shl rax, 1

; Seed the destination; with a full mask every element is overwritten.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm15, [ymm0 * 2 + rax + 2], ymm1

hlt

; Keep the tables on their own page, away from the instruction stream above.
align 4096

; Masks only care about the sign bit of each 64-bit element; all four
; elements are enabled here.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder values: the four slots are overwritten at run time by the
; index computations at the top of the test.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_4xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0xcb60805f6799bee3", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x464061a9f6300511"],
    "XMM4":  ["0x7be65e82db698176", "0x7f80d2ec7be65e82", "0xe273f1177f80d2ec", "0x6135a9d7e273f117"],
    "XMM5":  ["0x8a34fff5e9341ce2", "0xe9341ce2bf633429", "0xbf6334292db6b85f", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x7be65e82db698176", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0x8a34fff5e9341ce2", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x6135a9d7e273f117"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination (four 64-bit elements)
; VSIB scale of 4 applied to every index
; 64-bit indexes

; Base pointer sits in the middle of the data tables so that negative
; indexes read from before .data_mid and positive indexes from after it.
lea rax, [rel .data_mid]

; Seed every destination with the first 32 bytes of .data. Elements whose
; mask sign bit is clear must keep this value after the gather.
vmovapd ymm15, [rel .data]
vmovapd ymm14, [rel .data]
vmovapd ymm13, [rel .data]
vmovapd ymm12, [rel .data]
vmovapd ymm11, [rel .data]
vmovapd ymm10, [rel .data]
vmovapd ymm9, [rel .data]
vmovapd ymm8, [rel .data]
vmovapd ymm7, [rel .data]
vmovapd ymm6, [rel .data]
vmovapd ymm5, [rel .data]
vmovapd ymm4, [rel .data]

; Zero mask: no element is gathered, destinations stay as seeded.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm15, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm14, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0000]
vpgatherqq ymm13, [ymm0 * 4 + rax], ymm1

; .mask_0001: sign bit set only in the fourth qword, so only the highest
; element (element 3) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm12, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm11, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_0001]
vpgatherqq ymm10, [ymm0 * 4 + rax], ymm1

; .mask_1000: sign bit set only in the first qword, so only the lowest
; element (element 0) is gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm9, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm8, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1000]
vpgatherqq ymm7, [ymm0 * 4 + rax], ymm1

; Full mask: all four elements are gathered.
vmovaps ymm0, [rel .index_d0]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm6, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_positive_increment]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm5, [ymm0 * 4 + rax], ymm1

vmovaps ymm0, [rel .index_negative_decrement]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm4, [ymm0 * 4 + rax], ymm1

; Full range, full mask
vmovaps ymm0, [rel .index_full_range]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm3, [ymm0 * 4 + rax], ymm1

; The gather clears its mask operand, so ymm1 is expected to be all zeros
; here (checked through XMM1 in the RegData above).

hlt

align 4096

; Gather masks, one qword per element. Only bit 63 (the sign bit) of each
; qword is significant. The digits of a label follow memory order, so the
; rightmost digit stands for element 3, the highest qword of the register.
.mask_0000:
times 4 dq 0

.mask_0001:
times 3 dq 0
dq 0x8000_0000_0000_0000

.mask_1000:
dq 0x8000_0000_0000_0000
times 3 dq 0

.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Index vectors. Each index is a signed 64-bit integer that the test body
; scales by 4 and adds to the address of .data_mid.
.index_d0:
times 4 dq 0

; Byte offsets +12, +8, +4, +0
.index_positive_increment:
dq 3, 2, 1, 0

; Byte offsets -16, -12, -8, -4
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -992, -512, +512, +992
.index_full_range:
dq -248, -128, 128, 248

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_4xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination and 64-bit indexes.
; The gather addresses memory as [rax + index * 4 + 4]: an index scale of 4
; (the "4xdisp" of the file name) plus a constant displacement of 4.
;
; Purpose: check that the effective address wraps around 2^64.
; The gather runs with rax = 2 * .data_mid, and each index is built as
;   index = (~.data_mid + delta) >> 2        (arithmetic shift)
; so that
;   index * 4 + 2 * .data_mid + 4 == .data_mid + delta   (mod 2^64)
; The identity needs .data_mid and delta to be multiples of 4, so that the
; shift discards exactly the low two set bits of ~.data_mid.
; NOTE(review): .data_mid follows `align 4096` and tables whose sizes look
; like multiples of 4 -- confirm if the data layout ever changes.
;
; delta per element: 0, -8, -16, +16 bytes relative to .data_mid.

lea rax, [rel .data_mid]

; Element 0: delta = 0
mov rbx, -1
sub rbx, rax                ; rbx = ~.data_mid
sar rbx, 2
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: delta = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 2
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: delta = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 2
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: delta = +16
mov rbx, -1
sub rbx, rax
; Full 64-bit add, matching the subtractions above, so that a carry out of
; the low 16 bits cannot be lost.
add rbx, 16
sar rbx, 2
mov [rel .index_overflow + 3 * 8], rbx

; New base: twice the address of .data_mid. Together with the indexes stored
; above, the address sum only lands back near .data_mid after wrapping.
lea rax, [rel .data_mid]
shl rax, 1

; Give the destination a known value; the full mask replaces every element.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm15, [ymm0 * 4 + rax + 4], ymm1

hlt

align 4096

; Mask with the sign bit (bit 63) set in all four qword elements.
.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Four signed 64-bit index slots. They are zero in the image; the test body
; stores the computed indexes here before loading them into ymm0.
.index_overflow:
times 4 dq 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_8xdisp.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM3":  ["0x2b43dbebde37bc4f", "0x6778ae2a2522e0af", "0x71e5d85fc21448dd", "0x59fc3ca8abf96d9b"],
    "XMM4":  ["0x3087464d8aa16a60", "0x33b7153e97ba9a8e", "0x7be65e82db698176", "0xe273f1177f80d2ec"],
    "XMM5":  ["0xb8c6d8a6df6efe3b", "0x3fd4ea2f8a34fff5", "0xe9341ce2bf633429", "0x2db6b85f6135a9d7"],
    "XMM6":  ["0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7", "0x2db6b85f6135a9d7"],
    "XMM7":  ["0x3087464d8aa16a60", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM8":  ["0xb8c6d8a6df6efe3b", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM9":  ["0x2db6b85f6135a9d7", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM10": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xe273f1177f80d2ec"],
    "XMM11": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM12": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0x2db6b85f6135a9d7"],
    "XMM13": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM14": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"],
    "XMM15": ["0xf1cda2562209301d", "0x0f350767409162b7", "0x002fd22652633359", "0xc0a14faff7b7368a"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; vpgatherqq with a 256-bit destination and 64-bit indexes.
; Every gather addresses memory as [rax + index * 8]: the index is scaled by 8
; (the "8xdisp" of the file name) and rax is anchored at .data_mid.

; PRELOAD reg [, reg ...]
; Loads the first 32 bytes of .data into each listed register, giving every
; destination a known value before it is gathered into.
%macro PRELOAD 1-*
  %rep %0
    vmovapd %1, [rel .data]
    %rotate 1
  %endrep
%endmacro

; GATHER_QQ dest, index_table, mask_table
; Loads the indexes into ymm0 and the mask into ymm1, then gathers into dest.
%macro GATHER_QQ 3
  vmovaps ymm0, [rel %2]
  vmovaps ymm1, [rel %3]
  vpgatherqq %1, [ymm0 * 8 + rax], ymm1
%endmacro

lea rax, [rel .data_mid]

PRELOAD ymm15, ymm14, ymm13, ymm12, ymm11, ymm10, ymm9, ymm8, ymm7, ymm6, ymm5, ymm4

; Mask with no element enabled
GATHER_QQ ymm15, .index_d0, .mask_0000
GATHER_QQ ymm14, .index_positive_increment, .mask_0000
GATHER_QQ ymm13, .index_negative_decrement, .mask_0000

; Mask enabling only element 3, the highest qword (.mask_0001)
GATHER_QQ ymm12, .index_d0, .mask_0001
GATHER_QQ ymm11, .index_positive_increment, .mask_0001
GATHER_QQ ymm10, .index_negative_decrement, .mask_0001

; Mask enabling only element 0, the lowest qword (.mask_1000)
GATHER_QQ ymm9, .index_d0, .mask_1000
GATHER_QQ ymm8, .index_positive_increment, .mask_1000
GATHER_QQ ymm7, .index_negative_decrement, .mask_1000

; Mask enabling all four elements
GATHER_QQ ymm6, .index_d0, .mask_1111
GATHER_QQ ymm5, .index_positive_increment, .mask_1111
GATHER_QQ ymm4, .index_negative_decrement, .mask_1111

; Indexes spread far to both sides of .data_mid, all elements enabled.
; ymm3 is not preloaded; the full mask overwrites every element.
GATHER_QQ ymm3, .index_full_range, .mask_1111

; The gather clears its mask operand, so ymm1 is zero from here on.

hlt

align 4096

; Gather masks, one qword per element. Only bit 63 (the sign bit) of each
; qword is significant. The digits of a label follow memory order, so the
; rightmost digit stands for element 3, the highest qword of the register.
.mask_0000:
times 4 dq 0

.mask_0001:
times 3 dq 0
dq 0x8000_0000_0000_0000

.mask_1000:
dq 0x8000_0000_0000_0000
times 3 dq 0

.mask_1111:
times 4 dq 0x8000_0000_0000_0000

; Index vectors. Each index is a signed 64-bit integer that the test body
; scales by 8 and adds to the address of .data_mid.
.index_d0:
times 4 dq 0

; Byte offsets +24, +16, +8, +0
.index_positive_increment:
dq 3, 2, 1, 0

; Byte offsets -32, -24, -16, -8
.index_negative_decrement:
dq -4, -3, -2, -1

; Byte offsets -960, -512, +512, +960
.index_full_range:
dq -120, -64, 64, 120

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vpgather_qq_256bit_8xdisp_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1":  ["0", "0", "0", "0"],
    "XMM15": ["0x2db6b85f6135a9d7", "0xe273f1177f80d2ec", "0x7be65e82db698176", "0x3fd4ea2f8a34fff5"]
  },
  "HostFeatures": ["AVX"]
}
%endif

; 256-bit
; 8x displacement (each index is scaled by 8)
; 64-bit indexes
;
; Checks that VPGATHERQQ address generation wraps around 64 bits.
; Each index is built as (-1 - data_mid + delta) >> 3 and the base register is
; later set to 2 * data_mid, so that index * 8 + base + 8 overflows and lands
; back near .data_mid.

; Calculate an address that heavily overflows
lea rax, [rel .data_mid]

; Element 0: delta = 0
mov rbx, -1
sub rbx, rax
sar rbx, 3
mov [rel .index_overflow + 0 * 8], rbx

; Element 1: delta = -8
mov rbx, -1
sub rbx, rax
sub rbx, 8
sar rbx, 3
mov [rel .index_overflow + 1 * 8], rbx

; Element 2: delta = -16
mov rbx, -1
sub rbx, rax
sub rbx, 16
sar rbx, 3
mov [rel .index_overflow + 2 * 8], rbx

; Element 3: delta = +16
; The add must use the full 64-bit register like the other elements do; a
; 16-bit add would silently drop any carry out of bit 15 and make the index
; depend on where the test happens to be loaded.
mov rbx, -1
sub rbx, rax
add rbx, 16
sar rbx, 3
mov [rel .index_overflow + 3 * 8], rbx

; Calculate new base which offsets from the overflow
; base = 2 * data_mid. Assuming .data_mid is 8-byte aligned, the effective
; addresses index * 8 + base + 8 wrap to .data_mid + 0, - 8, - 16 and + 16,
; which is what the RegData for XMM15 in the config header expects.
lea rax, [rel .data_mid]
shl rax, 1

; Preload the destination with unrelated data before the gather overwrites it.
vmovapd ymm15, [rel .data]

vmovaps ymm0, [rel .index_overflow]
vmovaps ymm1, [rel .mask_1111]
vpgatherqq ymm15, [ymm0 * 8 + rax + 8], ymm1

; The config header expects the mask register (ymm1) to read as zero here.

hlt

; Data tables start on a 4096-byte boundary.
align 4096

; Gather mask with the sign bit (bit 63) set in all four qword elements.
.mask_1111:
dq 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000, 0x8000_0000_0000_0000

; Indexing is a signed 64-bit integer.
; Placeholder only: the four entries are overwritten at runtime by the test
; code before the gather executes.
.index_overflow:
dq 0, 0, 0, 0

; Random data, 512-byte per line
.data:
db 0x1d, 0x30, 0x09, 0x22, 0x56, 0xa2, 0xcd, 0xf1, 0xb7, 0x62, 0x91, 0x40, 0x67, 0x07, 0x35, 0x0f, 0x59, 0x33, 0x63, 0x52, 0x26, 0xd2, 0x2f, 0x00, 0x8a, 0x36, 0xb7, 0xf7, 0xaf, 0x4f, 0xa1, 0xc0, 0xe3, 0xbe, 0x99, 0x67, 0x5f, 0x80, 0x60, 0xcb, 0x43, 0xfa, 0x5b, 0x86, 0xb1, 0x11, 0xbc, 0xb3, 0x7b, 0x43, 0x5b, 0x45, 0x9e, 0x33, 0x89, 0xb5, 0x1b, 0xb9, 0x33, 0x4f, 0xdb, 0x5d, 0x93, 0xd6, 0x4f, 0xbc, 0x37, 0xde, 0xeb, 0xdb, 0x43, 0x2b, 0x05, 0x60, 0xb8, 0x98, 0x5c, 0xa3, 0xe3, 0x1b, 0x33, 0x03, 0x29, 0x4b, 0x12, 0x4c, 0x1e, 0xe6, 0x5e, 0x0e, 0x6c, 0xa1, 0xb9, 0x36, 0xfa, 0x6c, 0x7f, 0xc6, 0xa8, 0x38, 0x73, 0x2a, 0x0a, 0x25, 0x69, 0xa5, 0x97, 0x3f, 0x24, 0x00, 0x30, 0x4d, 0x27, 0xb3, 0x94, 0x48, 0xef, 0x47, 0x98, 0x71, 0x0d, 0x56, 0x76, 0xec, 0x41, 0x12, 0x9b, 0x7b, 0x9c, 0xf5, 0x85, 0x07, 0x2d, 0x6b, 0xc6, 0xc1, 0x2e, 0x72, 0x22, 0x5a, 0x43, 0xff, 0x1e, 0xec, 0x67, 0x2b, 0x31, 0x96, 0x14, 0x2c, 0xb1, 0x5f, 0x5d, 0x0c, 0xc9, 0xad, 0x15, 0x5f, 0xab, 0x66, 0x14, 0x1c, 0x72, 0xfa, 0x23, 0xef, 0x9f, 0x77, 0xf6, 0x50, 0xb0, 0x70, 0xb8, 0x3c, 0x85, 0x9e, 0x90, 0x69, 0x17, 0x25, 0xae, 0x6e, 0xe2, 0x16, 0x7d, 0x42, 0x38, 0xdf, 0x74, 0x72, 0x7b, 0x97, 0xa9, 0x9e, 0x40, 0x24, 0x85, 0xdc, 0x64, 0xfa, 0xb1, 0x8b, 0x95, 0xe6, 0xe4, 0x13, 0x72, 0xf1, 0x52, 0x2f, 0xa0, 0xd6, 0x52, 0xc0, 0x11, 0xa7, 0xfe, 0xd5, 0x3b, 0x56, 0xca, 0xbc, 0x01, 0xce, 0x3d, 0xd2, 0x30, 0x97, 0x1d, 0xdc, 0xeb, 0x9d, 0xa9, 0x3e, 0x09, 0xef, 0xee, 0x7f, 0x09, 0x7b, 0x82, 0x43, 0x15, 0x2e, 0xa4, 0x2e, 0x97, 0x21, 0x92, 0x7e, 0x69, 0x21, 0x25, 0xda, 0x46, 0x7c, 0x0c, 0xcd, 0x1d, 0xde, 0x42, 0x11, 0xa2, 0xef, 0xa2, 0xc8, 0x32, 0x9a, 0x82, 0xcf, 0x72, 0x7e, 0x22, 0xa6, 0x11, 0xfa, 0xec, 0x0b, 0x77, 0x99, 0x38, 0x03, 0xf6, 0x80, 0xba, 0xea, 0x75, 0x19, 0xb0, 0x48, 0x02, 0xb2, 0x6b, 0xc0, 0x8c, 0xfb, 0xfe, 0xaf, 0x94, 0x4f, 0x6f, 0xb4, 0xcb, 0x1c, 0x27, 0xf0, 0x41, 0xb6, 0x46, 0x41, 0x68, 0x3d, 0x05, 0x79, 0x6b, 0xcd, 0xb7, 0x20, 0xdc, 0x40, 0x81, 0x58, 0xcb, 0x33, 0xa3, 0xf3, 0x34, 0xdc, 0x63, 0x2d, 0xa5, 0xb5, 
0xa1, 0xd1, 0xfd, 0x49, 0x5b, 0x46, 0x94, 0x01, 0xa8, 0xf2, 0xd8, 0x93, 0x2c, 0xbb, 0x57, 0xfe, 0x7c, 0x77, 0x3b, 0x19, 0x6f, 0x3c, 0xaa, 0x23, 0x5b, 0xc0, 0xe7, 0x00, 0x41, 0x97, 0x91, 0xe8, 0x00, 0x12, 0xdf, 0xf6, 0x5c, 0x2e, 0xc6, 0x8e, 0xc6, 0x77, 0x59, 0x78, 0x9b, 0xef, 0x63, 0xb0, 0xd7, 0xbb, 0xc4, 0x0b, 0x60, 0x65, 0x3f, 0xfe, 0xbf, 0x04, 0x3e, 0xae, 0xc2, 0xa5, 0x90, 0xe1, 0x2a, 0x56, 0x3f, 0x4c, 0x3f, 0x7a, 0x7d, 0xda, 0x81, 0x50, 0xea, 0x4c, 0xfe, 0xc3, 0xf8, 0x5c, 0x2b, 0x67, 0xb3, 0x9f, 0x8b, 0x95, 0xda, 0x6f, 0x5d, 0xdd, 0x82, 0x7f, 0x52, 0xa2, 0xcc, 0x57, 0xec, 0xc4, 0x14, 0xd2, 0x4f, 0x1b, 0xcb, 0xea, 0xaf, 0x0e, 0x0f, 0x53, 0xaa, 0x56, 0x63, 0xea, 0x36, 0xa6, 0x89, 0x1a, 0x66, 0xc0, 0x4e, 0xf4, 0x1e, 0x02, 0x43, 0xde, 0xde, 0xc8, 0x9e, 0x88, 0x6e, 0x32, 0xd4, 0xcb, 0x47, 0x24, 0x7c, 0x28, 0x38, 0xd4, 0x95, 0xb6, 0xa3, 0x91, 0x69, 0xc7, 0x8d, 0xfd, 0x15, 0xf5, 0xbf, 0xb1, 0x98, 0x8c, 0x57, 0x51, 0xbf, 0x83, 0x6a, 0x35, 0x10, 0x03, 0x50, 0xe5, 0xf7, 0xfa, 0xf8, 0xa5, 0xb0, 0xdb, 0xfb, 0x42, 0x93, 0xbb, 0x17, 0xf7, 0x36, 0xbe, 0x26, 0x66, 0x61, 0xe2

db 0xaf, 0xe0, 0x22, 0x25, 0x2a, 0xae, 0x78, 0x67, 0x8f, 0x7e, 0x9e, 0x59, 0xd7, 0xa3, 0x71, 0xcc, 0x43, 0x85, 0x09, 0xf9, 0x18, 0x52, 0x7b, 0x01, 0x73, 0xcb, 0x31, 0x18, 0x66, 0x79, 0x67, 0x10, 0x67, 0xd8, 0xdf, 0x43, 0xaf, 0x2d, 0x9a, 0x09, 0x9c, 0xd1, 0x37, 0x7e, 0xf5, 0x1c, 0x3c, 0x4f, 0x15, 0xe1, 0x6f, 0xfd, 0x13, 0x3d, 0x53, 0x81, 0xa9, 0x93, 0x5f, 0x92, 0x41, 0x48, 0xec, 0x87, 0x87, 0x1d, 0x0b, 0xaa, 0xaa, 0xd3, 0xc2, 0x98, 0x20, 0xce, 0x28, 0xaf, 0x9d, 0x84, 0x69, 0x4a, 0xfd, 0xc0, 0x9c, 0x2e, 0x50, 0x20, 0xb2, 0x00, 0xc1, 0x81, 0x2a, 0x32, 0x8e, 0x95, 0x20, 0xa7, 0xca, 0x39, 0x28, 0x12, 0x23, 0x0e, 0x43, 0xd3, 0x82, 0x76, 0x73, 0x3c, 0xbf, 0xa9, 0x98, 0xf6, 0x39, 0x6d, 0xd9, 0x15, 0x33, 0x1e, 0x07, 0x7c, 0x08, 0x12, 0x23, 0xbd, 0xd3, 0x34, 0x2d, 0x9a, 0x23, 0x21, 0x46, 0xf3, 0x9a, 0x04, 0x25, 0x62, 0xeb, 0x7e, 0x9a, 0xaa, 0xb6, 0x26, 0xaa, 0x85, 0x01, 0x3a, 0xd8, 0xfc, 0x57, 0x98, 0xb9, 0xe4, 0xc4, 0xe9, 0x11, 0x3e, 0x22, 0x95, 0x3b, 0x41, 0x2b, 0x02, 0x04, 0x6c, 0x75, 0xa5, 0xf2, 0xaa, 0x09, 0x9e, 0x6f, 0xab, 0x1d, 0x2a, 0x5c, 0xde, 0x21, 0xb1, 0x96, 0x2d, 0x86, 0x3f, 0xd0, 0x07, 0x18, 0x1f, 0x87, 0xc2, 0x8f, 0xdf, 0x6a, 0x57, 0x6d, 0x3f, 0x80, 0xc5, 0x08, 0x19, 0xa5, 0x09, 0x65, 0x3d, 0xdc, 0x9e, 0x80, 0x3c, 0x2a, 0x0e, 0x7a, 0x40, 0x04, 0x0b, 0xcc, 0x61, 0xdb, 0x73, 0xfc, 0xa5, 0x0a, 0x42, 0x18, 0xc1, 0xd5, 0xbd, 0x18, 0x78, 0xa1, 0xe4, 0xde, 0x44, 0xec, 0x79, 0xb0, 0x27, 0xaa, 0x45, 0x21, 0x57, 0x19, 0x75, 0x09, 0x5c, 0x58, 0xd5, 0xb9, 0x6f, 0x3b, 0x48, 0x59, 0x41, 0x3e, 0xfd, 0x17, 0x43, 0x27, 0xc3, 0x8d, 0x76, 0x8e, 0x38, 0x47, 0xe7, 0xd2, 0xea, 0x54, 0x73, 0x8a, 0x65, 0x4c, 0x49, 0x91, 0xaf, 0x29, 0x65, 0x0d, 0x81, 0xa4, 0x77, 0xd7, 0x32, 0xd0, 0x69, 0xd9, 0x6b, 0xa3, 0x9b, 0x24, 0xd6, 0x0a, 0xd2, 0x77, 0x38, 0x59, 0x0b, 0xc8, 0x5c, 0xc7, 0x0b, 0x1d, 0xd1, 0xfa, 0xa7, 0x45, 0x3c, 0xeb, 0x5c, 0x8e, 0x25, 0x35, 0x81, 0x6d, 0x6d, 0xfe, 0xb4, 0x63, 0x89, 0xe4, 0xf0, 0xa8, 0xda, 0xb7, 0xd4, 0xff, 0x5d, 0x28, 0x97, 0x11, 0xf9, 0x8d, 0xab, 0x29, 0xd5, 
0xd3, 0x1c, 0x70, 0x20, 0x4c, 0x41, 0x16, 0x42, 0xfd, 0xfc, 0x62, 0x82, 0x40, 0x59, 0x34, 0x28, 0xd0, 0xd5, 0xfc, 0xac, 0x97, 0xb8, 0x82, 0x0e, 0x4b, 0xae, 0x51, 0x28, 0x1a, 0xf1, 0x87, 0xd3, 0x20, 0xa3, 0xe7, 0x74, 0x69, 0x3c, 0x54, 0x8d, 0xc5, 0x56, 0x1d, 0xcd, 0x75, 0xae, 0x88, 0x17, 0x30, 0xdf, 0x46, 0x4a, 0xbc, 0x64, 0xff, 0xa2, 0xc1, 0x62, 0xbd, 0x88, 0x7b, 0x3e, 0xa1, 0x0c, 0xa9, 0x13, 0x0e, 0xc1, 0xb4, 0x24, 0xe6, 0x96, 0x1b, 0x9c, 0x9b, 0xac, 0x44, 0x33, 0x5b, 0xda, 0xd5, 0x88, 0x4d, 0xfe, 0x81, 0x09, 0x07, 0x17, 0xcf, 0x14, 0x05, 0xaf, 0xf8, 0x72, 0x14, 0x49, 0x5f, 0x06, 0x62, 0xab, 0xe0, 0x42, 0x70, 0x12, 0x59, 0x41, 0x0f, 0x18, 0x83, 0x68, 0x6d, 0xc6, 0x3c, 0xea, 0xe0, 0x6d, 0xd4, 0xae, 0xa6, 0xf1, 0x63, 0x21, 0x7f, 0xb5, 0x9d, 0x22, 0xf4, 0xd2, 0x49, 0x49, 0xed, 0x07, 0xb1, 0x11, 0xf9, 0x2e, 0x74, 0xbe, 0x35, 0x47, 0xdc, 0xef, 0x85, 0x0b, 0x4d, 0x46, 0xe6, 0x1f, 0x60, 0x6a, 0xa1, 0x8a, 0x4d, 0x46, 0x87, 0x30, 0x8e, 0x9a, 0xba, 0x97, 0x3e, 0x15, 0xb7, 0x33, 0x76, 0x81, 0x69, 0xdb, 0x82, 0x5e, 0xe6, 0x7b, 0xec, 0xd2, 0x80, 0x7f, 0x17, 0xf1, 0x73, 0xe2

.data_mid:
db 0xd7, 0xa9, 0x35, 0x61, 0x5f, 0xb8, 0xb6, 0x2d, 0x29, 0x34, 0x63, 0xbf, 0xe2, 0x1c, 0x34, 0xe9, 0xf5, 0xff, 0x34, 0x8a, 0x2f, 0xea, 0xd4, 0x3f, 0x3b, 0xfe, 0x6e, 0xdf, 0xa6, 0xd8, 0xc6, 0xb8, 0xc5, 0xff, 0x12, 0x97, 0x53, 0xda, 0x86, 0xa8, 0x0a, 0x12, 0x4e, 0x5d, 0x96, 0x65, 0x51, 0x22, 0xe2, 0x9d, 0x08, 0x71, 0x84, 0x19, 0x8b, 0xbf, 0x29, 0xd3, 0x3f, 0xab, 0xde, 0xe4, 0x27, 0x8b, 0x99, 0xcc, 0xb1, 0x7c, 0xa5, 0x71, 0x91, 0x9a, 0x0b, 0xad, 0x75, 0x86, 0xe3, 0x9c, 0x4e, 0x0c, 0x01, 0xb3, 0x12, 0x33, 0x90, 0x81, 0x7c, 0x71, 0x2c, 0x70, 0x61, 0xd5, 0x39, 0x0c, 0x45, 0xfc, 0x27, 0xaf, 0xbb, 0xd9, 0x26, 0x1b, 0x33, 0xb4, 0x0d, 0xf8, 0xd6, 0x2d, 0x09, 0xc7, 0x8c, 0xbf, 0x48, 0x53, 0x14, 0x94, 0x76, 0x25, 0xc7, 0x0c, 0x69, 0x49, 0x82, 0xb4, 0x2f, 0x48, 0x38, 0x44, 0x9d, 0x90, 0x6d, 0x66, 0x35, 0xe9, 0x3e, 0x2f, 0x2a, 0xb7, 0xe1, 0xb1, 0x2b, 0x99, 0x08, 0x6f, 0x5c, 0x6c, 0xdf, 0xdb, 0x10, 0xe2, 0xaa, 0x86, 0xe7, 0xf8, 0x9e, 0x62, 0xde, 0xa5, 0x81, 0x6b, 0x20, 0x47, 0xa9, 0x06, 0x49, 0xc0, 0x78, 0x8c, 0x70, 0x93, 0x7e, 0xda, 0xda, 0x5e, 0x3b, 0x23, 0xf9, 0xcc, 0x87, 0xdf, 0x48, 0x4f, 0xd6, 0x77, 0xce, 0x45, 0xe1, 0xdc, 0x0c, 0x7a, 0x0c, 0x50, 0x15, 0x63, 0x8c, 0x48, 0xd3, 0x8e, 0xfa, 0xcc, 0xac, 0x1a, 0x83, 0xde, 0xb1, 0x87, 0x2a, 0x58, 0x5c, 0xa5, 0x20, 0x3d, 0xaa, 0x1e, 0x5d, 0x71, 0xa6, 0x57, 0x75, 0x82, 0xb7, 0x33, 0x9e, 0x6b, 0xf3, 0x35, 0x02, 0x98, 0x03, 0xe1, 0x3b, 0xd2, 0x9f, 0x7a, 0x06, 0x85, 0xef, 0x7d, 0xd9, 0xf2, 0x0c, 0x9e, 0xce, 0xb9, 0xce, 0x13, 0x4a, 0x9e, 0x8a, 0x29, 0xe6, 0xe5, 0xe4, 0x39, 0xba, 0xfd, 0xa3, 0x33, 0xa8, 0x13, 0x9e, 0xa5, 0x11, 0x37, 0x69, 0xbc, 0xda, 0x11, 0x49, 0x2d, 0x4a, 0xef, 0x20, 0x8b, 0x7a, 0xb8, 0x9c, 0xc3, 0xaf, 0x26, 0x71, 0xd9, 0xa2, 0xf6, 0x0f, 0x85, 0x87, 0xa8, 0x6c, 0xf9, 0x99, 0xa2, 0xb2, 0x36, 0x2d, 0x78, 0x10, 0xe4, 0x33, 0x8d, 0xa4, 0x63, 0xea, 0x02, 0xb9, 0xac, 0x2f, 0x90, 0x39, 0x2d, 0x0e, 0x2e, 0xf5, 0x08, 0xa5, 0x5c, 0x8e, 0x71, 0x30, 0x0d, 0x1b, 0x84, 0x7a, 0xd7, 0xd4, 0xab, 0x81, 0x82, 0x18, 0x37, 0xf3, 0x28, 
0x6f, 0x4e, 0x28, 0x71, 0xda, 0xc9, 0x99, 0x46, 0x14, 0x46, 0x77, 0x01, 0x16, 0x21, 0xae, 0x83, 0x93, 0x86, 0x7f, 0x5a, 0xee, 0xd5, 0xdf, 0x48, 0x5b, 0x15, 0xc8, 0x09, 0x30, 0x8f, 0x01, 0xcc, 0x95, 0x30, 0xd9, 0xf7, 0x72, 0x97, 0xfd, 0x9d, 0xec, 0x9f, 0xbf, 0x5c, 0xbf, 0x4f, 0xca, 0x33, 0xb4, 0xd2, 0xa2, 0xb9, 0x08, 0x9c, 0x40, 0x25, 0x3f, 0x86, 0xdc, 0x83, 0x70, 0x2f, 0xfb, 0x2a, 0xf8, 0x61, 0x1f, 0xa1, 0x1f, 0x36, 0x04, 0xe2, 0xef, 0x1c, 0xa4, 0xcd, 0x3c, 0x7f, 0xc5, 0x73, 0x9c, 0x2e, 0xeb, 0x03, 0x79, 0xd1, 0x02, 0xfc, 0x6f, 0xbd, 0x5a, 0x95, 0xb2, 0xf6, 0x25, 0x96, 0xe6, 0x80, 0x0a, 0xc5, 0xc7, 0xca, 0x8d, 0x31, 0xae, 0xf0, 0x49, 0xcf, 0x43, 0x06, 0x27, 0x7f, 0x25, 0xc7, 0x4c, 0xb7, 0xfc, 0x73, 0xd3, 0x04, 0xd3, 0xb9, 0x9f, 0x74, 0xed, 0x9e, 0x3c, 0xf0, 0xcf, 0x26, 0x2b, 0xd9, 0xcb, 0x78, 0x2a, 0xef, 0x72, 0xf7, 0xb6, 0x78, 0x30, 0x2d, 0x8c, 0x83, 0x73, 0x66, 0x74, 0x3d, 0x66, 0x0a, 0x74, 0x5a, 0x3f, 0x9f, 0x6e, 0x56, 0x68, 0x01, 0xc2, 0xca, 0x2b, 0xa1, 0x25, 0x36, 0x9c, 0x3b, 0xa4, 0x5e, 0x44, 0xf1, 0x18, 0x1d, 0xb6, 0x1a, 0x3a, 0xee, 0x8d, 0x67, 0x34, 0x9c

db 0xdd, 0x48, 0x14, 0xc2, 0x5f, 0xd8, 0xe5, 0x71, 0x22, 0xbf, 0xbc, 0x84, 0xda, 0xc1, 0xb1, 0x22, 0x55, 0xa4, 0x63, 0x41, 0x77, 0xac, 0x40, 0x2d, 0x44, 0x73, 0x8c, 0x14, 0xba, 0x5e, 0x63, 0x68, 0x65, 0x61, 0x6d, 0xec, 0xe2, 0x6d, 0x37, 0x22, 0x04, 0xeb, 0xc7, 0xd4, 0xc9, 0x62, 0x56, 0x13, 0x96, 0x29, 0x03, 0xf4, 0x55, 0xe2, 0x58, 0x7d, 0xda, 0x52, 0x2e, 0x94, 0x07, 0xe6, 0xef, 0xc0, 0xee, 0x9e, 0x0b, 0xf7, 0xcd, 0x13, 0x8b, 0x7d, 0xea, 0xdc, 0xf8, 0xf1, 0xcb, 0xad, 0x49, 0x97, 0xc9, 0x98, 0x0b, 0xcf, 0x84, 0x8e, 0x8e, 0xbb, 0x06, 0x2e, 0x54, 0xf5, 0xa7, 0xbd, 0x70, 0x7e, 0x38, 0x69, 0x8d, 0xb0, 0x01, 0x7b, 0x41, 0x80, 0x09, 0x44, 0xfd, 0x7e, 0x21, 0xb4, 0xbe, 0x6b, 0x4a, 0xb7, 0xca, 0x2d, 0x19, 0xfe, 0x6d, 0xd6, 0x11, 0x29, 0xbb, 0xb2, 0x16, 0xf1, 0xe7, 0x92, 0x71, 0xda, 0x7e, 0x68, 0x3a, 0xe0, 0xea, 0x89, 0x8d, 0xe0, 0x44, 0x48, 0x25, 0x92, 0x37, 0x54, 0x26, 0xf2, 0xab, 0xb3, 0x3b, 0xdb, 0xbb, 0x2b, 0x5c, 0xf5, 0xbc, 0xc7, 0x97, 0xdb, 0xc7, 0x49, 0x25, 0x7c, 0xc2, 0x80, 0x02, 0x69, 0xd4, 0xda, 0xda, 0xe1, 0x04, 0xf3, 0x19, 0xb8, 0xc9, 0xb2, 0xfb, 0x1e, 0x47, 0xa9, 0x0c, 0xa3, 0x48, 0xce, 0xc2, 0x9e, 0x3b, 0x28, 0x23, 0x5a, 0x20, 0x44, 0x77, 0x40, 0xe2, 0xd7, 0x20, 0xd5, 0x71, 0x6f, 0xd4, 0x3c, 0x68, 0x38, 0x9b, 0x89, 0x2e, 0x2d, 0xa8, 0x1f, 0x99, 0xb5, 0x8a, 0x66, 0x07, 0x59, 0x75, 0x9e, 0xf8, 0xd9, 0xbe, 0x85, 0x6a, 0x20, 0x92, 0x9d, 0xd2, 0x5e, 0x45, 0xc0, 0x60, 0xbe, 0x85, 0x0b, 0x84, 0x47, 0xf5, 0xa8, 0x43, 0x87, 0xf1, 0x21, 0x21, 0xb0, 0x3b, 0x04, 0x13, 0x16, 0x3e, 0xdf, 0xc3, 0xc6, 0x04, 0x73, 0xcd, 0x92, 0x76, 0xfb, 0xe7, 0x9c, 0xd3, 0x46, 0x11, 0x78, 0xca, 0x12, 0xd9, 0x4a, 0x35, 0xf1, 0x6e, 0x89, 0x8b, 0xe9, 0x7a, 0x04, 0xba, 0x18, 0x25, 0x7c, 0x9e, 0xe6, 0x4f, 0xc2, 0x56, 0x05, 0x72, 0xc3, 0x76, 0xee, 0x7d, 0x77, 0x19, 0x7a, 0x73, 0x2c, 0x81, 0xb8, 0xc7, 0xd9, 0x7f, 0x17, 0x5d, 0x30, 0xda, 0x77, 0x3c, 0x14, 0x88, 0xe8, 0xe4, 0xbf, 0xee, 0x21, 0x1c, 0x29, 0x4e, 0x58, 0xa8, 0x8a, 0x5c, 0xae, 0xa2, 0x1c, 0x7c, 0x25, 0x7c, 0x1c, 0x39, 0xa4, 0x28, 0x4b, 
0x78, 0x52, 0xae, 0x2c, 0xbb, 0x5f, 0xbf, 0x51, 0x09, 0x20, 0x76, 0xb2, 0x7d, 0xb1, 0x63, 0x84, 0xc5, 0x49, 0x8a, 0x73, 0xdb, 0x76, 0x1d, 0x25, 0x31, 0xf2, 0x1e, 0x19, 0x38, 0xc8, 0x3b, 0x51, 0x3c, 0x13, 0x52, 0x84, 0xae, 0xc2, 0xe4, 0x8a, 0x57, 0x0d, 0xde, 0x8d, 0x18, 0x48, 0x9a, 0xbd, 0xbf, 0xf3, 0xea, 0x79, 0x17, 0x06, 0x96, 0x72, 0x08, 0x60, 0x95, 0xf9, 0x6f, 0x25, 0x0c, 0xb7, 0x9d, 0x98, 0x23, 0x01, 0xc8, 0x7a, 0xdb, 0x75, 0x63, 0x64, 0x14, 0x5e, 0x10, 0xf5, 0x16, 0x48, 0xbc, 0xc6, 0x7e, 0x24, 0xf3, 0xad, 0x57, 0x3f, 0x7d, 0x6c, 0xab, 0x18, 0x8c, 0x12, 0xc5, 0x0c, 0xd8, 0xb5, 0x1e, 0x43, 0x7c, 0x23, 0x17, 0x48, 0xba, 0x76, 0x3b, 0xd9, 0x2b, 0xae, 0x1b, 0xef, 0x58, 0xfa, 0x87, 0xad, 0x9b, 0x6d, 0xf9, 0xab, 0xa8, 0x3c, 0xfc, 0x59, 0x67, 0xa6, 0x2c, 0xc7, 0x75, 0xa4, 0x97, 0xca, 0x18, 0x18, 0x04, 0x2c, 0xb3, 0x0e, 0xa9, 0x69, 0x33, 0x67, 0xa2, 0xc6, 0xbc, 0x98, 0x48, 0x71, 0x11, 0x05, 0x30, 0xf6, 0xa9, 0x61, 0x40, 0x46, 0xf1, 0x41, 0x37, 0xd0, 0x6b, 0x7c, 0x1f, 0x03, 0x5c, 0xe9, 0xf4, 0x59, 0x1d, 0x35, 0xf0, 0x98, 0x42, 0x4a, 0x92, 0x2a, 0xc3, 0x9a, 0xb8, 0xa5


================================================
FILE: unittests/ASM/VEX/vphaddd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xA6A8AAAC86888A8C", "0xE6E8EAECC6C8CACC", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xE6E8EAECC6C8CACC", "0xA6A8AAAC86888A8C", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xA6A8AAAC86888A8C", "0xE6E8EAECC6C8CACC", "0xA6A8AAAC86888A8C", "0xE6E8EAECC6C8CACC"],
    "XMM5": ["0xE6E8EAECC6C8CACC", "0xA6A8AAAC86888A8C", "0xE6E8EAECC6C8CACC", "0xA6A8AAAC86888A8C"]
  }
}
%endif

; VPHADDD (horizontal add of adjacent dwords), 128-bit and 256-bit forms,
; with the second source taken from memory.
lea rdx, [rel .data]

; First sources: the two 32-byte rows of .data.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Memory Operands
; 128-bit form, both operand orders. RegData expects the upper 128 bits of
; ymm2/ymm3 to be zero.
vphaddd xmm2, xmm0, [rdx + 32]
vphaddd xmm3, xmm1, [rdx]

; 256-bit form, both operand orders. Both 128-bit lanes of each input row are
; identical, so RegData expects the same result in each lane.
vphaddd ymm4, ymm0, [rdx + 32]
vphaddd ymm5, ymm1, [rdx]

hlt

align 32
.data:
; Row 0 (loaded into ymm0): same 16 bytes in both lanes.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1 (loaded into ymm1): same 16 bytes in both lanes.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vphaddsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x7FFF7FFF7FFF7FFF", "0x800080007FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x800080007FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x71836D874331472D", "0x800080007FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x800080007FFF7FFF", "0x71836D874331472D", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x7FFF7FFF7FFF7FFF", "0x71836D874331472D", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x800080007FFF7FFF", "0x800080007FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x71836D874331472D", "0x800080007FFF7FFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; VPHADDSW (horizontal add of adjacent signed words with saturation),
; 128-bit form, with the second source taken from memory.
lea rdx, [rel .data]

; First sources: the four 16-byte rows of .data.
vmovaps xmm0, [rdx]
vmovaps xmm1, [rdx + 16]
vmovaps xmm2, [rdx + 32]
vmovaps xmm3, [rdx + 48]

; Row 0 paired with row 1, in both operand orders.
vphaddsw xmm4,  xmm0, [rdx + 16]
vphaddsw xmm5,  xmm1, [rdx]

; Non-saturating row 2 paired with a saturating row, in both operand orders.
vphaddsw xmm6,  xmm2, [rdx + 16]
vphaddsw xmm7,  xmm3, [rdx + 32]

; Remaining row combinations.
vphaddsw xmm8,  xmm0, [rdx + 32]
vphaddsw xmm9,  xmm1, [rdx + 48]

vphaddsw xmm10, xmm2, [rdx + 48]

hlt

align 32
.data:
; Row 0: every adjacent word pair sums above 0x7FFF (e.g. 0x4748 + 0x4546),
; so RegData expects 0x7FFF in every result word.
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: 0x7F7F pairs saturate to 0x7FFF, 0x8080 pairs saturate to 0x8000.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080

; Row 2: pair sums stay in range (e.g. 0x2416 + 0x2317 = 0x472D).
dq 0x2119221823172416
dq 0x3941384237433644

; Row 3: same bytes as row 1.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080


================================================
FILE: unittests/ASM/VEX/vphaddsw_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x7FFF7FFF7FFF7FFF", "0x800080007FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF80008000"],
    "XMM5":  ["0x800080007FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF80008000", "0x7FFF7FFF7FFF7FFF"],
    "XMM6":  ["0x71836D874331472D", "0x800080007FFF7FFF", "0x4331472D71836D87", "0x7FFF7FFF80008000"],
    "XMM7":  ["0x800080007FFF7FFF", "0x71836D874331472D", "0x7FFF7FFF80008000", "0x4331472D71836D87"],
    "XMM8":  ["0x7FFF7FFF7FFF7FFF", "0x71836D874331472D", "0x7FFF7FFF7FFF7FFF", "0x4331472D71836D87"],
    "XMM9":  ["0x800080007FFF7FFF", "0x800080007FFF7FFF", "0x7FFF7FFF80008000", "0x7FFF7FFF80008000"],
    "XMM10": ["0x71836D874331472D", "0x800080007FFF7FFF", "0x4331472D71836D87", "0x7FFF7FFF80008000"]
  }
}
%endif

; VPHADDSW (horizontal add of adjacent signed words with saturation),
; 256-bit form, with the second source taken from memory.
lea rdx, [rel .data]

; First sources: the four 32-byte rows of .data.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]
vmovaps ymm2, [rdx + 64]
vmovaps ymm3, [rdx + 96]

; Row 0 paired with row 1, in both operand orders.
vphaddsw ymm4,  ymm0, [rdx + 32]
vphaddsw ymm5,  ymm1, [rdx]

; Non-saturating row 2 paired with a saturating row, in both operand orders.
vphaddsw ymm6,  ymm2, [rdx + 32]
vphaddsw ymm7,  ymm3, [rdx + 64]

; Remaining row combinations.
vphaddsw ymm8,  ymm0, [rdx + 64]
vphaddsw ymm9,  ymm1, [rdx + 96]

vphaddsw ymm10, ymm2, [rdx + 96]

hlt

align 32
.data:
; In every row the upper 128-bit lane holds the lower lane's two qwords in
; swapped order, so the two lanes of a result differ.

; Row 0: every adjacent word pair sums above 0x7FFF, so RegData expects
; 0x7FFF in every result word.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x4142434445464748

; Row 1: 0x7F7F pairs saturate to 0x7FFF, 0x8080 pairs saturate to 0x8000.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x7F7F7F7F7F7F7F7F

; Row 2: pair sums stay in range (e.g. 0x2416 + 0x2317 = 0x472D).
dq 0x2119221823172416
dq 0x3941384237433644
dq 0x3941384237433644
dq 0x2119221823172416

; Row 3: same bytes as row 1.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x7F7F7F7F7F7F7F7F


================================================
FILE: unittests/ASM/VEX/vphaddw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0xA4A6ACAE84868C8E", "0xE4E6ECEEC4C6CCCE", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xE4E6ECEEC4C6CCCE", "0xA4A6ACAE84868C8E", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xA4A6ACAE84868C8E", "0xE4E6ECEEC4C6CCCE", "0xA4A6ACAE84868C8E", "0xE4E6ECEEC4C6CCCE"],
    "XMM5": ["0xE4E6ECEEC4C6CCCE", "0xA4A6ACAE84868C8E", "0xE4E6ECEEC4C6CCCE", "0xA4A6ACAE84868C8E"]
  }
}
%endif

; VPHADDW (horizontal add of adjacent words), 128-bit and 256-bit forms,
; with the second source taken from memory.
lea rdx, [rel .data]

; First sources: the two 32-byte rows of .data.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Memory Operands
; 128-bit form, both operand orders. RegData expects the upper 128 bits of
; ymm2/ymm3 to be zero.
vphaddw xmm2, xmm0, [rdx + 32]
vphaddw xmm3, xmm1, [rdx]

; 256-bit form, both operand orders. Both 128-bit lanes of each input row are
; identical, so RegData expects the same result in each lane.
vphaddw ymm4, ymm0, [rdx + 32]
vphaddw ymm5, ymm1, [rdx]

hlt

align 32
.data:
; Row 0 (loaded into ymm0): same 16 bytes in both lanes.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1 (loaded into ymm1): same 16 bytes in both lanes.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vphminposuw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000000030001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0000000000070001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000010001", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPHMINPOSUW with a memory source.
; Each case reads eight unsigned 16-bit words. Per the expected RegData, the
; result holds the minimum word in bits [15:0] and its index in bits [18:16];
; all remaining bits are zero.
; rdx = base address of the 16-byte test vectors below.
lea rdx, [rel .data]

; One 16-byte vector per destination register.
vphminposuw xmm0, [rdx + 16 * 0]
vphminposuw xmm1, [rdx + 16 * 1]
vphminposuw xmm2, [rdx + 16 * 2]
vphminposuw xmm3, [rdx + 16 * 3]

; End of test.
hlt

align 16
.data:

; Pos 0
; Minimum (0x0001) is in word 0.
dq 0x0004000300020001
dq 0x0008000700060005

; Pos 3
; Minimum (0x0001) is in word 3.
dq 0x0001000300020004
dq 0x0008000700060005

; Pos 7
; Minimum (0x0001) is in word 7.
dq 0x0008000300020004
dq 0x0001000700060005

; Pos 7 & 2 & 1
; The minimum (0x0001) appears in words 1, 2 and 7.
; Should return lowest position (index 1)
dq 0x0008000100010004
dq 0x0001000700060005


================================================
FILE: unittests/ASM/VEX/vphsubd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xE403E40424042404", "0x0404040404040404", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0404040404040404", "0xE403E40424042404", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xE403E40424042404", "0x0404040404040404", "0x1111111111111111", "0xEEEEEEEFEEEEEEEF"],
    "XMM5": ["0x0404040404040404", "0xE403E40424042404", "0xEEEEEEEFEEEEEEEF", "0x1111111111111111"]
  }
}
%endif

; Test: VPHSUBD (horizontal subtract of adjacent 32-bit elements: lower element
; minus the higher one in each pair), 128-bit and 256-bit forms, with the
; second source operand read from memory.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0 = first 32-byte vector, ymm1 = second 32-byte vector.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit forms: the same two inputs in both operand orders.
vphsubd xmm2, xmm0, [rdx + 32]
vphsubd xmm3, xmm1, [rdx]

; 256-bit forms: the same two operand orders across the full registers.
vphsubd ymm4, ymm0, [rdx + 32]
vphsubd ymm5, ymm1, [rdx]

; End of test.
hlt

align 32
.data:
; Vector 0 ([rdx]): loaded into ymm0.
dq 0x5142634475468748
dq 0x5152435435562758
dq 0xCCCCCCCCDDDDDDDD
dq 0xEEEEEEEEFFFFFFFF

; Vector 1 ([rdx + 32]): loaded into ymm1.
dq 0x6172637465766778
dq 0x7162736475667768
dq 0x9999999988888888
dq 0x7777777766666666


================================================
FILE: unittests/ASM/VEX/vphsubsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x0202020202020202", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x0202020202020202", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFF01FF0100FF00FF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0xFF01FF0100FF00FF", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0202020202020202", "0xFF01FF0100FF00FF", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFF01FF0100FF00FF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x800080007FFF7FFF", "0x800080007FFF7FFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPHSUBSW (horizontal subtract of adjacent signed 16-bit words with
; signed saturation), 128-bit forms with the second source read from memory.
; Only the low 16 bytes of each 32-byte data row are consumed by these forms.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0..ymm3 = data rows 0..3.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]

; Row 0 against row 1, in both operand orders.
vphsubsw xmm4, xmm0, [rdx + 32]
vphsubsw xmm5, xmm1, [rdx]

; Row 2 against row 1, in both operand orders.
vphsubsw xmm6, xmm2, [rdx + 32]
vphsubsw xmm7, xmm1, [rdx + 32 * 2]

; Row 0 against row 2, then row 1 against itself.
vphsubsw xmm8, xmm0, [rdx + 32 * 2]
vphsubsw xmm9, xmm1, [rdx + 32]

; Row 2 against row 1 again, then row 3 against itself (the expected values
; for row 3 sit on the signed 16-bit limits 0x7FFF / 0x8000).
vphsubsw xmm10, xmm2, [rdx + 32]
vphsubsw xmm11, xmm3, [rdx + 32 * 3]

; End of test.
hlt

align 32
.data:
; Row 0 ([rdx]).
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Row 1 ([rdx + 32]): every adjacent word pair is identical.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x7F7F7F7F7F7F7F7F

; Row 2 ([rdx + 32 * 2]).
dq 0x2119221823172416
dq 0x3941384237433644
dq 0x4598654387293847
dq 0x7620937492893892

; Row 3 ([rdx + 32 * 3]): words at the edges of the signed 16-bit range.
dq 0x00007FFF00007FFF
dq 0x7FFFFFFF7FFFFFFF
dq 0x7FFFFFFF7FFFFFFF
dq 0x00007FFF00007FFF


================================================
FILE: unittests/ASM/VEX/vphsubsw_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x0202020202020202", "0x0000000000000000", "0x0202020202020202", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x0202020202020202", "0x0000000000000000", "0x0202020202020202"],
    "XMM6":  ["0xFF01FF0100FF00FF", "0x0000000000000000", "0x80007FFF1FAB7FFF", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0xFF01FF0100FF00FF", "0x0000000000000000", "0x80007FFF1FAB7FFF"],
    "XMM8":  ["0x0202020202020202", "0xFF01FF0100FF00FF", "0x0202020202020202", "0x80007FFF1FAB7FFF"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xFF01FF0100FF00FF", "0x0000000000000000", "0x80007FFF1FAB7FFF", "0x0000000000000000"],
    "XMM11": ["0x800080007FFF7FFF", "0x800080007FFF7FFF", "0x7FFF7FFF80008000", "0x7FFF7FFF80008000"]
  }
}
%endif

; Test: VPHSUBSW (horizontal subtract of adjacent signed 16-bit words with
; signed saturation), 256-bit forms with the second source read from memory.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0..ymm3 = data rows 0..3.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]

; Row 0 against row 1, in both operand orders.
vphsubsw ymm4, ymm0, [rdx + 32]
vphsubsw ymm5, ymm1, [rdx]

; Row 2 against row 1, in both operand orders. The upper half of row 2
; produces differences outside the signed 16-bit range, so the expected
; results contain the saturated values 0x7FFF and 0x8000.
vphsubsw ymm6, ymm2, [rdx + 32]
vphsubsw ymm7, ymm1, [rdx + 32 * 2]

; Row 0 against row 2, then row 1 against itself.
vphsubsw ymm8, ymm0, [rdx + 32 * 2]
vphsubsw ymm9, ymm1, [rdx + 32]

; Row 2 against row 1 again, then row 3 against itself.
vphsubsw ymm10, ymm2, [rdx + 32]
vphsubsw ymm11, ymm3, [rdx + 32 * 3]

; End of test.
hlt

align 32
.data:
; Row 0 ([rdx]).
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Row 1 ([rdx + 32]): every adjacent word pair is identical.
dq 0x7F7F7F7F7F7F7F7F
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x7F7F7F7F7F7F7F7F

; Row 2 ([rdx + 32 * 2]).
dq 0x2119221823172416
dq 0x3941384237433644
dq 0x4598654387293847
dq 0x7620937492893892

; Row 3 ([rdx + 32 * 3]): words at the edges of the signed 16-bit range.
dq 0x00007FFF00007FFF
dq 0x7FFFFFFF7FFFFFFF
dq 0x7FFFFFFF7FFFFFFF
dq 0x00007FFF00007FFF


================================================
FILE: unittests/ASM/VEX/vphsubw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0xF202F20212021202", "0x0202020202020202", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0202020202020202", "0xF202F20212021202", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xF202F20212021202", "0x0202020202020202", "0x44457778CCCD1111", "0x11111111EEEFEEEF"],
    "XMM5": ["0x0202020202020202", "0xF202F20212021202", "0x11111111EEEFEEEF", "0x44457778CCCD1111"]
  }
}
%endif

; Test: VPHSUBW (horizontal subtract of adjacent 16-bit words: lower word
; minus the higher one in each pair), 128-bit and 256-bit forms, with the
; second source operand read from memory.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0 = first 32-byte vector, ymm1 = second 32-byte vector.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit forms: the same two inputs in both operand orders.
vphsubw xmm2, xmm0, [rdx + 32]
vphsubw xmm3, xmm1, [rdx]

; 256-bit forms: the same two operand orders across the full registers.
vphsubw ymm4, ymm0, [rdx + 32]
vphsubw ymm5, ymm1, [rdx]

; End of test.
hlt

align 32
.data:
; Vector 0 ([rdx]): loaded into ymm0.
dq 0x5142634475468748
dq 0x5152435435562758
dq 0xFFFFCCCCEEEEFFFF
dq 0xEEEE3333EEEE6666

; Vector 1 ([rdx + 32]): loaded into ymm1.
dq 0x6172637465766778
dq 0x7162736475667768
dq 0x9999888877776666
dq 0x1111222233334444


================================================
FILE: unittests/ASM/VEX/vpinsrb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3": ["0x48510F254D2FA47F", "0x2B5774313A974886", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x006B6B6B6B6B6B6B", "0x6B00000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPINSRB (insert one byte into a 128-bit vector) with memory and
; general-purpose-register sources.
; rdx = base address of the data table below.
lea rdx, [rel .data]

; xmm2 = 0, used as the clean starting vector for the xmm4 cases.
vpxor xmm2, xmm2, xmm2

; Memory source: build xmm3 one byte at a time from the first 16 bytes of the
; table. xmm1 is not initialised in this file, but all 16 byte lanes are
; overwritten below, so none of its contents remain in the final xmm3.
vpinsrb xmm3, xmm1, [rdx + 8 * 0 + 0], 0x00
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 1], 0x01
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 2], 0x02
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 3], 0x03
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 4], 0x04
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 5], 0x05
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 6], 0x06
vpinsrb xmm3, xmm3, [rdx + 8 * 0 + 7], 0x07
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 0], 0x08
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 1], 0x09
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 2], 0x0A
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 3], 0x0B
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 4], 0x0C
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 5], 0x0D
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 6], 0x0E
vpinsrb xmm3, xmm3, [rdx + 8 * 1 + 7], 0x0F
; Out-of-range immediate: per the expected RegData the byte lands in lane 15,
; i.e. only the low four bits of 0xFF select the lane.
vpinsrb xmm4, xmm2, [rdx + 8 * 2 + 0], 0xFF
; rax = eight bytes starting at table offset 16; its low byte is 0x6b.
mov rax, [rdx + 16]

; Register source: insert the low byte of eax into lanes 0..6 of xmm4.
vpinsrb xmm4, xmm4, eax, 0x00
vpinsrb xmm4, xmm4, eax, 0x01
vpinsrb xmm4, xmm4, eax, 0x02
vpinsrb xmm4, xmm4, eax, 0x03
vpinsrb xmm4, xmm4, eax, 0x04
vpinsrb xmm4, xmm4, eax, 0x05
vpinsrb xmm4, xmm4, eax, 0x06

; End of test.
hlt

align 32
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpinsrd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0xB615B9533DE8AD09", "0xB76472A37404B890", "0x24426B4C72F110AD", "0x8A6789F2D415A567"],
    "XMM2": ["0x48510F254D2FA47F", "0x2B5774313A974886", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3DE8AD093DE8AD09", "0x3DE8AD093DE8AD09", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000000000000000", "0x1F6DE86B00000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPINSRD (insert one 32-bit element into a 128-bit vector) with memory
; and general-purpose-register sources.
; rdx = base address of the data table below.
lea rdx, [rel .data]

; xmm4 = 0 (clean starting vector), ymm1 = 32 bytes at table offset 32.
vpxor xmm4, xmm4, xmm4
vmovaps ymm1, [rdx + 32]

; Memory source: build xmm2 from the first four dwords of the table. The first
; insert starts from xmm1; all four lanes end up overwritten.
vpinsrd xmm2, xmm1, [rdx +  0], 0x00
vpinsrd xmm2, xmm2, [rdx +  4], 0x01
vpinsrd xmm2, xmm2, [rdx +  8], 0x02
vpinsrd xmm2, xmm2, [rdx + 12], 0x03
; Out-of-range immediate: per the expected RegData the dword lands in lane 3,
; i.e. only the low two bits of 0xFF select the lane.
vpinsrd xmm4, xmm4, [rdx + 16], 0xFF
; rax = eight bytes at table offset 32; eax is the low dword (0x3DE8AD09).
mov rax, [rdx + 32]

; Register source: write eax into every lane of xmm3. xmm3 is not initialised
; in this file, but all four lanes are overwritten.
vpinsrd xmm3, xmm3, eax, 0x00
vpinsrd xmm3, xmm3, eax, 0x01
vpinsrd xmm3, xmm3, eax, 0x02
vpinsrd xmm3, xmm3, eax, 0x03

; End of test.
hlt

align 32
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpinsrq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1": ["0x8996F88178236612", "0x19A26B823D3CA2A9", "0x00F658AB689712B0", "0xC97D9D031ED21972"],
    "XMM2": ["0x0000000000000000", "0xB615B9533DE8AD09", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x8996F88178236612", "0x8996F88178236612", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x48510F254D2FA47F", "0x2B5774313A974886", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPINSRQ (insert one 64-bit element into a 128-bit vector) with memory
; and general-purpose-register sources.
; rdx = base address of the data table below.
lea rdx, [rel .data]

; xmm2 = 0 (clean starting vector), ymm1 = 32 bytes at table offset 64.
vpxor xmm2, xmm2, xmm2
vmovaps ymm1, [rdx + 64]

; Memory source: build xmm4 from the first two qwords of the table. The first
; insert starts from xmm1; both lanes end up overwritten.
vpinsrq xmm4, xmm1, [rdx +  0], 0x00
vpinsrq xmm4, xmm4, [rdx +  8], 0x01
; Out-of-range immediate: per the expected RegData the qword lands in lane 1,
; i.e. only the low bit of 0xFF selects the lane.
vpinsrq xmm2, xmm2, [rdx + 32], 0xFF
; rax = qword at table offset 64.
mov rax, [rdx + 64]

; Register source: write rax into both lanes of xmm3. xmm3 is not initialised
; in this file, but both lanes are overwritten.
vpinsrq xmm3, xmm3, rax, 0x00
vpinsrq xmm3, xmm3, rax, 0x01

; End of test.
hlt

align 32
; 256 bytes of random data
.data:
db 0x7f, 0xa4, 0x2f, 0x4d, 0x25, 0x0f, 0x51, 0x48, 0x86, 0x48, 0x97, 0x3a, 0x31, 0x74, 0x57, 0x2b
db 0x6b, 0xe8, 0x6d, 0x1f, 0xde, 0x56, 0xb5, 0x30, 0x2c, 0x76, 0xae, 0x30, 0xf3, 0x9a, 0xd2, 0x67
db 0x09, 0xad, 0xe8, 0x3d, 0x53, 0xb9, 0x15, 0xb6, 0x90, 0xb8, 0x04, 0x74, 0xa3, 0x72, 0x64, 0xb7
db 0xad, 0x10, 0xf1, 0x72, 0x4c, 0x6b, 0x42, 0x24, 0x67, 0xa5, 0x15, 0xd4, 0xf2, 0x89, 0x67, 0x8a
db 0x12, 0x66, 0x23, 0x78, 0x81, 0xf8, 0x96, 0x89, 0xa9, 0xa2, 0x3c, 0x3d, 0x82, 0x6b, 0xa2, 0x19
db 0xb0, 0x12, 0x97, 0x68, 0xab, 0x58, 0xf6, 0x00, 0x72, 0x19, 0xd2, 0x1e, 0x03, 0x9d, 0x7d, 0xc9
db 0xc8, 0x55, 0xdf, 0x98, 0x22, 0x43, 0x86, 0x1c, 0xcc, 0xe9, 0x1b, 0x89, 0xda, 0xfe, 0x9b, 0xb2
db 0x47, 0x21, 0x0f, 0x71, 0x28, 0xbd, 0xb0, 0x88, 0x38, 0xac, 0xb5, 0x7f, 0x88, 0x5e, 0xe9, 0xc4
db 0xe4, 0x5b, 0x3e, 0xd0, 0x2a, 0x8c, 0xdf, 0xa7, 0xea, 0x95, 0xd3, 0xc2, 0xee, 0xd1, 0x70, 0x6c
db 0x18, 0x77, 0xc1, 0x38, 0x7b, 0xfc, 0xa9, 0x58, 0x92, 0xe8, 0xc6, 0xcd, 0x07, 0x5d, 0x3d, 0x76
db 0xf4, 0x4c, 0x5b, 0x25, 0x7f, 0x9b, 0x02, 0x41, 0x78, 0x39, 0x9e, 0x3e, 0x4c, 0xa2, 0x79, 0xca
db 0x1c, 0xe9, 0xf2, 0x9a, 0xaf, 0x6d, 0xfa, 0x57, 0x10, 0xc7, 0xfd, 0x5f, 0x20, 0x80, 0xf5, 0x65
db 0x3c, 0x77, 0xfb, 0xa8, 0xdf, 0x94, 0x16, 0x4f, 0xc0, 0x78, 0x00, 0x76, 0x03, 0x8c, 0x82, 0x10
db 0x7f, 0x07, 0xe0, 0x02, 0x92, 0xbb, 0xf9, 0x2e, 0xfa, 0x3d, 0x88, 0xc8, 0x24, 0x27, 0xa6, 0x1e
db 0x04, 0x90, 0xf6, 0xf8, 0x76, 0x0a, 0x4c, 0x94, 0xbc, 0xb7, 0x8d, 0x8b, 0xf9, 0x65, 0xf5, 0x07
db 0x7f, 0xc1, 0x37, 0x78, 0xa1, 0x1f, 0xc4, 0x10, 0x6c, 0x29, 0x5e, 0x7e, 0x32, 0x24, 0x92, 0x09


================================================
FILE: unittests/ASM/VEX/vpinsrw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x4142434445467778", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4142434477784748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4142777845464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x7778434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x4142434445464748", "0x5152535455567778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4142434445464748", "0x5152535477785758", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4142434445464748", "0x5152777855565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434445464748", "0x7778535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4142434445467778", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4142434477784748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4142777845464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x7778434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x4142434445464748", "0x5152535455567778", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x4142434445464748", "0x5152535477785758", "0x0000000000000000", "0x0000000000000000"],
    "XMM14": ["0x4142434445464748", "0x5152777855565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM15": ["0x4142434445464748", "0x7778535455565758", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPINSRW (insert one 16-bit word into a 128-bit vector) for every lane
; index 0..7, with both register and memory sources.
; rdx = base address of the test data below.
lea rdx, [rel .data]

; Register source value; only the low word (0x7778) is inserted.
mov rax, 0x7172737475767778

; ymm0 = base vector that every insert below starts from.
vmovapd ymm0, [rdx]

; Register source, lanes 1..7 (lane 0 is done last, see below).
vpinsrw xmm1, xmm0, eax, 1
vpinsrw xmm2, xmm0, eax, 2
vpinsrw xmm3, xmm0, eax, 3
vpinsrw xmm4, xmm0, eax, 4
vpinsrw xmm5, xmm0, eax, 5
vpinsrw xmm6, xmm0, eax, 6
vpinsrw xmm7, xmm0, eax, 7

; Memory source, lanes 0..7: the word at [rdx + 32] is also 0x7778.
vpinsrw xmm8,  xmm0, [rdx + 32], 0
vpinsrw xmm9,  xmm0, [rdx + 32], 1
vpinsrw xmm10, xmm0, [rdx + 32], 2
vpinsrw xmm11, xmm0, [rdx + 32], 3
vpinsrw xmm12, xmm0, [rdx + 32], 4
vpinsrw xmm13, xmm0, [rdx + 32], 5
vpinsrw xmm14, xmm0, [rdx + 32], 6
vpinsrw xmm15, xmm0, [rdx + 32], 7

; Register source, lane 0, written in place. This is done last because xmm0 is
; the unmodified source for every insert above. The expected RegData has the
; upper 128 bits of this register as zero even though ymm0 was loaded with
; 32 bytes.
vpinsrw xmm0, xmm0, eax, 0

; End of test.
hlt

align 32
.data:
; Base vector ([rdx]): loaded into ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; Memory insert source ([rdx + 32]); only its low word is read by vpinsrw.
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpmaddubsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFE02FE02FE02FE02", "0xFE02FE02FE02FE02", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x7E027E027E027E02", "0x7E027E027E027E02", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x057306BC07B808B8", "0xBC53BC0EBAE5BA2E", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0xA473A5BCA6B8A7B8", "0x0553070E07E5092E", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPMADDUBSW, 128-bit form with the second source read from memory.
; Bytes of the first source are treated as unsigned and bytes of the second
; source as signed; adjacent products are summed into signed 16-bit words with
; saturation.
; rdx = base address of the 16-byte test vectors below.
lea rdx, [rel .data]

; xmm0..xmm4 = data vectors 0..4.
vmovaps xmm0, [rdx]
vmovaps xmm1, [rdx + 16]
vmovaps xmm2, [rdx + 16 * 2]
vmovaps xmm3, [rdx + 16 * 3]
vmovaps xmm4, [rdx + 16 * 4]

; Zero
vpmaddubsw xmm5, xmm0, [rdx]

; -1
; 0xFF is 255 as the unsigned operand and -1 as the signed operand.
vpmaddubsw xmm6, xmm1, [rdx + 16]

; 127
vpmaddubsw xmm7, xmm2, [rdx + 16 * 2]

; 255 and 127
; The pair sum exceeds the signed 16-bit range, so 0x7FFF is expected.
vpmaddubsw xmm8, xmm1, [rdx + 16 * 2]

; Mixture
; The same two vectors in both operand orders.
vpmaddubsw xmm9,  xmm3, [rdx + 16 * 4]
vpmaddubsw xmm10, xmm4, [rdx + 16 * 3]

; End of test.
hlt

align 32
.data:
; Vector 0: all zero.
dq 0x0000000000000000
dq 0x0000000000000000

; Vector 1: all bytes 0xFF.
dq -1
dq -1

; Vector 2: all bytes 0x7F.
dq 0x7F7F7F7F7F7F7F7F
dq 0x7F7F7F7F7F7F7F7F

; Vector 3: mixed values.
dq 0x8141824383448445
dq 0x21F223F323F424F5

; Vector 4: mixed values.
dq 0xE251E352E453E554
dq 0x71A972A873A774A6


================================================
FILE: unittests/ASM/VEX/vpmaddubsw_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFE02FE02FE02FE02", "0xFE02FE02FE02FE02", "0xFE02FE02FE02FE02", "0xFE02FE02FE02FE02"],
    "XMM7":  ["0x7E027E027E027E02", "0x7E027E027E027E02", "0x7E027E027E027E02", "0x7E027E027E027E02"],
    "XMM8":  ["0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF", "0x7FFF7FFF7FFF7FFF"],
    "XMM9":  ["0x057306BC07B808B8", "0xBC53BC0EBAE5BA2E", "0x282026EC08D41910", "0x33B92A54171B2224"],
    "XMM10": ["0xA473A5BCA6B8A7B8", "0x0553070E07E5092E", "0xF02026EC0CD41910", "0xEBB92A54171BDF24"]
  }
}
%endif

; Test: VPMADDUBSW, 256-bit form with the second source read from memory.
; Bytes of the first source are treated as unsigned and bytes of the second
; source as signed; adjacent products are summed into signed 16-bit words with
; saturation.
; rdx = base address of the 32-byte test vectors below.
lea rdx, [rel .data]

; ymm0..ymm4 = data vectors 0..4.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]
vmovaps ymm4, [rdx + 32 * 4]

; Zero
vpmaddubsw ymm5, ymm0, [rdx]

; -1
; 0xFF is 255 as the unsigned operand and -1 as the signed operand.
vpmaddubsw ymm6, ymm1, [rdx + 32]

; 127
vpmaddubsw ymm7, ymm2, [rdx + 32 * 2]

; 255 and 127
; The pair sum exceeds the signed 16-bit range, so 0x7FFF is expected.
vpmaddubsw ymm8, ymm1, [rdx + 32 * 2]

; Mixture
; The same two vectors in both operand orders.
vpmaddubsw ymm9,  ymm3, [rdx + 32 * 4]
vpmaddubsw ymm10, ymm4, [rdx + 32 * 3]

; End of test.
hlt

align 32
.data:
; Vector 0: all zero.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Vector 1: all bytes 0xFF.
dq -1
dq -1
dq -1
dq -1

; Vector 2: all bytes 0x7F.
dq 0x7F7F7F7F7F7F7F7F
dq 0x7F7F7F7F7F7F7F7F
dq 0x7F7F7F7F7F7F7F7F
dq 0x7F7F7F7F7F7F7F7F

; Vector 3: mixed values.
dq 0x8141824383448445
dq 0x21F223F323F424F5
dq 0x3289435639045828
dq 0x7380543834230480

; Vector 4: mixed values.
dq 0xE251E352E453E554
dq 0x71A972A873A774A6
dq 0x3438404230894802
dq 0x2348337523752943


================================================
FILE: unittests/ASM/VEX/vpmaddwd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x41FD357ADA74036A", "0xCCCC999AE38E1C72"],
    "XMM5": ["0x32F08FD4383B2524", "0x499DE6944FEA7CE4", "0x41FD357ADA74036A", "0xCCCC999AE38E1C72"]
  }
}
%endif

; Test: VPMADDWD (multiply signed 16-bit words and add adjacent products into
; 32-bit results), 128-bit and 256-bit forms. Each width is run once with a
; memory second source and once with a register second source; the expected
; RegData is identical for both.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0 = first 32-byte vector, ymm1 = second 32-byte vector.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit: memory source, then the equivalent register source.
vpmaddwd xmm2, xmm0, [rdx + 32]
vpmaddwd xmm3, xmm0, xmm1

; 256-bit: memory source, then the equivalent register source.
vpmaddwd ymm4, ymm0, [rdx + 32]
vpmaddwd ymm5, ymm0, ymm1

; End of test.
hlt

align 32
.data:
; Vector 0 ([rdx]): loaded into ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6666777788889999
dq 0x5555444433332222

; Vector 1 ([rdx + 32]): loaded into ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x5555444433332222
dq 0xAAAAAAAAAAAAAAAA


================================================
FILE: unittests/ASM/VEX/vpmaskmovd_load.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x0868C3F30AED56E0", "0x80FCE9E284E6E6DE", "0x8DDDDDDD8DDDDDDD", "0x0CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0x0000000000000000", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
      "XMM4": ["0x0000000000000000", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPMASKMOVD load form (masked load of 32-bit elements), 128-bit and
; 256-bit. An element is loaded when the sign bit of the matching mask element
; is set; per the expected RegData, unselected elements read as zero.
; rdx = base address of the source vector and masks below.
lea rdx, [rel .data]

; ymm0 = mixed mask, ymm1 = all-clear mask, ymm2 = all-set mask.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]

; Mixed mask.
vpmaskmovd ymm3, ymm0, [rdx]
vpmaskmovd xmm4, xmm0, [rdx]

; All-clear mask: nothing is loaded.
vpmaskmovd ymm5, ymm1, [rdx]
vpmaskmovd xmm6, xmm1, [rdx]

; All-set mask: every element is loaded.
vpmaskmovd ymm7, ymm2, [rdx]
vpmaskmovd xmm8, xmm2, [rdx]

; End of test.
hlt

align 32
.data:
; Source vector ([rdx]).
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [0, 0, 1, 1, 1, 1, 0, 0])
; The non-sign bits are noise; only bit 31 of each dword matters.
dq 0x0868C3F30AED56E0
dq 0x80FCE9E284E6E6DE
dq 0x8DDDDDDD8DDDDDDD
dq 0x0CCCCCCC0CCCCCCC

; No masking at all. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000


================================================
FILE: unittests/ASM/VEX/vpmaskmovd_store.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x0868C3F30AED56E0", "0x80FCE9E284E6E6DE", "0x8DDDDDDD8DDDDDDD", "0x0CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xFFFFFFFFFFFFFFFF", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xFFFFFFFFFFFFFFFF"],
      "XMM5": ["0xFFFFFFFFFFFFFFFF", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM7": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM9": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Test: VPMASKMOVD store form (masked store of 32-bit elements), 128-bit and
; 256-bit. An element is written when the sign bit of the matching mask element
; is set. Each store targets its own scratch buffer pre-filled with all-ones,
; so elements that were not written remain 0xFFFFFFFF when reloaded.
; rdx = base address of the source vector and masks below.
lea rdx, [rel .data]

; ymm0 = mixed mask, ymm1 = all-clear mask, ymm2 = all-set mask,
; ymm3 = the vector being stored.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
vmovaps ymm3, [rdx]

; Mixed mask.
vpmaskmovd [rel .scratch1], ymm0, ymm3
vpmaskmovd [rel .scratch2], xmm0, xmm3

; All-clear mask: nothing is written.
vpmaskmovd [rel .scratch3], ymm1, ymm3
vpmaskmovd [rel .scratch4], xmm1, xmm3

; All-set mask: every element is written.
vpmaskmovd [rel .scratch5], ymm2, ymm3
vpmaskmovd [rel .scratch6], xmm2, xmm3

; Now reload to verify results
; All reloads are 32 bytes wide, so for the 128-bit stores the upper half of
; the scratch buffer is checked as well (expected to stay all-ones).
vmovaps ymm4, [rel .scratch1]
vmovaps ymm5, [rel .scratch2]
vmovaps ymm6, [rel .scratch3]
vmovaps ymm7, [rel .scratch4]
vmovaps ymm8, [rel .scratch5]
vmovaps ymm9, [rel .scratch6]

; End of test.
hlt

; NOTE(review): the 4096-byte alignment presumably keeps the written scratch
; buffers on a different page from the code above - confirm.
align 4096
.data:
; Vector being stored ([rdx]).
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xDDDDDDDDDDDDDDDD
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [0, 0, 1, 1, 1, 1, 0, 0])
; The non-sign bits are noise; only bit 31 of each dword matters.
dq 0x0868C3F30AED56E0
dq 0x80FCE9E284E6E6DE
dq 0x8DDDDDDD8DDDDDDD
dq 0x0CCCCCCC0CCCCCCC

; No masking at all. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000

; Store targets, one 32-byte buffer per store, each pre-filled with all-ones.
; 256-bit store, mixed mask.
.scratch1:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, mixed mask.
.scratch2:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 256-bit store, all-clear mask.
.scratch3:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, all-clear mask.
.scratch4:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 256-bit store, all-set mask.
.scratch5:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, all-set mask.
.scratch6:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpmaskmovq_load.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x8868C3F30AED56E0", "0x10FCE9E284E6E6DE", "0x1DDDDDDD8DDDDDDD", "0x8CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xA76C4F06A12BFCE0", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM7": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Test: VPMASKMOVQ load form (masked load of 64-bit elements), 128-bit and
; 256-bit. An element is loaded when the sign bit of the matching mask element
; is set; per the expected RegData, unselected elements read as zero.
; rdx = base address of the source vector and masks below.
lea rdx, [rel .data]

; ymm0 = mixed mask, ymm1 = all-clear mask, ymm2 = all-set mask.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]

; Mixed mask.
vpmaskmovq ymm3, ymm0, [rdx]
vpmaskmovq xmm4, xmm0, [rdx]

; All-clear mask: nothing is loaded.
vpmaskmovq ymm5, ymm1, [rdx]
vpmaskmovq xmm6, xmm1, [rdx]

; All-set mask: every element is loaded.
vpmaskmovq ymm7, ymm2, [rdx]
vpmaskmovq xmm8, xmm2, [rdx]

; End of test.
hlt

align 32
.data:
; Source vector ([rdx]).
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [1, 0, 0, 1])
; The non-sign bits are noise; only bit 63 of each qword matters.
dq 0x8868C3F30AED56E0
dq 0x10FCE9E284E6E6DE
dq 0x1DDDDDDD8DDDDDDD
dq 0x8CCCCCCC0CCCCCCC

; No masking at all. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000


================================================
FILE: unittests/ASM/VEX/vpmaskmovq_store.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
      "XMM0": ["0x8868C3F30AED56E0", "0x10FCE9E284E6E6DE", "0x1DDDDDDD8DDDDDDD", "0x8CCCCCCC0CCCCCCC"],
      "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
      "XMM2": ["0x8000000080000000", "0x8000000080000000", "0x8000000080000000", "0x8000000080000000"],
      "XMM3": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM4": ["0xA76C4F06A12BFCE0", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xEEEEEEEEEEEEEEEE"],
      "XMM5": ["0xA76C4F06A12BFCE0", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM6": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM7": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
      "XMM8": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xDDDDDDDDDDDDDDDD", "0xEEEEEEEEEEEEEEEE"],
      "XMM9": ["0xA76C4F06A12BFCE0", "0x9B80767F1E6A060F", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"]
  }
}
%endif

; Test: VPMASKMOVQ store form (masked store of 64-bit elements), 128-bit and
; 256-bit. An element is written when the sign bit of the matching mask element
; is set. Each store targets its own scratch buffer pre-filled with all-ones,
; so elements that were not written remain all-ones when reloaded.
; rdx = base address of the source vector and masks below.
lea rdx, [rel .data]

; ymm0 = mixed mask, ymm1 = all-clear mask, ymm2 = all-set mask,
; ymm3 = the vector being stored.
vmovaps ymm0, [rdx + 32]
vmovaps ymm1, [rdx + 64]
vmovaps ymm2, [rdx + 96]
vmovaps ymm3, [rdx]

; Mixed mask.
vpmaskmovq [rel .scratch1], ymm0, ymm3
vpmaskmovq [rel .scratch2], xmm0, xmm3

; All-clear mask: nothing is written.
vpmaskmovq [rel .scratch3], ymm1, ymm3
vpmaskmovq [rel .scratch4], xmm1, xmm3

; All-set mask: every element is written.
vpmaskmovq [rel .scratch5], ymm2, ymm3
vpmaskmovq [rel .scratch6], xmm2, xmm3

; Now reload to verify results
; All reloads are 32 bytes wide, so for the 128-bit stores the upper half of
; the scratch buffer is checked as well (expected to stay all-ones).
vmovaps ymm4, [rel .scratch1]
vmovaps ymm5, [rel .scratch2]
vmovaps ymm6, [rel .scratch3]
vmovaps ymm7, [rel .scratch4]
vmovaps ymm8, [rel .scratch5]
vmovaps ymm9, [rel .scratch6]

; End of test.
hlt

; NOTE(review): the 4096-byte alignment presumably keeps the written scratch
; buffers on a different page from the code above - confirm.
align 4096
.data:
; Vector being stored ([rdx]).
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xDDDDDDDDDDDDDDDD
dq 0xEEEEEEEEEEEEEEEE

; Disastrously organized mask (sign mask [1, 0, 0, 1])
; The non-sign bits are noise; only bit 63 of each qword matters.
dq 0x8868C3F30AED56E0
dq 0x10FCE9E284E6E6DE
dq 0x1DDDDDDD8DDDDDDD
dq 0x8CCCCCCC0CCCCCCC

; No masking at all. Should not touch memory at all.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Select all elements
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000
dq 0x8000000080000000

; Store targets, one 32-byte buffer per store, each pre-filled with all-ones.
; 256-bit store, mixed mask.
.scratch1:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, mixed mask.
.scratch2:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 256-bit store, all-clear mask.
.scratch3:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, all-clear mask.
.scratch4:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 256-bit store, all-set mask.
.scratch5:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; 128-bit store, all-set mask.
.scratch6:
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpmaxsb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8062636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM4": ["0x4162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM6": ["0x4162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM7": ["0x4162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; Test: VPMAXSB (per-byte signed maximum), 128-bit and 256-bit forms with
; register and memory second sources, plus cases where the destination is also
; one of the sources.
; rdx = base address of the test vectors below.
lea rdx, [rel .data]

; ymm0 = first 32-byte vector, ymm1 = second 32-byte vector.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register second source.
vpmaxsb xmm2, xmm0, xmm1
vpmaxsb ymm3, ymm0, ymm1

; Memory second source; expected results match the register forms above.
vpmaxsb xmm4, xmm0, [rdx + 32]
vpmaxsb ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxsb ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxsb ymm7, ymm5, ymm7

; End of test.
hlt

align 32
.data:
; Vector 0 ([rdx]): loaded into ymm0.
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Vector 1 ([rdx + 32]): loaded into ymm1. The top byte of the first qword is
; 0x80, which is negative as a signed byte and therefore loses to 0x41.
dq 0x8062636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmaxsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8000000065666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4142434465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM4": ["0x4142434465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM6": ["0x4142434465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM7": ["0x4142434465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; VPMAXSD: per-dword signed maximum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The upper dword of the second input's first qword is 0x80000000, which is
; negative as a signed dword, so the expected result keeps 0x41424344 there.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpmaxsd xmm2, xmm0, xmm1
vpmaxsd ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpmaxsd xmm4, xmm0, [rdx + 32]
vpmaxsd ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxsd ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxsd ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x8000000065666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmaxsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8000636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4142636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM4": ["0x4142636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM6": ["0x4142636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM7": ["0x4142636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"]
  }
}
%endif

; VPMAXSW: per-word signed maximum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The top word of the second input's first qword is 0x8000, which is negative
; as a signed word, so the expected result keeps 0x4142 there.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpmaxsw xmm2, xmm0, xmm1
vpmaxsw ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpmaxsw xmm4, xmm0, [rdx + 32]
vpmaxsw ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxsw ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxsw ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x8000636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmaxub.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x8182838485868788"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8182838485868788"],
    "XMM4": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8182838485868788"],
    "XMM6": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8182838485868788"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8182838485868788"]
  }
}
%endif

; VPMAXUB: per-byte unsigned maximum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The last qword of the first input holds bytes 0x81..0x88 (top bit set); as
; unsigned values they are larger than 0x51..0x58 and are expected to win.
; NOTE(review): only that last qword has top-bit-set bytes, so only the upper
; lane of the ymm forms tells unsigned max apart from signed max; the xmm
; forms would give the same result either way.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpmaxub xmm2, xmm0, xmm1
vpmaxub ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpmaxub xmm4, xmm0, [rdx + 32]
vpmaxub ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxub ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxub ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x8182838485868788

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmaxud.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x8172737485767778"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172737485767778"],
    "XMM4": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172737485767778"],
    "XMM6": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172737485767778"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172737485767778"]
  }
}
%endif

; VPMAXUD: per-dword unsigned maximum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The last qword of the first input holds dwords 0x81727374 and 0x85767778
; (top bit set); as unsigned values they are larger than 0x51525354 and
; 0x55565758 and are expected to win.
; NOTE(review): only that last qword has top-bit-set dwords, so only the upper
; lane of the ymm forms tells unsigned max apart from signed max; the xmm
; forms would give the same result either way.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpmaxud xmm2, xmm0, xmm1
vpmaxud ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpmaxud xmm4, xmm0, [rdx + 32]
vpmaxud ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxud ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxud ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x8172737485767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmaxuw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x8172837485768778"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172837485768778"],
    "XMM4": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172837485768778"],
    "XMM6": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172837485768778"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x8172837485768778"]
  }
}
%endif

; VPMAXUW: per-word unsigned maximum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The last qword of the first input holds words 0x8172, 0x8374, 0x8576 and
; 0x8778 (top bit set); as unsigned values they are larger than the matching
; 0x5152..0x5758 words and are expected to win.
; NOTE(review): only that last qword has top-bit-set words, so only the upper
; lane of the ymm forms tells unsigned max apart from signed max; the xmm
; forms would give the same result either way.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpmaxuw xmm2, xmm0, xmm1
vpmaxuw ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpmaxuw xmm4, xmm0, [rdx + 32]
vpmaxuw ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpmaxuw ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpmaxuw ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x8172837485768778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminsb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8062636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x8042434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x8042434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x8042434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x8042434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x8042434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x8042434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINSB: per-byte signed minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The top byte of the second input's first qword is 0x80, which is negative as
; a signed byte, so the expected result selects 0x80 over 0x41 there.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminsb xmm2, xmm0, xmm1
vpminsb ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminsb xmm4, xmm0, [rdx + 32]
vpminsb ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminsb ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminsb ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x8062636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8000000065666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x8000000045464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x8000000045464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x8000000045464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x8000000045464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x8000000045464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x8000000045464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINSD: per-dword signed minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The upper dword of the second input's first qword is 0x80000000, which is
; negative as a signed dword, so the expected result selects it over
; 0x41424344.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminsd xmm2, xmm0, xmm1
vpminsd ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminsd xmm4, xmm0, [rdx + 32]
vpminsd ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminsd ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminsd ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x8000000065666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM1": ["0x8000636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x8000434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x8000434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x8000434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x8000434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x8000434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x8000434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINSW: per-word signed minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; The top word of the second input's first qword is 0x8000, which is negative
; as a signed word, so the expected result selects it over 0x4142.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminsw xmm2, xmm0, xmm1
vpminsw ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminsw xmm4, xmm0, [rdx + 32]
vpminsw ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminsw ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminsw ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x8000636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminub.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x8182838485868788", "0x4142434445464748", "0x8182838485868788"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINUB: per-byte unsigned minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; Qwords 1 and 3 of the first input hold bytes 0x81..0x88 (top bit set). As
; unsigned values they are larger than 0x51..0x58, so the minimum must come
; from the second input. A signed comparison would pick 0x81..0x88 instead,
; so both the 128-bit and the 256-bit paths tell the two apart.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminub xmm2, xmm0, xmm1
vpminub ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminub xmm4, xmm0, [rdx + 32]
vpminub ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminub ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminub ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x8182838485868788
dq 0x4142434445464748
dq 0x8182838485868788

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminud.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x8172737485767778", "0x4142434445464748", "0x8172737485767778"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINUD: per-dword unsigned minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; Qwords 1 and 3 of the first input hold dwords 0x81727374 and 0x85767778
; (top bit set). As unsigned values they are larger than 0x51525354 and
; 0x55565758, so the minimum must come from the second input. A signed
; comparison would pick the first input instead, so both the 128-bit and the
; 256-bit paths tell the two apart.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminud xmm2, xmm0, xmm1
vpminud ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminud xmm4, xmm0, [rdx + 32]
vpminud ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminud ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminud ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x8172737485767778
dq 0x4142434445464748
dq 0x8172737485767778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpminuw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x8172837485768778", "0x4142434445464748", "0x8172837485768778"],
    "XMM1": ["0x6162636465666768", "0x5152535455565758", "0x6162636465666768", "0x5152535455565758"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM4": ["0x4142434445464748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM7": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; VPMINUW: per-word unsigned minimum, in 128-bit and 256-bit forms, with the
; second source taken from a register and from memory.
; Qwords 1 and 3 of the first input hold words 0x8172, 0x8374, 0x8576 and
; 0x8778 (top bit set). As unsigned values they are larger than the matching
; 0x5152..0x5758 words, so the minimum must come from the second input. A
; signed comparison would pick the first input instead, so both the 128-bit
; and the 256-bit paths tell the two apart.

lea rdx, [rel .data]

; Load the two 256-bit inputs.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Register sources. The xmm form is expected to leave the upper 128 bits of
; the destination zero (see XMM2 in the config above).
vpminuw xmm2, xmm0, xmm1
vpminuw ymm3, ymm0, ymm1

; Same operations with the second source read from memory.
vpminuw xmm4, xmm0, [rdx + 32]
vpminuw ymm5, ymm0, [rdx + 32]

; Some funky combinations for testing fast paths
; Related to SVE sources aliasing the destination
; Destination aliases the first source.
vmovapd ymm6, ymm0
vpminuw ymm6, ymm6, ymm5

; Destination aliases the second source.
vmovapd ymm7, ymm0
vpminuw ymm7, ymm5, ymm7

hlt

align 32
.data:
; First input (loaded into ymm0).
dq 0x4142434445464748
dq 0x8172837485768778
dq 0x4142434445464748
dq 0x8172837485768778

; Second input (loaded into ymm1 and also used as the memory operand).
dq 0x6162636465666768
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovmskb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x0",
    "RBX": "0x0",
    "RCX": "0xFFFF",
    "RSI": "0xF0F0",
    "RDI": "0x55AA",
    "R8":  "0x0",
    "R9":  "0x0",
    "R10": "0xFFFFFFFF",
    "R11": "0xF0F0F0F0",
    "R12": "0x55AA55AA",
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM3": ["0x8080808000000000", "0x8080808000000000", "0x8080808000000000", "0x8080808000000000"],
    "XMM4": ["0x8000800080008000", "0x0080008000800080", "0x8000800080008000", "0x0080008000800080"]
  }
}
%endif

; VPMOVMSKB: gathers the top bit of every source byte into a general-purpose
; register. The xmm forms are expected to yield a 16-bit mask and the ymm
; forms a 32-bit mask, with all higher destination bits zero.

lea rdx, [rel .data]

; Five 256-bit patterns: no top bits set, all zero, all ones, top bits set in
; the upper four bytes of each qword, and alternating bytes.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
vmovapd ymm4, [rdx + 32 * 4]

; 128-bit sources with 32-bit destination registers.
vpmovmskb eax, xmm0
vpmovmskb ebx, xmm1
vpmovmskb ecx, xmm2
vpmovmskb esi, xmm3
vpmovmskb edi, xmm4

; 256-bit sources with 64-bit destination registers.
vpmovmskb r8,  ymm0
vpmovmskb r9,  ymm1
vpmovmskb r10, ymm2
vpmovmskb r11, ymm3
vpmovmskb r12, ymm4

hlt

align 32
.data:
; ymm0: every byte is below 0x80, so the mask is 0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

; ymm1: all zero, so the mask is 0.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; ymm2: all ones, so every mask bit is set.
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; ymm3: upper four bytes of each qword are 0x80, giving 0xF0 per qword.
dq 0x8080808000000000
dq 0x8080808000000000
dq 0x8080808000000000
dq 0x8080808000000000

; ymm4: odd bytes set in even qwords (0xAA), even bytes set in odd qwords
; (0x55), giving 0x55AA per 128-bit lane.
dq 0x8000800080008000
dq 0x0080008000800080
dq 0x8000800080008000
dq 0x0080008000800080


================================================
FILE: unittests/ASM/VEX/vpmovsxbd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFFFFFF87FFFFFF88", "0xFFFFFF85FFFFFF86", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFFFF87FFFFFF88", "0xFFFFFF85FFFFFF86", "0x0000004300000044", "0x0000004100000042"],
    "XMM3": ["0xFFFFFF87FFFFFF88", "0xFFFFFF85FFFFFF86", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFF87FFFFFF88", "0xFFFFFF85FFFFFF86", "0x0000004300000044", "0x0000004100000042"]
  }
}
%endif

; VPMOVSXBD: sign-extends packed bytes from the low end of the source into
; packed dwords. The xmm form consumes 4 bytes (0x88..0x85, all negative) and
; the ymm form consumes 8 (adding the positive bytes 0x44..0x41).

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxbd xmm1, [rdx]
vpmovsxbd ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxbd xmm3, xmm0
vpmovsxbd ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovsxbq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFFFFFFFFFFFFFF88", "0xFFFFFFFFFFFFFF87", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFFFFFFFFFFFF88", "0xFFFFFFFFFFFFFF87", "0xFFFFFFFFFFFFFF86", "0xFFFFFFFFFFFFFF85"],
    "XMM3": ["0xFFFFFFFFFFFFFF88", "0xFFFFFFFFFFFFFF87", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFFFF88", "0xFFFFFFFFFFFFFF87", "0xFFFFFFFFFFFFFF86", "0xFFFFFFFFFFFFFF85"]
  }
}
%endif

; VPMOVSXBQ: sign-extends packed bytes from the low end of the source into
; packed qwords. The xmm form consumes 2 bytes (0x88, 0x87) and the ymm form
; consumes 4 (0x88..0x85); every consumed byte has the top bit set.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxbq xmm1, [rdx]
vpmovsxbq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxbq xmm3, xmm0
vpmovsxbq ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovsxbw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFF85FF86FF87FF88", "0x0041004200430044", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFF85FF86FF87FF88", "0x0041004200430044", "0x0055005600570058", "0x0051005200530054"],
    "XMM3": ["0xFF85FF86FF87FF88", "0x0041004200430044", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFF85FF86FF87FF88", "0x0041004200430044", "0x0055005600570058", "0x0051005200530054"]
  }
}
%endif

; VPMOVSXBW: sign-extends packed bytes from the low end of the source into
; packed words. The xmm form consumes the low 8 bytes and the ymm form the
; low 16; bytes 0x88..0x85 are negative, all others are positive.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxbw xmm1, [rdx]
vpmovsxbw ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxbw xmm3, xmm0
vpmovsxbw ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovsxdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFFFFFFFF85868788", "0x0000000041424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFFFFFF85868788", "0x0000000041424344", "0x0000000055565758", "0x0000000051525354"],
    "XMM3": ["0xFFFFFFFF85868788", "0x0000000041424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFF85868788", "0x0000000041424344", "0x0000000055565758", "0x0000000051525354"]
  }
}
%endif

; VPMOVSXDQ: sign-extends packed dwords from the low end of the source into
; packed qwords. The xmm form consumes 2 dwords and the ymm form 4; only the
; lowest dword (0x85868788) is negative.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxdq xmm1, [rdx]
vpmovsxdq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxdq xmm3, xmm0
vpmovsxdq ymm4, xmm0

hlt

align 32
.data:
; The lowest dword has the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovsxwd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFFFF8586FFFF8788", "0x0000414200004344", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFF8586FFFF8788", "0x0000414200004344", "0x0000555600005758", "0x0000515200005354"],
    "XMM3": ["0xFFFF8586FFFF8788", "0x0000414200004344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFF8586FFFF8788", "0x0000414200004344", "0x0000555600005758", "0x0000515200005354"]
  }
}
%endif

; VPMOVSXWD: sign-extends packed words from the low end of the source into
; packed dwords. The xmm form consumes 4 words and the ymm form 8; the two
; lowest words (0x8788, 0x8586) are negative, the rest are positive.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxwd xmm1, [rdx]
vpmovsxwd ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxwd xmm3, xmm0
vpmovsxwd ymm4, xmm0

hlt

align 32
.data:
; The two lowest words have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovsxwq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0xFFFFFFFFFFFF8788", "0xFFFFFFFFFFFF8586", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFFFFFFFFFFFF8788", "0xFFFFFFFFFFFF8586", "0x0000000000004344", "0x0000000000004142"],
    "XMM3": ["0xFFFFFFFFFFFF8788", "0xFFFFFFFFFFFF8586", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFFFFFFFFFFFF8788", "0xFFFFFFFFFFFF8586", "0x0000000000004344", "0x0000000000004142"]
  }
}
%endif

; VPMOVSXWQ: sign-extends packed words from the low end of the source into
; packed qwords. The xmm form consumes 2 words (0x8788, 0x8586, both negative)
; and the ymm form 4 (adding the positive words 0x4344 and 0x4142).

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovsxwq xmm1, [rdx]
vpmovsxwq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovsxwq xmm3, xmm0
vpmovsxwq ymm4, xmm0

hlt

align 32
.data:
; The two lowest words have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxbd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0000008700000088", "0x0000008500000086", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000008700000088", "0x0000008500000086", "0x0000004300000044", "0x0000004100000042"],
    "XMM3": ["0x0000008700000088", "0x0000008500000086", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000008700000088", "0x0000008500000086", "0x0000004300000044", "0x0000004100000042"]
  }
}
%endif

; VPMOVZXBD: zero-extends packed bytes from the low end of the source into
; packed dwords. The xmm form consumes 4 bytes and the ymm form 8. Bytes
; 0x88..0x85 have the top bit set and must still extend with zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxbd xmm1, [rdx]
vpmovzxbd ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxbd xmm3, xmm0
vpmovzxbd ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxbq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0000000000000088", "0x0000000000000087", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000000000000088", "0x0000000000000087", "0x0000000000000086", "0x0000000000000085"],
    "XMM3": ["0x0000000000000088", "0x0000000000000087", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000000000000088", "0x0000000000000087", "0x0000000000000086", "0x0000000000000085"]
  }
}
%endif

; VPMOVZXBQ: zero-extends packed bytes from the low end of the source into
; packed qwords. The xmm form consumes 2 bytes and the ymm form 4. Every
; consumed byte has the top bit set and must still extend with zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxbq xmm1, [rdx]
vpmovzxbq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxbq xmm3, xmm0
vpmovzxbq ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxbw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0085008600870088", "0x0041004200430044", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0085008600870088", "0x0041004200430044", "0x0055005600570058", "0x0051005200530054"],
    "XMM3": ["0x0085008600870088", "0x0041004200430044", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0085008600870088", "0x0041004200430044", "0x0055005600570058", "0x0051005200530054"]
  }
}
%endif

; VPMOVZXBW: zero-extends packed bytes from the low end of the source into
; packed words. The xmm form consumes the low 8 bytes and the ymm form the
; low 16. Bytes 0x88..0x85 have the top bit set and must still extend with
; zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxbw xmm1, [rdx]
vpmovzxbw ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxbw xmm3, xmm0
vpmovzxbw ymm4, xmm0

hlt

align 32
.data:
; Low four bytes have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0000000085868788", "0x0000000041424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000000085868788", "0x0000000041424344", "0x0000000055565758", "0x0000000051525354"],
    "XMM3": ["0x0000000085868788", "0x0000000041424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000000085868788", "0x0000000041424344", "0x0000000055565758", "0x0000000051525354"]
  }
}
%endif

; VPMOVZXDQ: zero-extends packed dwords from the low end of the source into
; packed qwords. The xmm form consumes 2 dwords and the ymm form 4. The lowest
; dword (0x85868788) has the top bit set and must still extend with zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxdq xmm1, [rdx]
vpmovzxdq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxdq xmm3, xmm0
vpmovzxdq ymm4, xmm0

hlt

align 32
.data:
; The lowest dword has the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxwd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0000858600008788", "0x0000414200004344", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000858600008788", "0x0000414200004344", "0x0000555600005758", "0x0000515200005354"],
    "XMM3": ["0x0000858600008788", "0x0000414200004344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000858600008788", "0x0000414200004344", "0x0000555600005758", "0x0000515200005354"]
  }
}
%endif

; VPMOVZXWD: zero-extends packed words from the low end of the source into
; packed dwords. The xmm form consumes 4 words and the ymm form 8. The two
; lowest words (0x8788, 0x8586) have the top bit set and must still extend
; with zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxwd xmm1, [rdx]
vpmovzxwd ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxwd xmm3, xmm0
vpmovzxwd ymm4, xmm0

hlt

align 32
.data:
; The two lowest words have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmovzxwq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434485868788", "0x5152535455565758", "0x4142434485868788", "0x5152535455565758"],
    "XMM1": ["0x0000000000008788", "0x0000000000008586", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x0000000000008788", "0x0000000000008586", "0x0000000000004344", "0x0000000000004142"],
    "XMM3": ["0x0000000000008788", "0x0000000000008586", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000000000008788", "0x0000000000008586", "0x0000000000004344", "0x0000000000004142"]
  }
}
%endif

; VPMOVZXWQ: zero-extends packed words from the low end of the source into
; packed qwords. The xmm form consumes 2 words and the ymm form 4. The two
; lowest words (0x8788, 0x8586) have the top bit set and must still extend
; with zeros.

lea rdx, [rel .data]

; Keep a copy of the input in a register for the register-source forms.
vmovapd ymm0, [rdx]

; Memory operands
vpmovzxwq xmm1, [rdx]
vpmovzxwq ymm2, [rdx]

; Register only
; The 256-bit form still takes an xmm source register.
vpmovzxwq xmm3, xmm0
vpmovzxwq ymm4, xmm0

hlt

align 32
.data:
; The two lowest words have the top bit set; the rest do not.
dq 0x4142434485868788
dq 0x5152535455565758
dq 0x4142434485868788
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpmuldq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0xEE65166050AC19A0", "0xFE1EB34A32B1A0B2", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x28A18CDD2D20FB20", "0x1D6FA69C44CAED04", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xF514CF89A88EDCDE", "0x01E3DC4237BECFCF", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0004B0350897F35A", "0x03CD750E809C18D0", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x066A5FA4AD5148C8", "0x00BCA2DA387E55A2", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x1E0F03011112ED90", "0x18C90F3EC0D58440", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0xEE94B334B2358DF2", "0x1B82409D7AE7FA28", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0xED12F34E8FB5E098", "0xD83D0BA0FF8632DB", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises the 128-bit form of VPMULDQ (signed multiply of the low dword of
; each qword, producing 64-bit products) with a memory second source.
; RegData in the CONFIG header expects the upper 128 bits of every
; destination to be zero.
lea rdx, [rel .data]

; First sources: vectors 0-7 of the table.
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]
vmovaps ymm3, [rdx + 32 * 2]
vmovaps ymm4, [rdx + 32 * 3]
vmovaps ymm5, [rdx + 32 * 4]
vmovaps ymm6, [rdx + 32 * 5]
vmovaps ymm7, [rdx + 32 * 6]
vmovaps ymm8, [rdx + 32 * 7]

; Second sources: vectors 8-15, read directly from memory.
; xmm1-xmm4: destination aliases the first source.
; xmm9-xmm12: destination is distinct from both sources.
vpmuldq xmm1, xmm1, [rdx + 32 * 8]
vpmuldq xmm2, xmm2, [rdx + 32 * 9]
vpmuldq xmm3, xmm3, [rdx + 32 * 10]
vpmuldq xmm4, xmm4, [rdx + 32 * 11]
vpmuldq xmm9, xmm5, [rdx + 32 * 12]
vpmuldq xmm10, xmm6, [rdx + 32 * 13]
vpmuldq xmm11, xmm7, [rdx + 32 * 14]
vpmuldq xmm12, xmm8, [rdx + 32 * 15]

hlt

align 32
; 512 bytes of random data: 16 vectors of 32 bytes, each 128-bit pattern
; repeated in both halves of its vector.
.data:
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F

dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE

dq 0x6C8BABD754A8356E
dq 0x277EA625CA925F77
dq 0x6C8BABD754A8356E
dq 0x277EA625CA925F77

dq 0x6A6FD695EC73CDC7
dq 0xDDA1B927BBF2AEBB
dq 0x6A6FD695EC73CDC7
dq 0xDDA1B927BBF2AEBB

dq 0x88312CD5C7D14D73
dq 0x7F091E1EFDDBE7FE
dq 0x88312CD5C7D14D73
dq 0x7F091E1EFDDBE7FE

dq 0xF29AE6EF954EFA14
dq 0x8273A8A49A6242A0
dq 0xF29AE6EF954EFA14
dq 0x8273A8A49A6242A0

dq 0x3212073882160F0E
dq 0xB3780763C1923507
dq 0x3212073882160F0E
dq 0xB3780763C1923507

dq 0x462A372B571946CB
dq 0xA38DCD3D790E041F
dq 0x462A372B571946CB
dq 0xA38DCD3D790E041F

dq 0x3057BAAB2F86F32B
dq 0xEF3F4F46F02CD62E
dq 0x3057BAAB2F86F32B
dq 0xEF3F4F46F02CD62E

dq 0xDE3C4B3485BBD1EF
dq 0x9DE3718DB9A3489E
dq 0xDE3C4B3485BBD1EF
dq 0x9DE3718DB9A3489E

dq 0x9D50328ADEFB7209
dq 0xEEF7EB52F6F19869
dq 0x9D50328ADEFB7209
dq 0xEEF7EB52F6F19869

dq 0xCE021C30FFC299D6
dq 0xA60E9C56F1B20570
dq 0xCE021C30FFC299D6
dq 0xA60E9C56F1B20570

dq 0x30763886E2C46218
dq 0xEB535D0EA7E4A12F
dq 0x30763886E2C46218
dq 0xEB535D0EA7E4A12F

dq 0x6802E8E1B7E04514
dq 0x46EBF28FC18EFE1A
dq 0x6802E8E1B7E04514
dq 0x46EBF28FC18EFE1A

dq 0x032E9746236A5D7F
dq 0xAC5976548F321298
dq 0x032E9746236A5D7F
dq 0xAC5976548F321298

dq 0xB6D30C71C85F76C8
dq 0x881D2CA6ABEA19C5
dq 0xB6D30C71C85F76C8
dq 0x881D2CA6ABEA19C5


================================================
FILE: unittests/ASM/VEX/vpmuldq_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0xEE65166050AC19A0", "0xFE1EB34A32B1A0B2", "0xEE65166050AC19A0", "0xFE1EB34A32B1A0B2"],
    "XMM2":  ["0x28A18CDD2D20FB20", "0x1D6FA69C44CAED04", "0x28A18CDD2D20FB20", "0x1D6FA69C44CAED04"],
    "XMM3":  ["0xF514CF89A88EDCDE", "0x01E3DC4237BECFCF", "0xF514CF89A88EDCDE", "0x01E3DC4237BECFCF"],
    "XMM4":  ["0x0004B0350897F35A", "0x03CD750E809C18D0", "0x0004B0350897F35A", "0x03CD750E809C18D0"],
    "XMM9":  ["0x066A5FA4AD5148C8", "0x00BCA2DA387E55A2", "0x066A5FA4AD5148C8", "0x00BCA2DA387E55A2"],
    "XMM10": ["0x1E0F03011112ED90", "0x18C90F3EC0D58440", "0x1E0F03011112ED90", "0x18C90F3EC0D58440"],
    "XMM11": ["0xEE94B334B2358DF2", "0x1B82409D7AE7FA28", "0xEE94B334B2358DF2", "0x1B82409D7AE7FA28"],
    "XMM12": ["0xED12F34E8FB5E098", "0xD83D0BA0FF8632DB", "0xED12F34E8FB5E098", "0xD83D0BA0FF8632DB"]
  }
}
%endif

; Exercises the 256-bit form of VPMULDQ (signed multiply of the low dword of
; each qword, producing 64-bit products) with a memory second source.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; First sources: vectors 0-7 of the table.
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]
vmovaps ymm3, [rdx + 32 * 2]
vmovaps ymm4, [rdx + 32 * 3]
vmovaps ymm5, [rdx + 32 * 4]
vmovaps ymm6, [rdx + 32 * 5]
vmovaps ymm7, [rdx + 32 * 6]
vmovaps ymm8, [rdx + 32 * 7]

; Second sources: vectors 8-15, read directly from memory.
; ymm1-ymm4: destination aliases the first source.
; ymm9-ymm12: destination is distinct from both sources.
vpmuldq ymm1, ymm1, [rdx + 32 * 8]
vpmuldq ymm2, ymm2, [rdx + 32 * 9]
vpmuldq ymm3, ymm3, [rdx + 32 * 10]
vpmuldq ymm4, ymm4, [rdx + 32 * 11]
vpmuldq ymm9, ymm5, [rdx + 32 * 12]
vpmuldq ymm10, ymm6, [rdx + 32 * 13]
vpmuldq ymm11, ymm7, [rdx + 32 * 14]
vpmuldq ymm12, ymm8, [rdx + 32 * 15]

hlt

align 32
; 512 bytes of random data: 16 vectors of 32 bytes, each 128-bit pattern
; repeated in both halves of its vector (so both lanes yield the same result).
.data:
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F
dq 0xA76C4F06A12BFCE0
dq 0x9B80767F1E6A060F

dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE
dq 0x6868C3F3AAED56E0
dq 0xF0FCE9E294E6E6DE

dq 0x6C8BABD754A8356E
dq 0x277EA625CA925F77
dq 0x6C8BABD754A8356E
dq 0x277EA625CA925F77

dq 0x6A6FD695EC73CDC7
dq 0xDDA1B927BBF2AEBB
dq 0x6A6FD695EC73CDC7
dq 0xDDA1B927BBF2AEBB

dq 0x88312CD5C7D14D73
dq 0x7F091E1EFDDBE7FE
dq 0x88312CD5C7D14D73
dq 0x7F091E1EFDDBE7FE

dq 0xF29AE6EF954EFA14
dq 0x8273A8A49A6242A0
dq 0xF29AE6EF954EFA14
dq 0x8273A8A49A6242A0

dq 0x3212073882160F0E
dq 0xB3780763C1923507
dq 0x3212073882160F0E
dq 0xB3780763C1923507

dq 0x462A372B571946CB
dq 0xA38DCD3D790E041F
dq 0x462A372B571946CB
dq 0xA38DCD3D790E041F

dq 0x3057BAAB2F86F32B
dq 0xEF3F4F46F02CD62E
dq 0x3057BAAB2F86F32B
dq 0xEF3F4F46F02CD62E

dq 0xDE3C4B3485BBD1EF
dq 0x9DE3718DB9A3489E
dq 0xDE3C4B3485BBD1EF
dq 0x9DE3718DB9A3489E

dq 0x9D50328ADEFB7209
dq 0xEEF7EB52F6F19869
dq 0x9D50328ADEFB7209
dq 0xEEF7EB52F6F19869

dq 0xCE021C30FFC299D6
dq 0xA60E9C56F1B20570
dq 0xCE021C30FFC299D6
dq 0xA60E9C56F1B20570

dq 0x30763886E2C46218
dq 0xEB535D0EA7E4A12F
dq 0x30763886E2C46218
dq 0xEB535D0EA7E4A12F

dq 0x6802E8E1B7E04514
dq 0x46EBF28FC18EFE1A
dq 0x6802E8E1B7E04514
dq 0x46EBF28FC18EFE1A

dq 0x032E9746236A5D7F
dq 0xAC5976548F321298
dq 0x032E9746236A5D7F
dq 0xAC5976548F321298

dq 0xB6D30C71C85F76C8
dq 0x881D2CA6ABEA19C5
dq 0xB6D30C71C85F76C8
dq 0x881D2CA6ABEA19C5


================================================
FILE: unittests/ASM/VEX/vpmulhrsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x31A6343B36E09E7A", "0x48134B294E4F5186"],
    "XMM5": ["0x31A6343B36E09E7A", "0x48134B294E4F5186", "0x31A6343B36E09E7A", "0x48134B294E4F5186"]
  }
}
%endif

; Exercises VPMULHRSW (packed signed 16-bit multiply, high half with rounding
; and scaling) in xmm/ymm widths with register and memory second sources.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = first source, ymm1 = second source (same bytes as [rdx + 32]).
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; 128-bit: register form, then memory form. Expected results are identical
; and the upper 128 bits of the destination are expected to be zero.
vpmulhrsw xmm2, xmm0, xmm1
vpmulhrsw xmm3, xmm0, [rdx + 32]

; 256-bit: register form, then memory form.
vpmulhrsw ymm4, ymm0, ymm1
vpmulhrsw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source. The lowest word of each 128-bit half is 0x8748, which is
; negative as a signed 16-bit value, so the signed path is covered.
dq 0x4142434445468748
dq 0x5152535455565758
dq 0x4142434445468748
dq 0x5152535455565758

; Second source (all words positive).
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpmulhuw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x18D21A1D1B701CCA", "0x24092594272728C2", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x18D21A1D1B701CCA", "0x24092594272728C2", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x18D21A1D1B701CCA", "0x24092594272728C2", "0x18D21A1D1B701CCA", "0x24092594272728C2"],
    "XMM5":  ["0x18D21A1D1B701CCA", "0x24092594272728C2", "0x18D21A1D1B701CCA", "0x24092594272728C2"]
  }
}
%endif

; Exercises VPMULHUW (packed unsigned 16-bit multiply, high 16 bits of each
; product) in xmm/ymm widths with register and memory second sources.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = first source, ymm1 = second source (same bytes as [rdx + 32]).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register form, then memory form. Expected results are identical
; and the upper 128 bits of the destination are expected to be zero.
vpmulhuw xmm2, xmm0, xmm1
vpmulhuw xmm3, xmm0, [rdx + 32]

; 256-bit: register form, then memory form.
vpmulhuw ymm4, ymm0, ymm1
vpmulhuw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Second source.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpmulhw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2", "0x18D21A1D1B70CF3C", "0x24092594272728C2"],
    "XMM5":  ["0x18D21A1D1B70CF3C", "0x24092594272728C2", "0x18D21A1D1B70CF3C", "0x24092594272728C2"]
  }
}
%endif

; Exercises VPMULHW (packed signed 16-bit multiply, high 16 bits of each
; product) in xmm/ymm widths with register and memory second sources.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = first source, ymm1 = second source (same bytes as [rdx + 32]).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register form, then memory form. Expected results are identical
; and the upper 128 bits of the destination are expected to be zero.
vpmulhw xmm2, xmm0, xmm1
vpmulhw xmm3, xmm0, [rdx + 32]

; 256-bit: register form, then memory form.
vpmulhw ymm4, ymm0, ymm1
vpmulhw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source. The lowest word of each 128-bit half is 0x8748, which is
; negative as a signed 16-bit value, so the result differs from the unsigned
; high multiply for that element.
dq 0x4142434445468748
dq 0x5152535455565758
dq 0x4142434445468748
dq 0x5152535455565758

; Second source (all words positive).
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpmulld.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0x7A84D3FA541EF1BE", "0x5F0D7667E4D8E24A", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x44683C4CE9AC9780", "0x9DA95E9A6F25EF94", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4BC94EA0CCB0A64C", "0x3CF36EE04F371510", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x1AC415407B8BA3DB", "0x92CDC300DAB0773C", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x6796B1563F8D578C", "0x4C64F16199291FE4", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x01A14EF664207DC6", "0x1D3220DA400E1027", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x75DDBA582C3DD348", "0xA5141C506D8C60D7", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x7873FF38FB240E0D", "0x6C154F1ADB67CD17", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x7A84D3FA541EF1BE", "0x5F0D7667E4D8E24A", "0x7A84D3FA541EF1BE", "0x5F0D7667E4D8E24A"],
    "XMM10": ["0x44683C4CE9AC9780", "0x9DA95E9A6F25EF94", "0x44683C4CE9AC9780", "0x9DA95E9A6F25EF94"],
    "XMM11": ["0x4BC94EA0CCB0A64C", "0x3CF36EE04F371510", "0x4BC94EA0CCB0A64C", "0x3CF36EE04F371510"],
    "XMM12": ["0x1AC415407B8BA3DB", "0x92CDC300DAB0773C", "0x1AC415407B8BA3DB", "0x92CDC300DAB0773C"],
    "XMM13": ["0x6796B1563F8D578C", "0x4C64F16199291FE4", "0x6796B1563F8D578C", "0x4C64F16199291FE4"],
    "XMM14": ["0x01A14EF664207DC6", "0x1D3220DA400E1027", "0x01A14EF664207DC6", "0x1D3220DA400E1027"],
    "XMM15": ["0x75DDBA582C3DD348", "0xA5141C506D8C60D7", "0x75DDBA582C3DD348", "0xA5141C506D8C60D7"]
  }
}
%endif

; Exercises VPMULLD (packed 32-bit multiply, low 32 bits of each product) in
; both xmm and ymm widths with a memory second source.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; First sources for the 128-bit tests: vectors 0-7 of the table.
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]
vmovaps ymm3, [rdx + 32 * 2]
vmovaps ymm4, [rdx + 32 * 3]
vmovaps ymm5, [rdx + 32 * 4]
vmovaps ymm6, [rdx + 32 * 5]
vmovaps ymm7, [rdx + 32 * 6]
vmovaps ymm8, [rdx + 32 * 7]

; First sources for the 256-bit tests: the same vectors 0-6 again
; (vector 7 has no 256-bit counterpart in this test).
vmovaps ymm9, [rdx + 32 * 0]
vmovaps ymm10, [rdx + 32 * 1]
vmovaps ymm11, [rdx + 32 * 2]
vmovaps ymm12, [rdx + 32 * 3]
vmovaps ymm13, [rdx + 32 * 4]
vmovaps ymm14, [rdx + 32 * 5]
vmovaps ymm15, [rdx + 32 * 6]

; 128-bit form: destination aliases the first source, second source is
; vectors 8-15 from memory. Upper 128 bits are expected to be zero.
vpmulld xmm1, xmm1, [rdx + 32 * 8]
vpmulld xmm2, xmm2, [rdx + 32 * 9]
vpmulld xmm3, xmm3, [rdx + 32 * 10]
vpmulld xmm4, xmm4, [rdx + 32 * 11]
vpmulld xmm5, xmm5, [rdx + 32 * 12]
vpmulld xmm6, xmm6, [rdx + 32 * 13]
vpmulld xmm7, xmm7, [rdx + 32 * 14]
vpmulld xmm8, xmm8, [rdx + 32 * 15]

; 256-bit form with the same operand pairing (vectors 8-14 from memory).
vpmulld ymm9, ymm9, [rdx + 32 * 8]
vpmulld ymm10, ymm10, [rdx + 32 * 9]
vpmulld ymm11, ymm11, [rdx + 32 * 10]
vpmulld ymm12, ymm12, [rdx + 32 * 11]
vpmulld ymm13, ymm13, [rdx + 32 * 12]
vpmulld ymm14, ymm14, [rdx + 32 * 13]
vpmulld ymm15, ymm15, [rdx + 32 * 14]

hlt

align 32
; The table is written as single-precision float literals, but VPMULLD is an
; integer instruction: only the raw 32-bit patterns of these values matter.
; Each 16-byte row is repeated so both 128-bit halves of a vector are equal.
.data:
dd 655.9708, 532.2244, 108.0451, 512.4019
dd 655.9708, 532.2244, 108.0451, 512.4019

dd 754.227 , 586.0859, 127.7574, 114.8167
dd 754.227 , 586.0859, 127.7574, 114.8167

dd 764.4266, 226.6145, 337.864 , 320.3296
dd 764.4266, 226.6145, 337.864 , 320.3296

dd 296.5247, 480.0057, 28.4267 , 565.9418
dd 296.5247, 480.0057, 28.4267 , 565.9418

dd 265.8255, 536.4473, 754.3489, 460.681
dd 265.8255, 536.4473, 754.3489, 460.681

dd 818.7269, 43.7204 , 464.592 , 847.9381
dd 818.7269, 43.7204 , 464.592 , 847.9381

dd 306.0592, 702.7584, 887.6473, 551.5908
dd 306.0592, 702.7584, 887.6473, 551.5908

dd 620.9001, 520.9829, 232.9532, 510.3388
dd 620.9001, 520.9829, 232.9532, 510.3388

dd 204.8474, 225.626 , 564.973 , 790.5175
dd 204.8474, 225.626 , 564.973 , 790.5175

dd 836.1953, 844.5266, 633.5626, 501.7409
dd 836.1953, 844.5266, 633.5626, 501.7409

dd 393.2616, 674.4415, 244.3265, 971.1598
dd 393.2616, 674.4415, 244.3265, 971.1598

dd 770.8029, 746.1836, 255.9902, 567.7578
dd 770.8029, 746.1836, 255.9902, 567.7578

dd 187.7175, 924.181 , 466.4362, 169.8267
dd 187.7175, 924.181 , 466.4362, 169.8267

dd 651.7481, 462.4206, 396.6924, 355.8538
dd 651.7481, 462.4206, 396.6924, 355.8538

dd 6.148   , 523.1443, 989.7004, 713.6646
dd 6.148   , 523.1443, 989.7004, 713.6646

dd 497.5427, 657.6965, 651.0534, 778.5236
dd 497.5427, 657.6965, 651.0534, 778.5236


================================================
FILE: unittests/ASM/VEX/vpmullw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940", "0xFD44929037E4ED40", "0x68847E10A3A4D940"],
    "XMM5":  ["0xFD44929037E4ED40", "0x68847E10A3A4D940", "0xFD44929037E4ED40", "0x68847E10A3A4D940"]
  }
}
%endif

; Exercises VPMULLW (packed 16-bit multiply, low 16 bits of each product) in
; xmm/ymm widths with register and memory second sources.
; Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = first source, ymm1 = second source (same bytes as [rdx + 32]).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register form, then memory form. Expected results are identical
; and the upper 128 bits of the destination are expected to be zero.
vpmullw xmm2, xmm0, xmm1
vpmullw xmm3, xmm0, [rdx + 32]

; 256-bit: register form, then memory form.
vpmullw ymm4, ymm0, ymm1
vpmullw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
; First source.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Second source.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpmuludq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x000000000003FFFC", "0x000000000000FFFE", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x000000000003FFFC", "0x000000000000FFFE", "0x000000000003FFFC", "0x000000000000FFFE"],
    "XMM4": ["0x000000000003FFFC", "0x000000000000FFFE", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x000000000003FFFC", "0x000000000000FFFE", "0x000000000003FFFC", "0x000000000000FFFE"]
  }
}
%endif

; Exercises VPMULUDQ (unsigned multiply of the low dword of each qword,
; producing 64-bit products) in xmm/ymm widths with memory and register
; second sources. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = first source, ymm1 = second source (same bytes as [rdx + 32]).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory second source: 128-bit, then 256-bit.
vpmuludq xmm2, xmm0, [rdx + 32]
vpmuludq ymm3, ymm0, [rdx + 32]

; Register second source: 128-bit, then 256-bit. Expected results equal the
; memory forms above.
vpmuludq xmm4, xmm0, xmm1
vpmuludq ymm5, ymm0, ymm1

hlt

align 32
.data:
; First source. Only the low dword of each qword (0xFFFF / 0x7FFF) takes part
; in the multiply; the non-zero high dwords must be ignored.
dq 0x414243440000FFFF
dq 0x5152535400007FFF
dq 0x414243440000FFFF
dq 0x5152535400007FFF

; Second source. Low dwords are 4 and 2; high dwords are again filler.
dq 0x6162636400000004
dq 0x7172737400000002
dq 0x6162636400000004
dq 0x7172737400000002


================================================
FILE: unittests/ASM/VEX/vpor.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM3": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0xEFEEEFEE75767778", "0x71727374FFFFFFFF"],
    "XMM5": ["0xCDCECFCC75767778", "0x71727374DDDFDFDD", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPOR (bitwise OR) in xmm/ymm widths with register and memory
; second sources. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data1]
lea rbx, [rel .data2]

; ymm0 / ymm1 keep the two inputs; both are also checked in RegData.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
vpor ymm2, ymm0, ymm1
vpor xmm3, xmm0, xmm1

; With memory operand
; Expected results equal the register-only forms; the xmm forms expect the
; upper 128 bits of the destination to be zero.
vpor ymm4, ymm0, [rbx]
vpor xmm5, xmm0, [rbx]

hlt

; .data1 is 32-byte aligned and exactly 32 bytes long, so .data2 is aligned too.
align 32
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpsadbw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x100", "0x100", "0x000", "0x000"],
    "XMM8":  ["0x327", "0x2F4", "0x000", "0x000"],
    "XMM9":  ["0x2BA", "0x27F", "0x000", "0x000"],
    "XMM10": ["0x2B1", "0x284", "0x000", "0x000"],
    "XMM11": ["0x295", "0x280", "0x000", "0x000"],
    "XMM12": ["0x190", "0x279", "0x000", "0x000"],
    "XMM13": ["0x29B", "0x2A8", "0x000", "0x000"],
    "XMM14": ["0x25B", "0x1EA", "0x000", "0x000"]
  }
}
%endif

; Exercises the 128-bit form of VPSADBW (sum of absolute byte differences,
; one 16-bit sum per 64-bit half). Results are compared against RegData in
; the CONFIG header; the upper 128 bits of every destination are expected
; to be zero.
lea rdx, [rel .reg_data]

; Simple, hand-checkable case: every byte of the second .reg_data vector is
; 0x20 larger than the matching byte of the first, so each qword sums to
; 8 * 0x20 = 0x100.
; Uses the VEX encoding (destination aliasing the first source) so that this
; check covers VPSADBW like the rest of the file, instead of the legacy SSE
; PSADBW encoding.
vmovaps xmm0, [rdx + 8 * 2]
vpsadbw xmm0, xmm0, [rdx + 8 * 0]

lea rdx, [rel .data]

; First sources: 16-byte vectors 0-6 of the random table.
vmovaps xmm1, [rdx + 16 * 0]
vmovaps xmm2, [rdx + 16 * 1]
vmovaps xmm3, [rdx + 16 * 2]
vmovaps xmm4, [rdx + 16 * 3]
vmovaps xmm5, [rdx + 16 * 4]
vmovaps xmm6, [rdx + 16 * 5]
vmovaps xmm7, [rdx + 16 * 6]

; Second sources: vectors 8-14, read directly from memory. Destinations are
; distinct from both sources. Vectors 7 and 15 are not referenced.
vpsadbw xmm8,  xmm1, [rdx + 16 * 8]
vpsadbw xmm9,  xmm2, [rdx + 16 * 9]
vpsadbw xmm10, xmm3, [rdx + 16 * 10]
vpsadbw xmm11, xmm4, [rdx + 16 * 11]
vpsadbw xmm12, xmm5, [rdx + 16 * 12]
vpsadbw xmm13, xmm6, [rdx + 16 * 13]
vpsadbw xmm14, xmm7, [rdx + 16 * 14]

hlt

align 16

.reg_data:
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778

.data:
dq 0xE0FC2BA1064F6CA7
dq 0x0F066A1E7F76809B

dq 0xE056EDAAF3C36868
dq 0xDEE6E694E2E9FCF0

dq 0x6E35A854D7AB8B6C
dq 0x775F92CA25A67E27

dq 0xC7CD73EC95D66F6A
dq 0xBBAEF2BB27B9A1DD

dq 0x734DD1C7D52C3188
dq 0xFEE7DBFD1E1E097F

dq 0x14FA4E95EFE69AF2
dq 0xA042629AA4A87382

dq 0x0E0F168238071232
dq 0x073592C1630778B3

dq 0xCB4619572B372A46
dq 0x1F040E793DCD8DA3

dq 0x2BF3862FABBA5730
dq 0x2ED62CF0464F3FEF

dq 0xEFD1BB85344B3CDE
dq 0x9E48A3B98D71E39D

dq 0x0972FBDE8A32509D
dq 0x6998F1F652EBF7EE

dq 0xD699C2FF301C02CE
dq 0x7005B2F1569C0EA6

dq 0x1862C4E286387630
dq 0x2FA1E4A70E5D53EB

dq 0x1445E0B7E1E80268
dq 0x1AFE8EC18FF2EB46

dq 0x7F5D6A2346972E03
dq 0x9812328F547659AC

dq 0xC8765FC8710CD3B6
dq 0xC519EAABA62C1D88


================================================
FILE: unittests/ASM/VEX/vpsadbw_256.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x100", "0x100", "0x080", "0x180"],
    "XMM8":  ["0x327", "0x21D", "0x3CA", "0x2BA"],
    "XMM9":  ["0x2BA", "0x3CA", "0x21D", "0x327"],
    "XMM10": ["0x2B1", "0x279", "0x284", "0x1F8"],
    "XMM11": ["0x295", "0x306", "0x280", "0x27B"],
    "XMM12": ["0x245", "0x235", "0x279", "0x42E"],
    "XMM13": ["0x29B", "0x139", "0x35B", "0x396"],
    "XMM14": ["0x25B", "0x390", "0x1EA", "0x2F9"]
  }
}
%endif

; Exercises the 256-bit form of VPSADBW (sum of absolute byte differences,
; one 16-bit sum per 64-bit element). Results are compared against RegData in
; the CONFIG header.
lea rdx, [rel .reg_data]

; Simple, hand-checkable case: second .reg_data vector against the first.
; Per-byte differences are 0x20, 0x20, 0x10 and 0x30 for the four qwords,
; giving sums of 0x100, 0x100, 0x080 and 0x180.
vmovaps ymm0, [rdx + 32]
vpsadbw ymm0, ymm0, [rdx]

lea rdx, [rel .data]

; First sources: 32-byte vectors 0-6 of the table.
vmovaps ymm1, [rdx + 32 * 0]
vmovaps ymm2, [rdx + 32 * 1]
vmovaps ymm3, [rdx + 32 * 2]
vmovaps ymm4, [rdx + 32 * 3]
vmovaps ymm5, [rdx + 32 * 4]
vmovaps ymm6, [rdx + 32 * 5]
vmovaps ymm7, [rdx + 32 * 6]

; Second sources: vectors 8-14, read directly from memory. Destinations are
; distinct from both sources. Vectors 7 and 15 are not referenced.
vpsadbw ymm8,  ymm1, [rdx + 32 * 8]
vpsadbw ymm9,  ymm2, [rdx + 32 * 9]
vpsadbw ymm10, ymm3, [rdx + 32 * 10]
vpsadbw ymm11, ymm4, [rdx + 32 * 11]
vpsadbw ymm12, ymm5, [rdx + 32 * 12]
vpsadbw ymm13, ymm6, [rdx + 32 * 13]
vpsadbw ymm14, ymm7, [rdx + 32 * 14]

hlt

align 32

.reg_data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x5152535455565758
dq 0x4142434445464748

.data:
dq 0xE0FC2BA1064F6CA7
dq 0x0F066A1E7F76809B
dq 0xDEE6E694E2E9FCF0
dq 0xE056EDAAF3C36868

dq 0xE056EDAAF3C36868
dq 0xDEE6E694E2E9FCF0
dq 0x0F066A1E7F76809B
dq 0xE0FC2BA1064F6CA7

dq 0x6E35A854D7AB8B6C
dq 0xC7CD73EC95D66F6A
dq 0x775F92CA25A67E27
dq 0xBBAEF2BB27B9A1DD

dq 0xC7CD73EC95D66F6A
dq 0x6E35A854D7AB8B6C
dq 0xBBAEF2BB27B9A1DD
dq 0x775F92CA25A67E27

dq 0x734DD1C7D52C3188
dq 0xA042629AA4A87382
dq 0xFEE7DBFD1E1E097F
dq 0x14FA4E95EFE69AF2

dq 0x14FA4E95EFE69AF2
dq 0xA042629AA4A87382
dq 0x734DD1C7D52C3188
dq 0xFEE7DBFD1E1E097F

dq 0x0E0F168238071232
dq 0xCB4619572B372A46
dq 0x073592C1630778B3
dq 0x1F040E793DCD8DA3

dq 0xCB4619572B372A46
dq 0x0E0F168238071232
dq 0x1F040E793DCD8DA3
dq 0x073592C1630778B3

dq 0x2BF3862FABBA5730
dq 0x9E48A3B98D71E39D
dq 0x2ED62CF0464F3FEF
dq 0xEFD1BB85344B3CDE

dq 0xEFD1BB85344B3CDE
dq 0x2ED62CF0464F3FEF
dq 0x9E48A3B98D71E39D
dq 0x2BF3862FABBA5730

dq 0x0972FBDE8A32509D
dq 0x7005B2F1569C0EA6
dq 0x6998F1F652EBF7EE
dq 0xD699C2FF301C02CE

dq 0xD699C2FF301C02CE
dq 0x6998F1F652EBF7EE
dq 0x7005B2F1569C0EA6
dq 0x0972FBDE8A32509D

dq 0xC8765FC8710CD3B6
dq 0x1862C4E286387630
dq 0x2FA1E4A70E5D53EB
dq 0xC519EAABA62C1D88

dq 0x1445E0B7E1E80268
dq 0x9812328F547659AC
dq 0x1AFE8EC18FF2EB46
dq 0x7F5D6A2346972E03

dq 0x7F5D6A2346972E03
dq 0x1445E0B7E1E80268
dq 0x9812328F547659AC
dq 0x1AFE8EC18FF2EB46

dq 0x1445E0B7E1E80268
dq 0xC8765FC8710CD3B6
dq 0xC519EAABA62C1D88
dq 0x1AFE8EC18FF2EB46


================================================
FILE: unittests/ASM/VEX/vpshufb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM5":  ["0x4848484848484848", "0x4848484848484848", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4847464544434241", "0x5857565554535251", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4848484848484848", "0x4848484848484848", "0x5858585858585858", "0x5858585858585858"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x4847464544434241", "0x5857565554535251", "0x5847464544434241", "0x4857565554535251"]
  }
}
%endif

; Exercises VPSHUFB (byte shuffle controlled by a mask vector) in xmm and ymm
; widths with four different masks. Results are compared against RegData in
; the CONFIG header.
lea rdx, [rel .data]

; ymm0 = data to shuffle, ymm1-ymm4 = the four shuffle masks.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]
vmovaps ymm3, [rdx + 32 * 3]
vmovaps ymm4, [rdx + 32 * 4]

; 128-bit forms; upper 128 bits of each destination are expected to be zero.
vpshufb xmm5, xmm0, xmm1
vpshufb xmm6, xmm0, xmm2
vpshufb xmm7, xmm0, xmm3
vpshufb xmm8, xmm0, xmm4

; 256-bit forms; each 128-bit lane is shuffled from its own lane of ymm0.
vpshufb ymm9,  ymm0, ymm1
vpshufb ymm10, ymm0, ymm2
vpshufb ymm11, ymm0, ymm3
vpshufb ymm12, ymm0, ymm4

hlt

align 32
.data:
; Source data. The upper lane differs from the lower lane only in the lowest
; byte of each qword (0x58 and 0x48 swapped), so the expected results show
; which lane each byte was taken from.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464758
dq 0x5152535455565748

; Mask 1: every index is 0, so byte 0 of the lane is broadcast.
dq 0
dq 0
dq 0
dq 0

; Mask 2: all bits set; bit 7 of each mask byte is set, so the result is zero.
dq -1
dq -1
dq -1
dq -1

; Mask 3: only bit 7 of each mask byte is set; the result is zero as well.
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x8080808080808080
dq 0x8080808080808080

; Mask 4: indices that reverse the byte order within each 64-bit half.
dq 0x0001020304050607
dq 0x08090A0B0C0D0E0F
dq 0x0001020304050607
dq 0x08090A0B0C0D0E0F


================================================
FILE: unittests/ASM/VEX/vpshufd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0xFFFFCCCCDDDDEEEE", "0xAAAABBBB88889999"],
    "XMM2": ["0x4546474845464748", "0x4546474845464748", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7172737471727374", "0x7172737471727374", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4546474845464748", "0x4546474845464748", "0xCCCCDDDDCCCCDDDD", "0xCCCCDDDDCCCCDDDD"],
    "XMM5": ["0x7172737471727374", "0x7172737471727374", "0xAAAABBBBAAAABBBB", "0xAAAABBBBAAAABBBB"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x5556575845464748", "0x5556575845464748", "0x11112222CCCCDDDD", "0x11112222CCCCDDDD"],
    "XMM9": ["0x7576777865666768", "0x7576777865666768", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSHUFD (dword shuffle selected by an 8-bit immediate) in xmm and
; ymm widths. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 / ymm1 are the two source vectors; both are also checked in RegData.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: broadcast dword 0 (imm 0x00) and dword 3 (imm 0xFF).
; Upper 128 bits of the destination are expected to be zero.
vpshufd xmm2, xmm0, 0x0
vpshufd xmm3, xmm1, 0xFF

; 256-bit: same selectors, applied to each 128-bit lane independently.
vpshufd ymm4, ymm0, 0x0
vpshufd ymm5, ymm1, 0xFF

; Shouldn't modify vector (selector is [3, 2, 1, 0])
; Which would effectively place elements in their
; same location
vpshufd ymm6, ymm0, 0b11100100
vpshufd xmm7, xmm1, 0b11100100

; [2, 0, 2, 0] shuffling
vpshufd ymm8, ymm0, 0b10001000
vpshufd xmm9, xmm1, 0b10001000

hlt

align 32
.data:
; Source for ymm0; the upper lane uses different values than the lower lane
; so the per-lane selection is visible in the results.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF11112222

; Source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFFCCCCDDDDEEEE
dq 0xAAAABBBB88889999


================================================
FILE: unittests/ASM/VEX/vpshufhw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0xFFFFCCCCDDDDEEEE", "0xAAAABBBB88889999"],
    "XMM2": ["0x4142434445464748", "0x5758575857585758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6162636465666768", "0x7172717271727172", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4142434445464748", "0x5758575857585758", "0xAAAABBBBCCCCDDDD", "0x2222222222222222"],
    "XMM5": ["0x6162636465666768", "0x7172717271727172", "0xFFFFCCCCDDDDEEEE", "0xAAAAAAAAAAAAAAAA"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434445464748", "0x5354575853545758", "0xAAAABBBBCCCCDDDD", "0xFFFF2222FFFF2222"],
    "XMM9": ["0x6162636465666768", "0x7374777873747778", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSHUFHW (shuffle of the four high words of each 128-bit lane,
; selected by an 8-bit immediate; the low qword is copied unchanged) in xmm
; and ymm widths. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 / ymm1 are the two source vectors; both are also checked in RegData.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: broadcast high word 0 (imm 0x00) and high word 3 (imm 0xFF).
; Upper 128 bits of the destination are expected to be zero.
vpshufhw xmm2, xmm0, 0x0
vpshufhw xmm3, xmm1, 0xFF

; 256-bit: same selectors, applied to each 128-bit lane independently.
vpshufhw ymm4, ymm0, 0x0
vpshufhw ymm5, ymm1, 0xFF

; Shouldn't modify vector (selector is [3, 2, 1, 0])
; Which would effectively place elements in their
; same location
vpshufhw ymm6, ymm0, 0b11100100
vpshufhw xmm7, xmm1, 0b11100100

; [2, 0, 2, 0] shuffling
vpshufhw ymm8, ymm0, 0b10001000
vpshufhw xmm9, xmm1, 0b10001000

hlt

align 32
.data:
; Source for ymm0; the upper lane uses different values than the lower lane
; so the per-lane selection is visible in the results.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF11112222

; Source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFFCCCCDDDDEEEE
dq 0xAAAABBBB88889999


================================================
FILE: unittests/ASM/VEX/vpshuflw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM1": ["0x6162636465666768", "0x7172737475767778", "0xFFFFCCCCDDDDEEEE", "0xAAAABBBB88889999"],
    "XMM2": ["0x4748474847484748", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6162616261626162", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4748474847484748", "0x5152535455565758", "0xDDDDDDDDDDDDDDDD", "0xEEEEFFFF11112222"],
    "XMM5": ["0x6162616261626162", "0x7172737475767778", "0xFFFFFFFFFFFFFFFF", "0xAAAABBBB88889999"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0xAAAABBBBCCCCDDDD", "0xEEEEFFFF11112222"],
    "XMM7": ["0x6162636465666768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4344474843444748", "0x5152535455565758", "0xBBBBDDDDBBBBDDDD", "0xEEEEFFFF11112222"],
    "XMM9": ["0x6364676863646768", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSHUFLW (shuffle of the four low words of each 128-bit lane,
; selected by an 8-bit immediate; the high qword is copied unchanged) in xmm
; and ymm widths. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 / ymm1 are the two source vectors; both are also checked in RegData.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: broadcast low word 0 (imm 0x00) and low word 3 (imm 0xFF).
; Upper 128 bits of the destination are expected to be zero.
vpshuflw xmm2, xmm0, 0x0
vpshuflw xmm3, xmm1, 0xFF

; 256-bit: same selectors, applied to each 128-bit lane independently.
vpshuflw ymm4, ymm0, 0x0
vpshuflw ymm5, ymm1, 0xFF

; Shouldn't modify vector (selector is [3, 2, 1, 0])
; Which would effectively place elements in their
; same location
vpshuflw ymm6, ymm0, 0b11100100
vpshuflw xmm7, xmm1, 0b11100100

; [2, 0, 2, 0] shuffling
vpshuflw ymm8, ymm0, 0b10001000
vpshuflw xmm9, xmm1, 0b10001000

hlt

align 32
.data:
; Source for ymm0; the upper lane uses different values than the lower lane
; so the per-lane selection is visible in the results.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xAAAABBBBCCCCDDDD
dq 0xEEEEFFFF11112222

; Source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFFCCCCDDDDEEEE
dq 0xAAAABBBB88889999


================================================
FILE: unittests/ASM/VEX/vpsignb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFEFEFEFEFEFEFEFE", "0xFDFDFDFDFDFDFDFD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFEFEFEFE00000000", "0x03030303FD000300", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xFEFEFEFEFEFEFEFE", "0xFDFDFDFDFDFDFDFD", "0xFEFEFEFEFEFEFEFE", "0xFDFDFDFDFDFDFDFD"],
    "XMM7": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM8": ["0xFEFEFEFE00000000", "0x03030303FD000300", "0xFEFEFEFE00000000", "0x03030303FD000300"]
  }
}
%endif

; Exercises VPSIGNB: each byte of the first source is zeroed, negated or kept
; depending on whether the matching byte of the second source is zero,
; negative or positive. Every case is run in xmm and ymm width with a memory
; second source. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = value vector (fifth row of the table), shared by all cases.
vmovaps ymm0, [rdx + 32 * 4]

; Test with full zero
vpsignb xmm1, xmm0, [rdx + 32 * 0]
vpsignb ymm5, ymm0, [rdx + 32 * 0]

; Test with full negative
vpsignb xmm2, xmm0, [rdx + 32 * 1]
vpsignb ymm6, ymm0, [rdx + 32 * 1]

; Test with full positive
vpsignb xmm3, xmm0, [rdx + 32 * 2]
vpsignb ymm7, ymm0, [rdx + 32 * 2]

; Test a mix
vpsignb xmm4, xmm0, [rdx + 32 * 3]
vpsignb ymm8, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
; Row 0: all control bytes zero.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 1: all control bytes negative (0xFF).
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; Row 2: all control bytes positive (0x01).
dq 0x0101010101010101
dq 0x0101010101010101
dq 0x0101010101010101
dq 0x0101010101010101

; Row 3: mix of zero, negative and positive control bytes.
dq 0xFFFFFFFF00000000
dq 0x01010101FF000100
dq 0xFFFFFFFF00000000
dq 0x01010101FF000100

; Row 4: the values that get zeroed / negated / kept.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vpsignd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFDFDFDFEFDFDFDFE", "0xFCFCFCFDFCFCFCFD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFDFDFDFE00000000", "0x03030303FCFCFCFD", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xFDFDFDFEFDFDFDFE", "0xFCFCFCFDFCFCFCFD", "0xFDFDFDFEFDFDFDFE", "0xFCFCFCFDFCFCFCFD"],
    "XMM7": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM8": ["0xFDFDFDFE00000000", "0x03030303FCFCFCFD", "0xFDFDFDFE00000000", "0x03030303FCFCFCFD"]
  }
}
%endif

; Exercises VPSIGND: each dword of the first source is zeroed, negated or
; kept depending on whether the matching dword of the second source is zero,
; negative or positive. Every case is run in xmm and ymm width with a memory
; second source. Results are compared against RegData in the CONFIG header.
lea rdx, [rel .data]

; ymm0 = value vector (fifth row of the table), shared by all cases.
vmovaps ymm0, [rdx + 32 * 4]

; Test with full zero
vpsignd xmm1, xmm0, [rdx + 32 * 0]
vpsignd ymm5, ymm0, [rdx + 32 * 0]

; Test with full negative
vpsignd xmm2, xmm0, [rdx + 32 * 1]
vpsignd ymm6, ymm0, [rdx + 32 * 1]

; Test with full positive
vpsignd xmm3, xmm0, [rdx + 32 * 2]
vpsignd ymm7, ymm0, [rdx + 32 * 2]

; Test a mix
vpsignd xmm4, xmm0, [rdx + 32 * 3]
vpsignd ymm8, ymm0, [rdx + 32 * 3]

hlt

align 32
.data:
; Row 0: all control dwords zero.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 1: all control dwords negative (0xFFFFFFFF).
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; Row 2: all control dwords positive (1).
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001
dq 0x0000000100000001

; Row 3: mix of zero, negative and positive control dwords.
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF
dq 0xFFFFFFFF00000000
dq 0x00000001FFFFFFFF

; Row 4: the values that get zeroed / negated / kept.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vpsignw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM1": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xFDFEFDFEFDFEFDFE", "0xFCFDFCFDFCFDFCFD", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0202020202020202", "0x0303030303030303", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xFDFEFDFE00000000", "0x03030303FCFD0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xFDFEFDFEFDFEFDFE", "0xFCFDFCFDFCFDFCFD", "0xFDFEFDFEFDFEFDFE", "0xFCFDFCFDFCFDFCFD"],
    "XMM7": ["0x0202020202020202", "0x0303030303030303", "0x0202020202020202", "0x0303030303030303"],
    "XMM8": ["0xFDFEFDFE00000000", "0x03030303FCFD0000", "0xFDFEFDFE00000000", "0x03030303FCFD0000"]
  }
}
%endif

; Exercises vpsignw in its 128-bit (xmm) and 256-bit (ymm) forms.
; The first source is always ymm0/xmm0; the second source is a 32-byte row of
; .data. Per the RegData expectations in the CONFIG header, each word of the
; result is 0, the negated word, or the unchanged word of ymm0 depending on
; whether the matching word of the row is zero, negative, or positive.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = row 4, the value operand shared by every case below.
vmovaps ymm0, [rdx + 32 * 4]

; Test with full zero
vpsignw xmm1, xmm0, [rdx + 32 * 0]
vpsignw ymm5, ymm0, [rdx + 32 * 0]

; Test with full negative
vpsignw xmm2, xmm0, [rdx + 32 * 1]
vpsignw ymm6, ymm0, [rdx + 32 * 1]

; Test with full positive
vpsignw xmm3, xmm0, [rdx + 32 * 2]
vpsignw ymm7, ymm0, [rdx + 32 * 2]

; Test a mix
vpsignw xmm4, xmm0, [rdx + 32 * 3]
vpsignw ymm8, ymm0, [rdx + 32 * 3]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: every word is zero.
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 1: every word is negative (-1).
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF
dq 0xFFFFFFFFFFFFFFFF

; Row 2: every word is positive (+1).
dq 0x0001000100010001
dq 0x0001000100010001
dq 0x0001000100010001
dq 0x0001000100010001

; Row 3: mix of zero, negative and positive words.
dq 0xFFFFFFFF00000000
dq 0x00010001FFFF0000
dq 0xFFFFFFFF00000000
dq 0x00010001FFFF0000

; Row 4: value operand loaded into ymm0.
dq 0x0202020202020202
dq 0x0303030303030303
dq 0x0202020202020202
dq 0x0303030303030303


================================================
FILE: unittests/ASM/VEX/vpslld.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x4142434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4344000047480000", "0x7374000077780000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4344000047480000", "0x7374000077780000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM11": ["0x4344000047480000", "0x7374000077780000", "0x4344000047480000", "0x7374000077780000"],
    "XMM12": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM14": ["0x4344000047480000", "0x7374000077780000", "0x4344000047480000", "0x7374000077780000"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpslld (logical left shift of dwords) with the shift count supplied
; by an xmm register or by a memory operand, for xmm and ymm destinations.
; Counts used: 0 (value unchanged), 16, and 32 (every dword becomes zero).

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift; ymm1..ymm3 = count rows (0, 16, 32 in the low qword).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]

; 128-bit destination, count in a register.
vpslld xmm4, xmm0, xmm1
vpslld xmm5, xmm0, xmm2
vpslld xmm6, xmm0, xmm3

; 128-bit destination, count in memory.
vpslld xmm7, xmm0, [rdx + 32 * 1]
vpslld xmm8, xmm0, [rdx + 32 * 2]
vpslld xmm9, xmm0, [rdx + 32 * 3]

; 256-bit destination, count in a register.
vpslld ymm10, ymm0, xmm1
vpslld ymm11, ymm0, xmm2
vpslld ymm12, ymm0, xmm3

; 256-bit destination, count in memory.
vpslld ymm13, ymm0, [rdx + 32 * 1]
vpslld ymm14, ymm0, [rdx + 32 * 2]
vpslld ymm15, ymm0, [rdx + 32 * 3]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift.
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Rows 1-3: shift counts. The expected results correspond to the low qword
; only; the remaining qwords are non-zero filler.
; Row 1: count = 0.
dq 0x0000000000000000
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 2: count = 16.
dq 0x0000000000000010
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 3: count = 32.
dq 0x0000000000000020
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpslld_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6364000067680000", "0x7374000077780000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4243440046474800", "0x5253540056575800", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x6364000067680000", "0x7374000077780000", "0x6364000067680000", "0x7374000077780000"],
    "XMM8": ["0x4243440046474800", "0x5253540056575800", "0x4243440046474800", "0x5253540056575800"],
    "XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
    "XMM10": ["0x848688008c8e9000", "0xa4a6a800acaeb000", "0xc4c6c800ccced000", "0xe4e6e800eceef000"]
  }
}
%endif

; Exercises vpslld with an immediate shift count (32, 16, 8, 1 and 9) for xmm
; and ymm destinations. A count of 32 is expected to zero every dword.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = the two source rows; ymm10 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

; 128-bit forms.
vpslld xmm2, xmm0, 32
vpslld xmm3, xmm1, 16
vpslld xmm4, xmm0, 8
vpslld xmm5, xmm1, 1

; 256-bit forms.
vpslld ymm6, ymm0, 32
vpslld ymm7, ymm1, 16
vpslld ymm8, ymm0, 8
vpslld ymm9, ymm1, 1

; Destination is also the source; count 9 is not a multiple of 8.
vpslld ymm10, ymm10, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpslldq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0000000000000000", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4546474800000000", "0x5556575841424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x6263646566676800", "0x7273747576777861", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x0000000000000000", "0x6162636465666768", "0x0000000000000000", "0x6162636465666768"],
    "XMM8": ["0x4546474800000000", "0x5556575841424344", "0x4546474800000000", "0x5556575841424344"],
    "XMM9": ["0x6263646566676800", "0x7273747576777861", "0x6263646566676800", "0x7273747576777861"]
  }
}
%endif

; Exercises vpslldq (left shift by whole bytes) with immediates 16, 8, 4 and 1
; for xmm and ymm destinations. The ymm expectations in RegData repeat the xmm
; result in the upper half, i.e. each 128-bit half is shifted on its own.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = the two source rows.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit forms; a shift of 16 bytes is expected to give zero.
vpslldq xmm2, xmm0, 16
vpslldq xmm3, xmm1, 8
vpslldq xmm4, xmm0, 4
vpslldq xmm5, xmm1, 1

; 256-bit forms with the same immediates.
vpslldq ymm6, ymm0, 16
vpslldq ymm7, ymm1, 8
vpslldq ymm8, ymm0, 4
vpslldq ymm9, ymm1, 1

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsllq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x4142434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4546474800000000", "0x7576777800000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4546474800000000", "0x7576777800000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM11": ["0x4546474800000000", "0x7576777800000000", "0x4546474800000000", "0x7576777800000000"],
    "XMM12": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x4142434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM14": ["0x4546474800000000", "0x7576777800000000", "0x4546474800000000", "0x7576777800000000"],
    "XMM15": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsllq (logical left shift of qwords) with the shift count supplied
; by an xmm register or by a memory operand, for xmm and ymm destinations.
; Counts used: 0 (value unchanged), 32, and 64 (every qword becomes zero).

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift; ymm1..ymm3 = count rows (0, 32, 64 in the low qword).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]

; 128-bit destination, count in a register.
vpsllq xmm4, xmm0, xmm1
vpsllq xmm5, xmm0, xmm2
vpsllq xmm6, xmm0, xmm3

; 128-bit destination, count in memory.
vpsllq xmm7, xmm0, [rdx + 32 * 1]
vpsllq xmm8, xmm0, [rdx + 32 * 2]
vpsllq xmm9, xmm0, [rdx + 32 * 3]

; 256-bit destination, count in a register.
vpsllq ymm10, ymm0, xmm1
vpsllq ymm11, ymm0, xmm2
vpsllq ymm12, ymm0, xmm3

; 256-bit destination, count in memory.
vpsllq ymm13, ymm0, [rdx + 32 * 1]
vpsllq ymm14, ymm0, [rdx + 32 * 2]
vpsllq ymm15, ymm0, [rdx + 32 * 3]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift.
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Rows 1-3: shift counts. The expected results correspond to the low qword
; only; the remaining qwords are non-zero filler.
; Row 1: count = 0.
dq 0x0000000000000000
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 2: count = 32.
dq 0x0000000000000020
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 3: count = 64.
dq 0x0000000000000040
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsllq_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x4546474800000000", "0x5556575800000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6364656667680000", "0x7374757677780000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4243444546474800", "0x5253545556575800", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x4546474800000000", "0x5556575800000000", "0x4546474800000000", "0x5556575800000000"],
    "XMM7": ["0x6364656667680000", "0x7374757677780000", "0x6364656667680000", "0x7374757677780000"],
    "XMM8": ["0x4243444546474800", "0x5253545556575800", "0x4243444546474800", "0x5253545556575800"],
    "XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
    "XMM10": ["0x8486888a8c8e9000", "0xa4a6a8aaacaeb000", "0xc4c6c8caccced000", "0xe4e6e8eaeceef000"]
  }
}
%endif

; Exercises vpsllq with an immediate shift count (32, 16, 8, 1 and 9) for xmm
; and ymm destinations.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = the two source rows; ymm10 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

; 128-bit forms.
vpsllq xmm2, xmm0, 32
vpsllq xmm3, xmm1, 16
vpsllq xmm4, xmm0, 8
vpsllq xmm5, xmm1, 1

; 256-bit forms.
vpsllq ymm6, ymm0, 32
vpsllq ymm7, ymm1, 16
vpsllq ymm8, ymm0, 8
vpsllq ymm9, ymm1, 1

; Destination is also the source; count 9 is not a multiple of 8.
vpsllq ymm10, ymm10, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsllvd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x00000000FFFFFF80", "0xFFC00000FFFFFC00", "0x0000000055555500", "0x0000000000000000"],
    "XMM3":  ["0x00000000FFFFFF80", "0xFFC00000FFFFFC00", "0x0000000055555500", "0x0000000000000000"],
    "XMM4":  ["0x00000000FFFFFF80", "0xFFC00000FFFFFC00", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x00000000FFFFFF80", "0xFFC00000FFFFFC00", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsllvd (left shift of each dword by its own count) with the counts
; in a register and in memory, for ymm and xmm destinations.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = values to shift, ymm1 = per-dword counts.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; 256-bit forms: counts from register, then from memory.
vpsllvd ymm2, ymm0, ymm1
vpsllvd ymm3, ymm0, [rdx + 32]

; 128-bit forms: counts from register, then from memory.
vpsllvd xmm4, xmm0, xmm1
vpsllvd xmm5, xmm0, [rdx + 32]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: values to shift.
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0xFFFFFFFF7FFFFFFF

; Row 1: per-dword counts, lowest dword first:
; 7, 8, 10, 22, 8, 4, 0xFFFFFFFF, 0xFFFFFFFF.
; The two 0xFFFFFFFF counts are expected to produce zero.
dq 0x0000000800000007
dq 0x000000160000000A
dq 0x0000000400000008
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpsllvq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x0000000000000000", "0xFFFFFFFFFFFF0000", "0x0000005555555500", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0xFFFFFFFFFFFF0000", "0x0000005555555500", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0xFFFFFFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0xFFFFFFFFFFFF0000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsllvq (left shift of each qword by its own count) with the counts
; in a register and in memory, for ymm and xmm destinations.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = values to shift, ymm1 = per-qword counts.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; 256-bit forms: counts from register, then from memory.
vpsllvq ymm2, ymm0, ymm1
vpsllvq ymm3, ymm0, [rdx + 32]

; 128-bit forms: counts from register, then from memory.
vpsllvq xmm4, xmm0, xmm1
vpsllvq xmm5, xmm0, [rdx + 32]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: values to shift.
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0xFFFFFFFF7FFFFFFF

; Row 1: per-qword counts, lowest qword first.
; The first (a huge value whose low byte is 0x10) and the last (64) are
; expected to produce zero; the middle two shift by 16 and 8.
dq 0xFFFFFFFFFFFFFF10
dq 0x0000000000000010
dq 0x0000000000000008
dq 0x0000000000000040


================================================
FILE: unittests/ASM/VEX/vpsllw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4200440046004800", "0x7200740076007800", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0", "0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0"],
    "XMM6":  ["0x4200440046004800", "0x7200740076007800", "0x4200440046004800", "0x7200740076007800"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0", "0x828486888A8C8E90", "0xE2E4E6E8EAECEEF0"]
  }
}
%endif

; Exercises vpsllw (logical left shift of words) with the shift count supplied
; by a memory operand and by an xmm register, for xmm and ymm destinations.
; Counts used: 1, 8, 16 and 32; counts of 16 and above are expected to zero
; every word.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift.
vmovaps ymm0, [rdx]

; 128-bit destination, count in memory.
vpsllw xmm1, xmm0, [rdx + 32 * 1]
vpsllw xmm2, xmm0, [rdx + 32 * 2]
vpsllw xmm3, xmm0, [rdx + 32 * 3]
vpsllw xmm4, xmm0, [rdx + 32 * 4]

; 256-bit destination, count in memory.
vpsllw ymm5, ymm0, [rdx + 32 * 1]
vpsllw ymm6, ymm0, [rdx + 32 * 2]
vpsllw ymm7, ymm0, [rdx + 32 * 3]
vpsllw ymm8, ymm0, [rdx + 32 * 4]

; Register-count form, matching the sibling shift tests. ymm9 holds row 1
; (count = 1), so the results must equal the memory-count results in xmm1/ymm5.
; ymm9 itself is not checked.
vmovaps ymm9, [rdx + 32 * 1]

vpsllw xmm10, xmm0, xmm9
vpsllw ymm11, ymm0, xmm9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift.
dq 0x4142434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Rows 1-4: shift counts. The expected results correspond to the low qword
; only; the remaining qwords are non-zero filler.
; Row 1: count = 1.
dq 0x0000000000000001
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 2: count = 8.
dq 0x0000000000000008
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 3: count = 16.
dq 0x0000000000000010
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 4: count = 32.
dq 0x0000000000000020
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsllw_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4200440046004800", "0x5200540056005800", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4200440046004800", "0x5200540056005800", "0x4200440046004800", "0x5200540056005800"],
    "XMM9": ["0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0", "0xC2C4C6C8CACCCED0", "0xE2E4E6E8EAECEEF0"],
    "XMM10": ["0x840088008c009000", "0xa400a800ac00b000", "0xc400c800cc00d000", "0xe400e800ec00f000"]
  }
}
%endif

; Exercises vpsllw with an immediate shift count (32, 16, 8, 1 and 9) for xmm
; and ymm destinations. Counts of 16 and 32 are expected to zero every word.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = the two source rows; ymm10 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

; 128-bit forms.
vpsllw xmm2, xmm0, 32
vpsllw xmm3, xmm1, 16
vpsllw xmm4, xmm0, 8
vpsllw xmm5, xmm1, 1

; 256-bit forms.
vpsllw ymm6, ymm0, 32
vpsllw ymm7, ymm1, 16
vpsllw ymm8, ymm0, 8
vpsllw ymm9, ymm1, 1

; Destination is also the source; count 9 is not a multiple of 8.
vpsllw ymm10, ymm10, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsrad.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xFFFF804200004546", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0xFFFF804200004546", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM11": ["0xFFFF804200004546", "0x0000717200007576", "0x0000414200004546", "0x0000717200007576"],
    "XMM12": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM14": ["0xFFFF804200004546", "0x0000717200007576", "0x0000414200004546", "0x0000717200007576"],
    "XMM15": ["0xFFFFFFFF00000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsrad (arithmetic right shift of dwords) with the shift count
; supplied by an xmm register or by a memory operand, for xmm and ymm
; destinations. Counts used: 0 (value unchanged), 16, and 32. With count 32
; the expected result is all ones for the negative dword and zero for the rest.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift; ymm1..ymm3 = count rows (0, 16, 32 in the low qword).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]

; 128-bit destination, count in a register.
vpsrad xmm4, xmm0, xmm1
vpsrad xmm5, xmm0, xmm2
vpsrad xmm6, xmm0, xmm3

; 128-bit destination, count in memory.
vpsrad xmm7, xmm0, [rdx + 32 * 1]
vpsrad xmm8, xmm0, [rdx + 32 * 2]
vpsrad xmm9, xmm0, [rdx + 32 * 3]

; 256-bit destination, count in a register.
vpsrad ymm10, ymm0, xmm1
vpsrad ymm11, ymm0, xmm2
vpsrad ymm12, ymm0, xmm3

; 256-bit destination, count in memory.
vpsrad ymm13, ymm0, [rdx + 32 * 1]
vpsrad ymm14, ymm0, [rdx + 32 * 2]
vpsrad ymm15, ymm0, [rdx + 32 * 3]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift. Only the top dword of the first qword (0x80424344)
; has its sign bit set.
dq 0x8042434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Rows 1-3: shift counts. The expected results correspond to the low qword
; only; the remaining qwords are non-zero filler.
; Row 1: count = 0.
dq 0x0000000000000000
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 2: count = 16.
dq 0x0000000000000010
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 3: count = 32.
dq 0x0000000000000020
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsrad_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000616200006566", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0041424300454647", "0x0051525300555657", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000616200006566", "0x0000717200007576", "0x0000616200006566", "0x0000717200007576"],
    "XMM10": ["0x0041424300454647", "0x0051525300555657", "0x0041424300454647", "0x0051525300555657"],
    "XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM13": ["0x0020a1210022a323", "0x0028a929002aab2b", "0x0030b1310032b333", "0x0038b939003abb3b"]
  }
}
%endif

; Exercises vpsrad (arithmetic right shift of dwords) with an immediate count
; (32, 16, 8, 1 and 9) for xmm and ymm destinations. Row 2 supplies negative
; dwords so that a count of 32 can be checked for sign fill.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = positive sources, ymm2 = mixed-sign source,
; ymm13 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm13, [rel .data2]

; 128-bit forms. xmm3: positive input, count 32 -> expected zero.
; xmm7: count 32 on row 2 -> expected all ones / zero by sign.
vpsrad xmm3, xmm0, 32
vpsrad xmm4, xmm1, 16
vpsrad xmm5, xmm0, 8
vpsrad xmm6, xmm1, 1
vpsrad xmm7, xmm2, 32

; 256-bit forms with the same sources and immediates.
vpsrad ymm8, ymm0, 32
vpsrad ymm9, ymm1, 16
vpsrad ymm10, ymm0, 8
vpsrad ymm11, ymm1, 1
vpsrad ymm12, ymm2, 32

; Destination is also the source; count 9 is not a multiple of 8.
vpsrad ymm13, ymm13, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0 (all dwords positive).
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1 (all dwords positive).
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Row 2: source for ymm2; dwords 0x80008000 are negative, 0x70007000 positive.
dq 0x8000800080008000
dq 0x7000700070007000
dq 0x8000800080008000
dq 0x7000700070007000

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsravd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
    "XMM3":  ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0400000000555555", "0xFFFFFFFF00000000"],
    "XMM4":  ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xFF80000000007FFF", "0x00000000FFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsravd (arithmetic right shift of each dword by its own count)
; with the counts in a register and in memory, for ymm and xmm destinations.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = values to shift, ymm1 = per-dword counts.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; 256-bit forms: counts from register, then from memory.
vpsravd ymm2, ymm0, ymm1
vpsravd ymm3, ymm0, [rdx + 32]

; 128-bit forms: counts from register, then from memory.
vpsravd xmm4, xmm0, xmm1
vpsravd xmm5, xmm0, [rdx + 32]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: values to shift (a mix of positive and negative dwords).
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0xFFFFFFFF7FFFFFFF

; Row 1: per-dword counts, lowest dword first:
; 16, 8, 32, 32, 8, 4, 0xFFFFFFFF, 0xFFFFFFFF.
; For counts of 32 and above the expected result is all ones for a negative
; dword and zero for a positive one.
dq 0x0000000800000010
dq 0x0000002000000020
dq 0x0000000400000008
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpsraw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM4":  ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xFF80004300450047", "0x0071007300750077", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x8042434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0xFF80004300450047", "0x0071007300750077", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM11": ["0xFF80004300450047", "0x0071007300750077", "0x0041004300450047", "0x0071007300750077"],
    "XMM12": ["0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x8042434445464748", "0x7172737475767778", "0x4142434445464748", "0x7172737475767778"],
    "XMM14": ["0xFF80004300450047", "0x0071007300750077", "0x0041004300450047", "0x0071007300750077"],
    "XMM15": ["0xFFFF000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpsraw (arithmetic right shift of words) with the shift count
; supplied by an xmm register or by a memory operand, for xmm and ymm
; destinations. Counts used: 0 (value unchanged), 8, and 16. With count 16
; the expected result is all ones for the negative word and zero for the rest.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift; ymm1..ymm3 = count rows (0, 8, 16 in the low qword).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]

; 128-bit destination, count in a register.
vpsraw xmm4, xmm0, xmm1
vpsraw xmm5, xmm0, xmm2
vpsraw xmm6, xmm0, xmm3

; 128-bit destination, count in memory.
vpsraw xmm7, xmm0, [rdx + 32 * 1]
vpsraw xmm8, xmm0, [rdx + 32 * 2]
vpsraw xmm9, xmm0, [rdx + 32 * 3]

; 256-bit destination, count in a register.
vpsraw ymm10, ymm0, xmm1
vpsraw ymm11, ymm0, xmm2
vpsraw ymm12, ymm0, xmm3

; 256-bit destination, count in memory.
vpsraw ymm13, ymm0, [rdx + 32 * 1]
vpsraw ymm14, ymm0, [rdx + 32 * 2]
vpsraw ymm15, ymm0, [rdx + 32 * 3]

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift. Only the top word of the first qword (0x8042) has its
; sign bit set.
dq 0x8042434445464748
dq 0x7172737475767778
dq 0x4142434445464748
dq 0x7172737475767778

; Rows 1-3: shift counts. The expected results correspond to the low qword
; only; the remaining qwords are non-zero filler.
; Row 1: count = 0.
dq 0x0000000000000000
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 2: count = 8.
dq 0x0000000000000008
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758

; Row 3: count = 16.
dq 0x0000000000000010
dq 0x5152535455565758
dq 0x5152535455565758
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsraw_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM3":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0041004300450047", "0x0051005300550057", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0041004300450047", "0x0051005300550057", "0x0041004300450047", "0x0051005300550057"],
    "XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM12": ["0xFFFFFFFFFFFFFFFF", "0x0000000000000000", "0xFFFFFFFFFFFFFFFF", "0x0000000000000000"],
    "XMM13": ["0x0020002100220023", "0x00280029002a002b", "0x0030003100320033", "0x00380039003a003b"]
  }
}
%endif

; Exercises vpsraw (arithmetic right shift of words) with an immediate count
; (32, 16, 8, 1 and 9) for xmm and ymm destinations. Row 2 supplies negative
; words so that a count of 16 can be checked for sign fill.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = positive sources, ymm2 = mixed-sign source,
; ymm13 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm13, [rel .data2]

; 128-bit forms. xmm3/xmm4: positive input, count >= 16 -> expected zero.
; xmm7: count 16 on row 2 -> expected all ones / zero by sign.
vpsraw xmm3, xmm0, 32
vpsraw xmm4, xmm1, 16
vpsraw xmm5, xmm0, 8
vpsraw xmm6, xmm1, 1
vpsraw xmm7, xmm2, 16

; 256-bit forms with the same sources and immediates, followed by an in-place
; shift (destination is also the source) with count 9.
vpsraw ymm8, ymm0, 32
vpsraw ymm9, ymm1, 16
vpsraw ymm10, ymm0, 8
vpsraw ymm11, ymm1, 1
vpsraw ymm12, ymm2, 16
vpsraw ymm13, ymm13, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0 (all words positive).
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1 (all words positive).
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Row 2: source for ymm2; words 0x8000 are negative, 0x7000 positive.
dq 0x8000800080008000
dq 0x7000700070007000
dq 0x8000800080008000
dq 0x7000700070007000

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsrld.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0041424300454647", "0x0051525300555657", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000414200004546", "0x0000515200005556", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM7":  ["0x0041424300454647", "0x0051525300555657", "0x0041424300454647", "0x0051525300555657"],
    "XMM8":  ["0x0000414200004546", "0x0000515200005556", "0x0000414200004546", "0x0000515200005556"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"]
  }
}
%endif

; Exercises vpsrld (logical right shift of dwords) with the shift count
; supplied by a memory operand (counts 1, 8, 16, 32, 64) and by an xmm register
; (count 1), for xmm and ymm destinations. Counts of 32 and 64 are expected to
; zero every dword.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 = value to shift.
vmovapd ymm0, [rdx]

; 128-bit destination, count in memory.
vpsrld xmm1, xmm0, [rdx + 32 * 1]
vpsrld xmm2, xmm0, [rdx + 32 * 2]
vpsrld xmm3, xmm0, [rdx + 32 * 3]
vpsrld xmm4, xmm0, [rdx + 32 * 4]
vpsrld xmm5, xmm0, [rdx + 32 * 5]

; 256-bit destination, count in memory.
vpsrld ymm6, ymm0, [rdx + 32 * 1]
vpsrld ymm7, ymm0, [rdx + 32 * 2]
vpsrld ymm8, ymm0, [rdx + 32 * 3]
vpsrld ymm9, ymm0, [rdx + 32 * 4]
vpsrld ymm10, ymm0, [rdx + 32 * 5]

; ymm11 = row 1 (count = 1), used as the register count below.
vmovapd ymm11, [rdx + 32]

; Register-count forms; the expected results equal xmm1 / ymm6.
vpsrld xmm12, xmm0, xmm11
vpsrld ymm13, ymm0, xmm11

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: value to shift.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Rows 1-5: shift counts in the low qword; the other qwords are zero.
; Row 1: count = 1.
dq 0x0000000000000001
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 2: count = 8.
dq 0x0000000000000008
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 3: count = 16.
dq 0x0000000000000010
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 4: count = 32.
dq 0x0000000000000020
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

; Row 5: count = 64.
dq 0x0000000000000040
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpsrld_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0000616200006566", "0x0000717200007576", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0041424300454647", "0x0051525300555657", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x0000616200006566", "0x0000717200007576", "0x0000616200006566", "0x0000717200007576"],
    "XMM8": ["0x0041424300454647", "0x0051525300555657", "0x0041424300454647", "0x0051525300555657"],
    "XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM10": ["0x0020a1210022a323", "0x0028a929002aab2b", "0x0030b1310032b333", "0x0038b939003abb3b"]
  }
}
%endif

; Exercises vpsrld (logical right shift of dwords) with an immediate count
; (32, 16, 8, 1 and 9) for xmm and ymm destinations. A count of 32 is expected
; to zero every dword.

; rdx = address of .data; rows are addressed as rdx + 32 * n.
lea rdx, [rel .data]

; ymm0 / ymm1 = the two source rows; ymm10 = .data2 (four distinct qwords).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

; 128-bit forms.
vpsrld xmm2, xmm0, 32
vpsrld xmm3, xmm1, 16
vpsrld xmm4, xmm0, 8
vpsrld xmm5, xmm1, 1

; 256-bit forms.
vpsrld ymm6, ymm0, 32
vpsrld ymm7, ymm1, 16
vpsrld ymm8, ymm0, 8
vpsrld ymm9, ymm1, 1

; Destination is also the source; count 9 is not a multiple of 8.
vpsrld ymm10, ymm10, 0x9

; End of test; the expected register contents are listed under RegData.
hlt

align 32
.data:
; Row 0: source for ymm0.
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

; Row 1: source for ymm1.
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

; Source for the in-place shift; every qword is different.
.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsrldq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x41DEADBEEFBAD0DA", "0x0041414141414141", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x41BEEFDEADFAD0CA", "0x0041414141414141", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x41DEADBEEFBAD0DA", "0x0041414141414141", "0x41DEADBEEFBAD0DA", "0x0041414141414141"],
    "XMM5":  ["0x41BEEFDEADFAD0CA", "0x0041414141414141", "0x41DEADBEEFBAD0DA", "0x0041414141414141"],
    "XMM6":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4141414141414141", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4141414141414141", "0x0000000000000000", "0x4141414141414141", "0x0000000000000000"],
    "XMM10": ["0xDEADBEEFBAD0DAD1", "0x4141414141414141", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0xBEEFDEADFAD0CAD1", "0x4141414141414141", "0xDEADBEEFBAD0DAD1", "0x4141414141414141"]
  }
}
%endif

; Exercises VPSRLDQ: a byte-granular logical right shift applied to each
; 128-bit lane independently (zeros are shifted in at the top of every lane).
lea rdx, [rel .data]

; ymm0 and ymm1 are the two 32-byte rows at .data.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Shift by one byte, 128-bit and 256-bit destinations.
vpsrldq xmm2, xmm0, 1
vpsrldq xmm3, xmm1, 1
vpsrldq ymm4, ymm0, 1
vpsrldq ymm5, ymm1, 1

; Shift by 16 bytes: the whole lane is shifted out, so the expected result is zero.
vpsrldq xmm6, xmm0, 16
vpsrldq ymm7, ymm1, 16

; Shift by 8 bytes: the high qword of each lane lands in the low qword.
vpsrldq xmm8, xmm0, 8
vpsrldq ymm9, ymm1, 8

; Shift by 0 bytes: the written lanes are expected to equal the source.
vpsrldq xmm10, xmm0, 0
vpsrldq ymm11, ymm1, 0

hlt

align 32
.data:
dq 0xDEADBEEFBAD0DAD1
dq 0x4141414141414141
dq 0xDEADBEEFBAD0DAD1
dq 0x4141414141414141

dq 0xBEEFDEADFAD0CAD1
dq 0x4141414141414141
dq 0xDEADBEEFBAD0DAD1
dq 0x4141414141414141


================================================
FILE: unittests/ASM/VEX/vpsrlq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0041424344454647", "0x0051525354555657", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000414243444546", "0x0000515253545556", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000041424344", "0x0000000051525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM7":  ["0x0041424344454647", "0x0051525354555657", "0x0041424344454647", "0x0051525354555657"],
    "XMM8":  ["0x0000414243444546", "0x0000515253545556", "0x0000414243444546", "0x0000515253545556"],
    "XMM9":  ["0x0000000041424344", "0x0000000051525354", "0x0000000041424344", "0x0000000051525354"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"]
  }
}
%endif

; Exercises VPSRLQ (logical right shift of packed qwords) where the shift count
; is taken from an xmm/m128 operand instead of an immediate.
lea rdx, [rel .data]

; Row 0 of .data is the value being shifted.
vmovapd ymm0, [rdx]

; Count from memory, 128-bit destination.
; Rows 1..5 of .data hold the counts 1, 8, 16, 32 and 64 in their low qword;
; a count of 64 shifts every bit out, so xmm5 is expected to be zero.
vpsrlq xmm1, xmm0, [rdx + 32 * 1]
vpsrlq xmm2, xmm0, [rdx + 32 * 2]
vpsrlq xmm3, xmm0, [rdx + 32 * 3]
vpsrlq xmm4, xmm0, [rdx + 32 * 4]
vpsrlq xmm5, xmm0, [rdx + 32 * 5]

; Count from memory, 256-bit destination.
vpsrlq ymm6, ymm0, [rdx + 32 * 1]
vpsrlq ymm7, ymm0, [rdx + 32 * 2]
vpsrlq ymm8, ymm0, [rdx + 32 * 3]
vpsrlq ymm9, ymm0, [rdx + 32 * 4]
vpsrlq ymm10, ymm0, [rdx + 32 * 5]

; Count from a register (row 1, i.e. a count of 1).
vmovapd ymm11, [rdx + 32]

; These must be VPSRLQ so the register-count form of the instruction under
; test is covered. The expected XMM12/XMM13 values match XMM1/XMM6, which use
; the same count of 1 from memory.
vpsrlq xmm12, xmm0, xmm11
vpsrlq ymm13, ymm0, xmm11

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x0000000000000001
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000008
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000010
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000020
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000040
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpsrlq_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000041424344", "0x0000000051525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000616263646566", "0x0000717273747576", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0041424344454647", "0x0051525354555657", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000041424344", "0x0000000051525354", "0x0000000041424344", "0x0000000051525354"],
    "XMM9":  ["0x0000616263646566", "0x0000717273747576", "0x0000616263646566", "0x0000717273747576"],
    "XMM10": ["0x0041424344454647", "0x0051525354555657", "0x0041424344454647", "0x0051525354555657"],
    "XMM11": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM12": ["0x0020a121a222a323", "0x0028a929aa2aab2b", "0x0030b131b232b333", "0x0038b939ba3abb3b"]
  }
}
%endif

; Exercises VPSRLQ (logical right shift of packed qwords) with immediate counts.
lea rdx, [rel .data]

; ymm0/ymm1 are the two 32-byte rows at .data; ymm12 is loaded from .data2.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm12, [rel .data2]

; 128-bit destinations. A count of 64 equals the qword width; xmm2 is expected to be zero.
vpsrlq xmm2, xmm0, 64
vpsrlq xmm3, xmm0, 32
vpsrlq xmm4, xmm1, 16
vpsrlq xmm5, xmm0, 8
vpsrlq xmm6, xmm1, 1

; 256-bit destinations with the same counts.
vpsrlq ymm7, ymm0, 64
vpsrlq ymm8, ymm0, 32
vpsrlq ymm9, ymm1, 16
vpsrlq ymm10, ymm0, 8
vpsrlq ymm11, ymm1, 1

; Destination aliases the source register.
vpsrlq ymm12, ymm12, 0x9

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsrlvd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x0080000000007FFF", "0x0000003F00000000", "0x0400000000555555", "0x0000000000000000"],
    "XMM3":  ["0x0080000000007FFF", "0x0000003F00000000", "0x0400000000555555", "0x0000000000000000"],
    "XMM4":  ["0x0080000000007FFF", "0x0000003F00000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0080000000007FFF", "0x0000003F00000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSRLVD: each dword is shifted right by the count held in the
; matching dword of the second source.
lea rdx, [rel .data]

; ymm0 = values (row 0), ymm1 = per-dword shift counts (row 1).
; The count row includes 0x20 and 0xFFFFFFFF; the expected results for those
; elements are zero.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; 256-bit: counts from a register, then from memory.
vpsrlvd ymm2, ymm0, ymm1
vpsrlvd ymm3, ymm0, [rdx + 32]

; 128-bit: counts from a register, then from memory.
vpsrlvd xmm4, xmm0, xmm1
vpsrlvd xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0xFFFFFFFF7FFFFFFF

dq 0x0000000800000010
dq 0x0000001600000020
dq 0x0000000400000008
dq 0xFFFFFFFFFFFFFFFF


================================================
FILE: unittests/ASM/VEX/vpsrlvq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x0000000000000000", "0x00000FFFFFFFFFFF", "0x0040000000555555", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0x00000FFFFFFFFFFF", "0x0040000000555555", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x00000FFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x00000FFFFFFFFFFF", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSRLVQ: each qword is shifted right by the count held in the
; matching qword of the second source.
lea rdx, [rel .data]

; ymm0 = values (row 0), ymm1 = per-qword shift counts (row 1).
; The count row includes 0xFFFFFFFFFFFFFF10 and 0x40; the expected results for
; those elements are zero.
vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]

; 256-bit: counts from a register, then from memory.
vpsrlvq ymm2, ymm0, ymm1
vpsrlvq ymm3, ymm0, [rdx + 32]

; 128-bit: counts from a register, then from memory.
vpsrlvq xmm4, xmm0, xmm1
vpsrlvq xmm5, xmm0, [rdx + 32]

hlt

align 32
.data:
dq 0x800000007FFFFFFF
dq 0x0FFFFFFFFFFFFFFF
dq 0x4000000055555555
dq 0xFFFFFFFF7FFFFFFF

dq 0xFFFFFFFFFFFFFF10
dq 0x0000000000000010
dq 0x0000000000000008
dq 0x0000000000000040


================================================
FILE: unittests/ASM/VEX/vpsrlw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM1":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x0041004300450047", "0x0051005300550057", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"],
    "XMM7":  ["0x0041004300450047", "0x0051005300550057", "0x0041004300450047", "0x0051005300550057"],
    "XMM8":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM12": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x0000000000000000", "0x0000000000000000"],
    "XMM13": ["0x20A121A222A323A4", "0x28A929AA2AAB2BAC", "0x20A121A222A323A4", "0x28A929AA2AAB2BAC"]
  }
}
%endif

; Exercises VPSRLW (logical right shift of packed words) where the shift count
; is taken from an xmm/m128 operand instead of an immediate.
lea rdx, [rel .data]

; Row 0 of .data is the value being shifted.
vmovapd ymm0, [rdx]

; Count from memory, 128-bit destination.
; Rows 1..5 of .data hold the counts 1, 8, 16, 32 and 64 in their low qword;
; the expected results for counts of 16 and above are zero.
vpsrlw xmm1, xmm0, [rdx + 32 * 1]
vpsrlw xmm2, xmm0, [rdx + 32 * 2]
vpsrlw xmm3, xmm0, [rdx + 32 * 3]
vpsrlw xmm4, xmm0, [rdx + 32 * 4]
vpsrlw xmm5, xmm0, [rdx + 32 * 5]

; Count from memory, 256-bit destination.
vpsrlw ymm6, ymm0, [rdx + 32 * 1]
vpsrlw ymm7, ymm0, [rdx + 32 * 2]
vpsrlw ymm8, ymm0, [rdx + 32 * 3]
vpsrlw ymm9, ymm0, [rdx + 32 * 4]
vpsrlw ymm10, ymm0, [rdx + 32 * 5]

; Count from a register (row 1, i.e. a count of 1).
vmovapd ymm11, [rdx + 32]

vpsrlw xmm12, xmm0, xmm11
vpsrlw ymm13, ymm0, xmm11


hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x0000000000000001
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000008
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000010
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000020
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000

dq 0x0000000000000040
dq 0x0000000000000000
dq 0x0000000000000000
dq 0x0000000000000000


================================================
FILE: unittests/ASM/VEX/vpsrlw_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0041004300450047", "0x0051005300550057", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x0041004300450047", "0x0051005300550057", "0x0041004300450047", "0x0051005300550057"],
    "XMM9": ["0x30B131B232B333B4", "0x38B939BA3ABB3BBC", "0x30B131B232B333B4", "0x38B939BA3ABB3BBC"],
    "XMM10": ["0x0020002100220023", "0x00280029002a002b", "0x0030003100320033", "0x00380039003a003b"]
  }
}
%endif

; Exercises VPSRLW (logical right shift of packed words) with immediate counts.
lea rdx, [rel .data]

; ymm0/ymm1 are the two 32-byte rows at .data; ymm10 is loaded from .data2.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]
vmovapd ymm10, [rel .data2]

; 128-bit destinations. Counts of 32 and 16 are at least the word width; the
; expected xmm2 and xmm3 are zero.
vpsrlw xmm2, xmm0, 32
vpsrlw xmm3, xmm1, 16
vpsrlw xmm4, xmm0, 8
vpsrlw xmm5, xmm1, 1

; 256-bit destinations with the same counts.
vpsrlw ymm6, ymm0, 32
vpsrlw ymm7, ymm1, 16
vpsrlw ymm8, ymm0, 8
vpsrlw ymm9, ymm1, 1

; Destination aliases the source register.
vpsrlw ymm10, ymm10, 0x9

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0x4142434445464748, 0x5152535455565758, 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsubb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM2": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202020202020", "0x2020202020202020", "0x2020202020202020", "0x2020202020202020"],
    "XMM4": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202020202020", "0x2020202020202020", "0x2020202020202020", "0x2020202020202020"]
  }
}
%endif

; Exercises VPSUBB (packed byte subtraction) in 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1); both stay live and are
; also checked by the expected register data.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vpsubb xmm2, xmm0, [rdx + 32]
vpsubb ymm3, ymm0, [rdx + 32]

; Register only
vpsubb xmm4, xmm0, xmm1
vpsubb ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x6162636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778

dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsubd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0xFFFFFFFF65666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0xFFFFFFFF45464748", "0x5152535455565758"],
    "XMM2": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202020202020", "0x2020202020202020", "0x0000000020202020", "0x2020202020202020"],
    "XMM4": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202020202020", "0x2020202020202020", "0x0000000020202020", "0x2020202020202020"]
  }
}
%endif

; Exercises VPSUBD (packed dword subtraction) in 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; The third qword of both rows has 0xFFFFFFFF in its upper dword, so that
; dword is expected to subtract to zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vpsubd xmm2, xmm0, [rdx + 32]
vpsubd ymm3, ymm0, [rdx + 32]

; Register only
vpsubd xmm4, xmm0, xmm1
vpsubd ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFFFFFF65666768
dq 0x7172737475767778

dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFF45464748
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsubq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0xFFFFFFFFFFFFFFFF", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0xFFFFFFFFFFFFFFFF", "0x5152535455565758"],
    "XMM2": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x2020202020202020"],
    "XMM4": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x2020202020202020"]
  }
}
%endif

; Exercises VPSUBQ (packed qword subtraction) in 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; The third qword of both rows is all ones, so that element is expected to
; subtract to zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vpsubq xmm2, xmm0, [rdx + 32]
vpsubq ymm3, ymm0, [rdx + 32]

; Register only
vpsubq xmm4, xmm0, xmm1
vpsubq ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFFFFFFFFFFFFFF
dq 0x7172737475767778

dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFFFFFFFFFF
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vpsubsb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x8062636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x7FE0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7FE0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0"],
    "XMM4": ["0x7FE0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x7FE0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0", "0xE0E0E0E0E0E0E0E0"]
  }
}
%endif

; Exercises VPSUBSB (packed byte subtraction with signed saturation).
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; The top byte of the first qword is 0x41 - 0x80, which is expected to
; saturate to 0x7F.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
vpsubsb xmm2, xmm0, xmm1
vpsubsb ymm3, ymm0, ymm1

; Memory operand
vpsubsb xmm4, xmm0, [rdx + 32]
vpsubsb ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x8062636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsubsw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"],
    "XMM1": ["0x8000636465666768", "0x7172737475767778", "0x6162636465666768", "0x7172737475767778"],
    "XMM2": ["0x7FFFDFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7FFFDFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0"],
    "XMM4": ["0x7FFFDFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x7FFFDFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0", "0xDFE0DFE0DFE0DFE0"]
  }
}
%endif

; Exercises VPSUBSW (packed word subtraction with signed saturation).
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; The top word of the first qword is 0x4142 - 0x8000, which is expected to
; saturate to 0x7FFF.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
vpsubsw xmm2, xmm0, xmm1
vpsubsw ymm3, ymm0, ymm1

; Memory operand
vpsubsw xmm4, xmm0, [rdx + 32]
vpsubsw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x4142434445464748
dq 0x5152535455565758

dq 0x8000636465666768
dq 0x7172737475767778
dq 0x6162636465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsubusb.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636445464748", "0x5152535455565758", "0x6162636445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434465666768", "0x7172737475767778", "0x4142434465666768", "0x7172737475767778"],
    "XMM2": ["0x2020202000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202000000000", "0x0000000000000000", "0x2020202000000000", "0x0000000000000000"],
    "XMM4": ["0x2020202000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202000000000", "0x0000000000000000", "0x2020202000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSUBUSB (packed byte subtraction with unsigned saturation).
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; Wherever the subtrahend byte is larger than the minuend byte the expected
; result is clamped to zero.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
vpsubusb xmm2, xmm0, xmm1
vpsubusb ymm3, ymm0, ymm1

; Memory operand
vpsubusb xmm4, xmm0, [rdx + 32]
vpsubusb ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x6162636445464748
dq 0x5152535455565758
dq 0x6162636445464748
dq 0x5152535455565758

dq 0x4142434465666768
dq 0x7172737475767778
dq 0x4142434465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsubusw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636445464748", "0x5152535455565758", "0x6162636445464748", "0x5152535455565758"],
    "XMM1": ["0x4142434465666768", "0x7172737475767778", "0x4142434465666768", "0x7172737475767778"],
    "XMM2": ["0x2020202000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202000000000", "0x0000000000000000", "0x2020202000000000", "0x0000000000000000"],
    "XMM4": ["0x2020202000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202000000000", "0x0000000000000000", "0x2020202000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VPSUBUSW (packed word subtraction with unsigned saturation).
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; Wherever the subtrahend word is larger than the minuend word the expected
; result is clamped to zero.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx + 32]

; Register only
vpsubusw xmm2, xmm0, xmm1
vpsubusw ymm3, ymm0, ymm1

; Memory operand
vpsubusw xmm4, xmm0, [rdx + 32]
vpsubusw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x6162636445464748
dq 0x5152535455565758
dq 0x6162636445464748
dq 0x5152535455565758

dq 0x4142434465666768
dq 0x7172737475767778
dq 0x4142434465666768
dq 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vpsubw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x6162636465666768", "0x7172737475767778", "0xFFFF636465666768", "0x7172737475767778"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0xFFFF434445464748", "0x5152535455565758"],
    "XMM2": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x2020202020202020", "0x2020202020202020", "0x0000202020202020", "0x2020202020202020"],
    "XMM4": ["0x2020202020202020", "0x2020202020202020", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x2020202020202020", "0x2020202020202020", "0x0000202020202020", "0x2020202020202020"]
  }
}
%endif

; Exercises VPSUBW (packed word subtraction) in 128-bit and 256-bit forms.
lea rdx, [rel .data]

; ymm0 = minuend (row 0), ymm1 = subtrahend (row 1).
; The third qword of both rows has 0xFFFF in its top word, so that word is
; expected to subtract to zero.
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; Memory operand
vpsubw xmm2, xmm0, [rdx + 32]
vpsubw ymm3, ymm0, [rdx + 32]

; Register only
vpsubw xmm4, xmm0, xmm1
vpsubw ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x6162636465666768
dq 0x7172737475767778
dq 0xFFFF636465666768
dq 0x7172737475767778

dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFF434445464748
dq 0x5152535455565758


================================================
FILE: unittests/ASM/VEX/vptest.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R15":  "0x0000000E9D67A759",
    "XMM0": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; Appends the CF and ZF left by the preceding instruction to the low end of r15.
; Uses AX and BX and stores result in r15
; Each invocation shifts r15 left by two bits in total and inserts the pair as
; CF:ZF (CF in bit 1, ZF in bit 0).
; Relies on the bits of rax/rbx above AX/BX being zero on entry.
%macro zfcfmerge 0
  ; Capture the flags byte into AH
  lahf

  ; Move the flags byte from AH down to AL so CF sits in bit 0 and ZF in bit 6
  shr ax, 8

  ; Move to a temp and isolate CF
  mov bx, ax
  and rbx, 1

  ; Insert CF
  shl r15, 1
  or r15, rbx

  ; Make room for ZF
  shl r15, 1

  ; Move to a temp
  mov bx, ax

  ; Extract ZF
  shr bx, 6
  and rbx, 1

  ; Insert ZF
  or r15, rbx
%endmacro

; Runs nine VPTEST cases and records CF:ZF of each one in r15 via zfcfmerge.
; %1 is the register prefix (xmm or ymm); %{1}0..%{1}2 expand to registers 0..2.
; Expects rdx to point at .data. Rows 3-5 are the "Match" operands, rows 6-8
; the "Match on not" operands and rows 9-11 the "No match on either case"
; operands, each paired in order with registers 0, 1 and 2.
%macro tests 1
  vptest %{1}0, [rdx + 32 * 3]
  zfcfmerge
  vptest %{1}1, [rdx + 32 * 4]
  zfcfmerge
  vptest %{1}2, [rdx + 32 * 5]
  zfcfmerge
  vptest %{1}0, [rdx + 32 * 6]
  zfcfmerge
  vptest %{1}1, [rdx + 32 * 7]
  zfcfmerge
  vptest %{1}2, [rdx + 32 * 8]
  zfcfmerge
  vptest %{1}0, [rdx + 32 * 9]
  zfcfmerge
  vptest %{1}1, [rdx + 32 * 10]
  zfcfmerge
  vptest %{1}2, [rdx + 32 * 11]
  zfcfmerge
%endmacro

; Test driver: runs the VPTEST cases for 128-bit and then 256-bit operands.
lea rdx, [rel .data]

; zfcfmerge only writes AX/BX, so clear the full registers (and the r15
; accumulator) up front.
mov rax, 0
mov rbx, 0
mov r15, 0

; Rows 0-2 of .data: all zeros, all ones, and a mixed bit pattern.
vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]

; 2 x 9 cases, two flag bits each: r15 ends up holding 36 result bits.
tests xmm
tests ymm

hlt

align 32
.data:
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match on not
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7, 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7

; No match on either case
dq 1, 1, 1, 1
dq 2, 2, 2, 2
dq 3, 3, 3, 3


================================================
FILE: unittests/ASM/VEX/vpunpckhbw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x7555765677577858", "0x7151725273537454", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7555765677577858", "0x7151725273537454", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x7555765677577858", "0x7151725273537454", "0x1199119900880088", "0x33BB33BB22AA22AA"],
    "XMM5": ["0x7555765677577858", "0x7151725273537454", "0x1199119900880088", "0x33BB33BB22AA22AA"]
  }
}
%endif

; Exercises VPUNPCKHBW: interleaves the bytes from the high half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register second source, then memory second source.
vpunpckhbw xmm2, xmm0, xmm1
vpunpckhbw xmm3, xmm0, [rdx + 32]

; 256-bit: register second source, then memory second source.
vpunpckhbw ymm4, ymm0, ymm1
vpunpckhbw ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFEEEEDDDDCCCC
dq 0xBBBBAAAA99998888

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x7777666655554444
dq 0x3333222211110000


================================================
FILE: unittests/ASM/VEX/vpunpckhdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x7576777855565758", "0x7172737451525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7576777855565758", "0x7172737451525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x7576777855565758", "0x7172737451525354", "0x88888888CCCCCCCC", "0x99999999DDDDDDDD"],
    "XMM5": ["0x7576777855565758", "0x7172737451525354", "0x88888888CCCCCCCC", "0x99999999DDDDDDDD"]
  }
}
%endif

; Exercises VPUNPCKHDQ: interleaves the dwords from the high half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register second source, then memory second source.
vpunpckhdq xmm2, xmm0, xmm1
vpunpckhdq xmm3, xmm0, [rdx + 32]

; 256-bit: register second source, then memory second source.
vpunpckhdq ymm4, ymm0, ymm1
vpunpckhdq ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFFEEEEEEEE
dq 0xDDDDDDDDCCCCCCCC

dq 0x6162636465666768
dq 0x7172737475767778
dq 0xBBBBBBBBAAAAAAAA
dq 0x9999999988888888


================================================
FILE: unittests/ASM/VEX/vpunpckhqdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x5152535455565758", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x5152535455565758", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x5152535455565758", "0x7172737475767778", "0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC"],
    "XMM5": ["0x5152535455565758", "0x7172737475767778", "0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VPUNPCKHQDQ: pairs the high qword of each 128-bit lane of the
; first source with the high qword of the same lane of the second source.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register second source, then memory second source.
vpunpckhqdq xmm2, xmm0, xmm1
vpunpckhqdq xmm3, xmm0, [rdx + 32]

; 256-bit: register second source, then memory second source.
vpunpckhqdq ymm4, ymm0, ymm1
vpunpckhqdq ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x6162636465666768
dq 0x7172737475767778
dq 0xBBBBBBBBBBBBBBBB
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpunpckhwd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x7576555677785758", "0x7172515273745354", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7576555677785758", "0x7172515273745354", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x7576555677785758", "0x7172515273745354", "0x1111999900008888", "0x3333BBBB2222AAAA"],
    "XMM5": ["0x7576555677785758", "0x7172515273745354", "0x1111999900008888", "0x3333BBBB2222AAAA"]
  }
}
%endif

; Exercises VPUNPCKHWD: interleaves the words from the high half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: register second source, then memory second source.
vpunpckhwd xmm2, xmm0, xmm1
vpunpckhwd xmm3, xmm0, [rdx + 32]

; 256-bit: register second source, then memory second source.
vpunpckhwd ymm4, ymm0, ymm1
vpunpckhwd ymm5, ymm0, [rdx + 32]

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFEEEEDDDDCCCC
dq 0xBBBBAAAA99998888

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x7777666655554444
dq 0x3333222211110000


================================================
FILE: unittests/ASM/VEX/vpunpcklbw.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x6545664667476848", "0x6141624263436444", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6545664667476848", "0x6141624263436444", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x6545664667476848", "0x6141624263436444", "0x55DD55DD44CC44CC", "0x77FF77FF66EE66EE"],
    "XMM5": ["0x6545664667476848", "0x6141624263436444", "0x55DD55DD44CC44CC", "0x77FF77FF66EE66EE"]
  }
}
%endif

; Exercises VPUNPCKLBW: interleaves the bytes from the low half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: memory second source, then register second source.
vpunpcklbw xmm2, xmm0, [rdx + 32]
vpunpcklbw xmm3, xmm0, xmm1

; 256-bit: memory second source, then register second source.
vpunpcklbw ymm4, ymm0, [rdx + 32]
vpunpcklbw ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFEEEEDDDDCCCC
dq 0xBBBBAAAA99998888

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x7777666655554444
dq 0x3333222211110000


================================================
FILE: unittests/ASM/VEX/vpunpckldq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x6566676845464748", "0x6162636441424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6566676845464748", "0x6162636441424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x6566676845464748", "0x6162636441424344", "0xAAAAAAAAEEEEEEEE", "0xBBBBBBBBFFFFFFFF"],
    "XMM5": ["0x6566676845464748", "0x6162636441424344", "0xAAAAAAAAEEEEEEEE", "0xBBBBBBBBFFFFFFFF"]
  }
}
%endif

; Exercises VPUNPCKLDQ: interleaves the dwords from the low half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: memory second source, then register second source.
vpunpckldq xmm2, xmm0, [rdx + 32]
vpunpckldq xmm3, xmm0, xmm1

; 256-bit: memory second source, then register second source.
vpunpckldq ymm4, ymm0, [rdx + 32]
vpunpckldq ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFFEEEEEEEE
dq 0xDDDDDDDDCCCCCCCC

dq 0x6162636465666768
dq 0x7172737475767778
dq 0xBBBBBBBBAAAAAAAA
dq 0x9999999988888888


================================================
FILE: unittests/ASM/VEX/vpunpcklqdq.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x4142434445464748", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4142434445464748", "0x6162636465666768", "0xFFFFFFFFFFFFFFFF", "0xBBBBBBBBBBBBBBBB"],
    "XMM5": ["0x4142434445464748", "0x6162636465666768", "0xFFFFFFFFFFFFFFFF", "0xBBBBBBBBBBBBBBBB"]
  }
}
%endif

; Exercises VPUNPCKLQDQ: pairs the low qword of each 128-bit lane of the
; first source with the low qword of the same lane of the second source.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: memory second source, then register second source.
vpunpcklqdq xmm2, xmm0, [rdx + 32]
vpunpcklqdq xmm3, xmm0, xmm1

; 256-bit: memory second source, then register second source.
vpunpcklqdq ymm4, ymm0, [rdx + 32]
vpunpcklqdq ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFFFFFFFFFFFFF
dq 0xEEEEEEEEEEEEEEEE

dq 0x6162636465666768
dq 0x7172737475767778
dq 0xBBBBBBBBBBBBBBBB
dq 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vpunpcklwd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x6566454667684748", "0x6162414263644344", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6566454667684748", "0x6162414263644344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x6566454667684748", "0x6162414263644344", "0x5555DDDD4444CCCC", "0x7777FFFF6666EEEE"],
    "XMM5": ["0x6566454667684748", "0x6162414263644344", "0x5555DDDD4444CCCC", "0x7777FFFF6666EEEE"]
  }
}
%endif

; Exercises VPUNPCKLWD: interleaves the words from the low half of each
; 128-bit lane of the two sources.
lea rdx, [rel .data]

; ymm0 = first source (row 0), ymm1 = second source (row 1).
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: memory second source, then register second source.
vpunpcklwd xmm2, xmm0, [rdx + 32]
vpunpcklwd xmm3, xmm0, xmm1

; 256-bit: memory second source, then register second source.
vpunpcklwd ymm4, ymm0, [rdx + 32]
vpunpcklwd ymm5, ymm0, ymm1

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0xFFFFEEEEDDDDCCCC
dq 0xBBBBAAAA99998888

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x7777666655554444
dq 0x3333222211110000


================================================
FILE: unittests/ASM/VEX/vpxor.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM3": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM5": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vpxor in 256-bit and 128-bit forms, with the second source taken
; from a register and from memory.
lea rdx, [rel .data1]
lea rbx, [rel .data2]

; ymm0 = .data1, ymm1 = .data2 (both are also checked unchanged in RegData)
vmovapd ymm0, [rdx]
vmovapd ymm1, [rbx]

; Register only
vpxor ymm2, ymm0, ymm1
vpxor xmm3, xmm0, xmm1

; With memory operand
; [rbx] holds the same data as ymm1, so ymm4/xmm5 must match ymm2/xmm3.
vpxor ymm4, ymm0, [rbx]
vpxor xmm5, xmm0, [rbx]

hlt

align 32
.data1:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x6162636465666768
dq 0x7172737475767778

.data2:
dq 0xCCCCCCCC75767778
dq 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758
dq 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vrcpps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R8": "1",
    "R9": "1"
  }
}
%endif


section .text
global _start

%include "checkprecision.mac"

; Tests whether all eight 32-bit lanes of a ymm register hold the same value
; as its lowest lane.
; %1: the ymm register to test (only read)
; clobbers ymm15
; returns the comparison result in rax (1 if all lanes match, 0 otherwise)
%macro same_pdwords 1 ; receives the ymms register
    vextractf128 xmm15, %1, 0  ; lower 128 bits of %1
    vmovd eax, xmm15           ; lowest 32-bit lane of %1
    vmovd xmm15, eax
    vbroadcastss ymm15, xmm15  ; broadcast lower 32bits across all 8 lanes
    vpcmpeqd ymm15, ymm15, %1  ; equality mask on all lanes
    vmovmskps eax, ymm15       ; gets sign bit of each lane into eax
    cmp eax, 0b11111111        ; check all 8 lanes
    sete al
    movzx rax, al              ; widen the 0/1 result to the full register
%endmacro

; Tests whether all four 32-bit lanes of an xmm register hold the same value
; as its lowest lane.
; %1: the xmm register to test (only read)
; clobbers xmm15
; returns the comparison result in rax (1 if all lanes match, 0 otherwise)
%macro same_pdwords_x 1 ; receives the xmms register
    movd eax, %1
    movd xmm15, eax
    pshufd xmm15, xmm15, 0 ; has the lower 32 bits of %1 across all lanes
    pcmpeqd xmm15, %1 ; has equality mask on all lanes
    movmskps eax, xmm15 ; gets sign bit of each lane into eax
    cmp eax, 0b1111 ; check all 4 lanes
    sete al
    movzx rax, al ; widen the 0/1 result to the full register
%endmacro

; Exercises vrcpps in 256-bit and 128-bit forms with register and memory
; sources. r8 ends up 1 if every result register has identical lanes, r9 ends
; up 1 if every lowest-lane result is within the tolerance of the expected
; reciprocal.
_start:
vmovapd ymm0, [rel arg1]
vmovapd ymm1, [rel arg2] ; ymm1 is not read again below

; Register only
vrcpps ymm2, ymm0
vrcpps xmm3, xmm0

; Memory operand
vrcpps ymm4, [rel arg2]
vrcpps xmm5, [rel arg2]

; Check that each register is properly filled
; r8 is seeded by the first check and ANDed with every following one.
same_pdwords ymm2
mov r8, rax

same_pdwords_x xmm3
and r8, rax

same_pdwords ymm4
and r8, rax

same_pdwords_x xmm5
and r8, rax

; Result checks
; Each lowest-lane result is stored to memory and compared against the
; expected value; r9 is seeded by the first check and ANDed with the rest.
vpextrd [rel result], xmm2, 0
check_relerr rel eresult1, rel result, rel tolerance
mov r9, rax

vpextrd [rel result], xmm3, 0
check_relerr rel eresult1, rel result, rel tolerance
and r9, rax

vpextrd [rel result], xmm4, 0
check_relerr rel eresult2, rel result, rel tolerance
and r9, rax

vpextrd [rel result], xmm5, 0
check_relerr rel eresult2, rel result, rel tolerance
and r9, rax
hlt

align 4096
result: times 4 dq 0

align 32
arg1:
dq 0x3F8000003F800000 ; 1.0
dq 0x3F8000003F800000
dq 0x3F8000003F800000
dq 0x3F8000003F800000

arg2:
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000
dq 0x4080000040800000
dq 0x4080000040800000

eresult1:
dd 0x3F800000 ; 1.0

eresult2:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000

define_check_data_constants


================================================
FILE: unittests/ASM/VEX/vrcpss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R9": "1",
    "XMM0": ["0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000"],
    "XMM1": ["0x4080000040800000", "0x4080000040800000", "0x4080000040800000", "0x4080000040800000"],
    "XMM2": ["0x4080000000000000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x3F80000000000000", "0x3F8000003F800000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

section .text
global _start

%include "checkprecision.mac"

; This test checks that:
; - the result of the reciprocal approximation is within the 1.5*2^-12 error margin.
; - the top 128 bits of the ymm registers are zero.
; - bits [127:32] are correctly copied from the first source operand of vrcpss.
_start:
vmovapd ymm0, [rel arg1]
vmovapd ymm1, [rel arg2]

; Register only
; Approximates 1/xmm0[31:0]; bits [127:32] come from xmm1.
vrcpss xmm2, xmm1, xmm0

; Memory operand
; Approximates 1/[arg2][31:0]; bits [127:32] come from xmm0.
vrcpss xmm3, xmm0, [rel arg2]

; Check precision
; r9 accumulates the verdict: it is seeded with the first check's result and
; ANDed with every later one, so it only stays 1 if all checks succeed.
vpextrd [rel result], xmm2, 0
check_relerr rel eresult1, rel result, rel tolerance
mov r9, rax

vpextrd [rel result], xmm3, 0
check_relerr rel eresult2, rel result, rel tolerance
and r9, rax

; Insert 0s in the bottom 32bits
; The approximated low dword is cleared so that RegData can compare the
; remaining (copied and zeroed) bits exactly.
xor rax, rax
vpinsrd xmm2, xmm2, eax, 0
vpinsrd xmm3, xmm3, eax, 0
hlt

align 4096
result: times 2 dq 0

align 32
arg1:
dq 0x3F8000003F800000 ; 1.0
dq 0x3F8000003F800000
dq 0x3F8000003F800000
dq 0x3F8000003F800000

arg2:
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000
dq 0x4080000040800000
dq 0x4080000040800000

eresult1:
dd 0x3F800000 ; 1.0

eresult2:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000

define_check_data_constants


================================================
FILE: unittests/ASM/VEX/vroundpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x0000000000000000", "0xBFF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x3FF0000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x0000000000000000", "0xBFF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x3FF0000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x8000000000000000"],
    "XMM9":  ["0x0000000000000000", "0xBFF0000000000000", "0x0000000000000000", "0xBFF0000000000000"],
    "XMM10": ["0x3FF0000000000000", "0x8000000000000000", "0x3FF0000000000000", "0x8000000000000000"],
    "XMM11": ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x8000000000000000"],
    "XMM12": ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x8000000000000000"],
    "XMM13": ["0x0000000000000000", "0xBFF0000000000000", "0x0000000000000000", "0xBFF0000000000000"],
    "XMM14": ["0x3FF0000000000000", "0x8000000000000000", "0x3FF0000000000000", "0x8000000000000000"],
    "XMM15": ["0x0000000000000000", "0x8000000000000000", "0x0000000000000000", "0x8000000000000000"]
  }
}
%endif

; Exercises vroundpd (128-bit and 256-bit, memory source) with every rounding
; mode, selected first through the immediate and then through MXCSR.
lea rdx, [rel .data]

; Rounding mode encoded in the immediate (bit 2 clear), 128-bit form
vroundpd xmm0, [rdx], 00000000b ; Nearest
vroundpd xmm1, [rdx], 00000001b ; -inf
vroundpd xmm2, [rdx], 00000010b ; +inf
vroundpd xmm3, [rdx], 00000011b ; truncate

; Rounding mode encoded in the immediate (bit 2 clear), 256-bit form
vroundpd ymm8,  [rdx], 00000000b ; Nearest
vroundpd ymm9,  [rdx], 00000001b ; -inf
vroundpd ymm10, [rdx], 00000010b ; +inf
vroundpd ymm11, [rdx], 00000011b ; truncate

; MXCSR
; With bit 2 of the immediate set, the rounding mode is taken from the
; rounding-control field of MXCSR (bits 14:13), which is changed before each
; pair of instructions below.
; Set to nearest
mov eax, 0x1F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundpd xmm4,  [rdx], 00000100b
vroundpd ymm12, [rdx], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundpd xmm5,  [rdx], 00000100b
vroundpd ymm13, [rdx], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundpd xmm6,  [rdx], 00000100b
vroundpd ymm14, [rdx], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundpd xmm7,  [rdx], 00000100b
vroundpd ymm15, [rdx], 00000100b

hlt

align 4096
.data:
dq 0.5, -0.5
dq 0.5, -0.5

.mxcsr:
dq 0, 0


================================================
FILE: unittests/ASM/VEX/vroundps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0xC000000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0xBF80000000000000", "0xC00000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x800000003F800000", "0xBF80000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x8000000000000000", "0xBF8000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x8000000000000000", "0xC000000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0xBF80000000000000", "0xC00000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x800000003F800000", "0xBF80000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x8000000000000000", "0xBF8000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x8000000000000000", "0xC000000040000000", "0x8000000000000000", "0xC000000040000000"],
    "XMM9":  ["0xBF80000000000000", "0xC00000003F800000", "0xBF80000000000000", "0xC00000003F800000"],
    "XMM10": ["0x800000003F800000", "0xBF80000040000000", "0x800000003F800000", "0xBF80000040000000"],
    "XMM11": ["0x8000000000000000", "0xBF8000003F800000", "0x8000000000000000", "0xBF8000003F800000"],
    "XMM12": ["0x8000000000000000", "0xC000000040000000", "0x8000000000000000", "0xC000000040000000"],
    "XMM13": ["0xBF80000000000000", "0xC00000003F800000", "0xBF80000000000000", "0xC00000003F800000"],
    "XMM14": ["0x800000003F800000", "0xBF80000040000000", "0x800000003F800000", "0xBF80000040000000"],
    "XMM15": ["0x8000000000000000", "0xBF8000003F800000", "0x8000000000000000", "0xBF8000003F800000"]
  }
}
%endif

; Exercises vroundps (128-bit and 256-bit, memory source) with every rounding
; mode, selected first through the immediate and then through MXCSR.
lea rdx, [rel .data]

; Rounding mode encoded in the immediate (bit 2 clear), 128-bit form
vroundps xmm0, [rdx], 00000000b ; Nearest
vroundps xmm1, [rdx], 00000001b ; -inf
vroundps xmm2, [rdx], 00000010b ; +inf
vroundps xmm3, [rdx], 00000011b ; truncate

; Rounding mode encoded in the immediate (bit 2 clear), 256-bit form
vroundps ymm8,  [rdx], 00000000b ; Nearest
vroundps ymm9,  [rdx], 00000001b ; -inf
vroundps ymm10, [rdx], 00000010b ; +inf
vroundps ymm11, [rdx], 00000011b ; truncate

; MXCSR
; With bit 2 of the immediate set, the rounding mode is taken from the
; rounding-control field of MXCSR (bits 14:13), which is changed before each
; pair of instructions below.
; Set to nearest
mov eax, 0x1F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundps xmm4,  [rdx], 00000100b
vroundps ymm12, [rdx], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundps xmm5,  [rdx], 00000100b
vroundps ymm13, [rdx], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundps xmm6,  [rdx], 00000100b
vroundps ymm14, [rdx], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundps xmm7,  [rdx], 00000100b
vroundps ymm15, [rdx], 00000100b

hlt

align 4096
.data:
dd 0.5, -0.5, 1.5, -1.5
dd 0.5, -0.5, 1.5, -1.5

.mxcsr:
dq 0, 0


================================================
FILE: unittests/ASM/VEX/vroundsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0x3FF0000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x3FF0000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x0000000000000000", "0xBFE0000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vroundsd with a memory source and every rounding mode, selected
; first through the immediate and then through MXCSR.
lea rdx, [rel .data]

; Preload every destination with the full 256 bits of .data so the test can
; verify that the upper qword is merged from the first source and that the
; top 128 bits are zeroed by the VEX.128 operation.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx]
vmovaps ymm2, [rdx]
vmovaps ymm3, [rdx]
vmovaps ymm4, [rdx]
vmovaps ymm5, [rdx]
vmovaps ymm6, [rdx]
vmovaps ymm7, [rdx]

; Rounding mode encoded in the immediate (bit 2 clear)
vroundsd xmm0, xmm0, [rdx], 00000000b ; Nearest
vroundsd xmm1, xmm1, [rdx], 00000001b ; -inf
vroundsd xmm2, xmm2, [rdx], 00000010b ; +inf
vroundsd xmm3, xmm3, [rdx], 00000011b ; truncate

; MXCSR
; With bit 2 of the immediate set, the rounding mode is taken from the
; rounding-control field of MXCSR (bits 14:13).
; Set to nearest
mov eax, 0x1F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundsd xmm4, xmm4, [rdx], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundsd xmm5, xmm5, [rdx], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundsd xmm6, xmm6, [rdx], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundsd xmm7, xmm7, [rdx], 00000100b

hlt

align 4096
.data:
dq 0.5, -0.5
dq 0.5, -0.5

.mxcsr:
dq 0, 0


================================================
FILE: unittests/ASM/VEX/vroundss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2": ["0xBF0000003F800000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xBF0000003F800000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0xBF00000000000000", "0xBFC000003FC00000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises vroundss with a memory source and every rounding mode, selected
; first through the immediate and then through MXCSR.
lea rdx, [rel .data]

; Preload every destination with the full 256 bits of .data so the test can
; verify that bits [127:32] are merged from the first source and that the
; top 128 bits are zeroed by the VEX.128 operation.
vmovaps ymm0, [rdx]
vmovaps ymm1, [rdx]
vmovaps ymm2, [rdx]
vmovaps ymm3, [rdx]
vmovaps ymm4, [rdx]
vmovaps ymm5, [rdx]
vmovaps ymm6, [rdx]
vmovaps ymm7, [rdx]

; Rounding mode encoded in the immediate (bit 2 clear)
vroundss xmm0, xmm0, [rdx], 00000000b ; Nearest
vroundss xmm1, xmm1, [rdx], 00000001b ; -inf
vroundss xmm2, xmm2, [rdx], 00000010b ; +inf
vroundss xmm3, xmm3, [rdx], 00000011b ; truncate

; MXCSR
; With bit 2 of the immediate set, the rounding mode is taken from the
; rounding-control field of MXCSR (bits 14:13).
; Set to nearest
mov eax, 0x1F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundss xmm4, xmm4, [rdx], 00000100b

; Set to -inf
mov eax, 0x3F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundss xmm5, xmm5, [rdx], 00000100b

; Set to +inf
mov eax, 0x5F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundss xmm6, xmm6, [rdx], 00000100b

; Set to truncate
mov eax, 0x7F80
mov [rel .mxcsr], eax
ldmxcsr [rel .mxcsr]

vroundss xmm7, xmm7, [rdx], 00000100b

hlt

align 4096
.data:
dd 0.5, -0.5, 1.5, -1.5
dd 0.5, -0.5, 1.5, -1.5

.mxcsr:
dq 0, 0


================================================
FILE: unittests/ASM/VEX/vrsqrtps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R8": "1",
    "R9": "1"
  }
}
%endif

section .text
global _start

%include "checkprecision.mac"

; Tests whether all eight 32-bit lanes of a ymm register hold the same value
; as its lowest lane.
; %1: the ymm register to test (only read)
; clobbers ymm15
; returns the comparison result in rax (1 if all lanes match, 0 otherwise)
%macro same_pdwords 1 ; receives the ymms register
    vextractf128 xmm15, %1, 0  ; lower 128 bits of %1
    vmovd eax, xmm15           ; lowest 32-bit lane of %1
    vmovd xmm15, eax
    vbroadcastss ymm15, xmm15  ; broadcast lower 32bits across all 8 lanes
    vpcmpeqd ymm15, ymm15, %1  ; equality mask on all lanes
    vmovmskps eax, ymm15       ; gets sign bit of each lane into eax
    cmp eax, 0b11111111        ; check all 8 lanes
    sete al
    movzx rax, al              ; widen the 0/1 result to the full register
%endmacro

; Tests whether all four 32-bit lanes of an xmm register hold the same value
; as its lowest lane.
; %1: the xmm register to test (only read)
; clobbers xmm15
; returns the comparison result in rax (1 if all lanes match, 0 otherwise)
%macro same_pdwords_x 1 ; receives the xmms register
    movd eax, %1
    movd xmm15, eax
    pshufd xmm15, xmm15, 0 ; has the lower 32 bits of %1 across all lanes
    pcmpeqd xmm15, %1 ; has equality mask on all lanes
    movmskps eax, xmm15 ; gets sign bit of each lane into eax
    cmp eax, 0b1111 ; check all 4 lanes
    sete al
    movzx rax, al ; widen the 0/1 result to the full register
%endmacro

; Exercises vrsqrtps in 256-bit and 128-bit forms with register and memory
; sources. r8 ends up 1 if every result register has identical lanes, r9 ends
; up 1 if every lowest-lane result is within the tolerance of the expected
; reciprocal square root.
_start:
vmovapd ymm0, [rel arg1]
vmovapd ymm1, [rel arg2]
vmovapd ymm2, [rel arg3]
vmovapd ymm3, [rel arg4]
; ymm4-ymm7 start out with arg5, which none of the expected results match,
; so a memory-operand form that fails to write its destination is detected.
vmovapd ymm4, [rel arg5]
vmovapd ymm5, [rel arg5]
vmovapd ymm6, [rel arg5]
vmovapd ymm7, [rel arg5]

; Register only
vrsqrtps ymm0, ymm0
vrsqrtps ymm1, ymm1
vrsqrtps xmm2, xmm2
vrsqrtps xmm3, xmm3

; Memory operand
vrsqrtps ymm4, [rel arg1]
vrsqrtps ymm5, [rel arg2]
vrsqrtps xmm6, [rel arg3]
vrsqrtps xmm7, [rel arg4]

; Check that each register is properly filled
; r8 is seeded by the first check and ANDed with every following one.
same_pdwords ymm0
mov r8, rax

same_pdwords ymm1
and r8, rax

same_pdwords_x xmm2
and r8, rax

same_pdwords_x xmm3
and r8, rax

; Same lane checks for the memory-operand results
same_pdwords ymm4
and r8, rax

same_pdwords ymm5
and r8, rax

same_pdwords_x xmm6
and r8, rax

same_pdwords_x xmm7
and r8, rax

; Result checks
; r9 is seeded by the first check and ANDed with every following one.
vpextrd [rel result1], xmm0, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov r9, rax

vpextrd [rel result2], xmm1, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

vpextrd [rel result3], xmm2, 0
check_relerr rel eresult3, rel result3, rel tolerance
and r9, rax

vpextrd [rel result4], xmm3, 0
check_relerr rel eresult4, rel result4, rel tolerance
and r9, rax

; Result checks for the memory-operand forms; the result slots are reused
; because each one is consumed by check_relerr right after it is written.
vpextrd [rel result1], xmm4, 0
check_relerr rel eresult1, rel result1, rel tolerance
and r9, rax

vpextrd [rel result2], xmm5, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

vpextrd [rel result3], xmm6, 0
check_relerr rel eresult3, rel result3, rel tolerance
and r9, rax

vpextrd [rel result4], xmm7, 0
check_relerr rel eresult4, rel result4, rel tolerance
and r9, rax
hlt

align 4096
result1: times 4 dq 0
result2: times 4 dq 0
result3: times 4 dq 0
result4: times 4 dq 0

align 32
arg1:
dq 0x3F8000003F800000 ; 1.0
dq 0x3F8000003F800000
dq 0x3F8000003F800000
dq 0x3F8000003F800000

arg2:
dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000
dq 0x4080000040800000
dq 0x4080000040800000

arg3:
dq 0x4110000041100000 ; 9.0
dq 0x4110000041100000
dq 0x4110000041100000
dq 0x4110000041100000

arg4:
dq 0x4180000041800000 ; 16.0
dq 0x4180000041800000
dq 0x4180000041800000
dq 0x4180000041800000

arg5:
dq 0x41C8000041C80000 ; 25.0
dq 0x41C8000041C80000
dq 0x41C8000041C80000
dq 0x41C8000041C80000

eresult1:
dd 0x3F800000 ; 1.0

eresult2:
dd 0x3f000000 ; 0.5

eresult3:
dd 0x3eaaaaab ; 1/3 = 0.(3)

eresult4:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000

define_check_data_constants


================================================
FILE: unittests/ASM/VEX/vrsqrtss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R9": "1",
    "XMM0":  ["0x4142434400000000", "0xEEEEEEEEEEEEEEEE", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4142434400000000", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4142434400000000", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4142434400000000", "0xBBBBBBBBBBBBBBBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x4142434400000000", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4142434400000000", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4142434400000000", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4142434400000000", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x4142434400000000", "0xDDDDDDDDDDDDDDDD", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4142434400000000", "0xCCCCCCCCCCCCCCCC", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4142434400000000", "0xBBBBBBBBBBBBBBBB", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x4142434400000000", "0xAAAAAAAAAAAAAAAA", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

section .text
global _start

%include "checkprecision.mac"

; This test checks that:
; - the results of the reciprocal sqrt are within the 1.5*2^-12 error margin.
; - the top 128 bits of the ymm registers are zero.
; - bits [127:32] are correctly copied from the first source operand of vrsqrtss.

_start:
vmovapd ymm0, [rel arg1]
vmovapd ymm1, [rel arg2]
vmovapd ymm2, [rel arg3]
vmovapd ymm3, [rel arg4]
vmovapd ymm4, [rel arg5]
vmovapd ymm5, [rel arg5]
vmovapd ymm6, [rel arg5]
vmovapd ymm7, [rel arg5]

; Same register
vrsqrtss xmm0, xmm0, xmm0
vrsqrtss xmm1, xmm1, xmm1
vrsqrtss xmm2, xmm2, xmm2
vrsqrtss xmm3, xmm3, xmm3

; Memory operand
; Bits [127:32] come from the destination's previous contents (arg5).
vrsqrtss xmm4, xmm4, [rel arg1]
vrsqrtss xmm5, xmm5, [rel arg2]
vrsqrtss xmm6, xmm6, [rel arg3]
vrsqrtss xmm7, xmm7, [rel arg4]

; Memory operand different source register
; Bits [127:32] come from xmm1-xmm4, whose upper bits are still those loaded
; from arg2, arg3, arg4 and arg5 respectively.
vrsqrtss xmm8, xmm1, [rel arg1]
vrsqrtss xmm9, xmm2, [rel arg2]
vrsqrtss xmm10, xmm3, [rel arg3]
vrsqrtss xmm11, xmm4, [rel arg4]

; Check precision
; r9 is seeded by the first check and ANDed with every following one.
; NOTE: only xmm0-xmm3 and xmm8-xmm11 are precision-checked here; the low
; dwords of xmm4-xmm7 are not.
vpextrd [rel result1], xmm0, 0
check_relerr rel eresult1, rel result1, rel tolerance
mov r9, rax

vpextrd [rel result2], xmm1, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

vpextrd [rel result3], xmm2, 0
check_relerr rel eresult3, rel result3, rel tolerance
and r9, rax

vpextrd [rel result4], xmm3, 0
check_relerr rel eresult4, rel result4, rel tolerance
and r9, rax

vpextrd [rel result1], xmm8, 0
check_relerr rel eresult1, rel result1, rel tolerance
and r9, rax

vpextrd [rel result2], xmm9, 0
check_relerr rel eresult2, rel result2, rel tolerance
and r9, rax

vpextrd [rel result3], xmm10, 0
check_relerr rel eresult3, rel result3, rel tolerance
and r9, rax

vpextrd [rel result4], xmm11, 0
check_relerr rel eresult4, rel result4, rel tolerance
and r9, rax

; Insert 0s in the bottom 32bits.
; The approximated low dword is cleared so that RegData can compare the
; remaining (copied and zeroed) bits exactly.
xor rax, rax
vpinsrd xmm0, xmm0, eax, 0
vpinsrd xmm1, xmm1, eax, 0
vpinsrd xmm2, xmm2, eax, 0
vpinsrd xmm3, xmm3, eax, 0
vpinsrd xmm4, xmm4, eax, 0
vpinsrd xmm5, xmm5, eax, 0
vpinsrd xmm6, xmm6, eax, 0
vpinsrd xmm7, xmm7, eax, 0
vpinsrd xmm8, xmm8, eax, 0
vpinsrd xmm9, xmm9, eax, 0
vpinsrd xmm10, xmm10, eax, 0
vpinsrd xmm11, xmm11, eax, 0

hlt

align 4096
result1: times 2 dq 0
result2: times 2 dq 0
result3: times 2 dq 0
result4: times 2 dq 0

align 32
arg1:
dq 0x414243443F800000 ; 1.0
dq 0xEEEEEEEEEEEEEEEE
dq 0x5152535455565758
dq 0x5152535455565758

arg2:
dq 0x4142434440800000 ; 4.0
dq 0xDDDDDDDDDDDDDDDD
dq 0x5152535455565758
dq 0x5152535455565758

arg3:
dq 0x4142434441100000 ; 9.0
dq 0xCCCCCCCCCCCCCCCC
dq 0x5152535455565758
dq 0x5152535455565758

arg4:
dq 0x4142434441800000 ; 16.0
dq 0xBBBBBBBBBBBBBBBB
dq 0x5152535455565758
dq 0x5152535455565758

arg5:
dq 0x4142434441C80000 ; 25.0
dq 0xAAAAAAAAAAAAAAAA
dq 0x5152535455565758
dq 0x5152535455565758

eresult1:
dd 0x3F800000 ; 1.0

eresult2:
dd 0x3f000000 ; 0.5

eresult3:
dd 0x3eaaaaab ; 1/3 = 0.(3)

eresult4:
dd 0x3e800000 ; 0.25

tolerance:
dd 0x39c00000

define_check_data_constants


================================================
FILE: unittests/ASM/VEX/vshufpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x4142434445464748", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x5152535455565758", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x4142434445464748", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x5152535455565758", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4142434445464748", "0x6162636465666768", "0x3132333435363738", "0x8182838485868788"],
    "XMM7":  ["0x4142434445464748", "0x6162636465666768", "0x3132333435363738", "0x9192939495969798"],
    "XMM8":  ["0x5152535455565758", "0x6162636465666768", "0x2122232425262728", "0x8182838485868788"],
    "XMM9":  ["0x4142434445464748", "0x7172737475767778", "0x3132333435363738", "0x9192939495969798"],
    "XMM10": ["0x4142434445464748", "0x6162636465666768", "0x2122232425262728", "0x9192939495969798"],
    "XMM11": ["0x5152535455565758", "0x7172737475767778", "0x2122232425262728", "0x9192939495969798"]
  }
}
%endif

; Exercises vshufpd in 128-bit and 256-bit register forms with several
; selector immediates.
lea rdx, [rel .data]

; ymm0 = first 32 bytes of .data, ymm1 = second 32 bytes of .data
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: imm bit 0 picks the low/high qword of xmm0 for result qword 0,
; imm bit 1 picks the low/high qword of xmm1 for result qword 1.
vshufpd xmm2, xmm0, xmm1, 0b00
vshufpd xmm3, xmm0, xmm1, 0b01
vshufpd xmm4, xmm0, xmm1, 0b10
vshufpd xmm5, xmm0, xmm1, 0b11

; 256-bit: imm bits 0-1 select within the lower 128-bit lane as above,
; imm bits 2-3 do the same within the upper 128-bit lane.
vshufpd ymm6,  ymm0, ymm1, 0b0000
vshufpd ymm7,  ymm0, ymm1, 0b1000
vshufpd ymm8,  ymm0, ymm1, 0b0101
vshufpd ymm9,  ymm0, ymm1, 0b1010
vshufpd ymm10, ymm0, ymm1, 0b1100
vshufpd ymm11, ymm0, ymm1, 0b1111

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x3132333435363738
dq 0x2122232425262728

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x8182838485868788
dq 0x9192939495969798


================================================
FILE: unittests/ASM/VEX/vshufps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2":  ["0x4546474845464748", "0x6566676865666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x5152535451525354", "0x7172737471727374", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x4546474851525354", "0x7576777871727374", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4546474845464748", "0x6566676865666768", "0x3536373835363738", "0x8586878885868788"],
    "XMM6":  ["0x5152535451525354", "0x7172737471727374", "0x2122232421222324", "0x9192939491929394"],
    "XMM7":  ["0x4546474851525354", "0x7576777871727374", "0x3536373821222324", "0x9596979891929394"]
  }
}
%endif

; Exercises vshufps in 128-bit and 256-bit register forms with three
; selector immediates.
lea rdx, [rel .data]

; ymm0 = first 32 bytes of .data, ymm1 = second 32 bytes of .data
vmovapd ymm0, [rdx]
vmovapd ymm1, [rdx + 32]

; 128-bit: imm[1:0] and imm[3:2] pick dwords of xmm0 for result dwords 0-1,
; imm[5:4] and imm[7:6] pick dwords of xmm1 for result dwords 2-3.
vshufps xmm2, xmm0, xmm1, 0b00000000
vshufps xmm3, xmm0, xmm1, 0b11111111
vshufps xmm4, xmm0, xmm1, 0b10110011

; 256-bit: the same selection is applied to each 128-bit lane independently.
vshufps ymm5, ymm0, ymm1, 0b00000000
vshufps ymm6, ymm0, ymm1, 0b11111111
vshufps ymm7, ymm0, ymm1, 0b10110011

hlt

align 32
.data:
dq 0x4142434445464748
dq 0x5152535455565758
dq 0x3132333435363738
dq 0x2122232425262728

dq 0x6162636465666768
dq 0x7172737475767778
dq 0x8182838485868788
dq 0x9192939495969798


================================================
FILE: unittests/ASM/VEX/vsqrtpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4010000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4000000000000000", "0x4000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4008000000000000", "0x4008000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4010000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM9":  ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM10": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
    "XMM11": ["0x4010000000000000", "0x4010000000000000", "0x4010000000000000", "0x4010000000000000"],
    "XMM12": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM13": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM14": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
    "XMM15": ["0x4010000000000000", "0x4010000000000000", "0x4010000000000000", "0x4010000000000000"]
  }
}
%endif

; Exercises vsqrtpd in 128-bit and 256-bit forms with register and memory
; sources. .data holds five 32-byte rows: 1.0, 4.0, 9.0, 16.0 and 25.0.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
; ymm4-ymm7 are preloaded with the 25.0 row, which is not an expected value
; of any result register, before being overwritten by the memory forms below.
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 4]
vmovapd ymm6, [rdx + 32 * 4]
vmovapd ymm7, [rdx + 32 * 4]
vmovapd ymm8, [rdx + 32 * 0]
vmovapd ymm9, [rdx + 32 * 1]
vmovapd ymm10, [rdx + 32 * 2]
vmovapd ymm11, [rdx + 32 * 3]

; 128-bit registers
; RegData expects the upper 128 bits of ymm0-ymm3 to be zero afterwards.
vsqrtpd xmm0, xmm0
vsqrtpd xmm1, xmm1
vsqrtpd xmm2, xmm2
vsqrtpd xmm3, xmm3

; 256-bit registers
vsqrtpd ymm8, ymm8
vsqrtpd ymm9, ymm9
vsqrtpd ymm10, ymm10
vsqrtpd ymm11, ymm11

; 128-bit memory operand
; RegData expects the upper 128 bits of ymm4-ymm7 to be zero afterwards.
vsqrtpd xmm4, [rdx + 32 * 0]
vsqrtpd xmm5, [rdx + 32 * 1]
vsqrtpd xmm6, [rdx + 32 * 2]
vsqrtpd xmm7, [rdx + 32 * 3]

; 256-bit memory operand
vsqrtpd ymm12, [rdx + 32 * 0]
vsqrtpd ymm13, [rdx + 32 * 1]
vsqrtpd ymm14, [rdx + 32 * 2]
vsqrtpd ymm15, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x3FF0000000000000 ; 1.0
dq 0x3FF0000000000000
dq 0x3FF0000000000000
dq 0x3FF0000000000000

dq 0x4010000000000000 ; 4.0
dq 0x4010000000000000
dq 0x4010000000000000
dq 0x4010000000000000

dq 0x4022000000000000 ; 9.0
dq 0x4022000000000000
dq 0x4022000000000000
dq 0x4022000000000000

dq 0x4030000000000000 ; 16.0
dq 0x4030000000000000
dq 0x4030000000000000
dq 0x4030000000000000

dq 0x4039000000000000 ; 25.0
dq 0x4039000000000000
dq 0x4039000000000000
dq 0x4039000000000000


================================================
FILE: unittests/ASM/VEX/vsqrtps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3F8000003F800000", "0x3F8000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4000000040000000", "0x4000000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4040000040400000", "0x4040000040400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4080000040800000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x3F8000003F800000", "0x3F8000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4000000040000000", "0x4000000040000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4040000040400000", "0x4040000040400000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4080000040800000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000"],
    "XMM9":  ["0x4000000040000000", "0x4000000040000000", "0x4000000040000000", "0x4000000040000000"],
    "XMM10": ["0x4040000040400000", "0x4040000040400000", "0x4040000040400000", "0x4040000040400000"],
    "XMM11": ["0x4080000040800000", "0x4080000040800000", "0x4080000040800000", "0x4080000040800000"],
    "XMM12": ["0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000", "0x3F8000003F800000"],
    "XMM13": ["0x4000000040000000", "0x4000000040000000", "0x4000000040000000", "0x4000000040000000"],
    "XMM14": ["0x4040000040400000", "0x4040000040400000", "0x4040000040400000", "0x4040000040400000"],
    "XMM15": ["0x4080000040800000", "0x4080000040800000", "0x4080000040800000", "0x4080000040800000"]
  }
}
%endif

; Exercises vsqrtps in 128-bit and 256-bit forms with register and memory
; sources. .data holds five 32-byte rows: 1.0, 4.0, 9.0, 16.0 and 25.0.
lea rdx, [rel .data]

vmovapd ymm0, [rdx + 32 * 0]
vmovapd ymm1, [rdx + 32 * 1]
vmovapd ymm2, [rdx + 32 * 2]
vmovapd ymm3, [rdx + 32 * 3]
; ymm4-ymm7 are preloaded with the 25.0 row, which is not an expected value
; of any result register, before being overwritten by the memory forms below.
vmovapd ymm4, [rdx + 32 * 4]
vmovapd ymm5, [rdx + 32 * 4]
vmovapd ymm6, [rdx + 32 * 4]
vmovapd ymm7, [rdx + 32 * 4]
vmovapd ymm8, [rdx + 32 * 0]
vmovapd ymm9, [rdx + 32 * 1]
vmovapd ymm10, [rdx + 32 * 2]
vmovapd ymm11, [rdx + 32 * 3]

; 128-bit registers
; RegData expects the upper 128 bits of ymm0-ymm3 to be zero afterwards.
vsqrtps xmm0, xmm0
vsqrtps xmm1, xmm1
vsqrtps xmm2, xmm2
vsqrtps xmm3, xmm3

; 256-bit registers
vsqrtps ymm8, ymm8
vsqrtps ymm9, ymm9
vsqrtps ymm10, ymm10
vsqrtps ymm11, ymm11

; 128-bit memory operand
; RegData expects the upper 128 bits of ymm4-ymm7 to be zero afterwards.
vsqrtps xmm4, [rdx + 32 * 0]
vsqrtps xmm5, [rdx + 32 * 1]
vsqrtps xmm6, [rdx + 32 * 2]
vsqrtps xmm7, [rdx + 32 * 3]

; 256-bit memory operand
vsqrtps ymm12, [rdx + 32 * 0]
vsqrtps ymm13, [rdx + 32 * 1]
vsqrtps ymm14, [rdx + 32 * 2]
vsqrtps ymm15, [rdx + 32 * 3]

hlt

align 32
.data:
dq 0x3F8000003F800000 ; 1.0
dq 0x3F8000003F800000
dq 0x3F8000003F800000
dq 0x3F8000003F800000

dq 0x4080000040800000 ; 4.0
dq 0x4080000040800000
dq 0x4080000040800000
dq 0x4080000040800000

dq 0x4110000041100000 ; 9.0
dq 0x4110000041100000
dq 0x4110000041100000
dq 0x4110000041100000

dq 0x4180000041800000 ; 16.0
dq 0x4180000041800000
dq 0x4180000041800000
dq 0x4180000041800000

dq 0x41C8000041C80000 ; 25.0
dq 0x41C8000041C80000
dq 0x41C8000041C80000
dq 0x41C8000041C80000


================================================
FILE: unittests/ASM/VEX/vsqrtsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4000000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4008000000000000", "0x4022000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4010000000000000", "0x4030000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x3FF0000000000000", "0x4039000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4000000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4008000000000000", "0x4022000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4010000000000000", "0x4030000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x3FF0000000000000", "0x4030000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x4000000000000000", "0x4039000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4008000000000000", "0x4010000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x4010000000000000", "0x4022000000000000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VSQRTSD dst, src1, src2: the low qword of dst receives
; sqrt(low qword of src2), qword 1 is taken from src1 and, per the expected
; register data, the upper 128 bits of the ymm register end up zero.

; Address of the n-th 32-byte row of the .data table (rdx holds the base).
%define ROW(n) rdx + 32 * (n)

lea rdx, [rel .data]

; Operands for the register-only cases.
vmovapd ymm0, [ROW(0)]
vmovapd ymm1, [ROW(1)]
vmovapd ymm2, [ROW(2)]
vmovapd ymm3, [ROW(3)]

; Merge sources for the memory-operand cases (xmm4 deliberately gets the
; 25.0 row, which no memory operand below reads).
vmovapd ymm4, [ROW(4)]
vmovapd ymm5, [ROW(1)]
vmovapd ymm6, [ROW(2)]
vmovapd ymm7, [ROW(3)]

; dst == src1 == src2
vsqrtsd xmm0, xmm0, xmm0
vsqrtsd xmm1, xmm1, xmm1
vsqrtsd xmm2, xmm2, xmm2
vsqrtsd xmm3, xmm3, xmm3

; dst == src1, src2 in memory
vsqrtsd xmm4, xmm4, [ROW(0)]
vsqrtsd xmm5, xmm5, [ROW(1)]
vsqrtsd xmm6, xmm6, [ROW(2)]
vsqrtsd xmm7, xmm7, [ROW(3)]

; dst differs from src1: qword 1 must come from src1 (xmm3-xmm6 as left by
; the operations above), the low qword from the memory operand.
vsqrtsd xmm8, xmm3, [ROW(0)]
vsqrtsd xmm9, xmm4, [ROW(1)]
vsqrtsd xmm10, xmm5, [ROW(2)]
vsqrtsd xmm11, xmm6, [ROW(3)]

hlt

align 32
.data:
; One 32-byte row (four doubles) per value.
times 4 dq 0x3FF0000000000000 ; 1.0

times 4 dq 0x4010000000000000 ; 4.0

times 4 dq 0x4022000000000000 ; 9.0

times 4 dq 0x4030000000000000 ; 16.0

times 4 dq 0x4039000000000000 ; 25.0


================================================
FILE: unittests/ASM/VEX/vsqrtss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0":  ["0x3F8000003F800000", "0x3F8000003F800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1":  ["0x4080000040000000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM2":  ["0x4110000040400000", "0x4110000041100000", "0x0000000000000000", "0x0000000000000000"],
    "XMM3":  ["0x4180000040800000", "0x4180000041800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4":  ["0x41C800003F800000", "0x41C8000041C80000", "0x0000000000000000", "0x0000000000000000"],
    "XMM5":  ["0x4080000040000000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6":  ["0x4110000040400000", "0x4110000041100000", "0x0000000000000000", "0x0000000000000000"],
    "XMM7":  ["0x4180000040800000", "0x4180000041800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM8":  ["0x418000003F800000", "0x4180000041800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM9":  ["0x41C8000040000000", "0x41C8000041C80000", "0x0000000000000000", "0x0000000000000000"],
    "XMM10": ["0x4080000040400000", "0x4080000040800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM11": ["0x4110000040800000", "0x4110000041100000", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VSQRTSS dst, src1, src2: the low dword of dst receives
; sqrt(low dword of src2), bits 127:32 are taken from src1 and, per the
; expected register data, the upper 128 bits of the ymm register end up zero.

; Address of the n-th 32-byte row of the .data table (rdx holds the base).
%define ROW(n) rdx + 32 * (n)

lea rdx, [rel .data]

; Operands for the register-only cases.
vmovapd ymm0, [ROW(0)]
vmovapd ymm1, [ROW(1)]
vmovapd ymm2, [ROW(2)]
vmovapd ymm3, [ROW(3)]

; Merge sources for the memory-operand cases (xmm4 deliberately gets the
; 25.0 row, which no memory operand below reads).
vmovapd ymm4, [ROW(4)]
vmovapd ymm5, [ROW(1)]
vmovapd ymm6, [ROW(2)]
vmovapd ymm7, [ROW(3)]

; dst == src1 == src2
vsqrtss xmm0, xmm0, xmm0
vsqrtss xmm1, xmm1, xmm1
vsqrtss xmm2, xmm2, xmm2
vsqrtss xmm3, xmm3, xmm3

; dst == src1, src2 in memory
vsqrtss xmm4, xmm4, [ROW(0)]
vsqrtss xmm5, xmm5, [ROW(1)]
vsqrtss xmm6, xmm6, [ROW(2)]
vsqrtss xmm7, xmm7, [ROW(3)]

; dst differs from src1: bits 127:32 must come from src1 (xmm3-xmm6 as left
; by the operations above), the low dword from the memory operand.
vsqrtss xmm8, xmm3, [ROW(0)]
vsqrtss xmm9, xmm4, [ROW(1)]
vsqrtss xmm10, xmm5, [ROW(2)]
vsqrtss xmm11, xmm6, [ROW(3)]

hlt

align 32
.data:
; One 32-byte row per value; every qword packs two single-precision copies.
times 4 dq 0x3F8000003F800000 ; 1.0

times 4 dq 0x4080000040800000 ; 4.0

times 4 dq 0x4110000041100000 ; 9.0

times 4 dq 0x4180000041800000 ; 16.0

times 4 dq 0x41C8000041C80000 ; 25.0


================================================
FILE: unittests/ASM/VEX/vsubpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4008000000000000", "0x4008000000000000", "0x4008000000000000", "0x4008000000000000"],
    "XMM1": ["0x4000000000000000", "0x4000000000000000", "0x4000000000000000", "0x4000000000000000"],
    "XMM3": ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"],
    "XMM5": ["0x3FF0000000000000", "0x3FF0000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000", "0x3FF0000000000000"]
  }
}
%endif

; Exercises VSUBPD at 128-bit and 256-bit widths: 3.0 - 2.0 = 1.0 per lane.

; The two 32-byte operand rows of the .data table (rdx holds the base).
%define MINUEND_ROW rdx + 0
%define SUBTRAHEND_ROW rdx + 32

lea rdx, [rel .data]

vmovapd ymm0, [MINUEND_ROW]
vmovapd ymm1, [SUBTRAHEND_ROW]

; Both operands in registers
vsubpd xmm3, xmm0, xmm1
vsubpd ymm4, ymm0, ymm1

; Subtrahend read straight from memory
vsubpd xmm5, xmm0, [SUBTRAHEND_ROW]
vsubpd ymm6, ymm0, [SUBTRAHEND_ROW]

hlt

align 32
.data:
times 4 dq 0x4008000000000000 ; 3.0

times 4 dq 0x4000000000000000 ; 2.0


================================================
FILE: unittests/ASM/VEX/vsubps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x400000003F800000", "0x4080000040400000", "0x400000003F800000", "0x4080000040400000"],
    "XMM1": ["0x40C0000040A00000", "0x4100000040E00000", "0x40C0000040A00000", "0x4100000040E00000"],
    "XMM3": ["0xC0800000C0800000", "0xC0800000C0800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0xC0800000C0800000", "0xC0800000C0800000", "0xC0800000C0800000", "0xC0800000C0800000"],
    "XMM5": ["0xC0800000C0800000", "0xC0800000C0800000", "0x0000000000000000", "0x0000000000000000"],
    "XMM6": ["0xC0800000C0800000", "0xC0800000C0800000", "0xC0800000C0800000", "0xC0800000C0800000"]
  }
}
%endif

; Exercises VSUBPS at 128-bit and 256-bit widths:
; {1, 2, 3, 4} - {5, 6, 7, 8} = -4.0 in every single-precision lane.

; The two 32-byte operand rows of the .data table (rdx holds the base).
%define MINUEND_ROW rdx + 0
%define SUBTRAHEND_ROW rdx + 32

lea rdx, [rel .data]

vmovapd ymm0, [MINUEND_ROW]
vmovapd ymm1, [SUBTRAHEND_ROW]

; Both operands in registers
vsubps xmm3, xmm0, xmm1
vsubps ymm4, ymm0, ymm1

; Subtrahend read straight from memory
vsubps xmm5, xmm0, [SUBTRAHEND_ROW]
vsubps ymm6, ymm0, [SUBTRAHEND_ROW]

hlt

align 32
.data:
; Each qword is written high dword first: "2, 1" means lanes {1.0, 2.0}.
; Minuend row: lanes 1, 2, 3, 4 repeated in both 128-bit halves.
times 2 dq 0x400000003F800000, 0x4080000040400000 ; (2, 1), (4, 3)

; Subtrahend row: lanes 5, 6, 7, 8 repeated in both 128-bit halves.
times 2 dq 0x40C0000040A00000, 0x4100000040E00000 ; (6, 5), (8, 7)


================================================
FILE: unittests/ASM/VEX/vsubsd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0xC008000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4010000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0xC01C000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4038000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4035000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0xC022000000000000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4030000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4039000000000000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Exercises VSUBSD dst, src1, src2: low qword = src1 - src2, qword 1 comes
; from src1 and, per the expected register data, the upper 128 bits of the
; destination ymm register end up zero.

; Address of the n-th 32-byte row of the .data table (rdx holds the base).
%define ROW(n) rdx + 32 * (n)
; Non-arithmetic filler placed in every qword except the low one of each row.
%define FILLER 0x5152535455565758

lea rdx, [rel .data]

vmovapd ymm0, [ROW(0)]
vmovapd ymm1, [ROW(1)]
vmovapd ymm2, [ROW(2)]
vmovapd ymm3, [ROW(3)]
vmovapd ymm4, [ROW(4)]

; Both operands in registers
vsubsd xmm0, xmm0, xmm1 ; 1.0 - 4.0  = -3.0
vsubsd xmm2, xmm2, xmm3 ; 9.0 - 16.0 = -7.0

; Subtrahend in memory (xmm5 first, because the second op overwrites xmm4)
vsubsd xmm5, xmm4, [ROW(1)] ; 25.0 - 4.0 = 21.0
vsubsd xmm4, xmm4, [ROW(0)] ; 25.0 - 1.0 = 24.0

; Destination distinct from both sources: starts zeroed, must pick up
; qword 1 from src1 (xmm8).
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [ROW(3)]
vmovapd ymm9, [ROW(4)]
vsubsd xmm7, xmm8, xmm9 ; 16.0 - 25.0 = -9.0

hlt

align 32
.data:
dq 0x3FF0000000000000, FILLER, FILLER, FILLER ; 1.0

dq 0x4010000000000000, FILLER, FILLER, FILLER ; 4.0

dq 0x4022000000000000, FILLER, FILLER, FILLER ; 9.0

dq 0x4030000000000000, FILLER, FILLER, FILLER ; 16.0

dq 0x4039000000000000, FILLER, FILLER, FILLER ; 25.0


================================================
FILE: unittests/ASM/VEX/vsubss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x41424344C0400000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0x4142434440800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM2": ["0x41424344C0e00000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM4": ["0x4142434441C00000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM5": ["0x4142434441A80000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM7": ["0x41424344C1100000", "0x5152535455565758", "0x0000000000000000", "0x0000000000000000"],
    "XMM8": ["0x4142434441800000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"],
    "XMM9": ["0x4142434441C80000", "0x5152535455565758", "0x5152535455565758", "0x5152535455565758"]
  }
}
%endif

; Exercises VSUBSS dst, src1, src2: low dword = src1 - src2, bits 127:32 come
; from src1 and, per the expected register data, the upper 128 bits of the
; destination ymm register end up zero.

; Address of the n-th 32-byte row of the .data table (rdx holds the base).
%define ROW(n) rdx + 32 * (n)
; Non-arithmetic filler placed in every qword except the low one of each row.
%define FILLER 0x5152535455565758

lea rdx, [rel .data]

vmovapd ymm0, [ROW(0)]
vmovapd ymm1, [ROW(1)]
vmovapd ymm2, [ROW(2)]
vmovapd ymm3, [ROW(3)]
vmovapd ymm4, [ROW(4)]

; Both operands in registers
vsubss xmm0, xmm0, xmm1 ; 1.0 - 4.0  = -3.0
vsubss xmm2, xmm2, xmm3 ; 9.0 - 16.0 = -7.0

; Subtrahend in memory (xmm5 first, because the second op overwrites xmm4)
vsubss xmm5, xmm4, [ROW(1)] ; 25.0 - 4.0 = 21.0
vsubss xmm4, xmm4, [ROW(0)] ; 25.0 - 1.0 = 24.0

; Destination distinct from both sources: starts zeroed, must pick up
; bits 127:32 from src1 (xmm8).
vpxor xmm7, xmm7, xmm7
vmovapd ymm8, [ROW(3)]
vmovapd ymm9, [ROW(4)]
vsubss xmm7, xmm8, xmm9 ; 16.0 - 25.0 = -9.0

hlt

align 32
.data:
; Only the low dword of each row is the float operand; the dword above it
; (0x41424344) is filler that must be carried through unchanged.
dq 0x414243443F800000, FILLER, FILLER, FILLER ; 1.0

dq 0x4142434440800000, FILLER, FILLER, FILLER ; 4.0

dq 0x4142434441100000, FILLER, FILLER, FILLER ; 9.0

dq 0x4142434441800000, FILLER, FILLER, FILLER ; 16.0

dq 0x4142434441C80000, FILLER, FILLER, FILLER ; 25.0


================================================
FILE: unittests/ASM/VEX/vtestpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R15":  "0x0000000EDDFFB77F",
    "XMM0": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; Appends the CF and ZF left by the preceding instruction to r15 as two bits
; (CF first, then ZF), so r15 becomes a packed history of every test result.
; Uses AX and BX as scratch and stores result in r15
; CF:ZF
%macro zfcfmerge 0
  ; AH = SF:ZF:0:AF:0:PF:1:CF
  lahf

  ; Shift AH down into AL so that CF lands in bit zero (ZF in bit 6)
  shr ax, 8

  ; Move to a temp and isolate CF; the 64-bit AND also clears whatever was
  ; left in the upper bits of rbx
  mov bx, ax
  and rbx, 1

  ; Append CF
  shl r15, 1
  or r15, rbx

  ; Make room for ZF
  shl r15, 1

  ; Move to a temp
  mov bx, ax

  ; Extract ZF
  shr bx, 6
  and rbx, 1

  ; Insert ZF
  or r15, rbx
%endmacro

; Runs the nine VTESTPD cases for one register width. %1 is the register
; prefix (xmm or ymm); %{1}0 pastes it onto the register index.
; VTESTPD only looks at the sign bit of each double:
;   ZF = 1 when (reg AND mem) has no sign bit set
;   CF = 1 when (NOT reg AND mem) has no sign bit set
; Register 0 is all zeroes, 1 is all ones, 2 is a pattern with clear sign bits.
%macro tests 1
  ; Memory operand identical to the register
  vtestpd %{1}0, [rdx + 32 * 3]
  zfcfmerge
  vtestpd %{1}1, [rdx + 32 * 4]
  zfcfmerge
  vtestpd %{1}2, [rdx + 32 * 5]
  zfcfmerge
  ; Memory operand is the bitwise complement of the register
  vtestpd %{1}0, [rdx + 32 * 6]
  zfcfmerge
  vtestpd %{1}1, [rdx + 32 * 7]
  zfcfmerge
  vtestpd %{1}2, [rdx + 32 * 8]
  zfcfmerge
  ; Memory operand with every sign bit clear
  vtestpd %{1}0, [rdx + 32 * 9]
  zfcfmerge
  vtestpd %{1}1, [rdx + 32 * 10]
  zfcfmerge
  vtestpd %{1}2, [rdx + 32 * 11]
  zfcfmerge
%endmacro

lea rdx, [rel .data]

; Clear the scratch registers and the result accumulator
mov rax, 0
mov rbx, 0
mov r15, 0

vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]

; Both widths accumulate into r15: each pass contributes 18 bits (0x3B77F),
; so the expected value is (0x3B77F << 18) | 0x3B77F = 0xEDDFFB77F.
tests xmm
tests ymm

hlt

align 32
.data:
; Rows 0-2: values loaded into ymm0-ymm2
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match (rows 3-5): same values as the registers. CF:ZF = 11, 10, 11
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match on not (rows 6-8): bitwise complement of the registers. CF:ZF = 01, 11, 01
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7, 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7

; Rows 9-11: unrelated small values. Their sign bits are all clear, so both
; flags end up set for every register. CF:ZF = 11, 11, 11
dq 1, 1, 1, 1
dq 2, 2, 2, 2
dq 3, 3, 3, 3


================================================
FILE: unittests/ASM/VEX/vtestps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "R15":  "0x000000000003B77F",
    "R14":  "0x000000000003B77F",
    "XMM0": ["0x0000000000000000", "0x0000000000000000", "0x0000000000000000", "0x0000000000000000"],
    "XMM1": ["0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF", "0xFFFFFFFFFFFFFFFF"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0x4142434445464748", "0x5152535455565758"]
  }
}
%endif

; Appends the CF and ZF left by the preceding instruction to the register
; given as %1, as two bits (CF first, then ZF), building a packed history of
; every test result.
; Uses AX and BX as scratch and stores result in the passed in register
; CF:ZF
%macro zfcfmerge 1
  ; AH = SF:ZF:0:AF:0:PF:1:CF
  lahf

  ; Shift AH down into AL so that CF lands in bit zero (ZF in bit 6)
  shr ax, 8

  ; Move to a temp and isolate CF; the 64-bit AND also clears whatever was
  ; left in the upper bits of rbx
  mov bx, ax
  and rbx, 1

  ; Append CF
  shl %{1}, 1
  or %{1}, rbx

  ; Make room for ZF
  shl %{1}, 1

  ; Move to a temp
  mov bx, ax

  ; Extract ZF
  shr bx, 6
  and rbx, 1

  ; Insert ZF
  or %{1}, rbx
%endmacro

; Runs the nine VTESTPS cases for one register width. %1 is the register
; prefix (xmm or ymm), pasted onto the register index; %2 is the accumulator.
; VTESTPS only looks at the sign bit of each single-precision element:
;   ZF = 1 when (reg AND mem) has no sign bit set
;   CF = 1 when (NOT reg AND mem) has no sign bit set
; Register 0 is all zeroes, 1 is all ones, 2 is a pattern with clear sign bits.
%macro tests 2
  ; Memory operand identical to the register
  vtestps %{1}0, [rdx + 32 * 3]
  zfcfmerge %{2}
  vtestps %{1}1, [rdx + 32 * 4]
  zfcfmerge %{2}
  vtestps %{1}2, [rdx + 32 * 5]
  zfcfmerge %{2}
  ; Memory operand is the bitwise complement of the register
  vtestps %{1}0, [rdx + 32 * 6]
  zfcfmerge %{2}
  vtestps %{1}1, [rdx + 32 * 7]
  zfcfmerge %{2}
  vtestps %{1}2, [rdx + 32 * 8]
  zfcfmerge %{2}
  ; Memory operand with every sign bit clear
  vtestps %{1}0, [rdx + 32 * 9]
  zfcfmerge %{2}
  vtestps %{1}1, [rdx + 32 * 10]
  zfcfmerge %{2}
  vtestps %{1}2, [rdx + 32 * 11]
  zfcfmerge %{2}
%endmacro

lea rdx, [rel .data]

; Clear the scratch registers and both result accumulators
mov rax, 0
mov rbx, 0
mov r15, 0
mov r14, 0

vmovaps ymm0, [rdx + 32 * 0]
vmovaps ymm1, [rdx + 32 * 1]
vmovaps ymm2, [rdx + 32 * 2]

; Each pass produces 18 bits; the expected history for both is 0x3B77F.
; Accumulate xmm results in r15
tests xmm, r15
; Accumulate ymm results in r14
tests ymm, r14

hlt

align 32
.data:
; Rows 0-2: values loaded into ymm0-ymm2
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match (rows 3-5): same values as the registers. CF:ZF = 11, 10, 11
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x4142434445464748, 0x5152535455565758, 0x4142434445464748, 0x5152535455565758

; Match on not (rows 6-8): bitwise complement of the registers. CF:ZF = 01, 11, 01
dq 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
dq 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000
dq 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7, 0xBEBDBCBBBAB9B8B7, 0xAEADACABAAA9A8A7

; Rows 9-11: unrelated small values. Their sign bits are all clear, so both
; flags end up set for every register. CF:ZF = 11, 11, 11
dq 1, 1, 1, 1
dq 2, 2, 2, 2
dq 3, 3, 3, 3


================================================
FILE: unittests/ASM/VEX/vucomisd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; Checks the flags VUCOMISD produces for an ordered "less than" compare and
; for an unordered (NaN) compare. Flags are captured through LAHF, which
; fills AH with SF:ZF:0:AF:0:PF:1:CF.
lea rdx, [rel .data]

; xmm0 low double = 1.0 (the upper double is not read by the scalar compare)
vmovaps xmm0, [rdx + 16 * 0]
vucomisd xmm0, [rdx + 16 * 1] ; 1.0 <comp> 4.0
; Expected AH after LAHF for "less than" (only CF set):
; 0: CF - 00000001
; 1:    - 00000010 (reserved bit, always reads as 1)
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; MOV leaves the flags untouched, so RAX can be cleared before LAHF and
; ends up holding just the flag byte in AH (0x0300).
mov rax, 0
lahf
; Keep the first result in RBX; RAX is reused for the second compare
mov rbx, rax

vucomisd xmm0, [rdx + 16 * 2] ; 1.0 <comp> NaN
; Expected AH after LAHF for "unordered" (CF, PF and ZF all set):
; 0: CF - 00000001
; 1:    - 00000010 (reserved bit, always reads as 1)
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; Same capture as above; RAX ends up as 0x4700
mov rax, 0
lahf

hlt

align 16
.data:
; Row 0: 1.0 (compared), 2.0 (unused upper element)
dq 0x3FF0000000000000
dq 0x4000000000000000

; Row 1: 4.0, 4.0
dq 0x4010000000000000
dq 0x4010000000000000

; Row 2: quiet NaN, 4.0
dq 0x7FF8000000000000
dq 0x4010000000000000


================================================
FILE: unittests/ASM/VEX/vucomiss.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x4700",
    "RBX": "0x0300"
  }
}
%endif

; Checks the flags VUCOMISS produces for an ordered "less than" compare and
; for an unordered (NaN) compare. Flags are captured through LAHF, which
; fills AH with SF:ZF:0:AF:0:PF:1:CF.
lea rdx, [rel .data]

; xmm0 low float = 1.0 (the remaining bits are filler the scalar compare ignores)
vmovaps xmm0, [rdx + 16 * 0]
vucomiss xmm0, [rdx + 16 * 1] ; 1.0 <comp> 4.0
; Expected AH after LAHF for "less than" (only CF set):
; 0: CF - 00000001
; 1:    - 00000010 (reserved bit, always reads as 1)
; 2: PF - 00000000
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 00000000
; 7: SF - 00000000 <- 0
; ================
;         00000011
; OF: LAHF doesn't load - 0

; MOV leaves the flags untouched, so RAX can be cleared before LAHF and
; ends up holding just the flag byte in AH (0x0300).
mov rax, 0
lahf
; Keep the first result in RBX; RAX is reused for the second compare
mov rbx, rax

vucomiss xmm0, [rdx + 16 * 2] ; 1.0 <comp> NaN
; Expected AH after LAHF for "unordered" (CF, PF and ZF all set):
; 0: CF - 00000001
; 1:    - 00000010 (reserved bit, always reads as 1)
; 2: PF - 00000100
; 3:  0 - 00000000
; 4: AF - 00000000 <- 0
; 5:  0 - 00000000
; 6: ZF - 01000000
; 7: SF - 00000000 <- 0
; ================
;         01000111
; OF: LAHF doesn't load - 0

; Same capture as above; RAX ends up as 0x4700
mov rax, 0
lahf

hlt

align 16
.data:
; Only the low dword of each row takes part in the compare; the
; 0x51525354 upper dwords are filler.
; Row 0: low float 1.0
dq 0x515253543F800000
dq 0x5152535440000000

; Row 1: low float 4.0
dq 0x5152535440800000
dq 0x5152535440800000

; Row 2: low float quiet NaN
dq 0x515253547FC00000
dq 0x5152535440800000


================================================
FILE: unittests/ASM/VEX/vunpckhpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x5152535455565758", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x5152535455565758", "0x7172737475767778", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x5152535455565758", "0x7172737475767778", "0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC"],
    "XMM5": ["0x5152535455565758", "0x7172737475767778", "0xEEEEEEEEEEEEEEEE", "0xCCCCCCCCCCCCCCCC"]
  }
}
%endif

; Exercises VUNPCKHPD: within each 128-bit lane the result is
; { high qword of src1, high qword of src2 }.

; The two 32-byte source rows of the .data table (rdx holds the base).
%define SRC1_ROW rdx
%define SRC2_ROW rdx + 32

lea rdx, [rel .data]

vmovapd ymm0, [SRC1_ROW]
vmovapd ymm1, [SRC2_ROW]

; 128-bit: register source, then memory source
vunpckhpd xmm2, xmm0, xmm1
vunpckhpd xmm3, xmm0, [SRC2_ROW]

; 256-bit: register source, then memory source
vunpckhpd ymm4, ymm0, ymm1
vunpckhpd ymm5, ymm0, [SRC2_ROW]

hlt

align 32
.data:
; src1
dq 0x4142434445464748, 0x5152535455565758
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE

; src2
dq 0x6162636465666768, 0x7172737475767778
dq 0xBBBBBBBBBBBBBBBB, 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vunpckhps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x7576777855565758", "0x7172737451525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x7576777855565758", "0x7172737451525354", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x7576777855565758", "0x7172737451525354", "0x88888888CCCCCCCC", "0x99999999DDDDDDDD"],
    "XMM5": ["0x7576777855565758", "0x7172737451525354", "0x88888888CCCCCCCC", "0x99999999DDDDDDDD"]
  }
}
%endif

; Exercises VUNPCKHPS: within each 128-bit lane the result interleaves the
; two upper dwords of src1 and src2 as { a2, b2, a3, b3 }.

; The two 32-byte source rows of the .data table (rdx holds the base).
%define SRC1_ROW rdx
%define SRC2_ROW rdx + 32

lea rdx, [rel .data]

vmovaps ymm0, [SRC1_ROW]
vmovaps ymm1, [SRC2_ROW]

; 128-bit: register source, then memory source
vunpckhps xmm2, xmm0, xmm1
vunpckhps xmm3, xmm0, [SRC2_ROW]

; 256-bit: register source, then memory source
vunpckhps ymm4, ymm0, ymm1
vunpckhps ymm5, ymm0, [SRC2_ROW]

hlt

align 32
.data:
; src1
dq 0x4142434445464748, 0x5152535455565758
dq 0xFFFFFFFFEEEEEEEE, 0xDDDDDDDDCCCCCCCC

; src2
dq 0x6162636465666768, 0x7172737475767778
dq 0xBBBBBBBBAAAAAAAA, 0x9999999988888888


================================================
FILE: unittests/ASM/VEX/vunpcklpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x4142434445464748", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x4142434445464748", "0x6162636465666768", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x4142434445464748", "0x6162636465666768", "0xFFFFFFFFFFFFFFFF", "0xBBBBBBBBBBBBBBBB"],
    "XMM5": ["0x4142434445464748", "0x6162636465666768", "0xFFFFFFFFFFFFFFFF", "0xBBBBBBBBBBBBBBBB"]
  }
}
%endif

; Exercises VUNPCKLPD: within each 128-bit lane the result is
; { low qword of src1, low qword of src2 }.

; The two 32-byte source rows of the .data table (rdx holds the base).
%define SRC1_ROW rdx
%define SRC2_ROW rdx + 32

lea rdx, [rel .data]

vmovapd ymm0, [SRC1_ROW]
vmovapd ymm1, [SRC2_ROW]

; 128-bit: memory source, then register source
vunpcklpd xmm2, xmm0, [SRC2_ROW]
vunpcklpd xmm3, xmm0, xmm1

; 256-bit: memory source, then register source
vunpcklpd ymm4, ymm0, [SRC2_ROW]
vunpcklpd ymm5, ymm0, ymm1

hlt

align 32
.data:
; src1
dq 0x4142434445464748, 0x5152535455565758
dq 0xFFFFFFFFFFFFFFFF, 0xEEEEEEEEEEEEEEEE

; src2
dq 0x6162636465666768, 0x7172737475767778
dq 0xBBBBBBBBBBBBBBBB, 0xCCCCCCCCCCCCCCCC


================================================
FILE: unittests/ASM/VEX/vunpcklps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM2": ["0x6566676845464748", "0x6162636441424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM3": ["0x6566676845464748", "0x6162636441424344", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x6566676845464748", "0x6162636441424344", "0xAAAAAAAAEEEEEEEE", "0xBBBBBBBBFFFFFFFF"],
    "XMM5": ["0x6566676845464748", "0x6162636441424344", "0xAAAAAAAAEEEEEEEE", "0xBBBBBBBBFFFFFFFF"]
  }
}
%endif

; Exercises VUNPCKLPS: within each 128-bit lane the result interleaves the
; two lower dwords of src1 and src2 as { a0, b0, a1, b1 }.

; The two 32-byte source rows of the .data table (rdx holds the base).
%define SRC1_ROW rdx
%define SRC2_ROW rdx + 32

lea rdx, [rel .data]

vmovaps ymm0, [SRC1_ROW]
vmovaps ymm1, [SRC2_ROW]

; 128-bit: memory source, then register source
vunpcklps xmm2, xmm0, [SRC2_ROW]
vunpcklps xmm3, xmm0, xmm1

; 256-bit: memory source, then register source
vunpcklps ymm4, ymm0, [SRC2_ROW]
vunpcklps ymm5, ymm0, ymm1

hlt

align 32
.data:
; src1
dq 0x4142434445464748, 0x5152535455565758
dq 0xFFFFFFFFEEEEEEEE, 0xDDDDDDDDCCCCCCCC

; src2
dq 0x6162636465666768, 0x7172737475767778
dq 0xBBBBBBBBAAAAAAAA, 0x9999999988888888


================================================
FILE: unittests/ASM/VEX/vxorpd.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM3": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM5": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VXORPD at 256-bit and 128-bit widths with register and memory
; second operands. ymm0/ymm1 keep the inputs; ymm2-ymm5 receive the results.

; Base registers for the two operand tables.
%define LHS_BASE rdx
%define RHS_BASE rbx

lea LHS_BASE, [rel .data1]
lea RHS_BASE, [rel .data2]

vmovapd ymm0, [LHS_BASE]
vmovapd ymm1, [RHS_BASE]

; Second operand in a register
vxorpd ymm2, ymm0, ymm1
vxorpd xmm3, xmm0, xmm1

; Second operand in memory
vxorpd ymm4, ymm0, [RHS_BASE]
vxorpd xmm5, xmm0, [RHS_BASE]

hlt

align 32
.data1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

; Directly follows the 32 bytes of .data1, so it is 32-byte aligned as well.
.data2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vxorps.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0x6162636465666768", "0x7172737475767778"],
    "XMM1": ["0xCCCCCCCC75767778", "0x61626364DDDDDDDD", "0xEEEEEEEE55565758", "0x41424344FFFFFFFF"],
    "XMM2": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM3": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"],
    "XMM4": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x8F8C8D8A30303030", "0x303030308A898887"],
    "XMM5": ["0x8D8E8F8830303030", "0x30303030888B8A85", "0x0000000000000000", "0x0000000000000000"]
  }
}
%endif

; Exercises VXORPS at 256-bit and 128-bit widths with register and memory
; second operands. ymm0/ymm1 keep the inputs; ymm2-ymm5 receive the results.

; Base registers for the two operand tables.
%define LHS_BASE rdx
%define RHS_BASE rbx

lea LHS_BASE, [rel .data1]
lea RHS_BASE, [rel .data2]

vmovapd ymm0, [LHS_BASE]
vmovapd ymm1, [RHS_BASE]

; Second operand in a register
vxorps ymm2, ymm0, ymm1
vxorps xmm3, xmm0, xmm1

; Second operand in memory
vxorps ymm4, ymm0, [RHS_BASE]
vxorps xmm5, xmm0, [RHS_BASE]

hlt

align 32
.data1:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778

; Directly follows the 32 bytes of .data1, so it is 32-byte aligned as well.
.data2:
dq 0xCCCCCCCC75767778, 0x61626364DDDDDDDD
dq 0xEEEEEEEE55565758, 0x41424344FFFFFFFF


================================================
FILE: unittests/ASM/VEX/vzeroall.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0", "0", "0", "0"],
    "XMM1": ["0", "0", "0", "0"],
    "XMM2": ["0", "0", "0", "0"],
    "XMM3": ["0", "0", "0", "0"],
    "XMM4": ["0", "0", "0", "0"],
    "XMM5": ["0", "0", "0", "0"],
    "XMM6": ["0", "0", "0", "0"],
    "XMM7": ["0", "0", "0", "0"],
    "XMM8": ["0", "0", "0", "0"],
    "XMM9": ["0", "0", "0", "0"],
    "XMM10": ["0", "0", "0", "0"],
    "XMM11": ["0", "0", "0", "0"],
    "XMM12": ["0", "0", "0", "0"],
    "XMM13": ["0", "0", "0", "0"],
    "XMM14": ["0", "0", "0", "0"],
    "XMM15": ["0", "0", "0", "0"]
  }
}
%endif

; Fills all sixteen ymm registers with a non-zero pattern, then checks that
; VZEROALL clears every one of them completely.

; Emits "vmovapd <reg>, [rdx]" for each register passed, in argument order.
%macro fill_from_rdx 1-*
  %rep %0
    vmovapd %1, [rdx]
    %rotate 1
  %endrep
%endmacro

lea rdx, [rel .data]

fill_from_rdx ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
fill_from_rdx ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15

vzeroall

hlt

align 32
.data:
dq 0x4142434445464748, 0x5152535455565758
dq 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/VEX/vzeroupper.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "XMM0": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM1": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM2": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM3": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM4": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM5": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM6": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM7": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM8": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM9": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM10": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM11": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM12": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM13": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM14": ["0x4142434445464748", "0x5152535455565758", "0", "0"],
    "XMM15": ["0x4142434445464748", "0x5152535455565758", "0", "0"]
  }
}
%endif

; Fills all sixteen ymm registers with a non-zero pattern, then checks that
; VZEROUPPER clears only bits 255:128 and leaves the low 128 bits intact.

; Emits "vmovapd <reg>, [rdx]" for each register passed, in argument order.
%macro fill_from_rdx 1-*
  %rep %0
    vmovapd %1, [rdx]
    %rotate 1
  %endrep
%endmacro

lea rdx, [rel .data]

fill_from_rdx ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7
fill_from_rdx ymm8, ymm9, ymm10, ymm11, ymm12, ymm13, ymm14, ymm15

vzeroupper

hlt

align 32
.data:
; Low 128 bits: expected to survive
dq 0x4142434445464748, 0x5152535455565758
; High 128 bits: expected to be zeroed
dq 0x6162636465666768, 0x7172737475767778


================================================
FILE: unittests/ASM/X87/D8_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xc000000000000000", "0x4000"]
  }
}
%endif

; FADD m32fp: ST0 = 1.0 (loaded from a double) plus a single-precision 2.0
; read from memory; the expected result is 3.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fadd dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; FMUL m32fp: ST0 = 1.0 (loaded from a double) times a single-precision 2.0
; read from memory; the expected result is 2.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fmul dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  }
}
%endif

; FSUB m32fp: ST0 = 1.0 (loaded from a double) minus a single-precision 2.0
; read from memory; the expected result is -1.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fsub dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; FSUBR m32fp: ST0 = (single-precision 2.0 from memory) minus ST0 (1.0,
; loaded from a double); the expected result is 1.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fsubr dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  }
}
%endif

; FDIV m32fp: ST0 = 1.0 (loaded from a double) divided by a single-precision
; 2.0 read from memory; the expected result is 0.5.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fdiv dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; FDIVR m32fp: ST0 = (single-precision 2.0 from memory) divided by ST0 (1.0,
; loaded from a double); the expected result is 2.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 1.0 encoded as a double, 2.0 encoded as a single.
%define F64_ONE 0x3ff0000000000000
%define F32_TWO 0x40000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F32_TWO
mov [SLOT(1)], rax

fld qword [SLOT(0)]
fdivr dword [SLOT(1)]
hlt


================================================
FILE: unittests/ASM/X87/D8_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xC000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; FADD ST0, ST(i): pushes 1.0 then 2.0, adds ST1 into ST0. Expected: the top
; of stack holds 3.0 and the earlier push still holds 1.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; Operands encoded as doubles.
%define F64_ONE 0x3ff0000000000000
%define F64_TWO 0x4000000000000000

mov rdx, STAGING

mov rax, F64_ONE
mov [SLOT(0)], rax
mov rax, F64_TWO
mov [SLOT(1)], rax

; After both loads: ST0 = 2.0, ST1 = 1.0
fld qword [SLOT(0)]
fld qword [SLOT(1)]

; ST0 = ST0 + ST1
fadd st0, st1

hlt


================================================
FILE: unittests/ASM/X87/D8_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  }
}
%endif

; FMUL ST0, ST0: squares 2.0; the expected result is 4.0.

; Address the operand is staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; 2.0 encoded as a double.
%define F64_TWO 0x4000000000000000

mov rdx, STAGING

mov rax, F64_TWO
mov [SLOT(0)], rax

fld qword [SLOT(0)]
fmul st0, st0
hlt


================================================
FILE: unittests/ASM/X87/D8_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x1"
  }
}
%endif

; FCOM ST(i): compares ST0 = 0.0 against ST1 = 1.0 and checks the x87 status
; word. RAX is set to 1 on the expected status, 0 otherwise.
finit
fld1
fldz
; ST0 (0.0) < ST1 (1.0): C0 = 1, C2 = 0, C3 = 0
fcom st1

; AH receives status word bits 15:8: C0 is bit 0, C1 bit 1, C2 bit 2,
; TOP bits 5:3 and C3 bit 6.
fnstsw ax
; 0x31 = TOP of 6 (two pushes after FINIT) in bits 5:3, plus C0
cmp ah, 0x31
je good
; Unexpected status word
mov rax, 0
hlt
good:
mov rax, 1
hlt


================================================
FILE: unittests/ASM/X87/D8_D9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; FCOMP: only the pop side effect is verified here, not the comparison flags.
; Push 1.0 and 0.0, let FCOMP pop the 0.0, then push 1.0 again; both
; occupied stack slots must end up holding 1.0.
fld1
fldz
; Explicit form of the operand-less FCOMP (compares against ST1, then pops)
fcomp st1
fld1

hlt


================================================
FILE: unittests/ASM/X87/D8_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0xBFFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; FSUB ST0, ST(i): pushes 2.0 then 1.0 and computes ST0 = ST0 - ST1, i.e.
; 1.0 - 2.0 = -1.0; the earlier push keeps 2.0.

; Address the operands are staged at.
%define STAGING 0xe0000000
; Address of the n-th 8-byte slot (rdx holds the staging base).
%define SLOT(n) rdx + 8 * (n)
; Operands encoded as doubles.
%define F64_ONE 0x3ff0000000000000
%define F64_TWO 0x4000000000000000

mov rdx, STAGING

mov rax, F64_TWO
mov [SLOT(0)], rax
mov rax, F64_ONE
mov [SLOT(1)], rax

; After both loads: ST0 = 1.0, ST1 = 2.0
fld qword [SLOT(0)]
fld qword [SLOT(1)]
fsub st0, st1
hlt


================================================
FILE: unittests/ASM/X87/D8_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fsubr st0, st(i)` (reversed subtraction).
mov rdx, 0xe0000000

mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 1], rax

; Push 2.0 then 1.0: ST(0) = 1.0, ST(1) = 2.0
fld qword [rdx + 8 * 0]
fld qword [rdx + 8 * 1]
; ST(0) = ST(1) - ST(0) = 2.0 - 1.0 = 1.0
fsubr st0, st1
hlt


================================================
FILE: unittests/ASM/X87/D8_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fdiv st0, st(i)` with both operands being ST(0).
mov rdx, 0xe0000000

mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax

; ST(0) = 2.0 / 2.0 = 1.0
fld qword [rdx + 8 * 0]
fdiv st0, st0
hlt


================================================
FILE: unittests/ASM/X87/D8_F0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0"
  }
}
%endif

; Tests that a division by zero does not set the IE flag
finit
; Push 0.0 then 1.0: ST(0) = 1.0, ST(1) = 0.0
fldz
fld1
; ST(0) = 1.0 / 0.0
fdiv st0, st1

; Keep only bit 0 of the status word (IE, invalid operation); expected to be 0
fnstsw ax
and rax, 1
hlt


================================================
FILE: unittests/ASM/X87/D8_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFE"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fdivr st0, st(i)` (reversed division).
mov rdx, 0xe0000000

mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax
mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 1], rax

; Push 2.0 then 4.0: ST(0) = 4.0, ST(1) = 2.0
fld qword [rdx + 8 * 0]
fld qword [rdx + 8 * 1]

; ST(0) = ST(1) / ST(0) = 2.0 / 4.0 = 0.5
fdivr st0, st1
hlt


================================================
FILE: unittests/ASM/X87/D9_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3fff"]
  }
}
%endif

; Test for `fld m32fp`: loads a single-precision 1.0 onto the x87 stack.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld dword [rdx + 8 * 0]
hlt


================================================
FILE: unittests/ASM/X87/D9_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000",
    "MM7": ["0x8000000000000000", "0x3fff"]
  }
}
%endif

; Test for `fst m32fp`: stores ST(0) to memory without popping it.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x0 ; clear the destination slot
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]
fst dword [rdx + 8 * 1]

; Read back the stored single-precision value so it can be checked in RAX
mov eax, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/D9_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000",
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fstp m32fp`: stores ST(0) to memory and pops it.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax
mov eax, 0x0 ; clear the destination slot
mov [rdx + 8 * 2], eax

; Push 1.0, store-and-pop it, then push 2.0 into the register that was freed
fld dword [rdx + 8 * 0]
fstp dword [rdx + 8 * 2]
fld dword [rdx + 8 * 1]

; Read back the value written by fstp so it can be checked in RAX
mov eax, [rdx + 8 * 2]

hlt


================================================
FILE: unittests/ASM/X87/D9_05.asm
================================================
%ifdef CONFIG
{
}
%endif

; Test for `fldcw m16`; the expected-state header checks no registers.
mov rdx, 0xe0000000
; Just to ensure execution
; The control word is loaded from whatever the test memory at rdx contains.
fldcw [rdx]

hlt


================================================
FILE: unittests/ASM/X87/D9_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3fff"]
  }
}
%endif

; Test for `fstenv`/`fldenv` using the 16-bit operand-size environment layout.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax
mov eax, 0x40800000 ; 4.0
mov [rdx + 8 * 2], eax

; Push 1.0 and snapshot the environment while one value is on the stack
fld dword [rdx + 8 * 0]
o16 fstenv [rdx + 8 * 3]
; Push 4.0, then restore the snapshot taken before that push
fld dword [rdx + 8 * 2]
o16 fldenv [rdx + 8 * 3]

; This will overwrite the previous load
; The restored environment carries the status word (including TOP) and the
; tag word from before the 4.0 was pushed, so this push lands in the same
; register the 4.0 occupied.
fld dword [rdx + 8 * 1]

; 14 bytes for 16bit
; 2 Bytes : FCW
; 2 Bytes : FSW
; 2 bytes : FTW
; 2 bytes : Instruction offset
; 2 bytes : Instruction CS selector
; 2 bytes : Data offset
; 2 bytes : Data selector

; 28 bytes for 32bit
; 4 bytes : FCW
; 4 bytes : FSW
; 4 bytes : FTW
; 4 bytes : Instruction pointer
; 2 bytes : instruction pointer selector
; 2 bytes : Opcode
; 4 bytes : data pointer offset
; 4 bytes : data pointer selector

hlt


================================================
FILE: unittests/ASM/X87/D9_06_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3fff"]
  }
}
%endif

; Test for `fstenv`/`fldenv` using the 32-bit operand-size environment layout.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax
mov eax, 0x40800000 ; 4.0
mov [rdx + 8 * 2], eax

; Push 1.0 and snapshot the environment while one value is on the stack
fld dword [rdx + 8 * 0]
o32 fstenv [rdx + 8 * 3]
; Push 4.0, then restore the snapshot taken before that push
fld dword [rdx + 8 * 2]
o32 fldenv [rdx + 8 * 3]

; This will overwrite the previous load
; The restored environment carries the status word (including TOP) and the
; tag word from before the 4.0 was pushed, so this push lands in the same
; register the 4.0 occupied.
fld dword [rdx + 8 * 1]

; 14 bytes for 16bit
; 2 Bytes : FCW
; 2 Bytes : FSW
; 2 bytes : FTW
; 2 bytes : Instruction offset
; 2 bytes : Instruction CS selector
; 2 bytes : Data offset
; 2 bytes : Data selector

; 28 bytes for 32bit
; 4 bytes : FCW
; 4 bytes : FSW
; 4 bytes : FTW
; 4 bytes : Instruction pointer
; 2 bytes : instruction pointer selector
; 2 bytes : Opcode
; 4 bytes : data pointer offset
; 4 bytes : data pointer selector

hlt


================================================
FILE: unittests/ASM/X87/D9_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x37F"
  }
}
%endif

; Test for `fnstcw m16`: stores the x87 control word and checks it in RAX.
mov rdx, 0xe0000000
fnstcw [rdx]
; Clear RAX first so only the 16-bit control word remains after the load
mov eax, 0
mov ax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87/D9_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x4000"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fld st(i)`: duplicates the top of the x87 stack.
; Push 1.0
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push 2.0
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Push a copy of ST(0), so the two topmost entries are both 2.0
fld st0

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fxch`: swaps ST(0) with ST(1).
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax

; Push 1.0 then 2.0: ST(0) = 2.0, ST(1) = 1.0
fld dword [rdx + 8 * 0]
fld dword [rdx + 8 * 1]

; After the exchange: ST(0) = 1.0, ST(1) = 2.0
fxch

hlt


================================================
FILE: unittests/ASM/X87/D9_D0.asm
================================================
%ifdef CONFIG
{
}
%endif

; Test for `fnop`; the expected-state header checks no registers.
; Just to ensure execution
fnop
hlt


================================================
FILE: unittests/ASM/X87/D9_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0xC000"]
  }
}
%endif

; Test for `fchs`: flips the sign of ST(0), for a positive and a negative input.
; 2.0 -> -2.0
lea rdx, [rel data]
fld tword [rdx + 8 * 0]
fchs

; -1.0 -> 1.0
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fchs

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 2.0
  dq 0
data2:
  dt -1.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_E1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fabs`: clears the sign of ST(0), for a positive and a negative input.
; 2.0 stays 2.0
lea rdx, [rel data]
fld tword [rdx + 8 * 0]
fabs

; -1.0 -> 1.0
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fabs

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 2.0
  dq 0
data2:
  dt -1.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_E4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x100",
    "RBX": "0x0",
    "RCX": "0x4000"
  }
}
%endif

; Test for `ftst`: compares ST(0) with 0.0 for a positive, a zero and a
; negative value. The mask 0x4700 keeps only the condition bits
; C3 (bit 14), C2 (bit 10), C1 (bit 9) and C0 (bit 8) of the status word.

; Positive input: no condition bit is expected; result kept in RBX
fld dword [rel positive]
ftst
fnstsw ax
and rax, 0x4700
mov rbx, rax

; Zero input: only C3 is expected (0x4000); result kept in RCX
fldz
ftst
fnstsw ax
and rax, 0x4700
mov rcx, rax

; Negative input: only C0 is expected (0x100); result left in RAX
fld dword [rel negative]
ftst
fnstsw ax
and rax, 0x4700

hlt

; Single-precision operands
align 16
positive: dd 3.14159
negative: dd -2.71828

================================================
FILE: unittests/ASM/X87/D9_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fld1`: pushes the constant 1.0.
fld1

hlt


================================================
FILE: unittests/ASM/X87/D9_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xD49A784BCD1B8AFE", "0x4000"]
  }
}
%endif

; Test for `fldl2t`: pushes the constant log2(10).
fldl2t

hlt


================================================
FILE: unittests/ASM/X87/D9_EA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xB8AA3B295C17F0BC", "0x3FFF"]
  }
}
%endif

; Test for `fldl2e`: pushes the constant log2(e).
fldl2e

hlt


================================================
FILE: unittests/ASM/X87/D9_EB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xC90FDAA22168C235", "0x4000"]
  }
}
%endif

; Test for `fldpi`: pushes the constant pi.
fldpi

hlt


================================================
FILE: unittests/ASM/X87/D9_EC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x9A209A84FBCFF799", "0x3FFD"]
  }
}
%endif

; Test for `fldlg2`: pushes the constant log10(2).
fldlg2

hlt


================================================
FILE: unittests/ASM/X87/D9_ED.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0xB17217F7D1CF79AC", "0x3FFE"]
  }
}
%endif

; Test for `fldln2`: pushes the constant ln(2).
fldln2

hlt


================================================
FILE: unittests/ASM/X87/D9_EE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0", "0"]
  }
}
%endif

; Test for `fldz`: pushes the constant +0.0.
fldz

hlt


================================================
FILE: unittests/ASM/X87/D9_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x0000000000000000", "0x0000"]
  }
}
%endif

; Test for `f2xm1`: computes 2^ST(0) - 1 in place; for an input of 0.0 the
; result is 0.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]
f2xm1

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  }
}
%endif

; Test for `fyl2x`: computes ST(1) * log2(ST(0)), stores it in ST(1) and pops.
; Push y = 2.0 first so that it ends up in ST(1)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Push x = 16.0, which becomes ST(0)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; 2.0 * log2(16.0) = 8.0 is left as the only stack entry
fyl2x
; Push 1.0 on top to show that exactly one entry was popped
fld1

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 16.0
  dq 0

data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0xC75922E5F71D2DC6", "0x3FFF"]
  }
}
%endif

; Test for `fptan`: replaces ST(0) with tan(ST(0)) and then pushes 1.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Afterwards ST(0) = 1.0 and ST(1) = tan(1.0)
fptan

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0xC90FDAA22168C235", "0x3FFF"]
  }
}
%endif

; Test for `fpatan`: computes arctan(ST(1) / ST(0)), stores it in ST(1) and pops.
; Push 7.0 first so that it ends up in ST(1)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push 0.0, which becomes ST(0)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; arctan(7.0 / 0.0) = pi/2 is left as the only stack entry
fpatan
; Push 1.0 on top to show that exactly one entry was popped
fld1

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 7.0
  dq 0
data2:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F4.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xF000000000000000", "0xBFFF"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; Test for `fxtract`: splits ST(0) into exponent and significand.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; -15.0 = -1.875 * 2^3: the exponent 3.0 replaces the original entry and the
; significand -1.875 is pushed on top of it.
fxtract

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt -15.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F4_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0xFFFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],    
    "MM5":  ["0x8000000000000000", "0xFFFF"],
    "MM4":  ["0x0000000000000000", "0x8000"]
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Test for `fxtract` with zero inputs (+0.0 and -0.0).
section .data
    nzer: dq -0.0 ; double-precision negative zero

section .text
global _start
_start:
finit
; +0.0 input
fldz
fxtract ; MM7 is -inf, MM6 is 0.0

; -0.0 input
lea rdx, [rel nzer]
fld qword [rdx]
fxtract ; MM5 is -inf, MM4 is -0.0

hlt


================================================
FILE: unittests/ASM/X87/D9_F5.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xE666666666666668", "0xBFFE"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; Test for `fprem1`: partial remainder of ST(0) / ST(1) with the quotient
; rounded to nearest.
; Push the divisor 3.0 first so that it ends up in ST(1)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push the dividend 5.1, which becomes ST(0)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; 5.1 - 2 * 3.0 = -0.9 replaces ST(0); ST(1) keeps 3.0
fprem1

hlt

; 80-bit operands, each followed by a zero qword
align 4096
data:
  dt 3.0
  dq 0
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F5_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xC000000000000000", "0xC000"],
    "MM7":  ["0x8000000000000000", "0x4001"]
  }
}
%endif

; Compares `fprem` and `fprem1` on the same operands (11.0 and 7.0).
; Push the divisor 7.0 first so that it ends up in ST(1)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push the dividend 11.0, which becomes ST(0)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Truncating remainder: 11.0 - 1 * 7.0 = 4.0
fprem

; Save the fprem result and pop it; the divisor is ST(0) again
lea rdx, [rel result1]
fstp tword [rdx + 8 * 0]

; Push the dividend 11.0 again
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Round-to-nearest remainder: 11.0 - 2 * 7.0 = -3.0
fprem1

; Save the fprem1 result and pop it
lea rdx, [rel result2]
fstp tword [rdx + 8 * 0]

; Drop the divisor so the stack is empty before the results are reloaded
ffreep st0

lea rdx, [rel result1]
fld tword [rdx + 8 * 0]

lea rdx, [rel result2]
fld tword [rdx + 8 * 0]

; MM6 contains result2 (fprem1)
; MM7 contains result1 (fprem)

hlt

; 80-bit operands, each followed by a zero qword
align 4096
data:
  dt 7.0
  dq 0
data2:
  dt 11.0
  dq 0

; Storage for the two 80-bit results
result1:
  dt 0.0
  dq 0.0
result2:
  dt 0.0
  dq 0.0


================================================
FILE: unittests/ASM/X87/D9_F5_3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0xC000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0xC001"]
  }
}
%endif

; Compares `fprem` and `fprem1` on the same operands with a negative
; dividend (-11.0 and 7.0).
; Push the divisor 7.0 first so that it ends up in ST(1)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push the dividend -11.0, which becomes ST(0)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Truncating remainder: -11.0 - (-1) * 7.0 = -4.0
fprem

; Save the fprem result and pop it; the divisor is ST(0) again
lea rdx, [rel result1]
fstp tword [rdx + 8 * 0]

; Push the dividend -11.0 again
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Round-to-nearest remainder: -11.0 - (-2) * 7.0 = 3.0
fprem1

; Save the fprem1 result and pop it
lea rdx, [rel result2]
fstp tword [rdx + 8 * 0]

; Drop the divisor so the stack is empty before the results are reloaded
ffreep st0

lea rdx, [rel result1]
fld tword [rdx + 8 * 0]

lea rdx, [rel result2]
fld tword [rdx + 8 * 0]

; MM6 contains result2 (fprem1)
; MM7 contains result1 (fprem)

hlt

; 80-bit operands, each followed by a zero qword
align 4096
data:
  dt 7.0
  dq 0
data2:
  dt -11.0
  dq 0

; Storage for the two 80-bit results
result1:
  dt 0.0
  dq 0.0
result2:
  dt 0.0
  dq 0.0


================================================
FILE: unittests/ASM/X87/D9_F6.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "7",
    "RBX":  "0",
    "MM0":  "0x3ff0000000000000",
    "MM1":  "0x4070000000000000",
    "MM2":  "0x4060000000000000",
    "MM3":  "0x4050000000000000",
    "MM4":  "0x4040000000000000",
    "MM5":  "0x4030000000000000",
    "MM6":  "0x4020000000000000",
    "MM7":  "0x4000000000000000"
  }
}
%endif

; Test for `fdecstp`.
; Set the stack with different values.
; Then do fdecstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
mov rax, 0x3ff0000000000000 ; 1.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4000000000000000 ; 2.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4020000000000000 ; 8.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4030000000000000 ; 16.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4040000000000000 ; 32.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4050000000000000 ; 64.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4060000000000000 ; 128.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4070000000000000 ; 256.0
mov [rel temp], rax
fld qword [rel temp]

; Store top in RBX
; TOP lives in bits 11-13 of the status word
xor rax, rax
xor rbx, rbx
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently 0x4070000000000000
fdecstp

; Store top in RAX
xor rax, rax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is 0x3ff0000000000000
; Pop all eight entries into consecutive qword slots of `stack`
fstp qword [rel stack + 8 * 0]
fstp qword [rel stack + 8 * 1]
fstp qword [rel stack + 8 * 2]
fstp qword [rel stack + 8 * 3]
fstp qword [rel stack + 8 * 4]
fstp qword [rel stack + 8 * 5]
fstp qword [rel stack + 8 * 6]
fstp qword [rel stack + 8 * 7]

movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
; Staging slot for the doubles that get pushed
temp: dq 0
; Eight qword slots, one per x87 register spilled above
stack: times 8 dq 0


================================================
FILE: unittests/ASM/X87/D9_F7.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "0",
    "MM0":  "0x4060000000000000",
    "MM1":  "0x4050000000000000",
    "MM2":  "0x4040000000000000",
    "MM3":  "0x4030000000000000",
    "MM4":  "0x4020000000000000",
    "MM5":  "0x4000000000000000",
    "MM6":  "0x3ff0000000000000",
    "MM7":  "0x4070000000000000"
  }
}
%endif

; Test for `fincstp`.
; Set the stack with different values.
; Then do fincstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
mov rax, 0x3ff0000000000000 ; 1.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4000000000000000 ; 2.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4020000000000000 ; 8.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4030000000000000 ; 16.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4040000000000000 ; 32.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4050000000000000 ; 64.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4060000000000000 ; 128.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4070000000000000 ; 256.0
mov [rel temp], rax
fld qword [rel temp]

; Store top in RBX
; TOP lives in bits 11-13 of the status word.
; RBX is cleared first because only BX is written below while the
; expected-state header checks the full register.
xor rax, rax
xor rbx, rbx
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently 0x4070000000000000
fincstp

; Store top in RAX
xor rax, rax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is 0x4060000000000000
; Pop all eight entries into consecutive qword slots of `stack`
fstp qword [rel stack + 8 * 0]
fstp qword [rel stack + 8 * 1]
fstp qword [rel stack + 8 * 2]
fstp qword [rel stack + 8 * 3]
fstp qword [rel stack + 8 * 4]
fstp qword [rel stack + 8 * 5]
fstp qword [rel stack + 8 * 6]
fstp qword [rel stack + 8 * 7]

movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
; Staging slot for the doubles that get pushed
temp: dq 0
; Eight qword slots, one per x87 register spilled above
stack: times 8 dq 0


================================================
FILE: unittests/ASM/X87/D9_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8666666666666666", "0x4000"],
    "MM7":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; Test for `fprem`: partial remainder of ST(0) / ST(1) with the quotient
; truncated toward zero.
; Push the divisor 3.0 first so that it ends up in ST(1)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Push the dividend 5.1, which becomes ST(0)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; 5.1 - 1 * 3.0 = 2.1 replaces ST(0); ST(1) keeps 3.0
fprem

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 3.0
  dq 0
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87/D9_F9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  }
}
%endif

; Test for `fyl2xp1`: computes ST(1) * log2(ST(0) + 1.0), stores it in ST(1)
; and pops.
; Push y = 2.0 first so that it ends up in ST(1)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Push x = 15.0, which becomes ST(0)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; 2.0 * log2(15.0 + 1.0) = 8.0 is left as the only stack entry
fyl2xp1
; Push 1.0 on top to show that exactly one entry was popped
fld1

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 15.0
  dq 0

data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FA.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  }
}
%endif

; Test for `fsqrt`: sqrt(16.0) = 4.0 replaces ST(0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

fsqrt

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 16.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FB.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8A51407DA8345C92", "0x3FFE"],
    "MM7":  ["0xD76AA47848677021", "0x3FFE"]
  }
}
%endif

; Test for `fsincos`: replaces ST(0) with its sine and then pushes its cosine.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Afterwards ST(0) = cos(1.0) and ST(1) = sin(1.0)
fsincos

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FC.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x3fff"]
  }
}
%endif

; Test for `frndint`: rounds ST(0) to an integer; 1.02546 is expected to
; become 1.0.
mov rdx, 0xe0000000

mov eax, 0x3f834241 ; 1.02546
mov [rdx + 8 * 0], eax

fld dword [rdx + 8 * 0]

frndint

hlt


================================================
FILE: unittests/ASM/X87/D9_FD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4006"],
    "MM7":  ["0xB000000000000000", "0x4001"]
  }
}
%endif

; Test for `fscale`: scales ST(0) by 2 raised to the truncated value of ST(1).
; Push the scale 5.5 first so that it ends up in ST(1)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Push 4.0, which becomes ST(0)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; ST(0) = 4.0 * 2^5 = 128.0; ST(1) keeps 5.5
fscale

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 4.0
  dq 0

data2:
  dt 5.5
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FD_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0xD000000000000000", "0xC001"]
  }
}
%endif

; Test for `fscale` with a negative, fractional scale factor.
; Push the scale -6.5 first so that it ends up in ST(1)
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Push 64.0, which becomes ST(0)
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; The scale is truncated toward zero to -6: ST(0) = 64.0 * 2^-6 = 1.0;
; ST(1) keeps -6.5
fscale

hlt

; 80-bit operands, each followed by a zero qword
align 8
data:
  dt 64.0
  dq 0

data2:
  dt -6.5
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xD76AA47848677021", "0x3FFE"]
  }
}
%endif

; Test for `fsin`: sin(1.0) replaces ST(0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

fsin

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87/D9_FF.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xD51132BA9B902522", "0xBFFD"]
  }
}
%endif

; Test for `fcos`: cos(2.0) (a negative value) replaces ST(0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

fcos

hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DA_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xbfff"]
  }
}
%endif

; Test for `fiadd m32int`: adds a 32-bit integer from memory to ST(0).
; Each result is popped as an 80-bit value into `data` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 1.0 + 2 = 3.0
fld qword [rdx + 8 * 0]
fiadd dword [rdx + 8 * 1]

fstp tword [rel data]

movups xmm0, [rel data]

; Test negative

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; 1.0 + (-2) = -1.0
fld qword [rdx + 8 * 0]
fiadd dword [rdx + 8 * 1]

fstp tword [rel data]

movups xmm1, [rel data]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  }
}
%endif

; Test for `fimul m32int`: multiplies ST(0) by a 32-bit integer from memory.
; Each result is popped as an 80-bit value into `data2` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 1.0 * 2 = 2.0
fld qword [rdx + 8 * 0]
fimul dword [rdx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; 1.0 * (-2) = -2.0
fld qword [rdx + 8 * 0]
fimul dword [rdx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data2:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI":  "0x18"
  }
}
%endif

; Integer compare test: compares ST(0) with a 32-bit integer from memory four
; times and collects the C3 ("equal") bit of each comparison in RSI.
; After every comparison RSI is shifted left by one, including the last, so
; the two matching cases followed by two non-matching ones give 0b11000 = 0x18.
; NOTE(review): the file name suggests opcode DA /2 (ficom), while the
; instruction used here is ficomp (DA /3) - confirm this is intentional.
mov rdx, 0xe0000000
mov rsi, 0

; Matching positive-positive
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Matching negative-negative
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov eax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching negative-positive
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov eax, 1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching positive-negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

hlt


================================================
FILE: unittests/ASM/X87/DA_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0xBFFF"],
    "XMM1":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; Test for `fisub m32int`: subtracts a 32-bit integer from memory from ST(0).
; Each result is popped as an 80-bit value into `data` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 1.0 - 2 = -1.0
fld qword [rdx + 8 * 0]
fisub dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; 1.0 - (-2) = 3.0
fld qword [rdx + 8 * 0]
fisub dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFF"],
    "XMM1":  ["0xC000000000000000", "0xC000"]
  }
}
%endif

; Test for `fisubr m32int`: reversed subtraction, ST(0) = integer - ST(0).
; Each result is popped as an 80-bit value into `data` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 2 - 1.0 = 1.0
fld qword [rdx + 8 * 0]
fisubr dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; -2 - 1.0 = -3.0
fld qword [rdx + 8 * 0]
fisubr dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFE"],
    "XMM1":  ["0x8000000000000000", "0xBFFE"]
  }
}
%endif

; Test for `fidiv m32int`: divides ST(0) by a 32-bit integer from memory.
; Each result is popped as an 80-bit value into `data` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 1.0 / 2 = 0.5
fld qword [rdx + 8 * 0]
fidiv dword [rdx + 8 * 1]

fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; 1.0 / (-2) = -0.5
fld qword [rdx + 8 * 0]
fidiv dword [rdx + 8 * 1]

fstp tword [rel data]
movups xmm1, [rel data]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  }
}
%endif

; Test for `fidivr m32int`: reversed division, ST(0) = integer / ST(0).
; Each result is popped as an 80-bit value into `data` and copied to an XMM
; register so the expected-state header can check it.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

; 2 / 1.0 = 2.0
fld qword [rdx + 8 * 0]
fidivr dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

; -2 / 1.0 = -2.0
fld qword [rdx + 8 * 0]
fidivr dword [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

; 16 bytes: room for the 10-byte fstp result, read back with a 16-byte movups
align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DA_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fcmovb st0, st(i)`: moves only when CF = 1 (below).
mov rdx, 0xe0000000

; This store is not read back by the rest of the test
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; ST(0) = 0.0, ST(1) = 1.0
fld1
fldz

; 2 is not below 1: CF = 0
mov eax, 2
cmp eax, 1

; Condition false: ST(0) keeps 0.0
fcmovb st0, st1

; Push another 0.0; the 1.0 is now ST(2)
fldz
; 2 is below 3: CF = 1
cmp eax, 3
; Condition true: ST(0) becomes 1.0
fcmovb st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DA_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fcmove st0, st(i)`: moves only when ZF = 1 (equal).
mov rdx, 0xe0000000

; This store is not read back by the rest of the test
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; ST(0) = 0.0, ST(1) = 1.0
fld1
fldz

; 1 equals 1: ZF = 1
mov eax, 1
cmp eax, 1

; Condition true: ST(0) becomes 1.0
fcmove st0, st1

; Push another 0.0; the original 1.0 is now ST(2)
fldz
; 1 does not equal 0: ZF = 0
cmp eax, 0
; Condition false: ST(0) keeps 0.0
fcmove st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DA_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fcmovbe st0, st(i)`: moves when CF = 1 or ZF = 1 (below or equal).
mov rdx, 0xe0000000

; This store is not read back by the rest of the test
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; ST(0) = 0.0, ST(1) = 1.0
fld1
fldz

; 2 equals 2: ZF = 1
mov eax, 2
cmp eax, 2

; Condition true: ST(0) becomes 1.0
fcmovbe st0, st1

; Push another 0.0; the original 1.0 is now ST(2)
fldz
; 2 is above 0: CF = 0 and ZF = 0
cmp eax, 0
; Condition false: ST(0) keeps 0.0
fcmovbe st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DA_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fcmovu st0, st(i)`: moves only when PF = 1 (unordered).
; PF is driven here with integer compares: it reflects the parity of the low
; byte of the subtraction result.
mov rdx, 0xe0000000

; This store is not read back by the rest of the test
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; ST(0) = 0.0, ST(1) = 1.0
fld1
fldz

; 0 - (-1) = 1: low byte 0x01 has odd parity, PF = 0
mov eax, 0x0
cmp eax, -1

; Condition false: ST(0) keeps 0.0
fcmovu st0, st1

; Push another 0.0; the 1.0 is now ST(2)
fldz
; 0 - 1 = 0xFFFFFFFF: low byte 0xFF has even parity, PF = 1
cmp eax, 1
; Condition true: ST(0) becomes 1.0
fcmovu st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DA_D9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fcompp` (compare and pop twice).
; NOTE(review): fcompp encodes as DE D9 rather than DA D9, so the file name
; does not match the opcode - confirm whether a rename is wanted.
; Only tests pop behaviour
; Push 1.0, 0.0, 0.0; fcompp removes both zeros, and the following fld1
; reuses the first register that was popped.
fld1
fldz
fldz
fcompp
fld1

hlt


================================================
FILE: unittests/ASM/X87/DA_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fucompp` (unordered compare and pop twice).
; Only tests pop behaviour
; Push 1.0, 0.0, 0.0; fucompp removes both zeros, and the following fld1
; reuses the first register that was popped.
fld1
fldz
fldz
fucompp
fld1

hlt


================================================
FILE: unittests/ASM/X87/DB_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  }
}
%endif

; Test for `fild m32int`: converts the 32-bit integer 1024 and pushes it.
mov rdx, 0xe0000000

mov eax, 1024
mov [rdx + 8 * 0], eax

fild dword [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87/DB_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fisttp m32int`: stores ST(0) as a truncated 32-bit integer and pops.
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0 ; clear the destination slot
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fisttp dword [rdx + 8 * 1]

; Reuses the register freed by the pop above
fld1

; Read back the stored integer so it can be checked in RAX
mov eax, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DB_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4009"]
  }
}
%endif

; Test for `fist m32int`: stores ST(0) as a 32-bit integer without popping.
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0 ; clear the destination slot
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fist dword [rdx + 8 * 1]

; Nothing was popped, so this lands in a new register next to the 1024.0
fld1

; Read back the stored integer so it can be checked in RAX
mov eax, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DB_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Test for `fistp m32int`: stores ST(0) as a 32-bit integer and pops.
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0 ; clear the destination slot
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fistp dword [rdx + 8 * 1]

; Reuses the register freed by the pop above
fld1

; Read back the stored integer so it can be checked in RAX
mov eax, [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DB_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Test for `fld m80fp`: loads an 80-bit 2.0 onto the x87 stack.
lea rdx, [rel data]

fld tword [rdx + 8 * 0]
hlt

; 80-bit operand followed by a zero qword
align 8
data:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DB_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fstp m80fp: store st0 to memory as an 80-bit value, then pop.
; 2.0 is loaded from data, written to data2 with fstp and reloaded from data2;
; the config header expects 2.0 in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
; data2 initially holds 0.0, so reading back 2.0 proves the store happened.
fstp tword [rdx + 8 * 0]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87/DB_07_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fstp m80fp through a plain register operand: the store address is first
; materialized in rax with lea, then fstp writes through [rax].
; The value is reloaded via rdx; the config header expects 2.0 in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
lea rax, [rdx + 8 * 0]
; data2 initially holds 0.0, so reading back 2.0 proves the store happened.
fstp tword [rax]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87/DB_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fcmovnb st0, st(i): copy st(i) into st0 when CF == 0.
; Config header expectations: MM7 = 1.0, MM6 = 1.0 (move taken), MM5 = 0.0 (move not taken).
mov rdx, 0xe0000000

; This store is not read back anywhere in this test.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz

; 2 - 1 does not borrow, so CF = 0 and the move is taken: st0 becomes 1.0.
mov eax, 2
cmp eax, 1

fcmovnb st0, st1

fldz
; 2 - 3 borrows, so CF = 1 and st0 keeps its 0.0.
cmp eax, 3
fcmovnb st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DB_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fcmovne st0, st(i): copy st(i) into st0 when ZF == 0.
; Config header expectations: MM7 = 1.0, MM6 = 0.0 (move not taken), MM5 = 1.0 (move taken).
mov rdx, 0xe0000000

; This store is not read back anywhere in this test.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz

; 1 == 1 sets ZF, so the move is not taken and st0 keeps its 0.0.
mov eax, 1
cmp eax, 1

fcmovne st0, st1

fldz
; 1 != 0 clears ZF, so the move is taken: st0 becomes st2 (1.0).
cmp eax, 0
fcmovne st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DB_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x3FFF"],
    "MM6":  ["0x0000000000000000", "0x0000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fcmovnbe st0, st(i): copy st(i) into st0 when CF == 0 and ZF == 0.
; Config header expectations: MM7 = 1.0, MM6 = 0.0 (move not taken), MM5 = 1.0 (move taken).
mov rdx, 0xe0000000

; This store is not read back anywhere in this test.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz

; 2 == 2 sets ZF, so the move is not taken and st0 keeps its 0.0.
mov eax, 2
cmp eax, 2

fcmovnbe st0, st1

fldz
; 2 > 0 (unsigned) leaves CF = 0 and ZF = 0, so the move is taken: st0 becomes st2 (1.0).
cmp eax, 0
fcmovnbe st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DB_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x0000000000000000", "0x0000"],
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fcmovnu st0, st(i): copy st(i) into st0 when PF == 0.
; Config header expectations: MM7 = 1.0, MM6 = 1.0 (move taken), MM5 = 0.0 (move not taken).
mov rdx, 0xe0000000

; This store is not read back anywhere in this test.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz

; 0 - (-1) = 1: the low result byte has an odd number of set bits, so PF = 0 and the move is taken.
mov eax, 0x0
cmp eax, -1

fcmovnu st0, st1

fldz
; 0 - 1 = 0xFFFFFFFF: the low result byte has eight set bits, so PF = 1 and st0 keeps its 0.0.
cmp eax, 1
fcmovnu st0, st2

hlt


================================================
FILE: unittests/ASM/X87/DB_E2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1"
  }
}
%endif

; fnclex: clear the x87 exception flags in the status word.
; Bit 0 of the status word (IE, invalid operation) is sampled before and after fnclex;
; the config header expects RBX = 1 (set by 0.0 / 0.0) and RAX = 0 (cleared).
finit ; IE is 0
fldz
fldz
fdiv st0, st1 ; 0.0 / 0.0 is an invalid operation, so IE becomes 1

fnstsw ax
and rax, 1
mov rbx, rax ; save IE to RBX

; Clear
fnclex

; Sample IE again; it must now read 0.
fnstsw ax
and rax, 1

hlt


================================================
FILE: unittests/ASM/X87/DB_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x037F"
  }
}
%endif

; fninit: reset the x87 FPU; the config header expects the control word to read 0x037F afterwards.
fninit

; Ensures that fnstcw after fninit sets the correct value
fnstcw [rel control]
; Only ax is written here.
; NOTE(review): the expected RAX value assumes the upper bits of RAX are zero on entry - confirm harness guarantee.
mov ax, word [rel control]

hlt

align 4096
control: times 2 db 0 ; Reserve space for the FPU control word


================================================
FILE: unittests/ASM/X87/DB_E3_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Tests that fninit clears the status word (which includes the IE flag)
; The config header expects the status word read after the second fninit to be 0.
fninit
fldz
fldz
fdiv ; sets IE flag

; Reset again and read the whole status word back.
fninit
fnstsw ax

hlt


================================================
FILE: unittests/ASM/X87/DB_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x1",
    "RDI": "0x40",
    "RSI": "0x45",
    "RBP": "0x45"
  }
}
%endif

; fucomi st0, st(i): unordered compare that writes its result to ZF/PF/CF in EFLAGS.
; After each compare EFLAGS is captured with pushfq/pop and masked down to CF|PF|ZF.
; Config header expectations: greater = 0x0, less = 0x1 (CF), equal = 0x40 (ZF),
; unordered = 0x45 (ZF|PF|CF).
; pushfq/pop need a stack. NOTE(review): the address is presumably mapped by the test harness.
mov rsp, 0xe000_1000

lea rdx, [rel qnan]
fld tword [rdx + 8 * 0]

lea rdx, [rel minus_one]
fld tword [rdx + 8 * 0]

lea rdx, [rel two]
fld tword [rdx + 8 * 0]

lea rdx, [rel one]
fld tword [rdx + 8 * 0]
fld tword [rdx + 8 * 0]

; Mask for CF, PF, ZF flags
mov rax, 0b1000101

; Stack:
; st(0) = 1.0
; st(1) = 1.0
; st(2) = 2.0
; st(3) = -1.0
; st(4) = QNaN

; st(0) > st(i)
fucomi st3
pushfq
pop rbx
and rbx, rax

; st(0) < st(i)
fucomi st2
pushfq
pop rcx
and rcx, rax

; st(0) == st(i)
fucomi st1
pushfq
pop rdi
and rdi, rax

; st(i) == NaN
fucomi st4
pushfq
pop rsi
and rsi, rax

; Push a QNaN so that the NaN is now the st(0) operand.
lea rdx, [rel qnan]
fld tword [rdx + 8 * 0]

; st(0) == NaN
fucomi st1
pushfq
pop rbp
and rbp, rax

hlt

align 8
one:
  dt 1.0

align 8
two:
  dt 2.0

align 8
minus_one:
  dt -1.0

align 8
qnan:
  ; 80-bit value: exponent all ones, integer bit and top fraction bit set (quiet NaN).
  dq 0xC000000000000001
  dw 0x7FFF


================================================
FILE: unittests/ASM/X87/DB_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x1",
    "RDI": "0x40"
  }
}
%endif

; fcomi st0, st(i): compare that writes its result to ZF/PF/CF in EFLAGS.
; After each compare EFLAGS is captured with pushfq/pop and masked down to CF|PF|ZF.
; Config header expectations: greater = 0x0, less = 0x1 (CF), equal = 0x40 (ZF).
; pushfq/pop need a stack. NOTE(review): the address is presumably mapped by the test harness.
mov rsp, 0xe000_1000

lea rdx, [rel minus_one]
fld tword [rdx + 8 * 0]

lea rdx, [rel two]
fld tword [rdx + 8 * 0]

lea rdx, [rel one]
fld tword [rdx + 8 * 0]
fld tword [rdx + 8 * 0]

; Mask for CF, PF, ZF flags
mov rax, 0b1000101

; Stack:
; st(0) = 1.0
; st(1) = 1.0
; st(2) = 2.0
; st(3) = -1.0

; st(0) > st(i)
fcomi st3
pushfq
pop rbx
and rbx, rax

; st(0) < st(i)
fcomi st2
pushfq
pop rcx
and rcx, rax

; st(0) == st(i)
fcomi st1
pushfq
pop rdi
and rdi, rax

hlt

align 8
one:
  dt 1.0

align 8
two:
  dt 2.0

align 8
minus_one:
  dt -1.0


================================================
FILE: unittests/ASM/X87/DC_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; fadd m64fp: st0 = st0 + double-precision memory operand.
; 1.0 + 2.0; the config header expects 3.0 (0xC000000000000000, 0x4000) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fadd qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 1.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 2.0


================================================
FILE: unittests/ASM/X87/DC_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fmul m64fp: st0 = st0 * double-precision memory operand.
; 1.0 * 2.0; the config header expects 2.0 (exponent 0x4000) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fmul qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 1.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 2.0


================================================
FILE: unittests/ASM/X87/DC_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  }
}
%endif

; fsub m64fp: st0 = st0 - double-precision memory operand.
; 1.0 - 2.0; the config header expects -1.0 (exponent/sign word 0xBFFF) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fsub qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 1.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 2.0


================================================
FILE: unittests/ASM/X87/DC_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fsubr m64fp: st0 = double-precision memory operand - st0 (reversed subtract).
; 2.0 - 1.0; the config header expects 1.0 (exponent 0x3FFF) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fsubr qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 1.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 2.0


================================================
FILE: unittests/ASM/X87/DC_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  }
}
%endif

; fdiv m64fp: st0 = st0 / double-precision memory operand.
; 1.0 / 2.0; the config header expects 0.5 (exponent 0x3FFE) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fdiv qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 1.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 2.0


================================================
FILE: unittests/ASM/X87/DC_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4001"]
  }
}
%endif

; fdivr m64fp: st0 = double-precision memory operand / st0 (reversed divide).
; 8.0 / 2.0; the config header expects 4.0 (exponent 0x4001) in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fdivr qword [rdx + 8 * 0]

hlt

align 8
data:
  ; 80-bit st0 operand.
  dt 2.0
  dq 0
data2:
  ; 64-bit memory operand.
  dq 8.0


================================================
FILE: unittests/ASM/X87/DC_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0xA000000000000000", "0x4001"]
  }
}
%endif

; fadd st(i), st0: st(i) = st(i) + st0, with st0 left unchanged.
; Stack after the loads: st0 = 4.0, st1 = 2.0, st2 = 1.0.
; Config header expectations: MM7 = 5.0 (1.0 + 4.0), MM6 = 2.0, MM5 = 4.0.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 1], rax
mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax

fld qword [rdx + 8 * 0]
fld qword [rdx + 8 * 1]
fld qword [rdx + 8 * 2]

; fadd st(i), st(0)
fadd st2, st0

hlt


================================================
FILE: unittests/ASM/X87/DC_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4001"],
    "MM7":  ["0x8000000000000000", "0x4002"]
  }
}
%endif

; fmul st(i), st0: st(i) = st(i) * st0, with st0 left unchanged.
; Config header expectations: MM7 = 8.0 (2.0 * 4.0), MM6 = 4.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; st1 (2.0) *= st0 (4.0)
fmul st1, st0

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DC_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x1"
  }
}
%endif

; Tests undocumented fcom implementation at 0xdc, 0xd0+i
; RAX is set to 1 when the status word matches the expected value, 0 otherwise.
finit
fld1
fldz
; fcom st1
db 0xdc, 0xd1

fnstsw ax
; High byte of the status word: TOP = 6 after two pushes (0x30) plus C0 set (0x01),
; because st0 (0.0) is less than st1 (1.0).
cmp ah, 0x31
je good
mov rax, 0
hlt
good:
mov rax, 1
hlt


================================================
FILE: unittests/ASM/X87/DC_D9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Only tests pop behaviour
; Tests undocumented fcomp implementation at 0xdc, 0xd8+i
; The config header expects 1.0 in both MM7 and MM6: the fld1 pushed after the
; compare must reuse the register that held 0.0 before the pop.
finit
fld1
fldz
; fcomp
db 0xdc, 0xd9
fld1

hlt


================================================
FILE: unittests/ASM/X87/DC_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fsubr st(i), st0: st(i) = st0 - st(i), with st0 left unchanged.
; Config header expectations: MM7 = 1.0 (2.0 - 1.0), MM6 = 2.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; st1 (1.0) = st0 (2.0) - st1
fsubr st1, st0

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DC_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0xBFFF"]
  }
}
%endif

; fsub st(i), st0: st(i) = st(i) - st0, with st0 left unchanged.
; Config header expectations: MM7 = -1.0 (1.0 - 2.0), MM6 = 2.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; st1 (1.0) -= st0 (2.0)
fsub st1, st0

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DC_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFE"]
  }
}
%endif

; fdivr st(i), st0: st(i) = st0 / st(i), with st0 left unchanged.
; Config header expectations: MM7 = 0.5 (1.0 / 2.0), MM6 = 1.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; st1 (2.0) = st0 (1.0) / st1
fdivr st1, st0

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87/DC_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x8000000000000000", "0x4001"]
  }
}
%endif

; fdiv st(i), st0: st(i) = st(i) / st0, with st0 left unchanged.
; Config header expectations: MM7 = 4.0 (8.0 / 2.0), MM6 = 2.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; st1 (8.0) /= st0 (2.0)
fdiv st1, st0

hlt

align 8
data:
  dt 8.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fld m64fp: load a double-precision value from memory onto the x87 stack.
; The config header expects 2.0 (exponent 0x4000) in MM7.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax

fld qword [rdx + 8 * 0]
hlt


================================================
FILE: unittests/ASM/X87/DD_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fisttp m64int: store st0 to memory as a truncated 64-bit integer, then pop.
; The config header expects the stored integer (2) in RAX and 1.0 in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data3]
fisttp qword [rdx + 8 * 0]

; Read back the integer written by fisttp.
mov rax, [rdx + 8 * 0]

; Pushed after the pop: the expected MM7 value is this 1.0, not the 2.0 loaded earlier.
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  ; Destination for the integer store, initially zero.
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fst m64fp: store st0 to memory as a double WITHOUT popping.
; The config header expects the stored bits (0x4000000000000000 = 2.0) in RAX,
; 2.0 still in MM7 and the subsequently pushed 1.0 in MM6.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data3]
fst qword [rdx + 8 * 0]

; Read back the double written by fst.
mov rax, [rdx + 8 * 0]

; No pop happened, so this push lands in the next register down (MM6).
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  ; Destination for the store, initially zero.
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fstp m64fp: store st0 to memory as a double, then pop.
; The config header expects the stored bits (0x4000000000000000 = 2.0) in RAX and 1.0 in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data3]
fstp qword [rdx + 8 * 0]

; Read back the double written by fstp.
mov rax, [rdx + 8 * 0]

; Pushed after the pop: the expected MM7 value is this 1.0, not the 2.0 loaded earlier.
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  ; Destination for the store, initially zero.
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc90fdaa22168c235", "0x4000"],
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"],
    "XMM7": ["0x0000000000000000", "0x0000"],
    "MM0":  ["0xc90fdaa22168c235", "0x4000"],
    "MM1":  ["0x8000000000000000", "0x4005"],
    "MM2":  ["0x8000000000000000", "0x4004"],
    "MM3":  ["0x8000000000000000", "0x4003"],
    "MM4":  ["0x8000000000000000", "0x4002"],
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x0000000000000000", "0x0000"]
  }
}
%endif

; fnsave / frstor with a 32-bit operand size (o32).
; Fills all eight x87 registers, saves the state to memory, overwrites the registers
; with pi, restores the state, and then reads the eight saved register images out of
; the save area into XMM0-XMM7. The config header expects the same values in
; XMMn (from the memory image) and MMn (from the restored registers).
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

; Integer operands 2, 4, ..., 64 at 2-byte intervals. Each store writes 8 bytes, but
; the stores ascend by 2 bytes and the values fit in 16 bits, so every word slot ends
; up holding its own value and the fild word loads below read exactly that.
mov rax, 2
mov [rdx + 2 * 1], rax
mov rax, 4
mov [rdx + 2 * 2], rax
mov rax, 8
mov [rdx + 2 * 3], rax
mov rax, 16
mov [rdx + 2 * 4], rax
mov rax, 32
mov [rdx + 2 * 5], rax
mov rax, 64
mov [rdx + 2 * 6], rax

; Eight pushes: 0.0, 2, 4, 8, 16, 32, 64, pi (pi ends up in st0).
fldz
fild word [rdx + 2 * 1]
fild word [rdx + 2 * 2]
fild word [rdx + 2 * 3]
fild word [rdx + 2 * 4]
fild word [rdx + 2 * 5]
fild word [rdx + 2 * 6]
fldpi

; Save the full x87 state over the scratch area (this overwrites the integer operands).
o32 fnsave [rdx]

; Fill every register with pi so a successful frstor is observable.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o32 frstor [rdx]

; The register images are read from offset 0x1C of the save area, 10 bytes apart.
movups xmm0, [rdx + (0x1C + 10 * 0)]
movups xmm1, [rdx + (0x1C + 10 * 1)]
movups xmm2, [rdx + (0x1C + 10 * 2)]
movups xmm3, [rdx + (0x1C + 10 * 3)]
movups xmm4, [rdx + (0x1C + 10 * 4)]
movups xmm5, [rdx + (0x1C + 10 * 5)]
movups xmm6, [rdx + (0x1C + 10 * 6)]
movups xmm7, [rdx + (0x1C + 10 * 7)]

; Each 16-byte load also picked up 6 bytes of the following image; shifting left and
; then right by 6 bytes clears them, leaving only the 10-byte value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt


================================================
FILE: unittests/ASM/X87/DD_04_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0": ["0xc90fdaa22168c235", "0x4000"],
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"],
    "XMM7": ["0x0000000000000000", "0x0000"],
    "MM0":  ["0xc90fdaa22168c235", "0x4000"],
    "MM1":  ["0x8000000000000000", "0x4005"],
    "MM2":  ["0x8000000000000000", "0x4004"],
    "MM3":  ["0x8000000000000000", "0x4003"],
    "MM4":  ["0x8000000000000000", "0x4002"],
    "MM5":  ["0x8000000000000000", "0x4001"],
    "MM6":  ["0x8000000000000000", "0x4000"],
    "MM7":  ["0x0000000000000000", "0x0000"]
  }
}
%endif

; fnsave / frstor with a 16-bit operand size (o16).
; Fills all eight x87 registers, saves the state to memory, overwrites the registers
; with pi, restores the state, and then reads the eight saved register images out of
; the save area into XMM0-XMM7. The config header expects the same values in
; XMMn (from the memory image) and MMn (from the restored registers).
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

; Integer operands 2, 4, ..., 64 at 2-byte intervals. Each store writes 8 bytes, but
; the stores ascend by 2 bytes and the values fit in 16 bits, so every word slot ends
; up holding its own value and the fild word loads below read exactly that.
mov rax, 2
mov [rdx + 2 * 1], rax
mov rax, 4
mov [rdx + 2 * 2], rax
mov rax, 8
mov [rdx + 2 * 3], rax
mov rax, 16
mov [rdx + 2 * 4], rax
mov rax, 32
mov [rdx + 2 * 5], rax
mov rax, 64
mov [rdx + 2 * 6], rax

; Eight pushes: 0.0, 2, 4, 8, 16, 32, 64, pi (pi ends up in st0).
fldz
fild word [rdx + 2 * 1]
fild word [rdx + 2 * 2]
fild word [rdx + 2 * 3]
fild word [rdx + 2 * 4]
fild word [rdx + 2 * 5]
fild word [rdx + 2 * 6]
fldpi

; Save the full x87 state over the scratch area (this overwrites the integer operands).
o16 fnsave [rdx]

; Fill every register with pi so a successful frstor is observable.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o16 frstor [rdx]

; With the 16-bit format the register images are read from offset 0xE, 10 bytes apart.
movups xmm0, [rdx + (0xE + 10 * 0)]
movups xmm1, [rdx + (0xE + 10 * 1)]
movups xmm2, [rdx + (0xE + 10 * 2)]
movups xmm3, [rdx + (0xE + 10 * 3)]
movups xmm4, [rdx + (0xE + 10 * 4)]
movups xmm5, [rdx + (0xE + 10 * 5)]
movups xmm6, [rdx + (0xE + 10 * 6)]
movups xmm7, [rdx + (0xE + 10 * 7)]

; Each 16-byte load also picked up 6 bytes of the following image; shifting left and
; then right by 6 bytes clears them, leaving only the 10-byte value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt


================================================
FILE: unittests/ASM/X87/DD_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF3800",
    "RBX": "0xFFFFFFFFFFFF0000"
  }
}
%endif

; fnstsw m16: store the x87 status word to memory.
; The status word is stored once before and once after a push. The config header
; expects 0x0000 before (low word of RBX) and 0x3800 after (low word of RAX, TOP = 7).
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; Preload all ones so that only the low 16 bits of each register are replaced below.
mov rax, -1
mov rbx, -1
fnstsw [rdx + 8 * 1]

fld dword [rdx + 8 * 0]
fnstsw [rdx + 8 * 2]
mov ax, word [rdx + 8 * 2]
mov bx, word [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DD_C0.asm
================================================
%ifdef CONFIG
{
}
%endif

; Just to ensure execution
; ffree is issued for every stack register; the config header lists no expected
; register values, so the test only checks that each encoding runs to the hlt.
ffree st0
ffree st1
ffree st2
ffree st3
ffree st4
ffree st5
ffree st6
ffree st7
hlt


================================================
FILE: unittests/ASM/X87/DD_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Tests undocumented fxch implementation at 0xdd, 0xc8+i
; 1.0 and 2.0 are pushed, then swapped; the config header expects MM7 = 2.0 and MM6 = 1.0.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.

mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]
fld dword [rdx + 8 * 1]

; fxch st1 via the undocumented encoding
db 0xdd, 0xc9

hlt


================================================
FILE: unittests/ASM/X87/DD_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fst st(i): copy st0 into st(i) without popping.
; The config header expects 1.0 in both MM7 (overwritten st1) and MM6 (st0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
; Overwrite st1 (2.0) with st0 (1.0).
fst st1

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_D0_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "MM0": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fst st(i) into a previously unused register, followed by a pop: checks that the
; register written by fst st1 is tagged valid afterwards.
; The config header expects the copied 2.0 in MM0 and RAX = 1 (C2 set by fxam).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]
fst st1  ;; copies st0, i.e. 2.0 to st1
fstp st0 ;; pop, st1 becomes st0

;; ensure st0 has valid tag.
fxam     ;; classify st0; C2 is set for a normal finite value and clear for an empty register
fstsw ax ;; store status word into ax
; Extract C2 (status word bit 10).
; NOTE(review): only ax is written; the expected RAX assumes its upper bits are zero on entry - confirm harness guarantee.
shr ax, 10
and ax, 1

hlt

align 8
data:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fstp st(i): copy st0 into st(i), then pop.
; The config header expects MM7 = 1.0 (st1 overwritten, now st0 after the pop) and
; MM6 = 4.0 (pushed afterwards into the register freed by the pop).
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
; Overwrite st1 (2.0) with st0 (1.0) and pop.
fstp st1

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DD_E9.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Only tests pop behaviour
; fucomp compares st0 with st1 and pops. The config header expects 1.0 in both MM7
; and MM6: the fld1 pushed after the compare must reuse the register that held 0.0.
fld1
fldz
fucomp
fld1

hlt


================================================
FILE: unittests/ASM/X87/DE_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xbfff"]
  }
}
%endif

; fiadd m16int: st0 = st0 + 16-bit signed integer from memory.
; Config header expectations: XMM0 = 3.0 (1.0 + 2), XMM1 = -1.0 (1.0 + -2).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fiadd word [rdx + 8 * 1]

fstp tword [rel data]

movups xmm0, [rel data]

; Test negative

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fiadd word [rdx + 8 * 1]

fstp tword [rel data]

movups xmm1, [rel data]

hlt

align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  }
}
%endif

; fimul m16int: st0 = st0 * 16-bit signed integer from memory.
; Config header expectations: XMM0 = 2.0 (1.0 * 2), XMM1 = -2.0 (1.0 * -2).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data2`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fimul word [rdx + 8 * 1]
fstp tword [rel data2]
movups xmm0, [rel data2]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fimul word [rdx + 8 * 1]
fstp tword [rel data2]
movups xmm1, [rel data2]

hlt

align 4096
data2:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI":  "0x18"
  }
}
%endif

; ficomp m16int: compare st0 with a 16-bit signed integer from memory, then pop.
; After each compare the C3 ("equal") bit of the status word is shifted into RSI.
; The four cases are equal, equal, not-equal, not-equal, which gives 0b11000 = 0x18
; (the shl after the last case leaves one extra zero bit at the bottom).
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000
mov rsi, 0

; Matching positive-positive
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 1
; Store exactly the 16 bits that `ficomp word` reads.
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Matching negative-negative
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov ax, -1
; Store exactly the 16 bits that `ficomp word` reads.
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching negative-positive
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov ax, 1
; Store exactly the 16 bits that `ficomp word` reads.
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching positive-negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -1
; Store exactly the 16 bits that `ficomp word` reads.
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

hlt


================================================
FILE: unittests/ASM/X87/DE_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0xBFFF"],
    "XMM1":  ["0xC000000000000000", "0x4000"]
  }
}
%endif

; fisub m16int: st0 = st0 - 16-bit signed integer from memory.
; Config header expectations: XMM0 = -1.0 (1.0 - 2), XMM1 = 3.0 (1.0 - -2).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisub word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisub word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFF"],
    "XMM1":  ["0xC000000000000000", "0xC000"]
  }
}
%endif

; fisubr m16int: st0 = 16-bit signed integer from memory - st0 (reversed subtract).
; Config header expectations: XMM0 = 1.0 (2 - 1.0), XMM1 = -3.0 (-2 - 1.0).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisubr word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisubr word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_06.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x3FFE"],
    "XMM1":  ["0x8000000000000000", "0xBFFE"]
  }
}
%endif

; fidiv m16int: st0 = st0 / 16-bit signed integer from memory.
; Config header expectations: XMM0 = 0.5 (1.0 / 2), XMM1 = -0.5 (1.0 / -2).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidiv word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidiv word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000000", "0x4000"],
    "XMM1":  ["0x8000000000000000", "0xC000"]
  }
}
%endif

; fidivr m16int: st0 = 16-bit signed integer from memory / st0 (reversed divide).
; Config header expectations: XMM0 = 2.0 (2 / 1.0), XMM1 = -2.0 (-2 / 1.0).
; Each result is stored as an 80-bit value and read back with a 16-byte load; the
; upper 6 bytes come from the zero-initialized tail of `data`.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidivr word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm0, [rel data]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidivr word [rdx + 8 * 1]
fstp tword [rel data]
movups xmm1, [rel data]

hlt

align 4096
data:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DE_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0xC000000000000000", "0x4000"]
  }
}
%endif

; faddp st(i), st0: st(i) = st(i) + st0, then pop.
; Config header expectations: MM7 = 3.0 (2.0 + 1.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
faddp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DE_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4002"]
  }
}
%endif

; fmulp st(i), st0: st(i) = st(i) * st0, then pop.
; Config header expectations: MM7 = 8.0 (2.0 * 4.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fmulp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DE_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x3FFF"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Only tests pop behaviour
; Tests undocumented fcomp implementation at 0xde, 0xd0+i
; The config header expects 1.0 in both MM7 and MM6: the fld1 pushed after the
; compare must reuse the register that held 0.0 before the pop.
finit
fld1
fldz
; fcomp
db 0xde, 0xd1
fld1

hlt


================================================
FILE: unittests/ASM/X87/DE_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fsubrp st(i), st0: st(i) = st0 - st(i), then pop.
; Config header expectations: MM7 = 2.0 (4.0 - 2.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fsubrp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DE_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0xC000"]
  }
}
%endif

; fsubp st(i), st0: st(i) = st(i) - st0, then pop.
; Config header expectations: MM7 = -2.0 (2.0 - 4.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fsubp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DE_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; fdivrp st(i), st0: st(i) = st0 / st(i), then pop.
; Config header expectations: MM7 = 2.0 (4.0 / 2.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]
fdivrp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DE_F8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFE"]
  }
}
%endif

; fdivp st(i), st0: st(i) = st(i) / st0, then pop.
; Config header expectations: MM7 = 0.5 (2.0 / 4.0) and MM6 = 4.0, i.e. the value
; pushed afterwards lands in the register freed by the pop.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; fdivp 2.0, 4.0
; == st1 = 2.0 / 4.0
fdivp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DF_00.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  }
}
%endif

; fild m16int: load a 16-bit signed integer from memory onto the x87 stack.
; The config header expects 1024.0 (exponent 0x4009) in MM7.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov eax, 1024
mov [rdx + 8 * 0], eax
; Fill the bytes directly above the 16-bit operand with 0xFF so that a load wider
; than 16 bits would not produce 1024.
mov eax, -1
mov [rdx + 8 * 0 + 2], eax

fild word [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87/DF_01.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fisttp m16int: store st0 to memory as a truncated 16-bit integer, then pop.
; The config header expects the stored integer (2) in RAX and 1.0 in MM7.
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data3]
fisttp word [rdx + 8 * 0]

; Read back the 16-bit result. Only ax is written here.
; NOTE(review): the expected RAX value assumes the upper bits of RAX are zero on entry - confirm harness guarantee.
mov ax, word [rdx + 8 * 0]

; Pushed after the pop: the expected MM7 value is this 1.0, not the 2.0 loaded earlier.
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  ; Destination, pre-filled with all ones so the value read back must come from fisttp.
  dq -1
  dq -1


================================================
FILE: unittests/ASM/X87/DF_02.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4009"]
  }
}
%endif

; fist m16int: store st0 to memory as a 16-bit integer WITHOUT popping.
; The config header expects the stored integer (0x400) in RAX, 1024.0 still in MM7
; and the subsequently pushed 1.0 in MM6.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
; Pre-fill the destination with all ones so the value read back must come from fist.
mov eax, -1
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fist word [rdx + 8 * 1]

; No pop happened, so this push lands in the next register down (MM6).
fld1

; Clear eax first so only the 16 bits written by fist contribute to RAX.
mov eax, 0
mov ax, word [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DF_03.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; fistp m16int: store st0 to memory as a 16-bit integer, then pop.
; The config header expects the stored integer (0x400) in RAX and 1.0 in MM7.
; NOTE(review): 0xe0000000 is presumably scratch memory mapped by the test harness.
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
; Pre-fill the destination with all ones so the value read back must come from fistp.
mov eax, -1
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fistp word [rdx + 8 * 1]

; Pushed after the pop: the expected MM7 value is this 1.0, not the 1024.0 loaded earlier.
fld1

; Clear eax first so only the 16 bits written by fistp contribute to RAX.
mov eax, 0
mov ax, word [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DF_04.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0506070801020304", "0x0000000000000012"],
    "XMM1":  ["0x6576879821324354", "0x0000000000000000"],
    "XMM2":  ["0xB90984060D355548", "0x000000000000C03B"],
    "XMM3":  ["0xA83732340C01F070", "0x000000000000C03B"],
    "XMM4":  ["0xFFAA6DA43613FED0", "0x000000000000C03A"],
    "XMM5":  ["0x0000000000000001", "0x0000000000000000"],
    "XMM6":  ["0x0000000000000001", "0x0000000000008000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000008000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000008000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000"],
    "XMM10":  ["0x0000000000000001", "0x0000000000008000"],
    "XMM11":  ["0x0000000000000001", "0x0000000000000000"]
  }
}
%endif

; FBLD / FBSTP (80-bit packed BCD load and store) coverage:
;   xmm0-xmm1   : BCD -> x87 -> BCD round trips
;   xmm2-xmm4   : raw 80-bit float produced by loading invalid BCD encodings
;   xmm5-xmm9   : FBSTP of +1.0, -1.0, -0.0, -0.5 and +0.5 with the rounding
;                 mode that is in effect on entry
;   xmm10-xmm11 : FBSTP of -0.5 rounded down and +0.5 rounded up
; Every result goes to its own zero-initialised 16-byte slot and is read back
; with a 16-byte movups; the x87 stores only write 10 bytes of each slot.

fbld [rel .data_0]
fbstp [rel .res_data_0]
movups xmm0, [rel .res_data_0]

fbld [rel .data_1]
fbstp [rel .res_data_1]
movups xmm1, [rel .res_data_1]

; Check encoding of invalid BCD
; (results are stored with fstp tword so the raw x87 value is captured)
fbld [rel .data_2]
fstp tword [rel .res_data_2]
movups xmm2, [rel .res_data_2]

fbld [rel .data_3]
fstp tword [rel .res_data_3]
movups xmm3, [rel .res_data_3]

fbld [rel .data_4]
fstp tword [rel .res_data_4]
movups xmm4, [rel .res_data_4]

; Some special values
fld tword [rel .data_5]
fbstp [rel .res_data_5]
movups xmm5, [rel .res_data_5]

fld tword [rel .data_6]
fbstp [rel .res_data_6]
movups xmm6, [rel .res_data_6]

fld tword [rel .data_7]
fbstp [rel .res_data_7]
movups xmm7, [rel .res_data_7]

; Values that choose +- 0 or +-1 depending on rounding mode
; -1 < F < -0
; +0 < F < +1
fld tword [rel .data_8]
fbstp [rel .res_data_8]
movups xmm8, [rel .res_data_8]

fld tword [rel .data_9]
fbstp [rel .res_data_9]
movups xmm9, [rel .res_data_9]

; Swap control word
; (clear the rounding-control field, bits 10-11, then select the new mode)
fnstcw [rel .cw]
mov ax, [rel .cw]
and ax, ~(3 << 10)
or eax, 1 << 10 ; Round down
mov [rel .cw], ax
fldcw [rel .cw]

fld tword [rel .data_10]
fbstp [rel .res_data_10]
movups xmm10, [rel .res_data_10]

; Swap control word
fnstcw [rel .cw]
mov ax, [rel .cw]
and ax, ~(3 << 10)
or eax, 2 << 10 ; Round up
mov [rel .cw], ax
fldcw [rel .cw]

fld tword [rel .data_11]
fbstp [rel .res_data_11]
movups xmm11, [rel .res_data_11]

; Values that generate an Invalid floating point operation exception
; -inf
; +inf
; Negative value too large for destination format
; Positive value too large for destination format
; NaN
; On IA the indefinite BCD result is still stored to memory

; XXX: We don't support IA on this

hlt

align 4096
; Scratch slot for the x87 control word
.cw:
dw 0

; BCD inputs. fbld reads 10 bytes from each 16-byte entry.
.data_0:
dd 0x01020304
dd 0x05060708
dd 0x09101112
dd 0x13141516
.data_1:
dd 0x21324354
dd 0x65768798
dd 0x00000000
dd 0x00000000
; Invalid BCD: nibbles outside 0-9
.data_2:
dd 0xFFFFFFFF
dd 0xFFFFFFFF
dd 0xFFFFFFFF
dd 0xFFFFFFFF
.data_3:
dd 0xF0F0F0F0
dd 0xF0F0F0F0
dd 0xF0F0F0F0
dd 0xF0F0F0F0
.data_4:
dd 0x0A0B0C0D
dd 0x0E0FAAAB
dd 0xACADAEAF
dd 0xBABBBCBD
; 80-bit float inputs for the fbstp tests
.data_5:
dt 1.0
.data_6:
dt -1.0
.data_7:
dt -0.0
.data_8:
dt -0.5
.data_9:
dt 0.5
.data_10:
dt -0.5
.data_11:
dt 0.5

; One zeroed 16-byte result slot per test
.res_data_0:
dq 0
dq 0
.res_data_1:
dq 0
dq 0
.res_data_2:
dq 0
dq 0
.res_data_3:
dq 0
dq 0
.res_data_4:
dq 0
dq 0
.res_data_5:
dq 0
dq 0
.res_data_6:
dq 0
dq 0
.res_data_7:
dq 0
dq 0
.res_data_8:
dq 0
dq 0
.res_data_9:
dq 0
dq 0
.res_data_10:
dq 0
dq 0
.res_data_11:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/DF_05.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7": ["0x8000000000000000", "0x4009"]
  }
}
%endif

; FILD m64int (DF /5): load a 64-bit integer onto the x87 stack.
; Expected: MM7 holds 1024.0 as an 80-bit float (exponent 0x4009).

mov rdx, 0xe0000000

mov rax, 1024
mov [rdx + 8 * 0], rax

fild qword [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87/DF_07.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; FISTP m64int (DF /7): store ST(0) as a 64-bit integer and pop.
; Expected: the stored qword is 0x400 (1024) and, because the stack was
; popped, the following fld1 ends up in MM7.

mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov rax, -1 ; Poison all 64 destination bits so a short store is visible
mov [rdx + 8 * 1], rax

fld dword [rdx + 8 * 0]

; Popping store of the 64-bit integer
fistp qword [rdx + 8 * 1]

; The stack is empty again, so this push reuses the slot 1024.0 occupied
fld1

mov rax, qword [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87/DF_C0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6":  ["0x8000000000000000", "0x4001"],
    "MM7":  ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; FFREEP (DF C0+i): free a stack register and pop.
; Expected: 1.0 stays in MM7, while the 2.0 that was pushed second is
; discarded and its slot (MM6) is reused by the later load of 4.0.

mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 1], rax
mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax

fld qword [rdx + 8 * 0]
fld qword [rdx + 8 * 1]

; Undocumented x87 instruction
; Sets the tag register to empty for the stack register
; Then pops the stack
ffreep st0
fld qword [rdx + 8 * 2] ; Overwrites previous value

hlt


================================================
FILE: unittests/ASM/X87/DF_C8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x3FFF"],
    "MM7": ["0x8000000000000000", "0x4000"]
  }
}
%endif

; Tests undocumented fxch implementation at 0xdf, 0xc8+i
; Expected: the two pushed values are swapped, so 1.0 (pushed first) ends up
; in MM6 and 2.0 (pushed second) ends up in MM7.

mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax
mov eax, 0x40000000 ; 2.0
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]
fld dword [rdx + 8 * 1]

; Raw encoding of the alias with i = 1 (exchange st0 with st1); emitted as
; bytes because this opcode form has no mnemonic of its own
db 0xdf, 0xc9

hlt


================================================
FILE: unittests/ASM/X87/DF_D0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Tests undocumented fstp implementation at 0xdf, 0xd0+i
; Expected: the store copies 1.0 over the 2.0 underneath it and pops, leaving
; 1.0 in MM7; the final load then places 4.0 in MM6.

lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Raw encoding of the alias with i = 1 (store st0 to st1, then pop)
db 0xdf, 0xd1

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 4096
; Each entry is a 10-byte extended float followed by 8 bytes of zero padding
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DF_D8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM6": ["0x8000000000000000", "0x4001"],
    "MM7": ["0x8000000000000000", "0x3FFF"]
  }
}
%endif

; Tests undocumented fstp implementation at 0xdf, 0xd8+i
; Expected: the store copies 1.0 over the 2.0 underneath it and pops, leaving
; 1.0 in MM7; the final load then places 4.0 in MM6.

lea rdx, [rel data]
fld tword [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

; Raw encoding of the alias with i = 1 (store st0 to st1, then pop)
db 0xdf, 0xd9

lea rdx, [rel data3]
fld tword [rdx + 8 * 0]

hlt

align 4096
; Each entry is a 10-byte extended float followed by 8 bytes of zero padding
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87/DF_E0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF3800",
    "RBX": "0xFFFFFFFFFFFF0000"
  }
}
%endif

; FNSTSW AX (DF E0): store the x87 status word into AX.
; Expected: only the low 16 bits of RAX are written (the upper 48 bits keep
; the -1 fill); the status word is 0 before anything is pushed (RBX) and
; 0x3800 after one push (RAX).

mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; Fill both registers with ones so untouched bits are detectable
mov rax, -1
mov rbx, -1
fnstsw ax
mov bx, ax ; Snapshot of the status word before any push

fld dword [rdx + 8 * 0]
fnstsw ax ; Status word after one push

hlt


================================================
FILE: unittests/ASM/X87/DF_E8.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x1",
    "RDI": "0x40",
    "RSI": "0x45",
    "RBP": "0x45"
  }
}
%endif

; FUCOMIP (DF E8+i): unordered compare of st0 with st(i), result in EFLAGS,
; then pop. After each compare the flags are captured with pushfq and masked
; down to CF | PF | ZF:
;   RBX = st0 > st1          -> 0x00
;   RCX = st0 < st1          -> 0x01 (CF)
;   RDI = st0 == st1         -> 0x40 (ZF)
;   RSI = st0 is NaN         -> 0x45 (ZF | PF | CF)
;   RBP = st1 is NaN         -> 0x45 (ZF | PF | CF)

; pushfq/pop need a valid stack
mov rsp, 0xe000_1000

lea rdx, [rel one]
fld tword [rdx + 8 * 0]

lea rdx, [rel two]
fld tword [rdx + 8 * 0]

; Mask for CF, PF, ZF flags
mov rax, 0b1000101

; st(0) > st(i)
fucomip st1
pushfq
pop rbx
and rbx, rax

; Each fucomip pops st0, so 1.0 remains as the value to compare against
lea rdx, [rel minus_one]
fld tword [rdx + 8 * 0]

; st(0) < st(i)
fucomip st1
pushfq
pop rcx
and rcx, rax

lea rdx, [rel one]
fld tword [rdx + 8 * 0]

; st(0) == st(i)
fucomip st1
pushfq
pop rdi
and rdi, rax

lea rdx, [rel qnan]
fld tword [rdx + 8 * 0]

; st(0) == NaN
fucomip st1
pushfq
pop rsi
and rsi, rax

; Push the NaN first and 1.0 second so the NaN is the st1 operand this time
lea rdx, [rel qnan]
fld tword [rdx + 8 * 0]

lea rdx, [rel one]
fld tword [rdx + 8 * 0]

; st(i) == NaN
fucomip st1
pushfq
pop rbp
and rbp, rax

hlt

align 8
one:
  dt 1.0

align 8
two:
  dt 2.0

align 8
minus_one:
  dt -1.0

; 80-bit NaN built by hand: 64-bit significand followed by sign/exponent
align 8
qnan:
  dq 0xC000000000000001
  dw 0x7FFF


================================================
FILE: unittests/ASM/X87/DF_F0.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x0",
    "RCX": "0x1",
    "RDI": "0x40"
  }
}
%endif

; FCOMIP (DF F0+i): compare st0 with st(i), result in EFLAGS, then pop.
; After each compare the flags are captured with pushfq and masked down to
; CF | PF | ZF:
;   RBX = st0 > st1   -> 0x00
;   RCX = st0 < st1   -> 0x01 (CF)
;   RDI = st0 == st1  -> 0x40 (ZF)

; pushfq/pop need a valid stack
mov rsp, 0xe000_1000

lea rdx, [rel one]
fld tword [rdx + 8 * 0]

lea rdx, [rel two]
fld tword [rdx + 8 * 0]

; Mask for CF, PF, ZF flags
mov rax, 0b1000101

; st(0) > st(i)
fcomip st1
pushfq
pop rbx
and rbx, rax

; Each fcomip pops st0, so 1.0 remains as the value to compare against
lea rdx, [rel minus_one]
fld tword [rdx + 8 * 0]

; st(0) < st(i)
fcomip st1
pushfq
pop rcx
and rcx, rax

lea rdx, [rel one]
fld tword [rdx + 8 * 0]

; st(0) == st(i)
fcomip st1
pushfq
pop rdi
and rdi, rax

hlt

align 8
one:
  dt 1.0

align 8
two:
  dt 2.0

align 8
minus_one:
  dt -1.0


================================================
FILE: unittests/ASM/X87/FISTTP_16bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1"
  }
}
%endif

; Test FISTTP with 16-bit integer store
; FISTTP always truncates toward zero, ignoring rounding control word
; Expected: RBX = 1 (truncated result), RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

; Convert to int16 using truncation - 1.9 should become 1, not 2
fisttp word [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

; Load result to verify truncation worked
movzx rbx, word [rel .result]

hlt

align 4096
.value: dq 1.9
.result: dw 0

================================================
FILE: unittests/ASM/X87/FISTTP_16bit_neg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "0xFFFF"
  }
}
%endif

; Test FISTTP with negative value - truncation toward zero
; -1.9 should become -1 (0xFFFF in 16-bit two's complement), not -2
; Expected: RBX = 0xFFFF, RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

fisttp word [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

; Zero-extend so the expected value is exactly 0xFFFF
movzx rbx, word [rel .result]

hlt

align 4096
.value: dq -1.9
.result: dw 0

================================================
FILE: unittests/ASM/X87/FISTTP_32bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1"
  }
}
%endif

; Test FISTTP with 32-bit integer store
; FISTTP always truncates toward zero, ignoring rounding control word
; Expected: RBX = 1 (truncated result), RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

; Convert to int32 using truncation - 1.9 should become 1, not 2
fisttp dword [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

; Load result to verify truncation worked
mov ebx, dword [rel .result]

hlt

align 4096
.value: dq 1.9
.result: dd 0

================================================
FILE: unittests/ASM/X87/FISTTP_32bit_neg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "0xFFFFFFFF"
  }
}
%endif

; Test FISTTP with negative value - truncation toward zero
; -1.9 should become -1 (0xFFFFFFFF in 32-bit two's complement), not -2
; Expected: RBX = 0xFFFFFFFF, RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

fisttp dword [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

; A 32-bit load zero-extends, so RBX is exactly 0xFFFFFFFF
mov ebx, dword [rel .result]

hlt

align 4096
.value: dq -1.9
.result: dd 0

================================================
FILE: unittests/ASM/X87/FISTTP_64bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "1"
  }
}
%endif

; Test FISTTP with 64-bit integer store
; FISTTP always truncates toward zero, ignoring rounding control word
; Expected: RBX = 1 (truncated result), RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

; Convert to int64 using truncation - 1.9 should become 1, not 2
fisttp qword [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

; Load result to verify truncation worked
mov rbx, qword [rel .result]

hlt

align 4096
.value: dq 1.9
.result: dq 0

================================================
FILE: unittests/ASM/X87/FISTTP_64bit_neg.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "0xFFFFFFFFFFFFFFFF"
  }
}
%endif

; Test FISTTP with negative value - truncation toward zero
; -1.9 should become -1 (0xFFFFFFFFFFFFFFFF in 64-bit two's complement), not -2
; Expected: RBX = 0xFFFFFFFFFFFFFFFF, RAX = 0 (status word bit 0 clear)

finit
fld qword [rel .value]

fisttp qword [rel .result]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

mov rbx, qword [rel .result]

hlt

align 4096
.value: dq -1.9
.result: dq 0

================================================
FILE: unittests/ASM/X87/FPREM1_Flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Checks that FPREM1 clears C2 (status word bit 10, mask 0x400) even when the
; bit was set beforehand. The status word is forced to all ones by editing a
; saved x87 environment and loading it back.

mov rbx, 0xe0000000
o32 fstenv [rbx]
mov dword [rbx+4], 0xFFFFFFFF ; set status word to all ones
o32 fldenv [rbx]

; Pushed first, so this is ST(1) when fprem1 runs
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Pushed second, so this is ST(0) when fprem1 runs
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

fprem1

xor rax, rax
fstsw ax
and rax, 0x400 ; C2 should be set to zero

hlt

align 8
; Each entry is a 10-byte extended float followed by 8 bytes of zero padding
data:
  dt 3.0
  dq 0
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87/FPREM_Flags.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Checks that FPREM clears C2 (status word bit 10, mask 0x400) even when the
; bit was set beforehand. The status word is forced to all ones by editing a
; saved x87 environment and loading it back.

mov rbx, 0xe0000000
o32 fstenv [rbx]
mov dword [rbx+4], 0xFFFFFFFF ; set status word to all ones
o32 fldenv [rbx]

; Pushed first, so this is ST(1) when fprem runs
lea rdx, [rel data]
fld tword [rdx + 8 * 0]

; Pushed second, so this is ST(0) when fprem runs
lea rdx, [rel data2]
fld tword [rdx + 8 * 0]

fprem

xor rax, rax
fstsw ax
and rax, 0x400 ; C2 should be set to zero

hlt

align 8
; Each entry is a 10-byte extended float followed by 8 bytes of zero padding
data:
  dt 3.0
  dq 0
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87/FST_AddrModes.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3f800000",
    "RBX": "0x3f800000",
    "RCX": "0x3f800000",
    "R8": "0x3f800000",
    "R9": "0x3f800000",
    "R10": "0x3f800000"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Stores 1.0 as a 32-bit float through six different addressing modes (base,
; base+disp, base+index, base+index*scale, and the last two with a
; displacement), then reads every location back. Each destination register
; must hold 0x3f800000.
; The six 4-byte destinations (offsets 0, 0xa, 0x64, 0x6e, 0x190, 0x19a from
; base) do not overlap.

; Setup
fld1
lea rdx, [rel base]
mov rsi, 0x64

; Test fst
fst dword [rdx]
fst dword [rdx + 0xa]
fst dword [rdx + rsi]
fst dword [rdx + rsi * 4]
fst dword [rdx + rsi + 0xa]
fst dword [rdx + rsi * 4 + 0xa]

; Result check
mov eax, dword [rdx]
mov ebx, dword [rdx + 0xa]
mov ecx, dword [rdx + rsi]
mov r8d, dword [rdx + rsi * 4]
mov r9d, dword [rdx + rsi + 0xa]
mov r10d, dword [rdx + rsi * 4 + 0xa]

hlt

align 4096
; Zero-filled scratch page that all stores above target
base:
times 4096 db 0


================================================
FILE: unittests/ASM/X87/FScale-Zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8": "0",
    "R9": "0",
    "R10": "0",
    "R11": "0",
    "R12": "0",
    "R13": "0x8000000000000000"
  }
}
%endif

; FSCALE with a zero in ST(0): the value being scaled is zero and ST(1)
; supplies the scale factor. The result is stored as a double and must stay
; +0.0 for every scale factor (R8-R12) and -0.0 when the input is -0.0 (R13).
; Each case re-runs finit so it starts from an empty x87 stack.

; scale zero (st1 == 0)
mov rax, 0
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r8, [rel intstor]

; scale zero (st1 == 1)
mov rax, 1
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r9, [rel intstor]

; scale zero (st1 == 100)
mov rax, 100
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r10, [rel intstor]

; scale zero (st1 == 1024)
mov rax, 1024
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r11, [rel intstor]

; scale zero (st1 == 1048576)
mov rax, 1048576
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r12, [rel intstor]

; tests scaling negative zero
mov rax, 1048576
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fld qword [rel neg_zero]
fscale
fst qword [rel intstor]
mov r13, [rel intstor]

hlt

align 4096
neg_zero: dq 0x8000000000000000   ; -0.0

; Scratch qword: holds the integer scale factor going in and the double
; result coming out
intstor: dq 0


================================================
FILE: unittests/ASM/X87/FScaleFXtract.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8": "1"
  }
}
%endif
; Expectations for the cases that are currently commented out below:
; ,
;     "R9": "1",
;     "R10": "1",
;     "R11": "1",
;     "R12": "1"
section .data
    num0: dq 0.0
    num1: dq 125.78
    num2: dq 1023.12
    num3: dq -23487.152
    num4: dq -1230192.123

;; Tests the FSCALE / FXTRACT inverse behaviour:
;; fxtract splits a value into exponent and significand, and fscale applied
;; to that pair should rebuild the original value.
;; Only the num0 case is enabled; num1-num4 are kept commented out.
section .text
    global _start
_start:
    
; num0 == 0.0
finit
fld qword [rel num0]
fld st0   ; keep a copy of the original below the one being split
fxtract
fscale
fstp st1  ; at this point st0 and st1 should be the same
fcom
fnstsw ax
and ax, 0x4500 ; keep the condition bits C3, C2 and C0
cmp ax, 0x4000 ; only C3 set means the compare found the values equal
setz r8b       ; only the low byte of r8 is written here

; ; num1 == 125.78
; finit
; fld qword [rel num1]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r9b

; ; num2 == 1023.12
; finit
; fld qword [rel num2]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r10b

; ; num3 == -23487.152
; finit
; fld qword [rel num3]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r11b

; ; num4 == -1230192.123
; finit
; fld qword [rel num4]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r12b

hlt


================================================
FILE: unittests/ASM/X87/FXAM_Push.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "8"
  }
}
%endif

; Fills the x87 stack, then spills it to memory one entry at a time until
; FXAM reports "empty". Expected: exactly 8 entries are spilled (RAX = 8).
; The stack is then refilled and the spilled values are loaded back.

mov rdx, 0xe0000000

; This behaviour was seen around Wine 32-bit libraries
; Anything doing a call to a double application would spin
; the x87 stack on to the stack looking for fxam to return empty
; Empty in this case is that C0 and C3 is set while C2 is not

fninit
; Fill the x87 stack
fldz
fldz
fldz
fldz
fldz
fldz
fldz
fldz

; ecx counts spilled entries and indexes the spill area
mov eax, 0
mov ecx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500 ; keep C3, C2 and C0
; Check for empty
cmp ax, 0x4100
je .Done

; Now push the x87 stack value
; We know it isn't empty
fstp qword [rdx + rcx * 8]
fwait
inc ecx
jmp .ExamineStack

.Done:

; Save how many we stored
mov eax, ecx

; Now fill with "Garbage"
fld1
fld1
fld1
fld1
fld1
fld1
fld1
fld1

.Reload:
; Now reload the stack
; Walks the spill area from the last stored entry back down to index 0
dec ecx
fld qword [rdx + rcx * 8]
cmp ecx, 0x0
jne .Reload;

hlt


================================================
FILE: unittests/ASM/X87/FXAM_Push_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0"
  }
}
%endif

; Same spill loop as FXAM_Push, but started on an empty x87 stack.
; Expected: FXAM reports "empty" on the first pass, so nothing is spilled and
; the counter stays at zero (RCX = 0).

mov rdx, 0xe0000000

; This behaviour was seen around Wine 32-bit libraries
; Anything doing a call to a double application would spin
; the x87 stack on to the stack looking for fxam to return empty
; Empty in this case is that C0 and C3 is set while C2 is not

fninit
; Empty stack to make sure we don't push anything

; ecx counts spilled entries and indexes the spill area
mov eax, 0
mov ecx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500 ; keep C3, C2 and C0
; Check for empty
cmp ax, 0x4100
je .Done

; Now push the x87 stack value
; We know it isn't empty
fstp qword [rdx + rcx * 8]
fwait
inc ecx
jmp .ExamineStack

.Done:

hlt


================================================
FILE: unittests/ASM/X87/FXAM_Push_Simple.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "8"
  }
}
%endif

; Fills the x87 stack with eight values, then pops entries with a
; register-only fstp until FXAM reports "empty".
; Expected: exactly 8 entries are popped (RAX = 8).

fninit
fld1
fld1
fld1
fld1
fld1
fld1
fld1
fld1

; ebx counts popped entries (eax is clobbered by fnstsw inside the loop)
mov ebx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500 ; keep C3, C2 and C0
; Check for empty
cmp ax, 0x4100
je .Done

; Now pop the x87 stack value (no memory store in this variant)
; We know it isn't empty
fstp st0
fwait
inc ebx
jmp .ExamineStack
.Done:
mov eax, ebx
hlt


================================================
FILE: unittests/ASM/X87/FXAM_Push_Simple_2.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "8"
  }
}
%endif

; Fills the x87 stack, then spills it to memory one entry at a time until
; FXAM reports "empty". Expected: exactly 8 entries are spilled (RAX = 8).
; Unlike FXAM_Push, the stack is not reloaded afterwards.

mov rdx, 0xe0000000

; This behaviour was seen around Wine 32-bit libraries
; Anything doing a call to a double application would spin
; the x87 stack on to the stack looking for fxam to return empty
; Empty in this case is that C0 and C3 is set while C2 is not

fninit
; Fill the x87 stack
fldz
fldz
fldz
fldz
fldz
fldz
fldz
fldz

; ecx counts spilled entries and indexes the spill area
mov eax, 0
mov ecx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500 ; keep C3, C2 and C0
; Check for empty
cmp ax, 0x4100
je .Done

; Now push the x87 stack value
; We know it isn't empty
fstp qword [rdx + rcx * 8]
fwait
inc ecx
jmp .ExamineStack

.Done:
; Save how many we stored
mov eax, ecx
hlt


================================================
FILE: unittests/ASM/X87/FXAM_Simple.asm
================================================
;; Simpler versions of FXAM_Push* tests.
;; In hostrunner tests this will fail because, as mentioned below, there's no
;; support for the zero class yet. In hostrunner RCX should contain 0x4000
;; instead of 0x0400.
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x6",
    "RBX": "0x0400",
    "RCX": "0x0400",
    "RDX": "0x4100"
  }
}
%endif

;; Runs FXAM on an empty register, on 0.0 and on 1.0, keeping the masked
;; condition bits (C3, C2 and C0, mask 0x4500) of each result:
;;   RDX = empty register, RCX = 0.0, RBX = 1.0
;; RAX ends up holding the TOP field of the final status word.

; The value loaded here is not used as a pointer in this test; EDX is
; overwritten with the first FXAM result below.
mov rdx, 0xe0000000

fninit
;; Before adding anything to the stack, lets examine it.
;; The result should be empty.
fxam
fwait

; NOTE: fnstsw only writes AX and the upper half of EAX is never written by
; this test, so the expected RDX/RCX values presume those bits start as zero.
fnstsw ax 
and ax, 0x4500 ; should be 0x4100 for empty
mov edx, eax

fldz
fxam 
fwait 

fnstsw ax
and ax, 0x4500 ; should be 0x4000 for zero, but there's no support for it at the moment, so it'll return 0x0400 as it does for a normal number.
mov ecx, eax

fld1
fxam
fwait

fnstsw ax
mov ebx, eax
and ebx, 0x4500 ; should be 0x0400 for normal

;; Top should be 6
;; right shift status word by 11 and mask with 0x7.
shr eax, 11
and eax, 0x7


hlt


================================================
FILE: unittests/ASM/X87/LoadAtBoundary.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "MM7":  ["0x5354555657584142", "0x0000000000005152"],
    "MM6":  ["0xe94de5eae34fc1c0", "0x0000000000004039"]
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Performs 80-bit x87 loads and stores (tword and packed BCD) on the last 10
; bytes of a 4096-byte region, so an access that touches more than 10 bytes
; would run past the end of that region.

finit ; enters x87 state

mov rax, 0x100000000
mov rbx, 0x4142434445464748
mov rcx, 0x5152535455565758
mov rdx, (0x100000000 + 0x1000 - 16)

; Seed the final 16 bytes of the region with a recognisable pattern
mov [rdx], rbx
mov [rdx + 8], rcx

; rdx now points one byte past the end of the region
mov rdx, 0x100000000 + 0x1000

; Do an 80-bit load at the edge of a page.
; Ensuring tword loads don't extend past the end of a page.
fld tword [rdx - 10]

; Do an 80-bit BCD load at the edge of a page.
fbld [rdx - 10]

; Do a BCD store
fbstp [rdx - 10]

; Regular 80-bit store
fstp tword [rdx - 10]

; Loads again to get register state.
fld tword [rdx - 10]
fbld [rdx - 10]

hlt


================================================
FILE: unittests/ASM/X87/Memcopy.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff8000000000000",
    "RBX": "0x3ff8000000000000"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Copies a double from [rdx] to [rdx + 8] by loading it onto the x87 stack
; and immediately storing it again.
; Expected: the copy (RBX) is bit-identical to the source value (RAX).

mov rdx, 0x100000000
mov rax, 0x3ff8000000000000 ; 1.5
mov [rdx], rax

fld qword [rdx]
fstp qword [rdx + 8]

mov rbx, [rdx + 8]
hlt


================================================
FILE: unittests/ASM/X87/MemcopyWithCPUID.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x8000000000000000",
    "RCX": "0x3fff"
  }
}
%endif

; Loads an 80-bit 1.0, executes cpuid, then stores the value back as a tword.
; Expected: the stored bytes are still 1.0 (significand in RBX, sign and
; exponent in RCX).

; Related to #4274 - ensures that if cpuid clobbers the predicate register,
; we reset the predicate cache.
lea r8, [rel data]
fld tword [r8]

mov rax, 0x0
cpuid ; Will this instruction clobber the predicate register?

fstp tword [rel data2]

; cpuid overwrote rbx and rcx; both are reloaded here from the stored result
mov rbx, [rel data2]
mov rcx, [rel data2+8]
hlt

align 4096

data:
  dt 1.0

align 8

; 16 zeroed bytes: the tword store fills 10 of them, so the qword read at +8
; sees the 2-byte sign/exponent followed by zeros
data2:
  times 16 db 0


================================================
FILE: unittests/ASM/X87/Rounding.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x212121211121",
    "RCX": "0xfffefffeffffffff",
    "RDX": "0xfffffffeffffffff",
    "RSI": "0xfffefffeffffffff"
  }
}
%endif

; Rounding tests to ensure rounding modes are actually working
;
; Each input is converted to an integer with fist/fistp under all four
; rounding-control settings (nearest, down, up, toward zero).
;   RBX : positive inputs, one 4-bit nibble per result, twelve results
;   RCX : -1.5,     one 16-bit word per result
;   RDX : -1.49999, one 16-bit word per result
;   RSI : -1.50001, one 16-bit word per result
; The rounding mode is changed by rewriting bits 10-11 of the control word
; (mask 0xf3ff clears them); finit restores the default before each input.

;; Mid-point
finit
fld qword [rel midpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

;; Slightly above midpoint
finit
fld qword [rel samidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

;; Slightly below midpoint
finit
fld qword [rel sbmidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or rbx, qword [rel tmp]


;;; Negative tests
; RCX is built up purely with OR and shifts, so it must start from zero.
; Without this, the low 16 bits of whatever RCX held on entry would survive
; the three 16-bit shifts and land in bits 48-63 of the checked result.
xor ecx, ecx

;; Mid-point
finit
fld qword [rel nmidpoint]

; Default rounding is 00 - round to nearest
; (rax still has clean upper bits from the last control-word update, so
; writing ax yields a zero-extended 16-bit result)
fist word [rel tmp]
mov ax, word [rel tmp]
or rcx, rax
shl rcx, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax
shl rcx, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax
shl rcx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax

;; Slightly above midpoint
finit
fld qword [rel nsamidpoint]

; Default rounding is 00 - round to nearest
; (only dx is written; the stale upper bits of rdx are pushed out by the
; three 16-bit shifts that follow)
fist dword [rel tmp]
mov dx, word [rel tmp]
shl rdx, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax
shl rdx, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax
shl rdx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax

;; Slightly below midpoint
finit
fld qword [rel nsbmidpoint]

; Default rounding is 00 - round to nearest
; (only si is written; the stale upper bits of rsi are pushed out by the
; three 16-bit shifts that follow)
fist dword [rel tmp]
mov si, word [rel tmp]
shl rsi, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax
shl rsi, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax
shl rsi, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax

hlt

align 4096
; Inputs: the exact midpoint plus values just above and just below it, in
; positive and negative form
midpoint:
  dq 1.5
samidpoint:
  dq 1.50001
sbmidpoint:
  dq 1.49999
nmidpoint:
  dq -1.5
nsamidpoint:
  dq -1.49999
nsbmidpoint:
  dq -1.50001
; Scratch qword shared by the control-word updates and the integer stores
tmp: dq 0


================================================
FILE: unittests/ASM/X87/StoreAtBoundary.asm
================================================
%ifdef CONFIG
{
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; Performs 80-bit x87 stores (tword and packed BCD) into the last 10 bytes of
; a 4096-byte region. No register values are checked: the test only has to
; reach hlt without faulting.

finit ; enters x87 state

mov rax, 0x100000000
mov rbx, 0x4142434445464748
mov rcx, 0x5152535455565758
mov rdx, (0x100000000 + 0x1000 - 16)

; Seed the final 16 bytes of the region with a recognisable pattern
mov [rdx], rbx
mov [rdx + 8], rcx

; rdx now points one byte past the end of the region
mov rdx, 0x100000000 + 0x1000

; Load the data in to an x87 register for storing.
; (loaded twice because each of the two stores below pops one entry)
fld tword [rdx - 16]
fld tword [rdx - 16]

; Do an 80-bit store at the edge of a page.
; Ensuring tword stores don't extend past the end of a page.
; If storing past the end of the page, then an unhandled SIGSEGV will occur.
fstp tword [rdx - 10]

; Do an 80-bit bcd store at the edge of a page.
fbstp [rdx - 10]

hlt


================================================
FILE: unittests/ASM/X87/X87MMXInteraction.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0",
    "RBX": "0x0",
    "RCX": "0x8000000000000000",
    "RDX": "0x3FFF",
    "R8": "0xc90fdaa22168c235",
    "R9": "0x4000",
    "R10": "0xc90fdaa22168c235",
    "R11": "0xFFFF"
  }
}
%endif

; Checks that after moving from X87 to MMX States, the
; values are correct and that MMX register writes, puts the top 16 bits as
; all 1s.
; The state is captured with a 108-byte o32 fnsave image and read back at the
; offsets this test uses: status word at +4, tag word at +8, and 10-byte
; register slots at +78 (st5), +88 (st6) and +98 (st7).
finit ; enters x87 state

fldpi ; goes in mm7
fld1  ; goes in mm6

movq mm5, mm7 ; enters mmx state, so 1 is now in st6 and pi in st7, while st5 has a broken pi.
o32 fnsave [rel x87env]

; Top into eax
mov eax, dword [rel x87env + 4]
and eax, 0x3800
shr eax, 11 ; top in eax

; Tag into ebx
mov bx, word [rel x87env + 8]

; st6 is 1
mov rcx, qword [rel x87env + 88]
mov dx, word [rel x87env + 96]

; st7 is pi
mov r8, qword [rel x87env + 98]
mov r9w, word [rel x87env + 106]

; st5 is broken pi
; (same significand as pi, but the MMX write set the top 16 bits to 0xFFFF)
mov r10, qword [rel x87env + 78]
mov r11w, word [rel x87env + 86]

hlt

align 4096
; Save area for the o32 fnsave image
x87env: times 108 db 0


================================================
FILE: unittests/ASM/X87/invalid_div_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test 0.0 / 0.0 = Invalid Operation (should set bit 0 of status word)
; Expected: RAX = 1 after masking the status word down to bit 0.
fldz
fldz
fdiv

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt


================================================
FILE: unittests/ASM/X87/invalid_fcos_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test fcos(+infinity) = Invalid Operation (should set bit 0 of status word)
; Load positive infinity: exponent all 1s, mantissa 0x8000000000000000
; The 80-bit value is assembled in memory at run time: 8 bytes of mantissa
; followed by 2 bytes of sign/exponent.
mov rax, 0x8000000000000000
mov [rel .pos_inf], rax
mov ax, 0x7FFF
mov [rel .pos_inf + 8], ax

fld tword [rel .pos_inf]
fcos

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt

align 4096
; 10-byte buffer for the hand-built +infinity
.pos_inf:
dq 0
dw 0


================================================
FILE: unittests/ASM/X87/invalid_fist_nan.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test FIST with NaN input = Invalid Operation (should set bit 0 of status word)
; Note: the 0.0 / 0.0 used to create the NaN is itself an Invalid Operation
; case (see invalid_div_zero) and no instruction in between clears the
; status word.
; Create NaN by computing 0.0 / 0.0
fldz
fldz
fdiv

; Try to convert NaN to integer - this should set Invalid Operation
lea rbx, [rel data]
fist dword [rbx]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt

align 4096
; Destination for the integer store; its contents are not checked
data:
  dd 0


================================================
FILE: unittests/ASM/X87/invalid_fist_overflow.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test FIST with value too large for 32-bit integer = Invalid Operation
; Expected: RAX = 1 after masking the status word down to bit 0.
; Load a large floating point value that exceeds INT32_MAX
lea rdx, [rel large_value]
fld tword [rdx]

; Try to convert to 32-bit integer - should set Invalid Operation
lea rbx, [rel data]
fist dword [rbx]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt

align 4096
large_value:
  dt 1e20
; Destination for the integer store; its contents are not checked
data:
  dd 0


================================================
FILE: unittests/ASM/X87/invalid_fist_overflow_16bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test FIST with 16-bit overflow = Invalid Operation (should set bit 0 of status word)
; Create a large number that will overflow int16

; Load 2^30 (larger than int16 range: max int16 = 32767, 2^30 = 1073741824)
; Built as 1.0 scaled by 2^30: the exponent is pushed first so it sits in
; ST(1) when fscale runs.
finit
fild dword [rel .thirty]
fld1
fscale

; Try to convert to int16 - this should overflow and be invalid
fistp word [rel .dummy]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt

align 4096
; Declared as a qword, but fild above only reads its low dword
.thirty: dq 30
.dummy: dw 0


================================================
FILE: unittests/ASM/X87/invalid_fist_overflow_32bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Test FIST with 32-bit overflow = Invalid Operation (should set bit 0 of status word)
; Create a very large number that will overflow int32

; Load 2^50 (larger than int32 range)
; Built as 1.0 scaled by 2^50: the exponent is pushed first so it sits in
; ST(1) when fscale runs.
finit
fild dword [rel .fifty]
fld1
fscale

; Try to convert to int32 - this should overflow and be invalid
fistp dword [rel .dummy]

; Keep only bit 0 of the status word
fstsw ax
and rax, 1

hlt

align 4096
; Declared as a qword, but fild above only reads its low dword
.fifty: dq 50
.dummy: dd 0


================================================
FILE: unittests/ASM/X87/invalid_fist_overflow_64bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; FISTP to a 64-bit destination with a value that overflows int64.
; Expected outcome: bit 0 (Invalid Operation) of the status word is set,
; so RAX ends up as 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  finit

  ; Build 2^75, which is outside the int64 range:
  ; push the exponent 75, push 1.0, then scale 1.0 by 2^75
  fild dword [rel .exponent]
  fld1
  fscale

  ; The conversion to int64 overflows and must be flagged as invalid
  fistp qword [rel .sink]

  fstsw ax
  and rax, X87_SW_IE

  hlt

align 4096
.exponent: dd 75, 0 ; 8 bytes of storage; FILD only reads the low dword
.sink:     dq 0


================================================
FILE: unittests/ASM/X87/invalid_fprem_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; FPREM with a zero divisor: 1.0 rem 0.0 must raise Invalid Operation
; (bit 0 of the status word), leaving RAX == 1.
; NOTE(review): the file name mentions infinity, but the operands used here
; are 1.0 and 0.0 - confirm whether an infinite dividend was intended.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  finit

  ; Operand setup: st1 = 0.0 (divisor), st0 = 1.0 (dividend)
  fldz
  fld1

  ; st0 = st0 rem st1 = 1.0 rem 0.0 -> invalid because the divisor is zero
  fprem

  fstsw ax
  and rax, X87_SW_IE

  hlt


================================================
FILE: unittests/ASM/X87/invalid_fptan_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; fptan(+infinity) must raise Invalid Operation (bit 0 of the status word),
; leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Assemble the 80-bit encoding of +infinity in memory:
  ; significand 0x8000000000000000, exponent field all ones, sign clear
  mov rax, 0x8000_0000_0000_0000
  mov [rel .inf80], rax
  mov ax, 0x7fff
  mov [rel .inf80 + 8], ax

  fld tword [rel .inf80]
  fptan

  fstsw ax
  and rax, X87_SW_IE

  hlt

align 4096
.inf80:
  dq 0 ; significand, filled in at runtime
  dw 0 ; sign + exponent, filled in at runtime


================================================
FILE: unittests/ASM/X87/invalid_fsin_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; fsin(+infinity) must raise Invalid Operation (bit 0 of the status word),
; leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Assemble the 80-bit encoding of +infinity in memory:
  ; significand 0x8000000000000000, exponent field all ones, sign clear
  mov rax, 0x8000_0000_0000_0000
  mov [rel .inf80], rax
  mov ax, 0x7fff
  mov [rel .inf80 + 8], ax

  fld tword [rel .inf80]
  fsin

  fstsw ax
  and rax, X87_SW_IE

  hlt

align 4096
.inf80:
  dq 0 ; significand, filled in at runtime
  dw 0 ; sign + exponent, filled in at runtime


================================================
FILE: unittests/ASM/X87/invalid_fsin_neg_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; fsin(-infinity) must raise Invalid Operation (bit 0 of the status word),
; leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Assemble the 80-bit encoding of -infinity in memory:
  ; significand 0x8000000000000000, exponent field all ones, sign set
  mov rax, 0x8000_0000_0000_0000
  mov [rel .neg_inf80], rax
  mov ax, 0xffff
  mov [rel .neg_inf80 + 8], ax

  fld tword [rel .neg_inf80]
  fsin

  fstsw ax
  and rax, X87_SW_IE

  hlt

align 4096
.neg_inf80:
  dq 0 ; significand, filled in at runtime
  dw 0 ; sign + exponent, filled in at runtime


================================================
FILE: unittests/ASM/X87/invalid_fsincos_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; fsincos(+infinity) must raise Invalid Operation (bit 0 of the status
; word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Assemble the 80-bit encoding of +infinity in memory:
  ; significand 0x8000000000000000, exponent field all ones, sign clear
  mov rax, 0x8000_0000_0000_0000
  mov [rel .inf80], rax
  mov ax, 0x7fff
  mov [rel .inf80 + 8], ax

  fld tword [rel .inf80]
  fsincos

  fstsw ax
  and rax, X87_SW_IE

  hlt

align 4096
.inf80:
  dq 0 ; significand, filled in at runtime
  dw 0 ; sign + exponent, filled in at runtime


================================================
FILE: unittests/ASM/X87/invalid_infinity_fsubr_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Infinity minus infinity via FSUBR must raise Invalid Operation
; (bit 0 of the status word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Produce +infinity as 1.0 / 0.0
  fld1
  fldz
  fdiv ; st0 = +infinity

  ; Second copy of +infinity on top of the stack
  fld st0

  ; Reverse subtract of the two infinities - this is the invalid operation
  fsubr

  fstsw ax
  and rax, X87_SW_IE

  hlt

================================================
FILE: unittests/ASM/X87/invalid_infinity_mul_zero.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Infinity times zero must raise Invalid Operation (bit 0 of the status
; word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Produce +infinity as 1.0 / 0.0
  fld1
  fldz
  fdiv ; st0 = +infinity

  ; Second factor: 0.0
  fldz

  ; infinity * 0 - this is the invalid operation
  fmul

  fstsw ax
  and rax, X87_SW_IE

  hlt


================================================
FILE: unittests/ASM/X87/invalid_infinity_ops.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; (+infinity) + (-infinity) must raise Invalid Operation (bit 0 of the
; status word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Produce +infinity as 1.0 / 0.0
  fld1
  fldz
  fdiv ; st0 = +infinity

  ; Copy it and flip the sign of the copy to obtain -infinity
  fld st0
  fchs

  ; Adding the two opposite infinities is the invalid operation
  fadd

  fstsw ax
  and rax, X87_SW_IE

  hlt


================================================
FILE: unittests/ASM/X87/invalid_infinity_sub_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; (+infinity) - (+infinity) via FSUB must raise Invalid Operation
; (bit 0 of the status word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; First +infinity: 1.0 / 0.0
  fld1
  fldz
  fdiv

  ; Second +infinity, produced the same way
  fld1
  fldz
  fdiv

  ; Subtracting one infinity from the other is the invalid operation
  fsub

  fstsw ax
  and rax, X87_SW_IE

  hlt

================================================
FILE: unittests/ASM/X87/invalid_neg_infinity_sub_neg_infinity.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; (-infinity) - (-infinity) must raise Invalid Operation (bit 0 of the
; status word), leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; Produce -infinity as (-1.0) / 0.0
  fld1
  fchs
  fldz
  fdiv ; st0 = -infinity

  ; Second copy of -infinity on top of the stack
  fld st0

  ; Subtracting the two equal infinities is the invalid operation
  fsub

  fstsw ax
  and rax, X87_SW_IE

  hlt

================================================
FILE: unittests/ASM/X87/invalid_reduced_precision.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Invalid Operation must still be reported when the x87 runs with reduced
; precision: with the precision-control field set to 10b, 0.0 / 0.0 has to
; set bit 0 of the status word, leaving RAX == 1.

%define X87_SW_IE       1      ; status-word bit 0: Invalid Operation
%define X87_CW_PC_CLEAR 0xFCFF ; clears control-word bits 8-9 (precision control)
%define X87_CW_PC_10B   0x0200 ; precision control = 10b (64-bit precision)

  ; Derive the reduced-precision control word from the current one
  fnstcw [rel cw_original]
  mov ax, [rel cw_original]
  and ax, X87_CW_PC_CLEAR
  or ax, X87_CW_PC_10B
  mov [rel cw_reduced], ax
  fldcw [rel cw_reduced]

  ; The invalid operation under test: 0.0 / 0.0
  fldz
  fldz
  fdiv

  ; Capture the Invalid Operation bit before touching the control word again
  fstsw ax
  and rax, X87_SW_IE

  ; Put the original control word back
  fldcw [rel cw_original]

  hlt

align 4096
cw_original: dw 0
cw_reduced:  dw 0


================================================
FILE: unittests/ASM/X87/invalid_simple_test.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; Baseline Invalid Operation check: 0.0 / 0.0 must set bit 0 of the status
; word, leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; 0.0 / 0.0
  fldz
  fldz
  fdiv

  fstsw ax
  and rax, X87_SW_IE

  hlt


================================================
FILE: unittests/ASM/X87/invalid_sqrt_negative.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  }
}
%endif

; sqrt(-1.0) must raise Invalid Operation (bit 0 of the status word),
; leaving RAX == 1.

%define X87_SW_IE 1 ; status-word bit 0: Invalid Operation

  ; st0 = -1.0
  fld1
  fchs

  ; Square root of a negative value is the invalid operation
  fsqrt

  fstsw ax
  and rax, X87_SW_IE

  hlt


================================================
FILE: unittests/ASM/X87/precision_test_fabs.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111111", "0x3fff"],
    "XMM2":  ["0x8111111111111111", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111111", "0x3fff"],
    "XMM5":  ["0x8111111111111111", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111111", "0x3fff"],
    "XMM8":  ["0x8111111111111111", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111111", "0x3fff"],
    "XMM11":  ["0x8111111111111111", "0x3fff"]
  }
}
%endif

; FABS under every precision / rounding combination of the x87 control word.
; The expected values above are the unmodified source in all twelve cases.

%include "x87cw.mac"

; One measurement: select the control-word mode, apply FABS to .input and
; store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fabs_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .input]
  fabs
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fabs_case x87_prec_80, x87_round_nearest, .out_1
  fabs_case x87_prec_64, x87_round_nearest, .out_2
  fabs_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fabs_case x87_prec_80, x87_round_down, .out_4
  fabs_case x87_prec_64, x87_round_down, .out_5
  fabs_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fabs_case x87_prec_80, x87_round_up, .out_7
  fabs_case x87_prec_64, x87_round_up, .out_8
  fabs_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fabs_case x87_prec_80, x87_round_towards_zero, .out_10
  fabs_case x87_prec_64, x87_round_towards_zero, .out_11
  fabs_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8111_1111_1111_1111
  dw 0x3fff

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fadd.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

; FADDP under every precision / rounding combination of the x87 control
; word. Zero is added to the source, so the expected values above are the
; source significand rounded to the selected precision in the selected
; direction.

%include "x87cw.mac"

; One measurement: select the control-word mode, compute .input + .zero and
; store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fadd_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .input]
  fld tword [rel .zero]
  faddp
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fadd_case x87_prec_80, x87_round_nearest, .out_1
  fadd_case x87_prec_64, x87_round_nearest, .out_2
  fadd_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fadd_case x87_prec_80, x87_round_down, .out_4
  fadd_case x87_prec_64, x87_round_down, .out_5
  fadd_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fadd_case x87_prec_80, x87_round_up, .out_7
  fadd_case x87_prec_64, x87_round_up, .out_8
  fadd_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fadd_case x87_prec_80, x87_round_towards_zero, .out_10
  fadd_case x87_prec_64, x87_round_towards_zero, .out_11
  fadd_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8111_1111_1111_1111
  dw 0x3fff

; All-zero 80-bit operand (+0.0), stored in a 16-byte slot
.zero:   dq 0x0, 0x0

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fcos.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM1":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM2":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM3":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM4":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM5":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM6":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM7":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM8":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM9":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM10":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM11":  ["0x86b5441382debef4", "0x3ffe"]
  }
}
%endif

; FCOS under every precision / rounding combination of the x87 control word.
; Per the expected values above, the result does not depend on the precision
; setting; only the last significand bit follows the rounding direction.

%include "x87cw.mac"

; One measurement: select the control-word mode, apply FCOS to .input and
; store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fcos_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .input]
  fcos
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fcos_case x87_prec_80, x87_round_nearest, .out_1
  fcos_case x87_prec_64, x87_round_nearest, .out_2
  fcos_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fcos_case x87_prec_80, x87_round_down, .out_4
  fcos_case x87_prec_64, x87_round_down, .out_5
  fcos_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fcos_case x87_prec_80, x87_round_up, .out_7
  fcos_case x87_prec_64, x87_round_up, .out_8
  fcos_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fcos_case x87_prec_80, x87_round_towards_zero, .out_10
  fcos_case x87_prec_64, x87_round_towards_zero, .out_11
  fcos_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8222_2222_2222_2222
  dw 0x3fff

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fdiv.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

; FDIVP under every precision / rounding combination of the x87 control
; word. The source is combined with 1.0, and the expected values above are
; the source significand rounded to the selected precision in the selected
; direction.

%include "x87cw.mac"

; One measurement: select the control-word mode, push .input and 1.0, run
; FDIVP and store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fdiv_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .input]
  fld1
  fdivp
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fdiv_case x87_prec_80, x87_round_nearest, .out_1
  fdiv_case x87_prec_64, x87_round_nearest, .out_2
  fdiv_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fdiv_case x87_prec_80, x87_round_down, .out_4
  fdiv_case x87_prec_64, x87_round_down, .out_5
  fdiv_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fdiv_case x87_prec_80, x87_round_up, .out_7
  fdiv_case x87_prec_64, x87_round_up, .out_8
  fdiv_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fdiv_case x87_prec_80, x87_round_towards_zero, .out_10
  fdiv_case x87_prec_64, x87_round_towards_zero, .out_11
  fdiv_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8111_1111_1111_1111
  dw 0x3fff

; Not referenced by this test; kept so the data layout stays unchanged
.source_zero: dq 0x0, 0x0

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fdivr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

; FDIVRP under every precision / rounding combination of the x87 control
; word. 1.0 and the source are pushed (in that order) before the reverse
; divide, and the expected values above are the source significand rounded
; to the selected precision in the selected direction.

%include "x87cw.mac"

; One measurement: select the control-word mode, push 1.0 and .input, run
; FDIVRP and store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fdivr_case 3
  set_cw_precision_rounding %1, %2
  fld1
  fld tword [rel .input]
  fdivrp
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fdivr_case x87_prec_80, x87_round_nearest, .out_1
  fdivr_case x87_prec_64, x87_round_nearest, .out_2
  fdivr_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fdivr_case x87_prec_80, x87_round_down, .out_4
  fdivr_case x87_prec_64, x87_round_down, .out_5
  fdivr_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fdivr_case x87_prec_80, x87_round_up, .out_7
  fdivr_case x87_prec_64, x87_round_up, .out_8
  fdivr_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fdivr_case x87_prec_80, x87_round_towards_zero, .out_10
  fdivr_case x87_prec_64, x87_round_towards_zero, .out_11
  fdivr_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8111_1111_1111_1111
  dw 0x3fff

; Not referenced by this test; kept so the data layout stays unchanged
.source_zero: dq 0x0, 0x0

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fmul.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

; FMULP under every precision / rounding combination of the x87 control
; word. The source is multiplied by 1.0, so the expected values above are
; the source significand rounded to the selected precision in the selected
; direction.

%include "x87cw.mac"

; One measurement: select the control-word mode, compute .input * 1.0 and
; store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fmul_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .input]
  fld1
  fmulp
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fmul_case x87_prec_80, x87_round_nearest, .out_1
  fmul_case x87_prec_64, x87_round_nearest, .out_2
  fmul_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fmul_case x87_prec_80, x87_round_down, .out_4
  fmul_case x87_prec_64, x87_round_down, .out_5
  fmul_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fmul_case x87_prec_80, x87_round_up, .out_7
  fmul_case x87_prec_64, x87_round_up, .out_8
  fmul_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fmul_case x87_prec_80, x87_round_towards_zero, .out_10
  fmul_case x87_prec_64, x87_round_towards_zero, .out_11
  fmul_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8111_1111_1111_1111
  dw 0x3fff

; Not referenced by this test; kept so the data layout stays unchanged
.source_zero: dq 0x0, 0x0

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fprem.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8888888888888880", "0x3ff9"],
    "XMM1":  ["0x8888888888888880", "0x3ff9"],
    "XMM2":  ["0x8888888888888880", "0x3ff9"],
    "XMM3":  ["0x8888888888888880", "0x3ff9"],
    "XMM4":  ["0x8888888888888880", "0x3ff9"],
    "XMM5":  ["0x8888888888888880", "0x3ff9"],
    "XMM6":  ["0x8888888888888880", "0x3ff9"],
    "XMM7":  ["0x8888888888888880", "0x3ff9"],
    "XMM8":  ["0x8888888888888880", "0x3ff9"],
    "XMM9":  ["0x8888888888888880", "0x3ff9"],
    "XMM10":  ["0x8888888888888880", "0x3ff9"],
    "XMM11":  ["0x8888888888888880", "0x3ff9"]
  }
}
%endif

; FPREM under every precision / rounding combination of the x87 control
; word. Per the expected values above, the remainder of the source with
; respect to 1.0 is the same in all twelve modes.

%include "x87cw.mac"

; One measurement: select the control-word mode, push 1.0 and .input, take
; the partial remainder, drop the 1.0 and store the 80-bit result.
;   %1 = precision selector, %2 = rounding selector, %3 = result label
%macro fprem_case 3
  set_cw_precision_rounding %1, %2
  fld1
  fld tword [rel .input]
  fprem
  fxch      ; bring the 1.0 to the top ...
  fstp st0  ; ... and discard it, leaving only the remainder
  fstp tword [rel %3]
%endmacro

  ; NOTE(review): presumably gives set_cw_precision_rounding a usable stack -
  ; confirm against x87cw.mac
  mov rsp, 0xe000_1000

  finit ; enters x87 state

  ; Round to nearest: 80-, 64- and 32-bit precision
  fprem_case x87_prec_80, x87_round_nearest, .out_1
  fprem_case x87_prec_64, x87_round_nearest, .out_2
  fprem_case x87_prec_32, x87_round_nearest, .out_3

  ; Round down
  fprem_case x87_prec_80, x87_round_down, .out_4
  fprem_case x87_prec_64, x87_round_down, .out_5
  fprem_case x87_prec_32, x87_round_down, .out_6

  ; Round up
  fprem_case x87_prec_80, x87_round_up, .out_7
  fprem_case x87_prec_64, x87_round_up, .out_8
  fprem_case x87_prec_32, x87_round_up, .out_9

  ; Round towards zero
  fprem_case x87_prec_80, x87_round_towards_zero, .out_10
  fprem_case x87_prec_64, x87_round_towards_zero, .out_11
  fprem_case x87_prec_32, x87_round_towards_zero, .out_12

  ; Expose the stored results to the harness through XMM0-XMM11
  movups xmm0, [rel .out_1]
  movups xmm1, [rel .out_2]
  movups xmm2, [rel .out_3]
  movups xmm3, [rel .out_4]
  movups xmm4, [rel .out_5]
  movups xmm5, [rel .out_6]
  movups xmm6, [rel .out_7]
  movups xmm7, [rel .out_8]
  movups xmm8, [rel .out_9]
  movups xmm9, [rel .out_10]
  movups xmm10, [rel .out_11]
  movups xmm11, [rel .out_12]

  hlt

align 4096
; Positive 80-bit source operand
.input:
  dq 0x8222_2222_2222_2222
  dw 0x3fff

; Not referenced by this test; kept so the data layout stays unchanged
.source_zero: dq 0x0, 0x0

; Result slots: 16 zeroed bytes each, so a 10-byte FSTP result can be read
; back with one 16-byte MOVUPS
.out_1:  dq 0, 0
.out_2:  dq 0, 0
.out_3:  dq 0, 0
.out_4:  dq 0, 0
.out_5:  dq 0, 0
.out_6:  dq 0, 0
.out_7:  dq 0, 0
.out_8:  dq 0, 0
.out_9:  dq 0, 0
.out_10: dq 0, 0
.out_11: dq 0, 0
.out_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fprem1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8888888888888880", "0x3ff9"],
    "XMM1":  ["0x8888888888888880", "0x3ff9"],
    "XMM2":  ["0x8888888888888880", "0x3ff9"],
    "XMM3":  ["0x8888888888888880", "0x3ff9"],
    "XMM4":  ["0x8888888888888880", "0x3ff9"],
    "XMM5":  ["0x8888888888888880", "0x3ff9"],
    "XMM6":  ["0x8888888888888880", "0x3ff9"],
    "XMM7":  ["0x8888888888888880", "0x3ff9"],
    "XMM8":  ["0x8888888888888880", "0x3ff9"],
    "XMM9":  ["0x8888888888888880", "0x3ff9"],
    "XMM10":  ["0x8888888888888880", "0x3ff9"],
    "XMM11":  ["0x8888888888888880", "0x3ff9"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FPREM1 case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fprem1_case 3
  set_cw_precision_rounding %1, %2
  fld1                        ; st0 = 1.0 (the divisor)
  fld tword [rel .source_1]   ; st0 = dividend, st1 = 1.0
  fprem1                      ; st0 = partial remainder of st0 / st1
  fxch                        ; swap so the divisor is on top...
  fstp st0                    ; ...and pop it
  fstp tword [rel %3]         ; store the remainder; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fprem1_case x87_prec_80, x87_round_nearest, .result_1
fprem1_case x87_prec_64, x87_round_nearest, .result_2
fprem1_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fprem1_case x87_prec_80, x87_round_down, .result_4
fprem1_case x87_prec_64, x87_round_down, .result_5
fprem1_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fprem1_case x87_prec_80, x87_round_up, .result_7
fprem1_case x87_prec_64, x87_round_up, .result_8
fprem1_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fprem1_case x87_prec_80, x87_round_towards_zero, .result_10
fprem1_case x87_prec_64, x87_round_towards_zero, .result_11
fprem1_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Dividend: positive 80-bit extended value, stored as the 64-bit significand
; followed by the 16-bit sign/exponent word (0x3fff => value in [1, 2)).
.source_1:
dq 0x8222_2222_2222_2222
dw 0x3fff

; The unused .source_zero operand was dropped: nothing in this test reads it.

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fscale.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8222222222222222", "0x4000"],
    "XMM1":  ["0x8222222222222222", "0x4000"],
    "XMM2":  ["0x8222222222222222", "0x4000"],
    "XMM3":  ["0x8222222222222222", "0x4000"],
    "XMM4":  ["0x8222222222222222", "0x4000"],
    "XMM5":  ["0x8222222222222222", "0x4000"],
    "XMM6":  ["0x8222222222222222", "0x4000"],
    "XMM7":  ["0x8222222222222222", "0x4000"],
    "XMM8":  ["0x8222222222222222", "0x4000"],
    "XMM9":  ["0x8222222222222222", "0x4000"],
    "XMM10":  ["0x8222222222222222", "0x4000"],
    "XMM11":  ["0x8222222222222222", "0x4000"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FSCALE case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fscale_case 3
  set_cw_precision_rounding %1, %2
  fld1                        ; st0 = 1.0 (the scale exponent)
  fld tword [rel .source_1]   ; st0 = value to scale, st1 = 1.0
  fscale                      ; st0 = st0 * 2^trunc(st1)
  fxch                        ; swap so the scale operand is on top...
  fstp st0                    ; ...and pop it
  fstp tword [rel %3]         ; store the scaled value; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fscale_case x87_prec_80, x87_round_nearest, .result_1
fscale_case x87_prec_64, x87_round_nearest, .result_2
fscale_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fscale_case x87_prec_80, x87_round_down, .result_4
fscale_case x87_prec_64, x87_round_down, .result_5
fscale_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fscale_case x87_prec_80, x87_round_up, .result_7
fscale_case x87_prec_64, x87_round_up, .result_8
fscale_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fscale_case x87_prec_80, x87_round_towards_zero, .result_10
fscale_case x87_prec_64, x87_round_towards_zero, .result_11
fscale_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Value to scale: positive 80-bit extended value, stored as the 64-bit
; significand followed by the 16-bit sign/exponent word (0x3fff => [1, 2)).
.source_1:
dq 0x8222_2222_2222_2222
dw 0x3fff

; The unused .source_zero operand was dropped: nothing in this test reads it.

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fsin.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM1":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM2":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM3":  ["0xd9b11c39ec002fd8", "0x3ffe"],
    "XMM4":  ["0xd9b11c39ec002fd8", "0x3ffe"],
    "XMM5":  ["0xd9b11c39ec002fd8", "0x3ffe"],
    "XMM6":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM7":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM8":  ["0xd9b11c39ec002fd9", "0x3ffe"],
    "XMM9":  ["0xd9b11c39ec002fd8", "0x3ffe"],
    "XMM10":  ["0xd9b11c39ec002fd8", "0x3ffe"],
    "XMM11":  ["0xd9b11c39ec002fd8", "0x3ffe"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FSIN case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fsin_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]   ; st0 = argument
  fsin                        ; st0 = sin(st0)
  fstp tword [rel %3]         ; store and pop; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fsin_case x87_prec_80, x87_round_nearest, .result_1
fsin_case x87_prec_64, x87_round_nearest, .result_2
fsin_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fsin_case x87_prec_80, x87_round_down, .result_4
fsin_case x87_prec_64, x87_round_down, .result_5
fsin_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fsin_case x87_prec_80, x87_round_up, .result_7
fsin_case x87_prec_64, x87_round_up, .result_8
fsin_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fsin_case x87_prec_80, x87_round_towards_zero, .result_10
fsin_case x87_prec_64, x87_round_towards_zero, .result_11
fsin_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; FSIN argument: positive 80-bit extended value, stored as the 64-bit
; significand followed by the 16-bit sign/exponent word (0x3fff => [1, 2)).
.source_1:
dq 0x8222_2222_2222_2222
dw 0x3fff

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fsqrt.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8000000000000001", "0x3fff"],
    "XMM1":  ["0x8000000000000800", "0x3fff"],
    "XMM2":  ["0x8000010000000000", "0x3fff"],
    "XMM3":  ["0x8000000000000000", "0x3fff"],
    "XMM4":  ["0x8000000000000000", "0x3fff"],
    "XMM5":  ["0x8000000000000000", "0x3fff"],
    "XMM6":  ["0x8000000000000001", "0x3fff"],
    "XMM7":  ["0x8000000000000800", "0x3fff"],
    "XMM8":  ["0x8000010000000000", "0x3fff"],
    "XMM9":  ["0x8000000000000000", "0x3fff"],
    "XMM10":  ["0x8000000000000000", "0x3fff"],
    "XMM11":  ["0x8000000000000000", "0x3fff"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FSQRT case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 80-bit source operand (chosen per precision),
;   %4 = label of the 16-byte slot that receives the result
%macro fsqrt_case 4
  set_cw_precision_rounding %1, %2
  fld tword [rel %3]          ; st0 = operand
  fsqrt                       ; st0 = sqrt(st0)
  fstp tword [rel %4]         ; store and pop; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fsqrt_case x87_prec_80, x87_round_nearest, .source_80bit, .result_1
fsqrt_case x87_prec_64, x87_round_nearest, .source_64bit, .result_2
fsqrt_case x87_prec_32, x87_round_nearest, .source_32bit, .result_3

; Round down at 80/64/32-bit precision
fsqrt_case x87_prec_80, x87_round_down, .source_80bit, .result_4
fsqrt_case x87_prec_64, x87_round_down, .source_64bit, .result_5
fsqrt_case x87_prec_32, x87_round_down, .source_32bit, .result_6

; Round up at 80/64/32-bit precision
fsqrt_case x87_prec_80, x87_round_up, .source_80bit, .result_7
fsqrt_case x87_prec_64, x87_round_up, .source_64bit, .result_8
fsqrt_case x87_prec_32, x87_round_up, .source_32bit, .result_9

; Round towards zero at 80/64/32-bit precision
fsqrt_case x87_prec_80, x87_round_towards_zero, .source_80bit, .result_10
fsqrt_case x87_prec_64, x87_round_towards_zero, .source_64bit, .result_11
fsqrt_case x87_prec_32, x87_round_towards_zero, .source_32bit, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Positive operands, each stored as the 64-bit significand followed by the
; 16-bit sign/exponent word. Every value is 1.0 plus two units in the last
; place of the precision it is paired with, so its square root falls strictly
; between two representable values and the rounding mode picks the result.

; 64-bit significand (x87_prec_80): 1.0 + 2 * 2^-63
.source_80bit:
dq 0x8000_0000_0000_0002
dw 0x3fff

; 53-bit significand (x87_prec_64): 1.0 + 2 * 2^-52
.source_64bit:
dq 0x8000_0000_0000_1000
dw 0x3fff

; 24-bit significand (x87_prec_32): 1.0 + 2 * 2^-23
.source_32bit:
dq 0x8000_0200_0000_0000
dw 0x3fff

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fsub.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FSUBP case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fsub_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]     ; st0 = source_1
  fld tword [rel .source_zero]  ; st0 = 0.0, st1 = source_1
  fsubp                         ; st1 = st1 - st0 (source_1 - 0), then pop
  fstp tword [rel %3]           ; store the difference; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fsub_case x87_prec_80, x87_round_nearest, .result_1
fsub_case x87_prec_64, x87_round_nearest, .result_2
fsub_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fsub_case x87_prec_80, x87_round_down, .result_4
fsub_case x87_prec_64, x87_round_down, .result_5
fsub_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fsub_case x87_prec_80, x87_round_up, .result_7
fsub_case x87_prec_64, x87_round_up, .result_8
fsub_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fsub_case x87_prec_80, x87_round_towards_zero, .result_10
fsub_case x87_prec_64, x87_round_towards_zero, .result_11
fsub_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Minuend: positive 80-bit extended value, stored as the 64-bit significand
; followed by the 16-bit sign/exponent word (0x3fff => value in [1, 2)).
.source_1:
dq 0x8111_1111_1111_1111
dw 0x3fff

; Subtrahend: +0.0 (16 zero bytes; fld tword reads the first 10)
.source_zero: dq 0x0, 0x0

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fsubr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FSUBRP case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fsubr_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]     ; st0 = source_1
  fld tword [rel .source_zero]  ; st0 = 0.0, st1 = source_1
  fsubrp                        ; st1 = st0 - st1 (0 - source_1), then pop
  fstp tword [rel %3]           ; store the difference; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fsubr_case x87_prec_80, x87_round_nearest, .result_1
fsubr_case x87_prec_64, x87_round_nearest, .result_2
fsubr_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fsubr_case x87_prec_80, x87_round_down, .result_4
fsubr_case x87_prec_64, x87_round_down, .result_5
fsubr_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fsubr_case x87_prec_80, x87_round_up, .result_7
fsubr_case x87_prec_64, x87_round_up, .result_8
fsubr_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fsubr_case x87_prec_80, x87_round_towards_zero, .result_10
fsubr_case x87_prec_64, x87_round_towards_zero, .result_11
fsubr_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Positive 80-bit extended operand, stored as the 64-bit significand followed
; by the 16-bit sign/exponent word (0x3fff => value in [1, 2)). The test
; subtracts it from zero, so the stored results are negative.
.source_1:
dq 0x8111_1111_1111_1111
dw 0x3fff

; +0.0 (16 zero bytes; fld tword reads the first 10)
.source_zero: dq 0x0, 0x0

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_ftan.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM1":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM2":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM3":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM4":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM5":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM6":  ["0xced9f672ba44b54a", "0x3fff"],
    "XMM7":  ["0xced9f672ba44b54a", "0x3fff"],
    "XMM8":  ["0xced9f672ba44b54a", "0x3fff"],
    "XMM9":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM10":  ["0xced9f672ba44b549", "0x3fff"],
    "XMM11":  ["0xced9f672ba44b549", "0x3fff"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FPTAN case and records the 80-bit tangent.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fptan_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]   ; st0 = argument
  fptan                       ; st0 = 1.0, st1 = tan(argument)
  fstp st0                    ; discard the 1.0 that fptan pushes
  fstp tword [rel %3]         ; store the tangent; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fptan_case x87_prec_80, x87_round_nearest, .result_1
fptan_case x87_prec_64, x87_round_nearest, .result_2
fptan_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fptan_case x87_prec_80, x87_round_down, .result_4
fptan_case x87_prec_64, x87_round_down, .result_5
fptan_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fptan_case x87_prec_80, x87_round_up, .result_7
fptan_case x87_prec_64, x87_round_up, .result_8
fptan_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fptan_case x87_prec_80, x87_round_towards_zero, .result_10
fptan_case x87_prec_64, x87_round_towards_zero, .result_11
fptan_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; FPTAN argument: positive 80-bit extended value, stored as the 64-bit
; significand followed by the 16-bit sign/exponent word (0x3fff => [1, 2)).
.source_1:
dq 0x8222_2222_2222_2222
dw 0x3fff

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fyl2x.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc333333333333333", "0x4001"],
    "XMM1":  ["0xc333333333333333", "0x4001"],
    "XMM2":  ["0xc333333333333333", "0x4001"],
    "XMM3":  ["0xc333333333333333", "0x4001"],
    "XMM4":  ["0xc333333333333333", "0x4001"],
    "XMM5":  ["0xc333333333333333", "0x4001"],
    "XMM6":  ["0xc333333333333333", "0x4001"],
    "XMM7":  ["0xc333333333333333", "0x4001"],
    "XMM8":  ["0xc333333333333333", "0x4001"],
    "XMM9":  ["0xc333333333333333", "0x4001"],
    "XMM10":  ["0xc333333333333333", "0x4001"],
    "XMM11":  ["0xc333333333333333", "0x4001"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FYL2X case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fyl2x_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_2]   ; st0 = y (80-bit float)
  fild qword [rel .source_1]  ; st0 = x (integer 64), st1 = y
  fyl2x                       ; st1 = y * log2(x), then pop
  fstp tword [rel %3]         ; store the product; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fyl2x_case x87_prec_80, x87_round_nearest, .result_1
fyl2x_case x87_prec_64, x87_round_nearest, .result_2
fyl2x_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fyl2x_case x87_prec_80, x87_round_down, .result_4
fyl2x_case x87_prec_64, x87_round_down, .result_5
fyl2x_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fyl2x_case x87_prec_80, x87_round_up, .result_7
fyl2x_case x87_prec_64, x87_round_up, .result_8
fyl2x_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fyl2x_case x87_prec_80, x87_round_towards_zero, .result_10
fyl2x_case x87_prec_64, x87_round_towards_zero, .result_11
fyl2x_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; x operand: the 64-bit integer 64 (loaded with fild), so log2(x) is exactly 6
.source_1: dq 64

; y operand: positive 80-bit extended value, stored as the 64-bit significand
; followed by the 16-bit sign/exponent word (0x3fff => value in [1, 2)).
.source_2:
dq 0x8222_2222_2222_2222
dw 0x3fff

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_fyl2xp1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM1":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM2":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM3":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM4":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM5":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM6":  ["0xc3ed7db72edb35db", "0x4001"],
    "XMM7":  ["0xc3ed7db72edb35db", "0x4001"],
    "XMM8":  ["0xc3ed7db72edb35db", "0x4001"],
    "XMM9":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM10":  ["0xc3ed7db72edb35da", "0x4001"],
    "XMM11":  ["0xc3ed7db72edb35da", "0x4001"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FYL2XP1 case and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro fyl2xp1_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_2]   ; st0 = y (80-bit float)
  fild qword [rel .source_1]  ; st0 = x (integer 64), st1 = y
  fyl2xp1                     ; st1 = y * log2(x + 1), then pop
  fstp tword [rel %3]         ; store the product; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
fyl2xp1_case x87_prec_80, x87_round_nearest, .result_1
fyl2xp1_case x87_prec_64, x87_round_nearest, .result_2
fyl2xp1_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
fyl2xp1_case x87_prec_80, x87_round_down, .result_4
fyl2xp1_case x87_prec_64, x87_round_down, .result_5
fyl2xp1_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
fyl2xp1_case x87_prec_80, x87_round_up, .result_7
fyl2xp1_case x87_prec_64, x87_round_up, .result_8
fyl2xp1_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
fyl2xp1_case x87_prec_80, x87_round_towards_zero, .result_10
fyl2xp1_case x87_prec_64, x87_round_towards_zero, .result_11
fyl2xp1_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; x operand: the 64-bit integer 64 (loaded with fild); the test takes log2(x + 1)
.source_1: dq 64

; y operand: positive 80-bit extended value, stored as the 64-bit significand
; followed by the 16-bit sign/exponent word (0x3fff => value in [1, 2)).
.source_2:
dq 0x8222_2222_2222_2222
dw 0x3fff

; One 16-byte slot per test case. fstp tword fills the low 10 bytes; the slot
; is 16 bytes wide so it can be read back with a single movups.
.result_1:  dq 0, 0
.result_2:  dq 0, 0
.result_3:  dq 0, 0
.result_4:  dq 0, 0
.result_5:  dq 0, 0
.result_6:  dq 0, 0
.result_7:  dq 0, 0
.result_8:  dq 0, 0
.result_9:  dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fabs.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111111", "0x3fff"],
    "XMM2":  ["0x8111111111111111", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111111", "0x3fff"],
    "XMM5":  ["0x8111111111111111", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111111", "0x3fff"],
    "XMM8":  ["0x8111111111111111", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111111", "0x3fff"],
    "XMM11":  ["0x8111111111111111", "0x3fff"]
  }
}
%endif

%include "x87cw.mac"

; Executes one FABS case on the negative operand and records its 80-bit result.
;   %1 = precision-control constant, %2 = rounding-control constant,
;   %3 = label of the 16-byte slot that receives the result
%macro neg_fabs_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]   ; st0 = negative operand
  fabs                        ; st0 = |st0| (clears the sign bit)
  fstp tword [rel %3]         ; store and pop; the x87 stack is empty again
%endmacro

; NOTE(review): stack pointer is presumably needed by set_cw_precision_rounding
; (defined in x87cw.mac, not visible here) - confirm.
mov rsp, 0xe000_1000

finit ; reset the FPU; enters x87 state

; Round to nearest at 80/64/32-bit precision
neg_fabs_case x87_prec_80, x87_round_nearest, .result_1
neg_fabs_case x87_prec_64, x87_round_nearest, .result_2
neg_fabs_case x87_prec_32, x87_round_nearest, .result_3

; Round down at 80/64/32-bit precision
neg_fabs_case x87_prec_80, x87_round_down, .result_4
neg_fabs_case x87_prec_64, x87_round_down, .result_5
neg_fabs_case x87_prec_32, x87_round_down, .result_6

; Round up at 80/64/32-bit precision
neg_fabs_case x87_prec_80, x87_round_up, .result_7
neg_fabs_case x87_prec_64, x87_round_up, .result_8
neg_fabs_case x87_prec_32, x87_round_up, .result_9

; Round towards zero at 80/64/32-bit precision
neg_fabs_case x87_prec_80, x87_round_towards_zero, .result_10
neg_fabs_case x87_prec_64, x87_round_towards_zero, .result_11
neg_fabs_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each stored result into the XMM register listed in the CONFIG header
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Positive
.source_1:
dq 0x8111_1111_1111_1111
dw 0xbfff

.result_1:
dq 0
dq 0

.result_2:
dq 0
dq 0

.result_3:
dq 0
dq 0

.result_4:
dq 0
dq 0

.result_5:
dq 0
dq 0

.result_6:
dq 0
dq 0

.result_7:
dq 0
dq 0

.result_8:
dq 0
dq 0

.result_9:
dq 0
dq 0

.result_10:
dq 0
dq 0

.result_11:
dq 0
dq 0

.result_12:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fadd.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

%include "x87cw.mac"

; FADDP of a negative 80-bit operand and zero, repeated for every combination
; of the three precision settings and four rounding modes. The sum is
; mathematically the operand itself, so any difference in the result comes
; from rounding the significand to the selected precision. Per RegData the
; 64-bit mode keeps 53 significand bits and the 32-bit mode keeps 24; because
; the operand is negative, round-down is the only mode that increases the
; magnitude.

; fadd_case precision, rounding, result_slot
;   Programs the control word, computes .source_1 + .source_zero and stores
;   the 80-bit result into result_slot.
%macro fadd_case 3
set_cw_precision_rounding %1, %2
fld tword [rel .source_1]
fld tword [rel .source_zero]
faddp
fstp tword [rel %3]
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fadd_case x87_prec_80, x87_round_nearest, .result_1
fadd_case x87_prec_64, x87_round_nearest, .result_2
fadd_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fadd_case x87_prec_80, x87_round_down, .result_4
fadd_case x87_prec_64, x87_round_down, .result_5
fadd_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fadd_case x87_prec_80, x87_round_up, .result_7
fadd_case x87_prec_64, x87_round_up, .result_8
fadd_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fadd_case x87_prec_80, x87_round_towards_zero, .result_10
fadd_case x87_prec_64, x87_round_towards_zero, .result_11
fadd_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8111_1111_1111_1111
dw 0xbfff

; +0.0 addend (only the low 10 bytes are read by fld tword)
.source_zero: dq 0x0, 0x0

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fcos.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM1":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM2":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM3":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM4":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM5":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM6":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM7":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM8":  ["0x86b5441382debef5", "0x3ffe"],
    "XMM9":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM10":  ["0x86b5441382debef4", "0x3ffe"],
    "XMM11":  ["0x86b5441382debef4", "0x3ffe"]
  }
}
%endif

%include "x87cw.mac"

; FCOS on a negative 80-bit operand, repeated for every combination of the
; three precision settings and four rounding modes. Per RegData the expected
; result is identical across the three precision settings within one rounding
; mode; only the rounding mode changes the last significand bit.

; fcos_case precision, rounding, result_slot
;   Programs the control word, applies FCOS to .source_1 and stores the
;   80-bit result into result_slot.
%macro fcos_case 3
set_cw_precision_rounding %1, %2
fld tword [rel .source_1]
fcos
fstp tword [rel %3]
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fcos_case x87_prec_80, x87_round_nearest, .result_1
fcos_case x87_prec_64, x87_round_nearest, .result_2
fcos_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fcos_case x87_prec_80, x87_round_down, .result_4
fcos_case x87_prec_64, x87_round_down, .result_5
fcos_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fcos_case x87_prec_80, x87_round_up, .result_7
fcos_case x87_prec_64, x87_round_up, .result_8
fcos_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fcos_case x87_prec_80, x87_round_towards_zero, .result_10
fcos_case x87_prec_64, x87_round_towards_zero, .result_11
fcos_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8222_2222_2222_2222
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fdiv.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

%include "x87cw.mac"

; FDIVP of a negative 80-bit operand by 1.0, repeated for every combination
; of the three precision settings and four rounding modes. The quotient is
; mathematically the operand itself, so any difference in the result comes
; from rounding the significand to the selected precision. Per RegData the
; 64-bit mode keeps 53 significand bits and the 32-bit mode keeps 24; because
; the operand is negative, round-down is the only mode that increases the
; magnitude.

; fdiv_case precision, rounding, result_slot
;   Programs the control word, computes .source_1 / 1.0 and stores the
;   80-bit result into result_slot.
%macro fdiv_case 3
set_cw_precision_rounding %1, %2
fld tword [rel .source_1] ; dividend, ends up in st1
fld1                      ; divisor 1.0 in st0
fdivp                     ; st1 = st1 / st0, then pop
fstp tword [rel %3]
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fdiv_case x87_prec_80, x87_round_nearest, .result_1
fdiv_case x87_prec_64, x87_round_nearest, .result_2
fdiv_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fdiv_case x87_prec_80, x87_round_down, .result_4
fdiv_case x87_prec_64, x87_round_down, .result_5
fdiv_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fdiv_case x87_prec_80, x87_round_up, .result_7
fdiv_case x87_prec_64, x87_round_up, .result_8
fdiv_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fdiv_case x87_prec_80, x87_round_towards_zero, .result_10
fdiv_case x87_prec_64, x87_round_towards_zero, .result_11
fdiv_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8111_1111_1111_1111
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fdivr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

%include "x87cw.mac"

; FDIVRP of a negative 80-bit operand by 1.0, repeated for every combination
; of the three precision settings and four rounding modes. The operands are
; pushed in the opposite order to the FDIVP test so that the reversed divide
; still computes operand / 1.0; any difference in the result therefore comes
; from rounding the significand to the selected precision. Per RegData the
; 64-bit mode keeps 53 significand bits and the 32-bit mode keeps 24; because
; the operand is negative, round-down is the only mode that increases the
; magnitude.

; fdivr_case precision, rounding, result_slot
;   Programs the control word, computes .source_1 / 1.0 with the reversed
;   divide and stores the 80-bit result into result_slot.
%macro fdivr_case 3
set_cw_precision_rounding %1, %2
fld1                      ; divisor 1.0, ends up in st1
fld tword [rel .source_1] ; dividend in st0
fdivrp                    ; st1 = st0 / st1, then pop
fstp tword [rel %3]
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fdivr_case x87_prec_80, x87_round_nearest, .result_1
fdivr_case x87_prec_64, x87_round_nearest, .result_2
fdivr_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fdivr_case x87_prec_80, x87_round_down, .result_4
fdivr_case x87_prec_64, x87_round_down, .result_5
fdivr_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fdivr_case x87_prec_80, x87_round_up, .result_7
fdivr_case x87_prec_64, x87_round_up, .result_8
fdivr_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fdivr_case x87_prec_80, x87_round_towards_zero, .result_10
fdivr_case x87_prec_64, x87_round_towards_zero, .result_11
fdivr_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8111_1111_1111_1111
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fmul.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

%include "x87cw.mac"

; FMULP of a negative 80-bit operand by 1.0, repeated for every combination
; of the three precision settings and four rounding modes. The product is
; mathematically the operand itself, so any difference in the result comes
; from rounding the significand to the selected precision. Per RegData the
; 64-bit mode keeps 53 significand bits and the 32-bit mode keeps 24; because
; the operand is negative, round-down is the only mode that increases the
; magnitude.

; fmul_case precision, rounding, result_slot
;   Programs the control word, computes .source_1 * 1.0 and stores the
;   80-bit result into result_slot.
%macro fmul_case 3
set_cw_precision_rounding %1, %2
fld tword [rel .source_1]
fld1
fmulp
fstp tword [rel %3]
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fmul_case x87_prec_80, x87_round_nearest, .result_1
fmul_case x87_prec_64, x87_round_nearest, .result_2
fmul_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fmul_case x87_prec_80, x87_round_down, .result_4
fmul_case x87_prec_64, x87_round_down, .result_5
fmul_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fmul_case x87_prec_80, x87_round_up, .result_7
fmul_case x87_prec_64, x87_round_up, .result_8
fmul_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fmul_case x87_prec_80, x87_round_towards_zero, .result_10
fmul_case x87_prec_64, x87_round_towards_zero, .result_11
fmul_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8111_1111_1111_1111
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fprem.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8888888888888880", "0xbff9"],
    "XMM1":  ["0x8888888888888880", "0xbff9"],
    "XMM2":  ["0x8888888888888880", "0xbff9"],
    "XMM3":  ["0x8888888888888880", "0xbff9"],
    "XMM4":  ["0x8888888888888880", "0xbff9"],
    "XMM5":  ["0x8888888888888880", "0xbff9"],
    "XMM6":  ["0x8888888888888880", "0xbff9"],
    "XMM7":  ["0x8888888888888880", "0xbff9"],
    "XMM8":  ["0x8888888888888880", "0xbff9"],
    "XMM9":  ["0x8888888888888880", "0xbff9"],
    "XMM10":  ["0x8888888888888880", "0xbff9"],
    "XMM11":  ["0x8888888888888880", "0xbff9"]
  }
}
%endif

%include "x87cw.mac"

; FPREM of a negative 80-bit operand by 1.0, repeated for every combination
; of the three precision settings and four rounding modes. RegData expects
; the same remainder in all twelve slots, i.e. the control word must not
; influence the result.

; fprem_case precision, rounding, result_slot
;   Programs the control word, computes the partial remainder of
;   .source_1 / 1.0 and stores the 80-bit result into result_slot.
%macro fprem_case 3
set_cw_precision_rounding %1, %2
fld1                      ; divisor 1.0, ends up in st1
fld tword [rel .source_1] ; dividend in st0
fprem                     ; st0 = remainder of st0 / st1
fxch                      ; bring the divisor to the top...
fstp st0                  ; ...and discard it
fstp tword [rel %3]       ; store the remainder, leaving the stack empty
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fprem_case x87_prec_80, x87_round_nearest, .result_1
fprem_case x87_prec_64, x87_round_nearest, .result_2
fprem_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fprem_case x87_prec_80, x87_round_down, .result_4
fprem_case x87_prec_64, x87_round_down, .result_5
fprem_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fprem_case x87_prec_80, x87_round_up, .result_7
fprem_case x87_prec_64, x87_round_up, .result_8
fprem_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fprem_case x87_prec_80, x87_round_towards_zero, .result_10
fprem_case x87_prec_64, x87_round_towards_zero, .result_11
fprem_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8222_2222_2222_2222
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fprem1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8888888888888880", "0xbff9"],
    "XMM1":  ["0x8888888888888880", "0xbff9"],
    "XMM2":  ["0x8888888888888880", "0xbff9"],
    "XMM3":  ["0x8888888888888880", "0xbff9"],
    "XMM4":  ["0x8888888888888880", "0xbff9"],
    "XMM5":  ["0x8888888888888880", "0xbff9"],
    "XMM6":  ["0x8888888888888880", "0xbff9"],
    "XMM7":  ["0x8888888888888880", "0xbff9"],
    "XMM8":  ["0x8888888888888880", "0xbff9"],
    "XMM9":  ["0x8888888888888880", "0xbff9"],
    "XMM10":  ["0x8888888888888880", "0xbff9"],
    "XMM11":  ["0x8888888888888880", "0xbff9"]
  }
}
%endif

%include "x87cw.mac"

; FPREM1 of a negative 80-bit operand by 1.0, repeated for every combination
; of the three precision settings and four rounding modes. RegData expects
; the same remainder in all twelve slots, i.e. the control word must not
; influence the result.

; fprem1_case precision, rounding, result_slot
;   Programs the control word, computes the IEEE partial remainder of
;   .source_1 / 1.0 and stores the 80-bit result into result_slot.
%macro fprem1_case 3
set_cw_precision_rounding %1, %2
fld1                      ; divisor 1.0, ends up in st1
fld tword [rel .source_1] ; dividend in st0
fprem1                    ; st0 = IEEE remainder of st0 / st1
fxch                      ; bring the divisor to the top...
fstp st0                  ; ...and discard it
fstp tword [rel %3]       ; store the remainder, leaving the stack empty
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fprem1_case x87_prec_80, x87_round_nearest, .result_1
fprem1_case x87_prec_64, x87_round_nearest, .result_2
fprem1_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fprem1_case x87_prec_80, x87_round_down, .result_4
fprem1_case x87_prec_64, x87_round_down, .result_5
fprem1_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fprem1_case x87_prec_80, x87_round_up, .result_7
fprem1_case x87_prec_64, x87_round_up, .result_8
fprem1_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fprem1_case x87_prec_80, x87_round_towards_zero, .result_10
fprem1_case x87_prec_64, x87_round_towards_zero, .result_11
fprem1_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8222_2222_2222_2222
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fscale.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8222222222222222", "0xc000"],
    "XMM1":  ["0x8222222222222222", "0xc000"],
    "XMM2":  ["0x8222222222222222", "0xc000"],
    "XMM3":  ["0x8222222222222222", "0xc000"],
    "XMM4":  ["0x8222222222222222", "0xc000"],
    "XMM5":  ["0x8222222222222222", "0xc000"],
    "XMM6":  ["0x8222222222222222", "0xc000"],
    "XMM7":  ["0x8222222222222222", "0xc000"],
    "XMM8":  ["0x8222222222222222", "0xc000"],
    "XMM9":  ["0x8222222222222222", "0xc000"],
    "XMM10":  ["0x8222222222222222", "0xc000"],
    "XMM11":  ["0x8222222222222222", "0xc000"]
  }
}
%endif

%include "x87cw.mac"

; FSCALE of a negative 80-bit operand by a scale factor of 1.0, repeated for
; every combination of the three precision settings and four rounding modes.
; RegData expects the same value in all twelve slots: the source significand
; unchanged with the exponent raised by one (0xbfff -> 0xc000), i.e. the
; control word must not influence the result.

; fscale_case precision, rounding, result_slot
;   Programs the control word, scales .source_1 by 2^1 and stores the
;   80-bit result into result_slot.
%macro fscale_case 3
set_cw_precision_rounding %1, %2
fld1                      ; scale factor 1.0, ends up in st1
fld tword [rel .source_1] ; value to scale in st0
fscale                    ; st0 = st0 * 2^trunc(st1)
fxch                      ; bring the scale factor to the top...
fstp st0                  ; ...and discard it
fstp tword [rel %3]       ; store the scaled value, leaving the stack empty
%endmacro

; NOTE(review): presumably gives set_cw_precision_rounding a valid stack to
; spill the control word to - confirm against x87cw.mac.
mov rsp, 0xe000_1000

finit ; enters x87 state

; Round-to-nearest: 80-, 64- and 32-bit mode
fscale_case x87_prec_80, x87_round_nearest, .result_1
fscale_case x87_prec_64, x87_round_nearest, .result_2
fscale_case x87_prec_32, x87_round_nearest, .result_3

; Round-down: 80-, 64- and 32-bit mode
fscale_case x87_prec_80, x87_round_down, .result_4
fscale_case x87_prec_64, x87_round_down, .result_5
fscale_case x87_prec_32, x87_round_down, .result_6

; Round-up: 80-, 64- and 32-bit mode
fscale_case x87_prec_80, x87_round_up, .result_7
fscale_case x87_prec_64, x87_round_up, .result_8
fscale_case x87_prec_32, x87_round_up, .result_9

; Round-towards-zero: 80-, 64- and 32-bit mode
fscale_case x87_prec_80, x87_round_towards_zero, .result_10
fscale_case x87_prec_64, x87_round_towards_zero, .result_11
fscale_case x87_prec_32, x87_round_towards_zero, .result_12

; Load each 16-byte result slot into the XMM register checked by RegData
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative operand: sign bit set, biased exponent 0x3fff
.source_1:
dq 0x8222_2222_2222_2222
dw 0xbfff

; Result slots. Each is 16 bytes so it can be read back with one movups;
; fstp tword only writes the low 10 bytes, the rest stays zero.
.result_1: dq 0, 0
.result_2: dq 0, 0
.result_3: dq 0, 0
.result_4: dq 0, 0
.result_5: dq 0, 0
.result_6: dq 0, 0
.result_7: dq 0, 0
.result_8: dq 0, 0
.result_9: dq 0, 0
.result_10: dq 0, 0
.result_11: dq 0, 0
.result_12: dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fsin.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM1":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM2":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM3":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM4":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM5":  ["0xd9b11c39ec002fd9", "0xbffe"],
    "XMM6":  ["0xd9b11c39ec002fd8", "0xbffe"],
    "XMM7":  ["0xd9b11c39ec002fd8", "0xbffe"],
    "XMM8":  ["0xd9b11c39ec002fd8", "0xbffe"],
    "XMM9":  ["0xd9b11c39ec002fd8", "0xbffe"],
    "XMM10":  ["0xd9b11c39ec002fd8", "0xbffe"],
    "XMM11":  ["0xd9b11c39ec002fd8", "0xbffe"]
  }
}
%endif

%include "x87cw.mac"

mov rsp, 0xe000_1000

finit ; enters x87 state

; 80-bit mode, round-nearest
set_cw_precision_rounding x87_prec_80, x87_round_nearest
fld tword [rel .source_1]
fsin
fstp tword [rel .result_1]

; 64-bit mode, round-nearest
set_cw_precision_rounding x87_prec_64, x87_round_nearest
fld tword [rel .source_1]
fsin
fstp tword [rel .result_2]

; 32-bit mode, round-nearest
set_cw_precision_rounding x87_prec_32, x87_round_nearest
fld tword [rel .source_1]
fsin
fstp tword [rel .result_3]

; 80-bit mode, round-down
set_cw_precision_rounding x87_prec_80, x87_round_down
fld tword [rel .source_1]
fsin
fstp tword [rel .result_4]

; 64-bit mode, round-down
set_cw_precision_rounding x87_prec_64, x87_round_down
fld tword [rel .source_1]
fsin
fstp tword [rel .result_5]

; 32-bit mode, round-down
set_cw_precision_rounding x87_prec_32, x87_round_down
fld tword [rel .source_1]
fsin
fstp tword [rel .result_6]

; 80-bit mode, round-up
set_cw_precision_rounding x87_prec_80, x87_round_up
fld tword [rel .source_1]
fsin
fstp tword [rel .result_7]

; 64-bit mode, round-up
set_cw_precision_rounding x87_prec_64, x87_round_up
fld tword [rel .source_1]
fsin
fstp tword [rel .result_8]

; 32-bit mode, round-up
set_cw_precision_rounding x87_prec_32, x87_round_up
fld tword [rel .source_1]
fsin
fstp tword [rel .result_9]

; 80-bit mode, round-towards_zero
set_cw_precision_rounding x87_prec_80, x87_round_towards_zero
fld tword [rel .source_1]
fsin
fstp tword [rel .result_10]

; 64-bit mode, round-towards_zero
set_cw_precision_rounding x87_prec_64, x87_round_towards_zero
fld tword [rel .source_1]
fsin
fstp tword [rel .result_11]

; 32-bit mode, round-towards_zero
set_cw_precision_rounding x87_prec_32, x87_round_towards_zero
fld tword [rel .source_1]
fsin
fstp tword [rel .result_12]

; Fetch results
movups xmm0, [rel .result_1]
movups xmm1, [rel .result_2]
movups xmm2, [rel .result_3]
movups xmm3, [rel .result_4]
movups xmm4, [rel .result_5]
movups xmm5, [rel .result_6]
movups xmm6, [rel .result_7]
movups xmm7, [rel .result_8]
movups xmm8, [rel .result_9]
movups xmm9, [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative
.source_1:
dq 0x8222_2222_2222_2222
dw 0xbfff

.result_1:
dq 0
dq 0

.result_2:
dq 0
dq 0

.result_3:
dq 0
dq 0

.result_4:
dq 0
dq 0

.result_5:
dq 0
dq 0

.result_6:
dq 0
dq 0

.result_7:
dq 0
dq 0

.result_8:
dq 0
dq 0

.result_9:
dq 0
dq 0

.result_10:
dq 0
dq 0

.result_11:
dq 0
dq 0

.result_12:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fsub.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0xbfff"],
    "XMM1":  ["0x8111111111111000", "0xbfff"],
    "XMM2":  ["0x8111110000000000", "0xbfff"],
    "XMM3":  ["0x8111111111111111", "0xbfff"],
    "XMM4":  ["0x8111111111111800", "0xbfff"],
    "XMM5":  ["0x8111120000000000", "0xbfff"],
    "XMM6":  ["0x8111111111111111", "0xbfff"],
    "XMM7":  ["0x8111111111111000", "0xbfff"],
    "XMM8":  ["0x8111110000000000", "0xbfff"],
    "XMM9":  ["0x8111111111111111", "0xbfff"],
    "XMM10":  ["0x8111111111111000", "0xbfff"],
    "XMM11":  ["0x8111110000000000", "0xbfff"]
  }
}
%endif

; Checks how FSUBP rounds a negative result under every combination of
; x87 precision control (80/64/32-bit) and rounding control.
; Each case computes (.source_1 - 0.0), so the stored value is simply
; .source_1 rounded to the selected precision in the selected direction.

%include "x87cw.mac"

; fsub_case <precision>, <rounding>, <result index>
; Programs the control word, performs the subtraction and stores the
; 80-bit result into .result_<index>.
%macro fsub_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]
  fld tword [rel .source_zero]
  fsubp
  fstp tword [rel .result_%3]
%endmacro

mov rsp, 0xe000_1000

finit ; enters x87 state

; Round to nearest
fsub_case x87_prec_80, x87_round_nearest, 1
fsub_case x87_prec_64, x87_round_nearest, 2
fsub_case x87_prec_32, x87_round_nearest, 3

; Round down (towards -inf): magnitude grows for a negative value
fsub_case x87_prec_80, x87_round_down, 4
fsub_case x87_prec_64, x87_round_down, 5
fsub_case x87_prec_32, x87_round_down, 6

; Round up (towards +inf): magnitude is truncated for a negative value
fsub_case x87_prec_80, x87_round_up, 7
fsub_case x87_prec_64, x87_round_up, 8
fsub_case x87_prec_32, x87_round_up, 9

; Round towards zero
fsub_case x87_prec_80, x87_round_towards_zero, 10
fsub_case x87_prec_64, x87_round_towards_zero, 11
fsub_case x87_prec_32, x87_round_towards_zero, 12

; Expose the twelve results to the harness through XMM0..XMM11
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative 80-bit operand: 64-bit significand followed by sign/exponent word
.source_1:
  dq 0x8111_1111_1111_1111
  dw 0xbfff

; All-zero 80-bit operand (+0.0), padded to 16 bytes
.source_zero:
  dq 0x0, 0x0

; 16-byte result slots, one per case
.result_1:
  dq 0, 0

.result_2:
  dq 0, 0

.result_3:
  dq 0, 0

.result_4:
  dq 0, 0

.result_5:
  dq 0, 0

.result_6:
  dq 0, 0

.result_7:
  dq 0, 0

.result_8:
  dq 0, 0

.result_9:
  dq 0, 0

.result_10:
  dq 0, 0

.result_11:
  dq 0, 0

.result_12:
  dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fsubr.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x8111111111111111", "0x3fff"],
    "XMM1":  ["0x8111111111111000", "0x3fff"],
    "XMM2":  ["0x8111110000000000", "0x3fff"],
    "XMM3":  ["0x8111111111111111", "0x3fff"],
    "XMM4":  ["0x8111111111111000", "0x3fff"],
    "XMM5":  ["0x8111110000000000", "0x3fff"],
    "XMM6":  ["0x8111111111111111", "0x3fff"],
    "XMM7":  ["0x8111111111111800", "0x3fff"],
    "XMM8":  ["0x8111120000000000", "0x3fff"],
    "XMM9":  ["0x8111111111111111", "0x3fff"],
    "XMM10":  ["0x8111111111111000", "0x3fff"],
    "XMM11":  ["0x8111110000000000", "0x3fff"]
  }
}
%endif

; Checks how FSUBRP rounds under every combination of x87 precision
; control (80/64/32-bit) and rounding control.
; Each case computes (0.0 - .source_1); .source_1 is negative, so the
; stored value is its magnitude with a positive sign, rounded to the
; selected precision in the selected direction.

%include "x87cw.mac"

; fsubr_case <precision>, <rounding>, <result index>
; Programs the control word, performs the reversed subtraction and stores
; the 80-bit result into .result_<index>.
%macro fsubr_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]
  fld tword [rel .source_zero]
  fsubrp
  fstp tword [rel .result_%3]
%endmacro

mov rsp, 0xe000_1000

finit ; enters x87 state

; Round to nearest
fsubr_case x87_prec_80, x87_round_nearest, 1
fsubr_case x87_prec_64, x87_round_nearest, 2
fsubr_case x87_prec_32, x87_round_nearest, 3

; Round down (towards -inf): truncates a positive value
fsubr_case x87_prec_80, x87_round_down, 4
fsubr_case x87_prec_64, x87_round_down, 5
fsubr_case x87_prec_32, x87_round_down, 6

; Round up (towards +inf): bumps a positive value to the next step
fsubr_case x87_prec_80, x87_round_up, 7
fsubr_case x87_prec_64, x87_round_up, 8
fsubr_case x87_prec_32, x87_round_up, 9

; Round towards zero
fsubr_case x87_prec_80, x87_round_towards_zero, 10
fsubr_case x87_prec_64, x87_round_towards_zero, 11
fsubr_case x87_prec_32, x87_round_towards_zero, 12

; Expose the twelve results to the harness through XMM0..XMM11
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative 80-bit operand: 64-bit significand followed by sign/exponent word
.source_1:
  dq 0x8111_1111_1111_1111
  dw 0xbfff

; All-zero 80-bit operand (+0.0), padded to 16 bytes
.source_zero:
  dq 0x0, 0x0

; 16-byte result slots, one per case
.result_1:
  dq 0, 0

.result_2:
  dq 0, 0

.result_3:
  dq 0, 0

.result_4:
  dq 0, 0

.result_5:
  dq 0, 0

.result_6:
  dq 0, 0

.result_7:
  dq 0, 0

.result_8:
  dq 0, 0

.result_9:
  dq 0, 0

.result_10:
  dq 0, 0

.result_11:
  dq 0, 0

.result_12:
  dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_ftan.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM1":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM2":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM3":  ["0xced9f672ba44b54a", "0xbfff"],
    "XMM4":  ["0xced9f672ba44b54a", "0xbfff"],
    "XMM5":  ["0xced9f672ba44b54a", "0xbfff"],
    "XMM6":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM7":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM8":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM9":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM10":  ["0xced9f672ba44b549", "0xbfff"],
    "XMM11":  ["0xced9f672ba44b549", "0xbfff"]
  }
}
%endif

; Runs FPTAN on a negative operand under every combination of x87
; precision control (80/64/32-bit) and rounding control, and records the
; tangent produced in each configuration.

%include "x87cw.mac"

; fptan_case <precision>, <rounding>, <result index>
; Programs the control word, evaluates FPTAN, discards the 1.0 that FPTAN
; pushes on top of the tangent, and stores the tangent into .result_<index>.
%macro fptan_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_1]
  fptan
  fstp st0
  fstp tword [rel .result_%3]
%endmacro

mov rsp, 0xe000_1000

finit ; enters x87 state

; Round to nearest
fptan_case x87_prec_80, x87_round_nearest, 1
fptan_case x87_prec_64, x87_round_nearest, 2
fptan_case x87_prec_32, x87_round_nearest, 3

; Round down
fptan_case x87_prec_80, x87_round_down, 4
fptan_case x87_prec_64, x87_round_down, 5
fptan_case x87_prec_32, x87_round_down, 6

; Round up
fptan_case x87_prec_80, x87_round_up, 7
fptan_case x87_prec_64, x87_round_up, 8
fptan_case x87_prec_32, x87_round_up, 9

; Round towards zero
fptan_case x87_prec_80, x87_round_towards_zero, 10
fptan_case x87_prec_64, x87_round_towards_zero, 11
fptan_case x87_prec_32, x87_round_towards_zero, 12

; Expose the twelve results to the harness through XMM0..XMM11
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; Negative 80-bit operand (sign bit set in the 0xbfff sign/exponent word)
.source_1:
  dq 0x8222_2222_2222_2222
  dw 0xbfff

; 16-byte result slots, one per case
.result_1:
  dq 0, 0

.result_2:
  dq 0, 0

.result_3:
  dq 0, 0

.result_4:
  dq 0, 0

.result_5:
  dq 0, 0

.result_6:
  dq 0, 0

.result_7:
  dq 0, 0

.result_8:
  dq 0, 0

.result_9:
  dq 0, 0

.result_10:
  dq 0, 0

.result_11:
  dq 0, 0

.result_12:
  dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fyl2x.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc333333333333333", "0xc001"],
    "XMM1":  ["0xc333333333333333", "0xc001"],
    "XMM2":  ["0xc333333333333333", "0xc001"],
    "XMM3":  ["0xc333333333333333", "0xc001"],
    "XMM4":  ["0xc333333333333333", "0xc001"],
    "XMM5":  ["0xc333333333333333", "0xc001"],
    "XMM6":  ["0xc333333333333333", "0xc001"],
    "XMM7":  ["0xc333333333333333", "0xc001"],
    "XMM8":  ["0xc333333333333333", "0xc001"],
    "XMM9":  ["0xc333333333333333", "0xc001"],
    "XMM10":  ["0xc333333333333333", "0xc001"],
    "XMM11":  ["0xc333333333333333", "0xc001"]
  }
}
%endif

; Runs FYL2X (y * log2(x)) with a negative y and x = 64 under every
; combination of x87 precision control (80/64/32-bit) and rounding
; control. The expected output is identical in all twelve configurations.

%include "x87cw.mac"

; fyl2x_case <precision>, <rounding>, <result index>
; Programs the control word, loads y (.source_2) then x (.source_1, an
; integer), evaluates FYL2X and stores the result into .result_<index>.
%macro fyl2x_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_2]
  fild qword [rel .source_1]
  fyl2x
  fstp tword [rel .result_%3]
%endmacro

mov rsp, 0xe000_1000

finit ; enters x87 state

; Round to nearest
fyl2x_case x87_prec_80, x87_round_nearest, 1
fyl2x_case x87_prec_64, x87_round_nearest, 2
fyl2x_case x87_prec_32, x87_round_nearest, 3

; Round down
fyl2x_case x87_prec_80, x87_round_down, 4
fyl2x_case x87_prec_64, x87_round_down, 5
fyl2x_case x87_prec_32, x87_round_down, 6

; Round up
fyl2x_case x87_prec_80, x87_round_up, 7
fyl2x_case x87_prec_64, x87_round_up, 8
fyl2x_case x87_prec_32, x87_round_up, 9

; Round towards zero
fyl2x_case x87_prec_80, x87_round_towards_zero, 10
fyl2x_case x87_prec_64, x87_round_towards_zero, 11
fyl2x_case x87_prec_32, x87_round_towards_zero, 12

; Expose the twelve results to the harness through XMM0..XMM11
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; x: 64-bit integer, log2(64) = 6
.source_1:
  dq 64

; y: negative 80-bit operand
.source_2:
  dq 0x8222_2222_2222_2222
  dw 0xbfff

; 16-byte result slots, one per case
.result_1:
  dq 0, 0

.result_2:
  dq 0, 0

.result_3:
  dq 0, 0

.result_4:
  dq 0, 0

.result_5:
  dq 0, 0

.result_6:
  dq 0, 0

.result_7:
  dq 0, 0

.result_8:
  dq 0, 0

.result_9:
  dq 0, 0

.result_10:
  dq 0, 0

.result_11:
  dq 0, 0

.result_12:
  dq 0, 0


================================================
FILE: unittests/ASM/X87/precision_test_neg_fyl2xp1.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM1":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM2":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM3":  ["0xc3ed7db72edb35db", "0xc001"],
    "XMM4":  ["0xc3ed7db72edb35db", "0xc001"],
    "XMM5":  ["0xc3ed7db72edb35db", "0xc001"],
    "XMM6":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM7":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM8":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM9":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM10":  ["0xc3ed7db72edb35da", "0xc001"],
    "XMM11":  ["0xc3ed7db72edb35da", "0xc001"]
  }
}
%endif

; Runs FYL2XP1 (y * log2(x + 1)) with a negative y and x = 64 under every
; combination of x87 precision control (80/64/32-bit) and rounding control.
; NOTE(review): x = 64 appears to lie outside the operand range Intel
; documents for FYL2XP1; the expected values presumably pin FEX's own
; behaviour for that input - confirm.

%include "x87cw.mac"

; fyl2xp1_case <precision>, <rounding>, <result index>
; Programs the control word, loads y (.source_2) then x (.source_1, an
; integer), evaluates FYL2XP1 and stores the result into .result_<index>.
%macro fyl2xp1_case 3
  set_cw_precision_rounding %1, %2
  fld tword [rel .source_2]
  fild qword [rel .source_1]
  fyl2xp1
  fstp tword [rel .result_%3]
%endmacro

mov rsp, 0xe000_1000

finit ; enters x87 state

; Round to nearest
fyl2xp1_case x87_prec_80, x87_round_nearest, 1
fyl2xp1_case x87_prec_64, x87_round_nearest, 2
fyl2xp1_case x87_prec_32, x87_round_nearest, 3

; Round down
fyl2xp1_case x87_prec_80, x87_round_down, 4
fyl2xp1_case x87_prec_64, x87_round_down, 5
fyl2xp1_case x87_prec_32, x87_round_down, 6

; Round up
fyl2xp1_case x87_prec_80, x87_round_up, 7
fyl2xp1_case x87_prec_64, x87_round_up, 8
fyl2xp1_case x87_prec_32, x87_round_up, 9

; Round towards zero
fyl2xp1_case x87_prec_80, x87_round_towards_zero, 10
fyl2xp1_case x87_prec_64, x87_round_towards_zero, 11
fyl2xp1_case x87_prec_32, x87_round_towards_zero, 12

; Expose the twelve results to the harness through XMM0..XMM11
movups xmm0,  [rel .result_1]
movups xmm1,  [rel .result_2]
movups xmm2,  [rel .result_3]
movups xmm3,  [rel .result_4]
movups xmm4,  [rel .result_5]
movups xmm5,  [rel .result_6]
movups xmm6,  [rel .result_7]
movups xmm7,  [rel .result_8]
movups xmm8,  [rel .result_9]
movups xmm9,  [rel .result_10]
movups xmm10, [rel .result_11]
movups xmm11, [rel .result_12]

hlt

align 4096
; x: 64-bit integer
.source_1:
  dq 64

; y: negative 80-bit operand
.source_2:
  dq 0x8222_2222_2222_2222
  dw 0xbfff

; 16-byte result slots, one per case
.result_1:
  dq 0, 0

.result_2:
  dq 0, 0

.result_3:
  dq 0, 0

.result_4:
  dq 0, 0

.result_5:
  dq 0, 0

.result_6:
  dq 0, 0

.result_7:
  dq 0, 0

.result_8:
  dq 0, 0

.result_9:
  dq 0, 0

.result_10:
  dq 0, 0

.result_11:
  dq 0, 0

.result_12:
  dq 0, 0


================================================
FILE: unittests/ASM/X87/valid_fist_16bit.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0",
    "RBX": "12346"
  }
}
%endif

; FISTP to a 16-bit destination with an in-range value.
; Expectations: the Invalid Operation flag stays clear (RAX = 0) and the
; stored integer is the rounded input (RBX = 12346).

; Bit 0 of the x87 status word: Invalid Operation (IE)
%define FSW_IE_MASK 1

finit
fld qword [rel .value]

; In-range conversion to int16; must not overflow
fistp word [rel .result]

; RAX <- IE flag only
fstsw ax
and rax, FSW_IE_MASK

; RBX <- converted integer, zero-extended
movzx rbx, word [rel .result]

hlt

align 4096
.value:
  dq 12345.75
.result:
  dw 0


================================================
FILE: unittests/ASM/X87/valid_operation.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  }
}
%endif

; A sequence of well-defined x87 operations must leave the Invalid
; Operation flag clear, so RAX is expected to be 0 at the end.

; Bit 0 of the x87 status word: Invalid Operation (IE)
%define FSW_IE_MASK 1

; Start from a clean FPU state (no stale exception flags)
finit

; Addition of two 1.0 constants
fld1
fld1
fadd

; Division with finite, non-zero operands
fld1
fdiv

; Square root of 1.0
fld1
fsqrt

; RAX <- IE flag only
fstsw ax
and rax, FSW_IE_MASK

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FADD m32fp: ST0 = 1.0 + 2.0f, expected 3.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fadd dword [rdx + 8]

; Slot 2 receives the sum, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FMUL m32fp: ST0 = 1.0 * 2.0f, expected 2.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fmul dword [rdx + 8]

; Slot 2 receives the product, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSUB m32fp: ST0 = 1.0 - 2.0f, expected -1.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fsub dword [rdx + 8]

; Slot 2 receives the difference, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSUBR m32fp: ST0 = 2.0f - 1.0, expected 1.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fsubr dword [rdx + 8]

; Slot 2 receives the difference, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_06_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3fe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FDIV m32fp: ST0 = 1.0 / 2.0f, expected 0.5 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fdiv dword [rdx + 8]

; Slot 2 receives the quotient, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FDIVR m32fp: ST0 = 2.0f / 1.0, expected 2.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0 (double); slot 1: 2.0 (single, upper dword zero)
mov rax, F64_ONE
mov [rdx], rax
mov rax, F32_TWO
mov [rdx + 8], rax

fld qword [rdx]
fdivr dword [rdx + 8]

; Slot 2 receives the quotient, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FADD ST0, ST(i): ST0 = 2.0 + 1.0, expected 3.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F64_TWO   0x4000000000000000

mov rdx, SCRATCH

; Slot 0: 1.0; slot 1: 2.0
mov rax, F64_ONE
mov [rdx], rax
mov rax, F64_TWO
mov [rdx + 8], rax

; After both loads: ST0 = 2.0, ST1 = 1.0
fld qword [rdx]
fld qword [rdx + 8]

; Register-register form under test
fadd st0, st1

; Slot 2 receives the sum, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4010000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FMUL ST0, ST0: squares 2.0, expected 4.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_TWO   0x4000000000000000

mov rdx, SCRATCH

; Slot 0: 2.0
mov rax, F64_TWO
mov [rdx], rax

fld qword [rdx]
fmul st0, st0

; Slot 2 receives the product, which is then read back into RAX
fst qword [rdx + 16]
mov rax, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_D9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCOMP: only the pop side effect is verified here, not the compare flags.
; The 0.0 pushed second must be gone after FCOMP, so both values read back
; from the stack are 1.0.

%define SCRATCH 0xe0000000

mov rdx, SCRATCH

fld1
fldz
fcomp        ; compares and pops the 0.0
fld1

; Drain the two remaining entries into RAX and RBX
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_E0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSUB ST0, ST1: ST0 = 1.0 - 2.0, expected -1.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F64_TWO   0x4000000000000000

mov rdx, SCRATCH

; Slot 0: 2.0; slot 1: 1.0
mov rax, F64_TWO
mov [rdx], rax
mov rax, F64_ONE
mov [rdx + 8], rax

; After both loads: ST0 = 1.0, ST1 = 2.0
fld qword [rdx]
fld qword [rdx + 8]
fsub st0, st1

; Read the difference back through slot 0
fst qword [rdx]
mov rax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_E8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSUBR ST0, ST1: ST0 = 2.0 - 1.0, expected 1.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_ONE   0x3ff0000000000000
%define F64_TWO   0x4000000000000000

mov rdx, SCRATCH

; Slot 0: 2.0; slot 1: 1.0
mov rax, F64_TWO
mov [rdx], rax
mov rax, F64_ONE
mov [rdx + 8], rax

; After both loads: ST0 = 1.0, ST1 = 2.0
fld qword [rdx]
fld qword [rdx + 8]
fsubr st0, st1

; Read the difference back through slot 0
fst qword [rdx]
mov rax, [rdx]
hlt


================================================
FILE: unittests/ASM/X87_F64/D8_F0_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Dividing 1.0 by 0.0 must not raise the Invalid Operation (IE) flag,
; so the masked status word in RAX is expected to be 0.

; Bit 0 of the x87 status word: Invalid Operation (IE)
%define FSW_IE_MASK 1

finit
fldz
fld1
fdiv st0, st1    ; ST0 = 1.0 / 0.0

; RAX <- IE flag only
fnstsw ax
and rax, FSW_IE_MASK
hlt


================================================
FILE: unittests/ASM/X87_F64/D8_F0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FDIV ST0, ST0: 2.0 / 2.0, expected 1.0 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_TWO   0x4000000000000000

mov rdx, SCRATCH

; Slot 0: 2.0
mov rax, F64_TWO
mov [rdx], rax

fld qword [rdx]
fdiv st0, st0

; Read the quotient back through slot 0
fst qword [rdx]
mov rax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D8_F8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3fe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FDIVR ST0, ST1: ST0 = 2.0 / 4.0, expected 0.5 as a double in RAX.

%define SCRATCH   0xe0000000
%define F64_TWO   0x4000000000000000
%define F64_FOUR  0x4010000000000000

mov rdx, SCRATCH

; Slot 0: 2.0; slot 1: 4.0
mov rax, F64_TWO
mov [rdx], rax
mov rax, F64_FOUR
mov [rdx + 8], rax

; After both loads: ST0 = 4.0, ST1 = 2.0
fld qword [rdx]
fld qword [rdx + 8]

fdivr st0, st1

; Read the quotient back through slot 0
fst qword [rdx]
mov rax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3f800000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLD m32fp round trip in reduced-precision mode: a single-precision 1.0
; is loaded onto the x87 stack and stored back as a single; the bit
; pattern read into RAX must be unchanged.
; The config requests reduced precision through the "Env" entry, the
; same mechanism the sibling X87_F64 tests use.

mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld dword [rdx + 8 * 0]
fst dword [rdx]

; Clear RAX before the reload so only the stored dword is compared
xor eax, eax
mov eax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FST m32fp: a single-precision 1.0 is loaded and stored to a different,
; pre-zeroed slot; the stored bit pattern is expected in RAX.

%define SCRATCH   0xe0000000
%define F32_ONE   0x3f800000

mov rdx, SCRATCH

; Slot 0: 1.0f; slot 1: cleared destination
mov eax, F32_ONE
mov [rdx], eax
mov eax, 0x0
mov [rdx + 8], eax

fld dword [rdx]
fst dword [rdx + 8]

mov eax, [rdx + 8]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_03_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3F800000",
    "RBX": "0x40000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSTP m32fp: stores 1.0f with a pop, then loads 2.0f and stores it with
; a plain FST. RAX captures the first store, RBX the second.

%define SCRATCH   0xe0000000
%define F32_ONE   0x3f800000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0f; slot 1: 2.0f; slot 2: cleared destination
mov eax, F32_ONE
mov [rdx], eax
mov eax, F32_TWO
mov [rdx + 8], eax
mov eax, 0x0 ; 0.0
mov [rdx + 16], eax

fld dword [rdx]
fstp dword [rdx + 16]      ; writes 1.0f and pops
fld dword [rdx + 8]

; RAX <- result of the FSTP, read before slot 2 is overwritten
mov eax, [rdx + 16]
fst dword [rdx + 16]       ; writes 2.0f
mov ebx, [rdx + 16]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_05_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Smoke test for FLDCW in reduced-precision mode: no register values are
; checked, the instruction only has to execute.

mov rdx, 0xe0000000
; Just to ensure execution
; NOTE(review): the control word is read from memory this test never
; writes - presumably the harness maps it zero-filled; confirm.
fldcw [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_06_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSTENV / FLDENV with the 32-bit (28-byte) environment layout.
; The environment is captured with one value on the stack, a second value
; is pushed, and the environment is restored. A third load must then land
; in the slot the second value used, leaving 2.0 on top of 1.0.

%define SCRATCH    0xe0000000
%define F32_ONE    0x3f800000
%define F32_TWO    0x40000000
%define F32_FOUR   0x40800000

mov rdx, SCRATCH

; Slot 0: 1.0f; slot 1: 2.0f; slot 2: 4.0f
mov eax, F32_ONE
mov [rdx], eax
mov eax, F32_TWO
mov [rdx + 8], eax
mov eax, F32_FOUR
mov [rdx + 16], eax

; Environment image lives at offset 24
fld dword [rdx]
o32 fstenv [rdx + 24]
fld dword [rdx + 16]
o32 fldenv [rdx + 24]

; Replaces the 4.0 pushed above: the restored environment rewinds the
; stack to its state at FSTENV time
fld dword [rdx + 8]

; Environment image, 16-bit form (14 bytes):
;   2 bytes  FCW
;   2 bytes  FSW
;   2 bytes  FTW
;   2 bytes  instruction offset
;   2 bytes  instruction CS selector
;   2 bytes  data offset
;   2 bytes  data selector
;
; Environment image, 32-bit form (28 bytes):
;   4 bytes  FCW
;   4 bytes  FSW
;   4 bytes  FTW
;   4 bytes  instruction pointer
;   2 bytes  instruction pointer selector
;   2 bytes  opcode
;   4 bytes  data pointer offset
;   4 bytes  data pointer selector

; RAX <- top of stack (2.0), RBX <- next entry (1.0)
fstp qword [rdx + 8]
mov rax, [rdx + 8]
fst qword [rdx + 8]
mov rbx, [rdx + 8]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_06_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSTENV / FLDENV with the 16-bit (14-byte) environment layout.
; The environment is captured with one value on the stack, a second value
; is pushed, and the environment is restored. A third load must then land
; in the slot the second value used, leaving 2.0 on top of 1.0.

%define SCRATCH    0xe0000000
%define F32_ONE    0x3f800000
%define F32_TWO    0x40000000
%define F32_FOUR   0x40800000

mov rdx, SCRATCH

; Slot 0: 1.0f; slot 1: 2.0f; slot 2: 4.0f
mov eax, F32_ONE
mov [rdx], eax
mov eax, F32_TWO
mov [rdx + 8], eax
mov eax, F32_FOUR
mov [rdx + 16], eax

; Environment image lives at offset 24
fld dword [rdx]
o16 fstenv [rdx + 24]
fld dword [rdx + 16]
o16 fldenv [rdx + 24]

; Replaces the 4.0 pushed above: the restored environment rewinds the
; stack to its state at FSTENV time
fld dword [rdx + 8]

; Environment image, 16-bit form (14 bytes):
;   2 bytes  FCW
;   2 bytes  FSW
;   2 bytes  FTW
;   2 bytes  instruction offset
;   2 bytes  instruction CS selector
;   2 bytes  data offset
;   2 bytes  data selector
;
; Environment image, 32-bit form (28 bytes):
;   4 bytes  FCW
;   4 bytes  FSW
;   4 bytes  FTW
;   4 bytes  instruction pointer
;   2 bytes  instruction pointer selector
;   2 bytes  opcode
;   4 bytes  data pointer offset
;   4 bytes  data pointer selector

; RAX <- top of stack (2.0), RBX <- next entry (1.0)
fstp qword [rdx + 8]
mov rax, [rdx + 8]
fst qword [rdx + 8]
mov rbx, [rdx + 8]
hlt


================================================
FILE: unittests/ASM/X87_F64/D9_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x37F"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FNSTCW: the control word stored at startup is expected to be 0x37F.

%define SCRATCH 0xe0000000

mov rdx, SCRATCH
fnstcw [rdx]

; Clear EAX first so only the 16-bit control word ends up in RAX
mov eax, 0
mov ax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x4000000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLD ST(i): duplicating ST0 must push a copy of the top of stack.
; Stack after the sequence, top first: 2.0, 2.0, 1.0.

%define SCRATCH 0xe0000000

lea rdx, [rel one_f80]
fld tword [rdx]

lea rdx, [rel two_f80]
fld tword [rdx]

; Instruction under test
fld st0

; Pop the three entries into RAX, RBX, RCX as doubles
mov rdx, SCRATCH
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt

; 80-bit constants, each followed by 8 bytes of padding
align 8
one_f80:
  dt 1.0
  dq 0
two_f80:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FXCH: swaps ST0 and ST1. After loading 1.0 then 2.0 and exchanging,
; the stack holds 1.0 on top of 2.0.

%define SCRATCH   0xe0000000
%define F32_ONE   0x3f800000
%define F32_TWO   0x40000000

mov rdx, SCRATCH

; Slot 0: 1.0f; slot 1: 2.0f
mov eax, F32_ONE
mov [rdx], eax
mov eax, F32_TWO
mov [rdx + 8], eax

fld dword [rdx]
fld dword [rdx + 8]

; Instruction under test
fxch

; Pop both entries into RAX and RBX as doubles
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_D0_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Smoke test for FNOP in reduced-precision mode: no register values are
; checked, the instruction only has to execute.

; Just to ensure execution
fnop
hlt


================================================
FILE: unittests/ASM/X87_F64/D9_E0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0xc000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCHS: flip the sign of ST0, checked for a positive and a negative input.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 2.0
fchs                    ; ST0 = -2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push -1.0
fchs                    ; ST0 = 1.0

; Pop the second result (1.0) into RAX.
fstp qword [rcx]
mov rax, [rcx]

; Read the first result (-2.0) into RBX without popping.
fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt -1.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_E1_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FABS: clear the sign of ST0, checked for a positive and a negative input.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 2.0
fabs                    ; ST0 stays 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push -1.0
fabs                    ; ST0 = 1.0

; Pop |-1.0| into RAX.
fstp qword [rcx]
mov rax, [rcx]

; Read |2.0| into RBX without popping.
fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt -1.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_E4_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x100",
    "RBX": "0x0",
    "RCX": "0x4000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FTST: compare ST0 against 0.0 and report the result in the status-word
; condition codes. The mask 0x4700 keeps C3 (bit 14), C2 (bit 10),
; C1 (bit 9) and C0 (bit 8).

; Positive operand: RegData expects all kept bits clear (RBX = 0).
fld dword [rel positive]
ftst
fnstsw ax
and rax, 0x4700
mov rbx, rax

; Zero operand: RegData expects only C3 set (RCX = 0x4000).
fldz
ftst
fnstsw ax
and rax, 0x4700
mov rcx, rax

; Negative operand: RegData expects only C0 set (RAX = 0x100).
fld dword [rel negative]
ftst
fnstsw ax
and rax, 0x4700

hlt

align 8
positive: dd 3.14159
negative: dd -2.71828

================================================
FILE: unittests/ASM/X87_F64/D9_E8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLD1: push the constant +1.0 and read it back as a double.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fld1

fst qword [rcx]
mov rax, [rcx] ; 0x3ff0000000000000 is 1.0 as a double

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_E9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x40549a78"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDL2T: push the constant log2(10) and read it back as a single.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldl2t

fst dword [rcx] ; Can't compare 64-bit precision with host
mov eax, [rcx]  ; 32-bit load zero-extends into RAX

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_EA_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3fb8aa3b"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDL2E: push the constant log2(e) and read it back as a single.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldl2e

fst dword [rcx] ; Can't compare 64-bit precision with host
mov eax, [rcx]  ; 32-bit load zero-extends into RAX

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_EB_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x40490fdb"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDPI: push the constant pi and read it back as a single.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldpi

fst dword [rcx] ; Can't compare 64-bit precision with host
mov eax, [rcx]  ; 32-bit load zero-extends into RAX

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_EC_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3e9a209b"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDLG2: push the constant log10(2) and read it back as a single.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldlg2

fst dword [rcx] ; Can't compare 64-bit precision with host
mov eax, [rcx]  ; 32-bit load zero-extends into RAX

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_ED_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3f317218"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDLN2: push the constant ln(2) and read it back as a single.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldln2

fst dword [rcx] ; Can't compare 64-bit precision with host
mov eax, [rcx]  ; 32-bit load zero-extends into RAX

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_EE_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": ["0"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLDZ: push the constant +0.0 and read it back as a double.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

fldz

fst qword [rcx]
mov rax, [rcx] ; +0.0 is all-zero bits

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_F0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x0"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; F2XM1: ST0 = 2^ST0 - 1. With an input of 0.0 the result is 0.0.
mov rbx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 0.0
f2xm1

fst qword [rbx]
mov rax, [rbx]

hlt

align 8
data:
  dt 0.0
  dq 0 ; 8 zero bytes following the tword


================================================
FILE: unittests/ASM/X87_F64/D9_F1_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x4020000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FYL2X: ST1 = ST1 * log2(ST0), then pop.
; Here 2.0 * log2(16.0) = 8.0.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 2.0 (becomes ST1, the multiplier)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 16.0 (ST0, the log2 argument)

fyl2x
fld1 ; push 1.0 on top so the pop performed by fyl2x can be observed

fstp qword [rcx]
mov rax, [rcx] ; 1.0 from fld1
fst qword [rcx]
mov rbx, [rcx] ; fyl2x result, 8.0

hlt

align 8
data:
  dt 16.0
  dq 0 ; 8 zero bytes following the tword

data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_F2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPTAN: replace ST0 with tan(ST0) and then push 1.0.
; The tangent is compared against a reference value with a relative
; tolerance instead of bit-exactly.
; NOTE(review): check_relerr_d and define_check_data_constants come from
; checkprecision.mac, which is not visible here; presumably the macro leaves
; 1 in RAX when the value is within tolerance (RegData expects RAX = 1) and
; does not clobber RBX -- confirm against the macro file.
%include "checkprecision.mac"

mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 1.0

fptan

; ST(0) = 1.0, ST(1) = tan(1.0)
fstp qword [rcx]
mov rbx, [rcx] ; the 1.0 pushed by fptan

fstp qword [rcx] ; tan(1.0) as a double, left in the scratch slot for the check
check_relerr_d rel expected_tan, rcx, rel tolerance

hlt

align 8
data:
  dt 1.0
  dq 0 ; 8 zero bytes following the tword
expected_tan:
  dq 0x3ff8eb245cbee3a5 ; tan(1.0)
tolerance:
  dq 0x3cb0000000000000 ; 2^-52, ~1 ULP relative error

define_check_data_constants


================================================
FILE: unittests/ASM/X87_F64/D9_F3_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x3ff921fb54442d18"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPATAN: ST1 = arctan(ST1 / ST0), then pop.
; Here arctan(7.0 / 0.0) = pi/2 (0x3ff921fb54442d18 as a double).
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 7.0 (becomes ST1)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 0.0 (ST0)

fpatan
fld1 ; push 1.0 on top so the pop performed by fpatan can be observed

fstp qword [rcx]
mov rax, [rcx] ; 1.0 from fld1

fstp qword [rcx]
mov rbx, [rcx] ; fpatan result

hlt

align 8
data:
  dt 7.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_F4_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  ["0xFFF0000000000000"],
    "RBX":  ["0x0000000000000000"],
    "RCX":  ["0xFFF0000000000000"],
    "RDX":  ["0x8000000000000000"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "MemoryRegions": {
    "0x100000000": "4096"
  }
}
%endif

; FXTRACT with zero inputs: for +0.0 and -0.0 the exponent result is
; -infinity and the significand result is a zero with the sign of the input.

; Instead of checking MMX registers,
; move results to general purpose registers and check them there
; so that hostrunner tests work properly.

finit
fldz
fxtract ; ST0 = significand, ST1 = exponent
fstp qword [rel sigz]
fstp qword [rel expz]

lea rdx, [rel nzer]
fld qword [rdx] ; push -0.0
fxtract
fstp qword [rel signz]
fstp qword [rel expnz]

mov rax, [rel expz]  ; exponent of +0.0: -infinity
mov rbx, [rel sigz]  ; significand of +0.0: +0.0
mov rcx, [rel expnz] ; exponent of -0.0: -infinity
mov rdx, [rel signz] ; significand of -0.0: -0.0

hlt


align 4096
nzer: dq -0.0 ; negative-zero input
expz: dq 0    ; result slots written by the fstp instructions above
sigz: dq 0
expnz: dq 0
signz: dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_F4_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0xbffe000000000000",
    "RBX":  "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FXTRACT: split ST0 into exponent (left in ST1) and significand (new ST0).
; -15.0 = -1.875 * 2^3, so the significand is -1.875 and the exponent 3.0.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push -15.0

fxtract

fstp qword [rcx]
mov rax, [rcx] ; significand: -1.875
fst qword [rcx]
mov rbx, [rcx] ; exponent: 3.0

hlt

align 8
data:
  dt -15.0
  dq 0 ; 8 zero bytes following the tword


================================================
FILE: unittests/ASM/X87_F64/D9_F5_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0xbf666666",
    "RBX":  "0x40400000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPREM1: IEEE remainder of ST0 / ST1 (quotient rounded to nearest).
; 5.1 / 3.0 rounds to 2, so the remainder is 5.1 - 6.0 = -0.9.
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 3.0 (becomes ST1, the divisor)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 5.1 (ST0, the dividend)

fprem1

; Store as single precision to get around precision issues

fstp dword [rcx]
mov eax, [rcx] ; remainder, about -0.9

fst dword [rcx]
mov ebx, [rcx] ; divisor is left untouched: 3.0

hlt

align 8
data:
  dt 3.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_F6_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "7",
    "RBX":  "0",
    "MM0":  "0x3ff0000000000000",
    "MM1":  "0x4070000000000000",
    "MM2":  "0x4060000000000000",
    "MM3":  "0x4050000000000000",
    "MM4":  "0x4040000000000000",
    "MM5":  "0x4030000000000000",
    "MM6":  "0x4020000000000000",
    "MM7":  "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FDECSTP: decrement the x87 top-of-stack pointer without moving data.
; Set the stack with different values.
; Then do fdecstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
mov rax, 0x3ff0000000000000 ; 1.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4000000000000000 ; 2.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4020000000000000 ; 8.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4030000000000000 ; 16.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4040000000000000 ; 32.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4050000000000000 ; 64.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4060000000000000 ; 128.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4070000000000000 ; 256.0
mov [rel temp], rax
fld qword [rel temp]

; Store top in RBX
; (TOP is bits 11-13 of the status word; eight pushes wrap it back to 0)
xor rax, rax
xor rbx, rbx
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently 0x4070000000000000
fdecstp

; Store top in RAX
xor rax, rax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is 0x3ff0000000000000
fstp qword [rel stack + 8 * 0]
fstp qword [rel stack + 8 * 1]
fstp qword [rel stack + 8 * 2]
fstp qword [rel stack + 8 * 3]
fstp qword [rel stack + 8 * 4]
fstp qword [rel stack + 8 * 5]
fstp qword [rel stack + 8 * 6]
fstp qword [rel stack + 8 * 7]

; Copy the dumped stack into MM0-MM7 for the RegData comparison.
movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
temp: dq 0           ; staging slot for each value before it is pushed
stack: times 8 dq 0  ; ST0..ST7 dumped as doubles


================================================
FILE: unittests/ASM/X87_F64/D9_F7_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "0",
    "MM0":  "0x4060000000000000",
    "MM1":  "0x4050000000000000",
    "MM2":  "0x4040000000000000",
    "MM3":  "0x4030000000000000",
    "MM4":  "0x4020000000000000",
    "MM5":  "0x4000000000000000",
    "MM6":  "0x3ff0000000000000",
    "MM7":  "0x4070000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FINCSTP: increment the x87 top-of-stack pointer without moving data.
; Set the stack with different values.
; Then do fincstp and store the stack values into MMX registers through memory
; such that MM0 has the value of ST0 and so on.
mov rax, 0x3ff0000000000000 ; 1.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4000000000000000 ; 2.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4020000000000000 ; 8.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4030000000000000 ; 16.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4040000000000000 ; 32.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4050000000000000 ; 64.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4060000000000000 ; 128.0
mov [rel temp], rax
fld qword [rel temp]

mov rax, 0x4070000000000000 ; 256.0
mov [rel temp], rax
fld qword [rel temp]

; Store top in RBX
; (TOP is bits 11-13 of the status word; eight pushes wrap it back to 0)
xor rax, rax
xor rbx, rbx ; `mov bx, ax` only writes 16 bits, but RegData checks all of RBX
fnstsw ax
shr ax, 11
and ax, 7
mov bx, ax

; Move the value of top
; ST0 is currently 0x4070000000000000
fincstp

; Store top in RAX
xor rax, rax
fnstsw ax
shr ax, 11
and ax, 7

; Now ST0 is 0x4060000000000000
fstp qword [rel stack + 8 * 0]
fstp qword [rel stack + 8 * 1]
fstp qword [rel stack + 8 * 2]
fstp qword [rel stack + 8 * 3]
fstp qword [rel stack + 8 * 4]
fstp qword [rel stack + 8 * 5]
fstp qword [rel stack + 8 * 6]
fstp qword [rel stack + 8 * 7]

; Copy the dumped stack into MM0-MM7 for the RegData comparison.
movq mm0, [rel stack + 8 * 0]
movq mm1, [rel stack + 8 * 1]
movq mm2, [rel stack + 8 * 2]
movq mm3, [rel stack + 8 * 3]
movq mm4, [rel stack + 8 * 4]
movq mm5, [rel stack + 8 * 5]
movq mm6, [rel stack + 8 * 6]
movq mm7, [rel stack + 8 * 7]

hlt

align 4096
temp: dq 0           ; staging slot for each value before it is pushed
stack: times 8 dq 0  ; ST0..ST7 dumped as doubles


================================================
FILE: unittests/ASM/X87_F64/D9_F8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x40066666",
    "RBX": "0x40400000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPREM: partial remainder of ST0 / ST1 (quotient truncated toward zero).
; 5.1 mod 3.0 = 2.1.
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 3.0 (becomes ST1, the divisor)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 5.1 (ST0, the dividend)

fprem

; Results are stored as singles into the data2 area, which rdx still points at.
fstp dword [rdx + 8]
mov eax, [rdx + 8] ; remainder, about 2.1
fst dword [rdx + 8]
mov ebx, [rdx + 8] ; divisor is left untouched: 3.0

hlt

align 4096
data:
  dt 3.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_F9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3f800000",
    "RBX": "0x41be320c"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FYL2XP1: ST1 = ST1 * log2(ST0 + 1.0), then pop.
; Here 15.0 * log2(3.0), about 23.77 (0x41be320c as a single).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 15.0 (becomes ST1, the multiplier)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 2.0 (ST0)

fyl2xp1
fld1 ; push 1.0 on top so the pop performed by fyl2xp1 can be observed

mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp dword [rcx]
mov eax, [rcx] ; 1.0 from fld1
fstp dword [rcx]
mov ebx, [rcx] ; fyl2xp1 result, compared in single precision

hlt

align 8
data:
  dt 15.0
  dq 0 ; 8 zero bytes following the tword

data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_FA_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSQRT: ST0 = sqrt(ST0). sqrt(16.0) = 4.0.
mov rbx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 16.0

fsqrt

fst qword [rbx]
mov rax, [rbx] ; 0x4010000000000000 is 4.0 as a double

hlt

align 8
data:
  dt 16.0
  dq 0 ; 8 zero bytes following the tword


================================================
FILE: unittests/ASM/X87_F64/D9_FB_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSINCOS: replace ST0 with sin(ST0) and push cos of the original value.
; Both results are compared against reference values with a relative
; tolerance instead of bit-exactly.
; NOTE(review): check_relerr_d and define_check_data_constants come from
; checkprecision.mac, which is not visible here; presumably the macro leaves
; 1 in RAX on success and does not clobber R8 -- confirm against the macro file.
%include "checkprecision.mac"

mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 1.0

fsincos

; st0 = cos, st1 = sin
fstp qword [rcx]
check_relerr_d rel expected_cos, rcx, rel tolerance
mov r8, rax ; keep the cosine verdict while the sine is checked

fstp qword [rcx]
check_relerr_d rel expected_sin, rcx, rel tolerance
and rax, r8 ; combine both verdicts into RAX

hlt

align 8
data:
  dt 1.0
  dq 0 ; 8 zero bytes following the tword
expected_cos:
  dq 0x3fe14a280fb5068c ; cos(1.0)
expected_sin:
  dq 0x3feaed548f090cee ; sin(1.0)
tolerance:
  dq 0x3cb0000000000000 ; 2^-52, ~1 ULP relative error

define_check_data_constants


================================================
FILE: unittests/ASM/X87_F64/D9_FC_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FRNDINT: round ST0 to an integer using the current rounding mode.
; RegData expects 1.02546 to round to 1.0.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

mov eax, 0x3f834241 ; 1.02546
mov [rdx + 8 * 0], eax

fld dword [rdx + 8 * 0]

frndint

fst qword [rdx]
mov rax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/D9_FD_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0xc01a000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSCALE with a negative, non-integer scale: ST0 = ST0 * 2^trunc(ST1).
; trunc(-6.5) = -6, so 64.0 * 2^-6 = 1.0.
lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push -6.5 (becomes ST1, the scale)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 64.0 (ST0, the value being scaled)

fscale

mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rcx]
mov rax, [rcx] ; scaled result: 1.0
fstp qword [rcx]
mov rbx, [rcx] ; the scale operand is left untouched: -6.5

hlt

align 8
data:
  dt 64.0
  dq 0 ; 8 zero bytes following the tword

data2:
  dt -6.5
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_FD_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x43000000",
    "RBX":  "0x40b00000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSCALE: ST0 = ST0 * 2^trunc(ST1).
; trunc(5.5) = 5, so 4.0 * 2^5 = 128.0 (0x43000000 as a single).
mov rcx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; push 5.5 (becomes ST1, the scale)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 4.0 (ST0, the value being scaled)

fscale

; Store as single precision to get around precision issues

fstp dword [rcx]
mov eax, [rcx] ; scaled result: 128.0

fst dword [rcx]
mov ebx, [rcx] ; the scale operand is left untouched: 5.5

hlt

align 8
data:
  dt 4.0
  dq 0 ; 8 zero bytes following the tword

data2:
  dt 5.5
  dq 0


================================================
FILE: unittests/ASM/X87_F64/D9_FE_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSIN: ST0 = sin(ST0), compared against a reference value with a relative
; tolerance instead of bit-exactly.
; NOTE(review): check_relerr_d and define_check_data_constants come from
; checkprecision.mac, which is not visible here; presumably the macro leaves
; 1 in RAX on success (RegData expects RAX = 1) -- confirm against the macro file.
%include "checkprecision.mac"

mov rbx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 1.0

fsin

fst qword [rbx] ; sin(1.0) as a double, left in the scratch slot for the check

check_relerr_d rel expected, rbx, rel tolerance
hlt

align 8
data:
  dt 1.0
  dq 0 ; 8 zero bytes following the tword
expected:
  dq 0x3feaed548f090cee ; sin(1.0)
tolerance:
  dq 0x3cb0000000000000 ; 2^-52, ~1 ULP relative error

define_check_data_constants


================================================
FILE: unittests/ASM/X87_F64/D9_FF_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0xbfdaa22657537205"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCOS: ST0 = cos(ST0). The double result for cos(2.0) is compared bit-exactly.
mov rbx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 2.0

fcos

fst qword [rbx]
mov rax, [rbx] ; about -0.41615

hlt

align 8
data:
  dt 2.0
  dq 0 ; 8 zero bytes following the tword


================================================
FILE: unittests/ASM/X87_F64/DA_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x4000000000000000",
    "RSI":  "0xC000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FIMUL m32int: ST0 = ST0 * (signed 32-bit integer from memory).
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Positive integer operand: 1.0 * 2 = 2.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fimul dword [rdx + 8 * 1]

fst qword [rdx]
mov rcx, [rdx]

; Test negative
; 1.0 * -2 = -2.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fimul dword [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI":  "0x18"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FICOMP m32int: compare ST0 with a signed 32-bit integer and pop.
; Four comparisons are run; after each one the C3 ("equal") flag is OR'd
; into RSI and RSI is shifted left by one. Two equal results followed by two
; unequal ones therefore give 0b11000 = 0x18.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
mov rsi, 0

; Matching positive-positive
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Matching negative-negative
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov eax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching negative-positive
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov eax, 1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching positive-negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp dword [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0xbff0000000000000",
    "RSI":  "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FISUB m32int: ST0 = ST0 - (signed 32-bit integer from memory).
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Positive integer operand: 1.0 - 2 = -1.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fisub dword [rdx + 8 * 1]

fst qword [rdx]
mov rcx, [rdx]

; Test negative
; 1.0 - (-2) = 3.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fisub dword [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x3ff0000000000000",
    "RSI":  "0xc008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FISUBR m32int: ST0 = (signed 32-bit integer from memory) - ST0.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Positive integer operand: 2 - 1.0 = 1.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fisubr dword [rdx + 8 * 1]

fst qword [rdx]
mov rcx, [rdx]

; Test negative
; -2 - 1.0 = -3.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fisubr dword [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_06_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x3fe0000000000000",
    "RSI":  "0xbfe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FIDIV m32int: ST0 = ST0 / (signed 32-bit integer from memory).
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Positive integer operand: 1.0 / 2 = 0.5
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fidiv dword [rdx + 8 * 1]

fst qword [rdx]
mov rcx, [rdx]

; Test negative
; 1.0 / -2 = -0.5
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fidiv dword [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x4000000000000000",
    "RSI":  "0xc000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FIDIVR m32int: ST0 = (signed 32-bit integer from memory) / ST0.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Positive integer operand: 2 / 1.0 = 2.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, 2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fidivr dword [rdx + 8 * 1]

fst qword [rdx]
mov rcx, [rdx]

; Test negative
; -2 / 1.0 = -2.0
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov eax, -2
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
fidivr dword [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x0000000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVB: copy ST(i) into ST0 when CF = 1 (below).
; Exercised once with the condition false and once with it true.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 2
cmp eax, 1 ; 2 is not below 1 -> CF = 0

fcmovb st0, st1 ; not taken, ST0 stays 0.0

fldz ; ST0 = 0.0, ST1 = 0.0, ST2 = 1.0
cmp eax, 3 ; 2 is below 3 -> CF = 1
fcmovb st0, st2 ; taken, ST0 = 1.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000000",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVE: copy ST(i) into ST0 when ZF = 1 (equal).
; Exercised once with the condition true and once with it false.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 1
cmp eax, 1 ; equal -> ZF = 1

fcmove st0, st1 ; taken, ST0 = 1.0

fldz ; ST0 = 0.0, ST1 = 1.0, ST2 = 1.0
cmp eax, 0 ; not equal -> ZF = 0
fcmove st0, st2 ; not taken, ST0 stays 0.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_D0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000000",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVBE: copy ST(i) into ST0 when CF = 1 or ZF = 1 (below or equal).
; Exercised once with the condition true and once with it false.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 2
cmp eax, 2 ; equal -> ZF = 1

fcmovbe st0, st1 ; taken, ST0 = 1.0

fldz ; ST0 = 0.0, ST1 = 1.0, ST2 = 1.0
cmp eax, 0 ; 2 is above 0 -> CF = 0, ZF = 0
fcmovbe st0, st2 ; not taken, ST0 stays 0.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_D8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x0000000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVU: copy ST(i) into ST0 when PF = 1 (unordered).
; PF is produced here with integer compares whose result low byte has odd
; or even parity.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 0x0
cmp eax, -1 ; 0 - (-1) = 1: low byte 0x01 has odd parity -> PF = 0

fcmovu st0, st1 ; not taken, ST0 stays 0.0

fldz ; ST0 = 0.0, ST1 = 0.0, ST2 = 1.0
cmp eax, 1 ; 0 - 1 = -1: low byte 0xFF has even parity -> PF = 1
fcmovu st0, st2 ; taken, ST0 = 1.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_D9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCOMPP: compare ST0 with ST1 and pop both.
; The comparison result itself is not checked.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; Only tests pop behaviour
fld1
fldz
fldz
fcompp ; pops the two zeros, leaving the 1.0 on top
fld1

; Both remaining entries must be 1.0.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DA_E9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FUCOMPP: unordered compare of ST0 with ST1, then pop both.
; The comparison result itself is not checked.
; Only tests pop behaviour
fld1
fldz
fldz
fucompp ; pops the two zeros, leaving the 1.0 on top
fld1

; Both remaining entries must be 1.0.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4090000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FILD m32int: convert a signed 32-bit integer to floating point and push it.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

mov eax, 1024
mov [rdx + 8 * 0], eax

fild dword [rdx + 8 * 0]

fstp qword [rdx]
mov rax, [rdx] ; 0x4090000000000000 is 1024.0 as a double

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FISTTP m32int: store ST0 as a truncated signed 32-bit integer and pop.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0
mov [rdx + 8 * 1], eax ; zero the destination slot first

fld dword [rdx + 8 * 0]

fisttp dword [rdx + 8 * 1]

fld1 ; push 1.0 so the pop performed by fisttp can be observed

mov eax, [rdx + 8 * 1] ; integer result: 1024 = 0x400

fst qword [rdx]
mov rbx, [rdx] ; 1.0 from fld1

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x4090000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FIST m32int: store ST0 as a signed 32-bit integer without popping.
; The Env entry above matches the other tests in this directory.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0
mov [rdx + 8 * 1], eax ; zero the destination slot first

fld dword [rdx + 8 * 0]

fist dword [rdx + 8 * 1]

fld1 ; push 1.0 on top; the 1024.0 must still be underneath

mov eax, [rdx + 8 * 1] ; integer result: 1024 = 0x400

fstp qword [rdx]
mov rbx, [rdx] ; 1.0 from fld1

fst qword [rdx]
mov rcx, [rdx] ; 1024.0 was not popped by fist

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_03_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": ["0x3ff0000000000000"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FISTP m32int: store ST0 as a signed 32-bit integer and pop.
; The Env entry above matches the other tests in this directory.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
mov eax, 0
mov [rdx + 8 * 1], eax ; zero the destination slot first

fld dword [rdx + 8 * 0]

fistp dword [rdx + 8 * 1]

fld1 ; push 1.0 so the pop performed by fistp can be observed

mov eax, [rdx + 8 * 1] ; integer result: 1024 = 0x400

fst qword [rdx]
mov rbx, [rdx] ; 1.0 from fld1

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FLD m80fp: load an 80-bit extended-precision value and read it back
; as a double.
lea rdx, [rel data]

fld tword [rdx + 8 * 0] ; push 2.0

mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rdx]
mov rax, [rdx] ; 0x4000000000000000 is 2.0 as a double

hlt

align 8
data:
  dt 2.0
  dq 0 ; 8 zero bytes following the tword


================================================
FILE: unittests/ASM/X87_F64/DB_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSTP m80fp: store ST0 as an 80-bit extended-precision value and pop.
; The value is round-tripped through memory and then read back as a double.
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 2.0

lea rdx, [rel data2]
fstp tword [rdx + 8 * 0] ; overwrite the 0.0 in data2 with 2.0
fld tword [rdx + 8 * 0]  ; reload what was just stored

mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rdx]
mov rax, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dt 0.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DB_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000000",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVNB: copy ST(i) into ST0 when CF = 0 (not below).
; Exercised once with the condition true and once with it false.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 2
cmp eax, 1 ; 2 is not below 1 -> CF = 0

fcmovnb st0, st1 ; taken, ST0 = 1.0

fldz ; ST0 = 0.0, ST1 = 1.0, ST2 = 1.0
cmp eax, 3 ; 2 is below 3 -> CF = 1
fcmovnb st0, st2 ; not taken, ST0 stays 0.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x0000000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVNE: copy ST(i) into ST0 when ZF = 0 (not equal).
; Exercised once with the condition false and once with it true.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 1
cmp eax, 1 ; equal -> ZF = 1

fcmovne st0, st1 ; not taken, ST0 stays 0.0

fldz ; ST0 = 0.0, ST1 = 0.0, ST2 = 1.0
cmp eax, 0 ; not equal -> ZF = 0
fcmovne st0, st2 ; taken, ST0 = 1.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]


hlt


================================================
FILE: unittests/ASM/X87_F64/DB_D0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x0000000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVNBE: copy ST(i) into ST0 when CF = 0 and ZF = 0 (above).
; Exercised once with the condition false and once with it true.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 2
cmp eax, 2 ; equal -> ZF = 1

fcmovnbe st0, st1 ; not taken, ST0 stays 0.0

fldz ; ST0 = 0.0, ST1 = 0.0, ST2 = 1.0
cmp eax, 0 ; 2 is above 0 -> CF = 0, ZF = 0
fcmovnbe st0, st2 ; taken, ST0 = 1.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_D8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x0000000000000000",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FCMOVNU: copy ST(i) into ST0 when PF = 0 (not unordered).
; PF is produced here with integer compares whose result low byte has odd
; or even parity.
mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)

; This stored single is not read back by the test; EAX and [rdx] are both
; overwritten before their next use.
mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

fld1
fldz ; ST0 = 0.0, ST1 = 1.0

mov eax, 0x0
cmp eax, -1 ; 0 - (-1) = 1: low byte 0x01 has odd parity -> PF = 0

fcmovnu st0, st1 ; taken, ST0 = 1.0

fldz ; ST0 = 0.0, ST1 = 1.0, ST2 = 1.0
cmp eax, 1 ; 0 - 1 = -1: low byte 0xFF has even parity -> PF = 1
fcmovnu st0, st2 ; not taken, ST0 stays 0.0

; Dump the stack from the top: RAX = ST0, RBX = ST1, RCX = ST2.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DB_E3.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x037F"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FNINIT: reinitialise the x87 FPU, then verify the control word it sets.
fninit

; Ensures that fnstcw after fninit sets the correct value
fnstcw [rel control]
; Zero-extending load: RegData compares the full RAX, so the upper bits must
; not depend on whatever the register held on entry.
movzx eax, word [rel control]

hlt

align 4096
control:
times 2 db 0 ; Reserve space for the FPU control word


================================================
FILE: unittests/ASM/X87_F64/DC_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif


; FADD m64fp: ST0 = ST0 + (double from memory). 1.0 + 2.0 = 3.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 1.0

lea rdx, [rel data2]
fadd qword [rdx + 8 * 0] ; add the double 2.0

mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rdx]
mov rax, [rdx] ; 0x4008000000000000 is 3.0 as a double

hlt

align 8
data:
  dt 1.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DC_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FMUL m64fp: ST0 = ST0 * (double from memory). 1.0 * 2.0 = 2.0.
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; push 1.0

lea rdx, [rel data2]
fmul qword [rdx + 8 * 0] ; multiply by the double 2.0

mov rdx, 0xe0000000 ; scratch address (presumably mapped by the test harness)
fstp qword [rdx]
mov rax, [rdx] ; 0x4000000000000000 is 2.0 as a double

hlt

align 4096
data:
  dt 1.0
  dq 0 ; 8 zero bytes following the tword
data2:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DC_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xbff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUB m64fp: st0 = st0 - [mem64].
; Expected: RAX = 0xbff0000000000000 (1.0 - 2.0 = -1.0 as a double).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 1.0 (80-bit load)

lea rdx, [rel data2]
fsub qword [rdx + 8 * 0] ; st0 -= 2.0

; Store the result as a double and read its raw bits into RAX.
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
fstp qword [rdx]
mov rax, [rdx]

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DC_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUBR m64fp: st0 = [mem64] - st0.
; Expected: RAX = 0x3ff0000000000000 (2.0 - 1.0 = 1.0 as a double).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 1.0 (80-bit load)

lea rdx, [rel data2]
fsubr qword [rdx + 8 * 0] ; st0 = 2.0 - st0

; Store the result as a double and read its raw bits into RAX.
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
fstp qword [rdx]
mov rax, [rdx]

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DC_06_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3fe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIV m64fp: st0 = st0 / [mem64].
; Expected: RAX = 0x3fe0000000000000 (1.0 / 2.0 = 0.5 as a double).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 1.0 (80-bit load)

lea rdx, [rel data2]
fdiv qword [rdx + 8 * 0] ; st0 /= 2.0

; Store the result as a double and read its raw bits into RAX.
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
fstp qword [rdx]
mov rax, [rdx]

hlt

align 8
data:
  dt 1.0
  dq 0
data2:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DC_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIVR m64fp: st0 = [mem64] / st0.
; Expected: RAX = 0x4010000000000000 (8.0 / 2.0 = 4.0 as a double).
; RBX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rbx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0 (80-bit load)

lea rdx, [rel data2]
fdivr qword [rdx + 8 * 0] ; st0 = 8.0 / st0

; Store (without popping) as a double and read its raw bits into RAX.
fst qword [rbx]
mov rax, [rbx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dq 8.0


================================================
FILE: unittests/ASM/X87_F64/DC_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4010000000000000",
    "RBX": "0x4000000000000000",
    "RCX": "0x4014000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif


; Tests FADD st(i), st(0) with i = 2: the sum is written to st2, st0 is unchanged.
; Expected after popping st0, st1, st2 in order:
;   RAX = 4.0, RBX = 2.0, RCX = 0x4014000000000000 (1.0 + 4.0 = 5.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 1], rax
mov rax, 0x4010000000000000 ; 4.0
mov [rdx + 8 * 2], rax

; After these loads: st0 = 4.0, st1 = 2.0, st2 = 1.0
fld qword [rdx + 8 * 0]
fld qword [rdx + 8 * 1]
fld qword [rdx + 8 * 2]

; fadd st(i), st(0)
fadd st2, st0

; Pop the three registers, capturing each as raw double bits.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DC_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4010000000000000",
    "RBX": "0x4020000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FMUL st(i), st(0) with i = 1: st1 = st1 * st0, st0 is unchanged.
; Expected: RAX = 4.0 (st0), RBX = 0x4020000000000000 (2.0 * 4.0 = 8.0, st1).
; NOTE: this value of RDX is never used; it is overwritten by the lea below.
mov rdx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0

fmul st1, st0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DC_E0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUBR st(i), st(0) with i = 1: st1 = st0 - st1, st0 is unchanged.
; Expected: RAX = 2.0 (st0), RBX = 0x3ff0000000000000 (2.0 - 1.0 = 1.0, st1).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 1.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 2.0, st1 = 1.0

fsubr st1, st0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DC_E8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0xbff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUB st(i), st(0) with i = 1: st1 = st1 - st0, st0 is unchanged.
; Expected: RAX = 2.0 (st0), RBX = 0xbff0000000000000 (1.0 - 2.0 = -1.0, st1).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 1.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 2.0, st1 = 1.0

fsub st1, st0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]


hlt

align 4096
data:
  dt 1.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DC_F0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x3fe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIVR st(i), st(0) with i = 1: st1 = st0 / st1, st0 is unchanged.
; Expected: RAX = 1.0 (st0), RBX = 0x3fe0000000000000 (1.0 / 2.0 = 0.5, st1).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0, st1 = 2.0

fdivr st1, st0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DC_F8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x4010000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIV st(i), st(0) with i = 1: st1 = st1 / st0, st0 is unchanged.
; Expected: RAX = 2.0 (st0), RBX = 0x4010000000000000 (8.0 / 2.0 = 4.0, st1).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 8.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 2.0, st1 = 8.0

fdiv st1, st0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 8.0
  dq 0
data2:
  dt 2.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FLD m64fp: a double loaded and stored back must round-trip unchanged.
; Expected: RAX = 0x4000000000000000 (2.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x4000000000000000 ; 2.0
mov [rdx + 8 * 0], rax

fld qword [rdx + 8 * 0]

; Pop it back to the same slot and read the raw bits.
fstp qword [rdx]
mov rax, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DD_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISTTP m64int: store st0 as a truncated 64-bit integer and pop.
; Expected: RAX = 0x2 (2.0 truncated), RBX = 0x3ff0000000000000 (1.0 loaded
; afterwards and stored back as a double).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data3]
fisttp qword [rdx + 8 * 0] ; data3 = 2, pops the stack

mov rax, [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0

; Stored over data2 itself, since RDX still points there.
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x3ff0000000000000",
    "RCX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FST m64fp: store st0 as a double WITHOUT popping.
; Expected: RAX = 2.0 (value stored by fst), RBX = 1.0 (new top of stack),
; RCX = 2.0 (the original value is still on the stack beneath it).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data3]
fst qword [rdx + 8 * 0] ; data3 = 2.0, stack unchanged

mov rax, [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0, st1 = 2.0

; Pop both entries; stored over data2 since RDX still points there.
fstp qword [rdx]
mov rbx, [rdx]
fstp qword [rdx]
mov rcx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_03_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4000000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSTP m64fp: store st0 as a double and pop.
; Expected: RAX = 0x4000000000000000 (2.0 stored by fstp),
; RBX = 0x3ff0000000000000 (1.0 loaded afterwards and stored back).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data3]
fstp qword [rdx + 8 * 0] ; data3 = 2.0, pops the stack

mov rax, [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0

; Stored over data2 itself, since RDX still points there.
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dq 0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_04_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FNSAVE/FRSTOR with a 16-bit operand-size prefix (o16).
; The stack is filled, saved, clobbered with pi, restored, and then the saved
; register images are read from the save area into XMM0-XMM7.
; We don't test XMM0 or the MM* registers due to precision

; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

; 64-bit stores at a 2-byte stride: the upper bytes of each store are
; overwritten by the next one, and only the low word of each slot is read by
; the `fild word` loads below.
mov rax, 2
mov [rdx + 2 * 1], rax
mov rax, 4
mov [rdx + 2 * 2], rax
mov rax, 8
mov [rdx + 2 * 3], rax
mov rax, 16
mov [rdx + 2 * 4], rax
mov rax, 32
mov [rdx + 2 * 5], rax
mov rax, 64
mov [rdx + 2 * 6], rax

; Fill all eight registers: st7 = 0, st6 = 2, ... st1 = 64, st0 = pi.
fldz
fild word [rdx + 2 * 1]
fild word [rdx + 2 * 2]
fild word [rdx + 2 * 3]
fild word [rdx + 2 * 4]
fild word [rdx + 2 * 5]
fild word [rdx + 2 * 6]
fldpi

o16 fnsave [rdx]

; Refill the stack with different values so frstor has something to undo.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o16 frstor [rdx]

; Read the register images from the save area: they are taken from offset
; 0xE onwards, 10 bytes apart.
movups xmm0, [rdx + (0xE + 10 * 0)]
movups xmm1, [rdx + (0xE + 10 * 1)]
movups xmm2, [rdx + (0xE + 10 * 2)]
movups xmm3, [rdx + (0xE + 10 * 3)]
movups xmm4, [rdx + (0xE + 10 * 4)]
movups xmm5, [rdx + (0xE + 10 * 5)]
movups xmm6, [rdx + (0xE + 10 * 6)]
movups xmm7, [rdx + (0xE + 10 * 7)]

; Each 16-byte load also picked up 6 bytes of the next image; shifting left
; then right by 6 bytes zeroes them, leaving only the 10-byte value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt


================================================
FILE: unittests/ASM/X87_F64/DD_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM1": ["0x8000000000000000", "0x4005"],
    "XMM2": ["0x8000000000000000", "0x4004"],
    "XMM3": ["0x8000000000000000", "0x4003"],
    "XMM4": ["0x8000000000000000", "0x4002"],
    "XMM5": ["0x8000000000000000", "0x4001"],
    "XMM6": ["0x8000000000000000", "0x4000"],
    "XMM7": ["0x0000000000000000", "0x0000"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FNSAVE/FRSTOR with a 32-bit operand-size prefix (o32).
; The stack is filled, saved, clobbered with pi, restored, and then the saved
; register images are read from the save area into XMM0-XMM7.
; We don't test XMM0 or the MM* registers due to precision

; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

; 64-bit stores at a 2-byte stride: the upper bytes of each store are
; overwritten by the next one, and only the low word of each slot is read by
; the `fild word` loads below.
mov rax, 2
mov [rdx + 2 * 1], rax
mov rax, 4
mov [rdx + 2 * 2], rax
mov rax, 8
mov [rdx + 2 * 3], rax
mov rax, 16
mov [rdx + 2 * 4], rax
mov rax, 32
mov [rdx + 2 * 5], rax
mov rax, 64
mov [rdx + 2 * 6], rax

; Fill all eight registers: st7 = 0, st6 = 2, ... st1 = 64, st0 = pi.
fldz
fild word [rdx + 2 * 1]
fild word [rdx + 2 * 2]
fild word [rdx + 2 * 3]
fild word [rdx + 2 * 4]
fild word [rdx + 2 * 5]
fild word [rdx + 2 * 6]
fldpi

o32 fnsave [rdx]

; Refill the stack with different values so frstor has something to undo.
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi
fldpi

o32 frstor [rdx]

; Read the register images from the save area: they are taken from offset
; 0x1C onwards, 10 bytes apart.
movups xmm0, [rdx + (0x1C + 10 * 0)]
movups xmm1, [rdx + (0x1C + 10 * 1)]
movups xmm2, [rdx + (0x1C + 10 * 2)]
movups xmm3, [rdx + (0x1C + 10 * 3)]
movups xmm4, [rdx + (0x1C + 10 * 4)]
movups xmm5, [rdx + (0x1C + 10 * 5)]
movups xmm6, [rdx + (0x1C + 10 * 6)]
movups xmm7, [rdx + (0x1C + 10 * 7)]

; Each 16-byte load also picked up 6 bytes beyond the image; shifting left
; then right by 6 bytes zeroes them, leaving only the 10-byte value.
pslldq xmm0, 6
psrldq xmm0, 6

pslldq xmm1, 6
psrldq xmm1, 6

pslldq xmm2, 6
psrldq xmm2, 6

pslldq xmm3, 6
psrldq xmm3, 6

pslldq xmm4, 6
psrldq xmm4, 6

pslldq xmm5, 6
psrldq xmm5, 6

pslldq xmm6, 6
psrldq xmm6, 6

pslldq xmm7, 6
psrldq xmm7, 6

hlt


================================================
FILE: unittests/ASM/X87_F64/DD_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF3800",
    "RBX": "0xFFFFFFFFFFFF0000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FNSTSW m16: store the x87 status word to memory before and after a push.
; Expected: BX = 0x0000 (before the load), AX = 0x3800 (after one push the
; TOP field, bits 11-13, reads 7). The upper 48 bits of both stay all-ones.
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; Pre-fill so the 16-bit loads below leave the upper bits as 0xFFFF...
mov rax, -1
mov rbx, -1
fnstsw [rdx + 8 * 1] ; status word before any load

fld dword [rdx + 8 * 0]
fnstsw [rdx + 8 * 2] ; status word after one push
mov ax, word [rdx + 8 * 2]
mov bx, word [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87_F64/DD_C0_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Smoke test for FFREE st(i) on every register index.
; No register values are checked by the config; the test only has to run to hlt.
; Just to ensure execution
ffree st0
ffree st1
ffree st2
ffree st3
ffree st4
ffree st5
ffree st6
ffree st7
hlt


================================================
FILE: unittests/ASM/X87_F64/DD_D0_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1",
    "RBX": "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests that FST st(i) leaves the destination register holding a valid value:
; after copying st0 into st1 and popping, the new st0 must classify as a
; normal number and still hold 2.0.
; Expected: RAX = 1 (C2 set by fxam), RBX = 0x4000000000000000 (2.0).
lea rdx, [rel data]
fld qword [rdx]
fst st1  ;; copies st0, i.e. 2.0 to st1
fstp st0 ;; pop, st1 becomes st0

;; ensure st0 has valid tag.
fxam     ;; classify st0; the result is reported in the condition codes
fstsw ax ;; store status word into ax
shr ax, 10 ;; move C2 (bit 10) down to bit 0
and ax, 1

; store top in rbx
fst qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dq 2.0


================================================
FILE: unittests/ASM/X87_F64/DD_D0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FST st(i) with i = 1: copies st0 into st1 without popping.
; Expected: RAX = RBX = 0x3ff0000000000000 (both registers hold 1.0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0, st1 = 2.0
fst st1                 ; st1 = 1.0

; RDX still points at data2, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_D8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4010000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSTP st(i) with i = 1: copies st0 into st1, then pops.
; Expected: RAX = 0x4010000000000000 (4.0, loaded afterwards),
; RBX = 0x3ff0000000000000 (1.0, the value that replaced 2.0).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0, st1 = 2.0
fstp st1                ; st1 = 1.0, pop -> st0 = 1.0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 1.0

; RDX still points at data3, so the results are stored over that constant.
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DD_E9_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x3ff0000000000000",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FUCOMP (compare st0 with st1, then pop).
; Only tests pop behaviour
; Expected: RAX = RBX = 0x3ff0000000000000. After the pop removes 0.0, the
; stack holds the original 1.0 plus the 1.0 pushed afterwards.
fld1
fldz
fucomp
fld1

; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
fstp qword [rdx]
mov rax, [rdx]
fstp qword [rdx]
mov rbx, [rdx]


hlt


================================================
FILE: unittests/ASM/X87_F64/DE_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x4008000000000000",
    "RSI":  "0xbff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FIADD m16int: st0 = st0 + (16-bit signed integer in memory).
; Expected: RCX = 0x4008000000000000 (1.0 + 2 = 3.0),
;           RSI = 0xbff0000000000000 (1.0 + -2 = -1.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fiadd word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fiadd word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]


hlt


================================================
FILE: unittests/ASM/X87_F64/DE_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x4000000000000000",
    "RSI":  "0xC000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FIMUL m16int: st0 = st0 * (16-bit signed integer in memory).
; Expected: RCX = 0x4000000000000000 (1.0 * 2 = 2.0),
;           RSI = 0xC000000000000000 (1.0 * -2 = -2.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fimul word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fimul word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RSI":  "0x18"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FICOMP m16int: compare st0 with a 16-bit signed integer, then pop.
; Four comparisons are run; after each one the C3 ("equal") bit is OR-ed into
; RSI and RSI is shifted left by one.
; Expected: RSI = 0x18 (binary 11000: equal, equal, not equal, not equal,
; followed by the final shift).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
mov rsi, 0

; Matching positive-positive
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 1
mov [rdx + 8 * 1], eax ; 32-bit store; only the low word is read by ficomp

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Matching negative-negative
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov ax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching negative-positive
mov rax, 0xbff0000000000000 ; -1.0
mov [rdx + 8 * 0], rax
mov ax, 1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

; Nonmatching positive-negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -1
mov [rdx + 8 * 1], eax

fld qword [rdx + 8 * 0]
ficomp word [rdx + 8 * 1]

; Get the status word
mov rax, 0
fstsw ax
; Extract C3 to see if it was equal
shr ax, 14
and ax, 1
or rsi, rax
shl rsi, 1

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0xbff0000000000000",
    "RSI":  "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISUB m16int: st0 = st0 - (16-bit signed integer in memory).
; Expected: RCX = 0xbff0000000000000 (1.0 - 2 = -1.0),
;           RSI = 0x4008000000000000 (1.0 - -2 = 3.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisub word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisub word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x3ff0000000000000",
    "RSI":  "0xc008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISUBR m16int: st0 = (16-bit signed integer in memory) - st0.
; Expected: RCX = 0x3ff0000000000000 (2 - 1.0 = 1.0),
;           RSI = 0xc008000000000000 (-2 - 1.0 = -3.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisubr word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fisubr word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_06_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x3fe0000000000000",
    "RSI":  "0xbfe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FIDIV m16int: st0 = st0 / (16-bit signed integer in memory).
; Expected: RCX = 0x3fe0000000000000 (1.0 / 2 = 0.5),
;           RSI = 0xbfe0000000000000 (1.0 / -2 = -0.5).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidiv word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidiv word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX":  "0x4000000000000000",
    "RSI":  "0xc000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FIDIVR m16int: st0 = (16-bit signed integer in memory) / st0.
; Expected: RCX = 0x4000000000000000 (2 / 1.0 = 2.0),
;           RSI = 0xc000000000000000 (-2 / 1.0 = -2.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, 2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidivr word [rdx + 8 * 1]

; Stored without popping; the result stays on the stack.
fst qword [rdx]
mov rcx, [rdx]

; Test negative
mov rax, 0x3ff0000000000000 ; 1.0
mov [rdx + 8 * 0], rax
mov ax, -2
mov [rdx + 8 * 1], ax

fld qword [rdx + 8 * 0]
fidivr word [rdx + 8 * 1]

fst qword [rdx]
mov rsi, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/DE_C0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0x4008000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FADDP st(1), st(0): st1 = st1 + st0, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0x4008000000000000 (2.0 + 1.0 = 3.0).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0, st1 = 2.0
faddp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DE_C8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0x4020000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FMULP st(1), st(0): st1 = st1 * st0, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0x4020000000000000 (2.0 * 4.0 = 8.0).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0
fmulp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DE_E0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUBRP st(1), st(0): st1 = st0 - st1, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0x4000000000000000 (4.0 - 2.0 = 2.0).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0
fsubrp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DE_E8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0xc000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FSUBP st(1), st(0): st1 = st1 - st0, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0xc000000000000000 (2.0 - 4.0 = -2.0).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000


lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0
fsubp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DE_F0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0x4000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIVRP st(1), st(0): st1 = st0 / st1, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0x4000000000000000 (4.0 / 2.0 = 2.0).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0
fdivrp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DE_F8_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x4010000000000000",
    "RBX":  "0x3fe0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FDIVP st(1), st(0): st1 = st1 / st0, then pop.
; A third value is pushed afterwards so the test also shows that the pop happened.
; Expected: RAX = 0x4010000000000000 (4.0, pushed last),
;           RBX = 0x3fe0000000000000 (2.0 / 4.0 = 0.5).
; RCX holds the scratch address (0xe0000000, presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = 2.0

; fdivp 2.0, 4.0
; == st1 = 2.0 / 4.0
fdivp st1, st0

lea rdx, [rel data3]
fld tword [rdx + 8 * 0] ; st0 = 4.0, st1 = result

fstp qword [rcx]
mov rax, [rcx]

fst qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dt 2.0
  dq 0
data2:
  dt 4.0
  dq 0
data3:
  dt 4.0
  dq 0


================================================
FILE: unittests/ASM/X87_F64/DF_00_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4090000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FILD m16int: load a 16-bit signed integer onto the x87 stack.
; Expected: RAX = 0x4090000000000000 (1024.0 as a double).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 1024
mov [rdx + 8 * 0], eax
; Fill the bytes right after the 16-bit operand with 0xFF, so a load wider
; than 16 bits would give a different value.
mov eax, -1
mov [rdx + 8 * 0 + 2], eax

fild word [rdx + 8 * 0]
fst qword [rdx + 8 * 0]
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87_F64/DF_01_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x2",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISTTP m16int: store st0 as a truncated 16-bit integer and pop.
; Expected: RAX = 0x2 (2.0 truncated), RBX = 0x3ff0000000000000 (1.0 loaded
; afterwards and stored back as a double).
lea rdx, [rel data]
fld tword [rdx + 8 * 0] ; st0 = 2.0

lea rdx, [rel data3]
fisttp word [rdx + 8 * 0] ; low word of data3 = 2, pops the stack

; Clear EAX first so every bit of RAX is defined: RegData compares the whole
; 64-bit register, but only the low 16 bits are loaded from memory.
mov eax, 0
mov ax, word [rdx + 8 * 0]

lea rdx, [rel data2]
fld tword [rdx + 8 * 0] ; st0 = 1.0

; Stored over data2 itself, since RDX still points there.
fst qword [rdx + 8 * 0]
mov rbx, [rdx + 8 * 0]

hlt

align 4096
data:
  dt 2.0
  dq 0
data2:
  dt 1.0
  dq 0
data3:
  ; Pre-filled with all-ones so a store wider than 16 bits would be visible.
  dq -1
  dq -1


================================================
FILE: unittests/ASM/X87_F64/DF_02_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FIST m16int: store st0 as a 16-bit integer WITHOUT popping.
; Expected: RAX = 0x400 (1024.0 converted to an integer).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
; Pre-fill the destination with all-ones so a missing store would be visible.
mov eax, -1
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fist word [rdx + 8 * 1]

fld1

; Clear EAX so the 16-bit load leaves a fully defined RAX.
mov eax, 0
mov ax, word [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87_F64/DF_03_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISTP m16int: store st0 as a 16-bit integer and pop.
; Expected: RAX = 0x400 (1024.0 converted to an integer),
;           RBX = 0x3ff0000000000000 (1.0 pushed afterwards).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
; Pre-fill the destination with all-ones so a missing store would be visible.
mov eax, -1
mov [rdx + 8 * 1], eax

fld dword [rdx + 8 * 0]

fistp word [rdx + 8 * 1]

fld1

; Clear EAX so the 16-bit load leaves a fully defined RAX.
mov eax, 0
mov ax, word [rdx + 8 * 1]

fst qword [rdx + 8 * 0]
mov rbx, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87_F64/DF_04_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "XMM0":  ["0x0506070801000000", "0x0000000000000012"],
    "XMM1":  ["0x6576879821300000", "0x0000000000000000"],
    "XMM2":  ["0xB90984060D300000", "0x000000000000C03B"],
    "XMM3":  ["0xA83732340C000000", "0x000000000000C03B"],
    "XMM4":  ["0xFFAA6DA436100000", "0x000000000000C03A"],
    "XMM5":  ["0x0000000000000001", "0x0000000000000000"],
    "XMM6":  ["0x0000000000000001", "0x0000000000008000"],
    "XMM7":  ["0x0000000000000000", "0x0000000000008000"],
    "XMM8":  ["0x0000000000000000", "0x0000000000008000"],
    "XMM9":  ["0x0000000000000000", "0x0000000000000000"],
    "XMM10":  ["0x0000000000000001", "0x0000000000008000"],
    "XMM11":  ["0x0000000000000001", "0x0000000000000000"]
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FBLD / FBSTP (packed-BCD load and store).
; Each case writes its result to a .res_data_N slot and copies it into XMMN
; for comparison against RegData. Cases 0-4 mask the result with
; .precisionMask; cases 5-11 compare the stored BCD value directly.

; Cases 0-1: load BCD, store it back as BCD.
fbld [rel .data_0]
fbstp [rel .res_data_0]
movups xmm0, [rel .res_data_0]
andps xmm0, [rel .precisionMask] ; Mask imprecise bits

fbld [rel .data_1]
fbstp [rel .res_data_1]
movups xmm1, [rel .res_data_1]
andps xmm1, [rel .precisionMask] ; Mask imprecise bits

; Check encoding of invalid BCD
; (the loaded value is stored back in 80-bit floating-point form)
fbld [rel .data_2]
fstp tword [rel .res_data_2]
movups xmm2, [rel .res_data_2]
andps xmm2, [rel .precisionMask] ; Mask imprecise bits

fbld [rel .data_3]
fstp tword [rel .res_data_3]
movups xmm3, [rel .res_data_3]
andps xmm3, [rel .precisionMask] ; Mask imprecise bits

fbld [rel .data_4]
fstp tword [rel .res_data_4]
movups xmm4, [rel .res_data_4]
andps xmm4, [rel .precisionMask] ; Mask imprecise bits

; Some special values
; (1.0, -1.0 and -0.0 loaded as 80-bit floats, stored as BCD)
fld tword [rel .data_5]
fbstp [rel .res_data_5]
movups xmm5, [rel .res_data_5]

fld tword [rel .data_6]
fbstp [rel .res_data_6]
movups xmm6, [rel .res_data_6]

fld tword [rel .data_7]
fbstp [rel .res_data_7]
movups xmm7, [rel .res_data_7]

; Values that choose +- 0 or +-1 depending on rounding mode
; -1 < F < -0
; +0 < F < +1
fld tword [rel .data_8]
fbstp [rel .res_data_8]
movups xmm8, [rel .res_data_8]

fld tword [rel .data_9]
fbstp [rel .res_data_9]
movups xmm9, [rel .res_data_9]

; Swap control word
; (clear the rounding-control field, bits 10-11, then set the new mode)
fnstcw [rel .cw]
mov ax, [rel .cw]
and ax, ~(3 << 10)
or eax, 1 << 10 ; Round down
mov [rel .cw], ax
fldcw [rel .cw]

fld tword [rel .data_10]
fbstp [rel .res_data_10]
movups xmm10, [rel .res_data_10]

; Swap control word
fnstcw [rel .cw]
mov ax, [rel .cw]
and ax, ~(3 << 10)
or eax, 2 << 10 ; Round up
mov [rel .cw], ax
fldcw [rel .cw]

fld tword [rel .data_11]
fbstp [rel .res_data_11]
movups xmm11, [rel .res_data_11]

; Values that generate an invalid floating-point operation exception
; -inf
; +inf
; Negative value too large for destination format
; Positive value too large for destination format
; NaN
; On IA the indefinite BCD result is still stored to memory

; XXX: We don't support IA on this

hlt

align 4096
; Clears the low 20 bits of the first dword; every other bit is kept.
.precisionMask:
dd 0xfff00000
dd 0xffffffff
dd 0xffffffff
dd 0xffffffff


; Scratch slot for the x87 control word.
.cw:
dw 0

; Inputs 0-4 are read by fbld; inputs 5-11 are 80-bit floats.
.data_0:
dd 0x01020304
dd 0x05060708
dd 0x09101112
dd 0x13141516
.data_1:
dd 0x21324354
dd 0x65768798
dd 0x00000000
dd 0x00000000
.data_2:
dd 0xFFFFFFFF
dd 0xFFFFFFFF
dd 0xFFFFFFFF
dd 0xFFFFFFFF
.data_3:
dd 0xF0F0F0F0
dd 0xF0F0F0F0
dd 0xF0F0F0F0
dd 0xF0F0F0F0
.data_4:
dd 0x0A0B0C0D
dd 0x0E0FAAAB
dd 0xACADAEAF
dd 0xBABBBCBD
.data_5:
dt 1.0
.data_6:
dt -1.0
.data_7:
dt -0.0
.data_8:
dt -0.5
.data_9:
dt 0.5
.data_10:
dt -0.5
.data_11:
dt 0.5

; 16-byte result slots, one per case, sized for the movups loads above.
.res_data_0:
dq 0
dq 0
.res_data_1:
dq 0
dq 0
.res_data_2:
dq 0
dq 0
.res_data_3:
dq 0
dq 0
.res_data_4:
dq 0
dq 0
.res_data_5:
dq 0
dq 0
.res_data_6:
dq 0
dq 0
.res_data_7:
dq 0
dq 0
.res_data_8:
dq 0
dq 0
.res_data_9:
dq 0
dq 0
.res_data_10:
dq 0
dq 0
.res_data_11:
dq 0
dq 0


================================================
FILE: unittests/ASM/X87_F64/DF_05_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4090000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FILD m64int: load a 64-bit signed integer onto the x87 stack.
; Expected: RAX = 0x4090000000000000 (1024.0 as a double).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov rax, 1024
mov [rdx + 8 * 0], rax

fild qword [rdx + 8 * 0]
; Store as a double over the integer and read the raw bits back.
fst qword [rdx + 8 * 0]
mov rax, [rdx + 8 * 0]

hlt


================================================
FILE: unittests/ASM/X87_F64/DF_07_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x400",
    "RBX": "0x3ff0000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FISTP m64int: store st0 as a 64-bit integer and pop.
; Expected: RAX = 0x400 (1024.0 converted to an integer),
;           RBX = 0x3ff0000000000000 (1.0 pushed afterwards).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 0x44800000 ; 1024.0
mov [rdx + 8 * 0], eax
; Pre-fill the destination with all-ones so a missing store would be visible.
mov rax, -1
mov [rdx + 8 * 1], rax

fld dword [rdx + 8 * 0]

fistp qword [rdx + 8 * 1]

fld1

mov rax, qword [rdx + 8 * 1]

; Reuse the same slot to capture the 1.0 now on top of the stack.
fstp qword [rdx + 8 * 1]

mov rbx, qword [rdx + 8 * 1]

hlt


================================================
FILE: unittests/ASM/X87_F64/DF_E0_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xFFFFFFFFFFFF3800",
    "RBX": "0xFFFFFFFFFFFF0000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FNSTSW AX: store the x87 status word into AX before and after a push.
; Expected: BX = 0x0000 (before the load), AX = 0x3800 (after one push the
; TOP field, bits 11-13, reads 7). The upper 48 bits of both stay all-ones.
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, 0x3f800000 ; 1.0
mov [rdx + 8 * 0], eax

; Pre-fill so the 16-bit writes below leave the upper bits as 0xFFFF...
mov rax, -1
mov rbx, -1
fnstsw ax ; status word before any load
mov bx, ax

fld dword [rdx + 8 * 0]
fnstsw ax ; status word after one push

hlt


================================================
FILE: unittests/ASM/X87_F64/FCOM_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x0"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests the condition codes produced by FCOMP, using a test/jp sequence.
; st0 = 0.0 is compared against st1 = 1.0, then AH is masked with 0x41
; (C3 = AH bit 6, C0 = AH bit 0) and the parity of the result picks the branch.
; Expected: RAX = 0, i.e. the jp is NOT taken (exactly one of C3/C0 is set).
fld1
fldz
fcomp
fnstsw ax
test ah, 041h
jp good
mov rax, 0
hlt
good:
mov rax, 1
hlt


================================================
FILE: unittests/ASM/X87_F64/FILD_NEG_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0xC090000000000000",
    "RBX": "0xC070000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Tests FILD with negative integers, for both 32-bit and 16-bit operands.
; Expected: RAX = 0xC090000000000000 (-1024.0), RBX = 0xC070000000000000 (-256.0).
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

mov eax, -1024
mov [rdx + 8 * 0], eax

fild dword [rdx + 8 * 0]

fstp qword [rdx]
mov rax, [rdx]

; 16-bit case
xor rbx, rbx
mov bx, -256
mov [rdx + 8 * 0], bx
fild word [rdx + 8 * 0]

fstp qword [rdx]
mov rbx, [rdx]

hlt


================================================
FILE: unittests/ASM/X87_F64/FIST_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "RAX": "0xffffffff"
  }
}
%endif

; Test behaviour of overflow
; and storing negative numbers
; to 32-bit registers.
; Expected: RAX = 0xffffffff (-1 as a 32-bit integer; the `mov eax` load
; zero-extends it into RAX).

lea rbp, [rel data]
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

fld qword [rbp]
fistp dword [rdx]
mov eax, [rdx]

hlt

align 8
data:
  dq 0xbff0000000000000 ; -1.0


================================================
FILE: unittests/ASM/X87_F64/FLDCW_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "RAX": "0x3",
    "RBX": "0x2"
  }
}
%endif

; Tests that FLDCW changes the rounding mode used by FISTP.
; 2.6 is stored as an integer twice: once with both rounding-control bits set
; (truncate), once with the original control word restored.
; Expected: RBX = 0x2 (truncated), RAX = 0x3 (original rounding mode).
lea rbp, [rel data]
; 0xe0000000 is scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000
; NOTE: RCX is not referenced again in this test.
mov rcx, 0xe0004000

; save fcw
fnstcw [rdx]
; set rounding to truncate
mov eax, 0
mov ax, [rdx]
or ah, 0xc ; sets bits 10-11 of the control word
mov [rdx+8], ax
fldcw [rdx+8]

fld dword [rbp]
fistp dword [rdx+16]
mov ebx, [rdx+16]

; restore fcw
fldcw [rdx]
fld dword [rbp]
fistp dword[rdx+16]
mov eax, [rdx+16]

hlt

align 8
data:
   dd 0x40266666 ; 2.6


================================================
FILE: unittests/ASM/X87_F64/FLD_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "RAX": "0x40490fdb",
    "RBX": "0x4008000000000000"
  }
}
%endif

; Round-trips a single-precision and a double-precision value through the x87
; stack with FLD/FST and checks that the stored bit patterns match the inputs.

lea rbp, [rel data]
; Address used as scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

; 32-bit load followed by a 32-bit store (no pop)
fld dword [rbp]
fst dword [rdx]

; Clear RAX, then read the stored single back into its low half
xor rax, rax
mov eax, [rdx]

; 64-bit load followed by a 64-bit store; the double sits right after the dword
fld qword [rbp + 4]
fst qword [rdx]

mov rbx, [rdx]

hlt

align 8
data:
  dd 0x40490fdb          ; single-precision input (expected back in RAX)
  dq 0x4008000000000000  ; double-precision input (expected back in RBX)


================================================
FILE: unittests/ASM/X87_F64/FPREM1_Flags_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Verifies that FPREM1 clears C2 in the status word even when the status word
; was all ones beforehand. RAX ends up holding only the C2 bit, expected 0.

; Address used as scratch memory (presumably mapped by the test harness).
mov rbx, 0xe0000000
; Store the x87 environment in its 32-bit layout, overwrite the dword at
; offset 4 (which holds the status word), and load the environment back.
o32 fstenv [rbx]
mov dword [rbx+4], 0xFFFFFFFF ; set status word to all one
o32 fldenv [rbx]

lea rdx, [rel data]
fld tword [rdx + 8 * 0]   ; push 3.0 (ends up in st1)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]   ; push 5.1 (st0)

fprem1                    ; st0 = IEEE remainder of st0 / st1

xor rax, rax
fstsw ax
and rax, 0x400 ; C2 should be set to zero

hlt

align 8
data:
  dt 3.0
  dq 0      ; not read by the code above; presumably padding after the 10-byte value
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87_F64/FPREM_Flags_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "RAX": "0"
  }
}
%endif

; Verifies that FPREM clears C2 in the status word even when the status word
; was all ones beforehand. RAX ends up holding only the C2 bit, expected 0.

; Address used as scratch memory (presumably mapped by the test harness).
mov rbx, 0xe0000000
; Store the x87 environment in its 32-bit layout, overwrite the dword at
; offset 4 (which holds the status word), and load the environment back.
o32 fstenv [rbx]
mov dword [rbx+4], 0xFFFFFFFF ; set status word to all one
o32 fldenv [rbx]

lea rdx, [rel data]
fld tword [rdx + 8 * 0]   ; push 3.0 (ends up in st1)

lea rdx, [rel data2]
fld tword [rdx + 8 * 0]   ; push 5.1 (st0)

fprem                     ; st0 = partial remainder of st0 / st1

xor rax, rax
fstsw ax
and rax, 0x400 ; C2 should be set to zero

hlt

align 8
data:
  dt 3.0
  dq 0      ; not read by the code above; presumably padding after the 10-byte value
data2:
  dt 5.1
  dq 0


================================================
FILE: unittests/ASM/X87_F64/FScale-Zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "R8": "0",
    "R9": "0",
    "R10": "0",
    "R11": "0",
    "R12": "0"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSCALE with a zero in st0 and increasingly large scale factors in st1.
; Each case loads the integer scale factor with FILD (it becomes st1 once the
; zero is pushed on top), runs FSCALE, stores st0 as a double and copies the raw
; bits into a result register. R8-R12 are expected to be +0.0 (all bits clear).
;
; NOTE(review): the last case writes R13, but R13 has no entry in RegData above,
; so the negative-zero result is not covered by the listed expectations.
; Confirm whether that is intentional.

; scale by zero (st1 == 0)
mov rax, 0
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r8, [rel intstor]

; scale by zero (st1 == 1)
mov rax, 1
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r9, [rel intstor]

; scale by zero (st1 == 100)
mov rax, 100
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r10, [rel intstor]

; scale by zero (st1 == 1024)
mov rax, 1024
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r11, [rel intstor]

; scale by zero (st1 == 1048576)
mov rax, 1048576
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fldz
fscale
fst qword [rel intstor]
mov r12, [rel intstor]

; tests scaling negative zero
; (st0 == -0.0 loaded from memory, st1 == 1048576)
mov rax, 1048576
mov qword [rel intstor], rax
finit
fild qword [rel intstor]
fld qword [rel neg_zero]
fscale
fst qword [rel intstor]
mov r13, [rel intstor]

hlt

align 4096
neg_zero: dq 0x8000000000000000   ; -0.0
intstor: dq 0                     ; scratch slot for the scale factor and the stored result


================================================
FILE: unittests/ASM/X87_F64/FScaleFXtract_F64.asm
================================================
%ifdef CONFIG
{
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" },
  "RegData": {
    "R8": "1"
  }
}
%endif
; The entries below are the expected values for the num1..num4 cases, which
; are commented out further down; only the num0 case (R8) is currently active.
    ; ,
    ; "R9": "1",
    ; "R10": "1",
    ; "R11": "1",
    ; "R12": "1"
section .data
    num0: dq 0.0
    num1: dq 125.78
    num2: dq 1023.12
    num3: dq -23487.152
    num4: dq -1230192.123

;; Tests the FScale / FXtract inverse behaviour
;; Each case duplicates a value, splits the copy with FXTRACT, recombines it
;; with FSCALE and compares the result against the untouched original.
section .text
    global _start
_start:
    ; NOTE(review): SETZ below writes only the low byte of R8; the upper bits keep their start-up value.
; num0 == 0.0
finit
fld qword [rel num0]
fld st0
fxtract
fscale
fstp st1  ; at this point st0 and st1 should be the same
fcom
fnstsw ax
and ax, 0x4500  ; keep C3 (0x4000), C2 (0x0400) and C0 (0x0100)
cmp ax, 0x4000  ; C3 alone set means the operands compared equal
setz r8b

; ; num1 == 125.78
; finit
; fld qword [rel num1]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r9b

; ; num2 == 1023.12
; finit
; fld qword [rel num2]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r10b

; ; num3 == -23487.152
; finit
; fld qword [rel num3]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r11b

; ; num4 == -1230192.123
; finit
; fld qword [rel num4]
; fld st0
; fxtract
; fscale
; fstp st1  ; at this point st0 and st1 should be the same
; fcom
; fnstsw ax
; and ax, 0x4500
; cmp ax, 0x4000
; setz r12b

hlt


================================================
FILE: unittests/ASM/X87_F64/FXAM_Push_2_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RCX": "0"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Address used as scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

; This behaviour was seen around Wine 32-bit libraries
; Anything doing a call to a double application would spin
; the x87 stack on to the stack looking for fxam to return empty
; Empty in this case is that C0 and C3 are set while C2 is not
;
; This variant starts with an empty x87 stack, so the loop below must exit on
; its first iteration and RCX (the number of values spilled) must stay 0.

fninit
; Empty stack to make sure we don't push anything

mov eax, 0
mov ecx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500  ; keep C3 (0x4000), C2 (0x0400) and C0 (0x0100)
; Check for empty
cmp ax, 0x4100  ; C3 and C0 set, C2 clear
je .Done

; Now push the x87 stack value
; We know it isn't empty
fstp qword [rdx + rcx * 8]
fwait
inc ecx
jmp .ExamineStack

.Done:

hlt


================================================
FILE: unittests/ASM/X87_F64/FXAM_Push_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "8"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Address used as scratch memory (presumably mapped by the test harness).
mov rdx, 0xe0000000

; This behaviour was seen around Wine 32-bit libraries
; Anything doing a call to a double application would spin
; the x87 stack on to the stack looking for fxam to return empty
; Empty in this case is that C0 and C3 are set while C2 is not
;
; This variant fills all eight x87 registers first, so the spill loop is
; expected to store eight values before FXAM reports empty (RAX == 8).

fninit
; Fill the x87 stack
fldz
fldz
fldz
fldz
fldz
fldz
fldz
fldz

mov eax, 0
mov ecx, 0

.ExamineStack:
; Examine st(0)
fxam
fwait
; Get the results in to AX
fnstsw ax
and ax, 0x4500  ; keep C3 (0x4000), C2 (0x0400) and C0 (0x0100)
; Check for empty
cmp ax, 0x4100  ; C3 and C0 set, C2 clear
je .Done

; Now push the x87 stack value
; We know it isn't empty
fstp qword [rdx + rcx * 8]
fwait
inc ecx
jmp .ExamineStack

.Done:

; Save how many we stored
mov eax, ecx

; Now fill with "Garbage"
fld1
fld1
fld1
fld1
fld1
fld1
fld1
fld1

.Reload:
; Now reload the stack
; Walks the spilled values from the last one stored down to index 0
dec ecx
fld qword [rdx + rcx * 8]
cmp ecx, 0x0
jne .Reload;

hlt


================================================
FILE: unittests/ASM/X87_F64/Rounding_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RBX": "0x212121211121",
    "RCX": "0xfffefffeffffffff",
    "RDX": "0xfffffffeffffffff",
    "RSI": "0xfffefffeffffffff"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; Rounding tests to ensure rounding modes are actually working
;
; Every section loads one value and converts it to an integer four times, once
; per rounding-control setting (control-word bits 10-11):
;   00 round to nearest, 01 round down, 10 round up, 11 round toward zero.
; The control word is rewritten with fstcw / mask with 0xf3ff / or in the new
; mode / fldcw before each conversion after the first.
;
; Positive inputs: each result is packed into RBX as one 4-bit nibble.
; Negative inputs: each result is packed as a 16-bit field into RCX, RDX or
; RSI (one register per input value).

;; Mid-point
finit
fld qword [rel midpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
mov rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

;; Slightly above midpoint
finit
fld qword [rel samidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

;; Slightly below midpoint
finit
fld qword [rel sbmidpoint]

; Default rounding is 00 - round to nearest
fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
or rbx, qword [rel tmp]
shl rbx, 4

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

; Last nibble of RBX: no shift afterwards
fistp dword [rel tmp]
or rbx, qword [rel tmp]


;;; Negative tests
;; Mid-point
finit
fld qword [rel nmidpoint]

; Default rounding is 00 - round to nearest
fist word [rel tmp]
; Seed RCX directly from the stored result. OR-ing into RCX here would leak
; whatever its low 16 bits held at start-up into bits 48-63 of the final value.
movzx rcx, word [rel tmp]
shl rcx, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

; RAX was zero-extended by the movzx above, so replacing AX leaves a clean
; 16-bit value in RAX for the OR below.
fist dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax
shl rcx, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax
shl rcx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rcx, rax

;; Slightly above midpoint
finit
fld qword [rel nsamidpoint]

; Default rounding is 00 - round to nearest
; Only DX is written; the stale upper bits of RDX are shifted out by the
; three 16-bit shifts in this section.
fist dword [rel tmp]
mov dx, word [rel tmp]
shl rdx, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax
shl rdx, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax
shl rdx, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rdx, rax

;; Slightly below midpoint
finit
fld qword [rel nsbmidpoint]

; Default rounding is 00 - round to nearest
; Only SI is written; the stale upper bits of RSI are shifted out by the
; three 16-bit shifts in this section.
fist dword [rel tmp]
mov si, word [rel tmp]
shl rsi, 16

; Round down - 01
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0400
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax
shl rsi, 16

; Round up - 10
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0800
mov word [rel tmp], ax
fldcw word [rel tmp]

fist dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax
shl rsi, 16

; Round toward zero - 11
fstcw word [rel tmp]
movzx rax, word [rel tmp]
and rax, 0xf3ff
or rax, 0x0c00
mov word [rel tmp], ax
fldcw word [rel tmp]

fistp dword [rel tmp]
mov ax, word [rel tmp]
or rsi, rax

hlt

align 4096
; Inputs. "above"/"below" are relative to the (signed) midpoint value.
midpoint:
  dq 1.5
samidpoint:
  dq 1.50001
sbmidpoint:
  dq 1.49999
nmidpoint:
  dq -1.5
nsamidpoint:
  dq -1.49999
nsbmidpoint:
  dq -1.50001

; Scratch slot shared by the control-word updates and the integer stores.
; Only its low 16/32 bits are ever written, so 64-bit reads see zero above.
tmp: dq 0


================================================
FILE: unittests/ASM/X87_F64/fptan_neg_zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x8000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPTAN on -0.0: the pushed constant must be 1.0 (RAX) and the tangent result
; must keep the sign of the zero input (RBX == bit pattern of -0.0).

; Address used as scratch memory (presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld qword [rdx]

fptan

; ST(0) = 1.0, ST(1) = tan(-0.0) = -0.0
fstp qword [rcx]   ; pop the 1.0
mov rax, [rcx]

fstp qword [rcx]   ; pop the tangent
mov rbx, [rcx]

hlt

align 8
data:
  dq 0x8000000000000000 ; -0.0


================================================
FILE: unittests/ASM/X87_F64/fptan_pos_zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x3ff0000000000000",
    "RBX":  "0x0000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FPTAN on +0.0: the pushed constant must be 1.0 (RAX) and the tangent result
; must be +0.0 (RBX == 0).

; Address used as scratch memory (presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld qword [rdx]

fptan

; ST(0) = 1.0, ST(1) = tan(+0.0) = +0.0
fstp qword [rcx]   ; pop the 1.0
mov rax, [rcx]

fstp qword [rcx]   ; pop the tangent
mov rbx, [rcx]

hlt

align 8
data:
  dq 0x0000000000000000 ; +0.0


================================================
FILE: unittests/ASM/X87_F64/fsin_neg_zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x8000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSIN on -0.0 must return -0.0: the stored double is expected to have only
; the sign bit set.

; Address used as scratch memory (presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld qword [rdx]

fsin

; Store the result as a double, pop, and read back the raw bits
fstp qword [rcx]
mov rax, [rcx]

hlt

align 8
data:
  dq 0x8000000000000000 ; -0.0


================================================
FILE: unittests/ASM/X87_F64/fsin_pos_zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "0x0000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSIN on +0.0 must return +0.0: the stored double is expected to be all zero
; bits.

; Address used as scratch memory (presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld qword [rdx]

fsin

; Store the result as a double, pop, and read back the raw bits
fstp qword [rcx]
mov rax, [rcx]

hlt

align 8
data:
  dq 0x0000000000000000 ; +0.0


================================================
FILE: unittests/ASM/X87_F64/fsincos_neg_zero_F64.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX":  "1",
    "RBX":  "0x8000000000000000"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

; FSINCOS on -0.0: the cosine is compared against 1.0 within a relative
; tolerance, and the sine must be exactly -0.0 (RBX).

; Provides check_relerr_d and define_check_data_constants used below.
%include "checkprecision.mac"

; Address used as scratch memory (presumably mapped by the test harness).
mov rcx, 0xe0000000

lea rdx, [rel data]
fld qword [rdx]

fsincos

; ST(0) = cos(-0.0), ST(1) = sin(-0.0) = -0.0
fstp qword [rcx]
; NOTE(review): macro body is not visible here; given the expected RAX == 1 it
; presumably sets RAX to 1 when [rcx] is within the tolerance of expected_cos.
check_relerr_d rel expected_cos, rcx, rel tolerance

fstp qword [rcx]
mov rbx, [rcx]

hlt

align 8
data:
  dq 0x8000000000000000 ; -0.0
expected_cos:
  dq 0x3ff0000000000000 ; 1.0
tolerance:
  dq 0x3cb0000000000000 ; 2^-52, ~1 ULP relative error

; Macro from checkprecision.mac; presumably emits the constants the check macro reads.
define_check_data_constants


================================================
FILE: unittests/ASM/fadd.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "MM7": ["0xc90fdb0000000000", "0x4001"]
  }
}
%endif

; calculate pi + pi
; Both operands are the same single-precision value; the expected MM7 entry is
; the 80-bit result (64-bit significand, then sign/exponent word).
fld dword [rel pi]
fld dword [rel pi]
faddp               ; st1 = st1 + st0, then pop

hlt

align 8
pi:     dd 0x40490fdb ; 3.14...
one:    dd 0x3f800000 ; 1.0 (not referenced by the code above)
ptone:  dd 0x3dcccccd ; 0.1 (not referenced by the code above)


================================================
FILE: unittests/ASM/fld.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "MM4": ["0xfffffffffffff800", "0xffff"],
    "MM5": ["0xffffff0000000000", "0xffff"],
    "MM6": ["0xaaaaaa0000000000", "0xbfd5"],
    "MM7": ["0xc90fdb0000000000", "0x4000"]
  }
}
%endif

; Pushes four values with FLD (three 32-bit loads, one 64-bit load) and checks
; the resulting 80-bit register contents. Each expected entry above is the
; 64-bit significand followed by the sign/exponent word; the first value pushed
; is the one expected in MM7, the last in MM4.

lea rbp, [rel data]

; 32bit FLDs
fld dword [rbp]
fld dword [rbp + 4]
; The all-ones single has a maximal exponent and non-zero fraction (a NaN encoding).
; Original note: "Currently fails due to lack of infinity handling"
; NOTE(review): confirm whether that note is still accurate.
fld dword [rel allf]

; 64bit FLDs
; All-ones double, likewise a NaN encoding.
; Original note: "Currently fails due to lack of infinity handling"
; NOTE(review): confirm whether that note is still accurate.
fld qword [rel allf]

hlt

align 8
data:
        dd 0x40490fdb
        dd 0xaaaaaaaa
allf:   dq 0xffffffffffffffff
        dw 0xffff

================================================
FILE: unittests/ASM/full_pshufd_imm.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x00000000a1aaca0e"
  },
  "HostFeatures": ["SSE4.1"]
}
%endif

mov rax, 0

%assign i 0
%rep 256

; pshufd all the immediate encodings
movaps xmm0, [rel .random_data + ((i * 16) % 4096)]
pshufd xmm0, xmm0, i
movaps [rel .data_result + (i * 16)], xmm0

; CRC32 (by 64) the results
crc32 rax, qword [rel .data_result + (i * 16) + 0]
crc32 rax, qword [rel .data_result + (i * 16) + 8]
%assign i i+1
%endrep

hlt
align 32

.data_result:
times 256 dq 0, 0

; 4096 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86


================================================
FILE: unittests/ASM/full_vpblendw_imm.asm
================================================
%ifdef CONFIG
{
  "HostFeatures": ["AVX"],
  "RegData": {
    "RAX": "0x00000000f7e7c074"
  }
}
%endif

; Exercise the 256-bit form of vpblendw with each of the 256 possible imm8
; values and fold every 32-byte result into a CRC32 accumulator kept in rax.
; NOTE(review): the "RegData" entry above is presumably the rax value the test
; harness expects once execution reaches hlt - confirm against the harness.

; The CRC accumulator starts out at zero.
mov rax, 0

%assign i 0
%rep 256

; Byte offsets for this iteration. The source offset wraps at 4096 bytes,
; while every immediate gets its own 32-byte slot in .data_result.
%assign src_off ((i * 32) % 4096)
%assign dst_off (i * 32)

  ; ymm1 / ymm2 are the two blend sources. ymm0 is the destination, so the
  ; value loaded into it here is replaced by the blend result.
  ; NOTE(review): presumably ymm0 is pre-filled so that an implementation that
  ; wrongly reads the destination would change the checksum - confirm.
  vmovaps ymm0, [rel .random_data + src_off]
  vmovaps ymm1, [rel .random_data2 + src_off]
  vmovaps ymm2, [rel .random_data3 + src_off]

  ; The loop counter doubles as the immediate under test.
  vpblendw ymm0, ymm1, ymm2, i
  vmovaps [rel .data_result + dst_off], ymm0

; Feed the stored result back through crc32, one qword (8 bytes) at a time,
; lowest address first.
%assign qw_off 0
%rep 4
  crc32 rax, qword [rel .data_result + dst_off + qw_off]
%assign qw_off qw_off+8
%endrep

%assign i i+1
%endrep

hlt
; Result buffer: 256 slots of 32 bytes (4 qwords) each, all zero-initialised.
; The test loop stores one ymm register per immediate here with vmovaps,
; which needs a 32-byte aligned address for a 256-bit memory operand.
align 32

.data_result:
times (256 * 4) dq 0

align 32
; 8192 bytes of random data.
.random_data:
db 0x5b, 0x27, 0x12, 0x29, 0xab, 0x84, 0xa2, 0x21, 0x6d, 0x27, 0xbe, 0x3d, 0x17, 0x05, 0x99, 0xb0
db 0xf3, 0xe2, 0x19, 0xf4, 0x42, 0xbb, 0x69, 0x02, 0x67, 0x3a, 0xab, 0x86, 0x9e, 0xda, 0x9f, 0xd5
db 0xba, 0xd4, 0x2d, 0x9d, 0x20, 0x3d, 0xf8, 0xb2, 0x29, 0xc3, 0xc3, 0x98, 0xa8, 0x30, 0x92, 0xe9
db 0x5a, 0x75, 0x0c, 0xcb, 0x28, 0x28, 0xb4, 0x90, 0x93, 0x16, 0x45, 0x10, 0x3a, 0x5d, 0x96, 0x67
db 0xf9, 0x31, 0xbe, 0x48, 0x78, 0xe8, 0x5a, 0xf2, 0x66, 0x29, 0xd9, 0x80, 0x50, 0x80, 0xcb, 0x07
db 0xfe, 0xda, 0x19, 0x0f, 0x22, 0xea, 0x18, 0x5e, 0x12, 0xea, 0x3d, 0x1a, 0xbc, 0x91, 0x51, 0x15
db 0xaa, 0x66, 0x92, 0x61, 0xb4, 0xd4, 0xce, 0x14, 0x9c, 0x86, 0x27, 0x3d, 0xd0, 0xc6, 0x51, 0x1c
db 0xa0, 0xd4, 0x0b, 0x2d, 0x25, 0x30, 0x3b, 0x46, 0x23, 0x07, 0xb5, 0x05, 0x4a, 0xaa, 0x5a, 0x0a
db 0x7b, 0x29, 0xe4, 0x52, 0x6f, 0x6f, 0xc8, 0x62, 0xb8, 0x94, 0x6a, 0x30, 0x66, 0xf1, 0x21, 0xec
db 0xd1, 0xf2, 0x68, 0xda, 0xb7, 0x7f, 0x5a, 0x26, 0x38, 0x46, 0x48, 0xda, 0x5d, 0x64, 0x8d, 0x3d
db 0x2f, 0xf6, 0xc3, 0x63, 0xb8, 0x09, 0x3a, 0xd0, 0x5b, 0xeb, 0x67, 0xd0, 0xaa, 0x63, 0x71, 0x19
db 0x7e, 0x4e, 0x33, 0xe2, 0x15, 0xba, 0x87, 0xa7, 0x7b, 0x25, 0xe4, 0xbb, 0xb5, 0x26, 0x9a, 0xf1
db 0xdd, 0x5a, 0x63, 0xd7, 0x16, 0xc0, 0xc3, 0xc8, 0x1b, 0xad, 0x00, 0x52, 0x63, 0x55, 0xc7, 0xe0
db 0xd9, 0xe9, 0xf4, 0x4c, 0x53, 0xfb, 0x73, 0x57, 0xdc, 0xad, 0x0c, 0xca, 0x73, 0x44, 0x6b, 0xf3
db 0xb7, 0x83, 0x3b, 0xfe, 0xf0, 0x15, 0xbf, 0xe5, 0x15, 0xca, 0xdf, 0x35, 0xeb, 0xe7, 0xe3, 0xa2
db 0xbd, 0x20, 0xad, 0xff, 0x1b, 0x67, 0x0a, 0x9f, 0x60, 0x60, 0xff, 0xa7, 0xc9, 0x19, 0xde, 0xb3
db 0x67, 0xf1, 0x4b, 0x77, 0x7f, 0x0b, 0xb1, 0x29, 0xee, 0xcb, 0xd6, 0x5d, 0x0d, 0xb9, 0x54, 0x49
db 0x10, 0xe3, 0xbd, 0x8a, 0xa0, 0x69, 0xa3, 0x07, 0xbe, 0x8e, 0xea, 0xc6, 0x75, 0x27, 0x66, 0xae
db 0x3c, 0xde, 0xc6, 0x13, 0x1b, 0x50, 0x37, 0x56, 0x7c, 0x01, 0xab, 0x8b, 0x46, 0xdc, 0x80, 0xed
db 0xdf, 0x12, 0x6f, 0x64, 0xdf, 0xe6, 0xf9, 0xbf, 0x15, 0x95, 0xd9, 0x80, 0x19, 0x8c, 0x96, 0x33
db 0x89, 0xbe, 0x25, 0x33, 0x34, 0x82, 0x92, 0x96, 0x05, 0x52, 0xa2, 0xcf, 0x5b, 0x3d, 0xfc, 0xd8
db 0x43, 0x89, 0x2e, 0x16, 0x6d, 0xbd, 0x84, 0x97, 0x77, 0xb5, 0xd6, 0x2b, 0x6b, 0xb1, 0xc6, 0x38
db 0x0a, 0xfe, 0xe1, 0xc9, 0x31, 0x32, 0x7f, 0xd5, 0xc1, 0x03, 0x4a, 0xb2, 0x86, 0x4d, 0x8d, 0x77
db 0xd6, 0x62, 0x52, 0x75, 0xed, 0x27, 0x21, 0xe8, 0x69, 0x6f, 0x6a, 0x5b, 0x59, 0x4d, 0xd2, 0x6c
db 0x2a, 0x97, 0x09, 0x03, 0xc5, 0x29, 0x0d, 0xe1, 0x31, 0x2e, 0x62, 0x21, 0x0e, 0xc2, 0x00, 0x7c
db 0xa2, 0x4c, 0x19, 0x63, 0x24, 0xfc, 0x9b, 0x38, 0x11, 0xbf, 0x20, 0x53, 0x53, 0xac, 0x3f, 0xdb
db 0xfd, 0x2b, 0x39, 0x3c, 0x39, 0x6b, 0xb4, 0x52, 0x1f, 0xf8, 0x8f, 0x3b, 0x47, 0x2b, 0x86, 0xcf
db 0xd2, 0x38, 0xe9, 0x08, 0x73, 0x09, 0x32, 0x5f, 0x6c, 0x3a, 0xdb, 0xfc, 0x1d, 0x91, 0xa4, 0x26
db 0xa3, 0x0c, 0xbc, 0x94, 0xf5, 0xbd, 0x29, 0xcf, 0x72, 0x3d, 0xee, 0x48, 0x06, 0x77, 0x63, 0x70
db 0x47, 0xc9, 0x87, 0x21, 0xb1, 0x9a, 0xdd, 0x5f, 0x71, 0x08, 0xe3, 0x3b, 0xf6, 0x07, 0x9f, 0x2f
db 0x20, 0xa3, 0x02, 0xc8, 0x4d, 0xc8, 0x18, 0xfa, 0x69, 0x32, 0x60, 0x97, 0x2d, 0x2f, 0x26, 0x84
db 0x3d, 0x7a, 0xf6, 0x2f, 0xb1, 0xc9, 0xd2, 0xcd, 0x6e, 0x24, 0x18, 0xa8, 0x0d, 0xb0, 0xe2, 0x41
db 0x1e, 0xdf, 0xc7, 0xee, 0xcd, 0x21, 0x5b, 0xc3, 0x26, 0x26, 0xb3, 0xb4, 0x33, 0x58, 0x79, 0xb5
db 0xc3, 0x24, 0x7c, 0xe3, 0xd7, 0x78, 0x33, 0x22, 0xd5, 0x20, 0x21, 0x86, 0xcf, 0xca, 0x44, 0xba
db 0xd8, 0x05, 0x84, 0x37, 0x69, 0x48, 0xb0, 0xe0, 0x7a, 0xe6, 0x74, 0x53, 0x1e, 0xd0, 0x0c, 0x3c
db 0x33, 0x83, 0x15, 0x43, 0x16, 0x0e, 0x93, 0x39, 0x55, 0x2e, 0x55, 0x1c, 0x09, 0xbd, 0x7a, 0xc3
db 0x80, 0x77, 0x4e, 0xd9, 0xf3, 0xa5, 0xee, 0x94, 0xbf, 0x8e, 0xd0, 0xec, 0x39, 0x33, 0x31, 0x8d
db 0x74, 0x94, 0xd2, 0x24, 0x22, 0x4a, 0xde, 0x51, 0x99, 0xc5, 0x68, 0xf2, 0x2e, 0xd3, 0x8d, 0xc5
db 0x32, 0x31, 0x26, 0xe7, 0x87, 0x47, 0x5f, 0xbc, 0x32, 0x80, 0x43, 0x83, 0x34, 0x36, 0xa1, 0x72
db 0x6b, 0x38, 0x10, 0x93, 0xa7, 0xa3, 0x92, 0xb7, 0x3c, 0x61, 0x1c, 0x4e, 0x0b, 0x86, 0x43, 0xa9
db 0x64, 0xf1, 0xf8, 0xd7, 0xd3, 0xf4, 0xd0, 0xe2, 0x17, 0xd4, 0xbb, 0xe9, 0x2c, 0xc8, 0x76, 0xc5
db 0x87, 0x7f, 0x81, 0x55, 0xbe, 0x87, 0x0e, 0x6b, 0xf6, 0x4f, 0x44, 0x37, 0x92, 0x32, 0x7f, 0x30
db 0xa6, 0x66, 0x09, 0x01, 0x7a, 0x6e, 0xb3, 0x3b, 0x7d, 0x8f, 0x32, 0x0e, 0x3c, 0xdc, 0xba, 0x2e
db 0xf8, 0xec, 0xde, 0xd9, 0xb1, 0xf0, 0x3e, 0xbd, 0x20, 0x4d, 0x01, 0x5a, 0xf4, 0xda, 0x99, 0x23
db 0x81, 0x01, 0x5f, 0x50, 0xce, 0xa8, 0xb9, 0xb1, 0x59, 0xe5, 0xde, 0x47, 0x5b, 0xba, 0x94, 0xd3
db 0x21, 0x7c, 0x49, 0xeb, 0xb5, 0x14, 0xe5, 0x56, 0x93, 0x06, 0x3b, 0xd2, 0x3a, 0x11, 0xca, 0x7a
db 0x14, 0x48, 0x54, 0xc7, 0x9f, 0x03, 0x40, 0x2c, 0x0b, 0x42, 0x8e, 0xac, 0xac, 0x08, 0x04, 0x8e
db 0xb3, 0x15, 0xe5, 0x06, 0xa6, 0x5b, 0xf0, 0x57, 0x08, 0xfa, 0x0f, 0x00, 0x7e, 0x4a, 0x16, 0xa8
db 0xb0, 0x4d, 0x07, 0x1b, 0xbc, 0x3d, 0xd0, 0x86, 0x15, 0xcd, 0x7c, 0xb2, 0xcc, 0x37, 0x6d, 0x15
db 0x8b, 0xd1, 0xe6, 0x3e, 0xfb, 0x6e, 0xe4, 0xea, 0xd9, 0x1f, 0x69, 0x2a, 0xbc, 0xda, 0xd9, 0x78
db 0xee, 0xcb, 0xb6, 0xff, 0x53, 0xfd, 0xd2, 0xb9, 0x18, 0x1f, 0xdf, 0x0e, 0x69, 0xfe, 0x36, 0xb0
db 0x77, 0x28, 0x66, 0xe2, 0xf0, 0x80, 0x4c, 0x11, 0x11, 0xba, 0xb7, 0xfd, 0x67, 0x4f, 0x05, 0xed
db 0x0c, 0xcc, 0x3e, 0x4d, 0xd9, 0xbc, 0x52, 0xe3, 0xec, 0xd9, 0x74, 0x29, 0x30, 0xf2, 0x66, 0xd6
db 0xfb, 0xc3, 0x5c, 0xc1, 0xd8, 0xef, 0x86, 0x08, 0x22, 0xb1, 0x6d, 0xfd, 0xee, 0xc7, 0x12, 0x25
db 0xda, 0xee, 0xd6, 0x28, 0x3b, 0x1d, 0xa7, 0x29, 0xdf, 0x45, 0x3a, 0xa4, 0x36, 0xe0, 0xa4, 0xda
db 0xb1, 0x2c, 0x8a, 0xa5, 0x5c, 0x8c, 0x70, 0xd8, 0xcd, 0x0f, 0xb5, 0x63, 0xd3, 0xaf, 0x59, 0x2b
db 0x7d, 0x86, 0x4a, 0xc4, 0xcc, 0x72, 0x9e, 0x89, 0xf4, 0x38, 0x89, 0x81, 0x64, 0x6f, 0xa5, 0xac
db 0x13, 0x59, 0xc4, 0x0f, 0xfb, 0xcc, 0x4c, 0x1d, 0x67, 0x5a, 0xbf, 0x19, 0xfc, 0x06, 0x71, 0xbd
db 0x7f, 0xb6, 0xb1, 0x95, 0xd3, 0x7b, 0x4c, 0x40, 0x91, 0xa9, 0x26, 0xdd, 0x28, 0x69, 0x90, 0xf6
db 0x5d, 0x16, 0x9f, 0xa9, 0x75, 0x5e, 0xad, 0x8f, 0xc8, 0x0b, 0x57, 0x48, 0xf2, 0x74, 0x77, 0x22
db 0x5d, 0xed, 0xc2, 0x79, 0x27, 0x46, 0x0c, 0x9e, 0x6f, 0x9a, 0x9a, 0xdc, 0xe0, 0x3d, 0x24, 0xc9
db 0xce, 0xf3, 0x34, 0x66, 0x45, 0x07, 0x0b, 0x83, 0x8c, 0xb7, 0xd9, 0x1e, 0xac, 0xc6, 0xf7, 0xef
db 0xe7, 0xd1, 0xbc, 0xa3, 0x21, 0x85, 0x3d, 0x25, 0x90, 0x24, 0x48, 0xb1, 0x00, 0xb0, 0xd2, 0xa6
db 0xd8, 0x4e, 0x46, 0x7c, 0xc4, 0x79, 0x40, 0x95, 0x81, 0xb4, 0xb9, 0xa8, 0x70, 0xf0, 0x12, 0xd6
db 0xdc, 0xb2, 0x7c, 0x0f, 0x47, 0xad, 0x7d, 0x46, 0x78, 0x18, 0x6e, 0xdd, 0x5f, 0xe5, 0xd7, 0x63
db 0x11, 0xf0, 0x5b, 0xa0, 0x48, 0x15, 0xe2, 0x55, 0xc6, 0x7f, 0xf4, 0x2e, 0x0e, 0x49, 0x39, 0x65
db 0x3e, 0x69, 0xc1, 0x27, 0x39, 0xb3, 0x10, 0x1b, 0xf2, 0x35, 0x88, 0x0c, 0x1b, 0xac, 0x4a, 0x15
db 0x31, 0x81, 0x63, 0xe5, 0x3d, 0x56, 0x6f, 0x34, 0x06, 0x5b, 0x1d, 0xa0, 0xea, 0x0c, 0x92, 0x6a
db 0x22, 0x2b, 0x2d, 0xbb, 0xaf, 0xc5, 0x6d, 0x44, 0x1b, 0xb0, 0x69, 0x06, 0x27, 0x54, 0xa5, 0x7f
db 0x07, 0xd4, 0xdc, 0xe5, 0x5c, 0x78, 0x9e, 0xf7, 0x4a, 0x47, 0x9b, 0x21, 0xf6, 0x87, 0x89, 0xad
db 0xec, 0xe4, 0xd6, 0x83, 0xd3, 0x7b, 0x34, 0x00, 0x0b, 0x75, 0xba, 0x4c, 0x0f, 0x46, 0xd2, 0x0c
db 0x58, 0x1b, 0x0f, 0x19, 0xb5, 0xf5, 0xba, 0x8f, 0xbd, 0x17, 0x51, 0xaf, 0xa6, 0x1a, 0x97, 0x8c
db 0x44, 0x30, 0x7c, 0x73, 0x50, 0xca, 0x05, 0xe8, 0x3e, 0x19, 0x4a, 0x5a, 0x6b, 0x4d, 0x01, 0x05
db 0xea, 0x1b, 0x70, 0xb6, 0xe6, 0x39, 0x5d, 0x99, 0x3b, 0xae, 0xed, 0x7c, 0xa6, 0xc7, 0x29, 0x6f
db 0xeb, 0x0a, 0xba, 0x03, 0xd3, 0xba, 0x62, 0x21, 0xa0, 0xb7, 0xb5, 0xbf, 0x40, 0xb8, 0x4e, 0xc3
db 0x89, 0xa0, 0xa9, 0xe8, 0xc8, 0x2b, 0xfd, 0x23, 0x32, 0x53, 0xe5, 0x35, 0xc1, 0x23, 0x97, 0xc1
db 0x87, 0x10, 0x41, 0x21, 0xb3, 0xf6, 0x53, 0xcf, 0x28, 0x47, 0x9c, 0x69, 0x42, 0xcf, 0x0e, 0x11
db 0x69, 0x7f, 0xc6, 0xdf, 0xc3, 0xbf, 0x04, 0x7f, 0x3a, 0xc6, 0xa1, 0x3d, 0xc6, 0x5b, 0x56, 0x8b
db 0x52, 0x23, 0x41, 0xd7, 0x35, 0x7f, 0x86, 0xd2, 0x59, 0xcf, 0xae, 0x28, 0xa3, 0xa2, 0x23, 0x4b
db 0x78, 0x78, 0x94, 0x3f, 0x2f, 0xf0, 0xb8, 0x94, 0xa2, 0x62, 0xb9, 0x83, 0xc7, 0x5f, 0x64, 0x45
db 0x54, 0xaf, 0x43, 0x93, 0x7f, 0xa1, 0xe8, 0x71, 0x38, 0xc8, 0x21, 0xf4, 0xa6, 0xab, 0x2b, 0xd3
db 0x44, 0xa2, 0x74, 0x94, 0x99, 0x3f, 0x56, 0xbc, 0x0a, 0x12, 0xe7, 0x6e, 0x1b, 0x7f, 0x98, 0xad
db 0x28, 0xa6, 0xc8, 0x87, 0x7a, 0x88, 0xcb, 0xcf, 0x9f, 0x95, 0xa7, 0xf1, 0x66, 0xfe, 0x43, 0x3d
db 0x71, 0x5b, 0x3a, 0xb7, 0xe4, 0xa8, 0x6f, 0x46, 0xa1, 0xaa, 0x66, 0xd2, 0x9e, 0x84, 0xfd, 0x42
db 0x98, 0x17, 0x3e, 0xde, 0xaa, 0x18, 0xc9, 0x9c, 0x53, 0x88, 0x2b, 0x92, 0xce, 0x00, 0x8b, 0xb4
db 0x15, 0x7a, 0x39, 0xb7, 0x57, 0xf9, 0xf2, 0x17, 0x0a, 0x8c, 0x05, 0x7b, 0x3f, 0x2a, 0xb0, 0xb7
db 0x8a, 0xbb, 0x9a, 0x0d, 0xe4, 0x0d, 0x6a, 0xbd, 0x8a, 0xe9, 0xbd, 0xca, 0xb2, 0x6a, 0xbe, 0x76
db 0x2c, 0xbe, 0x45, 0x3f, 0x22, 0x03, 0xb1, 0xab, 0x2d, 0xe0, 0x70, 0x52, 0xe5, 0x27, 0x8e, 0xbc
db 0xa9, 0x8d, 0x13, 0xf4, 0xe5, 0xd7, 0xeb, 0x4e, 0x30, 0x3f, 0x76, 0x3b, 0x64, 0xad, 0x57, 0x53
db 0x91, 0x89, 0xf4, 0x9a, 0xd1, 0x38, 0x3d, 0x58, 0xdc, 0x83, 0x65, 0x4a, 0x36, 0x30, 0x73, 0x92
db 0x8c, 0x2f, 0x7d, 0x1e, 0x15, 0x3c, 0xca, 0x54, 0x6f, 0x17, 0xbd, 0xba, 0x97, 0x7e, 0x28, 0x11
db 0x8e, 0x96, 0x9f, 0x46, 0x84, 0x69, 0xe3, 0xc2, 0x8e, 0x1e, 0xea, 0x6b, 0x17, 0xa7, 0xf8, 0x17
db 0xc3, 0xd9, 0x9c, 0x53, 0x79, 0x95, 0x32, 0xf6, 0x78, 0xcd, 0x5d, 0x2f, 0x30, 0x06, 0xe8, 0x9f
db 0x5e, 0xb2, 0x4e, 0x56, 0xf5, 0x31, 0xc3, 0x41, 0xae, 0x4b, 0x0a, 0xbd, 0xdc, 0xce, 0xea, 0xfa
db 0x27, 0x09, 0x4e, 0xd1, 0x24, 0x14, 0x33, 0x8b, 0x21, 0x48, 0x99, 0x92, 0x07, 0xa4, 0x1a, 0x87
db 0x34, 0x15, 0xa6, 0x12, 0x92, 0x3f, 0xf0, 0x3e, 0x18, 0x3c, 0x65, 0x3a, 0x8b, 0x17, 0x9b, 0xf2
db 0xd9, 0x93, 0xa0, 0x19, 0x2b, 0x73, 0x59, 0x29, 0x6f, 0xb7, 0x75, 0x4b, 0x42, 0x24, 0x43, 0xa4
db 0x20, 0xd8, 0x59, 0x8d, 0x9f, 0xd6, 0x64, 0xa1, 0xeb, 0xe3, 0x65, 0x82, 0x69, 0x74, 0x1a, 0x2b
db 0x8d, 0x9a, 0x59, 0x5d, 0x47, 0x75, 0x63, 0xcd, 0xe4, 0x14, 0x48, 0x5f, 0x67, 0x00, 0x12, 0x3c
db 0x58, 0x27, 0x5e, 0x83, 0xde, 0xd8, 0x97, 0xd9, 0x09, 0xd9, 0x06, 0x64, 0x96, 0x67, 0xb4, 0x4f
db 0xb9, 0x58, 0x87, 0xc9, 0xb1, 0xdd, 0x64, 0x8f, 0x4e, 0x8f, 0xa9, 0xfa, 0x40, 0xe6, 0x8f, 0xaa
db 0x22, 0x26, 0x16, 0x15, 0x6a, 0xa3, 0x88, 0xae, 0xa2, 0xbc, 0xa3, 0xa3, 0x56, 0xa1, 0x74, 0x6c
db 0xa2, 0xd0, 0x47, 0x4b, 0x98, 0x0a, 0xea, 0xdd, 0xe8, 0x9c, 0xe1, 0x37, 0x44, 0x1a, 0xc0, 0xc7
db 0x83, 0x07, 0x42, 0xca, 0x98, 0x36, 0xd7, 0x43, 0x18, 0x51, 0x32, 0xf6, 0x99, 0x61, 0x73, 0x79
db 0x51, 0xc4, 0xe9, 0x5b, 0x9e, 0xa8, 0xb4, 0x28, 0x49, 0xbb, 0x44, 0x90, 0xe2, 0xf7, 0x7e, 0x61
db 0x27, 0xbb, 0x85, 0x58, 0xd0, 0xdc, 0x94, 0x53, 0x02, 0x50, 0xfe, 0xc7, 0x37, 0xa2, 0x20, 0x1b
db 0x57, 0x00, 0x9b, 0x7c, 0xa4, 0x6c, 0xa6, 0xb1, 0xae, 0xd0, 0x03, 0x67, 0x2b, 0x82, 0xd9, 0x99
db 0x76, 0xd0, 0xc7, 0x7d, 0x2d, 0xbd, 0x39, 0x28, 0xcf, 0xe1, 0x13, 0xce, 0x1c, 0xe6, 0x4c, 0xa7
db 0x7a, 0x8c, 0x4f, 0xa6, 0x30, 0x77, 0x6b, 0x78, 0x39, 0x6e, 0x10, 0xd1, 0x9c, 0x9a, 0xda, 0x2d
db 0xc9, 0xef, 0xd7, 0xb1, 0xb8, 0xdf, 0x21, 0xce, 0x96, 0x53, 0xaa, 0xa6, 0x76, 0x52, 0x56, 0x0e
db 0xe6, 0x7f, 0xed, 0x88, 0x15, 0x2a, 0xc1, 0xfe, 0xb3, 0x35, 0x54, 0x09, 0x9b, 0x5d, 0x21, 0x62
db 0xc8, 0x6f, 0x2c, 0x6e, 0x56, 0xc8, 0xd9, 0x40, 0x67, 0xeb, 0x26, 0xf5, 0xcb, 0x18, 0xb1, 0x89
db 0xfe, 0x58, 0x1a, 0xff, 0x41, 0xb5, 0xd6, 0xe5, 0xb3, 0x82, 0x29, 0x82, 0xee, 0xbb, 0xb2, 0x5a
db 0x71, 0xf2, 0xca, 0xf1, 0x2f, 0xa7, 0x4d, 0xb1, 0x5c, 0xbc, 0xc3, 0x1a, 0xb4, 0x20, 0x6a, 0x7e
db 0xb9, 0x5e, 0xcb, 0x9b, 0xf3, 0x1c, 0x2b, 0x16, 0xab, 0x15, 0x8d, 0xb5, 0x81, 0xf3, 0xbb, 0xc1
db 0x8e, 0x2c, 0xd6, 0xd1, 0xa8, 0x23, 0x3c, 0x98, 0x3f, 0x4e, 0xff, 0x97, 0x77, 0xd1, 0xbd, 0xda
db 0xff, 0x9c, 0x55, 0x01, 0x1c, 0x4b, 0x4b, 0x1a, 0xa9, 0x3d, 0xe9, 0xbd, 0x3c, 0x5b, 0xfd, 0x65
db 0x34, 0x9c, 0x78, 0x8c, 0x83, 0x46, 0x72, 0xed, 0x66, 0xee, 0x00, 0xac, 0xca, 0x09, 0xaa, 0x3a
db 0x2c, 0xc1, 0x7e, 0xde, 0x44, 0xbd, 0xe3, 0x5a, 0x11, 0x41, 0xc7, 0xc8, 0x65, 0x7a, 0xc7, 0xbb
db 0x44, 0xad, 0x97, 0x17, 0xe8, 0x9f, 0x29, 0x2b, 0x78, 0x6d, 0x96, 0xb6, 0x9c, 0x3a, 0x6a, 0xc2
db 0xab, 0x9a, 0x16, 0x6f, 0x05, 0x78, 0x0d, 0x83, 0xa5, 0x46, 0x8c, 0xd7, 0x57, 0x1e, 0x80, 0x2f
db 0x7e, 0x81, 0x68, 0xa4, 0xc4, 0x3d, 0x6c, 0xae, 0x6b, 0x98, 0xb9, 0xe4, 0xb4, 0xfb, 0xf4, 0x19
db 0xf9, 0xcd, 0xbb, 0xd0, 0xbc, 0x22, 0xdd, 0x2c, 0xbe, 0x11, 0x01, 0xc2, 0x53, 0xdd, 0xa3, 0x3a
db 0xbf, 0x5f, 0x2a, 0x94, 0x8b, 0x58, 0x6e, 0xe3, 0x4e, 0x1b, 0x0d, 0x30, 0x1b, 0x1c, 0x6c, 0x24
db 0x0e, 0xd9, 0x1c, 0xe1, 0x4d, 0x42, 0x48, 0xa0, 0x07, 0xb1, 0xe8, 0x10, 0xa1, 0x51, 0x6a, 0x82
db 0x2e, 0x99, 0xb3, 0xbf, 0xe3, 0xff, 0x3c, 0x77, 0xf4, 0x0c, 0x1f, 0x22, 0x53, 0xd0, 0x99, 0x60
db 0x5d, 0x65, 0x80, 0xb9, 0xa3, 0xb7, 0x25, 0x6d, 0xa6, 0x4f, 0xb5, 0x72, 0xaa, 0x4d, 0x0d, 0x49
db 0x4c, 0x34, 0xc5, 0xf4, 0x1b, 0x5c, 0x3f, 0x6c, 0xbb, 0x86, 0xba, 0xc5, 0x32, 0xee, 0x23, 0x95
db 0xe5, 0x42, 0x66, 0x92, 0x89, 0x5e, 0xf4, 0xd4, 0x2d, 0x04, 0xf2, 0xbc, 0xd7, 0xc8, 0xc9, 0xd7
db 0xe3, 0xdb, 0x4e, 0x4b, 0xda, 0x37, 0x1f, 0xfa, 0x9c, 0xaf, 0x4b, 0x1e, 0xab, 0x64, 0x2a, 0x59
db 0x24, 0x0f, 0xb4, 0xaf, 0xd6, 0x32, 0x30, 0xcd, 0x7c, 0xf3, 0x0f, 0xa9, 0xac, 0x3f, 0x55, 0xa2
db 0x92, 0x21, 0x58, 0x4e, 0x99, 0xbc, 0x9f, 0xfd, 0x16, 0x7c, 0x4e, 0x5b, 0xb4, 0xc7, 0x5f, 0x8d
db 0x0e, 0x26, 0x72, 0x17, 0x02, 0x7d, 0x12, 0xa0, 0xc5, 0xc1, 0x66, 0xd3, 0x19, 0x49, 0x42, 0xfb
db 0x18, 0xd7, 0x18, 0x79, 0xd3, 0x32, 0xfc, 0x4a, 0xab, 0x82, 0x72, 0x0a, 0x90, 0xb7, 0xbc, 0x00
db 0x16, 0x99, 0xd3, 0x9a, 0x76, 0xc6, 0x44, 0x92, 0x9b, 0x2b, 0x6a, 0x35, 0xca, 0x4e, 0x2e, 0x9c
db 0x7f, 0xcb, 0xd3, 0x65, 0x1c, 0xa6, 0x95, 0x2c, 0x3d, 0xe4, 0xd3, 0xe6, 0xe7, 0xe0, 0xde, 0x1e
db 0x54, 0xb3, 0x09, 0x3e, 0x34, 0x35, 0x68, 0x53, 0x01, 0x02, 0xf1, 0x4c, 0x89, 0x19, 0xe3, 0xc6
db 0x4a, 0x51, 0x49, 0xf5, 0x5f, 0x3e, 0xcd, 0xae, 0x6e, 0xeb, 0x90, 0x1a, 0x53, 0x93, 0x0b, 0xe8
db 0xc2, 0x6e, 0xee, 0xf3, 0x38, 0x5d, 0xb8, 0xaf, 0x58, 0x4b, 0xe0, 0xfd, 0x07, 0xcf, 0x15, 0x89
db 0x2b, 0x01, 0x35, 0xbb, 0xa0, 0x2f, 0x7e, 0xd3, 0x34, 0x7b, 0x1f, 0x81, 0x12, 0x7f, 0xb0, 0xff
db 0xe7, 0xa0, 0xf2, 0xc4, 0x86, 0x98, 0x45, 0xe2, 0xa1, 0x1e, 0x4c, 0xc0, 0x23, 0x05, 0x49, 0x0b
db 0x0d, 0xc3, 0x1e, 0x30, 0x20, 0xc6, 0x34, 0xb7, 0xe1, 0x09, 0x84, 0xd5, 0x2a, 0x40, 0x75, 0x9b
db 0x46, 0xbb, 0xa5, 0xfe, 0xbd, 0x7d, 0x39, 0xe4, 0x7b, 0x38, 0xdc, 0x9c, 0xaf, 0xc8, 0x12, 0xf4
db 0x78, 0xb8, 0x51, 0x4a, 0x21, 0xfe, 0xf9, 0x77, 0xf6, 0xb5, 0xad, 0x69, 0xc9, 0x4d, 0xbf, 0x67
db 0xfc, 0x5d, 0x80, 0x7c, 0x76, 0x2c, 0xe5, 0xf2, 0xd7, 0x7f, 0xce, 0xb5, 0x1c, 0x09, 0xa5, 0xc3
db 0x98, 0x18, 0x2d, 0x18, 0xfb, 0x61, 0x13, 0xea, 0xbc, 0x87, 0x3a, 0x3f, 0xb4, 0xaf, 0x3c, 0x3b
db 0x3b, 0xb6, 0xd2, 0xc7, 0x5c, 0x2c, 0xe1, 0x11, 0xb3, 0x9d, 0xf1, 0x52, 0xba, 0xb5, 0xf0, 0x69
db 0xcd, 0xd2, 0x93, 0x9e, 0x80, 0x45, 0x78, 0x17, 0x6d, 0x52, 0x51, 0xad, 0xed, 0x6d, 0x9e, 0x15
db 0xca, 0xb1, 0xfe, 0x22, 0x7b, 0x87, 0xb8, 0x40, 0x06, 0x2d, 0xb0, 0xbb, 0x05, 0x7c, 0x52, 0xd2
db 0xcd, 0xc8, 0x9c, 0xea, 0xd3, 0x4c, 0xb5, 0x06, 0xb4, 0x70, 0xad, 0x09, 0xa5, 0xb8, 0x66, 0xba
db 0x31, 0x0d, 0xe0, 0xe2, 0xcf, 0x62, 0x9f, 0x6d, 0x6d, 0x1a, 0x47, 0x21, 0xd5, 0x33, 0x6b, 0xd7
db 0x75, 0xff, 0x98, 0x6c, 0xb2, 0x78, 0x6d, 0x45, 0x50, 0xeb, 0xfb, 0xea, 0xb7, 0x2a, 0x27, 0x02
db 0xc4, 0x03, 0xde, 0x56, 0x23, 0x26, 0x10, 0x21, 0x57, 0x9c, 0x3b, 0x4c, 0x79, 0x2c, 0x3e, 0xfe
db 0xc8, 0x16, 0xe4, 0xd6, 0x60, 0xb8, 0x46, 0xe3, 0x4b, 0x7e, 0x3d, 0xb3, 0x83, 0x19, 0x54, 0x65
db 0x51, 0x7a, 0x81, 0xdd, 0x07, 0x33, 0x92, 0x08, 0x64, 0x0b, 0xc2, 0x06, 0x5c, 0x07, 0x81, 0x40
db 0x1b, 0xb4, 0x5a, 0x47, 0x2b, 0xdc, 0x96, 0x98, 0x4c, 0x65, 0xad, 0x8e, 0x8e, 0x77, 0xbe, 0x99
db 0x60, 0x4c, 0xb5, 0x6b, 0xed, 0xb7, 0x52, 0x5d, 0x99, 0x2e, 0x93, 0x40, 0xfe, 0x45, 0x83, 0x28
db 0x9b, 0x8b, 0x7f, 0x77, 0x2b, 0xdc, 0x61, 0xbe, 0x62, 0x28, 0xe8, 0x23, 0x3f, 0xdb, 0x1d, 0x6d
db 0x3b, 0xe8, 0x90, 0x05, 0x12, 0xf2, 0xb4, 0xf0, 0x1b, 0xbb, 0x2f, 0x4b, 0x9e, 0x9f, 0x0e, 0x4e
db 0x9e, 0x6a, 0x38, 0x7e, 0x97, 0x13, 0x90, 0x57, 0xb9, 0x49, 0x52, 0xb7, 0x4f, 0xd3, 0xc1, 0x39
db 0x95, 0x20, 0xd4, 0x83, 0x48, 0x0e, 0x7a, 0x9d, 0x89, 0x9d, 0xf4, 0xec, 0xe7, 0xcc, 0xde, 0x0a
db 0xac, 0xc5, 0xb0, 0x4d, 0xc5, 0x25, 0x74, 0x62, 0x66, 0x51, 0x4f, 0xeb, 0x4e, 0x9d, 0x3d, 0x04
db 0x27, 0xec, 0xfe, 0x8d, 0x03, 0x20, 0x38, 0x30, 0x5d, 0xf3, 0xf0, 0x97, 0xbb, 0xa9, 0xd1, 0xea
db 0x73, 0x73, 0x40, 0x2c, 0x0b, 0xa7, 0xc9, 0x8d, 0xac, 0x75, 0xc4, 0x46, 0x7c, 0xc2, 0x9a, 0x26
db 0x07, 0xae, 0x02, 0x27, 0x42, 0xa8, 0x90, 0xb6, 0x9b, 0x98, 0xec, 0x2e, 0xf6, 0xf6, 0x17, 0xda
db 0x9f, 0xfb, 0x54, 0xea, 0xae, 0x96, 0xfe, 0xd6, 0x35, 0x4f, 0x07, 0x9f, 0xf4, 0x57, 0x36, 0xfe
db 0xb1, 0x43, 0xee, 0xe3, 0x21, 0x00, 0x43, 0x12, 0xf2, 0xff, 0xa5, 0x37, 0x65, 0x01, 0xf0, 0xb4
db 0xe8, 0x68, 0xa3, 0xff, 0x31, 0x5f, 0x3f, 0x56, 0xa5, 0xd2, 0xcc, 0xab, 0xa4, 0x90, 0xf9, 0x98
db 0x0b, 0xdc, 0x0d, 0x20, 0x3c, 0x33, 0xda, 0xf1, 0x54, 0xd5, 0x6d, 0xc4, 0xa9, 0xc4, 0x54, 0x29
db 0x56, 0x69, 0x96, 0x98, 0x74, 0x13, 0x72, 0x1f, 0x95, 0xe9, 0xe2, 0xab, 0x60, 0x74, 0x91, 0x96
db 0xdf, 0xa4, 0xd6, 0x62, 0x3c, 0x35, 0x7e, 0xc4, 0x21, 0x16, 0xa3, 0x32, 0xac, 0x20, 0x52, 0xd4
db 0xbb, 0xc2, 0xa5, 0x97, 0x86, 0x4a, 0x55, 0xf4, 0x09, 0xf2, 0x0e, 0xd6, 0x1a, 0xfa, 0x00, 0x67
db 0x45, 0x57, 0xb3, 0xaa, 0xe5, 0x7c, 0x17, 0x8d, 0xde, 0x75, 0xd7, 0x49, 0x6e, 0xb0, 0xb2, 0xa0
db 0x58, 0xd8, 0x01, 0xf0, 0x22, 0x9c, 0xe4, 0xeb, 0x71, 0x5f, 0x4d, 0x38, 0xf2, 0x7e, 0xee, 0xba
db 0xf9, 0x39, 0xff, 0x42, 0x91, 0x00, 0x63, 0x5c, 0x86, 0x02, 0x81, 0x51, 0x10, 0xfb, 0xcf, 0x2a
db 0xcf, 0x16, 0xd9, 0x8f, 0x3a, 0xbb, 0x29, 0xcb, 0xe2, 0xc9, 0xd9, 0xe2, 0xd9, 0x05, 0x1b, 0x46
db 0x08, 0x2c, 0x6d, 0x5b, 0x1a, 0x7d, 0x5b, 0xca, 0x5b, 0xae, 0x18, 0x48, 0x15, 0x3b, 0x85, 0xd1
db 0x29, 0xcf, 0xaf, 0xa5, 0x68, 0xe9, 0x8d, 0x9e, 0x0b, 0xe1, 0x55, 0x54, 0x68, 0x28, 0x9b, 0x4c
db 0x94, 0x30, 0x3a, 0xc0, 0xaa, 0xf8, 0xeb, 0x7b, 0x58, 0x53, 0x5f, 0x25, 0x2e, 0xbf, 0x72, 0x26
db 0xd8, 0x9c, 0xa9, 0xfe, 0x30, 0xe0, 0x68, 0x25, 0xba, 0x71, 0x1a, 0x82, 0xbb, 0xee, 0x03, 0xc9
db 0x4b, 0x0a, 0x22, 0xda, 0x93, 0xa0, 0x72, 0x49, 0x72, 0x3a, 0x8f, 0xbe, 0x39, 0x04, 0x7c, 0x06
db 0xa1, 0x50, 0xa1, 0x94, 0xb4, 0x66, 0x91, 0xee, 0x76, 0xa4, 0xbe, 0x21, 0x33, 0xbe, 0xa9, 0x68
db 0xe6, 0x03, 0xdd, 0x25, 0x3b, 0x78, 0xe3, 0x5a, 0x0c, 0xcf, 0x2b, 0xa2, 0x03, 0x63, 0x8d, 0xd7
db 0xc4, 0xf0, 0x6e, 0xea, 0xe1, 0x76, 0x93, 0x38, 0x7b, 0x85, 0xef, 0xff, 0xce, 0xb0, 0xe1, 0xe3
db 0x86, 0x3d, 0xb6, 0xae, 0xee, 0xf7, 0x92, 0x8a, 0x1b, 0x29, 0x00, 0x9b, 0x85, 0xaf, 0xa2, 0x5e
db 0x90, 0xd9, 0xdc, 0xca, 0xde, 0xde, 0xab, 0xfe, 0x05, 0x61, 0x3c, 0xb6, 0x2f, 0x40, 0x59, 0x1f
db 0x73, 0x80, 0x52, 0xf6, 0x6f, 0x28, 0x30, 0x4b, 0xf2, 0x88, 0x9e, 0x63, 0x84, 0x1b, 0xd2, 0xf4
db 0x67, 0x3b, 0xaf, 0x48, 0x27, 0xfd, 0x7e, 0x30, 0x6e, 0xb8, 0x81, 0xbf, 0xe5, 0x4c, 0x19, 0x16
db 0x24, 0xd0, 0x8e, 0x3a, 0xc9, 0xcd, 0xc8, 0x6f, 0x2e, 0x99, 0xda, 0xb8, 0x7c, 0xd9, 0xbb, 0x2c
db 0xe3, 0xdf, 0xd0, 0x96, 0xe2, 0xcc, 0x99, 0x5b, 0x1d, 0xff, 0x81, 0x74, 0x84, 0x0b, 0x9d, 0x09
db 0x3e, 0x1b, 0x0c, 0x42, 0x3d, 0x96, 0x15, 0x44, 0xed, 0x97, 0x9a, 0x99, 0x68, 0x02, 0x2c, 0x79
db 0x8f, 0xcc, 0xff, 0x83, 0x5e, 0x6e, 0x97, 0x00, 0x50, 0x83, 0xc2, 0x29, 0x2b, 0x27, 0xe6, 0x4f
db 0x18, 0xb0, 0x45, 0xa9, 0xf8, 0x30, 0x35, 0x7f, 0x20, 0xdd, 0xd7, 0x07, 0x32, 0x55, 0x95, 0x4a
db 0xf3, 0xf5, 0x35, 0x5b, 0xac, 0xef, 0xfa, 0xbb, 0x54, 0xba, 0x4d, 0x79, 0x66, 0xce, 0x38, 0x5e
db 0x23, 0xd7, 0x1b, 0x03, 0x37, 0x74, 0xa7, 0xe0, 0xb1, 0x2c, 0xe5, 0xa4, 0x00, 0x36, 0x9a, 0xe9
db 0x36, 0xd4, 0x3e, 0x35, 0x37, 0xb2, 0xc1, 0x71, 0x90, 0x80, 0x3b, 0xd8, 0x6b, 0x7e, 0x79, 0x0a
db 0x7d, 0xe3, 0x3d, 0xc8, 0xd3, 0xb3, 0x56, 0xb6, 0xef, 0x73, 0x3d, 0x24, 0x07, 0x0e, 0xeb, 0x8e
db 0x9b, 0x25, 0xaf, 0x3b, 0xa3, 0x92, 0xf5, 0x19, 0x16, 0xba, 0x1f, 0x6f, 0x92, 0x4b, 0x3f, 0x3c
db 0xc8, 0xac, 0xdd, 0x70, 0xc6, 0x3b, 0x45, 0x0b, 0xa5, 0xe0, 0x8f, 0xa4, 0xd6, 0x56, 0xd8, 0xb9
db 0xc1, 0x1a, 0x53, 0x76, 0x37, 0x60, 0xc9, 0xf4, 0xc8, 0x0a, 0x17, 0x6d, 0x1d, 0xb8, 0x8e, 0xec
db 0xa8, 0x9c, 0x71, 0x08, 0x1f, 0x45, 0x96, 0xc8, 0xed, 0x1e, 0x47, 0x09, 0xbb, 0xe6, 0xee, 0x36
db 0x8e, 0x87, 0xc6, 0xeb, 0xe5, 0x88, 0xd8, 0xab, 0x98, 0x41, 0x4f, 0x2a, 0x49, 0x15, 0x68, 0xf6
db 0x51, 0xaf, 0xc7, 0x74, 0x7c, 0xaa, 0x26, 0x1a, 0x2f, 0xe6, 0x96, 0x86, 0x7c, 0x00, 0xa4, 0x57
db 0x90, 0x1f, 0x83, 0x02, 0x0c, 0xb2, 0xec, 0x27, 0x7f, 0xbc, 0x78, 0x11, 0x64, 0xbe, 0x34, 0x25
db 0xbd, 0xf8, 0x56, 0x00, 0x5f, 0xdd, 0x85, 0x95, 0x23, 0xad, 0xe9, 0x26, 0x1e, 0xd3, 0xfc, 0x22
db 0xe6, 0x35, 0x07, 0xbc, 0xf6, 0x88, 0x19, 0x61, 0x2e, 0xd5, 0x0d, 0xc0, 0x98, 0x79, 0x59, 0x0a
db 0x33, 0x44, 0xa8, 0x70, 0xd8, 0xda, 0x45, 0x72, 0xdb, 0x83, 0xf7, 0xbe, 0xbb, 0x93, 0xc9, 0xaa
db 0xf5, 0xfb, 0xdc, 0x0a, 0x55, 0x54, 0xd1, 0xae, 0x9e, 0x14, 0x38, 0x24, 0x06, 0x6e, 0x4d, 0x17
db 0xaa, 0xb1, 0xe4, 0x55, 0x9b, 0x7c, 0xc2, 0xe7, 0xb6, 0x82, 0x1b, 0x5d, 0x21, 0x20, 0xfc, 0x34
db 0x51, 0xf7, 0xfd, 0x20, 0x17, 0x4b, 0xd1, 0x9f, 0xc7, 0x2a, 0x57, 0x62, 0x4a, 0x60, 0x3f, 0xfa
db 0x70, 0x75, 0x1a, 0x3e, 0x9d, 0xbd, 0x6c, 0xe3, 0x60, 0xc3, 0xd3, 0xa6, 0x3b, 0x73, 0xa5, 0x4f
db 0x06, 0x79, 0xf4, 0x6e, 0x3a, 0xae, 0xa4, 0x98, 0x86, 0xb9, 0x1b, 0x8b, 0x66, 0xd9, 0x96, 0xdb
db 0xa5, 0x47, 0xd3, 0xa8, 0x05, 0x3c, 0x50, 0x57, 0x8a, 0x8f, 0xe0, 0x7f, 0xaf, 0x75, 0x30, 0x44
db 0x01, 0xce, 0x17, 0xb8, 0x89, 0xd4, 0x12, 0xaa, 0xe5, 0x2e, 0xe2, 0x75, 0x70, 0x06, 0x02, 0x5c
db 0xbd, 0x85, 0xaa, 0x75, 0x02, 0x98, 0xe0, 0x0f, 0xe9, 0x94, 0x43, 0x84, 0x8c, 0xca, 0xc1, 0x53
db 0x2f, 0x5c, 0x9a, 0x04, 0x9c, 0x2c, 0x50, 0xc7, 0x6d, 0x13, 0x70, 0x8f, 0x7d, 0xa5, 0x09, 0xc0
db 0x2b, 0x75, 0x55, 0x57, 0xc0, 0x51, 0xad, 0x86, 0x18, 0xc5, 0x9a, 0x9f, 0x1d, 0x99, 0x3e, 0xbd
db 0x38, 0x24, 0x33, 0xd6, 0x04, 0x98, 0xde, 0x19, 0xcc, 0xb3, 0x72, 0x53, 0x6b, 0xbb, 0x38, 0x03
db 0xdc, 0x86, 0xe3, 0x1b, 0x12, 0x04, 0x86, 0x92, 0x3d, 0x3f, 0xf4, 0x4d, 0x73, 0x8a, 0xe7, 0x67
db 0x68, 0xae, 0x63, 0x13, 0x7b, 0x48, 0x90, 0xce, 0x35, 0xfb, 0xf3, 0x46, 0x17, 0xb3, 0xcd, 0x2f
db 0xeb, 0xb5, 0x7a, 0x11, 0xa9, 0xe1, 0xa6, 0xab, 0x0c, 0x9e, 0x9f, 0xd1, 0x08, 0xae, 0xc1, 0x68
db 0xd2, 0xfc, 0x41, 0x36, 0xa8, 0xf4, 0x97, 0xbf, 0x86, 0x61, 0x90, 0x51, 0x02, 0x2e, 0x9a, 0x64
db 0x4e, 0xfb, 0xd1, 0xe5, 0x73, 0x24, 0x07, 0xb5, 0x70, 0xa1, 0xa2, 0xb7, 0xcb, 0x0c, 0xbc, 0x1a
db 0x4a, 0x55, 0x9e, 0x3f, 0x3b, 0xdb, 0x33, 0x4c, 0x01, 0x63, 0x1f, 0xbe, 0xae, 0x05, 0x3e, 0x45
db 0x9e, 0xcf, 0x2e, 0x5f, 0x3b, 0x83, 0x8a, 0xc7, 0xd7, 0x39, 0x3b, 0xfc, 0x54, 0xf0, 0x10, 0x42
db 0x9d, 0x5e, 0x12, 0xc2, 0xb8, 0x8c, 0x4e, 0x26, 0xd7, 0xa0, 0xa1, 0x7a, 0xc0, 0x27, 0x72, 0x52
db 0xdb, 0xc5, 0xed, 0xe1, 0x86, 0x19, 0x0a, 0xff, 0x43, 0x3d, 0x1c, 0x12, 0xb2, 0xbe, 0x5c, 0x12
db 0x4b, 0xbf, 0xff, 0x20, 0xe3, 0xde, 0x4a, 0x74, 0x89, 0x67, 0x42, 0xc3, 0xaf, 0xe3, 0x8a, 0x8a
db 0x57, 0x88, 0xdf, 0xbe, 0x1a, 0x0c, 0x58, 0xa1, 0xfe, 0x21, 0x57, 0x97, 0xf6, 0xef, 0xba, 0x34
db 0x54, 0x60, 0x00, 0x71, 0x09, 0x4a, 0x5b, 0x89, 0x61, 0x4a, 0x67, 0x19, 0x34, 0x44, 0x83, 0x21
db 0x3d, 0xeb, 0x67, 0xff, 0xf7, 0x68, 0xbb, 0x29, 0xa0, 0x74, 0x5e, 0xad, 0x78, 0xb4, 0x11, 0xc5
db 0x5e, 0x0e, 0xc0, 0xd4, 0xe7, 0x50, 0x40, 0xa1, 0xb5, 0x98, 0xdb, 0x75, 0x1f, 0xa5, 0xbc, 0x1b
db 0xeb, 0x13, 0x18, 0x0e, 0x92, 0x54, 0x17, 0x2d, 0x5b, 0xf8, 0x09, 0x50, 0x27, 0x49, 0xf5, 0x01
db 0xb9, 0x51, 0xd1, 0x85, 0x34, 0x67, 0xd8, 0xb9, 0x5f, 0x01, 0x7b, 0xfc, 0xe7, 0x1e, 0xc8, 0xfc
db 0x2f, 0xda, 0x81, 0xfd, 0x76, 0x69, 0x5b, 0x47, 0x98, 0x1b, 0x9b, 0xee, 0x9b, 0x18, 0x8e, 0x30
db 0x85, 0x9d, 0x45, 0xde, 0xa8, 0x9b, 0x4e, 0x57, 0x26, 0x90, 0x0b, 0x9a, 0xe0, 0xf7, 0xfa, 0x08
db 0x1d, 0xe3, 0xca, 0xb8, 0xaa, 0xda, 0x4e, 0xe3, 0xb6, 0x33, 0x05, 0x9a, 0x75, 0x70, 0x18, 0x86
db 0x60, 0x31, 0xc1, 0x05, 0x56, 0x02, 0x30, 0xbd, 0xff, 0x3b, 0xa9, 0xca, 0xe4, 0x84, 0xe6, 0x96
db 0x47, 0xcf, 0x8b, 0xa8, 0xd4, 0x63, 0x8f, 0x8f, 0x55, 0x4a, 0xbc, 0x4c, 0x3c, 0x61, 0x96, 0x38
db 0xcc, 0x10, 0x7e, 0x4e, 0x5c, 0x97, 0xd3, 0x54, 0x22, 0xde, 0xfb, 0x03, 0x81, 0x4e, 0x6d, 0x76
db 0xb5, 0xab, 0x8f, 0xba, 0xf5, 0xf0, 0x1a, 0xf9, 0x69, 0x64, 0x30, 0xb3, 0x19, 0x30, 0x54, 0x97
db 0x14, 0x66, 0x5c, 0xcf, 0x48, 0x0f, 0x74, 0xf3, 0xbe, 0x16, 0x10, 0x6c, 0xb4, 0x93, 0x86, 0xd1
db 0x21, 0xd0, 0x6a, 0x12, 0x35, 0x03, 0x45, 0x99, 0xaa, 0xe1, 0x0a, 0xd9, 0x58, 0x83, 0x2f, 0x97
db 0xcb, 0x0d, 0x81, 0x4b, 0x82, 0x01, 0x6f, 0xd6, 0x20, 0xee, 0xf3, 0xbf, 0xdc, 0x3d, 0x67, 0x6c
db 0xa5, 0x7c, 0x6d, 0x21, 0x09, 0x99, 0x2e, 0x0a, 0x98, 0x7c, 0x50, 0x56, 0x19, 0x54, 0xcc, 0x79
db 0xe1, 0x84, 0x18, 0x86, 0xf8, 0x5a, 0x1b, 0xf7, 0x1f, 0x38, 0xe0, 0x3a, 0xb9, 0x50, 0xc1, 0xf1
db 0xbe, 0x66, 0x89, 0xe2, 0x68, 0x4a, 0x11, 0x0b, 0xfb, 0x84, 0x02, 0x38, 0x31, 0xf4, 0xda, 0x50
db 0xb6, 0x5f, 0x27, 0x62, 0xc7, 0x5a, 0x0f, 0x99, 0xb7, 0x7e, 0x4a, 0x49, 0xe9, 0x67, 0xe0, 0xa5
db 0x0d, 0x08, 0x95, 0xf0, 0xe4, 0x3b, 0x62, 0x30, 0x2b, 0x89, 0x21, 0xdd, 0x52, 0x99, 0x12, 0x16
db 0x83, 0x94, 0x6a, 0x38, 0x1f, 0x8d, 0x81, 0xbf, 0x1f, 0xf9, 0xe0, 0x9c, 0x80, 0xcc, 0x7c, 0xfe
db 0x33, 0x35, 0x27, 0x26, 0xca, 0xcc, 0x1f, 0x43, 0xcd, 0xb0, 0x74, 0x0e, 0xff, 0x1c, 0x86, 0x43
db 0xab, 0x44, 0xbc, 0x31, 0xff, 0xa4, 0x54, 0x95, 0xd4, 0x79, 0x9e, 0xc0, 0xed, 0x87, 0x1c, 0x2e
db 0x50, 0x47, 0xad, 0xc0, 0x2f, 0x5e, 0x8c, 0x15, 0xfb, 0x86, 0x2c, 0xa5, 0x61, 0x2a, 0x60, 0x12
db 0xbc, 0x1f, 0x84, 0xe9, 0x75, 0x55, 0x7e, 0x2c, 0x11, 0xd0, 0xfc, 0x66, 0x89, 0x86, 0x2f, 0x26
db 0x43, 0x1e, 0xa6, 0x6c, 0xa6, 0x40, 0xa9, 0x37, 0x65, 0x99, 0x72, 0xe1, 0x1a, 0xdc, 0x23, 0x53
db 0x09, 0x8e, 0xa1, 0xd6, 0xda, 0xd9, 0x95, 0xaf, 0x58, 0xe0, 0x2a, 0x4a, 0xd3, 0xbd, 0xbd, 0x86

db 0xcb, 0x6c, 0xc1, 0xc1, 0x6a, 0xd5, 0xbe, 0x87, 0x35, 0x87, 0xbc, 0x44, 0x4e, 0xf5, 0x91, 0xc1
db 0x2c, 0x5b, 0x4b, 0x4e, 0xbc, 0xe7, 0x7b, 0x43, 0xc1, 0x2d, 0xb5, 0xc2, 0x30, 0xcd, 0x07, 0xda
db 0x2d, 0xd8, 0x27, 0x1f, 0xcf, 0x4f, 0x94, 0xf8, 0x90, 0xd3, 0x77, 0xed, 0x44, 0xff, 0x39, 0x42
db 0x4d, 0xd1, 0x0a, 0x1e, 0xc0, 0xb2, 0xca, 0x28, 0xe2, 0xfe, 0xc9, 0x11, 0x88, 0xb6, 0xa3, 0xb4
db 0xf2, 0xd3, 0xc4, 0x9c, 0x74, 0x7a, 0x88, 0x45, 0xf5, 0xcd, 0x73, 0x93, 0x70, 0xeb, 0xc0, 0x4c
db 0x6e, 0xae, 0x2a, 0x60, 0x1b, 0x4b, 0x34, 0x06, 0xb3, 0x1a, 0x3b, 0x2b, 0xd8, 0x1e, 0xaa, 0x05
db 0x25, 0x38, 0x5e, 0x9a, 0x4b, 0x12, 0x6c, 0x6e, 0x14, 0xa9, 0xde, 0x61, 0x32, 0xf8, 0xf3, 0x03
db 0x33, 0x71, 0xf6, 0xd7, 0x50, 0xdd, 0x28, 0x04, 0xac, 0xaa, 0x18, 0xe0, 0x8d, 0xa9, 0x95, 0x9a
db 0xb2, 0x01, 0x57, 0x3d, 0xfe, 0xc0, 0x1d, 0xc3, 0x1b, 0x44, 0x21, 0xc4, 0xaa, 0xf9, 0x8b, 0x4b
db 0x9b, 0x63, 0xb9, 0xbb, 0x40, 0xc9, 0x0a, 0xb9, 0xad, 0x22, 0x2b, 0xb9, 0x00, 0xdd, 0x4f, 0x97
db 0x63, 0xe7, 0xf3, 0x30, 0x9c, 0x41, 0xde, 0xc6, 0xbe, 0xfe, 0x6b, 0x97, 0x2a, 0x40, 0x1e, 0x93
db 0x60, 0xb8, 0x55, 0x35, 0x76, 0xa5, 0x1a, 0x66, 0x43, 0x2f, 0x63, 0x93, 0x55, 0x76, 0xf6, 0x2a
db 0x12, 0x7a, 0x0a, 0x3e, 0xd2, 0x0e, 0x57, 0xb5, 0xc7, 0x0d, 0x23, 0xca, 0x9f, 0xfe, 0xbf, 0x5e
db 0xe6, 0x07, 0x9c, 0x68, 0x7e, 0x5e, 0x72, 0xc9, 0xc8, 0x24, 0x6b, 0x7c, 0x01, 0x8b, 0xd1, 0x97
db 0x33, 0xbc, 0xb1, 0xa0, 0x37, 0x1e, 0x47, 0xc0, 0x5f, 0x06, 0xeb, 0x43, 0x5b, 0x67, 0x94, 0xa6
db 0x74, 0x71, 0xe4, 0xc3, 0x2c, 0x4e, 0xc5, 0xad, 0x64, 0x39, 0xb9, 0x69, 0x6f, 0xbd, 0x21, 0x40
db 0x3b, 0xda, 0x20, 0x58, 0xb1, 0x45, 0xe9, 0xa8, 0x44, 0x93, 0xae, 0xc9, 0x31, 0x55, 0x5e, 0x8d
db 0x94, 0x18, 0x42, 0x86, 0x21, 0xc0, 0x11, 0x6c, 0x2f, 0x46, 0x84, 0xa7, 0x3c, 0xbe, 0x2b, 0x07
db 0xdb, 0x25, 0xc6, 0x1d, 0xfe, 0x53, 0x22, 0x5e, 0x09, 0x12, 0x0d, 0xfe, 0x6b, 0x50, 0x9e, 0x4b
db 0x9a, 0xde, 0x8f, 0xf5, 0x5d, 0xc4, 0x78, 0x3f, 0x62, 0xf2, 0xea, 0xed, 0x91, 0x6b, 0x5e, 0x21
db 0x35, 0x7e, 0x98, 0xd7, 0x5e, 0x98, 0xe2, 0x69, 0x19, 0x3d, 0x2b, 0xab, 0x81, 0x76, 0x28, 0xea
db 0x21, 0x26, 0xaf, 0x36, 0xa7, 0xc4, 0x65, 0x5e, 0x48, 0x61, 0xee, 0xaa, 0xd5, 0xe7, 0x7b, 0x7c
db 0x51, 0x6c, 0xe5, 0x5d, 0xe0, 0xb2, 0x65, 0x4c, 0x75, 0x08, 0x1f, 0xa6, 0x5a, 0x42, 0xf0, 0xa1
db 0x3d, 0xba, 0xf7, 0x65, 0x44, 0x77, 0x5c, 0xfc, 0x71, 0x30, 0xed, 0xdb, 0x8a, 0x08, 0x5d, 0xec
db 0xc8, 0x91, 0x55, 0xba, 0xe1, 0xad, 0x37, 0x07, 0x97, 0x73, 0x02, 0xe4, 0xa8, 0xf5, 0x84, 0xf4
db 0x0f, 0x32, 0x0e, 0x72, 0xa7, 0x66, 0x9f, 0x8a, 0xc2, 0xdf, 0x11, 0x60, 0x58, 0xff, 0x5d, 0x9c
db 0xac, 0x3f, 0x20, 0x8d, 0x62, 0xc0, 0xce, 0xcc, 0x36, 0xe0, 0xe6, 0xc0, 0x44, 0xa2, 0xee, 0x7d
db 0x9d, 0x21, 0x2c, 0x57, 0x52, 0x12, 0x4a, 0xf6, 0x78, 0x5e, 0x9c, 0xd4, 0x30, 0x4c, 0x43, 0xe6
db 0xd3, 0xdc, 0x51, 0x4d, 0x48, 0xf0, 0xa4, 0x75, 0xff, 0x30, 0x90, 0x45, 0xf6, 0xfa, 0x8c, 0xc0
db 0xc9, 0xf0, 0xf4, 0x2d, 0xeb, 0x68, 0xae, 0x53, 0xaf, 0x3b, 0x05, 0xdd, 0x07, 0xb8, 0xf3, 0x26
db 0x02, 0x56, 0xef, 0xba, 0x87, 0xbe, 0x11, 0x50, 0xea, 0x9a, 0x6b, 0x54, 0xcf, 0x82, 0x34, 0x78
db 0x5b, 0x95, 0xb1, 0x68, 0x6c, 0xed, 0x90, 0xab, 0x4a, 0x08, 0x4a, 0x8b, 0x22, 0x30, 0x82, 0xa3
db 0x43, 0x94, 0xb3, 0x44, 0x99, 0x83, 0x4d, 0x62, 0x26, 0xf5, 0x56, 0x0c, 0x1a, 0xb8, 0x85, 0x46
db 0x23, 0x1c, 0xfc, 0xf3, 0x5c, 0xa5, 0x3a, 0x34, 0x32, 0x67, 0x95, 0x66, 0xa6, 0x40, 0x8b, 0x92
db 0x17, 0x71, 0x11, 0x37, 0xcb, 0xfc, 0xaf, 0x69, 0x51, 0x86, 0x55, 0xa9, 0x1e, 0x35, 0x46, 0x41
db 0x5c, 0x08, 0xea, 0x6c, 0x52, 0xe8, 0xd1, 0x33, 0x01, 0x84, 0xce, 0x73, 0xd1, 0x7a, 0x6e, 0xfc
db 0xd1, 0x9f, 0x03, 0xb2, 0xdb, 0xd4, 0xed, 0x39, 0xef, 0xe8, 0xeb, 0x1f, 0x12, 0xf4, 0xc7, 0x44
db 0xd4, 0x66, 0x19, 0xac, 0xd5, 0x8e, 0xe2, 0x8c, 0xc1, 0x5b, 0x63, 0xde, 0xee, 0xa2, 0x39, 0xcc
db 0x4a, 0x52, 0x6c, 0x06, 0x72, 0x95, 0x72, 0xd2, 0x79, 0x29, 0x0e, 0x87, 0xcf, 0x58, 0x68, 0xe8
db 0x38, 0xac, 0x37, 0xdb, 0xac, 0x38, 0xf1, 0xcf, 0x1d, 0xdd, 0x08, 0xad, 0xfb, 0xfd, 0x07, 0x2b
db 0xae, 0x87, 0x96, 0x15, 0xac, 0x4b, 0x3d, 0xdd, 0x1c, 0xd8, 0xdb, 0x60, 0x9f, 0x45, 0x34, 0xab
db 0xcb, 0xd3, 0x86, 0x1f, 0xde, 0xcc, 0x00, 0x6c, 0x50, 0xd1, 0x9d, 0x4e, 0xaf, 0x2f, 0xb7, 0x85
db 0x70, 0xcc, 0xbf, 0xd8, 0x9b, 0x84, 0x67, 0xce, 0xaa, 0xd6, 0x74, 0xee, 0x35, 0x4d, 0x7b, 0xe4
db 0xa9, 0xae, 0x7f, 0x81, 0xf4, 0x29, 0xdf, 0x6d, 0x15, 0xc6, 0x02, 0xbf, 0x55, 0x6f, 0x74, 0xed
db 0x6f, 0x35, 0x03, 0xc0, 0x42, 0x7c, 0xd3, 0x9c, 0x38, 0xb7, 0x3d, 0xa4, 0xc7, 0x42, 0x4a, 0xf5
db 0x02, 0x8a, 0x63, 0xac, 0xb4, 0xf8, 0x0b, 0x19, 0xda, 0x0d, 0x3b, 0xad, 0xf8, 0xa8, 0xaa, 0x59
db 0x52, 0x71, 0x0b, 0xd6, 0xe2, 0xb3, 0x73, 0xa5, 0x49, 0xa3, 0x36, 0x65, 0x23, 0x29, 0x12, 0xbf
db 0x33, 0x75, 0xc2, 0x79, 0xec, 0x4e, 0xcf, 0xf4, 0x21, 0xf3, 0xe2, 0xa4, 0x82, 0xd9, 0x55, 0x84
db 0xda, 0x46, 0x18, 0xfc, 0x73, 0xac, 0xaf, 0x68, 0xc4, 0x49, 0x47, 0x6b, 0x6e, 0x50, 0x33, 0x27
db 0x47, 0x5e, 0xc7, 0x1d, 0xb7, 0x2e, 0x8c, 0x9f, 0xbe, 0xac, 0x5c, 0x11, 0x0a, 0x64, 0x14, 0xda
db 0x5d, 0xdc, 0xe2, 0xd2, 0xa0, 0x96, 0xcd, 0x08, 0xf3, 0xcf, 0xc6, 0xc8, 0x0f, 0x30, 0x07, 0x1f
db 0x67, 0xb9, 0x63, 0xf5, 0x73, 0xd3, 0xfb, 0x7e, 0xe1, 0xc1, 0x49, 0xc7, 0x9c, 0x4f, 0x99, 0xc0
db 0x1b, 0x7f, 0x60, 0x66, 0xcb, 0x3e, 0x38, 0x11, 0x0b, 0xdd, 0xc8, 0x68, 0xd8, 0x49, 0xe2, 0x66
db 0x79, 0x8f, 0xb1, 0xb2, 0xe6, 0xb5, 0x33, 0xf0, 0x28, 0x81, 0x7b, 0x48, 0xbf, 0x98, 0xfd, 0x88
db 0x29, 0xbb, 0x01, 0x85, 0x56, 0xd7, 0x61, 0xfc, 0xc3, 0x5b, 0x29, 0x33, 0xe6, 0x14, 0x9a, 0xdd
db 0xdc, 0x7b, 0x0b, 0x89, 0x95, 0xb2, 0x32, 0xaa, 0x06, 0x21, 0x82, 0x10, 0x91, 0x83, 0xe2, 0xa4
db 0x73, 0x5a, 0xb5, 0xbb, 0x64, 0x36, 0x6c, 0x65, 0x6a, 0x67, 0x2e, 0x44, 0x32, 0xf6, 0xf5, 0x47
db 0xbb, 0x35, 0x12, 0x75, 0xc4, 0xf6, 0xb2, 0x25, 0x8a, 0xc9, 0x4f, 0x40, 0xe5, 0x7c, 0x11, 0x03
db 0x8c, 0x88, 0x43, 0xca, 0x1e, 0x5e, 0xeb, 0xc4, 0x7f, 0x4d, 0x0d, 0x95, 0x18, 0xf8, 0xb4, 0x7b
db 0xca, 0xe4, 0x18, 0x2d, 0x27, 0x8b, 0xef, 0x04, 0x5a, 0x19, 0x0a, 0xba, 0x99, 0x06, 0x6c, 0x65
db 0x68, 0x82, 0x98, 0x35, 0x7e, 0x1d, 0x3f, 0xe5, 0xd3, 0x31, 0x3f, 0x26, 0x8a, 0x9b, 0xe4, 0x7c
db 0x90, 0xbd, 0x50, 0xb5, 0x47, 0x81, 0x95, 0xd7, 0xd2, 0xb8, 0xf5, 0xc8, 0x58, 0x4e, 0x98, 0xd0
db 0x36, 0xea, 0xcc, 0x4d, 0x83, 0xba, 0x7a, 0xf6, 0xff, 0x34, 0xa3, 0x3e, 0xdb, 0xe8, 0x1d, 0xd4
db 0x5e, 0xd2, 0x40, 0xbb, 0xa6, 0x5f, 0x42, 0x6a, 0xe8, 0x15, 0x66, 0x44, 0x69, 0x8d, 0x9f, 0x4d
db 0x0f, 0xbd, 0x0d, 0x9e, 0x44, 0x0b, 0xf0, 0xf8, 0xbe, 0x7a, 0x10, 0xc9, 0xcc, 0x87, 0xb9, 0x87
db 0xcb, 0xbb, 0x62, 0x5d, 0xb9, 0x3e, 0xa9, 0xa7, 0x71, 0x4e, 0xd1, 0x20, 0x2a, 0x26, 0x31, 0xf1
db 0x29, 0x67, 0x1b, 0x53, 0xa3, 0x27, 0x0e, 0x0b, 0xdd, 0xca, 0x14, 0xf3, 0xb9, 0x95, 0xf3, 0xf8
db 0x92, 0x16, 0x61, 0x00, 0x87, 0x17, 0x74, 0x7a, 0x5e, 0x08, 0xf1, 0xed, 0xc4, 0x76, 0x96, 0xb1
db 0x65, 0xf8, 0xd5, 0xec, 0xc8, 0x99, 0x1e, 0x37, 0xe8, 0x18, 0xf3, 0xaa, 0x5a, 0xa0, 0x9d, 0x56
db 0xc8, 0x3a, 0x9e, 0x31, 0x70, 0x80, 0x3a, 0x14, 0xc3, 0x59, 0x3b, 0x3c, 0x79, 0x00, 0x72, 0x22
db 0xbe, 0xf5, 0x25, 0x96, 0xe6, 0xa4, 0x8a, 0x4b, 0xe3, 0xa8, 0x2f, 0x0c, 0x58, 0xf5, 0x45, 0x75
db 0x1b, 0xc3, 0x58, 0x3b, 0x6b, 0x80, 0x14, 0xa9, 0xe5, 0x86, 0xe5, 0x21, 0x4c, 0xe4, 0xa5, 0xb6
db 0xeb, 0xcb, 0x84, 0x08, 0x49, 0x03, 0x31, 0x5c, 0xf2, 0x2c, 0xcf, 0xa3, 0xdf, 0x35, 0x30, 0xa4
db 0xeb, 0xcd, 0x7f, 0xc6, 0xdd, 0x8a, 0x81, 0xa6, 0x2f, 0x0f, 0x2c, 0x0b, 0x6a, 0xe8, 0x28, 0x64
db 0x97, 0x85, 0x70, 0xff, 0x8a, 0x51, 0xbc, 0xd5, 0x04, 0x07, 0x39, 0xa4, 0x38, 0x83, 0x97, 0x55
db 0xf7, 0x40, 0x2b, 0x24, 0x4d, 0x34, 0xae, 0x8b, 0xa9, 0xeb, 0x4f, 0x36, 0x42, 0xd4, 0x61, 0x28
db 0x64, 0xed, 0xb6, 0xf3, 0xec, 0xb4, 0x15, 0xff, 0x7a, 0xcb, 0x08, 0x3f, 0x49, 0x98, 0x4c, 0x3f
db 0x01, 0xc4, 0x36, 0x99, 0x26, 0xee, 0x11, 0x89, 0x94, 0x35, 0x7d, 0x24, 0xcc, 0x0c, 0x07, 0x03
db 0x65, 0xf6, 0xe6, 0x34, 0x11, 0xee, 0xdc, 0x5c, 0x2e, 0x31, 0x86, 0xa1, 0x80, 0x83, 0x37, 0x09
db 0x4a, 0xab, 0xd7, 0x52, 0x36, 0xec, 0x13, 0x28, 0xa8, 0x81, 0x69, 0x41, 0x3b, 0x3b, 0xcb, 0xca
db 0x90, 0xcf, 0x0b, 0x95, 0x46, 0x06, 0xc3, 0xbb, 0x8d, 0x47, 0xc1, 0xc4, 0x92, 0x5d, 0xd5, 0x56
db 0xe4, 0xb8, 0x0a, 0x99, 0x83, 0xaa, 0xc3, 0xd8, 0x06, 0xf7, 0x51, 0x4b, 0x31, 0xdf, 0x68, 0x3b
db 0x3b, 0xaa, 0x17, 0xd3, 0xd7, 0xba, 0x24, 0x41, 0xd1, 0x94, 0xd2, 0xd1, 0x3b, 0x96, 0xf6, 0xd2
db 0x2d, 0x9f, 0xf2, 0x92, 0x02, 0x2f, 0x51, 0xe5, 0x85, 0x99, 0xeb, 0x1b, 0x46, 0xd6, 0x82, 0xa5
db 0x30, 0x17, 0x33, 0xc1, 0xdb, 0xda, 0x5d, 0x13, 0xb2, 0xf3, 0x4d, 0x4d, 0xdb, 0x46, 0x3a, 0x4e
db 0xc0, 0x9f, 0x51, 0x0e, 0xf3, 0xff, 0x5d, 0xa1, 0xe2, 0x95, 0x6f, 0xf4, 0x49, 0x77, 0xed, 0x45
db 0x55, 0x46, 0xc2, 0xfa, 0xa7, 0x5f, 0x02, 0x81, 0xed, 0x1a, 0xf5, 0x1f, 0x96, 0xa9, 0x38, 0x82
db 0x89, 0xd0, 0xfd, 0xd3, 0xd0, 0x6d, 0x60, 0x3e, 0x1b, 0xe3, 0x8c, 0x5e, 0xd0, 0x2b, 0x2d, 0x7e
db 0xc2, 0x61, 0x2c, 0xa2, 0x8a, 0x75, 0x29, 0xea, 0x8f, 0x66, 0x0d, 0xb4, 0x21, 0x90, 0x04, 0x6b
db 0xbf, 0x24, 0xde, 0x9b, 0x9e, 0xbf, 0x30, 0xbb, 0x5b, 0x79, 0x51, 0xbb, 0xc0, 0x19, 0x51, 0xb2
db 0x6a, 0xd8, 0x5a, 0xa1, 0xca, 0xc2, 0xf5, 0x03, 0x04, 0xf5, 0x1f, 0x6f, 0x8a, 0x66, 0x20, 0x99
db 0xcb, 0xfb, 0xac, 0x3e, 0x26, 0x60, 0x2d, 0x5e, 0x02, 0x12, 0xd5, 0xe7, 0xe3, 0x49, 0x4f, 0x51
db 0x93, 0x03, 0x42, 0x74, 0x40, 0xf6, 0x32, 0xaa, 0x4e, 0xcf, 0xc0, 0xb7, 0xbe, 0x9d, 0x7c, 0xa0
db 0xd2, 0x95, 0xe1, 0xf6, 0x84, 0x5d, 0xda, 0x70, 0x95, 0x46, 0x11, 0x3d, 0x82, 0x23, 0xdc, 0xbc
db 0x2c, 0xe9, 0x37, 0x65, 0xc4, 0x89, 0x83, 0x1c, 0xea, 0x85, 0x75, 0xf6, 0xab, 0x9b, 0x91, 0xbf
db 0xbc, 0xbf, 0xfd, 0x81, 0xf9, 0xd8, 0x3d, 0xcc, 0x57, 0x9e, 0x37, 0xe4, 0xc5, 0xb2, 0xcd, 0x2f
db 0x03, 0x74, 0x6f, 0x6a, 0xf3, 0x9e, 0xcc, 0x4f, 0xd1, 0x35, 0xb6, 0xeb, 0xbb, 0x13, 0x9d, 0x3a
db 0x17, 0x22, 0x46, 0x16, 0x42, 0xbb, 0x63, 0xfb, 0x76, 0xba, 0x3f, 0x6c, 0xa3, 0x91, 0xf6, 0x9a
db 0x03, 0x1c, 0xd1, 0x8e, 0xc0, 0x19, 0x18, 0x4e, 0x6a, 0x49, 0x0a, 0x2b, 0x45, 0xd3, 0x3b, 0x93
db 0xda, 0x94, 0x10, 0xf8, 0x4f, 0x23, 0xa7, 0xfe, 0x7e, 0x52, 0xbc, 0x2f, 0x5a, 0x1d, 0x9d, 0x5b
db 0x23, 0xf2, 0xea, 0xb8, 0x85, 0x92, 0x10, 0xc3, 0x1f, 0x9a, 0xb0, 0x40, 0x81, 0x77, 0x3b, 0xed
db 0x13, 0xf8, 0xc9, 0x87, 0x53, 0x00, 0xe1, 0x72, 0x81, 0xdc, 0x19, 0x90, 0x14, 0xac, 0x36, 0x5b
db 0x84, 0x0c, 0x7b, 0x85, 0x01, 0x09, 0x73, 0x79, 0xdf, 0xf8, 0x4d, 0xcb, 0xff, 0xf0, 0xa5, 0x5a
db 0xfd, 0x8e, 0xb6, 0xf5, 0x21, 0x91, 0xf7, 0x24, 0x90, 0x7e, 0xa2, 0x74, 0xc6, 0x46, 0x69, 0x74
db 0x8b, 0x9e, 0x11, 0x6e, 0x69, 0xfd, 0x8f, 0x30, 0x0d, 0x7e, 0xf1, 0xc2, 0x67, 0xca, 0x44, 0xb3
db 0x47, 0x24, 0x22, 0xf0, 0x61, 0x7e, 0xb7, 0x60, 0x9f, 0xff, 0x49, 0x16, 0xc0, 0xe2, 0x7e, 0x9d
db 0xf8, 0x58, 0x3b, 0xa2, 0xe7, 0xf8, 0x11, 0x20, 0xaf, 0xe8, 0xec, 0x95, 0x2d, 0xf1, 0x31, 0xd6
db 0xc1, 0xe8, 0xdd, 0xac, 0x34, 0xf8, 0x01, 0x4c, 0x63, 0xf2, 0x2d, 0x15, 0xd0, 0xa8, 0xb1, 0xa8
db 0x0e, 0x6c, 0x64, 0xb1, 0x08, 0x3a, 0x9c, 0xa9, 0x30, 0x69, 0x67, 0xbc, 0xf5, 0x93, 0x28, 0x8a
db 0x5f, 0x96, 0xcb, 0xfd, 0x3d, 0x75, 0x87, 0x10, 0x77, 0xc5, 0x05, 0x3c, 0x17, 0xf4, 0xa1, 0xfa
db 0x23, 0x11, 0xec, 0x69, 0xaf, 0xc2, 0x6a, 0xe5, 0x79, 0xf3, 0xfc, 0x2f, 0x28, 0x61, 0xa1, 0xf2
db 0x7b, 0xc6, 0x53, 0x1d, 0xf5, 0xaa, 0xa8, 0x7d, 0x91, 0x21, 0xac, 0xf1, 0xc9, 0x7a, 0x97, 0xdb
db 0x83, 0x64, 0x0e, 0x35, 0xb8, 0x81, 0x90, 0xa5, 0xdf, 0x07, 0x3c, 0xba, 0xce, 0x9c, 0x84, 0x01
db 0x8a, 0x4e, 0x66, 0xe6, 0x69, 0xba, 0xb0, 0xda, 0x9f, 0x4a, 0x19, 0x3a, 0xd0, 0x08, 0xd1, 0xf6
db 0x4a, 0xf3, 0x5f, 0x04, 0xdd, 0x07, 0xd5, 0x41, 0xaf, 0x5c, 0x9a, 0x43, 0x0b, 0xcd, 0x9c, 0x6a
db 0x62, 0x11, 0x2f, 0x2f, 0x01, 0x77, 0x98, 0x4d, 0xf2, 0xfe, 0x47, 0x49, 0x55, 0xd2, 0xac, 0x19
db 0x64, 0x5a, 0xb8, 0xf1, 0xe4, 0xba, 0x53, 0xcf, 0x97, 0x6d, 0x3b, 0x8d, 0x03, 0x49, 0xbc, 0x57
db 0x95, 0x64, 0xff, 0x80, 0x07, 0xb9, 0x02, 0x52, 0x7e, 0xbd, 0x11, 0xd9, 0x25, 0x37, 0x2a, 0x11
db 0x9e, 0xad, 0x3d, 0x96, 0x57, 0x56, 0x9c, 0x57, 0x43, 0x62, 0x94, 0x76, 0x3a, 0xc1, 0xaa, 0x76
db 0xe6, 0x58, 0x9b, 0xe9, 0x65, 0xfb, 0xad, 0x1d, 0xfe, 0x1d, 0x75, 0x2a, 0xac, 0x22, 0xdb, 0x4a
db 0x89, 0x8d, 0x1f, 0xa3, 0x53, 0x47, 0x8c, 0x3c, 0x06, 0xcb, 0xaa, 0x3f, 0x21, 0x86, 0x37, 0xab
db 0x28, 0x42, 0xff, 0xb9, 0x9a, 0x25, 0x96, 0x12, 0x11, 0xee, 0xa1, 0x34, 0x33, 0xdf, 0x39, 0x39
db 0xc8, 0x19, 0xaa, 0x33, 0x7e, 0x06, 0xc2, 0x1c, 0xd5, 0x4c, 0xdd, 0xb9, 0x56, 0x70, 0xe9, 0x75
db 0x65, 0x08, 0xa6, 0xa0, 0x5e, 0x36, 0x38, 0xc8, 0x91, 0x4d, 0xd7, 0x37, 0xac, 0xc3, 0x22, 0x10
db 0x44, 0x14, 0x22, 0xe2, 0x94, 0x43, 0xd3, 0x27, 0xa4, 0xe4, 0x3c, 0x67, 0x26, 0x1f, 0x8f, 0xab
db 0xdf, 0x86, 0x71, 0xe4, 0x98, 0x29, 0x93, 0x9a, 0x7a, 0xf6, 0x79, 0x17, 0xf7, 0x59, 0x31, 0xe4
db 0x73, 0x47, 0xac, 0xa2, 0x1e, 0xa4, 0x0f, 0xb9, 0xf4, 0xee, 0x5b, 0xd3, 0xe8, 0xd0, 0x89, 0x6b
db 0xad, 0xa5, 0x46, 0x5e, 0x07, 0x73, 0x41, 0x72, 0xb7, 0xf4, 0x43, 0x79, 0x46, 0xab, 0xa0, 0xfd
db 0x2a, 0x93, 0x08, 0x09, 0x2c, 0xab, 0x60, 0xa9, 0x4a, 0xd5, 0x68, 0x78, 0xa0, 0x55, 0x2d, 0x55
db 0x7b, 0xd8, 0x3b, 0xd6, 0xf6, 0x88, 0x49, 0x86, 0xef, 0xb3, 0xea, 0xb0, 0xf5, 0xa3, 0x1a, 0x98
db 0x93, 0xd9, 0x21, 0x3b, 0xa5, 0xdb, 0xd7, 0xec, 0x53, 0xcb, 0x6c, 0x4d, 0x56, 0x67, 0xbe, 0x59
db 0x12, 0x92, 0xc6, 0x9a, 0x5a, 0x64, 0x84, 0x3d, 0x2e, 0x27, 0x77, 0x4b, 0x87, 0x42, 0x82, 0x45
db 0xf0, 0xd1, 0x9c, 0x52, 0xff, 0x2e, 0x7d, 0x09, 0xb7, 0x7a, 0x3a, 0x97, 0x7b, 0x40, 0x29, 0x5d
db 0x10, 0x7a, 0xb6, 0x65, 0x51, 0x8d, 0xca, 0xa0, 0x61, 0x6a, 0x48, 0xae, 0x3d, 0xe9, 0x94, 0xe8
db 0x25, 0x66, 0x46, 0x60, 0x39, 0x9c, 0x63, 0x55, 0xdb, 0x79, 0x34, 0xb2, 0x83, 0xb4, 0xc9, 0x44
db 0xc3, 0x20, 0xb6, 0xc3, 0xba, 0x9d, 0x5d, 0x47, 0x9a, 0x95, 0x62, 0x26, 0x49, 0x2f, 0x9f, 0x95
db 0x15, 0x35, 0xf4, 0x5d, 0xa6, 0x18, 0xe3, 0xd9, 0x63, 0xa0, 0x2f, 0xc2, 0x08, 0xb6, 0x98, 0x26
db 0x5a, 0x94, 0x13, 0xc9, 0x41, 0xa1, 0x68, 0x50, 0x2e, 0x0f, 0xcc, 0x50, 0x3b, 0x79, 0xbf, 0x13
db 0x0a, 0x38, 0x78, 0x0e, 0x8b, 0xf3, 0xf3, 0x5f, 0x0e, 0xa9, 0xab, 0x3b, 0xe4, 0xc6, 0x6b, 0xb8
db 0x9f, 0xb6, 0xa7, 0x98, 0xdc, 0x4a, 0xbb, 0x82, 0xc8, 0x26, 0x26, 0x4d, 0xa3, 0x83, 0x1b, 0x7d
db 0x6b, 0x8f, 0x70, 0xed, 0x55, 0xf8, 0x85, 0xb4, 0x07, 0xd2, 0x0e, 0xc9, 0x8d, 0x37, 0x0e, 0x5b
db 0xe1, 0xd1, 0x98, 0x06, 0x42, 0x92, 0x99, 0x98, 0x27, 0xfc, 0x16, 0x97, 0xf5, 0x30, 0xe2, 0xed
db 0x42, 0x0d, 0xa6, 0x69, 0x3a, 0x17, 0xd8, 0x62, 0xae, 0x95, 0x27, 0xf6, 0xa6, 0xeb, 0x65, 0xfc
db 0x21, 0xb2, 0x3a, 0xec, 0x5c, 0x7e, 0x1e, 0x20, 0x56, 0xc8, 0xec, 0xc5, 0xbd, 0xd3, 0xfd, 0xd3
db 0xe2, 0x26, 0x43, 0x6c, 0x32, 0x54, 0xc0, 0x0a, 0x95, 0x7f, 0x24, 0x9d, 0x9c, 0x4d, 0x99, 0xdd
db 0x5d, 0x16, 0x50, 0x99, 0x1c, 0xea, 0xe8, 0xd1, 0x7a, 0xee, 0x2e, 0xde, 0x21, 0xe7, 0x6c, 0x70
db 0x85, 0x4b, 0x3c, 0x7d, 0x7a, 0x80, 0x20, 0xfe, 0x47, 0x52, 0xee, 0x80, 0x60, 0x65, 0x4b, 0xbd
db 0x53, 0x11, 0xc6, 0x28, 0x70, 0xf9, 0x02, 0x6d, 0xd1, 0xae, 0xec, 0x36, 0xc0, 0x93, 0x26, 0xf1
db 0xed, 0x32, 0x9f, 0x88, 0x9d, 0x06, 0x9a, 0xa7, 0x60, 0x85, 0x2a, 0x08, 0x54, 0xac, 0x50, 0x10
db 0x5a, 0x8e, 0xe1, 0xc0, 0x0e, 0xc7, 0x95, 0xd4, 0x72, 0x1c, 0x8e, 0x1f, 0x82, 0x77, 0x6b, 0xfe
db 0xa2, 0x49, 0xe1, 0xe4, 0x25, 0x4a, 0x41, 0xfc, 0x8b, 0x8f, 0xee, 0x77, 0x38, 0x1c, 0x53, 0xe6
db 0x80, 0x96, 0x2e, 0x2c, 0x17, 0xd5, 0xec, 0x00, 0xf9, 0x70, 0xb0, 0x0a, 0xe4, 0x08, 0x05, 0x07
db 0x79, 0x67, 0x08, 0x2e, 0x9a, 0x15, 0x4d, 0x97, 0x35, 0x3b, 0x30, 0xa0, 0xf8, 0xf1, 0x23, 0x3f
db 0x0a, 0x12, 0x4d, 0x4f, 0xe8, 0xf7, 0x86, 0x15, 0x6b, 0x6c, 0x78, 0x26, 0x04, 0xcf, 0x2b, 0xfa
db 0xf3, 0xfe, 0x6e, 0x22, 0x9e, 0x50, 0x4c, 0x04, 0x42, 0x6e, 0x75, 0xd6, 0xcc, 0xc0, 0x3c, 0xfd
db 0x73, 0x8c, 0xb5, 0x9a, 0xfb, 0x7f, 0x34, 0xe0, 0x29, 0x73, 0x9b, 0x07, 0x26, 0x68, 0x6b, 0xb0
db 0xdd, 0x0d, 0xef, 0xfe, 0xb0, 0x7e, 0x5a, 0x73, 0xb8, 0x37, 0x63, 0x60, 0xb2, 0x3c, 0xa3, 0x3e
db 0xbb, 0xce, 0x66, 0x9e, 0x30, 0xd4, 0x01, 0xc9, 0xf0, 0x17, 0x3b, 0xed, 0x4c, 0x9d, 0xd1, 0xbf
db 0xe7, 0x7e, 0xb5, 0x59, 0x7d, 0xef, 0xb8, 0x22, 0xb5, 0x81, 0x2c, 0x83, 0x76, 0xc6, 0xee, 0x37
db 0xfb, 0x8e, 0xc1, 0x38, 0x22, 0xc4, 0x75, 0xda, 0x67, 0x95, 0x74, 0x34, 0xb9, 0x24, 0x4b, 0xa2
db 0x06, 0x7a, 0x29, 0xe2, 0xfb, 0x55, 0x3e, 0x98, 0x6d, 0x79, 0x62, 0x95, 0xeb, 0x46, 0xc4, 0x0d
db 0x61, 0xe1, 0x8d, 0x70, 0x4a, 0xd3, 0x1a, 0x7f, 0x60, 0x12, 0xd9, 0x14, 0x80, 0xc2, 0x00, 0xcb
db 0xe6, 0xa1, 0x3c, 0xc0, 0x7d, 0xf7, 0xdc, 0xfa, 0x8e, 0xde, 0x6e, 0x9d, 0xfa, 0xd8, 0xdb, 0xc3
db 0x24, 0x8d, 0x98, 0x3f, 0x59, 0xd1, 0xfa, 0x58, 0xcc, 0x3e, 0x0f, 0x8c, 0x0e, 0x7e, 0xed, 0x29
db 0x28, 0x5a, 0xdb, 0x8e, 0x4c, 0xc6, 0x5f, 0xf0, 0x3f, 0x54, 0xe8, 0x59, 0x2b, 0xf0, 0x5c, 0x78
db 0x28, 0xdd, 0x62, 0xc6, 0x22, 0xe7, 0x1a, 0xfe, 0x9f, 0x2e, 0x9a, 0x12, 0xa0, 0x9f, 0xbf, 0x24
db 0xfd, 0x25, 0x89, 0x1c, 0xf7, 0x03, 0xef, 0x1a, 0x26, 0xca, 0x17, 0x07, 0x3b, 0xf2, 0xb6, 0x99
db 0xed, 0xb7, 0xe9, 0xa4, 0x7c, 0x97, 0xcd, 0x7d, 0xf5, 0x76, 0x0a, 0xa2, 0x2f, 0x56, 0xe6, 0x67
db 0x9c, 0xcf, 0x8d, 0x16, 0x56, 0x6e, 0x69, 0xa6, 0x0a, 0xd9, 0x2f, 0x68, 0x42, 0xd3, 0xfe, 0x5c
db 0x3f, 0xb0, 0xf3, 0xcc, 0xc0, 0x6c, 0xbd, 0xe5, 0x0f, 0x35, 0xd9, 0x3a, 0x49, 0x59, 0x36, 0x36
db 0xad, 0x8f, 0xff, 0xa8, 0xa6, 0xb7, 0x23, 0xb6, 0xfa, 0xae, 0x3c, 0x4f, 0x89, 0x37, 0x0f, 0x48
db 0x04, 0xbe, 0x98, 0x63, 0xf7, 0xe9, 0xad, 0xb4, 0x9e, 0xd4, 0xb1, 0xd9, 0xc1, 0xdb, 0x07, 0x12
db 0x49, 0xa2, 0xa4, 0x8c, 0x99, 0xeb, 0xb1, 0xaa, 0x6d, 0x65, 0xf6, 0x54, 0x75, 0x31, 0x49, 0x1d
db 0xf9, 0x4e, 0xbb, 0xf1, 0x41, 0x7a, 0x89, 0x4d, 0xc8, 0x24, 0x9f, 0xda, 0xaa, 0x26, 0xc7, 0x82
db 0x6f, 0xf8, 0x04, 0xf4, 0xda, 0x6f, 0x34, 0xe8, 0x42, 0x9d, 0x47, 0xa3, 0xa6, 0x15, 0x2b, 0x15
db 0x8b, 0xbc, 0x2d, 0xd2, 0xa7, 0x2e, 0x52, 0x05, 0x6a, 0x56, 0x01, 0x1e, 0xf2, 0xb7, 0xa9, 0xfd
db 0xf3, 0x21, 0x69, 0xa5, 0xe4, 0x84, 0x7d, 0x17, 0x12, 0x91, 0x2c, 0xf0, 0xe0, 0x59, 0x46, 0x5e
db 0x81, 0xdb, 0x38, 0xbc, 0x02, 0xc0, 0xa7, 0x32, 0xee, 0xbd, 0x35, 0x73, 0xa9, 0x34, 0xec, 0x07
db 0xce, 0xa2, 0x29, 0x24, 0xc1, 0xc5, 0xc4, 0x84, 0x3a, 0x80, 0xdc, 0x2b, 0x8b, 0xe9, 0xe2, 0xa0
db 0xee, 0xe2, 0xeb, 0xd7, 0x16, 0x53, 0xce, 0x0f, 0x7f, 0x91, 0xd4, 0x1b, 0x06, 0x8f, 0xa3, 0xa9
db 0x2d, 0xf8, 0x0c, 0x40, 0x27, 0xf1, 0x91, 0x45, 0xbb, 0xf1, 0xa0, 0x33, 0x59, 0x7b, 0xc1, 0x90
db 0xe7, 0xc0, 0x4d, 0xd4, 0x9f, 0xc7, 0xe4, 0x31, 0xd0, 0x0f, 0xed, 0xad, 0x3e, 0x50, 0x1f, 0x82
db 0x80, 0xdf, 0x43, 0xdb, 0xe2, 0x9f, 0xdb, 0x19, 0x5f, 0x80, 0xaf, 0xa5, 0x25, 0xc7, 0xb9, 0x5e
db 0x82, 0xd6, 0x6b, 0x49, 0xee, 0x8c, 0xda, 0x3e, 0x4e, 0xef, 0x25, 0x1f, 0x8b, 0xe9, 0xa8, 0x6a
db 0xa1, 0x0c, 0xe2, 0x57, 0xd3, 0x97, 0xaa, 0xa5, 0xc3, 0x3b, 0x04, 0x81, 0xde, 0x3a, 0x8b, 0x65
db 0xf7, 0xa0, 0x0b, 0xe5, 0x8d, 0xe3, 0x7c, 0xc4, 0xd3, 0x4e, 0x2c, 0xec, 0xa2, 0x3f, 0x3e, 0x62
db 0x3f, 0x21, 0x88, 0x1d, 0x1d, 0x59, 0xb5, 0xca, 0xd9, 0xf8, 0x61, 0x49, 0x57, 0x15, 0xd0, 0x03
db 0xfa, 0xc5, 0x32, 0xd9, 0xd3, 0x13, 0x93, 0x6b, 0x93, 0x8e, 0x65, 0xba, 0x70, 0xac, 0x11, 0x0a
db 0x1d, 0xb6, 0xe8, 0x8a, 0x5a, 0x13, 0x15, 0xe0, 0x58, 0x9a, 0x91, 0x2a, 0x28, 0xe7, 0xc2, 0xac
db 0xc5, 0x18, 0xd1, 0xc9, 0xeb, 0xd9, 0xa9, 0x5b, 0xab, 0x4e, 0xb4, 0x17, 0xf3, 0x49, 0x22, 0xc9
db 0x2c, 0xca, 0x53, 0xdf, 0x0a, 0x84, 0x14, 0x07, 0x48, 0x45, 0xda, 0x90, 0xc2, 0x76, 0xba, 0xfd
db 0x55, 0x75, 0x53, 0xda, 0x90, 0x6b, 0x16, 0x90, 0x0e, 0xe1, 0xef, 0x27, 0x6a, 0x38, 0xe5, 0x5e
db 0xfe, 0x78, 0xd0, 0x71, 0xda, 0xe9, 0xf9, 0x5e, 0x7d, 0xac, 0x27, 0x8d, 0xae, 0xe8, 0x59, 0x7c
db 0x3c, 0xd2, 0xa0, 0xa4, 0x2b, 0x86, 0x8c, 0xd7, 0x76, 0x11, 0xb7, 0x09, 0xdd, 0xdf, 0x03, 0x01
db 0x08, 0x5b, 0xb1, 0x42, 0x99, 0x06, 0xf8, 0xe8, 0xe5, 0x7f, 0xb2, 0x26, 0xe1, 0xb6, 0x3f, 0x7c
db 0xc1, 0x38, 0xd8, 0x03, 0x33, 0x3f, 0xe6, 0x88, 0xcc, 0x73, 0x02, 0xbf, 0xef, 0x67, 0xe1, 0x14
db 0x96, 0x28, 0xba, 0x62, 0xed, 0x4e, 0x95, 0x6a, 0x95, 0xa8, 0xf9, 0x92, 0xd6, 0xb6, 0xc1, 0x5f
db 0x8f, 0x55, 0x69, 0x0b, 0xc8, 0x5b, 0x9d, 0x9a, 0xab, 0x51, 0x0b, 0x45, 0x12, 0x74, 0xa8, 0x7f
db 0x5f, 0x12, 0x2b, 0x2b, 0xd2, 0x9f, 0x95, 0xf5, 0x19, 0xd1, 0xc4, 0x37, 0xb7, 0xfc, 0x91, 0x06
db 0xff, 0xea, 0xb1, 0x45, 0xcb, 0x37, 0xe2, 0xce, 0x5e, 0x5f, 0x5f, 0x2f, 0x8c, 0xc1, 0x99, 0x39
db 0x23, 0x8a, 0xd6, 0x89, 0xd8, 0xdc, 0xa0, 0xa7, 0x57, 0xa6, 0xf6, 0xbd, 0xd7, 0xda, 0xf2, 0x8b
db 0x58, 0x06, 0xc2, 0x99, 0x99, 0x30, 0x84, 0xd2, 0xf5, 0xf0, 0x23, 0x03, 0xde, 0xb0, 0x88, 0xdb
db 0xca, 0x43, 0xae, 0xc0, 0x3b, 0x47, 0xa5, 0x4b, 0x00, 0xc5, 0xbd, 0xcc, 0x20, 0xb0, 0x94, 0x1f
db 0xe1, 0x01, 0xec, 0x35, 0x45, 0xa0, 0x61, 0x56, 0x47, 0x50, 0x72, 0xc8, 0x14, 0xfa, 0xe6, 0x2c
db 0xfe, 0xfa, 0xed, 0xef, 0x33, 0xd2, 0x4a, 0x3d, 0x45, 0xa8, 0xaf, 0x5f, 0x8b, 0xd8, 0xb5, 0x66
db 0x12, 0x09, 0x51, 0x0a, 0x47, 0x9b, 0xaa, 0xe1, 0x1e, 0x24, 0x4c, 0xd5, 0x92, 0x5f, 0x0c, 0x5e
db 0x90, 0x7f, 0x06, 0x77, 0x03, 0x16, 0xb8, 0x23, 0xfa, 0x24, 0x79, 0x1c, 0x7e, 0x7c, 0x77, 0xb4
db 0x81, 0x8f, 0x5b, 0xe2, 0x7e, 0xcc, 0xcb, 0xdf, 0x51, 0x5c, 0x3b, 0xb5, 0x21, 0x28, 0x82, 0xc7
db 0x0e, 0xa6, 0xdf, 0xe9, 0x91, 0x19, 0x6b, 0x70, 0x76, 0x19, 0xd8, 0x59, 0xdd, 0x37, 0x9b, 0xf2
db 0xfd, 0x03, 0x4f, 0x62, 0xc9, 0x4f, 0xb0, 0x9c, 0xd0, 0xf4, 0x63, 0x8b, 0x82, 0x1c, 0x70, 0xe4
db 0x69, 0x5c, 0x7d, 0x32, 0xb6, 0xba, 0xfe, 0x5e, 0xd5, 0xe3, 0x25, 0x45, 0x6c, 0xbf, 0x7c, 0x12
db 0xa3, 0xb8, 0x45, 0x07, 0xc7, 0xe1, 0xdc, 0xa6, 0xab, 0x45, 0xd0, 0x09, 0x36, 0xf9, 0x38, 0x85
db 0x98, 0x1f, 0x11, 0x98, 0x93, 0x80, 0x0b, 0x77, 0x7f, 0x60, 0x6a, 0x86, 0x0f, 0x78, 0x7b, 0x6f
db 0xbc, 0x10, 0x24, 0x87, 0xaf, 0x1b, 0x94, 0x7c, 0xea, 0x58, 0xe3, 0x1e, 0x90, 0x4c, 0x56, 0x49
db 0xc9, 0x2c, 0x54, 0xf1, 0xe1, 0xe4, 0xba, 0xfc, 0x3d, 0xce, 0x58, 0x31, 0x31, 0x98, 0x08, 0x48
db 0x73, 0x28, 0x09, 0x6f, 0x77, 0xe6, 0x90, 0x09, 0x91, 0x3a, 0x29, 0x99, 0xec, 0x61, 0x0d, 0x3b
db 0xdc, 0xe3, 0xeb, 0x32, 0x72, 0xac, 0xbe, 0x03, 0xf9, 0x65, 0xa5, 0xd1, 0xb5, 0xb5, 0xf6, 0x3c
db 0x16, 0x2f, 0x83, 0x7e, 0x5a, 0x72, 0x67, 0xbc, 0xf6, 0xb8, 0xa4, 0xa3, 0x9e, 0x38, 0x75, 0x7b
db 0x42, 0xc7, 0xa5, 0xa6, 0x9a, 0x83, 0x25, 0xed, 0x0d, 0xda, 0x4e, 0x78, 0x6c, 0x55, 0x57, 0x6e
db 0x5e, 0xaf, 0x3e, 0x9b, 0x52, 0xda, 0x4f, 0xbf, 0x75, 0xee, 0x50, 0x5f, 0xcc, 0x8f, 0x11, 0xe7
db 0xac, 0x63, 0xa5, 0x4a, 0xaa, 0x80, 0x95, 0x78, 0xac, 0x71, 0x98, 0x86, 0xf4, 0x3f, 0x20, 0x3a
db 0xa7, 0x18, 0x88, 0x6f, 0xe3, 0x79, 0x79, 0xb8, 0x98, 0x07, 0x55, 0xf0, 0xbd, 0x21, 0xba, 0x52
db 0x94, 0xec, 0x54, 0x01, 0x54, 0x2b, 0x50, 0xaf, 0x28, 0x3b, 0xfe, 0xbc, 0x60, 0xd2, 0xf6, 0xc3
db 0xfd, 0xf8, 0x9c, 0x02, 0xfc, 0x62, 0xee, 0x5f, 0x83, 0xfe, 0x7e, 0xc6, 0x8d, 0xd5, 0xe9, 0xff
db 0xdb, 0xf5, 0x2c, 0xcd, 0x1b, 0x52, 0xcc, 0x6a, 0x8a, 0x01, 0x61, 0x09, 0x76, 0x78, 0x03, 0x5c
db 0x81, 0x1d, 0xc9, 0xff, 0x46, 0x92, 0xd7, 0x7f, 0xb6, 0xaf, 0x90, 0x38, 0xf7, 0x2d, 0x7f, 0x5a
db 0xd7, 0xce, 0x87, 0x1b, 0x9d, 0xa4, 0x67, 0x7a, 0xce, 0x7b, 0xe8, 0x8f, 0xff, 0x2c, 0x37, 0xbc
db 0xb6, 0x41, 0x66, 0x32, 0xf1, 0x0d, 0x1e, 0x12, 0x3c, 0x7b, 0xff, 0x18, 0xc1, 0x1b, 0x3b, 0x51
db 0xd0, 0xde, 0x9d, 0x84, 0xf3, 0x01, 0x63, 0x38, 0x4b, 0x85, 0x39, 0x6b, 0x93, 0x25, 0xea, 0x05
db 0x85, 0xe9, 0xbd, 0x9a, 0x1d, 0x94, 0xcf, 0x9d, 0x1d, 0xed, 0x6e, 0xd4, 0x69, 0x83, 0x57, 0x14
db 0x92, 0x3f, 0x40, 0xae, 0xc6, 0xf8, 0xb9, 0x46, 0x9b, 0xf1, 0xdd, 0x51, 0x3e, 0x0d, 0x6f, 0xa0
db 0x1f, 0xf7, 0xbb, 0x16, 0x19, 0x5f, 0x0c, 0x43, 0x30, 0xfb, 0x46, 0xbc, 0x01, 0xfb, 0x9c, 0x81
db 0x3a, 0xd1, 0x33, 0x5a, 0xfe, 0xf5, 0x0d, 0x30, 0x86, 0x71, 0x0a, 0xd4, 0xc1, 0xe6, 0xb5, 0x88
db 0x26, 0x50, 0xe6, 0xc8, 0xc5, 0xc6, 0xab, 0x69, 0x5d, 0xfb, 0x55, 0x11, 0xbf, 0x51, 0xfd, 0xda
db 0xae, 0x63, 0xb8, 0x46, 0xf6, 0x54, 0x7f, 0xd1, 0x48, 0x79, 0xa9, 0x93, 0x22, 0x7e, 0xa3, 0x72
db 0x73, 0xf9, 0xeb, 0xac, 0xc5, 0x3b, 0xf1, 0xb0, 0x2a, 0x86, 0x21, 0xbe, 0x73, 0x4a, 0x1e, 0x95
db 0xc2, 0xcf, 0x85, 0x40, 0x23, 0x7b, 0x33, 0x4f, 0xa1, 0x0f, 0xdf, 0x64, 0x21, 0x9b, 0xf9, 0x2b
db 0xc0, 0x1a, 0xd6, 0xf8, 0x86, 0x2c, 0xa7, 0x0a, 0x5b, 0xdd, 0xd7, 0x9a, 0x9d, 0x2e, 0x34, 0x37
db 0xe1, 0x40, 0x3a, 0x43, 0xab, 0x13, 0xb3, 0x7d, 0xf0, 0x9d, 0x66, 0x97, 0x18, 0x73, 0x0f, 0xcb
db 0xbe, 0xa6, 0x5d, 0xab, 0x53, 0xa8, 0xf7, 0xd6, 0x90, 0x5b, 0x72, 0xa2, 0x6b, 0x35, 0xc7, 0xba
db 0xc7, 0xe5, 0x19, 0x01, 0xc3, 0x63, 0x09, 0x95, 0x36, 0x75, 0xf6, 0x92, 0x77, 0xf0, 0xe2, 0xd9
db 0x40, 0x6f, 0xd6, 0xae, 0x34, 0x82, 0x1a, 0x2f, 0xf3, 0x3e, 0x15, 0x12, 0x6b, 0x08, 0xa4, 0xc8
db 0xe1, 0xff, 0xa4, 0xf6, 0x52, 0x18, 0x8e, 0xea, 0xf0, 0xa1, 0xb0, 0x93, 0x42, 0x5e, 0xa3, 0xe5
db 0xd0, 0x09, 0x1b, 0x08, 0xf7, 0xa8, 0x09, 0x42, 0x48, 0xaa, 0x0a, 0x93, 0xf7, 0x6d, 0x14, 0x9a
db 0x8e, 0xf1, 0xfe, 0x13, 0x82, 0xf8, 0x4a, 0x86, 0xd1, 0x6f, 0x23, 0x5d, 0xf2, 0x2b, 0x78, 0xb2
db 0xee, 0x8a, 0xff, 0xc1, 0x07, 0x21, 0x87, 0xa2, 0xf4, 0x48, 0x33, 0x97, 0x3a, 0x37, 0x68, 0x46
db 0xb8, 0x9e, 0x24, 0x0e, 0x8f, 0x41, 0x0b, 0x30, 0xed, 0x95, 0x16, 0x89, 0x3a, 0x9e, 0x00, 0xbf
db 0x83, 0x82, 0xfb, 0xfd, 0x60, 0x9d, 0x8e, 0x70, 0x5f, 0xf2, 0x6f, 0x7c, 0x7a, 0x11, 0x81, 0x76
db 0xf6, 0xa9, 0x3a, 0x08, 0x59, 0xb4, 0xd0, 0x3a, 0x9c, 0xb8, 0x11, 0xe2, 0x1d, 0x16, 0x60, 0x5c
db 0xcb, 0x46, 0x67, 0xe9, 0x1e, 0xb6, 0x74, 0x6d, 0x9b, 0x5f, 0x47, 0xaa, 0xdf, 0x9d, 0x74, 0x90
db 0xb9, 0x53, 0xb5, 0x85, 0x46, 0xbb, 0x67, 0xc2, 0xbc, 0x08, 0xcb, 0x5e, 0x4d, 0x57, 0xd1, 0x63
db 0xbc, 0x2b, 0xdb, 0xe3, 0xf4, 0x6b, 0xee, 0xd1, 0x65, 0x52, 0x33, 0x61, 0x30, 0x9d, 0x61, 0x43
db 0x5c, 0xf8, 0xa5, 0x16, 0xd4, 0x3a, 0x3c, 0x52, 0xd9, 0x94, 0xe9, 0x41, 0x63, 0xf6, 0x48, 0xfb
db 0xe1, 0x6d, 0x20, 0x41, 0xbc, 0xa5, 0x67, 0xe1, 0x04, 0x02, 0xf3, 0xf8, 0x2d, 0x43, 0x50, 0xef
db 0xf6, 0x01, 0x11, 0x38, 0xe5, 0xa7, 0xc6, 0x73, 0xcb, 0xe3, 0x5d, 0x23, 0x22, 0x27, 0x6d, 0x89
db 0x8e, 0x38, 0x90, 0xbc, 0xfe, 0xb6, 0xfe, 0x88, 0x96, 0x12, 0xa6, 0xc6, 0x75, 0xb0, 0x3f, 0x1b

align 32
; 8192-bytes of random data
.random_data2:
db 0x18, 0xe0, 0x94, 0xf6, 0x0b, 0x6f, 0xbf, 0xca, 0x7c, 0x31, 0x4d, 0x34, 0x28, 0x68, 0xe6, 0xf3
db 0x3f, 0xb1, 0x8b, 0x43, 0x35, 0x4b, 0x58, 0x87, 0x48, 0x41, 0x94, 0x8c, 0xf2, 0xb3, 0xc7, 0xed
db 0x4b, 0x55, 0xc7, 0x9e, 0x03, 0x36, 0x2f, 0x16, 0xeb, 0xb7, 0xb8, 0x15, 0xe6, 0xb5, 0x06, 0x6e
db 0x00, 0x5b, 0x54, 0xcd, 0xe1, 0x34, 0xe5, 0xee, 0xc6, 0xf5, 0x6f, 0x71, 0x16, 0x81, 0x1e, 0x85
db 0xf8, 0x68, 0xbe, 0x81, 0xc3, 0xca, 0xa7, 0x48, 0x03, 0xc4, 0xfa, 0x63, 0xbe, 0x2e, 0xd2, 0x23
db 0xf2, 0xaf, 0xb4, 0x0a, 0x57, 0x16, 0x49, 0x65, 0x7b, 0x84, 0x05, 0x44, 0xe6, 0x6b, 0x78, 0xfe
db 0xff, 0xae, 0x2e, 0x20, 0xf1, 0xd6, 0xed, 0x08, 0xa0, 0x0d, 0xd2, 0x4b, 0xd4, 0x5f, 0x98, 0xfc
db 0xfc, 0x91, 0x06, 0x30, 0xec, 0x8c, 0x92, 0x2a, 0x71, 0x12, 0x8d, 0x61, 0xfd, 0xe4, 0x50, 0x64
db 0xc1, 0xce, 0xc3, 0x07, 0xd9, 0xee, 0x0e, 0x0e, 0xac, 0x27, 0xa8, 0xc4, 0x21, 0xcf, 0x34, 0xff
db 0x28, 0x94, 0xb1, 0xa1, 0xbe, 0xa1, 0x14, 0x14, 0x58, 0x64, 0x2c, 0x21, 0x39, 0xb2, 0xa0, 0x77
db 0x86, 0xfd, 0x91, 0xed, 0x5e, 0x8f, 0x65, 0x7f, 0xc9, 0x74, 0x8f, 0x98, 0x17, 0x82, 0x8f, 0x04
db 0x0a, 0x40, 0x58, 0x3d, 0x29, 0xff, 0x93, 0x8d, 0xb1, 0x56, 0x56, 0x1f, 0x0a, 0xed, 0x7f, 0xfb
db 0x96, 0xd8, 0x80, 0x2b, 0xcb, 0x5f, 0x5b, 0xdf, 0x4a, 0x84, 0xcb, 0x3a, 0xdd, 0xe7, 0x0a, 0xfc
db 0x4b, 0x60, 0x38, 0x31, 0x36, 0x5f, 0x44, 0x02, 0x5f, 0xdd, 0x64, 0xaa, 0xcd, 0xc7, 0x4f, 0xe9
db 0x65, 0xf8, 0xe4, 0x7c, 0x71, 0xe1, 0x51, 0xbe, 0x56, 0xe5, 0x19, 0x97, 0x91, 0xbe, 0x85, 0x5d
db 0x29, 0xf7, 0xac, 0x1b, 0xad, 0x45, 0xb5, 0x27, 0xa3, 0x97, 0x74, 0x3c, 0x50, 0x37, 0xc7, 0x25
db 0x8c, 0x5c, 0xea, 0x74, 0x18, 0x2d, 0xcf, 0x28, 0xb7, 0x56, 0x8a, 0xf4, 0xd1, 0x9c, 0xf8, 0x49
db 0xcf, 0xc2, 0xa5, 0x3d, 0x0a, 0x64, 0xff, 0x0c, 0xa0, 0x93, 0x58, 0x4c, 0x17, 0x7f, 0xeb, 0xeb
db 0x4b, 0xd8, 0xad, 0xfb, 0xb5, 0x8f, 0xa1, 0x85, 0xeb, 0xda, 0x94, 0x98, 0x40, 0xde, 0x3b, 0x3f
db 0x03, 0xd2, 0xc0, 0xe0, 0x2d, 0x2d, 0x1a, 0x4e, 0x08, 0x3a, 0xe7, 0x5a, 0x3c, 0x5b, 0xe4, 0xe8
db 0x52, 0x34, 0xde, 0x62, 0x38, 0x3c, 0xbe, 0x11, 0x31, 0x3e, 0xa9, 0xb4, 0x0e, 0x77, 0xa5, 0x54
db 0x91, 0xec, 0x2e, 0x60, 0x9a, 0xb7, 0xf9, 0x35, 0xc2, 0x17, 0x76, 0x2a, 0xd6, 0x26, 0x03, 0x0a
db 0x3f, 0x36, 0x7e, 0x64, 0x4c, 0xdc, 0xb2, 0x47, 0x59, 0xa2, 0x6b, 0x18, 0x3d, 0xbb, 0x73, 0xb2
db 0xc0, 0x09, 0x12, 0xd7, 0xbe, 0x5b, 0x46, 0x54, 0x48, 0x81, 0xa6, 0x62, 0x2a, 0xa7, 0x8e, 0x4f
db 0xbf, 0x3b, 0x07, 0x37, 0x8f, 0xbc, 0x37, 0xe2, 0xc8, 0x6a, 0xcc, 0x6f, 0x04, 0x8a, 0x50, 0x56
db 0x42, 0x35, 0xf2, 0x78, 0x33, 0xd8, 0xa3, 0xc6, 0x8b, 0x7f, 0x8a, 0xde, 0x3f, 0xed, 0x69, 0x2a
db 0x6e, 0xbf, 0x48, 0x2c, 0x92, 0x44, 0x74, 0xf6, 0x89, 0xb5, 0x54, 0x16, 0x97, 0x01, 0x03, 0x79
db 0x5c, 0x4c, 0x4f, 0x82, 0xd7, 0x80, 0x8a, 0x9d, 0xc8, 0xc3, 0xcf, 0x9e, 0x71, 0x46, 0x24, 0x30
db 0xe1, 0x2f, 0x7f, 0x2d, 0x13, 0xa1, 0xc8, 0x5b, 0xcb, 0x28, 0x2e, 0xd5, 0xe9, 0xc1, 0xc2, 0xf9
db 0x9b, 0xfc, 0x91, 0xc8, 0x18, 0x5d, 0x7a, 0x7f, 0x2c, 0xf2, 0x33, 0xff, 0xc4, 0x76, 0xa8, 0x17
db 0xfe, 0x5b, 0xba, 0x30, 0x42, 0x25, 0x7b, 0x98, 0xa8, 0xcb, 0x00, 0x46, 0xb5, 0xa0, 0x34, 0xd3
db 0xdd, 0x40, 0xb0, 0xd2, 0xf6, 0x67, 0x89, 0x33, 0x5d, 0x0f, 0x34, 0xc8, 0xd2, 0x10, 0x75, 0xba
db 0xd4, 0x33, 0xc4, 0xd9, 0xbb, 0x71, 0x60, 0xde, 0x31, 0xe0, 0xe0, 0xe4, 0x3e, 0x82, 0xf7, 0x99
db 0x6b, 0x1b, 0x04, 0xd1, 0x82, 0xa6, 0x17, 0x4b, 0x01, 0xde, 0x3b, 0xf6, 0x8d, 0x80, 0x82, 0x62
db 0x86, 0xd8, 0xc2, 0x58, 0xa2, 0x9c, 0x44, 0xec, 0x4d, 0x59, 0x99, 0x9c, 0x44, 0x2b, 0x6d, 0xbf
db 0x9b, 0x52, 0xe8, 0x1f, 0x34, 0x58, 0xfa, 0xdc, 0x75, 0x2d, 0x71, 0xc5, 0x7a, 0x08, 0x3d, 0xf4
db 0xa2, 0x14, 0x08, 0x89, 0x04, 0x34, 0x9d, 0x73, 0xed, 0x48, 0xa7, 0x6c, 0x83, 0x9a, 0xf0, 0x38
db 0xfb, 0xd6, 0x2b, 0xc2, 0x63, 0x12, 0x10, 0xd4, 0x59, 0x55, 0xa6, 0x9f, 0x95, 0x55, 0x43, 0xf6
db 0x70, 0x4b, 0xec, 0x1d, 0x32, 0xad, 0x1f, 0xa8, 0x32, 0xeb, 0x57, 0xbf, 0x4e, 0x7b, 0xae, 0xdf
db 0x96, 0xf0, 0x39, 0x2d, 0x09, 0xed, 0x03, 0x78, 0x99, 0xea, 0x87, 0x1f, 0xcc, 0xa9, 0x27, 0x22
db 0xb8, 0x63, 0x52, 0x23, 0xf0, 0x96, 0x65, 0xc4, 0x6c, 0xf2, 0xb4, 0xbf, 0xd9, 0x40, 0xbb, 0x04
db 0xb2, 0xf1, 0x4a, 0xc8, 0xd5, 0x7c, 0xfe, 0x1d, 0xc7, 0x3a, 0xc5, 0x41, 0x29, 0x7c, 0x23, 0xd7
db 0x8d, 0x69, 0x41, 0xd7, 0x22, 0xda, 0xfe, 0xd8, 0x0e, 0xbf, 0xf8, 0x51, 0xd5, 0x0c, 0xb5, 0x90
db 0xb3, 0x22, 0xb0, 0xc1, 0x75, 0x59, 0xe8, 0xbe, 0x24, 0x05, 0x26, 0x68, 0xfe, 0xf7, 0xcc, 0x35
db 0x91, 0x70, 0x7c, 0x11, 0x60, 0xd4, 0x22, 0x2b, 0x99, 0x11, 0x28, 0x37, 0x26, 0x5b, 0x9b, 0x09
db 0xd7, 0xb4, 0x7c, 0xc4, 0x3a, 0x2a, 0xb4, 0x96, 0x19, 0x12, 0xf3, 0x7d, 0x3c, 0xa3, 0x9e, 0x1e
db 0xd6, 0x24, 0x9a, 0x39, 0x41, 0xab, 0xe2, 0xdf, 0xa0, 0xcf, 0xd6, 0xd1, 0x06, 0x3d, 0x56, 0x61
db 0xa3, 0x3d, 0xec, 0xbc, 0x97, 0x60, 0xdd, 0x1e, 0x71, 0x65, 0x9f, 0x49, 0xd2, 0xc1, 0xd2, 0xa4
db 0x2b, 0x5c, 0x44, 0x7e, 0x70, 0x4b, 0x23, 0xb0, 0x6e, 0x1c, 0x20, 0x84, 0xd8, 0x2b, 0x83, 0xc8
db 0x6d, 0x63, 0xcc, 0x81, 0x82, 0x55, 0x7d, 0x18, 0xe8, 0x3f, 0x46, 0xc2, 0x47, 0xdc, 0xca, 0x6d
db 0x4b, 0xd8, 0xec, 0x4d, 0x38, 0xd6, 0x82, 0xa5, 0x8c, 0x10, 0xfe, 0x0d, 0x56, 0x7a, 0x95, 0xf6
db 0xdd, 0xd5, 0x6e, 0x74, 0x1c, 0x02, 0x90, 0x8b, 0x84, 0x62, 0x9b, 0x7d, 0x27, 0x6d, 0x4b, 0xfe
db 0xb8, 0x5d, 0xa7, 0xf0, 0x49, 0xcd, 0x93, 0xd7, 0xb0, 0x72, 0x84, 0x6b, 0xa4, 0xd9, 0xf8, 0x4d
db 0x5a, 0x52, 0x69, 0xf0, 0xdc, 0x39, 0x15, 0x5c, 0x6f, 0xd1, 0x35, 0xa3, 0xc8, 0x8b, 0x9c, 0x8f
db 0xff, 0xff, 0xe3, 0x12, 0xc7, 0x84, 0xc5, 0xec, 0x80, 0x83, 0x8f, 0x79, 0x41, 0xbb, 0x87, 0xca
db 0x6c, 0x6b, 0x61, 0x10, 0x0e, 0x9e, 0x01, 0xaf, 0xf8, 0x74, 0xdd, 0x86, 0x8e, 0xdb, 0x27, 0xb9
db 0xd9, 0xc3, 0x1f, 0xfd, 0x16, 0x18, 0x7b, 0xe7, 0x2d, 0xbc, 0x0a, 0x7a, 0xa1, 0x67, 0x57, 0xc9
db 0xc5, 0x8d, 0x2d, 0x11, 0x36, 0x63, 0x02, 0xa3, 0x3f, 0x9b, 0x8f, 0x41, 0x7f, 0xaf, 0x89, 0xaf
db 0xe4, 0x2a, 0x91, 0x66, 0xed, 0xb0, 0xf3, 0x12, 0xaf, 0xd5, 0x02, 0xdc, 0x07, 0xd6, 0x6d, 0xff
db 0x70, 0x9a, 0xc9, 0xe0, 0x31, 0xeb, 0xe1, 0x60, 0xd9, 0xda, 0x81, 0x9e, 0x70, 0xf6, 0x51, 0x0b
db 0x8e, 0xc3, 0xd3, 0x65, 0xdf, 0xe5, 0x2d, 0x2f, 0xf2, 0x3d, 0x92, 0xdd, 0x46, 0x0a, 0x83, 0xe2
db 0x69, 0x90, 0x28, 0xa4, 0x39, 0xe3, 0x67, 0x20, 0x95, 0x48, 0xe1, 0xa6, 0xad, 0x12, 0x96, 0x9e
db 0xb3, 0x11, 0xe7, 0xe6, 0x4c, 0x49, 0x0a, 0x42, 0x47, 0xb4, 0xdd, 0xb5, 0xe8, 0x06, 0x9f, 0x2b
db 0x30, 0x9c, 0x19, 0xc5, 0xaa, 0x81, 0xcf, 0x2f, 0x90, 0x09, 0x94, 0x21, 0x69, 0x80, 0x6f, 0x3b
db 0xcd, 0x9c, 0xb6, 0x1f, 0x7f, 0x3c, 0x2b, 0xf8, 0x1c, 0xed, 0x77, 0xda, 0x0f, 0xb1, 0x96, 0x5a
db 0x1d, 0x88, 0x55, 0x81, 0x33, 0x8c, 0xa9, 0x61, 0xc7, 0xe0, 0xfe, 0xbb, 0x1e, 0x26, 0xd6, 0x26
db 0x13, 0x42, 0x3c, 0x79, 0xf7, 0x66, 0xca, 0x9e, 0xe8, 0x56, 0xf4, 0x76, 0xbb, 0x7b, 0x1a, 0x0a
db 0x83, 0x1d, 0x7f, 0x6b, 0xdd, 0x2d, 0xc2, 0xb0, 0x32, 0x57, 0x17, 0x23, 0x83, 0x95, 0xdb, 0x2f
db 0x4f, 0x06, 0xc8, 0x50, 0x38, 0xf3, 0xa6, 0x36, 0x64, 0x78, 0xf2, 0xf2, 0xde, 0xcb, 0x26, 0x19
db 0xee, 0x5f, 0x0e, 0xdd, 0x5d, 0xd0, 0xa4, 0xef, 0xe9, 0xa9, 0xe3, 0x7c, 0xe6, 0x12, 0x89, 0x91
db 0xc6, 0xc0, 0x68, 0x61, 0x1d, 0x81, 0x6e, 0xd1, 0x7a, 0x7f, 0x4d, 0xc1, 0x03, 0x7c, 0x54, 0x4e
db 0xb6, 0xe9, 0x51, 0x41, 0x51, 0x1b, 0x65, 0x80, 0x81, 0xbc, 0x51, 0x8c, 0x60, 0xe5, 0x18, 0xdb
db 0x13, 0x6b, 0xcb, 0x13, 0x49, 0xf1, 0x24, 0xbe, 0x37, 0x18, 0xda, 0x36, 0xde, 0x77, 0xd8, 0x60
db 0xdd, 0x3d, 0x21, 0x83, 0xc0, 0xfe, 0x6e, 0x29, 0x37, 0xe0, 0x8a, 0x5b, 0xbc, 0x3e, 0xc8, 0x11
db 0x9b, 0xef, 0x2e, 0x52, 0xa5, 0xd0, 0x99, 0xe2, 0xe5, 0x5c, 0xfc, 0x21, 0x53, 0x03, 0x4c, 0x4a
db 0xdf, 0x42, 0x45, 0x6f, 0x07, 0x74, 0x46, 0x80, 0x41, 0x9e, 0x44, 0x3b, 0x0b, 0x46, 0x7b, 0xf3
db 0x20, 0xa8, 0x87, 0x71, 0xc0, 0xad, 0x74, 0x7a, 0xbc, 0x19, 0x22, 0xdd, 0xcc, 0xc2, 0xf8, 0x1c
db 0xee, 0x92, 0x00, 0xb7, 0xfb, 0x41, 0x8b, 0x74, 0x56, 0x35, 0x53, 0x5a, 0x6f, 0xc6, 0xf6, 0x5a
db 0x98, 0xb5, 0xd4, 0xb2, 0x0f, 0xb7, 0xc1, 0x11, 0x47, 0x16, 0x2f, 0xaf, 0x8e, 0xdf, 0xf3, 0x72
db 0x51, 0x51, 0xf6, 0x4e, 0x9b, 0x77, 0x10, 0x7d, 0xff, 0xe0, 0x99, 0x53, 0x9c, 0x9e, 0xbe, 0xa4
db 0xb6, 0xa3, 0x8f, 0x67, 0xba, 0x3c, 0xb2, 0x91, 0x2c, 0xeb, 0x1f, 0x4f, 0x49, 0xc9, 0xf0, 0x69
db 0x21, 0xe2, 0x48, 0xe8, 0xe1, 0xcb, 0x85, 0xa7, 0x6a, 0x1d, 0x64, 0x1f, 0x7a, 0xfb, 0xde, 0x1f
db 0x6d, 0xcd, 0x8c, 0xe8, 0xbf, 0x16, 0xf4, 0x18, 0xb7, 0x8f, 0x89, 0x94, 0xe8, 0xe1, 0xb0, 0x76
db 0xd3, 0xe8, 0x74, 0xd7, 0xc4, 0x7d, 0x61, 0x2e, 0xbb, 0x5f, 0xd5, 0x05, 0xce, 0xbf, 0x09, 0xfe
db 0x4d, 0x9e, 0x9a, 0x9b, 0x10, 0xa1, 0x27, 0x6a, 0x86, 0xda, 0x7a, 0xfb, 0xf3, 0xd6, 0x3c, 0x61
db 0x6a, 0xaf, 0x5d, 0xa9, 0x93, 0x19, 0x9b, 0xa6, 0x5e, 0x37, 0xb1, 0xc2, 0xb0, 0x91, 0x9b, 0x84
db 0xea, 0x33, 0x52, 0x2d, 0x18, 0x44, 0x5f, 0xeb, 0x84, 0xa1, 0xbc, 0x00, 0xe4, 0x36, 0x7e, 0x58
db 0x2e, 0x02, 0xc4, 0x00, 0x09, 0x85, 0xe8, 0x7e, 0xc3, 0xe0, 0xfe, 0x98, 0x73, 0x74, 0x4c, 0x6b
db 0xc9, 0xe3, 0x5a, 0x41, 0xa8, 0xbe, 0xea, 0xfc, 0x33, 0x49, 0x13, 0xf9, 0x32, 0x9c, 0x71, 0x73
db 0x05, 0x40, 0x90, 0x9b, 0xae, 0xe3, 0x38, 0x10, 0x86, 0x52, 0x80, 0x25, 0x88, 0xd8, 0x0a, 0x49
db 0x0b, 0xa2, 0xfa, 0x38, 0x7c, 0x74, 0x47, 0x20, 0xd0, 0x5c, 0x9f, 0x3d, 0x54, 0x3f, 0x52, 0x6d
db 0x7b, 0x4a, 0xcd, 0x7a, 0x2e, 0x4d, 0x2f, 0x9f, 0xf7, 0x51, 0x39, 0x7a, 0xc5, 0x0a, 0xe7, 0x37
db 0xa9, 0xb3, 0xc4, 0xd8, 0xf0, 0x04, 0xcc, 0xa0, 0xf8, 0x79, 0x55, 0x73, 0x2c, 0x23, 0x8a, 0x2a
db 0x39, 0xc8, 0xe5, 0xad, 0x24, 0x19, 0x35, 0xce, 0x46, 0xaf, 0xe1, 0xbb, 0x8e, 0x95, 0x33, 0xf4
db 0xe2, 0xc6, 0xe0, 0xcf, 0x4a, 0x7b, 0x7c, 0x70, 0xcb, 0x8c, 0x7e, 0xfc, 0x12, 0x9a, 0x78, 0xb5
db 0x62, 0x42, 0x52, 0xdd, 0x51, 0xd5, 0x9c, 0xb9, 0x60, 0x75, 0xf3, 0xf0, 0xad, 0xde, 0x55, 0xcb
db 0x96, 0x54, 0x10, 0x41, 0x9f, 0xf1, 0x44, 0x92, 0xc6, 0xa2, 0xf9, 0xb8, 0xe4, 0x11, 0x38, 0x56
db 0xf3, 0x0f, 0x73, 0x5f, 0xfd, 0xc1, 0x44, 0x8b, 0x72, 0xb9, 0xdc, 0x75, 0xef, 0xe8, 0x85, 0x46
db 0xf4, 0x14, 0xed, 0x79, 0x17, 0xa8, 0xea, 0xe5, 0xf6, 0x00, 0x28, 0x23, 0x89, 0xee, 0x6a, 0x06
db 0xdf, 0x09, 0x7a, 0x54, 0x5e, 0xe1, 0x6b, 0x6b, 0x7f, 0xdf, 0x69, 0x76, 0xd2, 0x7a, 0xd3, 0x77
db 0xe5, 0x98, 0xc2, 0x6a, 0x19, 0x7c, 0x73, 0xa8, 0xfe, 0x3d, 0xb0, 0x52, 0xc0, 0xfc, 0x3f, 0x83
db 0xe0, 0x60, 0x7a, 0xa6, 0x2f, 0xdd, 0x53, 0x4d, 0x35, 0x8d, 0xc2, 0xe3, 0x8e, 0xba, 0xf6, 0x32
db 0x28, 0xa6, 0x13, 0xe7, 0x0d, 0x1d, 0xd4, 0x20, 0xd7, 0x5e, 0xee, 0xcf, 0xbd, 0xb4, 0x0a, 0xf9
db 0xaf, 0x27, 0x05, 0x83, 0xcd, 0x53, 0x9d, 0x1f, 0x05, 0xcf, 0xa1, 0x56, 0xec, 0xe9, 0xd4, 0xb5
db 0x8b, 0x05, 0x4a, 0x81, 0xc8, 0x3d, 0xa4, 0xad, 0xda, 0x64, 0x90, 0xb2, 0xd9, 0xaf, 0xaf, 0x1b
db 0x54, 0xcf, 0xfc, 0xf3, 0x18, 0xa3, 0xbb, 0xc7, 0xe1, 0xa3, 0x29, 0xbb, 0x71, 0x1c, 0x76, 0x08
db 0x99, 0x1f, 0x78, 0xd9, 0xa9, 0x38, 0x83, 0xf2, 0x85, 0xac, 0x93, 0x4d, 0xf8, 0x93, 0xfd, 0x55
db 0x71, 0x52, 0x16, 0xf7, 0xba, 0x24, 0xaa, 0x7e, 0x3e, 0x95, 0xc3, 0x86, 0xfc, 0x5a, 0xe9, 0x27
db 0x87, 0x44, 0xe4, 0x9c, 0xb4, 0x60, 0xc9, 0x08, 0xd1, 0xc2, 0x3a, 0x3f, 0xc2, 0xca, 0x7e, 0xe7
db 0x12, 0x11, 0xe8, 0x44, 0xc7, 0xbc, 0x45, 0x5c, 0x28, 0xd6, 0x6c, 0x7f, 0x38, 0x0c, 0x43, 0x3d
db 0x67, 0xad, 0xac, 0x76, 0xa8, 0x80, 0x2b, 0x39, 0xa6, 0x12, 0xa0, 0x79, 0x13, 0x19, 0x29, 0x5a
db 0x7a, 0x00, 0xf1, 0x90, 0x9c, 0x85, 0x90, 0xe0, 0x13, 0x50, 0x0a, 0x47, 0x98, 0x8a, 0x72, 0xfc
db 0x5f, 0x21, 0x2c, 0xf0, 0x01, 0x95, 0xee, 0x11, 0xb3, 0x1e, 0xc9, 0x3e, 0x3e, 0x01, 0x27, 0xa2
db 0xfe, 0x59, 0x6e, 0xf1, 0x2c, 0x7c, 0x1a, 0x91, 0x7a, 0xb9, 0x43, 0xbd, 0xf6, 0x54, 0x4e, 0xd7
db 0xb3, 0x9f, 0xff, 0xa0, 0xe6, 0x43, 0x56, 0xb6, 0xa8, 0xab, 0x99, 0xcf, 0x8a, 0xda, 0x4c, 0x21
db 0xbf, 0x27, 0x5f, 0xb4, 0xd7, 0x43, 0x8c, 0x44, 0x56, 0x7b, 0xd5, 0x28, 0xd8, 0x47, 0xab, 0xdb
db 0xb9, 0xc8, 0x90, 0x01, 0x9a, 0xb1, 0x55, 0x44, 0x9c, 0x2d, 0xff, 0x08, 0x59, 0x77, 0x95, 0x5e
db 0x6b, 0x96, 0x6f, 0x60, 0xd9, 0xff, 0xa3, 0xce, 0x1b, 0x08, 0xab, 0x85, 0xeb, 0xc7, 0x85, 0xcf
db 0x2d, 0x9c, 0xea, 0x49, 0xaa, 0x74, 0x3a, 0x6e, 0xda, 0x43, 0x1a, 0xef, 0xc7, 0x41, 0xd6, 0xae
db 0x6f, 0x41, 0x5f, 0xc2, 0x91, 0xc6, 0xf5, 0x45, 0xd4, 0x9c, 0x7f, 0x52, 0x7a, 0x77, 0x8d, 0x26
db 0xa0, 0x06, 0x0b, 0x64, 0xd9, 0x74, 0x1a, 0x52, 0x2a, 0x5b, 0x84, 0x14, 0xc0, 0xbc, 0xe4, 0x6b
db 0xbe, 0xc3, 0x01, 0xea, 0x9f, 0x87, 0xaa, 0x07, 0xc0, 0x40, 0x08, 0xf6, 0x00, 0x41, 0x72, 0xb4
db 0x20, 0x7f, 0x46, 0x10, 0x59, 0xce, 0xf3, 0x68, 0x8a, 0x80, 0x10, 0x39, 0xb8, 0xeb, 0xfb, 0x6b
db 0x85, 0xf7, 0x60, 0x64, 0xc9, 0x28, 0xe2, 0xfd, 0x0c, 0x6d, 0x3f, 0xc3, 0x3e, 0x86, 0xd7, 0x6e
db 0xaa, 0x67, 0x35, 0x37, 0x83, 0xc8, 0x1c, 0xf0, 0x05, 0xbb, 0xa6, 0x6c, 0xca, 0x89, 0x08, 0x75
db 0x6f, 0x9e, 0xd3, 0xff, 0x5e, 0xb7, 0x0f, 0x99, 0x9b, 0xf5, 0x66, 0xd8, 0xb9, 0x50, 0x5c, 0xb6
db 0xaf, 0x07, 0x23, 0xd7, 0x93, 0x7c, 0xf1, 0x01, 0xbb, 0x0c, 0xf2, 0xd4, 0xe9, 0xd0, 0xad, 0xc3
db 0x6b, 0x80, 0xf0, 0xf6, 0x84, 0x77, 0x53, 0xa9, 0x61, 0x7c, 0x74, 0x5f, 0xbe, 0x16, 0xd3, 0xa9
db 0xf5, 0x5e, 0x3d, 0xa7, 0xcf, 0x21, 0x12, 0x18, 0xd2, 0xc9, 0xe3, 0x37, 0x26, 0x9f, 0x6d, 0x9c
db 0x38, 0x6e, 0x4c, 0xfd, 0x65, 0x67, 0xe7, 0x0e, 0x0d, 0xb1, 0x4e, 0x15, 0x03, 0xd8, 0xf6, 0xac
db 0x0b, 0xd4, 0x4a, 0x78, 0xa9, 0x22, 0x37, 0xf5, 0x62, 0xf7, 0x41, 0xc7, 0xce, 0xa1, 0xb5, 0xcf
db 0x19, 0x1d, 0x93, 0x54, 0x62, 0xeb, 0xb2, 0x4b, 0x2e, 0xa8, 0xd7, 0xd0, 0x01, 0x0c, 0xae, 0x4f
db 0x2c, 0x67, 0x94, 0xb2, 0x2d, 0x96, 0x43, 0x03, 0x92, 0xcb, 0x8a, 0x5b, 0xa4, 0x9c, 0xff, 0xcf
db 0xc4, 0x4f, 0x69, 0xde, 0x06, 0x7f, 0xf8, 0x26, 0x3c, 0x72, 0x8b, 0xe7, 0x8b, 0xf3, 0x15, 0x38
db 0x1b, 0x8a, 0x29, 0x9c, 0x28, 0x2d, 0x98, 0x54, 0x6a, 0x69, 0xf4, 0xe6, 0x51, 0x45, 0x05, 0x85
db 0x26, 0x72, 0x0f, 0x82, 0xcb, 0xce, 0x8a, 0xc0, 0xf9, 0x5c, 0x30, 0xcc, 0x15, 0xd0, 0xcd, 0x42
db 0x9e, 0x1d, 0x44, 0x69, 0xc8, 0x11, 0x73, 0x09, 0x14, 0xcc, 0xfc, 0x90, 0xd1, 0x21, 0x20, 0xa4
db 0xab, 0xd1, 0x2e, 0x71, 0x79, 0x8e, 0x62, 0x7e, 0xd4, 0x7a, 0xb5, 0xe6, 0x59, 0x0b, 0xc7, 0x42
db 0xb8, 0xd5, 0x3b, 0x51, 0xb1, 0x2a, 0xe6, 0x75, 0xe6, 0xf8, 0xe4, 0x18, 0xb0, 0x5e, 0x68, 0x3f
db 0x9f, 0xb3, 0x33, 0x0a, 0x69, 0xf4, 0x8a, 0x05, 0xcf, 0x87, 0x4f, 0x9c, 0xbb, 0xcd, 0x1a, 0xef
db 0x96, 0xd9, 0xce, 0x54, 0xf9, 0x9c, 0x96, 0xfe, 0xc9, 0x6f, 0xb6, 0x0c, 0x0b, 0x47, 0x08, 0xa2
db 0x59, 0xf3, 0x07, 0xe9, 0x47, 0x58, 0x1b, 0x42, 0x72, 0x2f, 0x4d, 0xb8, 0xad, 0x98, 0xe1, 0xa4
db 0x1d, 0xb6, 0xf9, 0x19, 0x99, 0x23, 0x52, 0xe2, 0x7e, 0x6a, 0x13, 0x3b, 0x2d, 0x29, 0x8f, 0x27
db 0xe0, 0xaa, 0x98, 0x16, 0xfe, 0x64, 0xd8, 0x95, 0x94, 0xba, 0xad, 0xe0, 0xf1, 0xfa, 0x0c, 0xcd
db 0x5a, 0xd8, 0x0b, 0x08, 0xe3, 0x02, 0xfa, 0x56, 0xb4, 0x59, 0xdb, 0xc6, 0x77, 0x61, 0xdb, 0x11
db 0x50, 0xc9, 0x3d, 0x5e, 0x49, 0x62, 0xad, 0xc0, 0xd0, 0xfa, 0x6a, 0x32, 0x49, 0x6e, 0xad, 0x8d
db 0xcd, 0xad, 0xa6, 0x70, 0xf8, 0x8b, 0x70, 0x23, 0x3f, 0xfb, 0x85, 0xc4, 0x3d, 0x24, 0x82, 0xbc
db 0x52, 0x7a, 0x22, 0xd1, 0xd5, 0xdd, 0x04, 0x5b, 0x44, 0xf5, 0x9c, 0x28, 0x73, 0xea, 0x62, 0x85
db 0x1a, 0xed, 0xcf, 0x7d, 0x29, 0x77, 0x19, 0x23, 0x15, 0x29, 0x36, 0xa6, 0x7e, 0x86, 0x49, 0x77
db 0x6f, 0xcd, 0x0f, 0xdb, 0xc5, 0x56, 0xdc, 0x00, 0x65, 0x60, 0x18, 0xa1, 0x85, 0x05, 0x13, 0xa1
db 0x86, 0xa4, 0x4a, 0xcb, 0x67, 0x95, 0x59, 0x8a, 0x49, 0xf4, 0x51, 0xfc, 0x6f, 0xc8, 0x72, 0x6a
db 0x73, 0x4d, 0x2c, 0x79, 0x37, 0x21, 0x2a, 0xd4, 0x35, 0x32, 0xbc, 0x5b, 0x27, 0x62, 0x3b, 0x3c
db 0xa9, 0x6d, 0xde, 0x06, 0x87, 0x0f, 0x1a, 0xbd, 0xb7, 0x46, 0x22, 0x0d, 0x1f, 0xdd, 0xc9, 0x33
db 0x94, 0xb8, 0xf7, 0xd8, 0xd6, 0xd6, 0x74, 0x9d, 0x64, 0x6d, 0xe9, 0x9d, 0x2f, 0x8c, 0x46, 0xda
db 0xc4, 0x97, 0x8f, 0xf2, 0xc4, 0x9b, 0xf0, 0x94, 0x95, 0x4e, 0x5e, 0xfb, 0xe7, 0x8e, 0x70, 0x18
db 0x1c, 0x53, 0x5a, 0x55, 0x06, 0x23, 0x67, 0x78, 0x4d, 0x06, 0x93, 0xe8, 0x08, 0x2d, 0x13, 0x28
db 0xa1, 0x4c, 0x4c, 0x8c, 0xed, 0x91, 0x05, 0xba, 0x46, 0xb0, 0xa4, 0x3c, 0xc3, 0xae, 0x78, 0xa0
db 0x2a, 0x0c, 0x8f, 0x39, 0xdf, 0x61, 0x00, 0x6a, 0x81, 0xd9, 0x78, 0xcc, 0xcf, 0x16, 0xa9, 0x46
db 0xc4, 0x0c, 0x04, 0x00, 0x2f, 0x56, 0x27, 0xb9, 0x4f, 0x1a, 0x2b, 0x9c, 0x0f, 0x35, 0x84, 0xb1
db 0x5c, 0x8d, 0xbf, 0x68, 0x43, 0x09, 0xf2, 0xca, 0xb8, 0x2e, 0xa3, 0xe8, 0xf4, 0x8b, 0x69, 0x2f
db 0xd2, 0x4b, 0x9e, 0xbd, 0x6d, 0xc7, 0x33, 0x47, 0x4f, 0x62, 0x6b, 0xac, 0xbd, 0x69, 0x9d, 0x35
db 0x20, 0x4f, 0x3e, 0xef, 0x7b, 0x81, 0xae, 0x91, 0xa8, 0xbb, 0x86, 0xf9, 0x02, 0xf9, 0x16, 0x7d
db 0x21, 0x37, 0x11, 0xb5, 0x18, 0x59, 0x60, 0x0f, 0xd6, 0x69, 0xcc, 0xd1, 0xb2, 0xca, 0xc0, 0xdf
db 0x2f, 0x94, 0xce, 0xd6, 0x9c, 0xf5, 0xc1, 0x9e, 0x88, 0x01, 0x35, 0xb7, 0xda, 0xf9, 0x79, 0x27
db 0xe5, 0xa9, 0xca, 0x5b, 0xdf, 0x5d, 0xaf, 0x52, 0x66, 0xb7, 0x79, 0x1e, 0x3b, 0xf7, 0xf2, 0x11
db 0x4d, 0xde, 0x5f, 0x5b, 0x63, 0x76, 0x83, 0xdb, 0x54, 0x9d, 0xec, 0x9d, 0xb2, 0x37, 0x03, 0xf1
db 0xeb, 0x01, 0xb1, 0x0b, 0x6a, 0xe0, 0xce, 0x4b, 0xfe, 0x4f, 0x90, 0xe9, 0x5f, 0xd0, 0x15, 0x91
db 0x19, 0xb6, 0x24, 0x8c, 0x3b, 0xd9, 0x26, 0x13, 0x8e, 0x60, 0xcf, 0xf9, 0x2f, 0x56, 0x36, 0xb2
db 0xa1, 0xeb, 0xa3, 0xbd, 0x6c, 0x55, 0x4e, 0x31, 0xb9, 0x3b, 0xfb, 0xed, 0x28, 0xbe, 0xe7, 0x67
db 0x15, 0x2c, 0xf5, 0x40, 0x57, 0xd7, 0xc0, 0x30, 0xe9, 0x11, 0x06, 0x93, 0x87, 0x13, 0x72, 0x44
db 0x58, 0x18, 0x08, 0x70, 0xb0, 0xae, 0x15, 0x5e, 0x6a, 0xce, 0x68, 0x6b, 0x53, 0x4f, 0xd5, 0x5c
db 0x97, 0xeb, 0xd8, 0xe0, 0xd2, 0xb5, 0x85, 0x90, 0x12, 0x4e, 0x99, 0x0f, 0x13, 0x47, 0x25, 0xcb
db 0x1b, 0xf2, 0xad, 0x33, 0xf9, 0x53, 0x23, 0x8d, 0xd6, 0x57, 0xa3, 0xdf, 0xfe, 0xce, 0xd0, 0xb1
db 0x87, 0xd2, 0x99, 0x7e, 0x58, 0xbc, 0x25, 0xe9, 0x08, 0xb5, 0x3c, 0x58, 0xad, 0xb1, 0xa0, 0x8b
db 0xdc, 0xdf, 0xe7, 0x66, 0x1c, 0x34, 0x20, 0x77, 0xf2, 0xf7, 0x78, 0x1e, 0xe0, 0xc3, 0x17, 0xdd
db 0x28, 0x52, 0x13, 0x0b, 0x8b, 0xe1, 0xbe, 0x98, 0x41, 0xe9, 0xda, 0x2d, 0xc3, 0xaf, 0x2f, 0xd7
db 0x7f, 0x01, 0x3d, 0xd9, 0x51, 0x01, 0x76, 0xe6, 0xff, 0x5f, 0x28, 0x4c, 0x8f, 0xd3, 0x1d, 0x47
db 0x02, 0x59, 0x94, 0x0a, 0x6d, 0x5b, 0xe1, 0xd9, 0x46, 0xe9, 0xbd, 0x9c, 0x50, 0xdb, 0x67, 0x86
db 0x5f, 0xb6, 0x9b, 0xa1, 0x04, 0x53, 0xb4, 0xda, 0xc9, 0xc6, 0x72, 0xe2, 0x8b, 0x4a, 0x5c, 0x1b
db 0x30, 0xe4, 0xa2, 0x02, 0x83, 0x19, 0xd4, 0x89, 0x7a, 0x82, 0x3a, 0x4c, 0xa1, 0xde, 0x62, 0xfc
db 0x26, 0xb4, 0xc0, 0x04, 0x25, 0x1d, 0x54, 0xed, 0x1c, 0xa6, 0x13, 0x05, 0x01, 0xc3, 0x0b, 0x77
db 0x64, 0x83, 0x1d, 0xe7, 0xa9, 0xb5, 0x10, 0x67, 0x4e, 0x17, 0x08, 0xf3, 0x09, 0x1c, 0xd0, 0x7c
db 0xb4, 0x6d, 0x2a, 0x3b, 0xc6, 0x80, 0xd3, 0xf2, 0xe9, 0x1a, 0xc3, 0xed, 0xd6, 0x7f, 0xab, 0xea
db 0x9b, 0xec, 0xe2, 0xe8, 0xfe, 0x67, 0x6b, 0x5d, 0x4c, 0x09, 0xc6, 0x8f, 0xfa, 0xa0, 0xa7, 0x85
db 0x3d, 0xa5, 0x19, 0x74, 0x2c, 0xb2, 0xfe, 0x15, 0x46, 0xf9, 0x50, 0x37, 0xe5, 0xe6, 0x21, 0x56
db 0x45, 0x12, 0x13, 0x2b, 0x08, 0x93, 0xd1, 0x82, 0x74, 0x89, 0x82, 0x14, 0xe1, 0xb3, 0x91, 0x84
db 0xb6, 0x2d, 0xcc, 0xd7, 0x75, 0x3c, 0x0c, 0xc4, 0x97, 0x94, 0x4c, 0x5d, 0x28, 0x25, 0xea, 0x0f
db 0xeb, 0x98, 0x5d, 0xff, 0xde, 0x6e, 0x68, 0x32, 0xdf, 0xc7, 0x5b, 0xdc, 0x87, 0x96, 0x38, 0x53
db 0xdb, 0x9b, 0xa9, 0xb7, 0x67, 0x8f, 0xd1, 0x2a, 0xb4, 0x61, 0x28, 0x7a, 0xcf, 0xea, 0x45, 0x4a
db 0x63, 0xaa, 0x49, 0x23, 0xd4, 0x63, 0x30, 0x46, 0x4e, 0xbf, 0xef, 0x2e, 0x19, 0xd1, 0xb8, 0x38
db 0x7b, 0x58, 0x39, 0xbc, 0x3f, 0x2e, 0x31, 0xdf, 0x7c, 0xd5, 0xca, 0xda, 0xd9, 0x78, 0xb4, 0xcb
db 0x3b, 0xcd, 0xd1, 0xe6, 0xa4, 0x27, 0xab, 0x5d, 0x2a, 0xa8, 0x62, 0xb3, 0x6d, 0x19, 0x6d, 0xa1
db 0xde, 0x94, 0x32, 0x0e, 0x70, 0x7f, 0x16, 0x0c, 0xa4, 0x20, 0x30, 0xd1, 0x1c, 0xbd, 0xa0, 0x4a
db 0x9e, 0x3b, 0xec, 0xd1, 0x53, 0x2b, 0xfe, 0x2a, 0x5c, 0x35, 0xaa, 0x65, 0xe3, 0x4b, 0x75, 0x8f
db 0xc6, 0xd8, 0x1e, 0xa9, 0x04, 0x8a, 0xb2, 0xcc, 0xab, 0xec, 0xbe, 0x4d, 0x69, 0xf1, 0x15, 0x9c
db 0x5e, 0xdc, 0x57, 0xb6, 0x73, 0x3b, 0x4b, 0x73, 0xd8, 0x1e, 0xb1, 0x48, 0x2c, 0xa2, 0x85, 0xea
db 0x26, 0x97, 0x62, 0x62, 0x74, 0xb9, 0xb3, 0x34, 0x39, 0x22, 0xf3, 0x40, 0xfe, 0xcc, 0x02, 0x9f
db 0xc6, 0x27, 0x99, 0x3c, 0xea, 0x99, 0x6b, 0xf8, 0x6e, 0x72, 0xbd, 0x2b, 0x42, 0x98, 0xc6, 0x8d
db 0xac, 0xfd, 0x9b, 0x43, 0xf5, 0x8e, 0xa4, 0x79, 0xe4, 0x7e, 0x3e, 0xf9, 0x02, 0x99, 0x63, 0x8f
db 0x1e, 0x77, 0x2a, 0xcd, 0x85, 0xc5, 0x64, 0x5e, 0xdb, 0xcc, 0x5f, 0x0d, 0x5f, 0xde, 0xe6, 0x80
db 0xe6, 0x4e, 0xfd, 0x9e, 0x2b, 0x07, 0x8b, 0x34, 0x97, 0x55, 0xf0, 0x34, 0x22, 0xe1, 0x11, 0x49
db 0x3a, 0x95, 0xfa, 0x80, 0x39, 0xfc, 0xfe, 0x84, 0x27, 0x90, 0x46, 0x54, 0x21, 0xab, 0x3f, 0x30
db 0x18, 0xdb, 0xda, 0x25, 0x0e, 0x4b, 0x1c, 0xd8, 0x09, 0xdc, 0x3d, 0x48, 0x32, 0x91, 0xa5, 0x2c
db 0x7c, 0x4d, 0xf7, 0x6f, 0x50, 0xf7, 0x77, 0x4f, 0x1a, 0xf2, 0xe7, 0xa8, 0x1d, 0x40, 0x3c, 0x6c
db 0x5c, 0xbb, 0xcf, 0x48, 0x89, 0x59, 0xff, 0x79, 0x1a, 0xd7, 0x95, 0xbd, 0x7b, 0xd1, 0x9e, 0x06
db 0xc9, 0x47, 0x8e, 0xf4, 0x55, 0xe6, 0xe3, 0xf9, 0x23, 0xdf, 0x05, 0x5e, 0x48, 0x16, 0xbf, 0x93
db 0x47, 0x79, 0x0f, 0x3c, 0xec, 0xa5, 0x18, 0x0e, 0x61, 0x0d, 0xc4, 0x79, 0x59, 0xa6, 0xe7, 0xf8
db 0xc1, 0x97, 0x1d, 0x40, 0xe9, 0x20, 0xb5, 0xea, 0x27, 0x11, 0xa6, 0x6b, 0x2e, 0xe6, 0x96, 0xec
db 0xa7, 0x41, 0x90, 0x15, 0x44, 0x00, 0xde, 0x37, 0xca, 0x9d, 0x14, 0x33, 0x84, 0x0c, 0x1e, 0x59
db 0x02, 0x02, 0x5f, 0x4a, 0xa9, 0x99, 0xd8, 0x40, 0x1b, 0x66, 0x1b, 0x2d, 0x32, 0x05, 0xe7, 0xcf
db 0xec, 0x15, 0xb6, 0x22, 0x0b, 0x99, 0x38, 0xdd, 0x70, 0xdf, 0x08, 0x0e, 0xb2, 0xaf, 0x76, 0xd3
db 0xa6, 0xd3, 0xd5, 0xbf, 0x0a, 0x95, 0x1b, 0xdc, 0x53, 0x26, 0xd0, 0xb7, 0x76, 0x53, 0x5f, 0x69
db 0xaf, 0xce, 0x4d, 0xe1, 0x6e, 0x2b, 0xdb, 0x13, 0x6e, 0xb4, 0xed, 0x26, 0x38, 0x1b, 0xa5, 0xf1
db 0xee, 0x10, 0x60, 0x4b, 0x90, 0x90, 0x42, 0x59, 0xdb, 0x76, 0x44, 0x5b, 0x0f, 0xa3, 0x9c, 0xa9
db 0x56, 0x3a, 0x5f, 0x05, 0x55, 0xf4, 0xe7, 0x2c, 0x2f, 0xcb, 0xdd, 0xd5, 0x0f, 0x1e, 0x8b, 0x3b
db 0xd5, 0x2a, 0x1a, 0x5e, 0x2a, 0x62, 0xa6, 0x26, 0x40, 0xe1, 0x50, 0xc3, 0xbc, 0x3b, 0xfd, 0x85
db 0x7a, 0xb4, 0xdc, 0xa7, 0xd7, 0x3b, 0x53, 0x5e, 0xf2, 0x6b, 0xc3, 0x85, 0x70, 0x3d, 0x1d, 0xcd
db 0x57, 0x3d, 0x25, 0xb6, 0x2d, 0x83, 0x61, 0x12, 0x90, 0x54, 0x16, 0xb8, 0x62, 0x85, 0x2a, 0x79
db 0x75, 0x0b, 0xb1, 0xf1, 0x04, 0xc7, 0x73, 0x1b, 0x6d, 0x15, 0xe8, 0x1f, 0xa5, 0xf2, 0x6e, 0x18
db 0x70, 0x58, 0x4f, 0xe4, 0x6e, 0x53, 0xdd, 0xf2, 0x96, 0x77, 0xa1, 0xb1, 0xb1, 0x44, 0xff, 0x61
db 0x6e, 0x6b, 0x54, 0x33, 0xcc, 0x88, 0x66, 0x8b, 0x33, 0x23, 0x1c, 0xa1, 0x87, 0xe7, 0x3d, 0x9c
db 0x12, 0xb1, 0xcd, 0x10, 0xf5, 0xa4, 0xf7, 0xff, 0xf5, 0x1b, 0xab, 0x91, 0xfc, 0x16, 0x90, 0x9c
db 0x7f, 0x3b, 0x70, 0x40, 0x26, 0x0e, 0xa0, 0xc4, 0xf0, 0x78, 0xaf, 0xcc, 0xef, 0x52, 0x6c, 0x4e
db 0x3b, 0xa1, 0xbf, 0xbf, 0x65, 0x97, 0xbf, 0xa6, 0x28, 0x6c, 0x76, 0xdb, 0x28, 0x5c, 0x22, 0x1b
db 0xd4, 0x0c, 0xa0, 0x15, 0x76, 0xfd, 0x4a, 0x6c, 0x2d, 0xf8, 0xc2, 0x9f, 0x97, 0x14, 0xe3, 0xac
db 0xf1, 0x19, 0x43, 0x35, 0x7a, 0xdd, 0x62, 0x24, 0x4c, 0xae, 0x0f, 0x2c, 0x11, 0x59, 0x29, 0xeb
db 0x1c, 0x48, 0x44, 0x8f, 0x62, 0xef, 0xd7, 0x8c, 0x33, 0x92, 0x4b, 0x5a, 0x8d, 0xfb, 0xe2, 0xba
db 0x5e, 0xe0, 0xf2, 0x10, 0x96, 0x1f, 0x69, 0x48, 0x23, 0x04, 0x3d, 0x28, 0x37, 0x30, 0x77, 0x7b
db 0x71, 0x2b, 0x19, 0x69, 0x81, 0x3b, 0x9a, 0x7b, 0xc7, 0x59, 0x25, 0x5b, 0xb9, 0xc0, 0xf7, 0x60
db 0xe6, 0x04, 0x61, 0x76, 0x20, 0xd1, 0x80, 0x85, 0x6e, 0x10, 0x49, 0x44, 0xd7, 0x83, 0xc7, 0x4c
db 0x4b, 0xb4, 0xb4, 0x5e, 0xc7, 0x02, 0xab, 0x0b, 0x1b, 0xb5, 0x8a, 0x1a, 0xf5, 0xc3, 0x87, 0x68
db 0xad, 0x8f, 0xa3, 0x58, 0x06, 0xf0, 0x65, 0x09, 0x27, 0x17, 0x7d, 0xc2, 0xb8, 0xfd, 0x4a, 0xab
db 0x81, 0x98, 0x09, 0x36, 0xa1, 0xa9, 0x25, 0x8f, 0xbc, 0x13, 0x67, 0xc7, 0x90, 0xbb, 0xbe, 0xe6
db 0x8b, 0x1b, 0x49, 0x18, 0xe1, 0x27, 0xdc, 0x6b, 0x2b, 0x41, 0xc1, 0x5a, 0x8d, 0xde, 0xbc, 0x8a
db 0x0b, 0xc4, 0xb5, 0x2d, 0x1f, 0x02, 0xc6, 0xbe, 0x67, 0x6b, 0x70, 0x3c, 0x40, 0xde, 0x75, 0xb2
db 0x0f, 0xcb, 0x40, 0x3d, 0x9a, 0x58, 0x1a, 0x77, 0x15, 0xc0, 0xa2, 0xfa, 0xa7, 0xbb, 0x58, 0x6b
db 0x6c, 0xd5, 0x02, 0xad, 0x36, 0x7d, 0xb6, 0x2a, 0x7c, 0x5e, 0x99, 0x63, 0xd8, 0x29, 0x8b, 0xfd
db 0x1b, 0x6a, 0x99, 0x9a, 0x51, 0x7a, 0x28, 0xce, 0xb8, 0x75, 0x71, 0x21, 0xab, 0x44, 0x15, 0x39
db 0x4a, 0xab, 0x79, 0xb7, 0x60, 0xf4, 0xeb, 0x7c, 0xb7, 0x95, 0x56, 0x5f, 0xdf, 0x92, 0x23, 0xb8
db 0x03, 0xe6, 0x44, 0xde, 0x2f, 0x97, 0xfa, 0x06, 0x31, 0xc1, 0xf7, 0x9c, 0xed, 0xcf, 0x04, 0x52
db 0x9a, 0xff, 0xeb, 0xee, 0x73, 0x81, 0x05, 0xca, 0x9a, 0x86, 0x53, 0x18, 0x20, 0xee, 0x1c, 0x9c
db 0x27, 0xda, 0xef, 0x99, 0x3e, 0x11, 0xe0, 0x1b, 0x51, 0xd3, 0xcf, 0xa4, 0x96, 0xdb, 0x14, 0x2e
db 0xc4, 0xe6, 0x3c, 0x47, 0x66, 0x3e, 0x7e, 0xd2, 0x5d, 0x70, 0xfb, 0x35, 0xd5, 0x56, 0x32, 0x3f
db 0x21, 0xdf, 0xd2, 0xac, 0x75, 0xb3, 0xac, 0x66, 0x23, 0x6a, 0x41, 0x03, 0x73, 0xef, 0x5a, 0x5c
db 0x03, 0x4d, 0x13, 0x2b, 0x84, 0x7a, 0x5a, 0x84, 0xce, 0xa5, 0x0f, 0x68, 0xe0, 0x60, 0xa9, 0xdc
db 0xd9, 0x8d, 0x52, 0xf9, 0x7b, 0x9b, 0x64, 0x87, 0x8a, 0x9f, 0xfb, 0x5e, 0x73, 0x62, 0xc4, 0xf2
db 0x8c, 0x5e, 0x1e, 0x00, 0x17, 0x93, 0x16, 0x54, 0x82, 0x0e, 0x6a, 0x30, 0xff, 0x43, 0xd5, 0x79
db 0x1d, 0x17, 0x38, 0x11, 0x45, 0xcd, 0x3d, 0x25, 0x92, 0xb6, 0x86, 0x55, 0x6e, 0x94, 0xe9, 0x18
db 0x2b, 0x88, 0x27, 0xc3, 0xa6, 0xa9, 0x94, 0x92, 0xd1, 0x15, 0x1e, 0xe9, 0xd5, 0x06, 0x29, 0x1c
db 0xdd, 0x5f, 0xed, 0xf4, 0x68, 0xf3, 0x9c, 0x61, 0xcd, 0xfc, 0xc0, 0x5c, 0x36, 0xcd, 0x25, 0xa7
db 0xc6, 0x2e, 0xd9, 0xdf, 0xdd, 0x17, 0x8a, 0x05, 0x42, 0x37, 0x1b, 0x8e, 0x9b, 0x31, 0xff, 0xb3
db 0xcb, 0x19, 0xcb, 0x68, 0x6d, 0x79, 0x43, 0xa5, 0x9c, 0x2b, 0x67, 0x21, 0xee, 0xbf, 0xc9, 0x3c
db 0xb1, 0xb5, 0x72, 0x51, 0xf9, 0x24, 0x06, 0xb7, 0x65, 0x2c, 0x4c, 0x45, 0x4a, 0xf7, 0xd3, 0x2a
db 0x1e, 0x09, 0x50, 0xda, 0x4b, 0x91, 0x54, 0x6e, 0x88, 0x3b, 0xf8, 0xcf, 0x2f, 0xfe, 0x62, 0x90
db 0xc6, 0xbc, 0xbc, 0xa0, 0x73, 0x97, 0x62, 0x10, 0xe3, 0x4f, 0x4d, 0x8b, 0x42, 0x6b, 0xb0, 0x7b
db 0xb9, 0x81, 0x62, 0x54, 0x22, 0x62, 0xc4, 0xcf, 0xa4, 0x02, 0x0e, 0x24, 0x65, 0x9d, 0x26, 0x34

db 0x23, 0x42, 0x0a, 0x4b, 0xfc, 0x7c, 0xf9, 0x30, 0x7a, 0xf1, 0x99, 0x75, 0x2c, 0xbe, 0xdf, 0xdb
db 0x41, 0x1e, 0xa5, 0x94, 0x4c, 0x60, 0xfb, 0x12, 0x29, 0x9d, 0x82, 0x93, 0xd8, 0xe6, 0xd0, 0xf2
db 0xd5, 0xdd, 0x74, 0xd2, 0x0c, 0x13, 0x56, 0xa2, 0xe7, 0x8a, 0x49, 0x8c, 0x25, 0xc4, 0x77, 0x3a
db 0xce, 0x04, 0xe2, 0x01, 0xa0, 0x84, 0x87, 0xc4, 0x83, 0xf7, 0xfd, 0xc1, 0x4b, 0xfa, 0x2c, 0xdb
db 0x15, 0x75, 0x68, 0x69, 0xb2, 0x59, 0x9a, 0x1d, 0x40, 0x46, 0x5c, 0x59, 0x91, 0x8b, 0xa2, 0x40
db 0xe5, 0x7d, 0x09, 0x02, 0x12, 0xe2, 0x8e, 0xc7, 0x84, 0xf3, 0xac, 0xa3, 0x4f, 0x65, 0x1f, 0x80
db 0xd6, 0xa4, 0x97, 0xc5, 0xdf, 0x9d, 0x88, 0x8d, 0xef, 0x27, 0xe7, 0xfe, 0x79, 0x9c, 0x1f, 0xde
db 0x7f, 0x6c, 0xb5, 0x57, 0x76, 0x16, 0x89, 0xa3, 0x5e, 0xaf, 0x5d, 0x75, 0xb0, 0x5e, 0x04, 0xff
db 0x20, 0xc1, 0x34, 0x0d, 0x6e, 0xb6, 0x90, 0x3b, 0x29, 0x32, 0x80, 0x9e, 0xdb, 0x68, 0xea, 0x7d
db 0xb8, 0x7f, 0x14, 0xd2, 0x9f, 0x2a, 0xc6, 0xe0, 0xbc, 0xee, 0x85, 0x38, 0x01, 0x2d, 0x70, 0x49
db 0x10, 0x73, 0x06, 0x6e, 0xa2, 0xb6, 0xda, 0x0c, 0xc7, 0x71, 0x3d, 0xae, 0x48, 0x95, 0xe0, 0xe0
db 0xa8, 0x02, 0x0e, 0xd8, 0xa1, 0x5d, 0x7a, 0x95, 0x0f, 0xeb, 0x72, 0x8b, 0x92, 0x4e, 0xad, 0x0e
db 0x6a, 0x9a, 0x5c, 0x60, 0x74, 0x8b, 0x87, 0xc7, 0xe2, 0x51, 0xb5, 0xe0, 0x04, 0xc1, 0x20, 0x04
db 0x09, 0x3c, 0xb4, 0x08, 0x30, 0x4d, 0x37, 0x12, 0x55, 0x95, 0x9a, 0x74, 0xa1, 0x9d, 0x34, 0x6e
db 0x3d, 0x14, 0x3e, 0x65, 0x5a, 0x1b, 0x77, 0xf5, 0x64, 0xc1, 0x22, 0x36, 0x86, 0x70, 0x0e, 0x29
db 0x03, 0x7e, 0xac, 0x5f, 0x89, 0x26, 0x5e, 0xa3, 0x0e, 0xaa, 0x4b, 0xd9, 0x51, 0x89, 0x10, 0x5d
db 0xdd, 0x2b, 0x10, 0xeb, 0xc0, 0x23, 0x84, 0x35, 0x76, 0x5f, 0xa1, 0x17, 0x69, 0x8e, 0xd2, 0xe6
db 0x36, 0x87, 0xd0, 0xf4, 0x5f, 0x69, 0x5e, 0xcc, 0xed, 0x22, 0x8f, 0x6e, 0xac, 0xfa, 0x10, 0x50
db 0x00, 0xd9, 0x54, 0x5d, 0x57, 0x8e, 0xe0, 0xdc, 0x7f, 0x77, 0x45, 0x60, 0x19, 0xc6, 0xae, 0x86
db 0x59, 0x8e, 0xc5, 0xfd, 0x57, 0x8f, 0x3c, 0x01, 0x74, 0xf1, 0x1f, 0xff, 0xc9, 0x72, 0x4b, 0xe9
db 0x9f, 0xa3, 0xcb, 0xcf, 0x33, 0x58, 0x14, 0x10, 0x30, 0xfe, 0xfd, 0x52, 0x3d, 0x2f, 0xd6, 0xc3
db 0xa7, 0xc5, 0x11, 0x38, 0xf0, 0x2a, 0xc9, 0x6c, 0xb8, 0xaf, 0xeb, 0x80, 0xdf, 0x46, 0x16, 0xe9
db 0x8c, 0xf8, 0xaf, 0xf8, 0x49, 0x5f, 0x9b, 0x63, 0x11, 0x71, 0x16, 0xb2, 0x9b, 0xa5, 0x7b, 0x4d
db 0xab, 0x99, 0xd5, 0xc7, 0xe6, 0xc8, 0x1a, 0x95, 0x27, 0x38, 0x02, 0xd3, 0x85, 0x3d, 0x7b, 0x15
db 0x5c, 0xd3, 0x99, 0xfc, 0x5b, 0x5e, 0x72, 0xdf, 0xd8, 0x42, 0x91, 0xf1, 0x8f, 0x05, 0x01, 0x0b
db 0xe0, 0x80, 0x90, 0x49, 0x32, 0x8b, 0x7c, 0x74, 0x4e, 0x76, 0x1a, 0x91, 0xc2, 0x01, 0x22, 0xf4
db 0xbb, 0x68, 0xcf, 0xd7, 0xe8, 0x44, 0x53, 0x65, 0x17, 0xdd, 0x22, 0x52, 0xc6, 0xcf, 0xec, 0xce
db 0x1d, 0xb1, 0x00, 0xb4, 0xfb, 0x1a, 0xc2, 0x28, 0xd7, 0x6a, 0x14, 0x66, 0xd9, 0xaa, 0xd6, 0x61
db 0x2b, 0xe4, 0x78, 0x5b, 0x77, 0xf9, 0x53, 0xc3, 0x8d, 0x86, 0x37, 0x71, 0xc2, 0x0b, 0xef, 0x20
db 0xa3, 0xb7, 0x06, 0x74, 0xf3, 0xa5, 0x56, 0x17, 0xac, 0x50, 0x19, 0x15, 0x07, 0xda, 0x12, 0x72
db 0x21, 0x6e, 0x52, 0xcf, 0x71, 0x55, 0x36, 0x29, 0x85, 0xff, 0xc5, 0x22, 0xeb, 0x17, 0x2d, 0x2a
db 0xab, 0x0b, 0x73, 0x50, 0x98, 0x06, 0x45, 0x51, 0x6f, 0xbd, 0x66, 0x4d, 0x7e, 0xa1, 0xf8, 0xc1
db 0xd2, 0x1d, 0x33, 0x68, 0xd7, 0x24, 0x38, 0x03, 0xab, 0xd9, 0xe2, 0xb6, 0xd1, 0x05, 0x0d, 0x71
db 0x43, 0x01, 0xf3, 0x9d, 0xc5, 0xd4, 0x76, 0xeb, 0x1b, 0x42, 0x38, 0x50, 0x0b, 0xde, 0x1a, 0xa9
db 0x26, 0x19, 0x86, 0xf7, 0xa5, 0xab, 0x1d, 0xd1, 0xf5, 0xbd, 0xaa, 0x45, 0x1c, 0x1a, 0x84, 0xeb
db 0x20, 0x63, 0xe2, 0x73, 0x92, 0xfc, 0xc3, 0xfd, 0x7d, 0x93, 0x90, 0x28, 0xee, 0xe1, 0x07, 0x9d
db 0x11, 0x36, 0x51, 0xeb, 0x52, 0x07, 0xcc, 0x7a, 0x08, 0x2b, 0x07, 0x6a, 0xef, 0x9c, 0x0e, 0xb7
db 0xff, 0xa8, 0xcb, 0x9b, 0x67, 0x62, 0x41, 0x5a, 0x66, 0x3e, 0xbc, 0xc5, 0xef, 0x21, 0xf0, 0xee
db 0x96, 0x1d, 0xc8, 0x35, 0x87, 0x1e, 0x10, 0x70, 0xa6, 0xb0, 0x8b, 0x2a, 0x5f, 0xf2, 0x93, 0xf5
db 0xb3, 0x08, 0xa0, 0x54, 0xee, 0x2e, 0xfc, 0xdc, 0xca, 0x95, 0x42, 0x80, 0xab, 0xdf, 0x53, 0xd7
db 0xfd, 0x9d, 0xb7, 0x6d, 0x80, 0xe2, 0x81, 0xac, 0x39, 0xf3, 0x68, 0x1b, 0x96, 0xe5, 0xa4, 0x8b
db 0xc6, 0xca, 0x33, 0xd1, 0x93, 0xee, 0x3a, 0x69, 0x62, 0xd7, 0x6e, 0x24, 0x9a, 0xca, 0x0f, 0x38
db 0xa6, 0x21, 0x21, 0x82, 0x0d, 0x37, 0xe9, 0x54, 0x08, 0x31, 0xf9, 0x92, 0x84, 0x09, 0x2f, 0x0c
db 0xc1, 0xdb, 0x28, 0xd0, 0xa4, 0xb1, 0xee, 0x70, 0x62, 0xc0, 0x93, 0xc4, 0x7e, 0x2b, 0x63, 0x4e
db 0x4a, 0x4a, 0x95, 0x88, 0xf0, 0xa9, 0xe5, 0x6f, 0x65, 0xec, 0xf7, 0x21, 0xdd, 0x49, 0x3d, 0x15
db 0xeb, 0x67, 0x0a, 0x98, 0x02, 0xa8, 0xc8, 0xd4, 0x2e, 0x1e, 0xd3, 0x51, 0xc8, 0x66, 0xd3, 0x3d
db 0xcc, 0xda, 0x6c, 0x15, 0x64, 0xa2, 0xb5, 0xcf, 0xd6, 0x68, 0x73, 0xfd, 0xae, 0x08, 0xbb, 0xdb
db 0x69, 0x5a, 0x8a, 0x7a, 0x14, 0x54, 0x59, 0xca, 0x0c, 0x89, 0x6f, 0x85, 0x77, 0x9e, 0xab, 0xc1
db 0x2d, 0x00, 0x51, 0xf2, 0xad, 0x61, 0x15, 0xe7, 0x1e, 0x53, 0x27, 0x97, 0xec, 0xc7, 0x48, 0x4b
db 0x4c, 0x9a, 0x77, 0x59, 0x53, 0x4a, 0x26, 0x59, 0xcd, 0x9e, 0x6c, 0x8a, 0x42, 0xdf, 0xfe, 0xef
db 0x8f, 0xa1, 0x3d, 0x25, 0x32, 0x4d, 0xd1, 0xbe, 0x62, 0x0d, 0x01, 0x08, 0x97, 0x09, 0xaf, 0x9e
db 0x9c, 0x7f, 0xe9, 0xb3, 0xec, 0xc2, 0xc5, 0xc0, 0x97, 0xd3, 0x8d, 0xad, 0xd5, 0x49, 0x6a, 0x3d
db 0x84, 0x92, 0x95, 0xf8, 0x00, 0xf3, 0x2a, 0xba, 0x56, 0xb7, 0x95, 0xf5, 0x1a, 0x28, 0x79, 0x8e
db 0x1e, 0x40, 0xee, 0xfc, 0x4d, 0xe2, 0x8d, 0x47, 0x7f, 0x13, 0x0c, 0x85, 0x54, 0xbb, 0x06, 0xfa
db 0x53, 0x95, 0x2d, 0x63, 0x23, 0xa8, 0xe9, 0x62, 0x3a, 0x0a, 0x81, 0x1f, 0x39, 0x79, 0x3d, 0xea
db 0x70, 0xb7, 0x29, 0x80, 0xc2, 0x25, 0x39, 0x5d, 0x98, 0x80, 0x7e, 0x09, 0x82, 0xd1, 0xc1, 0x12
db 0x3e, 0x98, 0x54, 0x05, 0x63, 0xff, 0x2f, 0x10, 0x60, 0x13, 0xf5, 0x27, 0xe8, 0xda, 0x8d, 0xeb
db 0x2a, 0xba, 0x01, 0xee, 0xb0, 0xbc, 0xef, 0x5f, 0x6d, 0x03, 0x48, 0x8f, 0x79, 0x21, 0xa4, 0x40
db 0x66, 0x85, 0x9c, 0x8b, 0x01, 0xa5, 0xbb, 0x9e, 0x79, 0xb8, 0xad, 0x59, 0xff, 0x11, 0xe6, 0x10
db 0xdb, 0xf0, 0x66, 0x87, 0xbc, 0x7b, 0x9b, 0x75, 0xef, 0x90, 0xbf, 0x22, 0x35, 0x79, 0xd1, 0x9d
db 0x2c, 0x4f, 0x22, 0x50, 0x8b, 0x23, 0x15, 0x4c, 0xb4, 0xb4, 0x64, 0xab, 0x4b, 0x7a, 0xa3, 0x19
db 0x95, 0xd6, 0xd8, 0x33, 0x57, 0xb9, 0xfc, 0x53, 0x66, 0x84, 0x61, 0x0a, 0xc7, 0x76, 0x84, 0xd8
db 0xa6, 0xf4, 0xbd, 0x46, 0xf7, 0x0e, 0xf9, 0x42, 0x08, 0xb6, 0x82, 0x94, 0x2e, 0x04, 0x9e, 0x0d
db 0x55, 0x18, 0xb2, 0xb8, 0xbf, 0xe0, 0xe1, 0xac, 0x15, 0xae, 0x60, 0x4c, 0x1d, 0xf1, 0x71, 0x7a
db 0xda, 0xed, 0xeb, 0x79, 0x33, 0x52, 0x5b, 0x38, 0x2d, 0x94, 0x38, 0x11, 0x8f, 0xf9, 0x37, 0x2c
db 0x39, 0x9c, 0x90, 0x8b, 0x45, 0x32, 0x79, 0x59, 0x3f, 0x4d, 0x43, 0x07, 0x88, 0xb5, 0xf6, 0x48
db 0x5c, 0xcd, 0x57, 0x85, 0x81, 0xb3, 0x09, 0x2c, 0xab, 0xb2, 0x16, 0xd9, 0x28, 0x32, 0x6c, 0xf3
db 0x75, 0xf3, 0xa4, 0xed, 0x19, 0xa0, 0xd5, 0x79, 0x62, 0xd6, 0xe1, 0xa0, 0x26, 0x54, 0x43, 0x76
db 0x59, 0xd7, 0x39, 0x01, 0x4d, 0x87, 0x52, 0x1a, 0xac, 0x9c, 0x53, 0x15, 0x2c, 0xff, 0x42, 0x90
db 0x62, 0xf9, 0x7a, 0xcb, 0x63, 0xce, 0x2e, 0x22, 0x72, 0x90, 0x7c, 0x13, 0x98, 0x87, 0x5b, 0x3c
db 0x09, 0x24, 0xc3, 0xaa, 0xad, 0x68, 0x93, 0xca, 0x80, 0xd4, 0xee, 0x2e, 0x6f, 0xd2, 0xad, 0x43
db 0xfa, 0x4a, 0x8b, 0x93, 0x80, 0x70, 0x66, 0x79, 0xf0, 0xa5, 0xef, 0x90, 0x26, 0xd9, 0xa2, 0x72
db 0x9c, 0xb5, 0x83, 0x6c, 0xc5, 0x91, 0x19, 0xd9, 0x18, 0x41, 0xf9, 0x04, 0x3a, 0x83, 0x8c, 0x0c
db 0xd3, 0x2c, 0x8e, 0x3e, 0xb7, 0x5c, 0x19, 0xf6, 0x04, 0x1d, 0x09, 0xe8, 0x7e, 0x9d, 0x00, 0xfa
db 0x08, 0x15, 0x1f, 0xdf, 0xf9, 0x42, 0x86, 0x63, 0x99, 0x8c, 0xec, 0x34, 0xcd, 0x20, 0x82, 0xc6
db 0x64, 0x9b, 0x1f, 0xec, 0x73, 0x38, 0x5c, 0xaa, 0xec, 0xe0, 0x76, 0x5b, 0x64, 0x93, 0x48, 0x0d
db 0x33, 0x2b, 0xfb, 0x02, 0x97, 0xd6, 0xee, 0xb4, 0x79, 0x92, 0xf6, 0xb0, 0x11, 0xdc, 0x97, 0x8e
db 0x33, 0x84, 0xe6, 0x68, 0xb2, 0x65, 0x60, 0x61, 0xa8, 0x83, 0xf5, 0xf9, 0x2a, 0x7e, 0x4a, 0x75
db 0x84, 0x85, 0x37, 0x34, 0x47, 0x4d, 0xf4, 0x8b, 0xcd, 0x19, 0xee, 0xda, 0x94, 0xee, 0x01, 0xef
db 0x5d, 0x75, 0x43, 0x78, 0xa4, 0xe0, 0x76, 0x7a, 0x76, 0x65, 0x74, 0x82, 0xd5, 0x2a, 0x95, 0x23
db 0x59, 0x2d, 0x63, 0xcf, 0x2c, 0xad, 0x85, 0xe2, 0x80, 0x3f, 0x4c, 0xcd, 0xb2, 0x75, 0x45, 0xd5
db 0x62, 0x0f, 0xc8, 0xd9, 0x68, 0x4c, 0xe9, 0xb8, 0x09, 0xed, 0x2e, 0x5b, 0x67, 0xaf, 0x65, 0x04
db 0x26, 0x96, 0x33, 0xb1, 0x01, 0x3e, 0x85, 0x0f, 0x8d, 0xf3, 0x52, 0x2c, 0x31, 0x93, 0xfe, 0x64
db 0x9a, 0x96, 0xd7, 0x19, 0xe0, 0x3c, 0xfc, 0xa9, 0x3f, 0xd2, 0xae, 0x49, 0x9c, 0x26, 0x6e, 0xe2
db 0xbf, 0x0f, 0x73, 0xf6, 0x44, 0xc5, 0x1b, 0x1c, 0x04, 0xde, 0xae, 0xf3, 0xdb, 0x80, 0xa5, 0x83
db 0xa4, 0xbf, 0xc7, 0x68, 0x58, 0xbf, 0x6d, 0xa3, 0x19, 0xf6, 0x0d, 0x74, 0xf2, 0x52, 0x36, 0x66
db 0xef, 0x42, 0xb5, 0xaa, 0x3d, 0x77, 0x00, 0x17, 0xab, 0x56, 0x08, 0x28, 0x31, 0xbe, 0xc9, 0xac
db 0xb9, 0xdf, 0x48, 0xcb, 0xd3, 0x24, 0xba, 0x7c, 0x19, 0xdd, 0x53, 0x1b, 0xb6, 0xac, 0xe0, 0xf8
db 0xdf, 0x38, 0x85, 0xc9, 0x24, 0x78, 0xdb, 0xf0, 0xe8, 0xab, 0xf2, 0xc2, 0xb7, 0x07, 0xf5, 0xfa
db 0x45, 0xc2, 0x2c, 0x3d, 0x20, 0x68, 0xdc, 0x2d, 0xcf, 0xb8, 0xe2, 0xe0, 0x29, 0x3c, 0x94, 0x37
db 0x7f, 0x23, 0x69, 0x9f, 0x11, 0x2c, 0x81, 0xf0, 0x3f, 0x11, 0x4c, 0xb5, 0xde, 0x9c, 0xe0, 0x17
db 0x97, 0x38, 0x64, 0x78, 0xdf, 0x36, 0x82, 0x23, 0x38, 0x64, 0x3d, 0x8a, 0x3d, 0xc2, 0xac, 0x29
db 0x5d, 0x63, 0x6b, 0x4d, 0xb1, 0x69, 0x7c, 0xfd, 0x74, 0xe9, 0x92, 0xf4, 0x62, 0x73, 0x77, 0x4a
db 0x80, 0xcd, 0x1a, 0xba, 0x6e, 0xba, 0x52, 0x12, 0xaf, 0xdd, 0x90, 0xf7, 0x73, 0x2f, 0xf9, 0x51
db 0x81, 0x89, 0xa1, 0x80, 0xaa, 0x87, 0xfc, 0x58, 0x6d, 0x1f, 0x2d, 0x6f, 0x5e, 0xe0, 0x23, 0x5e
db 0x0d, 0xaf, 0x86, 0x0d, 0x7a, 0x2d, 0x61, 0x4e, 0x5a, 0xcf, 0x3f, 0x11, 0xc5, 0x9b, 0xf0, 0x17
db 0x88, 0x96, 0x05, 0x95, 0x92, 0xc6, 0x65, 0x87, 0x13, 0x86, 0x77, 0x9c, 0x65, 0x14, 0xdd, 0x65
db 0xb9, 0xbb, 0x96, 0x15, 0x48, 0x6b, 0x23, 0x89, 0x99, 0x3c, 0x7b, 0xef, 0x03, 0x2a, 0xd2, 0x0a
db 0xfe, 0x5c, 0xee, 0x8a, 0x1c, 0x1e, 0x6c, 0x1e, 0xac, 0xb3, 0x61, 0x8e, 0x95, 0xad, 0x6a, 0x82
db 0x32, 0x3f, 0xef, 0x9d, 0x36, 0x92, 0x06, 0x4e, 0x1a, 0x25, 0x0d, 0xfb, 0xfe, 0xcc, 0x37, 0xe8
db 0xed, 0x9f, 0xc2, 0x90, 0x37, 0xad, 0x51, 0xe9, 0xb9, 0x20, 0x52, 0x1a, 0xe0, 0x55, 0xa1, 0x1a
db 0x09, 0x00, 0x4a, 0x45, 0xe2, 0x7a, 0x86, 0x3e, 0xbb, 0xf9, 0xb9, 0xc2, 0x0b, 0xa7, 0xf4, 0x75
db 0xca, 0xe3, 0x7a, 0xf4, 0xfd, 0xaf, 0x6a, 0x39, 0x72, 0x9c, 0x41, 0x8c, 0x10, 0x7e, 0xad, 0x8e
db 0xf9, 0x35, 0x2b, 0x85, 0x5d, 0x05, 0x9f, 0x99, 0x37, 0x3c, 0x46, 0x68, 0x5b, 0x5a, 0x70, 0x77
db 0x4a, 0xd7, 0x85, 0x79, 0x21, 0x3f, 0xdd, 0xdd, 0xc8, 0x3c, 0x03, 0x8c, 0x3d, 0xef, 0xad, 0x1e
db 0x44, 0x3e, 0xc1, 0xd0, 0x89, 0xa9, 0xfd, 0xab, 0x38, 0x30, 0x91, 0xc3, 0x49, 0x28, 0x8a, 0x7f
db 0x31, 0x44, 0xb0, 0xb9, 0x7d, 0x32, 0x14, 0x53, 0xe0, 0xa0, 0xed, 0xe2, 0xe2, 0x6c, 0xd7, 0x55
db 0x74, 0x6e, 0x8c, 0x70, 0x4d, 0x56, 0xa3, 0x7e, 0x99, 0x96, 0xea, 0x00, 0x5b, 0x13, 0x40, 0xdc
db 0xaa, 0x4a, 0xcf, 0x99, 0xf6, 0x76, 0x34, 0xa6, 0x53, 0x78, 0xbc, 0x5d, 0x32, 0x41, 0xe0, 0x33
db 0x21, 0xe5, 0xe6, 0x06, 0x73, 0x6c, 0xda, 0xa2, 0x10, 0x28, 0xd4, 0x6d, 0x1b, 0x02, 0x58, 0x8b
db 0xed, 0x7e, 0x65, 0x5e, 0x4c, 0x73, 0xd7, 0xce, 0xf0, 0x9c, 0x48, 0xa7, 0x6c, 0xca, 0x74, 0xdc
db 0x53, 0xc3, 0xee, 0x7a, 0x47, 0xc3, 0xb7, 0xe6, 0xb4, 0xb0, 0xba, 0x48, 0xdf, 0x46, 0x28, 0x03
db 0x68, 0x22, 0x20, 0x92, 0xca, 0xc9, 0xcf, 0xb6, 0x7f, 0xc9, 0xc2, 0x70, 0xa2, 0xe8, 0x43, 0x89
db 0xd2, 0x52, 0xb5, 0x62, 0xc5, 0x70, 0x50, 0x72, 0x98, 0x59, 0x5b, 0x07, 0xf7, 0x98, 0x5c, 0xab
db 0x04, 0xca, 0x79, 0x03, 0x67, 0x4d, 0xcc, 0xc0, 0x16, 0x69, 0x55, 0x07, 0x54, 0xa5, 0x41, 0xd7
db 0x57, 0x53, 0x4c, 0x1f, 0x35, 0xaf, 0x26, 0x6e, 0x4f, 0x63, 0xc4, 0x0d, 0xb4, 0x6a, 0x5c, 0x40
db 0xbd, 0xf4, 0x5c, 0x03, 0x7c, 0x51, 0x7a, 0xa2, 0x9d, 0x0c, 0xf8, 0x81, 0xb9, 0x92, 0x0e, 0x53
db 0xd1, 0xd5, 0x9a, 0x31, 0xf7, 0x0a, 0x1d, 0x70, 0xbc, 0x07, 0x82, 0xce, 0xad, 0x0a, 0xb0, 0x47
db 0x86, 0xe6, 0xc1, 0x44, 0xe6, 0x87, 0xca, 0xe6, 0x61, 0x8c, 0xc2, 0x33, 0xae, 0x11, 0x48, 0xcd
db 0xd0, 0x33, 0x9c, 0xf3, 0x11, 0xe6, 0x2a, 0x4d, 0x30, 0x0b, 0xcb, 0xe2, 0xc7, 0x77, 0x60, 0xb1
db 0xd8, 0xe3, 0xa0, 0x44, 0xe2, 0x87, 0xfa, 0xf6, 0x7e, 0xba, 0x5d, 0x32, 0xa1, 0x53, 0xe6, 0x05
db 0xe1, 0x15, 0xe8, 0xd2, 0x01, 0x46, 0xaf, 0xb1, 0x78, 0x0c, 0x64, 0x2d, 0x3a, 0x8c, 0xf2, 0xb8
db 0x07, 0xee, 0x9d, 0xda, 0x5b, 0x60, 0x07, 0xe4, 0x5e, 0x0b, 0x1d, 0x54, 0xbb, 0x00, 0xc9, 0x99
db 0x0c, 0x6b, 0x4e, 0x18, 0xcc, 0x82, 0x04, 0xa6, 0x8b, 0xc0, 0x07, 0xf4, 0x2b, 0xb4, 0x10, 0x25
db 0xd7, 0xdc, 0x3b, 0xa2, 0xa5, 0x0d, 0x06, 0xa1, 0xdf, 0x32, 0xb2, 0x3b, 0xa0, 0xdc, 0x70, 0x0e
db 0x16, 0x76, 0xc9, 0xbd, 0x73, 0x3d, 0x6e, 0x97, 0x55, 0xe0, 0x4f, 0x16, 0x8e, 0x42, 0x93, 0x0c
db 0x58, 0x64, 0x91, 0x76, 0x3f, 0xaf, 0x71, 0x25, 0x50, 0x66, 0xdd, 0x54, 0x2b, 0x47, 0x6b, 0x0d
db 0x4a, 0x8d, 0xaa, 0x29, 0x22, 0xad, 0xf5, 0x7f, 0x9d, 0xd8, 0x29, 0x25, 0x0f, 0x59, 0x10, 0x05
db 0xb9, 0x56, 0x25, 0x98, 0x60, 0x75, 0xa1, 0x32, 0xa0, 0x69, 0xb4, 0x37, 0x23, 0x5c, 0xcc, 0xe0
db 0x07, 0x94, 0xfb, 0xb0, 0x63, 0x9a, 0xf7, 0x90, 0x28, 0xe0, 0x1c, 0x82, 0xaf, 0xe9, 0xb0, 0x39
db 0xa3, 0x86, 0x61, 0x9c, 0xf6, 0x41, 0x71, 0x70, 0xb0, 0x8e, 0x44, 0x83, 0x51, 0xec, 0x4d, 0x2e
db 0xca, 0x62, 0xa1, 0xef, 0xc1, 0x95, 0x6b, 0xfd, 0x6c, 0x72, 0x57, 0x36, 0xcb, 0xde, 0x91, 0x52
db 0xc8, 0x1d, 0x09, 0x8f, 0x38, 0x74, 0xd5, 0x2c, 0xcc, 0x0b, 0x58, 0x57, 0xc5, 0xba, 0xb8, 0x29
db 0x4c, 0x79, 0x03, 0xab, 0x0d, 0x22, 0x45, 0x0e, 0x27, 0x93, 0x2b, 0xb1, 0x68, 0x5a, 0x66, 0xb1
db 0xa8, 0x85, 0x0d, 0xaa, 0x90, 0x84, 0x7a, 0x98, 0xf0, 0xe8, 0x3f, 0x41, 0x64, 0x1b, 0x7c, 0x79
db 0x9d, 0xfd, 0x3a, 0xf0, 0x00, 0xea, 0xa5, 0x7c, 0xe2, 0x89, 0xb9, 0xc0, 0x85, 0xf4, 0x0a, 0x1a
db 0x2c, 0xd9, 0xf4, 0x7e, 0x60, 0x67, 0xf4, 0x86, 0xf5, 0xe9, 0x22, 0x18, 0x97, 0x99, 0x7c, 0x94
db 0x96, 0x5e, 0xde, 0xf2, 0x50, 0x5c, 0xa3, 0x3e, 0xb2, 0x18, 0x6a, 0xd9, 0x22, 0x85, 0xb6, 0x15
db 0x92, 0x57, 0xc2, 0xe0, 0x33, 0x3b, 0x4d, 0x87, 0x9a, 0xe2, 0xfb, 0xfc, 0x14, 0xc4, 0x42, 0x0a
db 0xa8, 0xcc, 0x58, 0xe1, 0x9d, 0xaf, 0xc9, 0x84, 0x1c, 0x25, 0xcb, 0xa3, 0x00, 0xf7, 0x34, 0x21
db 0x60, 0x14, 0x34, 0xf7, 0xdd, 0xdd, 0x8e, 0x6d, 0x5e, 0xa0, 0x29, 0x49, 0x00, 0xcd, 0xc8, 0x5b
db 0xf8, 0x20, 0x92, 0x36, 0x28, 0xac, 0x94, 0xaa, 0x64, 0x6c, 0x27, 0xb2, 0x08, 0xb5, 0x93, 0xea
db 0x16, 0xdd, 0xdc, 0x54, 0x7d, 0x7e, 0x5b, 0x80, 0xab, 0x4f, 0xed, 0x3d, 0xfb, 0x73, 0xdc, 0x9a
db 0x88, 0xa4, 0x29, 0x69, 0x7f, 0x5f, 0xc2, 0xba, 0x52, 0x75, 0x95, 0x91, 0x05, 0x39, 0x27, 0x60
db 0xdc, 0xee, 0xea, 0x9e, 0x27, 0x8a, 0x7b, 0xd4, 0x4c, 0x0b, 0xbc, 0x12, 0x52, 0xf4, 0x28, 0xd2
db 0xe8, 0x0f, 0xbb, 0xc8, 0x5a, 0x1e, 0x9b, 0x1b, 0xed, 0x59, 0x7a, 0x67, 0x46, 0xd4, 0x3f, 0x28
db 0xee, 0x6f, 0xac, 0x78, 0x45, 0xe2, 0xd8, 0x6e, 0x71, 0x8d, 0xa5, 0xfb, 0xbd, 0x8f, 0x35, 0xd9
db 0x29, 0x1c, 0x0a, 0x40, 0xee, 0x39, 0xe3, 0x2f, 0x1a, 0xa8, 0xd2, 0xf2, 0xdd, 0xdb, 0x62, 0xf5
db 0x49, 0x1e, 0x3d, 0x11, 0xde, 0x1c, 0x5d, 0xe5, 0x5f, 0xd4, 0xdc, 0x8a, 0x98, 0x55, 0x57, 0x1a
db 0xee, 0x81, 0xbf, 0xed, 0x18, 0xda, 0x5a, 0x8e, 0x81, 0x3f, 0x31, 0x3c, 0x11, 0x52, 0x8d, 0x13
db 0xd9, 0xe9, 0x17, 0x8f, 0xd5, 0x50, 0x7b, 0xbf, 0x2f, 0x24, 0xcb, 0xfa, 0xcc, 0x92, 0xd7, 0xba
db 0xa6, 0xab, 0xcc, 0xcf, 0xed, 0xe7, 0xe7, 0x7f, 0x2e, 0x10, 0xbb, 0xa2, 0x57, 0x66, 0xee, 0xc7
db 0x69, 0x1b, 0x40, 0x30, 0xad, 0x46, 0x26, 0x72, 0xd5, 0xf9, 0x88, 0xb3, 0x11, 0x82, 0x49, 0x5a
db 0x8d, 0x82, 0xb7, 0xf3, 0xbc, 0x6b, 0x5c, 0x18, 0x9b, 0x85, 0x91, 0xb6, 0x9f, 0x05, 0xf1, 0x80
db 0xd3, 0x62, 0x32, 0xcf, 0x9f, 0xb8, 0x15, 0x36, 0x63, 0x09, 0x63, 0xaf, 0x4d, 0xa2, 0x5a, 0x02
db 0xe2, 0x57, 0xf5, 0xa5, 0x44, 0xe2, 0x51, 0xbd, 0x92, 0x51, 0x53, 0xc2, 0xef, 0x60, 0x57, 0xdf
db 0xa7, 0x15, 0x4c, 0xbf, 0x66, 0x7d, 0x78, 0xa0, 0x07, 0x53, 0xb4, 0x70, 0x3a, 0x8d, 0x4f, 0x57
db 0xae, 0xae, 0xa3, 0x99, 0xb8, 0x65, 0xc7, 0xee, 0x8b, 0xd7, 0x15, 0x80, 0x90, 0xb7, 0x8f, 0xb1
db 0x6e, 0x41, 0x3b, 0x97, 0x4c, 0xf1, 0x14, 0xc3, 0xfe, 0x93, 0x3d, 0xaa, 0x1c, 0xb2, 0xb3, 0xf4
db 0x97, 0xd1, 0x53, 0x09, 0x49, 0x3e, 0xfd, 0x72, 0x17, 0x06, 0xf7, 0xa8, 0x52, 0x9b, 0x86, 0x8d
db 0x4d, 0xc6, 0xf5, 0xd4, 0xd9, 0x36, 0xf7, 0xbe, 0xa9, 0x66, 0xcf, 0xc2, 0xd2, 0xb1, 0xee, 0x9f
db 0x9d, 0x8c, 0xbc, 0x53, 0xec, 0x55, 0x3a, 0xad, 0x85, 0xca, 0xd3, 0x8f, 0xa5, 0x0c, 0xb9, 0xa7
db 0x8e, 0x95, 0x5f, 0x9b, 0xeb, 0x5f, 0xad, 0x39, 0x3a, 0xb2, 0x36, 0x51, 0x0c, 0x5f, 0x1d, 0xe6
db 0xa5, 0x13, 0x67, 0x7f, 0xcb, 0x78, 0x85, 0xc2, 0x58, 0xb5, 0xb3, 0xeb, 0x3d, 0x15, 0x62, 0x97
db 0xd2, 0x99, 0x50, 0x30, 0x12, 0x49, 0x9e, 0x03, 0x54, 0x45, 0x8f, 0x89, 0x2f, 0x60, 0xb2, 0x8c
db 0x50, 0x25, 0x15, 0x08, 0x4f, 0xed, 0x09, 0x88, 0x50, 0x76, 0x3c, 0xf3, 0x4f, 0x20, 0x3b, 0xef
db 0xea, 0xfc, 0x07, 0x21, 0xb6, 0xc7, 0x8c, 0xfd, 0x54, 0xb1, 0xc3, 0xd6, 0xb0, 0xab, 0x7d, 0xfa
db 0x94, 0xb4, 0x64, 0x21, 0xc0, 0x3d, 0x78, 0x7f, 0x6d, 0xcb, 0xbb, 0x8d, 0xa7, 0x51, 0x99, 0x55
db 0x47, 0x9f, 0x97, 0x69, 0xda, 0x86, 0xa3, 0x41, 0xed, 0x2b, 0x32, 0x7c, 0xac, 0xf4, 0x8a, 0x40
db 0xec, 0xc1, 0x1b, 0xf7, 0x80, 0xb9, 0x64, 0x57, 0x89, 0x2c, 0xb2, 0x91, 0xa7, 0x55, 0xe7, 0xef
db 0xe5, 0x3c, 0x90, 0x90, 0xc0, 0xfd, 0x8a, 0x31, 0x6f, 0xcf, 0x37, 0x91, 0x41, 0x25, 0xb2, 0x89
db 0x23, 0xa5, 0x30, 0xb6, 0xb9, 0xcf, 0xb9, 0x10, 0x3d, 0xc8, 0xb8, 0x61, 0xc7, 0x02, 0x35, 0x26
db 0x0f, 0x74, 0xaf, 0xca, 0x3a, 0x55, 0x48, 0x55, 0x46, 0xe1, 0xce, 0x55, 0x06, 0x67, 0x8b, 0x73
db 0x32, 0xc1, 0xa2, 0xaf, 0x5a, 0xa1, 0x14, 0xfd, 0x4f, 0xc2, 0xe0, 0x90, 0xfe, 0x0a, 0xc3, 0x3d
db 0x25, 0x67, 0x4f, 0xf7, 0xd6, 0xe6, 0xfb, 0x0a, 0x80, 0x52, 0xb5, 0xd6, 0x58, 0x46, 0x34, 0xb8
db 0x95, 0x46, 0x3c, 0x22, 0xa5, 0x76, 0x90, 0xe6, 0x9e, 0xac, 0x13, 0x36, 0xf2, 0x70, 0xcc, 0x86
db 0x7d, 0x46, 0x76, 0x85, 0xb1, 0x6a, 0x83, 0xc3, 0x54, 0xba, 0xa2, 0x62, 0xca, 0x3c, 0x6a, 0xc1
db 0x74, 0x6a, 0xc4, 0x64, 0x66, 0x00, 0x78, 0x7e, 0xf5, 0x71, 0x55, 0x61, 0x6f, 0xae, 0xf3, 0x3d
db 0x7f, 0x2c, 0x1b, 0x4e, 0x8f, 0xa9, 0x92, 0x7b, 0xe1, 0x2c, 0x33, 0x4d, 0x4c, 0x05, 0x40, 0xe1
db 0x15, 0x6d, 0xd6, 0xde, 0xd8, 0x42, 0xa2, 0x45, 0x6a, 0x9c, 0x70, 0xcc, 0xf2, 0x00, 0xf4, 0xd0
db 0xaf, 0x95, 0xb1, 0x36, 0x63, 0x89, 0xfc, 0x92, 0x63, 0xdc, 0x08, 0xe3, 0xa0, 0x08, 0x60, 0xcb
db 0x3c, 0xa0, 0xc8, 0xbc, 0xfe, 0x06, 0x50, 0x96, 0x28, 0xf2, 0x8e, 0x30, 0x00, 0x08, 0xb9, 0xbe
db 0x05, 0xbd, 0xd5, 0x3b, 0xad, 0xf4, 0x10, 0x0f, 0xe1, 0xe1, 0x23, 0xbb, 0x2d, 0x41, 0x37, 0x5d
db 0x8d, 0xc3, 0xfd, 0xfb, 0x21, 0x31, 0xee, 0xa9, 0x2f, 0xd8, 0x55, 0xec, 0xd6, 0x92, 0x29, 0x4b
db 0x85, 0x2f, 0x64, 0x99, 0x05, 0x35, 0x66, 0xc2, 0x77, 0xb8, 0xe6, 0xe4, 0x40, 0x57, 0xf8, 0x5c
db 0xf0, 0x95, 0x7e, 0x67, 0x49, 0xc0, 0x20, 0x9c, 0xcc, 0x47, 0xff, 0xb0, 0xb4, 0xfa, 0x45, 0x7b
db 0xc0, 0xe3, 0x7f, 0xdd, 0x3e, 0xf0, 0x2c, 0x7f, 0x9d, 0xbb, 0x79, 0x9c, 0xb7, 0x08, 0xd4, 0x56
db 0xa8, 0xfe, 0xed, 0xd7, 0x4d, 0xd1, 0x24, 0x49, 0x5c, 0x6d, 0x93, 0x68, 0xc0, 0xe6, 0xb7, 0xbc
db 0xdb, 0x99, 0x2e, 0xf0, 0xc5, 0xe1, 0xfd, 0x1d, 0x5f, 0x4c, 0x36, 0xc7, 0xef, 0xf9, 0x50, 0x17
db 0x4b, 0xd3, 0xe7, 0x60, 0x46, 0xfd, 0xb8, 0x1c, 0x59, 0x90, 0x6a, 0x42, 0x9d, 0xe5, 0x1f, 0x89
db 0xa9, 0x8e, 0x94, 0x54, 0x02, 0xc6, 0x3a, 0xc6, 0xab, 0x8b, 0x32, 0xcb, 0xde, 0x46, 0x7a, 0x29
db 0xb2, 0x13, 0x17, 0x92, 0x8e, 0x85, 0x47, 0x25, 0x02, 0xd1, 0xf5, 0x08, 0xed, 0x95, 0x3d, 0x91
db 0x28, 0xf5, 0x8a, 0xa5, 0x65, 0xb1, 0xf1, 0x3c, 0x0e, 0xb8, 0x06, 0x7b, 0x20, 0xb2, 0x2f, 0x03
db 0x68, 0xea, 0x5a, 0x86, 0x95, 0xf8, 0x82, 0xdb, 0x48, 0xfe, 0x2c, 0x66, 0x0e, 0xe6, 0x54, 0x14
db 0xa9, 0x13, 0xd8, 0xf8, 0x5a, 0xa0, 0x07, 0x0f, 0x13, 0xc0, 0xac, 0xa7, 0x99, 0x32, 0xc2, 0x46
db 0x9c, 0x0c, 0x22, 0x08, 0x04, 0x6e, 0x43, 0x3a, 0x56, 0x6f, 0x8c, 0xd1, 0x3e, 0x77, 0x62, 0x67
db 0x4b, 0x20, 0x52, 0xcf, 0xa4, 0xcb, 0x59, 0xd5, 0xef, 0xbd, 0x70, 0x6b, 0x1f, 0x23, 0xc4, 0x57
db 0x6f, 0x2b, 0xe1, 0xc2, 0xfd, 0xd2, 0x51, 0xac, 0x08, 0x19, 0xa2, 0x54, 0x11, 0x4d, 0xc8, 0x11
db 0x96, 0x25, 0x89, 0x09, 0x29, 0xba, 0x34, 0x17, 0x5e, 0xe1, 0x8a, 0xfe, 0x40, 0x54, 0x5b, 0x22
db 0xf6, 0x91, 0x18, 0xab, 0xfa, 0x73, 0xa5, 0x3c, 0xdb, 0x93, 0xa5, 0xcb, 0x40, 0x49, 0xe5, 0x62
db 0xb7, 0xae, 0xd6, 0x8b, 0x60, 0xd7, 0xee, 0xb9, 0xbc, 0x77, 0xc8, 0x77, 0x1e, 0x51, 0x73, 0xd1
db 0xae, 0x05, 0x8a, 0x85, 0x9c, 0xba, 0xd9, 0x6e, 0xcb, 0xf0, 0x15, 0x56, 0xb9, 0x3c, 0x5d, 0xa3
db 0xde, 0x01, 0x9a, 0x9b, 0x03, 0xb4, 0xed, 0x78, 0xf4, 0xd5, 0xb7, 0xc7, 0x1b, 0x77, 0xfc, 0xea
db 0x45, 0x9a, 0x89, 0xbe, 0xd0, 0x8e, 0xfc, 0x9d, 0x23, 0x7b, 0xac, 0x6f, 0x7a, 0x6d, 0xfa, 0x7e
db 0xbb, 0x00, 0xc2, 0xef, 0xd1, 0x9e, 0x79, 0xf4, 0x6d, 0x85, 0x61, 0x4c, 0xe4, 0x11, 0xde, 0x80
db 0x72, 0x30, 0xfc, 0x01, 0xa8, 0x60, 0xcb, 0xe4, 0x22, 0x7a, 0x9b, 0xa5, 0x28, 0x3d, 0x57, 0x4b
db 0x51, 0x0c, 0xa7, 0x72, 0x6f, 0x6f, 0x4f, 0x57, 0x6c, 0xb2, 0x36, 0xc1, 0x11, 0xba, 0xa8, 0x59
db 0xc4, 0x2d, 0x5f, 0x4e, 0x30, 0x0c, 0x5d, 0xe1, 0xea, 0xb2, 0x0b, 0xf3, 0xe3, 0x76, 0x21, 0x90
db 0xc4, 0x7c, 0x4a, 0x07, 0xb1, 0x5a, 0x98, 0x2f, 0x1e, 0x81, 0x2c, 0xe8, 0xae, 0x62, 0x65, 0xc6
db 0xbb, 0x4a, 0xeb, 0xfa, 0xa1, 0x84, 0x93, 0xcb, 0x11, 0x75, 0xf9, 0x1b, 0x14, 0x1f, 0x60, 0xfc
db 0x9e, 0x23, 0xcb, 0x87, 0xfc, 0xe4, 0x1e, 0x42, 0xb6, 0x4b, 0x9b, 0x7d, 0xdb, 0xfb, 0x37, 0xf3
db 0x8f, 0xc8, 0x64, 0x2c, 0xad, 0xa4, 0xd8, 0x37, 0xfd, 0xb3, 0x9c, 0xd9, 0x78, 0x20, 0xfe, 0x80
db 0x9a, 0x80, 0x5d, 0x43, 0x7c, 0x97, 0xbe, 0x52, 0xd9, 0x2f, 0xad, 0x3c, 0xe2, 0x93, 0xc7, 0x7b
db 0x5b, 0x2b, 0x2e, 0xb0, 0xb1, 0x42, 0x74, 0x62, 0xdd, 0xe9, 0x27, 0xa9, 0x56, 0x7b, 0xf7, 0x9c
db 0x28, 0x7b, 0x9f, 0x31, 0xcb, 0x05, 0x80, 0x35, 0xe2, 0x38, 0x8d, 0x30, 0x13, 0x7f, 0x9b, 0xfc
db 0x82, 0x41, 0xc8, 0x76, 0xa1, 0x49, 0xf5, 0x49, 0x83, 0x72, 0xce, 0x23, 0x9e, 0xe4, 0xf1, 0xc9
db 0x38, 0x56, 0xd6, 0xf1, 0xcd, 0xd3, 0xf0, 0xb6, 0xce, 0xfa, 0x6d, 0xa7, 0xa6, 0x19, 0xc1, 0xf7
db 0x37, 0x29, 0x3c, 0xa9, 0xea, 0x90, 0x50, 0xb3, 0xde, 0xc0, 0x2a, 0x58, 0x21, 0xa9, 0xcd, 0x46
db 0xc2, 0x33, 0xf7, 0x93, 0x54, 0xba, 0xe4, 0x5c, 0x0f, 0x9c, 0x04, 0x68, 0x96, 0xbc, 0x36, 0xaa
db 0x14, 0xbe, 0x39, 0x5a, 0xff, 0x69, 0xe6, 0x0d, 0xaf, 0x61, 0x68, 0x06, 0xee, 0x47, 0x7f, 0xfd
db 0x56, 0x67, 0xce, 0xc9, 0x44, 0x96, 0xac, 0x4f, 0x36, 0x7f, 0x7c, 0xef, 0xfc, 0xe4, 0x35, 0x8b
db 0x6e, 0xb9, 0x92, 0x6e, 0xe9, 0xc7, 0x60, 0xac, 0x1e, 0x19, 0x78, 0xd8, 0x3d, 0xc7, 0x39, 0x1b
db 0xa6, 0x1f, 0x9e, 0x34, 0x91, 0x18, 0x9b, 0x89, 0x3e, 0x5e, 0xf4, 0x95, 0xc1, 0x26, 0x9e, 0x02
db 0xb6, 0x0a, 0x2f, 0x26, 0xb9, 0x66, 0xfb, 0xb4, 0x24, 0x24, 0x78, 0x5c, 0x6c, 0x55, 0x14, 0xae
db 0xf5, 0xe6, 0xce, 0xaf, 0xcf, 0x6a, 0x87, 0x6c, 0x31, 0x62, 0x66, 0xdd, 0x8f, 0x9e, 0x98, 0x17
db 0x8c, 0x19, 0x3f, 0xef, 0xb7, 0x92, 0x24, 0x8b, 0x18, 0x8f, 0xbe, 0x27, 0x97, 0xb8, 0x64, 0x87
db 0xb5, 0x99, 0x81, 0x47, 0xb0, 0x7a, 0x75, 0x4c, 0x42, 0x05, 0xc8, 0x8c, 0xd3, 0x6a, 0x41, 0x4b
db 0xdb, 0x8b, 0xdc, 0xa3, 0x3c, 0x1f, 0xff, 0x0f, 0xa8, 0xf5, 0xcc, 0x50, 0x15, 0xf1, 0x72, 0x4b
db 0xb8, 0x52, 0x34, 0x67, 0x77, 0x5f, 0x86, 0xe8, 0x9a, 0x2e, 0xe4, 0xec, 0x60, 0x16, 0x83, 0xc7
db 0x5d, 0x15, 0x3b, 0xe8, 0xee, 0x65, 0xd4, 0x5c, 0x43, 0x3e, 0x01, 0x52, 0xd0, 0xf2, 0xc8, 0xd5
db 0x97, 0x6e, 0xfe, 0x08, 0x2d, 0x19, 0x05, 0x28, 0xc8, 0x58, 0xdf, 0x94, 0x03, 0xf4, 0x57, 0xcf
db 0xe1, 0xf7, 0x72, 0x93, 0x38, 0xee, 0x23, 0xc8, 0x88, 0xb5, 0x34, 0xd4, 0xa0, 0x12, 0x42, 0xcf
db 0xb6, 0x10, 0xb3, 0x16, 0x66, 0x79, 0x2c, 0x5a, 0x95, 0xa8, 0x21, 0x72, 0xdd, 0x8e, 0xb0, 0x0b
db 0xc0, 0xe0, 0x62, 0x22, 0xa8, 0xcc, 0x67, 0xc8, 0x98, 0xbf, 0x74, 0xc4, 0x76, 0x5e, 0x60, 0xd3
db 0x36, 0xa9, 0x00, 0x95, 0xdd, 0x21, 0x3a, 0xb2, 0xee, 0x5a, 0x6c, 0x9e, 0xb7, 0x0e, 0x32, 0x23
db 0x6d, 0xec, 0xaf, 0x42, 0x41, 0x5d, 0x68, 0x14, 0x58, 0x80, 0xca, 0x2d, 0xb9, 0xf3, 0xb8, 0xdb
db 0x96, 0xb8, 0xed, 0xba, 0xdc, 0x26, 0xb0, 0x5d, 0xe9, 0x9a, 0x80, 0xdc, 0x0e, 0xb3, 0x74, 0x49
db 0x77, 0x54, 0xb6, 0xb4, 0x6a, 0x8c, 0x8f, 0x74, 0x86, 0x33, 0xcd, 0xcb, 0x94, 0x2d, 0x1e, 0xd4
db 0x1e, 0x8c, 0x6f, 0x85, 0x12, 0xa3, 0xa0, 0x6f, 0x29, 0x28, 0x5d, 0x6f, 0x6a, 0x07, 0xa8, 0xf4
db 0x92, 0x4f, 0xc3, 0xf5, 0x05, 0x28, 0x0e, 0xe9, 0x8a, 0x60, 0x9c, 0xd2, 0x60, 0x04, 0x4f, 0xae
db 0xef, 0x46, 0x2d, 0x06, 0x8d, 0x9b, 0x93, 0xef, 0x06, 0x32, 0xa2, 0xf3, 0xba, 0xd7, 0x96, 0xbc
db 0x84, 0xd5, 0x4b, 0x6e, 0x06, 0xb0, 0xb6, 0xcf, 0x13, 0x88, 0xd8, 0x52, 0xf7, 0x3f, 0xc4, 0x0e
db 0x67, 0x2c, 0x91, 0x4c, 0x26, 0x46, 0xfc, 0x8e, 0xf0, 0x69, 0x18, 0xee, 0xcd, 0x80, 0x17, 0x11
db 0xbc, 0x47, 0x55, 0x34, 0x74, 0x03, 0xc9, 0x14, 0xee, 0xdd, 0x95, 0x03, 0xf7, 0xd2, 0x81, 0xb3
db 0x68, 0xc6, 0xbd, 0x77, 0x9a, 0x7e, 0xc1, 0xcb, 0xf2, 0xb0, 0xe6, 0x78, 0xce, 0x83, 0xa1, 0xd7
db 0x6f, 0x87, 0x8d, 0xcf, 0xbc, 0x6b, 0x33, 0xb4, 0xe4, 0xc3, 0xc3, 0x48, 0xdf, 0x5d, 0xf7, 0xfe
db 0xc5, 0xb3, 0x49, 0x42, 0x0e, 0x2d, 0xd1, 0x4c, 0xf6, 0x37, 0xe7, 0x18, 0xd9, 0xa2, 0xc0, 0xaf
db 0x60, 0x0b, 0xf1, 0xca, 0x54, 0x5f, 0xcc, 0x64, 0x1b, 0x09, 0xda, 0xd2, 0x50, 0xec, 0x38, 0xa0
db 0x05, 0x8e, 0x0d, 0x76, 0xfd, 0x5b, 0x72, 0x3e, 0xac, 0x9a, 0xc7, 0xdb, 0xaf, 0x50, 0x9b, 0xfb
db 0x69, 0xb9, 0xdd, 0x48, 0xaa, 0xa6, 0x0c, 0x49, 0xff, 0x6f, 0x3b, 0xc3, 0x03, 0x13, 0x22, 0x26
db 0xf7, 0xd2, 0xb1, 0x13, 0x52, 0x49, 0xaa, 0x76, 0xe3, 0x48, 0xc0, 0x61, 0xea, 0xb3, 0xc4, 0x5b
db 0xc0, 0x88, 0x55, 0xb9, 0x71, 0xad, 0xb3, 0xc6, 0x99, 0xc3, 0x12, 0x51, 0x72, 0x3e, 0xd1, 0x92
db 0x40, 0x9a, 0xfe, 0xc7, 0x0f, 0xd9, 0x4f, 0x0f, 0x64, 0x91, 0xba, 0xfc, 0xc1, 0x62, 0xbf, 0xa8
db 0x45, 0x63, 0xae, 0xf6, 0x25, 0xa4, 0xf3, 0x50, 0x61, 0x86, 0xc4, 0xc3, 0x22, 0xf8, 0xd5, 0x37
db 0x70, 0x12, 0x9c, 0xba, 0xc0, 0xad, 0xa6, 0xa0, 0xc9, 0xfa, 0xd4, 0xe4, 0x57, 0x01, 0xa1, 0x0b
db 0x07, 0xab, 0xee, 0xe5, 0x35, 0x4c, 0x76, 0x83, 0x49, 0xfd, 0x11, 0x34, 0xed, 0xd8, 0xf7, 0x2c

align 32
; 8192-bytes of random data
.random_data3:
db 0x18, 0xd6, 0xfe, 0x97, 0x75, 0x8c, 0x1a, 0x61, 0xc5, 0xc0, 0x7e, 0x3f, 0x46, 0xf9, 0xb0, 0xa3
db 0x16, 0xb2, 0x1c, 0x7b, 0x04, 0xa7, 0x7e, 0xbe, 0x04, 0xdc, 0xc6, 0x65, 0x43, 0x5e, 0xa4, 0x8e
db 0x40, 0xc5, 0x4f, 0x2f, 0xf2, 0x62, 0x6d, 0x1b, 0x41, 0xb6, 0x9c, 0xa4, 0xbf, 0x1a, 0x57, 0x1e
db 0x83, 0x53, 0x8a, 0x92, 0xbc, 0x67, 0xe8, 0xe8, 0xc3, 0xb9, 0xbc, 0xab, 0x38, 0xa0, 0x8e, 0x5d
db 0xcc, 0x2d, 0xa9, 0xd8, 0x74, 0xee, 0x8f, 0x57, 0x62, 0x9b, 0x0f, 0xf8, 0x76, 0x2a, 0x4d, 0x22
db 0x2f, 0xaf, 0x0b, 0x13, 0xbf, 0xa5, 0x87, 0xc1, 0x0f, 0xfd, 0x0b, 0x69, 0xbc, 0x58, 0x06, 0xa3
db 0xc4, 0x4f, 0x33, 0xf7, 0x53, 0xc8, 0xe6, 0x6b, 0xa5, 0x96, 0x33, 0x37, 0x89, 0xc1, 0x33, 0x4e
db 0xfb, 0x54, 0xbb, 0xd1, 0xcf, 0xa7, 0xe4, 0x77, 0x72, 0x3f, 0x68, 0xd1, 0x7b, 0xc7, 0x4f, 0x99
db 0xd8, 0xdd, 0xf3, 0x85, 0x10, 0x88, 0x0c, 0x1a, 0x80, 0x86, 0xd9, 0xce, 0x9d, 0x88, 0xc7, 0x2e
db 0x2b, 0xcb, 0x34, 0x17, 0xd6, 0x85, 0x1b, 0xa3, 0x8e, 0xc2, 0xbb, 0x74, 0x2c, 0xf2, 0x61, 0x09
db 0xaa, 0x7b, 0x1e, 0x5c, 0x15, 0xb6, 0x47, 0x08, 0xbb, 0x5d, 0x5b, 0x1b, 0x4c, 0xb9, 0xd1, 0x9c
db 0x49, 0xc3, 0x57, 0x93, 0x84, 0x43, 0x97, 0x65, 0x97, 0x5d, 0xb8, 0x4f, 0xe5, 0x69, 0x7e, 0x6e
db 0xc4, 0xee, 0xd3, 0x62, 0xcc, 0xf7, 0xd1, 0xd7, 0x88, 0xfe, 0x9b, 0xaa, 0x31, 0x10, 0x6c, 0x9b
db 0x37, 0x4a, 0x8e, 0x01, 0xbb, 0xe1, 0x02, 0xc0, 0x9a, 0xa4, 0x45, 0x7c, 0xb4, 0xc0, 0x5e, 0xda
db 0xf2, 0x15, 0x3b, 0xe5, 0x95, 0x65, 0xe3, 0xf2, 0xb0, 0x84, 0x6b, 0xb8, 0xf9, 0x11, 0xdd, 0xd4
db 0xed, 0x1d, 0xbf, 0xbd, 0xb9, 0x98, 0xe8, 0xab, 0x08, 0x21, 0xe1, 0x76, 0xcd, 0x31, 0x59, 0x35
db 0x16, 0x95, 0x15, 0xb9, 0x00, 0x2c, 0xb1, 0xf9, 0x7b, 0x4d, 0xaf, 0x80, 0x92, 0xa9, 0x31, 0x91
db 0xfe, 0xaa, 0x8e, 0xe4, 0x45, 0x28, 0x48, 0x40, 0x5c, 0xf7, 0xa9, 0x3f, 0x5a, 0x87, 0x51, 0x30
db 0x7b, 0x55, 0xfa, 0x8c, 0xec, 0xcc, 0x32, 0xd5, 0x8c, 0x5b, 0xa7, 0x1c, 0xc2, 0xee, 0x5f, 0xdb
db 0x3a, 0x5c, 0xdb, 0x3d, 0x8f, 0x17, 0x0c, 0xae, 0x70, 0x35, 0x3a, 0xdd, 0x07, 0xa1, 0x21, 0x53
db 0xa6, 0x4a, 0xa3, 0xd7, 0x65, 0x3f, 0x32, 0xcb, 0x48, 0x4e, 0x2e, 0x12, 0x47, 0x9e, 0x59, 0x8e
db 0xa6, 0x85, 0x04, 0x06, 0x60, 0xcc, 0xc3, 0x54, 0x91, 0x64, 0x14, 0x05, 0xad, 0xe8, 0x2d, 0x77
db 0x5b, 0x5d, 0xca, 0x6b, 0x8c, 0x3a, 0x89, 0x71, 0x30, 0xcd, 0xa0, 0x8e, 0x79, 0xf8, 0xa3, 0xdb
db 0x5c, 0x7b, 0x52, 0xcb, 0x6a, 0xb1, 0x32, 0x31, 0xbe, 0x1f, 0x1a, 0xb8, 0xb8, 0x5f, 0xc4, 0x12
db 0x0c, 0xd6, 0x9e, 0x0c, 0xef, 0xca, 0x5e, 0x71, 0x57, 0x9e, 0x70, 0x91, 0x38, 0x43, 0x5b, 0xd7
db 0x18, 0x83, 0xe2, 0x68, 0x38, 0x29, 0xd7, 0x55, 0x8b, 0x61, 0xfd, 0x6a, 0x81, 0xbf, 0x7c, 0xf1
db 0xc4, 0xcb, 0x4a, 0x45, 0xe5, 0x7f, 0xfe, 0x02, 0x09, 0x9d, 0x6e, 0xbe, 0x45, 0xc3, 0x2a, 0xf3
db 0xe4, 0x64, 0xcc, 0xe8, 0x70, 0x34, 0x96, 0x73, 0x63, 0xad, 0x3f, 0x02, 0x4b, 0xfd, 0xc4, 0x4f
db 0x40, 0x00, 0x29, 0x45, 0x50, 0x54, 0xb2, 0x9b, 0xe5, 0xa5, 0x88, 0xf2, 0xa0, 0xe1, 0x17, 0xe7
db 0xe6, 0xea, 0x20, 0x5b, 0x03, 0xa1, 0xdc, 0x1a, 0x73, 0x26, 0x90, 0x0f, 0x3e, 0x00, 0x05, 0x21
db 0x62, 0x3e, 0x9c, 0xe9, 0xb7, 0xc2, 0x63, 0x22, 0xc4, 0xd1, 0x13, 0x45, 0x43, 0x02, 0x99, 0x76
db 0x72, 0x4a, 0x86, 0xf9, 0xd3, 0x88, 0x96, 0xbd, 0xf1, 0xba, 0xd0, 0xa6, 0x35, 0x9c, 0x8e, 0xa1
db 0x46, 0x52, 0xd3, 0x64, 0xa7, 0x48, 0xba, 0xab, 0x2c, 0x45, 0xb1, 0x38, 0x95, 0xf0, 0xe9, 0xde
db 0x1e, 0x51, 0x16, 0x58, 0xa8, 0x27, 0x93, 0x6d, 0x26, 0x57, 0xb5, 0x26, 0xae, 0xd9, 0x74, 0xd7
db 0x00, 0x02, 0xd9, 0x91, 0xc2, 0xe1, 0xdb, 0xf4, 0x3f, 0xaa, 0x4e, 0x59, 0x35, 0xf1, 0x9b, 0xf9
db 0x13, 0x30, 0xb5, 0xc5, 0x7c, 0x4c, 0x8f, 0x00, 0x28, 0x5e, 0xc1, 0x52, 0xd8, 0x19, 0x0c, 0x0e
db 0x18, 0x4c, 0x92, 0x74, 0x6e, 0xae, 0xae, 0x42, 0x35, 0xfb, 0xe5, 0xc0, 0xf9, 0x08, 0xe2, 0x41
db 0xe6, 0x00, 0x90, 0x83, 0x73, 0xaa, 0x62, 0x80, 0x68, 0x3f, 0x53, 0x46, 0x74, 0x36, 0x43, 0x4c
db 0xd4, 0x62, 0x40, 0xcc, 0x46, 0x2d, 0x67, 0xa5, 0x06, 0x39, 0x7b, 0xaa, 0x64, 0xcd, 0xf4, 0x2a
db 0xf7, 0xd2, 0x0d, 0xcc, 0xb9, 0x7d, 0xb6, 0x73, 0x30, 0xe2, 0x3e, 0x92, 0xbe, 0x09, 0xf5, 0x41
db 0x93, 0xe4, 0x99, 0x96, 0x05, 0xb1, 0x74, 0xeb, 0x35, 0xcb, 0xd4, 0xac, 0xa9, 0x49, 0x34, 0x09
db 0x24, 0x7e, 0xea, 0xad, 0xcf, 0x14, 0xdd, 0xea, 0xe1, 0xf8, 0x77, 0x0d, 0x97, 0x6f, 0xfd, 0x49
db 0x8c, 0x3b, 0xec, 0x5e, 0xbc, 0x3f, 0xbd, 0xdb, 0xaf, 0xff, 0x31, 0xdd, 0xeb, 0xe7, 0xe7, 0x38
db 0x59, 0x2e, 0x1f, 0xf8, 0x00, 0x7f, 0xa6, 0x9c, 0x3e, 0x17, 0x35, 0x3a, 0xbe, 0x7d, 0xe5, 0x18
db 0x06, 0xc6, 0x9e, 0x08, 0xf9, 0x4b, 0xba, 0xde, 0xa2, 0x82, 0x76, 0x26, 0xb1, 0xe6, 0xdc, 0xcd
db 0xd8, 0x15, 0x21, 0x5a, 0x00, 0x05, 0x40, 0x30, 0x47, 0xc5, 0x17, 0x83, 0xa1, 0x36, 0xcf, 0x6d
db 0x7a, 0xe3, 0x52, 0xc3, 0x0b, 0x8b, 0x78, 0x6c, 0x68, 0xd9, 0xa7, 0x52, 0xdc, 0x09, 0x43, 0x00
db 0x5c, 0x2b, 0x63, 0x6b, 0x76, 0x12, 0x69, 0x9f, 0x2b, 0x71, 0x4f, 0x78, 0x64, 0x24, 0x34, 0x4c
db 0x5b, 0x79, 0xac, 0xcc, 0x83, 0xf5, 0x05, 0x86, 0x8b, 0xcc, 0xa9, 0xf0, 0x1c, 0x67, 0xf6, 0x4c
db 0xda, 0x7f, 0xaf, 0x11, 0xd4, 0x6f, 0x37, 0x5e, 0xf3, 0x90, 0x62, 0xfe, 0xb2, 0x2c, 0x42, 0xef
db 0x92, 0x86, 0x58, 0x26, 0x80, 0xdb, 0x87, 0xe3, 0x18, 0xb2, 0x3b, 0x9b, 0xf2, 0x27, 0x00, 0x0a
db 0x87, 0x29, 0x2c, 0xe4, 0xc0, 0xba, 0x50, 0x6e, 0x1a, 0xd1, 0x27, 0xb2, 0xd0, 0xcf, 0x3d, 0x2d
db 0x3e, 0x74, 0xce, 0x0f, 0x1c, 0x13, 0x16, 0xc8, 0x7d, 0xc6, 0xcf, 0x23, 0x9d, 0xde, 0xca, 0x99
db 0x8d, 0x5d, 0x1c, 0xe7, 0x49, 0xbf, 0x30, 0x9c, 0xd7, 0xda, 0x78, 0xfe, 0x92, 0x4d, 0x56, 0x5f
db 0xc6, 0xd7, 0xdd, 0xa2, 0x94, 0x6b, 0xc1, 0x71, 0xd1, 0xb7, 0x3c, 0x7c, 0xb9, 0x05, 0xa8, 0x58
db 0x6e, 0x13, 0x3c, 0x12, 0xe2, 0xeb, 0x1f, 0xce, 0x41, 0x96, 0xda, 0xf8, 0xbe, 0x40, 0x2b, 0xaf
db 0x56, 0x9f, 0xae, 0x34, 0x1c, 0x69, 0xcd, 0x7b, 0x7c, 0x90, 0x36, 0x43, 0x00, 0xa1, 0x57, 0xc6
db 0xc6, 0x99, 0xc2, 0x44, 0x73, 0xe5, 0x65, 0x30, 0x3d, 0xfe, 0x7b, 0xf3, 0x70, 0xc5, 0x3c, 0x47
db 0xce, 0xbc, 0xe9, 0xa7, 0x99, 0xac, 0x14, 0x0a, 0x76, 0x92, 0xa8, 0xb2, 0x59, 0xbb, 0x59, 0xb6
db 0x2b, 0x6c, 0x30, 0x88, 0xca, 0x71, 0x45, 0xe3, 0x15, 0xaf, 0x85, 0xd3, 0xe7, 0x9c, 0xf1, 0xb2
db 0xb6, 0xe1, 0xdb, 0x70, 0x80, 0xe5, 0x36, 0xd9, 0xfe, 0x93, 0x58, 0x22, 0x45, 0x9b, 0x27, 0x98
db 0xbd, 0x6a, 0x8e, 0x6f, 0xfd, 0xa7, 0xb4, 0x7b, 0xa4, 0xb3, 0xc5, 0x64, 0x2c, 0x74, 0x27, 0x16
db 0x95, 0x6e, 0x1f, 0x9e, 0x33, 0x00, 0x09, 0x42, 0x70, 0xcd, 0x43, 0xf4, 0x8e, 0x93, 0xb5, 0xe1
db 0x89, 0xfd, 0x4c, 0x6f, 0xce, 0x0c, 0xde, 0xe7, 0xc7, 0xa4, 0x13, 0x41, 0xe4, 0x46, 0xf8, 0x68
db 0xc4, 0xce, 0x09, 0xd3, 0x28, 0xd9, 0xaf, 0x5e, 0x5b, 0x2f, 0xe6, 0xcb, 0xd1, 0xcc, 0x5d, 0x3c
db 0x4b, 0x01, 0x94, 0x0d, 0xbb, 0x7d, 0x64, 0x80, 0xcd, 0x8f, 0xea, 0xc5, 0x41, 0x7b, 0xc5, 0x40
db 0x92, 0xd0, 0xcb, 0xad, 0x66, 0x2c, 0x21, 0x2d, 0x95, 0xb7, 0x14, 0x4f, 0x21, 0xf4, 0xaa, 0xfe
db 0x60, 0xd4, 0x1f, 0x32, 0x37, 0xcc, 0xc5, 0x38, 0x85, 0x77, 0x29, 0xfd, 0x1c, 0x10, 0x9d, 0x84
db 0x19, 0xe4, 0xed, 0x64, 0xdc, 0xd5, 0xd4, 0x8f, 0x30, 0xf4, 0xbe, 0xc6, 0xfc, 0xfe, 0x60, 0x5c
db 0xba, 0x6a, 0x5f, 0x18, 0x4a, 0xa0, 0xec, 0xa8, 0xec, 0x80, 0xe2, 0x76, 0x1b, 0x61, 0x96, 0xfb
db 0xef, 0x62, 0xe3, 0x60, 0xf5, 0x87, 0xdb, 0x4d, 0x8f, 0x4e, 0xd2, 0xf9, 0x0b, 0x81, 0x03, 0x8b
db 0x56, 0xdc, 0x02, 0xe5, 0x7b, 0xa7, 0xba, 0x8a, 0xac, 0x6c, 0xd3, 0xd0, 0x49, 0xea, 0xc0, 0xdb
db 0x23, 0xba, 0xa6, 0x9f, 0x9c, 0x10, 0x8c, 0x82, 0x07, 0x12, 0x14, 0xf1, 0x6b, 0x8d, 0xe5, 0x71
db 0x9e, 0x58, 0x95, 0xa3, 0x1e, 0xe8, 0x34, 0x9a, 0x7d, 0x7a, 0xf4, 0x10, 0xd5, 0xb2, 0x23, 0xbe
db 0x28, 0x60, 0x07, 0xd0, 0xce, 0x65, 0x3a, 0x79, 0x66, 0x40, 0xc8, 0xb2, 0xb9, 0x0b, 0x16, 0x63
db 0x43, 0x02, 0x64, 0x27, 0xda, 0x87, 0x4f, 0xbe, 0x45, 0xe4, 0xf5, 0x51, 0xfa, 0xc6, 0x59, 0x9d
db 0x11, 0xfd, 0x72, 0xf1, 0x3f, 0x86, 0x91, 0x8f, 0xbe, 0x6a, 0x9a, 0x2b, 0x09, 0xe4, 0x67, 0xec
db 0x29, 0x43, 0x2c, 0xac, 0x34, 0xe2, 0xdc, 0x90, 0x11, 0x80, 0x13, 0x1c, 0xeb, 0x7b, 0xd6, 0xd9
db 0x7a, 0xd2, 0x57, 0xdf, 0xf2, 0x95, 0x47, 0x21, 0xb2, 0xd7, 0x48, 0x89, 0x9c, 0x91, 0xe6, 0x8d
db 0xd5, 0xd6, 0xc0, 0xf1, 0x65, 0xad, 0x40, 0x09, 0x9b, 0x3b, 0xc8, 0x5d, 0xe0, 0xd7, 0x23, 0x5c
db 0xbf, 0x13, 0x84, 0xe4, 0x91, 0xa7, 0xd6, 0x14, 0x96, 0x58, 0xa1, 0x9d, 0xef, 0xcf, 0xf6, 0x03
db 0x5b, 0x38, 0x9d, 0x45, 0xb1, 0x93, 0x86, 0x11, 0x67, 0x82, 0xb3, 0x26, 0x21, 0xb0, 0xb7, 0xc1
db 0x85, 0x63, 0xe5, 0x4a, 0x40, 0x3f, 0x93, 0x8e, 0xe5, 0xb5, 0x62, 0xf7, 0x33, 0x10, 0xb9, 0x4e
db 0x65, 0x05, 0x88, 0x4f, 0x95, 0x7d, 0x45, 0xdc, 0x8c, 0xc5, 0x3f, 0x6a, 0xf4, 0x01, 0x46, 0x68
db 0x0d, 0xd4, 0x89, 0x9e, 0xc1, 0xf5, 0x81, 0x06, 0x7a, 0x55, 0x7b, 0x52, 0xdd, 0x1f, 0x67, 0xf6
db 0x36, 0xed, 0xf1, 0xc5, 0xde, 0x3a, 0xb4, 0x66, 0x9a, 0x15, 0x9f, 0xa1, 0xf5, 0x8a, 0xc0, 0x44
db 0xe1, 0xc2, 0x0a, 0x6b, 0x17, 0xe2, 0xf8, 0x1b, 0x2c, 0x57, 0x30, 0xf1, 0xc0, 0xdd, 0xad, 0x98
db 0x7c, 0x99, 0x25, 0x50, 0x99, 0x43, 0xc7, 0xec, 0x55, 0xac, 0x05, 0xa4, 0x5d, 0xb9, 0x91, 0x96
db 0x43, 0x9b, 0x6b, 0x03, 0xb5, 0x09, 0xf8, 0xaa, 0xf7, 0x7f, 0xc7, 0xec, 0x9c, 0xfd, 0x6a, 0xdf
db 0x30, 0xd1, 0x38, 0xf1, 0x5b, 0x6b, 0xd0, 0x19, 0x8e, 0xba, 0x49, 0x74, 0xd3, 0x10, 0x87, 0x6c
db 0x9c, 0x9f, 0xfd, 0x46, 0xab, 0x84, 0x96, 0x12, 0x5c, 0xa1, 0xc2, 0x1a, 0x33, 0xc0, 0x23, 0xbd
db 0x01, 0xdf, 0xd4, 0x85, 0x69, 0x62, 0xbc, 0x3f, 0x2c, 0xe5, 0xfc, 0x66, 0x14, 0x13, 0xd2, 0xd5
db 0x12, 0x96, 0xdc, 0x3b, 0x14, 0x79, 0x96, 0x9f, 0x07, 0x5a, 0xc1, 0x65, 0xe1, 0xc8, 0x57, 0x7d
db 0x35, 0xa2, 0xcc, 0xaa, 0x94, 0xda, 0xb0, 0x4d, 0xe2, 0x5e, 0x4c, 0x39, 0x0d, 0x8e, 0xb6, 0x2f
db 0xe6, 0xcd, 0x58, 0xb2, 0x86, 0xf4, 0x1f, 0xf6, 0xb0, 0xd8, 0xf4, 0x42, 0x31, 0x4b, 0x6a, 0x5e
db 0x2d, 0x2f, 0xdb, 0x9c, 0xc5, 0x5a, 0x45, 0xd1, 0x17, 0x1a, 0xc7, 0xc5, 0xa0, 0x51, 0xbc, 0xe6
db 0xa9, 0xab, 0xc6, 0x3b, 0x3b, 0xf6, 0xce, 0xb5, 0x3b, 0xdc, 0x95, 0x10, 0x20, 0x1d, 0x1e, 0x64
db 0xa7, 0xe4, 0x1e, 0x86, 0x7a, 0xb1, 0x90, 0x12, 0x1d, 0x21, 0x43, 0x87, 0xbf, 0x19, 0x22, 0x02
db 0x3a, 0xbf, 0xa6, 0x0d, 0x91, 0x2f, 0x66, 0xc9, 0xd5, 0x87, 0xfc, 0xe7, 0xb6, 0x56, 0xf8, 0xc3
db 0x9e, 0x5e, 0x2f, 0xfd, 0xae, 0x3f, 0x01, 0x3b, 0x98, 0x7d, 0x6a, 0x3c, 0xd8, 0xb1, 0xf2, 0xec
db 0xa6, 0x54, 0x2a, 0xac, 0xc5, 0xdd, 0x4c, 0x8b, 0x4f, 0x7a, 0x95, 0x00, 0xe0, 0x9a, 0x35, 0x10
db 0xa8, 0xf5, 0xc7, 0xf6, 0x2c, 0xf2, 0x16, 0x2e, 0xa8, 0xb6, 0xad, 0x01, 0xba, 0x14, 0x63, 0xdd
db 0x5c, 0x02, 0xd8, 0xb3, 0x3d, 0x2c, 0x5b, 0x51, 0xd2, 0xb3, 0x20, 0x14, 0xbd, 0x6e, 0xee, 0x23
db 0x33, 0xd8, 0xd1, 0x8f, 0x02, 0x5b, 0xf4, 0xd0, 0x03, 0x55, 0x81, 0x50, 0x71, 0x47, 0xaa, 0x55
db 0x9c, 0x02, 0xb4, 0x07, 0x76, 0xb9, 0x3e, 0x6b, 0x59, 0x7c, 0xee, 0x46, 0xb4, 0x17, 0xe5, 0x01
db 0xf3, 0x0a, 0xdc, 0x74, 0xbd, 0x65, 0xee, 0x37, 0x10, 0x58, 0xbf, 0xa4, 0xd2, 0x7e, 0xb5, 0x6c
db 0x1e, 0xcd, 0xb8, 0x02, 0x65, 0x27, 0x72, 0x05, 0xe0, 0x40, 0x29, 0x61, 0xfc, 0x46, 0x13, 0x81
db 0x1d, 0xdf, 0x9a, 0x47, 0x20, 0x2a, 0x52, 0x1e, 0x30, 0x77, 0x74, 0x45, 0x30, 0x63, 0xb4, 0xb9
db 0x14, 0xb0, 0x2c, 0x0f, 0xea, 0x93, 0xda, 0xe1, 0xd7, 0xda, 0xf4, 0xb6, 0x71, 0x91, 0x53, 0x36
db 0xef, 0x57, 0xf7, 0xbc, 0xa4, 0x39, 0x3f, 0x42, 0x79, 0xb5, 0xb1, 0xc4, 0x53, 0x2c, 0xfc, 0xe1
db 0xe6, 0x47, 0xcb, 0x5d, 0x32, 0xb8, 0xc9, 0x62, 0x0b, 0x5e, 0xb8, 0xe5, 0x35, 0xf8, 0xd5, 0x45
db 0xa6, 0x80, 0x18, 0x59, 0x16, 0x32, 0x04, 0xe2, 0xe6, 0x0c, 0x4b, 0xbf, 0x38, 0x6a, 0x6d, 0xd8
db 0x96, 0x2c, 0xab, 0xc4, 0xc5, 0xc3, 0x65, 0x22, 0x76, 0x4f, 0x8c, 0xf0, 0x47, 0x90, 0x99, 0xb0
db 0xfc, 0xaa, 0xd0, 0x07, 0x39, 0x38, 0xd8, 0xd5, 0x53, 0x28, 0xf8, 0xac, 0xf5, 0x64, 0x1a, 0x46
db 0xd3, 0xff, 0x76, 0xb4, 0xcb, 0x56, 0xd5, 0xac, 0x05, 0x48, 0xc5, 0xbb, 0xe8, 0x2c, 0x6d, 0xd6
db 0x29, 0xf2, 0xe0, 0x01, 0x8c, 0xa3, 0x70, 0x5c, 0x76, 0x52, 0x16, 0x6a, 0x2f, 0x5a, 0x1c, 0x43
db 0x62, 0x1a, 0x17, 0xd7, 0x29, 0x8f, 0x99, 0xb7, 0xf9, 0xe3, 0x69, 0xe6, 0x61, 0x6a, 0x01, 0xa3
db 0xf2, 0xbe, 0xca, 0xb3, 0xfe, 0x53, 0x03, 0xd1, 0xdf, 0x4c, 0xd6, 0xa3, 0xcf, 0x59, 0xf5, 0xab
db 0xa1, 0x3d, 0x14, 0x84, 0x0e, 0xe2, 0x2b, 0x9a, 0x90, 0x48, 0xb3, 0x30, 0x6a, 0xaa, 0x8c, 0xe7
db 0x36, 0x58, 0xc3, 0x81, 0xf7, 0x3e, 0x27, 0x4b, 0x83, 0xd8, 0x5a, 0x6e, 0x49, 0x14, 0x12, 0x41
db 0xd3, 0x43, 0xa7, 0xa7, 0x0b, 0x38, 0x3b, 0x14, 0xf5, 0xe5, 0xd9, 0x27, 0x57, 0x34, 0x13, 0xcf
db 0xc1, 0x7f, 0xa5, 0x81, 0x29, 0xb0, 0x5f, 0x03, 0x87, 0x74, 0x1a, 0xad, 0x5a, 0xbc, 0xdf, 0xfb
db 0xac, 0x85, 0xdb, 0x62, 0x3b, 0x52, 0x61, 0xef, 0xb1, 0x71, 0xeb, 0x10, 0x8e, 0x54, 0xf1, 0xe1
db 0x66, 0xe7, 0x90, 0xd3, 0x88, 0xc1, 0x78, 0x2c, 0x11, 0xbb, 0xe7, 0xd0, 0x33, 0x45, 0xdb, 0x57
db 0x34, 0x26, 0xbf, 0x9b, 0x44, 0x95, 0x90, 0xe2, 0xca, 0x6b, 0x0f, 0xc2, 0xa9, 0x0a, 0x93, 0x6c
db 0x29, 0x65, 0xa1, 0x1e, 0xb4, 0x2b, 0xf3, 0xe7, 0x00, 0x9e, 0x47, 0xd7, 0x6a, 0x61, 0x5a, 0x68
db 0xe0, 0x45, 0x5d, 0xf6, 0x2f, 0x6f, 0x94, 0x2a, 0xfa, 0x35, 0x74, 0xcc, 0x39, 0x6a, 0x2b, 0xa7
db 0x33, 0xfd, 0xe5, 0xc1, 0x83, 0xbc, 0x2e, 0xa1, 0xa1, 0xc3, 0x44, 0xaa, 0x72, 0x07, 0x8c, 0xa2
db 0x90, 0xdc, 0x96, 0xde, 0xfc, 0x79, 0xb2, 0x3d, 0x70, 0xdd, 0xbe, 0xd8, 0x43, 0x86, 0xd0, 0xee
db 0x12, 0x2f, 0x60, 0xa1, 0xa8, 0x88, 0x0e, 0xdb, 0x21, 0xca, 0x6c, 0xab, 0xd0, 0xc6, 0xa2, 0x77
db 0xb2, 0x97, 0x37, 0x62, 0x32, 0xf5, 0x70, 0x47, 0x1d, 0x22, 0x08, 0x23, 0x9c, 0x4d, 0xd0, 0x7b
db 0x65, 0x46, 0x3d, 0x1d, 0x35, 0xef, 0xf7, 0xb3, 0x13, 0x54, 0xb8, 0xf4, 0x37, 0x26, 0x77, 0x84
db 0x9c, 0xa2, 0x7e, 0x26, 0x2a, 0x0e, 0xbd, 0x39, 0x9f, 0x0f, 0x3b, 0xcb, 0xdf, 0x5a, 0xa8, 0xc0
db 0x50, 0xff, 0x91, 0xd1, 0x81, 0xe0, 0xee, 0x83, 0x07, 0x32, 0x5d, 0xab, 0x31, 0x4c, 0xfe, 0x21
db 0x5f, 0x3b, 0xaa, 0x46, 0xdd, 0xf2, 0x9f, 0x5a, 0x57, 0x34, 0x54, 0x86, 0x3e, 0x1e, 0xda, 0xbe
db 0x78, 0xcb, 0x08, 0xda, 0xc9, 0x76, 0x28, 0x1f, 0xfe, 0x6f, 0x05, 0xd5, 0x7d, 0x4e, 0xbb, 0xf6
db 0x2d, 0x11, 0x2f, 0x9d, 0x4c, 0x05, 0xe6, 0xc5, 0xdc, 0xcd, 0x95, 0x7d, 0x31, 0xc5, 0xb9, 0x0e
db 0xdf, 0x83, 0x30, 0x8b, 0xba, 0xfd, 0xf5, 0xaa, 0x5c, 0xe0, 0x3e, 0xa6, 0xdd, 0x51, 0xf2, 0xb2
db 0xcb, 0x3e, 0x67, 0x8d, 0x59, 0x62, 0x6d, 0x58, 0x92, 0xbe, 0x95, 0xb7, 0x9f, 0x0c, 0x49, 0x3f
db 0xaf, 0x3c, 0x45, 0x76, 0xf6, 0xeb, 0x9b, 0x42, 0x01, 0x2e, 0xd4, 0x8a, 0x9f, 0x92, 0xca, 0xdb
db 0xd3, 0x55, 0x5e, 0x92, 0x01, 0x85, 0x63, 0xa2, 0x61, 0x62, 0x30, 0x64, 0xa2, 0xb2, 0x9e, 0x11
db 0xb9, 0xb1, 0x4d, 0x4b, 0x90, 0x1c, 0xea, 0xd2, 0xd1, 0x3d, 0x11, 0xe1, 0xe1, 0x66, 0xb9, 0x2f
db 0x5f, 0x3c, 0x0a, 0x9a, 0xba, 0x6b, 0x0f, 0x47, 0x54, 0x5d, 0xfc, 0x11, 0x2f, 0x42, 0xdf, 0x7f
db 0x22, 0x60, 0x18, 0x75, 0x05, 0x1d, 0x25, 0x23, 0x75, 0x8d, 0x77, 0x89, 0x03, 0xb7, 0x74, 0x1a
db 0xff, 0xaa, 0x2b, 0xb0, 0x5d, 0x9a, 0xa6, 0x3f, 0xd0, 0x6b, 0xe6, 0x28, 0xae, 0xf9, 0xe6, 0x5a
db 0xbb, 0x3d, 0x0a, 0x94, 0x5f, 0x9b, 0x96, 0x3b, 0xfb, 0x38, 0x8f, 0x93, 0x6f, 0xf2, 0x1e, 0x5c
db 0xc5, 0x8b, 0x44, 0xf7, 0x26, 0xc6, 0x3e, 0xf6, 0x60, 0xbf, 0x54, 0xb4, 0x21, 0xfb, 0xae, 0x80
db 0xdb, 0x1a, 0xd7, 0xa1, 0xb9, 0x1b, 0xc7, 0x8d, 0xb4, 0x11, 0x3e, 0x2b, 0x19, 0xb3, 0xfb, 0xb0
db 0xb3, 0xe0, 0xc7, 0x35, 0x34, 0x80, 0xf0, 0xee, 0xf5, 0x46, 0x66, 0x93, 0x36, 0xd7, 0x0f, 0x44
db 0xb3, 0xe0, 0x15, 0x6a, 0xc7, 0xb1, 0x1e, 0xc4, 0x68, 0x73, 0xe5, 0x43, 0x2e, 0xa6, 0x26, 0xdf
db 0xe3, 0x35, 0x8f, 0xa5, 0x96, 0x52, 0xb8, 0x38, 0xfc, 0x01, 0x76, 0x92, 0xb7, 0x89, 0x09, 0x0a
db 0x88, 0x89, 0x20, 0x7f, 0x0f, 0x84, 0xb6, 0x44, 0x22, 0xa8, 0x27, 0xbf, 0x97, 0x65, 0x3e, 0xa9
db 0x92, 0x64, 0x42, 0xfb, 0xa8, 0xd7, 0xf3, 0x26, 0x28, 0x2c, 0xb6, 0x59, 0x66, 0x80, 0xe7, 0x49
db 0x1c, 0x12, 0x4e, 0x46, 0xfe, 0xf6, 0x3f, 0xdb, 0x79, 0x30, 0x6a, 0x99, 0xf3, 0x8a, 0x9e, 0x26
db 0x4f, 0x62, 0x65, 0xde, 0x9b, 0xb9, 0x03, 0x1c, 0x1e, 0xcf, 0x5c, 0x14, 0x63, 0x3e, 0xa4, 0x18
db 0x99, 0x79, 0x38, 0xbc, 0x96, 0x78, 0xd9, 0xd1, 0x51, 0x0e, 0xe5, 0xb0, 0x6e, 0xaf, 0xa2, 0xd0
db 0x60, 0x4e, 0x71, 0xc9, 0xeb, 0x8a, 0x86, 0xb6, 0x8a, 0x71, 0x66, 0x54, 0x99, 0xd1, 0x52, 0x48
db 0x53, 0x0a, 0x0a, 0xce, 0x84, 0x7a, 0xb8, 0xff, 0x5b, 0x44, 0x4c, 0x21, 0x7a, 0x47, 0xe3, 0xc9
db 0xb5, 0x0d, 0xdb, 0xe7, 0x1b, 0x7a, 0xf9, 0xde, 0x22, 0xb4, 0xf1, 0x64, 0x64, 0x55, 0x79, 0x3b
db 0x84, 0xc1, 0xdb, 0xd9, 0x1a, 0x57, 0x4f, 0x6d, 0x37, 0x4c, 0xe5, 0x36, 0x63, 0xff, 0x0c, 0x2b
db 0x11, 0xa1, 0x12, 0x46, 0x2f, 0x11, 0x70, 0x63, 0x7a, 0x5b, 0x5d, 0xd0, 0x55, 0x40, 0xab, 0xf8
db 0xd5, 0x99, 0xfe, 0x79, 0xfa, 0x4e, 0x35, 0xe2, 0xd7, 0x0f, 0x4d, 0x0e, 0xf9, 0xc1, 0x40, 0x63
db 0x19, 0x5e, 0xb9, 0x2f, 0x65, 0x4d, 0xee, 0x02, 0x1b, 0xcf, 0x5f, 0x2a, 0xb8, 0x5e, 0x2b, 0xdb
db 0x2e, 0x18, 0xaf, 0xc4, 0x49, 0xee, 0xef, 0x36, 0x42, 0xe8, 0xbc, 0x21, 0x0b, 0x07, 0xdb, 0x70
db 0x0e, 0xc2, 0xfa, 0x6e, 0x39, 0x2c, 0xf6, 0xbc, 0xde, 0x87, 0x69, 0x92, 0x45, 0x80, 0x35, 0x14
db 0xba, 0xb2, 0x08, 0x69, 0x26, 0xec, 0x78, 0x17, 0x1c, 0x57, 0x86, 0x8c, 0xe3, 0x58, 0x85, 0x09
db 0x26, 0xb5, 0xd6, 0x8c, 0x9d, 0x61, 0x04, 0x87, 0x00, 0x1b, 0x32, 0x81, 0x46, 0xa8, 0x43, 0x98
db 0xdd, 0x36, 0xa0, 0xff, 0x9f, 0x13, 0x1b, 0x9e, 0x7a, 0x20, 0x4f, 0xa8, 0x0e, 0xd3, 0xdb, 0x31
db 0x1d, 0x07, 0x6a, 0x7e, 0x10, 0xc1, 0x59, 0x0b, 0x96, 0xf7, 0x98, 0x0c, 0x8f, 0xb7, 0x87, 0x10
db 0x91, 0xe7, 0xf0, 0xa3, 0x98, 0x28, 0xce, 0xfd, 0x52, 0xa5, 0xdd, 0xe2, 0xf7, 0xbe, 0x7a, 0x73
db 0xeb, 0x9a, 0xac, 0x7f, 0x53, 0xe9, 0x89, 0xb9, 0x96, 0xba, 0x32, 0x4b, 0x2d, 0xe2, 0x13, 0x00
db 0xed, 0xac, 0x6d, 0x97, 0x62, 0xa0, 0x9b, 0x3a, 0x36, 0x8f, 0x54, 0xf6, 0x0d, 0x46, 0x10, 0xca
db 0x12, 0xb9, 0x81, 0x21, 0x47, 0xf5, 0x20, 0x41, 0xf1, 0x29, 0x91, 0x59, 0xe8, 0x3a, 0x76, 0xb2
db 0xf2, 0x3c, 0xad, 0xba, 0x76, 0xa3, 0x93, 0x84, 0x8c, 0xdb, 0x5f, 0xe0, 0x45, 0x25, 0xcc, 0xc8
db 0x4f, 0x6a, 0x46, 0xa0, 0x1e, 0xa7, 0xd4, 0x88, 0xa5, 0xed, 0x50, 0xde, 0x01, 0xe1, 0xde, 0xe0
db 0x00, 0xa9, 0x07, 0x02, 0x2e, 0xcf, 0xb6, 0x1a, 0x31, 0x43, 0x40, 0xc2, 0x35, 0x17, 0x66, 0x4d
db 0x95, 0xaf, 0x26, 0x9b, 0x18, 0x19, 0xa9, 0x30, 0xb6, 0x7b, 0xb0, 0xb1, 0x26, 0x29, 0x47, 0x45
db 0xd5, 0x2a, 0xa1, 0xac, 0x82, 0x50, 0x74, 0x44, 0x8c, 0x9e, 0xc3, 0x1f, 0x19, 0x26, 0xc5, 0x67
db 0x69, 0xe2, 0x0e, 0xfd, 0x9e, 0xa2, 0x4f, 0x50, 0xed, 0x9f, 0x94, 0x89, 0x9a, 0x73, 0x54, 0xec
db 0x77, 0x22, 0x53, 0xa2, 0xce, 0xa4, 0xf4, 0x6e, 0x7c, 0x6e, 0xda, 0xeb, 0xaf, 0x6f, 0xc9, 0x11
db 0xa9, 0x7d, 0x2a, 0xbb, 0x00, 0x22, 0x99, 0x10, 0xf7, 0x67, 0x4f, 0x2f, 0x2f, 0xfc, 0x79, 0xb6
db 0x83, 0x78, 0x19, 0xe3, 0x44, 0xdb, 0x66, 0xbf, 0xe8, 0xab, 0x32, 0xa5, 0x57, 0x85, 0x8c, 0xae
db 0xae, 0x9d, 0x8b, 0xbb, 0x6e, 0xde, 0xab, 0x6c, 0x64, 0xac, 0xca, 0xd0, 0x4b, 0x1c, 0x1c, 0x30
db 0x9e, 0x88, 0x60, 0x75, 0x0c, 0x42, 0x84, 0x41, 0x76, 0xcd, 0x82, 0xef, 0x80, 0x79, 0x0f, 0xeb
db 0xa3, 0xb8, 0xad, 0x1d, 0x45, 0x4b, 0x0e, 0xd9, 0x07, 0x48, 0x7d, 0x82, 0xd8, 0xf7, 0xcc, 0x2f
db 0x7a, 0x0e, 0xb1, 0x82, 0x95, 0x6e, 0x74, 0x78, 0xe8, 0x0c, 0xb5, 0x12, 0x26, 0x72, 0x55, 0x8b
db 0x0e, 0x29, 0x86, 0xd3, 0xce, 0x88, 0xa6, 0x29, 0xe7, 0x18, 0x9b, 0xbc, 0x13, 0x90, 0x89, 0xf6
db 0xa5, 0x20, 0x70, 0x3b, 0xbd, 0x25, 0x9c, 0x47, 0xb8, 0x9a, 0xd7, 0x72, 0xb5, 0x55, 0xe5, 0x92
db 0x98, 0x89, 0x0f, 0x12, 0xe1, 0xe1, 0x0c, 0x2c, 0x7c, 0xca, 0x27, 0x6e, 0x9b, 0x13, 0x9d, 0xc0
db 0x51, 0xa5, 0x10, 0x31, 0xda, 0xe5, 0x40, 0x7c, 0x3a, 0x67, 0x14, 0xa5, 0x92, 0x99, 0xa0, 0xf2
db 0xa6, 0xba, 0x61, 0xa9, 0x28, 0x2d, 0x10, 0xa8, 0x23, 0x09, 0x26, 0x18, 0x13, 0x6a, 0x87, 0x40
db 0x0b, 0x6c, 0xd3, 0x62, 0x55, 0xdd, 0xa4, 0x7e, 0x2a, 0x70, 0x22, 0xa0, 0x3c, 0x51, 0xcb, 0x98
db 0x26, 0xcf, 0xad, 0xf8, 0xc3, 0x9e, 0xb1, 0x7a, 0xc0, 0xd5, 0xd4, 0xaf, 0xea, 0x8a, 0xfb, 0x03
db 0x1f, 0xae, 0x8c, 0x9b, 0xa3, 0xd2, 0x20, 0xe2, 0xdf, 0xb3, 0xea, 0x97, 0xe9, 0xce, 0x3a, 0xa1
db 0xed, 0xa3, 0x97, 0xd5, 0xe2, 0xa6, 0x47, 0x9d, 0x50, 0x9e, 0xac, 0xdf, 0xd1, 0xbe, 0x32, 0x7b
db 0xd6, 0xaf, 0xe3, 0x9f, 0x4d, 0xbc, 0x58, 0x30, 0x33, 0xd8, 0xcd, 0xd8, 0x5b, 0xfa, 0x5c, 0xdd
db 0x5f, 0x0e, 0x79, 0x1f, 0x1d, 0x0b, 0xd9, 0x9c, 0xb8, 0x78, 0x27, 0x89, 0x39, 0x48, 0xf9, 0x45
db 0xe4, 0xa5, 0x8a, 0x2f, 0xc5, 0xf8, 0xe5, 0xef, 0xef, 0x89, 0x9d, 0x24, 0x2a, 0xf4, 0x7e, 0x60
db 0x38, 0xe1, 0x25, 0x67, 0x87, 0x68, 0x8b, 0x25, 0xfe, 0xb2, 0x20, 0x02, 0x14, 0x04, 0xb6, 0xba
db 0xf7, 0x9c, 0xe8, 0x01, 0x07, 0x8e, 0x0f, 0x21, 0x4c, 0xaf, 0x81, 0xcd, 0x17, 0xb2, 0xae, 0x6c
db 0xbb, 0xfb, 0xb9, 0x8f, 0x8b, 0xc8, 0x07, 0x36, 0x26, 0x0c, 0x40, 0x6f, 0xa9, 0x1e, 0x69, 0x2a
db 0x5a, 0xde, 0x40, 0xb4, 0x00, 0xfa, 0x0f, 0x2e, 0xd9, 0xc4, 0xc9, 0xc7, 0x72, 0xbb, 0xed, 0x26
db 0x0a, 0xf4, 0x81, 0xf7, 0xc6, 0xc6, 0x69, 0xd7, 0xd0, 0x63, 0x97, 0xed, 0x70, 0x2a, 0xa3, 0x30
db 0x48, 0x69, 0x0a, 0xf3, 0xae, 0x0f, 0x92, 0x08, 0x2a, 0xce, 0x55, 0x4e, 0xf2, 0x4b, 0x3e, 0xc8
db 0x98, 0x15, 0x0a, 0x94, 0x21, 0xe9, 0xff, 0xca, 0x45, 0x82, 0x49, 0x08, 0x47, 0xaa, 0x0a, 0xc2
db 0x75, 0x6a, 0xca, 0x28, 0xa1, 0x0a, 0x5a, 0x24, 0xb3, 0xb3, 0x3c, 0xd9, 0xd6, 0xb2, 0x30, 0x75
db 0xc4, 0x6e, 0x85, 0x58, 0x92, 0x44, 0x0c, 0x80, 0xd9, 0x17, 0x94, 0xcb, 0x22, 0x4a, 0xd9, 0x34
db 0xe4, 0x15, 0x0b, 0xf1, 0xbb, 0x33, 0x8c, 0x77, 0x04, 0x0e, 0x90, 0x48, 0x37, 0x76, 0x79, 0x2c
db 0x92, 0xcd, 0xba, 0xa4, 0x74, 0x42, 0x9f, 0x09, 0x7d, 0xa8, 0xc2, 0xd4, 0x36, 0xe3, 0xf3, 0x14
db 0x30, 0xcf, 0xef, 0x5c, 0x41, 0x12, 0x4d, 0xb2, 0x4d, 0xa5, 0xad, 0xf7, 0xe8, 0xba, 0xf9, 0xb6
db 0xd3, 0x03, 0xe3, 0xb5, 0x11, 0x45, 0x43, 0x64, 0x31, 0xff, 0x32, 0xc3, 0xe0, 0xe4, 0x17, 0xe2
db 0xf3, 0xaf, 0x90, 0xce, 0x12, 0xe9, 0x33, 0x90, 0x80, 0xc8, 0x5d, 0x3e, 0x5c, 0xc0, 0x33, 0x7f
db 0x04, 0x3a, 0x2e, 0xaf, 0x76, 0x70, 0x76, 0x0e, 0x41, 0x81, 0xf2, 0xde, 0x48, 0xc8, 0x65, 0x45
db 0xbe, 0x02, 0x12, 0xa9, 0x0c, 0xf9, 0x5e, 0xfc, 0xfe, 0xaa, 0xc1, 0x1a, 0x22, 0x76, 0x19, 0xd5
db 0x76, 0xf4, 0xe6, 0xdd, 0xb3, 0xa1, 0xc9, 0x64, 0x5b, 0x80, 0xd2, 0x54, 0x2c, 0xf6, 0x1b, 0xa7
db 0x2e, 0xa1, 0x7b, 0xdb, 0x6f, 0xea, 0xd7, 0x8e, 0x13, 0x27, 0x1a, 0xbd, 0x4a, 0x09, 0x47, 0xf3
db 0xa0, 0x56, 0x13, 0x95, 0x83, 0x12, 0xe8, 0x9a, 0x7c, 0xf2, 0x84, 0xbb, 0x09, 0xed, 0x91, 0x06
db 0xbc, 0x38, 0xf6, 0xfd, 0xda, 0xbd, 0x93, 0xff, 0xe3, 0x8d, 0xb2, 0x0f, 0xe9, 0x2c, 0x9d, 0x51
db 0xba, 0x08, 0xdc, 0x01, 0x9a, 0x15, 0x34, 0x7e, 0x1f, 0x3a, 0x79, 0xcc, 0x89, 0xfe, 0x9a, 0x73
db 0xd1, 0x45, 0x1d, 0x72, 0xf8, 0xd9, 0x88, 0x6d, 0x3c, 0xf6, 0xb8, 0x09, 0xdf, 0x4f, 0x6e, 0x63
db 0x5d, 0x4d, 0xa9, 0x22, 0xcf, 0x8a, 0x66, 0xb0, 0xb1, 0x82, 0x69, 0x83, 0xd6, 0x1b, 0x4a, 0x69
db 0xad, 0x1b, 0x31, 0x18, 0x1a, 0xc8, 0x7e, 0x90, 0x55, 0x37, 0x69, 0x59, 0x1c, 0x72, 0xe3, 0x91
db 0x41, 0x32, 0xcc, 0xad, 0xf0, 0xa7, 0x2c, 0xfc, 0xed, 0x0f, 0x68, 0x7c, 0x3a, 0x41, 0x8f, 0x82
db 0xb6, 0xe4, 0x7f, 0xe9, 0xb5, 0xcf, 0x19, 0xbf, 0xae, 0xce, 0x1c, 0x87, 0xd3, 0x76, 0x31, 0xf7
db 0xc5, 0x29, 0x0d, 0x75, 0x34, 0x36, 0xdc, 0x4a, 0x4d, 0x7f, 0x0e, 0xce, 0x50, 0x95, 0x62, 0x80
db 0xc6, 0x19, 0xd9, 0x2f, 0xa3, 0x0b, 0x16, 0xe8, 0xf6, 0xa3, 0x30, 0xbf, 0xdb, 0x41, 0x04, 0x05
db 0xb7, 0x10, 0x3d, 0xef, 0x15, 0x78, 0x82, 0xe1, 0xfa, 0x46, 0xe2, 0xd7, 0xb2, 0x4d, 0x0d, 0x9c
db 0x3a, 0x1e, 0xda, 0x32, 0xb2, 0x5d, 0x1a, 0x31, 0x84, 0x13, 0x74, 0xb7, 0x5b, 0x4b, 0xdf, 0x83
db 0xd9, 0x91, 0x53, 0xc1, 0xad, 0xa1, 0x19, 0x13, 0xb0, 0x3f, 0x97, 0xd6, 0x1b, 0xed, 0x5e, 0x29
db 0x87, 0xf8, 0xf4, 0x1f, 0xd6, 0x99, 0x9b, 0x14, 0xf6, 0xe9, 0x9a, 0x96, 0x19, 0x9d, 0xa0, 0xa2
db 0x63, 0xe1, 0x10, 0x2d, 0xa3, 0xdc, 0x65, 0x0a, 0x52, 0x40, 0x61, 0x8f, 0xd5, 0x45, 0x5c, 0x37
db 0x7e, 0xf6, 0x11, 0xb6, 0xd6, 0xf0, 0x92, 0x35, 0xb9, 0x2e, 0xa0, 0x36, 0x3c, 0x63, 0x88, 0x77
db 0xba, 0xb6, 0xc5, 0x8f, 0xef, 0x77, 0x2d, 0xc7, 0x06, 0xae, 0xf4, 0xd2, 0xd3, 0xa3, 0xce, 0x0d
db 0xce, 0x37, 0x5c, 0x3d, 0x10, 0x13, 0xf4, 0x2b, 0x94, 0x23, 0x54, 0x93, 0xe7, 0xd4, 0x07, 0x6c
db 0x81, 0x5e, 0xb6, 0xdd, 0x96, 0x3d, 0x47, 0x7a, 0xe8, 0xe5, 0xa8, 0x67, 0xed, 0x09, 0x1a, 0xcc
db 0xcb, 0x68, 0xad, 0xa7, 0x9e, 0xff, 0x69, 0x54, 0x5d, 0x17, 0x67, 0x50, 0x7c, 0x8f, 0x86, 0xbe
db 0xc5, 0x45, 0x09, 0x63, 0xc0, 0x3b, 0x3d, 0xcd, 0xb5, 0x66, 0x75, 0x45, 0x18, 0x1b, 0x83, 0xb2
db 0xb5, 0x70, 0x2c, 0xef, 0x09, 0x9d, 0x47, 0x7a, 0xb9, 0xe7, 0xd5, 0x04, 0x10, 0x96, 0x0b, 0x6b
db 0xd1, 0x71, 0xcc, 0x8d, 0x73, 0x90, 0x36, 0x92, 0x1f, 0x5b, 0x6b, 0xdf, 0x2d, 0x2c, 0x98, 0xd7
db 0x5d, 0x55, 0xfd, 0x4e, 0xad, 0x9b, 0x0b, 0xba, 0x0b, 0x68, 0xc4, 0xe0, 0xe8, 0x66, 0xe7, 0x91
db 0xcf, 0x0d, 0xe3, 0xc8, 0x1e, 0xde, 0x82, 0xff, 0x02, 0x43, 0xf4, 0xd3, 0x25, 0x53, 0x0b, 0x03
db 0x22, 0x76, 0x6a, 0xff, 0xcf, 0x1f, 0xd6, 0x4d, 0x21, 0x45, 0x11, 0xd3, 0x0b, 0x9c, 0x87, 0x68
db 0x98, 0xb8, 0x82, 0x4e, 0x6e, 0xc5, 0x4d, 0x8a, 0x7a, 0x9f, 0x19, 0x8a, 0xa1, 0xf7, 0xed, 0x79
db 0x56, 0xd6, 0x1b, 0x75, 0xba, 0xf2, 0x8a, 0x1d, 0x83, 0xe1, 0x6a, 0x08, 0x46, 0x0d, 0x4d, 0xb4
db 0xa1, 0xb8, 0x40, 0x24, 0x71, 0xe4, 0xd4, 0xb0, 0x4c, 0x32, 0x49, 0xec, 0x5a, 0x03, 0x95, 0x13
db 0xef, 0xbb, 0x5f, 0xee, 0x1d, 0x95, 0xb1, 0x60, 0xd2, 0xd7, 0x6b, 0x06, 0x8e, 0xf1, 0x96, 0x35
db 0xe0, 0x02, 0x4e, 0x50, 0xb2, 0x7e, 0xaa, 0x6b, 0xe7, 0xf8, 0xd0, 0xd2, 0x3e, 0x96, 0xde, 0x77
db 0xf0, 0xa8, 0xdd, 0x44, 0x00, 0x4b, 0xc1, 0x56, 0xd2, 0xe5, 0x9a, 0x1d, 0xa7, 0x8f, 0x08, 0x3e
db 0x1b, 0x14, 0xc9, 0x9e, 0x46, 0x7e, 0xf0, 0x51, 0x3c, 0x8c, 0x80, 0x21, 0xc6, 0x96, 0xc1, 0x9a
db 0xda, 0x41, 0xa5, 0xc8, 0xc8, 0x18, 0x63, 0x43, 0x20, 0xaf, 0x69, 0x28, 0xa5, 0x38, 0x13, 0xe0
db 0xf4, 0x2e, 0xdc, 0xed, 0x4a, 0x0c, 0x70, 0xc7, 0xcf, 0x45, 0x62, 0x6d, 0x9c, 0x17, 0x8d, 0xb0
db 0x59, 0xc7, 0x2c, 0xff, 0x4d, 0xa7, 0x5d, 0xbb, 0xfd, 0xa2, 0xcb, 0xe0, 0x08, 0x3d, 0x78, 0x35
db 0x53, 0x88, 0x7e, 0xe5, 0xf1, 0x4a, 0x5a, 0xb7, 0xfc, 0xbb, 0xf0, 0x25, 0x5c, 0x32, 0x56, 0x4a
db 0xc8, 0x0b, 0xd2, 0x74, 0x14, 0xe3, 0x8b, 0xc0, 0x1f, 0x78, 0x1a, 0x74, 0xb8, 0x17, 0x09, 0x7c
db 0x74, 0x5e, 0x3a, 0xa9, 0x04, 0x64, 0x7d, 0x89, 0x81, 0x8a, 0x97, 0xac, 0x61, 0x41, 0xbd, 0x20
db 0x51, 0x55, 0x12, 0x95, 0xc3, 0x57, 0xcd, 0x9a, 0x07, 0x66, 0x19, 0xce, 0x9e, 0xed, 0x77, 0x34
db 0xb2, 0x77, 0xd8, 0x3a, 0xbc, 0x59, 0xf1, 0x86, 0x99, 0x2f, 0x19, 0x51, 0x0f, 0xda, 0x59, 0x82
db 0x0a, 0xaa, 0x2e, 0xaa, 0x04, 0x00, 0x8d, 0xe4, 0xe2, 0x53, 0xdf, 0x5c, 0x6e, 0xfb, 0x72, 0x22
db 0x23, 0x6f, 0xe2, 0x4c, 0x74, 0x30, 0xc6, 0x62, 0x5c, 0x85, 0x61, 0x44, 0xf4, 0x9d, 0x07, 0x56
db 0x0c, 0x90, 0xb2, 0x35, 0x8a, 0xcd, 0x02, 0x54, 0x5a, 0x51, 0x40, 0xfa, 0xad, 0x19, 0x5c, 0xdc
db 0xf5, 0xe2, 0xb6, 0x10, 0x58, 0x10, 0x60, 0x1d, 0x04, 0xbd, 0x7c, 0x47, 0xaf, 0x28, 0x3d, 0x68
db 0x80, 0x06, 0xee, 0x4e, 0xbd, 0x90, 0x53, 0x90, 0x3f, 0x7e, 0xed, 0x99, 0x6f, 0x4f, 0xc3, 0xce
db 0x64, 0xa7, 0x9e, 0x5e, 0x85, 0xb3, 0x2f, 0x30, 0x2c, 0xc5, 0xa2, 0x5f, 0xa7, 0x6e, 0xb8, 0x4e
db 0x50, 0xb2, 0x06, 0x3e, 0xaf, 0x9d, 0x2c, 0x25, 0x29, 0x06, 0x4e, 0xd6, 0x4d, 0x58, 0x28, 0xe1
db 0x57, 0x45, 0xf0, 0x0c, 0x09, 0xcf, 0x24, 0x13, 0x83, 0x39, 0xc4, 0xf8, 0x17, 0x26, 0xdc, 0xba
db 0x19, 0x55, 0x83, 0xe9, 0xad, 0xad, 0x51, 0xbe, 0xc4, 0x36, 0x8e, 0xaf, 0x5b, 0x40, 0x4a, 0xfd
db 0x26, 0xfc, 0x42, 0x74, 0xe0, 0xfd, 0x9f, 0x9b, 0xda, 0x31, 0x78, 0x1a, 0x4e, 0x2e, 0xfc, 0x67
db 0x2e, 0x96, 0x54, 0xfe, 0xdd, 0xfe, 0x28, 0x1a, 0x0f, 0xf6, 0x59, 0x9a, 0x76, 0x92, 0xdd, 0x1e
db 0xd2, 0xb7, 0x28, 0xeb, 0x49, 0x72, 0xef, 0x2a, 0xc5, 0xb3, 0x2f, 0x0f, 0xcd, 0xe1, 0xa2, 0x47
db 0xc0, 0x6d, 0x31, 0x77, 0x45, 0xd7, 0x2c, 0x97, 0x83, 0x8d, 0x2a, 0x52, 0x58, 0xc1, 0x2b, 0x36
db 0x06, 0xff, 0x02, 0x0f, 0xba, 0xac, 0x98, 0x29, 0xda, 0x20, 0xf1, 0x2f, 0x45, 0x66, 0xa9, 0xe4
db 0x59, 0x01, 0xa4, 0x11, 0x2d, 0x2e, 0xf8, 0xd4, 0x7d, 0x00, 0x8f, 0xf4, 0xea, 0xfe, 0x4c, 0x12
db 0xd5, 0x12, 0x30, 0x8b, 0xcc, 0xb2, 0x1f, 0x23, 0xd2, 0x63, 0x71, 0x49, 0xb4, 0x76, 0x58, 0xec
db 0x90, 0x09, 0xd5, 0x46, 0x8b, 0xfe, 0x15, 0x97, 0x3d, 0x7e, 0x3f, 0x5f, 0x36, 0xa2, 0x79, 0xc2
db 0x15, 0xcd, 0x24, 0x8d, 0x06, 0x75, 0x3c, 0x79, 0x82, 0xe7, 0x0c, 0x2e, 0x76, 0x3a, 0xcf, 0xbe
db 0xaf, 0xd9, 0x2a, 0x5f, 0xf2, 0x61, 0x22, 0xd5, 0xe3, 0x24, 0xe1, 0xfe, 0x12, 0x3d, 0x03, 0x70
db 0x71, 0xe3, 0x49, 0xe9, 0x10, 0x73, 0x0d, 0x89, 0x5c, 0x53, 0x7c, 0xd2, 0xb4, 0x71, 0xf6, 0x16
db 0x93, 0x67, 0x06, 0x60, 0x13, 0x72, 0xcd, 0x00, 0xe5, 0x50, 0x4c, 0x01, 0xcf, 0x6b, 0x98, 0x6c
db 0xe0, 0xcd, 0x5a, 0xfb, 0x33, 0x3e, 0x52, 0x77, 0xf2, 0x34, 0xdd, 0xb2, 0x94, 0xbb, 0xb5, 0x13
db 0x3f, 0xfd, 0xea, 0x2e, 0xda, 0x8c, 0x02, 0xbb, 0xab, 0x50, 0xd7, 0xfc, 0x26, 0x3e, 0xda, 0x3b
db 0xab, 0x9a, 0xea, 0x6b, 0xb1, 0x64, 0xb8, 0x1a, 0x19, 0x8d, 0x31, 0xed, 0x9b, 0x92, 0xe1, 0x75
db 0x8a, 0xb9, 0x17, 0xb5, 0xe5, 0x29, 0x48, 0xe5, 0xbf, 0x80, 0x94, 0xc6, 0x1e, 0xcf, 0x5a, 0xd7
db 0x47, 0x6e, 0xc5, 0xe8, 0x12, 0x99, 0x48, 0x07, 0xe8, 0x9c, 0xde, 0x0b, 0x59, 0xc0, 0x0f, 0x06
db 0x75, 0x41, 0x61, 0x94, 0x28, 0x5f, 0x87, 0x56, 0xa4, 0x3f, 0x18, 0xf7, 0x93, 0xe0, 0xc9, 0x03
db 0x03, 0x6e, 0x44, 0x16, 0x70, 0xbc, 0x38, 0x7d, 0x4d, 0x17, 0x75, 0x65, 0x5d, 0x9c, 0x79, 0xc7
db 0x29, 0x56, 0x52, 0x4a, 0xfe, 0xef, 0x09, 0x65, 0x9a, 0xc3, 0x78, 0x8d, 0x89, 0xc2, 0x3f, 0x43
db 0x1d, 0xd6, 0x54, 0x59, 0xe4, 0x6f, 0xaf, 0x08, 0x90, 0xff, 0x20, 0xae, 0x87, 0x68, 0x7b, 0x4a
db 0x62, 0xd2, 0x93, 0x57, 0x08, 0xec, 0xbd, 0x5b, 0xde, 0xa0, 0x76, 0x01, 0x6a, 0x27, 0x2e, 0xc9
db 0xf1, 0x99, 0x7a, 0xa5, 0x08, 0x31, 0x85, 0x68, 0x35, 0x94, 0xe4, 0x56, 0x0b, 0x52, 0xdc, 0x5b
db 0x9c, 0x2f, 0xe8, 0x54, 0xb3, 0xee, 0x9d, 0x0e, 0x75, 0xb5, 0x5c, 0xcb, 0xb9, 0x7b, 0x88, 0xf8
db 0x0e, 0x56, 0x11, 0x6f, 0xb4, 0xa5, 0x4f, 0xf3, 0x01, 0xc6, 0xe7, 0xc6, 0xbf, 0x89, 0x88, 0x02
db 0xeb, 0xce, 0x57, 0xe3, 0x59, 0xf0, 0x12, 0xdf, 0x57, 0x4e, 0xb0, 0xba, 0x67, 0x70, 0x94, 0x1e
db 0xa3, 0xd3, 0x60, 0x21, 0xf8, 0xa6, 0xc9, 0xe7, 0x3e, 0x12, 0x93, 0x58, 0x8b, 0xd7, 0x0e, 0x94
db 0x6f, 0xcd, 0x8b, 0x09, 0x6d, 0xaa, 0x5b, 0xb2, 0x2c, 0x9f, 0x6f, 0x85, 0x93, 0xf1, 0xc9, 0xd0
db 0xd2, 0x04, 0xd1, 0x80, 0xe0, 0x11, 0x09, 0x0f, 0x7b, 0x88, 0xfe, 0x05, 0x9a, 0x53, 0x36, 0xac
db 0xd9, 0xb8, 0x45, 0x81, 0x51, 0x99, 0xeb, 0x8d, 0xf2, 0x4d, 0x30, 0x70, 0x62, 0xf4, 0xf9, 0x2f
db 0xf7, 0x52, 0xa1, 0xb2, 0xec, 0xa4, 0xbe, 0x1c, 0xd0, 0xfb, 0x1c, 0xe4, 0x3a, 0xbe, 0x5d, 0xee
db 0x60, 0xdb, 0x4a, 0x70, 0xec, 0x31, 0xc7, 0x89, 0x16, 0x95, 0x94, 0xde, 0x8c, 0x58, 0x3e, 0xad
db 0x58, 0x46, 0x9c, 0x68, 0x2a, 0x2d, 0x43, 0x37, 0x5c, 0xec, 0xf4, 0x03, 0x65, 0xf5, 0x3e, 0xa2
db 0x7d, 0xe0, 0x4d, 0x39, 0x34, 0x7f, 0xe2, 0xdb, 0x90, 0x66, 0xdc, 0xca, 0x2e, 0xf8, 0x3d, 0xd8
db 0x97, 0x55, 0x60, 0x76, 0xc9, 0x53, 0x71, 0xe2, 0xa3, 0xe1, 0x5d, 0x5f, 0x4c, 0xfe, 0x03, 0x2b
db 0x15, 0xd7, 0x2d, 0x5b, 0x56, 0xa0, 0xb5, 0x78, 0x44, 0x98, 0x1f, 0x52, 0xe1, 0x49, 0x13, 0x48
db 0xed, 0x83, 0xa1, 0xfb, 0x23, 0xd0, 0x73, 0x34, 0x5e, 0x1d, 0x2f, 0xa0, 0xc5, 0x1b, 0xd0, 0xe3
db 0x12, 0x28, 0x52, 0x00, 0xe6, 0x7c, 0x6d, 0x6e, 0x87, 0xe2, 0x42, 0x97, 0x1f, 0x5b, 0x2b, 0x93
db 0xba, 0x75, 0xe2, 0x8b, 0x3f, 0x62, 0x5b, 0xb5, 0x36, 0x76, 0x05, 0x1d, 0x2d, 0x7d, 0x29, 0x88
db 0x2d, 0x6a, 0xe7, 0xec, 0x3b, 0x8c, 0x88, 0x70, 0xb7, 0xf2, 0xbb, 0x61, 0xdb, 0xa2, 0x1a, 0x28
db 0xb0, 0x17, 0xdf, 0x18, 0xcc, 0x2a, 0x02, 0xdd, 0xd2, 0x4a, 0x10, 0x2c, 0x9d, 0x65, 0x58, 0x46
db 0x1b, 0xa0, 0xf7, 0x57, 0x7f, 0x76, 0x36, 0xd2, 0xb6, 0xc1, 0xb5, 0x93, 0x6a, 0xbf, 0x97, 0x22
db 0x9f, 0xe2, 0xe9, 0xdb, 0x25, 0xc3, 0x04, 0xf0, 0xed, 0x54, 0xc7, 0xc6, 0xab, 0x30, 0x36, 0xb8
db 0x3a, 0x24, 0xab, 0x10, 0x0c, 0x26, 0x45, 0xb6, 0xd9, 0xf3, 0x19, 0x6b, 0x65, 0xfd, 0x1d, 0x6d
db 0x52, 0x73, 0x14, 0x9d, 0x22, 0x1f, 0xb1, 0x8c, 0x3e, 0x43, 0xbb, 0xf1, 0xca, 0x0f, 0x84, 0x81
db 0xb5, 0xc5, 0xe3, 0xf6, 0xcd, 0xe0, 0x2c, 0xf6, 0x9e, 0x21, 0xaa, 0xc8, 0x18, 0x85, 0x17, 0x78
db 0x60, 0xb5, 0xeb, 0x9f, 0x2e, 0x5b, 0xbe, 0xe9, 0x87, 0x37, 0xa0, 0x9c, 0x51, 0x8a, 0xf4, 0x1a
db 0x17, 0xfd, 0x9d, 0xf0, 0x81, 0x34, 0x55, 0x74, 0x4b, 0x49, 0x86, 0x7b, 0xd1, 0x00, 0x21, 0x92
db 0x69, 0xdf, 0x80, 0x8b, 0xa4, 0xf0, 0x50, 0x24, 0xbf, 0x2e, 0x1f, 0x7e, 0x4f, 0x2a, 0x79, 0x65
db 0x09, 0xc5, 0x6a, 0x4c, 0x87, 0x3d, 0x4d, 0xe7, 0xcd, 0x6a, 0xdd, 0x1d, 0x89, 0xde, 0x0f, 0xe3
db 0xaa, 0xf4, 0x99, 0xf0, 0x17, 0x54, 0x2d, 0x81, 0x97, 0x5a, 0x4f, 0xae, 0x77, 0x2f, 0x2a, 0x93
db 0x68, 0xe8, 0x0a, 0xbc, 0xcf, 0x48, 0xe2, 0x40, 0xe2, 0x55, 0xdd, 0x5c, 0xa5, 0x50, 0x63, 0xea
db 0xb2, 0x53, 0xea, 0x8a, 0x51, 0xbe, 0x93, 0xa7, 0x02, 0xa5, 0x50, 0x18, 0x7c, 0x0e, 0x40, 0x27
db 0x9e, 0x5c, 0x68, 0x17, 0x37, 0xe7, 0x83, 0x8c, 0xc4, 0x31, 0x5a, 0xd1, 0x6e, 0xe4, 0xac, 0xd7
db 0x05, 0x34, 0x2b, 0xdb, 0xe6, 0x75, 0xed, 0xd0, 0xa5, 0x18, 0xe5, 0x7e, 0x78, 0x7e, 0x28, 0xfb
db 0x9c, 0x58, 0x5d, 0x92, 0x9f, 0xfb, 0xa1, 0x86, 0x6e, 0x96, 0x55, 0x60, 0xee, 0xf8, 0x98, 0x39
db 0x18, 0xa1, 0x60, 0x58, 0xc7, 0xf4, 0xf7, 0x5b, 0x30, 0x76, 0x65, 0xa4, 0x9c, 0x75, 0x79, 0x37
db 0x3a, 0xb3, 0x40, 0xde, 0xff, 0xb2, 0x8d, 0x50, 0xd6, 0x05, 0xc2, 0x22, 0x5f, 0xa9, 0x2e, 0xd9
db 0xc3, 0x5c, 0xe5, 0xae, 0xcd, 0xad, 0x73, 0xba, 0xdb, 0x04, 0xe9, 0x1f, 0xeb, 0x6f, 0xd4, 0x02
db 0x3c, 0xfb, 0x68, 0x52, 0x25, 0xd8, 0xb9, 0xba, 0xf3, 0x56, 0x0c, 0x4b, 0xda, 0x00, 0xb1, 0x7d
db 0xd7, 0x4a, 0x04, 0x5e, 0x5f, 0x2f, 0x49, 0x7f, 0xe3, 0x30, 0x18, 0xa2, 0x44, 0x3a, 0x56, 0x80
db 0x2d, 0xdd, 0x77, 0x4d, 0x48, 0x1d, 0x8b, 0x3a, 0x52, 0xf5, 0x63, 0x51, 0xb1, 0xe0, 0xe3, 0x7c
db 0xd2, 0x3d, 0x50, 0xce, 0xbd, 0xf3, 0x8e, 0xfd, 0xe3, 0x0a, 0x3c, 0x6d, 0x85, 0x4a, 0xd2, 0xa8
db 0xe7, 0x7a, 0xac, 0x50, 0x9c, 0xc7, 0x9f, 0x91, 0x5d, 0xc5, 0x66, 0x86, 0x00, 0x31, 0x72, 0x87
db 0x39, 0x22, 0xd0, 0x3a, 0xc7, 0xe3, 0x91, 0x9f, 0x02, 0x55, 0xa2, 0x0c, 0xb2, 0xca, 0x11, 0xec
db 0xb5, 0x22, 0xab, 0x55, 0x8e, 0x05, 0x43, 0xe3, 0xfe, 0xb3, 0xef, 0x1b, 0xfb, 0x8f, 0x79, 0x11
db 0xb3, 0x2a, 0x01, 0x13, 0xaf, 0xa7, 0x7a, 0xcd, 0x85, 0xc9, 0xc0, 0x37, 0x7b, 0x8f, 0x6e, 0xef
db 0x15, 0xe9, 0x7f, 0xf5, 0xf7, 0xee, 0x82, 0xa2, 0x91, 0xc4, 0xa1, 0x33, 0x28, 0xef, 0xf9, 0x47
db 0x31, 0xef, 0x1e, 0xf1, 0x25, 0x3d, 0x9b, 0x84, 0x7a, 0x75, 0x03, 0xb0, 0xbd, 0x96, 0xe5, 0xb2
db 0x12, 0x52, 0x6c, 0x10, 0x1c, 0x1b, 0x8e, 0xe2, 0xd5, 0x08, 0x99, 0x3f, 0x1d, 0x57, 0x12, 0x00
db 0x90, 0xf5, 0x21, 0x76, 0x3e, 0x51, 0xd9, 0x19, 0xb2, 0x41, 0x88, 0x47, 0xc3, 0x95, 0xfb, 0xe0
db 0x38, 0x87, 0xff, 0x52, 0xb4, 0x9d, 0xf2, 0xdf, 0x53, 0x2d, 0x4f, 0x55, 0x4b, 0x4d, 0xa3, 0x0d
db 0x45, 0x9f, 0xd7, 0x33, 0xd8, 0x11, 0xd9, 0x45, 0xd9, 0x13, 0x3a, 0xe5, 0xe6, 0xda, 0x5b, 0xc6
db 0xd4, 0xcb, 0x19, 0x63, 0xb0, 0x5d, 0x2e, 0x37, 0x97, 0x0b, 0x64, 0x09, 0x45, 0xd3, 0x78, 0x2d
db 0xbc, 0xc0, 0x79, 0x0d, 0xf5, 0x51, 0x9b, 0xeb, 0x9c, 0x28, 0xce, 0x14, 0x32, 0x70, 0xae, 0x14
db 0xa1, 0xd5, 0x4f, 0x9d, 0xbc, 0x79, 0x45, 0x2d, 0x89, 0x63, 0x4b, 0x27, 0x37, 0x94, 0x7f, 0xdb
db 0x2f, 0x39, 0xb8, 0x73, 0xaf, 0x3b, 0x4e, 0x97, 0x2f, 0xce, 0x83, 0x04, 0xb8, 0xa3, 0xb7, 0x38
db 0xe9, 0x08, 0x75, 0x35, 0x42, 0x1c, 0xe8, 0xc4, 0xff, 0xca, 0x13, 0xf9, 0x69, 0x53, 0x32, 0x83
db 0x29, 0xe6, 0x17, 0xe7, 0xf3, 0x01, 0xbf, 0xc0, 0xf1, 0xdf, 0x8b, 0x2d, 0x29, 0x5d, 0x68, 0xfb
db 0x18, 0x77, 0x2d, 0x98, 0x29, 0xa3, 0x44, 0x79, 0x47, 0xbe, 0xb9, 0xca, 0x18, 0x29, 0x16, 0xd9
db 0x41, 0x49, 0x55, 0x82, 0xf2, 0xc0, 0x42, 0x13, 0x68, 0x72, 0x75, 0x5f, 0xe4, 0x97, 0xae, 0x6d
db 0x90, 0xc0, 0x57, 0x05, 0x9e, 0x54, 0xa5, 0x1e, 0x4b, 0x6e, 0x46, 0x9d, 0x31, 0xce, 0x0a, 0xde
db 0xed, 0x71, 0x7b, 0x42, 0xa6, 0xf3, 0x6c, 0xc8, 0x21, 0x4a, 0x4f, 0x40, 0x67, 0x90, 0x2c, 0x92
db 0xb6, 0x7a, 0xdc, 0x9b, 0x0c, 0xb8, 0x38, 0x80, 0xf6, 0x77, 0x29, 0xe3, 0x59, 0xbc, 0xfa, 0xe2
db 0xdf, 0x95, 0x5c, 0xec, 0x7c, 0xac, 0x81, 0x5f, 0xd3, 0xcc, 0x50, 0x43, 0xad, 0xb0, 0x6c, 0xb3
db 0x9a, 0x4c, 0x37, 0x6c, 0x18, 0xf1, 0xa3, 0x71, 0x57, 0x39, 0x09, 0xf7, 0xe0, 0xdc, 0x93, 0xe2
db 0xba, 0xd6, 0x2a, 0x81, 0x67, 0x59, 0x36, 0x8e, 0xf1, 0x4b, 0xd5, 0xa6, 0x57, 0x84, 0x63, 0x1a
db 0xea, 0x2f, 0x2f, 0xd8, 0x9b, 0x64, 0x15, 0x6e, 0x32, 0x40, 0xe0, 0x98, 0x11, 0x5f, 0xc2, 0x47
db 0xa9, 0x08, 0x84, 0x0c, 0xa3, 0xea, 0xe6, 0x4b, 0xcc, 0x08, 0x27, 0x22, 0x9b, 0xf7, 0xb7, 0x6e
db 0xfb, 0x86, 0x04, 0x04, 0x26, 0x8d, 0x47, 0x04, 0xf5, 0x48, 0x1c, 0xac, 0xaa, 0x73, 0x46, 0x06
db 0x33, 0xcf, 0xd4, 0xb2, 0xc0, 0x90, 0x5a, 0xc6, 0x0e, 0xe0, 0x3d, 0xd1, 0xf4, 0x9a, 0x7b, 0xaa
db 0xdc, 0x00, 0x62, 0xec, 0xae, 0x0b, 0x59, 0xdf, 0x46, 0x8c, 0x37, 0x48, 0xdf, 0xb9, 0x51, 0xef
db 0x03, 0x3f, 0x14, 0x06, 0xe9, 0xa9, 0x04, 0x5e, 0x05, 0x42, 0x5c, 0x3c, 0x56, 0xe7, 0xdb, 0xeb
db 0x96, 0x19, 0xa5, 0x0d, 0xa9, 0xc5, 0xa6, 0x1e, 0x68, 0x43, 0x1d, 0x09, 0x5e, 0x95, 0x48, 0xaa
db 0x73, 0xe2, 0x6a, 0x65, 0x34, 0xef, 0xf8, 0x2d, 0x3a, 0xe6, 0x0e, 0x56, 0xcb, 0x9a, 0x7d, 0x31
db 0xf0, 0xa8, 0x73, 0xfc, 0x69, 0x22, 0xab, 0xbc, 0xcf, 0xe1, 0x05, 0xa4, 0xe8, 0x14, 0x4e, 0x96
db 0xe1, 0x0c, 0xa1, 0x8d, 0x94, 0x18, 0xa1, 0xa3, 0x71, 0x1f, 0x1f, 0xd2, 0x92, 0x9c, 0xa3, 0x49
db 0xc1, 0xa0, 0x2b, 0xd3, 0x48, 0x5c, 0x59, 0xd5, 0x19, 0xbb, 0xcf, 0x07, 0x68, 0x5e, 0xca, 0x33
db 0xb7, 0x42, 0x04, 0x28, 0x78, 0x3a, 0x5c, 0x12, 0xc6, 0x67, 0x0e, 0x98, 0x30, 0xdb, 0x7f, 0x5a
db 0x4e, 0xc3, 0xd6, 0x3a, 0xec, 0x4a, 0x53, 0x9e, 0x2b, 0x92, 0xe9, 0x5b, 0xef, 0x01, 0x73, 0x1b
db 0x8f, 0x20, 0x91, 0xdb, 0x01, 0x7e, 0x97, 0xff, 0x98, 0xc2, 0x44, 0xa7, 0x35, 0xe6, 0xc0, 0x41
db 0x55, 0x6c, 0xaf, 0x24, 0xd2, 0xac, 0x2f, 0x2b, 0x62, 0xa2, 0xc8, 0x64, 0xf7, 0x0a, 0xf4, 0x94
db 0x3e, 0x29, 0x11, 0xd0, 0x08, 0xac, 0x40, 0xab, 0x14, 0x36, 0xe8, 0xd6, 0x9a, 0xe9, 0xd2, 0xe2
db 0x1b, 0xa8, 0x80, 0x67, 0x45, 0x24, 0xe0, 0x7c, 0xaa, 0xc4, 0x17, 0x0b, 0xbe, 0xbc, 0x7f, 0x17
db 0x44, 0xf7, 0x7a, 0x3a, 0x62, 0x92, 0xfc, 0x66, 0x08, 0x17, 0x60, 0xc8, 0x57, 0x42, 0x8f, 0x4f
db 0xa3, 0x2d, 0xe0, 0x42, 0x22, 0xab, 0x70, 0x65, 0x8f, 0xad, 0xc8, 0x8a, 0x1d, 0xbc, 0x66, 0xc2
db 0xd4, 0xb0, 0x47, 0x58, 0xb8, 0xe2, 0xd8, 0x40, 0x25, 0x05, 0x65, 0x07, 0xfc, 0x31, 0x4b, 0xaf
db 0xba, 0x0a, 0x92, 0x1e, 0xd7, 0xd9, 0x7a, 0x25, 0x39, 0xda, 0x7a, 0xa5, 0x31, 0x77, 0xce, 0x17
db 0x56, 0x12, 0x5d, 0xcf, 0x56, 0x06, 0xeb, 0x76, 0x3a, 0x8b, 0xcb, 0x3b, 0xc1, 0x7b, 0x0b, 0x20
db 0xf7, 0x03, 0xf0, 0xe1, 0xca, 0x77, 0x35, 0x69, 0x84, 0x78, 0x0a, 0x94, 0x09, 0xcd, 0x5f, 0x49
db 0x77, 0x8a, 0xc1, 0xbc, 0x95, 0x38, 0x61, 0x37, 0x37, 0x9a, 0x3d, 0xb4, 0xe2, 0x1a, 0x80, 0x42
db 0xd2, 0x85, 0x59, 0xba, 0xc8, 0xd8, 0x0f, 0x30, 0x3f, 0xdd, 0xef, 0xbe, 0x80, 0xe5, 0x95, 0xc4
db 0x54, 0xb8, 0x8f, 0xc6, 0x60, 0x2c, 0x85, 0xf4, 0xc3, 0x33, 0x63, 0x8e, 0x50, 0x23, 0x4b, 0xee
db 0x13, 0x87, 0xd6, 0xd4, 0xba, 0xb9, 0xcf, 0x05, 0xaf, 0x6a, 0x27, 0x73, 0x56, 0x30, 0xd6, 0x83
db 0x9e, 0x0b, 0x47, 0x12, 0x4f, 0x5c, 0x34, 0xb6, 0x66, 0xc0, 0xd9, 0x90, 0xb2, 0xa6, 0x54, 0xbf
db 0x42, 0x5d, 0xf0, 0x02, 0x9f, 0x6b, 0x73, 0x90, 0xa9, 0x1f, 0x9f, 0xae, 0x83, 0x12, 0xfa, 0x82
db 0x5b, 0x43, 0xcd, 0xfb, 0xf9, 0x4b, 0xb8, 0x10, 0x4b, 0x8c, 0x33, 0xcb, 0xff, 0x75, 0x04, 0xd6
db 0x85, 0x0c, 0x43, 0x9b, 0x17, 0x9b, 0x36, 0x8a, 0xb7, 0x56, 0x1c, 0xe0, 0x7f, 0xed, 0x69, 0xda
db 0x86, 0x5f, 0x79, 0x41, 0xa3, 0x3e, 0x39, 0x4e, 0xad, 0x95, 0x60, 0x8e, 0x6a, 0x12, 0x9f, 0xa5
db 0xbf, 0xd4, 0x2a, 0x9c, 0x48, 0x93, 0xeb, 0x11, 0x92, 0x68, 0xc8, 0xef, 0xa6, 0xbc, 0xf6, 0x84
db 0xc2, 0xfd, 0x48, 0xbf, 0xbb, 0x80, 0x1f, 0x8f, 0xf0, 0xf5, 0xe3, 0x8d, 0xfb, 0x87, 0xe4, 0x6a
db 0x51, 0x13, 0x03, 0x15, 0x7d, 0x30, 0xb8, 0xad, 0x9f, 0x1f, 0x15, 0xf2, 0x40, 0xef, 0x21, 0xcc
db 0x71, 0xe5, 0x80, 0x5d, 0xc8, 0x33, 0xe3, 0xe1, 0x09, 0x7a, 0x66, 0xdd, 0xe2, 0x1d, 0xac, 0xa7
db 0xb4, 0xa7, 0x8e, 0x92, 0x49, 0xbf, 0x60, 0x2d, 0xa3, 0xa8, 0x1a, 0x21, 0x34, 0xcc, 0x29, 0x12
db 0x78, 0x89, 0xc3, 0x52, 0xe8, 0xbc, 0xba, 0x54, 0xbd, 0x19, 0x2e, 0xad, 0xd4, 0x48, 0x4d, 0x2f
db 0x1f, 0xe7, 0x98, 0xd9, 0x6d, 0xbb, 0x85, 0x48, 0x79, 0x81, 0xf2, 0x3d, 0x03, 0xf7, 0x16, 0x7b
db 0xf5, 0x72, 0x5a, 0xf6, 0x56, 0x4d, 0x03, 0x2b, 0x6d, 0xa1, 0xb1, 0x02, 0xa4, 0xf0, 0x43, 0x4b
db 0xbe, 0xec, 0x74, 0xe9, 0x43, 0x0c, 0xd4, 0xeb, 0x06, 0x49, 0x66, 0x27, 0x4b, 0xb9, 0xe8, 0xb3
db 0xaa, 0x60, 0xf8, 0x9b, 0x4f, 0x88, 0x66, 0xfe, 0x7d, 0xc0, 0x21, 0x22, 0x74, 0xcf, 0xe3, 0xf8
db 0xf4, 0xe3, 0x6d, 0x6c, 0xf9, 0x21, 0xc2, 0x86, 0x89, 0x63, 0x5b, 0x27, 0x71, 0x04, 0x03, 0x40
db 0xa9, 0xe6, 0xa3, 0x2f, 0x65, 0xb6, 0x5a, 0xdc, 0xdd, 0x4d, 0x92, 0xe1, 0x86, 0xf5, 0xc5, 0xc0
db 0x05, 0x3c, 0x5b, 0x35, 0xd4, 0x8c, 0x61, 0xed, 0xc7, 0x53, 0x63, 0x4e, 0x6a, 0xfe, 0x6f, 0x52
db 0x96, 0x50, 0x43, 0x99, 0x24, 0x14, 0xeb, 0xb6, 0x58, 0xce, 0xd9, 0xe0, 0x21, 0x64, 0x9c, 0xe4
db 0x05, 0x48, 0xf8, 0xeb, 0x12, 0x03, 0x54, 0x19, 0xf3, 0x17, 0x6c, 0x3b, 0x91, 0x02, 0xe4, 0xfc
db 0xb9, 0x24, 0xa6, 0xc1, 0xc9, 0x5e, 0x31, 0xdb, 0x03, 0x52, 0x91, 0xc8, 0x67, 0xaa, 0xe8, 0x1b
db 0x51, 0x83, 0xf6, 0x6d, 0xf9, 0xad, 0x45, 0x2e, 0x24, 0x9d, 0x1d, 0x80, 0xd5, 0xc0, 0xbf, 0xd5
db 0xca, 0x78, 0x3e, 0x53, 0x86, 0x97, 0xa9, 0xc5, 0xf8, 0x22, 0x41, 0x1a, 0x1b, 0x20, 0x53, 0xc5
db 0xc8, 0x4b, 0xff, 0x56, 0xb8, 0xfb, 0x61, 0x84, 0x83, 0xca, 0xac, 0x68, 0xcd, 0x56, 0x47, 0xa6
db 0x0d, 0x43, 0xfc, 0xbb, 0xaa, 0x60, 0xf1, 0xc4, 0xde, 0xc0, 0xe0, 0xa2, 0x88, 0x59, 0x68, 0x53
db 0x69, 0xba, 0x67, 0x21, 0x54, 0x18, 0x98, 0x51, 0x09, 0x35, 0xdd, 0x3c, 0xec, 0x35, 0x2f, 0xf0
db 0x6e, 0xcf, 0xba, 0x21, 0x04, 0x0a, 0xb4, 0x91, 0xdf, 0xf8, 0xb5, 0x9f, 0xb8, 0xc4, 0x97, 0xf8
db 0x4f, 0x10, 0xca, 0x5b, 0x4a, 0x43, 0x9e, 0x45, 0x0e, 0x51, 0x3f, 0x43, 0x96, 0x40, 0xb8, 0x70
db 0x66, 0xcb, 0xa1, 0x70, 0x5f, 0x4f, 0x9e, 0xb0, 0x2f, 0x03, 0x6f, 0x42, 0xcd, 0xf6, 0x3c, 0x90
db 0x2f, 0x9b, 0x83, 0x01, 0x41, 0x13, 0xa8, 0xf8, 0xbc, 0x19, 0x5c, 0x10, 0x09, 0xb9, 0xd4, 0x5d
db 0x0c, 0xa3, 0x51, 0xb0, 0x1a, 0xfd, 0x18, 0xbb, 0x67, 0x40, 0x4f, 0xc4, 0xab, 0x41, 0x8e, 0xbf
db 0x74, 0x0d, 0xa2, 0xa0, 0xf2, 0x48, 0x94, 0x5e, 0xa6, 0x63, 0xe4, 0xb3, 0x77, 0x2b, 0x79, 0x90
db 0xfe, 0x95, 0x1d, 0x92, 0x3a, 0x23, 0x8b, 0xf2, 0xa9, 0x87, 0x31, 0x9a, 0x06, 0x81, 0xb1, 0x6f
db 0xa6, 0x1e, 0x21, 0xb6, 0x1e, 0x0d, 0x9a, 0xf8, 0x3d, 0x38, 0x9d, 0xcc, 0x0c, 0x49, 0x83, 0x7a
db 0xd5, 0x7e, 0x5a, 0x98, 0xd7, 0xbe, 0x07, 0xf9, 0x79, 0x5e, 0x46, 0x89, 0x8e, 0xc9, 0xdf, 0x27
db 0xf9, 0xa4, 0xc6, 0x5f, 0x08, 0x45, 0x04, 0x2b, 0x62, 0xf5, 0x8e, 0x54, 0x57, 0xd0, 0xcd, 0xf4
db 0x51, 0x9e, 0x9a, 0xc8, 0x82, 0x65, 0x45, 0x8e, 0x49, 0xba, 0x28, 0x55, 0x3e, 0x58, 0xac, 0xf0
db 0xfb, 0xf2, 0xbd, 0x77, 0x9e, 0xc0, 0xb4, 0xf6, 0x9e, 0x52, 0x87, 0x5e, 0xc9, 0x41, 0x26, 0x01
db 0x64, 0x1f, 0x54, 0x53, 0x25, 0xb8, 0x2d, 0x91, 0xc9, 0x68, 0x30, 0xa0, 0x92, 0x37, 0x03, 0xac
db 0x36, 0xdc, 0x43, 0xb8, 0x2b, 0x29, 0x39, 0x8b, 0xfe, 0xda, 0xb6, 0xee, 0x3c, 0x7d, 0x4a, 0xc3
db 0x3e, 0xad, 0x45, 0xf4, 0xab, 0x67, 0xe3, 0x2b, 0xf5, 0xc4, 0xcf, 0x2f, 0xe3, 0xbd, 0x2e, 0x1c
db 0xac, 0xca, 0x37, 0xe3, 0x65, 0x9e, 0x6b, 0xc9, 0x94, 0x4f, 0x6d, 0x93, 0x0f, 0x0a, 0xc7, 0x19
db 0x69, 0xb3, 0x16, 0xe6, 0x70, 0xe7, 0xd0, 0xb1, 0xa9, 0x20, 0xae, 0x1d, 0xf3, 0x9c, 0xde, 0x5a
db 0xb2, 0x97, 0xdc, 0xd2, 0xc5, 0x91, 0x5f, 0x29, 0x2a, 0x47, 0x9b, 0x8a, 0x08, 0xd1, 0x5d, 0xf8
db 0x25, 0xf5, 0x36, 0x75, 0x69, 0x78, 0x8f, 0x78, 0xc5, 0x81, 0x97, 0x1a, 0x3f, 0x31, 0xc9, 0x70
db 0x3c, 0xe2, 0x04, 0x43, 0x41, 0xbd, 0x09, 0x10, 0x74, 0x95, 0x52, 0x53, 0x15, 0x15, 0xe0, 0xe9
db 0x3a, 0x4d, 0x11, 0x43, 0x90, 0xfc, 0x05, 0x0b, 0xd2, 0xbf, 0x3b, 0x14, 0xe5, 0x7e, 0x40, 0xa7
db 0x58, 0x4e, 0x2d, 0x6e, 0xa8, 0xc8, 0x73, 0xd0, 0x15, 0xa9, 0x8b, 0x28, 0x5d, 0xe0, 0x73, 0xac
db 0x8a, 0xdb, 0x28, 0xea, 0xdc, 0x97, 0xae, 0x75, 0x5b, 0xd5, 0x15, 0xe3, 0x7a, 0xc3, 0x39, 0x22
db 0x6c, 0x30, 0x43, 0x6a, 0xf8, 0x53, 0x1d, 0xd7, 0xee, 0x37, 0xd9, 0xe7, 0x56, 0x8c, 0x72, 0x8a
db 0xf1, 0x7d, 0xc9, 0x55, 0x57, 0x8d, 0x9e, 0xa5, 0xef, 0xa0, 0x72, 0xef, 0x3a, 0x28, 0x83, 0x56
db 0x2a, 0xd4, 0xa3, 0x40, 0x6c, 0x41, 0xe2, 0xaf, 0x20, 0xc6, 0xae, 0x52, 0x69, 0x1c, 0x14, 0xe7
db 0x9e, 0x61, 0xb0, 0x18, 0x7d, 0x37, 0xee, 0x74, 0xec, 0x4f, 0x6e, 0x7a, 0x37, 0xaa, 0x72, 0xbb
db 0x66, 0x92, 0x9c, 0x04, 0x01, 0x7d, 0x5d, 0xd3, 0xae, 0xc0, 0x5a, 0x62, 0x29, 0xd4, 0x27, 0x64
db 0xce, 0x99, 0x2c, 0x0d, 0xfc, 0x27, 0xfc, 0x39, 0x5c, 0xfe, 0xe4, 0x64, 0x0e, 0x6d, 0x09, 0x39
db 0x42, 0xf4, 0x6b, 0xa0, 0x6e, 0x65, 0xc8, 0xc5, 0xed, 0xa4, 0x25, 0xbd, 0x63, 0x87, 0x43, 0xd5
db 0x4a, 0xbb, 0x85, 0x52, 0x6b, 0x13, 0x25, 0xc9, 0x8e, 0xb3, 0xb2, 0xa7, 0xfd, 0x96, 0x86, 0xe7
db 0xbb, 0x75, 0x41, 0x24, 0x11, 0xf7, 0xf7, 0xd4, 0xf3, 0x97, 0xb3, 0x19, 0x49, 0x32, 0xc1, 0xd4
db 0xfd, 0xd9, 0x93, 0x9d, 0x80, 0x4b, 0xd1, 0xb8, 0x9c, 0x1b, 0xc7, 0xf9, 0x48, 0x47, 0x68, 0x84
db 0xbe, 0x74, 0xff, 0xef, 0xb5, 0x00, 0x6f, 0x04, 0xa0, 0x91, 0x1f, 0xae, 0x59, 0x06, 0x81, 0xd4
db 0x46, 0xe5, 0x4e, 0xd6, 0xf6, 0x69, 0x67, 0x26, 0x58, 0x86, 0x54, 0x8b, 0x0e, 0x2b, 0x07, 0x51
db 0xe5, 0x52, 0x47, 0x23, 0x50, 0xb1, 0x7c, 0x5b, 0xf0, 0xbc, 0x87, 0x9d, 0x30, 0xff, 0x27, 0xd8
db 0x86, 0xce, 0x9a, 0x13, 0x99, 0x0e, 0xa7, 0x2d, 0xa0, 0x86, 0xae, 0xc8, 0x46, 0x4d, 0x6a, 0xc9
db 0x4d, 0xab, 0x5f, 0x6d, 0xb2, 0xae, 0xdf, 0x1a, 0x0e, 0xa9, 0x10, 0x37, 0xe9, 0xf7, 0xef, 0x29
db 0xea, 0xe1, 0x39, 0xaf, 0xbc, 0xb1, 0xd0, 0x4f, 0x33, 0x69, 0xe7, 0x79, 0xdf, 0xfb, 0x58, 0x51
db 0x95, 0x3e, 0xc1, 0x5b, 0x53, 0x49, 0xbf, 0x85, 0xf5, 0xde, 0x32, 0x3b, 0xf0, 0x25, 0x46, 0x2d
db 0xb4, 0x49, 0xbf, 0x63, 0x93, 0x9c, 0xac, 0x32, 0xfb, 0xc2, 0x03, 0x5e, 0x45, 0x77, 0x41, 0xf9
db 0x38, 0x1e, 0x3c, 0xa0, 0xf6, 0x02, 0x08, 0xe8, 0x19, 0xf4, 0x13, 0xb3, 0x96, 0x15, 0xe8, 0x46
db 0xe7, 0x50, 0x5c, 0x8b, 0x79, 0x5c, 0x36, 0x72, 0x2f, 0x77, 0x0a, 0x68, 0x7a, 0x2f, 0x06, 0x01
db 0x29, 0x1c, 0xd3, 0x76, 0xf2, 0x32, 0x8e, 0xea, 0x55, 0x47, 0xd9, 0xe0, 0xc6, 0x1d, 0x87, 0xdb
db 0x9e, 0xd7, 0xe8, 0xb2, 0x18, 0x60, 0x08, 0x88, 0xc4, 0xdf, 0x7d, 0x0b, 0xfd, 0xfc, 0xb2, 0xeb
db 0x17, 0xcd, 0xf0, 0x79, 0xe1, 0xc1, 0x6e, 0x74, 0xc2, 0x64, 0x17, 0xa5, 0x8b, 0xca, 0x20, 0x95
db 0x3d, 0xc7, 0xe5, 0x30, 0x3d, 0x32, 0x80, 0xd5, 0x32, 0x74, 0x30, 0xf1, 0x09, 0x56, 0x79, 0x22
db 0x07, 0x48, 0x03, 0x55, 0x3c, 0x19, 0xec, 0x88, 0xa2, 0xe6, 0xa2, 0x98, 0x13, 0x7b, 0x14, 0x3f
db 0xa6, 0x8f, 0xef, 0xbc, 0xfc, 0x4e, 0x6e, 0x25, 0x7e, 0xcc, 0x54, 0x5f, 0x9f, 0x16, 0xe5, 0xb5
db 0x24, 0xcc, 0xe8, 0xde, 0xfb, 0x36, 0xad, 0xbe, 0x00, 0x34, 0x32, 0x6c, 0x78, 0xcf, 0xc7, 0x2a
db 0xb0, 0xef, 0x07, 0x67, 0x73, 0x59, 0x4b, 0xc4, 0x58, 0x47, 0xbf, 0x19, 0x0f, 0x5f, 0x87, 0xbf
db 0x14, 0x2c, 0xa1, 0xc7, 0xa1, 0x48, 0xe2, 0x3a, 0x48, 0x1b, 0x2c, 0x0f, 0x52, 0xe0, 0x43, 0x9a
db 0x32, 0x10, 0x0a, 0xdb, 0x27, 0x30, 0x01, 0x55, 0x89, 0x1c, 0x5e, 0xc3, 0x10, 0xf2, 0x48, 0x78
db 0xd7, 0xf7, 0xba, 0xa4, 0x67, 0xda, 0xb4, 0x16, 0x99, 0x44, 0x41, 0x26, 0x85, 0xb6, 0x45, 0xec
db 0x0a, 0x26, 0xf4, 0xfc, 0x3b, 0x6a, 0x55, 0x1d, 0x62, 0xc9, 0xa4, 0x3b, 0x20, 0x54, 0x7c, 0x3e
db 0xb6, 0xd3, 0x88, 0xe5, 0x54, 0xb5, 0xeb, 0xd1, 0x30, 0xb9, 0x98, 0xda, 0x4d, 0x2c, 0x81, 0xf3
db 0x4b, 0x54, 0x3a, 0x24, 0x59, 0x4f, 0x13, 0x58, 0x7f, 0xa0, 0x5b, 0x1f, 0xd6, 0x0a, 0x7a, 0xb5
db 0x36, 0x6b, 0x2f, 0xaa, 0x53, 0xda, 0xe6, 0xa8, 0x61, 0x83, 0x16, 0x3c, 0xa6, 0x81, 0x78, 0xd2
db 0x77, 0x0a, 0x30, 0x76, 0x0e, 0x56, 0x6a, 0x66, 0xa6, 0x5c, 0x91, 0xc8, 0xec, 0xa2, 0x99, 0xa0
db 0x67, 0x5b, 0x21, 0x37, 0x38, 0x01, 0x27, 0x27, 0x8b, 0x1c, 0xd0, 0xe4, 0x91, 0x43, 0xe0, 0xcb
db 0xe5, 0x4b, 0xa0, 0xe7, 0x1c, 0x3f, 0x33, 0x3f, 0xb4, 0x42, 0xb0, 0x64, 0x34, 0xde, 0x8d, 0x35
db 0x2e, 0x59, 0x21, 0xab, 0xa6, 0x71, 0xc9, 0x3a, 0x65, 0x83, 0xa6, 0x0c, 0xb0, 0x36, 0xa0, 0xe2
db 0xb0, 0xb9, 0x8b, 0x5c, 0x2e, 0x3b, 0xa1, 0x03, 0x96, 0x6e, 0x63, 0x9c, 0x2a, 0x02, 0xbd, 0x2b
db 0x21, 0xf9, 0xee, 0x75, 0xab, 0xf4, 0x3c, 0xf4, 0x64, 0xb1, 0xbb, 0x97, 0x10, 0x3a, 0x02, 0x60
db 0x3e, 0x93, 0x97, 0xfd, 0xb4, 0xcb, 0x2f, 0x31, 0x80, 0x7f, 0x74, 0x23, 0x35, 0x6d, 0x4a, 0x83
db 0xa6, 0x70, 0x7b, 0x27, 0xf7, 0xfa, 0x97, 0xe9, 0x54, 0x4d, 0xfe, 0xfd, 0x7c, 0xe8, 0x91, 0x03
db 0x6b, 0x61, 0xc4, 0x8c, 0xa2, 0x41, 0x23, 0x52, 0xc3, 0x9b, 0x5d, 0xb6, 0xb5, 0x1b, 0x9f, 0x3b
db 0x48, 0x09, 0x59, 0xec, 0x3c, 0x23, 0xfd, 0x9e, 0xa8, 0x76, 0x73, 0x92, 0xab, 0x28, 0x6a, 0x57
db 0xe4, 0x88, 0x25, 0x2b, 0x6d, 0xbf, 0xf8, 0xc2, 0xe6, 0xdb, 0x48, 0x58, 0x0a, 0x06, 0xae, 0xd0
db 0x78, 0x25, 0x61, 0xd5, 0xcd, 0x60, 0xf9, 0xf1, 0xaf, 0xb3, 0xb8, 0x86, 0x2f, 0x00, 0x4b, 0xe1
db 0xe6, 0xbd, 0x46, 0xdb, 0x91, 0x76, 0xea, 0x22, 0xb5, 0x85, 0x32, 0x3a, 0x41, 0x6a, 0xfb, 0x86
db 0x45, 0xc9, 0x3e, 0xb1, 0xcb, 0x1d, 0xee, 0x8e, 0x58, 0xcf, 0x24, 0x6b, 0xb1, 0xc5, 0xe0, 0xda
db 0x30, 0xc9, 0xa9, 0x36, 0x5b, 0xeb, 0xc1, 0x93, 0x32, 0x60, 0x15, 0x0a, 0x41, 0xa0, 0x64, 0x0c
db 0x30, 0x89, 0x6e, 0xda, 0xab, 0x7e, 0x49, 0x44, 0xbc, 0xb4, 0x92, 0x0c, 0x29, 0xee, 0xd5, 0x03
db 0xe0, 0xd7, 0x96, 0xcf, 0x00, 0xf8, 0xaf, 0x89, 0xf2, 0xe0, 0x3a, 0x23, 0x45, 0x1a, 0x73, 0x4d
db 0x3f, 0x59, 0x50, 0xe5, 0x98, 0x2a, 0x78, 0x0f, 0x07, 0x31, 0xd2, 0x89, 0x69, 0xa2, 0xe9, 0x8d
db 0xe4, 0xab, 0x41, 0x8b, 0xe9, 0x83, 0x5e, 0xbc, 0xf9, 0x81, 0xbf, 0xe8, 0x24, 0xa4, 0x9d, 0x0a
db 0xd0, 0x7c, 0x3b, 0x40, 0x69, 0x93, 0x26, 0x42, 0x23, 0xb1, 0x38, 0xfa, 0x22, 0x25, 0x15, 0xb5
db 0x17, 0xb8, 0xa0, 0xc3, 0xb0, 0x80, 0x98, 0x04, 0x85, 0x91, 0x2b, 0xa8, 0x79, 0x34, 0xf9, 0x74
db 0x9e, 0x49, 0xad, 0xcf, 0xca, 0xf6, 0x06, 0xd3, 0xdf, 0x27, 0xb6, 0xd8, 0x19, 0x14, 0x84, 0xee
db 0xe6, 0x9c, 0x7c, 0x41, 0x07, 0xbd, 0x26, 0x7b, 0x3c, 0x81, 0x20, 0x8f, 0x1d, 0x50, 0x2d, 0xdd
db 0x31, 0xb9, 0x5a, 0x4c, 0xc0, 0x89, 0x68, 0x79, 0xb4, 0x9e, 0xe6, 0x57, 0x44, 0xee, 0x32, 0xbb
db 0x69, 0xf7, 0x35, 0xbc, 0xcf, 0x96, 0xa7, 0xe0, 0xb4, 0x38, 0xce, 0xde, 0xb9, 0xf6, 0xfe, 0x5a
db 0xb9, 0xe7, 0x3c, 0x01, 0xf2, 0xbd, 0xa8, 0x26, 0xf6, 0x29, 0x0b, 0xe6, 0xd7, 0xe7, 0xa5, 0x62
db 0xb0, 0x0c, 0x9b, 0x01, 0x4f, 0x18, 0x9e, 0x40, 0x28, 0x2a, 0xbb, 0x21, 0xe6, 0x8d, 0x93, 0x22
db 0xbd, 0x01, 0xfc, 0x78, 0x93, 0x29, 0x55, 0x8f, 0x17, 0xe8, 0x09, 0x07, 0xf8, 0x30, 0x20, 0x68
db 0xf2, 0x95, 0xc1, 0x50, 0xad, 0x12, 0x35, 0x46, 0x52, 0x65, 0xaa, 0xb7, 0x35, 0x50, 0x22, 0x91
db 0x36, 0x74, 0x86, 0xab, 0x4b, 0xe8, 0xfd, 0x42, 0x76, 0x41, 0x4a, 0xb4, 0x2c, 0x59, 0x36, 0xc9
db 0xd6, 0xdb, 0x7e, 0xa1, 0x60, 0xcf, 0x13, 0x62, 0x0c, 0x93, 0xdd, 0x3e, 0xfc, 0x3e, 0x36, 0xfc
db 0xfd, 0x7e, 0x48, 0x69, 0x0f, 0x6a, 0xdf, 0x3c, 0xc0, 0x35, 0xcf, 0x81, 0x4b, 0x79, 0x15, 0x2d
db 0xda, 0x5a, 0x7f, 0xef, 0xe1, 0x13, 0x75, 0xef, 0xad, 0x80, 0xf7, 0x4e, 0xa1, 0xfd, 0x5d, 0xf8
db 0x67, 0xc4, 0x4a, 0xe5, 0x9f, 0x28, 0xe8, 0x82, 0xe5, 0xae, 0xac, 0xef, 0xb9, 0x4b, 0xca, 0x44
db 0x9b, 0xdc, 0xf2, 0xd2, 0x57, 0xa6, 0x9c, 0x5e, 0xbb, 0xd5, 0x4e, 0x31, 0xa8, 0xfa, 0x32, 0x26
db 0x4d, 0x46, 0x0d, 0xcb, 0xcf, 0x9e, 0x1a, 0xa3, 0x50, 0x69, 0x06, 0x34, 0xd8, 0xf9, 0x5d, 0xaf
db 0x4f, 0xc5, 0x2b, 0xe0, 0x8b, 0x81, 0x76, 0xc5, 0xce, 0x74, 0x4b, 0xba, 0x02, 0xba, 0xd4, 0x8b
db 0x91, 0xc5, 0x54, 0x28, 0x41, 0x77, 0x5f, 0xaa, 0x83, 0x14, 0xba, 0xf7, 0x8c, 0x51, 0xe2, 0xea
db 0xcf, 0x71, 0x40, 0x2f, 0x07, 0x82, 0xed, 0x81, 0x8c, 0x4f, 0xd3, 0x28, 0xce, 0x5c, 0x16, 0x72
db 0x4f, 0xcc, 0xe8, 0x49, 0x04, 0xae, 0x11, 0xe4, 0xbb, 0x18, 0x6d, 0xc4, 0xb3, 0x30, 0x20, 0xf7
db 0x0c, 0xe8, 0x50, 0x68, 0x37, 0xe9, 0x48, 0x76, 0x8b, 0x33, 0xea, 0xbb, 0x0d, 0x52, 0xbb, 0xab
db 0x6a, 0x62, 0xf4, 0x06, 0xbe, 0x5d, 0x7e, 0x89, 0xfb, 0xca, 0x75, 0xe6, 0xf7, 0x27, 0x59, 0x7c
db 0x91, 0x1a, 0xa3, 0xbb, 0x5a, 0x72, 0xd4, 0x3c, 0x19, 0xa8, 0x00, 0x74, 0x12, 0x24, 0x1c, 0x49
db 0xd9, 0x87, 0xd2, 0xc2, 0x98, 0x63, 0x7a, 0x0e, 0x5c, 0x39, 0x6b, 0x44, 0x29, 0xee, 0xef, 0x18
db 0xd1, 0xab, 0x15, 0xe0, 0x79, 0x50, 0x2a, 0x21, 0xc2, 0x67, 0x21, 0xab, 0x3c, 0x54, 0xc6, 0xf4
db 0xc6, 0x67, 0x96, 0x38, 0x37, 0xb6, 0xd2, 0x1b, 0xf0, 0xf6, 0xf8, 0x03, 0x60, 0x1c, 0xd8, 0x28
db 0xb2, 0x47, 0x53, 0x97, 0xf3, 0x1e, 0xd0, 0xcd, 0xd8, 0x30, 0xad, 0x24, 0xe7, 0xd9, 0x38, 0x97
db 0xb4, 0xc3, 0x1b, 0xb3, 0xea, 0x28, 0x40, 0x99, 0x29, 0x3e, 0x8f, 0x30, 0xb2, 0x55, 0xca, 0x7b


================================================
FILE: unittests/ASM/jump.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x1"
  }
}
%endif

; Exercises a forward conditional branch inside a counted loop.
; EDI is always 1, so `test edi, edi` leaves ZF clear and the `jz .local`
; must fall through, leaving EAX = 1 at the end of every iteration.

; Loop counter: the body is meant to run 50 times.
mov esi, 50

.jump_start:
mov edi, 1
test edi, edi
; Padding between the flag-producing test and the branch that consumes it.
nop
nop
nop
nop
nop
nop
nop
nop

jz .local
mov eax, 1
jmp .end

.local:
; Only reached if the jz above is taken, which would be wrong.
mov eax, 0

.end:
sub esi, 1
test esi, esi
; Loop while the counter is non-zero. This used to be `jz`, which is not taken
; on the first pass (esi == 49), so the loop body only ever executed once.
jnz .jump_start
hlt


================================================
FILE: unittests/ASM/lea.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0x1BD5B7DDE",
    "RBX": "0x0DEADBF18"
  }
}
%endif

; LEA address arithmetic: a scaled index on its own, and
; base + scaled index + displacement.

; Inputs: r14 is the index used with scale 8, r15 is the base/index value.
mov r14, 5
mov r15, 0xDEADBEEF

; rax = r15 * 2 = 0x1BD5B7DDE
lea rax, [r15 * 2]
; rbx = r15 + r14 * 8 + 1 = 0xDEADBEEF + 0x29 = 0xDEADBF18
lea rbx, [r15 + r14 * 8 + 1]

hlt


================================================
FILE: unittests/ASM/modrm_oob/DDD.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 macro used below (definition not shown in this file).
%include "modrm_oob_macros.mac"

; DDD
; 3DNow! instructions, each with an 8-byte memory source and mm0 as the register operand.
r3 pi2fw, 8, mm0
r3 pi2fd, 8, mm0
r3 pf2iw, 8, mm0
r3 pf2id, 8, mm0
r3 pfrcpv, 8, mm0
r3 pfrsqrtv, 8, mm0
r3 pfnacc, 8, mm0
r3 pfpnacc, 8, mm0
r3 pfcmpge, 8, mm0
r3 pfmin, 8, mm0
r3 pfrcp, 8, mm0
r3 pfrsqrt, 8, mm0
r3 pfsub, 8, mm0
r3 pfadd, 8, mm0
r3 pfcmpgt, 8, mm0
r3 pfmax, 8, mm0
r3 pfrcpit1, 8, mm0
r3 pfrsqit1, 8, mm0
r3 pfsubr, 8, mm0
r3 pfacc, 8, mm0
r3 pfcmpeq, 8, mm0
r3 pfmul, 8, mm0
r3 pfrcpit2, 8, mm0
; NASM has no plain `pmulhrw` mnemonic: the AMD 3DNow! encoding is spelled
; `pmulhrwa` (the Cyrix encoding is `pmulhrwc`).
r3 pmulhrwa, 8, mm0
r3 pswapd, 8, mm0
r3 pavgusb, 8, mm0

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/H0F38.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 / rw3 / r4_size macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; H0F38
; The second macro argument is the size in bytes of the memory operand.
r3 pshufb, 8, mm0
r3 pshufb, 16, xmm0

r3 phaddw, 8, mm0
r3 phaddw, 16, xmm0

r3 phaddd, 8, mm0
r3 phaddd, 16, xmm0

r3 phaddsw, 8, mm0
r3 phaddsw, 16, xmm0

r3 pmaddubsw, 8, mm0
r3 pmaddubsw, 16, xmm0

r3 phsubw, 8, mm0
r3 phsubw, 16, xmm0

r3 phsubd, 8, mm0
r3 phsubd, 16, xmm0

r3 phsubsw, 8, mm0
r3 phsubsw, 16, xmm0

r3 psignb, 8, mm0
r3 psignb, 16, xmm0

r3 psignw, 8, mm0
r3 psignw, 16, xmm0

r3 psignd, 8, mm0
r3 psignd, 16, xmm0

r3 pmulhrsw, 8, mm0
r3 pmulhrsw, 16, xmm0

r3 pblendvb, 16, xmm0
r3 blendvps, 16, xmm0
r3 blendvpd, 16, xmm0
r3 ptest, 16, xmm0

r3 pabsb, 8, mm0
r3 pabsb, 16, xmm0

r3 pabsw, 8, mm0
r3 pabsw, 16, xmm0

r3 pabsd, 8, mm0
r3 pabsd, 16, xmm0

; PMOVSX* only reads the low 8/4/2 bytes needed to fill the destination, so use
; the real memory operand size (same sizes as the VEX forms in VEX.asm).
; With 16 here the access would not end at the region boundary.
r3 pmovsxbw, 8, xmm0
r3 pmovsxbd, 4, xmm0
r3 pmovsxbq, 2, xmm0
r3 pmovsxwd, 8, xmm0
r3 pmovsxwq, 4, xmm0
r3 pmovsxdq, 8, xmm0
r3 pmuldq, 16, xmm0
r3 pcmpeqq, 16, xmm0
r3 movntdqa, 16, xmm0
r3 packusdw, 16, xmm0
; PMOVZX* memory operand sizes mirror PMOVSX*.
r3 pmovzxbw, 8, xmm0
r3 pmovzxbd, 4, xmm0
r3 pmovzxbq, 2, xmm0
r3 pmovzxwd, 8, xmm0
r3 pmovzxwq, 4, xmm0
r3 pmovzxdq, 8, xmm0
r3 pcmpgtq, 16, xmm0
r3 pminsb, 16, xmm0
r3 pminsd, 16, xmm0
r3 pminuw, 16, xmm0
r3 pminud, 16, xmm0
r3 pmaxsb, 16, xmm0
r3 pmaxsd, 16, xmm0
r3 pmaxuw, 16, xmm0
r3 pmaxud, 16, xmm0
r3 pmulld, 16, xmm0
r3 phminposuw, 16, xmm0
r3 sha1nexte, 16, xmm0
r3 sha1msg1, 16, xmm0
r3 sha1msg2, 16, xmm0
r3 sha256rnds2, 16, xmm0
r3 sha256msg1, 16, xmm0
r3 sha256msg2, 16, xmm0
r3 aesimc, 16, xmm0
r3 aesenc, 16, xmm0
r3 aesenclast, 16, xmm0
r3 aesdec, 16, xmm0
r3 aesdeclast, 16, xmm0

rw3 movbe, 2, ax
rw3 movbe, 4, eax
rw3 movbe, 8, rax

r4_size crc32, 1, byte, eax
r4_size crc32, 2, word, eax
r4_size crc32, 4, dword, eax
r4_size crc32, 8, qword, rax

r3 adcx, 4, eax
r3 adcx, 8, rax

r3 adox, 4, eax
r3 adox, 8, rax

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/H0F3A.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r4 / w4 macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; H0F3A
; Arguments: instruction, memory operand size in bytes, register operand, immediate.
w4 pextrq, 8, xmm0, 0
r4 pinsrq, 8, xmm0, 0
w4 pextrd, 4, xmm0, 0
r4 pinsrd, 4, xmm0, 0

; Each instruction is listed once; earlier duplicate roundpd/palignr lines were dropped.
r4 palignr, 8, mm0, 0
r4 palignr, 16, xmm0, 0
r4 roundps, 16, xmm0, 0
r4 roundpd, 16, xmm0, 0
r4 roundss, 4, xmm0, 0
r4 roundsd, 8, xmm0, 0
r4 blendps, 16, xmm0, 0
r4 blendpd, 16, xmm0, 0
r4 pblendw, 16, xmm0, 0
w4 pextrb, 1, xmm0, 0
w4 pextrw, 2, xmm0, 0
w4 extractps, 4, xmm0, 0
r4 pinsrb, 1, xmm0, 0
r4 insertps, 4, xmm0, 0
r4 dpps, 16, xmm0, 0
r4 dppd, 16, xmm0, 0
r4 mpsadbw, 16, xmm0, 0
r4 pclmulqdq, 16, xmm0, 0
r4 pcmpestrm, 16, xmm0, 0
r4 pcmpestri, 16, xmm0, 0
r4 pcmpistrm, 16, xmm0, 0
r4 pcmpistri, 16, xmm0, 0
r4 sha1rnds4, 16, xmm0, 0
r4 aeskeygenassist, 16, xmm0, 0

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/Primary.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 / w3 / rw3 / r4 macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; Primary table
; Each ALU op is exercised at all four operand sizes; the read-modify-write
; forms are repeated with a LOCK prefix.
rw3 add, 8, rax
rw3 add, 4, eax
rw3 add, 2, ax
rw3 add, 1, al

w3 lock add, 8, rax
w3 lock add, 4, eax
w3 lock add, 2, ax
w3 lock add, 1, al

rw3 or, 8, rax
rw3 or, 4, eax
rw3 or, 2, ax
rw3 or, 1, al

w3 lock or, 8, rax
w3 lock or, 4, eax
w3 lock or, 2, ax
w3 lock or, 1, al

rw3 adc, 8, rax
rw3 adc, 4, eax
rw3 adc, 2, ax
rw3 adc, 1, al

w3 lock adc, 8, rax
w3 lock adc, 4, eax
w3 lock adc, 2, ax
w3 lock adc, 1, al

rw3 sbb, 8, rax
rw3 sbb, 4, eax
rw3 sbb, 2, ax
rw3 sbb, 1, al

w3 lock sbb, 8, rax
w3 lock sbb, 4, eax
w3 lock sbb, 2, ax
w3 lock sbb, 1, al

rw3 and, 8, rax
rw3 and, 4, eax
rw3 and, 2, ax
rw3 and, 1, al

w3 lock and, 8, rax
w3 lock and, 4, eax
w3 lock and, 2, ax
w3 lock and, 1, al

; SUB was missing from this list although every other op of the family is covered.
rw3 sub, 8, rax
rw3 sub, 4, eax
rw3 sub, 2, ax
rw3 sub, 1, al

w3 lock sub, 8, rax
w3 lock sub, 4, eax
w3 lock sub, 2, ax
w3 lock sub, 1, al

rw3 xor, 8, rax
rw3 xor, 4, eax
rw3 xor, 2, ax
rw3 xor, 1, al

w3 lock xor, 8, rax
w3 lock xor, 4, eax
w3 lock xor, 2, ax
w3 lock xor, 1, al

rw3 cmp, 8, rax
rw3 cmp, 4, eax
rw3 cmp, 2, ax
rw3 cmp, 1, al

; Three-operand IMUL with a small immediate...
r4 imul, 8, rax, 4
r4 imul, 4, eax, 4
r4 imul, 2, ax, 4

; ...and with an immediate that does not fit in a signed byte.
r4 imul, 8, rax, 0x1004
r4 imul, 4, eax, 0x1004
r4 imul, 2, ax, 0x1004

rw3 test, 8, rax
rw3 test, 4, eax
rw3 test, 2, ax
rw3 test, 1, al

rw3 xchg, 8, rax
rw3 xchg, 4, eax
rw3 xchg, 2, ax
rw3 xchg, 1, al

rw3 mov, 8, rax
rw3 mov, 4, eax
rw3 mov, 2, ax
rw3 mov, 1, al

r3 movsxd, 4, rax

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/PrimaryGroup.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end of the file, where it is set to 1 (the expected RegData value).
mov rax, 0

; Provides the w3_size / w4_size macros used in this file (definitions not shown here).
%include "modrm_oob_macros.mac"

; Primary Group

; ALU ops with a memory destination and immediate source.
; Arguments: instruction, operand size in bytes, NASM size keyword, immediate.
; Every op except cmp is repeated with a LOCK prefix.
w4_size add, 1, byte, 1
w4_size add, 2, word, 1
w4_size add, 4, dword, 1
w4_size add, 8, qword, 1

w4_size lock add, 1, byte, 1
w4_size lock add, 2, word, 1
w4_size lock add, 4, dword, 1
w4_size lock add, 8, qword, 1

w4_size or, 1, byte, 1
w4_size or, 2, word, 1
w4_size or, 4, dword, 1
w4_size or, 8, qword, 1

w4_size lock or, 1, byte, 1
w4_size lock or, 2, word, 1
w4_size lock or, 4, dword, 1
w4_size lock or, 8, qword, 1

w4_size adc, 1, byte, 1
w4_size adc, 2, word, 1
w4_size adc, 4, dword, 1
w4_size adc, 8, qword, 1

w4_size lock adc, 1, byte, 1
w4_size lock adc, 2, word, 1
w4_size lock adc, 4, dword, 1
w4_size lock adc, 8, qword, 1

w4_size sbb, 1, byte, 1
w4_size sbb, 2, word, 1
w4_size sbb, 4, dword, 1
w4_size sbb, 8, qword, 1

w4_size lock sbb, 1, byte, 1
w4_size lock sbb, 2, word, 1
w4_size lock sbb, 4, dword, 1
w4_size lock sbb, 8, qword, 1

w4_size and, 1, byte, 1
w4_size and, 2, word, 1
w4_size and, 4, dword, 1
w4_size and, 8, qword, 1

w4_size lock and, 1, byte, 1
w4_size lock and, 2, word, 1
w4_size lock and, 4, dword, 1
w4_size lock and, 8, qword, 1

w4_size sub, 1, byte, 1
w4_size sub, 2, word, 1
w4_size sub, 4, dword, 1
w4_size sub, 8, qword, 1

w4_size lock sub, 1, byte, 1
w4_size lock sub, 2, word, 1
w4_size lock sub, 4, dword, 1
w4_size lock sub, 8, qword, 1

w4_size xor, 1, byte, 1
w4_size xor, 2, word, 1
w4_size xor, 4, dword, 1
w4_size xor, 8, qword, 1

w4_size lock xor, 1, byte, 1
w4_size lock xor, 2, word, 1
w4_size lock xor, 4, dword, 1
w4_size lock xor, 8, qword, 1

; cmp only reads its destination, so there is no LOCK variant.
w4_size cmp, 1, byte, 1
w4_size cmp, 2, word, 1
w4_size cmp, 4, dword, 1
w4_size cmp, 8, qword, 1

; Rotates and shifts of a memory operand by an immediate count of 1,
; at each operand size (1, 2, 4, 8 bytes).
w4_size rol, 1, byte, 1
w4_size rol, 2, word, 1
w4_size rol, 4, dword, 1
w4_size rol, 8, qword, 1

w4_size ror, 1, byte, 1
w4_size ror, 2, word, 1
w4_size ror, 4, dword, 1
w4_size ror, 8, qword, 1

w4_size rcl, 1, byte, 1
w4_size rcl, 2, word, 1
w4_size rcl, 4, dword, 1
w4_size rcl, 8, qword, 1

w4_size rcr, 1, byte, 1
w4_size rcr, 2, word, 1
w4_size rcr, 4, dword, 1
w4_size rcr, 8, qword, 1

w4_size shl, 1, byte, 1
w4_size shl, 2, word, 1
w4_size shl, 4, dword, 1
w4_size shl, 8, qword, 1

w4_size shr, 1, byte, 1
w4_size shr, 2, word, 1
w4_size shr, 4, dword, 1
w4_size shr, 8, qword, 1

w4_size sar, 1, byte, 1
w4_size sar, 2, word, 1
w4_size sar, 4, dword, 1
w4_size sar, 8, qword, 1

; test with an immediate, then the single-memory-operand forms
; (not, neg, mul, imul, div, idiv) at each operand size.
; not and neg are additionally run with a LOCK prefix.
w4_size test, 1, byte, 1
w4_size test, 2, word, 1
w4_size test, 4, dword, 1
w4_size test, 8, qword, 1

w3_size not, 1, byte
w3_size not, 2, word
w3_size not, 4, dword
w3_size not, 8, qword

w3_size lock not, 1, byte
w3_size lock not, 2, word
w3_size lock not, 4, dword
w3_size lock not, 8, qword

w3_size neg, 1, byte
w3_size neg, 2, word
w3_size neg, 4, dword
w3_size neg, 8, qword

w3_size lock neg, 1, byte
w3_size lock neg, 2, word
w3_size lock neg, 4, dword
w3_size lock neg, 8, qword

w3_size mul, 1, byte
w3_size mul, 2, word
w3_size mul, 4, dword
w3_size mul, 8, qword

w3_size imul, 1, byte
w3_size imul, 2, word
w3_size imul, 4, dword
w3_size imul, 8, qword

; NOTE(review): div/idiv fault on a zero divisor, so these rely on the memory
; contents left behind by the earlier writes in this file; confirm before reordering.
w3_size div, 1, byte
w3_size div, 2, word
w3_size div, 4, dword
w3_size div, 8, qword

w3_size idiv, 1, byte
w3_size idiv, 2, word
w3_size idiv, 4, dword
w3_size idiv, 8, qword

; inc / dec of a memory operand, plain and LOCK-prefixed, at each operand size.
w3_size inc, 1, byte
w3_size inc, 2, word
w3_size inc, 4, dword
w3_size inc, 8, qword

w3_size lock inc, 1, byte
w3_size lock inc, 2, word
w3_size lock inc, 4, dword
w3_size lock inc, 8, qword

w3_size dec, 1, byte
w3_size dec, 2, word
w3_size dec, 4, dword
w3_size dec, 8, qword

w3_size lock dec, 1, byte
w3_size lock dec, 2, word
w3_size lock dec, 4, dword
w3_size lock dec, 8, qword

; Store of an immediate to memory at each operand size.
w4_size mov, 1, byte, 1
w4_size mov, 2, word, 1
w4_size mov, 4, dword, 1
w4_size mov, 8, qword, 1

; Done
; Reaching this point sets the RAX value the CONFIG header expects.
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/Secondary.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end of the file, where it is set to 1 (the expected RegData value).
mov rax, 0

; Provides the r*/w*/rw* macros used in this file (definitions not shown here).
%include "modrm_oob_macros.mac"

; Secondary table
rw3 movups, 16, xmm0
rw3 movlps, 8, xmm0
r3 unpcklps, 16, xmm0
r3 unpckhps, 16, xmm0
rw3 movhps, 8, xmm0
rw3 movaps, 16, xmm0
r3 cvtpi2ps, 8, xmm0
w3 movntps, 16, xmm0
r3 cvttps2pi, 8, mm0
r3 cvtps2pi, 8, mm0
r3 ucomiss, 4, xmm0
r3 comiss, 4, xmm0

; CMOVcc with a memory source, for every condition and the 8/4/2-byte operand sizes.
r3 cmovo, 8, rax
r3 cmovo, 4, eax
r3 cmovo, 2, ax

r3 cmovno, 8, rax
r3 cmovno, 4, eax
r3 cmovno, 2, ax

r3 cmovb, 8, rax
r3 cmovb, 4, eax
r3 cmovb, 2, ax

r3 cmovnb, 8, rax
r3 cmovnb, 4, eax
r3 cmovnb, 2, ax

r3 cmovz, 8, rax
r3 cmovz, 4, eax
r3 cmovz, 2, ax

r3 cmovnz, 8, rax
r3 cmovnz, 4, eax
r3 cmovnz, 2, ax

r3 cmovbe, 8, rax
r3 cmovbe, 4, eax
r3 cmovbe, 2, ax

r3 cmovnbe, 8, rax
r3 cmovnbe, 4, eax
r3 cmovnbe, 2, ax

r3 cmovs, 8, rax
r3 cmovs, 4, eax
r3 cmovs, 2, ax

r3 cmovns, 8, rax
r3 cmovns, 4, eax
r3 cmovns, 2, ax

r3 cmovp, 8, rax
r3 cmovp, 4, eax
r3 cmovp, 2, ax

r3 cmovnp, 8, rax
r3 cmovnp, 4, eax
r3 cmovnp, 2, ax

r3 cmovl, 8, rax
r3 cmovl, 4, eax
r3 cmovl, 2, ax

r3 cmovnl, 8, rax
r3 cmovnl, 4, eax
r3 cmovnl, 2, ax

r3 cmovle, 8, rax
r3 cmovle, 4, eax
r3 cmovle, 2, ax

r3 cmovnle, 8, rax
r3 cmovnle, 4, eax
r3 cmovnle, 2, ax

r3 sqrtps, 16, xmm0
r3 rsqrtps, 16, xmm0
r3 rcpps, 16, xmm0
r3 andps, 16, xmm0
r3 andnps, 16, xmm0
r3 orps, 16, xmm0
r3 xorps, 16, xmm0
r3 addps, 16, xmm0
r3 mulps, 16, xmm0
r3 cvtps2pd, 8, xmm0
r3 cvtdq2ps, 16, xmm0
r3 subps, 16, xmm0
r3 minps, 16, xmm0
r3 divps, 16, xmm0
r3 maxps, 16, xmm0
; MMX (unprefixed) forms. With an xmm register these assemble to the
; 66-prefixed encodings, which SecondaryOpSize.asm already covers.
; NOTE(review): the punpckl* MMX forms are documented with a 32-bit memory
; source; 8 is used here as the conservative in-bounds size.
r3 punpcklbw, 8, mm0
r3 punpcklwd, 8, mm0
r3 punpckldq, 8, mm0
r3 packsswb, 8, mm0
r3 pcmpgtb, 8, mm0
r3 pcmpgtw, 8, mm0
r3 pcmpgtd, 8, mm0
r3 packuswb, 8, mm0
r3 punpckhbw, 8, mm0
r3 punpckhwd, 8, mm0
r3 punpckhdq, 8, mm0
r3 packssdw, 8, mm0

rw3 movd, 4, mm0
rw3 movq, 8, mm0
r4 pshufw, 8, mm0, 0
r3 pcmpeqb, 8, mm0
r3 pcmpeqw, 8, mm0
r3 pcmpeqd, 8, mm0

rw3 movd, 4, xmm0
rw3 movq, 8, xmm0

; SETcc to a one-byte memory destination, for every condition.
w2 seto, 1
w2 setno, 1
w2 setb, 1
w2 setnb, 1
w2 setz, 1
w2 setnz, 1
w2 setbe, 1
w2 setnbe, 1
w2 sets, 1
w2 setns, 1
w2 setp, 1
w2 setnp, 1
w2 setl, 1
w2 setnl, 1
w2 setle, 1
w2 setnle, 1

; The bit-test forms take the bit index from the register operand, and that
; index also selects which memory element is touched, so rax is zeroed first.
mov rax, 0
w3 bt, 2, ax
w3 bt, 4, eax
w3 bt, 8, rax

w4 shld, 2, ax, 1
w4 shld, 4, eax, 1
w4 shld, 8, rax, 1

mov cl, 1
w4 shld, 2, ax, cl
w4 shld, 4, eax, cl
w4 shld, 8, rax, cl

mov rax, 0
w3 bts, 2, ax
w3 bts, 4, eax
w3 bts, 8, rax

w4 shrd, 2, ax, 1
w4 shrd, 4, eax, 1
w4 shrd, 8, rax, 1

mov cl, 1
w4 shrd, 2, ax, cl
w4 shrd, 4, eax, cl
; Was `shld` (copy-paste slip), which left the 64-bit shrd-by-cl form untested.
w4 shrd, 8, rax, cl

r3 imul, 8, rax
r3 imul, 4, eax
r3 imul, 2, ax

w3 cmpxchg, 8, rax
w3 cmpxchg, 4, eax
w3 cmpxchg, 2, ax
w3 cmpxchg, 1, al

; rax may have been changed by imul/cmpxchg above; zero it again for the bit index.
mov rax, 0
w3 btr, 2, ax
w3 btr, 4, eax
w3 btr, 8, rax

; MOVZX is a bit special
; Written by hand: the source needs an explicit size keyword that differs from
; the destination size. Each size is read at [r15 - size] and at [r14].
movzx rax, byte [r15 - 1]
movzx rax, byte [r14]
movzx rax, word [r15 - 2]
movzx rax, word [r14]

; movzx overwrote rax; zero it again before using it as a bit index.
mov rax, 0
w3 btc, 2, ax
w3 btc, 4, eax
w3 btc, 8, rax

r3 bsf, 2, ax
r3 bsf, 4, eax
r3 bsf, 8, rax

r3 bsr, 2, ax
r3 bsr, 4, eax
r3 bsr, 8, rax

; MOVSX is a bit special
; Same hand-written pattern as MOVZX above.
movsx rax, byte [r15 - 1]
movsx rax, byte [r14]
movsx rax, word [r15 - 2]
movsx rax, word [r14]

w3 xadd, 1, al
w3 xadd, 2, ax
w3 xadd, 4, eax
w3 xadd, 8, rax

w3 lock xadd, 1, al
w3 lock xadd, 2, ax
w3 lock xadd, 4, eax
w3 lock xadd, 8, rax


; cmpps with each of the eight comparison predicates (immediate 0-7).
r4 cmpps, 16, xmm0, 0
r4 cmpps, 16, xmm0, 1
r4 cmpps, 16, xmm0, 2
r4 cmpps, 16, xmm0, 3
r4 cmpps, 16, xmm0, 4
r4 cmpps, 16, xmm0, 5
r4 cmpps, 16, xmm0, 6
r4 cmpps, 16, xmm0, 7

w3 movnti, 4, eax
w3 movnti, 8, rax

r4 pinsrw, 2, xmm0, 0
r4 shufps, 16, xmm0, 0

; MMX integer ops with an 8-byte memory source.
r3 psrlw, 8, mm0
r3 psrld, 8, mm0
r3 psrlq, 8, mm0
r3 paddq, 8, mm0
r3 pmullw, 8, mm0
r3 psubusb, 8, mm0
r3 psubusw, 8, mm0
r3 pminub, 8, mm0
r3 pand, 8, mm0
r3 paddusb, 8, mm0
r3 paddusw, 8, mm0
r3 pmaxub, 8, mm0
r3 pandn, 8, mm0
r3 pavgb, 8, mm0
r3 psraw, 8, mm0
r3 psrad, 8, mm0
r3 pavgw, 8, mm0
r3 pmulhuw, 8, mm0
r3 pmulhw, 8, mm0

w3 movntq, 8, mm0


r3 psubsb, 8, mm0
r3 psubsw, 8, mm0
r3 pminsw, 8, mm0
r3 por, 8, mm0
r3 paddsb, 8, mm0
r3 paddsw, 8, mm0
r3 pmaxsw, 8, mm0
r3 pxor, 8, mm0
r3 psllw, 8, mm0
r3 pslld, 8, mm0
r3 psllq, 8, mm0
r3 pmuludq, 8, mm0
r3 pmaddwd, 8, mm0
r3 psadbw, 8, mm0
r3 psubb, 8, mm0
r3 psubw, 8, mm0
r3 psubd, 8, mm0
r3 psubq, 8, mm0
r3 paddb, 8, mm0
r3 paddw, 8, mm0
r3 paddd, 8, mm0

; Done
; Reaching this point sets the RAX value the CONFIG header expects.
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/SecondaryGroup.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r2 / w2 / w4_size macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; Secondary Group
; The descriptor-table / segment instructions below are left disabled.
; w2 sldt, 2
; w2 str, 2
; w2 verr, 2
; w2 verw, 2
; SGDT
; SIDT
w2 smsw, 2

; Bit-test ops with an immediate bit index of 0, at the 2/4/8-byte operand sizes.
w4_size bt, 2, word, 0
w4_size bt, 4, dword, 0
w4_size bt, 8, qword, 0

w4_size bts, 2, word, 0
w4_size bts, 4, dword, 0
w4_size bts, 8, qword, 0

w4_size btr, 2, word, 0
w4_size btr, 4, dword, 0
w4_size btr, 8, qword, 0

w4_size btc, 2, word, 0
w4_size btc, 4, dword, 0
w4_size btc, 8, qword, 0

w2 cmpxchg8b, 8
w2 cmpxchg16b, 16

; Each save is followed by the matching restore of the same size.
w2 fxsave, 512
r2 fxrstor, 512

w2 stmxcsr, 4
r2 ldmxcsr, 4

; XSAVE/XRSTOR size is variable and can't be tested here.

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/SecondaryModRM.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; rax is cleared first and only set to 1 once every access below has completed.
mov rax, 0
; r14 = base of the second declared region.
mov r14, 0x100002000
; r15 = 0x100000000 + 4096: the first address past the first declared region.
mov r15, 0x100001000

%include "modrm_oob_macros.mac"

; Secondary ModRM

; clzero is a bit special
; Its target address is supplied in rax rather than through a memory operand,
; so the two addresses are set up by hand.

; Last 64 bytes of the first region.
lea rax, [r15 - 64]
clzero rax

; First bytes of the second region.
mov rax, r14
clzero rax

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/SecondaryOpSize.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 / w3 / rw3 / r4 / w4 macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; Secondary Opsize
rw3 movupd, 16, xmm0
rw3 movlpd, 8, xmm0
r3 unpcklpd, 16, xmm0
r3 unpckhpd, 16, xmm0
rw3 movhpd, 8, xmm0
rw3 movapd, 16, xmm0
r3 cvtpi2pd, 8, xmm0
w3 movntpd, 16, xmm0
r3 cvttpd2pi, 16, mm0
r3 cvtpd2pi, 16, mm0
r3 ucomisd, 8, xmm0
r3 comisd, 8, xmm0

r3 sqrtpd, 16, xmm0
r3 andpd, 16, xmm0
r3 andnpd, 16, xmm0
r3 orpd, 16, xmm0
r3 xorpd, 16, xmm0
r3 addpd, 16, xmm0
r3 mulpd, 16, xmm0
r3 cvtpd2ps, 16, xmm0
r3 cvtps2dq, 16, xmm0
r3 subpd, 16, xmm0
r3 minpd, 16, xmm0
r3 divpd, 16, xmm0
r3 maxpd, 16, xmm0

r3 punpcklbw, 16, xmm0
r3 punpcklwd, 16, xmm0
r3 punpckldq, 16, xmm0
r3 packsswb, 16, xmm0
r3 pcmpgtb, 16, xmm0
r3 pcmpgtw, 16, xmm0
r3 pcmpgtd, 16, xmm0
r3 packuswb, 16, xmm0
r3 punpckhbw, 16, xmm0
r3 punpckhwd, 16, xmm0
r3 punpckhdq, 16, xmm0
r3 packssdw, 16, xmm0
r3 punpcklqdq, 16, xmm0
r3 punpckhqdq, 16, xmm0

rw3 movdqa, 16, xmm0

r4 pshufd, 16, xmm0, 1

r3 pcmpeqb, 16, xmm0
r3 pcmpeqw, 16, xmm0
r3 pcmpeqd, 16, xmm0
r3 haddpd, 16, xmm0
r3 hsubpd, 16, xmm0

; cmppd with each of the eight comparison predicates (immediate 0-7).
r4 cmppd, 16, xmm0, 0
r4 cmppd, 16, xmm0, 1
r4 cmppd, 16, xmm0, 2
r4 cmppd, 16, xmm0, 3
r4 cmppd, 16, xmm0, 4
r4 cmppd, 16, xmm0, 5
r4 cmppd, 16, xmm0, 6
r4 cmppd, 16, xmm0, 7

r4 pinsrw, 2, xmm0, 0
w4 pextrw, 2, xmm0, 0

r4 shufpd, 16, xmm0, 0
r3 addsubpd, 16, xmm0
r3 psrlw, 16, xmm0
r3 psrld, 16, xmm0
r3 psrlq, 16, xmm0
r3 paddq, 16, xmm0
; pmullw was missing here although the MMX list in Secondary.asm has it.
r3 pmullw, 16, xmm0
r3 psubusb, 16, xmm0
r3 psubusw, 16, xmm0
r3 pminub, 16, xmm0
r3 pand, 16, xmm0
r3 paddusb, 16, xmm0
r3 paddusw, 16, xmm0
r3 pmaxub, 16, xmm0
r3 pandn, 16, xmm0

r3 pavgb, 16, xmm0
r3 psraw, 16, xmm0
r3 psrad, 16, xmm0
r3 pavgw, 16, xmm0
r3 pmulhuw, 16, xmm0
r3 pmulhw, 16, xmm0
r3 cvttpd2dq, 16, xmm0
w3 movntdq, 16, xmm0
r3 psubsb, 16, xmm0
; psubsw was missing here although the MMX list in Secondary.asm has it.
r3 psubsw, 16, xmm0
r3 pminsw, 16, xmm0
r3 por, 16, xmm0
r3 paddsb, 16, xmm0
r3 paddsw, 16, xmm0
r3 pmaxsw, 16, xmm0
r3 pxor, 16, xmm0
r3 psllw, 16, xmm0
r3 pslld, 16, xmm0
r3 psllq, 16, xmm0
r3 pmuludq, 16, xmm0
r3 pmaddwd, 16, xmm0
r3 psadbw, 16, xmm0
r3 psubb, 16, xmm0
r3 psubw, 16, xmm0
r3 psubd, 16, xmm0
r3 psubq, 16, xmm0
r3 paddb, 16, xmm0
r3 paddw, 16, xmm0
r3 paddd, 16, xmm0

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/SecondaryREP.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 / w3 / rw3 / r4 macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; Secondary REP
rw3 movss, 4, xmm0
r3 movsldup, 16, xmm0
r3 movshdup, 16, xmm0

; cvtsi2ss is a bit special
; Written by hand because the integer source needs an explicit dword/qword
; keyword. Each size is read at [r15 - size] and at [r14].
cvtsi2ss xmm0, dword [r15 - 4]
cvtsi2ss xmm0, dword [r14]

cvtsi2ss xmm0, qword [r15 - 8]
cvtsi2ss xmm0, qword [r14]

w3 movntss, 4, xmm0

; The source is a 4-byte float for both the 32-bit and 64-bit destination.
r3 cvttss2si, 4, eax
r3 cvttss2si, 8, rax

r3 cvtss2si, 4, eax
r3 cvtss2si, 8, rax

r3 sqrtss, 4, xmm0
r3 rsqrtss, 4, xmm0
r3 rcpss, 4, xmm0
r3 addss, 4, xmm0
r3 mulss, 4, xmm0
r3 cvtss2sd, 4, xmm0
r3 cvttps2dq, 16, xmm0
r3 subss, 4, xmm0
r3 minss, 4, xmm0
r3 divss, 4, xmm0
r3 maxss, 4, xmm0

rw3 movdqu, 16, xmm0
r4 pshufhw, 16, xmm0, 0
rw3 movq, 8, xmm0

r3 popcnt, 2, ax
r3 popcnt, 4, eax
r3 popcnt, 8, rax

r3 tzcnt, 2, ax
r3 tzcnt, 4, eax
r3 tzcnt, 8, rax

r3 lzcnt, 2, ax
r3 lzcnt, 4, eax
r3 lzcnt, 8, rax

; cmpss with each of the eight comparison predicates (immediate 0-7).
r4 cmpss, 4, xmm0, 0
r4 cmpss, 4, xmm0, 1
r4 cmpss, 4, xmm0, 2
r4 cmpss, 4, xmm0, 3
r4 cmpss, 4, xmm0, 4
r4 cmpss, 4, xmm0, 5
r4 cmpss, 4, xmm0, 6
r4 cmpss, 4, xmm0, 7

r3 cvtdq2pd, 8, xmm0

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/SecondaryREPNE.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end so that a test which stops early does not report success.
mov rax, 0

; Provides the r3 / w3 / rw3 / r4 macros used below (definitions not shown in this file).
%include "modrm_oob_macros.mac"

; Secondary REPNE
rw3 movsd, 8, xmm0
r3 movddup, 8, xmm0

; cvtsi2sd is a bit special
; Written by hand because the integer source needs an explicit dword/qword
; keyword. Each size is read at [r15 - size] and at [r14].
cvtsi2sd xmm0, dword [r15 - 4]
cvtsi2sd xmm0, dword [r14]

cvtsi2sd xmm0, qword [r15 - 8]
cvtsi2sd xmm0, qword [r14]

w3 movntsd, 8, xmm0

; The source is always an 8-byte double. Both destination widths are covered,
; matching the cvt(t)ss2si tests in SecondaryREP.asm.
r3 cvttsd2si, 8, eax
r3 cvttsd2si, 8, rax
r3 cvtsd2si, 8, eax
r3 cvtsd2si, 8, rax

r3 sqrtsd, 8, xmm0
r3 addsd, 8, xmm0
r3 mulsd, 8, xmm0
r3 cvtsd2ss, 8, xmm0
r3 subsd, 8, xmm0
r3 minsd, 8, xmm0
r3 divsd, 8, xmm0
r3 maxsd, 8, xmm0

r4 pshuflw, 16, xmm0, 0
r3 haddps, 16, xmm0
r3 hsubps, 16, xmm0

; cmpsd with each of the eight comparison predicates (immediate 0-7).
r4 cmpsd, 8, xmm0, 0
r4 cmpsd, 8, xmm0, 1
r4 cmpsd, 8, xmm0, 2
r4 cmpsd, 8, xmm0, 3
r4 cmpsd, 8, xmm0, 4
r4 cmpsd, 8, xmm0, 5
r4 cmpsd, 8, xmm0, 6
r4 cmpsd, 8, xmm0, 7

r3 addsubps, 16, xmm0
r3 cvtpd2dq, 16, xmm0
r3 lddqu, 16, xmm0

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/VEX.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

; r15 = 0x100000000 + 4096: the first address past the first region declared above.
; r14 = base of the second declared region.
; The page between them (0x100001000) is not listed in MemoryRegions.
mov r15, 0x100001000
mov r14, 0x100002000
; rax stays 0 until the end of the file, where it is set to 1 (the expected RegData value).
mov rax, 0

; Provides the r3/r4/w3/rw3/*_size/*_fma macros that the helpers in this file build on
; (definitions not shown here).
%include "modrm_oob_macros.mac"

; VEX map 1
; r_avx <instr>: run the r3 test for <instr> at both vector widths,
; 16 bytes with xmm0 and 32 bytes with ymm0.
%macro r_avx 1
r3 %1, 16, xmm0
r3 %1, 32, ymm0
%endmacro

; r_avx_2_reg <instr>: run the r4 test for <instr> at both vector widths,
; passing a second register (xmm1 / ymm1) as the fourth r4 argument.
%macro r_avx_2_reg 1
r4 %1, 16, xmm0, xmm1
r4 %1, 32, ymm0, ymm1
%endmacro

; r_avx_fma <instr>: run the r4_fma test for <instr> at both vector widths.
; Note the second register differs between the widths: xmm1 for the 16-byte
; form and ymm2 for the 32-byte form.
%macro r_avx_fma 1
r4_fma %1, 16, xmm0, xmm1
r4_fma %1, 32, ymm0, ymm2
%endmacro

; r2_avx <instr>, <arg>: run the r4 test for <instr> at both vector widths,
; forwarding <arg> (an immediate at every call site in this file) as the
; fourth r4 argument.
%macro r2_avx 2
r4 %1, 16, xmm0, %2
r4 %1, 32, ymm0, %2
%endmacro

; w_avx <instr>: run the w3 test for <instr> at both vector widths,
; 16 bytes with xmm0 and 32 bytes with ymm0.
%macro w_avx 1
w3 %1, 16, xmm0
w3 %1, 32, ymm0
%endmacro

; rw_avx <instr>: run both the r_avx and the w_avx tests for <instr>.
%macro rw_avx 1
r_avx %1
w_avx %1
%endmacro

; VEX
; Map 1 instructions. r_avx / w_avx / rw_avx / r2_avx cover both the xmm and
; ymm widths; scalar and fixed-width forms call the base macros directly.
rw_avx vmovups
rw_avx vmovupd
rw3 vmovss, 4, xmm0
rw3 vmovsd, 8, xmm0

rw3 vmovlps, 8, xmm0
rw3 vmovlpd, 8, xmm0

r_avx vmovsldup
r_avx vmovddup

r_avx vunpcklps
r_avx vunpcklpd
r_avx vunpckhps
r_avx vunpckhpd

rw3 vmovhps, 8, xmm0
rw3 vmovhpd, 8, xmm0

r_avx vmovshdup
r_avx vsqrtps
r_avx vsqrtpd

r3 vsqrtss, 4, xmm0
r3 vsqrtsd, 8, xmm0

r_avx vrsqrtps
r3 vrsqrtss, 4, xmm0

r_avx vrcpps
r3 vrcpss, 4, xmm0

r_avx vandps
r_avx vandpd
r_avx vandnps
r_avx vandnpd
r_avx vorps
r_avx vorpd
r_avx vxorps
r_avx vxorpd
r_avx vpunpcklbw
r_avx vpunpcklwd
r_avx vpunpckldq

r_avx vpacksswb
r_avx vpcmpgtb
r_avx vpcmpgtw
r_avx vpcmpgtd
r_avx vpackuswb

r2_avx vpshufd, 0
r2_avx vpshufhw, 0
r2_avx vpshuflw, 0

r_avx vpcmpeqb
r_avx vpcmpeqw
r_avx vpcmpeqd

; Packed compares with each of the predicates 0-7.
r2_avx vcmpps, 0
r2_avx vcmpps, 1
r2_avx vcmpps, 2
r2_avx vcmpps, 3
r2_avx vcmpps, 4
r2_avx vcmpps, 5
r2_avx vcmpps, 6
r2_avx vcmpps, 7

r2_avx vcmppd, 0
r2_avx vcmppd, 1
r2_avx vcmppd, 2
r2_avx vcmppd, 3
r2_avx vcmppd, 4
r2_avx vcmppd, 5
r2_avx vcmppd, 6
r2_avx vcmppd, 7

r4 vcmpss, 4, xmm0, 0
r4 vcmpss, 4, xmm0, 1
r4 vcmpss, 4, xmm0, 2
r4 vcmpss, 4, xmm0, 3
r4 vcmpss, 4, xmm0, 4
r4 vcmpss, 4, xmm0, 5
r4 vcmpss, 4, xmm0, 6
r4 vcmpss, 4, xmm0, 7

r4 vcmpsd, 8, xmm0, 0
r4 vcmpsd, 8, xmm0, 1
r4 vcmpsd, 8, xmm0, 2
r4 vcmpsd, 8, xmm0, 3
r4 vcmpsd, 8, xmm0, 4
r4 vcmpsd, 8, xmm0, 5
r4 vcmpsd, 8, xmm0, 6
r4 vcmpsd, 8, xmm0, 7

r4 vpinsrw, 2, xmm0, 0
w4 vpextrw, 2, xmm0, 0

r2_avx vshufps, 0
r2_avx vshufpd, 0

rw_avx vmovaps
rw_avx vmovapd

r4_size vcvtsi2ss, 4, dword, xmm0
r4_size vcvtsi2ss, 8, qword, xmm0

r4_size vcvtsi2sd, 4, dword, xmm0
r4_size vcvtsi2sd, 8, qword, xmm0

w_avx vmovntps
w_avx vmovntpd

; The float-to-integer converts keep the same source size for both
; destination widths.
r4_size vcvttss2si, 4, dword, eax
r4_size vcvttss2si, 4, dword, rax

r4_size vcvttsd2si, 8, qword, eax
r4_size vcvttsd2si, 8, qword, rax

r4_size vcvtss2si, 4, dword, eax
r4_size vcvtss2si, 4, dword, rax

r4_size vcvtsd2si, 8, qword, eax
r4_size vcvtsd2si, 8, qword, rax

r4_size vucomiss, 4, dword, xmm0
r4_size vucomisd, 8, qword, xmm0

r4_size vcomiss, 4, dword, xmm0
r4_size vcomisd, 8, qword, xmm0

r_avx vaddps
r_avx vaddpd
r4_size vaddss, 4, dword, xmm0
r4_size vaddsd, 8, qword, xmm0

r_avx vmulps
r_avx vmulpd
r4_size vmulss, 4, dword, xmm0
r4_size vmulsd, 8, qword, xmm0

r4_size vcvtps2pd, 8, qword, xmm0
r4_size vcvtps2pd, 16, oword, ymm0

r4_size vcvtpd2ps, 16, oword, xmm0
r4_size vcvtpd2ps, 32, yword, xmm0

r4_size vcvtss2sd, 4, dword, xmm0
r4_size vcvtsd2ss, 8, qword, xmm0

r_avx vcvtdq2ps
r_avx vcvtps2dq
r_avx vcvttps2dq

r_avx vsubps
r_avx vsubpd
r4_size vsubss, 4, dword, xmm0
r4_size vsubsd, 8, qword, xmm0

r_avx vminps
r_avx vminpd
r4_size vminss, 4, dword, xmm0
r4_size vminsd, 8, qword, xmm0

r_avx vdivps
r_avx vdivpd
r4_size vdivss, 4, dword, xmm0
r4_size vdivsd, 8, qword, xmm0

r_avx vmaxps
r_avx vmaxpd
r4_size vmaxss, 4, dword, xmm0
r4_size vmaxsd, 8, qword, xmm0

r_avx vpunpckhbw
r_avx vpunpckhwd
r_avx vpunpckhdq
r_avx vpackssdw
r_avx vpunpcklqdq
r_avx vpunpckhqdq

rw3 vmovq, 8, xmm0
rw3 vmovd, 4, xmm0

; vmovdqa / vmovdqu have store forms too, so test both directions, like the
; other vmov* instructions above and the legacy movdqa / movdqu tests.
rw_avx vmovdqa
rw_avx vmovdqu
r_avx vhaddpd
r_avx vhaddps
r_avx vhsubpd
r_avx vhsubps
r_avx vaddsubpd
r_avx vaddsubps

r_avx vpsrlw
r_avx vpsrld
r_avx vpsrlq
r_avx vpaddq
r_avx vpmullw
r_avx vpsubusb
r_avx vpsubusw
; vpminub and vpaddusw were missing relative to the legacy SSE list.
r_avx vpminub
r_avx vpand
r_avx vpaddusb
r_avx vpaddusw
r_avx vpmaxub
r_avx vpandn
r_avx vpavgb
r_avx vpsraw
r_avx vpsrad
r_avx vpavgw
r_avx vpmulhuw
r_avx vpmulhw
r4_size vcvttpd2dq, 16, oword, xmm0
r4_size vcvttpd2dq, 32, yword, xmm0
r_avx vcvtdq2pd
r4_size vcvtpd2dq, 16, oword, xmm0
r4_size vcvtpd2dq, 32, yword, xmm0
w_avx vmovntdq
r_avx vpsubsb
r_avx vpsubsw
r_avx vpminsw
r_avx vpor
r_avx vpaddsb
r_avx vpaddsw
r_avx vpmaxsw
r_avx vpxor
r_avx vlddqu
r_avx vpsllw
r_avx vpslld
r_avx vpsllq
r_avx vpmuludq
r_avx vpmaddwd
r_avx vpsadbw
r_avx vpsubb
r_avx vpsubw
r_avx vpsubd
r_avx vpsubq
r_avx vpaddb
r_avx vpaddw
r_avx vpaddd
; vpaddq is already covered above, next to vpsrlq.

; VEX Map 2
r_avx vpshufb
r_avx vphaddw
r_avx vphaddd
r_avx vphaddsw
r_avx vpmaddubsw
r_avx vphsubw
r_avx vphsubd
r_avx vphsubsw
r_avx vpsignb
r_avx vpsignw
; vpsignd was listed twice; once is enough.
r_avx vpsignd
r_avx vpmulhrsw
r_avx vpermilps
r_avx vpermilpd
r_avx vtestps
r_avx vtestpd

r4_size vcvtph2ps, 8, qword, xmm0
r4_size vcvtph2ps, 16, oword, ymm0

r3 vpermps, 32, ymm0
r_avx vptest
r4_size vbroadcastss, 4, dword, xmm0
r4_size vbroadcastss, 4, dword, ymm0

r4_size vbroadcastsd, 8, qword, ymm0
r3 vbroadcastf128, 16, ymm0

r_avx vpabsb
r_avx vpabsw
r_avx vpabsd

; Sign-extending loads: the memory operand is narrower than the destination,
; so each width is given its exact source size.
r4_size vpmovsxbw, 8, qword, xmm0
r4_size vpmovsxbw, 16, oword, ymm0

r4_size vpmovsxbd, 4, dword, xmm0
r4_size vpmovsxbd, 8, qword, ymm0

r4_size vpmovsxbq, 2, word, xmm0
r4_size vpmovsxbq, 4, dword, ymm0

r4_size vpmovsxwd, 8, qword, xmm0
r4_size vpmovsxwd, 16, oword, ymm0

r4_size vpmovsxwq, 4, dword, xmm0
r4_size vpmovsxwq, 8, qword, ymm0

r4_size vpmovsxdq, 8, qword, xmm0
r4_size vpmovsxdq, 16, oword, ymm0

r_avx vpmuldq
r_avx vpcmpeqq
r_avx vmovntdqa
r_avx vpackusdw

; VMASKMOVPS/PD is complex and can't be tested here.

; Zero-extending loads, same source sizes as the sign-extending ones above.
r4_size vpmovzxbw, 8, qword, xmm0
r4_size vpmovzxbw, 16, oword, ymm0

r4_size vpmovzxbd, 4, dword, xmm0
r4_size vpmovzxbd, 8, qword, ymm0

r4_size vpmovzxbq, 2, word, xmm0
r4_size vpmovzxbq, 4, dword, ymm0

r4_size vpmovzxwd, 8, qword, xmm0
r4_size vpmovzxwd, 16, oword, ymm0

r4_size vpmovzxwq, 4, dword, xmm0
r4_size vpmovzxwq, 8, qword, ymm0

r4_size vpmovzxdq, 8, qword, xmm0
r4_size vpmovzxdq, 16, oword, ymm0

r3 vpermd, 32, ymm0
r_avx vpcmpgtq
r_avx vpminsb
r_avx vpminsd
r_avx vpminuw
r_avx vpminud
r_avx vpmaxsb
r_avx vpmaxsd
r_avx vpmaxuw
r_avx vpmaxud
r_avx vpmulld
r3 vphminposuw, 16, xmm0
r_avx vpsrlvd
r_avx vpsrlvq
r_avx vpsravd
r_avx vpsllvd
r_avx vpsllvq

r4_size vpbroadcastd, 4, dword, xmm0
r4_size vpbroadcastd, 4, dword, ymm0

r4_size vpbroadcastq, 8, qword, xmm0
r4_size vpbroadcastq, 8, qword, ymm0

r4_size vbroadcasti128, 16, oword, ymm0

; VPMASKMOVD/Q is complex and can't be tested here.
; V{P,}GATHER* is complex and can't be tested here.
; Packed FMA forms at both vector widths.
r_avx_fma vfmaddsub132pd
r_avx_fma vfmsubadd132pd
r_avx_fma vfmaddsub132ps
r_avx_fma vfmsubadd132ps

r_avx_fma vfmadd132pd
r_avx_fma vfmadd132ps
r_avx_fma vfmsub132pd
r_avx_fma vfmsub132ps
r_avx_fma vfnmadd132pd
r_avx_fma vfnmadd132ps
r_avx_fma vfnmsub132pd
r_avx_fma vfnmsub132ps
r_avx_fma vfmadd213pd
r_avx_fma vfmadd213ps
r_avx_fma vfmsub213pd
r_avx_fma vfmsub213ps
r_avx_fma vfnmadd213pd
r_avx_fma vfnmadd213ps
r_avx_fma vfnmsub213pd
r_avx_fma vfnmsub213ps
r_avx_fma vfmadd231pd
r_avx_fma vfmadd231ps
r_avx_fma vfmsub231pd
r_avx_fma vfmsub231ps
r_avx_fma vfnmadd231pd
r_avx_fma vfnmadd231ps
r_avx_fma vfnmsub231pd
r_avx_fma vfnmsub231ps
r_avx_fma vfmaddsub213pd
r_avx_fma vfmaddsub213ps
r_avx_fma vfmsubadd213pd
r_avx_fma vfmsubadd213ps
r_avx_fma vfmaddsub231pd
r_avx_fma vfmaddsub231ps
r_avx_fma vfmsubadd231pd
r_avx_fma vfmsubadd231ps

; Scalar FMA forms: 8-byte source for *sd, 4-byte source for *ss.
r5_fma_sized vfmadd132sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmadd132ss, 4, dword, xmm0, xmm1

r5_fma_sized vfmsub132sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmsub132ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmadd132sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmadd132ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmsub132sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmsub132ss, 4, dword, xmm0, xmm1

r5_fma_sized vfmadd213sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmadd213ss, 4, dword, xmm0, xmm1

r5_fma_sized vfmsub213sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmsub213ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmadd213sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmadd213ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmsub213sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmsub213ss, 4, dword, xmm0, xmm1

r5_fma_sized vfmadd231sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmadd231ss, 4, dword, xmm0, xmm1

r5_fma_sized vfmsub231sd, 8, qword, xmm0, xmm1
r5_fma_sized vfmsub231ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmadd231sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmadd231ss, 4, dword, xmm0, xmm1

r5_fma_sized vfnmsub231sd, 8, qword, xmm0, xmm1
r5_fma_sized vfnmsub231ss, 4, dword, xmm0, xmm1

r3 vaesimc, 16, xmm0
r3 vaesenc, 16, xmm0
r3 vaesenclast, 16, xmm0
r3 vaesdec, 16, xmm0
r3 vaesdeclast, 16, xmm0

; BMI ops whose memory operand comes last reuse the r5_fma_sized macro.
r5_fma_sized andn, 4, dword, eax, ebx
r5_fma_sized andn, 8, qword, rax, rbx

; The "special" ops below take the memory operand in the middle position, so
; they are written by hand: each size is read at [r15 - size] and at [r14].

; bzhi is a bit special.
bzhi eax, dword [r15 - 4], ebx
bzhi eax, dword [r14], ebx

bzhi rax, qword [r15 - 8], rbx
bzhi rax, qword [r14], rbx

r5_fma_sized pext, 4, dword, eax, ebx
r5_fma_sized pext, 8, qword, rax, rbx

r5_fma_sized pdep, 4, dword, eax, ebx
r5_fma_sized pdep, 8, qword, rax, rbx

r5_fma_sized mulx, 4, dword, eax, ebx
r5_fma_sized mulx, 8, qword, rax, rbx

; bextr is a bit special.
bextr eax, dword [r15 - 4], ebx
bextr eax, dword [r14], ebx

bextr rax, qword [r15 - 8], rbx
bextr rax, qword [r14], rbx

; shlx is a bit special.
shlx eax, dword [r15 - 4], ebx
shlx eax, dword [r14], ebx

shlx rax, qword [r15 - 8], rbx
shlx rax, qword [r14], rbx

; sarx is a bit special.
sarx eax, dword [r15 - 4], ebx
sarx eax, dword [r14], ebx

sarx rax, qword [r15 - 8], rbx
sarx rax, qword [r14], rbx

; shrx is a bit special.
shrx eax, dword [r15 - 4], ebx
shrx eax, dword [r14], ebx

shrx rax, qword [r15 - 8], rbx
shrx rax, qword [r14], rbx

; VEX Map 3
; Instructions that take an immediate; every call site passes 0 except rorx,
; which passes 1.
r4 vpermq, 32, ymm0, 0
r4 vpermpd, 32, ymm0, 0

r2_avx vpblendd, 0
r2_avx vpermilps, 0
r2_avx vpermilpd, 0
r4 vperm2f128, 32, ymm0, 0
r2_avx vroundps, 0
r2_avx vroundpd, 0

r4 vroundss, 4, xmm0, 0
r4 vroundsd, 8, xmm0, 0
r2_avx vblendps, 0
r2_avx vblendpd, 0
r2_avx vpblendw, 0
r2_avx vpalignr, 0

; Extracts store to memory (w5_size); the size argument is the stored width.
w5_size vpextrb, 1, byte, xmm0, 0
w5_size vpextrw, 2, word, xmm0, 0
w5_size vpextrd, 4, dword, xmm0, 0
w5_size vextractps, 4, dword, xmm0, 0
w5_size vpextrq, 8, qword, xmm0, 0
r4 vinsertf128, 16, ymm0, 0
w5_size vextractf128, 16, oword, ymm0, 0
w5_size vcvtps2ph, 8, qword, xmm0, 0
w5_size vcvtps2ph, 16, oword, ymm0, 0

; Inserts load from memory (r5_size / r4).
r5_size vpinsrb, 1, byte, xmm0, 0
r5_size vinsertps, 4, dword, xmm0, 0
r5_size vpinsrd, 4, dword, xmm0, 0
r5_size vpinsrq, 8, qword, xmm0, 0
r4 vinserti128, 16, ymm0, 0
w5_size vextracti128, 16, oword, ymm0, 0

r2_avx vdpps, 0
r4 vdppd, 16, xmm0, 0
r2_avx vmpsadbw, 0
r2_avx vpclmulqdq, 0
r4 vperm2i128, 32, ymm0, 0

; The variable blends pass a register (xmm1 / ymm1) instead of an immediate.
r_avx_2_reg vblendvps
r_avx_2_reg vblendvpd
r_avx_2_reg vpblendvb

r4 vpcmpestrm, 16, xmm0, 0
r4 vpcmpestri, 16, xmm0, 0
r4 vpcmpistrm, 16, xmm0, 0
r4 vpcmpistri, 16, xmm0, 0
r4 vaeskeygenassist, 16, xmm0, 0

r4 rorx, 4, eax, 1
r4 rorx, 8, rax, 1

; Done
; Reaching this point sets the RAX value the CONFIG header expects.
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/VEXGroup.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

mov r15, 0x100001000
mov r14, 0x100002000
mov rax, 0

%include "modrm_oob_macros.mac"

; VEX group 15
w2 vstmxcsr, 4
w2 vldmxcsr, 4

; VEX group 17
r3 blsr, 4, eax
r3 blsr, 8, rax
r3 blsmsk, 4, eax
r3 blsmsk, 8, rax
r3 blsi, 4, eax
r3 blsi, 8, rax

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/X87.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  }
}
%endif

mov r15, 0x100001000
mov r14, 0x100002000
mov rax, 0

%include "modrm_oob_macros.mac"

; X87

; These macros using the w* versions are actually reads.
%macro x87_f48_op 1
fldz
w3_size %1, 4, dword
w3_size %1, 8, qword
ffreep
%endmacro

%macro x87_i24_op 1
fldz
w3_size %1, 2, word
w3_size %1, 4, dword
ffreep
%endmacro

x87_f48_op fadd
x87_f48_op fmul
x87_f48_op fcom

; fcomp is special
fldz
w3_size fcomp, 4, dword
fldz
w3_size fcomp, 8, qword

x87_f48_op fsub
x87_f48_op fsubr
x87_f48_op fdiv
x87_f48_op fdivr

; fld is special
w3_size fld, 4, dword
ffreep
w3_size fld, 8, qword
ffreep
w3_size fld, 10, tword
ffreep

; fst is special
fldz
w3_size fst, 4, dword
w3_size fst, 8, qword

; fstp is special
fldz
w3_size fstp, 4, dword
fldz
w3_size fstp, 8, qword
fldz
w3_size fstp, 10, tword

w2 fnstenv, 28
w2 fldenv, 28

w2 o16 fnstenv, 14
w2 o16 fldenv, 14

w2 fnstcw, 2
w2 fldcw, 2

x87_i24_op fiadd
x87_i24_op fimul
x87_i24_op ficom

; ficomp is special
fldz
w3_size ficomp, 2, word
fldz
w3_size ficomp, 4, dword

x87_i24_op fisub
x87_i24_op fisubr

x87_i24_op fidiv
x87_i24_op fidivr

; fild is special
w3_size fild, 2, word
ffreep
w3_size fild, 4, dword
ffreep
w3_size fild, 8, qword
ffreep

; fist is special
fldz
w3_size fist, 2, word
w3_size fist, 4, dword

; fistp is special
fldz
w3_size fistp, 2, word
fldz
w3_size fistp, 4, dword
fldz
w3_size fistp, 8, qword

; fisttp is special
fldz
w3_size fisttp, 2, word
fldz
w3_size fisttp, 4, dword
fldz
w3_size fisttp, 8, qword

w2 fnsave, 108
w2 frstor, 108

w2 fnstsw, 2

w2 o16 fnsave, 94
w2 o16 frstor, 94

w3_size fbld, 10, tword
w3_size fbstp, 10, tword

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/modrm_oob/X87_Reduced.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096",
    "0x100002000": "4096"
  },
  "Env": { "FEX_X87REDUCEDPRECISION" : "1" }
}
%endif

mov r15, 0x100001000
mov r14, 0x100002000
mov rax, 0

%include "modrm_oob_macros.mac"

; X87

; These macros using the w* versions are actually reads.
%macro x87_f48_op 1
fldz
w3_size %1, 4, dword
w3_size %1, 8, qword
ffreep
%endmacro

%macro x87_i24_op 1
fldz
w3_size %1, 2, word
w3_size %1, 4, dword
ffreep
%endmacro

x87_f48_op fadd
x87_f48_op fmul
x87_f48_op fcom

; fcomp is special
fldz
w3_size fcomp, 4, dword
fldz
w3_size fcomp, 8, qword

x87_f48_op fsub
x87_f48_op fsubr
x87_f48_op fdiv
x87_f48_op fdivr

; fld is special
w3_size fld, 4, dword
ffreep
w3_size fld, 8, qword
ffreep
w3_size fld, 10, tword
ffreep

; fst is special
fldz
w3_size fst, 4, dword
w3_size fst, 8, qword

; fstp is special
fldz
w3_size fstp, 4, dword
fldz
w3_size fstp, 8, qword
fldz
w3_size fstp, 10, tword

w2 fnstenv, 28
w2 fldenv, 28

w2 o16 fnstenv, 14
w2 o16 fldenv, 14

w2 fnstcw, 2
w2 fldcw, 2

x87_i24_op fiadd
x87_i24_op fimul
x87_i24_op ficom

; ficomp is special
fldz
w3_size ficomp, 2, word
fldz
w3_size ficomp, 4, dword

x87_i24_op fisub
x87_i24_op fisubr

x87_i24_op fidiv
x87_i24_op fidivr

; fild is special
w3_size fild, 2, word
ffreep
w3_size fild, 4, dword
ffreep
w3_size fild, 8, qword
ffreep

; fist is special
fldz
w3_size fist, 2, word
w3_size fist, 4, dword

; fistp is special
fldz
w3_size fistp, 2, word
fldz
w3_size fistp, 4, dword
fldz
w3_size fistp, 8, qword

; fisttp is special
fldz
w3_size fisttp, 2, word
fldz
w3_size fisttp, 4, dword
fldz
w3_size fisttp, 8, qword

w2 fnsave, 108
w2 frstor, 108

w2 fnstsw, 2

w2 o16 fnsave, 94
w2 o16 frstor, 94

w3_size fbld, 10, tword
w3_size fbstp, 10, tword

; Done
mov rax, 1
hlt


================================================
FILE: unittests/ASM/mov.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RAX": "0xFFFFFFFFFFFFFFFF",
    "RBX": "0xFFFFFFFFFFFFFFFF",
    "RCX": "0xFFFFFFFFFFFFFFFF",
    "RDX": "0xDEADBEEFBAD0DAD1",
    "R15": "0xDEADBEEFBAD0DAD1"
  }
}
%endif

mov rax, -1
mov rbx, -1
mov rcx, -1

mov r15, qword 0xDEADBEEFBAD0DAD1
mov rdx, qword 0xDEADBEEFBAD0DAD1

;mov al, dl
;mov bx, dx
;mov ecx, edx
;mov al, -1
;mov ax, -1
;mov eax, -1
;mov rax, qword -1
;mov rax, 0
;mov al, al
;mov rbx, -1
;mov bx, ax
;mov ax, ax
;mov ax, ax
;mov eax, eax
;mov rax, rax
hlt


================================================
FILE: unittests/ASM/movups.asm
================================================
%ifdef CONFIG
{
  "Ignore": ["RAX", "RDX"],
  "RegData": {
    "RAX" : "0x0000FFFF",
    "XMM0": ["0x3f800000", "0x40000000"],
    "XMM1": ["0x3f800000", "0x40000000"],
    "XMM2": ["0x3f800000", "0x40000000"],
    "XMM3": ["0x3f800000", "0x8100000080000000"],
    "XMM4": ["0xDEADBEEFBFD0DAD1", "0x4141414142424242"],
    "XMM5": ["0xDEADBEEFBAD0DAD1", "0"],
    "XMM6": ["0xDEADBEEFBFD0DAD1", "0"]
  }
}
%endif

jmp label
label:
mov rax, 0x3f800000
;mov rsi, 0xdeadbeefbaddad1
;rdseed eax
;vpermd ymm0, ymm1, ymm2
mov rdx, 0xe0000000
mov [rdx], eax
mov eax, 0x40000000
mov [rdx + 8], eax

movups xmm0, [rdx]
movups xmm1, xmm0

movups [rdx + 16], xmm1
movups xmm2, [rdx + 16]

; Upper moves
mov eax, 0x80000000

mov [rdx + 32], eax
mov eax, 0x81000000
mov [rdx + 36], eax

movups xmm3, xmm0
movhps xmm3, [rdx + 32]

mov rax, 0xDEADBEEFBAD0DAD1
mov [rdx + 32], rax
mov rax, 0x4141414142424242
mov [rdx + 40], rax
movups xmm4, [rdx + 32]
movq xmm5, [rdx + 32]

por xmm4, xmm0
movq xmm6, xmm4
paddq xmm7, xmm6
mov rax, 0xFFFFFFFFFFFFFFFF
mov [rdx + 32], rax
mov [rdx + 40], rax
movdqu xmm8, [rdx + 32]
pmovmskb eax, xmm8

;fcomp dword [0]
;fldl2t
;fld1
hlt


================================================
FILE: unittests/ASM/movzx.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "RBX": "0xFFFFFFFFFFFF00D1",
    "RCX": "0x00000000000000D1",
    "RDX": "0xDAD1",
    "RDI": "0xDAD1"
  }
}
%endif

mov rax, qword 0xDEADBEEFBAD0DAD1

mov rbx, -1
mov rcx, -1
mov rdx, -1
mov rdi, -1

movzx bx,  al ; 8bit-> 16bit
movzx ecx, al ; 8bit-> 32bit
movzx edx, ax ; 16bit-> 32bit
movzx rdi, ax ; 16bit -> 64bit

hlt

================================================
FILE: unittests/ASM/pslldq.asm
================================================
%ifdef CONFIG
{
  "Match": "All",
  "RegData": {
    "XMM0": ["0xadbeefbad0dad100", "0x41414141414141de"],
    "XMM1": ["0x41deadbeefbad0da", "0x0041414141414141"]
  }
}
%endif

; Build a 128-bit pattern in memory at rdx: low qword from rax, high qword from rcx.
mov rdx, 0xe8000000
mov rax, 0xDEADBEEFBAD0DAD1
mov rcx, 0x4141414141414141

mov [rdx], rax
mov [rdx + 8], rcx

; XMM0: whole 128-bit value shifted left by one byte.
movups xmm0, [rdx]
pslldq xmm0, 1

; XMM1: the same source value shifted right by one byte.
movups xmm1, [rdx]
psrldq xmm1, 1

hlt


================================================
FILE: unittests/ASM/x87_stack.asm
================================================
%ifdef CONFIG
{
  "RegData": {
    "RAX": "0x4142434445464748",
    "RBX": "0"
  }
}
%endif

; Round-trip a qword through the x87 stack: fld it from .data, fstp it to .data_mov.
lea rax, [rel .data]
lea rbx, [rel .data_mov]

fld qword [rax]
fstp qword [rbx]

; RAX receives the qword that fstp stored; RBX receives the qword directly
; after it, which the expected register data requires to still be zero.
mov rax, [rbx]
mov rbx, [rbx + 8]
hlt

align 4096
.data:
dq 0x4142434445464748
dq 0x5152535455565758

.data_mov:
dq 0
dq 0


================================================
FILE: unittests/CMakeLists.txt
================================================
if (NOT MINGW)
  add_subdirectory(APITests/)
  add_subdirectory(POSIX/)
  add_subdirectory(gvisor-tests/)
  add_subdirectory(gcc-target-tests-32/)
  add_subdirectory(gcc-target-tests-64/)
  add_subdirectory(Utilities/)

  if (BUILD_THUNKS)
    add_subdirectory(ThunkLibs)
    add_subdirectory(ThunkFunctionalTests)
  endif()

  if (BUILD_FEX_LINUX_TESTS)
    add_subdirectory(FEXLinuxTests/)
  endif()
endif()

add_subdirectory(ASM/)
add_subdirectory(32Bit_ASM/)
if (ENABLE_VIXL_DISASSEMBLER)
  # Tests are only valid to run if the vixl disassembler is enabled and the active JIT is the ARM64 JIT.
  add_subdirectory(InstructionCountCI/)
endif()


================================================
FILE: unittests/Example.asm
================================================
; If you want a specific configuration at the top of asm file then make sure to wrap it in ifdef and endif.
; This allows the python script to extract the json and nasm to ignore the section
;
; X86 State option that can be compared
; - All: Makes sure all options are compared
; - None: No options
; ===== Specific options ====
; -- GPRs --
; RAX, RBX, RCX, RDX
; RSI, RDI, RBP, RSP
; R8-R15
; -- XMM --
; XMM0-XMM15
; -- Misc --
; RIP
; FS, GS
; Flags
; -- X87 / MMX / 3DNow --
; MM0-MM7
; ===========================
; Match: Forces full matching of types
;   - Type: String or List of strings
;   - Default: All
; Ignore: Forces types to be ignored when matching. Overwrites Matches
;   - Default: None
;   - Type: String or List of strings
; RegData: Makes sure that a register contains specific data
;   - Default: Any data
;   - Type: Dict of key:value pairs
;   - >64bit registers should contain a list of values for each 64bit value
;
; Additional config options
; ABI : {SystemV-64, Win64, None}
;   - Default: SystemV-64
; StackSize : Stack size that the test needs
;   - Default : 4096
;   - Stack address starts at: [0xc000'0000, 0xc000'0000 + StackSize)
; EntryPoint : Entrypoint for the assembly
;   - Default: 1
;   - 0 is invalid since that is special cased
; MemoryRegions: Memory Regions for the tests to use
;   - Default: No memory regions generated
;   - Dict of key:value pairs
;   - Key indicates the memory base
;   - Value indicates the memory region size
;   - WARNING: Emulator sets up some default regions that you don't want to intersect with
;   - Additionally the VM only has 64GB of virtual memory. If you go past this size, expect failure
;   - 0xb000'0000 - FS Memory base
;   - 0xc000'0000 - Stack pointer base
;   - 0xd000'0000 - Linux BRK memory base
; MemoryData: Prepopulate one or more memory regions with data
;   - Default: None
;   - Dict of key:value pairs
;   - Key is address
;   - Value is a string with hex data.
;       - No leading 0x needed.
;       - Spaces allowed

%ifdef CONFIG
{
  "Match": "All",
  "Ignore": ["XMM0", "Flags"],
  "RegData": {
    "RAX": "1"
  },
  "MemoryRegions": {
    "0x100000000": "4096"
  },
  "MemoryData": {
    "0x100000000" : "00000001 00000000 00000000 00000000",
    "0x100000020" : "fa aa 55 33",
    "0x100000038" : "0x123456789"
  }
}
%endif

mov eax, 1
ret


================================================
FILE: unittests/FEXLinuxTests/CMakeLists.txt
================================================
include(ExternalProject)
ExternalProject_Add(FEXLinuxTests
  PREFIX FEXLinuxTests
  SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tests"
  BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/FEXLinuxTests_64"
  CMAKE_ARGS
  "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
  "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${X86_64_TOOLCHAIN_FILE}"
  "-DENABLE_CLANG_THUNKS=True"
  "-DBITNESS=64"
  INSTALL_COMMAND ""
  BUILD_ALWAYS ON)

ExternalProject_Add(FEXLinuxTests_32
  PREFIX FEXLinuxTests_32
  SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/tests"
  BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/FEXLinuxTests_32"
  CMAKE_ARGS
  "-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}"
  "-DCMAKE_TOOLCHAIN_FILE:FILEPATH=${X86_32_TOOLCHAIN_FILE}"
  "-DENABLE_CLANG_THUNKS=True"
  "-DBITNESS=32"
  INSTALL_COMMAND ""
  BUILD_ALWAYS ON)

# this kind of sucks, but reglob
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS tests/*.cpp)
file(GLOB_RECURSE TESTS_64_ONLY CONFIGURE_DEPENDS tests/*.64.cpp)
file(GLOB_RECURSE TESTS_32_ONLY CONFIGURE_DEPENDS tests/*.32.cpp)

# Apply bitness-specific exclude lists
list(REMOVE_ITEM TESTS ${TESTS_64_ONLY})
list(REMOVE_ITEM TESTS ${TESTS_32_ONLY})

# Registers ctest cases for every test source in a list.
#
# Arguments:
#   Tests        - list of test source files; the test name is each file's NAME_WE component
#   BinDirectory - directory under CMAKE_CURRENT_BINARY_DIR that holds the built test binaries
#   Bitness      - suffix appended to the test name and binary name (32 or 64 at the call sites)
#
# For each source a "<name>.<bitness>.jit.flt" test is added that runs the binary
# through guest_test_runner.py in "guest" mode with the FEX target. When
# ARCHITECTURE_x86_64 is set, a "<name>.<bitness>.host.flt" test is added as well
# (except for thunk_testlib), running the binary in "host" mode.
function(AddTests Tests BinDirectory Bitness)
  foreach(TEST ${Tests})
    get_filename_component(TEST_NAME ${TEST} NAME_WE)
    set(BIN_PATH "${CMAKE_CURRENT_BINARY_DIR}/${BinDirectory}/${TEST_NAME}.${Bitness}")
    set(TEST_CASE "${TEST_NAME}.${Bitness}")

    if(TEST_NAME STREQUAL "thunk_testlib")
      # Test thunking only if thunks are enabled and supported
      if(NOT BUILD_THUNKS OR ENABLE_GLIBC_ALLOCATOR_HOOK_FAULT)
        continue()
      endif()
    endif()

    # Add jit test case
    add_test(NAME "${TEST_CASE}.jit.flt"
      COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
      "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
      "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
      "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
      "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
      "${TEST_CASE}"
      "guest"
      "$<TARGET_FILE:FEX>"
      "${BIN_PATH}")

    # Environment applied to every emulated run of this test.
    set_property(TEST "${TEST_CASE}.jit.flt" APPEND PROPERTY ENVIRONMENT "FEX_OUTPUTLOG=stderr;FEX_SILENTLOG=0;FEX_MAXINST=500")

    # The thunk test additionally needs to be pointed at the CI thunk configuration.
    if(TEST_NAME STREQUAL "thunk_testlib")
      set_property(TEST "${TEST_CASE}.jit.flt" APPEND PROPERTY ENVIRONMENT "FEX_THUNKCONFIG=${CMAKE_SOURCE_DIR}/Data/CI/FEXLinuxTestsThunks.json")
    endif()

    if (ARCHITECTURE_x86_64 AND NOT TEST_NAME STREQUAL "thunk_testlib")
      # Add host test case
      add_test(NAME "${TEST_CASE}.host.flt"
        COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
        "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures_Host"
        "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
        "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests_Host"
        "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
        "${TEST_CASE}"
        "host"
        "${BIN_PATH}")
      # A runner exit code of 125 is reported by ctest as "skipped" rather than failed.
      set_property(TEST "${TEST_CASE}.host.flt" APPEND PROPERTY SKIP_RETURN_CODE 125)
    endif()
    # Same skip convention for the emulated variant.
    set_property(TEST "${TEST_CASE}.jit.flt" APPEND PROPERTY SKIP_RETURN_CODE 125)
  endforeach()
endfunction()

# Execute combined 32-bit and 64-bit tests.
AddTests("${TESTS}" "FEXLinuxTests_64" 64)
AddTests("${TESTS}" "FEXLinuxTests_32" 32)
# Execute tests that are only 64-bit.
AddTests("${TESTS_64_ONLY}" "FEXLinuxTests_64" 64)
# Execute tests that are only 32-bit.
AddTests("${TESTS_32_ONLY}" "FEXLinuxTests_32" 32)

if(TEST thunk_testlib.64.jit.flt)
  # Ensure libfex_thunk_test is found even when using an uncommon install prefix
  set_property(TEST "thunk_testlib.32.jit.flt" APPEND PROPERTY ENVIRONMENT "LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib")
  set_property(TEST "thunk_testlib.64.jit.flt" APPEND PROPERTY ENVIRONMENT "LD_LIBRARY_PATH=${CMAKE_INSTALL_PREFIX}/lib")
endif()

# Only emulated
add_custom_target(fex_linux_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "30" ${TEST_JOB_FLAG} "-R" "\.*\.jit\.flt$$"
  DEPENDS FEXLinuxTests FEXLinuxTests_32 FEX)

# Only host
add_custom_target(fex_linux_tests_host
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "30" ${TEST_JOB_FLAG} "-R" "\.*\.host\.flt$$"
  DEPENDS FEXLinuxTests FEXLinuxTests_32)

# Both host and emulated
add_custom_target(fex_linux_tests_all
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "30" ${TEST_JOB_FLAG} "-R" "\.*\.flt$$"
  DEPENDS FEXLinuxTests FEXLinuxTests_32 FEX)


================================================
FILE: unittests/FEXLinuxTests/Disabled_Tests
================================================
###
### Disabled tests ###
###

# These sometimes crash FEX with SIGSEGV
timer-sigev-thread.32
timer-sigev-thread.64


================================================
FILE: unittests/FEXLinuxTests/Disabled_Tests_Host
================================================


================================================
FILE: unittests/FEXLinuxTests/Expected_Output
================================================


================================================
FILE: unittests/FEXLinuxTests/Flake_Tests
================================================
smc-mt-1.32
smc-mt-2.32
smc-mt-1.64
smc-mt-2.64
pthread_cancel.64
pthread_cancel.32


================================================
FILE: unittests/FEXLinuxTests/Known_Failures
================================================
###
### Disabled tests ###
###

# These sometimes crash FEX with SIGSEGV
timer-sigev-thread.32
timer-sigev-thread.64

# These trigger various quirks in FEX's signal handling
synchronous-signal-block.32
synchronous-signal-block.64

###
### Failing Tests ###
###

# these will be fixed with FEX_TICKET(1725)
sigtest_samask.32
sigtest_samask.64
sigtest_sigmask.32
sigtest_sigmask.64

# Disabled since FEX's FaultSafeMemcpy is intentionally stub-implemented
syscalls_efault.32
syscalls_efault.64

# partial instruction decode is known to fail
noexec_protect.64


================================================
FILE: unittests/FEXLinuxTests/tests/CMakeLists.txt
================================================
cmake_minimum_required(VERSION 3.14)
project(FEXLinuxTests)

set(CMAKE_CXX_STANDARD 20)

set(GENERATE_GUEST_INSTALL_TARGETS TRUE)

# Use intel masm syntax. ATT style asm syntax is archaic and hard to read.
add_compile_options(-masm=intel)
# Override a define so catch2 doesn't use ATT style inline asm
add_definitions(-D"CATCH_BREAK_INTO_DEBUGGER\(\)"="[]{ if\( Catch::isDebuggerActive\(\) \) { __builtin_trap\(\)\; } }\(\)")

file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS *.cpp)
if(BITNESS EQUAL 64)
  file(GLOB_RECURSE TESTS_32_ONLY CONFIGURE_DEPENDS *.32.cpp)
  list(REMOVE_ITEM TESTS ${TESTS_32_ONLY})
else()
  file(GLOB_RECURSE TESTS_64_ONLY CONFIGURE_DEPENDS *.64.cpp)
  list(REMOVE_ITEM TESTS ${TESTS_64_ONLY})
endif()

option(CATCH_BUILD_STATIC_LIBRARY "" ON)
set(CATCH_BUILD_STATIC_LIBRARY ON)
add_subdirectory(../../../External/Catch2/ Catch2)

foreach(TEST ${TESTS})
  get_filename_component(TEST_NAME ${TEST} NAME_WE)

  add_executable(${TEST_NAME}.${BITNESS} ${TEST})
  target_link_libraries(${TEST_NAME}.${BITNESS} PRIVATE Catch2::Catch2WithMain)
  target_include_directories(${TEST_NAME}.${BITNESS} PRIVATE include/)
endforeach()

target_link_libraries(pthread_cancel.${BITNESS} PRIVATE pthread)

target_link_options(smc-1-dynamic.${BITNESS} PRIVATE -z execstack)

target_link_libraries(smc-mt-1.${BITNESS} PRIVATE pthread)

target_link_libraries(smc-mt-2.${BITNESS} PRIVATE pthread)

target_link_libraries(smc-shared-1.${BITNESS} PRIVATE rt pthread)

target_link_libraries(smc-shared-2.${BITNESS} PRIVATE rt pthread)

target_link_libraries(thunk_testlib.${BITNESS} PRIVATE ${CMAKE_DL_LIBS})

target_link_libraries(timer-sigev-thread.${BITNESS} PRIVATE rt pthread)

target_link_libraries(smc-unexec-stack.${BITNESS} PRIVATE -Wl,-z,noexecstack)

target_link_options(smc-exec-stack.${BITNESS} PRIVATE -Wl,-z,execstack)

# Must use lld because it has the nognustack option
target_link_options(smc-missing-gnustack.${BITNESS} PRIVATE -fuse-ld=lld -Wl,-z,nognustack)


================================================
FILE: unittests/FEXLinuxTests/tests/cpu/cpu_count.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <cpuid.h>
#include <optional>
#include <unistd.h>
#include <thread>

TEST_CASE("cpu count - libraries") {
  // The C++ runtime's processor count has to agree with both the configured
  // and the currently online processor counts reported by sysconf.
  const unsigned int hw_concurrency = std::thread::hardware_concurrency();

  CHECK(hw_concurrency == sysconf(_SC_NPROCESSORS_CONF));

  CHECK(hw_concurrency == sysconf(_SC_NPROCESSORS_ONLN));
}

// Topology summary derived from CPUID by cpuid_calculate_core_info().
struct core_info {
  // CPUID.1 EBX[31:24].
  uint32_t local_apicid;
  // Maximum number of logical processors (MNLP) computed from CPUID 0x8000'0008 ECX.
  uint32_t max_addressible_ids;
  // Core count as computed from CPUID leaf 4, CPUID.1 EBX[23:16], or MNLP depending on the path taken.
  uint32_t cores;
  // Thread count matching `cores` for the same path.
  uint32_t threads;
  // CPUID.1 EDX bit 28.
  bool HTT;
};

// The four registers written by a single CPUID invocation.
struct cpuid_fn {
  uint32_t eax, ebx, ecx, edx;
};

// Executes CPUID with EAX = func and ECX = leaf and returns the resulting
// EAX/EBX/ECX/EDX values.
cpuid_fn get_cpuid(uint32_t func, uint32_t leaf = 0) {
  cpuid_fn fn {};
  __asm volatile("cpuid" : "=a"(fn.eax), "=b"(fn.ebx), "=c"(fn.ecx), "=d"(fn.edx) : "a"(func), "c"(leaf));

  return fn;
}

// Derives a core_info topology summary purely from CPUID.
//
// Returns std::nullopt when the CPU does not report basic leaf 1 or extended
// leaf 0x8000'0008, since both are required for the calculation.
std::optional<core_info> cpuid_calculate_core_info() {
  core_info info {};

  // Legacy path
  const auto cpuid_fn_0 = get_cpuid(0);
  if (cpuid_fn_0.eax < 1) {
    return std::nullopt;
  }

  const auto cpuid_fn_1 = get_cpuid(1);

  info.local_apicid = cpuid_fn_1.ebx >> 24;
  info.HTT = (cpuid_fn_1.edx >> 28) & 1;

  const auto cpuid_fn_8000_0000 = get_cpuid(0x8000'0000U);

  if (cpuid_fn_8000_0000.eax < 0x8000'0008) {
    return std::nullopt;
  }

  const auto cpuid_fn_8000_0008 = get_cpuid(0x8000'0008U);
  // ECX[15:12]: ApicIdSize.
  const uint32_t apic_id_size = (cpuid_fn_8000_0008.ecx >> 12) & 0xF;

  // E.5.2: MNLP (Maximum Number of Logical Processors)
  uint32_t MNLP {};

  if (apic_id_size) {
    // Extended topology.
    MNLP = 1U << apic_id_size;
  } else {
    // Legacy path: MNLP = NC + 1. NC is the full low byte ECX[7:0]; masking
    // with only four bits would truncate the count above 16 logical processors.
    MNLP = (cpuid_fn_8000_0008.ecx & 0xFF) + 1;
  }

  info.max_addressible_ids = MNLP;

  const auto cpuid_fn_4 = get_cpuid(4);
  if (cpuid_fn_4.eax & 0xF) {
    // Intel exclusive cpuid function, AMD returns zero as unsupported.
    // `Maximum number of addressable IDs for processor cores in the physical package`
    info.cores = (cpuid_fn_4.eax >> 26) + 1;
    if (info.HTT) {
      // `A value of 1 for HTT indicates the value in CPUID.1.EBX[23:16] (the Maximum number of addressable IDs for logical processors in
      // this package) is valid for the package.`
      // `Maximum number of addressable IDs for logical processors in this physical package`
      info.threads = (cpuid_fn_1.ebx >> 16) & 0xFF;
    } else {
      info.threads = info.cores;
    }
  } else if (info.HTT) {
    // No leaf 4 data: take the core count from CPUID.1 EBX[23:16] and report two threads per core.
    info.cores = (cpuid_fn_1.ebx >> 16) & 0xFF;
    info.threads = info.cores * 2;
  } else {
    // Legacy path means cores/threads is equal to MNLP.
    info.cores = info.threads = MNLP;
  }

  return info;
}

// Cross-checks the CPUID-derived topology against the processor count
// reported by the C++ runtime.
TEST_CASE("cpu count - cpuid") {
  const auto hw_concurrency = std::thread::hardware_concurrency();
  const auto core_info = cpuid_calculate_core_info();
  // Nothing below is meaningful if the required CPUID leaves are missing.
  REQUIRE(core_info.has_value());
  // The APIC ID of the executing CPU must fit inside both the runtime's CPU count and the addressable ID space.
  CHECK(core_info->local_apicid < hw_concurrency);
  CHECK(core_info->local_apicid < core_info->max_addressible_ids);
  // The addressable ID space must be able to cover every CPU the runtime reports.
  CHECK(core_info->max_addressible_ids >= hw_concurrency);
  if (core_info->HTT) {
    // May not be entirely correct on systems that mix HTT and non-HTT cpu cores.
    CHECK((core_info->cores * 2) == core_info->threads);
  } else {
    CHECK(core_info->cores == core_info->threads);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/fd/test_close_range.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <cstdint>
#include <unistd.h>

TEST_CASE("Close Range") {
  // Raw syscall number handed straight to syscall() below.
  constexpr uint32_t CloseRangeSyscallNum = 436;
  // Number of extra duplicates opened after the base descriptor.
  constexpr size_t ExtraDescriptors = 15;

  const int fd_base = dup(STDOUT_FILENO);
  for (size_t remaining = ExtraDescriptors; remaining != 0; --remaining) {
    REQUIRE(dup(fd_base) >= 0);
  }

  // The upper bound is deliberately ~0U: FEX must not hang while handling
  // a range that extends to the maximum descriptor value.
  ::syscall(CloseRangeSyscallNum, fd_base + 1, ~0U, 0);

  // The range started one above fd_base, so fd_base must still be open
  // and closing it has to succeed.
  CHECK(close(fd_base) == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/fs/self_symlink.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <filesystem>

TEST_CASE("proc-self symlink") {
  // Regression test for behaviour seen with the Darwinia Linux game port.
  // The game sanity checks that `/proc/self/exe` is a symlink and that the
  // link resolves to a regular file. Those queries end up in newfstatat or
  // statx, an edge case FEX previously handled incorrectly.
  const std::filesystem::path path {"/proc/self/exe"};
  std::error_code ec;

  {
    // Following the link: the target has to be a regular file.
    const auto status = std::filesystem::status(path, ec);
    REQUIRE(!ec);
    CHECK(status.type() == std::filesystem::file_type::regular);
  }

  {
    // Not following the link: the path itself has to be a symlink.
    const auto status = std::filesystem::symlink_status(path, ec);
    REQUIRE(!ec);
    CHECK(status.type() == std::filesystem::file_type::symlink);
  }

  // The game would go on to call std::filesystem::read_symlink; that step is not covered here.
}


================================================
FILE: unittests/FEXLinuxTests/tests/include/fpstate.h
================================================
#pragma once
#include <cstdint>

namespace FEX::Unittests {
#ifndef __x86_64__
struct __uint128_t {
  uint64_t raw[2];
};
#endif
// Software-reserved descriptor embedded at the end of the fpstate area
// (see _libc_fpstate::sw_reserved). It tells the reader whether extended
// (XSAVE) context follows and which features it contains. Must stay exactly
// 48 bytes, as enforced by the static_assert below.
struct fpx_sw_bytes {
  // Magic values; magic1 is compared against FP_XSTATE_MAGIC_1 in HasExtendedContext().
  static constexpr uint32_t FP_XSTATE_MAGIC_1 = 0x46505853;
  static constexpr uint32_t FP_XSTATE_MAGIC_2 = 0x46505845;

  // Bit positions usable with the xfeatures bitmask.
  enum FeatureFlag : uint32_t {
    FEATURE_FP = 1U << 0,
    FEATURE_SSE = 1U << 1,
    FEATURE_YMM = 1U << 2,
    FEATURE_BNDREGS = 1U << 3,
    FEATURE_BNDCSR = 1U << 4,
    FEATURE_OPMASK = 1U << 5,
    FEATURE_ZMM_Hi256 = 1U << 6,
    FEATURE_Hi16_ZMM = 1U << 7,
    FEATURE_PT_UNIMPL = 1U << 8,
    FEATURE_PKRU = 1U << 9,
    FEATURE_PASID = 1U << 10,
    FEATURE_RESERVED11 = 1U << 11,
    FEATURE_RESERVED12 = 1U << 12,
    FEATURE_RESERVED13 = 1U << 13,
    FEATURE_RESERVED14 = 1U << 14,
    FEATURE_LBR = 1U << 15,
    FEATURE_RESERVED16 = 1U << 16,
    FEATURE_XTILE_CFG = 1U << 17,
    FEATURE_XTILE_DATA = 1U << 18,
  };

  // True when magic1 marks the encompassing frame as an xstate frame.
  bool HasExtendedContext() const {
    return magic1 == FP_XSTATE_MAGIC_1;
  }

  // True when the FEATURE_YMM bit is set in xfeatures.
  bool HasYMMH() const {
    return (xfeatures & FEATURE_YMM) != 0;
  }

  // If magic1 is set to FP_XSTATE_MAGIC_1, then the encompassing
  // frame is an xstate frame. If 0, then it's a legacy frame.
  uint32_t magic1;

  // Total size of the fpstate area
  // - magic1 = 0                 -> sizeof(fpstate)
  // - magic1 = FP_XSTATE_MAGIC_1 -> sizeof(xstate) + extensions (if any)
  uint32_t extended_size;

  // Feature bitmask describing supported features.
  uint64_t xfeatures;

  // Actual XSAVE state size, based on above xfeatures
  uint32_t xstate_size;

  // Reserved data
  uint32_t padding[7];
};
static_assert(sizeof(fpx_sw_bytes) == 48);

struct xstate_header {
  uint64_t xfeatures;
  uint64_t reserved1[2];
  uint64_t reserved2[5];
};
static_assert(sizeof(xstate_header) == 64);

struct ymmh_state {
  __uint128_t ymmh_space[16];
};
static_assert(sizeof(ymmh_state) == 256);

#ifdef __x86_64__
struct _libc_fpstate {
  // This is in FXSAVE format
  uint16_t fcw;
  uint16_t fsw;
  uint16_t ftw;
  uint16_t fop;
  uint64_t fip;
  uint64_t fdp;
  uint32_t mxcsr;
  uint32_t mxcsr_mask;
  __uint128_t _st[8];
  __uint128_t _xmm[16];
  uint32_t _res[12];

  // Linux uses 12 of the bytes relegated for software purposes
  // to store info describing any existing XSAVE context data.
  fpx_sw_bytes sw_reserved;
};
static_assert(sizeof(FEX::Unittests::_libc_fpstate) == 512, "This needs to be the right size");

/**
 * Extended state that includes both the main fpstate
 * and the extended state.
 */
struct xstate {
  _libc_fpstate fpstate;
  xstate_header xstate_hdr;
  ymmh_state ymmh;
};
static_assert(sizeof(xstate) == 832);
#else

struct _libc_fpreg {
  uint16_t significand[4];
  uint16_t exponent;
};
static_assert(sizeof(FEX::Unittests::_libc_fpreg) == 10, "This needs to be the right size");

enum fpstate_magic {
  // Legacy fpstate
  MAGIC_FPU = 0xFFFF'0000,
  // Contains extended state information
  MAGIC_XFPSTATE = 0x0,
};
struct _libc_fpstate {
  uint32_t fcw;
  uint32_t fsw;
  uint32_t ftw;
  uint32_t fop;
  uint32_t cssel;
  uint32_t dataoff;
  uint32_t datasel;
  FEX::Unittests::_libc_fpreg _st[8];
  uint32_t status;

  // Extended FPU data
  uint32_t pad[6]; // Ignored FXSR data
  uint32_t mxcsr;
  uint32_t reserved;
  __uint128_t _st_pad[8];   // Ignored st data
  __uint128_t _xmm[8];      // First 8 XMM registers
  uint32_t pad2[44];        // Second 8 XMM registers plus padding
  fpx_sw_bytes sw_reserved; // extended state encoding
};
static_assert(sizeof(FEX::Unittests::_libc_fpstate) == 624, "This needs to be the right size");

struct xstate {
  _libc_fpstate fpstate;
  xstate_header xstate_hdr;
  ymmh_state ymmh;
};
static_assert(sizeof(xstate) == 944);
#endif
} // namespace FEX::Unittests


================================================
FILE: unittests/FEXLinuxTests/tests/include/simple_x86.h
================================================
#pragma once
#include <cstdint>

/**
 * Tiny x86 machine-code emitter that writes little-endian bytes into a
 * caller-provided buffer.
 *
 * Writes never go past the end of the buffer. A write that does not fit is
 * truncated at the buffer end and recorded, and can be queried afterwards
 * through HadError().
 */
class SimpleX86Emit final {
public:
  // Register numbers as encoded in the low three bits of the opcode.
  enum Reg {
    RAX = 0,
    RCX = 1,
    RDX = 2,
    RBX = 3,
    RSP = 4,
    RBP = 5,
    RSI = 6,
    RDI = 7,
    // r8 and higher not implemented.
  };

  /**
   * @param Ptr  Start of the output buffer.
   * @param size Number of bytes available at Ptr.
   */
  SimpleX86Emit(void* Ptr, std::size_t size)
    : Ptr {static_cast<uint8_t*>(Ptr)}
    , EndPtr {static_cast<uint8_t*>(Ptr) + size} {}

  // Emits a near return (0xC3).
  void ret() {
    db<uint8_t>(0xc3);
  }

  // Emits `mov reg, imm32`: opcode 0xB8 + reg followed by the 32-bit immediate.
  void mov(Reg reg, uint32_t val) {
    db<uint8_t>(0xB8 + reg);
    db(val);
  }

  // Emits a raw 32-bit value.
  void dd(uint32_t val) {
    db(val);
  }

  // Returns true once any write has been truncated by the end of the buffer.
  bool HadError() const {
    return Overflowed;
  }

private:
  uint8_t* Ptr;
  uint8_t* EndPtr;
  // Named without a leading underscore + capital letter, since such
  // identifiers are reserved for the implementation.
  bool Overflowed {};

  // Writes the bytes of v in little-endian order, stopping at the buffer end.
  template<typename T>
  void db(T v) {
    std::size_t written {};
    while (written < sizeof(T) && Ptr != EndPtr) {
      *Ptr = static_cast<uint8_t>(v >> (written * 8));
      ++Ptr;
      ++written;
    }

    // Accumulate instead of overwrite so an earlier failure can never be cleared.
    Overflowed |= written != sizeof(T);
  }
};


================================================
FILE: unittests/FEXLinuxTests/tests/signal/Syscall_state.32.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <csetjmp>
#include <unistd.h>
#include <sys/syscall.h>
#include <signal.h>
#include <string.h>
#include <sys/wait.h>

struct CPUState {
  uint32_t Registers[8];
  uint32_t eflags;
};

CPUState CapturedState {};

// Indices into CPUState::Registers. These are the test's own ordering and are
// translated from the ucontext REG_* indices by name in the signal handler.
enum RegNums {
  TEST_REG_EAX = 0,
  TEST_REG_EBX,
  TEST_REG_ECX,
  TEST_REG_EDX,
  TEST_REG_ESI,
  TEST_REG_EDI,
  TEST_REG_ESP,
  TEST_REG_EBP,
};

// Loads eflags and every general purpose register except ESP from the
// by-value State argument, issues `int 0x80` with the syscall number taken
// from State.Registers[TEST_REG_EAX], and then executes `hlt` to fault.
//
// The function is naked, so State is read directly from the stack relative to
// ESP. It does not return: the fault handler is expected to longjmp away.
__attribute__((naked)) void DoZeroRegSyscallFault(CPUState State) {
  // i386 stores arguments on the stack.
  __asm volatile(
    R"(
    // Load flags
    push dword ptr [esp + %[FlagsOffset]]
    popfd

    // Do getpid syscall.
    // Overwrites some arguments.
    // Syscall num
    mov eax, dword ptr [esp + %[RAXOffset]]

    // Load remaining registers that we can
    mov ebx, dword ptr [esp + %[RBXOffset]];
    mov ecx, dword ptr [esp + %[RCXOffset]];
    mov edx, dword ptr [esp + %[RDXOffset]]
    mov esi, dword ptr [esp + %[RSIOffset]]
    mov edi, dword ptr [esp + %[RDIOffset]];
    mov ebp, dword ptr [esp + %[RBPOffset]];
    // Can't load RSP

    int 0x80;

    // Immediately fault
    hlt;

    // We long jump from the signal handler, so this won't continue.
  )"
    :
    // The stack is offset by 4-bytes due to the call.
    : [RAXOffset] "i"(offsetof(CPUState, Registers[TEST_REG_EAX]) + 4), [RDXOffset] "i"(offsetof(CPUState, Registers[TEST_REG_EDX]) + 4),
      [RSIOffset] "i"(offsetof(CPUState, Registers[TEST_REG_ESI]) + 4), [RDIOffset] "i"(offsetof(CPUState, Registers[TEST_REG_EDI]) + 4),
      [RBXOffset] "i"(offsetof(CPUState, Registers[TEST_REG_EBX]) + 4), [RCXOffset] "i"(offsetof(CPUState, Registers[TEST_REG_ECX]) + 4),
      [RBPOffset] "i"(offsetof(CPUState, Registers[TEST_REG_EBP]) + 4), [FlagsOffset] "i"(offsetof(CPUState, eflags) + 4)

    : "memory");
}

// Jump target armed by the test case with setjmp(); the handler returns through it.
static jmp_buf LongJump {};

/**
 * SIGSEGV handler for the `hlt` fault that follows the syscall in DoZeroRegSyscallFault.
 *
 * Copies the general purpose registers from the signal context into CapturedState and
 * long-jumps back to the test case. If EAX holds a syscall error (a value in the Linux
 * error range [-4095, -1]) the process is terminated with exit code 1 instead.
 *
 * @param signal   Delivered signal number (unused).
 * @param siginfo  Signal information (unused).
 * @param context  ucontext_t describing the interrupted register state.
 */
static void CapturingHandler(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;

  // Work on the raw register value as an unsigned quantity so the range check does not
  // depend on implicit signed/unsigned conversions.
  const auto Result = static_cast<uint32_t>(_context->uc_mcontext.gregs[REG_EAX]);
  if (Result >= -4095U) {
    // Failure to syscall
    const uint32_t Errno = -Result;
    fprintf(stderr, "Parent thread failed to syscall: %u %s\n", Errno, strerror(static_cast<int>(Errno)));
    _exit(1);
  }

  CPUState& State = CapturedState;

  // These aren't 1:1 mapped: glibc's REG_* order differs from the TEST_REG_* slots.
#define COPY(REG) State.Registers[TEST_REG_##REG] = _context->uc_mcontext.gregs[REG_##REG]
  COPY(EAX);
  COPY(EBX);
  COPY(ECX);
  COPY(EDX);
  COPY(ESI);
  COPY(EDI);
  COPY(ESP);
  COPY(EBP);
#undef COPY

  longjmp(LongJump, 1);
}


// Verifies that register state loaded immediately before a syscall is what a signal
// handler observes immediately after it.
// NOTE: despite the test name, the syscall issued below is SYS_sched_yield.
TEST_CASE("getppid: State") {
  // Set up a signal handler for SIGSEGV
  struct sigaction act {};
  act.sa_sigaction = CapturingHandler;
  act.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &act, nullptr);

  // Distinct recognisable pattern per register slot (indexed by TEST_REG_*).
  CPUState Object = {
    .Registers =
      {
        0x1011'1213ULL,
        0x2022'2223ULL,
        0x3033'3233ULL,
        0x4044'4243ULL,
        0x5055'5253ULL,
        0x6066'6263ULL,
        0x7077'7273ULL,
        0x8088'8283ULL,
      },
    .eflags = (1U << 0) | // CF
              (1U << 1) | // RA1
              (1U << 2) | // PF
              (1U << 4) | // AF
              (1U << 6) | // ZF
              (1U << 7) | // SF
              (1U << 9) | // IF (Is always 1 in userspace)
              (1U << 11)  // OF
  };

  // EAX carries the syscall number, so its pattern is replaced.
  constexpr uint64_t SyscallNum = SYS_sched_yield;
  Object.Registers[TEST_REG_EAX] = SyscallNum;
  // First pass returns 0 and runs the faulting helper; the handler's longjmp makes
  // setjmp return 1 and skips it.
  int Value = setjmp(LongJump);
  if (Value == 0) {
    DoZeroRegSyscallFault(Object);
  }

  for (size_t i = 0; i < (sizeof(Object.Registers) / sizeof(Object.Registers[0])); ++i) {
    if (i == TEST_REG_ESP || i == TEST_REG_EAX) {
      // Needs special handling.
      continue;
    }

    CHECK(Object.Registers[i] == CapturedState.Registers[i]);
  }

  // Syscall success return
  CHECK(CapturedState.Registers[TEST_REG_EAX] == 0);
  // RSP is untested here.
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/Syscall_state.64.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <csetjmp>
#include <unistd.h>
#include <sys/syscall.h>
#include <signal.h>
#include <string.h>
#include <sys/wait.h>

// Register image used by this test, both as the input loaded by DoZeroRegSyscallFault
// and as the snapshot written by the signal handler.
// The field offsets are baked into the inline assembly through offsetof(), so the
// layout must not be reordered.
struct CPUState {
  // General purpose registers, indexed by the REG_* constants (REG_RAX, REG_R8, ...)
  // used throughout this file; the handler fills it with a memcpy of the first 16 gregs.
  uint64_t Registers[16];

  // RFLAGS image that DoZeroRegSyscallFault loads with push/popfq.
  uint64_t eflags;
};

// Snapshot filled in by CapturingHandler. Only the Registers array is written there;
// eflags keeps its zero-initialised value.
CPUState CapturedState {};

// This refers to the label defined in the inline ASM below.
// Only its address is meaningful; the declared type is never read through.
extern "C" uint64_t HLT_INST;
// Address of the `hlt` that follows the `syscall`, i.e. the address the test expects
// to find in RCX after the syscall instruction.
void* const HaltLocation = &HLT_INST;

// Loads RFLAGS and every general purpose register except RSP from `*State`, executes
// `syscall`, and then executes `hlt` (labelled HLT_INST) so the process faults right
// after the syscall.
//
// The syscall that runs is whatever number the caller stored in
// State->Registers[REG_RAX]; the "getpid" wording inside the assembly text is only a
// comment. RDI holds the State pointer and is therefore overwritten last.
//
// The six registers pushed at the top are never popped in this function: per the
// trailing assembly comment, control leaves through the signal handler's long jump
// rather than returning.
__attribute__((naked)) void DoZeroRegSyscallFault(CPUState* State) {
  // x86-64 ABI puts State pointer in to RDI
  __asm volatile(R"(
    // Save some registers
    push rbx
    push rbp
    push r12
    push r13
    push r14
    push r15

    // Load flags
    push qword ptr [rdi + %[FlagsOffset]]
    popfq

    // Do getpid syscall.
    // Overwrites some arguments.
    // Syscall num
    mov rax, qword ptr [rdi + %[RAXOffset]]

    // Load remaining registers that we can
    mov rbx, qword ptr [rdi + %[RBXOffset]];
    mov rcx, qword ptr [rdi + %[RCXOffset]];
    mov rdx, qword ptr [rdi + %[RDXOffset]]
    mov rsi, qword ptr [rdi + %[RSIOffset]]
    mov rbp, qword ptr [rdi + %[RBPOffset]];
    // Can't load RSP
    mov r8, qword ptr [rdi + %[R8Offset]]
    mov r9, qword ptr [rdi + %[R9Offset]];
    mov r10, qword ptr [rdi + %[R10Offset]]
    mov r11, qword ptr [rdi + %[R11Offset]];
    mov r12, qword ptr [rdi + %[R12Offset]];
    mov r13, qword ptr [rdi + %[R13Offset]];
    mov r14, qword ptr [rdi + %[R14Offset]];
    mov r15, qword ptr [rdi + %[R15Offset]];

    // Overwrite RDI last.
    mov rdi, qword ptr [rdi + %[RDIOffset]];

    syscall;

    // Immediately fault
    HLT_INST:
    hlt;

    // We long jump from the signal handler, so this won't continue.
  )"
                 :
                 // Byte offsets of each register slot inside CPUState, passed as immediates.
                 : [RAXOffset] "i"(offsetof(CPUState, Registers[REG_RAX])), [RDXOffset] "i"(offsetof(CPUState, Registers[REG_RDX])),
                   [R10Offset] "i"(offsetof(CPUState, Registers[REG_R10])), [R8Offset] "i"(offsetof(CPUState, Registers[REG_R8])),
                   [RSIOffset] "i"(offsetof(CPUState, Registers[REG_RSI])), [RDIOffset] "i"(offsetof(CPUState, Registers[REG_RDI])),
                   [RBXOffset] "i"(offsetof(CPUState, Registers[REG_RBX])), [RCXOffset] "i"(offsetof(CPUState, Registers[REG_RCX])),
                   [RBPOffset] "i"(offsetof(CPUState, Registers[REG_RBP])), [R9Offset] "i"(offsetof(CPUState, Registers[REG_R9])),
                   [R11Offset] "i"(offsetof(CPUState, Registers[REG_R11])), [R12Offset] "i"(offsetof(CPUState, Registers[REG_R12])),
                   [R13Offset] "i"(offsetof(CPUState, Registers[REG_R13])), [R14Offset] "i"(offsetof(CPUState, Registers[REG_R14])),
                   [R15Offset] "i"(offsetof(CPUState, Registers[REG_R15])), [FlagsOffset] "i"(offsetof(CPUState, eflags))

                 : "memory");
}


// Jump target armed by the test case with setjmp(); the handler returns through it.
static jmp_buf LongJump {};

/**
 * SIGSEGV handler for the `hlt` fault that follows the syscall in DoZeroRegSyscallFault.
 *
 * Copies the first 16 general purpose registers from the signal context into
 * CapturedState and long-jumps back to the test case. If RAX holds a syscall error
 * (a value in the Linux error range [-4095, -1]) the process is terminated with exit
 * code 1 instead.
 *
 * @param signal   Delivered signal number (unused).
 * @param siginfo  Signal information (unused).
 * @param context  ucontext_t describing the interrupted register state.
 */
static void CapturingHandler(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;

  // gregs[] is a signed 64-bit type. Comparing it directly against the 32-bit constant
  // -4095U promotes the constant to a positive 64-bit value, so negative error returns
  // would never match. Do the comparison in unsigned 64-bit arithmetic instead.
  const auto Result = static_cast<uint64_t>(_context->uc_mcontext.gregs[REG_RAX]);
  if (Result >= -4095ULL) {
    // Failure to syscall
    const uint64_t Errno = -Result;
    fprintf(stderr, "Parent thread failed to syscall: %lu %s\n", static_cast<unsigned long>(Errno), strerror(static_cast<int>(Errno)));
    _exit(1);
  }

  CPUState& State = CapturedState;

  // CPUState::Registers uses the same indexing as gregs[], so a straight copy suffices.
  memcpy(&State.Registers, _context->uc_mcontext.gregs, sizeof(State.Registers));

  longjmp(LongJump, 1);
}


// Verifies that register state loaded immediately before a `syscall` instruction is
// what a signal handler observes immediately after it, including the RCX/R11 values
// that the instruction itself produces.
// NOTE: despite the test name, the syscall issued below is SYS_sched_yield.
TEST_CASE("getppid: State") {
  // Set up a signal handler for SIGSEGV
  struct sigaction act {};
  act.sa_sigaction = CapturingHandler;
  act.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &act, nullptr);

  // Distinct recognisable pattern per register slot (indexed by REG_*).
  CPUState Object = {
    .Registers =
      {
        0x1011'1213'1415'1617ULL,
        0x2022'2223'2425'2627ULL,
        0x3033'3233'3435'3637ULL,
        0x4044'4243'4445'4647ULL,
        0x5055'5253'5455'5657ULL,
        0x6066'6263'6465'6667ULL,
        0x7077'7273'7475'7677ULL,
        0x8088'8283'8485'8687ULL,
        0x9099'9293'9495'9697ULL,
        0xA0AA'A2A3'A4A5'A6A7ULL,
        0xB0BB'B2B3'B4B5'B6B7ULL,
        0xC0CC'C2C3'C4C5'C6C7ULL,
        0xD0DD'D2D3'D4D5'D6D7ULL,
        0xE0EE'E2E3'E4E5'E6E7ULL,
        0xF0FF'F2F3'F4F5'F6F7ULL,
        0x0000'0203'0405'0607ULL,
      },
    .eflags = (1U << 0) | // CF
              (1U << 1) | // RA1
              (1U << 2) | // PF
              (1U << 4) | // AF
              (1U << 6) | // ZF
              (1U << 7) | // SF
              (1U << 9) | // IF (Is always 1 in userspace)
              (1U << 11)  // OF
  };

  // RAX carries the syscall number, so its pattern is replaced.
  constexpr uint64_t SyscallNum = SYS_sched_yield;
  Object.Registers[REG_RAX] = SyscallNum;
  // First pass returns 0 and runs the faulting helper; the handler's longjmp makes
  // setjmp return 1 and skips it.
  int Value = setjmp(LongJump);
  if (Value == 0) {
    DoZeroRegSyscallFault(&Object);
  }

  for (size_t i = 0; i < 16; ++i) {
    if (i == REG_R11 || i == REG_RCX || i == REG_RSP || i == REG_RAX) {
      // Needs special handling.
      continue;
    }

    CHECK(Object.Registers[i] == CapturedState.Registers[i]);
  }

  // Syscall success return
  CHECK(CapturedState.Registers[REG_RAX] == 0);

  // syscall instruction RCX return.
  CHECK(CapturedState.Registers[REG_RCX] == reinterpret_cast<uint64_t>(HaltLocation));

  // Syscall instruction R11 eflags return.
  CHECK(Object.eflags == CapturedState.Registers[REG_R11]);

  // RSP is untested here.
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/SystemInstructions.64.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Fall back to the 32-bit instruction pointer index when REG_RIP is not provided.
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

// x86 exception vectors the tests expect to see in gregs[REG_TRAPNO].
enum trapno {
  X86_TRAPNO_UD = 0x06, // Invalid opcode
  X86_TRAPNO_GP = 0x0D, // General protection fault
};

// Token-pastes two identifiers.
#define CONCAT(x, y) x##y
// Declares the TestBegin_<num> / TestEnd_<num> assembly labels emitted by Test() so
// their addresses can be taken from C++. Only the addresses are used.
#define TestSymbols(num)                       \
  extern "C" uint64_t CONCAT(TestBegin_, num); \
  extern "C" uint64_t CONCAT(TestEnd_, num);

// Emits the instruction under test between the labels TestBegin_<num> and
// TestEnd_<num> and defines the EXPECTED_* locals that the surrounding TEST() body
// checks against.
//   num     - unique index used to build the label names
//   asm     - assembly text of the instruction under test
//   trapno, errno, si_code, signal - expected values for the captured fault
// capturing_handler_skip is set to the encoded length of the instruction (distance
// between the two labels) so the handler resumes right after it.
#define Test(num, asm, trapno, errno, si_code, signal)                                                      \
  capturing_handler_skip = (unsigned long)&CONCAT(TestEnd_, num) - (unsigned long)&CONCAT(TestBegin_, num); \
  const unsigned long EXPECTED_RIP = (unsigned long)&CONCAT(TestBegin_, num);                               \
  const int EXPECTED_TRAPNO = trapno;                                                                       \
  const int EXPECTED_ERR = errno;                                                                           \
  const int EXPECTED_SI_CODE = si_code;                                                                     \
  const int EXPECTED_SIGNAL = signal;                                                                       \
  __asm volatile("TestBegin_" #num ":" asm ";"                                                              \
                                           "TestEnd_" #num ":" ::                                           \
                                             : "memory");


// Defines one Catch2 test case for a single faulting instruction.
// It installs CapturingHandler for SIGSEGV, SIGTRAP and SIGILL, executes the
// instruction through Test(), and then compares the captured RIP, trap number, error
// code, si_code and signal against the expected values.
// `name` is stringized with #name, so passing a string literal keeps its quotes in
// the resulting test-case name.
#define TEST(num, name, asm, trapno, errno, _si_code, _signal)      \
  TestSymbols(num);                                                 \
  TEST_CASE("Signals: " #name) {                                    \
    struct sigaction act {};                                        \
    act.sa_sigaction = CapturingHandler;                            \
    act.sa_flags = SA_SIGINFO;                                      \
    sigaction(SIGSEGV, &act, nullptr);                              \
    sigaction(SIGTRAP, &act, nullptr);                              \
    sigaction(SIGILL, &act, nullptr);                               \
                                                                    \
    Test(num, asm, trapno, errno, _si_code, _signal);               \
                                                                    \
    REQUIRE(from_handler.has_value());                              \
    CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);       \
    CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO); \
    CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);       \
    CHECK(from_handler->si_code == EXPECTED_SI_CODE);               \
    CHECK(from_handler->signal == EXPECTED_SIGNAL);                 \
  }

// Argument order for every entry: index, test name, assembly, expected trap number,
// expected error code, expected si_code, expected signal.

// Instructions that explicitly are supported but must only work in CPL-0
// All of these are expected to raise a general protection fault reported as SIGSEGV
// with si_code 0x80.
TEST(0, "rdmsr", "rdmsr", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(1, "outs", "outs dx, byte ptr [rsi]", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(2, "ins", "ins byte ptr [rdi], dx", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(3, "cli", "cli", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(4, "clts", "clts", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(5, "invlpg", "invlpg [rax]", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(6, "lmsw", "lmsw [rax]", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(7, "ltr", "ltr [rax]", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(8, "mov cr0", "mov cr0, rax", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(9, "mov cr8", "mov cr8, rax", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(10, "mov rax, cr0", "mov rax, cr0", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(11, "mov rax, cr8", "mov rax, cr8", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(12, "mov rax, dr0", "mov rax, dr0", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(13, "mov dr0, rax", "mov dr0, rax", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(14, "rdpmc", "rdpmc", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(15, "sti", "sti", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
TEST(16, "swapgs", "swapgs", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
// The mnemonic spelling differs between clang and the non-clang toolchain.
#ifdef __clang__
TEST(17, "sysret", "sysret", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
#else
TEST(17, "sysret", "sysretd", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);
#endif
TEST(18, "wrmsr", "wrmsr", X86_TRAPNO_GP, 0, 0x80, SIGSEGV);

// Instructions not implemented
// These are expected to raise an invalid-opcode fault reported as SIGILL with si_code 2.
TEST(19, "monitor", "monitor", X86_TRAPNO_UD, 0, 2, SIGILL);
TEST(20, "mwait", "mwait", X86_TRAPNO_UD, 0, 2, SIGILL);
TEST(21, "sysenter", "sysenter", X86_TRAPNO_UD, 0, 2, SIGILL);
// The mnemonic spelling differs between clang and the non-clang toolchain.
#ifdef __clang__
TEST(22, "sysexit", "sysexit", X86_TRAPNO_UD, 0, 2, SIGILL);
#else
TEST(22, "sysexit", "sysexitd", X86_TRAPNO_UD, 0, 2, SIGILL);
#endif

// Differs between dr8 and dr0-7 variants.
// dr0-7: SIGSEGV
// dr8-15: SIGILL
// NOTE: index 20 is already taken by the mwait test above; use a fresh index if this
// entry is ever re-enabled.
// TEST(20, "mov rax, dr8", "mov rax, dr8", X86_TRAPNO_UD, 0, 2, SIGILL);


================================================
FILE: unittests/FEXLinuxTests/tests/signal/eflags_signal.cpp
================================================
#include <atomic>
#include <catch2/catch_test_macros.hpp>
#include <fstream>
#include <functional>
#include <optional>
#include <signal.h>
#include <thread>
#include <sys/syscall.h>
#include <linux/futex.h>

// DO_ASM(x, y): runs the flag-setting assembly `x`, publishes readiness by storing 1
// to *ReadyNotify, blocks in futex(Futex, FUTEX_WAIT, 0, nullptr), and finally runs the
// flag-checking assembly `y` once the wait returns.
// Everything between `x` and the syscall uses plain `mov`, which leaves the flags
// untouched. The expansion expects `Futex` and `ReadyNotify` to be in scope.
#if __SIZEOF_POINTER__ == 4
#define DO_ASM(x, y) \
  __asm volatile(x \
                 /* Need to late move syscall number since incoming asm will overwrite eax */ \
                 " mov eax, %[Syscall];" \
                 /* Notify we are ready (Without touching flags) */ \
                 "mov dword ptr [%[ReadyNotify]], 1;" \
                 /* Do a futex */ \
                 "int 0x80;" y \
                 : \
                 : [Syscall] "i"(SYS_futex), "b"(Futex), "c"(FUTEX_WAIT), "d"(0), "S"(0), [ReadyNotify] "r"(ReadyNotify) \
                 : "cc", "memory", "eax")
#else

#define DO_ASM(x, y) \
  __asm volatile(x \
                 /* Do a futex: load the syscall number and arguments */ \
                 " mov rax, %[Syscall];" \
                 " mov rdi, %[FutexAddr];" \
                 " mov rsi, %[FutexOp];" \
                 " mov rdx, %[ExpectedValue];" \
                 " mov r10, %[TimeoutAddr];" \
                 /* Notify we are ready (Without touching flags) */ \
                 "mov dword ptr [%[ReadyNotify]], 1;" \
                 "syscall;" y \
                 : \
                 : [Syscall] "i"(SYS_futex), [FutexAddr] "r"(Futex), [FutexOp] "i"(FUTEX_WAIT), [ExpectedValue] "i"(0), [TimeoutAddr] "i"(0), [ReadyNotify] "r"(ReadyNotify) \
                 /* rcx and r11 are overwritten by the syscall instruction itself */ \
                 : "cc", "memory", "rax", "rdi", "rsi", "rdx", "r10", "rcx", "r11")
#endif

// Clears CF, then blocks in the futex wait issued by DO_ASM.
// After the wait returns, `jnc` skips the `int3`; the breakpoint is only reached if CF
// is no longer clear.
static void ClearCFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Clear CF
    "clc;",
    // CF should still be cleared.
    "jnc 1f;"
    "int3;"
    "1:");
}

// Sets CF, then blocks in the futex wait issued by DO_ASM.
// After the wait returns, `jc` skips the `int3`; the breakpoint is only reached if CF
// is no longer set.
static void SetCFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Set CF
    "stc;",
    // CF should still be set.
    "jc 1f;"
    "int3;"
    "1:");
}

// Clears PF, then blocks in the futex wait issued by DO_ASM.
// 0 + 1 = 0x01 has an odd number of set bits in its low byte, so `inc` leaves PF clear.
// After the wait returns, `jnp` skips the `int3`; the breakpoint is only reached if PF
// is no longer clear.
static void ClearPFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Clear PF
    "mov eax, 0;"
    "inc eax;",

    // PF should still be cleared.
    "jnp 1f;"
    "int3;"
    "1:");
}

// Sets PF, then blocks in the futex wait issued by DO_ASM.
// 0x80 + 1 = 0x81 has an even number of set bits in its low byte, so `inc` sets PF.
// After the wait returns, `jp` skips the `int3`; the breakpoint is only reached if PF
// is no longer set.
static void SetPFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Set PF
    "mov eax, 0x80;"
    "inc eax;",
    // PF should still be set.
    "jp 1f;"
    "int3;"
    "1:");
}

// Clears ZF, then blocks in the futex wait issued by DO_ASM.
// 2 - 1 is non-zero, so `dec` leaves ZF clear.
// After the wait returns, `jnz` skips the `int3`; the breakpoint is only reached if ZF
// is no longer clear.
static void ClearZFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Clear ZF
    "mov eax, 2;"
    "dec eax;",
    // ZF should still be cleared.
    "jnz 1f;"
    "int3;"
    "1:");
}

// Sets ZF, then blocks in the futex wait issued by DO_ASM.
// 1 - 1 is zero, so `dec` sets ZF.
// After the wait returns, `jz` skips the `int3`; the breakpoint is only reached if ZF
// is no longer set.
static void SetZFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Set ZF
    "mov eax, 1;"
    "dec eax;",
    // ZF should still be set.
    "jz 1f;"
    "int3;"
    "1:");
}

// Clears SF, then blocks in the futex wait issued by DO_ASM.
// 1 - 1 = 0 has its sign bit clear, so `dec` leaves SF clear.
// After the wait returns, `jns` skips the `int3`; the breakpoint is only reached if SF
// is no longer clear.
static void ClearSFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Clear SF
    "mov eax, 1;"
    "dec eax;",
    // SF should still be cleared.
    "jns 1f;"
    "int3;"
    "1:");
}

// Sets SF, then blocks in the futex wait issued by DO_ASM.
// 0 - 1 = 0xffffffff has its sign bit set, so `dec` sets SF.
// After the wait returns, `js` skips the `int3`; the breakpoint is only reached if SF
// is no longer set.
static void SetSFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Set SF
    "mov eax, 0;"
    "dec eax;",
    // SF should still be set.
    "js 1f;"
    "int3;"
    "1:");
}

// Clears OF, then blocks in the futex wait issued by DO_ASM.
// 0 + 1 does not overflow as a signed value, so `inc` leaves OF clear.
// After the wait returns, `jno` skips the `int3`; the breakpoint is only reached if OF
// is no longer clear.
static void ClearOFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Clear OF
    "mov eax, 0;"
    "inc eax;",
    // OF should still be cleared.
    "jno 1f;"
    "int3;"
    "1:");
}

// Sets OF, then blocks in the futex wait issued by DO_ASM.
// 0x7fffffff + 1 overflows the signed 32-bit range, so `inc` sets OF.
// After the wait returns, `jo` skips the `int3`; the breakpoint is only reached if OF
// is no longer set.
static void SetOFAndWait(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify) {
  DO_ASM(
    // Set OF
    "mov eax, 0x7fffffff;"
    "inc eax;",
    // OF should still be set.
    "jo 1f;"
    "int3;"
    "1:");
}

// What the signal handler records about the interrupted thread.
// Field order matters: the handler fills it with a designated initializer.
struct CapturingData {
  // Signal number that was delivered.
  int Signal;
  // Flags register value read from the signal context (gregs[REG_EFL]).
  uint64_t eflags;
};

// Written by CapturingHandler, inspected and reset by RunTest.
std::optional<CapturingData> from_handler;
// Bit positions of the individual flags inside the captured flags value.
constexpr uint32_t EFL_CF = 0;
constexpr uint32_t EFL_PF = 2;
constexpr uint32_t EFL_ZF = 6;
constexpr uint32_t EFL_SF = 7;
constexpr uint32_t EFL_OF = 11;

static void CapturingHandler(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;

  from_handler = {
    .Signal = signal,
    .eflags = static_cast<uint64_t>(_context->uc_mcontext.gregs[REG_EFL]),
  };
}

// Signature shared by all *AndWait helpers: set a flag, announce readiness, block on the futex.
using TestHandler = std::function<void(std::atomic<uint32_t>* Futex, std::atomic<uint32_t>* ReadyNotify)>;

// Body of the worker thread.
// Makes SIGTERM deliverable to this thread, routes it to CapturingHandler, publishes
// the thread id through ThreadID, and then runs the flag helper (which blocks on Mutex).
static void ThreadHandler(std::atomic<uint32_t>* Mutex, std::atomic<uint32_t>* ReadyNotify, std::atomic<uint32_t>* ThreadID, TestHandler Test) {
  // Unblock SIGTERM.
  sigset_t TermOnly {};
  sigemptyset(&TermOnly);
  sigaddset(&TermOnly, SIGTERM);
  sigprocmask(SIG_UNBLOCK, &TermOnly, nullptr);

  // Set up a signal handler for SIGTERM
  struct sigaction TermAction {};
  TermAction.sa_flags = SA_SIGINFO;
  TermAction.sa_sigaction = CapturingHandler;
  sigaction(SIGTERM, &TermAction, nullptr);

  // The id has to be visible before the helper signals readiness.
  ThreadID->store(::gettid());
  Test(Mutex, ReadyNotify);
}

// Busy-polls /proc/<pid>/task/<tid>/status until its "State" line reports 'S'.
// Does not return until that state has been observed.
void WaitForThreadAsleep(uint32_t tid) {
  const std::string StatusPath = "/proc/" + std::to_string(::getpid()) + "/task/" + std::to_string(tid) + "/status";
  std::ifstream StatusFile {StatusPath, std::fstream::binary};
  std::string Entry;

  for (;;) {
    // Rewind and drop any error/eof state so the file is read again from the start.
    StatusFile.clear();
    StatusFile.seekg(0);

    // Stop scanning this pass as soon as a read hits end-of-file.
    while (std::getline(StatusFile, Entry) && !StatusFile.eof()) {
      if (Entry.find("State") == Entry.npos) {
        continue;
      }

      char StateCode {};
      if (sscanf(Entry.c_str(), "State: %c", &StateCode) != 1) {
        continue;
      }

      if (StateCode == 'S') {
        return;
      }
      // Some other state: start over with a fresh read of the file.
      break;
    }
  }
}
// Runs one flag test.
// Starts a worker that sets the flag via `Test` and blocks in a futex wait, sends the
// worker SIGTERM once it is asleep, and checks that the flags value captured by the
// handler has bit `FlagLocation` equal to `FlagValue`.
void RunTest(uint32_t FlagLocation, uint32_t FlagValue, TestHandler Test) {
  // Block SIGTERM.
  sigset_t TermOnly {};
  sigemptyset(&TermOnly);
  sigaddset(&TermOnly, SIGTERM);
  sigprocmask(SIG_BLOCK, &TermOnly, nullptr);

  std::atomic<uint32_t> Mutex {};
  std::atomic<uint32_t> ReadyNotify {};
  std::atomic<uint32_t> ThreadID {};

  std::thread Worker(ThreadHandler, &Mutex, &ReadyNotify, &ThreadID, Test);

  // Spin until the worker announces that it is about to enter the syscall.
  while (ReadyNotify.load() == 0) {
  }

  // Wait for thread to get in to the futex.
  const uint32_t WorkerTid = ThreadID.load();
  WaitForThreadAsleep(WorkerTid);

  tgkill(::getpid(), WorkerTid, SIGTERM);

  Worker.join();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler.value().Signal == SIGTERM);
  CHECK(((from_handler.value().eflags >> FlagLocation) & 1) == FlagValue);
  from_handler.reset();
}

// One test case per flag and per value: RunTest(bit position, expected bit value,
// helper that puts the flag into that state before blocking).
TEST_CASE("Signal-Flags-CF-0") {
  RunTest(EFL_CF, 0, ClearCFAndWait);
}
TEST_CASE("Signal-Flags-CF-1") {
  RunTest(EFL_CF, 1, SetCFAndWait);
}
TEST_CASE("Signal-Flags-PF-0") {
  RunTest(EFL_PF, 0, ClearPFAndWait);
}
TEST_CASE("Signal-Flags-PF-1") {
  RunTest(EFL_PF, 1, SetPFAndWait);
}
TEST_CASE("Signal-Flags-ZF-0") {
  RunTest(EFL_ZF, 0, ClearZFAndWait);
}
TEST_CASE("Signal-Flags-ZF-1") {
  RunTest(EFL_ZF, 1, SetZFAndWait);
}
TEST_CASE("Signal-Flags-SF-0") {
  RunTest(EFL_SF, 0, ClearSFAndWait);
}
TEST_CASE("Signal-Flags-SF-1") {
  RunTest(EFL_SF, 1, SetSFAndWait);
}
TEST_CASE("Signal-Flags-OF-0") {
  RunTest(EFL_OF, 0, ClearOFAndWait);
}
TEST_CASE("Signal-Flags-OF-1") {
  RunTest(EFL_OF, 1, SetOFAndWait);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/into.32.cpp
================================================
#include <atomic>
#include <signal.h>

// Set by the handler once a fault with the expected properties has been observed.
std::atomic<bool> CorrectFaultData {false};

// Signal handler: accepts the fault only when it is a SIGSEGV with a null si_addr
// and trap number 4 in the interrupted context.
static void handler(int signal, siginfo_t* siginfo, void* context) {
  const auto* InterruptedContext = static_cast<const ucontext_t*>(context);

  // Conditions are evaluated left to right and stop at the first mismatch.
  const bool IsExpectedFault =
    signal == SIGSEGV && siginfo->si_addr == nullptr && InterruptedContext->uc_mcontext.gregs[REG_TRAPNO] == 4;

  if (IsExpectedFault) {
    CorrectFaultData = true;
  }
}

// Installs the SIGSEGV handler, triggers an overflow trap with `into`, and reports
// through the exit code whether the handler saw the expected fault (0) or not (1).
int main() {
  struct sigaction FaultAction {};
  FaultAction.sa_flags = SA_SIGINFO;
  FaultAction.sa_sigaction = handler;
  sigaction(SIGSEGV, &FaultAction, nullptr);

  // 0x7f + 1 overflows the signed range of al, which sets OF so that `into` traps.
  __asm volatile(R"(
  mov eax, 0x7f;
  inc al;
  into;
  )" ::
                   : "eax");

  if (CorrectFaultData) {
    return 0;
  }
  return 1;
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_hlt.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Assembly label placed on the faulting instruction inside InvalidINT; declared here
// so that C++ code can take its address.
extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked helper consisting only of the faulting `hlt` followed by `ret`.
// The signal handler is expected to step over the `hlt` so that the `ret` runs.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
    IntInstruction:
    hlt;
    ret;
    )");
}

// Values the captured fault is compared against.
// The reported instruction pointer is the address of the `hlt` itself.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction);
// 0xD (13) is the general protection vector.
constexpr int EXPECTED_TRAPNO = 0xD;
constexpr int EXPECTED_ERR = 0;
// 128 (0x80) — presumably SI_KERNEL; confirm against the libc headers.
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGSEGV;

// Executes `hlt` in user mode and checks the fault that the signal handler captured.
TEST_CASE("Signals: Invalid HLT") {
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // `hlt` is a one-byte instruction; skipping it lets the helper reach its `ret`.
  capturing_handler_skip = 1;

  // Send every signal this test might see to the capturing handler.
  struct sigaction CaptureAction {};
  CaptureAction.sa_flags = SA_SIGINFO;
  CaptureAction.sa_sigaction = CapturingHandler;
  const int HandledSignals[] = {SIGSEGV, SIGTRAP, SIGILL};
  for (const int HandledSignal : HandledSignals) {
    sigaction(HandledSignal, &CaptureAction, nullptr);
  }

  InvalidINT();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_int.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Assembly label placed on the faulting instruction inside InvalidINT; declared here
// so that C++ code can take its address.
extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked helper consisting only of the faulting `int 0x2d` followed by `ret`.
// The signal handler is expected to step over the `int` so that the `ret` runs.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
  IntInstruction:
  int 0x2d;
  ret;
  )");
}

// Values the captured fault is compared against.
// The reported instruction pointer is the address of the `int` itself.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction);
// 13 is the general protection vector.
constexpr int EXPECTED_TRAPNO = 13;
// 362 == (0x2d << 3) | 2: the interrupt vector shifted into the selector-index field
// with bit 1 set.
constexpr int EXPECTED_ERR = 362;
// 128 (0x80) — presumably SI_KERNEL; confirm against the libc headers.
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGSEGV;

// Executes `int 0x2d` in user mode and checks the fault that the signal handler captured.
TEST_CASE("Signals: Invalid INT") {
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // `int imm8` is a two-byte instruction; skipping it lets the helper reach its `ret`.
  capturing_handler_skip = 2;

  // Send every signal this test might see to the capturing handler.
  struct sigaction CaptureAction {};
  CaptureAction.sa_flags = SA_SIGINFO;
  CaptureAction.sa_sigaction = CapturingHandler;
  const int HandledSignals[] = {SIGSEGV, SIGTRAP, SIGILL};
  for (const int HandledSignal : HandledSignals) {
    sigaction(HandledSignal, &CaptureAction, nullptr);
  }

  InvalidINT();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_int1.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Assembly label placed on the trapping instruction inside InvalidINT; declared here
// so that C++ code can take its address.
extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked helper consisting only of the one-byte `int1` opcode (emitted as a raw byte)
// followed by `ret`.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
  IntInstruction:
  .byte 0xF1; # int1
  ret;
  )");
}

// Values the captured trap is compared against.
// The reported instruction pointer is one byte past the label, i.e. already after the
// one-byte instruction, which is why the test uses a handler skip of 0.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction) + 1;
constexpr int EXPECTED_TRAPNO = 1;
constexpr int EXPECTED_ERR = 0;
// 1 — presumably TRAP_BRKPT; confirm against the libc headers.
constexpr int EXPECTED_SI_CODE = 1;
constexpr int EXPECTED_SIGNAL = SIGTRAP;

// Executes `int1` in user mode and checks the trap that the signal handler captured.
TEST_CASE("Signals: Invalid INT1") {
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // The expected instruction pointer is already past the instruction, so nothing is skipped.
  capturing_handler_skip = 0;

  // Send every signal this test might see to the capturing handler.
  struct sigaction CaptureAction {};
  CaptureAction.sa_flags = SA_SIGINFO;
  CaptureAction.sa_sigaction = CapturingHandler;
  const int HandledSignals[] = {SIGSEGV, SIGTRAP, SIGILL};
  for (const int HandledSignal : HandledSignals) {
    sigaction(HandledSignal, &CaptureAction, nullptr);
  }

  InvalidINT();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_int3.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Assembly label placed on the trapping instruction inside InvalidINT; declared here
// so that C++ code can take its address.
extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked helper consisting only of `int3` followed by `ret`.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
  IntInstruction:
  int3;
  ret;
  )");
}

// Values the captured trap is compared against.
// The reported instruction pointer is one byte past the label, i.e. already after the
// one-byte instruction, which is why the test uses a handler skip of 0.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction) + 1;
constexpr int EXPECTED_TRAPNO = 3;
constexpr int EXPECTED_ERR = 0;
// 128 (0x80) — presumably SI_KERNEL; confirm against the libc headers.
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGTRAP;

// Executes `int3` and checks the trap that the signal handler captured.
TEST_CASE("Signals: Invalid INT3") {
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // The expected instruction pointer is already past the instruction, so nothing is skipped.
  capturing_handler_skip = 0;

  // Send every signal this test might see to the capturing handler.
  struct sigaction CaptureAction {};
  CaptureAction.sa_flags = SA_SIGINFO;
  CaptureAction.sa_sigaction = CapturingHandler;
  const int HandledSignals[] = {SIGSEGV, SIGTRAP, SIGILL};
  for (const int HandledSignal : HandledSignals) {
    sigaction(HandledSignal, &CaptureAction, nullptr);
  }

  InvalidINT();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_ud2.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Assembly label placed on the faulting instruction inside InvalidINT; declared here
// so that C++ code can take its address.
extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked helper consisting only of the faulting `ud2` followed by `ret`.
// The signal handler is expected to step over the `ud2` so that the `ret` runs.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
  IntInstruction:
  ud2;
  ret;
  )");
}

// Values the captured fault is compared against.
// The reported instruction pointer is the address of the `ud2` itself.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction);
// 6 is the invalid-opcode vector.
constexpr int EXPECTED_TRAPNO = 6;
constexpr int EXPECTED_ERR = 0;
// 2 — presumably ILL_ILLOPN; confirm against the libc headers.
constexpr int EXPECTED_SI_CODE = 2;
constexpr int EXPECTED_SIGNAL = SIGILL;

// Executes `ud2` and checks the fault that the signal handler captured.
TEST_CASE("Signals: Invalid UD2") {
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // `ud2` is a two-byte instruction; skipping it lets the helper reach its `ret`.
  capturing_handler_skip = 2;

  // Send every signal this test might see to the capturing handler.
  struct sigaction CaptureAction {};
  CaptureAction.sa_flags = SA_SIGINFO;
  CaptureAction.sa_sigaction = CapturingHandler;
  const int HandledSignals[] = {SIGSEGV, SIGTRAP, SIGILL};
  for (const int HandledSignal : HandledSignals) {
    sigaction(HandledSignal, &CaptureAction, nullptr);
  }

  InvalidINT();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_util.h
================================================
#include <cstdint>
#include <signal.h>
#include <optional>

// Everything a test wants to inspect about a delivered signal.
struct CapturedHandlerState {
  mcontext_t mctx; // Machine context as it was when the handler was entered
  int signal;      // Signal number passed to the handler
  int si_code;     // si_code taken from the siginfo_t
};

// Most recent capture; empty until a handler has run.
std::optional<CapturedHandlerState> from_handler;

// Number of bytes to skip to resume from the signal handler
int capturing_handler_skip = 0;

// Number of times the signal handler has caught a signal
int capturing_handler_calls = 0;

// Signal handler that writes its context data to the global from_handler.
// The snapshot is taken first; afterwards the live context's instruction pointer is
// advanced by capturing_handler_skip and the call counter is bumped.
static void CapturingHandler(int signal, siginfo_t* siginfo, void* context) {
#ifdef REG_RIP
  constexpr int InstructionPointerIndex = REG_RIP;
#else
  constexpr int InstructionPointerIndex = REG_EIP;
#endif

  auto* LiveContext = static_cast<ucontext_t*>(context);

  from_handler = CapturedHandlerState {LiveContext->uc_mcontext, signal, siginfo->si_code};

  LiveContext->uc_mcontext.gregs[InstructionPointerIndex] += capturing_handler_skip;
  ++capturing_handler_calls;
}

#if __SIZEOF_POINTER__ == 4
// Register block found inside a 32-bit signal frame.
// NOTE(review): appears to mirror the Linux i386 `struct sigcontext` layout — confirm
// against the kernel headers before changing field order or sizes.
// Each 16-bit segment selector is followed by a 16-bit `*h` companion field, so every
// selector occupies a full 32-bit slot.
struct sigcontext_32 {
  uint16_t gs, gsh;
  uint16_t fs, fsh;
  uint16_t es, esh;
  uint16_t ds, dsh;
  uint32_t di;
  uint32_t si;
  uint32_t bp;
  uint32_t sp;
  uint32_t bx;
  uint32_t dx;
  uint32_t cx;
  uint32_t ax;
  uint32_t trapno;
  uint32_t err;
  // Instruction pointer; CapturingHandler_non_realtime advances it to resume execution.
  uint32_t ip;
  uint16_t cs, csh;
  uint32_t flags;
  uint32_t sp_at_signal;
  uint16_t ss, ssh;

  uint32_t fpstate;
  uint32_t oldmask;
  uint32_t cr2;
};
// Leading part of the stack frame seen by a handler installed without SA_SIGINFO:
// return-code pointer, signal number, then the register context.
// Only this prefix is declared; whatever follows in the real frame is not modelled.
struct sigframe_ia32 {
  uint32_t pretcode;
  int signal;
  sigcontext_32 sc;
  // <...>
  // Some extra state
};

// Leading part of the stack frame for a realtime (siginfo-style) handler.
// `pinfo` and `puc` are 32-bit values — presumably the addresses of `info` and `uc`
// below; confirm against the kernel's frame layout.
// Only this prefix is declared; whatever follows in the real frame is not modelled.
struct rt_sigframe_ia32 {
  uint32_t pretcode;
  int signal;
  uint32_t pinfo;
  uint32_t puc;
  siginfo_t info;
  ucontext_t uc;
  // <...>
  // Some extra state
};

// Capture produced by the stack-argument, non-realtime handler: the register context
// copied out of the signal frame plus the signal number and an si_code slot.
struct CapturedHandlerState_32 {
  sigcontext_32 mctx;
  int signal;
  int si_code;
};

// Raw handler arguments as received by a regparm handler.
struct CapturedHandlerState_regparm_32 {
  int signal;
  siginfo_t* siginfo;
  void* context;
};

// This capturing handler is for non-realtime signals pulling arguments from the stack.
std::optional<CapturedHandlerState_32> from_handler_32;
// This capturing handler is for non-realtime signals pulling arguments from regparm ABI.
std::optional<CapturedHandlerState_regparm_32> from_handler_regparm_32;

/*
 * This signal handler is for testing 32-bit non-realtime signal support.
 * This handler gives a signal, and a sigcontext_32 object.
 *
 * The arguments are passed on the stack for this function.
 *
 * The sigcontext_32 is not received as a parameter: it is located by offsetting this
 * function's own frame address. The captured copy is stored in from_handler_32 with
 * si_code set to 0, then the saved instruction pointer is advanced by
 * capturing_handler_skip and capturing_handler_calls is incremented.
 */
static void CapturingHandler_non_realtime(int signal, ...) {
  // Getting the context frame is really hard, so hardwire some magic.
  // Getting the frame address returns
  // struct frame {
  //  uint32_t ????;
  //  uint32_t pret;
  //  uint32_t signal;
  //  sigcontext_32 sc;
  // Skipping the first 4 bytes lines the pointer up with sigframe_ia32::pretcode.
  sigframe_ia32* frame = (sigframe_ia32*)((size_t)__builtin_frame_address(0) + 4);
  sigcontext_32* context = &frame->sc;

  // Snapshot before the instruction pointer is modified below.
  from_handler_32 = {*context, signal, 0};
  // NOTE: FEX_IP_REG is defined and undefined here but never used in this function;
  // the instruction pointer is reached through context->ip instead.
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  context->ip += capturing_handler_skip;
#undef FEX_IP_REG
  capturing_handler_calls++;
}

/*
 * This signal handler is for testing 32-bit realtime (SA_SIGINFO) signal support.
 * This handler gives a signal, a siginfo_t, and a ucontext_t (whose mcontext_t is captured).
 *
 * The arguments are passed in registers for this function (gnu::regparm(3) ABI).
 *
 * Records the machine context, signal and si_code in from_handler, then advances the saved
 * instruction pointer by capturing_handler_skip and bumps capturing_handler_calls.
 */
[[gnu::regparm(3)]]
static void CapturingHandler_realtime_regparm(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;

  from_handler = {_context->uc_mcontext, signal, siginfo->si_code};
// Pick the instruction-pointer slot for the current architecture.
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  _context->uc_mcontext.gregs[FEX_IP_REG] += capturing_handler_skip;
#undef FEX_IP_REG
  capturing_handler_calls++;
}

/*
 * This signal handler is for testing 32-bit non-realtime signal support.
 * This handler gives a signal, and that's it
 *
 * siginfo and context objects should always be nullptr in this case.
 *
 * The arguments are passed on in registers for this function (gnu::regparm(3) ABI).
 *
 * Side effects: stores the three raw argument values in from_handler_regparm_32, advances the `ip`
 * of the sigcontext found on the stack by capturing_handler_skip, and increments capturing_handler_calls.
 */
[[gnu::regparm(3)]]
static void CapturingHandler_non_realtime_regparm(int signal, siginfo_t* siginfo, void* context) {
  // Getting the context frame is really hard, so hardwire some magic.
  // Getting the frame address returns
  // struct frame {
  //  uint32_t ????;   (presumably this function's saved frame pointer)
  //  uint32_t pret;
  //  uint32_t signal;
  //  sigcontext_32 sc;
  // If volatile isn't used then the compiler optimizes this out.
  volatile sigframe_ia32* frame = (volatile sigframe_ia32*)((size_t)__builtin_frame_address(0) + 4);
  volatile sigcontext_32* context_stack = &frame->sc;

  // siginfo and context should be nullptr.
  from_handler_regparm_32 = {signal, siginfo, context};
  // The register arguments carry no context, so the resume address is changed through the stack frame.
  // (sigcontext_32 names the instruction pointer directly, so no REG_EIP/REG_RIP lookup is needed here.)
  context_stack->ip += capturing_handler_skip;
  capturing_handler_calls++;
}

/*
 * This signal handler is for testing 32-bit realtime signal support.
 * This handler gives a signal, a siginfo_t, and mcontext_t object.
 *
 * The arguments are passed on the stack for this function.
 * The function declares no parameters; it reads the signal number, siginfo and ucontext
 * straight out of the rt_sigframe_ia32 it locates relative to its own frame address.
 */
static void CapturingHandler_realtime() {
  // Getting the context frame is really hard, so hardwire some magic.
  // Getting the frame address returns
  // struct frame {
  //  uint32_t ????;
  //  rt_sigframe_ia32 frame;
  rt_sigframe_ia32* frame = (rt_sigframe_ia32*)((size_t)__builtin_frame_address(0) + 4);
  int signal = frame->signal;
  // Uses the siginfo/ucontext embedded in the frame rather than following the pinfo/puc slots.
  siginfo_t* siginfo = &frame->info;
  ucontext_t* _context = &frame->uc;

  from_handler = {_context->uc_mcontext, signal, siginfo->si_code};
// Pick the instruction-pointer slot for the current architecture.
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  _context->uc_mcontext.gregs[FEX_IP_REG] += capturing_handler_skip;
#undef FEX_IP_REG
  capturing_handler_calls++;
}

/*
 * Handler for testing 32-bit realtime (SA_SIGINFO) signal support through the regular glibc path.
 * It receives the signal number, a siginfo_t and a ucontext_t.
 *
 * It performs the same work as `CapturingHandler_realtime_regparm` but is declared without the
 * `regparm` attribute, so it uses the default calling convention.
 *
 * Effects: captures {mcontext, signal, si_code} into from_handler, advances the saved instruction
 * pointer by capturing_handler_skip, and counts the invocation in capturing_handler_calls.
 */
static void CapturingHandler_realtime_glibc_helper(int signal, siginfo_t* siginfo, void* context) {
// The instruction-pointer slot is REG_RIP on x86-64 and REG_EIP on 32-bit x86.
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  auto* uctx = static_cast<ucontext_t*>(context);
  auto& machine_ctx = uctx->uc_mcontext;

  // Snapshot first: the recorded IP must be the pre-adjustment value.
  from_handler = {machine_ctx, signal, siginfo->si_code};
  machine_ctx.gregs[FEX_IP_REG] += capturing_handler_skip;
#undef FEX_IP_REG
  ++capturing_handler_calls;
}
#endif


================================================
FILE: unittests/FEXLinuxTests/tests/signal/invalid_vex.32.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <cstdlib>

extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked stub containing one hand-encoded 4-byte instruction followed by `ret`.
// The `IntInstruction` label marks the address of that instruction so the test can compare it
// with the instruction pointer reported to the signal handler.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
  IntInstruction:
  // vaddss xmm0,xmm15,xmm2
  .byte 0xc5, 0x82, 0x58, 0xc2;
  ret;
  )");
}

// Address of the hand-encoded instruction; the handler is expected to report exactly this IP.
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&IntInstruction);

// Runs the hand-encoded instruction in InvalidINT() and checks that a signal was captured at its address.
TEST_CASE("Signals: Invalid VEX.vvvv") {
  // The handler advances the saved IP by 4, the length of the .byte sequence, so execution resumes at `ret`.
  capturing_handler_skip = 4;
  struct sigaction act {};
  act.sa_sigaction = CapturingHandler;
  act.sa_flags = SA_SIGINFO;
  // The same handler is installed for every signal the instruction might raise.
  sigaction(SIGSEGV, &act, nullptr);
  sigaction(SIGTRAP, &act, nullptr);
  sigaction(SIGILL, &act, nullptr);

  InvalidINT();

// Allow the REG_RIP spelling below when only REG_EIP is available (this definition stays active for the rest of the file).
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/noexec_protect.64.cpp
================================================
#include "simple_x86.h"
#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <cstdlib>
#include <csetjmp>

// Set by SIGSEGV_Handler when it runs.
bool Caught = false;
// Fault address (si_addr) recorded by SIGSEGV_Handler.
uint64_t CaughtAddr {};
// Jump target SIGSEGV_Handler uses to leave the faulting code and return to the test body.
static jmp_buf LongJump {};

// SIGSEGV handler shared by the tests below: validates the fault description, records the fault
// address, and jumps back to the most recent setjmp(LongJump) instead of returning normally.
// NOTE(review): because the handler exits through longjmp(), the signal mask in effect inside the
// handler is kept; if it was installed without SA_NODEFER, SIGSEGV stays blocked afterwards — confirm this is intended.
static void SIGSEGV_Handler(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;
  // Needs to be an access error.
  REQUIRE(siginfo->si_code == SEGV_ACCERR);

  // Page fault
  REQUIRE(_context->uc_mcontext.gregs[REG_TRAPNO] == 14);

  CaughtAddr = reinterpret_cast<uint64_t>(siginfo->si_addr);

  Caught = true;
  // Does not return: control resumes in the test at the matching setjmp() with value 1.
  longjmp(LongJump, 1);
}

// Verifies that code in a mapping runs while the mapping is executable and raises SIGSEGV
// (access error, page-fault trap) once PROT_EXEC has been removed with mprotect().
TEST_CASE("Signals: Test No-Exec") {
  struct sigaction act {};
  act.sa_sigaction = SIGSEGV_Handler;
  // SA_NODEFER: the handler leaves through longjmp() rather than returning, so the signal mask is
  // never restored by sigreturn. Without this flag SIGSEGV would remain blocked after the first
  // caught fault and any later fault in this process would be fatal instead of reaching the handler.
  act.sa_flags = SA_SIGINFO | SA_NODEFER;

  REQUIRE(sigaction(SIGSEGV, &act, nullptr) == 0);
  auto PageSize = sysconf(_SC_PAGESIZE);
  // Fall back to 4 KiB if the page size could not be queried.
  PageSize = PageSize > 0 ? PageSize : 0x1000;

  void* Ptr = mmap(nullptr, PageSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(Ptr != MAP_FAILED);

  // Emit `mov rax, 1; ret` at the start of the mapping.
  SimpleX86Emit emit(Ptr, PageSize);
  emit.mov(SimpleX86Emit::Reg::RAX, 1);
  emit.ret();

  using func_ptr = uint32_t (*)();
  func_ptr func = reinterpret_cast<func_ptr>(Ptr);

  // First time should execute fine.
  Caught = false;
  if (setjmp(LongJump) == 0) {
    int res = func();
    REQUIRE(res == 1);
  } else {
    // Only reached if the handler fired, which must not happen while the page is executable.
    REQUIRE(Caught == false);
  }

  // Protect as non-executable
  REQUIRE(mprotect(Ptr, PageSize, PROT_READ | PROT_WRITE) == 0);

  // This should now fail to execute due to No-Exec.
  Caught = false;
  if (setjmp(LongJump) == 0) {
    int res = func();
    // Shouldn't get reached.
    REQUIRE(res == 1);
    REQUIRE(false);
  } else {
    REQUIRE(Caught == true);
  }

  // Release the mapping so repeated runs in one process do not accumulate pages.
  REQUIRE(munmap(Ptr, PageSize) == 0);
}

// Verifies fault reporting for an instruction that straddles a page boundary: with the second page
// inaccessible the fault address must be the start of that page, and once access is restored the
// very same code must execute normally.
TEST_CASE("Signals: Partial decode") {
  struct sigaction act {};
  act.sa_sigaction = SIGSEGV_Handler;
  // SA_NODEFER: the handler leaves through longjmp() rather than returning, so the signal mask is
  // never restored by sigreturn. Without this flag SIGSEGV would remain blocked after the caught
  // fault and any later fault in this process would be fatal instead of reaching the handler.
  act.sa_flags = SA_SIGINFO | SA_NODEFER;

  REQUIRE(sigaction(SIGSEGV, &act, nullptr) == 0);
  auto PageSize = sysconf(_SC_PAGESIZE);
  // Fall back to 4 KiB if the page size could not be queried.
  PageSize = PageSize > 0 ? PageSize : 0x1000;

  void* Ptr = mmap(nullptr, PageSize * 2, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(Ptr != MAP_FAILED);

  // Start emitting on the very last byte of the first page.
  SimpleX86Emit emit(static_cast<uint8_t*>(Ptr) + PageSize - 1, PageSize + 1);

  // MOV <uint32_t> hits the end of the page, writing only the opcode.
  // The rest of the instruction (and the `ret`) land in the second page.
  emit.mov(SimpleX86Emit::Reg::RAX, 0x42424242);
  emit.ret();

  // Protect second page
  REQUIRE(mprotect(static_cast<uint8_t*>(Ptr) + PageSize, PageSize, PROT_NONE) == 0);

  using func_ptr = uint32_t (*)();
  func_ptr func = reinterpret_cast<func_ptr>(static_cast<uint8_t*>(Ptr) + PageSize - 1);
  Caught = false;
  if (setjmp(LongJump) == 0) {
    REQUIRE(func() != 0x42424242);
  } else {
    REQUIRE(Caught == true);
    // The reported address must be the first byte of the protected page, not the instruction start.
    CHECK(CaughtAddr == (reinterpret_cast<uint64_t>(Ptr) + PageSize));
  }

  Caught = false;
  CaughtAddr = 0;

  // Restore full access to the second page; the same code must now run to completion.
  REQUIRE(mprotect(static_cast<uint8_t*>(Ptr) + PageSize, PageSize, PROT_READ | PROT_WRITE | PROT_EXEC) == 0);

  if (setjmp(LongJump) == 0) {
    CHECK(func() == 0x42424242);
  } else {
    REQUIRE(false);
  }

  CHECK(Caught == false);
  CHECK(CaughtAddr == 0);

  // Release the mapping so repeated runs in one process do not accumulate pages.
  REQUIRE(munmap(Ptr, PageSize * 2) == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/pthread_cancel.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Derived from example in https://manual.cs50.io/3/pthread_cancel
// <<Manual pages for the C standard library, C POSIX library, and the CS50 Library>>

// Set by the worker thread once it has disabled cancellation; the main thread spins on it.
std::atomic<bool> thread_ready;
// Set by the main thread after pthread_cancel() has been issued; the worker spins on it.
std::atomic<bool> cancel_sent;

// Thread-specific-data key created by the worker; key_dtor is registered as its destructor.
static pthread_key_t key;

// Destructor registered for `key`: prints a marker line and frees the value stored under the key.
void key_dtor(void* ptr) {
  puts("key_dtor: Thread aborted\n");
  free(ptr);
}

// Reports a pthread-style error number: stores `en` in errno, prints `msg` via perror(), then exits
// the whole process with EXIT_FAILURE. Wrapped in do/while(0) so it behaves as a single statement.
#define handle_error_en(en, msg) \
  do {                           \
    errno = en;                  \
    perror(msg);                 \
    exit(EXIT_FAILURE);          \
  } while (0)

// Worker thread body for the cancellation test.
//
// Stores a malloc'ed block under a freshly created thread-specific key (destructor: key_dtor),
// disables cancellation, signals readiness, waits until the main thread reports that the cancel
// request was sent, re-enables cancellation and then sleeps forever.
// `ignored_argument` is unused. The final return is only reached if the thread is never cancelled.
static void* thread_func(void* ignored_argument) {
  pthread_key_create(&key, &key_dtor);
  pthread_setspecific(key, malloc(32));

  // Keep cancellation off for now so the request stays pending instead of acting immediately.
  const int disable_status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
  if (disable_status != 0) {
    handle_error_en(disable_status, "pthread_setcancelstate");
  }

  printf("thread_func(): started; cancellation disabled\n");
  thread_ready.store(true);

  // Busy-wait until the main thread has issued pthread_cancel().
  while (!cancel_sent.load()) {
  }
  printf("thread_func(): about to enable cancellation\n");

  const int enable_status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
  if (enable_status != 0) {
    handle_error_en(enable_status, "pthread_setcancelstate");
  }

  // sleep() is a cancellation point, so the pending request should take effect here.
  while (true) {
    sleep(1000);
  }

  // Should never get here.
  printf("thread_func(): not canceled!\n");
  return NULL;
}

// Starts a worker that temporarily disables cancellation, sends it a cancel request while
// cancellation is still disabled, and checks that the thread ends up cancelled once it re-enables it.
TEST_CASE("pthreads cancel") {
  pthread_t thr;
  // Initialised so a failed join can never leave an indeterminate value to be compared below.
  void* res = nullptr;

  /* Start a thread and then send it a cancellation request. */

  REQUIRE(pthread_create(&thr, NULL, &thread_func, NULL) == 0);

  // Wait until the worker has disabled cancellation.
  while (!thread_ready.load())
    ;

  printf("main(): sending cancellation request\n");
  REQUIRE(pthread_cancel(thr) == 0);

  // Let the worker proceed to re-enable cancellation.
  cancel_sent = true;

  /* Join with thread to see what its exit status was. */

  REQUIRE(pthread_join(thr, &res) == 0);

  CHECK(res == PTHREAD_CANCELED);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigill_flags.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <array>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <cstdlib>
#include <optional>

// Naked stub consisting of a single `ret`; the signal handler redirects the saved IP here so the
// faulting call returns straight to the test body.
__attribute__((naked, nocf_check)) static void SafeRet() {
  __asm volatile(R"(
  ret;
  )");
}

// Values recorded by sigsegv_check for later inspection by the test body.
struct capture_data {
  // Instruction pointer taken from the machine context.
  uintptr_t RIP;
  // Fault address taken from siginfo->si_addr.
  uintptr_t siginfo_RIP;
  // REG_ERR slot of the machine context.
  uint64_t Register_Err;
  // REG_TRAPNO slot of the machine context.
  uint64_t TrapNo;
};

// Most recent capture; empty until the handler has run (the test resets it before each fault).
std::optional<capture_data> data;
static void sigsegv_check(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;
  auto mcontext = &_context->uc_mcontext;

#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  data.emplace(capture_data {
    .RIP = static_cast<uintptr_t>(mcontext->gregs[FEX_IP_REG]),
    .siginfo_RIP = reinterpret_cast<uintptr_t>(siginfo->si_addr),
    .Register_Err = static_cast<uint64_t>(mcontext->gregs[REG_ERR]),
    .TrapNo = static_cast<uint64_t>(mcontext->gregs[REG_TRAPNO]),
  });

  // Change RIP to a safe return so we can continue testing.
  mcontext->gregs[FEX_IP_REG] = reinterpret_cast<greg_t>(&SafeRet);
}

// For each non-executable protection in ProtArray: jump into a page filled with `ret`, and check the
// IP, fault address, error code and trap number the SIGSEGV handler observed.
TEST_CASE("Signals: SIGILL flags") {
  struct sigaction act {};
  act.sa_sigaction = sigsegv_check;
  act.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &act, nullptr);

  // One test vector: the protection applied to the page and the error code expected in REG_ERR.
  struct TestArray {
    int Prot;
    uint64_t RegisterErr;
  };

  // NOTE(review): 21 (0b10101) presumably decodes as present | user | instruction-fetch in the x86 page-fault error code — confirm.
  constexpr static std::array<TestArray, 2> ProtArray {{
    {PROT_READ | PROT_WRITE, 21}, {PROT_READ, 21},

    // FEX doesn't currently support reporting this correctly.
    // { PROT_NONE, 20 },
  }};

  for (auto& Prot : ProtArray) {
    // Map writable first so the page can be filled, then tighten the protection below.
    void* ptr = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    REQUIRE(ptr != MAP_FAILED);

    // Fill JIT space with `ret` instruction.
    memset(ptr, 0xc3, 4096);

    // Protect with various NOEXEC protections.
    REQUIRE(mprotect(ptr, 4096, Prot.Prot) == 0);

    using func_type = void (*)();
    auto func = reinterpret_cast<func_type>(ptr);

    // Clear any earlier capture so has_value() below proves the handler ran for this iteration.
    data.reset();

    // Jump to prepared JIT function with NOEXEC permissions.
    // Will immediately fault.
    func();

    REQUIRE(data.has_value());
    // Both the context IP and si_addr must point at the first byte of the page.
    CHECK(data->RIP == reinterpret_cast<uintptr_t>(ptr));
    CHECK(data->siginfo_RIP == reinterpret_cast<uintptr_t>(ptr));
    CHECK(data->Register_Err == Prot.RegisterErr);
    // 14 is the trap number the sibling No-Exec test labels "Page fault".
    CHECK(data->TrapNo == 14);
    REQUIRE(munmap(ptr, 4096) == 0);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigill_xstate_magic.cpp
================================================
#include "fpstate.h"
#include <catch2/catch_test_macros.hpp>

#include <array>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <cstdlib>
#include <optional>

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Naked stub that executes `hlt` followed by `ret`; the test relies on `hlt` raising a signal.
// The handler redirects execution to SafeRet, so the trailing `ret` here is not expected to run.
__attribute__((naked, nocf_check)) static void InvalidINT() {
  __asm volatile(R"(
    hlt;
    ret;
    )");
}

// Naked stub consisting of a single `ret`; the signal handler redirects the saved IP here so the
// faulting call returns straight to the test body.
__attribute__((naked, nocf_check)) static void SafeRet() {
  __asm volatile(R"(
  ret;
  )");
}

// Magic values read from the signal frame's floating-point/xstate area by signal_check.
struct capture_data {
  // sw_reserved.magic1 from the start of the area.
  uint32_t magic1;
  // Last 32-bit word of the extended area; left at 0 when magic1 is not FP_XSTATE_MAGIC_1.
  uint32_t magic2;
};

// Most recent capture; empty until the handler has run.
std::optional<capture_data> data;
// Signal handler: reads the xstate magic numbers out of the delivered context, stores them in
// `data`, and redirects the saved instruction pointer to SafeRet so the test can continue.
static void signal_check(int signal, siginfo_t* siginfo, void* context) {
// The instruction-pointer slot is REG_RIP on x86-64 and REG_EIP on 32-bit x86.
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif
  auto* uctx = static_cast<ucontext_t*>(context);
  auto* state = reinterpret_cast<FEX::Unittests::xstate*>(uctx->uc_mcontext.fpregs);
  const auto& sw_bytes = state->fpstate.sw_reserved;

  auto magic1 = sw_bytes.magic1;
  uint32_t magic2 {};
  // The trailing magic is only looked up when the leading magic announces an extended area;
  // it is read from the final 32-bit word of that area (extended_size bytes from the start).
  if (magic1 == FEX::Unittests::fpx_sw_bytes::FP_XSTATE_MAGIC_1) {
    const auto trailer_addr = reinterpret_cast<uintptr_t>(state) + sw_bytes.extended_size - sizeof(uint32_t);
    magic2 = *reinterpret_cast<uint32_t*>(trailer_addr);
  }

  data.emplace(capture_data {magic1, magic2});

  // Resume at a bare `ret` so the faulting call returns to its caller.
  uctx->uc_mcontext.gregs[FEX_IP_REG] = reinterpret_cast<greg_t>(&SafeRet);
}

// Triggers a signal via InvalidINT() and checks that the delivered frame carries both xstate magic values.
// NOTE(review): the test name says "SIGILL flags" although the body checks xstate magic numbers — possibly a copy/paste leftover; confirm before renaming.
TEST_CASE("Signals: SIGILL flags") {
  struct sigaction act {};
  act.sa_sigaction = signal_check;
  act.sa_flags = SA_SIGINFO;
  // The same handler is installed for every signal the instruction might raise.
  sigaction(SIGSEGV, &act, nullptr);
  sigaction(SIGTRAP, &act, nullptr);
  sigaction(SIGILL, &act, nullptr);

  InvalidINT();

  REQUIRE(data.has_value());
  CHECK(data->magic1 == FEX::Unittests::fpx_sw_bytes::FP_XSTATE_MAGIC_1);
  CHECK(data->magic2 == FEX::Unittests::fpx_sw_bytes::FP_XSTATE_MAGIC_2);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/signal_df_reset.64.cpp
================================================
// SPDX-License-Identifier: MIT
#include <catch2/catch_test_macros.hpp>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <ucontext.h>
#include <unistd.h>

// System page size, filled in by the test from sysconf(_SC_PAGESIZE).
int Page {};
// Three-page mapping written by the signal handler; only its middle page (signal_second_page) is writable.
void* signal_page {};
void* signal_second_page {};

// Three-page mapping written backwards by the test body; only its middle page (backwards_second_page) starts out writable.
void* backwards_page {};
void* backwards_second_page {};

// Naked helper: sets the direction flag (`std`), copies `value` (rsi) into rax and runs `rep stosb`,
// then clears the flag again (`cld`) before returning.
// `rep stosb` itself uses rdi/rcx/al; presumably `pad` exists only so that `size` arrives in rcx
// under the SysV x86-64 argument order — verify.
// The `jmp` instructions only branch to the immediately following label.
__attribute__((naked)) void backwards_set(void* dest, uint8_t value, uint32_t pad, size_t size) {
  __asm volatile(R"(
    std;
    jmp 1f;
    1:
    mov rax, rsi;
    rep stosb;
    jmp 2f;
    2:
    cld;
    ret;
  )" ::
                   : "memory", "cc");
}

// Naked helper: copies `value` (rsi) into rax and runs `rep stosb` without touching the direction
// flag, so the fill direction is whatever DF happens to be on entry.
// Presumably `pad` exists only so that `size` arrives in rcx under the SysV x86-64 argument order — verify.
__attribute__((naked)) void forward_set(void* dest, uint8_t value, uint32_t pad, size_t size) {
  __asm volatile(R"(
    mov rax, rsi;
    rep stosb;
    ret;
  )" ::
                   : "memory", "cc");
}

// SIGSEGV handler for the DF test. It fills signal_second_page with forward_set(); if the direction
// flag were still set on handler entry, that fill would run backwards into the inaccessible
// signal_page and fault again, which the address check below detects.
void sig_handler(int signum, siginfo_t* info, void* context) {
  // Ensure the fault address isn't in the signal page.
  auto addr = reinterpret_cast<uint64_t>(info->si_addr);
  // This REQUIRE will fail if DF isn't reset on signal handler.
  REQUIRE(!(addr >= (uint64_t)signal_page && addr <= (uint64_t)signal_second_page));
  forward_set(signal_second_page, 1, 0, 4096);

  // mprotect the page that was originally written to, allowing the code to continue.
  REQUIRE(mprotect(backwards_page, Page, PROT_READ | PROT_WRITE) == 0);
}

// Sets up two three-page mappings whose outer pages are inaccessible, then performs a backwards
// fill that runs off the writable middle page and faults with the direction flag set.
// NOTE(review): "flaga" in the test name looks like a typo for "flag"; left untouched because the name may be referenced elsewhere.
TEST_CASE("DF flaga reset on signal") {
  struct sigaction act {};

  act.sa_flags = SA_SIGINFO;
  act.sa_sigaction = &sig_handler;
  REQUIRE(sigaction(SIGSEGV, &act, NULL) == 0);

  Page = sysconf(_SC_PAGESIZE);

  // Allocate pages with protections to ensure correct direction.
  signal_page = ::mmap(nullptr, Page * 3, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(signal_page != MAP_FAILED);
  signal_second_page = reinterpret_cast<void*>(reinterpret_cast<uint64_t>(signal_page) + Page);

  // Again for backward direction.
  backwards_page = ::mmap(nullptr, Page * 3, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  REQUIRE(backwards_page != MAP_FAILED);
  backwards_second_page = reinterpret_cast<void*>(reinterpret_cast<uint64_t>(backwards_page) + Page);

  // Allow the middle page to read/write.
  REQUIRE(mprotect(signal_second_page, Page, PROT_READ | PROT_WRITE) == 0);

  // Allow the middle page to read/write.
  REQUIRE(mprotect(backwards_second_page, Page, PROT_READ | PROT_WRITE) == 0);

  // Copy backwards and cause a signal.
  // NOTE(review): the length is a literal 4096 rather than Page — assumes 4 KiB pages.
  backwards_set(backwards_second_page, 1, 0, 4096);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/signal_flags.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// Naked stub: 0x80 + 1 = 0x81 has an even number of set bits in its low byte, so `inc` sets PF
// right before `int3` traps.
__attribute__((naked)) static void InvalidINT_SetPF() {
  __asm volatile(R"(
  mov eax, 0x80
  inc eax
  int3;
  ret;
  )");
}

// Naked stub: 0 + 1 = 1 has an odd number of set bits in its low byte, so `inc` clears PF
// right before `int3` traps.
__attribute__((naked)) static void InvalidINT_ClearPF() {
  __asm volatile(R"(
  mov eax, 0
  inc eax
  int3;
  ret;
  )");
}

// Naked stub: sets CF with `stc` right before `int3` traps.
__attribute__((naked)) static void InvalidINT_SetCF() {
  __asm volatile(R"(
  stc
  int3;
  ret;
  )");
}

// Naked stub: clears CF with `clc` right before `int3` traps.
__attribute__((naked)) static void InvalidINT_ClearCF() {
  __asm volatile(R"(
  clc
  int3;
  ret;
  )");
}

// Naked stub: 1 - 1 = 0, so `dec` sets ZF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_SetZF() {
  __asm volatile(R"(
  mov eax, 1
  dec eax
  int3;
  ret;
  )");
}

// Naked stub: 2 - 1 = 1 is non-zero, so `dec` clears ZF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_ClearZF() {
  __asm volatile(R"(
  mov eax, 2
  dec eax
  int3;
  ret;
  )");
}

// Naked stub: 0 - 1 wraps to 0xffffffff (sign bit set), so `dec` sets SF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_SetSF() {
  __asm volatile(R"(
  mov eax, 0
  dec eax
  int3;
  ret;
  )");
}

// Naked stub: 1 - 1 = 0 (sign bit clear), so `dec` clears SF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_ClearSF() {
  __asm volatile(R"(
  mov eax, 1
  dec eax
  int3;
  ret;
  )");
}

// Naked stub: 0x7fffffff + 1 overflows the signed 32-bit range, so `inc` sets OF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_SetOF() {
  __asm volatile(R"(
  mov eax, 0x7fffffff
  inc eax
  int3;
  ret;
  )");
}

// Naked stub: 0 + 1 does not overflow, so `inc` clears OF right before `int3` traps.
__attribute__((naked)) static void InvalidINT_ClearOF() {
  __asm volatile(R"(
  mov eax, 0
  inc eax
  int3;
  ret;
  )");
}

// Values every `int3` stub in this file is expected to report to the signal handler.
constexpr int EXPECTED_TRAPNO = 3;
constexpr int EXPECTED_ERR = 0;
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGTRAP;

// Bit positions of the tested flags inside the saved flags register (used as shift amounts).
constexpr uint32_t EFL_CF = 0;
constexpr uint32_t EFL_PF = 2;
constexpr uint32_t EFL_ZF = 6;
constexpr uint32_t EFL_SF = 7;
constexpr uint32_t EFL_OF = 11;

#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

using FunctionPtr = void (*)();

// Installs CapturingHandler for SIGSEGV/SIGTRAP/SIGILL, runs `Func` (one of the `int3` stubs) and
// verifies what the handler captured.
//
// Func         - stub that prepares a flag state and then traps.
// FlagOffset   - bit position of the flag under test inside the saved flags register.
// ExpectedFlag - value (0 or 1) that bit must have in the captured context.
void SetupAndCallTest(FunctionPtr Func, uint32_t FlagOffset, uint32_t ExpectedFlag) {
  // The handler must leave the saved instruction pointer untouched for these stubs.
  capturing_handler_skip = 0;
  // Drop any capture left behind by an earlier test case in this process; otherwise has_value()
  // below could pass, and stale register state be checked, without the handler running this time.
  from_handler.reset();

  struct sigaction act {};
  act.sa_sigaction = CapturingHandler;
  act.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &act, nullptr);
  sigaction(SIGTRAP, &act, nullptr);
  sigaction(SIGILL, &act, nullptr);

  Func();

  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
  // Extract Flag
  CHECK(((from_handler->mctx.gregs[REG_EFL] >> FlagOffset) & 1) == ExpectedFlag);
}

// One test case per flag and polarity. Each runs the matching stub through SetupAndCallTest and
// checks the corresponding bit of the flags value captured by the signal handler.

// Parity flag.
TEST_CASE("Signals: PF on Signal") {
  SetupAndCallTest(InvalidINT_SetPF, EFL_PF, 1);
}

TEST_CASE("Signals: NoPF on Signal") {
  SetupAndCallTest(InvalidINT_ClearPF, EFL_PF, 0);
}

// Carry flag.
TEST_CASE("Signals: CF on Signal") {
  SetupAndCallTest(InvalidINT_SetCF, EFL_CF, 1);
}

TEST_CASE("Signals: NoCF on Signal") {
  SetupAndCallTest(InvalidINT_ClearCF, EFL_CF, 0);
}

// Zero flag.
TEST_CASE("Signals: ZF on Signal") {
  SetupAndCallTest(InvalidINT_SetZF, EFL_ZF, 1);
}

TEST_CASE("Signals: NoZF on Signal") {
  SetupAndCallTest(InvalidINT_ClearZF, EFL_ZF, 0);
}

// Sign flag.
TEST_CASE("Signals: SF on Signal") {
  SetupAndCallTest(InvalidINT_SetSF, EFL_SF, 1);
}

TEST_CASE("Signals: NoSF on Signal") {
  SetupAndCallTest(InvalidINT_ClearSF, EFL_SF, 0);
}

// Overflow flag.
TEST_CASE("Signals: OF on Signal") {
  SetupAndCallTest(InvalidINT_SetOF, EFL_OF, 1);
}

TEST_CASE("Signals: NoOF on Signal") {
  SetupAndCallTest(InvalidINT_ClearOF, EFL_OF, 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/signal_order.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <unistd.h>
#include <signal.h>
#include <sys/syscall.h>
#include <stdio.h>

// Expected value of Order[] after the test: entry N-1 is the position at which signal N was handled.
// The entries for signals 9 and 19 (index 8 and 18) stay 0 because the test never sends
// SIGKILL/SIGSTOP, so later signals are shifted down accordingly.
constexpr uint64_t ExpectedOrder[64] = {
  0,  1,  2,  3,  4,  5,  6,  7,  0,  8,  9,  10, 11, 12, 13, 14, 15, 16, 0,  17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
  30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61,
};

// Number of signals handled so far.
uint64_t Count {};
// Order[N-1] receives the value of Count at the moment signal N is handled.
uint64_t Order[64] = {};

// C++ side of the signal handler: records the arrival position of `signal` in Order[] and bumps Count.
// On 32-bit builds it takes its arguments in registers (regparm(3)); the 32-bit asm_handler loads
// eax/ecx/edx from the stack before calling it.
#ifndef __x86_64__
[[gnu::regparm(3)]]
#endif
static void handler(int signal, siginfo_t* siginfo, void* context) {
  REQUIRE((signal > 0 && signal < 65));
  // Explicit bounds guard for the array write below, independent of the assertion above.
  if (signal < 1 || signal > 64) {
    return;
  }
  Order[signal - 1] = Count;
  ++Count;
}

#ifdef __x86_64__
// x86-64 entry point registered with rt_sigaction: calls handler() through a register operand
// without touching the incoming argument registers, then executes `ret`.
// NOTE(review): this uses extended asm with an input operand inside a naked function; GCC documents
// only basic asm as safe there — confirm the supported compilers accept it as intended.
__attribute__((naked)) void asm_handler(int signal, siginfo_t* siginfo, void* context) {
  __asm volatile(R"(
  call %[Handler];
  ret;
  )" ::[Handler] "r"(handler)
                 : "memory");
}

// x86-64 restorer installed via act.restorer: loads SYS_rt_sigreturn into eax and issues `syscall`.
// There is no `ret`; the stub relies on the syscall not returning here.
__attribute__((naked)) void restorer() {
  __asm volatile(R"(
  mov eax, %[sigreturn];
  syscall;
  )" ::[sigreturn] "i"(SYS_rt_sigreturn)
                 : "memory");
}
#else
// 32-bit entry point registered with rt_sigaction: loads the three stack slots at esp+4/+8/+12
// into eax/ecx/edx and calls handler() through ebx, then executes `ret`.
// NOTE(review): handler() is regparm(3), which presumably expects its second argument in edx and its
// third in ecx; here esp+8 goes to ecx and esp+12 to edx. handler() only uses `signal`, so the
// test is unaffected — confirm whether the order matters.
// NOTE(review): extended asm with operands inside a naked function is outside what GCC documents as safe.
__attribute__((naked)) void asm_handler(int signal, siginfo_t* siginfo, void* context) {
  __asm volatile(R"(
  mov ebx, %[Handler];
  mov eax, [esp + 4];
  mov ecx, [esp + 8];
  mov edx, [esp + 12];
  call ebx;
  ret;
  )" ::[Handler] "r"(handler)
                 : "eax", "ecx", "edx", "memory");
}


// 32-bit restorer installed via act.restorer: loads SYS_rt_sigreturn into eax and issues `int 0x80`.
// There is no `ret`; the stub relies on the syscall not returning here.
__attribute__((naked)) void restorer() {
  __asm volatile(R"(
  mov eax, %[sigreturn];
  int 0x80;
  )" ::[sigreturn] "i"(SYS_rt_sigreturn)
                 : "memory");
}
#endif


// 64-bit signal mask as passed to the raw rt_sigaction syscall (the test passes a mask size of 8 bytes).
struct __attribute__((packed)) GuestSAMask {
  uint64_t Val;
};

// Argument structure handed directly to SYS_rt_sigaction, bypassing glibc's struct sigaction.
// NOTE(review): the member order (handler, flags, restorer, mask) presumably mirrors the kernel's
// sigaction layout — confirm against the uapi header.
struct __attribute__((packed)) GuestSigAction {
  // Handler pointer, viewed either as a plain or an SA_SIGINFO-style handler.
  union {
    void (*handler)(int);
    void (*sigaction)(int, siginfo_t*, void*);
  } sigaction_handler;

  size_t sa_flags;
  // Return trampoline used when SA_RESTORER is set in sa_flags.
  void (*restorer)(void);
  GuestSAMask sa_mask;
};

// Registers asm_handler for signals 1..64 through the raw rt_sigaction syscall, sends every signal
// except SIGKILL/SIGSTOP to the current thread in ascending order, and checks that they were
// handled in that same order.
TEST_CASE("signal order") {

// Defined locally because the raw-syscall structure needs the flag value itself.
#define SA_RESTORER 0x04000000
  struct GuestSigAction act {};
  act.sigaction_handler.sigaction = (decltype(act.sigaction_handler.sigaction))asm_handler;
  act.restorer = restorer;
  act.sa_flags = SA_SIGINFO | SA_RESTORER;
  // Return values are not checked; the call is issued for every signal number, including SIGKILL and SIGSTOP.
  for (size_t i = 1; i <= 64; ++i) {
    ::syscall(SYS_rt_sigaction, i, &act, nullptr, 8);
  }

  auto pid = ::getpid();
  auto tid = ::gettid();
  for (size_t i = 1; i <= 64; ++i) {
    if (i == SIGKILL || i == SIGSTOP) {
      continue;
    }
    tgkill(pid, tid, i);
  }

  for (size_t i = 1; i <= 64; ++i) {
    CHECK(Order[i - 1] == ExpectedOrder[i - 1]);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_defer.cpp
================================================
// A test where a signal is masked, a value is set that the signal handler will overwrite, and then the signal is unmasked to allow it to
// fire. FEX-Emu had a bug where it wasn't properly deferring signals from sigprocmask if one of the signals was masked by the guest application.
// In older glibc versions (glibc-2.26), the `raise` implementation would block signals, tgkill, and then unblock signals,
// expecting the signal to fire once the signals were unblocked.

#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>
#include <catch2/generators/catch_generators_range.hpp>

#include <signal.h>
#include <unistd.h>

// Hand-off value between RaiseSignal and the handler: 0 = idle, 2 = signal sent while blocked, 1 = handler ran.
static uint32_t CheckValue {};
// Handler for the deferred signal: it must only run after RaiseSignal stored 2, i.e. after the
// signal was unblocked, and marks its execution by storing 1.
void sig_handler(int signum, siginfo_t* info, void* context) {
  REQUIRE(CheckValue == 2);
  CheckValue = 0x1;
}

// Sends `Signal` to the calling thread while every signal is blocked, marks CheckValue with 2, and
// then restores the previous mask so the pending signal can be delivered.
// Expects CheckValue to be 0 on entry; the handler is expected to observe 2.
static void RaiseSignal(int Signal) {
  sigset_t AllSignals {};
  sigset_t SavedMask {};

  // Block everything, remembering the mask that was active before.
  sigfillset(&AllSignals);
  int Ret = sigprocmask(SIG_BLOCK, &AllSignals, &SavedMask);

  REQUIRE(Ret != -1);

  // The signal is queued but must stay pending because it is currently blocked.
  Ret = tgkill(::getpid(), ::gettid(), Signal);
  REQUIRE(Ret != -1);

  // The handler must not have run yet; publish the marker it checks for.
  CHECK(CheckValue == 0);
  CheckValue = 0x2;

  // Restoring the old mask releases the pending signal.
  Ret = sigprocmask(SIG_SETMASK, &SavedMask, nullptr);
  REQUIRE(Ret != -1);
}

// Runs the deferred-delivery check once per signal number in [1, 64], skipping the signals that
// cannot be caught and numbers 32/33 (presumably the ones glibc reserves for internal use — verify).
TEST_CASE("Signals: Defer Signals") {
  auto tested_signal = GENERATE(range(1, 65));

  const bool is_uncatchable = tested_signal == SIGKILL || tested_signal == SIGSTOP;
  const bool is_skipped_rt = tested_signal == 32 || tested_signal == 33;
  if (is_uncatchable || is_skipped_rt) {
    return;
  }

  struct sigaction sa {};
  sa.sa_sigaction = sig_handler;
  sigemptyset(&sa.sa_mask);
  sa.sa_flags = SA_RESTART | SA_SIGINFO;
  sigaction(tested_signal, &sa, nullptr);

  // Start from the idle state, raise while blocked, and expect the handler to have run afterwards.
  CheckValue = 0;
  RaiseSignal(tested_signal);
  CHECK(CheckValue == 1);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_no_defer.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Incremented by the handler after its nested raise() has returned.
volatile int count = 0;
// Incremented by the handler before it issues its nested raise().
volatile int count2 = 0;

// Number of nested raises the handler performs.
#define NUMCOUNT 10
// Signal used by this test.
#define SIGN SIGTSTP

// Handler installed with SA_NODEFER: it re-raises its own signal NUMCOUNT times and expects every
// nested raise to be delivered immediately, i.e. re-entering the handler before raise() returns.
//
// `count` is only incremented after a nested raise() returns, so seeing a non-zero `count` on entry
// means a raise was deferred until an earlier handler invocation had finished; the process then
// exits with failure. `info` and `context` are unused.
void sig_handler(int signum, siginfo_t* info, void* context) {
  printf("Inside handler function\n");
  if (count != 0) {
    printf("SA_NODEFER bug\n");
    exit(-1);
  }

  if (count2 != 0) {
    printf("Nested raise correctly raised, trying sigprocmask\n");
    sigset_t old;
    // test if sigmask returned by sigprocmask is the one currently active
    sigprocmask(0, 0, &old);
    sigprocmask(SIG_SETMASK, &old, 0);
  }

  if (count2 < NUMCOUNT) {
    // Report progress from count2: count is still 0 at every nesting level on the way down, so
    // printing `1 + count` would show "1 of NUMCOUNT" for every level.
    printf("Nested Raising %d, %d of %d times\n", signum, 1 + count2, NUMCOUNT);
    count2++;
    raise(signum);
    count++;
  }
  // Otherwise the nesting limit is reached: just return to the caller.
}

// Installs sig_handler with SA_NODEFER, raises the signal once and checks that the handler nested
// exactly NUMCOUNT times.
TEST_CASE("Signals: No defer") {
  struct sigaction act = {0};

  act.sa_flags = SA_SIGINFO | SA_NODEFER;
  act.sa_sigaction = &sig_handler;
  REQUIRE(sigaction(SIGN, &act, NULL) == 0);

  raise(SIGN);
  // Compare against NUMCOUNT so the expectation follows the handler's nesting limit if it changes.
  CHECK(count == NUMCOUNT);
  CHECK(count2 == NUMCOUNT);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_samask.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

// Cleared by the handler; the test loops until it observes the handler having run.
volatile bool loop = true;
// When set, the handler returns right after clearing `loop` and performs no nested raise.
volatile bool last = false;
// Incremented by the handler after its nested raise() has returned.
volatile int count = 0;
// Incremented by the handler before it issues its nested raise().
volatile int count2 = 0;

// OPTIONS
// TESTSIGPROCMASK

// Number of handler invocations that perform a nested raise.
#define NUMCOUNT 10
// Signal used by this test.
#define SIGN SIGTSTP

// Handler installed without SA_NODEFER: while it runs, its own signal is expected to be blocked, so
// the nested raise() must stay pending until the handler returns and only then trigger the next
// invocation. `count2` is bumped before the raise and `count` after it, so they must be equal on
// every entry; a mismatch means the handler was re-entered and the process exits with failure.
void sig_handler(int signum) {
  loop = false;
  printf("Inside handler function\n");

  if (last) {
    printf("Handling last raise\n");
    return;
  }

  if (count2 != count) {
    printf("Signal reentering bug\n");
    exit(-1);
  }

  if (count < NUMCOUNT) {
    printf("Nested Raising sig%d, %d of %d times\n", signum, 1 + count, NUMCOUNT);
    count2++;
    raise(signum);
    printf("Nested raise correctly blocked, trying sigprocmask\n");
    sigset_t old;
    // test if sigmask returned by sigprocmask is the one currently active
    sigprocmask(0, 0, &old);
    // Re-applying the mask that was just read must not unblock the pending signal.
    sigprocmask(SIG_SETMASK, &old, 0);
    printf("sigprocmask worked correctly, should trigger next iteration on signal return\n");
    count++;
  }
}

// Checks that a signal stays blocked while its own handler runs (no SA_NODEFER), and that the mask
// reported by sigprocmask after the handler chain has finished no longer blocks it.
TEST_CASE("Signals: samask") {
  // signal() returns the previously installed handler, so success is "not SIG_ERR"; comparing with
  // 0 only passed when the previous disposition's pointer value happened to be 0.
  REQUIRE(signal(SIGN, sig_handler) != SIG_ERR);

  // test if sigmask blocks during execution as expected
  last = false;
  loop = true;
  while (loop) {
    printf("Inside main loop, raising signal\n");
    raise(SIGN);
    REQUIRE_FALSE(loop);
  }
  last = true;
  loop = true;

  // test if sigmask returned by sigprocmask is the one set by the signal return
  sigset_t old;
  sigprocmask(0, 0, &old);
  sigprocmask(SIG_SETMASK, &old, 0);

  // If the restored mask still blocked SIGN, the handler would not run and `loop` would stay true.
  while (loop) {
    printf("Inside last loop, raising signal\n");
    raise(SIGN);
    REQUIRE_FALSE(loop);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_siginfo.32.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <cstdint>
#include <syscall.h>

extern "C" void IntInstruction();

// Naked stub that executes `int 1` followed by `ret`. The `IntInstruction` label marks the address
// of the `int 1` so tests can compare it with the reported instruction pointer; the handlers skip
// 2 bytes (capturing_handler_skip) so execution resumes at the `ret`.
__attribute__((naked)) static void CauseInt() {
  __asm volatile(R"(
  IntInstruction:
  int 1;
  ret; # For RIP modification
  )");
}

// Address of the `int 1` instruction; the handlers must report exactly this instruction pointer.
static uint32_t EXPECTED_RIP = reinterpret_cast<uint32_t>(&IntInstruction);
// Fault description expected for `int 1` executed from user space.
// NOTE(review): 13/10 presumably are the general-protection trap number and an error code of
// (vector 1 << 3) | 2 — confirm against the architecture manual.
constexpr int EXPECTED_TRAPNO = 13;
constexpr int EXPECTED_ERR = 10;
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGSEGV;

// Argument structure handed directly to the legacy SYS_sigaction syscall.
// NOTE(review): the member order (handler, mask, flags, restorer) presumably mirrors the kernel's
// old 32-bit sigaction layout — confirm against the uapi header.
struct ActionHandler {
  void* handler;
  uint32_t sa_mask;
  uint32_t sa_flags;
  void* restorer;
};

// Argument block passed directly to the raw SYS_rt_sigaction syscall by the tests below,
// which pass 8 as the mask size to match the 64-bit sa_mask field.
// NOTE(review): the field order presumably mirrors the kernel's 32-bit
// rt_sigaction layout — confirm against the kernel uapi headers.
struct rt_ActionHandler {
  void* handler; // Handler entry point, stored as an untyped pointer.

  uint32_t sa_flags; // 0 or SA_SIGINFO in the tests below.
  void* restorer;    // Left null by every test in this file.
  uint64_t sa_mask;  // Signal mask field; left zero by every test in this file.
};

TEST_CASE("sigaction: no siginfo") {
  // Legacy sigaction without SA_SIGINFO. On 32-bit Linux the handler still finds a context
  // on its stack; that is a Linux implementation detail rather than a POSIX guarantee, and
  // because it is part of the uapi an application may edit it to change where execution
  // resumes. The handler is asked to move EIP forward by 2 to exercise exactly that.
  capturing_handler_skip = 2;

  ActionHandler action {};
  action.sa_flags = 0;
  action.handler = reinterpret_cast<void*>(CapturingHandler_non_realtime);
  syscall(SYS_sigaction, SIGSEGV, &action, nullptr);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler_32.has_value());
  CHECK(from_handler_32->mctx.ip == EXPECTED_RIP);
  CHECK(from_handler_32->mctx.trapno == EXPECTED_TRAPNO);
  CHECK(from_handler_32->mctx.err == EXPECTED_ERR);
  CHECK(from_handler_32->signal == EXPECTED_SIGNAL);
}

TEST_CASE("sigaction: siginfo - regparm") {
  // Legacy sigaction with SA_SIGINFO and a handler that takes siginfo and context as
  // register parameters. The context is part of the uapi and may be modified by the
  // application; the handler is asked to move EIP forward by 2 to exercise that.
  capturing_handler_skip = 2;

  ActionHandler action {};
  action.sa_flags = SA_SIGINFO;
  action.handler = reinterpret_cast<void*>(CapturingHandler_realtime_regparm);
  syscall(SYS_sigaction, SIGSEGV, &action, nullptr);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK((uint32_t)from_handler->mctx.gregs[REG_EIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}

TEST_CASE("sigaction: no siginfo - regparm") {
  // Legacy sigaction without SA_SIGINFO and a handler that reads its arguments from
  // registers. The checks below expect only the signal number to be delivered that way:
  // the siginfo and context arguments must arrive as null.
  capturing_handler_skip = 2;

  ActionHandler action {};
  action.sa_flags = 0;
  action.handler = reinterpret_cast<void*>(CapturingHandler_non_realtime_regparm);
  syscall(SYS_sigaction, SIGSEGV, &action, nullptr);

  CauseInt();

  REQUIRE(from_handler_regparm_32.has_value());
  CHECK(from_handler_regparm_32->signal == EXPECTED_SIGNAL);
  CHECK(from_handler_regparm_32->siginfo == nullptr);
  CHECK(from_handler_regparm_32->context == nullptr);
}

TEST_CASE("sigaction: siginfo - stack") {
  // Legacy sigaction with SA_SIGINFO and a handler that reads its arguments from the
  // signal frame on the stack. The frame is part of the uapi and may be modified by the
  // application; the handler is asked to move EIP forward by 2 to exercise that.
  capturing_handler_skip = 2;

  ActionHandler action {};
  action.sa_flags = SA_SIGINFO;
  action.handler = reinterpret_cast<void*>(CapturingHandler_realtime);
  syscall(SYS_sigaction, SIGSEGV, &action, nullptr);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_EIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}

TEST_CASE("rt_sigaction: no siginfo") {
  // rt_sigaction without SA_SIGINFO. On 32-bit the classic handler still finds a context
  // on its stack; it is part of the uapi, and editing it is how an application changes the
  // state that sigreturn restores.
  capturing_handler_skip = 2;

  rt_ActionHandler action {};
  action.sa_flags = 0;
  action.handler = reinterpret_cast<void*>(CapturingHandler_non_realtime);
  // The trailing 8 is the size in bytes of the signal mask field.
  syscall(SYS_rt_sigaction, SIGSEGV, &action, nullptr, 8);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler_32.has_value());
  CHECK(from_handler_32->mctx.ip == EXPECTED_RIP);
  CHECK(from_handler_32->mctx.trapno == EXPECTED_TRAPNO);
  CHECK(from_handler_32->mctx.err == EXPECTED_ERR);
  CHECK(from_handler_32->signal == EXPECTED_SIGNAL);
}

TEST_CASE("rt_sigaction: siginfo - regparm") {
  // A realtime handler on 32-bit can pick its arguments up from the stack or from
  // registers (regparm). This case covers the register-parameter path.
  capturing_handler_skip = 2;

  rt_ActionHandler action {};
  action.sa_flags = SA_SIGINFO;
  action.handler = reinterpret_cast<void*>(CapturingHandler_realtime_regparm);
  // The trailing 8 is the size in bytes of the signal mask field.
  syscall(SYS_rt_sigaction, SIGSEGV, &action, nullptr, 8);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_EIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}

TEST_CASE("rt_sigaction: siginfo - stack") {
  // A realtime handler on 32-bit can pick its arguments up from the stack or from
  // registers (regparm). This case covers the stack path.
  capturing_handler_skip = 2;

  rt_ActionHandler action {};
  action.sa_flags = SA_SIGINFO;
  action.handler = reinterpret_cast<void*>(CapturingHandler_realtime);
  // The trailing 8 is the size in bytes of the signal mask field.
  syscall(SYS_rt_sigaction, SIGSEGV, &action, nullptr, 8);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_EIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}

TEST_CASE("sigaction: siginfo - glibc") {
  // Same scenario as the raw-syscall cases, but registered through the ordinary glibc
  // sigaction() wrapper to make sure the regular path works too.
  capturing_handler_skip = 2;

  struct sigaction action {};
  action.sa_flags = SA_SIGINFO;
  action.sa_sigaction = CapturingHandler_realtime_glibc_helper;
  sigaction(SIGSEGV, &action, nullptr);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_EIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_siginfo.64.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <cstdint>

extern "C" void IntInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled
// Executes `int 1` at a known address so the test can provoke a signal on demand.
// The function is naked, so its body is exactly the assembly below. The
// `IntInstruction` label marks the `int 1` instruction; it is declared
// extern "C" above so EXPECTED_RIP can be derived from its address.
// NOTE(review): the trailing `ret` is only reached if the handler moves the
// saved instruction pointer past `int 1` — presumably what
// capturing_handler_skip controls; confirm in invalid_util.h.
__attribute__((naked, nocf_check)) static void CauseInt() {
  __asm volatile(R"(
  IntInstruction:
  int 1;
  ret; # For RIP modification
  )");
}

// Reference values the test compares the captured signal state against.
// EXPECTED_RIP is the address of the `IntInstruction` label inside CauseInt().
static uint64_t EXPECTED_RIP = reinterpret_cast<uint64_t>(&IntInstruction);
// NOTE(review): 13 presumably is the x86 general-protection vector and 10 the
// matching error code for `int 1` — confirm against the CPU documentation.
constexpr int EXPECTED_TRAPNO = 13;
constexpr int EXPECTED_ERR = 10;
// NOTE(review): not referenced by the test visible in this file.
constexpr int EXPECTED_SI_CODE = 128;
constexpr int EXPECTED_SIGNAL = SIGSEGV;

TEST_CASE("siginfo") {
  // x86-64 hands siginfo to the handler whether or not SA_SIGINFO was requested, so the
  // flag makes no difference here; the handler is deliberately registered without it.
  capturing_handler_skip = 2;

  struct sigaction action {};
  action.sa_flags = 0;
  action.sa_sigaction = CapturingHandler;
  sigaction(SIGSEGV, &action, nullptr);

  CauseInt();

  // The handler must have run and recorded the faulting machine state.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/sigtest_sigmask.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <ucontext.h>
#include <unistd.h>

volatile bool loop = false;
volatile bool inhandler = false;

#define SIGN SIGTSTP

void sig_handler(int signum, siginfo_t* info, void* context) {
  // Let the main loop know the handler was entered.
  loop = false;
  printf("Inside handler function\n");

  // A second entry means the signal got delivered again although it should stay blocked.
  if (inhandler) {
    printf("Signal reentering bug\n");
    exit(-1);
  }
  inhandler = true;

  // Queue the same signal once more while still inside the handler.
  raise(signum);

  // Block every signal in the mask stored in the saved context, i.e. the mask that is
  // put in place when the handler returns.
  ucontext_t* saved_context = static_cast<ucontext_t*>(context);
  sigfillset(&saved_context->uc_sigmask);
}

TEST_CASE("Signals: sigmask") {
  // Register sig_handler with SA_SIGINFO so it receives the ucontext it edits.
  struct sigaction action {};
  action.sa_sigaction = &sig_handler;
  action.sa_flags = SA_SIGINFO;
  REQUIRE(sigaction(SIGN, &action, NULL) == 0);

  loop = true;
  while (loop) {
    printf("Inside main loop, raising signal\n");
    raise(SIGN);

    // The handler clears the flag, so it must be false once raise() has returned.
    REQUIRE_FALSE(loop);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/synchronous-signal-block.cpp
================================================
// Triggering synchronous POSIX signals while they're masked triggers a
// process exit. In contrast, if an asynchronous signal is triggered, the
// corresponding signal handler will be invoked once the signal is unmasked.
//
// To test synchronous signals, the test forks and triggers the signal in the
// child process. For asynchronous signals, a signal handler that sets a global
// variable is used.

#include <sys/mman.h>
#include <sys/wait.h>

#include <cinttypes>
#include <cstdint>
#include <optional>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#include <catch2/catch_test_macros.hpp>
#include <catch2/generators/catch_generators.hpp>

// Jump target shared by the signal handlers and the Check* helpers in this file. It is only
// ever used with sigsetjmp()/siglongjmp(), which POSIX defines in terms of sigjmp_buf (the
// variant able to hold the saved signal mask), so it is declared with that type.
static sigjmp_buf jmpbuf;

// Record written by handler(): which signal arrived and the si_addr reported with it.
struct HandledSignal {
  int signal;     // Signal number passed to the handler.
  uintptr_t addr; // siginfo's si_addr, converted to an integer.
};

// Record written by handler_read(): which signal arrived and the REG_ERR value
// found in the machine context at that point.
struct HandledSignal_ERR {
  int signal; // Signal number passed to the handler.
  size_t ERR; // gregs[REG_ERR] from the signal's ucontext.
};

static std::optional<HandledSignal> handled_signal;
static std::optional<HandledSignal_ERR> handled_signal_err;

static void handler(int sig, siginfo_t* si, void* context) {
  printf("Got %d at address: 0x%lx\n", sig, (long)si->si_addr);
  handled_signal = {sig, reinterpret_cast<uintptr_t>(si->si_addr)};
  siglongjmp(jmpbuf, 1);
}

// Scope guard for the signal mask: blocks every signal on construction and puts the mask
// that was active beforehand back on destruction.
struct GuardedSignalMask {
  // Mask that was in effect before the guard blocked everything.
  sigset_t oldset {};

  GuardedSignalMask() {
    sigset_t everything;
    sigfillset(&everything);
    sigprocmask(SIG_SETMASK, &everything, &oldset);
  }

  ~GuardedSignalMask() {
    sigprocmask(SIG_SETMASK, &oldset, nullptr);
  }
};

// Checks if the given function causes the process to exit.
// The function is executed in a process fork with all signals masked.
//
// Returns the raw wait status of the child (to be inspected with WIFSIGNALED/WTERMSIG and
// friends), or an empty optional if the child could not be created or waited for.
// If the callback returns normally, the child terminates with exit code 1.
template<typename F>
std::optional<int> CheckIfExitsFromSignal(F&& f) {
  const pid_t child = fork();
  if (child < 0) {
    // No child exists, so there is no status to report.
    return std::nullopt;
  }

  if (child == 0) {
    GuardedSignalMask guard;
    std::forward<F>(f)();
    exit(1);
  }

  // Wait for this specific child so that an unrelated child's status is never picked up.
  int status = 0;
  if (waitpid(child, &status, 0) != child) {
    return std::nullopt;
  }
  return status;
}

// Checks if the given function causes a signal handler to be invoked.
//
// Installs `handler` for SIGSEGV, SIGBUS, SIGILL and SIGFPE, runs the callback with all
// signals masked, and restores the previous dispositions afterwards. Returns what
// `handler` recorded, or an empty optional if it never ran.
template<typename F>
std::optional<HandledSignal> CheckIfSignalHandlerCalled(F&& f) {
  handled_signal = {};

  // Handle all signals by the test handler. Zero-initialise so that no field is passed to
  // sigaction() with an indeterminate value.
  struct sigaction sa {};
  sa.sa_flags = SA_SIGINFO;
  sigemptyset(&sa.sa_mask);
  sa.sa_sigaction = handler;

  // The previous dispositions are captured *before* sigsetjmp(): automatic variables that
  // are modified between sigsetjmp() and siglongjmp() have indeterminate values after the
  // jump, and `oldsa` is needed on that path to restore the handlers.
  struct sigaction oldsa[4];
  sigaction(SIGSEGV, &sa, &oldsa[0]);
  sigaction(SIGBUS, &sa, &oldsa[1]);
  sigaction(SIGILL, &sa, &oldsa[2]);
  sigaction(SIGFPE, &sa, &oldsa[3]);

  if (!sigsetjmp(jmpbuf, 1)) {
    // Mask signals and run given callback. If the handler fires it jumps back to the
    // sigsetjmp() above, which also restores the signal mask saved there.
    GuardedSignalMask guard;
    std::forward<F>(f)();
  }

  // Restore previous signal handlers
  sigaction(SIGSEGV, &oldsa[0], nullptr);
  sigaction(SIGBUS, &oldsa[1], nullptr);
  sigaction(SIGILL, &oldsa[2], nullptr);
  sigaction(SIGFPE, &oldsa[3], nullptr);

  return handled_signal;
}

static void handler_read(int sig, siginfo_t* si, void* context) {
  ucontext_t* _context = (ucontext_t*)context;
  auto mcontext = &_context->uc_mcontext;
  printf("Got %d at address: 0x%lx with 0x%zx\n", sig, (long)si->si_addr, (size_t)mcontext->gregs[REG_ERR]);
  handled_signal_err = {sig, (size_t)mcontext->gregs[REG_ERR]};
  siglongjmp(jmpbuf, 1);
}

template<typename F>
std::optional<HandledSignal> CheckIfSignalHandlerCalledWithRegERR(F&& f) {
  handled_signal = {};
  struct sigaction oldsa[4];

  if (!sigsetjmp(jmpbuf, 1)) {
    // Handle all signals by the test handler
    struct sigaction sa;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sa.sa_sigaction = handler_read;
    sigaction(SIGSEGV, &sa, &oldsa[0]);
    sigaction(SIGBUS, &sa, &oldsa[1]);
    sigaction(SIGILL, &sa, &oldsa[2]);
    sigaction(SIGFPE, &sa, &oldsa[3]);

    // Mask signals and run given callback
    std::forward<F>(f)();
  }

  // Restore previous signal handlers
  sigaction(SIGSEGV, &oldsa[0], nullptr);
  sigaction(SIGBUS, &oldsa[1], nullptr);
  sigaction(SIGILL, &oldsa[2], nullptr);
  sigaction(SIGFPE, &oldsa[3], nullptr);

  return handled_signal;
}

TEST_CASE("Signals: Error Flag - Read") {
  // Read from a page mapped without any access rights and inspect the error code the
  // handler found in the machine context.
  CheckIfSignalHandlerCalledWithRegERR([]() {
    auto* page = (uint8_t*)mmap(nullptr, 4096, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    printf("Read: %d\n", page[0]);
  });

  REQUIRE(handled_signal_err.has_value());
  CHECK(handled_signal_err->signal == SIGSEGV);
  constexpr size_t Expected = 0x4;
  CHECK(handled_signal_err->ERR == Expected); // USER
}

TEST_CASE("Signals: Error Flag - Write") {
  // Write to a read-only page and inspect the error code the handler found in the
  // machine context.
  CheckIfSignalHandlerCalledWithRegERR([]() {
    auto* page = (uint8_t*)mmap(nullptr, 4096, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
    page[0] = 1;
  });

  REQUIRE(handled_signal_err.has_value());
  CHECK(handled_signal_err->signal == SIGSEGV);
  constexpr size_t Expected = 0x6;
  CHECK(handled_signal_err->ERR == Expected); // USER + WRITE
}

// For ssegv, we fail to do default signal catching behaviour
TEST_CASE("Signals: ssegv") {
  // Store through an unmapped low address in a forked child that has all signals masked;
  // the child is expected to be terminated by SIGSEGV.
  auto child_status = CheckIfExitsFromSignal([]() { *(int*)0x32 = 0x64; });

  REQUIRE(child_status.has_value());
  CHECK(WIFSIGNALED(*child_status) == true);
  CHECK(WTERMSIG(*child_status) == SIGSEGV);
}

// For sill, we fail to do default signal catching behaviour
TEST_CASE("Signals: sill") {
  // Execute `ud2` in a forked child that has all signals masked; the child is expected
  // to be terminated by SIGILL.
  auto child_status = CheckIfExitsFromSignal([]() { asm volatile("ud2\n"); });

  REQUIRE(child_status.has_value());
  CHECK(WIFSIGNALED(*child_status) == true);
  CHECK(WTERMSIG(*child_status) == SIGILL);
}

// sbus and abus fail on arm because of sigbus handling
TEST_CASE("Signals: sbus") {
  // Create a one-page shared anonymous mapping and grow it to two pages with mremap().
  // The child then writes to the first byte of the second page and is expected to be
  // terminated by SIGBUS.
  auto original = mmap(nullptr, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, -1, 0);
  auto grown = (char*)mremap(original, 4096, 8192, MREMAP_MAYMOVE);

  auto child_status = CheckIfExitsFromSignal([&]() { grown[4096] = 2; });

  REQUIRE(child_status.has_value());
  CHECK(WIFSIGNALED(*child_status) == true);
  CHECK(WTERMSIG(*child_status) == SIGBUS);
}

// sfpe and afpe fail on arm because we don't raise FPE
TEST_CASE("Signals: sfpe") {
  // Integer division by zero in a forked child that has all signals masked; the child is
  // expected to be terminated by SIGFPE. The operands are volatile so the division is
  // really executed.
  const auto divide_by_zero = [&]() {
    volatile int numerator = 10;
    volatile int denominator = 0;
    volatile int quotient = numerator / denominator;
    printf("result: %d\n", quotient);
  };
  auto child_status = CheckIfExitsFromSignal(divide_by_zero);

  REQUIRE(child_status.has_value());
  CHECK(WIFSIGNALED(*child_status) == true);
  CHECK(WTERMSIG(*child_status) == SIGFPE);
}

// These fail to queue the signals
TEST_CASE("Signals: asynchronous") {
  int tested_signal = GENERATE(SIGSEGV, SIGILL, SIGBUS, SIGFPE);

  // A signal raised with raise() while it is masked must stay pending: the code after
  // raise() still has to run, and the handler only fires once the mask is lifted.
  bool continued_after_raise = false;
  auto delivered = CheckIfSignalHandlerCalled([&]() {
    GuardedSignalMask guard {};
    raise(tested_signal);

    // Reaching this line proves raise() returned without the handler taking over.
    continued_after_raise = true;

    // When the guards go out of scope the signals are unmasked again, and the pending
    // signal's handler should run at that point.
  });

  REQUIRE(delivered.has_value());
  CHECK(delivered->signal == tested_signal);
  CHECK(continued_after_raise);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/timer-sigev-thread.cpp
================================================
// Simple test of timer_create + SIGEV_THREAD, glibc implements it via SIG32

#include <catch2/catch_test_macros.hpp>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <cassert>
#include <optional>
#include <signal.h>
#include <time.h>

int test;

std::optional<bool> sigval_ack;

// Timer notification callback: records whether the delivered sigval carries the address
// of `test` and reports the outcome.
void timer_handler(union sigval sv) {
  const bool pointer_matches = sv.sival_ptr == &test;
  sigval_ack = pointer_matches;
  printf("timer_handler called, ok = %d\n", pointer_matches);
}

// These sometimes crash FEX with SIGSEGV
TEST_CASE("timer_create and SIGEV_THREAD", "[!mayfail]") {
  timer_t timer;
  sigevent sige;
  itimerspec spec;

  // Ask for the expiry to be delivered by calling timer_handler with &test as payload.
  memset(&sige, 0, sizeof(sige));

  sige.sigev_notify = SIGEV_THREAD;
  sige.sigev_notify_function = &timer_handler;
  sige.sigev_value.sival_ptr = &test;

  // Without a timer the wait loop below would never terminate, so fail right away.
  REQUIRE(timer_create(CLOCK_REALTIME, &sige, &timer) == 0);

  // One-shot timer that expires one nanosecond from now.
  memset(&spec, 0, sizeof(spec));

  spec.it_value.tv_sec = 0;
  spec.it_value.tv_nsec = 1;

  // Same reasoning: an unarmed timer never fires. Release the timer before bailing out.
  const int arm_result = timer_settime(timer, 0, &spec, NULL);
  if (arm_result != 0) {
    timer_delete(timer);
  }
  REQUIRE(arm_result == 0);

  // Wait until the callback has published its verdict.
  while (!sigval_ack) {
    usleep(10);
  }

  // The timer has done its job; release it before evaluating the result so it is freed
  // even when a check below fails.
  timer_delete(timer);

  REQUIRE(sigval_ack.has_value());
  CHECK(*sigval_ack == true);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/trap_flag.cpp
================================================
#include "invalid_util.h"

#include <catch2/catch_test_macros.hpp>

#include <atomic>
#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

extern "C" void RetInstruction();

#pragma GCC diagnostic ignored "-Wattributes" // Suppress warning in case control-flow checks aren't enabled

// TestTF(): sets bit 0x100 in the flags register (push flags, OR the saved copy, pop
// flags), executes three `nop`s, clears the bit again the same way, and returns. The
// `RetInstruction` label marks the final `ret`; it is declared extern "C" above so
// EXPECTED_RIP can be derived from its address. The function is naked, so its body is
// exactly the assembly shown.
// NOTE(review): 0x100 is presumably the x86 trap flag (TF), which would make the CPU
// raise SIGTRAP after each instruction while the bit is set — consistent with the test
// below registering a SIGTRAP handler and counting its invocations; confirm.
#if __SIZEOF_POINTER__ == 4
// 32-bit variant: uses pushfd/popfd and a dword access through esp.
__attribute__((naked, nocf_check)) static void TestTF() {
  __asm volatile(R"(
  pushfd;
  or dword ptr [esp], 0x100;
  popfd;
  nop;
  nop;
  nop;
  pushfd;
  and dword ptr [esp], ~0x100;
  popfd;
  RetInstruction:
  ret;
  )");
}
#else
// 64-bit variant: uses pushfq/popfq and a qword access through rsp.
__attribute__((naked, nocf_check)) static void TestTF() {
  __asm volatile(R"(
  pushfq;
  or qword ptr [rsp], 0x100;
  popfq;
  nop;
  nop;
  nop;
  pushfq;
  and qword ptr [rsp], ~0x100;
  popfq;
  RetInstruction:
  ret;
  )");
}
#endif

// Reference values the test compares the last captured signal state against.
// EXPECTED_RIP is the address of the `RetInstruction` label inside TestTF().
unsigned long EXPECTED_RIP = reinterpret_cast<unsigned long>(&RetInstruction);
// NOTE(review): trap number 1 and si_code 2 presumably correspond to the x86 debug
// exception and TRAP_TRACE respectively — confirm against the kernel headers.
constexpr int EXPECTED_TRAPNO = 1;
constexpr int EXPECTED_ERR = 0;
constexpr int EXPECTED_SI_CODE = 2;
constexpr int EXPECTED_SIGNAL = SIGTRAP;
// Total number of handler invocations the test expects from one TestTF() call.
constexpr int EXPECTED_SIGNAL_COUNT = 6;

TEST_CASE("Signals: Trap Flag") {
  // A skip of zero: presumably the handler then leaves the saved instruction pointer alone.
  capturing_handler_skip = 0;

  struct sigaction trap_action {};
  trap_action.sa_flags = SA_SIGINFO;
  trap_action.sa_sigaction = CapturingHandler;
  sigaction(SIGTRAP, &trap_action, nullptr);

  TestTF();

  // 32-bit builds only provide REG_EIP; alias it so the checks below read the same.
#ifndef REG_RIP
#define REG_RIP REG_EIP
#endif

  // Validate the state captured by the most recent handler call and the total call count.
  REQUIRE(from_handler.has_value());
  CHECK(from_handler->mctx.gregs[REG_RIP] == EXPECTED_RIP);
  CHECK(from_handler->mctx.gregs[REG_TRAPNO] == EXPECTED_TRAPNO);
  CHECK(from_handler->mctx.gregs[REG_ERR] == EXPECTED_ERR);
  CHECK(from_handler->si_code == EXPECTED_SI_CODE);
  CHECK(from_handler->signal == EXPECTED_SIGNAL);
  CHECK(capturing_handler_calls == EXPECTED_SIGNAL_COUNT);
}


================================================
FILE: unittests/FEXLinuxTests/tests/signal/x87_state.64.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <signal.h>
#include <sys/mman.h>
#include <stdint.h>
#include <stdio.h>
#include <cstdlib>

// One 16-byte slot: two 64-bit words. The assembly below reads/writes 10-byte (tbyte)
// x87 values at a 16-byte stride, so only the low 10 bytes of each slot carry a value.
struct DataStruct {
  uint64_t dual[2];
};
// Eight recognisable byte patterns, one per slot; the leading digit of every byte
// identifies the slot (1x.. for slot 0 up to 8x.. for slot 7).
constexpr static DataStruct data[8] = {
  {0x1112131415161718ULL, 0x191A1B1C1D1E1F10ULL}, {0x2122232425262728ULL, 0x292A2B2C2D2E2F20ULL},
  {0x3132333435363738ULL, 0x393A3B3C3D3E3F30ULL}, {0x4142434445464748ULL, 0x494A4B4C4D4E4F40ULL},
  {0x5152535455565758ULL, 0x595A5B5C5D5E5F50ULL}, {0x6162636465666768ULL, 0x696A6B6C6D6E6F60ULL},
  {0x7172737475767778ULL, 0x797A7B7C7D7E7F70ULL}, {0x8182838485868788ULL, 0x898A8B8C8D8E8F80ULL},
};

// Label on the `ret` inside TestFromSignal(); Correct_Handler resumes execution there.
extern "C" void RetInstruction();
// Prepares a known x87 state and then executes `hlt` so that a signal handler can
// inspect that state.
//
// Steps performed by the assembly: reinitialise the FPU, push eight zeroes and free them
// again, load seven 10-byte values from `data` (16-byte stride, addressed through rdi),
// then execute `hlt`. The function is naked, so the body is exactly the assembly shown.
// NOTE(review): `hlt` presumably faults in user mode and is reported as SIGSEGV — the
// test registers Correct_Handler for SIGSEGV, and that handler redirects the instruction
// pointer to `RetInstruction`; confirm.
// NOTE(review): the seven loaded values are still on the x87 stack when `ret` executes.
__attribute__((naked, nocf_check)) static void TestFromSignal(const DataStruct* data) {
  __asm volatile(R"(

  finit;
  // Load 8 zeroes to be safe.
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;

  // Empty them
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);

  // Now load **7** values. Keeping the last one zero and our stack top not wrapped around.
  fld tbyte ptr [rdi + (0 * 16)];
  fld tbyte ptr [rdi + (1 * 16)];
  fld tbyte ptr [rdi + (2 * 16)];
  fld tbyte ptr [rdi + (3 * 16)];
  fld tbyte ptr [rdi + (4 * 16)];
  fld tbyte ptr [rdi + (5 * 16)];
  fld tbyte ptr [rdi + (6 * 16)];

  hlt;
  RetInstruction:
  ret;
  )" ::
                   : "memory", "cc");
}

// Label following the `hlt` inside TestSetInSignal(); Set_Signal_Handler resumes there.
extern "C" void RetSetInstruction();
// Empties the x87 stack, executes `hlt` so a signal handler can install a new x87
// state, and then writes whatever is on the x87 stack out to `data`.
//
// After `hlt`, the loop at label 2 reads the status word, isolates bits 11-13 and, while
// they are non-zero, pops one 10-byte value to [rdi] and advances rdi by 16. The function
// is naked, so the body is exactly the assembly shown.
// NOTE(review): `hlt` presumably faults in user mode and is reported as SIGSEGV — the
// test registers Set_Signal_Handler for SIGSEGV, and that handler redirects the
// instruction pointer to `RetSetInstruction`; confirm.
// NOTE(review): the seven loads after label 3 address memory through rdi *after* it has
// been advanced by 16 for every value stored, i.e. they read behind the slots that were
// just written. Confirm this is intended and stays inside the caller's buffer; the
// sequence looks like it was carried over from TestFromSignal().
__attribute__((naked, nocf_check)) static void TestSetInSignal(DataStruct* data) {
  __asm volatile(R"(
  finit;
  // Load 8 zeroes to be safe.
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;
  fldz;

  // Empty them
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);
  ffreep st(0);

  hlt;
  RetSetInstruction:

  // Store values until the status word says nothing is left.
  mov eax, 0;

2:

  fstsw ax;
  and eax, (7 << 11);
  jz 3f;
  fstp tbyte ptr [rdi];
  add rdi, 16;

  jmp 2b

3:

  // Now load **7** values. Keeping the last one zero and our stack top not wrapped around.
  fld tbyte ptr [rdi + (0 * 16)];
  fld tbyte ptr [rdi + (1 * 16)];
  fld tbyte ptr [rdi + (2 * 16)];
  fld tbyte ptr [rdi + (3 * 16)];
  fld tbyte ptr [rdi + (4 * 16)];
  fld tbyte ptr [rdi + (5 * 16)];
  fld tbyte ptr [rdi + (6 * 16)];

  ret;
  )" ::
                   : "memory", "cc");
}

static DataStruct signal_data[8];

// Signal handler for the "state in handler" test: snapshots the eight x87 register slots
// of the interrupted context into signal_data and resumes execution at RetInstruction.
static void Correct_Handler(int signal, siginfo_t* siginfo, void* context) {
  auto* uctx = static_cast<ucontext_t*>(context);
  auto& machine = uctx->uc_mcontext;
  const auto* fp_state = machine.fpregs;

  // Only the first 10 bytes of each slot hold the extended-precision value.
  size_t slot = 0;
  while (slot < 8) {
    memcpy(&signal_data[slot], &fp_state->_st[slot], 10);
    ++slot;
  }
#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif

  // Resume after the faulting instruction instead of re-executing it.
  machine.gregs[FEX_IP_REG] = reinterpret_cast<greg_t>(RetInstruction);
}

// Signal handler for the "set state in handler" test: rewrites the x87 portion of the
// interrupted context so that, once the handler returns, the interrupted code finds the
// reference patterns from `data` on its x87 stack. Execution resumes at
// RetSetInstruction.
static void Set_Signal_Handler(int signal, siginfo_t* siginfo, void* context) {
  ucontext_t* _context = (ucontext_t*)context;
  auto mcontext = &_context->uc_mcontext;

  // Overwrite all eight register slots; the tag word below decides which ones are valid.
  for (size_t i = 0; i < 8; ++i) {
    memcpy(&mcontext->fpregs->_st[i], &data[i], sizeof(mcontext->fpregs->_st[i]));
  }

  // Adjust the x87 TOP to 1. TOP is the three-bit field in bits 11-13 of the status word,
  // so all three bits must be cleared before the new value is inserted; clearing only two
  // of them would leave bit 13 behind whenever the interrupted TOP was 4 or higher.
  constexpr unsigned TopShift = 11;
  constexpr unsigned TopMask = 7u << TopShift;
  mcontext->fpregs->swd = (mcontext->fpregs->swd & ~TopMask) | (1u << TopShift);
  // Make sure to set the tag words as valid.
  mcontext->fpregs->ftw = 0xFFFE;

#ifdef REG_RIP
#define FEX_IP_REG REG_RIP
#else
#define FEX_IP_REG REG_EIP
#endif

  // Resume after the faulting instruction instead of re-executing it.
  mcontext->gregs[FEX_IP_REG] = reinterpret_cast<greg_t>(RetSetInstruction);
}

TEST_CASE("Signals: X87 State in handler") {
  // Correct_Handler copies the x87 slots of the interrupted context into signal_data.
  struct sigaction fault_action {};
  fault_action.sa_flags = SA_SIGINFO;
  fault_action.sa_sigaction = Correct_Handler;
  sigaction(SIGSEGV, &fault_action, nullptr);

  TestFromSignal(data);

  // Seven values were pushed, so they show up last-pushed first; each entry is the 10-byte
  // truncation of the matching `data` element, and the eighth slot stays zero.
  constexpr static DataStruct test_data[8] = {
    {0x7172737475767778, 0x7f70}, {0x6162636465666768, 0x6f60}, {0x5152535455565758, 0x5f50}, {0x4142434445464748, 0x4f40},
    {0x3132333435363738, 0x3f30}, {0x2122232425262728, 0x2f20}, {0x1112131415161718, 0x1f10}, {0x0, 0x0}};

  for (size_t slot = 0; slot < 8; ++slot) {
    CHECK(memcmp(&test_data[slot], &signal_data[slot], sizeof(DataStruct)) == 0);
  }
}

TEST_CASE("Signals: X87 State set state in handler") {
  // Set_Signal_Handler rewrites the x87 state of the interrupted context.
  struct sigaction fault_action {};
  fault_action.sa_flags = SA_SIGINFO;
  fault_action.sa_sigaction = Set_Signal_Handler;
  sigaction(SIGSEGV, &fault_action, nullptr);

  // TestSetInSignal() stores whatever it finds on the x87 stack into this buffer.
  DataStruct output_data[8] {};
  TestSetInSignal(output_data);

  // Expected: the 10-byte truncation of the first seven `data` elements, in order, with
  // the eighth slot left untouched (zero).
  constexpr static DataStruct test_data[8] = {
    {0x1112131415161718, 0x1f10}, {0x2122232425262728, 0x2f20}, {0x3132333435363738, 0x3f30}, {0x4142434445464748, 0x4f40},
    {0x5152535455565758, 0x5f50}, {0x6162636465666768, 0x6f60}, {0x7172737475767778, 0x7f70}, {0x0, 0x0}};

  for (size_t slot = 0; slot < 8; ++slot) {
    CHECK(memcmp(&test_data[slot], &output_data[slot], sizeof(DataStruct)) == 0);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-1-dynamic.cpp
================================================
#define EXECSTACK

/*
We cannot test the omagic or the static version of this, due to cross compiling issues
//#define OMAGIC // when the g++ driver is used to link, -Wl,--omagic breaks -static, so this can't be tested
*/
/*
  tests for smc changes in .text, stack and bss
*/

char data_sym[16384];
char text_sym[16384] __attribute__((section(".text")));

#include "smc-common.h"

#include <catch2/catch_test_macros.hpp>

TEST_CASE("SMC: Changes in stack") {
  // Code is placed in a stack buffer; executing it relies on -z execstack or on mprotect.
  char stack[16384];
  // Round up to the next 4 KiB boundary inside the buffer.
  const uintptr_t page_aligned = ((uintptr_t)stack + 4095) & ~4095;
  auto code = (char*)page_aligned;

#if !defined(EXECSTACK)
  mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
#endif

  CHECK(test(code, "stack") == 0);
}

TEST_CASE("SMC: Changes in data section") {
  // Code is placed in the data_sym array; it always has to be made executable first.
  // Round up to the next 4 KiB boundary inside the array.
  const uintptr_t page_aligned = ((uintptr_t)data_sym + 4095) & ~4095;
  auto code = (char*)page_aligned;

  mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
  CHECK(test(code, "data_sym") == 0);
}

TEST_CASE("SMC: Changes in text section") {
  // Code is placed in the text_sym array; writing to it relies on -Wl,omagic or on mprotect.
  // Round up to the next 4 KiB boundary inside the array.
  const uintptr_t page_aligned = ((uintptr_t)text_sym + 4095) & ~4095;
  auto code = (char*)page_aligned;

#if !defined(OMAGIC)
  mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);
#endif

  CHECK(test(code, "text_sym") == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-2.cpp
================================================
/*
  tests for smc changes memory mapped via mmap, mremap, shmat without mirroring
*/

#include "smc-common.h"

#include <catch2/catch_test_macros.hpp>

TEST_CASE("SMC: mmap") {
  // Private anonymous page that is readable, writable and executable.
  constexpr int Protection = PROT_READ | PROT_WRITE | PROT_EXEC;
  constexpr int Flags = MAP_PRIVATE | MAP_ANON;
  auto code = (char*)mmap(0, 4096, Protection, Flags, 0, 0);

  CHECK(test(code, "mmap") == 0);
}

TEST_CASE("SMC: mremap") {
  // Shared anonymous page, then a second mapping obtained from mremap() with an old size
  // of zero; the test runs against that second mapping.
  constexpr int Protection = PROT_READ | PROT_WRITE | PROT_EXEC;
  constexpr int Flags = MAP_SHARED | MAP_ANON;
  auto first_mapping = (char*)mmap(0, 4096, Protection, Flags, 0, 0);
  auto second_mapping = (char*)mremap(first_mapping, 0, 4096, MREMAP_MAYMOVE);

  CHECK(test(second_mapping, "mremap") == 0);
}

TEST_CASE("SMC: shmat") {
  // One page of System V shared memory, attached with execute permission.
  auto shm = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);
  REQUIRE(shm != -1);

  void* attached = shmat(shm, nullptr, SHM_EXEC);
  // Mark the segment for removal straight away. System V segments outlive the process
  // unless removed explicitly; once marked, the segment is destroyed when the last
  // attachment goes away, so the test no longer leaks one segment per run.
  shmctl(shm, IPC_RMID, nullptr);
  REQUIRE(attached != (void*)-1);

  auto code = (char*)attached;
  CHECK(test(code, "shmat") == 0);
}

TEST_CASE("SMC: shmat_mremap") {
  // One page of System V shared memory, attached with execute permission, then a second
  // mapping obtained from mremap() with an old size of zero; the test runs against that
  // second mapping.
  auto shm = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);
  REQUIRE(shm != -1);

  void* attached = shmat(shm, nullptr, SHM_EXEC);
  if (attached == (void*)-1) {
    // Nothing is attached, so remove the segment before reporting the failure.
    shmctl(shm, IPC_RMID, nullptr);
  }
  REQUIRE(attached != (void*)-1);

  auto code = (char*)attached;
  auto code2 = (char*)mremap(code, 0, 4096, MREMAP_MAYMOVE);

  // Mark the segment for removal now that all mappings exist. System V segments outlive
  // the process unless removed explicitly; once marked, the segment is destroyed when
  // the last attachment goes away, so the test no longer leaks one segment per run.
  shmctl(shm, IPC_RMID, nullptr);

  CHECK(test(code2, "shmat_mremap") == 0);
}

TEST_CASE("SMC: mmap_shmdt") {
  // Attach a three-page System V segment and mark it for removal.
  auto segment_id = shmget(IPC_PRIVATE, 4096 * 3, IPC_CREAT | 0777);
  auto segment_base = (char*)shmat(segment_id, 0, 0);
  shmctl(segment_id, IPC_RMID, NULL);

  // Replace the middle page with a private anonymous executable mapping, then detach the
  // segment. The test runs on the page that was mapped over the segment.
  constexpr int Protection = PROT_READ | PROT_WRITE | PROT_EXEC;
  constexpr int Flags = MAP_FIXED | MAP_PRIVATE | MAP_ANON;
  auto overlay = (char*)mmap(segment_base + 4096, 4096, Protection, Flags, 0, 0);
  shmdt(segment_base);

  CHECK(test(overlay, "mmap_shmdt") == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-common.h
================================================
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cstring>

#include <unistd.h>
#include <fcntl.h>

#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/wait.h>


// Self-modifying-code check on a single writable+executable page.
//
// Writes a tiny function (`mov eax, imm32; ret`) to `code`, runs it, and then patches one
// byte of the immediate three times under different mprotect() sequences, running the
// function after every patch.
//
// code: start of a page-aligned region of at least 4096 bytes that can be written and executed.
// name: label used as prefix in the printed report.
// Returns a bitmask with bit N-1 set when the N-th execution returned the wrong value
// (0 means every execution observed the latest patch).
int test(char* code, const char* name) {
  using StubFn = int (*)();

  // mov eax, 0xDDCCBBAA ; ret
  const int stub[] = {0xB8, 0xAA, 0xBB, 0xCC, 0xDD, 0xC3};
  for (size_t i = 0; i < sizeof(stub) / sizeof(stub[0]); ++i) {
    code[i] = stub[i];
  }

  const auto run = reinterpret_cast<StubFn>(code);
  const int first = run();

  // Patch the immediate while the page is still writable and executable.
  code[3] = 0xFE;
  const int second = run();

  // Drop write access and grant it again before the next patch.
  mprotect(code, 4096, PROT_READ | PROT_EXEC);
  mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);

  code[3] = 0xF3;

  // Execute this patch from a page without write access.
  mprotect(code, 4096, PROT_READ | PROT_EXEC);
  const int third = run();

  // Make the page writable again for the final patch.
  mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);

  code[3] = 0xF1;
  const int fourth = run();

  const bool bad1 = first != 0xDDCCBBAA;
  const bool bad2 = second != 0xDDFEBBAA;
  const bool bad3 = third != 0xDDF3BBAA;
  const bool bad4 = fourth != 0xDDF1BBAA;

  printf("%s-1: %X, %s\n", name, first, bad1 ? "FAIL" : "PASS");
  printf("%s-2: %X, %s\n", name, second, bad2 ? "FAIL" : "PASS");
  printf("%s-3: %X, %s\n", name, third, bad3 ? "FAIL" : "PASS");
  printf("%s-4: %X, %s\n", name, fourth, bad4 ? "FAIL" : "PASS");

  return (bad1 << 0) | (bad2 << 1) | (bad3 << 2) | (bad4 << 3);
}

// Self-modifying-code check through two different views of the same memory.
//
// Writes a tiny function (`mov eax, imm32; ret`) through `code`, executes it through
// `codeexec`, patches one byte of the immediate through `code`, and executes it again.
//
// code:     writable view; must differ from codeexec (asserted).
// codeexec: executable view of the same memory.
// name:     label used as prefix in the printed report.
// Returns a bitmask with bit 0 / bit 1 set when the first / second execution returned
// the wrong value.
int test_shared(char* code, char* codeexec, const char* name) {
  assert(code != codeexec);
  using StubFn = int (*)();

  // mov eax, 0xDDCCBBAA ; ret
  const int stub[] = {0xB8, 0xAA, 0xBB, 0xCC, 0xDD, 0xC3};
  for (size_t i = 0; i < sizeof(stub) / sizeof(stub[0]); ++i) {
    code[i] = stub[i];
  }

  const auto run = reinterpret_cast<StubFn>(codeexec);
  const int before_patch = run();

  // Patch through the writable view, execute through the other one.
  code[3] = 0xFE;
  const int after_patch = run();

  const bool bad1 = before_patch != 0xDDCCBBAA;
  const bool bad2 = after_patch != 0xDDFEBBAA;

  printf("%s-1: %X, %s\n", name, before_patch, bad1 ? "FAIL" : "PASS");
  printf("%s-2: %X, %s\n", name, after_patch, bad2 ? "FAIL" : "PASS");

  return (bad1 << 0) | (bad2 << 1);
}

// Self-modifying-code check where the patch is applied by a forked child process.
//
// Writes a tiny function (`mov eax, imm32; ret`) through `code` and executes it through
// `codeexec`. A forked child then patches one byte of the immediate through `code` and
// exits; after the child has been reaped the parent executes the function again and
// expects to see the patched value. This only works when `code` refers to memory that is
// shared between parent and child.
//
// code:     writable view of the code memory.
// codeexec: executable view of the same memory (may be equal to `code`).
// name:     label used as prefix in the printed report.
// Returns -1 if the child could not be created. Otherwise returns a bitmask: bit 0 / bit 1
// set when the first / second execution returned the wrong value, bit 2 set when the
// child did not terminate normally with exit code 0.
int test_forked(char* code, char* codeexec, const char* name) {
  // mov eax, 0xDDCCBBAA
  code[0] = 0xB8;
  code[1] = 0xAA;
  code[2] = 0xBB;
  code[3] = 0xCC;
  code[4] = 0xDD;

  // ret
  code[5] = 0xC3;

  auto fn = (int (*)())codeexec;
  auto e1 = fn();

  int failure_set = 0;

  const pid_t pid = fork();
  if (pid < 0) {
    perror("fork");
    return -1;
  }

  if (pid == 0) {
    // Child: patch the immediate and leave.
    code[3] = 0xFE;
    exit(0);
  }

  // Parent: wait for this specific child and make sure the patch was really applied.
  int status = 0;
  if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
    failure_set |= 1 << 2;
  }

  // Previously both branches above left the function, so this second execution and the
  // report below were never reached.
  auto e2 = fn();

  failure_set |= (e1 != 0xDDCCBBAA) << 0;
  printf("%s-1: %X, %s\n", name, e1, e1 != 0xDDCCBBAA ? "FAIL" : "PASS");
  failure_set |= (e2 != 0xDDFEBBAA) << 1;
  printf("%s-2: %X, %s\n", name, e2, e2 != 0xDDFEBBAA ? "FAIL" : "PASS");

  return failure_set;
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-exec-stack.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <signal.h>
#include <ucontext.h>
#include <sys/mman.h>

bool got_signal = false;

// SIGSEGV handler: requires the fault to be an access-permission error, notes that a
// signal arrived, and makes the faulting page readable, writable and executable so the
// interrupted instruction can be retried.
static void sigsegv_handler(int signal, siginfo_t* siginfo, void* context) {
  REQUIRE(siginfo->si_code == SEGV_ACCERR);
  got_signal = true;

  // Round the fault address down to the start of its page.
  const size_t page_size = sysconf(_SC_PAGESIZE);
  const uintptr_t fault_addr = (uintptr_t)(siginfo->si_addr);
  void* fault_page = (void*)(fault_addr & ~(page_size - 1));

  REQUIRE(mprotect(fault_page, page_size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0);
}

// Installs sigsegv_handler as the SA_SIGINFO-style handler for SIGSEGV.
void register_signal_handler() {
  struct sigaction segv_action {};
  segv_action.sa_flags = SA_SIGINFO;
  segv_action.sa_sigaction = sigsegv_handler;
  REQUIRE(sigaction(SIGSEGV, &segv_action, nullptr) == 0);
}

TEST_CASE("smc-exec-stack: PT_GNU_STACK == RWX") {
  register_signal_handler();

  // Put a single `ret` at a page-aligned address inside a stack buffer and call it.
  char stack[16384];
  const uintptr_t page_aligned = ((uintptr_t)stack + 4095) & ~4095;
  auto stack_code = (char*)page_aligned;
  *stack_code = 0xC3; // ret

  using VoidFn = void (*)();
  ((VoidFn)(stack_code))();

  // Executing from the stack is expected to work without any fault.
  CHECK(got_signal == false);
  got_signal = false;
}

TEST_CASE("smc-exec-stack: mmap other memory") {
  register_signal_handler();

  // Executing from other memory should fail
  size_t page_size = sysconf(_SC_PAGESIZE);
  void* mapping = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap() reports failure with MAP_FAILED, never with a null pointer.
  REQUIRE(mapping != MAP_FAILED);

  uint8_t* mem_code = static_cast<uint8_t*>(mapping);
  *mem_code = 0xC3; // ret
  ((void (*)())(mem_code))();

  // The page was mapped without execute permission, so the call must have faulted once;
  // the handler then made the page executable and the `ret` completed.
  CHECK(got_signal == true);
  got_signal = false;

  REQUIRE(munmap(mem_code, page_size) == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-missing-gnustack.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <signal.h>
#include <ucontext.h>
#include <sys/mman.h>

bool got_signal = false;

// SIGSEGV handler: requires the fault to be an access-permission error, records that a
// signal was seen, and grants read/write/execute on the faulting page so the interrupted
// instruction can be retried.
static void sigsegv_handler(int signal, siginfo_t* siginfo, void* context) {
  REQUIRE(siginfo->si_code == SEGV_ACCERR);
  got_signal = true;

  // Mask off the in-page offset to get the page start.
  const size_t page_size = sysconf(_SC_PAGESIZE);
  const uintptr_t page_mask = ~(page_size - 1);
  void* fault_page = (void*)((uintptr_t)(siginfo->si_addr) & page_mask);

  REQUIRE(mprotect(fault_page, page_size, PROT_READ | PROT_WRITE | PROT_EXEC) == 0);
}

// Installs sigsegv_handler as the SA_SIGINFO-style handler for SIGSEGV.
void register_signal_handler() {
  struct sigaction segv_action {};
  segv_action.sa_flags = SA_SIGINFO;
  segv_action.sa_sigaction = sigsegv_handler;
  REQUIRE(sigaction(SIGSEGV, &segv_action, nullptr) == 0);
}

TEST_CASE("smc-missing-gnustack: PT_GNU_STACK missing") {
  register_signal_handler();

  // Put a single `ret` at a page-aligned address inside a stack buffer and call it.
  char stack[16384];
  const uintptr_t page_aligned = ((uintptr_t)stack + 4095) & ~4095;
  auto stack_code = (char*)page_aligned;
  *stack_code = 0xC3; // ret

  using VoidFn = void (*)();
  ((VoidFn)(stack_code))();

  // 32-bit: the call is expected to run without a fault. 64-bit: one fault is expected.
#ifdef __i386__
  CHECK(got_signal == false);
#else
  CHECK(got_signal == true);
#endif
  got_signal = false;
}

TEST_CASE("smc-missing-gnustack: mmap other memory") {
  register_signal_handler();
  // Executing from other memory should fail on 64 bit but work on 32 bit
  size_t page_size = sysconf(_SC_PAGESIZE);
  void* mapping = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // mmap() reports failure with MAP_FAILED, never with a null pointer.
  REQUIRE(mapping != MAP_FAILED);

  uint8_t* mem_code = static_cast<uint8_t*>(mapping);
  *mem_code = 0xC3; // ret
  ((void (*)())(mem_code))();

#ifdef __i386__
  CHECK(got_signal == false);
#else
  CHECK(got_signal == true);
#endif
  got_signal = false;

  REQUIRE(munmap(mem_code, page_size) == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-mt-1.cpp
================================================
/*
  tests concurrent invalidation of different code from different threads

  creates 10 threads
  each thread does an smc test 10 times

*/
#include <cstdio>
#include <pthread.h>
#include <sys/mman.h>

#include <atomic>

#include <catch2/catch_test_macros.hpp>

// OR-accumulated failure flag: becomes non-zero once any thread sees an unexpected value.
std::atomic<int> result;
// Start gate: worker threads spin on this until the main thread sets it.
std::atomic<bool> go;

// Worker body: maps a private RWX page, then ten times writes a
// `mov eax, imm32; ret` stub, runs it, and re-runs it after patching one
// immediate byte (directly, and across mprotect toggles). Every mismatch
// between the returned value and the patched immediate is OR-ed into `result`.
void* thread(void*) {

  auto mapping = mmap(0, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, 0, 0);
  if (mapping == MAP_FAILED) {
    // Without this check a failed mapping would be dereferenced as (char*)-1.
    perror("mmap");
    result |= 1;
    return 0;
  }
  auto code = (char*)mapping;

  for (int k = 0; k < 10; k++) {
    // mov eax, 0xDDCCBBAA
    code[0] = 0xB8;
    code[1] = 0xAA;
    code[2] = 0xBB;
    code[3] = 0xCC;
    code[4] = 0xDD;

    // ret
    code[5] = 0xC3;

    while (!go)
      ;

    auto fn = (int (*)())code;
    auto e1 = fn();

    // Patch while the page is writable and executable.
    code[3] = 0xFE;
    auto e2 = fn();

    mprotect(code, 4096, PROT_READ | PROT_EXEC);

    mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);

    // Patch, then drop write permission before executing.
    code[3] = 0xF3;

    mprotect(code, 4096, PROT_READ | PROT_EXEC);

    auto e3 = fn();

    mprotect(code, 4096, PROT_READ | PROT_WRITE | PROT_EXEC);

    // Patch again after write permission has been restored.
    code[3] = 0xF1;

    auto e4 = fn();

    result |= e1 != 0xDDCCBBAA;
    printf("Exec1: %X, %s\n", e1, e1 != 0xDDCCBBAA ? "FAIL" : "PASS");
    result |= e2 != 0xDDFEBBAA;
    printf("Exec2: %X, %s\n", e2, e2 != 0xDDFEBBAA ? "FAIL" : "PASS");
    result |= e3 != 0xDDF3BBAA;
    printf("Exec3: %X, %s\n", e3, e3 != 0xDDF3BBAA ? "FAIL" : "PASS");
    result |= e4 != 0xDDF1BBAA;
    printf("Exec4: %X, %s\n", e4, e4 != 0xDDF1BBAA ? "FAIL" : "PASS");
  }

  // Release the per-thread code page instead of leaking it.
  munmap(code, 4096);

  return 0;
}

// Starts ten workers, opens the start gate, joins them all and requires that
// none of them recorded a mismatch.
TEST_CASE("SMC: Concurrent invalidation of different code from different threads") {
  constexpr int kWorkerCount = 10;
  pthread_t workers[kWorkerCount];

  for (auto& worker : workers) {
    pthread_create(&worker, 0, &thread, 0);
  }

  // Release every worker that is spinning on the gate.
  go = true;

  for (auto& worker : workers) {
    void* unused_return;
    pthread_join(worker, &unused_return);
  }

  CHECK(result == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-mt-2.cpp
================================================
/*
  tests one thread modifying another thread's code

  main thread
  - allocates code buffer
  - starts secondary thread
  - waits to be signaled from secondary thread
  - modifies the code
  - signals secondary thread to claim the code is modified
  - waits for secondary thread to exit, while making sure it doesn't run the old code after modification
  - exits


  secondary thread
  - generates some code and runs it once
  - signals main thread to modify the code
  - waits to be signaled that code was modified
  - calls the code again and checks whether the result is the modified or the unmodified one
  - exits

*/

#include <cstdio>
#include <cstdlib>
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>

#include <atomic>

#include <catch2/catch_test_macros.hpp>

// Handshake flags between the main thread and the secondary thread.
std::atomic<bool> ready_for_modification;   // secondary -> main: code has run once, patch it now
std::atomic<bool> waiting_for_modification; // main -> secondary: the patch has been written
std::atomic<bool> thread_unblocked;         // secondary -> main: the patched result was observed
std::atomic<int> thread_counter;            // number of times the unpatched result was still returned

// Code buffer shared by both threads; mapped by RunIteration.
char* code;

// Secondary thread: emits `mov eax, 0xDDCCBBAA; ret` into `code`, runs it once,
// asks the main thread to patch it, then keeps calling it until the returned
// value is no longer the original immediate.
void* thread(void*) {
  printf("Generating code on thread\n");

  // mov eax, 0xDDCCBBAA ; ret
  const unsigned char stub[] = {0xB8, 0xAA, 0xBB, 0xCC, 0xDD, 0xC3};
  for (size_t i = 0; i < sizeof(stub); i++) {
    code[i] = stub[i];
  }

  auto entry = (int (*)())code;

  // Run the unmodified code once before asking for the patch.
  entry();

  ready_for_modification = true;
  printf("Waiting for code to be modified\n");

  while (!waiting_for_modification) {
  }

  // Count every call that still returns the pre-patch value.
  while (entry() == 0xDDCCBBAA) {
    thread_counter++;
  }

  thread_unblocked = true;
  printf("Thread exiting\n");

  return 0;
}

// Runs one round of the cross-thread patch test: maps a fresh RWX page into
// `code`, starts the secondary thread, patches one immediate byte once the
// thread has run the code, and then checks that the thread stops seeing the
// old result. One late observation of the old value is tolerated; a second one
// terminates the process with exit(1).
void RunIteration() {
  printf("Starting Iteration\n");
  auto mapping = mmap(0, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, 0, 0);
  if (mapping == MAP_FAILED) {
    // The secondary thread writes to `code` immediately, so never hand it (char*)-1.
    perror("mmap");
    exit(1);
  }
  code = (char*)mapping;
  ready_for_modification = false;
  waiting_for_modification = false;
  thread_unblocked = false;
  thread_counter = 0;

  pthread_t tid;
  pthread_create(&tid, 0, &thread, 0);

  while (!ready_for_modification)
    ;

  printf("Modifying code from another thread\n");

  // Change the third immediate byte: the stub now returns 0xDDFEBBAA.
  code[3] = 0xFE;

  waiting_for_modification = true;

  auto counter = thread_counter.load();

  printf("Waiting for thread to get unblocked\n");

  bool once = false;
  while (!thread_unblocked) {
    if (thread_counter != counter) {
      // depending on the patch timing, this might happen once
      if (once) {
        printf("Thread should have been patched to not modify counter here\n");
        exit(1);
      }
      printf("Thread overshoot once, this is non fatal\n");
      once = true;
      counter = thread_counter.load();
    }
  }

  printf("Iteration should finish now\n");
  void* rv;
  pthread_join(tid, &rv);
  printf("Iteration done\n");
  munmap(code, 4096);
}

// Repeats the cross-thread patch scenario 100 times.
TEST_CASE("SMC: One thread modifying another thread's code") {
  int remaining = 100;
  while (remaining-- > 0) {
    RunIteration();
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-shared-1.cpp
================================================
/*
    tests shared / mirrored mappings
*/

#include "smc-common.h"

#include <catch2/catch_test_macros.hpp>

// Shared anonymous RWX page plus a second mapping requested through mremap
// with old_size == 0; both addresses are handed to test_shared.
TEST_CASE("SMC: mmap_mremap") {
  auto original = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANON, 0, 0);
  auto mirror = (char*)mremap(original, 0, 4096, MREMAP_MAYMOVE);

  const auto failures = test_shared(original, mirror, "mmap_mremap");
  CHECK(failures == 0);
}

// Same as mmap_mremap, but the mirrored page is the second page of a
// two-page shared mapping.
TEST_CASE("SMC: mmap_mremap_mid") {
  auto region = (char*)mmap(0, 8192, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANON, 0, 0);
  char* second_page = region + 4096;

  auto mirror = (char*)mremap(second_page, 0, 4096, MREMAP_MAYMOVE);

  const auto failures = test_shared(second_page, mirror, "mmap_mremap_mid");
  CHECK(failures == 0);
}

// One System V segment attached twice: once with default flags and once with SHM_EXEC.
TEST_CASE("SMC: shmat") {
  auto segment_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);

  auto plain_view = (char*)shmat(segment_id, nullptr, 0);
  auto exec_view = (char*)shmat(segment_id, nullptr, SHM_EXEC);

  const auto failures = test_shared(plain_view, exec_view, "shmat");
  CHECK(failures == 0);
}

// System V segment attached with SHM_EXEC, then mirrored through mremap with old_size == 0.
TEST_CASE("SMC: shmat_mremap") {
  auto segment_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);
  auto attached = (char*)shmat(segment_id, nullptr, SHM_EXEC);
  auto mirror = (char*)mremap(attached, 0, 4096, MREMAP_MAYMOVE);

  const auto failures = test_shared(attached, mirror, "shmat_mremap");
  CHECK(failures == 0);
}

// Same as shmat_mremap, but the mirrored page is the second page of a
// two-page System V segment.
TEST_CASE("SMC: shmat_mremap_mid") {
  auto segment_id = shmget(IPC_PRIVATE, 8192, IPC_CREAT | 0777);
  auto attached = (char*)shmat(segment_id, nullptr, SHM_EXEC);
  char* second_page = attached + 4096;
  auto mirror = (char*)mremap(second_page, 0, 4096, MREMAP_MAYMOVE);

  const auto failures = test_shared(second_page, mirror, "shmat_mremap_mid");
  CHECK(failures == 0);
}

// One temporary file mapped twice through the same descriptor: a read/write
// view and a read/execute view.
TEST_CASE("SMC: mmap_mmap") {
  char path_template[] = "smc-tests.XXXXXXXX";
  const int fd = mkstemp(path_template);
  // The name is removed right away; the open descriptor is what gets mapped.
  unlink(path_template);
  REQUIRE(ftruncate(fd, 4096) == 0);

  auto writable_view = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  auto exec_view = (char*)mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

  const auto failures = test_shared(writable_view, exec_view, "mmap_mmap");
  CHECK(failures == 0);
}

// One temporary file mapped through two separate descriptors: read/write via
// the mkstemp descriptor and read/execute via a second, read-only open().
TEST_CASE("SMC: mmap_mmap_fd_fd2") {
  char path_template[] = "smc-tests.XXXXXXXX";
  const int write_fd = mkstemp(path_template);
  // The second descriptor has to be opened before the name is unlinked.
  const int read_fd = open(path_template, O_RDONLY);
  unlink(path_template);
  REQUIRE(ftruncate(write_fd, 4096) == 0);

  auto writable_view = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, write_fd, 0);
  auto exec_view = (char*)mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_SHARED, read_fd, 0);

  const auto failures = test_shared(writable_view, exec_view, "mmap_mmap_fd_fd2");
  CHECK(failures == 0);
}

// One POSIX shared-memory object mapped twice through the same descriptor:
// a read/write view and a read/execute view.
TEST_CASE("SMC: shm_open_mmap_mmap") {
  char object_name[] = "smc-tests.XXXXXXXX";
  mktemp(object_name);
  const int fd = shm_open(object_name, O_RDWR | O_CREAT, 0700);
  // The name is removed right away; the open descriptor is what gets mapped.
  shm_unlink(object_name);
  REQUIRE(ftruncate(fd, 4096) == 0);

  auto writable_view = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  auto exec_view = (char*)mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

  const auto failures = test_shared(writable_view, exec_view, "shm_open_mmap_mmap");
  CHECK(failures == 0);
}

// One POSIX shared-memory object mapped through two separate descriptors:
// read/write via the creating descriptor and read/execute via a second,
// read-only shm_open().
TEST_CASE("SMC: shm_open_mmap_mmap_fd_fd2") {
  char object_name[] = "smc-tests.XXXXXXXX";
  mktemp(object_name);
  const int write_fd = shm_open(object_name, O_RDWR | O_CREAT, 0700);
  // The second descriptor has to be opened before the name is unlinked.
  const int read_fd = shm_open(object_name, O_RDONLY, 0700);
  shm_unlink(object_name);
  REQUIRE(ftruncate(write_fd, 4096) == 0);

  auto writable_view = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, write_fd, 0);
  auto exec_view = (char*)mmap(0, 4096, PROT_READ | PROT_EXEC, MAP_SHARED, read_fd, 0);

  const auto failures = test_shared(writable_view, exec_view, "shm_open_mmap_mmap_fd_fd2");
  CHECK(failures == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-shared-2.cpp
================================================

/*
    tests shared / mirrored mappings
*/

#include "smc-common.h"

#include <catch2/catch_test_macros.hpp>

// Shared anonymous RWX page passed to test_forked as both of its address arguments.
TEST_CASE("SMC: mmap_fork") {
  auto shared_page = (char*)mmap(0, 4096, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_ANON, 0, 0);

  const auto failures = test_forked(shared_page, shared_page, "mmap_fork");
  CHECK(failures == 0);
}

// System V segment attached with SHM_EXEC and passed to test_forked as both
// of its address arguments.
TEST_CASE("SMC: shmat_fork") {
  auto segment_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);
  auto attached = (char*)shmat(segment_id, nullptr, SHM_EXEC);

  const auto failures = test_forked(attached, attached, "shmat_fork");
  CHECK(failures == 0);
}

// The parent attaches a System V segment before forking; the child attaches
// the same segment id again with SHM_EXEC and runs test_shared on the pair.
// The parent waits for a child and checks its exit status.
TEST_CASE("SMC: fork_shmat_same_shmid") {
  auto segment_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0777);
  auto inherited_view = (char*)shmat(segment_id, nullptr, 0);
  // NOTE: Forking in a test will fork the entire Catch2 test runtime.
  //       That's not great, but it doesn't seem to cause any issues other
  //       than printing test results twice
  const auto fork_result = fork();
  if (fork_result != 0) {
    // Parent side (also taken when fork() fails).
    int status;
    wait(&status);
    CHECK(WEXITSTATUS(status) == 0);
  } else {
    // Child side.
    auto exec_view = (char*)shmat(segment_id, nullptr, SHM_EXEC);
    CHECK(test_shared(inherited_view, exec_view, "fork_shmat_same_shmid") == 0);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/smc/smc-unexec-stack.cpp
================================================
#include <catch2/catch_test_macros.hpp>
#include <signal.h>
#include <ucontext.h>
#include <sys/mman.h>

// Flag raised by the SIGSEGV handler; the test inspects it after the call.
bool got_signal = false;

// Handles SIGSEGV: the fault must be an access-permission error, after which
// the page holding the faulting address is remapped as read/write/execute.
static void sigsegv_handler(int signal, siginfo_t* siginfo, void* context) {
  REQUIRE(siginfo->si_code == SEGV_ACCERR);
  got_signal = true;

  const size_t page_len = sysconf(_SC_PAGESIZE);
  const uintptr_t page_mask = ~(page_len - 1);
  // Mask off the in-page offset to obtain the page base address.
  void* page_base = (void*)((uintptr_t)(siginfo->si_addr) & page_mask);
  REQUIRE(mprotect(page_base, page_len, PROT_READ | PROT_WRITE | PROT_EXEC) == 0);
}

// Registers sigsegv_handler as the SA_SIGINFO-style handler for SIGSEGV.
void register_signal_handler() {
  struct sigaction handler_config {};
  handler_config.sa_flags = SA_SIGINFO;
  handler_config.sa_sigaction = sigsegv_handler;
  REQUIRE(sigaction(SIGSEGV, &handler_config, nullptr) == 0);
}

// Executes a single `ret` placed on the stack and expects the SIGSEGV handler to have run.
TEST_CASE("smc-unexec-stack: PT_GNU_STACK == RW") {
  register_signal_handler();

  // Reserve enough stack to contain a full 4 KiB-aligned page.
  char stack[16384];
  const uintptr_t first_page_boundary = ((uintptr_t)stack + 4095) & ~4095;
  auto stack_code = (char*)first_page_boundary;

  *stack_code = 0xC3; // ret instruction
  auto entry = (void (*)())(stack_code);
  entry();

  CHECK(got_signal == true);
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/execveat_memfd.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <cstdio>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>

// Reads the whole file at Path into memory.
//
// The size is taken from fstat(); the file is then read until that many bytes
// have arrived, retrying on EINTR/EAGAIN. Any other read error, or hitting EOF
// before the expected size, fails the current test through REQUIRE.
// Returns the file contents (empty for an empty file).
static std::vector<char> LoadFile(const char* Path) {
  int fd = open(Path, O_RDONLY);
  REQUIRE(fd != -1);

  struct stat st {};
  REQUIRE(fstat(fd, &st) != -1);

  std::vector<char> Result {};
  Result.resize(st.st_size);

  size_t DidRead {};
  while (DidRead < Result.size()) {
    auto Read = read(fd, Result.data() + DidRead, Result.size() - DidRead);

    if (Read == -1 && (errno == EINTR || errno == EAGAIN)) {
      // Transient condition, try again.
      continue;
    }

    // -1 is a hard error and 0 is a premature EOF. Neither may be added to the
    // offset: -1 would walk backwards and 0 would spin forever.
    REQUIRE(Read > 0);

    DidRead += Read;
  }

  // The contents are fully buffered; the descriptor is no longer needed.
  close(fd);

  return Result;
}

// Copies /usr/bin/true into a close-on-exec memfd and executes it with
// execveat(fd, "", ..., AT_EMPTY_PATH). On success the process image is
// replaced, so everything after the syscall runs only on failure.
TEST_CASE("execveat - memfd - MFD_CLOEXEC") {
  auto MapsFile = LoadFile("/usr/bin/true");
  REQUIRE(MapsFile.size() != 0);

  int fd = memfd_create("Anonymous", MFD_CLOEXEC | MFD_ALLOW_SEALING);
  REQUIRE(fd != -1);

  size_t Written {};
  while (Written < MapsFile.size()) {
    auto Wrote = write(fd, MapsFile.data() + Written, MapsFile.size() - Written);
    if (Wrote == -1 && (errno == EINTR || errno == EAGAIN)) {
      // Transient condition, try again.
      continue;
    }

    // A hard error must fail the test; adding -1 to the offset would corrupt it.
    REQUIRE(Wrote != -1);
    Written += Wrote;
  }

  const char* argv[] = {"tmp", nullptr};
  auto Res = ::syscall(SYS_execveat, fd, "", argv, nullptr, AT_EMPTY_PATH);

  // Will only get here if execveat fails.
  close(fd);
  REQUIRE(Res == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/futimesat.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>

// Seconds/microseconds pair handed straight to the raw futimesat syscall by
// compat_futimesat. Both fields are `long`, so their width follows the
// target the test is compiled for.
struct compat_timeval {
  long tv_sec;  // seconds
  long tv_usec; // microseconds
};

// Invokes the futimesat syscall directly through syscall(2), and returns
// whatever syscall() returned converted to uint64_t (so -1 becomes all ones).
uint64_t compat_futimesat(int dirfd, const char* pathname, const struct compat_timeval times[2]) {
  const auto raw_result = ::syscall(SYS_futimesat, dirfd, pathname, times);
  return raw_result;
}

// A tv_usec of -1 in both entries must be rejected with EINVAL.
TEST_CASE("futimesat - invalid - minimum") {
  compat_timeval times[2] {};
  for (auto& entry : times) {
    entry.tv_sec = 0;
    entry.tv_usec = -1;
  }

  char path[] = "futimesat-tests.XXXXXXXX";
  const int fd = mkstemp(path);
  REQUIRE(fd != -1);

  REQUIRE(compat_futimesat(fd, nullptr, times) == -1);
  CHECK(errno == EINVAL);

  // Clean up the temporary file and its descriptor.
  REQUIRE(unlinkat(AT_FDCWD, path, 0) != -1);
  REQUIRE(close(fd) != -1);
}

// A tv_usec of 1000000 (one full second) in both entries must be rejected with EINVAL.
TEST_CASE("futimesat - invalid - maximum") {
  compat_timeval times[2] {};
  for (auto& entry : times) {
    entry.tv_sec = 0;
    entry.tv_usec = 1000000;
  }

  char path[] = "futimesat-tests.XXXXXXXX";
  const int fd = mkstemp(path);
  REQUIRE(fd != -1);

  REQUIRE(compat_futimesat(fd, nullptr, times) == -1);
  CHECK(errno == EINVAL);

  // Clean up the temporary file and its descriptor.
  REQUIRE(unlinkat(AT_FDCWD, path, 0) != -1);
  REQUIRE(close(fd) != -1);
}

// Passing a null times pointer must succeed and leave access/modification
// times no earlier than a timestamp sampled before the call.
TEST_CASE("futimesat - valid - null") {
  char path[] = "futimesat-tests.XXXXXXXX";
  const int fd = mkstemp(path);
  REQUIRE(fd != -1);

  timespec before {};
  REQUIRE(clock_gettime(CLOCK_REALTIME, &before) == 0);

  // Remove the nanoseconds to ensure consistent time setting.
  before.tv_nsec = 0;

  // A small sleep because CPU time and filesystem time might be slightly off.
  sleep(1);

  // Sets the time to "Now".
  REQUIRE(compat_futimesat(fd, nullptr, nullptr) == 0);
  REQUIRE(unlinkat(AT_FDCWD, path, 0) != -1);

  // Query the timestamps through the still-open descriptor.
  struct stat info {};
  REQUIRE(fstat(fd, &info) == 0);
  CHECK(info.st_atim.tv_sec >= before.tv_sec);
  CHECK(info.st_mtim.tv_sec >= before.tv_sec);

  REQUIRE(close(fd) != -1);
}

// Setting both timestamps to 60 seconds after the current time must succeed
// and be reflected exactly by fstat().
TEST_CASE("futimesat - valid - future") {
  char path[] = "futimesat-tests.XXXXXXXX";
  const int fd = mkstemp(path);
  REQUIRE(fd != -1);

  timespec now {};
  REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);

  compat_timeval times[2] {};
  for (auto& entry : times) {
    entry.tv_sec = now.tv_sec + 60;
    entry.tv_usec = 0;
  }

  // Sets access and modification time to one minute in the future.
  REQUIRE(compat_futimesat(fd, nullptr, times) == 0);
  REQUIRE(unlinkat(AT_FDCWD, path, 0) != -1);

  // Query the timestamps through the still-open descriptor.
  struct stat info {};
  REQUIRE(fstat(fd, &info) == 0);
  CHECK(info.st_atim.tv_sec == times[0].tv_sec);
  CHECK(info.st_mtim.tv_sec == times[1].tv_sec);

  REQUIRE(close(fd) != -1);
}

// Setting both timestamps to 60 seconds before the current time must succeed
// and be reflected exactly by fstat().
TEST_CASE("futimesat - valid - past") {
  char path[] = "futimesat-tests.XXXXXXXX";
  const int fd = mkstemp(path);
  REQUIRE(fd != -1);

  timespec now {};
  REQUIRE(clock_gettime(CLOCK_REALTIME, &now) == 0);

  compat_timeval times[2] {};
  for (auto& entry : times) {
    entry.tv_sec = now.tv_sec - 60;
    entry.tv_usec = 0;
  }

  // Sets access and modification time to one minute in the past.
  REQUIRE(compat_futimesat(fd, nullptr, times) == 0);
  REQUIRE(unlinkat(AT_FDCWD, path, 0) != -1);

  // Query the timestamps through the still-open descriptor.
  struct stat info {};
  REQUIRE(fstat(fd, &info) == 0);
  CHECK(info.st_atim.tv_sec == times[0].tv_sec);
  CHECK(info.st_mtim.tv_sec == times[1].tv_sec);

  REQUIRE(close(fd) != -1);
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/personality.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <cstdint>
#include <sys/personality.h>
#include <sys/utsname.h>
#include <string_view>

// Argument value the tests pass to personality() to read the current persona back.
constexpr uint32_t QUERY_PERSONA = ~0U;

// After setting persona 0, querying must report 0.
TEST_CASE("default - query") {
  const auto set_result = ::personality(0);
  REQUIRE(set_result != -1);

  const auto current = ::personality(QUERY_PERSONA);
  CHECK(current == 0);
}

// Sets every persona bit except the lowest one (-2U) and expects to read the same value back.
TEST_CASE("default - set all") {
  const auto set_result = ::personality(-2U);
  REQUIRE(set_result != -1);

  const auto current = ::personality(QUERY_PERSONA);
  CHECK(current == -2U);
}

// uname() must report "x86_64" under persona 0 and "i686" once PER_LINUX32 is set.
TEST_CASE("default - check linux32") {
  REQUIRE(::personality(0) != -1);

  struct utsname info {};
  uname(&info);
  const std::string_view default_machine {info.machine};
  CHECK(default_machine == "x86_64");

  CHECK(::personality(PER_LINUX32) != -1);
  const auto current = ::personality(QUERY_PERSONA);
  CHECK(current == PER_LINUX32);

  // Re-read the machine name now that the persona has changed.
  uname(&info);
  const std::string_view linux32_machine {info.machine};
  CHECK(linux32_machine == "i686");
}

// With UNAME26 set, the release string reported by uname() must start with "2.6.".
TEST_CASE("default - check uname26") {
  REQUIRE(::personality(UNAME26) != -1);
  const auto current = ::personality(QUERY_PERSONA);
  CHECK(current == UNAME26);

  struct utsname info {};
  uname(&info);
  const std::string_view release {info.release};
  CHECK(release.starts_with("2.6."));
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/syscall_exit.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <unistd.h>
#include <sys/wait.h>
#include <sys/syscall.h>

// The child terminates through the raw exit syscall with status 1; the parent
// must observe a normal exit with exactly that status.
TEST_CASE("fork - exit") {
  int child_pid = ::fork();
  // In the child this is 0 and passes; in the parent it catches a failed fork
  // before waitpid(-1, ...) could wait on an unrelated child.
  REQUIRE(child_pid != -1);

  if (child_pid == 0) {
    ::syscall(SYS_exit, 1);
    // unreachable
    std::terminate();
  } else {
    int status {};
    // status is only meaningful if waitpid actually reaped our child.
    REQUIRE(::waitpid(child_pid, &status, 0) == child_pid);
    REQUIRE(WIFEXITED(status) == 1);
    CHECK(WEXITSTATUS(status) == 1);
  }
}

// The child kills itself with SIGKILL through the raw tgkill syscall; the
// parent must observe termination by that signal.
TEST_CASE("fork - signal") {
  int child_pid = ::fork();
  // In the child this is 0 and passes; in the parent it catches a failed fork
  // before waitpid(-1, ...) could wait on an unrelated child.
  REQUIRE(child_pid != -1);

  if (child_pid == 0) {
    ::syscall(SYS_tgkill, ::getpid(), ::gettid(), SIGKILL);
    // unreachable
    std::terminate();
  } else {
    int status {};
    // status is only meaningful if waitpid actually reaped our child.
    REQUIRE(::waitpid(child_pid, &status, 0) == child_pid);
    REQUIRE(WIFSIGNALED(status) == 1);
    CHECK(WTERMSIG(status) == SIGKILL);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/syscall_sigaltstack.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <unistd.h>
#include <sys/wait.h>
#include <sys/syscall.h>

// Tries every alternate-stack size from 1 to 4095 bytes: sizes below
// EXPECTED_MIN must be rejected (-1), all others accepted (0).
TEST_CASE("sysaltstack - minimum") {
  constexpr size_t EXPECTED_MIN = 2048;
  char backing[4096];

  stack_t alt_stack {
    .ss_sp = backing,
    .ss_flags = 0,
    .ss_size = 0,
  };

  for (size_t candidate = 1; candidate < sizeof(backing); ++candidate) {
    alt_stack.ss_size = candidate;
    const int expected = candidate < EXPECTED_MIN ? -1 : 0;
    CHECK(sigaltstack(&alt_stack, nullptr) == expected);
  }
}


================================================
FILE: unittests/FEXLinuxTests/tests/syscalls/syscalls_efault.cpp
================================================
#include <catch2/catch_test_macros.hpp>

#include <cstdint>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <poll.h>
#include <signal.h>

// poll must fail with EFAULT when the pollfd array lives in inaccessible memory.
TEST_CASE("poll") {
  // poll can return EFAULT if first argument is pointed to invalid pointer.
  // Using mmap specifically for allocating with PROT_NONE.
  const size_t page_size = sysconf(_SC_PAGESIZE);
  void* mapping = mmap(nullptr, page_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  // A failed mmap would hand MAP_FAILED to poll and produce EFAULT for the wrong reason.
  REQUIRE(mapping != MAP_FAILED);
  struct pollfd* invalid_fds = reinterpret_cast<struct pollfd*>(mapping);

  auto ret = ::syscall(SYS_poll, invalid_fds, 1, 0);
  // Capture errno before the cleanup call below can overwrite it.
  const int poll_errno = errno;

  REQUIRE(munmap(mapping, page_size) == 0);

  REQUIRE(ret == -1);
  CHECK(poll_errno == EFAULT);
}

// Exercises the raw ppoll syscall with inaccessible memory for each pointer
// argument, plus a fully valid call and a read-only timespec.
TEST_CASE("ppoll") {
  // ppoll can return EFAULT for arguments 1, 3, 4.
  // Using mmap specifically for allocating with PROT_NONE.
  const size_t page_size = sysconf(_SC_PAGESIZE);
  const auto map_inaccessible_page = [page_size]() {
    void* page = mmap(nullptr, page_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // A failed mmap would yield MAP_FAILED and make the EFAULT checks pass for the wrong reason.
    REQUIRE(page != MAP_FAILED);
    return page;
  };

  struct pollfd* invalid_fds = reinterpret_cast<struct pollfd*>(map_inaccessible_page());
  struct timespec* invalid_timespec = reinterpret_cast<struct timespec*>(map_inaccessible_page());
  sigset_t* invalid_sigset = reinterpret_cast<sigset_t*>(map_inaccessible_page());

  SECTION("invalid fds") {
    // The timeout is a pointer argument: pass nullptr rather than an int 0 through varargs.
    auto ret = ::syscall(SYS_ppoll, invalid_fds, 1, nullptr, nullptr, nullptr);
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("invalid timespec") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };
    auto ret = ::syscall(SYS_ppoll, &valid_fds, 1, invalid_timespec, nullptr, sizeof(uint64_t));
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("invalid sigset") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    struct timespec valid_ts {};
    auto ret = ::syscall(SYS_ppoll, &valid_fds, 1, &valid_ts, invalid_sigset, sizeof(uint64_t));
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("valid configuration") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    struct timespec valid_ts {};
    sigset_t valid_sigset {};
    sigemptyset(&valid_sigset);
    auto ret = ::syscall(SYS_ppoll, &valid_fds, 1, &valid_ts, &valid_sigset, sizeof(uint64_t));
    REQUIRE(ret == 0);
  }

  SECTION("invalid timespec write-back") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    // Kernel will read timespec, but it then can't write the result back.
    REQUIRE(mprotect(invalid_timespec, page_size, PROT_READ | PROT_WRITE) == 0);
    invalid_timespec->tv_sec = 1;
    REQUIRE(mprotect(invalid_timespec, page_size, PROT_READ) == 0);

    sigset_t valid_sigset {};
    sigemptyset(&valid_sigset);
    auto ret = ::syscall(SYS_ppoll, &valid_fds, 1, invalid_timespec, &valid_sigset, sizeof(uint64_t));
    REQUIRE(ret == 0);
    CHECK(invalid_timespec->tv_sec == 1);
  }

  // Catch2 re-runs the test body once per SECTION, so release the pages on
  // every pass instead of leaking three mappings each time.
  REQUIRE(munmap(invalid_fds, page_size) == 0);
  REQUIRE(munmap(invalid_timespec, page_size) == 0);
  REQUIRE(munmap(invalid_sigset, page_size) == 0);
}

// Seconds/nanoseconds pair with two 64-bit fields, used by the ppoll_64 test
// as the timeout argument of the SYS_ppoll_time64 syscall.
struct timespec64 {
  uint64_t tv_sec, tv_nsec;
};

// Constant one-second timeout passed to the kernel by the "invalid timespec
// write-back" section of ppoll_64.
// NOTE(review): presumably relies on this const static object being placed in
// read-only memory so the kernel cannot write the remaining time back — confirm.
static const timespec64 readonly_ts {
  .tv_sec = 1,
  .tv_nsec = 0,
};


// Same scenarios as the "ppoll" test, but through SYS_ppoll_time64 with a
// 64-bit timespec. Where the libc headers do not define SYS_ppoll_time64 the
// test falls back to SYS_ppoll.
TEST_CASE("ppoll_64") {
#ifndef SYS_ppoll_time64
#define SYS_ppoll_time64 SYS_ppoll
#endif
  // ppoll can return EFAULT for arguments 1, 3, 4
  // Using mmap specifically for allocating with PROT_NONE.
  const size_t page_size = sysconf(_SC_PAGESIZE);
  const auto map_inaccessible_page = [page_size]() {
    void* page = mmap(nullptr, page_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // A failed mmap would yield MAP_FAILED and make the EFAULT checks pass for the wrong reason.
    REQUIRE(page != MAP_FAILED);
    return page;
  };

  struct pollfd* invalid_fds = reinterpret_cast<struct pollfd*>(map_inaccessible_page());
  timespec64* invalid_timespec = reinterpret_cast<timespec64*>(map_inaccessible_page());
  sigset_t* invalid_sigset = reinterpret_cast<sigset_t*>(map_inaccessible_page());

  SECTION("invalid fds") {
    // The timeout is a pointer argument: pass nullptr rather than an int 0 through varargs.
    auto ret = ::syscall(SYS_ppoll_time64, invalid_fds, 1, nullptr, nullptr, nullptr);
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("invalid timespec") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };
    auto ret = ::syscall(SYS_ppoll_time64, &valid_fds, 1, invalid_timespec, nullptr, sizeof(uint64_t));
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("invalid sigset") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    timespec64 valid_ts {};
    auto ret = ::syscall(SYS_ppoll_time64, &valid_fds, 1, &valid_ts, invalid_sigset, sizeof(uint64_t));
    REQUIRE(ret == -1);
    CHECK(errno == EFAULT);
  }

  SECTION("valid configuration") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    timespec64 valid_ts {};
    sigset_t valid_sigset {};
    sigemptyset(&valid_sigset);
    auto ret = ::syscall(SYS_ppoll_time64, &valid_fds, 1, &valid_ts, &valid_sigset, sizeof(uint64_t));
    REQUIRE(ret == 0);
  }

  SECTION("invalid timespec write-back") {
    struct pollfd valid_fds {
      .fd = STDOUT_FILENO,
      .events = 0,
      .revents = 0,
    };

    // Kernel will read timespec, but it then can't write the result back.
    sigset_t valid_sigset {};
    sigemptyset(&valid_sigset);
    auto ret = ::syscall(SYS_ppoll_time64, &valid_fds, 1, &readonly_ts, &valid_sigset, sizeof(uint64_t));
    REQUIRE(ret == 0);
    CHECK(readonly_ts.tv_sec == 1);
  }

  // Catch2 re-runs the test body once per SECTION, so release the pages on
  // every pass instead of leaking three mappings each time.
  REQUIRE(munmap(invalid_fds, page_size) == 0);
  REQUIRE(munmap(invalid_timespec, page_size) == 0);
  REQUIRE(munmap(invalid_sigset, page_size) == 0);
}


================================================
FILE: unittests/FEXLinuxTests/tests/thunks/thunk_testlib.cpp
================================================
#define GUEST_THUNK_LIBRARY

#include <dlfcn.h>

#include <stdexcept>

#include <catch2/catch_test_macros.hpp>

#include "../../../../ThunkLibs/libfex_thunk_test/api.h"

// Catch2 fixture that opens libfex_thunk_test.so and resolves the API
// functions exercised by the test cases below.
struct Fixture {
  // Handle returned by dlopen(); initialised first so the dlsym() calls in the
  // members below can use it. Throws std::runtime_error if the library cannot
  // be opened. The struct declares no destructor, so the handle is never
  // passed to dlclose() here.
  void* lib = []() {
    auto ret = dlopen("libfex_thunk_test.so", RTLD_LAZY);
    if (!ret) {
      throw std::runtime_error("Failed to open lib\n");
    }
    return ret;
  }();

  // Declares a member named `name` whose type is a pointer to the global
  // function `::name` (as declared in api.h) and initialises it with the
  // address dlsym() reports for the symbol of the same name. The dlsym()
  // result is not checked for null.
#define GET_SYMBOL(name) decltype(&::name) name = (decltype(name))dlsym(lib, #name)
  GET_SYMBOL(GetDoubledValue);

  // Opaque type helpers.
  GET_SYMBOL(MakeOpaqueType);
  GET_SYMBOL(ReadOpaqueTypeData);
  GET_SYMBOL(DestroyOpaqueType);

  // Union type helpers.
  GET_SYMBOL(MakeUnionType);
  GET_SYMBOL(GetUnionTypeA);

  // Struct repacking helpers.
  GET_SYMBOL(MakeReorderingType);
  GET_SYMBOL(GetReorderingTypeMember);
  GET_SYMBOL(GetReorderingTypeMemberWithoutRepacking);
  GET_SYMBOL(ModifyReorderingTypeMembers);
  GET_SYMBOL(QueryOffsetOf);

  GET_SYMBOL(RanCustomRepack);

  GET_SYMBOL(FunctionWithDivergentSignature);

  GET_SYMBOL(ReadData1);
};

// Smoke test: a plain integer round trip through the library.
TEST_CASE_METHOD(Fixture, "Trivial") {
  const auto doubled = GetDoubledValue(10);
  CHECK(doubled == 20);
}

// Values stored in an opaque object and in a union must be readable again
// through the library's accessors.
TEST_CASE_METHOD(Fixture, "Opaque data types") {
  {
    // Create, read back and destroy an opaque object.
    auto opaque = MakeOpaqueType(0x1234);
    CHECK(ReadOpaqueTypeData(opaque) == 0x1234);
    DestroyOpaqueType(opaque);
  }

  {
    // Four single-byte inputs are expected back as one combined 32-bit value.
    auto packed = MakeUnionType(0x1, 0x2, 0x3, 0x4);
    CHECK(GetUnionTypeA(&packed) == 0x04030201);
  }
}

// Checks repacking of ReorderingType for return values, input pointers and
// output pointers.
TEST_CASE_METHOD(Fixture, "Automatic struct repacking") {
  // Test repacking of return values
  ReorderingType reordered = MakeReorderingType(0x1234, 0x5678);
  REQUIRE(reordered.a == 0x1234);
  REQUIRE(reordered.b == 0x5678);

  // Test offsets of the host-side guest_layout wrapper match the guest-side ones
  CHECK(QueryOffsetOf(&reordered, 0) == offsetof(ReorderingType, a));
  CHECK(QueryOffsetOf(&reordered, 1) == offsetof(ReorderingType, b));

  // Test repacking of input pointers
  CHECK(GetReorderingTypeMember(&reordered, 0) == 0x1234);
  CHECK(GetReorderingTypeMember(&reordered, 1) == 0x5678);

  // Test that we can force reinterpreting the data in guest layout as host layout
  CHECK(GetReorderingTypeMemberWithoutRepacking(&reordered, 0) == 0x5678);
  CHECK(GetReorderingTypeMemberWithoutRepacking(&reordered, 1) == 0x1234);

  // Test repacking of output pointers
  ModifyReorderingTypeMembers(&reordered);
  CHECK(GetReorderingTypeMember(&reordered, 0) == 0x1235);
  CHECK(GetReorderingTypeMember(&reordered, 1) == 0x567a);
}

// RanCustomRepack must return 1 for a default-initialised CustomRepackedType.
TEST_CASE_METHOD(Fixture, "Assisted struct repacking") {
  CustomRepackedType repacked {};
  const auto ran = RanCustomRepack(&repacked);
  CHECK(ran == 1);
}

// Four DivType arguments (1..4) are expected back as one combined value, 0x01020304.
TEST_CASE_METHOD(Fixture, "Function signature with differing parameter sizes") {
  const auto combined = FunctionWithDivergentSignature(DivType {1}, DivType {2}, DivType {3}, DivType {4});
  CHECK(combined == 0x01020304);
}

// Test Vulkan-like linked lists: a TestStruct1 head chained to a TestStruct2
// through Next; ReadData1 is queried for the Data1 of element 0 and element 1.
TEST_CASE_METHOD(Fixture, "Assisted repacking of linked lists") {
  const int tail_data = 0xcddeeff;
  const int head_data = 0x1234567;

  TestStruct2 tail {
    .Next = nullptr,
    .Type = StructType::Struct2,
    .Data1 = tail_data,
  };

  TestStruct1 head {
    .Next = &tail,
    .Type = StructType::Struct1,
    .Data2 = 0xab,
    .Data1 = head_data,
  };

  CHECK(ReadData1(&head, 0) == head_data);
  CHECK(ReadData1(&head, 1) == tail_data);
}


================================================
FILE: unittests/FEXLinuxTests/tests/vdso/vdso_test.cpp
================================================
#include <map>
#include <sys/auxv.h>
#include <string>
#include <unistd.h>
#include <fstream>
#include <elf.h>
#include <dlfcn.h>
#include <cstdint>
#include <sys/time.h>
#include <time.h>
#include <stdio.h>

#include <catch2/catch_test_macros.hpp>

// Function-pointer globals for the five calls under test. Each one starts out
// pointing at the libc function of the same purpose; LoadVDSO() overwrites it
// with the matching __vdso_* symbol when the vDSO provides one.

// NOTE(review): ::time returns time_t, but this alias declares an int return,
// so calls through it use a mismatched function type — confirm this is intended.
using time_type = int (*)(time_t* tloc);
time_type time_vdso = (time_type)::time;

using gettimeofday_type = int (*)(struct timeval* tv, struct timezone* tz);
gettimeofday_type gettimeofday_vdso = (gettimeofday_type)::gettimeofday;

using gettime_type = int (*)(clockid_t, struct timespec*);
gettime_type gettime_vdso = (gettime_type)::clock_gettime;

using getres_type = int (*)(clockid_t, struct timespec*);
getres_type getres_vdso = (getres_type)::clock_getres;

using getcpu_type = int (*)(uint32_t* cpu, uint32_t* node);
getcpu_type getcpu_vdso = (getcpu_type)::getcpu;

// Only on targets with 4-byte pointers: a timespec with two 64-bit fields and
// a pointer for clock_gettime64. It has no libc fallback and stays null unless
// LoadVDSO() finds the symbol.
#if __SIZEOF_POINTER__ == 4
struct timespec64 {
  int64_t tv_sec;
  int64_t tv_nsec;
};

using gettime64_type = int (*)(clockid_t, struct timespec64*);
gettime64_type gettime64_vdso = nullptr;
#endif

// Minimal in-memory ELF reader that collects the dynamic symbols of an image
// (the vDSO in this test) into a name -> address map.
//
// The ELF structure types are chosen by pointer size so the parser matches the
// bitness of the process it runs in.
class VDSOParser {
#if __SIZEOF_POINTER__ == 8
  using ELFHeader = Elf64_Ehdr;
  using ELFSectionHeader = Elf64_Shdr;
  using ELFSymbol = Elf64_Sym;
#else
  using ELFHeader = Elf32_Ehdr;
  using ELFSectionHeader = Elf32_Shdr;
  using ELFSymbol = Elf32_Sym;

#endif
public:
  // Parses the ELF image that starts at Ptr.
  //
  // Walks the section headers for the SHT_DYNSYM section and the last
  // SHT_STRTAB section with a non-zero sh_addr, then records every symbol
  // whose st_info is non-zero as `Ptr + st_value`.
  // If Ptr is null, either section is missing, or the symbol entry size is
  // zero, VDSOSymbols is left empty instead of dereferencing a null header or
  // dividing by zero.
  VDSOParser(uint8_t* Ptr) {
    if (!Ptr) {
      return;
    }

    const ELFHeader* Header = reinterpret_cast<const ELFHeader*>(Ptr);
    const uint64_t SectionHeaderOffset = Header->e_shoff;
    const uint16_t SectionHeaderCount = Header->e_shnum;
    const ELFSectionHeader* SHdrs = reinterpret_cast<const ELFSectionHeader*>(&Ptr[SectionHeaderOffset]);

    const ELFSectionHeader* DynamicSymHeader {nullptr};
    const ELFSectionHeader* DynamicStringHeader {nullptr};
    for (size_t i = 0; i < SectionHeaderCount; ++i) {
      // Only string tables that carry an address are considered.
      if (SHdrs[i].sh_type == SHT_STRTAB && SHdrs[i].sh_addr) {
        DynamicStringHeader = &SHdrs[i];
      }
      if (SHdrs[i].sh_type == SHT_DYNSYM) {
        DynamicSymHeader = &SHdrs[i];
      }
    }

    if (!DynamicSymHeader || !DynamicStringHeader || DynamicSymHeader->sh_entsize == 0) {
      // Nothing usable to enumerate.
      return;
    }

    const size_t NumDynSymSymbols = DynamicSymHeader->sh_size / DynamicSymHeader->sh_entsize;
    const char* DynStrTab = reinterpret_cast<const char*>(&Ptr[DynamicStringHeader->sh_offset]);

    for (size_t i = 0; i < NumDynSymSymbols; ++i) {
      const uint64_t offset = DynamicSymHeader->sh_offset + i * DynamicSymHeader->sh_entsize;
      const ELFSymbol* Symbol = reinterpret_cast<const ELFSymbol*>(&Ptr[offset]);

      const char* Name = &DynStrTab[Symbol->st_name];
      // Entries with st_info == 0 (such as the leading null symbol) are skipped.
      if (Symbol->st_info != 0) {
        uint8_t* SymbolPtr = Symbol->st_value + Ptr;
        VDSOSymbols[Name] = SymbolPtr;
        printf("Found VDSO symbol '%s' at %p\n", Name, static_cast<void*>(SymbolPtr));
      }
    }
  }

  // Returns the recorded address of the symbol named String, or nullptr if no
  // such symbol was found while parsing.
  uint8_t* GetVDSOSymbol(const char* String) {
    auto it = VDSOSymbols.find(String);
    if (it != VDSOSymbols.end()) {
      return it->second;
    }

    return nullptr;
  }

  // Symbol name -> address inside the parsed image.
  std::map<std::string, uint8_t*> VDSOSymbols;
};

// Locates the vDSO image via the auxiliary vector and stores the entry points
// this test exercises into the *_vdso function pointers declared outside this
// function.
//
// A pointer is only assigned when the matching symbol is found; otherwise it
// keeps whatever value it had before the call. Prints "No VDSO" and returns
// without touching any pointer when AT_SYSINFO_EHDR is zero.
static void LoadVDSO() {
  // getauxval() returns `unsigned long`, which matches the pointer width on
  // Linux. Keeping that type avoids casting a 64-bit integer to a 32-bit
  // pointer on builds where __SIZEOF_POINTER__ == 4.
  const unsigned long Begin = ::getauxval(AT_SYSINFO_EHDR);
  if (!Begin) {
    printf("No VDSO\n");
    return;
  }

  VDSOParser VDSO(reinterpret_cast<uint8_t*>(Begin));

  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_time")) {
    time_vdso = reinterpret_cast<time_type>(Sym);
  }

  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_gettimeofday")) {
    gettimeofday_vdso = reinterpret_cast<gettimeofday_type>(Sym);
  }

  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_clock_gettime")) {
    gettime_vdso = reinterpret_cast<gettime_type>(Sym);
  }

  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_clock_getres")) {
    getres_vdso = reinterpret_cast<getres_type>(Sym);
  }

  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_getcpu")) {
    getcpu_vdso = reinterpret_cast<getcpu_type>(Sym);
  }

#if __SIZEOF_POINTER__ == 4
  // Only looked up on builds with 4-byte pointers.
  if (uint8_t* Sym = VDSO.GetVDSOSymbol("__vdso_clock_gettime64")) {
    gettime64_vdso = reinterpret_cast<gettime64_type>(Sym);
  }
#endif
}


// Resolves the vDSO entry points and calls each one once, printing what it
// returned and sanity-checking the result.
TEST_CASE("VDSO") {
  LoadVDSO();

  // Every symbol called unconditionally below must have been resolved;
  // REQUIRE stops the test case here instead of calling a null pointer.
  REQUIRE(time_vdso != 0);
  REQUIRE(gettimeofday_vdso != 0);
  REQUIRE(gettime_vdso != 0);
  REQUIRE(getres_vdso != 0);
  REQUIRE(getcpu_vdso != 0);

  // There are few strict guarantees on the return values of these functions,
  // so instead we make some educated guesses to check for valid outputs below

  // Kept outside the scope below because the gettimeofday check compares against it.
  time_t tloc {};
  {
    int result = time_vdso(&tloc);
    printf("time\n");
    CHECK(result != -1);
    printf("\tResult: %d\n", result);
    printf("\tTime_t: 0x%lx\n", tloc);
    CHECK(tloc > 946684800); // Ensure it's later than year 2000
  }

  {
    timeval tv {};
    int result = gettimeofday_vdso(&tv, nullptr);
    printf("gettimeofday\n");
    CHECK(result == 0);
    printf("\tTime: 0x%lx 0x%lx\n", tv.tv_sec, tv.tv_usec);
    // Ensure gettimeofday and time results are consistent
    // (allowing the seconds counter to have ticked over once in between)
    CHECK(tv.tv_sec >= tloc);
    CHECK(tv.tv_sec <= tloc + 1);
  }

  {
    timespec ts {};
    int result = gettime_vdso(CLOCK_MONOTONIC, &ts);
    printf("clock_gettime\n");
    CHECK(result == 0);
    printf("\tTime: 0x%lx 0x%lx\n", ts.tv_sec, ts.tv_nsec);
  }

  {
    timespec ts {};
    int result = getres_vdso(CLOCK_MONOTONIC, &ts);
    printf("clock_getres\n");
    CHECK(result == 0);
    printf("\tTime: 0x%lx 0x%lx\n", ts.tv_sec, ts.tv_nsec);
    // Expect a sub-second, non-zero resolution
    CHECK(ts.tv_sec == 0);
    CHECK(ts.tv_nsec > 0);
  }

  {
    // CHECK does not abort the test case on failure, so the printf calls below
    // still run if getcpu fails. Zero-initialize the outputs so they never
    // read indeterminate values.
    uint32_t cpu {}, node {};
    int result = getcpu_vdso(&cpu, &node);
    printf("getcpu\n");
    CHECK(result == 0);
    printf("\tCPU: 0x%x\n", cpu);
    printf("\tNode: 0x%x\n", node);
  }

#if __SIZEOF_POINTER__ == 4
  // This symbol is not covered by the REQUIREs above, so only exercise it when it was resolved.
  if (gettime64_vdso) {
    timespec64 ts {};
    int result = gettime64_vdso(CLOCK_MONOTONIC, &ts);
    printf("clock_gettime64\n");
    CHECK(result == 0);
    printf("\tTime: 0x%llx 0x%llx\n", ts.tv_sec, ts.tv_nsec);
  }
#endif
}


================================================
FILE: unittests/InstructionCountCI/AFP/H0F3A.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "roundss xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintn s16, s17"
      ]
    },
    "roundss xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintm s16, s17"
      ]
    },
    "roundss xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintp s16, s17"
      ]
    },
    "roundss xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintz s16, s17"
      ]
    },
    "roundss xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frinti s16, s17"
      ]
    },
    "roundsd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintn d16, d17"
      ]
    },
    "roundsd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintm d16, d17"
      ]
    },
    "roundsd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintp d16, d17"
      ]
    },
    "roundsd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintz d16, d17"
      ]
    },
    "roundsd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frinti d16, d17"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/SVE256/Secondary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "cvtpi2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf v0.2s, v2.2s",
        "ptrue p0.d, vl1",
        "mov z16.d, p0/m, z0.d"
      ]
    },
    "cvtpi2ps xmm0, mm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "scvtf v0.2s, v2.2s",
        "ptrue p0.d, vl1",
        "mov z16.d, p0/m, z0.d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/SVE256/Secondary_REP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": [
      "RPRES"
    ]
  },
  "Instructions": {
    "cvtsi2ss xmm0, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf s16, w4"
      ]
    },
    "cvtsi2ss xmm0, dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "scvtf s16, s2"
      ]
    },
    "cvtsi2ss xmm0, qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x4]",
        "scvtf s16, x20"
      ]
    },
    "sqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x51",
      "ExpectedArm64ASM": [
        "fsqrt s16, s17"
      ]
    },
    "rsqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "0xf3 0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s17",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "rcpss xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "0xf3 0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "addss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd s16, s16, s17"
      ]
    },
    "mulss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul s16, s16, s17"
      ]
    },
    "cvtss2sd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "fcvt d16, s17"
      ]
    },
    "cvtss2sd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d16, s2"
      ]
    },
    "subss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub s16, s16, s17"
      ]
    },
    "minss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "fmin s16, s16, s17"
      ]
    },
    "divss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv s16, s16, s17"
      ]
    },
    "maxss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "fmax s16, s16, s17"
      ]
    },
    "cmpss xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "ptrue p0.s, vl1",
        "mov z16.s, p0/m, z0.s"
      ]
    },
    "cmpss xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s0, s17, s16",
        "mvn v0.8b, v0.8b",
        "ptrue p0.s, vl1",
        "mov z16.s, p0/m, z0.s"
      ]
    },
    "cmpss xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "ptrue p0.s, vl1",
        "mov z16.s, p0/m, z0.s"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/SVE256/Secondary_REPNE.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "cvtsi2sd xmm0, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d16, w4"
      ]
    },
    "cvtsi2sd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d16, w20"
      ]
    },
    "cvtsi2sd xmm0, rax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d16, x4"
      ]
    },
    "cvtsi2sd xmm0, qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf d16, d2"
      ]
    },
    "sqrtsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x51"
      ],
      "ExpectedArm64ASM": [
        "fsqrt d16, d17"
      ]
    },
    "addsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd d16, d16, d17"
      ]
    },
    "mulsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul d16, d16, d17"
      ]
    },
    "cvtsd2ss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "fcvt s16, d17"
      ]
    },
    "cvtsd2ss xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "fcvt s16, d2"
      ]
    },
    "subsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub d16, d16, d17"
      ]
    },
    "minsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "fmin d16, d16, d17"
      ]
    },
    "divsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv d16, d16, d17"
      ]
    },
    "maxsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "fmax d16, d16, d17"
      ]
    },
    "cmpsd xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "ptrue p0.d, vl1",
        "mov z16.d, p0/m, z0.d"
      ]
    },
    "cmpsd xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d0, d17, d16",
        "mvn v0.8b, v0.8b",
        "ptrue p0.d, vl1",
        "mov z16.d, p0/m, z0.d"
      ]
    },
    "cmpsd xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "ptrue p0.d, vl1",
        "mov z16.d, p0/m, z0.d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/Secondary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "cvtpi2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf v0.2s, v2.2s",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtpi2ps xmm0, mm0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "scvtf v0.2s, v2.2s",
        "mov v16.d[0], v0.d[0]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/Secondary_REP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES"
    ]
  },
  "Instructions": {
    "cvtsi2ss xmm0, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf s16, w4"
      ]
    },
    "cvtsi2ss xmm0, dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "scvtf s16, s2"
      ]
    },
    "cvtsi2ss xmm0, qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x4]",
        "scvtf s16, x20"
      ]
    },
    "sqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x51",
      "ExpectedArm64ASM": [
        "fsqrt s16, s17"
      ]
    },
    "rsqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "0xf3 0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s17",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "rcpss xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "0xf3 0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "addss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd s16, s16, s17"
      ]
    },
    "mulss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul s16, s16, s17"
      ]
    },
    "cvtss2sd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "fcvt d16, s17"
      ]
    },
    "cvtss2sd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d16, s2"
      ]
    },
    "subss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub s16, s16, s17"
      ]
    },
    "minss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "fmin s16, s16, s17"
      ]
    },
    "divss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv s16, s16, s17"
      ]
    },
    "maxss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "fmax s16, s16, s17"
      ]
    },
    "cmpss xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s16, s17, s16"
      ]
    },
    "cmpss xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s0, s17, s16",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/Secondary_REPNE.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "cvtsi2sd xmm0, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d16, w4"
      ]
    },
    "cvtsi2sd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d16, w20"
      ]
    },
    "cvtsi2sd xmm0, rax": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d16, x4"
      ]
    },
    "cvtsi2sd xmm0, qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf d16, d2"
      ]
    },
    "sqrtsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x51"
      ],
      "ExpectedArm64ASM": [
        "fsqrt d16, d17"
      ]
    },
    "addsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd d16, d16, d17"
      ]
    },
    "mulsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul d16, d16, d17"
      ]
    },
    "cvtsd2ss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "fcvt s16, d17"
      ]
    },
    "cvtsd2ss xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "fcvt s16, d2"
      ]
    },
    "subsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub d16, d16, d17"
      ]
    },
    "minsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "fmin d16, d16, d17"
      ]
    },
    "divsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv d16, d16, d17"
      ]
    },
    "maxsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "fmax d16, d16, d17"
      ]
    },
    "cmpsd xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d16, d17, d16"
      ]
    },
    "cmpsd xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d0, d17, d16",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/VEX_map1.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": [
      "RPRES"
    ]
  },
  "Instructions": {
    "vsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt s16, s18"
      ]
    },
    "vsqrtsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt d16, d18"
      ]
    },
    "vrsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "Map 1 0b10 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s18",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vrcpss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "FEAT_RPRES could make this more optimal",
        "Map 1 0b10 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s16, s18, s17"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt s16, s18, s17"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s16, s18, s17"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s0, s18, s17",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d16, d18, d17"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt d16, d18, d17"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d16, d18, d17"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d0, d18, d17",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s16, w4"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s16, x4"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d16, w4"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d16, x4"
      ]
    },
    "vmulss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul s16, s17, s18"
      ]
    },
    "vmulsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul d16, d17, d18"
      ]
    },
    "vcvtss2sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt d16, s18"
      ]
    },
    "vcvtsd2ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt s16, d18"
      ]
    },
    "vsubss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub s16, s17, s18"
      ]
    },
    "vsubsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub d16, d17, d18"
      ]
    },
    "vminss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmin s16, s17, s18"
      ]
    },
    "vminsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmin d16, d17, d18"
      ]
    },
    "vdivss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv s16, s17, s18"
      ]
    },
    "vdivsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv d16, d17, d18"
      ]
    },
    "vmaxss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmax s16, s17, s18"
      ]
    },
    "vmaxsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmax d16, d17, d18"
      ]
    },
    "vminps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmin v16.4s, v17.4s, v18.4s"
      ]
    },
    "vminps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.s, p7/z, z18.s, z17.s",
        "not p0.b, p7/z, p0.b",
        "mov z0.d, z17.d",
        "mov z0.s, p0/m, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vminpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmin v16.2d, v17.2d, v18.2d"
      ]
    },
    "vminpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.d, p7/z, z18.d, z17.d",
        "not p0.b, p7/z, p0.b",
        "mov z0.d, z17.d",
        "mov z0.d, p0/m, z18.d",
        "mov z16.d, z0.d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AFP/VEX_map3.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "vroundss xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintn s16, s17"
      ]
    },
    "vroundss xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintm s16, s17"
      ]
    },
    "vroundss xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintp s16, s17"
      ]
    },
    "vroundss xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintz s16, s17"
      ]
    },
    "vroundss xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frinti s16, s17"
      ]
    },
    "vroundsd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintn d16, d17"
      ]
    },
    "vroundsd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintm d16, d17"
      ]
    },
    "vroundsd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintp d16, d17"
      ]
    },
    "vroundsd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintz d16, d17"
      ]
    },
    "vroundsd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frinti d16, d17"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/FMA4.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "AFP",
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vfmaddsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubaddps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubaddpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x5f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x68 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x68 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x69 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x69 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmaddsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfmsubsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x6f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x78 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x79 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmaddss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmaddsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    },
    "vfnmsubsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x7f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x10000",
        "str x20, [x28, #24]",
        "mov w1, #0x401",
        "str x1, [x28, #1496]",
        "ldr x0, [x28, #2912]",
        "br x0"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map1.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "FCMA",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vmovups xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovups xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovups ymm0, ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Spurious moves",
        "Map 1 0b00 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovups ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovupd xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovupd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovupd ymm0, ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Spurious moves",
        "Map 1 0b01 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovupd ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovss xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovsd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inserting into the first element could be more optimal, which is the common case.",
        "Map 1 0b11 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovups [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovups [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x11 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vmovupd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovupd [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x11 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vmovss [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "db 0xc5, 0xf2, 0x11, 0xc2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "vmovss xmm2, xmm1, xmm0",
        "Need to manually encode since nasm won't encode this",
        "Map 1 0b10 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v18.16b, v17.16b",
        "mov v18.s[0], v16.s[0]",
        "stp xzr, xzr, [x28, #224]"
      ]
    },
    "vmovsd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b11 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "db 0xc5, 0xf3, 0x11, 0xc2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "vmovsd xmm2, xmm1, xmm0",
        "Need to manually encode since nasm won't encode this",
        "Inserting into the first element could be more optimal, which is the common case.",
        "Map 1 0b11 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v18.16b, v17.16b",
        "mov v18.d[0], v16.d[0]",
        "stp xzr, xzr, [x28, #224]"
      ]
    },
    "vmovlps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inserting into the first element could be more optimal, which is the common case.",
        "Map 1 0b00 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "ld1 {v16.d}[0], [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovlpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inserting into the first element could be more optimal, which is the common case.",
        "Map 1 0b01 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "ld1 {v16.d}[0], [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovsldup xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn1 v16.4s, v2.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovsldup ymm0, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x12 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q2, q3, [x4]",
        "trn1 v16.4s, v2.4s, v2.4s",
        "trn1 v2.4s, v3.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vmovddup xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "dup v16.2d, v2.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovddup ymm0, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x12 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q2, q3, [x4]",
        "dup v16.2d, v2.d[0]",
        "dup v2.2d, v3.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovlps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vmovlpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vunpcklps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.4s, v17.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vunpcklps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x14 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldp q3, q4, [x4]",
        "zip1 v16.4s, v17.4s, v3.4s",
        "zip1 v2.4s, v2.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vunpcklpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.2d, v17.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vunpcklpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x14 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldp q3, q4, [x4]",
        "zip1 v16.2d, v17.2d, v3.2d",
        "zip1 v2.2d, v2.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vunpckhps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.4s, v17.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vunpckhps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x15 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldp q3, q4, [x4]",
        "zip2 v16.4s, v17.4s, v3.4s",
        "zip2 v2.4s, v2.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vunpckhpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.2d, v17.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vunpckhpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x15 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldp q3, q4, [x4]",
        "zip2 v16.2d, v17.2d, v3.2d",
        "zip2 v2.2d, v2.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmovhps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "ld1 {v16.d}[1], [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovhpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "ld1 {v16.d}[1], [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovlhps xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v17.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovshdup xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn2 v16.4s, v2.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovshdup ymm0, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x16 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q2, q3, [x4]",
        "trn2 v16.4s, v2.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vmovhps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.d}[1], [x4]"
      ]
    },
    "vmovhpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.d}[1], [x4]"
      ]
    },
    "vmovmskps rax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x50 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "ldr q3, [x28, #3168]",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "vmovmskps rax, ymm0": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 1 0b00 0x50 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ushr v3.4s, v16.4s, #31",
        "ldr q4, [x28, #3168]",
        "ushl v3.4s, v3.4s, v4.4s",
        "addv s3, v3.4s",
        "mov w20, v3.s[0]",
        "ushr v2.4s, v2.4s, #31",
        "ushl v2.4s, v2.4s, v4.4s",
        "addv s2, v2.4s",
        "mov w21, v2.s[0]",
        "orr x4, x20, x21, lsl #4"
      ]
    },
    "vmovmskpd rax, xmm0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x50 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp2 v2.4s, v16.4s, v16.4s",
        "mov x20, v2.d[0]",
        "bfi x20, x20, #31, #32",
        "lsr x4, x20, #62"
      ]
    },
    "vmovmskpd rax, ymm0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 1 0b01 0x50 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "uzp2 v3.4s, v16.4s, v16.4s",
        "mov x20, v3.d[0]",
        "bfi x20, x20, #31, #32",
        "lsr x20, x20, #62",
        "uzp2 v2.4s, v2.4s, v2.4s",
        "mov x21, v2.d[0]",
        "bfi x21, x21, #31, #32",
        "lsr x21, x21, #62",
        "orr x4, x20, x21, lsl #2"
      ]
    },
    "vsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsqrtps ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x51 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "fsqrt v16.4s, v17.4s",
        "fsqrt v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vsqrtpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsqrtpd ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x51 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "fsqrt v16.2d, v17.2d",
        "fsqrt v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt s0, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsqrtsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt d0, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vrsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v17.4s",
        "fdiv v16.4s, v0.4s, v1.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vrsqrtps ymm0, ymm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b00 0x52 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v17.4s",
        "fdiv v16.4s, v0.4s, v1.4s",
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v2.4s",
        "fdiv v2.4s, v0.4s, v1.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vrsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s18",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vrcpps xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v16.4s, v0.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vrcpps ymm0, ymm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b00 0x53 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v16.4s, v0.4s, v17.4s",
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v2.4s, v0.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vrcpss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vandps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x54 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vandps ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x54 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v16.16b, v16.16b, v17.16b",
        "and v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vandpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x54 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vandpd ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x54 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v16.16b, v16.16b, v17.16b",
        "and v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vandnps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x55 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vandnps ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x55 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "bic v16.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vandnpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x55 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vandnpd ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x55 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "bic v16.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vorps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x56 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vorps ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x56 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "orr v16.16b, v16.16b, v17.16b",
        "orr v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vorpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x56 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vorpd ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x56 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "orr v16.16b, v16.16b, v17.16b",
        "orr v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vxorps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vxorps ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "eor v16.16b, v16.16b, v17.16b",
        "eor v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vxorpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vxorpd ymm0, ymm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "eor v16.16b, v16.16b, v17.16b",
        "eor v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vxorps xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b00 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vxorps ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b00 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vxorpd xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vxorpd ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpcklbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x60 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpcklbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x60 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip1 v16.16b, v17.16b, v18.16b",
        "zip1 v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpcklwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x61 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpcklwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x61 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip1 v16.8h, v17.8h, v18.8h",
        "zip1 v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpckldq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x62 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckldq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x62 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip1 v16.4s, v17.4s, v18.4s",
        "zip1 v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpacksswb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x63 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtn v16.8b, v17.8h",
        "sqxtn2 v16.16b, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpacksswb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0x63 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqxtn v16.8b, v17.8h",
        "sqxtn2 v16.16b, v18.8h",
        "sqxtn v2.8b, v2.8h",
        "sqxtn2 v2.16b, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpgtb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x64 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpgtb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x64 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmgt v16.16b, v17.16b, v18.16b",
        "cmgt v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpgtw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x65 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpgtw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x65 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmgt v16.8h, v17.8h, v18.8h",
        "cmgt v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpgtd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x66 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpgtd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x66 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmgt v16.4s, v17.4s, v18.4s",
        "cmgt v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpackuswb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x67 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtun v16.8b, v17.8h",
        "sqxtun2 v16.16b, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpackuswb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0x67 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqxtun v16.8b, v17.8h",
        "sqxtun2 v16.16b, v18.8h",
        "sqxtun v2.8b, v2.8h",
        "sqxtun2 v2.16b, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2680]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2680]",
        "ldr q2, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2680]",
        "ldr q2, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufd ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.4s, v17.s[0]",
        "dup v2.4s, v2.s[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufd ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2680]",
        "ldr q3, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufd ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2680]",
        "ldr q3, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufd ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2680]",
        "ldr q3, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufhw xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[4]",
        "trn1 v16.2d, v17.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufhw xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2672]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufhw xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2672]",
        "ldr q2, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufhw xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2672]",
        "ldr q2, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufhw ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v3.8h, v17.h[4]",
        "trn1 v16.2d, v17.2d, v3.2d",
        "dup v3.8h, v2.h[4]",
        "trn1 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufhw ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2672]",
        "ldr q3, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufhw ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2672]",
        "ldr q3, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshufhw ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2672]",
        "ldr q3, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshuflw xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[0]",
        "trn2 v16.2d, v2.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshuflw xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2664]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshuflw xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2664]",
        "ldr q2, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshuflw xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2664]",
        "ldr q2, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshuflw ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v3.8h, v17.h[0]",
        "trn2 v16.2d, v3.2d, v17.2d",
        "dup v3.8h, v2.h[0]",
        "trn2 v2.2d, v3.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpshuflw ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2664]",
        "ldr q3, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshuflw ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2664]",
        "ldr q3, [x0, #32]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpshuflw ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr x0, [x28, #2664]",
        "ldr q3, [x0, #48]",
        "tbl v16.16b, {v17.16b}, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpeqb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x74 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpeqb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x74 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmeq v16.16b, v17.16b, v18.16b",
        "cmeq v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpeqw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x75 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpeqw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x75 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmeq v16.8h, v17.8h, v18.8h",
        "cmeq v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpeqd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x76 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpeqd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x76 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmeq v16.4s, v17.4s, v18.4s",
        "cmeq v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vzeroupper": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Might be able to use DC ZVA",
        "Map 1 0b00 0x77 L=0"
      ],
      "ExpectedArm64ASM": [
        "add x0, x28, #0xc0 (192)",
        "dc zva, x0",
        "add x0, x28, #0x100 (256)",
        "dc zva, x0",
        "add x0, x28, #0x140 (320)",
        "dc zva, x0",
        "add x0, x28, #0x180 (384)",
        "dc zva, x0"
      ]
    },
    "vzeroall": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Might be able to use DC ZVA",
        "Map 1 0b00 0x77 L=1"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "movi v17.2d, #0x0",
        "movi v18.2d, #0x0",
        "movi v19.2d, #0x0",
        "movi v20.2d, #0x0",
        "movi v21.2d, #0x0",
        "movi v22.2d, #0x0",
        "movi v23.2d, #0x0",
        "movi v24.2d, #0x0",
        "movi v25.2d, #0x0",
        "movi v26.2d, #0x0",
        "movi v27.2d, #0x0",
        "movi v28.2d, #0x0",
        "movi v29.2d, #0x0",
        "movi v30.2d, #0x0",
        "movi v31.2d, #0x0",
        "add x0, x28, #0xc0 (192)",
        "dc zva, x0",
        "add x0, x28, #0x100 (256)",
        "dc zva, x0",
        "add x0, x28, #0x140 (320)",
        "dc zva, x0",
        "add x0, x28, #0x180 (384)",
        "dc zva, x0"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x00": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmeq v16.4s, v17.4s, v18.4s",
        "fcmeq v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v16.4s, v18.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x01": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v16.4s, v18.4s, v17.4s",
        "fcmgt v2.4s, v3.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v16.4s, v18.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x02": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v16.4s, v18.4s, v17.4s",
        "fcmge v2.4s, v3.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x03": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b",
        "fcmge v0.4s, v2.4s, v3.4s",
        "fcmgt v1.4s, v3.4s, v2.4s",
        "orr v2.16b, v0.16b, v1.16b",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v17.4s, v18.4s",
        "mvn v16.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x04": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmeq v16.4s, v17.4s, v18.4s",
        "mvn v16.16b, v16.16b",
        "fcmeq v2.4s, v2.4s, v3.4s",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v2.4s, v18.4s, v17.4s",
        "mvn v16.16b, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x05": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v4.4s, v18.4s, v17.4s",
        "mvn v16.16b, v4.16b",
        "fcmgt v2.4s, v3.4s, v2.4s",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v2.4s, v18.4s, v17.4s",
        "mvn v16.16b, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x06": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v4.4s, v18.4s, v17.4s",
        "mvn v16.16b, v4.16b",
        "fcmge v2.4s, v3.4s, v2.4s",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x07": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "fcmge v0.4s, v2.4s, v3.4s",
        "fcmgt v1.4s, v3.4s, v2.4s",
        "orr v2.16b, v0.16b, v1.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x00": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmeq v16.2d, v17.2d, v18.2d",
        "fcmeq v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v16.2d, v18.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x01": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v16.2d, v18.2d, v17.2d",
        "fcmgt v2.2d, v3.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v16.2d, v18.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x02": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v16.2d, v18.2d, v17.2d",
        "fcmge v2.2d, v3.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x03": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b",
        "fcmge v0.2d, v2.2d, v3.2d",
        "fcmgt v1.2d, v3.2d, v2.2d",
        "orr v2.16b, v0.16b, v1.16b",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v17.2d, v18.2d",
        "mvn v16.16b, v16.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x04": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmeq v16.2d, v17.2d, v18.2d",
        "mvn v16.16b, v16.16b",
        "fcmeq v2.2d, v2.2d, v3.2d",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v2.2d, v18.2d, v17.2d",
        "mvn v16.16b, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x05": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v4.2d, v18.2d, v17.2d",
        "mvn v16.16b, v4.16b",
        "fcmgt v2.2d, v3.2d, v2.2d",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v2.2d, v18.2d, v17.2d",
        "mvn v16.16b, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x06": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v4.2d, v18.2d, v17.2d",
        "mvn v16.16b, v4.16b",
        "fcmge v2.2d, v3.2d, v2.2d",
        "mvn v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x07": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "fcmge v0.2d, v2.2d, v3.2d",
        "fcmgt v1.2d, v3.2d, v2.2d",
        "orr v2.16b, v0.16b, v1.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s0, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt s0, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s0, s18, s17",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d0, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt d0, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d0, d18, d17",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrw xmm0, xmm0, eax, 000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.h[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[1], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[7], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpextrw eax, xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "vpextrw eax, xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[1]"
      ]
    },
    "vpextrw eax, xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "vpextrw [rax], xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[1], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 00b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "dup v3.4s, v18.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 00b": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v17.s[0]",
        "dup v5.4s, v18.s[0]",
        "zip1 v16.2d, v4.2d, v5.2d",
        "dup v2.4s, v2.s[0]",
        "dup v3.4s, v3.s[0]",
        "zip1 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 01b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr x0, [x28, #2688]",
        "ldr q4, [x0, #16]",
        "tbl v16.16b, {v17.16b, v18.16b}, v4.16b",
        "tbl v2.16b, {v2.16b, v3.16b}, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #32]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 10b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr x0, [x28, #2688]",
        "ldr q4, [x0, #32]",
        "tbl v16.16b, {v17.16b, v18.16b}, v4.16b",
        "tbl v2.16b, {v2.16b, v3.16b}, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #48]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 11b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr x0, [x28, #2688]",
        "ldr q4, [x0, #48]",
        "tbl v16.16b, {v17.16b, v18.16b}, v4.16b",
        "tbl v2.16b, {v2.16b, v3.16b}, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vshufpd xmm0, xmm1, xmm2, 0b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufpd ymm0, ymm1, ymm2, 0b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip1 v16.2d, v17.2d, v18.2d",
        "zip1 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vshufpd xmm0, xmm1, xmm2, 1b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v17.16b, v18.16b, #8",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vshufpd ymm0, ymm1, ymm2, 1b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ext v16.16b, v17.16b, v18.16b, #8",
        "zip1 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmovaps xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovaps ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovaps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovaps ymm0, ymm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovapd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovapd ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovapd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovapd ymm0, ymm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovaps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovaps [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vmovapd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovapd [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s0, w4",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s0, x4",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "mov v16.16b, v17.16b",
        "scvtf s0, s2",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, qword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x4]",
        "mov v16.16b, v17.16b",
        "scvtf s0, x20",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d0, w4",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d0, x4",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "mov v16.16b, v17.16b",
        "scvtf d0, w20",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, qword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mov v16.16b, v17.16b",
        "scvtf d0, d2",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovntps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovntps [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    },
    "vmovntpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovntpd [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    },
    "vcvttss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "vcvttss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64z s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "vcvttss2si rax, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "frint64z s2, s2",
        "fcvtzs x4, s2"
      ]
    },
    "vcvttsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "vcvttsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64z d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "vcvttsd2si rax, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "frint64z d2, d2",
        "fcvtzs x4, d2"
      ]
    },
    "vcvtss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "vcvtss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64x s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "vcvtss2si rax, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "frint64x s2, s2",
        "fcvtzs x4, s2"
      ]
    },
    "vcvtsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "vcvtsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64x d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "vcvtsd2si rax, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "frint64x d2, d2",
        "fcvtzs x4, d2"
      ]
    },
    "vucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vcomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vcomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vaddps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fadd v16.4s, v17.4s, v18.4s",
        "fadd v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vaddpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fadd v16.2d, v17.2d, v18.2d",
        "fadd v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vaddss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fadd s0, s17, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fadd d0, d17, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmulps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmulps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fmul v16.4s, v17.4s, v18.4s",
        "fmul v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vmulpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmulpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fmul v16.2d, v17.2d, v18.2d",
        "fmul v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmulss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul s0, s17, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmulsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul d0, d17, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtl v16.2d, v17.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtpd2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "fcvtn v16.2s, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtpd2ps xmm0, yword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x5a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q2, q3, [x4]",
        "fcvtn v2.2s, v2.2d",
        "fcvtn v3.2s, v3.2d",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v3.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtpd2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtn v16.2s, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtss2sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt d0, s18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtss2sd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "mov v16.16b, v17.16b",
        "fcvt d0, s2",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsd2ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt s0, d18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtsd2ss xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mov v16.16b, v17.16b",
        "fcvt s0, d2",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtdq2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "scvtf v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtdq2ps ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "scvtf v16.4s, v17.4s",
        "scvtf v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vcvtps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frint32x v3.4s, v17.4s",
        "fcvtzs v16.4s, v3.4s",
        "frint32x v2.4s, v2.4s",
        "fcvtzs v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vcvttps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvttps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frint32z v3.4s, v17.4s",
        "fcvtzs v16.4s, v3.4s",
        "frint32z v2.4s, v2.4s",
        "fcvtzs v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x5c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fsub v16.4s, v17.4s, v18.4s",
        "fsub v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x5c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fsub v16.2d, v17.2d, v18.2d",
        "fsub v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vsubss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub s0, s17, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vsubsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub d0, d17, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vminps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vminps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b00 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b",
        "fcmgt v0.4s, v3.4s, v2.4s",
        "bif v2.16b, v3.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vminpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vminpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b01 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b",
        "fcmgt v0.2d, v3.2d, v2.2d",
        "bif v2.16b, v3.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vminss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b10 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp s17, s18",
        "fcsel s0, s17, s18, mi",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vminsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp d17, d18",
        "fcsel d0, d17, d18, mi",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdivps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdivps ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "fdiv v16.4s, v16.4s, v18.4s",
        "fdiv v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vdivps ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "fdiv v16.4s, v17.4s, v16.4s",
        "fdiv v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vdivps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fdiv v16.4s, v17.4s, v18.4s",
        "fdiv v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vdivpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdivpd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "fdiv v16.2d, v17.2d, v16.2d",
        "fdiv v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vdivpd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "fdiv v16.2d, v16.2d, v18.2d",
        "fdiv v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vdivpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fdiv v16.2d, v17.2d, v18.2d",
        "fdiv v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vdivss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv s0, s17, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdivsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv d0, d17, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaxps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaxps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b00 0x5f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b",
        "fcmgt v0.4s, v3.4s, v2.4s",
        "bit v2.16b, v3.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaxpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaxpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b01 0x5f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b",
        "fcmgt v0.2d, v3.2d, v2.2d",
        "bit v2.16b, v3.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaxss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b10 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp s17, s18",
        "fcsel s0, s17, s18, gt",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaxsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp d17, d18",
        "fcsel d0, d17, d18, gt",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckhbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x68 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckhbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x68 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip2 v16.16b, v17.16b, v18.16b",
        "zip2 v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpckhwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x69 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckhwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x69 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip2 v16.8h, v17.8h, v18.8h",
        "zip2 v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpckhdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckhdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x6a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip2 v16.4s, v17.4s, v18.4s",
        "zip2 v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpackssdw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x6b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtn v16.4h, v17.4s",
        "sqxtn2 v16.8h, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpackssdw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0x6b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqxtn v16.4h, v17.4s",
        "sqxtn2 v16.8h, v18.4s",
        "sqxtn v2.4h, v2.4s",
        "sqxtn2 v2.8h, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpcklqdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpcklqdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x6c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip1 v16.2d, v17.2d, v18.2d",
        "zip1 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpunpckhqdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpunpckhqdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x6d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "zip2 v16.2d, v17.2d, v18.2d",
        "zip2 v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmovd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovq xmm0, qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovdqa [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovdqu xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x6f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovdqu [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x7f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vhaddpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x7c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "faddp v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vhaddpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x7c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "faddp v16.2d, v17.2d, v18.2d",
        "faddp v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vhaddps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x7c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "faddp v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vhaddps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0x7c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "faddp v16.4s, v17.4s, v18.4s",
        "faddp v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vhsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x7d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.2d, v17.2d, v18.2d",
        "uzp2 v3.2d, v17.2d, v18.2d",
        "fsub v16.2d, v2.2d, v3.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vhsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0x7d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.2d, v17.2d, v18.2d",
        "uzp2 v5.2d, v17.2d, v18.2d",
        "fsub v16.2d, v4.2d, v5.2d",
        "uzp1 v4.2d, v2.2d, v3.2d",
        "uzp2 v2.2d, v2.2d, v3.2d",
        "fsub v2.2d, v4.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vhsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0x7d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v18.4s",
        "uzp2 v3.4s, v17.4s, v18.4s",
        "fsub v16.4s, v2.4s, v3.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vhsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b11 0x7d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.4s, v17.4s, v18.4s",
        "uzp2 v5.4s, v17.4s, v18.4s",
        "fsub v16.4s, v4.4s, v5.4s",
        "uzp1 v4.4s, v2.4s, v3.4s",
        "uzp2 v2.4s, v2.4s, v3.4s",
        "fsub v2.4s, v4.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vmovd dword [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "vmovq qword [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vmovdqa ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovdqa [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x7f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vmovdqu ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x6f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vmovdqu [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x7f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stp q16, q2, [x4]"
      ]
    },
    "vaddsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v18.16b, v2.16b",
        "fadd v16.2d, v17.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #3072]",
        "eor v5.16b, v18.16b, v4.16b",
        "fadd v16.2d, v17.2d, v5.2d",
        "eor v3.16b, v3.16b, v4.16b",
        "fadd v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v18.16b, v2.16b",
        "fadd v16.4s, v17.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #3040]",
        "eor v5.16b, v18.16b, v4.16b",
        "fadd v16.4s, v17.4s, v5.4s",
        "eor v3.16b, v3.16b, v4.16b",
        "fadd v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xd1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "ushl v16.8h, v17.8h, v0.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b01 0xd1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "ushl v16.8h, v17.8h, v0.8h",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "ushl v2.8h, v2.8h, v0.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xd2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrld ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b01 0xd2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "ushl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xd3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v17.2d, v0.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlq ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b01 0xd3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v17.2d, v0.2d",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "neg v0.2d, v0.2d",
        "ushl v2.2d, v2.2d, v0.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xd4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xd4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "add v16.2d, v17.2d, v18.2d",
        "add v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpmullw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xd5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mul v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmullw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xd5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mul v16.8h, v17.8h, v18.8h",
        "mul v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmovq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vpmovmskb rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xd7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3296]",
        "cmlt v3.16b, v16.16b, #0",
        "and v2.16b, v3.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "vpmovmskb rax, ymm0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 1 0b01 0xd7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #3296]",
        "cmlt v4.16b, v16.16b, #0",
        "and v4.16b, v4.16b, v3.16b",
        "addp v4.16b, v4.16b, v4.16b",
        "addp v4.16b, v4.16b, v4.16b",
        "addp v4.8b, v4.8b, v4.8b",
        "umov w20, v4.h[0]",
        "cmlt v2.16b, v2.16b, #0",
        "and v2.16b, v2.16b, v3.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w21, v2.h[0]",
        "orr x20, x20, x21, lsl #16",
        "mov w4, w20"
      ]
    },
    "vpsubusb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xd8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubusb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xd8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uqsub v16.16b, v17.16b, v18.16b",
        "uqsub v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpsubusw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xd9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubusw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xd9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uqsub v16.8h, v17.8h, v18.8h",
        "uqsub v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminub xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xda 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminub ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umin v16.16b, v17.16b, v16.16b",
        "umin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpminub ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umin v16.16b, v16.16b, v18.16b",
        "umin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpminub ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umin v16.16b, v17.16b, v18.16b",
        "umin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpand xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xdb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpand ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xdb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "and v16.16b, v17.16b, v18.16b",
        "and v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddusb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xdc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddusb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xdc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uqadd v16.16b, v17.16b, v18.16b",
        "uqadd v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddusw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xdd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddusw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xdd 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uqadd v16.8h, v17.8h, v18.8h",
        "uqadd v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxub xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xde 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxub ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umax v16.16b, v16.16b, v18.16b",
        "umax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxub ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umax v16.16b, v17.16b, v16.16b",
        "umax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxub ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umax v16.16b, v17.16b, v18.16b",
        "umax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpandn xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v18.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpandn ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xdf 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "bic v16.16b, v18.16b, v17.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpavgb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "urhadd v16.16b, v17.16b, v16.16b",
        "urhadd v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "urhadd v16.16b, v16.16b, v18.16b",
        "urhadd v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "urhadd v16.16b, v17.16b, v18.16b",
        "urhadd v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpsraw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xe1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "sshl v16.8h, v17.8h, v0.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsraw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b01 0xe1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "sshl v16.8h, v17.8h, v0.8h",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "sshl v2.8h, v2.8h, v0.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrad xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xe2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrad ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b01 0xe2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v17.4s, v0.4s",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "sshl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpavgw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "urhadd v16.8h, v17.8h, v16.8h",
        "urhadd v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "urhadd v16.8h, v16.8h, v18.8h",
        "urhadd v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpavgw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "urhadd v16.8h, v17.8h, v18.8h",
        "urhadd v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmulhuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xe4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umull2 v0.4s, v17.8h, v18.8h",
        "umull v16.4s, v17.4h, v18.4h",
        "uzp2 v16.8h, v16.8h, v0.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmulhuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xe4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umull2 v0.4s, v17.8h, v18.8h",
        "umull v16.4s, v17.4h, v18.4h",
        "uzp2 v16.8h, v16.8h, v0.8h",
        "umull2 v0.4s, v2.8h, v3.8h",
        "umull v2.4s, v2.4h, v3.4h",
        "uzp2 v2.8h, v2.8h, v0.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmulhw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xe5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smull2 v0.4s, v17.8h, v18.8h",
        "smull v16.4s, v17.4h, v18.4h",
        "uzp2 v16.8h, v16.8h, v0.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmulhw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xe5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smull2 v0.4s, v17.8h, v18.8h",
        "smull v16.4s, v17.4h, v18.4h",
        "uzp2 v16.8h, v16.8h, v0.8h",
        "smull2 v0.4s, v2.8h, v3.8h",
        "smull v2.4s, v2.4h, v3.4h",
        "uzp2 v2.8h, v2.8h, v0.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vcvttpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvttpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frint32z v3.2d, v17.2d",
        "fcvtzs v3.2d, v3.2d",
        "xtn v3.2s, v3.2d",
        "frint32z v2.2d, v2.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v2.2s, v2.2d",
        "zip1 v16.2d, v3.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtdq2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "scvtf v16.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtdq2pd ymm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "scvtf v16.2d, v2.2d",
        "sxtl2 v2.2d, v17.4s",
        "scvtf v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vcvtpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b11 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frint32x v3.2d, v17.2d",
        "fcvtzs v3.2d, v3.2d",
        "xtn v3.2s, v3.2d",
        "frint32x v2.2d, v2.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v2.2s, v2.2d",
        "zip1 v16.2d, v3.2d, v2.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovntdq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovntdq [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    },
    "vpsubsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xe8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqsub v16.16b, v17.16b, v18.16b",
        "sqsub v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpsubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xe9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqsub v16.8h, v17.8h, v18.8h",
        "sqsub v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xea 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminsw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smin v16.8h, v17.8h, v16.8h",
        "smin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smin v16.8h, v16.8h, v18.8h",
        "smin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smin v16.8h, v17.8h, v18.8h",
        "smin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpor xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xeb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpor ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xeb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "orr v16.16b, v17.16b, v18.16b",
        "orr v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xec 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xec 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqadd v16.16b, v17.16b, v18.16b",
        "sqadd v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xed 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xed 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqadd v16.8h, v17.8h, v18.8h",
        "sqadd v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xee 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxsw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smax v16.8h, v17.8h, v16.8h",
        "smax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smax v16.8h, v16.8h, v18.8h",
        "smax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smax v16.8h, v17.8h, v18.8h",
        "smax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpxor xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xef 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpxor ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xef 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "eor v16.16b, v17.16b, v18.16b",
        "eor v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpxor xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0xef 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpxor ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0xef 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vlddqu xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xf0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vlddqu ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xf0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldp q16, q2, [x4]",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "ushl v16.8h, v17.8h, v0.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 1 0b01 0xf1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "ushl v16.8h, v17.8h, v0.8h",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "ushl v2.8h, v2.8h, v0.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpslld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "ushl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpslld ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 1 0b01 0xf2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "ushl v16.4s, v17.4s, v0.4s",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "ushl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "ushl v16.2d, v17.2d, v0.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllq ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 1 0b01 0xf3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "ushl v16.2d, v17.2d, v0.2d",
        "uqshl d0, d18, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "ushl v2.2d, v2.2d, v0.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpmuludq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v17.4s",
        "uzp1 v3.4s, v18.4s, v18.4s",
        "umull v16.2d, v2.2s, v3.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmuludq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xf4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.4s, v17.4s, v17.4s",
        "uzp1 v5.4s, v18.4s, v18.4s",
        "umull v16.2d, v4.2s, v5.2s",
        "uzp1 v2.4s, v2.4s, v2.4s",
        "uzp1 v3.4s, v3.4s, v3.4s",
        "umull v2.2d, v2.2s, v3.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaddwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smull v2.4s, v17.4h, v18.4h",
        "smull2 v3.4s, v17.8h, v18.8h",
        "addp v16.4s, v2.4s, v3.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaddwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xf5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smull v4.4s, v17.4h, v18.4h",
        "smull2 v5.4s, v17.8h, v18.8h",
        "addp v16.4s, v4.4s, v5.4s",
        "smull v4.4s, v2.4h, v3.4h",
        "smull2 v2.4s, v2.8h, v3.8h",
        "addp v2.4s, v4.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsadbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0xf6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uabdl v2.8h, v17.8b, v18.8b",
        "uabdl2 v3.8h, v17.16b, v18.16b",
        "addv h2, v2.8h",
        "addv h3, v3.8h",
        "zip1 v16.2d, v2.2d, v3.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsadbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 1 0b01 0xf6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uabdl v4.8h, v17.8b, v18.8b",
        "uabdl2 v5.8h, v17.16b, v18.16b",
        "addv h4, v4.8h",
        "addv h5, v5.8h",
        "zip1 v16.2d, v4.2d, v5.2d",
        "uabdl v4.8h, v2.8b, v3.8b",
        "uabdl2 v2.8h, v2.16b, v3.16b",
        "addv h3, v4.8h",
        "addv h2, v2.8h",
        "zip1 v2.2d, v3.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmlt v2.16b, v17.16b, #0",
        "ldr q3, [x11]",
        "bsl v2.16b, v16.16b, v3.16b",
        "str q2, [x11]"
      ]
    },
    "vpsubb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xf8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sub v16.16b, v17.16b, v18.16b",
        "sub v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpsubw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xf9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sub v16.8h, v17.8h, v18.8h",
        "sub v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpsubd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xfa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xfa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sub v16.4s, v17.4s, v18.4s",
        "sub v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsubq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xfb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsubq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xfb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sub v16.2d, v17.2d, v18.2d",
        "sub v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xfc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xfc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "add v16.16b, v17.16b, v18.16b",
        "add v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xfd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xfd 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "add v16.8h, v17.8h, v18.8h",
        "add v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpaddd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xfe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpaddd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xfe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "add v16.4s, v17.4s, v18.4s",
        "add v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map1_FCMA.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP",
      "FCMA"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vaddsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v2.16b, v18.16b, v18.16b, #8",
        "fcadd v16.2d, v17.2d, v2.2d, #90",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddsubpd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "ext v4.16b, v18.16b, v18.16b, #8",
        "fcadd v16.2d, v16.2d, v4.2d, #90",
        "ext v3.16b, v3.16b, v3.16b, #8",
        "fcadd v2.2d, v2.2d, v3.2d, #90",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "ext v4.16b, v16.16b, v16.16b, #8",
        "fcadd v16.2d, v17.2d, v4.2d, #90",
        "ext v3.16b, v3.16b, v3.16b, #8",
        "fcadd v2.2d, v2.2d, v3.2d, #90",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ext v4.16b, v18.16b, v18.16b, #8",
        "fcadd v16.2d, v17.2d, v4.2d, #90",
        "ext v3.16b, v3.16b, v3.16b, #8",
        "fcadd v2.2d, v2.2d, v3.2d, #90",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v18.4s",
        "fcadd v16.4s, v17.4s, v2.4s, #90",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "rev64 v4.4s, v16.4s",
        "fcadd v16.4s, v17.4s, v4.4s, #90",
        "rev64 v3.4s, v3.4s",
        "fcadd v2.4s, v2.4s, v3.4s, #90",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubps ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "rev64 v4.4s, v18.4s",
        "fcadd v16.4s, v16.4s, v4.4s, #90",
        "rev64 v3.4s, v3.4s",
        "fcadd v2.4s, v2.4s, v3.4s, #90",
        "str q2, [x28, #192]"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "rev64 v4.4s, v18.4s",
        "fcadd v16.4s, v17.4s, v4.4s, #90",
        "rev64 v3.4s, v3.4s",
        "fcadd v2.4s, v2.4s, v3.4s, #90",
        "str q2, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map1_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVE256"
    ]
  },
  "Instructions": {
    "vmovntps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntps [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    },
    "vmovntpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntpd [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    },
    "vmovntdq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntdq [rax], ymm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "stnp q16, q2, [x4]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map1_flagm.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "vucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vcomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vcomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map2.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "vpshufb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x00 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.16b, #0x8f",
        "and v2.16b, v18.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpshufb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.16b, #0x8f",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "and v5.16b, v18.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v5.16b",
        "and v2.16b, v4.16b, v2.16b",
        "tbl v2.16b, {v3.16b}, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vphaddw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x01 128-bit"
      ],
      "ExpectedArm64ASM": [
        "addp v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphaddw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "addp v16.8h, v17.8h, v18.8h",
        "addp v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vphaddd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "addp v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphaddd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "addp v16.4s, v17.4s, v18.4s",
        "addp v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vphaddsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x03 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sqadd v16.8h, v2.8h, v3.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphaddsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x03 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.8h, v17.8h, v18.8h",
        "uzp2 v5.8h, v17.8h, v18.8h",
        "sqadd v16.8h, v4.8h, v5.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sqadd v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaddubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "sxtl v3.8h, v18.8b",
        "mul v2.8h, v2.8h, v3.8h",
        "uxtl2 v3.8h, v17.16b",
        "sxtl2 v4.8h, v18.16b",
        "mul v3.8h, v3.8h, v4.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sqadd v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaddubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uxtl v4.8h, v17.8b",
        "sxtl v5.8h, v18.8b",
        "mul v4.8h, v4.8h, v5.8h",
        "uxtl2 v5.8h, v17.16b",
        "sxtl2 v6.8h, v18.16b",
        "mul v5.8h, v5.8h, v6.8h",
        "uzp1 v6.8h, v4.8h, v5.8h",
        "uzp2 v4.8h, v4.8h, v5.8h",
        "sqadd v16.8h, v6.8h, v4.8h",
        "uxtl v4.8h, v2.8b",
        "sxtl v5.8h, v3.8b",
        "mul v4.8h, v4.8h, v5.8h",
        "uxtl2 v2.8h, v2.16b",
        "sxtl2 v3.8h, v3.16b",
        "mul v2.8h, v2.8h, v3.8h",
        "uzp1 v3.8h, v4.8h, v2.8h",
        "uzp2 v2.8h, v4.8h, v2.8h",
        "sqadd v2.8h, v3.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vphsubw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sub v16.8h, v2.8h, v3.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphsubw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.8h, v17.8h, v18.8h",
        "uzp2 v5.8h, v17.8h, v18.8h",
        "sub v16.8h, v4.8h, v5.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sub v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vphsubd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x06 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v18.4s",
        "uzp2 v3.4s, v17.4s, v18.4s",
        "sub v16.4s, v2.4s, v3.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphsubd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.4s, v17.4s, v18.4s",
        "uzp2 v5.4s, v17.4s, v18.4s",
        "sub v16.4s, v4.4s, v5.4s",
        "uzp1 v4.4s, v2.4s, v3.4s",
        "uzp2 v2.4s, v2.4s, v3.4s",
        "sub v2.4s, v4.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vphsubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x07 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sqsub v16.8h, v2.8h, v3.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vphsubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x07 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.8h, v17.8h, v18.8h",
        "uzp2 v5.8h, v17.8h, v18.8h",
        "sqsub v16.8h, v4.8h, v5.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sqsub v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpsignb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.16b, v18.16b, #7",
        "srshr v2.16b, v2.16b, #7",
        "mul v16.16b, v17.16b, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsignb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqshl v4.16b, v18.16b, #7",
        "srshr v4.16b, v4.16b, #7",
        "mul v16.16b, v17.16b, v4.16b",
        "sqshl v3.16b, v3.16b, #7",
        "srshr v3.16b, v3.16b, #7",
        "mul v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpsignw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.8h, v18.8h, #15",
        "srshr v2.8h, v2.8h, #15",
        "mul v16.8h, v17.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsignw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqshl v4.8h, v18.8h, #15",
        "srshr v4.8h, v4.8h, #15",
        "mul v16.8h, v17.8h, v4.8h",
        "sqshl v3.8h, v3.8h, #15",
        "srshr v3.8h, v3.8h, #15",
        "mul v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpsignd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.4s, v18.4s, #31",
        "srshr v2.4s, v2.4s, #31",
        "mul v16.4s, v17.4s, v2.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsignd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x0a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqshl v4.4s, v18.4s, #31",
        "srshr v4.4s, v4.4s, #31",
        "mul v16.4s, v17.4s, v4.4s",
        "sqshl v3.4s, v3.4s, #31",
        "srshr v3.4s, v3.4s, #31",
        "mul v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmulhrsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smull v2.4s, v17.4h, v18.4h",
        "smull2 v3.4s, v17.8h, v18.8h",
        "sshr v2.4s, v2.4s, #14",
        "sshr v3.4s, v3.4s, #14",
        "movi v4.4s, #0x1",
        "add v2.4s, v2.4s, v4.4s",
        "add v3.4s, v3.4s, v4.4s",
        "shrn v2.4h, v2.4s, #1",
        "mov v0.16b, v2.16b",
        "shrn2 v0.8h, v3.4s, #1",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmulhrsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x0b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smull v4.4s, v17.4h, v18.4h",
        "smull2 v5.4s, v17.8h, v18.8h",
        "sshr v4.4s, v4.4s, #14",
        "sshr v5.4s, v5.4s, #14",
        "movi v6.4s, #0x1",
        "add v4.4s, v4.4s, v6.4s",
        "add v5.4s, v5.4s, v6.4s",
        "shrn v4.4h, v4.4s, #1",
        "mov v0.16b, v4.16b",
        "shrn2 v0.8h, v5.4s, #1",
        "mov v16.16b, v0.16b",
        "smull v4.4s, v2.4h, v3.4h",
        "smull2 v2.4s, v2.8h, v3.8h",
        "sshr v4.4s, v4.4s, #14",
        "sshr v2.4s, v2.4s, #14",
        "movi v3.4s, #0x1",
        "add v4.4s, v4.4s, v3.4s",
        "add v2.4s, v2.4s, v3.4s",
        "shrn v4.4h, v4.4s, #1",
        "shrn2 v4.8h, v2.4s, #1",
        "str q4, [x28, #192]"
      ]
    },
    "vpermilps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.4s, #0x3",
        "and v2.16b, v18.16b, v2.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "shl v2.16b, v2.16b, #2",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "dup v3.4s, w20",
        "add v2.16b, v3.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v4.4s, #0x3",
        "and v4.16b, v18.16b, v4.16b",
        "trn1 v4.16b, v4.16b, v4.16b",
        "trn1 v4.8h, v4.8h, v4.8h",
        "shl v4.16b, v4.16b, #2",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "dup v5.4s, w20",
        "add v4.16b, v5.16b, v4.16b",
        "tbl v16.16b, {v17.16b}, v4.16b",
        "movi v4.4s, #0x3",
        "and v3.16b, v3.16b, v4.16b",
        "trn1 v3.16b, v3.16b, v3.16b",
        "trn1 v3.8h, v3.8h, v3.8h",
        "shl v3.16b, v3.16b, #2",
        "dup v4.4s, w20",
        "add v3.16b, v4.16b, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v2.2d, v18.2d, #1",
        "mov w0, #0x1",
        "dup v3.2d, x0",
        "and v2.16b, v2.16b, v3.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "trn1 v2.4s, v2.4s, v2.4s",
        "shl v2.16b, v2.16b, #3",
        "mov x20, #0x100",
        "movk x20, #0x302, lsl #16",
        "movk x20, #0x504, lsl #32",
        "movk x20, #0x706, lsl #48",
        "dup v3.2d, x20",
        "add v2.16b, v3.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "Map 2 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ushr v4.2d, v18.2d, #1",
        "mov w0, #0x1",
        "dup v5.2d, x0",
        "and v4.16b, v4.16b, v5.16b",
        "trn1 v4.16b, v4.16b, v4.16b",
        "trn1 v4.8h, v4.8h, v4.8h",
        "trn1 v4.4s, v4.4s, v4.4s",
        "shl v4.16b, v4.16b, #3",
        "mov x20, #0x100",
        "movk x20, #0x302, lsl #16",
        "movk x20, #0x504, lsl #32",
        "movk x20, #0x706, lsl #48",
        "dup v5.2d, x20",
        "add v4.16b, v5.16b, v4.16b",
        "tbl v16.16b, {v17.16b}, v4.16b",
        "ushr v3.2d, v3.2d, #1",
        "mov w0, #0x1",
        "dup v4.2d, x0",
        "and v3.16b, v3.16b, v4.16b",
        "trn1 v3.16b, v3.16b, v3.16b",
        "trn1 v3.8h, v3.8h, v3.8h",
        "trn1 v3.4s, v3.4s, v3.4s",
        "shl v3.16b, v3.16b, #3",
        "dup v4.2d, x20",
        "add v3.16b, v4.16b, v3.16b",
        "tbl v2.16b, {v2.16b}, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vtestps xmm0, xmm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "dup v2.4s, w20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestps ymm0, ymm1": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v17.16b, v16.16b",
        "and v5.16b, v3.16b, v2.16b",
        "ushr v4.4s, v4.4s, #31",
        "ushr v5.4s, v5.4s, #31",
        "add v4.4s, v5.4s, v4.4s",
        "addv s4, v4.4s",
        "mov w20, v4.s[0]",
        "bic v4.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "ushr v4.4s, v4.4s, #31",
        "ushr v2.4s, v2.4s, #31",
        "add v2.4s, v2.4s, v4.4s",
        "addv s2, v2.4s",
        "mov w21, v2.s[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestpd xmm0, xmm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "dup v2.2d, x20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestpd ymm0, ymm1": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v17.16b, v16.16b",
        "and v5.16b, v3.16b, v2.16b",
        "ushr v4.2d, v4.2d, #63",
        "ushr v5.2d, v5.2d, #63",
        "add v4.2d, v5.2d, v4.2d",
        "addp v4.2d, v4.2d, v4.2d",
        "mov x20, v4.d[0]",
        "bic v4.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "ushr v4.2d, v4.2d, #63",
        "ushr v2.2d, v2.2d, #63",
        "add v2.2d, v2.2d, v4.2d",
        "addp v2.2d, v2.2d, v2.2d",
        "mov x21, v2.d[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vcvtph2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtl v16.4s, v17.4h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtph2ps ymm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x13 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtl v16.4s, v17.4h",
        "fcvtl2 v2.4s, v17.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpermps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x16 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v4.4s, #0x7",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "dup v5.4s, w20",
        "and v6.16b, v17.16b, v4.16b",
        "trn1 v6.16b, v6.16b, v6.16b",
        "trn1 v6.8h, v6.8h, v6.8h",
        "shl v6.16b, v6.16b, #2",
        "add v6.16b, v6.16b, v5.16b",
        "mov v0.16b, v18.16b",
        "mov v1.16b, v3.16b",
        "tbl v16.16b, {v0.16b, v1.16b}, v6.16b",
        "and v2.16b, v2.16b, v4.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "shl v2.16b, v2.16b, #2",
        "add v2.16b, v2.16b, v5.16b",
        "mov v0.16b, v18.16b",
        "mov v1.16b, v3.16b",
        "tbl v2.16b, {v0.16b, v1.16b}, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vptest xmm0, xmm1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vptest ymm0, ymm1": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "Map 2 0b01 0x17 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v16.16b, v17.16b",
        "bic v5.16b, v17.16b, v16.16b",
        "and v6.16b, v2.16b, v3.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "umax v3.8h, v4.8h, v6.8h",
        "umax v2.8h, v5.8h, v2.8h",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vbroadcastss xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x18 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vbroadcastss ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vbroadcastsd ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.2d}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vbroadcastf128 ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x1a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vpabsb xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x1c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpabsb ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x1c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "abs v16.16b, v17.16b",
        "abs v2.16b, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpabsw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x1d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.8h, v17.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpabsw ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x1d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "abs v16.8h, v17.8h",
        "abs v2.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpabsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x1e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpabsd ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x1e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "abs v16.4s, v17.4s",
        "abs v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxbw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.8h, v17.8b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxbw ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x20 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "sxtl v16.8h, v17.8b",
        "sxtl v2.8h, v2.8b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxbd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.8h, v17.8b",
        "sxtl v16.4s, v2.4h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxbd ymm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x21 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "sxtl v3.8h, v17.8b",
        "sxtl v16.4s, v3.4h",
        "sxtl v2.8h, v2.8b",
        "sxtl v2.4s, v2.4h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxbq xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.8h, v17.8b",
        "sxtl v2.4s, v2.4h",
        "sxtl v16.2d, v2.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxbq ymm0, xmm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x22 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[1]",
        "sxtl v3.8h, v17.8b",
        "sxtl v3.4s, v3.4h",
        "sxtl v16.2d, v3.2s",
        "sxtl v2.8h, v2.8b",
        "sxtl v2.4s, v2.4h",
        "sxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxwd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x23 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.4s, v17.4h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxwd ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x23 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "sxtl v16.4s, v17.4h",
        "sxtl v2.4s, v2.4h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxwq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x24 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.4s, v17.4h",
        "sxtl v16.2d, v2.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxwq ymm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x24 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "sxtl v3.4s, v17.4h",
        "sxtl v16.2d, v3.2s",
        "sxtl v2.4s, v2.4h",
        "sxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovsxdq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x25 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.2d, v17.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovsxdq ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x25 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "sxtl v16.2d, v17.2s",
        "sxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmuldq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v17.4s",
        "uzp1 v3.4s, v18.4s, v18.4s",
        "smull v16.2d, v2.2s, v3.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmuldq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "uzp1 v4.4s, v17.4s, v17.4s",
        "uzp1 v5.4s, v18.4s, v18.4s",
        "smull v16.2d, v4.2s, v5.2s",
        "uzp1 v2.4s, v2.4s, v2.4s",
        "uzp1 v3.4s, v3.4s, v3.4s",
        "smull v2.2d, v2.2s, v3.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpeqq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpeqq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmeq v16.2d, v17.2d, v18.2d",
        "cmeq v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vmovntdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x2a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovntdqa ymm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x2a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "ldr q2, [x4, #16]",
        "str q2, [x28, #192]"
      ]
    },
    "vpackusdw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x2b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtun v16.4h, v17.4s",
        "sqxtun2 v16.8h, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpackusdw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x2b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "sqxtun v16.4h, v17.4s",
        "sqxtun2 v16.8h, v18.4s",
        "sqxtun v2.4h, v2.4s",
        "sqxtun2 v2.8h, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "Map 2 0b01 0x2c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x2d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovps [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]"
      ]
    },
    "vmaskmovps [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "Map 2 0b01 0x2e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[3], [x1]"
      ]
    },
    "vmaskmovpd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]"
      ]
    },
    "vmaskmovpd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x2f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[1], [x1]"
      ]
    },
    "vpmovzxbw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x30 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.8h, v17.8b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxbw ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x30 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "uxtl v16.8h, v17.8b",
        "uxtl v2.8h, v2.8b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovzxbd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x31 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v16.4s, v2.4h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxbd ymm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x31 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "uxtl v3.8h, v17.8b",
        "uxtl v16.4s, v3.4h",
        "uxtl v2.8h, v2.8b",
        "uxtl v2.4s, v2.4h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovzxbq xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x32 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v2.4s, v2.4h",
        "uxtl v16.2d, v2.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxbq ymm0, xmm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x32 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[1]",
        "uxtl v3.8h, v17.8b",
        "uxtl v3.4s, v3.4h",
        "uxtl v16.2d, v3.2s",
        "uxtl v2.8h, v2.8b",
        "uxtl v2.4s, v2.4h",
        "uxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovzxwd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x33 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.4s, v17.4h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxwd ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x33 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "uxtl v16.4s, v17.4h",
        "uxtl v2.4s, v2.4h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovzxwq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x34 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.4s, v17.4h",
        "uxtl v16.2d, v2.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxwq ymm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x34 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "uxtl v3.4s, v17.4h",
        "uxtl v16.2d, v3.2s",
        "uxtl v2.4s, v2.4h",
        "uxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmovzxdq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x35 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.2d, v17.2s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmovzxdq ymm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x35 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "uxtl v16.2d, v17.2s",
        "uxtl v2.2d, v2.2s",
        "str q2, [x28, #192]"
      ]
    },
    "vpermd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x36 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v4.4s, #0x7",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "dup v5.4s, w20",
        "and v6.16b, v17.16b, v4.16b",
        "trn1 v6.16b, v6.16b, v6.16b",
        "trn1 v6.8h, v6.8h, v6.8h",
        "shl v6.16b, v6.16b, #2",
        "add v6.16b, v6.16b, v5.16b",
        "mov v0.16b, v18.16b",
        "mov v1.16b, v3.16b",
        "tbl v16.16b, {v0.16b, v1.16b}, v6.16b",
        "and v2.16b, v2.16b, v4.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "shl v2.16b, v2.16b, #2",
        "add v2.16b, v2.16b, v5.16b",
        "mov v0.16b, v18.16b",
        "mov v1.16b, v3.16b",
        "tbl v2.16b, {v0.16b, v1.16b}, v2.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpcmpgtq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x37 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpcmpgtq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x37 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "cmgt v16.2d, v17.2d, v18.2d",
        "cmgt v2.2d, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x38 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminsb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smin v16.16b, v17.16b, v16.16b",
        "smin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smin v16.16b, v16.16b, v18.16b",
        "smin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smin v16.16b, v17.16b, v18.16b",
        "smin v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x39 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminsd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smin v16.4s, v17.4s, v16.4s",
        "smin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smin v16.4s, v16.4s, v18.4s",
        "smin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpminsd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smin v16.4s, v17.4s, v18.4s",
        "smin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpminuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminuw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umin v16.8h, v17.8h, v16.8h",
        "umin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminuw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umin v16.8h, v16.8h, v18.8h",
        "umin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umin v16.8h, v17.8h, v18.8h",
        "umin v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpminud xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpminud ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umin v16.4s, v17.4s, v16.4s",
        "umin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpminud ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umin v16.4s, v16.4s, v18.4s",
        "umin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpminud ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umin v16.4s, v17.4s, v18.4s",
        "umin v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.16b, v17.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxsb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smax v16.16b, v16.16b, v18.16b",
        "smax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smax v16.16b, v17.16b, v16.16b",
        "smax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smax v16.16b, v17.16b, v18.16b",
        "smax v2.16b, v2.16b, v3.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxsd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "smax v16.4s, v17.4s, v16.4s",
        "smax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "smax v16.4s, v16.4s, v18.4s",
        "smax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxsd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "smax v16.4s, v17.4s, v18.4s",
        "smax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.8h, v17.8h, v18.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxuw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umax v16.8h, v17.8h, v16.8h",
        "umax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxuw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umax v16.8h, v16.8h, v18.8h",
        "umax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umax v16.8h, v17.8h, v18.8h",
        "umax v2.8h, v2.8h, v3.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxud xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaxud ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "umax v16.4s, v16.4s, v18.4s",
        "umax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxud ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #192]",
        "umax v16.4s, v17.4s, v16.4s",
        "umax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaxud ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "umax v16.4s, v17.4s, v18.4s",
        "umax v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpmulld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mul v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmulld ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mul v16.4s, v17.4s, v18.4s",
        "mul v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vphminposuw xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3008]",
        "zip1 v3.8h, v2.8h, v17.8h",
        "zip2 v2.8h, v2.8h, v17.8h",
        "umin v2.4s, v3.4s, v2.4s",
        "uminv s2, v2.4s",
        "rev32 v16.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlvd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x45 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlvd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x45 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v3.4s",
        "neg v0.4s, v0.4s",
        "ushl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlvq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x45 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v17.2d, v0.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlvq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x45 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v17.2d, v0.2d",
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v3.2d, v0.2d",
        "bif v0.16b, v3.16b, v1.16b",
        "neg v0.2d, v0.2d",
        "ushl v2.2d, v2.2d, v0.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpsravd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x46 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x1f",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsravd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v0.4s, #0x1f",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v17.4s, v0.4s",
        "movi v0.4s, #0x1f",
        "umin v0.4s, v0.4s, v3.4s",
        "neg v0.4s, v0.4s",
        "sshl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllvd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x47 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllvd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x47 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "ushl v16.4s, v17.4s, v0.4s",
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v3.4s",
        "ushl v2.4s, v2.4s, v0.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllvq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x47 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "ushl v16.2d, v17.2d, v0.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllvq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x47 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "ushl v16.2d, v17.2d, v0.2d",
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v3.2d, v0.2d",
        "bif v0.16b, v3.16b, v1.16b",
        "ushl v2.2d, v2.2d, v0.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vpbroadcastd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastd ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastd ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastq xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.2d}, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastq ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastq ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.2d}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vbroadcasti128 ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x5a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastb xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x78 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.16b, v17.b[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastb xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x78 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.16b}, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastb ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x78 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.16b, v17.b[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastb ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x78 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.16b}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x79 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.8h, v17.h[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastw xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x79 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.8h}, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpbroadcastw ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x79 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.8h, v17.h[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpbroadcastw ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x79 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.8h}, [x4]",
        "str q16, [x28, #192]"
      ]
    },
    "vpmaskmovd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaskmovd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaskmovq xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaskmovq ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaskmovd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]"
      ]
    },
    "vpmaskmovd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[3], [x1]"
      ]
    },
    "vpmaskmovq [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]"
      ]
    },
    "vpmaskmovq [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[1], [x1]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 46,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.s}[3], [x1]",
        "mov w0, v4.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v4.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v4.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v4.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v3.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #2",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v3.d[0]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v3.d[1]",
        "tbz x0, #63, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov x0, v18.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[0], [x1]",
        "mov x0, v18.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v16.d}[1], [x1]",
        "mov x0, v4.d[0]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.d}[0], [x1]",
        "mov x0, v4.d[1]",
        "tbz x0, #63, #+0x10",
        "mov x0, v3.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.d}[1], [x1]",
        "movi v18.2d, #0x0",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vfmaddsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmaddsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsubadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsubadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.4s, v17.4s",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.4s, v17.4s",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fneg v3.4s, v3.4s",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.2d, v17.2d",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.2d, v17.2d",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fneg v3.2d, v3.2d",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fmls v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fmls v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.4s, v17.4s",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.4s, v17.4s",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fneg v3.4s, v3.4s",
        "fmls v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.2d, v17.2d",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.2d, v17.2d",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fneg v3.2d, v3.2d",
        "fmls v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.4s, v18.4s",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.4s, v18.4s",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fneg v4.4s, v4.4s",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.2d, v18.2d",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.2d, v18.2d",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fneg v4.2d, v4.2d",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fmls v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fmls v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.4s, v18.4s",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.4s, v18.4s",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fneg v4.4s, v4.4s",
        "fmls v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v0.2d, v18.2d",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v0.2d, v18.2d",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fneg v4.2d, v4.2d",
        "fmls v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmla v16.4s, v17.4s, v18.4s",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmla v16.2d, v17.2d, v18.2d",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v16.4s, v16.4s",
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v16.4s, v16.4s",
        "fmla v16.4s, v17.4s, v18.4s",
        "fneg v2.4s, v2.4s",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v16.2d, v16.2d",
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v16.2d, v16.2d",
        "fmla v16.2d, v17.2d, v18.2d",
        "fneg v2.2d, v2.2d",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmls v16.4s, v17.4s, v18.4s",
        "fmls v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmls v16.2d, v17.2d, v18.2d",
        "fmls v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v16.4s, v16.4s",
        "fmls v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v16.4s, v16.4s",
        "fmls v16.4s, v17.4s, v18.4s",
        "fneg v2.4s, v2.4s",
        "fmls v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fneg v16.2d, v16.2d",
        "fmls v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fneg v16.2d, v16.2d",
        "fmls v16.2d, v17.2d, v18.2d",
        "fneg v2.2d, v2.2d",
        "fmls v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmaddsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsubadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsubadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmaddsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmaddsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsubadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsubadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vaesimc xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xdb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "aesimc v16.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaesenc xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xdc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "aesmc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaesenc ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0xdc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "aesmc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "mov v0.16b, v3.16b",
        "aese v0.16b, v2.16b",
        "aesmc v0.16b, v0.16b",
        "eor v2.16b, v0.16b, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vaesenclast xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xdd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaesenclast ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xdd 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "mov v0.16b, v3.16b",
        "aese v0.16b, v2.16b",
        "eor v2.16b, v0.16b, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vaesdec xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xde 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "aesimc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaesdec ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "aesimc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "mov v0.16b, v3.16b",
        "aesd v0.16b, v2.16b",
        "aesimc v0.16b, v0.16b",
        "eor v2.16b, v0.16b, v4.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vaesdeclast xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaesdeclast ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xdf 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b",
        "mov v0.16b, v3.16b",
        "aesd v0.16b, v2.16b",
        "eor v2.16b, v0.16b, v4.16b",
        "str q2, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map2_AFP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vfmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s16, s17, s18, s16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d16, d17, d18, d16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s16, s17, s18, s16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d16, d17, d18, d16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s16, s17, s18, s16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d16, d17, d18, d16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s16, s17, s18, s16",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d16, d17, d18, d16",
        "stp xzr, xzr, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map2_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVE256"
    ]
  },
  "Instructions": {
    "vmovntdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x2a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldnt1b {z16.b}, p6/z, [x4]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmovntdqa ymm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x2a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldnt1b {z16.b}, p6/z, [x4]",
        "ldnt1b {z2.b}, p6/z, [x4, #1, mul vl]",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x2c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "cmplt p0.s, p6/z, z2.s, #0",
        "ld1w {z2.s}, p0/z, [x4, #1, mul vl]",
        "msr nzcv, x20",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x2d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "cmplt p0.d, p6/z, z2.d, #0",
        "ld1d {z2.d}, p0/z, [x4, #1, mul vl]",
        "msr nzcv, x20",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovps [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x2e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "cmplt p0.s, p6/z, z2.s, #0",
        "st1w {z3.s}, p0, [x4, #1, mul vl]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x2f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "cmplt p0.d, p6/z, z2.d, #0",
        "st1d {z3.d}, p0, [x4, #1, mul vl]",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #1",
        "sshll v3.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #3",
        "sshll v3.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #1",
        "sshll v6.2d, v17.2s, #1",
        "sshll2 v7.2d, v3.4s, #1",
        "sshll v3.2d, v3.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw #2]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #3",
        "sshll v6.2d, v17.2s, #3",
        "sshll2 v7.2d, v3.4s, #3",
        "sshll v3.2d, v3.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d, lsl #2]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #1",
        "shl v2.2d, v2.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d, lsl #2]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d, lsl #2]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #3",
        "shl v2.2d, v2.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #1",
        "shl v3.2d, v3.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #2",
        "shl v3.2d, v3.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d, lsl #3]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #1",
        "sshll v3.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #3",
        "sshll v3.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #1",
        "sshll v6.2d, v17.2s, #1",
        "sshll2 v7.2d, v3.4s, #1",
        "sshll v3.2d, v3.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z3.s, sxtw #2]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #3",
        "sshll v6.2d, v17.2s, #3",
        "sshll2 v7.2d, v3.4s, #3",
        "sshll v3.2d, v3.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d, lsl #2]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #1",
        "shl v2.2d, v2.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z17.d, lsl #2]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d, lsl #2]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #3",
        "shl v2.2d, v2.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [x4, z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [x4, z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #1",
        "shl v3.2d, v3.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #2",
        "shl v3.2d, v3.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z3.d, lsl #3]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #1",
        "sshll v3.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #3",
        "sshll v3.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*1], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [z17.s]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [z3.s]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*2], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #1",
        "sshll v6.2d, v17.2s, #1",
        "sshll2 v7.2d, v3.4s, #1",
        "sshll v3.2d, v3.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*4], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z3.s, sxtw #2]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdd ymm0, [ymm1*8], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #3",
        "sshll v6.2d, v17.2s, #3",
        "sshll2 v7.2d, v3.4s, #3",
        "sshll v3.2d, v3.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*1], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*2], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*4], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherdq ymm0, [xmm1*8], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z17.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*1], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z17.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*2], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #1",
        "shl v2.2d, v2.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*4], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #2",
        "shl v2.2d, v2.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqd xmm0, [ymm1*8], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #3",
        "shl v2.2d, v2.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*1], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z17.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*2], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #1",
        "shl v3.2d, v3.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*4], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #2",
        "shl v3.2d, v3.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vpgatherqq ymm0, [ymm1*8], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z3.d, lsl #3]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [z17.s]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #1",
        "sshll v3.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll2 v2.2d, v17.4s, #3",
        "sshll v3.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*1], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [z17.s]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "ld1w {z0.s}, p0/z, [z3.s]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*2], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #1",
        "sshll v6.2d, v17.2s, #1",
        "sshll2 v7.2d, v3.4s, #1",
        "sshll v3.2d, v3.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*4], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "cmplt p0.s, p6/z, z4.s, #0",
        "mov w0, #0x0",
        "ld1w {z0.s}, p0/z, [x0, z3.s, sxtw #2]",
        "mov z2.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdps ymm0, [ymm1*8], ymm2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "sshll2 v5.2d, v17.4s, #3",
        "sshll v6.2d, v17.2s, #3",
        "sshll2 v7.2d, v3.4s, #3",
        "sshll v3.2d, v3.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z6.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z5.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "cmplt p0.s, p6/z, z4.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z7.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z2.s, p0/m, z0.s",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*1], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*2], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*4], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherdpd ymm0, [xmm1*8], ymm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #224]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [z4.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z17.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z2.d]",
        "xtn v0.2s, v0.2d",
        "sel z2.s, p0, z0.s, z16.s",
        "movi v18.2d, #0x0",
        "zip1 v16.2d, v2.2d, v18.2d",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*1], xmm2": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z17.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*2], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #1",
        "shl v2.2d, v2.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*4], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #2",
        "shl v2.2d, v2.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqps xmm0, [ymm1*8], xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v3.2d, v17.2d, #3",
        "shl v2.2d, v2.2d, #3",
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "punpklo p1.h, p0.b",
        "ld1w {z0.d}, p1/z, [z3.d]",
        "punpkhi p1.h, p0.b",
        "ld1w {z1.d}, p1/z, [z2.d]",
        "uzp1 v0.4s, v0.4s, v1.4s",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*1], xmm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*2], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*4], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z2.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd xmm0, [xmm1*8], xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*1], ymm2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z17.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*2], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #1",
        "shl v3.2d, v3.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*4], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "shl v5.2d, v17.2d, #2",
        "shl v3.2d, v3.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [z5.d]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "ld1d {z0.d}, p0/z, [z3.d]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vgatherqpd ymm0, [ymm1*8], ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "cmplt p0.d, p6/z, z4.d, #0",
        "mov w0, #0x0",
        "ld1d {z0.d}, p0/z, [x0, z3.d, lsl #3]",
        "mov z2.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20",
        "stp xzr, xzr, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vfmaddsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmaddsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsubadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsubadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v17.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v17.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "eor v3.16b, v3.16b, v5.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmla v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fmla v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmla v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fmla v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmls z0.s, p6/m, z16.s, z18.s",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z17.d",
        "fnmls z0.s, p6/m, z16.s, z18.s",
        "mov z16.d, z0.d",
        "fnmls z3.s, p6/m, z2.s, z4.s",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmls z0.d, p6/m, z16.d, z18.d",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z17.d",
        "fnmls z0.d, p6/m, z16.d, z18.d",
        "mov z16.d, z0.d",
        "fnmls z3.d, p6/m, z2.d, z4.d",
        "str q3, [x28, #192]"
      ]
    },
    "vfmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmls v0.4s, v16.4s, v18.4s",
        "mov v16.16b, v0.16b",
        "fmls v3.4s, v2.4s, v4.4s",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v17.16b",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v17.16b",
        "fmls v0.2d, v16.2d, v18.2d",
        "mov v16.16b, v0.16b",
        "fmls v3.2d, v2.2d, v4.2d",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmla z0.s, p6/m, z16.s, z18.s",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z17.d",
        "fnmla z0.s, p6/m, z16.s, z18.s",
        "mov z16.d, z0.d",
        "fnmla z3.s, p6/m, z2.s, z4.s",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmla z0.d, p6/m, z16.d, z18.d",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z17.d",
        "fnmla z0.d, p6/m, z16.d, z18.d",
        "mov z16.d, z0.d",
        "fnmla z3.d, p6/m, z2.d, z4.d",
        "str q3, [x28, #192]"
      ]
    },
    "vfnmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s16, s18, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d16, d18, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmls z0.s, p6/m, z17.s, z16.s",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z18.d",
        "fnmls z0.s, p6/m, z17.s, z16.s",
        "mov z16.d, z0.d",
        "fnmls z4.s, p6/m, z3.s, z2.s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmls z0.d, p6/m, z17.d, z16.d",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z18.d",
        "fnmls z0.d, p6/m, z17.d, z16.d",
        "mov z16.d, z0.d",
        "fnmls z4.d, p6/m, z3.d, z2.d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmls v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "fmls v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v0.16b, v18.16b",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov v0.16b, v18.16b",
        "fmls v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "fmls v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmla z0.s, p6/m, z17.s, z16.s",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z18.d",
        "fnmla z0.s, p6/m, z17.s, z16.s",
        "mov z16.d, z0.d",
        "fnmla z4.s, p6/m, z3.s, z2.s",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmla z0.d, p6/m, z17.d, z16.d",
        "mov z16.d, z0.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "mov z0.d, z18.d",
        "fnmla z0.d, p6/m, z17.d, z16.d",
        "mov z16.d, z0.d",
        "fnmla z4.d, p6/m, z3.d, z2.d",
        "str q4, [x28, #192]"
      ]
    },
    "vfnmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s17, s16, s18",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d17, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmla v16.4s, v17.4s, v18.4s",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmla v16.2d, v17.2d, v18.2d",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmadd d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmls z16.s, p6/m, z17.s, z18.s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fnmls z16.s, p6/m, z17.s, z18.s",
        "fnmls z2.s, p6/m, z3.s, z4.s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmls z16.d, p6/m, z17.d, z18.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fnmls z16.d, p6/m, z17.d, z18.d",
        "fnmls z2.d, p6/m, z3.d, z4.d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmsub d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmls v16.4s, v17.4s, v18.4s",
        "fmls v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fmls v16.2d, v17.2d, v18.2d",
        "fmls v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmsub d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmla z16.s, p6/m, z17.s, z18.s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fnmla z16.s, p6/m, z17.s, z18.s",
        "fnmla z2.s, p6/m, z3.s, z4.s",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmla z16.d, p6/m, z17.d, z18.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "fnmla z16.d, p6/m, z17.d, z18.d",
        "fnmla z2.d, p6/m, z3.d, z4.d",
        "str q2, [x28, #192]"
      ]
    },
    "vfnmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd s0, s17, s18, s16",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfnmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmadd d0, d17, d18, d16",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmaddsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsubadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.4s, v17.4s, v16.4s",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.4s, v3.4s, v2.4s",
        "str q4, [x28, #192]"
      ]
    },
    "vfmsubadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v18.16b, v2.16b",
        "mov v0.16b, v2.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v18.16b, v5.16b",
        "mov v0.16b, v6.16b",
        "fmla v0.2d, v17.2d, v16.2d",
        "mov v16.16b, v0.16b",
        "eor v4.16b, v4.16b, v5.16b",
        "fmla v4.2d, v3.2d, v2.2d",
        "str q4, [x28, #192]"
      ]
    },
    "vfmaddsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3040]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmaddsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmaddsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3072]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsubadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3104]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.4s, v17.4s, v18.4s",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.4s, v3.4s, v4.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vfmsubadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v16.16b, v2.16b",
        "mov v16.16b, v2.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vfmsubadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #3136]",
        "eor v6.16b, v16.16b, v5.16b",
        "mov v16.16b, v6.16b",
        "fmla v16.2d, v17.2d, v18.2d",
        "eor v2.16b, v2.16b, v5.16b",
        "fmla v2.2d, v3.2d, v4.2d",
        "str q2, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map2_flagm.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "SVE128",
      "SVE256",
      "SVEBITPERM"
    ]
  },
  "Instructions": {
    "vtestps xmm0, xmm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "dup v2.4s, w20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestps ymm0, ymm1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "Map 2 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v17.16b, v16.16b",
        "and v5.16b, v3.16b, v2.16b",
        "ushr v4.4s, v4.4s, #31",
        "ushr v5.4s, v5.4s, #31",
        "add v4.4s, v5.4s, v4.4s",
        "addv s4, v4.4s",
        "mov w20, v4.s[0]",
        "bic v4.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "ushr v4.4s, v4.4s, #31",
        "ushr v2.4s, v2.4s, #31",
        "add v2.4s, v2.4s, v4.4s",
        "addv s2, v2.4s",
        "mov w21, v2.s[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestpd xmm0, xmm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "dup v2.2d, x20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestpd ymm0, ymm1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "Map 2 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v17.16b, v16.16b",
        "and v5.16b, v3.16b, v2.16b",
        "ushr v4.2d, v4.2d, #63",
        "ushr v5.2d, v5.2d, #63",
        "add v4.2d, v5.2d, v4.2d",
        "addp v4.2d, v4.2d, v4.2d",
        "mov x20, v4.d[0]",
        "bic v4.16b, v17.16b, v16.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "ushr v4.2d, v4.2d, #63",
        "ushr v2.2d, v2.2d, #63",
        "add v2.2d, v2.2d, v4.2d",
        "addp v2.2d, v2.2d, v2.2d",
        "mov x21, v2.d[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vptest xmm0, xmm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vptest ymm0, ymm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x17 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "and v4.16b, v16.16b, v17.16b",
        "bic v5.16b, v17.16b, v16.16b",
        "and v6.16b, v2.16b, v3.16b",
        "bic v2.16b, v3.16b, v2.16b",
        "umax v3.8h, v4.8h, v6.8h",
        "umax v2.8h, v5.8h, v2.8h",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vmaskmovps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "Map 2 0b01 0x2c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmaskmovpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x2d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vmaskmovps [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]"
      ]
    },
    "vmaskmovps [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "Map 2 0b01 0x2e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[3], [x1]"
      ]
    },
    "vmaskmovpd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]"
      ]
    },
    "vmaskmovpd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x2f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[1], [x1]"
      ]
    },
    "vpmaskmovd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaskmovd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov w0, v17.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v17.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v17.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "ld1 {v0.s}[3], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaskmovq xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpmaskmovq ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v0.2d, #0x0",
        "mov x0, v17.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v17.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v16.16b, v0.16b",
        "movi v0.2d, #0x0",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "ld1 {v0.d}[1], [x1]",
        "mov v2.16b, v0.16b",
        "str q2, [x28, #192]"
      ]
    },
    "vpmaskmovd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]"
      ]
    },
    "vpmaskmovd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov w0, v16.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[0], [x4]",
        "add x1, x4, #0x4 (4)",
        "mov w0, v16.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v16.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v17.s}[3], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov w0, v2.s[0]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[0], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[1]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[1], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[2]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[2], [x1]",
        "add x1, x1, #0x4 (4)",
        "mov w0, v2.s[3]",
        "tbz w0, #31, #+0x8",
        "st1 {v3.s}[3], [x1]"
      ]
    },
    "vpmaskmovq [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]"
      ]
    },
    "vpmaskmovq [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "mov x0, v16.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[0], [x4]",
        "add x1, x4, #0x8 (8)",
        "mov x0, v16.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v17.d}[1], [x1]",
        "add x1, x4, #0x10 (16)",
        "mov x0, v2.d[0]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[0], [x1]",
        "add x1, x1, #0x8 (8)",
        "mov x0, v2.d[1]",
        "tbz x0, #63, #+0x8",
        "st1 {v3.d}[1], [x1]"
      ]
    },
    "andn eax, ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 32-bit"
      ],
      "ExpectedArm64ASM": [
        "bic w4, w7, w6",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "andn rax, rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 64-bit"
      ],
      "ExpectedArm64ASM": [
        "bic x4, x7, x6",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "bzhi eax, ebx, ecx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b00 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl w20, w20, w7",
        "bic w20, w6, w20",
        "tst x7, #0xe0",
        "csel w4, w6, w20, ne",
        "cset x20, eq",
        "cmp w4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bzhi rax, rbx, rcx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b00 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl x20, x20, x7",
        "bic x20, x6, x20",
        "tst x7, #0xc0",
        "csel x4, x6, x20, ne",
        "cset x20, eq",
        "cmp x4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "pdep eax, ebx, ecx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov w4, #0x0",
        "cbz w7, #+0x2c",
        "neg w2, w1",
        "and w2, w2, w1",
        "sbfx w3, w0, #0, #1",
        "eor w1, w1, w2",
        "and w2, w3, w2",
        "neg w3, w1",
        "orr w4, w4, w2",
        "lsr w0, w0, #1",
        "and w2, w1, w3",
        "cbnz w2, #-0x1c"
      ]
    },
    "pdep rax, rbx, rcx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov x4, #0x0",
        "cbz x7, #+0x2c",
        "neg x2, x1",
        "and x2, x2, x1",
        "sbfx x3, x0, #0, #1",
        "eor x1, x1, x2",
        "and x2, x3, x2",
        "neg x3, x1",
        "orr x4, x4, x2",
        "lsr x0, x0, #1",
        "and x2, x1, x3",
        "cbnz x2, #-0x1c"
      ]
    },
    "bextr eax, ebx, ecx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb w20, w7",
        "lsr w21, w6, w20",
        "mov w22, #0x0",
        "cmp w20, #0x1f (31)",
        "csel w20, w21, w22, ls",
        "ubfx w21, w7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl w22, w22, w21",
        "bic w22, w20, w22",
        "cmp w21, #0x1f (31)",
        "csel w4, w22, w20, ls",
        "cmp w4, #0x0 (0)"
      ]
    },
    "bextr rax, rbx, rcx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb x20, w7",
        "lsr x21, x6, x20",
        "mov w22, #0x0",
        "cmp x20, #0x3f (63)",
        "csel x20, x21, x22, ls",
        "ubfx x21, x7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl x22, x22, x21",
        "bic x22, x20, x22",
        "cmp x21, #0x3f (63)",
        "csel x4, x22, x20, ls",
        "cmp x4, #0x0 (0)"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map3.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "AFP",
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vpermq ymm0, ymm1, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v17.16b, v17.16b, #8",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "zip1 v16.2d, v2.2d, v17.2d",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v2.16b, v17.16b, #8",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 4": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermq ymm0, ymm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[1]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 6": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "trn2 v16.2d, v2.2d, v17.2d",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 8": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "zip1 v16.2d, v17.2d, v2.2d",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 9": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v17.16b, v2.16b, #8",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 10": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[0]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 11": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v2.16b, v2.16b, #8",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 12": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v2.d[1]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 13": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "trn2 v16.2d, v17.2d, v2.2d",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 14": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[1]",
        "dup v2.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[1]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermq ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[1]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermpd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermpd ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[1]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermpd ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[0]",
        "str q16, [x28, #192]"
      ]
    },
    "vpermpd ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v2.d[1]",
        "str q16, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], v17.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[1], v17.s[1]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[2], v17.s[2]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v17.4s",
        "trn2 v16.4s, v2.4s, v16.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0110b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3200]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 0111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3216]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[3], v17.s[3]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3232]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v16.4s",
        "trn2 v16.4s, v2.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3248]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3264]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1110b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3280]",
        "tbx v16.16b, {v17.16b}, v2.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendd xmm0, xmm1, 1111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpblendd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "str q2, [x28, #192]"
      ]
    },
    "vpblendd ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "rev64 v4.4s, v17.4s",
        "trn2 v16.4s, v4.4s, v16.4s",
        "rev64 v3.4s, v3.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpblendd ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #192]",
        "ldr q3, [x28, #208]",
        "rev64 v4.4s, v16.4s",
        "trn2 v16.4s, v4.4s, v17.4s",
        "rev64 v2.4s, v2.4s",
        "trn2 v2.4s, v2.4s, v3.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vpblendd ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilps xmm0, xmm1, 01010101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[1]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilps xmm0, xmm1, 10101010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[2]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilps xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[3]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilps ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.4s, v17.s[0]",
        "dup v2.4s, v2.s[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilps ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.4s, v17.s[1]",
        "dup v2.4s, v2.s[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilps ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.4s, v17.s[2]",
        "dup v2.4s, v2.s[2]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilps ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.4s, v17.s[3]",
        "dup v2.4s, v2.s[3]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilpd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v17.16b, v17.16b, #8",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilpd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilpd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[1]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[0]",
        "dup v2.2d, v2.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v17.16b, v17.16b, #8",
        "dup v2.2d, v2.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v2.2d, v2.d[0]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilpd ymm0, ymm1, 0011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[1]",
        "dup v2.2d, v2.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0100b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[0]",
        "ext v2.16b, v2.16b, v2.16b, #8",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0101b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v17.16b, v17.16b, #8",
        "ext v2.16b, v2.16b, v2.16b, #8",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0110b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v2.16b, v2.16b, v2.16b, #8",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilpd ymm0, ymm1, 0111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[1]",
        "ext v2.16b, v2.16b, v2.16b, #8",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v17.16b, v17.16b, #8",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilpd ymm0, ymm1, 1011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1100b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[0]",
        "dup v2.2d, v2.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1101b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ext v16.16b, v17.16b, v17.16b, #8",
        "dup v2.2d, v2.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vpermilpd ymm0, ymm1, 1110b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v2.2d, v2.d[1]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpermilpd ymm0, ymm1, 1111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "dup v16.2d, v17.d[1]",
        "dup v2.2d, v2.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q17, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q17, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q16, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q16, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q16, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00001000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00011000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00101000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00111000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "movi v16.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10001000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.4s, v17.4s",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundps ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintn v16.4s, v17.4s",
        "frintn v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vroundps ymm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintm v16.4s, v17.4s",
        "frintm v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vroundps ymm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintp v16.4s, v17.4s",
        "frintp v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vroundps ymm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintz v16.4s, v17.4s",
        "frintz v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vroundps ymm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frinti v16.4s, v17.4s",
        "frinti v2.4s, v2.4s",
        "str q2, [x28, #192]"
      ]
    },
    "vroundpd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundpd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundpd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundpd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundpd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.2d, v17.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundpd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintn v16.2d, v17.2d",
        "frintn v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vroundpd ymm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintm v16.2d, v17.2d",
        "frintm v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vroundpd ymm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintp v16.2d, v17.2d",
        "frintp v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vroundpd ymm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frintz v16.2d, v17.2d",
        "frintz v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vroundpd ymm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "frinti v16.2d, v17.2d",
        "frinti v2.2d, v2.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vroundss xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn s0, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundss xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm s0, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundss xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp s0, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundss xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz s0, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundss xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti s0, s17",
        "mov v16.s[0], v0.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn d0, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm d0, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp d0, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz d0, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti d0, d17",
        "mov v16.d[0], v0.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 0000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 0001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 1111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]",
        "mov v2.s[3], v3.s[3]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 00b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 01b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 10b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v18.d[1]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 11b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v18.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0100b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v2.d[0], v3.d[0]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0101b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "mov v2.d[0], v3.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0110b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v18.d[1]",
        "mov v2.d[0], v3.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0111b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v2.d[0], v3.d[0]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v2.d[1], v3.d[1]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "mov v2.d[1], v3.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v18.d[1]",
        "mov v2.d[1], v3.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1011b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v2.d[1], v3.d[1]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1101b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1110b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v18.d[1]",
        "str q2, [x28, #192]"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 00000001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[0], v18.h[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "mov v16.16b, v17.16b",
        "mov v16.h[0], v18.h[0]",
        "mov v2.h[0], v3.h[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v18.16b, v17.16b, #1",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v18.16b, v17.16b, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 17": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v17.16b, v2.16b, #1",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ext v16.16b, v18.16b, v17.16b, #1",
        "ext v3.16b, v3.16b, v2.16b, #1",
        "str q3, [x28, #192]"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ext v16.16b, v18.16b, v17.16b, #15",
        "ext v3.16b, v3.16b, v2.16b, #15",
        "str q3, [x28, #192]"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 16": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 17": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v3.2d, #0x0",
        "ext v16.16b, v17.16b, v3.16b, #1",
        "ext v2.16b, v2.16b, v3.16b, #1",
        "str q2, [x28, #192]"
      ]
    },
    "vpextrb rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[0]"
      ]
    },
    "vpextrb rax, xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[15]"
      ]
    },
    "vpextrw rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "vpextrw rax, xmm0, 7": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "vpextrd rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "vpextrd rax, xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "vpextrb [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[0], [x4]"
      ]
    },
    "vpextrb [rax], xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[15], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 7": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "vpextrd [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[0], [x4]"
      ]
    },
    "vpextrd [rax], xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[3], [x4]"
      ]
    },
    "vextractps eax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "vextractps eax, xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "vinsertf128 ymm0, ymm1, xmm2, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vinsertf128 ymm0, ymm1, xmm2, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vextractf128 xmm0, ymm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vextractf128 xmm0, ymm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff3fffff",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffffbfffff",
        "orr x0, x0, #0x800000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff7fffff",
        "orr x0, x0, #0x400000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "orr x0, x20, #0xc00000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtn v16.4h, v17.4s"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff3fffff",
        "msr fpcr, x0",
        "fcvtn v3.4h, v17.4s",
        "mov v0.16b, v3.16b",
        "fcvtn2 v0.8h, v2.4s",
        "mov v16.16b, v0.16b",
        "msr fpcr, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffffbfffff",
        "orr x0, x0, #0x800000",
        "msr fpcr, x0",
        "fcvtn v3.4h, v17.4s",
        "mov v0.16b, v3.16b",
        "fcvtn2 v0.8h, v2.4s",
        "mov v16.16b, v0.16b",
        "msr fpcr, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff7fffff",
        "orr x0, x0, #0x400000",
        "msr fpcr, x0",
        "fcvtn v3.4h, v17.4s",
        "mov v0.16b, v3.16b",
        "fcvtn2 v0.8h, v2.4s",
        "mov v16.16b, v0.16b",
        "msr fpcr, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "mrs x20, fpcr",
        "orr x0, x20, #0xc00000",
        "msr fpcr, x0",
        "fcvtn v3.4h, v17.4s",
        "mov v0.16b, v3.16b",
        "fcvtn2 v0.8h, v2.4s",
        "mov v16.16b, v0.16b",
        "msr fpcr, x20",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "fcvtn v3.4h, v17.4s",
        "mov v0.16b, v3.16b",
        "fcvtn2 v0.8h, v2.4s",
        "mov v16.16b, v0.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrb xmm0, xmm0, eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.b[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrb xmm0, xmm1, eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.b[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrb xmm0, xmm1, eax, 15": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.b[15], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[3], v18.s[3]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrd xmm0, xmm0, eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrd xmm0, xmm1, eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrd xmm0, xmm1, eax, 3": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[3], w4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrq xmm0, xmm0, rax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], x4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrq xmm0, xmm1, rax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[0], x4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpinsrq xmm0, xmm1, rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[1], x4",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vinserti128 ymm0, ymm1, xmm2, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vinserti128 ymm0, ymm1, xmm2, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vextracti128 xmm0, ymm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vextracti128 xmm0, ymm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 00001111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 11110000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v17.4s, v18.4s",
        "faddp v2.4s, v2.4s, v2.4s",
        "faddp s2, v2.2s",
        "dup v16.4s, v2.s[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 00001111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 11110000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "fmul v4.4s, v17.4s, v18.4s",
        "faddp v4.4s, v4.4s, v4.4s",
        "faddp s4, v4.2s",
        "dup v16.4s, v4.s[0]",
        "fmul v2.4s, v2.4s, v3.4s",
        "faddp v2.4s, v2.4s, v2.4s",
        "faddp s2, v2.2s",
        "dup v2.4s, v2.s[0]",
        "str q2, [x28, #192]"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 00001111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 11110000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.2d, v17.2d, v18.2d",
        "faddp d2, v2.2d",
        "dup v16.2d, v2.d[0]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 000b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 001b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 010b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 011b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 100b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 101b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 110b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 111b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 000b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[0]",
        "ext v5.16b, v17.16b, v17.16b, #0",
        "ext v6.16b, v17.16b, v17.16b, #1",
        "ext v7.16b, v17.16b, v17.16b, #2",
        "ext v8.16b, v17.16b, v17.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 001b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[1]",
        "ext v5.16b, v17.16b, v17.16b, #0",
        "ext v6.16b, v17.16b, v17.16b, #1",
        "ext v7.16b, v17.16b, v17.16b, #2",
        "ext v8.16b, v17.16b, v17.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 010b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[2]",
        "ext v5.16b, v17.16b, v17.16b, #0",
        "ext v6.16b, v17.16b, v17.16b, #1",
        "ext v7.16b, v17.16b, v17.16b, #2",
        "ext v8.16b, v17.16b, v17.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 011b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[3]",
        "ext v5.16b, v17.16b, v17.16b, #0",
        "ext v6.16b, v17.16b, v17.16b, #1",
        "ext v7.16b, v17.16b, v17.16b, #2",
        "ext v8.16b, v17.16b, v17.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 100b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[0]",
        "ext v5.16b, v17.16b, v17.16b, #4",
        "ext v6.16b, v17.16b, v17.16b, #5",
        "ext v7.16b, v17.16b, v17.16b, #6",
        "ext v8.16b, v17.16b, v17.16b, #7",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 101b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[1]",
        "ext v5.16b, v17.16b, v17.16b, #4",
        "ext v6.16b, v17.16b, v17.16b, #5",
        "ext v7.16b, v17.16b, v17.16b, #6",
        "ext v8.16b, v17.16b, v17.16b, #7",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 110b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[2]",
        "ext v5.16b, v17.16b, v17.16b, #4",
        "ext v6.16b, v17.16b, v17.16b, #5",
        "ext v7.16b, v17.16b, v17.16b, #6",
        "ext v8.16b, v17.16b, v17.16b, #7",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 111b": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v4.4s, v18.s[3]",
        "ext v5.16b, v17.16b, v17.16b, #4",
        "ext v6.16b, v17.16b, v17.16b, #5",
        "ext v7.16b, v17.16b, v17.16b, #6",
        "ext v8.16b, v17.16b, v17.16b, #7",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v4.8h, v8.8b, v4.8b",
        "addp v5.8h, v5.8h, v7.8h",
        "addp v4.8h, v6.8h, v4.8h",
        "trn1 v6.4s, v5.4s, v4.4s",
        "trn2 v4.4s, v5.4s, v4.4s",
        "addp v16.8h, v6.8h, v4.8h",
        "dup v3.4s, v3.s[0]",
        "ext v4.16b, v2.16b, v2.16b, #0",
        "ext v5.16b, v2.16b, v2.16b, #1",
        "ext v6.16b, v2.16b, v2.16b, #2",
        "ext v2.16b, v2.16b, v2.16b, #3",
        "uabdl v4.8h, v4.8b, v3.8b",
        "uabdl v5.8h, v5.8b, v3.8b",
        "uabdl v6.8h, v6.8b, v3.8b",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addp v3.8h, v4.8h, v6.8h",
        "addp v2.8h, v5.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "str q2, [x28, #192]"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 00000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "pmull v16.1q, v17.1d, v18.1d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 00001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v17.d[1]",
        "pmull v16.1q, v0.1d, v18.1d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 10000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v18.d[1]",
        "pmull v16.1q, v0.1d, v17.1d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 10001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "pmull2 v16.1q, v17.2d, v18.2d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 00000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "pmull v16.1q, v17.1d, v18.1d",
        "pmull v2.1q, v2.1d, v3.1d",
        "str q2, [x28, #192]"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 00001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v0.2d, v17.d[1]",
        "pmull v16.1q, v0.1d, v18.1d",
        "dup v0.2d, v2.d[1]",
        "pmull v2.1q, v0.1d, v3.1d",
        "str q2, [x28, #192]"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 10000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "dup v0.2d, v18.d[1]",
        "pmull v16.1q, v0.1d, v17.1d",
        "dup v0.2d, v3.d[1]",
        "pmull v2.1q, v0.1d, v2.1d",
        "str q2, [x28, #192]"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 10001b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "pmull2 v16.1q, v17.2d, v18.2d",
        "pmull2 v2.1q, v2.2d, v3.2d",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q17, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q17, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q16, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q16, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "str q18, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "str q2, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "str q16, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00001000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "str q17, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00011000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00101000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "str q18, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00111000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #224]",
        "movi v16.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10001000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #208]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v18.16b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x28, #224]",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendvps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.4s, v19.4s, #31",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendvps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.4s, v19.4s, #31",
        "mov v16.16b, v5.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "sshr v4.4s, v4.4s, #31",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    },
    "vblendvpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.2d, v19.2d, #63",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendvpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.2d, v19.2d, #63",
        "mov v16.16b, v5.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "sshr v4.2d, v4.2d, #63",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    },
    "vpblendvb xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.16b, v19.16b, #7",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendvb ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.16b, v19.16b, #7",
        "mov v16.16b, v5.16b",
        "bsl v16.16b, v18.16b, v17.16b",
        "sshr v4.16b, v4.16b, #7",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    },
    "vaeskeygenassist xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #3184]",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v2.16b",
        "tbl v16.16b, {v16.16b}, v3.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vaeskeygenassist xmm0, xmm1, 0xFF": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldr q3, [x28, #3184]",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v2.16b",
        "tbl v16.16b, {v16.16b}, v3.16b",
        "mov x0, #0xff00000000",
        "dup v1.2d, x0",
        "eor v16.16b, v16.16b, v1.16b",
        "stp xzr, xzr, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map3_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "SVE256"
    ]
  },
  "Instructions": {
    "vblendvps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.4s, v19.4s, #31",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z2.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendvps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.4s, v19.4s, #31",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z5.d",
        "sshr v4.4s, v4.4s, #31",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    },
    "vblendvpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.2d, v19.2d, #63",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z2.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vblendvpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.2d, v19.2d, #63",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z5.d",
        "sshr v4.2d, v4.2d, #63",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    },
    "vpblendvb xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x4c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.16b, v19.16b, #7",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z2.d",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpblendvb ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 3 0b01 0x4c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #224]",
        "ldr q4, [x28, #240]",
        "sshr v5.16b, v19.16b, #7",
        "movprfx z16, z18",
        "bsl z16.d, z16.d, z17.d, z5.d",
        "sshr v4.16b, v4.16b, #7",
        "bsl v4.16b, v3.16b, v2.16b",
        "str q4, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/AVX128/VEX_map_group.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVE256",
      "SVE128"
    ]
  },
  "Instructions": {
    "vpsrlw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.8h, v17.8h, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ushr v16.8h, v17.8h, #15",
        "ushr v2.8h, v2.8h, #15",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpsraw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsraw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.8h, v17.8h, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsraw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.8h, v17.8h, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsraw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsraw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "sshr v16.8h, v17.8h, #15",
        "sshr v2.8h, v2.8h, #15",
        "str q2, [x28, #192]"
      ]
    },
    "vpsraw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "sshr v16.8h, v17.8h, #15",
        "sshr v2.8h, v2.8h, #15",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.8h, v17.8h, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v16.8h, v17.8h, #15",
        "shl v2.8h, v2.8h, #15",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrld xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrld xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.4s, v17.4s, #31",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrld xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrld ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrld ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ushr v16.4s, v17.4s, #31",
        "ushr v2.4s, v2.4s, #31",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrld ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrad xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrad xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.4s, v17.4s, #31",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrad xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.4s, v17.4s, #31",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrad ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrad ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "sshr v16.4s, v17.4s, #31",
        "sshr v2.4s, v2.4s, #31",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrad ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "sshr v16.4s, v17.4s, #31",
        "sshr v2.4s, v2.4s, #31",
        "str q2, [x28, #192]"
      ]
    },
    "vpslld xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslld xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.4s, v17.4s, #31",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpslld xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpslld ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslld ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v16.4s, v17.4s, #31",
        "shl v2.4s, v2.4s, #31",
        "str q2, [x28, #192]"
      ]
    },
    "vpslld ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlq xmm0, xmm1, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.2d, v17.2d, #63",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlq xmm0, xmm1, 64": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrlq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlq ymm0, ymm1, 63": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "ushr v16.2d, v17.2d, #63",
        "ushr v2.2d, v2.2d, #63",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrlq ymm0, ymm1, 64": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrldq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrldq xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v17.16b, v2.16b, #15",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrldq xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsrldq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrldq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v3.2d, #0x0",
        "ext v16.16b, v17.16b, v3.16b, #15",
        "ext v2.16b, v2.16b, v3.16b, #15",
        "str q2, [x28, #192]"
      ]
    },
    "vpsrldq ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllq xmm0, xmm1, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.2d, v17.2d, #63",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllq xmm0, xmm1, 64": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpsllq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllq ymm0, ymm1, 63": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "shl v16.2d, v17.2d, #63",
        "shl v2.2d, v2.2d, #63",
        "str q2, [x28, #192]"
      ]
    },
    "vpsllq ymm0, ymm1, 64": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v16.2d, #0x0",
        "movi v2.2d, #0x0",
        "str q2, [x28, #192]"
      ]
    },
    "vpslldq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stp xzr, xzr, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslldq xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v2.16b, v17.16b, #1",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpslldq xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    },
    "vpslldq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "str q2, [x28, #192]",
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslldq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "movi v3.2d, #0x0",
        "ext v16.16b, v3.16b, v17.16b, #1",
        "ext v3.16b, v3.16b, v2.16b, #1",
        "str q3, [x28, #192]"
      ]
    },
    "vpslldq ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "stp xzr, xzr, [x28, #192]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Atomics.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "lock add byte [rax], cl": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x00",
      "ExpectedArm64ASM": [
        "ldaddalb w7, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #24",
        "cmn w0, w7, lsl #24",
        "add w26, w20, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add word [rax], cx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddalh w7, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #16",
        "cmn w0, w7, lsl #16",
        "add w26, w20, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add dword [rax], ecx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddal w7, w20, [x4]",
        "eor x27, x20, x7",
        "adds w26, w20, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or byte [rax], cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x08",
      "ExpectedArm64ASM": [
        "ldsetalb w7, w20, [x4]",
        "orr w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or word [rax], cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "ldsetalh w7, w20, [x4]",
        "orr w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or dword [rax], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "ldsetal w7, w20, [x4]",
        "orr w20, w20, w7",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock adc byte [rax], cl": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x10",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddalb w20, w20, [x4]",
        "eor x27, x20, x7",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "add w23, w20, w22",
        "uxtb w26, w23",
        "cmp w26, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "bic w20, w20, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "lock adc word [rax], cx": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddalh w20, w20, [x4]",
        "eor x27, x20, x7",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "add w23, w20, w22",
        "uxth w26, w23",
        "cmp w26, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "bic w20, w20, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "lock adc dword [rax], ecx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddal w20, w20, [x4]",
        "eor x27, x20, x7",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w20, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock sbb byte [rax], cl": {
      "ExpectedInstructionCount": 20,
      "Comment": "0x18",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddalb w1, w20, [x4]",
        "eor x27, x20, x7",
        "uxtb w20, w20",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "lock sbb word [rax], cx": {
      "ExpectedInstructionCount": 20,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddalh w1, w20, [x4]",
        "eor x27, x20, x7",
        "uxth w20, w20",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "lock sbb dword [rax], ecx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddal w1, w20, [x4]",
        "eor x27, x20, x7",
        "sbcs w26, w20, w7"
      ]
    },
    "lock and byte [rax], cl": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x20",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and word [rax], cx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and dword [rax], ecx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock sub byte [rax], cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x28",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddalb w1, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w20, w7"
      ]
    },
    "lock sub word [rax], cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddalh w1, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w20, w7"
      ]
    },
    "lock sub dword [rax], ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddal w1, w20, [x4]",
        "eor x27, x20, x7",
        "subs w26, w20, w7"
      ]
    },
    "lock xor byte [rax], cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x30",
      "ExpectedArm64ASM": [
        "ldeoralb w7, w20, [x4]",
        "eor w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor word [rax], cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "ldeoralh w7, w20, [x4]",
        "eor w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor dword [rax], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "ldeoral w7, w20, [x4]",
        "eor w20, w20, w7",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock add qword [rax], rcx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddal x7, x20, [x4]",
        "eor x27, x20, x7",
        "adds x26, x20, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xchg byte [rax], cl": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x86",
      "ExpectedArm64ASM": [
        "swpalb w7, w20, [x4]",
        "bfxil x7, x20, #0, #8"
      ]
    },
    "xchg word [rax], cx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpalh w7, w20, [x4]",
        "bfxil x7, x20, #0, #16"
      ]
    },
    "xchg dword [rax], ecx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpal w7, w7, [x4]"
      ]
    },
    "xchg qword [rax], rcx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpal x7, x7, [x4]"
      ]
    },
    "xadd byte [rax], bl": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "ldaddalb w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd word [rax], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "ldaddalh w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd dword [rax], ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldaddal w20, w6, [x4]",
        "eor x27, x6, x20",
        "adds w26, w6, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd qword [rax], rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "ldaddal x6, x20, [x4]",
        "eor x27, x20, x6",
        "adds x26, x20, x6",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x6, x20"
      ]
    },
    "lock add byte [rax], 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalb w20, w27, [x4]",
        "lsl w0, w27, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add byte [rax], 0xFF": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldaddalb w20, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, #0xff (255)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add word [rax], 0x100": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddalh w20, w27, [x4]",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldaddalh w20, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add dword [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddal w20, w27, [x4]",
        "adds w26, w27, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldaddal w20, w20, [x4]",
        "mvn w27, w20",
        "subs w26, w20, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add qword [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add qword [rax], -2147483647": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add word [rax], 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalh w20, w27, [x4]",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add dword [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal w20, w27, [x4]",
        "adds w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock add qword [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or byte [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetalb w20, w20, [x4]",
        "orr w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or byte [rax], 0xFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldsetalb w20, w20, [x4]",
        "orr w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or word [rax], 0x100": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetal w20, w20, [x4]",
        "orr w20, w20, #0x100",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldsetal w20, w21, [x4]",
        "orr w20, w21, w20",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0x100",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0xffffffff80000001",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock or word [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock or dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetal w20, w20, [x4]",
        "orr w20, w20, #0x1",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0x1",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock adc byte [rax], 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddalb w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w27",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "lock adc byte [rax], 0xFF": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "cinc w21, w20, lo",
        "ldaddalb w21, w21, [x4]",
        "mvn w27, w21",
        "cinc w20, w20, lo",
        "add w22, w21, w20",
        "uxtb w26, w22",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock adc word [rax], 0x100": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "ldaddalh w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w27",
        "ubfx x20, x20, #15, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "lock adc word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "cinc w21, w20, lo",
        "ldaddalh w21, w21, [x4]",
        "mvn w27, w21",
        "cinc w20, w20, lo",
        "add w22, w21, w20",
        "uxth w26, w22",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock adc dword [rax], 0x100": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "ldaddal w21, w27, [x4]",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w27, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock adc dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "cinc w21, w20, lo",
        "ldaddal w21, w21, [x4]",
        "mvn w27, w21",
        "mrs x22, nzcv",
        "eor w22, w22, #0x20000000",
        "msr nzcv, x22",
        "adcs w26, w21, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock adc qword [rax], 0x100": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x27, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock adc qword [rax], -2147483647": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x27, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock adc word [rax], 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddalh w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w27",
        "ubfx x20, x20, #15, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "lock adc dword [rax], 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddal w21, w27, [x4]",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w27, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock adc qword [rax], 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x27, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock sbb byte [rax], 1": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalb w1, w27, [x4]",
        "uxtb w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock sbb byte [rax], 0xFF": {
      "ExpectedInstructionCount": 18,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalb w1, w21, [x4]",
        "mvn w27, w21",
        "uxtb w21, w21",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w26, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock sbb word [rax], 0x100": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w27, [x4]",
        "uxth w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock sbb word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 18,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w21, [x4]",
        "mvn w27, w21",
        "uxth w21, w21",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w26, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock sbb dword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w27, [x4]",
        "sbcs w26, w27, w20"
      ]
    },
    "lock sbb dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w21, [x4]",
        "mvn w27, w21",
        "sbcs w26, w21, w20"
      ]
    },
    "lock sbb qword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock sbb qword [rax], -2147483647": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock sbb word [rax], 1": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w27, [x4]",
        "uxth w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "lock sbb dword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w27, [x4]",
        "sbcs w26, w27, w20"
      ]
    },
    "lock sbb qword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock and byte [rax], 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and byte [rax], 0xFF": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w1, w20",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and word [rax], 0x100": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and dword [rax], 0x100": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn w1, w20",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, #0x100",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w1, w20",
        "ldclral w1, w21, [x4]",
        "ands w26, w21, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and qword [rax], 0x100": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0x100",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and qword [rax], -2147483647": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0xffffffff80000001",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and word [rax], 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and dword [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock and qword [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock sub byte [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddalb w1, w27, [x4]",
        "lsl w0, w27, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w27, #0x1 (1)"
      ]
    },
    "lock sub byte [rax], 0xFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "neg w1, w20",
        "ldaddalb w1, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, #0xff (255)"
      ]
    },
    "lock sub word [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg w1, w20",
        "ldaddalh w1, w27, [x4]",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x100 (256)"
      ]
    },
    "lock sub word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "neg w1, w20",
        "ldaddalh w1, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20"
      ]
    },
    "lock sub dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg w1, w20",
        "ldaddal w1, w27, [x4]",
        "subs w26, w27, #0x100 (256)"
      ]
    },
    "lock sub dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "neg w1, w20",
        "ldaddal w1, w20, [x4]",
        "mvn w27, w20",
        "adds w26, w20, #0x1 (1)"
      ]
    },
    "lock sub qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, #0x100 (256)"
      ]
    },
    "lock sub qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, x20"
      ]
    },
    "lock sub word [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddalh w1, w27, [x4]",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x1 (1)"
      ]
    },
    "lock sub dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddal w1, w27, [x4]",
        "subs w26, w27, #0x1 (1)"
      ]
    },
    "lock sub qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, #0x1 (1)"
      ]
    },
    "lock xor byte [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoralb w20, w20, [x4]",
        "eor w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor byte [rax], 0xFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldeoralb w20, w20, [x4]",
        "eor w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor word [rax], 0x100": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoral w20, w20, [x4]",
        "eor w20, w20, #0x100",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldeoral w20, w21, [x4]",
        "eor w20, w21, w20",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0x100",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0xffffffff80000001",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock xor word [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "lock xor dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoral w20, w20, [x4]",
        "eor w20, w20, #0x1",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0x1",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock dec byte [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP3 0xfe /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldaddalb w20, w27, [x4]",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "lock not byte [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf6 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldeoralb w20, w20, [x4]"
      ]
    },
    "lock not word [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldeoralh w20, w20, [x4]"
      ]
    },
    "lock not dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldeoral w20, w20, [x4]"
      ]
    },
    "lock not qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldeoral x20, x20, [x4]"
      ]
    },
    "lock neg byte [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xf6 /3",
      "ExpectedArm64ASM": [
        "ldrb w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casalb w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "cmp wzr, w27, lsl #24",
        "neg w26, w27"
      ]
    },
    "lock neg word [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldrh w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casalh w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "cmp wzr, w27, lsl #16",
        "neg w26, w27"
      ]
    },
    "lock neg dword [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldr w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casal w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "negs w26, w27"
      ]
    },
    "lock neg qword [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldr x1, [x4]",
        "mov x3, x1",
        "neg x2, x1",
        "casal x1, x2, [x4]",
        "sub x2, x1, x3",
        "cbnz x2, #-0x10",
        "mov x27, x1",
        "negs x26, x27"
      ]
    },
    "lock dec word [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldaddalh w20, w27, [x4]",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "lock dec dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldaddal w20, w27, [x4]",
        "cset x20, hs",
        "subs w26, w27, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock dec qword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldaddal x20, x27, [x4]",
        "cset x20, hs",
        "subs x26, x27, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock inc byte [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP4 0xfe /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalb w20, w27, [x4]",
        "cset x21, hs",
        "lsl w0, w27, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "lock inc word [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalh w20, w27, [x4]",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "lock inc dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal w20, w27, [x4]",
        "cset x20, hs",
        "adds w26, w27, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock inc qword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal x20, x27, [x4]",
        "cset x20, hs",
        "adds x26, x27, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/CMakeLists.txt
================================================
# Instruction-count CI wiring.
#
# For every *.json file found below this directory this script registers:
#   * a build-time custom command that runs Scripts/InstructionCountParser.py
#     on the JSON source and writes "<name>.json.instcountci" into the
#     mirrored folder of the build tree,
#   * a CTest entry that launches CodeSizeValidation on that output, and
#   * a CTest entry that runs Scripts/UpdateInstructionCountJson.py with the
#     JSON source and the "<name>.json.instcountci.json" path handed to
#     CodeSizeValidation as its second argument.
#
# Careful. Globbing can't see changes to the contents of files
# Need to do a fresh clean to see changes
file(GLOB_RECURSE JSON_SOURCES CONFIGURE_DEPENDS *.json)

# Collects the custom-command outputs; consumed by instcountci_test_files.
set(JSON_DEPENDS "")

# The launcher does not depend on the JSON file being processed, so it is
# chosen once up front. MinGW builds run the Windows executable through wine.
if (NOT MINGW_BUILD)
  set(LAUNCH_PROGRAM "${CMAKE_BINARY_DIR}/Bin/CodeSizeValidation")
else()
  set(LAUNCH_PROGRAM "wine" "${CMAKE_BINARY_DIR}/Bin/CodeSizeValidation.exe")
endif()

foreach(JSON_SRC ${JSON_SOURCES})
  # Path relative to the top-level source dir: used to mirror the directory
  # layout inside the build tree.
  file(RELATIVE_PATH REL_JSON ${CMAKE_SOURCE_DIR} ${JSON_SRC})
  get_filename_component(JSON_NAME ${JSON_SRC} NAME)
  get_filename_component(JSON_DIR "${REL_JSON}" DIRECTORY)
  set(OUTPUT_JSON_FOLDER "${CMAKE_BINARY_DIR}/${JSON_DIR}")

  # Generate build directory
  file(MAKE_DIRECTORY "${OUTPUT_JSON_FOLDER}")
  set(OUTPUT_JSON_NAME "${OUTPUT_JSON_FOLDER}/${JSON_NAME}.instcountci")
  set(OUTPUT_JSON_NEWNUMBERS_NAME "${OUTPUT_JSON_FOLDER}/${JSON_NAME}.instcountci.json")

  # Re-run the parser whenever the JSON source or the parser script changes.
  add_custom_command(OUTPUT ${OUTPUT_JSON_NAME}
    DEPENDS "${JSON_SRC}"
    DEPENDS "${CMAKE_SOURCE_DIR}/Scripts/InstructionCountParser.py"
    COMMAND "python3" ARGS "${CMAKE_SOURCE_DIR}/Scripts/InstructionCountParser.py" "${JSON_SRC}" "${OUTPUT_JSON_NAME}")

  list(APPEND JSON_DEPENDS "${OUTPUT_JSON_NAME}")

  # Path relative to this directory: keeps the test names short and stable.
  file(RELATIVE_PATH JSON_PATH_RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} ${JSON_SRC})
  set(TEST_NAME "InstCountCI/Test_${JSON_PATH_RELATIVE}.instcountci")
  set(TEST_NAME_UPDATE_NUMBERS "InstCountCI/Test_${JSON_PATH_RELATIVE}.new_numbers")

  add_test(NAME ${TEST_NAME}
    COMMAND ${LAUNCH_PROGRAM} "${OUTPUT_JSON_NAME}" "${OUTPUT_JSON_NEWNUMBERS_NAME}")

  add_test(NAME ${TEST_NAME_UPDATE_NUMBERS}
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/UpdateInstructionCountJson.py" "${JSON_SRC}" "${OUTPUT_JSON_NEWNUMBERS_NAME}")

  # NOTE(review): the DEPENDS test property is documented as a list of test
  # names; the two entries below are file paths. Confirm they have the
  # intended effect.
  set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${CMAKE_BINARY_DIR}/Bin/CodeSizeValidation")
  set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${OUTPUT_JSON_NAME}")

  # Order the update test after the validation test when both are scheduled
  # in the same ctest run.
  set_property(TEST ${TEST_NAME_UPDATE_NUMBERS} APPEND PROPERTY DEPENDS "${TEST_NAME}")
endforeach()

# Built by default so the parsed files exist before any test is launched.
add_custom_target(instcountci_test_files ALL
  DEPENDS "${JSON_DEPENDS}")

# Runs only the validation tests (names ending in "instcountci").
# presumably "$$" is there so the build tool passes a single "$" to ctest - confirm
add_custom_target(instcountci_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  DEPENDS instcountci_test_files
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "InstCountCI/\.*.instcountci$$")

# Runs only the update tests (names ending in "new_numbers").
add_custom_target(instcountci_update_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "InstCountCI/\.*new_numbers$$")


================================================
FILE: unittests/InstructionCountCI/Crypto/H0F38.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "CRYPTO"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "sha1nexte xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x38 0xc8"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "sha1h s2, s2",
        "dup v2.4s, v2.s[0]",
        "add v2.4s, v17.4s, v2.4s",
        "mov v16.16b, v17.16b",
        "mov v16.s[3], v2.s[3]"
      ]
    },
    "sha1msg2 xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x38 0xca"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2680]",
        "ldr q2, [x0, #432]",
        "tbl v3.16b, {v16.16b}, v2.16b",
        "tbl v4.16b, {v17.16b}, v2.16b",
        "sha1su1 v3.4s, v4.4s",
        "tbl v16.16b, {v3.16b}, v2.16b"
      ]
    },
    "sha256rnds2 xmm0, xmm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0x66 0x0f 0x38 0xcb"
      ],
      "ExpectedArm64ASM": [
        "zip2 v2.2d, v17.2d, v16.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v16.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v16.4s, v2.4s"
      ]
    },
    "sha256msg1 xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0xcc"
      ],
      "ExpectedArm64ASM": [
        "sha256su0 v16.4s, v17.4s"
      ]
    },
    "sha256msg2 xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x38 0xcd"
      ],
      "ExpectedArm64ASM": [
        "ext v2.16b, v16.16b, v16.16b, #12",
        "dup v3.4s, v16.s[3]",
        "zip2 v3.2d, v3.2d, v17.2d",
        "movi v16.2d, #0x0",
        "sha256su1 v16.4s, v2.4s, v3.4s"
      ]
    },
    "aesimc xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0xdb"
      ],
      "ExpectedArm64ASM": [
        "aesimc v16.16b, v17.16b"
      ]
    },
    "aesenc xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x38 0xdc"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "aese v16.16b, v2.16b",
        "aesmc v16.16b, v16.16b",
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "aesenclast xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0xdd"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "aese v16.16b, v2.16b",
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "aesdec xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x38 0xde"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "aesd v16.16b, v2.16b",
        "aesimc v16.16b, v16.16b",
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "aesdeclast xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0xdf"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "aesd v16.16b, v2.16b",
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "crc32 eax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x38 0xf0"
      ],
      "ExpectedArm64ASM": [
        "crc32cb w4, w4, w6"
      ]
    },
    "crc32 eax, bx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x38 0xf1"
      ],
      "ExpectedArm64ASM": [
        "crc32ch w4, w4, w6"
      ]
    },
    "crc32 eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x38 0xf1"
      ],
      "ExpectedArm64ASM": [
        "crc32cw w4, w4, w6"
      ]
    },
    "crc32 rax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x38 0xf0"
      ],
      "ExpectedArm64ASM": [
        "crc32cb w4, w4, w6"
      ]
    },
    "crc32 rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf2 0x0f 0x38 0xf1"
      ],
      "ExpectedArm64ASM": [
        "crc32cx w4, w4, x6"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Crypto/H0F3A.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "CRYPTO"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "pclmulqdq xmm0, xmm1, 00000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x44"
      ],
      "ExpectedArm64ASM": [
        "pmull v16.1q, v16.1d, v17.1d"
      ]
    },
    "pclmulqdq xmm0, xmm1, 00001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x44"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v16.d[1]",
        "pmull v16.1q, v0.1d, v17.1d"
      ]
    },
    "pclmulqdq xmm0, xmm1, 10000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x44"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v17.d[1]",
        "pmull v16.1q, v0.1d, v16.1d"
      ]
    },
    "pclmulqdq xmm0, xmm1, 10001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x44"
      ],
      "ExpectedArm64ASM": [
        "pmull2 v16.1q, v16.2d, v17.2d"
      ]
    },
    "aeskeygenassist xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0xdf"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3184]",
        "movi v3.2d, #0x0",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v3.16b",
        "tbl v16.16b, {v16.16b}, v2.16b"
      ]
    },
    "aeskeygenassist xmm0, xmm1, 0xFF": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0x66 0x0f 0x3a 0xdf"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3184]",
        "movi v3.2d, #0x0",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v3.16b",
        "tbl v16.16b, {v16.16b}, v2.16b",
        "mov x0, #0xff00000000",
        "dup v1.2d, x0",
        "eor v16.16b, v16.16b, v1.16b"
      ]
    },
    "sha1rnds4 xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x66 0x0f 0x3a 0xcc"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3568]",
        "movi v3.2d, #0x0",
        "ldr x0, [x28, #2680]",
        "ldr q4, [x0, #432]",
        "tbl v5.16b, {v16.16b}, v4.16b",
        "tbl v6.16b, {v17.16b}, v4.16b",
        "add v2.4s, v6.4s, v2.4s",
        "sha1c q5, s3, v2.4s",
        "tbl v16.16b, {v5.16b}, v4.16b"
      ]
    },
    "sha1rnds4 xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x66 0x0f 0x3a 0xcc"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3584]",
        "movi v3.2d, #0x0",
        "ldr x0, [x28, #2680]",
        "ldr q4, [x0, #432]",
        "tbl v5.16b, {v16.16b}, v4.16b",
        "tbl v6.16b, {v17.16b}, v4.16b",
        "add v2.4s, v6.4s, v2.4s",
        "sha1p q5, s3, v2.4s",
        "tbl v16.16b, {v5.16b}, v4.16b"
      ]
    },
    "sha1rnds4 xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x66 0x0f 0x3a 0xcc"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3600]",
        "movi v3.2d, #0x0",
        "ldr x0, [x28, #2680]",
        "ldr q4, [x0, #432]",
        "tbl v5.16b, {v16.16b}, v4.16b",
        "tbl v6.16b, {v17.16b}, v4.16b",
        "add v2.4s, v6.4s, v2.4s",
        "sha1m q5, s3, v2.4s",
        "tbl v16.16b, {v5.16b}, v4.16b"
      ]
    },
    "sha1rnds4 xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x66 0x0f 0x3a 0xcc"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3616]",
        "movi v3.2d, #0x0",
        "ldr x0, [x28, #2680]",
        "ldr q4, [x0, #432]",
        "tbl v5.16b, {v16.16b}, v4.16b",
        "tbl v6.16b, {v17.16b}, v4.16b",
        "add v2.4s, v6.4s, v2.4s",
        "sha1p q5, s3, v2.4s",
        "tbl v16.16b, {v5.16b}, v4.16b"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/DDD.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "RPRES"
    ]
  },
  "Comment": [
    "These 3DNow! instruction counts are optimal assuming that FEX doesn't use static register allocation (SRA) for MMX registers.",
    "They account for the overhead of loading and storing the MMX registers in each instruction.",
    "Some instructions could technically be saved by using SRA for MMX registers."
  ],
  "Instructions": {
    "pi2fw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x0f 0x0f 0x0c"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "uzp1 v2.4h, v2.4h, v2.4h",
        "sxtl v2.4s, v2.4h",
        "scvtf v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pi2fd mm0, mm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x0f 0x0f 0x0d"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "scvtf v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pf2iw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x0f 0x0f 0x1c"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fcvtzs v2.2s, v2.2s",
        "uzp1 v2.4h, v2.4h, v2.4h",
        "sxtl v2.4s, v2.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pf2id mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0x0f 0x0f 0x1d"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "frint32z v2.4s, v2.4s",
        "fcvtzs v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcpv mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0x0f 0x0f 0x86"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v2.4s, v0.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrsqrtv mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0x0f 0x0f 0x87"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fabs v3.4s, v2.4s",
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v3.4s",
        "fdiv v3.4s, v0.4s, v1.4s",
        "movi v0.2s, #0x80, lsl #24",
        "bit v3.8b, v2.8b, v0.8b",
        "str d3, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfnacc mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0x0f 0x8a",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uzp1 v4.2s, v2.2s, v3.2s",
        "uzp2 v2.2s, v2.2s, v3.2s",
        "fsub v2.4s, v4.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfpnacc mm0, mm1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0x0f 0x8e",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "dup v4.2s, v2.s[1]",
        "fsub s2, s2, s4",
        "faddp v3.4s, v3.4s, v3.4s",
        "mov v2.s[1], v3.s[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfcmpge mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0x90",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fcmge v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfmin mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x0f 0x94",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fcmgt v0.4s, v2.4s, v3.4s",
        "bif v3.16b, v2.16b, v0.16b",
        "str d3, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcp mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x0f 0x0f 0x96"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fmov s0, #0x70 (1.0000)",
        "fdiv s2, s0, s2",
        "dup v2.2s, v2.s[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrsqrt mm0, mm1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0x0f 0x0f 0x97"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fabs v3.4s, v2.4s",
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v3.4s",
        "fdiv v3.4s, v0.4s, v1.4s",
        "movi v0.2s, #0x80, lsl #24",
        "bit v3.8b, v2.8b, v0.8b",
        "dup v2.2s, v3.s[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfsub mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0x9a",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fsub v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfadd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0x9e",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fadd v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfcmpgt mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0xa0",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fcmgt v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfmax mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x0f 0xa4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fcmgt v0.4s, v2.4s, v3.4s",
        "bit v3.16b, v2.16b, v0.16b",
        "str d3, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcpit1 mm0, mm1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x0f 0xa6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcpit1 mm0, mm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x0f 0xa6",
      "ExpectedArm64ASM": []
    },
    "pfrsqit1 mm0, mm1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x0f 0xa7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrsqit1 mm0, mm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x0f 0xa7",
      "ExpectedArm64ASM": []
    },
    "pfsubr mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0xaa",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fsub v2.4s, v2.4s, v3.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfcmpeq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0xb0",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fcmeq v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfmul mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0xb4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "fmul v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcpit2 mm0, mm1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x0f 0xb6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcpit2 mm0, mm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x0f 0xb6",
      "ExpectedArm64ASM": []
    },
    "db 0x0f, 0x0f, 0xc1, 0xb7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "nasm doesn't support emitting this instruction",
        "pmulhrw mm0, mm1",
        "0x0f 0x0f 0xb7"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "smull v2.4s, v2.4h, v3.4h",
        "movi v3.4s, #0x80, lsl #8",
        "add v2.4s, v2.4s, v3.4s",
        "shrn v2.4h, v2.4s, #16",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pswapd mm0, mm1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x0f 0xbb",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "rev64 v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pavgusb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x0f 0xbf",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "urhadd v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/AddressingLimitations.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "Instructions that explicitly push against the limits of ARM's load/store instructions"
  ],
  "Instructions": {
    "movzx rax, byte [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, byte [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, byte [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, byte [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, byte [ecx + 4095]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xfff (4095)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, byte [ecx + 4096]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x1000 (4096)",
        "mov w20, w20",
        "ldrb w4, [x20]"
      ]
    },
    "movzx rax, word [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx + 8190]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x1ffe",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx + 8191]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x1fff",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "movzx rax, word [ecx + 8192]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x2000 (8192)",
        "mov w20, w20",
        "ldrh w4, [x20]"
      ]
    },
    "mov eax, dword [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 16380]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffc",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 16381]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 16382]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffe",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 16383]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3fff",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov eax, dword [ecx + 16384]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x4000 (16384)",
        "mov w20, w20",
        "ldr w4, [x20]"
      ]
    },
    "mov rax, qword [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32760]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff8",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32761]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32762]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffa",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32763]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffb",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32764]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffc",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32765]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffd",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32766]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffe",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32767]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7fff",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "mov rax, qword [ecx + 32768]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x8000 (32768)",
        "mov w20, w20",
        "ldr x4, [x20]"
      ]
    },
    "movzx rax, byte [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldrb w4, [x7, x20, sxtx]"
      ]
    },
    "movzx rax, byte [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurb w4, [x7, #-256]"
      ]
    },
    "movzx rax, byte [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #255]"
      ]
    },
    "movzx rax, byte [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #256]"
      ]
    },
    "movzx rax, byte [rcx + 4095]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #4095]"
      ]
    },
    "movzx rax, byte [rcx + 4096]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x1000",
        "ldrb w4, [x7, x20, sxtx]"
      ]
    },
    "movzx rax, word [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "movzx rax, word [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurh w4, [x7, #-256]"
      ]
    },
    "movzx rax, word [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurh w4, [x7, #255]"
      ]
    },
    "movzx rax, word [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w4, [x7, #256]"
      ]
    },
    "movzx rax, word [rcx + 8190]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w4, [x7, #8190]"
      ]
    },
    "movzx rax, word [rcx + 8191]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x1fff",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "movzx rax, word [rcx + 8192]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x2000",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur w4, [x7, #-256]"
      ]
    },
    "mov eax, dword [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur w4, [x7, #255]"
      ]
    },
    "mov eax, dword [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w4, [x7, #256]"
      ]
    },
    "mov eax, dword [rcx + 16380]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w4, [x7, #16380]"
      ]
    },
    "mov eax, dword [rcx + 16381]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [rcx + 16382]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffe",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [rcx + 16383]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3fff",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [rcx + 16384]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x4000",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur x4, [x7, #-256]"
      ]
    },
    "mov rax, qword [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur x4, [x7, #255]"
      ]
    },
    "mov rax, qword [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr x4, [x7, #256]"
      ]
    },
    "mov rax, qword [rcx + 32760]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr x4, [x7, #32760]"
      ]
    },
    "mov rax, qword [rcx + 32761]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32762]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffa",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32763]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffb",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32764]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffc",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32765]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffd",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32766]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ffe",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32767]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7fff",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "mov rax, qword [rcx + 32768]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x8000",
        "ldr x4, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [rcx + 16379]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffb",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [rcx + 16380]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr s16, [x7, #16380]"
      ]
    },
    "movss xmm0, [rcx + 16381]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur s16, [x7, #-256]"
      ]
    },
    "movss xmm0, [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur s16, [x7, #255]"
      ]
    },
    "movss xmm0, [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr s16, [x7, #256]"
      ]
    },
    "movsd xmm0, [rcx + 32759]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff7",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movsd xmm0, [rcx + 32760]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr d16, [x7, #32760]"
      ]
    },
    "movsd xmm0, [rcx + 32761]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movsd xmm0, [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movsd xmm0, [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #-256]"
      ]
    },
    "movsd xmm0, [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #255]"
      ]
    },
    "movsd xmm0, [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr d16, [x7, #256]"
      ]
    },
    "movq xmm0, [rcx + 65519]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0xffef",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movq xmm0, [rcx + 65520]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff0",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movq xmm0, [rcx + 65521]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff1",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movq xmm0, [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movq xmm0, [rcx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #-256]"
      ]
    },
    "movq xmm0, [rcx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #255]"
      ]
    },
    "movq xmm0, [rcx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr d16, [x7, #256]"
      ]
    },
    "movss xmm0, [ecx + 16379]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffb",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx + 16380]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffc",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx + 16381]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movss xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr s16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 32759]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff7",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 32760]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff8",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 32761]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 65519]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0xffef",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 65520]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff0",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 65521]": {
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff1",
        "add x20, x7, x20",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "mov w20, w20",
        "ldr d16, [x20]"
      ]
    },
    "prefetch [rcx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x101 (257)",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rcx - 256]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "sub x20, x7, #0x100 (256)",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rcx + 255]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x7, #0xff (255)",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rcx + 256]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x7, #0x100 (256)",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rcx + 32760]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff8",
        "add x20, x7, x20",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rcx + 32761]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "add x20, x7, x20",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rax + rcx*1]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rax + rcx*2]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #1",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rax + rcx*4]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #2",
        "prfm pldl1keep, [x20]"
      ]
    },
    "prefetch [rax + rcx*8]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #3",
        "prfm pldl1keep, [x20]"
      ]
    },
    "movzx ebx, byte [rax + rcx*1]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w6, [x4, x7, sxtx]"
      ]
    },
    "movzx ebx, byte [rax + rcx*2]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #1",
        "ldrb w6, [x20]"
      ]
    },
    "movzx ebx, byte [rax + rcx*4]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #2",
        "ldrb w6, [x20]"
      ]
    },
    "movzx ebx, byte [rax + rcx*8]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #3",
        "ldrb w6, [x20]"
      ]
    },
    "movzx ebx, word [rax + rcx*1]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w6, [x4, x7, sxtx]"
      ]
    },
    "movzx ebx, word [rax + rcx*2]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w6, [x4, x7, sxtx #1]"
      ]
    },
    "movzx ebx, word [rax + rcx*4]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #2",
        "ldrh w6, [x20]"
      ]
    },
    "movzx ebx, word [rax + rcx*8]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #3",
        "ldrh w6, [x20]"
      ]
    },
    "mov ebx, [rax + rcx*1]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w6, [x4, x7, sxtx]"
      ]
    },
    "mov ebx, [rax + rcx*2]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #1",
        "ldr w6, [x20]"
      ]
    },
    "mov ebx, [rax + rcx*4]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w6, [x4, x7, sxtx #2]"
      ]
    },
    "mov ebx, [rax + rcx*8]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #3",
        "ldr w6, [x20]"
      ]
    },
    "mov rbx, [rax + rcx*1]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr x6, [x4, x7, sxtx]"
      ]
    },
    "mov rbx, [rax + rcx*2]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #1",
        "ldr x6, [x20]"
      ]
    },
    "mov rbx, [rax + rcx*4]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add x20, x4, x7, lsl #2",
        "ldr x6, [x20]"
      ]
    },
    "mov rbx, [rax + rcx*8]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr x6, [x4, x7, sxtx #3]"
      ]
    },
    "mov ebx, fs:0x14": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr x20, [x28, #1000]",
        "ldr w6, [x20, #20]"
      ]
    },
    "mov rbx, gs:0x14": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr x20, [x28, #992]",
        "ldur x6, [x20, #20]"
      ]
    },
    "Multiple segment registers": {
      "x86Insts": [
        "mov rax, gs:0x100",
        "mov rbx, gs:0x14"
      ],
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "ldr x20, [x28, #992]",
        "ldr x4, [x20, #256]",
        "ldr x20, [x28, #992]",
        "ldur x6, [x20, #20]"
      ]
    },
    "vmovdqu ymm7,yword [rsi+0x60]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldp q23, q2, [x10, #96]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi+0x120]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldp q23, q2, [x10, #288]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi-0x60]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldp q23, q2, [x10, #-96]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi-0x400]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldp q23, q2, [x10, #-1024]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi-0x420]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "sub x20, x10, #0x420 (1056)",
        "ldp q23, q2, [x20]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi+0x3d0]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldp q23, q2, [x10, #976]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqu ymm7,yword [rsi+0x400]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "add x20, x10, #0x400 (1024)",
        "ldp q23, q2, [x20]",
        "str q2, [x28, #304]"
      ]
    },
    "vmovdqa yword [rcx+0x60],ymm1": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "stp q17, q2, [x7, #96]"
      ]
    },
    "vmovdqa yword [rcx+0x3d0],ymm1": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "stp q17, q2, [x7, #976]"
      ]
    },
    "vmovdqa yword [rcx-0x3d0],ymm1": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "stp q17, q2, [x7, #-976]"
      ]
    },
    "vmovdqa yword [rcx+rsi-0x3d0],ymm1": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #208]",
        "add x20, x7, x10",
        "stp q17, q2, [x20, #-976]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/AddressingLimitations_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "Instructions that explicitly push against the limits of ARM's loadstore instructions"
  ],
  "Instructions": {
    "movzx eax, byte [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldrb w4, [x7, x20, sxtx]"
      ]
    },
    "movzx eax, byte [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurb w4, [x7, #-256]"
      ]
    },
    "movzx eax, byte [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #255]"
      ]
    },
    "movzx eax, byte [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #256]"
      ]
    },
    "movzx eax, byte [ecx + 4095]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrb w4, [x7, #4095]"
      ]
    },
    "movzx eax, byte [ecx + 4096]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x1000",
        "ldrb w4, [x7, x20, sxtx]"
      ]
    },
    "movzx eax, word [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "movzx eax, word [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurh w4, [x7, #-256]"
      ]
    },
    "movzx eax, word [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldurh w4, [x7, #255]"
      ]
    },
    "movzx eax, word [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w4, [x7, #256]"
      ]
    },
    "movzx eax, word [ecx + 8190]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldrh w4, [x7, #8190]"
      ]
    },
    "movzx eax, word [ecx + 8191]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x1fff",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "movzx eax, word [ecx + 8192]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x2000",
        "ldrh w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur w4, [x7, #-256]"
      ]
    },
    "mov eax, dword [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur w4, [x7, #255]"
      ]
    },
    "mov eax, dword [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w4, [x7, #256]"
      ]
    },
    "mov eax, dword [ecx + 16380]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr w4, [x7, #16380]"
      ]
    },
    "mov eax, dword [ecx + 16381]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [ecx + 16382]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffe",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [ecx + 16383]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3fff",
        "ldr w4, [x7, x20, sxtx]"
      ]
    },
    "mov eax, dword [ecx + 16384]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "add w20, w7, #0x4000 (16384)",
        "ldr w4, [x20]"
      ]
    },
    "movss xmm0, [ecx + 16379]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffb",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [ecx + 16380]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr s16, [x7, #16380]"
      ]
    },
    "movss xmm0, [ecx + 16381]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffd",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr s16, [x7, x20, sxtx]"
      ]
    },
    "movss xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur s16, [x7, #-256]"
      ]
    },
    "movss xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur s16, [x7, #255]"
      ]
    },
    "movss xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr s16, [x7, #256]"
      ]
    },
    "movsd xmm0, [ecx + 32759]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff7",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 32760]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff8",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx + 32761]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0x7ff9",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movsd xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movsd xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #-256]"
      ]
    },
    "movsd xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #255]"
      ]
    },
    "movsd xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr d16, [x7, #256]"
      ]
    },
    "movq xmm0, [ecx + 65519]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0xffef",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 65520]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff0",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx + 65521]": {
      "ExpectedInstructionCount": 3,
      "ExpectedArm64ASM": [
        "mov w20, #0xfff1",
        "add w20, w7, w20",
        "ldr d16, [x20]"
      ]
    },
    "movq xmm0, [ecx - 257]": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "mov x20, #0xfffffffffffffeff",
        "ldr d16, [x7, x20, sxtx]"
      ]
    },
    "movq xmm0, [ecx - 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #-256]"
      ]
    },
    "movq xmm0, [ecx + 255]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldur d16, [x7, #255]"
      ]
    },
    "movq xmm0, [ecx + 256]": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "ldr d16, [x7, #256]"
      ]
    },
    "mov ebx, fs:0x14": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #1000]",
        "ldr w6, [x20, #20]"
      ]
    },
    "mov ebx, gs:0x14": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #992]",
        "ldr w6, [x20, #20]"
      ]
    },
    "Multiple segment registers": {
      "x86Insts": [
        "mov eax, gs:0x100",
        "mov ebx, gs:0x14"
      ],
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 4,
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #992]",
        "ldr w4, [x20, #256]",
        "ldr w20, [x28, #992]",
        "ldr w6, [x20, #20]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/MultiInst.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2",
      "FRINTTS",
      "MOPS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "These are instruction combinations that could be more optimal if FEX optimized for them"
  ],
  "Instructions": {
    "cpuid constant": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 11,
      "Comment": [
        "CPUID function call with constant function id"
      ],
      "x86Insts": [
        "mov rax, 0",
        "cpuid"
      ],
      "ExpectedArm64ASM": [
        "mov w4, #0x0",
        "isb",
        "mov w20, #0x16",
        "mov w6, #0x6547",
        "movk w6, #0x756e, lsl #16",
        "mov w21, #0x746e",
        "movk w21, #0x6c65, lsl #16",
        "mov w5, #0x6e69",
        "movk w5, #0x4965, lsl #16",
        "mov x7, x21",
        "mov x4, x20"
      ]
    },
    "xgetbv constant": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 3,
      "Comment": [
        "XGETBV function call with constant function id"
      ],
      "x86Insts": [
        "mov rcx, 0",
        "xgetbv"
      ],
      "ExpectedArm64ASM": [
        "mov w7, #0x0",
        "mov w4, #0x7",
        "mov w5, #0x0"
      ]
    },
    "signed div narrow": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 5,
      "Comment": [
        "div narrowing with known smaller sources",
        "dividend in rdx:rax"
      ],
      "x86Insts": [
        "cqo",
        "idiv rcx"
      ],
      "ExpectedArm64ASM": [
        "asr x5, x4, #63",
        "sdiv x20, x4, x7",
        "msub x22, x20, x7, x4",
        "mov x5, x22",
        "mov x4, x20"
      ]
    },
    "unsigned div narrow": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 5,
      "Comment": [
        "div narrowing with known smaller sources",
        "dividend in rdx:rax"
      ],
      "x86Insts": [
        "mov rdx, 0",
        "div rcx"
      ],
      "ExpectedArm64ASM": [
        "mov w5, #0x0",
        "udiv x20, x4, x7",
        "msub x22, x20, x7, x4",
        "mov x5, x22",
        "mov x4, x20"
      ]
    },
    "unsigned div narrow with flags": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "div narrowing with known smaller sources",
        "dividend in rdx:rax"
      ],
      "x86Insts": [
        "xor rdx, rdx",
        "div rcx"
      ],
      "ExpectedArm64ASM": [
        "subs w26, w5, w5",
        "mov w5, #0x0",
        "udiv x20, x4, x7",
        "msub x22, x20, x7, x4",
        "mov x5, x22",
        "mov x4, x20"
      ]
    },
    "signed div narrow 32-bit": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 10,
      "x86Insts": [
        "mov    eax, edi",
        "cdq",
        "idiv    esi"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w11",
        "asr w5, w4, #31",
        "mov w20, w10",
        "mov x0, x4",
        "bfi x0, x5, #32, #32",
        "sxtw x1, w20",
        "sdiv x22, x0, x1",
        "msub x21, x22, x1, x0",
        "mov w4, w22",
        "mov w5, w21"
      ]
    },
    "unsigned div narrow 32-bit": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 7,
      "x86Insts": [
        "mov    eax, edi",
        "xor    edx, edx",
        "div    esi"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w11",
        "subs w26, w5, w5",
        "mov w5, #0x0",
        "udiv w20, w4, w10",
        "msub w22, w20, w10, w4",
        "mov w4, w20",
        "mov w5, w22"
      ]
    },
    "push ax, bx": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Mergeable 16-bit pushes. May or may not be an optimization."
      ],
      "x86Insts": [
        "push ax",
        "push bx"
      ],
      "ExpectedArm64ASM": [
        "strh w4, [x8, #-2]!",
        "strh w6, [x8, #-2]!"
      ]
    },
    "push rax, rbx": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Mergeable 64-bit pushes"
      ],
      "x86Insts": [
        "push rax",
        "push rbx"
      ],
      "ExpectedArm64ASM": [
        "stp x6, x4, [x8, #-16]!"
      ]
    },
    "adds xmm0, xmm1, xmm2": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Redundant scalar adds that can get eliminated without AFP."
      ],
      "x86Insts": [
        "addss xmm0, xmm1",
        "addss xmm0, xmm2"
      ],
      "ExpectedArm64ASM": [
        "fadd s0, s16, s17",
        "mov v16.s[0], v0.s[0]",
        "fadd s0, s16, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "positive movsb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "movsb"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldrb w21, [x10]",
        "strb w21, [x11]",
        "add x10, x10, x20",
        "add x11, x11, x20",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive movsw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "movsw"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldrh w21, [x10]",
        "strh w21, [x11]",
        "add x10, x10, x20, lsl #1",
        "add x11, x11, x20, lsl #1",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive movsd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "movsd"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldr w21, [x10]",
        "str w21, [x11]",
        "add x10, x10, x20, lsl #2",
        "add x11, x11, x20, lsl #2",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive movsq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "movsq"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldr x21, [x10]",
        "str x21, [x11]",
        "add x10, x10, x20, lsl #3",
        "add x11, x11, x20, lsl #3",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative movsb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "movsb"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldrb w21, [x10]",
        "strb w21, [x11]",
        "add x10, x10, x20",
        "add x11, x11, x20",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative movsw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "movsw"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldrh w21, [x10]",
        "strh w21, [x11]",
        "add x10, x10, x20, lsl #1",
        "add x11, x11, x20, lsl #1",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative movsd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "movsd"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldr w21, [x10]",
        "str w21, [x11]",
        "add x10, x10, x20, lsl #2",
        "add x11, x11, x20, lsl #2",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative movsq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "movsq"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldr x21, [x10]",
        "str x21, [x11]",
        "add x10, x10, x20, lsl #3",
        "add x11, x11, x20, lsl #3",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive rep movsb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 54,
      "Comment": [
        "When the direction flag is a compile-time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep movsb"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xa4",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x18",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x14",
        "ldrb w3, [x2], #1",
        "strb w3, [x1], #1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x22, x0, x2",
        "add x21, x1, x2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "positive rep movsw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 55,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep movsw"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #1",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "ldrh w3, [x2], #2",
        "strh w3, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x22, x0, x2, lsl #1",
        "add x21, x1, x2, lsl #1",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "positive rep movsd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 55,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep movsd"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #2",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "ldr w3, [x2], #4",
        "str w3, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x22, x0, x2, lsl #2",
        "add x21, x1, x2, lsl #2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "positive rep movsq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 55,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep movsq"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #3",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "ldr x3, [x2], #8",
        "str x3, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x22, x0, x2, lsl #3",
        "add x21, x1, x2, lsl #3",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "negative rep movsb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 62,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep movsb"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xc4",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x28",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x1 (1)",
        "add x2, x2, #0x1 (1)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1f (31)",
        "sub x2, x2, #0x1f (31)",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1f (31)",
        "add x2, x2, #0x1f (31)",
        "ldrb w3, [x2], #-1",
        "strb w3, [x1], #-1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x22, x0, x2",
        "sub x21, x1, x2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "negative rep movsw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 63,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep movsw"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #1",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x2 (2)",
        "add x2, x2, #0x2 (2)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1e (30)",
        "sub x2, x2, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1e (30)",
        "add x2, x2, #0x1e (30)",
        "ldrh w3, [x2], #-2",
        "strh w3, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x22, x0, x2, lsl #1",
        "sub x21, x1, x2, lsl #1",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "negative rep movsd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 63,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep movsd"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #2",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x4 (4)",
        "add x2, x2, #0x4 (4)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1c (28)",
        "sub x2, x2, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1c (28)",
        "add x2, x2, #0x1c (28)",
        "ldr w3, [x2], #-4",
        "str w3, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x22, x0, x2, lsl #2",
        "sub x21, x1, x2, lsl #2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "negative rep movsq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 63,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep movsq"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #3",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x8 (8)",
        "add x2, x2, #0x8 (8)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x18 (24)",
        "sub x2, x2, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x18 (24)",
        "add x2, x2, #0x18 (24)",
        "ldr x3, [x2], #-8",
        "str x3, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x22, x0, x2, lsl #3",
        "sub x21, x1, x2, lsl #3",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]",
        "mov x11, x22",
        "mov x10, x21"
      ]
    },
    "positive rep stosb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 13,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep stosb"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxtb w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x1c",
        "mrs x2, nzcv",
        "setp [x1]!, x0!, x21",
        "setm [x1]!, x0!, x21",
        "sete [x1]!, x0!, x21",
        "msr nzcv, x2",
        "add x11, x11, x7",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive rep stosw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 29,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep stosw"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxth w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w21",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x10",
        "strh w21, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #1",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive rep stosd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 29,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep stosd"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w21",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x10",
        "str w21, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "positive rep stosq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 28,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "cld",
        "rep stosq"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x10",
        "str x4, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #3",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative rep stosb": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 15,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep stosb"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "uxtb w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x24",
        "sub x1, x1, x0",
        "add x1, x1, #0x1 (1)",
        "mrs x2, nzcv",
        "setp [x1]!, x0!, x21",
        "setm [x1]!, x0!, x21",
        "sete [x1]!, x0!, x21",
        "msr nzcv, x2",
        "sub x11, x11, x7",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative rep stosw": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 31,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep stosw"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "uxth w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w21",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1e (30)",
        "strh w21, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #1",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative rep stosd": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 31,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep stosd"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov w21, w4",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w21",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1c (28)",
        "str w21, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #2",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "negative rep stosq": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 30,
      "Comment": [
        "When direction flag is a compile time constant we can optimize",
        "loads and stores can turn into post-increment when known"
      ],
      "x86Insts": [
        "std",
        "rep stosq"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mov x0, x7",
        "mov x1, x11",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x18 (24)",
        "str x4, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #3",
        "mov w7, #0x0",
        "strb w20, [x28, #1018]"
      ]
    },
    "Sekiro spill block": {
      "x86InstructionCount": 119,
      "ExpectedInstructionCount": 115,
      "Comment": [
        "This block of code came from the settings screen when it loaded",
        "It was originally at RIP: 0x14232cca0 and has been deobfuscated"
      ],
      "x86Insts": [
        "mov    QWORD [rsp+0x8],rcx",
        "push   rbx",
        "push   rbp",
        "push   rsi",
        "push   rdi",
        "push   r12",
        "push   r13",
        "push   r14",
        "push   r15",
        "sub    rsp,0x18",
        "mov    ecx,dword [rdx+0x24]",
        "mov    esi,dword [rdx]",
        "mov    ebp,dword [rdx+0x4]",
        "mov    r14d,dword [rdx+0x8]",
        "mov    r15d,dword [rdx+0xc]",
        "mov    r12d,dword [rdx+0x10]",
        "mov    r13d,dword [rdx+0x14]",
        "mov    r11d,dword [rdx+0x18]",
        "mov    ebx,dword [rdx+0x1c]",
        "mov    edi,dword [rdx+0x20]",
        "imul   eax,ecx,0x13",
        "mov    dword [rsp+0x68],ecx",
        "add    eax,0x1000000",
        "shr    eax,0x19",
        "add    eax,esi",
        "sar    eax,0x1a",
        "add    eax,ebp",
        "sar    eax,0x19",
        "add    eax,r14d",
        "sar    eax,0x1a",
        "add    eax,r15d",
        "sar    eax,0x19",
        "add    eax,r12d",
        "sar    eax,0x1a",
        "add    eax,r13d",
        "sar    eax,0x19",
        "add    eax,r11d",
        "sar    eax,0x1a",
        "add    eax,ebx",
        "sar    eax,0x19",
        "add    eax,edi",
        "sar    eax,0x1a",
        "add    eax,ecx",
        "sar    eax,0x19",
        "imul   eax,eax,0x13",
        "add    esi,eax",
        "mov    eax,esi",
        "sar    eax,0x1a",
        "add    ebp,eax",
        "shl    eax,0x1a",
        "sub    esi,eax",
        "mov    ecx,ebp",
        "mov    rax,qword [rsp+0x60]",
        "sar    ecx,0x19",
        "add    r14d,ecx",
        "shl    ecx,0x19",
        "mov    edx,r14d",
        "sub    ebp,ecx",
        "sar    edx,0x1a",
        "add    r15d,edx",
        "mov    dword [rax],esi",
        "mov    r8d,r15d",
        "shl    edx,0x1a",
        "sar    r8d,0x19",
        "sub    r14d,edx",
        "add    r12d,r8d",
        "mov    dword [rax+0x4],ebp",
        "mov    r9d,r12d",
        "shl    r8d,0x19",
        "sar    r9d,0x1a",
        "sub    r15d,r8d",
        "add    r13d,r9d",
        "mov    dword [rax+0x8],r14d",
        "shl    r9d,0x1a",
        "mov    r10d,r13d",
        "sar    r10d,0x19",
        "sub    r12d,r9d",
        "add    r11d,r10d",
        "mov    dword [rax+0xc],r15d",
        "mov    dword [rsp+0x70],r11d",
        "mov    rsi,rax",
        "sar    r11d,0x1a",
        "add    ebx,r11d",
        "mov    dword [rax+0x10],r12d",
        "mov    dword [rsp+0x78],ebx",
        "sar    ebx,0x19",
        "add    edi,ebx",
        "mov    dword [rsp],edi",
        "sar    edi,0x1a",
        "add    dword [rsp+0x68],edi",
        "shl    r10d,0x19",
        "mov    ecx,dword [rsp+0x68]",
        "sub    r13d,r10d",
        "mov    dword [rax+0x14],r13d",
        "mov    eax,dword [rsp+0x70]",
        "shl    r11d,0x1a",
        "sub    eax,r11d",
        "shl    ebx,0x19",
        "mov    dword [rsi+0x18],eax",
        "mov    eax,dword [rsp+0x78]",
        "sub    eax,ebx",
        "shl    edi,0x1a",
        "mov    dword [rsi+0x1c],eax",
        "mov    eax,dword [rsp]",
        "sub    eax,edi",
        "mov    dword [rsi+0x20],eax",
        "mov    eax,ecx",
        "and    eax,0xfe000000",
        "sub    ecx,eax",
        "mov    dword [rsi+0x24],ecx",
        "add    rsp,0x18",
        "pop    r15",
        "pop    r14",
        "pop    r13",
        "pop    r12",
        "pop    rdi",
        "pop    rsi",
        "pop    rbp",
        "pop    rbx"
      ],
      "ExpectedArm64ASM": [
        "str x7, [x8, #8]",
        "stp x9, x6, [x8, #-16]!",
        "stp x11, x10, [x8, #-16]!",
        "stp x17, x16, [x8, #-16]!",
        "stp x29, x19, [x8, #-16]!",
        "sub x8, x8, #0x18 (24)",
        "ldr w7, [x5, #36]",
        "ldr w10, [x5]",
        "ldr w9, [x5, #4]",
        "ldr w19, [x5, #8]",
        "ldr w29, [x5, #12]",
        "ldr w16, [x5, #16]",
        "ldr w17, [x5, #20]",
        "ldr w15, [x5, #24]",
        "ldr w6, [x5, #28]",
        "ldr w11, [x5, #32]",
        "mov w20, #0x13",
        "mul w4, w7, w20",
        "str w7, [x8, #104]",
        "mov w21, #0x1000000",
        "add w4, w4, w21",
        "lsr w4, w4, #25",
        "add w4, w4, w10",
        "asr w4, w4, #26",
        "add w4, w4, w9",
        "asr w4, w4, #25",
        "add w4, w4, w19",
        "asr w4, w4, #26",
        "add w4, w4, w29",
        "asr w4, w4, #25",
        "add w4, w4, w16",
        "asr w4, w4, #26",
        "add w4, w4, w17",
        "asr w4, w4, #25",
        "add w4, w4, w15",
        "asr w4, w4, #26",
        "add w4, w4, w6",
        "asr w4, w4, #25",
        "add w4, w4, w11",
        "asr w4, w4, #26",
        "add w4, w4, w7",
        "asr w4, w4, #25",
        "mul w4, w4, w20",
        "add w10, w10, w4",
        "asr w4, w10, #26",
        "add w9, w9, w4",
        "lsl w4, w4, #26",
        "sub w10, w10, w4",
        "mov w7, w9",
        "ldr x4, [x8, #96]",
        "asr w7, w7, #25",
        "add w19, w19, w7",
        "lsl w7, w7, #25",
        "mov w5, w19",
        "sub w9, w9, w7",
        "asr w5, w5, #26",
        "add w29, w29, w5",
        "str w10, [x4]",
        "mov w12, w29",
        "lsl w5, w5, #26",
        "asr w12, w12, #25",
        "sub w19, w19, w5",
        "add w16, w16, w12",
        "str w9, [x4, #4]",
        "mov w13, w16",
        "lsl w12, w12, #25",
        "asr w13, w13, #26",
        "sub w29, w29, w12",
        "add w17, w17, w13",
        "str w19, [x4, #8]",
        "lsl w13, w13, #26",
        "asr w14, w17, #25",
        "sub w16, w16, w13",
        "add w15, w15, w14",
        "str w29, [x4, #12]",
        "str w15, [x8, #112]",
        "mov x10, x4",
        "asr w15, w15, #26",
        "add w6, w6, w15",
        "str w16, [x4, #16]",
        "str w6, [x8, #120]",
        "asr w6, w6, #25",
        "add w11, w11, w6",
        "str w11, [x8]",
        "asr w11, w11, #26",
        "ldr w20, [x8, #104]",
        "add w20, w20, w11",
        "str w20, [x8, #104]",
        "lsl w14, w14, #25",
        "ldr w7, [x8, #104]",
        "sub w17, w17, w14",
        "str w17, [x4, #20]",
        "ldr w4, [x8, #112]",
        "lsl w15, w15, #26",
        "sub w4, w4, w15",
        "lsl w6, w6, #25",
        "str w4, [x10, #24]",
        "ldr w4, [x8, #120]",
        "sub w4, w4, w6",
        "lsl w11, w11, #26",
        "str w4, [x10, #28]",
        "ldr w4, [x8]",
        "sub w4, w4, w11",
        "str w4, [x10, #32]",
        "and w4, w7, #0xfe000000",
        "sub w7, w7, w4",
        "str w7, [x10, #36]",
        "mvn w27, w8",
        "adds x26, x8, #0x18 (24)",
        "mov x8, x26",
        "ldp x29, x19, [x8], #16",
        "ldp x17, x16, [x8], #16",
        "ldp x11, x10, [x8], #16",
        "ldp x9, x6, [x8], #16",
        "cfinv"
      ]
    },
    "Control - random block using cvtss2si 1": {
      "x86InstructionCount": 7,
      "ExpectedInstructionCount": 13,
      "x86Insts": [
        "mov    rcx,rdx",
        "cvttss2si rax,xmm1",
        "add    rax,rcx",
        "mov    qword [rsp],rax",
        "mulss  xmm0,dword [rbp]",
        "xor    ecx,ecx",
        "comiss xmm0,xmm2"
      ],
      "ExpectedArm64ASM": [
        "mov x7, x5",
        "frint64z s2, s17",
        "fcvtzs x4, s2",
        "add x4, x4, x7",
        "str x4, [x8]",
        "ldr s2, [x9]",
        "fmul s0, s16, s2",
        "mov v16.s[0], v0.s[0]",
        "mov w7, #0x0",
        "fcmp s16, s18",
        "cset x26, vc",
        "axflag",
        "mov x27, x7"
      ]
    },
    "Control - random block using cvtss2si 2": {
      "x86InstructionCount": 6,
      "ExpectedInstructionCount": 9,
      "x86Insts": [
        "movss  xmm1,dword [rbp+0x40]",
        "roundss xmm1,xmm1,0x1",
        "cvtss2si eax,xmm1",
        "mov    dword [r15+0xb4],eax",
        "mov    dword [rbp+0x48],0x3f800000",
        "test   r14,r14"
      ],
      "ExpectedArm64ASM": [
        "ldr s17, [x9, #64]",
        "frintm s0, s17",
        "mov v17.s[0], v0.s[0]",
        "frint32x s2, s17",
        "fcvtzs w4, s2",
        "str w4, [x29, #180]",
        "mov w20, #0x3f800000",
        "str w20, [x9, #72]",
        "subs x26, x19, #0x0 (0)"
      ]
    },
    "Long-lived ymm_high test": {
      "x86InstructionCount": 16,
      "ExpectedInstructionCount": 132,
      "Comment": [
        "Inspired from a Geekbench benchmark hammering this instruction",
        "Keeps a bunch of ymm_high values live that can get spilled into spill-slots",
        "These can instead be spilled back into the context without any stack usage",
        "Useful to ensure we aren't evicting cachelines unnecessarily"
      ],
      "x86Insts": [
        "vpmaddwd ymm0, ymm1, ymm15",
        "vpmaddwd ymm1, ymm2, ymm14",
        "vpmaddwd ymm2, ymm3, ymm13",
        "vpmaddwd ymm3, ymm4, ymm12",
        "vpmaddwd ymm4, ymm5, ymm11",
        "vpmaddwd ymm5, ymm6, ymm10",
        "vpmaddwd ymm6, ymm7, ymm9",
        "vpmaddwd ymm7, ymm8, ymm8",
        "vpmaddwd ymm8, ymm9, ymm7",
        "vpmaddwd ymm9, ymm10, ymm6",
        "vpmaddwd ymm10, ymm11, ymm5",
        "vpmaddwd ymm11, ymm12, ymm4",
        "vpmaddwd ymm12, ymm13, ymm3",
        "vpmaddwd ymm13, ymm14, ymm2",
        "vpmaddwd ymm14, ymm15, ymm1",
        "vpmaddwd ymm15, ymm0, ymm0"
      ],
      "ExpectedArm64ASM": [
        "sub sp, sp, #0xa0 (160)",
        "ldr q2, [x28, #208]",
        "ldr q3, [x28, #432]",
        "smull v4.4s, v17.4h, v31.4h",
        "smull2 v5.4s, v17.8h, v31.8h",
        "addp v16.4s, v4.4s, v5.4s",
        "smull v4.4s, v2.4h, v3.4h",
        "smull2 v2.4s, v2.8h, v3.8h",
        "addp v2.4s, v4.4s, v2.4s",
        "ldr q4, [x28, #224]",
        "ldr q5, [x28, #416]",
        "smull v6.4s, v18.4h, v30.4h",
        "smull2 v7.4s, v18.8h, v30.8h",
        "addp v17.4s, v6.4s, v7.4s",
        "smull v6.4s, v4.4h, v5.4h",
        "smull2 v4.4s, v4.8h, v5.8h",
        "addp v4.4s, v6.4s, v4.4s",
        "ldr q6, [x28, #240]",
        "ldr q7, [x28, #400]",
        "smull v8.4s, v19.4h, v29.4h",
        "smull2 v9.4s, v19.8h, v29.8h",
        "addp v18.4s, v8.4s, v9.4s",
        "smull v8.4s, v6.4h, v7.4h",
        "smull2 v6.4s, v6.8h, v7.8h",
        "addp v6.4s, v8.4s, v6.4s",
        "ldr q8, [x28, #256]",
        "ldr q9, [x28, #384]",
        "smull v10.4s, v20.4h, v28.4h",
        "smull2 v11.4s, v20.8h, v28.8h",
        "addp v19.4s, v10.4s, v11.4s",
        "smull v10.4s, v8.4h, v9.4h",
        "smull2 v8.4s, v8.8h, v9.8h",
        "addp v8.4s, v10.4s, v8.4s",
        "ldr q10, [x28, #272]",
        "ldr q11, [x28, #368]",
        "smull v12.4s, v21.4h, v27.4h",
        "smull2 v13.4s, v21.8h, v27.8h",
        "addp v20.4s, v12.4s, v13.4s",
        "smull v12.4s, v10.4h, v11.4h",
        "smull2 v10.4s, v10.8h, v11.8h",
        "addp v10.4s, v12.4s, v10.4s",
        "ldr q12, [x28, #288]",
        "ldr q13, [x28, #352]",
        "smull v14.4s, v22.4h, v26.4h",
        "smull2 v15.4s, v22.8h, v26.8h",
        "addp v21.4s, v14.4s, v15.4s",
        "smull v14.4s, v12.4h, v13.4h",
        "smull2 v12.4s, v12.8h, v13.8h",
        "addp v12.4s, v14.4s, v12.4s",
        "ldr q14, [x28, #304]",
        "ldr q15, [x28, #336]",
        "str q2, [sp]",
        "smull v2.4s, v23.4h, v25.4h",
        "str q3, [sp, #32]",
        "smull2 v3.4s, v23.8h, v25.8h",
        "addp v22.4s, v2.4s, v3.4s",
        "smull v2.4s, v14.4h, v15.4h",
        "smull2 v3.4s, v14.8h, v15.8h",
        "addp v2.4s, v2.4s, v3.4s",
        "ldr q3, [x28, #320]",
        "smull v14.4s, v24.4h, v24.4h",
        "str q4, [sp, #64]",
        "smull2 v4.4s, v24.8h, v24.8h",
        "addp v23.4s, v14.4s, v4.4s",
        "smull v4.4s, v3.4h, v3.4h",
        "smull2 v3.4s, v3.8h, v3.8h",
        "addp v3.4s, v4.4s, v3.4s",
        "smull v4.4s, v25.4h, v23.4h",
        "smull2 v14.4s, v25.8h, v23.8h",
        "addp v24.4s, v4.4s, v14.4s",
        "smull v4.4s, v15.4h, v3.4h",
        "smull2 v14.4s, v15.8h, v3.8h",
        "addp v4.4s, v4.4s, v14.4s",
        "smull v14.4s, v26.4h, v22.4h",
        "smull2 v15.4s, v26.8h, v22.8h",
        "addp v25.4s, v14.4s, v15.4s",
        "smull v14.4s, v13.4h, v2.4h",
        "smull2 v13.4s, v13.8h, v2.8h",
        "addp v13.4s, v14.4s, v13.4s",
        "smull v14.4s, v27.4h, v21.4h",
        "smull2 v15.4s, v27.8h, v21.8h",
        "addp v26.4s, v14.4s, v15.4s",
        "smull v14.4s, v11.4h, v12.4h",
        "smull2 v11.4s, v11.8h, v12.8h",
        "addp v11.4s, v14.4s, v11.4s",
        "smull v14.4s, v28.4h, v20.4h",
        "smull2 v15.4s, v28.8h, v20.8h",
        "addp v27.4s, v14.4s, v15.4s",
        "smull v14.4s, v9.4h, v10.4h",
        "smull2 v9.4s, v9.8h, v10.8h",
        "addp v9.4s, v14.4s, v9.4s",
        "smull v14.4s, v29.4h, v19.4h",
        "smull2 v15.4s, v29.8h, v19.8h",
        "addp v28.4s, v14.4s, v15.4s",
        "smull v14.4s, v7.4h, v8.4h",
        "smull2 v7.4s, v7.8h, v8.8h",
        "addp v7.4s, v14.4s, v7.4s",
        "smull v14.4s, v30.4h, v18.4h",
        "smull2 v15.4s, v30.8h, v18.8h",
        "addp v29.4s, v14.4s, v15.4s",
        "smull v14.4s, v5.4h, v6.4h",
        "smull2 v5.4s, v5.8h, v6.8h",
        "addp v5.4s, v14.4s, v5.4s",
        "smull v14.4s, v31.4h, v17.4h",
        "smull2 v15.4s, v31.8h, v17.8h",
        "addp v30.4s, v14.4s, v15.4s",
        "ldr q14, [sp, #32]",
        "ldr q15, [sp, #64]",
        "str q6, [sp, #96]",
        "smull v6.4s, v14.4h, v15.4h",
        "smull2 v14.4s, v14.8h, v15.8h",
        "addp v6.4s, v6.4s, v14.4s",
        "smull v14.4s, v16.4h, v16.4h",
        "smull2 v15.4s, v16.8h, v16.8h",
        "addp v31.4s, v14.4s, v15.4s",
        "ldr q14, [sp]",
        "smull v15.4s, v14.4h, v14.4h",
        "str q8, [sp, #128]",
        "smull2 v8.4s, v14.8h, v14.8h",
        "addp v8.4s, v15.4s, v8.4s",
        "stp q6, q8, [x28, #416]",
        "stp q7, q5, [x28, #384]",
        "stp q11, q9, [x28, #352]",
        "stp q4, q13, [x28, #320]",
        "stp q2, q3, [x28, #288]",
        "stp q10, q12, [x28, #256]",
        "ldr q2, [sp, #96]",
        "ldr q3, [sp, #128]",
        "stp q2, q3, [x28, #224]",
        "ldr q2, [sp, #64]",
        "stp q14, q2, [x28, #192]",
        "add sp, sp, #0xa0 (160)"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/MultiInst_32bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "These are instruction combinations that could be more optimal if FEX optimized for them"
  ],
  "Instructions": {
    "Load variables from structs": {
      "x86InstructionCount": 7,
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned",
        "Loads a bunch of values from structs passed as arguments",
        "Loads failed to use LRCPC2/ldapur with small immediate offset when TSO is enabled, but are fine when TSO isn't enabled."
      ],
      "x86Insts": [
        "mov edi, [ecx + 8]",
        "mov edx, [ecx + 4]",
        "mov ebx, [ecx]",
        "mov esi, [ecx + 0xc]",
        "imul edx, edi",
        "mov eax, [ebx + 0xc]",
        "sub eax, [ebx + 4]"
      ],
      "ExpectedArm64ASM": [
        "ldr w11, [x7, #8]",
        "ldr w5, [x7, #4]",
        "ldr w6, [x7]",
        "ldr w10, [x7, #12]",
        "mul w5, w5, w11",
        "ldr w4, [x6, #12]",
        "ldr w20, [x6, #4]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "mov x4, x26"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/MultiInst_AFP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES"
    ]
  },
  "Comment": [
    "These are instruction combinations that could be more optimal if FEX optimized for them"
  ],
  "Instructions": {
    "adds xmm0, xmm1, xmm2": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Redundant scalar operations should get eliminated with AFP"
      ],
      "x86Insts": [
        "addss xmm0, xmm1",
        "addss xmm0, xmm2"
      ],
      "ExpectedArm64ASM": [
        "fadd s16, s16, s17",
        "fadd s16, s16, s18"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/MultiInst_TSO.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "TSO",
      "LRCPC",
      "LRCPC2",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "These are instruction combinations that could be more optimal if FEX optimized for them"
  ],
  "Instructions": {
    "Load variables from memory": {
      "x86InstructionCount": 6,
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Just to ensure small atomic offset loads are using LRCPC2"
      ],
      "x86Insts": [
        "mov edi, [rcx]",
        "mov edx, [rcx + 4]",
        "mov rbx, [rcx + 8]",
        "mov rsi, [rcx + 16]",
        "mov ax, [rcx + 24]",
        "mov bl, [rcx + 26]"
      ],
      "ExpectedArm64ASM": [
        "ldapur w11, [x7]",
        "nop",
        "ldapur w5, [x7, #4]",
        "nop",
        "ldapur x6, [x7, #8]",
        "nop",
        "ldapur x10, [x7, #16]",
        "nop",
        "ldapurh w20, [x7, #24]",
        "nop",
        "bfxil x4, x20, #0, #16",
        "ldapurb w20, [x7, #26]",
        "bfxil x6, x20, #0, #8"
      ]
    },
    "Store variables to memory": {
      "x86InstructionCount": 6,
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Just to ensure small atomic offset stores are using LRCPC2"
      ],
      "x86Insts": [
        "mov [rcx], edi",
        "mov [rcx + 4], edx",
        "mov [rcx + 8], rbx",
        "mov [rcx + 16], rsi",
        "mov [rcx + 24], ax",
        "mov [rcx + 26], bl"
      ],
      "ExpectedArm64ASM": [
        "nop",
        "stlur w11, [x7]",
        "nop",
        "stlur w5, [x7, #4]",
        "nop",
        "stlur x6, [x7, #8]",
        "nop",
        "stlur x10, [x7, #16]",
        "nop",
        "stlurh w4, [x7, #24]",
        "stlurb w6, [x7, #26]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/MultiInst_TSO_32bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "TSO",
      "LRCPC",
      "LRCPC2",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [
    "These are instruction combinations that could be more optimal if FEX optimized for them"
  ],
  "Instructions": {
    "Load variables from structs": {
      "x86InstructionCount": 7,
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Saw this in 32-bit libvulkan_freedreno.so:tu_cs_begin_sub_stream_aligned",
        "Loads a bunch of values from structs passed as arguments",
        "Loads failed to use LRCPC2/ldapur with small immediate offset when possible"
      ],
      "x86Insts": [
        "mov edi, [ecx + 8]",
        "mov edx, [ecx + 4]",
        "mov ebx, [ecx]",
        "mov esi, [ecx + 0xc]",
        "imul edx, edi",
        "mov eax, [ebx + 0xc]",
        "sub eax, [ebx + 4]"
      ],
      "ExpectedArm64ASM": [
        "ldapur w11, [x7, #8]",
        "nop",
        "ldapur w5, [x7, #4]",
        "nop",
        "ldapur w6, [x7]",
        "nop",
        "ldapur w10, [x7, #12]",
        "nop",
        "mul w5, w5, w11",
        "ldapur w4, [x6, #12]",
        "nop",
        "ldapur w20, [x6, #4]",
        "nop",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "mov x4, x26"
      ]
    },
    "Load variables from memory": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Just to ensure small atomic offset loads are using LRCPC2"
      ],
      "x86Insts": [
        "mov edi, [ecx]",
        "mov edx, [ecx + 4]",
        "mov ax, [ecx + 24]",
        "mov bl, [ecx + 26]"
      ],
      "ExpectedArm64ASM": [
        "ldapur w11, [x7]",
        "nop",
        "ldapur w5, [x7, #4]",
        "nop",
        "ldapurh w20, [x7, #24]",
        "nop",
        "bfxil x4, x20, #0, #16",
        "ldapurb w20, [x7, #26]",
        "bfxil x6, x20, #0, #8"
      ]
    },
    "Store variables to memory": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Just to ensure small atomic offset stores are using LRCPC2"
      ],
      "x86Insts": [
        "mov [ecx], edi",
        "mov [ecx + 4], edx",
        "mov [ecx + 24], ax",
        "mov [ecx + 26], bl"
      ],
      "ExpectedArm64ASM": [
        "nop",
        "stlur w11, [x7]",
        "nop",
        "stlur w5, [x7, #4]",
        "nop",
        "stlurh w4, [x7, #24]",
        "stlurb w6, [x7, #26]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FEXOpt/libnss.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "CRYPTO",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Comment": [],
  "Instructions": {
    "libnss3 sha": {
      "x86InstructionCount": 168,
      "ExpectedInstructionCount": 521,
      "Comment": [
        "This block of code comes from libnss3 which causes panic spilling in FEX's RA.",
        "This code is hit in steamwebhelper calling into this function.",
        "No correct behaviour to return here, just need to compare output logs and ensure panic spilling doesn't occur."
      ],
      "x86Insts": [
        "endbr64",
        "movdqu  xmm2, [rdi+0x100]",
        "movdqu  xmm3, [rdi+0x110]",
        "movdqu  xmm8, [rdi]",
        "movdqu  xmm7, [rdi+0x10]",
        "pshufd  xmm0, xmm2, 0xb1",
        "pshufd  xmm2, xmm3, 0x1b",
        "movdqu  xmm6, [rdi+0x20]",
        "movdqu  xmm5, [rdi+0x30]",
        "movdqa  xmm3, xmm0",
        "palignr xmm3, xmm2, 0x8",
        "pblendw xmm2, xmm0, 0xf0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "movdqa  xmm4, xmm2",
        "movdqa  xmm1, xmm3",
        "pshufb  xmm5, xmm0",
        "pshufb  xmm6, xmm0",
        "pshufb  xmm7, xmm0",
        "pshufb  xmm8, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "paddd   xmm0, xmm8",
        "sha256msg1 xmm8, xmm7",
        "sha256rnds2 xmm4, xmm3",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm5",
        "palignr xmm0, xmm6, 0x4",
        "paddd   xmm8, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm8, xmm5",
        "paddd   xmm0, xmm7",
        "sha256msg1 xmm7, xmm6",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm8",
        "palignr xmm0, xmm5, 0x4",
        "paddd   xmm7, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm7, xmm8",
        "paddd   xmm0, xmm6",
        "sha256msg1 xmm6, xmm5",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm7",
        "palignr xmm0, xmm8, 0x4",
        "paddd   xmm6, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm6, xmm7",
        "paddd   xmm0, xmm5",
        "sha256msg1 xmm5, xmm8",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm6",
        "palignr xmm0, xmm7, 0x4",
        "paddd   xmm5, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm5, xmm6",
        "paddd   xmm0, xmm8",
        "sha256msg1 xmm8, xmm7",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm5",
        "palignr xmm0, xmm6, 0x4",
        "paddd   xmm8, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm8, xmm5",
        "paddd   xmm0, xmm7",
        "sha256msg1 xmm7, xmm6",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm8",
        "palignr xmm0, xmm5, 0x4",
        "paddd   xmm7, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm7, xmm8",
        "paddd   xmm0, xmm6",
        "sha256msg1 xmm6, xmm5",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm7",
        "palignr xmm0, xmm8, 0x4",
        "paddd   xmm6, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm6, xmm7",
        "paddd   xmm0, xmm5",
        "sha256msg1 xmm5, xmm8",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm6",
        "palignr xmm0, xmm7, 0x4",
        "paddd   xmm5, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm5, xmm6",
        "paddd   xmm0, xmm8",
        "sha256msg1 xmm8, xmm7",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm5",
        "palignr xmm0, xmm6, 0x4",
        "paddd   xmm8, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm8, xmm5",
        "paddd   xmm0, xmm7",
        "sha256msg1 xmm7, xmm6",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm8",
        "palignr xmm0, xmm5, 0x4",
        "paddd   xmm7, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm7, xmm8",
        "paddd   xmm0, xmm6",
        "sha256msg1 xmm6, xmm5",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm7",
        "palignr xmm0, xmm8, 0x4",
        "paddd   xmm6, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm6, xmm7",
        "paddd   xmm0, xmm5",
        "sha256msg1 xmm5, xmm8",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm6",
        "palignr xmm0, xmm7, 0x4",
        "paddd   xmm7, [r15 + 0x1_1000]",
        "paddd   xmm5, xmm0",
        "movdqa  xmm0, [r15 + 0x1_1000]",
        "sha256msg2 xmm5, xmm6",
        "paddd   xmm6, [r15 + 0x1_1000]",
        "paddd   xmm5, [r15 + 0x1_1000]",
        "paddd   xmm0, xmm8",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm0, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm7",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm7, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm6",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm6, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "movdqa  xmm0, xmm5",
        "sha256rnds2 xmm4, xmm1",
        "pshufd  xmm0, xmm5, 0xe",
        "sha256rnds2 xmm1, xmm4",
        "paddd   xmm4, xmm2",
        "paddd   xmm1, xmm3",
        "pshufd  xmm4, xmm4, 0xb1",
        "pshufd  xmm1, xmm1, 0x1b",
        "movdqa  xmm0, xmm1",
        "pblendw xmm0, xmm4, 0xf0",
        "palignr xmm4, xmm1, 0x8",
        "movups  [rdi+0x100], xmm0",
        "movups  [rdi+0x110], xmm4"
      ],
      "ExpectedArm64ASM": [
        "ldr q18, [x11, #256]",
        "ldr q19, [x11, #272]",
        "ldr q24, [x11]",
        "ldr q23, [x11, #16]",
        "rev64 v16.4s, v18.4s",
        "rev64 v2.4s, v19.4s",
        "ext v18.16b, v2.16b, v2.16b, #8",
        "ldr q22, [x11, #32]",
        "ldr q21, [x11, #48]",
        "ext v19.16b, v18.16b, v16.16b, #8",
        "mov v18.d[1], v16.d[1]",
        "mov w20, #0x1000",
        "movk w20, #0x1, lsl #16",
        "ldr q16, [x29, x20, sxtx]",
        "mov v20.16b, v18.16b",
        "mov v17.16b, v19.16b",
        "movi v2.16b, #0x8f",
        "and v2.16b, v16.16b, v2.16b",
        "tbl v21.16b, {v21.16b}, v2.16b",
        "movi v2.16b, #0x8f",
        "and v2.16b, v16.16b, v2.16b",
        "tbl v22.16b, {v22.16b}, v2.16b",
        "movi v2.16b, #0x8f",
        "and v2.16b, v16.16b, v2.16b",
        "tbl v23.16b, {v23.16b}, v2.16b",
        "movi v2.16b, #0x8f",
        "and v2.16b, v16.16b, v2.16b",
        "tbl v24.16b, {v24.16b}, v2.16b",
        "ldr q16, [x29, x20, sxtx]",
        "add v16.4s, v16.4s, v24.4s",
        "sha256su0 v24.4s, v23.4s",
        "zip2 v2.2d, v19.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v19.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v22.16b, v21.16b, #4",
        "add v24.4s, v24.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v24.16b, v24.16b, #12",
        "dup v3.4s, v24.s[3]",
        "zip2 v3.2d, v3.2d, v21.2d",
        "movi v24.2d, #0x0",
        "sha256su1 v24.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v23.4s",
        "sha256su0 v23.4s, v22.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v21.16b, v24.16b, #4",
        "add v23.4s, v23.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v23.16b, v23.16b, #12",
        "dup v3.4s, v23.s[3]",
        "zip2 v3.2d, v3.2d, v24.2d",
        "movi v23.2d, #0x0",
        "sha256su1 v23.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v22.4s",
        "sha256su0 v22.4s, v21.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v24.16b, v23.16b, #4",
        "add v22.4s, v22.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v22.16b, v22.16b, #12",
        "dup v3.4s, v22.s[3]",
        "zip2 v3.2d, v3.2d, v23.2d",
        "movi v22.2d, #0x0",
        "sha256su1 v22.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v21.4s",
        "sha256su0 v21.4s, v24.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v23.16b, v22.16b, #4",
        "add v21.4s, v21.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v21.16b, v21.16b, #12",
        "dup v3.4s, v21.s[3]",
        "zip2 v3.2d, v3.2d, v22.2d",
        "movi v21.2d, #0x0",
        "sha256su1 v21.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v24.4s",
        "sha256su0 v24.4s, v23.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v22.16b, v21.16b, #4",
        "add v24.4s, v24.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v24.16b, v24.16b, #12",
        "dup v3.4s, v24.s[3]",
        "zip2 v3.2d, v3.2d, v21.2d",
        "movi v24.2d, #0x0",
        "sha256su1 v24.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v23.4s",
        "sha256su0 v23.4s, v22.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v21.16b, v24.16b, #4",
        "add v23.4s, v23.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v23.16b, v23.16b, #12",
        "dup v3.4s, v23.s[3]",
        "zip2 v3.2d, v3.2d, v24.2d",
        "movi v23.2d, #0x0",
        "sha256su1 v23.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v22.4s",
        "sha256su0 v22.4s, v21.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v24.16b, v23.16b, #4",
        "add v22.4s, v22.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v22.16b, v22.16b, #12",
        "dup v3.4s, v22.s[3]",
        "zip2 v3.2d, v3.2d, v23.2d",
        "movi v22.2d, #0x0",
        "sha256su1 v22.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v21.4s",
        "sha256su0 v21.4s, v24.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v23.16b, v22.16b, #4",
        "add v21.4s, v21.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v21.16b, v21.16b, #12",
        "dup v3.4s, v21.s[3]",
        "zip2 v3.2d, v3.2d, v22.2d",
        "movi v21.2d, #0x0",
        "sha256su1 v21.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v24.4s",
        "sha256su0 v24.4s, v23.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v22.16b, v21.16b, #4",
        "add v24.4s, v24.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v24.16b, v24.16b, #12",
        "dup v3.4s, v24.s[3]",
        "zip2 v3.2d, v3.2d, v21.2d",
        "movi v24.2d, #0x0",
        "sha256su1 v24.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v23.4s",
        "sha256su0 v23.4s, v22.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v21.16b, v24.16b, #4",
        "add v23.4s, v23.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v23.16b, v23.16b, #12",
        "dup v3.4s, v23.s[3]",
        "zip2 v3.2d, v3.2d, v24.2d",
        "movi v23.2d, #0x0",
        "sha256su1 v23.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v22.4s",
        "sha256su0 v22.4s, v21.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v24.16b, v23.16b, #4",
        "add v22.4s, v22.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v22.16b, v22.16b, #12",
        "dup v3.4s, v22.s[3]",
        "zip2 v3.2d, v3.2d, v23.2d",
        "movi v22.2d, #0x0",
        "sha256su1 v22.4s, v2.4s, v3.4s",
        "add v16.4s, v16.4s, v21.4s",
        "sha256su0 v21.4s, v24.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "ext v16.16b, v23.16b, v22.16b, #4",
        "ldr q2, [x29, x20, sxtx]",
        "add v23.4s, v23.4s, v2.4s",
        "add v21.4s, v21.4s, v16.4s",
        "ldr q16, [x29, x20, sxtx]",
        "ext v2.16b, v21.16b, v21.16b, #12",
        "dup v3.4s, v21.s[3]",
        "zip2 v3.2d, v3.2d, v22.2d",
        "movi v21.2d, #0x0",
        "sha256su1 v21.4s, v2.4s, v3.4s",
        "ldr q2, [x29, x20, sxtx]",
        "add v22.4s, v22.4s, v2.4s",
        "ldr q2, [x29, x20, sxtx]",
        "add v21.4s, v21.4s, v2.4s",
        "add v16.4s, v16.4s, v24.4s",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v16.s[0]",
        "ext v16.16b, v16.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "mov v16.16b, v23.16b",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v23.s[0]",
        "ext v16.16b, v23.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "mov v16.16b, v22.16b",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v22.s[0]",
        "ext v16.16b, v22.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "mov v16.16b, v21.16b",
        "zip2 v2.2d, v17.2d, v20.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v17.2d, v20.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v20.4s, v2.4s",
        "dup v2.4s, v21.s[0]",
        "ext v16.16b, v21.16b, v2.16b, #8",
        "zip2 v2.2d, v20.2d, v17.2d",
        "rev64 v2.4s, v2.4s",
        "zip1 v3.2d, v20.2d, v17.2d",
        "rev64 v3.4s, v3.4s",
        "dup v4.2d, v16.d[0]",
        "mov v5.16b, v2.16b",
        "sha256h q5, q3, v4.4s",
        "sha256h2 q3, q2, v4.4s",
        "zip2 v2.2d, v3.2d, v5.2d",
        "rev64 v17.4s, v2.4s",
        "add v20.4s, v20.4s, v18.4s",
        "add v17.4s, v17.4s, v19.4s",
        "rev64 v20.4s, v20.4s",
        "rev64 v2.4s, v17.4s",
        "ext v17.16b, v2.16b, v2.16b, #8",
        "mov v16.16b, v17.16b",
        "mov v16.d[1], v20.d[1]",
        "ext v20.16b, v17.16b, v20.16b, #8",
        "str q16, [x11, #256]",
        "str q20, [x11, #272]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Atomics.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "lock add byte [rax], cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x00",
      "ExpectedArm64ASM": [
        "ldaddalb w7, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #24",
        "cmn w0, w7, lsl #24",
        "add w26, w20, w7",
        "cfinv"
      ]
    },
    "lock add word [rax], cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddalh w7, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #16",
        "cmn w0, w7, lsl #16",
        "add w26, w20, w7",
        "cfinv"
      ]
    },
    "lock add dword [rax], ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddal w7, w20, [x4]",
        "eor x27, x20, x7",
        "adds w26, w20, w7",
        "cfinv"
      ]
    },
    "lock or byte [rax], cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x08",
      "ExpectedArm64ASM": [
        "ldsetalb w7, w20, [x4]",
        "orr w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock or word [rax], cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "ldsetalh w7, w20, [x4]",
        "orr w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock or dword [rax], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "ldsetal w7, w20, [x4]",
        "orr w20, w20, w7",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock adc byte [rax], cl": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x10",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddalb w20, w20, [x4]",
        "eor x27, x20, x7",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "add w23, w20, w22",
        "uxtb w26, w23",
        "cmp w26, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "bic w20, w20, w21",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock adc word [rax], cx": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddalh w20, w20, [x4]",
        "eor x27, x20, x7",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "add w23, w20, w22",
        "uxth w26, w23",
        "cmp w26, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "bic w20, w20, w21",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock adc dword [rax], ecx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "ldaddal w20, w20, [x4]",
        "eor x27, x20, x7",
        "cfinv",
        "adcs w26, w20, w7",
        "cfinv"
      ]
    },
    "lock sbb byte [rax], cl": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x18",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddalb w1, w20, [x4]",
        "eor x27, x20, x7",
        "uxtb w20, w20",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock sbb word [rax], cx": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddalh w1, w20, [x4]",
        "eor x27, x20, x7",
        "uxth w20, w20",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock sbb dword [rax], ecx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "cinc w20, w7, lo",
        "neg w1, w20",
        "ldaddal w1, w20, [x4]",
        "eor x27, x20, x7",
        "sbcs w26, w20, w7"
      ]
    },
    "lock and byte [rax], cl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x20",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock and word [rax], cx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock and dword [rax], ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "mvn w1, w7",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, w7",
        "cfinv"
      ]
    },
    "lock sub byte [rax], cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x28",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddalb w1, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w20, w7"
      ]
    },
    "lock sub word [rax], cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddalh w1, w20, [x4]",
        "eor x27, x20, x7",
        "lsl w0, w20, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w20, w7"
      ]
    },
    "lock sub dword [rax], ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "neg w1, w7",
        "ldaddal w1, w20, [x4]",
        "eor x27, x20, x7",
        "subs w26, w20, w7"
      ]
    },
    "lock xor byte [rax], cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x30",
      "ExpectedArm64ASM": [
        "ldeoralb w7, w20, [x4]",
        "eor w26, w20, w7",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock xor word [rax], cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "ldeoralh w7, w20, [x4]",
        "eor w26, w20, w7",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock xor dword [rax], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "ldeoral w7, w20, [x4]",
        "eor w20, w20, w7",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock add qword [rax], rcx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "ldaddal x7, x20, [x4]",
        "eor x27, x20, x7",
        "adds x26, x20, x7",
        "cfinv"
      ]
    },
    "xadd byte [rax], bl": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "ldaddalb w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #8",
        "cfinv"
      ]
    },
    "xadd word [rax], bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "ldaddalh w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #16",
        "cfinv"
      ]
    },
    "xadd dword [rax], ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldaddal w20, w6, [x4]",
        "eor x27, x6, x20",
        "adds w26, w6, w20",
        "cfinv"
      ]
    },
    "xadd qword [rax], rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "ldaddal x6, x20, [x4]",
        "eor x27, x20, x6",
        "adds x26, x20, x6",
        "cfinv",
        "mov x6, x20"
      ]
    },
    "lock add byte [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalb w20, w27, [x4]",
        "lsl w0, w27, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w27, #0x1 (1)",
        "cfinv"
      ]
    },
    "lock add byte [rax], 0xFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldaddalb w20, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, #0xff (255)",
        "cfinv"
      ]
    },
    "lock add word [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddalh w20, w27, [x4]",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x100 (256)",
        "cfinv"
      ]
    },
    "lock add word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldaddalh w20, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "cfinv"
      ]
    },
    "lock add dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddal w20, w27, [x4]",
        "adds w26, w27, #0x100 (256)",
        "cfinv"
      ]
    },
    "lock add dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldaddal w20, w20, [x4]",
        "mvn w27, w20",
        "subs w26, w20, #0x1 (1)",
        "cfinv"
      ]
    },
    "lock add qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, #0x100 (256)",
        "cfinv"
      ]
    },
    "lock add qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, x20",
        "cfinv"
      ]
    },
    "lock add word [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalh w20, w27, [x4]",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x1 (1)",
        "cfinv"
      ]
    },
    "lock add dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal w20, w27, [x4]",
        "adds w26, w27, #0x1 (1)",
        "cfinv"
      ]
    },
    "lock add qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal x20, x27, [x4]",
        "adds x26, x27, #0x1 (1)",
        "cfinv"
      ]
    },
    "lock or byte [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetalb w20, w20, [x4]",
        "orr w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock or byte [rax], 0xFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldsetalb w20, w20, [x4]",
        "orr w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock or word [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock or word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock or dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetal w20, w20, [x4]",
        "orr w20, w20, #0x100",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldsetal w20, w21, [x4]",
        "orr w20, w21, w20",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0x100",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0xffffffff80000001",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock or word [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetalh w20, w20, [x4]",
        "orr w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock or dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetal w20, w20, [x4]",
        "orr w20, w20, #0x1",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock or qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldsetal x20, x20, [x4]",
        "orr x20, x20, #0x1",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock adc byte [rax], 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddalb w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w27",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock adc byte [rax], 0xFF": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "cinc w21, w20, lo",
        "ldaddalb w21, w21, [x4]",
        "mvn w27, w21",
        "cinc w20, w20, lo",
        "add w22, w21, w20",
        "uxtb w26, w22",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock adc word [rax], 0x100": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "ldaddalh w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w27",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock adc word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "cinc w21, w20, lo",
        "ldaddalh w21, w21, [x4]",
        "mvn w27, w21",
        "cinc w20, w20, lo",
        "add w22, w21, w20",
        "uxth w26, w22",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock adc dword [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "ldaddal w21, w27, [x4]",
        "cfinv",
        "adcs w26, w27, w20",
        "cfinv"
      ]
    },
    "lock adc dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "cinc w21, w20, lo",
        "ldaddal w21, w21, [x4]",
        "mvn w27, w21",
        "cfinv",
        "adcs w26, w21, w20",
        "cfinv"
      ]
    },
    "lock adc qword [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "cfinv",
        "adcs x26, x27, x20",
        "cfinv"
      ]
    },
    "lock adc qword [rax], -2147483647": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "cfinv",
        "adcs x26, x27, x20",
        "cfinv"
      ]
    },
    "lock adc word [rax], 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddalh w21, w27, [x4]",
        "cinc w20, w20, lo",
        "add w21, w27, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w27",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock adc dword [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "ldaddal w21, w27, [x4]",
        "cfinv",
        "adcs w26, w27, w20",
        "cfinv"
      ]
    },
    "lock adc qword [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc x21, x20, lo",
        "ldaddal x21, x27, [x4]",
        "cfinv",
        "adcs x26, x27, x20",
        "cfinv"
      ]
    },
    "lock sbb byte [rax], 1": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalb w1, w27, [x4]",
        "uxtb w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock sbb byte [rax], 0xFF": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalb w1, w21, [x4]",
        "mvn w27, w21",
        "uxtb w21, w21",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w21",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock sbb word [rax], 0x100": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w27, [x4]",
        "uxth w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock sbb word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w21, [x4]",
        "mvn w27, w21",
        "uxth w21, w21",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w21",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock sbb dword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w27, [x4]",
        "sbcs w26, w27, w20"
      ]
    },
    "lock sbb dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w21, [x4]",
        "mvn w27, w21",
        "sbcs w26, w21, w20"
      ]
    },
    "lock sbb qword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock sbb qword [rax], -2147483647": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock sbb word [rax], 1": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddalh w1, w27, [x4]",
        "uxth w21, w27",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock sbb dword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w21, w20, lo",
        "neg w1, w21",
        "ldaddal w1, w27, [x4]",
        "sbcs w26, w27, w20"
      ]
    },
    "lock sbb qword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc x21, x20, lo",
        "neg x1, x21",
        "ldaddal x1, x27, [x4]",
        "sbcs x26, x27, x20"
      ]
    },
    "lock and byte [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock and byte [rax], 0xFF": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w1, w20",
        "ldclralb w1, w20, [x4]",
        "and w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock and word [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock and word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock and dword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn w1, w20",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, #0x100",
        "cfinv"
      ]
    },
    "lock and dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w1, w20",
        "ldclral w1, w21, [x4]",
        "ands w26, w21, w20",
        "cfinv"
      ]
    },
    "lock and qword [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0x100",
        "cfinv"
      ]
    },
    "lock and qword [rax], -2147483647": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0xffffffff80000001",
        "cfinv"
      ]
    },
    "lock and word [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclralh w1, w20, [x4]",
        "and w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock and dword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn w1, w20",
        "ldclral w1, w20, [x4]",
        "ands w26, w20, #0x1",
        "cfinv"
      ]
    },
    "lock and qword [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mvn x1, x20",
        "ldclral x1, x20, [x4]",
        "ands x26, x20, #0x1",
        "cfinv"
      ]
    },
    "lock sub byte [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddalb w1, w27, [x4]",
        "lsl w0, w27, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w27, #0x1 (1)"
      ]
    },
    "lock sub byte [rax], 0xFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "neg w1, w20",
        "ldaddalb w1, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, #0xff (255)"
      ]
    },
    "lock sub word [rax], 0x100": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg w1, w20",
        "ldaddalh w1, w27, [x4]",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x100 (256)"
      ]
    },
    "lock sub word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "neg w1, w20",
        "ldaddalh w1, w21, [x4]",
        "mvn w27, w21",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20"
      ]
    },
    "lock sub dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg w1, w20",
        "ldaddal w1, w27, [x4]",
        "subs w26, w27, #0x100 (256)"
      ]
    },
    "lock sub dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "neg w1, w20",
        "ldaddal w1, w20, [x4]",
        "mvn w27, w20",
        "adds w26, w20, #0x1 (1)"
      ]
    },
    "lock sub qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, #0x100 (256)"
      ]
    },
    "lock sub qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, x20"
      ]
    },
    "lock sub word [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddalh w1, w27, [x4]",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x1 (1)"
      ]
    },
    "lock sub dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg w1, w20",
        "ldaddal w1, w27, [x4]",
        "subs w26, w27, #0x1 (1)"
      ]
    },
    "lock sub qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "neg x1, x20",
        "ldaddal x1, x27, [x4]",
        "subs x26, x27, #0x1 (1)"
      ]
    },
    "lock xor byte [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoralb w20, w20, [x4]",
        "eor w26, w20, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock xor byte [rax], 0xFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldeoralb w20, w20, [x4]",
        "eor w26, w20, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "lock xor word [rax], 0x100": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0x100",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock xor word [rax], 0xFFFF": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0xffff",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock xor dword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoral w20, w20, [x4]",
        "eor w20, w20, #0x100",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor dword [rax], 0xFFFFFFFF": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldeoral w20, w21, [x4]",
        "eor w20, w21, w20",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], 0x100": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0x100",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], -2147483647": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffff80000001",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0xffffffff80000001",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock xor word [rax], 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoralh w20, w20, [x4]",
        "eor w26, w20, #0x1",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "lock xor dword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoral w20, w20, [x4]",
        "eor w20, w20, #0x1",
        "subs w26, w20, #0x0 (0)"
      ]
    },
    "lock xor qword [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldeoral x20, x20, [x4]",
        "eor x20, x20, #0x1",
        "subs x26, x20, #0x0 (0)"
      ]
    },
    "lock dec byte [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP4 0xfe /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldaddalb w20, w27, [x4]",
        "sub w26, w27, #0x1 (1)",
        "setf8 w26",
        "bic w20, w27, w26",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock not byte [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP3 0xf6 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "ldeoralb w20, w20, [x4]"
      ]
    },
    "lock not word [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP3 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldeoralh w20, w20, [x4]"
      ]
    },
    "lock not dword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP3 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldeoral w20, w20, [x4]"
      ]
    },
    "lock not qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP3 0xf7 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldeoral x20, x20, [x4]"
      ]
    },
    "lock neg byte [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP3 0xf6 /3",
      "ExpectedArm64ASM": [
        "ldrb w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casalb w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "cmp wzr, w27, lsl #24",
        "neg w26, w27"
      ]
    },
    "lock neg word [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP3 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldrh w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casalh w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "cmp wzr, w27, lsl #16",
        "neg w26, w27"
      ]
    },
    "lock neg dword [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP3 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldr w1, [x4]",
        "mov w3, w1",
        "neg w2, w1",
        "casal w1, w2, [x4]",
        "sub w2, w1, w3",
        "cbnz w2, #-0x10",
        "mov w27, w1",
        "negs w26, w27"
      ]
    },
    "lock neg qword [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP3 0xf7 /3",
      "ExpectedArm64ASM": [
        "ldr x1, [x4]",
        "mov x3, x1",
        "neg x2, x1",
        "casal x1, x2, [x4]",
        "sub x2, x1, x3",
        "cbnz x2, #-0x10",
        "mov x27, x1",
        "negs x26, x27"
      ]
    },
    "lock dec word [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP5 0xff /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "ldaddalh w20, w27, [x4]",
        "sub w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w27, w26",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock dec dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP5 0xff /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ldaddal w20, w27, [x4]",
        "cset x20, hs",
        "subs w26, w27, #0x1 (1)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock dec qword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP5 0xff /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ldaddal x20, x27, [x4]",
        "cset x20, hs",
        "subs x26, x27, #0x1 (1)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock inc byte [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP4 0xfe /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalb w20, w27, [x4]",
        "add w26, w27, #0x1 (1)",
        "setf8 w26",
        "bic w20, w26, w27",
        "rmif x20, #7, #nzcV"
      ]
    },
    "lock inc word [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP5 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddalh w20, w27, [x4]",
        "add w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w26, w27",
        "rmif x20, #15, #nzcV"
      ]
    },
    "lock inc dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP5 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal w20, w27, [x4]",
        "cset x20, hs",
        "adds w26, w27, #0x1 (1)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock inc qword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP5 0xff /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "ldaddal x20, x27, [x4]",
        "cset x20, hs",
        "adds x26, x27, #0x1 (1)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock xadd byte [rcx], al": {
      "ExpectedInstructionCount": 8,
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "ldaddalb w20, w21, [x7]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, w20",
        "bfxil x4, x21, #0, #8",
        "cfinv"
      ]
    },
    "lock xadd word [rcx], ax": {
      "ExpectedInstructionCount": 8,
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "ldaddalh w20, w21, [x7]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "bfxil x4, x21, #0, #16",
        "cfinv"
      ]
    },
    "lock xadd dword [rcx], eax": {
      "ExpectedInstructionCount": 5,
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "ldaddal w20, w4, [x7]",
        "eor x27, x4, x20",
        "adds w26, w4, w20",
        "cfinv"
      ]
    },
    "lock xadd qword [rcx], rax": {
      "ExpectedInstructionCount": 5,
      "ExpectedArm64ASM": [
        "ldaddal x4, x20, [x7]",
        "eor x27, x20, x4",
        "adds x26, x20, x4",
        "cfinv",
        "mov x4, x20"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/FlagOpts.json
================================================
{
  "Features": {
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Chained add": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 5,
      "x86Insts": [
        "add rax, rbx",
        "adc rcx, rcx"
      ],
      "ExpectedArm64ASM": [
        "adds x4, x4, x6",
        "mov w27, #0x0",
        "adcs x26, x7, x7",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "Chained sub": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 4,
      "x86Insts": [
        "sub rax, rbx",
        "sbb rcx, rdx"
      ],
      "ExpectedArm64ASM": [
        "subs x4, x4, x6",
        "eor x27, x7, x5",
        "sbcs x26, x7, x5",
        "mov x7, x26"
      ]
    },
    "Inverted add": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 4,
      "x86Insts": [
        "add rax, rbx",
        "adc rcx, rdx",
        "cmc"
      ],
      "ExpectedArm64ASM": [
        "adds x4, x4, x6",
        "eor x27, x7, x5",
        "adcs x26, x7, x5",
        "mov x7, x26"
      ]
    },
    "Inverted sub": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 5,
      "x86Insts": [
        "sub rax, rbx",
        "sbb rcx, rcx",
        "cmc"
      ],
      "ExpectedArm64ASM": [
        "subs x4, x4, x6",
        "mov w27, #0x0",
        "sbcs x26, x7, x7",
        "mov x7, x26",
        "cfinv"
      ]
    },
    "ADC dead": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 3,
      "x86Insts": [
        "add rax, rbx",
        "adc rcx, rcx",
        "test rcx, rcx"
      ],
      "ExpectedArm64ASM": [
        "adds x4, x4, x6",
        "adc x7, x7, x7",
        "subs x26, x7, #0x0 (0)"
      ]
    },
    "INC consumed": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "add rax, rbx",
        "inc rax"
      ],
      "ExpectedArm64ASM": [
        "adds x4, x4, x6",
        "cset x20, lo",
        "adds x26, x4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "INC dead": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 4,
      "x86Insts": [
        "add rax, rbx",
        "inc rax",
        "test rax, rdx"
      ],
      "ExpectedArm64ASM": [
        "add x4, x4, x6",
        "add x4, x4, #0x1 (1)",
        "ands x26, x4, x5",
        "cfinv"
      ]
    },
    "DEC consumed": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "sub rax, rbx",
        "dec rax"
      ],
      "ExpectedArm64ASM": [
        "subs x4, x4, x6",
        "cset x20, hs",
        "subs x26, x4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "DEC dead": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 4,
      "x86Insts": [
        "sub rax, rbx",
        "dec rax",
        "test rax, rcx"
      ],
      "ExpectedArm64ASM": [
        "sub x4, x4, x6",
        "sub x4, x4, #0x1 (1)",
        "ands x26, x4, x7",
        "cfinv"
      ]
    },
    "8-bit DEC consumed": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 11,
      "x86Insts": [
        "sub al, ah",
        "dec al"
      ],
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w20, w4, w20",
        "bfxil x4, x20, #0, #8",
        "uxtb w27, w4",
        "sub w26, w27, #0x1 (1)",
        "setf8 w26",
        "bic w20, w27, w26",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "8-bit DEC dead": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 9,
      "x86Insts": [
        "sub al, ah",
        "dec al",
        "test al, al"
      ],
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "sub w20, w4, w20",
        "bfxil x4, x20, #0, #8",
        "uxtb w20, w4",
        "sub w20, w20, #0x1 (1)",
        "bfxil x4, x20, #0, #8",
        "cmn wzr, w4, lsl #24",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "Variable shift dead": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 3,
      "x86Insts": [
        "sar rax, cl",
        "test rax, rdx"
      ],
      "ExpectedArm64ASM": [
        "asr x4, x4, x7",
        "ands x26, x4, x5",
        "cfinv"
      ]
    },
    "Variable rotate-through-carry dead": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 11,
      "x86Insts": [
        "rcr rax, cl",
        "test rax, rdx"
      ],
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "cbz x20, #+0x20",
        "lsr x20, x4, x7",
        "cset x21, lo",
        "neg x22, x7",
        "lsl x23, x4, x22",
        "orr x20, x20, x23, lsl #1",
        "lsl x21, x21, x22",
        "orr x4, x20, x21",
        "ands x26, x4, x5",
        "cfinv"
      ]
    },
    "Partial NZCV select (cmp)": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "cmp rax, rbx",
        "setz cl",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "cmp x4, x6",
        "cset x20, eq",
        "bfxil x7, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "Partial NZCV select (add)": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "add rax, rbx",
        "setz cl",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "adds x4, x4, x6",
        "cset x20, eq",
        "bfxil x7, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "AND use only ZF": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "and eax, ebx",
        "setz cl",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "ands w4, w4, w6",
        "cset x20, eq",
        "bfxil x7, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "AND use only PF": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 10,
      "x86Insts": [
        "and eax, ebx",
        "setp cl",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "and w26, w4, w6",
        "mov x4, x26",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "and w20, w20, #0x1",
        "bfxil x7, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "UCOMISS use only PF": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 4,
      "x86Insts": [
        "ucomiss xmm0, xmm1",
        "setnp cl",
        "test rax, rax"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "bfxil x7, x26, #0, #8",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "Test use only zero - self 16-bit": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 6,
      "x86Insts": [
        "test ax, ax",
        "setz al",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "tst w4, #0xffff",
        "cset x20, eq",
        "bfxil x4, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "Test use only zero - non constant 16-bit": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 7,
      "x86Insts": [
        "test ax, bx",
        "setz al",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "and w0, w4, w6",
        "tst w0, #0xffff",
        "cset x20, eq",
        "bfxil x4, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "Test use only zero - constant 8-bit": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 8,
      "x86Insts": [
        "test al, 137",
        "setnz al",
        "test cl, cl"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x89",
        "and w0, w4, w20",
        "tst w0, #0xff",
        "cset x20, ne",
        "bfxil x4, x20, #0, #8",
        "cmn wzr, w7, lsl #24",
        "cfinv",
        "mov x26, x7"
      ]
    },
    "Dead cmpxchg flags": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 10,
      "x86Insts": [
        "cmpxchg8b [rbp]",
        "test rax, rax"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "mov x21, x5",
        "caspal w20, w21, w6, w7, [x9]",
        "mrs x0, nzcv",
        "cmp w20, w4",
        "ccmp w21, w5, #nzcv, eq",
        "rmif x0, #0, #NzCV",
        "csel x4, x20, x4, ne",
        "csel x5, x21, x5, ne",
        "subs x26, x4, #0x0 (0)"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/H0F38.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "ptest xmm0, xmm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0x66 0x0f 0x38 0x17"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "adcx eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cfinv",
        "adcs w4, w6, w4",
        "rmif x20, #28, #NZcV",
        "cfinv"
      ]
    },
    "adcx rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 REX.W 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cfinv",
        "adcs x4, x6, x4",
        "rmif x20, #28, #NZcV",
        "cfinv"
      ]
    },
    "adox eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ccmp wzr, #0, #nzcv, vs",
        "adcs w4, w6, w4",
        "ccmp wzr, #0, #nzcV, lo",
        "rmif x20, #28, #NZCv"
      ]
    },
    "adox rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 REX.W 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ccmp wzr, #0, #nzcv, vs",
        "adcs x4, x6, x4",
        "ccmp wzr, #0, #nzcV, lo",
        "rmif x20, #28, #NZCv"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/HotBlocks.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "The Witcher 3": {
      "x86InstructionCount": 7,
      "ExpectedInstructionCount": 9,
      "x86Insts": [
        "mov eax, 0x1",
        "lock xadd qword [rcx], rax",
        "mov rdx, rax",
        "and edx, 0x1f",
        "inc rdx",
        "shl rdx, 0x6",
        "add rdx, rcx"
      ],
      "ExpectedArm64ASM": [
        "mov w4, #0x1",
        "ldaddal x4, x4, [x7]",
        "and w5, w4, #0x1f",
        "add x5, x5, #0x1 (1)",
        "lsl x5, x5, #6",
        "eor x27, x5, x7",
        "adds x26, x5, x7",
        "cfinv",
        "mov x5, x26"
      ]
    },
    "FMOD scalar loop": {
      "x86InstructionCount": 38,
      "ExpectedInstructionCount": 64,
      "x86Insts": [
        "mov     esi, ecx",
        "mov     rdx, rbp",
        "mov     rax, rbx",
        "movss   xmm2, dword [rdx]",
        "add     rax, 0x20",
        "mulss   xmm2, xmm0",
        "add     rdx, 0x20",
        "addss   xmm2, dword [rax-0x20]",
        "movss   dword [rax-0x20], xmm2",
        "movss   xmm2, dword [rdx-0x1c]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x1c]",
        "movss   dword [rax-0x1c], xmm2",
        "movss   xmm2, dword [rdx-0x18]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x18]",
        "movss   dword [rax-0x18], xmm2",
        "movss   xmm2, dword [rdx-0x14]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x14]",
        "movss   dword [rax-0x14], xmm2",
        "movss   xmm2, dword [rdx-0x10]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x10]",
        "movss   dword [rax-0x10], xmm2",
        "movss   xmm2, dword [rdx-0xc]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0xc]",
        "movss   dword [rax-0xc], xmm2",
        "movss   xmm2, dword [rdx-0x8]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x8]",
        "movss   dword [rax-0x8], xmm2",
        "movss   xmm2, dword [rdx-0x4]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x4]",
        "movss   dword [rax-0x4], xmm2",
        "sub     esi, 0x1"
      ],
      "ExpectedArm64ASM": [
        "mov w10, w7",
        "mov x5, x9",
        "mov x4, x6",
        "ldr s18, [x5]",
        "add x4, x4, #0x20 (32)",
        "fmul s0, s18, s16",
        "mov v18.s[0], v0.s[0]",
        "add x5, x5, #0x20 (32)",
        "ldur s2, [x4, #-32]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-32]",
        "ldur s18, [x5, #-28]",
        "fmul s0, s18, s17",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-28]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-28]",
        "ldur s18, [x5, #-24]",
        "fmul s0, s18, s16",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-24]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-24]",
        "ldur s18, [x5, #-20]",
        "fmul s0, s18, s17",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-20]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-20]",
        "ldur s18, [x5, #-16]",
        "fmul s0, s18, s16",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-16]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-16]",
        "ldur s18, [x5, #-12]",
        "fmul s0, s18, s17",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-12]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-12]",
        "ldur s18, [x5, #-8]",
        "fmul s0, s18, s16",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-8]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-8]",
        "ldur s18, [x5, #-4]",
        "fmul s0, s18, s17",
        "mov v18.s[0], v0.s[0]",
        "ldur s2, [x4, #-4]",
        "fadd s0, s18, s2",
        "mov v18.s[0], v0.s[0]",
        "stur s18, [x4, #-4]",
        "subs w26, w10, #0x1 (1)",
        "mov x27, x10",
        "mov x10, x26"
      ]
    },
    "Scalar vector add loop": {
      "x86InstructionCount": 5,
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Saw this in bytemark"
      ],
      "x86Insts": [
        "movdqu  xmm0, [r12+rax]",
        "paddq   xmm0, xmm1",
        "movups  [r12+rax], xmm0",
        "add     rax, 0x10",
        "cmp     rsi, rax"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x16, x4, sxtx]",
        "add v16.2d, v16.2d, v17.2d",
        "str q16, [x16, x4, sxtx]",
        "add x4, x4, #0x10 (16)",
        "eor x27, x10, x4",
        "subs x26, x10, x4"
      ]
    },
    "bytemark data xor loop": {
      "x86InstructionCount": 9,
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Saw this in bytemark"
      ],
      "x86Insts": [
        "mov     rdx, rax",
        "mov     rcx, rax",
        "mov     r14, rsi",
        "add     rax, 0x1",
        "shr     rdx, 0x6",
        "and     ecx, 0x3f",
        "shl     r14, cl",
        "xor     qword [rbx+rdx*8], r14",
        "cmp     rdi, rax"
      ],
      "ExpectedArm64ASM": [
        "mov x5, x4",
        "mov x7, x4",
        "mov x19, x10",
        "add x4, x4, #0x1 (1)",
        "lsr x5, x5, #6",
        "and w7, w7, #0x3f",
        "lsl x19, x19, x7",
        "ldr x20, [x6, x5, sxtx #3]",
        "eor x20, x20, x19",
        "str x20, [x6, x5, sxtx #3]",
        "eor x27, x11, x4",
        "subs x26, x11, x4"
      ]
    },
    "bytemark num sort": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Saw this in bytemark"
      ],
      "x86Insts": [
        "mov    r13, qword [rsi+r9*8]",
        "mov    r11, r9",
        "or     r11, 0x1",
        "cmp    r13, qword [rsi+r11*8]"
      ],
      "ExpectedArm64ASM": [
        "ldr x17, [x10, x13, sxtx #3]",
        "orr x15, x13, #0x1",
        "ldr x20, [x10, x15, sxtx #3]",
        "eor x27, x17, x20",
        "subs x26, x17, x20"
      ]
    },
    "bytemark fpemu": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Saw this in bytemark"
      ],
      "x86Insts": [
        "movzx  r10d,word [rdx+0x4]",
        "movzx  edi,word [rdx+0x6]",
        "mov    dword [rsp+0xc],edi",
        "movzx  ebx,word [rdx+0x8]",
        "movzx  edi,word [rdx+0xa]",
        "mov    dword [rsp+0x10],edi",
        "lea    r8,[r11+r11*2]",
        "cmp    qword [r13+r8*4+0x4],0x0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w14, [x5, #4]",
        "ldrh w11, [x5, #6]",
        "str w11, [x8, #12]",
        "ldrh w6, [x5, #8]",
        "ldrh w11, [x5, #10]",
        "str w11, [x8, #16]",
        "add x12, x15, x15, lsl #1",
        "add x20, x17, x12, lsl #2",
        "ldur x27, [x20, #4]",
        "subs x26, x27, #0x0 (0)"
      ]
    },
    "bytemark DivideInternalFPF": {
      "x86InstructionCount": 13,
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Saw this in bytemark"
      ],
      "x86Insts": [
        "push   rbp",
        "push   r15",
        "push   r14",
        "push   r13",
        "push   r12",
        "push   rbx",
        "sub    rsp,0x18",
        "mov    qword [rsp],rdx",
        "movzx  r10d,byte [rdi]",
        "lea    ecx,[r10+r10*4]",
        "movzx  eax,byte [rsi]",
        "add    eax,ecx",
        "cmp    eax,0x18"
      ],
      "ExpectedArm64ASM": [
        "stp x29, x9, [x8, #-16]!",
        "stp x17, x19, [x8, #-16]!",
        "stp x6, x16, [x8, #-16]!",
        "sub x8, x8, #0x18 (24)",
        "str x5, [x8]",
        "ldrb w14, [x11]",
        "add x20, x14, x14, lsl #2",
        "mov w7, w20",
        "ldrb w4, [x10]",
        "add w4, w4, w7",
        "mvn w27, w4",
        "subs w26, w4, #0x18 (24)"
      ]
    },
    "bytemark huffman 1": {
      "x86InstructionCount": 18,
      "ExpectedInstructionCount": 21,
      "x86Insts": [
        "mov    r9,rdx",
        "mov    r8,rcx",
        "nop    dword [rax+0x0]",
        "mov    r10d,esi",
        "shr    r10d,0x3",
        "movzx  r10d,byte [rbp+r10*1+0x0]",
        "mov    r11d,esi",
        "and    r11d,0x7",
        "bt     r10d,r11d",
        "lea    r8,[r8+r8*4]",
        "lea    r8,[rbx+r8*4+0x10]",
        "cmovae r8,r9",
        "movsxd r8,dword [r8]",
        "add    rsi,0x1",
        "lea    r10,[r8+r8*4]",
        "lea    r9,[rbx+r10*4]",
        "add    r9,0xc",
        "cmp    dword [rbx+r10*4+0xc],0xffffffff"
      ],
      "ExpectedArm64ASM": [
        "mov x13, x5",
        "mov x12, x7",
        "lsr w14, w10, #3",
        "ldrb w14, [x9, x14, sxtx]",
        "and w15, w10, #0x7",
        "lsr w20, w14, w15",
        "rmif x20, #63, #nzCv",
        "add x12, x12, x12, lsl #2",
        "add x20, x6, #0x10 (16)",
        "add x12, x20, x12, lsl #2",
        "csel x12, x13, x12, lo",
        "ldr w20, [x12]",
        "sxtw x12, w20",
        "add x10, x10, #0x1 (1)",
        "add x14, x12, x12, lsl #2",
        "add x13, x6, x14, lsl #2",
        "add x13, x13, #0xc (12)",
        "add x20, x6, x14, lsl #2",
        "ldr w20, [x20, #12]",
        "mvn w27, w20",
        "adds w26, w20, #0x1 (1)"
      ]
    },
    "bytemark huffman 2": {
      "x86InstructionCount": 10,
      "ExpectedInstructionCount": 17,
      "x86Insts": [
        "movsxd r9,r8d",
        "lea    r9,[r9+r9*4]",
        "cmp    dword [rbx+r9*4+0xc],ecx",
        "sete   cl",
        "xor    cl,0x31",
        "mov    byte [rsp+rdi*1+0x50],cl",
        "add    rdi,0x1",
        "mov    ecx,r8d",
        "mov    r8d,dword [rbx+r9*4+0x8]",
        "cmp    r8d,0xfffffffe"
      ],
      "ExpectedArm64ASM": [
        "sxtw x13, w12",
        "add x13, x13, x13, lsl #2",
        "add x20, x6, x13, lsl #2",
        "ldr w20, [x20, #12]",
        "cmp w20, w7",
        "cset x20, eq",
        "bfxil x7, x20, #0, #8",
        "mov w20, #0x31",
        "eor x7, x7, x20",
        "add x20, x8, x11",
        "strb w7, [x20, #80]",
        "add x11, x11, #0x1 (1)",
        "mov w7, w12",
        "add x20, x6, x13, lsl #2",
        "ldr w12, [x20, #8]",
        "mvn w27, w12",
        "adds w26, w12, #0x2 (2)"
      ]
    },
    "bytemark huffman 3": {
      "x86InstructionCount": 19,
      "ExpectedInstructionCount": 34,
      "x86Insts": [
        "mov    ecx,eax",
        "and    cl,0x7",
        "mov    r8b,0x1",
        "shl    r8b,cl",
        "mov    r9d,eax",
        "shr    r9d,0x3",
        "movzx  r10d,byte [rbp+r9*1+0x0]",
        "mov    r11b,0xfe",
        "mov    ecx,eax",
        "rol    r11b,cl",
        "and    r11b,r10b",
        "or     r8b,r10b",
        "cmp    byte [rsp+rdi*1+0x4f],0x31",
        "movzx  ecx,r8b",
        "movzx  r8d,r11b",
        "cmove  r8d,ecx",
        "add    rax,0x1",
        "mov    byte [rbp+r9*1+0x0],r8b",
        "add    rdi,0xffffffffffffffff"
      ],
      "ExpectedArm64ASM": [
        "and w7, w4, #0xffffff07",
        "mov w20, #0x1",
        "bfxil x12, x20, #0, #8",
        "lsl w20, w12, w7",
        "bfxil x12, x20, #0, #8",
        "lsr w13, w4, #3",
        "ldrb w14, [x9, x13, sxtx]",
        "mov w20, #0xfe",
        "bfxil x15, x20, #0, #8",
        "mov w7, w4",
        "and x20, x7, #0x7",
        "mov x21, x15",
        "bfi w21, w15, #24, #8",
        "neg x20, x20",
        "ror w20, w21, w20",
        "bfxil x15, x20, #0, #8",
        "and w20, w15, w14",
        "bfxil x15, x20, #0, #8",
        "orr w20, w12, w14",
        "bfxil x12, x20, #0, #8",
        "mov w20, #0x31",
        "add x21, x8, x11",
        "ldrb w21, [x21, #79]",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "uxtb w7, w12",
        "uxtb w12, w15",
        "csel w12, w7, w12, eq",
        "add x4, x4, #0x1 (1)",
        "strb w12, [x9, x13, sxtx]",
        "mvn w27, w11",
        "subs x26, x11, #0x1 (1)",
        "cfinv",
        "mov x11, x26"
      ]
    },
    "bytemark nn": {
      "x86InstructionCount": 12,
      "ExpectedInstructionCount": 24,
      "x86Insts": [
        "mulpd  xmm3,xmm2",
        "movupd xmm4,oword [rax+rdx*8+0x216190]",
        "mulpd  xmm4,xmm6",
        "addpd  xmm4,xmm3",
        "movupd xmm3,oword [rax+rdx*8+0x2155d0]",
        "addpd  xmm3,xmm4",
        "movupd oword [rax+rdx*8+0x2155d0],xmm3",
        "movupd xmm3,oword [rax+rdx*8+0x217c10]",
        "addpd  xmm3,xmm4",
        "movupd oword [rax+rdx*8+0x217c10],xmm3",
        "add    rdx,0x2",
        "cmp    rdx,0x22"
      ],
      "ExpectedArm64ASM": [
        "fmul v19.2d, v19.2d, v18.2d",
        "add x20, x4, x5, lsl #3",
        "mov w21, #0x6190",
        "movk w21, #0x21, lsl #16",
        "ldr q20, [x20, x21, sxtx]",
        "fmul v20.2d, v20.2d, v22.2d",
        "fadd v20.2d, v20.2d, v19.2d",
        "add x20, x4, x5, lsl #3",
        "mov w21, #0x55d0",
        "movk w21, #0x21, lsl #16",
        "ldr q19, [x20, x21, sxtx]",
        "fadd v19.2d, v19.2d, v20.2d",
        "add x20, x4, x5, lsl #3",
        "str q19, [x20, x21, sxtx]",
        "add x20, x4, x5, lsl #3",
        "mov w21, #0x7c10",
        "movk w21, #0x21, lsl #16",
        "ldr q19, [x20, x21, sxtx]",
        "fadd v19.2d, v19.2d, v20.2d",
        "add x20, x4, x5, lsl #3",
        "str q19, [x20, x21, sxtx]",
        "add x5, x5, #0x2 (2)",
        "subs x26, x5, #0x22 (34)",
        "mov x27, x5"
      ]
    },
    "000000000020e9bd <DoNNetIteration+0x21d>:": {
      "x86InstructionCount": 35,
      "ExpectedInstructionCount": 53,
      "x86Insts": [
        "mov    rsi,0xfffffffffffffee8",
        "xchg   ax,ax",
        "mov    ecx,eax",
        "imul   eax,edx,0x8149a",
        "imul   edx,ecx,0x3e322",
        "add    edx,eax",
        "movsxd rax,edx",
        "imul   rdx,rax,0xffffffff8646c299",
        "shr    rdx,0x20",
        "add    edx,eax",
        "mov    edi,edx",
        "shr    edi,0x1f",
        "sar    edx,0x13",
        "add    edx,edi",
        "imul   edx,edx,0xf408b",
        "sub    eax,edx",
        "movsxd rdx,eax",
        "imul   rdi,rdx,0x14f8b589",
        "mov    r8,rdi",
        "shr    r8,0x3f",
        "sar    rdi,0x2d",
        "add    edi,r8d",
        "imul   edi,edi,0x186a0",
        "sub    edx,edi",
        "mov    edi,edx",
        "neg    edi",
        "cmovs  edi,edx",
        "xorps  xmm0,xmm0",
        "cvtsi2sd xmm0,rdi",
        "divsd  xmm0,xmm1",
        "addsd  xmm0,xmm2",
        "mulsd  xmm0,xmm3",
        "movsd  qword [rsi+0x216428],xmm0",
        "mov    edx,ecx",
        "add    rsi,0x8"
      ],
      "ExpectedArm64ASM": [
        "mov x10, #0xfffffffffffffee8",
        "mov w7, w4",
        "mov w20, #0x149a",
        "movk w20, #0x8, lsl #16",
        "mul w4, w5, w20",
        "mov w20, #0xe322",
        "movk w20, #0x3, lsl #16",
        "mul w5, w7, w20",
        "add w5, w5, w4",
        "sxtw x4, w5",
        "mov x20, #0xffffffffffffc299",
        "movk x20, #0x8646, lsl #16",
        "mul x5, x4, x20",
        "lsr x5, x5, #32",
        "add w5, w5, w4",
        "lsr w11, w5, #31",
        "asr w5, w5, #19",
        "add w5, w5, w11",
        "mov w20, #0x408b",
        "movk w20, #0xf, lsl #16",
        "mul w5, w5, w20",
        "sub w4, w4, w5",
        "sxtw x5, w4",
        "mov w20, #0xb589",
        "movk w20, #0x14f8, lsl #16",
        "mul x11, x5, x20",
        "lsr x12, x11, #63",
        "asr x11, x11, #45",
        "add w11, w11, w12",
        "mov w20, #0x86a0",
        "movk w20, #0x1, lsl #16",
        "mul w11, w11, w20",
        "sub w5, w5, w11",
        "mov w11, w5",
        "negs w11, w11",
        "csel w11, w5, w11, mi",
        "movi v16.2d, #0x0",
        "scvtf d0, x11",
        "mov v16.d[0], v0.d[0]",
        "fdiv d0, d16, d17",
        "mov v16.d[0], v0.d[0]",
        "fadd d0, d16, d18",
        "mov v16.d[0], v0.d[0]",
        "fmul d0, d16, d19",
        "mov v16.d[0], v0.d[0]",
        "mov w20, #0x6428",
        "movk w20, #0x21, lsl #16",
        "str d16, [x10, x20, sxtx]",
        "mov w5, w7",
        "adds x26, x10, #0x8 (8)",
        "cfinv",
        "mov x27, x10",
        "mov x10, x26"
      ]
    },
    "glibc AVX memcpy block 1": {
      "x86InstructionCount": 20,
      "ExpectedInstructionCount": 26,
      "x86Insts": [
        "vmovdqu ymm5,yword [rsi+0x20]",
        "vmovdqu ymm6,yword [rsi+0x40]",
        "lea    rcx,[rdi+rdx*1-0x81]",
        "vmovdqu ymm7,yword [rsi+0x60]",
        "vmovdqu ymm8,yword [rsi+rdx*1-0x20]",
        "sub    rsi,rdi",
        "and    rcx,0xffffffffffffffe0",
        "add    rsi,rcx",
        "nop    dword [rax+0x0]",
        "vmovdqu ymm1,yword [rsi+0x60]",
        "vmovdqu ymm2,yword [rsi+0x40]",
        "vmovdqu ymm3,yword [rsi+0x20]",
        "vmovdqu ymm4,yword [rsi]",
        "add    rsi,0xffffffffffffff80",
        "vmovdqa yword [rcx+0x60],ymm1",
        "vmovdqa yword [rcx+0x40],ymm2",
        "vmovdqa yword [rcx+0x20],ymm3",
        "vmovdqa yword [rcx],ymm4",
        "add    rcx,0xffffffffffffff80",
        "cmp    rdi,rcx"
      ],
      "ExpectedArm64ASM": [
        "ldp q21, q2, [x10, #32]",
        "ldp q22, q3, [x10, #64]",
        "sub x20, x11, #0x81 (129)",
        "add x7, x20, x5",
        "ldp q23, q4, [x10, #96]",
        "add x20, x10, x5",
        "ldp q24, q5, [x20, #-32]",
        "sub x10, x10, x11",
        "and x7, x7, #0xffffffffffffffe0",
        "add x10, x10, x7",
        "ldp q17, q6, [x10, #96]",
        "ldp q18, q7, [x10, #64]",
        "ldp q19, q8, [x10, #32]",
        "ldp q20, q9, [x10]",
        "sub x10, x10, #0x80 (128)",
        "stp q17, q6, [x7, #96]",
        "stp q18, q7, [x7, #64]",
        "stp q19, q8, [x7, #32]",
        "stp q20, q9, [x7]",
        "sub x7, x7, #0x80 (128)",
        "eor x27, x11, x7",
        "subs x26, x11, x7",
        "stp q4, q5, [x28, #304]",
        "stp q2, q3, [x28, #272]",
        "stp q8, q9, [x28, #240]",
        "stp q6, q7, [x28, #208]"
      ]
    },
    "glibc AVX memcpy block 2": {
      "x86InstructionCount": 22,
      "ExpectedInstructionCount": 31,
      "x86Insts": [
        "vmovdqu ymm5,yword [rsi+rdx*1-0x20]",
        "vmovdqu ymm6,yword [rsi+rdx*1-0x40]",
        "mov    rcx,rdi",
        "or     rdi,0x1f",
        "vmovdqu ymm7,yword [rsi+rdx*1-0x60]",
        "vmovdqu ymm8,yword [rsi+rdx*1-0x80]",
        "sub    rsi,rcx",
        "inc    rdi",
        "add    rsi,rdi",
        "lea    rdx,[rcx+rdx*1-0x80]",
        "nop    dword [rax+rax*1+0x0]",
        "vmovdqu ymm1,yword [rsi]",
        "vmovdqu ymm2,yword [rsi+0x20]",
        "vmovdqu ymm3,yword [rsi+0x40]",
        "vmovdqu ymm4,yword [rsi+0x60]",
        "sub    rsi,0xffffffffffffff80",
        "vmovdqa yword [rdi],ymm1",
        "vmovdqa yword [rdi+0x20],ymm2",
        "vmovdqa yword [rdi+0x40],ymm3",
        "vmovdqa yword [rdi+0x60],ymm4",
        "sub    rdi,0xffffffffffffff80",
        "cmp    rdx,rdi"
      ],
      "ExpectedArm64ASM": [
        "add x20, x10, x5",
        "ldp q21, q2, [x20, #-32]",
        "add x20, x10, x5",
        "ldp q22, q3, [x20, #-64]",
        "mov x7, x11",
        "orr x11, x11, #0x1f",
        "add x20, x10, x5",
        "ldp q23, q4, [x20, #-96]",
        "add x20, x10, x5",
        "ldp q24, q5, [x20, #-128]",
        "sub x10, x10, x7",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, x11",
        "sub x20, x7, #0x80 (128)",
        "add x5, x20, x5",
        "ldp q17, q6, [x10]",
        "ldp q18, q7, [x10, #32]",
        "ldp q19, q8, [x10, #64]",
        "ldp q20, q9, [x10, #96]",
        "add x10, x10, #0x80 (128)",
        "stp q17, q6, [x11]",
        "stp q18, q7, [x11, #32]",
        "stp q19, q8, [x11, #64]",
        "stp q20, q9, [x11, #96]",
        "add x11, x11, #0x80 (128)",
        "eor x27, x5, x11",
        "subs x26, x5, x11",
        "stp q4, q5, [x28, #304]",
        "stp q2, q3, [x28, #272]",
        "stp q8, q9, [x28, #240]",
        "stp q6, q7, [x28, #208]"
      ]
    },
    "bytemark strsift": {
      "x86InstructionCount": 15,
      "ExpectedInstructionCount": 19,
      "x86Insts": [
        "mov    rsi,rdx",
        "and    rsi,0xfffffffffffffffc",
        "movq   xmm0,rcx",
        "pshufd xmm0,xmm0,0x44",
        "mov    rdi,qword [rsp+0x20]",
        "lea    rdi,[rdi+r13*8]",
        "xor    r8d,r8d",
        "movdqu xmm1,oword [rdi+r8*8-0x10]",
        "movdqu xmm2,oword [rdi+r8*8]",
        "paddq  xmm1,xmm0",
        "paddq  xmm2,xmm0",
        "movdqu oword [rdi+r8*8-0x10],xmm1",
        "movdqu oword [rdi+r8*8],xmm2",
        "add    r8,0x4",
        "cmp    rsi,r8"
      ],
      "ExpectedArm64ASM": [
        "and x10, x5, #0xfffffffffffffffc",
        "fmov d16, x7",
        "dup v16.2d, v16.d[0]",
        "ldr x11, [x8, #32]",
        "add x11, x11, x17, lsl #3",
        "mov w12, #0x0",
        "add x20, x11, x12, lsl #3",
        "ldur q17, [x20, #-16]",
        "add x20, x11, x12, lsl #3",
        "ldr q18, [x20]",
        "add v17.2d, v17.2d, v16.2d",
        "add v18.2d, v18.2d, v16.2d",
        "add x20, x11, x12, lsl #3",
        "stur q17, [x20, #-16]",
        "add x20, x11, x12, lsl #3",
        "str q18, [x20]",
        "add x12, x12, #0x4 (4)",
        "eor x27, x10, x12",
        "subs x26, x10, x12"
      ]
    },
    "bytemark idea 1": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 11,
      "x86Insts": [
        "movzx  eax,ax",
        "imul   r8d,eax",
        "mov    eax,r8d",
        "shr    eax,0x10",
        "movzx  r9d,r8w",
        "sub    r8d,eax",
        "cmp    r9d,eax",
        "adc    r8d,0x0"
      ],
      "ExpectedArm64ASM": [
        "uxth w4, w4",
        "mul w12, w12, w4",
        "lsr w4, w12, #16",
        "uxth w13, w12",
        "sub w12, w12, w4",
        "cmp w13, w4",
        "cset w0, lo",
        "adds w26, w12, w0",
        "cfinv",
        "mov x27, x12",
        "mov x12, x26"
      ]
    },
    "bytemark idea 2": {
      "x86InstructionCount": 12,
      "ExpectedInstructionCount": 15,
      "x86Insts": [
        "movzx  eax,ax",
        "imul   r10d,eax",
        "mov    eax,r10d",
        "shr    eax,0x10",
        "movzx  esi,r10w",
        "sub    r10d,eax",
        "cmp    esi,eax",
        "adc    r10d,0x0",
        "mov    eax,r10d",
        "mov    esi,r9d",
        "xor    si,di",
        "movzx  r10d,word [rsp+r8*1+0x158]"
      ],
      "ExpectedArm64ASM": [
        "uxth w4, w4",
        "mul w14, w14, w4",
        "lsr w4, w14, #16",
        "uxth w10, w14",
        "sub w14, w14, w4",
        "cmp w10, w4",
        "cinc w14, w14, lo",
        "mov w4, w14",
        "mov w10, w13",
        "eor w26, w10, w11",
        "cmn wzr, w26, lsl #16",
        "bfxil x10, x26, #0, #16",
        "add x20, x8, x12",
        "ldrh w14, [x20, #344]",
        "cfinv"
      ]
    },
    "bytemark idea 3": {
      "x86InstructionCount": 11,
      "ExpectedInstructionCount": 13,
      "x86Insts": [
        "movzx  eax,si",
        "imul   r8d,eax",
        "mov    eax,r8d",
        "shr    eax,0x10",
        "movzx  esi,r8w",
        "sub    r8d,eax",
        "cmp    esi,eax",
        "adc    r8d,0x0",
        "mov    esi,r8d",
        "movzx  r8d,word [rsp+rdi*1+0x66]",
        "test   dx,dx"
      ],
      "ExpectedArm64ASM": [
        "uxth w4, w10",
        "mul w12, w12, w4",
        "lsr w4, w12, #16",
        "uxth w10, w12",
        "sub w12, w12, w4",
        "cmp w10, w4",
        "cinc w12, w12, lo",
        "mov w10, w12",
        "add x20, x8, x11",
        "ldrh w12, [x20, #102]",
        "cmn wzr, w5, lsl #16",
        "cfinv",
        "mov x26, x5"
      ]
    },
    "Factorio drawSprite+0x890": {
      "x86InstructionCount": 3,
      "ExpectedInstructionCount": 5,
      "Comment": "first load should be rip relative",
      "x86Insts": [
        "movss  xmm9,dword [rbp]",
        "and    r9d,0x800000",
        "movss  dword [rbp-0x58],xmm9"
      ],
      "ExpectedArm64ASM": [
        "ldr s25, [x9]",
        "ands w26, w13, #0x800000",
        "mov x13, x26",
        "stur s25, [x9, #-88]",
        "cfinv"
      ]
    },
    "Factorio drawSprite+0xf2": {
      "x86InstructionCount": 9,
      "ExpectedInstructionCount": 11,
      "x86Insts": [
        "movss  xmm8,dword [rbp-0x58]",
        "mov    byte [rbp-0x49],r13b",
        "mov    byte [rbp-0x4a],r14b",
        "mov    rdx,qword [rdi+0x8]",
        "mov    qword [rbp-0x38],rbx",
        "mov    byte [rbp-0x4b],al",
        "mov    qword [rbp-0x40],r11",
        "movss  dword [rbp-0x48],xmm8",
        "cmp    rdx,qword [rdi]"
      ],
      "ExpectedArm64ASM": [
        "ldur s24, [x9, #-88]",
        "sturb w17, [x9, #-73]",
        "sturb w19, [x9, #-74]",
        "ldr x5, [x11, #8]",
        "stur x6, [x9, #-56]",
        "sturb w4, [x9, #-75]",
        "stur x15, [x9, #-64]",
        "stur s24, [x9, #-72]",
        "ldr x20, [x11]",
        "eor x27, x5, x20",
        "subs x26, x5, x20"
      ]
    },
    "Factorio drawSprite+0x520": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 8,
      "x86Insts": [
        "sub    ecx,0x9",
        "xor    r11d,r11d",
        "cmp    cl,0x1",
        "cmovbe r11,rsi"
      ],
      "ExpectedArm64ASM": [
        "sub w7, w7, #0x9 (9)",
        "mov w15, #0x0",
        "mov w20, #0x1",
        "lsl w0, w7, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w7, #0x1 (1)",
        "mov x27, x7",
        "csel x15, x10, x15, ls"
      ]
    },
    "pcmpistri xmm0, xmm1, 0_0_00_11_01b": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "A Hat In Time spends at least 5% CPU time in this instruction",
        "Comes from vcruntime140.dll wcsstr"
      ],
      "ExpectedArm64ASM": [
        "str x30, [sp, #-16]!",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v17.16b",
        "mov w0, #0xd",
        "ldr x1, [x28, #2336]",
        "ldr x3, [x28, #2344]",
        "blr x1",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "mov w27, #0x0",
        "uxth w21, w20",
        "mov w22, #0x8",
        "rbit w0, w21",
        "clz w23, w0",
        "cmp x21, #0x0 (0)",
        "csel x7, x22, x23, eq",
        "mov w26, #0x1",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/HotBlocks_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Sonic Mania movie player": {
      "x86InstructionCount": 10,
      "ExpectedInstructionCount": 15,
      "Comment": "Used to be hottest block in Sonic Mania",
      "x86Insts": [
        "movzx   edx, byte [esi+ecx]",
        "movzx   ecx, byte [esi+edi]",
        "or      edx, 0xffff0000",
        "shl     edx, 0x8",
        "inc     esi",
        "or      edx, ecx",
        "mov     ecx, dword [ebp+0xc]",
        "or      dword [eax], edx",
        "add     eax, 0x4",
        "cmp     esi, ebx"
      ],
      "ExpectedArm64ASM": [
        "add w20, w10, w7",
        "ldrb w5, [x20]",
        "add w20, w10, w11",
        "ldrb w7, [x20]",
        "orr w5, w5, #0xffff0000",
        "lsl w5, w5, #8",
        "add w10, w10, #0x1 (1)",
        "orr w5, w5, w7",
        "ldr w7, [x9, #12]",
        "ldr w20, [x4]",
        "orr w20, w20, w5",
        "str w20, [x4]",
        "add w4, w4, #0x4 (4)",
        "eor x27, x10, x6",
        "subs w26, w10, w6"
      ]
    },
    "wine mscrt.dll memmove": {
      "x86InstructionCount": 12,
      "ExpectedInstructionCount": 13,
      "Comment": "Hot in Sonic Mania",
      "x86Insts": [
        "movdqu  xmm0, [esi]",
        "movdqu  xmm1, [esi+0x10]",
        "movdqu  xmm2, [esi+0x20]",
        "movdqu  xmm3, [esi+0x30]",
        "movdqa  [edi], xmm0",
        "movdqa  [edi+0x10], xmm1",
        "movdqa  [edi+0x20], xmm2",
        "movdqa  [edi+0x30], xmm3",
        "add     esi, 0x40",
        "add     edi, 0x40",
        "sub     ecx, 0x40",
        "cmp     ecx, 0x40"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x10]",
        "ldr q17, [x10, #16]",
        "ldr q18, [x10, #32]",
        "ldr q19, [x10, #48]",
        "str q16, [x11]",
        "str q17, [x11, #16]",
        "str q18, [x11, #32]",
        "str q19, [x11, #48]",
        "add w10, w10, #0x40 (64)",
        "add w11, w11, #0x40 (64)",
        "sub w7, w7, #0x40 (64)",
        "subs w26, w7, #0x40 (64)",
        "mov x27, x7"
      ]
    },
    "dxvk hotblock from MGRR": {
      "x86InstructionCount": 14,
      "ExpectedInstructionCount": 27,
      "Comment": [
        "Hottest block in Metal Gear Rising: Revengeance render thread"
      ],
      "x86Insts": [
        "mov     edx, dword [eax+0xc]",
        "mov     eax, dword [eax+0x8]",
        "mov     dword [ebp-0x34], esi",
        "mov     ecx, eax",
        "mov     ebx, edx",
        "mov     esi, dword [ebp-0x24]",
        "add     ecx, 0xffffffff",
        "adc     ebx, 0xffffffff",
        "mov     dword [ebp-0x28], ecx",
        "mov     dword [ebp-0x2c], ebx",
        "mov     ebx, ecx",
        "mov     dword [ebp-0x30], ecx",
        "mov     ecx, dword [ebp-0x2c]",
        "lock cmpxchg8b qword [esi+0x8]"
      ],
      "ExpectedArm64ASM": [
        "ldr w5, [x4, #12]",
        "ldr w4, [x4, #8]",
        "stur w10, [x9, #-52]",
        "mov x7, x4",
        "mov x6, x5",
        "ldur w10, [x9, #-36]",
        "mov w20, #0xffffffff",
        "subs w7, w7, #0x1 (1)",
        "mvn w27, w6",
        "adcs w26, w6, w20",
        "mov x6, x26",
        "stur w7, [x9, #-40]",
        "stur w6, [x9, #-44]",
        "mov x6, x7",
        "stur w7, [x9, #-48]",
        "ldur w7, [x9, #-44]",
        "add w20, w10, #0x8 (8)",
        "mov x22, x4",
        "mov x23, x5",
        "caspal w22, w23, w6, w7, [x20]",
        "mrs x0, nzcv",
        "cmp w22, w4",
        "ccmp w23, w5, #nzcv, eq",
        "rmif x0, #0, #NzCV",
        "csel x4, x22, x4, ne",
        "csel x5, x23, x5, ne",
        "cfinv"
      ]
    },
    "Psychonauts matrix swizzle": {
      "x86InstructionCount": 103,
      "ExpectedInstructionCount": 113,
      "Comment": [
        "Hottest block in Windows Psychonauts",
        "Doing a 4x4 32-bit float matrix swizzle",
        "Only data movement, no manipulation of the floats"
      ],
      "x86Insts": [
        "push    ebp",
        "mov     ebp, esp",
        "sub     esp, 0x44",
        "mov     [ebp-0x44], ecx",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax]",
        "fstp    dword [ebp-0x40]",
        "mov     ecx, dword [ebp-0x44]",
        "fld     dword [ecx+0x10]",
        "fstp    dword [ebp-0x3c]",
        "mov     edx, dword [ebp-0x44]",
        "fld     dword [edx+0x20]",
        "fstp    dword [ebp-0x38]",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax+0x30]",
        "fstp    dword [ebp-0x34]",
        "mov     ecx, dword [ebp-0x44]",
        "fld     dword [ecx+0x4]",
        "fstp    dword [ebp-0x30]",
        "mov     edx, dword [ebp-0x44]",
        "fld     dword [edx+0x14]",
        "fstp    dword [ebp-0x2c]",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax+0x24]",
        "fstp    dword [ebp-0x28]",
        "mov     ecx, dword [ebp-0x44]",
        "fld     dword [ecx+0x34]",
        "fstp    dword [ebp-0x24]",
        "mov     edx, dword [ebp-0x44]",
        "fld     dword [edx+0x8]",
        "fstp    dword [ebp-0x20]",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax+0x18]",
        "fstp    dword [ebp-0x1c]",
        "mov     ecx, dword [ebp-0x44]",
        "fld     dword [ecx+0x28]",
        "fstp    dword [ebp-0x18]",
        "mov     edx, dword [ebp-0x44]",
        "fld     dword [edx+0x38]",
        "fstp    dword [ebp-0x14]",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax+0xc]",
        "fstp    dword [ebp-0x10]",
        "mov     ecx, dword [ebp-0x44]",
        "fld     dword [ecx+0x1c]",
        "fstp    dword [ebp-0xc]",
        "mov     edx, dword [ebp-0x44]",
        "fld     dword [edx+0x2c]",
        "fstp    dword [ebp-0x8]",
        "mov     eax, dword [ebp-0x44]",
        "fld     dword [eax+0x3c]",
        "fstp    dword [ebp-0x4]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x40]",
        "fstp    dword [ecx]",
        "mov     edx, dword [ebp+0x8]",
        "fld     dword [ebp-0x3c]",
        "fstp    dword [edx+0x4]",
        "mov     eax, dword [ebp+0x8]",
        "fld     dword [ebp-0x38]",
        "fstp    dword [eax+0x8]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x34]",
        "fstp    dword [ecx+0xc]",
        "mov     edx, dword [ebp+0x8]",
        "fld     dword [ebp-0x30]",
        "fstp    dword [edx+0x10]",
        "mov     eax, dword [ebp+0x8]",
        "fld     dword [ebp-0x2c]",
        "fstp    dword [eax+0x14]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x28]",
        "fstp    dword [ecx+0x18]",
        "mov     edx, dword [ebp+0x8]",
        "fld     dword [ebp-0x24]",
        "fstp    dword [edx+0x1c]",
        "mov     eax, dword [ebp+0x8]",
        "fld     dword [ebp-0x20]",
        "fstp    dword [eax+0x20]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x1c]",
        "fstp    dword [ecx+0x24]",
        "mov     edx, dword [ebp+0x8]",
        "fld     dword [ebp-0x18]",
        "fstp    dword [edx+0x28]",
        "mov     eax, dword [ebp+0x8]",
        "fld     dword [ebp-0x14]",
        "fstp    dword [eax+0x2c]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x10]",
        "fstp    dword [ecx+0x30]",
        "mov     edx, dword [ebp+0x8]",
        "fld     dword [ebp-0xc]",
        "fstp    dword [edx+0x34]",
        "mov     eax, dword [ebp+0x8]",
        "fld     dword [ebp-0x8]",
        "fstp    dword [eax+0x38]",
        "mov     ecx, dword [ebp+0x8]",
        "fld     dword [ebp-0x4]",
        "fstp    dword [ecx+0x3c]",
        "mov     eax, dword [ebp+0x8]",
        "mov     esp, ebp",
        "pop     ebp"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "subs w26, w8, #0x44 (68)",
        "mov x27, x8",
        "mov x8, x26",
        "stur w7, [x9, #-68]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4]",
        "stur s2, [x9, #-64]",
        "ldur w7, [x9, #-68]",
        "ldr s2, [x7, #16]",
        "stur s2, [x9, #-60]",
        "ldur w5, [x9, #-68]",
        "ldr s2, [x5, #32]",
        "stur s2, [x9, #-56]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4, #48]",
        "stur s2, [x9, #-52]",
        "ldur w7, [x9, #-68]",
        "ldr s2, [x7, #4]",
        "stur s2, [x9, #-48]",
        "ldur w5, [x9, #-68]",
        "ldr s2, [x5, #20]",
        "stur s2, [x9, #-44]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4, #36]",
        "stur s2, [x9, #-40]",
        "ldur w7, [x9, #-68]",
        "ldr s2, [x7, #52]",
        "stur s2, [x9, #-36]",
        "ldur w5, [x9, #-68]",
        "ldr s2, [x5, #8]",
        "stur s2, [x9, #-32]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4, #24]",
        "stur s2, [x9, #-28]",
        "ldur w7, [x9, #-68]",
        "ldr s2, [x7, #40]",
        "stur s2, [x9, #-24]",
        "ldur w5, [x9, #-68]",
        "ldr s2, [x5, #56]",
        "stur s2, [x9, #-20]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4, #12]",
        "stur s2, [x9, #-16]",
        "ldur w7, [x9, #-68]",
        "ldr s2, [x7, #28]",
        "stur s2, [x9, #-12]",
        "ldur w5, [x9, #-68]",
        "ldr s2, [x5, #44]",
        "stur s2, [x9, #-8]",
        "ldur w4, [x9, #-68]",
        "ldr s2, [x4, #60]",
        "stur s2, [x9, #-4]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-64]",
        "str s2, [x7]",
        "ldr w5, [x9, #8]",
        "ldur s2, [x9, #-60]",
        "str s2, [x5, #4]",
        "ldr w4, [x9, #8]",
        "ldur s2, [x9, #-56]",
        "str s2, [x4, #8]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-52]",
        "str s2, [x7, #12]",
        "ldr w5, [x9, #8]",
        "ldur s2, [x9, #-48]",
        "str s2, [x5, #16]",
        "ldr w4, [x9, #8]",
        "ldur s2, [x9, #-44]",
        "str s2, [x4, #20]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-40]",
        "str s2, [x7, #24]",
        "ldr w5, [x9, #8]",
        "ldur s2, [x9, #-36]",
        "str s2, [x5, #28]",
        "ldr w4, [x9, #8]",
        "ldur s2, [x9, #-32]",
        "str s2, [x4, #32]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-28]",
        "str s2, [x7, #36]",
        "ldr w5, [x9, #8]",
        "ldur s2, [x9, #-24]",
        "str s2, [x5, #40]",
        "ldr w4, [x9, #8]",
        "ldur s2, [x9, #-20]",
        "str s2, [x4, #44]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-16]",
        "str s2, [x7, #48]",
        "ldr w5, [x9, #8]",
        "ldur s2, [x9, #-12]",
        "str s2, [x5, #52]",
        "ldr w4, [x9, #8]",
        "ldur s2, [x9, #-8]",
        "str s2, [x4, #56]",
        "ldr w7, [x9, #8]",
        "ldur s2, [x9, #-4]",
        "str s2, [x7, #60]",
        "ldr w4, [x9, #8]",
        "mov x8, x9",
        "ldr w9, [x8], #4",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/HotBlocks_AFP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2",
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "FMOD scalar loop": {
      "x86InstructionCount": 38,
      "ExpectedInstructionCount": 48,
      "x86Insts": [
        "mov     esi, ecx",
        "mov     rdx, rbp",
        "mov     rax, rbx",
        "movss   xmm2, dword [rdx]",
        "add     rax, 0x20",
        "mulss   xmm2, xmm0",
        "add     rdx, 0x20",
        "addss   xmm2, dword [rax-0x20]",
        "movss   dword [rax-0x20], xmm2",
        "movss   xmm2, dword [rdx-0x1c]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x1c]",
        "movss   dword [rax-0x1c], xmm2",
        "movss   xmm2, dword [rdx-0x18]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x18]",
        "movss   dword [rax-0x18], xmm2",
        "movss   xmm2, dword [rdx-0x14]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x14]",
        "movss   dword [rax-0x14], xmm2",
        "movss   xmm2, dword [rdx-0x10]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x10]",
        "movss   dword [rax-0x10], xmm2",
        "movss   xmm2, dword [rdx-0xc]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0xc]",
        "movss   dword [rax-0xc], xmm2",
        "movss   xmm2, dword [rdx-0x8]",
        "mulss   xmm2, xmm0",
        "addss   xmm2, dword [rax-0x8]",
        "movss   dword [rax-0x8], xmm2",
        "movss   xmm2, dword [rdx-0x4]",
        "mulss   xmm2, xmm1",
        "addss   xmm2, dword [rax-0x4]",
        "movss   dword [rax-0x4], xmm2",
        "sub     esi, 0x1"
      ],
      "ExpectedArm64ASM": [
        "mov w10, w7",
        "mov x5, x9",
        "mov x4, x6",
        "ldr s18, [x5]",
        "add x4, x4, #0x20 (32)",
        "fmul s18, s18, s16",
        "add x5, x5, #0x20 (32)",
        "ldur s2, [x4, #-32]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-32]",
        "ldur s18, [x5, #-28]",
        "fmul s18, s18, s17",
        "ldur s2, [x4, #-28]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-28]",
        "ldur s18, [x5, #-24]",
        "fmul s18, s18, s16",
        "ldur s2, [x4, #-24]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-24]",
        "ldur s18, [x5, #-20]",
        "fmul s18, s18, s17",
        "ldur s2, [x4, #-20]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-20]",
        "ldur s18, [x5, #-16]",
        "fmul s18, s18, s16",
        "ldur s2, [x4, #-16]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-16]",
        "ldur s18, [x5, #-12]",
        "fmul s18, s18, s17",
        "ldur s2, [x4, #-12]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-12]",
        "ldur s18, [x5, #-8]",
        "fmul s18, s18, s16",
        "ldur s2, [x4, #-8]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-8]",
        "ldur s18, [x5, #-4]",
        "fmul s18, s18, s17",
        "ldur s2, [x4, #-4]",
        "fadd s18, s18, s2",
        "stur s18, [x4, #-4]",
        "subs w26, w10, #0x1 (1)",
        "mov x27, x10",
        "mov x10, x26"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/HotBlocks_TSO_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "TSO",
      "LRCPC",
      "LRCPC2",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "The Sims 1 hot block": {
      "x86InstructionCount": 47,
      "ExpectedInstructionCount": 97,
      "Comment": [
        "Hottest in-game block from The Sims 1, Legacy Collection",
        "Consumed 6.13% of a CPU core on Oryon-1",
        "Some interesting problems in this block:",
        "  - LRCPC2 small immediate loadstores aren't getting used",
        "  - Memory stores of zero aren't using wzr register",
        "  - Vector loadstores are using large negative numbers which Arm64 can't optimize",
        "    - Would need to generate a temporary and convert negative offsets to positive (Or close enough for simm9)",
        "  - LRCPC3 isn't used for vector loadstores at all",
        "    - Where the excessive dmb comes from",
        "    - No hardware ships with LRCPC3 yet anyway"
      ],
      "x86Insts": [
        "push    ebp",
        "mov     ebp, esp",
        "sub     esp, 0x1c4",
        "mov     eax, dword [0xa37400]",
        "xor     eax, ebp",
        "mov     dword [ebp-0x4], eax",
        "mov     edx, dword [ebp+0x8]",
        "push    ebx",
        "push    esi",
        "mov     esi, dword [ebp+0x18]",
        "push    edi",
        "mov     edi, ecx",
        "mov     dword [ebp-0x184], edx",
        "mov     dword [ebp-0x180], 0x0",
        "lea     ecx, [ebp-0x16c]",
        "add     esi, 0x28",
        "mov     edx, 0x6",
        "mov     eax, dword [edi+0x3190]",
        "mov     dword [ebp-0x188], eax",
        "mov     eax, dword [ebp+0xc]",
        "mov     dword [ebp-0x178], eax",
        "mov     eax, dword [ebp+0x10]",
        "mov     dword [ebp-0x170], eax",
        "movzx   eax, byte [ebp+0x14]",
        "mov     dword [ebp-0x17c], eax",
        "movq    xmm0, qword [esi-0x28]",
        "xorps   xmm1, xmm1",
        "mov     eax, dword [esi-0x20]",
        "lea     esi, [esi+0x4c]",
        "movq    qword [ebp-0x1c4], xmm0",
        "lea     ecx, [ecx+0x3c]",
        "mov     dword [ebp-0x1bc], eax",
        "movups  xmm0, [esi-0x58]",
        "movups  [ebp-0x1a8], xmm1",
        "movups  [ebp-0x1b8], xmm0",
        "movups  xmm0, [ebp-0x1c4]",
        "movups  [ebp-0x198], xmm1",
        "psrldq  xmm1, 0xc",
        "movups  [ecx-0x3c], xmm0",
        "movups  xmm0, [ebp-0x1b4]",
        "movups  [ecx-0x2c], xmm0",
        "movups  xmm0, [ebp-0x1a4]",
        "movups  [ecx-0x1c], xmm0",
        "movq    xmm0, qword [ebp-0x194]",
        "movq    qword [ecx-0xc], xmm0",
        "movd    dword [ecx-0x4], xmm1",
        "sub     edx, 0x1"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "sub w8, w8, #0x1c4 (452)",
        "mov w20, #0x7400",
        "movk w20, #0xa3, lsl #16",
        "ldapur w4, [x20]",
        "nop",
        "eor w4, w4, w9",
        "nop",
        "stlur w4, [x9, #-4]",
        "ldapur w5, [x9, #8]",
        "nop",
        "stp w10, w6, [x8, #-8]!",
        "ldapur w10, [x9, #24]",
        "nop",
        "str w11, [x8, #-4]!",
        "mov x11, x7",
        "sub w20, w9, #0x184 (388)",
        "nop",
        "stlur w5, [x20]",
        "sub w20, w9, #0x180 (384)",
        "nop",
        "stlur wzr, [x20]",
        "sub w7, w9, #0x16c (364)",
        "add w10, w10, #0x28 (40)",
        "mov w5, #0x6",
        "mov w20, #0x3190",
        "add w20, w11, w20",
        "ldapur w4, [x20]",
        "nop",
        "sub w20, w9, #0x188 (392)",
        "nop",
        "stlur w4, [x20]",
        "ldapur w4, [x9, #12]",
        "nop",
        "sub w20, w9, #0x178 (376)",
        "nop",
        "stlur w4, [x20]",
        "ldapur w4, [x9, #16]",
        "nop",
        "sub w20, w9, #0x170 (368)",
        "nop",
        "stlur w4, [x20]",
        "ldapurb w4, [x9, #20]",
        "sub w20, w9, #0x17c (380)",
        "nop",
        "stlur w4, [x20]",
        "ldur d16, [x10, #-40]",
        "dmb ishld",
        "movi v17.2d, #0x0",
        "ldapur w4, [x10, #-32]",
        "nop",
        "add w10, w10, #0x4c (76)",
        "mov x20, #0xfffffffffffffe3c",
        "dmb ish",
        "str d16, [x9, x20, sxtx]",
        "add w7, w7, #0x3c (60)",
        "sub w21, w9, #0x1bc (444)",
        "nop",
        "stlur w4, [x21]",
        "ldur q16, [x10, #-88]",
        "dmb ishld",
        "mov v2.16b, v17.16b",
        "mov x21, #0xfffffffffffffe58",
        "dmb ish",
        "str q17, [x9, x21, sxtx]",
        "mov x21, #0xfffffffffffffe48",
        "dmb ish",
        "str q16, [x9, x21, sxtx]",
        "ldr q16, [x9, x20, sxtx]",
        "dmb ishld",
        "mov x20, #0xfffffffffffffe68",
        "dmb ish",
        "str q17, [x9, x20, sxtx]",
        "ext v17.16b, v17.16b, v2.16b, #12",
        "dmb ish",
        "stur q16, [x7, #-60]",
        "mov x20, #0xfffffffffffffe4c",
        "ldr q16, [x9, x20, sxtx]",
        "dmb ishld",
        "dmb ish",
        "stur q16, [x7, #-44]",
        "mov x20, #0xfffffffffffffe5c",
        "ldr q16, [x9, x20, sxtx]",
        "dmb ishld",
        "dmb ish",
        "stur q16, [x7, #-28]",
        "mov x20, #0xfffffffffffffe6c",
        "ldr d16, [x9, x20, sxtx]",
        "dmb ishld",
        "dmb ish",
        "stur d16, [x7, #-12]",
        "sub w20, w7, #0x4 (4)",
        "str s17, [x20]",
        "subs w26, w5, #0x1 (1)",
        "mov x27, x5",
        "mov x5, x26"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Primary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "add bl, cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x00",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmn w0, w7, lsl #24",
        "add w26, w6, w7",
        "bfxil x6, x26, #0, #8",
        "cfinv"
      ]
    },
    "add bx, cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmn w0, w7, lsl #16",
        "add w26, w6, w7",
        "bfxil x6, x26, #0, #16",
        "cfinv"
      ]
    },
    "add ebx, ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "adds w26, w6, w7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "add rbx, rcx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "adds x26, x6, x7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "db 0x02, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x02",
        "add bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmn w0, w6, lsl #24",
        "add w26, w7, w6",
        "bfxil x7, x26, #0, #8",
        "cfinv"
      ]
    },
    "db 0x66, 0x03, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x03",
        "add bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmn w0, w6, lsl #16",
        "add w26, w7, w6",
        "bfxil x7, x26, #0, #16",
        "cfinv"
      ]
    },
    "db 0x03, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x03",
        "add ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "adds w26, w7, w6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x03, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x03",
        "add rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "adds x26, x7, x6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "add al, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x04",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add ax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x1 (1)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x1 (1)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add al, -1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x04",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8",
        "cfinv"
      ]
    },
    "add ax, -1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "bfxil x4, x26, #0, #16",
        "cfinv"
      ]
    },
    "add eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs w26, w4, #0x1 (1)",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "add rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs x26, x4, #0x1 (1)",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or bl, bh": {
      "ExpectedInstructionCount": 5,
      "Comment": "",
      "ExpectedArm64ASM": [
        "lsr w20, w6, #8",
        "orr w26, w6, w20",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "cfinv"
      ]
    },
    "or bl, cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x08",
      "ExpectedArm64ASM": [
        "orr w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "cfinv"
      ]
    },
    "or bx, cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "cfinv"
      ]
    },
    "or ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr w6, w6, w7",
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "or rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr x6, x6, x7",
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "db 0x0A, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x0A",
        "or bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "cfinv"
      ]
    },
    "db 0x66, 0x0B, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x0B",
        "or bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "cfinv"
      ]
    },
    "db 0x0B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0B",
        "or ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w7, w7, w6",
        "subs w26, w7, #0x0 (0)"
      ]
    },
    "db 0x48, 0x0B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0B",
        "or rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr x7, x7, x6",
        "subs x26, x7, #0x0 (0)"
      ]
    },
    "or al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0C",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or ax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #16",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "or al, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0C",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or ax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xffff",
        "cmn wzr, w26, lsl #16",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "orr w4, w4, w20",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "orr x4, x4, x20",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc bl, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x10",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxtb x20, w7",
        "cinc w21, w20, lo",
        "add w22, w6, w21",
        "uxtb w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x21, #63, #nzCv",
        "eor w20, w6, w20",
        "eor w21, w26, w6",
        "bic w20, w21, w20",
        "rmif x20, #7, #nzcV",
        "bfxil x6, x26, #0, #8"
      ]
    },
    "adc bx, cx": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxth x20, w7",
        "cinc w21, w20, lo",
        "add w22, w6, w21",
        "uxth w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x21, #63, #nzCv",
        "eor w20, w6, w20",
        "eor w21, w26, w6",
        "bic w20, w21, w20",
        "rmif x20, #15, #nzcV",
        "bfxil x6, x26, #0, #16"
      ]
    },
    "adc ebx, ecx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "cfinv",
        "adcs w26, w6, w7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "adc rbx, rcx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "cfinv",
        "adcs x26, x6, x7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "db 0x12, 0xcb": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x12",
        "adc bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxtb x20, w6",
        "cinc w21, w20, lo",
        "add w22, w7, w21",
        "uxtb w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x21, #63, #nzCv",
        "eor w20, w7, w20",
        "eor w21, w26, w7",
        "bic w20, w21, w20",
        "rmif x20, #7, #nzcV",
        "bfxil x7, x26, #0, #8"
      ]
    },
    "db 0x66, 0x13, 0xcb": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x13",
        "adc bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxth x20, w6",
        "cinc w21, w20, lo",
        "add w22, w7, w21",
        "uxth w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x21, #63, #nzCv",
        "eor w20, w7, w20",
        "eor w21, w26, w7",
        "bic w20, w21, w20",
        "rmif x20, #15, #nzcV",
        "bfxil x7, x26, #0, #16"
      ]
    },
    "db 0x13, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x13",
        "adc ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "cfinv",
        "adcs w26, w7, w6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x13, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x13",
        "adc rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "cfinv",
        "adcs x26, x7, x6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "adc al, 1": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x14",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w4",
        "rmif x20, #7, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "adc ax, 1": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w4",
        "rmif x20, #15, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "adc eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc al, -1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x14",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w4, w26",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "adc ax, -1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w4, w26",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "adc eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "adc rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sbb bl, cl": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x18",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxtb w20, w6",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #7, #nzcV",
        "bfxil x6, x26, #0, #8"
      ]
    },
    "sbb bx, cx": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxth w20, w6",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #15, #nzcV",
        "bfxil x6, x26, #0, #16"
      ]
    },
    "sbb ebx, ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "sbcs w26, w6, w7",
        "mov x6, x26"
      ]
    },
    "sbb rbx, rcx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "sbcs x26, x6, x7",
        "mov x6, x26"
      ]
    },
    "db 0x1A, 0xcb": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0x1A",
        "sbb bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxtb w20, w7",
        "uxtb x21, w6",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #7, #nzcV",
        "bfxil x7, x26, #0, #8"
      ]
    },
    "db 0x66, 0x1B, 0xcb": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0x1B",
        "sbb bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxth w20, w7",
        "uxth x21, w6",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x22, #63, #nzCv",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "rmif x20, #15, #nzcV",
        "bfxil x7, x26, #0, #16"
      ]
    },
    "db 0x1B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x1B",
        "sbb ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "sbcs w26, w7, w6",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x1B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x1B",
        "sbb rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "sbcs x26, x7, x6",
        "mov x7, x26"
      ]
    },
    "sbb al, 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x1C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #7, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb ax, 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxth w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #15, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb al, -1": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x1C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w21",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sbb ax, -1": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "uxth w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w21",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sbb eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "sbcs w26, w4, w20",
        "mov x4, x26"
      ]
    },
    "sbb rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "sbcs x26, x4, x20",
        "mov x4, x26"
      ]
    },
    "and bl, cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x20",
      "ExpectedArm64ASM": [
        "and w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "cfinv"
      ]
    },
    "and bx, cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "and w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "cfinv"
      ]
    },
    "and ebx, ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "ands w26, w6, w7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "and rbx, rcx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "ands x26, x6, x7",
        "cfinv",
        "mov x6, x26"
      ]
    },
    "db 0x22, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x22",
        "and bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "and w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "cfinv"
      ]
    },
    "db 0x66, 0x23, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x23",
        "and bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "and w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "cfinv"
      ]
    },
    "db 0x23, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x23",
        "and ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "ands w26, w7, w6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x23, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x23",
        "and rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "ands x26, x7, x6",
        "cfinv",
        "mov x7, x26"
      ]
    },
    "and al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x24",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffffff01",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and ax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffff0001",
        "cmn wzr, w26, lsl #16",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and al, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x24",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "and ax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #16",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "and eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ands w26, w4, w20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ands x26, x4, x20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub bl, cl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x28",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w6, w7",
        "bfxil x6, x26, #0, #8"
      ]
    },
    "sub bx, cx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w6, w7",
        "bfxil x6, x26, #0, #16"
      ]
    },
    "sub ebx, ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs w26, w6, w7",
        "mov x6, x26"
      ]
    },
    "sub rbx, rcx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs x26, x6, x7",
        "mov x6, x26"
      ]
    },
    "db 0x2A, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x2A",
        "sub bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmp w0, w6, lsl #24",
        "sub w26, w7, w6",
        "bfxil x7, x26, #0, #8"
      ]
    },
    "db 0x66, 0x2B, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x2B",
        "sub bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmp w0, w6, lsl #16",
        "sub w26, w7, w6",
        "bfxil x7, x26, #0, #16"
      ]
    },
    "db 0x2B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x2B",
        "sub ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs w26, w7, w6",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x2B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x2B",
        "sub rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs x26, x7, x6",
        "mov x7, x26"
      ]
    },
    "sub al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x2C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sub ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sub eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x2C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sub ax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sub eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "sub rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "xor bl, cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x30",
      "ExpectedArm64ASM": [
        "eor w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "cfinv"
      ]
    },
    "xor bx, cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "cfinv"
      ]
    },
    "xor ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor w6, w6, w7",
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "xor rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor x6, x6, x7",
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "db 0x32, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x32",
        "xor bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "cfinv"
      ]
    },
    "db 0x66, 0x33, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x33",
        "xor bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "cfinv"
      ]
    },
    "db 0x33, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x33",
        "xor ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w7, w7, w6",
        "subs w26, w7, #0x0 (0)"
      ]
    },
    "db 0x48, 0x33, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x33",
        "xor rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x7, x7, x6",
        "subs x26, x7, #0x0 (0)"
      ]
    },
    "xor al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x34",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "xor ax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #16",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "xor eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp bl, cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x38",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w6, w7"
      ]
    },
    "xor al, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x34",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "xor ax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xffff",
        "cmn wzr, w26, lsl #16",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "xor eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "mvn w4, w4",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "mvn x4, x4",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp bx, cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w6, w7"
      ]
    },
    "cmp ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs w26, w6, w7"
      ]
    },
    "cmp rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs x26, x6, x7"
      ]
    },
    "db 0x3A, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x3A",
        "cmp bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmp w0, w6, lsl #24",
        "sub w26, w7, w6"
      ]
    },
    "db 0x66, 0x3B, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x3B",
        "cmp bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmp w0, w6, lsl #16",
        "sub w26, w7, w6"
      ]
    },
    "db 0x3B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x3B",
        "cmp ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs w26, w7, w6"
      ]
    },
    "db 0x48, 0x3B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x3B",
        "cmp rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs x26, x7, x6"
      ]
    },
    "cmp al, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp ax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)"
      ]
    },
    "cmp ax, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20"
      ]
    },
    "cmp eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)"
      ]
    },
    "cmp rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)"
      ]
    },
    "imul ax, bx, 257": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "sxth x21, w6",
        "mul x20, x21, x20",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx, 257": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "smull x21, w6, w20",
        "asr x21, x21, #32",
        "mul w4, w6, w20",
        "sbfx x20, x4, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx, 257": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "smulh x21, x6, x20",
        "mul x4, x6, x20",
        "asr x20, x4, #63",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul ax, bx, 3": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "sxth x21, w6",
        "mul x20, x21, x20",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx, 3": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "smull x21, w6, w20",
        "asr x21, x21, #32",
        "mul w4, w6, w20",
        "sbfx x20, x4, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx, 3": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "smulh x21, x6, x20",
        "mul x4, x6, x20",
        "asr x20, x4, #63",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "test al, bl": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x84",
      "ExpectedArm64ASM": [
        "and w26, w4, w6",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "test ax, bx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "and w26, w4, w6",
        "cmn wzr, w26, lsl #16",
        "cfinv"
      ]
    },
    "test eax, ebx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "ands w26, w4, w6",
        "cfinv"
      ]
    },
    "test rax, rbx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "ands x26, x4, x6",
        "cfinv"
      ]
    },
    "o16 pushf": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "strh w20, [x8, #-2]!"
      ]
    },
    "pushfq": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "str x20, [x8, #-8]!"
      ]
    },
    "popf": {
      "ExpectedInstructionCount": 40,
      "Comment": "0x9d",
      "ExpectedArm64ASM": [
        "ldr x20, [x8], #8",
        "mov w21, #0x202",
        "orr x27, x20, x21",
        "mvn w20, w27",
        "rmif x20, #63, #nzCv",
        "ubfx w26, w20, #2, #1",
        "rmif x27, #4, #nZcv",
        "rmif x27, #4, #Nzcv",
        "ubfx w20, w27, #8, #1",
        "ldrb w21, [x28, #1016]",
        "and w21, w21, #0xfffffffe",
        "mov w22, #0x1",
        "mrs x23, nzcv",
        "cmp x20, #0x0 (0)",
        "csel x20, x21, x22, eq",
        "strb w20, [x28, #1016]",
        "ubfx w20, w27, #9, #1",
        "strb w20, [x28, #1017]",
        "ubfx w20, w27, #10, #1",
        "sub x20, x22, x20, lsl #1",
        "msr nzcv, x23",
        "rmif x27, #11, #nzcV",
        "ubfx w21, w27, #12, #1",
        "strb w21, [x28, #1020]",
        "ubfx w21, w27, #14, #1",
        "strb w21, [x28, #1022]",
        "ubfx w21, w27, #16, #1",
        "strb w21, [x28, #1024]",
        "ubfx w21, w27, #17, #1",
        "strb w21, [x28, #1025]",
        "ubfx w21, w27, #18, #1",
        "strb w21, [x28, #1026]",
        "ubfx w21, w27, #19, #1",
        "strb w21, [x28, #1027]",
        "ubfx w21, w27, #20, #1",
        "strb w21, [x28, #1028]",
        "ubfx w21, w27, #21, #1",
        "strb w21, [x28, #1029]",
        "mov w21, #0x10001",
        "strb w20, [x28, #1018]"
      ]
    },
    "sahf": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x9e",
      "ExpectedArm64ASM": [
        "ubfx w20, w4, #8, #8",
        "mov w21, #0x28",
        "bic x20, x20, x21",
        "orr x27, x20, #0x2",
        "mvn w20, w27",
        "rmif x20, #63, #nzCv",
        "ubfx w26, w20, #2, #1",
        "rmif x27, #4, #nZcv",
        "rmif x27, #4, #Nzcv"
      ]
    },
    "lahf": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x9f",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "bfi x4, x20, #8, #8"
      ]
    },
    "cmpsb": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "ldrb w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20",
        "add x10, x10, x20"
      ]
    },
    "cmpsw": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "ldrh w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1",
        "add x10, x10, x20, lsl #1"
      ]
    },
    "cmpsd": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "ldr w21, [x10]",
        "eor x27, x21, x20",
        "subs w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2",
        "add x10, x10, x20, lsl #2"
      ]
    },
    "cmpsq": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "ldr x21, [x10]",
        "eor x27, x21, x20",
        "subs x26, x21, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3",
        "add x10, x10, x20, lsl #3"
      ]
    },
    "repz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "test al, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa8",
      "ExpectedArm64ASM": [
        "and w26, w4, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test ax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "and w26, w4, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "cfinv"
      ]
    },
    "test rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "cfinv"
      ]
    },
    "test al, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xa8",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "test ax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #16",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "test eax, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "test rax, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "scasb": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20"
      ]
    },
    "scasw": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "scasd": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "scasq": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "repz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.eq #-0x20"
      ]
    },
    "repz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.eq #-0x20"
      ]
    },
    "repz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.eq #-0x18"
      ]
    },
    "repz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.eq #-0x18"
      ]
    },
    "repnz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.ne #-0x18"
      ]
    },
    "repnz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.ne #-0x18"
      ]
    },
    "cmc": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf5",
      "ExpectedArm64ASM": [
        "cfinv"
      ]
    },
    "clc": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf8",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "stc": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf9",
      "ExpectedArm64ASM": [
        "rmif xzr, #63, #nzCv"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/PrimaryGroup.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "add al, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "or al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "adc al, 1": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w4",
        "rmif x20, #7, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb al, 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w21, w26",
        "rmif x20, #7, #nzcV",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "and al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffffff01",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "xor al, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "cmp al, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "add al, -1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8",
        "cfinv"
      ]
    },
    "or al, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "adc al, -1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w4, w26",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sbb al, -1": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "rmif x20, #63, #nzCv",
        "bic w20, w26, w21",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "and al, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "cfinv",
        "mov x26, x4"
      ]
    },
    "sub al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "xor al, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "cmp al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /7",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)"
      ]
    },
    "add ax, 256": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x100 (256)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x100",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x100",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, 256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x100",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x100",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub eax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x100",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x100",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "add ax, -256": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff00",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0xffffff00",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0xffffffffffffff00",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, -256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffff00",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, -256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffff00",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffff00",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffff00",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0xffffff00",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0xffffffffffffff00",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub eax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0xffffff00",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0xffffffffffffff00",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "cmp rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "add ax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "cfinv",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x1 (1)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x1 (1)",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "add ax, -1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "bfxil x4, x26, #0, #16",
        "cfinv"
      ]
    },
    "add eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs w26, w4, #0x1 (1)",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "add rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs x26, x4, #0x1 (1)",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "or eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "orr w4, w4, w20",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "orr x4, x4, x20",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "cfinv",
        "adcs w26, w4, w20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "adc rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "cfinv",
        "adcs x26, x4, x20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sbb eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "sbcs w26, w4, w20",
        "mov x4, x26"
      ]
    },
    "sbb rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "sbcs x26, x4, x20",
        "mov x4, x26"
      ]
    },
    "and eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ands w26, w4, w20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "and rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ands x26, x4, x20",
        "cfinv",
        "mov x4, x26"
      ]
    },
    "sub eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "sub rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "xor eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mvn w4, w4",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mvn x4, x4",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)"
      ]
    },
    "cmp rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)"
      ]
    },
    "rol al, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC0 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #24, #8",
        "ror w20, w20, #30",
        "bfxil x4, x20, #0, #8",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "ror al, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC0 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #8, #8",
        "ror w20, w20, #2",
        "bfxil x4, x20, #0, #8",
        "eor x20, x20, #0x80",
        "rmif x20, #6, #nzCv"
      ]
    },
    "rcl al, 2": {
      "ExpectedInstructionCount": 19,
      "Comment": "GROUP2 0xC0 /2",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "mov w22, #0x0",
        "bfi x22, x20, #55, #8",
        "bfi x22, x21, #63, #1",
        "bfi x22, x20, #46, #8",
        "bfi x22, x21, #54, #1",
        "bfi x22, x20, #37, #8",
        "bfi x22, x21, #45, #1",
        "bfi x22, x20, #28, #8",
        "bfi x22, x21, #36, #1",
        "bfi x22, x20, #19, #8",
        "bfi x22, x21, #27, #1",
        "bfxil x22, x20, #0, #8",
        "ror x20, x22, #62",
        "bfxil x4, x20, #0, #8",
        "ror x20, x22, #61",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "rcr al, 2": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xC0 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "uxtb w21, w4",
        "bfi x21, x20, #8, #1",
        "bfi x21, x21, #9, #9",
        "bfi x21, x21, #18, #18",
        "bfi x21, x21, #36, #9",
        "lsr x20, x21, #2",
        "bfxil x4, x20, #0, #8",
        "eor x20, x21, #0x2",
        "rmif x20, #0, #nzCv"
      ]
    },
    "shl al, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC0 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x4, #0x40",
        "rmif x20, #5, #nzCv",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "shr al, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC0 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w26, w20, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x2",
        "rmif x20, #0, #nzCv",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sar al, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC0 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w26, w20, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x2",
        "rmif x20, #0, #nzCv",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "rol ax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #30",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "rol eax, 2": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "ror w4, w4, #30",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "rol rax, 2": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "ror x4, x4, #62",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "ror ax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #2",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, #0x8000",
        "rmif x20, #14, #nzCv"
      ]
    },
    "ror eax, 2": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "ror w4, w4, #2",
        "eor x20, x4, #0x80000000",
        "rmif x20, #30, #nzCv"
      ]
    },
    "ror rax, 2": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "ror x4, x4, #2",
        "eor x20, x4, #0x8000000000000000",
        "rmif x20, #62, #nzCv"
      ]
    },
    "rcl ax, 2": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "cset x21, lo",
        "mov w22, #0x0",
        "bfi x22, x20, #47, #16",
        "bfi x22, x21, #63, #1",
        "bfi x22, x20, #30, #16",
        "bfi x22, x21, #46, #1",
        "bfi x22, x20, #13, #16",
        "bfi x22, x21, #29, #1",
        "bfxil x22, x20, #0, #16",
        "ror x20, x22, #62",
        "bfxil x4, x20, #0, #16",
        "ror x20, x22, #61",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "rcl eax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "lsl w20, w4, #2",
        "cset x21, lo",
        "orr w20, w20, w4, lsr #31",
        "eor x22, x4, #0x40000000",
        "rmif x22, #29, #nzCv",
        "orr w4, w20, w21, lsl #1"
      ]
    },
    "rcl rax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "lsl x20, x4, #2",
        "cset x21, lo",
        "orr x20, x20, x4, lsr #63",
        "eor x22, x4, #0x4000000000000000",
        "rmif x22, #61, #nzCv",
        "orr x4, x20, x21, lsl #1"
      ]
    },
    "rcr ax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "uxth w21, w4",
        "bfi x21, x20, #16, #1",
        "bfi x21, x21, #17, #17",
        "bfi x21, x21, #34, #17",
        "lsr x20, x21, #2",
        "bfxil x4, x20, #0, #16",
        "eor x20, x21, #0x2",
        "rmif x20, #0, #nzCv"
      ]
    },
    "rcr eax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, #2",
        "cset x21, lo",
        "orr w20, w20, w4, lsl #31",
        "eor x22, x4, #0x2",
        "rmif x22, #0, #nzCv",
        "orr w4, w20, w21, lsl #30"
      ]
    },
    "rcr rax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, #2",
        "cset x21, lo",
        "orr x20, x20, x4, lsl #63",
        "eor x22, x4, #0x2",
        "rmif x22, #0, #nzCv",
        "orr x4, x20, x21, lsl #62"
      ]
    },
    "shl ax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x4, #0x4000",
        "rmif x20, #13, #nzCv",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shl eax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x40000000",
        "rmif x20, #29, #nzCv",
        "mov x4, x26"
      ]
    },
    "shl rax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x4000000000000000",
        "rmif x20, #61, #nzCv",
        "mov x4, x26"
      ]
    },
    "shr ax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w26, w20, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x2",
        "rmif x20, #0, #nzCv",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shr eax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "lsr w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "shr rax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "lsr x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "sar ax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w26, w20, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x2",
        "rmif x20, #0, #nzCv",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sar eax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "asr w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "sar rax, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "asr x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "rol al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd0 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #24, #8",
        "ror w20, w20, #31",
        "bfxil x4, x20, #0, #8",
        "eor x21, x20, #0x1",
        "rmif x21, #63, #nzCv",
        "eor w20, w20, w20, lsr #7",
        "rmif x20, #0, #nzcV"
      ]
    },
    "ror al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd0 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #8, #8",
        "ror w20, w20, #1",
        "bfxil x4, x20, #0, #8",
        "eor x21, x20, #0x80",
        "rmif x21, #6, #nzCv",
        "eor w20, w20, w20, lsr #1",
        "rmif x20, #6, #nzcV"
      ]
    },
    "rcl al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd0 /2",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "orr w21, w21, w20, lsl #1",
        "eor x22, x20, #0x80",
        "rmif x22, #6, #nzCv",
        "eor w20, w21, w20",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x21, #0, #8"
      ]
    },
    "rcr al, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd0 /3",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "eor x22, x20, #0x1",
        "rmif x22, #63, #nzCv",
        "ubfx w20, w20, #1, #7",
        "bfi w20, w21, #7, #1",
        "bfxil x4, x20, #0, #8",
        "eor w20, w20, w20, lsr #1",
        "rmif x20, #6, #nzcV"
      ]
    },
    "shl al, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd0 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmn wzr, w26, lsl #24",
        "eor x20, x4, #0x80",
        "rmif x20, #6, #nzCv",
        "eor w20, w26, w4",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "shr al, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd0 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w26, w20, #1",
        "cmn wzr, w26, lsl #24",
        "eor x21, x20, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sar al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd0 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w26, w20, #1",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "rol ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #31",
        "bfxil x4, x20, #0, #16",
        "eor x21, x20, #0x1",
        "rmif x21, #63, #nzCv",
        "eor w20, w20, w20, lsr #15",
        "rmif x20, #0, #nzcV"
      ]
    },
    "rol eax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "ror w4, w4, #31",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "eor w20, w4, w4, lsr #31",
        "rmif x20, #0, #nzcV"
      ]
    },
    "rol rax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "ror x4, x4, #63",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "eor x20, x4, x4, lsr #63",
        "rmif x20, #0, #nzcV"
      ]
    },
    "ror ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #1",
        "bfxil x4, x20, #0, #16",
        "eor x21, x20, #0x8000",
        "rmif x21, #14, #nzCv",
        "eor w20, w20, w20, lsr #1",
        "rmif x20, #14, #nzcV"
      ]
    },
    "ror eax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "ror w4, w4, #1",
        "eor x20, x4, #0x80000000",
        "rmif x20, #30, #nzCv",
        "eor w20, w4, w4, lsr #1",
        "rmif x20, #30, #nzcV"
      ]
    },
    "ror rax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "ror x4, x4, #1",
        "eor x20, x4, #0x8000000000000000",
        "rmif x20, #62, #nzCv",
        "eor x20, x4, x4, lsr #1",
        "rmif x20, #62, #nzcV"
      ]
    },
    "rcl ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "cset x21, lo",
        "orr w21, w21, w20, lsl #1",
        "eor x22, x20, #0x8000",
        "rmif x22, #14, #nzCv",
        "eor w20, w21, w20",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "rcl eax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "cset x21, lo",
        "orr w4, w21, w20, lsl #1",
        "eor x21, x20, #0x80000000",
        "rmif x21, #30, #nzCv",
        "eor w20, w4, w20",
        "rmif x20, #31, #nzcV"
      ]
    },
    "rcl rax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "orr x20, x20, x4, lsl #1",
        "eor x21, x4, #0x8000000000000000",
        "rmif x21, #62, #nzCv",
        "eor x21, x20, x4",
        "rmif x21, #63, #nzcV",
        "mov x4, x20"
      ]
    },
    "rcr ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "rmif x21, #63, #nzCv",
        "ubfx w21, w4, #1, #15",
        "orr w20, w21, w20, lsl #15",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, x20, lsr #1",
        "rmif x20, #14, #nzcV"
      ]
    },
    "rcr eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "rmif x21, #63, #nzCv",
        "extr w4, w20, w4, #1",
        "eor x20, x4, x4, lsr #1",
        "rmif x20, #30, #nzcV"
      ]
    },
    "rcr rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "rmif x21, #63, #nzCv",
        "extr x4, x20, x4, #1",
        "eor x20, x4, x4, lsr #1",
        "rmif x20, #62, #nzcV"
      ]
    },
    "shl ax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmn wzr, w26, lsl #16",
        "eor x20, x4, #0x8000",
        "rmif x20, #14, #nzCv",
        "eor w20, w26, w4",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shl eax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x80000000",
        "rmif x20, #30, #nzCv",
        "eor w20, w26, w4",
        "rmif x20, #31, #nzcV",
        "mov x4, x26"
      ]
    },
    "shl rax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x8000000000000000",
        "rmif x20, #62, #nzCv",
        "eor x20, x26, x4",
        "rmif x20, #63, #nzcV",
        "mov x4, x26"
      ]
    },
    "shr ax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w26, w20, #1",
        "cmn wzr, w26, lsl #16",
        "eor x21, x20, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shr eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "lsr w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "rmif x4, #31, #nzcV",
        "mov x4, x26"
      ]
    },
    "shr rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "lsr x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "rmif x4, #63, #nzcV",
        "mov x4, x26"
      ]
    },
    "sar ax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w26, w20, #1",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sar eax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "asr w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "mov x4, x26"
      ]
    },
    "sar rax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "asr x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "rmif x20, #63, #nzCv",
        "mov x4, x26"
      ]
    },
    "rol al, cl": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd2 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x7",
        "mov x21, x4",
        "bfi w21, w4, #24, #8",
        "neg x20, x20",
        "ror w20, w21, w20",
        "bfxil x4, x20, #0, #8",
        "and x21, x7, #0x1f",
        "cbz w21, #+0x14",
        "eor x0, x20, x20, lsr #7",
        "mvn x1, x20",
        "rmif x1, #63, #nzCv",
        "rmif x0, #0, #nzcV"
      ]
    },
    "ror al, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd2 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x7",
        "mov x21, x4",
        "bfi w21, w4, #8, #8",
        "ror w20, w21, w20",
        "bfxil x4, x20, #0, #8",
        "and x21, x7, #0x1f",
        "cbz w21, #+0x14",
        "eor x0, x20, x20, lsr #1",
        "mvn x1, x20",
        "rmif x1, #6, #nzCv",
        "rmif x0, #6, #nzcV"
      ]
    },
    "rcl al, cl": {
      "ExpectedInstructionCount": 27,
      "Comment": "GROUP2 0xd2 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x68",
        "and x20, x7, #0x1f",
        "uxtb w21, w4",
        "cset x22, lo",
        "mov w23, #0x0",
        "bfi x23, x21, #55, #8",
        "bfi x23, x22, #63, #1",
        "bfi x23, x21, #46, #8",
        "bfi x23, x22, #54, #1",
        "bfi x23, x21, #37, #8",
        "bfi x23, x22, #45, #1",
        "bfi x23, x21, #28, #8",
        "bfi x23, x22, #36, #1",
        "bfi x23, x21, #19, #8",
        "bfi x23, x22, #27, #1",
        "bfxil x23, x21, #0, #8",
        "neg x21, x20",
        "ror x21, x23, x21",
        "bfxil x4, x21, #0, #8",
        "mov w22, #0x3f",
        "sub x20, x22, x20",
        "ror x20, x23, x20",
        "eor x22, x20, #0x1",
        "rmif x22, #63, #nzCv",
        "eor x20, x20, x21, lsr #7",
        "rmif x20, #0, #nzcV"
      ]
    },
    "rcr al, cl": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP2 0xd2 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x40",
        "and x20, x7, #0x1f",
        "cset x21, lo",
        "uxtb w22, w4",
        "bfi x22, x21, #8, #1",
        "bfi x22, x22, #9, #9",
        "bfi x22, x22, #18, #18",
        "bfi x22, x22, #36, #9",
        "lsr x21, x22, x20",
        "bfxil x4, x21, #0, #8",
        "sub w20, w20, #0x1 (1)",
        "lsr w20, w22, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv",
        "eor w20, w21, w21, lsr #1",
        "rmif x20, #6, #nzcV"
      ]
    },
    "shl al, cl": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd2 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x1c",
        "cmn wzr, w20, lsl #24",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w4, w20",
        "rmif x0, #7, #nzCv",
        "rmif x2, #7, #nzcV",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "shr al, cl": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd2 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x24",
        "cmn wzr, w21, lsl #24",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w21",
        "rmif x0, #63, #nzCv",
        "rmif x2, #7, #nzcV",
        "bfxil x4, x21, #0, #8"
      ]
    },
    "sar al, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd2 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x1c",
        "cmn wzr, w21, lsl #24",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "rmif x0, #63, #nzCv",
        "bfxil x4, x21, #0, #8"
      ]
    },
    "rol ax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "mov x21, x4",
        "bfi w21, w4, #16, #16",
        "neg x22, x20",
        "ror w21, w21, w22",
        "bfxil x4, x21, #0, #16",
        "cbz w20, #+0x14",
        "eor x0, x21, x21, lsr #15",
        "mvn x1, x21",
        "rmif x1, #63, #nzCv",
        "rmif x0, #0, #nzcV"
      ]
    },
    "rol eax, cl": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "neg x21, x20",
        "ror w4, w4, w21",
        "cbz w20, #+0x14",
        "eor x0, x4, x4, lsr #31",
        "mvn x1, x4",
        "rmif x1, #63, #nzCv",
        "rmif x0, #0, #nzcV"
      ]
    },
    "rol rax, cl": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "neg x21, x20",
        "ror x4, x4, x21",
        "cbz x20, #+0x14",
        "eor x0, x4, x4, lsr #63",
        "mvn x1, x4",
        "rmif x1, #63, #nzCv",
        "rmif x0, #0, #nzcV"
      ]
    },
    "ror ax, cl": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "mov x21, x4",
        "bfi w21, w4, #16, #16",
        "ror w21, w21, w20",
        "bfxil x4, x21, #0, #16",
        "cbz w20, #+0x14",
        "eor x0, x21, x21, lsr #1",
        "mvn x1, x21",
        "rmif x1, #14, #nzCv",
        "rmif x0, #14, #nzcV"
      ]
    },
    "ror eax, cl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "ror w4, w4, w20",
        "cbz w20, #+0x14",
        "eor x0, x4, x4, lsr #1",
        "mvn x1, x4",
        "rmif x1, #30, #nzCv",
        "rmif x0, #30, #nzcV"
      ]
    },
    "ror rax, cl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "ror x4, x4, x20",
        "cbz x20, #+0x14",
        "eor x0, x4, x4, lsr #1",
        "mvn x1, x4",
        "rmif x1, #62, #nzCv",
        "rmif x0, #62, #nzcV"
      ]
    },
    "rcl ax, cl": {
      "ExpectedInstructionCount": 23,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x58",
        "and x20, x7, #0x1f",
        "uxth w21, w4",
        "cset x22, lo",
        "mov w23, #0x0",
        "bfi x23, x21, #47, #16",
        "bfi x23, x22, #63, #1",
        "bfi x23, x21, #30, #16",
        "bfi x23, x22, #46, #1",
        "bfi x23, x21, #13, #16",
        "bfi x23, x22, #29, #1",
        "bfxil x23, x21, #0, #16",
        "neg x21, x20",
        "ror x21, x23, x21",
        "bfxil x4, x21, #0, #16",
        "mov w22, #0x3f",
        "sub x20, x22, x20",
        "ror x20, x23, x20",
        "eor x22, x20, #0x1",
        "rmif x22, #63, #nzCv",
        "eor x20, x20, x21, lsr #15",
        "rmif x20, #0, #nzcV"
      ]
    },
    "rcl eax, cl": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and w20, w7, #0x1f",
        "cbz x20, #+0x3c",
        "lsl w20, w4, w7",
        "cset x21, lo",
        "neg w22, w7",
        "lsr w23, w4, w22",
        "orr w20, w20, w23, lsr #1",
        "lsr w22, w4, w22",
        "eor x23, x22, #0x1",
        "rmif x23, #63, #nzCv",
        "sub w23, w7, #0x1 (1)",
        "lsl w21, w21, w23",
        "orr w4, w20, w21",
        "eor w20, w4, w22, lsl #31",
        "rmif x20, #31, #nzcV",
        "b #+0x8",
        "mov w4, w4"
      ]
    },
    "rcl rax, cl": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "cbz x20, #+0x38",
        "lsl x20, x4, x7",
        "cset x21, lo",
        "neg x22, x7",
        "lsr x23, x4, x22",
        "orr x20, x20, x23, lsr #1",
        "lsr x22, x4, x22",
        "eor x23, x22, #0x1",
        "rmif x23, #63, #nzCv",
        "sub x23, x7, #0x1 (1)",
        "lsl x21, x21, x23",
        "orr x4, x20, x21",
        "eor x20, x4, x22, lsl #63",
        "rmif x20, #63, #nzcV"
      ]
    },
    "rcr ax, cl": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x3c",
        "and x20, x7, #0x1f",
        "cset x21, lo",
        "uxth w22, w4",
        "bfi x22, x21, #16, #1",
        "bfi x22, x22, #17, #17",
        "bfi x22, x22, #34, #17",
        "lsr x21, x22, x20",
        "bfxil x4, x21, #0, #16",
        "sub w20, w20, #0x1 (1)",
        "lsr w20, w22, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv",
        "eor w20, w21, w21, lsr #1",
        "rmif x20, #14, #nzcV"
      ]
    },
    "rcr eax, cl": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and w20, w7, #0x1f",
        "cbz x20, #+0x3c",
        "lsr w20, w4, w7",
        "cset x21, lo",
        "neg w22, w7",
        "lsl w23, w4, w22",
        "orr w20, w20, w23, lsl #1",
        "sub w23, w7, #0x1 (1)",
        "lsr w23, w4, w23",
        "eor x23, x23, #0x1",
        "rmif x23, #63, #nzCv",
        "lsl w21, w21, w22",
        "orr w4, w20, w21",
        "eor w20, w4, w4, lsr #1",
        "rmif x20, #30, #nzcV",
        "b #+0x8",
        "mov w4, w4"
      ]
    },
    "rcr rax, cl": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "cbz x20, #+0x38",
        "lsr x20, x4, x7",
        "cset x21, lo",
        "neg x22, x7",
        "lsl x23, x4, x22",
        "orr x20, x20, x23, lsl #1",
        "sub x23, x7, #0x1 (1)",
        "lsr x23, x4, x23",
        "eor x23, x23, #0x1",
        "rmif x23, #63, #nzCv",
        "lsl x21, x21, x22",
        "orr x4, x20, x21",
        "eor x20, x4, x4, lsr #1",
        "rmif x20, #62, #nzcV"
      ]
    },
    "shl ax, cl": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x1c",
        "cmn wzr, w20, lsl #16",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w4, w20",
        "rmif x0, #15, #nzCv",
        "rmif x2, #15, #nzcV",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "shl eax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x20",
        "ands w26, w20, w20",
        "neg w0, w7",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "eor w2, w4, w20",
        "rmif x0, #63, #nzCv",
        "rmif x2, #31, #nzcV",
        "mov x4, x20"
      ]
    },
    "shl rax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x20",
        "ands x26, x20, x20",
        "neg x0, x7",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x20",
        "rmif x0, #63, #nzCv",
        "rmif x2, #63, #nzcV",
        "mov x4, x20"
      ]
    },
    "shr ax, cl": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x24",
        "cmn wzr, w21, lsl #16",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w21",
        "rmif x0, #63, #nzCv",
        "rmif x2, #15, #nzcV",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "shr eax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x20",
        "ands w26, w20, w20",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "eor w2, w4, w20",
        "rmif x0, #63, #nzCv",
        "rmif x2, #31, #nzcV",
        "mov x4, x20"
      ]
    },
    "shr rax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x20",
        "ands x26, x20, x20",
        "sub x0, x7, #0x1 (1)",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x20",
        "rmif x0, #63, #nzCv",
        "rmif x2, #63, #nzcV",
        "mov x4, x20"
      ]
    },
    "sar ax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x1c",
        "cmn wzr, w21, lsl #16",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "rmif x0, #63, #nzCv",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "sar eax, cl": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "asr w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x18",
        "ands w26, w20, w20",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "rmif x0, #63, #nzCv",
        "mov x4, x20"
      ]
    },
    "sar rax, cl": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "asr x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x18",
        "ands x26, x20, x20",
        "sub x0, x7, #0x1 (1)",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "rmif x0, #63, #nzCv",
        "mov x4, x20"
      ]
    },
    "test bl, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf6 /0",
      "ExpectedArm64ASM": [
        "and w26, w6, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "not bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf6 /2",
      "ExpectedArm64ASM": [
        "eor x6, x6, #0xff"
      ]
    },
    "neg bl": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf6 /3",
      "ExpectedArm64ASM": [
        "cmp wzr, w6, lsl #24",
        "neg w26, w6",
        "mov x20, x6",
        "bfxil x20, x26, #0, #8",
        "mov x27, x6",
        "mov x6, x20"
      ]
    },
    "mul bl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf6 /4",
      "ExpectedArm64ASM": [
        "uxtb x20, w6",
        "uxtb x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "ubfx x20, x20, #8, #8",
        "cmp x20, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul bl": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf6 /5",
      "ExpectedArm64ASM": [
        "sxtb x20, w6",
        "sxtb x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "sbfx x21, x20, #8, #8",
        "sbfx x20, x20, #7, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "div bl": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf6 /6",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "uxth w23, w4",
        "udiv w22, w23, w20",
        "msub w21, w22, w20, w23",
        "bfi x22, x21, #8, #8",
        "bfxil x4, x22, #0, #16"
      ]
    },
    "idiv bl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf6 /7",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "sxth x23, w4",
        "sxtb x20, w20",
        "sdiv x22, x23, x20",
        "msub x21, x22, x20, x23",
        "bfi x22, x21, #8, #8",
        "bfxil x4, x22, #0, #16"
      ]
    },
    "test bx, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "and w26, w6, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test ebx, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "ands w26, w6, #0x1",
        "cfinv"
      ]
    },
    "test rbx, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "ands x26, x6, #0x1",
        "cfinv"
      ]
    },
    "test bx, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "cmn wzr, w6, lsl #16",
        "cfinv",
        "mov x26, x6"
      ]
    },
    "test ebx, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "test rbx, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "neg bx": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "cmp wzr, w6, lsl #16",
        "neg w26, w6",
        "mov x20, x6",
        "bfxil x20, x26, #0, #16",
        "mov x27, x6",
        "mov x6, x20"
      ]
    },
    "neg ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "negs w26, w6",
        "mov x27, x6",
        "mov x6, x26"
      ]
    },
    "neg rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "negs x26, x6",
        "mov x27, x6",
        "mov x6, x26"
      ]
    },
    "mul bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "uxth x20, w6",
        "uxth x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "ubfx x20, x20, #16, #16",
        "bfxil x5, x20, #0, #16",
        "cmp x20, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "mul ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov w21, w4",
        "mul x20, x20, x21",
        "mov w4, w20",
        "lsr x5, x20, #32",
        "cmp x5, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "mul rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "umulh x5, x6, x4",
        "mul x4, x6, x4",
        "cmp x5, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "sxth x20, w6",
        "sxth x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "sbfx x21, x20, #16, #16",
        "bfxil x5, x21, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul ebx": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "sxtw x20, w6",
        "sxtw x21, w4",
        "mul x20, x20, x21",
        "mov w4, w20",
        "lsr x5, x20, #32",
        "asr x21, x20, #32",
        "sxtw x20, w20",
        "sbfx x20, x20, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "smulh x5, x6, x4",
        "mul x4, x6, x4",
        "asr x20, x4, #63",
        "cmp x5, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "div bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf7 /6",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w0, w4",
        "bfi w0, w5, #16, #16",
        "udiv w22, w0, w20",
        "msub w21, w22, w20, w0",
        "bfxil x4, x22, #0, #16",
        "bfxil x5, x21, #0, #16"
      ]
    },
    "inc al": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP3 0xfe /0",
      "ExpectedArm64ASM": [
        "uxtb w27, w4",
        "add w26, w27, #0x1 (1)",
        "setf8 w26",
        "bic w20, w26, w27",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "dec al": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP3 0xfe /1",
      "ExpectedArm64ASM": [
        "uxtb w27, w4",
        "sub w26, w27, #0x1 (1)",
        "setf8 w26",
        "bic w20, w27, w26",
        "rmif x20, #7, #nzcV",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "inc ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP4 0xfe /0",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "add w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w26, w27",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "inc eax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP4 0xfe /0",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds w26, w4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "inc rax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP4 0xfe /0",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds x26, x4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP4 0xfe /1",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "sub w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w27, w26",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "dec eax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP4 0xfe /1",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs w26, w4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec rax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP4 0xfe /1",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs x26, x4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Primary_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FlagM",
      "FlagM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "push es": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x06",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #960]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop es": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x07",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #960]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #976]"
      ]
    },
    "push cs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0e",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #962]",
        "str w20, [x8, #-4]!"
      ]
    },
    "push ss": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x16",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #964]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop ss": {
      "ExpectedInstructionCount": 22,
      "Comment": "0x17",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "ldrb w21, [x28, #1016]",
        "mov w22, #0x1",
        "and w21, w21, #0x1",
        "ldrb w23, [x28, #1016]",
        "and w23, w23, #0xfffffffe",
        "mrs x12, nzcv",
        "cmp x21, #0x0 (0)",
        "csel x21, x23, x22, eq",
        "strb w21, [x28, #1016]",
        "strh w20, [x28, #964]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #984]",
        "msr nzcv, x12"
      ]
    },
    "push ds": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x1e",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #966]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop ds": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x1f",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #966]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #988]"
      ]
    },
    "daa": {
      "ExpectedInstructionCount": 21,
      "Comment": "0x27",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, hs",
        "and x22, x20, #0xf",
        "cmp x22, #0x9 (9)",
        "cset x22, hi",
        "eor x23, x27, x26",
        "ubfx w23, w23, #4, #1",
        "orr x22, x23, x22",
        "cmp x20, #0x99 (153)",
        "cset x23, ls",
        "and x21, x21, x23",
        "add x23, x20, #0x6 (6)",
        "cmp x22, #0x0 (0)",
        "csel x20, x23, x20, ne",
        "add x23, x20, #0x60 (96)",
        "cmp x21, #0x0 (0)",
        "csel x26, x23, x20, eq",
        "bfxil x4, x26, #0, #8",
        "cmn wzr, w26, lsl #24",
        "rmif x21, #63, #nzCv",
        "eor w27, w26, w22, lsl #4"
      ]
    },
    "das": {
      "ExpectedInstructionCount": 25,
      "Comment": "0x2f",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "and x22, x20, #0xf",
        "cmp x22, #0x9 (9)",
        "cset x22, hi",
        "eor x23, x27, x26",
        "ubfx w23, w23, #4, #1",
        "orr x22, x23, x22",
        "cmp x20, #0x99 (153)",
        "cset x23, hi",
        "orr x21, x21, x23",
        "cmp x20, #0x6 (6)",
        "csel x23, x22, x21, lo",
        "orr w23, w21, w23",
        "sub x12, x20, #0x6 (6)",
        "cmp x22, #0x0 (0)",
        "csel x20, x12, x20, ne",
        "sub x12, x20, #0x60 (96)",
        "cmp x21, #0x0 (0)",
        "csel x26, x12, x20, ne",
        "bfxil x4, x26, #0, #8",
        "cmn wzr, w26, lsl #24",
        "eor x20, x23, #0x1",
        "rmif x20, #63, #nzCv",
        "eor w27, w26, w22, lsl #4"
      ]
    },
    "aaa": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x37",
      "ExpectedArm64ASM": [
        "and x20, x4, #0xf",
        "cmp x20, #0x9 (9)",
        "cset x20, hi",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x21, x20",
        "cmp wzr, w20",
        "eor w27, w26, w20, lsl #4",
        "add w20, w4, #0x106 (262)",
        "csel w20, w20, w4, lo",
        "mov w21, #0xff0f",
        "and w20, w20, w21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "aas": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x3f",
      "ExpectedArm64ASM": [
        "and x20, x4, #0xf",
        "cmp x20, #0x9 (9)",
        "cset x20, hi",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x21, x20",
        "cmp wzr, w20",
        "eor w27, w26, w20, lsl #4",
        "sub w20, w4, #0x106 (262)",
        "csel w20, w20, w4, lo",
        "mov w21, #0xff0f",
        "and w20, w20, w21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "inc ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x40",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "add w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w26, w27",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "inc eax": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x40",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds w26, w4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x48",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "sub w26, w27, #0x1 (1)",
        "setf16 w26",
        "bic w20, w27, w26",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "push ax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "strh w4, [x8, #-2]!"
      ]
    },
    "push eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "str w4, [x8, #-4]!"
      ]
    },
    "dec eax": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x48",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs w26, w4, #0x1 (1)",
        "rmif x20, #63, #nzCv",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "pusha": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x60",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "stp w7, w4, [x8, #-8]!",
        "stp w6, w5, [x8, #-8]!",
        "stp w9, w20, [x8, #-8]!",
        "stp w11, w10, [x8, #-8]!"
      ]
    },
    "pushad": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x60",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "stp w7, w4, [x8, #-8]!",
        "stp w6, w5, [x8, #-8]!",
        "stp w9, w20, [x8, #-8]!",
        "stp w11, w10, [x8, #-8]!"
      ]
    },
    "popa": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x61",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "ldp w11, w10, [x20], #8",
        "ldr w9, [x20], #4",
        "add x20, x20, #0x4 (4)",
        "mov x8, x20",
        "ldp w6, w5, [x8], #8",
        "ldp w7, w4, [x8], #8"
      ]
    },
    "popad": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x61",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "ldp w11, w10, [x20], #8",
        "ldr w9, [x20], #4",
        "add x20, x20, #0x4 (4)",
        "mov x8, x20",
        "ldp w6, w5, [x8], #8",
        "ldp w7, w4, [x8], #8"
      ]
    },
    "o16 pushf": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "strh w20, [x8, #-2]!"
      ]
    },
    "pushfd": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "str w20, [x8, #-4]!"
      ]
    },
    "aam": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xd4",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "mov w21, #0xa",
        "udiv x22, x20, x21",
        "msub x12, x22, x21, x20",
        "add x26, x12, x22, lsl #8",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "aad": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xd5",
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "mov w21, #0xa",
        "mul x20, x20, x21",
        "add x20, x4, x20",
        "and x26, x20, #0xff",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "db 0xd4, 0x40": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "aam with a different immediate byte base",
        "0xd4"
      ],
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "mov w21, #0x40",
        "udiv x22, x20, x21",
        "msub x12, x22, x21, x20",
        "add x26, x12, x22, lsl #8",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "db 0xd5, 0x40": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "aad with a different immediate byte base",
        "0xd5"
      ],
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "mov w21, #0x40",
        "mul x20, x20, x21",
        "add x20, x4, x20",
        "and x26, x20, #0xff",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "cfinv"
      ]
    },
    "salc": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xd6",
      "ExpectedArm64ASM": [
        "csetm w20, lo",
        "bfxil x4, x20, #0, #8"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Secondary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Instructions": {
    "ucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0x2e",
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "comiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0x2f",
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "cmovo ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, vs",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovo eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, vs"
      ]
    },
    "cmovo rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, vs"
      ]
    },
    "cmovno ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, vc",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovno eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, vc"
      ]
    },
    "cmovno rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, vc"
      ]
    },
    "cmovb ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, lo",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovb eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, lo"
      ]
    },
    "cmovb rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, lo"
      ]
    },
    "cmovnb ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, hs",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnb eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, hs"
      ]
    },
    "cmovnb rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, hs"
      ]
    },
    "cmovz ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovz eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, eq"
      ]
    },
    "cmovz rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, eq"
      ]
    },
    "cmovnz ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnz eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ne"
      ]
    },
    "cmovnz rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ne"
      ]
    },
    "cmovbe ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ls",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovbe eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ls"
      ]
    },
    "cmovbe rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ls"
      ]
    },
    "cmovnbe ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, hi",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnbe eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, hi"
      ]
    },
    "cmovnbe rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, hi"
      ]
    },
    "cmovs ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, mi",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovs eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, mi"
      ]
    },
    "cmovs rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, mi"
      ]
    },
    "cmovns ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, pl",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovns eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, pl"
      ]
    },
    "cmovns rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, pl"
      ]
    },
    "cmovpe ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x21"
      ]
    },
    "cmovpe eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w4, w6, w4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovpe rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel x4, x6, x4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovnp ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x21"
      ]
    },
    "cmovnp eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w4, w6, w4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovnp rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel x4, x6, x4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovl ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, lt",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovl eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, lt"
      ]
    },
    "cmovl rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, lt"
      ]
    },
    "cmovnl ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ge",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnl eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ge"
      ]
    },
    "cmovnl rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ge"
      ]
    },
    "cmovle ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, le",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovle eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, le"
      ]
    },
    "cmovle rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, le"
      ]
    },
    "cmovnle ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, gt",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnle eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, gt"
      ]
    },
    "cmovnle rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, gt"
      ]
    },
    "seto al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x90",
      "ExpectedArm64ASM": [
        "cset x20, vs",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setno al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x91",
      "ExpectedArm64ASM": [
        "cset x20, vc",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setb al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x92",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnb al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x93",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setz al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x94",
      "ExpectedArm64ASM": [
        "cset x20, eq",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnz al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x95",
      "ExpectedArm64ASM": [
        "cset x20, ne",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setbe al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x96",
      "ExpectedArm64ASM": [
        "cset x20, ls",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnbe al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x97",
      "ExpectedArm64ASM": [
        "cset x20, hi",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "sets al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x98",
      "ExpectedArm64ASM": [
        "cset x20, mi",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setns al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x99",
      "ExpectedArm64ASM": [
        "cset x20, pl",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setpe al": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x9a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "and w20, w20, #0x1",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnp al": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x9b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "and w20, w20, #0x1",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setl al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lt",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnl al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9d",
      "ExpectedArm64ASM": [
        "cset x20, ge",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setle al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9e",
      "ExpectedArm64ASM": [
        "cset x20, le",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnle al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9f",
      "ExpectedArm64ASM": [
        "cset x20, gt",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "bt ax, bx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w20, w4, w20",
        "rmif x20, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt [rax], bx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt eax, ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "rmif x20, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt [rax], ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt rax, rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "rmif x20, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt [rax], rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "shld ax, bx, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x22, x21, #1",
        "lsr w20, w20, #15",
        "orr x26, x22, x20",
        "cmn wzr, w26, lsl #16",
        "eor x20, x21, #0x8000",
        "rmif x20, #14, #nzCv",
        "eor w20, w26, w21",
        "rmif x20, #15, #nzcV",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shld ax, bx, 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x22, x21, #15",
        "lsr w20, w20, #1",
        "orr x26, x22, x20",
        "cmn wzr, w26, lsl #16",
        "eor x20, x21, #0x2",
        "rmif x20, #0, #nzCv",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shld ax, bx, 16": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x21, x21, #16",
        "orr x26, x21, x20",
        "cmn wzr, w26, lsl #16",
        "bfxil x4, x26, #0, #16",
        "cfinv"
      ]
    },
    "shld ax, bx, 31": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x21, x21, #31",
        "lsr w20, w20, #17",
        "orr x26, x21, x20",
        "cmn wzr, w26, lsl #16",
        "bfxil x4, x26, #0, #16",
        "cfinv"
      ]
    },
    "shld eax, ebx, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #31",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x80000000",
        "rmif x20, #30, #nzCv",
        "eor w20, w26, w4",
        "rmif x20, #31, #nzcV",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #17",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x20000",
        "rmif x20, #16, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 16": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #16",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x10000",
        "rmif x20, #15, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 31": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #63",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x8000000000000000",
        "rmif x20, #62, #nzCv",
        "eor x20, x26, x4",
        "rmif x20, #63, #nzcV",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #49",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2000000000000",
        "rmif x20, #48, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 32": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #32",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x100000000",
        "rmif x20, #31, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 63": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "rmif x20, #0, #nzCv",
        "mov x4, x26"
      ]
    },
    "shld ax, bx, cl": {
      "ExpectedInstructionCount": 21,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "and x22, x7, #0x1f",
        "mov w23, #0x10",
        "sub x23, x23, x22",
        "lsl x24, x21, x22",
        "lsr w20, w20, w23",
        "orr x20, x24, x20",
        "mrs x23, nzcv",
        "cmp x22, #0x0 (0)",
        "csel x20, x21, x20, eq",
        "msr nzcv, x23",
        "and w0, w22, #0x1f",
        "cbz w0, #+0x1c",
        "cmn wzr, w20, lsl #16",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w21, w20",
        "rmif x0, #15, #nzCv",
        "rmif x2, #15, #nzcV",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "shld eax, ebx, cl": {
      "ExpectedInstructionCount": 20,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "and x21, x7, #0x1f",
        "neg x22, x21",
        "lsl x23, x20, x21",
        "lsr w22, w6, w22",
        "orr x22, x23, x22",
        "mrs x23, nzcv",
        "cmp x21, #0x0 (0)",
        "csel x22, x20, x22, eq",
        "msr nzcv, x23",
        "and w0, w21, #0x1f",
        "cbz w0, #+0x20",
        "ands w26, w22, w22",
        "neg w0, w21",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w22",
        "rmif x0, #63, #nzCv",
        "rmif x2, #31, #nzcV",
        "mov w4, w22"
      ]
    },
    "shld rax, rbx, cl": {
      "ExpectedInstructionCount": 19,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "neg x21, x20",
        "lsl x22, x4, x20",
        "lsr x21, x6, x21",
        "orr x21, x22, x21",
        "mrs x22, nzcv",
        "cmp x20, #0x0 (0)",
        "csel x21, x4, x21, eq",
        "msr nzcv, x22",
        "and w0, w20, #0x3f",
        "cbz x0, #+0x20",
        "ands x26, x21, x21",
        "neg x0, x20",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x21",
        "rmif x0, #63, #nzCv",
        "rmif x2, #63, #nzcV",
        "mov x4, x21"
      ]
    },
    "bts ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w21, w4, w20",
        "rmif x21, #63, #nzCv",
        "mov w21, #0x1",
        "lsl w20, w21, w20",
        "orr w20, w4, w20",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "bts [rax], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "rmif x20, #63, #nzCv",
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "orr w4, w4, w20",
        "cfinv"
      ]
    },
    "bts [rax], ebx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "rmif x20, #63, #nzCv",
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "orr x4, x4, x20",
        "cfinv"
      ]
    },
    "bts [rax], rbx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts [rax], bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts [rax], ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts [rax], rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "imul ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "sxth x21, w6",
        "mul x20, x20, x21",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "smull x20, w4, w6",
        "asr x20, x20, #32",
        "mul w4, w4, w6",
        "sbfx x21, x4, #31, #1",
        "cmp x20, x21",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "smulh x20, x4, x6",
        "mul x4, x4, x6",
        "asr x21, x4, #63",
        "cmp x20, x21",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rsi, rax, 0xffffffff8646c299": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffc299",
        "movk x20, #0x8646, lsl #16",
        "smulh x21, x4, x20",
        "mul x10, x4, x20",
        "asr x20, x10, #63",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "cmpxchg cl, bl": {
      "ExpectedInstructionCount": 7,
      "ExpectedArm64ASM": [
        "eor x27, x4, x7",
        "lsl w0, w4, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w4, w7",
        "bfxil x4, x7, #0, #8",
        "csel x20, x6, x7, eq",
        "bfxil x7, x20, #0, #8"
      ]
    },
    "cmpxchg cx, bx": {
      "ExpectedInstructionCount": 7,
      "ExpectedArm64ASM": [
        "eor x27, x4, x7",
        "lsl w0, w4, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w4, w7",
        "bfxil x4, x7, #0, #16",
        "csel x20, x6, x7, eq",
        "bfxil x7, x20, #0, #16"
      ]
    },
    "cmpxchg ecx, ebx": {
      "ExpectedInstructionCount": 6,
      "ExpectedArm64ASM": [
        "mov w20, w7",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "csel x4, x4, x20, eq",
        "mov w20, w6",
        "csel x7, x20, x7, eq"
      ]
    },
    "cmpxchg rcx, rbx": {
      "ExpectedInstructionCount": 6,
      "ExpectedArm64ASM": [
        "eor x27, x4, x7",
        "subs x26, x4, x7",
        "csel x20, x6, x7, eq",
        "mov x21, x7",
        "mov x7, x20",
        "mov x4, x21"
      ]
    },
    "cmpxchg [rcx], rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "casal x4, x6, [x7]",
        "eor x27, x20, x4",
        "subs x26, x20, x4"
      ]
    },
    "cmpxchg al, bl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xb0",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "lsl w0, w4, #24",
        "cmp w0, w4, lsl #24",
        "sub w26, w4, w4",
        "bfxil x4, x6, #0, #8"
      ]
    },
    "cmpxchg [rcx], bl": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "uxtb x21, w4",
        "mov w1, w4",
        "casalb w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, w20",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "cmpxchg ax, bx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "lsl w0, w4, #16",
        "cmp w0, w4, lsl #16",
        "sub w26, w4, w4",
        "bfxil x4, x6, #0, #16"
      ]
    },
    "cmpxchg [rcx], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth x21, w4",
        "mov w1, w4",
        "casalh w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmpxchg eax, ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "subs w26, w4, w4",
        "mov x4, x6"
      ]
    },
    "cmpxchg [rcx], ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov w21, w4",
        "mov w1, w4",
        "casal w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "subs w26, w21, w20",
        "csel x4, x4, x20, eq"
      ]
    },
    "cmpxchg rax, rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "subs x26, x4, x4",
        "mov x4, x6"
      ]
    },
    "btr ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w21, w4, w20",
        "rmif x21, #63, #nzCv",
        "mov w21, #0x1",
        "lsl w20, w21, w20",
        "bic w20, w4, w20",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "btr [rax], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "rmif x20, #63, #nzCv",
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "bic w4, w4, w20",
        "cfinv"
      ]
    },
    "btr [rax], ebx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "rmif x20, #63, #nzCv",
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "bic x4, x4, x20",
        "cfinv"
      ]
    },
    "btr [rax], rbx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr [rax], bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr [rax], ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr [rax], rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "mov w21, #0x1",
        "lsl w21, w21, w20",
        "eor w21, w4, w21",
        "lsr w20, w21, w20",
        "rmif x20, #63, #nzCv",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "btc [rax], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "eor w4, w4, w20",
        "lsr w20, w4, w6",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc [rax], ebx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "eor x4, x4, x20",
        "lsr x20, x4, x6",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc [rax], rbx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc [rax], bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc [rax], ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc [rax], rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bsf ax, bx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w0, w6",
        "clz w20, w0",
        "tst w6, #0xffff",
        "csel x20, x4, x20, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "bsf eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w0, w6",
        "clz w20, w0",
        "tst w6, w6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsf rax, rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit x0, x6",
        "clz x20, x0",
        "tst x6, x6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsr ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0xf",
        "lsl w20, w6, #16",
        "clz w20, w20",
        "sub x20, x0, x20",
        "tst w6, #0xffff",
        "csel x20, x4, x20, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "bsr eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0x1f",
        "clz w20, w6",
        "sub x20, x0, x20",
        "tst w6, w6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsr rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0x3f",
        "clz x20, x6",
        "sub x20, x0, x20",
        "tst x6, x6",
        "csel x4, x4, x20, eq"
      ]
    },
    "xadd al, bl": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "uxtb w21, w6",
        "eor x27, x20, x21",
        "lsl w0, w20, #24",
        "cmn w0, w21, lsl #24",
        "add w26, w20, w21",
        "bfxil x6, x20, #0, #8",
        "bfxil x4, x26, #0, #8",
        "cfinv"
      ]
    },
    "xadd [rax], bl": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "ldaddalb w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #8",
        "cfinv"
      ]
    },
    "xadd ax, bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "uxth w21, w6",
        "eor x27, x20, x21",
        "lsl w0, w20, #16",
        "cmn w0, w21, lsl #16",
        "add w26, w20, w21",
        "bfxil x6, x20, #0, #16",
        "bfxil x4, x26, #0, #16",
        "cfinv"
      ]
    },
    "xadd [rax], bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "ldaddalh w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #16",
        "cfinv"
      ]
    },
    "xadd eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "mov w21, w6",
        "eor x27, x20, x21",
        "adds w26, w20, w21",
        "cfinv",
        "mov x6, x20",
        "mov x4, x26"
      ]
    },
    "xadd [rax], ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldaddal w20, w6, [x4]",
        "eor x27, x6, x20",
        "adds w26, w6, w20",
        "cfinv"
      ]
    },
    "xadd rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "eor x27, x4, x6",
        "adds x26, x4, x6",
        "cfinv",
        "mov x6, x4",
        "mov x4, x26"
      ]
    },
    "xadd [rax], rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "ldaddal x6, x20, [x4]",
        "eor x27, x20, x6",
        "adds x26, x20, x6",
        "cfinv",
        "mov x6, x20"
      ]
    },
    "pmovmskb eax, mm0": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xd7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #3296]",
        "cmlt v2.16b, v2.16b, #0",
        "and v2.8b, v2.8b, v3.8b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "maskmovq mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xf7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "cmlt v2.16b, v2.16b, #0",
        "ldr d3, [x28, #1056]",
        "ldr d4, [x11]",
        "bsl v2.8b, v3.8b, v4.8b",
        "str d2, [x11]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/SecondaryGroup.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2",
      "RNG"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "sgdt [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP7 0x0F 0x1 /0",
      "ExpectedArm64ASM": [
        "strh wzr, [x4]",
        "mov x20, #0xfffffffffffe0000",
        "stur x20, [x4, #2]"
      ]
    },
    "bt ax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt rax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "cfinv"
      ]
    },
    "bt ax, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #14, #nzCv",
        "cfinv"
      ]
    },
    "bt eax, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #30, #nzCv",
        "cfinv"
      ]
    },
    "bt rax, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "rmif x4, #62, #nzCv",
        "cfinv"
      ]
    },
    "bt word [rax], 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt dword [rax], 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt qword [rax], 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt word [rax], 15": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt dword [rax], 31": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bt qword [rax], 63": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts ax, 0": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "orr w20, w4, #0x1",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "bts eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "orr w4, w4, #0x1",
        "cfinv"
      ]
    },
    "bts rax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "orr x4, x4, #0x1",
        "cfinv"
      ]
    },
    "bts ax, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #14, #nzCv",
        "orr w20, w4, #0x8000",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "bts eax, 31": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #30, #nzCv",
        "orr w4, w4, #0x80000000",
        "cfinv"
      ]
    },
    "bts rax, 63": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "rmif x4, #62, #nzCv",
        "orr x4, x4, #0x8000000000000000",
        "cfinv"
      ]
    },
    "bts word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bts qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock bts qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr ax, 0": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "and w20, w4, #0xfffffffe",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "btr eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "and w4, w4, #0xfffffffe",
        "cfinv"
      ]
    },
    "btr rax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #63, #nzCv",
        "and x4, x4, #0xfffffffffffffffe",
        "cfinv"
      ]
    },
    "btr ax, 15": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #14, #nzCv",
        "and w20, w4, #0xffff7fff",
        "bfxil x4, x20, #0, #16",
        "cfinv"
      ]
    },
    "btr eax, 31": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #30, #nzCv",
        "and w4, w4, #0x7fffffff",
        "cfinv"
      ]
    },
    "btr rax, 63": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "rmif x4, #62, #nzCv",
        "and x4, x4, #0x7fffffffffffffff",
        "cfinv"
      ]
    },
    "btr word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btr qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btr qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc ax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w20, w4, #0x1",
        "rmif x20, #63, #nzCv",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "btc eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "rmif x4, #63, #nzCv"
      ]
    },
    "btc rax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "rmif x4, #63, #nzCv"
      ]
    },
    "btc ax, 15": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w20, w4, #0x8000",
        "rmif x20, #14, #nzCv",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "btc eax, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x80000000",
        "rmif x4, #30, #nzCv"
      ]
    },
    "btc rax, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x8000000000000000",
        "rmif x4, #62, #nzCv"
      ]
    },
    "btc word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "btc qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc word [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc dword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc qword [rax], 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc word [rax], 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc dword [rax], 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "lock btc qword [rax], 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "rmif x20, #63, #nzCv"
      ]
    },
    "cmpxchg8b [rbp]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "mov x21, x5",
        "caspal w20, w21, w6, w7, [x9]",
        "mrs x0, nzcv",
        "cmp w20, w4",
        "ccmp w21, w5, #nzcv, eq",
        "rmif x0, #0, #NzCV",
        "csel x4, x20, x4, ne",
        "csel x5, x21, x5, ne"
      ]
    },
    "cmpxchg16b [rbp]": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "mov x21, x5",
        "caspal x20, x21, x6, x7, [x9]",
        "mrs x0, nzcv",
        "cmp x20, x4",
        "ccmp x21, x5, #nzcv, eq",
        "rmif x0, #0, #NzCV",
        "csel x4, x20, x4, ne",
        "csel x5, x21, x5, ne"
      ]
    },
    "rdrand ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x20, rndr",
        "bfxil x4, x20, #0, #16",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "rdrand eax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x20, rndr",
        "mov w4, w20",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "rdrand rax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x4, rndr",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "rdseed ax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x20, rndrrs",
        "bfxil x4, x20, #0, #16",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "rdseed eax": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x20, rndrrs",
        "mov w4, w20",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "rdseed rax": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x4, rndrrs",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "rmif x20, #63, #NZCV"
      ]
    },
    "psrlw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrlw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrlw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "ushr v16.8h, v16.8h, #15"
      ]
    },
    "psrlw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psraw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": []
    },
    "psraw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": []
    },
    "psraw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "sshr v16.8h, v16.8h, #15"
      ]
    },
    "psraw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "sshr v16.8h, v16.8h, #15"
      ]
    },
    "psllw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "psllw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "psllw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "shl v16.8h, v16.8h, #15"
      ]
    },
    "psllw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrld mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrld mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrld xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "ushr v16.4s, v16.4s, #31"
      ]
    },
    "psrld xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrad mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": []
    },
    "psrad mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": []
    },
    "psrad xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "sshr v16.4s, v16.4s, #31"
      ]
    },
    "psrad xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "sshr v16.4s, v16.4s, #31"
      ]
    },
    "pslld mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "pslld mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "pslld xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "shl v16.4s, v16.4s, #31"
      ]
    },
    "pslld xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrlq mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrlq mm0, 63": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.2d, v2.2d, #63",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq mm0, 64": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": []
    },
    "psrlq xmm0, 63": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "ushr v16.2d, v16.2d, #63"
      ]
    },
    "psrlq xmm0, 64": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrldq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /3",
      "ExpectedArm64ASM": []
    },
    "psrldq xmm0, 15": {
      "ExpectedInstructionCount": 2,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v16.16b, v2.16b, #15"
      ]
    },
    "psrldq xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /3",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psllq mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "psllq mm0, 63": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.2d, v2.2d, #63",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq mm0, 64": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": []
    },
    "psllq xmm0, 63": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "shl v16.2d, v16.2d, #63"
      ]
    },
    "psllq xmm0, 64": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "fxsave [rax]": {
      "ExpectedInstructionCount": 68,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4, #2]",
        "ldrb w20, [x28, #1202]",
        "strb w20, [x4, #4]",
        "ldrb w20, [x28, #1051]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #32]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #64]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #80]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #96]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #112]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #128]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #144]",
        "stp q16, q17, [x4, #160]",
        "stp q18, q19, [x4, #192]",
        "stp q20, q21, [x4, #224]",
        "stp q22, q23, [x4, #256]",
        "stp q24, q25, [x4, #288]",
        "stp q26, q27, [x4, #320]",
        "stp q28, q29, [x4, #352]",
        "stp q30, q31, [x4, #384]",
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "mov w21, #0xffff",
        "stp w20, w21, [x4, #24]"
      ]
    },
    "rdfsbase eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldr w4, [x28, #1000]"
      ]
    },
    "rdfsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldr x4, [x28, #1000]"
      ]
    },
    "fxrstor [rax]": {
      "ExpectedInstructionCount": 48,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldrh w20, [x4, #2]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x4, #4]",
        "strb w20, [x28, #1202]",
        "ldp q2, q3, [x4, #32]",
        "str q2, [x28, #1056]",
        "str q3, [x28, #1072]",
        "ldp q2, q3, [x4, #64]",
        "str q2, [x28, #1088]",
        "str q3, [x28, #1104]",
        "ldp q2, q3, [x4, #96]",
        "str q2, [x28, #1120]",
        "str q3, [x28, #1136]",
        "ldp q2, q3, [x4, #128]",
        "str q2, [x28, #1152]",
        "str q3, [x28, #1168]",
        "ldp q16, q17, [x4, #160]",
        "ldp q18, q19, [x4, #192]",
        "ldp q20, q21, [x4, #224]",
        "ldp q22, q23, [x4, #256]",
        "ldp q24, q25, [x4, #288]",
        "ldp q26, q27, [x4, #320]",
        "ldp q28, q29, [x4, #352]",
        "ldp q30, q31, [x4, #384]",
        "ldr w20, [x4, #24]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0"
      ]
    },
    "rdgsbase eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldr w4, [x28, #992]"
      ]
    },
    "rdgsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldr x4, [x28, #992]"
      ]
    },
    "ldmxcsr [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0"
      ]
    },
    "wrfsbase eax": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "str x20, [x28, #1000]"
      ]
    },
    "wrfsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "str x4, [x28, #1000]"
      ]
    },
    "stmxcsr [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "str w20, [x4]"
      ]
    },
    "wrgsbase eax": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "str x20, [x28, #992]"
      ]
    },
    "wrgsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "str x4, [x28, #992]"
      ]
    },
    "xsave [rax]": {
      "ExpectedInstructionCount": 98,
      "Comment": "GROUP15 0x0F 0xAE /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "cbnz x20, #+0x8",
        "b #+0xe4",
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4, #2]",
        "ldrb w20, [x28, #1202]",
        "strb w20, [x4, #4]",
        "ldrb w20, [x28, #1051]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #32]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #64]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #80]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #96]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #112]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #128]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #144]",
        "ubfx x20, x4, #1, #1",
        "cbnz x20, #+0x8",
        "b #+0x24",
        "stp q16, q17, [x4, #160]",
        "stp q18, q19, [x4, #192]",
        "stp q20, q21, [x4, #224]",
        "stp q22, q23, [x4, #256]",
        "stp q24, q25, [x4, #288]",
        "stp q26, q27, [x4, #320]",
        "stp q28, q29, [x4, #352]",
        "stp q30, q31, [x4, #384]",
        "ubfx x20, x4, #2, #1",
        "cbnz x20, #+0x8",
        "b #+0x44",
        "ldp q2, q3, [x28, #192]",
        "stp q2, q3, [x4, #576]",
        "ldp q2, q3, [x28, #224]",
        "stp q2, q3, [x4, #608]",
        "ldp q2, q3, [x28, #256]",
        "stp q2, q3, [x4, #640]",
        "ldp q2, q3, [x28, #288]",
        "stp q2, q3, [x4, #672]",
        "ldp q2, q3, [x28, #320]",
        "stp q2, q3, [x4, #704]",
        "ldp q2, q3, [x28, #352]",
        "stp q2, q3, [x4, #736]",
        "ldp q2, q3, [x28, #384]",
        "stp q2, q3, [x4, #768]",
        "ldp q2, q3, [x28, #416]",
        "stp q2, q3, [x4, #800]",
        "ubfx x20, x4, #1, #2",
        "cbnz x20, #+0x8",
        "b #+0x14",
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "mov w21, #0xffff",
        "stp w20, w21, [x4, #24]",
        "ubfx x20, x4, #0, #3",
        "str x20, [x4, #512]"
      ]
    },
    "xsaveopt [rax]": {
      "ExpectedInstructionCount": 98,
      "Comment": "GROUP15 0x0F 0xAE /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "cbnz x20, #+0x8",
        "b #+0xe4",
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4, #2]",
        "ldrb w20, [x28, #1202]",
        "strb w20, [x4, #4]",
        "ldrb w20, [x28, #1051]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #32]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #64]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #80]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #96]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #112]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #128]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #144]",
        "ubfx x20, x4, #1, #1",
        "cbnz x20, #+0x8",
        "b #+0x24",
        "stp q16, q17, [x4, #160]",
        "stp q18, q19, [x4, #192]",
        "stp q20, q21, [x4, #224]",
        "stp q22, q23, [x4, #256]",
        "stp q24, q25, [x4, #288]",
        "stp q26, q27, [x4, #320]",
        "stp q28, q29, [x4, #352]",
        "stp q30, q31, [x4, #384]",
        "ubfx x20, x4, #2, #1",
        "cbnz x20, #+0x8",
        "b #+0x44",
        "ldp q2, q3, [x28, #192]",
        "stp q2, q3, [x4, #576]",
        "ldp q2, q3, [x28, #224]",
        "stp q2, q3, [x4, #608]",
        "ldp q2, q3, [x28, #256]",
        "stp q2, q3, [x4, #640]",
        "ldp q2, q3, [x28, #288]",
        "stp q2, q3, [x4, #672]",
        "ldp q2, q3, [x28, #320]",
        "stp q2, q3, [x4, #704]",
        "ldp q2, q3, [x28, #352]",
        "stp q2, q3, [x4, #736]",
        "ldp q2, q3, [x28, #384]",
        "stp q2, q3, [x4, #768]",
        "ldp q2, q3, [x28, #416]",
        "stp q2, q3, [x4, #800]",
        "ubfx x20, x4, #1, #2",
        "cbnz x20, #+0x8",
        "b #+0x14",
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "mov w21, #0xffff",
        "stp w20, w21, [x4, #24]",
        "ubfx x20, x4, #0, #3",
        "str x20, [x4, #512]"
      ]
    },
    "lfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /5",
      "ExpectedArm64ASM": [
        "dmb ld"
      ]
    },
    "xrstor [rax]": {
      "ExpectedInstructionCount": 133,
      "Comment": "GROUP15 0x0F 0xAE /5",
      "ExpectedArm64ASM": [
        "sub sp, sp, #0x40 (64)",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #0, #1",
        "cbnz x20, #+0x8",
        "b #+0x7c",
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldrh w20, [x4, #2]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x4, #4]",
        "strb w20, [x28, #1202]",
        "ldp q2, q3, [x4, #32]",
        "str q2, [x28, #1056]",
        "str q3, [x28, #1072]",
        "ldp q2, q3, [x4, #64]",
        "str q2, [x28, #1088]",
        "str q3, [x28, #1104]",
        "ldp q2, q3, [x4, #96]",
        "str q2, [x28, #1120]",
        "str q3, [x28, #1136]",
        "ldp q2, q3, [x4, #128]",
        "str q2, [x28, #1152]",
        "str q3, [x28, #1168]",
        "b #+0x4c",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]",
        "movi v2.2d, #0x0",
        "str q2, [x28, #1056]",
        "str q2, [x28, #1072]",
        "str q2, [x28, #1088]",
        "str q2, [x28, #1104]",
        "str q2, [x28, #1120]",
        "str q2, [x28, #1136]",
        "str q2, [x28, #1152]",
        "str q2, [x28, #1168]",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #1, #1",
        "cbnz x20, #+0x8",
        "b #+0x28",
        "ldp q16, q17, [x4, #160]",
        "ldp q18, q19, [x4, #192]",
        "ldp q20, q21, [x4, #224]",
        "ldp q22, q23, [x4, #256]",
        "ldp q24, q25, [x4, #288]",
        "ldp q26, q27, [x4, #320]",
        "ldp q28, q29, [x4, #352]",
        "ldp q30, q31, [x4, #384]",
        "b #+0x44",
        "movi v31.2d, #0x0",
        "mov v30.16b, v31.16b",
        "mov v29.16b, v31.16b",
        "mov v28.16b, v31.16b",
        "mov v27.16b, v31.16b",
        "mov v26.16b, v31.16b",
        "mov v25.16b, v31.16b",
        "mov v24.16b, v31.16b",
        "mov v23.16b, v31.16b",
        "mov v22.16b, v31.16b",
        "mov v21.16b, v31.16b",
        "mov v20.16b, v31.16b",
        "mov v19.16b, v31.16b",
        "mov v18.16b, v31.16b",
        "mov v17.16b, v31.16b",
        "mov v16.16b, v31.16b",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #2, #1",
        "cbnz x20, #+0x8",
        "b #+0x58",
        "ldp q2, q3, [x4, #576]",
        "ldp q4, q5, [x4, #608]",
        "ldp q6, q7, [x4, #640]",
        "ldp q8, q9, [x4, #672]",
        "ldp q10, q11, [x4, #704]",
        "ldp q12, q13, [x4, #736]",
        "ldp q14, q15, [x4, #768]",
        "str q2, [sp]",
        "str q3, [sp, #32]",
        "ldp q2, q3, [x4, #800]",
        "stp q2, q3, [x28, #416]",
        "stp q14, q15, [x28, #384]",
        "stp q12, q13, [x28, #352]",
        "stp q10, q11, [x28, #320]",
        "stp q8, q9, [x28, #288]",
        "stp q6, q7, [x28, #256]",
        "stp q4, q5, [x28, #224]",
        "ldr q2, [sp]",
        "ldr q3, [sp, #32]",
        "stp q2, q3, [x28, #192]",
        "b #+0x28",
        "movi v2.2d, #0x0",
        "stp q2, q2, [x28, #416]",
        "stp q2, q2, [x28, #384]",
        "stp q2, q2, [x28, #352]",
        "stp q2, q2, [x28, #320]",
        "stp q2, q2, [x28, #288]",
        "stp q2, q2, [x28, #256]",
        "stp q2, q2, [x28, #224]",
        "stp q2, q2, [x28, #192]",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #1, #2",
        "cbnz x20, #+0x8",
        "b #+0x34",
        "ldr w20, [x4, #24]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "b #+0x4",
        "add sp, sp, #0x40 (64)"
      ]
    },
    "mfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /6",
      "ExpectedArm64ASM": [
        "dmb sy"
      ]
    },
    "clwb [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /6",
      "ExpectedArm64ASM": [
        "dc cvac, x4"
      ]
    },
    "sfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dmb st"
      ]
    },
    "clflush [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dc civac, x4",
        "dsb ish"
      ]
    },
    "clflushopt [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dc civac, x4"
      ]
    },
    "prefetchnta [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /0"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1strm, [x4]"
      ]
    },
    "prefetcht0 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /1"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1keep, [x4]"
      ]
    },
    "prefetcht1 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /2"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl2keep, [x4]"
      ]
    },
    "prefetcht2 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /3"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl3keep, [x4]"
      ]
    },
    "db 0x0f, 0x18, 0x20;": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "GROUP16 0x0F 0x18 /4",
        "nop dword [rax]",
        "NOP implementation"
      ],
      "ExpectedArm64ASM": []
    },
    "db 0x0f, 0x0d, 0x00": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /0",
        "prefetch_exclusive [rax]"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1keep, [x4]"
      ]
    },
    "prefetchw [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /1"
      ],
      "ExpectedArm64ASM": [
        "prfm pstl1keep, [x4]"
      ]
    },
    "prefetchwt1 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /2"
      ],
      "ExpectedArm64ASM": [
        "prfm pstl1keep, [x4]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/SecondaryModRM.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "CLZERO",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "xgetbv": {
      "ExpectedInstructionCount": 52,
      "Comment": "0xF 0x01 /2 RM-0",
      "ExpectedArm64ASM": [
        "sub sp, sp, #0xf0 (240)",
        "mov x3, sp",
        "st1 {v2.2d, v3.2d}, [x3], #32",
        "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x3], #64",
        "st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x3], #64",
        "st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x3], #64",
        "stp x18, x30, [x3], #16",
        "mrs x3, nzcv",
        "str w3, [x28, #1032]",
        "str x25, [x28, #176]",
        "stp x4, x7, [x28, #32]",
        "stp x5, x6, [x28, #48]",
        "stp x8, x9, [x28, #64]",
        "stp x10, x11, [x28, #80]",
        "stp x12, x13, [x28, #96]",
        "stp x14, x15, [x28, #112]",
        "stp x16, x17, [x28, #128]",
        "stp x19, x29, [x28, #144]",
        "stp w26, w27, [x28, #16]",
        "add x3, x28, #0x1c0 (448)",
        "st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #64",
        "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64",
        "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64",
        "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64",
        "mov w1, w7",
        "ldr x0, [x28, #1544]",
        "ldr x2, [x28, #1560]",
        "blr x2",
        "ldr x25, [x28, #176]",
        "ldr w4, [x28, #1032]",
        "msr nzcv, x4",
        "add x4, x28, #0x1c0 (448)",
        "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x4], #64",
        "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x4], #64",
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x4], #64",
        "ld1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x4], #64",
        "ldp x4, x7, [x28, #32]",
        "ldp x5, x6, [x28, #48]",
        "ldp x8, x9, [x28, #64]",
        "ldp x10, x11, [x28, #80]",
        "ldp x12, x13, [x28, #96]",
        "ldp x14, x15, [x28, #112]",
        "ldp x16, x17, [x28, #128]",
        "ldp x19, x29, [x28, #144]",
        "ldp w26, w27, [x28, #16]",
        "ld1 {v2.2d, v3.2d}, [sp], #32",
        "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
        "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64",
        "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64",
        "ldp x18, x30, [sp], #16",
        "mov w4, w0",
        "lsr x5, x0, #32"
      ]
    },
    "rdtscp": {
      "Skip": "Yes",
      "ExpectedInstructionCount": 21,
      "Comment": "0xF 0x01 /7 RM-1",
      "ExpectedArm64ASM": [
        "dmb ld",
        "mrs x20, S3_3_c14_c0_2",
        "lsl w4, w20, #7",
        "lsr x5, x20, #25",
        "mrs x0, nzcv",
        "str w0, [x28, #1000]",
        "str x8, [x28, #312]",
        "mov w0, #0x100",
        "str x0, [x28, #1312]",
        "sub sp, sp, #0x10 (16)",
        "mov w8, #0xa8",
        "mov x0, sp",
        "add x1, sp, #0x4 (4)",
        "svc #0x0",
        "ldp w0, w1, [sp]",
        "sub sp, sp, #0x10 (16)",
        "ldr w8, [x28, #1000]",
        "msr nzcv, x8",
        "ldr x8, [x28, #312]",
        "str xzr, [x28, #1312]",
        "orr x7, x0, x1, lsl #12"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Secondary_OpSize.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FCMA"
    ]
  },
  "Instructions": {
    "ucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0x2e",
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "comisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0x2f",
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "pmovmskb eax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x66 0x0f 0xd7",
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3296]",
        "cmlt v3.16b, v16.16b, #0",
        "and v2.16b, v3.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "maskmovdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xf7",
      "ExpectedArm64ASM": [
        "cmlt v2.16b, v17.16b, #0",
        "ldr q3, [x11]",
        "bsl v2.16b, v16.16b, v3.16b",
        "str q2, [x11]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Secondary_REP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP",
      "CSSC"
    ]
  },
  "Instructions": {
    "popcnt ax, bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "fmov s0, w20",
        "cnt v0.8b, v0.8b",
        "addp v0.8b, v0.8b, v0.8b",
        "umov w20, v0.b[0]",
        "bfxil x4, x20, #0, #16",
        "mov w27, #0x0",
        "cmp w20, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "fmov s0, w6",
        "cnt v0.8b, v0.8b",
        "addv b0, v0.8b",
        "umov w4, v0.b[0]",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "fmov d0, x6",
        "cnt v0.8b, v0.8b",
        "addv b0, v0.8b",
        "umov w4, v0.b[0]",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "tzcnt ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w20, w6",
        "orr w20, w20, #0x8000",
        "clz w20, w20",
        "bfxil x4, x20, #0, #16",
        "cmn wzr, w20, lsl #16",
        "eor x20, x20, #0x10",
        "rmif x20, #3, #nzCv"
      ]
    },
    "tzcnt eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w4, w6",
        "clz w4, w4",
        "cmp w4, #0x0 (0)",
        "eor x20, x4, #0x20",
        "rmif x20, #4, #nzCv"
      ]
    },
    "tzcnt rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit x4, x6",
        "clz x4, x4",
        "cmp x4, #0x0 (0)",
        "eor x20, x4, #0x40",
        "rmif x20, #5, #nzCv"
      ]
    },
    "lzcnt ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "lsl w20, w6, #16",
        "orr w20, w20, #0x8000",
        "clz w20, w20",
        "bfxil x4, x20, #0, #16",
        "cmn wzr, w20, lsl #16",
        "eor x20, x20, #0x10",
        "rmif x20, #3, #nzCv"
      ]
    },
    "lzcnt eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "clz w4, w6",
        "cmp w4, #0x0 (0)",
        "eor x20, x4, #0x20",
        "rmif x20, #4, #nzCv"
      ]
    },
    "lzcnt rax, rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "clz x4, x6",
        "cmp x4, #0x0 (0)",
        "eor x20, x4, #0x40",
        "rmif x20, #5, #nzCv"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/Secondary_REP_CSSC.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2",
      "CSSC"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ]
  },
  "Instructions": {
    "popcnt ax, bx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "cnt w20, w20",
        "bfxil x4, x20, #0, #16",
        "mov w27, #0x0",
        "cmp w20, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "cnt w4, w6",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt rax, rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "cnt x4, x6",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/VEX_map1.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "FCMA",
      "RPRES",
      "AFP"
    ]
  },
  "Instructions": {
    "vucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vcomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vcomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "axflag"
      ]
    },
    "vpmovmskb rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xd7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3296]",
        "cmlt v3.16b, v16.16b, #0",
        "and v2.16b, v3.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "vpmovmskb rax, ymm0": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 1 0b01 0xd7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2496]",
        "ld1b {z2.b}, p7/z, [x0]",
        "mrs x0, nzcv",
        "mov z0.d, #0",
        "cmplt p0.b, p7/z, z16.b, #0",
        "not z0.b, p0/m, z16.b",
        "orr z0.b, p0/m, z0.b, z16.b",
        "mov z3.d, z0.d",
        "msr nzcv, x0",
        "and z2.d, z3.d, z2.d",
        "movprfx z0, z2",
        "addp z0.b, p7/m, z0.b, z2.b",
        "uzp1 z2.b, z0.b, z0.b",
        "uzp2 z1.b, z0.b, z0.b",
        "splice z2.d, p6, z2.d, z1.d",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "mov w4, v2.s[0]"
      ]
    },
    "vmaskmovdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmlt v2.16b, v17.16b, #0",
        "ldr q3, [x11]",
        "bsl v2.16b, v16.16b, v3.16b",
        "str q2, [x11]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/VEX_map2.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "SVEBITPERM"
    ]
  },
  "Instructions": {
    "vtestps xmm0, xmm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "dup v2.4s, w20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestps ymm0, ymm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "mov z2.s, w20",
        "and z3.d, z17.d, z16.d",
        "bic z4.d, z17.d, z16.d",
        "and z3.d, z3.d, z2.d",
        "and z2.d, z4.d, z2.d",
        "umaxv h3, p7, z3.h",
        "umaxv h2, p7, z2.h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestpd xmm0, xmm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "dup v2.2d, x20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vtestpd ymm0, ymm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "mov z2.d, x20",
        "and z3.d, z17.d, z16.d",
        "bic z4.d, z17.d, z16.d",
        "and z3.d, z3.d, z2.d",
        "and z2.d, z4.d, z2.d",
        "umaxv h3, p7, z3.h",
        "umaxv h2, p7, z2.h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vptest xmm0, xmm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vptest ymm0, ymm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b01 0x17 256-bit"
      ],
      "ExpectedArm64ASM": [
        "and z2.d, z16.d, z17.d",
        "bic z3.d, z17.d, z16.d",
        "umaxv h2, p7, z2.h",
        "umaxv h3, p7, z3.h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "rmif x21, #63, #nzCv",
        "mov w26, #0x1"
      ]
    },
    "vmaskmovps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z2.s}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z2.d}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z2.s}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z2.d}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "andn eax, ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 32-bit"
      ],
      "ExpectedArm64ASM": [
        "bic w4, w7, w6",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "andn rax, rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 64-bit"
      ],
      "ExpectedArm64ASM": [
        "bic x4, x7, x6",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "bzhi eax, ebx, ecx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b00 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl w20, w20, w7",
        "bic w20, w6, w20",
        "tst x7, #0xe0",
        "csel w4, w6, w20, ne",
        "cset x20, eq",
        "cmp w4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "bzhi rax, rbx, rcx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 2 0b00 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl x20, x20, x7",
        "bic x20, x6, x20",
        "tst x7, #0xc0",
        "csel x4, x6, x20, ne",
        "cset x20, eq",
        "cmp x4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "pdep eax, ebx, ecx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov w4, #0x0",
        "cbz w7, #+0x2c",
        "neg w2, w1",
        "and w2, w2, w1",
        "sbfx w3, w0, #0, #1",
        "eor w1, w1, w2",
        "and w2, w3, w2",
        "neg w3, w1",
        "orr w4, w4, w2",
        "lsr w0, w0, #1",
        "and w2, w1, w3",
        "cbnz w2, #-0x1c"
      ]
    },
    "pdep rax, rbx, rcx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov x4, #0x0",
        "cbz x7, #+0x2c",
        "neg x2, x1",
        "and x2, x2, x1",
        "sbfx x3, x0, #0, #1",
        "eor x1, x1, x2",
        "and x2, x3, x2",
        "neg x3, x1",
        "orr x4, x4, x2",
        "lsr x0, x0, #1",
        "and x2, x1, x3",
        "cbnz x2, #-0x1c"
      ]
    },
    "bextr eax, ebx, ecx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb w20, w7",
        "lsr w21, w6, w20",
        "mov w22, #0x0",
        "cmp w20, #0x1f (31)",
        "csel w20, w21, w22, ls",
        "ubfx w21, w7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl w22, w22, w21",
        "bic w22, w20, w22",
        "cmp w21, #0x1f (31)",
        "csel w4, w22, w20, ls",
        "cmp w4, #0x0 (0)"
      ]
    },
    "bextr rax, rbx, rcx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb x20, w7",
        "lsr x21, x6, x20",
        "mov w22, #0x0",
        "cmp x20, #0x3f (63)",
        "csel x20, x21, x22, ls",
        "ubfx x21, x7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl x22, x22, x21",
        "bic x22, x20, x22",
        "cmp x21, #0x3f (63)",
        "csel x4, x22, x20, ls",
        "cmp x4, #0x0 (0)"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/VEX_map_group.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "AFP"
    ]
  },
  "Instructions": {
    "blsr eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map group 17 0b001 32-bit"
      ],
      "ExpectedArm64ASM": [
        "sub w20, w6, #0x1 (1)",
        "and w4, w20, w6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp w4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "blsr rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map group 17 0b001 64-bit"
      ],
      "ExpectedArm64ASM": [
        "sub x20, x6, #0x1 (1)",
        "and x4, x20, x6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp x4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "blsmsk eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map group 17 0b010 32-bit"
      ],
      "ExpectedArm64ASM": [
        "sub w20, w6, #0x1 (1)",
        "eor w4, w20, w6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp w4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "blsmsk rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map group 17 0b010 64-bit"
      ],
      "ExpectedArm64ASM": [
        "sub x20, x6, #0x1 (1)",
        "eor x4, x20, x6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp x4, #0x0 (0)",
        "rmif x20, #63, #nzCv"
      ]
    },
    "blsi eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map group 17 0b011 32-bit"
      ],
      "ExpectedArm64ASM": [
        "neg w20, w6",
        "and w4, w6, w20",
        "cmp w4, #0x0 (0)",
        "cset x20, eq",
        "rmif x20, #63, #nzCv"
      ]
    },
    "blsi rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map group 17 0b011 64-bit"
      ],
      "ExpectedArm64ASM": [
        "neg x20, x6",
        "and x4, x6, x20",
        "cmp x4, #0x0 (0)",
        "cset x20, eq",
        "rmif x20, #63, #nzCv"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87-Crysis2Max-fmodel.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "sub esp,0x104",
        "mov eax,dword  [ebp + 0x10]",
        "fld dword  [eax]",
        "mov ecx,dword  [0x100de354]",
        "fadd dword  [eax + 0x7c]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [eax]",
        "fsub dword  [eax + 0x7c]",
        "fmul dword  [ecx]",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [eax + 0x78]",
        "fadd dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [eax + 0x4]",
        "fsub dword  [eax + 0x78]",
        "fmul dword  [ecx + 0x4]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [eax + 0x74]",
        "fadd dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [eax + 0x8]",
        "fsub dword  [eax + 0x74]",
        "fmul dword  [ecx + 0x8]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [eax + 0x70]",
        "fadd dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [eax + 0xc]",
        "fsub dword  [eax + 0x70]",
        "fmul dword  [ecx + 0xc]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [eax + 0x6c]",
        "fadd dword  [eax + 0x10]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [eax + 0x10]",
        "fsub dword  [eax + 0x6c]",
        "fmul dword  [ecx + 0x10]",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [eax + 0x68]",
        "fadd dword  [eax + 0x14]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [eax + 0x14]",
        "fsub dword  [eax + 0x68]",
        "fmul dword  [ecx + 0x14]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [eax + 0x64]",
        "fadd dword  [eax + 0x18]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [eax + 0x18]",
        "fsub dword  [eax + 0x64]",
        "fmul dword  [ecx + 0x18]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [eax + 0x60]",
        "fadd dword  [eax + 0x1c]",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [eax + 0x1c]",
        "fsub dword  [eax + 0x60]",
        "fmul dword  [ecx + 0x1c]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [eax + 0x5c]",
        "fadd dword  [eax + 0x20]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [eax + 0x20]",
        "fsub dword  [eax + 0x5c]",
        "fmul dword  [ecx + 0x20]",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [eax + 0x58]",
        "fadd dword  [eax + 0x24]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [eax + 0x24]",
        "fsub dword  [eax + 0x58]",
        "fmul dword  [ecx + 0x24]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [eax + 0x54]",
        "fadd dword  [eax + 0x28]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [eax + 0x28]",
        "fsub dword  [eax + 0x54]",
        "fmul dword  [ecx + 0x28]",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [eax + 0x50]",
        "fadd dword  [eax + 0x2c]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [eax + 0x2c]",
        "fsub dword  [eax + 0x50]",
        "fmul dword  [ecx + 0x2c]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [eax + 0x4c]",
        "fadd dword  [eax + 0x30]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [eax + 0x30]",
        "fsub dword  [eax + 0x4c]",
        "fmul dword  [ecx + 0x30]",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [eax + 0x48]",
        "fadd dword  [eax + 0x34]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [eax + 0x34]",
        "fsub dword  [eax + 0x48]",
        "fmul dword  [ecx + 0x34]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [eax + 0x44]",
        "fadd dword  [eax + 0x38]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [eax + 0x38]",
        "fsub dword  [eax + 0x44]",
        "fmul dword  [ecx + 0x38]",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [eax + 0x40]",
        "fadd dword  [eax + 0x3c]",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [eax + 0x3c]",
        "fsub dword  [eax + 0x40]",
        "mov eax,[0x100de358]",
        "fmul dword  [ecx + 0x3c]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + -0x44]",
        "fadd dword  [ebp + -0x80]",
        "fstp dword  [ebp + 0xffffff00]",
        "fld dword  [ebp + -0x80]",
        "fsub dword  [ebp + -0x44]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + 0xffffff3c]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x7c]",
        "fstp dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + -0x7c]",
        "fsub dword  [ebp + -0x48]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xffffff38]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x78]",
        "fstp dword  [ebp + 0xffffff08]",
        "fld dword  [ebp + -0x78]",
        "fsub dword  [ebp + -0x4c]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + 0xffffff34]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x74]",
        "fstp dword  [ebp + 0xffffff0c]",
        "fld dword  [ebp + -0x74]",
        "fsub dword  [ebp + -0x50]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + 0xffffff30]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + 0xffffff10]",
        "fld dword  [ebp + -0x70]",
        "fsub dword  [ebp + -0x54]",
        "fmul dword  [eax + 0x10]",
        "fstp dword  [ebp + 0xffffff2c]",
        "fld dword  [ebp + -0x58]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + 0xffffff14]",
        "fld dword  [ebp + -0x6c]",
        "fsub dword  [ebp + -0x58]",
        "fmul dword  [eax + 0x14]",
        "fstp dword  [ebp + 0xffffff28]",
        "fld dword  [ebp + -0x5c]",
        "fadd dword  [ebp + -0x68]",
        "fstp dword  [ebp + 0xffffff18]",
        "fld dword  [ebp + -0x68]",
        "fsub dword  [ebp + -0x5c]",
        "fmul dword  [eax + 0x18]",
        "fstp dword  [ebp + 0xffffff24]",
        "fld dword  [ebp + -0x60]",
        "fadd dword  [ebp + -0x64]",
        "fstp dword  [ebp + 0xffffff1c]",
        "fld dword  [ebp + -0x64]",
        "fsub dword  [ebp + -0x60]",
        "fmul dword  [eax + 0x1c]",
        "fstp dword  [ebp + 0xffffff20]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x4]",
        "fstp dword  [ebp + 0xffffff40]",
        "fld dword  [ebp + -0x4]",
        "fsub dword  [ebp + -0x40]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + 0xffffff7c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + 0xffffff44]",
        "fld dword  [ebp + -0x8]",
        "fsub dword  [ebp + -0x3c]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xffffff78]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + 0xffffff48]",
        "fld dword  [ebp + -0xc]",
        "fsub dword  [ebp + -0x38]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + 0xffffff74]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + 0xffffff4c]",
        "fld dword  [ebp + -0x10]",
        "fsub dword  [ebp + -0x34]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + 0xffffff70]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + 0xffffff50]",
        "fld dword  [ebp + -0x14]",
        "fsub dword  [ebp + -0x30]",
        "fmul dword  [eax + 0x10]",
        "fstp dword  [ebp + 0xffffff6c]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [ebp + 0xffffff54]",
        "fld dword  [ebp + -0x18]",
        "fsub dword  [ebp + -0x2c]",
        "fmul dword  [eax + 0x14]",
        "fstp dword  [ebp + 0xffffff68]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ebp + 0xffffff58]",
        "fld dword  [ebp + -0x1c]",
        "fsub dword  [ebp + -0x28]",
        "fmul dword  [eax + 0x18]",
        "fstp dword  [ebp + 0xffffff64]",
        "fld dword  [ebp + -0x24]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [ebp + 0xffffff5c]",
        "fld dword  [ebp + -0x20]",
        "fsub dword  [ebp + -0x24]",
        "fmul dword  [eax + 0x1c]",
        "mov eax,[0x100de35c]",
        "fstp dword  [ebp + 0xffffff60]",
        "fld dword  [ebp + 0xffffff1c]",
        "fadd dword  [ebp + 0xffffff00]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [ebp + 0xffffff00]",
        "fsub dword  [ebp + 0xffffff1c]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [ebp + 0xffffff18]",
        "fadd dword  [ebp + 0xffffff04]",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [ebp + 0xffffff04]",
        "fsub dword  [ebp + 0xffffff18]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + 0xffffff14]",
        "fadd dword  [ebp + 0xffffff08]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff08]",
        "fsub dword  [ebp + 0xffffff14]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff10]",
        "fadd dword  [ebp + 0xffffff0c]",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [ebp + 0xffffff0c]",
        "fsub dword  [ebp + 0xffffff10]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + 0xffffff20]",
        "fadd dword  [ebp + 0xffffff3c]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + 0xffffff3c]",
        "fsub dword  [ebp + 0xffffff20]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [ebp + 0xffffff24]",
        "fadd dword  [ebp + 0xffffff38]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + 0xffffff38]",
        "fsub dword  [ebp + 0xffffff24]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + 0xffffff28]",
        "fadd dword  [ebp + 0xffffff34]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff34]",
        "fsub dword  [ebp + 0xffffff28]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff2c]",
        "fadd dword  [ebp + 0xffffff30]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + 0xffffff30]",
        "fsub dword  [ebp + 0xffffff2c]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + 0xffffff5c]",
        "fadd dword  [ebp + 0xffffff40]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + 0xffffff40]",
        "fsub dword  [ebp + 0xffffff5c]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [ebp + 0xffffff58]",
        "fadd dword  [ebp + 0xffffff44]",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [ebp + 0xffffff44]",
        "fsub dword  [ebp + 0xffffff58]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + 0xffffff54]",
        "fadd dword  [ebp + 0xffffff48]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff48]",
        "fsub dword  [ebp + 0xffffff54]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff50]",
        "fadd dword  [ebp + 0xffffff4c]",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [ebp + 0xffffff4c]",
        "fsub dword  [ebp + 0xffffff50]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + 0xffffff60]",
        "fadd dword  [ebp + 0xffffff7c]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + 0xffffff7c]",
        "fsub dword  [ebp + 0xffffff60]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [ebp + 0xffffff64]",
        "fadd dword  [ebp + 0xffffff78]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + 0xffffff78]",
        "fsub dword  [ebp + 0xffffff64]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + 0xffffff68]",
        "fadd dword  [ebp + 0xffffff74]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff74]",
        "fsub dword  [ebp + 0xffffff68]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + 0xffffff6c]",
        "fadd dword  [ebp + 0xffffff70]",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + 0xffffff70]",
        "fsub dword  [ebp + 0xffffff6c]",
        "fmul dword  [eax + 0xc]",
        "mov eax,[0x100de360]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [eax]",
        "fstp dword  [ebp + 0x10]",
        "fld dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xfffffefc]",
        "fld dword  [ebp + -0x74]",
        "fadd dword  [ebp + -0x80]",
        "fstp dword  [ebp + 0xffffff00]",
        "fld dword  [ebp + -0x80]",
        "fsub dword  [ebp + -0x74]",
        "fld dword  [ebp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff0c]",
        "fld dword  [ebp + -0x78]",
        "fadd dword  [ebp + -0x7c]",
        "fstp dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + -0x7c]",
        "fsub dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xfffffefc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff08]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + 0xffffff10]",
        "fld dword  [ebp + -0x64]",
        "fsub dword  [ebp + -0x70]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff1c]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + 0xffffff14]",
        "fld dword  [ebp + -0x68]",
        "fsub dword  [ebp + -0x6c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff18]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x60]",
        "fstp dword  [ebp + 0xffffff20]",
        "fld dword  [ebp + -0x60]",
        "fsub dword  [ebp + -0x54]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff2c]",
        "fld dword  [ebp + -0x58]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + 0xffffff24]",
        "fld dword  [ebp + -0x5c]",
        "fsub dword  [ebp + -0x58]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff28]",
        "fld dword  [ebp + -0x44]",
        "fadd dword  [ebp + -0x50]",
        "fstp dword  [ebp + 0xffffff30]",
        "fld dword  [ebp + -0x44]",
        "fsub dword  [ebp + -0x50]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff3c]",
        "fld dword  [ebp + -0x48]",
        "mov eax,[0x100de364]",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + 0xffffff34]",
        "fld dword  [ebp + -0x48]",
        "fsub dword  [ebp + -0x4c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff38]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x34]",
        "fstp dword  [ebp + 0xffffff40]",
        "fld dword  [ebp + -0x40]",
        "fsub dword  [ebp + -0x34]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff4c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x38]",
        "fstp dword  [ebp + 0xffffff44]",
        "fld dword  [ebp + -0x3c]",
        "fsub dword  [ebp + -0x38]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff48]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x24]",
        "fstp dword  [ebp + 0xffffff50]",
        "fld dword  [ebp + -0x24]",
        "fsub dword  [ebp + -0x30]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff5c]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + 0xffffff54]",
        "fld dword  [ebp + -0x28]",
        "fsub dword  [ebp + -0x2c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff58]",
        "fld dword  [ebp + -0x20]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + 0xffffff60]",
        "fld dword  [ebp + -0x20]",
        "fsub dword  [ebp + -0x14]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff6c]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [ebp + 0xffffff64]",
        "fld dword  [ebp + -0x1c]",
        "fsub dword  [ebp + -0x18]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff68]",
        "fld dword  [ebp + -0x10]",
        "fadd dword  [ebp + -0x4]",
        "fstp dword  [ebp + 0xffffff70]",
        "fld dword  [ebp + -0x4]",
        "fsub dword  [ebp + -0x10]",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff7c]",
        "fld dword  [ebp + -0xc]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + 0xffffff74]",
        "fld dword  [ebp + -0x8]",
        "fsub dword  [ebp + -0xc]",
        "fmulp",
        "fstp dword  [ebp + 0xffffff78]",
        "fld dword  [eax]",
        "fstp dword  [ebp + 0x10]",
        "fld dword  [ebp + 0xffffff04]",
        "fadd dword  [ebp + 0xffffff00]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [ebp + 0xffffff00]",
        "fsub dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [ebp + 0xffffff0c]",
        "fadd dword  [ebp + 0xffffff08]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff0c]",
        "fsub dword  [ebp + 0xffffff08]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [ebp + -0x74]",
        "fadd dword  [ebp + -0x78]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff14]",
        "fadd dword  [ebp + 0xffffff10]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + 0xffffff10]",
        "fsub dword  [ebp + 0xffffff14]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff1c]",
        "fadd dword  [ebp + 0xffffff18]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + 0xffffff1c]",
        "fsub dword  [ebp + 0xffffff18]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x68]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff20]",
        "fadd dword  [ebp + 0xffffff24]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + 0xffffff20]",
        "fsub dword  [ebp + 0xffffff24]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + 0xffffff28]",
        "fadd dword  [ebp + 0xffffff2c]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff2c]",
        "fsub dword  [ebp + 0xffffff28]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff30]",
        "fadd dword  [ebp + 0xffffff34]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + 0xffffff30]",
        "fsub dword  [ebp + 0xffffff34]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff38]",
        "fadd dword  [ebp + 0xffffff3c]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + 0xffffff3c]",
        "fsub dword  [ebp + 0xffffff38]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [ebp + -0x44]",
        "fld st0",
        "fadd dword  [ebp + -0x48]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x50]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + -0x48]",
        "fld st0",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff44]",
        "fadd dword  [ebp + 0xffffff40]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + 0xffffff40]",
        "fsub dword  [ebp + 0xffffff44]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [ebp + 0xffffff4c]",
        "fadd dword  [ebp + 0xffffff48]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff4c]",
        "fsub dword  [ebp + 0xffffff48]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x34]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff54]",
        "fadd dword  [ebp + 0xffffff50]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + 0xffffff50]",
        "fsub dword  [ebp + 0xffffff54]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff5c]",
        "fadd dword  [ebp + 0xffffff58]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + 0xffffff5c]",
        "fsub dword  [ebp + 0xffffff58]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x2c]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff60]",
        "fadd dword  [ebp + 0xffffff64]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + 0xffffff60]",
        "fsub dword  [ebp + 0xffffff64]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + 0xffffff68]",
        "fadd dword  [ebp + 0xffffff6c]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff6c]",
        "fsub dword  [ebp + 0xffffff68]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff70]",
        "fadd dword  [ebp + 0xffffff74]",
        "fstp dword  [ebp + -0x10]",
        "mov eax,dword  [ebp + 0x8]",
        "fld dword  [ebp + 0xffffff70]",
        "mov ecx,dword  [ebp + 0xc]",
        "fsub dword  [ebp + 0xffffff74]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + 0xffffff78]",
        "fadd dword  [ebp + 0xffffff7c]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + 0xffffff7c]",
        "fsub dword  [ebp + 0xffffff78]",
        "fmulp st3",
        "fxch st2",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x10]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [ebp + -0xc]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0xc]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + -0x80]",
        "fstp dword  [eax + 0x400]",
        "fld dword  [ebp + -0x70]",
        "fstp dword  [eax + 0x300]",
        "fld dword  [ebp + -0x78]",
        "fstp dword  [eax + 0x200]",
        "fld dword  [ebp + -0x68]",
        "fstp dword  [eax + 0x100]",
        "fld dword  [ebp + -0x7c]",
        "fstp dword  [eax]",
        "fld dword  [ebp + -0x7c]",
        "fstp dword  [ecx]",
        "fld dword  [ebp + -0x6c]",
        "fstp dword  [ecx + 0x100]",
        "fld dword  [ebp + -0x74]",
        "fstp dword  [ecx + 0x200]",
        "fld dword  [ebp + -0x64]",
        "fstp dword  [ecx + 0x300]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x60]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + -0x60]",
        "fstp dword  [eax + 0x380]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + -0x50]",
        "fstp dword  [eax + 0x280]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + -0x58]",
        "fstp dword  [eax + 0x180]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + -0x48]",
        "fstp dword  [eax + 0x80]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + -0x5c]",
        "fstp dword  [ecx + 0x80]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x54]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + -0x4c]",
        "fstp dword  [ecx + 0x180]",
        "fld st1",
        "fadd dword  [ebp + -0x54]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + -0x54]",
        "fstp dword  [ecx + 0x280]",
        "fxch",
        "fstp dword  [ecx + 0x380]",
        "fld dword  [ebp + -0x20]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [eax + 0x3c0]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [eax + 0x340]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [eax + 0x2c0]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [eax + 0x240]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [eax + 0x1c0]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [eax + 0x140]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [eax + 0xc0]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [eax + 0x40]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ecx + 0x40]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ecx + 0xc0]",
        "fld dword  [ebp + -0x14]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ecx + 0x140]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ecx + 0x1c0]",
        "fld dword  [ebp + -0x14]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ecx + 0x240]",
        "fld st1",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ecx + 0x2c0]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ecx + 0x340]",
        "fstp dword  [ecx + 0x3c0]",
        "leave",
        "ret"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "subs w26, w8, #0x104 (260)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xe354",
        "movk w20, #0x100d, lsl #16",
        "ldr w7, [x20]",
        "ldr s3, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-128]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr s2, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-124]",
        "ldr s2, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr s2, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-120]",
        "ldr s2, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-12]",
        "ldr s2, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-116]",
        "ldr s2, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-16]",
        "ldr s2, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-112]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-20]",
        "ldr s2, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-108]",
        "ldr s2, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-24]",
        "ldr s2, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-104]",
        "ldr s2, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-28]",
        "ldr s2, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-100]",
        "ldr s2, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-32]",
        "ldr s2, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-96]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-36]",
        "ldr s2, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-92]",
        "ldr s2, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-40]",
        "ldr s2, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-88]",
        "ldr s2, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-44]",
        "ldr s2, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-84]",
        "ldr s2, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-48]",
        "ldr s2, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-80]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-52]",
        "ldr s2, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-76]",
        "ldr s2, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-56]",
        "ldr s2, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-72]",
        "ldr s2, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-60]",
        "ldr s2, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-68]",
        "ldr s2, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xe358",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "ldr s3, [x7, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-64]",
        "ldur s2, [x9, #-68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-256]",
        "ldur s2, [x9, #-128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-196]",
        "ldur s2, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-252]",
        "ldur s2, [x9, #-124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-200]",
        "ldur s2, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-248]",
        "ldur s2, [x9, #-120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-204]",
        "ldur s2, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-244]",
        "ldur s2, [x9, #-116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-208]",
        "ldur s2, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-240]",
        "ldur s2, [x9, #-112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-212]",
        "ldur s2, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-236]",
        "ldur s2, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-216]",
        "ldur s2, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-232]",
        "ldur s2, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-220]",
        "ldur s2, [x9, #-96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-228]",
        "ldur s2, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-224]",
        "ldur s2, [x9, #-64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-192]",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-132]",
        "ldur s2, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-188]",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-136]",
        "ldur s2, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-184]",
        "ldur s2, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-140]",
        "ldur s2, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-180]",
        "ldur s2, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-144]",
        "ldur s2, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-176]",
        "ldur s2, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-148]",
        "ldur s2, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-172]",
        "ldur s2, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-152]",
        "ldur s2, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-168]",
        "ldur s2, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-156]",
        "ldur s2, [x9, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-164]",
        "ldur s2, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xe35c",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-160]",
        "ldur s2, [x9, #-228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-256]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-128]",
        "ldur s2, [x9, #-256]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-100]",
        "ldur s2, [x9, #-232]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-252]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-124]",
        "ldur s2, [x9, #-252]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-232]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-236]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-248]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-248]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-236]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-240]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-244]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-116]",
        "ldur s2, [x9, #-244]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-240]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-96]",
        "ldur s2, [x9, #-196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-68]",
        "ldur s2, [x9, #-220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-92]",
        "ldur s2, [x9, #-200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-72]",
        "ldur s2, [x9, #-216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-76]",
        "ldur s2, [x9, #-212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-84]",
        "ldur s2, [x9, #-208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-80]",
        "ldur s2, [x9, #-164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-64]",
        "ldur s2, [x9, #-192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-36]",
        "ldur s2, [x9, #-168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-60]",
        "ldur s2, [x9, #-188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-40]",
        "ldur s2, [x9, #-172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-56]",
        "ldur s2, [x9, #-184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-44]",
        "ldur s2, [x9, #-176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-52]",
        "ldur s2, [x9, #-180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-48]",
        "ldur s2, [x9, #-160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-32]",
        "ldur s2, [x9, #-132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldur s2, [x9, #-156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-28]",
        "ldur s2, [x9, #-136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldur s2, [x9, #-152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-24]",
        "ldur s2, [x9, #-140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-12]",
        "ldur s2, [x9, #-148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-20]",
        "ldur s2, [x9, #-144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xe360",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-16]",
        "ldr s2, [x4]",
        "str s2, [x9, #16]",
        "ldr s2, [x4, #4]",
        "mov x20, #0xfffffffffffffefc",
        "str s2, [x9, x20, sxtx]",
        "ldur s2, [x9, #-116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-256]",
        "ldur s2, [x9, #-128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-244]",
        "ldur s2, [x9, #-120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-252]",
        "ldur s2, [x9, #-124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9, x20, sxtx]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-248]",
        "ldur s2, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-240]",
        "ldur s2, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-228]",
        "ldur s2, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-236]",
        "ldur s2, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-232]",
        "ldur s2, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-224]",
        "ldur s2, [x9, #-96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-212]",
        "ldur s2, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-220]",
        "ldur s2, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-216]",
        "ldur s2, [x9, #-68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-208]",
        "ldur s2, [x9, #-68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-196]",
        "ldur s2, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xe364",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "ldur s5, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-204]",
        "ldur s2, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-200]",
        "ldur s2, [x9, #-64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-192]",
        "ldur s2, [x9, #-64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-180]",
        "ldur s2, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-188]",
        "ldur s2, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-184]",
        "ldur s2, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-176]",
        "ldur s2, [x9, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-164]",
        "ldur s2, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-172]",
        "ldur s2, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-168]",
        "ldur s2, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-160]",
        "ldur s2, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-148]",
        "ldur s2, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-156]",
        "ldur s2, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-152]",
        "ldur s2, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-144]",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s5, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-132]",
        "ldur s2, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-140]",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-136]",
        "ldr s2, [x4]",
        "str s2, [x9, #16]",
        "ldur s2, [x9, #-252]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-256]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-128]",
        "ldur s2, [x9, #-256]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-252]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-124]",
        "ldur s2, [x9, #-244]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-248]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-244]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-248]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-116]",
        "ldur s2, [x9, #-116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-236]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-240]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-240]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-236]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-232]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-232]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-100]",
        "ldur s2, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-96]",
        "ldur s2, [x9, #-224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-92]",
        "ldur s2, [x9, #-216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-84]",
        "ldur s2, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-80]",
        "ldur s2, [x9, #-208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-76]",
        "ldur s2, [x9, #-200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-72]",
        "ldur s2, [x9, #-196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s4, [x9, #-200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-68]",
        "ldur s2, [x9, #-68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldur s5, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-72]",
        "ldur s5, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-80]",
        "ldur s5, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-72]",
        "ldur s5, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-76]",
        "ldur s5, [x9, #-188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-64]",
        "ldur s5, [x9, #-192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-60]",
        "ldur s5, [x9, #-180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-56]",
        "ldur s5, [x9, #-180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-52]",
        "ldur s5, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-56]",
        "ldur s5, [x9, #-172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-48]",
        "ldur s5, [x9, #-176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-44]",
        "ldur s5, [x9, #-164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-36]",
        "ldur s5, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x9, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-48]",
        "ldur s5, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-44]",
        "ldur s5, [x9, #-160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-32]",
        "ldur s5, [x9, #-160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-28]",
        "ldur s5, [x9, #-152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-24]",
        "ldur s5, [x9, #-148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-20]",
        "ldur s5, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-24]",
        "ldur s5, [x9, #-144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-16]",
        "ldr w4, [x9, #8]",
        "ldur s5, [x9, #-144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w7, [x9, #12]",
        "ldur s7, [x9, #-140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-12]",
        "ldur s5, [x9, #-136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x9, #-8]",
        "ldur s5, [x9, #-132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s7, [x9, #-136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-4]",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s5, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-8]",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-16]",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-8]",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-12]",
        "ldur s3, [x9, #-128]",
        "str s3, [x4, #1024]",
        "ldur s3, [x9, #-112]",
        "str s3, [x4, #768]",
        "ldur s3, [x9, #-120]",
        "str s3, [x4, #512]",
        "ldur s3, [x9, #-104]",
        "str s3, [x4, #256]",
        "ldur s3, [x9, #-124]",
        "str s3, [x4]",
        "ldur s3, [x9, #-124]",
        "str s3, [x7]",
        "ldur s3, [x9, #-108]",
        "str s3, [x7, #256]",
        "ldur s3, [x9, #-116]",
        "str s3, [x7, #512]",
        "ldur s3, [x9, #-100]",
        "str s3, [x7, #768]",
        "ldur s3, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-96]",
        "ldur s3, [x9, #-96]",
        "str s3, [x4, #896]",
        "ldur s3, [x9, #-80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-80]",
        "ldur s3, [x9, #-80]",
        "str s3, [x4, #640]",
        "ldur s3, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-88]",
        "ldur s3, [x9, #-88]",
        "str s3, [x4, #384]",
        "ldur s3, [x9, #-72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-72]",
        "ldur s3, [x9, #-72]",
        "str s3, [x4, #128]",
        "ldur s3, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-92]",
        "ldur s3, [x9, #-92]",
        "str s3, [x7, #128]",
        "ldur s3, [x9, #-76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s8, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-76]",
        "ldur s3, [x9, #-76]",
        "str s3, [x7, #384]",
        "ldur s3, [x9, #-84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x9, #-84]",
        "ldur s3, [x9, #-84]",
        "str s3, [x7, #640]",
        "strb wzr, [x28, #1049]",
        "str s2, [x7, #896]",
        "ldur s2, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-32]",
        "ldur s2, [x9, #-64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #960]",
        "ldur s2, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #832]",
        "ldur s2, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-16]",
        "ldur s2, [x9, #-48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #704]",
        "ldur s2, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #576]",
        "ldur s2, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-24]",
        "ldur s2, [x9, #-56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #448]",
        "ldur s2, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #320]",
        "ldur s2, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldur s2, [x9, #-40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #192]",
        "ldur s2, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #64]",
        "ldur s2, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-28]",
        "ldur s2, [x9, #-60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #64]",
        "ldur s2, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #192]",
        "ldur s2, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-12]",
        "ldur s2, [x9, #-44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #320]",
        "ldur s2, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #448]",
        "ldur s2, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-20]",
        "ldur s2, [x9, #-52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #576]",
        "ldur s2, [x9, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #704]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #832]",
        "str s5, [x7, #960]",
        "mov x8, x9",
        "ldp w9, w20, [x8], #8",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xf8f8",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ],
      "x86InstructionCount": 809,
      "ExpectedInstructionCount": 7755
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87-HalfLife.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 70,
      "ExpectedInstructionCount": 412,
      "x86Insts": [
        "sub esp,0x2c",
        "mov ecx,dword [esp + 0x34]",
        "mov edx,dword [esp + 0x30]",
        "mov eax,dword [esp + 0x38]",
        "fld dword [ecx]",
        "fld dword [edx]",
        "fld st1",
        "fsub st0,st1",
        "fld dword [ecx + 0x4]",
        "fld dword [edx + 0x4]",
        "fld st1",
        "fsub st0,st1",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x8]",
        "fld dword [edx + 0x8]",
        "fld st1",
        "fsub st0,st1",
        "fstp dword [esp + 0x14]",
        "fld dword [eax]",
        "fsubr st7,st0",
        "fxch st7",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + 0x4]",
        "fsubr st4,st0",
        "fxch st4",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + 0x8]",
        "fsubr st2,st0",
        "fxch st6",
        "fsubrp st7,st0",
        "fxch st2",
        "fsubrp st3,st0",
        "fxch st4",
        "fsubp",
        "fxch st2",
        "fmul st0",
        "fldz",
        "faddp",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp",
        "faddp",
        "fld dword [esp + 0x14]",
        "fld st0",
        "fmulp",
        "faddp",
        "fld dword [esp + 0x18]",
        "fld st0",
        "fmulp",
        "fldz",
        "faddp",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fmulp",
        "faddp",
        "fxch st4",
        "fmul st0",
        "faddp st4,st0",
        "fxch st4",
        "fmul st0",
        "fldz",
        "faddp",
        "fxch",
        "fmul st0",
        "faddp",
        "fxch",
        "fmul st0",
        "faddp",
        "fxch st2",
        "fucomi st0,st1"
      ],
      "ExpectedArm64ASM": [
        "subs w20, w8, #0x2c (44)",
        "mov x27, x8",
        "mov x8, x20",
        "ldr w7, [x8, #52]",
        "ldr w5, [x8, #48]",
        "ldr w4, [x8, #56]",
        "ldr s2, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "ldr s7, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x5, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #20]",
        "ldr s9, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #28]",
        "ldr s5, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "movi v6.2d, #0x0",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s8, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q4, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q7, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x707",
        "lsr w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0xf8f8",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 37,
      "ExpectedInstructionCount": 213,
      "x86Insts": [
        "sub esp,0x1c",
        "mov edx,dword [esp + 0x20]",
        "mov eax,dword [esp + 0x24]",
        "fld dword [edx]",
        "fabs",
        "fld dword [eax]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld dword [edx + 0x4]",
        "fabs",
        "fld dword [eax + 0x4]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld dword [edx + 0x8]",
        "fabs",
        "fld dword [eax + 0x8]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld st2",
        "fmulp st3",
        "fxch",
        "fmul st0",
        "faddp st2,st0",
        "fmul st0",
        "faddp",
        "fld st0",
        "fsqrt",
        "fucomi st0,st0"
      ],
      "ExpectedArm64ASM": [
        "mvn w27, w8",
        "subs w8, w8, #0x1c (28)",
        "ldr w5, [x8, #32]",
        "ldr w4, [x8, #36]",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "bic v2.16b, v2.16b, v3.16b",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr q4, [x28, #3552]",
        "bic v3.16b, v3.16b, v4.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "strb w20, [x28, #1040]",
        "csetm x20, ls",
        "dup v4.2d, x20",
        "bsl v4.16b, v3.16b, v2.16b",
        "ldr s2, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "bic v2.16b, v2.16b, v3.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr q5, [x28, #3552]",
        "bic v3.16b, v3.16b, v5.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "strb w20, [x28, #1040]",
        "csetm x20, ls",
        "dup v5.2d, x20",
        "bsl v5.16b, v3.16b, v2.16b",
        "ldr s2, [x5, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "bic v2.16b, v2.16b, v3.16b",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "bic v3.16b, v3.16b, v6.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "strb w20, [x28, #1040]",
        "csetm x20, ls",
        "dup v6.2d, x20",
        "bsl v6.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1888]",
        "ldr x3, [x28, #1896]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q3, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 32,
      "ExpectedInstructionCount": 231,
      "x86Insts": [
        "fld dword [ecx]",
        "fld dword [edx + 0x4]",
        "fld dword [ecx + 0x4]",
        "fld dword [edx]",
        "fld dword [ecx + 0x8]",
        "fstp dword [esp]",
        "fld dword [edx + 0x8]",
        "fld st4",
        "fmul st4",
        "fld st3",
        "fmul st3",
        "fsubp",
        "fmul dword [eax + 0x8]",
        "fxch st2",
        "fmul dword [esp]",
        "fxch st5",
        "fmul st1",
        "fsubp st5,st0",
        "fld dword [eax + 0x4]",
        "fmulp st5",
        "fxch st4",
        "faddp",
        "fxch st3",
        "fmulp",
        "fxch",
        "fmul dword [esp]",
        "mov byte [esp],0x1",
        "fsubp",
        "fmul dword [eax]",
        "faddp",
        "fdivrp",
        "fstp dword [esi]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x7, #8]",
        "str s6, [x8]",
        "ldr s6, [x5, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s5, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s5, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov w20, #0x1",
        "strb w20, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x10]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "str q2, [x20, #1056]",
        "str q3, [x22, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block4": {
      "x86InstructionCount": 54,
      "ExpectedInstructionCount": 75,
      "x86Insts": [
        "push ebp",
        "push edi",
        "push esi",
        "push ebx",
        "sub esp,0x4c",
        "mov eax,dword [esp + 0x68]",
        "lea ebp,[esp + 0x38]",
        "lea esi,[esp + 0x30]",
        "fld qword [0x00052098]",
        "mov dword [esp + 0xc],esi",
        "mov edi,dword [esp + 0x64]",
        "mov dword [esp + 0x8],ebp",
        "mov ebx,dword [esp + 0x6c]",
        "mov dword [esp + 0x28],eax",
        "mov eax,dword [esp + 0x60]",
        "fmul dword [eax + 0x4]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "mov eax,dword [esp + 0x60]",
        "mov dword [esp + 0xc],esi",
        "mov dword [esp + 0x8],ebp",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x18]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x1c]",
        "fld qword [0x00052098]",
        "fmul dword [eax]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "mov eax,dword [esp + 0x60]",
        "mov dword [esp + 0xc],esi",
        "mov dword [esp + 0x8],ebp",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x20]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x24]",
        "fld qword [0x00052098]",
        "fmul dword [eax + 0x8]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "test edi,edi",
        "mov eax,dword [esp + 0x28]",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]"
      ],
      "ExpectedArm64ASM": [
        "stp w11, w9, [x8, #-8]!",
        "stp w6, w10, [x8, #-8]!",
        "subs w26, w8, #0x4c (76)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w4, [x8, #104]",
        "add w9, w8, #0x38 (56)",
        "add w10, w8, #0x30 (48)",
        "mov w20, #0x2098",
        "movk w20, #0x5, lsl #16",
        "ldr d2, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str w10, [x8, #12]",
        "ldr w11, [x8, #100]",
        "str w9, [x8, #8]",
        "ldr w6, [x8, #108]",
        "str w4, [x8, #40]",
        "ldr w4, [x8, #96]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x8]",
        "mov w20, #0x44",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 49,
      "ExpectedInstructionCount": 300,
      "x86Insts": [
        "fld dword [esp + 0x80]",
        "fsub dword [esp + 0x7c]",
        "mov eax,dword [esp + 0x88]",
        "mov ecx,dword [esp + 0x8c]",
        "movss xmm1,dword [esp + 0x7c]",
        "mov dword [esp + 0x38],edx",
        "fst dword [esp + 0x34]",
        "fld dword [esp + 0x24]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [eax]",
        "fsub dword [ebp]",
        "movss xmm0,dword [esp + 0x2c]",
        "fmul st1",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x44]",
        "fld dword [eax + 0x4]",
        "fsub dword [ebp + 0x4]",
        "fmul st1",
        "fadd dword [ebp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fld dword [eax + 0x8]",
        "fsub dword [ebp + 0x8]",
        "fmulp",
        "fadd dword [ebp + 0x8]",
        "mov dword [esp + 0x1c],ecx",
        "movss dword [esp + 0x10],xmm0",
        "lea ecx,[esp + 0x44]",
        "movss dword [esp + 0xc],xmm1",
        "mov dword [esp + 0x18],ecx",
        "mov dword [esp + 0x14],ebp",
        "fstp dword [esp + 0x4c]",
        "fldz",
        "fld dword [esp + 0x28]",
        "fxch",
        "fucomip st0,st1",
        "fstp st0",
        "seta byte [esp + 0x30]",
        "movzx eax,byte [esp + 0x30]",
        "movsx eax,word [esi + eax*0x2 + 0x4]",
        "mov dword [esp + 0x8],eax",
        "mov dword [esp + 0x4],ebx",
        "mov dword [esp],edi",
        "call 0x0002b5b0",
        "mov edx,dword [esp + 0x38]",
        "test al,al"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x8, #136]",
        "ldr w7, [x8, #140]",
        "ldr s17, [x8, #124]",
        "str w5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #52]",
        "ldr s3, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s4, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s16, [x8, #44]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str w7, [x8, #28]",
        "str s16, [x8, #16]",
        "add w7, w8, #0x44 (68)",
        "str s17, [x8, #12]",
        "str w7, [x8, #24]",
        "str w9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "movi v2.2d, #0x0",
        "ldr s3, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "cset x20, hi",
        "strb w20, [x8, #48]",
        "ldrb w4, [x8, #48]",
        "add w20, w10, w4, lsl #1",
        "ldrh w20, [x20, #4]",
        "sxth w4, w20",
        "str w4, [x8, #8]",
        "str w6, [x8, #4]",
        "str w11, [x8]",
        "mov w20, #0xb2",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xe0e0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 39,
      "ExpectedInstructionCount": 294,
      "x86Insts": [
        "push ebp",
        "push edi",
        "push esi",
        "push ebx",
        "sub esp,0x4",
        "mov ecx,dword [esp + 0x20]",
        "mov ebx,dword [esp + 0x24]",
        "mov eax,dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x18]",
        "fld dword [ecx]",
        "fmul dword [ebx + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "mov edi,dword [esp + 0x2c]",
        "mov esi,dword [esp + 0x30]",
        "fld dword [ecx + 0x4]",
        "fmul dword [ebx]",
        "fsubp",
        "fld dword [ebx]",
        "fmul dword [ecx + 0x8]",
        "fld dword [ecx]",
        "fmul dword [ebx + 0x8]",
        "fsubp",
        "fld dword [ecx + 0x4]",
        "fmul dword [ebx + 0x8]",
        "fld dword [ebx + 0x4]",
        "fmul dword [ecx + 0x8]",
        "fsubp",
        "fld dword [eax + 0x8]",
        "fmul st3",
        "fld dword [eax + 0x4]",
        "fmul st3",
        "faddp",
        "fld dword [eax]",
        "fmul st2",
        "faddp",
        "fldz",
        "fxch",
        "fucomi st0,st1",
        "fstp st1"
      ],
      "ExpectedArm64ASM": [
        "stp w11, w9, [x8, #-8]!",
        "stp w6, w10, [x8, #-8]!",
        "subs w20, w8, #0x4 (4)",
        "mov x27, x8",
        "mov x8, x20",
        "ldr w7, [x8, #32]",
        "ldr w6, [x8, #36]",
        "ldr w4, [x8, #28]",
        "ldr w5, [x8, #24]",
        "ldr s2, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr w11, [x8, #44]",
        "ldr w10, [x8, #48]",
        "ldr s3, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "movi v6.2d, #0x0",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q5, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q4, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q3, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w22, w22, w20",
        "mov w23, #0xf0f",
        "lsr w22, w23, w22",
        "orr w21, w21, w22",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 244,
      "x86Insts": [
        "fld dword [ebx + 0x4]",
        "fld dword [ebx]",
        "fld dword [ebx + 0x8]",
        "fld dword [edx]",
        "fmul st3",
        "fld dword [edx + 0x4]",
        "fmul st3",
        "fsubp",
        "fmul dword [eax + 0x8]",
        "fxch st2",
        "fmul dword [edx + 0x8]",
        "fld dword [edx]",
        "fmul st2",
        "fsubp",
        "fmul dword [eax + 0x4]",
        "faddp st2,st0",
        "fmul dword [edx + 0x4]",
        "fxch st2",
        "fmul dword [edx + 0x8]",
        "fsubp st2,st0",
        "fxch",
        "fmul dword [eax]",
        "faddp",
        "fdiv st0,st1",
        "fstp dword [edi]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x5, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s6, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s6, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s5, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s5, [x5, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x11]",
        "strb w21, [x28, #1051]",
        "str q2, [x20, #1056]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x8",
        "sub w21, w22, w21",
        "mov w22, #0xf8f8",
        "lsr w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 72,
      "x86Insts": [
        "fstp st0",
        "fstp st3",
        "fstp st0",
        "fstp st3",
        "fxch",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fstp qword [esp]",
        "call 0x0006d0d8",
        "fld dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [esp + 0x38]",
        "fld dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fxch st6",
        "fxch st5",
        "fxch",
        "fxch st4",
        "fxch st3",
        "fxch"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "add x22, x28, x22, lsl #4",
        "ldr q4, [x22, #1056]",
        "strb wzr, [x28, #1049]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x23, x28, x20, lsl #4",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #56]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #44]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "ldr q5, [x12, #1056]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d6, d0",
        "str d6, [x8]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "mov w13, #0x24",
        "movk w13, #0x1, lsl #16",
        "str w13, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str q3, [x21, #1056]",
        "str q4, [x22, #1056]",
        "str q5, [x23, #1056]",
        "str q2, [x12, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 72,
      "x86Insts": [
        "fstp st0",
        "fstp st3",
        "fstp st0",
        "fstp st3",
        "fxch",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fstp qword [esp]",
        "call 0x0006d0d8",
        "fld dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [esp + 0x38]",
        "fld dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fxch st6",
        "fxch st5",
        "fxch",
        "fxch st4",
        "fxch st3",
        "fxch"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "add x22, x28, x22, lsl #4",
        "ldr q4, [x22, #1056]",
        "strb wzr, [x28, #1049]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x23, x28, x20, lsl #4",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #56]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #44]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "ldr q5, [x12, #1056]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d6, d0",
        "str d6, [x8]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "mov w13, #0x24",
        "movk w13, #0x1, lsl #16",
        "str w13, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str q3, [x21, #1056]",
        "str q4, [x22, #1056]",
        "str q5, [x23, #1056]",
        "str q2, [x12, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 125,
      "ExpectedInstructionCount": 17,
      "x86Insts": [
        "push esi",
        "push ebx",
        "sub esp,0xa4",
        "mov ebx,dword [esp + 0xb0]",
        "lea esi,[esp + 0x18]",
        "mov eax,gs:[0x14]",
        "mov dword [esp + 0x9c],eax",
        "xor eax,eax",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x18]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x1c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x20]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x24]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x28]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x2c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x30]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x34]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x38]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x3c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x40]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x44]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x48]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x4c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x50]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x54]",
        "call 0x00018ad0",
        "mov dword [esp + 0x58],eax",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x7c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x80]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x84]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x88]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x8c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x90]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x94]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x98]",
        "call 0x00019700",
        "mov edx,0x1f",
        "mov dword [esp + 0x8],edx",
        "mov dword [esp + 0x4],eax",
        "lea eax,[esp + 0x5c]",
        "mov dword [esp],eax",
        "call 0x0006d0dc",
        "mov eax,dword [ebx + 0x130]",
        "mov byte [esp + 0x7b],0x0",
        "mov edx,dword [eax]",
        "mov dword [esp],eax",
        "mov dword [esp + 0x4],esi",
        "call dword [edx + 0xc4]",
        "mov eax,dword [esp + 0x9c]",
        "xor eax,dword gs:[0x14]"
      ],
      "ExpectedArm64ASM": [
        "stp w6, w10, [x8, #-8]!",
        "sub w8, w8, #0xa4 (164)",
        "ldr w6, [x8, #176]",
        "add w10, w8, #0x18 (24)",
        "ldr w20, [x28, #992]",
        "ldr w4, [x20, #20]",
        "str w4, [x8, #156]",
        "subs w26, w4, w4",
        "mov w4, #0x0",
        "mov w20, #0x869c",
        "movk w20, #0x5, lsl #16",
        "add w20, w6, w20",
        "ldr w4, [x20]",
        "str w4, [x8]",
        "mov w20, #0x30",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87-Oblivion.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 911,
      "ExpectedInstructionCount": 7343,
      "x86Insts": [
        "sub esp,0x118",
        "fld dword [ecx + 0x1084]",
        "fadd dword [ecx + 0x1008]",
        "fstp dword [esp]",
        "fld dword [ecx + 0x1080]",
        "fadd dword [ecx + 0x100c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x107c]",
        "fadd dword [ecx + 0x1010]",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x1078]",
        "fadd dword [ecx + 0x1014]",
        "fstp dword [esp + 0xc]",
        "fld dword [ecx + 0x1074]",
        "fadd dword [ecx + 0x1018]",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x1070]",
        "fadd dword [ecx + 0x101c]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx + 0x106c]",
        "fadd dword [ecx + 0x1020]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx + 0x1068]",
        "fadd dword [ecx + 0x1024]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ecx + 0x1064]",
        "fadd dword [ecx + 0x1028]",
        "fstp dword [esp + 0x20]",
        "fld dword [ecx + 0x1060]",
        "fadd dword [ecx + 0x102c]",
        "fstp dword [esp + 0x24]",
        "fld dword [ecx + 0x105c]",
        "fadd dword [ecx + 0x1030]",
        "fstp dword [esp + 0x28]",
        "fld dword [ecx + 0x1058]",
        "fadd dword [ecx + 0x1034]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ecx + 0x1054]",
        "fadd dword [ecx + 0x1038]",
        "fstp dword [esp + 0x30]",
        "fld dword [ecx + 0x1050]",
        "fadd dword [ecx + 0x103c]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + 0x104c]",
        "fadd dword [ecx + 0x1040]",
        "fstp dword [esp + 0x38]",
        "fld dword [ecx + 0x1048]",
        "fadd dword [ecx + 0x1044]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fld dword [esp + 0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x34]",
        "fld st0",
        "fld dword [esp + 0x8]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x24]",
        "fadd dword [esp + 0x18]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fstp dword [esp + 0x60]",
        "fxch st4",
        "fsubrp st5,st0",
        "fld dword [0x00b3c1d0]",
        "fmulp st5",
        "fxch st4",
        "fstp dword [esp + 0x64]",
        "fsubrp",
        "fmul dword [0x00b3c1d4]",
        "fstp dword [esp + 0x68]",
        "fsubrp",
        "fmul dword [0x00b3c1d8]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x30]",
        "fmul dword [0x00b3c1dc]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x2c]",
        "fmul dword [0x00b3c1e0]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x28]",
        "fmul dword [0x00b3c1e4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x24]",
        "fmul dword [0x00b3c1e8]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fmul dword [0x00b3c1ec]",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fld dword [esp + 0x44]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp]",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "fld dword [esp + 0x48]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x58]",
        "fld st0",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0xc]",
        "fxch st4",
        "fsubrp st5,st0",
        "fld dword [0x00b3c1f0]",
        "fld st0",
        "fmulp st6",
        "fxch st5",
        "fstp dword [esp + 0x10]",
        "fxch",
        "fsubrp st2,st0",
        "fld dword [0x00b3c1f4]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x14]",
        "fsubp st2,st0",
        "fld dword [0x00b3c1f8]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x54]",
        "fld dword [0x00b3c1fc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x80]",
        "fld st0",
        "fld dword [esp + 0x64]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x68]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x2c]",
        "fsubrp",
        "fmul st4",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x7c]",
        "fmul st2",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x78]",
        "fmul st3",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x70]",
        "fsub dword [esp + 0x74]",
        "fmulp",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fxch",
        "fsubrp st2,st0",
        "fld dword [0x00b3c200]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x4c]",
        "fsubr dword [esp + 0x4]",
        "fld dword [0x00b3c204]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fsubr dword [esp + 0x10]",
        "fmul st2",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "fadd dword [esp + 0x20]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x68]",
        "fsubr dword [esp + 0x20]",
        "fmul st2",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x28]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0x78]",
        "fsubr dword [esp + 0x30]",
        "fmul st2",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x34]",
        "fsub dword [esp + 0x38]",
        "fmul st1",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x48]",
        "fld st0",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp]",
        "fsubr dword [esp + 0x44]",
        "fld dword [0x00b3c208]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fmul st1",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x68]",
        "fmul st1",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x70]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x74]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x74]",
        "fsub dword [esp + 0x78]",
        "fmul st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x80]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x7c]",
        "fsub dword [esp + 0x80]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x1c]",
        "fst dword [esp + 0xc0]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0xa0]",
        "fld dword [esp + 0xa0]",
        "fchs",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fsub dword [esp + 0x18]",
        "fstp dword [esp + 0xdc]",
        "fld dword [esp + 0x18]",
        "fchs",
        "fsub dword [esp + 0x1c]",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0xfc]",
        "fld dword [esp + 0x3c]",
        "fst dword [esp + 0xc8]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [esp + 0xb8]",
        "fld dword [esp + 0xb8]",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0xa8]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp + 0x34]",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x98]",
        "fld dword [esp + 0x98]",
        "fchs",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x38]",
        "fstp dword [esp + 0xd4]",
        "fld dword [esp + 0x38]",
        "fchs",
        "fsub dword [esp + 0x3c]",
        "fst qword [esp + 0x110]",
        "fsub dword [esp + 0x28]",
        "fsub dword [esp + 0x2c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x34]",
        "fstp dword [esp + 0xe4]",
        "fld dword [esp + 0x30]",
        "fsubr qword [esp + 0x110]",
        "fsub dword [esp + 0x20]",
        "fstp dword [esp + 0x104]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x30]",
        "fstp dword [esp + 0xf4]",
        "fld dword [esp]",
        "fchs",
        "fstp dword [esp + 0x10c]",
        "fld dword [esp + 0x4]",
        "fstp dword [esp + 0x90]",
        "fld dword [esp + 0xc]",
        "fst dword [esp + 0xb0]",
        "fchs",
        "fsub dword [esp + 0x8]",
        "fstp dword [esp + 0xec]",
        "fld dword [ecx + 0x1008]",
        "fsub dword [ecx + 0x1084]",
        "fmul dword [0x00b3c190]",
        "fstp dword [esp]",
        "fld dword [ecx + 0x100c]",
        "fsub dword [ecx + 0x1080]",
        "fmul dword [0x00b3c194]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x1010]",
        "fsub dword [ecx + 0x107c]",
        "fmul dword [0x00b3c198]",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x1014]",
        "fsub dword [ecx + 0x1078]",
        "fmul dword [0x00b3c19c]",
        "fstp dword [esp + 0xc]",
        "fld dword [ecx + 0x1018]",
        "fsub dword [ecx + 0x1074]",
        "fmul dword [0x00b3c1a0]",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x101c]",
        "fsub dword [ecx + 0x1070]",
        "fmul dword [0x00b3c1a4]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx + 0x1020]",
        "fsub dword [ecx + 0x106c]",
        "fmul dword [0x00b3c1a8]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx + 0x1024]",
        "fsub dword [ecx + 0x1068]",
        "fmul dword [0x00b3c1ac]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ecx + 0x1028]",
        "fsub dword [ecx + 0x1064]",
        "fmul dword [0x00b3c1b0]",
        "fstp dword [esp + 0x20]",
        "fld dword [ecx + 0x102c]",
        "fsub dword [ecx + 0x1060]",
        "fmul dword [0x00b3c1b4]",
        "fstp dword [esp + 0x24]",
        "fld dword [ecx + 0x1030]",
        "fsub dword [ecx + 0x105c]",
        "fmul dword [0x00b3c1b8]",
        "fstp dword [esp + 0x28]",
        "fld dword [ecx + 0x1034]",
        "fsub dword [ecx + 0x1058]",
        "fmul dword [0x00b3c1bc]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ecx + 0x1038]",
        "fsub dword [ecx + 0x1054]",
        "fmul dword [0x00b3c1c0]",
        "fstp dword [esp + 0x30]",
        "fld dword [ecx + 0x103c]",
        "fsub dword [ecx + 0x1050]",
        "fmul dword [0x00b3c1c4]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + 0x1040]",
        "fsub dword [ecx + 0x104c]",
        "fmul dword [0x00b3c1c8]",
        "fstp dword [esp + 0x38]",
        "fld dword [ecx + 0x1044]",
        "fsub dword [ecx + 0x1048]",
        "fmul dword [0x00b3c1cc]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x34]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x24]",
        "fadd dword [esp + 0x18]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp]",
        "fsub dword [esp + 0x3c]",
        "fmul dword [0x00b3c1d0]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp + 0x38]",
        "fmul dword [0x00b3c1d4]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0x34]",
        "fmul dword [0x00b3c1d8]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x30]",
        "fmul dword [0x00b3c1dc]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x2c]",
        "fmul dword [0x00b3c1e0]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x28]",
        "fmul dword [0x00b3c1e4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x24]",
        "fmul dword [0x00b3c1e8]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fmul dword [0x00b3c1ec]",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x60]",
        "fmul st6",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x48]",
        "fsub dword [esp + 0x5c]",
        "fmul st4",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x58]",
        "fmul st5",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x54]",
        "fmul dword [0x00b3c1fc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x80]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x68]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x80]",
        "fmulp st6",
        "fxch st5",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x7c]",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x78]",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x70]",
        "fsub dword [esp + 0x74]",
        "fmul dword [0x00b3c1fc]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fsubr dword [esp + 0x4]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fld dword [esp + 0x10]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fld st0",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x5c]",
        "fsubr dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "fld dword [esp + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x28]",
        "fld st0",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x68]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x6c]",
        "fsubr dword [esp + 0x24]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp + 0x30]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0x78]",
        "fxch",
        "fsubrp st2,st0",
        "fxch",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x34]",
        "fsubrp st2,st0",
        "fmulp",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x48]",
        "fld st0",
        "fld dword [esp + 0x44]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x50]",
        "fld st0",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x58]",
        "fld st0",
        "fld dword [esp + 0x54]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x18]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fld dword [esp + 0x64]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x20]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fld st0",
        "fld dword [esp + 0x74]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x30]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x80]",
        "fld st0",
        "fld dword [esp + 0x7c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fsubrp",
        "fmulp",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xc4]",
        "fld dword [esp + 0xc4]",
        "fld st0",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xbc]",
        "fld dword [esp + 0xbc]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xa4]",
        "fld st4",
        "fadd st0,st2",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0xb4]",
        "fld dword [esp + 0xb4]",
        "fadd st0,st1",
        "fstp dword [esp + 0xac]",
        "fld st4",
        "fadd st0,st1",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x94]",
        "fld dword [esp + 0x94]",
        "fchs",
        "fstp dword [esp + 0x110]",
        "fld dword [esp + 0x110]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xd0]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x14]",
        "fadd st0,st5",
        "fstp dword [esp + 0x9c]",
        "fld dword [esp + 0x9c]",
        "fchs",
        "fstp dword [esp + 0x88]",
        "fld dword [esp + 0x88]",
        "fsub dword [esp + 0x18]",
        "fsub st0,st1",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x28]",
        "fchs",
        "fsubrp st3,st0",
        "fld st0",
        "fsubp st3,st0",
        "fld st5",
        "fsubp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsubrp st2,st0",
        "fld dword [esp + 0x8]",
        "fsubr st0,st2",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0xe8]",
        "fld dword [esp + 0x14]",
        "fsubp st2,st0",
        "fld dword [esp + 0x18]",
        "fsubp st2,st0",
        "fld st4",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xe0]",
        "fld dword [esp + 0x30]",
        "fld st0",
        "fsubp st2,st0",
        "fld dword [esp + 0x8]",
        "fsubr st0,st2",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0xf0]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x10]",
        "faddp st5,st0",
        "mov eax,dword [ecx + 0x1004]",
        "mov edx,dword [ecx + 0x1000]",
        "fxch st4",
        "lea eax,[edx + eax*0x4]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xf8]",
        "fld dword [esp + 0x20]",
        "fchs",
        "fsubrp st4,st0",
        "fxch st3",
        "fsubrp",
        "fsub st0,st3",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsub dword [esp]",
        "fstp dword [esp + 0x108]",
        "fsubrp st2,st0",
        "fxch",
        "fstp dword [esp + 0x100]",
        "fld dword [esp + 0x90]",
        "fstp dword [eax]",
        "fld dword [esp + 0x94]",
        "fstp dword [eax + 0x40]",
        "fld dword [esp + 0x98]",
        "fstp dword [eax + 0x80]",
        "fld dword [esp + 0x9c]",
        "fstp dword [eax + 0xc0]",
        "fld dword [esp + 0xa0]",
        "fstp dword [eax + 0x100]",
        "fld dword [esp + 0xa4]",
        "fst dword [eax + 0x140]",
        "fld dword [esp + 0xa8]",
        "fst dword [eax + 0x180]",
        "fld dword [esp + 0xac]",
        "fst dword [eax + 0x1c0]",
        "fld dword [esp + 0xb0]",
        "fst dword [eax + 0x200]",
        "fld dword [esp + 0xb4]",
        "fstp dword [eax + 0x240]",
        "fld dword [esp + 0xb8]",
        "fst dword [eax + 0x280]",
        "fld dword [esp + 0xbc]",
        "fstp dword [eax + 0x2c0]",
        "fld dword [esp + 0xc0]",
        "fstp dword [eax + 0x300]",
        "fxch st5",
        "fst dword [eax + 0x340]",
        "fld dword [esp + 0xc8]",
        "fstp dword [eax + 0x380]",
        "fxch st6",
        "fst dword [eax + 0x3c0]",
        "fldz",
        "fstp dword [eax + 0x400]",
        "fchs",
        "fstp dword [eax + 0x440]",
        "fld dword [esp + 0xc8]",
        "fchs",
        "fstp dword [eax + 0x480]",
        "fxch st5",
        "fchs",
        "fstp dword [eax + 0x4c0]",
        "fld dword [esp + 0xc0]",
        "fchs",
        "fstp dword [eax + 0x500]",
        "fld dword [esp + 0xbc]",
        "fchs",
        "fstp dword [eax + 0x540]",
        "fxch st3",
        "fchs",
        "fstp dword [eax + 0x580]",
        "fld dword [esp + 0xb4]",
        "fchs",
        "fstp dword [eax + 0x5c0]",
        "fxch st3",
        "fchs",
        "fstp dword [eax + 0x600]",
        "fxch",
        "fchs",
        "fstp dword [eax + 0x640]",
        "fxch",
        "fchs",
        "fstp dword [eax + 0x680]",
        "fchs",
        "fstp dword [eax + 0x6c0]",
        "fld dword [esp + 0x8c]",
        "fstp dword [eax + 0x700]",
        "fld dword [esp + 0x88]",
        "fstp dword [eax + 0x740]",
        "fld dword [esp + 0x84]",
        "fstp dword [eax + 0x780]",
        "fld dword [esp + 0x110]",
        "fstp dword [eax + 0x7c0]",
        "cmp dword [ecx + 0x1000],ecx",
        "lea eax,[ecx + 0x800]"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x118 (280)",
        "ldr s2, [x7, #4228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8]",
        "ldr s2, [x7, #4224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #4220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr s2, [x7, #4216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x7, #4212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x7, #4208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x7, #4204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x7, #4200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x7, #4196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #32]",
        "ldr s2, [x7, #4192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #36]",
        "ldr s2, [x7, #4188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldr s2, [x7, #4184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x7, #4180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x7, #4176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x7, #4172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x7, #4168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #4164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #68]",
        "ldr s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #76]",
        "ldr s8, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #80]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #88]",
        "ldr s8, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #100]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d4",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #104]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d8",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1dc",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #116]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e4",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #120]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e8",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1ec",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8]",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1f0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1f4",
        "movk w20, #0xb3, lsl #16",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1f8",
        "movk w20, #0xb3, lsl #16",
        "ldr s5, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s6, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1fc",
        "movk w20, #0xb3, lsl #16",
        "ldr s6, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #32]",
        "ldr s8, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #36]",
        "ldr s8, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #40]",
        "ldr s8, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #68]",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #72]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc200",
        "movk w20, #0xb3, lsl #16",
        "ldr s6, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc204",
        "movk w20, #0xb3, lsl #16",
        "ldr s7, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #88]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #100]",
        "ldr s8, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #104]",
        "ldr s8, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #120]",
        "ldr s8, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s8, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc208",
        "movk w20, #0xb3, lsl #16",
        "ldr s8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str s2, [x8, #192]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #160]",
        "ldr s2, [x8, #160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v2.16b, v2.16b, v9.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #140]",
        "ldr s2, [x8, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #220]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v2.16b, v2.16b, v9.16b",
        "ldr s9, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #252]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str s2, [x8, #200]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #184]",
        "ldr s2, [x8, #184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #168]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #152]",
        "ldr s2, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v2.16b, v2.16b, v9.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #132]",
        "ldr s2, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #212]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v2.16b, v2.16b, v9.16b",
        "ldr s9, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #272]",
        "ldr s9, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #228]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d9, [x8, #272]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #260]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #244]",
        "ldr s2, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v2.16b, v2.16b, v9.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #268]",
        "ldr s2, [x8, #4]",
        "str s2, [x8, #144]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str s2, [x8, #176]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v9.16b, v2.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #236]",
        "ldr s2, [x7, #4104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc190",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8]",
        "ldr s2, [x7, #4108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc194",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #4112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc198",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr s2, [x7, #4116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc19c",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x7, #4120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1a0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x7, #4124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1a4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x7, #4128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4204]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1a8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x7, #4132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1ac",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x7, #4136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1b0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #32]",
        "ldr s2, [x7, #4140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1b4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #36]",
        "ldr s2, [x7, #4144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1b8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldr s2, [x7, #4148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1bc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x7, #4152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1c0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x7, #4156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1c4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x7, #4160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1c8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x7, #4164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x7, #4168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1cc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #84]",
        "ldr s2, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #88]",
        "ldr s2, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #96]",
        "ldr s2, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #100]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1d8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1dc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #116]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #120]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1e8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1ec",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8]",
        "ldr s2, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xc1fc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #68]",
        "ldr s4, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #72]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #84]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #88]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #100]",
        "ldr s4, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #104]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #116]",
        "ldr s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #196]",
        "ldr s5, [x8, #196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #188]",
        "ldr s8, [x8, #188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #164]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s10, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #180]",
        "ldr s8, [x8, #180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #172]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s10, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s10, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #148]",
        "ldr s8, [x8, #148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr q10, [x28, #3552]",
        "eor v8.16b, v8.16b, v10.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #272]",
        "ldr s8, [x8, #272]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s10, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #208]",
        "ldr s8, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s11, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s11",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v11.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #156]",
        "ldr s8, [x8, #156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr q11, [x28, #3552]",
        "eor v8.16b, v8.16b, v11.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #136]",
        "ldr s8, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s11, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s11",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v11.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #216]",
        "ldr s8, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr q11, [x28, #3552]",
        "eor v8.16b, v8.16b, v11.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #64]",
        "ldr s7, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s11, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s11",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v11.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #232]",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #224]",
        "ldr s8, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s11, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s11",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v11.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #240]",
        "ldr s9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s11, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s11",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v11.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x7, #4100]",
        "ldr w5, [x7, #4096]",
        "strb wzr, [x28, #1049]",
        "add w4, w5, w4, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #64]",
        "ldr s4, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #248]",
        "ldr s7, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v7.16b, v7.16b, v9.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #64]",
        "ldr s7, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #264]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #256]",
        "ldr s4, [x8, #144]",
        "str s4, [x4]",
        "ldr s4, [x8, #148]",
        "str s4, [x4, #64]",
        "ldr s4, [x8, #152]",
        "str s4, [x4, #128]",
        "ldr s4, [x8, #156]",
        "str s4, [x4, #192]",
        "ldr s4, [x8, #160]",
        "str s4, [x4, #256]",
        "ldr s4, [x8, #164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str s4, [x4, #320]",
        "ldr s4, [x8, #168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str s4, [x4, #384]",
        "ldr s4, [x8, #172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str s4, [x4, #448]",
        "ldr s4, [x8, #176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str s4, [x4, #512]",
        "ldr s4, [x8, #180]",
        "str s4, [x4, #576]",
        "ldr s4, [x8, #184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v11.16b, v0.16b",
        "str s4, [x4, #640]",
        "ldr s4, [x8, #188]",
        "str s4, [x4, #704]",
        "ldr s4, [x8, #192]",
        "str s4, [x4, #768]",
        "strb wzr, [x28, #1049]",
        "str s5, [x4, #832]",
        "ldr s4, [x8, #200]",
        "str s4, [x4, #896]",
        "strb wzr, [x28, #1049]",
        "str s2, [x4, #960]",
        "movi v2.2d, #0x0",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1024]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1088]",
        "ldr s2, [x8, #200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1152]",
        "strb wzr, [x28, #1049]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v6.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1216]",
        "ldr s2, [x8, #192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1280]",
        "ldr s2, [x8, #188]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1344]",
        "strb wzr, [x28, #1049]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v11.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1408]",
        "ldr s2, [x8, #180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1472]",
        "strb wzr, [x28, #1049]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v10.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1536]",
        "strb wzr, [x28, #1049]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v9.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1600]",
        "strb wzr, [x28, #1049]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v8.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1664]",
        "ldr q2, [x28, #3552]",
        "eor v2.16b, v7.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #1728]",
        "ldr s2, [x8, #140]",
        "str s2, [x4, #1792]",
        "ldr s2, [x8, #136]",
        "str s2, [x4, #1856]",
        "ldr s2, [x8, #132]",
        "str s2, [x4, #1920]",
        "ldr s2, [x8, #272]",
        "str s2, [x4, #1984]",
        "ldr w20, [x7, #4096]",
        "eor x27, x20, x7",
        "subs w26, w20, w7",
        "add w4, w7, #0x800 (2048)",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 630,
      "ExpectedInstructionCount": 4711,
      "x86Insts": [
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax + 0x40]",
        "fld dword [eax + 0x44]",
        "fadd st0,st1",
        "fstp dword [eax + 0x44]",
        "fld dword [eax + 0x3c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x40]",
        "fld dword [eax + 0x38]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x3c]",
        "fld dword [eax + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x38]",
        "fld dword [eax + 0x30]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x34]",
        "fld dword [eax + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x30]",
        "fld dword [eax + 0x28]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x2c]",
        "fld dword [eax + 0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x28]",
        "fld dword [eax + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x24]",
        "fld dword [eax + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x20]",
        "fld dword [eax + 0x18]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x1c]",
        "fld dword [eax + 0x14]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x18]",
        "fld dword [eax + 0x10]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x14]",
        "fld dword [eax + 0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x10]",
        "fld dword [eax + 0x8]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0xc]",
        "fld dword [eax + 0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x8]",
        "fld dword [eax]",
        "fst qword [esp + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [eax + 0x4]",
        "fld dword [eax + 0x3c]",
        "fld dword [eax + 0x44]",
        "fadd st0,st1",
        "fstp dword [eax + 0x44]",
        "fld dword [eax + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x3c]",
        "fld dword [eax + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x34]",
        "fld dword [eax + 0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x2c]",
        "fld dword [eax + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x24]",
        "fld dword [eax + 0x14]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x1c]",
        "fld dword [eax + 0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x14]",
        "faddp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fstp dword [eax + 0xc]",
        "fadd st0,st0",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x30]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x28]",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x10]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x80]",
        "fld dword [eax + 0x20]",
        "fld dword [eax + 0x40]",
        "fld st3",
        "fld qword [0x00a77b70]",
        "fmul st1",
        "fxch",
        "faddp st4,st0",
        "fld st2",
        "fld qword [0x00a77b68]",
        "fmul st1",
        "fxch st5",
        "faddp",
        "fld st2",
        "fld qword [0x00a77b60]",
        "fmul st1",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0xc0]",
        "fld qword [esp + 0x28]",
        "fadd st0,st6",
        "fsub st0,st4",
        "fld qword [esp + 0x18]",
        "fsub st1,st0",
        "fsubp",
        "fsub st0,st3",
        "fstp dword [esp + 0xd0]",
        "fld st5",
        "fmul st1",
        "fsubr qword [esp + 0x80]",
        "fld st4",
        "fmul st3",
        "fsubp",
        "fld st3",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0xb8]",
        "fld st5",
        "fmul st5",
        "fsubr qword [esp + 0x80]",
        "fld st4",
        "fmul st2",
        "faddp",
        "fld st3",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0xc8]",
        "fld qword [esp + 0x20]",
        "fsubrp st6,st0",
        "fxch st5",
        "faddp st3,st0",
        "fxch st2",
        "fsub qword [esp + 0x18]",
        "faddp",
        "fstp dword [esp + 0x80]",
        "fld dword [eax + 0x18]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x8]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x20]",
        "fld dword [eax + 0x28]",
        "fst qword [esp + 0x90]",
        "fld dword [eax + 0x38]",
        "fst qword [esp + 0x28]",
        "fld qword [0x00a77b50]",
        "fmul st4",
        "fxch st4",
        "faddp st3,st0",
        "fld qword [0x00a77b48]",
        "fmul st2",
        "fxch st3",
        "faddp st2,st0",
        "fld qword [0x00a77b40]",
        "fmul st1",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0xb4]",
        "fld qword [esp + 0x18]",
        "fld qword [esp + 0x90]",
        "fsub st1,st0",
        "fxch",
        "fsub qword [esp + 0x28]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0xc4]",
        "fld qword [esp + 0x18]",
        "fmul st3",
        "fsub qword [esp + 0x20]",
        "fxch",
        "fmul st2",
        "fsubp",
        "fld qword [esp + 0x28]",
        "fmul st4",
        "faddp",
        "fstp dword [esp + 0xa8]",
        "fld qword [esp + 0x18]",
        "fmul st1",
        "fsub qword [esp + 0x20]",
        "fld qword [esp + 0x90]",
        "fmul st4",
        "faddp",
        "fld qword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0x90]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x8]",
        "fadd st0,st0",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x34]",
        "fst qword [esp + 0x98]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0xa0]",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x14]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x14]",
        "fstp qword [esp + 0x88]",
        "fld dword [eax + 0x24]",
        "fstp qword [esp + 0x18]",
        "fld dword [eax + 0x44]",
        "fstp qword [esp + 0x28]",
        "fmul st4",
        "fadd qword [esp + 0x88]",
        "fld qword [esp + 0x18]",
        "fmul st6",
        "faddp",
        "fld qword [esp + 0x28]",
        "fmul st7",
        "faddp",
        "fstp dword [esp + 0xb0]",
        "fld qword [esp + 0xa0]",
        "fadd qword [esp + 0x20]",
        "fsub qword [esp + 0x18]",
        "fld qword [esp + 0x98]",
        "fsub st1,st0",
        "fsubp",
        "fsub qword [esp + 0x28]",
        "fstp dword [esp + 0x30]",
        "fld qword [esp + 0x20]",
        "fmul st6",
        "fsubr qword [esp + 0x88]",
        "fld qword [esp + 0x18]",
        "fmul st5",
        "fsubp",
        "fld qword [esp + 0x28]",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0xa0]",
        "fld qword [esp + 0x20]",
        "fld st0",
        "fmulp st6",
        "fld qword [esp + 0x88]",
        "fsubrp st6,st0",
        "fld qword [esp + 0x18]",
        "fmulp st7",
        "fxch st5",
        "faddp st6,st0",
        "fld qword [esp + 0x28]",
        "fld st0",
        "fmulp st5",
        "fxch st6",
        "fsubrp st4,st0",
        "fxch st3",
        "fstp dword [esp + 0x28]",
        "fld qword [esp + 0x8]",
        "fsubrp st4,st0",
        "fxch st3",
        "fadd qword [esp + 0x18]",
        "fsub qword [esp + 0x98]",
        "faddp st4,st0",
        "fxch st3",
        "fmul qword [0x00a77bd8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + 0x1c]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [eax + 0x2c]",
        "fld dword [eax + 0x3c]",
        "fld dword [esp + 0x4]",
        "fmul st6",
        "fadd st0,st3",
        "fld st2",
        "fmul st6",
        "faddp",
        "fld st1",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0xc]",
        "fst qword [esp + 0x20]",
        "fsub st0,st2",
        "fsub st0,st1",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x98]",
        "fld qword [esp + 0x20]",
        "fmul st5",
        "fsub st0,st3",
        "fld st2",
        "fmul st5",
        "fsubp",
        "fld st1",
        "fmul st7",
        "faddp",
        "fstp dword [esp + 0x88]",
        "fld qword [esp + 0x20]",
        "fmulp st4",
        "fxch st3",
        "fsubrp st2,st0",
        "fmulp st4",
        "faddp st3,st0",
        "fmulp",
        "fsubp",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0xb4]",
        "fld dword [esp + 0xc0]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0xb0]",
        "fld st0",
        "fadd st0,st2",
        "fmul qword [0x00a77b38]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x38]",
        "fsubrp",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0xc4]",
        "fld dword [esp + 0xd0]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x98]",
        "fst qword [esp + 0xd0]",
        "fld dword [esp + 0x30]",
        "fst qword [esp + 0x98]",
        "faddp",
        "fmul qword [0x00a77bd0]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x3c]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0xa8]",
        "fst qword [esp + 0xa8]",
        "fld dword [esp + 0xb8]",
        "fst qword [esp + 0xb8]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x88]",
        "fst qword [esp + 0x88]",
        "fld dword [esp + 0xa0]",
        "fst qword [esp + 0xa0]",
        "faddp",
        "fmul qword [0x00a77b30]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x40]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x90]",
        "fst qword [esp + 0x90]",
        "fld dword [esp + 0xc8]",
        "fst qword [esp + 0xc8]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x20]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fst qword [esp + 0x28]",
        "faddp",
        "fmul qword [0x00a77b28]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x44]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x18]",
        "fld dword [esp + 0x80]",
        "fst qword [esp + 0x80]",
        "fadd st0,st1",
        "fstp dword [esp + 0x48]",
        "fsubr qword [esp + 0x80]",
        "fstp dword [esp + 0x6c]",
        "fld qword [esp + 0xc8]",
        "fsub qword [esp + 0x90]",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0x28]",
        "fsub qword [esp + 0x20]",
        "fmul qword [0x00a77b20]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x4c]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x68]",
        "fld qword [esp + 0xb8]",
        "fsub qword [esp + 0xa8]",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0xa0]",
        "fsub qword [esp + 0x88]",
        "fmul qword [0x00a77b18]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x50]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x64]",
        "fsubrp",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0x98]",
        "fsub qword [esp + 0xd0]",
        "fmul qword [0x00a77be0]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x54]",
        "fsubrp",
        "fstp dword [esp + 0x60]",
        "fxch st2",
        "fsubrp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x8]",
        "fsubrp",
        "fmul qword [0x00a77b10]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "lea eax,[ecx + ecx*0x8]",
        "fld st0",
        "mov ecx,dword [ebp + 0xc]",
        "fadd st0,st2",
        "shl eax,0x4",
        "add eax,0xb183d0",
        "fstp dword [esp + 0x58]",
        "fsubrp",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "fchs",
        "fmul dword [eax]",
        "fstp dword [ecx]",
        "fld dword [eax + 0x4]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x4]",
        "fld dword [eax + 0x8]",
        "fld dword [esp + 0x64]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x8]",
        "fld dword [eax + 0xc]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0xc]",
        "fld dword [eax + 0x10]",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x10]",
        "fld dword [eax + 0x14]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x14]",
        "fld dword [eax + 0x18]",
        "fld dword [esp + 0x74]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x18]",
        "fld dword [eax + 0x1c]",
        "fld dword [esp + 0x78]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x1c]",
        "fld dword [eax + 0x20]",
        "fld dword [esp + 0x7c]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x20]",
        "fld dword [eax + 0x24]",
        "fmul dword [esp + 0x7c]",
        "fstp dword [ecx + 0x24]",
        "fld dword [eax + 0x28]",
        "fmul dword [esp + 0x78]",
        "fstp dword [ecx + 0x28]",
        "fld dword [eax + 0x2c]",
        "fmul dword [esp + 0x74]",
        "fstp dword [ecx + 0x2c]",
        "fmul dword [eax + 0x30]",
        "fstp dword [ecx + 0x30]",
        "fmul dword [eax + 0x34]",
        "fstp dword [ecx + 0x34]",
        "fmul dword [eax + 0x38]",
        "fstp dword [ecx + 0x38]",
        "fmul dword [eax + 0x3c]",
        "fstp dword [ecx + 0x3c]",
        "fmul dword [eax + 0x40]",
        "fstp dword [ecx + 0x40]",
        "fmul dword [eax + 0x44]",
        "fstp dword [ecx + 0x44]",
        "fld dword [esp + 0x58]",
        "fld dword [eax + 0x48]",
        "fmul st1",
        "fstp dword [ecx + 0x48]",
        "fld dword [esp + 0x54]",
        "fld dword [eax + 0x4c]",
        "fmul st1",
        "fstp dword [ecx + 0x4c]",
        "fld dword [esp + 0x50]",
        "fld dword [eax + 0x50]",
        "fmul st1",
        "fstp dword [ecx + 0x50]",
        "fld dword [esp + 0x4c]",
        "fld dword [eax + 0x54]",
        "fmul st1",
        "fstp dword [ecx + 0x54]",
        "fld dword [esp + 0x48]",
        "fld dword [eax + 0x58]",
        "fmul st1",
        "fstp dword [ecx + 0x58]",
        "fld dword [esp + 0x44]",
        "fld dword [eax + 0x5c]",
        "fmul st1",
        "fstp dword [ecx + 0x5c]",
        "fld dword [esp + 0x40]",
        "fst qword [esp + 0x18]",
        "fmul dword [eax + 0x60]",
        "fstp dword [ecx + 0x60]",
        "fld dword [esp + 0x3c]",
        "fst qword [esp + 0x80]",
        "fmul dword [eax + 0x64]",
        "fstp dword [ecx + 0x64]",
        "fld dword [esp + 0x38]",
        "fld dword [eax + 0x68]",
        "fmul st1",
        "fstp dword [ecx + 0x68]",
        "fmul dword [eax + 0x6c]",
        "fstp dword [ecx + 0x6c]",
        "fld dword [eax + 0x70]",
        "fmul qword [esp + 0x80]",
        "fstp dword [ecx + 0x70]",
        "fld dword [eax + 0x74]",
        "fmul qword [esp + 0x18]",
        "fstp dword [ecx + 0x74]",
        "fmul dword [eax + 0x78]",
        "fstp dword [ecx + 0x78]",
        "fmul dword [eax + 0x7c]",
        "fstp dword [ecx + 0x7c]",
        "fmul dword [eax + 0x80]",
        "fstp dword [ecx + 0x80]",
        "fmul dword [eax + 0x84]",
        "fstp dword [ecx + 0x84]",
        "fmul dword [eax + 0x88]",
        "fstp dword [ecx + 0x88]",
        "fmul dword [eax + 0x8c]",
        "fstp dword [ecx + 0x8c]",
        "mov esp,ebp",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x9, #8]",
        "ldr s2, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #68]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #64]",
        "ldr s2, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #60]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #56]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #52]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #48]",
        "ldr s2, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #44]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #40]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #36]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #32]",
        "ldr s2, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #28]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #24]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #20]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #16]",
        "ldr s2, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #8]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d4, d0",
        "str d4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str s3, [x4, #4]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s5, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4, #68]",
        "ldr s5, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #60]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4, #52]",
        "ldr s5, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #44]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4, #36]",
        "ldr s5, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #28]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #4]",
        "ldr s3, [x8, #4]",
        "str s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "str d3, [x8, #24]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d4, d0",
        "str d4, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d4, d0",
        "str d4, [x8, #128]",
        "ldr s4, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "mov w20, #0x7b70",
        "movk w20, #0xa7, lsl #16",
        "ldr d6, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov w20, #0x7b68",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov w20, #0x7b60",
        "movk w20, #0xa7, lsl #16",
        "ldr d8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #192]",
        "ldr d3, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr d9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #208]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr d9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #184]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr d9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #200]",
        "ldr d3, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr d3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #128]",
        "ldr s2, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0x7b58",
        "movk w20, #0xa7, lsl #16",
        "ldr d3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "str d3, [x8, #24]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d4, d0",
        "str d4, [x8, #32]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #144]",
        "ldr s5, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #40]",
        "mov w21, #0x7b50",
        "movk w21, #0xa7, lsl #16",
        "ldr d9, [x21]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w21, #0x7b48",
        "movk w21, #0xa7, lsl #16",
        "ldr d3, [x21]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w21, #0x7b40",
        "movk w21, #0xa7, lsl #16",
        "ldr d4, [x21]",
        "str x30, [sp, #-16]!",
        "fmov d0, d4",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #180]",
        "ldr d2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #144]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr d10, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d10",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d10, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d10",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #196]",
        "ldr d2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d10, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d10",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #168]",
        "ldr d2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #144]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #144]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #152]",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #160]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #32]",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #136]",
        "ldr s5, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #24]",
        "ldr s5, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d5, d0",
        "str d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #176]",
        "ldr d2, [x8, #160]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr d2, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #160]",
        "ldr d2, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr d7, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr d7, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr d7, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #40]",
        "ldr d5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr d5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "mov w21, #0x7bd8",
        "movk w21, #0xa7, lsl #16",
        "ldr d5, [x21]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d5, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s5, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #20]",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d8, d0",
        "str d8, [x8, #32]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr d8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #152]",
        "ldr d7, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #136]",
        "ldr d7, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "mov w20, #0x7b38",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #124]",
        "ldr s6, [x8, #196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #208]",
        "ldr s9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #152]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov w20, #0x7bd0",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #60]",
        "ldr d9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #120]",
        "ldr s8, [x8, #168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #168]",
        "ldr s9, [x8, #184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #184]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #136]",
        "ldr s9, [x8, #160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #160]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov w20, #0x7b30",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #64]",
        "ldr d9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #144]",
        "ldr s9, [x8, #200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #200]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #32]",
        "ldr s9, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov w20, #0x7b28",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #68]",
        "ldr d9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #112]",
        "ldr s8, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #128]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #72]",
        "ldr d9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #108]",
        "ldr d8, [x8, #200]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #144]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr d8, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov w20, #0x7b20",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #76]",
        "ldr d9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #104]",
        "ldr d8, [x8, #184]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #168]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr d8, [x8, #160]",
        "str x30, [sp, #-16]!",
        "fmov d0, d8",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov w20, #0x7b18",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d10, d0",
        "str d10, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #80]",
        "ldr d9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #100]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #8]",
        "ldr d6, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr d7, [x8, #208]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "mov w20, #0x7be0",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #84]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0x7b10",
        "movk w20, #0xa7, lsl #16",
        "ldr d3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w4, w7, w7, lsl #3",
        "ldr w7, [x9, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "lsl w4, w4, #4",
        "mov w20, #0x83d0",
        "movk w20, #0xb1, lsl #16",
        "mvn w27, w4",
        "adds w26, w4, w20",
        "mov x4, x26",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #88]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v3.16b, v2.16b, v3.16b",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr q5, [x28, #3552]",
        "eor v5.16b, v4.16b, v5.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #4]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s5, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "eor v6.16b, v5.16b, v6.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #8]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr q7, [x28, #3552]",
        "eor v7.16b, v6.16b, v7.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #12]",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s7, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr q8, [x28, #3552]",
        "eor v8.16b, v7.16b, v8.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s8, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr q9, [x28, #3552]",
        "eor v9.16b, v8.16b, v9.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #20]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr q10, [x28, #3552]",
        "eor v9.16b, v9.16b, v10.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #24]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr q10, [x28, #3552]",
        "eor v9.16b, v9.16b, v10.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #28]",
        "ldr s3, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr q10, [x28, #3552]",
        "eor v9.16b, v9.16b, v10.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #32]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #36]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #40]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s9, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #44]",
        "ldr s3, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #48]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #52]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #56]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #60]",
        "ldr s3, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #64]",
        "ldr s3, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #68]",
        "ldr s2, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #72]",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x7, #76]",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x7, #80]",
        "ldr s5, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x7, #84]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x7, #88]",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #92]",
        "ldr s8, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #24]",
        "ldr s9, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #96]",
        "ldr s8, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d9, d0",
        "str d9, [x8, #128]",
        "ldr s9, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #100]",
        "ldr s8, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x7, #104]",
        "ldr s9, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #108]",
        "ldr s8, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #112]",
        "ldr s8, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr d9, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov d0, d9",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x7, #116]",
        "ldr s8, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x7, #120]",
        "ldr s7, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x7, #124]",
        "ldr s6, [x4, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x7, #128]",
        "ldr s5, [x4, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x7, #132]",
        "ldr s4, [x4, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #136]",
        "ldr s3, [x4, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #140]",
        "mov x8, x9",
        "ldr w9, [x8], #4",
        "cfinv",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 649,
      "ExpectedInstructionCount": 3256,
      "x86Insts": [
        "fld dword [esi + 0x64]",
        "mov eax,dword [esi + 0x88]",
        "fstp dword [esp + 0x5c]",
        "mov ecx,dword [esi + 0x8c]",
        "fld dword [esi + 0x70]",
        "mov edx,dword [esi + 0x90]",
        "fstp dword [esp + 0x60]",
        "mov dword [esp + 0x2e4],0x3f",
        "fld dword [esi + 0x7c]",
        "mov dword [esp + 0x94],eax",
        "fstp dword [esp + 0x64]",
        "mov dword [esp + 0x98],ecx",
        "fld dword [esi + 0x68]",
        "mov dword [esp + 0x9c],edx",
        "fstp dword [esp + 0x14]",
        "mov dword [esp + 0xe8],eax",
        "fld dword [esi + 0x74]",
        "mov dword [esp + 0xec],ecx",
        "fstp dword [esp + 0x18]",
        "mov dword [esp + 0xf0],edx",
        "fld dword [esi + 0x80]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esi + 0xf4]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x7c]",
        "fmul dword [esp + 0x1c]",
        "fstp dword [esp + 0x84]",
        "fld dword [esi + 0x6c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esi + 0x78]",
        "fstp dword [esp + 0x18]",
        "fld dword [esi + 0x84]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esi + 0xf0]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fmul dword [esp + 0x1c]",
        "fstp dword [esp + 0x58]",
        "fld dword [esi + 0x100]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x54]",
        "fmul st1",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x10]",
        "fld dword [esp + 0x50]",
        "fmul st2",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x2c]",
        "fld dword [esp + 0x58]",
        "fmul st3",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fmul st4",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x7c]",
        "fmul st4",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x84]",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x5c]",
        "fmul st4",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x60]",
        "fmul st4",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x64]",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x94]",
        "fld dword [esp + 0x84]",
        "fadd st0,st1",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x98]",
        "fld dword [esp + 0x7c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x9c]",
        "fld dword [esp + 0x5c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x84]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x44]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x10]",
        "fstp dword [esp + 0x5c]",
        "mov eax,dword [esp + 0x5c]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xf4],eax",
        "fstp dword [esp + 0x60]",
        "mov ecx,dword [esp + 0x60]",
        "fld dword [esp + 0x14]",
        "mov dword [esp + 0xf8],ecx",
        "fstp dword [esp + 0x64]",
        "mov edx,dword [esp + 0x64]",
        "fxch st4",
        "mov dword [esp + 0xfc],edx",
        "fst dword [esp + 0x5c]",
        "fxch st3",
        "fst dword [esp + 0x84]",
        "fxch st5",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x100],eax",
        "fstp dword [esp + 0x18]",
        "mov ecx,dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0x104],ecx",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fxch st3",
        "mov dword [esp + 0x108],edx",
        "fst dword [esp + 0x5c]",
        "fxch st5",
        "fst dword [esp + 0x84]",
        "fxch st3",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x10c],eax",
        "fstp dword [esp + 0x18]",
        "mov ecx,dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0x110],ecx",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fxch st5",
        "mov dword [esp + 0x114],edx",
        "fstp dword [esp + 0x5c]",
        "fxch st2",
        "fstp dword [esp + 0x84]",
        "fxch st3",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x118],eax",
        "mov eax,dword [ebx + 0x88]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov ecx,dword [esp + 0x18]",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fld dword [ebx + 0x64]",
        "mov dword [esp + 0x11c],ecx",
        "mov ecx,dword [ebx + 0x8c]",
        "fstp dword [esp + 0x70]",
        "fld dword [ebx + 0x70]",
        "mov dword [esp + 0x120],edx",
        "mov edx,dword [ebx + 0x90]",
        "fstp dword [esp + 0x74]",
        "fld dword [ebx + 0x7c]",
        "mov dword [esp + 0x94],eax",
        "mov dword [esp + 0x98],ecx",
        "mov dword [esp + 0x9c],edx",
        "fstp dword [esp + 0x78]",
        "mov dword [esp + 0xac],eax",
        "fld dword [ebx + 0x68]",
        "mov dword [esp + 0xb0],ecx",
        "fstp dword [esp + 0x2c]",
        "mov dword [esp + 0xb4],edx",
        "fld dword [ebx + 0x74]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebx + 0x80]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebx + 0xf4]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fstp dword [esp + 0x48]",
        "fmul dword [esp + 0x34]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebx + 0x6c]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebx + 0x78]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebx + 0x84]",
        "fstp dword [esp + 0x64]",
        "fld dword [ebx + 0xf0]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0x5c]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fmul dword [esp + 0x64]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebx + 0x100]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x5c]",
        "fld dword [esp + 0x2c]",
        "fmul st2",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x84]",
        "fld dword [esp + 0x10]",
        "fmul st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x40]",
        "fmul st4",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x48]",
        "fmul st4",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x44]",
        "fmul st4",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x70]",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x74]",
        "fmul st4",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x94]",
        "fld dword [esp + 0x14]",
        "fadd st0,st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x98]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x9c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xb8],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xbc],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "fxch st4",
        "mov dword [esp + 0xc0],edx",
        "fst dword [esp + 0x5c]",
        "fxch st3",
        "fst dword [esp + 0x84]",
        "fxch st5",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xc4],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xc8],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "fxch st3",
        "mov dword [esp + 0xcc],edx",
        "fst dword [esp + 0x5c]",
        "fxch st5",
        "fst dword [esp + 0x84]",
        "fxch st3",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xd0],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xd4],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "mov dword [esp + 0xd8],edx",
        "fxch st5",
        "push 0x0",
        "fstp dword [esp + 0x60]",
        "fxch st2",
        "fstp dword [esp + 0x88]",
        "fxch st3",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x2c]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x5c]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x90]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x30]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x60]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x88]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x80]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fstp dword [esp + 0x74]",
        "mov eax,dword [esp + 0x74]",
        "fld dword [esp + 0x30]",
        "mov dword [esp + 0xe0],eax",
        "fstp dword [esp + 0x78]",
        "mov ecx,dword [esp + 0x78]",
        "fld dword [esp + 0x14]",
        "mov dword [esp + 0xe4],ecx",
        "fstp dword [esp + 0x7c]",
        "mov edx,dword [esp + 0x7c]",
        "lea ecx,[esp + 0x190]",
        "mov dword [esp + 0xe8],edx",
        "call 0x0070df30",
        "mov dword [esp + 0x198],esi",
        "add esi,0xec",
        "push esi",
        "lea ecx,[esp + 0x190]",
        "mov dword [esp + 0x314],0x0",
        "call 0x0070e040",
        "mov ecx,0x19",
        "lea esi,[esp + 0x1b8]",
        "lea edi,[esp + 0x21c]",
        "rep movsd",
        "mov dword [esp + 0x198],ebx",
        "add ebx,0xec",
        "push ebx",
        "lea ecx,[esp + 0x190]",
        "call 0x0070e040",
        "mov ecx,0x19",
        "lea esi,[esp + 0x1b8]",
        "lea edi,[esp + 0x284]",
        "rep movsd",
        "lea esi,[esp + 0x124]",
        "mov edi,0x5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x10, #100]",
        "ldr w4, [x10, #136]",
        "str s2, [x8, #92]",
        "ldr w7, [x10, #140]",
        "ldr s2, [x10, #112]",
        "ldr w5, [x10, #144]",
        "str s2, [x8, #96]",
        "mov w20, #0x3f",
        "str w20, [x8, #740]",
        "ldr s2, [x10, #124]",
        "str w4, [x8, #148]",
        "str s2, [x8, #100]",
        "str w7, [x8, #152]",
        "ldr s2, [x10, #104]",
        "str w5, [x8, #156]",
        "str s2, [x8, #20]",
        "str w4, [x8, #232]",
        "ldr s2, [x10, #116]",
        "str w7, [x8, #236]",
        "str s2, [x8, #24]",
        "str w5, [x8, #240]",
        "ldr s2, [x10, #128]",
        "str s2, [x8, #28]",
        "ldr s2, [x10, #244]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #140]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #124]",
        "ldr s3, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #132]",
        "ldr s2, [x10, #108]",
        "str s2, [x8, #20]",
        "ldr s2, [x10, #120]",
        "str s2, [x8, #24]",
        "ldr s2, [x10, #132]",
        "str s2, [x8, #28]",
        "ldr s2, [x10, #240]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #84]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #80]",
        "ldr s3, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #88]",
        "ldr s2, [x10, #256]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #40]",
        "ldr s3, [x8, #40]",
        "str s3, [x8, #16]",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #40]",
        "ldr s4, [x8, #40]",
        "str s4, [x8, #44]",
        "ldr s5, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #40]",
        "ldr s5, [x8, #40]",
        "str s5, [x8, #20]",
        "ldr s6, [x8, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #76]",
        "ldr s6, [x8, #76]",
        "str s6, [x8, #68]",
        "ldr s6, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #140]",
        "ldr s6, [x8, #140]",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #88]",
        "ldr s6, [x8, #88]",
        "str s6, [x8, #64]",
        "ldr s6, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #80]",
        "ldr s6, [x8, #80]",
        "str s6, [x8, #132]",
        "ldr s6, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #84]",
        "ldr s6, [x8, #84]",
        "str s6, [x8, #124]",
        "ldr s6, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s6, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #132]",
        "ldr s6, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #124]",
        "ldr s7, [x8, #156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #16]",
        "str s8, [x8, #92]",
        "ldr w4, [x8, #92]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #244]",
        "str s8, [x8, #96]",
        "ldr w7, [x8, #96]",
        "ldr s8, [x8, #20]",
        "str w7, [x8, #248]",
        "str s8, [x8, #100]",
        "ldr w5, [x8, #100]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #252]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #256]",
        "str s8, [x8, #24]",
        "ldr w7, [x8, #24]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #260]",
        "str s8, [x8, #28]",
        "ldr w5, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #264]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #268]",
        "str s8, [x8, #24]",
        "ldr w7, [x8, #24]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #272]",
        "str s8, [x8, #28]",
        "ldr w5, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #276]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s3, [x8, #76]",
        "str s3, [x8, #64]",
        "ldr s3, [x8, #140]",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #88]",
        "str s3, [x8, #68]",
        "ldr s3, [x8, #80]",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #84]",
        "str s3, [x8, #44]",
        "ldr s3, [x8, #40]",
        "str s3, [x8, #16]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "str s2, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s2, [x8, #44]",
        "str w4, [x8, #280]",
        "ldr w4, [x6, #136]",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #16]",
        "ldr w7, [x8, #24]",
        "str s2, [x8, #28]",
        "ldr w5, [x8, #28]",
        "ldr s2, [x6, #100]",
        "str w7, [x8, #284]",
        "ldr w7, [x6, #140]",
        "str s2, [x8, #112]",
        "ldr s2, [x6, #112]",
        "str w5, [x8, #288]",
        "ldr w5, [x6, #144]",
        "str s2, [x8, #116]",
        "ldr s2, [x6, #124]",
        "str w4, [x8, #148]",
        "str w7, [x8, #152]",
        "str w5, [x8, #156]",
        "str s2, [x8, #120]",
        "str w4, [x8, #172]",
        "ldr s2, [x6, #104]",
        "str w7, [x8, #176]",
        "str s2, [x8, #44]",
        "str w5, [x8, #180]",
        "ldr s2, [x6, #116]",
        "str s2, [x8, #48]",
        "ldr s2, [x6, #128]",
        "str s2, [x8, #52]",
        "ldr s2, [x6, #244]",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #64]",
        "ldr s3, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x6, #108]",
        "str s2, [x8, #92]",
        "ldr s2, [x6, #120]",
        "str s2, [x8, #96]",
        "ldr s2, [x6, #132]",
        "str s2, [x8, #100]",
        "ldr s2, [x6, #240]",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #44]",
        "ldr s3, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "ldr s2, [x6, #256]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #20]",
        "str s3, [x8, #92]",
        "ldr s4, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #20]",
        "ldr s4, [x8, #20]",
        "str s4, [x8, #132]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "ldr s5, [x8, #20]",
        "str s5, [x8, #124]",
        "ldr s6, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #40]",
        "ldr s6, [x8, #40]",
        "str s6, [x8, #64]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #84]",
        "ldr s6, [x8, #84]",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #80]",
        "ldr s6, [x8, #80]",
        "str s6, [x8, #68]",
        "ldr s6, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #88]",
        "ldr s6, [x8, #88]",
        "str s6, [x8, #20]",
        "ldr s6, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #140]",
        "ldr s6, [x8, #140]",
        "str s6, [x8, #44]",
        "ldr s6, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0x0",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #76]",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #148]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s6, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "ldr s6, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #44]",
        "ldr s7, [x8, #156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #184]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #188]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #192]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #196]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #200]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #204]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #208]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #212]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "str w5, [x8, #216]",
        "strb wzr, [x28, #1049]",
        "str w20, [x8, #-4]!",
        "str s3, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #136]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #128]",
        "ldr s3, [x8, #44]",
        "str s3, [x8, #68]",
        "ldr s3, [x8, #88]",
        "str s3, [x8, #76]",
        "ldr s3, [x8, #84]",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #92]",
        "str s3, [x8, #24]",
        "ldr s3, [x8, #144]",
        "str s3, [x8, #48]",
        "ldr s3, [x8, #80]",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "str s2, [x8, #116]",
        "ldr w4, [x8, #116]",
        "ldr s2, [x8, #48]",
        "str w4, [x8, #224]",
        "str s2, [x8, #120]",
        "ldr w7, [x8, #120]",
        "ldr s2, [x8, #20]",
        "str w7, [x8, #228]",
        "str s2, [x8, #124]",
        "ldr w5, [x8, #124]",
        "add w7, w8, #0x190 (400)",
        "str w5, [x8, #232]",
        "mov w20, #0xa3f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xfefe",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block4": {
      "x86InstructionCount": 2050,
      "ExpectedInstructionCount": 36,
      "x86Insts": [
        "fldz",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x37",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33c14",
        "push 0x52424157",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x38",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33c04",
        "push 0x41574157",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x2b",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bfc",
        "push 0x444c4853",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bf0",
        "push 0x48534946",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bdc",
        "push 0x4853494c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bcc",
        "push 0x48535246",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x52485446",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x1000073",
        "push 0xb",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bc4",
        "push 0x4e445242",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000076",
        "push 0xb",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bbc",
        "push 0x52485446",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0xe0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa32700",
        "push 0x4b434f4c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0xc0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bb4",
        "push 0x4e45504f",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x48534946",
        "push 0x49465352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x3d",
        "push 0x21000475",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ba8",
        "push 0x47444946",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4853494c",
        "push 0x48535352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x44",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b98",
        "push 0x47444853",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x48535246",
        "push 0x52465352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x3e",
        "push 0x1000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b88",
        "push 0x47445246",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x54414241",
        "push 0x54414f46",
        "push 0x54414552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100075",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b74",
        "push 0x54414744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b64",
        "push 0x45484744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534241",
        "push 0x50534f46",
        "push 0x50534552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000075",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b50",
        "push 0x50534744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x41464241",
        "push 0x41464f46",
        "push 0x41464552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000075",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b40",
        "push 0x41464744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x54414241",
        "push 0x54414f46",
        "push 0x54414552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100077",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b30",
        "push 0x54415244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x4b534241",
        "push 0x4b534f46",
        "push 0x4c505344",
        "push 0x3",
        "push 0x40",
        "push 0x80077",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b24",
        "push 0x4b535244",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b14",
        "push 0x45485244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534241",
        "push 0x50534f46",
        "push 0x50534552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b00",
        "push 0x50535244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x41464241",
        "push 0x41464f46",
        "push 0x41464552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33af0",
        "push 0x41465244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x4c505344",
        "push 0x49465352",
        "push 0x48534946",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33adc",
        "push 0x49464b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x52465352",
        "push 0x48535246",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ac8",
        "push 0x52464b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x48535352",
        "push 0x4853494c",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ab4",
        "push 0x48534b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x414d5352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x40",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33aa0",
        "push 0x414d4b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x49445352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x3f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a8c",
        "push 0x49444b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x4f505352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x43",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a78",
        "push 0x4f504b57",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x574e5352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x41",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a5c",
        "push 0x574e4b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x48535246",
        "push 0x4853494c",
        "push 0x48534946",
        "push 0x444c4853",
        "push 0x4c505344",
        "push 0x5",
        "push 0x40",
        "push 0x75",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a48",
        "push 0x52414944",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x48535246",
        "push 0x4853494c",
        "push 0x48534946",
        "push 0x444c4853",
        "push 0x4c505344",
        "push 0x5",
        "push 0x40",
        "push 0x75",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a30",
        "push 0x45574944",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x0",
        "push 0x3f",
        "push 0x10000092",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a24",
        "push 0x504d4156",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x14",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a18",
        "push 0x47445553",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000112",
        "push 0x39",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a08",
        "push 0x414d5453",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x4f505543",
        "push 0x1",
        "push 0x43",
        "push 0x800000",
        "fldz",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339fc",
        "push 0x4e534f50",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x49445543",
        "push 0x1",
        "push 0x3f",
        "push 0x800000",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339ec",
        "push 0x45534944",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339cc",
        "push 0x594d5544",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x2f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339bc",
        "push 0x49564e49",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x2e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339b0",
        "push 0x4c4d4843",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x41505543",
        "push 0x2",
        "push 0x42",
        "push 0x1000173",
        "push 0x30",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339a4",
        "push 0x41524150",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000173",
        "push 0x31",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa3399c",
        "push 0x434e4c53",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000062",
        "push 0x6",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33994",
        "push 0x4d524843",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x594c4152",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000066",
        "push 0x22",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33988",
        "push 0x4f4d4544",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4f4d4544",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x1000062",
        "push 0x22",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33980",
        "push 0x594c4152",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4d4c4143",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000062",
        "push 0x21",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33978",
        "push 0x5a4e5246",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x5a4e5246",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000066",
        "push 0x21",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33970",
        "push 0x4d4c4143",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x0",
        "push -0x1",
        "push 0x1000112",
        "push 0x29",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33964",
        "push 0x4559454e",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x80000072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa3395c",
        "push 0x5448474c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x81000072",
        "push 0x46",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33950",
        "push 0x4b524144",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push 0x40",
        "push 0xf0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33948",
        "push 0x4c505344",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x163",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa3393c",
        "push 0x50525453",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x81000242",
        "push 0x3c",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33930",
        "push 0x454c4554",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x81000012",
        "push 0x3a",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33924",
        "push 0x54435444",
        "call 0x00417220",
        "add esp,0x20",
        "fldz",
        "push 0x0",
        "push 0x40",
        "push 0x1000072",
        "push 0x34",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33910",
        "push 0x53424153",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x35",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33908",
        "push 0x434c4652",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100001a",
        "push 0x3b",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa338f8",
        "push 0x47444552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100070",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338e4",
        "push 0x54414552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338d4",
        "push 0x45484552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338bc",
        "push 0x50534552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338ac",
        "push 0x41464552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33898",
        "push 0x54414f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x80072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33888",
        "push 0x4b534f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33878",
        "push 0x45484f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33860",
        "push 0x50534f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33850",
        "push 0x41464f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4b535244",
        "push 0x4b534241",
        "push 0x4c505344",
        "push 0x3",
        "push 0x40",
        "push 0x80027",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33840",
        "push 0x4b534241",
        "call 0x00417220",
        "add esp,0x2c",
        "push 0x54414744",
        "push 0x54415244",
        "push 0x54414241",
        "fldz",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100027",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3382c",
        "push 0x54414241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x594d5544",
        "push 0x45484744",
        "push 0x45485244",
        "push 0x45484241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3381c",
        "push 0x45484241",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x41464744",
        "push 0x41465244",
        "push 0x41464241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3380c",
        "push 0x41464241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534744",
        "push 0x50535244",
        "push 0x50534241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337f8",
        "push 0x50534241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337ec",
        "push 0x49465352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337dc",
        "push 0x52465352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337cc",
        "push 0x48535352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x40",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337bc",
        "push 0x414d5352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337ac",
        "push 0x49445352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x43",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3379c",
        "push 0x4f505352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x42",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33788",
        "push 0x41505352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x41",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33770",
        "push 0x574e5352",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100017a",
        "push 0x47",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3375c",
        "push 0x44575352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3374c",
        "push 0x49445543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33740",
        "push 0x4f505543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33730",
        "push 0x41505543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000012",
        "push 0x28",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33714",
        "push 0x4d4d4f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33704",
        "push 0x4f48475a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336f8",
        "push 0x43494c5a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "fldz",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336e8",
        "push 0x454b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336d0",
        "push 0x414b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336b4",
        "push 0x434b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3369c",
        "push 0x484b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3368c",
        "push 0x4152575a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33678",
        "push 0x4c52575a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33668",
        "push 0x4d4f5a5a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33650",
        "push 0x5a44485a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33638",
        "push 0x4149465a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33620",
        "push 0x4152465a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33608",
        "push 0x4154535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335f8",
        "push 0x4541445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335e8",
        "push 0x4552445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335d4",
        "push 0x4c52445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335c4",
        "push 0x4143535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335b0",
        "push 0x414c435a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33598",
        "push 0x4450535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33588",
        "push 0x5649585a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33578",
        "push 0x3130305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33568",
        "push 0x3230305a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33558",
        "push 0x3330305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33548",
        "push 0x3430305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33538",
        "push 0x3530305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33528",
        "push 0x3630305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33518",
        "push 0x3730305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33508",
        "push 0x3830305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334f8",
        "push 0x3930305a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "fldz",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334e8",
        "push 0x3031305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334d8",
        "push 0x3131305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334c8",
        "push 0x3231305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334b8",
        "push 0x3331305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334a8",
        "push 0x3431305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33498",
        "push 0x3531305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33488",
        "push 0x3631305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33478",
        "push 0x3731305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33468",
        "push 0x3831305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33458",
        "push 0x3931305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33448",
        "push 0x3032305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x40000062",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33434",
        "push 0x55484f43",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x40000062",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33420",
        "push 0x52434f43",
        "call 0x00417220",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33414",
        "push 0x58415742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33408",
        "push 0x4f425742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333f8",
        "push 0x41445742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333ec",
        "push 0x414d5742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333e0",
        "push 0x57535742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333d4",
        "push 0x4f424142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333c4",
        "push 0x55434142",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "fldz",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333b4",
        "push 0x41474142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333a4",
        "push 0x52474142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33394",
        "push 0x45484142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33384",
        "push 0x48534142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3336c",
        "push 0x31304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33354",
        "push 0x32304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3333c",
        "push 0x33304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33324",
        "push 0x34304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3330c",
        "push 0x35304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332f4",
        "push 0x36304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332dc",
        "push 0x37304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332c4",
        "push 0x38304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332ac",
        "push 0x39304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33294",
        "push 0x30314142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3327c",
        "push 0x31305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33264",
        "push 0x32305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3324c",
        "push 0x33305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33234",
        "push 0x34305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3321c",
        "push 0x35305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33204",
        "push 0x36305742",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331ec",
        "push 0x37305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331d4",
        "push 0x38305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331bc",
        "push 0x39305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331a4",
        "push 0x30315742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x594c4152",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x40000063",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33198",
        "push 0x4e525554",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x170",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x6",
        "push 0xa33188",
        "push 0x46464553",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3316c",
        "push 0x4854594d",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33154",
        "push 0x4c48594d",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10000360",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33148",
        "push 0x4e414552",
        "call 0x00417220",
        "add esp,0x20"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov w20, #0x0",
        "str w20, [x8, #-4]!",
        "mov w21, #0xffffffff",
        "str w21, [x8, #-4]!",
        "mov w21, #0x172",
        "movk w21, #0x100, lsl #16",
        "str w21, [x8, #-4]!",
        "mov w21, #0x37",
        "stp w7, w21, [x8, #-8]!",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8]",
        "str w20, [x8, #-4]!",
        "mov w20, #0x3c14",
        "movk w20, #0xa3, lsl #16",
        "str w20, [x8, #-4]!",
        "mov w20, #0x4157",
        "movk w20, #0x5242, lsl #16",
        "str w20, [x8, #-4]!",
        "mov w20, #0x22",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 368,
      "ExpectedInstructionCount": 128,
      "x86Insts": [
        "mov ebx,dword [eax + 0x68]",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x68]",
        "sub esp,0x14",
        "fstp dword [esp + 0x10]",
        "movzx ecx,al",
        "fld1",
        "mov dword [esp + 0x38],ecx",
        "fstp dword [esp + 0xc]",
        "movzx edx,bl",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "mov dword [esp + 0x40],eax",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],edx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx eax,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],eax",
        "fld1",
        "movzx ecx,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],ecx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx edx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],edx",
        "fldz",
        "shr ebx,0x10",
        "movzx eax,bl",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],eax",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "fst dword [esp + 0x30]",
        "mov ecx,dword [esp + 0x24]",
        "mov edx,dword [esp + 0x28]",
        "mov eax,dword [esp + 0x2c]",
        "mov dword [0x00b45e14],ecx",
        "mov ecx,dword [esp + 0x30]",
        "mov [0x00b45e1c],eax",
        "mov dword [0x00b45e20],ecx",
        "mov dword [0x00b45e18],edx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x6c]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x40],eax",
        "fstp dword [esp + 0xc]",
        "movzx eax,al",
        "fldz",
        "mov dword [esp + 0x38],eax",
        "fstp dword [esp + 0x8]",
        "mov edx,dword [esi + 0x20]",
        "fild dword [esp + 0x38]",
        "mov ebx,dword [edx + 0x6c]",
        "fld qword [0x00a3ddd8]",
        "movzx ecx,bl",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],ecx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx edx,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],edx",
        "fld1",
        "movzx eax,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],eax",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx ecx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],ecx",
        "fldz",
        "shr ebx,0x10",
        "movzx edx,bl",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],edx",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "mov eax,dword [esp + 0x24]",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "mov ecx,dword [esp + 0x28]",
        "mov edx,dword [esp + 0x2c]",
        "fst dword [esp + 0x30]",
        "mov [0x00b45e24],eax",
        "mov eax,dword [esp + 0x30]",
        "mov dword [0x00b45e2c],edx",
        "mov [0x00b45e30],eax",
        "mov dword [0x00b45e28],ecx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x70]",
        "fstp dword [esp + 0x10]",
        "movzx edx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x38],edx",
        "fldz",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0x8]",
        "mov ebx,dword [ecx + 0x70]",
        "fild dword [esp + 0x38]",
        "mov dword [esp + 0x40],eax",
        "fld qword [0x00a3ddd8]",
        "movzx eax,bl",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],eax",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx ecx,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],ecx",
        "fld1",
        "movzx edx,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],edx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx eax,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],eax",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "shr ebx,0x10",
        "movzx ecx,bl",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],ecx",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "mov edx,dword [esp + 0x24]",
        "mov eax,dword [esp + 0x28]",
        "fst dword [esp + 0x30]",
        "mov ecx,dword [esp + 0x2c]",
        "mov dword [0x00b45e34],edx",
        "mov edx,dword [esp + 0x30]",
        "mov [0x00b45e38],eax",
        "mov dword [0x00b45e3c],ecx",
        "mov dword [0x00b45e40],edx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [esi + 0x24]",
        "fstp dword [esp + 0x10]",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x4c]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e44]",
        "fld dword [esi + 0x2c]",
        "mov edx,dword [esi + 0x24]",
        "mov eax,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [edx + 0x50]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x50]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e48]",
        "fld dword [esi + 0x2c]",
        "mov ecx,dword [esi + 0x24]",
        "add esp,0x8",
        "fstp dword [esp + 0x8]",
        "fld1",
        "fstp dword [esp + 0x4]",
        "fldz",
        "fstp dword [esp]",
        "call 0x004ed660",
        "push ecx",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp]",
        "call 0x004ed660",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e4c]",
        "mov ecx,dword [esi + 0x24]",
        "fld dword [esi + 0x2c]",
        "add esp,0x8",
        "fstp dword [esp + 0x8]",
        "fld1",
        "fstp dword [esp + 0x4]",
        "fldz",
        "fstp dword [esp]",
        "call 0x004ed680",
        "push ecx",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp]",
        "call 0x004ed680",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e50]",
        "fld dword [esi + 0x2c]",
        "mov ecx,dword [esi + 0x24]",
        "mov edx,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x58]",
        "fstp dword [esp + 0x4]",
        "fld dword [edx + 0x58]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e54]",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [esi + 0x24]",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x5c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x5c]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e58]",
        "fld dword [esi + 0x2c]",
        "mov edx,dword [esi + 0x24]",
        "mov eax,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [edx + 0x54]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x54]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "add esp,0x14"
      ],
      "ExpectedArm64ASM": [
        "ldr w6, [x4, #104]",
        "ldr s2, [x10, #44]",
        "ldr w4, [x9, #104]",
        "mvn w27, w8",
        "subs w26, w8, #0x14 (20)",
        "mov x8, x26",
        "str s2, [x8, #16]",
        "uxtb w7, w4",
        "ldr q2, [x28, #3328]",
        "str w7, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "uxtb w5, w6",
        "movi v2.2d, #0x0",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr w20, [x8, #56]",
        "sxtw x20, w20",
        "mrs x21, nzcv",
        "mov w22, #0x0",
        "cmp x20, #0x0 (0)",
        "mov w23, #0x8000",
        "csel x12, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov w13, #0x3f",
        "mov x0, #0x3f",
        "clz x14, x20",
        "sub x14, x0, x14",
        "sub x14, x13, x14",
        "lsl x15, x20, x14",
        "mov w16, #0x403e",
        "sub x14, x16, x14",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x14, eq",
        "orr x20, x12, x20",
        "fmov d2, x15",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "str w4, [x8, #64]",
        "mov w20, #0xddd8",
        "movk w20, #0xa3, lsl #16",
        "ldr d3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #56]",
        "str s2, [x8, #4]",
        "str w5, [x8, #56]",
        "ldr w20, [x8, #56]",
        "sxtw x20, w20",
        "mrs x21, nzcv",
        "cmp x20, #0x0 (0)",
        "csel x23, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov x0, #0x3f",
        "clz x12, x20",
        "sub x12, x0, x12",
        "sub x12, x13, x12",
        "lsl x13, x20, x12",
        "sub x12, x16, x12",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x12, eq",
        "orr x20, x23, x20",
        "fmov d2, x13",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #56]",
        "str s2, [x8]",
        "mov w20, #0x5e",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xc0c0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 315,
      "ExpectedInstructionCount": 32,
      "x86Insts": [
        "mov eax,dword [esp + 0x110]",
        "fldz",
        "mov ecx,dword [eax]",
        "mov edx,dword [esp + 0x5c]",
        "mov ebx,dword [edx + 0x18]",
        "mov esi,dword [esp + 0x58]",
        "mov dword [ebx + 0xc],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [ebx + 0x10],edx",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebx + 0x14],eax",
        "mov ecx,dword [esi + 0x50]",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784370",
        "mov ecx,dword [esi + 0x50]",
        "fstp dword [esp + 0x54]",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784370",
        "fstp dword [esp + 0x64]",
        "mov eax,dword [esp + 0x11c]",
        "lea ebp,[ebx + 0x1c]",
        "mov esi,eax",
        "mov ecx,0x9",
        "mov edi,ebp",
        "rep movsd",
        "fld dword [eax + 0x4]",
        "mov ecx,dword [esp + 0x120]",
        "sub esp,0xc",
        "fmul dword [ecx + 0x4]",
        "fld dword [ecx]",
        "fmul dword [eax]",
        "faddp",
        "fld dword [eax + 0x8]",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + 0xc]",
        "fmul dword [ecx]",
        "fld dword [eax + 0x10]",
        "fmul dword [ecx + 0x4]",
        "faddp",
        "fld dword [eax + 0x14]",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x44]",
        "fld dword [eax + 0x18]",
        "fmul dword [ecx]",
        "fld dword [eax + 0x1c]",
        "fmul dword [ecx + 0x4]",
        "faddp",
        "fld dword [eax + 0x20]",
        "mov eax,esp",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x48]",
        "mov ecx,dword [esp + 0x48]",
        "fld dword [esp + 0x44]",
        "mov dword [eax],ecx",
        "fstp dword [esp + 0x4c]",
        "mov edx,dword [esp + 0x4c]",
        "fld dword [esp + 0x34]",
        "mov dword [eax + 0x4],edx",
        "fstp dword [esp + 0x50]",
        "mov ecx,dword [esp + 0x50]",
        "fld dword [esp + 0x3c]",
        "mov dword [eax + 0x8],ecx",
        "push ecx",
        "mov ecx,ebp",
        "fstp dword [esp]",
        "call 0x0078f050",
        "fld dword [esp + 0x54]",
        "sub esp,0x8",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x24]",
        "mov ecx,ebp",
        "fstp dword [esp]",
        "call 0x0078ef60",
        "fld dword [ebp + 0xc]",
        "mov esi,dword [esp + 0x58]",
        "fld dword [0x00b2b71c]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebp]",
        "fld dword [0x00b2b718]",
        "fld st0",
        "fmulp st2",
        "fxch st3",
        "faddp",
        "fld dword [ebp + 0x18]",
        "fld dword [0x00b2b720]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp + 0x10]",
        "fmul st2",
        "fld dword [ebp + 0x4]",
        "fmul st4",
        "faddp",
        "fld dword [ebp + 0x1c]",
        "fmul st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + 0x14]",
        "fmulp st2",
        "fld dword [ebp + 0x8]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul dword [ebp + 0x20]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x1c]",
        "fstp dword [esp + 0x3c]",
        "mov edx,dword [esp + 0x3c]",
        "fld dword [esp + 0x38]",
        "mov dword [ebx],edx",
        "fstp dword [esp + 0x40]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esp + 0x28]",
        "mov dword [ebx + 0x4],eax",
        "fstp dword [esp + 0x44]",
        "mov ecx,dword [esp + 0x44]",
        "fldz",
        "mov dword [ebx + 0x8],ecx",
        "mov ecx,dword [esi + 0x68]",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784210",
        "fmul dword [esp + 0x6c]",
        "fstp dword [ebx + 0x18]",
        "mov ecx,dword [esi + 0x5c]",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784210",
        "fmul dword [esp + 0x80]",
        "push 0xb2b724",
        "mov ecx,ebx",
        "fstp dword [esp + 0x54]",
        "call 0x0078fcc0",
        "fmul qword [0x00a8ba48]",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x34]",
        "fsubr qword [0x00a65a18]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "fabs",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "fmul qword [0x00a8c698]",
        "fld1",
        "fsubrp",
        "fstp dword [esp + 0x38]",
        "fld dword [0x00b2b72c]",
        "fld st0",
        "fmul dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [0x00b2b728]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "fsubrp",
        "fstp dword [esp + 0x24]",
        "fld dword [ebx + 0x8]",
        "fld dword [0x00b2b724]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebx]",
        "fmulp st4",
        "fxch",
        "fsubrp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x30]",
        "fmul dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fmulp st2",
        "fsubrp",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fld dword [esp + 0x24]",
        "fld dword [esp + 0x4c]",
        "fld st1",
        "fmulp st2",
        "fld st2",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul st0",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "call 0x00982c30",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "mov ecx,dword [esi + 0x70]",
        "fld1",
        "push ecx",
        "fdivrp",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x20]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fstp dword [esp + 0x44]",
        "fmul dword [esp + 0x50]",
        "fstp dword [esp + 0x48]",
        "fldz",
        "fstp dword [esp]",
        "call 0x00784210",
        "fsub qword [0x00a2faa0]",
        "mov edx,dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x40]",
        "sub esp,0xc",
        "fadd st0,st0",
        "mov eax,esp",
        "mov dword [eax],edx",
        "fmul qword [0x00a3d360]",
        "fstp dword [esp + 0x28]",
        "fld1",
        "fst dword [esp + 0xb8]",
        "fldz",
        "fst dword [esp + 0xbc]",
        "fst dword [esp + 0xc0]",
        "fst dword [esp + 0xc4]",
        "fst dword [esp + 0xcc]",
        "fst dword [esp + 0xd0]",
        "fstp dword [esp + 0xd4]",
        "fst dword [esp + 0xc8]",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x28]",
        "mov edx,dword [esp + 0x50]",
        "fmul dword [esp + 0x90]",
        "mov dword [eax + 0x4],ecx",
        "push ecx",
        "mov dword [eax + 0x8],edx",
        "fmul dword [esp + 0x44]",
        "lea ecx,[esp + 0xbc]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp dword [esp]",
        "call 0x0078f160",
        "lea eax,[esp + 0xac]",
        "push eax",
        "lea ecx,[esp + 0xd4]",
        "push ecx",
        "mov ecx,ebp",
        "call 0x0078edd0",
        "cmp dword [esp + 0x124],0x0",
        "mov esi,eax",
        "mov ecx,0x9",
        "mov edi,ebp",
        "rep movsd",
        "fld dword [ebp + 0xc]",
        "fld dword [0x00b2b71c]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebp]",
        "fld dword [0x00b2b718]",
        "fld st0",
        "fmulp st2",
        "fxch st3",
        "faddp",
        "fld dword [ebp + 0x18]",
        "fld dword [0x00b2b720]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp + 0x10]",
        "fmul st2",
        "fld dword [ebp + 0x4]",
        "fmul st4",
        "faddp",
        "fld dword [ebp + 0x1c]",
        "fmul st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + 0x14]",
        "fmulp st2",
        "fld dword [ebp + 0x8]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul dword [ebp + 0x20]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x1c]",
        "fstp dword [esp + 0x3c]",
        "mov edx,dword [esp + 0x3c]",
        "fld dword [esp + 0x38]",
        "mov dword [ebx],edx",
        "fstp dword [esp + 0x40]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esp + 0x28]",
        "mov dword [ebx + 0x4],eax",
        "mov eax,dword [esp + 0x10c]",
        "fstp dword [esp + 0x44]",
        "mov ecx,dword [esp + 0x44]",
        "mov dword [ebx + 0x8],ecx"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x8, #272]",
        "movi v2.2d, #0x0",
        "ldr w7, [x4]",
        "ldr w5, [x8, #92]",
        "ldr w6, [x5, #24]",
        "ldr w10, [x8, #88]",
        "str w7, [x6, #12]",
        "ldr w5, [x4, #4]",
        "str w5, [x6, #16]",
        "ldr w4, [x4, #8]",
        "str w4, [x6, #20]",
        "ldr w7, [x10, #80]",
        "str w7, [x8, #-4]!",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8]",
        "mov w20, #0x31",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 214,
      "ExpectedInstructionCount": 1743,
      "x86Insts": [
        "fld dword [ecx + 0xc]",
        "fld dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [ecx + 0x18]",
        "fld dword [ecx]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [ecx + 0xc]",
        "fld dword [ecx + -0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [ecx]",
        "fld dword [ecx + -0x18]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [ecx + -0xc]",
        "fld dword [ecx + -0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [ecx + -0x18]",
        "fld dword [ecx]",
        "fld dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [ecx]",
        "fld dword [ecx + -0xc]",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0xc]",
        "fld st0",
        "fmul st6",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fadd st0,st4",
        "fstp dword [esp + 0x8]",
        "fsubp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x38]",
        "fsubp",
        "fstp dword [esp + 0x40]",
        "fxch",
        "fmul st4",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x18]",
        "fld st0",
        "fmul st4",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fsubp",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x4c]",
        "fsubp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fmul qword [0x00a77be0]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x48]",
        "fmul qword [0x00a77bd8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x4c]",
        "fmul qword [0x00a77bd0]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x38]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x4c]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x3c]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x48]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x3c]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x40]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x44]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fmul qword [0x00a77bc8]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x3c]",
        "fmul qword [0x00a77bc0]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x40]",
        "fmul qword [0x00a77bb8]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x44]",
        "fmul qword [0x00a77bb0]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x48]",
        "fmul qword [0x00a77ba8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x4c]",
        "fmul qword [0x00a77ba0]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x38]",
        "fchs",
        "fld st0",
        "fmul st2",
        "fstp dword [esp + 0x58]",
        "fld qword [0x00a77b98]",
        "fmul st1",
        "fxch",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x3c]",
        "fchs",
        "fld st0",
        "fld qword [0x00a77b90]",
        "fmul st1",
        "fxch",
        "fstp dword [esp + 0x54]",
        "fxch",
        "fmul qword [0x00a77b88]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld qword [0x00a77b80]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fmul qword [0x00a77b78]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x48]",
        "fld qword [0x00a77b88]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x40]",
        "fxch st2",
        "fchs",
        "fmul st3",
        "add eax,0x18",
        "add ecx,0x4",
        "sub edx,0x1",
        "fstp dword [esp + 0x44]",
        "fxch",
        "fchs",
        "fmulp",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fchs",
        "fmul qword [0x00a77b80]",
        "fstp dword [esp + 0x4c]",
        "fmul qword [0x00a77b78]",
        "fstp dword [esp + 0x38]",
        "fld dword [eax + -0x1c]",
        "fadd dword [esp + 0x38]",
        "fstp dword [eax + -0x1c]",
        "fld dword [eax + -0x18]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [eax + -0x18]",
        "fld dword [esp + 0x40]",
        "fadd dword [eax + -0x14]",
        "fstp dword [eax + -0x14]",
        "fld dword [eax + -0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [eax + -0x10]",
        "fld dword [eax + -0xc]",
        "fadd dword [esp + 0x48]",
        "fstp dword [eax + -0xc]",
        "fld dword [eax + -0x8]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [eax + -0x8]",
        "fld dword [esp + 0x50]",
        "fadd dword [eax + -0x4]",
        "fstp dword [eax + -0x4]",
        "fld dword [eax]",
        "fadd dword [esp + 0x54]",
        "fstp dword [eax]",
        "fld dword [eax + 0x4]",
        "fadd dword [esp + 0x58]",
        "fstp dword [eax + 0x4]",
        "fld dword [eax + 0x8]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [eax + 0x8]",
        "fld dword [esp + 0x60]",
        "fadd dword [eax + 0xc]",
        "fstp dword [eax + 0xc]",
        "fld dword [eax + 0x10]",
        "fadd dword [esp + 0x64]",
        "fstp dword [eax + 0x10]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x7, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #24]",
        "ldr s3, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7, #12]",
        "ldur s2, [x7, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7]",
        "ldur s3, [x7, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x7, #-12]",
        "ldur s2, [x7, #-36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "stur s3, [x7, #-24]",
        "ldr s3, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s5, [x7, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x7, #24]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str s3, [x7]",
        "ldur s3, [x7, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q6, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #4]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "ldr s3, [x7, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w23, w20, #0x6 (6)",
        "and w23, w23, #0x7",
        "add x23, x28, x23, lsl #4",
        "ldr q7, [x23, #1056]",
        "add x23, x28, x20, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add x13, x28, x22, lsl #4",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "add w22, w22, #0x1 (1)",
        "and w22, w22, #0x7",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #64]",
        "add x22, x28, x22, lsl #4",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #76]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7be0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bd8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bd0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bc8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bc0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bb8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7bb0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7ba8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w14, #0x7ba0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "add x14, x28, x20, lsl #4",
        "ldr q3, [x14, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #88]",
        "mov w14, #0x7b98",
        "movk w14, #0xa7, lsl #16",
        "ldr d4, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d4",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q5, [x28, #3552]",
        "eor v2.16b, v2.16b, v5.16b",
        "mov w14, #0x7b90",
        "movk w14, #0xa7, lsl #16",
        "ldr d5, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d5",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #84]",
        "strb wzr, [x28, #1049]",
        "mov w14, #0x7b88",
        "movk w14, #0xa7, lsl #16",
        "ldr d6, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "eor v2.16b, v2.16b, v6.16b",
        "mov w15, #0x7b80",
        "movk w15, #0xa7, lsl #16",
        "ldr d6, [x15]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #80]",
        "mov w16, #0x7b78",
        "movk w16, #0xa7, lsl #16",
        "ldr d6, [x16]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #100]",
        "ldr s2, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr d6, [x14]",
        "str x30, [sp, #-16]!",
        "fmov d0, d6",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #60]",
        "ldr s6, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #64]",
        "strb wzr, [x28, #1049]",
        "ldr q7, [x28, #3552]",
        "eor v7.16b, v6.16b, v7.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w4, w4, #0x18 (24)",
        "add w7, w7, #0x4 (4)",
        "subs w26, w5, #0x1 (1)",
        "mov x27, x5",
        "mov x5, x26",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #68]",
        "strb wzr, [x28, #1049]",
        "ldr q7, [x28, #3552]",
        "eor v2.16b, v2.16b, v7.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q5, [x28, #3552]",
        "eor v5.16b, v2.16b, v5.16b",
        "ldr d7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #76]",
        "ldr d7, [x16]",
        "str x30, [sp, #-16]!",
        "fmov d0, d7",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #56]",
        "ldur s2, [x4, #-28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-28]",
        "ldur s2, [x4, #-24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-24]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s7, [x4, #-20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-20]",
        "ldur s2, [x4, #-16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-16]",
        "ldur s2, [x4, #-12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-12]",
        "ldur s2, [x4, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-8]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s7, [x4, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x4, #-4]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr s2, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #4]",
        "ldr s2, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #8]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #12]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s7, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #16]",
        "strb w20, [x28, #1051]",
        "str q6, [x23, #1056]",
        "str q4, [x21, #1056]",
        "str q3, [x13, #1056]",
        "str q5, [x22, #1056]",
        "str q2, [x12, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf8f8",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 229,
      "ExpectedInstructionCount": 1903,
      "x86Insts": [
        "movzx eax,word [esi + edx*0x8]",
        "fld dword [esi + edx*0x8 + 0x4]",
        "fstp dword [esp + 0x24]",
        "mov esi,dword [esp + 0x1c8]",
        "fld dword [esp + 0x9c]",
        "movzx eax,ax",
        "mov ecx,eax",
        "imul ecx,dword [esp + 0x1e4]",
        "lea eax,[eax + eax*0x2]",
        "add eax,eax",
        "add eax,eax",
        "lea edi,[eax + esi*0x1 + 0x8]",
        "mov dword [esp + 0x10],edi",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0x98]",
        "fmul dword [eax + esi*0x1]",
        "faddp",
        "fld dword [esp + 0xa0]",
        "fmul dword [edi]",
        "faddp",
        "fadd dword [esp + 0x88]",
        "fstp dword [esp + 0xd0]",
        "fld dword [esp + 0xa8]",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0xa4]",
        "fmul dword [eax + esi*0x1]",
        "faddp",
        "fld dword [esp + 0xac]",
        "fmul dword [edi]",
        "faddp",
        "fadd dword [esp + 0x8c]",
        "fstp dword [esp + 0xd4]",
        "fld dword [esp + 0xb4]",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0xb0]",
        "fmul dword [eax + esi*0x1]",
        "mov esi,edi",
        "faddp",
        "fld dword [esp + 0xb8]",
        "fmul dword [esi]",
        "mov esi,dword [esp + 0x38]",
        "lea edi,[esi + eax*0x1 + 0x8]",
        "mov dword [esp + 0x10],edi",
        "faddp",
        "fadd dword [esp + 0x90]",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x64]",
        "fld st0",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp st2,st0",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0xe8]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x74]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp st2,st0",
        "fld dword [esp + 0x78]",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xec]",
        "fld dword [esp + 0x7c]",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x80]",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp",
        "fstp dword [esp + 0xf0]",
        "fld st2",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld st5",
        "fmul dword [esi + eax*0x1]",
        "faddp",
        "fld st4",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld st2",
        "fmul dword [esi + eax*0x1]",
        "faddp",
        "fld dword [esp + 0x78]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x80]",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld dword [esp + 0x7c]",
        "fmul dword [esi + eax*0x1]",
        "mov esi,edi",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [esi]",
        "mov esi,dword [esp + 0x20]",
        "lea edi,[esi + eax*0x1 + 0x4]",
        "mov dword [esp + 0x10],edi",
        "faddp",
        "lea edi,[esi + eax*0x1 + 0x8]",
        "mov dword [esp + 0xbc],edi",
        "mov edi,dword [esp + 0x10]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + eax*0x1]",
        "fmulp st5",
        "fld dword [edi]",
        "mov edi,dword [esp + 0xbc]",
        "fmulp st3",
        "fxch st4",
        "faddp st2,st0",
        "fld dword [edi]",
        "mov edi,dword [esp + 0x10]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fmul dword [esi + eax*0x1]",
        "fld dword [edi]",
        "mov edi,dword [esp + 0xbc]",
        "fmulp st2",
        "faddp",
        "fld dword [esp + 0x78]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x7c]",
        "fmul dword [esi + eax*0x1]",
        "mov eax,dword [esp + 0x10]",
        "fld dword [esp + 0x80]",
        "fmul dword [eax]",
        "mov eax,dword [esp + 0x1d4]",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0xd0]",
        "fld dword [esp + 0x24]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0xc0]",
        "fld dword [esp + 0xd4]",
        "fmul st1",
        "fstp dword [esp + 0xc4]",
        "fld dword [esp + 0xd8]",
        "fmul st1",
        "fstp dword [esp + 0xc8]",
        "fld dword [esp + 0xc0]",
        "fadd dword [ecx + eax*0x1]",
        "fstp dword [ecx + eax*0x1]",
        "add edx,0x1",
        "cmp edx,dword [esp + 0x1c]",
        "fld dword [esp + 0xc4]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea eax,[ecx + eax*0x1 + 0x8]",
        "fld dword [eax]",
        "fadd dword [esp + 0xc8]",
        "fstp dword [eax]",
        "mov eax,dword [esp + 0x1dc]",
        "fld dword [esp + 0xe8]",
        "fmul st1",
        "fstp dword [esp + 0xdc]",
        "fld dword [esp + 0xec]",
        "fmul st1",
        "fstp dword [esp + 0xe0]",
        "fld dword [esp + 0xf0]",
        "fmul st1",
        "fstp dword [esp + 0xe4]",
        "fld dword [esp + 0xdc]",
        "fadd dword [ecx + ebp*0x1]",
        "fstp dword [ecx + ebp*0x1]",
        "fld dword [esp + 0xe0]",
        "fadd dword [ecx + ebp*0x1 + 0x4]",
        "fstp dword [ecx + ebp*0x1 + 0x4]",
        "fld dword [esp + 0xe4]",
        "fadd dword [ecx + ebp*0x1 + 0x8]",
        "fstp dword [ecx + ebp*0x1 + 0x8]",
        "fld dword [esp + 0x58]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x5c]",
        "fmul st1",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x44]",
        "fld dword [ecx + eax*0x1]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [ecx + eax*0x1]",
        "fld dword [esp + 0x40]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea eax,[ecx + eax*0x1 + 0x8]",
        "fld dword [esp + 0x44]",
        "fadd dword [eax]",
        "fstp dword [eax]",
        "mov eax,dword [esp + 0x1e0]",
        "fld dword [esp + 0x4c]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x50]",
        "fmul st1",
        "fstp dword [esp + 0x30]",
        "fmul dword [esp + 0x54]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + eax*0x1]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [ecx + eax*0x1]",
        "fld dword [esp + 0x30]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea ecx,[ecx + eax*0x1 + 0x8]",
        "fld dword [esp + 0x34]",
        "fadd dword [ecx]",
        "fstp dword [ecx]"
      ],
      "ExpectedArm64ASM": [
        "add w20, w10, w5, lsl #3",
        "ldrh w4, [x20]",
        "add w20, w10, w5, lsl #3",
        "ldr s2, [x20, #4]",
        "str s2, [x8, #36]",
        "ldr w10, [x8, #456]",
        "ldr s2, [x8, #156]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "uxth w4, w4",
        "mov x7, x4",
        "ldr w20, [x8, #484]",
        "mul w7, w7, w20",
        "add w4, w4, w4, lsl #1",
        "add w4, w4, w4",
        "add w4, w4, w4",
        "add w20, w4, #0x8 (8)",
        "add w11, w20, w10",
        "str w11, [x8, #16]",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #152]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #160]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #208]",
        "ldr s2, [x8, #168]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #164]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #172]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #212]",
        "ldr s2, [x8, #180]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #176]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov x10, x11",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #184]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w10, [x8, #56]",
        "add w20, w10, #0x8 (8)",
        "add w11, w20, w4",
        "str w11, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #144]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #216]",
        "ldr s2, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s5, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s6, [x20, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #232]",
        "ldr s3, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s6, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s8, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s9, [x20, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #236]",
        "ldr s6, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s9, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add w20, w4, w6",
        "ldr s9, [x20, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #240]",
        "add w20, w10, w4",
        "ldr s6, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w20, w10, w4",
        "ldr s8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #88]",
        "add w20, w10, w4",
        "ldr s6, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w20, w10, w4",
        "ldr s8, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #92]",
        "ldr s6, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w20, w10, w4",
        "ldr s8, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add w20, w10, w4",
        "ldr s9, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "mov x10, x11",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s8, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w10, [x8, #32]",
        "add w20, w10, #0x4 (4)",
        "add w11, w20, w4",
        "str w11, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w20, w10, #0x8 (8)",
        "add w11, w20, w4",
        "str w11, [x8, #188]",
        "ldr w11, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "add w20, w10, w4",
        "ldr s6, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s6, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w11, [x8, #188]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w11, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #76]",
        "add w20, w10, w4",
        "ldr s2, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w11, [x8, #188]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w10, w4",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x8, #16]",
        "ldr s3, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x8, #468]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #84]",
        "ldr s2, [x8, #208]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #192]",
        "ldr s2, [x8, #212]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #196]",
        "ldr s2, [x8, #216]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #200]",
        "ldr s2, [x8, #192]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20]",
        "add w5, w5, #0x1 (1)",
        "ldr w20, [x8, #28]",
        "eor x27, x5, x20",
        "subs w26, w5, w20",
        "ldr s2, [x8, #196]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "ldr s4, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w4, w20, w4",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x8, #200]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x8, #476]",
        "ldr s2, [x8, #232]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #220]",
        "ldr s2, [x8, #236]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #224]",
        "ldr s2, [x8, #240]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #228]",
        "ldr s2, [x8, #220]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20]",
        "ldr s2, [x8, #224]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "ldr s4, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20, #4]",
        "ldr s2, [x8, #228]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "ldr s4, [x20, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w9",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20, #8]",
        "ldr s2, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #68]",
        "add w20, w7, w4",
        "ldr s2, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20]",
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "ldr s4, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w4, w20, w4",
        "ldr s2, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x8, #480]",
        "ldr s2, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #52]",
        "add w20, w7, w4",
        "ldr s2, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20]",
        "ldr s2, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "ldr s3, [x20, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w7, w4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w7, w20, w4",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x7]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfefe",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 260,
      "ExpectedInstructionCount": 80,
      "x86Insts": [
        "fld dword [edi]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "lea ecx,[esp + 0x10]",
        "fld1",
        "push ecx",
        "fdivrp",
        "lea edx,[esp + 0x50]",
        "push edx",
        "lea ecx,[esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [edi]",
        "fchs",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x40]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fmul dword [esp + 0x44]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x30]",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fstp dword [esp + 0x64]",
        "fmul dword [esp + 0x38]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x64]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x68]",
        "fadd dword [esp + 0x74]",
        "fstp dword [esp + 0x5c]",
        "call 0x00716e00",
        "mov ecx,dword [eax]",
        "mov dword [esi + 0x20],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [esi + 0x24],edx",
        "mov ecx,dword [eax + 0x8]",
        "mov dword [esi + 0x28],ecx",
        "mov edx,dword [eax + 0xc]",
        "mov dword [esi + 0x2c],edx",
        "fld dword [edi + 0x4]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [edi + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fchs",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x34]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fstp dword [esp + 0x5c]",
        "fmul dword [esp + 0x3c]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x64]",
        "fadd dword [esp + 0x58]",
        "fstp dword [esp + 0x74]",
        "mov eax,dword [esp + 0x74]",
        "fld dword [esp + 0x68]",
        "mov dword [esp + 0x4c],eax",
        "fadd dword [esp + 0x5c]",
        "lea eax,[esp + 0x10]",
        "push eax",
        "fstp dword [esp + 0x7c]",
        "mov ecx,dword [esp + 0x7c]",
        "fld dword [esp + 0x70]",
        "mov dword [esp + 0x54],ecx",
        "fadd dword [esp + 0x64]",
        "lea ecx,[esp + 0x50]",
        "push ecx",
        "lea ecx,[esp + 0x7c]",
        "fstp dword [esp + 0x84]",
        "mov edx,dword [esp + 0x84]",
        "mov dword [esp + 0x5c],edx",
        "call 0x00716e00",
        "mov edx,dword [eax]",
        "mov dword [esi + 0x30],edx",
        "mov ecx,dword [eax + 0x4]",
        "mov dword [esi + 0x34],ecx",
        "mov edx,dword [eax + 0x8]",
        "mov dword [esi + 0x38],edx",
        "mov eax,dword [eax + 0xc]",
        "mov dword [esi + 0x3c],eax",
        "fld dword [edi + 0x8]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmul dword [edi + 0x8]",
        "fstp dword [esp + 0x8]",
        "fchs",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x40]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x78]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x58]",
        "mov ecx,dword [esp + 0x58]",
        "fld dword [esp + 0x78]",
        "mov dword [esp + 0x4c],ecx",
        "fadd dword [esp + 0x68]",
        "lea ecx,[esp + 0x10]",
        "push ecx",
        "lea ecx,[esp + 0x78]",
        "fstp dword [esp + 0x60]",
        "mov edx,dword [esp + 0x60]",
        "fld dword [esp + 0x80]",
        "mov dword [esp + 0x54],edx",
        "fadd dword [esp + 0x70]",
        "lea edx,[esp + 0x50]",
        "push edx",
        "fstp dword [esp + 0x68]",
        "mov eax,dword [esp + 0x68]",
        "mov dword [esp + 0x5c],eax",
        "call 0x00716e00",
        "mov ecx,dword [eax]",
        "mov dword [esi + 0x40],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [esi + 0x44],edx",
        "mov ecx,dword [eax + 0x8]",
        "mov dword [esi + 0x48],ecx",
        "mov edx,dword [eax + 0xc]",
        "mov dword [esi + 0x4c],edx",
        "fld dword [edi + 0xc]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [edi + 0xc]",
        "fchs",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x40]",
        "fmul st1",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x78]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x58]",
        "mov eax,dword [esp + 0x58]",
        "fld dword [esp + 0x78]",
        "mov dword [esp + 0x4c],eax",
        "fadd dword [esp + 0x68]",
        "lea eax,[esp + 0x10]",
        "fstp dword [esp + 0x5c]",
        "mov ecx,dword [esp + 0x5c]",
        "fld dword [esp + 0x7c]",
        "mov dword [esp + 0x50],ecx",
        "fadd dword [esp + 0x6c]",
        "lea ecx,[esp + 0x4c]",
        "fstp dword [esp + 0x60]",
        "mov edx,dword [esp + 0x60]",
        "mov dword [esp + 0x54],edx"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0xf928",
        "movk w20, #0xa2, lsl #16",
        "ldr d3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0x1f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 206,
      "ExpectedInstructionCount": 183,
      "x86Insts": [
        "fld dword [0x00b42a74]",
        "push ecx",
        "fstp dword [0x00b42a20]",
        "lea ecx,[esp + 0x48]",
        "fld dword [0x00b42a78]",
        "fstp dword [0x00b42a24]",
        "fld dword [0x00b42a7c]",
        "fstp dword [0x00b42a28]",
        "fld dword [0x00b42a68]",
        "fstp dword [0x00b42a2c]",
        "fld dword [0x00b42a6c]",
        "fstp dword [0x00b42a30]",
        "fld dword [0x00b42a70]",
        "fstp dword [0x00b42a34]",
        "fld dword [0x00b42a5c]",
        "fstp dword [0x00b42a38]",
        "fld dword [0x00b42a60]",
        "fstp dword [0x00b42a3c]",
        "fld dword [0x00b42a64]",
        "fstp dword [0x00b42a40]",
        "fld dword [0x00b42a50]",
        "fstp dword [0x00b42a44]",
        "fld dword [0x00b42a54]",
        "fstp dword [0x00b42a48]",
        "fld dword [0x00b42a58]",
        "fstp dword [0x00b42a4c]",
        "fst dword [esp + 0x48]",
        "fst dword [esp + 0x58]",
        "fstp dword [esp + 0x68]",
        "fst dword [esp + 0x4c]",
        "fst dword [esp + 0x50]",
        "fst dword [esp + 0x54]",
        "fst dword [esp + 0x5c]",
        "fst dword [esp + 0x60]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x4]",
        "fstp dword [esp]",
        "call 0x00793aa0",
        "fld dword [esp + 0x50]",
        "fld dword [0x00b42a78]",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a74]",
        "fst qword [esp + 0x30]",
        "fld dword [esp + 0x44]",
        "fld dword [esp + 0x5c]",
        "fld dword [0x00b42a7c]",
        "fst qword [esp + 0x10]",
        "fld st2",
        "fmul st4",
        "fld st6",
        "fmul st6",
        "faddp",
        "fld st2",
        "fmulp st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x54]",
        "fld dword [esp + 0x48]",
        "fld dword [esp + 0x60]",
        "fstp qword [esp]",
        "fld st0",
        "fmulp st5",
        "fld st1",
        "fmulp st6",
        "fxch st4",
        "faddp st5,st0",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp st5,st0",
        "fxch st4",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x58]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x4c]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x64]",
        "fstp qword [esp + 0x8]",
        "fmul qword [esp + 0x30]",
        "fxch",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp + 0x8]",
        "mov eax,dword [esp + 0x38]",
        "fmul qword [esp + 0x10]",
        "mov ecx,dword [esp + 0x3c]",
        "mov [0x00b2ba7c],eax",
        "mov dword [0x00b2ba80],ecx",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a6c]",
        "mov dword [0x00b2ba84],edx",
        "fst qword [esp + 0x30]",
        "fld dword [0x00b42a68]",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a70]",
        "fstp qword [esp + 0x10]",
        "fmul st3",
        "fld st6",
        "fmulp st2",
        "faddp",
        "fld st1",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "fld st2",
        "mov [0x00b2ba88],eax",
        "fmul qword [esp + 0x28]",
        "fld st4",
        "fmul qword [esp + 0x30]",
        "faddp",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "mov dword [0x00b2ba8c],ecx",
        "fmul qword [esp + 0x28]",
        "fld qword [esp + 0x20]",
        "fmul qword [esp + 0x30]",
        "faddp",
        "fld qword [esp + 0x8]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a60]",
        "mov dword [0x00b2ba90],edx",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a5c]",
        "fst qword [esp + 0x30]",
        "fld dword [0x00b42a64]",
        "fstp qword [esp + 0x10]",
        "fmul st3",
        "fld st6",
        "fmulp st2",
        "faddp",
        "fld st1",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "fld st2",
        "fmul qword [esp + 0x30]",
        "fld st4",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "fmul qword [esp + 0x30]",
        "fld qword [esp + 0x20]",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp + 0x8]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "fld dword [0x00b42a54]",
        "mov ecx,dword [esp + 0x3c]",
        "fld dword [0x00b42a50]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a58]",
        "mov [0x00b2ba94],eax",
        "fxch st4",
        "mov dword [0x00b2ba98],ecx",
        "fmul st1",
        "mov dword [0x00b2ba9c],edx",
        "fxch st7",
        "fmul st2",
        "faddp st7,st0",
        "fxch st2",
        "fmul st3",
        "faddp st6,st0",
        "fxch st5",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "mov [0x00b2baa0],eax",
        "fmul st2",
        "fxch st3",
        "fmul st4",
        "faddp st2,st0",
        "fld qword [esp]",
        "fmul st1",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "mov dword [0x00b2baa4],ecx",
        "fmulp st2",
        "fld qword [esp + 0x20]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul qword [esp + 0x8]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "mov dword [0x00b2baa8],edx",
        "mov esp,ebp",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x2a74",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "str w7, [x8, #-4]!",
        "mov w20, #0x2a20",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "add w7, w8, #0x48 (72)",
        "mov w20, #0x2a78",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a24",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a7c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a28",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a68",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a2c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a6c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a30",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a70",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a34",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a5c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a38",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a60",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a3c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a64",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a40",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a50",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a44",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a54",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a48",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a58",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a4c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #72]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #88]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #104]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #80]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #92]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #96]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #100]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8]",
        "mov w22, #0xc5",
        "movk w22, #0x1, lsl #16",
        "str w22, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w23, #0x8",
        "sub w20, w23, w20",
        "mov w23, #0xe0e0",
        "lsr w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87-Psychonauts.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 520,
      "ExpectedInstructionCount": 4570,
      "x86Insts": [
        "sub esp,0x88",
        "fld dword [ecx + 0x4]",
        "mov edx,dword [ecx + 0x18]",
        "fld dword [ecx + 0x10]",
        "mov dword [esp + 0x14],edx",
        "fld dword [ecx + 0x14]",
        "mov edx,dword [ecx + 0x1c]",
        "fld dword [ecx + 0x20]",
        "mov dword [esp + 0x10],edx",
        "fld dword [ecx + 0x24]",
        "fld dword [eax]",
        "fsub dword [eax + 0x44]",
        "fld dword [eax + 0x40]",
        "fadd dword [eax + 0x4]",
        "fld dword [eax + 0x20]",
        "fsub dword [eax + 0x64]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x24]",
        "fadd dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fmul st7",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fmul st7",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd st0,st2",
        "fstp dword [esp + 0x80]",
        "fld dword [esp]",
        "fadd st0,st1",
        "fstp dword [esp + 0x78]",
        "fxch",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x60]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x40]",
        "fld dword [eax + 0x44]",
        "fadd dword [eax]",
        "fld dword [eax + 0x4]",
        "fsub dword [eax + 0x40]",
        "fld dword [eax + 0x64]",
        "fadd dword [eax + 0x20]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x24]",
        "fsub dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fmul st7",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fmul st7",
        "fstp dword [esp]",
        "fld st1",
        "fsub dword [esp]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x4]",
        "fadd st0,st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [esp + 0x5c]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x64]",
        "fstp st0",
        "fld dword [eax + 0x8]",
        "fsub dword [eax + 0x4c]",
        "fld dword [eax + 0xc]",
        "fadd dword [eax + 0x48]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st5",
        "fld dword [esp]",
        "fmul st5",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul st3",
        "fld dword [esp]",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x28]",
        "fsub dword [eax + 0x6c]",
        "fld dword [eax + 0x2c]",
        "fadd dword [eax + 0x68]",
        "fstp dword [esp]",
        "fld dword [esp + 0x10]",
        "fmul st1",
        "fld dword [esp]",
        "fmul dword [esp + 0x14]",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x50]",
        "fld dword [eax + 0x4c]",
        "fadd dword [eax + 0x8]",
        "fld dword [eax + 0xc]",
        "fsub dword [eax + 0x48]",
        "fstp dword [esp]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fld dword [esp]",
        "fmul dword [esp + 0x10]",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul dword [esp + 0x10]",
        "fld dword [esp]",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x6c]",
        "fadd dword [eax + 0x28]",
        "fld dword [eax + 0x2c]",
        "fsub dword [eax + 0x68]",
        "fst dword [esp]",
        "fmul st4",
        "fld st1",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul st5",
        "fxch",
        "fmul st4",
        "fsubp",
        "fstp dword [esp]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x84]",
        "fld dword [eax + 0x10]",
        "fsub dword [eax + 0x54]",
        "fld dword [eax + 0x14]",
        "fadd dword [eax + 0x50]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st3",
        "fld dword [esp]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul st1",
        "fld dword [esp]",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x30]",
        "fsub dword [eax + 0x74]",
        "fld dword [eax + 0x34]",
        "fadd dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st2",
        "fld dword [esp]",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x38]",
        "fld dword [eax + 0x54]",
        "fadd dword [eax + 0x10]",
        "fld dword [eax + 0x14]",
        "fsub dword [eax + 0x50]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st2",
        "fld dword [esp]",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fld dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x74]",
        "fadd dword [eax + 0x30]",
        "fld dword [eax + 0x34]",
        "fsub dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st3",
        "fld dword [esp]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul st1",
        "fld dword [esp]",
        "fmul st3",
        "faddp",
        "fstp dword [esp]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x54]",
        "fld dword [eax + 0x18]",
        "fsub dword [eax + 0x5c]",
        "fld dword [eax + 0x1c]",
        "fadd dword [eax + 0x58]",
        "fld st1",
        "fmul dword [esp + 0x14]",
        "fld st1",
        "fmul dword [esp + 0x10]",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fxch",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fld dword [eax + 0x38]",
        "fsub dword [eax + 0x7c]",
        "fld dword [eax + 0x78]",
        "fadd dword [eax + 0x3c]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st6",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul st3",
        "fxch",
        "fmul st4",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp]",
        "fadd st0,st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x48]",
        "fld dword [eax + 0x5c]",
        "fadd dword [eax + 0x18]",
        "fld dword [eax + 0x1c]",
        "fsub dword [eax + 0x58]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0xc]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "fsubp",
        "fstp st2",
        "fstp st0",
        "fld dword [eax + 0x7c]",
        "fadd dword [eax + 0x38]",
        "fld dword [eax + 0x3c]",
        "fsub dword [eax + 0x78]",
        "fld st1",
        "fmul dword [esp + 0x10]",
        "fld st1",
        "fmul dword [esp + 0x14]",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x7c]",
        "fsubp",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x80]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x78]",
        "fld dword [esp + 0x8]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [eax + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x8]",
        "fstp st1",
        "fsub dword [esp]",
        "fstp dword [eax + 0xc]",
        "fld dword [esp + 0x80]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x78]",
        "fsub dword [esp + 0x28]",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x8]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x70]",
        "fld st3",
        "fsub st0,st1",
        "fstp dword [eax + 0x10]",
        "fld st1",
        "fadd st0,st3",
        "fstp dword [eax + 0x14]",
        "fadd st0,st3",
        "fstp dword [eax + 0x18]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x60]",
        "fsub dword [esp + 0x38]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x40]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x48]",
        "fld dword [esp + 0x1c]",
        "fadd dword [esp + 0x50]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld dword [esp + 0x4]",
        "fadd st0,st3",
        "fstp dword [eax + 0x20]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x24]",
        "fxch st2",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x28]",
        "fsub st0,st1",
        "fstp dword [eax + 0x2c]",
        "fstp st0",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x68]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld st2",
        "fsub st0,st1",
        "fstp dword [eax + 0x30]",
        "fld dword [esp + 0x4]",
        "fadd st0,st2",
        "fstp dword [eax + 0x34]",
        "fadd st0,st2",
        "fstp dword [eax + 0x38]",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x3c]",
        "fstp st0",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x24]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp + 0x34]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + 0x40]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [eax + 0x44]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x48]",
        "fstp st1",
        "fsub dword [esp]",
        "fstp dword [eax + 0x4c]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x2c]",
        "fld dword [esp + 0x34]",
        "fsub dword [esp + 0x3c]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x44]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x4c]",
        "fld st3",
        "fsub st0,st1",
        "fstp dword [eax + 0x50]",
        "fld st1",
        "fadd st0,st3",
        "fstp dword [eax + 0x54]",
        "fadd st0,st3",
        "fstp dword [eax + 0x58]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x5c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x64]",
        "fld st2",
        "fadd dword [esp + 0x74]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x7c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld dword [esp + 0x4]",
        "fadd st0,st3",
        "fstp dword [eax + 0x60]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x64]",
        "fxch st2",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x68]",
        "fsub st0,st1",
        "fstp dword [eax + 0x6c]",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x6c]",
        "fstp dword [esp + 0x8]",
        "fsubr dword [esp + 0x74]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x84]",
        "fld st1",
        "fsub st0,st1",
        "fmul st3",
        "fstp dword [esp + 0x4]",
        "fadd st0,st1",
        "fmulp st2",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fsub st0,st1",
        "fstp dword [eax + 0x70]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x74]",
        "fadd dword [esp + 0xc]",
        "fstp dword [eax + 0x78]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x7c]",
        "add esp,0x88"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x88 (136)",
        "ldr s2, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x7, #24]",
        "ldr s3, [x7, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str w5, [x8, #20]",
        "ldr s4, [x7, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w5, [x7, #28]",
        "ldr s5, [x7, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str w5, [x8, #16]",
        "ldr s6, [x7, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #8]",
        "ldr s9, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8]",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #4]",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8]",
        "ldr s9, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #128]",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "ldr s9, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #96]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #64]",
        "ldr s7, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #8]",
        "ldr s9, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8]",
        "ldr s9, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #4]",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8]",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #36]",
        "ldr s9, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8, #52]",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #92]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #100]",
        "ldr s7, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #88]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #48]",
        "ldr s7, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #104]",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #80]",
        "ldr s7, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "ldr s10, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s10",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v10.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v10.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s9, s0",
        "str s9, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "ldr s7, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #68]",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #76]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #116]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #132]",
        "ldr s7, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #40]",
        "ldr s7, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #24]",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #56]",
        "ldr s7, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "ldr s8, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v9.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #44]",
        "ldr s5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #60]",
        "ldr s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #108]",
        "ldr s5, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #84]",
        "ldr s5, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8]",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #8]",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #112]",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #72]",
        "ldr s5, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #16]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #124]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #8]",
        "ldr s4, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #12]",
        "ldr s4, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x4, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x4, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #24]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #28]",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #32]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #36]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #44]",
        "ldr s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #48]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #52]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #56]",
        "ldr s4, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #60]",
        "ldr s4, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #64]",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #68]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #72]",
        "ldr s4, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #76]",
        "ldr s4, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x4, #80]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x4, #84]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #88]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #92]",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #96]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x4, #100]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #104]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #108]",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #12]",
        "ldr s4, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #112]",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #116]",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #120]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #124]",
        "adds w26, w8, #0x88 (136)",
        "cfinv",
        "mov x27, x8",
        "mov x8, x26",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 434,
      "ExpectedInstructionCount": 3932,
      "x86Insts": [
        "sub esp,0x90",
        "fld dword [ecx + 0x4]",
        "fld st0",
        "fmul dword [ecx + 0x8]",
        "fld st0",
        "fadd dword [ecx + 0x8]",
        "fld dword [eax + 0x40]",
        "fadd dword [eax]",
        "fld dword [eax + 0x44]",
        "fadd dword [eax + 0x4]",
        "fld dword [eax]",
        "fsub dword [eax + 0x40]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x4]",
        "fsub dword [eax + 0x44]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x20]",
        "fadd dword [eax + 0x60]",
        "fld dword [eax + 0x64]",
        "fadd dword [eax + 0x24]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x20]",
        "fsub dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [eax + 0x24]",
        "fsub dword [eax + 0x64]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x64]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x3c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + 0x8]",
        "fadd dword [eax + 0x48]",
        "fld dword [eax + 0x4c]",
        "fadd dword [eax + 0xc]",
        "fld dword [eax + 0x8]",
        "fsub dword [eax + 0x48]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0xc]",
        "fsub dword [eax + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x28]",
        "fadd dword [eax + 0x68]",
        "fld dword [eax + 0x6c]",
        "fadd dword [eax + 0x2c]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x28]",
        "fsub dword [eax + 0x68]",
        "fstp dword [esp]",
        "fld dword [eax + 0x2c]",
        "fsub dword [eax + 0x6c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x84]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x20]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "fsubp",
        "fstp dword [esp + 0x68]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x88]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x80]",
        "fxch",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x30]",
        "fld dword [eax + 0x10]",
        "fadd dword [eax + 0x50]",
        "fld dword [eax + 0x54]",
        "fadd dword [eax + 0x14]",
        "fld dword [eax + 0x10]",
        "fsub dword [eax + 0x50]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x14]",
        "fsub dword [eax + 0x54]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x30]",
        "fadd dword [eax + 0x70]",
        "fld dword [eax + 0x74]",
        "fadd dword [eax + 0x34]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x30]",
        "fsub dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld dword [eax + 0x34]",
        "fsub dword [eax + 0x74]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x6c]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x4c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fsub st0,st1",
        "fmul st5",
        "fstp dword [esp + 0x38]",
        "fadd st0,st1",
        "fmul st4",
        "fstp dword [esp + 0x40]",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st0",
        "fadd st0,st2",
        "fmul st5",
        "fstp dword [esp + 0x48]",
        "fsub st0,st1",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fstp st0",
        "fld dword [eax + 0x58]",
        "fadd dword [eax + 0x18]",
        "fld dword [eax + 0x1c]",
        "fadd dword [eax + 0x5c]",
        "fld dword [eax + 0x18]",
        "fsub dword [eax + 0x58]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x1c]",
        "fsub dword [eax + 0x5c]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x78]",
        "fadd dword [eax + 0x38]",
        "fld dword [eax + 0x3c]",
        "fadd dword [eax + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x38]",
        "fsub dword [eax + 0x78]",
        "fstp dword [esp]",
        "fld dword [eax + 0x3c]",
        "fsub dword [eax + 0x7c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x8c]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x2c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x1c]",
        "fxch",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "fsubp",
        "fstp dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp st2",
        "fstp st0",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x48]",
        "fld dword [esp + 0x28]",
        "fsub dword [esp + 0x58]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x28]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x80]",
        "fsub dword [esp]",
        "fld dword [esp + 0x30]",
        "fsub st0,st3",
        "fstp dword [esp + 0x10]",
        "fld dword [esp]",
        "fadd dword [esp + 0x80]",
        "fstp dword [esp]",
        "fxch st2",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp + 0xc]",
        "fxch",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x60]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x18]",
        "fstp dword [eax + 0x64]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x68]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0x6c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x70]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x74]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x78]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x7c]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x78]",
        "fld dword [esp + 0x60]",
        "fsub dword [esp + 0x38]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x78]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x1c]",
        "fadd dword [esp + 0x68]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x88]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x1c]",
        "fstp dword [esp]",
        "fld dword [esp + 0x88]",
        "fsub dword [esp + 0x14]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + 0x40]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + 0x44]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x48]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0x4c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x50]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x54]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x58]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x5c]",
        "fld dword [esp + 0x20]",
        "fsub dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x24]",
        "fld st1",
        "fsub st0,st1",
        "fmul st3",
        "fstp dword [esp + 0x14]",
        "faddp",
        "fmul st1",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x20]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x2c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st4",
        "fstp dword [esp]",
        "fadd st0,st1",
        "fmul st3",
        "fstp dword [esp + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x34]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x34]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x14]",
        "fadd st0,st1",
        "fstp dword [eax + 0x20]",
        "fld st1",
        "fadd dword [esp + 0x18]",
        "fstp dword [eax + 0x24]",
        "fsub dword [esp + 0x14]",
        "fstp dword [eax + 0x28]",
        "fld dword [esp + 0x18]",
        "fsub st0,st1",
        "fstp dword [eax + 0x2c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x30]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x34]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x38]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x3c]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x64]",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x6c]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x74]",
        "fld dword [esp + 0x8c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x74]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x8c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0xc]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x10]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x14]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x18]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x1c]",
        "add esp,0x90"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x90 (144)",
        "ldr s2, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x7, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #100]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #60]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #68]",
        "ldr s5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #96]",
        "ldr s5, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #120]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #80]",
        "ldr s5, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #40]",
        "ldr s5, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #32]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #36]",
        "ldr s5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #104]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #136]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #128]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #48]",
        "ldr s5, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #108]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #76]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #52]",
        "ldr s5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #56]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #64]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #72]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #88]",
        "ldr s5, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr s8, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8]",
        "ldr s8, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr s9, [x4, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #124]",
        "ldr s8, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x8, #140]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #44]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #112]",
        "ldr s5, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "ldr s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #24]",
        "ldr s5, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #8]",
        "ldr s5, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #4]",
        "ldr s5, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldr s6, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #128]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8]",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #96]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s6, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #100]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #104]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #108]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #112]",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #116]",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #120]",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #124]",
        "ldr s3, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #8]",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #4]",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8]",
        "ldr s6, [x8, #136]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x4, #64]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x4, #68]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #72]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #76]",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #80]",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #84]",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #88]",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x4, #92]",
        "ldr s3, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #112]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s4, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #24]",
        "ldr s4, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #4]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #32]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x4, #36]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #40]",
        "ldr s2, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #44]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #48]",
        "ldr s2, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #52]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #56]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #60]",
        "ldr s2, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #100]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #4]",
        "ldr s4, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #16]",
        "ldr s5, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #124]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8]",
        "ldr s5, [x8, #132]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #140]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4]",
        "ldr s5, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x4, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #8]",
        "ldr s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #12]",
        "ldr s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #16]",
        "ldr s2, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #20]",
        "ldr s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #24]",
        "ldr s2, [x8, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4, #28]",
        "mvn w27, w8",
        "adds w26, w8, #0x90 (144)",
        "cfinv",
        "mov x8, x26",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfefe",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 702,
      "ExpectedInstructionCount": 92,
      "x86Insts": [
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x7c],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x78],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x74],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x70],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x6c],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x68],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x64],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x60],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x5c],eax",
        "lea eax,[ebp + 0xffffff04]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffef8]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffeec]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffee0]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe68],eax",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe6c],eax",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe70],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe20],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe24],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe28],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe8c],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe90],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe94],eax",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe44]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe48]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe4c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe2c]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe30]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe34]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe38]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe3c]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe40]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe5c]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe60]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe64]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe74]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe78]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe7c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe80]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe84]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe88]",
        "fld dword [ebp + 0xfffffe2c]",
        "fld dword [ebp + 0xfffffe44]",
        "faddp",
        "fld dword [ebp + 0xfffffe38]",
        "faddp",
        "fld dword [ebp + 0xfffffe5c]",
        "faddp",
        "fld dword [ebp + 0xfffffe74]",
        "faddp",
        "fld dword [ebp + 0xfffffe80]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe30]",
        "fld dword [ebp + 0xfffffe48]",
        "faddp",
        "fld dword [ebp + 0xfffffe3c]",
        "faddp",
        "fld dword [ebp + 0xfffffe60]",
        "faddp",
        "fld dword [ebp + 0xfffffe78]",
        "faddp",
        "fld dword [ebp + 0xfffffe84]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe34]",
        "fld dword [ebp + 0xfffffe4c]",
        "faddp",
        "fld dword [ebp + 0xfffffe40]",
        "faddp",
        "fld dword [ebp + 0xfffffe64]",
        "faddp",
        "fld dword [ebp + 0xfffffe7c]",
        "faddp",
        "fld dword [ebp + 0xfffffe88]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe58]",
        "fld dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe20]",
        "fld dword [ebp + 0xfffffe68]",
        "faddp",
        "fld dword [ebp + 0xfffffe8c]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe24]",
        "fld dword [ebp + 0xfffffe6c]",
        "faddp",
        "fld dword [ebp + 0xfffffe90]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe58]",
        "fld dword [ebp + 0xfffffe28]",
        "fld dword [ebp + 0xfffffe70]",
        "faddp",
        "fld dword [ebp + 0xfffffe94]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe58]",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffebc],eax",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffec0],eax",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffec4],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe98],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe9c],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffea0],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffed4],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffed8],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffedc],eax",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "faddp",
        "fstp dword [ebp + 0xfffffef8]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "faddp",
        "fstp dword [ebp + 0xfffffefc]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "faddp",
        "fstp dword [ebp + 0xffffff00]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "fld dword [ebp + 0xffffff04]",
        "fld dword [ebp + 0xffffff04]",
        "fmulp",
        "fld dword [ebp + 0xffffff08]",
        "fld dword [ebp + 0xffffff08]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xffffff0c]",
        "fld dword [ebp + 0xffffff0c]",
        "fmulp",
        "faddp",
        "fstp qword [esp]",
        "call 0x0811028c",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + -0x80]",
        "fldz",
        "fxch",
        "fucomip st0,st1",
        "fstp st0",
        "seta al",
        "test al,al"
      ],
      "ExpectedArm64ASM": [
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "stur w4, [x9, #-124]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "stur w4, [x9, #-120]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "stur w4, [x9, #-116]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-112]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-108]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-104]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0x18 (24)",
        "stur w4, [x9, #-100]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0x18 (24)",
        "stur w4, [x9, #-96]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "mvn w27, w4",
        "adds w26, w4, #0x18 (24)",
        "mov x4, x26",
        "stur w4, [x9, #-92]",
        "sub w4, w9, #0xfc (252)",
        "str w4, [x8]",
        "mov w20, #0x140",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "cfinv"
      ]
    },
    "Block4": {
      "x86InstructionCount": 351,
      "ExpectedInstructionCount": 2809,
      "x86Insts": [
        "mov ebp,dword [esp + 0x64]",
        "fadd dword [ebp + 0x8]",
        "add ebp,0x10",
        "mov dword [esp + 0x64],ebp",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x34]",
        "fadd dword [ebp + -0x4]",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x38]",
        "fadd dword [ebp]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x3c]",
        "fsub dword [ebp + 0x4]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x8]",
        "mov ebp,dword [ebp + -0x4]",
        "mov dword [esp + 0x34],ebp",
        "mov ebp,dword [esp + 0x64]",
        "mov ebp,dword [ebp]",
        "mov dword [esp + 0x38],ebp",
        "mov ebp,dword [esp + 0x64]",
        "fld dword [ebp + 0x4]",
        "fchs",
        "fstp dword [esp + 0x3c]",
        "fld dword [edi + -0x8]",
        "fadd dword [edx + -0x8]",
        "fld dword [edi + -0x4]",
        "fchs",
        "fsub dword [edx + -0x4]",
        "fld dword [edi + -0x8]",
        "fsub dword [edx + -0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [edx + -0x4]",
        "fsub dword [edi + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [edi]",
        "fadd dword [edx]",
        "fstp dword [esp + 0x44]",
        "fld dword [edi + 0x4]",
        "fchs",
        "fsub dword [edx + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [edi]",
        "fsub dword [edx]",
        "fstp dword [esp + 0x54]",
        "fld dword [edx + 0x4]",
        "fsub dword [edi + 0x4]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ecx + -0x8]",
        "fadd dword [esi + -0x8]",
        "fld dword [ecx + -0x4]",
        "fadd dword [esi + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [esi + -0x8]",
        "fsub dword [ecx + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [esi + -0x4]",
        "fsub dword [ecx + -0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx]",
        "fadd dword [esi]",
        "fstp dword [esp + 0x48]",
        "fld dword [ecx + 0x4]",
        "fadd dword [esi + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esi]",
        "fsub dword [ecx]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + 0x4]",
        "fsub dword [ecx + 0x4]",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [edi + -0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [edi + -0x4]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [edi]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [edi + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esi + -0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [esi + -0x4]",
        "fstp st0",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esi]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esi + 0x4]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx + -0x8]",
        "fld dword [esp + 0x6c]",
        "fmul st1",
        "fld dword [esp + 0x68]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld st2",
        "fmul st2",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx]",
        "fld st2",
        "fmul st1",
        "fld dword [esp + 0x34]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx + -0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x30]",
        "fadd dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x30]",
        "fchs",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x30]",
        "fsub dword [ebp + 0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x30]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x30]",
        "fchs",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x50]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x30]",
        "fsub dword [ebp]",
        "fstp dword [esp + 0x54]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebp + 0x8]",
        "fadd dword [ebx + 0x8]",
        "fld dword [ebp + 0xc]",
        "fadd dword [ebx + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebp + 0x8]",
        "fsub dword [ebx + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + 0xc]",
        "fsub dword [ebx + 0xc]",
        "fstp dword [esp + 0x18]",
        "fld dword [ebp]",
        "fadd dword [ebx]",
        "fstp dword [esp + 0x48]",
        "fld dword [ebx + 0x4]",
        "fadd dword [ebp + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [ebp]",
        "fsub dword [ebx]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebx + 0x4]",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [ebp + 0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [ebp + 0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x50]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x30]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x68]",
        "fmul st1",
        "fld dword [esp + 0x6c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp]",
        "sub ebp,0x10",
        "fld dword [esp + 0x34]",
        "mov dword [esp + 0x30],ebp",
        "fmul st1",
        "add ecx,0x10",
        "fld st3",
        "add edx,0x10",
        "fmul st3",
        "add esi,0x10",
        "add edi,0x10",
        "faddp",
        "sub ebx,0x10",
        "fstp dword [ebp + 0x14]",
        "mov ebp,dword [esp + 0x70]",
        "sub ebp,0x10",
        "fstp st0",
        "mov dword [esp + 0x70],ebp",
        "fstp st0",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x14]",
        "sub ebp,0x10",
        "fsub dword [esp + 0x18]",
        "mov dword [esp + 0x24],ebp",
        "fld dword [esp + 0x1c]",
        "mov ebp,dword [esp + 0x7c]",
        "fsub dword [esp + 0x20]",
        "dec ebp",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x7c],ebp",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x18]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x14]",
        "fstp st0",
        "fstp st0"
      ],
      "ExpectedArm64ASM": [
        "ldr w9, [x8, #100]",
        "ldr s2, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w9, w9, #0x10 (16)",
        "str w9, [x8, #100]",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #40]",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur w9, [x9, #-4]",
        "str w9, [x8, #52]",
        "ldr w9, [x8, #100]",
        "ldr w9, [x9]",
        "str w9, [x8, #56]",
        "ldr w9, [x8, #100]",
        "ldr s3, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x23, x28, x22, lsl #4",
        "ldr q4, [x28, #3552]",
        "eor v3.16b, v3.16b, v4.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #60]",
        "ldur s3, [x11, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s4, [x5, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s4, [x11, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x12, x28, x22, lsl #4",
        "ldr q5, [x28, #3552]",
        "eor v4.16b, v4.16b, v5.16b",
        "ldur s5, [x5, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldur s5, [x11, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s6, [x5, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add x13, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "ldur s5, [x5, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x11, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #28]",
        "ldr s5, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #68]",
        "ldr s5, [x11, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "eor v5.16b, v5.16b, v6.16b",
        "ldr s6, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #80]",
        "ldr s5, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #84]",
        "ldr s5, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x11, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #92]",
        "ldur s5, [x7, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x10, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x7, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s7, [x10, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add x22, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldur s6, [x10, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldur s7, [x7, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #32]",
        "ldur s6, [x10, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldur s7, [x7, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #24]",
        "ldr s6, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #72]",
        "ldr s6, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x10, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #76]",
        "ldr s6, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "ldr s6, [x10, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #88]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x11, #-8]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x11, #-4]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x11]",
        "ldr s6, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x11, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x10, #-8]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x10, #-4]",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x10]",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x10, #4]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x5, #-8]",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x5, #-4]",
        "ldr s3, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x5]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x5, #4]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x7, #-8]",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x7, #-4]",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x7]",
        "ldr s5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #4]",
        "ldr s3, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "ldr s4, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s4, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "ldr q5, [x28, #3552]",
        "eor v4.16b, v4.16b, v5.16b",
        "ldr s5, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s5, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "ldr s6, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #20]",
        "ldr s5, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #28]",
        "ldr s5, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #68]",
        "ldr s5, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "ldr q6, [x28, #3552]",
        "eor v5.16b, v5.16b, v6.16b",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #80]",
        "ldr s5, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #48]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #84]",
        "ldr s5, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #112]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #92]",
        "ldr s5, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldr s6, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #32]",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #24]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #72]",
        "ldr s6, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #76]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s7, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #88]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #8]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #12]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9]",
        "ldr s6, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #4]",
        "ldr w9, [x8, #112]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #8]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9]",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #4]",
        "ldr w9, [x8, #48]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x9, #8]",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x9]",
        "sub w9, w9, #0x10 (16)",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str w9, [x8, #48]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w7, w7, #0x10 (16)",
        "add w5, w5, #0x10 (16)",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w10, w10, #0x10 (16)",
        "add w11, w11, #0x10 (16)",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "sub w6, w6, #0x10 (16)",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #20]",
        "ldr w9, [x8, #112]",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #112]",
        "ldr w9, [x8, #36]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "subs w9, w9, #0x10 (16)",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str w9, [x8, #36]",
        "ldr s4, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #124]",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "cset x14, hs",
        "subs w26, w9, #0x1 (1)",
        "rmif x14, #63, #nzCv",
        "mov x27, x9",
        "mov x9, x26",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str w9, [x8, #124]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x6, #24]",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x6, #28]",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x6, #16]",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x6, #20]",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]",
        "str q6, [x22, #1056]",
        "str q5, [x13, #1056]",
        "str q4, [x12, #1056]",
        "str q3, [x23, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf0f0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 346,
      "ExpectedInstructionCount": 2804,
      "x86Insts": [
        "mov ebp,dword [esp + 0x64]",
        "fadd dword [ebp + 0x8]",
        "add ebp,0x10",
        "mov dword [esp + 0x64],ebp",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x34]",
        "fadd dword [ebp + -0x4]",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x38]",
        "fadd dword [ebp]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x3c]",
        "fsub dword [ebp + 0x4]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x8]",
        "mov ebp,dword [ebp + -0x4]",
        "mov dword [esp + 0x34],ebp",
        "mov ebp,dword [esp + 0x64]",
        "mov ebp,dword [ebp]",
        "mov dword [esp + 0x38],ebp",
        "mov ebp,dword [esp + 0x64]",
        "fld dword [ebp + 0x4]",
        "fchs",
        "fstp dword [esp + 0x3c]",
        "fld dword [edx + -0x8]",
        "fadd dword [edi + -0x8]",
        "fld dword [edi + -0x4]",
        "fadd dword [edx + -0x4]",
        "fld dword [edi + -0x8]",
        "fsub dword [edx + -0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [edi + -0x4]",
        "fsub dword [edx + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [edi]",
        "fadd dword [edx]",
        "fstp dword [esp + 0x44]",
        "fld dword [edi + 0x4]",
        "fadd dword [edx + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [edi]",
        "fsub dword [edx]",
        "fstp dword [esp + 0x58]",
        "fld dword [edi + 0x4]",
        "fsub dword [edx + 0x4]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ecx + -0x8]",
        "fadd dword [esi + -0x8]",
        "fld dword [ecx + -0x4]",
        "fadd dword [esi + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [esi + -0x8]",
        "fsub dword [ecx + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [esi + -0x4]",
        "fsub dword [ecx + -0x4]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx]",
        "fadd dword [esi]",
        "fstp dword [esp + 0x48]",
        "fld dword [ecx + 0x4]",
        "fadd dword [esi + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [esi]",
        "fsub dword [ecx]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + 0x4]",
        "fsub dword [ecx + 0x4]",
        "fstp dword [esp + 0x54]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [edi + -0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [edi + -0x4]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [edi]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [edi + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esi + -0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esi + -0x4]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esi]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fstp dword [esi + 0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx + -0x8]",
        "fld dword [esp + 0x6c]",
        "fmul st1",
        "fld dword [esp + 0x68]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld st2",
        "fmul st2",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx]",
        "fld st2",
        "fmul st1",
        "fld dword [esp + 0x34]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "faddp",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [ecx + -0x8]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x18]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x4c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x58]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebp + 0x8]",
        "fadd dword [ebx + 0x8]",
        "fld dword [ebp + 0xc]",
        "fadd dword [ebx + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebp + 0x8]",
        "fsub dword [ebx + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + 0xc]",
        "fsub dword [ebx + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld dword [ebx]",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x48]",
        "fld dword [ebp + 0x4]",
        "fadd dword [ebx + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [ebp]",
        "fsub dword [ebx]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebx + 0x4]",
        "fstp dword [esp + 0x54]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [ebp + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x68]",
        "fmul st1",
        "fld dword [esp + 0x6c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "sub ebp,0x10",
        "mov dword [esp + 0x28],ebp",
        "add ecx,0x10",
        "faddp",
        "add edx,0x10",
        "add esi,0x10",
        "fstp dword [ebp + 0x14]",
        "mov ebp,dword [esp + 0x70]",
        "sub ebp,0x10",
        "fstp st0",
        "mov dword [esp + 0x70],ebp",
        "fstp st0",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x14]",
        "sub ebp,0x10",
        "fadd dword [esp + 0x18]",
        "mov dword [esp + 0x24],ebp",
        "fld dword [esp + 0x1c]",
        "mov ebp,dword [esp + 0x7c]",
        "fsub dword [esp + 0x20]",
        "add edi,0x10",
        "fld dword [esp + 0x30]",
        "sub ebx,0x10",
        "dec ebp",
        "fmul st1",
        "mov dword [esp + 0x7c],ebp",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x18]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x14]",
        "fstp st0",
        "fstp st0"
      ],
      "ExpectedArm64ASM": [
        "ldr w9, [x8, #100]",
        "ldr s2, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w9, w9, #0x10 (16)",
        "str w9, [x8, #100]",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #44]",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur w9, [x9, #-4]",
        "str w9, [x8, #52]",
        "ldr w9, [x8, #100]",
        "ldr w9, [x9]",
        "str w9, [x8, #56]",
        "ldr w9, [x8, #100]",
        "ldr s3, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x23, x28, x22, lsl #4",
        "ldr q4, [x28, #3552]",
        "eor v3.16b, v3.16b, v4.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x8, #60]",
        "ldur s3, [x5, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s4, [x11, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldur s4, [x11, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s5, [x5, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add x12, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldur s5, [x11, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s6, [x5, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add x13, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #24]",
        "ldur s5, [x11, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x5, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #28]",
        "ldr s5, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #68]",
        "ldr s5, [x11, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #76]",
        "ldr s5, [x11]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #88]",
        "ldr s5, [x11, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x5, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #92]",
        "ldur s5, [x7, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x10, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldur s6, [x7, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s7, [x10, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add x22, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldur s6, [x10, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldur s7, [x7, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #32]",
        "ldur s6, [x10, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldur s7, [x7, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "ldr s6, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #72]",
        "ldr s6, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x10, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #80]",
        "ldr s6, [x10]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x7]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "ldr s6, [x10, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x7, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #84]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x11, #-8]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x11, #-4]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x11]",
        "ldr s6, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x11, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x10, #-8]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x10, #-4]",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x10]",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x10, #4]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x5, #-8]",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x5, #-4]",
        "ldr s3, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x5]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x5, #4]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "stur s5, [x7, #-8]",
        "ldr s5, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "stur s3, [x7, #-4]",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x7]",
        "ldr s5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x7, #4]",
        "ldr s3, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s4, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s4, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s5, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s5, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #24]",
        "ldr s5, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #28]",
        "ldr s5, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #68]",
        "ldr s5, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #76]",
        "ldr s5, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #88]",
        "ldr s5, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w9, [x8, #112]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x8, #92]",
        "ldr s5, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #16]",
        "ldr s6, [x9, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #32]",
        "ldr s6, [x9, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "ldr s6, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #72]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #80]",
        "ldr s6, [x9]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x6]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #96]",
        "ldr s6, [x9, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w9, [x8, #36]",
        "ldr s7, [x6, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #84]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #8]",
        "ldr s6, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #12]",
        "ldr s6, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9]",
        "ldr s6, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x9, #4]",
        "ldr w9, [x8, #112]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #8]",
        "ldr s3, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #68]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #72]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9]",
        "ldr s3, [x8, #76]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #80]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #4]",
        "ldr w9, [x8, #40]",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x9, #8]",
        "ldr s5, [x8, #104]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #108]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x9]",
        "ldr s5, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #40]",
        "add w7, w7, #0x10 (16)",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w5, w5, #0x10 (16)",
        "add w10, w10, #0x10 (16)",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x9, #20]",
        "ldr w9, [x8, #112]",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #112]",
        "ldr w9, [x8, #36]",
        "ldr s3, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "sub w9, w9, #0x10 (16)",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str w9, [x8, #36]",
        "ldr s4, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w9, [x8, #124]",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w11, w11, #0x10 (16)",
        "ldr s5, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "subs w6, w6, #0x10 (16)",
        "cset x14, hs",
        "subs w26, w9, #0x1 (1)",
        "rmif x14, #63, #nzCv",
        "mov x27, x9",
        "mov x9, x26",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str w9, [x8, #124]",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x6, #24]",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s3, s0",
        "str s3, [x6, #28]",
        "ldr s3, [x8, #84]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #88]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #92]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #96]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s5, s0",
        "str s5, [x6, #16]",
        "ldr s5, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x6, #20]",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]",
        "str q6, [x22, #1056]",
        "str q5, [x13, #1056]",
        "str q4, [x12, #1056]",
        "str q3, [x23, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf0f0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 409,
      "ExpectedInstructionCount": 2204,
      "x86Insts": [
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x30]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x2c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x28]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x24]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x20]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x1c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x18]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x14]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x10]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0xc]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fld dword [ebp + -0x30]",
        "fstp dword [esp + 0x40]",
        "fld dword [ebp + -0x2c]",
        "fstp dword [esp + 0x3c]",
        "fld dword [ebp + -0x28]",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + -0x24]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebp + -0x20]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebp + -0x1c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x18]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x14]",
        "fstp dword [esp + 0x24]",
        "fld dword [ebp + -0x10]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + -0xc]",
        "fstp dword [esp + 0x1c]",
        "fxch st5",
        "fstp dword [esp + 0x18]",
        "fxch st3",
        "fstp dword [esp + 0x14]",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fstp dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp],ebx",
        "call 0x0818d57a"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-48]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-44]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-40]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-36]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-32]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-28]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-24]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-20]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-16]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-12]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldur s8, [x9, #-48]",
        "str s8, [x8, #64]",
        "ldur s8, [x9, #-44]",
        "str s8, [x8, #60]",
        "ldur s8, [x9, #-40]",
        "str s8, [x8, #56]",
        "ldur s8, [x9, #-36]",
        "str s8, [x8, #52]",
        "ldur s8, [x9, #-32]",
        "str s8, [x8, #48]",
        "ldur s8, [x9, #-28]",
        "str s8, [x8, #44]",
        "ldur s8, [x9, #-24]",
        "str s8, [x8, #40]",
        "ldur s8, [x9, #-20]",
        "str s8, [x8, #36]",
        "ldur s8, [x9, #-16]",
        "str s8, [x8, #32]",
        "ldur s8, [x9, #-12]",
        "str s8, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "str w6, [x8]",
        "mov w20, #0x462",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 418,
      "ExpectedInstructionCount": 2211,
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "push ebx",
        "sub esp,0x84",
        "mov ebx,dword [ebp + 0x8]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x30]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x2c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x28]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x24]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x20]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x1c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x18]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x14]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x10]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0xc]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fld dword [ebp + -0x30]",
        "fstp dword [esp + 0x40]",
        "fld dword [ebp + -0x2c]",
        "fstp dword [esp + 0x3c]",
        "fld dword [ebp + -0x28]",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + -0x24]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebp + -0x20]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebp + -0x1c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x18]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x14]",
        "fstp dword [esp + 0x24]",
        "fld dword [ebp + -0x10]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + -0xc]",
        "fstp dword [esp + 0x1c]",
        "fxch st5",
        "fstp dword [esp + 0x18]",
        "fxch st3",
        "fstp dword [esp + 0x14]",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fstp dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp],ebx",
        "call 0x0818d57a",
        "mov eax,ebx",
        "add esp,0x84",
        "pop ebx",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "str w6, [x8, #-4]!",
        "subs w26, w8, #0x84 (132)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w6, [x9, #8]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-48]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-44]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-40]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-36]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-32]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-28]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-24]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-20]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-16]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-12]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #36]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s9",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v9.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v8.16b",
        "mov v1.16b, v9.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "ldur s8, [x9, #-48]",
        "str s8, [x8, #64]",
        "ldur s8, [x9, #-44]",
        "str s8, [x8, #60]",
        "ldur s8, [x9, #-40]",
        "str s8, [x8, #56]",
        "ldur s8, [x9, #-36]",
        "str s8, [x8, #52]",
        "ldur s8, [x9, #-32]",
        "str s8, [x8, #48]",
        "ldur s8, [x9, #-28]",
        "str s8, [x8, #44]",
        "ldur s8, [x9, #-24]",
        "str s8, [x8, #40]",
        "ldur s8, [x9, #-20]",
        "str s8, [x8, #36]",
        "ldur s8, [x9, #-16]",
        "str s8, [x8, #32]",
        "ldur s8, [x9, #-12]",
        "str s8, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #24]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #20]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #16]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #12]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x8, #4]",
        "str w6, [x8]",
        "mov w20, #0x46f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 231,
      "ExpectedInstructionCount": 1963,
      "x86Insts": [
        "fadd dword [esp + 0x40]",
        "lea edx,[ecx + ecx*0x2]",
        "lea esi,[edx + ecx*0x2]",
        "lea ebx,[ecx + -0x2]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x34]",
        "lea edi,[esi + ecx*0x2]",
        "fadd dword [esp + 0x40]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + esi*0x4 + -0x8]",
        "fadd dword [eax + ebx*0x4]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fchs",
        "fsub dword [eax + esi*0x4 + -0x4]",
        "fld dword [eax + ebx*0x4]",
        "fsub dword [eax + esi*0x4 + -0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + -0x4]",
        "fsub dword [eax + ecx*0x4 + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fadd dword [eax + edi*0x4 + -0x8]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fadd dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fsub dword [eax + edi*0x4 + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fsub dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ebx*0x4]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + -0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + -0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + -0x4]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st3",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + -0x8]",
        "fld st3",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "faddp",
        "fstp dword [eax + esi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + -0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4]",
        "fadd dword [eax + esi*0x4]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fchs",
        "fsub dword [eax + esi*0x4 + 0x4]",
        "fld dword [eax + ecx*0x4]",
        "fsub dword [eax + esi*0x4]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + 0x4]",
        "fsub dword [eax + ecx*0x4 + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4]",
        "fadd dword [eax + edx*0x4]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fadd dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4]",
        "fsub dword [eax + edi*0x4]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fsub dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4]",
        "fadd st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld st1",
        "fadd st0,st3",
        "fmul st1",
        "fstp dword [eax + edi*0x4]",
        "fxch",
        "fsub st0,st2",
        "fmul st1",
        "fstp dword [eax + edi*0x4 + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fadd dword [eax + esi*0x4 + 0x8]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fchs",
        "fsub dword [eax + esi*0x4 + 0xc]",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fsub dword [eax + esi*0x4 + 0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + 0xc]",
        "fsub dword [eax + ecx*0x4 + 0xc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + 0x8]",
        "fadd dword [eax + edx*0x4 + 0x8]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fadd dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + 0x8]",
        "fsub dword [eax + edi*0x4 + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fsub dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4 + 0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + 0xc]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + 0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st2",
        "fmul st2",
        "fld st4",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + 0x8]",
        "fxch st2",
        "fmul st2",
        "fxch st3",
        "fmul st1",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [eax + esi*0x4 + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + 0x8]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + 0xc]",
        "pop edi",
        "pop esi",
        "fstp st0",
        "pop ebp",
        "fstp st0",
        "pop ebx",
        "add esp,0x74"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w5, w7, w7, lsl #1",
        "add w10, w5, w7, lsl #1",
        "sub w6, w7, #0x2 (2)",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w11, w10, w7, lsl #1",
        "ldr s4, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add x23, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add x12, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #44]",
        "ldr s4, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #40]",
        "add w13, w4, w10, lsl #2",
        "ldur s4, [x13, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w13, w4, w6, lsl #2",
        "ldr s5, [x13]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w13, w4, w7, lsl #2",
        "ldur s5, [x13, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x13, x28, x22, lsl #4",
        "ldr q6, [x28, #3552]",
        "eor v5.16b, v5.16b, v6.16b",
        "add w14, w4, w10, lsl #2",
        "ldur s6, [x14, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w14, w4, w6, lsl #2",
        "ldr s6, [x14]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w14, w4, w10, lsl #2",
        "ldur s7, [x14, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add x14, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldur s6, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w5, lsl #2",
        "ldur s6, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldur s7, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add x22, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #24]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w6, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "stur s7, [x15, #-4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-8]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x15, #-8]",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "eor v5.16b, v5.16b, v6.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #24]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x15]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr q7, [x28, #3552]",
        "eor v6.16b, v6.16b, v7.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr q6, [x28, #3552]",
        "eor v5.16b, v5.16b, v6.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #24]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #8]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #12]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #12]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x15, #8]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x15, #12]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #40]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x15, #12]",
        "ldp w11, w10, [x8], #8",
        "ldp w9, w6, [x8], #8",
        "mvn w27, w8",
        "adds w26, w8, #0x74 (116)",
        "cfinv",
        "mov x8, x26",
        "strb w20, [x28, #1051]",
        "str q7, [x22, #1056]",
        "str q6, [x14, #1056]",
        "str q5, [x13, #1056]",
        "str q4, [x12, #1056]",
        "str q3, [x23, #1056]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfcfc",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 222,
      "ExpectedInstructionCount": 1957,
      "x86Insts": [
        "fadd dword [esp + 0x40]",
        "lea edx,[ecx + ecx*0x2]",
        "lea esi,[edx + ecx*0x2]",
        "lea ebx,[ecx + -0x2]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x34]",
        "lea edi,[esi + ecx*0x2]",
        "fadd dword [esp + 0x40]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [eax + esi*0x4 + -0x8]",
        "fadd dword [eax + ebx*0x4]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fadd dword [eax + esi*0x4 + -0x4]",
        "fld dword [eax + ebx*0x4]",
        "fsub dword [eax + esi*0x4 + -0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fsub dword [eax + esi*0x4 + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + -0x8]",
        "fadd dword [eax + edx*0x4 + -0x8]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fadd dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fsub dword [eax + edi*0x4 + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fsub dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ebx*0x4]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + -0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + -0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + -0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st3",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + -0x8]",
        "fld st3",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "faddp",
        "fstp dword [eax + esi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + -0x8]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4]",
        "fadd dword [eax + esi*0x4]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fadd dword [eax + esi*0x4 + 0x4]",
        "fld dword [eax + ecx*0x4]",
        "fsub dword [eax + esi*0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fsub dword [eax + esi*0x4 + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edx*0x4]",
        "fadd dword [eax + edi*0x4]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fadd dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4]",
        "fsub dword [eax + edi*0x4]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fsub dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + 0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4]",
        "fadd st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld st1",
        "fadd st0,st3",
        "fmul st1",
        "fstp dword [eax + edi*0x4]",
        "fxch",
        "fsub st0,st2",
        "fmul st1",
        "fstp dword [eax + edi*0x4 + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fadd dword [eax + esi*0x4 + 0x8]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fadd dword [eax + esi*0x4 + 0xc]",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fsub dword [eax + esi*0x4 + 0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fsub dword [eax + esi*0x4 + 0xc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + 0x8]",
        "fadd dword [eax + edx*0x4 + 0x8]",
        "fld dword [eax + edi*0x4 + 0xc]",
        "fadd dword [eax + edx*0x4 + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + 0x8]",
        "fsub dword [eax + edi*0x4 + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fsub dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4 + 0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + 0xc]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + 0xc]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st2",
        "fmul st2",
        "fld st4",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + 0x8]",
        "fxch st2",
        "fmul st2",
        "fxch st3",
        "fmul st1",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [eax + esi*0x4 + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + 0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + 0xc]",
        "pop edi",
        "pop esi",
        "fstp st0",
        "pop ebp",
        "fstp st0",
        "pop ebx",
        "add esp,0x74"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w5, w7, w7, lsl #1",
        "add w10, w5, w7, lsl #1",
        "sub w6, w7, #0x2 (2)",
        "ldr s3, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #52]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w11, w10, w7, lsl #1",
        "ldr s4, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add x23, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #116]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #56]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add x12, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #48]",
        "ldr s4, [x8, #60]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #120]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x8, #44]",
        "add w13, w4, w10, lsl #2",
        "ldur s4, [x13, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w13, w4, w6, lsl #2",
        "ldr s5, [x13]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w13, w4, w7, lsl #2",
        "ldur s5, [x13, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w13, w4, w10, lsl #2",
        "ldur s6, [x13, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add x13, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w14, w4, w6, lsl #2",
        "ldr s6, [x14]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w14, w4, w10, lsl #2",
        "ldur s7, [x14, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add x14, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldur s6, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldur s6, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "add x22, x28, x22, lsl #4",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w6, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "stur s7, [x15, #-4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-8]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x15, #-8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "stur s6, [x15, #-8]",
        "ldr s6, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "stur s4, [x15, #-4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w5, lsl #2",
        "ldr s6, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #4]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr s7, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x15]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #64]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "ldr q7, [x28, #3552]",
        "eor v6.16b, v6.16b, v7.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s6, s0",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "ldr s8, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s8",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v8.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v8.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x8, #20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #8]",
        "ldr s7, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s7",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "add w15, w4, w7, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v7.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s7, s0",
        "str s7, [x15, #12]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #16]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w5, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #12]",
        "ldr s4, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldr s6, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s6",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v6.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v7.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "mov v1.16b, v7.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v6.16b, v0.16b",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v6.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x15, #8]",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1049]",
        "add w15, w4, w10, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x15, #12]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #24]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x8, #28]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #32]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s4, s0",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #44]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x8, #48]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v5.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "mov v1.16b, v5.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "add w15, w4, w11, lsl #2",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v4.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s8, s0",
        "str s8, [x15, #12]",
        "ldp w11, w10, [x8], #8",
        "ldp w9, w6, [x8], #8",
        "mvn w27, w8",
        "adds w26, w8, #0x74 (116)",
        "cfinv",
        "mov x8, x26",
        "strb w20, [x28, #1051]",
        "str q7, [x22, #1056]",
        "str q6, [x14, #1056]",
        "str q5, [x13, #1056]",
        "str q4, [x12, #1056]",
        "str q3, [x23, #1056]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfcfc",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 420,
      "ExpectedInstructionCount": 1954,
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "sub esp,0x14",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x78",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x38",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x7c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x3c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x78",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x78",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x38",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x7c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x7c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x3c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x38]",
        "mov eax,dword [ebp + -0x8]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x3c]",
        "mov eax,dword [ebp + -0x4]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x70",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x30",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x74",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x34",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x70",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x70",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x30",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x74",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x74",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x34",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x30",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553144]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x34",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x68",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x28",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x6c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x2c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x68",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x68",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x28",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x6c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x6c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x2c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x28",
        "fld dword [ebp + -0x8]",
        "fsub dword [ebp + -0x4]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x2c",
        "fld dword [ebp + -0x8]",
        "fadd dword [ebp + -0x4]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x60",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x20",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x64",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x24",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x60",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x60",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x20",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x64",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x64",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x24",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x20",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x0855314c]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x24",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x58",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x18",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x1c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x5c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x58",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x58",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x18",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x5c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x5c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x1c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x18]",
        "mov eax,dword [ebp + -0x4]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x1c]",
        "mov eax,dword [ebp + -0x8]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x10",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x50",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x14",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x54",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x50",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x50",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x10",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x54",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x54",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x14",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x10",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x14",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x0855314c]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x8",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x48",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0xc",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x48",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x48",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x8",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x4c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0xc",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x8",
        "fld dword [ebp + -0x4]",
        "fadd dword [ebp + -0x8]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0xc",
        "fld dword [ebp + -0x4]",
        "fsub dword [ebp + -0x8]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x44",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x40]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax]",
        "faddp",
        "fstp dword [edx]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x44",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x44",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x4",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x8]",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553144]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "mov dword [esp],eax",
        "call 0x0816de98",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "mov dword [esp],eax",
        "call 0x0816de98",
        "leave"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "sub w8, w8, #0x14 (20)",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x78 (120)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x38 (56)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x7c (124)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x3c (60)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x78 (120)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x78 (120)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x38 (56)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x7c (124)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x7c (124)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x3c (60)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x38 (56)",
        "ldur w4, [x9, #-8]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x3c (60)",
        "ldur w4, [x9, #-4]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x70 (112)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x30 (48)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x74 (116)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x34 (52)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x70 (112)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x70 (112)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x30 (48)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x74 (116)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x74 (116)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x34 (52)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x30 (48)",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w20, #0x3140",
        "movk w20, #0x855, lsl #16",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov w21, #0x3144",
        "movk w21, #0x855, lsl #16",
        "ldr s4, [x21]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x34 (52)",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w22, #0x3148",
        "movk w22, #0x855, lsl #16",
        "ldr s3, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x68 (104)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x28 (40)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x6c (108)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x2c (44)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x68 (104)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x68 (104)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x28 (40)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x6c (108)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x6c (108)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x2c (44)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x28 (40)",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "mov w23, #0x313c",
        "movk w23, #0x855, lsl #16",
        "ldr s3, [x23]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x2c (44)",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x23]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x60 (96)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x20 (32)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x64 (100)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x24 (36)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x60 (96)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x60 (96)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x20 (32)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x64 (100)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x64 (100)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x24 (36)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x20 (32)",
        "ldur s2, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov w12, #0x314c",
        "movk w12, #0x855, lsl #16",
        "ldr s4, [x12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x24 (36)",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x58 (88)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x18 (24)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x1c (28)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x5c (92)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x58 (88)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x58 (88)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x18 (24)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x5c (92)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x5c (92)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x1c (28)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x18 (24)",
        "ldur w4, [x9, #-4]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x1c (28)",
        "ldur w4, [x9, #-8]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x10 (16)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x50 (80)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x14 (20)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x54 (84)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x50 (80)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x50 (80)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x10 (16)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x54 (84)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x54 (84)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x14 (20)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x10 (16)",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x14 (20)",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x8 (8)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x48 (72)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0xc (12)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4c (76)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x48 (72)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x48 (72)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x8 (8)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4c (76)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x4c (76)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0xc (12)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x8 (8)",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x23]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0xc (12)",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x23]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x40 (64)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4 (4)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x44 (68)",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x40 (64)",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x40 (64)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "ldr s3, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x44 (68)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x44 (68)",
        "ldr s2, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x4 (4)",
        "ldr s3, [x5]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x22]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr w4, [x9, #8]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "adds w26, w4, #0x4 (4)",
        "mov x27, x4",
        "mov x4, x26",
        "ldur s2, [x9, #-4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x20]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldur s3, [x9, #-8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x21]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v4.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "str w4, [x8]",
        "mov w20, #0x47c",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "cfinv",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xe0e0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "CSSC",
      "AFP"
    ]
  },
  "Instructions": {
    "fadd dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom dword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd8 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp dword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xd8 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom st0, st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xd8 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp st0, st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd8 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xd8 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdiv st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fld dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd9 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst dword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]"
      ]
    },
    "fstp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd9 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldenv [rax]": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "add x20, x4, #0x4 (4)",
        "ldr w20, [x20]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "add x20, x4, #0x8 (8)",
        "ldr w20, [x20]",
        "and w20, w20, w20, lsr #1",
        "mov w21, #0x55555555",
        "bic w20, w21, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]"
      ]
    },
    "fnstenv [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd9 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "str w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "str w20, [x4, #4]",
        "ldrb w20, [x28, #1202]",
        "orr w20, w20, w20, lsl #4",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsl #2",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsl #1",
        "and w20, w20, #0x55555555",
        "orr w20, w20, w20, lsl #1",
        "eor w20, w20, #0xffff",
        "str w20, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]"
      ]
    },
    "fnstcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]"
      ]
    },
    "fld st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st3": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st4": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st5": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st6": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fxch st0, st0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xd9 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1049]"
      ]
    },
    "fxch st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fnop": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xd9 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fchs": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xd9 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fabs": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xd9 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "ldr q3, [x28, #3552]",
        "bic v2.16b, v2.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ftst": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "mov w20, #0x0",
        "fmov d3, x20",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fxam": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "ubfx x21, x21, #15, #1",
        "strb w21, [x28, #1049]",
        "ldrb w21, [x28, #1202]",
        "lsr w20, w21, w20",
        "and w20, w20, #0x1",
        "mrs x21, nzcv",
        "cmp w20, #0x1 (1)",
        "cset x22, ne",
        "strb w22, [x28, #1048]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fld1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3328]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2t": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3344]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2e": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3360]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldpi": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3376]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldlg2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3392]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldln2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3408]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldz": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "f2xm1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1856]",
        "ldr x3, [x28, #1864]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fyl2x": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2080]",
        "ldr x3, [x28, #2088]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]"
      ]
    },
    "fptan": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1872]",
        "ldr x3, [x28, #1880]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3328]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q3, [x22, #1056]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fpatan": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2096]",
        "ldr x3, [x28, #2104]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]"
      ]
    },
    "fxtract": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xd9 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1952]",
        "ldr x3, [x28, #1960]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1968]",
        "ldr x3, [x28, #1976]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q2, [x22, #1056]",
        "str q3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fprem1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2112]",
        "ldr x3, [x28, #2120]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fdecstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fincstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fprem": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2128]",
        "ldr x3, [x28, #2136]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fyl2xp1": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3328]",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2080]",
        "ldr x3, [x28, #2088]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb w20, [x28, #1051]",
        "str q3, [x22, #1056]",
        "str q2, [x21, #1056]"
      ]
    },
    "fsqrt": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1888]",
        "ldr x3, [x28, #1896]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsincos": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1936]",
        "ldr x3, [x28, #1944]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov v4.16b, v1.16b",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q4, [x22, #1056]",
        "str q3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frndint": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1840]",
        "ldr x3, [x28, #1848]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fscale": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2144]",
        "ldr x3, [x28, #2152]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsin": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1904]",
        "ldr x3, [x28, #1912]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fcos": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1920]",
        "ldr x3, [x28, #1928]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fiadd dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fimul dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ficom dword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xda !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "ficomp dword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xda !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fisubr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidiv dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidivr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xda 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fucompp": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xda 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild dword [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdb !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "sxtw x20, w20",
        "mrs x21, nzcv",
        "mov w22, #0x0",
        "cmp x20, #0x0 (0)",
        "mov w23, #0x8000",
        "csel x23, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov w24, #0x3f",
        "mov x0, #0x3f",
        "clz x30, x20",
        "sub x30, x0, x30",
        "sub x24, x24, x30",
        "lsl x30, x20, x24",
        "mov w18, #0x403e",
        "sub x24, x18, x24",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x24, eq",
        "orr x20, x23, x20",
        "fmov d2, x30",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdb !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1760]",
        "ldr x3, [x28, #1768]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist dword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1712]",
        "ldr x3, [x28, #1720]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "str w20, [x4]"
      ]
    },
    "fistp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdb !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1712]",
        "ldr x3, [x28, #1720]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld tword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp tword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcmovnb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fnclex": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xdb 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1040]"
      ]
    },
    "fninit": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdb 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fucomi st0, st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomi st0, st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomi st0, st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdb 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "rmif x21, #63, #nzCv",
        "rmif x22, #62, #nZcv",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]"
      ]
    },
    "fadd qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom qword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdc !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp qword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xdc !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fadd st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fmul st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xe0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fsubr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xe8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fsub st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xf0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fdivr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xf8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fdiv st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fld qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1776]",
        "ldr x3, [x28, #1784]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "str x21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst qword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x4]"
      ]
    },
    "fstp qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frstor [rax]": {
      "ExpectedInstructionCount": 76,
      "Comment": [
        "0xdd !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w22, w20, #8, #1",
        "ubfx w23, w20, #9, #1",
        "ubfx w24, w20, #10, #1",
        "ubfx w30, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w22, [x28, #1048]",
        "strb w23, [x28, #1049]",
        "strb w24, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w22, #0x55555555",
        "bic w20, w22, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]",
        "mov x20, #0xffffffffffffffff",
        "mov w22, #0xffff",
        "fmov d2, x20",
        "fmov v2.D[1], x22",
        "ldur q3, [x4, #28]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x21, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #38]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr q3, [x4, #48]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #58]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #68]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #78]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #88]",
        "and v2.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q2, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur d2, [x4, #98]",
        "ldr h3, [x4, #106]",
        "mov v2.h[4], v3.h[0]",
        "add x0, x28, x20, lsl #4",
        "str q2, [x0, #1056]"
      ]
    },
    "fnsave [rax]": {
      "ExpectedInstructionCount": 79,
      "Comment": [
        "0xdd !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrh w21, [x28, #1200]",
        "str w21, [x4]",
        "ldrb w21, [x28, #1051]",
        "lsl x21, x21, #11",
        "ldrb w22, [x28, #1048]",
        "orr x21, x21, x22, lsl #8",
        "ldrb w22, [x28, #1049]",
        "orr x21, x21, x22, lsl #9",
        "ldrb w22, [x28, #1050]",
        "orr x21, x21, x22, lsl #10",
        "ldrb w22, [x28, #1054]",
        "orr x21, x21, x22, lsl #14",
        "ldrb w22, [x28, #1040]",
        "orr x21, x21, x22",
        "str w21, [x4, #4]",
        "ldrb w21, [x28, #1202]",
        "orr w21, w21, w21, lsl #4",
        "and w21, w21, #0xf0f0f0f",
        "orr w21, w21, w21, lsl #2",
        "and w21, w21, #0x33333333",
        "orr w21, w21, w21, lsl #1",
        "and w21, w21, #0x55555555",
        "orr w21, w21, w21, lsl #1",
        "eor w21, w21, #0xffff",
        "str w21, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #28]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #38]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #58]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #68]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #78]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #88]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur d2, [x4, #98]",
        "dup v2.8h, v2.h[4]",
        "str h2, [x4, #106]",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fnstsw [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4]"
      ]
    },
    "fld and fnstsw": {
      "x86InstructionCount": 5,
      "ExpectedInstructionCount": 83,
      "x86Insts": [
        "fld dword [rax]",
        "fld dword [rax + 4]",
        "fld dword [rax + 8]",
        "fld dword [rax + 12]",
        "fnstsw [rbx]"
      ],
      "ExpectedArm64ASM": [
        "sub sp, sp, #0x20 (32)",
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr s3, [x4, #4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s3",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "ldr s4, [x4, #8]",
        "str x30, [sp, #-16]!",
        "fmov s0, s4",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v4.16b, v0.16b",
        "ldr s5, [x4, #12]",
        "str x30, [sp, #-16]!",
        "fmov s0, s5",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v5.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q5, [x21, #1056]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "add x22, x28, x22, lsl #4",
        "str q4, [x22, #1056]",
        "add w23, w20, #0x2 (2)",
        "and w23, w23, #0x7",
        "add x23, x28, x23, lsl #4",
        "str q3, [x23, #1056]",
        "add w24, w20, #0x3 (3)",
        "and w24, w24, #0x7",
        "add x24, x28, x24, lsl #4",
        "str q2, [x24, #1056]",
        "ldrb w30, [x28, #1202]",
        "mov w18, #0x8",
        "sub w18, w18, w20",
        "str x24, [sp]",
        "mov w24, #0xf0f",
        "lsr w24, w24, w18",
        "orr w24, w30, w24",
        "strb w24, [x28, #1202]",
        "lsl x30, x20, #11",
        "ldrb w18, [x28, #1048]",
        "orr x30, x30, x18, lsl #8",
        "ldrb w18, [x28, #1049]",
        "orr x30, x30, x18, lsl #9",
        "ldrb w18, [x28, #1050]",
        "orr x30, x30, x18, lsl #10",
        "ldrb w18, [x28, #1054]",
        "orr x30, x30, x18, lsl #14",
        "ldrb w18, [x28, #1040]",
        "orr x30, x30, x18",
        "strh w30, [x6]",
        "str q5, [x21, #1056]",
        "str q4, [x22, #1056]",
        "str q3, [x23, #1056]",
        "ldr x21, [sp]",
        "str q2, [x21, #1056]",
        "mov w21, #0x8",
        "sub w20, w21, w20",
        "mov w21, #0xf0f",
        "lsr w20, w21, w20",
        "orr w20, w24, w20",
        "strb w20, [x28, #1202]",
        "add sp, sp, #0x20 (32)"
      ]
    },
    "ffree st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdd 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st3": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st4": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st5": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st6": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st7": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xdd 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fst st1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st3": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st4": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st5": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st6": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st0": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdd 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucom st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdd 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomp st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdd 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdd 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fiadd word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fimul word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ficom word [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xde !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "ficomp word [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xde !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fisubr word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidiv word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidivr word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "faddp st0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st3": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st4": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st5": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st6": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st7": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st3": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st4": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st5": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st6": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st7": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcompp": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xde 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fsubrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr q3, [x23, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe8": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fsubp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fdivrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr q3, [x23, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf8": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fdivp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild word [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "mrs x21, nzcv",
        "mov w22, #0x0",
        "cmp x20, #0x0 (0)",
        "mov w23, #0x8000",
        "csel x23, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov w24, #0x3f",
        "mov x0, #0x3f",
        "clz x30, x20",
        "sub x30, x0, x30",
        "sub x24, x24, x30",
        "lsl x30, x20, x24",
        "mov w18, #0x403e",
        "sub x24, x18, x24",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x24, eq",
        "orr x20, x23, x20",
        "fmov d2, x30",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp word [rax]": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xdf !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "and x21, x21, #0x7fff",
        "mrs x22, nzcv",
        "tst x21, #0x7fff",
        "cset x23, eq",
        "mov w24, #0x400e",
        "cmp x21, x24",
        "cset x21, hs",
        "orr x21, x23, x21",
        "strb w21, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1744]",
        "ldr x3, [x28, #1752]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist word [rax]": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xdf !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "mov x20, v2.d[1]",
        "and x20, x20, #0x7fff",
        "mrs x21, nzcv",
        "tst x20, #0x7fff",
        "cset x22, eq",
        "mov w23, #0x400e",
        "cmp x20, x23",
        "cset x20, hs",
        "orr x20, x22, x20",
        "strb w20, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1696]",
        "ldr x3, [x28, #1704]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "strh w20, [x4]",
        "msr nzcv, x21"
      ]
    },
    "fistp word [rax]": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xdf !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "and x21, x21, #0x7fff",
        "mrs x22, nzcv",
        "tst x21, #0x7fff",
        "cset x23, eq",
        "mov w24, #0x400e",
        "cmp x21, x24",
        "cset x21, hs",
        "orr x21, x23, x21",
        "strb w21, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1696]",
        "ldr x3, [x28, #1704]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbld tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdf !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #2000]",
        "ldr x3, [x28, #2008]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbstp tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdf !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1984]",
        "ldr x3, [x28, #1992]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffreep st0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st5": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st6": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fnstsw ax": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "fucomip st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdf 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdf 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "eor x23, x23, #0x1",
        "rmif x23, #63, #nzCv",
        "rmif x24, #62, #nZcv",
        "eor w26, w22, #0x1",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdf 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdf 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "eor x23, x23, #0x1",
        "rmif x23, #63, #nzCv",
        "rmif x24, #62, #nZcv",
        "eor w26, w22, #0x1",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "rmif x22, #63, #nzCv",
        "rmif x23, #62, #nZcv",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_32": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld dword [rax]",
        "fstp dword [rdx]",
        "fld dword [rax + 4]",
        "fstp dword [rdx + 4]",
        "fld dword [rax + 8]",
        "fstp dword [rdx + 8]",
        "fld dword [rax + 12]",
        "fstp dword [rdx + 12]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str s2, [x5]",
        "ldr s2, [x4, #4]",
        "str s2, [x5, #4]",
        "ldr s2, [x4, #8]",
        "str s2, [x5, #8]",
        "ldr s2, [x4, #12]",
        "str s2, [x5, #12]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_64": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld qword [rax]",
        "fstp qword [rdx]",
        "fld qword [rax + 8]",
        "fstp qword [rdx + 8]",
        "fld qword [rax + 16]",
        "fstp qword [rdx + 16]",
        "fld qword [rax + 32]",
        "fstp qword [rdx + 32]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str d2, [x5]",
        "ldr d2, [x4, #8]",
        "str d2, [x5, #8]",
        "ldr d2, [x4, #16]",
        "str d2, [x5, #16]",
        "ldr d2, [x4, #32]",
        "str d2, [x5, #32]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_80": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 38,
      "x86Insts": [
        "fld tword [rax]",
        "fstp tword [rdx]",
        "fld tword [rax + 10]",
        "fstp tword [rdx + 10]",
        "fld tword [rax + 20]",
        "fstp tword [rdx + 20]",
        "fld tword [rax + 30]",
        "fstp tword [rdx + 30]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str d2, [x5]",
        "mov x20, v2.d[1]",
        "strh w20, [x5, #8]",
        "add x20, x4, #0xa (10)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #10]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0xa (10)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x14 (20)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #20]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x14 (20)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x1e (30)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #30]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x1e (30)",
        "strh w20, [x21, #8]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87_f64-Crysis2Max-fmodel.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "sub esp,0x104",
        "mov eax,dword  [ebp + 0x10]",
        "fld dword  [eax]",
        "mov ecx,dword  [0x100de354]",
        "fadd dword  [eax + 0x7c]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [eax]",
        "fsub dword  [eax + 0x7c]",
        "fmul dword  [ecx]",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [eax + 0x78]",
        "fadd dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [eax + 0x4]",
        "fsub dword  [eax + 0x78]",
        "fmul dword  [ecx + 0x4]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [eax + 0x74]",
        "fadd dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [eax + 0x8]",
        "fsub dword  [eax + 0x74]",
        "fmul dword  [ecx + 0x8]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [eax + 0x70]",
        "fadd dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [eax + 0xc]",
        "fsub dword  [eax + 0x70]",
        "fmul dword  [ecx + 0xc]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [eax + 0x6c]",
        "fadd dword  [eax + 0x10]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [eax + 0x10]",
        "fsub dword  [eax + 0x6c]",
        "fmul dword  [ecx + 0x10]",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [eax + 0x68]",
        "fadd dword  [eax + 0x14]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [eax + 0x14]",
        "fsub dword  [eax + 0x68]",
        "fmul dword  [ecx + 0x14]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [eax + 0x64]",
        "fadd dword  [eax + 0x18]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [eax + 0x18]",
        "fsub dword  [eax + 0x64]",
        "fmul dword  [ecx + 0x18]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [eax + 0x60]",
        "fadd dword  [eax + 0x1c]",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [eax + 0x1c]",
        "fsub dword  [eax + 0x60]",
        "fmul dword  [ecx + 0x1c]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [eax + 0x5c]",
        "fadd dword  [eax + 0x20]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [eax + 0x20]",
        "fsub dword  [eax + 0x5c]",
        "fmul dword  [ecx + 0x20]",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [eax + 0x58]",
        "fadd dword  [eax + 0x24]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [eax + 0x24]",
        "fsub dword  [eax + 0x58]",
        "fmul dword  [ecx + 0x24]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [eax + 0x54]",
        "fadd dword  [eax + 0x28]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [eax + 0x28]",
        "fsub dword  [eax + 0x54]",
        "fmul dword  [ecx + 0x28]",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [eax + 0x50]",
        "fadd dword  [eax + 0x2c]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [eax + 0x2c]",
        "fsub dword  [eax + 0x50]",
        "fmul dword  [ecx + 0x2c]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [eax + 0x4c]",
        "fadd dword  [eax + 0x30]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [eax + 0x30]",
        "fsub dword  [eax + 0x4c]",
        "fmul dword  [ecx + 0x30]",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [eax + 0x48]",
        "fadd dword  [eax + 0x34]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [eax + 0x34]",
        "fsub dword  [eax + 0x48]",
        "fmul dword  [ecx + 0x34]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [eax + 0x44]",
        "fadd dword  [eax + 0x38]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [eax + 0x38]",
        "fsub dword  [eax + 0x44]",
        "fmul dword  [ecx + 0x38]",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [eax + 0x40]",
        "fadd dword  [eax + 0x3c]",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [eax + 0x3c]",
        "fsub dword  [eax + 0x40]",
        "mov eax,[0x100de358]",
        "fmul dword  [ecx + 0x3c]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + -0x44]",
        "fadd dword  [ebp + -0x80]",
        "fstp dword  [ebp + 0xffffff00]",
        "fld dword  [ebp + -0x80]",
        "fsub dword  [ebp + -0x44]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + 0xffffff3c]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x7c]",
        "fstp dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + -0x7c]",
        "fsub dword  [ebp + -0x48]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xffffff38]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x78]",
        "fstp dword  [ebp + 0xffffff08]",
        "fld dword  [ebp + -0x78]",
        "fsub dword  [ebp + -0x4c]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + 0xffffff34]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x74]",
        "fstp dword  [ebp + 0xffffff0c]",
        "fld dword  [ebp + -0x74]",
        "fsub dword  [ebp + -0x50]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + 0xffffff30]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + 0xffffff10]",
        "fld dword  [ebp + -0x70]",
        "fsub dword  [ebp + -0x54]",
        "fmul dword  [eax + 0x10]",
        "fstp dword  [ebp + 0xffffff2c]",
        "fld dword  [ebp + -0x58]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + 0xffffff14]",
        "fld dword  [ebp + -0x6c]",
        "fsub dword  [ebp + -0x58]",
        "fmul dword  [eax + 0x14]",
        "fstp dword  [ebp + 0xffffff28]",
        "fld dword  [ebp + -0x5c]",
        "fadd dword  [ebp + -0x68]",
        "fstp dword  [ebp + 0xffffff18]",
        "fld dword  [ebp + -0x68]",
        "fsub dword  [ebp + -0x5c]",
        "fmul dword  [eax + 0x18]",
        "fstp dword  [ebp + 0xffffff24]",
        "fld dword  [ebp + -0x60]",
        "fadd dword  [ebp + -0x64]",
        "fstp dword  [ebp + 0xffffff1c]",
        "fld dword  [ebp + -0x64]",
        "fsub dword  [ebp + -0x60]",
        "fmul dword  [eax + 0x1c]",
        "fstp dword  [ebp + 0xffffff20]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x4]",
        "fstp dword  [ebp + 0xffffff40]",
        "fld dword  [ebp + -0x4]",
        "fsub dword  [ebp + -0x40]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + 0xffffff7c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + 0xffffff44]",
        "fld dword  [ebp + -0x8]",
        "fsub dword  [ebp + -0x3c]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xffffff78]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + 0xffffff48]",
        "fld dword  [ebp + -0xc]",
        "fsub dword  [ebp + -0x38]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + 0xffffff74]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + 0xffffff4c]",
        "fld dword  [ebp + -0x10]",
        "fsub dword  [ebp + -0x34]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + 0xffffff70]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + 0xffffff50]",
        "fld dword  [ebp + -0x14]",
        "fsub dword  [ebp + -0x30]",
        "fmul dword  [eax + 0x10]",
        "fstp dword  [ebp + 0xffffff6c]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [ebp + 0xffffff54]",
        "fld dword  [ebp + -0x18]",
        "fsub dword  [ebp + -0x2c]",
        "fmul dword  [eax + 0x14]",
        "fstp dword  [ebp + 0xffffff68]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ebp + 0xffffff58]",
        "fld dword  [ebp + -0x1c]",
        "fsub dword  [ebp + -0x28]",
        "fmul dword  [eax + 0x18]",
        "fstp dword  [ebp + 0xffffff64]",
        "fld dword  [ebp + -0x24]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [ebp + 0xffffff5c]",
        "fld dword  [ebp + -0x20]",
        "fsub dword  [ebp + -0x24]",
        "fmul dword  [eax + 0x1c]",
        "mov eax,[0x100de35c]",
        "fstp dword  [ebp + 0xffffff60]",
        "fld dword  [ebp + 0xffffff1c]",
        "fadd dword  [ebp + 0xffffff00]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [ebp + 0xffffff00]",
        "fsub dword  [ebp + 0xffffff1c]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [ebp + 0xffffff18]",
        "fadd dword  [ebp + 0xffffff04]",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [ebp + 0xffffff04]",
        "fsub dword  [ebp + 0xffffff18]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + 0xffffff14]",
        "fadd dword  [ebp + 0xffffff08]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff08]",
        "fsub dword  [ebp + 0xffffff14]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff10]",
        "fadd dword  [ebp + 0xffffff0c]",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [ebp + 0xffffff0c]",
        "fsub dword  [ebp + 0xffffff10]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + 0xffffff20]",
        "fadd dword  [ebp + 0xffffff3c]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + 0xffffff3c]",
        "fsub dword  [ebp + 0xffffff20]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [ebp + 0xffffff24]",
        "fadd dword  [ebp + 0xffffff38]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + 0xffffff38]",
        "fsub dword  [ebp + 0xffffff24]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + 0xffffff28]",
        "fadd dword  [ebp + 0xffffff34]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff34]",
        "fsub dword  [ebp + 0xffffff28]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff2c]",
        "fadd dword  [ebp + 0xffffff30]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + 0xffffff30]",
        "fsub dword  [ebp + 0xffffff2c]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + 0xffffff5c]",
        "fadd dword  [ebp + 0xffffff40]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + 0xffffff40]",
        "fsub dword  [ebp + 0xffffff5c]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [ebp + 0xffffff58]",
        "fadd dword  [ebp + 0xffffff44]",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [ebp + 0xffffff44]",
        "fsub dword  [ebp + 0xffffff58]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + 0xffffff54]",
        "fadd dword  [ebp + 0xffffff48]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff48]",
        "fsub dword  [ebp + 0xffffff54]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff50]",
        "fadd dword  [ebp + 0xffffff4c]",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [ebp + 0xffffff4c]",
        "fsub dword  [ebp + 0xffffff50]",
        "fmul dword  [eax + 0xc]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + 0xffffff60]",
        "fadd dword  [ebp + 0xffffff7c]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + 0xffffff7c]",
        "fsub dword  [ebp + 0xffffff60]",
        "fmul dword  [eax]",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [ebp + 0xffffff64]",
        "fadd dword  [ebp + 0xffffff78]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + 0xffffff78]",
        "fsub dword  [ebp + 0xffffff64]",
        "fmul dword  [eax + 0x4]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + 0xffffff68]",
        "fadd dword  [ebp + 0xffffff74]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff74]",
        "fsub dword  [ebp + 0xffffff68]",
        "fmul dword  [eax + 0x8]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + 0xffffff6c]",
        "fadd dword  [ebp + 0xffffff70]",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + 0xffffff70]",
        "fsub dword  [ebp + 0xffffff6c]",
        "fmul dword  [eax + 0xc]",
        "mov eax,[0x100de360]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [eax]",
        "fstp dword  [ebp + 0x10]",
        "fld dword  [eax + 0x4]",
        "fstp dword  [ebp + 0xfffffefc]",
        "fld dword  [ebp + -0x74]",
        "fadd dword  [ebp + -0x80]",
        "fstp dword  [ebp + 0xffffff00]",
        "fld dword  [ebp + -0x80]",
        "fsub dword  [ebp + -0x74]",
        "fld dword  [ebp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff0c]",
        "fld dword  [ebp + -0x78]",
        "fadd dword  [ebp + -0x7c]",
        "fstp dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + -0x7c]",
        "fsub dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xfffffefc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff08]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + 0xffffff10]",
        "fld dword  [ebp + -0x64]",
        "fsub dword  [ebp + -0x70]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff1c]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + 0xffffff14]",
        "fld dword  [ebp + -0x68]",
        "fsub dword  [ebp + -0x6c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff18]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x60]",
        "fstp dword  [ebp + 0xffffff20]",
        "fld dword  [ebp + -0x60]",
        "fsub dword  [ebp + -0x54]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff2c]",
        "fld dword  [ebp + -0x58]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + 0xffffff24]",
        "fld dword  [ebp + -0x5c]",
        "fsub dword  [ebp + -0x58]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff28]",
        "fld dword  [ebp + -0x44]",
        "fadd dword  [ebp + -0x50]",
        "fstp dword  [ebp + 0xffffff30]",
        "fld dword  [ebp + -0x44]",
        "fsub dword  [ebp + -0x50]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff3c]",
        "fld dword  [ebp + -0x48]",
        "mov eax,[0x100de364]",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + 0xffffff34]",
        "fld dword  [ebp + -0x48]",
        "fsub dword  [ebp + -0x4c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff38]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x34]",
        "fstp dword  [ebp + 0xffffff40]",
        "fld dword  [ebp + -0x40]",
        "fsub dword  [ebp + -0x34]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff4c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x38]",
        "fstp dword  [ebp + 0xffffff44]",
        "fld dword  [ebp + -0x3c]",
        "fsub dword  [ebp + -0x38]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff48]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x24]",
        "fstp dword  [ebp + 0xffffff50]",
        "fld dword  [ebp + -0x24]",
        "fsub dword  [ebp + -0x30]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff5c]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + 0xffffff54]",
        "fld dword  [ebp + -0x28]",
        "fsub dword  [ebp + -0x2c]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff58]",
        "fld dword  [ebp + -0x20]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + 0xffffff60]",
        "fld dword  [ebp + -0x20]",
        "fsub dword  [ebp + -0x14]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + 0xffffff6c]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [ebp + 0xffffff64]",
        "fld dword  [ebp + -0x1c]",
        "fsub dword  [ebp + -0x18]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + 0xffffff68]",
        "fld dword  [ebp + -0x10]",
        "fadd dword  [ebp + -0x4]",
        "fstp dword  [ebp + 0xffffff70]",
        "fld dword  [ebp + -0x4]",
        "fsub dword  [ebp + -0x10]",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + 0xffffff7c]",
        "fld dword  [ebp + -0xc]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + 0xffffff74]",
        "fld dword  [ebp + -0x8]",
        "fsub dword  [ebp + -0xc]",
        "fmulp",
        "fstp dword  [ebp + 0xffffff78]",
        "fld dword  [eax]",
        "fstp dword  [ebp + 0x10]",
        "fld dword  [ebp + 0xffffff04]",
        "fadd dword  [ebp + 0xffffff00]",
        "fstp dword  [ebp + -0x80]",
        "fld dword  [ebp + 0xffffff00]",
        "fsub dword  [ebp + 0xffffff04]",
        "fld dword  [ebp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword  [ebp + -0x7c]",
        "fld dword  [ebp + 0xffffff0c]",
        "fadd dword  [ebp + 0xffffff08]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff0c]",
        "fsub dword  [ebp + 0xffffff08]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x74]",
        "fld dword  [ebp + -0x74]",
        "fadd dword  [ebp + -0x78]",
        "fstp dword  [ebp + -0x78]",
        "fld dword  [ebp + 0xffffff14]",
        "fadd dword  [ebp + 0xffffff10]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + 0xffffff10]",
        "fsub dword  [ebp + 0xffffff14]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff1c]",
        "fadd dword  [ebp + 0xffffff18]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + 0xffffff1c]",
        "fsub dword  [ebp + 0xffffff18]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x64]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x68]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x70]",
        "fstp dword  [ebp + -0x70]",
        "fld dword  [ebp + -0x68]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + -0x68]",
        "fld dword  [ebp + -0x64]",
        "fadd dword  [ebp + -0x6c]",
        "fstp dword  [ebp + -0x6c]",
        "fld dword  [ebp + 0xffffff20]",
        "fadd dword  [ebp + 0xffffff24]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + 0xffffff20]",
        "fsub dword  [ebp + 0xffffff24]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + 0xffffff28]",
        "fadd dword  [ebp + 0xffffff2c]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff2c]",
        "fsub dword  [ebp + 0xffffff28]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + -0x54]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + 0xffffff30]",
        "fadd dword  [ebp + 0xffffff34]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + 0xffffff30]",
        "fsub dword  [ebp + 0xffffff34]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff38]",
        "fadd dword  [ebp + 0xffffff3c]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + 0xffffff3c]",
        "fsub dword  [ebp + 0xffffff38]",
        "fld st1",
        "fmulp",
        "fstp dword  [ebp + -0x44]",
        "fld dword  [ebp + -0x44]",
        "fld st0",
        "fadd dword  [ebp + -0x48]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x50]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + -0x48]",
        "fld st0",
        "fadd dword  [ebp + -0x4c]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + 0xffffff44]",
        "fadd dword  [ebp + 0xffffff40]",
        "fstp dword  [ebp + -0x40]",
        "fld dword  [ebp + 0xffffff40]",
        "fsub dword  [ebp + 0xffffff44]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x3c]",
        "fld dword  [ebp + 0xffffff4c]",
        "fadd dword  [ebp + 0xffffff48]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff4c]",
        "fsub dword  [ebp + 0xffffff48]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x34]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x34]",
        "fstp dword  [ebp + -0x38]",
        "fld dword  [ebp + 0xffffff54]",
        "fadd dword  [ebp + 0xffffff50]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + 0xffffff50]",
        "fsub dword  [ebp + 0xffffff54]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff5c]",
        "fadd dword  [ebp + 0xffffff58]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + 0xffffff5c]",
        "fsub dword  [ebp + 0xffffff58]",
        "fld st2",
        "fmulp",
        "fstp dword  [ebp + -0x24]",
        "fld dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + -0x30]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x28]",
        "fstp dword  [ebp + -0x28]",
        "fld dword  [ebp + -0x2c]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0x2c]",
        "fld dword  [ebp + 0xffffff60]",
        "fadd dword  [ebp + 0xffffff64]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + 0xffffff60]",
        "fsub dword  [ebp + 0xffffff64]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + 0xffffff68]",
        "fadd dword  [ebp + 0xffffff6c]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff6c]",
        "fsub dword  [ebp + 0xffffff68]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + 0xffffff70]",
        "fadd dword  [ebp + 0xffffff74]",
        "fstp dword  [ebp + -0x10]",
        "mov eax,dword  [ebp + 0x8]",
        "fld dword  [ebp + 0xffffff70]",
        "mov ecx,dword  [ebp + 0xc]",
        "fsub dword  [ebp + 0xffffff74]",
        "fld st3",
        "fmulp",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + 0xffffff78]",
        "fadd dword  [ebp + 0xffffff7c]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + 0xffffff7c]",
        "fsub dword  [ebp + 0xffffff78]",
        "fmulp st3",
        "fxch st2",
        "fstp dword  [ebp + -0x4]",
        "fld dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x10]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [ebp + -0xc]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0xc]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + -0x80]",
        "fstp dword  [eax + 0x400]",
        "fld dword  [ebp + -0x70]",
        "fstp dword  [eax + 0x300]",
        "fld dword  [ebp + -0x78]",
        "fstp dword  [eax + 0x200]",
        "fld dword  [ebp + -0x68]",
        "fstp dword  [eax + 0x100]",
        "fld dword  [ebp + -0x7c]",
        "fstp dword  [eax]",
        "fld dword  [ebp + -0x7c]",
        "fstp dword  [ecx]",
        "fld dword  [ebp + -0x6c]",
        "fstp dword  [ecx + 0x100]",
        "fld dword  [ebp + -0x74]",
        "fstp dword  [ecx + 0x200]",
        "fld dword  [ebp + -0x64]",
        "fstp dword  [ecx + 0x300]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x60]",
        "fstp dword  [ebp + -0x60]",
        "fld dword  [ebp + -0x60]",
        "fstp dword  [eax + 0x380]",
        "fld dword  [ebp + -0x50]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x50]",
        "fld dword  [ebp + -0x50]",
        "fstp dword  [eax + 0x280]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x58]",
        "fstp dword  [ebp + -0x58]",
        "fld dword  [ebp + -0x58]",
        "fstp dword  [eax + 0x180]",
        "fld dword  [ebp + -0x48]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + -0x48]",
        "fld dword  [ebp + -0x48]",
        "fstp dword  [eax + 0x80]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x5c]",
        "fstp dword  [ebp + -0x5c]",
        "fld dword  [ebp + -0x5c]",
        "fstp dword  [ecx + 0x80]",
        "fld dword  [ebp + -0x4c]",
        "fadd dword  [ebp + -0x54]",
        "fstp dword  [ebp + -0x4c]",
        "fld dword  [ebp + -0x4c]",
        "fstp dword  [ecx + 0x180]",
        "fld st1",
        "fadd dword  [ebp + -0x54]",
        "fstp dword  [ebp + -0x54]",
        "fld dword  [ebp + -0x54]",
        "fstp dword  [ecx + 0x280]",
        "fxch",
        "fstp dword  [ecx + 0x380]",
        "fld dword  [ebp + -0x20]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + -0x20]",
        "fld dword  [ebp + -0x40]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [eax + 0x3c0]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x20]",
        "fstp dword  [eax + 0x340]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [ebp + -0x10]",
        "fld dword  [ebp + -0x30]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [eax + 0x2c0]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x10]",
        "fstp dword  [eax + 0x240]",
        "fld dword  [ebp + -0x18]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x18]",
        "fld dword  [ebp + -0x38]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [eax + 0x1c0]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x18]",
        "fstp dword  [eax + 0x140]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [ebp + -0x8]",
        "fld dword  [ebp + -0x28]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [eax + 0xc0]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x8]",
        "fstp dword  [eax + 0x40]",
        "fld dword  [ebp + -0x1c]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + -0x1c]",
        "fld dword  [ebp + -0x3c]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ecx + 0x40]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0x1c]",
        "fstp dword  [ecx + 0xc0]",
        "fld dword  [ebp + -0x14]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ebp + -0xc]",
        "fld dword  [ebp + -0x2c]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ecx + 0x140]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0xc]",
        "fstp dword  [ecx + 0x1c0]",
        "fld dword  [ebp + -0x14]",
        "fld st1",
        "faddp",
        "fstp dword  [ebp + -0x14]",
        "fld dword  [ebp + -0x34]",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ecx + 0x240]",
        "fld st1",
        "fadd dword  [ebp + -0x14]",
        "fstp dword  [ecx + 0x2c0]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword  [ecx + 0x340]",
        "fstp dword  [ecx + 0x3c0]",
        "leave",
        "ret"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "subs w26, w8, #0x104 (260)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "mov w20, #0xe354",
        "movk w20, #0x100d, lsl #16",
        "ldr w7, [x20]",
        "ldr s3, [x4, #124]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-128]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr s3, [x4, #124]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr s2, [x4, #120]",
        "fcvt d2, s2",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-124]",
        "ldr s2, [x4, #4]",
        "fcvt d2, s2",
        "ldr s3, [x4, #120]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr s2, [x4, #116]",
        "fcvt d2, s2",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-120]",
        "ldr s2, [x4, #8]",
        "fcvt d2, s2",
        "ldr s3, [x4, #116]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-12]",
        "ldr s2, [x4, #112]",
        "fcvt d2, s2",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-116]",
        "ldr s2, [x4, #12]",
        "fcvt d2, s2",
        "ldr s3, [x4, #112]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-16]",
        "ldr s2, [x4, #108]",
        "fcvt d2, s2",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-112]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr s3, [x4, #108]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #16]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-20]",
        "ldr s2, [x4, #104]",
        "fcvt d2, s2",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-108]",
        "ldr s2, [x4, #20]",
        "fcvt d2, s2",
        "ldr s3, [x4, #104]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-24]",
        "ldr s2, [x4, #100]",
        "fcvt d2, s2",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-104]",
        "ldr s2, [x4, #24]",
        "fcvt d2, s2",
        "ldr s3, [x4, #100]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #24]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-28]",
        "ldr s2, [x4, #96]",
        "fcvt d2, s2",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-100]",
        "ldr s2, [x4, #28]",
        "fcvt d2, s2",
        "ldr s3, [x4, #96]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #28]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-32]",
        "ldr s2, [x4, #92]",
        "fcvt d2, s2",
        "ldr s3, [x4, #32]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-96]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr s3, [x4, #92]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #32]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-36]",
        "ldr s2, [x4, #88]",
        "fcvt d2, s2",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-92]",
        "ldr s2, [x4, #36]",
        "fcvt d2, s2",
        "ldr s3, [x4, #88]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #36]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-40]",
        "ldr s2, [x4, #84]",
        "fcvt d2, s2",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-88]",
        "ldr s2, [x4, #40]",
        "fcvt d2, s2",
        "ldr s3, [x4, #84]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #40]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-44]",
        "ldr s2, [x4, #80]",
        "fcvt d2, s2",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-84]",
        "ldr s2, [x4, #44]",
        "fcvt d2, s2",
        "ldr s3, [x4, #80]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #44]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-48]",
        "ldr s2, [x4, #76]",
        "fcvt d2, s2",
        "ldr s3, [x4, #48]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-80]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr s3, [x4, #76]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #48]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-52]",
        "ldr s2, [x4, #72]",
        "fcvt d2, s2",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-76]",
        "ldr s2, [x4, #52]",
        "fcvt d2, s2",
        "ldr s3, [x4, #72]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #52]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-56]",
        "ldr s2, [x4, #68]",
        "fcvt d2, s2",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-72]",
        "ldr s2, [x4, #56]",
        "fcvt d2, s2",
        "ldr s3, [x4, #68]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x7, #56]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-60]",
        "ldr s2, [x4, #64]",
        "fcvt d2, s2",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-68]",
        "ldr s2, [x4, #60]",
        "fcvt d2, s2",
        "ldr s3, [x4, #64]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xe358",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "ldr s3, [x7, #60]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-64]",
        "ldur s2, [x9, #-68]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-128]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-256]",
        "ldur s2, [x9, #-128]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-68]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-196]",
        "ldur s2, [x9, #-72]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-124]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-252]",
        "ldur s2, [x9, #-124]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-72]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-200]",
        "ldur s2, [x9, #-76]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-120]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-248]",
        "ldur s2, [x9, #-120]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-76]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-204]",
        "ldur s2, [x9, #-80]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-116]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-244]",
        "ldur s2, [x9, #-116]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-80]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-208]",
        "ldur s2, [x9, #-84]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-112]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-240]",
        "ldur s2, [x9, #-112]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-84]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-212]",
        "ldur s2, [x9, #-88]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-108]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-236]",
        "ldur s2, [x9, #-108]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-88]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-216]",
        "ldur s2, [x9, #-92]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-104]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-232]",
        "ldur s2, [x9, #-104]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-92]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-220]",
        "ldur s2, [x9, #-96]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-100]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-228]",
        "ldur s2, [x9, #-100]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-96]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-224]",
        "ldur s2, [x9, #-64]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-192]",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-64]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-132]",
        "ldur s2, [x9, #-60]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-188]",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-60]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-136]",
        "ldur s2, [x9, #-56]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-184]",
        "ldur s2, [x9, #-12]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-56]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-140]",
        "ldur s2, [x9, #-52]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-180]",
        "ldur s2, [x9, #-16]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-52]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-144]",
        "ldur s2, [x9, #-48]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-20]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-176]",
        "ldur s2, [x9, #-20]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-48]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-148]",
        "ldur s2, [x9, #-44]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-172]",
        "ldur s2, [x9, #-24]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-44]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-152]",
        "ldur s2, [x9, #-40]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-28]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-168]",
        "ldur s2, [x9, #-28]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-40]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-156]",
        "ldur s2, [x9, #-36]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-32]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-164]",
        "ldur s2, [x9, #-32]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-36]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "mov w20, #0xe35c",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "fcvt s2, d2",
        "stur s2, [x9, #-160]",
        "ldur s2, [x9, #-228]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-256]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-128]",
        "ldur s2, [x9, #-256]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-228]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-100]",
        "ldur s2, [x9, #-232]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-252]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-124]",
        "ldur s2, [x9, #-252]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-232]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-236]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-248]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-248]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-236]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-240]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-244]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-116]",
        "ldur s2, [x9, #-244]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-240]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-224]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-196]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-96]",
        "ldur s2, [x9, #-196]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-224]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-68]",
        "ldur s2, [x9, #-220]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-200]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-92]",
        "ldur s2, [x9, #-200]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-220]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-72]",
        "ldur s2, [x9, #-216]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-204]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-204]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-216]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-76]",
        "ldur s2, [x9, #-212]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-208]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-84]",
        "ldur s2, [x9, #-208]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-212]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-80]",
        "ldur s2, [x9, #-164]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-192]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-64]",
        "ldur s2, [x9, #-192]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-164]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-36]",
        "ldur s2, [x9, #-168]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-188]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-60]",
        "ldur s2, [x9, #-188]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-168]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-40]",
        "ldur s2, [x9, #-172]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-184]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-56]",
        "ldur s2, [x9, #-184]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-172]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-44]",
        "ldur s2, [x9, #-176]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-180]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-52]",
        "ldur s2, [x9, #-180]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-176]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-48]",
        "ldur s2, [x9, #-160]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-132]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-32]",
        "ldur s2, [x9, #-132]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-160]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldur s2, [x9, #-156]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-136]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-28]",
        "ldur s2, [x9, #-136]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-156]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldur s2, [x9, #-152]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-140]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-24]",
        "ldur s2, [x9, #-140]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-152]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-12]",
        "ldur s2, [x9, #-148]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-144]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-20]",
        "ldur s2, [x9, #-144]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-148]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "mov w20, #0xe360",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "fcvt s2, d2",
        "stur s2, [x9, #-16]",
        "ldr s2, [x4]",
        "str s2, [x9, #16]",
        "ldr s2, [x4, #4]",
        "mov x20, #0xfffffffffffffefc",
        "str s2, [x9, x20, sxtx]",
        "ldur s2, [x9, #-116]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-128]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-256]",
        "ldur s2, [x9, #-128]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-116]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x9, #16]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "stur s2, [x9, #-244]",
        "ldur s2, [x9, #-120]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-124]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-252]",
        "ldur s2, [x9, #-124]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-120]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "ldr s4, [x9, x20, sxtx]",
        "fcvt d4, s4",
        "fmul d2, d2, d4",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "stur s2, [x9, #-248]",
        "ldur s2, [x9, #-100]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-112]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-240]",
        "ldur s2, [x9, #-100]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-112]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-228]",
        "ldur s2, [x9, #-104]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-108]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-236]",
        "ldur s2, [x9, #-104]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-108]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-232]",
        "ldur s2, [x9, #-84]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-96]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-224]",
        "ldur s2, [x9, #-96]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-84]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-212]",
        "ldur s2, [x9, #-88]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-92]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-220]",
        "ldur s2, [x9, #-92]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-88]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-216]",
        "ldur s2, [x9, #-68]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-80]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-208]",
        "ldur s2, [x9, #-68]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-80]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-196]",
        "ldur s2, [x9, #-72]",
        "fcvt d2, s2",
        "mov w20, #0xe364",
        "movk w20, #0x100d, lsl #16",
        "ldr w4, [x20]",
        "ldur s5, [x9, #-76]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-204]",
        "ldur s2, [x9, #-72]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-76]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-200]",
        "ldur s2, [x9, #-64]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-52]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-192]",
        "ldur s2, [x9, #-64]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-52]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-180]",
        "ldur s2, [x9, #-60]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-56]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-188]",
        "ldur s2, [x9, #-60]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-56]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-184]",
        "ldur s2, [x9, #-48]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-36]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-176]",
        "ldur s2, [x9, #-36]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-48]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-164]",
        "ldur s2, [x9, #-44]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-40]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-172]",
        "ldur s2, [x9, #-40]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-44]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-168]",
        "ldur s2, [x9, #-32]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-20]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-160]",
        "ldur s2, [x9, #-32]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-20]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-148]",
        "ldur s2, [x9, #-28]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-24]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-156]",
        "ldur s2, [x9, #-28]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-24]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-152]",
        "ldur s2, [x9, #-16]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-4]",
        "fcvt d5, s5",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "stur s2, [x9, #-144]",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldur s5, [x9, #-16]",
        "fcvt d5, s5",
        "fsub d2, d2, d5",
        "fmul d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "stur s2, [x9, #-132]",
        "ldur s2, [x9, #-12]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-140]",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fmul d2, d4, d2",
        "fcvt s2, d2",
        "stur s2, [x9, #-136]",
        "ldr s2, [x4]",
        "str s2, [x9, #16]",
        "ldur s2, [x9, #-252]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-256]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-128]",
        "ldur s2, [x9, #-256]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-252]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x9, #16]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "stur s2, [x9, #-124]",
        "ldur s2, [x9, #-244]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-248]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-244]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-248]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-116]",
        "ldur s2, [x9, #-116]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-120]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-120]",
        "ldur s2, [x9, #-236]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-240]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-240]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-236]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-228]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-232]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-228]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-232]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-100]",
        "ldur s2, [x9, #-100]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-104]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-104]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-112]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-112]",
        "ldur s2, [x9, #-104]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-108]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-104]",
        "ldur s2, [x9, #-100]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-108]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-108]",
        "ldur s2, [x9, #-224]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-220]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-96]",
        "ldur s2, [x9, #-224]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-220]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-92]",
        "ldur s2, [x9, #-216]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-212]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-212]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-216]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-84]",
        "ldur s2, [x9, #-84]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-88]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-88]",
        "ldur s2, [x9, #-208]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-204]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-80]",
        "ldur s2, [x9, #-208]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-204]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-76]",
        "ldur s2, [x9, #-200]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-196]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "stur s2, [x9, #-72]",
        "ldur s2, [x9, #-196]",
        "fcvt d2, s2",
        "ldur s4, [x9, #-200]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-68]",
        "ldur s2, [x9, #-68]",
        "fcvt d4, s2",
        "ldur s5, [x9, #-72]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "stur s5, [x9, #-72]",
        "ldur s5, [x9, #-72]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-80]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-80]",
        "ldur s5, [x9, #-72]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-76]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-72]",
        "ldur s5, [x9, #-76]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "stur s5, [x9, #-76]",
        "ldur s5, [x9, #-188]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-192]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-64]",
        "ldur s5, [x9, #-192]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-188]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-60]",
        "ldur s5, [x9, #-180]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-184]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-56]",
        "ldur s5, [x9, #-180]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-184]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-52]",
        "ldur s5, [x9, #-56]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-52]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-56]",
        "ldur s5, [x9, #-172]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-176]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-48]",
        "ldur s5, [x9, #-176]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-172]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-44]",
        "ldur s5, [x9, #-164]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-168]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-164]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-168]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-36]",
        "ldur s5, [x9, #-40]",
        "fcvt d5, s5",
        "ldur s6, [x9, #-36]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-48]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-40]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-48]",
        "ldur s5, [x9, #-44]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-40]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-40]",
        "ldur s5, [x9, #-44]",
        "fcvt d5, s5",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x9, #-44]",
        "ldur s5, [x9, #-160]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-156]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-32]",
        "ldur s5, [x9, #-160]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-156]",
        "fcvt d7, s7",
        "fsub d5, d5, d7",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-28]",
        "ldur s5, [x9, #-152]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-148]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-24]",
        "ldur s5, [x9, #-148]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-152]",
        "fcvt d7, s7",
        "fsub d5, d5, d7",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-20]",
        "ldur s5, [x9, #-24]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-20]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-24]",
        "ldur s5, [x9, #-144]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-140]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-16]",
        "ldr w4, [x9, #8]",
        "ldur s5, [x9, #-144]",
        "fcvt d5, s5",
        "ldr w7, [x9, #12]",
        "ldur s7, [x9, #-140]",
        "fcvt d7, s7",
        "fsub d5, d5, d7",
        "fmul d5, d5, d3",
        "fcvt s5, d5",
        "stur s5, [x9, #-12]",
        "ldur s5, [x9, #-136]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-132]",
        "fcvt d7, s7",
        "fadd d5, d5, d7",
        "fcvt s5, d5",
        "stur s5, [x9, #-8]",
        "ldur s5, [x9, #-132]",
        "fcvt d5, s5",
        "ldur s7, [x9, #-136]",
        "fcvt d7, s7",
        "fsub d5, d5, d7",
        "fmul d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "stur s3, [x9, #-4]",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldur s5, [x9, #-4]",
        "fcvt d7, s5",
        "fadd d3, d3, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "stur s3, [x9, #-8]",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-8]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-16]",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-8]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-8]",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d3, d3, d7",
        "fcvt s3, d3",
        "stur s3, [x9, #-12]",
        "ldur s3, [x9, #-128]",
        "str s3, [x4, #1024]",
        "ldur s3, [x9, #-112]",
        "str s3, [x4, #768]",
        "ldur s3, [x9, #-120]",
        "str s3, [x4, #512]",
        "ldur s3, [x9, #-104]",
        "str s3, [x4, #256]",
        "ldur s3, [x9, #-124]",
        "str s3, [x4]",
        "ldur s3, [x9, #-124]",
        "str s3, [x7]",
        "ldur s3, [x9, #-108]",
        "str s3, [x7, #256]",
        "ldur s3, [x9, #-116]",
        "str s3, [x7, #512]",
        "ldur s3, [x9, #-100]",
        "str s3, [x7, #768]",
        "ldur s3, [x9, #-80]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-96]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-96]",
        "ldur s3, [x9, #-96]",
        "str s3, [x4, #896]",
        "ldur s3, [x9, #-80]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-88]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-80]",
        "ldur s3, [x9, #-80]",
        "str s3, [x4, #640]",
        "ldur s3, [x9, #-72]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-88]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-88]",
        "ldur s3, [x9, #-88]",
        "str s3, [x4, #384]",
        "ldur s3, [x9, #-72]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-92]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-72]",
        "ldur s3, [x9, #-72]",
        "str s3, [x4, #128]",
        "ldur s3, [x9, #-76]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-92]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-92]",
        "ldur s3, [x9, #-92]",
        "str s3, [x7, #128]",
        "ldur s3, [x9, #-76]",
        "fcvt d3, s3",
        "ldur s8, [x9, #-84]",
        "fcvt d8, s8",
        "fadd d3, d3, d8",
        "fcvt s3, d3",
        "stur s3, [x9, #-76]",
        "ldur s3, [x9, #-76]",
        "str s3, [x7, #384]",
        "ldur s3, [x9, #-84]",
        "fcvt d3, s3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x9, #-84]",
        "ldur s3, [x9, #-84]",
        "str s3, [x7, #640]",
        "strb wzr, [x28, #1049]",
        "str s2, [x7, #896]",
        "ldur s2, [x9, #-32]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-32]",
        "ldur s2, [x9, #-64]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-32]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #960]",
        "ldur s2, [x9, #-48]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-32]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #832]",
        "ldur s2, [x9, #-24]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-16]",
        "ldur s2, [x9, #-48]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #704]",
        "ldur s2, [x9, #-56]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-16]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #576]",
        "ldur s2, [x9, #-24]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-24]",
        "ldur s2, [x9, #-56]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #448]",
        "ldur s2, [x9, #-40]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #320]",
        "ldur s2, [x9, #-28]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldur s2, [x9, #-40]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #192]",
        "ldur s2, [x9, #-60]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #64]",
        "ldur s2, [x9, #-28]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-28]",
        "ldur s2, [x9, #-60]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-28]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #64]",
        "ldur s2, [x9, #-44]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-28]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #192]",
        "ldur s2, [x9, #-20]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-12]",
        "ldur s2, [x9, #-44]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #320]",
        "ldur s2, [x9, #-52]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #448]",
        "ldur s2, [x9, #-20]",
        "fcvt d2, s2",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x9, #-20]",
        "ldur s2, [x9, #-52]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-20]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #576]",
        "ldur s2, [x9, #-20]",
        "fcvt d2, s2",
        "fadd d2, d6, d2",
        "fcvt s2, d2",
        "str s2, [x7, #704]",
        "fadd d2, d6, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x7, #832]",
        "str s5, [x7, #960]",
        "mov x8, x9",
        "ldp w9, w20, [x8], #8",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xf8f8",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ],
      "x86InstructionCount": 809,
      "ExpectedInstructionCount": 1712
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87_f64-HalfLife.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 70,
      "ExpectedInstructionCount": 109,
      "x86Insts": [
        "sub esp,0x2c",
        "mov ecx,dword [esp + 0x34]",
        "mov edx,dword [esp + 0x30]",
        "mov eax,dword [esp + 0x38]",
        "fld dword [ecx]",
        "fld dword [edx]",
        "fld st1",
        "fsub st0,st1",
        "fld dword [ecx + 0x4]",
        "fld dword [edx + 0x4]",
        "fld st1",
        "fsub st0,st1",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x8]",
        "fld dword [edx + 0x8]",
        "fld st1",
        "fsub st0,st1",
        "fstp dword [esp + 0x14]",
        "fld dword [eax]",
        "fsubr st7,st0",
        "fxch st7",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + 0x4]",
        "fsubr st4,st0",
        "fxch st4",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + 0x8]",
        "fsubr st2,st0",
        "fxch st6",
        "fsubrp st7,st0",
        "fxch st2",
        "fsubrp st3,st0",
        "fxch st4",
        "fsubp",
        "fxch st2",
        "fmul st0",
        "fldz",
        "faddp",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp",
        "faddp",
        "fld dword [esp + 0x14]",
        "fld st0",
        "fmulp",
        "faddp",
        "fld dword [esp + 0x18]",
        "fld st0",
        "fmulp",
        "fldz",
        "faddp",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fmulp",
        "faddp",
        "fxch st4",
        "fmul st0",
        "faddp st4,st0",
        "fxch st4",
        "fmul st0",
        "fldz",
        "faddp",
        "fxch",
        "fmul st0",
        "faddp",
        "fxch",
        "fmul st0",
        "faddp",
        "fxch st2",
        "fucomi st0,st1"
      ],
      "ExpectedArm64ASM": [
        "sub w20, w8, #0x2c (44)",
        "mov x27, x8",
        "mov x8, x20",
        "ldr w7, [x8, #52]",
        "ldr w5, [x8, #48]",
        "ldr w4, [x8, #56]",
        "ldr s2, [x7]",
        "fcvt d2, s2",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fsub d4, d2, d3",
        "ldr s5, [x7, #4]",
        "fcvt d5, s5",
        "ldr s6, [x5, #4]",
        "fcvt d6, s6",
        "fsub d7, d5, d6",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "ldr s7, [x7, #8]",
        "fcvt d7, s7",
        "ldr s8, [x5, #8]",
        "fcvt d8, s8",
        "fsub d9, d7, d8",
        "fcvt s9, d9",
        "str s9, [x8, #20]",
        "ldr s9, [x4]",
        "fcvt d9, s9",
        "fsub d2, d9, d2",
        "mov w20, #0x0",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x4, #4]",
        "fcvt d2, s2",
        "fsub d5, d2, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x8, #28]",
        "ldr s5, [x4, #8]",
        "fcvt d5, s5",
        "fsub d7, d5, d7",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d9",
        "strb wzr, [x28, #1049]",
        "fsub d2, d6, d2",
        "strb wzr, [x28, #1049]",
        "fsub d5, d8, d5",
        "strb wzr, [x28, #1049]",
        "fmul d4, d4, d4",
        "fmov d6, x20",
        "fadd d4, d4, d6",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fmul d6, d6, d6",
        "fadd d4, d4, d6",
        "ldr s6, [x8, #20]",
        "fcvt d6, s6",
        "fmul d6, d6, d6",
        "fadd d4, d4, d6",
        "ldr s6, [x8, #24]",
        "fcvt d6, s6",
        "fmul d6, d6, d6",
        "fmov d8, x20",
        "fadd d6, d6, d8",
        "ldr s8, [x8, #28]",
        "fcvt d8, s8",
        "fmul d8, d8, d8",
        "fadd d6, d6, d8",
        "strb wzr, [x28, #1049]",
        "fmul d7, d7, d7",
        "fadd d6, d6, d7",
        "strb wzr, [x28, #1049]",
        "fmul d3, d3, d3",
        "fmov d7, x20",
        "fadd d3, d3, d7",
        "strb wzr, [x28, #1049]",
        "fmul d2, d2, d2",
        "fadd d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fmul d3, d5, d5",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcmp d4, d6",
        "cset x26, vc",
        "axflag",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d4, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d6, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x707",
        "lsr w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0xf8f8",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 37,
      "ExpectedInstructionCount": 70,
      "x86Insts": [
        "sub esp,0x1c",
        "mov edx,dword [esp + 0x20]",
        "mov eax,dword [esp + 0x24]",
        "fld dword [edx]",
        "fabs",
        "fld dword [eax]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld dword [edx + 0x4]",
        "fabs",
        "fld dword [eax + 0x4]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld dword [edx + 0x8]",
        "fabs",
        "fld dword [eax + 0x8]",
        "fabs",
        "fxch",
        "fucomi st0,st1",
        "fcmovbe st0,st1",
        "fstp st1",
        "fld st2",
        "fmulp st3",
        "fxch",
        "fmul st0",
        "faddp st2,st0",
        "fmul st0",
        "faddp",
        "fld st0",
        "fsqrt",
        "fucomi st0,st0"
      ],
      "ExpectedArm64ASM": [
        "mvn w27, w8",
        "sub w8, w8, #0x1c (28)",
        "ldr w5, [x8, #32]",
        "ldr w4, [x8, #36]",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "fabs d2, d2",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fabs d3, d3",
        "strb wzr, [x28, #1049]",
        "fcmp d2, d3",
        "axflag",
        "csetm x20, ls",
        "dup v4.2d, x20",
        "bsl v4.16b, v3.16b, v2.16b",
        "ldr s2, [x5, #4]",
        "fcvt d2, s2",
        "fabs d2, d2",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fabs d3, d3",
        "strb wzr, [x28, #1049]",
        "fcmp d2, d3",
        "axflag",
        "csetm x20, ls",
        "dup v5.2d, x20",
        "bsl v5.16b, v3.16b, v2.16b",
        "ldr s2, [x5, #8]",
        "fcvt d2, s2",
        "fabs d2, d2",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fabs d3, d3",
        "strb wzr, [x28, #1049]",
        "fcmp d2, d3",
        "axflag",
        "csetm x20, ls",
        "dup v6.2d, x20",
        "bsl v6.16b, v3.16b, v2.16b",
        "fmul d2, d4, d4",
        "strb wzr, [x28, #1049]",
        "fmul d3, d5, d5",
        "fadd d2, d2, d3",
        "fmul d3, d6, d6",
        "fadd d2, d2, d3",
        "fsqrt d3, d2",
        "fcmp d3, d3",
        "cset x26, vc",
        "axflag",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d3, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 32,
      "ExpectedInstructionCount": 60,
      "x86Insts": [
        "fld dword [ecx]",
        "fld dword [edx + 0x4]",
        "fld dword [ecx + 0x4]",
        "fld dword [edx]",
        "fld dword [ecx + 0x8]",
        "fstp dword [esp]",
        "fld dword [edx + 0x8]",
        "fld st4",
        "fmul st4",
        "fld st3",
        "fmul st3",
        "fsubp",
        "fmul dword [eax + 0x8]",
        "fxch st2",
        "fmul dword [esp]",
        "fxch st5",
        "fmul st1",
        "fsubp st5,st0",
        "fld dword [eax + 0x4]",
        "fmulp st5",
        "fxch st4",
        "faddp",
        "fxch st3",
        "fmulp",
        "fxch",
        "fmul dword [esp]",
        "mov byte [esp],0x1",
        "fsubp",
        "fmul dword [eax]",
        "faddp",
        "fdivrp",
        "fstp dword [esi]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x7]",
        "fcvt d2, s2",
        "ldr s3, [x5, #4]",
        "fcvt d3, s3",
        "ldr s4, [x7, #4]",
        "fcvt d4, s4",
        "ldr s5, [x5]",
        "fcvt d5, s5",
        "ldr s6, [x7, #8]",
        "str s6, [x8]",
        "ldr s6, [x5, #8]",
        "fcvt d6, s6",
        "fmul d7, d2, d3",
        "fmul d8, d4, d5",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #8]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "strb wzr, [x28, #1049]",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d5, d5, d8",
        "strb wzr, [x28, #1049]",
        "fmul d2, d2, d6",
        "fsub d2, d5, d2",
        "ldr s5, [x4, #4]",
        "fcvt d5, s5",
        "fmul d2, d2, d5",
        "strb wzr, [x28, #1049]",
        "fadd d2, d7, d2",
        "strb wzr, [x28, #1049]",
        "fmul d4, d4, d6",
        "strb wzr, [x28, #1049]",
        "ldr s5, [x8]",
        "fcvt d5, s5",
        "fmul d3, d3, d5",
        "mov w20, #0x1",
        "strb w20, [x8]",
        "fsub d3, d4, d3",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "fdiv d3, d2, d3",
        "fcvt s4, d3",
        "str s4, [x10]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "str d2, [x20, #1056]",
        "str d3, [x22, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block4": {
      "x86InstructionCount": 54,
      "ExpectedInstructionCount": 36,
      "x86Insts": [
        "push ebp",
        "push edi",
        "push esi",
        "push ebx",
        "sub esp,0x4c",
        "mov eax,dword [esp + 0x68]",
        "lea ebp,[esp + 0x38]",
        "lea esi,[esp + 0x30]",
        "fld qword [0x00052098]",
        "mov dword [esp + 0xc],esi",
        "mov edi,dword [esp + 0x64]",
        "mov dword [esp + 0x8],ebp",
        "mov ebx,dword [esp + 0x6c]",
        "mov dword [esp + 0x28],eax",
        "mov eax,dword [esp + 0x60]",
        "fmul dword [eax + 0x4]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "mov eax,dword [esp + 0x60]",
        "mov dword [esp + 0xc],esi",
        "mov dword [esp + 0x8],ebp",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x18]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x1c]",
        "fld qword [0x00052098]",
        "fmul dword [eax]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "mov eax,dword [esp + 0x60]",
        "mov dword [esp + 0xc],esi",
        "mov dword [esp + 0x8],ebp",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x20]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x24]",
        "fld qword [0x00052098]",
        "fmul dword [eax + 0x8]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp qword [esp]",
        "call 0x0006d100",
        "test edi,edi",
        "mov eax,dword [esp + 0x28]",
        "fld qword [esp + 0x38]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fld qword [esp + 0x30]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]"
      ],
      "ExpectedArm64ASM": [
        "stp w11, w9, [x8, #-8]!",
        "stp w6, w10, [x8, #-8]!",
        "subs w26, w8, #0x4c (76)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w4, [x8, #104]",
        "add w9, w8, #0x38 (56)",
        "add w10, w8, #0x30 (48)",
        "mov w20, #0x2098",
        "movk w20, #0x5, lsl #16",
        "ldr d2, [x20]",
        "str w10, [x8, #12]",
        "ldr w11, [x8, #100]",
        "str w9, [x8, #8]",
        "ldr w6, [x8, #108]",
        "str w4, [x8, #40]",
        "ldr w4, [x8, #96]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "str d2, [x8]",
        "mov w20, #0x44",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 49,
      "ExpectedInstructionCount": 88,
      "x86Insts": [
        "fld dword [esp + 0x80]",
        "fsub dword [esp + 0x7c]",
        "mov eax,dword [esp + 0x88]",
        "mov ecx,dword [esp + 0x8c]",
        "movss xmm1,dword [esp + 0x7c]",
        "mov dword [esp + 0x38],edx",
        "fst dword [esp + 0x34]",
        "fld dword [esp + 0x24]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [eax]",
        "fsub dword [ebp]",
        "movss xmm0,dword [esp + 0x2c]",
        "fmul st1",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x44]",
        "fld dword [eax + 0x4]",
        "fsub dword [ebp + 0x4]",
        "fmul st1",
        "fadd dword [ebp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fld dword [eax + 0x8]",
        "fsub dword [ebp + 0x8]",
        "fmulp",
        "fadd dword [ebp + 0x8]",
        "mov dword [esp + 0x1c],ecx",
        "movss dword [esp + 0x10],xmm0",
        "lea ecx,[esp + 0x44]",
        "movss dword [esp + 0xc],xmm1",
        "mov dword [esp + 0x18],ecx",
        "mov dword [esp + 0x14],ebp",
        "fstp dword [esp + 0x4c]",
        "fldz",
        "fld dword [esp + 0x28]",
        "fxch",
        "fucomip st0,st1",
        "fstp st0",
        "seta byte [esp + 0x30]",
        "movzx eax,byte [esp + 0x30]",
        "movsx eax,word [esi + eax*0x2 + 0x4]",
        "mov dword [esp + 0x8],eax",
        "mov dword [esp + 0x4],ebx",
        "mov dword [esp],edi",
        "call 0x0002b5b0",
        "mov edx,dword [esp + 0x38]",
        "test al,al"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #128]",
        "fcvt d2, s2",
        "ldr s3, [x8, #124]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr w4, [x8, #136]",
        "ldr w7, [x8, #140]",
        "ldr s17, [x8, #124]",
        "str w5, [x8, #56]",
        "fcvt s3, d2",
        "str s3, [x8, #52]",
        "ldr s3, [x8, #36]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "mov w20, #0x0",
        "strb wzr, [x28, #1049]",
        "ldr s4, [x8, #124]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr s4, [x9]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "ldr s16, [x8, #44]",
        "fmul d2, d2, d3",
        "ldr s4, [x9]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x4, #4]",
        "fcvt d2, s2",
        "ldr s4, [x9, #4]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d2, d3",
        "ldr s4, [x9, #4]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x4, #8]",
        "fcvt d2, s2",
        "ldr s4, [x9, #8]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fmul d2, d3, d2",
        "ldr s3, [x9, #8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "str w7, [x8, #28]",
        "str s16, [x8, #16]",
        "add w7, w8, #0x44 (68)",
        "str s17, [x8, #12]",
        "str w7, [x8, #24]",
        "str w9, [x8, #20]",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "fmov d2, x20",
        "ldr s3, [x8, #40]",
        "fcvt d3, s3",
        "strb wzr, [x28, #1049]",
        "fcmp d2, d3",
        "cset x26, vc",
        "axflag",
        "cset x20, hi",
        "strb w20, [x8, #48]",
        "ldrb w4, [x8, #48]",
        "add w20, w10, w4, lsl #1",
        "ldrh w20, [x20, #4]",
        "sxth w4, w20",
        "str w4, [x8, #8]",
        "str w6, [x8, #4]",
        "str w11, [x8]",
        "mov w20, #0xb2",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xe0e0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 39,
      "ExpectedInstructionCount": 92,
      "x86Insts": [
        "push ebp",
        "push edi",
        "push esi",
        "push ebx",
        "sub esp,0x4",
        "mov ecx,dword [esp + 0x20]",
        "mov ebx,dword [esp + 0x24]",
        "mov eax,dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x18]",
        "fld dword [ecx]",
        "fmul dword [ebx + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "mov edi,dword [esp + 0x2c]",
        "mov esi,dword [esp + 0x30]",
        "fld dword [ecx + 0x4]",
        "fmul dword [ebx]",
        "fsubp",
        "fld dword [ebx]",
        "fmul dword [ecx + 0x8]",
        "fld dword [ecx]",
        "fmul dword [ebx + 0x8]",
        "fsubp",
        "fld dword [ecx + 0x4]",
        "fmul dword [ebx + 0x8]",
        "fld dword [ebx + 0x4]",
        "fmul dword [ecx + 0x8]",
        "fsubp",
        "fld dword [eax + 0x8]",
        "fmul st3",
        "fld dword [eax + 0x4]",
        "fmul st3",
        "faddp",
        "fld dword [eax]",
        "fmul st2",
        "faddp",
        "fldz",
        "fxch",
        "fucomi st0,st1",
        "fstp st1"
      ],
      "ExpectedArm64ASM": [
        "stp w11, w9, [x8, #-8]!",
        "stp w6, w10, [x8, #-8]!",
        "sub w20, w8, #0x4 (4)",
        "mov x27, x8",
        "mov x8, x20",
        "ldr w7, [x8, #32]",
        "ldr w6, [x8, #36]",
        "ldr w4, [x8, #28]",
        "ldr w5, [x8, #24]",
        "ldr s2, [x7]",
        "fcvt d2, s2",
        "ldr s3, [x6, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w9, [x8, #40]",
        "ldr w11, [x8, #44]",
        "ldr w10, [x8, #48]",
        "ldr s3, [x7, #4]",
        "fcvt d3, s3",
        "ldr s4, [x6]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fsub d2, d2, d3",
        "ldr s3, [x6]",
        "fcvt d3, s3",
        "ldr s4, [x7, #8]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr s4, [x7]",
        "fcvt d4, s4",
        "ldr s5, [x6, #8]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fsub d3, d3, d4",
        "ldr s4, [x7, #4]",
        "fcvt d4, s4",
        "ldr s5, [x6, #8]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "ldr s5, [x6, #4]",
        "fcvt d5, s5",
        "ldr s6, [x7, #8]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fsub d4, d4, d5",
        "ldr s5, [x4, #8]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "ldr s6, [x4, #4]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "ldr s6, [x4]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fadd d5, d5, d6",
        "mov w20, #0x0",
        "fmov d6, x20",
        "strb wzr, [x28, #1049]",
        "fcmp d5, d6",
        "cset x26, vc",
        "axflag",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d5, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d4, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d3, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w22, w22, w20",
        "mov w23, #0xf0f",
        "lsr w22, w23, w22",
        "orr w21, w21, w22",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 61,
      "x86Insts": [
        "fld dword [ebx + 0x4]",
        "fld dword [ebx]",
        "fld dword [ebx + 0x8]",
        "fld dword [edx]",
        "fmul st3",
        "fld dword [edx + 0x4]",
        "fmul st3",
        "fsubp",
        "fmul dword [eax + 0x8]",
        "fxch st2",
        "fmul dword [edx + 0x8]",
        "fld dword [edx]",
        "fmul st2",
        "fsubp",
        "fmul dword [eax + 0x4]",
        "faddp st2,st0",
        "fmul dword [edx + 0x4]",
        "fxch st2",
        "fmul dword [edx + 0x8]",
        "fsubp st2,st0",
        "fxch",
        "fmul dword [eax]",
        "faddp",
        "fdiv st0,st1",
        "fstp dword [edi]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x6, #4]",
        "fcvt d2, s2",
        "ldr s3, [x6]",
        "fcvt d3, s3",
        "ldr s4, [x6, #8]",
        "fcvt d4, s4",
        "ldr s5, [x5]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "ldr s6, [x5, #4]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fsub d5, d5, d6",
        "ldr s6, [x4, #8]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x5, #8]",
        "fcvt d6, s6",
        "fmul d3, d3, d6",
        "ldr s6, [x5]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d3, d3, d6",
        "ldr s6, [x4, #4]",
        "fcvt d6, s6",
        "fmul d3, d3, d6",
        "fadd d3, d5, d3",
        "ldr s5, [x5, #4]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "strb wzr, [x28, #1049]",
        "ldr s5, [x5, #8]",
        "fcvt d5, s5",
        "fmul d2, d2, d5",
        "fsub d2, d4, d2",
        "strb wzr, [x28, #1049]",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d2, d2, d4",
        "fadd d2, d3, d2",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "fdiv d2, d2, d3",
        "fcvt s3, d2",
        "str s3, [x11]",
        "strb w21, [x28, #1051]",
        "str d2, [x20, #1056]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x8",
        "sub w21, w22, w21",
        "mov w22, #0xf8f8",
        "lsr w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 47,
      "x86Insts": [
        "fstp st0",
        "fstp st3",
        "fstp st0",
        "fstp st3",
        "fxch",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fstp qword [esp]",
        "call 0x0006d0d8",
        "fld dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [esp + 0x38]",
        "fld dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fxch st6",
        "fxch st5",
        "fxch",
        "fxch st4",
        "fxch st3",
        "fxch"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "add x22, x28, x22, lsl #4",
        "ldr d4, [x22, #1056]",
        "strb wzr, [x28, #1049]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x23, x28, x20, lsl #4",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d3",
        "str s5, [x8, #56]",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d4",
        "str s5, [x8, #44]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "ldr d5, [x12, #1056]",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d5",
        "str s6, [x8, #40]",
        "str d2, [x8]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "mov w13, #0x24",
        "movk w13, #0x1, lsl #16",
        "str w13, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str d3, [x21, #1056]",
        "str d4, [x22, #1056]",
        "str d5, [x23, #1056]",
        "str d2, [x12, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 25,
      "ExpectedInstructionCount": 47,
      "x86Insts": [
        "fstp st0",
        "fstp st3",
        "fstp st0",
        "fstp st3",
        "fxch",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fstp qword [esp]",
        "call 0x0006d0d8",
        "fld dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [esp + 0x38]",
        "fld dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fxch st6",
        "fxch st5",
        "fxch",
        "fxch st4",
        "fxch st3",
        "fxch"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "add x22, x28, x22, lsl #4",
        "ldr d4, [x22, #1056]",
        "strb wzr, [x28, #1049]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x23, x28, x20, lsl #4",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d3",
        "str s5, [x8, #56]",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d4",
        "str s5, [x8, #44]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "ldr d5, [x12, #1056]",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d5",
        "str s6, [x8, #40]",
        "str d2, [x8]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "mov w13, #0x24",
        "movk w13, #0x1, lsl #16",
        "str w13, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str d3, [x21, #1056]",
        "str d4, [x22, #1056]",
        "str d5, [x23, #1056]",
        "str d2, [x12, #1056]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 125,
      "ExpectedInstructionCount": 17,
      "x86Insts": [
        "push esi",
        "push ebx",
        "sub esp,0xa4",
        "mov ebx,dword [esp + 0xb0]",
        "lea esi,[esp + 0x18]",
        "mov eax,gs:[0x14]",
        "mov dword [esp + 0x9c],eax",
        "xor eax,eax",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x18]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x1c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x20]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x24]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x28]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x2c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x30]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x34]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x38]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x3c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x40]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x44]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x48]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x4c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x50]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x54]",
        "call 0x00018ad0",
        "mov dword [esp + 0x58],eax",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x7c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x80]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x84]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x88]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x8c]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x90]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x94]",
        "call 0x00019090",
        "mov eax,dword [ebx + 0x5869c]",
        "mov dword [esp],eax",
        "fstp dword [esp + 0x98]",
        "call 0x00019700",
        "mov edx,0x1f",
        "mov dword [esp + 0x8],edx",
        "mov dword [esp + 0x4],eax",
        "lea eax,[esp + 0x5c]",
        "mov dword [esp],eax",
        "call 0x0006d0dc",
        "mov eax,dword [ebx + 0x130]",
        "mov byte [esp + 0x7b],0x0",
        "mov edx,dword [eax]",
        "mov dword [esp],eax",
        "mov dword [esp + 0x4],esi",
        "call dword [edx + 0xc4]",
        "mov eax,dword [esp + 0x9c]",
        "xor eax,dword gs:[0x14]"
      ],
      "ExpectedArm64ASM": [
        "stp w6, w10, [x8, #-8]!",
        "sub w8, w8, #0xa4 (164)",
        "ldr w6, [x8, #176]",
        "add w10, w8, #0x18 (24)",
        "ldr w20, [x28, #992]",
        "ldr w4, [x20, #20]",
        "str w4, [x8, #156]",
        "subs w26, w4, w4",
        "mov w4, #0x0",
        "mov w20, #0x869c",
        "movk w20, #0x5, lsl #16",
        "add w20, w6, w20",
        "ldr w4, [x20]",
        "str w4, [x8]",
        "mov w20, #0x30",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87_f64-Oblivion.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 911,
      "ExpectedInstructionCount": 1695,
      "x86Insts": [
        "sub esp,0x118",
        "fld dword [ecx + 0x1084]",
        "fadd dword [ecx + 0x1008]",
        "fstp dword [esp]",
        "fld dword [ecx + 0x1080]",
        "fadd dword [ecx + 0x100c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x107c]",
        "fadd dword [ecx + 0x1010]",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x1078]",
        "fadd dword [ecx + 0x1014]",
        "fstp dword [esp + 0xc]",
        "fld dword [ecx + 0x1074]",
        "fadd dword [ecx + 0x1018]",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x1070]",
        "fadd dword [ecx + 0x101c]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx + 0x106c]",
        "fadd dword [ecx + 0x1020]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx + 0x1068]",
        "fadd dword [ecx + 0x1024]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ecx + 0x1064]",
        "fadd dword [ecx + 0x1028]",
        "fstp dword [esp + 0x20]",
        "fld dword [ecx + 0x1060]",
        "fadd dword [ecx + 0x102c]",
        "fstp dword [esp + 0x24]",
        "fld dword [ecx + 0x105c]",
        "fadd dword [ecx + 0x1030]",
        "fstp dword [esp + 0x28]",
        "fld dword [ecx + 0x1058]",
        "fadd dword [ecx + 0x1034]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ecx + 0x1054]",
        "fadd dword [ecx + 0x1038]",
        "fstp dword [esp + 0x30]",
        "fld dword [ecx + 0x1050]",
        "fadd dword [ecx + 0x103c]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + 0x104c]",
        "fadd dword [ecx + 0x1040]",
        "fstp dword [esp + 0x38]",
        "fld dword [ecx + 0x1048]",
        "fadd dword [ecx + 0x1044]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fld dword [esp + 0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x34]",
        "fld st0",
        "fld dword [esp + 0x8]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x24]",
        "fadd dword [esp + 0x18]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fstp dword [esp + 0x60]",
        "fxch st4",
        "fsubrp st5,st0",
        "fld dword [0x00b3c1d0]",
        "fmulp st5",
        "fxch st4",
        "fstp dword [esp + 0x64]",
        "fsubrp",
        "fmul dword [0x00b3c1d4]",
        "fstp dword [esp + 0x68]",
        "fsubrp",
        "fmul dword [0x00b3c1d8]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x30]",
        "fmul dword [0x00b3c1dc]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x2c]",
        "fmul dword [0x00b3c1e0]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x28]",
        "fmul dword [0x00b3c1e4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x24]",
        "fmul dword [0x00b3c1e8]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fmul dword [0x00b3c1ec]",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fld dword [esp + 0x44]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp]",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "fld dword [esp + 0x48]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x58]",
        "fld st0",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0xc]",
        "fxch st4",
        "fsubrp st5,st0",
        "fld dword [0x00b3c1f0]",
        "fld st0",
        "fmulp st6",
        "fxch st5",
        "fstp dword [esp + 0x10]",
        "fxch",
        "fsubrp st2,st0",
        "fld dword [0x00b3c1f4]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x14]",
        "fsubp st2,st0",
        "fld dword [0x00b3c1f8]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x54]",
        "fld dword [0x00b3c1fc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x80]",
        "fld st0",
        "fld dword [esp + 0x64]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x68]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x2c]",
        "fsubrp",
        "fmul st4",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x7c]",
        "fmul st2",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x78]",
        "fmul st3",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x70]",
        "fsub dword [esp + 0x74]",
        "fmulp",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fxch",
        "fsubrp st2,st0",
        "fld dword [0x00b3c200]",
        "fld st0",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x4c]",
        "fsubr dword [esp + 0x4]",
        "fld dword [0x00b3c204]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fsubr dword [esp + 0x10]",
        "fmul st2",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "fadd dword [esp + 0x20]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x68]",
        "fsubr dword [esp + 0x20]",
        "fmul st2",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x28]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0x78]",
        "fsubr dword [esp + 0x30]",
        "fmul st2",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x34]",
        "fsub dword [esp + 0x38]",
        "fmul st1",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x48]",
        "fld st0",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp]",
        "fsubr dword [esp + 0x44]",
        "fld dword [0x00b3c208]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fmul st1",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x68]",
        "fmul st1",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x70]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x74]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x74]",
        "fsub dword [esp + 0x78]",
        "fmul st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x80]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x7c]",
        "fsub dword [esp + 0x80]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x1c]",
        "fst dword [esp + 0xc0]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0xa0]",
        "fld dword [esp + 0xa0]",
        "fchs",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fsub dword [esp + 0x18]",
        "fstp dword [esp + 0xdc]",
        "fld dword [esp + 0x18]",
        "fchs",
        "fsub dword [esp + 0x1c]",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0xfc]",
        "fld dword [esp + 0x3c]",
        "fst dword [esp + 0xc8]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [esp + 0xb8]",
        "fld dword [esp + 0xb8]",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0xa8]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp + 0x34]",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x98]",
        "fld dword [esp + 0x98]",
        "fchs",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x38]",
        "fstp dword [esp + 0xd4]",
        "fld dword [esp + 0x38]",
        "fchs",
        "fsub dword [esp + 0x3c]",
        "fst qword [esp + 0x110]",
        "fsub dword [esp + 0x28]",
        "fsub dword [esp + 0x2c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x34]",
        "fstp dword [esp + 0xe4]",
        "fld dword [esp + 0x30]",
        "fsubr qword [esp + 0x110]",
        "fsub dword [esp + 0x20]",
        "fstp dword [esp + 0x104]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x30]",
        "fstp dword [esp + 0xf4]",
        "fld dword [esp]",
        "fchs",
        "fstp dword [esp + 0x10c]",
        "fld dword [esp + 0x4]",
        "fstp dword [esp + 0x90]",
        "fld dword [esp + 0xc]",
        "fst dword [esp + 0xb0]",
        "fchs",
        "fsub dword [esp + 0x8]",
        "fstp dword [esp + 0xec]",
        "fld dword [ecx + 0x1008]",
        "fsub dword [ecx + 0x1084]",
        "fmul dword [0x00b3c190]",
        "fstp dword [esp]",
        "fld dword [ecx + 0x100c]",
        "fsub dword [ecx + 0x1080]",
        "fmul dword [0x00b3c194]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x1010]",
        "fsub dword [ecx + 0x107c]",
        "fmul dword [0x00b3c198]",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x1014]",
        "fsub dword [ecx + 0x1078]",
        "fmul dword [0x00b3c19c]",
        "fstp dword [esp + 0xc]",
        "fld dword [ecx + 0x1018]",
        "fsub dword [ecx + 0x1074]",
        "fmul dword [0x00b3c1a0]",
        "fstp dword [esp + 0x10]",
        "fld dword [ecx + 0x101c]",
        "fsub dword [ecx + 0x1070]",
        "fmul dword [0x00b3c1a4]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx + 0x1020]",
        "fsub dword [ecx + 0x106c]",
        "fmul dword [0x00b3c1a8]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx + 0x1024]",
        "fsub dword [ecx + 0x1068]",
        "fmul dword [0x00b3c1ac]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ecx + 0x1028]",
        "fsub dword [ecx + 0x1064]",
        "fmul dword [0x00b3c1b0]",
        "fstp dword [esp + 0x20]",
        "fld dword [ecx + 0x102c]",
        "fsub dword [ecx + 0x1060]",
        "fmul dword [0x00b3c1b4]",
        "fstp dword [esp + 0x24]",
        "fld dword [ecx + 0x1030]",
        "fsub dword [ecx + 0x105c]",
        "fmul dword [0x00b3c1b8]",
        "fstp dword [esp + 0x28]",
        "fld dword [ecx + 0x1034]",
        "fsub dword [ecx + 0x1058]",
        "fmul dword [0x00b3c1bc]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ecx + 0x1038]",
        "fsub dword [ecx + 0x1054]",
        "fmul dword [0x00b3c1c0]",
        "fstp dword [esp + 0x30]",
        "fld dword [ecx + 0x103c]",
        "fsub dword [ecx + 0x1050]",
        "fmul dword [0x00b3c1c4]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + 0x1040]",
        "fsub dword [ecx + 0x104c]",
        "fmul dword [0x00b3c1c8]",
        "fstp dword [esp + 0x38]",
        "fld dword [ecx + 0x1044]",
        "fsub dword [ecx + 0x1048]",
        "fmul dword [0x00b3c1cc]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x34]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x24]",
        "fadd dword [esp + 0x18]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp]",
        "fsub dword [esp + 0x3c]",
        "fmul dword [0x00b3c1d0]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp + 0x38]",
        "fmul dword [0x00b3c1d4]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0x34]",
        "fmul dword [0x00b3c1d8]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x30]",
        "fmul dword [0x00b3c1dc]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x2c]",
        "fmul dword [0x00b3c1e0]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x28]",
        "fmul dword [0x00b3c1e4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x24]",
        "fmul dword [0x00b3c1e8]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fmul dword [0x00b3c1ec]",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x60]",
        "fmul st6",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x48]",
        "fsub dword [esp + 0x5c]",
        "fmul st4",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x58]",
        "fmul st5",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x54]",
        "fmul dword [0x00b3c1fc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x80]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x68]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x78]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x80]",
        "fmulp st6",
        "fxch st5",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x7c]",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x6c]",
        "fsub dword [esp + 0x78]",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x70]",
        "fsub dword [esp + 0x74]",
        "fmul dword [0x00b3c1fc]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fld dword [esp]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x48]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fsubr dword [esp + 0x4]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "fld dword [esp + 0x10]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fld st0",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x58]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x5c]",
        "fsubr dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "fld dword [esp + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x28]",
        "fld st0",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x68]",
        "fxch",
        "fsubrp st2,st0",
        "fld st3",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x6c]",
        "fsubr dword [esp + 0x24]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp + 0x30]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fadd dword [esp + 0x34]",
        "fstp dword [esp + 0x78]",
        "fxch",
        "fsubrp st2,st0",
        "fxch",
        "fmulp st3",
        "fxch st2",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x34]",
        "fsubrp st2,st0",
        "fmulp",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x48]",
        "fld st0",
        "fld dword [esp + 0x44]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x50]",
        "fld st0",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x58]",
        "fld st0",
        "fld dword [esp + 0x54]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x18]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fld dword [esp + 0x64]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x20]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x28]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fld st0",
        "fld dword [esp + 0x74]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x30]",
        "fsubrp",
        "fmul st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x80]",
        "fld st0",
        "fld dword [esp + 0x7c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fsubrp",
        "fmulp",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x3c]",
        "fld st0",
        "fld dword [esp + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xc4]",
        "fld dword [esp + 0xc4]",
        "fld st0",
        "fld dword [esp + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xbc]",
        "fld dword [esp + 0xbc]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xa4]",
        "fld st4",
        "fadd st0,st2",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0xb4]",
        "fld dword [esp + 0xb4]",
        "fadd st0,st1",
        "fstp dword [esp + 0xac]",
        "fld st4",
        "fadd st0,st1",
        "fadd dword [esp + 0x24]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x94]",
        "fld dword [esp + 0x94]",
        "fchs",
        "fstp dword [esp + 0x110]",
        "fld dword [esp + 0x110]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xd0]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x14]",
        "fadd st0,st5",
        "fstp dword [esp + 0x9c]",
        "fld dword [esp + 0x9c]",
        "fchs",
        "fstp dword [esp + 0x88]",
        "fld dword [esp + 0x88]",
        "fsub dword [esp + 0x18]",
        "fsub st0,st1",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x28]",
        "fchs",
        "fsubrp st3,st0",
        "fld st0",
        "fsubp st3,st0",
        "fld st5",
        "fsubp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsubrp st2,st0",
        "fld dword [esp + 0x8]",
        "fsubr st0,st2",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0xe8]",
        "fld dword [esp + 0x14]",
        "fsubp st2,st0",
        "fld dword [esp + 0x18]",
        "fsubp st2,st0",
        "fld st4",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xe0]",
        "fld dword [esp + 0x30]",
        "fld st0",
        "fsubp st2,st0",
        "fld dword [esp + 0x8]",
        "fsubr st0,st2",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0xf0]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x10]",
        "faddp st5,st0",
        "mov eax,dword [ecx + 0x1004]",
        "mov edx,dword [ecx + 0x1000]",
        "fxch st4",
        "lea eax,[edx + eax*0x4]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsubp st2,st0",
        "fxch",
        "fstp dword [esp + 0xf8]",
        "fld dword [esp + 0x20]",
        "fchs",
        "fsubrp st4,st0",
        "fxch st3",
        "fsubrp",
        "fsub st0,st3",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fld st0",
        "fsub dword [esp]",
        "fstp dword [esp + 0x108]",
        "fsubrp st2,st0",
        "fxch",
        "fstp dword [esp + 0x100]",
        "fld dword [esp + 0x90]",
        "fstp dword [eax]",
        "fld dword [esp + 0x94]",
        "fstp dword [eax + 0x40]",
        "fld dword [esp + 0x98]",
        "fstp dword [eax + 0x80]",
        "fld dword [esp + 0x9c]",
        "fstp dword [eax + 0xc0]",
        "fld dword [esp + 0xa0]",
        "fstp dword [eax + 0x100]",
        "fld dword [esp + 0xa4]",
        "fst dword [eax + 0x140]",
        "fld dword [esp + 0xa8]",
        "fst dword [eax + 0x180]",
        "fld dword [esp + 0xac]",
        "fst dword [eax + 0x1c0]",
        "fld dword [esp + 0xb0]",
        "fst dword [eax + 0x200]",
        "fld dword [esp + 0xb4]",
        "fstp dword [eax + 0x240]",
        "fld dword [esp + 0xb8]",
        "fst dword [eax + 0x280]",
        "fld dword [esp + 0xbc]",
        "fstp dword [eax + 0x2c0]",
        "fld dword [esp + 0xc0]",
        "fstp dword [eax + 0x300]",
        "fxch st5",
        "fst dword [eax + 0x340]",
        "fld dword [esp + 0xc8]",
        "fstp dword [eax + 0x380]",
        "fxch st6",
        "fst dword [eax + 0x3c0]",
        "fldz",
        "fstp dword [eax + 0x400]",
        "fchs",
        "fstp dword [eax + 0x440]",
        "fld dword [esp + 0xc8]",
        "fchs",
        "fstp dword [eax + 0x480]",
        "fxch st5",
        "fchs",
        "fstp dword [eax + 0x4c0]",
        "fld dword [esp + 0xc0]",
        "fchs",
        "fstp dword [eax + 0x500]",
        "fld dword [esp + 0xbc]",
        "fchs",
        "fstp dword [eax + 0x540]",
        "fxch st3",
        "fchs",
        "fstp dword [eax + 0x580]",
        "fld dword [esp + 0xb4]",
        "fchs",
        "fstp dword [eax + 0x5c0]",
        "fxch st3",
        "fchs",
        "fstp dword [eax + 0x600]",
        "fxch",
        "fchs",
        "fstp dword [eax + 0x640]",
        "fxch",
        "fchs",
        "fstp dword [eax + 0x680]",
        "fchs",
        "fstp dword [eax + 0x6c0]",
        "fld dword [esp + 0x8c]",
        "fstp dword [eax + 0x700]",
        "fld dword [esp + 0x88]",
        "fstp dword [eax + 0x740]",
        "fld dword [esp + 0x84]",
        "fstp dword [eax + 0x780]",
        "fld dword [esp + 0x110]",
        "fstp dword [eax + 0x7c0]",
        "cmp dword [ecx + 0x1000],ecx",
        "lea eax,[ecx + 0x800]"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x118 (280)",
        "ldr s2, [x7, #4228]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4104]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8]",
        "ldr s2, [x7, #4224]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4108]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #4220]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4112]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr s2, [x7, #4216]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4116]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x7, #4212]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4120]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x7, #4208]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4124]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x7, #4204]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4128]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x7, #4200]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4132]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x7, #4196]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4136]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #32]",
        "ldr s2, [x7, #4192]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4140]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #36]",
        "ldr s2, [x7, #4188]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4144]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldr s2, [x7, #4184]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4148]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x7, #4180]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4152]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x7, #4176]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4156]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x7, #4172]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4160]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x7, #4168]",
        "fcvt d2, s2",
        "ldr s3, [x7, #4164]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #68]",
        "ldr s4, [x8, #56]",
        "fcvt d4, s4",
        "ldr s5, [x8, #4]",
        "fcvt d5, s5",
        "fadd d6, d4, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d6",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #52]",
        "fcvt d6, s6",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "fadd d8, d6, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #76]",
        "ldr s8, [x8, #48]",
        "fcvt d8, s8",
        "ldr s9, [x8, #12]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #80]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #16]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #40]",
        "fcvt d8, s8",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #88]",
        "ldr s8, [x8, #36]",
        "fcvt d8, s8",
        "ldr s9, [x8, #24]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #32]",
        "fcvt d8, s8",
        "ldr s9, [x8, #28]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "mov w20, #0xc1d0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #100]",
        "fsub d2, d5, d4",
        "mov w20, #0xc1d4",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #104]",
        "fsub d2, d7, d6",
        "mov w20, #0xc1d8",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s3, [x8, #48]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xc1dc",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "ldr s3, [x8, #44]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xc1e0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #116]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #40]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xc1e4",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #120]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "ldr s3, [x8, #36]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xc1e8",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #28]",
        "fcvt d2, s2",
        "ldr s3, [x8, #32]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w20, #0xc1ec",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8]",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #72]",
        "fcvt d5, s5",
        "fadd d6, d4, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d6",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #88]",
        "fcvt d6, s6",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fadd d8, d6, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #84]",
        "fcvt d8, s8",
        "ldr s9, [x8, #80]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "mov w20, #0xc1f0",
        "movk w20, #0xb3, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d5, d4",
        "mov w20, #0xc1f4",
        "movk w20, #0xb3, lsl #16",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d2, d2, d4",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "fsub d2, d7, d6",
        "mov w20, #0xc1f8",
        "movk w20, #0xb3, lsl #16",
        "ldr s5, [x20]",
        "fcvt d5, s5",
        "fmul d2, d2, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "ldr s6, [x8, #84]",
        "fcvt d6, s6",
        "fsub d2, d2, d6",
        "mov w20, #0xc1fc",
        "movk w20, #0xb3, lsl #16",
        "ldr s6, [x20]",
        "fcvt d6, s6",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #128]",
        "fcvt d2, s2",
        "ldr s7, [x8, #100]",
        "fcvt d7, s7",
        "fadd d8, d2, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #32]",
        "ldr s8, [x8, #124]",
        "fcvt d8, s8",
        "ldr s9, [x8, #104]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #36]",
        "ldr s8, [x8, #120]",
        "fcvt d8, s8",
        "ldr s9, [x8, #108]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #40]",
        "ldr s8, [x8, #116]",
        "fcvt d8, s8",
        "ldr s9, [x8, #112]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "fsub d2, d7, d2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #104]",
        "fcvt d2, s2",
        "ldr s7, [x8, #124]",
        "fcvt d7, s7",
        "fsub d2, d2, d7",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #108]",
        "fcvt d2, s2",
        "ldr s7, [x8, #120]",
        "fcvt d7, s7",
        "fsub d2, d2, d7",
        "fmul d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #112]",
        "fcvt d2, s2",
        "ldr s7, [x8, #116]",
        "fcvt d7, s7",
        "fsub d2, d2, d7",
        "fmul d2, d6, d2",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fadd d7, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s7, d7",
        "str s7, [x8, #68]",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "fadd d8, d7, d8",
        "fcvt s8, d8",
        "str s8, [x8, #72]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d6, d2",
        "mov w20, #0xc200",
        "movk w20, #0xb3, lsl #16",
        "ldr s6, [x20]",
        "fcvt d6, s6",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "fsub d2, d2, d7",
        "mov w20, #0xc204",
        "movk w20, #0xb3, lsl #16",
        "ldr s7, [x20]",
        "fcvt d7, s7",
        "fmul d2, d2, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #28]",
        "fcvt d2, s2",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d2, d8",
        "fcvt s8, d8",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #24]",
        "fcvt d8, s8",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #88]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fsub d2, d8, d2",
        "fmul d2, d2, d6",
        "fcvt s2, d2",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s8, [x8, #24]",
        "fcvt d8, s8",
        "fsub d2, d2, d8",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "ldr s8, [x8, #32]",
        "fcvt d8, s8",
        "fadd d8, d2, d8",
        "fcvt s8, d8",
        "str s8, [x8, #100]",
        "ldr s8, [x8, #40]",
        "fcvt d8, s8",
        "ldr s9, [x8, #36]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #104]",
        "ldr s8, [x8, #32]",
        "fcvt d8, s8",
        "fsub d2, d8, d2",
        "fmul d2, d2, d6",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #36]",
        "fcvt d2, s2",
        "ldr s8, [x8, #40]",
        "fcvt d8, s8",
        "fsub d2, d2, d8",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s8, [x8, #48]",
        "fcvt d8, s8",
        "fadd d8, d2, d8",
        "fcvt s8, d8",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #56]",
        "fcvt d8, s8",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #120]",
        "ldr s8, [x8, #48]",
        "fcvt d8, s8",
        "fsub d2, d8, d2",
        "fmul d2, d2, d6",
        "fcvt s2, d2",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "ldr s8, [x8, #56]",
        "fcvt d8, s8",
        "fsub d2, d2, d8",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "ldr s8, [x8, #68]",
        "fcvt d8, s8",
        "fadd d8, d2, d8",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x8, #68]",
        "fcvt d8, s8",
        "fsub d2, d8, d2",
        "mov w20, #0xc208",
        "movk w20, #0xb3, lsl #16",
        "ldr s8, [x20]",
        "fcvt d8, s8",
        "fmul d2, d2, d8",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "ldr s9, [x8, #76]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #76]",
        "fcvt d2, s2",
        "ldr s9, [x8, #80]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #88]",
        "fcvt d2, s2",
        "ldr s9, [x8, #84]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #84]",
        "fcvt d2, s2",
        "ldr s9, [x8, #88]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #92]",
        "fcvt d2, s2",
        "ldr s9, [x8, #96]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #104]",
        "fcvt d2, s2",
        "ldr s9, [x8, #100]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #100]",
        "fcvt d2, s2",
        "ldr s9, [x8, #104]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #112]",
        "fcvt d2, s2",
        "ldr s9, [x8, #108]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #108]",
        "fcvt d2, s2",
        "ldr s9, [x8, #112]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #120]",
        "fcvt d2, s2",
        "ldr s9, [x8, #116]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #116]",
        "fcvt d2, s2",
        "ldr s9, [x8, #120]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #128]",
        "fcvt d2, s2",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #124]",
        "fcvt d2, s2",
        "ldr s9, [x8, #128]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #28]",
        "fcvt d9, s2",
        "str s2, [x8, #192]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "fadd d2, d9, d2",
        "fcvt s2, d2",
        "str s2, [x8, #160]",
        "ldr s2, [x8, #160]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x8, #140]",
        "ldr s2, [x8, #140]",
        "fcvt d2, s2",
        "ldr s9, [x8, #24]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #220]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "ldr s9, [x8, #28]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "ldr s9, [x8, #16]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #252]",
        "ldr s2, [x8, #60]",
        "fcvt d9, s2",
        "str s2, [x8, #200]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "fadd d2, d9, d2",
        "fcvt s2, d2",
        "str s2, [x8, #184]",
        "ldr s2, [x8, #184]",
        "fcvt d2, s2",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #168]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "ldr s9, [x8, #36]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #152]",
        "ldr s2, [x8, #152]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x8, #132]",
        "ldr s2, [x8, #132]",
        "fcvt d2, s2",
        "ldr s9, [x8, #56]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #212]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "ldr s9, [x8, #60]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "str d2, [x8, #272]",
        "ldr s9, [x8, #40]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "ldr s9, [x8, #44]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #228]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "ldr d9, [x8, #272]",
        "fsub d2, d9, d2",
        "ldr s9, [x8, #32]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #260]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "ldr s9, [x8, #48]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #244]",
        "ldr s2, [x8]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x8, #268]",
        "ldr s2, [x8, #4]",
        "str s2, [x8, #144]",
        "ldr s2, [x8, #12]",
        "fcvt d9, s2",
        "str s2, [x8, #176]",
        "fneg v2.2d, v9.2d",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #236]",
        "ldr s2, [x7, #4104]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4228]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc190",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8]",
        "ldr s2, [x7, #4108]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4224]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc194",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #4112]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4220]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc198",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr s2, [x7, #4116]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4216]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc19c",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x7, #4120]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4212]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1a0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x7, #4124]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4208]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1a4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x7, #4128]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4204]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1a8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x7, #4132]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4200]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1ac",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x7, #4136]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4196]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1b0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #32]",
        "ldr s2, [x7, #4140]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4192]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1b4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #36]",
        "ldr s2, [x7, #4144]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4188]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1b8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldr s2, [x7, #4148]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4184]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1bc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x7, #4152]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4180]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1c0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x7, #4156]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4176]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1c4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x7, #4160]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4172]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1c8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x7, #4164]",
        "fcvt d2, s2",
        "ldr s9, [x7, #4168]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1cc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "ldr s9, [x8, #4]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "ldr s9, [x8, #12]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "ldr s9, [x8, #16]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #84]",
        "ldr s2, [x8, #40]",
        "fcvt d2, s2",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #88]",
        "ldr s2, [x8, #36]",
        "fcvt d2, s2",
        "ldr s9, [x8, #24]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #32]",
        "fcvt d2, s2",
        "ldr s9, [x8, #28]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #96]",
        "ldr s2, [x8]",
        "fcvt d2, s2",
        "ldr s9, [x8, #60]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1d0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #100]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "ldr s9, [x8, #56]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1d4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1d8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s9, [x8, #48]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1dc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "ldr s9, [x8, #44]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1e0",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #116]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s9, [x8, #40]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1e4",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #120]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "ldr s9, [x8, #36]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1e8",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #28]",
        "fcvt d2, s2",
        "ldr s9, [x8, #32]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1ec",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8]",
        "ldr s2, [x8, #92]",
        "fcvt d2, s2",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #88]",
        "fcvt d2, s2",
        "ldr s9, [x8, #76]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #84]",
        "fcvt d2, s2",
        "ldr s9, [x8, #80]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #68]",
        "fcvt d2, s2",
        "ldr s9, [x8, #96]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #76]",
        "fcvt d2, s2",
        "ldr s9, [x8, #88]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "ldr s9, [x8, #84]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "mov w20, #0xc1fc",
        "movk w20, #0xb3, lsl #16",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #128]",
        "fcvt d2, s2",
        "ldr s9, [x8, #100]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #124]",
        "fcvt d2, s2",
        "ldr s9, [x8, #104]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #120]",
        "fcvt d2, s2",
        "ldr s9, [x8, #108]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #116]",
        "fcvt d2, s2",
        "ldr s9, [x8, #112]",
        "fcvt d9, s9",
        "fadd d2, d2, d9",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #100]",
        "fcvt d2, s2",
        "ldr s9, [x8, #128]",
        "fcvt d9, s9",
        "fsub d2, d2, d9",
        "fmul d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #104]",
        "fcvt d2, s2",
        "ldr s3, [x8, #124]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fmul d2, d4, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #108]",
        "fcvt d2, s2",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fmul d2, d5, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #112]",
        "fcvt d2, s2",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #68]",
        "ldr s4, [x8, #8]",
        "fcvt d4, s4",
        "ldr s5, [x8, #4]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "str s5, [x8, #72]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "fsub d2, d2, d4",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #28]",
        "fcvt d2, s2",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #84]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "str s5, [x8, #88]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "fsub d2, d2, d4",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "ldr s3, [x8, #32]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #100]",
        "ldr s4, [x8, #40]",
        "fcvt d4, s4",
        "ldr s5, [x8, #36]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "str s5, [x8, #104]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #36]",
        "fcvt d2, s2",
        "fsub d2, d2, d4",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #112]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s3, [x8, #48]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #116]",
        "ldr s4, [x8, #56]",
        "fcvt d4, s4",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fadd d5, d4, d5",
        "fcvt s5, d5",
        "str s5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fmul d2, d6, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #124]",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "fsub d2, d2, d4",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #128]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #8]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #88]",
        "fcvt d2, s2",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #16]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "ldr s3, [x8, #92]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #24]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #28]",
        "ldr s2, [x8, #104]",
        "fcvt d2, s2",
        "ldr s3, [x8, #100]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #32]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #36]",
        "ldr s2, [x8, #112]",
        "fcvt d2, s2",
        "ldr s3, [x8, #108]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #40]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #120]",
        "fcvt d2, s2",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #48]",
        "fsub d2, d3, d2",
        "fmul d2, d2, d8",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "ldr s2, [x8, #128]",
        "fcvt d2, s2",
        "ldr s3, [x8, #124]",
        "fcvt d3, s3",
        "fadd d4, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #56]",
        "fsub d2, d3, d2",
        "fmul d2, d8, d2",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #60]",
        "fcvt d3, s2",
        "ldr s4, [x8, #28]",
        "fcvt d4, s4",
        "fadd d5, d3, d4",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x8, #196]",
        "ldr s5, [x8, #196]",
        "fcvt d6, s5",
        "ldr s7, [x8, #44]",
        "fcvt d7, s7",
        "fadd d8, d6, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #188]",
        "ldr s8, [x8, #188]",
        "fcvt d8, s8",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "ldr s9, [x8, #52]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #164]",
        "fadd d8, d3, d7",
        "ldr s10, [x8, #12]",
        "fcvt d10, s10",
        "fadd d8, d8, d10",
        "fcvt s8, d8",
        "str s8, [x8, #180]",
        "ldr s8, [x8, #180]",
        "fcvt d8, s8",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #172]",
        "fadd d8, d3, d9",
        "ldr s10, [x8, #36]",
        "fcvt d10, s10",
        "fadd d8, d8, d10",
        "fcvt s8, d8",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #64]",
        "fcvt d8, s8",
        "ldr s10, [x8, #4]",
        "fcvt d10, s10",
        "fadd d8, d8, d10",
        "fcvt s8, d8",
        "str s8, [x8, #148]",
        "ldr s8, [x8, #148]",
        "fcvt d8, s8",
        "fneg v8.2d, v8.2d",
        "fcvt s8, d8",
        "str s8, [x8, #272]",
        "ldr s8, [x8, #272]",
        "fcvt d8, s8",
        "ldr s10, [x8, #56]",
        "fcvt d10, s10",
        "fsub d8, d8, d10",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #208]",
        "ldr s8, [x8, #64]",
        "fcvt d8, s8",
        "ldr s11, [x8, #20]",
        "fcvt d11, s11",
        "fadd d8, d8, d11",
        "fadd d8, d8, d4",
        "fcvt s8, d8",
        "str s8, [x8, #156]",
        "ldr s8, [x8, #156]",
        "fcvt d8, s8",
        "fneg v8.2d, v8.2d",
        "fcvt s8, d8",
        "str s8, [x8, #136]",
        "ldr s8, [x8, #136]",
        "fcvt d8, s8",
        "ldr s11, [x8, #24]",
        "fcvt d11, s11",
        "fsub d8, d8, d11",
        "fsub d8, d8, d10",
        "fcvt s8, d8",
        "str s8, [x8, #216]",
        "ldr s8, [x8, #40]",
        "fcvt d8, s8",
        "fneg v8.2d, v8.2d",
        "fsub d7, d8, d7",
        "fsub d7, d7, d10",
        "fsub d7, d7, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s7, d7",
        "str s7, [x8, #64]",
        "ldr s7, [x8, #64]",
        "fcvt d7, s7",
        "fsub d8, d7, d9",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "fsub d9, d8, d9",
        "ldr s11, [x8, #12]",
        "fcvt d11, s11",
        "fsub d9, d9, d11",
        "fcvt s9, d9",
        "str s9, [x8, #232]",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "ldr s9, [x8, #24]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fsub d8, d8, d4",
        "mov w20, #0x0",
        "strb wzr, [x28, #1049]",
        "fcvt s8, d8",
        "str s8, [x8, #224]",
        "ldr s8, [x8, #48]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "fsub d9, d7, d9",
        "ldr s11, [x8, #12]",
        "fcvt d11, s11",
        "fsub d9, d9, d11",
        "fcvt s9, d9",
        "str s9, [x8, #240]",
        "ldr s9, [x8, #24]",
        "fcvt d9, s9",
        "ldr s11, [x8, #16]",
        "fcvt d11, s11",
        "fadd d9, d9, d11",
        "fadd d4, d4, d9",
        "ldr w4, [x7, #4100]",
        "ldr w5, [x7, #4096]",
        "strb wzr, [x28, #1049]",
        "add w4, w5, w4, lsl #2",
        "fcvt s4, d4",
        "str s4, [x8, #64]",
        "ldr s4, [x8, #64]",
        "fcvt d4, s4",
        "fsub d7, d7, d4",
        "strb wzr, [x28, #1049]",
        "fcvt s7, d7",
        "str s7, [x8, #248]",
        "ldr s7, [x8, #32]",
        "fcvt d7, s7",
        "fneg v7.2d, v7.2d",
        "fsub d7, d7, d8",
        "strb wzr, [x28, #1049]",
        "fsub d7, d7, d10",
        "fsub d7, d7, d3",
        "fcvt s7, d7",
        "str s7, [x8, #64]",
        "ldr s7, [x8, #64]",
        "fcvt d7, s7",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fsub d8, d7, d8",
        "fcvt s8, d8",
        "str s8, [x8, #264]",
        "fsub d4, d7, d4",
        "strb wzr, [x28, #1049]",
        "fcvt s4, d4",
        "str s4, [x8, #256]",
        "ldr s4, [x8, #144]",
        "str s4, [x4]",
        "ldr s4, [x8, #148]",
        "str s4, [x4, #64]",
        "ldr s4, [x8, #152]",
        "str s4, [x4, #128]",
        "ldr s4, [x8, #156]",
        "str s4, [x4, #192]",
        "ldr s4, [x8, #160]",
        "str s4, [x4, #256]",
        "ldr s4, [x8, #164]",
        "fcvt d7, s4",
        "str s4, [x4, #320]",
        "ldr s4, [x8, #168]",
        "fcvt d8, s4",
        "str s4, [x4, #384]",
        "ldr s4, [x8, #172]",
        "fcvt d9, s4",
        "str s4, [x4, #448]",
        "ldr s4, [x8, #176]",
        "fcvt d10, s4",
        "str s4, [x4, #512]",
        "ldr s4, [x8, #180]",
        "str s4, [x4, #576]",
        "ldr s4, [x8, #184]",
        "fcvt d11, s4",
        "str s4, [x4, #640]",
        "ldr s4, [x8, #188]",
        "str s4, [x4, #704]",
        "ldr s4, [x8, #192]",
        "str s4, [x4, #768]",
        "strb wzr, [x28, #1049]",
        "str s5, [x4, #832]",
        "ldr s4, [x8, #200]",
        "str s4, [x4, #896]",
        "strb wzr, [x28, #1049]",
        "str s2, [x4, #960]",
        "fmov d2, x20",
        "fcvt s2, d2",
        "str s2, [x4, #1024]",
        "fneg v2.2d, v3.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1088]",
        "ldr s2, [x8, #200]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1152]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v6.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1216]",
        "ldr s2, [x8, #192]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1280]",
        "ldr s2, [x8, #188]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1344]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v11.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1408]",
        "ldr s2, [x8, #180]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1472]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v10.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1536]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v9.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1600]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v8.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1664]",
        "fneg v2.2d, v7.2d",
        "fcvt s2, d2",
        "str s2, [x4, #1728]",
        "ldr s2, [x8, #140]",
        "str s2, [x4, #1792]",
        "ldr s2, [x8, #136]",
        "str s2, [x4, #1856]",
        "ldr s2, [x8, #132]",
        "str s2, [x4, #1920]",
        "ldr s2, [x8, #272]",
        "str s2, [x4, #1984]",
        "ldr w20, [x7, #4096]",
        "eor x27, x20, x7",
        "subs w26, w20, w7",
        "add w4, w7, #0x800 (2048)",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 630,
      "ExpectedInstructionCount": 938,
      "x86Insts": [
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax + 0x40]",
        "fld dword [eax + 0x44]",
        "fadd st0,st1",
        "fstp dword [eax + 0x44]",
        "fld dword [eax + 0x3c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x40]",
        "fld dword [eax + 0x38]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x3c]",
        "fld dword [eax + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x38]",
        "fld dword [eax + 0x30]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x34]",
        "fld dword [eax + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x30]",
        "fld dword [eax + 0x28]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x2c]",
        "fld dword [eax + 0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x28]",
        "fld dword [eax + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x24]",
        "fld dword [eax + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x20]",
        "fld dword [eax + 0x18]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x1c]",
        "fld dword [eax + 0x14]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x18]",
        "fld dword [eax + 0x10]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x14]",
        "fld dword [eax + 0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x10]",
        "fld dword [eax + 0x8]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0xc]",
        "fld dword [eax + 0x4]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x8]",
        "fld dword [eax]",
        "fst qword [esp + 0x20]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [eax + 0x4]",
        "fld dword [eax + 0x3c]",
        "fld dword [eax + 0x44]",
        "fadd st0,st1",
        "fstp dword [eax + 0x44]",
        "fld dword [eax + 0x34]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x3c]",
        "fld dword [eax + 0x2c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x34]",
        "fld dword [eax + 0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x2c]",
        "fld dword [eax + 0x1c]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x24]",
        "fld dword [eax + 0x14]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x1c]",
        "fld dword [eax + 0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [eax + 0x14]",
        "faddp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fstp dword [eax + 0xc]",
        "fadd st0,st0",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x30]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x28]",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x10]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x80]",
        "fld dword [eax + 0x20]",
        "fld dword [eax + 0x40]",
        "fld st3",
        "fld qword [0x00a77b70]",
        "fmul st1",
        "fxch",
        "faddp st4,st0",
        "fld st2",
        "fld qword [0x00a77b68]",
        "fmul st1",
        "fxch st5",
        "faddp",
        "fld st2",
        "fld qword [0x00a77b60]",
        "fmul st1",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0xc0]",
        "fld qword [esp + 0x28]",
        "fadd st0,st6",
        "fsub st0,st4",
        "fld qword [esp + 0x18]",
        "fsub st1,st0",
        "fsubp",
        "fsub st0,st3",
        "fstp dword [esp + 0xd0]",
        "fld st5",
        "fmul st1",
        "fsubr qword [esp + 0x80]",
        "fld st4",
        "fmul st3",
        "fsubp",
        "fld st3",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0xb8]",
        "fld st5",
        "fmul st5",
        "fsubr qword [esp + 0x80]",
        "fld st4",
        "fmul st2",
        "faddp",
        "fld st3",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0xc8]",
        "fld qword [esp + 0x20]",
        "fsubrp st6,st0",
        "fxch st5",
        "faddp st3,st0",
        "fxch st2",
        "fsub qword [esp + 0x18]",
        "faddp",
        "fstp dword [esp + 0x80]",
        "fld dword [eax + 0x18]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x8]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0x20]",
        "fld dword [eax + 0x28]",
        "fst qword [esp + 0x90]",
        "fld dword [eax + 0x38]",
        "fst qword [esp + 0x28]",
        "fld qword [0x00a77b50]",
        "fmul st4",
        "fxch st4",
        "faddp st3,st0",
        "fld qword [0x00a77b48]",
        "fmul st2",
        "fxch st3",
        "faddp st2,st0",
        "fld qword [0x00a77b40]",
        "fmul st1",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0xb4]",
        "fld qword [esp + 0x18]",
        "fld qword [esp + 0x90]",
        "fsub st1,st0",
        "fxch",
        "fsub qword [esp + 0x28]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0xc4]",
        "fld qword [esp + 0x18]",
        "fmul st3",
        "fsub qword [esp + 0x20]",
        "fxch",
        "fmul st2",
        "fsubp",
        "fld qword [esp + 0x28]",
        "fmul st4",
        "faddp",
        "fstp dword [esp + 0xa8]",
        "fld qword [esp + 0x18]",
        "fmul st1",
        "fsub qword [esp + 0x20]",
        "fld qword [esp + 0x90]",
        "fmul st4",
        "faddp",
        "fld qword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0x90]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x8]",
        "fadd st0,st0",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x34]",
        "fst qword [esp + 0x98]",
        "fld dword [esp + 0x14]",
        "fst qword [esp + 0xa0]",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0x14]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x14]",
        "fstp qword [esp + 0x88]",
        "fld dword [eax + 0x24]",
        "fstp qword [esp + 0x18]",
        "fld dword [eax + 0x44]",
        "fstp qword [esp + 0x28]",
        "fmul st4",
        "fadd qword [esp + 0x88]",
        "fld qword [esp + 0x18]",
        "fmul st6",
        "faddp",
        "fld qword [esp + 0x28]",
        "fmul st7",
        "faddp",
        "fstp dword [esp + 0xb0]",
        "fld qword [esp + 0xa0]",
        "fadd qword [esp + 0x20]",
        "fsub qword [esp + 0x18]",
        "fld qword [esp + 0x98]",
        "fsub st1,st0",
        "fsubp",
        "fsub qword [esp + 0x28]",
        "fstp dword [esp + 0x30]",
        "fld qword [esp + 0x20]",
        "fmul st6",
        "fsubr qword [esp + 0x88]",
        "fld qword [esp + 0x18]",
        "fmul st5",
        "fsubp",
        "fld qword [esp + 0x28]",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0xa0]",
        "fld qword [esp + 0x20]",
        "fld st0",
        "fmulp st6",
        "fld qword [esp + 0x88]",
        "fsubrp st6,st0",
        "fld qword [esp + 0x18]",
        "fmulp st7",
        "fxch st5",
        "faddp st6,st0",
        "fld qword [esp + 0x28]",
        "fld st0",
        "fmulp st5",
        "fxch st6",
        "fsubrp st4,st0",
        "fxch st3",
        "fstp dword [esp + 0x28]",
        "fld qword [esp + 0x8]",
        "fsubrp st4,st0",
        "fxch st3",
        "fadd qword [esp + 0x18]",
        "fsub qword [esp + 0x98]",
        "faddp st4,st0",
        "fxch st3",
        "fmul qword [0x00a77bd8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + 0x1c]",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [eax + 0x2c]",
        "fld dword [eax + 0x3c]",
        "fld dword [esp + 0x4]",
        "fmul st6",
        "fadd st0,st3",
        "fld st2",
        "fmul st6",
        "faddp",
        "fld st1",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + 0xc]",
        "fst qword [esp + 0x20]",
        "fsub st0,st2",
        "fsub st0,st1",
        "fmul qword [0x00a77b58]",
        "fstp dword [esp + 0x98]",
        "fld qword [esp + 0x20]",
        "fmul st5",
        "fsub st0,st3",
        "fld st2",
        "fmul st5",
        "fsubp",
        "fld st1",
        "fmul st7",
        "faddp",
        "fstp dword [esp + 0x88]",
        "fld qword [esp + 0x20]",
        "fmulp st4",
        "fxch st3",
        "fsubrp st2,st0",
        "fmulp st4",
        "faddp st3,st0",
        "fmulp",
        "fsubp",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0xb4]",
        "fld dword [esp + 0xc0]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0xb0]",
        "fld st0",
        "fadd st0,st2",
        "fmul qword [0x00a77b38]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x38]",
        "fsubrp",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0xc4]",
        "fld dword [esp + 0xd0]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x98]",
        "fst qword [esp + 0xd0]",
        "fld dword [esp + 0x30]",
        "fst qword [esp + 0x98]",
        "faddp",
        "fmul qword [0x00a77bd0]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x3c]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0xa8]",
        "fst qword [esp + 0xa8]",
        "fld dword [esp + 0xb8]",
        "fst qword [esp + 0xb8]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x88]",
        "fst qword [esp + 0x88]",
        "fld dword [esp + 0xa0]",
        "fst qword [esp + 0xa0]",
        "faddp",
        "fmul qword [0x00a77b30]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x40]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x90]",
        "fst qword [esp + 0x90]",
        "fld dword [esp + 0xc8]",
        "fst qword [esp + 0xc8]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x20]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fst qword [esp + 0x28]",
        "faddp",
        "fmul qword [0x00a77b28]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x44]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x18]",
        "fld dword [esp + 0x80]",
        "fst qword [esp + 0x80]",
        "fadd st0,st1",
        "fstp dword [esp + 0x48]",
        "fsubr qword [esp + 0x80]",
        "fstp dword [esp + 0x6c]",
        "fld qword [esp + 0xc8]",
        "fsub qword [esp + 0x90]",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0x28]",
        "fsub qword [esp + 0x20]",
        "fmul qword [0x00a77b20]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x4c]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x68]",
        "fld qword [esp + 0xb8]",
        "fsub qword [esp + 0xa8]",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0xa0]",
        "fsub qword [esp + 0x88]",
        "fmul qword [0x00a77b18]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fst qword [esp + 0x30]",
        "fadd st0,st1",
        "fstp dword [esp + 0x50]",
        "fsubr qword [esp + 0x30]",
        "fstp dword [esp + 0x64]",
        "fsubrp",
        "fstp dword [esp + 0x8]",
        "fld qword [esp + 0x98]",
        "fsub qword [esp + 0xd0]",
        "fmul qword [0x00a77be0]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x54]",
        "fsubrp",
        "fstp dword [esp + 0x60]",
        "fxch st2",
        "fsubrp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x8]",
        "fsubrp",
        "fmul qword [0x00a77b10]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x4]",
        "fld dword [esp + 0x8]",
        "lea eax,[ecx + ecx*0x8]",
        "fld st0",
        "mov ecx,dword [ebp + 0xc]",
        "fadd st0,st2",
        "shl eax,0x4",
        "add eax,0xb183d0",
        "fstp dword [esp + 0x58]",
        "fsubrp",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x5c]",
        "fld st0",
        "fchs",
        "fmul dword [eax]",
        "fstp dword [ecx]",
        "fld dword [eax + 0x4]",
        "fld dword [esp + 0x60]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x4]",
        "fld dword [eax + 0x8]",
        "fld dword [esp + 0x64]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x8]",
        "fld dword [eax + 0xc]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0xc]",
        "fld dword [eax + 0x10]",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x10]",
        "fld dword [eax + 0x14]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fchs",
        "fmulp st2",
        "fxch",
        "fstp dword [ecx + 0x14]",
        "fld dword [eax + 0x18]",
        "fld dword [esp + 0x74]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x18]",
        "fld dword [eax + 0x1c]",
        "fld dword [esp + 0x78]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x1c]",
        "fld dword [eax + 0x20]",
        "fld dword [esp + 0x7c]",
        "fchs",
        "fmulp",
        "fstp dword [ecx + 0x20]",
        "fld dword [eax + 0x24]",
        "fmul dword [esp + 0x7c]",
        "fstp dword [ecx + 0x24]",
        "fld dword [eax + 0x28]",
        "fmul dword [esp + 0x78]",
        "fstp dword [ecx + 0x28]",
        "fld dword [eax + 0x2c]",
        "fmul dword [esp + 0x74]",
        "fstp dword [ecx + 0x2c]",
        "fmul dword [eax + 0x30]",
        "fstp dword [ecx + 0x30]",
        "fmul dword [eax + 0x34]",
        "fstp dword [ecx + 0x34]",
        "fmul dword [eax + 0x38]",
        "fstp dword [ecx + 0x38]",
        "fmul dword [eax + 0x3c]",
        "fstp dword [ecx + 0x3c]",
        "fmul dword [eax + 0x40]",
        "fstp dword [ecx + 0x40]",
        "fmul dword [eax + 0x44]",
        "fstp dword [ecx + 0x44]",
        "fld dword [esp + 0x58]",
        "fld dword [eax + 0x48]",
        "fmul st1",
        "fstp dword [ecx + 0x48]",
        "fld dword [esp + 0x54]",
        "fld dword [eax + 0x4c]",
        "fmul st1",
        "fstp dword [ecx + 0x4c]",
        "fld dword [esp + 0x50]",
        "fld dword [eax + 0x50]",
        "fmul st1",
        "fstp dword [ecx + 0x50]",
        "fld dword [esp + 0x4c]",
        "fld dword [eax + 0x54]",
        "fmul st1",
        "fstp dword [ecx + 0x54]",
        "fld dword [esp + 0x48]",
        "fld dword [eax + 0x58]",
        "fmul st1",
        "fstp dword [ecx + 0x58]",
        "fld dword [esp + 0x44]",
        "fld dword [eax + 0x5c]",
        "fmul st1",
        "fstp dword [ecx + 0x5c]",
        "fld dword [esp + 0x40]",
        "fst qword [esp + 0x18]",
        "fmul dword [eax + 0x60]",
        "fstp dword [ecx + 0x60]",
        "fld dword [esp + 0x3c]",
        "fst qword [esp + 0x80]",
        "fmul dword [eax + 0x64]",
        "fstp dword [ecx + 0x64]",
        "fld dword [esp + 0x38]",
        "fld dword [eax + 0x68]",
        "fmul st1",
        "fstp dword [ecx + 0x68]",
        "fmul dword [eax + 0x6c]",
        "fstp dword [ecx + 0x6c]",
        "fld dword [eax + 0x70]",
        "fmul qword [esp + 0x80]",
        "fstp dword [ecx + 0x70]",
        "fld dword [eax + 0x74]",
        "fmul qword [esp + 0x18]",
        "fstp dword [ecx + 0x74]",
        "fmul dword [eax + 0x78]",
        "fstp dword [ecx + 0x78]",
        "fmul dword [eax + 0x7c]",
        "fstp dword [ecx + 0x7c]",
        "fmul dword [eax + 0x80]",
        "fstp dword [ecx + 0x80]",
        "fmul dword [eax + 0x84]",
        "fstp dword [ecx + 0x84]",
        "fmul dword [eax + 0x88]",
        "fstp dword [ecx + 0x88]",
        "fmul dword [eax + 0x8c]",
        "fstp dword [ecx + 0x8c]",
        "mov esp,ebp",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x9, #8]",
        "ldr s2, [x4, #64]",
        "fcvt d2, s2",
        "ldr s3, [x4, #68]",
        "fcvt d3, s3",
        "fadd d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x4, #68]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #64]",
        "ldr s2, [x4, #56]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #60]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #56]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #52]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #48]",
        "ldr s2, [x4, #40]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #44]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #40]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #36]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #32]",
        "ldr s2, [x4, #24]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #28]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #24]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #20]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #16]",
        "ldr s2, [x4, #8]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x4, #8]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "str d2, [x8, #32]",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "fcvt d4, s3",
        "str s3, [x4, #4]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr s5, [x4, #68]",
        "fcvt d5, s5",
        "fadd d5, d5, d3",
        "fcvt s5, d5",
        "str s5, [x4, #68]",
        "ldr s5, [x4, #52]",
        "fcvt d5, s5",
        "fadd d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #60]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "fadd d5, d5, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x4, #52]",
        "ldr s5, [x4, #36]",
        "fcvt d5, s5",
        "fadd d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #44]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "fadd d5, d5, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x4, #36]",
        "ldr s5, [x4, #20]",
        "fcvt d5, s5",
        "fadd d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x4, #28]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fadd d5, d5, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x4, #20]",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x8, #4]",
        "ldr s3, [x8, #4]",
        "str s3, [x4, #12]",
        "fadd d2, d2, d2",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "str d2, [x8, #24]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "str d3, [x8, #40]",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "str d3, [x8, #128]",
        "ldr s4, [x4, #32]",
        "fcvt d4, s4",
        "ldr s5, [x4, #64]",
        "fcvt d5, s5",
        "mov w20, #0x7b70",
        "movk w20, #0xa7, lsl #16",
        "ldr d6, [x20]",
        "fmul d6, d6, d2",
        "strb wzr, [x28, #1049]",
        "fadd d3, d3, d2",
        "mov w20, #0x7b68",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "fmul d7, d7, d4",
        "strb wzr, [x28, #1049]",
        "fadd d3, d4, d3",
        "mov w20, #0x7b60",
        "movk w20, #0xa7, lsl #16",
        "ldr d8, [x20]",
        "fmul d8, d8, d5",
        "strb wzr, [x28, #1049]",
        "fadd d3, d5, d3",
        "fcvt s3, d3",
        "str s3, [x8, #192]",
        "ldr d3, [x8, #40]",
        "fadd d3, d3, d2",
        "fsub d3, d3, d4",
        "ldr d9, [x8, #24]",
        "fsub d3, d3, d9",
        "fsub d3, d3, d9",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "str s3, [x8, #208]",
        "fmul d3, d2, d8",
        "ldr d9, [x8, #128]",
        "fsub d3, d9, d3",
        "fmul d9, d4, d6",
        "fsub d3, d3, d9",
        "fmul d9, d5, d7",
        "fadd d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x8, #184]",
        "fmul d3, d2, d7",
        "ldr d9, [x8, #128]",
        "fsub d3, d9, d3",
        "fmul d9, d4, d8",
        "fadd d3, d3, d9",
        "fmul d9, d5, d6",
        "fsub d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x8, #200]",
        "ldr d3, [x8, #32]",
        "fsub d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fadd d2, d4, d2",
        "strb wzr, [x28, #1049]",
        "ldr d3, [x8, #24]",
        "fsub d2, d2, d3",
        "fadd d2, d5, d2",
        "fcvt s2, d2",
        "str s2, [x8, #128]",
        "ldr s2, [x4, #24]",
        "fcvt d2, s2",
        "mov w20, #0x7b58",
        "movk w20, #0xa7, lsl #16",
        "ldr d3, [x20]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #8]",
        "fcvt d2, s2",
        "str d2, [x8, #24]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "str d3, [x8, #32]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "str d4, [x8, #144]",
        "ldr s5, [x4, #56]",
        "fcvt d5, s5",
        "str d5, [x8, #40]",
        "mov w21, #0x7b50",
        "movk w21, #0xa7, lsl #16",
        "ldr d9, [x21]",
        "fmul d9, d9, d2",
        "strb wzr, [x28, #1049]",
        "fadd d2, d3, d2",
        "mov w21, #0x7b48",
        "movk w21, #0xa7, lsl #16",
        "ldr d3, [x21]",
        "fmul d3, d3, d4",
        "strb wzr, [x28, #1049]",
        "fadd d2, d4, d2",
        "mov w21, #0x7b40",
        "movk w21, #0xa7, lsl #16",
        "ldr d4, [x21]",
        "fmul d4, d4, d5",
        "strb wzr, [x28, #1049]",
        "fadd d2, d5, d2",
        "fcvt s2, d2",
        "str s2, [x8, #180]",
        "ldr d2, [x8, #24]",
        "ldr d5, [x8, #144]",
        "fsub d2, d2, d5",
        "strb wzr, [x28, #1049]",
        "ldr d10, [x8, #40]",
        "fsub d2, d2, d10",
        "ldr d10, [x20]",
        "fmul d2, d2, d10",
        "fcvt s2, d2",
        "str s2, [x8, #196]",
        "ldr d2, [x8, #24]",
        "fmul d2, d2, d3",
        "ldr d10, [x8, #32]",
        "fsub d2, d2, d10",
        "strb wzr, [x28, #1049]",
        "fmul d5, d5, d4",
        "fsub d2, d2, d5",
        "ldr d5, [x8, #40]",
        "fmul d5, d5, d9",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #168]",
        "ldr d2, [x8, #24]",
        "fmul d2, d2, d4",
        "ldr d5, [x8, #32]",
        "fsub d2, d2, d5",
        "ldr d5, [x8, #144]",
        "fmul d5, d5, d9",
        "fadd d2, d2, d5",
        "ldr d5, [x8, #40]",
        "fmul d5, d5, d3",
        "fsub d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #144]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "str d2, [x8, #8]",
        "fadd d2, d2, d2",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #52]",
        "fcvt d2, s2",
        "str d2, [x8, #152]",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "str d5, [x8, #160]",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x4, #20]",
        "fcvt d2, s2",
        "str d2, [x8, #32]",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "str d5, [x8, #136]",
        "ldr s5, [x4, #36]",
        "fcvt d5, s5",
        "str d5, [x8, #24]",
        "ldr s5, [x4, #68]",
        "fcvt d5, s5",
        "str d5, [x8, #40]",
        "fmul d2, d2, d6",
        "ldr d5, [x8, #136]",
        "fadd d2, d2, d5",
        "ldr d5, [x8, #24]",
        "fmul d5, d5, d7",
        "fadd d2, d2, d5",
        "ldr d5, [x8, #40]",
        "fmul d5, d5, d8",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #176]",
        "ldr d2, [x8, #160]",
        "ldr d5, [x8, #32]",
        "fadd d2, d2, d5",
        "ldr d5, [x8, #24]",
        "fsub d2, d2, d5",
        "ldr d5, [x8, #152]",
        "fsub d2, d2, d5",
        "fsub d2, d2, d5",
        "ldr d5, [x8, #40]",
        "fsub d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr d2, [x8, #32]",
        "fmul d2, d2, d8",
        "ldr d5, [x8, #136]",
        "fsub d2, d5, d2",
        "ldr d5, [x8, #24]",
        "fmul d5, d5, d6",
        "fsub d2, d2, d5",
        "ldr d5, [x8, #40]",
        "fmul d5, d5, d7",
        "fadd d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #160]",
        "ldr d2, [x8, #32]",
        "fmul d5, d7, d2",
        "ldr d7, [x8, #136]",
        "fsub d5, d7, d5",
        "ldr d7, [x8, #24]",
        "fmul d7, d8, d7",
        "strb wzr, [x28, #1049]",
        "fadd d5, d7, d5",
        "ldr d7, [x8, #40]",
        "fmul d6, d6, d7",
        "strb wzr, [x28, #1049]",
        "fsub d5, d5, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s5, d5",
        "str s5, [x8, #40]",
        "ldr d5, [x8, #8]",
        "fsub d2, d5, d2",
        "strb wzr, [x28, #1049]",
        "ldr d5, [x8, #24]",
        "fadd d2, d2, d5",
        "ldr d5, [x8, #152]",
        "fsub d2, d2, d5",
        "fadd d2, d7, d2",
        "strb wzr, [x28, #1049]",
        "mov w21, #0x7bd8",
        "movk w21, #0xa7, lsl #16",
        "ldr d5, [x21]",
        "fmul d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x4, #28]",
        "fcvt d2, s2",
        "ldr d5, [x20]",
        "fmul d2, d2, d5",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s5, [x4, #44]",
        "fcvt d5, s5",
        "ldr s6, [x4, #60]",
        "fcvt d6, s6",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fmul d7, d7, d9",
        "fadd d7, d7, d2",
        "fmul d8, d5, d3",
        "fadd d7, d7, d8",
        "fmul d8, d6, d4",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #20]",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "str d7, [x8, #32]",
        "fsub d7, d7, d5",
        "fsub d7, d7, d6",
        "ldr d8, [x20]",
        "fmul d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #152]",
        "ldr d7, [x8, #32]",
        "fmul d7, d7, d3",
        "fsub d7, d7, d2",
        "fmul d8, d5, d4",
        "fsub d7, d7, d8",
        "fmul d8, d6, d9",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #136]",
        "ldr d7, [x8, #32]",
        "fmul d4, d4, d7",
        "strb wzr, [x28, #1049]",
        "fsub d2, d4, d2",
        "fmul d4, d9, d5",
        "fadd d2, d4, d2",
        "fmul d3, d3, d6",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #32]",
        "ldr s2, [x8, #180]",
        "fcvt d2, s2",
        "ldr s3, [x8, #192]",
        "fcvt d3, s3",
        "fadd d4, d3, d2",
        "fcvt s4, d4",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "ldr s5, [x8, #176]",
        "fcvt d5, s5",
        "fadd d6, d5, d4",
        "mov w20, #0x7b38",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "fmul d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "fadd d8, d7, d6",
        "fcvt s8, d8",
        "str s8, [x8, #56]",
        "fsub d6, d7, d6",
        "fcvt s6, d6",
        "str s6, [x8, #124]",
        "ldr s6, [x8, #196]",
        "fcvt d6, s6",
        "ldr s7, [x8, #208]",
        "fcvt d7, s7",
        "fadd d8, d7, d6",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #152]",
        "fcvt d8, s8",
        "str d8, [x8, #208]",
        "ldr s9, [x8, #48]",
        "fcvt d9, s9",
        "str d9, [x8, #152]",
        "fadd d8, d8, d9",
        "mov w20, #0x7bd0",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "str d9, [x8, #48]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #60]",
        "ldr d9, [x8, #48]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #120]",
        "ldr s8, [x8, #168]",
        "fcvt d8, s8",
        "str d8, [x8, #168]",
        "ldr s9, [x8, #184]",
        "fcvt d9, s9",
        "str d9, [x8, #184]",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #136]",
        "fcvt d8, s8",
        "str d8, [x8, #136]",
        "ldr s9, [x8, #160]",
        "fcvt d9, s9",
        "str d9, [x8, #160]",
        "fadd d8, d8, d9",
        "mov w20, #0x7b30",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "str d9, [x8, #48]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #64]",
        "ldr d9, [x8, #48]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #144]",
        "fcvt d8, s8",
        "str d8, [x8, #144]",
        "ldr s9, [x8, #200]",
        "fcvt d9, s9",
        "str d9, [x8, #200]",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #32]",
        "fcvt d8, s8",
        "str d8, [x8, #32]",
        "ldr s9, [x8, #40]",
        "fcvt d9, s9",
        "str d9, [x8, #40]",
        "fadd d8, d8, d9",
        "mov w20, #0x7b28",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "str d9, [x8, #48]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #68]",
        "ldr d9, [x8, #48]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #112]",
        "ldr s8, [x8, #24]",
        "fcvt d8, s8",
        "ldr s9, [x8, #128]",
        "fcvt d9, s9",
        "str d9, [x8, #128]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #72]",
        "ldr d9, [x8, #128]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #108]",
        "ldr d8, [x8, #200]",
        "ldr d9, [x8, #144]",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr d8, [x8, #40]",
        "ldr d9, [x8, #32]",
        "fsub d8, d8, d9",
        "mov w20, #0x7b20",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "str d9, [x8, #48]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #76]",
        "ldr d9, [x8, #48]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #104]",
        "ldr d8, [x8, #184]",
        "ldr d9, [x8, #168]",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr d8, [x8, #160]",
        "ldr d9, [x8, #136]",
        "fsub d8, d8, d9",
        "mov w20, #0x7b18",
        "movk w20, #0xa7, lsl #16",
        "ldr d9, [x20]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "str d9, [x8, #48]",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #80]",
        "ldr d9, [x8, #48]",
        "fsub d8, d9, d8",
        "fcvt s8, d8",
        "str s8, [x8, #100]",
        "fsub d6, d7, d6",
        "fcvt s6, d6",
        "str s6, [x8, #8]",
        "ldr d6, [x8, #152]",
        "ldr d7, [x8, #208]",
        "fsub d6, d6, d7",
        "mov w20, #0x7be0",
        "movk w20, #0xa7, lsl #16",
        "ldr d7, [x20]",
        "fmul d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "fadd d8, d7, d6",
        "fcvt s8, d8",
        "str s8, [x8, #84]",
        "fsub d6, d7, d6",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "fsub d2, d5, d4",
        "mov w20, #0x7b10",
        "movk w20, #0xa7, lsl #16",
        "ldr d3, [x20]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "add w4, w7, w7, lsl #3",
        "ldr w7, [x9, #12]",
        "fadd d4, d3, d2",
        "lsl w4, w4, #4",
        "mov w20, #0x83d0",
        "movk w20, #0xb1, lsl #16",
        "mvn w27, w4",
        "adds w26, w4, w20",
        "mov x4, x26",
        "fcvt s4, d4",
        "str s4, [x8, #88]",
        "fsub d2, d3, d2",
        "fcvt s2, d2",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #92]",
        "fcvt d2, s2",
        "fneg v3.2d, v2.2d",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x7]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "fneg v5.2d, v4.2d",
        "fmul d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7, #4]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "ldr s5, [x8, #100]",
        "fcvt d5, s5",
        "fneg v6.2d, v5.2d",
        "fmul d3, d3, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7, #8]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "fneg v7.2d, v6.2d",
        "fmul d3, d3, d7",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7, #12]",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "ldr s7, [x8, #108]",
        "fcvt d7, s7",
        "fneg v8.2d, v7.2d",
        "fmul d3, d3, d8",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr s8, [x8, #112]",
        "fcvt d8, s8",
        "fneg v9.2d, v8.2d",
        "fmul d3, d3, d9",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7, #20]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr s9, [x8, #116]",
        "fcvt d9, s9",
        "fneg v9.2d, v9.2d",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #24]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr s9, [x8, #120]",
        "fcvt d9, s9",
        "fneg v9.2d, v9.2d",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #28]",
        "ldr s3, [x4, #32]",
        "fcvt d3, s3",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fneg v9.2d, v9.2d",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #32]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #36]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr s9, [x8, #120]",
        "fcvt d9, s9",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #40]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr s9, [x8, #116]",
        "fcvt d9, s9",
        "fmul d3, d3, d9",
        "fcvt s3, d3",
        "str s3, [x7, #44]",
        "ldr s3, [x4, #48]",
        "fcvt d3, s3",
        "fmul d3, d8, d3",
        "fcvt s3, d3",
        "str s3, [x7, #48]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "fmul d3, d7, d3",
        "fcvt s3, d3",
        "str s3, [x7, #52]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "fmul d3, d6, d3",
        "fcvt s3, d3",
        "str s3, [x7, #56]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "fmul d3, d5, d3",
        "fcvt s3, d3",
        "str s3, [x7, #60]",
        "ldr s3, [x4, #64]",
        "fcvt d3, s3",
        "fmul d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x7, #64]",
        "ldr s3, [x4, #68]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #68]",
        "ldr s2, [x8, #88]",
        "fcvt d2, s2",
        "ldr s3, [x4, #72]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x7, #72]",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "ldr s4, [x4, #76]",
        "fcvt d4, s4",
        "fmul d4, d4, d3",
        "fcvt s4, d4",
        "str s4, [x7, #76]",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "ldr s5, [x4, #80]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "fcvt s5, d5",
        "str s5, [x7, #80]",
        "ldr s5, [x8, #76]",
        "fcvt d5, s5",
        "ldr s6, [x4, #84]",
        "fcvt d6, s6",
        "fmul d6, d6, d5",
        "fcvt s6, d6",
        "str s6, [x7, #84]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x4, #88]",
        "fcvt d7, s7",
        "fmul d7, d7, d6",
        "fcvt s7, d7",
        "str s7, [x7, #88]",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "ldr s8, [x4, #92]",
        "fcvt d8, s8",
        "fmul d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x7, #92]",
        "ldr s8, [x8, #64]",
        "fcvt d8, s8",
        "str d8, [x8, #24]",
        "ldr s9, [x4, #96]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x7, #96]",
        "ldr s8, [x8, #60]",
        "fcvt d8, s8",
        "str d8, [x8, #128]",
        "ldr s9, [x4, #100]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x7, #100]",
        "ldr s8, [x8, #56]",
        "fcvt d8, s8",
        "ldr s9, [x4, #104]",
        "fcvt d9, s9",
        "fmul d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x7, #104]",
        "ldr s9, [x4, #108]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x7, #108]",
        "ldr s8, [x4, #112]",
        "fcvt d8, s8",
        "ldr d9, [x8, #128]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x7, #112]",
        "ldr s8, [x4, #116]",
        "fcvt d8, s8",
        "ldr d9, [x8, #24]",
        "fmul d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x7, #116]",
        "ldr s8, [x4, #120]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x7, #120]",
        "ldr s7, [x4, #124]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x7, #124]",
        "ldr s6, [x4, #128]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x7, #128]",
        "ldr s5, [x4, #132]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x7, #132]",
        "ldr s4, [x4, #136]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x7, #136]",
        "ldr s3, [x4, #140]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7, #140]",
        "mov x8, x9",
        "ldr w9, [x8], #4",
        "cfinv",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 649,
      "ExpectedInstructionCount": 958,
      "x86Insts": [
        "fld dword [esi + 0x64]",
        "mov eax,dword [esi + 0x88]",
        "fstp dword [esp + 0x5c]",
        "mov ecx,dword [esi + 0x8c]",
        "fld dword [esi + 0x70]",
        "mov edx,dword [esi + 0x90]",
        "fstp dword [esp + 0x60]",
        "mov dword [esp + 0x2e4],0x3f",
        "fld dword [esi + 0x7c]",
        "mov dword [esp + 0x94],eax",
        "fstp dword [esp + 0x64]",
        "mov dword [esp + 0x98],ecx",
        "fld dword [esi + 0x68]",
        "mov dword [esp + 0x9c],edx",
        "fstp dword [esp + 0x14]",
        "mov dword [esp + 0xe8],eax",
        "fld dword [esi + 0x74]",
        "mov dword [esp + 0xec],ecx",
        "fstp dword [esp + 0x18]",
        "mov dword [esp + 0xf0],edx",
        "fld dword [esi + 0x80]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esi + 0xf4]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x7c]",
        "fmul dword [esp + 0x1c]",
        "fstp dword [esp + 0x84]",
        "fld dword [esi + 0x6c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esi + 0x78]",
        "fstp dword [esp + 0x18]",
        "fld dword [esi + 0x84]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esi + 0xf0]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x18]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fmul dword [esp + 0x1c]",
        "fstp dword [esp + 0x58]",
        "fld dword [esi + 0x100]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x54]",
        "fmul st1",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x10]",
        "fld dword [esp + 0x50]",
        "fmul st2",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x2c]",
        "fld dword [esp + 0x58]",
        "fmul st3",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fst dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fmul st4",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x7c]",
        "fmul st4",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x84]",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x5c]",
        "fmul st4",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x60]",
        "fmul st4",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x64]",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x94]",
        "fld dword [esp + 0x84]",
        "fadd st0,st1",
        "fstp dword [esp + 0x84]",
        "fld dword [esp + 0x98]",
        "fld dword [esp + 0x7c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x9c]",
        "fld dword [esp + 0x5c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x84]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x44]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x10]",
        "fstp dword [esp + 0x5c]",
        "mov eax,dword [esp + 0x5c]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xf4],eax",
        "fstp dword [esp + 0x60]",
        "mov ecx,dword [esp + 0x60]",
        "fld dword [esp + 0x14]",
        "mov dword [esp + 0xf8],ecx",
        "fstp dword [esp + 0x64]",
        "mov edx,dword [esp + 0x64]",
        "fxch st4",
        "mov dword [esp + 0xfc],edx",
        "fst dword [esp + 0x5c]",
        "fxch st3",
        "fst dword [esp + 0x84]",
        "fxch st5",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x100],eax",
        "fstp dword [esp + 0x18]",
        "mov ecx,dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0x104],ecx",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fxch st3",
        "mov dword [esp + 0x108],edx",
        "fst dword [esp + 0x5c]",
        "fxch st5",
        "fst dword [esp + 0x84]",
        "fxch st3",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x10c],eax",
        "fstp dword [esp + 0x18]",
        "mov ecx,dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0x110],ecx",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fxch st5",
        "mov dword [esp + 0x114],edx",
        "fstp dword [esp + 0x5c]",
        "fxch st2",
        "fstp dword [esp + 0x84]",
        "fxch st3",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x2c]",
        "fadd dword [esp + 0x10]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "mov eax,dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x118],eax",
        "mov eax,dword [ebx + 0x88]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x10]",
        "mov ecx,dword [esp + 0x18]",
        "fstp dword [esp + 0x1c]",
        "mov edx,dword [esp + 0x1c]",
        "fld dword [ebx + 0x64]",
        "mov dword [esp + 0x11c],ecx",
        "mov ecx,dword [ebx + 0x8c]",
        "fstp dword [esp + 0x70]",
        "fld dword [ebx + 0x70]",
        "mov dword [esp + 0x120],edx",
        "mov edx,dword [ebx + 0x90]",
        "fstp dword [esp + 0x74]",
        "fld dword [ebx + 0x7c]",
        "mov dword [esp + 0x94],eax",
        "mov dword [esp + 0x98],ecx",
        "mov dword [esp + 0x9c],edx",
        "fstp dword [esp + 0x78]",
        "mov dword [esp + 0xac],eax",
        "fld dword [ebx + 0x68]",
        "mov dword [esp + 0xb0],ecx",
        "fstp dword [esp + 0x2c]",
        "mov dword [esp + 0xb4],edx",
        "fld dword [ebx + 0x74]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebx + 0x80]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebx + 0xf4]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fstp dword [esp + 0x48]",
        "fmul dword [esp + 0x34]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebx + 0x6c]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebx + 0x78]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebx + 0x84]",
        "fstp dword [esp + 0x64]",
        "fld dword [ebx + 0xf0]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fld dword [esp + 0x5c]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fmul dword [esp + 0x64]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebx + 0x100]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x5c]",
        "fld dword [esp + 0x2c]",
        "fmul st2",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x84]",
        "fld dword [esp + 0x10]",
        "fmul st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x14]",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x40]",
        "fmul st4",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x48]",
        "fmul st4",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x44]",
        "fmul st4",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x70]",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x74]",
        "fmul st4",
        "fstp dword [esp + 0x8c]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x78]",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x94]",
        "fld dword [esp + 0x14]",
        "fadd st0,st1",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x98]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x9c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xb8],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xbc],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "fxch st4",
        "mov dword [esp + 0xc0],edx",
        "fst dword [esp + 0x5c]",
        "fxch st3",
        "fst dword [esp + 0x84]",
        "fxch st5",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xc4],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xc8],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "fxch st3",
        "mov dword [esp + 0xcc],edx",
        "fst dword [esp + 0x5c]",
        "fxch st5",
        "fst dword [esp + 0x84]",
        "fxch st3",
        "fst dword [esp + 0x7c]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x8c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x4c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd st0,st3",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd st0,st2",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st5",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x14]",
        "fstp dword [esp + 0x70]",
        "mov eax,dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0xd0],eax",
        "fstp dword [esp + 0x74]",
        "mov ecx,dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "mov dword [esp + 0xd4],ecx",
        "fstp dword [esp + 0x78]",
        "mov edx,dword [esp + 0x78]",
        "mov dword [esp + 0xd8],edx",
        "fxch st5",
        "push 0x0",
        "fstp dword [esp + 0x60]",
        "fxch st2",
        "fstp dword [esp + 0x88]",
        "fxch st3",
        "fstp dword [esp + 0x80]",
        "fld dword [esp + 0x2c]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x58]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x54]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x5c]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x90]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x50]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x30]",
        "fadd dword [esp + 0x14]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x44]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x60]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x88]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x80]",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0x18]",
        "fstp dword [esp + 0x74]",
        "mov eax,dword [esp + 0x74]",
        "fld dword [esp + 0x30]",
        "mov dword [esp + 0xe0],eax",
        "fstp dword [esp + 0x78]",
        "mov ecx,dword [esp + 0x78]",
        "fld dword [esp + 0x14]",
        "mov dword [esp + 0xe4],ecx",
        "fstp dword [esp + 0x7c]",
        "mov edx,dword [esp + 0x7c]",
        "lea ecx,[esp + 0x190]",
        "mov dword [esp + 0xe8],edx",
        "call 0x0070df30",
        "mov dword [esp + 0x198],esi",
        "add esi,0xec",
        "push esi",
        "lea ecx,[esp + 0x190]",
        "mov dword [esp + 0x314],0x0",
        "call 0x0070e040",
        "mov ecx,0x19",
        "lea esi,[esp + 0x1b8]",
        "lea edi,[esp + 0x21c]",
        "rep movsd",
        "mov dword [esp + 0x198],ebx",
        "add ebx,0xec",
        "push ebx",
        "lea ecx,[esp + 0x190]",
        "call 0x0070e040",
        "mov ecx,0x19",
        "lea esi,[esp + 0x1b8]",
        "lea edi,[esp + 0x284]",
        "rep movsd",
        "lea esi,[esp + 0x124]",
        "mov edi,0x5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x10, #100]",
        "ldr w4, [x10, #136]",
        "str s2, [x8, #92]",
        "ldr w7, [x10, #140]",
        "ldr s2, [x10, #112]",
        "ldr w5, [x10, #144]",
        "str s2, [x8, #96]",
        "mov w20, #0x3f",
        "str w20, [x8, #740]",
        "ldr s2, [x10, #124]",
        "str w4, [x8, #148]",
        "str s2, [x8, #100]",
        "str w7, [x8, #152]",
        "ldr s2, [x10, #104]",
        "str w5, [x8, #156]",
        "str s2, [x8, #20]",
        "str w4, [x8, #232]",
        "ldr s2, [x10, #116]",
        "str w7, [x8, #236]",
        "str s2, [x8, #24]",
        "str w5, [x8, #240]",
        "ldr s2, [x10, #128]",
        "str s2, [x8, #28]",
        "ldr s2, [x10, #244]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "fcvt d2, s2",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #140]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #124]",
        "ldr s3, [x8, #28]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #132]",
        "ldr s2, [x10, #108]",
        "str s2, [x8, #20]",
        "ldr s2, [x10, #120]",
        "str s2, [x8, #24]",
        "ldr s2, [x10, #132]",
        "str s2, [x8, #28]",
        "ldr s2, [x10, #240]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "fcvt d2, s2",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #84]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #80]",
        "ldr s3, [x8, #28]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #88]",
        "ldr s2, [x10, #256]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "fcvt d2, s2",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #40]",
        "ldr s3, [x8, #40]",
        "str s3, [x8, #16]",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "fmul d4, d4, d2",
        "fcvt s4, d4",
        "str s4, [x8, #40]",
        "ldr s4, [x8, #40]",
        "str s4, [x8, #44]",
        "ldr s5, [x8, #88]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #40]",
        "ldr s5, [x8, #40]",
        "str s5, [x8, #20]",
        "ldr s6, [x8, #140]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #76]",
        "ldr s6, [x8, #76]",
        "str s6, [x8, #68]",
        "ldr s6, [x8, #124]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #140]",
        "ldr s6, [x8, #140]",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #132]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #88]",
        "ldr s6, [x8, #88]",
        "str s6, [x8, #64]",
        "ldr s6, [x8, #92]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #80]",
        "ldr s6, [x8, #80]",
        "str s6, [x8, #132]",
        "ldr s6, [x8, #96]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #84]",
        "ldr s6, [x8, #84]",
        "str s6, [x8, #124]",
        "ldr s6, [x8, #100]",
        "fcvt d6, s6",
        "fmul d2, d2, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #148]",
        "fcvt d2, s2",
        "ldr s6, [x8, #132]",
        "fcvt d6, s6",
        "fadd d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #132]",
        "ldr s6, [x8, #152]",
        "fcvt d6, s6",
        "ldr s7, [x8, #124]",
        "fcvt d7, s7",
        "fadd d7, d7, d6",
        "fcvt s7, d7",
        "str s7, [x8, #124]",
        "ldr s7, [x8, #156]",
        "fcvt d7, s7",
        "ldr s8, [x8, #92]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #132]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #124]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #92]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #68]",
        "fcvt d8, s8",
        "ldr s9, [x8, #16]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #72]",
        "fcvt d8, s8",
        "ldr s9, [x8, #44]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #64]",
        "fcvt d8, s8",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #16]",
        "str s8, [x8, #92]",
        "ldr w4, [x8, #92]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #244]",
        "str s8, [x8, #96]",
        "ldr w7, [x8, #96]",
        "ldr s8, [x8, #20]",
        "str w7, [x8, #248]",
        "str s8, [x8, #100]",
        "ldr w5, [x8, #100]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #252]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "fadd d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #132]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #256]",
        "str s8, [x8, #24]",
        "ldr w7, [x8, #24]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #260]",
        "str s8, [x8, #28]",
        "ldr w5, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #264]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "fadd d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #132]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #268]",
        "str s8, [x8, #24]",
        "ldr w7, [x8, #24]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #272]",
        "str s8, [x8, #28]",
        "ldr w5, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #276]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s3, [x8, #76]",
        "str s3, [x8, #64]",
        "ldr s3, [x8, #140]",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #88]",
        "str s3, [x8, #68]",
        "ldr s3, [x8, #80]",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #84]",
        "str s3, [x8, #44]",
        "ldr s3, [x8, #40]",
        "str s3, [x8, #16]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "fadd d2, d6, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "fadd d2, d7, d2",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #64]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "ldr s3, [x8, #72]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #92]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #44]",
        "fcvt d2, s2",
        "ldr s3, [x8, #132]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "ldr s3, [x8, #124]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #20]",
        "str s2, [x8, #20]",
        "ldr w4, [x8, #20]",
        "ldr s2, [x8, #44]",
        "str w4, [x8, #280]",
        "ldr w4, [x6, #136]",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #16]",
        "ldr w7, [x8, #24]",
        "str s2, [x8, #28]",
        "ldr w5, [x8, #28]",
        "ldr s2, [x6, #100]",
        "str w7, [x8, #284]",
        "ldr w7, [x6, #140]",
        "str s2, [x8, #112]",
        "ldr s2, [x6, #112]",
        "str w5, [x8, #288]",
        "ldr w5, [x6, #144]",
        "str s2, [x8, #116]",
        "ldr s2, [x6, #124]",
        "str w4, [x8, #148]",
        "str w7, [x8, #152]",
        "str w5, [x8, #156]",
        "str s2, [x8, #120]",
        "str w4, [x8, #172]",
        "ldr s2, [x6, #104]",
        "str w7, [x8, #176]",
        "str s2, [x8, #44]",
        "str w5, [x8, #180]",
        "ldr s2, [x6, #116]",
        "str s2, [x8, #48]",
        "ldr s2, [x6, #128]",
        "str s2, [x8, #52]",
        "ldr s2, [x6, #244]",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #44]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #64]",
        "ldr s3, [x8, #48]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #52]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x6, #108]",
        "str s2, [x8, #92]",
        "ldr s2, [x6, #120]",
        "str s2, [x8, #96]",
        "ldr s2, [x6, #132]",
        "str s2, [x8, #100]",
        "ldr s2, [x6, #240]",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #92]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #96]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #44]",
        "ldr s3, [x8, #100]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #16]",
        "ldr s2, [x6, #256]",
        "str s2, [x8, #40]",
        "ldr s2, [x8, #40]",
        "fcvt d2, s2",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "fmul d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #20]",
        "str s3, [x8, #92]",
        "ldr s4, [x8, #44]",
        "fcvt d4, s4",
        "fmul d4, d4, d2",
        "fcvt s4, d4",
        "str s4, [x8, #20]",
        "ldr s4, [x8, #20]",
        "str s4, [x8, #132]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "ldr s5, [x8, #20]",
        "str s5, [x8, #124]",
        "ldr s6, [x8, #64]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #40]",
        "ldr s6, [x8, #40]",
        "str s6, [x8, #64]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #84]",
        "ldr s6, [x8, #84]",
        "str s6, [x8, #72]",
        "ldr s6, [x8, #68]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #80]",
        "ldr s6, [x8, #80]",
        "str s6, [x8, #68]",
        "ldr s6, [x8, #112]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #88]",
        "ldr s6, [x8, #88]",
        "str s6, [x8, #20]",
        "ldr s6, [x8, #116]",
        "fcvt d6, s6",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #140]",
        "ldr s6, [x8, #140]",
        "str s6, [x8, #44]",
        "ldr s6, [x8, #120]",
        "fcvt d6, s6",
        "fmul d2, d2, d6",
        "mov w20, #0x0",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #76]",
        "str s2, [x8, #16]",
        "ldr s2, [x8, #148]",
        "fcvt d2, s2",
        "ldr s6, [x8, #20]",
        "fcvt d6, s6",
        "fadd d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "ldr s6, [x8, #152]",
        "fcvt d6, s6",
        "ldr s7, [x8, #44]",
        "fcvt d7, s7",
        "fadd d7, d7, d6",
        "fcvt s7, d7",
        "str s7, [x8, #44]",
        "ldr s7, [x8, #156]",
        "fcvt d7, s7",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #132]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #184]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #188]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #192]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "fadd d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #132]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #196]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #200]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "str w5, [x8, #204]",
        "str s3, [x8, #92]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #124]",
        "ldr s8, [x8, #40]",
        "str s8, [x8, #64]",
        "ldr s8, [x8, #84]",
        "str s8, [x8, #72]",
        "ldr s8, [x8, #80]",
        "str s8, [x8, #68]",
        "ldr s8, [x8, #88]",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #140]",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #76]",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "fadd d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d7",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #64]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #72]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #68]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "ldr s9, [x8, #92]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #20]",
        "ldr s8, [x8, #44]",
        "fcvt d8, s8",
        "ldr s9, [x8, #132]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #44]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "ldr s9, [x8, #124]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x8, #20]",
        "str s8, [x8, #112]",
        "ldr w4, [x8, #112]",
        "ldr s8, [x8, #44]",
        "str w4, [x8, #208]",
        "str s8, [x8, #116]",
        "ldr w7, [x8, #116]",
        "ldr s8, [x8, #16]",
        "str w7, [x8, #212]",
        "str s8, [x8, #120]",
        "ldr w5, [x8, #120]",
        "str w5, [x8, #216]",
        "strb wzr, [x28, #1049]",
        "str w20, [x8, #-4]!",
        "str s3, [x8, #96]",
        "strb wzr, [x28, #1049]",
        "str s4, [x8, #136]",
        "strb wzr, [x28, #1049]",
        "str s5, [x8, #128]",
        "ldr s3, [x8, #44]",
        "str s3, [x8, #68]",
        "ldr s3, [x8, #88]",
        "str s3, [x8, #76]",
        "ldr s3, [x8, #84]",
        "str s3, [x8, #72]",
        "ldr s3, [x8, #92]",
        "str s3, [x8, #24]",
        "ldr s3, [x8, #144]",
        "str s3, [x8, #48]",
        "ldr s3, [x8, #80]",
        "str s3, [x8, #20]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "fadd d2, d6, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "fadd d2, d7, d2",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #72]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "ldr s3, [x8, #96]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "ldr s3, [x8, #136]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #128]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #20]",
        "ldr s2, [x8, #24]",
        "str s2, [x8, #116]",
        "ldr w4, [x8, #116]",
        "ldr s2, [x8, #48]",
        "str w4, [x8, #224]",
        "str s2, [x8, #120]",
        "ldr w7, [x8, #120]",
        "ldr s2, [x8, #20]",
        "str w7, [x8, #228]",
        "str s2, [x8, #124]",
        "ldr w5, [x8, #124]",
        "add w7, w8, #0x190 (400)",
        "str w5, [x8, #232]",
        "mov w20, #0xa3f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xfefe",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block4": {
      "x86InstructionCount": 2050,
      "ExpectedInstructionCount": 30,
      "x86Insts": [
        "fldz",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x37",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33c14",
        "push 0x52424157",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x38",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33c04",
        "push 0x41574157",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x2b",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bfc",
        "push 0x444c4853",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bf0",
        "push 0x48534946",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bdc",
        "push 0x4853494c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bcc",
        "push 0x48535246",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x52485446",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x1000073",
        "push 0xb",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bc4",
        "push 0x4e445242",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000076",
        "push 0xb",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bbc",
        "push 0x52485446",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0xe0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa32700",
        "push 0x4b434f4c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0xc0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x0",
        "push 0xa33bb4",
        "push 0x4e45504f",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x48534946",
        "push 0x49465352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x3d",
        "push 0x21000475",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ba8",
        "push 0x47444946",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4853494c",
        "push 0x48535352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x44",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b98",
        "push 0x47444853",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x48535246",
        "push 0x52465352",
        "push 0x4c505344",
        "push 0x3",
        "push 0x3e",
        "push 0x1000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b88",
        "push 0x47445246",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x54414241",
        "push 0x54414f46",
        "push 0x54414552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100075",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b74",
        "push 0x54414744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b64",
        "push 0x45484744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534241",
        "push 0x50534f46",
        "push 0x50534552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000075",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b50",
        "push 0x50534744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x41464241",
        "push 0x41464f46",
        "push 0x41464552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000075",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b40",
        "push 0x41464744",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x54414241",
        "push 0x54414f46",
        "push 0x54414552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100077",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b30",
        "push 0x54415244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x4b534241",
        "push 0x4b534f46",
        "push 0x4c505344",
        "push 0x3",
        "push 0x40",
        "push 0x80077",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b24",
        "push 0x4b535244",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b14",
        "push 0x45485244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534241",
        "push 0x50534f46",
        "push 0x50534552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33b00",
        "push 0x50535244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x41464241",
        "push 0x41464f46",
        "push 0x41464552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000077",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33af0",
        "push 0x41465244",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x4c505344",
        "push 0x49465352",
        "push 0x48534946",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33adc",
        "push 0x49464b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x52465352",
        "push 0x48535246",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ac8",
        "push 0x52464b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x48535352",
        "push 0x4853494c",
        "push 0x3",
        "push 0x40",
        "push 0x100007f",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33ab4",
        "push 0x48534b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x2c",
        "push 0x4c505344",
        "push 0x414d5352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x40",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33aa0",
        "push 0x414d4b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x49445352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x3f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a8c",
        "push 0x49444b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x4f505352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x43",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a78",
        "push 0x4f504b57",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x574e5352",
        "push 0x2",
        "push 0x40",
        "push 0x100007f",
        "push 0x41",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a5c",
        "push 0x574e4b57",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x48535246",
        "push 0x4853494c",
        "push 0x48534946",
        "push 0x444c4853",
        "push 0x4c505344",
        "push 0x5",
        "push 0x40",
        "push 0x75",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a48",
        "push 0x52414944",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x48535246",
        "push 0x4853494c",
        "push 0x48534946",
        "push 0x444c4853",
        "push 0x4c505344",
        "push 0x5",
        "push 0x40",
        "push 0x75",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a30",
        "push 0x45574944",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x0",
        "push 0x3f",
        "push 0x10000092",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a24",
        "push 0x504d4156",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x14",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a18",
        "push 0x47445553",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000112",
        "push 0x39",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa33a08",
        "push 0x414d5453",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x4f505543",
        "push 0x1",
        "push 0x43",
        "push 0x800000",
        "fldz",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339fc",
        "push 0x4e534f50",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x49445543",
        "push 0x1",
        "push 0x3f",
        "push 0x800000",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339ec",
        "push 0x45534944",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x45484241",
        "push 0x45484f46",
        "push 0x45484552",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x21000075",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x2",
        "push 0xa339cc",
        "push 0x594d5544",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x0",
        "push -0x1",
        "push 0x1000172",
        "push 0x2f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339bc",
        "push 0x49564e49",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x2e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339b0",
        "push 0x4c4d4843",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x41505543",
        "push 0x2",
        "push 0x42",
        "push 0x1000173",
        "push 0x30",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa339a4",
        "push 0x41524150",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000173",
        "push 0x31",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa3399c",
        "push 0x434e4c53",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x1000062",
        "push 0x6",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33994",
        "push 0x4d524843",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x594c4152",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000066",
        "push 0x22",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33988",
        "push 0x4f4d4544",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4f4d4544",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x1000062",
        "push 0x22",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33980",
        "push 0x594c4152",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4d4c4143",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000062",
        "push 0x21",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33978",
        "push 0x5a4e5246",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x5a4e5246",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x41000066",
        "push 0x21",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33970",
        "push 0x4d4c4143",
        "call 0x00417220",
        "add esp,0x28",
        "push 0x0",
        "push -0x1",
        "push 0x1000112",
        "push 0x29",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33964",
        "push 0x4559454e",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x80000072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa3395c",
        "push 0x5448474c",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x81000072",
        "push 0x46",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33950",
        "push 0x4b524144",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push 0x40",
        "push 0xf0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33948",
        "push 0x4c505344",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x1",
        "push 0x40",
        "push 0x163",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa3393c",
        "push 0x50525453",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x81000242",
        "push 0x3c",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33930",
        "push 0x454c4554",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x81000012",
        "push 0x3a",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33924",
        "push 0x54435444",
        "call 0x00417220",
        "add esp,0x20",
        "fldz",
        "push 0x0",
        "push 0x40",
        "push 0x1000072",
        "push 0x34",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33910",
        "push 0x53424153",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x35",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa33908",
        "push 0x434c4652",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100001a",
        "push 0x3b",
        "push ecx",
        "fstp dword [esp]",
        "push 0x4",
        "push 0xa338f8",
        "push 0x47444552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100070",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338e4",
        "push 0x54414552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338d4",
        "push 0x45484552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338bc",
        "push 0x50534552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000070",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa338ac",
        "push 0x41464552",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33898",
        "push 0x54414f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x80072",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33888",
        "push 0x4b534f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33878",
        "push 0x45484f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33860",
        "push 0x50534f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000072",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33850",
        "push 0x41464f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4b535244",
        "push 0x4b534241",
        "push 0x4c505344",
        "push 0x3",
        "push 0x40",
        "push 0x80027",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33840",
        "push 0x4b534241",
        "call 0x00417220",
        "add esp,0x2c",
        "push 0x54414744",
        "push 0x54415244",
        "push 0x54414241",
        "fldz",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x100027",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3382c",
        "push 0x54414241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x594d5544",
        "push 0x45484744",
        "push 0x45485244",
        "push 0x45484241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0x8",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3381c",
        "push 0x45484241",
        "call 0x00417220",
        "fldz",
        "add esp,0x34",
        "push 0x41464744",
        "push 0x41465244",
        "push 0x41464241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0xa",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3380c",
        "push 0x41464241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x50534744",
        "push 0x50535244",
        "push 0x50534241",
        "push 0x4c505344",
        "push 0x4",
        "push 0x40",
        "push 0x1000025",
        "push 0x9",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337f8",
        "push 0x50534241",
        "call 0x00417220",
        "fldz",
        "add esp,0x30",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3d",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337ec",
        "push 0x49465352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3e",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337dc",
        "push 0x52465352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x44",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337cc",
        "push 0x48535352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x40",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337bc",
        "push 0x414d5352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x3f",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa337ac",
        "push 0x49445352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x43",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3379c",
        "push 0x4f505352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x42",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33788",
        "push 0x41505352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100007a",
        "push 0x41",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33770",
        "push 0x574e5352",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x100017a",
        "push 0x47",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3375c",
        "push 0x44575352",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa3374c",
        "push 0x49445543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33740",
        "push 0x4f505543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1f0",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33730",
        "push 0x41505543",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x1000012",
        "push 0x28",
        "push ecx",
        "fstp dword [esp]",
        "push 0x5",
        "push 0xa33714",
        "push 0x4d4d4f46",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33704",
        "push 0x4f48475a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336f8",
        "push 0x43494c5a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "fldz",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336e8",
        "push 0x454b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336d0",
        "push 0x414b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa336b4",
        "push 0x434b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3369c",
        "push 0x484b535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3368c",
        "push 0x4152575a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33678",
        "push 0x4c52575a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33668",
        "push 0x4d4f5a5a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33650",
        "push 0x5a44485a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33638",
        "push 0x4149465a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33620",
        "push 0x4152465a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33608",
        "push 0x4154535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335f8",
        "push 0x4541445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335e8",
        "push 0x4552445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335d4",
        "push 0x4c52445a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335c4",
        "push 0x4143535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa335b0",
        "push 0x414c435a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33598",
        "push 0x4450535a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33588",
        "push 0x5649585a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33578",
        "push 0x3130305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33568",
        "push 0x3230305a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33558",
        "push 0x3330305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33548",
        "push 0x3430305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33538",
        "push 0x3530305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33528",
        "push 0x3630305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33518",
        "push 0x3730305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33508",
        "push 0x3830305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334f8",
        "push 0x3930305a",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "fldz",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334e8",
        "push 0x3031305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334d8",
        "push 0x3131305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334c8",
        "push 0x3231305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334b8",
        "push 0x3331305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa334a8",
        "push 0x3431305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33498",
        "push 0x3531305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33488",
        "push 0x3631305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33478",
        "push 0x3731305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33468",
        "push 0x3831305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33458",
        "push 0x3931305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x40112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33448",
        "push 0x3032305a",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x40000062",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33434",
        "push 0x55484f43",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x40000062",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x3",
        "push 0xa33420",
        "push 0x52434f43",
        "call 0x00417220",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33414",
        "push 0x58415742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33408",
        "push 0x4f425742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333f8",
        "push 0x41445742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333ec",
        "push 0x414d5742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333e0",
        "push 0x57535742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333d4",
        "push 0x4f424142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333c4",
        "push 0x55434142",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "fldz",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333b4",
        "push 0x41474142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa333a4",
        "push 0x52474142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33394",
        "push 0x45484142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33384",
        "push 0x48534142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3336c",
        "push 0x31304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33354",
        "push 0x32304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3333c",
        "push 0x33304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33324",
        "push 0x34304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3330c",
        "push 0x35304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332f4",
        "push 0x36304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332dc",
        "push 0x37304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332c4",
        "push 0x38304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa332ac",
        "push 0x39304142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33294",
        "push 0x30314142",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3327c",
        "push 0x31305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33264",
        "push 0x32305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3324c",
        "push 0x33305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33234",
        "push 0x34305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3321c",
        "push 0x35305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33204",
        "push 0x36305742",
        "call 0x00417220",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fldz",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331ec",
        "push 0x37305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331d4",
        "push 0x38305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331bc",
        "push 0x39305742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa331a4",
        "push 0x30315742",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x594c4152",
        "push 0x4c505344",
        "push 0x2",
        "push 0x40",
        "push 0x40000063",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33198",
        "push 0x4e525554",
        "call 0x00417220",
        "fldz",
        "add esp,0x28",
        "push 0x4c505344",
        "push 0x1",
        "push -0x1",
        "push 0x170",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x6",
        "push 0xa33188",
        "push 0x46464553",
        "call 0x00417220",
        "fldz",
        "add esp,0x24",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa3316c",
        "push 0x4854594d",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x20112",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33154",
        "push 0x4c48594d",
        "call 0x00417220",
        "fldz",
        "add esp,0x20",
        "push 0x0",
        "push -0x1",
        "push 0x10000360",
        "push 0x0",
        "push ecx",
        "fstp dword [esp]",
        "push 0x1",
        "push 0xa33148",
        "push 0x4e414552",
        "call 0x00417220",
        "add esp,0x20"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "fmov d2, x20",
        "str w20, [x8, #-4]!",
        "mov w21, #0xffffffff",
        "str w21, [x8, #-4]!",
        "mov w21, #0x172",
        "movk w21, #0x100, lsl #16",
        "str w21, [x8, #-4]!",
        "mov w21, #0x37",
        "stp w7, w21, [x8, #-8]!",
        "fcvt s2, d2",
        "str s2, [x8]",
        "str w20, [x8, #-4]!",
        "mov w20, #0x3c14",
        "movk w20, #0xa3, lsl #16",
        "str w20, [x8, #-4]!",
        "mov w20, #0x4157",
        "movk w20, #0x5242, lsl #16",
        "str w20, [x8, #-4]!",
        "mov w20, #0x22",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 368,
      "ExpectedInstructionCount": 49,
      "x86Insts": [
        "mov ebx,dword [eax + 0x68]",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x68]",
        "sub esp,0x14",
        "fstp dword [esp + 0x10]",
        "movzx ecx,al",
        "fld1",
        "mov dword [esp + 0x38],ecx",
        "fstp dword [esp + 0xc]",
        "movzx edx,bl",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "mov dword [esp + 0x40],eax",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],edx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx eax,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],eax",
        "fld1",
        "movzx ecx,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],ecx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx edx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],edx",
        "fldz",
        "shr ebx,0x10",
        "movzx eax,bl",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],eax",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "fst dword [esp + 0x30]",
        "mov ecx,dword [esp + 0x24]",
        "mov edx,dword [esp + 0x28]",
        "mov eax,dword [esp + 0x2c]",
        "mov dword [0x00b45e14],ecx",
        "mov ecx,dword [esp + 0x30]",
        "mov [0x00b45e1c],eax",
        "mov dword [0x00b45e20],ecx",
        "mov dword [0x00b45e18],edx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x6c]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x40],eax",
        "fstp dword [esp + 0xc]",
        "movzx eax,al",
        "fldz",
        "mov dword [esp + 0x38],eax",
        "fstp dword [esp + 0x8]",
        "mov edx,dword [esi + 0x20]",
        "fild dword [esp + 0x38]",
        "mov ebx,dword [edx + 0x6c]",
        "fld qword [0x00a3ddd8]",
        "movzx ecx,bl",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],ecx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx edx,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],edx",
        "fld1",
        "movzx eax,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],eax",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx ecx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],ecx",
        "fldz",
        "shr ebx,0x10",
        "movzx edx,bl",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],edx",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "mov eax,dword [esp + 0x24]",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "mov ecx,dword [esp + 0x28]",
        "mov edx,dword [esp + 0x2c]",
        "fst dword [esp + 0x30]",
        "mov [0x00b45e24],eax",
        "mov eax,dword [esp + 0x30]",
        "mov dword [0x00b45e2c],edx",
        "mov [0x00b45e30],eax",
        "mov dword [0x00b45e28],ecx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [ebp + 0x70]",
        "fstp dword [esp + 0x10]",
        "movzx edx,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x38],edx",
        "fldz",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0x8]",
        "mov ebx,dword [ecx + 0x70]",
        "fild dword [esp + 0x38]",
        "mov dword [esp + 0x40],eax",
        "fld qword [0x00a3ddd8]",
        "movzx eax,bl",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],eax",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x24]",
        "fld dword [esi + 0x2c]",
        "movzx ecx,byte [esp + 0x41]",
        "fstp dword [esp + 0x10]",
        "mov dword [esp + 0x38],ecx",
        "fld1",
        "movzx edx,bh",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x38]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x38],edx",
        "fild dword [esp + 0x38]",
        "fdivrp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x38]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x28]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esi + 0x2c]",
        "fstp dword [esp + 0x10]",
        "shr eax,0x10",
        "fld1",
        "movzx eax,al",
        "fstp dword [esp + 0xc]",
        "mov dword [esp + 0x40],eax",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fild dword [esp + 0x40]",
        "fld qword [0x00a3ddd8]",
        "fdiv st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "shr ebx,0x10",
        "movzx ecx,bl",
        "fstp dword [esp + 0x4]",
        "mov dword [esp + 0x40],ecx",
        "fild dword [esp + 0x40]",
        "fdivrp",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x40]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [esp + 0x2c]",
        "fld1",
        "mov edx,dword [esp + 0x24]",
        "mov eax,dword [esp + 0x28]",
        "fst dword [esp + 0x30]",
        "mov ecx,dword [esp + 0x2c]",
        "mov dword [0x00b45e34],edx",
        "mov edx,dword [esp + 0x30]",
        "mov [0x00b45e38],eax",
        "mov dword [0x00b45e3c],ecx",
        "mov dword [0x00b45e40],edx",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [esi + 0x24]",
        "fstp dword [esp + 0x10]",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x4c]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e44]",
        "fld dword [esi + 0x2c]",
        "mov edx,dword [esi + 0x24]",
        "mov eax,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [edx + 0x50]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x50]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e48]",
        "fld dword [esi + 0x2c]",
        "mov ecx,dword [esi + 0x24]",
        "add esp,0x8",
        "fstp dword [esp + 0x8]",
        "fld1",
        "fstp dword [esp + 0x4]",
        "fldz",
        "fstp dword [esp]",
        "call 0x004ed660",
        "push ecx",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp]",
        "call 0x004ed660",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e4c]",
        "mov ecx,dword [esi + 0x24]",
        "fld dword [esi + 0x2c]",
        "add esp,0x8",
        "fstp dword [esp + 0x8]",
        "fld1",
        "fstp dword [esp + 0x4]",
        "fldz",
        "fstp dword [esp]",
        "call 0x004ed680",
        "push ecx",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp]",
        "call 0x004ed680",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e50]",
        "fld dword [esi + 0x2c]",
        "mov ecx,dword [esi + 0x24]",
        "mov edx,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [ecx + 0x58]",
        "fstp dword [esp + 0x4]",
        "fld dword [edx + 0x58]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e54]",
        "fld dword [esi + 0x2c]",
        "mov eax,dword [esi + 0x24]",
        "mov ecx,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x5c]",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x5c]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "fstp dword [0x00b45e58]",
        "fld dword [esi + 0x2c]",
        "mov edx,dword [esi + 0x24]",
        "mov eax,dword [esi + 0x20]",
        "fstp dword [esp + 0x10]",
        "fld1",
        "fstp dword [esp + 0xc]",
        "fldz",
        "fstp dword [esp + 0x8]",
        "fld dword [edx + 0x54]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x54]",
        "fstp dword [esp]",
        "call 0x00410eb0",
        "add esp,0x14"
      ],
      "ExpectedArm64ASM": [
        "ldr w6, [x4, #104]",
        "ldr s2, [x10, #44]",
        "ldr w4, [x9, #104]",
        "mvn w27, w8",
        "subs w26, w8, #0x14 (20)",
        "mov x8, x26",
        "str s2, [x8, #16]",
        "uxtb w7, w4",
        "mov x20, #0x3ff0000000000000",
        "fmov d2, x20",
        "str w7, [x8, #56]",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "uxtb w5, w6",
        "mov w20, #0x0",
        "fmov d2, x20",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr w20, [x8, #56]",
        "scvtf d2, w20",
        "str w4, [x8, #64]",
        "mov w20, #0xddd8",
        "movk w20, #0xa3, lsl #16",
        "ldr d3, [x20]",
        "fdiv d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #56]",
        "str s2, [x8, #4]",
        "str w5, [x8, #56]",
        "ldr w20, [x8, #56]",
        "scvtf d2, w20",
        "fdiv d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #56]",
        "str s2, [x8]",
        "mov w20, #0x5e",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xc0c0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 315,
      "ExpectedInstructionCount": 27,
      "x86Insts": [
        "mov eax,dword [esp + 0x110]",
        "fldz",
        "mov ecx,dword [eax]",
        "mov edx,dword [esp + 0x5c]",
        "mov ebx,dword [edx + 0x18]",
        "mov esi,dword [esp + 0x58]",
        "mov dword [ebx + 0xc],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [ebx + 0x10],edx",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebx + 0x14],eax",
        "mov ecx,dword [esi + 0x50]",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784370",
        "mov ecx,dword [esi + 0x50]",
        "fstp dword [esp + 0x54]",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784370",
        "fstp dword [esp + 0x64]",
        "mov eax,dword [esp + 0x11c]",
        "lea ebp,[ebx + 0x1c]",
        "mov esi,eax",
        "mov ecx,0x9",
        "mov edi,ebp",
        "rep movsd",
        "fld dword [eax + 0x4]",
        "mov ecx,dword [esp + 0x120]",
        "sub esp,0xc",
        "fmul dword [ecx + 0x4]",
        "fld dword [ecx]",
        "fmul dword [eax]",
        "faddp",
        "fld dword [eax + 0x8]",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + 0xc]",
        "fmul dword [ecx]",
        "fld dword [eax + 0x10]",
        "fmul dword [ecx + 0x4]",
        "faddp",
        "fld dword [eax + 0x14]",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x44]",
        "fld dword [eax + 0x18]",
        "fmul dword [ecx]",
        "fld dword [eax + 0x1c]",
        "fmul dword [ecx + 0x4]",
        "faddp",
        "fld dword [eax + 0x20]",
        "mov eax,esp",
        "fmul dword [ecx + 0x8]",
        "faddp",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x28]",
        "fstp dword [esp + 0x48]",
        "mov ecx,dword [esp + 0x48]",
        "fld dword [esp + 0x44]",
        "mov dword [eax],ecx",
        "fstp dword [esp + 0x4c]",
        "mov edx,dword [esp + 0x4c]",
        "fld dword [esp + 0x34]",
        "mov dword [eax + 0x4],edx",
        "fstp dword [esp + 0x50]",
        "mov ecx,dword [esp + 0x50]",
        "fld dword [esp + 0x3c]",
        "mov dword [eax + 0x8],ecx",
        "push ecx",
        "mov ecx,ebp",
        "fstp dword [esp]",
        "call 0x0078f050",
        "fld dword [esp + 0x54]",
        "sub esp,0x8",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x24]",
        "mov ecx,ebp",
        "fstp dword [esp]",
        "call 0x0078ef60",
        "fld dword [ebp + 0xc]",
        "mov esi,dword [esp + 0x58]",
        "fld dword [0x00b2b71c]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebp]",
        "fld dword [0x00b2b718]",
        "fld st0",
        "fmulp st2",
        "fxch st3",
        "faddp",
        "fld dword [ebp + 0x18]",
        "fld dword [0x00b2b720]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp + 0x10]",
        "fmul st2",
        "fld dword [ebp + 0x4]",
        "fmul st4",
        "faddp",
        "fld dword [ebp + 0x1c]",
        "fmul st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + 0x14]",
        "fmulp st2",
        "fld dword [ebp + 0x8]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul dword [ebp + 0x20]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x1c]",
        "fstp dword [esp + 0x3c]",
        "mov edx,dword [esp + 0x3c]",
        "fld dword [esp + 0x38]",
        "mov dword [ebx],edx",
        "fstp dword [esp + 0x40]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esp + 0x28]",
        "mov dword [ebx + 0x4],eax",
        "fstp dword [esp + 0x44]",
        "mov ecx,dword [esp + 0x44]",
        "fldz",
        "mov dword [ebx + 0x8],ecx",
        "mov ecx,dword [esi + 0x68]",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784210",
        "fmul dword [esp + 0x6c]",
        "fstp dword [ebx + 0x18]",
        "mov ecx,dword [esi + 0x5c]",
        "fldz",
        "push ecx",
        "fstp dword [esp]",
        "call 0x00784210",
        "fmul dword [esp + 0x80]",
        "push 0xb2b724",
        "mov ecx,ebx",
        "fstp dword [esp + 0x54]",
        "call 0x0078fcc0",
        "fmul qword [0x00a8ba48]",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x34]",
        "fsubr qword [0x00a65a18]",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "fabs",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "fmul qword [0x00a8c698]",
        "fld1",
        "fsubrp",
        "fstp dword [esp + 0x38]",
        "fld dword [0x00b2b72c]",
        "fld st0",
        "fmul dword [ebx + 0x4]",
        "fld dword [ebx + 0x8]",
        "fld dword [0x00b2b728]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "fsubrp",
        "fstp dword [esp + 0x24]",
        "fld dword [ebx + 0x8]",
        "fld dword [0x00b2b724]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebx]",
        "fmulp st4",
        "fxch",
        "fsubrp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x30]",
        "fmul dword [ebx]",
        "fld dword [ebx + 0x4]",
        "fmulp st2",
        "fsubrp",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x30]",
        "fld dword [esp + 0x24]",
        "fld dword [esp + 0x4c]",
        "fld st1",
        "fmulp st2",
        "fld st2",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul st0",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "call 0x00982c30",
        "fstp dword [esp + 0x1c]",
        "fld dword [esp + 0x1c]",
        "mov ecx,dword [esi + 0x70]",
        "fld1",
        "push ecx",
        "fdivrp",
        "fstp dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x20]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fstp dword [esp + 0x44]",
        "fmul dword [esp + 0x50]",
        "fstp dword [esp + 0x48]",
        "fldz",
        "fstp dword [esp]",
        "call 0x00784210",
        "fsub qword [0x00a2faa0]",
        "mov edx,dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x40]",
        "sub esp,0xc",
        "fadd st0,st0",
        "mov eax,esp",
        "mov dword [eax],edx",
        "fmul qword [0x00a3d360]",
        "fstp dword [esp + 0x28]",
        "fld1",
        "fst dword [esp + 0xb8]",
        "fldz",
        "fst dword [esp + 0xbc]",
        "fst dword [esp + 0xc0]",
        "fst dword [esp + 0xc4]",
        "fst dword [esp + 0xcc]",
        "fst dword [esp + 0xd0]",
        "fstp dword [esp + 0xd4]",
        "fst dword [esp + 0xc8]",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x28]",
        "mov edx,dword [esp + 0x50]",
        "fmul dword [esp + 0x90]",
        "mov dword [eax + 0x4],ecx",
        "push ecx",
        "mov dword [eax + 0x8],edx",
        "fmul dword [esp + 0x44]",
        "lea ecx,[esp + 0xbc]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x2c]",
        "fstp dword [esp]",
        "call 0x0078f160",
        "lea eax,[esp + 0xac]",
        "push eax",
        "lea ecx,[esp + 0xd4]",
        "push ecx",
        "mov ecx,ebp",
        "call 0x0078edd0",
        "cmp dword [esp + 0x124],0x0",
        "mov esi,eax",
        "mov ecx,0x9",
        "mov edi,ebp",
        "rep movsd",
        "fld dword [ebp + 0xc]",
        "fld dword [0x00b2b71c]",
        "fld st0",
        "fmulp st2",
        "fld dword [ebp]",
        "fld dword [0x00b2b718]",
        "fld st0",
        "fmulp st2",
        "fxch st3",
        "faddp",
        "fld dword [ebp + 0x18]",
        "fld dword [0x00b2b720]",
        "fld st0",
        "fmulp st2",
        "fxch st2",
        "faddp",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp + 0x10]",
        "fmul st2",
        "fld dword [ebp + 0x4]",
        "fmul st4",
        "faddp",
        "fld dword [ebp + 0x1c]",
        "fmul st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + 0x14]",
        "fmulp st2",
        "fld dword [ebp + 0x8]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul dword [ebp + 0x20]",
        "faddp",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0x1c]",
        "fstp dword [esp + 0x3c]",
        "mov edx,dword [esp + 0x3c]",
        "fld dword [esp + 0x38]",
        "mov dword [ebx],edx",
        "fstp dword [esp + 0x40]",
        "mov eax,dword [esp + 0x40]",
        "fld dword [esp + 0x28]",
        "mov dword [ebx + 0x4],eax",
        "mov eax,dword [esp + 0x10c]",
        "fstp dword [esp + 0x44]",
        "mov ecx,dword [esp + 0x44]",
        "mov dword [ebx + 0x8],ecx"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x8, #272]",
        "mov w20, #0x0",
        "fmov d2, x20",
        "ldr w7, [x4]",
        "ldr w5, [x8, #92]",
        "ldr w6, [x5, #24]",
        "ldr w10, [x8, #88]",
        "str w7, [x6, #12]",
        "ldr w5, [x4, #4]",
        "str w5, [x6, #16]",
        "ldr w4, [x4, #8]",
        "str w4, [x6, #20]",
        "ldr w7, [x10, #80]",
        "str w7, [x8, #-4]!",
        "fcvt s2, d2",
        "str s2, [x8]",
        "mov w20, #0x31",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "lsl w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 214,
      "ExpectedInstructionCount": 432,
      "x86Insts": [
        "fld dword [ecx + 0xc]",
        "fld dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [ecx + 0x18]",
        "fld dword [ecx]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [ecx + 0xc]",
        "fld dword [ecx + -0xc]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [ecx]",
        "fld dword [ecx + -0x18]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [ecx + -0xc]",
        "fld dword [ecx + -0x24]",
        "fld st0",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [ecx + -0x18]",
        "fld dword [ecx]",
        "fld dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [ecx + 0x18]",
        "fadd st0,st1",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fst dword [ecx]",
        "fld dword [ecx + -0xc]",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0xc]",
        "fld st0",
        "fmul st6",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fadd st0,st4",
        "fstp dword [esp + 0x8]",
        "fsubp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x38]",
        "fsubp",
        "fstp dword [esp + 0x40]",
        "fxch",
        "fmul st4",
        "fstp dword [esp + 0x4]",
        "fld dword [ecx + 0x18]",
        "fld st0",
        "fmul st4",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fadd st0,st2",
        "fstp dword [esp + 0x8]",
        "fsubp",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x4c]",
        "fsubp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fmul qword [0x00a77be0]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x48]",
        "fmul qword [0x00a77bd8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x4c]",
        "fmul qword [0x00a77bd0]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x38]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x4c]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x38]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x3c]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x48]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x3c]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x40]",
        "fst dword [esp + 0x8]",
        "fld dword [esp + 0x44]",
        "fadd st1,st0",
        "fxch",
        "fstp dword [esp + 0x40]",
        "fsubr dword [esp + 0x8]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x38]",
        "fmul qword [0x00a77bc8]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x3c]",
        "fmul qword [0x00a77bc0]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x40]",
        "fmul qword [0x00a77bb8]",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x44]",
        "fmul qword [0x00a77bb0]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x48]",
        "fmul qword [0x00a77ba8]",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x4c]",
        "fmul qword [0x00a77ba0]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x38]",
        "fchs",
        "fld st0",
        "fmul st2",
        "fstp dword [esp + 0x58]",
        "fld qword [0x00a77b98]",
        "fmul st1",
        "fxch",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x3c]",
        "fchs",
        "fld st0",
        "fld qword [0x00a77b90]",
        "fmul st1",
        "fxch",
        "fstp dword [esp + 0x54]",
        "fxch",
        "fmul qword [0x00a77b88]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld qword [0x00a77b80]",
        "fmul st1",
        "fstp dword [esp + 0x50]",
        "fmul qword [0x00a77b78]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x48]",
        "fld qword [0x00a77b88]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x4c]",
        "fld st0",
        "fmulp st4",
        "fxch st3",
        "fstp dword [esp + 0x40]",
        "fxch st2",
        "fchs",
        "fmul st3",
        "add eax,0x18",
        "add ecx,0x4",
        "sub edx,0x1",
        "fstp dword [esp + 0x44]",
        "fxch",
        "fchs",
        "fmulp",
        "fstp dword [esp + 0x48]",
        "fld dword [esp + 0x38]",
        "fld st0",
        "fchs",
        "fmul qword [0x00a77b80]",
        "fstp dword [esp + 0x4c]",
        "fmul qword [0x00a77b78]",
        "fstp dword [esp + 0x38]",
        "fld dword [eax + -0x1c]",
        "fadd dword [esp + 0x38]",
        "fstp dword [eax + -0x1c]",
        "fld dword [eax + -0x18]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [eax + -0x18]",
        "fld dword [esp + 0x40]",
        "fadd dword [eax + -0x14]",
        "fstp dword [eax + -0x14]",
        "fld dword [eax + -0x10]",
        "fadd dword [esp + 0x44]",
        "fstp dword [eax + -0x10]",
        "fld dword [eax + -0xc]",
        "fadd dword [esp + 0x48]",
        "fstp dword [eax + -0xc]",
        "fld dword [eax + -0x8]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [eax + -0x8]",
        "fld dword [esp + 0x50]",
        "fadd dword [eax + -0x4]",
        "fstp dword [eax + -0x4]",
        "fld dword [eax]",
        "fadd dword [esp + 0x54]",
        "fstp dword [eax]",
        "fld dword [eax + 0x4]",
        "fadd dword [esp + 0x58]",
        "fstp dword [eax + 0x4]",
        "fld dword [eax + 0x8]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [eax + 0x8]",
        "fld dword [esp + 0x60]",
        "fadd dword [eax + 0xc]",
        "fstp dword [eax + 0xc]",
        "fld dword [eax + 0x10]",
        "fadd dword [esp + 0x64]",
        "fstp dword [eax + 0x10]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x7, #12]",
        "fcvt d2, s2",
        "ldr s3, [x7, #24]",
        "fcvt d3, s3",
        "fadd d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x7, #24]",
        "ldr s3, [x7]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x7, #12]",
        "ldur s2, [x7, #-12]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x7]",
        "ldur s3, [x7, #-24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "stur s2, [x7, #-12]",
        "ldur s2, [x7, #-36]",
        "fcvt d2, s2",
        "fadd d3, d3, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "fcvt d4, s3",
        "stur s3, [x7, #-24]",
        "ldr s3, [x7]",
        "fcvt d3, s3",
        "ldr s5, [x7, #24]",
        "fcvt d5, s5",
        "fadd d5, d5, d3",
        "fcvt s5, d5",
        "str s5, [x7, #24]",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "fcvt d5, s3",
        "str s3, [x7]",
        "ldur s3, [x7, #-12]",
        "fcvt d3, s3",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d6, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "fmul d3, d3, d6",
        "fcvt s3, d3",
        "str s3, [x8, #4]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "ldr s3, [x7, #12]",
        "fcvt d3, s3",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add w23, w20, #0x6 (6)",
        "and w23, w23, #0x7",
        "add x23, x28, x23, lsl #4",
        "ldr d7, [x23, #1056]",
        "add x23, x28, x20, lsl #4",
        "fmul d8, d3, d7",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "ldr s8, [x8, #8]",
        "fcvt d8, s8",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x12, x28, x20, lsl #4",
        "fadd d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #8]",
        "fsub d2, d2, d3",
        "add x13, x28, x22, lsl #4",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "add w22, w22, #0x1 (1)",
        "and w22, w22, #0x7",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "fadd d8, d3, d2",
        "fcvt s8, d8",
        "str s8, [x8, #56]",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #64]",
        "add x22, x28, x22, lsl #4",
        "strb wzr, [x28, #1049]",
        "fmul d2, d5, d6",
        "fcvt s2, d2",
        "str s2, [x8, #4]",
        "ldr s2, [x7, #24]",
        "fcvt d2, s2",
        "fmul d3, d2, d7",
        "fcvt s3, d3",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x8, #8]",
        "fsub d2, d4, d2",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "fadd d4, d3, d2",
        "fcvt s4, d4",
        "str s4, [x8, #76]",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #8]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "mov w14, #0x7be0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "mov w14, #0x7bd8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #76]",
        "fcvt d2, s2",
        "mov w14, #0x7bd0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "fcvt s3, d2",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "fcvt s3, d2",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #72]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "fcvt s3, d2",
        "str s3, [x8, #8]",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "mov w14, #0x7bc8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "mov w14, #0x7bc0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "mov w14, #0x7bb8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #68]",
        "fcvt d2, s2",
        "mov w14, #0x7bb0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "mov w14, #0x7ba8",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #76]",
        "fcvt d2, s2",
        "mov w14, #0x7ba0",
        "movk w14, #0xa7, lsl #16",
        "ldr d3, [x14]",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "add x14, x28, x20, lsl #4",
        "ldr d3, [x14, #1056]",
        "fmul d4, d2, d3",
        "fcvt s4, d4",
        "str s4, [x8, #88]",
        "mov w14, #0x7b98",
        "movk w14, #0xa7, lsl #16",
        "ldr d4, [x14]",
        "fmul d4, d4, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #92]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "mov w14, #0x7b90",
        "movk w14, #0xa7, lsl #16",
        "ldr d5, [x14]",
        "fmul d5, d5, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d2",
        "str s6, [x8, #84]",
        "strb wzr, [x28, #1049]",
        "mov w14, #0x7b88",
        "movk w14, #0xa7, lsl #16",
        "ldr d6, [x14]",
        "fmul d2, d2, d6",
        "fcvt s2, d2",
        "str s2, [x8, #96]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "fneg v2.2d, v2.2d",
        "mov w15, #0x7b80",
        "movk w15, #0xa7, lsl #16",
        "ldr d6, [x15]",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #80]",
        "mov w16, #0x7b78",
        "movk w16, #0xa7, lsl #16",
        "ldr d6, [x16]",
        "fmul d2, d2, d6",
        "fcvt s2, d2",
        "str s2, [x8, #100]",
        "ldr s2, [x8, #68]",
        "fcvt d2, s2",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldr s2, [x8, #72]",
        "fcvt d2, s2",
        "ldr d6, [x14]",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8, #60]",
        "ldr s6, [x8, #76]",
        "fcvt d6, s6",
        "fmul d4, d4, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s7, d4",
        "str s7, [x8, #64]",
        "strb wzr, [x28, #1049]",
        "fneg v7.2d, v6.2d",
        "fmul d3, d7, d3",
        "add w4, w4, #0x18 (24)",
        "add w7, w7, #0x4 (4)",
        "subs w26, w5, #0x1 (1)",
        "mov x27, x5",
        "mov x5, x26",
        "fcvt s7, d3",
        "str s7, [x8, #68]",
        "strb wzr, [x28, #1049]",
        "fneg v2.2d, v2.2d",
        "fmul d2, d5, d2",
        "fcvt s2, d2",
        "str s2, [x8, #72]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "fneg v5.2d, v2.2d",
        "ldr d7, [x15]",
        "fmul d5, d5, d7",
        "fcvt s7, d5",
        "str s7, [x8, #76]",
        "ldr d7, [x16]",
        "fmul d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x8, #56]",
        "ldur s2, [x4, #-28]",
        "fcvt d2, s2",
        "ldr s7, [x8, #56]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-28]",
        "ldur s2, [x4, #-24]",
        "fcvt d2, s2",
        "ldr s7, [x8, #60]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-24]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "ldur s7, [x4, #-20]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-20]",
        "ldur s2, [x4, #-16]",
        "fcvt d2, s2",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-16]",
        "ldur s2, [x4, #-12]",
        "fcvt d2, s2",
        "ldr s7, [x8, #72]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-12]",
        "ldur s2, [x4, #-8]",
        "fcvt d2, s2",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-8]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "ldur s7, [x4, #-4]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "stur s2, [x4, #-4]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr s7, [x8, #84]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr s2, [x4, #4]",
        "fcvt d2, s2",
        "ldr s7, [x8, #88]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x4, #4]",
        "ldr s2, [x4, #8]",
        "fcvt d2, s2",
        "ldr s7, [x8, #92]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x4, #8]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s2, d2",
        "str s2, [x4, #12]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr s7, [x8, #100]",
        "fcvt d7, s7",
        "fadd d2, d2, d7",
        "fcvt s7, d2",
        "str s7, [x4, #16]",
        "strb w20, [x28, #1051]",
        "str d6, [x23, #1056]",
        "str d4, [x21, #1056]",
        "str d3, [x13, #1056]",
        "str d5, [x22, #1056]",
        "str d2, [x12, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf8f8",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 229,
      "ExpectedInstructionCount": 466,
      "x86Insts": [
        "movzx eax,word [esi + edx*0x8]",
        "fld dword [esi + edx*0x8 + 0x4]",
        "fstp dword [esp + 0x24]",
        "mov esi,dword [esp + 0x1c8]",
        "fld dword [esp + 0x9c]",
        "movzx eax,ax",
        "mov ecx,eax",
        "imul ecx,dword [esp + 0x1e4]",
        "lea eax,[eax + eax*0x2]",
        "add eax,eax",
        "add eax,eax",
        "lea edi,[eax + esi*0x1 + 0x8]",
        "mov dword [esp + 0x10],edi",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0x98]",
        "fmul dword [eax + esi*0x1]",
        "faddp",
        "fld dword [esp + 0xa0]",
        "fmul dword [edi]",
        "faddp",
        "fadd dword [esp + 0x88]",
        "fstp dword [esp + 0xd0]",
        "fld dword [esp + 0xa8]",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0xa4]",
        "fmul dword [eax + esi*0x1]",
        "faddp",
        "fld dword [esp + 0xac]",
        "fmul dword [edi]",
        "faddp",
        "fadd dword [esp + 0x8c]",
        "fstp dword [esp + 0xd4]",
        "fld dword [esp + 0xb4]",
        "fmul dword [eax + esi*0x1 + 0x4]",
        "fld dword [esp + 0xb0]",
        "fmul dword [eax + esi*0x1]",
        "mov esi,edi",
        "faddp",
        "fld dword [esp + 0xb8]",
        "fmul dword [esi]",
        "mov esi,dword [esp + 0x38]",
        "lea edi,[esi + eax*0x1 + 0x8]",
        "mov dword [esp + 0x10],edi",
        "faddp",
        "fadd dword [esp + 0x90]",
        "fstp dword [esp + 0xd8]",
        "fld dword [esp + 0x64]",
        "fld st0",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x68]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp st2,st0",
        "fld dword [esp + 0x6c]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [esp + 0xe8]",
        "fld dword [esp + 0x70]",
        "fld st0",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x74]",
        "fld st0",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp st2,st0",
        "fld dword [esp + 0x78]",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0xec]",
        "fld dword [esp + 0x7c]",
        "fmul dword [eax + ebx*0x1]",
        "fld dword [esp + 0x80]",
        "fmul dword [eax + ebx*0x1 + 0x4]",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [eax + ebx*0x1 + 0x8]",
        "faddp",
        "fstp dword [esp + 0xf0]",
        "fld st2",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld st5",
        "fmul dword [esi + eax*0x1]",
        "faddp",
        "fld st4",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld st2",
        "fmul dword [esi + eax*0x1]",
        "faddp",
        "fld dword [esp + 0x78]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x80]",
        "fmul dword [esi + eax*0x1 + 0x4]",
        "fld dword [esp + 0x7c]",
        "fmul dword [esi + eax*0x1]",
        "mov esi,edi",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [esi]",
        "mov esi,dword [esp + 0x20]",
        "lea edi,[esi + eax*0x1 + 0x4]",
        "mov dword [esp + 0x10],edi",
        "faddp",
        "lea edi,[esi + eax*0x1 + 0x8]",
        "mov dword [esp + 0xbc],edi",
        "mov edi,dword [esp + 0x10]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + eax*0x1]",
        "fmulp st5",
        "fld dword [edi]",
        "mov edi,dword [esp + 0xbc]",
        "fmulp st3",
        "fxch st4",
        "faddp st2,st0",
        "fld dword [edi]",
        "mov edi,dword [esp + 0x10]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x4c]",
        "fmul dword [esi + eax*0x1]",
        "fld dword [edi]",
        "mov edi,dword [esp + 0xbc]",
        "fmulp st2",
        "faddp",
        "fld dword [esp + 0x78]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x7c]",
        "fmul dword [esi + eax*0x1]",
        "mov eax,dword [esp + 0x10]",
        "fld dword [esp + 0x80]",
        "fmul dword [eax]",
        "mov eax,dword [esp + 0x1d4]",
        "faddp",
        "fld dword [esp + 0x84]",
        "fmul dword [edi]",
        "faddp",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0xd0]",
        "fld dword [esp + 0x24]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0xc0]",
        "fld dword [esp + 0xd4]",
        "fmul st1",
        "fstp dword [esp + 0xc4]",
        "fld dword [esp + 0xd8]",
        "fmul st1",
        "fstp dword [esp + 0xc8]",
        "fld dword [esp + 0xc0]",
        "fadd dword [ecx + eax*0x1]",
        "fstp dword [ecx + eax*0x1]",
        "add edx,0x1",
        "cmp edx,dword [esp + 0x1c]",
        "fld dword [esp + 0xc4]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea eax,[ecx + eax*0x1 + 0x8]",
        "fld dword [eax]",
        "fadd dword [esp + 0xc8]",
        "fstp dword [eax]",
        "mov eax,dword [esp + 0x1dc]",
        "fld dword [esp + 0xe8]",
        "fmul st1",
        "fstp dword [esp + 0xdc]",
        "fld dword [esp + 0xec]",
        "fmul st1",
        "fstp dword [esp + 0xe0]",
        "fld dword [esp + 0xf0]",
        "fmul st1",
        "fstp dword [esp + 0xe4]",
        "fld dword [esp + 0xdc]",
        "fadd dword [ecx + ebp*0x1]",
        "fstp dword [ecx + ebp*0x1]",
        "fld dword [esp + 0xe0]",
        "fadd dword [ecx + ebp*0x1 + 0x4]",
        "fstp dword [ecx + ebp*0x1 + 0x4]",
        "fld dword [esp + 0xe4]",
        "fadd dword [ecx + ebp*0x1 + 0x8]",
        "fstp dword [ecx + ebp*0x1 + 0x8]",
        "fld dword [esp + 0x58]",
        "fmul st1",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x5c]",
        "fmul st1",
        "fstp dword [esp + 0x40]",
        "fld dword [esp + 0x60]",
        "fmul st1",
        "fstp dword [esp + 0x44]",
        "fld dword [ecx + eax*0x1]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [ecx + eax*0x1]",
        "fld dword [esp + 0x40]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea eax,[ecx + eax*0x1 + 0x8]",
        "fld dword [esp + 0x44]",
        "fadd dword [eax]",
        "fstp dword [eax]",
        "mov eax,dword [esp + 0x1e0]",
        "fld dword [esp + 0x4c]",
        "fmul st1",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x50]",
        "fmul st1",
        "fstp dword [esp + 0x30]",
        "fmul dword [esp + 0x54]",
        "fstp dword [esp + 0x34]",
        "fld dword [ecx + eax*0x1]",
        "fadd dword [esp + 0x2c]",
        "fstp dword [ecx + eax*0x1]",
        "fld dword [esp + 0x30]",
        "fadd dword [ecx + eax*0x1 + 0x4]",
        "fstp dword [ecx + eax*0x1 + 0x4]",
        "lea ecx,[ecx + eax*0x1 + 0x8]",
        "fld dword [esp + 0x34]",
        "fadd dword [ecx]",
        "fstp dword [ecx]"
      ],
      "ExpectedArm64ASM": [
        "add w20, w10, w5, lsl #3",
        "ldrh w4, [x20]",
        "add w20, w10, w5, lsl #3",
        "ldr s2, [x20, #4]",
        "str s2, [x8, #36]",
        "ldr w10, [x8, #456]",
        "ldr s2, [x8, #156]",
        "fcvt d2, s2",
        "uxth w4, w4",
        "mov x7, x4",
        "ldr w20, [x8, #484]",
        "mul w7, w7, w20",
        "add w4, w4, w4, lsl #1",
        "add w4, w4, w4",
        "add w4, w4, w4",
        "add w20, w4, #0x8 (8)",
        "add w11, w20, w10",
        "str w11, [x8, #16]",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #152]",
        "fcvt d3, s3",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #160]",
        "fcvt d3, s3",
        "ldr s4, [x11]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #136]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #208]",
        "ldr s2, [x8, #168]",
        "fcvt d2, s2",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #164]",
        "fcvt d3, s3",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #172]",
        "fcvt d3, s3",
        "ldr s4, [x11]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #140]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #212]",
        "ldr s2, [x8, #180]",
        "fcvt d2, s2",
        "add w20, w4, w10",
        "ldr s3, [x20, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #176]",
        "fcvt d3, s3",
        "add w20, w4, w10",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "mov x10, x11",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #184]",
        "fcvt d3, s3",
        "ldr s4, [x10]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr w10, [x8, #56]",
        "add w20, w10, #0x8 (8)",
        "add w11, w20, w4",
        "str w11, [x8, #16]",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #144]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #216]",
        "ldr s2, [x8, #100]",
        "fcvt d2, s2",
        "add w20, w4, w6",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d3, d2, d3",
        "ldr s4, [x8, #104]",
        "fcvt d4, s4",
        "add w20, w4, w6",
        "ldr s5, [x20, #4]",
        "fcvt d5, s5",
        "fmul d5, d4, d5",
        "fadd d3, d3, d5",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "add w20, w4, w6",
        "ldr s6, [x20, #8]",
        "fcvt d6, s6",
        "fmul d6, d5, d6",
        "fadd d3, d3, d6",
        "strb wzr, [x28, #1049]",
        "fcvt s3, d3",
        "str s3, [x8, #232]",
        "ldr s3, [x8, #112]",
        "fcvt d3, s3",
        "add w20, w4, w6",
        "ldr s6, [x20]",
        "fcvt d6, s6",
        "fmul d6, d3, d6",
        "ldr s7, [x8, #116]",
        "fcvt d7, s7",
        "add w20, w4, w6",
        "ldr s8, [x20, #4]",
        "fcvt d8, s8",
        "fmul d8, d7, d8",
        "fadd d6, d6, d8",
        "ldr s8, [x8, #120]",
        "fcvt d8, s8",
        "add w20, w4, w6",
        "ldr s9, [x20, #8]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d6, d6, d8",
        "strb wzr, [x28, #1049]",
        "fcvt s6, d6",
        "str s6, [x8, #236]",
        "ldr s6, [x8, #124]",
        "fcvt d6, s6",
        "add w20, w4, w6",
        "ldr s8, [x20]",
        "fcvt d8, s8",
        "fmul d6, d6, d8",
        "ldr s8, [x8, #128]",
        "fcvt d8, s8",
        "add w20, w4, w6",
        "ldr s9, [x20, #4]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d6, d6, d8",
        "ldr s8, [x8, #132]",
        "fcvt d8, s8",
        "add w20, w4, w6",
        "ldr s9, [x20, #8]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d6, d6, d8",
        "fcvt s6, d6",
        "str s6, [x8, #240]",
        "add w20, w10, w4",
        "ldr s6, [x20, #4]",
        "fcvt d6, s6",
        "fmul d6, d4, d6",
        "add w20, w10, w4",
        "ldr s8, [x20]",
        "fcvt d8, s8",
        "fmul d8, d2, d8",
        "fadd d6, d6, d8",
        "ldr s8, [x11]",
        "fcvt d8, s8",
        "fmul d8, d5, d8",
        "fadd d6, d6, d8",
        "fcvt s6, d6",
        "str s6, [x8, #88]",
        "add w20, w10, w4",
        "ldr s6, [x20, #4]",
        "fcvt d6, s6",
        "fmul d6, d7, d6",
        "add w20, w10, w4",
        "ldr s8, [x20]",
        "fcvt d8, s8",
        "fmul d8, d3, d8",
        "fadd d6, d6, d8",
        "ldr s8, [x8, #120]",
        "fcvt d8, s8",
        "ldr s9, [x11]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d6, d6, d8",
        "fcvt s6, d6",
        "str s6, [x8, #92]",
        "ldr s6, [x8, #128]",
        "fcvt d6, s6",
        "add w20, w10, w4",
        "ldr s8, [x20, #4]",
        "fcvt d8, s8",
        "fmul d6, d6, d8",
        "ldr s8, [x8, #124]",
        "fcvt d8, s8",
        "add w20, w10, w4",
        "ldr s9, [x20]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "mov x10, x11",
        "fadd d6, d6, d8",
        "ldr s8, [x8, #132]",
        "fcvt d8, s8",
        "ldr s9, [x10]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "ldr w10, [x8, #32]",
        "add w20, w10, #0x4 (4)",
        "add w11, w20, w4",
        "str w11, [x8, #16]",
        "fadd d6, d6, d8",
        "add w20, w10, #0x8 (8)",
        "add w11, w20, w4",
        "str w11, [x8, #188]",
        "ldr w11, [x8, #16]",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "add w20, w10, w4",
        "ldr s6, [x20]",
        "fcvt d6, s6",
        "fmul d2, d2, d6",
        "ldr s6, [x11]",
        "fcvt d6, s6",
        "ldr w11, [x8, #188]",
        "fmul d4, d4, d6",
        "strb wzr, [x28, #1049]",
        "fadd d2, d4, d2",
        "ldr s4, [x11]",
        "fcvt d4, s4",
        "ldr w11, [x8, #16]",
        "fmul d4, d5, d4",
        "strb wzr, [x28, #1049]",
        "fadd d2, d4, d2",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #76]",
        "add w20, w10, w4",
        "ldr s2, [x20]",
        "fcvt d2, s2",
        "fmul d2, d3, d2",
        "ldr s3, [x11]",
        "fcvt d3, s3",
        "ldr w11, [x8, #188]",
        "fmul d3, d7, d3",
        "fadd d2, d3, d2",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "ldr s4, [x11]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #80]",
        "ldr s2, [x8, #124]",
        "fcvt d2, s2",
        "add w20, w10, w4",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x8, #16]",
        "ldr s3, [x8, #128]",
        "fcvt d3, s3",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr w4, [x8, #468]",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #132]",
        "fcvt d3, s3",
        "ldr s4, [x11]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #84]",
        "ldr s2, [x8, #208]",
        "fcvt d2, s2",
        "ldr s3, [x8, #36]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #192]",
        "ldr s2, [x8, #212]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #196]",
        "ldr s2, [x8, #216]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #200]",
        "ldr s2, [x8, #192]",
        "fcvt d2, s2",
        "add w20, w7, w4",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20]",
        "add w5, w5, #0x1 (1)",
        "ldr w20, [x8, #28]",
        "eor x27, x5, x20",
        "subs w26, w5, w20",
        "ldr s2, [x8, #196]",
        "fcvt d2, s2",
        "add w20, w7, w4",
        "ldr s4, [x20, #4]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w4, w20, w4",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr s4, [x8, #200]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x8, #476]",
        "ldr s2, [x8, #232]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #220]",
        "ldr s2, [x8, #236]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #224]",
        "ldr s2, [x8, #240]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #228]",
        "ldr s2, [x8, #220]",
        "fcvt d2, s2",
        "add w20, w7, w9",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w9",
        "fcvt s2, d2",
        "str s2, [x20]",
        "ldr s2, [x8, #224]",
        "fcvt d2, s2",
        "add w20, w7, w9",
        "ldr s4, [x20, #4]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w9",
        "fcvt s2, d2",
        "str s2, [x20, #4]",
        "ldr s2, [x8, #228]",
        "fcvt d2, s2",
        "add w20, w7, w9",
        "ldr s4, [x20, #8]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w9",
        "fcvt s2, d2",
        "str s2, [x20, #8]",
        "ldr s2, [x8, #88]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #60]",
        "ldr s2, [x8, #92]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #64]",
        "ldr s2, [x8, #96]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #68]",
        "add w20, w7, w4",
        "ldr s2, [x20]",
        "fcvt d2, s2",
        "ldr s4, [x8, #60]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20]",
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "add w20, w7, w4",
        "ldr s4, [x20, #4]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w4, w20, w4",
        "ldr s2, [x8, #68]",
        "fcvt d2, s2",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fadd d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x8, #480]",
        "ldr s2, [x8, #76]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #80]",
        "fcvt d2, s2",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #84]",
        "fcvt d2, s2",
        "fmul d2, d3, d2",
        "fcvt s2, d2",
        "str s2, [x8, #52]",
        "add w20, w7, w4",
        "ldr s2, [x20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #44]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20]",
        "ldr s2, [x8, #48]",
        "fcvt d2, s2",
        "add w20, w7, w4",
        "ldr s3, [x20, #4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "add w20, w7, w4",
        "fcvt s2, d2",
        "str s2, [x20, #4]",
        "add w20, w7, #0x8 (8)",
        "add w7, w20, w4",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "ldr s3, [x7]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x7]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfefe",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 260,
      "ExpectedInstructionCount": 29,
      "x86Insts": [
        "fld dword [edi]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "lea ecx,[esp + 0x10]",
        "fld1",
        "push ecx",
        "fdivrp",
        "lea edx,[esp + 0x50]",
        "push edx",
        "lea ecx,[esp + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [edi]",
        "fchs",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x40]",
        "fmul st1",
        "fstp dword [esp + 0x70]",
        "fmul dword [esp + 0x44]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x30]",
        "fld dword [esp + 0x10]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fstp dword [esp + 0x64]",
        "fmul dword [esp + 0x38]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x6c]",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x64]",
        "fadd dword [esp + 0x70]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x68]",
        "fadd dword [esp + 0x74]",
        "fstp dword [esp + 0x5c]",
        "call 0x00716e00",
        "mov ecx,dword [eax]",
        "mov dword [esi + 0x20],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [esi + 0x24],edx",
        "mov ecx,dword [eax + 0x8]",
        "mov dword [esi + 0x28],ecx",
        "mov edx,dword [eax + 0xc]",
        "mov dword [esi + 0x2c],edx",
        "fld dword [edi + 0x4]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [edi + 0x4]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fchs",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x34]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x58]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fstp dword [esp + 0x5c]",
        "fmul dword [esp + 0x3c]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x64]",
        "fadd dword [esp + 0x58]",
        "fstp dword [esp + 0x74]",
        "mov eax,dword [esp + 0x74]",
        "fld dword [esp + 0x68]",
        "mov dword [esp + 0x4c],eax",
        "fadd dword [esp + 0x5c]",
        "lea eax,[esp + 0x10]",
        "push eax",
        "fstp dword [esp + 0x7c]",
        "mov ecx,dword [esp + 0x7c]",
        "fld dword [esp + 0x70]",
        "mov dword [esp + 0x54],ecx",
        "fadd dword [esp + 0x64]",
        "lea ecx,[esp + 0x50]",
        "push ecx",
        "lea ecx,[esp + 0x7c]",
        "fstp dword [esp + 0x84]",
        "mov edx,dword [esp + 0x84]",
        "mov dword [esp + 0x5c],edx",
        "call 0x00716e00",
        "mov edx,dword [eax]",
        "mov dword [esi + 0x30],edx",
        "mov ecx,dword [eax + 0x4]",
        "mov dword [esi + 0x34],ecx",
        "mov edx,dword [eax + 0x8]",
        "mov dword [esi + 0x38],edx",
        "mov eax,dword [eax + 0xc]",
        "mov dword [esi + 0x3c],eax",
        "fld dword [edi + 0x8]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmul dword [edi + 0x8]",
        "fstp dword [esp + 0x8]",
        "fchs",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x40]",
        "fld dword [esp + 0xc]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x78]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x58]",
        "mov ecx,dword [esp + 0x58]",
        "fld dword [esp + 0x78]",
        "mov dword [esp + 0x4c],ecx",
        "fadd dword [esp + 0x68]",
        "lea ecx,[esp + 0x10]",
        "push ecx",
        "lea ecx,[esp + 0x78]",
        "fstp dword [esp + 0x60]",
        "mov edx,dword [esp + 0x60]",
        "fld dword [esp + 0x80]",
        "mov dword [esp + 0x54],edx",
        "fadd dword [esp + 0x70]",
        "lea edx,[esp + 0x50]",
        "push edx",
        "fstp dword [esp + 0x68]",
        "mov eax,dword [esp + 0x68]",
        "mov dword [esp + 0x5c],eax",
        "call 0x00716e00",
        "mov ecx,dword [eax]",
        "mov dword [esi + 0x40],ecx",
        "mov edx,dword [eax + 0x4]",
        "mov dword [esi + 0x44],edx",
        "mov ecx,dword [eax + 0x8]",
        "mov dword [esi + 0x48],ecx",
        "mov edx,dword [eax + 0xc]",
        "mov dword [esi + 0x4c],edx",
        "fld dword [edi + 0xc]",
        "fmul st0",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fadd qword [0x00a2f928]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "call 0x00982c30",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0xc]",
        "fld1",
        "fdivrp",
        "fstp dword [esp + 0x8]",
        "fld dword [edi + 0xc]",
        "fchs",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x40]",
        "fmul st1",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x44]",
        "fmul st1",
        "fstp dword [esp + 0x68]",
        "fmul dword [esp + 0x48]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x28]",
        "fld dword [esp + 0x8]",
        "fld st0",
        "fmulp st2",
        "fxch",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fstp dword [esp + 0x78]",
        "fmul dword [esp + 0x30]",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x74]",
        "fadd dword [esp + 0x64]",
        "fstp dword [esp + 0x58]",
        "mov eax,dword [esp + 0x58]",
        "fld dword [esp + 0x78]",
        "mov dword [esp + 0x4c],eax",
        "fadd dword [esp + 0x68]",
        "lea eax,[esp + 0x10]",
        "fstp dword [esp + 0x5c]",
        "mov ecx,dword [esp + 0x5c]",
        "fld dword [esp + 0x7c]",
        "mov dword [esp + 0x50],ecx",
        "fadd dword [esp + 0x6c]",
        "lea ecx,[esp + 0x4c]",
        "fstp dword [esp + 0x60]",
        "mov edx,dword [esp + 0x60]",
        "mov dword [esp + 0x54],edx"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x11]",
        "fcvt d2, s2",
        "fmul d2, d2, d2",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "mov w20, #0xf928",
        "movk w20, #0xa2, lsl #16",
        "ldr d3, [x20]",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "mov w20, #0x1f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "ldrb w21, [x28, #1051]",
        "add w21, w21, #0x7 (7)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 206,
      "ExpectedInstructionCount": 117,
      "x86Insts": [
        "fld dword [0x00b42a74]",
        "push ecx",
        "fstp dword [0x00b42a20]",
        "lea ecx,[esp + 0x48]",
        "fld dword [0x00b42a78]",
        "fstp dword [0x00b42a24]",
        "fld dword [0x00b42a7c]",
        "fstp dword [0x00b42a28]",
        "fld dword [0x00b42a68]",
        "fstp dword [0x00b42a2c]",
        "fld dword [0x00b42a6c]",
        "fstp dword [0x00b42a30]",
        "fld dword [0x00b42a70]",
        "fstp dword [0x00b42a34]",
        "fld dword [0x00b42a5c]",
        "fstp dword [0x00b42a38]",
        "fld dword [0x00b42a60]",
        "fstp dword [0x00b42a3c]",
        "fld dword [0x00b42a64]",
        "fstp dword [0x00b42a40]",
        "fld dword [0x00b42a50]",
        "fstp dword [0x00b42a44]",
        "fld dword [0x00b42a54]",
        "fstp dword [0x00b42a48]",
        "fld dword [0x00b42a58]",
        "fstp dword [0x00b42a4c]",
        "fst dword [esp + 0x48]",
        "fst dword [esp + 0x58]",
        "fstp dword [esp + 0x68]",
        "fst dword [esp + 0x4c]",
        "fst dword [esp + 0x50]",
        "fst dword [esp + 0x54]",
        "fst dword [esp + 0x5c]",
        "fst dword [esp + 0x60]",
        "fstp dword [esp + 0x64]",
        "fld dword [esp + 0x4]",
        "fstp dword [esp]",
        "call 0x00793aa0",
        "fld dword [esp + 0x50]",
        "fld dword [0x00b42a78]",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a74]",
        "fst qword [esp + 0x30]",
        "fld dword [esp + 0x44]",
        "fld dword [esp + 0x5c]",
        "fld dword [0x00b42a7c]",
        "fst qword [esp + 0x10]",
        "fld st2",
        "fmul st4",
        "fld st6",
        "fmul st6",
        "faddp",
        "fld st2",
        "fmulp st2",
        "faddp",
        "fstp dword [esp + 0x38]",
        "fld dword [esp + 0x54]",
        "fld dword [esp + 0x48]",
        "fld dword [esp + 0x60]",
        "fstp qword [esp]",
        "fld st0",
        "fmulp st5",
        "fld st1",
        "fmulp st6",
        "fxch st4",
        "faddp st5,st0",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp st5,st0",
        "fxch st4",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x58]",
        "fst qword [esp + 0x20]",
        "fld dword [esp + 0x4c]",
        "fst qword [esp + 0x18]",
        "fld dword [esp + 0x64]",
        "fstp qword [esp + 0x8]",
        "fmul qword [esp + 0x30]",
        "fxch",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp + 0x8]",
        "mov eax,dword [esp + 0x38]",
        "fmul qword [esp + 0x10]",
        "mov ecx,dword [esp + 0x3c]",
        "mov [0x00b2ba7c],eax",
        "mov dword [0x00b2ba80],ecx",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a6c]",
        "mov dword [0x00b2ba84],edx",
        "fst qword [esp + 0x30]",
        "fld dword [0x00b42a68]",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a70]",
        "fstp qword [esp + 0x10]",
        "fmul st3",
        "fld st6",
        "fmulp st2",
        "faddp",
        "fld st1",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "fld st2",
        "mov [0x00b2ba88],eax",
        "fmul qword [esp + 0x28]",
        "fld st4",
        "fmul qword [esp + 0x30]",
        "faddp",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "mov dword [0x00b2ba8c],ecx",
        "fmul qword [esp + 0x28]",
        "fld qword [esp + 0x20]",
        "fmul qword [esp + 0x30]",
        "faddp",
        "fld qword [esp + 0x8]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a60]",
        "mov dword [0x00b2ba90],edx",
        "fst qword [esp + 0x28]",
        "fld dword [0x00b42a5c]",
        "fst qword [esp + 0x30]",
        "fld dword [0x00b42a64]",
        "fstp qword [esp + 0x10]",
        "fmul st3",
        "fld st6",
        "fmulp st2",
        "faddp",
        "fld st1",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "fld st2",
        "fmul qword [esp + 0x30]",
        "fld st4",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "fmul qword [esp + 0x30]",
        "fld qword [esp + 0x20]",
        "fmul qword [esp + 0x28]",
        "faddp",
        "fld qword [esp + 0x8]",
        "fmul qword [esp + 0x10]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "fld dword [0x00b42a54]",
        "mov ecx,dword [esp + 0x3c]",
        "fld dword [0x00b42a50]",
        "mov edx,dword [esp + 0x40]",
        "fld dword [0x00b42a58]",
        "mov [0x00b2ba94],eax",
        "fxch st4",
        "mov dword [0x00b2ba98],ecx",
        "fmul st1",
        "mov dword [0x00b2ba9c],edx",
        "fxch st7",
        "fmul st2",
        "faddp st7,st0",
        "fxch st2",
        "fmul st3",
        "faddp st6,st0",
        "fxch st5",
        "fstp dword [esp + 0x38]",
        "mov eax,dword [esp + 0x38]",
        "mov [0x00b2baa0],eax",
        "fmul st2",
        "fxch st3",
        "fmul st4",
        "faddp st2,st0",
        "fld qword [esp]",
        "fmul st1",
        "faddp st2,st0",
        "fxch",
        "fstp dword [esp + 0x3c]",
        "mov ecx,dword [esp + 0x3c]",
        "fld qword [esp + 0x18]",
        "mov dword [0x00b2baa4],ecx",
        "fmulp st2",
        "fld qword [esp + 0x20]",
        "fmulp st3",
        "fxch",
        "faddp st2,st0",
        "fmul qword [esp + 0x8]",
        "faddp",
        "fstp dword [esp + 0x40]",
        "mov edx,dword [esp + 0x40]",
        "mov dword [0x00b2baa8],edx",
        "mov esp,ebp",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x2a74",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "str w7, [x8, #-4]!",
        "mov w20, #0x2a20",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "add w7, w8, #0x48 (72)",
        "mov w20, #0x2a78",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a24",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a7c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a28",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a68",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a2c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a6c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a30",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a70",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a34",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a5c",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a38",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a60",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a3c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a64",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a40",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a50",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a44",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a54",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a48",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "mov w20, #0x2a58",
        "movk w20, #0xb4, lsl #16",
        "ldr s2, [x20]",
        "mov w20, #0x2a4c",
        "movk w20, #0xb4, lsl #16",
        "str s2, [x20]",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvt s3, d2",
        "str s3, [x8, #72]",
        "fcvt s3, d2",
        "str s3, [x8, #88]",
        "fcvt s2, d2",
        "str s2, [x8, #104]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvt s3, d2",
        "str s3, [x8, #76]",
        "fcvt s3, d2",
        "str s3, [x8, #80]",
        "fcvt s3, d2",
        "str s3, [x8, #84]",
        "fcvt s3, d2",
        "str s3, [x8, #92]",
        "fcvt s3, d2",
        "str s3, [x8, #96]",
        "fcvt s2, d2",
        "str s2, [x8, #100]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "fcvt s3, d2",
        "str s3, [x8]",
        "mov w22, #0xc5",
        "movk w22, #0x1, lsl #16",
        "str w22, [x8, #-4]!",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w23, #0x8",
        "sub w20, w23, w20",
        "mov w23, #0xe0e0",
        "lsr w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87_f64-Psychonauts.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 32,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "Block1": {
      "x86InstructionCount": 520,
      "ExpectedInstructionCount": 938,
      "x86Insts": [
        "sub esp,0x88",
        "fld dword [ecx + 0x4]",
        "mov edx,dword [ecx + 0x18]",
        "fld dword [ecx + 0x10]",
        "mov dword [esp + 0x14],edx",
        "fld dword [ecx + 0x14]",
        "mov edx,dword [ecx + 0x1c]",
        "fld dword [ecx + 0x20]",
        "mov dword [esp + 0x10],edx",
        "fld dword [ecx + 0x24]",
        "fld dword [eax]",
        "fsub dword [eax + 0x44]",
        "fld dword [eax + 0x40]",
        "fadd dword [eax + 0x4]",
        "fld dword [eax + 0x20]",
        "fsub dword [eax + 0x64]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x24]",
        "fadd dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fmul st7",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fmul st7",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd st0,st2",
        "fstp dword [esp + 0x80]",
        "fld dword [esp]",
        "fadd st0,st1",
        "fstp dword [esp + 0x78]",
        "fxch",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x60]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x40]",
        "fld dword [eax + 0x44]",
        "fadd dword [eax]",
        "fld dword [eax + 0x4]",
        "fsub dword [eax + 0x40]",
        "fld dword [eax + 0x64]",
        "fadd dword [eax + 0x20]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x24]",
        "fsub dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fmul st7",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fmul st7",
        "fstp dword [esp]",
        "fld st1",
        "fsub dword [esp]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x4]",
        "fadd st0,st1",
        "fstp dword [esp + 0x34]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [esp + 0x5c]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x64]",
        "fstp st0",
        "fld dword [eax + 0x8]",
        "fsub dword [eax + 0x4c]",
        "fld dword [eax + 0xc]",
        "fadd dword [eax + 0x48]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st5",
        "fld dword [esp]",
        "fmul st5",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul st3",
        "fld dword [esp]",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x28]",
        "fsub dword [eax + 0x6c]",
        "fld dword [eax + 0x2c]",
        "fadd dword [eax + 0x68]",
        "fstp dword [esp]",
        "fld dword [esp + 0x10]",
        "fmul st1",
        "fld dword [esp]",
        "fmul dword [esp + 0x14]",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x58]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x50]",
        "fld dword [eax + 0x4c]",
        "fadd dword [eax + 0x8]",
        "fld dword [eax + 0xc]",
        "fsub dword [eax + 0x48]",
        "fstp dword [esp]",
        "fld dword [esp + 0x14]",
        "fmul st1",
        "fld dword [esp]",
        "fmul dword [esp + 0x10]",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul dword [esp + 0x10]",
        "fld dword [esp]",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x6c]",
        "fadd dword [eax + 0x28]",
        "fld dword [eax + 0x2c]",
        "fsub dword [eax + 0x68]",
        "fst dword [esp]",
        "fmul st4",
        "fld st1",
        "fmul st6",
        "faddp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul st5",
        "fxch",
        "fmul st4",
        "fsubp",
        "fstp dword [esp]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x74]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x84]",
        "fld dword [eax + 0x10]",
        "fsub dword [eax + 0x54]",
        "fld dword [eax + 0x14]",
        "fadd dword [eax + 0x50]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st3",
        "fld dword [esp]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fmul st1",
        "fld dword [esp]",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x30]",
        "fsub dword [eax + 0x74]",
        "fld dword [eax + 0x34]",
        "fadd dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st2",
        "fld dword [esp]",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fld dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x20]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x28]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x38]",
        "fld dword [eax + 0x54]",
        "fadd dword [eax + 0x10]",
        "fld dword [eax + 0x14]",
        "fsub dword [eax + 0x50]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st2",
        "fld dword [esp]",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fld dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x74]",
        "fadd dword [eax + 0x30]",
        "fld dword [eax + 0x34]",
        "fsub dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld st0",
        "fmul st3",
        "fld dword [esp]",
        "fmul st3",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul st1",
        "fld dword [esp]",
        "fmul st3",
        "faddp",
        "fstp dword [esp]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x3c]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x54]",
        "fld dword [eax + 0x18]",
        "fsub dword [eax + 0x5c]",
        "fld dword [eax + 0x1c]",
        "fadd dword [eax + 0x58]",
        "fld st1",
        "fmul dword [esp + 0x14]",
        "fld st1",
        "fmul dword [esp + 0x10]",
        "fsubp",
        "fstp dword [esp + 0xc]",
        "fxch",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fld dword [eax + 0x38]",
        "fsub dword [eax + 0x7c]",
        "fld dword [eax + 0x78]",
        "fadd dword [eax + 0x3c]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st6",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul st3",
        "fxch",
        "fmul st4",
        "faddp",
        "fstp dword [esp]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp]",
        "fadd st0,st1",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x48]",
        "fld dword [eax + 0x5c]",
        "fadd dword [eax + 0x18]",
        "fld dword [eax + 0x1c]",
        "fsub dword [eax + 0x58]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "faddp",
        "fstp dword [esp + 0xc]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "fsubp",
        "fstp st2",
        "fstp st0",
        "fld dword [eax + 0x7c]",
        "fadd dword [eax + 0x38]",
        "fld dword [eax + 0x3c]",
        "fsub dword [eax + 0x78]",
        "fld st1",
        "fmul dword [esp + 0x10]",
        "fld st1",
        "fmul dword [esp + 0x14]",
        "fsubp",
        "fstp dword [esp + 0x4]",
        "fmul dword [esp + 0x10]",
        "fxch",
        "fmul dword [esp + 0x14]",
        "faddp",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0xc]",
        "fsub dword [esp + 0x4]",
        "fstp dword [esp + 0x7c]",
        "fsubp",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x80]",
        "fld dword [esp + 0x28]",
        "fadd dword [esp + 0x78]",
        "fld dword [esp + 0x8]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [eax + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x8]",
        "fstp st1",
        "fsub dword [esp]",
        "fstp dword [eax + 0xc]",
        "fld dword [esp + 0x80]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x78]",
        "fsub dword [esp + 0x28]",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x8]",
        "fld dword [esp + 0x30]",
        "fsub dword [esp + 0x70]",
        "fld st3",
        "fsub st0,st1",
        "fstp dword [eax + 0x10]",
        "fld st1",
        "fadd st0,st3",
        "fstp dword [eax + 0x14]",
        "fadd st0,st3",
        "fstp dword [eax + 0x18]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x60]",
        "fsub dword [esp + 0x38]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x40]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x48]",
        "fld dword [esp + 0x1c]",
        "fadd dword [esp + 0x50]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld dword [esp + 0x4]",
        "fadd st0,st3",
        "fstp dword [eax + 0x20]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x24]",
        "fxch st2",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x28]",
        "fsub st0,st1",
        "fstp dword [eax + 0x2c]",
        "fstp st0",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x68]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld st2",
        "fsub st0,st1",
        "fstp dword [eax + 0x30]",
        "fld dword [esp + 0x4]",
        "fadd st0,st2",
        "fstp dword [eax + 0x34]",
        "fadd st0,st2",
        "fstp dword [eax + 0x38]",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x3c]",
        "fstp st0",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x24]",
        "fld dword [esp + 0x3c]",
        "fadd dword [esp + 0x34]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + 0x40]",
        "fld dword [esp]",
        "fadd st0,st2",
        "fstp dword [eax + 0x44]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x48]",
        "fstp st1",
        "fsub dword [esp]",
        "fstp dword [eax + 0x4c]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x2c]",
        "fld dword [esp + 0x34]",
        "fsub dword [esp + 0x3c]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x44]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x4c]",
        "fld st3",
        "fsub st0,st1",
        "fstp dword [eax + 0x50]",
        "fld st1",
        "fadd st0,st3",
        "fstp dword [eax + 0x54]",
        "fadd st0,st3",
        "fstp dword [eax + 0x58]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x5c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x64]",
        "fld st2",
        "fadd dword [esp + 0x74]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x7c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st6",
        "fstp dword [esp + 0x4]",
        "faddp",
        "fmul st4",
        "fld dword [esp + 0x4]",
        "fadd st0,st3",
        "fstp dword [eax + 0x60]",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x64]",
        "fxch st2",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x68]",
        "fsub st0,st1",
        "fstp dword [eax + 0x6c]",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x5c]",
        "fstp dword [esp + 0xc]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x6c]",
        "fstp dword [esp + 0x8]",
        "fsubr dword [esp + 0x74]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x84]",
        "fld st1",
        "fsub st0,st1",
        "fmul st3",
        "fstp dword [esp + 0x4]",
        "fadd st0,st1",
        "fmulp st2",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fsub st0,st1",
        "fstp dword [eax + 0x70]",
        "fld dword [esp + 0x4]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x74]",
        "fadd dword [esp + 0xc]",
        "fstp dword [eax + 0x78]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0x4]",
        "fstp dword [eax + 0x7c]",
        "add esp,0x88"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x88 (136)",
        "ldr s2, [x7, #4]",
        "fcvt d2, s2",
        "ldr w5, [x7, #24]",
        "ldr s3, [x7, #16]",
        "fcvt d3, s3",
        "str w5, [x8, #20]",
        "ldr s4, [x7, #20]",
        "fcvt d4, s4",
        "ldr w5, [x7, #28]",
        "ldr s5, [x7, #32]",
        "fcvt d5, s5",
        "str w5, [x8, #16]",
        "ldr s6, [x7, #36]",
        "fcvt d6, s6",
        "ldr s7, [x4]",
        "fcvt d7, s7",
        "ldr s8, [x4, #68]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #64]",
        "fcvt d8, s8",
        "ldr s9, [x4, #4]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "ldr s9, [x4, #32]",
        "fcvt d9, s9",
        "ldr s10, [x4, #100]",
        "fcvt d10, s10",
        "fsub d9, d9, d10",
        "fcvt s9, d9",
        "str s9, [x8, #8]",
        "ldr s9, [x4, #36]",
        "fcvt d9, s9",
        "ldr s10, [x4, #96]",
        "fcvt d10, s10",
        "fadd d9, d9, d10",
        "fcvt s9, d9",
        "str s9, [x8]",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "ldr s10, [x8]",
        "fcvt d10, s10",
        "fsub d9, d9, d10",
        "fmul d9, d9, d2",
        "fcvt s9, d9",
        "str s9, [x8, #4]",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "ldr s10, [x8, #8]",
        "fcvt d10, s10",
        "fadd d9, d9, d10",
        "fmul d9, d9, d2",
        "fcvt s9, d9",
        "str s9, [x8]",
        "ldr s9, [x8, #4]",
        "fcvt d9, s9",
        "fadd d9, d9, d7",
        "fcvt s9, d9",
        "str s9, [x8, #128]",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #120]",
        "strb wzr, [x28, #1049]",
        "ldr s9, [x8, #4]",
        "fcvt d9, s9",
        "fsub d7, d7, d9",
        "fcvt s7, d7",
        "str s7, [x8, #96]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fsub d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8, #64]",
        "ldr s7, [x4, #68]",
        "fcvt d7, s7",
        "ldr s8, [x4]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #4]",
        "fcvt d8, s8",
        "ldr s9, [x4, #64]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "ldr s9, [x4, #100]",
        "fcvt d9, s9",
        "ldr s10, [x4, #32]",
        "fcvt d10, s10",
        "fadd d9, d9, d10",
        "fcvt s9, d9",
        "str s9, [x8, #8]",
        "ldr s9, [x4, #36]",
        "fcvt d9, s9",
        "ldr s10, [x4, #96]",
        "fcvt d10, s10",
        "fsub d9, d9, d10",
        "fcvt s9, d9",
        "str s9, [x8]",
        "ldr s9, [x8, #8]",
        "fcvt d9, s9",
        "ldr s10, [x8]",
        "fcvt d10, s10",
        "fsub d9, d9, d10",
        "fmul d9, d9, d2",
        "fcvt s9, d9",
        "str s9, [x8, #4]",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "ldr s10, [x8, #8]",
        "fcvt d10, s10",
        "fadd d9, d9, d10",
        "fmul d9, d9, d2",
        "fcvt s9, d9",
        "str s9, [x8]",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fsub d9, d7, d9",
        "fcvt s9, d9",
        "str s9, [x8, #36]",
        "ldr s9, [x8, #4]",
        "fcvt d9, s9",
        "fadd d9, d9, d8",
        "fcvt s9, d9",
        "str s9, [x8, #52]",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fadd d7, d9, d7",
        "fcvt s7, d7",
        "str s7, [x8, #92]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fsub d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8, #100]",
        "ldr s7, [x4, #8]",
        "fcvt d7, s7",
        "ldr s8, [x4, #76]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #12]",
        "fcvt d8, s8",
        "ldr s9, [x4, #72]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "fmul d8, d7, d3",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fmul d9, d9, d4",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fmul d7, d7, d4",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d8, d8, d3",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #40]",
        "fcvt d7, s7",
        "ldr s8, [x4, #108]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #44]",
        "fcvt d8, s8",
        "ldr s9, [x4, #104]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fmul d8, d8, d7",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "ldr s10, [x8, #20]",
        "fcvt d10, s10",
        "fmul d9, d9, d10",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "ldr s9, [x8, #16]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "strb wzr, [x28, #1049]",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fmul d7, d7, d9",
        "fadd d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "ldr s8, [x8, #12]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #88]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "ldr s8, [x8, #8]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #48]",
        "ldr s7, [x8, #12]",
        "fcvt d7, s7",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #104]",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #80]",
        "ldr s7, [x4, #76]",
        "fcvt d7, s7",
        "ldr s8, [x4, #8]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #12]",
        "fcvt d8, s8",
        "ldr s9, [x4, #72]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x8, #20]",
        "fcvt d8, s8",
        "fmul d8, d8, d7",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "ldr s10, [x8, #16]",
        "fcvt d10, s10",
        "fmul d9, d9, d10",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "ldr s9, [x8, #20]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #108]",
        "fcvt d7, s7",
        "ldr s8, [x4, #40]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #44]",
        "fcvt d8, s8",
        "ldr s9, [x4, #104]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s9, d8",
        "str s9, [x8]",
        "fmul d8, d8, d4",
        "fmul d9, d7, d3",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d8, d8, d3",
        "strb wzr, [x28, #1049]",
        "fmul d7, d7, d4",
        "fsub d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8]",
        "ldr s7, [x8, #12]",
        "fcvt d7, s7",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #68]",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #76]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "ldr s8, [x8, #12]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #116]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "ldr s8, [x8, #8]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #132]",
        "ldr s7, [x4, #16]",
        "fcvt d7, s7",
        "ldr s8, [x4, #84]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #20]",
        "fcvt d8, s8",
        "ldr s9, [x4, #80]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "fmul d8, d7, d5",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fmul d9, d9, d6",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fmul d7, d7, d6",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d8, d8, d5",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #48]",
        "fcvt d7, s7",
        "ldr s8, [x4, #116]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "ldr s8, [x4, #52]",
        "fcvt d8, s8",
        "ldr s9, [x4, #112]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "fmul d8, d7, d6",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fmul d9, d9, d5",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d8, d8, d6",
        "strb wzr, [x28, #1049]",
        "fmul d7, d7, d5",
        "fadd d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "ldr s8, [x8, #12]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "ldr s8, [x8, #8]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #40]",
        "ldr s7, [x8, #12]",
        "fcvt d7, s7",
        "ldr s8, [x8, #4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #24]",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #56]",
        "ldr s7, [x4, #84]",
        "fcvt d7, s7",
        "ldr s8, [x4, #16]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #20]",
        "fcvt d8, s8",
        "ldr s9, [x4, #80]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "fmul d8, d7, d6",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fmul d9, d9, d5",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "ldr s8, [x8]",
        "fcvt d8, s8",
        "fmul d8, d8, d6",
        "strb wzr, [x28, #1049]",
        "fmul d7, d7, d5",
        "fadd d7, d8, d7",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #116]",
        "fcvt d7, s7",
        "ldr s8, [x4, #48]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #52]",
        "fcvt d8, s8",
        "ldr s9, [x4, #112]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "fmul d8, d7, d5",
        "ldr s9, [x8]",
        "fcvt d9, s9",
        "fmul d9, d9, d6",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "fmul d6, d7, d6",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fmul d5, d7, d5",
        "fadd d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #44]",
        "ldr s5, [x8, #8]",
        "fcvt d5, s5",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #60]",
        "ldr s5, [x8, #4]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #108]",
        "ldr s5, [x8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #84]",
        "ldr s5, [x4, #24]",
        "fcvt d5, s5",
        "ldr s6, [x4, #92]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x4, #28]",
        "fcvt d6, s6",
        "ldr s7, [x4, #88]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x8, #20]",
        "fcvt d7, s7",
        "fmul d7, d5, d7",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fmul d8, d6, d8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fmul d5, d5, d7",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #20]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #56]",
        "fcvt d6, s6",
        "ldr s7, [x4, #124]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "ldr s7, [x4, #120]",
        "fcvt d7, s7",
        "ldr s8, [x4, #60]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fmul d8, d6, d4",
        "fmul d9, d7, d3",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "fmul d7, d7, d4",
        "strb wzr, [x28, #1049]",
        "fmul d6, d6, d3",
        "fadd d6, d7, d6",
        "fcvt s6, d6",
        "str s6, [x8]",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8, #12]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #8]",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fadd d6, d6, d5",
        "fcvt s6, d6",
        "str s6, [x8, #112]",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #72]",
        "ldr s5, [x4, #92]",
        "fcvt d5, s5",
        "ldr s6, [x4, #24]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #28]",
        "fcvt d6, s6",
        "ldr s7, [x4, #88]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fmul d7, d5, d4",
        "fmul d8, d6, d3",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #12]",
        "fmul d4, d6, d4",
        "strb wzr, [x28, #1049]",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "ldr s4, [x4, #124]",
        "fcvt d4, s4",
        "ldr s5, [x4, #56]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x4, #60]",
        "fcvt d5, s5",
        "ldr s6, [x4, #120]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fmul d6, d4, d6",
        "ldr s7, [x8, #20]",
        "fcvt d7, s7",
        "fmul d7, d5, d7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #4]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x8, #20]",
        "fcvt d6, s6",
        "fmul d4, d4, d6",
        "fadd d4, d5, d4",
        "ldr s5, [x8, #4]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "fadd d5, d4, d3",
        "fcvt s5, d5",
        "str s5, [x8, #16]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #124]",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "ldr s5, [x8, #128]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "ldr s6, [x8, #120]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "ldr s7, [x8, #88]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x8, #112]",
        "fcvt d7, s7",
        "ldr s8, [x8, #48]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8]",
        "fadd d7, d6, d4",
        "fcvt s7, d7",
        "str s7, [x4]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "fcvt s7, d7",
        "str s7, [x4, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "fcvt s4, d4",
        "str s4, [x4, #8]",
        "ldr s4, [x8]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "fcvt s4, d4",
        "str s4, [x4, #12]",
        "ldr s4, [x8, #128]",
        "fcvt d4, s4",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "ldr s6, [x8, #40]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #88]",
        "fcvt d6, s6",
        "ldr s7, [x8, #8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "ldr s7, [x8, #48]",
        "fcvt d7, s7",
        "ldr s8, [x8, #112]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fsub d8, d4, d7",
        "fcvt s8, d8",
        "str s8, [x4, #16]",
        "fadd d8, d6, d5",
        "fcvt s8, d8",
        "str s8, [x4, #20]",
        "fadd d4, d7, d4",
        "fcvt s4, d4",
        "str s4, [x4, #24]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d5, d6",
        "fcvt s4, d4",
        "str s4, [x4, #28]",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "ldr s5, [x8, #56]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #24]",
        "fcvt d5, s5",
        "ldr s6, [x8, #64]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "ldr s7, [x8, #72]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "ldr s7, [x8, #28]",
        "fcvt d7, s7",
        "ldr s8, [x8, #80]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fsub d8, d6, d7",
        "fmul d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "fadd d6, d6, d7",
        "fmul d6, d6, d2",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d7, d7, d4",
        "fcvt s7, d7",
        "str s7, [x4, #32]",
        "fadd d7, d6, d5",
        "fcvt s7, d7",
        "str s7, [x4, #36]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fsub d4, d4, d7",
        "fcvt s4, d4",
        "str s4, [x4, #40]",
        "fsub d4, d5, d6",
        "fcvt s4, d4",
        "str s4, [x4, #44]",
        "ldr s4, [x8, #56]",
        "fcvt d4, s4",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "ldr s6, [x8, #24]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x8, #104]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x8, #80]",
        "fcvt d7, s7",
        "ldr s8, [x8, #28]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fsub d8, d6, d7",
        "fmul d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "fadd d6, d6, d7",
        "fmul d6, d6, d2",
        "fsub d7, d4, d6",
        "fcvt s7, d7",
        "str s7, [x4, #48]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "fcvt s7, d7",
        "str s7, [x4, #52]",
        "fadd d4, d6, d4",
        "fcvt s4, d4",
        "str s4, [x4, #56]",
        "ldr s4, [x8, #4]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "fcvt s4, d4",
        "str s4, [x4, #60]",
        "ldr s4, [x8, #44]",
        "fcvt d4, s4",
        "ldr s5, [x8, #36]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "ldr s6, [x8, #52]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #68]",
        "fcvt d6, s6",
        "ldr s7, [x8, #20]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8]",
        "fadd d7, d6, d4",
        "fcvt s7, d7",
        "str s7, [x4, #64]",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "fcvt s7, d7",
        "str s7, [x4, #68]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "fcvt s4, d4",
        "str s4, [x4, #72]",
        "ldr s4, [x8]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "fcvt s4, d4",
        "str s4, [x4, #76]",
        "ldr s4, [x8, #36]",
        "fcvt d4, s4",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "ldr s6, [x8, #60]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #20]",
        "fcvt d6, s6",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "ldr s8, [x8, #76]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fsub d8, d4, d7",
        "fcvt s8, d8",
        "str s8, [x4, #80]",
        "fadd d8, d6, d5",
        "fcvt s8, d8",
        "str s8, [x4, #84]",
        "fadd d4, d7, d4",
        "fcvt s4, d4",
        "str s4, [x4, #88]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d5, d6",
        "fcvt s4, d4",
        "str s4, [x4, #92]",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #84]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "ldr s6, [x8, #100]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #116]",
        "fcvt d6, s6",
        "fadd d6, d3, d6",
        "ldr s7, [x8, #132]",
        "fcvt d7, s7",
        "ldr s8, [x8, #124]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fsub d8, d6, d7",
        "fmul d8, d8, d2",
        "fcvt s8, d8",
        "str s8, [x8, #4]",
        "fadd d6, d6, d7",
        "fmul d6, d6, d2",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d7, d7, d4",
        "fcvt s7, d7",
        "str s7, [x4, #96]",
        "fadd d7, d6, d5",
        "fcvt s7, d7",
        "str s7, [x4, #100]",
        "strb wzr, [x28, #1049]",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fsub d4, d4, d7",
        "fcvt s4, d4",
        "str s4, [x4, #104]",
        "fsub d4, d5, d6",
        "fcvt s4, d4",
        "str s4, [x4, #108]",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #12]",
        "ldr s4, [x8, #100]",
        "fcvt d4, s4",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #116]",
        "fcvt d4, s4",
        "fsub d3, d4, d3",
        "ldr s4, [x8, #124]",
        "fcvt d4, s4",
        "ldr s5, [x8, #132]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fsub d5, d3, d4",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #4]",
        "fadd d3, d4, d3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "fsub d3, d3, d2",
        "fcvt s3, d3",
        "str s3, [x4, #112]",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "ldr s4, [x8, #8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #116]",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #120]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #124]",
        "adds w26, w8, #0x88 (136)",
        "cfinv",
        "mov x27, x8",
        "mov x8, x26",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block2": {
      "x86InstructionCount": 434,
      "ExpectedInstructionCount": 834,
      "x86Insts": [
        "sub esp,0x90",
        "fld dword [ecx + 0x4]",
        "fld st0",
        "fmul dword [ecx + 0x8]",
        "fld st0",
        "fadd dword [ecx + 0x8]",
        "fld dword [eax + 0x40]",
        "fadd dword [eax]",
        "fld dword [eax + 0x44]",
        "fadd dword [eax + 0x4]",
        "fld dword [eax]",
        "fsub dword [eax + 0x40]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x4]",
        "fsub dword [eax + 0x44]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x20]",
        "fadd dword [eax + 0x60]",
        "fld dword [eax + 0x64]",
        "fadd dword [eax + 0x24]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x20]",
        "fsub dword [eax + 0x60]",
        "fstp dword [esp]",
        "fld dword [eax + 0x24]",
        "fsub dword [eax + 0x64]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x54]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x64]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x3c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x44]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [esp + 0x60]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [esp + 0x78]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [esp + 0x50]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + 0x8]",
        "fadd dword [eax + 0x48]",
        "fld dword [eax + 0x4c]",
        "fadd dword [eax + 0xc]",
        "fld dword [eax + 0x8]",
        "fsub dword [eax + 0x48]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0xc]",
        "fsub dword [eax + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x28]",
        "fadd dword [eax + 0x68]",
        "fld dword [eax + 0x6c]",
        "fadd dword [eax + 0x2c]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x28]",
        "fsub dword [eax + 0x68]",
        "fstp dword [esp]",
        "fld dword [eax + 0x2c]",
        "fsub dword [eax + 0x6c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x74]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x84]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x20]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x24]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "fsubp",
        "fstp dword [esp + 0x68]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x88]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x80]",
        "fxch",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x30]",
        "fld dword [eax + 0x10]",
        "fadd dword [eax + 0x50]",
        "fld dword [eax + 0x54]",
        "fadd dword [eax + 0x14]",
        "fld dword [eax + 0x10]",
        "fsub dword [eax + 0x50]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x14]",
        "fsub dword [eax + 0x54]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x30]",
        "fadd dword [eax + 0x70]",
        "fld dword [eax + 0x74]",
        "fadd dword [eax + 0x34]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x30]",
        "fsub dword [eax + 0x70]",
        "fstp dword [esp]",
        "fld dword [eax + 0x34]",
        "fsub dword [eax + 0x74]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x5c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x6c]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x4c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x34]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fsub st0,st1",
        "fmul st5",
        "fstp dword [esp + 0x38]",
        "fadd st0,st1",
        "fmul st4",
        "fstp dword [esp + 0x40]",
        "fstp st0",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st0",
        "fadd st0,st2",
        "fmul st5",
        "fstp dword [esp + 0x48]",
        "fsub st0,st1",
        "fmul st4",
        "fstp dword [esp + 0x58]",
        "fstp st0",
        "fld dword [eax + 0x58]",
        "fadd dword [eax + 0x18]",
        "fld dword [eax + 0x1c]",
        "fadd dword [eax + 0x5c]",
        "fld dword [eax + 0x18]",
        "fsub dword [eax + 0x58]",
        "fstp dword [esp + 0x8]",
        "fld dword [eax + 0x1c]",
        "fsub dword [eax + 0x5c]",
        "fstp dword [esp + 0x4]",
        "fld dword [eax + 0x78]",
        "fadd dword [eax + 0x38]",
        "fld dword [eax + 0x3c]",
        "fadd dword [eax + 0x7c]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + 0x38]",
        "fsub dword [eax + 0x78]",
        "fstp dword [esp]",
        "fld dword [eax + 0x3c]",
        "fsub dword [eax + 0x7c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [esp + 0x7c]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [esp + 0x8c]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esp + 0x2c]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esp + 0x70]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fld st1",
        "fmul st4",
        "fld st1",
        "fmul st4",
        "fsubp",
        "fstp dword [esp + 0x1c]",
        "fxch",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp dword [esp + 0x14]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fld st1",
        "fmul st3",
        "fld st1",
        "fmul st5",
        "fsubp",
        "fstp dword [esp]",
        "fmul st2",
        "fxch",
        "fmul st3",
        "faddp",
        "fstp st2",
        "fstp st0",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x48]",
        "fld dword [esp + 0x28]",
        "fsub dword [esp + 0x58]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x28]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x80]",
        "fsub dword [esp]",
        "fld dword [esp + 0x30]",
        "fsub st0,st3",
        "fstp dword [esp + 0x10]",
        "fld dword [esp]",
        "fadd dword [esp + 0x80]",
        "fstp dword [esp]",
        "fxch st2",
        "fadd dword [esp + 0x30]",
        "fstp dword [esp + 0xc]",
        "fxch",
        "fld st0",
        "fadd st0,st2",
        "fstp dword [eax + 0x60]",
        "fld dword [esp + 0x10]",
        "fadd dword [esp + 0x18]",
        "fstp dword [eax + 0x64]",
        "fxch",
        "fsub st0,st1",
        "fstp dword [eax + 0x68]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0x6c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x70]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x74]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x78]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x7c]",
        "fld dword [esp + 0x38]",
        "fadd dword [esp + 0x60]",
        "fld dword [esp + 0x40]",
        "fadd dword [esp + 0x78]",
        "fld dword [esp + 0x60]",
        "fsub dword [esp + 0x38]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x78]",
        "fsub dword [esp + 0x40]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x1c]",
        "fadd dword [esp + 0x68]",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x88]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x68]",
        "fsub dword [esp + 0x1c]",
        "fstp dword [esp]",
        "fld dword [esp + 0x88]",
        "fsub dword [esp + 0x14]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + 0x40]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + 0x44]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x48]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0x4c]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x50]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x54]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x58]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x5c]",
        "fld dword [esp + 0x20]",
        "fsub dword [esp + 0x70]",
        "fld dword [esp + 0x2c]",
        "fadd dword [esp + 0x24]",
        "fld st1",
        "fsub st0,st1",
        "fmul st3",
        "fstp dword [esp + 0x14]",
        "faddp",
        "fmul st1",
        "fld dword [esp + 0x70]",
        "fadd dword [esp + 0x20]",
        "fld dword [esp + 0x24]",
        "fsub dword [esp + 0x2c]",
        "fld st1",
        "fsub st0,st1",
        "fmul st4",
        "fstp dword [esp]",
        "fadd st0,st1",
        "fmul st3",
        "fstp dword [esp + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x34]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x44]",
        "fstp dword [esp + 0x18]",
        "fld dword [esp + 0x34]",
        "fadd dword [esp + 0x3c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x14]",
        "fadd st0,st1",
        "fstp dword [eax + 0x20]",
        "fld st1",
        "fadd dword [esp + 0x18]",
        "fstp dword [eax + 0x24]",
        "fsub dword [esp + 0x14]",
        "fstp dword [eax + 0x28]",
        "fld dword [esp + 0x18]",
        "fsub st0,st1",
        "fstp dword [eax + 0x2c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x30]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x34]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x38]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x3c]",
        "fld dword [esp + 0x5c]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x6c]",
        "fadd dword [esp + 0x64]",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x5c]",
        "fstp dword [esp + 0x8]",
        "fld dword [esp + 0x64]",
        "fsub dword [esp + 0x6c]",
        "fstp dword [esp + 0x4]",
        "fld dword [esp + 0x7c]",
        "fadd dword [esp + 0x74]",
        "fld dword [esp + 0x8c]",
        "fadd dword [esp + 0x84]",
        "fstp dword [esp + 0x10]",
        "fld dword [esp + 0x74]",
        "fsub dword [esp + 0x7c]",
        "fstp dword [esp]",
        "fld dword [esp + 0x84]",
        "fsub dword [esp + 0x8c]",
        "fstp dword [esp + 0xc]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + 0xc]",
        "fld dword [esp + 0x8]",
        "fsub dword [esp + 0xc]",
        "fstp dword [eax + 0x10]",
        "fld dword [esp]",
        "fadd dword [esp + 0x4]",
        "fstp dword [eax + 0x14]",
        "fld dword [esp + 0xc]",
        "fadd dword [esp + 0x8]",
        "fstp dword [eax + 0x18]",
        "fld dword [esp + 0x4]",
        "fsub dword [esp]",
        "fstp dword [eax + 0x1c]",
        "add esp,0x90"
      ],
      "ExpectedArm64ASM": [
        "sub w8, w8, #0x90 (144)",
        "ldr s2, [x7, #4]",
        "fcvt d2, s2",
        "ldr s3, [x7, #8]",
        "fcvt d3, s3",
        "fmul d3, d2, d3",
        "ldr s4, [x7, #8]",
        "fcvt d4, s4",
        "fadd d4, d3, d4",
        "ldr s5, [x4, #64]",
        "fcvt d5, s5",
        "ldr s6, [x4]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #68]",
        "fcvt d6, s6",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x4]",
        "fcvt d7, s7",
        "ldr s8, [x4, #64]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "ldr s8, [x4, #68]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #32]",
        "fcvt d7, s7",
        "ldr s8, [x4, #96]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #100]",
        "fcvt d8, s8",
        "ldr s9, [x4, #36]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #32]",
        "fcvt d8, s8",
        "ldr s9, [x4, #96]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x4, #36]",
        "fcvt d8, s8",
        "ldr s9, [x4, #100]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fadd d8, d7, d5",
        "fcvt s8, d8",
        "str s8, [x8, #84]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #100]",
        "strb wzr, [x28, #1049]",
        "fsub d5, d5, d7",
        "fcvt s5, d5",
        "str s5, [x8, #60]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fsub d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8, #68]",
        "ldr s5, [x8, #8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #96]",
        "ldr s5, [x8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #120]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #80]",
        "ldr s5, [x8, #4]",
        "fcvt d5, s5",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #40]",
        "ldr s5, [x4, #8]",
        "fcvt d5, s5",
        "ldr s6, [x4, #72]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #76]",
        "fcvt d6, s6",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x4, #8]",
        "fcvt d7, s7",
        "ldr s8, [x4, #72]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "ldr s8, [x4, #76]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #40]",
        "fcvt d7, s7",
        "ldr s8, [x4, #104]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #108]",
        "fcvt d8, s8",
        "ldr s9, [x4, #44]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #40]",
        "fcvt d8, s8",
        "ldr s9, [x4, #104]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x4, #44]",
        "fcvt d8, s8",
        "ldr s9, [x4, #108]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fadd d8, d7, d5",
        "fcvt s8, d8",
        "str s8, [x8, #116]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #132]",
        "strb wzr, [x28, #1049]",
        "fsub d5, d5, d7",
        "fcvt s5, d5",
        "str s5, [x8, #32]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fsub d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8, #36]",
        "ldr s5, [x8, #8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fmul d7, d5, d4",
        "fmul d8, d6, d3",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #104]",
        "fmul d6, d6, d4",
        "strb wzr, [x28, #1049]",
        "fmul d5, d5, d3",
        "fadd d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8, #136]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fmul d7, d5, d3",
        "fmul d8, d6, d4",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #128]",
        "strb wzr, [x28, #1049]",
        "fmul d5, d5, d4",
        "strb wzr, [x28, #1049]",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #48]",
        "ldr s5, [x4, #16]",
        "fcvt d5, s5",
        "ldr s6, [x4, #80]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #84]",
        "fcvt d6, s6",
        "ldr s7, [x4, #20]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x4, #16]",
        "fcvt d7, s7",
        "ldr s8, [x4, #80]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #20]",
        "fcvt d7, s7",
        "ldr s8, [x4, #84]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #48]",
        "fcvt d7, s7",
        "ldr s8, [x4, #112]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #116]",
        "fcvt d8, s8",
        "ldr s9, [x4, #52]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #48]",
        "fcvt d8, s8",
        "ldr s9, [x4, #112]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x4, #52]",
        "fcvt d8, s8",
        "ldr s9, [x4, #116]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fadd d8, d7, d5",
        "fcvt s8, d8",
        "str s8, [x8, #92]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #108]",
        "strb wzr, [x28, #1049]",
        "fsub d5, d5, d7",
        "fcvt s5, d5",
        "str s5, [x8, #76]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fsub d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8, #52]",
        "ldr s5, [x8, #8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fsub d7, d5, d6",
        "fmul d7, d7, d2",
        "fcvt s7, d7",
        "str s7, [x8, #56]",
        "fadd d5, d6, d5",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #64]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fadd d7, d6, d5",
        "fmul d7, d7, d2",
        "fcvt s7, d7",
        "str s7, [x8, #72]",
        "fsub d5, d6, d5",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #88]",
        "ldr s5, [x4, #88]",
        "fcvt d5, s5",
        "ldr s6, [x4, #24]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x4, #28]",
        "fcvt d6, s6",
        "ldr s7, [x4, #92]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "ldr s7, [x4, #24]",
        "fcvt d7, s7",
        "ldr s8, [x4, #88]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #8]",
        "ldr s7, [x4, #28]",
        "fcvt d7, s7",
        "ldr s8, [x4, #92]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #4]",
        "ldr s7, [x4, #120]",
        "fcvt d7, s7",
        "ldr s8, [x4, #56]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "ldr s8, [x4, #60]",
        "fcvt d8, s8",
        "ldr s9, [x4, #124]",
        "fcvt d9, s9",
        "fadd d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #16]",
        "ldr s8, [x4, #56]",
        "fcvt d8, s8",
        "ldr s9, [x4, #120]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8]",
        "ldr s8, [x4, #60]",
        "fcvt d8, s8",
        "ldr s9, [x4, #124]",
        "fcvt d9, s9",
        "fsub d8, d8, d9",
        "fcvt s8, d8",
        "str s8, [x8, #12]",
        "fadd d8, d7, d5",
        "fcvt s8, d8",
        "str s8, [x8, #124]",
        "ldr s8, [x8, #16]",
        "fcvt d8, s8",
        "fadd d8, d8, d6",
        "fcvt s8, d8",
        "str s8, [x8, #140]",
        "strb wzr, [x28, #1049]",
        "fsub d5, d5, d7",
        "fcvt s5, d5",
        "str s5, [x8, #44]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fsub d5, d6, d5",
        "fcvt s5, d5",
        "str s5, [x8, #112]",
        "ldr s5, [x8, #8]",
        "fcvt d5, s5",
        "ldr s6, [x8, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "ldr s7, [x8, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fmul d7, d5, d3",
        "fmul d8, d6, d4",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "fmul d5, d5, d4",
        "strb wzr, [x28, #1049]",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "ldr s5, [x8, #12]",
        "fcvt d5, s5",
        "ldr s6, [x8, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #4]",
        "fcvt d6, s6",
        "ldr s7, [x8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fmul d7, d5, d4",
        "fmul d8, d6, d3",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8]",
        "fmul d4, d6, d4",
        "strb wzr, [x28, #1049]",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "ldr s5, [x8, #72]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "ldr s6, [x8, #88]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #24]",
        "ldr s5, [x8, #72]",
        "fcvt d5, s5",
        "ldr s6, [x8, #80]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #8]",
        "ldr s5, [x8, #88]",
        "fcvt d5, s5",
        "ldr s6, [x8, #40]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #4]",
        "ldr s5, [x8, #128]",
        "fcvt d5, s5",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #48]",
        "fcvt d6, s6",
        "fsub d6, d6, d3",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldr s6, [x8]",
        "fcvt d6, s6",
        "ldr s7, [x8, #128]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8]",
        "strb wzr, [x28, #1049]",
        "ldr s6, [x8, #48]",
        "fcvt d6, s6",
        "fadd d3, d3, d6",
        "fcvt s3, d3",
        "str s3, [x8, #12]",
        "strb wzr, [x28, #1049]",
        "fadd d3, d5, d4",
        "fcvt s3, d3",
        "str s3, [x4, #96]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "ldr s6, [x8, #24]",
        "fcvt d6, s6",
        "fadd d3, d3, d6",
        "fcvt s3, d3",
        "str s3, [x4, #100]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d4, d5",
        "fcvt s3, d3",
        "str s3, [x4, #104]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #108]",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "ldr s4, [x8, #12]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #112]",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "ldr s4, [x8, #4]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #116]",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "ldr s4, [x8, #8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #120]",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "ldr s4, [x8]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #124]",
        "ldr s3, [x8, #56]",
        "fcvt d3, s3",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #64]",
        "fcvt d4, s4",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "ldr s6, [x8, #56]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #8]",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "ldr s6, [x8, #64]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #4]",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x8, #20]",
        "fcvt d6, s6",
        "ldr s7, [x8, #136]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "ldr s7, [x8, #28]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8]",
        "ldr s6, [x8, #136]",
        "fcvt d6, s6",
        "ldr s7, [x8, #20]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #12]",
        "fadd d6, d5, d3",
        "fcvt s6, d6",
        "str s6, [x4, #64]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fadd d6, d6, d4",
        "fcvt s6, d6",
        "str s6, [x4, #68]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "str s3, [x4, #72]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x4, #76]",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "ldr s4, [x8, #12]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #80]",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "ldr s4, [x8, #4]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #84]",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "ldr s4, [x8, #8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #88]",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "ldr s4, [x8]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x4, #92]",
        "ldr s3, [x8, #32]",
        "fcvt d3, s3",
        "ldr s4, [x8, #112]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #44]",
        "fcvt d4, s4",
        "ldr s5, [x8, #36]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fsub d5, d3, d4",
        "fmul d5, d5, d2",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "fadd d3, d3, d4",
        "fmul d3, d3, d2",
        "ldr s4, [x8, #112]",
        "fcvt d4, s4",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #36]",
        "fcvt d5, s5",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fsub d6, d4, d5",
        "fmul d6, d6, d2",
        "fcvt s6, d6",
        "str s6, [x8]",
        "fadd d4, d5, d4",
        "fmul d2, d4, d2",
        "fcvt s2, d2",
        "str s2, [x8, #12]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s4, [x8, #52]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "ldr s4, [x8, #76]",
        "fcvt d4, s4",
        "ldr s5, [x8, #68]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #24]",
        "ldr s4, [x8, #52]",
        "fcvt d4, s4",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #68]",
        "fcvt d4, s4",
        "ldr s5, [x8, #76]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #4]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fadd d4, d4, d2",
        "fcvt s4, d4",
        "str s4, [x4, #32]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "fadd d4, d3, d4",
        "fcvt s4, d4",
        "str s4, [x4, #36]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fsub d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x4, #40]",
        "ldr s2, [x8, #24]",
        "fcvt d2, s2",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #44]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #48]",
        "ldr s2, [x8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #52]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #56]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #60]",
        "ldr s2, [x8, #92]",
        "fcvt d2, s2",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #108]",
        "fcvt d3, s3",
        "ldr s4, [x8, #100]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #8]",
        "ldr s4, [x8, #100]",
        "fcvt d4, s4",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #4]",
        "ldr s4, [x8, #124]",
        "fcvt d4, s4",
        "ldr s5, [x8, #116]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #140]",
        "fcvt d5, s5",
        "ldr s6, [x8, #132]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #16]",
        "ldr s5, [x8, #116]",
        "fcvt d5, s5",
        "ldr s6, [x8, #124]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8]",
        "ldr s5, [x8, #132]",
        "fcvt d5, s5",
        "ldr s6, [x8, #140]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #12]",
        "fadd d5, d4, d2",
        "fcvt s5, d5",
        "str s5, [x4]",
        "ldr s5, [x8, #16]",
        "fcvt d5, s5",
        "fadd d5, d5, d3",
        "fcvt s5, d5",
        "str s5, [x4, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d2, d2, d4",
        "fcvt s2, d2",
        "str s2, [x4, #8]",
        "ldr s2, [x8, #16]",
        "fcvt d2, s2",
        "fsub d2, d3, d2",
        "fcvt s2, d2",
        "str s2, [x4, #12]",
        "ldr s2, [x8, #8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #12]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #16]",
        "ldr s2, [x8]",
        "fcvt d2, s2",
        "ldr s3, [x8, #4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #20]",
        "ldr s2, [x8, #12]",
        "fcvt d2, s2",
        "ldr s3, [x8, #8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #24]",
        "ldr s2, [x8, #4]",
        "fcvt d2, s2",
        "ldr s3, [x8]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4, #28]",
        "mvn w27, w8",
        "adds w26, w8, #0x90 (144)",
        "cfinv",
        "mov x8, x26",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfefe",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block3": {
      "x86InstructionCount": 702,
      "ExpectedInstructionCount": 92,
      "x86Insts": [
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x7c],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x78],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "mov dword [ebp + -0x74],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x70],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x6c],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0xc",
        "mov dword [ebp + -0x68],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x64],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "add eax,0x1",
        "shl eax,0x5",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x60],eax",
        "mov eax,dword [ebp + 0xffffff44]",
        "mov ecx,dword [eax + 0x4]",
        "mov edx,dword [ebp + 0xffffff7c]",
        "mov eax,edx",
        "add eax,eax",
        "add eax,edx",
        "shl eax,0x5",
        "add eax,0x40",
        "lea eax,[ecx + eax*0x1]",
        "add eax,0x18",
        "mov dword [ebp + -0x5c],eax",
        "lea eax,[ebp + 0xffffff04]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffef8]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffeec]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "lea eax,[ebp + 0xfffffee0]",
        "mov dword [esp],eax",
        "call 0x0819ba1a",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe68],eax",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe6c],eax",
        "mov eax,dword [ebp + -0x7c]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe70],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe20],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe24],eax",
        "mov eax,dword [ebp + -0x78]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe28],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe8c],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe90],eax",
        "mov eax,dword [ebp + -0x74]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffe94],eax",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe44]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe48]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe4c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe2c]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe30]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe34]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe38]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe3c]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe40]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe5c]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe60]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe64]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffeec],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffef0],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffef4],eax",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffeec]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffeec]",
        "fld dword [ebp + 0xfffffef0]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef0]",
        "fld dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffef4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffeec]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe74]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffef0]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe78]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffef4]",
        "fld dword [0x085cefdc]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe7c]",
        "mov eax,dword [ebp + 0xffffff04]",
        "mov dword [ebp + 0xfffffee0],eax",
        "mov eax,dword [ebp + 0xffffff08]",
        "mov dword [ebp + 0xfffffee4],eax",
        "mov eax,dword [ebp + 0xffffff0c]",
        "mov dword [ebp + 0xfffffee8],eax",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmulp",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + 0xfffffee0]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee0]",
        "fld dword [ebp + 0xfffffee4]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x4]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee4]",
        "fld dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x68]",
        "fld dword [eax + 0x8]",
        "fmul dword [ebp + -0x80]",
        "fsubp",
        "fstp dword [ebp + 0xfffffee8]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax]",
        "fld dword [ebp + 0xfffffee0]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe80]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x4]",
        "fld dword [ebp + 0xfffffee4]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe84]",
        "mov eax,dword [ebp + -0x74]",
        "fld dword [eax + 0x8]",
        "fld dword [ebp + 0xfffffee8]",
        "fld dword [0x085cefe0]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + 0xfffffe88]",
        "fld dword [ebp + 0xfffffe2c]",
        "fld dword [ebp + 0xfffffe44]",
        "faddp",
        "fld dword [ebp + 0xfffffe38]",
        "faddp",
        "fld dword [ebp + 0xfffffe5c]",
        "faddp",
        "fld dword [ebp + 0xfffffe74]",
        "faddp",
        "fld dword [ebp + 0xfffffe80]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe30]",
        "fld dword [ebp + 0xfffffe48]",
        "faddp",
        "fld dword [ebp + 0xfffffe3c]",
        "faddp",
        "fld dword [ebp + 0xfffffe60]",
        "faddp",
        "fld dword [ebp + 0xfffffe78]",
        "faddp",
        "fld dword [ebp + 0xfffffe84]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe34]",
        "fld dword [ebp + 0xfffffe4c]",
        "faddp",
        "fld dword [ebp + 0xfffffe40]",
        "faddp",
        "fld dword [ebp + 0xfffffe64]",
        "faddp",
        "fld dword [ebp + 0xfffffe7c]",
        "faddp",
        "fld dword [ebp + 0xfffffe88]",
        "faddp",
        "fld dword [0x085cefe4]",
        "fdivp",
        "fstp dword [ebp + 0xfffffe58]",
        "fld dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe20]",
        "fld dword [ebp + 0xfffffe68]",
        "faddp",
        "fld dword [ebp + 0xfffffe8c]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe50]",
        "fld dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe24]",
        "fld dword [ebp + 0xfffffe6c]",
        "faddp",
        "fld dword [ebp + 0xfffffe90]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe54]",
        "fld dword [ebp + 0xfffffe58]",
        "fld dword [ebp + 0xfffffe28]",
        "fld dword [ebp + 0xfffffe70]",
        "faddp",
        "fld dword [ebp + 0xfffffe94]",
        "faddp",
        "fld dword [0x085cefe8]",
        "fdivp",
        "faddp",
        "fstp dword [ebp + 0xfffffe58]",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffebc],eax",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffec0],eax",
        "mov eax,dword [ebp + -0x70]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffec4],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffe98],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffe9c],eax",
        "mov eax,dword [ebp + -0x6c]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffea0],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax]",
        "mov dword [ebp + 0xfffffed4],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax + 0x4]",
        "mov dword [ebp + 0xfffffed8],eax",
        "mov eax,dword [ebp + -0x68]",
        "mov eax,dword [eax + 0x8]",
        "mov dword [ebp + 0xfffffedc],eax",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax]",
        "faddp",
        "fstp dword [ebp + 0xfffffef8]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x4]",
        "faddp",
        "fstp dword [ebp + 0xfffffefc]",
        "mov eax,dword [ebp + -0x70]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x6c]",
        "fld dword [eax + 0x8]",
        "faddp",
        "fstp dword [ebp + 0xffffff00]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + 0xffffff04]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x4]",
        "fsubp",
        "fstp dword [ebp + 0xffffff08]",
        "mov eax,dword [ebp + -0x78]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + -0x7c]",
        "fld dword [eax + 0x8]",
        "fsubp",
        "fstp dword [ebp + 0xffffff0c]",
        "fld dword [ebp + 0xffffff04]",
        "fld dword [ebp + 0xffffff04]",
        "fmulp",
        "fld dword [ebp + 0xffffff08]",
        "fld dword [ebp + 0xffffff08]",
        "fmulp",
        "faddp",
        "fld dword [ebp + 0xffffff0c]",
        "fld dword [ebp + 0xffffff0c]",
        "fmulp",
        "faddp",
        "fstp qword [esp]",
        "call 0x0811028c",
        "fstp dword [ebp + -0x80]",
        "fld dword [ebp + -0x80]",
        "fldz",
        "fxch",
        "fucomip st0,st1",
        "fstp st0",
        "seta al",
        "test al,al"
      ],
      "ExpectedArm64ASM": [
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "stur w4, [x9, #-124]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "stur w4, [x9, #-120]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "stur w4, [x9, #-116]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-112]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-108]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "add w4, w4, #0xc (12)",
        "stur w4, [x9, #-104]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0x18 (24)",
        "stur w4, [x9, #-100]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "add w4, w4, #0x1 (1)",
        "lsl w4, w4, #5",
        "add w4, w7, w4",
        "add w4, w4, #0x18 (24)",
        "stur w4, [x9, #-96]",
        "ldur w4, [x9, #-188]",
        "ldr w7, [x4, #4]",
        "ldur w5, [x9, #-132]",
        "add w4, w5, w5",
        "add w4, w4, w5",
        "lsl w4, w4, #5",
        "add w4, w4, #0x40 (64)",
        "add w4, w7, w4",
        "mvn w27, w4",
        "adds w26, w4, #0x18 (24)",
        "mov x4, x26",
        "stur w4, [x9, #-92]",
        "sub w4, w9, #0xfc (252)",
        "str w4, [x8]",
        "mov w20, #0x140",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "cfinv"
      ]
    },
    "Block4": {
      "x86InstructionCount": 351,
      "ExpectedInstructionCount": 644,
      "x86Insts": [
        "mov ebp,dword [esp + 0x64]",
        "fadd dword [ebp + 0x8]",
        "add ebp,0x10",
        "mov dword [esp + 0x64],ebp",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x34]",
        "fadd dword [ebp + -0x4]",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x38]",
        "fadd dword [ebp]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x3c]",
        "fsub dword [ebp + 0x4]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x8]",
        "mov ebp,dword [ebp + -0x4]",
        "mov dword [esp + 0x34],ebp",
        "mov ebp,dword [esp + 0x64]",
        "mov ebp,dword [ebp]",
        "mov dword [esp + 0x38],ebp",
        "mov ebp,dword [esp + 0x64]",
        "fld dword [ebp + 0x4]",
        "fchs",
        "fstp dword [esp + 0x3c]",
        "fld dword [edi + -0x8]",
        "fadd dword [edx + -0x8]",
        "fld dword [edi + -0x4]",
        "fchs",
        "fsub dword [edx + -0x4]",
        "fld dword [edi + -0x8]",
        "fsub dword [edx + -0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [edx + -0x4]",
        "fsub dword [edi + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [edi]",
        "fadd dword [edx]",
        "fstp dword [esp + 0x44]",
        "fld dword [edi + 0x4]",
        "fchs",
        "fsub dword [edx + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [edi]",
        "fsub dword [edx]",
        "fstp dword [esp + 0x54]",
        "fld dword [edx + 0x4]",
        "fsub dword [edi + 0x4]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ecx + -0x8]",
        "fadd dword [esi + -0x8]",
        "fld dword [ecx + -0x4]",
        "fadd dword [esi + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [esi + -0x8]",
        "fsub dword [ecx + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [esi + -0x4]",
        "fsub dword [ecx + -0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [ecx]",
        "fadd dword [esi]",
        "fstp dword [esp + 0x48]",
        "fld dword [ecx + 0x4]",
        "fadd dword [esi + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [esi]",
        "fsub dword [ecx]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + 0x4]",
        "fsub dword [ecx + 0x4]",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [edi + -0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [edi + -0x4]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [edi]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [edi + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esi + -0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [esi + -0x4]",
        "fstp st0",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esi]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x50]",
        "fstp dword [esi + 0x4]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx + -0x8]",
        "fld dword [esp + 0x6c]",
        "fmul st1",
        "fld dword [esp + 0x68]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld st2",
        "fmul st2",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx]",
        "fld st2",
        "fmul st1",
        "fld dword [esp + 0x34]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx + -0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x30]",
        "fadd dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x30]",
        "fchs",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x30]",
        "fsub dword [ebp + 0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x30]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x30]",
        "fchs",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x50]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x30]",
        "fsub dword [ebp]",
        "fstp dword [esp + 0x54]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebp + 0x8]",
        "fadd dword [ebx + 0x8]",
        "fld dword [ebp + 0xc]",
        "fadd dword [ebx + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebp + 0x8]",
        "fsub dword [ebx + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + 0xc]",
        "fsub dword [ebx + 0xc]",
        "fstp dword [esp + 0x18]",
        "fld dword [ebp]",
        "fadd dword [ebx]",
        "fstp dword [esp + 0x48]",
        "fld dword [ebx + 0x4]",
        "fadd dword [ebp + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [ebp]",
        "fsub dword [ebx]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebx + 0x4]",
        "fstp dword [esp + 0x58]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [ebp + 0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x50]",
        "fsub dword [esp + 0x4c]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [ebp + 0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x4c]",
        "fadd dword [esp + 0x50]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x30]",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x68]",
        "fmul st1",
        "fld dword [esp + 0x6c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fadd dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp]",
        "sub ebp,0x10",
        "fld dword [esp + 0x34]",
        "mov dword [esp + 0x30],ebp",
        "fmul st1",
        "add ecx,0x10",
        "fld st3",
        "add edx,0x10",
        "fmul st3",
        "add esi,0x10",
        "add edi,0x10",
        "faddp",
        "sub ebx,0x10",
        "fstp dword [ebp + 0x14]",
        "mov ebp,dword [esp + 0x70]",
        "sub ebp,0x10",
        "fstp st0",
        "mov dword [esp + 0x70],ebp",
        "fstp st0",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x14]",
        "sub ebp,0x10",
        "fsub dword [esp + 0x18]",
        "mov dword [esp + 0x24],ebp",
        "fld dword [esp + 0x1c]",
        "mov ebp,dword [esp + 0x7c]",
        "fsub dword [esp + 0x20]",
        "dec ebp",
        "fld dword [esp + 0x2c]",
        "mov dword [esp + 0x7c],ebp",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x18]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fsub dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x14]",
        "fstp st0",
        "fstp st0"
      ],
      "ExpectedArm64ASM": [
        "ldr w9, [x8, #100]",
        "ldr s2, [x9, #8]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w9, w9, #0x10 (16)",
        "str w9, [x8, #100]",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "ldr s3, [x9]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s3, [x9, #4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #40]",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur w9, [x9, #-4]",
        "str w9, [x8, #52]",
        "ldr w9, [x8, #100]",
        "ldr w9, [x9]",
        "str w9, [x8, #56]",
        "ldr w9, [x8, #100]",
        "ldr s3, [x9, #4]",
        "fcvt d3, s3",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x23, x28, x22, lsl #4",
        "fneg v3.2d, v3.2d",
        "fcvt s3, d3",
        "str s3, [x8, #60]",
        "ldur s3, [x11, #-8]",
        "fcvt d3, s3",
        "ldur s4, [x5, #-8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldur s4, [x11, #-4]",
        "fcvt d4, s4",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x12, x28, x22, lsl #4",
        "fneg v4.2d, v4.2d",
        "ldur s5, [x5, #-4]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldur s5, [x11, #-8]",
        "fcvt d5, s5",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s6, [x5, #-8]",
        "fcvt d6, s6",
        "add x13, x28, x22, lsl #4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "ldur s5, [x5, #-4]",
        "fcvt d5, s5",
        "ldur s6, [x11, #-4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #28]",
        "ldr s5, [x11]",
        "fcvt d5, s5",
        "ldr s6, [x5]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #68]",
        "ldr s5, [x11, #4]",
        "fcvt d5, s5",
        "fneg v5.2d, v5.2d",
        "ldr s6, [x5, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #80]",
        "ldr s5, [x11]",
        "fcvt d5, s5",
        "ldr s6, [x5]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #84]",
        "ldr s5, [x5, #4]",
        "fcvt d5, s5",
        "ldr s6, [x11, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #92]",
        "ldur s5, [x7, #-8]",
        "fcvt d5, s5",
        "ldur s6, [x10, #-8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldur s6, [x7, #-4]",
        "fcvt d6, s6",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s7, [x10, #-4]",
        "fcvt d7, s7",
        "add x22, x28, x22, lsl #4",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldur s6, [x10, #-8]",
        "fcvt d6, s6",
        "ldur s7, [x7, #-8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #32]",
        "ldur s6, [x10, #-4]",
        "fcvt d6, s6",
        "ldur s7, [x7, #-4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #24]",
        "ldr s6, [x7]",
        "fcvt d6, s6",
        "ldr s7, [x10]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #72]",
        "ldr s6, [x7, #4]",
        "fcvt d6, s6",
        "ldr s7, [x10, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #76]",
        "ldr s6, [x10]",
        "fcvt d6, s6",
        "ldr s7, [x7]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "ldr s6, [x10, #4]",
        "fcvt d6, s6",
        "ldr s7, [x7, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #88]",
        "fadd d6, d5, d3",
        "fcvt s6, d6",
        "stur s6, [x11, #-8]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fsub d6, d4, d6",
        "fcvt s6, d6",
        "stur s6, [x11, #-4]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x11]",
        "ldr s6, [x8, #80]",
        "fcvt d6, s6",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x11, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "stur s3, [x10, #-8]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "stur s3, [x10, #-4]",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "ldr s4, [x8, #72]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x10]",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x10, #4]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x5, #-8]",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x5, #-4]",
        "ldr s3, [x8, #88]",
        "fcvt d3, s3",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fmul d5, d2, d3",
        "ldr s6, [x8, #52]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x5]",
        "fmul d4, d2, d4",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x5, #4]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #28]",
        "fcvt d4, s4",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr w9, [x8, #36]",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x7, #-8]",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x7, #-4]",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "ldr s4, [x8, #88]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #56]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x7]",
        "ldr s5, [x8, #56]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x7, #4]",
        "ldr s3, [x9, #8]",
        "fcvt d3, s3",
        "ldr w9, [x8, #48]",
        "ldr s4, [x9, #8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr w9, [x8, #36]",
        "ldr s4, [x9, #12]",
        "fcvt d4, s4",
        "ldr w9, [x8, #48]",
        "fneg v4.2d, v4.2d",
        "ldr s5, [x9, #12]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr w9, [x8, #36]",
        "ldr s5, [x9, #8]",
        "fcvt d5, s5",
        "ldr w9, [x8, #48]",
        "ldr s6, [x9, #8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #20]",
        "ldr s5, [x9, #12]",
        "fcvt d5, s5",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #48]",
        "fcvt s5, d5",
        "str s5, [x8, #28]",
        "ldr s5, [x9]",
        "fcvt d5, s5",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #68]",
        "ldr s5, [x9, #4]",
        "fcvt d5, s5",
        "ldr w9, [x8, #48]",
        "fneg v5.2d, v5.2d",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #80]",
        "ldr s5, [x9]",
        "fcvt d5, s5",
        "ldr w9, [x8, #48]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #84]",
        "ldr s5, [x9, #4]",
        "fcvt d5, s5",
        "ldr w9, [x8, #36]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #112]",
        "fcvt s5, d5",
        "str s5, [x8, #92]",
        "ldr s5, [x9, #8]",
        "fcvt d5, s5",
        "ldr s6, [x6, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "ldr s7, [x6, #12]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldr s6, [x9, #8]",
        "fcvt d6, s6",
        "ldr s7, [x6, #8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #32]",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "ldr s7, [x6, #12]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #24]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "ldr s7, [x6]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #72]",
        "ldr s6, [x6, #4]",
        "fcvt d6, s6",
        "ldr s7, [x9, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #76]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "ldr s7, [x6]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "ldr w9, [x8, #36]",
        "ldr s7, [x6, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #88]",
        "fadd d6, d5, d3",
        "fcvt s6, d6",
        "str s6, [x9, #8]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fsub d6, d4, d6",
        "fcvt s6, d6",
        "str s6, [x9, #12]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x9]",
        "ldr s6, [x8, #80]",
        "fcvt d6, s6",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x9, #4]",
        "ldr w9, [x8, #112]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "str s3, [x9, #8]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "ldr s4, [x8, #72]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x9]",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x9, #4]",
        "ldr w9, [x8, #48]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "ldr s6, [x8, #108]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x9, #8]",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #88]",
        "fcvt d3, s3",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "fmul d6, d2, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x9]",
        "sub w9, w9, #0x10 (16)",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "str w9, [x8, #48]",
        "fmul d4, d5, d4",
        "add w7, w7, #0x10 (16)",
        "add w5, w5, #0x10 (16)",
        "fmul d3, d2, d3",
        "add w10, w10, #0x10 (16)",
        "add w11, w11, #0x10 (16)",
        "fadd d3, d4, d3",
        "sub w6, w6, #0x10 (16)",
        "fcvt s3, d3",
        "str s3, [x9, #20]",
        "ldr w9, [x8, #112]",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #112]",
        "ldr w9, [x8, #36]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "subs w9, w9, #0x10 (16)",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "str w9, [x8, #36]",
        "ldr s4, [x8, #28]",
        "fcvt d4, s4",
        "ldr w9, [x8, #124]",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "cset x14, hs",
        "subs w26, w9, #0x1 (1)",
        "rmif x14, #63, #nzCv",
        "mov x27, x9",
        "mov x9, x26",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "str w9, [x8, #124]",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #40]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x6, #24]",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x6, #28]",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "ldr s4, [x8, #88]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #56]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #60]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x6, #16]",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #56]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fsub d5, d5, d6",
        "fcvt s7, d5",
        "str s7, [x6, #20]",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]",
        "str d6, [x22, #1056]",
        "str d5, [x13, #1056]",
        "str d4, [x12, #1056]",
        "str d3, [x23, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf0f0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block5": {
      "x86InstructionCount": 346,
      "ExpectedInstructionCount": 643,
      "x86Insts": [
        "mov ebp,dword [esp + 0x64]",
        "fadd dword [ebp + 0x8]",
        "add ebp,0x10",
        "mov dword [esp + 0x64],ebp",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x6c]",
        "fld dword [esp + 0x34]",
        "fadd dword [ebp + -0x4]",
        "fmul dword [esp + 0x74]",
        "fstp dword [esp + 0x68]",
        "fld dword [esp + 0x38]",
        "fadd dword [ebp]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x3c]",
        "fsub dword [ebp + 0x4]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x8]",
        "mov ebp,dword [ebp + -0x4]",
        "mov dword [esp + 0x34],ebp",
        "mov ebp,dword [esp + 0x64]",
        "mov ebp,dword [ebp]",
        "mov dword [esp + 0x38],ebp",
        "mov ebp,dword [esp + 0x64]",
        "fld dword [ebp + 0x4]",
        "fchs",
        "fstp dword [esp + 0x3c]",
        "fld dword [edx + -0x8]",
        "fadd dword [edi + -0x8]",
        "fld dword [edi + -0x4]",
        "fadd dword [edx + -0x4]",
        "fld dword [edi + -0x8]",
        "fsub dword [edx + -0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [edi + -0x4]",
        "fsub dword [edx + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [edi]",
        "fadd dword [edx]",
        "fstp dword [esp + 0x44]",
        "fld dword [edi + 0x4]",
        "fadd dword [edx + 0x4]",
        "fstp dword [esp + 0x4c]",
        "fld dword [edi]",
        "fsub dword [edx]",
        "fstp dword [esp + 0x58]",
        "fld dword [edi + 0x4]",
        "fsub dword [edx + 0x4]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ecx + -0x8]",
        "fadd dword [esi + -0x8]",
        "fld dword [ecx + -0x4]",
        "fadd dword [esi + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [esi + -0x8]",
        "fsub dword [ecx + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [esi + -0x4]",
        "fsub dword [ecx + -0x4]",
        "fstp dword [esp + 0x14]",
        "fld dword [ecx]",
        "fadd dword [esi]",
        "fstp dword [esp + 0x48]",
        "fld dword [ecx + 0x4]",
        "fadd dword [esi + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [esi]",
        "fsub dword [ecx]",
        "fstp dword [esp + 0x60]",
        "fld dword [esi + 0x4]",
        "fsub dword [ecx + 0x4]",
        "fstp dword [esp + 0x54]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [edi + -0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [edi + -0x4]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [edi]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [edi + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [esi + -0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [esi + -0x4]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [esi]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fstp dword [esi + 0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx + -0x8]",
        "fld dword [esp + 0x6c]",
        "fmul st1",
        "fld dword [esp + 0x68]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld st2",
        "fmul st2",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fsubp",
        "fstp dword [edx]",
        "fld st2",
        "fmul st1",
        "fld dword [esp + 0x34]",
        "fmul st3",
        "faddp",
        "fstp dword [edx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "faddp",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [ecx + -0x8]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "faddp",
        "fstp dword [ecx]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "fsubp",
        "fstp dword [ecx + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0x8]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x18]",
        "fld dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0xc]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x1c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x44]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fadd dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x4c]",
        "fld dword [ebp]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp]",
        "mov ebp,dword [esp + 0x24]",
        "fstp dword [esp + 0x58]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fsub dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fstp dword [esp + 0x5c]",
        "fld dword [ebp + 0x8]",
        "fadd dword [ebx + 0x8]",
        "fld dword [ebp + 0xc]",
        "fadd dword [ebx + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [ebp + 0x8]",
        "fsub dword [ebx + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + 0xc]",
        "fsub dword [ebx + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld dword [ebx]",
        "fadd dword [ebp]",
        "fstp dword [esp + 0x48]",
        "fld dword [ebp + 0x4]",
        "fadd dword [ebx + 0x4]",
        "fstp dword [esp + 0x50]",
        "fld dword [ebp]",
        "fsub dword [ebx]",
        "fstp dword [esp + 0x60]",
        "fld dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x24]",
        "fsub dword [ebx + 0x4]",
        "fstp dword [esp + 0x54]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x48]",
        "fadd dword [esp + 0x44]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x50]",
        "fadd dword [esp + 0x4c]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x70]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [ebp + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [ebp + 0xc]",
        "fld dword [esp + 0x44]",
        "fsub dword [esp + 0x48]",
        "fstp dword [ebp]",
        "fld dword [esp + 0x4c]",
        "fsub dword [esp + 0x50]",
        "fstp dword [ebp + 0x4]",
        "mov ebp,dword [esp + 0x28]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld dword [esp + 0x68]",
        "fmul st2",
        "fld dword [esp + 0x6c]",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp + 0x8]",
        "fld dword [esp + 0x68]",
        "fmul st1",
        "fld dword [esp + 0x6c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebp + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x58]",
        "fsub dword [esp + 0x54]",
        "fld dword [esp + 0x60]",
        "fadd dword [esp + 0x5c]",
        "fld dword [esp + 0x34]",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [ebp]",
        "fld dword [esp + 0x34]",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "sub ebp,0x10",
        "mov dword [esp + 0x28],ebp",
        "add ecx,0x10",
        "faddp",
        "add edx,0x10",
        "add esi,0x10",
        "fstp dword [ebp + 0x14]",
        "mov ebp,dword [esp + 0x70]",
        "sub ebp,0x10",
        "fstp st0",
        "mov dword [esp + 0x70],ebp",
        "fstp st0",
        "mov ebp,dword [esp + 0x24]",
        "fld dword [esp + 0x14]",
        "sub ebp,0x10",
        "fadd dword [esp + 0x18]",
        "mov dword [esp + 0x24],ebp",
        "fld dword [esp + 0x1c]",
        "mov ebp,dword [esp + 0x7c]",
        "fsub dword [esp + 0x20]",
        "add edi,0x10",
        "fld dword [esp + 0x30]",
        "sub ebx,0x10",
        "dec ebp",
        "fmul st1",
        "mov dword [esp + 0x7c],ebp",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x18]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x1c]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x54]",
        "fadd dword [esp + 0x58]",
        "fld dword [esp + 0x5c]",
        "fsub dword [esp + 0x60]",
        "fld dword [esp + 0x38]",
        "fmul st1",
        "fld dword [esp + 0x3c]",
        "fmul st3",
        "faddp",
        "fstp dword [ebx + 0x10]",
        "fld dword [esp + 0x3c]",
        "fmul st1",
        "fld dword [esp + 0x38]",
        "fmul st3",
        "fsubp",
        "fstp dword [ebx + 0x14]",
        "fstp st0",
        "fstp st0"
      ],
      "ExpectedArm64ASM": [
        "ldr w9, [x8, #100]",
        "ldr s2, [x9, #8]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w9, w9, #0x10 (16)",
        "str w9, [x8, #100]",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #108]",
        "ldr s2, [x8, #52]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #104]",
        "ldr s2, [x8, #56]",
        "fcvt d2, s2",
        "ldr s3, [x9]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #48]",
        "ldr s2, [x8, #60]",
        "fcvt d2, s2",
        "ldr s3, [x9, #4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x8, #120]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x8, #44]",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur w9, [x9, #-4]",
        "str w9, [x8, #52]",
        "ldr w9, [x8, #100]",
        "ldr w9, [x9]",
        "str w9, [x8, #56]",
        "ldr w9, [x8, #100]",
        "ldr s3, [x9, #4]",
        "fcvt d3, s3",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x23, x28, x22, lsl #4",
        "fneg v3.2d, v3.2d",
        "fcvt s3, d3",
        "str s3, [x8, #60]",
        "ldur s3, [x5, #-8]",
        "fcvt d3, s3",
        "ldur s4, [x11, #-8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldur s4, [x11, #-4]",
        "fcvt d4, s4",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s5, [x5, #-4]",
        "fcvt d5, s5",
        "add x12, x28, x22, lsl #4",
        "fadd d4, d4, d5",
        "ldur s5, [x11, #-8]",
        "fcvt d5, s5",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s6, [x5, #-8]",
        "fcvt d6, s6",
        "add x13, x28, x22, lsl #4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #24]",
        "ldur s5, [x11, #-4]",
        "fcvt d5, s5",
        "ldur s6, [x5, #-4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #28]",
        "ldr s5, [x11]",
        "fcvt d5, s5",
        "ldr s6, [x5]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #68]",
        "ldr s5, [x11, #4]",
        "fcvt d5, s5",
        "ldr s6, [x5, #4]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #76]",
        "ldr s5, [x11]",
        "fcvt d5, s5",
        "ldr s6, [x5]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #88]",
        "ldr s5, [x11, #4]",
        "fcvt d5, s5",
        "ldr s6, [x5, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x8, #92]",
        "ldur s5, [x7, #-8]",
        "fcvt d5, s5",
        "ldur s6, [x10, #-8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldur s6, [x7, #-4]",
        "fcvt d6, s6",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldur s7, [x10, #-4]",
        "fcvt d7, s7",
        "add x22, x28, x22, lsl #4",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldur s6, [x10, #-8]",
        "fcvt d6, s6",
        "ldur s7, [x7, #-8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #32]",
        "ldur s6, [x10, #-4]",
        "fcvt d6, s6",
        "ldur s7, [x7, #-4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "ldr s6, [x7]",
        "fcvt d6, s6",
        "ldr s7, [x10]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #72]",
        "ldr s6, [x7, #4]",
        "fcvt d6, s6",
        "ldr s7, [x10, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #80]",
        "ldr s6, [x10]",
        "fcvt d6, s6",
        "ldr s7, [x7]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "ldr s6, [x10, #4]",
        "fcvt d6, s6",
        "ldr s7, [x7, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #84]",
        "fadd d6, d5, d3",
        "fcvt s6, d6",
        "stur s6, [x11, #-8]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fadd d6, d6, d4",
        "fcvt s6, d6",
        "stur s6, [x11, #-4]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x11]",
        "ldr s6, [x8, #80]",
        "fcvt d6, s6",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x11, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "stur s3, [x10, #-8]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x10, #-4]",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "ldr s4, [x8, #72]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x10]",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x10, #4]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "ldr s6, [x8, #104]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "stur s5, [x5, #-8]",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x5, #-4]",
        "ldr s3, [x8, #88]",
        "fcvt d3, s3",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "fmul d5, d2, d3",
        "ldr s6, [x8, #52]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x5]",
        "fmul d4, d2, d4",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x5, #4]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #28]",
        "fcvt d4, s4",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #48]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "stur s5, [x7, #-8]",
        "ldr s5, [x8, #48]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "stur s3, [x7, #-4]",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "ldr s4, [x8, #88]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #56]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x7]",
        "ldr s5, [x8, #56]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x7, #4]",
        "ldr s3, [x9, #8]",
        "fcvt d3, s3",
        "ldr w9, [x8, #40]",
        "ldr s4, [x9, #8]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr w9, [x8, #36]",
        "ldr s4, [x9, #12]",
        "fcvt d4, s4",
        "ldr w9, [x8, #40]",
        "ldr s5, [x9, #12]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr w9, [x8, #36]",
        "ldr s5, [x9, #8]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #8]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #24]",
        "ldr s5, [x9, #12]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #28]",
        "ldr s5, [x9]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #68]",
        "ldr s5, [x9, #4]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #76]",
        "ldr s5, [x9]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #36]",
        "fcvt s5, d5",
        "str s5, [x8, #88]",
        "ldr s5, [x9, #4]",
        "fcvt d5, s5",
        "ldr w9, [x8, #40]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr w9, [x8, #112]",
        "fcvt s5, d5",
        "str s5, [x8, #92]",
        "ldr s5, [x9, #8]",
        "fcvt d5, s5",
        "ldr s6, [x6, #8]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "ldr s7, [x6, #12]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #16]",
        "ldr s6, [x9, #8]",
        "fcvt d6, s6",
        "ldr s7, [x6, #8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #32]",
        "ldr s6, [x9, #12]",
        "fcvt d6, s6",
        "ldr s7, [x6, #12]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "ldr s6, [x6]",
        "fcvt d6, s6",
        "ldr s7, [x9]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #72]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "ldr s7, [x6, #4]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #80]",
        "ldr s6, [x9]",
        "fcvt d6, s6",
        "ldr s7, [x6]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #96]",
        "ldr s6, [x9, #4]",
        "fcvt d6, s6",
        "ldr w9, [x8, #36]",
        "ldr s7, [x6, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #84]",
        "fadd d6, d5, d3",
        "fcvt s6, d6",
        "str s6, [x9, #8]",
        "ldr s6, [x8, #16]",
        "fcvt d6, s6",
        "fadd d6, d6, d4",
        "fcvt s6, d6",
        "str s6, [x9, #12]",
        "ldr s6, [x8, #72]",
        "fcvt d6, s6",
        "ldr s7, [x8, #68]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x9]",
        "ldr s6, [x8, #80]",
        "fcvt d6, s6",
        "ldr s7, [x8, #76]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x9, #4]",
        "ldr w9, [x8, #112]",
        "strb wzr, [x28, #1049]",
        "fsub d3, d3, d5",
        "fcvt s3, d3",
        "str s3, [x9, #8]",
        "ldr s3, [x8, #16]",
        "fcvt d3, s3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #68]",
        "fcvt d3, s3",
        "ldr s4, [x8, #72]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x9]",
        "ldr s3, [x8, #76]",
        "fcvt d3, s3",
        "ldr s4, [x8, #80]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "fcvt s3, d3",
        "str s3, [x9, #4]",
        "ldr w9, [x8, #40]",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "ldr s6, [x8, #108]",
        "fcvt d6, s6",
        "fmul d6, d6, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x9, #8]",
        "ldr s5, [x8, #104]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #108]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fadd d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x9, #12]",
        "ldr s3, [x8, #88]",
        "fcvt d3, s3",
        "ldr s4, [x8, #84]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #96]",
        "fcvt d4, s4",
        "ldr s5, [x8, #92]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fmul d5, d5, d3",
        "fmul d6, d2, d4",
        "fsub d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x9]",
        "ldr s5, [x8, #52]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "fmul d3, d2, d3",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #40]",
        "add w7, w7, #0x10 (16)",
        "fadd d3, d4, d3",
        "add w5, w5, #0x10 (16)",
        "add w10, w10, #0x10 (16)",
        "fcvt s3, d3",
        "str s3, [x9, #20]",
        "ldr w9, [x8, #112]",
        "sub w9, w9, #0x10 (16)",
        "str w9, [x8, #112]",
        "ldr w9, [x8, #36]",
        "ldr s3, [x8, #20]",
        "fcvt d3, s3",
        "sub w9, w9, #0x10 (16)",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "str w9, [x8, #36]",
        "ldr s4, [x8, #28]",
        "fcvt d4, s4",
        "ldr w9, [x8, #124]",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "add w11, w11, #0x10 (16)",
        "ldr s5, [x8, #48]",
        "fcvt d5, s5",
        "subs w6, w6, #0x10 (16)",
        "cset x14, hs",
        "subs w26, w9, #0x1 (1)",
        "rmif x14, #63, #nzCv",
        "mov x27, x9",
        "mov x9, x26",
        "fmul d5, d5, d4",
        "str w9, [x8, #124]",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x6, #24]",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d4, d5, d4",
        "ldr s5, [x8, #48]",
        "fcvt d5, s5",
        "fmul d3, d5, d3",
        "fsub d3, d4, d3",
        "fcvt s3, d3",
        "str s3, [x6, #28]",
        "ldr s3, [x8, #84]",
        "fcvt d3, s3",
        "ldr s4, [x8, #88]",
        "fcvt d4, s4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #92]",
        "fcvt d4, s4",
        "ldr s5, [x8, #96]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #56]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #60]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fadd d5, d5, d6",
        "fcvt s5, d5",
        "str s5, [x6, #16]",
        "ldr s5, [x8, #60]",
        "fcvt d5, s5",
        "fmul d5, d5, d4",
        "ldr s6, [x8, #56]",
        "fcvt d6, s6",
        "fmul d6, d6, d3",
        "fsub d5, d5, d6",
        "fcvt s7, d5",
        "str s7, [x6, #20]",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]",
        "str d6, [x22, #1056]",
        "str d5, [x13, #1056]",
        "str d4, [x12, #1056]",
        "str d3, [x23, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w22, w22, w20",
        "orr w21, w21, w22",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xf0f0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block6": {
      "x86InstructionCount": 409,
      "ExpectedInstructionCount": 556,
      "x86Insts": [
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x30]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x2c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x28]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x24]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x20]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x1c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x18]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x14]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x10]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0xc]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fld dword [ebp + -0x30]",
        "fstp dword [esp + 0x40]",
        "fld dword [ebp + -0x2c]",
        "fstp dword [esp + 0x3c]",
        "fld dword [ebp + -0x28]",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + -0x24]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebp + -0x20]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebp + -0x1c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x18]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x14]",
        "fstp dword [esp + 0x24]",
        "fld dword [ebp + -0x10]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + -0xc]",
        "fstp dword [esp + 0x1c]",
        "fxch st5",
        "fstp dword [esp + 0x18]",
        "fxch st3",
        "fstp dword [esp + 0x14]",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fstp dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp],ebx",
        "call 0x0818d57a"
      ],
      "ExpectedArm64ASM": [
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-48]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-44]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-40]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-36]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-32]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-28]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-24]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-20]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-16]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-12]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #16]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #32]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #48]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #12]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #4]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #28]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #8]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #44]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #12]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #60]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #8]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #4]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #24]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #8]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #40]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #12]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #56]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #20]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #8]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #36]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #52]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #4]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #16]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #8]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #32]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #12]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #48]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldur s8, [x9, #-48]",
        "str s8, [x8, #64]",
        "ldur s8, [x9, #-44]",
        "str s8, [x8, #60]",
        "ldur s8, [x9, #-40]",
        "str s8, [x8, #56]",
        "ldur s8, [x9, #-36]",
        "str s8, [x8, #52]",
        "ldur s8, [x9, #-32]",
        "str s8, [x8, #48]",
        "ldur s8, [x9, #-28]",
        "str s8, [x8, #44]",
        "ldur s8, [x9, #-24]",
        "str s8, [x8, #40]",
        "ldur s8, [x9, #-20]",
        "str s8, [x8, #36]",
        "ldur s8, [x9, #-16]",
        "str s8, [x8, #32]",
        "ldur s8, [x9, #-12]",
        "str s8, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d3",
        "str s2, [x8, #20]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d4",
        "str s2, [x8, #16]",
        "fcvt s2, d5",
        "str s2, [x8, #12]",
        "fcvt s2, d6",
        "str s2, [x8, #8]",
        "fcvt s2, d7",
        "str s2, [x8, #4]",
        "str w6, [x8]",
        "mov w20, #0x462",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block7": {
      "x86InstructionCount": 418,
      "ExpectedInstructionCount": 563,
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "push ebx",
        "sub esp,0x84",
        "mov ebx,dword [ebp + 0x8]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x30]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x2c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x28]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x30]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x34]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x38]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x3c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x24]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x20]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x1c]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x18]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x20]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x24]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x28]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x2c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x14]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0x10]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "fstp dword [ebp + -0xc]",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x10]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x14]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x18]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x1c]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0xc]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x1c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x2c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x3c]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x8]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x18]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x28]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x38]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x4]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x14]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x24]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x34]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax]",
        "fmulp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x4]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x10]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0x8]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x20]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x10]",
        "fld dword [eax + 0xc]",
        "mov eax,dword [ebp + 0xc]",
        "fld dword [eax + 0x30]",
        "fmulp",
        "faddp",
        "fld dword [ebp + -0x30]",
        "fstp dword [esp + 0x40]",
        "fld dword [ebp + -0x2c]",
        "fstp dword [esp + 0x3c]",
        "fld dword [ebp + -0x28]",
        "fstp dword [esp + 0x38]",
        "fld dword [ebp + -0x24]",
        "fstp dword [esp + 0x34]",
        "fld dword [ebp + -0x20]",
        "fstp dword [esp + 0x30]",
        "fld dword [ebp + -0x1c]",
        "fstp dword [esp + 0x2c]",
        "fld dword [ebp + -0x18]",
        "fstp dword [esp + 0x28]",
        "fld dword [ebp + -0x14]",
        "fstp dword [esp + 0x24]",
        "fld dword [ebp + -0x10]",
        "fstp dword [esp + 0x20]",
        "fld dword [ebp + -0xc]",
        "fstp dword [esp + 0x1c]",
        "fxch st5",
        "fstp dword [esp + 0x18]",
        "fxch st3",
        "fstp dword [esp + 0x14]",
        "fxch",
        "fstp dword [esp + 0x10]",
        "fstp dword [esp + 0xc]",
        "fstp dword [esp + 0x8]",
        "fstp dword [esp + 0x4]",
        "mov dword [esp],ebx",
        "call 0x0818d57a",
        "mov eax,ebx",
        "add esp,0x84",
        "pop ebx",
        "pop ebp"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "str w6, [x8, #-4]!",
        "subs w26, w8, #0x84 (132)",
        "mov x27, x8",
        "mov x8, x26",
        "ldr w6, [x9, #8]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-48]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-44]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-40]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #48]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #52]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #56]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #60]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-36]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-32]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-28]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-24]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #32]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #36]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #16]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #40]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #32]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #44]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #48]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-20]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #12]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #44]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #60]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-16]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #8]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #40]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #56]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-12]",
        "ldr w4, [x9, #16]",
        "ldr s2, [x4, #16]",
        "fcvt d2, s2",
        "ldr w4, [x9, #12]",
        "ldr s3, [x4, #4]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #20]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #24]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #36]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #28]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4, #52]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #16]",
        "ldr s3, [x4, #16]",
        "fcvt d3, s3",
        "ldr w4, [x9, #12]",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #20]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #16]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #24]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #32]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4, #28]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #48]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fadd d3, d3, d4",
        "ldr w4, [x9, #16]",
        "ldr s4, [x4]",
        "fcvt d4, s4",
        "ldr w4, [x9, #12]",
        "ldr s5, [x4, #12]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #4]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #28]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #8]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #44]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4, #12]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #60]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "fadd d4, d4, d5",
        "ldr w4, [x9, #16]",
        "ldr s5, [x4]",
        "fcvt d5, s5",
        "ldr w4, [x9, #12]",
        "ldr s6, [x4, #8]",
        "fcvt d6, s6",
        "fmul d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #4]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #24]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #8]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #40]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4, #12]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #56]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "fadd d5, d5, d6",
        "ldr w4, [x9, #16]",
        "ldr s6, [x4]",
        "fcvt d6, s6",
        "ldr w4, [x9, #12]",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #4]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #20]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #8]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #36]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4, #12]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4, #52]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "fadd d6, d6, d7",
        "ldr w4, [x9, #16]",
        "ldr s7, [x4]",
        "fcvt d7, s7",
        "ldr w4, [x9, #12]",
        "ldr s8, [x4]",
        "fcvt d8, s8",
        "fmul d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #4]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #16]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #8]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #32]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldr w4, [x9, #16]",
        "ldr s8, [x4, #12]",
        "fcvt d8, s8",
        "ldr w4, [x9, #12]",
        "ldr s9, [x4, #48]",
        "fcvt d9, s9",
        "fmul d8, d8, d9",
        "fadd d7, d7, d8",
        "ldur s8, [x9, #-48]",
        "str s8, [x8, #64]",
        "ldur s8, [x9, #-44]",
        "str s8, [x8, #60]",
        "ldur s8, [x9, #-40]",
        "str s8, [x8, #56]",
        "ldur s8, [x9, #-36]",
        "str s8, [x8, #52]",
        "ldur s8, [x9, #-32]",
        "str s8, [x8, #48]",
        "ldur s8, [x9, #-28]",
        "str s8, [x8, #44]",
        "ldur s8, [x9, #-24]",
        "str s8, [x8, #40]",
        "ldur s8, [x9, #-20]",
        "str s8, [x8, #36]",
        "ldur s8, [x9, #-16]",
        "str s8, [x8, #32]",
        "ldur s8, [x9, #-12]",
        "str s8, [x8, #28]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d2",
        "str s2, [x8, #24]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d3",
        "str s2, [x8, #20]",
        "strb wzr, [x28, #1049]",
        "fcvt s2, d4",
        "str s2, [x8, #16]",
        "fcvt s2, d5",
        "str s2, [x8, #12]",
        "fcvt s2, d6",
        "str s2, [x8, #8]",
        "fcvt s2, d7",
        "str s2, [x8, #4]",
        "str w6, [x8]",
        "mov w20, #0x46f",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "strb wzr, [x28, #1202]"
      ]
    },
    "Block8": {
      "x86InstructionCount": 231,
      "ExpectedInstructionCount": 497,
      "x86Insts": [
        "fadd dword [esp + 0x40]",
        "lea edx,[ecx + ecx*0x2]",
        "lea esi,[edx + ecx*0x2]",
        "lea ebx,[ecx + -0x2]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x34]",
        "lea edi,[esi + ecx*0x2]",
        "fadd dword [esp + 0x40]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x28]",
        "fld dword [eax + esi*0x4 + -0x8]",
        "fadd dword [eax + ebx*0x4]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fchs",
        "fsub dword [eax + esi*0x4 + -0x4]",
        "fld dword [eax + ebx*0x4]",
        "fsub dword [eax + esi*0x4 + -0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + -0x4]",
        "fsub dword [eax + ecx*0x4 + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fadd dword [eax + edi*0x4 + -0x8]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fadd dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fsub dword [eax + edi*0x4 + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fsub dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ebx*0x4]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + -0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + -0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + -0x4]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st3",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + -0x8]",
        "fld st3",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "faddp",
        "fstp dword [eax + esi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + -0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4]",
        "fadd dword [eax + esi*0x4]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fchs",
        "fsub dword [eax + esi*0x4 + 0x4]",
        "fld dword [eax + ecx*0x4]",
        "fsub dword [eax + esi*0x4]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + 0x4]",
        "fsub dword [eax + ecx*0x4 + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4]",
        "fadd dword [eax + edx*0x4]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fadd dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4]",
        "fsub dword [eax + edi*0x4]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fsub dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4]",
        "fadd st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld st1",
        "fadd st0,st3",
        "fmul st1",
        "fstp dword [eax + edi*0x4]",
        "fxch",
        "fsub st0,st2",
        "fmul st1",
        "fstp dword [eax + edi*0x4 + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fadd dword [eax + esi*0x4 + 0x8]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fchs",
        "fsub dword [eax + esi*0x4 + 0xc]",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fsub dword [eax + esi*0x4 + 0x8]",
        "fstp dword [esp + 0x14]",
        "fld dword [eax + esi*0x4 + 0xc]",
        "fsub dword [eax + ecx*0x4 + 0xc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + 0x8]",
        "fadd dword [eax + edx*0x4 + 0x8]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fadd dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + 0x8]",
        "fsub dword [eax + edi*0x4 + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fsub dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x18]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4 + 0x8]",
        "fld st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + ecx*0x4 + 0xc]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + 0x8]",
        "fstp st1",
        "fld dword [esp + 0x10]",
        "fadd st0,st1",
        "fstp dword [eax + edx*0x4 + 0xc]",
        "fstp st0",
        "fld dword [esp + 0x18]",
        "fadd dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st2",
        "fmul st2",
        "fld st4",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + 0x8]",
        "fxch st2",
        "fmul st2",
        "fxch st3",
        "fmul st1",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [eax + esi*0x4 + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fsub dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x28]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + 0x8]",
        "fld dword [esp + 0x28]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + 0xc]",
        "pop edi",
        "pop esi",
        "fstp st0",
        "pop ebp",
        "fstp st0",
        "pop ebx",
        "add esp,0x74"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w5, w7, w7, lsl #1",
        "add w10, w5, w7, lsl #1",
        "sub w6, w7, #0x2 (2)",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #52]",
        "fcvt d3, s3",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w11, w10, w7, lsl #1",
        "ldr s4, [x8, #64]",
        "fcvt d4, s4",
        "add x23, x28, x22, lsl #4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #116]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr s4, [x8, #56]",
        "fcvt d4, s4",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "add x12, x28, x22, lsl #4",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #44]",
        "ldr s4, [x8, #60]",
        "fcvt d4, s4",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #40]",
        "add w13, w4, w10, lsl #2",
        "ldur s4, [x13, #-8]",
        "fcvt d4, s4",
        "add w13, w4, w6, lsl #2",
        "ldr s5, [x13]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w13, w4, w7, lsl #2",
        "ldur s5, [x13, #-4]",
        "fcvt d5, s5",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add x13, x28, x22, lsl #4",
        "fneg v5.2d, v5.2d",
        "add w14, w4, w10, lsl #2",
        "ldur s6, [x14, #-4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "add w14, w4, w6, lsl #2",
        "ldr s6, [x14]",
        "fcvt d6, s6",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w14, w4, w10, lsl #2",
        "ldur s7, [x14, #-8]",
        "fcvt d7, s7",
        "add x14, x28, x22, lsl #4",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldur s6, [x15, #-4]",
        "fcvt d6, s6",
        "add w15, w4, w7, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w5, lsl #2",
        "ldur s6, [x15, #-8]",
        "fcvt d6, s6",
        "add w15, w4, w11, lsl #2",
        "ldur s7, [x15, #-8]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "fcvt d8, s8",
        "add x22, x28, x22, lsl #4",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #24]",
        "fadd d7, d6, d4",
        "add w15, w4, w6, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fsub d7, d5, d7",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "stur s7, [x15, #-4]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-8]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fadd d4, d4, d5",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fmul d6, d2, d4",
        "fmul d7, d3, d5",
        "fsub d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s6, d6",
        "stur s6, [x15, #-8]",
        "fmul d5, d2, d5",
        "fmul d4, d3, d4",
        "fadd d4, d5, d4",
        "add w15, w4, w10, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "ldr s5, [x8, #24]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "ldr s6, [x8, #32]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #40]",
        "fcvt d6, s6",
        "fmul d6, d6, d5",
        "ldr s7, [x8, #44]",
        "fcvt d7, s7",
        "fmul d7, d7, d4",
        "fadd d6, d6, d7",
        "add w15, w4, w11, lsl #2",
        "fcvt s6, d6",
        "stur s6, [x15, #-8]",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fmul d5, d6, d5",
        "ldr s6, [x8, #40]",
        "fcvt d6, s6",
        "fmul d4, d6, d4",
        "fsub d4, d5, d4",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15]",
        "fcvt d4, s4",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #4]",
        "fcvt d5, s5",
        "fneg v5.2d, v5.2d",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "fcvt d6, s6",
        "add w15, w4, w7, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15]",
        "fcvt d6, s6",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #24]",
        "fadd d7, d6, d4",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fsub d7, d5, d7",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fadd d4, d4, d5",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fsub d6, d4, d5",
        "ldr s7, [x8, #64]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s6, d6",
        "str s6, [x15]",
        "fadd d4, d5, d4",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "add w15, w4, w10, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "ldr s5, [x8, #24]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "ldr s6, [x8, #32]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #64]",
        "fcvt d6, s6",
        "fneg v6.2d, v6.2d",
        "fadd d7, d5, d4",
        "fmul d7, d7, d6",
        "add w15, w4, w11, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d5, d4",
        "fmul d4, d4, d6",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15, #8]",
        "fcvt d4, s4",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15, #8]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #12]",
        "fcvt d5, s5",
        "fneg v5.2d, v5.2d",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #8]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #20]",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "fcvt d6, s6",
        "add w15, w4, w7, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15, #8]",
        "fcvt d6, s6",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #24]",
        "fadd d7, d6, d4",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #8]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fsub d7, d5, d7",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #12]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fadd d4, d4, d5",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #12]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fmul d6, d3, d4",
        "fmul d7, d2, d5",
        "fsub d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s8, d6",
        "str s8, [x15, #8]",
        "strb wzr, [x28, #1049]",
        "fmul d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fmul d2, d2, d4",
        "fadd d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "add w15, w4, w10, lsl #2",
        "fcvt s2, d2",
        "str s2, [x15, #12]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x8, #28]",
        "fcvt d3, s3",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #44]",
        "fcvt d4, s4",
        "fmul d4, d4, d3",
        "ldr s5, [x8, #40]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fadd d4, d4, d5",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #40]",
        "fcvt d4, s4",
        "fmul d4, d4, d3",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fsub d4, d4, d5",
        "add w15, w4, w11, lsl #2",
        "fcvt s8, d4",
        "str s8, [x15, #12]",
        "ldp w11, w10, [x8], #8",
        "ldp w9, w6, [x8], #8",
        "mvn w27, w8",
        "adds w26, w8, #0x74 (116)",
        "cfinv",
        "mov x8, x26",
        "strb w20, [x28, #1051]",
        "str d7, [x22, #1056]",
        "str d6, [x14, #1056]",
        "str d5, [x13, #1056]",
        "str d4, [x12, #1056]",
        "str d3, [x23, #1056]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfcfc",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block9": {
      "x86InstructionCount": 222,
      "ExpectedInstructionCount": 494,
      "x86Insts": [
        "fadd dword [esp + 0x40]",
        "lea edx,[ecx + ecx*0x2]",
        "lea esi,[edx + ecx*0x2]",
        "lea ebx,[ecx + -0x2]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x34]",
        "lea edi,[esi + ecx*0x2]",
        "fadd dword [esp + 0x40]",
        "fmul dword [esp + 0x74]",
        "fld dword [esp + 0x38]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x30]",
        "fld dword [esp + 0x3c]",
        "fsub dword [esp + 0x40]",
        "fmul dword [esp + 0x78]",
        "fstp dword [esp + 0x2c]",
        "fld dword [eax + esi*0x4 + -0x8]",
        "fadd dword [eax + ebx*0x4]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fadd dword [eax + esi*0x4 + -0x4]",
        "fld dword [eax + ebx*0x4]",
        "fsub dword [eax + esi*0x4 + -0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + -0x4]",
        "fsub dword [eax + esi*0x4 + -0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + -0x8]",
        "fadd dword [eax + edx*0x4 + -0x8]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fadd dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + -0x8]",
        "fsub dword [eax + edi*0x4 + -0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + -0x4]",
        "fsub dword [eax + edi*0x4 + -0x4]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ebx*0x4]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + -0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + -0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + -0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st3",
        "fmul st2",
        "fld st3",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + -0x8]",
        "fld st3",
        "fmul st1",
        "fld st3",
        "fmul st3",
        "faddp",
        "fstp dword [eax + esi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + -0x8]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + -0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4]",
        "fadd dword [eax + esi*0x4]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fadd dword [eax + esi*0x4 + 0x4]",
        "fld dword [eax + ecx*0x4]",
        "fsub dword [eax + esi*0x4]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + 0x4]",
        "fsub dword [eax + esi*0x4 + 0x4]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edx*0x4]",
        "fadd dword [eax + edi*0x4]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fadd dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4]",
        "fsub dword [eax + edi*0x4]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0x4]",
        "fsub dword [eax + edi*0x4 + 0x4]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + 0x4]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + 0x4]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st1",
        "fsub st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4]",
        "fadd st0,st1",
        "fmul dword [esp + 0x40]",
        "fstp dword [eax + esi*0x4 + 0x4]",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x40]",
        "fchs",
        "fld st1",
        "fadd st0,st3",
        "fmul st1",
        "fstp dword [eax + edi*0x4]",
        "fxch",
        "fsub st0,st2",
        "fmul st1",
        "fstp dword [eax + edi*0x4 + 0x4]",
        "fstp st0",
        "fstp st0",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fadd dword [eax + esi*0x4 + 0x8]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fadd dword [eax + esi*0x4 + 0xc]",
        "fld dword [eax + ecx*0x4 + 0x8]",
        "fsub dword [eax + esi*0x4 + 0x8]",
        "fstp dword [esp + 0x18]",
        "fld dword [eax + ecx*0x4 + 0xc]",
        "fsub dword [eax + esi*0x4 + 0xc]",
        "fstp dword [esp + 0x1c]",
        "fld dword [eax + edi*0x4 + 0x8]",
        "fadd dword [eax + edx*0x4 + 0x8]",
        "fld dword [eax + edi*0x4 + 0xc]",
        "fadd dword [eax + edx*0x4 + 0xc]",
        "fstp dword [esp + 0x10]",
        "fld dword [eax + edx*0x4 + 0x8]",
        "fsub dword [eax + edi*0x4 + 0x8]",
        "fstp dword [esp + 0x20]",
        "fld dword [eax + edx*0x4 + 0xc]",
        "fsub dword [eax + edi*0x4 + 0xc]",
        "fstp dword [esp + 0x14]",
        "fld st0",
        "fadd st0,st3",
        "fstp dword [eax + ecx*0x4 + 0x8]",
        "fld dword [esp + 0x10]",
        "fadd st0,st2",
        "fstp dword [eax + ecx*0x4 + 0xc]",
        "fxch st2",
        "fsub st0,st2",
        "fstp dword [eax + edx*0x4 + 0x8]",
        "fstp st1",
        "fsub dword [esp + 0x10]",
        "fstp dword [eax + edx*0x4 + 0xc]",
        "fld dword [esp + 0x18]",
        "fsub dword [esp + 0x14]",
        "fld dword [esp + 0x20]",
        "fadd dword [esp + 0x1c]",
        "fld st2",
        "fmul st2",
        "fld st4",
        "fmul st2",
        "fsubp",
        "fstp dword [eax + esi*0x4 + 0x8]",
        "fxch st2",
        "fmul st2",
        "fxch st3",
        "fmul st1",
        "faddp st3,st0",
        "fxch st2",
        "fstp dword [eax + esi*0x4 + 0xc]",
        "fstp st0",
        "fstp st0",
        "fld dword [esp + 0x14]",
        "fadd dword [esp + 0x18]",
        "fld dword [esp + 0x1c]",
        "fsub dword [esp + 0x20]",
        "fld dword [esp + 0x30]",
        "fmul st1",
        "fld dword [esp + 0x2c]",
        "fmul st3",
        "faddp",
        "fstp dword [eax + edi*0x4 + 0x8]",
        "fld dword [esp + 0x2c]",
        "fmul st1",
        "fld dword [esp + 0x30]",
        "fmul st3",
        "fsubp",
        "fstp dword [eax + edi*0x4 + 0xc]",
        "pop edi",
        "pop esi",
        "fstp st0",
        "pop ebp",
        "fstp st0",
        "pop ebx",
        "add esp,0x74"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x8, #64]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w5, w7, w7, lsl #1",
        "add w10, w5, w7, lsl #1",
        "sub w6, w7, #0x2 (2)",
        "ldr s3, [x8, #116]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldr s3, [x8, #52]",
        "fcvt d3, s3",
        "add w22, w20, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w11, w10, w7, lsl #1",
        "ldr s4, [x8, #64]",
        "fcvt d4, s4",
        "add x23, x28, x22, lsl #4",
        "fadd d3, d3, d4",
        "ldr s4, [x8, #116]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "ldr s4, [x8, #56]",
        "fcvt d4, s4",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "add x12, x28, x22, lsl #4",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #48]",
        "ldr s4, [x8, #60]",
        "fcvt d4, s4",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #120]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "fcvt s4, d4",
        "str s4, [x8, #44]",
        "add w13, w4, w10, lsl #2",
        "ldur s4, [x13, #-8]",
        "fcvt d4, s4",
        "add w13, w4, w6, lsl #2",
        "ldr s5, [x13]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w13, w4, w7, lsl #2",
        "ldur s5, [x13, #-4]",
        "fcvt d5, s5",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w13, w4, w10, lsl #2",
        "ldur s6, [x13, #-4]",
        "fcvt d6, s6",
        "add x13, x28, x22, lsl #4",
        "fadd d5, d5, d6",
        "add w14, w4, w6, lsl #2",
        "ldr s6, [x14]",
        "fcvt d6, s6",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w14, w4, w10, lsl #2",
        "ldur s7, [x14, #-8]",
        "fcvt d7, s7",
        "add x14, x28, x22, lsl #4",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldur s6, [x15, #-4]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldur s6, [x15, #-8]",
        "fcvt d6, s6",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "add w22, w22, #0x7 (7)",
        "and w22, w22, #0x7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "fcvt d8, s8",
        "add x22, x28, x22, lsl #4",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-8]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldur s7, [x15, #-4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldur s8, [x15, #-4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #20]",
        "fadd d7, d6, d4",
        "add w15, w4, w6, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "stur s7, [x15, #-4]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-8]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fmul d6, d2, d4",
        "fmul d7, d3, d5",
        "fsub d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s6, d6",
        "stur s6, [x15, #-8]",
        "fmul d5, d2, d5",
        "fmul d4, d3, d4",
        "fadd d4, d5, d4",
        "add w15, w4, w10, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "ldr s5, [x8, #24]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "ldr s6, [x8, #32]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fmul d6, d6, d5",
        "ldr s7, [x8, #48]",
        "fcvt d7, s7",
        "fmul d7, d7, d4",
        "fadd d6, d6, d7",
        "add w15, w4, w11, lsl #2",
        "fcvt s6, d6",
        "stur s6, [x15, #-8]",
        "ldr s6, [x8, #48]",
        "fcvt d6, s6",
        "fmul d5, d6, d5",
        "ldr s6, [x8, #44]",
        "fcvt d6, s6",
        "fmul d4, d6, d4",
        "fsub d4, d5, d4",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "stur s4, [x15, #-4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15]",
        "fcvt d4, s4",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #4]",
        "fcvt d5, s5",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #4]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #4]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w5, lsl #2",
        "ldr s6, [x15]",
        "fcvt d6, s6",
        "add w15, w4, w11, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #4]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #4]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #20]",
        "fadd d7, d6, d4",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #4]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fsub d6, d4, d5",
        "ldr s7, [x8, #64]",
        "fcvt d7, s7",
        "fmul d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s6, d6",
        "str s6, [x15]",
        "fadd d4, d5, d4",
        "ldr s5, [x8, #64]",
        "fcvt d5, s5",
        "fmul d4, d4, d5",
        "add w15, w4, w10, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "ldr s4, [x8, #20]",
        "fcvt d4, s4",
        "ldr s5, [x8, #24]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "ldr s5, [x8, #28]",
        "fcvt d5, s5",
        "ldr s6, [x8, #32]",
        "fcvt d6, s6",
        "fsub d5, d5, d6",
        "ldr s6, [x8, #64]",
        "fcvt d6, s6",
        "fneg v6.2d, v6.2d",
        "fadd d7, d5, d4",
        "fmul d7, d7, d6",
        "add w15, w4, w11, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d5, d4",
        "fmul d4, d4, d6",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #4]",
        "add w15, w4, w7, lsl #2",
        "ldr s4, [x15, #8]",
        "fcvt d4, s4",
        "add w15, w4, w10, lsl #2",
        "ldr s5, [x15, #8]",
        "fcvt d5, s5",
        "fadd d4, d4, d5",
        "add w15, w4, w7, lsl #2",
        "ldr s5, [x15, #12]",
        "fcvt d5, s5",
        "add w15, w4, w10, lsl #2",
        "ldr s6, [x15, #12]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #8]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #24]",
        "add w15, w4, w7, lsl #2",
        "ldr s6, [x15, #12]",
        "fcvt d6, s6",
        "add w15, w4, w10, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "fsub d6, d6, d7",
        "fcvt s6, d6",
        "str s6, [x8, #28]",
        "add w15, w4, w11, lsl #2",
        "ldr s6, [x15, #8]",
        "fcvt d6, s6",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "fadd d6, d6, d7",
        "add w15, w4, w11, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "add w15, w4, w5, lsl #2",
        "ldr s8, [x15, #12]",
        "fcvt d8, s8",
        "fadd d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #16]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #8]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #8]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #32]",
        "add w15, w4, w5, lsl #2",
        "ldr s7, [x15, #12]",
        "fcvt d7, s7",
        "add w15, w4, w11, lsl #2",
        "ldr s8, [x15, #12]",
        "fcvt d8, s8",
        "fsub d7, d7, d8",
        "fcvt s7, d7",
        "str s7, [x8, #20]",
        "fadd d7, d6, d4",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #8]",
        "ldr s7, [x8, #16]",
        "fcvt d7, s7",
        "fadd d7, d7, d5",
        "add w15, w4, w7, lsl #2",
        "fcvt s7, d7",
        "str s7, [x15, #12]",
        "strb wzr, [x28, #1049]",
        "fsub d4, d4, d6",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #16]",
        "fcvt d4, s4",
        "fsub d4, d5, d4",
        "add w15, w4, w5, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #12]",
        "ldr s4, [x8, #24]",
        "fcvt d4, s4",
        "ldr s5, [x8, #20]",
        "fcvt d5, s5",
        "fsub d4, d4, d5",
        "ldr s5, [x8, #32]",
        "fcvt d5, s5",
        "ldr s6, [x8, #28]",
        "fcvt d6, s6",
        "fadd d5, d5, d6",
        "fmul d6, d3, d4",
        "fmul d7, d2, d5",
        "fsub d6, d6, d7",
        "add w15, w4, w10, lsl #2",
        "fcvt s8, d6",
        "str s8, [x15, #8]",
        "strb wzr, [x28, #1049]",
        "fmul d3, d3, d5",
        "strb wzr, [x28, #1049]",
        "fmul d2, d2, d4",
        "fadd d2, d3, d2",
        "strb wzr, [x28, #1049]",
        "add w15, w4, w10, lsl #2",
        "fcvt s2, d2",
        "str s2, [x15, #12]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr s2, [x8, #20]",
        "fcvt d2, s2",
        "ldr s3, [x8, #24]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x8, #28]",
        "fcvt d3, s3",
        "ldr s4, [x8, #32]",
        "fcvt d4, s4",
        "fsub d3, d3, d4",
        "ldr s4, [x8, #48]",
        "fcvt d4, s4",
        "fmul d4, d4, d3",
        "ldr s5, [x8, #44]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fadd d4, d4, d5",
        "add w15, w4, w11, lsl #2",
        "fcvt s4, d4",
        "str s4, [x15, #8]",
        "ldr s4, [x8, #44]",
        "fcvt d4, s4",
        "fmul d4, d4, d3",
        "ldr s5, [x8, #48]",
        "fcvt d5, s5",
        "fmul d5, d5, d2",
        "fsub d4, d4, d5",
        "add w15, w4, w11, lsl #2",
        "fcvt s8, d4",
        "str s8, [x15, #12]",
        "ldp w11, w10, [x8], #8",
        "ldp w9, w6, [x8], #8",
        "mvn w27, w8",
        "adds w26, w8, #0x74 (116)",
        "cfinv",
        "mov x8, x26",
        "strb w20, [x28, #1051]",
        "str d7, [x22, #1056]",
        "str d6, [x14, #1056]",
        "str d5, [x13, #1056]",
        "str d4, [x12, #1056]",
        "str d3, [x23, #1056]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xfcfc",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Block10": {
      "x86InstructionCount": 420,
      "ExpectedInstructionCount": 594,
      "x86Insts": [
        "push ebp",
        "mov ebp,esp",
        "sub esp,0x14",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x78",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x38",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x7c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x3c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x78",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x78",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x38",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x7c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x7c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x3c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x38]",
        "mov eax,dword [ebp + -0x8]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x3c]",
        "mov eax,dword [ebp + -0x4]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x70",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x30",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x74",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x34",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x70",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x70",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x30",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x74",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x74",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x34",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x30",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553144]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x34",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x68",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x28",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x6c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x2c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x68",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x68",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x28",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x6c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x6c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x2c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x28",
        "fld dword [ebp + -0x8]",
        "fsub dword [ebp + -0x4]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x2c",
        "fld dword [ebp + -0x8]",
        "fadd dword [ebp + -0x4]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x60",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x20",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x64",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x24",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x60",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x60",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x20",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x64",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x64",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x24",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x20",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x4]",
        "fld dword [0x0855314c]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x24",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x58",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x18",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x1c",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x5c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x58",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x58",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x18",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x5c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x5c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x1c",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x18]",
        "mov eax,dword [ebp + -0x4]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x1c]",
        "mov eax,dword [ebp + -0x8]",
        "mov dword [edx],eax",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x10",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x50",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x14",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x54",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x50",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x50",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x10",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x54",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x54",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x14",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x10",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553148]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x14",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x0855314c]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x8",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x48",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0xc",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4c",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x48",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x48",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x8",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4c",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x4c",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0xc",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x8",
        "fld dword [ebp + -0x4]",
        "fadd dword [ebp + -0x8]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0xc",
        "fld dword [ebp + -0x4]",
        "fsub dword [ebp + -0x8]",
        "fld dword [0x0855313c]",
        "fmulp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x8]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x44",
        "fld dword [eax]",
        "fsubp",
        "fstp dword [ebp + -0x4]",
        "mov eax,dword [ebp + 0x8]",
        "lea edx,[eax + 0x40]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "fld dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "fld dword [eax]",
        "faddp",
        "fstp dword [edx]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x44",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x44",
        "fld dword [edx]",
        "mov edx,dword [ebp + 0x8]",
        "add edx,0x4",
        "fld dword [edx]",
        "faddp",
        "fstp dword [eax]",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553148]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553140]",
        "fmulp",
        "faddp",
        "mov eax,dword [ebp + 0x8]",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x4",
        "fld dword [ebp + -0x4]",
        "fld dword [0x08553140]",
        "fmulp",
        "fld dword [ebp + -0x8]",
        "fld dword [0x08553144]",
        "fmulp",
        "faddp",
        "fstp dword [eax]",
        "mov eax,dword [ebp + 0x8]",
        "mov dword [esp],eax",
        "call 0x0816de98",
        "mov eax,dword [ebp + 0x8]",
        "add eax,0x40",
        "mov dword [esp],eax",
        "call 0x0816de98",
        "leave"
      ],
      "ExpectedArm64ASM": [
        "str w9, [x8, #-4]!",
        "mov x9, x8",
        "sub w8, w8, #0x14 (20)",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x78 (120)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x38 (56)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x7c (124)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x3c (60)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x78 (120)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x78 (120)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x38 (56)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x7c (124)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x7c (124)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x3c (60)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x38 (56)",
        "ldur w4, [x9, #-8]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x3c (60)",
        "ldur w4, [x9, #-4]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x70 (112)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x30 (48)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x74 (116)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x34 (52)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x70 (112)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x70 (112)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x30 (48)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x74 (116)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x74 (116)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x34 (52)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x30 (48)",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "mov w20, #0x3140",
        "movk w20, #0x855, lsl #16",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "mov w21, #0x3144",
        "movk w21, #0x855, lsl #16",
        "ldr s4, [x21]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x34 (52)",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "mov w22, #0x3148",
        "movk w22, #0x855, lsl #16",
        "ldr s3, [x22]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x68 (104)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x28 (40)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x6c (108)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x2c (44)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x68 (104)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x68 (104)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x28 (40)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x6c (108)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x6c (108)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x2c (44)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x28 (40)",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "mov w23, #0x313c",
        "movk w23, #0x855, lsl #16",
        "ldr s3, [x23]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x2c (44)",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x23]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x60 (96)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x20 (32)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x64 (100)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x24 (36)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x60 (96)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x60 (96)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x20 (32)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x64 (100)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x64 (100)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x24 (36)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x20 (32)",
        "ldur s2, [x9, #-8]",
        "fcvt d2, s2",
        "ldr s3, [x22]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-4]",
        "fcvt d3, s3",
        "mov w12, #0x314c",
        "movk w12, #0x855, lsl #16",
        "ldr s4, [x12]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x24 (36)",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldr s3, [x22]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x58 (88)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x18 (24)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x1c (28)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x5c (92)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x58 (88)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x58 (88)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x18 (24)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x5c (92)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x5c (92)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x1c (28)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x18 (24)",
        "ldur w4, [x9, #-4]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x1c (28)",
        "ldur w4, [x9, #-8]",
        "str w4, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x10 (16)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x50 (80)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x14 (20)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x54 (84)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x50 (80)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x50 (80)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x10 (16)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x54 (84)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x54 (84)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x14 (20)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x10 (16)",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldr s4, [x22]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x14 (20)",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldr s3, [x22]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldr s4, [x12]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x8 (8)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x48 (72)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0xc (12)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4c (76)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x48 (72)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x48 (72)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x8 (8)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4c (76)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x4c (76)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0xc (12)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x8 (8)",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "ldr s3, [x23]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0xc (12)",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "ldr s3, [x23]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x40 (64)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-8]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x4 (4)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x44 (68)",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fsub d2, d2, d3",
        "fcvt s2, d2",
        "stur s2, [x9, #-4]",
        "ldr w4, [x9, #8]",
        "add w5, w4, #0x40 (64)",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x40 (64)",
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldr w4, [x9, #8]",
        "ldr s3, [x4]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x5]",
        "ldr w4, [x9, #8]",
        "add w4, w4, #0x44 (68)",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x44 (68)",
        "ldr s2, [x5]",
        "fcvt d2, s2",
        "ldr w5, [x9, #8]",
        "add w5, w5, #0x4 (4)",
        "ldr s3, [x5]",
        "fcvt d3, s3",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldr s3, [x22]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldr s4, [x20]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "ldr w4, [x9, #8]",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "adds w26, w4, #0x4 (4)",
        "mov x27, x4",
        "mov x4, x26",
        "ldur s2, [x9, #-4]",
        "fcvt d2, s2",
        "ldr s3, [x20]",
        "fcvt d3, s3",
        "fmul d2, d2, d3",
        "ldur s3, [x9, #-8]",
        "fcvt d3, s3",
        "ldr s4, [x21]",
        "fcvt d4, s4",
        "fmul d3, d3, d4",
        "fadd d2, d2, d3",
        "fcvt s2, d2",
        "str s2, [x4]",
        "ldr w4, [x9, #8]",
        "str w4, [x8]",
        "mov w20, #0x47c",
        "movk w20, #0x1, lsl #16",
        "str w20, [x8, #-4]!",
        "cfinv",
        "ldrb w21, [x28, #1051]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x8",
        "sub w21, w23, w21",
        "mov w23, #0xe0e0",
        "lsr w21, w23, w21",
        "bic w21, w22, w21",
        "strb w21, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/FlagM/x87_f64.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "fadd dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom dword [rax]": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcomp dword [rax]": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xd8 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fadd d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmul d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom st0, st0": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xd8 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "mrs x20, nzcv",
        "fcmp d2, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcom st0, st1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st3": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st4": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st5": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st6": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st7": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xd8 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcomp st0, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xd8 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "fcmp d2, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st1": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xd8 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fcmp d3, d2",
        "cset x23, vs",
        "axflag",
        "cset x24, lo",
        "cset x30, eq",
        "strb w24, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st3": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st4": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st5": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st6": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st7": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xd8 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdiv st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fld dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcvt s2, d2",
        "str s2, [x4]"
      ]
    },
    "fstp dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvt s2, d2",
        "str s2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldenv [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd9 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "ubfx w21, w20, #10, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "strh w20, [x28, #1200]",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w21, #0x55555555",
        "bic w20, w21, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldcw [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "ubfx w21, w20, #10, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "strh w20, [x28, #1200]"
      ]
    },
    "fnstenv [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd9 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "str w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "str w20, [x4, #4]",
        "ldrb w20, [x28, #1202]",
        "orr w20, w20, w20, lsl #4",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsl #2",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsl #1",
        "and w20, w20, #0x55555555",
        "orr w20, w20, w20, lsl #1",
        "eor w20, w20, #0xffff",
        "str w20, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]"
      ]
    },
    "fnstcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]"
      ]
    },
    "fld st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st3": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st4": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st5": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st6": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fxch st0, st0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xd9 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1049]"
      ]
    },
    "fxch st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fnop": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xd9 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fchs": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fneg v2.2d, v2.2d",
        "str d2, [x20, #1056]"
      ]
    },
    "fabs": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fabs d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ftst": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov w21, #0x0",
        "fmov d3, x21",
        "fcmp d2, d3",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fxam": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov x21, v2.d[0]",
        "lsr x21, x21, #63",
        "strb w21, [x28, #1049]",
        "ldrb w21, [x28, #1202]",
        "lsr w20, w21, w20",
        "and w20, w20, #0x1",
        "mrs x21, nzcv",
        "cmp w20, #0x1 (1)",
        "cset x22, ne",
        "strb w22, [x28, #1048]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fld1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x3ff0000000000000",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2t": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xa372",
        "movk x20, #0x979, lsl #16",
        "movk x20, #0x934f, lsl #32",
        "movk x20, #0x400a, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2e": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x82fe",
        "movk x20, #0x652b, lsl #16",
        "movk x20, #0x1547, lsl #32",
        "movk x20, #0x3ff7, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldpi": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x2d18",
        "movk x20, #0x5444, lsl #16",
        "movk x20, #0x21fb, lsl #32",
        "movk x20, #0x4009, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldlg2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x79ff",
        "movk x20, #0x509f, lsl #16",
        "movk x20, #0x4413, lsl #32",
        "movk x20, #0x3fd3, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldln2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x39ef",
        "movk x20, #0xfefa, lsl #16",
        "movk x20, #0x2e42, lsl #32",
        "movk x20, #0x3fe6, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldz": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "f2xm1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #2240]",
        "ldr x3, [x28, #2248]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x20, #1056]"
      ]
    },
    "fyl2x": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2256]",
        "ldr x3, [x28, #2264]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]"
      ]
    },
    "fptan": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2992]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "mov x22, #0x3ff0000000000000",
        "fmov d3, x22",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d3, [x22, #1056]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fpatan": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2224]",
        "ldr x3, [x28, #2232]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]"
      ]
    },
    "fxtract": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xd9 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov x22, v2.d[0]",
        "mov x23, #0xfff0000000000000",
        "fmov d3, x23",
        "ubfx x23, x22, #52, #11",
        "sub x23, x23, #0x3ff (1023)",
        "scvtf d4, x23",
        "and x23, x22, #0x800fffffffffffff",
        "orr x23, x23, #0x3ff0000000000000",
        "fmov d5, x23",
        "mrs x23, nzcv",
        "tst x22, #0x7fffffffffffffff",
        "fcsel d2, d2, d5, eq",
        "fcsel d3, d3, d4, eq",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "msr nzcv, x23",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d2, [x22, #1056]",
        "str d3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fprem1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2288]",
        "ldr x3, [x28, #2296]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fdecstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fincstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fprem": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2272]",
        "ldr x3, [x28, #2280]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fyl2xp1": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xd9 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x3ff0000000000000",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "fmov d1, d3",
        "ldr x0, [x28, #2256]",
        "ldr x3, [x28, #2264]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "strb w20, [x28, #1051]",
        "str d3, [x22, #1056]",
        "str d2, [x21, #1056]"
      ]
    },
    "fsqrt": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsqrt d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsincos": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2976]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "fmov d0, d2",
        "ldr x0, [x28, #2984]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d2, [x22, #1056]",
        "str d3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frndint": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fscale": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2304]",
        "ldr x3, [x28, #2312]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x20, #1056]"
      ]
    },
    "fsin": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2976]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fcos": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2984]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fiadd dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fimul dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ficom dword [rax]": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xda !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "ficomp dword [rax]": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xda !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fisubr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fidiv dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fidivr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xda 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fucompp": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xda 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "msr nzcv, x22",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdb !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdb !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs w21, d2",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist dword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdb !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d0, d2",
        "fcvtzs w20, d0",
        "str w20, [x4]"
      ]
    },
    "fistp dword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "frinti d0, d2",
        "fcvtzs w21, d0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcmovnb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fnclex": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xdb 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1040]"
      ]
    },
    "fninit": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdb 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "rbit w1, w20",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x20, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fucomi st0, st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdb 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fucomi st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdb 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fcomi st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag"
      ]
    },
    "fadd qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom qword [rax]": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdc !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcomp qword [rax]": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xdc !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fadd st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fadd d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fmul st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmul d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xe0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fsubr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xe8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fsub st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xf0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fdivr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xf8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fdiv st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fld qword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp qword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs x21, d2",
        "str x21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst qword [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdd !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "str d2, [x4]"
      ]
    },
    "fstp qword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str d2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frstor [rax]": {
      "ExpectedInstructionCount": 141,
      "Comment": [
        "0xdd !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "lsr w20, w20, #10",
        "and w20, w20, #0x3",
        "rbit w1, w20",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x20, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w22, w20, #8, #1",
        "ubfx w23, w20, #9, #1",
        "ubfx w24, w20, #10, #1",
        "ubfx w30, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w22, [x28, #1048]",
        "strb w23, [x28, #1049]",
        "strb w24, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w22, #0x55555555",
        "bic w20, w22, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]",
        "mov x20, #0xffffffffffffffff",
        "mov w22, #0xffff",
        "fmov d2, x20",
        "fmov v2.D[1], x22",
        "ldur q3, [x4, #28]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x21, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #38]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr q3, [x4, #48]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #58]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #68]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #78]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #88]",
        "and v2.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add x0, x28, x20, lsl #4",
        "str d2, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur d2, [x4, #98]",
        "ldr h3, [x4, #106]",
        "mov v2.h[4], v3.h[0]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add x0, x28, x20, lsl #4",
        "str d2, [x0, #1056]"
      ]
    },
    "fnsave [rax]": {
      "ExpectedInstructionCount": 143,
      "Comment": [
        "0xdd !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrh w21, [x28, #1200]",
        "str w21, [x4]",
        "ldrb w21, [x28, #1051]",
        "lsl x21, x21, #11",
        "ldrb w22, [x28, #1048]",
        "orr x21, x21, x22, lsl #8",
        "ldrb w22, [x28, #1049]",
        "orr x21, x21, x22, lsl #9",
        "ldrb w22, [x28, #1050]",
        "orr x21, x21, x22, lsl #10",
        "ldrb w22, [x28, #1054]",
        "orr x21, x21, x22, lsl #14",
        "ldrb w22, [x28, #1040]",
        "orr x21, x21, x22",
        "str w21, [x4, #4]",
        "mov w21, #0x0",
        "ldrb w22, [x28, #1202]",
        "orr w22, w22, w22, lsl #4",
        "and w22, w22, #0xf0f0f0f",
        "orr w22, w22, w22, lsl #2",
        "and w22, w22, #0x33333333",
        "orr w22, w22, w22, lsl #1",
        "and w22, w22, #0x55555555",
        "orr w22, w22, w22, lsl #1",
        "eor w22, w22, #0xffff",
        "str w22, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #28]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #38]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #58]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #68]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #78]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #88]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur d2, [x4, #98]",
        "dup v2.8h, v2.h[4]",
        "str h2, [x4, #106]",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fnstsw [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4]"
      ]
    },
    "ffree st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdd 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st3": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st4": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st5": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st6": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st7": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xdd 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fst st1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st3": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st4": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st5": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st6": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st0": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdd 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucom st0": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdd 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "mrs x20, nzcv",
        "fcmp d2, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fucom st1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st3": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st4": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st5": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st6": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st7": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdd 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucomp st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xdd 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "fcmp d2, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st1": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xdd 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fcmp d3, d2",
        "cset x23, vs",
        "axflag",
        "cset x24, lo",
        "cset x30, eq",
        "strb w24, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st3": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st4": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st5": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st6": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st7": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xdd 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fiadd word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fimul word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ficom word [rax]": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "axflag",
        "cset x22, lo",
        "cset x23, eq",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "ficomp word [rax]": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xde !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fisubr word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fidiv word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fidivr word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "faddp st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xde 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fadd d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fadd d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xde 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmul d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fmul d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcompp": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xde 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "axflag",
        "cset x23, lo",
        "cset x24, eq",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "msr nzcv, x22",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fsubrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fsub d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fsub d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe8": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fsubp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fsub d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fdivrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fdiv d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fdiv d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf8": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fdivp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fdiv d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild word [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp word [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs x21, d2",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist word [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdf !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d0, d2",
        "fcvtzs x20, d0",
        "strh w20, [x4]"
      ]
    },
    "fistp word [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "frinti d0, d2",
        "fcvtzs x21, d0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbld tword [rax]": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xdf !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #2000]",
        "ldr x3, [x28, #2008]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbstp tword [rax]": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xdf !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1984]",
        "ldr x3, [x28, #1992]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffreep st0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st5": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st6": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fnstsw ax": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "fucomip st0": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdf 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st3": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st4": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st5": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st6": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st7": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st0": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdf 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st3": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st4": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st5": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st6": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st7": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xdf 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "axflag",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_32": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld dword [rax]",
        "fstp dword [rdx]",
        "fld dword [rax + 4]",
        "fstp dword [rdx + 4]",
        "fld dword [rax + 8]",
        "fstp dword [rdx + 8]",
        "fld dword [rax + 12]",
        "fstp dword [rdx + 12]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str s2, [x5]",
        "ldr s2, [x4, #4]",
        "str s2, [x5, #4]",
        "ldr s2, [x4, #8]",
        "str s2, [x5, #8]",
        "ldr s2, [x4, #12]",
        "str s2, [x5, #12]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_64": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld qword [rax]",
        "fstp qword [rdx]",
        "fld qword [rax + 8]",
        "fstp qword [rdx + 8]",
        "fld qword [rax + 16]",
        "fstp qword [rdx + 16]",
        "fld qword [rax + 32]",
        "fstp qword [rdx + 32]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str d2, [x5]",
        "ldr d2, [x4, #8]",
        "str d2, [x5, #8]",
        "ldr d2, [x4, #16]",
        "str d2, [x5, #16]",
        "ldr d2, [x4, #32]",
        "str d2, [x5, #32]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_80": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 38,
      "x86Insts": [
        "fld tword [rax]",
        "fstp tword [rdx]",
        "fld tword [rax + 10]",
        "fstp tword [rdx + 10]",
        "fld tword [rax + 20]",
        "fstp tword [rdx + 20]",
        "fld tword [rax + 30]",
        "fstp tword [rdx + 30]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str d2, [x5]",
        "mov x20, v2.d[1]",
        "strh w20, [x5, #8]",
        "add x20, x4, #0xa (10)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #10]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0xa (10)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x14 (20)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #20]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x14 (20)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x1e (30)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #30]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x1e (30)",
        "strh w20, [x21, #8]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/H0F38.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "CRYPTO"
    ]
  },
  "Instructions": {
    "pshufb mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x00"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "movi v4.16b, #0x87",
        "and v3.8b, v3.8b, v4.8b",
        "tbl v2.8b, {v2.16b}, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufb xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x00"
      ],
      "ExpectedArm64ASM": [
        "movi v2.16b, #0x8f",
        "and v2.16b, v17.16b, v2.16b",
        "tbl v16.16b, {v16.16b}, v2.16b"
      ]
    },
    "phaddw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "NP 0x0f 0x38 0x01"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "addp v2.4h, v3.4h, v2.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phaddw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x01"
      ],
      "ExpectedArm64ASM": [
        "addp v16.8h, v16.8h, v17.8h"
      ]
    },
    "phaddd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "NP 0x0f 0x38 0x02"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "addp v2.2s, v3.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phaddd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x02"
      ],
      "ExpectedArm64ASM": [
        "addp v16.4s, v16.4s, v17.4s"
      ]
    },
    "phaddsw mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x03"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uzp1 v4.4h, v2.4h, v3.4h",
        "uzp2 v2.4h, v2.4h, v3.4h",
        "sqadd v2.8h, v4.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phaddsw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x03"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v16.8h, v17.8h",
        "uzp2 v3.8h, v16.8h, v17.8h",
        "sqadd v16.8h, v2.8h, v3.8h"
      ]
    },
    "pmaddubsw mm0, mm1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "NP 0x0f 0x38 0x04"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uxtl v2.8h, v2.8b",
        "sxtl v3.8h, v3.8b",
        "smull v4.4s, v2.4h, v3.4h",
        "smull2 v2.4s, v2.8h, v3.8h",
        "addp v2.4s, v4.4s, v2.4s",
        "sqxtn v2.4h, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmaddubsw xmm0, xmm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "{u,s}xtl{,2} and uzp{1,2} can be more optimal",
        "Up-front zero extend and sign extend the elements in place",
        "This allows extracting even and odd elements up-front so we don't need the unzips at the end",
        "Requires implementing IR ops for BIC (vector, immediate)",
        "0x66 0x0f 0x38 0x04"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v16.8b",
        "sxtl v3.8h, v17.8b",
        "mul v2.8h, v2.8h, v3.8h",
        "uxtl2 v3.8h, v16.16b",
        "sxtl2 v4.8h, v17.16b",
        "mul v3.8h, v3.8h, v4.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sqadd v16.8h, v4.8h, v2.8h"
      ]
    },
    "phsubw mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x05"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uzp1 v4.4h, v2.4h, v3.4h",
        "uzp2 v2.4h, v2.4h, v3.4h",
        "sub v2.8h, v4.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phsubw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x05"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v16.8h, v17.8h",
        "uzp2 v3.8h, v16.8h, v17.8h",
        "sub v16.8h, v2.8h, v3.8h"
      ]
    },
    "phsubd mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x06"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uzp1 v4.2s, v2.2s, v3.2s",
        "uzp2 v2.2s, v2.2s, v3.2s",
        "sub v2.4s, v4.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phsubd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x06"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v16.4s, v17.4s",
        "uzp2 v3.4s, v16.4s, v17.4s",
        "sub v16.4s, v2.4s, v3.4s"
      ]
    },
    "phsubsw mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x07"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uzp1 v4.4h, v2.4h, v3.4h",
        "uzp2 v2.4h, v2.4h, v3.4h",
        "sqsub v2.8h, v4.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "phsubsw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x07"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v16.8h, v17.8h",
        "uzp2 v3.8h, v16.8h, v17.8h",
        "sqsub v16.8h, v2.8h, v3.8h"
      ]
    },
    "psignb mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x08"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqshl v2.8b, v2.8b, #7",
        "srshr v2.8b, v2.8b, #7",
        "mul v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psignb xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x08"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.16b, v17.16b, #7",
        "srshr v2.16b, v2.16b, #7",
        "mul v16.16b, v16.16b, v2.16b"
      ]
    },
    "psignw mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x09"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqshl v2.4h, v2.4h, #15",
        "srshr v2.4h, v2.4h, #15",
        "mul v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psignw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x09"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.8h, v17.8h, #15",
        "srshr v2.8h, v2.8h, #15",
        "mul v16.8h, v16.8h, v2.8h"
      ]
    },
    "psignd mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "NP 0x0f 0x38 0x0a"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqshl v2.2s, v2.2s, #31",
        "srshr v2.2s, v2.2s, #31",
        "mul v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psignd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x0a"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.4s, v17.4s, #31",
        "srshr v2.4s, v2.4s, #31",
        "mul v16.4s, v16.4s, v2.4s"
      ]
    },
    "pmulhrsw mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Might be able to use sqdmulh",
        "NP 0x0f 0x38 0x0b"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "smull v2.4s, v2.4h, v3.4h",
        "sshr v2.4s, v2.4s, #14",
        "movi v3.4s, #0x1",
        "add v2.4s, v2.4s, v3.4s",
        "shrn v2.4h, v2.4s, #1",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmulhrsw xmm0, xmm1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Might be able to use sqdmulh",
        "0x66 0x0f 0x38 0x0b"
      ],
      "ExpectedArm64ASM": [
        "smull v2.4s, v16.4h, v17.4h",
        "smull2 v3.4s, v16.8h, v17.8h",
        "sshr v2.4s, v2.4s, #14",
        "sshr v3.4s, v3.4s, #14",
        "movi v4.4s, #0x1",
        "add v2.4s, v2.4s, v4.4s",
        "add v3.4s, v3.4s, v4.4s",
        "shrn v2.4h, v2.4s, #1",
        "mov v0.16b, v2.16b",
        "shrn2 v0.8h, v3.4s, #1",
        "mov v16.16b, v0.16b"
      ]
    },
    "pblendvb xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x10"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.16b, v16.16b, #7",
        "bit v16.16b, v17.16b, v2.16b"
      ]
    },
    "blendvps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x14"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.4s, v16.4s, #31",
        "bit v16.16b, v17.16b, v2.16b"
      ]
    },
    "blendvpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x15"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.2d, v16.2d, #63",
        "bit v16.16b, v17.16b, v2.16b"
      ]
    },
    "pblendvb xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x10"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.16b, v16.16b, #7",
        "bit v17.16b, v18.16b, v2.16b"
      ]
    },
    "blendvps xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x14"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.4s, v16.4s, #31",
        "bit v17.16b, v18.16b, v2.16b"
      ]
    },
    "blendvpd xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x15"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.2d, v16.2d, #63",
        "bit v17.16b, v18.16b, v2.16b"
      ]
    },
    "ptest xmm0, xmm1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x38 0x17"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "pabsb mm0, mm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "NP 0x0f 0x38 0x1c"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "abs v2.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pabsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x1c"
      ],
      "ExpectedArm64ASM": [
        "abs v16.16b, v17.16b"
      ]
    },
    "pabsw mm0, mm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "NP 0x0f 0x38 0x1d"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "abs v2.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pabsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x1d"
      ],
      "ExpectedArm64ASM": [
        "abs v16.8h, v17.8h"
      ]
    },
    "pabsd mm0, mm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "NP 0x0f 0x38 0x1e"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "abs v2.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pabsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x1e"
      ],
      "ExpectedArm64ASM": [
        "abs v16.4s, v17.4s"
      ]
    },
    "pmovzxbw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x30"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.8h, v17.8b"
      ]
    },
    "pmovzxbd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x31"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v16.4s, v2.4h"
      ]
    },
    "pmovzxbq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0x32"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v2.4s, v2.4h",
        "uxtl v16.2d, v2.2s"
      ]
    },
    "pmovzxwd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x33"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.4s, v17.4h"
      ]
    },
    "pmovzxwq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x38 0x34"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.4s, v17.4h",
        "uxtl v16.2d, v2.2s"
      ]
    },
    "pmovzxdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x35"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.2d, v17.2s"
      ]
    },
    "pcmpgtq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x37"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.2d, v16.2d, v17.2d"
      ]
    },
    "pminsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x38"
      ],
      "ExpectedArm64ASM": [
        "smin v16.16b, v16.16b, v17.16b"
      ]
    },
    "pminsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x39"
      ],
      "ExpectedArm64ASM": [
        "smin v16.4s, v16.4s, v17.4s"
      ]
    },
    "pminuw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3a"
      ],
      "ExpectedArm64ASM": [
        "umin v16.8h, v16.8h, v17.8h"
      ]
    },
    "pminud xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3b"
      ],
      "ExpectedArm64ASM": [
        "umin v16.4s, v16.4s, v17.4s"
      ]
    },
    "pmaxsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3c"
      ],
      "ExpectedArm64ASM": [
        "smax v16.16b, v16.16b, v17.16b"
      ]
    },
    "pmaxsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3d"
      ],
      "ExpectedArm64ASM": [
        "smax v16.4s, v16.4s, v17.4s"
      ]
    },
    "pmaxuw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3e"
      ],
      "ExpectedArm64ASM": [
        "umax v16.8h, v16.8h, v17.8h"
      ]
    },
    "pmaxud xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x3f"
      ],
      "ExpectedArm64ASM": [
        "umax v16.4s, v16.4s, v17.4s"
      ]
    },
    "pmulld xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x38 0x40"
      ],
      "ExpectedArm64ASM": [
        "mul v16.4s, v16.4s, v17.4s"
      ]
    },
    "phminposuw xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x38 0x41"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3008]",
        "zip1 v3.8h, v2.8h, v17.8h",
        "zip2 v2.8h, v2.8h, v17.8h",
        "umin v2.4s, v3.4s, v2.4s",
        "uminv s2, v2.4s",
        "rev32 v16.8h, v2.8h"
      ]
    },
    "movbe ax, word [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x38 0xf0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x6]",
        "rev w20, w20",
        "bfxil x4, x20, #16, #16"
      ]
    },
    "movbe eax, dword [rbx]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0f 0x38 0xf0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x6]",
        "rev w4, w20"
      ]
    },
    "movbe rax, qword [rbx]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "REX.W 0x0f 0x38 0xf0"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x6]",
        "rev x4, x20"
      ]
    },
    "adcx eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "eor w21, w20, #0x20000000",
        "msr nzcv, x21",
        "adcs w4, w6, w4",
        "cset x21, lo",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "adcx rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 REX.W 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "eor w21, w20, #0x20000000",
        "msr nzcv, x21",
        "adcs x4, x6, x4",
        "cset x21, lo",
        "bfi w20, w21, #29, #1",
        "msr nzcv, x20"
      ]
    },
    "adox eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xf3 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ccmp wzr, #0, #nzcv, vs",
        "adcs w4, w6, w4",
        "cset x21, hs",
        "bfi w20, w21, #28, #1",
        "msr nzcv, x20"
      ]
    },
    "adox rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xf3 REX.W 0x0f 0x38 0xf6"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ccmp wzr, #0, #nzcv, vs",
        "adcs x4, x6, x4",
        "cset x21, hs",
        "bfi w20, w21, #28, #1",
        "msr nzcv, x20"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/H0F3A.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "CRYPTO"
    ]
  },
  "Comment": [
    "SSE4.2 string instructions are skipped here.",
    "This is entirely because their implementations are a nightmare."
  ],
  "Instructions": {
    "palignr mm0, mm1, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "NP 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "palignr mm0, mm1, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "NP 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "ext v2.8b, v2.8b, v3.8b, #1",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "palignr mm0, mm1, 255": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "NP 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "roundps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x08"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.4s, v17.4s"
      ]
    },
    "roundps xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x08"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.4s, v17.4s"
      ]
    },
    "roundps xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x08"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.4s, v17.4s"
      ]
    },
    "roundps xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x08"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.4s, v17.4s"
      ]
    },
    "roundps xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x08"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.4s, v17.4s"
      ]
    },
    "roundpd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x09"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.2d, v17.2d"
      ]
    },
    "roundpd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x09"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.2d, v17.2d"
      ]
    },
    "roundpd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x09"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.2d, v17.2d"
      ]
    },
    "roundpd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x09"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.2d, v17.2d"
      ]
    },
    "roundpd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x09"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.2d, v17.2d"
      ]
    },
    "roundss xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintn s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "roundss xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintm s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "roundss xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintp s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "roundss xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frintz s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "roundss xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x0a"
      ],
      "ExpectedArm64ASM": [
        "frinti s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "roundsd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Nearest rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintn d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "roundsd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "-inf rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintm d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "roundsd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "+inf rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintp d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "roundsd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "truncate rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frintz d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "roundsd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "host rounding mode rounding",
        "0x66 0x0f 0x3a 0x0b"
      ],
      "ExpectedArm64ASM": [
        "frinti d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "blendps xmm0, xmm1, 0000b": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": []
    },
    "blendps xmm0, xmm1, 0001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], v17.s[0]"
      ]
    },
    "blendps xmm0, xmm1, 0010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[1], v17.s[1]"
      ]
    },
    "blendps xmm0, xmm1, 0011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[0]"
      ]
    },
    "blendps xmm0, xmm1, 0100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[2], v17.s[2]"
      ]
    },
    "blendps xmm0, xmm1, 0101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v17.4s",
        "trn2 v16.4s, v2.4s, v16.4s"
      ]
    },
    "blendps xmm0, xmm1, 0110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3200]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 0111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3216]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 1000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[3], v17.s[3]"
      ]
    },
    "blendps xmm0, xmm1, 1001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3232]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 1010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v16.4s",
        "trn2 v16.4s, v2.4s, v17.4s"
      ]
    },
    "blendps xmm0, xmm1, 1011b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3248]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 1100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "blendps xmm0, xmm1, 1101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3264]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 1110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3280]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "blendps xmm0, xmm1, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0c"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "blendpd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0x66 0x0f 0x3a 0x0d"
      ],
      "ExpectedArm64ASM": []
    },
    "blendpd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0d"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[0]"
      ]
    },
    "blendpd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0d"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "blendpd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0d"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "pblendw xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": []
    },
    "pblendw xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.h[0], v17.h[0]"
      ]
    },
    "pblendw xmm0, xmm1, 11010111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2712]",
        "ldr q2, [x0, #3440]",
        "tbx v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pblendw xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], v17.s[0]"
      ]
    },
    "pblendw xmm0, xmm1, 00001100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[1], v17.s[1]"
      ]
    },
    "pblendw xmm0, xmm1, 00110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[2], v17.s[2]"
      ]
    },
    "pblendw xmm0, xmm1, 11000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[3], v17.s[3]"
      ]
    },
    "pblendw xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[0]"
      ]
    },
    "pblendw xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "pblendw xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0e"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "palignr xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "palignr xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v17.16b, v16.16b, #1"
      ]
    },
    "palignr xmm0, xmm1, 255": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x0f"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "pextrb eax, xmm0, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x14"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[0]"
      ]
    },
    "pextrb eax, xmm0, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x14"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[15]"
      ]
    },
    "pextrw eax, xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x15"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "pextrw eax, xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x15"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "pextrd eax, xmm0, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "pextrd eax, xmm0, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "pextrq rax, xmm0, 0b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "mov x4, v16.d[0]"
      ]
    },
    "pextrq rax, xmm0, 1b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "mov x4, v16.d[1]"
      ]
    },
    "pextrb [rax], xmm0, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x14"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[0], [x4]"
      ]
    },
    "pextrb [rax], xmm0, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x14"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[15], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x15"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x15"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "pextrd [rax], xmm0, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[0], [x4]"
      ]
    },
    "pextrd [rax], xmm0, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[3], [x4]"
      ]
    },
    "pextrq [rax], xmm0, 0b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.d}[0], [x4]"
      ]
    },
    "pextrq [rax], xmm0, 1b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x16"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.d}[1], [x4]"
      ]
    },
    "extractps eax, xmm0, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x17"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "extractps eax, xmm0, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x17"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "pinsrb xmm0, eax, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "mov v16.b[0], w4"
      ]
    },
    "pinsrb xmm0, eax, 0001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "mov v16.b[1], w4"
      ]
    },
    "pinsrb xmm0, eax, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "mov v16.b[15], w4"
      ]
    },
    "pinsrb xmm0, [rax], 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.b}[0], [x4]"
      ]
    },
    "pinsrb xmm0, [rax], 0001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.b}[1], [x4]"
      ]
    },
    "pinsrb xmm0, [rax], 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x20"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.b}[15], [x4]"
      ]
    },
    "insertps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x21"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], v17.s[0]"
      ]
    },
    "insertps xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x21"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "insertps xmm0, xmm1, 00010000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x21"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[1], v17.s[0]"
      ]
    },
    "pinsrd xmm0, eax, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[0], w4"
      ]
    },
    "pinsrd xmm0, eax, 01b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[1], w4"
      ]
    },
    "pinsrd xmm0, eax, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "mov v16.s[3], w4"
      ]
    },
    "pinsrq xmm0, rax, 0b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[0], x4"
      ]
    },
    "pinsrq xmm0, rax, 1b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], x4"
      ]
    },
    "pinsrd xmm0, [rax], 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.s}[0], [x4]"
      ]
    },
    "pinsrd xmm0, [rax], 01b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.s}[1], [x4]"
      ]
    },
    "pinsrd xmm0, [rax], 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.s}[3], [x4]"
      ]
    },
    "pinsrq xmm0, [rax], 0b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[0], [x4]"
      ]
    },
    "pinsrq xmm0, [rax], 1b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 REX.W 0x0f 0x3a 0x22"
      ],
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[1], [x4]"
      ]
    },
    "dpps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 11110001b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "zip1 v16.4s, v3.4s, v2.4s"
      ]
    },
    "dpps xmm0, xmm1, 11110010b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "zip1 v16.2s, v2.2s, v3.2s"
      ]
    },
    "dpps xmm0, xmm1, 11110011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddp v2.4s, v2.4s, v2.4s",
        "faddp s2, v2.2s",
        "dup v16.2s, v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11110100b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "dpps xmm0, xmm1, 11110101b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddp v2.4s, v2.4s, v2.4s",
        "faddp s2, v2.2s",
        "zip1 v16.2d, v2.2d, v2.2d"
      ]
    },
    "dpps xmm0, xmm1, 11110110b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "mov v2.s[1], v3.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[2], v3.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11110111b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[3], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "ext v16.16b, v2.16b, v3.16b, #4"
      ]
    },
    "dpps xmm0, xmm1, 11111001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "mov v2.s[0], v3.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v3.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "zip1 v16.4s, v2.4s, v3.4s"
      ]
    },
    "dpps xmm0, xmm1, 11111011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[2], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111100b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "dpps xmm0, xmm1, 11111101b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[1], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111110b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddp v3.4s, v3.4s, v3.4s",
        "faddp s3, v3.2s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddp v2.4s, v2.4s, v2.4s",
        "faddp s2, v2.2s",
        "dup v16.4s, v2.s[0]"
      ]
    },
    "dppd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.2d, v16.2d, v17.2d",
        "faddp d2, v2.2d",
        "dup v16.2d, v2.d[0]"
      ]
    },
    "mpsadbw xmm0, xmm1, 000b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "ext v3.16b, v16.16b, v16.16b, #0",
        "ext v4.16b, v16.16b, v16.16b, #1",
        "ext v5.16b, v16.16b, v16.16b, #2",
        "ext v6.16b, v16.16b, v16.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 001b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "ext v3.16b, v16.16b, v16.16b, #0",
        "ext v4.16b, v16.16b, v16.16b, #1",
        "ext v5.16b, v16.16b, v16.16b, #2",
        "ext v6.16b, v16.16b, v16.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 010b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[2]",
        "ext v3.16b, v16.16b, v16.16b, #0",
        "ext v4.16b, v16.16b, v16.16b, #1",
        "ext v5.16b, v16.16b, v16.16b, #2",
        "ext v6.16b, v16.16b, v16.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 011b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[3]",
        "ext v3.16b, v16.16b, v16.16b, #0",
        "ext v4.16b, v16.16b, v16.16b, #1",
        "ext v5.16b, v16.16b, v16.16b, #2",
        "ext v6.16b, v16.16b, v16.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 100b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "ext v3.16b, v16.16b, v16.16b, #4",
        "ext v4.16b, v16.16b, v16.16b, #5",
        "ext v5.16b, v16.16b, v16.16b, #6",
        "ext v6.16b, v16.16b, v16.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 101b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "ext v3.16b, v16.16b, v16.16b, #4",
        "ext v4.16b, v16.16b, v16.16b, #5",
        "ext v5.16b, v16.16b, v16.16b, #6",
        "ext v6.16b, v16.16b, v16.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 110b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[2]",
        "ext v3.16b, v16.16b, v16.16b, #4",
        "ext v4.16b, v16.16b, v16.16b, #5",
        "ext v5.16b, v16.16b, v16.16b, #6",
        "ext v6.16b, v16.16b, v16.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "mpsadbw xmm0, xmm1, 111b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x66 0x0f 0x3a 0x42"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[3]",
        "ext v3.16b, v16.16b, v16.16b, #4",
        "ext v4.16b, v16.16b, v16.16b, #5",
        "ext v5.16b, v16.16b, v16.16b, #6",
        "ext v6.16b, v16.16b, v16.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/H0F3A_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "dpps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dpps xmm0, xmm1, 11110001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "zip1 v16.4s, v3.4s, v2.4s"
      ]
    },
    "dpps xmm0, xmm1, 11110010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "zip1 v16.2s, v2.2s, v3.2s"
      ]
    },
    "dpps xmm0, xmm1, 11110011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddv s2, p6, z2.s",
        "dup v16.2s, v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11110100b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "dpps xmm0, xmm1, 11110101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddv s2, p6, z2.s",
        "zip1 v16.2d, v2.2d, v2.2d"
      ]
    },
    "dpps xmm0, xmm1, 11110110b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "mov v2.s[1], v3.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[2], v3.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11110111b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[3], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "ext v16.16b, v2.16b, v3.16b, #4"
      ]
    },
    "dpps xmm0, xmm1, 11111001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "mov v2.s[0], v3.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v3.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111010b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "zip1 v16.4s, v2.4s, v3.4s"
      ]
    },
    "dpps xmm0, xmm1, 11111011b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[2], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111100b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "dpps xmm0, xmm1, 11111101b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[1], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111110b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul v3.4s, v16.4s, v17.4s",
        "faddv s3, p6, z3.s",
        "dup v3.4s, v3.s[0]",
        "mov v16.16b, v3.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "dpps xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x40"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v16.4s, v17.4s",
        "faddv s2, p6, z2.s",
        "dup v16.4s, v2.s[0]"
      ]
    },
    "dppd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "dppd xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x66 0x0f 0x3a 0x41"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.2d, v16.2d, v17.2d",
        "faddv d2, p6, z2.d",
        "dup v16.2d, v2.d[0]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/MOPS/Primary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "MOPS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "rep movsb": {
      "ExpectedInstructionCount": 109,
      "Comment": "0xa4",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0xc0",
        "cbz x0, #+0xa4",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x18",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x14",
        "ldrb w3, [x2], #1",
        "strb w3, [x1], #1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2",
        "add x20, x1, x2",
        "b #+0xdc",
        "cbz x0, #+0xc4",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x28",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x1 (1)",
        "add x2, x2, #0x1 (1)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1f (31)",
        "sub x2, x2, #0x1f (31)",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1f (31)",
        "add x2, x2, #0x1f (31)",
        "ldrb w3, [x2], #-1",
        "strb w3, [x1], #-1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2",
        "sub x20, x1, x2",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsw": {
      "ExpectedInstructionCount": 111,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0xc4",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #1",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "ldrh w3, [x2], #2",
        "strh w3, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #1",
        "add x20, x1, x2, lsl #1",
        "b #+0xe0",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #1",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x2 (2)",
        "add x2, x2, #0x2 (2)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1e (30)",
        "sub x2, x2, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1e (30)",
        "add x2, x2, #0x1e (30)",
        "ldrh w3, [x2], #-2",
        "strh w3, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #1",
        "sub x20, x1, x2, lsl #1",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsd": {
      "ExpectedInstructionCount": 111,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0xc4",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #2",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "ldr w3, [x2], #4",
        "str w3, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #2",
        "add x20, x1, x2, lsl #2",
        "b #+0xe0",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #2",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x4 (4)",
        "add x2, x2, #0x4 (4)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1c (28)",
        "sub x2, x2, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1c (28)",
        "add x2, x2, #0x1c (28)",
        "ldr w3, [x2], #-4",
        "str w3, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #2",
        "sub x20, x1, x2, lsl #2",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsq": {
      "ExpectedInstructionCount": 111,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0xc4",
        "cbz x0, #+0xa8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x1c",
        "lsl x0, x0, #3",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x7c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "ldr x3, [x2], #8",
        "str x3, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #3",
        "add x20, x1, x2, lsl #3",
        "b #+0xe0",
        "cbz x0, #+0xc8",
        "mrs x3, nzcv",
        "sub x0, x1, x2",
        "cmp x0, x7",
        "mov x0, x7",
        "bc.lt #+0x2c",
        "lsl x0, x0, #3",
        "sub x1, x1, x0",
        "sub x2, x2, x0",
        "add x1, x1, #0x8 (8)",
        "add x2, x2, #0x8 (8)",
        "cpyfp [x1]!, [x2]!, x0!",
        "cpyfm [x1]!, [x2]!, x0!",
        "cpyfe [x1]!, [x2]!, x0!",
        "msr nzcv, x3",
        "b #+0x8c",
        "msr nzcv, x3",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x18 (24)",
        "sub x2, x2, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x18 (24)",
        "add x2, x2, #0x18 (24)",
        "ldr x3, [x2], #-8",
        "str x3, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #3",
        "sub x20, x1, x2, lsl #3",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "cmpsb": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "ldrb w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20",
        "add x10, x10, x20"
      ]
    },
    "cmpsw": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "ldrh w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1",
        "add x10, x10, x20, lsl #1"
      ]
    },
    "cmpsd": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "ldr w21, [x10]",
        "eor x27, x21, x20",
        "subs w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2",
        "add x10, x10, x20, lsl #2"
      ]
    },
    "cmpsq": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "ldr x21, [x10]",
        "eor x27, x21, x20",
        "subs x26, x21, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3",
        "add x10, x10, x20, lsl #3"
      ]
    },
    "repz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "stosb": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xaa",
      "ExpectedArm64ASM": [
        "strb w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20"
      ]
    },
    "stosw": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "strh w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "stosd": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "str w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "stosq": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "str x4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "rep stosb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaa",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x24",
        "cbz x0, #+0x1c",
        "mrs x2, nzcv",
        "setp [x1]!, x0!, x20",
        "setm [x1]!, x0!, x20",
        "sete [x1]!, x0!, x20",
        "msr nzcv, x2",
        "add x11, x11, x7",
        "b #+0x28",
        "cbz x0, #+0x24",
        "sub x1, x1, x0",
        "add x1, x1, #0x1 (1)",
        "mrs x2, nzcv",
        "setp [x1]!, x0!, x20",
        "setm [x1]!, x0!, x20",
        "sete [x1]!, x0!, x20",
        "msr nzcv, x2",
        "sub x11, x11, x7",
        "mov w7, #0x0"
      ]
    },
    "rep stosw": {
      "ExpectedInstructionCount": 55,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w20",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x10",
        "strh w20, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #1",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w20",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1e (30)",
        "strh w20, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #1",
        "mov w7, #0x0"
      ]
    },
    "rep stosd": {
      "ExpectedInstructionCount": 55,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w20",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x10",
        "str w20, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #2",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w20",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1c (28)",
        "str w20, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #2",
        "mov w7, #0x0"
      ]
    },
    "rep stosq": {
      "ExpectedInstructionCount": 54,
      "Comment": [
        "Unrolling the loop for faster memset can be done.",
        "Taking advantage of ARM MOPs instructions can be done",
        "0xab"
      ],
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w20, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x10",
        "str x4, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #3",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x18 (24)",
        "str x4, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #3",
        "mov w7, #0x0"
      ]
    },
    "lodsb": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xac",
      "ExpectedArm64ASM": [
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20"
      ]
    },
    "lodsw": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #1"
      ]
    },
    "lodsd": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldr w4, [x10]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #2"
      ]
    },
    "lodsq": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldr x4, [x10]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #3"
      ]
    },
    "rep lodsb": {
      "ExpectedInstructionCount": 17,
      "Comment": "0xac",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x20",
        "cbz x7, #+0x18",
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "b #-0x14",
        "b #+0x1c",
        "cbz x7, #+0x18",
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "b #-0x14"
      ]
    },
    "rep lodsw": {
      "ExpectedInstructionCount": 17,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x20",
        "cbz x7, #+0x18",
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x2 (2)",
        "b #-0x14",
        "b #+0x1c",
        "cbz x7, #+0x18",
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x2 (2)",
        "b #-0x14"
      ]
    },
    "rep lodsd": {
      "ExpectedInstructionCount": 15,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x1c",
        "cbz x7, #+0x14",
        "ldr w4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x4 (4)",
        "b #-0x10",
        "b #+0x18",
        "cbz x7, #+0x14",
        "ldr w4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x4 (4)",
        "b #-0x10"
      ]
    },
    "rep lodsq": {
      "ExpectedInstructionCount": 15,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x1c",
        "cbz x7, #+0x14",
        "ldr x4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x8 (8)",
        "b #-0x10",
        "b #+0x18",
        "cbz x7, #+0x14",
        "ldr x4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x8 (8)",
        "b #-0x10"
      ]
    },
    "scasb": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20"
      ]
    },
    "scasw": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "scasd": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "scasq": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "repz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.eq #-0x20"
      ]
    },
    "repz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.eq #-0x20"
      ]
    },
    "repz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.eq #-0x18"
      ]
    },
    "repz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.eq #-0x18"
      ]
    },
    "repnz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.ne #-0x18"
      ]
    },
    "repnz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.ne #-0x18"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Primary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "MOPS"
    ]
  },
  "Instructions": {
    "add bl, cl": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x00",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmn w0, w7, lsl #24",
        "add w26, w6, w7",
        "bfxil x6, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "add bx, cx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmn w0, w7, lsl #16",
        "add w26, w6, w7",
        "bfxil x6, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "add ebx, ecx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "adds w26, w6, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "add rbx, rcx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x01",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "adds x26, x6, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "db 0x02, 0xcb": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0x02",
        "add bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmn w0, w6, lsl #24",
        "add w26, w7, w6",
        "bfxil x7, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x66, 0x03, 0xcb": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0x03",
        "add bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmn w0, w6, lsl #16",
        "add w26, w7, w6",
        "bfxil x7, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x03, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x03",
        "add ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "adds w26, w7, w6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x03, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x03",
        "add rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "adds x26, x7, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "add al, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x04",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add ax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add al, -1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x04",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "add ax, -1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "bfxil x4, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "add eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs w26, w4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "add rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x05",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs x26, x4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or bl, bh": {
      "ExpectedInstructionCount": 7,
      "Comment": "",
      "ExpectedArm64ASM": [
        "lsr w20, w6, #8",
        "orr w26, w6, w20",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "or bl, cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x08",
      "ExpectedArm64ASM": [
        "orr w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "or bx, cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "or ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr w6, w6, w7",
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "or rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x09",
      "ExpectedArm64ASM": [
        "orr x6, x6, x7",
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "db 0x0A, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x0A",
        "or bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x66, 0x0B, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x0B",
        "or bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x0B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0B",
        "or ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr w7, w7, w6",
        "subs w26, w7, #0x0 (0)"
      ]
    },
    "db 0x48, 0x0B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0B",
        "or rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "orr x7, x7, x6",
        "subs x26, x7, #0x0 (0)"
      ]
    },
    "or al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0C",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or ax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "or al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0C",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or ax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xffff",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "orr w4, w4, w20",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0D",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "orr x4, x4, x20",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc bl, cl": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x10",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxtb x20, w7",
        "cinc w21, w20, lo",
        "add w22, w6, w21",
        "uxtb w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w6, w20",
        "eor w21, w26, w6",
        "bic w20, w21, w20",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x6, x26, #0, #8",
        "msr nzcv, x22"
      ]
    },
    "adc bx, cx": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxth x20, w7",
        "cinc w21, w20, lo",
        "add w22, w6, w21",
        "uxth w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w6, w20",
        "eor w21, w26, w6",
        "bic w20, w21, w20",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x6, x26, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "adc ebx, ecx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "adcs w26, w6, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "adc rbx, rcx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x11",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "adcs x26, x6, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "db 0x12, 0xcb": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0x12",
        "adc bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxtb x20, w6",
        "cinc w21, w20, lo",
        "add w22, w7, w21",
        "uxtb w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w7, w20",
        "eor w21, w26, w7",
        "bic w20, w21, w20",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x7, x26, #0, #8",
        "msr nzcv, x22"
      ]
    },
    "db 0x66, 0x13, 0xcb": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0x13",
        "adc bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxth x20, w6",
        "cinc w21, w20, lo",
        "add w22, w7, w21",
        "uxth w26, w22",
        "cmp w26, w21",
        "cset x21, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w7, w20",
        "eor w21, w26, w7",
        "bic w20, w21, w20",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x7, x26, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "db 0x13, 0xcb": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x13",
        "adc ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "adcs w26, w7, w6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x13, 0xcb": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x13",
        "adc rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "adcs x26, x7, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "adc al, 1": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x14",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w4",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "adc ax, 1": {
      "ExpectedInstructionCount": 17,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w4",
        "ubfx x20, x20, #15, #1",
        "bfi w21, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "adc eax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc al, -1": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x14",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w4, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x21"
      ]
    },
    "adc ax, -1": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxth w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w4, w26",
        "ubfx x20, x20, #15, #1",
        "bfi w21, w20, #28, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x21"
      ]
    },
    "adc eax, -1": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "adc rax, -1": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x15",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sbb bl, cl": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x18",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxtb w20, w6",
        "uxtb x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x6, x26, #0, #8",
        "msr nzcv, x23"
      ]
    },
    "sbb bx, cx": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "uxth w20, w6",
        "uxth x21, w7",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x6, x26, #0, #16",
        "msr nzcv, x23"
      ]
    },
    "sbb ebx, ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "sbcs w26, w6, w7",
        "mov x6, x26"
      ]
    },
    "sbb rbx, rcx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x19",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "sbcs x26, x6, x7",
        "mov x6, x26"
      ]
    },
    "db 0x1A, 0xcb": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0x1A",
        "sbb bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxtb w20, w7",
        "uxtb x21, w6",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxtb w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x7, x26, #0, #8",
        "msr nzcv, x23"
      ]
    },
    "db 0x66, 0x1B, 0xcb": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0x1B",
        "sbb bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "uxth w20, w7",
        "uxth x21, w6",
        "cinc w22, w21, lo",
        "sub w23, w20, w22",
        "uxth w26, w23",
        "cmp w20, w22",
        "cset x22, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w21, w20, w21",
        "eor w20, w26, w20",
        "and w20, w20, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x7, x26, #0, #16",
        "msr nzcv, x23"
      ]
    },
    "db 0x1B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x1B",
        "sbb ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "sbcs w26, w7, w6",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x1B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x1B",
        "sbb rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "sbcs x26, x7, x6",
        "mov x7, x26"
      ]
    },
    "sbb al, 1": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x1C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "msr nzcv, x22",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb ax, 1": {
      "ExpectedInstructionCount": 18,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxth w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "msr nzcv, x22",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb al, -1": {
      "ExpectedInstructionCount": 16,
      "Comment": "0x1C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w26, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x22"
      ]
    },
    "sbb ax, -1": {
      "ExpectedInstructionCount": 16,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "uxth w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxth w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #16",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w26, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "sbb eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "sbcs w26, w4, w20",
        "mov x4, x26"
      ]
    },
    "sbb rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x1D",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "sbcs x26, x4, x20",
        "mov x4, x26"
      ]
    },
    "and bl, cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x20",
      "ExpectedArm64ASM": [
        "and w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "and bx, cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "and w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "and ebx, ecx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "ands w26, w6, w7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "and rbx, rcx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x21",
      "ExpectedArm64ASM": [
        "ands x26, x6, x7",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x26"
      ]
    },
    "db 0x22, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x22",
        "and bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "and w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x66, 0x23, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x23",
        "and bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "and w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x23, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x23",
        "and ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "ands w26, w7, w6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x23, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x23",
        "and rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "ands x26, x7, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x7, x26"
      ]
    },
    "and al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x24",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffffff01",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and ax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffff0001",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and eax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x24",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "and ax, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "and eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ands w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x25",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ands x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub bl, cl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x28",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w6, w7",
        "bfxil x6, x26, #0, #8"
      ]
    },
    "sub bx, cx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w6, w7",
        "bfxil x6, x26, #0, #16"
      ]
    },
    "sub ebx, ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs w26, w6, w7",
        "mov x6, x26"
      ]
    },
    "sub rbx, rcx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x29",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs x26, x6, x7",
        "mov x6, x26"
      ]
    },
    "db 0x2A, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x2A",
        "sub bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmp w0, w6, lsl #24",
        "sub w26, w7, w6",
        "bfxil x7, x26, #0, #8"
      ]
    },
    "db 0x66, 0x2B, 0xcb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0x2B",
        "sub bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmp w0, w6, lsl #16",
        "sub w26, w7, w6",
        "bfxil x7, x26, #0, #16"
      ]
    },
    "db 0x2B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x2B",
        "sub ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs w26, w7, w6",
        "mov x7, x26"
      ]
    },
    "db 0x48, 0x2B, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x2B",
        "sub rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs x26, x7, x6",
        "mov x7, x26"
      ]
    },
    "sub al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x2C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sub ax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sub eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x2C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sub ax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sub eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "sub rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x2D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "xor bl, cl": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x30",
      "ExpectedArm64ASM": [
        "eor w26, w6, w7",
        "cmn wzr, w26, lsl #24",
        "bfxil x6, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xor bx, cx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor w26, w6, w7",
        "cmn wzr, w26, lsl #16",
        "bfxil x6, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xor ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor w6, w6, w7",
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "xor rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x31",
      "ExpectedArm64ASM": [
        "eor x6, x6, x7",
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "db 0x32, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x32",
        "xor bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w26, w7, w6",
        "cmn wzr, w26, lsl #24",
        "bfxil x7, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x66, 0x33, 0xcb": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x33",
        "xor bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w26, w7, w6",
        "cmn wzr, w26, lsl #16",
        "bfxil x7, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0x33, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x33",
        "xor ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor w7, w7, w6",
        "subs w26, w7, #0x0 (0)"
      ]
    },
    "db 0x48, 0x33, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x33",
        "xor rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x7, x7, x6",
        "subs x26, x7, #0x0 (0)"
      ]
    },
    "xor al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x34",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "xor ax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "xor eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp bl, cl": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x38",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #24",
        "cmp w0, w7, lsl #24",
        "sub w26, w6, w7"
      ]
    },
    "xor al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x34",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "xor ax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xffff",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "xor eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "mvn w4, w4",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x35",
      "ExpectedArm64ASM": [
        "mvn x4, x4",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp bx, cx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "lsl w0, w6, #16",
        "cmp w0, w7, lsl #16",
        "sub w26, w6, w7"
      ]
    },
    "cmp ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs w26, w6, w7"
      ]
    },
    "cmp rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x39",
      "ExpectedArm64ASM": [
        "eor x27, x6, x7",
        "subs x26, x6, x7"
      ]
    },
    "db 0x3A, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x3A",
        "cmp bl, cl but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #24",
        "cmp w0, w6, lsl #24",
        "sub w26, w7, w6"
      ]
    },
    "db 0x66, 0x3B, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x3B",
        "cmp bx, cx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "lsl w0, w7, #16",
        "cmp w0, w6, lsl #16",
        "sub w26, w7, w6"
      ]
    },
    "db 0x3B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x3B",
        "cmp ebx, ecx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs w26, w7, w6"
      ]
    },
    "db 0x48, 0x3B, 0xcb": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x3B",
        "cmp rbx, rcx but modrm.rm as source"
      ],
      "ExpectedArm64ASM": [
        "eor x27, x7, x6",
        "subs x26, x7, x6"
      ]
    },
    "cmp al, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3C",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp ax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3C",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)"
      ]
    },
    "cmp ax, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20"
      ]
    },
    "cmp eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)"
      ]
    },
    "cmp rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x3D",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)"
      ]
    },
    "push ax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "strh w4, [x8, #-2]!"
      ]
    },
    "push rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "str x4, [x8, #-8]!"
      ]
    },
    "pop ax": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x58",
      "ExpectedArm64ASM": [
        "ldrh w20, [x8], #2",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "pop rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x58",
      "ExpectedArm64ASM": [
        "ldr x4, [x8], #8"
      ]
    },
    "movsxd rax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x63",
      "ExpectedArm64ASM": [
        "sxtw x4, w6"
      ]
    },
    "push word 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x68",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "strh w20, [x8, #-2]!"
      ]
    },
    "push qword 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x68",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "str x20, [x8, #-8]!"
      ]
    },
    "imul ax, bx, 257": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "sxth x21, w6",
        "mul x20, x21, x20",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx, 257": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "smull x21, w6, w20",
        "asr x21, x21, #32",
        "mul w4, w6, w20",
        "sbfx x20, x4, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx, 257": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x69",
      "ExpectedArm64ASM": [
        "mov w20, #0x101",
        "smulh x21, x6, x20",
        "mul x4, x6, x20",
        "asr x20, x4, #63",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "push word -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x6a",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "strh w20, [x8, #-2]!"
      ]
    },
    "push dword -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x6a",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "str x20, [x8, #-8]!"
      ]
    },
    "push qword -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x6a",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "str x20, [x8, #-8]!"
      ]
    },
    "imul ax, bx, 3": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "sxth x21, w6",
        "mul x20, x21, x20",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx, 3": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "smull x21, w6, w20",
        "asr x21, x21, #32",
        "mul w4, w6, w20",
        "sbfx x20, x4, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx, 3": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x6b",
      "ExpectedArm64ASM": [
        "mov w20, #0x3",
        "smulh x21, x6, x20",
        "mul x4, x6, x20",
        "asr x20, x4, #63",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "test al, bl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x84",
      "ExpectedArm64ASM": [
        "and w26, w4, w6",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test ax, bx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "and w26, w4, w6",
        "cmn wzr, w26, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "ands w26, w4, w6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test rax, rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "ands x26, x4, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test al, al": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x84",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "test ax, ax": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "test eax, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "test rax, rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x85",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "db 0x86, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x86",
        "xchg bl, cl"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "bfxil x20, x7, #0, #8",
        "bfxil x7, x6, #0, #8",
        "mov x6, x20"
      ]
    },
    "db 0x86, 0xd9": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x86",
        "xchg cl, bl"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x7",
        "bfxil x20, x6, #0, #8",
        "bfxil x6, x7, #0, #8",
        "mov x7, x20"
      ]
    },
    "xchg [rax], cl": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x86",
      "ExpectedArm64ASM": [
        "swpalb w7, w20, [x4]",
        "bfxil x7, x20, #0, #8"
      ]
    },
    "db 0x66, 0x87, 0xcb": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x87",
        "xchg bx, cx"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "bfxil x20, x7, #0, #16",
        "bfxil x7, x6, #0, #16",
        "mov x6, x20"
      ]
    },
    "db 0x66, 0x87, 0xd9": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0x87",
        "xchg cx, bx"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x7",
        "bfxil x20, x6, #0, #16",
        "bfxil x6, x7, #0, #16",
        "mov x7, x20"
      ]
    },
    "xchg [rax], cx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpalh w7, w20, [x4]",
        "bfxil x7, x20, #0, #16"
      ]
    },
    "db 0x87, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x87",
        "xchg ebx, ecx"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w7",
        "mov w7, w6",
        "mov x6, x20"
      ]
    },
    "db 0x87, 0xd9": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x87",
        "xchg ecx, ebx"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov w6, w7",
        "mov x7, x20"
      ]
    },
    "xchg [rax], ecx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpal w7, w7, [x4]"
      ]
    },
    "db 0x48, 0x87, 0xcb": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x87",
        "xchg rbx, rcx"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "mov x6, x7",
        "mov x7, x20"
      ]
    },
    "db 0x48, 0x87, 0xd9": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x87",
        "xchg rcx, rbx"
      ],
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "mov x6, x7",
        "mov x7, x20"
      ]
    },
    "xchg [rax], rcx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x87",
      "ExpectedArm64ASM": [
        "swpal x7, x7, [x4]"
      ]
    },
    "mov [rax], bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x88",
      "ExpectedArm64ASM": [
        "strb w6, [x4]"
      ]
    },
    "mov [rax], bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x89",
      "ExpectedArm64ASM": [
        "strh w6, [x4]"
      ]
    },
    "mov [rax], ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x89",
      "ExpectedArm64ASM": [
        "str w6, [x4]"
      ]
    },
    "mov [rax], rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x89",
      "ExpectedArm64ASM": [
        "str x6, [x4]"
      ]
    },
    "mov bl, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8a",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "bfxil x6, x20, #0, #8"
      ]
    },
    "mov bx, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8b",
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "bfxil x6, x20, #0, #16"
      ]
    },
    "mov ebx, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8b",
      "ExpectedArm64ASM": [
        "ldr w6, [x4]"
      ]
    },
    "mov rbx, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8b",
      "ExpectedArm64ASM": [
        "ldr x6, [x4]"
      ]
    },
    "mov ax, cs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #962]",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "mov eax, cs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #962]"
      ]
    },
    "mov rax, cs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #962]"
      ]
    },
    "mov ax, es": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #960]",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "mov eax, es": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #960]"
      ]
    },
    "mov rax, es": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #960]"
      ]
    },
    "mov ax, ss": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #964]",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "mov eax, ss": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #964]"
      ]
    },
    "mov rax, ss": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #964]"
      ]
    },
    "mov ax, ds": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #966]",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "mov eax, ds": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #966]"
      ]
    },
    "mov rax, ds": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "ldrh w4, [x28, #966]"
      ]
    },
    "mov ax, gs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "and x4, x4, #0xffffffffffff0000"
      ]
    },
    "mov eax, gs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "mov rax, gs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "mov ax, fs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "and x4, x4, #0xffffffffffff0000"
      ]
    },
    "mov eax, fs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "mov rax, fs": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8c",
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "lea ax, [rbx+rcx*1 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [rbx+rcx*1 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7",
        "mov w4, w20"
      ]
    },
    "lea rax, [rbx+rcx*1 + 0]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x4, x6, x7"
      ]
    },
    "lea ax, [rbx+rcx*2 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #1",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [rbx+rcx*2 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #1",
        "mov w4, w20"
      ]
    },
    "lea rax, [rbx+rcx*2 + 0]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x4, x6, x7, lsl #1"
      ]
    },
    "lea ax, [rbx+rcx*4 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #2",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [rbx+rcx*4 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #2",
        "mov w4, w20"
      ]
    },
    "lea rax, [rbx+rcx*4 + 0]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x4, x6, x7, lsl #2"
      ]
    },
    "lea ax, [rbx+rcx*8 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #3",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [rbx+rcx*8 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #3",
        "mov w4, w20"
      ]
    },
    "lea rax, [rbx+rcx*8 + 0]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x4, x6, x7, lsl #3"
      ]
    },
    "lea ax, [ebx+ecx*1 + 0]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7",
        "mov w20, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [ebx+ecx*1 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7",
        "mov w4, w20"
      ]
    },
    "lea rax, [ebx+ecx*1 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7",
        "mov w4, w20"
      ]
    },
    "lea ax, [ebx+ecx*2 + 0]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #1",
        "mov w20, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [ebx+ecx*2 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #1",
        "mov w4, w20"
      ]
    },
    "lea rax, [ebx+ecx*2 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #1",
        "mov w4, w20"
      ]
    },
    "lea ax, [ebx+ecx*4 + 0]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #2",
        "mov w20, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [ebx+ecx*4 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #2",
        "mov w4, w20"
      ]
    },
    "lea rax, [ebx+ecx*4 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #2",
        "mov w4, w20"
      ]
    },
    "lea ax, [ebx+ecx*8 + 0]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #3",
        "mov w20, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lea eax, [ebx+ecx*8 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #3",
        "mov w4, w20"
      ]
    },
    "lea rax, [ebx+ecx*8 + 0]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8d",
      "ExpectedArm64ASM": [
        "add x20, x6, x7, lsl #3",
        "mov w4, w20"
      ]
    },
    "mov cs, ax": {
      "ExpectedInstructionCount": 4,
      "Skip": "Yes",
      "Comment": "0x8e"
    },
    "mov es, ax": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x8e",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "strh w20, [x28, #960]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #976]"
      ]
    },
    "mov ss, ax": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x8e",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "strh w20, [x28, #964]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #984]"
      ]
    },
    "mov ds, ax": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x8e",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "strh w20, [x28, #966]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #988]"
      ]
    },
    "mov gs, ax": {
      "ExpectedInstructionCount": 0,
      "Skip": "Yes",
      "Comment": "0x8e"
    },
    "mov fs, ax": {
      "ExpectedInstructionCount": 0,
      "Skip": "Yes",
      "Comment": "0x8e"
    },
    "pop word [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8f",
      "ExpectedArm64ASM": [
        "ldrh w20, [x8], #2",
        "strh w20, [x4]"
      ]
    },
    "pop qword [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x8f",
      "ExpectedArm64ASM": [
        "ldr x20, [x8], #8",
        "str x20, [x4]"
      ]
    },
    "xchg ax, bx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x90",
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "bfxil x20, x4, #0, #16",
        "bfxil x4, x6, #0, #16",
        "mov x6, x20"
      ]
    },
    "xchg eax, ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x90",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "mov w4, w6",
        "mov x6, x20"
      ]
    },
    "xchg rax, rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x90",
      "ExpectedArm64ASM": [
        "mov x20, x6",
        "mov x6, x4",
        "mov x4, x20"
      ]
    },
    "nop": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x90",
      "ExpectedArm64ASM": []
    },
    "pause": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xF3 0x90",
      "ExpectedArm64ASM": [
        "yield"
      ]
    },
    "cbw": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x98",
      "ExpectedArm64ASM": [
        "sxtb w20, w4",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cwde": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x98",
      "ExpectedArm64ASM": [
        "sxth w4, w4"
      ]
    },
    "cdqe": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x98",
      "ExpectedArm64ASM": [
        "sxtw x4, w4"
      ]
    },
    "cwd": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x99",
      "ExpectedArm64ASM": [
        "sbfx w20, w4, #15, #1",
        "bfxil x5, x20, #0, #16"
      ]
    },
    "cdq": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x99",
      "ExpectedArm64ASM": [
        "asr w5, w4, #31"
      ]
    },
    "cqo": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x99",
      "ExpectedArm64ASM": [
        "asr x5, x4, #63"
      ]
    },
    "fwait": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x9b",
      "ExpectedArm64ASM": []
    },
    "pushf": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "str x20, [x8, #-8]!"
      ]
    },
    "pushfq": {
      "ExpectedInstructionCount": 39,
      "Comment": "0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "ldrb w21, [x28, #1016]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1017]",
        "orr x20, x20, x21, lsl #9",
        "ldrsb x21, [x28, #1018]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #10",
        "cset x21, vs",
        "orr x20, x20, x21, lsl #11",
        "ldrb w21, [x28, #1020]",
        "orr x20, x20, x21, lsl #12",
        "ldrb w21, [x28, #1022]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1024]",
        "orr x20, x20, x21, lsl #16",
        "ldrb w21, [x28, #1025]",
        "orr x20, x20, x21, lsl #17",
        "ldrb w21, [x28, #1026]",
        "orr x20, x20, x21, lsl #18",
        "ldrb w21, [x28, #1027]",
        "orr x20, x20, x21, lsl #19",
        "ldrb w21, [x28, #1028]",
        "orr x20, x20, x21, lsl #20",
        "ldrb w21, [x28, #1029]",
        "orr x20, x20, x21, lsl #21",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "str x20, [x8, #-8]!"
      ]
    },
    "popf": {
      "ExpectedInstructionCount": 44,
      "Comment": "0x9d",
      "ExpectedArm64ASM": [
        "ldr x20, [x8], #8",
        "mov w21, #0x202",
        "orr x27, x20, x21",
        "mvn w20, w27",
        "ubfx x21, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "ubfx w26, w20, #2, #1",
        "ubfx x20, x27, #6, #1",
        "bfi w22, w20, #30, #1",
        "ubfx x20, x27, #7, #1",
        "bfi w22, w20, #31, #1",
        "ubfx w20, w27, #8, #1",
        "ldrb w21, [x28, #1016]",
        "and w21, w21, #0xfffffffe",
        "mov w23, #0x1",
        "cmp x20, #0x0 (0)",
        "csel x20, x21, x23, eq",
        "strb w20, [x28, #1016]",
        "ubfx w20, w27, #9, #1",
        "strb w20, [x28, #1017]",
        "ubfx w20, w27, #10, #1",
        "sub x20, x23, x20, lsl #1",
        "ubfx x21, x27, #11, #1",
        "bfi w22, w21, #28, #1",
        "ubfx w21, w27, #12, #1",
        "strb w21, [x28, #1020]",
        "ubfx w21, w27, #14, #1",
        "strb w21, [x28, #1022]",
        "ubfx w21, w27, #16, #1",
        "strb w21, [x28, #1024]",
        "ubfx w21, w27, #17, #1",
        "strb w21, [x28, #1025]",
        "ubfx w21, w27, #18, #1",
        "strb w21, [x28, #1026]",
        "ubfx w21, w27, #19, #1",
        "strb w21, [x28, #1027]",
        "ubfx w21, w27, #20, #1",
        "strb w21, [x28, #1028]",
        "ubfx w21, w27, #21, #1",
        "strb w21, [x28, #1029]",
        "mov w21, #0x10001",
        "msr nzcv, x22",
        "strb w20, [x28, #1018]"
      ]
    },
    "sahf": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x9e",
      "ExpectedArm64ASM": [
        "ubfx w20, w4, #8, #8",
        "mov w21, #0x28",
        "bic x20, x20, x21",
        "orr x27, x20, #0x2",
        "mvn w20, w27",
        "ubfx x21, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "ubfx w26, w20, #2, #1",
        "ubfx x20, x27, #6, #1",
        "bfi w22, w20, #30, #1",
        "ubfx x20, x27, #7, #1",
        "bfi w22, w20, #31, #1",
        "msr nzcv, x22"
      ]
    },
    "lahf": {
      "ExpectedInstructionCount": 14,
      "Comment": "0x9f",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x20, x21, lsl #4",
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w21, w0, w0, lsr #1",
        "orr x21, x21, #0xfffffffffffffffe",
        "orn x20, x20, x21, ror #62",
        "mrs x21, nzcv",
        "and x21, x21, #0xc0000000",
        "orr x20, x20, x21, lsr #24",
        "orr x20, x20, #0x2",
        "bfi x4, x20, #8, #8"
      ]
    },
    "mov rax, [qword 0xe0000008]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xa1"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x8",
        "movk w20, #0xe000, lsl #16",
        "ldr x4, [x20]"
      ]
    },
    "mov eax, [qword 0xe0000000]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xa1"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0xe0000000",
        "ldr w4, [x20]"
      ]
    },
    "mov [qword 0xe0000008], rax": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xa3"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x8",
        "movk w20, #0xe000, lsl #16",
        "str x4, [x20]"
      ]
    },
    "mov [qword 0xe0000000], eax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xa3"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0xe0000000",
        "str w4, [x20]"
      ]
    },
    "movsb": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xa4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x10]",
        "strb w20, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20",
        "add x11, x11, x20"
      ]
    },
    "movsw": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xa5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x10]",
        "strh w20, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #1",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "movsd": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xa5"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x10]",
        "str w20, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #2",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "movsq": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xa5"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x10]",
        "str x20, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #3",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "rep movsb": {
      "ExpectedInstructionCount": 83,
      "Comment": "0xa4",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0x94",
        "cbz x0, #+0x78",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x14",
        "ldrb w3, [x2], #1",
        "strb w3, [x1], #1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2",
        "add x20, x1, x2",
        "b #+0xa0",
        "cbz x0, #+0x88",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1f (31)",
        "sub x2, x2, #0x1f (31)",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1f (31)",
        "add x2, x2, #0x1f (31)",
        "ldrb w3, [x2], #-1",
        "strb w3, [x1], #-1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2",
        "sub x20, x1, x2",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsw": {
      "ExpectedInstructionCount": 83,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0x94",
        "cbz x0, #+0x78",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "ldrh w3, [x2], #2",
        "strh w3, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #1",
        "add x20, x1, x2, lsl #1",
        "b #+0xa0",
        "cbz x0, #+0x88",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1e (30)",
        "sub x2, x2, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1e (30)",
        "add x2, x2, #0x1e (30)",
        "ldrh w3, [x2], #-2",
        "strh w3, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #1",
        "sub x20, x1, x2, lsl #1",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsd": {
      "ExpectedInstructionCount": 83,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0x94",
        "cbz x0, #+0x78",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "ldr w3, [x2], #4",
        "str w3, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #2",
        "add x20, x1, x2, lsl #2",
        "b #+0xa0",
        "cbz x0, #+0x88",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x1c (28)",
        "sub x2, x2, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x1c (28)",
        "add x2, x2, #0x1c (28)",
        "ldr w3, [x2], #-4",
        "str w3, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #2",
        "sub x20, x1, x2, lsl #2",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "rep movsq": {
      "ExpectedInstructionCount": 83,
      "Comment": "0xa5",
      "ExpectedArm64ASM": [
        "ldrsb x22, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "mov x2, x10",
        "tbnz w22, #1, #+0x94",
        "cbz x0, #+0x78",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x54",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x34",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #32",
        "stp q0, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "ldr x3, [x2], #8",
        "str x3, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "add x21, x0, x2, lsl #3",
        "add x20, x1, x2, lsl #3",
        "b #+0xa0",
        "cbz x0, #+0x88",
        "sub x3, x1, x2",
        "tbz x3, #63, #+0x8",
        "neg x3, x3",
        "sub x3, x3, #0x20 (32)",
        "tbnz x3, #63, #+0x64",
        "sub x1, x1, #0x18 (24)",
        "sub x2, x2, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x44",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x1c",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x14",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x3c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "ldp q0, q1, [x2], #-32",
        "stp q0, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x1c",
        "add x1, x1, #0x18 (24)",
        "add x2, x2, #0x18 (24)",
        "ldr x3, [x2], #-8",
        "str x3, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0xc",
        "mov x0, x11",
        "mov x1, x10",
        "mov x2, x7",
        "sub x21, x0, x2, lsl #3",
        "sub x20, x1, x2, lsl #3",
        "mov w7, #0x0",
        "mov x11, x21",
        "mov x10, x20"
      ]
    },
    "cmpsb": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "ldrb w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20",
        "add x10, x10, x20"
      ]
    },
    "cmpsw": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "ldrh w21, [x10]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1",
        "add x10, x10, x20, lsl #1"
      ]
    },
    "cmpsd": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "ldr w21, [x10]",
        "eor x27, x21, x20",
        "subs w26, w21, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2",
        "add x10, x10, x20, lsl #2"
      ]
    },
    "cmpsq": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xa7"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "ldr x21, [x10]",
        "eor x27, x21, x20",
        "subs x26, x21, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3",
        "add x10, x10, x20, lsl #3"
      ]
    },
    "repz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nzcv, ne",
        "b.eq #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsb": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa6",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrb w26, [x11]",
        "ldrb w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #24",
        "cmp w0, w26, lsl #24",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsw": {
      "ExpectedInstructionCount": 25,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x64",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "add x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldrh w26, [x11]",
        "ldrh w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "sub x10, x10, #0x2 (2)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "lsl w0, w27, #16",
        "cmp w0, w26, lsl #16",
        "sub w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsd": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "add x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr w26, [x11]",
        "ldr w27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "sub x10, x10, #0x4 (4)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs w26, w27, w26",
        "mov x27, x20"
      ]
    },
    "repnz cmpsq": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xa7",
      "ExpectedArm64ASM": [
        "cbz x7, #+0x5c",
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "add x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "b #+0x20",
        "ldr x26, [x11]",
        "ldr x27, [x10]",
        "subs x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "sub x10, x10, #0x8 (8)",
        "ccmp x27, x26, #nZcv, ne",
        "b.ne #-0x18",
        "eor x20, x27, x26",
        "subs x26, x27, x26",
        "mov x27, x20"
      ]
    },
    "test al, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa8",
      "ExpectedArm64ASM": [
        "and w26, w4, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test ax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "and w26, w4, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xa8",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "test ax, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "test eax, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "test rax, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xa9",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "stosb": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xaa",
      "ExpectedArm64ASM": [
        "strb w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20"
      ]
    },
    "stosw": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "strh w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "stosd": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "str w4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "stosq": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "str x4, [x11]",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "rep stosb": {
      "ExpectedInstructionCount": 55,
      "Comment": "0xaa",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.16b, w20",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x10",
        "strb w20, [x1], #1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1f (31)",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.16b, w20",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x40 (64)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x40 (64)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x20 (32)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1f (31)",
        "strb w20, [x1], #-1",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7",
        "mov w7, #0x0"
      ]
    },
    "rep stosw": {
      "ExpectedInstructionCount": 55,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w20",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x10",
        "strh w20, [x1], #2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #1",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1e (30)",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.8h, w20",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x20 (32)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x20 (32)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x10 (16)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1e (30)",
        "strh w20, [x1], #-2",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #1",
        "mov w7, #0x0"
      ]
    },
    "rep stosd": {
      "ExpectedInstructionCount": 55,
      "Comment": "0xab",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "ldrsb x21, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w21, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w20",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x10",
        "str w20, [x1], #4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #2",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x1c (28)",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.4s, w20",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x10 (16)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x10 (16)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x8 (8)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x1c (28)",
        "str w20, [x1], #-4",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #2",
        "mov w7, #0x0"
      ]
    },
    "rep stosq": {
      "ExpectedInstructionCount": 54,
      "Comment": [
        "Unrolling the loop for faster memset can be done.",
        "Taking advantage of ARM MOPs instructions can be done",
        "0xab"
      ],
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "mov x0, x7",
        "mov x1, x11",
        "tbnz w20, #1, #+0x64",
        "cbz x0, #+0x58",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #32",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x2c",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x10",
        "str x4, [x1], #8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "add x11, x11, x7, lsl #3",
        "b #+0x68",
        "cbz x0, #+0x60",
        "sub x1, x1, #0x18 (24)",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x3c",
        "dup v1.2d, x4",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x14",
        "stp q1, q1, [x1], #-32",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x8 (8)",
        "tbz x0, #63, #-0xc",
        "add x0, x0, #0x8 (8)",
        "cbz x0, #+0x30",
        "sub x0, x0, #0x4 (4)",
        "tbnz x0, #63, #+0x10",
        "stp q1, q1, [x1], #-32",
        "sub x0, x0, #0x4 (4)",
        "tbz x0, #63, #-0x8",
        "add x0, x0, #0x4 (4)",
        "cbz x0, #+0x14",
        "add x1, x1, #0x18 (24)",
        "str x4, [x1], #-8",
        "sub x0, x0, #0x1 (1)",
        "cbnz x0, #-0x8",
        "sub x11, x11, x7, lsl #3",
        "mov w7, #0x0"
      ]
    },
    "lodsb": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xac",
      "ExpectedArm64ASM": [
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20"
      ]
    },
    "lodsw": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #1"
      ]
    },
    "lodsd": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldr w4, [x10]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #2"
      ]
    },
    "lodsq": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldr x4, [x10]",
        "ldrsb x20, [x28, #1018]",
        "add x10, x10, x20, lsl #3"
      ]
    },
    "rep lodsb": {
      "ExpectedInstructionCount": 17,
      "Comment": "0xac",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x20",
        "cbz x7, #+0x18",
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x1 (1)",
        "b #-0x14",
        "b #+0x1c",
        "cbz x7, #+0x18",
        "ldrb w20, [x10]",
        "bfxil x4, x20, #0, #8",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x1 (1)",
        "b #-0x14"
      ]
    },
    "rep lodsw": {
      "ExpectedInstructionCount": 17,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x20",
        "cbz x7, #+0x18",
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x2 (2)",
        "b #-0x14",
        "b #+0x1c",
        "cbz x7, #+0x18",
        "ldrh w20, [x10]",
        "bfxil x4, x20, #0, #16",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x2 (2)",
        "b #-0x14"
      ]
    },
    "rep lodsd": {
      "ExpectedInstructionCount": 15,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x1c",
        "cbz x7, #+0x14",
        "ldr w4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x4 (4)",
        "b #-0x10",
        "b #+0x18",
        "cbz x7, #+0x14",
        "ldr w4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x4 (4)",
        "b #-0x10"
      ]
    },
    "rep lodsq": {
      "ExpectedInstructionCount": 15,
      "Comment": "0xad",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x1c",
        "cbz x7, #+0x14",
        "ldr x4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "add x10, x10, #0x8 (8)",
        "b #-0x10",
        "b #+0x18",
        "cbz x7, #+0x14",
        "ldr x4, [x10]",
        "sub x7, x7, #0x1 (1)",
        "sub x10, x10, #0x8 (8)",
        "b #-0x10"
      ]
    },
    "scasb": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20"
      ]
    },
    "scasw": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #1"
      ]
    },
    "scasd": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #2"
      ]
    },
    "scasq": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "ldrsb x20, [x28, #1018]",
        "add x11, x11, x20, lsl #3"
      ]
    },
    "repz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.eq #-0x20"
      ]
    },
    "repz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.eq #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.eq #-0x20"
      ]
    },
    "repz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.eq #-0x18"
      ]
    },
    "repz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.eq #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.eq #-0x18"
      ]
    },
    "repnz scasb": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xae",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x1 (1)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrb w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x1 (1)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasw": {
      "ExpectedInstructionCount": 23,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x2c",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x2 (2)",
        "b.ne #-0x20",
        "b #+0x28",
        "cbz x7, #+0x24",
        "ldrh w20, [x11]",
        "eor x27, x4, x20",
        "lsl w0, w4, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x2 (2)",
        "b.ne #-0x20"
      ]
    },
    "repnz scasd": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x4 (4)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr w20, [x11]",
        "eor x27, x4, x20",
        "subs w26, w4, w20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x4 (4)",
        "b.ne #-0x18"
      ]
    },
    "repnz scasq": {
      "ExpectedInstructionCount": 19,
      "Comment": "0xaf",
      "ExpectedArm64ASM": [
        "ldrsb x20, [x28, #1018]",
        "lsr x20, x20, #63",
        "cbz x20, #+0x8",
        "b #+0x24",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "add x11, x11, #0x8 (8)",
        "b.ne #-0x18",
        "b #+0x20",
        "cbz x7, #+0x1c",
        "ldr x20, [x11]",
        "eor x27, x4, x20",
        "subs x26, x4, x20",
        "sub x7, x7, #0x1 (1)",
        "sub x11, x11, #0x8 (8)",
        "b.ne #-0x18"
      ]
    },
    "mov al, 0x0": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "and x4, x4, #0xffffffffffffff00"
      ]
    },
    "xor al, al": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "subs w26, w4, w4",
        "and x4, x4, #0xffffffffffffff00"
      ]
    },
    "mov ah, 0x0": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "and x4, x4, #0xffffffffffff00ff"
      ]
    },
    "xor ah, ah": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "subs w26, w4, w4",
        "and x4, x4, #0xffffffffffff00ff"
      ]
    },
    "mov al, 0xff": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xb0",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0xff"
      ]
    },
    "mov al, 0x82": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xb0",
      "ExpectedArm64ASM": [
        "mov w20, #0x82",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "mov ah, 0xff": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "orr x4, x4, #0xff00"
      ]
    },
    "mov ax, 0xffff": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xb8",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0xffff"
      ]
    },
    "mov ax, 0x4243": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xb8",
      "ExpectedArm64ASM": [
        "mov w20, #0x4243",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "mov ax, 0x0": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "and x4, x4, #0xffffffffffff0000"
      ]
    },
    "xor ax, ax": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "subs w26, w4, w4",
        "and x4, x4, #0xffffffffffff0000"
      ]
    },
    "mov eax, 0x0": {
      "ExpectedInstructionCount": 1,
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "xor eax, eax": {
      "ExpectedInstructionCount": 2,
      "ExpectedArm64ASM": [
        "subs w26, w4, w4",
        "mov w4, #0x0"
      ]
    },
    "mov eax, 0xffffffff": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "movz+movk doesn't turn in to bitfield move",
        "0xb8"
      ],
      "ExpectedArm64ASM": [
        "mov w4, #0xffffffff"
      ]
    },
    "mov eax, 0x44454647": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xb8",
      "ExpectedArm64ASM": [
        "mov w4, #0x4647",
        "movk w4, #0x4445, lsl #16"
      ]
    },
    "mov rax, 0x0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xb0",
      "ExpectedArm64ASM": [
        "mov w4, #0x0"
      ]
    },
    "xor rax, rax": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xb0",
      "ExpectedArm64ASM": [
        "subs w26, w4, w4",
        "mov w4, #0x0"
      ]
    },
    "mov rax, 0xffffffffffffffff": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xb8",
      "ExpectedArm64ASM": [
        "mov x4, #0xffffffffffffffff"
      ]
    },
    "mov rax, 0x5152535455565758": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xb8",
      "ExpectedArm64ASM": [
        "mov x4, #0x5758",
        "movk x4, #0x5556, lsl #16",
        "movk x4, #0x5354, lsl #32",
        "movk x4, #0x5152, lsl #48"
      ]
    },
    "xlat": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xd7",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "ldrb w20, [x6, x20, sxtx]",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "retf 0x1234": {
      "ExpectedInstructionCount": 15,
      "Comment": "0xca",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "ldp w21, w22, [x20], #8",
        "mov w23, #0x1234",
        "add x8, x20, x23",
        "strh w22, [x28, #962]",
        "ubfx w20, w22, #2, #1",
        "and w22, w22, #0xfff8",
        "add x0, x28, x20, lsl #3",
        "ldr x20, [x0, #1184]",
        "ldr x20, [x20, w22, uxtw]",
        "lsr x22, x20, #32",
        "and w23, w22, #0xff000000",
        "orr w20, w23, w20, lsr #16",
        "bfi w20, w22, #16, #8",
        "str w20, [x28, #980]"
      ]
    },
    "retf": {
      "ExpectedInstructionCount": 12,
      "Comment": "0xcb",
      "ExpectedArm64ASM": [
        "ldp w20, w21, [x8], #8",
        "strh w21, [x28, #962]",
        "ubfx w22, w21, #2, #1",
        "and w21, w21, #0xfff8",
        "add x0, x28, x22, lsl #3",
        "ldr x22, [x0, #1184]",
        "ldr x21, [x22, w21, uxtw]",
        "lsr x22, x21, #32",
        "and w23, w22, #0xff000000",
        "orr w21, w23, w21, lsr #16",
        "bfi w21, w22, #16, #8",
        "str w21, [x28, #980]"
      ]
    },
    "cmc": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf5",
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "clc": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf8",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "stc": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf9",
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "cli": {
      "ExpectedInstructionCount": 7,
      "Skip": "Yes",
      "Comment": "0xfa"
    },
    "sti": {
      "ExpectedInstructionCount": 7,
      "Skip": "Yes",
      "Comment": "0xfb"
    },
    "cld": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xfc",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "strb w20, [x28, #1018]"
      ]
    },
    "std": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xfd",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "strb w20, [x28, #1018]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/PrimaryGroup.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Comment": [
    "For instructions in this table that are marked optimal, the flag calculation is not part of that assessment.",
    "Flag calculation can dramatically change an instruction's length, so it is mostly ignored here."
  ],
  "Instructions": {
    "add al, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "or al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "adc al, 1": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w26, w4",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "sbb al, 1": {
      "ExpectedInstructionCount": 18,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w21, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "msr nzcv, x22",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "and al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "and x26, x4, #0xffffffffffffff01",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub al, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #8",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "xor al, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0x1",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "cmp al, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "add al, -1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x80 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "or al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /1",
      "ExpectedArm64ASM": [
        "orr x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "adc al, -1": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP1 0x80 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "cinc w20, w20, lo",
        "add w21, w4, w20",
        "uxtb w26, w21",
        "cmp w26, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "bic w20, w4, w26",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x21"
      ]
    },
    "sbb al, -1": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP1 0x80 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "uxtb w21, w4",
        "cinc w20, w20, lo",
        "sub w22, w21, w20",
        "uxtb w26, w22",
        "cmp w21, w20",
        "cset x20, hs",
        "cmn wzr, w26, lsl #24",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bic w20, w26, w21",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x22"
      ]
    },
    "and al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /4",
      "ExpectedArm64ASM": [
        "cmn wzr, w4, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x4"
      ]
    },
    "sub al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /5",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "xor al, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x80 /6",
      "ExpectedArm64ASM": [
        "eor x26, x4, #0xff",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "cmp al, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x80 /7",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "mvn w27, w4",
        "lsl w0, w4, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w4, #0xff (255)"
      ]
    },
    "add ax, 256": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x100 (256)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x100",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x100",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, 256": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 256": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x100",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, 256": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x100",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, 256": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x100",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub eax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x100",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x100",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "add ax, -256": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff00",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, -256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, -256": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x81 /0",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x100 (256)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0xffffff00",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0xffffffffffffff00",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, -256": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffff00",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, -256": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x81 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffff00",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffff00",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, -256": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x81 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffff00",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, -256": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0xffffff00",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, -256": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x81 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0xffffffffffffff00",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub eax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, -256": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x81 /5",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0xffffff00",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0xffffffffffffff00",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "cmp rax, -256": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x81 /7",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x100 (256)",
        "mov x27, x4"
      ]
    },
    "add ax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, #0x1 (1)",
        "mov x20, x4",
        "bfxil x20, x26, #0, #16",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x20"
      ]
    },
    "add eax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "adds w26, w4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "add rax, 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "adds x26, x4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "or eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "orr w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "orr x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "adc rax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb eax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs w26, w4, w20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sbb rax, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "sbcs x26, x4, x20",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "and eax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "ands w26, w4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "ands x26, x4, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub eax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "sub rax, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "xor eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "subs w26, w4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "cmp rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "subs x26, x4, #0x1 (1)",
        "mov x27, x4"
      ]
    },
    "add ax, -1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "mvn w27, w4",
        "lsl w0, w4, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w4, w20",
        "bfxil x4, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "add eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs w26, w4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "add rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /0",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "subs x26, x4, #0x1 (1)",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "or eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "orr w4, w4, w20",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "or rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /1",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "orr x4, x4, x20",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "adc eax, -1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "adc rax, -1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP1 0x83 /2",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "adcs x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sbb eax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "mvn w27, w4",
        "sbcs w26, w4, w20",
        "mov x4, x26"
      ]
    },
    "sbb rax, -1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP1 0x83 /3",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "mvn w27, w4",
        "sbcs x26, x4, x20",
        "mov x4, x26"
      ]
    },
    "and eax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "ands w26, w4, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "and rax, -1": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP1 0x83 /4",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "ands x26, x4, x20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x4, x26"
      ]
    },
    "sub eax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "sub rax, -1": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP1 0x83 /5",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)",
        "mov x4, x26"
      ]
    },
    "xor eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mvn w4, w4",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "xor rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /6",
      "ExpectedArm64ASM": [
        "mvn x4, x4",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "cmp eax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds w26, w4, #0x1 (1)"
      ]
    },
    "cmp rax, -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP1 0x83 /7",
      "ExpectedArm64ASM": [
        "mvn w27, w4",
        "adds x26, x4, #0x1 (1)"
      ]
    },
    "rol al, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC0 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #24, #8",
        "ror w20, w20, #30",
        "bfxil x4, x20, #0, #8",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "ror al, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC0 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #8, #8",
        "ror w20, w20, #2",
        "bfxil x4, x20, #0, #8",
        "eor x20, x20, #0x80",
        "ubfx x20, x20, #7, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rcl al, 2": {
      "ExpectedInstructionCount": 22,
      "Comment": "GROUP2 0xC0 /2",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "mov w22, #0x0",
        "bfi x22, x20, #55, #8",
        "bfi x22, x21, #63, #1",
        "bfi x22, x20, #46, #8",
        "bfi x22, x21, #54, #1",
        "bfi x22, x20, #37, #8",
        "bfi x22, x21, #45, #1",
        "bfi x22, x20, #28, #8",
        "bfi x22, x21, #36, #1",
        "bfi x22, x20, #19, #8",
        "bfi x22, x21, #27, #1",
        "bfxil x22, x20, #0, #8",
        "ror x20, x22, #62",
        "bfxil x4, x20, #0, #8",
        "ror x20, x22, #61",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rcr al, 2": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xC0 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "uxtb w21, w4",
        "bfi x21, x20, #8, #1",
        "bfi x21, x21, #9, #9",
        "bfi x21, x21, #18, #18",
        "bfi x21, x21, #36, #9",
        "lsr x20, x21, #2",
        "bfxil x4, x20, #0, #8",
        "eor x20, x21, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "shl al, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC0 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x4, #0x40",
        "ubfx x20, x20, #6, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "shr al, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC0 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w26, w20, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sar al, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC0 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w26, w20, #2",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "rol ax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #30",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rol eax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "ror w4, w4, #30",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rol rax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /0",
      "ExpectedArm64ASM": [
        "ror x4, x4, #62",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "ror ax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #2",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, #0x8000",
        "ubfx x20, x20, #15, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "ror eax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "ror w4, w4, #2",
        "eor x20, x4, #0x80000000",
        "ubfx x20, x20, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "ror rax, 2": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xC1 /1",
      "ExpectedArm64ASM": [
        "ror x4, x4, #2",
        "eor x20, x4, #0x8000000000000000",
        "lsr x20, x20, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rcl ax, 2": {
      "ExpectedInstructionCount": 18,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "cset x21, lo",
        "mov w22, #0x0",
        "bfi x22, x20, #47, #16",
        "bfi x22, x21, #63, #1",
        "bfi x22, x20, #30, #16",
        "bfi x22, x21, #46, #1",
        "bfi x22, x20, #13, #16",
        "bfi x22, x21, #29, #1",
        "bfxil x22, x20, #0, #16",
        "ror x20, x22, #62",
        "bfxil x4, x20, #0, #16",
        "ror x20, x22, #61",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rcl eax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "lsl w20, w4, #2",
        "cset x21, lo",
        "orr w20, w20, w4, lsr #31",
        "eor x22, x4, #0x40000000",
        "ubfx x22, x22, #30, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "orr w4, w20, w21, lsl #1",
        "msr nzcv, x23"
      ]
    },
    "rcl rax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /2",
      "ExpectedArm64ASM": [
        "lsl x20, x4, #2",
        "cset x21, lo",
        "orr x20, x20, x4, lsr #63",
        "eor x22, x4, #0x4000000000000000",
        "ubfx x22, x22, #62, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "orr x4, x20, x21, lsl #1",
        "msr nzcv, x23"
      ]
    },
    "rcr ax, 2": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "uxth w21, w4",
        "bfi x21, x20, #16, #1",
        "bfi x21, x21, #17, #17",
        "bfi x21, x21, #34, #17",
        "lsr x20, x21, #2",
        "bfxil x4, x20, #0, #16",
        "eor x20, x21, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "rcr eax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, #2",
        "cset x21, lo",
        "orr w20, w20, w4, lsl #31",
        "eor x22, x4, #0x2",
        "ubfx x22, x22, #1, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "orr w4, w20, w21, lsl #30",
        "msr nzcv, x23"
      ]
    },
    "rcr rax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, #2",
        "cset x21, lo",
        "orr x20, x20, x4, lsl #63",
        "eor x22, x4, #0x2",
        "ubfx x22, x22, #1, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "orr x4, x20, x21, lsl #62",
        "msr nzcv, x23"
      ]
    },
    "shl ax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x4, #0x4000",
        "ubfx x20, x20, #14, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shl eax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x40000000",
        "ubfx x20, x20, #30, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shl rax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /4",
      "ExpectedArm64ASM": [
        "lsl x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x4000000000000000",
        "ubfx x20, x20, #62, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shr ax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w26, w20, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shr eax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "lsr w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shr rax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /5",
      "ExpectedArm64ASM": [
        "lsr x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "sar ax, 2": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w26, w20, #2",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sar eax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "asr w26, w4, #2",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "sar rax, 2": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xC1 /7",
      "ExpectedArm64ASM": [
        "asr x26, x4, #2",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "rol al, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd0 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #24, #8",
        "ror w20, w20, #31",
        "bfxil x4, x20, #0, #8",
        "eor x21, x20, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w20, w20, lsr #7",
        "ubfx x20, x20, #0, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "ror al, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd0 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #8, #8",
        "ror w20, w20, #1",
        "bfxil x4, x20, #0, #8",
        "eor x21, x20, #0x80",
        "ubfx x21, x21, #7, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w20, w20, lsr #1",
        "ubfx x20, x20, #6, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rcl al, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd0 /2",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "orr w21, w21, w20, lsl #1",
        "eor x22, x20, #0x80",
        "ubfx x22, x22, #7, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w20, w21, w20",
        "ubfx x20, x20, #7, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x4, x21, #0, #8",
        "msr nzcv, x23"
      ]
    },
    "rcr al, 1": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd0 /3",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "eor x22, x20, #0x1",
        "ubfx x22, x22, #0, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "ubfx w20, w20, #1, #7",
        "bfi w20, w21, #7, #1",
        "bfxil x4, x20, #0, #8",
        "eor w20, w20, w20, lsr #1",
        "ubfx x20, x20, #6, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "shl al, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd0 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmn wzr, w26, lsl #24",
        "eor x20, x4, #0x80",
        "ubfx x20, x20, #7, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w26, w4",
        "ubfx x20, x20, #7, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "shr al, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd0 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w26, w20, #1",
        "cmn wzr, w26, lsl #24",
        "eor x21, x20, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "ubfx x20, x20, #7, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "sar al, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd0 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w26, w20, #1",
        "cmn wzr, w26, lsl #24",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #8"
      ]
    },
    "rol ax, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #31",
        "bfxil x4, x20, #0, #16",
        "eor x21, x20, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w20, w20, lsr #15",
        "ubfx x20, x20, #0, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rol eax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "ror w4, w4, #31",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w4, w4, lsr #31",
        "ubfx x20, x20, #0, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "rol rax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd1 /0",
      "ExpectedArm64ASM": [
        "ror x4, x4, #63",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor x20, x4, x4, lsr #63",
        "ubfx x20, x20, #0, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "ror ax, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "bfi w20, w4, #16, #16",
        "ror w20, w20, #1",
        "bfxil x4, x20, #0, #16",
        "eor x21, x20, #0x8000",
        "ubfx x21, x21, #15, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w20, w20, lsr #1",
        "ubfx x20, x20, #14, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "ror eax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "ror w4, w4, #1",
        "eor x20, x4, #0x80000000",
        "ubfx x20, x20, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w4, w4, lsr #1",
        "ubfx x20, x20, #30, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "ror rax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd1 /1",
      "ExpectedArm64ASM": [
        "ror x4, x4, #1",
        "eor x20, x4, #0x8000000000000000",
        "lsr x20, x20, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor x20, x4, x4, lsr #1",
        "ubfx x20, x20, #62, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21"
      ]
    },
    "rcl ax, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "cset x21, lo",
        "orr w21, w21, w20, lsl #1",
        "eor x22, x20, #0x8000",
        "ubfx x22, x22, #15, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor w20, w21, w20",
        "ubfx x20, x20, #15, #1",
        "bfi w23, w20, #28, #1",
        "bfxil x4, x21, #0, #16",
        "msr nzcv, x23"
      ]
    },
    "rcl eax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "cset x21, lo",
        "orr w4, w21, w20, lsl #1",
        "eor x21, x20, #0x80000000",
        "ubfx x21, x21, #31, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor w20, w4, w20",
        "ubfx x20, x20, #31, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rcl rax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /2",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "orr x20, x20, x4, lsl #1",
        "eor x21, x4, #0x8000000000000000",
        "lsr x21, x21, #63",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "eor x21, x20, x4",
        "lsr x21, x21, #63",
        "bfi w22, w21, #28, #1",
        "msr nzcv, x22",
        "mov x4, x20"
      ]
    },
    "rcr ax, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "ubfx w21, w4, #1, #15",
        "orr w20, w21, w20, lsl #15",
        "bfxil x4, x20, #0, #16",
        "eor x20, x20, x20, lsr #1",
        "ubfx x20, x20, #14, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rcr eax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "extr w4, w20, w4, #1",
        "eor x20, x4, x4, lsr #1",
        "ubfx x20, x20, #30, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rcr rax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd1 /3",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "eor x21, x4, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "extr x4, x20, x4, #1",
        "eor x20, x4, x4, lsr #1",
        "ubfx x20, x20, #62, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "shl ax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmn wzr, w26, lsl #16",
        "eor x20, x4, #0x8000",
        "ubfx x20, x20, #15, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w26, w4",
        "ubfx x20, x20, #15, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shl eax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x80000000",
        "ubfx x20, x20, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w26, w4",
        "ubfx x20, x20, #31, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shl rax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /4",
      "ExpectedArm64ASM": [
        "lsl x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x8000000000000000",
        "lsr x20, x20, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor x20, x26, x4",
        "lsr x20, x20, #63",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shr ax, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w26, w20, #1",
        "cmn wzr, w26, lsl #16",
        "eor x21, x20, #0x1",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shr eax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "lsr w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "ubfx x20, x4, #31, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shr rax, 1": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd1 /5",
      "ExpectedArm64ASM": [
        "lsr x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "lsr x20, x4, #63",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "sar ax, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w26, w20, #1",
        "cmn wzr, w26, lsl #16",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "sar eax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "asr w26, w4, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "sar rax, 1": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xd1 /7",
      "ExpectedArm64ASM": [
        "asr x26, x4, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "rol al, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd2 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x7",
        "mov x21, x4",
        "bfi w21, w4, #24, #8",
        "neg x20, x20",
        "ror w20, w21, w20",
        "bfxil x4, x20, #0, #8",
        "and x21, x7, #0x1f",
        "cbz w21, #+0x1c",
        "eor x0, x20, x20, lsr #7",
        "mvn x1, x20",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "ror al, cl": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP2 0xd2 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x7",
        "mov x21, x4",
        "bfi w21, w4, #8, #8",
        "ror w20, w21, w20",
        "bfxil x4, x20, #0, #8",
        "and x21, x7, #0x1f",
        "cbz w21, #+0x24",
        "eor x0, x20, x20, lsr #1",
        "mvn x1, x20",
        "lsr w0, w0, #6",
        "lsr w1, w1, #7",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "rcl al, cl": {
      "ExpectedInstructionCount": 31,
      "Comment": "GROUP2 0xd2 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x78",
        "and x20, x7, #0x1f",
        "uxtb w21, w4",
        "cset x22, lo",
        "mov w23, #0x0",
        "bfi x23, x21, #55, #8",
        "bfi x23, x22, #63, #1",
        "bfi x23, x21, #46, #8",
        "bfi x23, x22, #54, #1",
        "bfi x23, x21, #37, #8",
        "bfi x23, x22, #45, #1",
        "bfi x23, x21, #28, #8",
        "bfi x23, x22, #36, #1",
        "bfi x23, x21, #19, #8",
        "bfi x23, x22, #27, #1",
        "bfxil x23, x21, #0, #8",
        "neg x21, x20",
        "ror x21, x23, x21",
        "bfxil x4, x21, #0, #8",
        "mov w22, #0x3f",
        "sub x20, x22, x20",
        "ror x20, x23, x20",
        "eor x22, x20, #0x1",
        "ubfx x22, x22, #0, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor x20, x20, x21, lsr #7",
        "ubfx x20, x20, #0, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "rcr al, cl": {
      "ExpectedInstructionCount": 21,
      "Comment": "GROUP2 0xd2 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x50",
        "and x20, x7, #0x1f",
        "cset x21, lo",
        "uxtb w22, w4",
        "bfi x22, x21, #8, #1",
        "bfi x22, x22, #9, #9",
        "bfi x22, x22, #18, #18",
        "bfi x22, x22, #36, #9",
        "lsr x21, x22, x20",
        "bfxil x4, x21, #0, #8",
        "sub w20, w20, #0x1 (1)",
        "lsr w20, w22, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "eor w20, w21, w21, lsr #1",
        "ubfx x20, x20, #6, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "shl al, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd2 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x2c",
        "cmn wzr, w20, lsl #24",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w4, w20",
        "mrs x1, nzcv",
        "lsr x0, x0, #8",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #7",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "shr al, cl": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP2 0xd2 /5",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "lsr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x30",
        "cmn wzr, w21, lsl #24",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w21",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #7",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "bfxil x4, x21, #0, #8"
      ]
    },
    "sar al, cl": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd2 /7",
      "ExpectedArm64ASM": [
        "sxtb x20, w4",
        "asr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x24",
        "cmn wzr, w21, lsl #24",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "msr nzcv, x1",
        "bfxil x4, x21, #0, #8"
      ]
    },
    "rol ax, cl": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "mov x21, x4",
        "bfi w21, w4, #16, #16",
        "neg x22, x20",
        "ror w21, w21, w22",
        "bfxil x4, x21, #0, #16",
        "cbz w20, #+0x1c",
        "eor x0, x21, x21, lsr #15",
        "mvn x1, x21",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "rol eax, cl": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "neg x21, x20",
        "ror w4, w4, w21",
        "cbz w20, #+0x1c",
        "eor x0, x4, x4, lsr #31",
        "mvn x1, x4",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "rol rax, cl": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xd3 /0",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "neg x21, x20",
        "ror x4, x4, x21",
        "cbz x20, #+0x1c",
        "eor x0, x4, x4, lsr #63",
        "mvn x1, x4",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "ror ax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "mov x21, x4",
        "bfi w21, w4, #16, #16",
        "ror w21, w21, w20",
        "bfxil x4, x21, #0, #16",
        "cbz w20, #+0x24",
        "eor x0, x21, x21, lsr #1",
        "mvn x1, x21",
        "lsr w0, w0, #14",
        "lsr w1, w1, #15",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "ror eax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "ror w4, w4, w20",
        "cbz w20, #+0x24",
        "eor x0, x4, x4, lsr #1",
        "mvn x1, x4",
        "lsr w0, w0, #30",
        "lsr w1, w1, #31",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "ror rax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /1",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "ror x4, x4, x20",
        "cbz x20, #+0x24",
        "eor x0, x4, x4, lsr #1",
        "mvn x1, x4",
        "lsr x0, x0, #62",
        "lsr x1, x1, #63",
        "mrs x2, nzcv",
        "bfi w2, w0, #28, #1",
        "bfi w2, w1, #29, #1",
        "msr nzcv, x2"
      ]
    },
    "rcl ax, cl": {
      "ExpectedInstructionCount": 27,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x68",
        "and x20, x7, #0x1f",
        "uxth w21, w4",
        "cset x22, lo",
        "mov w23, #0x0",
        "bfi x23, x21, #47, #16",
        "bfi x23, x22, #63, #1",
        "bfi x23, x21, #30, #16",
        "bfi x23, x22, #46, #1",
        "bfi x23, x21, #13, #16",
        "bfi x23, x22, #29, #1",
        "bfxil x23, x21, #0, #16",
        "neg x21, x20",
        "ror x21, x23, x21",
        "bfxil x4, x21, #0, #16",
        "mov w22, #0x3f",
        "sub x20, x22, x20",
        "ror x20, x23, x20",
        "eor x22, x20, #0x1",
        "ubfx x22, x22, #0, #1",
        "mrs x23, nzcv",
        "bfi w23, w22, #29, #1",
        "eor x20, x20, x21, lsr #15",
        "ubfx x20, x20, #0, #1",
        "bfi w23, w20, #28, #1",
        "msr nzcv, x23"
      ]
    },
    "rcl eax, cl": {
      "ExpectedInstructionCount": 21,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and w20, w7, #0x1f",
        "cbz x20, #+0x4c",
        "lsl w20, w4, w7",
        "cset x21, lo",
        "neg w22, w7",
        "lsr w23, w4, w22",
        "orr w20, w20, w23, lsr #1",
        "lsr w22, w4, w22",
        "eor x23, x22, #0x1",
        "ubfx x23, x23, #0, #1",
        "mrs x24, nzcv",
        "bfi w24, w23, #29, #1",
        "sub w23, w7, #0x1 (1)",
        "lsl w21, w21, w23",
        "orr w4, w20, w21",
        "eor w20, w4, w22, lsl #31",
        "ubfx x20, x20, #31, #1",
        "bfi w24, w20, #28, #1",
        "msr nzcv, x24",
        "b #+0x8",
        "mov w4, w4"
      ]
    },
    "rcl rax, cl": {
      "ExpectedInstructionCount": 19,
      "Comment": "GROUP2 0xd3 /2",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "cbz x20, #+0x48",
        "lsl x20, x4, x7",
        "cset x21, lo",
        "neg x22, x7",
        "lsr x23, x4, x22",
        "orr x20, x20, x23, lsr #1",
        "lsr x22, x4, x22",
        "eor x23, x22, #0x1",
        "ubfx x23, x23, #0, #1",
        "mrs x24, nzcv",
        "bfi w24, w23, #29, #1",
        "sub x23, x7, #0x1 (1)",
        "lsl x21, x21, x23",
        "orr x4, x20, x21",
        "eor x20, x4, x22, lsl #63",
        "lsr x20, x20, #63",
        "bfi w24, w20, #28, #1",
        "msr nzcv, x24"
      ]
    },
    "rcr ax, cl": {
      "ExpectedInstructionCount": 20,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x1f",
        "cbz x20, #+0x4c",
        "and x20, x7, #0x1f",
        "cset x21, lo",
        "uxth w22, w4",
        "bfi x22, x21, #16, #1",
        "bfi x22, x22, #17, #17",
        "bfi x22, x22, #34, #17",
        "lsr x21, x22, x20",
        "bfxil x4, x21, #0, #16",
        "sub w20, w20, #0x1 (1)",
        "lsr w20, w22, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "eor w20, w21, w21, lsr #1",
        "ubfx x20, x20, #14, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22"
      ]
    },
    "rcr eax, cl": {
      "ExpectedInstructionCount": 21,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and w20, w7, #0x1f",
        "cbz x20, #+0x4c",
        "lsr w20, w4, w7",
        "cset x21, lo",
        "neg w22, w7",
        "lsl w23, w4, w22",
        "orr w20, w20, w23, lsl #1",
        "sub w23, w7, #0x1 (1)",
        "lsr w23, w4, w23",
        "eor x23, x23, #0x1",
        "ubfx x23, x23, #0, #1",
        "mrs x24, nzcv",
        "bfi w24, w23, #29, #1",
        "lsl w21, w21, w22",
        "orr w4, w20, w21",
        "eor w20, w4, w4, lsr #1",
        "ubfx x20, x20, #30, #1",
        "bfi w24, w20, #28, #1",
        "msr nzcv, x24",
        "b #+0x8",
        "mov w4, w4"
      ]
    },
    "rcr rax, cl": {
      "ExpectedInstructionCount": 19,
      "Comment": "GROUP2 0xd3 /3",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "cbz x20, #+0x48",
        "lsr x20, x4, x7",
        "cset x21, lo",
        "neg x22, x7",
        "lsl x23, x4, x22",
        "orr x20, x20, x23, lsl #1",
        "sub x23, x7, #0x1 (1)",
        "lsr x23, x4, x23",
        "eor x23, x23, #0x1",
        "ubfx x23, x23, #0, #1",
        "mrs x24, nzcv",
        "bfi w24, w23, #29, #1",
        "lsl x21, x21, x22",
        "orr x4, x20, x21",
        "eor x20, x4, x4, lsr #1",
        "ubfx x20, x20, #62, #1",
        "bfi w24, w20, #28, #1",
        "msr nzcv, x24"
      ]
    },
    "shl ax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x2c",
        "cmn wzr, w20, lsl #16",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w4, w20",
        "mrs x1, nzcv",
        "lsr x0, x0, #16",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #15",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "shl eax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x2c",
        "ands w26, w20, w20",
        "neg w0, w7",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "eor w2, w4, w20",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #31",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "shl rax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /4",
      "ExpectedArm64ASM": [
        "lsl x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x2c",
        "ands x26, x20, x20",
        "neg x0, x7",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x20",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr x2, x2, #63",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "shr ax, cl": {
      "ExpectedInstructionCount": 16,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "lsr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x30",
        "cmn wzr, w21, lsl #16",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w21",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #15",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "shr eax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x2c",
        "ands w26, w20, w20",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "eor w2, w4, w20",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #31",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "shr rax, cl": {
      "ExpectedInstructionCount": 14,
      "Comment": "GROUP2 0xd3 /5",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x2c",
        "ands x26, x20, x20",
        "sub x0, x7, #0x1 (1)",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x20",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr x2, x2, #63",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "sar ax, cl": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "asr w21, w20, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x24",
        "cmn wzr, w21, lsl #16",
        "mov x26, x21",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "msr nzcv, x1",
        "bfxil x4, x21, #0, #16"
      ]
    },
    "sar eax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "asr w20, w4, w7",
        "and w0, w7, #0x1f",
        "cbz w0, #+0x20",
        "ands w26, w20, w20",
        "sub x0, x7, #0x1 (1)",
        "lsr w0, w4, w0",
        "mvn x0, x0",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "sar rax, cl": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP2 0xd3 /7",
      "ExpectedArm64ASM": [
        "asr x20, x4, x7",
        "and w0, w7, #0x3f",
        "cbz x0, #+0x20",
        "ands x26, x20, x20",
        "sub x0, x7, #0x1 (1)",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "msr nzcv, x1",
        "mov x4, x20"
      ]
    },
    "test bl, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf6 /0",
      "ExpectedArm64ASM": [
        "and w26, w6, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "not bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf6 /2",
      "ExpectedArm64ASM": [
        "eor x6, x6, #0xff"
      ]
    },
    "not bh": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf6 /2",
      "ExpectedArm64ASM": [
        "eor x6, x6, #0xff00"
      ]
    },
    "neg bl": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf6 /3",
      "ExpectedArm64ASM": [
        "cmp wzr, w6, lsl #24",
        "neg w26, w6",
        "mov x20, x6",
        "bfxil x20, x26, #0, #8",
        "mov x27, x6",
        "mov x6, x20"
      ]
    },
    "mul bl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf6 /4",
      "ExpectedArm64ASM": [
        "uxtb x20, w6",
        "uxtb x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "ubfx x20, x20, #8, #8",
        "cmp x20, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul bl": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf6 /5",
      "ExpectedArm64ASM": [
        "sxtb x20, w6",
        "sxtb x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "sbfx x21, x20, #8, #8",
        "sbfx x20, x20, #7, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "div bl": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf6 /6",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "uxth w23, w4",
        "udiv w22, w23, w20",
        "msub w21, w22, w20, w23",
        "bfi x22, x21, #8, #8",
        "bfxil x4, x22, #0, #16"
      ]
    },
    "idiv bl": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf6 /7",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "sxth x23, w4",
        "sxtb x20, w20",
        "sdiv x22, x23, x20",
        "msub x21, x22, x20, x23",
        "bfi x22, x21, #8, #8",
        "bfxil x4, x22, #0, #16"
      ]
    },
    "test bx, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "and w26, w6, #0x1",
        "cmp w26, #0x0 (0)"
      ]
    },
    "test ebx, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "ands w26, w6, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test rbx, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "ands x26, x6, #0x1",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "test bx, -1": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "cmn wzr, w6, lsl #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x26, x6"
      ]
    },
    "test ebx, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "subs w26, w6, #0x0 (0)"
      ]
    },
    "test rbx, -1": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /0",
      "ExpectedArm64ASM": [
        "subs x26, x6, #0x0 (0)"
      ]
    },
    "not bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "eor x6, x6, #0xffff"
      ]
    },
    "not ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "mvn w6, w6"
      ]
    },
    "not rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP2 0xf7 /2",
      "ExpectedArm64ASM": [
        "mvn x6, x6"
      ]
    },
    "neg bx": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "cmp wzr, w6, lsl #16",
        "neg w26, w6",
        "mov x20, x6",
        "bfxil x20, x26, #0, #16",
        "mov x27, x6",
        "mov x6, x20"
      ]
    },
    "neg ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "negs w26, w6",
        "mov x27, x6",
        "mov x6, x26"
      ]
    },
    "neg rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP2 0xf7 /3",
      "ExpectedArm64ASM": [
        "negs x26, x6",
        "mov x27, x6",
        "mov x6, x26"
      ]
    },
    "mul bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /4",
      "ExpectedArm64ASM": [
        "uxth x20, w6",
        "uxth x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "ubfx x20, x20, #16, #16",
        "bfxil x5, x20, #0, #16",
        "cmp x20, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "mul ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf7 /4",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov w21, w4",
        "mul x20, x20, x21",
        "mov w4, w20",
        "lsr x5, x20, #32",
        "cmp x5, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "mul rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "GROUP2 0xf7 /4",
      "ExpectedArm64ASM": [
        "umulh x5, x6, x4",
        "mul x4, x6, x4",
        "cmp x5, #0x0 (0)",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "sxth x20, w6",
        "sxth x21, w4",
        "mul x20, x20, x21",
        "bfxil x4, x20, #0, #16",
        "sbfx x21, x20, #16, #16",
        "bfxil x5, x21, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul ebx": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "sxtw x20, w6",
        "sxtw x21, w4",
        "mul x20, x20, x21",
        "mov w4, w20",
        "lsr x5, x20, #32",
        "asr x21, x20, #32",
        "sxtw x20, w20",
        "sbfx x20, x20, #31, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP2 0xf7 /5",
      "ExpectedArm64ASM": [
        "smulh x5, x6, x4",
        "mul x4, x6, x4",
        "asr x20, x4, #63",
        "cmp x5, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "div bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf7 /6",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w0, w4",
        "bfi w0, w5, #16, #16",
        "udiv w22, w0, w20",
        "msub w21, w22, w20, w0",
        "bfxil x4, x22, #0, #16",
        "bfxil x5, x21, #0, #16"
      ]
    },
    "div ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP2 0xf7 /6",
      "ExpectedArm64ASM": [
        "mov w1, w6",
        "mov x0, x4",
        "bfi x0, x5, #32, #32",
        "udiv x20, x0, x1",
        "msub x22, x20, x1, x0",
        "mov w4, w20",
        "mov w5, w22"
      ]
    },
    "div rbx": {
      "ExpectedInstructionCount": 15,
      "Comment": "GROUP2 0xf7 /6",
      "ExpectedArm64ASM": [
        "cbz x5, #+0x2c",
        "mov x0, x5",
        "mov x1, x4",
        "mov x2, x6",
        "ldr x3, [x28, #2960]",
        "str x30, [sp, #-16]!",
        "blr x3",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "mov x22, x1",
        "b #+0xc",
        "udiv x20, x4, x6",
        "msub x22, x20, x6, x4",
        "mov x5, x22",
        "mov x4, x20"
      ]
    },
    "idiv bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /7",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w0, w4",
        "bfi w0, w5, #16, #16",
        "sxth w1, w20",
        "sdiv w22, w0, w1",
        "msub w21, w22, w1, w0",
        "bfxil x4, x22, #0, #16",
        "bfxil x5, x21, #0, #16"
      ]
    },
    "idiv ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP2 0xf7 /7",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov x0, x4",
        "bfi x0, x5, #32, #32",
        "sxtw x1, w20",
        "sdiv x22, x0, x1",
        "msub x21, x22, x1, x0",
        "mov w4, w22",
        "mov w5, w21"
      ]
    },
    "idiv rbx": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP2 0xf7 /7",
      "ExpectedArm64ASM": [
        "asr x0, x4, #63",
        "eor x0, x0, x5",
        "cbz x0, #+0x2c",
        "mov x0, x5",
        "mov x1, x4",
        "mov x2, x6",
        "ldr x3, [x28, #2968]",
        "str x30, [sp, #-16]!",
        "blr x3",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "mov x22, x1",
        "b #+0xc",
        "sdiv x20, x4, x6",
        "msub x22, x20, x6, x4",
        "mov x5, x22",
        "mov x4, x20"
      ]
    },
    "inc al": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP3 0xfe /0",
      "ExpectedArm64ASM": [
        "uxtb w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x20"
      ]
    },
    "dec al": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP3 0xfe /1",
      "ExpectedArm64ASM": [
        "uxtb w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #8",
        "msr nzcv, x20"
      ]
    },
    "inc ax": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x20"
      ]
    },
    "inc eax": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds w26, w4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "inc rax": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /0",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds x26, x4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec ax": {
      "ExpectedInstructionCount": 10,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x20"
      ]
    },
    "dec eax": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs w26, w4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec rax": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP4 0xff /1",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs x26, x4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "push ax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP4 0xff /6",
      "ExpectedArm64ASM": [
        "strh w4, [x8, #-2]!"
      ]
    },
    "call far [rsp]": {
      "ExpectedInstructionCount": 17,
      "Comment": "GROUP5 0xff /3",
      "ExpectedArm64ASM": [
        "ldr w20, [x8]",
        "ldrh w21, [x8, #4]",
        "ldrh w22, [x28, #962]",
        "mov w23, #0x4",
        "movk w23, #0x1, lsl #16",
        "stp x23, x22, [x8, #-16]!",
        "strh w21, [x28, #962]",
        "ubfx w22, w21, #2, #1",
        "and w21, w21, #0xfff8",
        "add x0, x28, x22, lsl #3",
        "ldr x22, [x0, #1184]",
        "ldr x21, [x22, w21, uxtw]",
        "lsr x22, x21, #32",
        "and w23, w22, #0xff000000",
        "orr w21, w23, w21, lsr #16",
        "bfi w21, w22, #16, #8",
        "str w21, [x28, #980]"
      ]
    },
    "push rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP4 0xff /6",
      "ExpectedArm64ASM": [
        "str x4, [x8, #-8]!"
      ]
    },
    "jmp far [rsp]": {
      "ExpectedInstructionCount": 13,
      "Comment": "GROUP5 0xff /5",
      "ExpectedArm64ASM": [
        "ldr w20, [x8]",
        "ldrh w21, [x8, #4]",
        "strh w21, [x28, #962]",
        "ubfx w22, w21, #2, #1",
        "and w21, w21, #0xfff8",
        "add x0, x28, x22, lsl #3",
        "ldr x22, [x0, #1184]",
        "ldr x21, [x22, w21, uxtw]",
        "lsr x22, x21, #32",
        "and w23, w22, #0xff000000",
        "orr w21, w23, w21, lsr #16",
        "bfi w21, w22, #16, #8",
        "str w21, [x28, #980]"
      ]
    },
    "mov byte [rax], 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP11 0xc6 /0",
      "ExpectedArm64ASM": [
        "strb wzr, [x4]"
      ]
    },
    "mov word [rax], 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "strh wzr, [x4]"
      ]
    },
    "mov dword [rax], 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "str wzr, [x4]"
      ]
    },
    "mov qword [rax], 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "str xzr, [x4]"
      ]
    },
    "mov byte [rax], 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc6 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "strb w20, [x4]"
      ]
    },
    "mov word [rax], 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "strh w20, [x4]"
      ]
    },
    "mov dword [rax], 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "str w20, [x4]"
      ]
    },
    "mov qword [rax], 1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "str x20, [x4]"
      ]
    },
    "mov byte [rax], -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc6 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xff",
        "strb w20, [x4]"
      ]
    },
    "mov word [rax], -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffff",
        "strh w20, [x4]"
      ]
    },
    "mov dword [rax], -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "str w20, [x4]"
      ]
    },
    "mov qword [rax], -1": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP11 0xc7 /0",
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "str x20, [x4]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Primary_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FlagM",
      "FlagM2"
    ]
  },
  "Instructions": {
    "push es": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x06",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #960]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop es": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x07",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #960]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #976]"
      ]
    },
    "push cs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0e",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #962]",
        "str w20, [x8, #-4]!"
      ]
    },
    "push ss": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x16",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #964]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop ss": {
      "ExpectedInstructionCount": 22,
      "Comment": "0x17",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "ldrb w21, [x28, #1016]",
        "mov w22, #0x1",
        "and w21, w21, #0x1",
        "ldrb w23, [x28, #1016]",
        "and w23, w23, #0xfffffffe",
        "mrs x12, nzcv",
        "cmp x21, #0x0 (0)",
        "csel x21, x23, x22, eq",
        "strb w21, [x28, #1016]",
        "strh w20, [x28, #964]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #984]",
        "msr nzcv, x12"
      ]
    },
    "push ds": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x1e",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #966]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop ds": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x1f",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #966]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #988]"
      ]
    },
    "daa": {
      "ExpectedInstructionCount": 23,
      "Comment": "0x27",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, hs",
        "and x22, x20, #0xf",
        "cmp x22, #0x9 (9)",
        "cset x22, hi",
        "eor x23, x27, x26",
        "ubfx w23, w23, #4, #1",
        "orr x22, x23, x22",
        "cmp x20, #0x99 (153)",
        "cset x23, ls",
        "and x21, x21, x23",
        "add x23, x20, #0x6 (6)",
        "cmp x22, #0x0 (0)",
        "csel x20, x23, x20, ne",
        "add x23, x20, #0x60 (96)",
        "cmp x21, #0x0 (0)",
        "csel x26, x23, x20, eq",
        "bfxil x4, x26, #0, #8",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "eor w27, w26, w22, lsl #4",
        "msr nzcv, x20"
      ]
    },
    "das": {
      "ExpectedInstructionCount": 27,
      "Comment": "0x2f",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "cset x21, lo",
        "and x22, x20, #0xf",
        "cmp x22, #0x9 (9)",
        "cset x22, hi",
        "eor x23, x27, x26",
        "ubfx w23, w23, #4, #1",
        "orr x22, x23, x22",
        "cmp x20, #0x99 (153)",
        "cset x23, hi",
        "orr x21, x21, x23",
        "cmp x20, #0x6 (6)",
        "csel x23, x22, x21, lo",
        "orr w23, w21, w23",
        "sub x12, x20, #0x6 (6)",
        "cmp x22, #0x0 (0)",
        "csel x20, x12, x20, ne",
        "sub x12, x20, #0x60 (96)",
        "cmp x21, #0x0 (0)",
        "csel x26, x12, x20, ne",
        "bfxil x4, x26, #0, #8",
        "cmn wzr, w26, lsl #24",
        "eor x20, x23, #0x1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w27, w26, w22, lsl #4",
        "msr nzcv, x21"
      ]
    },
    "aaa": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x37",
      "ExpectedArm64ASM": [
        "and x20, x4, #0xf",
        "cmp x20, #0x9 (9)",
        "cset x20, hi",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x21, x20",
        "cmp wzr, w20",
        "eor w27, w26, w20, lsl #4",
        "add w20, w4, #0x106 (262)",
        "csel w20, w20, w4, lo",
        "mov w21, #0xff0f",
        "and w20, w20, w21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "aas": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x3f",
      "ExpectedArm64ASM": [
        "and x20, x4, #0xf",
        "cmp x20, #0x9 (9)",
        "cset x20, hi",
        "eor x21, x27, x26",
        "ubfx w21, w21, #4, #1",
        "orr x20, x21, x20",
        "cmp wzr, w20",
        "eor w27, w26, w20, lsl #4",
        "sub w20, w4, #0x106 (262)",
        "csel w20, w20, w4, lo",
        "mov w21, #0xff0f",
        "and w20, w20, w21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "inc ax": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x40",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x20"
      ]
    },
    "inc eax": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x40",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "adds w26, w4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "dec ax": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x48",
      "ExpectedArm64ASM": [
        "uxth w27, w4",
        "mov w20, #0x1",
        "cset x21, hs",
        "lsl w0, w27, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w27, #0x1 (1)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "bfxil x4, x26, #0, #16",
        "msr nzcv, x20"
      ]
    },
    "push ax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "strh w4, [x8, #-2]!"
      ]
    },
    "push eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x50",
      "ExpectedArm64ASM": [
        "str w4, [x8, #-4]!"
      ]
    },
    "dec eax": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x48",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "subs w26, w4, #0x1 (1)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x27, x4",
        "mov x4, x26"
      ]
    },
    "pusha": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x60",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "stp w7, w4, [x8, #-8]!",
        "stp w6, w5, [x8, #-8]!",
        "stp w9, w20, [x8, #-8]!",
        "stp w11, w10, [x8, #-8]!"
      ]
    },
    "pushad": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x60",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "stp w7, w4, [x8, #-8]!",
        "stp w6, w5, [x8, #-8]!",
        "stp w9, w20, [x8, #-8]!",
        "stp w11, w10, [x8, #-8]!"
      ]
    },
    "popa": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x61",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "ldp w11, w10, [x20], #8",
        "ldr w9, [x20], #4",
        "add x20, x20, #0x4 (4)",
        "mov x8, x20",
        "ldp w6, w5, [x8], #8",
        "ldp w7, w4, [x8], #8"
      ]
    },
    "popad": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x61",
      "ExpectedArm64ASM": [
        "mov x20, x8",
        "ldp w11, w10, [x20], #8",
        "ldr w9, [x20], #4",
        "add x20, x20, #0x4 (4)",
        "mov x8, x20",
        "ldp w6, w5, [x8], #8",
        "ldp w7, w4, [x8], #8"
      ]
    },
    "aam": {
      "ExpectedInstructionCount": 10,
      "Comment": "0xd4",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "mov w21, #0xa",
        "udiv x22, x20, x21",
        "msub x12, x22, x21, x20",
        "add x26, x12, x22, lsl #8",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "aad": {
      "ExpectedInstructionCount": 10,
      "Comment": "0xd5",
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "mov w21, #0xa",
        "mul x20, x20, x21",
        "add x20, x4, x20",
        "and x26, x20, #0xff",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0xd4, 0x40": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "aam with a different immediate byte base",
        "0xd4"
      ],
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "mov w21, #0x40",
        "udiv x22, x20, x21",
        "msub x12, x22, x21, x20",
        "add x26, x12, x22, lsl #8",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "db 0xd5, 0x40": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "aad with a different immediate byte base",
        "0xd5"
      ],
      "ExpectedArm64ASM": [
        "lsr w20, w4, #8",
        "mov w21, #0x40",
        "mul x20, x20, x21",
        "add x20, x4, x20",
        "and x26, x20, #0xff",
        "bfxil x4, x26, #0, #16",
        "cmn wzr, w26, lsl #24",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "salc": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xd6",
      "ExpectedArm64ASM": [
        "csetm w20, lo",
        "bfxil x4, x20, #0, #8"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/RPRES/DDD.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP",
      "RPRES"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "pfrcpv mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0x0f 0x0f 0x86"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "frecpe v0.2s, v2.2s",
        "frecps v1.2s, v0.2s, v2.2s",
        "fmul v2.2s, v0.2s, v1.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrsqrtv mm0, mm1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0x0f 0x0f 0x87"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fabs v3.4s, v2.4s",
        "frsqrte v0.2s, v3.2s",
        "fmul v1.2s, v0.2s, v0.2s",
        "frsqrts v1.2s, v1.2s, v3.2s",
        "fmul v3.2s, v0.2s, v1.2s",
        "movi v0.2s, #0x80, lsl #24",
        "bit v3.8b, v2.8b, v0.8b",
        "str d3, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrcp mm0, mm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0x0f 0x0f 0x96"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "frecpe s0, s2",
        "frecps s1, s0, s2",
        "fmul s2, s0, s1",
        "dup v2.2s, v2.s[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pfrsqrt mm0, mm1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0x0f 0x0f 0x97"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "fabs v3.4s, v2.4s",
        "frsqrte v0.2s, v3.2s",
        "fmul v1.2s, v0.2s, v0.2s",
        "frsqrts v1.2s, v1.2s, v3.2s",
        "fmul v3.2s, v0.2s, v1.2s",
        "movi v0.2s, #0x80, lsl #24",
        "bit v3.8b, v2.8b, v0.8b",
        "dup v2.2s, v3.s[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/RPRES/Secondary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "AFP",
      "RPRES"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "rsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "frsqrte v16.4s, v17.4s"
      ]
    },
    "rcpps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "frecpe v16.4s, v17.4s"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/RPRES/Secondary_REP_AFP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "RPRES",
      "AFP"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {
    "rsqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "frsqrte s16, s17"
      ]
    },
    "rcpss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xf3 0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "frecpe s16, s17"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/RPRES/VEX_map1_AFP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "vrsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frsqrte v16.4s, v17.4s"
      ]
    },
    "vrsqrtps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x52 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frsqrte z16.s, z17.s"
      ]
    },
    "vrsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "AFP can make this more optimal",
        "Map 1 0b10 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "frsqrte s16, s18"
      ]
    },
    "vrcpps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frecpe v16.4s, v17.4s"
      ]
    },
    "vrcpps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x53 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frecpe z16.s, z17.s"
      ]
    },
    "vrcpss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "frecpe s16, s18"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Repeat.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256"
    ]
  },
  "Instructions": {}
}


================================================
FILE: unittests/InstructionCountCI/SSE42_Strings.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "CRYPTO"
    ],
    "Comment": [
      "Look at the official documentation for more information about this layout.",
      "imm of each string comparison operation changes behaviour of the operation.",
      "[1:0] - Source Data Format",
      "      - 00b: Unsigned bytes",
      "      - 01b: Unsigned words",
      "      - 10b: Signed bytes",
      "      - 11b: Signed words",
      "[3:2] - Aggregation Operation",
      "      - 00b: Equal Any",
      "      - 01b: Range",
      "      - 10b: Equal Each",
      "      - 11b: Equal Ordered",
      "[5:4] - Polarity",
      "      - 00b: Positive Polarity (IntRes2 = IntRes1)",
      "      - 01b: Negative Polarity (IntRes2 = -1 ^ IntRes1)",
      "      - 10b: Positive Masked (IntRes2 = IntRes1)",
      "      - 11b: Negative Masked (IntRes2[i] = ~IntRes1[i])",
      "[6]   - Output selection",
      "      - 0b: ECX = LSB",
      "      - 1b: ECX = MSB",
      "[7]   - Reserved"
    ]
  },
  "Instructions": {
    "pcmpestrm xmm0, xmm1, 0_0_00_00_00b": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0x66 0x0f 0x3A 0x60"
      ],
      "ExpectedArm64ASM": [
        "ldr x3, [x28, #2320]",
        "ldr x0, [x28, #2328]",
        "stp x0, x30, [sp, #-16]!",
        "mov x0, x4",
        "mov x1, x5",
        "mov w2, #0x0",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v17.16b",
        "blr x3",
        "ldp xzr, x30, [sp], #16",
        "mov w20, w0",
        "mov w27, #0x0",
        "uxth w0, w20",
        "fmov s16, w0",
        "mov w26, #0x1",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "pcmpestri xmm0, xmm1, 0_0_00_00_00b": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0x66 0x0f 0x3A 0x61"
      ],
      "ExpectedArm64ASM": [
        "ldr x3, [x28, #2320]",
        "ldr x0, [x28, #2328]",
        "stp x0, x30, [sp, #-16]!",
        "mov x0, x4",
        "mov x1, x5",
        "mov w2, #0x0",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v17.16b",
        "blr x3",
        "ldp xzr, x30, [sp], #16",
        "mov w20, w0",
        "mov w27, #0x0",
        "uxth w21, w20",
        "mov w22, #0x10",
        "rbit w0, w21",
        "clz w23, w0",
        "cmp x21, #0x0 (0)",
        "csel x7, x22, x23, eq",
        "mov w26, #0x1",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "pcmpistrm xmm0, xmm1, 0_0_00_00_00b": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0x66 0x0f 0x3A 0x62"
      ],
      "ExpectedArm64ASM": [
        "str x30, [sp, #-16]!",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v17.16b",
        "mov w0, #0x0",
        "ldr x1, [x28, #2336]",
        "ldr x3, [x28, #2344]",
        "blr x1",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "mov w27, #0x0",
        "uxth w0, w20",
        "fmov s16, w0",
        "mov w26, #0x1",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "pcmpistri xmm0, xmm1, 0_0_00_00_00b": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0x66 0x0f 0x3A 0x63"
      ],
      "ExpectedArm64ASM": [
        "str x30, [sp, #-16]!",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v17.16b",
        "mov w0, #0x0",
        "ldr x1, [x28, #2336]",
        "ldr x3, [x28, #2344]",
        "blr x1",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "mov w27, #0x0",
        "uxth w21, w20",
        "mov w22, #0x10",
        "rbit w0, w21",
        "clz w23, w0",
        "cmp x21, #0x0 (0)",
        "csel x7, x22, x23, eq",
        "mov w26, #0x1",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Comment": [
    "MMX instructions are defined as optimal without SRA being used for these instructions.",
    "Could remove a bunch of instructions if those are under SRA",
    "Vector shifts by vector elements can be optimized with SVE wide element shifts",
    "Vector multiply returning high can be optimized with SVE"
  ],
  "Instructions": {
    "femms": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x0b",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1202]"
      ]
    },
    "movups xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x10",
      "ExpectedArm64ASM": []
    },
    "movups xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x10",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movups xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x10",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "movups [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x11",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "movlps xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x12",
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[0], [x4]"
      ]
    },
    "movlps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x13",
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "movhlps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x12",
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[1]"
      ]
    },
    "unpcklps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x14",
      "ExpectedArm64ASM": [
        "zip1 v16.4s, v16.4s, v17.4s"
      ]
    },
    "unpckhps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x15",
      "ExpectedArm64ASM": [
        "zip2 v16.4s, v16.4s, v17.4s"
      ]
    },
    "movhps xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x16",
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[1], [x4]"
      ]
    },
    "movlhps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x16",
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[0]"
      ]
    },
    "movhps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x17",
      "ExpectedArm64ASM": [
        "st1 {v16.d}[1], [x4]"
      ]
    },
    "nop": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x19",
      "ExpectedArm64ASM": []
    },
    "movaps xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0x28",
      "ExpectedArm64ASM": []
    },
    "movaps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x28",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movaps xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x28",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "movaps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x29",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "cvtpi2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf v0.2s, v2.2s",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtpi2ps xmm0, mm0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "scvtf v0.2s, v2.2s",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "movntps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x2b",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "cvttps2pi mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x2c",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x4]",
        "frint32z v2.4s, v2.4s",
        "fcvtzs v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "cvttps2pi mm0, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x2c",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "frint32z v2.4s, v16.4s",
        "fcvtzs v2.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "cvtps2pi mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x2d",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x4]",
        "frint32x v2.4s, v2.4s",
        "fcvtzs v2.2s, v2.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "cvtps2pi mm0, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x2d",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "frint32x v2.4s, v16.4s",
        "fcvtzs v2.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "ucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x2e",
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "comiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x2f",
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "rdtsc": {
      "Skip": "Yes",
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0x31",
      "ExpectedArm64ASM": [
        "mrs x20, S3_3_c14_c0_2",
        "lsl w4, w20, #7",
        "lsr x5, x20, #25"
      ]
    },
    "cmovo ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, vs",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovo eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, vs"
      ]
    },
    "cmovo rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x40",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, vs"
      ]
    },
    "cmovno ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, vc",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovno eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, vc"
      ]
    },
    "cmovno rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x41",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, vc"
      ]
    },
    "cmovb ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, lo",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovb eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, lo"
      ]
    },
    "cmovb rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x42",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, lo"
      ]
    },
    "cmovnb ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, hs",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnb eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, hs"
      ]
    },
    "cmovnb rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x43",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, hs"
      ]
    },
    "cmovz ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovz eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, eq"
      ]
    },
    "cmovz rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x44",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, eq"
      ]
    },
    "cmovnz ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnz eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ne"
      ]
    },
    "cmovnz rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x45",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ne"
      ]
    },
    "cmovbe ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ls",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovbe eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ls"
      ]
    },
    "cmovbe rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x46",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ls"
      ]
    },
    "cmovnbe ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, hi",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnbe eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, hi"
      ]
    },
    "cmovnbe rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x47",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, hi"
      ]
    },
    "cmovs ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, mi",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovs eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, mi"
      ]
    },
    "cmovs rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x48",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, mi"
      ]
    },
    "cmovns ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, pl",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovns eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, pl"
      ]
    },
    "cmovns rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x49",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, pl"
      ]
    },
    "cmovpe ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x21"
      ]
    },
    "cmovpe eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w4, w6, w4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovpe rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel x4, x6, x4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovnp ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w20, w6, w4, ne",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x21"
      ]
    },
    "cmovnp eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel w4, w6, w4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovnp rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x4b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "mrs x21, nzcv",
        "tst w20, #0x1",
        "csel x4, x6, x4, ne",
        "msr nzcv, x21"
      ]
    },
    "cmovl ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, lt",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovl eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, lt"
      ]
    },
    "cmovl rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4c",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, lt"
      ]
    },
    "cmovnl ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, ge",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnl eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, ge"
      ]
    },
    "cmovnl rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4d",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, ge"
      ]
    },
    "cmovle ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, le",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovle eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, le"
      ]
    },
    "cmovle rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4e",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, le"
      ]
    },
    "cmovnle ax, bx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel w20, w6, w4, gt",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmovnle eax, ebx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel w4, w6, w4, gt"
      ]
    },
    "cmovnle rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x4f",
      "ExpectedArm64ASM": [
        "csel x4, x6, x4, gt"
      ]
    },
    "movmskps eax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x50",
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "ldr q3, [x28, #3168]",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "movmskps rax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x50",
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "ldr q3, [x28, #3168]",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "sqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x51",
      "ExpectedArm64ASM": [
        "fsqrt v16.4s, v17.4s"
      ]
    },
    "rsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v17.4s",
        "fdiv v16.4s, v0.4s, v1.4s"
      ]
    },
    "rcpps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v16.4s, v0.4s, v17.4s"
      ]
    },
    "andps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x54",
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b"
      ]
    },
    "andnps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x55",
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b"
      ]
    },
    "orps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x56",
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b"
      ]
    },
    "xorps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x57",
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "xorps xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x57",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "addps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x58",
      "ExpectedArm64ASM": [
        "fadd v16.4s, v16.4s, v17.4s"
      ]
    },
    "mulps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x59",
      "ExpectedArm64ASM": [
        "fmul v16.4s, v16.4s, v17.4s"
      ]
    },
    "cvtps2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x5a",
      "ExpectedArm64ASM": [
        "fcvtl v16.2d, v17.2s"
      ]
    },
    "cvtps2pd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x5a",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "fcvtl v16.2d, v2.2s"
      ]
    },
    "cvtdq2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x5b",
      "ExpectedArm64ASM": [
        "scvtf v16.4s, v17.4s"
      ]
    },
    "subps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x5c",
      "ExpectedArm64ASM": [
        "fsub v16.4s, v16.4s, v17.4s"
      ]
    },
    "minps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x5d",
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v17.4s, v16.4s",
        "bif v16.16b, v17.16b, v0.16b"
      ]
    },
    "divps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x5e",
      "ExpectedArm64ASM": [
        "fdiv v16.4s, v16.4s, v17.4s"
      ]
    },
    "maxps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x5f",
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v17.4s, v16.4s",
        "bit v16.16b, v17.16b, v0.16b"
      ]
    },
    "punpcklbw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x60",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip1 v2.8b, v2.8b, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpcklbw mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x60",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip1 v2.8b, v2.8b, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpcklwd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x61",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip1 v2.4h, v2.4h, v3.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpcklwd mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x61",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip1 v2.4h, v2.4h, v3.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckldq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x62",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip1 v2.2s, v2.2s, v3.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckldq mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x62",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip1 v2.2s, v2.2s, v3.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "packsswb mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x63",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip1 v2.2d, v2.2d, v3.2d",
        "sqxtn v2.8b, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "packsswb mm0, [rax]": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x63",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip1 v2.2d, v2.2d, v3.2d",
        "sqxtn v2.8b, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "packsswb mm0, mm0": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x63",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "zip1 v2.2d, v2.2d, v2.2d",
        "sqxtn v2.8b, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpgtb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x64",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmgt v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpgtw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x65",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmgt v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpgtd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x66",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmgt v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhbw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x68",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip2 v2.8b, v2.8b, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhbw mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x68",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip2 v2.8b, v2.8b, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhwd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x69",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip2 v2.4h, v2.4h, v3.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhwd mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x69",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip2 v2.4h, v2.4h, v3.4h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhdq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x6a",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip2 v2.2s, v2.2s, v3.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "punpckhdq mm0, [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x6a",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x4]",
        "zip2 v2.2s, v2.2s, v3.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "packssdw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x6b",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "zip1 v2.2d, v2.2d, v3.2d",
        "sqxtn v2.4h, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movd mm0, eax": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x6e",
      "ExpectedArm64ASM": [
        "fmov s2, w4",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movd mm0, [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x6e",
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movq mm0, mm0": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x6f",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movq mm0, mm1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x6f",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movq mm0, [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0x6f",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, mm1, 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "dup v2.4h, v2.h[0]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, [rax], 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "dup v2.4h, v2.h[0]",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, mm1, 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr x0, [x28, #2664]",
        "ldr d3, [x0, #16]",
        "tbl v2.8b, {v2.16b}, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, [rax], 1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldr x0, [x28, #2664]",
        "ldr d3, [x0, #16]",
        "tbl v2.8b, {v2.16b}, v3.8b",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, mm1, 0xff": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "dup v2.4h, v2.h[3]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pshufw mm0, [rax], 0xff": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0x70",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "dup v2.4h, v2.h[3]",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpeqb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x74",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmeq v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpeqw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x75",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmeq v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pcmpeqd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0x76",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "cmeq v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "emms": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0x77",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1202]"
      ]
    },
    "movd eax, mm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x7e",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov w4, v2.s[0]"
      ]
    },
    "movd [rax], mm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x7e",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "str s2, [x4]"
      ]
    },
    "db 0x0f, 0x7f, 0xc1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "movq mm1, mm0",
        "Manual encoded since nasm would encode 0x6f version",
        "0x0f 0x7f"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "str d2, [x28, #1072]",
        "strh w20, [x28, #1080]"
      ]
    },
    "movq [rax], mm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x7f",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "str d2, [x4]"
      ]
    },
    "seto al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x90",
      "ExpectedArm64ASM": [
        "cset x20, vs",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setno al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x91",
      "ExpectedArm64ASM": [
        "cset x20, vc",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setb al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x92",
      "ExpectedArm64ASM": [
        "cset x20, lo",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnb al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x93",
      "ExpectedArm64ASM": [
        "cset x20, hs",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setz al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x94",
      "ExpectedArm64ASM": [
        "cset x20, eq",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnz al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x95",
      "ExpectedArm64ASM": [
        "cset x20, ne",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setbe al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x96",
      "ExpectedArm64ASM": [
        "cset x20, ls",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnbe al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x97",
      "ExpectedArm64ASM": [
        "cset x20, hi",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "sets al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x98",
      "ExpectedArm64ASM": [
        "cset x20, mi",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setns al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x99",
      "ExpectedArm64ASM": [
        "cset x20, pl",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setpe al": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x9a",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "and w20, w20, #0x1",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnp al": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x9b",
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "and w20, w20, #0x1",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setl al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9c",
      "ExpectedArm64ASM": [
        "cset x20, lt",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnl al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9d",
      "ExpectedArm64ASM": [
        "cset x20, ge",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setle al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9e",
      "ExpectedArm64ASM": [
        "cset x20, le",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "setnle al": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0x9f",
      "ExpectedArm64ASM": [
        "cset x20, gt",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "push fs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xa0",
      "ExpectedArm64ASM": [
        "ldr x20, [x28, #1000]",
        "str x20, [x8, #-8]!"
      ]
    },
    "pop fs": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xa1",
      "ExpectedArm64ASM": [
        "ldr x20, [x8], #8",
        "strh w20, [x28, #970]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #1000]"
      ]
    },
    "bt ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w20, w4, w20",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt [rax], bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt [rax], ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt rax, rbx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt [rax], rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xa3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "ldrb w21, [x4, x21, sxtx]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "shld ax, bx, 0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": []
    },
    "shld ax, bx, 1": {
      "ExpectedInstructionCount": 15,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x22, x21, #1",
        "lsr w20, w20, #15",
        "orr x26, x22, x20",
        "cmn wzr, w26, lsl #16",
        "eor x20, x21, #0x8000",
        "ubfx x20, x20, #15, #1",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "eor w20, w26, w21",
        "ubfx x20, x20, #15, #1",
        "bfi w22, w20, #28, #1",
        "msr nzcv, x22",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shld ax, bx, 15": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x22, x21, #15",
        "lsr w20, w20, #1",
        "orr x26, x22, x20",
        "cmn wzr, w26, lsl #16",
        "eor x20, x21, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "bfxil x4, x26, #0, #16"
      ]
    },
    "shld ax, bx, 16": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x21, x21, #16",
        "orr x26, x21, x20",
        "cmn wzr, w26, lsl #16",
        "bfxil x4, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "shld ax, bx, 31": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "lsl x21, x21, #31",
        "lsr w20, w20, #17",
        "orr x26, x21, x20",
        "cmn wzr, w26, lsl #16",
        "bfxil x4, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "shld eax, ebx, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "mov w4, w4"
      ]
    },
    "shld eax, ebx, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #31",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x80000000",
        "ubfx x20, x20, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w26, w4",
        "ubfx x20, x20, #31, #1",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 15": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #17",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x20000",
        "ubfx x20, x20, #17, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 16": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #16",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x10000",
        "ubfx x20, x20, #16, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld eax, ebx, 31": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr w26, w4, w6, #1",
        "cmp w26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": []
    },
    "shld rax, rbx, 1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #63",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x8000000000000000",
        "lsr x20, x20, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor x20, x26, x4",
        "lsr x20, x20, #63",
        "bfi w21, w20, #28, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 15": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #49",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2000000000000",
        "ubfx x20, x20, #49, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 32": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #32",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x100000000",
        "ubfx x20, x20, #32, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld rax, rbx, 63": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xa4",
      "ExpectedArm64ASM": [
        "extr x26, x4, x6, #1",
        "cmp x26, #0x0 (0)",
        "eor x20, x4, #0x2",
        "ubfx x20, x20, #1, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21",
        "mov x4, x26"
      ]
    },
    "shld ax, bx, cl": {
      "ExpectedInstructionCount": 25,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth w21, w4",
        "and x22, x7, #0x1f",
        "mov w23, #0x10",
        "sub x23, x23, x22",
        "lsl x24, x21, x22",
        "lsr w20, w20, w23",
        "orr x20, x24, x20",
        "mrs x23, nzcv",
        "cmp x22, #0x0 (0)",
        "csel x20, x21, x20, eq",
        "msr nzcv, x23",
        "and w0, w22, #0x1f",
        "cbz w0, #+0x2c",
        "cmn wzr, w20, lsl #16",
        "mov x26, x20",
        "mvn x0, x20",
        "eor w2, w21, w20",
        "mrs x1, nzcv",
        "lsr x0, x0, #16",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #15",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "shld eax, ebx, cl": {
      "ExpectedInstructionCount": 23,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "and x21, x7, #0x1f",
        "neg x22, x21",
        "lsl x23, x20, x21",
        "lsr w22, w6, w22",
        "orr x22, x23, x22",
        "mrs x23, nzcv",
        "cmp x21, #0x0 (0)",
        "csel x22, x20, x22, eq",
        "msr nzcv, x23",
        "and w0, w21, #0x1f",
        "cbz w0, #+0x2c",
        "ands w26, w22, w22",
        "neg w0, w21",
        "lsr w0, w20, w0",
        "mvn x0, x0",
        "eor w2, w20, w22",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr w2, w2, #31",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov w4, w22"
      ]
    },
    "shld rax, rbx, cl": {
      "ExpectedInstructionCount": 22,
      "Comment": "0x0f 0xa5",
      "ExpectedArm64ASM": [
        "and x20, x7, #0x3f",
        "neg x21, x20",
        "lsl x22, x4, x20",
        "lsr x21, x6, x21",
        "orr x21, x22, x21",
        "mrs x22, nzcv",
        "cmp x20, #0x0 (0)",
        "csel x21, x4, x21, eq",
        "msr nzcv, x22",
        "and w0, w20, #0x3f",
        "cbz x0, #+0x2c",
        "ands x26, x21, x21",
        "neg x0, x20",
        "lsr x0, x4, x0",
        "mvn x0, x0",
        "eor x2, x4, x21",
        "mrs x1, nzcv",
        "bfi w1, w0, #29, #1",
        "lsr x2, x2, #63",
        "bfi w1, w2, #28, #1",
        "msr nzcv, x1",
        "mov x4, x21"
      ]
    },
    "push gs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xa8",
      "ExpectedArm64ASM": [
        "ldr x20, [x28, #992]",
        "str x20, [x8, #-8]!"
      ]
    },
    "pop gs": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xa9",
      "ExpectedArm64ASM": [
        "ldr x20, [x8], #8",
        "strh w20, [x28, #968]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #992]"
      ]
    },
    "bts ax, bx": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w21, w4, w20",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "mov w21, #0x1",
        "lsl w20, w21, w20",
        "orr w20, w4, w20",
        "bfxil x4, x20, #0, #16",
        "eor w20, w22, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts [rax], bx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts eax, ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "orr w4, w4, w20",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts [rax], ebx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts rax, rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "orr x4, x4, x20",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts [rax], rbx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "orr x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts [rax], bx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts [rax], ebx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts [rax], rbx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xab",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldsetalb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "imul ax, bx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "sxth x20, w4",
        "sxth x21, w6",
        "mul x20, x20, x21",
        "sbfx x21, x20, #16, #16",
        "bfxil x4, x20, #0, #16",
        "sbfx x20, x20, #15, #1",
        "cmp x21, x20",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul eax, ebx": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "smull x20, w4, w6",
        "asr x20, x20, #32",
        "mul w4, w4, w6",
        "sbfx x21, x4, #31, #1",
        "cmp x20, x21",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "imul rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xaf",
      "ExpectedArm64ASM": [
        "smulh x20, x4, x6",
        "mul x4, x4, x6",
        "asr x21, x4, #63",
        "cmp x20, x21",
        "ccmp xzr, #0, #nzcV, eq"
      ]
    },
    "cmpxchg al, bl": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xb0",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "lsl w0, w4, #24",
        "cmp w0, w4, lsl #24",
        "sub w26, w4, w4",
        "bfxil x4, x6, #0, #8"
      ]
    },
    "cmpxchg [rcx], bl": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "uxtb x21, w4",
        "mov w1, w4",
        "casalb w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmp w0, w20, lsl #24",
        "sub w26, w21, w20",
        "bfxil x4, x20, #0, #8"
      ]
    },
    "cmpxchg ax, bx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "lsl w0, w4, #16",
        "cmp w0, w4, lsl #16",
        "sub w26, w4, w4",
        "bfxil x4, x6, #0, #16"
      ]
    },
    "cmpxchg [rcx], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "uxth x21, w4",
        "mov w1, w4",
        "casalh w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmp w0, w20, lsl #16",
        "sub w26, w21, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "cmpxchg eax, ebx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "subs w26, w4, w4",
        "mov x4, x6"
      ]
    },
    "cmpxchg [rcx], ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "mov w21, w4",
        "mov w1, w4",
        "casal w1, w20, [x7]",
        "mov w20, w1",
        "eor x27, x21, x20",
        "subs w26, w21, w20",
        "csel x4, x4, x20, eq"
      ]
    },
    "cmpxchg rax, rbx": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov w27, #0x0",
        "subs x26, x4, x4",
        "mov x4, x6"
      ]
    },
    "cmpxchg [rcx], rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xb1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "casal x4, x6, [x7]",
        "eor x27, x20, x4",
        "subs x26, x20, x4"
      ]
    },
    "btr ax, bx": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "lsr w21, w4, w20",
        "ubfx x21, x21, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "mov w21, #0x1",
        "lsl w20, w21, w20",
        "bic w20, w4, w20",
        "bfxil x4, x20, #0, #16",
        "eor w20, w22, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr [rax], bx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr eax, ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "lsr w20, w4, w6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "bic w4, w4, w20",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr [rax], ebx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr rax, rbx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "lsr x20, x4, x6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "bic x4, x4, x20",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr [rax], rbx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "bic x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "movzx ax, bl": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "lock btr [rax], bx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr [rax], ebx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr [rax], rbx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xb3",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldclralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "movzx ax, byte [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "movzx eax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "uxtb w4, w6"
      ]
    },
    "movzx eax, byte [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "ldrb w4, [x4]"
      ]
    },
    "movzx rax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "uxtb w4, w6"
      ]
    },
    "movzx rax, byte [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb6",
      "ExpectedArm64ASM": [
        "ldrb w4, [x4]"
      ]
    },
    "movzx eax, bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb7",
      "ExpectedArm64ASM": [
        "uxth w4, w6"
      ]
    },
    "movzx eax, word [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb7",
      "ExpectedArm64ASM": [
        "ldrh w4, [x4]"
      ]
    },
    "movzx rax, bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb7",
      "ExpectedArm64ASM": [
        "uxth w4, w6"
      ]
    },
    "movzx rax, word [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xb7",
      "ExpectedArm64ASM": [
        "ldrh w4, [x4]"
      ]
    },
    "btc ax, bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "and x20, x6, #0xf",
        "mov w21, #0x1",
        "lsl w21, w21, w20",
        "eor w21, w4, w21",
        "lsr w20, w21, w20",
        "ubfx x20, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w20, #29, #1",
        "bfxil x4, x21, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "btc [rax], bx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc eax, ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl w20, w20, w6",
        "eor w4, w4, w20",
        "lsr w20, w4, w6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc [rax], ebx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc rax, rbx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "lsl x20, x20, x6",
        "eor x4, x4, x20",
        "lsr x20, x4, x6",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc [rax], rbx": {
      "ExpectedInstructionCount": 13,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "ldrb w23, [x4, x21, sxtx]",
        "eor x22, x23, x22",
        "strb w22, [x4, x21, sxtx]",
        "lsr w20, w23, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc [rax], bx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #13",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc [rax], ebx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "sbfx x21, x6, #3, #29",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc [rax], rbx": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xbb",
      "ExpectedArm64ASM": [
        "ubfx x20, x6, #0, #3",
        "asr x21, x6, #3",
        "mov w22, #0x1",
        "lsl x22, x22, x20",
        "add x21, x4, x21",
        "ldeoralb w22, w21, [x21]",
        "lsr w20, w21, w20",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bsf ax, bx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w0, w6",
        "clz w20, w0",
        "tst w6, #0xffff",
        "csel x20, x4, x20, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "bsf eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w0, w6",
        "clz w20, w0",
        "tst w6, w6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsf rax, rbx": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit x0, x6",
        "clz x20, x0",
        "tst x6, x6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsr ax, bx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0xf",
        "lsl w20, w6, #16",
        "clz w20, w20",
        "sub x20, x0, x20",
        "tst w6, #0xffff",
        "csel x20, x4, x20, eq",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "bsr eax, ebx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0x1f",
        "clz w20, w6",
        "sub x20, x0, x20",
        "tst w6, w6",
        "csel x4, x4, x20, eq"
      ]
    },
    "bsr rax, rbx": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xbd",
      "ExpectedArm64ASM": [
        "mov x0, #0x3f",
        "clz x20, x6",
        "sub x20, x0, x20",
        "tst x6, x6",
        "csel x4, x4, x20, eq"
      ]
    },
    "movsx ax, bl": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "sxtb w20, w6",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "movsx ax, byte [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "sxtb w20, w20",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "movsx eax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "sxtb w4, w6"
      ]
    },
    "movsx eax, byte [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "sxtb w4, w20"
      ]
    },
    "movsx rax, bl": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "sxtb x4, w6"
      ]
    },
    "movsx rax, byte [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xbe",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "sxtb x4, w20"
      ]
    },
    "movsx eax, bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xbf",
      "ExpectedArm64ASM": [
        "sxth w4, w6"
      ]
    },
    "movsx eax, word [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xbf",
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth w4, w20"
      ]
    },
    "movsx rax, bx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xbf",
      "ExpectedArm64ASM": [
        "sxth x4, w6"
      ]
    },
    "movsx rax, word [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xbf",
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x4, w20"
      ]
    },
    "xadd al, bl": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w4",
        "uxtb w21, w6",
        "eor x27, x20, x21",
        "lsl w0, w20, #24",
        "cmn w0, w21, lsl #24",
        "add w26, w20, w21",
        "bfxil x6, x20, #0, #8",
        "bfxil x4, x26, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd [rax], bl": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xc0",
      "ExpectedArm64ASM": [
        "uxtb w20, w6",
        "ldaddalb w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #24",
        "cmn w0, w20, lsl #24",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #8",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd ax, bx": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w4",
        "uxth w21, w6",
        "eor x27, x20, x21",
        "lsl w0, w20, #16",
        "cmn w0, w21, lsl #16",
        "add w26, w20, w21",
        "bfxil x6, x20, #0, #16",
        "bfxil x4, x26, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd [rax], bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "ldaddalh w20, w21, [x4]",
        "eor x27, x21, x20",
        "lsl w0, w21, #16",
        "cmn w0, w20, lsl #16",
        "add w26, w21, w20",
        "bfxil x6, x21, #0, #16",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd eax, ebx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "mov w21, w6",
        "eor x27, x20, x21",
        "adds w26, w20, w21",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x6, x20",
        "mov x4, x26"
      ]
    },
    "xadd [rax], ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldaddal w20, w6, [x4]",
        "eor x27, x6, x20",
        "adds w26, w6, w20",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "xadd rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "eor x27, x4, x6",
        "adds x26, x4, x6",
        "mrs x20, nzcv",
        "eor w20, w20, #0x20000000",
        "msr nzcv, x20",
        "mov x6, x4",
        "mov x4, x26"
      ]
    },
    "xadd [rax], rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc1",
      "ExpectedArm64ASM": [
        "ldaddal x6, x20, [x4]",
        "eor x27, x20, x6",
        "adds x26, x20, x6",
        "mrs x21, nzcv",
        "eor w21, w21, #0x20000000",
        "msr nzcv, x21",
        "mov x6, x20"
      ]
    },
    "cmpps xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v16.4s, v17.4s"
      ]
    },
    "cmpps xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmgt v16.4s, v17.4s, v16.4s"
      ]
    },
    "cmpps xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v16.4s, v17.4s, v16.4s"
      ]
    },
    "cmpps xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v16.4s, v17.4s",
        "fcmgt v1.4s, v17.4s, v16.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b"
      ]
    },
    "cmpps xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v16.4s, v17.4s",
        "mvn v16.16b, v16.16b"
      ]
    },
    "cmpps xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmgt v2.4s, v17.4s, v16.4s",
        "mvn v16.16b, v2.16b"
      ]
    },
    "cmpps xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v2.4s, v17.4s, v16.4s",
        "mvn v16.16b, v2.16b"
      ]
    },
    "cmpps xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v16.4s, v17.4s",
        "fcmgt v1.4s, v17.4s, v16.4s",
        "orr v16.16b, v0.16b, v1.16b"
      ]
    },
    "movnti [rax], ebx": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xc3",
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "str w20, [x4]"
      ]
    },
    "movnti [rax], rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc3",
      "ExpectedArm64ASM": [
        "str x6, [x4]"
      ]
    },
    "pinsrw mm0, eax, 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov v2.h[0], w4",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, eax, 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov v2.h[1], w4",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, eax, 2": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov v2.h[2], w4",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, eax, 3": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov v2.h[3], w4",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, eax, 4": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "mov v2.h[0], w4",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, [rax], 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ld1 {v2.h}[0], [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, [rax], 1": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ld1 {v2.h}[1], [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, [rax], 2": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ld1 {v2.h}[2], [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, [rax], 3": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ld1 {v2.h}[3], [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pinsrw mm0, [rax], 4": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x0f 0xc4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ld1 {v2.h}[0], [x4]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pextrw eax, mm0, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "umov w4, v2.h[0]"
      ]
    },
    "pextrw eax, mm0, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "umov w4, v2.h[1]"
      ]
    },
    "pextrw eax, mm0, 2": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "umov w4, v2.h[2]"
      ]
    },
    "pextrw eax, mm0, 3": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "umov w4, v2.h[3]"
      ]
    },
    "pextrw eax, mm0, 4": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "umov w4, v2.h[0]"
      ]
    },
    "shufps xmm0, xmm1, 01000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Dst[63:0]    = Src1[63:0]",
        "Dest[127:64] = Src2[63:0]",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v16.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11101110b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Dst[63:0]    = Src1[127:64]",
        "Dest[127:64] = Src2[127:64]",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v16.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11100100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Dst[63:0]    = Src1[63:0]",
        "Dest[127:64] = Src2[127:64]",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "shufps xmm0, xmm1, 01001110b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Dst[63:0]    = Src1[127:64]",
        "Dest[127:64] = Src2[63:0]",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v16.16b, v17.16b, #8"
      ]
    },
    "shufps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "dup v3.4s, v17.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 00000101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "dup v3.4s, v17.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 00001010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "dup v3.4s, v17.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 00001111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "dup v3.4s, v17.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 01010000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "dup v3.4s, v17.s[1]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 01010101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "dup v3.4s, v17.s[1]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 01011010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "dup v3.4s, v17.s[1]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 01011111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "dup v3.4s, v17.s[1]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 10100000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "dup v3.4s, v17.s[2]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 10100101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "dup v3.4s, v17.s[2]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 10101010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "dup v3.4s, v17.s[2]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 10101111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "dup v3.4s, v17.s[2]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 11110000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "dup v3.4s, v17.s[3]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 11110101b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "dup v3.4s, v17.s[3]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 11111010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "dup v3.4s, v17.s[3]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, xmm1, 11100000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "zip2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11100101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "zip2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11101010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "zip2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11101111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "zip2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 01000000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[0]",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 01000101b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[1]",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 01001010b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Low 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[2]",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 01001111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Bottom elements duplicated, Low 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Bottom 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "zip1 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 01010100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Bottom 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "zip1 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 10100100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Bottom 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[2]",
        "zip1 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 11110100b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Bottom 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[3]",
        "zip1 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 00001110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "zip2 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 01011110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[1]",
        "zip2 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 10101110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[2]",
        "zip2 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 11111110b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Top elements duplicated, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[3]",
        "zip2 v16.2d, v16.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 01000111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "odd elements inverted, Low 64-bits inserted",
        "SRA quirks with RA fail to understand that v16 is dead",
        "Could InsElement directly into v16 but it does two moves instead",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.s[0], v16.s[3]",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11100111b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "odd elements inverted, Top 64-bits inserted",
        "SRA quirks with RA fail to understand that v16 is dead",
        "Could InsElement directly into v16 but it does two moves instead",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.s[0], v16.s[3]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "shufps xmm0, xmm1, 11100001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Lower 32-bit elements inverted, Top 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v16.4s",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "shufps xmm0, xmm1, 01000001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Lower 32-bit elements inverted, Low 64-bits inserted",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v16.4s",
        "zip1 v16.2d, v2.2d, v17.2d"
      ]
    },
    "shufps xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Duplicate selected element between each 64-bit segment",
        "0x0f 0xc6"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v16.s[3]",
        "dup v3.4s, v17.s[3]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "shufps xmm0, [rax], 0": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v3.4s, v16.s[0]",
        "dup v2.4s, v2.s[0]",
        "zip1 v16.2d, v3.2d, v2.2d"
      ]
    },
    "shufps xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v16.16b, v17.16b}, v2.16b"
      ]
    },
    "shufps xmm1, xmm0, 1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #16]",
        "mov v0.16b, v17.16b",
        "mov v1.16b, v16.16b",
        "tbl v17.16b, {v0.16b, v1.16b}, v2.16b"
      ]
    },
    "shufps xmm0, [rax], 1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ldr x0, [x28, #2688]",
        "ldr q3, [x0, #16]",
        "mov v0.16b, v16.16b",
        "mov v1.16b, v2.16b",
        "tbl v16.16b, {v0.16b, v1.16b}, v3.16b"
      ]
    },
    "shufps xmm0, [rax], 0xFF": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v3.4s, v16.s[3]",
        "dup v2.4s, v2.s[3]",
        "zip1 v16.2d, v3.2d, v2.2d"
      ]
    },
    "bswap eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc8",
      "ExpectedArm64ASM": [
        "rev w4, w4"
      ]
    },
    "bswap rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x0f 0xc8",
      "ExpectedArm64ASM": [
        "rev x4, x4"
      ]
    },
    "psrlw mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xd1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "ushl v2.8h, v2.8h, v0.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xd2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "ushl v2.4s, v2.4s, v0.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xd3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "neg v0.2d, v0.2d",
        "ushl v2.2d, v2.2d, v0.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "add v2.2d, v3.2d, v2.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmullw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "mul v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmovmskb eax, mm0": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xd7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #3296]",
        "cmlt v2.16b, v2.16b, #0",
        "and v2.8b, v2.8b, v3.8b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "psubusb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd8",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "uqsub v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psubusw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd9",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "uqsub v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pminub mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xda",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "umin v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pand mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xdb",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "and v2.8b, v3.8b, v2.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddusb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xdc",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "uqadd v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddusw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xdd",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "uqadd v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmaxub mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xde",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "umax v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pandn mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xdf",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "bic v2.8b, v2.8b, v3.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pavgb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe0",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "urhadd v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xe1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "sshl v2.8h, v2.8h, v0.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad mm0, mm1": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xe2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "sshl v2.4s, v2.4s, v0.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pavgw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "urhadd v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmulhuw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xe4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "umull v2.4s, v2.4h, v3.4h",
        "shrn v2.4h, v2.4s, #16",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmulhw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xe5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "smull v2.4s, v2.4h, v3.4h",
        "shrn v2.4h, v2.4s, #16",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "movntq [rax], mm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0xe7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "str d2, [x4]"
      ]
    },
    "psubsb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe8",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqsub v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psubsw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe9",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqsub v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pminsw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xea",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "smin v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "por mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xeb",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "orr v2.8b, v3.8b, v2.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddsb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xec",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqadd v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddsw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xed",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sqadd v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmaxsw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xee",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "smax v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pxor mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xef",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "eor v2.8b, v3.8b, v2.8b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pxor mm0, mm0": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x0f 0xef",
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw mm0, mm1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xf1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "ushl v2.8h, v2.8h, v0.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld mm0, mm1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xf2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "ushl v2.4s, v2.4s, v0.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq mm0, mm1": {
      "ExpectedInstructionCount": 11,
      "Comment": "0x0f 0xf3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uqshl d0, d3, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "ushl v2.2d, v2.2d, v0.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmuludq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "umull v2.2d, v2.2s, v3.2s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pmaddwd mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xf5",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "smull v2.4s, v2.4h, v3.4h",
        "addp v2.4s, v2.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psadbw mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xf6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "uabdl v2.8h, v2.8b, v3.8b",
        "addv h2, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "maskmovq mm0, mm1": {
      "ExpectedInstructionCount": 9,
      "Comment": "0x0f 0xf7",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "cmlt v2.16b, v2.16b, #0",
        "ldr d3, [x28, #1056]",
        "ldr d4, [x11]",
        "bsl v2.8b, v3.8b, v4.8b",
        "str d2, [x11]"
      ]
    },
    "psubb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf8",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sub v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psubw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf9",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sub v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psubd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xfa",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sub v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psubq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xfb",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "sub v2.2d, v3.2d, v2.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddb mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xfc",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "add v2.16b, v3.16b, v2.16b",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xfd",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "add v2.8h, v3.8h, v2.8h",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "paddd mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xfe",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1072]",
        "ldr d3, [x28, #1056]",
        "add v2.4s, v3.4s, v2.4s",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/SecondaryGroup.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "RNG"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "sgdt [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP7 0x0F 0x1 /0",
      "ExpectedArm64ASM": [
        "strh wzr, [x4]",
        "mov x20, #0xfffffffffffe0000",
        "stur x20, [x4, #2]"
      ]
    },
    "bt ax, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt eax, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt rax, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt ax, 15": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #15, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt eax, 31": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt rax, 63": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "lsr x20, x4, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bt word [rax], 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt dword [rax], 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt qword [rax], 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt word [rax], 15": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt dword [rax], 31": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bt qword [rax], 63": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /4",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts ax, 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr w20, w4, #0x1",
        "bfxil x4, x20, #0, #16",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts eax, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr w4, w4, #0x1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts rax, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr x4, x4, #0x1",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts ax, 15": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #15, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr w20, w4, #0x8000",
        "bfxil x4, x20, #0, #16",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts eax, 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr w4, w4, #0x80000000",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts rax, 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "lsr x20, x4, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "orr x4, x4, #0x8000000000000000",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "bts word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "orr x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bts qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "orr x21, x20, #0x80",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldsetalb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock bts qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /5",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldsetalb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr ax, 0": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and w20, w4, #0xfffffffe",
        "bfxil x4, x20, #0, #16",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr eax, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and w4, w4, #0xfffffffe",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr rax, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and x4, x4, #0xfffffffffffffffe",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr ax, 15": {
      "ExpectedInstructionCount": 7,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #15, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and w20, w4, #0xffff7fff",
        "bfxil x4, x20, #0, #16",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr eax, 31": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and w4, w4, #0x7fffffff",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr rax, 63": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "lsr x20, x4, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "and x4, x4, #0x7fffffffffffffff",
        "eor w20, w21, #0x20000000",
        "msr nzcv, x20"
      ]
    },
    "btr word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "and x21, x20, #0xfffffffffffffffe",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btr qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "and x21, x20, #0xffffffffffffff7f",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldclralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btr qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /6",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldclralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc ax, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w20, w4, #0x1",
        "ubfx x21, x20, #0, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "btc eax, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x1",
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc rax, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x1",
        "ubfx x20, x4, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc ax, 15": {
      "ExpectedInstructionCount": 6,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w20, w4, #0x8000",
        "ubfx x21, x20, #15, #1",
        "mrs x22, nzcv",
        "bfi w22, w21, #29, #1",
        "bfxil x4, x20, #0, #16",
        "msr nzcv, x22"
      ]
    },
    "btc eax, 31": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor w4, w4, #0x80000000",
        "ubfx x20, x4, #31, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc rax, 63": {
      "ExpectedInstructionCount": 5,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "eor x4, x4, #0x8000000000000000",
        "lsr x20, x4, #63",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4]",
        "eor x21, x20, #0x1",
        "strb w21, [x4]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #1]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #1]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #3]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #3]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "btc qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "ldrb w20, [x4, #7]",
        "eor x21, x20, #0x80",
        "strb w21, [x4, #7]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc word [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc dword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc qword [rax], 0": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x1",
        "add x21, x4, #0x0 (0)",
        "ldeoralb w20, w20, [x21]",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc word [rax], 15": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x1 (1)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc dword [rax], 31": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x3 (3)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lock btc qword [rax], 63": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP8 0x0F 0xBA /7",
      "ExpectedArm64ASM": [
        "mov w20, #0x80",
        "add x21, x4, #0x7 (7)",
        "ldeoralb w20, w20, [x21]",
        "lsr w20, w20, #7",
        "eor x20, x20, #0x1",
        "ubfx x20, x20, #0, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "cmpxchg8b [rbp]": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP9 0x0F 0xC7 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "mov x21, x5",
        "caspal w20, w21, w6, w7, [x9]",
        "mrs x0, nzcv",
        "cmp w20, w4",
        "ccmp w21, w5, #nzcv, eq",
        "cset w1, eq",
        "bfi w0, w1, #30, #1",
        "msr nzcv, x0",
        "csel x4, x20, x4, ne",
        "csel x5, x21, x5, ne"
      ]
    },
    "cmpxchg16b [rbp]": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP9 0x0F 0xC7 /1",
      "ExpectedArm64ASM": [
        "mov x20, x4",
        "mov x21, x5",
        "caspal x20, x21, x6, x7, [x9]",
        "mrs x0, nzcv",
        "cmp x20, x4",
        "ccmp x21, x5, #nzcv, eq",
        "cset w1, eq",
        "bfi w0, w1, #30, #1",
        "msr nzcv, x0",
        "csel x4, x20, x4, ne",
        "csel x5, x21, x5, ne"
      ]
    },
    "rdrand ax": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x20, rndr",
        "bfxil x4, x20, #0, #16",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdrand eax": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x20, rndr",
        "mov w4, w20",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdrand rax": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP9 0x0F 0xC7 /6",
      "ExpectedArm64ASM": [
        "mrs x4, rndr",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdseed ax": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x20, rndrrs",
        "bfxil x4, x20, #0, #16",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdseed eax": {
      "ExpectedInstructionCount": 9,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x20, rndrrs",
        "mov w4, w20",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdseed rax": {
      "ExpectedInstructionCount": 8,
      "Comment": "GROUP9 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x4, rndrrs",
        "cset x20, eq",
        "mov w26, #0x1",
        "mov w27, #0x0",
        "mov w0, w27",
        "bfi w0, w20, #29, #1",
        "mov w20, w0",
        "msr nzcv, x20"
      ]
    },
    "rdpid eax": {
      "ExpectedInstructionCount": 20,
      "Comment": "GROUP9 0xF3 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "str w0, [x28, #1032]",
        "str x25, [x28, #176]",
        "str x8, [x28, #64]",
        "mov w0, #0x100",
        "str x0, [x28, #1480]",
        "sub sp, sp, #0x10 (16)",
        "mov w8, #0xa8",
        "mov x0, sp",
        "add x1, sp, #0x4 (4)",
        "svc #0x0",
        "ldp w0, w1, [sp]",
        "sub sp, sp, #0x10 (16)",
        "ldr x25, [x28, #176]",
        "ldr w8, [x28, #1032]",
        "msr nzcv, x8",
        "ldr x8, [x28, #64]",
        "str xzr, [x28, #1480]",
        "orr x20, x0, x1, lsl #12",
        "mov w4, w20"
      ]
    },
    "rdpid rax": {
      "ExpectedInstructionCount": 20,
      "Comment": "GROUP9 0xF3 0x0F 0xC7 /7",
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "str w0, [x28, #1032]",
        "str x25, [x28, #176]",
        "str x8, [x28, #64]",
        "mov w0, #0x100",
        "str x0, [x28, #1480]",
        "sub sp, sp, #0x10 (16)",
        "mov w8, #0xa8",
        "mov x0, sp",
        "add x1, sp, #0x4 (4)",
        "svc #0x0",
        "ldp w0, w1, [sp]",
        "sub sp, sp, #0x10 (16)",
        "ldr x25, [x28, #176]",
        "ldr w8, [x28, #1032]",
        "msr nzcv, x8",
        "ldr x8, [x28, #64]",
        "str xzr, [x28, #1480]",
        "orr x20, x0, x1, lsl #12",
        "mov w4, w20"
      ]
    },
    "psrlw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": []
    },
    "psrlw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": []
    },
    "psrlw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": [
        "ushr v16.8h, v16.8h, #15"
      ]
    },
    "psrlw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psraw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": []
    },
    "psraw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": []
    },
    "psraw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": [
        "sshr v16.8h, v16.8h, #15"
      ]
    },
    "psraw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /4",
      "ExpectedArm64ASM": [
        "sshr v16.8h, v16.8h, #15"
      ]
    },
    "psllw mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": []
    },
    "psllw mm0, 15": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.8h, v2.8h, #15",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw mm0, 16": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": []
    },
    "psllw xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": [
        "shl v16.8h, v16.8h, #15"
      ]
    },
    "psllw xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP12 0x0F 0x71 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrld mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": []
    },
    "psrld mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": []
    },
    "psrld xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": [
        "ushr v16.4s, v16.4s, #31"
      ]
    },
    "psrld xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrad mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": []
    },
    "psrad mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sshr v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": []
    },
    "psrad xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": [
        "sshr v16.4s, v16.4s, #31"
      ]
    },
    "psrad xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /4",
      "ExpectedArm64ASM": [
        "sshr v16.4s, v16.4s, #31"
      ]
    },
    "pslld mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": []
    },
    "pslld mm0, 31": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.4s, v2.4s, #31",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld mm0, 32": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": []
    },
    "pslld xmm0, 31": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": [
        "shl v16.4s, v16.4s, #31"
      ]
    },
    "pslld xmm0, 32": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP13 0x0F 0x72 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrlq mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": []
    },
    "psrlq mm0, 63": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ushr v2.2d, v2.2d, #63",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq mm0, 64": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": []
    },
    "psrlq xmm0, 63": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": [
        "ushr v16.2d, v16.2d, #63"
      ]
    },
    "psrlq xmm0, 64": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /2",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psrldq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /3",
      "ExpectedArm64ASM": []
    },
    "psrldq xmm0, 15": {
      "ExpectedInstructionCount": 2,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /3",
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v16.16b, v2.16b, #15"
      ]
    },
    "psrldq xmm0, 16": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /3",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psllq mm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": []
    },
    "psllq mm0, 63": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "shl v2.2d, v2.2d, #63",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq mm0, 64": {
      "ExpectedInstructionCount": 7,
      "Type": "MMX",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "movi v2.2d, #0x0",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq xmm0, 0": {
      "ExpectedInstructionCount": 0,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": []
    },
    "psllq xmm0, 63": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": [
        "shl v16.2d, v16.2d, #63"
      ]
    },
    "psllq xmm0, 64": {
      "ExpectedInstructionCount": 1,
      "Type": "SSE",
      "Comment": "GROUP14 0x0F 0x73 /6",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "fxsave [rax]": {
      "ExpectedInstructionCount": 68,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4, #2]",
        "ldrb w20, [x28, #1202]",
        "strb w20, [x4, #4]",
        "ldrb w20, [x28, #1051]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #32]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #64]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #80]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #96]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #112]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #128]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #144]",
        "stp q16, q17, [x4, #160]",
        "stp q18, q19, [x4, #192]",
        "stp q20, q21, [x4, #224]",
        "stp q22, q23, [x4, #256]",
        "stp q24, q25, [x4, #288]",
        "stp q26, q27, [x4, #320]",
        "stp q28, q29, [x4, #352]",
        "stp q30, q31, [x4, #384]",
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "mov w21, #0xffff",
        "stp w20, w21, [x4, #24]"
      ]
    },
    "rdfsbase eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldr w4, [x28, #1000]"
      ]
    },
    "rdfsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /0",
      "ExpectedArm64ASM": [
        "ldr x4, [x28, #1000]"
      ]
    },
    "fxrstor [rax]": {
      "ExpectedInstructionCount": 48,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldrh w20, [x4, #2]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x4, #4]",
        "strb w20, [x28, #1202]",
        "ldp q2, q3, [x4, #32]",
        "str q2, [x28, #1056]",
        "str q3, [x28, #1072]",
        "ldp q2, q3, [x4, #64]",
        "str q2, [x28, #1088]",
        "str q3, [x28, #1104]",
        "ldp q2, q3, [x4, #96]",
        "str q2, [x28, #1120]",
        "str q3, [x28, #1136]",
        "ldp q2, q3, [x4, #128]",
        "str q2, [x28, #1152]",
        "str q3, [x28, #1168]",
        "ldp q16, q17, [x4, #160]",
        "ldp q18, q19, [x4, #192]",
        "ldp q20, q21, [x4, #224]",
        "ldp q22, q23, [x4, #256]",
        "ldp q24, q25, [x4, #288]",
        "ldp q26, q27, [x4, #320]",
        "ldp q28, q29, [x4, #352]",
        "ldp q30, q31, [x4, #384]",
        "ldr w20, [x4, #24]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0"
      ]
    },
    "rdgsbase eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldr w4, [x28, #992]"
      ]
    },
    "rdgsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /1",
      "ExpectedArm64ASM": [
        "ldr x4, [x28, #992]"
      ]
    },
    "ldmxcsr [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0"
      ]
    },
    "wrfsbase eax": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "str x20, [x28, #1000]"
      ]
    },
    "wrfsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /2",
      "ExpectedArm64ASM": [
        "str x4, [x28, #1000]"
      ]
    },
    "stmxcsr [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "str w20, [x4]"
      ]
    },
    "wrgsbase eax": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "mov w20, w4",
        "str x20, [x28, #992]"
      ]
    },
    "wrgsbase rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /3",
      "ExpectedArm64ASM": [
        "str x4, [x28, #992]"
      ]
    },
    "xsave [rax]": {
      "ExpectedInstructionCount": 98,
      "Comment": "GROUP15 0x0F 0xAE /4",
      "ExpectedArm64ASM": [
        "ubfx x20, x4, #0, #1",
        "cbnz x20, #+0x8",
        "b #+0xe4",
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4, #2]",
        "ldrb w20, [x28, #1202]",
        "strb w20, [x4, #4]",
        "ldrb w20, [x28, #1051]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #32]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #64]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #80]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #96]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #112]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #128]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #144]",
        "ubfx x20, x4, #1, #1",
        "cbnz x20, #+0x8",
        "b #+0x24",
        "stp q16, q17, [x4, #160]",
        "stp q18, q19, [x4, #192]",
        "stp q20, q21, [x4, #224]",
        "stp q22, q23, [x4, #256]",
        "stp q24, q25, [x4, #288]",
        "stp q26, q27, [x4, #320]",
        "stp q28, q29, [x4, #352]",
        "stp q30, q31, [x4, #384]",
        "ubfx x20, x4, #2, #1",
        "cbnz x20, #+0x8",
        "b #+0x44",
        "ldp q2, q3, [x28, #192]",
        "stp q2, q3, [x4, #576]",
        "ldp q2, q3, [x28, #224]",
        "stp q2, q3, [x4, #608]",
        "ldp q2, q3, [x28, #256]",
        "stp q2, q3, [x4, #640]",
        "ldp q2, q3, [x28, #288]",
        "stp q2, q3, [x4, #672]",
        "ldp q2, q3, [x28, #320]",
        "stp q2, q3, [x4, #704]",
        "ldp q2, q3, [x28, #352]",
        "stp q2, q3, [x4, #736]",
        "ldp q2, q3, [x28, #384]",
        "stp q2, q3, [x4, #768]",
        "ldp q2, q3, [x28, #416]",
        "stp q2, q3, [x4, #800]",
        "ubfx x20, x4, #1, #2",
        "cbnz x20, #+0x8",
        "b #+0x14",
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "mov w21, #0xffff",
        "stp w20, w21, [x4, #24]",
        "ubfx x20, x4, #0, #3",
        "str x20, [x4, #512]"
      ]
    },
    "lfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /5",
      "ExpectedArm64ASM": [
        "dmb ld"
      ]
    },
    "xrstor [rax]": {
      "ExpectedInstructionCount": 133,
      "Comment": "GROUP15 0x0F 0xAE /5",
      "ExpectedArm64ASM": [
        "sub sp, sp, #0x40 (64)",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #0, #1",
        "cbnz x20, #+0x8",
        "b #+0x7c",
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldrh w20, [x4, #2]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldrb w20, [x4, #4]",
        "strb w20, [x28, #1202]",
        "ldp q2, q3, [x4, #32]",
        "str q2, [x28, #1056]",
        "str q3, [x28, #1072]",
        "ldp q2, q3, [x4, #64]",
        "str q2, [x28, #1088]",
        "str q3, [x28, #1104]",
        "ldp q2, q3, [x4, #96]",
        "str q2, [x28, #1120]",
        "str q3, [x28, #1136]",
        "ldp q2, q3, [x4, #128]",
        "str q2, [x28, #1152]",
        "str q3, [x28, #1168]",
        "b #+0x4c",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]",
        "movi v2.2d, #0x0",
        "str q2, [x28, #1056]",
        "str q2, [x28, #1072]",
        "str q2, [x28, #1088]",
        "str q2, [x28, #1104]",
        "str q2, [x28, #1120]",
        "str q2, [x28, #1136]",
        "str q2, [x28, #1152]",
        "str q2, [x28, #1168]",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #1, #1",
        "cbnz x20, #+0x8",
        "b #+0x28",
        "ldp q16, q17, [x4, #160]",
        "ldp q18, q19, [x4, #192]",
        "ldp q20, q21, [x4, #224]",
        "ldp q22, q23, [x4, #256]",
        "ldp q24, q25, [x4, #288]",
        "ldp q26, q27, [x4, #320]",
        "ldp q28, q29, [x4, #352]",
        "ldp q30, q31, [x4, #384]",
        "b #+0x44",
        "movi v31.2d, #0x0",
        "mov v30.16b, v31.16b",
        "mov v29.16b, v31.16b",
        "mov v28.16b, v31.16b",
        "mov v27.16b, v31.16b",
        "mov v26.16b, v31.16b",
        "mov v25.16b, v31.16b",
        "mov v24.16b, v31.16b",
        "mov v23.16b, v31.16b",
        "mov v22.16b, v31.16b",
        "mov v21.16b, v31.16b",
        "mov v20.16b, v31.16b",
        "mov v19.16b, v31.16b",
        "mov v18.16b, v31.16b",
        "mov v17.16b, v31.16b",
        "mov v16.16b, v31.16b",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #2, #1",
        "cbnz x20, #+0x8",
        "b #+0x58",
        "ldp q2, q3, [x4, #576]",
        "ldp q4, q5, [x4, #608]",
        "ldp q6, q7, [x4, #640]",
        "ldp q8, q9, [x4, #672]",
        "ldp q10, q11, [x4, #704]",
        "ldp q12, q13, [x4, #736]",
        "ldp q14, q15, [x4, #768]",
        "str q2, [sp]",
        "str q3, [sp, #32]",
        "ldp q2, q3, [x4, #800]",
        "stp q2, q3, [x28, #416]",
        "stp q14, q15, [x28, #384]",
        "stp q12, q13, [x28, #352]",
        "stp q10, q11, [x28, #320]",
        "stp q8, q9, [x28, #288]",
        "stp q6, q7, [x28, #256]",
        "stp q4, q5, [x28, #224]",
        "ldr q2, [sp]",
        "ldr q3, [sp, #32]",
        "stp q2, q3, [x28, #192]",
        "b #+0x28",
        "movi v2.2d, #0x0",
        "stp q2, q2, [x28, #416]",
        "stp q2, q2, [x28, #384]",
        "stp q2, q2, [x28, #352]",
        "stp q2, q2, [x28, #320]",
        "stp q2, q2, [x28, #288]",
        "stp q2, q2, [x28, #256]",
        "stp q2, q2, [x28, #224]",
        "stp q2, q2, [x28, #192]",
        "ldr x20, [x4, #512]",
        "ubfx x20, x20, #1, #2",
        "cbnz x20, #+0x8",
        "b #+0x34",
        "ldr w20, [x4, #24]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "b #+0x4",
        "add sp, sp, #0x40 (64)"
      ]
    },
    "mfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /6",
      "ExpectedArm64ASM": [
        "dmb sy"
      ]
    },
    "clwb [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /6",
      "ExpectedArm64ASM": [
        "dc cvac, x4"
      ]
    },
    "sfence": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dmb st"
      ]
    },
    "clflush [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dc civac, x4",
        "dsb ish"
      ]
    },
    "clflushopt [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "GROUP15 0x0F 0xAE /7",
      "ExpectedArm64ASM": [
        "dc civac, x4"
      ]
    },
    "prefetchnta [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /0"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1strm, [x4]"
      ]
    },
    "prefetcht0 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /1"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1keep, [x4]"
      ]
    },
    "prefetcht1 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /2"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl2keep, [x4]"
      ]
    },
    "prefetcht2 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUP16 0x0F 0x18 /3"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl3keep, [x4]"
      ]
    },
    "db 0x0f, 0x18, 0x20;": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "GROUP16 0x0F 0x18 /4",
        "nop dword [rax]",
        "NOP implementation"
      ],
      "ExpectedArm64ASM": []
    },
    "db 0x0f, 0x0d, 0x00": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /0",
        "prefetch_exclusive [rax]"
      ],
      "ExpectedArm64ASM": [
        "prfm pldl1keep, [x4]"
      ]
    },
    "prefetchw [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /1"
      ],
      "ExpectedArm64ASM": [
        "prfm pstl1keep, [x4]"
      ]
    },
    "prefetchwt1 [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "GROUPP 0x0F 0x0D /2"
      ],
      "ExpectedArm64ASM": [
        "prfm pstl1keep, [x4]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/SecondaryModRM.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "CLZERO"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "xgetbv": {
      "ExpectedInstructionCount": 52,
      "Comment": "0xF 0x01 /2 RM-0",
      "ExpectedArm64ASM": [
        "sub sp, sp, #0xf0 (240)",
        "mov x3, sp",
        "st1 {v2.2d, v3.2d}, [x3], #32",
        "st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x3], #64",
        "st1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x3], #64",
        "st1 {v12.2d, v13.2d, v14.2d, v15.2d}, [x3], #64",
        "stp x18, x30, [x3], #16",
        "mrs x3, nzcv",
        "str w3, [x28, #1032]",
        "str x25, [x28, #176]",
        "stp x4, x7, [x28, #32]",
        "stp x5, x6, [x28, #48]",
        "stp x8, x9, [x28, #64]",
        "stp x10, x11, [x28, #80]",
        "stp x12, x13, [x28, #96]",
        "stp x14, x15, [x28, #112]",
        "stp x16, x17, [x28, #128]",
        "stp x19, x29, [x28, #144]",
        "stp w26, w27, [x28, #16]",
        "add x3, x28, #0x1c0 (448)",
        "st1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x3], #64",
        "st1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x3], #64",
        "st1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x3], #64",
        "st1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x3], #64",
        "mov w1, w7",
        "ldr x0, [x28, #1544]",
        "ldr x2, [x28, #1560]",
        "blr x2",
        "ldr x25, [x28, #176]",
        "ldr w4, [x28, #1032]",
        "msr nzcv, x4",
        "add x4, x28, #0x1c0 (448)",
        "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [x4], #64",
        "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [x4], #64",
        "ld1 {v24.2d, v25.2d, v26.2d, v27.2d}, [x4], #64",
        "ld1 {v28.2d, v29.2d, v30.2d, v31.2d}, [x4], #64",
        "ldp x4, x7, [x28, #32]",
        "ldp x5, x6, [x28, #48]",
        "ldp x8, x9, [x28, #64]",
        "ldp x10, x11, [x28, #80]",
        "ldp x12, x13, [x28, #96]",
        "ldp x14, x15, [x28, #112]",
        "ldp x16, x17, [x28, #128]",
        "ldp x19, x29, [x28, #144]",
        "ldp w26, w27, [x28, #16]",
        "ld1 {v2.2d, v3.2d}, [sp], #32",
        "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [sp], #64",
        "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [sp], #64",
        "ld1 {v12.2d, v13.2d, v14.2d, v15.2d}, [sp], #64",
        "ldp x18, x30, [sp], #16",
        "mov w4, w0",
        "lsr x5, x0, #32"
      ]
    },
    "rdtscp": {
      "Skip": "Yes",
      "ExpectedInstructionCount": 21,
      "Comment": "0xF 0x01 /7 RM-1",
      "ExpectedArm64ASM": [
        "dmb ld",
        "mrs x20, S3_3_c14_c0_2",
        "lsl w4, w20, #7",
        "lsr x5, x20, #25",
        "mrs x0, nzcv",
        "str w0, [x28, #1000]",
        "str x8, [x28, #312]",
        "mov w0, #0x100",
        "str x0, [x28, #1312]",
        "sub sp, sp, #0x10 (16)",
        "mov w8, #0xa8",
        "mov x0, sp",
        "add x1, sp, #0x4 (4)",
        "svc #0x0",
        "ldp w0, w1, [sp]",
        "sub sp, sp, #0x10 (16)",
        "ldr w8, [x28, #1000]",
        "msr nzcv, x8",
        "ldr x8, [x28, #312]",
        "str xzr, [x28, #1312]",
        "orr x7, x0, x1, lsl #12"
      ]
    },
    "clzero rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xF 0x01 /7 RM-4",
      "ExpectedArm64ASM": [
        "dc zva, x4"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "push fs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xa0",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #970]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop fs": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xa1",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #970]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #1000]"
      ]
    },
    "push gs": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x0f 0xa8",
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #968]",
        "str w20, [x8, #-4]!"
      ]
    },
    "pop gs": {
      "ExpectedInstructionCount": 12,
      "Comment": "0x0f 0xa9",
      "ExpectedArm64ASM": [
        "ldr w20, [x8], #4",
        "strh w20, [x28, #968]",
        "ubfx w21, w20, #2, #1",
        "and w20, w20, #0xfff8",
        "add x0, x28, x21, lsl #3",
        "ldr x21, [x0, #1184]",
        "ldr x20, [x21, w20, uxtw]",
        "lsr x21, x20, #32",
        "and w22, w21, #0xff000000",
        "orr w20, w22, w20, lsr #16",
        "bfi w20, w21, #16, #8",
        "str w20, [x28, #992]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_OpSize.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FCMA",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "movupd xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x66 0x0f 0x10",
      "ExpectedArm64ASM": []
    },
    "movupd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x10",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movupd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x10",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "movupd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x11",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "movlpd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x12",
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[0], [x4]"
      ]
    },
    "movlpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x13",
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "unpcklpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x14",
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v16.2d, v17.2d"
      ]
    },
    "unpckhpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x15",
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v16.2d, v17.2d"
      ]
    },
    "movhpd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x16",
      "ExpectedArm64ASM": [
        "ld1 {v16.d}[1], [x4]"
      ]
    },
    "movhpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x17",
      "ExpectedArm64ASM": [
        "st1 {v16.d}[1], [x4]"
      ]
    },
    "movapd xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x66 0x0f 0x28",
      "ExpectedArm64ASM": []
    },
    "movapd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x28",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movapd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x28",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "movapd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x29",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "cvtpi2pd xmm0, mm0": {
      "ExpectedInstructionCount": 6,
      "Comment": "0x66 0x0f 0x2a",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "sxtl v2.2d, v2.2s",
        "scvtf v16.2d, v2.2d"
      ]
    },
    "movntpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x2b",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "cvttpd2pi mm0, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x66 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "frint32z v2.2d, v16.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v2.2s, v2.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "cvtpd2pi mm0, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x66 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "frint32x v2.2d, v16.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v2.2s, v2.2d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "ucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0x2e",
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "comisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0x2f",
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "movmskpd eax, xmm0": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0x50",
      "ExpectedArm64ASM": [
        "uzp2 v2.4s, v16.4s, v16.4s",
        "mov x20, v2.d[0]",
        "bfi x20, x20, #31, #32",
        "lsr x4, x20, #62"
      ]
    },
    "sqrtpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x51",
      "ExpectedArm64ASM": [
        "fsqrt v16.2d, v17.2d"
      ]
    },
    "addpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x58",
      "ExpectedArm64ASM": [
        "fadd v16.2d, v16.2d, v17.2d"
      ]
    },
    "mulpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x59",
      "ExpectedArm64ASM": [
        "fmul v16.2d, v16.2d, v17.2d"
      ]
    },
    "cvtpd2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "fcvtn v16.2s, v17.2d"
      ]
    },
    "cvtpd2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "fcvtn v16.2s, v2.2d"
      ]
    },
    "cvtps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x5b",
      "ExpectedArm64ASM": [
        "frint32x v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s"
      ]
    },
    "cvtps2dq xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0x5b",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "frint32x v2.4s, v2.4s",
        "fcvtzs v16.4s, v2.4s"
      ]
    },
    "subpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x5c",
      "ExpectedArm64ASM": [
        "fsub v16.2d, v16.2d, v17.2d"
      ]
    },
    "minpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x5d",
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v17.2d, v16.2d",
        "bif v16.16b, v17.16b, v0.16b"
      ]
    },
    "divpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x5e",
      "ExpectedArm64ASM": [
        "fdiv v16.2d, v16.2d, v17.2d"
      ]
    },
    "maxpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x5f",
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v17.2d, v16.2d",
        "bit v16.16b, v17.16b, v0.16b"
      ]
    },
    "punpcklbw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x60",
      "ExpectedArm64ASM": [
        "zip1 v16.16b, v16.16b, v17.16b"
      ]
    },
    "punpcklbw xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x60",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.16b, v16.16b, v2.16b"
      ]
    },
    "punpcklwd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x61",
      "ExpectedArm64ASM": [
        "zip1 v16.8h, v16.8h, v17.8h"
      ]
    },
    "punpcklwd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x61",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.8h, v16.8h, v2.8h"
      ]
    },
    "punpckldq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x62",
      "ExpectedArm64ASM": [
        "zip1 v16.4s, v16.4s, v17.4s"
      ]
    },
    "punpckldq xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x62",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.4s, v16.4s, v2.4s"
      ]
    },
    "packsswb xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x63",
      "ExpectedArm64ASM": [
        "sqxtn v16.8b, v16.8h",
        "sqxtn2 v16.16b, v17.8h"
      ]
    },
    "packsswb xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0x63",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "sqxtn v16.8b, v16.8h",
        "sqxtn2 v16.16b, v2.8h"
      ]
    },
    "packsswb xmm0, xmm0": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0x63",
      "ExpectedArm64ASM": [
        "mov v0.16b, v16.16b",
        "sqxtn v16.8b, v16.8h",
        "sqxtn2 v16.16b, v0.8h"
      ]
    },
    "pcmpgtb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x64",
      "ExpectedArm64ASM": [
        "cmgt v16.16b, v16.16b, v17.16b"
      ]
    },
    "pcmpgtw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x65",
      "ExpectedArm64ASM": [
        "cmgt v16.8h, v16.8h, v17.8h"
      ]
    },
    "pcmpgtd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x66",
      "ExpectedArm64ASM": [
        "cmgt v16.4s, v16.4s, v17.4s"
      ]
    },
    "punpckhbw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x68",
      "ExpectedArm64ASM": [
        "zip2 v16.16b, v16.16b, v17.16b"
      ]
    },
    "punpckhbw xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x68",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.16b, v16.16b, v2.16b"
      ]
    },
    "punpckhwd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x69",
      "ExpectedArm64ASM": [
        "zip2 v16.8h, v16.8h, v17.8h"
      ]
    },
    "punpckhwd xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x69",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.8h, v16.8h, v2.8h"
      ]
    },
    "punpckhdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6a",
      "ExpectedArm64ASM": [
        "zip2 v16.4s, v16.4s, v17.4s"
      ]
    },
    "punpckhdq xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x6a",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.4s, v16.4s, v2.4s"
      ]
    },
    "packssdw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0x6b",
      "ExpectedArm64ASM": [
        "sqxtn v16.4h, v16.4s",
        "sqxtn2 v16.8h, v17.4s"
      ]
    },
    "punpcklqdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6c",
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v16.2d, v17.2d"
      ]
    },
    "punpckhqdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6d",
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v16.2d, v17.2d"
      ]
    },
    "movd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6e",
      "ExpectedArm64ASM": [
        "ldr s16, [x4]"
      ]
    },
    "movd xmm0, eax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6e",
      "ExpectedArm64ASM": [
        "fmov s16, w4"
      ]
    },
    "movq xmm0, qword [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6e",
      "ExpectedArm64ASM": [
        "ldr d16, [x4]"
      ]
    },
    "movq xmm0, rax": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6e",
      "ExpectedArm64ASM": [
        "fmov d16, x4"
      ]
    },
    "movdqa xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0x66 0x0f 0x6f",
      "ExpectedArm64ASM": []
    },
    "movdqa xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6f",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x6f",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "pshufd xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Broadcast element 0",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]"
      ]
    },
    "pshufd xmm0, xmm1, 11100100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Identity copy",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "pshufd xmm0, xmm1, 01010000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Zip with self",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.4s, v17.4s, v17.4s"
      ]
    },
    "pshufd xmm0, [rax], 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast element 0 from memory",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v16.4s, v2.s[0]"
      ]
    },
    "pshufd xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Broadcast element 0",
        "Element 0 becomes element 1",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2680]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pshufd xmm0, [rax], 1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Broadcast element 0 from Memory",
        "Element 0 becomes element 1",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ldr x0, [x28, #2680]",
        "ldr q3, [x0, #16]",
        "tbl v16.16b, {v2.16b}, v3.16b"
      ]
    },
    "pshufd xmm0, xmm1, 0xff": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Broadcast element 3",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[3]"
      ]
    },
    "pshufd xmm0, [rax], 0xff": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast element 3 from memory",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v16.4s, v2.s[3]"
      ]
    },
    "pshufd xmm0, [rax], 00_00_11_10b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inverse elements",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v3.4s, v2.s[0]",
        "ext v16.16b, v2.16b, v3.16b, #8"
      ]
    },
    "pshufd xmm0, [rax], 00_01_00_01b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Weird reversed low elements and broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "rev64 v2.4s, v2.4s",
        "zip1 v16.2d, v2.2d, v2.2d"
      ]
    },
    "pshufd xmm0, [rax], 00_01_10_11b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inverse elements",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "rev64 v2.4s, v2.4s",
        "ext v16.16b, v2.16b, v2.16b, #8"
      ]
    },
    "pshufd xmm0, [rax], 00_10_00_10b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Weird reversed even elements and broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "uzp1 v2.4s, v2.4s, v2.4s",
        "ext v16.16b, v2.16b, v2.16b, #4"
      ]
    },
    "pshufd xmm0, [rax], 00_11_00_11b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Weird Low plus high element reversed and broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v2.16b, v2.16b, v2.16b, #4",
        "zip2 v16.2d, v2.2d, v2.2d"
      ]
    },
    "pshufd xmm0, [rax], 00_11_10_01b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Vector rotate - One element",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v16.16b, v2.16b, v2.16b, #4"
      ]
    },
    "pshufd xmm0, [rax], 01_00_01_00b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Duplicate bottom 64-bits",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v16.2d, v2.d[0]"
      ]
    },
    "pshufd xmm0, [rax], 01_00_11_10b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Vector rotate - Two elements",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v16.16b, v2.16b, v2.16b, #8"
      ]
    },
    "pshufd xmm0, [rax], 10_00_10_00b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Even elements broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "uzp1 v16.4s, v2.4s, v2.4s"
      ]
    },
    "pshufd xmm0, [rax], 10_01_10_11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Selects elements 3, 2, 1, 2 (not a pure rotate, so it falls back to a table lookup)",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ldr x0, [x28, #2680]",
        "ldr q3, [x0, #2480]",
        "tbl v16.16b, {v2.16b}, v3.16b"
      ]
    },
    "pshufd xmm0, [rax], 01_10_01_10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Weird middle elements swizzle plus broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v2.16b, v2.16b, v2.16b, #4",
        "rev64 v2.4s, v2.4s",
        "zip1 v16.2d, v2.2d, v2.2d"
      ]
    },
    "pshufd xmm0, [rax], 01_11_01_11b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Weird reversed upper elements and broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "uzp2 v2.4s, v2.4s, v2.4s",
        "ext v16.16b, v2.16b, v2.16b, #4"
      ]
    },
    "pshufd xmm0, [rax], 10_01_10_01b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Middle two elements broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v2.16b, v2.16b, v2.16b, #4",
        "zip1 v16.2d, v2.2d, v2.2d"
      ]
    },
    "pshufd xmm0, [rax], 10_11_00_01b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Inverse elements",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "rev64 v16.4s, v2.4s"
      ]
    },
    "pshufd xmm0, [rax], 10_11_10_11b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Weird top two elements reverse and broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v2.2d, v2.2d, v2.2d",
        "ext v16.16b, v2.16b, v2.16b, #4"
      ]
    },
    "pshufd xmm0, [rax], 11_00_11_00b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Weird low plus high element broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "ext v2.16b, v2.16b, v2.16b, #4",
        "zip2 v2.2d, v2.2d, v2.2d",
        "ext v16.16b, v2.16b, v2.16b, #4"
      ]
    },
    "pshufd xmm0, [rax], 11_01_11_01b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Odd elements broadcast",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "uzp2 v16.4s, v2.4s, v2.4s"
      ]
    },
    "pshufd xmm0, [rax], 11_10_11_10b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Duplicate Top 64-bits",
        "0x66 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "dup v16.2d, v2.d[1]"
      ]
    },
    "pcmpeqb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x74",
      "ExpectedArm64ASM": [
        "cmeq v16.16b, v16.16b, v17.16b"
      ]
    },
    "pcmpeqw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x75",
      "ExpectedArm64ASM": [
        "cmeq v16.8h, v16.8h, v17.8h"
      ]
    },
    "pcmpeqd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x76",
      "ExpectedArm64ASM": [
        "cmeq v16.4s, v16.4s, v17.4s"
      ]
    },
    "extrq xmm0, 64, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "SSE4a",
        "0x66 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "fmov d2, x20",
        "and v16.16b, v16.16b, v2.16b"
      ]
    },
    "extrq xmm0, 32, 32": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "SSE4a",
        "0x66 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "ushr v2.2d, v16.2d, #32",
        "mov w20, #0xffffffff",
        "fmov d3, x20",
        "and v16.16b, v2.16b, v3.16b"
      ]
    },
    "extrq xmm0, 0, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "SSE4a",
        "0x66 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "fmov d2, x20",
        "and v16.16b, v16.16b, v2.16b"
      ]
    },
    "extrq xmm0, xmm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "SSE4a",
        "0x66 0x0f 0x79"
      ],
      "ExpectedArm64ASM": [
        "mov w0, #0x3f",
        "dup v2.2d, x0",
        "and v3.8b, v17.8b, v2.8b",
        "ushr v4.2d, v17.2d, #8",
        "and v2.8b, v4.8b, v2.8b",
        "neg v0.2d, v2.2d",
        "ushl v2.2d, v16.2d, v0.2d",
        "mov x20, v3.d[0]",
        "mrs x21, nzcv",
        "mov x0, #0xffffffffffffffff",
        "cmp x20, #0x0 (0)",
        "lsl x1, x0, x20",
        "csinv x20, x0, x1, eq",
        "fmov d3, x20",
        "and v16.16b, v2.16b, v3.16b",
        "msr nzcv, x21"
      ]
    },
    "haddpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7c",
      "ExpectedArm64ASM": [
        "faddp v16.2d, v16.2d, v17.2d"
      ]
    },
    "hsubpd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0x7d",
      "ExpectedArm64ASM": [
        "uzp1 v2.2d, v16.2d, v17.2d",
        "uzp2 v3.2d, v16.2d, v17.2d",
        "fsub v16.2d, v2.2d, v3.2d"
      ]
    },
    "movd eax, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "movq rax, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "mov x4, v16.d[0]"
      ]
    },
    "movd dword [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "movq qword [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "movdqa [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0x7f",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "cmppd xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v16.2d, v17.2d"
      ]
    },
    "cmppd xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmgt v16.2d, v17.2d, v16.2d"
      ]
    },
    "cmppd xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v16.2d, v17.2d, v16.2d"
      ]
    },
    "cmppd xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v16.2d, v17.2d",
        "fcmgt v1.2d, v17.2d, v16.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b"
      ]
    },
    "cmppd xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v16.2d, v17.2d",
        "mvn v16.16b, v16.16b"
      ]
    },
    "cmppd xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmgt v2.2d, v17.2d, v16.2d",
        "mvn v16.16b, v2.16b"
      ]
    },
    "cmppd xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v2.2d, v17.2d, v16.2d",
        "mvn v16.16b, v2.16b"
      ]
    },
    "cmppd xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xc2",
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v16.2d, v17.2d",
        "fcmgt v1.2d, v17.2d, v16.2d",
        "orr v16.16b, v0.16b, v1.16b"
      ]
    },
    "pinsrw xmm0, eax, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[0], w4"
      ]
    },
    "pinsrw xmm0, eax, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[1], w4"
      ]
    },
    "pinsrw xmm0, eax, 010b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[2], w4"
      ]
    },
    "pinsrw xmm0, eax, 011b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[3], w4"
      ]
    },
    "pinsrw xmm0, eax, 100b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[4], w4"
      ]
    },
    "pinsrw xmm0, eax, 101b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[5], w4"
      ]
    },
    "pinsrw xmm0, eax, 110b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[6], w4"
      ]
    },
    "pinsrw xmm0, eax, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "mov v16.h[7], w4"
      ]
    },
    "pinsrw xmm0, [rax], 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[0], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[1], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 010b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[2], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 011b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[3], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 100b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[4], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 101b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[5], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 110b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[6], [x4]"
      ]
    },
    "pinsrw xmm0, [rax], 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc4",
      "ExpectedArm64ASM": [
        "ld1 {v16.h}[7], [x4]"
      ]
    },
    "pextrw eax, xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "pextrw eax, xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[1]"
      ]
    },
    "pextrw eax, xmm0, 010b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[2]"
      ]
    },
    "pextrw eax, xmm0, 011b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[3]"
      ]
    },
    "pextrw eax, xmm0, 100b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[4]"
      ]
    },
    "pextrw eax, xmm0, 101b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[5]"
      ]
    },
    "pextrw eax, xmm0, 110b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[6]"
      ]
    },
    "pextrw eax, xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "pextrw [rax], xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[1], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 010b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[2], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 011b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[3], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 100b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[4], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 101b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[5], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 110b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[6], [x4]"
      ]
    },
    "pextrw [rax], xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc5",
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "shufpd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v16.2d, v17.2d"
      ]
    },
    "shufpd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ext v16.16b, v16.16b, v17.16b, #8"
      ]
    },
    "shufpd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "shufpd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v16.2d, v17.2d"
      ]
    },
    "shufpd xmm1, xmm0, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "zip1 v17.2d, v17.2d, v16.2d"
      ]
    },
    "shufpd xmm1, xmm0, 01b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "ext v17.16b, v17.16b, v16.16b, #8"
      ]
    },
    "shufpd xmm1, xmm0, 10b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "mov v17.d[1], v16.d[1]"
      ]
    },
    "shufpd xmm1, xmm0, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xc6",
      "ExpectedArm64ASM": [
        "zip2 v17.2d, v17.2d, v16.2d"
      ]
    },
    "addsubpd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xd0",
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v17.16b, v2.16b",
        "fadd v16.2d, v16.2d, v2.2d"
      ]
    },
    "psrlw xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xd1",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "ushl v16.8h, v16.8h, v0.8h"
      ]
    },
    "psrld xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xd2",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v16.4s, v0.4s"
      ]
    },
    "psrlq xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xd3",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v16.2d, v0.2d"
      ]
    },
    "paddq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xd4",
      "ExpectedArm64ASM": [
        "add v16.2d, v16.2d, v17.2d"
      ]
    },
    "pmullw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xd5",
      "ExpectedArm64ASM": [
        "mul v16.8h, v16.8h, v17.8h"
      ]
    },
    "pmovmskb eax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0x66 0x0f 0xd7",
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3296]",
        "cmlt v3.16b, v16.16b, #0",
        "and v2.16b, v3.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "psubusb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xd8",
      "ExpectedArm64ASM": [
        "uqsub v16.16b, v16.16b, v17.16b"
      ]
    },
    "psubusw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xd9",
      "ExpectedArm64ASM": [
        "uqsub v16.8h, v16.8h, v17.8h"
      ]
    },
    "pminub xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xda",
      "ExpectedArm64ASM": [
        "umin v16.16b, v16.16b, v17.16b"
      ]
    },
    "pand xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xdb",
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b"
      ]
    },
    "paddusb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xdc",
      "ExpectedArm64ASM": [
        "uqadd v16.16b, v16.16b, v17.16b"
      ]
    },
    "paddusw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xdd",
      "ExpectedArm64ASM": [
        "uqadd v16.8h, v16.8h, v17.8h"
      ]
    },
    "pmaxub xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xde",
      "ExpectedArm64ASM": [
        "umax v16.16b, v16.16b, v17.16b"
      ]
    },
    "pandn xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xdf",
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b"
      ]
    },
    "pavgb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe0",
      "ExpectedArm64ASM": [
        "urhadd v16.16b, v16.16b, v17.16b"
      ]
    },
    "psraw xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xe1",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "neg v0.8h, v0.8h",
        "sshl v16.8h, v16.8h, v0.8h"
      ]
    },
    "psrad xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xe2",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v16.4s, v0.4s"
      ]
    },
    "pavgw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe3",
      "ExpectedArm64ASM": [
        "urhadd v16.8h, v16.8h, v17.8h"
      ]
    },
    "pmulhuw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xe4",
      "ExpectedArm64ASM": [
        "umull2 v0.4s, v16.8h, v17.8h",
        "umull v16.4s, v16.4h, v17.4h",
        "uzp2 v16.8h, v16.8h, v0.8h"
      ]
    },
    "pmulhw xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xe5",
      "ExpectedArm64ASM": [
        "smull2 v0.4s, v16.8h, v17.8h",
        "smull v16.4s, v16.4h, v17.4h",
        "uzp2 v16.8h, v16.8h, v0.8h"
      ]
    },
    "cvttpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xe6",
      "ExpectedArm64ASM": [
        "frint32z v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d"
      ]
    },
    "movntdq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe7",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "psubsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe8",
      "ExpectedArm64ASM": [
        "sqsub v16.16b, v16.16b, v17.16b"
      ]
    },
    "psubsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe9",
      "ExpectedArm64ASM": [
        "sqsub v16.8h, v16.8h, v17.8h"
      ]
    },
    "pminsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xea",
      "ExpectedArm64ASM": [
        "smin v16.8h, v16.8h, v17.8h"
      ]
    },
    "por xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xeb",
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b"
      ]
    },
    "paddsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xec",
      "ExpectedArm64ASM": [
        "sqadd v16.16b, v16.16b, v17.16b"
      ]
    },
    "paddsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xed",
      "ExpectedArm64ASM": [
        "sqadd v16.8h, v16.8h, v17.8h"
      ]
    },
    "pmaxsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xee",
      "ExpectedArm64ASM": [
        "smax v16.8h, v16.8h, v17.8h"
      ]
    },
    "pxor xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xef",
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "pxor xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xef",
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "psllw xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xf1",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.8h, v0.h[0]",
        "ushl v16.8h, v16.8h, v0.8h"
      ]
    },
    "pslld xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xf2",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.4s, v0.s[0]",
        "ushl v16.4s, v16.4s, v0.4s"
      ]
    },
    "psllq xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xf3",
      "ExpectedArm64ASM": [
        "uqshl d0, d17, #57",
        "ushr d0, d0, #57",
        "dup v0.2d, v0.d[0]",
        "ushl v16.2d, v16.2d, v0.2d"
      ]
    },
    "pmuludq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xf4",
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v16.4s, v16.4s",
        "uzp1 v3.4s, v17.4s, v17.4s",
        "umull v16.2d, v2.2s, v3.2s"
      ]
    },
    "pmaddwd xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0x66 0x0f 0xf5",
      "ExpectedArm64ASM": [
        "smull v2.4s, v16.4h, v17.4h",
        "smull2 v3.4s, v16.8h, v17.8h",
        "addp v16.4s, v2.4s, v3.4s"
      ]
    },
    "psadbw xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x66 0x0f 0xf6",
      "ExpectedArm64ASM": [
        "uabdl v2.8h, v16.8b, v17.8b",
        "uabdl2 v3.8h, v16.16b, v17.16b",
        "addv h2, v2.8h",
        "addv h3, v3.8h",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "maskmovdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": "0x66 0x0f 0xf7",
      "ExpectedArm64ASM": [
        "cmlt v2.16b, v17.16b, #0",
        "ldr q3, [x11]",
        "bsl v2.16b, v16.16b, v3.16b",
        "str q2, [x11]"
      ]
    },
    "psubb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xf8",
      "ExpectedArm64ASM": [
        "sub v16.16b, v16.16b, v17.16b"
      ]
    },
    "psubw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xf9",
      "ExpectedArm64ASM": [
        "sub v16.8h, v16.8h, v17.8h"
      ]
    },
    "psubd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xfa",
      "ExpectedArm64ASM": [
        "sub v16.4s, v16.4s, v17.4s"
      ]
    },
    "psubq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xfb",
      "ExpectedArm64ASM": [
        "sub v16.2d, v16.2d, v17.2d"
      ]
    },
    "paddb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xfc",
      "ExpectedArm64ASM": [
        "add v16.16b, v16.16b, v17.16b"
      ]
    },
    "paddw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xfd",
      "ExpectedArm64ASM": [
        "add v16.8h, v16.8h, v17.8h"
      ]
    },
    "paddd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xfe",
      "ExpectedArm64ASM": [
        "add v16.4s, v16.4s, v17.4s"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_OpSize_FCMA.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FCMA"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "addsubpd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xd0",
      "ExpectedArm64ASM": [
        "ext v2.16b, v17.16b, v17.16b, #8",
        "fcadd v16.2d, v16.2d, v2.2d, #90"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_OpSize_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "psrlw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xd1",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsr z16.h, p6/m, z16.h, z0.d"
      ]
    },
    "psrld xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xd2",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsr z16.s, p6/m, z16.s, z0.d"
      ]
    },
    "psrlq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xd3",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsr z16.d, p6/m, z16.d, z0.d"
      ]
    },
    "psraw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xe1",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "asr z16.h, p6/m, z16.h, z0.d"
      ]
    },
    "psrad xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xe2",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "asr z16.s, p6/m, z16.s, z0.d"
      ]
    },
    "pmulhuw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe4",
      "ExpectedArm64ASM": [
        "umulh z16.h, z16.h, z17.h"
      ]
    },
    "pmulhw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0x66 0x0f 0xe5",
      "ExpectedArm64ASM": [
        "smulh z16.h, z16.h, z17.h"
      ]
    },
    "psllw xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xf1",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsl z16.h, p6/m, z16.h, z0.d"
      ]
    },
    "pslld xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xf2",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsl z16.s, p6/m, z16.s, z0.d"
      ]
    },
    "psllq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0x66 0x0f 0xf3",
      "ExpectedArm64ASM": [
        "mov z0.d, d17",
        "lsl z16.d, p6/m, z16.d, z0.d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_OpSize_SVE256.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "pmulhuw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "SVE-256bit changes behaviour slightly",
        "0x66 0x0f 0xe4"
      ],
      "ExpectedArm64ASM": [
        "umulh z16.h, p6/m, z16.h, z17.h"
      ]
    },
    "pmulhw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "SVE-256bit changes behaviour slightly",
        "0x66 0x0f 0xe5"
      ],
      "ExpectedArm64ASM": [
        "smulh z16.h, p6/m, z16.h, z17.h"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_REP.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "FRINTTS",
      "CSSC"
    ]
  },
  "Instructions": {
    "movss xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x10",
      "ExpectedArm64ASM": [
        "mov v16.s[0], v17.s[0]"
      ]
    },
    "movss xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x10",
      "ExpectedArm64ASM": [
        "ldr s16, [x4]"
      ]
    },
    "movss [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x11",
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "movsldup xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x12",
      "ExpectedArm64ASM": [
        "trn1 v16.4s, v17.4s, v17.4s"
      ]
    },
    "movsldup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x12",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn1 v16.4s, v2.4s, v2.4s"
      ]
    },
    "movshdup xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x16",
      "ExpectedArm64ASM": [
        "trn2 v16.4s, v17.4s, v17.4s"
      ]
    },
    "movshdup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x16",
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn2 v16.4s, v2.4s, v2.4s"
      ]
    },
    "cvtsi2ss xmm0, eax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf s0, w4",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cvtsi2ss xmm0, dword [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "scvtf s0, s2",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cvtsi2ss xmm0, rax": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x2a",
      "ExpectedArm64ASM": [
        "scvtf s0, x4",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cvtsi2ss xmm0, qword [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x4]",
        "scvtf s0, x20",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "movntss [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x2b",
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "cvttss2si eax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "fcvtzs w20, s16",
        "mov w21, #0x80000000",
        "ldr s2, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s2, s16",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvttss2si eax, dword [rbx]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "fcvtzs w20, s2",
        "mov w21, #0x80000000",
        "ldr s3, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvttss2si rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "fcvtzs x20, s16",
        "mov x21, #0x8000000000000000",
        "ldr s2, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s2, s16",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvttss2si rax, dword [rbx]": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "fcvtzs x20, s2",
        "mov x21, #0x8000000000000000",
        "ldr s3, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvtss2si eax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frinti s2, s16",
        "fcvtzs w20, s2",
        "mov w21, #0x80000000",
        "ldr s3, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvtss2si eax, dword [rbx]": {
      "ExpectedInstructionCount": 9,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frinti s2, s2",
        "fcvtzs w20, s2",
        "mov w21, #0x80000000",
        "ldr s3, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvtss2si rax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frinti s2, s16",
        "fcvtzs x20, s2",
        "mov x21, #0x8000000000000000",
        "ldr s3, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "cvtss2si rax, dword [rbx]": {
      "ExpectedInstructionCount": 9,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frinti s2, s2",
        "fcvtzs x20, s2",
        "mov x21, #0x8000000000000000",
        "ldr s3, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "sqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x51",
      "ExpectedArm64ASM": [
        "fsqrt s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "rsqrtss xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf3 0x0f 0x52"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s17",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "rcpss xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0x53"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "addss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd s0, s16, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "mulss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul s0, s16, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cvtss2sd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "fcvt d0, s17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtss2sd xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0x5a",
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d0, s2",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvttps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": "0xf3 0x0f 0x5b",
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3520]",
        "ldr q3, [x28, #3424]",
        "fcvtzs v4.4s, v17.4s",
        "fcmgt v3.4s, v3.4s, v17.4s",
        "mov v16.16b, v3.16b",
        "bsl v16.16b, v4.16b, v2.16b"
      ]
    },
    "subss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub s0, s16, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "minss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "fcmp s16, s17",
        "fcsel s0, s16, s17, mi",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20"
      ]
    },
    "divss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv s0, s16, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "maxss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "fcmp s16, s17",
        "fcsel s0, s16, s17, gt",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20"
      ]
    },
    "movdqu xmm0, xmm0": {
      "ExpectedInstructionCount": 0,
      "Comment": "0xf3 0x0f 0x6f",
      "ExpectedArm64ASM": []
    },
    "movdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x6f",
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "movdqu xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x6f",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "pshufhw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast upper-half element 0",
        "0xf3 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[4]",
        "trn1 v16.2d, v17.2d, v2.2d"
      ]
    },
    "pshufhw xmm0, xmm1, 11100100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Identity copy",
        "0xf3 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "pshufhw xmm0, xmm1, 01010000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Upper elements Self-zip",
        "0xf3 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2672]",
        "ldr q2, [x0, #1280]",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pshufhw xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Broadcast upper-half element 0 into upper-half elements 1,2,3",
        "Upper-half element 0 is replaced by upper-half element 1",
        "0xf3 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2672]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pshufhw xmm0, xmm1, 0xff": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast upper-half Element 3",
        "0xf3 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[7]",
        "trn1 v16.2d, v17.2d, v2.2d"
      ]
    },
    "movq xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "mov v16.8b, v16.8b"
      ]
    },
    "movq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "mov v16.8b, v17.8b"
      ]
    },
    "movq xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x7e",
      "ExpectedArm64ASM": [
        "ldr d16, [x4]"
      ]
    },
    "movdqu [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf3 0x0f 0x7f",
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "popcnt ax, bx": {
      "ExpectedInstructionCount": 9,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "uxth w20, w6",
        "fmov s0, w20",
        "cnt v0.8b, v0.8b",
        "addp v0.8b, v0.8b, v0.8b",
        "umov w20, v0.b[0]",
        "bfxil x4, x20, #0, #16",
        "mov w27, #0x0",
        "cmp w20, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "fmov s0, w6",
        "cnt v0.8b, v0.8b",
        "addv b0, v0.8b",
        "umov w4, v0.b[0]",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "popcnt rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xb8",
      "ExpectedArm64ASM": [
        "fmov d0, x6",
        "cnt v0.8b, v0.8b",
        "addv b0, v0.8b",
        "umov w4, v0.b[0]",
        "mov w27, #0x0",
        "cmp w4, #0x0 (0)",
        "mov w26, #0x1"
      ]
    },
    "tzcnt ax, bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w20, w6",
        "orr w20, w20, #0x8000",
        "clz w20, w20",
        "bfxil x4, x20, #0, #16",
        "cmn wzr, w20, lsl #16",
        "eor x20, x20, #0x10",
        "ubfx x20, x20, #4, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "tzcnt eax, ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit w4, w6",
        "clz w4, w4",
        "cmp w4, #0x0 (0)",
        "eor x20, x4, #0x20",
        "ubfx x20, x20, #5, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "tzcnt rax, rbx": {
      "ExpectedInstructionCount": 8,
      "Comment": "0xf3 0x0f 0xbc",
      "ExpectedArm64ASM": [
        "rbit x4, x6",
        "clz x4, x4",
        "cmp x4, #0x0 (0)",
        "eor x20, x4, #0x40",
        "ubfx x20, x20, #6, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lzcnt ax, bx": {
      "ExpectedInstructionCount": 10,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "lsl w20, w6, #16",
        "orr w20, w20, #0x8000",
        "clz w20, w20",
        "bfxil x4, x20, #0, #16",
        "cmn wzr, w20, lsl #16",
        "eor x20, x20, #0x10",
        "ubfx x20, x20, #4, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lzcnt eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "clz w4, w6",
        "cmp w4, #0x0 (0)",
        "eor x20, x4, #0x20",
        "ubfx x20, x20, #5, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "lzcnt rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": "0xf3 0x0f 0xbd",
      "ExpectedArm64ASM": [
        "clz x4, x6",
        "cmp x4, #0x0 (0)",
        "eor x20, x4, #0x40",
        "ubfx x20, x20, #6, #1",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "cmpss xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s0, s17, s16",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s0, s17, s16",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s17, s16",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq s0, s17, s16",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s17, s16",
        "mvn v2.16b, v2.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "cmpss xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf3 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge s0, s16, s17",
        "fcmgt s1, s17, s16",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "movq2dq xmm0, mm0": {
      "ExpectedInstructionCount": 4,
      "Comment": "0xf3 0x0f 0xd6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d16, [x28, #1056]"
      ]
    },
    "cvtdq2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0xe6",
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "scvtf v16.2d, v2.2d"
      ]
    },
    "cvtdq2pd xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0xe6",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "sxtl v2.2d, v2.2s",
        "scvtf v16.2d, v2.2d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_REPNE.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "FCMA",
      "AFP"
    ]
  },
  "Instructions": {
    "movsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x10",
      "ExpectedArm64ASM": [
        "mov v16.d[0], v17.d[0]"
      ]
    },
    "movsd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x10",
      "ExpectedArm64ASM": [
        "ldr d16, [x4]"
      ]
    },
    "movsd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x11",
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "movddup xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x12",
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]"
      ]
    },
    "movddup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0x12",
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "dup v16.2d, v2.d[0]"
      ]
    },
    "cvtsi2sd xmm0, eax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d0, w4",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtsi2sd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d0, w20",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtsi2sd xmm0, rax": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "scvtf d0, x4",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtsi2sd xmm0, qword [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0x2a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "scvtf d0, d2",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "movntsd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x2b",
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "cvttsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "frint32z d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "cvttsd2si eax, qword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr d2, [x6]",
        "frint32z d2, d2",
        "fcvtzs w4, d2"
      ]
    },
    "cvttsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "frint64z d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "cvttsd2si rax, qword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr d2, [x6]",
        "frint64z d2, d2",
        "fcvtzs x4, d2"
      ]
    },
    "cvtsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frint32x d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "cvtsd2si eax, qword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr d2, [x6]",
        "frint32x d2, d2",
        "fcvtzs w4, d2"
      ]
    },
    "cvtsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frint64x d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "cvtsd2si rax, qword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr d2, [x6]",
        "frint64x d2, d2",
        "fcvtzs x4, d2"
      ]
    },
    "sqrtsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x51"
      ],
      "ExpectedArm64ASM": [
        "fsqrt d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "addsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x58"
      ],
      "ExpectedArm64ASM": [
        "fadd d0, d16, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "mulsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x59"
      ],
      "ExpectedArm64ASM": [
        "fmul d0, d16, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cvtsd2ss xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "fcvt s0, d17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "cvtsd2ss xmm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0x5a"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "fcvt s0, d2",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "subsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x5c"
      ],
      "ExpectedArm64ASM": [
        "fsub d0, d16, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "minsd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf2 0x0f 0x5d"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "fcmp d16, d17",
        "fcsel d0, d16, d17, mi",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20"
      ]
    },
    "divsd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0x5e"
      ],
      "ExpectedArm64ASM": [
        "fdiv d0, d16, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "maxsd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf2 0x0f 0x5f"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "fcmp d16, d17",
        "fcsel d0, d16, d17, gt",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20"
      ]
    },
    "pshuflw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast element 0",
        "0xf2 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[0]",
        "trn2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "pshuflw xmm0, xmm1, 11100100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Identity copy",
        "0xf2 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "pshuflw xmm0, xmm1, 01010000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Lower elements Self-zip",
        "0xf2 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2664]",
        "ldr q2, [x0, #1280]",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pshuflw xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Broadcast first element into elements 1,2,3",
        "Element 0 is replaced by element 1",
        "0xf2 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2664]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "pshuflw xmm0, xmm1, 0xff": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Broadcast Element 3",
        "0xf2 0x0f 0x70"
      ],
      "ExpectedArm64ASM": [
        "dup v2.8h, v17.h[3]",
        "trn2 v16.2d, v2.2d, v17.2d"
      ]
    },
    "insertq xmm0, xmm1, 0, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "SSE4a",
        "0xf2 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "fmov d2, x20",
        "and v3.8b, v17.8b, v2.8b",
        "mvn v2.16b, v2.16b",
        "and v2.8b, v16.8b, v2.8b",
        "orr v16.8b, v2.8b, v3.8b"
      ]
    },
    "insertq xmm0, xmm1, 64, 0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "SSE4a",
        "0xf2 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "fmov d2, x20",
        "and v3.8b, v17.8b, v2.8b",
        "mvn v2.16b, v2.16b",
        "and v2.8b, v16.8b, v2.8b",
        "orr v16.8b, v2.8b, v3.8b"
      ]
    },
    "insertq xmm0, xmm1, 32, 32": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "SSE4a",
        "0xf2 0x0f 0x78"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0xffffffff",
        "fmov d2, x20",
        "and v3.8b, v17.8b, v2.8b",
        "shl v3.2d, v3.2d, #32",
        "shl v2.2d, v2.2d, #32",
        "mvn v2.16b, v2.16b",
        "and v2.8b, v16.8b, v2.8b",
        "orr v16.8b, v2.8b, v3.8b"
      ]
    },
    "insertq xmm0, xmm1": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "SSE4a",
        "0xf2 0x0f 0x79"
      ],
      "ExpectedArm64ASM": [
        "dup v2.2d, v17.d[1]",
        "mov w0, #0x3f",
        "dup v3.2d, x0",
        "and v4.8b, v2.8b, v3.8b",
        "ushr v2.2d, v2.2d, #8",
        "and v2.8b, v2.8b, v3.8b",
        "mov x20, v4.d[0]",
        "mrs x21, nzcv",
        "mov x0, #0xffffffffffffffff",
        "cmp x20, #0x0 (0)",
        "lsl x1, x0, x20",
        "csinv x20, x0, x1, eq",
        "fmov d3, x20",
        "and v4.16b, v17.16b, v3.16b",
        "ushl v4.2d, v4.2d, v2.2d",
        "ushl v3.2d, v3.2d, v2.2d",
        "mvn v2.16b, v3.16b",
        "and v2.8b, v16.8b, v2.8b",
        "orr v16.8b, v2.8b, v4.8b",
        "msr nzcv, x21"
      ]
    },
    "haddps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0x7c",
      "ExpectedArm64ASM": [
        "faddp v16.4s, v16.4s, v17.4s"
      ]
    },
    "hsubps xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0x7d",
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v16.4s, v17.4s",
        "uzp2 v3.4s, v16.4s, v17.4s",
        "fsub v16.4s, v2.4s, v3.4s"
      ]
    },
    "cmpsd xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d0, d17, d16",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d0, d17, d16",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d17, d16",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 4": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmeq d0, d17, d16",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 5": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 6": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d17, d16",
        "mvn v2.16b, v2.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "cmpsd xmm0, xmm1, 7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xf2 0x0f 0xc2"
      ],
      "ExpectedArm64ASM": [
        "fcmge d0, d16, d17",
        "fcmgt d1, d17, d16",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "addsubps xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0xd0",
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v17.16b, v2.16b",
        "fadd v16.4s, v16.4s, v2.4s"
      ]
    },
    "movdq2q mm0, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0xf2 0x0f 0xd6",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "str d16, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "cvtpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0xe6",
      "ExpectedArm64ASM": [
        "frint32x v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d"
      ]
    },
    "lddqu xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": "0xf2 0x0f 0xf0",
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_REPNE_FCMA.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FCMA"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "addsubps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf2 0x0f 0xd0",
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v17.4s",
        "fcadd v16.4s, v16.4s, v2.4s, #90"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_REPNE_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "FCMA",
      "AFP"
    ]
  },
  "Instructions": {
    "cvtpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf2 0x0f 0xe6",
      "ExpectedArm64ASM": [
        "frint32x v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_REP_FRINTTS.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "cvttss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "frint32z s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "cvttss2si eax, dword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frint32z s2, s2",
        "fcvtzs w4, s2"
      ]
    },
    "cvttss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "frint64z s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "cvttss2si rax, dword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0x2c",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frint64z s2, s2",
        "fcvtzs x4, s2"
      ]
    },
    "cvtss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frint32x s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "cvtss2si eax, dword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frint32x s2, s2",
        "fcvtzs w4, s2"
      ]
    },
    "cvtss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "frint64x s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "cvtss2si rax, dword [rbx]": {
      "ExpectedInstructionCount": 3,
      "Comment": "0xf3 0x0f 0x2d",
      "ExpectedArm64ASM": [
        "ldr s2, [x6]",
        "frint64x s2, s2",
        "fcvtzs x4, s2"
      ]
    },
    "cvttps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": "0xf3 0x0f 0x5b",
      "ExpectedArm64ASM": [
        "frint32z v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/Secondary_SVE128.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "SVE256",
      "AFP"
    ]
  },
  "Instructions": {
    "movmskps eax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x50",
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "index z3.s, #0, #1",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "movmskps rax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": "0x0f 0x50",
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "index z3.s, #0, #1",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "psrlw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsr z2.h, p6/m, z2.h, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrld mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsr z2.s, p6/m, z2.s, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrlq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xd3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsr z2.d, p6/m, z2.d, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psraw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "asr z2.h, p6/m, z2.h, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psrad mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xe2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "asr z2.s, p6/m, z2.s, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllw mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf1",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsl z2.h, p6/m, z2.h, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "pslld mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf2",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsl z2.s, p6/m, z2.s, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    },
    "psllq mm0, mm1": {
      "ExpectedInstructionCount": 8,
      "Comment": "0x0f 0xf3",
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1051]",
        "mov w20, #0xffff",
        "strb w20, [x28, #1202]",
        "ldr d2, [x28, #1056]",
        "ldr d3, [x28, #1072]",
        "lsl z2.d, p6/m, z2.d, z3.d",
        "str d2, [x28, #1056]",
        "strh w20, [x28, #1064]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map1.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256"
    ],
    "DisabledHostFeatures": [
      "FCMA",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2",
      "FRINTTS"
    ]
  },
  "Instructions": {
    "vmovups xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b"
      ]
    },
    "vmovups xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "SVE 128-bit load already zeroes the upper bits",
        "Map 1 0b00 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovups ymm0, ymm0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "Spurious moves",
        "Map 1 0b00 0x10 256-bit"
      ],
      "ExpectedArm64ASM": []
    },
    "vmovups ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovupd xmm0, xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b"
      ]
    },
    "vmovupd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "SVE 128-bit load already zeroes the upper bits",
        "Map 1 0b01 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovupd ymm0, ymm0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "Spurious moves",
        "Map 1 0b01 0x10 256-bit"
      ],
      "ExpectedArm64ASM": []
    },
    "vmovupd ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x10 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovss xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "32-bit vector load already zeroes the upper bits",
        "Map 1 0b10 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s16, [x4]"
      ]
    },
    "vmovss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b10 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]"
      ]
    },
    "vmovsd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "64-bit vector load already zeroes the upper bits",
        "Map 1 0b11 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d16, [x4]"
      ]
    },
    "vmovsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b11 0x10 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v18.d[0]"
      ]
    },
    "vmovups [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovups [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x11 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vmovupd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovupd [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x11 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vmovss [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str s16, [x4]"
      ]
    },
    "db 0xc5, 0xf2, 0x11, 0xc2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "vmovss xmm2, xmm1, xmm0",
        "Need to manually encode since nasm won't encode this",
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b10 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v18.16b, v17.16b",
        "mov v18.s[0], v16.s[0]"
      ]
    },
    "vmovsd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b11 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "db 0xc5, 0xf3, 0x11, 0xc2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "vmovsd xmm2, xmm1, xmm0",
        "Need to manually encode since nasm won't encode this",
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b11 0x11 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v18.16b, v17.16b",
        "mov v18.d[0], v16.d[0]"
      ]
    },
    "vmovlps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b00 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "vmovlpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Inserting into the first element, which is the common case, could be more optimal.",
        "Map 1 0b01 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "vmovsldup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn1 v16.4s, v2.4s, v2.4s"
      ]
    },
    "vmovsldup ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Could potentially be considered optimal.",
        "Ideally the load happens directly in the destination register",
        "This would lower memory pressure of this instruction by 1 temporary",
        "But the more optimal implementation is still the same number of instructions",
        "Map 1 0b10 0x12 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "trn1 z16.s, z2.s, z2.s"
      ]
    },
    "vmovddup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x12 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "dup v16.2d, v2.d[0]"
      ]
    },
    "vmovddup ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Could potentially be considered optimal.",
        "Ideally the load happens directly in the destination register",
        "This would lower memory pressure of this instruction by 1 temporary",
        "But the more optimal implementation is still the same number of instructions",
        "Map 1 0b11 0x12 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "trn1 z16.d, z2.d, z2.d"
      ]
    },
    "vmovlps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vmovlpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vunpcklps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.4s, v17.4s, v2.4s"
      ]
    },
    "vunpcklps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b00 0x14 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "zip1 z3.s, z17.s, z2.s",
        "zip2 z2.s, z17.s, z2.s",
        "mov z1.q, q2",
        "mov z16.d, z3.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vunpcklpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 v16.2d, v17.2d, v2.2d"
      ]
    },
    "vunpcklpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0x14 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "zip1 z3.d, z17.d, z2.d",
        "zip2 z2.d, z17.d, z2.d",
        "mov z1.q, q2",
        "mov z16.d, z3.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vunpckhps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.4s, v17.4s, v2.4s"
      ]
    },
    "vunpckhps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b00 0x15 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "zip1 z3.s, z17.s, z2.s",
        "zip2 z2.s, z17.s, z2.s",
        "mov z1.q, z3.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vunpckhpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip2 v16.2d, v17.2d, v2.2d"
      ]
    },
    "vunpckhpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x15 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "zip1 z3.d, z17.d, z2.d",
        "zip2 z2.d, z17.d, z2.d",
        "mov z1.q, z3.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vmovhps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.8b, v17.8b",
        "ldr d3, [x4]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v3.d[0]"
      ]
    },
    "vmovhpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.8b, v17.8b",
        "ldr d3, [x4]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v3.d[0]"
      ]
    },
    "vmovlhps xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.8b, v17.8b",
        "mov v3.8b, v17.8b",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v3.d[0]"
      ]
    },
    "vmovshdup xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "trn2 v16.4s, v2.4s, v2.4s"
      ]
    },
    "vmovshdup ymm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x16 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "trn2 z16.s, z2.s, z2.s"
      ]
    },
    "vmovhps [rax], xmm0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.d[0], v16.d[1]",
        "str d2, [x4]"
      ]
    },
    "vmovhpd [rax], xmm0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.d[0], v16.d[1]",
        "str d2, [x4]"
      ]
    },
    "vmovmskps rax, xmm0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x50 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v2.4s, v16.4s, #31",
        "index z3.s, #0, #1",
        "ushl v2.4s, v2.4s, v3.4s",
        "addv s2, v2.4s",
        "mov w4, v2.s[0]"
      ]
    },
    "vmovmskps rax, ymm0": {
      "ExpectedInstructionCount": 32,
      "Comment": [
        "Map 1 0b00 0x50 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, v16.s[0]",
        "lsr w20, w20, #31",
        "mov w21, v16.s[1]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #1",
        "mov w21, v16.s[2]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #2",
        "mov w21, v16.s[3]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #3",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov w21, v16.s[0]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #4",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov w21, v16.s[1]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #5",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov w21, v16.s[2]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #6",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov w21, v16.s[3]",
        "lsr w21, w21, #31",
        "orr x20, x20, x21, lsl #7",
        "mov w4, w20"
      ]
    },
    "vmovmskpd rax, xmm0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x50 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp2 v2.4s, v16.4s, v16.4s",
        "mov x20, v2.d[0]",
        "bfi x20, x20, #31, #32",
        "lsr x4, x20, #62"
      ]
    },
    "vmovmskpd rax, ymm0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 1 0b01 0x50 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, v16.d[0]",
        "lsr x20, x20, #63",
        "mov x21, v16.d[1]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #1",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov x21, v16.d[0]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #2",
        "not p0.b, p7/z, p6.b",
        "compact z0.d, p0, z16.d",
        "mov x21, v16.d[1]",
        "lsr x21, x21, #63",
        "orr x20, x20, x21, lsl #3",
        "mov w4, w20"
      ]
    },
    "vsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt v16.4s, v17.4s"
      ]
    },
    "vsqrtps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x51 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt z16.s, p7/m, z17.s"
      ]
    },
    "vsqrtpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt v16.2d, v17.2d"
      ]
    },
    "vsqrtpd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x51 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt z16.d, p7/m, z17.d"
      ]
    },
    "vsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt s0, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vsqrtsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x51 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsqrt d0, d18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vrsqrtps xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fsqrt v1.4s, v17.4s",
        "fdiv v16.4s, v0.4s, v1.4s"
      ]
    },
    "vrsqrtps ymm0, ymm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x52 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fsqrt z0.s, p7/m, z17.s",
        "fmov z16.s, #0x70 (1.0000)",
        "fdiv z16.s, p7/m, z16.s, z0.s"
      ]
    },
    "vrsqrtss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0x52 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fsqrt s1, s18",
        "fdiv s0, s0, s1",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vrcpps xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov v0.4s, #0x70 (1.0000)",
        "fdiv v16.4s, v0.4s, v17.4s"
      ]
    },
    "vrcpps ymm0, ymm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x53 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov z0.s, #0x70 (1.0000)",
        "fdiv z0.s, p7/m, z0.s, z17.s",
        "mov z16.d, z0.d"
      ]
    },
    "vrcpss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0x53 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmov s0, #0x70 (1.0000)",
        "fdiv s0, s0, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vandps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x54 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b"
      ]
    },
    "vandps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x54 256-bit"
      ],
      "ExpectedArm64ASM": [
        "and z16.d, z16.d, z17.d"
      ]
    },
    "vandpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x54 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v16.16b, v17.16b"
      ]
    },
    "vandpd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x54 256-bit"
      ],
      "ExpectedArm64ASM": [
        "and z16.d, z16.d, z17.d"
      ]
    },
    "vandnps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x55 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b"
      ]
    },
    "vandnps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x55 256-bit"
      ],
      "ExpectedArm64ASM": [
        "bic z16.d, z17.d, z16.d"
      ]
    },
    "vandnpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x55 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v17.16b, v16.16b"
      ]
    },
    "vandnpd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x55 256-bit"
      ],
      "ExpectedArm64ASM": [
        "bic z16.d, z17.d, z16.d"
      ]
    },
    "vorps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x56 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b"
      ]
    },
    "vorps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x56 256-bit"
      ],
      "ExpectedArm64ASM": [
        "orr z16.d, z16.d, z17.d"
      ]
    },
    "vorpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x56 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v16.16b, v17.16b"
      ]
    },
    "vorpd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x56 256-bit"
      ],
      "ExpectedArm64ASM": [
        "orr z16.d, z16.d, z17.d"
      ]
    },
    "vxorps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "vxorps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "eor z16.d, z16.d, z17.d"
      ]
    },
    "vxorpd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v16.16b, v17.16b"
      ]
    },
    "vxorpd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "eor z16.d, z16.d, z17.d"
      ]
    },
    "vxorps xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b00 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vxorps ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b00 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vxorpd xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0x57 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vxorpd ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0x57 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpunpcklbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x60 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpunpcklbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x60 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.b, z17.b, z18.b",
        "zip2 z3.b, z17.b, z18.b",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpunpcklwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x61 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpunpcklwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x61 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.h, z17.h, z18.h",
        "zip2 z3.h, z17.h, z18.h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpunpckldq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x62 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpunpckldq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x62 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.s, z17.s, z18.s",
        "zip2 z3.s, z17.s, z18.s",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpacksswb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x63 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtn v16.8b, v17.8h",
        "sqxtn2 v16.16b, v18.8h"
      ]
    },
    "vpacksswb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 1 0b01 0x63 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtnb z1.b, z18.h",
        "uzp1 z1.b, z1.b, z1.b",
        "sqxtnb z2.b, z17.h",
        "uzp1 z2.b, z2.b, z2.b",
        "splice z2.b, p6, z2.b, z1.b",
        "mov z1.d, z2.d[1]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[2]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpcmpgtb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x64 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpcmpgtb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x64 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpgt p0.b, p7/z, z17.b, z18.b",
        "not z0.b, p0/m, z17.b",
        "movprfx z16.b, p0/z, z17.b",
        "orr z16.b, p0/m, z16.b, z0.b",
        "msr nzcv, x0"
      ]
    },
    "vpcmpgtw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x65 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpcmpgtw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x65 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpgt p0.h, p7/z, z17.h, z18.h",
        "not z0.h, p0/m, z17.h",
        "movprfx z16.h, p0/z, z17.h",
        "orr z16.h, p0/m, z16.h, z0.h",
        "msr nzcv, x0"
      ]
    },
    "vpcmpgtd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x66 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpcmpgtd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x66 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpgt p0.s, p7/z, z17.s, z18.s",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s",
        "msr nzcv, x0"
      ]
    },
    "vpackuswb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x67 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtun v16.8b, v17.8h",
        "sqxtun2 v16.16b, v18.8h"
      ]
    },
    "vpackuswb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 1 0b01 0x67 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtunb z1.b, z18.h",
        "uzp1 z1.b, z1.b, z1.b",
        "sqxtunb z2.b, z17.h",
        "uzp1 z2.b, z2.b, z2.b",
        "splice z2.b, p6, z2.b, z1.b",
        "mov z1.d, z2.d[1]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[2]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpshufd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v17.s[0]",
        "mov v2.s[2], v17.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[0]"
      ]
    },
    "vpshufd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.s[0], v17.s[1]",
        "mov v2.s[1], v17.s[0]",
        "mov v2.s[2], v17.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[0]"
      ]
    },
    "vpshufd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.s[0], v17.s[2]",
        "mov v2.s[1], v17.s[0]",
        "mov v2.s[2], v17.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[0]"
      ]
    },
    "vpshufd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.s[0], v17.s[3]",
        "mov v2.s[1], v17.s[0]",
        "mov v2.s[2], v17.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[0]"
      ]
    },
    "vpshufd ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, s17",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpshufd ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[1]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpshufd ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[2]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpshufd ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b01 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[3]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpshufhw xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[4], v17.h[4]",
        "mov v2.h[5], v17.h[4]",
        "mov v2.h[6], v17.h[4]",
        "mov v16.16b, v2.16b",
        "mov v16.h[7], v17.h[4]"
      ]
    },
    "vpshufhw xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[4], v17.h[5]",
        "mov v2.h[5], v17.h[4]",
        "mov v2.h[6], v17.h[4]",
        "mov v16.16b, v2.16b",
        "mov v16.h[7], v17.h[4]"
      ]
    },
    "vpshufhw xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[4], v17.h[6]",
        "mov v2.h[5], v17.h[4]",
        "mov v2.h[6], v17.h[4]",
        "mov v16.16b, v2.16b",
        "mov v16.h[7], v17.h[4]"
      ]
    },
    "vpshufhw xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[4], v17.h[7]",
        "mov v2.h[5], v17.h[4]",
        "mov v2.h[6], v17.h[4]",
        "mov v16.16b, v2.16b",
        "mov v16.h[7], v17.h[4]"
      ]
    },
    "vpshufhw ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[4]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #7",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshufhw ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[5]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[13]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #7",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshufhw ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[6]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[14]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #7",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshufhw ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b10 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[7]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[15]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #7",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshuflw xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[0], v17.h[0]",
        "mov v2.h[1], v17.h[0]",
        "mov v2.h[2], v17.h[0]",
        "mov v16.16b, v2.16b",
        "mov v16.h[3], v17.h[0]"
      ]
    },
    "vpshuflw xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[0], v17.h[1]",
        "mov v2.h[1], v17.h[0]",
        "mov v2.h[2], v17.h[0]",
        "mov v16.16b, v2.16b",
        "mov v16.h[3], v17.h[0]"
      ]
    },
    "vpshuflw xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[0], v17.h[2]",
        "mov v2.h[1], v17.h[0]",
        "mov v2.h[2], v17.h[0]",
        "mov v16.16b, v2.16b",
        "mov v16.h[3], v17.h[0]"
      ]
    },
    "vpshuflw xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x70 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "mov v2.h[0], v17.h[3]",
        "mov v2.h[1], v17.h[0]",
        "mov v2.h[2], v17.h[0]",
        "mov v16.16b, v2.16b",
        "mov v16.h[3], v17.h[0]"
      ]
    },
    "vpshuflw ymm0, ymm1, 00b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, h17",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-8",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #0",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-7",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #3",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshuflw ymm0, ymm1, 01b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[1]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-8",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[9]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #0",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-7",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #3",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshuflw ymm0, ymm1, 10b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[2]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-8",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[10]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #0",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-7",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #3",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpshuflw ymm0, ymm1, 11b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b11 0x70 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.h, z17.h[3]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-8",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[11]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #0",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-7",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, h17",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[8]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #3",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpcmpeqb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x74 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpcmpeqb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x74 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpeq p0.b, p7/z, z17.b, z18.b",
        "not z0.b, p0/m, z17.b",
        "movprfx z16.b, p0/z, z17.b",
        "orr z16.b, p0/m, z16.b, z0.b",
        "msr nzcv, x0"
      ]
    },
    "vpcmpeqw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x75 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpcmpeqw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x75 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpeq p0.h, p7/z, z17.h, z18.h",
        "not z0.h, p0/m, z17.h",
        "movprfx z16.h, p0/z, z17.h",
        "orr z16.h, p0/m, z16.h, z0.h",
        "msr nzcv, x0"
      ]
    },
    "vpcmpeqd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x76 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpcmpeqd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x76 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpeq p0.s, p7/z, z17.s, z18.s",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s",
        "msr nzcv, x0"
      ]
    },
    "vzeroupper": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Might need to revisit this if move renaming ends up slower than some other clearing",
        "Map 1 0b01 0x77 L=0"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "mov v17.16b, v17.16b",
        "mov v18.16b, v18.16b",
        "mov v19.16b, v19.16b",
        "mov v20.16b, v20.16b",
        "mov v21.16b, v21.16b",
        "mov v22.16b, v22.16b",
        "mov v23.16b, v23.16b",
        "mov v24.16b, v24.16b",
        "mov v25.16b, v25.16b",
        "mov v26.16b, v26.16b",
        "mov v27.16b, v27.16b",
        "mov v28.16b, v28.16b",
        "mov v29.16b, v29.16b",
        "mov v30.16b, v30.16b",
        "mov v31.16b, v31.16b"
      ]
    },
    "vzeroall": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 1 0b01 0x77 L=1"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0",
        "movi v17.2d, #0x0",
        "movi v18.2d, #0x0",
        "movi v19.2d, #0x0",
        "movi v20.2d, #0x0",
        "movi v21.2d, #0x0",
        "movi v22.2d, #0x0",
        "movi v23.2d, #0x0",
        "movi v24.2d, #0x0",
        "movi v25.2d, #0x0",
        "movi v26.2d, #0x0",
        "movi v27.2d, #0x0",
        "movi v28.2d, #0x0",
        "movi v29.2d, #0x0",
        "movi v30.2d, #0x0",
        "movi v31.2d, #0x0"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v17.4s, v18.4s"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x00": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq p0.s, p7/z, z17.s, z18.s",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v16.4s, v18.4s, v17.4s"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x01": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.s, p7/z, z18.s, z17.s",
        "not z0.s, p0/m, z18.s",
        "movprfx z16.s, p0/z, z18.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v16.4s, v18.4s, v17.4s"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x02": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge p0.s, p7/z, z18.s, z17.s",
        "not z0.s, p0/m, z18.s",
        "movprfx z16.s, p0/z, z18.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x03": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmuo p0.s, p7/z, z17.s, z18.s",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.4s, v17.4s, v18.4s",
        "mvn v16.16b, v16.16b"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmne p0.s, p7/z, z17.s, z18.s",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v2.4s, v18.4s, v17.4s",
        "mvn v16.16b, v2.16b"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x05": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.s, p7/z, z18.s, z17.s",
        "not z0.s, p0/m, z18.s",
        "movprfx z2.s, p0/z, z18.s",
        "orr z2.s, p0/m, z2.s, z0.s",
        "not z16.b, p7/m, z2.b"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v2.4s, v18.4s, v17.4s",
        "mvn v16.16b, v2.16b"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x06": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge p0.s, p7/z, z18.s, z17.s",
        "not z0.s, p0/m, z18.s",
        "movprfx z2.s, p0/z, z18.s",
        "orr z2.s, p0/m, z2.s, z0.s",
        "not z16.b, p7/m, z2.b"
      ]
    },
    "vcmpps xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.4s, v17.4s, v18.4s",
        "fcmgt v1.4s, v18.4s, v17.4s",
        "orr v16.16b, v0.16b, v1.16b"
      ]
    },
    "vcmpps ymm0, ymm1, ymm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmuo p0.s, p7/z, z17.s, z18.s",
        "not p0.b, p7/z, p0.b",
        "not z0.s, p0/m, z17.s",
        "movprfx z16.s, p0/z, z17.s",
        "orr z16.s, p0/m, z16.s, z0.s"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v17.2d, v18.2d"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x00": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq p0.d, p7/z, z17.d, z18.d",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v16.2d, v18.2d, v17.2d"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x01": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.d, p7/z, z18.d, z17.d",
        "not z0.d, p0/m, z18.d",
        "movprfx z16.d, p0/z, z18.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v16.2d, v18.2d, v17.2d"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x02": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge p0.d, p7/z, z18.d, z17.d",
        "not z0.d, p0/m, z18.d",
        "movprfx z16.d, p0/z, z18.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b",
        "mvn v16.16b, v16.16b"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x03": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmuo p0.d, p7/z, z17.d, z18.d",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmeq v16.2d, v17.2d, v18.2d",
        "mvn v16.16b, v16.16b"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmne p0.d, p7/z, z17.d, z18.d",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v2.2d, v18.2d, v17.2d",
        "mvn v16.16b, v2.16b"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x05": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.d, p7/z, z18.d, z17.d",
        "not z0.d, p0/m, z18.d",
        "movprfx z2.d, p0/z, z18.d",
        "orr z2.d, p0/m, z2.d, z0.d",
        "not z16.b, p7/m, z2.b"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v2.2d, v18.2d, v17.2d",
        "mvn v16.16b, v2.16b"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x06": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge p0.d, p7/z, z18.d, z17.d",
        "not z0.d, p0/m, z18.d",
        "movprfx z2.d, p0/z, z18.d",
        "orr z2.d, p0/m, z2.d, z0.d",
        "not z16.b, p7/m, z2.b"
      ]
    },
    "vcmppd xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge v0.2d, v17.2d, v18.2d",
        "fcmgt v1.2d, v18.2d, v17.2d",
        "orr v16.16b, v0.16b, v1.16b"
      ]
    },
    "vcmppd ymm0, ymm1, ymm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xC2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmuo p0.d, p7/z, z17.d, z18.d",
        "not p0.b, p7/z, p0.b",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s0, s18, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt s0, s18, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s18, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq s0, s18, s17",
        "mvn v0.8b, v0.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge s2, s18, s17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v2.s[0]"
      ]
    },
    "vcmpss xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b10 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge s0, s17, s18",
        "fcmgt s1, s18, s17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x00": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d0, d18, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x01": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmgt d0, d18, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x02": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d18, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x03": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x04": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmeq d0, d18, d17",
        "mvn v0.8b, v0.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x05": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x06": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmge d2, d18, d17",
        "mvn v2.16b, v2.16b",
        "mov v16.16b, v17.16b",
        "mov v16.d[0], v2.d[0]"
      ]
    },
    "vcmpsd xmm0, xmm1, xmm2, 0x07": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b11 0xC2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcmge d0, d17, d18",
        "fcmgt d1, d18, d17",
        "orr v0.8b, v0.8b, v1.8b",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vpinsrw xmm0, xmm0, eax, 000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.h[0], w4",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[0], w4"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[1], w4"
      ]
    },
    "vpinsrw xmm0, xmm1, eax, 111b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xC4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.h[7], w4"
      ]
    },
    "vpextrw eax, xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "vpextrw eax, xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[1]"
      ]
    },
    "vpextrw eax, xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "vpextrw [rax], xmm0, 000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[1], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 00b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v17.s[0]",
        "dup v3.4s, v18.s[0]",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 00b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, s17",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 01b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #16]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 01b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[1]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 10b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #32]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 10b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[2]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vshufps xmm0, xmm1, xmm2, 11b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2688]",
        "ldr q2, [x0, #48]",
        "tbl v16.16b, {v17.16b, v18.16b}, v2.16b"
      ]
    },
    "vshufps ymm0, ymm1, ymm2, 11b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 1 0b00 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, z17.s[3]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vshufpd xmm0, xmm1, xmm2, 0b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v17.2d, v18.2d"
      ]
    },
    "vshufpd ymm0, ymm1, ymm2, 0b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 1 0b01 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.d, d17",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vshufpd xmm0, xmm1, xmm2, 1b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xC6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v17.16b, v18.16b, #8"
      ]
    },
    "vshufpd ymm0, ymm1, ymm2, 1b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 1 0b01 0xC6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.d, z17.d[1]",
        "mov z2.d, z17.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vmovaps xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovaps ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovaps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovaps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vmovapd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovapd ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovapd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vmovapd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vmovaps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovaps [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vmovapd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovapd [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s0, w4",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcvtsi2ss xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf s0, x4",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, eax": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d0, w4",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcvtsi2sd xmm0, xmm1, rax": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x2A 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "scvtf d0, x4",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vmovntps [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntps [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p7, [x4]"
      ]
    },
    "vmovntpd [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x2B 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntpd [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x2B 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p7, [x4]"
      ]
    },
    "vcvttss2si eax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtzs w20, s16",
        "mov w21, #0x80000000",
        "ldr s2, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s2, s16",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvttss2si rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtzs x20, s16",
        "mov x21, #0x8000000000000000",
        "ldr s2, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s2, s16",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvttsd2si eax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtzs w20, d16",
        "mov w21, #0x80000000",
        "ldr d2, [x28, #3472]",
        "mrs x22, nzcv",
        "fcmp d2, d16",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvttsd2si rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtzs x20, d16",
        "mov x21, #0x8000000000000000",
        "ldr d2, [x28, #3504]",
        "mrs x22, nzcv",
        "fcmp d2, d16",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvtss2si eax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti s2, s16",
        "fcvtzs w20, s2",
        "mov w21, #0x80000000",
        "ldr s3, [x28, #3424]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvtss2si rax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti s2, s16",
        "fcvtzs x20, s2",
        "mov x21, #0x8000000000000000",
        "ldr s3, [x28, #3456]",
        "mrs x22, nzcv",
        "fcmp s3, s2",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvtsd2si eax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti d2, d16",
        "fcvtzs w20, d2",
        "mov w21, #0x80000000",
        "ldr d3, [x28, #3472]",
        "mrs x22, nzcv",
        "fcmp d3, d2",
        "csel w4, w20, w21, gt",
        "msr nzcv, x22"
      ]
    },
    "vcvtsd2si rax, xmm0": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti d2, d16",
        "fcvtzs x20, d2",
        "mov x21, #0x8000000000000000",
        "ldr d3, [x28, #3504]",
        "mrs x22, nzcv",
        "fcmp d3, d2",
        "csel x4, x20, x21, gt",
        "msr nzcv, x22"
      ]
    },
    "vucomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vucomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vcomiss xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp s16, s17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vcomisd xmm0, xmm1": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmp d16, d17",
        "cset x26, vc",
        "mov w27, #0x0",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "vaddps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd v16.4s, v17.4s, v18.4s"
      ]
    },
    "vaddps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd z16.s, z17.s, z18.s"
      ]
    },
    "vaddpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd v16.2d, v17.2d, v18.2d"
      ]
    },
    "vaddpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fadd z16.d, z17.d, z18.d"
      ]
    },
    "vaddss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fadd s0, s17, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vaddsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fadd d0, d17, d18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vmulps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v16.4s, v17.4s, v18.4s"
      ]
    },
    "vmulps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul z16.s, z17.s, z18.s"
      ]
    },
    "vmulpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v16.2d, v17.2d, v18.2d"
      ]
    },
    "vmulpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul z16.d, z17.d, z18.d"
      ]
    },
    "vmulss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul s0, s17, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vmulsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fmul d0, d17, d18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcvtps2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtl v2.2d, v17.2s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtpd2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "fcvtn v16.2s, v2.2d"
      ]
    },
    "vcvtpd2ps xmm0, yword [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x5a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z2.b}, p7/z, [x4]",
        "fcvtnt z2.s, p7/m, z2.d",
        "uzp2 z2.s, z2.s, z2.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtpd2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtn v16.2s, v17.2d"
      ]
    },
    "vcvtss2sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt d0, s18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vcvtsd2ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x5a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fcvt s0, d18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vcvtdq2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "scvtf v16.4s, v17.4s"
      ]
    },
    "vcvtdq2ps ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "scvtf z16.s, p7/m, z17.s"
      ]
    },
    "vcvtps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v2.4s, v17.4s",
        "ldr q3, [x28, #3520]",
        "ldr q4, [x28, #3424]",
        "fcvtzs v5.4s, v2.4s",
        "fcmgt v2.4s, v4.4s, v2.4s",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v5.16b, v3.16b"
      ]
    },
    "vcvtps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 1 0b01 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z2.s, p7/m, z17.s",
        "ldr x0, [x28, #2608]",
        "ld1b {z3.b}, p7/z, [x0]",
        "ldr x0, [x28, #2560]",
        "ld1b {z4.b}, p7/z, [x0]",
        "fcvtzs z5.s, p7/m, z2.s",
        "fcmgt p0.s, p7/z, z4.s, z2.s",
        "not z0.s, p0/m, z4.s",
        "movprfx z2.s, p0/z, z4.s",
        "orr z2.s, p0/m, z2.s, z0.s",
        "movprfx z0, z5",
        "bsl z0.d, z0.d, z3.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vcvttps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3520]",
        "ldr q3, [x28, #3424]",
        "fcvtzs v4.4s, v17.4s",
        "fcmgt v3.4s, v3.4s, v17.4s",
        "mov v16.16b, v3.16b",
        "bsl v16.16b, v4.16b, v2.16b"
      ]
    },
    "vcvttps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b10 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2608]",
        "ld1b {z2.b}, p7/z, [x0]",
        "ldr x0, [x28, #2560]",
        "ld1b {z3.b}, p7/z, [x0]",
        "fcvtzs z4.s, p7/m, z17.s",
        "fcmgt p0.s, p7/z, z3.s, z17.s",
        "not z0.s, p0/m, z3.s",
        "movprfx z3.s, p0/z, z3.s",
        "orr z3.s, p0/m, z3.s, z0.s",
        "movprfx z0, z4",
        "bsl z0.d, z0.d, z2.d, z3.d",
        "mov z16.d, z0.d"
      ]
    },
    "vsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub v16.4s, v17.4s, v18.4s"
      ]
    },
    "vsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub z16.s, z17.s, z18.s"
      ]
    },
    "vsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub v16.2d, v17.2d, v18.2d"
      ]
    },
    "vsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x5c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fsub z16.d, z17.d, z18.d"
      ]
    },
    "vsubss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub s0, s17, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vsubsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x5c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fsub d0, d17, d18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vminps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b"
      ]
    },
    "vminps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b00 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.s, p7/z, z18.s, z17.s",
        "not p0.b, p7/z, p0.b",
        "mov z0.d, z17.d",
        "mov z0.s, p0/m, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vminpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bif v16.16b, v18.16b, v0.16b"
      ]
    },
    "vminpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x5d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.d, p7/z, z18.d, z17.d",
        "not p0.b, p7/z, p0.b",
        "mov z0.d, z17.d",
        "mov z0.d, p0/m, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vminss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp s17, s18",
        "fcsel s0, s17, s18, mi",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20"
      ]
    },
    "vminsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x5d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp d17, d18",
        "fcsel d0, d17, d18, mi",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20"
      ]
    },
    "vdivps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b00 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv v16.4s, v17.4s, v18.4s"
      ]
    },
    "vdivps ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vdivps ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "fdiv z0.s, p7/m, z0.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vdivps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b00 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "fdiv z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vdivpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv v16.2d, v17.2d, v18.2d"
      ]
    },
    "vdivpd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "fdiv z0.d, p7/m, z0.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vdivpd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fdiv z16.d, p7/m, z16.d, z18.d"
      ]
    },
    "vdivpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "fdiv z16.d, p7/m, z16.d, z18.d"
      ]
    },
    "vdivss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b10 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv s0, s17, s18",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vdivsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x5e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "fdiv d0, d17, d18",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vmaxps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b00 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.4s, v18.4s, v17.4s",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b"
      ]
    },
    "vmaxps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b00 0x5f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.s, p7/z, z18.s, z17.s",
        "mov z0.d, z17.d",
        "mov z0.s, p0/m, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vmaxpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt v0.2d, v18.2d, v17.2d",
        "mov v16.16b, v17.16b",
        "bit v16.16b, v18.16b, v0.16b"
      ]
    },
    "vmaxpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x5f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcmgt p0.d, p7/z, z18.d, z17.d",
        "mov z0.d, z17.d",
        "mov z0.d, p0/m, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vmaxss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b10 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp s17, s18",
        "fcsel s0, s17, s18, gt",
        "mov v16.s[0], v0.s[0]",
        "msr nzcv, x20"
      ]
    },
    "vmaxsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b11 0x5f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v16.16b, v17.16b",
        "fcmp d17, d18",
        "fcsel d0, d17, d18, gt",
        "mov v16.d[0], v0.d[0]",
        "msr nzcv, x20"
      ]
    },
    "vpunpckhbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x68 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpunpckhbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x68 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.b, z17.b, z18.b",
        "zip2 z3.b, z17.b, z18.b",
        "mov z1.q, z2.q[1]",
        "mov z16.d, z3.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vpunpckhwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x69 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpunpckhwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x69 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.h, z17.h, z18.h",
        "zip2 z3.h, z17.h, z18.h",
        "mov z1.q, z2.q[1]",
        "mov z16.d, z3.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vpunpckhdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpunpckhdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x6a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.s, z17.s, z18.s",
        "zip2 z3.s, z17.s, z18.s",
        "mov z1.q, z2.q[1]",
        "mov z16.d, z3.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vpackssdw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x6b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtn v16.4h, v17.4s",
        "sqxtn2 v16.8h, v18.4s"
      ]
    },
    "vpackssdw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 1 0b01 0x6b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtnb z1.h, z18.s",
        "uzp1 z1.h, z1.h, z1.h",
        "sqxtnb z2.h, z17.s",
        "uzp1 z2.h, z2.h, z2.h",
        "splice z2.h, p6, z2.h, z1.h",
        "mov z1.d, z2.d[1]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[2]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpunpcklqdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpunpcklqdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 1 0b01 0x6c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.d, z17.d, z18.d",
        "zip2 z3.d, z17.d, z18.d",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpunpckhqdq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "zip2 v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpunpckhqdq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0x6d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z2.d, z17.d, z18.d",
        "zip2 z3.d, z17.d, z18.d",
        "mov z1.q, z2.q[1]",
        "mov z16.d, z3.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vmovd xmm0, dword [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr s16, [x4]"
      ]
    },
    "vmovq xmm0, qword [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d16, [x4]"
      ]
    },
    "vmovdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovdqa [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vmovdqu xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x6f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vmovdqu [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x7f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str q16, [x4]"
      ]
    },
    "vhaddpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "faddp v16.2d, v17.2d, v18.2d"
      ]
    },
    "vhaddpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 1 0b01 0x7c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "faddp z0.d, p7/m, z0.d, z18.d",
        "uzp1 z2.d, z0.d, z0.d",
        "uzp2 z1.d, z0.d, z0.d",
        "splice z2.d, p6, z2.d, z1.d",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vhaddps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b11 0x7c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "faddp v16.4s, v17.4s, v18.4s"
      ]
    },
    "vhaddps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 1 0b11 0x7c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "faddp z0.s, p7/m, z0.s, z18.s",
        "uzp1 z2.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z2.d, p6, z2.d, z1.d",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vhsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0x7d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.2d, v17.2d, v18.2d",
        "uzp2 v3.2d, v17.2d, v18.2d",
        "fsub v16.2d, v2.2d, v3.2d"
      ]
    },
    "vhsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 1 0b01 0x7d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.d, z17.d, z18.d",
        "uzp2 z3.d, z17.d, z18.d",
        "fsub z2.d, z2.d, z3.d",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vhsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0x7d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v18.4s",
        "uzp2 v3.4s, v17.4s, v18.4s",
        "fsub v16.4s, v2.4s, v3.4s"
      ]
    },
    "vhsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 1 0b11 0x7d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.s, z17.s, z18.s",
        "uzp2 z3.s, z17.s, z18.s",
        "fsub z2.s, z2.s, z3.s",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vmovd dword [rax], xmm0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0x7e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "mov v0.s[0], v16.s[0]",
        "mov v2.16b, v0.16b",
        "str s2, [x4]"
      ]
    },
    "vmovq qword [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vmovdqa ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x6f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovdqa [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0x7f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vmovdqu ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x6f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vmovdqu [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b10 0x7f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "st1b {z16.b}, p7, [x4]"
      ]
    },
    "vaddsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v18.16b, v2.16b",
        "fadd v16.2d, v17.2d, v2.2d"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2384]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "fadd z16.d, z17.d, z2.d"
      ]
    },
    "vaddsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v18.16b, v2.16b",
        "fadd v16.4s, v17.4s, v2.4s"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2368]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "fadd z16.s, z17.s, z2.s"
      ]
    },
    "vpsrlw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsr z2.h, p6/m, z2.h, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsrlw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xd1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsr z16.h, p7/m, z16.h, z0.d"
      ]
    },
    "vpsrld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsr z2.s, p6/m, z2.s, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsrld ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xd2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsr z16.s, p7/m, z16.s, z0.d"
      ]
    },
    "vpsrlq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsr z2.d, p6/m, z2.d, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsrlq ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xd3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsr z16.d, p7/m, z16.d, z0.d"
      ]
    },
    "vpaddq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpaddq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "add z16.d, z17.d, z18.d"
      ]
    },
    "vpmullw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mul v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpmullw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mul z16.h, z17.h, z18.h"
      ]
    },
    "vmovq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "str d16, [x4]"
      ]
    },
    "vpmovmskb rax, xmm0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 1 0b01 0xd7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3296]",
        "cmlt v3.16b, v16.16b, #0",
        "and v2.16b, v3.16b, v2.16b",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "addp v2.8b, v2.8b, v2.8b",
        "umov w4, v2.h[0]"
      ]
    },
    "vpmovmskb rax, ymm0": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 1 0b01 0xd7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2496]",
        "ld1b {z2.b}, p7/z, [x0]",
        "mrs x0, nzcv",
        "mov z0.d, #0",
        "cmplt p0.b, p7/z, z16.b, #0",
        "not z0.b, p0/m, z16.b",
        "orr z0.b, p0/m, z0.b, z16.b",
        "mov z3.d, z0.d",
        "msr nzcv, x0",
        "and z2.d, z3.d, z2.d",
        "movprfx z0, z2",
        "addp z0.b, p7/m, z0.b, z2.b",
        "uzp1 z2.b, z0.b, z0.b",
        "uzp2 z1.b, z0.b, z0.b",
        "splice z2.d, p6, z2.d, z1.d",
        "addp v2.16b, v2.16b, v2.16b",
        "addp v2.8b, v2.8b, v2.8b",
        "mov w4, v2.s[0]"
      ]
    },
    "vpsubusb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpsubusb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub z16.b, z17.b, z18.b"
      ]
    },
    "vpsubusw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpsubusw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xd9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uqsub z16.h, z17.h, z18.h"
      ]
    },
    "vpminub xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xda 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpminub ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.b, p7/m, z16.b, z17.b"
      ]
    },
    "vpminub ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpminub ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xda 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umin z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpand xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpand ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "and z16.d, z17.d, z18.d"
      ]
    },
    "vpaddusb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpaddusb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd z16.b, z17.b, z18.b"
      ]
    },
    "vpaddusw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpaddusw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdd 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uqadd z16.h, z17.h, z18.h"
      ]
    },
    "vpmaxub xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xde 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpmaxub ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpmaxub ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.b, p7/m, z16.b, z17.b"
      ]
    },
    "vpmaxub ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xde 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umax z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpandn xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "bic v16.16b, v18.16b, v17.16b"
      ]
    },
    "vpandn ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xdf 256-bit"
      ],
      "ExpectedArm64ASM": [
        "bic z16.d, z18.d, z17.d"
      ]
    },
    "vpavgb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpavgb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd z16.b, p7/m, z16.b, z17.b"
      ]
    },
    "vpavgb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpavgb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "urhadd z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpsraw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xe1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "asr z2.h, p6/m, z2.h, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsraw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xe1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "asr z16.h, p7/m, z16.h, z0.d"
      ]
    },
    "vpsrad xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xe2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "asr z2.s, p6/m, z2.s, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsrad ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xe2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "asr z16.s, p7/m, z16.s, z0.d"
      ]
    },
    "vpavgw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpavgw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd z16.h, p7/m, z16.h, z17.h"
      ]
    },
    "vpavgw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "urhadd z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpavgw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xe3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "urhadd z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpmulhuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xe4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z17",
        "umulh z2.h, p6/m, z2.h, z18.h",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpmulhuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umulh z16.h, z17.h, z18.h"
      ]
    },
    "vpmulhw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xe5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z17",
        "smulh z2.h, p6/m, z2.h, z18.h",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpmulhw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smulh z16.h, z17.h, z18.h"
      ]
    },
    "vcvttpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 1 0b01 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x28, #3520]",
        "ldr q3, [x28, #3472]",
        "fcvtzs z4.s, p6/m, z17.d",
        "uzp1 z4.s, z4.s, z4.s",
        "mov v4.8b, v4.8b",
        "fcmgt v3.2d, v3.2d, v17.2d",
        "shrn v3.2s, v3.2d, #32",
        "mov v16.16b, v3.16b",
        "bsl v16.16b, v4.16b, v2.16b"
      ]
    },
    "vcvttpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 1 0b01 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3520]",
        "ldr x0, [x28, #2584]",
        "ld1b {z3.b}, p7/z, [x0]",
        "fcvtzs z4.s, p7/m, z17.d",
        "uzp1 z4.s, z4.s, z4.s",
        "mov v4.16b, v4.16b",
        "fcmgt p0.d, p7/z, z3.d, z17.d",
        "not z0.d, p0/m, z3.d",
        "movprfx z3.d, p0/z, z3.d",
        "orr z3.d, p0/m, z3.d, z0.d",
        "shrnb z3.s, z3.d, #32",
        "uzp1 z3.s, z3.s, z3.s",
        "movprfx z0, z4",
        "bsl z0.d, z0.d, z2.d, z3.d",
        "mov z16.d, z0.d"
      ]
    },
    "vcvtdq2pd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "scvtf v16.2d, v2.2d"
      ]
    },
    "vcvtdq2pd ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z2.d, z17.s",
        "scvtf z16.d, p7/m, z2.d"
      ]
    },
    "vcvtpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 1 0b11 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v2.2d, v17.2d",
        "ldr d3, [x28, #3520]",
        "ldr q4, [x28, #3472]",
        "fcvtzs z5.s, p6/m, z2.d",
        "uzp1 z5.s, z5.s, z5.s",
        "mov v5.8b, v5.8b",
        "fcmgt v2.2d, v4.2d, v2.2d",
        "shrn v2.2s, v2.2d, #32",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v5.16b, v3.16b"
      ]
    },
    "vcvtpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 1 0b11 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z2.d, p7/m, z17.d",
        "ldr q3, [x28, #3520]",
        "ldr x0, [x28, #2584]",
        "ld1b {z4.b}, p7/z, [x0]",
        "fcvtzs z5.s, p7/m, z2.d",
        "uzp1 z5.s, z5.s, z5.s",
        "mov v5.16b, v5.16b",
        "fcmgt p0.d, p7/z, z4.d, z2.d",
        "not z0.d, p0/m, z4.d",
        "movprfx z2.d, p0/z, z4.d",
        "orr z2.d, p0/m, z2.d, z0.d",
        "shrnb z2.s, z2.d, #32",
        "uzp1 z2.s, z2.s, z2.s",
        "movprfx z0, z5",
        "bsl z0.d, z0.d, z3.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vmovntdq [rax], xmm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p6, [x4]"
      ]
    },
    "vmovntdq [rax], ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "stnt1b {z16.b}, p7, [x4]"
      ]
    },
    "vpsubsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpsubsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub z16.b, z17.b, z18.b"
      ]
    },
    "vpsubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpsubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xe9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqsub z16.h, z17.h, z18.h"
      ]
    },
    "vpminsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xea 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpminsw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.h, p7/m, z16.h, z17.h"
      ]
    },
    "vpminsw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpminsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xea 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smin z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpor xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xeb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "orr v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpor ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xeb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "orr z16.d, z17.d, z18.d"
      ]
    },
    "vpaddsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xec 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpaddsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xec 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd z16.b, z17.b, z18.b"
      ]
    },
    "vpaddsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xed 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpaddsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xed 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqadd z16.h, z17.h, z18.h"
      ]
    },
    "vpmaxsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xee 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpmaxsw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.h, p7/m, z16.h, z17.h"
      ]
    },
    "vpmaxsw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpmaxsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xee 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smax z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpxor xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xef 128-bit"
      ],
      "ExpectedArm64ASM": [
        "eor v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpxor ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xef 256-bit"
      ],
      "ExpectedArm64ASM": [
        "eor z16.d, z17.d, z18.d"
      ]
    },
    "vpxor xmm0, xmm1, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0xef 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpxor ymm0, ymm1, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "xor with itself to get zero register",
        "Map 1 0b01 0xef 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vlddqu xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b11 0xf0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q16, [x4]"
      ]
    },
    "vlddqu ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b11 0xf0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vpsllw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf1 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsl z2.h, p6/m, z2.h, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsllw ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xf1 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsl z16.h, p7/m, z16.h, z0.d"
      ]
    },
    "vpslld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf2 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsl z2.s, p6/m, z2.s, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpslld ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xf2 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsl z16.s, p7/m, z16.s, z0.d"
      ]
    },
    "vpsllq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf3 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z2, z17",
        "lsl z2.d, p6/m, z2.d, z0.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpsllq ymm0, ymm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xf3 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, d18",
        "movprfx z16, z17",
        "lsl z16.d, p7/m, z16.d, z0.d"
      ]
    },
    "vpmuludq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xf4 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v17.4s",
        "uzp1 v3.4s, v18.4s, v18.4s",
        "umull v16.2d, v2.2s, v3.2s"
      ]
    },
    "vpmuludq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf4 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.s, z17.s, z17.s",
        "uzp1 z3.s, z18.s, z18.s",
        "umullb z0.d, z2.s, z3.s",
        "umullt z1.d, z2.s, z3.s",
        "zip1 z16.d, z0.d, z1.d"
      ]
    },
    "vpmaddwd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xf5 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smull v2.4s, v17.4h, v18.4h",
        "smull2 v3.4s, v17.8h, v18.8h",
        "addp v16.4s, v2.4s, v3.4s"
      ]
    },
    "vpmaddwd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 1 0b01 0xf5 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smullb z0.s, z17.h, z18.h",
        "smullt z1.s, z17.h, z18.h",
        "zip1 z2.s, z0.s, z1.s",
        "smullb z0.s, z17.h, z18.h",
        "smullt z1.s, z17.h, z18.h",
        "zip2 z3.s, z0.s, z1.s",
        "movprfx z0, z2",
        "addp z0.s, p7/m, z0.s, z3.s",
        "uzp1 z16.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z16.d, p6, z16.d, z1.d"
      ]
    },
    "vpsadbw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 1 0b01 0xf6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uabdl v2.8h, v17.8b, v18.8b",
        "uabdl2 v3.8h, v17.16b, v18.16b",
        "addv h2, v2.8h",
        "addv h3, v3.8h",
        "zip1 v16.2d, v2.2d, v3.2d"
      ]
    },
    "vpsadbw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 36,
      "Comment": [
        "Map 1 0b01 0xf6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uabdlb z0.h, z17.b, z18.b",
        "uabdlt z1.h, z17.b, z18.b",
        "zip1 z2.h, z0.h, z1.h",
        "uabdlb z0.h, z17.b, z18.b",
        "uabdlt z1.h, z17.b, z18.b",
        "zip2 z3.h, z0.h, z1.h",
        "addv h4, v2.8h",
        "addv h5, v3.8h",
        "zip1 z4.d, z4.d, z5.d",
        "mov z2.q, z2.q[1]",
        "mov z3.q, z3.q[1]",
        "addv h2, v2.8h",
        "addv h3, v3.8h",
        "mov z1.d, d3",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z4.b, p0/m, z1.b",
        "mov z1.d, z4.d[1]",
        "mov z2.d, z4.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z4.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vmaskmovdqu xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xf7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmlt v2.16b, v17.16b, #0",
        "ldr q3, [x11]",
        "bsl v2.16b, v16.16b, v3.16b",
        "str q2, [x11]"
      ]
    },
    "vpsubb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xf8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpsubb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xf8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sub z16.b, z17.b, z18.b"
      ]
    },
    "vpsubw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xf9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpsubw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xf9 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sub z16.h, z17.h, z18.h"
      ]
    },
    "vpsubd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpsubd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sub z16.s, z17.s, z18.s"
      ]
    },
    "vpsubq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sub v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpsubq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfb 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sub z16.d, z17.d, z18.d"
      ]
    },
    "vpaddb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpaddb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "add z16.b, z17.b, z18.b"
      ]
    },
    "vpaddw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpaddw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfd 256-bit"
      ],
      "ExpectedArm64ASM": [
        "add z16.h, z17.h, z18.h"
      ]
    },
    "vpaddd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "add v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpaddd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 1 0b01 0xfe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "add z16.s, z17.s, z18.s"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map1_FCMA.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FCMA"
    ],
    "DisabledHostFeatures": []
  },
  "Instructions": {
    "vaddsubpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v2.16b, v18.16b, v18.16b, #8",
        "fcadd v16.2d, v17.2d, v2.2d, #90"
      ]
    },
    "vaddsubpd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "ext z2.b, z2.b, z18.b, #8",
        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z16",
        "ext z2.b, z2.b, z16.b, #8",
        "movprfx z16, z17",
        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
      ]
    },
    "vaddsubpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 1 0b01 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "ext z2.b, z2.b, z18.b, #8",
        "movprfx z16, z17",
        "fcadd z16.d, p7/m, z16.d, z2.d, #90"
      ]
    },
    "vaddsubps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0xd0 128-bit"
      ],
      "ExpectedArm64ASM": [
        "rev64 v2.4s, v18.4s",
        "fcadd v16.4s, v17.4s, v2.4s, #90"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "revw z2.d, p7/m, z16.d",
        "movprfx z16, z17",
        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
      ]
    },
    "vaddsubps ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Aliasing source and destination",
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "revw z2.d, p7/m, z18.d",
        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
      ]
    },
    "vaddsubps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xd0 256-bit"
      ],
      "ExpectedArm64ASM": [
        "revw z2.d, p7/m, z18.d",
        "movprfx z16, z17",
        "fcadd z16.s, p7/m, z16.s, z2.s, #90"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map1_FRINTTS.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256",
      "FRINTTS"
    ],
    "DisabledHostFeatures": [
      "FCMA",
      "RPRES",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "vcvttss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "vcvttss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64z s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "vcvttsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "vcvttsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64z d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "vcvtss2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x s2, s16",
        "fcvtzs w4, s2"
      ]
    },
    "vcvtss2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64x s2, s16",
        "fcvtzs x4, s2"
      ]
    },
    "vcvtsd2si eax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x d2, d16",
        "fcvtzs w4, d2"
      ]
    },
    "vcvtsd2si rax, xmm0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b11 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint64x d2, d16",
        "fcvtzs x4, d2"
      ]
    },
    "vcvtps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b01 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s"
      ]
    },
    "vcvtps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "Map 1 0b01 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z2.s, p7/m, z17.s",
        "ldr x0, [x28, #2608]",
        "ld1b {z3.b}, p7/z, [x0]",
        "ldr x0, [x28, #2560]",
        "ld1b {z4.b}, p7/z, [x0]",
        "fcvtzs z5.s, p7/m, z2.s",
        "fcmgt p0.s, p7/z, z4.s, z2.s",
        "not z0.s, p0/m, z4.s",
        "movprfx z2.s, p0/z, z4.s",
        "orr z2.s, p0/m, z2.s, z0.s",
        "movprfx z0, z5",
        "bsl z0.d, z0.d, z3.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vcvttps2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 1 0b10 0x5b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z v2.4s, v17.4s",
        "fcvtzs v16.4s, v2.4s"
      ]
    },
    "vcvttps2dq ymm0, ymm1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 1 0b10 0x5b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2608]",
        "ld1b {z2.b}, p7/z, [x0]",
        "ldr x0, [x28, #2560]",
        "ld1b {z3.b}, p7/z, [x0]",
        "fcvtzs z4.s, p7/m, z17.s",
        "fcmgt p0.s, p7/z, z3.s, z17.s",
        "not z0.s, p0/m, z3.s",
        "movprfx z3.s, p0/z, z3.s",
        "orr z3.s, p0/m, z3.s, z0.s",
        "movprfx z0, z4",
        "bsl z0.d, z0.d, z2.d, z3.d",
        "mov z16.d, z0.d"
      ]
    },
    "vcvttpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b01 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32z v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d"
      ]
    },
    "vcvttpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 1 0b01 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3520]",
        "ldr x0, [x28, #2584]",
        "ld1b {z3.b}, p7/z, [x0]",
        "fcvtzs z4.s, p7/m, z17.d",
        "uzp1 z4.s, z4.s, z4.s",
        "mov v4.16b, v4.16b",
        "fcmgt p0.d, p7/z, z3.d, z17.d",
        "not z0.d, p0/m, z3.d",
        "movprfx z3.d, p0/z, z3.d",
        "orr z3.d, p0/m, z3.d, z0.d",
        "shrnb z3.s, z3.d, #32",
        "uzp1 z3.s, z3.s, z3.s",
        "movprfx z0, z4",
        "bsl z0.d, z0.d, z2.d, z3.d",
        "mov z16.d, z0.d"
      ]
    },
    "vcvtpd2dq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 1 0b11 0xe6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frint32x v2.2d, v17.2d",
        "fcvtzs v2.2d, v2.2d",
        "xtn v16.2s, v2.2d"
      ]
    },
    "vcvtpd2dq xmm0, ymm1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 1 0b11 0xe6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z2.d, p7/m, z17.d",
        "ldr q3, [x28, #3520]",
        "ldr x0, [x28, #2584]",
        "ld1b {z4.b}, p7/z, [x0]",
        "fcvtzs z5.s, p7/m, z2.d",
        "uzp1 z5.s, z5.s, z5.s",
        "mov v5.16b, v5.16b",
        "fcmgt p0.d, p7/z, z4.d, z2.d",
        "not z0.d, p0/m, z4.d",
        "movprfx z2.d, p0/z, z4.d",
        "orr z2.d, p0/m, z2.d, z0.d",
        "shrnb z2.s, z2.d, #32",
        "uzp1 z2.s, z2.s, z2.s",
        "movprfx z0, z5",
        "bsl z0.d, z0.d, z3.d, z2.d",
        "mov z16.d, z0.d"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map2.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256",
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "SVEBITPERM"
    ]
  },
  "Instructions": {
    "vpshufb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x00 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.16b, #0x8f",
        "and v2.16b, v18.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "vpshufb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.b, #-113",
        "and z2.d, z18.d, z2.d",
        "tbl v3.16b, {v17.16b}, v2.16b",
        "mov z1.q, z17.q[1]",
        "mov z4.d, z17.d",
        "mov z4.b, p6/m, z1.b",
        "tbl v2.16b, {v4.16b}, v2.16b",
        "mov z1.q, q2",
        "mov z16.d, z3.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vphaddw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x01 128-bit"
      ],
      "ExpectedArm64ASM": [
        "addp v16.8h, v17.8h, v18.8h"
      ]
    },
    "vphaddw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "addp z0.h, p7/m, z0.h, z18.h",
        "uzp1 z2.h, z0.h, z0.h",
        "uzp2 z1.h, z0.h, z0.h",
        "splice z2.d, p6, z2.d, z1.d",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vphaddd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "addp v16.4s, v17.4s, v18.4s"
      ]
    },
    "vphaddd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z0, z17",
        "addp z0.s, p7/m, z0.s, z18.s",
        "uzp1 z2.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z2.d, p6, z2.d, z1.d",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vphaddsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x03 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sqadd v16.8h, v2.8h, v3.8h"
      ]
    },
    "vphaddsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x03 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.h, z17.h, z18.h",
        "uzp2 z3.h, z17.h, z18.h",
        "sqadd z2.h, z2.h, z3.h",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpmaddubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "sxtl v3.8h, v18.8b",
        "mul v2.8h, v2.8h, v3.8h",
        "uxtl2 v3.8h, v17.16b",
        "sxtl2 v4.8h, v18.16b",
        "mul v3.8h, v3.8h, v4.8h",
        "uzp1 v4.8h, v2.8h, v3.8h",
        "uzp2 v2.8h, v2.8h, v3.8h",
        "sqadd v16.8h, v4.8h, v2.8h"
      ]
    },
    "vpmaddubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map 2 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z2.h, z17.b",
        "sunpklo z3.h, z18.b",
        "mul z2.h, z2.h, z3.h",
        "uunpkhi z3.h, z17.b",
        "sunpkhi z4.h, z18.b",
        "mul z3.h, z3.h, z4.h",
        "uzp1 z4.h, z2.h, z3.h",
        "uzp2 z2.h, z2.h, z3.h",
        "sqadd z16.h, z4.h, z2.h"
      ]
    },
    "vphsubw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sub v16.8h, v2.8h, v3.8h"
      ]
    },
    "vphsubw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.h, z17.h, z18.h",
        "uzp2 z3.h, z17.h, z18.h",
        "sub z2.h, z2.h, z3.h",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vphsubd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x06 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v18.4s",
        "uzp2 v3.4s, v17.4s, v18.4s",
        "sub v16.4s, v2.4s, v3.4s"
      ]
    },
    "vphsubd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.s, z17.s, z18.s",
        "uzp2 z3.s, z17.s, z18.s",
        "sub z2.s, z2.s, z3.s",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vphsubsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x07 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.8h, v17.8h, v18.8h",
        "uzp2 v3.8h, v17.8h, v18.8h",
        "sqsub v16.8h, v2.8h, v3.8h"
      ]
    },
    "vphsubsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x07 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.h, z17.h, z18.h",
        "uzp2 z3.h, z17.h, z18.h",
        "sqsub z2.h, z2.h, z3.h",
        "mov z1.d, z2.d[2]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[1]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpsignb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.16b, v18.16b, #7",
        "srshr v2.16b, v2.16b, #7",
        "mul v16.16b, v17.16b, v2.16b"
      ]
    },
    "vpsignb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "sqshl z2.b, p7/m, z2.b, #7",
        "srshr z2.b, p7/m, z2.b, #7",
        "mul z16.b, z17.b, z2.b"
      ]
    },
    "vpsignw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.8h, v18.8h, #15",
        "srshr v2.8h, v2.8h, #15",
        "mul v16.8h, v17.8h, v2.8h"
      ]
    },
    "vpsignw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "sqshl z2.h, p7/m, z2.h, #15",
        "srshr z2.h, p7/m, z2.h, #15",
        "mul z16.h, z17.h, z2.h"
      ]
    },
    "vpsignd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqshl v2.4s, v18.4s, #31",
        "srshr v2.4s, v2.4s, #31",
        "mul v16.4s, v17.4s, v2.4s"
      ]
    },
    "vpsignd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x0a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "sqshl z2.s, p7/m, z2.s, #31",
        "srshr z2.s, p7/m, z2.s, #31",
        "mul z16.s, z17.s, z2.s"
      ]
    },
    "vpmulhrsw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smull v2.4s, v17.4h, v18.4h",
        "smull2 v3.4s, v17.8h, v18.8h",
        "sshr v2.4s, v2.4s, #14",
        "sshr v3.4s, v3.4s, #14",
        "movi v4.4s, #0x1",
        "add v2.4s, v2.4s, v4.4s",
        "add v3.4s, v3.4s, v4.4s",
        "shrn v2.4h, v2.4s, #1",
        "mov v0.16b, v2.16b",
        "shrn2 v0.8h, v3.4s, #1",
        "mov v16.16b, v0.16b"
      ]
    },
    "vpmulhrsw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x0b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smullb z0.s, z17.h, z18.h",
        "smullt z1.s, z17.h, z18.h",
        "zip1 z2.s, z0.s, z1.s",
        "smullb z0.s, z17.h, z18.h",
        "smullt z1.s, z17.h, z18.h",
        "zip2 z3.s, z0.s, z1.s",
        "asr z2.s, p7/m, z2.s, #14",
        "asr z3.s, p7/m, z3.s, #14",
        "mov z4.s, #1",
        "add z2.s, z2.s, z4.s",
        "add z3.s, z3.s, z4.s",
        "shrnb z2.h, z2.s, #1",
        "uzp1 z2.h, z2.h, z2.h",
        "shrnb z1.h, z3.s, #1",
        "uzp1 z1.h, z1.h, z1.h",
        "movprfx z16, z2",
        "splice z16.h, p6, z16.h, z1.h"
      ]
    },
    "vpermilps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.4s, #0x3",
        "and v2.16b, v18.16b, v2.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "shl v2.16b, v2.16b, #2",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "dup v3.4s, w20",
        "add v2.16b, v3.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "vpermilps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "Map 2 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.s, #3",
        "and z2.d, z18.d, z2.d",
        "trn1 z2.b, z2.b, z2.b",
        "trn1 z2.h, z2.h, z2.h",
        "lsl z2.b, p7/m, z2.b, #2",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "mov z3.s, w20",
        "movi v4.2d, #0x0",
        "mov z5.b, #16",
        "mov z1.q, q5",
        "not p0.b, p7/z, p6.b",
        "mov z4.b, p0/m, z1.b",
        "add z3.b, z3.b, z4.b",
        "add z2.b, z3.b, z2.b",
        "tbl z16.b, {z17.b}, z2.b"
      ]
    },
    "vpermilpd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "Map 2 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v2.2d, v18.2d, #1",
        "mov w0, #0x1",
        "dup v3.2d, x0",
        "and v2.16b, v2.16b, v3.16b",
        "trn1 v2.16b, v2.16b, v2.16b",
        "trn1 v2.8h, v2.8h, v2.8h",
        "trn1 v2.4s, v2.4s, v2.4s",
        "shl v2.16b, v2.16b, #3",
        "mov x20, #0x100",
        "movk x20, #0x302, lsl #16",
        "movk x20, #0x504, lsl #32",
        "movk x20, #0x706, lsl #48",
        "dup v3.2d, x20",
        "add v2.16b, v3.16b, v2.16b",
        "tbl v16.16b, {v17.16b}, v2.16b"
      ]
    },
    "vpermilpd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "Map 2 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z18",
        "lsr z2.d, p7/m, z2.d, #1",
        "mov z3.d, #1",
        "and z2.d, z2.d, z3.d",
        "trn1 z2.b, z2.b, z2.b",
        "trn1 z2.h, z2.h, z2.h",
        "trn1 z2.s, z2.s, z2.s",
        "lsl z2.b, p7/m, z2.b, #3",
        "mov x20, #0x100",
        "movk x20, #0x302, lsl #16",
        "movk x20, #0x504, lsl #32",
        "movk x20, #0x706, lsl #48",
        "mov z3.d, x20",
        "movi v4.2d, #0x0",
        "mov z5.b, #16",
        "mov z1.q, q5",
        "not p0.b, p7/z, p6.b",
        "mov z4.b, p0/m, z1.b",
        "add z3.b, z3.b, z4.b",
        "add z2.b, z3.b, z2.b",
        "tbl z16.b, {z17.b}, z2.b"
      ]
    },
    "vtestps xmm0, xmm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "dup v2.4s, w20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestps ymm0, ymm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x80000000",
        "mov z2.s, w20",
        "and z3.d, z17.d, z16.d",
        "bic z4.d, z17.d, z16.d",
        "and z3.d, z3.d, z2.d",
        "and z2.d, z4.d, z2.d",
        "umaxv h3, p7, z3.h",
        "umaxv h2, p7, z2.h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestpd xmm0, xmm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "dup v2.2d, x20",
        "and v3.16b, v17.16b, v16.16b",
        "bic v4.16b, v17.16b, v16.16b",
        "and v3.16b, v3.16b, v2.16b",
        "and v2.16b, v4.16b, v2.16b",
        "umaxv h3, v3.8h",
        "umaxv h2, v2.8h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vtestpd ymm0, ymm1": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x8000000000000000",
        "mov z2.d, x20",
        "and z3.d, z17.d, z16.d",
        "bic z4.d, z17.d, z16.d",
        "and z3.d, z3.d, z2.d",
        "and z2.d, z4.d, z2.d",
        "umaxv h3, p7, z3.h",
        "umaxv h2, p7, z2.h",
        "umov w20, v3.h[0]",
        "umov w21, v2.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vcvtph2ps xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtl v16.4s, v17.4h"
      ]
    },
    "vcvtph2ps xmm0, [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x13 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "fcvtl v16.4s, v2.4h"
      ]
    },
    "vcvtph2ps ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x13 256-bit"
      ],
      "ExpectedArm64ASM": [
        "zip1 z16.h, z17.h, z17.h",
        "fcvtlt z16.s, p7/m, z16.h"
      ]
    },
    "vcvtph2ps ymm0, [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x13 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x4]",
        "zip1 z16.h, z2.h, z2.h",
        "fcvtlt z16.s, p7/m, z16.h"
      ]
    },
    "vpermps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x16 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.s, #7",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "mov z3.s, w20",
        "and z2.d, z17.d, z2.d",
        "trn1 z2.b, z2.b, z2.b",
        "trn1 z2.h, z2.h, z2.h",
        "lsl z2.b, p7/m, z2.b, #2",
        "add z2.b, z2.b, z3.b",
        "tbl z16.b, {z18.b}, z2.b"
      ]
    },
    "vptest xmm0, xmm1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "and v2.16b, v16.16b, v17.16b",
        "bic v3.16b, v17.16b, v16.16b",
        "umaxv h2, v2.8h",
        "umaxv h3, v3.8h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vptest ymm0, ymm1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b01 0x17 256-bit"
      ],
      "ExpectedArm64ASM": [
        "and z2.d, z16.d, z17.d",
        "bic z3.d, z17.d, z16.d",
        "umaxv h2, p7, z2.h",
        "umaxv h3, p7, z3.h",
        "umov w20, v2.h[0]",
        "umov w21, v3.h[0]",
        "mov w27, #0x0",
        "cmp x21, #0x0 (0)",
        "cset x21, ne",
        "cmp w20, #0x0 (0)",
        "mrs x20, nzcv",
        "bfi w20, w21, #29, #1",
        "mov w26, #0x1",
        "msr nzcv, x20"
      ]
    },
    "vbroadcastss xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x18 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]"
      ]
    },
    "vbroadcastss ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rw {z16.s}, p7/z, [x4]"
      ]
    },
    "vbroadcastsd ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rd {z16.d}, p7/z, [x4]"
      ]
    },
    "vbroadcastf128 ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rqb {z16.b}, p7/z, [x4]"
      ]
    },
    "vpabsb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.16b, v17.16b"
      ]
    },
    "vpabsb ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "abs z16.b, p7/m, z17.b"
      ]
    },
    "vpabsw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.8h, v17.8h"
      ]
    },
    "vpabsw ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "abs z16.h, p7/m, z17.h"
      ]
    },
    "vpabsd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "abs v16.4s, v17.4s"
      ]
    },
    "vpabsd ymm0, ymm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x1e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "abs z16.s, p7/m, z17.s"
      ]
    },
    "vpmovsxbw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.8h, v17.8b"
      ]
    },
    "vpmovsxbw ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x20 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z16.h, z17.b"
      ]
    },
    "vpmovsxbd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.8h, v17.8b",
        "sxtl v16.4s, v2.4h"
      ]
    },
    "vpmovsxbd ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x21 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z2.h, z17.b",
        "sunpklo z16.s, z2.h"
      ]
    },
    "vpmovsxbq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.8h, v17.8b",
        "sxtl v2.4s, v2.4h",
        "sxtl v16.2d, v2.2s"
      ]
    },
    "vpmovsxbq ymm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x22 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z2.h, z17.b",
        "sunpklo z2.s, z2.h",
        "sunpklo z16.d, z2.s"
      ]
    },
    "vpmovsxwd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x23 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.4s, v17.4h"
      ]
    },
    "vpmovsxwd ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x23 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z16.s, z17.h"
      ]
    },
    "vpmovsxwq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x24 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.4s, v17.4h",
        "sxtl v16.2d, v2.2s"
      ]
    },
    "vpmovsxwq ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x24 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z2.s, z17.h",
        "sunpklo z16.d, z2.s"
      ]
    },
    "vpmovsxdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x25 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v16.2d, v17.2s"
      ]
    },
    "vpmovsxdq ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x25 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sunpklo z16.d, z17.s"
      ]
    },
    "vpmuldq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x28 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 v2.4s, v17.4s, v17.4s",
        "uzp1 v3.4s, v18.4s, v18.4s",
        "smull v16.2d, v2.2s, v3.2s"
      ]
    },
    "vpmuldq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x28 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uzp1 z2.s, z17.s, z17.s",
        "uzp1 z3.s, z18.s, z18.s",
        "smullb z0.d, z2.s, z3.s",
        "smullt z1.d, z2.s, z3.s",
        "zip1 z16.d, z0.d, z1.d"
      ]
    },
    "vpcmpeqq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x29 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmeq v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpcmpeqq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x29 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpeq p0.d, p7/z, z17.d, z18.d",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d",
        "msr nzcv, x0"
      ]
    },
    "vmovntdqa xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x2a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldnt1b {z16.b}, p6/z, [x4]"
      ]
    },
    "vmovntdqa ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x2a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldnt1b {z16.b}, p7/z, [x4]"
      ]
    },
    "vpackusdw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x2b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtun v16.4h, v17.4s",
        "sqxtun2 v16.8h, v18.4s"
      ]
    },
    "vpackusdw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x2b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "sqxtunb z1.h, z18.s",
        "uzp1 z1.h, z1.h, z1.h",
        "sqxtunb z2.h, z17.s",
        "uzp1 z2.h, z2.h, z2.h",
        "splice z2.h, p6, z2.h, z1.h",
        "mov z1.d, z2.d[1]",
        "mov z3.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z3.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z2.d[2]",
        "mov z16.d, z3.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vmaskmovps xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z2.s}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x2d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z2.d}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovps [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vmaskmovpd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x2f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmovzxbw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x30 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.8h, v17.8b"
      ]
    },
    "vpmovzxbw ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x30 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z16.h, z17.b"
      ]
    },
    "vpmovzxbd xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x31 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v16.4s, v2.4h"
      ]
    },
    "vpmovzxbd ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x31 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z2.h, z17.b",
        "uunpklo z16.s, z2.h"
      ]
    },
    "vpmovzxbq xmm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x32 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.8h, v17.8b",
        "uxtl v2.4s, v2.4h",
        "uxtl v16.2d, v2.2s"
      ]
    },
    "vpmovzxbq ymm0, xmm1": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x32 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z2.h, z17.b",
        "uunpklo z2.s, z2.h",
        "uunpklo z16.d, z2.s"
      ]
    },
    "vpmovzxwd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x33 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.4s, v17.4h"
      ]
    },
    "vpmovzxwd ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x33 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z16.s, z17.h"
      ]
    },
    "vpmovzxwq xmm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x34 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v2.4s, v17.4h",
        "uxtl v16.2d, v2.2s"
      ]
    },
    "vpmovzxwq ymm0, xmm1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x34 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z2.s, z17.h",
        "uunpklo z16.d, z2.s"
      ]
    },
    "vpmovzxdq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x35 128-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtl v16.2d, v17.2s"
      ]
    },
    "vpmovzxdq ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x35 256-bit"
      ],
      "ExpectedArm64ASM": [
        "uunpklo z16.d, z17.s"
      ]
    },
    "vpermd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b01 0x36 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.s, #7",
        "mov w20, #0x100",
        "movk w20, #0x302, lsl #16",
        "mov z3.s, w20",
        "and z2.d, z17.d, z2.d",
        "trn1 z2.b, z2.b, z2.b",
        "trn1 z2.h, z2.h, z2.h",
        "lsl z2.b, p7/m, z2.b, #2",
        "add z2.b, z2.b, z3.b",
        "tbl z16.b, {z18.b}, z2.b"
      ]
    },
    "vpcmpgtq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x37 128-bit"
      ],
      "ExpectedArm64ASM": [
        "cmgt v16.2d, v17.2d, v18.2d"
      ]
    },
    "vpcmpgtq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x37 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x0, nzcv",
        "cmpgt p0.d, p7/z, z17.d, z18.d",
        "not z0.d, p0/m, z17.d",
        "movprfx z16.d, p0/z, z17.d",
        "orr z16.d, p0/m, z16.d, z0.d",
        "msr nzcv, x0"
      ]
    },
    "vpminsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x38 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpminsb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.b, p7/m, z16.b, z17.b"
      ]
    },
    "vpminsb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpminsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smin z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpminsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x39 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smin v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpminsd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.s, p7/m, z16.s, z17.s"
      ]
    },
    "vpminsd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smin z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpminsd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smin z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpminuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpminuw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.h, p7/m, z16.h, z17.h"
      ]
    },
    "vpminuw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpminuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umin z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpminud xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umin v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpminud ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.s, p7/m, z16.s, z17.s"
      ]
    },
    "vpminud ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umin z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpminud ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umin z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpmaxsb xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.16b, v17.16b, v18.16b"
      ]
    },
    "vpmaxsb ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpmaxsb ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.b, p7/m, z16.b, z17.b"
      ]
    },
    "vpmaxsb ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smax z16.b, p7/m, z16.b, z18.b"
      ]
    },
    "vpmaxsd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "smax v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpmaxsd ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.s, p7/m, z16.s, z17.s"
      ]
    },
    "vpmaxsd ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "smax z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpmaxsd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "smax z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpmaxuw xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.8h, v17.8h, v18.8h"
      ]
    },
    "vpmaxuw ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.h, p7/m, z16.h, z17.h"
      ]
    },
    "vpmaxuw ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpmaxuw ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umax z16.h, p7/m, z16.h, z18.h"
      ]
    },
    "vpmaxud xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x3f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umax v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpmaxud ymm0, ymm0, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpmaxud ymm0, ymm1, ymm0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Aliasing source and destination",
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "umax z16.s, p7/m, z16.s, z17.s"
      ]
    },
    "vpmaxud ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0x3f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "umax z16.s, p7/m, z16.s, z18.s"
      ]
    },
    "vpmulld xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mul v16.4s, v17.4s, v18.4s"
      ]
    },
    "vpmulld ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mul z16.s, z17.s, z18.s"
      ]
    },
    "vphminposuw xmm0, xmm1": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3008]",
        "zip1 v3.8h, v2.8h, v17.8h",
        "zip2 v2.8h, v2.8h, v17.8h",
        "umin v2.4s, v3.4s, v2.4s",
        "uminv s2, v2.4s",
        "rev32 v16.8h, v2.8h"
      ]
    },
    "vpsrlvd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x45 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "ushl v16.4s, v17.4s, v0.4s"
      ]
    },
    "vpsrlvd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x45 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, #32",
        "umin z1.s, p7/m, z1.s, z18.s",
        "movprfx z16, z17",
        "lsr z16.s, p7/m, z16.s, z1.s"
      ]
    },
    "vpsrlvq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x45 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "neg v0.2d, v0.2d",
        "ushl v16.2d, v17.2d, v0.2d"
      ]
    },
    "vpsrlvq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x45 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.d, #64",
        "umin z1.d, p7/m, z1.d, z18.d",
        "movprfx z16, z17",
        "lsr z16.d, p7/m, z16.d, z1.d"
      ]
    },
    "vpsravd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x46 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x1f",
        "umin v0.4s, v0.4s, v18.4s",
        "neg v0.4s, v0.4s",
        "sshl v16.4s, v17.4s, v0.4s"
      ]
    },
    "vpsravd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.s, #31",
        "umin z0.s, p7/m, z0.s, z18.s",
        "movprfx z16, z17",
        "asr z16.s, p7/m, z16.s, z0.s"
      ]
    },
    "vpsllvd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x47 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.4s, #0x20",
        "umin v0.4s, v0.4s, v18.4s",
        "ushl v16.4s, v17.4s, v0.4s"
      ]
    },
    "vpsllvd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x47 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.s, #32",
        "umin z1.s, p7/m, z1.s, z18.s",
        "movprfx z16, z17",
        "lsl z16.s, p7/m, z16.s, z1.s"
      ]
    },
    "vpsllvq xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x47 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w0, #0x40",
        "dup v0.2d, x0",
        "cmhi v1.2d, v18.2d, v0.2d",
        "bif v0.16b, v18.16b, v1.16b",
        "ushl v16.2d, v17.2d, v0.2d"
      ]
    },
    "vpsllvq ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x47 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.d, #64",
        "umin z1.d, p7/m, z1.d, z18.d",
        "movprfx z16, z17",
        "lsl z16.d, p7/m, z16.d, z1.d"
      ]
    },
    "vpbroadcastd xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.4s, v17.s[0]"
      ]
    },
    "vpbroadcastd xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x58 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.4s}, [x4]"
      ]
    },
    "vpbroadcastd ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.s, s17"
      ]
    },
    "vpbroadcastd ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x58 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rw {z16.s}, p7/z, [x4]"
      ]
    },
    "vpbroadcastq xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.2d, v17.d[0]"
      ]
    },
    "vpbroadcastq xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x59 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.2d}, [x4]"
      ]
    },
    "vpbroadcastq ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, d17"
      ]
    },
    "vpbroadcastq ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x59 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rd {z16.d}, p7/z, [x4]"
      ]
    },
    "vbroadcasti128 ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x5a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rqb {z16.b}, p7/z, [x4]"
      ]
    },
    "vpbroadcastb xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x78 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.16b, v17.b[0]"
      ]
    },
    "vpbroadcastb xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x78 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.16b}, [x4]"
      ]
    },
    "vpbroadcastb ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x78 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.b, b17"
      ]
    },
    "vpbroadcastb ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x78 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rb {z16.b}, p7/z, [x4]"
      ]
    },
    "vpbroadcastw xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x79 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v16.8h, v17.h[0]"
      ]
    },
    "vpbroadcastw xmm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x79 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1r {v16.8h}, [x4]"
      ]
    },
    "vpbroadcastw ymm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x79 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.h, h17"
      ]
    },
    "vpbroadcastw ymm0, [rax]": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0x79 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ld1rh {z16.h}, p7/z, [x4]"
      ]
    },
    "vpmaskmovd xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z17.s, #0",
        "ld1w {z2.s}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z17.s, #0",
        "ld1w {z16.s}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq xmm0, xmm1, [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0x8c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z17.d, #0",
        "ld1d {z2.d}, p0/z, [x4]",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq ymm0, ymm1, [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z17.d, #0",
        "ld1d {z16.d}, p0/z, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovd [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z16.s, #0",
        "st1w {z17.s}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq [rax], xmm0, xmm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpmaskmovq [rax], ymm0, ymm1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x8e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z16.d, #0",
        "st1d {z17.d}, p0, [x4]",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "sel z2.s, p0, z0.s, z16.s",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "sel z2.s, p0, z0.s, z16.s",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 51,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v5.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[3], [x1]",
        "mov w0, v3.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v3.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v3.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v3.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 51,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v5.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[3], [x1]",
        "mov w0, v3.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v3.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v3.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v3.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x90 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherdq ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x90 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqd xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x91 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "sel z2.d, p0, z0.d, z16.d",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "shl v5.2d, v17.2d, #1",
        "shl v4.2d, v4.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "shl v5.2d, v17.2d, #2",
        "shl v4.2d, v4.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vpgatherqq ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x91 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "sel z2.s, p0, z0.s, z16.s",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p6/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "sel z2.s, p0, z0.s, z16.s",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 51,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v5.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v5.s}[3], [x1]",
        "mov w0, v3.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[0]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v3.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[1]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v3.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[2]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v3.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[3]",
        "add x1, x4, w0, sxtw #1",
        "ld1 {v2.s}[3], [x1]",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.s, p7/z, z18.s, #0",
        "ld1w {z0.s}, p0/z, [x4, z17.s, sxtw #2]",
        "mov z16.s, p0/m, z0.s",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdps ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 51,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v5.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v17.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v5.s}[3], [x1]",
        "mov w0, v3.s[0]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[0]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v3.s[1]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[1]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[1], [x1]",
        "mov w0, v3.s[2]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[2]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[2], [x1]",
        "mov w0, v3.s[3]",
        "tbz w0, #31, #+0x10",
        "smov x0, v4.s[3]",
        "add x1, x4, w0, sxtw #3",
        "ld1 {v2.s}[3], [x1]",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sxtl v2.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x92 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshll v2.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd ymm0, [xmm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sxtl2 v4.2d, v17.4s",
        "sxtl v5.2d, v17.2s",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd ymm0, [xmm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #1",
        "sshll v5.2d, v17.2s, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd ymm0, [xmm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #2",
        "sshll v5.2d, v17.2s, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherdpd ymm0, [xmm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "Map 2 0b01 0x92 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "sshll2 v4.2d, v17.4s, #3",
        "sshll v5.2d, v17.2s, #3",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "mov v2.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v2.s}[1], [x1]",
        "movi v18.2d, #0x0",
        "zip1 v2.2d, v2.2d, v18.2d",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.8b, v2.8b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [ymm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [ymm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #1",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [ymm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #2",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqps xmm0, [ymm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mrs x20, nzcv",
        "mov v3.16b, v16.16b",
        "mov w0, v18.s[0]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[0], [x1]",
        "mov w0, v18.s[1]",
        "tbz w0, #31, #+0x10",
        "mov x0, v17.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[1], [x1]",
        "mov w0, v18.s[2]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[0]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[2], [x1]",
        "mov w0, v18.s[3]",
        "tbz w0, #31, #+0x10",
        "mov x0, v2.d[1]",
        "add x1, x4, x0, lsl #3",
        "ld1 {v3.s}[3], [x1]",
        "movi v2.2d, #0x0",
        "mov z1.q, q2",
        "not p0.b, p7/z, p6.b",
        "mov z3.b, p0/m, z1.b",
        "mov v16.16b, v3.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd xmm0, [xmm1*1 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd xmm0, [xmm1*2 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd xmm0, [xmm1*4 + rax], xmm2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v2.2d, v17.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z2.d]",
        "sel z2.d, p0, z0.d, z16.d",
        "movi v18.2d, #0x0",
        "mov z1.q, q18",
        "not p0.b, p7/z, p6.b",
        "mov z2.b, p0/m, z1.b",
        "mov v16.16b, v2.16b",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd xmm0, [xmm1*8 + rax], xmm2": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b01 0x93 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "sel z2.d, p0, z0.d, z16.d",
        "mov v16.16b, v2.16b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd ymm0, [ymm1*1 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd ymm0, [ymm1*2 + rax], ymm2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "shl v5.2d, v17.2d, #1",
        "shl v4.2d, v4.2d, #1",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd ymm0, [ymm1*4 + rax], ymm2": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z16.q[1]",
        "mov z3.q, z18.q[1]",
        "mov z4.q, z17.q[1]",
        "shl v5.2d, v17.2d, #2",
        "shl v4.2d, v4.2d, #2",
        "mrs x20, nzcv",
        "cmplt p0.d, p6/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z5.d]",
        "sel z5.d, p0, z0.d, z16.d",
        "cmplt p0.d, p6/z, z3.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z4.d]",
        "mov z2.d, p0/m, z0.d",
        "mov z1.q, q2",
        "mov z16.d, z5.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vgatherqpd ymm0, [ymm1*8 + rax], ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x93 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "cmplt p0.d, p7/z, z18.d, #0",
        "ld1d {z0.d}, p0/z, [x4, z17.d, lsl #3]",
        "mov z16.d, p0/m, z0.d",
        "movi v18.2d, #0x0",
        "msr nzcv, x20"
      ]
    },
    "vfmaddsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v17.16b, v2.16b",
        "fmla v2.4s, v16.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2368]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z17.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmaddsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x96 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v17.16b, v2.16b",
        "fmla v2.2d, v16.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x96 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2384]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z17.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsubadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v17.16b, v2.16b",
        "fmla v2.4s, v16.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2400]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z17.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsubadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0x97 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v17.16b, v2.16b",
        "fmla v2.2d, v16.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x97 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2416]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z17.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmla v2.4s, v16.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fmla z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x98 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmla v2.2d, v16.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x98 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fmla z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmla v2.4s, v16.4s, v18.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x99 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmla v2.2d, v16.2d, v18.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmls z2.s, p6/m, z16.s, z18.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmls z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmls z2.d, p6/m, z16.d, z18.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmls z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmls z2.s, p6/m, z16.s, z18.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmls z2.d, p6/m, z16.d, z18.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmls v2.4s, v16.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fmls z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmadd132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmls v2.2d, v16.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fmls z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmadd132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmls v2.4s, v16.4s, v18.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v17.16b",
        "fmls v2.2d, v16.2d, v18.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub132ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmla z2.s, p6/m, z16.s, z18.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub132ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmla z0.s, p7/m, z16.s, z18.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmsub132pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmla z2.d, p6/m, z16.d, z18.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub132pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0x9e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z17.d",
        "fnmla z0.d, p7/m, z16.d, z18.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmsub132ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmla z2.s, p6/m, z16.s, z18.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub132sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0x9f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z17.d",
        "fnmla z2.d, p6/m, z16.d, z18.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmla v2.4s, v17.4s, v16.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fmla z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmla v2.2d, v17.2d, v16.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xa8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fmla z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmla v2.4s, v17.4s, v16.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmla v2.2d, v17.2d, v16.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmls z2.s, p6/m, z17.s, z16.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmls z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaa 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmls z2.d, p6/m, z17.d, z16.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xaa 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmls z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmls z2.s, p6/m, z17.s, z16.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xab 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmls z2.d, p6/m, z17.d, z16.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmls v2.4s, v17.4s, v16.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fmls z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xac 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmls v2.2d, v17.2d, v16.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xac 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fmls z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmadd213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmls v2.4s, v17.4s, v16.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xad 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v18.16b",
        "fmls v2.2d, v17.2d, v16.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmla z2.s, p6/m, z17.s, z16.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmla z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xae 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmla z2.d, p6/m, z17.d, z16.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xae 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z0.d, z18.d",
        "fnmla z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfnmsub213ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmla z2.s, p6/m, z17.s, z16.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub213sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xaf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z18.d",
        "fnmla z2.d, p6/m, z17.d, z16.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmla v2.4s, v17.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xb8 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmla v2.2d, v17.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xb8 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmla z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vfmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmla v2.4s, v17.4s, v18.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xb9 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmla v2.2d, v17.2d, v18.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmls z2.s, p6/m, z17.s, z18.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmls z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xba 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmls z2.d, p6/m, z17.d, z18.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xba 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmls z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vfmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmls z2.s, p6/m, z17.s, z18.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmls z2.d, p6/m, z17.d, z18.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmls v2.4s, v17.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfnmadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmls v2.2d, v17.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xbc 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fmls z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vfnmadd231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmls v2.4s, v17.4s, v18.4s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmadd231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "fmls v2.2d, v17.2d, v18.2d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmla z2.s, p6/m, z17.s, z18.s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmla z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfnmsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xbe 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmla z2.d, p6/m, z17.d, z18.d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xbe 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fnmla z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vfnmsub231ss xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmla z2.s, p6/m, z17.s, z18.s",
        "mov v0.16b, v16.16b",
        "mov v0.s[0], v2.s[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfnmsub231sd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xbf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.d, z16.d",
        "fnmla z2.d, p6/m, z17.d, z18.d",
        "mov v0.16b, v16.16b",
        "mov v0.d[0], v2.d[0]",
        "mov v2.16b, v0.16b",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v18.16b, v2.16b",
        "fmla v2.4s, v17.4s, v16.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2368]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmaddsub213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v18.16b, v2.16b",
        "fmla v2.2d, v17.2d, v16.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2384]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsubadd213ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v18.16b, v2.16b",
        "fmla v2.4s, v17.4s, v16.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd213ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2400]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.s, p7/m, z17.s, z16.s",
        "mov z16.d, z0.d"
      ]
    },
    "vfmsubadd213pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xa7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v18.16b, v2.16b",
        "fmla v2.2d, v17.2d, v16.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd213pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 2 0b01 0xa7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2416]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z18.d, z2.d",
        "mov z0.d, z2.d",
        "fmla z0.d, p7/m, z17.d, z16.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmaddsub231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3040]",
        "eor v2.16b, v16.16b, v2.16b",
        "fmla v2.4s, v17.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2368]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z16.d, z2.d",
        "mov z16.d, z2.d",
        "fmla z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfmaddsub231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xb6 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3072]",
        "eor v2.16b, v16.16b, v2.16b",
        "fmla v2.2d, v17.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmaddsub231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb6 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2384]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z16.d, z2.d",
        "mov z16.d, z2.d",
        "fmla z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vfmsubadd231ps xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3104]",
        "eor v2.16b, v16.16b, v2.16b",
        "fmla v2.4s, v17.4s, v18.4s",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd231ps ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2400]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z16.d, z2.d",
        "mov z16.d, z2.d",
        "fmla z16.s, p7/m, z17.s, z18.s"
      ]
    },
    "vfmsubadd231pd xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xb7 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3136]",
        "eor v2.16b, v16.16b, v2.16b",
        "fmla v2.2d, v17.2d, v18.2d",
        "mov v16.16b, v2.16b"
      ]
    },
    "vfmsubadd231pd ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xb7 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x0, [x28, #2416]",
        "ld1b {z2.b}, p7/z, [x0]",
        "eor z2.d, z16.d, z2.d",
        "mov z16.d, z2.d",
        "fmla z16.d, p7/m, z17.d, z18.d"
      ]
    },
    "vaesimc xmm0, xmm1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xdb 128-bit"
      ],
      "ExpectedArm64ASM": [
        "aesimc v16.16b, v17.16b"
      ]
    },
    "vaesenc xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xdc 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "aesmc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b"
      ]
    },
    "vaesenc ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 2 0b01 0xdc 256-bit"
      ]
    },
    "vaesenclast xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xdd 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aese v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b"
      ]
    },
    "vaesenclast ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 2 0b01 0xdd 256-bit"
      ]
    },
    "vaesdec xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b01 0xde 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "aesimc v0.16b, v0.16b",
        "eor v16.16b, v0.16b, v18.16b"
      ]
    },
    "vaesdec ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 2 0b01 0xde 256-bit"
      ]
    },
    "vaesdeclast xmm0, xmm1, xmm2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v0.16b, v17.16b",
        "aesd v0.16b, v2.16b",
        "eor v16.16b, v0.16b, v18.16b"
      ]
    },
    "vaesdeclast ymm0, ymm1, ymm2": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 2 0b01 0xdf 256-bit"
      ]
    },
    "andn eax, ebx, ecx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 32-bit"
      ],
      "ExpectedArm64ASM": [
        "bic w4, w7, w6",
        "subs w26, w4, #0x0 (0)"
      ]
    },
    "andn rax, rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b00 0xf2 64-bit"
      ],
      "ExpectedArm64ASM": [
        "bic x4, x7, x6",
        "subs x26, x4, #0x0 (0)"
      ]
    },
    "bzhi eax, ebx, ecx": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b00 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl w20, w20, w7",
        "bic w20, w6, w20",
        "tst x7, #0xe0",
        "csel w4, w6, w20, ne",
        "cset x20, eq",
        "cmp w4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "bzhi rax, rbx, rcx": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 2 0b00 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xffffffffffffffff",
        "lsl x20, x20, x7",
        "bic x20, x6, x20",
        "tst x7, #0xc0",
        "csel x4, x6, x20, ne",
        "cset x20, eq",
        "cmp x4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "pext eax, ebx, ecx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b10 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "cbz w7, #+0x2c",
        "mov w0, w7",
        "mov w2, w6",
        "mov w4, wzr",
        "cbz w0, #+0x20",
        "clz w1, w0",
        "lsl w2, w2, w1",
        "lsl w0, w0, w1",
        "extr w4, w4, w2, #31",
        "bfc w0, #31, #1",
        "b #-0x18",
        "mov w4, wzr"
      ]
    },
    "pext rax, rbx, rcx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b10 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "cbz x7, #+0x2c",
        "mov x0, x7",
        "mov x2, x6",
        "mov x4, xzr",
        "cbz x0, #+0x20",
        "clz x1, x0",
        "lsl x2, x2, x1",
        "lsl x0, x0, x1",
        "extr x4, x4, x2, #63",
        "bfc x0, #63, #1",
        "b #-0x18",
        "mov x4, xzr"
      ]
    },
    "pdep eax, ebx, ecx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov w4, #0x0",
        "cbz w7, #+0x2c",
        "neg w2, w1",
        "and w2, w2, w1",
        "sbfx w3, w0, #0, #1",
        "eor w1, w1, w2",
        "and w2, w3, w2",
        "neg w3, w1",
        "orr w4, w4, w2",
        "lsr w0, w0, #1",
        "and w2, w1, w3",
        "cbnz w2, #-0x1c"
      ]
    },
    "pdep rax, rbx, rcx": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 2 0b11 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x0, x6",
        "mov x1, x7",
        "mov x4, #0x0",
        "cbz x7, #+0x2c",
        "neg x2, x1",
        "and x2, x2, x1",
        "sbfx x3, x0, #0, #1",
        "eor x1, x1, x2",
        "and x2, x3, x2",
        "neg x3, x1",
        "orr x4, x4, x2",
        "lsr x0, x0, #1",
        "and x2, x1, x3",
        "cbnz x2, #-0x1c"
      ]
    },
    "mulx eax, ebx, ecx": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 2 0b11 0xf6 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mul w6, w7, w5",
        "ubfx x0, x7, #0, #32",
        "ubfx x1, x5, #0, #32",
        "mul x4, x0, x1",
        "lsr x4, x4, #32"
      ]
    },
    "mulx eax, eax, ebx": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Same two destinations should only compute high part",
        "Map 2 0b11 0xf6 32-bit"
      ],
      "ExpectedArm64ASM": [
        "ubfx x0, x6, #0, #32",
        "ubfx x1, x5, #0, #32",
        "mul x4, x0, x1",
        "lsr x4, x4, #32"
      ]
    },
    "mulx eax, ebx, [ecx]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 2 0b11 0xf6 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w7",
        "ldr w20, [x20]",
        "mul w6, w20, w5",
        "ubfx x0, x20, #0, #32",
        "ubfx x1, x5, #0, #32",
        "mul x4, x0, x1",
        "lsr x4, x4, #32"
      ]
    },
    "mulx rax, rbx, rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b11 0xf6 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mul x6, x7, x5",
        "umulh x4, x7, x5"
      ]
    },
    "mulx rax, rax, rbx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Same two destinations should only compute high part",
        "Map 2 0b11 0xf6 64-bit"
      ],
      "ExpectedArm64ASM": [
        "umulh x4, x6, x5"
      ]
    },
    "mulx rax, rbx, [rcx]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b11 0xf6 64-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x7]",
        "mul x6, x20, x5",
        "umulh x4, x20, x5"
      ]
    },
    "bextr eax, ebx, ecx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb w20, w7",
        "lsr w21, w6, w20",
        "mov w22, #0x0",
        "cmp w20, #0x1f (31)",
        "csel w20, w21, w22, ls",
        "ubfx w21, w7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl w22, w22, w21",
        "bic w22, w20, w22",
        "cmp w21, #0x1f (31)",
        "csel w4, w22, w20, ls",
        "cmp w4, #0x0 (0)"
      ]
    },
    "bextr rax, rbx, rcx": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 2 0b00 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "uxtb x20, w7",
        "lsr x21, x6, x20",
        "mov w22, #0x0",
        "cmp x20, #0x3f (63)",
        "csel x20, x21, x22, ls",
        "ubfx x21, x7, #8, #8",
        "mov x22, #0xffffffffffffffff",
        "lsl x22, x22, x21",
        "bic x22, x20, x22",
        "cmp x21, #0x3f (63)",
        "csel x4, x22, x20, ls",
        "cmp x4, #0x0 (0)"
      ]
    },
    "shlx eax, ebx, ecx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "lsl w4, w6, w7"
      ]
    },
    "shlx eax, [ebx], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b01 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldr w20, [x20]",
        "lsl w4, w20, w7"
      ]
    },
    "shlx rax, rbx, rcx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b01 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "lsl x4, x6, x7"
      ]
    },
    "shlx rax, [rbx], rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b01 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x6]",
        "lsl x4, x20, x7"
      ]
    },
    "sarx eax, ebx, ecx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b10 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "asr w4, w6, w7"
      ]
    },
    "sarx eax, [ebx], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b10 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldr w20, [x20]",
        "asr w4, w20, w7"
      ]
    },
    "sarx rax, rbx, rcx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b10 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "asr x4, x6, x7"
      ]
    },
    "sarx rax, [rbx], rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b10 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x6]",
        "asr x4, x20, x7"
      ]
    },
    "shrx eax, ebx, ecx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b11 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "lsr w4, w6, w7"
      ]
    },
    "shrx eax, [ebx], ecx": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 2 0b11 0xf7 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w20, w6",
        "ldr w20, [x20]",
        "lsr w4, w20, w7"
      ]
    },
    "shrx rax, rbx, rcx": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 2 0b11 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "lsr x4, x6, x7"
      ]
    },
    "shrx rax, [rbx], rcx": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 2 0b11 0xf7 64-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr x20, [x6]",
        "lsr x4, x20, x7"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map2_svebitperm.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256",
      "SVE128",
      "SVEBITPERM"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "pext eax, ebx, ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b10 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, w6",
        "fmov s1, w7",
        "bext z0.s, z0.s, z1.s",
        "mov w4, v0.s[0]"
      ]
    },
    "pext rax, rbx, rcx": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b10 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov d0, x6",
        "fmov d1, x7",
        "bext z0.d, z0.d, z1.d",
        "mov x4, v0.d[0]"
      ]
    },
    "pdep eax, ebx, ecx": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b11 0xf5 32-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov s0, w6",
        "fmov s1, w7",
        "bdep z0.s, z0.s, z1.s",
        "mov w4, v0.s[0]"
      ]
    },
    "pdep rax, rbx, rcx": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 2 0b11 0xf5 64-bit"
      ],
      "ExpectedArm64ASM": [
        "fmov d0, x6",
        "fmov d1, x7",
        "bdep z0.d, z0.d, z1.d",
        "mov x4, v0.d[0]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map3.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256",
      "SVE128"
    ],
    "DisabledHostFeatures": [
      "AFP"
    ]
  },
  "Instructions": {
    "vpermq ymm0, ymm1, 1": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 2": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 3": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 4": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 5": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 6": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 7": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 8": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 9": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 10": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 11": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 12": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 13": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 14": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermq ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, d17"
      ]
    },
    "vpermq ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[1]"
      ]
    },
    "vpermq ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[2]"
      ]
    },
    "vpermq ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x00 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[3]"
      ]
    },
    "vpermpd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, d17"
      ]
    },
    "vpermpd ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[1]"
      ]
    },
    "vpermpd ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[2]"
      ]
    },
    "vpermpd ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x01 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, z17.d[3]"
      ]
    },
    "vpblendd xmm0, xmm1, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0011b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0100b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0101b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0110b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 0111b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v16.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1000b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1011b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v16.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1100b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1101b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v16.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1110b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v16.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendd xmm0, xmm1, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x02 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpblendd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": []
    },
    "vpblendd ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[7]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpblendd ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, s16",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z16.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpblendd ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x02 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpermilps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[0]",
        "mov v2.s[1], v17.s[0]",
        "mov v2.s[2], v17.s[0]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[0]"
      ]
    },
    "vpermilps xmm0, xmm1, 01010101b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[1]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v17.s[1]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[1]"
      ]
    },
    "vpermilps xmm0, xmm1, 10101010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[2]",
        "mov v2.s[1], v17.s[2]",
        "mov v2.s[2], v17.s[2]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[2]"
      ]
    },
    "vpermilps xmm0, xmm1, 11111111b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x04 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v17.s[3]",
        "mov v2.s[1], v17.s[3]",
        "mov v2.s[2], v17.s[3]",
        "mov v16.16b, v2.16b",
        "mov v16.s[3], v17.s[3]"
      ]
    },
    "vpermilps ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s17",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpermilps ymm0, ymm1, 01010101b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpermilps ymm0, ymm1, 10101010b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpermilps ymm0, ymm1, 11111111b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x04 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[7]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd xmm0, xmm1, 00b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v17.d[0]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[0]"
      ]
    },
    "vpermilpd xmm0, xmm1, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v17.d[1]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[0]"
      ]
    },
    "vpermilpd xmm0, xmm1, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v17.d[0]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "vpermilpd xmm0, xmm1, 11b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x05 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v17.d[1]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "vpermilpd ymm0, ymm1, 0000b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0001b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0010b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0011b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0100b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0101b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0110b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 0111b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1000b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1001b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1010b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1011b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1100b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1101b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1110b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vpermilpd ymm0, ymm1, 1111b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x05 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00000011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00010011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00100011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00110011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00001000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00011000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00101000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 00111000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10001000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2f128 ymm0, ymm1, ymm2, 10000011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x06 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vroundps xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.4s, v17.4s"
      ]
    },
    "vroundps xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.4s, v17.4s"
      ]
    },
    "vroundps xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.4s, v17.4s"
      ]
    },
    "vroundps xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.4s, v17.4s"
      ]
    },
    "vroundps xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x08 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.4s, v17.4s"
      ]
    },
    "vroundps ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn z16.s, p7/m, z17.s"
      ]
    },
    "vroundps ymm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm z16.s, p7/m, z17.s"
      ]
    },
    "vroundps ymm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp z16.s, p7/m, z17.s"
      ]
    },
    "vroundps ymm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz z16.s, p7/m, z17.s"
      ]
    },
    "vroundps ymm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x08 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z16.s, p7/m, z17.s"
      ]
    },
    "vroundpd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn v16.2d, v17.2d"
      ]
    },
    "vroundpd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm v16.2d, v17.2d"
      ]
    },
    "vroundpd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp v16.2d, v17.2d"
      ]
    },
    "vroundpd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz v16.2d, v17.2d"
      ]
    },
    "vroundpd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x09 128-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti v16.2d, v17.2d"
      ]
    },
    "vroundpd ymm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintn z16.d, p7/m, z17.d"
      ]
    },
    "vroundpd ymm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintm z16.d, p7/m, z17.d"
      ]
    },
    "vroundpd ymm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintp z16.d, p7/m, z17.d"
      ]
    },
    "vroundpd ymm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frintz z16.d, p7/m, z17.d"
      ]
    },
    "vroundpd ymm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x09 256-bit"
      ],
      "ExpectedArm64ASM": [
        "frinti z16.d, p7/m, z17.d"
      ]
    },
    "vroundss xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintn s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vroundss xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintm s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vroundss xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintp s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vroundss xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintz s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vroundss xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frinti s0, s17",
        "mov v16.s[0], v0.s[0]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintn d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintm d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintp d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frintz d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vroundsd xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x0b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v16.16b",
        "frinti d0, d17",
        "mov v16.d[0], v0.d[0]"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 0001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.s[0], v18.s[0]",
        "mov v2.s[1], v17.s[1]",
        "mov v2.s[2], v17.s[2]",
        "mov v2.s[3], v17.s[3]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vblendps xmm0, xmm1, xmm2, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 50,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.s, s18",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[1]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[2]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[3]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[4]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[5]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z17.s[6]",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, z18.s[7]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vblendps ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z18.d"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 00b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 01b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v18.d[0]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v17.d[1]"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 10b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.d[0], v17.d[0]",
        "mov v16.16b, v2.16b",
        "mov v16.d[1], v18.d[1]"
      ]
    },
    "vblendpd xmm0, xmm1, xmm2, 11b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0d 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v18.16b"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0001b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0010b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0011b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0100b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0101b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0110b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 0111b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1000b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1001b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1010b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1011b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1100b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1101b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d18",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z17.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1110b": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.d, d17",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-2",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[1]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #-1",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[2]",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #0",
        "mov z2.d, p0/m, z1.d",
        "msr nzcv, x0",
        "mov z1.d, z18.d[3]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.d, #-2, #1",
        "cmpeq p0.d, p7/z, z0.d, #1",
        "mov z16.d, p0/m, z1.d",
        "msr nzcv, x0"
      ]
    },
    "vblendpd ymm0, ymm1, ymm2, 1111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0d 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z18.d"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 00000001b": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov v2.h[0], v18.h[0]",
        "mov v2.h[1], v17.h[1]",
        "mov v2.h[2], v17.h[2]",
        "mov v2.h[3], v17.h[3]",
        "mov v2.h[4], v17.h[4]",
        "mov v2.h[5], v17.h[5]",
        "mov v2.h[6], v17.h[6]",
        "mov v2.h[7], v17.h[7]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpblendw xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0e 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v18.16b"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 98,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.h, h18",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-8",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[1]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-7",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[2]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[3]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[4]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[5]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[6]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[7]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #-1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z18.h[8]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #0",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[9]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #1",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[10]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #2",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[11]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #3",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[12]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #4",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[13]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #5",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[14]",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #6",
        "mov z2.h, p0/m, z1.h",
        "msr nzcv, x0",
        "mov z1.h, z17.h[15]",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.h, #-8, #1",
        "cmpeq p0.h, p7/z, z0.h, #7",
        "mov z16.h, p0/m, z1.h",
        "msr nzcv, x0"
      ]
    },
    "vpblendw ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0e 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z18.d"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v18.16b"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 1": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v18.16b, v17.16b, #1"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v16.16b, v18.16b, v17.16b, #15"
      ]
    },
    "vpalignr xmm0, xmm1, xmm2, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x0f 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "ext v16.16b, v17.16b, v0.16b, #0"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z18.d"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v2.16b, v18.16b, v17.16b, #1",
        "mov z1.q, z17.q[1]",
        "mov z3.d, z17.d",
        "mov z3.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z4.d, z18.d",
        "mov z4.b, p6/m, z1.b",
        "ext v4.16b, v4.16b, v3.16b, #1",
        "mov z1.q, q4",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 15": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "ext v2.16b, v18.16b, v17.16b, #15",
        "mov z1.q, z17.q[1]",
        "mov z3.d, z17.d",
        "mov z3.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z4.d, z18.d",
        "mov z4.b, p6/m, z1.b",
        "ext v4.16b, v4.16b, v3.16b, #15",
        "mov z1.q, q4",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpalignr ymm0, ymm1, ymm2, 16": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x0f 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v0.2d, #0x0",
        "ext v2.16b, v17.16b, v0.16b, #0",
        "mov z1.q, z17.q[1]",
        "mov z3.d, z17.d",
        "mov z3.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z4.d, z18.d",
        "mov z4.b, p6/m, z1.b",
        "movi v0.2d, #0x0",
        "ext v4.16b, v3.16b, v0.16b, #0",
        "mov z1.q, q4",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpextrb rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[0]"
      ]
    },
    "vpextrb rax, xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.b[15]"
      ]
    },
    "vpextrw rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[0]"
      ]
    },
    "vpextrw rax, xmm0, 7": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "umov w4, v16.h[7]"
      ]
    },
    "vpextrd rax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "vpextrd rax, xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "vpextrb [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[0], [x4]"
      ]
    },
    "vpextrb [rax], xmm0, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x14 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.b}[15], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[0], [x4]"
      ]
    },
    "vpextrw [rax], xmm0, 7": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x15 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.h}[7], [x4]"
      ]
    },
    "vpextrd [rax], xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[0], [x4]"
      ]
    },
    "vpextrd [rax], xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x16 128-bit"
      ],
      "ExpectedArm64ASM": [
        "st1 {v16.s}[3], [x4]"
      ]
    },
    "vextractps eax, xmm0, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[0]"
      ]
    },
    "vextractps eax, xmm0, 3": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x17 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, v16.s[3]"
      ]
    },
    "vinsertf128 ymm0, ymm1, xmm2, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.q, q18",
        "mov z16.d, z17.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vinsertf128 ymm0, ymm1, xmm2, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x18 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.q, q18",
        "mov z16.d, z17.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vextractf128 xmm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vextractf128 xmm0, ymm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x19 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff3fffff",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000001b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffffbfffff",
        "orr x0, x0, #0x800000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000010b": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff7fffff",
        "orr x0, x0, #0x400000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000011b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "orr x0, x20, #0xc00000",
        "msr fpcr, x0",
        "fcvtn v16.4h, v17.4s",
        "msr fpcr, x20"
      ]
    },
    "vcvtps2ph xmm0, xmm1, 00000100b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x1D 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtn v16.4h, v17.4s"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "nearest rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff3fffff",
        "msr fpcr, x0",
        "fcvtnt z2.h, p7/m, z17.s",
        "uzp2 z2.h, z2.h, z2.h",
        "msr fpcr, x20",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000001b": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "-inf rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffffbfffff",
        "orr x0, x0, #0x800000",
        "msr fpcr, x0",
        "fcvtnt z2.h, p7/m, z17.s",
        "uzp2 z2.h, z2.h, z2.h",
        "msr fpcr, x20",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000010b": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "+inf rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "and x0, x20, #0xffffffffff7fffff",
        "orr x0, x0, #0x400000",
        "msr fpcr, x0",
        "fcvtnt z2.h, p7/m, z17.s",
        "uzp2 z2.h, z2.h, z2.h",
        "msr fpcr, x20",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "truncate rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, fpcr",
        "orr x0, x20, #0xc00000",
        "msr fpcr, x0",
        "fcvtnt z2.h, p7/m, z17.s",
        "uzp2 z2.h, z2.h, z2.h",
        "msr fpcr, x20",
        "mov v16.16b, v2.16b"
      ]
    },
    "vcvtps2ph xmm0, ymm1, 00000100b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "host mode rounding",
        "Map 3 0b01 0x1D 256-bit"
      ],
      "ExpectedArm64ASM": [
        "fcvtnt z2.h, p7/m, z17.s",
        "uzp2 z2.h, z2.h, z2.h",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpinsrb xmm0, xmm0, eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.b[0], w4",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpinsrb xmm0, xmm1, eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.b[0], w4"
      ]
    },
    "vpinsrb xmm0, xmm1, eax, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x20 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.b[15], w4"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b0000))": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], v18.s[0]"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b00 << 6) | (0b00 << 4) | (0b1111))": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vinsertps xmm0, xmm1, xmm2, ((0b11 << 6) | (0b11 << 4) | (0b0000))": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x21 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[3], v18.s[3]"
      ]
    },
    "vpinsrd xmm0, xmm0, eax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.s[0], w4",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpinsrd xmm0, xmm1, eax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[0], w4"
      ]
    },
    "vpinsrd xmm0, xmm1, eax, 3": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.s[3], w4"
      ]
    },
    "vpinsrq xmm0, xmm0, rax, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v2.16b, v16.16b",
        "mov v2.d[0], x4",
        "mov v16.16b, v2.16b"
      ]
    },
    "vpinsrq xmm0, xmm1, rax, 0": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[0], x4"
      ]
    },
    "vpinsrq xmm0, xmm1, rax, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x22 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b",
        "mov v16.d[1], x4"
      ]
    },
    "vinserti128 ymm0, ymm1, xmm2, 0": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.q, q18",
        "mov z16.d, z17.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vinserti128 ymm0, ymm1, xmm2, 1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x38 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z1.q, q18",
        "mov z16.d, z17.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vextracti128 xmm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vextracti128 xmm0, ymm1, 1": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x39 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z2.q, z17.q[1]",
        "mov v16.16b, v2.16b"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdpps xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x40 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.4s, v17.4s, v18.4s",
        "faddv s2, p6, z2.s",
        "dup v16.4s, v2.s[0]"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 00001111b": {
      "ExpectedInstructionCount": 109,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul z3.s, z17.s, z18.s",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s2",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z3.s, p0/m, z1.s",
        "msr nzcv, x0",
        "movprfx z0, z3",
        "faddp z0.s, p7/m, z0.s, z2.s",
        "uzp1 z3.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z3.d, p6, z3.d, z1.d",
        "movprfx z0, z3",
        "faddp z0.s, p7/m, z0.s, z2.s",
        "uzp1 z3.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z3.d, p6, z3.d, z1.d",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdpps ymm0, ymm1, ymm2, 11111111b": {
      "ExpectedInstructionCount": 61,
      "Comment": [
        "Map 3 0b01 0x40 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "fmul z3.s, z17.s, z18.s",
        "movprfx z0, z3",
        "faddp z0.s, p7/m, z0.s, z2.s",
        "uzp1 z3.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z3.d, p6, z3.d, z1.d",
        "movprfx z0, z3",
        "faddp z0.s, p7/m, z0.s, z2.s",
        "uzp1 z3.s, z0.s, z0.s",
        "uzp2 z1.s, z0.s, z0.s",
        "splice z3.d, p6, z3.d, z1.d",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-4",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-3",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #-1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #0",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #1",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #2",
        "mov z2.s, p0/m, z1.s",
        "msr nzcv, x0",
        "mov z1.s, s3",
        "mov z16.d, z2.d",
        "mrs x0, nzcv",
        "index z0.s, #-4, #1",
        "cmpeq p0.s, p7/z, z0.s, #3",
        "mov z16.s, p0/m, z1.s",
        "msr nzcv, x0"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 00000000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 00001111b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 11110000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vdppd xmm0, xmm1, xmm2, 11111111b": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x41 128-bit"
      ],
      "ExpectedArm64ASM": [
        "fmul v2.2d, v17.2d, v18.2d",
        "faddv d2, p6, z2.d",
        "dup v16.2d, v2.d[0]"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 000b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 001b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 010b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 011b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 100b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 101b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 110b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw xmm0, xmm1, xmm2, 111b": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "Map 3 0b01 0x42 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v16.8h, v4.8h, v2.8h"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 000b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 001b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 010b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 011b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #0",
        "ext v4.16b, v17.16b, v17.16b, #1",
        "ext v5.16b, v17.16b, v17.16b, #2",
        "ext v6.16b, v17.16b, v17.16b, #3",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 100b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[0]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 101b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[1]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 110b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[2]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vmpsadbw ymm0, ymm1, ymm2, 111b": {
      "ExpectedInstructionCount": 34,
      "Comment": [
        "Map 3 0b01 0x42 256-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v2.4s, v18.s[3]",
        "ext v3.16b, v17.16b, v17.16b, #4",
        "ext v4.16b, v17.16b, v17.16b, #5",
        "ext v5.16b, v17.16b, v17.16b, #6",
        "ext v6.16b, v17.16b, v17.16b, #7",
        "uabdl v3.8h, v3.8b, v2.8b",
        "uabdl v4.8h, v4.8b, v2.8b",
        "uabdl v5.8h, v5.8b, v2.8b",
        "uabdl v2.8h, v6.8b, v2.8b",
        "addp v3.8h, v3.8h, v5.8h",
        "addp v2.8h, v4.8h, v2.8h",
        "trn1 v4.4s, v3.4s, v2.4s",
        "trn2 v2.4s, v3.4s, v2.4s",
        "addp v2.8h, v4.8h, v2.8h",
        "mov z3.q, z17.q[1]",
        "mov z4.q, z18.q[1]",
        "dup v4.4s, v4.s[0]",
        "ext v5.16b, v3.16b, v3.16b, #0",
        "ext v6.16b, v3.16b, v3.16b, #1",
        "ext v7.16b, v3.16b, v3.16b, #2",
        "ext v3.16b, v3.16b, v3.16b, #3",
        "uabdl v5.8h, v5.8b, v4.8b",
        "uabdl v6.8h, v6.8b, v4.8b",
        "uabdl v7.8h, v7.8b, v4.8b",
        "uabdl v3.8h, v3.8b, v4.8b",
        "addp v4.8h, v5.8h, v7.8h",
        "addp v3.8h, v6.8h, v3.8h",
        "trn1 v5.4s, v4.4s, v3.4s",
        "trn2 v3.4s, v4.4s, v3.4s",
        "addp v3.8h, v5.8h, v3.8h",
        "mov z1.q, q3",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 00000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "pmull v16.1q, v17.1d, v18.1d"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 00001b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v17.d[1]",
        "pmull v16.1q, v0.1d, v18.1d"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 10000b": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "dup v0.2d, v18.d[1]",
        "pmull v16.1q, v0.1d, v17.1d"
      ]
    },
    "vpclmulqdq xmm0, xmm1, xmm2, 10001b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x44 128-bit"
      ],
      "ExpectedArm64ASM": [
        "pmull2 v16.1q, v17.2d, v18.2d"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 00000b": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 00001b": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 10000b": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ]
    },
    "vpclmulqdq ymm0, ymm1, ymm2, 10001b": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x44 256-bit"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00000011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00010011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00100011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110000b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110001b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110010b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00110011b": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z2.b, p6/m, z1.b",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00001000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00011000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00101000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 00111000b": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10001000b": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000000b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q17",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000001b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z17.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000010b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, q18",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vperm2i128 ymm0, ymm1, ymm2, 10000011b": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "Map 3 0b01 0x46 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "mov z1.q, z18.q[1]",
        "mov z16.d, z2.d",
        "mov z16.b, p6/m, z1.b"
      ]
    },
    "vblendvps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x4a 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.4s, v19.4s, #31",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b"
      ]
    },
    "vblendvps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x4a 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z19",
        "asr z2.s, p7/m, z2.s, #31",
        "movprfx z0, z18",
        "bsl z0.d, z0.d, z17.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vblendvpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x4b 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.2d, v19.2d, #63",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b"
      ]
    },
    "vblendvpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x4b 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z19",
        "asr z2.d, p7/m, z2.d, #63",
        "movprfx z0, z18",
        "bsl z0.d, z0.d, z17.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vpblendvb xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map 3 0b01 0x4c 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v2.16b, v19.16b, #7",
        "mov v16.16b, v2.16b",
        "bsl v16.16b, v18.16b, v17.16b"
      ]
    },
    "vpblendvb ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0x4c 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z2, z19",
        "asr z2.b, p7/m, z2.b, #7",
        "movprfx z0, z18",
        "bsl z0.d, z0.d, z17.d, z2.d",
        "mov z16.d, z0.d"
      ]
    },
    "vfmaddsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5c 128-bit"
      ]
    },
    "vfmaddsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5c 256-bit"
      ]
    },
    "vfmaddsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5d 128-bit"
      ]
    },
    "vfmaddsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5d 256-bit"
      ]
    },
    "vfmsubaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5e 128-bit"
      ]
    },
    "vfmsubaddps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5e 256-bit"
      ]
    },
    "vfmsubaddpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5f 128-bit"
      ]
    },
    "vfmsubaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x5f 256-bit"
      ]
    },
    "vfmaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x68 128-bit"
      ]
    },
    "vfmaddps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x68 256-bit"
      ]
    },
    "vfmaddpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x69 128-bit"
      ]
    },
    "vfmaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x69 256-bit"
      ]
    },
    "vfmaddss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6a 128-bit"
      ]
    },
    "vfmaddsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6b 128-bit"
      ]
    },
    "vfmsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6c 128-bit"
      ]
    },
    "vfmsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6c 256-bit"
      ]
    },
    "vfmsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6d 128-bit"
      ]
    },
    "vfmsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6d 256-bit"
      ]
    },
    "vfmsubss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6e 128-bit"
      ]
    },
    "vfmsubsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x6f 128-bit"
      ]
    },
    "vfnmaddps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x78 128-bit"
      ]
    },
    "vfnmaddpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x79 256-bit"
      ]
    },
    "vfnmaddss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7a 128-bit"
      ]
    },
    "vfnmaddsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7b 128-bit"
      ]
    },
    "vfnmsubps xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7c 128-bit"
      ]
    },
    "vfnmsubps ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7c 256-bit"
      ]
    },
    "vfnmsubpd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7d 128-bit"
      ]
    },
    "vfnmsubpd ymm0, ymm1, ymm2, ymm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7d 256-bit"
      ]
    },
    "vfnmsubss xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7e 128-bit"
      ]
    },
    "vfnmsubsd xmm0, xmm1, xmm2, xmm3": {
      "ExpectedInstructionCount": -1,
      "Skip": "Yes",
      "Comment": [
        "Map 3 0b01 0x7f 128-bit"
      ]
    },
    "vaeskeygenassist xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "Map 3 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3184]",
        "movi v3.2d, #0x0",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v3.16b",
        "tbl v16.16b, {v16.16b}, v2.16b"
      ]
    },
    "vaeskeygenassist xmm0, xmm1, 0xFF": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map 3 0b01 0xdf 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3184]",
        "movi v3.2d, #0x0",
        "mov v16.16b, v17.16b",
        "aese v16.16b, v3.16b",
        "tbl v16.16b, {v16.16b}, v2.16b",
        "mov x0, #0xff00000000",
        "dup v1.2d, x0",
        "eor v16.16b, v16.16b, v1.16b"
      ]
    },
    "rorx eax, ebx, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w6"
      ]
    },
    "rorx eax, eax, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w4"
      ]
    },
    "rorx eax, ebx, 31": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 32-bit"
      ],
      "ExpectedArm64ASM": [
        "ror w4, w6, #31"
      ]
    },
    "rorx eax, ebx, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w6"
      ]
    },
    "rorx eax, eax, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 32-bit"
      ],
      "ExpectedArm64ASM": [
        "mov w4, w4"
      ]
    },
    "rorx rax, rbx, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x4, x6"
      ]
    },
    "rorx rax, rax, 0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "Map 3 0b11 0xf0 64-bit"
      ],
      "ExpectedArm64ASM": []
    },
    "rorx rax, rbx, 63": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 64-bit"
      ],
      "ExpectedArm64ASM": [
        "ror x4, x6, #63"
      ]
    },
    "rorx rax, rbx, 64": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map 3 0b11 0xf0 64-bit"
      ],
      "ExpectedArm64ASM": [
        "mov x4, x6"
      ]
    },
    "rorx rax, rax, 64": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "Map 3 0b11 0xf0 64-bit"
      ],
      "ExpectedArm64ASM": []
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/VEX_map_group.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE256"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "vpsrlw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.8h, v17.8h, #15"
      ]
    },
    "vpsrlw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrlw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsrlw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsr z16.h, p7/m, z16.h, #15"
      ]
    },
    "vpsrlw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsraw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsraw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.8h, v17.8h, #15"
      ]
    },
    "vpsraw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.8h, v17.8h, #15"
      ]
    },
    "vpsraw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsraw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "asr z16.h, p7/m, z16.h, #15"
      ]
    },
    "vpsraw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "asr z16.h, p7/m, z16.h, #15"
      ]
    },
    "vpsllw xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllw xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.8h, v17.8h, #15"
      ]
    },
    "vpsllw xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsllw ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsllw ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsl z16.h, p7/m, z16.h, #15"
      ]
    },
    "vpsllw ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 12 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrld xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrld xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.4s, v17.4s, #31"
      ]
    },
    "vpsrld xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrld ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsrld ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsr z16.s, p7/m, z16.s, #31"
      ]
    },
    "vpsrld ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrad xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrad xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.4s, v17.4s, #31"
      ]
    },
    "vpsrad xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b100 128-bit"
      ],
      "ExpectedArm64ASM": [
        "sshr v16.4s, v17.4s, #31"
      ]
    },
    "vpsrad ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsrad ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "asr z16.s, p7/m, z16.s, #31"
      ]
    },
    "vpsrad ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b100 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "asr z16.s, p7/m, z16.s, #31"
      ]
    },
    "vpslld xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslld xmm0, xmm1, 31": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.4s, v17.4s, #31"
      ]
    },
    "vpslld xmm0, xmm1, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpslld ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpslld ymm0, ymm1, 31": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsl z16.s, p7/m, z16.s, #31"
      ]
    },
    "vpslld ymm0, ymm1, 32": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 13 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrlq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrlq xmm0, xmm1, 63": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "ushr v16.2d, v17.2d, #63"
      ]
    },
    "vpsrlq xmm0, xmm1, 64": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b010 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrlq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsrlq ymm0, ymm1, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsr z16.d, p7/m, z16.d, #63"
      ]
    },
    "vpsrlq ymm0, ymm1, 64": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b010 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrldq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsrldq xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v17.16b, v2.16b, #15"
      ]
    },
    "vpsrldq xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b011 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsrldq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsrldq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v3.16b, v17.16b, v2.16b, #15",
        "movprfx z1, z17",
        "ext z1.b, z1.b, z2.b, #31",
        "mov z2.d, z1.d",
        "mov z1.q, q2",
        "mov z16.d, z3.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpsrldq ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b011 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsllq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpsllq xmm0, xmm1, 63": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "shl v16.2d, v17.2d, #63"
      ]
    },
    "vpsllq xmm0, xmm1, 64": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b110 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpsllq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpsllq ymm0, ymm1, 63": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movprfx z16, z17",
        "lsl z16.d, p7/m, z16.d, #63"
      ]
    },
    "vpsllq ymm0, ymm1, 64": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b110 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpslldq xmm0, xmm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "mov v16.16b, v17.16b"
      ]
    },
    "vpslldq xmm0, xmm1, 15": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v16.16b, v2.16b, v17.16b, #1"
      ]
    },
    "vpslldq xmm0, xmm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b111 128-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vpslldq ymm0, ymm1, 0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "mov z16.d, p7/m, z17.d"
      ]
    },
    "vpslldq ymm0, ymm1, 15": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ext v3.16b, v2.16b, v17.16b, #1",
        "ext z2.b, z2.b, z17.b, #17",
        "mov z1.q, q2",
        "mov z16.d, z3.d",
        "not p0.b, p7/z, p6.b",
        "mov z16.b, p0/m, z1.b"
      ]
    },
    "vpslldq ymm0, ymm1, 16": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "Map group 14 0b111 256-bit"
      ],
      "ExpectedArm64ASM": [
        "movi v16.2d, #0x0"
      ]
    },
    "vldmxcsr [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "Map group 15 0b010"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "and w20, w20, #0xffc0",
        "str w20, [x28, #972]",
        "ubfx w21, w20, #13, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0"
      ]
    },
    "vstmxcsr [rax]": {
      "ExpectedInstructionCount": 3,
      "Comment": [
        "Map group 15 0b011"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x28, #972]",
        "and w20, w20, #0xffc0",
        "str w20, [x4]"
      ]
    },
    "blsr eax, ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map group 17 0b001 32-bit"
      ],
      "ExpectedArm64ASM": [
        "sub w20, w6, #0x1 (1)",
        "and w4, w20, w6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp w4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "blsr rax, rbx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map group 17 0b001 64-bit"
      ],
      "ExpectedArm64ASM": [
        "sub x20, x6, #0x1 (1)",
        "and x4, x20, x6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp x4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "blsmsk eax, ebx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map group 17 0b010 32-bit"
      ],
      "ExpectedArm64ASM": [
        "sub w20, w6, #0x1 (1)",
        "eor w4, w20, w6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp w4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "blsmsk rax, rbx": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "Map group 17 0b010 64-bit"
      ],
      "ExpectedArm64ASM": [
        "sub x20, x6, #0x1 (1)",
        "eor x4, x20, x6",
        "cmp x6, #0x0 (0)",
        "cset x20, ne",
        "cmp x4, #0x0 (0)",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "blsi eax, ebx": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map group 17 0b011 32-bit"
      ],
      "ExpectedArm64ASM": [
        "neg w20, w6",
        "and w4, w6, w20",
        "cmp w4, #0x0 (0)",
        "cset x20, eq",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    },
    "blsi rax, rbx": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "Map group 17 0b011 64-bit"
      ],
      "ExpectedArm64ASM": [
        "neg x20, x6",
        "and x4, x6, x20",
        "cmp x4, #0x0 (0)",
        "cset x20, eq",
        "mrs x21, nzcv",
        "bfi w21, w20, #29, #1",
        "msr nzcv, x21"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/X87ldst-SVE.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [
      "SVE128",
      "SVE256"
    ],
    "DisabledHostFeatures": [
      "AFP",
      "FLAGM",
      "FLAGM2",
      "RPRES"
    ]
  },
  "Instructions": {
    "fstp tword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": "Single 80-bit store.",
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "st1h {z2.h}, p2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "2-store 80bit": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 20,
      "x86Insts": [
        "fstp tword [rax]",
        "fstp tword [rax+10]"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "st1h {z2.h}, p2, [x4]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x21, x4, #0xa (10)",
        "st1h {z2.h}, p2, [x21]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "8-store 80bit": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 48,
      "x86Insts": [
        "fstp tword [rax]",
        "fstp tword [rax+10]",
        "fstp tword [rax+20]",
        "fstp tword [rax+30]",
        "fstp tword [rax+40]",
        "fstp tword [rax+50]",
        "fstp tword [rax+60]",
        "fstp tword [rax+70]"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "st1h {z2.h}, p2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0xa (10)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0x14 (20)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0x1e (30)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0x28 (40)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0x32 (50)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x22, x4, #0x3c (60)",
        "st1h {z2.h}, p2, [x22]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x21, x4, #0x46 (70)",
        "st1h {z2.h}, p2, [x21]",
        "strb w20, [x28, #1051]",
        "strb wzr, [x28, #1202]"
      ]
    },
    "fld tword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": "Single 80-bit load.",
      "ExpectedArm64ASM": [
        "ld1h {z2.h}, p2/z, [x4]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "2-load 80bit": {
      "x86InstructionCount": 2,
      "ExpectedInstructionCount": 20,
      "x86Insts": [
        "fld tword [rax]",
        "fld tword [rax+10]"
      ],
      "ExpectedArm64ASM": [
        "ld1h {z2.h}, p2/z, [x4]",
        "add x20, x4, #0xa (10)",
        "ld1h {z3.h}, p2/z, [x20]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q3, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "8-load 80bit": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 49,
      "x86Insts": [
        "fld tword [rax]",
        "fld tword [rax+10]",
        "fld tword [rax+20]",
        "fld tword [rax+30]",
        "fld tword [rax+40]",
        "fld tword [rax+50]",
        "fld tword [rax+60]",
        "fld tword [rax+70]"
      ],
      "ExpectedArm64ASM": [
        "ld1h {z2.h}, p2/z, [x4]",
        "add x20, x4, #0xa (10)",
        "ld1h {z3.h}, p2/z, [x20]",
        "add x20, x4, #0x14 (20)",
        "ld1h {z4.h}, p2/z, [x20]",
        "add x20, x4, #0x1e (30)",
        "ld1h {z5.h}, p2/z, [x20]",
        "add x20, x4, #0x28 (40)",
        "ld1h {z6.h}, p2/z, [x20]",
        "add x20, x4, #0x32 (50)",
        "ld1h {z7.h}, p2/z, [x20]",
        "add x20, x4, #0x3c (60)",
        "ld1h {z8.h}, p2/z, [x20]",
        "add x20, x4, #0x46 (70)",
        "ld1h {z9.h}, p2/z, [x20]",
        "ldrb w20, [x28, #1051]",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q9, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q8, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q7, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q6, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q5, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q4, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "str q3, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "str q2, [x20, #1056]",
        "mov w20, #0xff",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/x87.json
================================================
{
  "Features": {
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "CSSC",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "fadd dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom dword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd8 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp dword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xd8 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom st0, st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xd8 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcom st0, st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp st0, st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd8 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xd8 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd8 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdiv st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st0, st0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd8 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st0, st1": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st3": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st4": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st5": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st6": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st0, st7": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd8 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fld dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd9 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov s0, s2",
        "ldr x0, [x28, #1632]",
        "ldr x3, [x28, #1640]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst dword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]"
      ]
    },
    "fstp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd9 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1664]",
        "ldr x3, [x28, #1672]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov s2, s0",
        "str s2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldenv [rax]": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "add x20, x4, #0x4 (4)",
        "ldr w20, [x20]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "add x20, x4, #0x8 (8)",
        "ldr w20, [x20]",
        "and w20, w20, w20, lsr #1",
        "mov w21, #0x55555555",
        "bic w20, w21, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]"
      ]
    },
    "fnstenv [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd9 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "str w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "str w20, [x4, #4]",
        "ldrb w20, [x28, #1202]",
        "orr w20, w20, w20, lsl #4",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsl #2",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsl #1",
        "and w20, w20, #0x55555555",
        "orr w20, w20, w20, lsl #1",
        "eor w20, w20, #0xffff",
        "str w20, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]"
      ]
    },
    "fnstcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]"
      ]
    },
    "fld st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st3": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st4": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st5": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st6": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fxch st0, st0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xd9 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1049]"
      ]
    },
    "fxch st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fxch st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str q3, [x21, #1056]",
        "str q2, [x20, #1056]"
      ]
    },
    "fnop": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xd9 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fchs": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xd9 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "ldr q3, [x28, #3552]",
        "eor v2.16b, v2.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fabs": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xd9 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "ldr q3, [x28, #3552]",
        "bic v2.16b, v2.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ftst": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "mov w20, #0x0",
        "fmov d3, x20",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fxam": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "ubfx x21, x21, #15, #1",
        "strb w21, [x28, #1049]",
        "ldrb w21, [x28, #1202]",
        "lsr w20, w21, w20",
        "and w20, w20, #0x1",
        "mrs x21, nzcv",
        "cmp w20, #0x1 (1)",
        "cset x22, ne",
        "strb w22, [x28, #1048]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fld1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3328]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2t": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3344]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2e": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3360]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldpi": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3376]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldlg2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3392]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldln2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3408]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldz": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "movi v2.2d, #0x0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "f2xm1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1856]",
        "ldr x3, [x28, #1864]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fyl2x": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2080]",
        "ldr x3, [x28, #2088]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]"
      ]
    },
    "fptan": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1872]",
        "ldr x3, [x28, #1880]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldr q3, [x28, #3328]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q3, [x22, #1056]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fpatan": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2096]",
        "ldr x3, [x28, #2104]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w20, [x28, #1051]",
        "str q2, [x21, #1056]"
      ]
    },
    "fxtract": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xd9 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1952]",
        "ldr x3, [x28, #1960]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1968]",
        "ldr x3, [x28, #1976]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q2, [x22, #1056]",
        "str q3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fprem1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2112]",
        "ldr x3, [x28, #2120]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fdecstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fincstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fprem": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2128]",
        "ldr x3, [x28, #2136]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fyl2xp1": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldr q2, [x28, #3328]",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2080]",
        "ldr x3, [x28, #2088]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "strb w20, [x28, #1051]",
        "str q3, [x22, #1056]",
        "str q2, [x21, #1056]"
      ]
    },
    "fsqrt": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1888]",
        "ldr x3, [x28, #1896]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsincos": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1936]",
        "ldr x3, [x28, #1944]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v3.16b, v0.16b",
        "mov v4.16b, v1.16b",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str q4, [x22, #1056]",
        "str q3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frndint": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1840]",
        "ldr x3, [x28, #1848]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fscale": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2144]",
        "ldr x3, [x28, #2152]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsin": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1904]",
        "ldr x3, [x28, #1912]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fcos": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1920]",
        "ldr x3, [x28, #1928]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb wzr, [x28, #1050]",
        "str q2, [x20, #1056]"
      ]
    },
    "fiadd dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fimul dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ficom dword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xda !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "ficomp dword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xda !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fisubr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidiv dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidivr dword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xda !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "str x30, [sp, #-16]!",
        "mov w1, w20",
        "ldr x0, [x28, #1824]",
        "ldr x3, [x28, #1832]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmove st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xda 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fucompp": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xda 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild dword [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdb !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "sxtw x20, w20",
        "mrs x21, nzcv",
        "mov w22, #0x0",
        "cmp x20, #0x0 (0)",
        "mov w23, #0x8000",
        "csel x23, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov w24, #0x3f",
        "mov x0, #0x3f",
        "clz x30, x20",
        "sub x30, x0, x30",
        "sub x24, x24, x30",
        "lsl x30, x20, x24",
        "mov w18, #0x403e",
        "sub x24, x18, x24",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x24, eq",
        "orr x20, x23, x20",
        "fmov d2, x30",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdb !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1760]",
        "ldr x3, [x28, #1768]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist dword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1712]",
        "ldr x3, [x28, #1720]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "str w20, [x4]"
      ]
    },
    "fistp dword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdb !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1712]",
        "ldr x3, [x28, #1720]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld tword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp tword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcmovnb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fnclex": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xdb 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1040]"
      ]
    },
    "fninit": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdb 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fucomi st0, st0": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xdb 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st1": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st2": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st3": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st4": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st5": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st6": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fucomi st0, st7": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st0": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xdb 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st1": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st2": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st3": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st4": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st5": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st6": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fcomi st0, st7": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdb 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "eor x21, x21, #0x1",
        "mrs x23, nzcv",
        "bfi w23, w21, #29, #1",
        "bfi w23, w22, #30, #1",
        "eor w26, w20, #0x1",
        "strb w20, [x28, #1040]",
        "msr nzcv, x23"
      ]
    },
    "fadd qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fcom qword [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdc !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fcomp qword [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xdc !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr qword [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xdc !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fadd st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fadd st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fmul st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fmul st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xe0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fsubr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsubr st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fsubr st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xe8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fsub st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fsub st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xf0": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fdivr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdivr st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "fdivr st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xf8": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "fdiv st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st1, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st2, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st3, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st4, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st5, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st6, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fdiv st7, st0": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fld qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1776]",
        "ldr x3, [x28, #1784]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "str x21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst qword [rax]": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x4]"
      ]
    },
    "fstp qword [rax]": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frstor [rax]": {
      "ExpectedInstructionCount": 76,
      "Comment": [
        "0xdd !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w22, w20, #8, #1",
        "ubfx w23, w20, #9, #1",
        "ubfx w24, w20, #10, #1",
        "ubfx w30, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w22, [x28, #1048]",
        "strb w23, [x28, #1049]",
        "strb w24, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w22, #0x55555555",
        "bic w20, w22, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]",
        "mov x20, #0xffffffffffffffff",
        "mov w22, #0xffff",
        "fmov d2, x20",
        "fmov v2.D[1], x22",
        "ldur q3, [x4, #28]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x21, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #38]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr q3, [x4, #48]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #58]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #68]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #78]",
        "and v3.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #88]",
        "and v2.16b, v3.16b, v2.16b",
        "add x0, x28, x20, lsl #4",
        "str q2, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur d2, [x4, #98]",
        "ldr h3, [x4, #106]",
        "mov v2.h[4], v3.h[0]",
        "add x0, x28, x20, lsl #4",
        "str q2, [x0, #1056]"
      ]
    },
    "fnsave [rax]": {
      "ExpectedInstructionCount": 79,
      "Comment": [
        "0xdd !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrh w21, [x28, #1200]",
        "str w21, [x4]",
        "ldrb w21, [x28, #1051]",
        "lsl x21, x21, #11",
        "ldrb w22, [x28, #1048]",
        "orr x21, x21, x22, lsl #8",
        "ldrb w22, [x28, #1049]",
        "orr x21, x21, x22, lsl #9",
        "ldrb w22, [x28, #1050]",
        "orr x21, x21, x22, lsl #10",
        "ldrb w22, [x28, #1054]",
        "orr x21, x21, x22, lsl #14",
        "ldrb w22, [x28, #1040]",
        "orr x21, x21, x22",
        "str w21, [x4, #4]",
        "ldrb w21, [x28, #1202]",
        "orr w21, w21, w21, lsl #4",
        "and w21, w21, #0xf0f0f0f",
        "orr w21, w21, w21, lsl #2",
        "and w21, w21, #0x33333333",
        "orr w21, w21, w21, lsl #1",
        "and w21, w21, #0x55555555",
        "orr w21, w21, w21, lsl #1",
        "eor w21, w21, #0xffff",
        "str w21, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #28]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #38]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #58]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #68]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #78]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur q2, [x4, #88]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr q2, [x0, #1056]",
        "stur d2, [x4, #98]",
        "dup v2.8h, v2.h[4]",
        "str h2, [x4, #106]",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fnstsw [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4]"
      ]
    },
    "ffree st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdd 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st3": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st4": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st5": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st6": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st7": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xdd 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fst st1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st3": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st4": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st5": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st6": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st0": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdd 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str q2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucom st0": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdd 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st2": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st3": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st4": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st5": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st6": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucom st7": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "fucomp st0": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xdd 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st1": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdd 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w22, [x28, #1040]",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st2": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st3": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st4": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st5": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st6": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st7": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdd 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fiadd word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fimul word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "ficom word [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xde !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x20, x0",
        "ubfx x21, x20, #1, #1",
        "ubfx x22, x20, #0, #1",
        "ubfx x20, x20, #2, #1",
        "orr w21, w21, w20",
        "orr w22, w22, w20",
        "strb w21, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "strb w20, [x28, #1040]"
      ]
    },
    "ficomp word [rax]": {
      "ExpectedInstructionCount": 37,
      "Comment": [
        "0xde !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fisubr word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidiv word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "fidivr word [rax]": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "str x30, [sp, #-16]!",
        "sxth w1, w20",
        "ldr x0, [x28, #1808]",
        "ldr x3, [x28, #1816]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x20, #1056]"
      ]
    },
    "faddp st0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xd8 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st3": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st4": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st5": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st6": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st7": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2016]",
        "ldr x3, [x28, #2024]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "0xde 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st1": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st2": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st3": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st4": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st5": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st6": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st7": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2048]",
        "ldr x3, [x28, #2056]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcompp": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xde 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "strb w21, [x28, #1040]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fsubrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr q3, [x23, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe8": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fsubp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2032]",
        "ldr x3, [x28, #2040]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf0": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fdivrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr q3, [x23, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf8": {
      "ExpectedInstructionCount": 20,
      "Comment": [
        "fdivp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st1, st0": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xde 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "strb w21, [x28, #1051]",
        "str q2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st2, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st3, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st4, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st5, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st6, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st7, st0": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xde 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #2064]",
        "ldr x3, [x28, #2072]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild word [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "mrs x21, nzcv",
        "mov w22, #0x0",
        "cmp x20, #0x0 (0)",
        "mov w23, #0x8000",
        "csel x23, x23, x22, lt",
        "cneg x20, x20, mi",
        "mov w24, #0x3f",
        "mov x0, #0x3f",
        "clz x30, x20",
        "sub x30, x0, x30",
        "sub x24, x24, x30",
        "lsl x30, x20, x24",
        "mov w18, #0x403e",
        "sub x24, x18, x24",
        "cmp x20, #0x0 (0)",
        "csel x20, x22, x24, eq",
        "orr x20, x23, x20",
        "fmov d2, x30",
        "fmov v2.D[1], x20",
        "msr nzcv, x21",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp word [rax]": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xdf !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "and x21, x21, #0x7fff",
        "mrs x22, nzcv",
        "tst x21, #0x7fff",
        "cset x23, eq",
        "mov w24, #0x400e",
        "cmp x21, x24",
        "cset x21, hs",
        "orr x21, x23, x21",
        "strb w21, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1744]",
        "ldr x3, [x28, #1752]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist word [rax]": {
      "ExpectedInstructionCount": 22,
      "Comment": [
        "0xdf !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr q2, [x20, #1056]",
        "mov x20, v2.d[1]",
        "and x20, x20, #0x7fff",
        "mrs x21, nzcv",
        "tst x20, #0x7fff",
        "cset x22, eq",
        "mov w23, #0x400e",
        "cmp x20, x23",
        "cset x20, hs",
        "orr x20, x22, x20",
        "strb w20, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1696]",
        "ldr x3, [x28, #1704]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w20, w0",
        "strh w20, [x4]",
        "msr nzcv, x21"
      ]
    },
    "fistp word [rax]": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xdf !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "mov x21, v2.d[1]",
        "and x21, x21, #0x7fff",
        "mrs x22, nzcv",
        "tst x21, #0x7fff",
        "cset x23, eq",
        "mov w24, #0x400e",
        "cmp x21, x24",
        "cset x21, hs",
        "orr x21, x23, x21",
        "strb w21, [x28, #1040]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1696]",
        "ldr x3, [x28, #1704]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov w21, w0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbld tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdf !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #2000]",
        "ldr x3, [x28, #2008]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str q2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbstp tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdf !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1984]",
        "ldr x3, [x28, #1992]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffreep st0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st5": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st6": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fnstsw ax": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "fucomip st0": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdf 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "eor x23, x23, #0x1",
        "mrs x30, nzcv",
        "bfi w30, w23, #29, #1",
        "bfi w30, w24, #30, #1",
        "eor w26, w22, #0x1",
        "strb w22, [x28, #1040]",
        "msr nzcv, x30",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st2": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st3": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st4": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st5": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st6": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st7": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st0": {
      "ExpectedInstructionCount": 31,
      "Comment": [
        "0xdf 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v2.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st1": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xdf 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr q3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x22, x0",
        "ubfx x23, x22, #1, #1",
        "ubfx x24, x22, #0, #1",
        "ubfx x22, x22, #2, #1",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "eor x23, x23, #0x1",
        "mrs x30, nzcv",
        "bfi w30, w23, #29, #1",
        "bfi w30, w24, #30, #1",
        "eor w26, w22, #0x1",
        "strb w22, [x28, #1040]",
        "msr nzcv, x30",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st2": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st3": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st4": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st5": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st6": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st7": {
      "ExpectedInstructionCount": 35,
      "Comment": [
        "0xdf 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr q2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr q3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "mov v1.16b, v3.16b",
        "ldr x0, [x28, #1792]",
        "ldr x3, [x28, #1800]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov x21, x0",
        "ubfx x22, x21, #1, #1",
        "ubfx x23, x21, #0, #1",
        "ubfx x21, x21, #2, #1",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "eor x22, x22, #0x1",
        "mrs x24, nzcv",
        "bfi w24, w22, #29, #1",
        "bfi w24, w23, #30, #1",
        "eor w26, w21, #0x1",
        "strb w21, [x28, #1040]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "msr nzcv, x24",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_32": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld dword [rax]",
        "fstp dword [rdx]",
        "fld dword [rax + 4]",
        "fstp dword [rdx + 4]",
        "fld dword [rax + 8]",
        "fstp dword [rdx + 8]",
        "fld dword [rax + 12]",
        "fstp dword [rdx + 12]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str s2, [x5]",
        "ldr s2, [x4, #4]",
        "str s2, [x5, #4]",
        "ldr s2, [x4, #8]",
        "str s2, [x5, #8]",
        "ldr s2, [x4, #12]",
        "str s2, [x5, #12]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_64": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld qword [rax]",
        "fstp qword [rdx]",
        "fld qword [rax + 8]",
        "fstp qword [rdx + 8]",
        "fld qword [rax + 16]",
        "fstp qword [rdx + 16]",
        "fld qword [rax + 32]",
        "fstp qword [rdx + 32]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str d2, [x5]",
        "ldr d2, [x4, #8]",
        "str d2, [x5, #8]",
        "ldr d2, [x4, #16]",
        "str d2, [x5, #16]",
        "ldr d2, [x4, #32]",
        "str d2, [x5, #32]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Multiple fld/fst": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 22,
      "x86Insts": [
        "fld     qword [ebp+16380]",
        "fstp    qword [eax-0x4]",
        "fld     qword [ebp-0x8]",
        "fstp    qword [eax+16370]"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffc",
        "add x20, x9, x20",
        "mov w20, w20",
        "ldr d2, [x20]",
        "sub x20, x4, #0x4 (4)",
        "mov w20, w20",
        "str d2, [x20]",
        "sub x20, x9, #0x8 (8)",
        "mov w20, w20",
        "ldr d2, [x20]",
        "mov w20, #0x3ff2",
        "add x20, x4, x20",
        "mov w20, w20",
        "str d2, [x20]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_80": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 38,
      "x86Insts": [
        "fld tword [rax]",
        "fstp tword [rdx]",
        "fld tword [rax + 10]",
        "fstp tword [rdx + 10]",
        "fld tword [rax + 20]",
        "fstp tword [rdx + 20]",
        "fld tword [rax + 30]",
        "fstp tword [rdx + 30]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str d2, [x5]",
        "mov x20, v2.d[1]",
        "strh w20, [x5, #8]",
        "add x20, x4, #0xa (10)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #10]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0xa (10)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x14 (20)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #20]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x14 (20)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x1e (30)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #30]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x1e (30)",
        "strh w20, [x21, #8]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/x87_32Bit.json
================================================
{
  "Features": {
    "Bitness": 32,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "CSSC",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "Multiple fld/fst": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 13,
      "x86Insts": [
        "fld     dword [ebp+16380]",
        "fstp    dword [eax-0x4]",
        "fld     dword [ebp-0x8]",
        "fstp    dword [eax+16370]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x9, #16380]",
        "stur s2, [x4, #-4]",
        "ldur s2, [x9, #-8]",
        "mov w20, #0x3ff2",
        "str s2, [x4, x20, sxtx]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/x87_f64.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 64,
    "EnabledHostFeatures": [],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "AFP",
      "FLAGM",
      "FLAGM2"
    ]
  },
  "Instructions": {
    "fadd dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom dword [rax]": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd8 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcomp dword [rax]": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xd8 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fadd d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmul d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom st0, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd8 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "mrs x20, nzcv",
        "fcmp d2, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcom st0, st1": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st3": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st4": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st5": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st6": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcom st0, st7": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xd8 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fcomp st0, st0": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd8 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "fcmp d2, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xd8 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fcmp d3, d2",
        "cset x23, vs",
        "cset x24, lo",
        "cset x30, eq",
        "orr w24, w24, w23",
        "orr w30, w30, w23",
        "strb w24, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st2": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st3": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st4": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st5": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st6": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomp st0, st7": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd8 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdiv st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st0, st0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd8 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st0, st1": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st2": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st3": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st4": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st5": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st6": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st0, st7": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xd8 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fld dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "fcvt d2, s2",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst dword [rax]": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcvt s2, d2",
        "str s2, [x4]"
      ]
    },
    "fstp dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvt s2, d2",
        "str s2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldenv [rax]": {
      "ExpectedInstructionCount": 33,
      "Comment": [
        "0xd9 !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "ubfx w21, w20, #10, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "strh w20, [x28, #1200]",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w21, w20, #8, #1",
        "ubfx w22, w20, #9, #1",
        "ubfx w23, w20, #10, #1",
        "ubfx w24, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w21, [x28, #1048]",
        "strb w22, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w21, #0x55555555",
        "bic w20, w21, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldcw [rax]": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "ubfx w21, w20, #10, #3",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "strh w20, [x28, #1200]"
      ]
    },
    "fnstenv [rax]": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xd9 !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "str w20, [x4]",
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "str w20, [x4, #4]",
        "ldrb w20, [x28, #1202]",
        "orr w20, w20, w20, lsl #4",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsl #2",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsl #1",
        "and w20, w20, #0x55555555",
        "orr w20, w20, w20, lsl #1",
        "eor w20, w20, #0xffff",
        "str w20, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]"
      ]
    },
    "fnstcw [rax]": {
      "ExpectedInstructionCount": 2,
      "Comment": [
        "0xd9 !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x28, #1200]",
        "strh w20, [x4]"
      ]
    },
    "fld st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st2": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st3": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st4": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st5": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st6": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xd9 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xd9 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fxch st0, st0": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xd9 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1049]"
      ]
    },
    "fxch st0, st1": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st2": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st3": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st4": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st5": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st6": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fxch st0, st7": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xd9 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "strb wzr, [x28, #1049]",
        "str d3, [x21, #1056]",
        "str d2, [x20, #1056]"
      ]
    },
    "fnop": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xd9 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fchs": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fneg v2.2d, v2.2d",
        "str d2, [x20, #1056]"
      ]
    },
    "fabs": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fabs d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ftst": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov w21, #0x0",
        "fmov d3, x21",
        "fcmp d2, d3",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fxam": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov x21, v2.d[0]",
        "lsr x21, x21, #63",
        "strb w21, [x28, #1049]",
        "ldrb w21, [x28, #1202]",
        "lsr w20, w21, w20",
        "and w20, w20, #0x1",
        "mrs x21, nzcv",
        "cmp w20, #0x1 (1)",
        "cset x22, ne",
        "strb w22, [x28, #1048]",
        "strb w20, [x28, #1050]",
        "strb w22, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fld1": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x3ff0000000000000",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2t": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0xa372",
        "movk x20, #0x979, lsl #16",
        "movk x20, #0x934f, lsl #32",
        "movk x20, #0x400a, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldl2e": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x82fe",
        "movk x20, #0x652b, lsl #16",
        "movk x20, #0x1547, lsl #32",
        "movk x20, #0x3ff7, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldpi": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x2d18",
        "movk x20, #0x5444, lsl #16",
        "movk x20, #0x21fb, lsl #32",
        "movk x20, #0x4009, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldlg2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x79ff",
        "movk x20, #0x509f, lsl #16",
        "movk x20, #0x4413, lsl #32",
        "movk x20, #0x3fd3, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldln2": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x39ef",
        "movk x20, #0xfefa, lsl #16",
        "movk x20, #0x2e42, lsl #32",
        "movk x20, #0x3fe6, lsl #48",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fldz": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xd9 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "f2xm1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #2240]",
        "ldr x3, [x28, #2248]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x20, #1056]"
      ]
    },
    "fyl2x": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2256]",
        "ldr x3, [x28, #2264]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]"
      ]
    },
    "fptan": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xd9 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2992]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "mov x22, #0x3ff0000000000000",
        "fmov d3, x22",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d3, [x22, #1056]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fpatan": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2224]",
        "ldr x3, [x28, #2232]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb w20, [x28, #1051]",
        "str d2, [x21, #1056]"
      ]
    },
    "fxtract": {
      "ExpectedInstructionCount": 30,
      "Comment": [
        "0xd9 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mov x22, v2.d[0]",
        "mov x23, #0xfff0000000000000",
        "fmov d3, x23",
        "ubfx x23, x22, #52, #11",
        "sub x23, x23, #0x3ff (1023)",
        "scvtf d4, x23",
        "and x23, x22, #0x800fffffffffffff",
        "orr x23, x23, #0x3ff0000000000000",
        "fmov d5, x23",
        "mrs x23, nzcv",
        "tst x22, #0x7fffffffffffffff",
        "fcsel d2, d2, d5, eq",
        "fcsel d3, d3, d4, eq",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "msr nzcv, x23",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d2, [x22, #1056]",
        "str d3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0x303",
        "lsr w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fprem1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2288]",
        "ldr x3, [x28, #2296]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fdecstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fincstp": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xd9 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fprem": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xd9 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2272]",
        "ldr x3, [x28, #2280]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fyl2xp1": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xd9 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "mov x20, #0x3ff0000000000000",
        "fmov d2, x20",
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "fmov d1, d3",
        "ldr x0, [x28, #2256]",
        "ldr x3, [x28, #2264]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "strb w20, [x28, #1051]",
        "str d3, [x22, #1056]",
        "str d2, [x21, #1056]"
      ]
    },
    "fsqrt": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsqrt d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsincos": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xd9 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2976]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "fmov d0, d2",
        "ldr x0, [x28, #2984]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb wzr, [x28, #1050]",
        "strb w20, [x28, #1051]",
        "add x22, x28, x20, lsl #4",
        "str d2, [x22, #1056]",
        "str d3, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frndint": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "0xd9 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fscale": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xd9 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d3",
        "fmov d1, d2",
        "ldr x0, [x28, #2304]",
        "ldr x3, [x28, #2312]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "str d2, [x20, #1056]"
      ]
    },
    "fsin": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2976]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fcos": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xd9 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmov d0, d2",
        "ldr x0, [x28, #2984]",
        "str x30, [sp, #-16]!",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "strb wzr, [x28, #1050]",
        "str d2, [x20, #1056]"
      ]
    },
    "fiadd dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fimul dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ficom dword [rax]": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xda !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "ficomp dword [rax]": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xda !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fisubr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fidiv dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fidivr dword [rax]": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, lo",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmove st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, eq",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xda 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xda 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ls",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xda 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xda 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eon w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fucompp": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xda 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w20",
        "orr w24, w24, w20",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "msr nzcv, x22",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdb !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr w20, [x4]",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp dword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdb !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs w21, d2",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist dword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdb !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d0, d2",
        "fcvtzs w20, d0",
        "str w20, [x4]"
      ]
    },
    "fistp dword [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "frinti d0, d2",
        "fcvtzs w21, d0",
        "str w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fld tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp tword [rax]": {
      "ExpectedInstructionCount": 21,
      "Comment": [
        "0xdb !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcmovnb st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnb st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hs",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovne st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, ne",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnbe st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "csetm x20, hi",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st0": {
      "ExpectedInstructionCount": 10,
      "Comment": [
        "0xdb 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "bsl v2.16b, v3.16b, v3.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st1": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st2": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st3": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st4": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st5": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st6": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fcmovnu st0, st7": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdb 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "eor w0, w26, w26, lsr #4",
        "eor w0, w0, w0, lsr #2",
        "eor w20, w0, w0, lsr #1",
        "sbfx x20, x20, #0, #1",
        "dup v2.2d, x20",
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d4, [x20, #1056]",
        "bsl v2.16b, v3.16b, v4.16b",
        "str d2, [x20, #1056]"
      ]
    },
    "fnclex": {
      "ExpectedInstructionCount": 1,
      "Comment": [
        "0xdb 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "strb wzr, [x28, #1040]"
      ]
    },
    "fninit": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdb 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x0",
        "rbit w1, w20",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x20, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fucomi st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fucomi st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st0": {
      "ExpectedInstructionCount": 7,
      "Comment": [
        "0xdb 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st1": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st2": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st3": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st4": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st5": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st6": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fcomi st0, st7": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdb 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le"
      ]
    },
    "fadd qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fcom qword [rax]": {
      "ExpectedInstructionCount": 16,
      "Comment": [
        "0xdc !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fcomp qword [rax]": {
      "ExpectedInstructionCount": 24,
      "Comment": [
        "0xdc !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsub qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr qword [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdc !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fadd st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fadd d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fadd st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xc8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fmul st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fmul d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fmul st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xe0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fsubr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsubr st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fsubr st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xe8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fsub st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fsub d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fsub st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "db 0xdc, 0xf0": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fdivr st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdivr st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "fdivr st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x21, #1056]"
      ]
    },
    "db 0xdc, 0xf8": {
      "ExpectedInstructionCount": 5,
      "Comment": [
        "fdiv st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xdc 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "fdiv d2, d2, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st1, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st2, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st3, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st4, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st5, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st6, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fdiv st7, st0": {
      "ExpectedInstructionCount": 9,
      "Comment": [
        "0xdc 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fld qword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp qword [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs x21, d2",
        "str x21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst qword [rax]": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdd !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "str d2, [x4]"
      ]
    },
    "fstp qword [rax]": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str d2, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "frstor [rax]": {
      "ExpectedInstructionCount": 141,
      "Comment": [
        "0xdd !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "strh w20, [x28, #1200]",
        "lsr w20, w20, #10",
        "and w20, w20, #0x3",
        "rbit w1, w20",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x20, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "ldr w20, [x4, #4]",
        "ubfx w21, w20, #11, #3",
        "strb w21, [x28, #1051]",
        "ubfx w22, w20, #8, #1",
        "ubfx w23, w20, #9, #1",
        "ubfx w24, w20, #10, #1",
        "ubfx w30, w20, #14, #1",
        "ubfx w20, w20, #0, #1",
        "strb w22, [x28, #1048]",
        "strb w23, [x28, #1049]",
        "strb w24, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "strb w20, [x28, #1040]",
        "ldr w20, [x4, #8]",
        "and w20, w20, w20, lsr #1",
        "mov w22, #0x55555555",
        "bic w20, w22, w20",
        "orr w20, w20, w20, lsr #1",
        "and w20, w20, #0x33333333",
        "orr w20, w20, w20, lsr #2",
        "and w20, w20, #0xf0f0f0f",
        "orr w20, w20, w20, lsr #4",
        "strb w20, [x28, #1202]",
        "mov x20, #0xffffffffffffffff",
        "mov w22, #0xffff",
        "fmov d2, x20",
        "fmov v2.D[1], x22",
        "ldur q3, [x4, #28]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x21, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #38]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldr q3, [x4, #48]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #58]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #68]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #78]",
        "and v3.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v3.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d3, d0",
        "add x0, x28, x20, lsl #4",
        "str d3, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur q3, [x4, #88]",
        "and v2.16b, v3.16b, v2.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add x0, x28, x20, lsl #4",
        "str d2, [x0, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "ldur d2, [x4, #98]",
        "ldr h3, [x4, #106]",
        "mov v2.h[4], v3.h[0]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "add x0, x28, x20, lsl #4",
        "str d2, [x0, #1056]"
      ]
    },
    "fnsave [rax]": {
      "ExpectedInstructionCount": 143,
      "Comment": [
        "0xdd !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrh w21, [x28, #1200]",
        "str w21, [x4]",
        "ldrb w21, [x28, #1051]",
        "lsl x21, x21, #11",
        "ldrb w22, [x28, #1048]",
        "orr x21, x21, x22, lsl #8",
        "ldrb w22, [x28, #1049]",
        "orr x21, x21, x22, lsl #9",
        "ldrb w22, [x28, #1050]",
        "orr x21, x21, x22, lsl #10",
        "ldrb w22, [x28, #1054]",
        "orr x21, x21, x22, lsl #14",
        "ldrb w22, [x28, #1040]",
        "orr x21, x21, x22",
        "str w21, [x4, #4]",
        "mov w21, #0x0",
        "ldrb w22, [x28, #1202]",
        "orr w22, w22, w22, lsl #4",
        "and w22, w22, #0xf0f0f0f",
        "orr w22, w22, w22, lsl #2",
        "and w22, w22, #0x33333333",
        "orr w22, w22, w22, lsl #1",
        "and w22, w22, #0x55555555",
        "orr w22, w22, w22, lsl #1",
        "eor w22, w22, #0xffff",
        "str w22, [x4, #8]",
        "str wzr, [x4, #12]",
        "str wzr, [x4, #16]",
        "str wzr, [x4, #20]",
        "str wzr, [x4, #24]",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #28]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #38]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str q2, [x4, #48]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #58]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #68]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #78]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur q2, [x4, #88]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x0, x28, x20, lsl #4",
        "ldr d2, [x0, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "stur d2, [x4, #98]",
        "dup v2.8h, v2.h[4]",
        "str h2, [x4, #106]",
        "rbit w1, w21",
        "lsr w1, w1, #30",
        "mrs x0, fpcr",
        "bfi x0, x1, #22, #2",
        "lsr x1, x21, #2",
        "bfi x0, x1, #24, #1",
        "msr fpcr, x0",
        "mov w20, #0x37f",
        "strh w20, [x28, #1200]",
        "strb wzr, [x28, #1051]",
        "strb wzr, [x28, #1202]",
        "strb wzr, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb wzr, [x28, #1050]",
        "strb wzr, [x28, #1054]",
        "strb wzr, [x28, #1040]"
      ]
    },
    "fnstsw [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdd !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "strh w20, [x4]"
      ]
    },
    "ffree st0": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdd 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st1": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st2": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st3": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st4": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st5": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st6": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffree st7": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xdd 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st0": {
      "ExpectedInstructionCount": 0,
      "Comment": [
        "0xdd 11b 0xd0 /2"
      ],
      "ExpectedArm64ASM": []
    },
    "fst st1": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd1 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st2": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd2 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x2 (2)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st3": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd3 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x3 (3)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st4": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd4 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x4 (4)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st5": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd5 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x5 (5)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st6": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd6 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x6 (6)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fst st7": {
      "ExpectedInstructionCount": 12,
      "Comment": [
        "0xdd 11b 0xd7 /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st0": {
      "ExpectedInstructionCount": 11,
      "Comment": [
        "0xdd 11b 0xd8 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdd 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xda /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdb /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdc /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdd /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xde /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fstp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdd 11b 0xdf /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "add w21, w21, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "str d2, [x22, #1056]",
        "ldrb w22, [x28, #1202]",
        "mov w23, #0x1",
        "lsl w21, w23, w21",
        "orr w21, w22, w21",
        "lsl w20, w23, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucom st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdd 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "mrs x20, nzcv",
        "fcmp d2, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "fucom st1": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st3": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st4": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st5": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st6": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucom st7": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdd 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w20",
        "orr w23, w23, w20",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x21"
      ]
    },
    "fucomp st0": {
      "ExpectedInstructionCount": 23,
      "Comment": [
        "0xdd 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "fcmp d2, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st1": {
      "ExpectedInstructionCount": 25,
      "Comment": [
        "0xdd 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fcmp d3, d2",
        "cset x23, vs",
        "cset x24, lo",
        "cset x30, eq",
        "orr w24, w24, w23",
        "orr w30, w30, w23",
        "strb w24, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w23, [x28, #1050]",
        "strb w30, [x28, #1054]",
        "msr nzcv, x22",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st2": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st3": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st4": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st5": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st6": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomp st7": {
      "ExpectedInstructionCount": 27,
      "Comment": [
        "0xdd 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "mrs x21, nzcv",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x21",
        "strb w22, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fiadd word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fadd d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fimul word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fmul d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "ficom word [rax]": {
      "ExpectedInstructionCount": 18,
      "Comment": [
        "0xde !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x21, vs",
        "cset x22, lo",
        "cset x23, eq",
        "orr w22, w22, w21",
        "orr w23, w23, w21",
        "strb w22, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w21, [x28, #1050]",
        "strb w23, [x28, #1054]",
        "msr nzcv, x20"
      ]
    },
    "ficomp word [rax]": {
      "ExpectedInstructionCount": 26,
      "Comment": [
        "0xde !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "mrs x20, nzcv",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x22, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w22",
        "orr w24, w24, w22",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w22, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w22, w21, #0x1 (1)",
        "and w22, w22, #0x7",
        "msr nzcv, x20",
        "strb w22, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisub word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fisubr word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /5"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fsub d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "fidiv word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d3, d2",
        "str d2, [x20, #1056]"
      ]
    },
    "fidivr word [rax]": {
      "ExpectedInstructionCount": 8,
      "Comment": [
        "0xde !11b /7"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, w20",
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fdiv d2, d2, d3",
        "str d2, [x20, #1056]"
      ]
    },
    "faddp st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xde 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fadd d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fadd d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "faddp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fadd d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xde 11b 0xc8 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fmul d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st1": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xc9 /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fmul d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st2": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xca /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st3": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcb /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st4": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcc /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st5": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcd /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st6": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xce /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fmulp st7": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xcf /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fmul d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcompp": {
      "ExpectedInstructionCount": 29,
      "Comment": [
        "0xde 11b 0xd9 /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "mrs x22, nzcv",
        "add x20, x28, x20, lsl #4",
        "ldr d3, [x20, #1056]",
        "fcmp d3, d2",
        "cset x20, vs",
        "cset x23, lo",
        "cset x24, eq",
        "orr w23, w23, w20",
        "orr w24, w24, w20",
        "strb w23, [x28, #1048]",
        "strb wzr, [x28, #1049]",
        "strb w20, [x28, #1050]",
        "strb w24, [x28, #1054]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "msr nzcv, x22",
        "strb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x8",
        "sub w20, w22, w20",
        "mov w22, #0xc0c0",
        "lsr w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fsubrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fsub d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xe1 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fsub d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe2 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe3 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe4 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe5 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe6 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubrp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xe7 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xe8": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fsubp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fsub d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fsub d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fsubp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fsub d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf0": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fdivrp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fdiv d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x23, x28, x20, lsl #4",
        "ldr d3, [x23, #1056]",
        "fdiv d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivrp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "db 0xde, 0xf8": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "fdivp st0, st0",
        "Needs manual encoding since otherwise nasm will emit the 0xd8 variant.",
        "0xde 11b 0xf8 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fdiv d2, d2, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st1, st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xde 11b 0xf9 /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d3, [x22, #1056]",
        "fdiv d2, d3, d2",
        "strb w21, [x28, #1051]",
        "str d2, [x22, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st2, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfa /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st3, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfb /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st4, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfc /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st5, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfd /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st6, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xfe /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fdivp st7, st0": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xde 11b 0xff /7"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d3, [x21, #1056]",
        "fdiv d2, d3, d2",
        "add w22, w20, #0x1 (1)",
        "and w22, w22, #0x7",
        "strb w22, [x28, #1051]",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fild word [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf !11b /0"
      ],
      "ExpectedArm64ASM": [
        "ldrh w20, [x4]",
        "sxth x20, w20",
        "scvtf d2, x20",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fisttp word [rax]": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf !11b /1"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcvtzs x21, d2",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fist word [rax]": {
      "ExpectedInstructionCount": 6,
      "Comment": [
        "0xdf !11b /2"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x20, x28, x20, lsl #4",
        "ldr d2, [x20, #1056]",
        "frinti d0, d2",
        "fcvtzs x20, d0",
        "strh w20, [x4]"
      ]
    },
    "fistp word [rax]": {
      "ExpectedInstructionCount": 14,
      "Comment": [
        "0xdf !11b /3"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "frinti d0, d2",
        "fcvtzs x21, d0",
        "strh w21, [x4]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbld tword [rax]": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xdf !11b /4"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #2000]",
        "ldr x3, [x28, #2008]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1680]",
        "ldr x3, [x28, #1688]",
        "blr x0",
        "ldr x30, [sp], #16",
        "fmov d2, d0",
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "str d2, [x21, #1056]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "orr w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fbstp tword [rax]": {
      "ExpectedInstructionCount": 28,
      "Comment": [
        "0xdf !11b /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "str x30, [sp, #-16]!",
        "fmov d0, d2",
        "ldr x0, [x28, #1648]",
        "ldr x3, [x28, #1656]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str x30, [sp, #-16]!",
        "mov v0.16b, v2.16b",
        "ldr x0, [x28, #1984]",
        "ldr x3, [x28, #1992]",
        "blr x0",
        "ldr x30, [sp], #16",
        "mov v2.16b, v0.16b",
        "str d2, [x4]",
        "mov x21, v2.d[1]",
        "strh w21, [x4, #8]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "ffreep st0": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc0 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st1": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc1 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st2": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc2 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st3": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc3 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st4": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc4 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st5": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc5 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st6": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc6 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "ffreep st7": {
      "ExpectedInstructionCount": 4,
      "Comment": [
        "0xdf 11b 0xc7 /0"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w20, w20, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]"
      ]
    },
    "fnstsw ax": {
      "ExpectedInstructionCount": 13,
      "Comment": [
        "0xdf 11b 0xe0 /4"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "lsl x20, x20, #11",
        "ldrb w21, [x28, #1048]",
        "orr x20, x20, x21, lsl #8",
        "ldrb w21, [x28, #1049]",
        "orr x20, x20, x21, lsl #9",
        "ldrb w21, [x28, #1050]",
        "orr x20, x20, x21, lsl #10",
        "ldrb w21, [x28, #1054]",
        "orr x20, x20, x21, lsl #14",
        "ldrb w21, [x28, #1040]",
        "orr x20, x20, x21",
        "bfxil x4, x20, #0, #16"
      ]
    },
    "fucomip st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdf 11b 0xe8 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdf 11b 0xe9 /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xea /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st3": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xeb /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st4": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xec /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st5": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xed /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st6": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xee /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fucomip st7": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xef /5"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st0": {
      "ExpectedInstructionCount": 15,
      "Comment": [
        "0xdf 11b 0xf0 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add x21, x28, x20, lsl #4",
        "ldr d2, [x21, #1056]",
        "fcmp d2, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st1": {
      "ExpectedInstructionCount": 17,
      "Comment": [
        "0xdf 11b 0xf1 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "add x22, x28, x20, lsl #4",
        "ldr d3, [x22, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st2": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf2 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x2 (2)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st3": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf3 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x3 (3)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st4": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf4 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x4 (4)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st5": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf5 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x5 (5)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st6": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf6 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x6 (6)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "fcomip st7": {
      "ExpectedInstructionCount": 19,
      "Comment": [
        "0xdf 11b 0xf7 /6"
      ],
      "ExpectedArm64ASM": [
        "ldrb w20, [x28, #1051]",
        "add w21, w20, #0x7 (7)",
        "and w21, w21, #0x7",
        "add x21, x28, x21, lsl #4",
        "ldr d2, [x21, #1056]",
        "add x21, x28, x20, lsl #4",
        "ldr d3, [x21, #1056]",
        "fcmp d3, d2",
        "cset x26, vc",
        "csetm x0, eq",
        "ccmn x26, x0, #nzCv, le",
        "add w21, w20, #0x1 (1)",
        "and w21, w21, #0x7",
        "strb w21, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_32": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld dword [rax]",
        "fstp dword [rdx]",
        "fld dword [rax + 4]",
        "fstp dword [rdx + 4]",
        "fld dword [rax + 8]",
        "fstp dword [rdx + 8]",
        "fld dword [rax + 12]",
        "fstp dword [rdx + 12]"
      ],
      "ExpectedArm64ASM": [
        "ldr s2, [x4]",
        "str s2, [x5]",
        "ldr s2, [x4, #4]",
        "str s2, [x5, #4]",
        "ldr s2, [x4, #8]",
        "str s2, [x5, #8]",
        "ldr s2, [x4, #12]",
        "str s2, [x5, #12]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_64": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 16,
      "x86Insts": [
        "fld qword [rax]",
        "fstp qword [rdx]",
        "fld qword [rax + 8]",
        "fstp qword [rdx + 8]",
        "fld qword [rax + 16]",
        "fstp qword [rdx + 16]",
        "fld qword [rax + 32]",
        "fstp qword [rdx + 32]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "str d2, [x5]",
        "ldr d2, [x4, #8]",
        "str d2, [x5, #8]",
        "ldr d2, [x4, #16]",
        "str d2, [x5, #16]",
        "ldr d2, [x4, #32]",
        "str d2, [x5, #32]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "Multiple fld/fst": {
      "x86InstructionCount": 4,
      "ExpectedInstructionCount": 22,
      "x86Insts": [
        "fld     qword [ebp+16380]",
        "fstp    qword [eax-0x4]",
        "fld     qword [ebp-0x8]",
        "fstp    qword [eax+16370]"
      ],
      "ExpectedArm64ASM": [
        "mov w20, #0x3ffc",
        "add x20, x9, x20",
        "mov w20, w20",
        "ldr d2, [x20]",
        "sub x20, x4, #0x4 (4)",
        "mov w20, w20",
        "str d2, [x20]",
        "sub x20, x9, #0x8 (8)",
        "mov w20, w20",
        "ldr d2, [x20]",
        "mov w20, #0x3ff2",
        "add x20, x4, x20",
        "mov w20, w20",
        "str d2, [x20]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    },
    "memcpy4_80": {
      "x86InstructionCount": 8,
      "ExpectedInstructionCount": 38,
      "x86Insts": [
        "fld tword [rax]",
        "fstp tword [rdx]",
        "fld tword [rax + 10]",
        "fstp tword [rdx + 10]",
        "fld tword [rax + 20]",
        "fstp tword [rdx + 20]",
        "fld tword [rax + 30]",
        "fstp tword [rdx + 30]"
      ],
      "ExpectedArm64ASM": [
        "ldr d2, [x4]",
        "add x20, x4, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "str d2, [x5]",
        "mov x20, v2.d[1]",
        "strh w20, [x5, #8]",
        "add x20, x4, #0xa (10)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #10]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0xa (10)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x14 (20)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #20]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x14 (20)",
        "strh w20, [x21, #8]",
        "add x20, x4, #0x1e (30)",
        "ldr d2, [x20]",
        "add x20, x20, #0x8 (8)",
        "ld1 {v2.h}[4], [x20]",
        "stur d2, [x5, #30]",
        "mov x20, v2.d[1]",
        "add x21, x5, #0x1e (30)",
        "strh w20, [x21, #8]",
        "ldrb w20, [x28, #1051]",
        "ldrb w21, [x28, #1202]",
        "mov w22, #0x1",
        "add w20, w20, #0x7 (7)",
        "and w20, w20, #0x7",
        "lsl w20, w22, w20",
        "bic w20, w21, w20",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/InstructionCountCI/x87_f64_32Bit.json
================================================
{
  "Features": {
    "Env": {
      "FEX_X87REDUCEDPRECISION": "1"
    },
    "Bitness": 32,
    "EnabledHostFeatures": [
      "AFP",
      "RPRES",
      "FLAGM",
      "FLAGM2"
    ],
    "DisabledHostFeatures": [
      "SVE128",
      "SVE256",
      "CSSC"
    ]
  },
  "Instructions": {
    "fstp dword [ebx*4+0x204a20]": {
      "ExpectedInstructionCount": 16,
      "ExpectedArm64ASM": [
        "mov w20, #0x4a20",
        "movk w20, #0x20, lsl #16",
        "add w20, w20, w6, lsl #2",
        "ldrb w21, [x28, #1051]",
        "add x22, x28, x21, lsl #4",
        "ldr d2, [x22, #1056]",
        "fcvt s2, d2",
        "str s2, [x20]",
        "add w20, w21, #0x1 (1)",
        "and w20, w20, #0x7",
        "strb w20, [x28, #1051]",
        "ldrb w20, [x28, #1202]",
        "mov w22, #0x1",
        "lsl w21, w22, w21",
        "bic w20, w20, w21",
        "strb w20, [x28, #1202]"
      ]
    }
  }
}


================================================
FILE: unittests/POSIX/CMakeLists.txt
================================================

# Register every prebuilt POSIX conformance test binary as a CTest test.
#
# Careful: CONFIGURE_DEPENDS only re-checks which files match the glob, so
# added or removed *.test files are noticed, but changes to the contents of an
# existing file are not. Do a fresh clean build to pick those up.
file(GLOB_RECURSE POSIX_TESTS CONFIGURE_DEPENDS "${CMAKE_SOURCE_DIR}/External/fex-posixtest-bins/conformance/*.test")

foreach(POSIX_TEST ${POSIX_TESTS})

  # Derive the test name from the path below fex-posixtest-bins/:
  # split the absolute path at that directory, keep the trailing part, and
  # turn the remaining path separators into dashes
  # (e.g. conformance/interfaces/foo/1-1.test -> conformance-interfaces-foo-1-1.test).
  string(REPLACE "/fex-posixtest-bins/" ";" TEST_NAME_LIST "${POSIX_TEST}")
  list(GET TEST_NAME_LIST 1 TEST_NAME)
  string(REPLACE "/" "-" TEST_NAME "${TEST_NAME}")

  # The runner script receives the known-failure / expected-output /
  # disabled / flake lists that live next to this file, followed by the test
  # name, the run mode, the FEX binary and the guest test binary.
  add_test(NAME "${TEST_NAME}.jit.posix"
    COMMAND "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
    "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
    "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
    "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
    "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
    "${TEST_NAME}"
    "guest"
    "$<TARGET_FILE:FEX>"
    "${POSIX_TEST}")
  # Exit code 125 from the runner is reported by CTest as "skipped".
  set_property(TEST "${TEST_NAME}.jit.posix" APPEND PROPERTY SKIP_RETURN_CODE 125)
  set_property(TEST "${TEST_NAME}.jit.posix" APPEND PROPERTY ENVIRONMENT "FEX_OUTPUTLOG=stderr;FEX_SILENTLOG=0;FEX_MAXINST=500")
endforeach()

# Convenience target that runs only the tests registered above.
# The filter is the regex \.posix$ : the backslash is doubled because CMake
# consumes one level of escaping inside a quoted argument, and VERBATIM makes
# sure the backslash and the '$' anchor reach ctest unchanged instead of being
# interpreted by the build tool or the shell.
add_custom_target(posix_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\\.posix$"
  VERBATIM)


================================================
FILE: unittests/POSIX/Disabled_Tests
================================================
# these are inconsistent or fail on CI
nanosleep-2-1.test
conformance-interfaces-clock_nanosleep-2-1.test
conformance-interfaces-difftime-1-1.test

# These tests are inconsistent
conformance-interfaces-mmap-12-1.test
conformance-interfaces-mmap-6-1.test
conformance-interfaces-mmap-6-2.test
conformance-interfaces-mmap-6-3.test

# These tests take too long to run, and might timeout
conformance-interfaces-clock-1-1.test
conformance-interfaces-clock_gettime-4-1.test
conformance-interfaces-clock_getcpuclockid-1-1.test

# These tests fail when run natively on x86-64 host
conformance-interfaces-clock_getcpuclockid-2-1.test
conformance-interfaces-mlock-speculative-12-1.test
conformance-interfaces-mmap-11-4.test
conformance-interfaces-mmap-23-1.test
conformance-interfaces-pthread_attr_getschedpolicy-2-1.test
conformance-interfaces-pthread_attr_getscope-1-1.test
conformance-interfaces-pthread_attr_setschedpolicy-4-1.test
conformance-interfaces-pthread_attr_setscope-4-1.test
conformance-interfaces-sched_getparam-6-1.test
conformance-interfaces-sched_getscheduler-7-1.test
conformance-interfaces-sigset-8-1.test
conformance-interfaces-strftime-1-1.test
src-conformance-interfaces-pthread_attr_getschedpolicy-2-1.test
conformance-interfaces-munmap-2-1.test
conformance-interfaces-mlockall-3-7.test
conformance-interfaces-mlockall-speculative-15-1.test
conformance-interfaces-pthread_attr_getschedparam-1-1.test
conformance-interfaces-pthread_attr_setschedpolicy-speculative-5-1.test
conformance-interfaces-sched_setparam-10-1.test
conformance-interfaces-sched_setparam-23-6.test
conformance-interfaces-sched_setparam-9-1.test
conformance-interfaces-sched_setscheduler-1-1.test
conformance-interfaces-sched_setscheduler-16-1.test
conformance-interfaces-sched_setscheduler-17-5.test
conformance-interfaces-sched_setscheduler-4-1.test
conformance-interfaces-sigaddset-1-core-buildonly.test
conformance-interfaces-sigaddset-4-core-buildonly.test
conformance-interfaces-sigdelset-1-core-buildonly.test
conformance-interfaces-sigdelset-4-core-buildonly.test
conformance-interfaces-sighold-3-core-buildonly.test
conformance-interfaces-sigignore-5-core-buildonly.test
conformance-interfaces-sigismember-5-core-buildonly.test
conformance-interfaces-sigqueue-9-1.test
conformance-interfaces-sigrelse-3-core-buildonly.test
conformance-interfaces-sigset-6-1.test
conformance-interfaces-sigset-7-1.test
conformance-interfaces-aio_cancel-10-1.test
conformance-interfaces-aio_cancel-1-1.test
conformance-interfaces-aio_cancel-2-1.test
conformance-interfaces-aio_cancel-2-2.test
conformance-interfaces-aio_cancel-4-1.test
conformance-interfaces-aio_cancel-5-1.test
conformance-interfaces-aio_cancel-6-1.test
conformance-interfaces-aio_cancel-7-1.test
conformance-interfaces-aio_cancel-8-1.test
conformance-interfaces-aio_cancel-9-1.test
conformance-interfaces-aio_error-1-1.test
conformance-interfaces-aio_error-2-1.test
conformance-interfaces-aio_error-3-1.test
conformance-interfaces-aio_fsync-12-1.test
conformance-interfaces-aio_fsync-14-1.test
conformance-interfaces-aio_fsync-2-1.test
conformance-interfaces-aio_fsync-3-1.test
conformance-interfaces-aio_fsync-4-1.test
conformance-interfaces-aio_fsync-4-2.test
conformance-interfaces-aio_fsync-5-1.test
conformance-interfaces-aio_fsync-8-1.test
conformance-interfaces-aio_fsync-8-2.test
conformance-interfaces-aio_fsync-8-3.test
conformance-interfaces-aio_fsync-8-4.test
conformance-interfaces-aio_fsync-9-1.test
conformance-interfaces-aio_read-10-1.test
conformance-interfaces-aio_read-11-1.test
conformance-interfaces-aio_read-11-2.test
conformance-interfaces-aio_read-2-1.test
conformance-interfaces-aio_read-3-1.test
conformance-interfaces-aio_read-3-2.test
conformance-interfaces-aio_read-4-1.test
conformance-interfaces-aio_read-5-1.test
conformance-interfaces-aio_read-7-1.test
conformance-interfaces-aio_read-8-1.test
conformance-interfaces-aio_return-1-1.test
conformance-interfaces-aio_return-3-1.test
conformance-interfaces-aio_return-3-2.test
conformance-interfaces-aio_suspend-3-1.test
conformance-interfaces-aio_suspend-5-1.test
conformance-interfaces-aio_write-1-1.test
conformance-interfaces-aio_write-1-2.test
conformance-interfaces-aio_write-2-1.test
conformance-interfaces-aio_write-3-1.test
conformance-interfaces-aio_write-5-1.test
conformance-interfaces-aio_write-6-1.test
conformance-interfaces-aio_write-8-1.test
conformance-interfaces-aio_write-8-2.test
conformance-interfaces-aio_write-9-1.test
conformance-interfaces-aio_write-9-2.test
conformance-interfaces-lio_listio-5-1.test
conformance-interfaces-lio_listio-6-1.test
conformance-interfaces-lio_listio-8-1.test
conformance-interfaces-lio_listio-9-1.test
conformance-interfaces-pthread_mutex_init-speculative-5-2.test
conformance-interfaces-sched_get_priority_max-1-3.test
conformance-interfaces-sched_get_priority_min-1-3.test
conformance-interfaces-sched_setparam-23-2.test
conformance-interfaces-sched_setparam-23-3.test
conformance-interfaces-sched_setparam-23-4.test
conformance-interfaces-sched_setparam-23-5.test
conformance-interfaces-sched_setparam-25-2.test
conformance-interfaces-sched_setscheduler-17-2.test
conformance-interfaces-sched_setscheduler-17-3.test
conformance-interfaces-sched_setscheduler-17-4.test
conformance-interfaces-sched_setscheduler-19-2.test
conformance-interfaces-sched_setscheduler-19-3.test
conformance-interfaces-sched_setscheduler-19-4.test
conformance-definitions-mqueue_h-10-1.test
conformance-definitions-mqueue_h-11-1.test
conformance-definitions-mqueue_h-1-1.test
conformance-definitions-mqueue_h-2-1.test
conformance-definitions-mqueue_h-3-1.test
conformance-definitions-mqueue_h-4-1.test
conformance-definitions-mqueue_h-5-1.test
conformance-definitions-mqueue_h-6-1.test
conformance-definitions-mqueue_h-7-1.test
conformance-definitions-mqueue_h-8-1.test
conformance-definitions-mqueue_h-9-1.test
conformance-interfaces-clock_settime-1-1.test
conformance-interfaces-clock_settime-19-1.test
conformance-interfaces-clock_settime-7-1.test
conformance-interfaces-clock_settime-7-2.test
conformance-interfaces-clock_settime-8-1.test
conformance-interfaces-mq_close-5-1.test
conformance-interfaces-mq_open-10-1.test
conformance-interfaces-mq_open-14-1.test
conformance-interfaces-mq_open-17-1.test
conformance-interfaces-mq_open-22-1.test
conformance-interfaces-mq_open-24-1.test
conformance-interfaces-mq_open-25-1.test
conformance-interfaces-mq_open-28-1.test
conformance-interfaces-mq_open-30-1.test
conformance-interfaces-mq_open-4-1.test
conformance-interfaces-mq_send-6-1.test
conformance-interfaces-mq_timedsend-17-1.test
conformance-interfaces-mq_timedsend-6-1.test
conformance-interfaces-mq_unlink-2-3.test
conformance-interfaces-pthread_attr_setscope-5-1.test
conformance-interfaces-sched_getscheduler-2-1.test
conformance-interfaces-sched_setparam-12-1.test
conformance-interfaces-sched_setparam-13-1.test
conformance-interfaces-sched_setparam-14-1.test
conformance-interfaces-sched_setparam-15-1.test
conformance-interfaces-sched_setparam-16-1.test
conformance-interfaces-sched_setparam-17-1.test
conformance-interfaces-sched_setparam-18-1.test
conformance-interfaces-sched_setparam-19-1.test
conformance-interfaces-sched_setparam-3-1.test
conformance-interfaces-sched_setparam-6-1.test
conformance-interfaces-sched_setparam-7-1.test
conformance-interfaces-sched_setparam-8-1.test
conformance-interfaces-sched_setscheduler-10-1.test
conformance-interfaces-sched_setscheduler-11-1.test
conformance-interfaces-sched_setscheduler-12-1.test
conformance-interfaces-sched_setscheduler-13-1.test
conformance-interfaces-sched_setscheduler-14-1.test
conformance-interfaces-sched_setscheduler-2-1.test
conformance-interfaces-sched_setscheduler-5-1.test
conformance-interfaces-sched_setscheduler-6-1.test
conformance-interfaces-sched_setscheduler-7-1.test
conformance-interfaces-sched_setscheduler-9-1.test
conformance-interfaces-shm_open-10-1.test
conformance-interfaces-shm_open-12-1.test
conformance-interfaces-shm_open-19-1.test
conformance-interfaces-shm_open-2-1.test
conformance-interfaces-shm_open-24-1.test
conformance-interfaces-shm_open-27-1.test
conformance-interfaces-shm_open-29-1.test
conformance-interfaces-shm_open-3-1.test
conformance-interfaces-shm_open-36-1.test
conformance-interfaces-shm_open-42-1.test
conformance-interfaces-shm_open-6-1.test
conformance-interfaces-shm_open-7-1.test
conformance-interfaces-shm_open-9-1.test
conformance-interfaces-timer_getoverrun-3-1.test
src-conformance-interfaces-shm_open-10-1.test
src-conformance-interfaces-shm_open-12-1.test
src-conformance-interfaces-shm_open-19-1.test
src-conformance-interfaces-shm_open-24-1.test
src-conformance-interfaces-shm_open-29-1.test
src-conformance-interfaces-shm_open-6-1.test

# These use signals and will fail on x86-64
conformance-interfaces-sigaction-8-1.test
conformance-interfaces-sigaction-8-10.test
conformance-interfaces-sigaction-8-11.test
conformance-interfaces-sigaction-8-12.test
conformance-interfaces-sigaction-8-13.test
conformance-interfaces-sigaction-8-14.test
conformance-interfaces-sigaction-8-15.test
conformance-interfaces-sigaction-8-16.test
conformance-interfaces-sigaction-8-17.test
conformance-interfaces-sigaction-8-18.test
conformance-interfaces-sigaction-8-19.test
conformance-interfaces-sigaction-8-2.test
conformance-interfaces-sigaction-8-20.test
conformance-interfaces-sigaction-8-21.test
conformance-interfaces-sigaction-8-22.test
conformance-interfaces-sigaction-8-23.test
conformance-interfaces-sigaction-8-24.test
conformance-interfaces-sigaction-8-25.test
conformance-interfaces-sigaction-8-26.test
conformance-interfaces-sigaction-8-3.test
conformance-interfaces-sigaction-8-4.test
conformance-interfaces-sigaction-8-5.test
conformance-interfaces-sigaction-8-6.test
conformance-interfaces-sigaction-8-7.test
conformance-interfaces-sigaction-8-8.test
conformance-interfaces-sigaction-8-9.test
conformance-interfaces-sigaction-12-27.test
conformance-interfaces-sigaction-12-28.test
conformance-interfaces-sigaction-12-29.test
conformance-interfaces-sigaction-12-30.test
conformance-interfaces-sigaction-12-31.test
conformance-interfaces-sigaction-12-32.test
conformance-interfaces-sigaction-12-33.test
conformance-interfaces-sigaction-12-34.test
conformance-interfaces-sigaction-12-35.test
conformance-interfaces-sigaction-12-36.test
conformance-interfaces-sigaction-12-37.test
conformance-interfaces-sigaction-12-38.test
conformance-interfaces-sigaction-12-39.test
conformance-interfaces-sigaction-12-40.test
conformance-interfaces-sigaction-12-41.test
conformance-interfaces-sigaction-12-42.test
conformance-interfaces-sigaction-12-43.test
conformance-interfaces-sigaction-12-44.test
conformance-interfaces-sigaction-12-45.test
conformance-interfaces-sigaction-12-46.test
conformance-interfaces-sigaction-12-47.test
conformance-interfaces-sigaction-12-48.test
conformance-interfaces-sigaction-12-49.test
conformance-interfaces-sigaction-12-50.test
conformance-interfaces-sigaction-12-51.test
conformance-interfaces-sigaction-12-52.test
conformance-interfaces-sigaction-13-1.test
conformance-interfaces-sigaction-13-10.test
conformance-interfaces-sigaction-13-11.test
conformance-interfaces-sigaction-13-12.test
conformance-interfaces-sigaction-13-13.test
conformance-interfaces-sigaction-13-14.test
conformance-interfaces-sigaction-13-15.test
conformance-interfaces-sigaction-13-16.test
conformance-interfaces-sigaction-13-17.test
conformance-interfaces-sigaction-13-18.test
conformance-interfaces-sigaction-13-19.test
conformance-interfaces-sigaction-13-2.test
conformance-interfaces-sigaction-13-20.test
conformance-interfaces-sigaction-13-21.test
conformance-interfaces-sigaction-13-22.test
conformance-interfaces-sigaction-13-23.test
conformance-interfaces-sigaction-13-24.test
conformance-interfaces-sigaction-13-25.test
conformance-interfaces-sigaction-13-26.test
conformance-interfaces-sigaction-13-3.test
conformance-interfaces-sigaction-13-4.test
conformance-interfaces-sigaction-13-5.test
conformance-interfaces-sigaction-13-6.test
conformance-interfaces-sigaction-13-7.test
conformance-interfaces-sigaction-13-8.test
conformance-interfaces-sigaction-13-9.test
conformance-interfaces-sigaltstack-1-1.test
conformance-interfaces-sigaltstack-6-1.test
conformance-interfaces-sigaltstack-7-1.test
conformance-interfaces-sigaltstack-9-1.test
conformance-interfaces-signal-3-1.test
conformance-interfaces-sigset-3-1.test
conformance-interfaces-sigset-4-1.test
conformance-interfaces-sigset-5-1.test
conformance-interfaces-raise-10000-1.test
conformance-interfaces-raise-2-1.test

# Signals change this behaviour
conformance-interfaces-sigpending-1-1.test
conformance-interfaces-sigpending-2-1.test

# Both of these pass signals in their handler
# Since we exit the signal handler our sa_mask changes
# and we don't currently resolve this
conformance-interfaces-sigpending-1-2.test
conformance-interfaces-sigpending-1-3.test

# Causes long timeout with signal change
conformance-interfaces-mmap-11-2.test
conformance-interfaces-munmap-1-1.test
conformance-interfaces-munmap-1-2.test
conformance-interfaces-kill-1-2.test # uses rt_sigtimedwait

# Unstable tests
conformance-interfaces-clock_nanosleep-10-1.test # uses sigabort
conformance-interfaces-clock_nanosleep-9-1.test # uses sigabort
conformance-interfaces-nanosleep-5-2.test # uses sigabort
conformance-interfaces-nanosleep-7-1.test # uses sigabort
conformance-interfaces-nanosleep-7-2.test # uses sigabort
conformance-interfaces-sigprocmask-6-1.test

# unstable because of threaded unit test running
conformance-interfaces-mlockall-8-1.test

# Race happy
conformance-interfaces-nanosleep-1-3.test
conformance-interfaces-nanosleep-2-1.test
conformance-interfaces-clock_nanosleep-1-1.test
conformance-interfaces-clock_nanosleep-1-3.test
conformance-interfaces-clock_nanosleep-2-2.test
conformance-interfaces-mmap-13-1.test
conformance-interfaces-clock_getres-3-1.test

# mmap behaviour has changed versus what this is expecting
# It is expecting mmap to fail if neither MAP_PRIVATE nor MAP_SHARED is set
# Sadly it increments flags until MAP_SHARED_VALIDATE is set which allocates correctly
conformance-interfaces-mmap-21-1.test

# mmap behaviour has changed versus what this is expecting
# It wants EOVERFLOW but kernel now returns ENOMEM
conformance-interfaces-mmap-31-1.test

# Sending signals to the process group causes every test to become flaky
# Need to run these single threaded or change the program group prior to launch
# !!! DO NOT REENABLE THESE UNTIL WE HAVE THIS IN PLACE IN CI !!!
conformance-interfaces-killpg-1-1.test
conformance-interfaces-killpg-1-2.test
conformance-interfaces-killpg-2-1.test
conformance-interfaces-killpg-4-1.test
conformance-interfaces-killpg-5-1.test
conformance-interfaces-killpg-6-1.test
conformance-interfaces-killpg-8-1.test

# This test is flaky
# It puts the thread to sleep for 1 second and expects to wake up within 10ms of the timer
# If the kernel is doing other things then this 10ms time is too strict and fails periodically
conformance-interfaces-sigtimedwait-1-1.test

# We accidentally pass signals through to the guest that should not be passed through
conformance-interfaces-sigsuspend-1-1.test

# Received signal in handler even though it should be masked
conformance-interfaces-sigaction-25-1.test
conformance-interfaces-sigaction-25-2.test
conformance-interfaces-sigaction-25-3.test
conformance-interfaces-sigaction-25-4.test
conformance-interfaces-sigaction-25-5.test
conformance-interfaces-sigaction-25-6.test
conformance-interfaces-sigaction-25-7.test
conformance-interfaces-sigaction-25-8.test
conformance-interfaces-sigaction-25-9.test
conformance-interfaces-sigaction-25-10.test
conformance-interfaces-sigaction-25-11.test
conformance-interfaces-sigaction-25-12.test
conformance-interfaces-sigaction-25-13.test
conformance-interfaces-sigaction-25-14.test
conformance-interfaces-sigaction-25-15.test
conformance-interfaces-sigaction-25-16.test
conformance-interfaces-sigaction-25-17.test
conformance-interfaces-sigaction-25-18.test
conformance-interfaces-sigaction-25-19.test
conformance-interfaces-sigaction-25-20.test
conformance-interfaces-sigaction-25-21.test
conformance-interfaces-sigaction-25-22.test
conformance-interfaces-sigaction-25-23.test
conformance-interfaces-sigaction-25-24.test
conformance-interfaces-sigaction-25-25.test
conformance-interfaces-sigaction-25-26.test

# This test is flaky on the interpreter
conformance-behavior-WIFEXITED-1-3.test


================================================
FILE: unittests/POSIX/Expected_Output
================================================
conformance-behavior-timers-2-1.test 0
conformance-behavior-WIFEXITED-1-1.test 0
conformance-behavior-WIFEXITED-1-2.test 0
conformance-behavior-WIFEXITED-1-3.test 0
conformance-definitions-errno_h-3-2.test 0
conformance-definitions-errno_h-4-1.test 0
conformance-definitions-signal_h-13-1.test 0
conformance-interfaces-clock-1-1.test 0
conformance-interfaces-clock-2-1.test 0
conformance-interfaces-clock_getcpuclockid-1-1.test 0
conformance-interfaces-clock_getres-1-1.test 0
conformance-interfaces-clock_getres-3-1.test 0
conformance-interfaces-clock_getres-5-1.test 0
conformance-interfaces-clock_getres-6-1.test 0
conformance-interfaces-clock_getres-6-2.test 0
conformance-interfaces-clock_getres-7-1.test 0
conformance-interfaces-clock_getres-8-1.test 0
conformance-interfaces-clock_gettime-1-1.test 0
conformance-interfaces-clock_gettime-1-2.test 0
conformance-interfaces-clock_gettime-2-1.test 0
conformance-interfaces-clock_gettime-3-1.test 0
conformance-interfaces-clock_gettime-4-1.test 0
conformance-interfaces-clock_gettime-7-1.test 0
conformance-interfaces-clock_gettime-8-1.test 0
conformance-interfaces-clock_gettime-8-2.test 0
conformance-interfaces-clock_nanosleep-10-1.test 0
conformance-interfaces-clock_nanosleep-11-1.test 0
conformance-interfaces-clock_nanosleep-1-1.test 0
conformance-interfaces-clock_nanosleep-13-1.test 0
conformance-interfaces-clock_nanosleep-1-3.test 0
conformance-interfaces-clock_nanosleep-1-4.test 0
conformance-interfaces-clock_nanosleep-1-5.test 0
conformance-interfaces-clock_nanosleep-2-1.test 0
conformance-interfaces-clock_nanosleep-2-2.test 0
conformance-interfaces-clock_nanosleep-2-3.test 0
conformance-interfaces-clock_nanosleep-3-1.test 0
conformance-interfaces-clock_nanosleep-9-1.test 0
conformance-interfaces-clock_settime-17-1.test 0
conformance-interfaces-clock_settime-17-2.test 0
conformance-interfaces-clock_settime-20-1.test 0
conformance-interfaces-clock_settime-6-1.test 0
conformance-interfaces-ctime-1-1.test 0
conformance-interfaces-difftime-1-1.test 0
conformance-interfaces-fsync-4-1.test 0
conformance-interfaces-fsync-5-1.test 0
conformance-interfaces-fsync-7-1.test 0
conformance-interfaces-gmtime-1-1.test 0
conformance-interfaces-gmtime-2-1.test 0
conformance-interfaces-kill-1-1.test 0
conformance-interfaces-kill-1-2.test 0
conformance-interfaces-kill-2-1.test 0
conformance-interfaces-killpg-1-1.test 0
conformance-interfaces-killpg-1-2.test 0
conformance-interfaces-killpg-2-1.test 0
conformance-interfaces-killpg-4-1.test 0
conformance-interfaces-killpg-5-1.test 0
conformance-interfaces-killpg-6-1.test 0
conformance-interfaces-killpg-8-1.test 0
conformance-interfaces-localtime-1-1.test 0
conformance-interfaces-mktime-1-1.test 0
conformance-interfaces-mlock-10-1.test 0
conformance-interfaces-mlock-5-1.test 0
conformance-interfaces-mlock-8-1.test 0
conformance-interfaces-mlockall-13-1.test 0
conformance-interfaces-mlockall-13-2.test 0
conformance-interfaces-mlockall-8-1.test 0
conformance-interfaces-mmap-10-1.test 0
conformance-interfaces-mmap-11-1.test 0
conformance-interfaces-mmap-11-2.test 0
conformance-interfaces-mmap-1-1.test 0
conformance-interfaces-mmap-12-1.test 0
conformance-interfaces-mmap-13-1.test 0
conformance-interfaces-mmap-14-1.test 0
conformance-interfaces-mmap-19-1.test 0
conformance-interfaces-mmap-3-1.test 0
conformance-interfaces-mmap-5-1.test 0
conformance-interfaces-mmap-6-1.test 0
conformance-interfaces-mmap-6-2.test 0
conformance-interfaces-mmap-6-3.test 0
conformance-interfaces-mmap-6-4.test 0
conformance-interfaces-mmap-6-5.test 0
conformance-interfaces-mmap-6-6.test 0
conformance-interfaces-mmap-7-1.test 0
conformance-interfaces-mmap-7-2.test 0
conformance-interfaces-mmap-9-1.test 0
conformance-interfaces-munlock-10-1.test 0
conformance-interfaces-munlock-11-1.test 0
conformance-interfaces-munlock-7-1.test 0
conformance-interfaces-munlockall-5-1.test 0
conformance-interfaces-munmap-1-1.test 0
conformance-interfaces-munmap-1-2.test 0
conformance-interfaces-munmap-3-1.test 0
conformance-interfaces-munmap-4-1.test 0
conformance-interfaces-munmap-8-1.test 0
conformance-interfaces-munmap-9-1.test 0
conformance-interfaces-nanosleep-10000-1.test 0
conformance-interfaces-nanosleep-1-1.test 0
conformance-interfaces-nanosleep-1-2.test 0
conformance-interfaces-nanosleep-1-3.test 0
conformance-interfaces-nanosleep-2-1.test 0
conformance-interfaces-nanosleep-3-1.test 0
conformance-interfaces-nanosleep-3-2.test 0
conformance-interfaces-nanosleep-5-1.test 0
conformance-interfaces-nanosleep-5-2.test 0
conformance-interfaces-nanosleep-6-1.test 0
conformance-interfaces-nanosleep-7-1.test 0
conformance-interfaces-nanosleep-7-2.test 0
conformance-interfaces-pthread_atfork-1-1.test 0
conformance-interfaces-pthread_atfork-2-1.test 0
conformance-interfaces-pthread_attr_destroy-2-1.test 0
conformance-interfaces-pthread_attr_destroy-3-1.test 0
conformance-interfaces-pthread_attr_getdetachstate-1-1.test 0
conformance-interfaces-pthread_attr_getdetachstate-1-2.test 0
conformance-interfaces-pthread_attr_getinheritsched-1-1.test 0
conformance-interfaces-pthread_attr_init-1-1.test 0
conformance-interfaces-pthread_attr_init-4-1.test 0
conformance-interfaces-pthread_attr_setdetachstate-1-1.test 0
conformance-interfaces-pthread_attr_setdetachstate-1-2.test 0
conformance-interfaces-pthread_attr_setdetachstate-4-1.test 0
conformance-interfaces-pthread_attr_setinheritsched-1-1.test 0
conformance-interfaces-pthread_attr_setinheritsched-4-1.test 0
conformance-interfaces-pthread_attr_setschedparam-speculative-3-1.test 0
conformance-interfaces-pthread_attr_setschedparam-speculative-3-2.test 0
conformance-interfaces-pthread_condattr_destroy-1-1.test 0
conformance-interfaces-pthread_condattr_destroy-2-1.test 0
conformance-interfaces-pthread_condattr_destroy-3-1.test 0
conformance-interfaces-pthread_condattr_init-3-1.test 0
conformance-interfaces-pthread_cond_destroy-1-1.test 0
conformance-interfaces-pthread_cond_destroy-3-1.test 0
conformance-interfaces-pthread_cond_init-1-1.test 0
conformance-interfaces-pthread_cond_init-2-1.test 0
conformance-interfaces-pthread_cond_init-3-1.test 0
conformance-interfaces-pthread_mutex_destroy-2-1.test 0
conformance-interfaces-pthread_mutex_destroy-3-1.test 0
conformance-interfaces-pthread_mutex_destroy-5-1.test 0
conformance-interfaces-pthread_mutex_destroy-speculative-4-2.test 0
conformance-interfaces-pthread_mutex_init-2-1.test 0
conformance-interfaces-pthread_mutex_init-3-1.test 0
conformance-interfaces-pthread_mutex_lock-2-1.test 0
conformance-interfaces-pthread_mutex_unlock-3-1.test 0
conformance-interfaces-raise-10000-1.test 0
conformance-interfaces-raise-1-1.test 0
conformance-interfaces-raise-1-2.test 0
conformance-interfaces-raise-2-1.test 0
conformance-interfaces-raise-4-1.test 0
conformance-interfaces-raise-6-1.test 0
conformance-interfaces-raise-7-1.test 0
conformance-interfaces-sched_getparam-1-1.test 0
conformance-interfaces-sched_getparam-2-1.test 0
conformance-interfaces-sched_getparam-3-1.test 0
conformance-interfaces-sched_getparam-4-1.test 0
conformance-interfaces-sched_getparam-speculative-7-1.test 0
conformance-interfaces-sched_get_priority_max-1-1.test 0
conformance-interfaces-sched_get_priority_max-1-2.test 0
conformance-interfaces-sched_get_priority_max-1-4.test 0
conformance-interfaces-sched_get_priority_max-2-1.test 0
conformance-interfaces-sched_get_priority_min-1-1.test 0
conformance-interfaces-sched_get_priority_min-1-2.test 0
conformance-interfaces-sched_get_priority_min-1-4.test 0
conformance-interfaces-sched_get_priority_min-2-1.test 0
conformance-interfaces-sched_getscheduler-1-1.test 0
conformance-interfaces-sched_getscheduler-3-1.test 0
conformance-interfaces-sched_getscheduler-4-1.test 0
conformance-interfaces-sched_getscheduler-5-1.test 0
conformance-interfaces-sched_rr_get_interval-1-1.test 0
conformance-interfaces-sched_rr_get_interval-2-1.test 0
conformance-interfaces-sched_rr_get_interval-3-1.test 0
conformance-interfaces-sched_rr_get_interval-speculative-5-1.test 0
conformance-interfaces-sched_setparam-1-1.test 0
conformance-interfaces-sched_setparam-22-1.test 0
conformance-interfaces-sched_setparam-23-1.test 0
conformance-interfaces-sched_setparam-23-7.test 0
conformance-interfaces-sched_setparam-25-1.test 0
conformance-interfaces-sched_setparam-26-1.test 0
conformance-interfaces-sched_setparam-27-1.test 0
conformance-interfaces-sched_setparam-5-1.test 0
conformance-interfaces-sched_setscheduler-17-1.test 0
conformance-interfaces-sched_setscheduler-17-6.test 0
conformance-interfaces-sched_setscheduler-17-7.test 0
conformance-interfaces-sched_setscheduler-19-1.test 0
conformance-interfaces-sched_setscheduler-19-5.test 0
conformance-interfaces-sched_setscheduler-20-1.test 0
conformance-interfaces-sched_setscheduler-21-1.test 0
conformance-interfaces-sched_yield-2-1.test 0
conformance-interfaces-sem_init-6-1.test 0
conformance-interfaces-sem_open-5-1.test 0
conformance-interfaces-sigaction-1-10.test 0
conformance-interfaces-sigaction-1-11.test 0
conformance-interfaces-sigaction-1-12.test 0
conformance-interfaces-sigaction-1-13.test 0
conformance-interfaces-sigaction-1-14.test 0
conformance-interfaces-sigaction-1-15.test 0
conformance-interfaces-sigaction-1-16.test 0
conformance-interfaces-sigaction-1-17.test 0
conformance-interfaces-sigaction-1-18.test 0
conformance-interfaces-sigaction-1-19.test 0
conformance-interfaces-sigaction-1-1.test 0
conformance-interfaces-sigaction-1-20.test 0
conformance-interfaces-sigaction-12-10.test 0
conformance-interfaces-sigaction-12-11.test 0
conformance-interfaces-sigaction-12-12.test 0
conformance-interfaces-sigaction-12-13.test 0
conformance-interfaces-sigaction-12-14.test 0
conformance-interfaces-sigaction-12-15.test 0
conformance-interfaces-sigaction-12-16.test 0
conformance-interfaces-sigaction-12-17.test 0
conformance-interfaces-sigaction-12-18.test 0
conformance-interfaces-sigaction-12-19.test 0
conformance-interfaces-sigaction-1-21.test 0
conformance-interfaces-sigaction-12-1.test 0
conformance-interfaces-sigaction-12-20.test 0
conformance-interfaces-sigaction-12-21.test 0
conformance-interfaces-sigaction-12-22.test 0
conformance-interfaces-sigaction-12-23.test 0
conformance-interfaces-sigaction-12-24.test 0
conformance-interfaces-sigaction-12-25.test 0
conformance-interfaces-sigaction-12-26.test 0
conformance-interfaces-sigaction-12-27.test 0
conformance-interfaces-sigaction-12-28.test 0
conformance-interfaces-sigaction-12-29.test 0
conformance-interfaces-sigaction-1-22.test 0
conformance-interfaces-sigaction-12-2.test 0
conformance-interfaces-sigaction-12-30.test 0
conformance-interfaces-sigaction-12-31.test 0
conformance-interfaces-sigaction-12-32.test 0
conformance-interfaces-sigaction-12-33.test 0
conformance-interfaces-sigaction-12-34.test 0
conformance-interfaces-sigaction-12-35.test 0
conformance-interfaces-sigaction-12-36.test 0
conformance-interfaces-sigaction-12-37.test 0
conformance-interfaces-sigaction-12-38.test 0
conformance-interfaces-sigaction-12-39.test 0
conformance-interfaces-sigaction-1-23.test 0
conformance-interfaces-sigaction-12-3.test 0
conformance-interfaces-sigaction-12-40.test 0
conformance-interfaces-sigaction-12-41.test 0
conformance-interfaces-sigaction-12-42.test 0
conformance-interfaces-sigaction-12-43.test 0
conformance-interfaces-sigaction-12-44.test 0
conformance-interfaces-sigaction-12-45.test 0
conformance-interfaces-sigaction-12-46.test 0
conformance-interfaces-sigaction-12-47.test 0
conformance-interfaces-sigaction-12-48.test 0
conformance-interfaces-sigaction-12-49.test 0
conformance-interfaces-sigaction-1-24.test 0
conformance-interfaces-sigaction-12-4.test 0
conformance-interfaces-sigaction-12-50.test 0
conformance-interfaces-sigaction-12-51.test 0
conformance-interfaces-sigaction-12-52.test 0
conformance-interfaces-sigaction-1-25.test 0
conformance-interfaces-sigaction-12-5.test 0
conformance-interfaces-sigaction-1-26.test 0
conformance-interfaces-sigaction-12-6.test 0
conformance-interfaces-sigaction-12-7.test 0
conformance-interfaces-sigaction-12-8.test 0
conformance-interfaces-sigaction-12-9.test 0
conformance-interfaces-sigaction-1-2.test 0
conformance-interfaces-sigaction-13-10.test 0
conformance-interfaces-sigaction-13-11.test 0
conformance-interfaces-sigaction-13-12.test 0
conformance-interfaces-sigaction-13-13.test 0
conformance-interfaces-sigaction-13-14.test 0
conformance-interfaces-sigaction-13-15.test 0
conformance-interfaces-sigaction-13-16.test 0
conformance-interfaces-sigaction-13-17.test 0
conformance-interfaces-sigaction-13-18.test 0
conformance-interfaces-sigaction-13-19.test 0
conformance-interfaces-sigaction-13-1.test 0
conformance-interfaces-sigaction-13-20.test 0
conformance-interfaces-sigaction-13-21.test 0
conformance-interfaces-sigaction-13-22.test 0
conformance-interfaces-sigaction-13-23.test 0
conformance-interfaces-sigaction-13-24.test 0
conformance-interfaces-sigaction-13-25.test 0
conformance-interfaces-sigaction-13-26.test 0
conformance-interfaces-sigaction-13-2.test 0
conformance-interfaces-sigaction-13-3.test 0
conformance-interfaces-sigaction-13-4.test 0
conformance-interfaces-sigaction-13-5.test 0
conformance-interfaces-sigaction-13-6.test 0
conformance-interfaces-sigaction-13-7.test 0
conformance-interfaces-sigaction-13-8.test 0
conformance-interfaces-sigaction-13-9.test 0
conformance-interfaces-sigaction-1-3.test 0
conformance-interfaces-sigaction-1-4.test 0
conformance-interfaces-sigaction-1-5.test 0
conformance-interfaces-sigaction-1-6.test 0
conformance-interfaces-sigaction-17-10.test 0
conformance-interfaces-sigaction-17-11.test 0
conformance-interfaces-sigaction-17-12.test 0
conformance-interfaces-sigaction-17-13.test 0
conformance-interfaces-sigaction-17-14.test 0
conformance-interfaces-sigaction-17-15.test 0
conformance-interfaces-sigaction-17-16.test 0
conformance-interfaces-sigaction-17-17.test 0
conformance-interfaces-sigaction-17-18.test 0
conformance-interfaces-sigaction-17-19.test 0
conformance-interfaces-sigaction-17-1.test 0
conformance-interfaces-sigaction-17-20.test 0
conformance-interfaces-sigaction-17-21.test 0
conformance-interfaces-sigaction-17-22.test 0
conformance-interfaces-sigaction-17-23.test 0
conformance-interfaces-sigaction-17-24.test 0
conformance-interfaces-sigaction-17-25.test 0
conformance-interfaces-sigaction-17-26.test 0
conformance-interfaces-sigaction-17-2.test 0
conformance-interfaces-sigaction-17-3.test 0
conformance-interfaces-sigaction-17-4.test 0
conformance-interfaces-sigaction-17-5.test 0
conformance-interfaces-sigaction-17-6.test 0
conformance-interfaces-sigaction-17-7.test 0
conformance-interfaces-sigaction-17-8.test 0
conformance-interfaces-sigaction-17-9.test 0
conformance-interfaces-sigaction-1-7.test 0
conformance-interfaces-sigaction-1-8.test 0
conformance-interfaces-sigaction-1-9.test 0
conformance-interfaces-sigaction-2-10.test 0
conformance-interfaces-sigaction-2-11.test 0
conformance-interfaces-sigaction-21-1.test 0
conformance-interfaces-sigaction-2-12.test 0
conformance-interfaces-sigaction-2-13.test 0
conformance-interfaces-sigaction-2-14.test 0
conformance-interfaces-sigaction-2-15.test 0
conformance-interfaces-sigaction-2-16.test 0
conformance-interfaces-sigaction-2-17.test 0
conformance-interfaces-sigaction-2-18.test 0
conformance-interfaces-sigaction-2-19.test 0
conformance-interfaces-sigaction-2-1.test 0
conformance-interfaces-sigaction-2-20.test 0
conformance-interfaces-sigaction-22-10.test 0
conformance-interfaces-sigaction-22-11.test 0
conformance-interfaces-sigaction-22-12.test 0
conformance-interfaces-sigaction-22-13.test 0
conformance-interfaces-sigaction-22-14.test 0
conformance-interfaces-sigaction-22-15.test 0
conformance-interfaces-sigaction-22-16.test 0
conformance-interfaces-sigaction-22-17.test 0
conformance-interfaces-sigaction-22-18.test 0
conformance-interfaces-sigaction-22-19.test 0
conformance-interfaces-sigaction-2-21.test 0
conformance-interfaces-sigaction-22-1.test 0
conformance-interfaces-sigaction-22-20.test 0
conformance-interfaces-sigaction-22-21.test 0
conformance-interfaces-sigaction-22-22.test 0
conformance-interfaces-sigaction-22-23.test 0
conformance-interfaces-sigaction-22-24.test 0
conformance-interfaces-sigaction-22-25.test 0
conformance-interfaces-sigaction-22-26.test 0
conformance-interfaces-sigaction-2-22.test 0
conformance-interfaces-sigaction-22-2.test 0
conformance-interfaces-sigaction-2-23.test 0
conformance-interfaces-sigaction-22-3.test 0
conformance-interfaces-sigaction-2-24.test 0
conformance-interfaces-sigaction-22-4.test 0
conformance-interfaces-sigaction-2-25.test 0
conformance-interfaces-sigaction-22-5.test 0
conformance-interfaces-sigaction-2-26.test 0
conformance-interfaces-sigaction-22-6.test 0
conformance-interfaces-sigaction-22-7.test 0
conformance-interfaces-sigaction-22-8.test 0
conformance-interfaces-sigaction-22-9.test 0
conformance-interfaces-sigaction-2-2.test 0
conformance-interfaces-sigaction-2-3.test 0
conformance-interfaces-sigaction-2-4.test 0
conformance-interfaces-sigaction-25-10.test 0
conformance-interfaces-sigaction-25-11.test 0
conformance-interfaces-sigaction-25-12.test 0
conformance-interfaces-sigaction-25-13.test 0
conformance-interfaces-sigaction-25-14.test 0
conformance-interfaces-sigaction-25-15.test 0
conformance-interfaces-sigaction-25-16.test 0
conformance-interfaces-sigaction-25-17.test 0
conformance-interfaces-sigaction-25-18.test 0
conformance-interfaces-sigaction-25-19.test 0
conformance-interfaces-sigaction-25-1.test 0
conformance-interfaces-sigaction-25-20.test 0
conformance-interfaces-sigaction-25-21.test 0
conformance-interfaces-sigaction-25-22.test 0
conformance-interfaces-sigaction-25-23.test 0
conformance-interfaces-sigaction-25-24.test 0
conformance-interfaces-sigaction-25-25.test 0
conformance-interfaces-sigaction-25-26.test 0
conformance-interfaces-sigaction-25-2.test 0
conformance-interfaces-sigaction-25-3.test 0
conformance-interfaces-sigaction-25-4.test 0
conformance-interfaces-sigaction-25-5.test 0
conformance-interfaces-sigaction-25-6.test 0
conformance-interfaces-sigaction-25-7.test 0
conformance-interfaces-sigaction-25-8.test 0
conformance-interfaces-sigaction-25-9.test 0
conformance-interfaces-sigaction-2-5.test 0
conformance-interfaces-sigaction-2-6.test 0
conformance-interfaces-sigaction-2-7.test 0
conformance-interfaces-sigaction-2-8.test 0
conformance-interfaces-sigaction-2-9.test 0
conformance-interfaces-sigaction-3-10.test 0
conformance-interfaces-sigaction-3-11.test 0
conformance-interfaces-sigaction-3-12.test 0
conformance-interfaces-sigaction-3-13.test 0
conformance-interfaces-sigaction-3-14.test 0
conformance-interfaces-sigaction-3-15.test 0
conformance-interfaces-sigaction-3-16.test 0
conformance-interfaces-sigaction-3-17.test 0
conformance-interfaces-sigaction-3-18.test 0
conformance-interfaces-sigaction-3-19.test 0
conformance-interfaces-sigaction-3-1.test 0
conformance-interfaces-sigaction-3-20.test 0
conformance-interfaces-sigaction-3-21.test 0
conformance-interfaces-sigaction-3-22.test 0
conformance-interfaces-sigaction-3-23.test 0
conformance-interfaces-sigaction-3-24.test 0
conformance-interfaces-sigaction-3-25.test 0
conformance-interfaces-sigaction-3-26.test 0
conformance-interfaces-sigaction-3-2.test 0
conformance-interfaces-sigaction-3-3.test 0
conformance-interfaces-sigaction-3-4.test 0
conformance-interfaces-sigaction-3-5.test 0
conformance-interfaces-sigaction-3-6.test 0
conformance-interfaces-sigaction-3-7.test 0
conformance-interfaces-sigaction-3-8.test 0
conformance-interfaces-sigaction-3-9.test 0
conformance-interfaces-sigaction-4-100.test 0
conformance-interfaces-sigaction-4-101.test 0
conformance-interfaces-sigaction-4-102.test 0
conformance-interfaces-sigaction-4-103.test 0
conformance-interfaces-sigaction-4-104.test 0
conformance-interfaces-sigaction-4-10.test 0
conformance-interfaces-sigaction-4-11.test 0
conformance-interfaces-sigaction-4-12.test 0
conformance-interfaces-sigaction-4-13.test 0
conformance-interfaces-sigaction-4-14.test 0
conformance-interfaces-sigaction-4-15.test 0
conformance-interfaces-sigaction-4-16.test 0
conformance-interfaces-sigaction-4-17.test 0
conformance-interfaces-sigaction-4-18.test 0
conformance-interfaces-sigaction-4-19.test 0
conformance-interfaces-sigaction-4-1.test 0
conformance-interfaces-sigaction-4-20.test 0
conformance-interfaces-sigaction-4-21.test 0
conformance-interfaces-sigaction-4-22.test 0
conformance-interfaces-sigaction-4-23.test 0
conformance-interfaces-sigaction-4-24.test 0
conformance-interfaces-sigaction-4-25.test 0
conformance-interfaces-sigaction-4-26.test 0
conformance-interfaces-sigaction-4-27.test 0
conformance-interfaces-sigaction-4-28.test 0
conformance-interfaces-sigaction-4-29.test 0
conformance-interfaces-sigaction-4-2.test 0
conformance-interfaces-sigaction-4-30.test 0
conformance-interfaces-sigaction-4-31.test 0
conformance-interfaces-sigaction-4-32.test 0
conformance-interfaces-sigaction-4-33.test 0
conformance-interfaces-sigaction-4-34.test 0
conformance-interfaces-sigaction-4-35.test 0
conformance-interfaces-sigaction-4-36.test 0
conformance-interfaces-sigaction-4-37.test 0
conformance-interfaces-sigaction-4-38.test 0
conformance-interfaces-sigaction-4-39.test 0
conformance-interfaces-sigaction-4-3.test 0
conformance-interfaces-sigaction-4-40.test 0
conformance-interfaces-sigaction-4-41.test 0
conformance-interfaces-sigaction-4-42.test 0
conformance-interfaces-sigaction-4-43.test 0
conformance-interfaces-sigaction-4-44.test 0
conformance-interfaces-sigaction-4-45.test 0
conformance-interfaces-sigaction-4-46.test 0
conformance-interfaces-sigaction-4-47.test 0
conformance-interfaces-sigaction-4-48.test 0
conformance-interfaces-sigaction-4-49.test 0
conformance-interfaces-sigaction-4-4.test 0
conformance-interfaces-sigaction-4-50.test 0
conformance-interfaces-sigaction-4-51.test 0
conformance-interfaces-sigaction-4-52.test 0
conformance-interfaces-sigaction-4-53.test 0
conformance-interfaces-sigaction-4-54.test 0
conformance-interfaces-sigaction-4-55.test 0
conformance-interfaces-sigaction-4-56.test 0
conformance-interfaces-sigaction-4-57.test 0
conformance-interfaces-sigaction-4-58.test 0
conformance-interfaces-sigaction-4-59.test 0
conformance-interfaces-sigaction-4-5.test 0
conformance-interfaces-sigaction-4-60.test 0
conformance-interfaces-sigaction-4-61.test 0
conformance-interfaces-sigaction-4-62.test 0
conformance-interfaces-sigaction-4-63.test 0
conformance-interfaces-sigaction-4-64.test 0
conformance-interfaces-sigaction-4-65.test 0
conformance-interfaces-sigaction-4-66.test 0
conformance-interfaces-sigaction-4-67.test 0
conformance-interfaces-sigaction-4-68.test 0
conformance-interfaces-sigaction-4-69.test 0
conformance-interfaces-sigaction-4-6.test 0
conformance-interfaces-sigaction-4-70.test 0
conformance-interfaces-sigaction-4-71.test 0
conformance-interfaces-sigaction-4-72.test 0
conformance-interfaces-sigaction-4-73.test 0
conformance-interfaces-sigaction-4-74.test 0
conformance-interfaces-sigaction-4-75.test 0
conformance-interfaces-sigaction-4-76.test 0
conformance-interfaces-sigaction-4-77.test 0
conformance-interfaces-sigaction-4-78.test 0
conformance-interfaces-sigaction-4-79.test 0
conformance-interfaces-sigaction-4-7.test 0
conformance-interfaces-sigaction-4-80.test 0
conformance-interfaces-sigaction-4-81.test 0
conformance-interfaces-sigaction-4-82.test 0
conformance-interfaces-sigaction-4-83.test 0
conformance-interfaces-sigaction-4-84.test 0
conformance-interfaces-sigaction-4-85.test 0
conformance-interfaces-sigaction-4-86.test 0
conformance-interfaces-sigaction-4-87.test 0
conformance-interfaces-sigaction-4-88.test 0
conformance-interfaces-sigaction-4-89.test 0
conformance-interfaces-sigaction-4-8.test 0
conformance-interfaces-sigaction-4-90.test 0
conformance-interfaces-sigaction-4-91.test 0
conformance-interfaces-sigaction-4-92.test 0
conformance-interfaces-sigaction-4-93.test 0
conformance-interfaces-sigaction-4-94.test 0
conformance-interfaces-sigaction-4-95.test 0
conformance-interfaces-sigaction-4-96.test 0
conformance-interfaces-sigaction-4-97.test 0
conformance-interfaces-sigaction-4-98.test 0
conformance-interfaces-sigaction-4-99.test 0
conformance-interfaces-sigaction-4-9.test 0
conformance-interfaces-sigaction-6-10.test 0
conformance-interfaces-sigaction-6-11.test 0
conformance-interfaces-sigaction-6-12.test 0
conformance-interfaces-sigaction-6-13.test 0
conformance-interfaces-sigaction-6-14.test 0
conformance-interfaces-sigaction-6-15.test 0
conformance-interfaces-sigaction-6-16.test 0
conformance-interfaces-sigaction-6-17.test 0
conformance-interfaces-sigaction-6-18.test 0
conformance-interfaces-sigaction-6-19.test 0
conformance-interfaces-sigaction-6-1.test 0
conformance-interfaces-sigaction-6-20.test 0
conformance-interfaces-sigaction-6-21.test 0
conformance-interfaces-sigaction-6-22.test 0
conformance-interfaces-sigaction-6-23.test 0
conformance-interfaces-sigaction-6-24.test 0
conformance-interfaces-sigaction-6-25.test 0
conformance-interfaces-sigaction-6-26.test 0
conformance-interfaces-sigaction-6-2.test 0
conformance-interfaces-sigaction-6-3.test 0
conformance-interfaces-sigaction-6-4.test 0
conformance-interfaces-sigaction-6-5.test 0
conformance-interfaces-sigaction-6-6.test 0
conformance-interfaces-sigaction-6-7.test 0
conformance-interfaces-sigaction-6-8.test 0
conformance-interfaces-sigaction-6-9.test 0
conformance-interfaces-sigaction-8-10.test 0
conformance-interfaces-sigaction-8-11.test 0
conformance-interfaces-sigaction-8-12.test 0
conformance-interfaces-sigaction-8-13.test 0
conformance-interfaces-sigaction-8-14.test 0
conformance-interfaces-sigaction-8-15.test 0
conformance-interfaces-sigaction-8-16.test 0
conformance-interfaces-sigaction-8-17.test 0
conformance-interfaces-sigaction-8-18.test 0
conformance-interfaces-sigaction-8-19.test 0
conformance-interfaces-sigaction-8-1.test 0
conformance-interfaces-sigaction-8-20.test 0
conformance-interfaces-sigaction-8-21.test 0
conformance-interfaces-sigaction-8-22.test 0
conformance-interfaces-sigaction-8-23.test 0
conformance-interfaces-sigaction-8-24.test 0
conformance-interfaces-sigaction-8-25.test 0
conformance-interfaces-sigaction-8-26.test 0
conformance-interfaces-sigaction-8-2.test 0
conformance-interfaces-sigaction-8-3.test 0
conformance-interfaces-sigaction-8-4.test 0
conformance-interfaces-sigaction-8-5.test 0
conformance-interfaces-sigaction-8-6.test 0
conformance-interfaces-sigaction-8-7.test 0
conformance-interfaces-sigaction-8-8.test 0
conformance-interfaces-sigaction-8-9.test 0
conformance-interfaces-sigaddset-1-3.test 0
conformance-interfaces-sigaddset-2-1.test 0
conformance-interfaces-sigaltstack-10-1.test 0
conformance-interfaces-sigaltstack-11-1.test 0
conformance-interfaces-sigaltstack-1-1.test 0
conformance-interfaces-sigaltstack-12-1.test 0
conformance-interfaces-sigaltstack-2-1.test 0
conformance-interfaces-sigaltstack-3-1.test 0
conformance-interfaces-sigaltstack-5-1.test 0
conformance-interfaces-sigaltstack-6-1.test 0
conformance-interfaces-sigaltstack-7-1.test 0
conformance-interfaces-sigaltstack-8-1.test 0
conformance-interfaces-sigaltstack-9-1.test 0
conformance-interfaces-sigaltstack-9-buildonly.test 0
conformance-interfaces-sigdelset-1-3.test 0
conformance-interfaces-sigdelset-1-4.test 0
conformance-interfaces-sigdelset-2-1.test 0
conformance-interfaces-sigemptyset-1-1.test 0
conformance-interfaces-sigemptyset-2-1.test 0
conformance-interfaces-sigfillset-1-1.test 0
conformance-interfaces-sigfillset-2-1.test 0
conformance-interfaces-sighold-1-1.test 0
conformance-interfaces-sighold-2-1.test 0
conformance-interfaces-sigignore-1-1.test 0
conformance-interfaces-sigignore-4-1.test 0
conformance-interfaces-sigignore-6-1.test 0
conformance-interfaces-sigignore-6-2.test 0
conformance-interfaces-sigismember-3-1.test 0
conformance-interfaces-sigismember-4-1.test 0
conformance-interfaces-signal-1-1.test 0
conformance-interfaces-signal-2-1.test 0
conformance-interfaces-signal-3-1.test 0
conformance-interfaces-signal-5-1.test 0
conformance-interfaces-signal-6-1.test 0
conformance-interfaces-signal-7-1.test 0
conformance-interfaces-sigpause-4-1.test 0
conformance-interfaces-sigpending-1-1.test 0
conformance-interfaces-sigpending-1-2.test 0
conformance-interfaces-sigpending-1-3.test 0
conformance-interfaces-sigpending-2-1.test 0
conformance-interfaces-sigprocmask-10-1.test 0
conformance-interfaces-sigprocmask-12-1.test 0
conformance-interfaces-sigprocmask-15-1.test 0
conformance-interfaces-sigprocmask-4-1.test 0
conformance-interfaces-sigprocmask-5-1.test 0
conformance-interfaces-sigprocmask-6-1.test 0
conformance-interfaces-sigprocmask-7-1.test 0
conformance-interfaces-sigprocmask-8-1.test 0
conformance-interfaces-sigprocmask-8-2.test 0
conformance-interfaces-sigprocmask-8-3.test 0
conformance-interfaces-sigprocmask-9-1.test 0
conformance-interfaces-sigqueue-10-1.test 0
conformance-interfaces-sigqueue-11-1.test 0
conformance-interfaces-sigqueue-1-1.test 0
conformance-interfaces-sigqueue-12-1.test 0
conformance-interfaces-sigqueue-2-1.test 0
conformance-interfaces-sigqueue-2-2.test 0
conformance-interfaces-sigqueue-3-1.test 0
conformance-interfaces-sigqueue-4-1.test 0
conformance-interfaces-sigqueue-5-1.test 0
conformance-interfaces-sigqueue-6-1.test 0
conformance-interfaces-sigqueue-7-1.test 0
conformance-interfaces-sigqueue-8-1.test 0
conformance-interfaces-sigrelse-1-1.test 0
conformance-interfaces-sigrelse-2-1.test 0
conformance-interfaces-sigset-10-1.test 0
conformance-interfaces-sigset-1-1.test 0
conformance-interfaces-sigset-2-1.test 0
conformance-interfaces-sigset-3-1.test 0
conformance-interfaces-sigset-4-1.test 0
conformance-interfaces-sigset-5-1.test 0
conformance-interfaces-sigset-9-1.test 0
conformance-interfaces-sigsuspend-1-1.test 0
conformance-interfaces-sigsuspend-3-1.test 0
conformance-interfaces-sigsuspend-4-1.test 0
conformance-interfaces-sigsuspend-6-1.test 0
conformance-interfaces-sigtimedwait-1-1.test 0
conformance-interfaces-sigtimedwait-2-1.test 0
conformance-interfaces-sigtimedwait-4-1.test 0
conformance-interfaces-sigtimedwait-5-1.test 0
conformance-interfaces-sigtimedwait-6-1.test 0
conformance-interfaces-sigwait-1-1.test 0
conformance-interfaces-sigwait-2-1.test 0
conformance-interfaces-sigwait-3-1.test 0
conformance-interfaces-sigwait-4-1.test 0
conformance-interfaces-sigwait-8-1.test 0
conformance-interfaces-sigwaitinfo-1-1.test 0
conformance-interfaces-sigwaitinfo-2-1.test 0
conformance-interfaces-sigwaitinfo-3-1.test 0
conformance-interfaces-sigwaitinfo-5-1.test 0
conformance-interfaces-sigwaitinfo-6-1.test 0
conformance-interfaces-sigwaitinfo-7-1.test 0
conformance-interfaces-sigwaitinfo-8-1.test 0
conformance-interfaces-sigwaitinfo-9-1.test 0
conformance-interfaces-strftime-2-1.test 0
conformance-interfaces-strftime-3-1.test 0
conformance-interfaces-time-1-1.test 0
conformance-interfaces-timer_create-speculative-15-1.test 0

================================================
FILE: unittests/POSIX/Flake_Tests
================================================
# CPU scheduling delays can cause these tests to exceed their 10ms error margin.
conformance-interfaces-sigtimedwait-1-1.test
conformance-interfaces-sigtimedwait-2-1.test


================================================
FILE: unittests/POSIX/Known_Failures
================================================
# These tests are disabled
# These tests are inconsistent
conformance-interfaces-mmap-12-1.test
conformance-interfaces-mmap-6-1.test
conformance-interfaces-mmap-6-2.test
conformance-interfaces-mmap-6-3.test

# These tests take too long to run, and might time out
conformance-interfaces-clock-1-1.test
conformance-interfaces-clock_gettime-4-1.test
conformance-interfaces-clock_getcpuclockid-1-1.test

# These tests fail when run natively on an x86-64 host
conformance-interfaces-clock_getcpuclockid-2-1.test
conformance-interfaces-mlock-speculative-12-1.test
conformance-interfaces-mmap-11-4.test
conformance-interfaces-mmap-23-1.test
conformance-interfaces-pthread_attr_getschedpolicy-2-1.test
conformance-interfaces-pthread_attr_getscope-1-1.test
conformance-interfaces-pthread_attr_setschedpolicy-4-1.test
conformance-interfaces-pthread_attr_setscope-4-1.test
conformance-interfaces-sched_getparam-6-1.test
conformance-interfaces-sched_getscheduler-7-1.test
conformance-interfaces-sigset-8-1.test
conformance-interfaces-strftime-1-1.test
src-conformance-interfaces-pthread_attr_getschedpolicy-2-1.test
conformance-interfaces-munmap-2-1.test
conformance-interfaces-mlockall-3-7.test
conformance-interfaces-mlockall-speculative-15-1.test
conformance-interfaces-pthread_attr_getschedparam-1-1.test
conformance-interfaces-pthread_attr_setschedpolicy-speculative-5-1.test
conformance-interfaces-sched_setparam-10-1.test
conformance-interfaces-sched_setparam-23-6.test
conformance-interfaces-sched_setparam-9-1.test
conformance-interfaces-sched_setscheduler-1-1.test
conformance-interfaces-sched_setscheduler-16-1.test
conformance-interfaces-sched_setscheduler-17-5.test
conformance-interfaces-sched_setscheduler-4-1.test
conformance-interfaces-sigaddset-1-core-buildonly.test
conformance-interfaces-sigaddset-4-core-buildonly.test
conformance-interfaces-sigdelset-1-core-buildonly.test
conformance-interfaces-sigdelset-4-core-buildonly.test
conformance-interfaces-sighold-3-core-buildonly.test
conformance-interfaces-sigignore-5-core-buildonly.test
conformance-interfaces-sigismember-5-core-buildonly.test
conformance-interfaces-sigqueue-9-1.test
conformance-interfaces-sigrelse-3-core-buildonly.test
conformance-interfaces-sigset-6-1.test
conformance-interfaces-sigset-7-1.test
conformance-interfaces-aio_cancel-10-1.test
conformance-interfaces-aio_cancel-1-1.test
conformance-interfaces-aio_cancel-2-1.test
conformance-interfaces-aio_cancel-2-2.test
conformance-interfaces-aio_cancel-4-1.test
conformance-interfaces-aio_cancel-5-1.test
conformance-interfaces-aio_cancel-6-1.test
conformance-interfaces-aio_cancel-7-1.test
conformance-interfaces-aio_cancel-8-1.test
conformance-interfaces-aio_cancel-9-1.test
conformance-interfaces-aio_error-1-1.test
conformance-interfaces-aio_error-2-1.test
conformance-interfaces-aio_error-3-1.test
conformance-interfaces-aio_fsync-12-1.test
conformance-interfaces-aio_fsync-14-1.test
conformance-interfaces-aio_fsync-2-1.test
conformance-interfaces-aio_fsync-3-1.test
conformance-interfaces-aio_fsync-4-1.test
conformance-interfaces-aio_fsync-4-2.test
conformance-interfaces-aio_fsync-5-1.test
conformance-interfaces-aio_fsync-8-1.test
conformance-interfaces-aio_fsync-8-2.test
conformance-interfaces-aio_fsync-8-3.test
conformance-interfaces-aio_fsync-8-4.test
conformance-interfaces-aio_fsync-9-1.test
conformance-interfaces-aio_read-10-1.test
conformance-interfaces-aio_read-11-1.test
conformance-interfaces-aio_read-11-2.test
conformance-interfaces-aio_read-2-1.test
conformance-interfaces-aio_read-3-1.test
conformance-interfaces-aio_read-3-2.test
conformance-interfaces-aio_read-4-1.test
conformance-interfaces-aio_read-5-1.test
conformance-interfaces-aio_read-7-1.test
conformance-interfaces-aio_read-8-1.test
conformance-interfaces-aio_return-1-1.test
conformance-interfaces-aio_return-3-1.test
conformance-interfaces-aio_return-3-2.test
conformance-interfaces-aio_suspend-3-1.test
conformance-interfaces-aio_suspend-5-1.test
conformance-interfaces-aio_write-1-1.test
conformance-interfaces-aio_write-1-2.test
conformance-interfaces-aio_write-2-1.test
conformance-interfaces-aio_write-3-1.test
conformance-interfaces-aio_write-5-1.test
conformance-interfaces-aio_write-6-1.test
conformance-interfaces-aio_write-8-1.test
conformance-interfaces-aio_write-8-2.test
conformance-interfaces-aio_write-9-1.test
conformance-interfaces-aio_write-9-2.test
conformance-interfaces-lio_listio-5-1.test
conformance-interfaces-lio_listio-6-1.test
conformance-interfaces-lio_listio-8-1.test
conformance-interfaces-lio_listio-9-1.test
conformance-interfaces-pthread_mutex_init-speculative-5-2.test
conformance-interfaces-sched_get_priority_max-1-3.test
conformance-interfaces-sched_get_priority_min-1-3.test
conformance-interfaces-sched_setparam-23-2.test
conformance-interfaces-sched_setparam-23-3.test
conformance-interfaces-sched_setparam-23-4.test
conformance-interfaces-sched_setparam-23-5.test
conformance-interfaces-sched_setparam-25-2.test
conformance-interfaces-sched_setscheduler-17-2.test
conformance-interfaces-sched_setscheduler-17-3.test
conformance-interfaces-sched_setscheduler-17-4.test
conformance-interfaces-sched_setscheduler-19-2.test
conformance-interfaces-sched_setscheduler-19-3.test
conformance-interfaces-sched_setscheduler-19-4.test
conformance-definitions-mqueue_h-10-1.test
conformance-definitions-mqueue_h-11-1.test
conformance-definitions-mqueue_h-1-1.test
conformance-definitions-mqueue_h-2-1.test
conformance-definitions-mqueue_h-3-1.test
conformance-definitions-mqueue_h-4-1.test
conformance-definitions-mqueue_h-5-1.test
conformance-definitions-mqueue_h-6-1.test
conformance-definitions-mqueue_h-7-1.test
conformance-definitions-mqueue_h-8-1.test
conformance-definitions-mqueue_h-9-1.test
conformance-interfaces-clock_settime-1-1.test
conformance-interfaces-clock_settime-19-1.test
conformance-interfaces-clock_settime-7-1.test
conformance-interfaces-clock_settime-7-2.test
conformance-interfaces-clock_settime-8-1.test
conformance-interfaces-mq_close-5-1.test
conformance-interfaces-mq_open-10-1.test
conformance-interfaces-mq_open-14-1.test
conformance-interfaces-mq_open-17-1.test
conformance-interfaces-mq_open-22-1.test
conformance-interfaces-mq_open-24-1.test
conformance-interfaces-mq_open-25-1.test
conformance-interfaces-mq_open-28-1.test
conformance-interfaces-mq_open-30-1.test
conformance-interfaces-mq_open-4-1.test
conformance-interfaces-mq_send-6-1.test
conformance-interfaces-mq_timedsend-17-1.test
conformance-interfaces-mq_timedsend-6-1.test
conformance-interfaces-mq_unlink-2-3.test
conformance-interfaces-pthread_attr_setscope-5-1.test
conformance-interfaces-sched_getscheduler-2-1.test
conformance-interfaces-sched_setparam-12-1.test
conformance-interfaces-sched_setparam-13-1.test
conformance-interfaces-sched_setparam-14-1.test
conformance-interfaces-sched_setparam-15-1.test
conformance-interfaces-sched_setparam-16-1.test
conformance-interfaces-sched_setparam-17-1.test
conformance-interfaces-sched_setparam-18-1.test
conformance-interfaces-sched_setparam-19-1.test
conformance-interfaces-sched_setparam-3-1.test
conformance-interfaces-sched_setparam-6-1.test
conformance-interfaces-sched_setparam-7-1.test
conformance-interfaces-sched_setparam-8-1.test
conformance-interfaces-sched_setscheduler-10-1.test
conformance-interfaces-sched_setscheduler-11-1.test
conformance-interfaces-sched_setscheduler-12-1.test
conformance-interfaces-sched_setscheduler-13-1.test
conformance-interfaces-sched_setscheduler-14-1.test
conformance-interfaces-sched_setscheduler-2-1.test
conformance-interfaces-sched_setscheduler-5-1.test
conformance-interfaces-sched_setscheduler-6-1.test
conformance-interfaces-sched_setscheduler-7-1.test
conformance-interfaces-sched_setscheduler-9-1.test
conformance-interfaces-shm_open-10-1.test
conformance-interfaces-shm_open-12-1.test
conformance-interfaces-shm_open-19-1.test
conformance-interfaces-shm_open-2-1.test
conformance-interfaces-shm_open-24-1.test
conformance-interfaces-shm_open-27-1.test
conformance-interfaces-shm_open-29-1.test
conformance-interfaces-shm_open-3-1.test
conformance-interfaces-shm_open-36-1.test
conformance-interfaces-shm_open-42-1.test
conformance-interfaces-shm_open-6-1.test
conformance-interfaces-shm_open-7-1.test
conformance-interfaces-shm_open-9-1.test
conformance-interfaces-timer_getoverrun-3-1.test
src-conformance-interfaces-shm_open-10-1.test
src-conformance-interfaces-shm_open-12-1.test
src-conformance-interfaces-shm_open-19-1.test
src-conformance-interfaces-shm_open-24-1.test
src-conformance-interfaces-shm_open-29-1.test
src-conformance-interfaces-shm_open-6-1.test

# These use signals and will fail on x86-64
conformance-interfaces-sigaction-8-1.test
conformance-interfaces-sigaction-8-10.test
conformance-interfaces-sigaction-8-11.test
conformance-interfaces-sigaction-8-12.test
conformance-interfaces-sigaction-8-13.test
conformance-interfaces-sigaction-8-14.test
conformance-interfaces-sigaction-8-15.test
conformance-interfaces-sigaction-8-16.test
conformance-interfaces-sigaction-8-17.test
conformance-interfaces-sigaction-8-18.test
conformance-interfaces-sigaction-8-19.test
conformance-interfaces-sigaction-8-2.test
conformance-interfaces-sigaction-8-20.test
conformance-interfaces-sigaction-8-21.test
conformance-interfaces-sigaction-8-22.test
conformance-interfaces-sigaction-8-23.test
conformance-interfaces-sigaction-8-24.test
conformance-interfaces-sigaction-8-25.test
conformance-interfaces-sigaction-8-26.test
conformance-interfaces-sigaction-8-3.test
conformance-interfaces-sigaction-8-4.test
conformance-interfaces-sigaction-8-5.test
conformance-interfaces-sigaction-8-6.test
conformance-interfaces-sigaction-8-7.test
conformance-interfaces-sigaction-8-8.test
conformance-interfaces-sigaction-8-9.test
conformance-interfaces-sigaction-12-27.test
conformance-interfaces-sigaction-12-28.test
conformance-interfaces-sigaction-12-29.test
conformance-interfaces-sigaction-12-30.test
conformance-interfaces-sigaction-12-31.test
conformance-interfaces-sigaction-12-32.test
conformance-interfaces-sigaction-12-33.test
conformance-interfaces-sigaction-12-34.test
conformance-interfaces-sigaction-12-35.test
conformance-interfaces-sigaction-12-36.test
conformance-interfaces-sigaction-12-37.test
conformance-interfaces-sigaction-12-38.test
conformance-interfaces-sigaction-12-39.test
conformance-interfaces-sigaction-12-40.test
conformance-interfaces-sigaction-12-41.test
conformance-interfaces-sigaction-12-42.test
conformance-interfaces-sigaction-12-43.test
conformance-interfaces-sigaction-12-44.test
conformance-interfaces-sigaction-12-45.test
conformance-interfaces-sigaction-12-46.test
conformance-interfaces-sigaction-12-47.test
conformance-interfaces-sigaction-12-48.test
conformance-interfaces-sigaction-12-49.test
conformance-interfaces-sigaction-12-50.test
conformance-interfaces-sigaction-12-51.test
conformance-interfaces-sigaction-12-52.test
conformance-interfaces-sigaction-13-1.test
conformance-interfaces-sigaction-13-10.test
conformance-interfaces-sigaction-13-11.test
conformance-interfaces-sigaction-13-12.test
conformance-interfaces-sigaction-13-13.test
conformance-interfaces-sigaction-13-14.test
conformance-interfaces-sigaction-13-15.test
conformance-interfaces-sigaction-13-16.test
conformance-interfaces-sigaction-13-17.test
conformance-interfaces-sigaction-13-18.test
conformance-interfaces-sigaction-13-19.test
conformance-interfaces-sigaction-13-2.test
conformance-interfaces-sigaction-13-20.test
conformance-interfaces-sigaction-13-21.test
conformance-interfaces-sigaction-13-22.test
conformance-interfaces-sigaction-13-23.test
conformance-interfaces-sigaction-13-24.test
conformance-interfaces-sigaction-13-25.test
conformance-interfaces-sigaction-13-26.test
conformance-interfaces-sigaction-13-3.test
conformance-interfaces-sigaction-13-4.test
conformance-interfaces-sigaction-13-5.test
conformance-interfaces-sigaction-13-6.test
conformance-interfaces-sigaction-13-7.test
conformance-interfaces-sigaction-13-8.test
conformance-interfaces-sigaction-13-9.test
conformance-interfaces-sigaltstack-1-1.test
conformance-interfaces-sigaltstack-6-1.test
conformance-interfaces-sigaltstack-7-1.test
conformance-interfaces-sigaltstack-9-1.test
conformance-interfaces-signal-3-1.test
conformance-interfaces-sigset-3-1.test
conformance-interfaces-sigset-4-1.test
conformance-interfaces-sigset-5-1.test
conformance-interfaces-raise-10000-1.test
conformance-interfaces-raise-2-1.test

# Signals change this behaviour
conformance-interfaces-sigpending-1-1.test
conformance-interfaces-sigpending-2-1.test

# Both of these pass signals in their handler
# Since we exit the signal handler our sa_mask changes
# and we don't currently resolve this
conformance-interfaces-sigpending-1-2.test
conformance-interfaces-sigpending-1-3.test

# Causes long timeout with signal change
conformance-interfaces-mmap-11-2.test
conformance-interfaces-munmap-1-1.test
conformance-interfaces-munmap-1-2.test

conformance-interfaces-killpg-1-2.test # uses rt_sigsuspend
conformance-interfaces-kill-1-2.test # uses rt_sigtimedwait

# Unstable tests
conformance-interfaces-clock_nanosleep-10-1.test # uses sigabort
conformance-interfaces-clock_nanosleep-9-1.test # uses sigabort
conformance-interfaces-nanosleep-5-2.test # uses sigabort
conformance-interfaces-nanosleep-7-1.test # uses sigabort
conformance-interfaces-nanosleep-7-2.test # uses sigabort
conformance-interfaces-sigprocmask-6-1.test

# unstable because of threaded unit test running
conformance-interfaces-mlockall-8-1.test

# Race happy
conformance-interfaces-nanosleep-1-3.test
conformance-interfaces-clock_nanosleep-1-3.test
conformance-interfaces-clock_nanosleep-2-2.test

# these are inconsistent or fail on CI
conformance-interfaces-clock_nanosleep-2-1.test
conformance-interfaces-difftime-1-1.test
conformance-interfaces-nanosleep-2-1.test
conformance-interfaces-mmap-13-1.test
conformance-interfaces-clock_getres-3-1.test
conformance-interfaces-clock_nanosleep-1-1.test
conformance-interfaces-clock_nanosleep-1-1.test

# mmap behaviour has changed versus what this is expecting
# It is expecting mmap to fail if neither MAP_PRIVATE nor MAP_SHARED is set
# Sadly it increments flags until MAP_SHARED_VALIDATE is set which allocates correctly
conformance-interfaces-mmap-21-1.test

# mmap behaviour has changed versus what this is expecting
# It wants EOVERFLOW but kernel now returns ENOMEM
conformance-interfaces-mmap-31-1.test

# Sending signals to the process group causes every test to become flaky
# Need to run these single threaded or change the process group prior to launch
# !!! DO NOT REENABLE THESE UNTIL WE HAVE THIS IN PLACE IN CI !!!
conformance-interfaces-killpg-1-1.test
conformance-interfaces-killpg-1-2.test
conformance-interfaces-killpg-2-1.test
conformance-interfaces-killpg-4-1.test
conformance-interfaces-killpg-5-1.test
conformance-interfaces-killpg-6-1.test
conformance-interfaces-killpg-8-1.test

# This test is flaky
# It puts the thread to sleep for 1 second and expects to wake up within 10ms of the timer
# If the kernel is doing other things then this 10ms time is too strict and fails periodically
conformance-interfaces-sigtimedwait-1-1.test

# This test relies on old Linux behaviour
# It sets an "invalid" flag for which it expects the kernel to return EINVAL
# Sadly since this test is so old it happens to set SS_ONSTACK which is a no-op on
# newer kernels. So this is expected to fail.
conformance-interfaces-sigaltstack-11-1.test

# This test fails since FEX doesn't fully support SS_DISABLE
conformance-interfaces-sigaltstack-2-1.test

# We accidentally pass signals through to the guest that shouldn't be passed through
conformance-interfaces-sigsuspend-1-1.test

# Received signal in handler even though it should be masked
conformance-interfaces-sigaction-25-1.test
conformance-interfaces-sigaction-25-2.test
conformance-interfaces-sigaction-25-3.test
conformance-interfaces-sigaction-25-4.test
conformance-interfaces-sigaction-25-5.test
conformance-interfaces-sigaction-25-6.test
conformance-interfaces-sigaction-25-7.test
conformance-interfaces-sigaction-25-8.test
conformance-interfaces-sigaction-25-9.test
conformance-interfaces-sigaction-25-10.test
conformance-interfaces-sigaction-25-11.test
conformance-interfaces-sigaction-25-12.test
conformance-interfaces-sigaction-25-13.test
conformance-interfaces-sigaction-25-14.test
conformance-interfaces-sigaction-25-15.test
conformance-interfaces-sigaction-25-16.test
conformance-interfaces-sigaction-25-17.test
conformance-interfaces-sigaction-25-18.test
conformance-interfaces-sigaction-25-19.test
conformance-interfaces-sigaction-25-20.test
conformance-interfaces-sigaction-25-21.test
conformance-interfaces-sigaction-25-22.test
conformance-interfaces-sigaction-25-23.test
conformance-interfaces-sigaction-25-24.test
conformance-interfaces-sigaction-25-25.test
conformance-interfaces-sigaction-25-26.test

# This test is flaky on the interpreter
conformance-behavior-WIFEXITED-1-3.test


================================================
FILE: unittests/Readme.md
================================================
# FEX Unit tests

FEX has its own test suite for x86-64 emulation, and we also use gcc's target tests, posixtest, and gvisor's tests. We use a combination of CMake/CTest and python runner scripts.

We also regularly run and pass qemu's and valgrind's tests for validation, but those aren't in CI right now.

## x86/64 testing
- A lot of handwritten assembly unit tests in [32Bit_ASM](32Bit_ASM) and [ASM](ASM) folders, run via our TestHarnessHelper
- A few handwritten IR tests in [IR](IR), run via our IRLoader
- gcc-target-tests-32 and gcc-target-tests-64, run via FEX. The test binaries are in [External/fex-gcc-target-tests-bins](../External/fex-gcc-target-tests-bins)


## Syscall testing
- 64-bit posixtest from http://posixtest.sourceforge.net/, run via FEX. The test binaries are in [External/fex-posixtest-bins](../External/fex-posixtest-bins)
- 64-bit gvisor tests from https://github.com/google/gvisor, run via FEX. The test binaries are in [External/fex-gvisor-tests-bins](../External/fex-gvisor-tests-bins)


================================================
FILE: unittests/ThunkFunctionalTests/CMakeLists.txt
================================================
# List consumed by the DEPENDS clauses of the custom targets at the bottom of this file.
# NOTE(review): AddThunksTest appends to this variable from inside a function() without
# PARENT_SCOPE, so this directory-scope copy appears to stay empty - confirm whether that is intended.
set(FUNCTIONAL_DEPENDS "")

# Registers a single CTest case that launches the guest binary `Bin` through FEX.
#
#   Bin        - Path of the guest executable; it is also embedded in the test name.
#   ThunksFile - File name of a thunk configuration inside Data/CI. When it evaluates to
#                false (e.g. an empty string) the test is registered as the "NoThunks"
#                variant and FEX_THUNKCONFIG is left out of the test environment.
function(AddThunksTest Bin ThunksFile)
  if (ThunksFile)
    set(TEST_NAME ThunkFunctionalTest-Thunks-${Bin})
  else()
    set(TEST_NAME ThunkFunctionalTest-NoThunks-${Bin})
  endif()

  add_test(NAME ${TEST_NAME}
    COMMAND "$<TARGET_FILE:FEX>"
    ${Bin})
  set_property(TEST ${TEST_NAME} APPEND PROPERTY DEPENDS "${Bin}")

  # Environment shared by both variants: log to stderr and point FEX at the thunk libraries
  set(TEST_ENV
    "FEX_OUTPUTLOG=stderr"
    "FEX_SILENTLOG=0"
    "FEX_MAXINST=500"
    "FEX_THUNKHOSTLIBS=${HOSTLIBS_DATA_DIRECTORY}/HostThunks"
    "FEX_THUNKGUESTLIBS=${CMAKE_INSTALL_PREFIX}/share/fex-emu/GuestThunks")

  # Only the "Thunks" variant selects a thunk configuration
  if (ThunksFile)
    list(APPEND TEST_ENV "FEX_THUNKCONFIG=${CMAKE_SOURCE_DIR}/Data/CI/${ThunksFile}")
  endif()

  set_property(TEST "${TEST_NAME}" APPEND PROPERTY ENVIRONMENT "${TEST_ENV}")

  # Note: this modifies the function-local copy of the variable only
  list(APPEND FUNCTIONAL_DEPENDS "${TEST_NAME}")
endfunction()

# Registers two tests for the guest binary `Bin`: one without any thunk configuration
# (empty ThunksFile) and one using the thunk configuration named by `ThunksFile`.
function(AddTest Bin ThunksFile)
  AddThunksTest("${Bin}" "")
  AddThunksTest("${Bin}" "${ThunksFile}")
endfunction()

# Each call registers a NoThunks and a Thunks test for the given binary path
AddTest("/usr/bin/glxinfo" "GLThunks.json")
AddTest("/usr/bin/vulkaninfo" "VulkanThunks.json")

# Runs ctest restricted to tests whose names match the "ThunkFunctionalTest-NoThunks-" prefix.
# TEST_JOB_FLAG is not set in this file; presumably it is provided by a parent CMakeLists.
add_custom_target(thunk_functional_tests_nothunks
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "ThunkFunctionalTest-NoThunks-\.*"
  DEPENDS "${FUNCTIONAL_DEPENDS}")

# Runs ctest restricted to tests whose names match the "ThunkFunctionalTest-Thunks-" prefix
add_custom_target(thunk_functional_tests_thunks
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "ThunkFunctionalTest-Thunks-\.*"
  DEPENDS "${FUNCTIONAL_DEPENDS}")

# Runs ctest for every test whose name matches "ThunkFunctionalTest" (both variants)
add_custom_target(thunk_functional_tests
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "ThunkFunctionalTest\.*"
  DEPENDS "${FUNCTIONAL_DEPENDS}")


================================================
FILE: unittests/ThunkLibs/CMakeLists.txt
================================================
# Test executable built from the generator and ABI test sources
add_executable(thunkgentest generator.cpp abi.cpp)
target_link_libraries(thunkgentest
  PRIVATE
    Catch2::Catch2WithMain
    fmt::fmt
    thunkgenlib)

# Register the Catch2 test cases with CTest; every test name gets a ".ThunkGen" suffix
catch_discover_tests(thunkgentest TEST_SUFFIX ".ThunkGen")

# Convenience target that runs ctest filtered to the suffixed test names registered above
add_custom_target(thunkgen_tests
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.ThunkGen"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL)
add_dependencies(thunkgen_tests thunkgentest)


================================================
FILE: unittests/ThunkLibs/abi.cpp
================================================
#include <clang/Frontend/CompilerInstance.h>
#include <catch2/catch_all.hpp>

#include <data_layout.h>
#include <interface.h>
#include "common.h"

#include <fmt/format.h>

#include <string_view>

using Catch::Matchers::ContainsSubstring;

// run_tool will leak memory when the ToolAction throws an exception, so
// disable AddressSanitizer's leak detection.
//
// The sanitizer runtime looks this hook up by its unmangled C symbol name, so
// the definition needs C linkage. With C++ linkage the symbol is mangled and
// the runtime keeps using its built-in defaults.
extern "C" const char* __asan_default_options() {
  return "detect_leaks=0";
}

// Stream inserter for TypeCompatibility, giving test output a readable label
// instead of a raw enumerator value. Unknown values are printed as "(INVALID)".
inline std::ostream& operator<<(std::ostream& os, TypeCompatibility compat) {
  const char* label = "(INVALID)";

  switch (compat) {
  case TypeCompatibility::Full: label = "Compatible"; break;
  case TypeCompatibility::Repackable: label = "Repackable"; break;
  case TypeCompatibility::None: label = "Incompatible"; break;
  default: break;
  }

  return os << label;
}

class DataLayoutCompareActionForTest;

namespace {

// Fixture type handed to TEST_CASE_METHOD; it makes compute_data_layout available inside test cases.
struct Fixture {
  /**
   * Parses annotations from the input source and generates data layout descriptions from it.
   *
   * Input code with common definitions (types, functions, ...) should be specified in "prelude".
   * It will be prepended to "code" before processing and also to the generator output.
   *
   * The combined source is processed twice: once for the requested guest ABI and once without
   * an explicit guest ABI (the host pass). The returned action carries the results of both passes
   * in its guest_layout and host_layout members.
   */
  std::unique_ptr<DataLayoutCompareActionForTest> compute_data_layout(std::string_view prelude, std::string_view code, GuestABI);
};

} // namespace

/**
 * Test-side variant of DataLayoutCompareAction.
 *
 * It takes ownership of the guest ABI description, records the host data layout
 * when the action is executed, and lets tests query the compatibility of a type
 * by its printed name once the tool run has finished.
 */
class DataLayoutCompareActionForTest : public DataLayoutCompareAction {
  // Memoization table handed to DataLayoutCompareAction::GetTypeCompatibility
  std::unordered_map<const clang::Type*, TypeCompatibility> type_compat_cache;

  // References retained so the ASTContext stays accessible after CompilerInstance::ExecuteAction has returned
  llvm::IntrusiveRefCntPtr<clang::ASTContext> ast_context;
  std::shared_ptr<clang::Preprocessor> preprocessor;

public:
  // The base class is initialized from the pointee before the pointer itself is moved into the member
  DataLayoutCompareActionForTest(std::unique_ptr<ABI> guest_layout)
    : DataLayoutCompareAction(*guest_layout)
    , guest_layout(std::move(guest_layout)) {}

  // Runs the analysis step, then captures the compiler state and computes the host layout from it
  void ExecuteAction() override {
    AnalysisAction::ExecuteAction();

    auto& compiler = getCompilerInstance();
    ast_context = &compiler.getASTContext();
    preprocessor = compiler.getPreprocessorPtr();
    host_layout = ComputeDataLayout(*ast_context, types);
  }

  std::unique_ptr<ABI> guest_layout;
  std::unordered_map<const clang::Type*, TypeInfo> host_layout;

  /**
   * Looks up the host type whose printed name equals type_name and returns its compatibility.
   *
   * Throws std::runtime_error if no entry of host_layout has that name.
   */
  TypeCompatibility GetTypeCompatibility(std::string_view type_name) {
    for (const auto& entry : host_layout) {
      const clang::Type* candidate = entry.first;
      if (clang::QualType {candidate, 0}.getAsString() != type_name) {
        continue;
      }
      return DataLayoutCompareAction::GetTypeCompatibility(*ast_context, candidate, host_layout, type_compat_cache);
    }

    throw std::runtime_error("No data layout information recorded for type \"" + std::string {type_name} + "\"");
  }
};

/**
 * Same as clang::FrontendActionFactory but takes an external FrontendAction
 * reference instead of constructing an internal one. Since the FrontendAction
 * lifetime may extend past this ToolAction, state captured by the
 * FrontendAction can be accessed after the ToolAction returns.
 */
class ThunkTestToolAction : public clang::tooling::ToolAction {
public:
  clang::FrontendAction& ScopedToolAction;

public:
  ThunkTestToolAction(clang::FrontendAction& action)
    : ScopedToolAction(action) {}
  ~ThunkTestToolAction() = default;

  // Same as FrontendActionFactory but keeps ScopedToolAction alive when returning
  bool runInvocation(std::shared_ptr<clang::CompilerInvocation> invocation, clang::FileManager* files,
                     std::shared_ptr<clang::PCHContainerOperations> pch, clang::DiagnosticConsumer* diag_consumer) override {

#if LLVM_VERSION_MAJOR >= 21
    auto diagnostics =
      clang::CompilerInstance::createDiagnostics(files->getVirtualFileSystem(), invocation->getDiagnosticOpts(), diag_consumer, false);
#elif LLVM_VERSION_MAJOR == 20
    auto diagnostics =
      clang::CompilerInstance::createDiagnostics(files->getVirtualFileSystem(), &invocation->getDiagnosticOpts(), diag_consumer, false);
#else
    auto diagnostics = clang::CompilerInstance::createDiagnostics(&invocation->getDiagnosticOpts(), diag_consumer, false);
#endif

#if LLVM_VERSION_MAJOR >= 21
    clang::CompilerInstance Compiler(std::move(invocation), std::move(pch));
#else
    clang::CompilerInstance Compiler(std::move(pch));
    Compiler.setInvocation(std::move(invocation));
#endif
    Compiler.setFileManager(files);
#if LLVM_VERSION_MAJOR >= 22
    auto Diags = clang::CompilerInstance::createDiagnostics(Compiler.getVirtualFileSystem(), Compiler.getDiagnosticOpts(), diag_consumer, false);
    Compiler.setDiagnostics(std::move(Diags));
#elif LLVM_VERSION_MAJOR >= 20
    Compiler.createDiagnostics(Compiler.getVirtualFileSystem(), diag_consumer, false);
#else
    Compiler.createDiagnostics(diag_consumer, false);
#endif
    if (!Compiler.hasDiagnostics()) {
      return false;
    }

#if LLVM_VERSION_MAJOR >= 22
    Compiler.createSourceManager();
#else
    Compiler.createSourceManager(*files);
#endif

    const bool Success = Compiler.ExecuteAction(ScopedToolAction);

    files->clearStatCache();
    return Success;
  }
};

std::unique_ptr<DataLayoutCompareActionForTest> Fixture::compute_data_layout(std::string_view prelude, std::string_view code, GuestABI guest_abi) {
  const std::string full_code = std::string {prelude} + std::string {code};

  // Compute guest data layout
  auto data_layout_analysis_factory = std::make_unique<AnalyzeDataLayoutActionFactory>();
  run_tool(*data_layout_analysis_factory, full_code, false, guest_abi);

  // Compute host data layout
  auto ScopedToolAction = std::make_unique<DataLayoutCompareActionForTest>(data_layout_analysis_factory->TakeDataLayout());
  run_tool(std::make_unique<ThunkTestToolAction>(*ScopedToolAction), full_code, false, std::nullopt);

  return ScopedToolAction;
}

// Renders a host data layout as human-readable text for use in test diagnostics.
//
// One "Host entry" line (name, size and alignment in bytes) is produced per type for which
// get_if_simple_or_struct() yields information; other entries are skipped. Struct types are
// followed by one "Offset" line per member giving its byte range, type, name and array extent.
static std::string FormatDataLayout(const std::unordered_map<const clang::Type*, TypeInfo>& layout) {
  std::string text;

  for (const auto& entry : layout) {
    const TypeInfo& info = entry.second;

    if (auto basic_info = info.get_if_simple_or_struct()) {
      const std::string type_name = clang::QualType {entry.first, 0}.getAsString();
      text += fmt::format("  Host entry {}: {} ({})\n", type_name.c_str(), basic_info->size_bits / 8, basic_info->alignment_bits / 8);

      if (auto struct_info = info.get_if_struct()) {
        for (const auto& member : struct_info->members) {
          // Bit positions are converted to the first and last byte covered by the member
          const auto first_byte = member.offset_bits / 8;
          const auto last_byte = (member.offset_bits + member.size_bits - 1) / 8;
          const std::string array_suffix = member.array_size ? fmt::format("[{}]", member.array_size.value()) : std::string {};

          text += fmt::format("    Offset {}-{}: {} {}{}\n", first_byte, last_byte, member.type_name.c_str(), member.member_name.c_str(),
                              array_suffix);
        }
      }
    }
  }

  return text;
}

TEST_CASE_METHOD(Fixture, "DataLayout") {
  auto guest_abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(guest_abi);

  SECTION("Trivial") {
    auto action = compute_data_layout("#include <thunks_common.h>\n",
                                      "struct A { int a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    REQUIRE(action->guest_layout->contains("A"));

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
  }

  SECTION("Builtin types") {
    auto action = compute_data_layout("#include <thunks_common.h>\n",
                                      "struct A { char a; short b; int c; float d; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    REQUIRE(action->guest_layout->contains("A"));

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("char") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("short") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("int") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("float") == TypeCompatibility::Full);
  }

  SECTION("Padding after int16_t") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int16_t a; int32_t b; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
  }

  SECTION("Array of int16_t") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int16_t a[64]; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
  }

  const auto compat_full64_repackable32 = (guest_abi == GuestABI::X86_32 ? TypeCompatibility::Repackable : TypeCompatibility::Full);

  SECTION("Type with platform-dependent size (size_t)") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdlib>\n",
                                      "struct A { size_t a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("int64_t has stricter alignment requirements on 64-bit platforms") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int64_t a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->alignment_bits == (guest_abi == GuestABI::X86_32 ? 32 : 64));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Array of int64_t") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int64_t a[64]; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("int64_t with explicit alignment specification") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct alignas(8) A { int64_t a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->alignment_bits == 64);
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
  }

  SECTION("int64_t alignment requirements propagate to parent struct") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int32_t a; int32_t b; int64_t c; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->alignment_bits == (guest_abi == GuestABI::X86_32 ? 32 : 64));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Padding before int64_t member") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int32_t a; int64_t b; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->members[1].offset_bits == (guest_abi == GuestABI::X86_32 ? 32 : 64));

    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Padding at end of struct due to int64_t alignment (like VkMemoryHeap)") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { int64_t a; int32_t b; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->size_bits == (guest_abi == GuestABI::X86_32 ? 96 : 128));

    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Different struct definition between guest and host; different member order") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct A { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct A { int32_t b; int32_t a; };\n"
                                      "#endif\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->members.at(0).member_name == "b");
    CHECK(action->guest_layout->at("A").get_if_struct()->members.at(1).member_name == "a");

    REQUIRE(!action->host_layout.empty());
    CHECK(action->host_layout.begin()->second.get_if_struct()->members.at(0).member_name == "a");
    CHECK(action->host_layout.begin()->second.get_if_struct()->members.at(1).member_name == "b");

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
  }

  SECTION("Different struct definition between guest and host; different member size") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct A { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct A { int32_t a; int64_t b; };\n"
                                      "#endif\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->guest_layout->at("A").get_if_struct()->members.at(0).size_bits == 32);
    CHECK(action->guest_layout->at("A").get_if_struct()->members.at(1).size_bits == 64);

    REQUIRE(!action->host_layout.empty());
    CHECK(action->host_layout.begin()->second.get_if_struct()->members.at(0).size_bits == 32);
    CHECK(action->host_layout.begin()->second.get_if_struct()->members.at(1).size_bits == 32);

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
  }

  SECTION("Different struct definition between guest and host; completely different members") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct A { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct A { int32_t c; int32_t d; };\n"
                                      "#endif\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::None);
  }

  SECTION("Different struct definition between guest and host; member missing from guest") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct A { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct A { int32_t a; };\n"
                                      "#endif\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::None);
  }

  SECTION("Different struct definition between guest and host; member missing from host") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct A { int32_t a; };\n"
                                      "#else\n"
                                      "struct A { int32_t a; int32_t b; };\n"
                                      "#endif\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));

    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::None);
  }

  SECTION("Nesting structs of consistent data layout") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct C { int32_t a; int16_t b; };\n"
                                      "struct B { C a; int16_t b; };\n"
                                      "struct A { int32_t a; B b; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    REQUIRE(action->guest_layout->contains("B"));
    REQUIRE(action->guest_layout->contains("C"));
    CHECK(action->guest_layout->at("A").get_if_struct()->members.at(0).size_bits == 32);

    CHECK(action->GetTypeCompatibility("struct C") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Full);
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
  }

  SECTION("Nesting repackable structs by embedding") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct C { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct C { int32_t b; int32_t a; };\n"
                                      "#endif\n"
                                      "struct B { C a; int16_t b; };\n"
                                      "struct A { int32_t a; B b; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    REQUIRE(action->guest_layout->contains("B"));
    REQUIRE(action->guest_layout->contains("C"));
    CHECK(action->guest_layout->at("A").get_if_struct()->size_bits == 128);
    CHECK(action->guest_layout->at("A").get_if_struct()->alignment_bits == 32);

    CHECK(action->GetTypeCompatibility("struct C") == TypeCompatibility::Repackable);
    CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Repackable);
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
  }

  SECTION("Embedded union type (like VkRenderingAttachmentInfo)") {
    SECTION("without annotation") {
      CHECK_THROWS_WITH(compute_data_layout("#include <thunks_common.h>\n"
                                            "#include <cstdint>\n",
                                            "union B { int32_t a; uint32_t b; };\n"
                                            "struct A { B a; };\n"
                                            "template<> struct fex_gen_type<A> {};\n",
                                            guest_abi),
                        ContainsSubstring("unannotated member") && ContainsSubstring("union type"));
    }

    SECTION("with annotation") {
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "union B { int32_t a; uint32_t b; };\n"
                                        "struct A { B a; };\n"
                                        "template<> struct fex_gen_type<B> : fexgen::assume_compatible_data_layout {};\n"
                                        "template<> struct fex_gen_type<A> {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("A"));
      CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
    }
  }
}

// Data layout compatibility tests for structs that contain pointer members.
// GENERATE makes every section run once per guest ABI (X86_32 and X86_64).
TEST_CASE_METHOD(Fixture, "DataLayoutPointers") {
  auto guest_abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(guest_abi);

  // Expected result for structs whose only difference between guest and host is
  // the pointer member itself: Full on 64-bit guests, Repackable on 32-bit guests
  const auto compat_full64_repackable32 = (guest_abi == GuestABI::X86_32 ? TypeCompatibility::Repackable : TypeCompatibility::Full);

  SECTION("Pointer to data with consistent layout") {
    // Runs once per pointee type listed here
    std::string type = GENERATE("char", "short", "int", "float", "struct B { int a; }");
    INFO(type);
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { " + type +
                                        "* a; };\n"
                                        "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    // The pointer itself needs repacking on 32-bit. On 64-bit, no repacking is needed at all.
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
    // For the inline struct definition, "type" is not a plain type name, so the pointee lookup is skipped
    if (!type.starts_with("struct B")) {
      CHECK(action->GetTypeCompatibility(type) == TypeCompatibility::Full);
    }
  }

  SECTION("Pointer to struct with consistent layout") {
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct B { int32_t a; };\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("B"));
    CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Full);
    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Unannotated pointer to incomplete type") {
    // B is only forward-declared and carries no annotation, so analysis is expected to throw
    CHECK_THROWS_WITH(compute_data_layout("#include <thunks_common.h>\n"
                                          "#include <cstdint>\n",
                                          "struct B;\n"
                                          "struct A { B* a; };\n"
                                          "template<> struct fex_gen_type<A> {};\n",
                                          guest_abi),
                      ContainsSubstring("incomplete type"));
  }

  SECTION("Unannotated pointer to repackable type") {
    // The pointee differs between host and guest (int32_t vs int64_t member) and the
    // pointer member is not annotated, so A is expected to be incompatible
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct B { int32_t a; int32_t b; };\n"
                                      "#else\n"
                                      "struct B { int32_t a; int64_t b; };\n"
                                      "#endif\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::None);
  }

  SECTION("Nesting repackable structs by pointers") {
    // Layout in all subsections: A embeds B by value, B points to C
    SECTION("Innermost struct is compatible") {
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "struct C { int32_t a; int32_t b; };\n"
                                        "struct B { C* a; int16_t b; };\n"
                                        "struct A { int32_t a; B b; };\n"
                                        "template<> struct fex_gen_type<A> {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("A"));
      REQUIRE(action->guest_layout->contains("B"));
      REQUIRE(action->guest_layout->contains("C"));

      // 64-bit is fully compatible, but 32-bit needs to zero-extend the pointer itself
      CHECK(action->GetTypeCompatibility("struct C") == TypeCompatibility::Full);
      CHECK(action->GetTypeCompatibility("struct B") == compat_full64_repackable32);
      CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
    }

    SECTION("Innermost struct is incompatible") {
      // C has its members swapped between host and guest; without an annotation on
      // the pointer member, B and A are expected to be incompatible
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "#ifdef HOST\n"
                                        "struct C { int32_t a; int32_t b; };\n"
                                        "#else\n"
                                        "struct C { int32_t b; int32_t a; };\n"
                                        "#endif\n"
                                        "struct B { C* a; int16_t b; };\n"
                                        "struct A { int32_t a; B b; };\n"
                                        "template<> struct fex_gen_type<A> {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("A"));
      REQUIRE(action->guest_layout->contains("B"));
      REQUIRE(action->guest_layout->contains("C"));

      CHECK(action->GetTypeCompatibility("struct C") == TypeCompatibility::Repackable);
      CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::None);
      CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::None);
    }

    SECTION("Innermost struct is incompatible but the pointer member is annotated") {
      // Same input as the previous subsection plus a custom_repack annotation on B::a;
      // B and A are then expected to be repackable
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "#ifdef HOST\n"
                                        "struct C { int32_t a; int32_t b; };\n"
                                        "#else\n"
                                        "struct C { int32_t b; int32_t a; };\n"
                                        "#endif\n"
                                        "struct B { C* a; int16_t b; };\n"
                                        "struct A { int32_t a; B b; };\n"
                                        "template<> struct fex_gen_config<&B::a> : fexgen::custom_repack {};\n"
                                        "template<> struct fex_gen_type<A> {};\n"
                                        "template<> struct fex_gen_type<C> {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("A"));
      REQUIRE(action->guest_layout->contains("B"));
      REQUIRE(action->guest_layout->contains("C"));

      CHECK(action->GetTypeCompatibility("struct C") == TypeCompatibility::Repackable);
      CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Repackable);
      CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
    }
  }

  SECTION("Unannotated pointer to union type") {
    // Pointers to unions without any annotation are expected to be rejected with a diagnostic
    CHECK_THROWS_WITH(compute_data_layout("#include <thunks_common.h>\n"
                                          "#include <cstdint>\n",
                                          "union B { int32_t a; uint32_t b; };\n"
                                          "struct A { B* a; };\n"
                                          "template<> struct fex_gen_type<A> {};\n",
                                          guest_abi),
                      ContainsSubstring("unannotated member") && ContainsSubstring("union type"));
  }

  SECTION("Pointer to union type with assume_compatible_data_layout annotation") {
    // Annotating the union type itself leaves only the pointer as a potential difference
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "union B { int32_t a; uint32_t b; };\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_type<B> : fexgen::assume_compatible_data_layout {};\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Pointer to union type with custom_repack annotation") {
    // Annotating the member (rather than the union type) yields Repackable on both guest ABIs
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "union B { int32_t a; uint32_t b; };\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_config<&A::a> : fexgen::custom_repack {};\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
  }

  SECTION("Pointer to opaque type") {
    // B stays incomplete but is annotated as opaque_type, so analysis does not throw
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct B;\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_type<B> : fexgen::opaque_type {};\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    CHECK(action->GetTypeCompatibility("struct A") == compat_full64_repackable32);
  }

  SECTION("Pointer member with custom repacking code") {
    // Data layout analysis only needs to know about the custom_repack
    // annotation. The actual custom repacking code isn't needed for the
    // test.

    // B's single member is named differently on host and guest
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "#ifdef HOST\n"
                                      "struct B { int32_t a; };\n"
                                      "#else\n"
                                      "struct B { int32_t b; };\n"
                                      "#endif\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_config<&A::a> : fexgen::custom_repack {};\n"
                                      "template<> struct fex_gen_type<A> {};\n"
                                      "template<> struct fex_gen_type<B> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    REQUIRE(action->guest_layout->contains("B"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
    CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::None);
  }

  SECTION("Custom repacking induces repacking requirement") {
    // Data layout analysis only needs to know about the custom_repack
    // annotation. The actual custom repacking code isn't needed for the
    // test.

    // B is identical on host and guest, yet A is expected to be Repackable on
    // both guest ABIs because of the custom_repack annotation on A::a
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct B {};\n"
                                      "struct A { B* a; };\n"
                                      "template<> struct fex_gen_config<&A::a> : fexgen::custom_repack {};\n"
                                      "template<> struct fex_gen_type<A> {};\n"
                                      "template<> struct fex_gen_type<B> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    REQUIRE(action->guest_layout->contains("B"));
    CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Repackable);
    CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Full);
  }

  SECTION("Self-referencing struct (like VkBaseOutStructure)") {
    // Without annotation
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { A* a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    // The layout itself is computed; the failure only surfaces when compatibility is queried
    CHECK_THROWS_WITH(action->GetTypeCompatibility("struct A"), ContainsSubstring("recursive reference"));

    // With annotation
    // NOTE(review): only exercised for 64-bit guests; presumably the annotation cannot
    // hold for 32-bit guests because of the pointer size difference - confirm
    if (guest_abi == GuestABI::X86_64) {
      // This "action" intentionally shadows the one from the unannotated run above
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "struct A { A* a; };\n"
                                        "template<> struct fex_gen_type<A> : fexgen::assume_compatible_data_layout {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("A"));
      CHECK(action->GetTypeCompatibility("struct A") == TypeCompatibility::Full);
    }
  }

  SECTION("Circularly referencing structs") {
    // Without annotation
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct B;\n"
                                      "struct A { B* a; };\n"
                                      "struct B { A* a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    REQUIRE(action->guest_layout->contains("B"));
    // Both ends of the cycle are expected to report the recursion when queried
    CHECK_THROWS_WITH(action->GetTypeCompatibility("struct A"), ContainsSubstring("recursive reference"));
    CHECK_THROWS_WITH(action->GetTypeCompatibility("struct B"), ContainsSubstring("recursive reference"));

    // With annotation
    // NOTE(review): only exercised for 64-bit guests, same as the self-referencing case - confirm the reason
    if (guest_abi == GuestABI::X86_64) {
      // This "action" intentionally shadows the one from the unannotated run above
      auto action = compute_data_layout("#include <thunks_common.h>\n"
                                        "#include <cstdint>\n",
                                        "struct B;\n"
                                        "struct A { B* a; };\n"
                                        "struct B { A* a; };\n"
                                        "template<> struct fex_gen_type<B> : fexgen::assume_compatible_data_layout {};\n",
                                        guest_abi);

      INFO(FormatDataLayout(action->host_layout));

      REQUIRE(action->guest_layout->contains("B"));
      CHECK(action->GetTypeCompatibility("struct B") == TypeCompatibility::Full);
    }
  }

  SECTION("Pointers to void") {
    // Without annotation
    auto action = compute_data_layout("#include <thunks_common.h>\n"
                                      "#include <cstdint>\n",
                                      "struct A { void* a; };\n"
                                      "template<> struct fex_gen_type<A> {};\n",
                                      guest_abi);

    INFO(FormatDataLayout(action->host_layout));

    REQUIRE(action->guest_layout->contains("A"));
    // Unlike typed pointers, an unannotated void* member is expected to make the
    // struct incompatible (not merely repackable) on 32-bit guests
    CHECK(action->GetTypeCompatibility("struct A") == (guest_abi == GuestABI::X86_32 ? TypeCompatibility::None : TypeCompatibility::Full));
  }

  // TODO: Double pointers to compatible data: struct B { int a ; }; struct A { B** b; };
}


================================================
FILE: unittests/ThunkLibs/common.h
================================================
#pragma once

#include <clang/Frontend/TextDiagnosticPrinter.h>
#include <clang/Tooling/Tooling.h>

#include <llvm/Support/raw_os_ostream.h>

#include <optional>

/**
 * Diagnostic consumer for tests, layered on top of clang::TextDiagnosticPrinter.
 *
 * The formatted text of the first diagnostic of level Error or higher is
 * remembered so that it can be queried through GetFirstError() after
 * compiling. In silent mode, only fatal diagnostics are forwarded to the
 * printer; everything else is dropped after the error capture.
 */
class TestDiagnosticConsumer : public clang::TextDiagnosticPrinter {
  // When set, diagnostics below Fatal are not printed
  bool silent;

  // Message of the first error-or-worse diagnostic; empty until one is seen
  std::optional<std::string> first_error;

public:
#if LLVM_VERSION_MAJOR >= 21
  // From LLVM 21 on, the printer takes the diagnostic options by reference
  TestDiagnosticConsumer(bool silent_, clang::DiagnosticOptions& diag_opts)
    : clang::TextDiagnosticPrinter(llvm::errs(), diag_opts)
    , silent(silent_) {}
#else
  // Older LLVM versions take a pointer to the diagnostic options
  TestDiagnosticConsumer(bool silent_)
    : clang::TextDiagnosticPrinter(llvm::errs(), new clang::DiagnosticOptions)
    , silent(silent_) {}
#endif

  void HandleDiagnostic(clang::DiagnosticsEngine::Level level, const clang::Diagnostic& diag) override {
    // Capture happens regardless of the silent flag
    const bool is_error = level >= clang::DiagnosticsEngine::Error;
    if (is_error && !first_error.has_value()) {
      llvm::SmallVector<char, 64> text;
      diag.FormatDiagnostic(text);
      first_error.emplace(text.begin(), text.end());
    }

    // Fatal diagnostics are always printed; all others only when not silenced
    if (!silent || level == clang::DiagnosticsEngine::Fatal) {
      clang::TextDiagnosticPrinter::HandleDiagnostic(level, diag);
    }
  }

  // Returns a copy of the captured message, or std::nullopt if no error was reported
  std::optional<std::string> GetFirstError() const {
    return first_error;
  }
};

// Guest architectures that the test tooling can be pointed at
enum class GuestABI {
  X86_32, // 32-bit x86 guest
  X86_64, // 64-bit x86 guest
};

// Streams the enumerator name of the given ABI.
// Values that match neither enumerator leave the stream untouched.
inline std::ostream& operator<<(std::ostream& os, GuestABI abi) {
  switch (abi) {
  case GuestABI::X86_32: return os << "X86_32";
  case GuestABI::X86_64: return os << "X86_64";
  }
  return os;
}

/**
 * Run the given ToolAction on the input code.
 *
 * The code is compiled from an in-memory file. A second in-memory file,
 * "thunks_common.h", provides the fexgen annotation types for inclusion by
 * the input.
 *
 * @param action    Action to run on the parsed input
 * @param code      C++ source to process
 * @param silent    Suppresses non-fatal diagnostics in tests that expect failure
 * @param guest_abi If set, compiles for the given guest target triple and adds the
 *                  matching cross-compilation include path. If unset, the macro
 *                  HOST is defined instead.
 *
 * @throws std::runtime_error carrying the first error diagnostic, if any was reported
 */
inline void
run_tool(clang::tooling::ToolAction& action, std::string_view code, bool silent = false, std::optional<GuestABI> guest_abi = std::nullopt) {
  const char* memory_filename = "gen_input.cpp";
  std::vector<std::string> args = {"clang-tool", "-fsyntax-only", "-std=c++20", "-Werror", "-I.", memory_filename};
  // An empty CLANG_RESOURCE_DIR means that clang's default resource directory is used
  if (CLANG_RESOURCE_DIR[0] != 0) {
    args.push_back("-resource-dir");
    args.push_back(CLANG_RESOURCE_DIR);
  }
  if (guest_abi == GuestABI::X86_64) {
    args.push_back("-target");
    args.push_back("x86_64-linux-gnu");
    args.push_back("-isystem");
    args.push_back("/usr/x86_64-linux-gnu/include/");
  } else if (guest_abi == GuestABI::X86_32) {
    args.push_back("-target");
    args.push_back("i686-linux-gnu");
    args.push_back("-isystem");
    args.push_back("/usr/i686-linux-gnu/include/");
  } else {
    // No guest ABI requested: let the input code detect host compilation
    args.push_back("-DHOST");
  }

  // Corresponds to the content of GeneratorInterface.h
  const char* common_header_code = R"(namespace fexgen {
struct returns_guest_pointer {};
struct custom_host_impl {};
struct callback_annotation_base { bool prevent_multiple; };
struct callback_stub : callback_annotation_base {};

struct custom_repack {};
struct emit_layout_wrappers {};

struct opaque_type {};
struct assume_compatible_data_layout {};

struct ptr_passthrough {};

} // namespace fexgen

template<auto, int, typename = void> struct fex_gen_param {};

template<typename>
struct fex_gen_type;
template<auto>
struct fex_gen_config;

)";

  // Layer an in-memory file system over the real one, so that the generated
  // input files are found alongside the regular system headers
  llvm::IntrusiveRefCntPtr<llvm::vfs::OverlayFileSystem> overlay_fs(new llvm::vfs::OverlayFileSystem(llvm::vfs::getRealFileSystem()));
  llvm::IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> memory_fs(new llvm::vfs::InMemoryFileSystem);
  overlay_fs->pushOverlay(memory_fs);
  memory_fs->addFile(memory_filename, 0, llvm::MemoryBuffer::getMemBufferCopy(code));
  memory_fs->addFile("thunks_common.h", 0, llvm::MemoryBuffer::getMemBufferCopy(common_header_code));
  llvm::IntrusiveRefCntPtr<clang::FileManager> files(new clang::FileManager(clang::FileSystemOptions(), overlay_fs));

  auto invocation = clang::tooling::ToolInvocation(args, &action, files.get(), std::make_shared<clang::PCHContainerOperations>());

#if LLVM_VERSION_MAJOR >= 21
  clang::DiagnosticOptions diag_opts;
  TestDiagnosticConsumer consumer(silent, diag_opts);
#else
  TestDiagnosticConsumer consumer(silent);
#endif

  invocation.setDiagnosticConsumer(&consumer);

  // Process the actual ToolAction.
  // NOTE: If the ToolAction throws an exception, clang will leak memory here.
  // The return value is not inspected; failure is detected through the
  // diagnostic consumer below.
  invocation.run();

  if (auto error = consumer.GetFirstError()) {
    throw std::runtime_error(*error);
  }
}

// Convenience overload for callers holding the action in a unique_ptr.
// The action is kept alive for the duration of the call and destroyed on return.
inline void run_tool(std::unique_ptr<clang::tooling::ToolAction> action, std::string_view code, bool silent = false,
                     std::optional<GuestABI> guest_abi = std::nullopt) {
  clang::tooling::ToolAction& action_ref = *action;
  run_tool(action_ref, code, silent, guest_abi);
}


================================================
FILE: unittests/ThunkLibs/generator.cpp
================================================
#include <catch2/catch_all.hpp>

#include <clang/ASTMatchers/ASTMatchers.h>
#include <clang/ASTMatchers/ASTMatchFinder.h>
#include <clang/Basic/Version.h>
#include <clang/Frontend/CompilerInstance.h>
#include <clang/Tooling/Tooling.h>

#include <interface.h>

#include <filesystem>
#include <fstream>
#include <string_view>
#include <system_error>

#include "common.h"

/**
 * This class parses its input code and stores it alongside its AST representation.
 *
 * Use this with HasASTMatching in Catch2's CHECK_THAT/REQUIRE_THAT macros.
 */
struct SourceWithAST {
  // The source text exactly as passed to the constructor
  std::string code;
  // AST produced by compiling "code"; assigned by the constructor
  std::unique_ptr<clang::ASTUnit> ast;

  // Compiles "input". If silent_compile is set, non-fatal diagnostics are not printed.
  SourceWithAST(std::string_view input, bool silent_compile = false);
};

std::ostream& operator<<(std::ostream& os, const SourceWithAST& ast) {
  os << ast.code;

  // Additionally, change this to true to print the full AST on test failures
  const bool print_ast = false;
  if (print_ast) {
    for (auto it = ast.ast->top_level_begin(); it != ast.ast->top_level_end(); ++it) {
      // Skip header declarations
      if (!ast.ast->isInMainFileID((*it)->getBeginLoc())) {
        continue;
      }

      auto llvm_os = llvm::raw_os_ostream {os};
      (*it)->dump(llvm_os);
    }
  }
  return os;
}

/**
 * Test fixture that owns a unique temporary directory for generator output.
 *
 * The directory is created on construction and removed (including its
 * contents) on destruction.
 */
struct Fixture {
  Fixture() {
    tmpdir = std::string {P_tmpdir} + "/thunkgentestXXXXXX";
    // mkdtemp replaces the XXXXXX placeholder in-place and creates the directory,
    // so no separate directory creation is needed afterwards
    if (!mkdtemp(tmpdir.data())) {
      std::abort();
    }
    output_filenames = {
      tmpdir + "/thunkgen_guest",
      tmpdir + "/thunkgen_host",
    };
  }

  ~Fixture() {
    // Best-effort cleanup: use the non-throwing overload, since an exception
    // escaping a destructor would terminate the whole test run
    std::error_code ec;
    std::filesystem::remove_all(tmpdir, ec);
  }

  struct GenOutput {
    SourceWithAST guest;
    SourceWithAST host;
  };

  /**
   * Runs the given code through the thunk generator and verifies the output compiles.
   *
   * Input code with common definitions (types, functions, ...) should be specified in "prelude".
   * It will be prepended to "code" before processing and also to the generator output.
   */
  SourceWithAST run_thunkgen_guest(std::string_view prelude, std::string_view code, bool silent = false);
  SourceWithAST run_thunkgen_host(std::string_view prelude, std::string_view code, GuestABI = GuestABI::X86_64, bool silent = false);
  GenOutput run_thunkgen(std::string_view prelude, std::string_view code, bool silent = false);

  // Library name passed to the thunk generator
  const std::string libname = "libtest";
  // Temporary directory that receives the generator output
  std::string tmpdir;
  // Paths of the generated guest/host files inside tmpdir
  OutputFilenames output_filenames;
};

using namespace clang::ast_matchers;

/**
 * Match callback that records whether a match was reported and whether all
 * registered binding checks passed for it.
 *
 * The result is recomputed on every reported match, so matched() reflects
 * the most recent one.
 */
class MatchCallback : public MatchFinder::MatchCallback {
  bool success = false;

  using CheckFn = std::function<bool(const MatchFinder::MatchResult&)>;
  std::vector<CheckFn> binding_checks;

public:
  /**
   * Registers a check for the node bound to "binding_name".
   * The check fails if the binding is absent or not of type NodeType.
   */
  template<typename NodeType>
  void check_binding(std::string_view binding_name, bool (*check_fn)(const NodeType*)) {
    // Wrap the typed check into the type-erased form; the name is copied since the view may not outlive the call
    binding_checks.push_back([check_fn, binding_name = std::string(binding_name)](const MatchFinder::MatchResult& result) {
      const NodeType* node = result.Nodes.getNodeAs<NodeType>(binding_name.c_str());
      return node != nullptr && check_fn(node);
    });
  }

  void run(const MatchFinder::MatchResult& result) override {
    // Without any registered checks, being called at all counts as success
    success = true;

    for (auto& binding_check : binding_checks) {
      if (!binding_check(result)) {
        // Remaining checks are not evaluated after the first failure
        success = false;
        break;
      }
    }
  }

  bool matched() const noexcept {
    return success;
  }
};

/**
 * Catch2 matcher that bridges to the libclang ASTMatcher API: source code
 * compiled into a SourceWithAST object can be pattern-matched against a
 * clang AST matcher, optionally with extra checks on bound nodes.
 */
template<typename ClangMatcher>
class HasASTMatching : public Catch::Matchers::MatcherBase<SourceWithAST> {
  ClangMatcher matcher;
  MatchCallback callback;

public:
  HasASTMatching(const ClangMatcher& matcher_)
    : matcher(matcher_) {}

  // Adds a check on the node bound to "binding_name"; returns *this to allow chaining
  template<typename NodeT>
  HasASTMatching& check_binding(std::string_view binding_name, bool (*check_fn)(const NodeT*)) {
    callback.check_binding(binding_name, check_fn);
    return *this;
  }

  bool match(const SourceWithAST& code) const override {
    // Work on a copy of the callback, since this function is const and the callback records match state
    MatchCallback per_run_callback {callback};
    clang::ast_matchers::MatchFinder finder;
    finder.addMatcher(matcher, &per_run_callback);
    finder.matchAST(code.ast->getASTContext());
    return per_run_callback.matched();
  }

  std::string describe() const override {
    return "should compile and match the given AST pattern";
  }
};

// Builds a Catch2 matcher from a clang declaration matcher
HasASTMatching<DeclarationMatcher> matches(const DeclarationMatcher& matcher_) {
  return HasASTMatching<DeclarationMatcher> {matcher_};
}

// Builds a Catch2 matcher from a clang statement matcher
HasASTMatching<StatementMatcher> matches(const StatementMatcher& matcher_) {
  return HasASTMatching<StatementMatcher> {matcher_};
}

/**
 * Catch matcher that succeeds if the tested C++ source contains a function
 * declaration with the given name
 */
class DefinesPublicFunction : public HasASTMatching<DeclarationMatcher> {
  // Kept for the failure description only
  std::string function_name;

public:
  DefinesPublicFunction(std::string_view name)
    : HasASTMatching(functionDecl(hasName(name)))
    , function_name(name) {}

  std::string describe() const override {
    return "should define and export a function called \"" + function_name + "\"";
  }
};

// Compiles "input" via run_tool and stores the resulting AST in this->ast.
// run_tool throws std::runtime_error if an error diagnostic was reported.
SourceWithAST::SourceWithAST(std::string_view input, bool silent_compile)
  : code(input) {
  // Call run_tool with a ToolAction that assigns this->ast

  struct ToolAction : clang::tooling::ToolAction {
    // Refers to the "ast" member of the SourceWithAST under construction
    std::unique_ptr<clang::ASTUnit>& ast;

    ToolAction(std::unique_ptr<clang::ASTUnit>& ast_)
      : ast(ast_) {}

    // Builds an ASTUnit from the compiler invocation; reports success if one was produced
    bool runInvocation(std::shared_ptr<clang::CompilerInvocation> invocation, clang::FileManager* files,
                       std::shared_ptr<clang::PCHContainerOperations> pch, clang::DiagnosticConsumer* diag_consumer) override {
      // The signature of createDiagnostics differs between LLVM versions
#if LLVM_VERSION_MAJOR >= 21
      auto diagnostics =
        clang::CompilerInstance::createDiagnostics(files->getVirtualFileSystem(), invocation->getDiagnosticOpts(), diag_consumer, false);
#elif LLVM_VERSION_MAJOR == 20
      auto diagnostics =
        clang::CompilerInstance::createDiagnostics(files->getVirtualFileSystem(), &invocation->getDiagnosticOpts(), diag_consumer, false);
#else
      auto diagnostics = clang::CompilerInstance::createDiagnostics(&invocation->getDiagnosticOpts(), diag_consumer, false);
#endif

      // LLVM 21 and later take an additional argument (passed as nullptr here)
#if LLVM_VERSION_MAJOR >= 21
      ast = clang::ASTUnit::LoadFromCompilerInvocation(invocation, std::move(pch), nullptr, std::move(diagnostics), files);
#else
      ast = clang::ASTUnit::LoadFromCompilerInvocation(invocation, std::move(pch), std::move(diagnostics), files);
#endif
      return (ast != nullptr);
    }
  } tool_action {ast};

  // No guest ABI is passed, so run_tool compiles with HOST defined
  run_tool(tool_action, code, silent_compile);
}

/**
 * Generates guest thunk library code from the given input
 *
 * The prelude and code are concatenated and fed to the generator; the
 * generated guest file is then read back, prefixed with stub declarations
 * for the macros/helpers the generated code refers to, and parsed again.
 *
 * @param prelude Code that is placed both before the generator input and before the generated output
 * @param code    Generator input (annotations etc.) following the prelude
 * @param silent  Forwarded to run_tool for the analysis and generation steps
 * @return The prelude plus stub declarations plus generated guest code, together with its AST
 */
SourceWithAST Fixture::run_thunkgen_guest(std::string_view prelude, std::string_view code, bool silent) {
  const std::string full_code = std::string {prelude} + std::string {code};

  // These tests don't deal with data layout differences, so just run data
  // layout analysis with host configuration
  auto data_layout_analysis_factory = std::make_unique<AnalyzeDataLayoutActionFactory>();
  run_tool(*data_layout_analysis_factory, full_code, silent);
  auto& data_layout = data_layout_analysis_factory->GetDataLayout();

  run_tool(std::make_unique<GenerateThunkLibsActionFactory>(libname, output_filenames, data_layout), full_code, silent);

  // Minimal stand-ins for the definitions the generated guest code expects
  std::string result = "#include <cstdint>\n"
                       "#define MAKE_THUNK(lib, name, hash) extern \"C\" int fexthunks_##lib##_##name(void*);\n"
                       "template<typename>\n"
                       "struct callback_thunk_defined;\n"
                       "#define MAKE_CALLBACK_THUNK(name, sig, hash) template<> struct callback_thunk_defined<sig> {};\n"
                       "#define FEX_PACKFN_LINKAGE\n"
                       "template<typename Target>\n"
                       "Target *MakeHostTrampolineForGuestFunction(uint8_t HostPacker[32], void (*)(uintptr_t, void*), Target*);\n"
                       "template<typename Target>\n"
                       "Target *AllocateHostTrampolineForGuestFunction(Target*);\n";
  const auto& filename = output_filenames.guest;
  {
    std::ifstream file(filename);
    const auto current_size = result.size();
    const auto new_data_size = std::filesystem::file_size(filename);
    result.resize(current_size + new_data_size);
    // Only new_data_size bytes are available past current_size, so never
    // request more than that (requesting result.size() could write out of bounds)
    file.read(result.data() + current_size, static_cast<std::streamsize>(new_data_size));
    // Drop any tail that was not filled in case fewer bytes could be read
    result.resize(current_size + static_cast<std::string::size_type>(file.gcount()));
  }
  // NOTE(review): unlike run_thunkgen_host, `silent` is not forwarded to
  // SourceWithAST here - confirm whether that is intentional
  return SourceWithAST {std::string {prelude} + result};
}

/**
 * Generates host thunk library code from the given input
 *
 * The prelude and code are concatenated and fed to the generator; the
 * generated host file is then read back, prefixed with stub declarations
 * for the helpers the generated code refers to, and parsed again.
 *
 * @param prelude   Code that is placed both before the generator input and before the generated output
 * @param code      Generator input (annotations etc.) following the prelude
 * @param guest_abi Guest ABI passed to the data layout analysis step
 * @param silent    Forwarded to run_tool and to the final SourceWithAST construction
 * @return The prelude plus stub declarations plus generated host code, together with its AST
 */
SourceWithAST Fixture::run_thunkgen_host(std::string_view prelude, std::string_view code, GuestABI guest_abi, bool silent) {
  const std::string full_code = std::string {prelude} + std::string {code};

  // These tests don't deal with data layout differences, so just run data
  // layout analysis with host configuration
  auto data_layout_analysis_factory = std::make_unique<AnalyzeDataLayoutActionFactory>();
  run_tool(*data_layout_analysis_factory, full_code, silent, guest_abi);
  auto& data_layout = data_layout_analysis_factory->GetDataLayout();

  run_tool(std::make_unique<GenerateThunkLibsActionFactory>(libname, output_filenames, data_layout), full_code, silent);

  // Minimal stand-ins for the definitions the generated host code expects
  std::string result =
    "#include <array>\n"
    "#include <cstdint>\n"
    "#include <cstring>\n"
    "#include <dlfcn.h>\n"
    "#include <type_traits>\n"
    "template<typename Fn>\n"
    "struct function_traits;\n"
    "template<typename Result, typename Arg>\n"
    "struct function_traits<Result(*)(Arg)> {\n"
    "    using result_t = Result;\n"
    "    using arg_t = Arg;\n"
    "};\n"
    "template<auto Fn>\n"
    "static typename function_traits<decltype(Fn)>::result_t\n"
    "fexfn_type_erased_unpack(void* argsv) {\n"
    "    using args_t = typename function_traits<decltype(Fn)>::arg_t;\n"
    "    return Fn(reinterpret_cast<args_t>(argsv));\n"
    "}\n"
    "#define LOAD_INTERNAL_GUESTPTR_VIA_CUSTOM_ABI(arg)\n"
    "struct GuestcallInfo {\n"
    "  uintptr_t HostPacker;\n"
    "  void (*CallCallback)(uintptr_t, uintptr_t, void*);\n"
    "  uintptr_t GuestUnpacker;\n"
    "  uintptr_t GuestTarget;\n"
    "};\n"
    "struct ParameterAnnotations {};\n"
    "template<typename, typename...>\n"
    "struct GuestWrapperForHostFunction {\n"
    "  template<ParameterAnnotations...> static void Call(void*);\n"
    "};\n"
    "struct ExportEntry { uint8_t* sha256; void(*fn)(void *); };\n"
    "void *dlsym_default(void* handle, const char* symbol);\n"
    "template<typename T> inline constexpr bool has_compatible_data_layout = std::is_integral_v<T> || std::is_enum_v<T>;\n"
    "template<typename T>\n"
    "struct guest_layout {\n"
    "  T data;\n"
    "};\n"
    "\n"
    "template<typename T, std::size_t N>\n"
    "struct guest_layout<T[N]> {\n"
    "  using type = std::enable_if_t<!std::is_pointer_v<T>, T>;\n"
    "  std::array<guest_layout<type>, N> data;\n"
    "};\n"
    "\n"
    "template<typename T>\n"
    "struct guest_layout<T*> {\n"
    "#ifdef IS_32BIT_THUNK\n"
    "  using type = uint32_t;\n"
    "#else\n"
    "  using type = uint64_t;\n"
    "#endif\n"
    "  type data;\n"
    "};\n"
    "\n"
    "template<typename T>\n"
    "struct host_layout {\n"
    "  T data;\n"
    "\n"
    "  explicit host_layout(const guest_layout<T>&);\n"
    "  template<typename U> explicit host_layout(const guest_layout<U>&) requires (std::is_integral_v<U> && sizeof(U) <= sizeof(T) && "
    "std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>);\n"
    "};\n"
    "\n"
    "template<typename T> constexpr bool is_long = std::is_same_v<T, long> || std::is_same_v<T, unsigned long>;\n"
    "template<typename T> constexpr bool is_longlong = std::is_same_v<T, long long> || std::is_same_v<T, unsigned long long>;\n"
    "template<typename T>\n"
    "struct host_layout<T*> {\n"
    "  T* data;\n"
    "  explicit host_layout(const guest_layout<T*>&);\n"
    "  template<typename U> explicit host_layout(const guest_layout<U*>&) requires (std::is_integral_v<U> && sizeof(U) == sizeof(long) && "
    "sizeof(long) == 8 && is_long<std::remove_cv_t<T>> && std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>);\n"
    "  template<typename U> explicit host_layout(const guest_layout<U*>&) requires (std::is_integral_v<U> && sizeof(U) == sizeof(long "
    "long) && is_longlong<std::remove_cv_t<T>> && std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>);\n"
    "  template<typename U> explicit host_layout(const guest_layout<U*>&) requires (std::is_same_v<std::remove_cv_t<T>, char> && "
    "std::is_integral_v<U> && std::is_convertible_v<T, U> && sizeof(U) == 1);\n"
    "  template<typename U> explicit host_layout(const guest_layout<U*>&) requires (std::is_same_v<std::remove_cv_t<T>, wchar_t> && "
    "std::is_integral_v<U> && std::is_convertible_v<T, U> && sizeof(U) == sizeof(wchar_t));\n"
    "};\n"
    "\n"
    "template<typename T> struct host_to_guest_convertible {\n"
    "  operator guest_layout<T>();\n"
    "  operator guest_layout<const unsigned long long*>() const requires(std::is_same_v<T, const unsigned long*>);\n"
    "  operator guest_layout<const uint8_t*>() const requires(std::is_same_v<T, const char*>);\n"
    "  operator guest_layout<uint8_t*>() const requires(std::is_same_v<T, char*>);\n"
    "  operator guest_layout<uint32_t*>() const requires(std::is_same_v<T, wchar_t*>);\n"
    "  template<typename U> operator guest_layout<U>() const requires (std::is_integral_v<U> && sizeof(U) == sizeof(T) && "
    "std::is_convertible_v<T, U> && std::is_signed_v<T> == std::is_signed_v<U>);\n"
    "#if IS_32BIT_THUNK\n"
    "  operator guest_layout<uint32_t>() const requires(std::is_same_v<T, size_t>);\n"
    "#endif\n"
    "};\n"
    "\n"
    "template<typename T, size_t N>\n"
    "struct host_layout<T[N]> {\n"
    "  std::array<T, N> data;\n"
    "  host_layout(const guest_layout<T[N]>& from);\n"
    "};\n"
    "\n"
    "template<typename T, typename GuestT>\n"
    "struct repack_wrapper {};\n"
    "template<typename T, typename GuestT>\n"
    "repack_wrapper<T, GuestT> make_repack_wrapper(guest_layout<GuestT>& orig_arg);\n"
    "template<typename T> host_to_guest_convertible<T> to_guest(const host_layout<T>& from);\n"
    "template<typename F> void FinalizeHostTrampolineForGuestFunction(F*);\n"
    "template<typename F> void FinalizeHostTrampolineForGuestFunction(guest_layout<F*>);\n"
    "template<typename T> T& unwrap_host(host_layout<T>&);\n"
    "template<typename T, typename GuestT> T* unwrap_host(repack_wrapper<T*, GuestT>&);\n"
    "template<typename T> const host_layout<T>& to_host_layout(const T& t);\n";

  const auto& filename = output_filenames.host;
  {
    std::ifstream file(filename);
    const auto prelude_size = result.size();
    const auto new_data_size = std::filesystem::file_size(filename);
    result.resize(prelude_size + new_data_size);
    // Only new_data_size bytes are available past prelude_size, so never
    // request more than that (requesting result.size() could write out of bounds)
    file.read(result.data() + prelude_size, static_cast<std::streamsize>(new_data_size));
    // Drop any tail that was not filled in case fewer bytes could be read
    result.resize(prelude_size + static_cast<std::string::size_type>(file.gcount()));

    // Force all functions to be non-static, since having to define them
    // would add a lot of noise to simple tests.
    // Only the generated code is touched; the stub declarations above are
    // skipped by starting every search at prelude_size.
    while (true) {
      auto pos = result.find("static ", prelude_size);
      if (pos == std::string::npos) {
        break;
      }
      result.replace(pos, 6, "      "); // Replace "static" with 6 spaces (avoiding reallocation)
    }
  }
  return SourceWithAST {std::string {prelude} + result, silent};
}

/**
 * Generates both guest and host thunk library code from the given input.
 *
 * Calls run_thunkgen_guest and run_thunkgen_host with the same prelude, code
 * and silent flag; the host side is always generated for GuestABI::X86_64.
 */
Fixture::GenOutput Fixture::run_thunkgen(std::string_view prelude, std::string_view code, bool silent) {
  return {run_thunkgen_guest(prelude, code, silent), run_thunkgen_host(prelude, code, GuestABI::X86_64, silent)};
}

// Helpers for matching struct types by their printed name via asString().
// CLANG_STRUCT_PREFIX is placed directly in front of a string literal and
// relies on adjacent string literal concatenation, so the `name` argument
// of asStructString must be a string literal as well.
#if CLANG_VERSION_MAJOR <= 15
// Old clang versions require an explicit "struct" prefix
#define CLANG_STRUCT_PREFIX "struct "
#define asStructString(name) asString(CLANG_STRUCT_PREFIX name)
#else
// Newer clang versions: the prefix expands to nothing and the name is used as-is
#define CLANG_STRUCT_PREFIX
#define asStructString(name) asString(name)
#endif

TEST_CASE_METHOD(Fixture, "Trivial") {
  // Smallest useful input: a single parameterless function that is exposed
  // through an empty fex_gen_config specialization
  const std::string_view input = "#include <thunks_common.h>\n"
                                 "void func();\n"
                                 "template<auto> struct fex_gen_config {};\n"
                                 "template<> struct fex_gen_config<func> {};\n";
  const auto output = run_thunkgen("", input);

  // Guest code: the function itself and its parameterless packing function
  CHECK_THAT(output.guest, DefinesPublicFunction("func"));

  CHECK_THAT(output.guest, matches(functionDecl(hasName("fexfn_pack_func"), returns(asString("void")), parameterCountIs(0))));

  // Host code: the export table holds two ExportEntry elements
  CHECK_THAT(output.host,
             matches(varDecl(hasName("exports"), hasType(constantArrayType(hasElementType(asStructString("ExportEntry")), hasSize(2))),
                             hasInitializer(initListExpr(
                               hasInit(0, expr()), hasInit(1, initListExpr(hasInit(0, implicitCastExpr()), hasInit(1, implicitCastExpr())))))
                             // TODO: check null termination
                             )));
}

// The generator must reject annotations it does not know about
TEST_CASE_METHOD(Fixture, "UnknownAnnotation") {
  const std::string_view prelude = "void func();\n";

  // Unknown annotation given as a base class of the fex_gen_config specialization
  const std::string_view unknown_base = "struct invalid_annotation {};\n"
                                        "template<auto> struct fex_gen_config {};\n"
                                        "template<> struct fex_gen_config<func> : invalid_annotation {};\n";
  REQUIRE_THROWS(run_thunkgen(prelude, unknown_base, true));

  // Unknown annotation given as a data member of the fex_gen_config specialization
  const std::string_view unknown_field = "template<auto> struct fex_gen_config {};\n"
                                         "template<> struct fex_gen_config<func> { int invalid_field_annotation; };\n";
  REQUIRE_THROWS(run_thunkgen(prelude, unknown_field, true));
}

TEST_CASE_METHOD(Fixture, "VersionedLibrary") {
  const auto output = run_thunkgen_host("", "template<auto> struct fex_gen_config { int version = 123; };\n");

  // The suffix check on the string returned by getString() is spelled
  // differently depending on the clang version, so select it inside the predicate.
  const auto has_versioned_suffix = +[](const clang::StringLiteral* lit) {
#if CLANG_VERSION_MAJOR >= 17
    return lit->getString().ends_with(".so.123");
#else
    return lit->getString().endswith(".so.123");
#endif
  };

  // The library name passed to dlopen must carry the version from the annotation
  CHECK_THAT(output, matches(callExpr(callee(functionDecl(hasName("dlopen"))), hasArgument(0, stringLiteral().bind("libname"))))
                       .check_binding("libname", has_versioned_suffix));
}

// A function signature annotated via fex_gen_type (rather than appearing as
// a parameter of a thunked function)
TEST_CASE_METHOD(Fixture, "FunctionPointerViaType") {
  const auto output = run_thunkgen("", "template<typename> struct fex_gen_type {};\n"
                                       "template<> struct fex_gen_type<int(char, char)> {};\n");

  // Guest should apply MAKE_CALLBACK_THUNK to this signature
  CHECK_THAT(output.guest, matches(classTemplateSpecializationDecl(
                             // Should have signature matching input function
                             hasName("callback_thunk_defined"), hasTemplateArgument(0, refersToType(asString("int (char, char)"))))));

  // Host should export the unpacking function for callback arguments
  // (the bound GuestWrapperForHostFunction specialization is expected to use
  // the signature with char spelled as unsigned char)
  CHECK_THAT(output.host,
             matches(varDecl(hasName("exports"), hasType(constantArrayType(hasElementType(asStructString("ExportEntry")), hasSize(2))),
                             hasInitializer(hasDescendant(declRefExpr(
                               to(cxxMethodDecl(hasName("Call"), ofClass(hasName("GuestWrapperForHostFunction"))).bind("funcptr")))))))
               .check_binding(
                 "funcptr", +[](const clang::CXXMethodDecl* decl) {
                   auto parent = llvm::cast<clang::ClassTemplateSpecializationDecl>(decl->getParent());
                   return parent->getTemplateArgs().get(0).getAsType().getAsString() == "int (unsigned char, unsigned char)";
                 }));
}

// Parameter is a function pointer
TEST_CASE_METHOD(Fixture, "FunctionPointerParameter") {
  const auto output = run_thunkgen("", "void func(int (*funcptr)(char, char));\n"
                                       "template<auto> struct fex_gen_config {};\n"
                                       "template<> struct fex_gen_config<func> {};\n");

  // Guest packing function keeps the function pointer parameter as-is
  CHECK_THAT(output.guest, matches(functionDecl(
                             // Should have signature matching input function
                             hasName("fexfn_pack_func"), returns(asString("void")), parameterCountIs(1),
                             hasParameter(0, hasType(asString("int (*)(char, char)"))))));

  // Host packing function should call FinalizeHostTrampolineForGuestFunction on the argument
  CHECK_THAT(output.host,
             matches(functionDecl(hasName("fexfn_unpack_libtest_func"), hasDescendant(callExpr(callee(functionDecl(hasName("FinalizeHostTra"
                                                                                                                           "mpolineForGuest"
                                                                                                                           "Function"))),
                                                                                               hasArgument(0, expr().bind("funcptr"))))))
               .check_binding(
                 "funcptr", +[](const clang::Expr* funcptr) {
                   // Check that the argument type matches the function pointer
                   return funcptr->getType().getAsString() == "guest_layout<int (*)(char, char)>";
                 }));

  // Host should export the unpacking function for function pointer arguments
  // (the export table is expected to hold three entries in this case)
  CHECK_THAT(output.host,
             matches(varDecl(hasName("exports"), hasType(constantArrayType(hasElementType(asStructString("ExportEntry")), hasSize(3))),
                             hasInitializer(hasDescendant(declRefExpr(to(cxxMethodDecl(hasName("Call"), ofClass(hasName("GuestWrapperForHos"
                                                                                                                        "tFunctio"
                                                                                                                        "n"))))))))));
}

// A function taking several parameters of different kinds (named and unnamed
// builtin types plus a struct passed by value)
TEST_CASE_METHOD(Fixture, "MultipleParameters") {
  const std::string prelude = "struct TestStruct { int member; };\n";

  auto output = run_thunkgen(prelude, "void func(int arg, char, unsigned long, TestStruct);\n"
                                      "template<auto> struct fex_gen_config {};\n"
                                      "template<> struct fex_gen_config<func> {};\n");

  // Guest code
  CHECK_THAT(output.guest, DefinesPublicFunction("func"));

  // The guest packing function mirrors the original parameter list
  CHECK_THAT(output.guest, matches(functionDecl(hasName("fexfn_pack_func"), returns(asString("void")), parameterCountIs(4),
                                                hasParameter(0, hasType(asString("int"))), hasParameter(1, hasType(asString("char"))),
                                                hasParameter(2, hasType(asString("unsigned long"))),
                                                hasParameter(3, hasType(asStructString("TestStruct"))))));

  // Host code
  CHECK_THAT(output.host,
             matches(varDecl(hasName("exports"), hasType(constantArrayType(hasElementType(asStructString("ExportEntry")), hasSize(2))),
                             hasInitializer(initListExpr(
                               hasInit(0, expr()), hasInit(1, initListExpr(hasInit(0, implicitCastExpr()), hasInit(1, implicitCastExpr())))))
                             // TODO: check null termination
                             )));

  // The host unpacking function takes a single pointer to a struct whose
  // fields are guest_layout wrappers around fixed-size equivalents of the
  // parameter types
  CHECK_THAT(
    output.host,
    matches(functionDecl(
      hasName("fexfn_unpack_libtest_func"),
      // Packed argument struct should contain all parameters
      parameterCountIs(1),
      hasParameter(0, hasType(pointerType(pointee(hasUnqualifiedDesugaredType(recordType(hasDeclaration(decl(
                        has(fieldDecl(hasType(asString("guest_layout<int32_t>")))), has(fieldDecl(hasType(asString("guest_layout<uint8_t>")))),
                        has(fieldDecl(hasType(asString("guest_layout<uint64_t>")))),
                        has(fieldDecl(hasType(asString("guest_layout<" CLANG_STRUCT_PREFIX "TestStruct>")))))))))))))));
}

// A function returning a function pointer is rejected unless it carries an
// annotation saying how to treat the returned pointer
TEST_CASE_METHOD(Fixture, "ReturnFunctionPointer") {
  const std::string prelude = "using funcptr = void (*)(char, char);\n";

  // Without annotation: generation must fail
  const std::string_view unannotated = "funcptr func(int);\n"
                                       "template<auto> struct fex_gen_config {};\n"
                                       "template<> struct fex_gen_config<func> {};\n";
  REQUIRE_THROWS(run_thunkgen_guest(prelude, unannotated, true));

  // With fexgen::returns_guest_pointer: generation must succeed
  const std::string_view annotated = "#include <thunks_common.h>\n"
                                     "funcptr func(int);\n"
                                     "template<auto> struct fex_gen_config {};\n"
                                     "template<> struct fex_gen_config<func> : fexgen::returns_guest_pointer {};\n";
  REQUIRE_NOTHROW(run_thunkgen_guest(prelude, annotated));
}

TEST_CASE_METHOD(Fixture, "VariadicFunction") {
  const std::string prelude = "void func(int arg, ...);\n";

  // The uniform_va_type annotation names the type used for all variadic arguments
  const std::string_view annotations = "template<auto> struct fex_gen_config {};\n"
                                       "template<> struct fex_gen_config<func> {\n"
                                       "  using uniform_va_type = char;\n"
                                       "};\n";
  const auto output = run_thunkgen_guest(prelude, annotations);

  // The internal packing function takes the fixed parameter, followed by an
  // unsigned long and a pointer to the annotated type
  CHECK_THAT(output, matches(functionDecl(hasName("fexfn_pack_func_internal"), returns(asString("void")), parameterCountIs(3),
                                          hasParameter(0, hasType(asString("int"))), hasParameter(1, hasType(asString("unsigned long"))),
                                          hasParameter(2, hasType(pointerType(pointee(asString("char"))))))));
}

// A variadic function without an annotation must be rejected by the generator
TEST_CASE_METHOD(Fixture, "VariadicFunctionsWithoutAnnotation") {
  const std::string_view prelude = "void func(int arg, ...);\n";
  const std::string_view unannotated = "template<auto> struct fex_gen_config {};\n"
                                       "template<> struct fex_gen_config<func> {};\n";

  REQUIRE_THROWS(run_thunkgen_guest(prelude, unannotated, true));
}

// Tests generation of guest_layout/host_layout wrappers and related helpers
TEST_CASE_METHOD(Fixture, "LayoutWrappers") {
  // Every section below is run once per guest ABI
  auto guest_abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(guest_abi);

  // Matches a host_layout<A> specialization whose "data" member is a plain struct A
  const auto host_layout_is_trivial =
    matches(classTemplateSpecializationDecl(hasName("host_layout"), hasAnyTemplateArgument(refersToType(asString("struct A"))),
                                            has(fieldDecl(hasName("data"), hasType(hasCanonicalType(asString("struct A")))))));
  // Matches a specialization of the given template for struct A that has no complete definition
  const auto layout_undefined = [](const char* type) {
    return matches(classTemplateSpecializationDecl(hasName(type), hasAnyTemplateArgument(refersToType(asString("struct A")))).bind("layout"))
      .check_binding("layout", +[](const clang::ClassTemplateSpecializationDecl* decl) { return !decl->isCompleteDefinition(); });
  };
  // Matches a to_guest overload converting host_layout<A> to guest_layout<A>
  const auto guest_converter_defined = matches(functionDecl(
    hasName("to_guest"),
    // Parameter is a host_layout<A> (ignoring qualifiers and references)
    hasParameter(
      0, hasType(references(classTemplateSpecializationDecl(hasName("host_layout"), hasAnyTemplateArgument(refersToType(asString("struct "
                                                                                                                                 "A"))))))),
    // Return value is a guest_layout<A>
    returns(asString("guest_layout<" CLANG_STRUCT_PREFIX "A>"))));
  // Matches a to_guest overload for host_layout<A> that is explicitly deleted
  const auto guest_converter_undefined = matches(functionDecl(
    hasName("to_guest"),
    // Parameter is a host_layout<A> (ignoring qualifiers and references)
    hasParameter(
      0, hasType(references(classTemplateSpecializationDecl(hasName("host_layout"), hasAnyTemplateArgument(refersToType(asString("struct "
                                                                                                                                 "A"))))))),
    isDeleted()));

  // Generator input shared by all sections that don't define their own
  const std::string code = "template<typename> struct fex_gen_type {};\n"
                           "template<> struct fex_gen_type<A> {};\n";

  // For fully compatible types, both guest_layout and host_layout directly
  // reference the original struct
  SECTION("Fully compatible type") {
    const char* struct_def = "struct A { int a; int b; };\n";
    const auto output = run_thunkgen_host(struct_def, code, guest_abi);
    CHECK_THAT(output,
               matches(classTemplateSpecializationDecl(hasName("guest_layout"), hasAnyTemplateArgument(refersToType(asString("struct A"))),
                                                       has(fieldDecl(hasName("data"), hasType(hasCanonicalType(asString("struct A"))))))));
    CHECK_THAT(output, guest_converter_defined);

    CHECK_THAT(output, host_layout_is_trivial);
  }

  // For repackable types, guest_layout explicitly lists its members
  SECTION("Repackable type") {
    // Same members on both sides, but in a different order
    const char* struct_def = "#ifdef HOST\n"
                             "struct A { int a; int b; };\n"
                             "#else\n"
                             "struct A { int b; int a; };\n"
                             "#endif\n";
    const auto output = run_thunkgen_host(struct_def, code, guest_abi);
    CHECK_THAT(output, matches(classTemplateSpecializationDecl(
                         hasName("guest_layout"), hasAnyTemplateArgument(refersToType(asString("struct A"))),
                         // The member "data" exists and is defined to a struct...
                         has(fieldDecl(hasName("data"), hasType(hasCanonicalType(hasDeclaration(decl(
                                                          // ... the members of which also use guest_layout (with fixed-size integers)
                                                          has(fieldDecl(hasName("a"), hasType(asString("guest_layout<int32_t>")))),
                                                          has(fieldDecl(hasName("b"), hasType(asString("guest_layout<int32_t>")))))))))))));
    CHECK_THAT(output, guest_converter_defined);

    CHECK_THAT(output, host_layout_is_trivial);
  }

  // For incompatible types, use of both guest_layout and host_layout should be prohibited
  SECTION("Incompatible type, unannotated") {
    // Member names differ between the host and guest definitions
    const char* struct_def = "#ifdef HOST\n"
                             "struct A { int a; int b; };\n"
                             "#else\n"
                             "struct A { int c; int d; };\n"
                             "#endif\n";
    const auto output = run_thunkgen_host(struct_def, code, guest_abi);
    CHECK_THAT(output, layout_undefined("guest_layout"));
    CHECK_THAT(output, guest_converter_undefined);
    CHECK_THAT(output, layout_undefined("host_layout"));
  }

  // Layout wrappers can be enabled even for incompatible types using the emit_layout_wrappers annotation
  SECTION("Incompatible type, annotated") {
    // A slightly different setup is used here in order to construct a type which...
    // - has incompatible data layout (for both 32-bit and 64-bit guests)
    // - has consistently named members in struct A (which is required to emit layout wrappers)
    const char* struct_def = "#ifdef HOST\n"
                             "struct B { int a; };\n"
                             "#else\n"
                             "struct B { int b; };\n"
                             "#endif\n"
                             "struct A { B* a; int b; };\n";
    // Shadows the outer `code`: this section needs the emit_layout_wrappers annotation
    const std::string code = "#include <thunks_common.h>\n"
                             "template<typename> struct fex_gen_type {};\n"
                             "template<> struct fex_gen_type<A> : fexgen::emit_layout_wrappers {};\n";
    const auto output = run_thunkgen_host(struct_def, code, guest_abi);
    CHECK_THAT(output, matches(classTemplateSpecializationDecl(
                         hasName("guest_layout"), hasAnyTemplateArgument(refersToType(recordType(hasDeclaration(recordDecl(hasName("A")))))),
                         // The member "data" exists and is defined to a struct...
                         has(fieldDecl(hasName("data"), hasType(hasCanonicalType(hasDeclaration(decl(
                                                          // ... the members of which also use guest_layout
                                                          has(fieldDecl(hasName("a"), hasType(asString("guest_layout<" CLANG_STRUCT_PREFIX "B "
                                                                                                       "*>")))),
                                                          has(fieldDecl(hasName("b"), hasType(asString("guest_layout<int32_t>")))))))))))));
    CHECK_THAT(output, guest_converter_defined);

    CHECK_THAT(output, host_layout_is_trivial);
  }
}

// Some integer types are differently sized on the guest than on the host.
// Hence, all integer types are mapped to fixed-size equivalents when mentioned
// in a guest context. This test ensures the mapping is done correctly and
// the resulting guest_layout instantiations are convertible to host_layout.
TEST_CASE_METHOD(Fixture, "Mapping guest integers to fixed-size") {
  // Every combination of the generators below is run for both guest ABIs
  auto guest_abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(guest_abi);

  // Run each test a second time to ensure fixed-size integer mapping is
  // applied to pointees as well
  const std::string ptr = GENERATE("", " *");

  // Run each test with and without the ptr_passthrough annotation
  const bool passthrough_guest_type = GENERATE(false, true);

  // These types are differently sized on 32-bit guests
  SECTION("(u)intptr_t / size_t / long") {
    const std::string type = GENERATE("long", "unsigned long", "uintptr_t", "intptr_t", "size_t");
    INFO(type + ptr);
    // Declare a function taking the generated parameter type; the
    // ptr_passthrough annotation is only appended when requested
    const auto code = "#include <thunks_common.h>\n"
                      "#include <cstddef>\n"
                      "#include <cstdint>\n"
                      "void func(" +
                      type + ptr +
                      ");\n"
                      "template<> struct fex_gen_config<func> : fexgen::custom_host_impl {};\n" +
                      (passthrough_guest_type ? "template<> struct fex_gen_param<func, 0> : fexgen::ptr_passthrough {};\n" : "");
    if (!ptr.empty() && guest_abi == GuestABI::X86_32 && !passthrough_guest_type) {
      // Guest points to a 32-bit integer, but the host to a 64-bit one.
      // This should be detected as a failure.
      CHECK_THROWS_WITH(run_thunkgen_host("", code, guest_abi, true),
                        Catch::Matchers::ContainsSubstring("initialization of 'host_layout", Catch::CaseSensitive::No));
    } else {
      const auto output = run_thunkgen_host("", code, guest_abi);
      // Assemble the expected type name: "guest_layout<", an optional "u"
      // prefix, a width chosen by the guest ABI, the pointer suffix, and ">"
      std::string expected_type = "guest_layout<";
      if (type == "size_t" || type.starts_with("u")) {
        expected_type += "u";
      }
      // NOTE(review): the unary '+' merely decays the literal to a pointer;
      // the appended text is unaffected by it
      expected_type += (guest_abi == GuestABI::X86_32 ? +"int32_t" : "int64_t");
      expected_type += ptr + ">";
      CHECK_THAT(output, matches(functionDecl(hasName("fexfn_unpack_libtest_func"),
                                              // Packed argument struct should contain all parameters
                                              parameterCountIs(1),
                                              hasParameter(0, hasType(pointerType(pointee(hasUnqualifiedDesugaredType(recordType(
                                                                hasDeclaration(decl(has(fieldDecl(hasType(asString(expected_type)))))))))))))));

      // For passthrough parameters, the target function signature should
      // match the guest_layout type
      if (passthrough_guest_type) {
        CHECK_THAT(output, matches(functionDecl(hasName("fexfn_impl_libtest_func"), parameterCountIs(1),
                                                hasParameter(0, hasType(asString(expected_type))))));
      }
    }
  }

  // Most integer types are uniquely defined by specifying their size and
  // their signedness. (w)char-types and long-types are special:
  // * "char", "signed char", and "unsigned char" are different types
  // * "wchar_t" is mapped to "guest_layout<uint32_t>", but uint32_t
  //   itself is a type alias for "int"
  // * "long long" is mapped to "guest_layout<uint64_t>", but uint64_t
  //   itself is a type alias for "long" on 64-bit (which is a different
  //   type than "long long")
  //
  // This test section ensures that the correct fixed-size integers are used
  // *and* that they can be converted to host_layout.
  SECTION("Special integer types") {
    const std::string type = GENERATE("long long", "unsigned long long", "char", "unsigned char", "signed char", "wchar_t");
    // Expected fixed-size name: a "u" prefix for "unsigned ...", "char" and
    // "wchar_t", followed by a width selected from the type name
    std::string fixed_size_type = ((type.starts_with("unsigned") || type == "char" || type == "wchar_t") ? "u" : "");
    if (type.ends_with("long long")) {
      fixed_size_type += "int64_t";
    } else if (type.ends_with("wchar_t")) {
      fixed_size_type += "int32_t";
    } else {
      fixed_size_type += "int8_t";
    }
    INFO(type + ptr + ", expecting " + fixed_size_type + ptr);
    // Same function declaration as in the previous section, for the special type
    const auto code = "#include <thunks_common.h>\n"
                      "#include <cstdint>\n"
                      "void func(" +
                      type + ptr +
                      ");\n"
                      "template<> struct fex_gen_config<func> : fexgen::custom_host_impl {};\n" +
                      (passthrough_guest_type ? "template<> struct fex_gen_param<func, 0> : fexgen::ptr_passthrough {};\n" : "");
    const auto output = run_thunkgen_host("", code, guest_abi);
    CHECK_THAT(output, matches(functionDecl(
                         hasName("fexfn_unpack_libtest_func"),
                         // Packed argument struct should contain all parameters
                         parameterCountIs(1),
                         hasParameter(0, hasType(pointerType(pointee(hasUnqualifiedDesugaredType(recordType(hasDeclaration(
                                           decl(has(fieldDecl(hasType(asString("guest_layout<" + fixed_size_type + ptr + ">")))))))))))))));

    // For passthrough parameters, the target function signature should
    // match the guest_layout type
    if (passthrough_guest_type) {
      CHECK_THAT(output, matches(functionDecl(hasName("fexfn_impl_libtest_func"), parameterCountIs(1),
                                              hasParameter(0, hasType(asString("guest_layout<" + fixed_size_type + ptr + ">"))))));
    }
  }
}

TEST_CASE_METHOD(Fixture, "StructRepacking") {
  // Each section is exercised once per guest ABI
  auto abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(abi);

  // The thunked function is identical in every section; only the prelude that
  // defines its parameter type "A" differs
  const std::string thunk_code = "#include <thunks_common.h>\n"
                                 "void func(A*);\n"
                                 "template<auto> struct fex_gen_config {};\n"
                                 "template<> struct fex_gen_config<func> : fexgen::custom_host_impl {};\n";

  SECTION("Pointer to struct with consistent data layout") {
    CHECK_NOTHROW(run_thunkgen_host("struct A { int a; };\n", thunk_code, abi));
  }

  SECTION("Pointer to struct with unannotated pointer member with inconsistent data layout") {
    // "B" names its only member differently depending on whether HOST is defined
    const auto mismatched_prelude = "#ifdef HOST\n"
                                    "struct B { int a; };\n"
                                    "#else\n"
                                    "struct B { int b; };\n"
                                    "#endif\n"
                                    "struct A { B* a; };\n";

    SECTION("Parameter unannotated") {
      CHECK_THROWS(run_thunkgen_host(mismatched_prelude, thunk_code, abi, true));
    }

    SECTION("Parameter annotated as ptr_passthrough") {
      const std::string passthrough_code = thunk_code + "template<> struct fex_gen_param<func, 0, A*> : fexgen::ptr_passthrough {};\n";
      CHECK_NOTHROW(run_thunkgen_host(mismatched_prelude, passthrough_code, abi));
    }

    SECTION("Struct member annotated as custom_repack") {
      // Uses its own prelude in which the member is a plain void pointer
      const std::string repack_code = thunk_code + "template<> struct fex_gen_config<&A::a> : fexgen::custom_repack {};\n";
      CHECK_NOTHROW(run_thunkgen_host("struct A { void* a; };\n", repack_code, abi));
    }
  }

  SECTION("Pointer to struct with pointer member of consistent data layout") {
    std::string pointee = GENERATE("char", "short", "int", "float");
    REQUIRE_NOTHROW(run_thunkgen_host("struct A { " + pointee + "* a; };\n", thunk_code, abi));
  }

  SECTION("Pointer to struct with pointer member of opaque type") {
    // "B" is only forward-declared here
    const auto opaque_prelude = "struct B;\n"
                                "struct A { B* a; };\n";

    // Without an annotation, generation must fail mentioning the incomplete type
    REQUIRE_THROWS_WITH(run_thunkgen_host(opaque_prelude, thunk_code, abi), Catch::Matchers::ContainsSubstring("incomplete type"));

    // Declaring B as opaque_type makes the same input acceptable
    const std::string opaque_code = thunk_code + "template<> struct fex_gen_type<B> : fexgen::opaque_type {};\n";
    CHECK_NOTHROW(run_thunkgen_host(opaque_prelude, opaque_code, abi));
  }
}

TEST_CASE_METHOD(Fixture, "VoidPointerParameter") {
  // Each section runs once for the 32-bit and once for the 64-bit guest ABI
  auto abi = GENERATE(GuestABI::X86_32, GuestABI::X86_64);
  INFO(abi);

  SECTION("Unannotated") {
    const char* thunk_code = "#include <thunks_common.h>\n"
                             "void func(void*);\n"
                             "template<> struct fex_gen_config<func> {};\n";
    if (abi != GuestABI::X86_32) {
      // Pointee data is assumed to be compatible on 64-bit
      CHECK_NOTHROW(run_thunkgen_host("", thunk_code, abi));
    } else {
      // TODO: Currently not considered an error
      //            CHECK_THROWS_WITH(run_thunkgen_host("", code, guest_abi, true), Catch::Matchers::ContainsSubstring("unsupported parameter type", Catch::CaseSensitive::No));
    }
  }

  SECTION("Passthrough") {
    // The void* parameter is annotated as ptr_passthrough
    const char* thunk_code = "#include <thunks_common.h>\n"
                             "void func(void*);\n"
                             "template<> struct fex_gen_config<func> : fexgen::custom_host_impl {};\n"
                             "template<> struct fex_gen_param<func, 0, void*> : fexgen::ptr_passthrough {};\n";
    CHECK_NOTHROW(run_thunkgen_host("", thunk_code, abi));
  }

  SECTION("Assumed compatible") {
    // The void* parameter is annotated as assume_compatible_data_layout
    const char* thunk_code = "#include <thunks_common.h>\n"
                             "void func(void*);\n"
                             "template<> struct fex_gen_config<func> {};\n"
                             "template<> struct fex_gen_param<func, 0, void*> : fexgen::assume_compatible_data_layout {};\n";
    CHECK_NOTHROW(run_thunkgen_host("", thunk_code, abi));
  }

  SECTION("Unannotated in struct") {
    const char* struct_prelude = "struct A { void* a; };\n";
    const char* thunk_code = "#include <thunks_common.h>\n"
                             "void func(A*);\n"
                             "template<> struct fex_gen_config<func> {};\n";
    if (abi != GuestABI::X86_32) {
      CHECK_NOTHROW(run_thunkgen_host(struct_prelude, thunk_code, abi));
    } else {
      // On the 32-bit guest the unannotated void* member must be rejected
      CHECK_THROWS_WITH(run_thunkgen_host(struct_prelude, thunk_code, abi, true),
                        Catch::Matchers::ContainsSubstring("unsupported parameter type", Catch::CaseSensitive::No));
    }
  }

  SECTION("Custom repack in struct") {
    // The void* member is annotated as custom_repack
    const char* struct_prelude = "struct A { void* a; };\n";
    const char* thunk_code = "#include <thunks_common.h>\n"
                             "void func(A*);\n"
                             "template<> struct fex_gen_config<&A::a> : fexgen::custom_repack {};\n"
                             "template<> struct fex_gen_config<func> {};\n";
    CHECK_NOTHROW(run_thunkgen_host(struct_prelude, thunk_code, abi));
  }
}


================================================
FILE: unittests/Utilities/CMakeLists.txt
================================================
# Helper executable built from DeleteOldSHMRegions.cpp
add_executable(DeleteOldSHMRegions
  DeleteOldSHMRegions.cpp)

# Place the binary in the TestUtilities directory of the build tree
set_target_properties(DeleteOldSHMRegions
  PROPERTIES
    RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/TestUtilities")

# Convenience target that runs the helper from that directory
add_custom_target(remove_old_shm_regions
  COMMAND "DeleteOldSHMRegions"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}/TestUtilities/"
  USES_TERMINAL)


================================================
FILE: unittests/Utilities/DeleteOldSHMRegions.cpp
================================================
#include <cstdint>
#include <fstream>
#include <stdio.h>
#include <sys/shm.h>
#include <unistd.h>

// Returns true when `ctime` (seconds since the epoch, as reported in
// /proc/sysvipc/shm) lies more than ten minutes before the current time.
//
// Timestamps at or after the current time are never considered old. The
// ordering is checked before subtracting because the difference is computed
// in unsigned arithmetic and would otherwise wrap around to a huge value,
// making a segment with a future change time look ancient.
static bool ctime_is_old(uint64_t ctime) {
  constexpr uint64_t MaxAgeInSeconds = 600;

  time_t curtime;
  time(&curtime);

  // time() reports failure as (time_t)-1; without a usable clock nothing can
  // be declared old.
  if (curtime < 0) {
    return false;
  }

  const uint64_t now = static_cast<uint64_t>(curtime);
  return now > ctime && (now - ctime) > MaxAgeInSeconds;
}

// Walks /proc/sysvipc/shm and removes every SysV shared memory segment that
// belongs to the calling user, has no attachments, and is considered old by
// ctime_is_old(). Removal is best-effort: shmctl() failures are ignored.
// Always returns 0.
int main() {
  // clang-format off
  // key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap
  //	 0     360448   777                  4096 165187 165187      0  1002  1002  1002  1002 1679659857 1679659857 1679659857                  4096                     0
  //	 0      32769   777                  4096 153814 153814      0  1002  1002  1002  1002 1676490841 1676490841 1676490841                     0                  4096
  // clang-format on
  std::ifstream fs {"/proc/sysvipc/shm", std::fstream::binary};
  std::string Line;

  // The first line is the column header; discard it.
  std::getline(fs, Line);

  const auto current_uid = getuid();

  // std::getline() only succeeds when a line was actually extracted, so no
  // additional eof() test is needed. Such a test would drop the final record
  // whenever the input does not end in a newline.
  while (std::getline(fs, Line)) {
    int shmid;
    int attach;
    // Parsed as unsigned so the conversions match the variable types and the
    // uid comparison below is not a signed/unsigned mix.
    unsigned int uid;
    unsigned long long ctime;

    // Columns picked out of each record: shmid (2nd), nattch (7th), uid (8th)
    // and ctime (14th); everything else is skipped.
    const int parsed = sscanf(Line.c_str(), "%*d %d %*d %*d %*d %*d %d %u %*d %*d %*d %*d %*d %llu", &shmid, &attach, &uid, &ctime);
    if (parsed != 4) {
      // Malformed or truncated line
      continue;
    }

    // If the UID matches and nothing is attached AND it is old then delete it.
    if (uid == current_uid && attach == 0 && ctime_is_old(ctime)) {
      shmctl(shmid, IPC_RMID, nullptr);
    }
  }
  return 0;
}


================================================
FILE: unittests/gcc-target-tests-32/CMakeLists.txt
================================================
# Collect every file below the prebuilt 32-bit gcc target test directory.
# Careful: globbing does not notice changes to the contents of files, so a
# fresh clean is needed to see those.
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS ${CMAKE_SOURCE_DIR}/External/fex-gcc-target-tests-bins/32/*)

foreach(TEST ${TESTS})
  # Path of the test below the 32-bit directory, with '/' flattened to '-'
  string(REPLACE "/fex-gcc-target-tests-bins/32/" ";" TEST_NAME_LIST ${TEST})
  list(GET TEST_NAME_LIST 1 TEST_FILE_NAME)
  string(REPLACE "/" "-" TEST_BASE_NAME ${TEST_FILE_NAME})

  # Register each test once per block size
  foreach(BLOCK_SIZE 500 1)
    set(TEST_NAME "${TEST_BASE_NAME}.n${BLOCK_SIZE}.gcc-target-32")

    add_test(
      NAME "${TEST_NAME}"
      COMMAND
        "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
        "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
        "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
        "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
        "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
        "${TEST_NAME}"
        "guest"
        "$<TARGET_FILE:FEX>"
        "${TEST}")

    # Exit code 125 marks the test as skipped; the block size reaches FEX through FEX_MAXINST
    set_property(TEST "${TEST_NAME}" APPEND PROPERTY SKIP_RETURN_CODE 125)
    set_property(TEST "${TEST_NAME}" APPEND PROPERTY ENVIRONMENT "FEX_OUTPUTLOG=stderr;FEX_SILENTLOG=0;FEX_MAXINST=${BLOCK_SIZE}")
  endforeach()
endforeach()

# Runs all tests registered above through ctest
add_custom_target(gcc_target_tests_32
  COMMAND "ctest" "--output-on-failure" "--timeout" "40" ${TEST_JOB_FLAG} "-R" "\.*.gcc-target-32$$"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL)


================================================
FILE: unittests/gcc-target-tests-32/Disabled_Tests
================================================
# Uses AVX
pr57275.c.gcc-target-test-32.n1.gcc-target-32
pr57275.c.gcc-target-test-32.n500.gcc-target-32

# Consistently crashes on Solidrun only in CI
20080723-1.c.gcc-target-test-32.n1.gcc-target-32
20080723-1.c.gcc-target-test-32.n500.gcc-target-32

# These tests fail because of things unrelated to the sse4.1 instructions
sse4_1-ceil-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-ceil-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-ceilf-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-ceilf-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-floor-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-floor-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-floorf-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-floorf-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-rint-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-rint-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-rintf-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-rintf-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-round-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-round-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-roundf-sfix-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-roundf-sfix-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-rint-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-rint-vec.c.gcc-target-test-32.n500.gcc-target-32

sse4_1-roundf-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-roundf-vec.c.gcc-target-test-32.n500.gcc-target-32


# These tests fail on Nvidia Xavier ONLY
sse4_1-rintf-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-rintf-vec.c.gcc-target-test-32.n500.gcc-target-32
sse4_1-round-vec.c.gcc-target-test-32.n1.gcc-target-32
sse4_1-round-vec.c.gcc-target-test-32.n500.gcc-target-32

# This has a race with SIGPROF
mcount_pic.c.gcc-target-test-32.n1.gcc-target-32
mcount_pic.c.gcc-target-test-32.n500.gcc-target-32

================================================
FILE: unittests/gcc-target-tests-32/Expected_Output
================================================
# default to zero

================================================
FILE: unittests/gcc-target-tests-32/Known_Failures
================================================
sse2-mmx-pextrw.c.gcc-target-test-32.n500.gcc-target-32
sse2-mmx-pextrw.c.gcc-target-test-32.n1.gcc-target-32

# Consistently crashes on Solidrun only in CI
20080723-1.c.gcc-target-test-32

# These tests fail because of things unrelated to the sse4.1 instructions
sse4_1-ceil-sfix-vec.c.gcc-target-test-32
sse4_1-ceilf-sfix-vec.c.gcc-target-test-32
sse4_1-floor-sfix-vec.c.gcc-target-test-32
sse4_1-floorf-sfix-vec.c.gcc-target-test-32
sse4_1-rint-sfix-vec.c.gcc-target-test-32
sse4_1-rintf-sfix-vec.c.gcc-target-test-32
sse4_1-round-sfix-vec.c.gcc-target-test-32
sse4_1-roundf-sfix-vec.c.gcc-target-test-32
sse4_1-rint-vec.c.gcc-target-test-32
sse4_1-roundf-vec.c.gcc-target-test-32

# These tests fail on Nvidia Xavier ONLY
sse4_1-rintf-vec.c.gcc-target-test-32
sse4_1-round-vec.c.gcc-target-test-32

# This has a race with SIGPROF
mcount_pic.c.gcc-target-test-32

# See https://github.com/FEX-Emu/FEX/issues/4436
# Test has invalid asm. Fails on host as well.
# It passes for -n 500 due to x87 stack optimization pass
# not actually doing a fld/fst pair and instead doing the 
# memory store directly.
pr88240.c.gcc-target-test-32.n1.gcc-target-32


================================================
FILE: unittests/gcc-target-tests-64/CMakeLists.txt
================================================
# Collect every file below the prebuilt 64-bit gcc target test directory.
# Careful: globbing does not notice changes to the contents of files, so a
# fresh clean is needed to see those.
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS ${CMAKE_SOURCE_DIR}/External/fex-gcc-target-tests-bins/64/*)

foreach(TEST ${TESTS})
  # Path of the test below the 64-bit directory, with '/' flattened to '-'
  string(REPLACE "/fex-gcc-target-tests-bins/64/" ";" TEST_NAME_LIST ${TEST})
  list(GET TEST_NAME_LIST 1 TEST_FILE_NAME)
  string(REPLACE "/" "-" TEST_BASE_NAME ${TEST_FILE_NAME})

  # Register each test once per block size
  foreach(BLOCK_SIZE 500 1)
    set(TEST_NAME "${TEST_BASE_NAME}.n${BLOCK_SIZE}.gcc-target-64")

    add_test(
      NAME "${TEST_NAME}"
      COMMAND
        "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
        "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
        "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
        "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
        "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
        "${TEST_NAME}"
        "guest"
        "$<TARGET_FILE:FEX>"
        "${TEST}")

    # Exit code 125 marks the test as skipped; the block size reaches FEX through FEX_MAXINST
    set_property(TEST "${TEST_NAME}" APPEND PROPERTY SKIP_RETURN_CODE 125)
    set_property(TEST "${TEST_NAME}" APPEND PROPERTY ENVIRONMENT "FEX_OUTPUTLOG=stderr;FEX_SILENTLOG=0;FEX_MAXINST=${BLOCK_SIZE}")
  endforeach()
endforeach()

# Runs all tests registered above through ctest
add_custom_target(gcc_target_tests_64
  COMMAND "ctest" "--output-on-failure" "--timeout" "40" ${TEST_JOB_FLAG} "-R" "\.*.gcc-target-64$$"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL)


================================================
FILE: unittests/gcc-target-tests-64/Disabled_Tests
================================================
# This relies on SIGPROF which means we need real signal support to handle this
# Crashes or hangs depending on which runner is running it.
# Also this has a race with SIGPROF
mcount_pic.c.gcc-target-test-64


================================================
FILE: unittests/gcc-target-tests-64/Expected_Output
================================================
# default to zero

================================================
FILE: unittests/gcc-target-tests-64/Known_Failures
================================================
# This has invalid asm generated for its test
# 'foo' gets the argument in edi
# Passes the argument to bar in eax
# 'bar' accepts the argument in edi
# inline asm is incorrect, needs to use =d and d on the value
asm-5.c.gcc-target-test-64.n500.gcc-target-64
asm-5.c.gcc-target-test-64.n1.gcc-target-64

# Fails even on host device
# 'test_pextrw' does a zero extend to the gpr
# Which means its -3339 value turns in to 0xf2f5
# While the 'compute_correct_result' value does a sign extension
# Which turns the value in to 0xfffff2f5
# This causes its comparison to fail
sse2-mmx-pextrw.c.gcc-target-test-64.n500.gcc-target-64
sse2-mmx-pextrw.c.gcc-target-test-64.n1.gcc-target-64

# See https://github.com/FEX-Emu/FEX/issues/4436
# Test has invalid asm. Fails on host as well.
# It passes for -n 500 due to x87 stack optimization pass
# not actually doing a fld/fst pair and instead doing the 
# memory store directly.
pr88240.c.gcc-target-test-64.n1.gcc-target-64

================================================
FILE: unittests/gvisor-tests/CMakeLists.txt
================================================
# Collect every prebuilt gvisor test binary (files ending in "_test").
# Careful: globbing does not notice changes to the contents of files, so a
# fresh clean is needed to see those.
file(GLOB_RECURSE TESTS CONFIGURE_DEPENDS ${CMAKE_SOURCE_DIR}/External/fex-gvisor-tests-bins/*_test)

foreach(TEST ${TESTS})
  # Path of the test below the binaries directory, with '/' flattened to '-'
  string(REPLACE "/fex-gvisor-tests-bins/" ";" TEST_NAME_LIST ${TEST})
  list(GET TEST_NAME_LIST 1 TEST_NAME)
  string(REPLACE "/" "-" TEST_NAME ${TEST_NAME})

  # ctest sees the suffixed name, while the runner script receives the bare one
  set(GVISOR_CTEST_NAME "${TEST_NAME}.jit.gvisor")

  add_test(
    NAME "${GVISOR_CTEST_NAME}"
    COMMAND
      "python3" "${CMAKE_SOURCE_DIR}/Scripts/guest_test_runner.py"
      "${CMAKE_CURRENT_SOURCE_DIR}/Known_Failures"
      "${CMAKE_CURRENT_SOURCE_DIR}/Expected_Output"
      "${CMAKE_CURRENT_SOURCE_DIR}/Disabled_Tests"
      "${CMAKE_CURRENT_SOURCE_DIR}/Flake_Tests"
      "${TEST_NAME}"
      "guest"
      "$<TARGET_FILE:FEX>"
      "${TEST}")

  # Exit code 125 marks the test as skipped
  set_property(TEST "${GVISOR_CTEST_NAME}" APPEND PROPERTY SKIP_RETURN_CODE 125)
  set_property(TEST "${GVISOR_CTEST_NAME}" APPEND PROPERTY ENVIRONMENT "FEX_OUTPUTLOG=stderr;FEX_SILENTLOG=0;FEX_MAXINST=500")
endforeach()

# Best-effort removal of tmp inside the rootfs: error output is discarded and
# a failing rm is ignored. FEX_ROOTFS is read from the environment when CMake
# configures the project.
set(RM_DIR_COMMAND "rm $ENV{FEX_ROOTFS}/tmp 2> /dev/null || true")

# Cleans up first, then runs all gvisor tests through ctest
add_custom_target(gvisor_tests
  COMMAND "sh" "-c" "${RM_DIR_COMMAND}"
  COMMAND "ctest" "--output-on-failure" "--timeout" "302" ${TEST_JOB_FLAG} "-R" "\.*.gvisor$$"
  WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
  USES_TERMINAL
  VERBATIM)


================================================
FILE: unittests/gvisor-tests/Disabled_Tests
================================================
# never terminates (even though we use timeout)
kill_test

# erratic test results
xattr_test
mlock_test

# these are flaky on x86
stat_times_test

# This periodically fails on ARM hosts. `PosixError(errno=13 Permission denied) open(/proc/self/oom_score_adj, 0x1, 0)`
proc_pid_oomscore_test

# Spins forever
ptrace_test

# The behaviour of this is different between x86 and ARM
# SIGSEGV kills a thread successfully on ARM but not on x86
time_test

# The behaviour of this test changes depending on if running inside the test harness or not
# Works outside of the test harness but fails inside of it
pause_test

# This takes advantage of CLONE_VM in a way that breaks FEX's TLS usage
# Need to workaround this somehow
aio_test

# The Solidrun CI board isn't running a kernel with namespaces enabled
# gvisor doesn't error check correctly in this case and returns hard failure instead of correctly exiting
proc_pid_uid_gid_map_test

# This test is expecting the kernel to update the timeout variable but by default the kernel keeps time outs "sticky"
# See personality flag STICKY_TIMEOUTS
select_test

# This test uses syscalls that a userspace process doesn't have access to
syslog_test

# This very specifically is testing bad pointers passed in to the kernel to test EFAULT
access_test
sigprocmask_test

# Relies on some utility executables to be installed
exec_test

# This relies on splice behaviour that doesn't work even on x86 host
eventfd_test

# Fails even on host x86 device
# Expects the tmp folder to be empty and cleaned up between CI runs by a system process
# Runs tests that remove user privileges to touch those files
open_create_test

# Fails even on host x86 device
# Expects the /tmp folder to be cleaned up between INDIVIDUAL tests
# OpenTest.OTrunc creates /tmp/truncd
# OpenTest.OTruncAndReadOnlyDir attempts to create the same directory just afterwards
# Neither clean up after themselves
open_test

# This test doesn't correctly check to see if seccomp is enabled
# Attempts using regardless of enabled or not
seccomp_test

# Relies on ptrace to execute very simple binaries
exec_binary_test

# Relies on ptrace to send the parent process a signal
prctl_test

# Doesn't even pass on real x86 host
# GetrusageTest.IgnoreSIGCHLD is expecting memory usage to be zero, but it isn't
getrusage_test

# Doesn't even pass on real x86 host
sendfile_test

# Does a couple of things we don't support currently
# ForkTest.SigAltStack : Fork should inherit altstack
# ForkTest.Affinity : Affinity masks should be inherited
fork_test

# Requires accurate exit status from vfork
# Which we currently don't achieve
vfork_test

# Doesn't even pass on real x86 host
# Takes a long time to run as well
pty_test

# This syscall isn't implemented on ARM64
mempolicy_test

# Relies on namespaces to exist on the host (and user accessible) and isn't guaranteed by our CI
prctl_setuid_test

# This does 32-bit syscalls from a 64-bit process. Which we don't support yet
32bit_test

# We are lying to the guest application about true affinity. Missing some edges here
affinity_test

# Doesn't even pass on real x86 host
# Uses some private sandbox socket system?
# Seems like it needs some environment variables set that is google specific
connect_external_test

# Doesn't even pass on real x86 host
# Needs some helper executable to work
sigaltstack_test

# Sets up an alarm to fire every 500ms
# Sets up rcx and r11 to a known good value
# sets a boolean in memory and spins until the signal handler sets another boolean
# Stores the resulting values to a memory address
# Then checks to ensure the signal handler picked up the correct context values
# Seems like FEX currently doesn't?
sigiret_test

# Doesn't even pass on real x86 host
# This relies on splice behaviour that doesn't work even on x86 host
# Hangs after failing
splice_test

# Relies on ptrace to run
wait_test
sysret_test

# Has race conditions in the code and takes a long time to run
tcp_socket_test

# Doesn't even pass on real x86 host
socket_bind_to_device_test
socket_netlink_route_test
socket_bind_to_device_distribution_test
socket_bind_to_device_sequence_test
socket_inet_loopback_nogotsan_test
socket_ipv4_udp_unbound_loopback_nogotsan_test
socket_ipv4_udp_unbound_loopback_test

# This takes forever even on x86 host
socket_inet_loopback_test
socket_ip_tcp_generic_loopback_test
socket_stress_test
alarm_test
itimer_test
futex_test
fcntl_test

# these take a long time
ppoll_test
flock_test
pipe_test
epoll_test
concurrency_test

# This requires partial read support in the kernel
# Arm64 devices don't have this
partial_bad_buffer_test

# Depending on board can work or not, depending on kernel configuration
# IPv6 maybe?
socket_ipv6_udp_unbound_external_networking_test

# This test is flaky
proc_net_test
rtsignal_test
sigstop_test
stat_test

# These search for folders in /
# Might need to rbind some folders to make it happy
# This currently passes on x86-64 hosts
# This fails on AArch64 because getdents isn't emulated
getdents_test

# Flaky
udp_socket_test

# This test checks for the header layout from `/proc/net/udp` which has changed slightly with newer kernels
proc_net_udp_test

# This test is broken since kernel commit: 36e2c7421f02
# Upstream gvisor has the offending tests removed
inotify_test

# Needs testing
semaphore_test

# This test does an unsafe timer test. (NoTimeout subtest specifically).
# Sets a timer for 100ms then starts a pselect with unlimited timeout.
# If the timer fires before the pselect starts then pselect will hang forever.
# Since we are running these tests on a fully loaded system, we tend to context switch,
# causing the chance of a hang to occur quite frequently.
pselect_test

# Somewhere between kernel versions 5.15 and 6.0, timerfd lseek behaviour has changed.
# On older kernels it would return -ESPIPE, on new kernels it returns 0.
# This test expects -ESPIPE behaviour.
timerfd_test

# Sub-test SealGrowPartialWriteTruncated tests some behaviour of F_SEAL_GROW that changes behaviour depending on kernel version.
#  - Grows the memfd region to 1 page
#  - Seals it
#  - Seeks to 3/4 in to the page
#  - Tries to write one page of data
#  - Expects the kernel to only write 1024 bytes
#  - Newer kernels return EPERM instead
# Sub-test SealWriteWithMmap tests behaviour of F_SEAL_WRITE that changes behaviour depending on kernel version
# - Seals the memfd region
# - Tries to map it with MAP_SHARED and PROT_WRITE - Expects failure
# - Tries to map it with MAP_SHARED and PROT_READ - Expects failure
# - Older Linux kernel versions accidentally allowed PROT_READ mappings through.
memfd_test

# Subtest `TestInvalidFlag` tests expected invalid flag 0x80.
# With a new enough kernel this is now RWF_NOAPPEND
preadv2_test
pwritev2_test

# Tries to set a bunch of socket options that used to work on older kernels, but now they get rejected with EOPNOTSUPP
socket_abstract_test
socket_domain_test
socket_filesystem_test
socket_ip_tcp_loopback_test
socket_ip_udp_loopback_test

# Does some tricky signal masking and signal timer setup to try and ensure `sigtimedwait` still sends a timedwait signal when signals are blocked
# With a new enough kernel it seems `sigtimedwait` now immediately returns in this situation.
timers_test


================================================
FILE: unittests/gvisor-tests/Expected_Output
================================================
32bit_test 0
accept_bind_stream_test 0
accept_bind_test 0
access_test 0
affinity_test 0
aio_test 0
alarm_test 0
arch_prctl_test 0
bad_test 0
bind_test 0
brk_test 0
chdir_test 0
chmod_test 0
chown_test 0
chroot_test 0
clock_getres_test 0
clock_gettime_test 0
clock_nanosleep_test 0
concurrency_test 0
connect_external_test 0
creat_test 0
dev_test 0
dup_test 0
epoll_test 0
eventfd_test 0
exceptions_test 0
exec_binary_test 0
exec_test 0
exit_test 0
fadvise64_test 0
fallocate_test 0
fault_test 0
fchdir_test 0
fcntl_test 0
flock_test 0
fork_test 0
fpsig_fork_test 0
fpsig_nested_test 0
fsync_test 0
futex_test 0
getcpu_host_test 0
getcpu_test 0
getdents_test 0
getrandom_test 0
getrusage_test 0
inotify_test 0
ioctl_test 0
ip6tables_test 0
iptables_test 0
itimer_test 0
kcov_test 0
kill_test 0
link_test 0
lseek_test 0
madvise_test 0
membarrier_test 0
memfd_test 0
memory_accounting_test 0
mempolicy_test 0
mincore_test 0
mkdir_test 0
mknod_test 0
mlock_test 0
mmap_test 0
mount_test 0
mremap_test 0
msync_test 0
munmap_test 0
network_namespace_test 0
open_create_test 0
open_test 0
packet_socket_raw_test 0
packet_socket_test 0
partial_bad_buffer_test 0
pause_test 0
ping_socket_test 0
pipe_test 0
poll_test 0
ppoll_test 0
prctl_setuid_test 0
prctl_test 0
pread64_test 0
preadv2_test 0
preadv_test 0
priority_test 0
proc_net_tcp_test 0
proc_net_test 0
proc_net_udp_test 0
proc_net_unix_test 0
proc_pid_oomscore_test 0
proc_pid_smaps_test 0
proc_pid_uid_gid_map_test 0
proc_test 0
pselect_test 0
ptrace_test 0
pty_root_test 0
pty_test 0
pwrite64_test 0
pwritev2_test 0
raw_socket_hdrincl_test 0
raw_socket_icmp_test 0
raw_socket_test 0
readahead_test 0
read_test 0
readv_socket_test 0
readv_test 0
rename_test 0
rlimits_test 0
rseq_test 0
rtsignal_test 0
sched_test 0
sched_yield_test 0
seccomp_test 0
select_test 0
semaphore_test 0
sendfile_socket_test 0
sendfile_test 0
shm_test 0
sigaction_test 0
sigaltstack_test 0
sigiret_test 0
signalfd_test 0
sigprocmask_test 0
sigstop_test 0
sigtimedwait_test 0
socket_abstract_non_blocking_test 0
socket_abstract_test 0
socket_bind_to_device_distribution_test 0
socket_bind_to_device_sequence_test 0
socket_bind_to_device_test 0
socket_blocking_ip_test 0
socket_blocking_local_test 0
socket_capability_test 0
socket_domain_non_blocking_test 0
socket_domain_test 0
socket_filesystem_non_blocking_test 0
socket_filesystem_test 0
socket_inet_loopback_nogotsan_test 0
socket_inet_loopback_test 0
socket_ip_tcp_generic_loopback_test 0
socket_ip_tcp_loopback_non_blocking_test 0
socket_ip_tcp_loopback_test 0
socket_ip_tcp_udp_generic_loopback_test 0
socket_ip_udp_loopback_non_blocking_test 0
socket_ip_udp_loopback_test 0
socket_ip_unbound_netlink_test 0
socket_ip_unbound_test 0
socket_ipv4_udp_unbound_external_networking_test 0
socket_ipv4_udp_unbound_loopback_netlink_test 0
socket_ipv4_udp_unbound_loopback_nogotsan_test 0
socket_ipv4_udp_unbound_loopback_test 0
socket_ipv6_udp_unbound_external_networking_test 0
socket_ipv6_udp_unbound_loopback_netlink_test 0
socket_ipv6_udp_unbound_loopback_test 0
socket_netdevice_test 0
socket_netlink_route_test 0
socket_netlink_test 0
socket_netlink_uevent_test 0
socket_non_stream_blocking_local_test 0
socket_non_stream_blocking_udp_test 0
socket_stream_blocking_local_test 0
socket_stream_blocking_tcp_test 0
socket_stream_local_test 0
socket_stream_nonblock_local_test 0
socket_stress_test 0
socket_test 0
socket_unix_dgram_local_test 0
socket_unix_dgram_non_blocking_test 0
socket_unix_pair_test 0
socket_unix_seqpacket_local_test 0
socket_unix_stream_test 0
socket_unix_unbound_abstract_test 0
socket_unix_unbound_dgram_test 0
socket_unix_unbound_filesystem_test 0
socket_unix_unbound_seqpacket_test 0
socket_unix_unbound_stream_test 0
splice_test 0
statfs_test 0
stat_test 0
stat_times_test 0
sticky_test 0
symlink_test 0
sync_file_range_test 0
sync_test 0
sysinfo_test 0
syslog_test 0
sysret_test 0
tcp_socket_test 0
tgkill_test 0
timerfd_test 0
timers_test 0
time_test 0
tkill_test 0
truncate_test 0
tuntap_hostinet_test 0
tuntap_test 0
udp_bind_test 0
udp_socket_test 0
uidgid_test 0
uname_test 0
unlink_test 0
unshare_test 0
utimes_test 0
vdso_clock_gettime_test 0
vdso_test 0
vfork_test 0
vsyscall_test 0
wait_test 0
write_test 0
xattr_test 0


================================================
FILE: unittests/gvisor-tests/Flake_Tests
================================================
pselect_test
poll_test
# These are majorly flaky
socket_abstract_non_blocking_test
socket_abstract_test
socket_bind_to_device_distribution_test
socket_bind_to_device_sequence_test
socket_bind_to_device_test
socket_blocking_ip_test
socket_blocking_local_test
socket_capability_test
socket_domain_non_blocking_test
socket_domain_test
socket_filesystem_non_blocking_test
socket_filesystem_test
socket_inet_loopback_nogotsan_test
socket_inet_loopback_test
socket_ip_tcp_generic_loopback_test
socket_ip_tcp_loopback_non_blocking_test
socket_ip_tcp_loopback_test
socket_ip_tcp_udp_generic_loopback_test
socket_ip_udp_loopback_non_blocking_test
socket_ip_udp_loopback_test
socket_ip_unbound_netlink_test
socket_ip_unbound_test
socket_ipv4_udp_unbound_external_networking_test
socket_ipv4_udp_unbound_loopback_netlink_test
socket_ipv4_udp_unbound_loopback_nogotsan_test
socket_ipv4_udp_unbound_loopback_test
socket_ipv6_udp_unbound_external_networking_test
socket_ipv6_udp_unbound_loopback_netlink_test
socket_ipv6_udp_unbound_loopback_test
socket_netdevice_test
socket_netlink_route_test
socket_netlink_test
socket_netlink_uevent_test
socket_non_stream_blocking_local_test
socket_non_stream_blocking_udp_test
socket_stream_blocking_local_test
socket_stream_blocking_tcp_test
socket_stream_local_test
socket_stream_nonblock_local_test
socket_stress_test
socket_test
socket_unix_dgram_local_test
socket_unix_dgram_non_blocking_test
socket_unix_pair_test
socket_unix_seqpacket_local_test
socket_unix_stream_test
socket_unix_unbound_abstract_test
socket_unix_unbound_dgram_test
socket_unix_unbound_filesystem_test
socket_unix_unbound_seqpacket_test
socket_unix_unbound_stream_test


================================================
FILE: unittests/gvisor-tests/Known_Failures
================================================
# these fail on x86 CI
proc_pid_oomscore_test

# The behaviour of this is different between x86 and ARM
# SIGSEGV kills a thread successfully on ARM but not on x86
time_test

# these are disabled
# never terminates (even though we use timeout)
kill_test

# erratic test results
xattr_test
mlock_test

# This used to crash and fail its test; now it hangs
#mremap_test

# these are flaky on x86
stat_times_test

# Spins forever
ptrace_test

# The behaviour of this test changes depending on if running inside the test harness or not
# Works outside of the test harness but fails inside of it
pause_test

# This takes advantage of CLONE_VM in a way that breaks FEX's TLS usage
# Need to work around this somehow
aio_test

# The Solidrun CI board isn't running a kernel with namespaces enabled
# gvisor doesn't error check correctly in this case and returns hard failure instead of correctly exiting
proc_pid_uid_gid_map_test

# This test is expecting the kernel to update the timeout variable but by default the kernel keeps timeouts "sticky"
# See personality flag STICKY_TIMEOUTS
select_test

# This test uses syscalls that a userspace process doesn't have access to
syslog_test

# This very specifically is testing bad pointers passed in to the kernel to test EFAULT
access_test
sigprocmask_test

# Relies on some utility executables to be installed
exec_test

# This relies on splice behaviour that doesn't work even on x86 host
eventfd_test

# Relies on tricky behaviour
# MMapDeathTest.TruncateAfterCOWBreak : Needs to capture SIGBUS. Test fails, handwritten test works
# MMapTest.ProtNoneDeath : Needs to capture SIGBUS. Test fails, handwritten test works
# MMapTest.NoProtExecDeath : Needs to support SIGSEGV from memory mapped without PROT_EXEC but WITH READ + WRITE
# NoExceedLimitData : Requires brk to respect RLIMIT_DATA
mmap_test

# Fails even on host x86 device
# Expects the tmp folder to be empty and cleaned up between CI runs by a system process
# Runs tests that remove user privileges to touch those files
open_create_test

# Fails even on host x86 device
# Expects the /tmp folder to be cleaned up between INDIVIDUAL tests
# OpenTest.OTrunc creates /tmp/truncd
# OpenTest.OTruncAndReadOnlyDir attempts to create the same directory just afterwards
# Neither clean up after themselves
open_test

# This test doesn't correctly check to see if seccomp is enabled
# Attempts to use it regardless of whether it is enabled or not
seccomp_test

# Relies on ptrace to execute very simple binaries
exec_binary_test

# Relies on ptrace to send the parent process a signal
prctl_test

# Doesn't even pass on real x86 host
# GetrusageTest.IgnoreSIGCHLD is expecting memory usage to be zero, but it isn't
getrusage_test

# Doesn't even pass on real x86 host
sendfile_test

# Does a couple of things we don't support currently
# ForkTest.SigAltStack : Fork should inherit altstack
# ForkTest.Affinity : Affinity masks should be inherited
# CloneTest.NonCanonicalTLS : Non-Canonical TLS addresses should be rejected with -EPERM
fork_test

# Requires accurate exit status from vfork
# Which we currently don't achieve
vfork_test

# Doesn't even pass on real x86 host
# ProcSelfFd.GetdentsDuplicates : Expects fcntl F_DUPFD, 1024 to work. Returns -1
# ProcSelfFdInfo.GetdentsDuplicates : Same as above
# ProcSelfAuxv.EntryPresence expects AT_SYSINFO_EHDR to exist. We don't support VDSO atm
# ProcPidCmdline.SubprocessForkSameCmdline expects to read parent pid's /proc/{pid}/cmdline and get executable name. Expecting a match.
# ProcPidExe.Subprocess : Same as above
# ProcPidEnviron.MatchesEnviron Doesn't expect us injecting environment variables in to its container space
proc_test

# Doesn't even pass on real x86 host
# Takes a long time to run as well
pty_test

# This syscall isn't implemented on ARM64
mempolicy_test

# Relies on namespaces to exist on the host (and user accessible) and isn't guaranteed by our CI
prctl_setuid_test

# This does 32-bit syscalls from a 64-bit process. Which we don't support yet
32bit_test

# We are lying to the guest application about true affinity. Missing some edges here
affinity_test

# Doesn't even pass on real x86 host
# Uses some private sandbox socket system?
# Seems like it needs some environment variables set that are Google-specific
connect_external_test

# Tests a bunch of different exception exits and we don't support them all
exceptions_test

# These search for folders in /
# Might need to rbind some folders to make it happy
# This currently passes on x86-64 hosts
# This fails on AArch64 because getdents isn't emulated
getdents_test

# Doesn't even pass on real x86 host
# Needs some helper executable to work
sigaltstack_test

# Sets up an alarm to fire every 500ms
# Sets up rcx and r11 to a known good value
# sets a boolean in memory and spins until the signal handler sets another boolean
# Stores the resulting values to a memory address
# Then checks to ensure the signal handler picked up the correct context values
# Seems like FEX currently doesn't?
sigiret_test

# Doesn't even pass on real x86 host
# This relies on splice behaviour that doesn't work even on x86 host
# Hangs after failing
splice_test

# Relies on ptrace to run
wait_test
sysret_test

# Has race conditions in the code and takes a long time to run
tcp_socket_test

# Doesn't even pass on real x86 host
socket_bind_to_device_test
socket_netlink_route_test
socket_bind_to_device_distribution_test
socket_bind_to_device_sequence_test
socket_inet_loopback_nogotsan_test
socket_ipv4_udp_unbound_loopback_nogotsan_test
socket_ipv4_udp_unbound_loopback_test

# This takes forever even on x86 host
socket_inet_loopback_test
socket_ip_tcp_generic_loopback_test
socket_stress_test
alarm_test
itimer_test
futex_test
fcntl_test

# these take a long time
ppoll_test
flock_test
pipe_test
epoll_test
concurrency_test

# This requires partial read support in the kernel
# Tries reading a page's worth of bytes from an FD across a page with no permissions
# Expecting a partial read to succeed
# Arm64 devices don't have this
partial_bad_buffer_test

# May or may not work depending on the board and its kernel configuration
# IPv6 maybe?
socket_ipv6_udp_unbound_external_networking_test

# This test is flaky
proc_net_test
rtsignal_test
sigstop_test
stat_test

# Flaky
udp_socket_test

# This test checks for the header layout from `/proc/net/udp` which has changed slightly with newer kernels
proc_net_udp_test

# This test is broken since kernel commit: 36e2c7421f02
# Upstream gvisor has the offending tests removed
inotify_test

# Needs testing
semaphore_test

# This test does an unsafe timer test. (NoTimeout subtest specifically).
# Sets a timer for 100ms then starts a pselect with unlimited timeout.
# If the timer fires before the pselect starts then pselect will hang forever.
# Since we are running these tests on a fully loaded system, we tend to context switch,
# causing the chance of a hang to occur quite frequently.
pselect_test

# Somewhere between kernel versions 5.15 and 6.0, timerfd lseek behaviour has changed.
# On older kernels it would return -ESPIPE, on new kernels it returns 0.
# This test expects -ESPIPE behaviour.
timerfd_test

# Tests that poll correctly returns EFAULT
poll_test